diff -Nru mesa-18.3.3/Android.common.mk mesa-19.0.1/Android.common.mk --- mesa-18.3.3/Android.common.mk 2018-12-07 18:58:04.000000000 +0000 +++ mesa-19.0.1/Android.common.mk 2019-03-31 23:16:37.000000000 +0000 @@ -37,7 +37,6 @@ -Wno-missing-field-initializers \ -Wno-initializer-overrides \ -Wno-mismatched-tags \ - -DVERSION=\"$(MESA_VERSION)\" \ -DPACKAGE_VERSION=\"$(MESA_VERSION)\" \ -DPACKAGE_BUGREPORT=\"https://bugs.freedesktop.org/enter_bug.cgi?product=Mesa\" diff -Nru mesa-18.3.3/Android.mk mesa-19.0.1/Android.mk --- mesa-18.3.3/Android.mk 2017-11-14 18:46:21.000000000 +0000 +++ mesa-19.0.1/Android.mk 2019-03-31 23:16:37.000000000 +0000 @@ -24,7 +24,7 @@ # BOARD_GPU_DRIVERS should be defined. The valid values are # # classic drivers: i915 i965 -# gallium drivers: swrast freedreno i915g nouveau pl111 r300g r600g radeonsi vc4 virgl vmwgfx etnaviv imx +# gallium drivers: swrast freedreno i915g nouveau kmsro r300g r600g radeonsi vc4 virgl vmwgfx etnaviv # # The main target is libGLES_mesa. For each classic driver enabled, a DRI # module will also be built. DRI modules will be loaded by libGLES_mesa. 
@@ -52,15 +52,14 @@ freedreno.HAVE_GALLIUM_FREEDRENO \ i915g.HAVE_GALLIUM_I915 \ nouveau.HAVE_GALLIUM_NOUVEAU \ - pl111.HAVE_GALLIUM_PL111 \ + kmsro.HAVE_GALLIUM_KMSRO \ r300g.HAVE_GALLIUM_R300 \ r600g.HAVE_GALLIUM_R600 \ radeonsi.HAVE_GALLIUM_RADEONSI \ vmwgfx.HAVE_GALLIUM_VMWGFX \ vc4.HAVE_GALLIUM_VC4 \ virgl.HAVE_GALLIUM_VIRGL \ - etnaviv.HAVE_GALLIUM_ETNAVIV \ - imx.HAVE_GALLIUM_IMX + etnaviv.HAVE_GALLIUM_ETNAVIV ifeq ($(BOARD_GPU_DRIVERS),all) MESA_BUILD_CLASSIC := $(filter HAVE_%, $(subst ., , $(classic_drivers))) diff -Nru mesa-18.3.3/bin/.cherry-ignore mesa-19.0.1/bin/.cherry-ignore --- mesa-18.3.3/bin/.cherry-ignore 2019-02-01 12:03:20.000000000 +0000 +++ mesa-19.0.1/bin/.cherry-ignore 2019-03-31 23:16:37.000000000 +0000 @@ -1,16 +1,17 @@ -# fixes: Commit was squashed into the respective offenders -c02390f8fcd367c7350db568feabb2f062efca14 egl/wayland: rather obvious build fix -# fixes: The commit addresses b4476138d5ad3f8d30c14ee61f2f375edfdbab2a -ff6f1dd0d3c6b4c15ca51b478b2884d14f6a1e06 meson: libfreedreno depends upon libdrm (for fence support) +# Both of these were already merged with different shas +da48cba61ef6fefb799bf96e6364b70dbf4ec712 +c812c740e60c14060eb89db66039111881a0f42f -# fixes: This commit requires commits aeaf8dbd097 and 7484bc894b9 which did not -# land in branch. -f67dea5e19ef14187be0e8d0f61b1f764c7ccb4f radv: Fix multiview depth clears +# The commit these fix was reverted from 19.0, but fixed for 19.1 due +# to the number of fixes required to make that commit work +8d8f80af3a17354508f2ec9d6559c915d5be351d +0c0c69729b6d72a5297122856c8fe48510e90764 +0881e90c09965818b02e359474a6f7446b41d647 +b031c643491a92a5574c7a4bd659df33f2d89bb6 -# stable The commits aren't suitable in their present form. 
-bfe31c5e461a1330d6f606bf5310685eff1198dd nir/builder: Add nir_i2i and nir_u2u helpers which take a bit size -abfe674c54bee6f8fdcae411b07db89c10b9d530 spirv: Handle arbitrary bit sizes for deref array indices +# These were manually rebased by Jason, thanks! +8ab95b849e66f3221d80a67eef2ec6e3730901a8 +5c30fffeec1732c21d600c036f95f8cdb1bb5487 -# warn The commits refer stale sha, yet don't fix anything in particular. -98984b7cdd79c15cc7331c791f8be61e873b8bbd Revert "mapi/new: sort by slot number" -9f86f1da7c68b5b900cd6f60925610ff1225a72d egl: add glvnd entrypoints for EGL_MESA_query_driver +# This doesn't actually appliy to 19.0 +29179f58c6ba8099859ea25900214dbbd3814a92 \ No newline at end of file diff -Nru mesa-18.3.3/bin/get-pick-list.sh mesa-19.0.1/bin/get-pick-list.sh --- mesa-18.3.3/bin/get-pick-list.sh 2019-02-01 12:03:20.000000000 +0000 +++ mesa-19.0.1/bin/get-pick-list.sh 2019-03-31 23:16:37.000000000 +0000 @@ -13,12 +13,12 @@ is_stable_nomination() { - git show --summary "$1" | grep -q -i -o "CC:.*mesa-stable" + git show --pretty=medium --summary "$1" | grep -q -i -o "CC:.*mesa-stable" } is_typod_nomination() { - git show --summary "$1" | grep -q -i -o "CC:.*mesa-dev" + git show --pretty=medium --summary "$1" | grep -q -i -o "CC:.*mesa-dev" } fixes= diff -Nru mesa-18.3.3/bin/install_megadrivers.py mesa-19.0.1/bin/install_megadrivers.py --- mesa-18.3.3/bin/install_megadrivers.py 2018-12-07 18:58:04.000000000 +0000 +++ mesa-19.0.1/bin/install_megadrivers.py 2019-03-31 23:16:37.000000000 +0000 @@ -35,7 +35,11 @@ args = parser.parse_args() if os.path.isabs(args.libdir): - to = os.path.join(os.environ.get('DESTDIR', '/'), args.libdir[1:]) + destdir = os.environ.get('DESTDIR') + if destdir: + to = os.path.join(destdir, args.libdir[1:]) + else: + to = args.libdir else: to = os.path.join(os.environ['MESON_INSTALL_DESTDIR_PREFIX'], args.libdir) diff -Nru mesa-18.3.3/bin/meson-cmd-extract.py mesa-19.0.1/bin/meson-cmd-extract.py --- mesa-18.3.3/bin/meson-cmd-extract.py 
1970-01-01 00:00:00.000000000 +0000 +++ mesa-19.0.1/bin/meson-cmd-extract.py 2019-03-31 23:16:37.000000000 +0000 @@ -0,0 +1,88 @@ +#!/usr/bin/env python3 +# Copyright © 2019 Intel Corporation + +# Permission is hereby granted, free of charge, to any person obtaining a copy +# of this software and associated documentation files (the "Software"), to deal +# in the Software without restriction, including without limitation the rights +# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +# copies of the Software, and to permit persons to whom the Software is +# furnished to do so, subject to the following conditions: + +# The above copyright notice and this permission notice shall be included in +# all copies or substantial portions of the Software. + +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +# SOFTWARE. + +"""This script reads a meson build directory and gives back the command line it +was configured with. + +This only works for meson 0.49.0 and newer. 
+""" + +import argparse +import ast +import configparser +import pathlib +import sys + + +def parse_args() -> argparse.Namespace: + """Parse arguments.""" + parser = argparse.ArgumentParser() + parser.add_argument( + 'build_dir', + help='Path the meson build directory') + args = parser.parse_args() + return args + + +def load_config(path: pathlib.Path) -> configparser.ConfigParser: + """Load config file.""" + conf = configparser.ConfigParser() + with path.open() as f: + conf.read_file(f) + return conf + + +def build_cmd(conf: configparser.ConfigParser) -> str: + """Rebuild the command line.""" + args = [] + for k, v in conf['options'].items(): + if ' ' in v: + args.append(f'-D{k}="{v}"') + else: + args.append(f'-D{k}={v}') + + cf = conf['properties'].get('cross_file') + if cf: + args.append('--cross-file={}'.format(cf)) + nf = conf['properties'].get('native_file') + if nf: + # this will be in the form "['str', 'str']", so use ast.literal_eval to + # convert it to a list of strings. + nf = ast.literal_eval(nf) + args.extend(['--native-file={}'.format(f) for f in nf]) + return ' '.join(args) + + +def main(): + args = parse_args() + path = pathlib.Path(args.build_dir, 'meson-private', 'cmd_line.txt') + if not path.exists(): + print('Cannot find the necessary file to rebuild command line. 
' + 'Is your meson version >= 0.49.0?', file=sys.stderr) + sys.exit(1) + + conf = load_config(path) + cmd = build_cmd(conf) + print(cmd) + + +if __name__ == '__main__': + main() diff -Nru mesa-18.3.3/configure.ac mesa-19.0.1/configure.ac --- mesa-18.3.3/configure.ac 2019-02-01 12:03:20.000000000 +0000 +++ mesa-19.0.1/configure.ac 2019-03-31 23:16:37.000000000 +0000 @@ -52,6 +52,19 @@ ;; esac +AC_ARG_ENABLE(autotools, + [AS_HELP_STRING([--enable-autotools], + [Enable the use of this autotools based build configuration])], + [enable_autotools=$enableval], [enable_autotools=no]) + +if test "x$enable_autotools" != "xyes" ; then + AC_MSG_ERROR([the autotools build system has been deprecated in favour of + meson and will be removed eventually. For instructions on how to use meson + see https://www.mesa3d.org/meson.html. + If you still want to use the autotools build, then add --enable-autotools + to the configure command line.]) +fi + # Support silent build rules, requires at least automake-1.11. Disable # by either passing --disable-silent-rules to configure or passing V=1 # to make @@ -74,7 +87,7 @@ # in the first entry. LIBDRM_REQUIRED=2.4.75 LIBDRM_RADEON_REQUIRED=2.4.71 -LIBDRM_AMDGPU_REQUIRED=2.4.95 +LIBDRM_AMDGPU_REQUIRED=2.4.97 LIBDRM_INTEL_REQUIRED=2.4.75 LIBDRM_NVVIEUX_REQUIRED=2.4.66 LIBDRM_NOUVEAU_REQUIRED=2.4.66 @@ -107,9 +120,9 @@ LLVM_REQUIRED_GALLIUM=3.3.0 LLVM_REQUIRED_OPENCL=3.9.0 LLVM_REQUIRED_R600=3.9.0 -LLVM_REQUIRED_RADEONSI=6.0.0 -LLVM_REQUIRED_RADV=6.0.0 -LLVM_REQUIRED_SWR=6.0.0 +LLVM_REQUIRED_RADEONSI=7.0.0 +LLVM_REQUIRED_RADV=7.0.0 +LLVM_REQUIRED_SWR=7.0.0 dnl Check for progs AC_PROG_CPP @@ -1395,7 +1408,7 @@ AC_ARG_WITH([gallium-drivers], [AS_HELP_STRING([--with-gallium-drivers@<:@=DIRS...@:>@], [comma delimited Gallium drivers list, e.g. 
- "i915,nouveau,r300,r600,radeonsi,freedreno,pl111,svga,swrast,swr,tegra,v3d,vc4,virgl,etnaviv,imx" + "i915,nouveau,r300,r600,radeonsi,freedreno,kmsro,svga,swrast,swr,tegra,v3d,vc4,virgl,etnaviv" @<:@default=r300,r600,svga,swrast@:>@])], [with_gallium_drivers="$withval"], [with_gallium_drivers="$GALLIUM_DRIVERS_DEFAULT"]) @@ -1909,7 +1922,7 @@ dri3_modifier_modules="xcb-dri3 >= $XCBDRI3_MODIFIERS_REQUIRED xcb-present >= $XCBPRESENT_MODIFIERS_REQUIRED" PKG_CHECK_MODULES([XCB_DRI3_MODIFIERS], [$dri3_modifier_modules], [have_dri3_modifiers=yes], [have_dri3_modifiers=no]) - if test "x$have_dri3_modifiers" == xyes; then + if test "x$have_dri3_modifiers" = xyes; then DEFINES="$DEFINES -DHAVE_DRI3_MODIFIERS" fi fi @@ -2728,9 +2741,6 @@ PKG_CHECK_MODULES([ETNAVIV], [libdrm >= $LIBDRM_ETNAVIV_REQUIRED libdrm_etnaviv >= $LIBDRM_ETNAVIV_REQUIRED]) require_libdrm "etnaviv" ;; - ximx) - HAVE_GALLIUM_IMX=yes - ;; xtegra) HAVE_GALLIUM_TEGRA=yes require_libdrm "tegra" @@ -2817,8 +2827,8 @@ DEFINES="$DEFINES -DUSE_V3D_SIMULATOR"], [USE_V3D_SIMULATOR=no]) ;; - xpl111) - HAVE_GALLIUM_PL111=yes + xkmsro) + HAVE_GALLIUM_KMSRO=yes ;; xvirgl) HAVE_GALLIUM_VIRGL=yes @@ -2835,8 +2845,8 @@ fi # XXX: Keep in sync with LLVM_REQUIRED_SWR -AM_CONDITIONAL(SWR_INVALID_LLVM_VERSION, test "x$LLVM_VERSION" != x6.0.0 -a \ - "x$LLVM_VERSION" != x6.0.1) +AM_CONDITIONAL(SWR_INVALID_LLVM_VERSION, test "x$LLVM_VERSION" != x7.0.0 -a \ + "x$LLVM_VERSION" != x7.0.1) if test "x$enable_llvm" = "xyes" -a "$with_gallium_drivers"; then llvm_require_version $LLVM_REQUIRED_GALLIUM "gallium" @@ -2851,12 +2861,8 @@ dnl We need to validate some needed dependencies for renderonly drivers. 
-if test "x$HAVE_GALLIUM_ETNAVIV" != xyes -a "x$HAVE_GALLIUM_IMX" = xyes ; then - AC_MSG_ERROR([Building with imx requires etnaviv]) -fi - -if test "x$HAVE_GALLIUM_VC4" != xyes -a "x$HAVE_GALLIUM_PL111" = xyes ; then - AC_MSG_ERROR([Building with pl111 requires vc4]) +if test "x$HAVE_GALLIUM_VC4" != xyes -a "x$HAVE_GALLIUM_KMSRO" = xyes ; then + AC_MSG_ERROR([Building with kmsro requires vc4]) fi if test "x$HAVE_GALLIUM_NOUVEAU" != xyes -a "x$HAVE_GALLIUM_TEGRA" = xyes; then @@ -2904,6 +2910,7 @@ LLVM_LDFLAGS=`$LLVM_CONFIG --ldflags` LLVM_CFLAGS=$LLVM_CPPFLAGS # CPPFLAGS seem to be sufficient LLVM_CXXFLAGS=`strip_unwanted_llvm_flags "$LLVM_CONFIG --cxxflags"` + LLVM_CXXFLAGS="$CXX11_CXXFLAGS $LLVM_CXXFLAGS" dnl Set LLVM_LIBS - This is done after the driver configuration so dnl that drivers can add additional components to LLVM_COMPONENTS. @@ -2938,11 +2945,11 @@ fi dnl The gallium-xlib GLX and gallium OSMesa targets directly embed the - dnl swr/llvmpipe driver into the final binary. Adding LLVM_LIBS results in + dnl swr/llvmpipe driver into the final binary. Adding LLVM_LIBS results in dnl the LLVM library propagated in the Libs.private of the respective .pc dnl file which ensures complete dependency information when statically dnl linking. 
- if test "x$enable_glx" == xgallium-xlib; then + if test "x$enable_glx" = xgallium-xlib; then GL_PC_LIB_PRIV="$GL_PC_LIB_PRIV $LLVM_LIBS" fi if test "x$enable_gallium_osmesa" = xyes; then @@ -2952,14 +2959,13 @@ AM_CONDITIONAL(HAVE_GALLIUM_SVGA, test "x$HAVE_GALLIUM_SVGA" = xyes) AM_CONDITIONAL(HAVE_GALLIUM_I915, test "x$HAVE_GALLIUM_I915" = xyes) -AM_CONDITIONAL(HAVE_GALLIUM_PL111, test "x$HAVE_GALLIUM_PL111" = xyes) +AM_CONDITIONAL(HAVE_GALLIUM_KMSRO, test "x$HAVE_GALLIUM_KMSRO" = xyes) AM_CONDITIONAL(HAVE_GALLIUM_R300, test "x$HAVE_GALLIUM_R300" = xyes) AM_CONDITIONAL(HAVE_GALLIUM_R600, test "x$HAVE_GALLIUM_R600" = xyes) AM_CONDITIONAL(HAVE_GALLIUM_RADEONSI, test "x$HAVE_GALLIUM_RADEONSI" = xyes) AM_CONDITIONAL(HAVE_GALLIUM_NOUVEAU, test "x$HAVE_GALLIUM_NOUVEAU" = xyes) AM_CONDITIONAL(HAVE_GALLIUM_FREEDRENO, test "x$HAVE_GALLIUM_FREEDRENO" = xyes) AM_CONDITIONAL(HAVE_GALLIUM_ETNAVIV, test "x$HAVE_GALLIUM_ETNAVIV" = xyes) -AM_CONDITIONAL(HAVE_GALLIUM_IMX, test "x$HAVE_GALLIUM_IMX" = xyes) AM_CONDITIONAL(HAVE_GALLIUM_TEGRA, test "x$HAVE_GALLIUM_TEGRA" = xyes) AM_CONDITIONAL(HAVE_GALLIUM_SOFTPIPE, test "x$HAVE_GALLIUM_SOFTPIPE" = xyes) AM_CONDITIONAL(HAVE_GALLIUM_LLVMPIPE, test "x$HAVE_GALLIUM_LLVMPIPE" = xyes) @@ -2998,6 +3004,7 @@ AM_CONDITIONAL(HAVE_BROADCOM_DRIVERS, test "x$HAVE_GALLIUM_VC4" = xyes -o \ "x$HAVE_GALLIUM_V3D" = xyes) +AM_CONDITIONAL(HAVE_FREEDRENO_DRIVERS, test "x$HAVE_GALLIUM_FREEDRENO" = xyes) AM_CONDITIONAL(HAVE_INTEL_DRIVERS, test "x$HAVE_INTEL_VULKAN" = xyes -o \ "x$HAVE_I965_DRI" = xyes) @@ -3044,7 +3051,7 @@ AC_SUBST([XVMC_MINOR], 0) AC_SUBST([XA_MAJOR], 2) -AC_SUBST([XA_MINOR], 4) +AC_SUBST([XA_MINOR], 5) AC_SUBST([XA_PATCH], 0) AC_SUBST([XA_VERSION], "$XA_MAJOR.$XA_MINOR.$XA_PATCH") @@ -3090,6 +3097,7 @@ src/amd/vulkan/Makefile src/broadcom/Makefile src/compiler/Makefile + src/freedreno/Makefile src/egl/Makefile src/egl/main/egl.pc src/egl/wayland/wayland-drm/Makefile @@ -3100,7 +3108,7 @@ src/gallium/drivers/i915/Makefile 
src/gallium/drivers/llvmpipe/Makefile src/gallium/drivers/nouveau/Makefile - src/gallium/drivers/pl111/Makefile + src/gallium/drivers/kmsro/Makefile src/gallium/drivers/r300/Makefile src/gallium/drivers/r600/Makefile src/gallium/drivers/radeonsi/Makefile @@ -3109,7 +3117,6 @@ src/gallium/drivers/swr/Makefile src/gallium/drivers/tegra/Makefile src/gallium/drivers/etnaviv/Makefile - src/gallium/drivers/imx/Makefile src/gallium/drivers/v3d/Makefile src/gallium/drivers/vc4/Makefile src/gallium/drivers/virgl/Makefile @@ -3144,11 +3151,10 @@ src/gallium/tests/trivial/Makefile src/gallium/tests/unit/Makefile src/gallium/winsys/etnaviv/drm/Makefile - src/gallium/winsys/imx/drm/Makefile src/gallium/winsys/freedreno/drm/Makefile src/gallium/winsys/i915/drm/Makefile src/gallium/winsys/nouveau/drm/Makefile - src/gallium/winsys/pl111/drm/Makefile + src/gallium/winsys/kmsro/drm/Makefile src/gallium/winsys/radeon/drm/Makefile src/gallium/winsys/amdgpu/drm/Makefile src/gallium/winsys/svga/drm/Makefile diff -Nru mesa-18.3.3/debian/changelog mesa-19.0.1/debian/changelog --- mesa-18.3.3/debian/changelog 2019-02-01 12:03:52.000000000 +0000 +++ mesa-19.0.1/debian/changelog 2019-03-31 23:20:01.000000000 +0000 @@ -1,3 +1,10 @@ +mesa (19.0.1-0~c~padoka0) cosmic; urgency=high + + * backport from stable branch 19.0.1 + * compiled with llvm 8.x stable + + -- Paulo Dias Mon, 01 Apr 2019 00:20:01 +0100 + mesa (18.3.3-1~c~padoka0) cosmic; urgency=high * backport from stable branch 18.3.3 diff -Nru mesa-18.3.3/debian/control mesa-19.0.1/debian/control --- mesa-18.3.3/debian/control 2019-01-13 21:24:53.000000000 +0000 +++ mesa-19.0.1/debian/control 2019-03-31 23:17:04.000000000 +0000 @@ -39,11 +39,11 @@ python-mako, flex, bison, - llvm-7-dev (>= 1:7~) [amd64 arm64 armel armhf i386 kfreebsd-amd64 kfreebsd-i386 mips mips64el mipsel powerpc ppc64 sparc64 ppc64el], + llvm-8-dev (>= 1:8~) [amd64 arm64 armel armhf i386 kfreebsd-amd64 kfreebsd-i386 mips mips64el mipsel powerpc ppc64 sparc64 ppc64el], 
libelf-dev [amd64 arm64 armel armhf i386 kfreebsd-amd64 kfreebsd-i386 mips mips64el mipsel powerpc ppc64 sparc64 ppc64el], libwayland-dev (>= 1.15.0) [linux-any], libwayland-egl-backend-dev (>= 1.15.0) [linux-any], - libclang-7-dev (>= 1:7~) [amd64 arm64 armel armhf i386 kfreebsd-amd64 kfreebsd-i386 mips mips64el mipsel powerpc ppc64 sparc64 ppc64el], + libclang-8-dev (>= 1:8~) [amd64 arm64 armel armhf i386 kfreebsd-amd64 kfreebsd-i386 mips mips64el mipsel powerpc ppc64 sparc64 ppc64el], libclc-dev (>= 0.2.0+git20180917-1~) [amd64 arm64 armel armhf i386 kfreebsd-amd64 kfreebsd-i386 mips mips64el mipsel powerpc ppc64 sparc64 ppc64el], wayland-protocols (>= 1.9), zlib1g-dev, diff -Nru mesa-18.3.3/debian/patches/version mesa-19.0.1/debian/patches/version --- mesa-18.3.3/debian/patches/version 2019-02-01 12:03:52.000000000 +0000 +++ mesa-19.0.1/debian/patches/version 2019-03-31 23:19:25.000000000 +0000 @@ -1,5 +1,5 @@ --- a/VERSION +++ b/VERSION @@ -1 +1 @@ --18.3.3 -+18.3.3 - padoka PPA +-19.0.1 ++19.0.1 - padoka PPA diff -Nru mesa-18.3.3/debian/rules mesa-19.0.1/debian/rules --- mesa-18.3.3/debian/rules 2019-01-13 21:25:16.000000000 +0000 +++ mesa-19.0.1/debian/rules 2019-03-31 23:17:49.000000000 +0000 @@ -99,7 +99,7 @@ GALLIUM_DRIVERS += radeonsi confflags_GALLIUM += --enable-llvm confflags_GALLIUM += --enable-opencl --enable-opencl-icd - confflags_GALLIUM += ac_cv_path_LLVM_CONFIG=llvm-config-7 + confflags_GALLIUM += ac_cv_path_LLVM_CONFIG=llvm-config-8 GALLIUM_DRIVERS += swrast # nine makes sense only on archs that build wine @@ -148,6 +148,7 @@ --enable-shared-glapi \ --disable-xvmc \ --disable-omx-bellagio \ + --enable-autotools \ $(confflags_DIRECT_RENDERING) \ $(confflags_GBM) \ $(confflags_DRI3) \ diff -Nru mesa-18.3.3/docs/autoconf.html mesa-19.0.1/docs/autoconf.html --- mesa-18.3.3/docs/autoconf.html 2018-12-07 18:58:04.000000000 +0000 +++ mesa-19.0.1/docs/autoconf.html 2019-03-31 23:16:37.000000000 +0000 @@ -26,6 +26,12 @@ +

ATTENTION:

+

+ The autotools build is being replaced by the meson + build system. If you haven't yet now is a good time to try using meson and + report any issues you run into. +

1. Basic Usage

diff -Nru mesa-18.3.3/docs/features.txt mesa-19.0.1/docs/features.txt --- mesa-18.3.3/docs/features.txt 2018-12-07 18:58:04.000000000 +0000 +++ mesa-19.0.1/docs/features.txt 2019-03-31 23:16:37.000000000 +0000 @@ -319,7 +319,7 @@ GL_EXT_memory_object DONE (radeonsi) GL_EXT_memory_object_fd DONE (radeonsi) GL_EXT_memory_object_win32 not started - GL_EXT_render_snorm DONE (i965) + GL_EXT_render_snorm DONE (i965, radeonsi) GL_EXT_semaphore DONE (radeonsi) GL_EXT_semaphore_fd DONE (radeonsi) GL_EXT_semaphore_win32 not started @@ -338,7 +338,7 @@ GL_OES_texture_float_linear DONE (freedreno, i965, r300, r600, radeonsi, nv30, nv50, nvc0, softpipe, llvmpipe) GL_OES_texture_half_float DONE (freedreno, i965, r300, r600, radeonsi, nv30, nv50, nvc0, softpipe, llvmpipe) GL_OES_texture_half_float_linear DONE (freedreno, i965, r300, r600, radeonsi, nv30, nv50, nvc0, softpipe, llvmpipe) - GL_OES_texture_view DONE (i965/gen8+) + GL_OES_texture_view DONE (freedreno, i965/gen8+, r600, radeonsi, nv50, nvc0, softpipe, llvmpipe, swr) GL_OES_viewport_array DONE (i965, nvc0, radeonsi) GLX_ARB_context_flush_control not started GLX_ARB_robustness_application_isolation not started diff -Nru mesa-18.3.3/docs/index.html mesa-19.0.1/docs/index.html --- mesa-18.3.3/docs/index.html 2018-12-07 18:58:04.000000000 +0000 +++ mesa-19.0.1/docs/index.html 2019-03-31 23:16:37.000000000 +0000 @@ -15,6 +15,53 @@

News

+

January 17, 2019

+

+Mesa 18.3.2 is released. +This is a bug-fix release. +

+ +

December 27, 2018

+

+Mesa 18.2.8 is released. +This is a bug-fix release. +
+NOTE: It is anticipated that 18.2.8 will be the final release in the +18.2 series. Users of 18.2 are encouraged to migrate to the 18.3 +series in order to obtain future fixes. +

+ +

December 13, 2018

+

+Mesa 18.2.7 is released. +This is a bug-fix release. +

+ +

December 11, 2018

+

+Mesa 18.3.1 is released. +This is a bug-fix release. +

+ +

December 7, 2018

+

+Mesa 18.3.0 is released. This is a +new development release. See the release notes for more information +about the release. +

+ +

November 28, 2018

+

+Mesa 18.2.6 is released. +This is a bug-fix release. +

+ +

November 15, 2018

+

+Mesa 18.2.5 is released. +This is a bug-fix release. +

+

October 31, 2018

Mesa 18.2.4 is released. diff -Nru mesa-18.3.3/docs/install.html mesa-19.0.1/docs/install.html --- mesa-18.3.3/docs/install.html 2018-12-07 18:58:04.000000000 +0000 +++ mesa-19.0.1/docs/install.html 2019-03-31 23:16:37.000000000 +0000 @@ -22,6 +22,7 @@

  • General prerequisites
  • For DRI and hardware acceleration +
  • Building with meson
  • Building with autoconf (Linux/Unix/X11)
  • Building with SCons (Windows/Linux)
  • Building with AOSP (Android) @@ -39,9 +40,10 @@

      -
    • Autoconf is required when building on *nix platforms. +
    • meson is recommended when building on *nix platforms. +
    • Autoconf is another option when building on *nix platforms.
    • SCons is required for building on -Windows and optional for Linux (it's an alternative to autoconf/automake.) +Windows and optional for Linux (it's an alternative to autoconf/automake or meson.)
    • Android Build system when building as native Android component. Autoconf is used when when building ARC. @@ -72,7 +74,9 @@
      • Python - Python is required. -Version 2.7 or later should work. +When building with scons 2.7 is required. +When building with meson 3.5 or newer is required. +When building with autotools 2.7, or 3.5 or later are required.
      • Python Mako module - Python Mako module is required. Version 0.8.0 or later should work. @@ -111,11 +115,31 @@ ... # others +

        2. Building with meson

        -

        2. Building with autoconf (Linux/Unix/X11)

        +

        +Meson is the latest build system in mesa, it is currently able to build for +*nix systems like Linux and BSD, and will be able to build for windows as well. +

        + +

        +The general approach is: +

        +
        +  meson builddir/
        +  ninja -C builddir/
        +  sudo ninja -C builddir/ install
        +
        +

        +Please read the detailed meson instructions +for more information +

        + +

        3. Building with autoconf (Linux/Unix/X11)

        -The primary method to build Mesa on Unix systems is with autoconf. +Although meson is recommended, another supported way to build on *nix systems +is with autoconf.

        @@ -133,7 +157,7 @@ -

        3. Building with SCons (Windows/Linux)

        +

        4. Building with SCons (Windows/Linux)

        To build Mesa with SCons on Linux or Windows do @@ -169,7 +193,7 @@ -

        4. Building with AOSP (Android)

        +

        5. Building with AOSP (Android)

        Currently one can build Mesa for Android as part of the AOSP project, yet @@ -188,7 +212,7 @@

        -

        5. Library Information

        +

        6. Library Information

        When compilation has finished, look in the top-level lib/ @@ -226,7 +250,7 @@

        -

        6. Building OpenGL programs with pkg-config

        +

        7. Building OpenGL programs with pkg-config

        Running make install will install package configuration files diff -Nru mesa-18.3.3/docs/mesa.css mesa-19.0.1/docs/mesa.css --- mesa-18.3.3/docs/mesa.css 2017-11-05 00:14:08.000000000 +0000 +++ mesa-19.0.1/docs/mesa.css 2019-03-31 23:16:37.000000000 +0000 @@ -29,6 +29,9 @@ /*font-family: monospace;*/ font-size: 10pt; /*color: black;*/ + background-color: #eee; + margin-left: 2em; + padding: .5em; } iframe { diff -Nru mesa-18.3.3/docs/meson.html mesa-19.0.1/docs/meson.html --- mesa-18.3.3/docs/meson.html 2018-12-07 18:58:04.000000000 +0000 +++ mesa-19.0.1/docs/meson.html 2019-03-31 23:16:37.000000000 +0000 @@ -16,6 +16,11 @@

        Compilation and Installation using Meson

        + +

        1. Basic Usage

        The Meson build system is generally considered stable and ready @@ -48,9 +53,13 @@ along with a build directory to view the selected options for. This will show your meson global arguments and project arguments, along with their defaults and your local settings. +

        +

        Meson does not currently support listing options before configure a build directory, but this feature is being discussed upstream. +For now, the only way to see what options exist is to look at the +meson_options.txt file at the root of the project.

        @@ -105,14 +114,14 @@
         
        Environment Variables

        Meson supports the standard CC and CXX environment variables for -changing the default compiler, and CFLAGS, CXXFLAGS, and LDFLAGS for setting -options to the compiler and linker during the initial configuration. +changing the default compiler. Meson does support CFLAGS, CXXFLAGS, etc. But +their use is discouraged because of the many caveats in using them. Instead it +is recomended to use -D${lang}_args and +-D${lang}_link_args instead. Among the benefits of these options +is that they are guaranteed to persist across rebuilds and reconfigurations. -These arguments are consumed and stored by meson when it is initialized. To -change these flags after the build is initialized (or when doing a first -initialization), consider using -D${lang}_args and --D${lang}_link_args instead. Meson will never change compiler in a -configured build directory. +Meson does not allow changing compiler in a configured builddir, you will need +to create a new build dir for a different compiler.

        @@ -135,11 +144,56 @@
         
         
        LLVM

        Meson includes upstream logic to wrap llvm-config using its standard -dependency interface. It will search $PATH (or %PATH% on windows) for -llvm-config (and llvm-config$version and llvm-config-$version), so using an -LLVM from a non-standard path is as easy as -PATH=/path/with/llvm-config:$PATH meson build. +dependency interface. +

        + +

        +As of meson 0.49.0 meson also has the concept of a +"native file", +these files provide information about the native build environment (as opposed +to a cross build environment). They are ini formatted and can override where to +find llvm-config: + +custom-llvm.ini +

        +    [binaries]
        +    llvm-config = '/usr/local/bin/llvm/llvm-config'
        +
        + +Then configure meson: + +
        +    meson builddir/ --native-file custom-llvm.ini
        +

        + +

        +For selecting llvm-config for cross compiling a +"cross file" +should be used. It uses the same format as the native file above: + +cross-llvm.ini +

        +    [binaries]
        +    ...
        +    llvm-config = '/usr/lib/llvm-config-32'
        +
        + +Then configure meson: + +
        +    meson builddir/ --cross-file cross-llvm.ini
        +
        + +See the Cross Compilation section for more information. +

        + +

        +For older versions of meson $PATH (or %PATH% on +windows) will be searched for llvm-config (and llvm-config$version and +llvm-config-$version), you can override this environment variable to control +the search: PATH=/path/with/llvm-config:$PATH meson build. +

        @@ -190,6 +244,93 @@
        +

        2. Cross-compilation and 32-bit builds

        + +

        Meson supports +cross-compilation by specifying a number of binary paths and +settings in a file and passing this file to meson or +meson configure with the --cross-file +parameter.

        + +

        This file can live at any location, but you can use the bare filename +(without the folder path) if you put it in $XDG_DATA_HOME/meson/cross or +~/.local/share/meson/cross

        + +

        Below are a few example of cross files, but keep in mind that you +will likely have to alter them for your system.

        + +

        +Those running on ArchLinux can use the AUR-maintained packages for some +of those, as they'll have the right values for your system: +

        +

        + +

        +32-bit build on x86 linux: +

        +[binaries]
        +c = '/usr/bin/gcc'
        +cpp = '/usr/bin/g++'
        +ar = '/usr/bin/gcc-ar'
        +strip = '/usr/bin/strip'
        +pkgconfig = '/usr/bin/pkg-config-32'
        +llvm-config = '/usr/bin/llvm-config32'
        +
        +[properties]
        +c_args = ['-m32']
        +c_link_args = ['-m32']
        +cpp_args = ['-m32']
        +cpp_link_args = ['-m32']
        +
        +[host_machine]
        +system = 'linux'
        +cpu_family = 'x86'
        +cpu = 'i686'
        +endian = 'little'
        +
        +

        + +

        +64-bit build on ARM linux: +

        +[binaries]
        +c = '/usr/bin/aarch64-linux-gnu-gcc'
        +cpp = '/usr/bin/aarch64-linux-gnu-g++'
        +ar = '/usr/bin/aarch64-linux-gnu-gcc-ar'
        +strip = '/usr/bin/aarch64-linux-gnu-strip'
        +pkgconfig = '/usr/bin/aarch64-linux-gnu-pkg-config'
        +exe_wrapper = '/usr/bin/qemu-aarch64-static'
        +
        +[host_machine]
        +system = 'linux'
        +cpu_family = 'aarch64'
        +cpu = 'aarch64'
        +endian = 'little'
        +
        +

        + +

        +64-bit build on x86 windows: +

        +[binaries]
        +c = '/usr/bin/x86_64-w64-mingw32-gcc'
        +cpp = '/usr/bin/x86_64-w64-mingw32-g++'
        +ar = '/usr/bin/x86_64-w64-mingw32-ar'
        +strip = '/usr/bin/x86_64-w64-mingw32-strip'
        +pkgconfig = '/usr/bin/x86_64-w64-mingw32-pkg-config'
        +exe_wrapper = 'wine'
        +
        +[host_machine]
        +system = 'windows'
        +cpu_family = 'x86_64'
        +cpu = 'i686'
        +endian = 'little'
        +
        +

        +
  • diff -Nru mesa-18.3.3/docs/release-calendar.html mesa-19.0.1/docs/release-calendar.html --- mesa-18.3.3/docs/release-calendar.html 2018-12-07 18:58:04.000000000 +0000 +++ mesa-19.0.1/docs/release-calendar.html 2019-03-31 23:16:37.000000000 +0000 @@ -23,6 +23,16 @@ The table below lists the date and release manager that is expected to do the specific release.
    +Regular updates will ensure that the schedule for the current and the +next two feature releases are shown in the table. +
    +In order to keep the whole releasing team up to date with the tools +used, best practices and other details, the member in charge of the +next feature release will be in constant rotation. +
    +The way the release schedule works is +explained here. +
    Take a look here if you'd like to nominate a patch in the next stable release.

    @@ -39,47 +49,129 @@ Notes -18.2 -2018-11-14 -18.2.5 -Juan A. Suarez - +18.3 +2019-01-30 +18.3.3 +Emil Velikov + -2018-11-28 -18.2.6 -Juan A. Suarez - +2019-02-13 +18.3.4 +Emil Velikov + -2018-12-12 -18.2.7 -Juan A. Suarez -Last planned 18.2.x release +2019-02-27 +18.3.5 +Emil Velikov + -18.3 -2018-10-31 -18.3.0-rc1 + +2019-03-13 +18.3.6 Emil Velikov - +Last planned 18.3.x release + + +19.0 +2019-01-29 +19.0.0-rc1 +Dylan Baker + + + +2019-02-05 +19.0.0-rc2 +Dylan Baker + + + +2019-02-12 +19.0.0-rc3 +Dylan Baker + + + +2019-02-19 +19.0.0-rc4 +Dylan Baker +Last planned RC/Final release + + +19.1 +2019-04-30 +19.1.0-rc1 +Andres Gomez + -2018-11-07 -18.3.0-rc2 +2019-05-07 +19.1.0-rc2 +Andres Gomez + + + +2019-05-14 +19.1.0-rc3 +Andres Gomez + + + +2019-05-21 +19.1.0-rc4 +Andres Gomez +Last planned RC/Final release + + +19.2 +2019-08-06 +19.2.0-rc1 Emil Velikov - + -2018-11-14 -18.3.0-rc3 +2019-08-13 +19.2.0-rc2 Emil Velikov - + -2018-11-21 -18.3.0-rc4 +2019-08-20 +19.2.0-rc3 Emil Velikov -Last planned RC/final release + + + +2019-08-27 +19.2.0-rc4 +Emil Velikov +Last planned RC/Final release + + +19.3 +2019-10-15 +19.3.0-rc1 +Juan A. Suarez + + + +2019-10-22 +19.3.0-rc2 +Juan A. Suarez + + + +2019-10-29 +19.3.0-rc3 +Juan A. Suarez + + + +2019-11-05 +19.3.0-rc4 +Juan A. Suarez +Last planned RC/Final release diff -Nru mesa-18.3.3/docs/releasing.html mesa-19.0.1/docs/releasing.html --- mesa-18.3.3/docs/releasing.html 2018-12-07 18:58:04.000000000 +0000 +++ mesa-19.0.1/docs/releasing.html 2019-03-31 23:16:37.000000000 +0000 @@ -56,9 +56,10 @@

    Releases should happen on Wednesdays. Delays can occur although those -should be keep to a minimum. +should be kept to a minimum.
    -See our calendar for the +See our calendar +for information about how the release schedule is planned, and the date and other details for individual releases.

    @@ -67,6 +68,9 @@
  • Available approximately every three months.
  • Initial timeplan available 2-4 weeks before the planned branchpoint (rc1) on the mesa-announce@ mailing list. +
  • Typically, the final release will happen after 4 +candidates. Additional ones may be needed in order to resolve blocking +regressions, though.
  • A pre-release announcement should be available approximately 24 hours before the final (non-rc) release. @@ -84,6 +88,12 @@
    The final release from the 12.0 series Mesa 12.0.5 will be out around the same time (or shortly after) 13.0.1 is out. +
    +This also involves that, as a final release may be delayed due to the +need of additional candidates to solve some blocking regression(s), +the release manager might have to update +the calendar with +additional bug fix releases of the current stable branch.

    @@ -112,18 +122,21 @@

    Done continuously up-to the pre-release announcement.

    -As an exception, patches can be applied up-to the last ~1h before the actual -release. This is made only with explicit permission/request, -and the patch must be very well contained. Thus it cannot -affect more than one driver/subsystem. +Developers can request, as an exception, patches to be applied up-to +the last one hour before the actual release. This is made only +with explicit permission/request, and the patch must be very +well contained. Thus it cannot affect more than one driver/subsystem.

    -

    -Currently Ilia Mirkin and AMD devs have requested "permanent" exception. -

    +

    Following developers have requested permanent exception

    +
      +
    • Ilia Mirkin +
    • AMD team +
    +

    The following must pass:

      -
    • make distcheck, scons and scons check must pass +
    • make distcheck, scons and scons check
    • Testing with different version of system components - LLVM and others is also performed where possible.
    • As a general rule, testing with various combinations of configure @@ -131,9 +144,9 @@

    -Achieved by combination of local ad-hoc scripts, mingw-w64 cross -compilation and AppVeyor plus Travis-CI, the latter as part of their -Github integration. +These are achieved by combination of local testing, +which includes mingw-w64 cross compilation and AppVeyor plus Travis-CI, the +latter two as part of their Github integration.

    @@ -225,7 +238,7 @@ Notes:

      -
    • People are encouraged to test the branch and report regressions.
    • +
    • People are encouraged to test the staging branch and report regressions.
    • The branch history is not stable and it will be rebased,
    @@ -445,7 +458,7 @@ relevant branch.

    -

    Perform basic testing

    +

    Perform basic testing

    Most of the testing should already be done during the diff -Nru mesa-18.3.3/docs/relnotes/18.2.5.html mesa-19.0.1/docs/relnotes/18.2.5.html --- mesa-18.3.3/docs/relnotes/18.2.5.html 1970-01-01 00:00:00.000000000 +0000 +++ mesa-19.0.1/docs/relnotes/18.2.5.html 2019-03-31 23:16:37.000000000 +0000 @@ -0,0 +1,172 @@ + + + + + Mesa Release Notes + + + + +

    +

    The Mesa 3D Graphics Library

    +
    + + +
    + +

    Mesa 18.2.5 Release Notes / November 15, 2018

    + +

    +Mesa 18.2.5 is a bug fix release which fixes bugs found since the 18.2.4 release. +

    +

    +Mesa 18.2.5 implements the OpenGL 4.5 API, but the version reported by +glGetString(GL_VERSION) or glGetIntegerv(GL_MAJOR_VERSION) / +glGetIntegerv(GL_MINOR_VERSION) depends on the particular driver being used. +Some drivers don't support all the features required in OpenGL 4.5. OpenGL +4.5 is only available if requested at context creation. +Compatibility contexts may report a lower version depending on each driver. +

    + + +

    SHA256 checksums

    +
    +dddc28928b6f4083a0d5120b58c1c8e2dc189ab5c14299c08a386607fdbbdce7  mesa-18.2.5.tar.gz
    +b12c32872832e5353155e1e8026e1f1ab75bba9dc5b178d712045684d26c2b73  mesa-18.2.5.tar.xz
    +
    + + +

    New features

    +

    None

    + + +

    Bug fixes

    + +
      + +
    • Bug 105731 - linker error "fragment shader input ... has no matching output in the previous stage" when previous stage's output declaration in a separate shader object
    • + +
    • Bug 107511 - KHR/khrplatform.h not always installed when needed
    • + +
    • Bug 107626 - [SNB] The graphical corruption and GPU hang occur sometimes on the piglit test "arb_texture_multisample-large-float-texture" with parameter --fp16
    • + +
    • Bug 108082 - warning: unknown warning option '-Wno-format-truncation' [-Wunknown-warning-option]
    • + +
    • Bug 108560 - Mesa 32 is built without sse
    • + +
    + + +

    Changes

    + +

    Andre Heider (1):

    +
      +
    • st/nine: fix stack corruption due to ABI mismatch
    • +
    + +

    Andrii Simiklit (1):

    +
      +
    • i965/batch: don't ignore the 'brw_new_batch' call for a 'new batch'
    • +
    + +

    Dylan Baker (2):

    +
      +
    • meson: link gallium nine with pthreads
    • +
    • meson: fix libatomic tests
    • +
    + +

    Emil Velikov (2):

    +
      +
    • egl/glvnd: correctly report errors when vendor cannot be found
    • +
    • m4: add Werror when checking for compiler flags
    • +
    + +

    Eric Engestrom (6):

    +
      +
    • svga: add missing meson build dependency
    • +
    • clover: add missing meson build dependency
    • +
    • wsi/wayland: use proper VkResult type
    • +
    • wsi/wayland: only finish() a successfully init()ed display
    • +
    • configure: install KHR/khrplatform.h when needed
    • +
    • meson: install KHR/khrplatform.h when needed
    • +
    + +

    Gert Wollny (1):

    +
      +
    • virgl/vtest-winsys: Use virgl version of bind flags
    • +
    + +

    Jonathan Gray (1):

    +
      +
    • intel/tools: include stdarg.h in error2aub
    • +
    + +

    Juan A. Suarez Romero (4):

    +
      +
    • docs: add sha256 checksums for 18.2.4
    • +
    • cherry-ignore: add explicit 18.3 only nominations
    • +
    • cherry-ignore: i965/batch: avoid reverting batch buffer if saved state is an empty
    • +
    • Update version to 18.2.5
    • +
    + +

    Lionel Landwerlin (1):

    +
      +
    • anv/android: mark gralloc allocated BOs as external
    • +
    + +

    Marek Olšák (3):

    +
      +
    • ac: fix ac_build_fdiv for f64
    • +
    • st/va: fix incorrect use of resource_destroy
    • +
    • include: update GL & GLES headers (v2)
    • +
    + +

    Matt Turner (2):

    +
      +
    • util/ralloc: Switch from DEBUG to NDEBUG
    • +
    • util/ralloc: Make sizeof(linear_header) a multiple of 8
    • +
    + +

    Olivier Fourdan (1):

    +
      +
    • wayland/egl: Resize EGL surface on update buffer for swrast
    • +
    + +

    Rhys Perry (1):

    +
      +
    • glsl_to_tgsi: don't create 64-bit integer MAD/FMA
    • +
    + +

    Samuel Pitoiset (2):

    +
      +
    • radv: disable conditional rendering for vkCmdCopyQueryPoolResults()
    • +
    • radv: only expose VK_SUBGROUP_FEATURE_ARITHMETIC_BIT for VI+
    • +
    + +

    Sergii Romantsov (1):

    +
      +
    • autotools: library-dependency when no sse and 32-bit
    • +
    + +

    Timothy Arceri (4):

    +
      +
    • st/mesa: calculate buffer size correctly for packed uniforms
    • +
    • st/glsl_to_nir: fix next_stage gathering
    • +
    • nir: add glsl_type_is_integer() helper
    • +
    • nir: don't pack varyings ints with floats unless flat
    • +
    + +

    Vadym Shovkoplias (1):

    +
      +
    • glsl/linker: Fix out variables linking during single stage
    • +
    + +

    Vinson Lee (1):

    +
      +
    • r600/sb: Fix constant logical operand in assert.
    • +
    + + +
    + + diff -Nru mesa-18.3.3/docs/relnotes/18.2.6.html mesa-19.0.1/docs/relnotes/18.2.6.html --- mesa-18.3.3/docs/relnotes/18.2.6.html 1970-01-01 00:00:00.000000000 +0000 +++ mesa-19.0.1/docs/relnotes/18.2.6.html 2019-03-31 23:16:37.000000000 +0000 @@ -0,0 +1,179 @@ + + + + + Mesa Release Notes + + + + +
    +

    The Mesa 3D Graphics Library

    +
    + + +
    + +

    Mesa 18.2.6 Release Notes / November 28, 2018

    + +

    +Mesa 18.2.6 is a bug fix release which fixes bugs found since the 18.2.5 release. +

    +

    +Mesa 18.2.6 implements the OpenGL 4.5 API, but the version reported by +glGetString(GL_VERSION) or glGetIntegerv(GL_MAJOR_VERSION) / +glGetIntegerv(GL_MINOR_VERSION) depends on the particular driver being used. +Some drivers don't support all the features required in OpenGL 4.5. OpenGL +4.5 is only available if requested at context creation. +Compatibility contexts may report a lower version depending on each driver. +

    + + +

    SHA256 checksums

    +
    +e0ea1236dbc6c412b02e1b5d7f838072525971a6630246fa82ae4466a6d8a587  mesa-18.2.6.tar.gz
    +9ebafa4f8249df0c718e93b9ca155e3593a1239af303aa2a8b0f2056a7efdc12  mesa-18.2.6.tar.xz
    +
    + + +

    New features

    +

    None

    + + +

    Bug fixes

    + +
      + +
    • Bug 107626 - [SNB] The graphical corruption and GPU hang occur sometimes on the piglit test "arb_texture_multisample-large-float-texture" with parameter --fp16
    • + +
    • Bug 107856 - i965 incorrectly calculates the number of layers for texture views (assert)
    • + +
    • Bug 108630 - [G965] piglit.spec.!opengl 1_2.tex3d-maxsize spins forever
    • + +
    • Bug 108713 - Gallium: use after free with transform feedback
    • + +
    • Bug 108829 - [meson] libglapi exports internal API
    • + +
    + + +

    Changes

    + +

    Andrii Simiklit (1):

    +
      +
    • i965/batch: avoid reverting batch buffer if saved state is an empty
    • +
    + +

    Bas Nieuwenhuizen (1):

    +
      +
    • radv: Fix opaque metadata descriptor last layer.
    • +
    + +

    Brian Paul (1):

    +
      +
    • scons/svga: remove opt from the list of valid build types
    • +
    + +

    Danylo Piliaiev (1):

    +
      +
    • i965: Fix calculation of layers array length for isl_view
    • +
    + +

    Dylan Baker (2):

    +
      +
    • meson: Don't set -Wall
    • +
    • meson: Don't force libva to required from auto
    • +
    + +

    Emil Velikov (13):

    +
      +
    • bin/get-pick-list.sh: simplify git oneline printing
    • +
    • bin/get-pick-list.sh: prefix output with "[stable] "
    • +
    • bin/get-pick-list.sh: handle "typod" usecase.
    • +
    • bin/get-pick-list.sh: handle the fixes tag
    • +
    • bin/get-pick-list.sh: tweak the commit sha matching pattern
    • +
    • bin/get-pick-list.sh: flesh out is_sha_nomination
    • +
    • bin/get-pick-list.sh: handle fixes tag with missing colon
    • +
    • bin/get-pick-list.sh: handle unofficial "broken by" tag
    • +
    • bin/get-pick-list.sh: use test instead of [ ]
    • +
    • bin/get-pick-list.sh: handle reverts prior to the branchpoint
    • +
    • travis: drop unneeded x11proto-xf86vidmode-dev
    • +
    • glx: make xf86vidmode mandatory for direct rendering
    • +
    • travis: adding missing x11-xcb for meson+vulkan
    • +
    + +

    Eric Anholt (1):

    +
      +
    • vc4: Make sure we make ro scanout resources for create_with_modifiers.
    • +
    + +

    Eric Engestrom (5):

    +
      +
    • meson: only run vulkan's meson.build when building vulkan
    • +
    • gbm: remove unnecessary meson include
    • +
    • meson: fix wayland-less builds
    • +
    • egl: add missing glvnd entrypoint for EGL_ANDROID_blob_cache
    • +
    • glapi: add missing visibility args
    • +
    + +

    Erik Faye-Lund (1):

    +
      +
    • mesa/main: remove bogus error for zero-sized images
    • +
    + +

    Gert Wollny (3):

    +
      +
    • mesa: Reference count shaders that are used by transform feedback objects
    • +
    • r600: clean up the GS ring buffers when the context is destroyed
    • +
    • glsl: free or reuse memory allocated for TF varying
    • +
    + +

    Jason Ekstrand (2):

    +
      +
    • nir/lower_alu_to_scalar: Don't try to lower unpack_32_2x16
    • +
    • anv: Put robust buffer access in the pipeline hash
    • +
    + +

    Juan A. Suarez Romero (6):

    +
      +
    • cherry-ignore: add explicit 18.3 only nominations
    • +
    • cherry-ignore: intel/aub_viewer: fix dynamic state printing
    • +
    • cherry-ignore: intel/aub_viewer: Print blend states properly
    • +
    • cherry-ignore: mesa/main: fix incorrect depth-error
    • +
    • docs: add sha256 checksums for 18.2.5
    • +
    • Update version to 18.2.6
    • +
    + +

    Karol Herbst (1):

    +
      +
    • nir/spirv: cast shift operand to u32
    • +
    + +

    Kenneth Graunke (1):

    +
      +
    • i965: Add PCI IDs for new Amberlake parts that are Coffeelake based
    • +
    + +

    Lionel Landwerlin (1):

    +
      +
    • egl/dri: fix error value with unknown drm format
    • +
    + +

    Marek Olšák (2):

    +
      +
    • winsys/amdgpu: fix a buffer leak in amdgpu_bo_from_handle
    • +
    • winsys/amdgpu: fix a device handle leak in amdgpu_winsys_create
    • +
    + +

    Rodrigo Vivi (4):

    +
      +
    • i965: Add a new CFL PCI ID.
    • +
    • intel: aubinator: Adding missed platforms to the error message.
    • +
    • intel: Introducing Amber Lake platform
    • +
    • intel: Introducing Whiskey Lake platform
    • +
    + + +
    + + diff -Nru mesa-18.3.3/docs/relnotes/18.2.7.html mesa-19.0.1/docs/relnotes/18.2.7.html --- mesa-18.3.3/docs/relnotes/18.2.7.html 1970-01-01 00:00:00.000000000 +0000 +++ mesa-19.0.1/docs/relnotes/18.2.7.html 2019-03-31 23:16:37.000000000 +0000 @@ -0,0 +1,167 @@ + + + + + Mesa Release Notes + + + + +
    +

    The Mesa 3D Graphics Library

    +
    + + +
    + +

    Mesa 18.2.7 Release Notes / December 13, 2018

    + +

    +Mesa 18.2.7 is a bug fix release which fixes bugs found since the 18.2.6 release. +

    +

    +Mesa 18.2.7 implements the OpenGL 4.5 API, but the version reported by +glGetString(GL_VERSION) or glGetIntegerv(GL_MAJOR_VERSION) / +glGetIntegerv(GL_MINOR_VERSION) depends on the particular driver being used. +Some drivers don't support all the features required in OpenGL 4.5. OpenGL +4.5 is only available if requested at context creation. +Compatibility contexts may report a lower version depending on each driver. +

    + + +

    SHA256 checksums

    +
    +092351cfbcd430ec595fbd3a3d8d253fd62c29074e1740d7198b00289ab400f8  mesa-18.2.7.tar.gz
    +9c7b02560d89d77ca279cd21f36ea9a49e9ffc5611f6fe35099357d744d07ae6  mesa-18.2.7.tar.xz
    +
    + + +

    New features

    +

    None

    + + +

    Bug fixes

    + +
      + +
    • Bug 106577 - broken rendering with nine and nouveau (GM107)
    • + +
    • Bug 108245 - RADV/Vega: Low mip levels of large BCn textures get corrupted by vkCmdCopyBufferToImage
    • + +
    • Bug 108311 - Query buffer object support is broken on r600.
    • + +
    • Bug 108894 - [anv] vkCmdCopyBuffer() and vkCmdCopyQueryPoolResults() write-after-write hazard
    • + +
    • Bug 108909 - Vkd3d test failure test_resolve_non_issued_query_data()
    • + +
    • Bug 108914 - blocky shadow artifacts in The Forest with DXVK, RADV_DEBUG=nohiz fixes this
    • + +
    • Bug 108925 - vkCmdCopyQueryPoolResults(VK_QUERY_RESULT_WAIT_BIT) for timestamps with large query count hangs
    • + +
    + + +

    Changes

    + +

    Alex Smith (1):

    +
      +
    • radv: Flush before vkCmdWriteTimestamp() if needed
    • +
    + +

    Bas Nieuwenhuizen (4):

    +
      +
    • radv: Align large buffers to the fragment size.
    • +
    • radv: Clamp gfx9 image view extents to the allocated image extents.
    • +
    • radv/android: Mark android WSI image as shareable.
    • +
    • radv/android: Use buffer metadata to determine scanout compat.
    • +
    + +

    Dave Airlie (2):

    +
      +
    • r600: make suballocator 256-bytes align
    • +
    • radv: use 3d shader for gfx9 copies if dst is 3d
    • +
    + +

    Emil Velikov (2):

    +
      +
    • egl/wayland: bail out when drmGetMagic fails
    • +
    • egl/wayland: plug memory leak in drm_handle_device()
    • +
    + +

    Eric Anholt (3):

    +
      +
    • v3d: Fix a leak of the transfer helper on screen destroy.
    • +
    • vc4: Fix a leak of the transfer helper on screen destroy.
    • +
    • v3d: Fix a leak of the disassembled instruction string during debug dumps.
    • +
    + +

    Eric Engestrom (3):

    +
      +
    • anv: correctly use vulkan 1.0 by default
    • +
    • wsi/display: fix mem leak when freeing swapchains
    • +
    • vulkan/wsi: fix s/,/;/ typo
    • +
    + +

    Gurchetan Singh (3):

    +
      +
    • virgl: quadruple command buffer size
    • +
    • virgl: avoid large inline transfers
    • +
    • virgl: don't mark buffers as unclean after a write
    • +
    + +

    Juan A. Suarez Romero (4):

    +
      +
    • docs: add sha256 checksums for 18.2.6
    • +
    • cherry-ignore: freedreno: Fix autotools build.
    • +
    • cherry-ignore: mesa: Revert INTEL_fragment_shader_ordering support
    • +
    • Update version to 18.2.7
    • +
    + +

    Karol Herbst (1):

    +
      +
    • nv50,nvc0: Fix gallium nine regression regarding sampler bindings
    • +
    + +

    Lionel Landwerlin (2):

    +
      +
    • anv: flush pipeline before query result copies
    • +
    • anv/query: flush render target before copying results
    • +
    + +

    Michal Srb (2):

    +
      +
    • gallium: Constify drisw_loader_funcs struct
    • +
    • drisw: Use separate drisw_loader_funcs for shm
    • +
    + +

    Nicolai Hähnle (2):

    +
      +
    • egl/wayland: rather obvious build fix
    • +
    • meson: link LLVM 'native' component when LLVM is available
    • +
    + +

    Samuel Pitoiset (1):

    +
      +
    • radv: rework the TC-compat HTILE hardware bug with COND_EXEC
    • +
    + +

    Thomas Hellstrom (2):

    +
      +
    • st/xa: Fix a memory leak
    • +
    • winsys/svga: Fix a memory leak
    • +
    + +

    Tobias Klausmann (1):

    +
      +
    • amd/vulkan: meson build - use radv_deps for libvulkan_radeon
    • +
    + +

    Vinson Lee (1):

    +
      +
    • st/xvmc: Add X11 include path.
    • +
    + + +
    + + diff -Nru mesa-18.3.3/docs/relnotes/18.2.8.html mesa-19.0.1/docs/relnotes/18.2.8.html --- mesa-18.3.3/docs/relnotes/18.2.8.html 1970-01-01 00:00:00.000000000 +0000 +++ mesa-19.0.1/docs/relnotes/18.2.8.html 2019-03-31 23:16:37.000000000 +0000 @@ -0,0 +1,183 @@ + + + + + Mesa Release Notes + + + + +
    +

    The Mesa 3D Graphics Library

    +
    + + +
    + +

    Mesa 18.2.8 Release Notes / December 27, 2018

    + +

    +Mesa 18.2.8 is a bug fix release which fixes bugs found since the 18.2.7 release. +

    +

    +Mesa 18.2.8 implements the OpenGL 4.5 API, but the version reported by +glGetString(GL_VERSION) or glGetIntegerv(GL_MAJOR_VERSION) / +glGetIntegerv(GL_MINOR_VERSION) depends on the particular driver being used. +Some drivers don't support all the features required in OpenGL 4.5. OpenGL +4.5 is only available if requested at context creation. +Compatibility contexts may report a lower version depending on each driver. +

    + + +

    SHA256 checksums

    +
    +77512edc0a84e19c7131a0e2e5ebf1beaf1494dc4b71508fcc92d06d65f9f4f5  mesa-18.2.8.tar.gz
    +1d2ed9fd435d86d95b7215b287258d3e6b1180293a36f688e5a2efc18298d863  mesa-18.2.8.tar.xz
    +
    + + +

    New features

    +

    None

    + + +

    Bug fixes

    + +
      + +
    • Bug 108114 - [vulkancts] new VK_KHR_16bit_storage tests fail.
    • + +
    • Bug 108116 - [vulkancts] stencil partial clear tests fail.
    • + +
    • Bug 108910 - Vkd3d test failure test_multisample_array_texture()
    • + +
    • Bug 108911 - Vkd3d test failure test_clear_render_target_view()
    • + +
    • Bug 109081 - [bisected] [HSW] Regression in clipping.user_defined.clip_* vulkancts tests
    • + +
    + + +

    Changes

    + +

    Alex Deucher (3):

    +
      +
    • pci_ids: add new vega10 pci ids
    • +
    • pci_ids: add new vega20 pci id
    • +
    • pci_ids: add new VegaM pci id
    • +
    + +

    Axel Davy (3):

    +
      +
    • st/nine: Fix volumetexture dtor on ctor failure
    • +
    • st/nine: Bind src not dst in nine_context_box_upload
    • +
    • st/nine: Add src reference to nine_context_range_upload
    • +
    + +

    Caio Marcelo de Oliveira Filho (1):

    +
      +
    • nir: properly clear the entry sources in copy_prop_vars
    • +
    + +

    Dylan Baker (1):

    +
      +
    • meson: Fix ppc64 little endian detection
    • +
    + +

    Emil Velikov (9):

    +
      +
    • glx: mandate xf86vidmode only for "drm" dri platforms
    • +
    • bin/get-pick-list.sh: rework handing of sha nominations
    • +
    • bin/get-pick-list.sh: warn when commit lists invalid sha
    • +
    • meson: don't require glx/egl/gbm with gallium drivers
    • +
    • pipe-loader: meson: reference correct library
    • +
    • TODO: glx: meson: build dri based glx tests, only with -Dglx=dri
    • +
    • glx: meson: drop includes from a link-only library
    • +
    • glx: meson: wire up the dispatch-index-check test
    • +
    • glx/test: meson: assorted include fixes
    • +
    + +

    Eric Anholt (2):

    +
      +
    • v3d: Make sure that a thrsw doesn't split a multop from its umul24.
    • +
    • v3d: Add missing flagging of SYNCB as a TSY op.
    • +
    + +

    Erik Faye-Lund (2):

    +
      +
    • virgl: wrap vertex element state in a struct
    • +
    • virgl: work around bad assumptions in virglrenderer
    • +
    + +

    Iago Toral Quiroga (1):

    +
      +
    • intel/compiler: do not copy-propagate strided regions to ddx/ddy arguments
    • +
    + +

    Ian Romanick (2):

    +
      +
    • i965/vec4/dce: Don't narrow the write mask if the flags are used
    • +
    • Revert "nir/lower_indirect: Bail early if modes == 0"
    • +
    + +

    Jan Vesely (1):

    +
      +
    • clover: Fix build after clang r348827
    • +
    + +

    Jason Ekstrand (1):

    +
      +
    • nir/constant_folding: Fix source bit size logic
    • +
    + +

    Jon Turney (1):

    +
      +
    • glx: Fix compilation with GLX_USE_WINDOWSGL
    • +
    + +

    Juan A. Suarez Romero (7):

    +
      +
    • docs: add sha256 checksums for 18.2.7
    • +
    • cherry-ignore: add explicit 18.3 only nominations
    • +
    • cherry-ignore: meson: libfreedreno depends upon libdrm (for fence support)
    • +
    • cherry-ignore: radv: Fix multiview depth clears
    • +
    • cherry-ignore: nir: properly find the entry to keep in copy_prop_vars
    • +
    • cherry-ignore: intel/compiler: move nir_lower_bool_to_int32 before nir_lower_locals_to_regs
    • +
    • Update version to 18.2.8
    • +
    + +

    Kirill Burtsev (1):

    +
      +
    • loader: free error state, when checking the drawable type
    • +
    + +

    Lionel Landwerlin (1):

    +
      +
    • anv: don't do partial resolve on layer > 0
    • +
    + +

    Rhys Perry (2):

    +
      +
    • radv: don't set surf_index for stencil-only images
    • +
    • ac: split 16-bit ssbo loads that may not be dword aligned
    • +
    + +

    Rob Clark (1):

    +
      +
    • mesa/st/nir: fix missing nir_compact_varyings
    • +
    + +

    Samuel Pitoiset (1):

    +
      +
    • radv: switch on EOP when primitive restart is enabled with triangle strips
    • +
    + +

    Vinson Lee (2):

    +
      +
    • meson: Fix typo.
    • +
    • meson: Fix libsensors detection.
    • +
    + + +
    + + diff -Nru mesa-18.3.3/docs/relnotes/18.3.3.html mesa-19.0.1/docs/relnotes/18.3.3.html --- mesa-18.3.3/docs/relnotes/18.3.3.html 2019-02-01 12:03:20.000000000 +0000 +++ mesa-19.0.1/docs/relnotes/18.3.3.html 1970-01-01 00:00:00.000000000 +0000 @@ -1,207 +0,0 @@ - - - - - Mesa Release Notes - - - - -
    -

    The Mesa 3D Graphics Library

    -
    - - -
    - -

    Mesa 18.3.3 Release Notes / January 31, 2019

    - -

    -Mesa 18.3.3 is a bug fix release which fixes bugs found since the 18.3.2 release. -

    -

    -Mesa 18.3.3 implements the OpenGL 4.5 API, but the version reported by -glGetString(GL_VERSION) or glGetIntegerv(GL_MAJOR_VERSION) / -glGetIntegerv(GL_MINOR_VERSION) depends on the particular driver being used. -Some drivers don't support all the features required in OpenGL 4.5. OpenGL -4.5 is only available if requested at context creation. -Compatibility contexts may report a lower version depending on each driver. -

    - - -

    SHA256 checksums

    -
    -TBD
    -
    - - -

    New features

    -

    None

    - - -

    Bug fixes

    - -
      - -
    • Bug 108877 - OpenGL CTS gl43 test cases were interrupted due to segment fault
    • - -
    • Bug 109023 - error: inlining failed in call to always_inline ‘__m512 _mm512_and_ps(__m512, __m512)’: target specific option mismatch
    • - -
    • Bug 109129 - format_types.h:1220: undefined reference to `_mm256_cvtps_ph'
    • - -
    • Bug 109229 - glLinkProgram locks up for ~30 seconds
    • - -
    • Bug 109242 - [RADV] The Witcher 3 system freeze
    • - -
    • Bug 109488 - Mesa 18.3.2 crash on a specific fragment shader (assert triggered) / already fixed on the master branch.
    • - -
    - - -

    Changes

    - -

    Andres Gomez (2):

    -
      -
    • bin/get-pick-list.sh: fix the oneline printing
    • -
    • bin/get-pick-list.sh: fix redirection in sh
    • -
    - -

    Axel Davy (1):

    -
      -
    • st/nine: Immediately upload user provided textures
    • -
    - -

    Bas Nieuwenhuizen (3):

    -
      -
    • radv: Only use 32 KiB per threadgroup on Stoney.
    • -
    • radv: Set partial_vs_wave for pipelines with just GS, not tess.
    • -
    • nir: Account for atomics in copy propagation.
    • -
    - -

    Bruce Cherniak (1):

    -
      -
    • gallium/swr: Fix multi-context sync fence deadlock.
    • -
    - -

    Carsten Haitzler (Rasterman) (2):

    -
      -
    • vc4: Use named parameters for the NEON inline asm.
    • -
    • vc4: Declare the cpu pointers as being modified in NEON asm.
    • -
    - -

    Danylo Piliaiev (1):

    -
      -
    • glsl: Fix copying function's out to temp if dereferenced by array
    • -
    - -

    Dave Airlie (3):

    -
      -
    • dri_interface: add put shm image2 (v2)
    • -
    • glx: add support for putimageshm2 path (v2)
    • -
    • gallium: use put image shm2 path (v2)
    • -
    - -

    Dylan Baker (4):

    -
      -
    • meson: allow building dri driver without window system if osmesa is classic
    • -
    • meson: fix swr KNL build
    • -
    • meson: Fix compiler checks for SWR with ICC
    • -
    • meson: Add warnings and errors when using ICC
    • -
    - -

    Emil Velikov (4):

    -
      -
    • docs: add sha256 checksums for 18.3.2
    • -
    • cherry-ignore: radv: Fix multiview depth clears
    • -
    • cherry-ignore: spirv: Handle arbitrary bit sizes for deref array indices
    • -
    • cherry-ignore: WARNING: Commit XXX lists invalid sha
    • -
    - -

    Eric Anholt (2):

    -
      -
    • vc4: Don't leak the GPU fd for renderonly usage.
    • -
    • vc4: Enable NEON asm on meson cross-builds.
    • -
    - -

    Eric Engestrom (2):

    -
      -
    • configure: EGL requirements only apply if EGL is built
    • -
    • meson/vdpau: add missing soversion
    • -
    - -

    Iago Toral Quiroga (1):

    -
      -
    • anv/device: fix maximum number of images supported
    • -
    - -

    Jason Ekstrand (3):

    -
      -
    • anv/nir: Rework arguments to apply_pipeline_layout
    • -
    • anv: Only parse pImmutableSamplers if the descriptor has samplers
    • -
    • nir/xfb: Fix offset accounting for dvec3/4
    • -
    - -

    Karol Herbst (2):

    -
      -
    • nv50/ir: disable tryCollapseChainedMULs in ConstantFolding for precise instructions
    • -
    • glsl/lower_output_reads: set invariant and precise flags on temporaries
    • -
    - -

    Lionel Landwerlin (1):

    -
      -
    • anv: fix invalid binding table index computation
    • -
    - -

    Marek Olšák (4):

    -
      -
    • radeonsi: also apply the GS hang workaround to draws without tessellation
    • -
    • radeonsi: fix a u_blitter crash after a shader with FBFETCH
    • -
    • radeonsi: fix rendering to tiny viewports where the viewport center is > 8K
    • -
    • st/mesa: purge framebuffers when unbinding a context
    • -
    - -

    Niklas Haas (1):

    -
      -
    • radv: correctly use vulkan 1.0 by default
    • -
    - -

    Pierre Moreau (1):

    -
      -
    • meson: Fix with_gallium_icd to with_opencl_icd
    • -
    - -

    Rob Clark (1):

    -
      -
    • loader: fix the no-modifiers case
    • -
    - -

    Samuel Pitoiset (1):

    -
      -
    • radv: clean up setting partial_es_wave for distributed tess on VI
    • -
    - -

    Timothy Arceri (5):

    -
      -
    • ac/nir_to_llvm: fix interpolateAt* for arrays
    • -
    • ac/nir_to_llvm: fix clamp shadow reference for more hardware
    • -
    • radv/ac: fix some fp16 handling
    • -
    • glsl: use remap location when serialising uniform program resource data
    • -
    • glsl: Copy function out to temp if we don't directly ref a variable
    • -
    - -

    Tomeu Vizoso (1):

    -
      -
    • etnaviv: Consolidate buffer references from framebuffers
    • -
    - -

    Vinson Lee (1):

    -
      -
    • meson: Fix typo.
    • -
    - - - -
    - - - diff -Nru mesa-18.3.3/docs/relnotes/19.0.0.html mesa-19.0.1/docs/relnotes/19.0.0.html --- mesa-18.3.3/docs/relnotes/19.0.0.html 1970-01-01 00:00:00.000000000 +0000 +++ mesa-19.0.1/docs/relnotes/19.0.0.html 2019-03-31 23:16:37.000000000 +0000 @@ -0,0 +1,2475 @@ + + + + + Mesa Release Notes + + + + +
    +

    The Mesa 3D Graphics Library

    +
    + + +
    + +

    Mesa 19.0.0 Release Notes / TBD

    + +

    +Mesa 19.0.0 is a new development release. People who are concerned +with stability and reliability should stick with a previous release or +wait for Mesa 19.0.1. +

    +

    +Mesa 19.0.0 implements the OpenGL 4.5 API, but the version reported by +glGetString(GL_VERSION) or glGetIntegerv(GL_MAJOR_VERSION) / +glGetIntegerv(GL_MINOR_VERSION) depends on the particular driver being used. +Some drivers don't support all the features required in OpenGL 4.5. OpenGL +4.5 is only available if requested at context creation. +Compatibility contexts may report a lower version depending on each driver. +

    + +

    SHA256 checksums

    +
    +  4c5b9c5227d37c1f6bdc786a6fa7ee7fbce40b2e8a87340c7d3234534ece3304  mesa-19.0.0.tar.gz
    +  5a549dfb40ec31e5c36c47aadac04554cb2e2a8d144a046a378fc16da57e38f8  mesa-19.0.0.tar.xz
    +
    + + +

    New features

    + +
      +
    • GL_AMD_texture_texture4 on all GL 4.0 drivers.
    • +
    • GL_EXT_shader_implicit_conversions on all drivers (ES extension).
    • +
    • GL_EXT_texture_compression_bptc on all GL 4.0 drivers (ES extension).
    • +
    • GL_EXT_texture_compression_rgtc on all GL 3.0 drivers (ES extension).
    • +
    • GL_EXT_render_snorm on gallium drivers (ES extension).
    • +
    • GL_EXT_texture_view on drivers supporting texture views (ES extension).
    • +
    • GL_OES_texture_view on drivers supporting texture views (ES extension).
    • +
    • GL_NV_shader_atomic_float on nvc0 (Fermi/Kepler only).
    • +
    • Shader-based software implementations of GL_ARB_gpu_shader_fp64, GL_ARB_gpu_shader_int64, GL_ARB_vertex_attrib_64bit, and GL_ARB_shader_ballot on i965.
    • +
    • VK_ANDROID_external_memory_android_hardware_buffer on Intel
    • +
    • Fixed and re-exposed VK_EXT_pci_bus_info on Intel and RADV
    • +
    • VK_EXT_scalar_block_layout on Intel and RADV
    • +
    • VK_KHR_depth_stencil_resolve on Intel
    • +
    • VK_KHR_draw_indirect_count on Intel
    • +
    • VK_EXT_conditional_rendering on Intel
    • +
    • VK_EXT_memory_budget on RADV
    • +
    + +

    Bug fixes

    + +
      + +
    • Bug 32211 - [GLSL] lower_jumps with continue-statements in for-loops prevents loop unrolling
    • + +
    • Bug 102349 - nv4x crashing with plasmashell - gdb log included
    • + +
    • Bug 102597 - [Regression] mpv, high rendering times (two to three times higher)
    • + +
    • Bug 104297 - [i965] Downward causes GPU hangs and misrendering on Haswell
    • + +
    • Bug 104602 - [apitrace] Graphical artifacts in Civilization VI on RX Vega
    • + +
    • Bug 105301 - The big SKQP bug
    • + +
    • Bug 106577 - broken rendering with nine and nouveau (GM107)
    • + +
    • Bug 106595 - [RADV] Rendering distortions only when MSAA is enabled
    • + +
    • Bug 107052 - [Regression][bisected]. Crookz - The Big Heist Demo can't be launched despite the "true" flag in "drirc"
    • + +
    • Bug 107510 - [GEN8+] up to 10% perf drop on several 3D benchmarks
    • + +
    • Bug 107626 - [SNB] The graphical corruption and GPU hang occur sometimes on the piglit test "arb_texture_multisample-large-float-texture" with parameter --fp16
    • + +
    • Bug 107728 - Wrong background in Sascha Willem's Multisampling Demo
    • + +
    • Bug 107842 - "invariant" qualifier on outputs of GLSL ES fragment shader causes compilation error.
    • + +
    • Bug 107856 - i965 incorrectly calculates the number of layers for texture views (assert)
    • + +
    • Bug 108114 - [vulkancts] new VK_KHR_16bit_storage tests fail.
    • + +
    • Bug 108116 - [vulkancts] stencil partial clear tests fail.
    • + +
    • Bug 108245 - RADV/Vega: Low mip levels of large BCn textures get corrupted by vkCmdCopyBufferToImage
    • + +
    • Bug 108311 - Query buffer object support is broken on r600.
    • + +
    • Bug 108457 - [OpenGL CTS] KHR-GL46.tessellation_shader.single.xfb_captures_data_from_correct_stage fails
    • + +
    • Bug 108560 - Mesa 32 is built without sse
    • + +
    • Bug 108624 - [regression][bisected] "nir: Copy propagation between blocks" regression
    • + +
    • Bug 108630 - [G965] piglit.spec.!opengl 1_2.tex3d-maxsize spins forever
    • + +
    • Bug 108635 - Mesa master commit 68dc591af16ebb36814e4c187e4998948103c99c causes XWayland to segfault
    • + +
    • Bug 108636 - test_optpass has use after free bug, failing with memory testing tools like address sanitizer
    • + +
    • Bug 108713 - Gallium: use after free with transform feedback
    • + +
    • Bug 108734 - Regression: [bisected] dEQP-GLES31.functional.tessellation.invariance.* start failing on r600
    • + +
    • Bug 108805 - i965 regressions from EXT_texture_sRGB_R8
    • + +
    • Bug 108829 - [meson] libglapi exports internal API
    • + +
    • Bug 108868 - [BYT IVB] Tesselation test regressions
    • + +
    • Bug 108877 - OpenGL CTS gl43 test cases were interrupted due to segment fault
    • + +
    • Bug 108894 - [anv] vkCmdCopyBuffer() and vkCmdCopyQueryPoolResults() write-after-write hazard
    • + +
    • Bug 108909 - Vkd3d test failure test_resolve_non_issued_query_data()
    • + +
    • Bug 108910 - Vkd3d test failure test_multisample_array_texture()
    • + +
    • Bug 108911 - Vkd3d test failure test_clear_render_target_view()
    • + +
    • Bug 108914 - blocky shadow artifacts in The Forest with DXVK, RADV_DEBUG=nohiz fixes this
    • + +
    • Bug 108925 - vkCmdCopyQueryPoolResults(VK_QUERY_RESULT_WAIT_BIT) for timestamps with large query count hangs
    • + +
    • Bug 108936 - [ILK,G45,G965] Regressions from texture-format enums rework
    • + +
    • Bug 108943 - Build fails on ppc64le with meson
    • + +
    • Bug 108961 - make check test_replace_src_bitsize failure
    • + +
    • Bug 108974 - make check DispatchSanity_test regression
    • + +
    • Bug 108999 - Calculating the scissors fields when the y is flipped (0 on top) can generate negative numbers that will cause assertion failure later on.
    • + +
    • Bug 109023 - error: inlining failed in call to always_inline ‘__m512 _mm512_and_ps(__m512, __m512)’: target specific option mismatch
    • + +
    • Bug 109072 - GPU hang in blender 2.80
    • + +
    • Bug 109075 - radv: New D3D boolean optimizations cause GPU hang in Witcher 3
    • + +
    • Bug 109081 - [bisected] [HSW] Regression in clipping.user_defined.clip_* vulkancts tests
    • + +
    • Bug 109086 - Crash software mesa with gl_select render mode
    • + +
    • Bug 109107 - gallium/st/va: change va max_profiles when using Radeon VCN Hardware
    • + +
    • Bug 109129 - format_types.h:1220: undefined reference to `_mm256_cvtps_ph'
    • + +
    • Bug 109151 - [KBL-G][vulkan] dEQP-VK.texture.explicit_lod.2d.sizes.31x55_nearest_linear_mipmap_nearest_repeat failed verification.
    • + +
    • Bug 109190 - virgl: buffer flushing error with some dEQP tests [bisected]
    • + +
    • Bug 109202 - nv50_ir.cpp:749:19: error: cannot use typeid with -fno-rtti
    • + +
    • Bug 109204 - [regression, bisected] retroarch's crt-royale shader crash radv
    • + +
    • Bug 109229 - glLinkProgram locks up for ~30 seconds
    • + +
    • Bug 109231 - [nir] src/compiler/nir/nir_loop_analyze.c uninitialized variable
    • + +
    • Bug 109242 - [RADV] The Witcher 3 system freeze
    • + +
    • Bug 109304 - GfxBench AztecRuins Vulkan version Segfault
    • + +
    • Bug 109325 - mesa: Need ability to retrieve command line of Meson configuration
    • + +
    • Bug 109328 - [BSW BXT GLK] dEQP-VK.subgroups.arithmetic.subgroup regressions
    • + +
    • Bug 109353 - [regression][bisected] "nir: Switch to using 1-bit Booleans for almost everything" regression with shared bools
    • + +
    • Bug 109401 - [DXVK] Project Cars rendering problems
    • + +
    • Bug 109404 - [ANV] The Witcher 3 shadows flickering
    • + +
    • Bug 109442 - "make check" test anv_block_pool_no_free fails intermittently
    • + +
    • Bug 109443 - Build failure with MSVC when using Scons >= 3.0.2
    • + +
    • Bug 109449 - [snb] quakespasm triggers a segmentation fault.
    • + +
    • Bug 109451 - [IVB,SNB] LINE_STRIPs following a TRIANGLE_FAN fail to use primitive restart
    • + +
    • Bug 109543 - After upgrade mesa to 19.0.0~rc1 all vulkan based application stop working ["vulkan-cube" received SIGSEGV in radv_pipeline_init_blend_state at ../src/amd/vulkan/radv_pipeline.c:699]
    • + +
    • Bug 109561 - [regression, bisected] code re-factor causing games to stutter or lock-up system
    • + +
    • Bug 109573 - dEQP-VK.spirv_assembly.instruction.graphics.module.same_module
    • + +
    • Bug 109575 - Mesa-19.0.0-rc1 : Computer Crashes trying to run anything Vulkan
    • + +
    • Bug 109581 - [BISECTED] Nothing is Rendered on Sascha Willem's "subpasses" demo
    • + +
    • Bug 109594 - totem assert failure: totem: src/intel/genxml/gen9_pack.h:72: __gen_uint: La declaración `v <= max' no se cumple.
    • + +
    • Bug 109597 - wreckfest issues with transparent objects & skybox
    • + +
    • Bug 109601 - [Regression] RuneLite GPU rendering broken on 18.3.x
    • + +
    • Bug 109603 - nir_instr_as_deref: Assertion `parent && parent->type == nir_instr_type_deref' failed.
    • + +
    • Bug 109698 - dri.pc contents invalid when built with meson
    • + +
    • Bug 109717 - [regression] Cull distance tests asserting
    • + +
    • Bug 109735 - [Regression] broken font with mesa_vulkan_overlay
    • + +
    • Bug 109759 - [BISECTED][REGRESSION][IVB, HSW] Font rendering problem in OpenGL
    • + +
    + +

    Changes

    + +
      + +

      Adam Jackson (4):

      +
        +
      • glx: Demand success from CreateContext requests (v2)
      • +
      • specs: Remove GLES profile interaction text from GLX_MESA_query_renderer
      • +
      • specs: Remove GLX_RENDERER_ID_MESA from GLX_MESA_query_renderer
      • +
      • specs: Bump GLX_MESA_query_renderer to version 9
      • +
      + +

      Aditya Swarup (1):

      +
        +
      • i965: Lift restriction in external textures for EGLImage support
      • +
      + +

      Alejandro Piñeiro (3):

      +
        +
      • nir: remove unused variable
      • +
      • nir/xfb: don't assert when xfb_buffer/stride is present but not xfb_offset
      • +
      • nir/xfb: distinguish array of structs vs array of blocks
      • +
      + +

      Alex Deucher (3):

      +
        +
      • pci_ids: add new vega10 pci ids
      • +
      • pci_ids: add new vega20 pci id
      • +
      • pci_ids: add new VegaM pci id
      • +
      + +

      Alex Smith (1):

      +
        +
      • radv: Flush before vkCmdWriteTimestamp() if needed
      • +
      + +

      Alexander von Gluck IV (1):

      +
        +
      • egl/haiku: Fix reference to disp vs dpy
      • +
      + +

      Alok Hota (8):

      +
        +
      • swr/rast: Use gfxptr_t value in JitGatherVertices
      • +
      • swr/rast: Add annotator to interleave isa text
      • +
      • swr/rast: partial support for Tiled Resources
      • +
      • swr/rast: Unaligned and translations in gathers
      • +
      • swr/rast: Scope MEM_CLIENT enum for mem usages
      • +
      • swr/rast: New execution engine per JIT
      • +
      • swr/rast: Store cached files in multiple subdirs
      • +
      • swr/rast: bypass size limit for non-sampled textures
      • +
      + +

      Alyssa Rosenzweig (1):

      +
        +
      • util: Fix warning in u_cpu_detect on non-x86
      • +
      + +

      Andre Heider (4):

      +
        +
      • st/nine: fix stack corruption due to ABI mismatch
      • +
      • st/nine: plug thread related leaks
      • +
      • st/nine: clean up thead shutdown sequence a bit
      • +
      • d3dadapter9: use snprintf(..., "%s", ...) instead of strncpy
      • +
      + +

      Andres Gomez (8):

      +
        +
      • glsl/linker: complete documentation for assign_attribute_or_color_locations
      • +
      • docs: update 18.3 and add 19.x cycles for the release calendar
      • +
      • glsl: correct typo in GLSL compilation error message
      • +
      • editorconfig: Add max_line_length property
      • +
      • glsl/linker: specify proper direction in location aliasing error
      • +
      • docs: complete the calendar and release schedule documentation
      • +
      • bin/get-pick-list.sh: fix the oneline printing
      • +
      • bin/get-pick-list.sh: fix redirection in sh
      • +
      + +

      Andrii Simiklit (9):

      +
        +
      • intel/tools: avoid 'unused variable' warnings
      • +
      • compiler: avoid 'unused variable' warnings
      • +
      • i965: avoid 'unused variable' warnings
      • +
      • i965/batch: avoid reverting batch buffer if saved state is an empty
      • +
      • intel/tools: make sure the binary file is properly read
      • +
      • anv/pipeline: remove unnecessary null-pointer check
      • +
      • intel/batch-decoder: fix vertex buffer size calculation for gen<8
      • +
      • intel/batch-decoder: fix a vb end address calculation
      • +
      • i965: re-emit index buffer state on a reset option change.
      • +
      + +

      Anuj Phogat (7):

      +
        +
      • i965/icl: Set Error Detection Behavior Control Bit in L3CNTLREG
      • +
      • anv/icl: Set Error Detection Behavior Control Bit in L3CNTLREG
      • +
      • anv/icl: Disable prefetching of sampler state entries
      • +
      • i965/icl: Fix L3 configurations
      • +
      • i965/icl: Set use full ways in L3CNTLREG
      • +
      • intel/icl: Set way_size_per_bank to 4
      • +
      • anv/icl: Set use full ways in L3CNTLREG
      • +
      + +

      Axel Davy (12):

      +
        +
      • st/nine: Allow 'triple buffering' with thread_submit
      • +
      • st/nine: Remove thread_submit warning
      • +
      • st/nine: Use helper to release swapchain buffers later
      • +
      • st/nine: Switch to presentation buffer if resize is detected
      • +
      • st/nine: Fix volumetexture dtor on ctor failure
      • +
      • st/nine: Bind src not dst in nine_context_box_upload
      • +
      • st/nine: Add src reference to nine_context_range_upload
      • +
      • st/nine: Increase the limit of cached ff shaders
      • +
      • st/nine: Immediately upload user provided textures
      • +
      • st/nine: Enable debug info if NDEBUG is not set
      • +
      • st/nine: Ignore window size if error
      • +
      • st/nine: Ignore multisample quality level if no ms
      • +
      + +

      Bart Oldeman (1):

      +
        +
      • gallium-xlib: query MIT-SHM before using it.
      • +
      + +

      Bas Nieuwenhuizen (41):

      +
        +
      • radv: Use structured intrinsics instead of indexing workaround for GFX9.
      • +
      • vulkan: Allow storage images in the WSI.
      • +
      • radv: Fix opaque metadata descriptor last layer.
      • +
      • radv: Clamp gfx9 image view extents to the allocated image extents.
      • +
      • radv: Align large buffers to the fragment size.
      • +
      • radv/android: Mark android WSI image as shareable.
      • +
      • radv/android: Use buffer metadata to determine scanout compat.
      • +
      • radv: Check for shareable images in central place.
      • +
      • radv: Remove redundant format check.
      • +
      • radv: Fix multiview depth clears
      • +
      • radv: Work around non-renderable 128bpp compressed 3d textures on GFX9.
      • +
      • radv: Fix wrongly positioned paren.
      • +
      • radv: Do a cache flush if needed before reading predicates.
      • +
      • radv: Implement buffer stores with less than 4 components.
      • +
      • anv/android: Do not reject storage images.
      • +
      • radv: Remove device path.
      • +
      • radv: Remove unused variable.
      • +
      • amd/common: Add some parentheses to silence warning.
      • +
      • radv: Fix rasterization precision bits.
      • +
      • spirv: Fix matrix parameters in function calls.
      • +
      • freedreno: Move register constant files to src/freedreno.
      • +
      • radv: Only use 32 KiB per threadgroup on Stoney.
      • +
      • radv: Set partial_vs_wave for pipelines with just GS, not tess.
      • +
      • nir: Account for atomics in copy propagation.
      • +
      • radv: Remove unused variable.
      • +
      • radv/winsys: Set winsys bo priority on creation.
      • +
      • radv/winsys: Add priority handling during submit.
      • +
      • radv: Enable VK_EXT_memory_priority.
      • +
      • radv: Fix the shader info pass for not having the variable.
      • +
      • amd/common: Fix stores to derefs with unknown variable.
      • +
      • amd/common: Add gep helper for pointer increment.
      • +
      • amd/common: Handle nir_deref_type_ptr_as_array for shared memory.
      • +
      • amd/common: handle nir_deref_cast for shared memory from integers.
      • +
      • radv: Only look at pImmutableSamples if the descriptor has a sampler.
      • +
      • amd/common: Use correct writemask for shared memory stores.
      • +
      • radv: Sync ETC2 whitelisted devices.
      • +
      • radv: Fix float16 interpolation set up.
      • +
      • radv: Allow interpolation on non-float types.
      • +
      • radv: Handle clip+cull distances more generally as compact arrays.
      • +
      • radv: Fix rebase issue in 19.0 for float16 fix.
      • +
      • radv: Interpolate less aggressively.
      • +
      + +

      Boyan Ding (3):

      +
        +
      • gk110/ir: Add rcp f64 implementation
      • +
      • gk110/ir: Add rsq f64 implementation
      • +
      • gk110/ir: Use the new rcp/rsq in library
      • +
      + +

      Brian Paul (3):

      +
        +
      • svga: add new gallium formats to the format conversion table
      • +
      • mesa: fix display list corner case assertion
      • +
      • svga: remove SVGA_RELOC_READ flag in SVGA3D_BindGBSurface()
      • +
      + +

      Bruce Cherniak (1):

      +
        +
      • gallium/swr: Fix multi-context sync fence deadlock.
      • +
      + +

      Caio Marcelo de Oliveira Filho (10):

      +
        +
      • nir: properly clear the entry sources in copy_prop_vars
      • +
      • nir: properly find the entry to keep in copy_prop_vars
      • +
      • nir: add a way to print the deref chain
      • +
      • nir: remove dead code from copy_prop_vars
      • +
      • nir: fix warning in nir_lower_io.c
      • +
      • util: Helper to create sets and hashes with pointer keys
      • +
      • src/compiler: use new hash table and set creation helpers
      • +
      • src/intel: use new hash table and set creation helpers
      • +
      • nir: check NIR_SKIP to skip passes by name
      • +
      • gallium: Add PIPE_CAP_GLSL_TESS_LEVELS_AS_INPUTS
      • +
      + +

      Carlos Garnacho (1):

      +
        +
      • wayland/egl: Ensure EGL surface is resized on DRI update_buffers()
      • +
      + +

      Carsten Haitzler (Rasterman) (2):

      +
        +
      • vc4: Use named parameters for the NEON inline asm.
      • +
      • vc4: Declare the cpu pointers as being modified in NEON asm.
      • +
      + +

      Chad Versace (1):

      +
        +
      • i965: Fix -Wswitch on INTEL_COPY_STREAMING_LOAD
      • +
      + +

      Chia-I Wu (2):

      +
        +
      • meson: fix EGL/X11 build without GLX
      • +
      • freedreno/drm: sync uapi again
      • +
      + +

      Christian Gmeiner (6):

      +
        +
      • nir: add lowering for ffloor
      • +
      • etnaviv: drop redundant ctx function parameter
      • +
      • meson: add etnaviv to the tools option
      • +
      • etnaviv: extend etna_resource with an addressing mode
      • +
      • etnaviv: update headers from rnndb
      • +
      • etnaviv: add linear sampling support
      • +
      + +

      Connor Abbott (4):

      +
        +
      • Revert "radv: disable VK_SUBGROUP_FEATURE_VOTE_BIT"
      • +
      • nir/algebraic: Rewrite bit-size inference
      • +
      • nir/algebraic: Add unit tests for bitsize validation
      • +
      • nir: Fixup algebraic test for variable-sized conversions
      • +
      + +

      Daniel Stone (1):

      +
        +
      • gbm: Clarify acceptable formats for gbm_bo
      • +
      + +

      Danylo Piliaiev (9):

      +
        +
      • i965: Fix calculation of layers array length for isl_view
      • +
      • nir: add if opt opt_if_loop_last_continue()
      • +
      • glsl/linker: Fix unmatched TCS outputs being reduced to local variable
      • +
      • glsl: Make invariant outputs in ES fragment shader not to cause error
      • +
      • glsl: Fix copying function's out to temp if dereferenced by array
      • +
      • anv: Implement VK_KHR_draw_indirect_count for gen 7+
      • +
      • anv: Implement VK_EXT_conditional_rendering for gen 7.5+
      • +
      • anv: Fix VK_EXT_transform_feedback working with varyings packed in PSIZ
      • +
      • anv: Fix destroying descriptor sets when pool gets reset
      • +
      + +

      Dave Airlie (19):

      +
        +
      • radv: apply xfb buffer offset at buffer binding time not later. (v2)
      • +
      • radv: fix begin/end transform feedback with 0 counter buffers.
      • +
      • virgl: fix vtest regression since fencing changes.
      • +
      • spirv/vtn: handle variable pointers without offset lowering
      • +
      • nir: move getting deref from var after we check deref type.
      • +
      • nir: handle shared pointers in lowering indirect derefs.
      • +
      • ac: avoid casting pointers on bcsel and stores
      • +
      • radv: handle loading from shared pointers
      • +
      • ac: handle cast derefs
      • +
      • r600: make suballocator 256-bytes align
      • +
      • virgl: fix undefined shift to use unsigned.
      • +
      • virgl: fix const warning on debug flags.
      • +
      • radv: use 3d shader for gfx9 copies if dst is 3d
      • +
      • radv/xfb: fix counter buffer bounds checks.
      • +
      • virgl/vtest: fix front buffer flush with protocol version 0.
      • +
      • virgl: use primconvert provoking vertex properly
      • +
      • dri_interface: add put shm image2 (v2)
      • +
      • glx: add support for putimageshm2 path (v2)
      • +
      • gallium: use put image shm2 path (v2)
      • +
      + +

      David Shao (1):

      +
        +
      • meson: ensure that xmlpool_options.h is generated for gallium targets that need it
      • +
      + +

      Dieter Nützel (1):

      +
        +
      • docs/features: Delete double nv50 entry and wrong enumeration
      • +
      + +

      Dylan Baker (48):

      +
        +
      • meson: link gallium nine with pthreads
      • +
      • meson: Don't set -Wall
      • +
      • meson: fix libatomic tests
      • +
      • meson: Add tests to suites
      • +
      • util: promote u_memory to src/util
      • +
      • meson: Add nir_algebraic_parser_test to suites
      • +
      • meson: Fix ppc64 little endian detection
      • +
      • meson: remove duplicate definition
      • +
      • meson: Add support for gnu hurd
      • +
      • meson: Add toggle for glx-direct
      • +
      • docs/meson: Recommend not using CFLAGS and friends
      • +
      • travis: meson: use native files to override llvm-config
      • +
      • travis: Don't try to read libdrm out of configure.ac
      • +
      • travis: meson: enable unit tests
      • +
      • docs: add note about using backticks for rbs in gitlab
      • +
      • docs/install: Add meson to the main install page
      • +
      • docs/meson: Update LLVM section with information about native files
      • +
      • docs/install: Update python dependency section
      • +
      • docs/autoconf: Mark autoconf as being replaced
      • +
      • meson: Override C++ standard to gnu++11 when building with altivec on ppc64
      • +
      • meson: Error out if building nouveau and using LLVM without rtti
      • +
      • autotools: Remove tegra vdpau driver
      • +
      • meson: Add a script to extract the cmd line used for meson
      • +
      • meson: allow building dri driver without window system if osmesa is classic
      • +
      • bin/meson-cmd-extract: Also handle cross and native files
      • +
      • meson: fix swr KNL build
      • +
      • meson: Fix compiler checks for SWR with ICC
      • +
      • meson: Add warnings and errors when using ICC
      • +
      • automake: Fix path to generated source
      • +
      • automake: Add float64.glsl to dist tarball
      • +
      • automake: Add include dir for nir src directory
      • +
      • configure: Bump SWR LLVM requirement to 7
      • +
      • automake: Add --enable-autotools to distcheck flags
      • +
      • android,autotools,i965: Fix location of float64_glsl.h
      • +
      • VERSION: bump to 19.0.0-rc1
      • +
      • Version: Bump for rc2
      • +
      • cherry-ignore: Add some patches
      • +
      • Revert "intel/compiler: More peephole_select for pre-Gen6"
      • +
      • Revert "nir/opt_peephole_select: Don't peephole_select expensive math instructions"
      • +
      • Revert "intel/compiler: More peephole select"
      • +
      • Bump version for 19.0-rc3
      • +
      • version: bump for 19.0-rc4
      • +
      • get-pick-list: Add --pretty=medium to the arguments for Cc patches
      • +
      • meson: Add dependency on genxml to anvil
      • +
      • Version: update to 19.0-rc5
      • +
      • Bump version for rc6
      • +
      • VERSION: bump version for rc7
      • +
      • cherry-ignore: Update the cherry-ignore file
      • +
      + +

      Eduardo Lima Mitev (2):

      +
        +
      • freedreno/ir3: Make imageStore use num components from image format
      • +
      • freedreno/ir3: Handle GL_NONE in get_num_components_for_glformat()
      • +
      + +

      Eleni Maria Stea (1):

      +
        +
      • i965: fixed clamping in set_scissor_bits when the y is flipped
      • +
      + +

      Elie Tournier (17):

      +
        +
      • glsl: Add "built-in" function to do abs(fp64)
      • +
      • glsl: Add "built-in" functions to do neg(fp64)
      • +
      • glsl: Add "built-in" function to do sign(fp64)
      • +
      • glsl: Add "built-in" functions to do eq/ne(fp64, fp64)
      • +
      • glsl: Add utility function to extract 64-bit sign
      • +
      • glsl: Add "built-in" functions to do lt(fp64, fp64)
      • +
      • glsl: Add "built-in" functions to do add(fp64, fp64)
      • +
      • glsl: Add "built-in" functions to do mul(fp64, fp64)
      • +
      • glsl: Add "built-in" functions to do fp64_to_uint(fp64)
      • +
      • glsl: Add "built-in" functions to do uint_to_fp64(uint)
      • +
      • glsl: Add "built-in" functions to do fp64_to_int(fp64)
      • +
      • glsl: Add "built-in" functions to do int_to_fp64(int)
      • +
      • glsl: Add "built-in" functions to do fp64_to_fp32(fp64)
      • +
      • glsl: Add "built-in" functions to do fp32_to_fp64(fp32)
      • +
      • glsl: Add "built-in" functions to do sqrt(fp64)
      • +
      • glsl: Add "built-in" functions to do trunc(fp64)
      • +
      • glsl: Add "built-in" functions to do round(fp64)
      • +
      + +

      Emil Velikov (81):

      +
        +
      • mesa: bump version to 19.1.0-devel
      • +
      • docs: add 19.0.0-devel release notes template
      • +
      • docs: mention EXT_shader_implicit_conversions
      • +
      • egl: add EGL_EXT_device_base entrypoints
      • +
      • egl/glvnd: correctly report errors when vendor cannot be found
      • +
      • docs/releasing.html: polish cherry-picking/testing text
      • +
      • docs/submittingpatches.html: correctly handle the <p> tag
      • +
      • docs: document the staging branch and add reference to it
      • +
      • bin/get-pick-list.sh: simplify git oneline printing
      • +
      • bin/get-pick-list.sh: prefix output with "[stable] "
      • +
      • bin/get-pick-list.sh: handle "typod" usecase.
      • +
      • bin/get-pick-list.sh: handle the fixes tag
      • +
      • bin/get-pick-list.sh: tweak the commit sha matching pattern
      • +
      • bin/get-pick-list.sh: flesh out is_sha_nomination
      • +
      • bin/get-pick-list.sh: handle fixes tag with missing colon
      • +
      • bin/get-pick-list.sh: handle unofficial "broken by" tag
      • +
      • bin/get-pick-list.sh: use test instead of [ ]
      • +
      • bin/get-pick-list.sh: handle reverts prior to the branchpoint
      • +
      • travis: drop unneeded x11proto-xf86vidmode-dev
      • +
      • glx: make xf86vidmode mandatory for direct rendering
      • +
      • travis: adding missing x11-xcb for meson+vulkan
      • +
      • egl/wayland: bail out when drmGetMagic fails
      • +
      • egl/wayland: plug memory leak in drm_handle_device()
      • +
      • docs: update 18.3.0 release notes
      • +
      • docs: add sha256 checksums for 18.3.0
      • +
      • docs: update calendar, add news item and link release notes for 18.3.0
      • +
      • freedreno: drop duplicate MKDIR_GEN declaration
      • +
      • freedreno: add the missing _la in libfreedreno_ir3_la
      • +
      • amd/addrlib: drop si_ci_vi_merged_enum.h from the list
      • +
      • docs: add release notes for 18.3.1
      • +
      • docs: add sha256 checksums for 18.3.1
      • +
      • docs: update calendar, add news item and link release notes for 18.3.1
      • +
      • glx: mandate xf86vidmode only for "drm" dri platforms
      • +
      • bin/get-pick-list.sh: rework handing of sha nominations
      • +
      • bin/get-pick-list.sh: warn when commit lists invalid sha
      • +
      • meson: don't require glx/egl/gbm with gallium drivers
      • +
      • pipe-loader: meson: reference correct library
      • +
      • TODO: glx: meson: build dri based glx tests, only with -Dglx=dri
      • +
      • glx: meson: drop includes from a link-only library
      • +
      • glx: meson: wire up the dispatch-index-check test
      • +
      • glx/test: meson: assorted include fixes
      • +
      • configure: add CXX11_CXXFLAGS to LLVM_CXXFLAGS
      • +
      • travis: flip to distro xenial, drop sudo false
      • +
      • travis: meson: print the configured state
      • +
      • travis: printout llvm-config --version
      • +
      • travis: meson: use FOO_DRIVERS directly
      • +
      • travis: meson: add unwind handling
      • +
      • travis: meson: explicitly control the DRI loaders
      • +
      • travis: meson: add explicit handling to gallium ST
      • +
      • travis: meson: port gallium build combinations over
      • +
      • docs: add release notes for 18.3.2
      • +
      • docs: add sha256 checksums for 18.3.2
      • +
      • docs: update calendar, add news item and link release notes for 18.3.2
      • +
      • freedreno: automake: ship ir3_nir_trig.py in the tarball
      • +
      • mesa: correctly use os.path.join in our python scripts
      • +
      • Revert "mesa/main: remove ARB suffix from glGetnTexImage"
      • +
      • mapi: sort static entrypoints numerically
      • +
      • mapi: add all _glapi_table entrypoints to static_data.py
      • +
      • genCommon.py: Fix typo in _LIBRARY_FEATURE_NAMES.
      • +
      • mapi: move genCommon.py to src/mapi/new
      • +
      • mapi/new: import mapi scripts from glvnd
      • +
      • mapi/new: sort by slot number
      • +
      • mapi/new: use the static_data offsets in the new generator
      • +
      • mapi/new: reinstate _NO_HIDDEN suffixes in the new generator
      • +
      • mapi/new: split out public_entries handling
      • +
      • mapi/new: don't print info we don't need for ES1/ES2
      • +
      • mapi/new: fixup the GLDEBUGPROCKHR typedef to the non KHR one
      • +
      • mapi/new: remove duplicate GLvoid/void substitution
      • +
      • autotools: wire the new generator for es1 and es2
      • +
      • meson: wire the new generator for es1 and es2
      • +
      • scons: wire the new generator for es1 and es2
      • +
      • Revert "mapi/new: sort by slot number"
      • +
      • mapi/es*api: remove GL_OES_EGL_image entrypoints
      • +
      • mapi/es*api: remove GL_EXT_multi_draw_arrays entrypoints
      • +
      • mapi/es2api: remove no longer present entrypoints
      • +
      • mapi: remove old, unused ES* generator code
      • +
      • mapi: remove machinery handling CSV files
      • +
      • mapi: print function declarations for shared glapi
      • +
      • vc4: Declare the last cpu pointer as being modified in NEON asm.
      • +
      • anv: wire up the state_pool_padding test
      • +
      • meson: egl: correctly manage loader/xmlconfig
      • +
      + +

      Eric Anholt (171):

      +
        +
      • v3d: Fix a copy-and-paste comment in the simulator code.
      • +
      • v3d: Fix a typo in a comment in job handling.
      • +
      • v3d: Drop #if 0-ed out v3d_dump_to_file().
      • +
      • v3d: Respect user-passed strides for BO imports.
      • +
      • v3d: Take advantage of _mesa_hash_table_remove_key() in the simulator.
      • +
      • v3d: Use the TLB R/B swapping instead of recompiles when available.
      • +
      • v3d: Update the TLB config for depth writes on V3D 4.2.
      • +
      • vc4: Drop the winsys_stride relayout in the simluator
      • +
      • v3d: Maintain a mapping of the GEM buffer in the simulator.
      • +
      • v3d: Remove the special path for simulaton of the submit ioctl.
      • +
      • vc4: Take advantage of _mesa_hash_table_remove_key() in the simulator.
      • +
      • vc4: Maintain a separate GEM mapping of BOs in the simulator.
      • +
      • vc4: Use the normal simulator ioctl path for CL submit as well.
      • +
      • gbm: Move gbm_format_canonicalize() to the core.
      • +
      • gbm: Introduce a helper function for printing GBM format names.
      • +
      • egl: Improve the debugging of gbm format matching in DRI configs.
      • +
      • v3d: Fix double-swapping of R/B on V3D 4.1
      • +
      • v3d: Don't try to set PF flags on a LDTMU operation
      • +
      • vc4: Make sure we make ro scanout resources for create_with_modifiers.
      • +
      • vc4: Don't return a vc4 BO handle on a renderonly screen.
      • +
      • glx: Remove an old DEFAULT_DRIVER_DIR default.
      • +
      • glx: Move DRI extensions pointer loading to driOpenDriver().
      • +
      • egl: Move loader_set_logger() up to egl_dri2.c.
      • +
      • loader: Stop using a local definition for an in-tree header
      • +
      • loader: Factor out the common driver opening logic from each loader.
      • +
      • egl: Print the actual message to the console from _eglError().
      • +
      • gallium: Fix uninitialized variable warning in compute test.
      • +
      • gallium: Remove unused variable in u_tests.
      • +
      • v3d: Add renderonly support.
      • +
      • v3d: Add support for RGBA_SRGB along with BGRA_SRGB.
      • +
      • v3d: Add missing OES_half_float_linear support.
      • +
      • v3d: Use combined input/output segments.
      • +
      • v3d: Add the V3D TFU submit interface to the simulator.
      • +
      • v3d: Use the TFU to do generatemipmap.
      • +
      • v3d: Update simulator cache flushing code to match the kernel better.
      • +
      • v3d: Create a state uploader for packing our shaders together.
      • +
      • v3d: Put default vertex attribute values into the state uploader as well.
      • +
      • v3d: Re-use the wrap mode uniform on V3D 3.3.
      • +
      • v3d: Make an array for frag/vert texture state in the context.
      • +
      • v3d: Don't forget to flush writes to UBOs.
      • +
      • v3d: Convert to using nir_src_as_uint() from const_value derefs.
      • +
      • v3d: Fix a comment typo
      • +
      • v3d: Return the right gl_SampleMaskIn[] value.
      • +
      • v3d: Fix handling of texture first_layer offsets for 3D textures.
      • +
      • v3d: Avoid confusing auto-indenting in TEXTURE_SHADER_STATE packing
      • +
      • v3d: Split most of TEXTURE_SHADER_STATE setup out of sampler views.
      • +
      • v3d: Garbage collect unused uniforms code.
      • +
      • v3d: Simplify VIR uniform dumping using a temporary.
      • +
      • v3d: Add VIR dumping of TMU config p0/p1.
      • +
      • v3d: Fix a leak of the transfer helper on screen destroy.
      • +
      • vc4: Fix a leak of the transfer helper on screen destroy.
      • +
      • v3d: Fix a leak of the disassembled instruction string during debug dumps.
      • +
      • tfu
      • +
      • shader-packing
      • +
      • nir: Add some more consts to the nir_format_convert.h helpers.
      • +
      • nir: Pull some of intel's image load/store format conversion to nir_format.h
      • +
      • intel: Simplify the half-float packing in image load/store lowering.
      • +
      • mesa/st: Expose compute shaders when NIR support is advertised.
      • +
      • nir: Print the format of image variables.
      • +
      • Revert "intel: Simplify the half-float packing in image load/store lowering."
      • +
      • nir: Move intel's half-float image store lowering to to nir_format.h.
      • +
      • v3d: Don't forget to wait for our TFU job before rendering from it.
      • +
      • v3d: Set up the right stride for raster TFU.
      • +
      • v3d: Don't forget to bump the number of writes when doing TFU ops.
      • +
      • v3d: Add support for using the TFU to do some blits.
      • +
      • v3d: Add support for texturing from linear.
      • +
      • v3d: Add safety checks for resource_create().
      • +
      • v3d: Make sure that a thrsw doesn't split a multop from its umul24.
      • +
      • v3d: Add missing flagging of SYNCB as a TSY op.
      • +
      • v3d: Add support for draw indirect for GLES3.1.
      • +
      • v3d: Avoid assertion failures when removing end-of-shader instructions.
      • +
      • v3d: Move uinfo->data[] dereference to the top of v3d_write_uniforms().
      • +
      • v3d: Move uniform pretty-printing to its own helper function.
      • +
      • v3d: Use the uniform pretty-printer in v3d_write_uniforms()'s debug code.
      • +
      • v3d: Do uniform pretty-printing in the QPU dump.
      • +
      • v3d: Drop in a bunch of notes about performance improvement opportunities.
      • +
      • vc4: Use the original bit size when scalarizing uniform loads.
      • +
      • v3d: Use the original bit size when scalarizing uniform loads.
      • +
      • vc4: Reuse nir_format_convert.h in our blend lowering.
      • +
      • v3d: Fix the argument type for vir_BRANCH().
      • +
      • nir: Fix clamping of uints for image store lowering.
      • +
      • v3d: Put the dst bo first in the list of BOs for TFU calls.
      • +
      • v3d: Fix check for TFU job completion in the simulator.
      • +
      • v3d: Don't try to create shadow tiled temporaries for 1D textures.
      • +
      • v3d: Remove dead prototypes for load/store utile functions.
      • +
      • v3d: Implement texture_subdata to reduce teximage upload copies.
      • +
      • vc4: Move the utile load/store functions to a header for reuse by v3d.
      • +
      • v3d: Add a fallthrough path for utile load/store of 32 byte lines.
      • +
      • v3d: Load and store aligned utiles all at once.
      • +
      • docs: Add a note that MRs should still include any r-b or a-b tags.
      • +
      • docs: Add an encouraging note about providing reviews and acks.
      • +
      • v3d: Fix simulator mode on i915 render nodes.
      • +
      • v3d: Drop shadow comparison state from shader variant key.
      • +
      • v3d: Hook up perf_debug() output to GL_ARB_debug output as well.
      • +
      • vc4: Hook up perf_debug() output to GL_ARB_debug_output as well.
      • +
      • gallium/ttn: Fix setup of outputs_written.
      • +
      • v3d: Fix uniform pretty printing assertion failure with branches.
      • +
      • v3d: Add a "precompile" debug flag for shader-db.
      • +
      • v3d: Hook up some shader-db output to GL_ARB_debug_output.
      • +
      • v3d: Drop unused count_nir_instrs() helper.
      • +
      • v3d: Drop incorrect dependency for flpop.
      • +
      • v3d: Move "does this instruction have flags" from sched to generic helpers.
      • +
      • v3d: Don't generate temps for comparisons.
      • +
      • v3d: Dead-code eliminate unused flags updates.
      • +
      • v3d: Add a note for a potential performance win on multop/umul24.
      • +
      • v3d: Force sampling from base level for tg4.
      • +
      • v3d: Add support for non-constant texture offsets.
      • +
      • v3d: Add support for requesting the sample offsets.
      • +
      • v3d: Add support for textureSize() on MSAA textures.
      • +
      • v3d: Add support for gl_HelperInvocation.
      • +
      • v3d: Fix segfault when failing to compile a program.
      • +
      • v3d: Don't forget to include RT writes in precompiles.
      • +
      • v3d: Simplify the emission of comparisons for the bcsel optimization.
      • +
      • v3d: Move the "Find the ALU instruction generating our bool" out of bcsel.
      • +
      • v3d: Don't try to fold non-SSA-src comparisons into bcsels.
      • +
      • v3d: Fold comparisons for IF conditions into the flags for the IF.
      • +
      • v3d: Handle dynamically uniform IF statements with uniform control flow.
      • +
      • v3d: Refactor compiler entrypoints.
      • +
      • v3d: Reinstate the new shader-db output after v3d_compile() refactor.
      • +
      • v3d: Fix up VS output setup during precompiles.
      • +
      • v3d: Remove dead switch cases and comments from v3d_nir_lower_io.
      • +
      • v3d: Do UBO loads a vector at a time.
      • +
      • v3d: Stop scalarizing our uniform loads.
      • +
      • nir: Allow nir_format_unpack_int/sint to unpack larger values.
      • +
      • nir: Add nir_lower_tex options to lower sampler return formats.
      • +
      • v3d: Use the core tex lowering.
      • +
      • nir: Add nir_lower_tex support for Broadcom's swizzled TG4 results.
      • +
      • v3d: Enable GL_ARB_texture_gather on V3D 4.x.
      • +
      • nir: Make nir_deref_instr_build/get_const_offset actually use size_align.
      • +
      • glsl: Fix buffer overflow with an atomic buffer binding out of range.
      • +
      • v3d: Add support for flushing dirty TMU data at job end.
      • +
      • v3d: Add support for the early_fragment_tests flag.
      • +
      • v3d: Add support for GL_ARB_framebuffer_no_attachments.
      • +
      • v3d: Fix txf_ms 2D_ARRAY array index.
      • +
      • v3d: Add an isr to the simulator to catch GMP violations.
      • +
      • v3d: Add support for matrix inputs to the FS.
      • +
      • v3d: Drop the GLSL version level.
      • +
      • v3d: Add SSBO/atomic counters support.
      • +
      • v3d: Add support for shader_image_load_store.
      • +
      • v3d: Add support for CS workgroup/invocation id intrinsics.
      • +
      • v3d: Add support for CS shared variable load/store/atomics.
      • +
      • v3d: Add support for CS barrier() intrinsics.
      • +
      • v3d: SHARED but not necessarily SCANOUT buffers on RO must be linear.
      • +
      • v3d: If the modifier is not known on BO import, default to linear for RO.
      • +
      • v3d: Restructure RO allocations using resource_from_handle.
      • +
      • v3d: Don't leak the GPU fd for renderonly usage.
      • +
      • vc4: Don't leak the GPU fd for renderonly usage.
      • +
      • gallium: Enable unit tests as actual meson unit tests.
      • +
      • gallium: Fix comment about possible colorspaces.
      • +
      • gallium: Make sure we return is_unorm/is_snorm for compressed formats.
      • +
      • v3d: Rename gallium-local limits defines from VC5 to V3D.
      • +
      • v3d: Fix overly-large vattr_sizes structs.
      • +
      • v3d: Avoid duplicating limits defines between gallium and v3d core.
      • +
      • v3d: Drop maximum number of texture units down to 16.
      • +
      • v3d: Fix BO stats accounting for imported buffers.
      • +
      • v3d: Flush blit jobs immediately after generating them.
      • +
      • v3d: Fix release-build warning about utile_h.
      • +
      • v3d: Fix stencil sampling from packed depth/stencil.
      • +
      • v3d: Fix stencil sampling from a separate-stencil buffer.
      • +
      • v3d: Use the symbolic names for wrap modes from the XML.
      • +
      • v3d: Move the sampler state to the long-lived state uploader.
      • +
      • v3d: Create separate sampler states for the various blend formats.
      • +
      • pl111: Rename the pl111 driver to "kmsro".
      • +
      • kmsro: Extend to include hx8357d.
      • +
      • vc4: Enable NEON asm on meson cross-builds.
      • +
      • v3d: Fix the autotools build.
      • +
      • mesa: Skip partial InvalidateFramebuffer of packed depth/stencil.
      • +
      • v3d: Fix image_load_store clamping of signed integer stores.
      • +
      • v3d: Use the early_fragment_tests flag for the shader's disable-EZ field.
      • +
      • v3d: Fix the check for "is the last thrsw inside control flow"
      • +
      • st/dri: Set the PIPE_BIND_SHARED flag on create_image_with_modifiers.
      • +
      + +

      Eric Engestrom (47):

      +
        +
      • wsi/wayland: use proper VkResult type
      • +
      • wsi/wayland: only finish() a successfully init()ed display
      • +
      • REVIEWERS: add include path for EGL
      • +
      • REVIEWERS: add Emil as EGL reviewer
      • +
      • REVIEWERS: add Vulkan reviewer group
      • +
      • xmlpool: update translation po files
      • +
      • meson: only run vulkan's meson.build when building vulkan
      • +
      • gbm: remove unnecessary meson include
      • +
      • meson: fix wayland-less builds
      • +
      • gbm: add new entrypoint to symbols check
      • +
      • egl: add missing glvnd entrypoint for EGL_ANDROID_blob_cache
      • +
      • egl: fix bad rebase
      • +
      • gbm: add missing comma between strings
      • +
      • glapi: add missing visibility args
      • +
      • anv: correctly use vulkan 1.0 by default
      • +
      • vulkan/utils: s/VERSION/PACKAGE_VERSION/
      • +
      • build: stop defining unused VERSION
      • +
      • wsi/display: fix mem leak when freeing swapchains
      • +
      • vulkan/wsi: fix s/,/;/ typo
      • +
      • meson: skip asm check when asm is disabled
      • +
      • anv: add unreachable() for VK_EXT_fragment_density_map
      • +
      • mesa: drop unused & deprecated lib
      • +
      • loader: deduplicate logger function declaration
      • +
      • docs: add meson cross compilation instructions
      • +
      • docs: format code blocks a bit nicely
      • +
      • docs: fix the meson aarch64 cross-file
      • +
      • docs: advertise distro-provided meson cross-files
      • +
      • anv: drop unneeded KHR suffix
      • +
      • wsi: drop unneeded KHR suffix
      • +
      • radv: remove a few more unnecessary KHR suffixes
      • +
      • egl: add missing includes
      • +
      • egl: remove unused include
      • +
      • travis: avoid using unset llvm-config
      • +
      • egl: fix python lib deprecation warning
      • +
      • docs: explain how to see what meson options exist
      • +
      • travis: fix autotools build after --enable-autotools switch addition
      • +
      • configure: EGL requirements only apply if EGL is built
      • +
      • egl: finalize EGL_MESA_query_driver
      • +
      • egl: update headers from Khronos
      • +
      • egl: add glvnd entrypoints for EGL_MESA_query_driver
      • +
      • travis: bump libdrm to 2.4.97
      • +
      • egl/glvnd: sync egl.xml from Khronos
      • +
      • anv: drop always-successful VkResult
      • +
      • meson/vdpau: add missing soversion
      • +
      • xvmc: fix string comparison
      • +
      • xvmc: fix string comparison
      • +
      • egl: fix libdrm-less builds
      • +
      + +

      Erik Faye-Lund (70):

      +
        +
      • glsl: add has_implicit_conversions()-helper
      • +
      • glsl: add has_implicit_uint_to_int_conversion()-helper
      • +
      • glsl: fall back to inexact function-match
      • +
      • mesa/glsl: add support for EXT_shader_implicit_conversions
      • +
      • glsl: do not allow implicit casts of unsized array initializers
      • +
      • mesa: expose NV_conditional_render on GLES
      • +
      • mesa/main: fixup make check after NV_conditional_render for gles
      • +
      • Revert "mesa/main: fixup make check after NV_conditional_render for gles"
      • +
      • Revert "mesa: expose NV_conditional_render on GLES"
      • +
      • mesa/main: correct requirement for EXT_occlusion_query_boolean
      • +
      • mesa/main: correct year for EXT_occlusion_query_boolean
      • +
      • mesa/main: use non-prefixed enums for consistency
      • +
      • mesa/main: simplify pipeline-statistics query validation
      • +
      • mesa/main: fix validation of GL_SAMPLES_PASSED
      • +
      • mesa/main: fix validation of GL_ANY_SAMPLES_PASSED
      • +
      • mesa/main: fix validation of GL_ANY_SAMPLES_PASSED_CONSERVATIVE
      • +
      • mesa/main: fix validation of GL_TIME_ELAPSED
      • +
      • mesa/main: fix validation of transform-feedback queries
      • +
      • mesa/main: fix validation of transform-feedback overflow queries
      • +
      • mesa/main: fix validation of ARB_query_buffer_object
      • +
      • mesa/main: fix validation of GL_TIMESTAMP
      • +
      • mesa/main: remove overly strict query-validation
      • +
      • mesa/main: remove ARB suffix from glGetnTexImage
      • +
      • mesa/main: remove bogus error for zero-sized images
      • +
      • mesa/main: factor out tex-image error-checking
      • +
      • mesa/main: factor out common error-checking
      • +
      • mesa/main: check cube-completeness in common code
      • +
      • mesa/main: fix incorrect depth-error
      • +
      • mesa/main: fixup requirements for GL_PRIMITIVES_GENERATED
      • +
      • mesa/main: make _mesa_has_tessellation return bool
      • +
      • mesa/main: rename format-check function
      • +
      • mesa/main: clean up S3_s3tc check
      • +
      • mesa/main: clean up OES_texture_float_linear check
      • +
      • mesa/main: clean up ES2_compatibility check
      • +
      • mesa/main: clean up integer texture check
      • +
      • mesa/main: use _mesa_has_FOO_bar for compressed format checks
      • +
      • mesa/main: do not allow s3tc enums on gles1
      • +
      • mesa/main: do not allow etc2 enums on gles1
      • +
      • mesa/main: do not allow astc enums on gles1
      • +
      • mesa/main: do not allow depth-texture enums on gles1
      • +
      • mesa/main: do not allow stencil-texture enums on gles1
      • +
      • mesa/main: do not allow ARB_texture_rgb10_a2ui enums before gles3
      • +
      • mesa/main: do not allow integer-texture enums before gles3
      • +
      • mesa/main: do not allow ARB_depth_buffer_float enums before gles3
      • +
      • mesa/main: do not allow EXT_packed_float enums before gles3
      • +
      • mesa/main: do not allow rg-textures enums before gles3
      • +
      • mesa/main: do not allow EXT_texture_shared_exponent enums before gles3
      • +
      • mesa/main: do not allow MESA_ycbcr_texture enums on gles
      • +
      • mesa/main: do not allow type_2_10_10_10_REV enums before gles3
      • +
      • mesa/main: do not allow floating-point texture enums on gles1
      • +
      • mesa/main: do not allow snorm-texture enums before gles3
      • +
      • mesa/main: do not allow sRGB texture enums before gles3
      • +
      • mesa/main: do not allow EXT_texture_sRGB_R8 enums before gles3
      • +
      • mesa/main: split float-texture support checking in two
      • +
      • mesa/main: require EXT_texture_type_2_10_10_10_REV for gles3
      • +
      • mesa/main: require EXT_texture_sRGB for gles3
      • +
      • mesa/st: do not probe for the same texture-formats twice
      • +
      • mesa/main: do not require float-texture filtering for es3
      • +
      • mesa/main: correct validation for GL_RGB565
      • +
      • mesa/main: fix up _mesa_has_rg_textures for gles2
      • +
      • virgl: force linear texturing support
      • +
      • virgl: simplify virgl_hw_set_vertex_buffers
      • +
      • virgl: simplify virgl_hw_set_index_buffer
      • +
      • virgl: wrap vertex element state in a struct
      • +
      • virgl: work around bad assumptions in virglrenderer
      • +
      • anv/meson: make sure tests link with -msse2
      • +
      • anv/autotools: make sure tests link with -msse2
      • +
      • docs: add note about sending merge-requests from forks
      • +
      • mapi: drop unneeded gl_dispatch_stub declarations
      • +
      • virgl: remove unused variable
      • +
      + +

      Ernestas Kulik (2):

      +
        +
      • vc4: Fix leak in HW queries error path
      • +
      • v3d: Fix leak in resource setup error path
      • +
      + +

      Francisco Jerez (14):

      +
        +
      • intel/fs: Prevent emission of IR instructions not aligned to their own execution size.
      • +
      • intel/fs: Handle source modifiers in lower_integer_multiplication().
      • +
      • intel/fs: Implement quad swizzles on ICL+.
      • +
      • intel/fs: Fix bug in lower_simd_width while splitting an instruction which was already split.
      • +
      • intel/eu/gen7: Fix brw_MOV() with DF destination and strided source.
      • +
      • intel/fs: Respect CHV/BXT regioning restrictions in copy propagation pass.
      • +
      • intel/fs: Constify fs_inst::can_do_source_mods().
      • +
      • intel/fs: Introduce regioning lowering pass.
      • +
      • intel/fs: Remove existing lower_conversions pass.
      • +
      • intel/fs: Remove nasty open-coded CHV/BXT 64-bit workarounds.
      • +
      • intel/fs: Remove FS_OPCODE_UNPACK_HALF_2x16_SPLIT opcodes.
      • +
      • intel/fs: Promote execution type to 32-bit when any half-float conversion is needed.
      • +
      • intel/fs: Exclude control sources from execution type and region alignment calculations.
      • +
      • intel/fs: Implement extended strides greater than 4 for IR source regions.
      • +
      + +

      Fritz Koenig (2):

      +
        +
      • freedreno: drm_fourcc.h header include
      • +
      • freedreno: add query for dmabuf modifiers
      • +
      + +

      Gert Wollny (30):

      +
        +
      • mesa/core: Add definitions and translations for EXT_texture_sRGB_R8
      • +
      • Gallium: Add format PIPE_FORMAT_R8_SRGB
      • +
      • mesa/st: Add support for EXT_texture_sRGB_R8
      • +
      • virgl/vtest-winsys: Use virgl version of bind flags
      • +
      • r600: Add support for EXT_texture_sRGB_R8
      • +
      • mesa: Reference count shaders that are used by transform feedback objects
      • +
      • virgl: Add command and flags to initiate debugging on the host (v2)
      • +
      • nir: Allow to skip integer ops in nir_lower_to_source_mods
      • +
      • i965: Correct L8_UNORM_SRGB table entry
      • +
      • i965: be more specific about FBO completeness errors
      • +
      • i965: Force zero swizzles for unused components in GL_RED and GL_RG
      • +
      • i965: Add support for and expose EXT_texture_sRGB_R8
      • +
      • virgl: Use file descriptor instead of un-allocated object
      • +
      • i965:use FRAMEBUFFER_UNSUPPORTED instead of FRAMEBUFFER_INCOMPLETE_DIMENSIONS
      • +
      • r600: Only set context streamout strides info from the shader that has outputs
      • +
      • r600: clean up the GS ring buffers when the context is destroyed
      • +
      • glsl: free or reuse memory allocated for TF varying
      • +
      • virgl,vtest: Initialize return value
      • +
      • virgl: Don't try handling server fences when they are not supported
      • +
      • i965: Explicitely handle swizzles for MESA_FORMAT_R_SRGB8
      • +
      • i965: Set the FBO error state INCOMPLETE_ATTACHMENT only for SRGB_R8
      • +
      • autotools: Deprecate the use of autotools
      • +
      • Gallium: Add new CAPS to indicate whether a driver can switch SRGB write
      • +
      • virgl: Set sRGB write control CAP based on host capabilities
      • +
      • mesa:main: Add flag for EXT_sRGB to gl_extensions
      • +
      • i965: Set flag for EXT_sRGB
      • +
      • mesa/st: rework support for sRGB framebuffer attachements
      • +
      • mesa/main: Use flag for EXT_sRGB instead of EXT_framebuffer_sRGB where possible
      • +
      • mesa/main/version: Lower the requirements for GLES 3.0
      • +
      • mesa/main: Expose EXT_sRGB_write_control
      • +
      + +

      Guido Günther (2):

      +
        +
      • etnaviv: Make sure rs alignment checks match
      • +
      • etnaviv: fix typo in cflush_all description
      • +
      + +

      Gurchetan Singh (18):

      +
        +
      • egl: add missing #include <stddef.h> in egldevice.h
      • +
      • virgl: quadruple command buffer size
      • +
      • virgl: avoid large inline transfers
      • +
      • virgl: don't mark buffers as unclean after a write
      • +
      • virgl: texture_transfer_pool --> transfer_pool
      • +
      • virgl: remove unnessecary code
      • +
      • virgl: move texture metadata to common code
      • +
      • virgl: move virgl_resource_layout to common code
      • +
      • virgl: move vrend_get_tex_image_offset to common code
      • +
      • virgl: store layer_stride in metadata
      • +
      • virgl: consolidate transfer code
      • +
      • virgl: make transfer code with PIPE_BUFFER targets
      • +
      • virgl: make virgl_buffers use resource helpers
      • +
      • virgl: modify how we handle GL_MAP_FLUSH_EXPLICIT_BIT
      • +
      • virgl: move resource metadata into base resource
      • +
      • virgl: move resource creation / import / destruction to common code
      • +
      • virgl: don't flush an empty range
      • +
      • virgl: remove empty file
      • +
      + +

      Hanno Böck (1):

      +
        +
      • glsl/test: Fix use after free in test_optpass.
      • +
      + +

      Hyunjun Ko (1):

      +
        +
      • freedreno: implements get_sample_position
      • +
      + +

      Iago Toral Quiroga (22):

      +
        +
      • intel/compiler: fix node interference of simd16 instructions
      • +
      • nir/constant_folding: fix incorrect bit-size check
      • +
      • nir/from_ssa: fix bit-size of temporary register
      • +
      • Revert "nir/builder: Assert that intN_t immediates fit"
      • +
      • intel/compiler: fix indentation style in opt_algebraic()
      • +
      • intel/compiler: fix register allocation in opt_peephole_sel
      • +
      • intel/compiler: do not copy-propagate strided regions to ddx/ddy arguments
      • +
      • intel/compiler: move nir_lower_bool_to_int32 before nir_lower_locals_to_regs
      • +
      • compiler/nir: add a nir_b2f() helper
      • +
      • compiler/nir: add nir_fadd_imm() and nir_fmul_imm() helpers
      • +
      • compiler/spirv: handle 16-bit float in radians() and degrees()
      • +
      • compiler/spirv: implement 16-bit asin
      • +
      • compiler/spirv: implement 16-bit acos
      • +
      • compiler/spirv: implement 16-bit atan
      • +
      • compiler/spirv: implement 16-bit atan2
      • +
      • compiler/spirv: implement 16-bit exp and log
      • +
      • compiler/spirv: implement 16-bit hyperbolic trigonometric functions
      • +
      • compiler/spirv: implement 16-bit frexp
      • +
      • compiler/spirv: use 32-bit polynomial approximation for 16-bit asin()
      • +
      • anv/pipeline_cache: fix incorrect guards for NIR cache
      • +
      • anv/pipeline_cache: free NIR shader cache
      • +
      • anv/device: fix maximum number of images supported
      • +
      + +

      Ian Romanick (28):

      +
        +
      • glsl: Add warning tests for identifiers with __
      • +
      • glsl: Add pragma to disable all warnings
      • +
      • glsl: prevent qualifiers modification of predeclared variables
      • +
      • glsl: Omit redundant qualifier checks on redeclarations
      • +
      • glsl: Refactor type checking for redeclarations
      • +
      • nir: Add a saturated unsigned integer add opcode
      • +
      • i965/fs: Implement nir_op_uadd_sat
      • +
      • nir/phi_builder: Internal users should use nir_phi_builder_value_set_block_def too
      • +
      • util/slab: Rename slab_mempool typed parameters to mempool
      • +
      • util/hash_table: Add _mesa_hash_table_init function
      • +
      • nir/phi_builder: Use per-value hash table to store [block] -> def mapping
      • +
      • nir: Fix holes in nir_instr
      • +
      • nir: Release per-block metadata in nir_sweep
      • +
      • i965/vec4: Silence unused parameter warnings in vec4 compiler tests
      • +
      • i965/vec4/dce: Don't narrow the write mask if the flags are used
      • +
      • i965/fs: Eliminate unary op on operand of compare-with-zero
      • +
      • i965/vec4: Propagate conditional modifiers from more compares to other compares
      • +
      • nir/opt_peephole_select: Don't try to remove flow control around indirect loads
      • +
      • intel/compiler: More peephole select
      • +
      • nir/opt_peephole_select: Don't peephole_select expensive math instructions
      • +
      • intel/compiler: More peephole_select for pre-Gen6
      • +
      • Revert "nir/lower_indirect: Bail early if modes == 0"
      • +
      • nir/algebraic: Don't put quotes around floating point literals
      • +
      • glsl: Add utility to convert text files to C strings
      • +
      • nir: Silence zillions of unused parameter warnings in release builds
      • +
      • spirv: Add missing break
      • +
      • intel/fs: nir_op_extract_i8 extracts a byte, not a word
      • +
      • intel/fs: Fix extract_u8 of an odd byte from a 64-bit integer
      • +
      + +

      Ilia Mirkin (37):

      +
        +
      • nv50/ir: delete MINMAX instruction that is no longer in the BB
      • +
      • nv50/ir/ra: improve condition for short regs, unify with cond for 16-bit
      • +
      • nv50/ir/ra: enforce max register requirement, and change spill order
      • +
      • nv50/ir: remove dnz flag when converting MAD to ADD due to optimizations
      • +
      • nv50: always keep TSC slot 0 bound
      • +
      • nv50,nvc0: add explicit handling of PIPE_CAP_MAX_VERTEX_ELEMENT_SRC_OFFSET
      • +
      • nouveau: set texture upload budget
      • +
      • nvc0: replace use of explicit default_tsc with entry 0
      • +
      • nvc0: always keep TSC slot 0 bound to fix TXF
      • +
      • st/mesa: remove sampler associated with buffer texture in pbo logic
      • +
      • st/mesa: allow glDrawElements to work with GL_SELECT feedback
      • +
      • tgsi: add ATOMFADD operation
      • +
      • gallium: add PIPE_CAP_TGSI_ATOMFADD to indicate support
      • +
      • st/mesa: select ATOMFADD when source type is float
      • +
      • st/mesa: expose GL_NV_shader_atomic_float when ATOMFADD is supported
      • +
      • nv50/ir: add support for converting ATOMFADD to proper ir
      • +
      • nvc0: enable GL_NV_shader_atomic_float on pre-Maxwell
      • +
      • nv50,nvc0: add missing CAPs for unsupported features
      • +
      • nv30: avoid setting user_priv without setting cur_ctx
      • +
      • nv30: fix rare issue with fp unbinding not finding the bufctx
      • +
      • nv30: add support for multi-layer transfers
      • +
      • nv30: use correct helper to get blocks in y direction
      • +
      • nv30: fix some s3tc layout issues
      • +
      • nv30: disable rendering to 3D textures
      • +
      • docs: fix gallium screen cap docs
      • +
      • nv50,nvc0: mark textures dirty on fb update
      • +
      • nvc0: don't put text segment into bufctx
      • +
      • nvc0/ir: fix second tex argument after levelZero optimization
      • +
      • nv50,nvc0: add explicit settings for recent caps
      • +
      • nvc0: add support for handling indirect draws with attrib conversion
      • +
      • nvc0/ir: always use CG mode for loads from atomic-only buffers
      • +
      • nvc0: fix 3d images on kepler
      • +
      • nv50,nvc0: use condition for occlusion queries when already complete
      • +
      • nvc0: stick zero values for the compute invocation counts
      • +
      • nvc0: we have 16k-sized framebuffers, fix default scissors
      • +
      • swr: set PIPE_CAP_MAX_VARYINGS correctly
      • +
      • glsl: fix recording of variables for XFB in TCS shaders
      • +
      + +

      Indrajit Das (1):

      +
        +
      • st/va: Return correct status from vlVaQuerySurfaceStatus
      • +
      + +

      Jakob Bornecrantz (1):

      +
        +
      • virgl/vtest: Use default socket name from protocol header
      • +
      + +

      Jan Vesely (2):

      • amd: Make vgpr-spilling depend on llvm version
      • clover: Fix build after clang r348827

      Jason Ekstrand (207):

      +
        +
      • vulkan: Update the XML and headers to 1.1.91
      • +
      • intel/fs,vec4: Clean up a repeated pattern with SSBOs
      • +
      • intel/fs: Use the new nir_src_is_const and friends
      • +
      • nir: Add a read_mask helper for ALU instructions
      • +
      • intel/vec4: Use the new nir_src_is_const and friends
      • +
      • intel/analyze_ubo_ranges: Use nir_src_is_const and friends
      • +
      • anv: Use nir_src_is_const and friends in lowering code
      • +
      • intel/fs: Add an assert to optimize_frontfacing_ternary
      • +
      • nir/lower_alu_to_scalar: Don't try to lower unpack_32_2x16
      • +
      • nir/builder: Assert that intN_t immediates fit
      • +
      • nir/builder: Add iadd_imm and imul_imm helpers
      • +
      • nir/builder: Add a nir_pack/unpack/bitcast helpers
      • +
      • nir/spirv: Force 32-bit for UBO and SSBO Booleans
      • +
      • nir/glsl: Force 32-bit for UBO and SSBO Booleans
      • +
      • nir/lower_io: Add shared to get_io_offset_src
      • +
      • nir: Add alignment parameters to SSBO, UBO, and shared access
      • +
      • intel/compiler: Lower SSBO and shared loads/stores in NIR
      • +
      • intel,nir: Move gl_LocalInvocationID lowering to nir_lower_system_values
      • +
      • intel/fs,vec4: Fix a compiler warning
      • +
      • vulkan: Update the XML and headers to 1.1.93
      • +
      • anv: Expose VK_EXT_scalar_block_layout
      • +
      • anv: Put robust buffer access in the pipeline hash
      • +
      • anv/nir: Rework arguments to apply_pipeline_layout
      • +
      • nir/derefs: Add a nir_derefs_do_not_alias enum value
      • +
      • vulkan: Update the XML and headers to 1.1.95
      • +
      • nir/opcodes: Pull in the type helpers from constant_expressions
      • +
      • nir/opcodes: Rename tbool to tbool32
      • +
      • nir/algebraic: Clean up some __str__ cruft
      • +
      • nir/algebraic: Refactor codegen a bit
      • +
      • nir/algebraic: Add support for unsized conversion opcodes
      • +
      • nir/opt_algebraic: Simplify an optimization using the new search ops
      • +
      • nir/opt_algebraic: Drop bit-size suffixes from conversions
      • +
      • nir/opt_algebraic: Add 32-bit specifiers to a bunch of booleans
      • +
      • nir: Make boolean conversions sized just like the others
      • +
      • anv,radv: Disable VK_EXT_pci_bus_info
      • +
      • intel/ir: Don't allow allocating zero registers
      • +
      • spirv: Add support for MinLod
      • +
      • nir/lower_tex: Simplify lower_gradient logic
      • +
      • nir/lower_tex: Modify txd instructions instead of replacing them
      • +
      • nir/lower_tex: Add lowering for some min_lod cases
      • +
      • intel/fs: Support min_lod parameters on texture instructions
      • +
      • anv: Advertise support for MinLod on Skylake+
      • +
      • anv/pipeline: Set the correct binding count for compute shaders
      • +
      • intel/blorp: Assert that we don't re-layout a compressed surface
      • +
      • nir: Document the function inlining process
      • +
      • nir: Allow [iu]mul_high on non-32-bit types
      • +
      • nir/lower_int64: Add support for [iu]mul_high
      • +
      • nir: Add a pass for lowering integer division by constants
      • +
      • i965/vec4: Implement nir_op_uadd_sat
      • +
      • i965: Enable nir_opt_idiv_const for 32 and 64-bit integers
      • +
      • nir/lower_idiv: Use ilt instead of bit twiddling
      • +
      • nir/tgsi: Use nir_bany in ttn_kill_if
      • +
      • nir/constant_folding: Fix source bit size logic
      • +
      • nir/algebraic: Optimize x2b(xneg(a)) -> a
      • +
      • nir: Drop support for lower_b2f
      • +
      • nir/algebraic: Make an optimization more specific
      • +
      • nir: Rename Boolean-related opcodes to include 32 in the name
      • +
      • nir/constant_expressions: Rework Boolean handling
      • +
      • nir: Add support for 1-bit data types
      • +
      • nir/large_constants: Properly handle 1-bit bools
      • +
      • nir/algebraic: Generalize an optimization
      • +
      • nir: Add 1-bit Boolean opcodes
      • +
      • nir: Add a bool to int32 lowering pass
      • +
      • nir: Switch to using 1-bit Booleans for almost everything
      • +
      • nir/algebraic: Optimize 1-bit Booleans
      • +
      • nir/algebraic: Add some optimizations for D3D-style Booleans
      • +
      • radv: Fix a stupid if in gather_intrinsic_info
      • +
      • st/nir: Use nir_src_as_uint for tokens
      • +
      • vulkan: Update the XML and headers to 1.1.96
      • +
      • anv,radv: Re-enable VK_EXT_pci_bus_info
      • +
      • anv: Bump the patch version to 96
      • +
      • nir/propagate_invariant: Skip unknown vars
      • +
      • nir/linking_helpers: Look at derefs for modes
      • +
      • nir/lower_io_arrays_to_elements: Look at derefs for modes
      • +
      • nir/lower_io_to_scalar: Look at derefs for modes
      • +
      • nir/lower_wpos_center: Look at derefs for modes
      • +
      • nir/copy_prop_vars: Get modes directly from derefs
      • +
      • nir/dead_write_vars: Get modes directly from derefs
      • +
      • radv/query: Add a nir_test_flag helper
      • +
      • radv/query: Use 1-bit booleans in query shaders
      • +
      • intel/blorp: Be more conservative about copying clear colors
      • +
      • vulkan: Update the XML and headers to 1.1.97
      • +
      • glsl_type: Support serializing 8 and 16-bit types
      • +
      • spirv: Handle any bit size in vector_insert/extract
      • +
      • anv/apply_pipeline_layout: Set the cursor in lower_res_reindex_intrinsic
      • +
      • spirv: Sign-extend array indices
      • +
      • spirv: Emit NIR deref instructions on-the-fly
      • +
      • nir/builder: Add nir_i2i and nir_u2u helpers which take a bit size
      • +
      • spirv: Handle arbitrary bit sizes for deref array indices
      • +
      • nir/validate: Require array indices to match the deref bit size
      • +
      • nir: Allow storing to shader_storage
      • +
      • nir: Distinguish between normal uniforms and UBOs
      • +
      • glsl_type: Drop the glsl_get_array_instance C helper
      • +
      • glsl_type: Add a C wrapper to get struct field offsets
      • +
      • glsl_type: Simplify glsl_channel_type
      • +
      • glsl_type: Add support for explicitly laid out matrices and arrays
      • +
      • spirv: Propagate layout decorations to created glsl_types
      • +
      • nir: Move propagation of cast derefs to a new nir_opt_deref pass
      • +
      • nir: Add a ptr_as_array deref type
      • +
      • nir/validate: Don't allow derefs in if conditions
      • +
      • nir/opt_deref: Properly optimize ptr_as_array derefs
      • +
      • nir/deref: Support casts and ptr_as_array in comparisons
      • +
      • nir/deref: Skip over casts in fixup_deref_modes
      • +
      • nir/remove_dead_variables: Properly handle deref casts
      • +
      • nir/validate: Allow derefs in phi nodes
      • +
      • nir/intrinsics: Allow deref sources to consume anything
      • +
      • nir/intrinsics: Add access flags to load/store_deref
      • +
      • nir/validate: Allow array derefs on vectors in more modes
      • +
      • nir/lower_io: Add "explicit" IO lowering
      • +
      • nir/vulkan: Add a descriptor type to vulkan resource intrinsics
      • +
      • spirv: Add error checking for Block and BufferBlock decorations
      • +
      • spirv: Choose atomic deref type with pointer_uses_ssa_offset
      • +
      • spirv: Add explicit pointer types
      • +
      • spirv: Make better use of vtn_pointer_uses_ssa_offset
      • +
      • spirv: Add support for using derefs for UBO/SSBO access
      • +
      • anv: Enable the new deref-based UBO/SSBO path
      • +
      • spirv: Sort supported capabilities
      • +
      • anv: Sort properties and features switch statements
      • +
      • nir: Add some more int64 lowering helpers
      • +
      • anv/pipeline: Constant fold after apply_pipeline_layout
      • +
      • anv/pipeline: Move wpos and input attachment lowering to lower_nir
      • +
      • compiler/types: Serialize/deserialize subpass input types correctly
      • +
      • anv/pipeline: Hash shader modules and spec constants separately
      • +
      • anv/pipeline_cache: Add support for caching NIR
      • +
      • anv/pipeline: Cache the pre-lowered NIR
      • +
      • intel/peephole_ffma: Fix swizzle propagation
      • +
      • spirv: Whack sampler/image pointers to uniform
      • +
      • spirv: Contain the GLSLang issue #179 workaround to old GLSLang
      • +
      • intel/nir: Call nir_opt_deref in brw_nir_optimize
      • +
      • nir/gcm: Support deref instructions
      • +
      • spirv: Emit switch conditions on-the-fly
      • +
      • intel/blorp: Add two more filter modes
      • +
      • anv: Rename has_resolve to has_color_resolve
      • +
      • anv/blorp: Refactor MSAA resolves into an exportable helper function
      • +
      • anv: Move resolve_subpass to genX_cmd_buffer.c
      • +
      • anv: Implement VK_KHR_depth_stencil_resolve
      • +
      • nir: Add a bool to float32 lowering pass
      • +
      • intel/eu: Stop overriding exec sizes in send_indirect_message
      • +
      • intel/fs: Don't touch accumulator destination while applying regioning alignment rule
      • +
      • anv: Re-sort the extensions list
      • +
      • anv: Only parse pImmutableSamplers if the descriptor has samplers
      • +
      • relnotes: Add newly added Vulkan extensions
      • +
      • anv/pipeline: Add a pdevice helper variable
      • +
      • nir: Mark deref UBO and SSBO access as non-scalar
      • +
      • spirv: Update the JSON and headers from Khronos master
      • +
      • anv: Always emit at least one vertex element
      • +
      • spirv: Initialize struct member offsets to -1
      • +
      • spirv: Only split blocks
      • +
      • spirv: Only set interface_type on blocks
      • +
      • nir: Preserve offsets in lower_io_to_scalar_early
      • +
      • nir/xfb: Fix offset accounting for dvec3/4
      • +
      • nir/xfb: Properly handle arrays of blocks
      • +
      • anv: Add but do not enable VK_EXT_transform_feedback
      • +
      • anv: Add pipeline cache support for xfb_info
      • +
      • anv: Implement the basic form of VK_EXT_transform_feedback
      • +
      • anv: Implement vkCmdDrawIndirectByteCountEXT
      • +
      • anv: Implement CmdBegin/EndQueryIndexed
      • +
      • genxml: Add SO_PRIM_STORAGE_NEEDED and SO_NUM_PRIMS_WRITTEN
      • +
      • anv: Implement transform feedback queries
      • +
      • nir: Add load/store/atomic global intrinsics
      • +
      • nir/lower_io: Add a 32 and 64-bit global address formats
      • +
      • nir/lower_io: Add support for nir_var_mem_global
      • +
      • nir/validate: Allow array derefs of vectors for nir_var_mem_global
      • +
      • nir: Allow SSBOs and global to alias
      • +
      • spirv: Drop a bogus assert
      • +
      • spirv: Handle OpTypeForwardPointer
      • +
      • spirv: Implement OpConvertPtrToU and OpConvertUToPtr
      • +
      • spirv: Add support for SPV_EXT_physical_storage_buffer
      • +
      • intel/fs: Get rid of fs_inst::equals
      • +
      • intel/defines: Explicitly cast to uint32_t in SET_FIELD and SET_BITS
      • +
      • intel/fs: Handle IMAGE_SIZE in size_read() and is_send_from_grf()
      • +
      • intel/fs: Take an explicit exec size in brw_surface_payload_size()
      • +
      • intel/eu: Add has_simd4x2 bools to surface_write functions
      • +
      • intel/eu: Rework surface descriptor helpers
      • +
      • intel/fs: Add a generic SEND opcode
      • +
      • intel/fs: Use SHADER_OPCODE_SEND for surface messages
      • +
      • intel/fs: Use a logical opcode for IMAGE_SIZE
      • +
      • intel/fs: Use SHADER_OPCODE_SEND for texturing on gen7+
      • +
      • intel/fs: Use SHADER_OPCODE_SEND for varying UBO pulls on gen7+
      • +
      • intel/eu: Use GET_BITS in brw_inst_set_send_ex_desc
      • +
      • intel/eu/validate: SEND restrictions also apply to SENDC
      • +
      • intel/eu: Add more message descriptor helpers
      • +
      • intel/disasm: Rework SEND decoding to use descriptors
      • +
      • intel/inst: Fix the ia16_addr_imm helpers
      • +
      • intel/inst: Indent some code
      • +
      • intel/eu: Add support for the SENDS[C] messages
      • +
      • intel/disasm: Properly disassemble split sends
      • +
      • intel/fs: Support SENDS in SHADER_OPCODE_SEND
      • +
      • intel/fs: Add interference between SENDS sources
      • +
      • intel/fs: Use split sends for surface writes on gen9+
      • +
      • intel/fs: Do the grf127 hack on SIMD8 instructions in SIMD16 mode
      • +
      • nir/deref: Rematerialize parents in rematerialize_derefs_in_use_blocks
      • +
      • intel/fs: Bail in optimize_extract_to_float if we have modifiers
      • +
      • compiler/types: Add a contains_64bit helper
      • +
      • nir/xfb: Properly align 64-bit values
      • +
      • nir: Rewrite lower_clip_cull_distance_arrays to do a lot less lowering
      • +
      • nir/xfb: Work in terms of components rather than slots
      • +
      • nir/xfb: Handle compact arrays in gather_xfb_info
      • +
      • nir/lower_clip_cull: Fix an incorrect assert
      • +
      • anv: Count surfaces for non-YCbCr images in GetDescriptorSetLayoutSupport
      • +
      • spirv: OpImageQueryLod requires a sampler
      • +
      • intel,nir: Lower TXD with min_lod when the sampler index is not < 16
      • +
      • spirv: Pull offset/stride from the pointer for OpArrayLength
      • +
      • anv: Refactor descriptor pushing a bit
      • +
      • anv: Take references to push descriptor set layouts
      • +
      • nir: Add a pass for lowering IO back to vector when possible
      • +
      • intel/nir: Vectorize all IO
      • +
      + +

      Jiang, Sonny (1):

      • radeonsi: add compute_last_block to configure the partial block fields

      Jon Turney (3):

      • glx: Fix compilation with GLX_USE_WINDOWSGL
      • appveyor: put build steps in a script, rather than inline in appveyor.yml
      • appveyor: Add a Cygwin build script

      Jonathan Marek (42):

      +
        +
      • nir: add fceil lowering
      • +
      • freedreno: a2xx: fd2_draw update
      • +
      • freedreno/a2xx: fix POINT_MINMAX_MAX overflow
      • +
      • freedreno: add missing a20x ids
      • +
      • freedreno/a2xx: set VIZ_QUERY_ID on a20x
      • +
      • freedreno/a2xx: Compute depth base in gmem correctly
      • +
      • freedreno: a2xx texture update
      • +
      • freedreno: use GENERIC instead of TEXCOORD for blit program
      • +
      • freedreno: use MSM_BO_SCANOUT with scanout buffers
      • +
      • glsl/nir: int constants as float for native_integers=false
      • +
      • glsl/nir: ftrunc for native_integers=false float to int cast
      • +
      • glsl/nir: keep bool types when native_integers=false
      • +
      • freedreno: a2xx: cleanup init_shader_const
      • +
      • freedreno: a2xx: cleanup REG_A2XX_PA_CL_VTE_CNTL
      • +
      • freedreno: a2xx: fix gmem2mem viewport
      • +
      • freedreno: a2xx: fix VERTEX_REUSE/DEALLOC on a20x
      • +
      • freedreno: a2xx: fix non-zero texture base offsets
      • +
      • freedreno: a2xx: sysmem rendering
      • +
      • freedreno: a2xx: NIR backend
      • +
      • freedreno: a2xx: insert scalar MOV to allow 2 source scalar
      • +
      • freedreno: a2xx: add ir2 copy propagation
      • +
      • freedreno: a2xx: add partial lower_scalar pass for ir2
      • +
      • freedreno: add renderonly scanout
      • +
      • freedreno: a2xx: ir2 cleanup
      • +
      • freedreno: a2xx: enable early-Z testing
      • +
      • freedreno: update a2xx registers
      • +
      • freedreno: a2xx: a20x hw binning
      • +
      • freedreno: a2xx: clear fixes and fast clear path
      • +
      • freedreno: a2xx: minor solid_vertexbuf fixups
      • +
      • freedreno: a2xx: add perfcntrs
      • +
      • kmsro: Add freedreno renderonly support
      • +
      • st/dri: invalidate_resource depth/stencil before flush_resource
      • +
      • mesa/st: wire up DiscardFramebuffer
      • +
      • freedreno: fix invalidate logic
      • +
      • freedreno: fix depth usage logic
      • +
      • freedreno: fix sysmem rendering being used when clear is used
      • +
      • freedreno: a2xx: fix fast clear
      • +
      • freedreno: a2xx: don't write 4th vertex in mem2gmem
      • +
      • freedreno: a2xx: add use_hw_binning function
      • +
      • freedreno: a2xx: fix fast clear for some gmem configurations
      • +
      • freedreno: a2xx: fix mipmapping for NPOT textures
      • +
      • freedreno: use renderonly path for buffers allocated with modifiers
      • +
      + +

      Jordan Justen (3):

      • docs: Document GitLab merge request process (email alternative)
      • i965/genX_state: Add register access functions
      • i965/compute: Emit GPGPU_WALKER in genX_state_upload

      Jose Maria Casanova Crespo (1):

      • glsl: TCS outputs can not be transform feedback candidates on GLES

      José Fonseca (2):

      • appveyor: Revert commits adding Cygwin support.
      • scons: Workaround failures with MSVC when using SCons 3.0.[2-4].

      Juan A. Suarez Romero (17):

      +
        +
      • docs: add release notes for 18.2.5
      • +
      • docs: add sha256 checksums for 18.2.5
      • +
      • docs: update calendar, add news item and link release notes for 18.2.5
      • +
      • docs: add release notes for 18.2.6
      • +
      • docs: add sha256 checksums for 18.2.6
      • +
      • docs: update calendar, add news item and link release notes for 18.2.6
      • +
      • docs: extends 18.2 lifecycle
      • +
      • docs: add release notes for 18.2.7
      • +
      • docs: add sha256 checksums for 18.2.7
      • +
      • docs: update calendar, add news item and link release notes for 18.2.7
      • +
      • docs: add release notes for 18.2.8
      • +
      • docs: add sha256 checksums for 18.2.8
      • +
      • docs: update calendar, add news item and link release notes for 18.2.8
      • +
      • anv/cmd_buffer: check for NULL framebuffer
      • +
      • genxml: add missing field values for 3DSTATE_SF
      • +
      • anv: advertise 8 subpixel precision bits
      • +
      • anv: destroy descriptor sets when pool gets reset
      • +
      + +

      Józef Kucia (1):

      • nir: Fix assert in print_intrinsic_instr().

      Karol Herbst (35):

      +
        +
      • nv50/ir: print color masks of tex instructions
      • +
      • nv50/ra: add condenseDef overloads for partial condenses
      • +
      • nv50/ir: add scalar field to TexInstructions
      • +
      • gm107/ir: use scalar tex instructions where possible
      • +
      • gm107/ir: fix compile time warning in getTEXSMask
      • +
      • nir: add const_index parameters to system value builder function
      • +
      • nir: replace nir_load_system_value calls with appropiate builder functions
      • +
      • nir/spirv: cast shift operand to u32
      • +
      • nv50,nvc0: Fix gallium nine regression regarding sampler bindings
      • +
      • nv50/ir: initialize relDegree staticly
      • +
      • nouveau: use atomic operations for driver statistics
      • +
      • nv50/ir: fix use-after-free in ConstantFolding::visit
      • +
      • nir: rename global/local to private/function memory
      • +
      • nv50/ir: disable tryCollapseChainedMULs in ConstantFolding for precise instructions
      • +
      • gm107/ir: disable TEXS for tex with derivAll set
      • +
      • nir: rename nir_var_private to nir_var_shader_temp
      • +
      • nir: rename nir_var_function to nir_var_function_temp
      • +
      • nir: rename nir_var_ubo to nir_var_mem_ubo
      • +
      • nir: rename nir_var_ssbo to nir_var_mem_ssbo
      • +
      • nir: rename nir_var_shared to nir_var_mem_shared
      • +
      • nir/spirv: handle SpvStorageClassCrossWorkgroup
      • +
      • glsl/lower_output_reads: set invariant and precise flags on temporaries
      • +
      • nir: replace more nir_load_system_value calls with builder functions
      • +
      • nir/validate: allow to check against a bitmask of bit_sizes
      • +
      • nir: add legal bit_sizes to intrinsics
      • +
      • nir: add bit_size parameter to system values with multiple allowed bit sizes
      • +
      • mesa: add MESA_SHADER_KERNEL
      • +
      • vtn: handle SpvExecutionModelKernel
      • +
      • nir/spirv: handle ContractionOff execution mode
      • +
      • gk104/ir: Use the new rcp/rsq in library
      • +
      • gm107/ir: add fp64 rcp
      • +
      • gm107/ir: add fp64 rsq
      • +
      • gallium: add PIPE_CAP_MAX_VARYINGS
      • +
      • st/mesa: require RGBA2, RGB4, and RGBA4 to be renderable
      • +
      • nir/opt_if: don't mark progress if nothing changes
      • +
      + +

      Kenneth Graunke (41):

      +
        +
      • intel: Use a URB start offset of 0 for disabled stages.
      • +
      • st/mesa: Pull nir_lower_wpos_ytransform work into a helper function.
      • +
      • st/nir: Drop unused parameter from st_nir_assign_uniform_locations().
      • +
      • st/mesa: Don't record garbage streamout information in the non-SSO case.
      • +
      • i915: Delete swizzling detection logic.
      • +
      • nir: Use nir_shader_get_entrypoint in nir_lower_clip_vs().
      • +
      • nir: Inline lower_clip_vs() into nir_lower_clip_vs().
      • +
      • nir: Save nir_variable pointers in nir_lower_clip_vs rather than locs.
      • +
      • nir: Make nir_lower_clip_vs optionally work with variables.
      • +
      • i965: Allow only one slot of clip distances to be set on Gen4-5.
      • +
      • i965: Use a 'nir' temporary rather than poking at brw_program
      • +
      • i965: Do NIR shader cloning in the caller.
      • +
      • intel/compiler: Use nir's info when checking uses_streams.
      • +
      • intel/blorp: Expand blorp_address::offset to be 64 bits.
      • +
      • i965: Delete dead brw_meta_resolve_color prototype.
      • +
      • i965: Flip arguments to load_register_reg helpers.
      • +
      • genxml: Consistently use a numeric "MOCS" field
      • +
      • i965: Don't override subslice count to 4 on Gen11.
      • +
      • st/mesa: Drop dead 'passthrough_fs' field.
      • +
      • st/mesa: Drop !passColor optimization in drawpixels shaders.
      • +
      • st/mesa: Don't open code the drawpixels vertex shader.
      • +
      • st/mesa: Combine the DrawPixels and Bitmap passthrough VS programs.
      • +
      • st/nir: Gather info after applying lowering FS variant features
      • +
      • st/nir: Drop unused gl_program parameter in VS input handling helper.
      • +
      • nir: Fix gl_nir_lower_samplers_as_deref's structure type handling.
      • +
      • nir: Make gl_nir_lower_samplers use gl_nir_lower_samplers_as_deref
      • +
      • blorp: Add blorp_get_surface_address to the driver interface.
      • +
      • blorp: Pass the batch to lookup/upload_shader instead of context
      • +
      • nir: Allow a non-existent sampler deref in nir_lower_samplers_as_deref
      • +
      • st/nir: Lower TES gl_PatchVerticesIn to a constant if linked with a TCS.
      • +
      • i965: Drop mark_surface_used mechanism.
      • +
      • st/mesa: Make an enum for pipeline statistics query result indices.
      • +
      • st/mesa: Rearrange PIPE_QUERY_PIPELINE_STATISTICS result fetching.
      • +
      • gallium: Add the ability to query a single pipeline statistics counter
      • +
      • st/mesa: Optionally override RGB/RGBX dst alpha blend factors
      • +
      • gallium: Add forgotten docs for PIPE_CAP_GLSL_TESS_LEVELS_AS_INPUTS.
      • +
      • st/mesa: Limit GL_MAX_[NATIVE_]PROGRAM_PARAMETERS_ARB to 2048
      • +
      • anv: Put MOCS in the correct location
      • +
      • nir: Don't reassociate add/mul chains containing only constants
      • +
      • compiler: Mark clip/cull distance arrays as compact before lowering.
      • +
      • spirv: Eliminate dead input/output variables after translation.
      • +
      + +

      Kirill Burtsev (1):

      • loader: free error state, when checking the drawable type

      Kristian H. Kristensen (14):

      +
        +
      • freedreno/a6xx: Clear z32 and separate stencil with blitter
      • +
      • freedreno/a6xx: Move restore blits to IB
      • +
      • freedreno/a6xx: Move resolve blits to an IB
      • +
      • freedreno/a6xx: Clear gmem buffers at flush time
      • +
      • gallium: Android build fixes
      • +
      • mesa: Add core support for EXT_multisampled_render_to_texture{,2}
      • +
      • gallium: Add new PIPE_CAP_SURFACE_SAMPLE_COUNT
      • +
      • st/mesa: Add support for EXT_multisampled_render_to_texture
      • +
      • freedreno: Add support for EXT_multisampled_render_to_texture
      • +
      • freedreno: Fix the Makefile.am fix
      • +
      • glapi: fixup EXT_multisampled_render_to_texture dispatch
      • +
      • freedreno: Synchronize batch and flush for staging resource
      • +
      • freedreno/a6xx: Turn on texture tiling by default
      • +
      • freedreno/a6xx: Emit blitter dst with OUT_RELOCW
      • +
      + +

      Leo Liu (2):

      • st/va: fix the incorrect max profiles report
      • st/va/vp9: set max reference as default of VP9 reference number

      Lionel Landwerlin (47):

      +
        +
      • intel/dump_gpu: add missing gdb option
      • +
      • intel/sanitize_gpu: add help/gdb options to wrapper
      • +
      • intel/sanitize_gpu: deal with non page multiple buffer sizes
      • +
      • intel/sanitize_gpu: add debug message on mmap fail
      • +
      • intel/decoders: fix instruction base address parsing
      • +
      • anv: stub internal android code
      • +
      • anv/android: mark gralloc allocated BOs as external
      • +
      • intel/dump_gpu: move output option together
      • +
      • intel/dump_gpu: add platform option
      • +
      • intel/aub_read: remove useless breaks
      • +
      • nir/lower_tex: add alpha channel parameter for yuv lowering
      • +
      • nir/lower_tex: Add AYUV lowering support
      • +
      • dri: add AYUV format
      • +
      • i965: add support for sampling from AYUV
      • +
      • anv: simplify internal address offset
      • +
      • anv/image: remove unused parameter
      • +
      • anv/lower_ycbcr: make sure to set 0s on all components
      • +
      • anv: associate vulkan formats with aspects
      • +
      • anv: use image aspects rather than computed ones
      • +
      • anv: move helper function internally
      • +
      • egl/dri: fix error value with unknown drm format
      • +
      • intel/decoders: read ring buffer length
      • +
      • intel/aubinator: fix ring buffer pointer
      • +
      • intel/aub_viewer: fix dynamic state printing
      • +
      • intel/aub_viewer: Print blend states properly
      • +
      • anv: flush pipeline before query result copies
      • +
      • anv/query: flush render target before copying results
      • +
      • anv: don't do partial resolve on layer > 0
      • +
      • intel/aub_viewer: fix shader get_bo
      • +
      • intel/aub_viewer: fixup 0x address prefix
      • +
      • intel/aub_viewer: print address of missing shader
      • +
      • intel/aub_viewer: fix shader view
      • +
      • intel/aub_viewer: fold binding/sampler table items
      • +
      • intel/aub_viewer: highlight true booleans
      • +
      • i965: limit VF caching workaround to gen8/9/10
      • +
      • intel/blorp: emit VF caching workaround before 3DSTATE_VERTEX_BUFFERS
      • +
      • i965: include draw_params/derived_draw_params for VF cache workaround
      • +
      • i965: add CS stall on VF invalidation workaround
      • +
      • anv: explictly specify format for blorp ccs/mcs op
      • +
      • anv: flush fast clear colors into compressed surfaces
      • +
      • anv: fix invalid binding table index computation
      • +
      • anv: narrow flushing of the render target to buffer writes
      • +
      • anv: document cache flushes & invalidations
      • +
      • intel/genxml: add missing MI_PREDICATE compare operations
      • +
      • vulkan: make generated enum to strings helpers available from c++
      • +
      • intel: fix urb size for CFL GT1
      • +
      • intel/compiler: use correct swizzle for replacement
      • +
      + +

      Lucas Stach (6):

      +
        +
      • etnaviv: use dummy RT buffer when rendering without color buffer
      • +
      • etnaviv: use surface format directly
      • +
      • st/dri: allow both render and sampler compatible dma-buf formats
      • +
      • st/dri: replace format conversion functions with single mapping table
      • +
      • etnaviv: enable full overwrite in a few more cases
      • +
      • etnaviv: annotate variables only used in debug build
      • +
      + +

      Marek Olšák (94):

      +
        +
      • st/va: fix incorrect use of resource_destroy
      • +
      • ac/surface: remove the overallocation workaround for Vega12
      • +
      • radeonsi: use better DCC clear codes
      • +
      • radeonsi: don't set the CB clear color registers for 0/1 clear colors on Raven2
      • +
      • gallium: add PIPE_CONTEXT_LOSE_CONTEXT_ON_RESET
      • +
      • radeonsi: stop command submission with PIPE_CONTEXT_LOSE_CONTEXT_ON_RESET only
      • +
      • st/mesa: disable L3 thread pinning
      • +
      • mesa: mark GL_SR8_EXT non-renderable on GLES
      • +
      • radeonsi: fix video APIs on Raven2
      • +
      • gallium/u_tests: add a compute shader test that clears an image
      • +
      • gallium/u_tests: fix MSVC build by using old-style zero initializers
      • +
      • mesa/glthread: pass the function name to _mesa_glthread_restore_dispatch
      • +
      • mesa/glthread: enable immediate mode
      • +
      • drirc: enable glthread for Talos Principle
      • +
      • st/mesa: regularly re-pin driver threads to the CCX where the app thread is
      • +
      • st/mesa: pin driver threads to a fixed CCX when glthread is enabled
      • +
      • radeonsi: don't send data after write-confirm with BOTTOM_OF_PIPE_TS
      • +
      • radeonsi: go back to using bottom-of-pipe for beginning of TIME_ELAPSED
      • +
      • winsys/amdgpu: fix a buffer leak in amdgpu_bo_from_handle
      • +
      • winsys/amdgpu: fix a device handle leak in amdgpu_winsys_create
      • +
      • radeonsi: clean up primitive binning enablement
      • +
      • radeonsi: use structured buffer intrinsics for image views
      • +
      • radeonsi: fix is_oneway_access_only for image stores
      • +
      • radeonsi: small cleanup for memory opcodes
      • +
      • tgsi/scan: add more information about bindless usage
      • +
      • radeonsi/nir: parse more information about bindless usage
      • +
      • radeonsi: fix is_oneway_access_only for bindless images
      • +
      • winsys/amdgpu: always reclaim/release slabs if there is not enough memory
      • +
      • radeonsi: generalize the slab allocator code to allow layered slab allocators
      • +
      • winsys/amdgpu: optimize slab allocation for 2 MB amdgpu page tables
      • +
      • winsys/amdgpu: clean up code around BO VM alignment
      • +
      • winsys/amdgpu: use >= instead of > for VM address alignment
      • +
      • winsys/amdgpu: increase the VM alignment to the MSB of the size for Gfx9
      • +
      • winsys/amdgpu: overallocate buffers for faster address translation on Gfx9
      • +
      • winsys/amdgpu,radeon: pass vm_alignment to buffer_from_handle
      • +
      • winsys/amdgpu: use optimal VM alignment for imported buffers
      • +
      • winsys/amdgpu: use optimal VM alignment for CPU allocations
      • +
      • radeonsi: allow si_cp_dma_clear_buffer to clear GDS from any IB
      • +
      • winsys/amdgpu: add support for allocating GDS and OA resources
      • +
      • radeonsi: add memory management stress tests for GDS
      • +
      • Revert "winsys/amdgpu: overallocate buffers for faster address translation on Gfx9"
      • +
      • st/mesa: expose GL_OES_texture_view
      • +
      • mesa: expose GL_EXT_texture_view as an alias of GL_OES_texture_view
      • +
      • mesa: expose EXT_texture_compression_rgtc on GLES
      • +
      • mesa: expose EXT_texture_compression_bptc in GLES
      • +
      • mesa: expose AMD_texture_texture4
      • +
      • st/mesa: expose EXT_render_snorm on GLES
      • +
      • radeonsi: don't emit redundant PKT3_NUM_INSTANCES packets
      • +
      • radeonsi: call si_fix_resource_usage for the GS copy shader as well
      • +
      • radeonsi: make si_cp_wait_mem more configurable
      • +
      • radeonsi: use u_decomposed_prims_for_vertices instead of u_prims_for_vertices
      • +
      • radeonsi: remove unused variables in si_insert_input_ptr
      • +
      • radeonsi: always unmap texture CPU mappings on 32-bit CPU architectures
      • +
      • ac: remove unused variable from ac_build_ddxy
      • +
      • st/mesa: unify window-system renderbuffer initialization
      • +
      • st/mesa: don't reference pipe_surface locally in PBO code
      • +
      • st/mesa: don't leak pipe_surface if pipe_context is not current
      • +
      • st/dri: fix dri2_format_table for argb1555 and rgb565
      • +
      • radeonsi: also apply the GS hang workaround to draws without tessellation
      • +
      • winsys/amdgpu: fix whitespace
      • +
      • winsys/amdgpu: use the new BO list API
      • +
      • radeonsi: fix a u_blitter crash after a shader with FBFETCH
      • +
      • radeonsi: fix rendering to tiny viewports where the viewport center is > 8K
      • +
      • radeonsi: use buffer_store_format_x & xy
      • +
      • radeonsi: remove redundant call to emit_cache_flush in compute clear/copy
      • +
      • radeonsi: compile clear and copy buffer compute shaders on demand
      • +
      • radeonsi: correct WRITE_DATA.DST_SEL definitions
      • +
      • radeonsi: fix the top-of-pipe fence on SI
      • +
      • radeonsi: don't use WRITE_DATA.DST_SEL == MEM_GRBM on >= CIK
      • +
      • radeonsi: move PKT3_WRITE_DATA generation into a helper function
      • +
      • gallium: add SINT formats to have exact counterparts to SNORM formats
      • +
      • gallium/util: add util_format_snorm8_to_sint8 (from radeonsi)
      • +
      • radeonsi: disable render cond & pipeline stats for internal compute dispatches
      • +
      • radeonsi: rename rscreen -> sscreen
      • +
      • radeonsi: rename rview -> sview
      • +
      • winsys/amdgpu: rename rfence, rsrc, rdst -> afence, asrc, adst
      • +
      • radeonsi: remove r600 from comments
      • +
      • radeonsi: rename r600_resource -> si_resource
      • +
      • radeonsi: rename rquery -> squery
      • +
      • radeonsi: rename rsrc -> ssrc, rdst -> sdst
      • +
      • radeonsi: rename rbo, rbuffer to buf or buffer
      • +
      • radeonsi: rename rfence -> sfence
      • +
      • st/mesa: purge framebuffers when unbinding a context
      • +
      • st/mesa: fix PRIMITIVES_GENERATED query after the "pipeline stat single" changes
      • +
      • ac: use the correct LLVM processor name on Raven2
      • +
      • radeonsi: fix crashing performance counters (division by zero)
      • +
      • meson: drop the xcb-xrandr version requirement
      • +
      • gallium/u_threaded: fix EXPLICIT_FLUSH for flush offsets > 0
      • +
      • radeonsi: fix EXPLICIT_FLUSH for flush offsets > 0
      • +
      • winsys/amdgpu: don't drop manually added fence dependencies
      • +
      • radeonsi: add driconf option radeonsi_enable_nir
      • +
      • radeonsi: always enable NIR for Civilization 6 to fix corruption
      • +
      • driconf: add Civ6Sub executable for Civilization 6
      • +
      • tgsi: don't set tgsi_info::uses_bindless_images for constbufs and hw atomics
      • +
      + +

      Mario Kleiner (4):

      • radeonsi: Fix use of 1- or 2- component GL_DOUBLE vbo's.
      • egl/wayland: Allow client->server format conversion for PRIME offload. (v2)
      • egl/wayland-drm: Only announce formats via wl_drm which the driver supports.
      • drirc: Add sddm-greeter to adaptive_sync blacklist.

      Mark Janes (3):

      • Revert "i965/batch: avoid reverting batch buffer if saved state is an empty"
      • Revert "Implementation of egl dri2 drivers for MESA_query_driver"
      • Revert "Implement EGL API for MESA_query_driver"

      Mathias Fröhlich (17):

      • mesa: Remove needless indirection in some draw functions.
      • mesa: Rename gl_vertex_array_object::_Enabled -> Enabled.
      • mesa: Use the gl_vertex_array_object::Enabled bitfield.
      • mesa: Use gl_vertex_array_object::Enabled for glGet.
      • mesa: Remove gl_array_attributes::Enabled.
      • mesa: Work with bitmasks when en/dis-abling VAO arrays.
      • mesa: Unify glEdgeFlagPointer data type.
      • nouveau: Use gl_array_attribute::_ElementSize.
      • tnl: Use gl_array_attribute::_ElementSize.
      • mesa: Factor out struct gl_vertex_format.
      • mesa: Remove unneeded bitfield widths from the VAO.
      • mesa/st: Only care about the uploader if it was used.
      • mesa/st: Only unmap the uploader that was actually used.
      • mesa/st: Factor out array and buffer setup from st_atom_array.c.
      • mesa/st: Avoid extra references in the feedback draw function scope.
      • mesa/st: Use binding information from the VAO in feedback rendering.
      • mesa/st: Make st_pipe_vertex_format static.

      Matt Turner (41):

      +
        +
      • util/ralloc: Switch from DEBUG to NDEBUG
      • +
      • util/ralloc: Make sizeof(linear_header) a multiple of 8
      • +
      • nir: Call fflush() at the end of nir_print_shader()
      • +
      • glsl: Remove unused member variable
      • +
      • gallivm: Use nextafterf(0.5, 0.0) as rounding constant
      • +
      • mesa: Revert INTEL_fragment_shader_ordering support
      • +
      • Revert "st/mesa: silenced unhanded enum warning in st_glsl_to_tgsi.cpp"
      • +
      • i965/fs: Handle V/UV immediates in dump_instructions()
      • +
      • glsl: Add function support to glsl_to_nir
      • +
      • glsl: Create file to contain software fp64 functions
      • +
      • glsl: Add "built-in" functions to do ffma(fp64)
      • +
      • glsl: Add "built-in" functions to do fmin/fmax(fp64)
      • +
      • glsl: Add "built-in" function to do ffloor(fp64)
      • +
      • glsl: Add "built-in" functions to do ffract(fp64)
      • +
      • glsl: Add "built-in" functions to convert bool to double
      • +
      • nir: Rework nir_lower_constant_initializers() to handle functions
      • +
      • nir: Tag entrypoint for easy recognition by nir_shader_get_entrypoint()
      • +
      • nir: Wire up int64 lowering functions
      • +
      • nir: Implement lowering of 64-bit shift operations
      • +
      • nir: Add and set info::uses_64bit
      • +
      • nir: Create nir_builder in nir_lower_doubles_impl()
      • +
      • nir: Add lowering support for 64-bit operations to software
      • +
      • nir: Unset metadata debug bit if no progress made
      • +
      • intel/compiler: Lower 64-bit MOV/SEL operations
      • +
      • intel/compiler: Split 64-bit MOV-indirects if needed
      • +
      • intel/compiler: Avoid false positive assertions
      • +
      • intel/compiler: Rearrange code to avoid future problems
      • +
      • intel/compiler: Prevent warnings in the following patch
      • +
      • intel/compiler: Expand size of the 'nr' field
      • +
      • intel/compiler: Heap-allocate temporary storage
      • +
      • i965: Compile fp64 software routines and lower double-ops
      • +
      • i965: Enable 64-bit GLSL extensions
      • +
      • i965: Compile fp64 funcs only if we do not have 64-bit hardware support
      • +
      • intel/compiler: Reset default flag register in brw_find_live_channel()
      • +
      • gallium: Enable ASIMD/NEON on aarch64.
      • +
      • gallivm: Return true from arch_rounding_available() if NEON is available
      • +
      • intel/compiler: Add a file-level description of brw_eu_validate.c
      • +
      • i965: Always compile fp64 funcs when needed
      • +
      • nir: Optimize double-precision lower_round_even()
      • +
      • intel/compiler: Avoid propagating inequality cmods if types are different
      • +
      • intel/compiler/test: Add unit test for mismatched signedness comparison
      • +
      + +

      Mauro Rossi (6):

      • android: gallium/auxiliary: add include to get u_debug.h header
      • android: radv: add libmesa_git_sha1 static dependency
      • android: amd/addrlib: update Mesa's copy of addrlib
      • android: st/mesa: fix building error due to sched_getcpu()
      • android: anv: fix generated files depedencies (v2)
      • android: anv: fix libexpat shared dependency

      Maya Rashish (2):

      • radeon: fix printf format specifier.
      • configure: fix test portability

      Michal Srb (2):

      • gallium: Constify drisw_loader_funcs struct
      • drisw: Use separate drisw_loader_funcs for shm

      Michel Dänzer (4):

      • winsys/amdgpu: Stop using amdgpu_bo_handle_type_kms_noimport
      • winsys/amdgpu: Pull in LLVM CFLAGS
      • amd/common: Restore v4i32 suffix for llvm.SI.load.const intrinsic
      • loader/dri3: Use strlen instead of sizeof for creating VRR property atom

      Neha Bhende (1):

      • st/mesa: Fix topogun-1.06-orc-84k-resize.trace crash

      Neil Roberts (4):

      • freedreno: Add .dir-locals to the common directory
      • spirv/nir: handle location decorations on block interface members
      • glsl_types: Rename parameter of glsl_count_attribute_slots
      • spirv: Don't use special semantics when counting vertex attribute size

      Nicholas Kazlauskas (5):

      • util: Get program name based on path when possible
      • util: Add adaptive_sync driconf option
      • drirc: Initial blacklist for adaptive sync
      • loader/dri3: Enable adaptive_sync via _VARIABLE_REFRESH property
      • radeonsi: Enable adaptive_sync by default for radeon

      Nicolai Hähnle (37):

      +
        +
      • radv: include LLVM IR in the VK_AMD_shader_info "disassembly"
      • +
      • radeonsi: fix an out-of-bounds read reported by ASAN
      • +
      • winsys/amdgpu: add amdgpu_winsys_bo::lock
      • +
      • winsys/amdgpu: explicitly declare whether buffer_map is permanent or not
      • +
      • egl/wayland: rather obvious build fix
      • +
      • radv: remove dependency on addrlib gfx9_enum.h
      • +
      • ac/surface/gfx9: let addrlib choose the preferred swizzle kind
      • +
      • amd/addrlib: update Mesa's copy of addrlib
      • +
      • meson: link LLVM 'native' component when LLVM is available
      • +
      • ddebug: simplify watchdog loop and fix crash in the no-timeout case
      • +
      • ddebug: always flush when requested, even when hang detection is disabled
      • +
      • r600: remove redundant semicolon
      • +
      • amd/sid_tables: add additional python3 compatibility imports
      • +
      • amd/common: whitespace fixes
      • +
      • amd/common: add ac_build_ifcc
      • +
      • amd/common: scan/reduce across waves of a workgroup
      • +
      • amd/common: add i1 special case to ac_build_{inclusive,exclusive}_scan
      • +
      • ac/surface: 3D and cube surfaces are never displayable
      • +
      • radeonsi: move SI_FORCE_FAMILY functionality to winsys
      • +
      • radeonsi: extract declare_vs_blit_inputs
      • +
      • radeonsi: add si_init_draw_functions and make some functions static
      • +
      • radeonsi/gfx9: use SET_UCONFIG_REG_INDEX packets when available
      • +
      • radeonsi: don't set RAW_WAIT for CP DMA clears
      • +
      • radeonsi: rename SI_RESOURCE_FLAG_FORCE_TILING to clarify its purpose
      • +
      • radeonsi: const-ify si_set_tesseval_regs
      • +
      • radeonsi: show the fixed function TCS in debug dumps
      • +
      • radeonsi: avoid using hard-coded SI_NUM_RW_BUFFERS
      • +
      • radeonsi: add an si_set_rw_shader_buffer convenience function
      • +
      • radeonsi: use si_set_rw_shader_buffer for setting streamout buffers
      • +
      • radeonsi: track constant buffer bind history in si_pipe_set_constant_buffer
      • +
      • radeonsi: move remaining perfcounter code into si_perfcounter.c
      • +
      • radeonsi: move query suspend logic into the top-level si_query struct
      • +
      • radeonsi: factor si_query_buffer logic out of si_query_hw
      • +
      • radeonsi: split perfcounter queries from si_query_hw
      • +
      • radeonsi: const-ify the si_query_ops
      • +
      • amd/common: use llvm.amdgcn.s.buffer.load for LLVM 8.0
      • +
      • amd/common/vi+: enable SMEM loads with GLC=1
      • +
      + +

      Niklas Haas (3):

      • glsl: fix block member alignment validation for vec3
      • radv: correctly use vulkan 1.0 by default
      • radv: add device->instance extension dependencies

      Olivier Fourdan (1):

      • wayland/egl: Resize EGL surface on update buffer for swrast

      Oscar Blumberg (1):

      • radeonsi: Fix guardband computation for large render targets

      Pierre Moreau (2):

      • clover/meson: Ignore 'svn' suffix when computing CLANG_RESOURCE_DIR
      • meson: Fix with_gallium_icd to with_opencl_icd

      Plamena Manolova (1):

      • nir: Don't lower the local work group size if it's variable.

      Rafael Antognolli (24):

      • intel/genxml: Add register for object preemption.
      • i965/gen10+: Enable object level preemption.
      • i965/gen9: Add workarounds for object preemption.
      • anv/tests: Fix block_pool_no_free test.
      • anv/allocator: Add anv_state_table.
      • anv/allocator: Add getter for anv_block_pool.
      • anv/allocator: Add helper to push states back to the state table.
      • anv/allocator: Use anv_state_table on anv_state_pool_alloc.
      • anv/allocator: Use anv_state_table on back_alloc too.
      • anv/allocator: Remove anv_free_list.
      • anv/allocator: Rename anv_free_list2 to anv_free_list.
      • anv/allocator: Remove pool->map.
      • anv: Update usage of block_pool->bo.
      • anv/allocator: Add support for a list of BOs in block pool.
      • anv: Split code to add BO dependencies to execbuf.
      • anv: Validate the list of BOs from the block pool.
      • anv: Remove some asserts.
      • anv/allocator: Rework chunk return to the state pool.
      • anv/allocator: Add padding information.
      • anv/allocator: Enable snooping on block pool and anv_bo_pool BOs.
      • anv: Remove state flush.
      • anv/allocator: Add support for non-userptr.
      • anv/tests: Adding test for the state_pool padding.
      • anv/allocator: Avoid race condition in anv_block_pool_map.

      Ray Zhang (1):

      • glx: fix shared memory leak in X11

      Rhys Kidd (5):

      • travis: radeonsi and radv require LLVM 7.0
      • meson: libfreedreno depends upon libdrm (for fence support)
      • v3d: Wire up core pipe_debug_callback
      • vc4: Wire up core pipe_debug_callback
      • nv50,nvc0: add missing CAPs for unsupported features

      Rhys Perry (14):

      • nir: fix constness in nir_intrinsic_align()
      • ac: refactor visit_load_buffer
      • ac: split 16-bit ssbo loads that may not be dword aligned
      • radv: don't set surf_index for stencil-only images
      • radv: switch from nir_bcsel to nir_b32csel
      • ac/nir,radv,radeonsi/nir: use correct indices for interpolation intrinsics
      • nir: fix copy-paste error in nir_lower_constant_initializers
      • radv: use dithered alpha-to-coverage
      • radv: pass radv_draw_info to radv_emit_draw_registers()
      • radv: add missed situations for scissor bug workaround
      • radv: avoid context rolls when binding graphics pipelines
      • radv: prevent dirtying of dynamic state when it does not change
      • radv: bitcast 16-bit outputs to integers
      • radv: ensure export arguments are always float

      Rob Clark (79):

      +
        +
      • freedreno: update generated headers
      • +
      • freedreno/a6xx: fix VSC bug with larger # of tiles
      • +
      • freedreno/drm: fix unused 'entry' warnings
      • +
      • freedreno/drm: remove dependency on gallium driver
      • +
      • freedreno: move drm to common location
      • +
      • freedreno/ir3: standalone compiler updates
      • +
      • freedreno: shader_t -> gl_shader_stage
      • +
      • freedreno: remove shader_stage_name()
      • +
      • freedreno: FD_SHADER_DEBUG -> IR3_SHADER_DEBUG
      • +
      • freedreno/ir3: move disasm and optmsgs debug flags
      • +
      • util: env_var_as_unsigned() helper
      • +
      • freedreno/ir3: use env_var_as_unsigned()
      • +
      • freedreno/ir3: some header file cleanup
      • +
      • freedreno/ir3: remove pipe_stream_output_info dependency
      • +
      • freedreno/ir3: split up ir3_shader
      • +
      • freedreno/ir3: remove u_inlines usage
      • +
      • freedreno: move ir3 to common location
      • +
      • mesa/st: swap order of clear() and clear_with_quad()
      • +
      • mesa/st: better colormask check for clear fallback
      • +
      • freedreno/a6xx: disable LRZ for z32
      • +
      • freedreno/a6xx: set guardband clip
      • +
      • freedreno: update generated headers
      • +
      • freedreno/a3xx: also set FSSUPERTHREADENABLE
      • +
      • freedreno/a6xx: MSAA
      • +
      • freedreno: remove unused fd_surface fields
      • +
      • gallium: fix typo
      • +
      • freedreno/a5xx+a6xx: remove unused fs/vs pvt mem
      • +
      • freedreno/drm: fix relocs in nested stateobjs
      • +
      • freedreno: update generated headers
      • +
      • freedreno/a6xx: blitter fixes
      • +
      • freedreno/ir3: don't fetch unused tex components
      • +
      • freedreno/ir3: sync instr/disasm
      • +
      • freedreno/ir3: code-motion
      • +
      • freedreno/ir3: track max flow control depth for a5xx/a6xx
      • +
      • freedreno/drm: fix memory leak
      • +
      • nir: fix spelling typo
      • +
      • mesa/st/nir: fix missing nir_compact_varyings
      • +
      • freedreno/drm: sync uapi and enable softpin
      • +
      • freedreno: debug GEM obj names
      • +
      • freedreno: also set DUMP flag on shaders
      • +
      • freedreno/ir3: fix crash
      • +
      • freedreno/ir3: don't remove unused input components
      • +
      • freedreno/a6xx: fix blitter crash
      • +
      • gallium/aux: add is_unorm() helper
      • +
      • freedreno: update generated headers
      • +
      • freedreno/a6xx: more blitter fixes
      • +
      • freedreno: move fd_resource_copy_region()
      • +
      • freedreno/a6xx: fix resource_copy_region()
      • +
      • freedreno/a6xx: fix corrupted uniforms
      • +
      • freedreno/ir3: fix fallout of extra assert
      • +
      • freedreno/ir3: don't treat all inputs/outputs as vec4
      • +
      • freedreno: combine fd_resource_layer_offset()/fd_resource_offset()
      • +
      • freedreno/a6xx: simplify special case for 3d layout
      • +
      • freedreno/a6xx: improve setup_slices() debug msgs
      • +
      • freedreno: update generated headers
      • +
      • freedreno/a6xx: fix 3d texture layout
      • +
      • freedreno: skip depth resolve if not written
      • +
      • freedreno: rework blit API
      • +
      • freedreno: try blitter for fd_resource_copy_region()
      • +
      • freedreno/a6xx: rework blitter API
      • +
      • freedreno: remove blit_via_copy_region()
      • +
      • freedreno: fix staging resource size for arrays
      • +
      • freedreno: make cmdstream bo's read-only to GPU
      • +
      • freedreno/a6xx: separate stencil restore/resolve fixes
      • +
      • freedreno/a6xx: move tile_mode to sampler-view CSO
      • +
      • freedreno/a6xx: fix 3d+tiled layout
      • +
      • nir/vtn: add caps for some cl related capabilities
      • +
      • loader: fix the no-modifiers case
      • +
      • freedreno: core buffer modifier support
      • +
      • freedreno: set modifier when exporting buffer
      • +
      • freedreno: limit tiling to PIPE_BIND_SAMPLER_VIEW
      • +
      • freedreno/a2xx: fix unused variable warning
      • +
      • freedreno/a5xx: fix blitter nr_samples check
      • +
      • freedreno/a6xx: fix blitter nr_samples check
      • +
      • freedreno: stop frob'ing pipe_resource::nr_samples
      • +
      • freedreno: minor cleanups
      • +
      • mesa: wire up InvalidateFramebuffer
      • +
      • freedreno: fix release tarball
      • +
      • freedreno: more fixing release tarball
      • +
      + +

      Rob Herring (3):

      • pipe-loader: Fallback to kmsro driver when no matching driver name found
      • kmsro: Add etnaviv renderonly support
      • Switch imx to kmsro and remove the imx winsys

      Robert Foss (3):

      • virgl: native fence fd support
      • virgl: Clean up fences commit
      • virgl: add assert and missing function parameter

      Rodrigo Vivi (1):

      • intel: Add more PCI Device IDs for Coffee Lake and Ice Lake.

      Roland Scheidegger (7):

      • gallivm: fix improper clamping of vertex index when fetching gs inputs
      • draw: fix infinite loop in line stippling
      • gallivm: remove unused float coord wrapping for aos sampling
      • gallivm: use llvm jit code for decoding s3tc
      • gallivm: don't use pavg.b intrinsic on llvm >= 6.0
      • gallivm: abort when trying to use non-existing intrinsic
      • Revert "llvmpipe: Always return some fence in flush (v2)"

      Sagar Ghuge (14):

      • intel/compiler: Disassemble GEN6_SFID_DATAPORT_SAMPLER_CACHE as dp_sampler
      • intel/compiler: Set swizzle to BRW_SWIZZLE_XXXX for scalar region
      • intel/compiler: Always print flag subregister number
      • nir: Add a new lowering option to lower 3D surfaces from txd to txl.
      • glsl: Add "built-in" functions to do uint64_to_fp64(uint64_t)
      • glsl: Add "built-in" functions to do int64_to_fp64(int64_t)
      • glsl: Add "built-in" functions to do uint64_to_fp32(uint64_t)
      • glsl: Add "built-in" functions to do int64_to_fp32(int64_t)
      • glsl: Add utility function to round and pack uint64_t value
      • glsl: Add "built-in" functions to do fp64_to_uint64(fp64)
      • glsl: Add utility function to round and pack int64_t value
      • glsl: Add "built-in" functions to do fp64_to_int64(fp64)
      • glsl: Add "built-in" functions to do fp32_to_uint64(fp32)
      • glsl: Add "built-in" functions to do fp32_to_int64(fp32)

      Samuel Pitoiset (103):

      +
        +
      • radv: remove useless sync after copying query results with compute
      • +
      • radv: add missing TFB queries support to CmdCopyQueryPoolsResults()
      • +
      • radv: replace si_emit_wait_fence() with radv_cp_wait_mem()
      • +
      • radv: more use of radv_cp_wait_mem()
      • +
      • radv: allocate enough space in CS when copying query results with compute
      • +
      • radv: disable conditional rendering for vkCmdCopyQueryPoolResults()
      • +
      • radv: only expose VK_SUBGROUP_FEATURE_ARITHMETIC_BIT for VI+
      • +
      • radv: use LOAD_CONTEXT_REG when loading fast clear values
      • +
      • radv: fix GPU hangs when loading depth/stencil clear values on SI/CIK
      • +
      • radv: cleanup and document a Hawaii bug with offchip buffers
      • +
      • radv: clean up setting partial_es_wave for distributed tess on VI
      • +
      • radv: make use of num_good_cu_per_sh in si_emit_graphics() too
      • +
      • radv: binding streamout buffers doesn't change context regs
      • +
      • radv: set PA.SC_CONSERVATIVE_RASTERIZATION.NULL_SQUAD_AA_MASK_ENABLE
      • +
      • radv: set optimal OVERWRITE_COMBINER_WATERMARK on GFX9
      • +
      • radv: add a debug option for disabling primitive binning
      • +
      • radv: enable primitive binning by default
      • +
      • radv: tidy up radv_set_dcc_need_cmask_elim_pred()
      • +
      • radv: always clear the FCE predicate after DCC/FMASK/CMASK decompressions
      • +
      • radv/winsys: remove the max IBs per submit limit for the fallback path
      • +
      • radv/winsys: remove the max IBs per submit limit for the sysmem path
      • +
      • radv: remove unnecessary goto in the fast clear paths
      • +
      • radv: add radv_get_htile_fast_clear_value() helper
      • +
      • radv: add radv_is_fast_clear_{depth,stencil}_allowed() helpers
      • +
      • radv: check allowed fast HTILE clears a bit earlier
      • +
      • radv: rewrite the condition that checks allowed depth/stencil values
      • +
      • radv: implement fast HTILE clears for depth or stencil only on GFX9
      • +
      • ac/nir: fix intrinsic name string size in visit_image_atomic()
      • +
      • radv: ignore subpass self-dependencies
      • +
      • radv: only sync CP DMA for transfer operations or bottom pipe
      • +
      • radv: remove useless sync after CmdClear{Color,DepthStencil}Image()
      • +
      • radv: remove useless sync before CmdClear{Color,DepthStencil}Image()
      • +
      • radv: ignore subpass self-dependencies for CreateRenderPass() too
      • +
      • radv: remove useless check in emit_fast_color_clear()
      • +
      • radv: add radv_image_can_fast_clear() helper
      • +
      • radv: add radv_image_view_can_fast_clear() helper
      • +
      • radv: add radv_can_fast_clear_{color,depth}() helpers
      • +
      • radv: simplify a check in emit_fast_color_clear()
      • +
      • radv: refactor the fast clear path for better re-use
      • +
      • radv: optimize CmdClear{Color,DepthStencil}Image() for layered textures
      • +
      • radv: remove unused pending_clears param in the transition path
      • +
      • radv: drop few useless state changes when doing color/depth decompressions
      • +
      • radv: rework the TC-compat HTILE hardware bug with COND_EXEC
      • +
      • radv: reset pending_reset_query when flushing caches
      • +
      • radv: wait on the high 32 bits of timestamp queries
      • +
      • spirv: add SpvCapabilityInt64Atomics
      • +
      • radv: expose VK_EXT_scalar_block_layout
      • +
      • amd: remove support for LLVM 6.0
      • +
      • gallium: add missing PIPE_CAP_SURFACE_SAMPLE_COUNT default value
      • +
      • radv: bump reported version to 1.1.90
      • +
      • radv: add a predicate for reflecting DCC decompression state
      • +
      • radv: allow to skip DCC decompressions with the new predicate
      • +
      • radv: switch on EOP when primitive restart is enabled with triangle strips
      • +
      • radv: check if addrlib enabled HTILE in radv_image_can_enable_htile()
      • +
      • radv: don't check if format is depth in radv_image_can_enable_hile()
      • +
      • radv: report Vulkan version 1.1.90 for real
      • +
      • ac/nir: remove the bitfield_extract workaround for LLVM 8
      • +
      • radv: drop the amdgpu-skip-threshold=1 workaround for LLVM 8
      • +
      • radv: fix subpass image transitions with multiviews
      • +
      • radv: compute optimal VM alignment for imported buffers
      • +
      • spirv: add support for SpvCapabilityStorageImageMultisample
      • +
      • ac/nir: restrict fmask lookup to image load intrinsics
      • +
      • radv: initialize FMASK for images in fully expanded mode
      • +
      • radv: add support for FMASK expand
      • +
      • radv: enable shaderStorageImageMultisample feature on GFX8+
      • +
      • radv: get rid of bunch of KHR suffixes
      • +
      • radv: enable variable pointers
      • +
      • radv: skip draws with instance_count == 0
      • +
      • ac/nir: add get_cache_policy() helper and use it
      • +
      • ac/nir: set cache policy when loading/storing buffer images
      • +
      • ac: add missing 16-bit types to glsl_base_to_llvm_type()
      • +
      • radv: remove unnecessary returns in GetPhysicalDevice*Properties()
      • +
      • radv: add two small helpers for getting VRAM and visible VRAM sizes
      • +
      • radv: add support for VK_EXT_memory_budget
      • +
      • ac/nir: don't trash L1 caches for store operations with writeonly memory
      • +
      • radv: drop unused code related to 16 sample locations
      • +
      • radv: reduce size of the per-queue descriptor BO
      • +
      • radv: do not write unused descriptors to the per-queue BO
      • +
      • radv: initialize the per-queue descriptor BO only once
      • +
      • nir: do not remove varyings used for transform feedback
      • +
      • nir: fix lowering arrays to elements for XFB outputs
      • +
      • radv: improve gathering of load_push_constants with dynamic bindings
      • +
      • radv: remove old_fence parameter from si_cs_emit_write_event_eop()
      • +
      • radv: only allocate the GFX9 fence and EOP BOs for the gfx queue
      • +
      • radv: compute the GFX9 fence VA at allocation time
      • +
      • radv: always pass the GFX9 fence data to si_cs_emit_cache_flush()
      • +
      • radv: fix computing number of user SGPRs for streamout buffers
      • +
      • radv: remove radv_userdata_info::indirect field
      • +
      • radv: simplify allocating user SGPRS for descriptor sets
      • +
      • radv: set noalias/dereferenceable LLVM attributes based on param types
      • +
      • radv: re-enable fast depth clears for 16-bit surfaces on VI
      • +
      • radv/winsys: fix hash when adding internal buffers
      • +
      • radv: fix compiler issues with GCC 9
      • +
      • radv: fix using LOAD_CONTEXT_REG with old GFX ME firmwares on GFX8
      • +
      • radv/winsys: fix BO list creation when RADV_DEBUG=allbos is set
      • +
      • radv: always export gl_SampleMask when the fragment shader uses it
      • +
      • radv: write the alpha channel of MRT0 when alpha coverage is enabled
      • +
      • radv: fix writing the alpha channel of MRT0 when alpha coverage is enabled
      • +
      • radv: fix out-of-bounds access when copying descriptors BO list
      • +
      • radv: don't copy buffer descriptors list for samplers
      • +
      • radv: fix clearing attachments in secondary command buffers
      • +
      • radv: properly align the fence and EOP bug VA on GFX9
      • +
      • radv: fix pointSizeRange limits
      • +
      + +

      Sergii Romantsov (4):

      +
        +
      • autotools: library-dependency when no sse and 32-bit
      • +
      • i965/batch/debug: Allow log be dumped before assert
      • +
      • nir: Length of boolean vtn_value now is 1
      • +
      • dri: meson: do not prefix user provided dri-drivers-path
      • +
      + +

      Sonny Jiang (1):

      +
        +
      • radeonsi: use compute for resource_copy_region when possible
      • +
      + +

      Tapani Pälli (27):

      +
        +
      • anv: allow exporting an imported SYNC_FD semaphore type
      • +
      • anv: add create_flags as part of anv_image
      • +
      • anv: refactor make_surface to use data from anv_image
      • +
      • anv: make anv_get_image_format_features public
      • +
      • anv: add from/to helpers with android and vulkan formats
      • +
      • anv/android: add GetAndroidHardwareBufferPropertiesANDROID
      • +
      • anv: add anv_ahw_usage_from_vk_usage helper function
      • +
      • anv: refactor, remove else block in AllocateMemory
      • +
      • anv/android: support import/export of AHardwareBuffer objects
      • +
      • anv/android: add ahardwarebuffer external memory properties
      • +
      • anv/android: support creating images from external format
      • +
      • anv: support VkExternalFormatANDROID in vkCreateSamplerYcbcrConversion
      • +
      • anv: add VkFormat field as part of anv_format
      • +
      • anv: support VkSamplerYcbcrConversionInfo in vkCreateImageView
      • +
      • anv: ignore VkSamplerYcbcrConversion on non-yuv formats
      • +
      • anv/android: turn on VK_ANDROID_external_memory_android_hardware_buffer
      • +
      • dri3: initialize adaptive_sync as false before configQueryb
      • +
      • intel/isl: move tiled_memcpy static libs from i965 to isl
      • +
      • anv: do not advertise AHW support if extension not enabled
      • +
      • nir: cleanup glsl_get_struct_field_offset, glsl_get_explicit_stride
      • +
      • android: fix build issues with libmesa_anv_gen* libraries
      • +
      • mesa: return NULL if we exceed MaxColorAttachments in get_fb_attachment
      • +
      • nir: initialize value in copy_prop_vars_block
      • +
      • anv: retain the is_array state in create_plane_tex_instr_implicit
      • +
      • anv: release memory allocated by glsl types during spirv_to_nir
      • +
      • anv: revert "anv: release memory allocated by glsl types during spirv_to_nir"
      • +
      • anv: destroy descriptor sets when pool gets destroyed
      • +
      + +

      Thomas Hellstrom (9):

      +
        +
      • st/xa: Render update. Better support for solid pictures
      • +
      • st/xa: Support higher color precision for solid pictures
      • +
      • st/xa: Support a couple of new formats
      • +
      • st/xa: Fix transformations when we have both source and mask samplers
      • +
      • st/xa: Minor renderer cleanups
      • +
      • st/xa: Support Component Alpha with trivial blending
      • +
      • st/xa: Bump minor
      • +
      • st/xa: Fix a memory leak
      • +
      • winsys/svga: Fix a memory leak
      • +
      + +

      Timothy Arceri (56):

      +
        +
      • nir: allow propagation of if evaluation for bcsel
      • +
      • nir: fix condition propagation when src has a swizzle
      • +
      • ac/nir_to_llvm: fix b2f for f64
      • +
      • nir: add new linking opt nir_link_constant_varyings()
      • +
      • st/mesa: make use of nir_link_constant_varyings()
      • +
      • nir: add glsl_type_is_integer() helper
      • +
      • nir: don't pack varyings ints with floats unless flat
      • +
      • anv/i965: make use of nir_link_constant_varyings()
      • +
      • nir: add support for removing redundant stores to copy prop var
      • +
      • radv: make use of nir_move_out_const_to_consumer()
      • +
      • nir: small tidy ups for nir_loop_analyze()
      • +
      • nir: clarify some nit_loop_info member names
      • +
      • nir: add a new nir_cf_list_clone_and_reinsert() helper
      • +
      • nir: make use of new nir_cf_list_clone_and_reinsert() helper
      • +
      • nir: factor out some of the complex loop unroll code to a helper
      • +
      • nir: rework force_unroll_array_access()
      • +
      • nir: in loop analysis track actual control flow type
      • +
      • nir: reword code comment
      • +
      • nir: detect more induction variables
      • +
      • nir: fix opt_if_loop_last_continue()
      • +
      • tgsi/scan: fix loop exit point in tgsi_scan_tess_ctrl()
      • +
      • tgsi/scan: correctly walk instructions in tgsi_scan_tess_ctrl()
      • +
      • radeonsi: remove unrequired param in si_nir_scan_tess_ctrl()
      • +
      • ac/nir_to_llvm: add ac_are_tessfactors_def_in_all_invocs()
      • +
      • radeonsi: make use of ac_are_tessfactors_def_in_all_invocs()
      • +
      • st/glsl_to_nir: call nir_lower_load_const_to_scalar() in the st
      • +
      • nir: rename nir_link_constant_varyings() nir_link_opt_varyings()
      • +
      • nir: add can_replace_varying() helper
      • +
      • nir: rework nir_link_opt_varyings()
      • +
      • nir: link time opt duplicate varyings
      • +
      • nir: make nir_opt_remove_phis_impl() static
      • +
      • nir: make use of does_varying_match() helper
      • +
      • nir: simplify does_varying_match()
      • +
      • nir: add rewrite_phi_predecessor_blocks() helper
      • +
      • nir: merge some basic consecutive ifs
      • +
      • st/glsl: refactor st_link_nir()
      • +
      • nir: avoid uninitialized variable warning
      • +
      • glsl: Copy function out to temp if we don't directly ref a variable
      • +
      • ac/nir_to_llvm: fix type handling in image code
      • +
      • radeonsi/nir: get correct type for images inside structs
      • +
      • ac/nir_to_llvm: fix regression in bindless support
      • +
      • ac/nir_to_llvm: add support for structs to get_sampler_desc()
      • +
      • glsl: don't skip GLSL IR opts on first-time compiles
      • +
      • glsl: be much more aggressive when skipping shader compilation
      • +
      • Revert "glsl: be much more aggressive when skipping shader compilation"
      • +
      • ac/nir_to_llvm: fix interpolateAt* for arrays
      • +
      • glsl: be much more aggressive when skipping shader compilation
      • +
      • radeonsi/nir: add missing piece for bindless image support
      • +
      • ac/nir_to_llvm: add bindless support for uniform handles
      • +
      • ac/nir_to_llvm: fix interpolateAt* for structs
      • +
      • ac/nir_to_llvm: fix clamp shadow reference for more hardware
      • +
      • tgsi: remove culldist semantic from docs
      • +
      • radv/ac: fix some fp16 handling
      • +
      • glsl: use remap location when serialising uniform program resource data
      • +
      • radeonsi: fix query buffer allocation
      • +
      • glsl: fix shader cache for packed param list
      • +
      + +

      Tobias Klausmann (1):

      +
        +
      • amd/vulkan: meson build - use radv_deps for libvulkan_radeon
      • +
      + +

      Tomasz Figa (1):

      +
        +
      • llvmpipe: Always return some fence in flush (v2)
      • +
      + +

      Tomeu Vizoso (1):

      +
        +
      • etnaviv: Consolidate buffer references from framebuffers
      • +
      + +

      Toni Lönnberg (14):

      +
        +
      • intel/decoder: Engine parameter for instructions
      • +
      • intel/decoder: tools: gen_engine to drm_i915_gem_engine_class
      • +
      • intel/decoder: tools: Use engine for decoding batch instructions
      • +
      • intel/genxml: Add engine definition to render engine instructions (gen4)
      • +
      • intel/genxml: Add engine definition to render engine instructions (gen45)
      • +
      • intel/genxml: Add engine definition to render engine instructions (gen5)
      • +
      • intel/genxml: Add engine definition to render engine instructions (gen6)
      • +
      • intel/genxml: Add engine definition to render engine instructions (gen7)
      • +
      • intel/genxml: Add engine definition to render engine instructions (gen75)
      • +
      • intel/genxml: Add engine definition to render engine instructions (gen8)
      • +
      • intel/genxml: Add engine definition to render engine instructions (gen9)
      • +
      • intel/genxml: Add engine definition to render engine instructions (gen10)
      • +
      • intel/genxml: Add engine definition to render engine instructions (gen11)
      • +
      • intel/aubinator_error_decode: Get rid of warning for missing switch case
      • +
      + +

      Topi Pohjolainen (1):

      +
        +
      • i965/icl: Disable prefetching of sampler state entries
      • +
      + +

      Veluri Mithun (5):

      +
        +
      • Add extension doc for MESA_query_driver
      • +
      • Implement EGL API for MESA_query_driver
      • +
      • Implementation of egl dri2 drivers for MESA_query_driver
      • +
      • egl: Implement EGL API for MESA_query_driver
      • +
      • egl: Implementation of egl dri2 drivers for MESA_query_driver
      • +
      + +

      Vinson Lee (7):

      +
        +
      • r600/sb: Fix constant logical operand in assert.
      • +
      • freedreno: Fix autotools build.
      • +
      • st/xvmc: Add X11 include path.
      • +
      • nir/algebraic: Make algebraic_parser_test.sh executable.
      • +
      • meson: Fix typo.
      • +
      • meson: Fix libsensors detection.
      • +
      • meson: Fix typo.
      • +
      + +

      Yevhenii Kolesnikov (1):

      +
        +
      • i965: Fix allow_higher_compat_version workaround limited by OpenGL 3.0
      • +
      + +

      pal1000 (1):

      +
        +
      • scons: Compatibility with Scons development version string
      • +
      + +
    + +
    + + diff -Nru mesa-18.3.3/docs/relnotes/19.0.1.html mesa-19.0.1/docs/relnotes/19.0.1.html --- mesa-18.3.3/docs/relnotes/19.0.1.html 1970-01-01 00:00:00.000000000 +0000 +++ mesa-19.0.1/docs/relnotes/19.0.1.html 2019-03-31 23:16:37.000000000 +0000 @@ -0,0 +1,158 @@ + + + + + Mesa Release Notes + + + + +
    +

    The Mesa 3D Graphics Library

    +
    + + +
    + +

    Mesa 19.0.1 Release Notes / March 27, 2019

    + +

    +Mesa 19.0.1 is a bug fix release which fixes bugs found since the 19.0.0 release. +

    +

    +Mesa 19.0.1 implements the OpenGL 4.5 API, but the version reported by +glGetString(GL_VERSION) or glGetIntegerv(GL_MAJOR_VERSION) / +glGetIntegerv(GL_MINOR_VERSION) depends on the particular driver being used. +Some drivers don't support all the features required in OpenGL 4.5. OpenGL +4.5 is only available if requested at context creation. +Compatibility contexts may report a lower version depending on each driver. +

    + + +

    SHA256 checksums

    +
    +  TBD
    +
    + + +

    New features

    +

    None

    + + +

    Bug fixes

    + +
      + +
    • Bug 100316 - Linking GLSL 1.30 shaders with invariant and deprecated variables triggers an 'mismatching invariant qualifiers' error
    • + +
    • Bug 107563 - [RADV] Broken rendering in Unity demos
    • + +
    • Bug 109698 - dri.pc contents invalid when built with meson
    • + +
    • Bug 109980 - [i915 CI][HSW] spec@arb_fragment_shader_interlock@arb_fragment_shader_interlock-image-load-store - fail
    • + +
    • Bug 110211 - If DESTDIR is set to an empty string, the dri drivers are not installed
    • + +
    • Bug 110221 - build error with meson
    • + +
    + +

    Changes

    + +

    Andres Gomez (4):

    +
      +
    • glsl: correctly validate component layout qualifier for dvec{3,4}
    • +
    • glsl/linker: don't fail non static used inputs without matching outputs
    • +
    • glsl/linker: simplify xfb_offset vs xfb_stride overflow check
    • +
    • Revert "glsl: relax input->output validation for SSO programs"
    • +
    + +

    Bas Nieuwenhuizen (2):

    +
      +
    • radv: Use correct image view comparison for fast clears.
    • +
    • ac/nir: Return frag_coord as integer.
    • +
    + +

    Danylo Piliaiev (2):

    +
      +
    • anv: Treat zero size XFB buffer as disabled
    • +
    • glsl: Cross validate variable's invariance by explicit invariance only
    • +
    + +

    Dave Airlie (1):

    +
      +
    • softpipe: fix texture view crashes
    • +
    + +

    Dylan Baker (5):

    +
      +
    • docs: Add SHA256 sums for 19.0.0
    • +
    • cherry-ignore: Add commit that doesn't apply
    • +
    • bin/install_megadrivers.py: Correctly handle DESTDIR=''
    • +
    • bin/install_megadrivers.py: Fix regression for set DESTDIR
    • +
    • bump version for 19.0.1
    • +
    + +

    Eric Anholt (1):

    +
      +
    • v3d: Fix leak of the renderonly struct on screen destruction.
    • +
    + +

    Jason Ekstrand (6):

    +
      +
    • glsl/lower_vector_derefs: Don't use a temporary for TCS outputs
    • +
    • glsl/list: Add a list variant of insert_after
    • +
    • anv/pass: Flag the need for a RT flush for resolve attachments
    • +
    • nir/builder: Add a vector extract helper
    • +
    • nir: Add a new pass to lower array dereferences on vectors
    • +
    • intel/nir: Lower array-deref-of-vector UBO and SSBO loads
    • +
    + +

    Józef Kucia (2):

    +
      +
    • radv: Fix driverUUID
    • +
    • mesa: Fix GL_NUM_DEVICE_UUIDS_EXT
    • +
    + +

    Kenneth Graunke (1):

    +
      +
    • intel/fs: Fix opt_peephole_csel to not throw away saturates.
    • +
    + +

    Kevin Strasser (1):

    +
      +
    • egl/dri: Avoid out of bounds array access
    • +
    + +

    Mark Janes (1):

    +
      +
    • mesa: properly report the length of truncated log messages
    • +
    + +

    Plamena Manolova (1):

    +
      +
    • i965: Disable ARB_fragment_shader_interlock for platforms prior to GEN9
    • +
    + +

    Samuel Pitoiset (3):

    +
      +
    • radv: set the maximum number of IBs per submit to 192
    • +
    • radv: always initialize HTILE when the src layout is UNDEFINED
    • +
    • radv: fix binding transform feedback buffers
    • +
    + +

    Sergii Romantsov (1):

    +
      +
    • d3d: meson: do not prefix user provided d3d-drivers-path
    • +
    + +

    Tapani Pälli (2):

    +
      +
    • isl: fix automake build when sse41 is not supported
    • +
    • anv/radv: release memory allocated by glsl types during spirv_to_nir
    • +
    + + +
    + + diff -Nru mesa-18.3.3/docs/relnotes.html mesa-19.0.1/docs/relnotes.html --- mesa-18.3.3/docs/relnotes.html 2018-12-07 18:58:04.000000000 +0000 +++ mesa-19.0.1/docs/relnotes.html 2019-03-31 23:16:37.000000000 +0000 @@ -21,6 +21,13 @@

      +
    • 18.3.2 release notes +
    • 18.2.8 release notes +
    • 18.2.7 release notes +
    • 18.3.1 release notes +
    • 18.3.0 release notes +
    • 18.2.6 release notes +
    • 18.2.5 release notes
    • 18.2.4 release notes
    • 18.2.3 release notes
    • 18.2.2 release notes diff -Nru mesa-18.3.3/docs/specs/EGL_MESA_query_driver.txt mesa-19.0.1/docs/specs/EGL_MESA_query_driver.txt --- mesa-18.3.3/docs/specs/EGL_MESA_query_driver.txt 1970-01-01 00:00:00.000000000 +0000 +++ mesa-19.0.1/docs/specs/EGL_MESA_query_driver.txt 2019-03-31 23:16:37.000000000 +0000 @@ -0,0 +1,95 @@ +Name + + MESA_query_driver + +Name Strings + + EGL_MESA_query_driver + +Contact + + Rob Clark + Nicolai Hähnle + +Contibutors + + Veluri Mithun + +Status + + Complete + +Version + + Version 3, 2019-01-24 + +Number + + EGL Extension 131 + +Dependencies + + EGL 1.0 is required. + +Overview + + When an application has to query the name of a driver and for + obtaining driver's option list (UTF-8 encoded XML) of a driver + the below functions are useful. + + XML file formally describes all available options and also + includes verbal descriptions in multiple languages. Its main purpose + is to be automatically processed by configuration GUIs. + The XML shall respect the following DTD: + + + + + + + + + + +New Procedures and Functions + + char* eglGetDisplayDriverConfig(EGLDisplay dpy); + const char* eglGetDisplayDriverName(EGLDisplay dpy); + +Description + + By passing EGLDisplay as parameter to `eglGetDisplayDriverName` one can retrieve + driverName. Similarly passing EGLDisplay to `eglGetDisplayDriverConfig` we can retrieve + driverConfig options of the driver in XML format. + + The string returned by `eglGetDisplayDriverConfig` is heap-allocated and caller + is responsible for freeing it. + + EGL_BAD_DISPLAY is generated if `disp` is not an EGL display connection. + + EGL_NOT_INITIALIZED is generated if `disp` has not been initialized. + + If the implementation does not have enough resources to allocate the XML then an + EGL_BAD_ALLOC error is generated. 
+ +New Tokens + + No new tokens + +Issues + + None + + +Revision History + + Version 1, 2018-11-05 - First draft (Veluri Mithun) + Version 2, 2019-01-23 - Final version (Veluri Mithun) + Version 3, 2019-01-24 - Mark as complete, add Khronos extension + number, fix parameter name in prototypes, + write revision history (Eric Engestrom) diff -Nru mesa-18.3.3/docs/specs/MESA_query_renderer.spec mesa-19.0.1/docs/specs/MESA_query_renderer.spec --- mesa-18.3.3/docs/specs/MESA_query_renderer.spec 2017-11-05 00:14:08.000000000 +0000 +++ mesa-19.0.1/docs/specs/MESA_query_renderer.spec 2019-03-31 23:16:37.000000000 +0000 @@ -20,11 +20,11 @@ Version - Version 8, 14-February-2014 + Version 9, 09 November 2018 Number - TBD. + OpenGL Extension #446 Dependencies @@ -32,9 +32,6 @@ GLX_ARB_create_context and GLX_ARB_create_context_profile are required. - This extension interacts with GLX_EXT_create_context_es2_profile and - GLX_EXT_create_context_es_profile. - Overview In many situations, applications want to detect characteristics of a @@ -95,18 +92,13 @@ GLX_RENDERER_VENDOR_ID_MESA GLX_RENDERER_DEVICE_ID_MESA - Accepted as an attribute name in <*attrib_list> in - glXCreateContextAttribsARB: - - GLX_RENDERER_ID_MESA 0x818E - Additions to the OpenGL / WGL Specifications None. This specification is written for GLX. Additions to the GLX 1.4 Specification - [Add the following to Section X.Y.Z of the GLX Specification] + [Add to Section 3.3.2 "GLX Versioning" of the GLX Specification] To obtain information about the available renderers for a particular display and screen, @@ -206,29 +198,6 @@ format as the string that would be returned by glGetString of GL_RENDERER. It may, however, have a different value. - - [Add to section section 3.3.7 "Rendering Contexts"] - - The attribute name GLX_RENDERER_ID_MESA specified the index of the render - against which the context should be created. The default value of - GLX_RENDERER_ID_MESA is 0. 
- - - [Add to list of errors for glXCreateContextAttribsARB in section section - 3.3.7 "Rendering Contexts"] - - * If the value of GLX_RENDERER_ID_MESA specifies a non-existent - renderer, BadMatch is generated. - -Dependencies on GLX_EXT_create_context_es_profile and -GLX_EXT_create_context_es2_profile - - If neither extension is supported, remove all mention of - GLX_RENDERER_OPENGL_ES2_PROFILE_VERSION_MESA from the spec. - - If GLX_EXT_create_context_es_profile is not supported, remove all mention of - GLX_RENDERER_OPENGL_ES_PROFILE_VERSION_MESA from the spec. - Issues 1) How should the difference between on-card and GART memory be exposed? @@ -408,3 +377,9 @@ read GLX_RENDERER_ID_MESA. The VENDOR/DEVICE_ID example given in issue #17 should be 0x5143 and 0xFFFFFFFF respectively. + + Version 9, 2018/11/09 - Remove GLX_RENDERER_ID_MESA, which has never been + implemented. Remove the unnecessary interactions + with the GLX GLES profile extensions. Note the + official GL extension number. Specify the section + of the GLX spec to modify. diff -Nru mesa-18.3.3/docs/submittingpatches.html mesa-19.0.1/docs/submittingpatches.html --- mesa-18.3.3/docs/submittingpatches.html 2018-12-07 18:58:04.000000000 +0000 +++ mesa-19.0.1/docs/submittingpatches.html 2019-03-31 23:16:37.000000000 +0000 @@ -21,7 +21,7 @@
    • Basic guidelines
    • Patch formatting
    • Testing Patches -
    • Mailing Patches +
    • Submitting Patches
    • Reviewing Patches
    • Nominating a commit for a stable branch
    • Criteria for accepting patches to the stable branch @@ -42,8 +42,10 @@ git bisect.)
    • Patches should be properly formatted.
    • Patches should be sufficiently tested before submitting. -
    • Patches should be submitted to mesa-dev -for review using git send-email. +
    • Patches should be submitted +to mesa-dev or with +a merge request +for review.
    @@ -156,18 +158,29 @@ A good way to test this is to make use of the `git rebase` command, to run your tests on each commit. Assuming your branch is based off origin/master, you can run: +

     $ git rebase --interactive --exec "make check" origin/master
     
    +

    replacing "make check" with whatever other test you want to run.

    -

    Mailing Patches

    +

    Submitting Patches

    -Patches should be sent to the mesa-dev mailing list for review: +Patches may be submitted to the Mesa project by +email or with a +GitLab merge request. To prevent +duplicate code review, only use one method to submit your changes. +

    + +

    Mailing Patches

    + +

    +Patches may be sent to the mesa-dev mailing list for review: mesa-dev@lists.freedesktop.org. When submitting a patch make sure to use @@ -201,9 +214,66 @@ your email administrator for this.)

    +

    GitLab Merge Requests

    + +

    + GitLab Merge + Requests (MR) can also be used to submit patches for Mesa. +

    + +

    + If the MR may have interest for most of the Mesa community, you can + send an email to the mesa-dev email list including a link to the MR. + Don't send the patch to mesa-dev, just the MR link. +

    +

    + Add labels to your MR to help reviewers find it. For example: +

      +
    • Mesa changes affecting all drivers: mesa +
    • Hardware vendor specific code: amd, intel, nvidia, ... +
    • Driver specific code: anvil, freedreno, i965, iris, radeonsi, + radv, vc4, ... +
    • Other tag examples: gallium, util +
    +

    +

    + If you revise your patches based on code review and push an update + to your branch, you should maintain a clean history + in your patches. There should not be "fixup" patches in the history. + The series should be buildable and functional after every commit + whenever you push the branch. +

    +

    + It is your responsibility to keep the MR alive and making progress, + as there are no guarantees that a Mesa dev will independently take + interest in it. +

    +

    + Some other notes: +

      +
    • Make changes and update your branch based on feedback +
    • Old, stale MR may be closed, but you can reopen it if you + still want to pursue the changes +
    • You should periodically check to see if your MR needs to be + rebased +
    • Make sure your MR is closed if your patches get pushed outside + of GitLab +
    • Please send MRs from a personal fork rather than from the main + Mesa repository, as it clutters it unnecessarily. +
    +

    +

    Reviewing Patches

    + To participate in code review, you should monitor the + + mesa-dev email list and the GitLab + Mesa Merge + Requests page. +

    + +

    When you've reviewed a patch on the mailing list, please be unambiguous about your review. That is, state either

    @@ -229,6 +299,29 @@ as the issues are resolved first.

    +

    +These Reviewed-by, Acked-by, and Tested-by tags should also be amended +into commits in a MR before it is merged. +

    + +

    +When providing a Reviewed-by, Acked-by, or Tested-by tag in a gitlab MR, +enclose the tag in backticks: +

    +
    +  `Reviewed-by: Joe Hacker <jhacker@example.com>`
    +

    +This is the markdown format for literal, and will prevent gitlab from hiding +the < and > symbols. +

    + +

    +Review by non-experts is encouraged. Understanding how someone else +goes about solving a problem is a great way to learn your way around +the project. The submitter is expected to evaluate whether they have +an appropriate amount of review feedback from people who also +understand the code before merging their patches. +

    Nominating a commit for a stable branch

    diff -Nru mesa-18.3.3/.editorconfig mesa-19.0.1/.editorconfig --- mesa-18.3.3/.editorconfig 2017-11-05 00:14:08.000000000 +0000 +++ mesa-19.0.1/.editorconfig 2019-03-31 23:16:37.000000000 +0000 @@ -11,6 +11,7 @@ [*.{c,h,cpp,hpp,cc,hh}] indent_style = space indent_size = 3 +max_line_length = 78 [{Makefile*,*.mk}] indent_style = tab diff -Nru mesa-18.3.3/include/drm-uapi/drm_fourcc.h mesa-19.0.1/include/drm-uapi/drm_fourcc.h --- mesa-18.3.3/include/drm-uapi/drm_fourcc.h 2018-09-27 19:13:53.000000000 +0000 +++ mesa-19.0.1/include/drm-uapi/drm_fourcc.h 2019-03-31 23:16:37.000000000 +0000 @@ -298,6 +298,19 @@ */ #define DRM_FORMAT_MOD_SAMSUNG_64_32_TILE fourcc_mod_code(SAMSUNG, 1) +/* + * Qualcomm Compressed Format + * + * Refers to a compressed variant of the base format that is compressed. + * Implementation may be platform and base-format specific. + * + * Each macrotile consists of m x n (mostly 4 x 4) tiles. + * Pixel data pitch/stride is aligned with macrotile width. + * Pixel data height is aligned with macrotile height. + * Entire pixel data buffer is aligned with 4k(bytes). 
+ */ +#define DRM_FORMAT_MOD_QCOM_COMPRESSED fourcc_mod_code(QCOM, 1) + /* Vivante framebuffer modifiers */ /* diff -Nru mesa-18.3.3/include/drm-uapi/v3d_drm.h mesa-19.0.1/include/drm-uapi/v3d_drm.h --- mesa-18.3.3/include/drm-uapi/v3d_drm.h 2018-09-27 19:13:53.000000000 +0000 +++ mesa-19.0.1/include/drm-uapi/v3d_drm.h 2019-03-31 23:16:37.000000000 +0000 @@ -36,6 +36,7 @@ #define DRM_V3D_MMAP_BO 0x03 #define DRM_V3D_GET_PARAM 0x04 #define DRM_V3D_GET_BO_OFFSET 0x05 +#define DRM_V3D_SUBMIT_TFU 0x06 #define DRM_IOCTL_V3D_SUBMIT_CL DRM_IOWR(DRM_COMMAND_BASE + DRM_V3D_SUBMIT_CL, struct drm_v3d_submit_cl) #define DRM_IOCTL_V3D_WAIT_BO DRM_IOWR(DRM_COMMAND_BASE + DRM_V3D_WAIT_BO, struct drm_v3d_wait_bo) @@ -43,6 +44,7 @@ #define DRM_IOCTL_V3D_MMAP_BO DRM_IOWR(DRM_COMMAND_BASE + DRM_V3D_MMAP_BO, struct drm_v3d_mmap_bo) #define DRM_IOCTL_V3D_GET_PARAM DRM_IOWR(DRM_COMMAND_BASE + DRM_V3D_GET_PARAM, struct drm_v3d_get_param) #define DRM_IOCTL_V3D_GET_BO_OFFSET DRM_IOWR(DRM_COMMAND_BASE + DRM_V3D_GET_BO_OFFSET, struct drm_v3d_get_bo_offset) +#define DRM_IOCTL_V3D_SUBMIT_TFU DRM_IOW(DRM_COMMAND_BASE + DRM_V3D_SUBMIT_TFU, struct drm_v3d_submit_tfu) /** * struct drm_v3d_submit_cl - ioctl argument for submitting commands to the 3D @@ -58,10 +60,15 @@ * coordinate shader to determine where primitives land on the screen, * then writes out the state updates and draw calls necessary per tile * to the tile allocation BO. + * + * This BCL will block on any previous BCL submitted on the + * same FD, but not on any RCL or BCLs submitted by other + * clients -- that is left up to the submitter to control + * using in_sync_bcl if necessary. */ __u32 bcl_start; - /** End address of the BCL (first byte after the BCL) */ + /** End address of the BCL (first byte after the BCL) */ __u32 bcl_end; /* Offset of the render command list. 
@@ -69,10 +76,15 @@ * This is the second set of commands executed, which will either * execute the tiles that have been set up by the BCL, or a fixed set * of tiles (in the case of RCL-only blits). + * + * This RCL will block on this submit's BCL, and any previous + * RCL submitted on the same FD, but not on any RCL or BCLs + * submitted by other clients -- that is left up to the + * submitter to control using in_sync_rcl if necessary. */ __u32 rcl_start; - /** End address of the RCL (first byte after the RCL) */ + /** End address of the RCL (first byte after the RCL) */ __u32 rcl_end; /** An optional sync object to wait on before starting the BCL. */ @@ -169,6 +181,7 @@ DRM_V3D_PARAM_V3D_CORE0_IDENT0, DRM_V3D_PARAM_V3D_CORE0_IDENT1, DRM_V3D_PARAM_V3D_CORE0_IDENT2, + DRM_V3D_PARAM_SUPPORTS_TFU, }; struct drm_v3d_get_param { @@ -187,6 +200,28 @@ __u32 offset; }; +struct drm_v3d_submit_tfu { + __u32 icfg; + __u32 iia; + __u32 iis; + __u32 ica; + __u32 iua; + __u32 ioa; + __u32 ios; + __u32 coef[4]; + /* First handle is the output BO, following are other inputs. + * 0 for unused. + */ + __u32 bo_handles[4]; + /* sync object to block on before running the TFU job. Each TFU + * job will execute in the order submitted to its FD. Synchronization + * against rendering jobs requires using sync objects. + */ + __u32 in_sync; + /* Sync object to signal when the TFU job is done. */ + __u32 out_sync; +}; + #if defined(__cplusplus) } #endif diff -Nru mesa-18.3.3/include/EGL/eglext.h mesa-19.0.1/include/EGL/eglext.h --- mesa-18.3.3/include/EGL/eglext.h 2018-12-07 18:58:04.000000000 +0000 +++ mesa-19.0.1/include/EGL/eglext.h 2019-03-31 23:16:37.000000000 +0000 @@ -28,17 +28,17 @@ ** MATERIALS OR THE USE OR OTHER DEALINGS IN THE MATERIALS. */ /* -** This header is generated from the Khronos OpenGL / OpenGL ES XML -** API Registry. The current version of the Registry, generator scripts +** This header is generated from the Khronos EGL XML API Registry. 
+** The current version of the Registry, generator scripts ** used to make the header, and the header can be found at ** http://www.khronos.org/registry/egl ** -** Khronos $Git commit SHA1: bae3518c48 $ on $Git commit date: 2018-05-17 10:56:57 -0700 $ +** Khronos $Git commit SHA1: 9ed2ec4c67 $ on $Git commit date: 2019-01-09 17:54:35 -0800 $ */ #include -#define EGL_EGLEXT_VERSION 20180517 +#define EGL_EGLEXT_VERSION 20190124 /* Generated C header for: * API: egl @@ -681,6 +681,7 @@ #ifndef EGL_EXT_device_drm #define EGL_EXT_device_drm 1 #define EGL_DRM_DEVICE_FILE_EXT 0x3233 +#define EGL_DRM_MASTER_FD_EXT 0x333C #endif /* EGL_EXT_device_drm */ #ifndef EGL_EXT_device_enumeration @@ -716,6 +717,11 @@ #define EGL_GL_COLORSPACE_DISPLAY_P3_LINEAR_EXT 0x3362 #endif /* EGL_EXT_gl_colorspace_display_p3_linear */ +#ifndef EGL_EXT_gl_colorspace_display_p3_passthrough +#define EGL_EXT_gl_colorspace_display_p3_passthrough 1 +#define EGL_GL_COLORSPACE_DISPLAY_P3_PASSTHROUGH_EXT 0x3490 +#endif /* EGL_EXT_gl_colorspace_display_p3_passthrough */ + #ifndef EGL_EXT_gl_colorspace_scrgb #define EGL_EXT_gl_colorspace_scrgb 1 #define EGL_GL_COLORSPACE_SCRGB_EXT 0x3351 @@ -1025,6 +1031,16 @@ #define EGL_PLATFORM_SURFACELESS_MESA 0x31DD #endif /* EGL_MESA_platform_surfaceless */ +#ifndef EGL_MESA_query_driver +#define EGL_MESA_query_driver 1 +typedef char *(EGLAPIENTRYP PFNEGLGETDISPLAYDRIVERCONFIGPROC) (EGLDisplay dpy); +typedef const char *(EGLAPIENTRYP PFNEGLGETDISPLAYDRIVERNAMEPROC) (EGLDisplay dpy); +#ifdef EGL_EGLEXT_PROTOTYPES +EGLAPI char *EGLAPIENTRY eglGetDisplayDriverConfig (EGLDisplay dpy); +EGLAPI const char *EGLAPIENTRY eglGetDisplayDriverName (EGLDisplay dpy); +#endif +#endif /* EGL_MESA_query_driver */ + #ifndef EGL_NOK_swap_region #define EGL_NOK_swap_region 1 typedef EGLBoolean (EGLAPIENTRYP PFNEGLSWAPBUFFERSREGIONNOKPROC) (EGLDisplay dpy, EGLSurface surface, EGLint numRects, const EGLint *rects); diff -Nru mesa-18.3.3/include/EGL/egl.h mesa-19.0.1/include/EGL/egl.h --- 
mesa-18.3.3/include/EGL/egl.h 2017-11-05 00:14:08.000000000 +0000 +++ mesa-19.0.1/include/EGL/egl.h 2019-03-31 23:16:37.000000000 +0000 @@ -28,17 +28,17 @@ ** MATERIALS OR THE USE OR OTHER DEALINGS IN THE MATERIALS. */ /* -** This header is generated from the Khronos OpenGL / OpenGL ES XML -** API Registry. The current version of the Registry, generator scripts +** This header is generated from the Khronos EGL XML API Registry. +** The current version of the Registry, generator scripts ** used to make the header, and the header can be found at ** http://www.khronos.org/registry/egl ** -** Khronos $Git commit SHA1: a732b061e7 $ on $Git commit date: 2017-06-17 23:27:53 +0100 $ +** Khronos $Git commit SHA1: 9ed2ec4c67 $ on $Git commit date: 2019-01-09 17:54:35 -0800 $ */ #include -/* Generated on date 20170627 */ +/* Generated on date 20190124 */ /* Generated C header for: * API: egl diff -Nru mesa-18.3.3/include/GL/internal/dri_interface.h mesa-19.0.1/include/GL/internal/dri_interface.h --- mesa-18.3.3/include/GL/internal/dri_interface.h 2019-02-01 12:03:20.000000000 +0000 +++ mesa-19.0.1/include/GL/internal/dri_interface.h 2019-03-31 23:16:37.000000000 +0000 @@ -1344,6 +1344,7 @@ #define __DRI_IMAGE_FOURCC_NV16 0x3631564e #define __DRI_IMAGE_FOURCC_YUYV 0x56595559 #define __DRI_IMAGE_FOURCC_UYVY 0x59565955 +#define __DRI_IMAGE_FOURCC_AYUV 0x56555941 #define __DRI_IMAGE_FOURCC_YVU410 0x39555659 #define __DRI_IMAGE_FOURCC_YVU411 0x31315659 @@ -1370,6 +1371,7 @@ #define __DRI_IMAGE_COMPONENTS_Y_UV 0x3004 #define __DRI_IMAGE_COMPONENTS_Y_XUXV 0x3005 #define __DRI_IMAGE_COMPONENTS_Y_UXVX 0x3008 +#define __DRI_IMAGE_COMPONENTS_AYUV 0x3009 #define __DRI_IMAGE_COMPONENTS_R 0x3006 #define __DRI_IMAGE_COMPONENTS_RG 0x3007 diff -Nru mesa-18.3.3/include/pci_ids/i965_pci_ids.h mesa-19.0.1/include/pci_ids/i965_pci_ids.h --- mesa-18.3.3/include/pci_ids/i965_pci_ids.h 2018-12-07 18:58:04.000000000 +0000 +++ mesa-19.0.1/include/pci_ids/i965_pci_ids.h 2019-03-31 23:16:37.000000000 
+0000 @@ -171,6 +171,7 @@ CHIPSET(0x3E90, cfl_gt1, "Intel(R) UHD Graphics 610 (Coffeelake 2x6 GT1)") CHIPSET(0x3E93, cfl_gt1, "Intel(R) UHD Graphics 610 (Coffeelake 2x6 GT1)") CHIPSET(0x3E99, cfl_gt1, "Intel(R) HD Graphics (Coffeelake 2x6 GT1)") +CHIPSET(0x3E9C, cfl_gt1, "Intel(R) HD Graphics (Coffeelake 2x6 GT1)") CHIPSET(0x3E91, cfl_gt2, "Intel(R) UHD Graphics 630 (Coffeelake 3x8 GT2)") CHIPSET(0x3E92, cfl_gt2, "Intel(R) UHD Graphics 630 (Coffeelake 3x8 GT2)") CHIPSET(0x3E96, cfl_gt2, "Intel(R) HD Graphics (Coffeelake 3x8 GT2)") @@ -203,6 +204,10 @@ CHIPSET(0x8A50, icl_8x8, "Intel(R) HD Graphics (Ice Lake 8x8 GT2)") CHIPSET(0x8A51, icl_8x8, "Intel(R) HD Graphics (Ice Lake 8x8 GT2)") CHIPSET(0x8A52, icl_8x8, "Intel(R) HD Graphics (Ice Lake 8x8 GT2)") +CHIPSET(0x8A56, icl_4x8, "Intel(R) HD Graphics (Ice Lake 4x8 GT1)") +CHIPSET(0x8A57, icl_6x8, "Intel(R) HD Graphics (Ice Lake 6x8 GT1.5)") +CHIPSET(0x8A58, icl_4x8, "Intel(R) HD Graphics (Ice Lake 4x8 GT1)") +CHIPSET(0x8A59, icl_6x8, "Intel(R) HD Graphics (Ice Lake 6x8 GT1.5)") CHIPSET(0x8A5A, icl_6x8, "Intel(R) HD Graphics (Ice Lake 6x8 GT1.5)") CHIPSET(0x8A5B, icl_4x8, "Intel(R) HD Graphics (Ice Lake 4x8 GT1)") CHIPSET(0x8A5C, icl_6x8, "Intel(R) HD Graphics (Ice Lake 6x8 GT1.5)") diff -Nru mesa-18.3.3/include/vulkan/vulkan_android.h mesa-19.0.1/include/vulkan/vulkan_android.h --- mesa-18.3.3/include/vulkan/vulkan_android.h 2018-04-11 19:02:35.000000000 +0000 +++ mesa-19.0.1/include/vulkan/vulkan_android.h 2019-03-31 23:16:37.000000000 +0000 @@ -6,7 +6,7 @@ #endif /* -** Copyright (c) 2015-2018 The Khronos Group Inc. +** Copyright (c) 2015-2019 The Khronos Group Inc. ** ** Licensed under the Apache License, Version 2.0 (the "License"); ** you may not use this file except in compliance with the License. 
diff -Nru mesa-18.3.3/include/vulkan/vulkan_core.h mesa-19.0.1/include/vulkan/vulkan_core.h --- mesa-18.3.3/include/vulkan/vulkan_core.h 2018-12-07 18:58:04.000000000 +0000 +++ mesa-19.0.1/include/vulkan/vulkan_core.h 2019-03-31 23:16:37.000000000 +0000 @@ -6,7 +6,7 @@ #endif /* -** Copyright (c) 2015-2018 The Khronos Group Inc. +** Copyright (c) 2015-2019 The Khronos Group Inc. ** ** Licensed under the Apache License, Version 2.0 (the "License"); ** you may not use this file except in compliance with the License. @@ -43,13 +43,12 @@ #define VK_VERSION_MINOR(version) (((uint32_t)(version) >> 12) & 0x3ff) #define VK_VERSION_PATCH(version) ((uint32_t)(version) & 0xfff) // Version of this file -#define VK_HEADER_VERSION 90 +#define VK_HEADER_VERSION 97 #define VK_NULL_HANDLE 0 - #define VK_DEFINE_HANDLE(object) typedef struct object##_T* object; @@ -62,7 +61,6 @@ #endif - typedef uint32_t VkFlags; typedef uint32_t VkBool32; typedef uint64_t VkDeviceSize; @@ -150,6 +148,7 @@ VK_ERROR_INVALID_DRM_FORMAT_MODIFIER_PLANE_LAYOUT_EXT = -1000158000, VK_ERROR_FRAGMENTATION_EXT = -1000161000, VK_ERROR_NOT_PERMITTED_EXT = -1000174001, + VK_ERROR_INVALID_DEVICE_ADDRESS_EXT = -1000244000, VK_ERROR_OUT_OF_POOL_MEMORY_KHR = VK_ERROR_OUT_OF_POOL_MEMORY, VK_ERROR_INVALID_EXTERNAL_HANDLE_KHR = VK_ERROR_INVALID_EXTERNAL_HANDLE, VK_RESULT_BEGIN_RANGE = VK_ERROR_FRAGMENTED_POOL, @@ -287,7 +286,6 @@ VK_STRUCTURE_TYPE_XLIB_SURFACE_CREATE_INFO_KHR = 1000004000, VK_STRUCTURE_TYPE_XCB_SURFACE_CREATE_INFO_KHR = 1000005000, VK_STRUCTURE_TYPE_WAYLAND_SURFACE_CREATE_INFO_KHR = 1000006000, - VK_STRUCTURE_TYPE_MIR_SURFACE_CREATE_INFO_KHR = 1000007000, VK_STRUCTURE_TYPE_ANDROID_SURFACE_CREATE_INFO_KHR = 1000008000, VK_STRUCTURE_TYPE_WIN32_SURFACE_CREATE_INFO_KHR = 1000009000, VK_STRUCTURE_TYPE_DEBUG_REPORT_CALLBACK_CREATE_INFO_EXT = 1000011000, @@ -330,6 +328,7 @@ VK_STRUCTURE_TYPE_COMMAND_BUFFER_INHERITANCE_CONDITIONAL_RENDERING_INFO_EXT = 1000081000, 
VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_CONDITIONAL_RENDERING_FEATURES_EXT = 1000081001, VK_STRUCTURE_TYPE_CONDITIONAL_RENDERING_BEGIN_INFO_EXT = 1000081002, + VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_FLOAT16_INT8_FEATURES_KHR = 1000082000, VK_STRUCTURE_TYPE_PRESENT_REGIONS_KHR = 1000084000, VK_STRUCTURE_TYPE_OBJECT_TABLE_CREATE_INFO_NVX = 1000086000, VK_STRUCTURE_TYPE_INDIRECT_COMMANDS_LAYOUT_CREATE_INFO_NVX = 1000086001, @@ -419,17 +418,17 @@ VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_SHADING_RATE_IMAGE_FEATURES_NV = 1000164001, VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_SHADING_RATE_IMAGE_PROPERTIES_NV = 1000164002, VK_STRUCTURE_TYPE_PIPELINE_VIEWPORT_COARSE_SAMPLE_ORDER_STATE_CREATE_INFO_NV = 1000164005, - VK_STRUCTURE_TYPE_RAYTRACING_PIPELINE_CREATE_INFO_NVX = 1000165000, - VK_STRUCTURE_TYPE_ACCELERATION_STRUCTURE_CREATE_INFO_NVX = 1000165001, - VK_STRUCTURE_TYPE_GEOMETRY_INSTANCE_NVX = 1000165002, - VK_STRUCTURE_TYPE_GEOMETRY_NVX = 1000165003, - VK_STRUCTURE_TYPE_GEOMETRY_TRIANGLES_NVX = 1000165004, - VK_STRUCTURE_TYPE_GEOMETRY_AABB_NVX = 1000165005, - VK_STRUCTURE_TYPE_BIND_ACCELERATION_STRUCTURE_MEMORY_INFO_NVX = 1000165006, - VK_STRUCTURE_TYPE_DESCRIPTOR_ACCELERATION_STRUCTURE_INFO_NVX = 1000165007, - VK_STRUCTURE_TYPE_ACCELERATION_STRUCTURE_MEMORY_REQUIREMENTS_INFO_NVX = 1000165008, - VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_RAYTRACING_PROPERTIES_NVX = 1000165009, - VK_STRUCTURE_TYPE_HIT_SHADER_MODULE_CREATE_INFO_NVX = 1000165010, + VK_STRUCTURE_TYPE_RAY_TRACING_PIPELINE_CREATE_INFO_NV = 1000165000, + VK_STRUCTURE_TYPE_ACCELERATION_STRUCTURE_CREATE_INFO_NV = 1000165001, + VK_STRUCTURE_TYPE_GEOMETRY_NV = 1000165003, + VK_STRUCTURE_TYPE_GEOMETRY_TRIANGLES_NV = 1000165004, + VK_STRUCTURE_TYPE_GEOMETRY_AABB_NV = 1000165005, + VK_STRUCTURE_TYPE_BIND_ACCELERATION_STRUCTURE_MEMORY_INFO_NV = 1000165006, + VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET_ACCELERATION_STRUCTURE_NV = 1000165007, + VK_STRUCTURE_TYPE_ACCELERATION_STRUCTURE_MEMORY_REQUIREMENTS_INFO_NV = 1000165008, + 
VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_RAY_TRACING_PROPERTIES_NV = 1000165009, + VK_STRUCTURE_TYPE_RAY_TRACING_SHADER_GROUP_CREATE_INFO_NV = 1000165011, + VK_STRUCTURE_TYPE_ACCELERATION_STRUCTURE_INFO_NV = 1000165012, VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_REPRESENTATIVE_FRAGMENT_TEST_FEATURES_NV = 1000166000, VK_STRUCTURE_TYPE_PIPELINE_REPRESENTATIVE_FRAGMENT_TEST_STATE_CREATE_INFO_NV = 1000166001, VK_STRUCTURE_TYPE_DEVICE_QUEUE_GLOBAL_PRIORITY_CREATE_INFO_EXT = 1000174000, @@ -440,10 +439,14 @@ VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_SHADER_ATOMIC_INT64_FEATURES_KHR = 1000180000, VK_STRUCTURE_TYPE_CALIBRATED_TIMESTAMP_INFO_EXT = 1000184000, VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_SHADER_CORE_PROPERTIES_AMD = 1000185000, + VK_STRUCTURE_TYPE_DEVICE_MEMORY_OVERALLOCATION_CREATE_INFO_AMD = 1000189000, VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_VERTEX_ATTRIBUTE_DIVISOR_PROPERTIES_EXT = 1000190000, VK_STRUCTURE_TYPE_PIPELINE_VERTEX_INPUT_DIVISOR_STATE_CREATE_INFO_EXT = 1000190001, VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_VERTEX_ATTRIBUTE_DIVISOR_FEATURES_EXT = 1000190002, VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_DRIVER_PROPERTIES_KHR = 1000196000, + VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_FLOAT_CONTROLS_PROPERTIES_KHR = 1000197000, + VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_DEPTH_STENCIL_RESOLVE_PROPERTIES_KHR = 1000199000, + VK_STRUCTURE_TYPE_SUBPASS_DESCRIPTION_DEPTH_STENCIL_RESOLVE_KHR = 1000199001, VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_COMPUTE_SHADER_DERIVATIVES_FEATURES_NV = 1000201000, VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_MESH_SHADER_FEATURES_NV = 1000202000, VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_MESH_SHADER_PROPERTIES_NV = 1000202001, @@ -456,6 +459,18 @@ VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_VULKAN_MEMORY_MODEL_FEATURES_KHR = 1000211000, VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_PCI_BUS_INFO_PROPERTIES_EXT = 1000212000, VK_STRUCTURE_TYPE_IMAGEPIPE_SURFACE_CREATE_INFO_FUCHSIA = 1000214000, + VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_FRAGMENT_DENSITY_MAP_FEATURES_EXT = 1000218000, + 
VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_FRAGMENT_DENSITY_MAP_PROPERTIES_EXT = 1000218001, + VK_STRUCTURE_TYPE_RENDER_PASS_FRAGMENT_DENSITY_MAP_CREATE_INFO_EXT = 1000218002, + VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_SCALAR_BLOCK_LAYOUT_FEATURES_EXT = 1000221000, + VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_MEMORY_BUDGET_PROPERTIES_EXT = 1000237000, + VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_MEMORY_PRIORITY_FEATURES_EXT = 1000238000, + VK_STRUCTURE_TYPE_MEMORY_PRIORITY_ALLOCATE_INFO_EXT = 1000238001, + VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_BUFFER_ADDRESS_FEATURES_EXT = 1000244000, + VK_STRUCTURE_TYPE_BUFFER_DEVICE_ADDRESS_INFO_EXT = 1000244001, + VK_STRUCTURE_TYPE_BUFFER_DEVICE_ADDRESS_CREATE_INFO_EXT = 1000244002, + VK_STRUCTURE_TYPE_IMAGE_STENCIL_USAGE_CREATE_INFO_EXT = 1000246000, + VK_STRUCTURE_TYPE_VALIDATION_FEATURES_EXT = 1000247000, VK_STRUCTURE_TYPE_DEBUG_REPORT_CREATE_INFO_EXT = VK_STRUCTURE_TYPE_DEBUG_REPORT_CALLBACK_CREATE_INFO_EXT, VK_STRUCTURE_TYPE_RENDER_PASS_MULTIVIEW_CREATE_INFO_KHR = VK_STRUCTURE_TYPE_RENDER_PASS_MULTIVIEW_CREATE_INFO, VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_MULTIVIEW_FEATURES_KHR = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_MULTIVIEW_FEATURES, @@ -848,7 +863,7 @@ VK_QUERY_TYPE_PIPELINE_STATISTICS = 1, VK_QUERY_TYPE_TIMESTAMP = 2, VK_QUERY_TYPE_TRANSFORM_FEEDBACK_STREAM_EXT = 1000028004, - VK_QUERY_TYPE_COMPACTED_SIZE_NVX = 1000165000, + VK_QUERY_TYPE_ACCELERATION_STRUCTURE_COMPACTED_SIZE_NV = 1000165000, VK_QUERY_TYPE_BEGIN_RANGE = VK_QUERY_TYPE_OCCLUSION, VK_QUERY_TYPE_END_RANGE = VK_QUERY_TYPE_TIMESTAMP, VK_QUERY_TYPE_RANGE_SIZE = (VK_QUERY_TYPE_TIMESTAMP - VK_QUERY_TYPE_OCCLUSION + 1), @@ -879,6 +894,7 @@ VK_IMAGE_LAYOUT_PRESENT_SRC_KHR = 1000001002, VK_IMAGE_LAYOUT_SHARED_PRESENT_KHR = 1000111000, VK_IMAGE_LAYOUT_SHADING_RATE_OPTIMAL_NV = 1000164003, + VK_IMAGE_LAYOUT_FRAGMENT_DENSITY_MAP_OPTIMAL_EXT = 1000218000, VK_IMAGE_LAYOUT_DEPTH_READ_ONLY_STENCIL_ATTACHMENT_OPTIMAL_KHR = VK_IMAGE_LAYOUT_DEPTH_READ_ONLY_STENCIL_ATTACHMENT_OPTIMAL, 
VK_IMAGE_LAYOUT_DEPTH_ATTACHMENT_STENCIL_READ_ONLY_OPTIMAL_KHR = VK_IMAGE_LAYOUT_DEPTH_ATTACHMENT_STENCIL_READ_ONLY_OPTIMAL, VK_IMAGE_LAYOUT_BEGIN_RANGE = VK_IMAGE_LAYOUT_UNDEFINED, @@ -1178,7 +1194,7 @@ VK_DESCRIPTOR_TYPE_STORAGE_BUFFER_DYNAMIC = 9, VK_DESCRIPTOR_TYPE_INPUT_ATTACHMENT = 10, VK_DESCRIPTOR_TYPE_INLINE_UNIFORM_BLOCK_EXT = 1000138000, - VK_DESCRIPTOR_TYPE_ACCELERATION_STRUCTURE_NVX = 1000165000, + VK_DESCRIPTOR_TYPE_ACCELERATION_STRUCTURE_NV = 1000165000, VK_DESCRIPTOR_TYPE_BEGIN_RANGE = VK_DESCRIPTOR_TYPE_SAMPLER, VK_DESCRIPTOR_TYPE_END_RANGE = VK_DESCRIPTOR_TYPE_INPUT_ATTACHMENT, VK_DESCRIPTOR_TYPE_RANGE_SIZE = (VK_DESCRIPTOR_TYPE_INPUT_ATTACHMENT - VK_DESCRIPTOR_TYPE_SAMPLER + 1), @@ -1207,7 +1223,7 @@ typedef enum VkPipelineBindPoint { VK_PIPELINE_BIND_POINT_GRAPHICS = 0, VK_PIPELINE_BIND_POINT_COMPUTE = 1, - VK_PIPELINE_BIND_POINT_RAYTRACING_NVX = 1000165000, + VK_PIPELINE_BIND_POINT_RAY_TRACING_NV = 1000165000, VK_PIPELINE_BIND_POINT_BEGIN_RANGE = VK_PIPELINE_BIND_POINT_GRAPHICS, VK_PIPELINE_BIND_POINT_END_RANGE = VK_PIPELINE_BIND_POINT_COMPUTE, VK_PIPELINE_BIND_POINT_RANGE_SIZE = (VK_PIPELINE_BIND_POINT_COMPUTE - VK_PIPELINE_BIND_POINT_GRAPHICS + 1), @@ -1226,6 +1242,7 @@ typedef enum VkIndexType { VK_INDEX_TYPE_UINT16 = 0, VK_INDEX_TYPE_UINT32 = 1, + VK_INDEX_TYPE_NONE_NV = 1000165000, VK_INDEX_TYPE_BEGIN_RANGE = VK_INDEX_TYPE_UINT16, VK_INDEX_TYPE_END_RANGE = VK_INDEX_TYPE_UINT32, VK_INDEX_TYPE_RANGE_SIZE = (VK_INDEX_TYPE_UINT32 - VK_INDEX_TYPE_UINT16 + 1), @@ -1279,7 +1296,7 @@ VK_OBJECT_TYPE_INDIRECT_COMMANDS_LAYOUT_NVX = 1000086001, VK_OBJECT_TYPE_DEBUG_UTILS_MESSENGER_EXT = 1000128000, VK_OBJECT_TYPE_VALIDATION_CACHE_EXT = 1000160000, - VK_OBJECT_TYPE_ACCELERATION_STRUCTURE_NVX = 1000165000, + VK_OBJECT_TYPE_ACCELERATION_STRUCTURE_NV = 1000165000, VK_OBJECT_TYPE_DESCRIPTOR_UPDATE_TEMPLATE_KHR = VK_OBJECT_TYPE_DESCRIPTOR_UPDATE_TEMPLATE, VK_OBJECT_TYPE_SAMPLER_YCBCR_CONVERSION_KHR = VK_OBJECT_TYPE_SAMPLER_YCBCR_CONVERSION, 
VK_OBJECT_TYPE_BEGIN_RANGE = VK_OBJECT_TYPE_UNKNOWN, @@ -1325,6 +1342,7 @@ VK_FORMAT_FEATURE_COSITED_CHROMA_SAMPLES_BIT = 0x00800000, VK_FORMAT_FEATURE_SAMPLED_IMAGE_FILTER_CUBIC_BIT_IMG = 0x00002000, VK_FORMAT_FEATURE_SAMPLED_IMAGE_FILTER_MINMAX_BIT_EXT = 0x00010000, + VK_FORMAT_FEATURE_FRAGMENT_DENSITY_MAP_BIT_EXT = 0x01000000, VK_FORMAT_FEATURE_TRANSFER_SRC_BIT_KHR = VK_FORMAT_FEATURE_TRANSFER_SRC_BIT, VK_FORMAT_FEATURE_TRANSFER_DST_BIT_KHR = VK_FORMAT_FEATURE_TRANSFER_DST_BIT, VK_FORMAT_FEATURE_MIDPOINT_CHROMA_SAMPLES_BIT_KHR = VK_FORMAT_FEATURE_MIDPOINT_CHROMA_SAMPLES_BIT, @@ -1348,6 +1366,7 @@ VK_IMAGE_USAGE_TRANSIENT_ATTACHMENT_BIT = 0x00000040, VK_IMAGE_USAGE_INPUT_ATTACHMENT_BIT = 0x00000080, VK_IMAGE_USAGE_SHADING_RATE_IMAGE_BIT_NV = 0x00000100, + VK_IMAGE_USAGE_FRAGMENT_DENSITY_MAP_BIT_EXT = 0x00000200, VK_IMAGE_USAGE_FLAG_BITS_MAX_ENUM = 0x7FFFFFFF } VkImageUsageFlagBits; typedef VkFlags VkImageUsageFlags; @@ -1367,6 +1386,7 @@ VK_IMAGE_CREATE_DISJOINT_BIT = 0x00000200, VK_IMAGE_CREATE_CORNER_SAMPLED_BIT_NV = 0x00002000, VK_IMAGE_CREATE_SAMPLE_LOCATIONS_COMPATIBLE_DEPTH_BIT_EXT = 0x00001000, + VK_IMAGE_CREATE_SUBSAMPLED_BIT_EXT = 0x00004000, VK_IMAGE_CREATE_SPLIT_INSTANCE_BIND_REGIONS_BIT_KHR = VK_IMAGE_CREATE_SPLIT_INSTANCE_BIND_REGIONS_BIT, VK_IMAGE_CREATE_2D_ARRAY_COMPATIBLE_BIT_KHR = VK_IMAGE_CREATE_2D_ARRAY_COMPATIBLE_BIT, VK_IMAGE_CREATE_BLOCK_TEXEL_VIEW_COMPATIBLE_BIT_KHR = VK_IMAGE_CREATE_BLOCK_TEXEL_VIEW_COMPATIBLE_BIT, @@ -1447,9 +1467,11 @@ VK_PIPELINE_STAGE_CONDITIONAL_RENDERING_BIT_EXT = 0x00040000, VK_PIPELINE_STAGE_COMMAND_PROCESS_BIT_NVX = 0x00020000, VK_PIPELINE_STAGE_SHADING_RATE_IMAGE_BIT_NV = 0x00400000, - VK_PIPELINE_STAGE_RAYTRACING_BIT_NVX = 0x00200000, + VK_PIPELINE_STAGE_RAY_TRACING_SHADER_BIT_NV = 0x00200000, + VK_PIPELINE_STAGE_ACCELERATION_STRUCTURE_BUILD_BIT_NV = 0x02000000, VK_PIPELINE_STAGE_TASK_SHADER_BIT_NV = 0x00080000, VK_PIPELINE_STAGE_MESH_SHADER_BIT_NV = 0x00100000, + 
VK_PIPELINE_STAGE_FRAGMENT_DENSITY_PROCESS_BIT_EXT = 0x00800000, VK_PIPELINE_STAGE_FLAG_BITS_MAX_ENUM = 0x7FFFFFFF } VkPipelineStageFlagBits; typedef VkFlags VkPipelineStageFlags; @@ -1527,6 +1549,7 @@ VK_BUFFER_CREATE_SPARSE_RESIDENCY_BIT = 0x00000002, VK_BUFFER_CREATE_SPARSE_ALIASED_BIT = 0x00000004, VK_BUFFER_CREATE_PROTECTED_BIT = 0x00000008, + VK_BUFFER_CREATE_DEVICE_ADDRESS_CAPTURE_REPLAY_BIT_EXT = 0x00000010, VK_BUFFER_CREATE_FLAG_BITS_MAX_ENUM = 0x7FFFFFFF } VkBufferCreateFlagBits; typedef VkFlags VkBufferCreateFlags; @@ -1544,11 +1567,17 @@ VK_BUFFER_USAGE_TRANSFORM_FEEDBACK_BUFFER_BIT_EXT = 0x00000800, VK_BUFFER_USAGE_TRANSFORM_FEEDBACK_COUNTER_BUFFER_BIT_EXT = 0x00001000, VK_BUFFER_USAGE_CONDITIONAL_RENDERING_BIT_EXT = 0x00000200, - VK_BUFFER_USAGE_RAYTRACING_BIT_NVX = 0x00000400, + VK_BUFFER_USAGE_RAY_TRACING_BIT_NV = 0x00000400, + VK_BUFFER_USAGE_SHADER_DEVICE_ADDRESS_BIT_EXT = 0x00020000, VK_BUFFER_USAGE_FLAG_BITS_MAX_ENUM = 0x7FFFFFFF } VkBufferUsageFlagBits; typedef VkFlags VkBufferUsageFlags; typedef VkFlags VkBufferViewCreateFlags; + +typedef enum VkImageViewCreateFlagBits { + VK_IMAGE_VIEW_CREATE_FRAGMENT_DENSITY_MAP_DYNAMIC_BIT_EXT = 0x00000001, + VK_IMAGE_VIEW_CREATE_FLAG_BITS_MAX_ENUM = 0x7FFFFFFF +} VkImageViewCreateFlagBits; typedef VkFlags VkImageViewCreateFlags; typedef VkFlags VkShaderModuleCreateFlags; typedef VkFlags VkPipelineCacheCreateFlags; @@ -1559,7 +1588,7 @@ VK_PIPELINE_CREATE_DERIVATIVE_BIT = 0x00000004, VK_PIPELINE_CREATE_VIEW_INDEX_FROM_DEVICE_INDEX_BIT = 0x00000008, VK_PIPELINE_CREATE_DISPATCH_BASE = 0x00000010, - VK_PIPELINE_CREATE_DEFER_COMPILE_BIT_NVX = 0x00000020, + VK_PIPELINE_CREATE_DEFER_COMPILE_BIT_NV = 0x00000020, VK_PIPELINE_CREATE_VIEW_INDEX_FROM_DEVICE_INDEX_BIT_KHR = VK_PIPELINE_CREATE_VIEW_INDEX_FROM_DEVICE_INDEX_BIT, VK_PIPELINE_CREATE_DISPATCH_BASE_KHR = VK_PIPELINE_CREATE_DISPATCH_BASE, VK_PIPELINE_CREATE_FLAG_BITS_MAX_ENUM = 0x7FFFFFFF @@ -1576,12 +1605,12 @@ VK_SHADER_STAGE_COMPUTE_BIT = 0x00000020, 
VK_SHADER_STAGE_ALL_GRAPHICS = 0x0000001F, VK_SHADER_STAGE_ALL = 0x7FFFFFFF, - VK_SHADER_STAGE_RAYGEN_BIT_NVX = 0x00000100, - VK_SHADER_STAGE_ANY_HIT_BIT_NVX = 0x00000200, - VK_SHADER_STAGE_CLOSEST_HIT_BIT_NVX = 0x00000400, - VK_SHADER_STAGE_MISS_BIT_NVX = 0x00000800, - VK_SHADER_STAGE_INTERSECTION_BIT_NVX = 0x00001000, - VK_SHADER_STAGE_CALLABLE_BIT_NVX = 0x00002000, + VK_SHADER_STAGE_RAYGEN_BIT_NV = 0x00000100, + VK_SHADER_STAGE_ANY_HIT_BIT_NV = 0x00000200, + VK_SHADER_STAGE_CLOSEST_HIT_BIT_NV = 0x00000400, + VK_SHADER_STAGE_MISS_BIT_NV = 0x00000800, + VK_SHADER_STAGE_INTERSECTION_BIT_NV = 0x00001000, + VK_SHADER_STAGE_CALLABLE_BIT_NV = 0x00002000, VK_SHADER_STAGE_TASK_BIT_NV = 0x00000040, VK_SHADER_STAGE_MESH_BIT_NV = 0x00000080, VK_SHADER_STAGE_FLAG_BITS_MAX_ENUM = 0x7FFFFFFF @@ -1615,6 +1644,12 @@ typedef VkFlags VkPipelineDynamicStateCreateFlags; typedef VkFlags VkPipelineLayoutCreateFlags; typedef VkFlags VkShaderStageFlags; + +typedef enum VkSamplerCreateFlagBits { + VK_SAMPLER_CREATE_SUBSAMPLED_BIT_EXT = 0x00000001, + VK_SAMPLER_CREATE_SUBSAMPLED_COARSE_RECONSTRUCTION_BIT_EXT = 0x00000002, + VK_SAMPLER_CREATE_FLAG_BITS_MAX_ENUM = 0x7FFFFFFF +} VkSamplerCreateFlagBits; typedef VkFlags VkSamplerCreateFlags; typedef enum VkDescriptorSetLayoutCreateFlagBits { @@ -1673,8 +1708,9 @@ VK_ACCESS_COMMAND_PROCESS_WRITE_BIT_NVX = 0x00040000, VK_ACCESS_COLOR_ATTACHMENT_READ_NONCOHERENT_BIT_EXT = 0x00080000, VK_ACCESS_SHADING_RATE_IMAGE_READ_BIT_NV = 0x00800000, - VK_ACCESS_ACCELERATION_STRUCTURE_READ_BIT_NVX = 0x00200000, - VK_ACCESS_ACCELERATION_STRUCTURE_WRITE_BIT_NVX = 0x00400000, + VK_ACCESS_ACCELERATION_STRUCTURE_READ_BIT_NV = 0x00200000, + VK_ACCESS_ACCELERATION_STRUCTURE_WRITE_BIT_NV = 0x00400000, + VK_ACCESS_FRAGMENT_DENSITY_MAP_READ_BIT_EXT = 0x01000000, VK_ACCESS_FLAG_BITS_MAX_ENUM = 0x7FFFFFFF } VkAccessFlagBits; typedef VkFlags VkAccessFlags; @@ -4355,7 +4391,7 @@ typedef struct VkDescriptorUpdateTemplateCreateInfo { VkStructureType sType; - void* pNext; + 
const void* pNext; VkDescriptorUpdateTemplateCreateFlags flags; uint32_t descriptorUpdateEntryCount; const VkDescriptorUpdateTemplateEntry* pDescriptorUpdateEntries; @@ -4794,6 +4830,7 @@ typedef enum VkSwapchainCreateFlagBitsKHR { VK_SWAPCHAIN_CREATE_SPLIT_INSTANCE_BIND_REGIONS_BIT_KHR = 0x00000001, VK_SWAPCHAIN_CREATE_PROTECTED_BIT_KHR = 0x00000002, + VK_SWAPCHAIN_CREATE_MUTABLE_FORMAT_BIT_KHR = 0x00000004, VK_SWAPCHAIN_CREATE_FLAG_BITS_MAX_ENUM_KHR = 0x7FFFFFFF } VkSwapchainCreateFlagBitsKHR; typedef VkFlags VkSwapchainCreateFlagsKHR; @@ -5470,6 +5507,19 @@ const void* pData); #endif +#define VK_KHR_shader_float16_int8 1 +#define VK_KHR_SHADER_FLOAT16_INT8_SPEC_VERSION 1 +#define VK_KHR_SHADER_FLOAT16_INT8_EXTENSION_NAME "VK_KHR_shader_float16_int8" + +typedef struct VkPhysicalDeviceFloat16Int8FeaturesKHR { + VkStructureType sType; + void* pNext; + VkBool32 shaderFloat16; + VkBool32 shaderInt8; +} VkPhysicalDeviceFloat16Int8FeaturesKHR; + + + #define VK_KHR_16bit_storage 1 #define VK_KHR_16BIT_STORAGE_SPEC_VERSION 1 #define VK_KHR_16BIT_STORAGE_EXTENSION_NAME "VK_KHR_16bit_storage" @@ -6101,9 +6151,10 @@ VK_DRIVER_ID_IMAGINATION_PROPRIETARY_KHR = 7, VK_DRIVER_ID_QUALCOMM_PROPRIETARY_KHR = 8, VK_DRIVER_ID_ARM_PROPRIETARY_KHR = 9, + VK_DRIVER_ID_GOOGLE_PASTEL_KHR = 10, VK_DRIVER_ID_BEGIN_RANGE_KHR = VK_DRIVER_ID_AMD_PROPRIETARY_KHR, - VK_DRIVER_ID_END_RANGE_KHR = VK_DRIVER_ID_ARM_PROPRIETARY_KHR, - VK_DRIVER_ID_RANGE_SIZE_KHR = (VK_DRIVER_ID_ARM_PROPRIETARY_KHR - VK_DRIVER_ID_AMD_PROPRIETARY_KHR + 1), + VK_DRIVER_ID_END_RANGE_KHR = VK_DRIVER_ID_GOOGLE_PASTEL_KHR, + VK_DRIVER_ID_RANGE_SIZE_KHR = (VK_DRIVER_ID_GOOGLE_PASTEL_KHR - VK_DRIVER_ID_AMD_PROPRIETARY_KHR + 1), VK_DRIVER_ID_MAX_ENUM_KHR = 0x7FFFFFFF } VkDriverIdKHR; @@ -6125,6 +6176,73 @@ +#define VK_KHR_shader_float_controls 1 +#define VK_KHR_SHADER_FLOAT_CONTROLS_SPEC_VERSION 1 +#define VK_KHR_SHADER_FLOAT_CONTROLS_EXTENSION_NAME "VK_KHR_shader_float_controls" + +typedef struct 
VkPhysicalDeviceFloatControlsPropertiesKHR { + VkStructureType sType; + void* pNext; + VkBool32 separateDenormSettings; + VkBool32 separateRoundingModeSettings; + VkBool32 shaderSignedZeroInfNanPreserveFloat16; + VkBool32 shaderSignedZeroInfNanPreserveFloat32; + VkBool32 shaderSignedZeroInfNanPreserveFloat64; + VkBool32 shaderDenormPreserveFloat16; + VkBool32 shaderDenormPreserveFloat32; + VkBool32 shaderDenormPreserveFloat64; + VkBool32 shaderDenormFlushToZeroFloat16; + VkBool32 shaderDenormFlushToZeroFloat32; + VkBool32 shaderDenormFlushToZeroFloat64; + VkBool32 shaderRoundingModeRTEFloat16; + VkBool32 shaderRoundingModeRTEFloat32; + VkBool32 shaderRoundingModeRTEFloat64; + VkBool32 shaderRoundingModeRTZFloat16; + VkBool32 shaderRoundingModeRTZFloat32; + VkBool32 shaderRoundingModeRTZFloat64; +} VkPhysicalDeviceFloatControlsPropertiesKHR; + + + +#define VK_KHR_depth_stencil_resolve 1 +#define VK_KHR_DEPTH_STENCIL_RESOLVE_SPEC_VERSION 1 +#define VK_KHR_DEPTH_STENCIL_RESOLVE_EXTENSION_NAME "VK_KHR_depth_stencil_resolve" + + +typedef enum VkResolveModeFlagBitsKHR { + VK_RESOLVE_MODE_NONE_KHR = 0, + VK_RESOLVE_MODE_SAMPLE_ZERO_BIT_KHR = 0x00000001, + VK_RESOLVE_MODE_AVERAGE_BIT_KHR = 0x00000002, + VK_RESOLVE_MODE_MIN_BIT_KHR = 0x00000004, + VK_RESOLVE_MODE_MAX_BIT_KHR = 0x00000008, + VK_RESOLVE_MODE_FLAG_BITS_MAX_ENUM_KHR = 0x7FFFFFFF +} VkResolveModeFlagBitsKHR; +typedef VkFlags VkResolveModeFlagsKHR; + +typedef struct VkSubpassDescriptionDepthStencilResolveKHR { + VkStructureType sType; + const void* pNext; + VkResolveModeFlagBitsKHR depthResolveMode; + VkResolveModeFlagBitsKHR stencilResolveMode; + const VkAttachmentReference2KHR* pDepthStencilResolveAttachment; +} VkSubpassDescriptionDepthStencilResolveKHR; + +typedef struct VkPhysicalDeviceDepthStencilResolvePropertiesKHR { + VkStructureType sType; + void* pNext; + VkResolveModeFlagsKHR supportedDepthResolveModes; + VkResolveModeFlagsKHR supportedStencilResolveModes; + VkBool32 independentResolveNone; + VkBool32 
independentResolve; +} VkPhysicalDeviceDepthStencilResolvePropertiesKHR; + + + +#define VK_KHR_swapchain_mutable_format 1 +#define VK_KHR_SWAPCHAIN_MUTABLE_FORMAT_SPEC_VERSION 1 +#define VK_KHR_SWAPCHAIN_MUTABLE_FORMAT_EXTENSION_NAME "VK_KHR_swapchain_mutable_format" + + #define VK_KHR_vulkan_memory_model 1 #define VK_KHR_VULKAN_MEMORY_MODEL_SPEC_VERSION 2 #define VK_KHR_VULKAN_MEMORY_MODEL_EXTENSION_NAME "VK_KHR_vulkan_memory_model" @@ -6182,7 +6300,7 @@ VK_DEBUG_REPORT_OBJECT_TYPE_VALIDATION_CACHE_EXT_EXT = 33, VK_DEBUG_REPORT_OBJECT_TYPE_SAMPLER_YCBCR_CONVERSION_EXT = 1000156000, VK_DEBUG_REPORT_OBJECT_TYPE_DESCRIPTOR_UPDATE_TEMPLATE_EXT = 1000085000, - VK_DEBUG_REPORT_OBJECT_TYPE_ACCELERATION_STRUCTURE_NVX_EXT = 1000165000, + VK_DEBUG_REPORT_OBJECT_TYPE_ACCELERATION_STRUCTURE_NV_EXT = 1000165000, VK_DEBUG_REPORT_OBJECT_TYPE_DEBUG_REPORT_EXT = VK_DEBUG_REPORT_OBJECT_TYPE_DEBUG_REPORT_CALLBACK_EXT_EXT, VK_DEBUG_REPORT_OBJECT_TYPE_VALIDATION_CACHE_EXT = VK_DEBUG_REPORT_OBJECT_TYPE_VALIDATION_CACHE_EXT_EXT, VK_DEBUG_REPORT_OBJECT_TYPE_DESCRIPTOR_UPDATE_TEMPLATE_KHR_EXT = VK_DEBUG_REPORT_OBJECT_TYPE_DESCRIPTOR_UPDATE_TEMPLATE_EXT, @@ -7446,11 +7564,11 @@ int32_t messageIdNumber; const char* pMessage; uint32_t queueLabelCount; - VkDebugUtilsLabelEXT* pQueueLabels; + const VkDebugUtilsLabelEXT* pQueueLabels; uint32_t cmdBufLabelCount; - VkDebugUtilsLabelEXT* pCmdBufLabels; + const VkDebugUtilsLabelEXT* pCmdBufLabels; uint32_t objectCount; - VkDebugUtilsObjectNameInfoEXT* pObjects; + const VkDebugUtilsObjectNameInfoEXT* pObjects; } VkDebugUtilsMessengerCallbackDataEXT; typedef VkBool32 (VKAPI_PTR *PFN_vkDebugUtilsMessengerCallbackEXT)( @@ -7791,8 +7909,6 @@ #define VK_EXT_image_drm_format_modifier 1 -#define VK_EXT_EXTENSION_159_SPEC_VERSION 0 -#define VK_EXT_EXTENSION_159_EXTENSION_NAME "VK_EXT_extension_159" #define VK_EXT_IMAGE_DRM_FORMAT_MODIFIER_SPEC_VERSION 1 #define VK_EXT_IMAGE_DRM_FORMAT_MODIFIER_EXTENSION_NAME "VK_EXT_image_drm_format_modifier" @@ -8113,81 
+8229,113 @@ const VkCoarseSampleOrderCustomNV* pCustomSampleOrders); #endif -#define VK_NVX_raytracing 1 -VK_DEFINE_NON_DISPATCHABLE_HANDLE(VkAccelerationStructureNVX) +#define VK_NV_ray_tracing 1 +VK_DEFINE_NON_DISPATCHABLE_HANDLE(VkAccelerationStructureNV) -#define VK_NVX_RAYTRACING_SPEC_VERSION 1 -#define VK_NVX_RAYTRACING_EXTENSION_NAME "VK_NVX_raytracing" +#define VK_NV_RAY_TRACING_SPEC_VERSION 3 +#define VK_NV_RAY_TRACING_EXTENSION_NAME "VK_NV_ray_tracing" +#define VK_SHADER_UNUSED_NV (~0U) + + +typedef enum VkRayTracingShaderGroupTypeNV { + VK_RAY_TRACING_SHADER_GROUP_TYPE_GENERAL_NV = 0, + VK_RAY_TRACING_SHADER_GROUP_TYPE_TRIANGLES_HIT_GROUP_NV = 1, + VK_RAY_TRACING_SHADER_GROUP_TYPE_PROCEDURAL_HIT_GROUP_NV = 2, + VK_RAY_TRACING_SHADER_GROUP_TYPE_BEGIN_RANGE_NV = VK_RAY_TRACING_SHADER_GROUP_TYPE_GENERAL_NV, + VK_RAY_TRACING_SHADER_GROUP_TYPE_END_RANGE_NV = VK_RAY_TRACING_SHADER_GROUP_TYPE_PROCEDURAL_HIT_GROUP_NV, + VK_RAY_TRACING_SHADER_GROUP_TYPE_RANGE_SIZE_NV = (VK_RAY_TRACING_SHADER_GROUP_TYPE_PROCEDURAL_HIT_GROUP_NV - VK_RAY_TRACING_SHADER_GROUP_TYPE_GENERAL_NV + 1), + VK_RAY_TRACING_SHADER_GROUP_TYPE_MAX_ENUM_NV = 0x7FFFFFFF +} VkRayTracingShaderGroupTypeNV; + +typedef enum VkGeometryTypeNV { + VK_GEOMETRY_TYPE_TRIANGLES_NV = 0, + VK_GEOMETRY_TYPE_AABBS_NV = 1, + VK_GEOMETRY_TYPE_BEGIN_RANGE_NV = VK_GEOMETRY_TYPE_TRIANGLES_NV, + VK_GEOMETRY_TYPE_END_RANGE_NV = VK_GEOMETRY_TYPE_AABBS_NV, + VK_GEOMETRY_TYPE_RANGE_SIZE_NV = (VK_GEOMETRY_TYPE_AABBS_NV - VK_GEOMETRY_TYPE_TRIANGLES_NV + 1), + VK_GEOMETRY_TYPE_MAX_ENUM_NV = 0x7FFFFFFF +} VkGeometryTypeNV; + +typedef enum VkAccelerationStructureTypeNV { + VK_ACCELERATION_STRUCTURE_TYPE_TOP_LEVEL_NV = 0, + VK_ACCELERATION_STRUCTURE_TYPE_BOTTOM_LEVEL_NV = 1, + VK_ACCELERATION_STRUCTURE_TYPE_BEGIN_RANGE_NV = VK_ACCELERATION_STRUCTURE_TYPE_TOP_LEVEL_NV, + VK_ACCELERATION_STRUCTURE_TYPE_END_RANGE_NV = VK_ACCELERATION_STRUCTURE_TYPE_BOTTOM_LEVEL_NV, + VK_ACCELERATION_STRUCTURE_TYPE_RANGE_SIZE_NV = 
(VK_ACCELERATION_STRUCTURE_TYPE_BOTTOM_LEVEL_NV - VK_ACCELERATION_STRUCTURE_TYPE_TOP_LEVEL_NV + 1), + VK_ACCELERATION_STRUCTURE_TYPE_MAX_ENUM_NV = 0x7FFFFFFF +} VkAccelerationStructureTypeNV; + +typedef enum VkCopyAccelerationStructureModeNV { + VK_COPY_ACCELERATION_STRUCTURE_MODE_CLONE_NV = 0, + VK_COPY_ACCELERATION_STRUCTURE_MODE_COMPACT_NV = 1, + VK_COPY_ACCELERATION_STRUCTURE_MODE_BEGIN_RANGE_NV = VK_COPY_ACCELERATION_STRUCTURE_MODE_CLONE_NV, + VK_COPY_ACCELERATION_STRUCTURE_MODE_END_RANGE_NV = VK_COPY_ACCELERATION_STRUCTURE_MODE_COMPACT_NV, + VK_COPY_ACCELERATION_STRUCTURE_MODE_RANGE_SIZE_NV = (VK_COPY_ACCELERATION_STRUCTURE_MODE_COMPACT_NV - VK_COPY_ACCELERATION_STRUCTURE_MODE_CLONE_NV + 1), + VK_COPY_ACCELERATION_STRUCTURE_MODE_MAX_ENUM_NV = 0x7FFFFFFF +} VkCopyAccelerationStructureModeNV; + +typedef enum VkAccelerationStructureMemoryRequirementsTypeNV { + VK_ACCELERATION_STRUCTURE_MEMORY_REQUIREMENTS_TYPE_OBJECT_NV = 0, + VK_ACCELERATION_STRUCTURE_MEMORY_REQUIREMENTS_TYPE_BUILD_SCRATCH_NV = 1, + VK_ACCELERATION_STRUCTURE_MEMORY_REQUIREMENTS_TYPE_UPDATE_SCRATCH_NV = 2, + VK_ACCELERATION_STRUCTURE_MEMORY_REQUIREMENTS_TYPE_BEGIN_RANGE_NV = VK_ACCELERATION_STRUCTURE_MEMORY_REQUIREMENTS_TYPE_OBJECT_NV, + VK_ACCELERATION_STRUCTURE_MEMORY_REQUIREMENTS_TYPE_END_RANGE_NV = VK_ACCELERATION_STRUCTURE_MEMORY_REQUIREMENTS_TYPE_UPDATE_SCRATCH_NV, + VK_ACCELERATION_STRUCTURE_MEMORY_REQUIREMENTS_TYPE_RANGE_SIZE_NV = (VK_ACCELERATION_STRUCTURE_MEMORY_REQUIREMENTS_TYPE_UPDATE_SCRATCH_NV - VK_ACCELERATION_STRUCTURE_MEMORY_REQUIREMENTS_TYPE_OBJECT_NV + 1), + VK_ACCELERATION_STRUCTURE_MEMORY_REQUIREMENTS_TYPE_MAX_ENUM_NV = 0x7FFFFFFF +} VkAccelerationStructureMemoryRequirementsTypeNV; + + +typedef enum VkGeometryFlagBitsNV { + VK_GEOMETRY_OPAQUE_BIT_NV = 0x00000001, + VK_GEOMETRY_NO_DUPLICATE_ANY_HIT_INVOCATION_BIT_NV = 0x00000002, + VK_GEOMETRY_FLAG_BITS_MAX_ENUM_NV = 0x7FFFFFFF +} VkGeometryFlagBitsNV; +typedef VkFlags VkGeometryFlagsNV; + +typedef enum 
VkGeometryInstanceFlagBitsNV { + VK_GEOMETRY_INSTANCE_TRIANGLE_CULL_DISABLE_BIT_NV = 0x00000001, + VK_GEOMETRY_INSTANCE_TRIANGLE_FRONT_COUNTERCLOCKWISE_BIT_NV = 0x00000002, + VK_GEOMETRY_INSTANCE_FORCE_OPAQUE_BIT_NV = 0x00000004, + VK_GEOMETRY_INSTANCE_FORCE_NO_OPAQUE_BIT_NV = 0x00000008, + VK_GEOMETRY_INSTANCE_FLAG_BITS_MAX_ENUM_NV = 0x7FFFFFFF +} VkGeometryInstanceFlagBitsNV; +typedef VkFlags VkGeometryInstanceFlagsNV; + +typedef enum VkBuildAccelerationStructureFlagBitsNV { + VK_BUILD_ACCELERATION_STRUCTURE_ALLOW_UPDATE_BIT_NV = 0x00000001, + VK_BUILD_ACCELERATION_STRUCTURE_ALLOW_COMPACTION_BIT_NV = 0x00000002, + VK_BUILD_ACCELERATION_STRUCTURE_PREFER_FAST_TRACE_BIT_NV = 0x00000004, + VK_BUILD_ACCELERATION_STRUCTURE_PREFER_FAST_BUILD_BIT_NV = 0x00000008, + VK_BUILD_ACCELERATION_STRUCTURE_LOW_MEMORY_BIT_NV = 0x00000010, + VK_BUILD_ACCELERATION_STRUCTURE_FLAG_BITS_MAX_ENUM_NV = 0x7FFFFFFF +} VkBuildAccelerationStructureFlagBitsNV; +typedef VkFlags VkBuildAccelerationStructureFlagsNV; +typedef struct VkRayTracingShaderGroupCreateInfoNV { + VkStructureType sType; + const void* pNext; + VkRayTracingShaderGroupTypeNV type; + uint32_t generalShader; + uint32_t closestHitShader; + uint32_t anyHitShader; + uint32_t intersectionShader; +} VkRayTracingShaderGroupCreateInfoNV; -typedef enum VkGeometryTypeNVX { - VK_GEOMETRY_TYPE_TRIANGLES_NVX = 0, - VK_GEOMETRY_TYPE_AABBS_NVX = 1, - VK_GEOMETRY_TYPE_BEGIN_RANGE_NVX = VK_GEOMETRY_TYPE_TRIANGLES_NVX, - VK_GEOMETRY_TYPE_END_RANGE_NVX = VK_GEOMETRY_TYPE_AABBS_NVX, - VK_GEOMETRY_TYPE_RANGE_SIZE_NVX = (VK_GEOMETRY_TYPE_AABBS_NVX - VK_GEOMETRY_TYPE_TRIANGLES_NVX + 1), - VK_GEOMETRY_TYPE_MAX_ENUM_NVX = 0x7FFFFFFF -} VkGeometryTypeNVX; - -typedef enum VkAccelerationStructureTypeNVX { - VK_ACCELERATION_STRUCTURE_TYPE_TOP_LEVEL_NVX = 0, - VK_ACCELERATION_STRUCTURE_TYPE_BOTTOM_LEVEL_NVX = 1, - VK_ACCELERATION_STRUCTURE_TYPE_BEGIN_RANGE_NVX = VK_ACCELERATION_STRUCTURE_TYPE_TOP_LEVEL_NVX, - VK_ACCELERATION_STRUCTURE_TYPE_END_RANGE_NVX = 
VK_ACCELERATION_STRUCTURE_TYPE_BOTTOM_LEVEL_NVX, - VK_ACCELERATION_STRUCTURE_TYPE_RANGE_SIZE_NVX = (VK_ACCELERATION_STRUCTURE_TYPE_BOTTOM_LEVEL_NVX - VK_ACCELERATION_STRUCTURE_TYPE_TOP_LEVEL_NVX + 1), - VK_ACCELERATION_STRUCTURE_TYPE_MAX_ENUM_NVX = 0x7FFFFFFF -} VkAccelerationStructureTypeNVX; - -typedef enum VkCopyAccelerationStructureModeNVX { - VK_COPY_ACCELERATION_STRUCTURE_MODE_CLONE_NVX = 0, - VK_COPY_ACCELERATION_STRUCTURE_MODE_COMPACT_NVX = 1, - VK_COPY_ACCELERATION_STRUCTURE_MODE_BEGIN_RANGE_NVX = VK_COPY_ACCELERATION_STRUCTURE_MODE_CLONE_NVX, - VK_COPY_ACCELERATION_STRUCTURE_MODE_END_RANGE_NVX = VK_COPY_ACCELERATION_STRUCTURE_MODE_COMPACT_NVX, - VK_COPY_ACCELERATION_STRUCTURE_MODE_RANGE_SIZE_NVX = (VK_COPY_ACCELERATION_STRUCTURE_MODE_COMPACT_NVX - VK_COPY_ACCELERATION_STRUCTURE_MODE_CLONE_NVX + 1), - VK_COPY_ACCELERATION_STRUCTURE_MODE_MAX_ENUM_NVX = 0x7FFFFFFF -} VkCopyAccelerationStructureModeNVX; - - -typedef enum VkGeometryFlagBitsNVX { - VK_GEOMETRY_OPAQUE_BIT_NVX = 0x00000001, - VK_GEOMETRY_NO_DUPLICATE_ANY_HIT_INVOCATION_BIT_NVX = 0x00000002, - VK_GEOMETRY_FLAG_BITS_MAX_ENUM_NVX = 0x7FFFFFFF -} VkGeometryFlagBitsNVX; -typedef VkFlags VkGeometryFlagsNVX; - -typedef enum VkGeometryInstanceFlagBitsNVX { - VK_GEOMETRY_INSTANCE_TRIANGLE_CULL_DISABLE_BIT_NVX = 0x00000001, - VK_GEOMETRY_INSTANCE_TRIANGLE_CULL_FLIP_WINDING_BIT_NVX = 0x00000002, - VK_GEOMETRY_INSTANCE_FORCE_OPAQUE_BIT_NVX = 0x00000004, - VK_GEOMETRY_INSTANCE_FORCE_NO_OPAQUE_BIT_NVX = 0x00000008, - VK_GEOMETRY_INSTANCE_FLAG_BITS_MAX_ENUM_NVX = 0x7FFFFFFF -} VkGeometryInstanceFlagBitsNVX; -typedef VkFlags VkGeometryInstanceFlagsNVX; - -typedef enum VkBuildAccelerationStructureFlagBitsNVX { - VK_BUILD_ACCELERATION_STRUCTURE_ALLOW_UPDATE_BIT_NVX = 0x00000001, - VK_BUILD_ACCELERATION_STRUCTURE_ALLOW_COMPACTION_BIT_NVX = 0x00000002, - VK_BUILD_ACCELERATION_STRUCTURE_PREFER_FAST_TRACE_BIT_NVX = 0x00000004, - VK_BUILD_ACCELERATION_STRUCTURE_PREFER_FAST_BUILD_BIT_NVX = 0x00000008, - 
VK_BUILD_ACCELERATION_STRUCTURE_LOW_MEMORY_BIT_NVX = 0x00000010, - VK_BUILD_ACCELERATION_STRUCTURE_FLAG_BITS_MAX_ENUM_NVX = 0x7FFFFFFF -} VkBuildAccelerationStructureFlagBitsNVX; -typedef VkFlags VkBuildAccelerationStructureFlagsNVX; - -typedef struct VkRaytracingPipelineCreateInfoNVX { - VkStructureType sType; - const void* pNext; - VkPipelineCreateFlags flags; - uint32_t stageCount; - const VkPipelineShaderStageCreateInfo* pStages; - const uint32_t* pGroupNumbers; - uint32_t maxRecursionDepth; - VkPipelineLayout layout; - VkPipeline basePipelineHandle; - int32_t basePipelineIndex; -} VkRaytracingPipelineCreateInfoNVX; +typedef struct VkRayTracingPipelineCreateInfoNV { + VkStructureType sType; + const void* pNext; + VkPipelineCreateFlags flags; + uint32_t stageCount; + const VkPipelineShaderStageCreateInfo* pStages; + uint32_t groupCount; + const VkRayTracingShaderGroupCreateInfoNV* pGroups; + uint32_t maxRecursionDepth; + VkPipelineLayout layout; + VkPipeline basePipelineHandle; + int32_t basePipelineIndex; +} VkRayTracingPipelineCreateInfoNV; -typedef struct VkGeometryTrianglesNVX { +typedef struct VkGeometryTrianglesNV { VkStructureType sType; const void* pNext; VkBuffer vertexData; @@ -8201,136 +8349,138 @@ VkIndexType indexType; VkBuffer transformData; VkDeviceSize transformOffset; -} VkGeometryTrianglesNVX; +} VkGeometryTrianglesNV; -typedef struct VkGeometryAABBNVX { +typedef struct VkGeometryAABBNV { VkStructureType sType; const void* pNext; VkBuffer aabbData; uint32_t numAABBs; uint32_t stride; VkDeviceSize offset; -} VkGeometryAABBNVX; +} VkGeometryAABBNV; -typedef struct VkGeometryDataNVX { - VkGeometryTrianglesNVX triangles; - VkGeometryAABBNVX aabbs; -} VkGeometryDataNVX; +typedef struct VkGeometryDataNV { + VkGeometryTrianglesNV triangles; + VkGeometryAABBNV aabbs; +} VkGeometryDataNV; -typedef struct VkGeometryNVX { - VkStructureType sType; - const void* pNext; - VkGeometryTypeNVX geometryType; - VkGeometryDataNVX geometry; - VkGeometryFlagsNVX 
flags; -} VkGeometryNVX; +typedef struct VkGeometryNV { + VkStructureType sType; + const void* pNext; + VkGeometryTypeNV geometryType; + VkGeometryDataNV geometry; + VkGeometryFlagsNV flags; +} VkGeometryNV; -typedef struct VkAccelerationStructureCreateInfoNVX { - VkStructureType sType; - const void* pNext; - VkAccelerationStructureTypeNVX type; - VkBuildAccelerationStructureFlagsNVX flags; - VkDeviceSize compactedSize; - uint32_t instanceCount; - uint32_t geometryCount; - const VkGeometryNVX* pGeometries; -} VkAccelerationStructureCreateInfoNVX; +typedef struct VkAccelerationStructureInfoNV { + VkStructureType sType; + const void* pNext; + VkAccelerationStructureTypeNV type; + VkBuildAccelerationStructureFlagsNV flags; + uint32_t instanceCount; + uint32_t geometryCount; + const VkGeometryNV* pGeometries; +} VkAccelerationStructureInfoNV; -typedef struct VkBindAccelerationStructureMemoryInfoNVX { - VkStructureType sType; - const void* pNext; - VkAccelerationStructureNVX accelerationStructure; - VkDeviceMemory memory; - VkDeviceSize memoryOffset; - uint32_t deviceIndexCount; - const uint32_t* pDeviceIndices; -} VkBindAccelerationStructureMemoryInfoNVX; +typedef struct VkAccelerationStructureCreateInfoNV { + VkStructureType sType; + const void* pNext; + VkDeviceSize compactedSize; + VkAccelerationStructureInfoNV info; +} VkAccelerationStructureCreateInfoNV; -typedef struct VkDescriptorAccelerationStructureInfoNVX { - VkStructureType sType; - const void* pNext; - uint32_t accelerationStructureCount; - const VkAccelerationStructureNVX* pAccelerationStructures; -} VkDescriptorAccelerationStructureInfoNVX; +typedef struct VkBindAccelerationStructureMemoryInfoNV { + VkStructureType sType; + const void* pNext; + VkAccelerationStructureNV accelerationStructure; + VkDeviceMemory memory; + VkDeviceSize memoryOffset; + uint32_t deviceIndexCount; + const uint32_t* pDeviceIndices; +} VkBindAccelerationStructureMemoryInfoNV; -typedef struct 
VkAccelerationStructureMemoryRequirementsInfoNVX { - VkStructureType sType; - const void* pNext; - VkAccelerationStructureNVX accelerationStructure; -} VkAccelerationStructureMemoryRequirementsInfoNVX; +typedef struct VkWriteDescriptorSetAccelerationStructureNV { + VkStructureType sType; + const void* pNext; + uint32_t accelerationStructureCount; + const VkAccelerationStructureNV* pAccelerationStructures; +} VkWriteDescriptorSetAccelerationStructureNV; + +typedef struct VkAccelerationStructureMemoryRequirementsInfoNV { + VkStructureType sType; + const void* pNext; + VkAccelerationStructureMemoryRequirementsTypeNV type; + VkAccelerationStructureNV accelerationStructure; +} VkAccelerationStructureMemoryRequirementsInfoNV; -typedef struct VkPhysicalDeviceRaytracingPropertiesNVX { +typedef struct VkPhysicalDeviceRayTracingPropertiesNV { VkStructureType sType; void* pNext; - uint32_t shaderHeaderSize; + uint32_t shaderGroupHandleSize; uint32_t maxRecursionDepth; - uint32_t maxGeometryCount; -} VkPhysicalDeviceRaytracingPropertiesNVX; - - -typedef VkResult (VKAPI_PTR *PFN_vkCreateAccelerationStructureNVX)(VkDevice device, const VkAccelerationStructureCreateInfoNVX* pCreateInfo, const VkAllocationCallbacks* pAllocator, VkAccelerationStructureNVX* pAccelerationStructure); -typedef void (VKAPI_PTR *PFN_vkDestroyAccelerationStructureNVX)(VkDevice device, VkAccelerationStructureNVX accelerationStructure, const VkAllocationCallbacks* pAllocator); -typedef void (VKAPI_PTR *PFN_vkGetAccelerationStructureMemoryRequirementsNVX)(VkDevice device, const VkAccelerationStructureMemoryRequirementsInfoNVX* pInfo, VkMemoryRequirements2KHR* pMemoryRequirements); -typedef void (VKAPI_PTR *PFN_vkGetAccelerationStructureScratchMemoryRequirementsNVX)(VkDevice device, const VkAccelerationStructureMemoryRequirementsInfoNVX* pInfo, VkMemoryRequirements2KHR* pMemoryRequirements); -typedef VkResult (VKAPI_PTR *PFN_vkBindAccelerationStructureMemoryNVX)(VkDevice device, uint32_t bindInfoCount, const 
VkBindAccelerationStructureMemoryInfoNVX* pBindInfos); -typedef void (VKAPI_PTR *PFN_vkCmdBuildAccelerationStructureNVX)(VkCommandBuffer commandBuffer, VkAccelerationStructureTypeNVX type, uint32_t instanceCount, VkBuffer instanceData, VkDeviceSize instanceOffset, uint32_t geometryCount, const VkGeometryNVX* pGeometries, VkBuildAccelerationStructureFlagsNVX flags, VkBool32 update, VkAccelerationStructureNVX dst, VkAccelerationStructureNVX src, VkBuffer scratch, VkDeviceSize scratchOffset); -typedef void (VKAPI_PTR *PFN_vkCmdCopyAccelerationStructureNVX)(VkCommandBuffer commandBuffer, VkAccelerationStructureNVX dst, VkAccelerationStructureNVX src, VkCopyAccelerationStructureModeNVX mode); -typedef void (VKAPI_PTR *PFN_vkCmdTraceRaysNVX)(VkCommandBuffer commandBuffer, VkBuffer raygenShaderBindingTableBuffer, VkDeviceSize raygenShaderBindingOffset, VkBuffer missShaderBindingTableBuffer, VkDeviceSize missShaderBindingOffset, VkDeviceSize missShaderBindingStride, VkBuffer hitShaderBindingTableBuffer, VkDeviceSize hitShaderBindingOffset, VkDeviceSize hitShaderBindingStride, uint32_t width, uint32_t height); -typedef VkResult (VKAPI_PTR *PFN_vkCreateRaytracingPipelinesNVX)(VkDevice device, VkPipelineCache pipelineCache, uint32_t createInfoCount, const VkRaytracingPipelineCreateInfoNVX* pCreateInfos, const VkAllocationCallbacks* pAllocator, VkPipeline* pPipelines); -typedef VkResult (VKAPI_PTR *PFN_vkGetRaytracingShaderHandlesNVX)(VkDevice device, VkPipeline pipeline, uint32_t firstGroup, uint32_t groupCount, size_t dataSize, void* pData); -typedef VkResult (VKAPI_PTR *PFN_vkGetAccelerationStructureHandleNVX)(VkDevice device, VkAccelerationStructureNVX accelerationStructure, size_t dataSize, void* pData); -typedef void (VKAPI_PTR *PFN_vkCmdWriteAccelerationStructurePropertiesNVX)(VkCommandBuffer commandBuffer, VkAccelerationStructureNVX accelerationStructure, VkQueryType queryType, VkQueryPool queryPool, uint32_t query); -typedef VkResult (VKAPI_PTR 
*PFN_vkCompileDeferredNVX)(VkDevice device, VkPipeline pipeline, uint32_t shader); + uint32_t maxShaderGroupStride; + uint32_t shaderGroupBaseAlignment; + uint64_t maxGeometryCount; + uint64_t maxInstanceCount; + uint64_t maxTriangleCount; + uint32_t maxDescriptorSetAccelerationStructures; +} VkPhysicalDeviceRayTracingPropertiesNV; + + +typedef VkResult (VKAPI_PTR *PFN_vkCreateAccelerationStructureNV)(VkDevice device, const VkAccelerationStructureCreateInfoNV* pCreateInfo, const VkAllocationCallbacks* pAllocator, VkAccelerationStructureNV* pAccelerationStructure); +typedef void (VKAPI_PTR *PFN_vkDestroyAccelerationStructureNV)(VkDevice device, VkAccelerationStructureNV accelerationStructure, const VkAllocationCallbacks* pAllocator); +typedef void (VKAPI_PTR *PFN_vkGetAccelerationStructureMemoryRequirementsNV)(VkDevice device, const VkAccelerationStructureMemoryRequirementsInfoNV* pInfo, VkMemoryRequirements2KHR* pMemoryRequirements); +typedef VkResult (VKAPI_PTR *PFN_vkBindAccelerationStructureMemoryNV)(VkDevice device, uint32_t bindInfoCount, const VkBindAccelerationStructureMemoryInfoNV* pBindInfos); +typedef void (VKAPI_PTR *PFN_vkCmdBuildAccelerationStructureNV)(VkCommandBuffer commandBuffer, const VkAccelerationStructureInfoNV* pInfo, VkBuffer instanceData, VkDeviceSize instanceOffset, VkBool32 update, VkAccelerationStructureNV dst, VkAccelerationStructureNV src, VkBuffer scratch, VkDeviceSize scratchOffset); +typedef void (VKAPI_PTR *PFN_vkCmdCopyAccelerationStructureNV)(VkCommandBuffer commandBuffer, VkAccelerationStructureNV dst, VkAccelerationStructureNV src, VkCopyAccelerationStructureModeNV mode); +typedef void (VKAPI_PTR *PFN_vkCmdTraceRaysNV)(VkCommandBuffer commandBuffer, VkBuffer raygenShaderBindingTableBuffer, VkDeviceSize raygenShaderBindingOffset, VkBuffer missShaderBindingTableBuffer, VkDeviceSize missShaderBindingOffset, VkDeviceSize missShaderBindingStride, VkBuffer hitShaderBindingTableBuffer, VkDeviceSize hitShaderBindingOffset, VkDeviceSize 
hitShaderBindingStride, VkBuffer callableShaderBindingTableBuffer, VkDeviceSize callableShaderBindingOffset, VkDeviceSize callableShaderBindingStride, uint32_t width, uint32_t height, uint32_t depth); +typedef VkResult (VKAPI_PTR *PFN_vkCreateRayTracingPipelinesNV)(VkDevice device, VkPipelineCache pipelineCache, uint32_t createInfoCount, const VkRayTracingPipelineCreateInfoNV* pCreateInfos, const VkAllocationCallbacks* pAllocator, VkPipeline* pPipelines); +typedef VkResult (VKAPI_PTR *PFN_vkGetRayTracingShaderGroupHandlesNV)(VkDevice device, VkPipeline pipeline, uint32_t firstGroup, uint32_t groupCount, size_t dataSize, void* pData); +typedef VkResult (VKAPI_PTR *PFN_vkGetAccelerationStructureHandleNV)(VkDevice device, VkAccelerationStructureNV accelerationStructure, size_t dataSize, void* pData); +typedef void (VKAPI_PTR *PFN_vkCmdWriteAccelerationStructuresPropertiesNV)(VkCommandBuffer commandBuffer, uint32_t accelerationStructureCount, const VkAccelerationStructureNV* pAccelerationStructures, VkQueryType queryType, VkQueryPool queryPool, uint32_t firstQuery); +typedef VkResult (VKAPI_PTR *PFN_vkCompileDeferredNV)(VkDevice device, VkPipeline pipeline, uint32_t shader); #ifndef VK_NO_PROTOTYPES -VKAPI_ATTR VkResult VKAPI_CALL vkCreateAccelerationStructureNVX( +VKAPI_ATTR VkResult VKAPI_CALL vkCreateAccelerationStructureNV( VkDevice device, - const VkAccelerationStructureCreateInfoNVX* pCreateInfo, + const VkAccelerationStructureCreateInfoNV* pCreateInfo, const VkAllocationCallbacks* pAllocator, - VkAccelerationStructureNVX* pAccelerationStructure); + VkAccelerationStructureNV* pAccelerationStructure); -VKAPI_ATTR void VKAPI_CALL vkDestroyAccelerationStructureNVX( +VKAPI_ATTR void VKAPI_CALL vkDestroyAccelerationStructureNV( VkDevice device, - VkAccelerationStructureNVX accelerationStructure, + VkAccelerationStructureNV accelerationStructure, const VkAllocationCallbacks* pAllocator); -VKAPI_ATTR void VKAPI_CALL vkGetAccelerationStructureMemoryRequirementsNVX( - 
VkDevice device, - const VkAccelerationStructureMemoryRequirementsInfoNVX* pInfo, - VkMemoryRequirements2KHR* pMemoryRequirements); - -VKAPI_ATTR void VKAPI_CALL vkGetAccelerationStructureScratchMemoryRequirementsNVX( +VKAPI_ATTR void VKAPI_CALL vkGetAccelerationStructureMemoryRequirementsNV( VkDevice device, - const VkAccelerationStructureMemoryRequirementsInfoNVX* pInfo, + const VkAccelerationStructureMemoryRequirementsInfoNV* pInfo, VkMemoryRequirements2KHR* pMemoryRequirements); -VKAPI_ATTR VkResult VKAPI_CALL vkBindAccelerationStructureMemoryNVX( +VKAPI_ATTR VkResult VKAPI_CALL vkBindAccelerationStructureMemoryNV( VkDevice device, uint32_t bindInfoCount, - const VkBindAccelerationStructureMemoryInfoNVX* pBindInfos); + const VkBindAccelerationStructureMemoryInfoNV* pBindInfos); -VKAPI_ATTR void VKAPI_CALL vkCmdBuildAccelerationStructureNVX( +VKAPI_ATTR void VKAPI_CALL vkCmdBuildAccelerationStructureNV( VkCommandBuffer commandBuffer, - VkAccelerationStructureTypeNVX type, - uint32_t instanceCount, + const VkAccelerationStructureInfoNV* pInfo, VkBuffer instanceData, VkDeviceSize instanceOffset, - uint32_t geometryCount, - const VkGeometryNVX* pGeometries, - VkBuildAccelerationStructureFlagsNVX flags, VkBool32 update, - VkAccelerationStructureNVX dst, - VkAccelerationStructureNVX src, + VkAccelerationStructureNV dst, + VkAccelerationStructureNV src, VkBuffer scratch, VkDeviceSize scratchOffset); -VKAPI_ATTR void VKAPI_CALL vkCmdCopyAccelerationStructureNVX( +VKAPI_ATTR void VKAPI_CALL vkCmdCopyAccelerationStructureNV( VkCommandBuffer commandBuffer, - VkAccelerationStructureNVX dst, - VkAccelerationStructureNVX src, - VkCopyAccelerationStructureModeNVX mode); + VkAccelerationStructureNV dst, + VkAccelerationStructureNV src, + VkCopyAccelerationStructureModeNV mode); -VKAPI_ATTR void VKAPI_CALL vkCmdTraceRaysNVX( +VKAPI_ATTR void VKAPI_CALL vkCmdTraceRaysNV( VkCommandBuffer commandBuffer, VkBuffer raygenShaderBindingTableBuffer, VkDeviceSize 
raygenShaderBindingOffset, @@ -8340,18 +8490,22 @@ VkBuffer hitShaderBindingTableBuffer, VkDeviceSize hitShaderBindingOffset, VkDeviceSize hitShaderBindingStride, + VkBuffer callableShaderBindingTableBuffer, + VkDeviceSize callableShaderBindingOffset, + VkDeviceSize callableShaderBindingStride, uint32_t width, - uint32_t height); + uint32_t height, + uint32_t depth); -VKAPI_ATTR VkResult VKAPI_CALL vkCreateRaytracingPipelinesNVX( +VKAPI_ATTR VkResult VKAPI_CALL vkCreateRayTracingPipelinesNV( VkDevice device, VkPipelineCache pipelineCache, uint32_t createInfoCount, - const VkRaytracingPipelineCreateInfoNVX* pCreateInfos, + const VkRayTracingPipelineCreateInfoNV* pCreateInfos, const VkAllocationCallbacks* pAllocator, VkPipeline* pPipelines); -VKAPI_ATTR VkResult VKAPI_CALL vkGetRaytracingShaderHandlesNVX( +VKAPI_ATTR VkResult VKAPI_CALL vkGetRayTracingShaderGroupHandlesNV( VkDevice device, VkPipeline pipeline, uint32_t firstGroup, @@ -8359,20 +8513,21 @@ size_t dataSize, void* pData); -VKAPI_ATTR VkResult VKAPI_CALL vkGetAccelerationStructureHandleNVX( +VKAPI_ATTR VkResult VKAPI_CALL vkGetAccelerationStructureHandleNV( VkDevice device, - VkAccelerationStructureNVX accelerationStructure, + VkAccelerationStructureNV accelerationStructure, size_t dataSize, void* pData); -VKAPI_ATTR void VKAPI_CALL vkCmdWriteAccelerationStructurePropertiesNVX( +VKAPI_ATTR void VKAPI_CALL vkCmdWriteAccelerationStructuresPropertiesNV( VkCommandBuffer commandBuffer, - VkAccelerationStructureNVX accelerationStructure, + uint32_t accelerationStructureCount, + const VkAccelerationStructureNV* pAccelerationStructures, VkQueryType queryType, VkQueryPool queryPool, - uint32_t query); + uint32_t firstQuery); -VKAPI_ATTR VkResult VKAPI_CALL vkCompileDeferredNVX( +VKAPI_ATTR VkResult VKAPI_CALL vkCompileDeferredNV( VkDevice device, VkPipeline pipeline, uint32_t shader); @@ -8534,6 +8689,29 @@ +#define VK_AMD_memory_overallocation_behavior 1 +#define VK_AMD_MEMORY_OVERALLOCATION_BEHAVIOR_SPEC_VERSION 
1 +#define VK_AMD_MEMORY_OVERALLOCATION_BEHAVIOR_EXTENSION_NAME "VK_AMD_memory_overallocation_behavior" + + +typedef enum VkMemoryOverallocationBehaviorAMD { + VK_MEMORY_OVERALLOCATION_BEHAVIOR_DEFAULT_AMD = 0, + VK_MEMORY_OVERALLOCATION_BEHAVIOR_ALLOWED_AMD = 1, + VK_MEMORY_OVERALLOCATION_BEHAVIOR_DISALLOWED_AMD = 2, + VK_MEMORY_OVERALLOCATION_BEHAVIOR_BEGIN_RANGE_AMD = VK_MEMORY_OVERALLOCATION_BEHAVIOR_DEFAULT_AMD, + VK_MEMORY_OVERALLOCATION_BEHAVIOR_END_RANGE_AMD = VK_MEMORY_OVERALLOCATION_BEHAVIOR_DISALLOWED_AMD, + VK_MEMORY_OVERALLOCATION_BEHAVIOR_RANGE_SIZE_AMD = (VK_MEMORY_OVERALLOCATION_BEHAVIOR_DISALLOWED_AMD - VK_MEMORY_OVERALLOCATION_BEHAVIOR_DEFAULT_AMD + 1), + VK_MEMORY_OVERALLOCATION_BEHAVIOR_MAX_ENUM_AMD = 0x7FFFFFFF +} VkMemoryOverallocationBehaviorAMD; + +typedef struct VkDeviceMemoryOverallocationCreateInfoAMD { + VkStructureType sType; + const void* pNext; + VkMemoryOverallocationBehaviorAMD overallocationBehavior; +} VkDeviceMemoryOverallocationCreateInfoAMD; + + + #define VK_EXT_vertex_attribute_divisor 1 #define VK_EXT_VERTEX_ATTRIBUTE_DIVISOR_SPEC_VERSION 3 #define VK_EXT_VERTEX_ATTRIBUTE_DIVISOR_EXTENSION_NAME "VK_EXT_vertex_attribute_divisor" @@ -8730,30 +8908,187 @@ #endif #define VK_EXT_pci_bus_info 1 -#define VK_EXT_PCI_BUS_INFO_SPEC_VERSION 1 +#define VK_EXT_PCI_BUS_INFO_SPEC_VERSION 2 #define VK_EXT_PCI_BUS_INFO_EXTENSION_NAME "VK_EXT_pci_bus_info" typedef struct VkPhysicalDevicePCIBusInfoPropertiesEXT { VkStructureType sType; void* pNext; - uint16_t pciDomain; - uint8_t pciBus; - uint8_t pciDevice; - uint8_t pciFunction; + uint32_t pciDomain; + uint32_t pciBus; + uint32_t pciDevice; + uint32_t pciFunction; } VkPhysicalDevicePCIBusInfoPropertiesEXT; +#define VK_EXT_fragment_density_map 1 +#define VK_EXT_FRAGMENT_DENSITY_MAP_SPEC_VERSION 1 +#define VK_EXT_FRAGMENT_DENSITY_MAP_EXTENSION_NAME "VK_EXT_fragment_density_map" + +typedef struct VkPhysicalDeviceFragmentDensityMapFeaturesEXT { + VkStructureType sType; + void* pNext; + VkBool32 
fragmentDensityMap; + VkBool32 fragmentDensityMapDynamic; + VkBool32 fragmentDensityMapNonSubsampledImages; +} VkPhysicalDeviceFragmentDensityMapFeaturesEXT; + +typedef struct VkPhysicalDeviceFragmentDensityMapPropertiesEXT { + VkStructureType sType; + void* pNext; + VkExtent2D minFragmentDensityTexelSize; + VkExtent2D maxFragmentDensityTexelSize; + VkBool32 fragmentDensityInvocations; +} VkPhysicalDeviceFragmentDensityMapPropertiesEXT; + +typedef struct VkRenderPassFragmentDensityMapCreateInfoEXT { + VkStructureType sType; + const void* pNext; + VkAttachmentReference fragmentDensityMapAttachment; +} VkRenderPassFragmentDensityMapCreateInfoEXT; + + + +#define VK_EXT_scalar_block_layout 1 +#define VK_EXT_SCALAR_BLOCK_LAYOUT_SPEC_VERSION 1 +#define VK_EXT_SCALAR_BLOCK_LAYOUT_EXTENSION_NAME "VK_EXT_scalar_block_layout" + +typedef struct VkPhysicalDeviceScalarBlockLayoutFeaturesEXT { + VkStructureType sType; + void* pNext; + VkBool32 scalarBlockLayout; +} VkPhysicalDeviceScalarBlockLayoutFeaturesEXT; + + + #define VK_GOOGLE_hlsl_functionality1 1 -#define VK_GOOGLE_HLSL_FUNCTIONALITY1_SPEC_VERSION 0 +#define VK_GOOGLE_HLSL_FUNCTIONALITY1_SPEC_VERSION 1 #define VK_GOOGLE_HLSL_FUNCTIONALITY1_EXTENSION_NAME "VK_GOOGLE_hlsl_functionality1" #define VK_GOOGLE_decorate_string 1 -#define VK_GOOGLE_DECORATE_STRING_SPEC_VERSION 0 +#define VK_GOOGLE_DECORATE_STRING_SPEC_VERSION 1 #define VK_GOOGLE_DECORATE_STRING_EXTENSION_NAME "VK_GOOGLE_decorate_string" +#define VK_EXT_memory_budget 1 +#define VK_EXT_MEMORY_BUDGET_SPEC_VERSION 1 +#define VK_EXT_MEMORY_BUDGET_EXTENSION_NAME "VK_EXT_memory_budget" + +typedef struct VkPhysicalDeviceMemoryBudgetPropertiesEXT { + VkStructureType sType; + void* pNext; + VkDeviceSize heapBudget[VK_MAX_MEMORY_HEAPS]; + VkDeviceSize heapUsage[VK_MAX_MEMORY_HEAPS]; +} VkPhysicalDeviceMemoryBudgetPropertiesEXT; + + + +#define VK_EXT_memory_priority 1 +#define VK_EXT_MEMORY_PRIORITY_SPEC_VERSION 1 +#define VK_EXT_MEMORY_PRIORITY_EXTENSION_NAME 
"VK_EXT_memory_priority" + +typedef struct VkPhysicalDeviceMemoryPriorityFeaturesEXT { + VkStructureType sType; + void* pNext; + VkBool32 memoryPriority; +} VkPhysicalDeviceMemoryPriorityFeaturesEXT; + +typedef struct VkMemoryPriorityAllocateInfoEXT { + VkStructureType sType; + const void* pNext; + float priority; +} VkMemoryPriorityAllocateInfoEXT; + + + +#define VK_EXT_buffer_device_address 1 +typedef uint64_t VkDeviceAddress; + +#define VK_EXT_BUFFER_DEVICE_ADDRESS_SPEC_VERSION 2 +#define VK_EXT_BUFFER_DEVICE_ADDRESS_EXTENSION_NAME "VK_EXT_buffer_device_address" + +typedef struct VkPhysicalDeviceBufferAddressFeaturesEXT { + VkStructureType sType; + void* pNext; + VkBool32 bufferDeviceAddress; + VkBool32 bufferDeviceAddressCaptureReplay; + VkBool32 bufferDeviceAddressMultiDevice; +} VkPhysicalDeviceBufferAddressFeaturesEXT; + +typedef struct VkBufferDeviceAddressInfoEXT { + VkStructureType sType; + const void* pNext; + VkBuffer buffer; +} VkBufferDeviceAddressInfoEXT; + +typedef struct VkBufferDeviceAddressCreateInfoEXT { + VkStructureType sType; + const void* pNext; + VkDeviceSize deviceAddress; +} VkBufferDeviceAddressCreateInfoEXT; + + +typedef VkDeviceAddress (VKAPI_PTR *PFN_vkGetBufferDeviceAddressEXT)(VkDevice device, const VkBufferDeviceAddressInfoEXT* pInfo); + +#ifndef VK_NO_PROTOTYPES +VKAPI_ATTR VkDeviceAddress VKAPI_CALL vkGetBufferDeviceAddressEXT( + VkDevice device, + const VkBufferDeviceAddressInfoEXT* pInfo); +#endif + +#define VK_EXT_separate_stencil_usage 1 +#define VK_EXT_SEPARATE_STENCIL_USAGE_SPEC_VERSION 1 +#define VK_EXT_SEPARATE_STENCIL_USAGE_EXTENSION_NAME "VK_EXT_separate_stencil_usage" + +typedef struct VkImageStencilUsageCreateInfoEXT { + VkStructureType sType; + const void* pNext; + VkImageUsageFlags stencilUsage; +} VkImageStencilUsageCreateInfoEXT; + + + +#define VK_EXT_validation_features 1 +#define VK_EXT_VALIDATION_FEATURES_SPEC_VERSION 1 +#define VK_EXT_VALIDATION_FEATURES_EXTENSION_NAME "VK_EXT_validation_features" + + +typedef 
enum VkValidationFeatureEnableEXT { + VK_VALIDATION_FEATURE_ENABLE_GPU_ASSISTED_EXT = 0, + VK_VALIDATION_FEATURE_ENABLE_GPU_ASSISTED_RESERVE_BINDING_SLOT_EXT = 1, + VK_VALIDATION_FEATURE_ENABLE_BEGIN_RANGE_EXT = VK_VALIDATION_FEATURE_ENABLE_GPU_ASSISTED_EXT, + VK_VALIDATION_FEATURE_ENABLE_END_RANGE_EXT = VK_VALIDATION_FEATURE_ENABLE_GPU_ASSISTED_RESERVE_BINDING_SLOT_EXT, + VK_VALIDATION_FEATURE_ENABLE_RANGE_SIZE_EXT = (VK_VALIDATION_FEATURE_ENABLE_GPU_ASSISTED_RESERVE_BINDING_SLOT_EXT - VK_VALIDATION_FEATURE_ENABLE_GPU_ASSISTED_EXT + 1), + VK_VALIDATION_FEATURE_ENABLE_MAX_ENUM_EXT = 0x7FFFFFFF +} VkValidationFeatureEnableEXT; + +typedef enum VkValidationFeatureDisableEXT { + VK_VALIDATION_FEATURE_DISABLE_ALL_EXT = 0, + VK_VALIDATION_FEATURE_DISABLE_SHADERS_EXT = 1, + VK_VALIDATION_FEATURE_DISABLE_THREAD_SAFETY_EXT = 2, + VK_VALIDATION_FEATURE_DISABLE_API_PARAMETERS_EXT = 3, + VK_VALIDATION_FEATURE_DISABLE_OBJECT_LIFETIMES_EXT = 4, + VK_VALIDATION_FEATURE_DISABLE_CORE_CHECKS_EXT = 5, + VK_VALIDATION_FEATURE_DISABLE_UNIQUE_HANDLES_EXT = 6, + VK_VALIDATION_FEATURE_DISABLE_BEGIN_RANGE_EXT = VK_VALIDATION_FEATURE_DISABLE_ALL_EXT, + VK_VALIDATION_FEATURE_DISABLE_END_RANGE_EXT = VK_VALIDATION_FEATURE_DISABLE_UNIQUE_HANDLES_EXT, + VK_VALIDATION_FEATURE_DISABLE_RANGE_SIZE_EXT = (VK_VALIDATION_FEATURE_DISABLE_UNIQUE_HANDLES_EXT - VK_VALIDATION_FEATURE_DISABLE_ALL_EXT + 1), + VK_VALIDATION_FEATURE_DISABLE_MAX_ENUM_EXT = 0x7FFFFFFF +} VkValidationFeatureDisableEXT; + +typedef struct VkValidationFeaturesEXT { + VkStructureType sType; + const void* pNext; + uint32_t enabledValidationFeatureCount; + const VkValidationFeatureEnableEXT* pEnabledValidationFeatures; + uint32_t disabledValidationFeatureCount; + const VkValidationFeatureDisableEXT* pDisabledValidationFeatures; +} VkValidationFeaturesEXT; + + + #ifdef __cplusplus } #endif diff -Nru mesa-18.3.3/include/vulkan/vulkan_fuchsia.h mesa-19.0.1/include/vulkan/vulkan_fuchsia.h --- mesa-18.3.3/include/vulkan/vulkan_fuchsia.h 
2018-12-07 18:58:04.000000000 +0000 +++ mesa-19.0.1/include/vulkan/vulkan_fuchsia.h 2019-03-31 23:16:37.000000000 +0000 @@ -6,7 +6,7 @@ #endif /* -** Copyright (c) 2015-2018 The Khronos Group Inc. +** Copyright (c) 2015-2019 The Khronos Group Inc. ** ** Licensed under the Apache License, Version 2.0 (the "License"); ** you may not use this file except in compliance with the License. diff -Nru mesa-18.3.3/include/vulkan/vulkan.h mesa-19.0.1/include/vulkan/vulkan.h --- mesa-18.3.3/include/vulkan/vulkan.h 2018-12-07 18:58:04.000000000 +0000 +++ mesa-19.0.1/include/vulkan/vulkan.h 2019-03-31 23:16:37.000000000 +0000 @@ -2,7 +2,7 @@ #define VULKAN_H_ 1 /* -** Copyright (c) 2015-2018 The Khronos Group Inc. +** Copyright (c) 2015-2019 The Khronos Group Inc. ** ** Licensed under the Apache License, Version 2.0 (the "License"); ** you may not use this file except in compliance with the License. @@ -39,12 +39,6 @@ #endif -#ifdef VK_USE_PLATFORM_MIR_KHR -#include -#include "vulkan_mir.h" -#endif - - #ifdef VK_USE_PLATFORM_VI_NN #include "vulkan_vi.h" #endif diff -Nru mesa-18.3.3/include/vulkan/vulkan_ios.h mesa-19.0.1/include/vulkan/vulkan_ios.h --- mesa-18.3.3/include/vulkan/vulkan_ios.h 2018-03-08 23:00:46.000000000 +0000 +++ mesa-19.0.1/include/vulkan/vulkan_ios.h 2019-03-31 23:16:37.000000000 +0000 @@ -6,7 +6,7 @@ #endif /* -** Copyright (c) 2015-2018 The Khronos Group Inc. +** Copyright (c) 2015-2019 The Khronos Group Inc. ** ** Licensed under the Apache License, Version 2.0 (the "License"); ** you may not use this file except in compliance with the License. diff -Nru mesa-18.3.3/include/vulkan/vulkan_macos.h mesa-19.0.1/include/vulkan/vulkan_macos.h --- mesa-18.3.3/include/vulkan/vulkan_macos.h 2018-03-08 23:00:46.000000000 +0000 +++ mesa-19.0.1/include/vulkan/vulkan_macos.h 2019-03-31 23:16:37.000000000 +0000 @@ -6,7 +6,7 @@ #endif /* -** Copyright (c) 2015-2018 The Khronos Group Inc. +** Copyright (c) 2015-2019 The Khronos Group Inc. 
** ** Licensed under the Apache License, Version 2.0 (the "License"); ** you may not use this file except in compliance with the License. diff -Nru mesa-18.3.3/include/vulkan/vulkan_mir.h mesa-19.0.1/include/vulkan/vulkan_mir.h --- mesa-18.3.3/include/vulkan/vulkan_mir.h 2018-03-08 23:00:46.000000000 +0000 +++ mesa-19.0.1/include/vulkan/vulkan_mir.h 1970-01-01 00:00:00.000000000 +0000 @@ -1,65 +0,0 @@ -#ifndef VULKAN_MIR_H_ -#define VULKAN_MIR_H_ 1 - -#ifdef __cplusplus -extern "C" { -#endif - -/* -** Copyright (c) 2015-2018 The Khronos Group Inc. -** -** Licensed under the Apache License, Version 2.0 (the "License"); -** you may not use this file except in compliance with the License. -** You may obtain a copy of the License at -** -** http://www.apache.org/licenses/LICENSE-2.0 -** -** Unless required by applicable law or agreed to in writing, software -** distributed under the License is distributed on an "AS IS" BASIS, -** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -** See the License for the specific language governing permissions and -** limitations under the License. -*/ - -/* -** This header is generated from the Khronos Vulkan XML API Registry. 
-** -*/ - - -#define VK_KHR_mir_surface 1 -#define VK_KHR_MIR_SURFACE_SPEC_VERSION 4 -#define VK_KHR_MIR_SURFACE_EXTENSION_NAME "VK_KHR_mir_surface" - -typedef VkFlags VkMirSurfaceCreateFlagsKHR; - -typedef struct VkMirSurfaceCreateInfoKHR { - VkStructureType sType; - const void* pNext; - VkMirSurfaceCreateFlagsKHR flags; - MirConnection* connection; - MirSurface* mirSurface; -} VkMirSurfaceCreateInfoKHR; - - -typedef VkResult (VKAPI_PTR *PFN_vkCreateMirSurfaceKHR)(VkInstance instance, const VkMirSurfaceCreateInfoKHR* pCreateInfo, const VkAllocationCallbacks* pAllocator, VkSurfaceKHR* pSurface); -typedef VkBool32 (VKAPI_PTR *PFN_vkGetPhysicalDeviceMirPresentationSupportKHR)(VkPhysicalDevice physicalDevice, uint32_t queueFamilyIndex, MirConnection* connection); - -#ifndef VK_NO_PROTOTYPES -VKAPI_ATTR VkResult VKAPI_CALL vkCreateMirSurfaceKHR( - VkInstance instance, - const VkMirSurfaceCreateInfoKHR* pCreateInfo, - const VkAllocationCallbacks* pAllocator, - VkSurfaceKHR* pSurface); - -VKAPI_ATTR VkBool32 VKAPI_CALL vkGetPhysicalDeviceMirPresentationSupportKHR( - VkPhysicalDevice physicalDevice, - uint32_t queueFamilyIndex, - MirConnection* connection); -#endif - -#ifdef __cplusplus -} -#endif - -#endif diff -Nru mesa-18.3.3/include/vulkan/vulkan_vi.h mesa-19.0.1/include/vulkan/vulkan_vi.h --- mesa-18.3.3/include/vulkan/vulkan_vi.h 2018-03-08 23:00:46.000000000 +0000 +++ mesa-19.0.1/include/vulkan/vulkan_vi.h 2019-03-31 23:16:37.000000000 +0000 @@ -6,7 +6,7 @@ #endif /* -** Copyright (c) 2015-2018 The Khronos Group Inc. +** Copyright (c) 2015-2019 The Khronos Group Inc. ** ** Licensed under the Apache License, Version 2.0 (the "License"); ** you may not use this file except in compliance with the License. 
diff -Nru mesa-18.3.3/include/vulkan/vulkan_wayland.h mesa-19.0.1/include/vulkan/vulkan_wayland.h --- mesa-18.3.3/include/vulkan/vulkan_wayland.h 2018-03-08 23:00:46.000000000 +0000 +++ mesa-19.0.1/include/vulkan/vulkan_wayland.h 2019-03-31 23:16:37.000000000 +0000 @@ -6,7 +6,7 @@ #endif /* -** Copyright (c) 2015-2018 The Khronos Group Inc. +** Copyright (c) 2015-2019 The Khronos Group Inc. ** ** Licensed under the Apache License, Version 2.0 (the "License"); ** you may not use this file except in compliance with the License. diff -Nru mesa-18.3.3/include/vulkan/vulkan_win32.h mesa-19.0.1/include/vulkan/vulkan_win32.h --- mesa-18.3.3/include/vulkan/vulkan_win32.h 2018-03-08 23:00:46.000000000 +0000 +++ mesa-19.0.1/include/vulkan/vulkan_win32.h 2019-03-31 23:16:37.000000000 +0000 @@ -6,7 +6,7 @@ #endif /* -** Copyright (c) 2015-2018 The Khronos Group Inc. +** Copyright (c) 2015-2019 The Khronos Group Inc. ** ** Licensed under the Apache License, Version 2.0 (the "License"); ** you may not use this file except in compliance with the License. diff -Nru mesa-18.3.3/include/vulkan/vulkan_xcb.h mesa-19.0.1/include/vulkan/vulkan_xcb.h --- mesa-18.3.3/include/vulkan/vulkan_xcb.h 2018-03-08 23:00:46.000000000 +0000 +++ mesa-19.0.1/include/vulkan/vulkan_xcb.h 2019-03-31 23:16:37.000000000 +0000 @@ -6,7 +6,7 @@ #endif /* -** Copyright (c) 2015-2018 The Khronos Group Inc. +** Copyright (c) 2015-2019 The Khronos Group Inc. ** ** Licensed under the Apache License, Version 2.0 (the "License"); ** you may not use this file except in compliance with the License. diff -Nru mesa-18.3.3/include/vulkan/vulkan_xlib.h mesa-19.0.1/include/vulkan/vulkan_xlib.h --- mesa-18.3.3/include/vulkan/vulkan_xlib.h 2018-03-08 23:00:46.000000000 +0000 +++ mesa-19.0.1/include/vulkan/vulkan_xlib.h 2019-03-31 23:16:37.000000000 +0000 @@ -6,7 +6,7 @@ #endif /* -** Copyright (c) 2015-2018 The Khronos Group Inc. +** Copyright (c) 2015-2019 The Khronos Group Inc. 
** ** Licensed under the Apache License, Version 2.0 (the "License"); ** you may not use this file except in compliance with the License. diff -Nru mesa-18.3.3/include/vulkan/vulkan_xlib_xrandr.h mesa-19.0.1/include/vulkan/vulkan_xlib_xrandr.h --- mesa-18.3.3/include/vulkan/vulkan_xlib_xrandr.h 2018-03-08 23:00:46.000000000 +0000 +++ mesa-19.0.1/include/vulkan/vulkan_xlib_xrandr.h 2019-03-31 23:16:37.000000000 +0000 @@ -6,7 +6,7 @@ #endif /* -** Copyright (c) 2015-2018 The Khronos Group Inc. +** Copyright (c) 2015-2019 The Khronos Group Inc. ** ** Licensed under the Apache License, Version 2.0 (the "License"); ** you may not use this file except in compliance with the License. diff -Nru mesa-18.3.3/Makefile.am mesa-19.0.1/Makefile.am --- mesa-18.3.3/Makefile.am 2018-09-27 19:13:53.000000000 +0000 +++ mesa-19.0.1/Makefile.am 2019-03-31 23:16:37.000000000 +0000 @@ -22,6 +22,7 @@ SUBDIRS = src AM_DISTCHECK_CONFIGURE_FLAGS = \ + --enable-autotools \ --enable-dri \ --enable-dri3 \ --enable-egl \ @@ -45,7 +46,7 @@ --enable-libunwind \ --with-platforms=x11,wayland,drm,surfaceless \ --with-dri-drivers=i915,i965,nouveau,radeon,r200,swrast \ - --with-gallium-drivers=i915,nouveau,r300,pl111,r600,radeonsi,freedreno,svga,swrast,vc4,tegra,virgl,swr,etnaviv,imx \ + --with-gallium-drivers=i915,nouveau,r300,kmsro,r600,radeonsi,freedreno,svga,swrast,vc4,tegra,virgl,swr,etnaviv \ --with-vulkan-drivers=intel,radeon ACLOCAL_AMFLAGS = -I m4 diff -Nru mesa-18.3.3/meson.build mesa-19.0.1/meson.build --- mesa-18.3.3/meson.build 2019-02-01 12:03:20.000000000 +0000 +++ mesa-19.0.1/meson.build 2019-03-31 23:16:37.000000000 +0000 @@ -34,8 +34,6 @@ null_dep = dependency('', required : false) -system_has_kms_drm = ['openbsd', 'netbsd', 'freebsd', 'dragonfly', 'linux'].contains(host_machine.system()) - # Arguments for the preprocessor, put these in a separate array from the C and # C++ (cpp in meson terminology) arguments since they need to be added to the # default arguments for both C and C++. 
@@ -43,8 +41,7 @@ '-D__STDC_CONSTANT_MACROS', '-D__STDC_FORMAT_MACROS', '-D__STDC_LIMIT_MACROS', - '-DVERSION="@0@"'.format(meson.project_version()), - '-DPACKAGE_VERSION=VERSION', + '-DPACKAGE_VERSION="@0@"'.format(meson.project_version()), '-DPACKAGE_BUGREPORT="https://bugs.freedesktop.org/enter_bug.cgi?product=Mesa"', ] @@ -59,16 +56,16 @@ with_swr_arches = get_option('swr-arches') with_tools = get_option('tools') if with_tools.contains('all') - with_tools = ['freedreno', 'glsl', 'intel', 'nir', 'nouveau', 'xvmc'] + with_tools = ['etnaviv', 'freedreno', 'glsl', 'intel', 'nir', 'nouveau', 'xvmc'] endif dri_drivers_path = get_option('dri-drivers-path') if dri_drivers_path == '' - dri_drivers_path = join_paths(get_option('libdir'), 'dri') + dri_drivers_path = join_paths(get_option('prefix'), get_option('libdir'), 'dri') endif dri_search_path = get_option('dri-search-path') if dri_search_path == '' - dri_search_path = join_paths(get_option('prefix'), dri_drivers_path) + dri_search_path = dri_drivers_path endif with_gles1 = get_option('gles1') @@ -134,7 +131,7 @@ ] elif ['arm', 'aarch64'].contains(host_machine.cpu_family()) _drivers = [ - 'pl111', 'v3d', 'vc4', 'freedreno', 'etnaviv', 'imx', 'nouveau', + 'kmsro', 'v3d', 'vc4', 'freedreno', 'etnaviv', 'nouveau', 'tegra', 'virgl', 'swrast', ] else @@ -148,7 +145,7 @@ host_machine.system())) endif endif -with_gallium_pl111 = _drivers.contains('pl111') +with_gallium_kmsro = _drivers.contains('kmsro') with_gallium_radeonsi = _drivers.contains('radeonsi') with_gallium_r300 = _drivers.contains('r300') with_gallium_r600 = _drivers.contains('r600') @@ -158,7 +155,6 @@ with_gallium_vc4 = _drivers.contains('vc4') with_gallium_v3d = _drivers.contains('v3d') with_gallium_etnaviv = _drivers.contains('etnaviv') -with_gallium_imx = _drivers.contains('imx') with_gallium_tegra = _drivers.contains('tegra') with_gallium_i915 = _drivers.contains('i915') with_gallium_svga = _drivers.contains('svga') @@ -213,11 +209,8 @@ if with_dri_i915 
and with_gallium_i915 error('Only one i915 provider can be built') endif -if with_gallium_imx and not with_gallium_etnaviv - error('IMX driver requires etnaviv driver') -endif -if with_gallium_pl111 and not with_gallium_vc4 - error('pl111 driver requires vc4 driver') +if with_gallium_kmsro and not (with_gallium_vc4 or with_gallium_etnaviv or with_gallium_freedreno) + error('kmsro driver requires one or more renderonly drivers (vc4, etnaviv, freedreno)') endif if with_gallium_tegra and not with_gallium_nouveau error('tegra driver requires nouveau driver') @@ -615,7 +608,7 @@ d3d_drivers_path = get_option('d3d-drivers-path') if d3d_drivers_path == '' - d3d_drivers_path = join_paths(get_option('libdir'), 'd3d') + d3d_drivers_path = join_paths(get_option('prefix'), get_option('libdir'), 'd3d') endif with_gallium_st_nine = get_option('gallium-nine') @@ -936,7 +929,7 @@ # case of cross compiling where we can use asm, and that's x86_64 -> x86 when # host OS == build OS, since in that case the build machine can run the host's # binaries. -if meson.is_cross_build() +if with_asm and meson.is_cross_build() if build_machine.system() != host_machine.system() # TODO: It may be possible to do this with an exe_wrapper (like wine). 
message('Cross compiling from one OS to another, disabling assembly.') @@ -1120,7 +1113,7 @@ dep_libdrm_etnaviv = null_dep dep_libdrm_intel = null_dep -_drm_amdgpu_ver = '2.4.95' +_drm_amdgpu_ver = '2.4.97' _drm_radeon_ver = '2.4.71' _drm_nouveau_ver = '2.4.66' _drm_etnaviv_ver = '2.4.89' @@ -1195,7 +1188,7 @@ endif if with_amd_vk or with_gallium_radeonsi - _llvm_version = '>= 6.0.0' + _llvm_version = '>= 7.0.0' elif with_gallium_swr _llvm_version = '>= 6.0.0' elif with_gallium_opencl or with_gallium_r600 @@ -1372,7 +1365,7 @@ dep_xfixes = dependency('xfixes') dep_xcb_glx = dependency('xcb-glx', version : '>= 1.8.1') endif - if (with_any_vk or with_glx == 'dri' or + if (with_any_vk or with_glx == 'dri' or with_egl or (with_gallium_vdpau or with_gallium_xvmc or with_gallium_va or with_gallium_omx != 'disabled')) dep_xcb = dependency('xcb') @@ -1407,7 +1400,7 @@ dep_xcb_xfixes = dependency('xcb-xfixes') endif if with_xlib_lease - dep_xcb_xrandr = dependency('xcb-randr', version : '>= 1.12') + dep_xcb_xrandr = dependency('xcb-randr') dep_xlib_xrandr = dependency('xrandr', version : '>= 1.3') endif endif diff -Nru mesa-18.3.3/meson_options.txt mesa-19.0.1/meson_options.txt --- mesa-18.3.3/meson_options.txt 2019-02-01 12:03:20.000000000 +0000 +++ mesa-19.0.1/meson_options.txt 2019-03-31 23:16:37.000000000 +0000 @@ -58,8 +58,8 @@ type : 'array', value : ['auto'], choices : [ - '', 'auto', 'pl111', 'radeonsi', 'r300', 'r600', 'nouveau', 'freedreno', - 'swrast', 'v3d', 'vc4', 'etnaviv', 'imx', 'tegra', 'i915', 'svga', 'virgl', + '', 'auto', 'kmsro', 'radeonsi', 'r300', 'r600', 'nouveau', 'freedreno', + 'swrast', 'v3d', 'vc4', 'etnaviv', 'tegra', 'i915', 'svga', 'virgl', 'swr', ], description : 'List of gallium drivers to build. 
If this is set to auto all drivers applicable to the target OS/architecture will be built' @@ -301,7 +301,7 @@ 'tools', type : 'array', value : [], - choices : ['freedreno', 'glsl', 'intel', 'intel-ui', 'nir', 'nouveau', 'xvmc', 'all'], + choices : ['etnaviv', 'freedreno', 'glsl', 'intel', 'intel-ui', 'nir', 'nouveau', 'xvmc', 'all'], description : 'List of tools to build. (Note: `intel-ui` selects `intel`)', ) option( diff -Nru mesa-18.3.3/REVIEWERS mesa-19.0.1/REVIEWERS --- mesa-18.3.3/REVIEWERS 2018-09-27 19:13:53.000000000 +0000 +++ mesa-19.0.1/REVIEWERS 2019-03-31 23:16:37.000000000 +0000 @@ -72,7 +72,9 @@ EGL R: Eric Engestrom +R: Emil Velikov F: src/egl/ +F: include/EGL/ HAIKU R: Alexander von Gluck IV @@ -136,3 +138,8 @@ GLX R: Adam Jackson F: src/glx/ + +VULKAN +R: Eric Engestrom +F: src/vulkan/ +F: include/vulkan/ diff -Nru mesa-18.3.3/scons/custom.py mesa-19.0.1/scons/custom.py --- mesa-18.3.3/scons/custom.py 2017-11-05 00:14:08.000000000 +0000 +++ mesa-19.0.1/scons/custom.py 2019-03-31 23:16:37.000000000 +0000 @@ -48,7 +48,12 @@ # a path directly. We want to support both, so we need to detect the SCons version, # for which no API is provided by SCons 8-P -scons_version = tuple(map(int, SCons.__version__.split('.'))) +# Scons version string has consistently been in this format: +# MajorVersion.MinorVersion.Patch[.alpha/beta.yyyymmdd] +# so this formula should cover all versions regardless of type +# stable, alpha or beta. +# For simplicity alpha and beta flags are removed. +scons_version = tuple(map(int, SCons.__version__.split('.')[:3])) def quietCommandLines(env): # Quiet command lines diff -Nru mesa-18.3.3/scons/gallium.py mesa-19.0.1/scons/gallium.py --- mesa-18.3.3/scons/gallium.py 2018-12-07 18:58:04.000000000 +0000 +++ mesa-19.0.1/scons/gallium.py 2019-03-31 23:16:37.000000000 +0000 @@ -308,7 +308,20 @@ if env.GetOption('num_jobs') <= 1: env.SetOption('num_jobs', num_jobs()) - env.Decider('MD5-timestamp') + # Speed up dependency checking. 
See + # - https://github.com/SCons/scons/wiki/GoFastButton + # - https://bugs.freedesktop.org/show_bug.cgi?id=109443 + + # Scons version string has consistently been in this format: + # MajorVersion.MinorVersion.Patch[.alpha/beta.yyyymmdd] + # so this formula should cover all versions regardless of type + # stable, alpha or beta. + # For simplicity alpha and beta flags are removed. + + scons_version = distutils.version.StrictVersion('.'.join(SCons.__version__.split('.')[:3])) + if scons_version < distutils.version.StrictVersion('3.0.2') or \ + scons_version > distutils.version.StrictVersion('3.0.4'): + env.Decider('MD5-timestamp') env.SetOption('max_drift', 60) # C preprocessor options diff -Nru mesa-18.3.3/src/amd/addrlib/addrinterface.cpp mesa-19.0.1/src/amd/addrlib/addrinterface.cpp --- mesa-18.3.3/src/amd/addrlib/addrinterface.cpp 2018-12-07 18:58:04.000000000 +0000 +++ mesa-19.0.1/src/amd/addrlib/addrinterface.cpp 1970-01-01 00:00:00.000000000 +0000 @@ -1,1743 +0,0 @@ -/* - * Copyright © 2014 Advanced Micro Devices, Inc. - * All Rights Reserved. - * - * Permission is hereby granted, free of charge, to any person obtaining - * a copy of this software and associated documentation files (the - * "Software"), to deal in the Software without restriction, including - * without limitation the rights to use, copy, modify, merge, publish, - * distribute, sub license, and/or sell copies of the Software, and to - * permit persons to whom the Software is furnished to do so, subject to - * the following conditions: - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES - * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - * NON-INFRINGEMENT. 
IN NO EVENT SHALL THE COPYRIGHT HOLDERS, AUTHORS - * AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, - * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE - * USE OR OTHER DEALINGS IN THE SOFTWARE. - * - * The above copyright notice and this permission notice (including the - * next paragraph) shall be included in all copies or substantial portions - * of the Software. - */ - -/** -**************************************************************************************************** -* @file addrinterface.cpp -* @brief Contains the addrlib interface functions -**************************************************************************************************** -*/ -#include "addrinterface.h" -#include "addrlib1.h" -#include "addrlib2.h" - -#include "addrcommon.h" - -#include "util/macros.h" - -using namespace Addr; - -//////////////////////////////////////////////////////////////////////////////////////////////////// -// Create/Destroy/Config functions -//////////////////////////////////////////////////////////////////////////////////////////////////// - -/** -**************************************************************************************************** -* AddrCreate -* -* @brief -* Create address lib object -* -* @return -* ADDR_OK if successful, otherwise an error code of ADDR_E_RETURNCODE -**************************************************************************************************** -*/ -ADDR_E_RETURNCODE ADDR_API AddrCreate( - const ADDR_CREATE_INPUT* pAddrCreateIn, ///< [in] infomation for creating address lib object - ADDR_CREATE_OUTPUT* pAddrCreateOut) ///< [out] address lib handle -{ - ADDR_E_RETURNCODE returnCode = ADDR_OK; - - returnCode = Lib::Create(pAddrCreateIn, pAddrCreateOut); - - return returnCode; -} - - - -/** -**************************************************************************************************** -* AddrDestroy -* -* @brief -* 
Destroy address lib object -* -* @return -* ADDR_OK if successful, otherwise an error code of ADDR_E_RETURNCODE -**************************************************************************************************** -*/ -ADDR_E_RETURNCODE ADDR_API AddrDestroy( - ADDR_HANDLE hLib) ///< address lib handle -{ - ADDR_E_RETURNCODE returnCode = ADDR_OK; - - if (hLib) - { - Lib* pLib = Lib::GetLib(hLib); - pLib->Destroy(); - } - else - { - returnCode = ADDR_ERROR; - } - - return returnCode; -} - - - -//////////////////////////////////////////////////////////////////////////////////////////////////// -// Surface functions -//////////////////////////////////////////////////////////////////////////////////////////////////// - -/** -**************************************************************************************************** -* AddrComputeSurfaceInfo -* -* @brief -* Calculate surface width/height/depth/alignments and suitable tiling mode -* -* @return -* ADDR_OK if successful, otherwise an error code of ADDR_E_RETURNCODE -**************************************************************************************************** -*/ -ADDR_E_RETURNCODE ADDR_API AddrComputeSurfaceInfo( - ADDR_HANDLE hLib, ///< address lib handle - const ADDR_COMPUTE_SURFACE_INFO_INPUT* pIn, ///< [in] surface information - ADDR_COMPUTE_SURFACE_INFO_OUTPUT* pOut) ///< [out] surface parameters and alignments -{ - V1::Lib* pLib = V1::Lib::GetLib(hLib); - - ADDR_E_RETURNCODE returnCode = ADDR_OK; - - if (pLib != NULL) - { - returnCode = pLib->ComputeSurfaceInfo(pIn, pOut); - } - else - { - returnCode = ADDR_ERROR; - } - - return returnCode; -} - - - -/** -**************************************************************************************************** -* AddrComputeSurfaceAddrFromCoord -* -* @brief -* Compute surface address according to coordinates -* -* @return -* ADDR_OK if successful, otherwise an error code of ADDR_E_RETURNCODE 
-**************************************************************************************************** -*/ -ADDR_E_RETURNCODE ADDR_API AddrComputeSurfaceAddrFromCoord( - ADDR_HANDLE hLib, ///< address lib handle - const ADDR_COMPUTE_SURFACE_ADDRFROMCOORD_INPUT* pIn, ///< [in] surface info and coordinates - ADDR_COMPUTE_SURFACE_ADDRFROMCOORD_OUTPUT* pOut) ///< [out] surface address -{ - V1::Lib* pLib = V1::Lib::GetLib(hLib); - - ADDR_E_RETURNCODE returnCode = ADDR_OK; - - if (pLib != NULL) - { - returnCode = pLib->ComputeSurfaceAddrFromCoord(pIn, pOut); - } - else - { - returnCode = ADDR_ERROR; - } - - return returnCode; -} - -/** -**************************************************************************************************** -* AddrComputeSurfaceCoordFromAddr -* -* @brief -* Compute coordinates according to surface address -* -* @return -* ADDR_OK if successful, otherwise an error code of ADDR_E_RETURNCODE -**************************************************************************************************** -*/ -ADDR_E_RETURNCODE ADDR_API AddrComputeSurfaceCoordFromAddr( - ADDR_HANDLE hLib, ///< address lib handle - const ADDR_COMPUTE_SURFACE_COORDFROMADDR_INPUT* pIn, ///< [in] surface info and address - ADDR_COMPUTE_SURFACE_COORDFROMADDR_OUTPUT* pOut) ///< [out] coordinates -{ - V1::Lib* pLib = V1::Lib::GetLib(hLib); - - ADDR_E_RETURNCODE returnCode = ADDR_OK; - - if (pLib != NULL) - { - returnCode = pLib->ComputeSurfaceCoordFromAddr(pIn, pOut); - } - else - { - returnCode = ADDR_ERROR; - } - - return returnCode; -} - - - -//////////////////////////////////////////////////////////////////////////////////////////////////// -// HTile functions -//////////////////////////////////////////////////////////////////////////////////////////////////// - -/** -**************************************************************************************************** -* AddrComputeHtileInfo -* -* @brief -* Compute Htile pitch, height, base alignment and size in bytes -* -* 
@return -* ADDR_OK if successful, otherwise an error code of ADDR_E_RETURNCODE -**************************************************************************************************** -*/ -ADDR_E_RETURNCODE ADDR_API AddrComputeHtileInfo( - ADDR_HANDLE hLib, ///< address lib handle - const ADDR_COMPUTE_HTILE_INFO_INPUT* pIn, ///< [in] Htile information - ADDR_COMPUTE_HTILE_INFO_OUTPUT* pOut) ///< [out] Htile pitch, height and size in bytes -{ - V1::Lib* pLib = V1::Lib::GetLib(hLib); - - ADDR_E_RETURNCODE returnCode = ADDR_OK; - - if (pLib != NULL) - { - returnCode = pLib->ComputeHtileInfo(pIn, pOut); - } - else - { - returnCode = ADDR_ERROR; - } - - return returnCode; -} - -/** -**************************************************************************************************** -* AddrComputeHtileAddrFromCoord -* -* @brief -* Compute Htile address according to coordinates (of depth buffer) -* -* @return -* ADDR_OK if successful, otherwise an error code of ADDR_E_RETURNCODE -**************************************************************************************************** -*/ -ADDR_E_RETURNCODE ADDR_API AddrComputeHtileAddrFromCoord( - ADDR_HANDLE hLib, ///< address lib handle - const ADDR_COMPUTE_HTILE_ADDRFROMCOORD_INPUT* pIn, ///< [in] Htile info and coordinates - ADDR_COMPUTE_HTILE_ADDRFROMCOORD_OUTPUT* pOut) ///< [out] Htile address -{ - V1::Lib* pLib = V1::Lib::GetLib(hLib); - - ADDR_E_RETURNCODE returnCode = ADDR_OK; - - if (pLib != NULL) - { - returnCode = pLib->ComputeHtileAddrFromCoord(pIn, pOut); - } - else - { - returnCode = ADDR_ERROR; - } - - return returnCode; -} - -/** -**************************************************************************************************** -* AddrComputeHtileCoordFromAddr -* -* @brief -* Compute coordinates within depth buffer (1st pixel of a micro tile) according to -* Htile address -* -* @return -* ADDR_OK if successful, otherwise an error code of ADDR_E_RETURNCODE 
-**************************************************************************************************** -*/ -ADDR_E_RETURNCODE ADDR_API AddrComputeHtileCoordFromAddr( - ADDR_HANDLE hLib, ///< address lib handle - const ADDR_COMPUTE_HTILE_COORDFROMADDR_INPUT* pIn, ///< [in] Htile info and address - ADDR_COMPUTE_HTILE_COORDFROMADDR_OUTPUT* pOut) ///< [out] Htile coordinates -{ - V1::Lib* pLib = V1::Lib::GetLib(hLib); - - ADDR_E_RETURNCODE returnCode = ADDR_OK; - - if (pLib != NULL) - { - returnCode = pLib->ComputeHtileCoordFromAddr(pIn, pOut); - } - else - { - returnCode = ADDR_ERROR; - } - - return returnCode; -} - - - -//////////////////////////////////////////////////////////////////////////////////////////////////// -// C-mask functions -//////////////////////////////////////////////////////////////////////////////////////////////////// - -/** -**************************************************************************************************** -* AddrComputeCmaskInfo -* -* @brief -* Compute Cmask pitch, height, base alignment and size in bytes from color buffer -* info -* -* @return -* ADDR_OK if successful, otherwise an error code of ADDR_E_RETURNCODE -**************************************************************************************************** -*/ -ADDR_E_RETURNCODE ADDR_API AddrComputeCmaskInfo( - ADDR_HANDLE hLib, ///< address lib handle - const ADDR_COMPUTE_CMASK_INFO_INPUT* pIn, ///< [in] Cmask pitch and height - ADDR_COMPUTE_CMASK_INFO_OUTPUT* pOut) ///< [out] Cmask pitch, height and size in bytes -{ - V1::Lib* pLib = V1::Lib::GetLib(hLib); - - ADDR_E_RETURNCODE returnCode = ADDR_OK; - - if (pLib != NULL) - { - returnCode = pLib->ComputeCmaskInfo(pIn, pOut); - } - else - { - returnCode = ADDR_ERROR; - } - - return returnCode; -} - -/** -**************************************************************************************************** -* AddrComputeCmaskAddrFromCoord -* -* @brief -* Compute Cmask address according to coordinates (of MSAA color 
buffer) -* -* @return -* ADDR_OK if successful, otherwise an error code of ADDR_E_RETURNCODE -**************************************************************************************************** -*/ -ADDR_E_RETURNCODE ADDR_API AddrComputeCmaskAddrFromCoord( - ADDR_HANDLE hLib, ///< address lib handle - const ADDR_COMPUTE_CMASK_ADDRFROMCOORD_INPUT* pIn, ///< [in] Cmask info and coordinates - ADDR_COMPUTE_CMASK_ADDRFROMCOORD_OUTPUT* pOut) ///< [out] Cmask address -{ - V1::Lib* pLib = V1::Lib::GetLib(hLib); - - ADDR_E_RETURNCODE returnCode = ADDR_OK; - - if (pLib != NULL) - { - returnCode = pLib->ComputeCmaskAddrFromCoord(pIn, pOut); - } - else - { - returnCode = ADDR_ERROR; - } - - return returnCode; -} - -/** -**************************************************************************************************** -* AddrComputeCmaskCoordFromAddr -* -* @brief -* Compute coordinates within color buffer (1st pixel of a micro tile) according to -* Cmask address -* -* @return -* ADDR_OK if successful, otherwise an error code of ADDR_E_RETURNCODE -**************************************************************************************************** -*/ -ADDR_E_RETURNCODE ADDR_API AddrComputeCmaskCoordFromAddr( - ADDR_HANDLE hLib, ///< address lib handle - const ADDR_COMPUTE_CMASK_COORDFROMADDR_INPUT* pIn, ///< [in] Cmask info and address - ADDR_COMPUTE_CMASK_COORDFROMADDR_OUTPUT* pOut) ///< [out] Cmask coordinates -{ - V1::Lib* pLib = V1::Lib::GetLib(hLib); - - ADDR_E_RETURNCODE returnCode = ADDR_OK; - - if (pLib != NULL) - { - returnCode = pLib->ComputeCmaskCoordFromAddr(pIn, pOut); - } - else - { - returnCode = ADDR_ERROR; - } - - return returnCode; -} - - - -//////////////////////////////////////////////////////////////////////////////////////////////////// -// F-mask functions -//////////////////////////////////////////////////////////////////////////////////////////////////// - -/** 
-**************************************************************************************************** -* AddrComputeFmaskInfo -* -* @brief -* Compute Fmask pitch/height/depth/alignments and size in bytes -* -* @return -* ADDR_OK if successful, otherwise an error code of ADDR_E_RETURNCODE -**************************************************************************************************** -*/ -ADDR_E_RETURNCODE ADDR_API AddrComputeFmaskInfo( - ADDR_HANDLE hLib, ///< address lib handle - const ADDR_COMPUTE_FMASK_INFO_INPUT* pIn, ///< [in] Fmask information - ADDR_COMPUTE_FMASK_INFO_OUTPUT* pOut) ///< [out] Fmask pitch and height -{ - V1::Lib* pLib = V1::Lib::GetLib(hLib); - - ADDR_E_RETURNCODE returnCode = ADDR_OK; - - if (pLib != NULL) - { - returnCode = pLib->ComputeFmaskInfo(pIn, pOut); - } - else - { - returnCode = ADDR_ERROR; - } - - return returnCode; -} - -/** -**************************************************************************************************** -* AddrComputeFmaskAddrFromCoord -* -* @brief -* Compute Fmask address according to coordinates (x,y,slice,sample,plane) -* -* @return -* ADDR_OK if successful, otherwise an error code of ADDR_E_RETURNCODE -**************************************************************************************************** -*/ -ADDR_E_RETURNCODE ADDR_API AddrComputeFmaskAddrFromCoord( - ADDR_HANDLE hLib, ///< address lib handle - const ADDR_COMPUTE_FMASK_ADDRFROMCOORD_INPUT* pIn, ///< [in] Fmask info and coordinates - ADDR_COMPUTE_FMASK_ADDRFROMCOORD_OUTPUT* pOut) ///< [out] Fmask address -{ - V1::Lib* pLib = V1::Lib::GetLib(hLib); - - ADDR_E_RETURNCODE returnCode = ADDR_OK; - - if (pLib != NULL) - { - returnCode = pLib->ComputeFmaskAddrFromCoord(pIn, pOut); - } - else - { - returnCode = ADDR_ERROR; - } - - return returnCode; -} - -/** -**************************************************************************************************** -* AddrComputeFmaskCoordFromAddr -* -* @brief -* Compute coordinates 
(x,y,slice,sample,plane) according to Fmask address -* -* @return -* ADDR_OK if successful, otherwise an error code of ADDR_E_RETURNCODE -**************************************************************************************************** -*/ -ADDR_E_RETURNCODE ADDR_API AddrComputeFmaskCoordFromAddr( - ADDR_HANDLE hLib, ///< address lib handle - const ADDR_COMPUTE_FMASK_COORDFROMADDR_INPUT* pIn, ///< [in] Fmask info and address - ADDR_COMPUTE_FMASK_COORDFROMADDR_OUTPUT* pOut) ///< [out] Fmask coordinates -{ - V1::Lib* pLib = V1::Lib::GetLib(hLib); - - ADDR_E_RETURNCODE returnCode = ADDR_OK; - - if (pLib != NULL) - { - returnCode = pLib->ComputeFmaskCoordFromAddr(pIn, pOut); - } - else - { - returnCode = ADDR_ERROR; - } - - return returnCode; -} - - - -//////////////////////////////////////////////////////////////////////////////////////////////////// -// DCC key functions -//////////////////////////////////////////////////////////////////////////////////////////////////// - -/** -**************************************************************************************************** -* AddrComputeDccInfo -* -* @brief -* Compute DCC key size, base alignment based on color surface size, tile info or tile index -* -**************************************************************************************************** -*/ -ADDR_E_RETURNCODE ADDR_API AddrComputeDccInfo( - ADDR_HANDLE hLib, ///< handle of addrlib - const ADDR_COMPUTE_DCCINFO_INPUT* pIn, ///< [in] input - ADDR_COMPUTE_DCCINFO_OUTPUT* pOut) ///< [out] output -{ - ADDR_E_RETURNCODE returnCode; - - V1::Lib* pLib = V1::Lib::GetLib(hLib); - - if (pLib != NULL) - { - returnCode = pLib->ComputeDccInfo(pIn, pOut); - } - else - { - returnCode = ADDR_ERROR; - } - - return returnCode; -} - - - -/////////////////////////////////////////////////////////////////////////////// -// Below functions are element related or helper functions -/////////////////////////////////////////////////////////////////////////////// - -/** 
-**************************************************************************************************** -* AddrGetVersion -* -* @brief -* Get AddrLib version number. Client may check this return value against ADDRLIB_VERSION -* defined in addrinterface.h to see if there is a mismatch. -**************************************************************************************************** -*/ -UINT_32 ADDR_API AddrGetVersion(ADDR_HANDLE hLib) -{ - UINT_32 version = 0; - - Addr::Lib* pLib = Lib::GetLib(hLib); - - ADDR_ASSERT(pLib != NULL); - - if (pLib) - { - version = pLib->GetVersion(); - } - - return version; -} - -/** -**************************************************************************************************** -* AddrUseTileIndex -* -* @brief -* Return TRUE if tileIndex is enabled in this address library -**************************************************************************************************** -*/ -BOOL_32 ADDR_API AddrUseTileIndex(ADDR_HANDLE hLib) -{ - BOOL_32 useTileIndex = FALSE; - - V1::Lib* pLib = V1::Lib::GetLib(hLib); - - ADDR_ASSERT(pLib != NULL); - - if (pLib) - { - useTileIndex = pLib->UseTileIndex(0); - } - - return useTileIndex; -} - -/** -**************************************************************************************************** -* AddrUseCombinedSwizzle -* -* @brief -* Return TRUE if combined swizzle is enabled in this address library -**************************************************************************************************** -*/ -BOOL_32 ADDR_API AddrUseCombinedSwizzle(ADDR_HANDLE hLib) -{ - BOOL_32 useCombinedSwizzle = FALSE; - - V1::Lib* pLib = V1::Lib::GetLib(hLib); - - ADDR_ASSERT(pLib != NULL); - - if (pLib) - { - useCombinedSwizzle = pLib->UseCombinedSwizzle(); - } - - return useCombinedSwizzle; -} - -/** -**************************************************************************************************** -* AddrExtractBankPipeSwizzle -* -* @brief -* Extract Bank and Pipe swizzle from base256b -* @return -* 
ADDR_OK if successful, otherwise an error code of ADDR_E_RETURNCODE -**************************************************************************************************** -*/ -ADDR_E_RETURNCODE ADDR_API AddrExtractBankPipeSwizzle( - ADDR_HANDLE hLib, ///< addrlib handle - const ADDR_EXTRACT_BANKPIPE_SWIZZLE_INPUT* pIn, ///< [in] input structure - ADDR_EXTRACT_BANKPIPE_SWIZZLE_OUTPUT* pOut) ///< [out] output structure -{ - ADDR_E_RETURNCODE returnCode = ADDR_OK; - - V1::Lib* pLib = V1::Lib::GetLib(hLib); - - if (pLib != NULL) - { - returnCode = pLib->ExtractBankPipeSwizzle(pIn, pOut); - } - else - { - returnCode = ADDR_ERROR; - } - - return returnCode; -} - -/** -**************************************************************************************************** -* AddrCombineBankPipeSwizzle -* -* @brief -* Combine Bank and Pipe swizzle -* @return -* ADDR_E_RETURNCODE -**************************************************************************************************** -*/ -ADDR_E_RETURNCODE ADDR_API AddrCombineBankPipeSwizzle( - ADDR_HANDLE hLib, - const ADDR_COMBINE_BANKPIPE_SWIZZLE_INPUT* pIn, - ADDR_COMBINE_BANKPIPE_SWIZZLE_OUTPUT* pOut) -{ - ADDR_E_RETURNCODE returnCode = ADDR_OK; - - V1::Lib* pLib = V1::Lib::GetLib(hLib); - - if (pLib != NULL) - { - returnCode = pLib->CombineBankPipeSwizzle(pIn, pOut); - } - else - { - returnCode = ADDR_ERROR; - } - - return returnCode; -} - -/** -**************************************************************************************************** -* AddrComputeSliceSwizzle -* -* @brief -* Compute a swizzle for slice from a base swizzle -* @return -* ADDR_OK if no error -**************************************************************************************************** -*/ -ADDR_E_RETURNCODE ADDR_API AddrComputeSliceSwizzle( - ADDR_HANDLE hLib, - const ADDR_COMPUTE_SLICESWIZZLE_INPUT* pIn, - ADDR_COMPUTE_SLICESWIZZLE_OUTPUT* pOut) -{ - ADDR_E_RETURNCODE returnCode = ADDR_OK; - - V1::Lib* pLib = V1::Lib::GetLib(hLib); - - if 
(pLib != NULL) - { - returnCode = pLib->ComputeSliceTileSwizzle(pIn, pOut); - } - else - { - returnCode = ADDR_ERROR; - } - - return returnCode; -} - -/** -**************************************************************************************************** -* AddrComputeBaseSwizzle -* -* @brief -* Return a Combined Bank and Pipe swizzle base on surface based on surface type/index -* @return -* ADDR_OK if no error -**************************************************************************************************** -*/ -ADDR_E_RETURNCODE ADDR_API AddrComputeBaseSwizzle( - ADDR_HANDLE hLib, - const ADDR_COMPUTE_BASE_SWIZZLE_INPUT* pIn, - ADDR_COMPUTE_BASE_SWIZZLE_OUTPUT* pOut) -{ - ADDR_E_RETURNCODE returnCode = ADDR_OK; - - V1::Lib* pLib = V1::Lib::GetLib(hLib); - - if (pLib != NULL) - { - returnCode = pLib->ComputeBaseSwizzle(pIn, pOut); - } - else - { - returnCode = ADDR_ERROR; - } - - return returnCode; -} - -/** -**************************************************************************************************** -* ElemFlt32ToDepthPixel -* -* @brief -* Convert a FLT_32 value to a depth/stencil pixel value -* -* @return -* ADDR_OK if successful, otherwise an error code of ADDR_E_RETURNCODE -* -**************************************************************************************************** -*/ -ADDR_E_RETURNCODE ADDR_API ElemFlt32ToDepthPixel( - ADDR_HANDLE hLib, ///< addrlib handle - const ELEM_FLT32TODEPTHPIXEL_INPUT* pIn, ///< [in] per-component value - ELEM_FLT32TODEPTHPIXEL_OUTPUT* pOut) ///< [out] final pixel value -{ - ADDR_E_RETURNCODE returnCode = ADDR_OK; - - Lib* pLib = Lib::GetLib(hLib); - - if (pLib != NULL) - { - pLib->Flt32ToDepthPixel(pIn, pOut); - } - else - { - returnCode = ADDR_ERROR; - } - - return returnCode; -} - -/** -**************************************************************************************************** -* ElemFlt32ToColorPixel -* -* @brief -* Convert a FLT_32 value to a red/green/blue/alpha pixel value -* -* @return -* 
ADDR_OK if successful, otherwise an error code of ADDR_E_RETURNCODE -* -**************************************************************************************************** -*/ -ADDR_E_RETURNCODE ADDR_API ElemFlt32ToColorPixel( - ADDR_HANDLE hLib, ///< addrlib handle - const ELEM_FLT32TOCOLORPIXEL_INPUT* pIn, ///< [in] format, surface number and swap value - ELEM_FLT32TOCOLORPIXEL_OUTPUT* pOut) ///< [out] final pixel value -{ - ADDR_E_RETURNCODE returnCode = ADDR_OK; - - Lib* pLib = Lib::GetLib(hLib); - - if (pLib != NULL) - { - pLib->Flt32ToColorPixel(pIn, pOut); - } - else - { - returnCode = ADDR_ERROR; - } - - return returnCode; -} - -/** -**************************************************************************************************** -* ElemGetExportNorm -* -* @brief -* Helper function to check one format can be EXPORT_NUM, -* which is a register CB_COLOR_INFO.SURFACE_FORMAT. -* FP16 can be reported as EXPORT_NORM for rv770 in r600 -* family -* -**************************************************************************************************** -*/ -BOOL_32 ADDR_API ElemGetExportNorm( - ADDR_HANDLE hLib, ///< addrlib handle - const ELEM_GETEXPORTNORM_INPUT* pIn) ///< [in] input structure -{ - Addr::Lib* pLib = Lib::GetLib(hLib); - BOOL_32 enabled = FALSE; - - MAYBE_UNUSED ADDR_E_RETURNCODE returnCode = ADDR_OK; - - if (pLib != NULL) - { - enabled = pLib->GetExportNorm(pIn); - } - else - { - returnCode = ADDR_ERROR; - } - - ADDR_ASSERT(returnCode == ADDR_OK); - - return enabled; -} - -/** -**************************************************************************************************** -* AddrConvertTileInfoToHW -* -* @brief -* Convert tile info from real value to hardware register value -* -* @return -* ADDR_OK if successful, otherwise an error code of ADDR_E_RETURNCODE -**************************************************************************************************** -*/ -ADDR_E_RETURNCODE ADDR_API AddrConvertTileInfoToHW( - ADDR_HANDLE hLib, ///< 
address lib handle - const ADDR_CONVERT_TILEINFOTOHW_INPUT* pIn, ///< [in] tile info with real value - ADDR_CONVERT_TILEINFOTOHW_OUTPUT* pOut) ///< [out] tile info with HW register value -{ - V1::Lib* pLib = V1::Lib::GetLib(hLib); - - ADDR_E_RETURNCODE returnCode = ADDR_OK; - - if (pLib != NULL) - { - returnCode = pLib->ConvertTileInfoToHW(pIn, pOut); - } - else - { - returnCode = ADDR_ERROR; - } - - return returnCode; -} - -/** -**************************************************************************************************** -* AddrConvertTileIndex -* -* @brief -* Convert tile index to tile mode/type/info -* -* @return -* ADDR_OK if successful, otherwise an error code of ADDR_E_RETURNCODE -**************************************************************************************************** -*/ -ADDR_E_RETURNCODE ADDR_API AddrConvertTileIndex( - ADDR_HANDLE hLib, ///< address lib handle - const ADDR_CONVERT_TILEINDEX_INPUT* pIn, ///< [in] input - tile index - ADDR_CONVERT_TILEINDEX_OUTPUT* pOut) ///< [out] tile mode/type/info -{ - V1::Lib* pLib = V1::Lib::GetLib(hLib); - - ADDR_E_RETURNCODE returnCode = ADDR_OK; - - if (pLib != NULL) - { - returnCode = pLib->ConvertTileIndex(pIn, pOut); - } - else - { - returnCode = ADDR_ERROR; - } - - return returnCode; -} - -/** -**************************************************************************************************** -* AddrGetMacroModeIndex -* -* @brief -* Get macro mode index based on input parameters -* -* @return -* ADDR_OK if successful, otherwise an error code of ADDR_E_RETURNCODE -**************************************************************************************************** -*/ -ADDR_E_RETURNCODE ADDR_API AddrGetMacroModeIndex( - ADDR_HANDLE hLib, ///< address lib handle - const ADDR_GET_MACROMODEINDEX_INPUT* pIn, ///< [in] input - ADDR_GET_MACROMODEINDEX_OUTPUT* pOut) ///< [out] macro mode index -{ - V1::Lib* pLib = V1::Lib::GetLib(hLib); - - ADDR_E_RETURNCODE returnCode; - - if (pLib != NULL) - { - 
returnCode = pLib->GetMacroModeIndex(pIn, pOut); - } - else - { - returnCode = ADDR_ERROR; - } - - return returnCode; -} - -/** -**************************************************************************************************** -* AddrConvertTileIndex1 -* -* @brief -* Convert tile index to tile mode/type/info -* -* @return -* ADDR_OK if successful, otherwise an error code of ADDR_E_RETURNCODE -**************************************************************************************************** -*/ -ADDR_E_RETURNCODE ADDR_API AddrConvertTileIndex1( - ADDR_HANDLE hLib, ///< address lib handle - const ADDR_CONVERT_TILEINDEX1_INPUT* pIn, ///< [in] input - tile index - ADDR_CONVERT_TILEINDEX_OUTPUT* pOut) ///< [out] tile mode/type/info -{ - V1::Lib* pLib = V1::Lib::GetLib(hLib); - - ADDR_E_RETURNCODE returnCode = ADDR_OK; - - if (pLib != NULL) - { - returnCode = pLib->ConvertTileIndex1(pIn, pOut); - } - else - { - returnCode = ADDR_ERROR; - } - - return returnCode; -} - -/** -**************************************************************************************************** -* AddrGetTileIndex -* -* @brief -* Get tile index from tile mode/type/info -* -* @return -* ADDR_OK if successful, otherwise an error code of ADDR_E_RETURNCODE -* -* @note -* Only meaningful for SI (and above) -**************************************************************************************************** -*/ -ADDR_E_RETURNCODE ADDR_API AddrGetTileIndex( - ADDR_HANDLE hLib, - const ADDR_GET_TILEINDEX_INPUT* pIn, - ADDR_GET_TILEINDEX_OUTPUT* pOut) -{ - V1::Lib* pLib = V1::Lib::GetLib(hLib); - - ADDR_E_RETURNCODE returnCode = ADDR_OK; - - if (pLib != NULL) - { - returnCode = pLib->GetTileIndex(pIn, pOut); - } - else - { - returnCode = ADDR_ERROR; - } - - return returnCode; -} - -/** -**************************************************************************************************** -* AddrComputePrtInfo -* -* @brief -* Interface function for ComputePrtInfo -* 
-**************************************************************************************************** -*/ -ADDR_E_RETURNCODE ADDR_API AddrComputePrtInfo( - ADDR_HANDLE hLib, - const ADDR_PRT_INFO_INPUT* pIn, - ADDR_PRT_INFO_OUTPUT* pOut) -{ - ADDR_E_RETURNCODE returnCode = ADDR_OK; - - V1::Lib* pLib = V1::Lib::GetLib(hLib); - - if (pLib != NULL) - { - returnCode = pLib->ComputePrtInfo(pIn, pOut); - } - else - { - returnCode = ADDR_ERROR; - } - - return returnCode; -} - -/** -**************************************************************************************************** -* AddrGetMaxAlignments -* -* @brief -* Convert maximum alignments -* -* @return -* ADDR_OK if successful, otherwise an error code of ADDR_E_RETURNCODE -**************************************************************************************************** -*/ -ADDR_E_RETURNCODE ADDR_API AddrGetMaxAlignments( - ADDR_HANDLE hLib, ///< address lib handle - ADDR_GET_MAX_ALINGMENTS_OUTPUT* pOut) ///< [out] output structure -{ - Addr::Lib* pLib = Lib::GetLib(hLib); - - ADDR_E_RETURNCODE returnCode = ADDR_OK; - - if (pLib != NULL) - { - returnCode = pLib->GetMaxAlignments(pOut); - } - else - { - returnCode = ADDR_ERROR; - } - - return returnCode; -} - -/** -**************************************************************************************************** -* AddrGetMaxMetaAlignments -* -* @brief -* Convert maximum alignments for metadata -* -* @return -* ADDR_OK if successful, otherwise an error code of ADDR_E_RETURNCODE -**************************************************************************************************** -*/ -ADDR_E_RETURNCODE ADDR_API AddrGetMaxMetaAlignments( - ADDR_HANDLE hLib, ///< address lib handle - ADDR_GET_MAX_ALINGMENTS_OUTPUT* pOut) ///< [out] output structure -{ - Addr::Lib* pLib = Lib::GetLib(hLib); - - ADDR_E_RETURNCODE returnCode = ADDR_OK; - - if (pLib != NULL) - { - returnCode = pLib->GetMaxMetaAlignments(pOut); - } - else - { - returnCode = ADDR_ERROR; - } - - return 
returnCode; -} - - -//////////////////////////////////////////////////////////////////////////////////////////////////// -// Surface functions for Addr2 -//////////////////////////////////////////////////////////////////////////////////////////////////// - -/** -**************************************************************************************************** -* Addr2ComputeSurfaceInfo -* -* @brief -* Calculate surface width/height/depth/alignments and suitable tiling mode -* -* @return -* ADDR_OK if successful, otherwise an error code of ADDR_E_RETURNCODE -**************************************************************************************************** -*/ -ADDR_E_RETURNCODE ADDR_API Addr2ComputeSurfaceInfo( - ADDR_HANDLE hLib, ///< address lib handle - const ADDR2_COMPUTE_SURFACE_INFO_INPUT* pIn, ///< [in] surface information - ADDR2_COMPUTE_SURFACE_INFO_OUTPUT* pOut) ///< [out] surface parameters and alignments -{ - V2::Lib* pLib = V2::Lib::GetLib(hLib); - - ADDR_E_RETURNCODE returnCode = ADDR_OK; - - if (pLib != NULL) - { - returnCode = pLib->ComputeSurfaceInfo(pIn, pOut); - } - else - { - returnCode = ADDR_ERROR; - } - - return returnCode; -} - - -/** -**************************************************************************************************** -* Addr2ComputeSurfaceAddrFromCoord -* -* @brief -* Compute surface address according to coordinates -* -* @return -* ADDR_OK if successful, otherwise an error code of ADDR_E_RETURNCODE -**************************************************************************************************** -*/ -ADDR_E_RETURNCODE ADDR_API Addr2ComputeSurfaceAddrFromCoord( - ADDR_HANDLE hLib, ///< address lib handle - const ADDR2_COMPUTE_SURFACE_ADDRFROMCOORD_INPUT* pIn, ///< [in] surface info and coordinates - ADDR2_COMPUTE_SURFACE_ADDRFROMCOORD_OUTPUT* pOut) ///< [out] surface address -{ - V2::Lib* pLib = V2::Lib::GetLib(hLib); - - ADDR_E_RETURNCODE returnCode = ADDR_OK; - - if (pLib != NULL) - { - returnCode = 
pLib->ComputeSurfaceAddrFromCoord(pIn, pOut); - } - else - { - returnCode = ADDR_ERROR; - } - - return returnCode; -} - - -/** -**************************************************************************************************** -* Addr2ComputeSurfaceCoordFromAddr -* -* @brief -* Compute coordinates according to surface address -* -* @return -* ADDR_OK if successful, otherwise an error code of ADDR_E_RETURNCODE -**************************************************************************************************** -*/ -ADDR_E_RETURNCODE ADDR_API Addr2ComputeSurfaceCoordFromAddr( - ADDR_HANDLE hLib, ///< address lib handle - const ADDR2_COMPUTE_SURFACE_COORDFROMADDR_INPUT* pIn, ///< [in] surface info and address - ADDR2_COMPUTE_SURFACE_COORDFROMADDR_OUTPUT* pOut) ///< [out] coordinates -{ - V2::Lib* pLib = V2::Lib::GetLib(hLib); - - ADDR_E_RETURNCODE returnCode = ADDR_OK; - - if (pLib != NULL) - { - returnCode = pLib->ComputeSurfaceCoordFromAddr(pIn, pOut); - } - else - { - returnCode = ADDR_ERROR; - } - - return returnCode; -} - - - -//////////////////////////////////////////////////////////////////////////////////////////////////// -// HTile functions for Addr2 -//////////////////////////////////////////////////////////////////////////////////////////////////// - -/** -**************************************************************************************************** -* Addr2ComputeHtileInfo -* -* @brief -* Compute Htile pitch, height, base alignment and size in bytes -* -* @return -* ADDR_OK if successful, otherwise an error code of ADDR_E_RETURNCODE -**************************************************************************************************** -*/ -ADDR_E_RETURNCODE ADDR_API Addr2ComputeHtileInfo( - ADDR_HANDLE hLib, ///< address lib handle - const ADDR2_COMPUTE_HTILE_INFO_INPUT* pIn, ///< [in] Htile information - ADDR2_COMPUTE_HTILE_INFO_OUTPUT* pOut) ///< [out] Htile pitch, height and size in bytes -{ - V2::Lib* pLib = V2::Lib::GetLib(hLib); - - 
ADDR_E_RETURNCODE returnCode = ADDR_OK; - - if (pLib != NULL) - { - returnCode = pLib->ComputeHtileInfo(pIn, pOut); - } - else - { - returnCode = ADDR_ERROR; - } - - return returnCode; -} - - -/** -**************************************************************************************************** -* Addr2ComputeHtileAddrFromCoord -* -* @brief -* Compute Htile address according to coordinates (of depth buffer) -* -* @return -* ADDR_OK if successful, otherwise an error code of ADDR_E_RETURNCODE -**************************************************************************************************** -*/ -ADDR_E_RETURNCODE ADDR_API Addr2ComputeHtileAddrFromCoord( - ADDR_HANDLE hLib, ///< address lib handle - const ADDR2_COMPUTE_HTILE_ADDRFROMCOORD_INPUT* pIn, ///< [in] Htile info and coordinates - ADDR2_COMPUTE_HTILE_ADDRFROMCOORD_OUTPUT* pOut) ///< [out] Htile address -{ - V2::Lib* pLib = V2::Lib::GetLib(hLib); - - ADDR_E_RETURNCODE returnCode = ADDR_OK; - - if (pLib != NULL) - { - returnCode = pLib->ComputeHtileAddrFromCoord(pIn, pOut); - } - else - { - returnCode = ADDR_ERROR; - } - - return returnCode; -} - - -/** -**************************************************************************************************** -* Addr2ComputeHtileCoordFromAddr -* -* @brief -* Compute coordinates within depth buffer (1st pixel of a micro tile) according to -* Htile address -* -* @return -* ADDR_OK if successful, otherwise an error code of ADDR_E_RETURNCODE -**************************************************************************************************** -*/ -ADDR_E_RETURNCODE ADDR_API Addr2ComputeHtileCoordFromAddr( - ADDR_HANDLE hLib, ///< address lib handle - const ADDR2_COMPUTE_HTILE_COORDFROMADDR_INPUT* pIn, ///< [in] Htile info and address - ADDR2_COMPUTE_HTILE_COORDFROMADDR_OUTPUT* pOut) ///< [out] Htile coordinates -{ - V2::Lib* pLib = V2::Lib::GetLib(hLib); - - ADDR_E_RETURNCODE returnCode = ADDR_OK; - - if (pLib != NULL) - { - returnCode = 
pLib->ComputeHtileCoordFromAddr(pIn, pOut); - } - else - { - returnCode = ADDR_ERROR; - } - - return returnCode; -} - - - -//////////////////////////////////////////////////////////////////////////////////////////////////// -// C-mask functions for Addr2 -//////////////////////////////////////////////////////////////////////////////////////////////////// - -/** -**************************************************************************************************** -* Addr2ComputeCmaskInfo -* -* @brief -* Compute Cmask pitch, height, base alignment and size in bytes from color buffer -* info -* -* @return -* ADDR_OK if successful, otherwise an error code of ADDR_E_RETURNCODE -**************************************************************************************************** -*/ -ADDR_E_RETURNCODE ADDR_API Addr2ComputeCmaskInfo( - ADDR_HANDLE hLib, ///< address lib handle - const ADDR2_COMPUTE_CMASK_INFO_INPUT* pIn, ///< [in] Cmask pitch and height - ADDR2_COMPUTE_CMASK_INFO_OUTPUT* pOut) ///< [out] Cmask pitch, height and size in bytes -{ - V2::Lib* pLib = V2::Lib::GetLib(hLib); - - ADDR_E_RETURNCODE returnCode = ADDR_OK; - - if (pLib != NULL) - { - returnCode = pLib->ComputeCmaskInfo(pIn, pOut); - } - else - { - returnCode = ADDR_ERROR; - } - - return returnCode; -} - - -/** -**************************************************************************************************** -* Addr2ComputeCmaskAddrFromCoord -* -* @brief -* Compute Cmask address according to coordinates (of MSAA color buffer) -* -* @return -* ADDR_OK if successful, otherwise an error code of ADDR_E_RETURNCODE -**************************************************************************************************** -*/ -ADDR_E_RETURNCODE ADDR_API Addr2ComputeCmaskAddrFromCoord( - ADDR_HANDLE hLib, ///< address lib handle - const ADDR2_COMPUTE_CMASK_ADDRFROMCOORD_INPUT* pIn, ///< [in] Cmask info and coordinates - ADDR2_COMPUTE_CMASK_ADDRFROMCOORD_OUTPUT* pOut) ///< [out] Cmask address -{ - V2::Lib* pLib = 
V2::Lib::GetLib(hLib); - - ADDR_E_RETURNCODE returnCode = ADDR_OK; - - if (pLib != NULL) - { - returnCode = pLib->ComputeCmaskAddrFromCoord(pIn, pOut); - } - else - { - returnCode = ADDR_ERROR; - } - - return returnCode; -} - - -/** -**************************************************************************************************** -* Addr2ComputeCmaskCoordFromAddr -* -* @brief -* Compute coordinates within color buffer (1st pixel of a micro tile) according to -* Cmask address -* -* @return -* ADDR_OK if successful, otherwise an error code of ADDR_E_RETURNCODE -**************************************************************************************************** -*/ -ADDR_E_RETURNCODE ADDR_API Addr2ComputeCmaskCoordFromAddr( - ADDR_HANDLE hLib, ///< address lib handle - const ADDR2_COMPUTE_CMASK_COORDFROMADDR_INPUT* pIn, ///< [in] Cmask info and address - ADDR2_COMPUTE_CMASK_COORDFROMADDR_OUTPUT* pOut) ///< [out] Cmask coordinates -{ - V2::Lib* pLib = V2::Lib::GetLib(hLib); - - ADDR_E_RETURNCODE returnCode = ADDR_OK; - - if (pLib != NULL) - { - returnCode = pLib->ComputeCmaskCoordFromAddr(pIn, pOut); - } - else - { - returnCode = ADDR_ERROR; - } - - return returnCode; -} - - - -//////////////////////////////////////////////////////////////////////////////////////////////////// -// F-mask functions for Addr2 -//////////////////////////////////////////////////////////////////////////////////////////////////// - -/** -**************************************************************************************************** -* Addr2ComputeFmaskInfo -* -* @brief -* Compute Fmask pitch/height/depth/alignments and size in bytes -* -* @return -* ADDR_OK if successful, otherwise an error code of ADDR_E_RETURNCODE -**************************************************************************************************** -*/ -ADDR_E_RETURNCODE ADDR_API Addr2ComputeFmaskInfo( - ADDR_HANDLE hLib, ///< address lib handle - const ADDR2_COMPUTE_FMASK_INFO_INPUT* pIn, ///< [in] Fmask 
information - ADDR2_COMPUTE_FMASK_INFO_OUTPUT* pOut) ///< [out] Fmask pitch and height -{ - V2::Lib* pLib = V2::Lib::GetLib(hLib); - - ADDR_E_RETURNCODE returnCode = ADDR_OK; - - if (pLib != NULL) - { - returnCode = pLib->ComputeFmaskInfo(pIn, pOut); - } - else - { - returnCode = ADDR_ERROR; - } - - return returnCode; -} - - -/** -**************************************************************************************************** -* Addr2ComputeFmaskAddrFromCoord -* -* @brief -* Compute Fmask address according to coordinates (x,y,slice,sample,plane) -* -* @return -* ADDR_OK if successful, otherwise an error code of ADDR_E_RETURNCODE -**************************************************************************************************** -*/ -ADDR_E_RETURNCODE ADDR_API Addr2ComputeFmaskAddrFromCoord( - ADDR_HANDLE hLib, ///< address lib handle - const ADDR2_COMPUTE_FMASK_ADDRFROMCOORD_INPUT* pIn, ///< [in] Fmask info and coordinates - ADDR2_COMPUTE_FMASK_ADDRFROMCOORD_OUTPUT* pOut) ///< [out] Fmask address -{ - V2::Lib* pLib = V2::Lib::GetLib(hLib); - - ADDR_E_RETURNCODE returnCode = ADDR_OK; - - if (pLib != NULL) - { - returnCode = pLib->ComputeFmaskAddrFromCoord(pIn, pOut); - } - else - { - returnCode = ADDR_ERROR; - } - - return returnCode; -} - - -/** -**************************************************************************************************** -* Addr2ComputeFmaskCoordFromAddr -* -* @brief -* Compute coordinates (x,y,slice,sample,plane) according to Fmask address -* -* @return -* ADDR_OK if successful, otherwise an error code of ADDR_E_RETURNCODE -**************************************************************************************************** -*/ -ADDR_E_RETURNCODE ADDR_API Addr2ComputeFmaskCoordFromAddr( - ADDR_HANDLE hLib, ///< address lib handle - const ADDR2_COMPUTE_FMASK_COORDFROMADDR_INPUT* pIn, ///< [in] Fmask info and address - ADDR2_COMPUTE_FMASK_COORDFROMADDR_OUTPUT* pOut) ///< [out] Fmask coordinates -{ - V2::Lib* pLib = V2::Lib::GetLib(hLib); 
- - ADDR_E_RETURNCODE returnCode = ADDR_OK; - - if (pLib != NULL) - { - returnCode = pLib->ComputeFmaskCoordFromAddr(pIn, pOut); - } - else - { - returnCode = ADDR_ERROR; - } - - return returnCode; -} - - - -//////////////////////////////////////////////////////////////////////////////////////////////////// -// DCC key functions for Addr2 -//////////////////////////////////////////////////////////////////////////////////////////////////// - -/** -**************************************************************************************************** -* Addr2ComputeDccInfo -* -* @brief -* Compute DCC key size, base alignment based on color surface size, tile info or tile index -* -**************************************************************************************************** -*/ -ADDR_E_RETURNCODE ADDR_API Addr2ComputeDccInfo( - ADDR_HANDLE hLib, ///< handle of addrlib - const ADDR2_COMPUTE_DCCINFO_INPUT* pIn, ///< [in] input - ADDR2_COMPUTE_DCCINFO_OUTPUT* pOut) ///< [out] output -{ - ADDR_E_RETURNCODE returnCode; - - V2::Lib* pLib = V2::Lib::GetLib(hLib); - - if (pLib != NULL) - { - returnCode = pLib->ComputeDccInfo(pIn, pOut); - } - else - { - returnCode = ADDR_ERROR; - } - - return returnCode; -} - -/** -**************************************************************************************************** -* Addr2ComputeDccAddrFromCoord -* -* @brief -* Compute DCC key address according to coordinates -* -* @return -* ADDR_OK if successful, otherwise an error code of ADDR_E_RETURNCODE -**************************************************************************************************** -*/ -ADDR_E_RETURNCODE ADDR_API Addr2ComputeDccAddrFromCoord( - ADDR_HANDLE hLib, ///< address lib handle - const ADDR2_COMPUTE_DCC_ADDRFROMCOORD_INPUT* pIn, ///< [in] Dcc info and coordinates - ADDR2_COMPUTE_DCC_ADDRFROMCOORD_OUTPUT* pOut) ///< [out] Dcc address -{ - V2::Lib* pLib = V2::Lib::GetLib(hLib); - - ADDR_E_RETURNCODE returnCode = ADDR_OK; - - if (pLib != NULL) - { - 
returnCode = pLib->ComputeDccAddrFromCoord(pIn, pOut); - } - else - { - returnCode = ADDR_ERROR; - } - - return returnCode; -} - -/** -**************************************************************************************************** -* Addr2ComputePipeBankXor -* -* @brief -* Calculate a valid bank pipe xor value for client to use. -**************************************************************************************************** -*/ -ADDR_E_RETURNCODE ADDR_API Addr2ComputePipeBankXor( - ADDR_HANDLE hLib, ///< handle of addrlib - const ADDR2_COMPUTE_PIPEBANKXOR_INPUT* pIn, ///< [in] input - ADDR2_COMPUTE_PIPEBANKXOR_OUTPUT* pOut) ///< [out] output -{ - ADDR_E_RETURNCODE returnCode; - - V2::Lib* pLib = V2::Lib::GetLib(hLib); - - if (pLib != NULL) - { - returnCode = pLib->ComputePipeBankXor(pIn, pOut); - } - else - { - returnCode = ADDR_ERROR; - } - - return returnCode; -} - -/** -**************************************************************************************************** -* Addr2ComputeSlicePipeBankXor -* -* @brief -* Calculate slice pipe bank xor value based on base pipe bank xor and slice id. -**************************************************************************************************** -*/ -ADDR_E_RETURNCODE ADDR_API Addr2ComputeSlicePipeBankXor( - ADDR_HANDLE hLib, ///< handle of addrlib - const ADDR2_COMPUTE_SLICE_PIPEBANKXOR_INPUT* pIn, ///< [in] input - ADDR2_COMPUTE_SLICE_PIPEBANKXOR_OUTPUT* pOut) ///< [out] output -{ - ADDR_E_RETURNCODE returnCode; - - V2::Lib* pLib = V2::Lib::GetLib(hLib); - - if (pLib != NULL) - { - returnCode = pLib->ComputeSlicePipeBankXor(pIn, pOut); - } - else - { - returnCode = ADDR_ERROR; - } - - return returnCode; -} - -/** -**************************************************************************************************** -* Addr2ComputeSubResourceOffsetForSwizzlePattern -* -* @brief -* Calculate sub resource offset for swizzle pattern. 
-**************************************************************************************************** -*/ -ADDR_E_RETURNCODE ADDR_API Addr2ComputeSubResourceOffsetForSwizzlePattern( - ADDR_HANDLE hLib, ///< handle of addrlib - const ADDR2_COMPUTE_SUBRESOURCE_OFFSET_FORSWIZZLEPATTERN_INPUT* pIn, ///< [in] input - ADDR2_COMPUTE_SUBRESOURCE_OFFSET_FORSWIZZLEPATTERN_OUTPUT* pOut) ///< [out] output -{ - ADDR_E_RETURNCODE returnCode; - - V2::Lib* pLib = V2::Lib::GetLib(hLib); - - if (pLib != NULL) - { - returnCode = pLib->ComputeSubResourceOffsetForSwizzlePattern(pIn, pOut); - } - else - { - returnCode = ADDR_ERROR; - } - - return returnCode; -} - -/** -**************************************************************************************************** -* Addr2GetPreferredSurfaceSetting -* -* @brief -* Suggest a preferred setting for client driver to program HW register -**************************************************************************************************** -*/ -ADDR_E_RETURNCODE ADDR_API Addr2GetPreferredSurfaceSetting( - ADDR_HANDLE hLib, ///< handle of addrlib - const ADDR2_GET_PREFERRED_SURF_SETTING_INPUT* pIn, ///< [in] input - ADDR2_GET_PREFERRED_SURF_SETTING_OUTPUT* pOut) ///< [out] output -{ - ADDR_E_RETURNCODE returnCode; - - V2::Lib* pLib = V2::Lib::GetLib(hLib); - - if (pLib != NULL) - { - returnCode = pLib->Addr2GetPreferredSurfaceSetting(pIn, pOut); - } - else - { - returnCode = ADDR_ERROR; - } - - return returnCode; -} - -/** -**************************************************************************************************** -* Addr2IsValidDisplaySwizzleMode -* -* @brief -* Return whether the swizzle mode is supported by DCE / DCN. 
-**************************************************************************************************** -*/ -ADDR_E_RETURNCODE ADDR_API Addr2IsValidDisplaySwizzleMode( - ADDR_HANDLE hLib, - AddrSwizzleMode swizzleMode, - UINT_32 bpp, - bool *result) -{ - ADDR_E_RETURNCODE returnCode; - - V2::Lib* pLib = V2::Lib::GetLib(hLib); - - if (pLib != NULL) - { - ADDR2_COMPUTE_SURFACE_INFO_INPUT in; - in.swizzleMode = swizzleMode; - in.bpp = bpp; - - *result = pLib->IsValidDisplaySwizzleMode(&in); - returnCode = ADDR_OK; - } - else - { - returnCode = ADDR_ERROR; - } - - return returnCode; -} diff -Nru mesa-18.3.3/src/amd/addrlib/addrinterface.h mesa-19.0.1/src/amd/addrlib/addrinterface.h --- mesa-18.3.3/src/amd/addrlib/addrinterface.h 2018-04-03 17:32:26.000000000 +0000 +++ mesa-19.0.1/src/amd/addrlib/addrinterface.h 1970-01-01 00:00:00.000000000 +0000 @@ -1,3717 +0,0 @@ -/* - * Copyright © 2014 Advanced Micro Devices, Inc. - * All Rights Reserved. - * - * Permission is hereby granted, free of charge, to any person obtaining - * a copy of this software and associated documentation files (the - * "Software"), to deal in the Software without restriction, including - * without limitation the rights to use, copy, modify, merge, publish, - * distribute, sub license, and/or sell copies of the Software, and to - * permit persons to whom the Software is furnished to do so, subject to - * the following conditions: - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES - * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - * NON-INFRINGEMENT. IN NO EVENT SHALL THE COPYRIGHT HOLDERS, AUTHORS - * AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, - * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE - * USE OR OTHER DEALINGS IN THE SOFTWARE. 
- * - * The above copyright notice and this permission notice (including the - * next paragraph) shall be included in all copies or substantial portions - * of the Software. - */ - -/** -**************************************************************************************************** -* @file addrinterface.h -* @brief Contains the addrlib interfaces declaration and parameter defines -**************************************************************************************************** -*/ -#ifndef __ADDR_INTERFACE_H__ -#define __ADDR_INTERFACE_H__ - -#include "addrtypes.h" - -#if defined(__cplusplus) -extern "C" -{ -#endif - -#define ADDRLIB_VERSION_MAJOR 6 -#define ADDRLIB_VERSION_MINOR 2 -#define ADDRLIB_VERSION ((ADDRLIB_VERSION_MAJOR << 16) | ADDRLIB_VERSION_MINOR) - -/// Virtually all interface functions need ADDR_HANDLE as first parameter -typedef VOID* ADDR_HANDLE; - -/// Client handle used in callbacks -typedef VOID* ADDR_CLIENT_HANDLE; - -/** -* ///////////////////////////////////////////////////////////////////////////////////////////////// -* // Callback functions -* ///////////////////////////////////////////////////////////////////////////////////////////////// -* typedef VOID* (ADDR_API* ADDR_ALLOCSYSMEM)( -* const ADDR_ALLOCSYSMEM_INPUT* pInput); -* typedef ADDR_E_RETURNCODE (ADDR_API* ADDR_FREESYSMEM)( -* VOID* pVirtAddr); -* typedef ADDR_E_RETURNCODE (ADDR_API* ADDR_DEBUGPRINT)( -* const ADDR_DEBUGPRINT_INPUT* pInput); -* -* ///////////////////////////////////////////////////////////////////////////////////////////////// -* // Create/Destroy/Config functions -* ///////////////////////////////////////////////////////////////////////////////////////////////// -* AddrCreate() -* AddrDestroy() -* -* ///////////////////////////////////////////////////////////////////////////////////////////////// -* // Surface functions -* ///////////////////////////////////////////////////////////////////////////////////////////////// -* AddrComputeSurfaceInfo() -* 
AddrComputeSurfaceAddrFromCoord() -* AddrComputeSurfaceCoordFromAddr() -* -* ///////////////////////////////////////////////////////////////////////////////////////////////// -* // HTile functions -* ///////////////////////////////////////////////////////////////////////////////////////////////// -* AddrComputeHtileInfo() -* AddrComputeHtileAddrFromCoord() -* AddrComputeHtileCoordFromAddr() -* -* ///////////////////////////////////////////////////////////////////////////////////////////////// -* // C-mask functions -* ///////////////////////////////////////////////////////////////////////////////////////////////// -* AddrComputeCmaskInfo() -* AddrComputeCmaskAddrFromCoord() -* AddrComputeCmaskCoordFromAddr() -* -* ///////////////////////////////////////////////////////////////////////////////////////////////// -* // F-mask functions -* ///////////////////////////////////////////////////////////////////////////////////////////////// -* AddrComputeFmaskInfo() -* AddrComputeFmaskAddrFromCoord() -* AddrComputeFmaskCoordFromAddr() -* -* ///////////////////////////////////////////////////////////////////////////////////////////////// -* // Element/Utility functions -* ///////////////////////////////////////////////////////////////////////////////////////////////// -* ElemFlt32ToDepthPixel() -* ElemFlt32ToColorPixel() -* AddrExtractBankPipeSwizzle() -* AddrCombineBankPipeSwizzle() -* AddrComputeSliceSwizzle() -* AddrConvertTileInfoToHW() -* AddrConvertTileIndex() -* AddrConvertTileIndex1() -* AddrGetTileIndex() -* AddrComputeBaseSwizzle() -* AddrUseTileIndex() -* AddrUseCombinedSwizzle() -* -**/ - -//////////////////////////////////////////////////////////////////////////////////////////////////// -// Callback functions -//////////////////////////////////////////////////////////////////////////////////////////////////// - -/** -**************************************************************************************************** -* @brief channel setting structure 
-**************************************************************************************************** -*/ -typedef union _ADDR_CHANNEL_SETTING -{ - struct - { - UINT_8 valid : 1; ///< Indicate whehter this channel setting is valid - UINT_8 channel : 2; ///< 0 for x channel, 1 for y channel, 2 for z channel - UINT_8 index : 5; ///< Channel index - }; - UINT_8 value; ///< Value -} ADDR_CHANNEL_SETTING; - -/** -**************************************************************************************************** -* @brief address equation key structure -**************************************************************************************************** -*/ -typedef union _ADDR_EQUATION_KEY -{ - struct - { - UINT_32 log2ElementBytes : 3; ///< Log2 of Bytes per pixel - UINT_32 tileMode : 5; ///< Tile mode - UINT_32 microTileType : 3; ///< Micro tile type - UINT_32 pipeConfig : 5; ///< pipe config - UINT_32 numBanksLog2 : 3; ///< Number of banks log2 - UINT_32 bankWidth : 4; ///< Bank width - UINT_32 bankHeight : 4; ///< Bank height - UINT_32 macroAspectRatio : 3; ///< Macro tile aspect ratio - UINT_32 prt : 1; ///< SI only, indicate whether this equation is for prt - UINT_32 reserved : 1; ///< Reserved bit - } fields; - UINT_32 value; -} ADDR_EQUATION_KEY; - -/** -**************************************************************************************************** -* @brief address equation structure -**************************************************************************************************** -*/ -#define ADDR_MAX_EQUATION_BIT 20u - -// Invalid equation index -#define ADDR_INVALID_EQUATION_INDEX 0xFFFFFFFF - -typedef struct _ADDR_EQUATION -{ - ADDR_CHANNEL_SETTING addr[ADDR_MAX_EQUATION_BIT]; ///< addr setting - ///< each bit is result of addr ^ xor ^ xor2 - ADDR_CHANNEL_SETTING xor1[ADDR_MAX_EQUATION_BIT]; ///< xor setting - ADDR_CHANNEL_SETTING xor2[ADDR_MAX_EQUATION_BIT]; ///< xor2 setting - UINT_32 numBits; ///< The number of bits in equation - BOOL_32 
stackedDepthSlices; ///< TRUE if depth slices are treated as being - ///< stacked vertically prior to swizzling -} ADDR_EQUATION; - - -/** -**************************************************************************************************** -* @brief Alloc system memory flags. -* @note These flags are reserved for future use and if flags are added will minimize the impact -* of the client. -**************************************************************************************************** -*/ -typedef union _ADDR_ALLOCSYSMEM_FLAGS -{ - struct - { - UINT_32 reserved : 32; ///< Reserved for future use. - } fields; - UINT_32 value; - -} ADDR_ALLOCSYSMEM_FLAGS; - -/** -**************************************************************************************************** -* @brief Alloc system memory input structure -**************************************************************************************************** -*/ -typedef struct _ADDR_ALLOCSYSMEM_INPUT -{ - UINT_32 size; ///< Size of this structure in bytes - - ADDR_ALLOCSYSMEM_FLAGS flags; ///< System memory flags. - UINT_32 sizeInBytes; ///< System memory allocation size in bytes. - ADDR_CLIENT_HANDLE hClient; ///< Client handle -} ADDR_ALLOCSYSMEM_INPUT; - -/** -**************************************************************************************************** -* ADDR_ALLOCSYSMEM -* @brief -* Allocate system memory callback function. Returns valid pointer on success. 
-**************************************************************************************************** -*/ -typedef VOID* (ADDR_API* ADDR_ALLOCSYSMEM)( - const ADDR_ALLOCSYSMEM_INPUT* pInput); - -/** -**************************************************************************************************** -* @brief Free system memory input structure -**************************************************************************************************** -*/ -typedef struct _ADDR_FREESYSMEM_INPUT -{ - UINT_32 size; ///< Size of this structure in bytes - - VOID* pVirtAddr; ///< Virtual address - ADDR_CLIENT_HANDLE hClient; ///< Client handle -} ADDR_FREESYSMEM_INPUT; - -/** -**************************************************************************************************** -* ADDR_FREESYSMEM -* @brief -* Free system memory callback function. -* Returns ADDR_OK on success. -**************************************************************************************************** -*/ -typedef ADDR_E_RETURNCODE (ADDR_API* ADDR_FREESYSMEM)( - const ADDR_FREESYSMEM_INPUT* pInput); - -/** -**************************************************************************************************** -* @brief Print debug message input structure -**************************************************************************************************** -*/ -typedef struct _ADDR_DEBUGPRINT_INPUT -{ - UINT_32 size; ///< Size of this structure in bytes - - CHAR* pDebugString; ///< Debug print string - va_list ap; ///< Variable argument list - ADDR_CLIENT_HANDLE hClient; ///< Client handle -} ADDR_DEBUGPRINT_INPUT; - -/** -**************************************************************************************************** -* ADDR_DEBUGPRINT -* @brief -* Print debug message callback function. -* Returns ADDR_OK on success. 
-**************************************************************************************************** -*/ -typedef ADDR_E_RETURNCODE (ADDR_API* ADDR_DEBUGPRINT)( - const ADDR_DEBUGPRINT_INPUT* pInput); - -/** -**************************************************************************************************** -* ADDR_CALLBACKS -* -* @brief -* Address Library needs client to provide system memory alloc/free routines. -**************************************************************************************************** -*/ -typedef struct _ADDR_CALLBACKS -{ - ADDR_ALLOCSYSMEM allocSysMem; ///< Routine to allocate system memory - ADDR_FREESYSMEM freeSysMem; ///< Routine to free system memory - ADDR_DEBUGPRINT debugPrint; ///< Routine to print debug message -} ADDR_CALLBACKS; - -//////////////////////////////////////////////////////////////////////////////////////////////////// -// Create/Destroy functions -//////////////////////////////////////////////////////////////////////////////////////////////////// - -/** -**************************************************************************************************** -* ADDR_CREATE_FLAGS -* -* @brief -* This structure is used to pass some setup in creation of AddrLib -* @note -**************************************************************************************************** -*/ -typedef union _ADDR_CREATE_FLAGS -{ - struct - { - UINT_32 noCubeMipSlicesPad : 1; ///< Turn cubemap faces padding off - UINT_32 fillSizeFields : 1; ///< If clients fill size fields in all input and - /// output structure - UINT_32 useTileIndex : 1; ///< Make tileIndex field in input valid - UINT_32 useCombinedSwizzle : 1; ///< Use combined tile swizzle - UINT_32 checkLast2DLevel : 1; ///< Check the last 2D mip sub level - UINT_32 useHtileSliceAlign : 1; ///< Do htile single slice alignment - UINT_32 allowLargeThickTile : 1; ///< Allow 64*thickness*bytesPerPixel > rowSize - UINT_32 reserved : 25; ///< Reserved bits for future use - }; - - UINT_32 
value; -} ADDR_CREATE_FLAGS; - -/** -**************************************************************************************************** -* ADDR_REGISTER_VALUE -* -* @brief -* Data from registers to setup AddrLib global data, used in AddrCreate -**************************************************************************************************** -*/ -typedef struct _ADDR_REGISTER_VALUE -{ - UINT_32 gbAddrConfig; ///< For R8xx, use GB_ADDR_CONFIG register value. - /// For R6xx/R7xx, use GB_TILING_CONFIG. - /// But they can be treated as the same. - /// if this value is 0, use chip to set default value - UINT_32 backendDisables; ///< 1 bit per backend, starting with LSB. 1=disabled,0=enabled. - /// Register value of CC_RB_BACKEND_DISABLE.BACKEND_DISABLE - - /// R800 registers----------------------------------------------- - UINT_32 noOfBanks; ///< Number of h/w ram banks - For r800: MC_ARB_RAMCFG.NOOFBANK - /// No enums for this value in h/w header files - /// 0: 4 - /// 1: 8 - /// 2: 16 - UINT_32 noOfRanks; /// MC_ARB_RAMCFG.NOOFRANK - /// 0: 1 - /// 1: 2 - /// SI (R1000) registers----------------------------------------- - const UINT_32* pTileConfig; ///< Global tile setting tables - UINT_32 noOfEntries; ///< Number of entries in pTileConfig - - ///< CI registers------------------------------------------------- - const UINT_32* pMacroTileConfig; ///< Global macro tile mode table - UINT_32 noOfMacroEntries; ///< Number of entries in pMacroTileConfig - - ///< GFX9 HW parameters - UINT_32 blockVarSizeLog2; ///< SW_VAR_* block size -} ADDR_REGISTER_VALUE; - -/** -**************************************************************************************************** -* ADDR_CREATE_INPUT -* -* @brief -* Parameters use to create an AddrLib Object. Caller must provide all fields. 
-* -**************************************************************************************************** -*/ -typedef struct _ADDR_CREATE_INPUT -{ - UINT_32 size; ///< Size of this structure in bytes - - UINT_32 chipEngine; ///< Chip Engine - UINT_32 chipFamily; ///< Chip Family - UINT_32 chipRevision; ///< Chip Revision - ADDR_CALLBACKS callbacks; ///< Callbacks for sysmem alloc/free/print - ADDR_CREATE_FLAGS createFlags; ///< Flags to setup AddrLib - ADDR_REGISTER_VALUE regValue; ///< Data from registers to setup AddrLib global data - ADDR_CLIENT_HANDLE hClient; ///< Client handle - UINT_32 minPitchAlignPixels; ///< Minimum pitch alignment in pixels -} ADDR_CREATE_INPUT; - -/** -**************************************************************************************************** -* ADDR_CREATEINFO_OUTPUT -* -* @brief -* Return AddrLib handle to client driver -* -**************************************************************************************************** -*/ -typedef struct _ADDR_CREATE_OUTPUT -{ - UINT_32 size; ///< Size of this structure in bytes - - ADDR_HANDLE hLib; ///< Address lib handle - - UINT_32 numEquations; ///< Number of equations in the table - const ADDR_EQUATION* pEquationTable; ///< Pointer to the equation table -} ADDR_CREATE_OUTPUT; - -/** -**************************************************************************************************** -* AddrCreate -* -* @brief -* Create AddrLib object, must be called before any interface calls -* -* @return -* ADDR_OK if successful -**************************************************************************************************** -*/ -ADDR_E_RETURNCODE ADDR_API AddrCreate( - const ADDR_CREATE_INPUT* pAddrCreateIn, - ADDR_CREATE_OUTPUT* pAddrCreateOut); - - - -/** -**************************************************************************************************** -* AddrDestroy -* -* @brief -* Destroy AddrLib object, must be called to free internally allocated resources. 
-* -* @return -* ADDR_OK if successful -**************************************************************************************************** -*/ -ADDR_E_RETURNCODE ADDR_API AddrDestroy( - ADDR_HANDLE hLib); - - - -//////////////////////////////////////////////////////////////////////////////////////////////////// -// Surface functions -//////////////////////////////////////////////////////////////////////////////////////////////////// - -/** -**************************************************************************************************** -* @brief -* Bank/tiling parameters. On function input, these can be set as desired or -* left 0 for AddrLib to calculate/default. On function output, these are the actual -* parameters used. -* @note -* Valid bankWidth/bankHeight value: -* 1,2,4,8. They are factors instead of pixels or bytes. -* -* The bank number remains constant across each row of the -* macro tile as each pipe is selected, so the number of -* tiles in the x direction with the same bank number will -* be bank_width * num_pipes. -**************************************************************************************************** -*/ -typedef struct _ADDR_TILEINFO -{ - /// Any of these parameters can be set to 0 to use the HW default. - UINT_32 banks; ///< Number of banks, numerical value - UINT_32 bankWidth; ///< Number of tiles in the X direction in the same bank - UINT_32 bankHeight; ///< Number of tiles in the Y direction in the same bank - UINT_32 macroAspectRatio; ///< Macro tile aspect ratio. 1-1:1, 2-4:1, 4-16:1, 8-64:1 - UINT_32 tileSplitBytes; ///< Tile split size, in bytes - AddrPipeCfg pipeConfig; ///< Pipe Config = HW enum + 1 -} ADDR_TILEINFO; - -// Create a define to avoid client change. 
The removal of R800 is because we plan to implement SI -// within 800 HWL - An AddrPipeCfg is added in above data structure -typedef ADDR_TILEINFO ADDR_R800_TILEINFO; - -/** -**************************************************************************************************** -* @brief -* Information needed by quad buffer stereo support -**************************************************************************************************** -*/ -typedef struct _ADDR_QBSTEREOINFO -{ - UINT_32 eyeHeight; ///< Height (in pixel rows) to right eye - UINT_32 rightOffset; ///< Offset (in bytes) to right eye - UINT_32 rightSwizzle; ///< TileSwizzle for right eyes -} ADDR_QBSTEREOINFO; - -/** -**************************************************************************************************** -* ADDR_SURFACE_FLAGS -* -* @brief -* Surface flags -**************************************************************************************************** -*/ -typedef union _ADDR_SURFACE_FLAGS -{ - struct - { - UINT_32 color : 1; ///< Flag indicates this is a color buffer - UINT_32 depth : 1; ///< Flag indicates this is a depth/stencil buffer - UINT_32 stencil : 1; ///< Flag indicates this is a stencil buffer - UINT_32 texture : 1; ///< Flag indicates this is a texture - UINT_32 cube : 1; ///< Flag indicates this is a cubemap - UINT_32 volume : 1; ///< Flag indicates this is a volume texture - UINT_32 fmask : 1; ///< Flag indicates this is an fmask - UINT_32 cubeAsArray : 1; ///< Flag indicates if treat cubemap as arrays - UINT_32 compressZ : 1; ///< Flag indicates z buffer is compressed - UINT_32 overlay : 1; ///< Flag indicates this is an overlay surface - UINT_32 noStencil : 1; ///< Flag indicates this depth has no separate stencil - UINT_32 display : 1; ///< Flag indicates this should match display controller req. - UINT_32 opt4Space : 1; ///< Flag indicates this surface should be optimized for space - /// i.e. 
save some memory but may lose performance - UINT_32 prt : 1; ///< Flag for partially resident texture - UINT_32 qbStereo : 1; ///< Quad buffer stereo surface - UINT_32 pow2Pad : 1; ///< SI: Pad to pow2, must set for mipmap (include level0) - UINT_32 interleaved : 1; ///< Special flag for interleaved YUV surface padding - UINT_32 tcCompatible : 1; ///< Flag indicates surface needs to be shader readable - UINT_32 dispTileType : 1; ///< NI: force display Tiling for 128 bit shared resoruce - UINT_32 dccCompatible : 1; ///< VI: whether to make MSAA surface support dcc fast clear - UINT_32 dccPipeWorkaround : 1; ///< VI: whether to workaround the HW limit that - /// dcc can't be enabled if pipe config of tile mode - /// is different from that of ASIC, this flag - /// is address lib internal flag, client should ignore it - UINT_32 czDispCompatible : 1; ///< SI+: CZ family has a HW bug needs special alignment. - /// This flag indicates we need to follow the - /// alignment with CZ families or other ASICs under - /// PX configuration + CZ. - UINT_32 nonSplit : 1; ///< CI: depth texture should not be split - UINT_32 disableLinearOpt : 1; ///< Disable tile mode optimization to linear - UINT_32 needEquation : 1; ///< Make the surface tile setting equation compatible. - /// This flag indicates we need to override tile - /// mode to PRT_* tile mode to disable slice rotation, - /// which is needed by swizzle pattern equation. - UINT_32 skipIndicesOutput : 1; ///< Skipping indices in output. 
- UINT_32 rotateDisplay : 1; ///< Rotate micro tile type - UINT_32 minimizeAlignment : 1; ///< Minimize alignment - UINT_32 preferEquation : 1; ///< Return equation index without adjusting tile mode - UINT_32 matchStencilTileCfg : 1; ///< Select tile index of stencil as well as depth surface - /// to make sure they share same tile config parameters - UINT_32 disallowLargeThickDegrade : 1; ///< Disallow large thick tile degrade - UINT_32 reserved : 1; ///< Reserved bits - }; - - UINT_32 value; -} ADDR_SURFACE_FLAGS; - -/** -**************************************************************************************************** -* ADDR_COMPUTE_SURFACE_INFO_INPUT -* -* @brief -* Input structure for AddrComputeSurfaceInfo -**************************************************************************************************** -*/ -typedef struct _ADDR_COMPUTE_SURFACE_INFO_INPUT -{ - UINT_32 size; ///< Size of this structure in bytes - - AddrTileMode tileMode; ///< Tile mode - AddrFormat format; ///< If format is set to valid one, bpp/width/height - /// might be overwritten - UINT_32 bpp; ///< Bits per pixel - UINT_32 numSamples; ///< Number of samples - UINT_32 width; ///< Width, in pixels - UINT_32 height; ///< Height, in pixels - UINT_32 numSlices; ///< Number of surface slices or depth - UINT_32 slice; ///< Slice index - UINT_32 mipLevel; ///< Current mipmap level - UINT_32 numMipLevels; ///< Number of mips in mip chain - ADDR_SURFACE_FLAGS flags; ///< Surface type flags - UINT_32 numFrags; ///< Number of fragments, leave it zero or the same as - /// number of samples for normal AA; Set it to the - /// number of fragments for EQAA - /// r800 and later HWL parameters - // Needed by 2D tiling, for linear and 1D tiling, just keep them 0's - ADDR_TILEINFO* pTileInfo; ///< 2D tile parameters. 
Set to 0 to default/calculate - AddrTileType tileType; ///< Micro tiling type, not needed when tileIndex != -1 - INT_32 tileIndex; ///< Tile index, MUST be -1 if you don't want to use it - /// while the global useTileIndex is set to 1 - UINT_32 basePitch; ///< Base level pitch in pixels, 0 means ignored, is a - /// must for mip levels from SI+. - /// Don't use pitch in blocks for compressed formats! - UINT_32 maxBaseAlign; ///< Max base alignment request from client - UINT_32 pitchAlign; ///< Pitch alignment request from client - UINT_32 heightAlign; ///< Height alignment request from client -} ADDR_COMPUTE_SURFACE_INFO_INPUT; - -/** -**************************************************************************************************** -* ADDR_COMPUTE_SURFACE_INFO_OUTPUT -* -* @brief -* Output structure for AddrComputeSurfInfo -* @note - Element: AddrLib unit for computing. e.g. BCn: 4x4 blocks; R32B32B32: 32bit with 3x pitch - Pixel: Original pixel -**************************************************************************************************** -*/ -typedef struct _ADDR_COMPUTE_SURFACE_INFO_OUTPUT -{ - UINT_32 size; ///< Size of this structure in bytes - - UINT_32 pitch; ///< Pitch in elements (in blocks for compressed formats) - UINT_32 height; ///< Height in elements (in blocks for compressed formats) - UINT_32 depth; ///< Number of slice/depth - UINT_64 surfSize; ///< Surface size in bytes - AddrTileMode tileMode; ///< Actual tile mode. May differ from that in input - UINT_32 baseAlign; ///< Base address alignment - UINT_32 pitchAlign; ///< Pitch alignment, in elements - UINT_32 heightAlign; ///< Height alignment, in elements - UINT_32 depthAlign; ///< Depth alignment, aligned to thickness, for 3d texture - UINT_32 bpp; ///< Bits per elements (e.g. 
blocks for BCn, 1/3 for 96bit) - UINT_32 pixelPitch; ///< Pitch in original pixels - UINT_32 pixelHeight; ///< Height in original pixels - UINT_32 pixelBits; ///< Original bits per pixel, passed from input - UINT_64 sliceSize; ///< Size of slice specified by input's slice - /// The result is controlled by surface flags & createFlags - /// By default this value equals to surfSize for volume - UINT_32 pitchTileMax; ///< PITCH_TILE_MAX value for h/w register - UINT_32 heightTileMax; ///< HEIGHT_TILE_MAX value for h/w register - UINT_32 sliceTileMax; ///< SLICE_TILE_MAX value for h/w register - - UINT_32 numSamples; ///< Pass the effective numSamples processed in this call - - /// r800 and later HWL parameters - ADDR_TILEINFO* pTileInfo; ///< Tile parameters used. Filled in if 0 on input - AddrTileType tileType; ///< Micro tiling type, only valid when tileIndex != -1 - INT_32 tileIndex; ///< Tile index, MAY be "downgraded" - - INT_32 macroModeIndex; ///< Index in macro tile mode table if there is one (CI) - /// Output flags - struct - { - /// Special information to work around SI mipmap swizzle bug UBTS #317508 - UINT_32 last2DLevel : 1; ///< TRUE if this is the last 2D(3D) tiled - ///< Only meaningful when create flag checkLast2DLevel is set - UINT_32 tcCompatible : 1; ///< If the surface can be shader compatible - UINT_32 dccUnsupport : 1; ///< If the surface can support DCC compressed rendering - UINT_32 prtTileIndex : 1; ///< SI only, indicate the returned tile index is for PRT - ///< If address lib return true for mip 0, client should set prt flag - ///< for child mips in subsequent compute surface info calls - UINT_32 reserved :28; ///< Reserved bits - }; - - UINT_32 equationIndex; ///< Equation index in the equation table; - - UINT_32 blockWidth; ///< Width in element inside one block(1D->Micro, 2D->Macro) - UINT_32 blockHeight; ///< Height in element inside one block(1D->Micro, 2D->Macro) - UINT_32 blockSlices; ///< Slice number inside one block(1D->Micro, 
2D->Macro) - - /// Stereo info - ADDR_QBSTEREOINFO* pStereoInfo;///< Stereo information, needed when .qbStereo flag is TRUE - - INT_32 stencilTileIdx; ///< stencil tile index output when matchStencilTileCfg was set -} ADDR_COMPUTE_SURFACE_INFO_OUTPUT; - -/** -**************************************************************************************************** -* AddrComputeSurfaceInfo -* -* @brief -* Compute surface width/height/depth/alignments and suitable tiling mode -**************************************************************************************************** -*/ -ADDR_E_RETURNCODE ADDR_API AddrComputeSurfaceInfo( - ADDR_HANDLE hLib, - const ADDR_COMPUTE_SURFACE_INFO_INPUT* pIn, - ADDR_COMPUTE_SURFACE_INFO_OUTPUT* pOut); - - - -/** -**************************************************************************************************** -* ADDR_COMPUTE_SURFACE_ADDRFROMCOORD_INPUT -* -* @brief -* Input structure for AddrComputeSurfaceAddrFromCoord -**************************************************************************************************** -*/ -typedef struct _ADDR_COMPUTE_SURFACE_ADDRFROMCOORD_INPUT -{ - UINT_32 size; ///< Size of this structure in bytes - - UINT_32 x; ///< X coordinate - UINT_32 y; ///< Y coordinate - UINT_32 slice; ///< Slice index - UINT_32 sample; ///< Sample index, use fragment index for EQAA - - UINT_32 bpp; ///< Bits per pixel - UINT_32 pitch; ///< Surface pitch, in pixels - UINT_32 height; ///< Surface height, in pixels - UINT_32 numSlices; ///< Surface depth - UINT_32 numSamples; ///< Number of samples - - AddrTileMode tileMode; ///< Tile mode - BOOL_32 isDepth; ///< TRUE if the surface uses depth sample ordering within - /// micro tile. 
Textures can also choose depth sample order - UINT_32 tileBase; ///< Base offset (in bits) inside micro tile which handles - /// the case that components are stored separately - UINT_32 compBits; ///< The component bits actually needed(for planar surface) - - UINT_32 numFrags; ///< Number of fragments, leave it zero or the same as - /// number of samples for normal AA; Set it to the - /// number of fragments for EQAA - /// r800 and later HWL parameters - // Used for 1D tiling above - AddrTileType tileType; ///< See defintion of AddrTileType - struct - { - UINT_32 ignoreSE : 1; ///< TRUE if shader engines are ignored. This is texture - /// only flag. Only non-RT texture can set this to TRUE - UINT_32 reserved :31; ///< Reserved for future use. - }; - // 2D tiling needs following structure - ADDR_TILEINFO* pTileInfo; ///< 2D tile parameters. Client must provide all data - INT_32 tileIndex; ///< Tile index, MUST be -1 if you don't want to use it - /// while the global useTileIndex is set to 1 - union - { - struct - { - UINT_32 bankSwizzle; ///< Bank swizzle - UINT_32 pipeSwizzle; ///< Pipe swizzle - }; - UINT_32 tileSwizzle; ///< Combined swizzle, if useCombinedSwizzle is TRUE - }; -} ADDR_COMPUTE_SURFACE_ADDRFROMCOORD_INPUT; - -/** -**************************************************************************************************** -* ADDR_COMPUTE_SURFACE_ADDRFROMCOORD_OUTPUT -* -* @brief -* Output structure for AddrComputeSurfaceAddrFromCoord -**************************************************************************************************** -*/ -typedef struct _ADDR_COMPUTE_SURFACE_ADDRFROMCOORD_OUTPUT -{ - UINT_32 size; ///< Size of this structure in bytes - - UINT_64 addr; ///< Byte address - UINT_32 bitPosition; ///< Bit position within surfaceAddr, 0-7. - /// For surface bpp < 8, e.g. FMT_1. 
- UINT_32 prtBlockIndex; ///< Index of a PRT tile (64K block) -} ADDR_COMPUTE_SURFACE_ADDRFROMCOORD_OUTPUT; - -/** -**************************************************************************************************** -* AddrComputeSurfaceAddrFromCoord -* -* @brief -* Compute surface address from a given coordinate. -**************************************************************************************************** -*/ -ADDR_E_RETURNCODE ADDR_API AddrComputeSurfaceAddrFromCoord( - ADDR_HANDLE hLib, - const ADDR_COMPUTE_SURFACE_ADDRFROMCOORD_INPUT* pIn, - ADDR_COMPUTE_SURFACE_ADDRFROMCOORD_OUTPUT* pOut); - - - -/** -**************************************************************************************************** -* ADDR_COMPUTE_SURFACE_COORDFROMADDR_INPUT -* -* @brief -* Input structure for AddrComputeSurfaceCoordFromAddr -**************************************************************************************************** -*/ -typedef struct _ADDR_COMPUTE_SURFACE_COORDFROMADDR_INPUT -{ - UINT_32 size; ///< Size of this structure in bytes - - UINT_64 addr; ///< Address in bytes - UINT_32 bitPosition; ///< Bit position in addr. 0-7. for surface bpp < 8, - /// e.g. FMT_1; - UINT_32 bpp; ///< Bits per pixel - UINT_32 pitch; ///< Pitch, in pixels - UINT_32 height; ///< Height in pixels - UINT_32 numSlices; ///< Surface depth - UINT_32 numSamples; ///< Number of samples - - AddrTileMode tileMode; ///< Tile mode - BOOL_32 isDepth; ///< Surface uses depth sample ordering within micro tile. - /// Note: Textures can choose depth sample order as well. 
- UINT_32 tileBase; ///< Base offset (in bits) inside micro tile which handles - /// the case that components are stored separately - UINT_32 compBits; ///< The component bits actually needed(for planar surface) - - UINT_32 numFrags; ///< Number of fragments, leave it zero or the same as - /// number of samples for normal AA; Set it to the - /// number of fragments for EQAA - /// r800 and later HWL parameters - // Used for 1D tiling above - AddrTileType tileType; ///< See defintion of AddrTileType - struct - { - UINT_32 ignoreSE : 1; ///< TRUE if shader engines are ignored. This is texture - /// only flag. Only non-RT texture can set this to TRUE - UINT_32 reserved :31; ///< Reserved for future use. - }; - // 2D tiling needs following structure - ADDR_TILEINFO* pTileInfo; ///< 2D tile parameters. Client must provide all data - INT_32 tileIndex; ///< Tile index, MUST be -1 if you don't want to use it - /// while the global useTileIndex is set to 1 - union - { - struct - { - UINT_32 bankSwizzle; ///< Bank swizzle - UINT_32 pipeSwizzle; ///< Pipe swizzle - }; - UINT_32 tileSwizzle; ///< Combined swizzle, if useCombinedSwizzle is TRUE - }; -} ADDR_COMPUTE_SURFACE_COORDFROMADDR_INPUT; - -/** -**************************************************************************************************** -* ADDR_COMPUTE_SURFACE_COORDFROMADDR_OUTPUT -* -* @brief -* Output structure for AddrComputeSurfaceCoordFromAddr -**************************************************************************************************** -*/ -typedef struct _ADDR_COMPUTE_SURFACE_COORDFROMADDR_OUTPUT -{ - UINT_32 size; ///< Size of this structure in bytes - - UINT_32 x; ///< X coordinate - UINT_32 y; ///< Y coordinate - UINT_32 slice; ///< Index of slices - UINT_32 sample; ///< Index of samples, means fragment index for EQAA -} ADDR_COMPUTE_SURFACE_COORDFROMADDR_OUTPUT; - -/** -**************************************************************************************************** -* 
AddrComputeSurfaceCoordFromAddr -* -* @brief -* Compute coordinate from a given surface address -**************************************************************************************************** -*/ -ADDR_E_RETURNCODE ADDR_API AddrComputeSurfaceCoordFromAddr( - ADDR_HANDLE hLib, - const ADDR_COMPUTE_SURFACE_COORDFROMADDR_INPUT* pIn, - ADDR_COMPUTE_SURFACE_COORDFROMADDR_OUTPUT* pOut); - -//////////////////////////////////////////////////////////////////////////////////////////////////// -// HTile functions -//////////////////////////////////////////////////////////////////////////////////////////////////// - -/** -**************************************************************************************************** -* ADDR_HTILE_FLAGS -* -* @brief -* HTILE flags -**************************************************************************************************** -*/ -typedef union _ADDR_HTILE_FLAGS -{ - struct - { - UINT_32 tcCompatible : 1; ///< Flag indicates surface needs to be shader readable - UINT_32 skipTcCompatSizeAlign : 1; ///< Flag indicates that addrLib will not align htile - /// size to 256xBankxPipe when computing tc-compatible - /// htile info. - UINT_32 reserved : 30; ///< Reserved bits - }; - - UINT_32 value; -} ADDR_HTILE_FLAGS; - -/** -**************************************************************************************************** -* ADDR_COMPUTE_HTILE_INFO_INPUT -* -* @brief -* Input structure of AddrComputeHtileInfo -**************************************************************************************************** -*/ -typedef struct _ADDR_COMPUTE_HTILE_INFO_INPUT -{ - UINT_32 size; ///< Size of this structure in bytes - - ADDR_HTILE_FLAGS flags; ///< HTILE flags - UINT_32 pitch; ///< Surface pitch, in pixels - UINT_32 height; ///< Surface height, in pixels - UINT_32 numSlices; ///< Number of slices - BOOL_32 isLinear; ///< Linear or tiled HTILE layout - AddrHtileBlockSize blockWidth; ///< 4 or 8. 
EG above only support 8 - AddrHtileBlockSize blockHeight; ///< 4 or 8. EG above only support 8 - ADDR_TILEINFO* pTileInfo; ///< Tile info - - INT_32 tileIndex; ///< Tile index, MUST be -1 if you don't want to use it - /// while the global useTileIndex is set to 1 - INT_32 macroModeIndex; ///< Index in macro tile mode table if there is one (CI) - ///< README: When tileIndex is not -1, this must be valid -} ADDR_COMPUTE_HTILE_INFO_INPUT; - -/** -**************************************************************************************************** -* ADDR_COMPUTE_HTILE_INFO_OUTPUT -* -* @brief -* Output structure of AddrComputeHtileInfo -**************************************************************************************************** -*/ -typedef struct _ADDR_COMPUTE_HTILE_INFO_OUTPUT -{ - UINT_32 size; ///< Size of this structure in bytes - - UINT_32 pitch; ///< Pitch in pixels of depth buffer represented in this - /// HTile buffer. This might be larger than original depth - /// buffer pitch when called with an unaligned pitch. - UINT_32 height; ///< Height in pixels, as above - UINT_64 htileBytes; ///< Size of HTILE buffer, in bytes - UINT_32 baseAlign; ///< Base alignment - UINT_32 bpp; ///< Bits per pixel for HTILE is how many bits for an 8x8 block! - UINT_32 macroWidth; ///< Macro width in pixels, actually squared cache shape - UINT_32 macroHeight; ///< Macro height in pixels - UINT_64 sliceSize; ///< Slice size, in bytes. - BOOL_32 sliceInterleaved; ///< Flag to indicate if different slice's htile is interleaved - /// Compute engine clear can't be used if htile is interleaved - BOOL_32 nextMipLevelCompressible; ///< Flag to indicate whether HTILE can be enabled in - /// next mip level, it also indicates if memory set based - /// fast clear can be used for current mip level. 
-} ADDR_COMPUTE_HTILE_INFO_OUTPUT; - -/** -**************************************************************************************************** -* AddrComputeHtileInfo -* -* @brief -* Compute Htile pitch, height, base alignment and size in bytes -**************************************************************************************************** -*/ -ADDR_E_RETURNCODE ADDR_API AddrComputeHtileInfo( - ADDR_HANDLE hLib, - const ADDR_COMPUTE_HTILE_INFO_INPUT* pIn, - ADDR_COMPUTE_HTILE_INFO_OUTPUT* pOut); - - - -/** -**************************************************************************************************** -* ADDR_COMPUTE_HTILE_ADDRFROMCOORD_INPUT -* -* @brief -* Input structure for AddrComputeHtileAddrFromCoord -**************************************************************************************************** -*/ -typedef struct _ADDR_COMPUTE_HTILE_ADDRFROMCOORD_INPUT -{ - UINT_32 size; ///< Size of this structure in bytes - - UINT_32 pitch; ///< Pitch, in pixels - UINT_32 height; ///< Height in pixels - UINT_32 x; ///< X coordinate - UINT_32 y; ///< Y coordinate - UINT_32 slice; ///< Index of slice - UINT_32 numSlices; ///< Number of slices - BOOL_32 isLinear; ///< Linear or tiled HTILE layout - ADDR_HTILE_FLAGS flags; ///< htile flags - AddrHtileBlockSize blockWidth; ///< 4 or 8. 1 means 8, 0 means 4. EG above only support 8 - AddrHtileBlockSize blockHeight; ///< 4 or 8. 1 means 8, 0 means 4. 
EG above only support 8 - ADDR_TILEINFO* pTileInfo; ///< Tile info - - INT_32 tileIndex; ///< Tile index, MUST be -1 if you don't want to use it - /// while the global useTileIndex is set to 1 - INT_32 macroModeIndex; ///< Index in macro tile mode table if there is one (CI) - ///< README: When tileIndex is not -1, this must be valid - UINT_32 bpp; ///< depth/stencil buffer bit per pixel size - UINT_32 zStencilAddr; ///< tcCompatible Z/Stencil surface address -} ADDR_COMPUTE_HTILE_ADDRFROMCOORD_INPUT; - -/** -**************************************************************************************************** -* ADDR_COMPUTE_HTILE_ADDRFROMCOORD_OUTPUT -* -* @brief -* Output structure for AddrComputeHtileAddrFromCoord -**************************************************************************************************** -*/ -typedef struct _ADDR_COMPUTE_HTILE_ADDRFROMCOORD_OUTPUT -{ - UINT_32 size; ///< Size of this structure in bytes - - UINT_64 addr; ///< Address in bytes - UINT_32 bitPosition; ///< Bit position, 0 or 4. CMASK and HTILE shares some lib method. 
- /// So we keep bitPosition for HTILE as well -} ADDR_COMPUTE_HTILE_ADDRFROMCOORD_OUTPUT; - -/** -**************************************************************************************************** -* AddrComputeHtileAddrFromCoord -* -* @brief -* Compute Htile address according to coordinates (of depth buffer) -**************************************************************************************************** -*/ -ADDR_E_RETURNCODE ADDR_API AddrComputeHtileAddrFromCoord( - ADDR_HANDLE hLib, - const ADDR_COMPUTE_HTILE_ADDRFROMCOORD_INPUT* pIn, - ADDR_COMPUTE_HTILE_ADDRFROMCOORD_OUTPUT* pOut); - - - -/** -**************************************************************************************************** -* ADDR_COMPUTE_HTILE_COORDFROMADDR_INPUT -* -* @brief -* Input structure for AddrComputeHtileCoordFromAddr -**************************************************************************************************** -*/ -typedef struct _ADDR_COMPUTE_HTILE_COORDFROMADDR_INPUT -{ - UINT_32 size; ///< Size of this structure in bytes - - UINT_64 addr; ///< Address - UINT_32 bitPosition; ///< Bit position 0 or 4. CMASK and HTILE share some methods - /// so we keep bitPosition for HTILE as well - UINT_32 pitch; ///< Pitch, in pixels - UINT_32 height; ///< Height, in pixels - UINT_32 numSlices; ///< Number of slices - BOOL_32 isLinear; ///< Linear or tiled HTILE layout - AddrHtileBlockSize blockWidth; ///< 4 or 8. 1 means 8, 0 means 4. R8xx/R9xx only support 8 - AddrHtileBlockSize blockHeight; ///< 4 or 8. 1 means 8, 0 means 4. 
R8xx/R9xx only support 8 - ADDR_TILEINFO* pTileInfo; ///< Tile info - - INT_32 tileIndex; ///< Tile index, MUST be -1 if you don't want to use it - /// while the global useTileIndex is set to 1 - INT_32 macroModeIndex; ///< Index in macro tile mode table if there is one (CI) - ///< README: When tileIndex is not -1, this must be valid -} ADDR_COMPUTE_HTILE_COORDFROMADDR_INPUT; - -/** -**************************************************************************************************** -* ADDR_COMPUTE_HTILE_COORDFROMADDR_OUTPUT -* -* @brief -* Output structure for AddrComputeHtileCoordFromAddr -**************************************************************************************************** -*/ -typedef struct _ADDR_COMPUTE_HTILE_COORDFROMADDR_OUTPUT -{ - UINT_32 size; ///< Size of this structure in bytes - - UINT_32 x; ///< X coordinate - UINT_32 y; ///< Y coordinate - UINT_32 slice; ///< Slice index -} ADDR_COMPUTE_HTILE_COORDFROMADDR_OUTPUT; - -/** -**************************************************************************************************** -* AddrComputeHtileCoordFromAddr -* -* @brief -* Compute coordinates within depth buffer (1st pixel of a micro tile) according to -* Htile address -**************************************************************************************************** -*/ -ADDR_E_RETURNCODE ADDR_API AddrComputeHtileCoordFromAddr( - ADDR_HANDLE hLib, - const ADDR_COMPUTE_HTILE_COORDFROMADDR_INPUT* pIn, - ADDR_COMPUTE_HTILE_COORDFROMADDR_OUTPUT* pOut); - - - -//////////////////////////////////////////////////////////////////////////////////////////////////// -// C-mask functions -//////////////////////////////////////////////////////////////////////////////////////////////////// - -/** -**************************************************************************************************** -* ADDR_CMASK_FLAGS -* -* @brief -* CMASK flags -**************************************************************************************************** -*/ 
-typedef union _ADDR_CMASK_FLAGS -{ - struct - { - UINT_32 tcCompatible : 1; ///< Flag indicates surface needs to be shader readable - UINT_32 reserved :31; ///< Reserved bits - }; - - UINT_32 value; -} ADDR_CMASK_FLAGS; - -/** -**************************************************************************************************** -* ADDR_COMPUTE_CMASK_INFO_INPUT -* -* @brief -* Input structure of AddrComputeCmaskInfo -**************************************************************************************************** -*/ -typedef struct _ADDR_COMPUTE_CMASKINFO_INPUT -{ - UINT_32 size; ///< Size of this structure in bytes - - ADDR_CMASK_FLAGS flags; ///< CMASK flags - UINT_32 pitch; ///< Pitch, in pixels, of color buffer - UINT_32 height; ///< Height, in pixels, of color buffer - UINT_32 numSlices; ///< Number of slices, of color buffer - BOOL_32 isLinear; ///< Linear or tiled layout, Only SI can be linear - ADDR_TILEINFO* pTileInfo; ///< Tile info - - INT_32 tileIndex; ///< Tile index, MUST be -1 if you don't want to use it - /// while the global useTileIndex is set to 1 - INT_32 macroModeIndex; ///< Index in macro tile mode table if there is one (CI) - ///< README: When tileIndex is not -1, this must be valid -} ADDR_COMPUTE_CMASK_INFO_INPUT; - -/** -**************************************************************************************************** -* ADDR_COMPUTE_CMASK_INFO_OUTPUT -* -* @brief -* Output structure of AddrComputeCmaskInfo -**************************************************************************************************** -*/ -typedef struct _ADDR_COMPUTE_CMASK_INFO_OUTPUT -{ - UINT_32 size; ///< Size of this structure in bytes - - UINT_32 pitch; ///< Pitch in pixels of color buffer which - /// this Cmask matches. The size might be larger than - /// original color buffer pitch when called with - /// an unaligned pitch. 
- UINT_32 height; ///< Height in pixels, as above - UINT_64 cmaskBytes; ///< Size in bytes of CMask buffer - UINT_32 baseAlign; ///< Base alignment - UINT_32 blockMax; ///< Cmask block size. Need this to set CB_COLORn_MASK register - UINT_32 macroWidth; ///< Macro width in pixels, actually squared cache shape - UINT_32 macroHeight; ///< Macro height in pixels - UINT_64 sliceSize; ///< Slice size, in bytes. -} ADDR_COMPUTE_CMASK_INFO_OUTPUT; - -/** -**************************************************************************************************** -* AddrComputeCmaskInfo -* -* @brief -* Compute Cmask pitch, height, base alignment and size in bytes from color buffer -* info -**************************************************************************************************** -*/ -ADDR_E_RETURNCODE ADDR_API AddrComputeCmaskInfo( - ADDR_HANDLE hLib, - const ADDR_COMPUTE_CMASK_INFO_INPUT* pIn, - ADDR_COMPUTE_CMASK_INFO_OUTPUT* pOut); - - - -/** -**************************************************************************************************** -* ADDR_COMPUTE_CMASK_ADDRFROMCOORD_INPUT -* -* @brief -* Input structure for AddrComputeCmaskAddrFromCoord -* -**************************************************************************************************** -*/ -typedef struct _ADDR_COMPUTE_CMASK_ADDRFROMCOORD_INPUT -{ - UINT_32 size; ///< Size of this structure in bytes - UINT_32 x; ///< X coordinate - UINT_32 y; ///< Y coordinate - UINT_64 fmaskAddr; ///< Fmask addr for tc compatible Cmask - UINT_32 slice; ///< Slice index - UINT_32 pitch; ///< Pitch in pixels, of color buffer - UINT_32 height; ///< Height in pixels, of color buffer - UINT_32 numSlices; ///< Number of slices - UINT_32 bpp; - BOOL_32 isLinear; ///< Linear or tiled layout, Only SI can be linear - ADDR_CMASK_FLAGS flags; ///< CMASK flags - ADDR_TILEINFO* pTileInfo; ///< Tile info - - INT_32 tileIndex; ///< Tile index, MUST be -1 if you don't want to use it - ///< while the global useTileIndex is set to 1 - 
INT_32 macroModeIndex; ///< Index in macro tile mode table if there is one (CI) - ///< README: When tileIndex is not -1, this must be valid -} ADDR_COMPUTE_CMASK_ADDRFROMCOORD_INPUT; - -/** -**************************************************************************************************** -* ADDR_COMPUTE_CMASK_ADDRFROMCOORD_OUTPUT -* -* @brief -* Output structure for AddrComputeCmaskAddrFromCoord -**************************************************************************************************** -*/ -typedef struct _ADDR_COMPUTE_CMASK_ADDRFROMCOORD_OUTPUT -{ - UINT_32 size; ///< Size of this structure in bytes - - UINT_64 addr; ///< CMASK address in bytes - UINT_32 bitPosition; ///< Bit position within addr, 0-7. CMASK is 4 bpp, - /// so the address may be located in bit 0 (0) or 4 (4) -} ADDR_COMPUTE_CMASK_ADDRFROMCOORD_OUTPUT; - -/** -**************************************************************************************************** -* AddrComputeCmaskAddrFromCoord -* -* @brief -* Compute Cmask address according to coordinates (of MSAA color buffer) -**************************************************************************************************** -*/ -ADDR_E_RETURNCODE ADDR_API AddrComputeCmaskAddrFromCoord( - ADDR_HANDLE hLib, - const ADDR_COMPUTE_CMASK_ADDRFROMCOORD_INPUT* pIn, - ADDR_COMPUTE_CMASK_ADDRFROMCOORD_OUTPUT* pOut); - - - -/** -**************************************************************************************************** -* ADDR_COMPUTE_CMASK_COORDFROMADDR_INPUT -* -* @brief -* Input structure for AddrComputeCmaskCoordFromAddr -**************************************************************************************************** -*/ -typedef struct _ADDR_COMPUTE_CMASK_COORDFROMADDR_INPUT -{ - UINT_32 size; ///< Size of this structure in bytes - - UINT_64 addr; ///< CMASK address in bytes - UINT_32 bitPosition; ///< Bit position within addr, 0-7. 
CMASK is 4 bpp, - /// so the address may be located in bit 0 (0) or 4 (4) - UINT_32 pitch; ///< Pitch, in pixels - UINT_32 height; ///< Height in pixels - UINT_32 numSlices; ///< Number of slices - BOOL_32 isLinear; ///< Linear or tiled layout, Only SI can be linear - ADDR_TILEINFO* pTileInfo; ///< Tile info - - INT_32 tileIndex; ///< Tile index, MUST be -1 if you don't want to use it - /// while the global useTileIndex is set to 1 - INT_32 macroModeIndex; ///< Index in macro tile mode table if there is one (CI) - ///< README: When tileIndex is not -1, this must be valid -} ADDR_COMPUTE_CMASK_COORDFROMADDR_INPUT; - -/** -**************************************************************************************************** -* ADDR_COMPUTE_CMASK_COORDFROMADDR_OUTPUT -* -* @brief -* Output structure for AddrComputeCmaskCoordFromAddr -**************************************************************************************************** -*/ -typedef struct _ADDR_COMPUTE_CMASK_COORDFROMADDR_OUTPUT -{ - UINT_32 size; ///< Size of this structure in bytes - - UINT_32 x; ///< X coordinate - UINT_32 y; ///< Y coordinate - UINT_32 slice; ///< Slice index -} ADDR_COMPUTE_CMASK_COORDFROMADDR_OUTPUT; - -/** -**************************************************************************************************** -* AddrComputeCmaskCoordFromAddr -* -* @brief -* Compute coordinates within color buffer (1st pixel of a micro tile) according to -* Cmask address -**************************************************************************************************** -*/ -ADDR_E_RETURNCODE ADDR_API AddrComputeCmaskCoordFromAddr( - ADDR_HANDLE hLib, - const ADDR_COMPUTE_CMASK_COORDFROMADDR_INPUT* pIn, - ADDR_COMPUTE_CMASK_COORDFROMADDR_OUTPUT* pOut); - - - -//////////////////////////////////////////////////////////////////////////////////////////////////// -// F-mask functions -//////////////////////////////////////////////////////////////////////////////////////////////////// - -/** 
-**************************************************************************************************** -* ADDR_COMPUTE_FMASK_INFO_INPUT -* -* @brief -* Input structure for AddrComputeFmaskInfo -**************************************************************************************************** -*/ -typedef struct _ADDR_COMPUTE_FMASK_INFO_INPUT -{ - UINT_32 size; ///< Size of this structure in bytes - - AddrTileMode tileMode; ///< Tile mode - UINT_32 pitch; ///< Surface pitch, in pixels - UINT_32 height; ///< Surface height, in pixels - UINT_32 numSlices; ///< Number of slice/depth - UINT_32 numSamples; ///< Number of samples - UINT_32 numFrags; ///< Number of fragments, leave it zero or the same as - /// number of samples for normal AA; Set it to the - /// number of fragments for EQAA - /// r800 and later HWL parameters - struct - { - UINT_32 resolved: 1; ///< TRUE if the surface is for resolved fmask, only used - /// by H/W clients. S/W should always set it to FALSE. - UINT_32 reserved: 31; ///< Reserved for future use. - }; - ADDR_TILEINFO* pTileInfo; ///< 2D tiling parameters. 
Clients must give valid data - INT_32 tileIndex; ///< Tile index, MUST be -1 if you don't want to use it - /// while the global useTileIndex is set to 1 -} ADDR_COMPUTE_FMASK_INFO_INPUT; - -/** -**************************************************************************************************** -* ADDR_COMPUTE_FMASK_INFO_OUTPUT -* -* @brief -* Output structure for AddrComputeFmaskInfo -**************************************************************************************************** -*/ -typedef struct _ADDR_COMPUTE_FMASK_INFO_OUTPUT -{ - UINT_32 size; ///< Size of this structure in bytes - - UINT_32 pitch; ///< Pitch of fmask in pixels - UINT_32 height; ///< Height of fmask in pixels - UINT_32 numSlices; ///< Slices of fmask - UINT_64 fmaskBytes; ///< Size of fmask in bytes - UINT_32 baseAlign; ///< Base address alignment - UINT_32 pitchAlign; ///< Pitch alignment - UINT_32 heightAlign; ///< Height alignment - UINT_32 bpp; ///< Bits per pixel of FMASK is: number of bit planes - UINT_32 numSamples; ///< Number of samples, used for dump, export this since input - /// may be changed in 9xx and above - /// r800 and later HWL parameters - ADDR_TILEINFO* pTileInfo; ///< Tile parameters used. 
Fmask can have different - /// bank_height from color buffer - INT_32 tileIndex; ///< Tile index, MUST be -1 if you don't want to use it - /// while the global useTileIndex is set to 1 - INT_32 macroModeIndex; ///< Index in macro tile mode table if there is one (CI) - UINT_64 sliceSize; ///< Size of slice in bytes -} ADDR_COMPUTE_FMASK_INFO_OUTPUT; - -/** -**************************************************************************************************** -* AddrComputeFmaskInfo -* -* @brief -* Compute Fmask pitch/height/depth/alignments and size in bytes -**************************************************************************************************** -*/ -ADDR_E_RETURNCODE ADDR_API AddrComputeFmaskInfo( - ADDR_HANDLE hLib, - const ADDR_COMPUTE_FMASK_INFO_INPUT* pIn, - ADDR_COMPUTE_FMASK_INFO_OUTPUT* pOut); - - - -/** -**************************************************************************************************** -* ADDR_COMPUTE_FMASK_ADDRFROMCOORD_INPUT -* -* @brief -* Input structure for AddrComputeFmaskAddrFromCoord -**************************************************************************************************** -*/ -typedef struct _ADDR_COMPUTE_FMASK_ADDRFROMCOORD_INPUT -{ - UINT_32 size; ///< Size of this structure in bytes - - UINT_32 x; ///< X coordinate - UINT_32 y; ///< Y coordinate - UINT_32 slice; ///< Slice index - UINT_32 plane; ///< Plane number - UINT_32 sample; ///< Sample index (fragment index for EQAA) - - UINT_32 pitch; ///< Surface pitch, in pixels - UINT_32 height; ///< Surface height, in pixels - UINT_32 numSamples; ///< Number of samples - UINT_32 numFrags; ///< Number of fragments, leave it zero or the same as - /// number of samples for normal AA; Set it to the - /// number of fragments for EQAA - - AddrTileMode tileMode; ///< Tile mode - union - { - struct - { - UINT_32 bankSwizzle; ///< Bank swizzle - UINT_32 pipeSwizzle; ///< Pipe swizzle - }; - UINT_32 tileSwizzle; ///< Combined swizzle, if useCombinedSwizzle is TRUE - }; - 
- /// r800 and later HWL parameters - struct - { - UINT_32 resolved: 1; ///< TRUE if this is a resolved fmask, used by H/W clients - UINT_32 ignoreSE: 1; ///< TRUE if shader engines are ignored. - UINT_32 reserved: 30; ///< Reserved for future use. - }; - ADDR_TILEINFO* pTileInfo; ///< 2D tiling parameters. Client must provide all data - -} ADDR_COMPUTE_FMASK_ADDRFROMCOORD_INPUT; - -/** -**************************************************************************************************** -* ADDR_COMPUTE_FMASK_ADDRFROMCOORD_OUTPUT -* -* @brief -* Output structure for AddrComputeFmaskAddrFromCoord -**************************************************************************************************** -*/ -typedef struct _ADDR_COMPUTE_FMASK_ADDRFROMCOORD_OUTPUT -{ - UINT_32 size; ///< Size of this structure in bytes - - UINT_64 addr; ///< Fmask address - UINT_32 bitPosition; ///< Bit position within fmaskAddr, 0-7. -} ADDR_COMPUTE_FMASK_ADDRFROMCOORD_OUTPUT; - -/** -**************************************************************************************************** -* AddrComputeFmaskAddrFromCoord -* -* @brief -* Compute Fmask address according to coordinates (x,y,slice,sample,plane) -**************************************************************************************************** -*/ -ADDR_E_RETURNCODE ADDR_API AddrComputeFmaskAddrFromCoord( - ADDR_HANDLE hLib, - const ADDR_COMPUTE_FMASK_ADDRFROMCOORD_INPUT* pIn, - ADDR_COMPUTE_FMASK_ADDRFROMCOORD_OUTPUT* pOut); - - - -/** -**************************************************************************************************** -* ADDR_COMPUTE_FMASK_COORDFROMADDR_INPUT -* -* @brief -* Input structure for AddrComputeFmaskCoordFromAddr -**************************************************************************************************** -*/ -typedef struct _ADDR_COMPUTE_FMASK_COORDFROMADDR_INPUT -{ - UINT_32 size; ///< Size of this structure in bytes - - UINT_64 addr; ///< Address - UINT_32 bitPosition; ///< Bit position 
within addr, 0-7. - - UINT_32 pitch; ///< Pitch, in pixels - UINT_32 height; ///< Height in pixels - UINT_32 numSamples; ///< Number of samples - UINT_32 numFrags; ///< Number of fragments - AddrTileMode tileMode; ///< Tile mode - union - { - struct - { - UINT_32 bankSwizzle; ///< Bank swizzle - UINT_32 pipeSwizzle; ///< Pipe swizzle - }; - UINT_32 tileSwizzle; ///< Combined swizzle, if useCombinedSwizzle is TRUE - }; - - /// r800 and later HWL parameters - struct - { - UINT_32 resolved: 1; ///< TRUE if this is a resolved fmask, used by HW components - UINT_32 ignoreSE: 1; ///< TRUE if shader engines are ignored. - UINT_32 reserved: 30; ///< Reserved for future use. - }; - ADDR_TILEINFO* pTileInfo; ///< 2D tile parameters. Client must provide all data - -} ADDR_COMPUTE_FMASK_COORDFROMADDR_INPUT; - -/** -**************************************************************************************************** -* ADDR_COMPUTE_FMASK_COORDFROMADDR_OUTPUT -* -* @brief -* Output structure for AddrComputeFmaskCoordFromAddr -**************************************************************************************************** -*/ -typedef struct _ADDR_COMPUTE_FMASK_COORDFROMADDR_OUTPUT -{ - UINT_32 size; ///< Size of this structure in bytes - - UINT_32 x; ///< X coordinate - UINT_32 y; ///< Y coordinate - UINT_32 slice; ///< Slice index - UINT_32 plane; ///< Plane number - UINT_32 sample; ///< Sample index (fragment index for EQAA) -} ADDR_COMPUTE_FMASK_COORDFROMADDR_OUTPUT; - -/** -**************************************************************************************************** -* AddrComputeFmaskCoordFromAddr -* -* @brief -* Compute FMASK coordinate from an given address -**************************************************************************************************** -*/ -ADDR_E_RETURNCODE ADDR_API AddrComputeFmaskCoordFromAddr( - ADDR_HANDLE hLib, - const ADDR_COMPUTE_FMASK_COORDFROMADDR_INPUT* pIn, - ADDR_COMPUTE_FMASK_COORDFROMADDR_OUTPUT* pOut); - - - 
-//////////////////////////////////////////////////////////////////////////////////////////////////// -// Element/utility functions -//////////////////////////////////////////////////////////////////////////////////////////////////// - -/** -**************************************************************************************************** -* AddrGetVersion -* -* @brief -* Get AddrLib version number -**************************************************************************************************** -*/ -UINT_32 ADDR_API AddrGetVersion(ADDR_HANDLE hLib); - -/** -**************************************************************************************************** -* AddrUseTileIndex -* -* @brief -* Return TRUE if tileIndex is enabled in this address library -**************************************************************************************************** -*/ -BOOL_32 ADDR_API AddrUseTileIndex(ADDR_HANDLE hLib); - -/** -**************************************************************************************************** -* AddrUseCombinedSwizzle -* -* @brief -* Return TRUE if combined swizzle is enabled in this address library -**************************************************************************************************** -*/ -BOOL_32 ADDR_API AddrUseCombinedSwizzle(ADDR_HANDLE hLib); - -/** -**************************************************************************************************** -* ADDR_EXTRACT_BANKPIPE_SWIZZLE_INPUT -* -* @brief -* Input structure of AddrExtractBankPipeSwizzle -**************************************************************************************************** -*/ -typedef struct _ADDR_EXTRACT_BANKPIPE_SWIZZLE_INPUT -{ - UINT_32 size; ///< Size of this structure in bytes - - UINT_32 base256b; ///< Base256b value - - /// r800 and later HWL parameters - ADDR_TILEINFO* pTileInfo; ///< 2D tile parameters. 
Client must provide all data - - INT_32 tileIndex; ///< Tile index, MUST be -1 if you don't want to use it - /// while the global useTileIndex is set to 1 - INT_32 macroModeIndex; ///< Index in macro tile mode table if there is one (CI) - ///< README: When tileIndex is not -1, this must be valid -} ADDR_EXTRACT_BANKPIPE_SWIZZLE_INPUT; - -/** -**************************************************************************************************** -* ADDR_EXTRACT_BANKPIPE_SWIZZLE_OUTPUT -* -* @brief -* Output structure of AddrExtractBankPipeSwizzle -**************************************************************************************************** -*/ -typedef struct _ADDR_EXTRACT_BANKPIPE_SWIZZLE_OUTPUT -{ - UINT_32 size; ///< Size of this structure in bytes - - UINT_32 bankSwizzle; ///< Bank swizzle - UINT_32 pipeSwizzle; ///< Pipe swizzle -} ADDR_EXTRACT_BANKPIPE_SWIZZLE_OUTPUT; - -/** -**************************************************************************************************** -* AddrExtractBankPipeSwizzle -* -* @brief -* Extract Bank and Pipe swizzle from base256b -* @return -* ADDR_OK if no error -**************************************************************************************************** -*/ -ADDR_E_RETURNCODE ADDR_API AddrExtractBankPipeSwizzle( - ADDR_HANDLE hLib, - const ADDR_EXTRACT_BANKPIPE_SWIZZLE_INPUT* pIn, - ADDR_EXTRACT_BANKPIPE_SWIZZLE_OUTPUT* pOut); - - -/** -**************************************************************************************************** -* ADDR_COMBINE_BANKPIPE_SWIZZLE_INPUT -* -* @brief -* Input structure of AddrCombineBankPipeSwizzle -**************************************************************************************************** -*/ -typedef struct _ADDR_COMBINE_BANKPIPE_SWIZZLE_INPUT -{ - UINT_32 size; ///< Size of this structure in bytes - - UINT_32 bankSwizzle; ///< Bank swizzle - UINT_32 pipeSwizzle; ///< Pipe swizzle - UINT_64 baseAddr; ///< Base address (leave it zero for driver clients) - - /// r800 
and later HWL parameters - ADDR_TILEINFO* pTileInfo; ///< 2D tile parameters. Client must provide all data - - INT_32 tileIndex; ///< Tile index, MUST be -1 if you don't want to use it - /// while the global useTileIndex is set to 1 - INT_32 macroModeIndex; ///< Index in macro tile mode table if there is one (CI) - ///< README: When tileIndex is not -1, this must be valid -} ADDR_COMBINE_BANKPIPE_SWIZZLE_INPUT; - -/** -**************************************************************************************************** -* ADDR_COMBINE_BANKPIPE_SWIZZLE_OUTPUT -* -* @brief -* Output structure of AddrCombineBankPipeSwizzle -**************************************************************************************************** -*/ -typedef struct _ADDR_COMBINE_BANKPIPE_SWIZZLE_OUTPUT -{ - UINT_32 size; ///< Size of this structure in bytes - - UINT_32 tileSwizzle; ///< Combined swizzle -} ADDR_COMBINE_BANKPIPE_SWIZZLE_OUTPUT; - -/** -**************************************************************************************************** -* AddrCombineBankPipeSwizzle -* -* @brief -* Combine Bank and Pipe swizzle -* @return -* ADDR_OK if no error -* @note -* baseAddr here is full MCAddress instead of base256b -**************************************************************************************************** -*/ -ADDR_E_RETURNCODE ADDR_API AddrCombineBankPipeSwizzle( - ADDR_HANDLE hLib, - const ADDR_COMBINE_BANKPIPE_SWIZZLE_INPUT* pIn, - ADDR_COMBINE_BANKPIPE_SWIZZLE_OUTPUT* pOut); - - - -/** -**************************************************************************************************** -* ADDR_COMPUTE_SLICESWIZZLE_INPUT -* -* @brief -* Input structure of AddrComputeSliceSwizzle -**************************************************************************************************** -*/ -typedef struct _ADDR_COMPUTE_SLICESWIZZLE_INPUT -{ - UINT_32 size; ///< Size of this structure in bytes - - AddrTileMode tileMode; ///< Tile Mode - UINT_32 baseSwizzle; ///< Base tile swizzle 
- UINT_32 slice; ///< Slice index - UINT_64 baseAddr; ///< Base address, driver should leave it 0 in most cases - - /// r800 and later HWL parameters - ADDR_TILEINFO* pTileInfo; ///< 2D tile parameters. Actually banks needed here! - - INT_32 tileIndex; ///< Tile index, MUST be -1 if you don't want to use it - /// while the global useTileIndex is set to 1 - INT_32 macroModeIndex; ///< Index in macro tile mode table if there is one (CI) - ///< README: When tileIndex is not -1, this must be valid -} ADDR_COMPUTE_SLICESWIZZLE_INPUT; - - - -/** -**************************************************************************************************** -* ADDR_COMPUTE_SLICESWIZZLE_OUTPUT -* -* @brief -* Output structure of AddrComputeSliceSwizzle -**************************************************************************************************** -*/ -typedef struct _ADDR_COMPUTE_SLICESWIZZLE_OUTPUT -{ - UINT_32 size; ///< Size of this structure in bytes - - UINT_32 tileSwizzle; ///< Recalculated tileSwizzle value -} ADDR_COMPUTE_SLICESWIZZLE_OUTPUT; - -/** -**************************************************************************************************** -* AddrComputeSliceSwizzle -* -* @brief -* Extract Bank and Pipe swizzle from base256b -* @return -* ADDR_OK if no error -**************************************************************************************************** -*/ -ADDR_E_RETURNCODE ADDR_API AddrComputeSliceSwizzle( - ADDR_HANDLE hLib, - const ADDR_COMPUTE_SLICESWIZZLE_INPUT* pIn, - ADDR_COMPUTE_SLICESWIZZLE_OUTPUT* pOut); - - -/** -**************************************************************************************************** -* AddrSwizzleGenOption -* -* @brief -* Which swizzle generating options: legacy or linear -**************************************************************************************************** -*/ -typedef enum _AddrSwizzleGenOption -{ - ADDR_SWIZZLE_GEN_DEFAULT = 0, ///< As is in client driver implemention for swizzle - 
ADDR_SWIZZLE_GEN_LINEAR = 1, ///< Using a linear increment of swizzle -} AddrSwizzleGenOption; - -/** -**************************************************************************************************** -* AddrSwizzleOption -* -* @brief -* Controls how swizzle is generated -**************************************************************************************************** -*/ -typedef union _ADDR_SWIZZLE_OPTION -{ - struct - { - UINT_32 genOption : 1; ///< The way swizzle is generated, see AddrSwizzleGenOption - UINT_32 reduceBankBit : 1; ///< TRUE if we need reduce swizzle bits - UINT_32 reserved :30; ///< Reserved bits - }; - - UINT_32 value; - -} ADDR_SWIZZLE_OPTION; - -/** -**************************************************************************************************** -* ADDR_COMPUTE_BASE_SWIZZLE_INPUT -* -* @brief -* Input structure of AddrComputeBaseSwizzle -**************************************************************************************************** -*/ -typedef struct _ADDR_COMPUTE_BASE_SWIZZLE_INPUT -{ - UINT_32 size; ///< Size of this structure in bytes - - ADDR_SWIZZLE_OPTION option; ///< Swizzle option - UINT_32 surfIndex; ///< Index of this surface type - AddrTileMode tileMode; ///< Tile Mode - - /// r800 and later HWL parameters - ADDR_TILEINFO* pTileInfo; ///< 2D tile parameters. Actually banks needed here! 
- - INT_32 tileIndex; ///< Tile index, MUST be -1 if you don't want to use it - /// while the global useTileIndex is set to 1 - INT_32 macroModeIndex; ///< Index in macro tile mode table if there is one (CI) - ///< README: When tileIndex is not -1, this must be valid -} ADDR_COMPUTE_BASE_SWIZZLE_INPUT; - -/** -**************************************************************************************************** -* ADDR_COMPUTE_BASE_SWIZZLE_OUTPUT -* -* @brief -* Output structure of AddrComputeBaseSwizzle -**************************************************************************************************** -*/ -typedef struct _ADDR_COMPUTE_BASE_SWIZZLE_OUTPUT -{ - UINT_32 size; ///< Size of this structure in bytes - - UINT_32 tileSwizzle; ///< Combined swizzle -} ADDR_COMPUTE_BASE_SWIZZLE_OUTPUT; - -/** -**************************************************************************************************** -* AddrComputeBaseSwizzle -* -* @brief -* Return a Combined Bank and Pipe swizzle base on surface based on surface type/index -* @return -* ADDR_OK if no error -**************************************************************************************************** -*/ -ADDR_E_RETURNCODE ADDR_API AddrComputeBaseSwizzle( - ADDR_HANDLE hLib, - const ADDR_COMPUTE_BASE_SWIZZLE_INPUT* pIn, - ADDR_COMPUTE_BASE_SWIZZLE_OUTPUT* pOut); - - - -/** -**************************************************************************************************** -* ELEM_GETEXPORTNORM_INPUT -* -* @brief -* Input structure for ElemGetExportNorm -* -**************************************************************************************************** -*/ -typedef struct _ELEM_GETEXPORTNORM_INPUT -{ - UINT_32 size; ///< Size of this structure in bytes - - AddrColorFormat format; ///< Color buffer format; Client should use ColorFormat - AddrSurfaceNumber num; ///< Surface number type; Client should use NumberType - AddrSurfaceSwap swap; ///< Surface swap byte swap; Client should use SurfaceSwap - UINT_32 
numSamples; ///< Number of samples -} ELEM_GETEXPORTNORM_INPUT; - -/** -**************************************************************************************************** -* ElemGetExportNorm -* -* @brief -* Helper function to check one format can be EXPORT_NUM, which is a register -* CB_COLOR_INFO.SURFACE_FORMAT. FP16 can be reported as EXPORT_NORM for rv770 in r600 -* family -* @note -* The implementation is only for r600. -* 00 - EXPORT_FULL: PS exports are 4 pixels with 4 components with 32-bits-per-component. (two -* clocks per export) -* 01 - EXPORT_NORM: PS exports are 4 pixels with 4 components with 16-bits-per-component. (one -* clock per export) -* -**************************************************************************************************** -*/ -BOOL_32 ADDR_API ElemGetExportNorm( - ADDR_HANDLE hLib, - const ELEM_GETEXPORTNORM_INPUT* pIn); - - - -/** -**************************************************************************************************** -* ELEM_FLT32TODEPTHPIXEL_INPUT -* -* @brief -* Input structure for addrFlt32ToDepthPixel -* -**************************************************************************************************** -*/ -typedef struct _ELEM_FLT32TODEPTHPIXEL_INPUT -{ - UINT_32 size; ///< Size of this structure in bytes - - AddrDepthFormat format; ///< Depth buffer format - ADDR_FLT_32 comps[2]; ///< Component values (Z/stencil) -} ELEM_FLT32TODEPTHPIXEL_INPUT; - -/** -**************************************************************************************************** -* ELEM_FLT32TODEPTHPIXEL_INPUT -* -* @brief -* Output structure for ElemFlt32ToDepthPixel -* -**************************************************************************************************** -*/ -typedef struct _ELEM_FLT32TODEPTHPIXEL_OUTPUT -{ - UINT_32 size; ///< Size of this structure in bytes - - UINT_8* pPixel; ///< Real depth value. Same data type as depth buffer. - /// Client must provide enough storage for this type. 
- UINT_32 depthBase; ///< Tile base in bits for depth bits - UINT_32 stencilBase; ///< Tile base in bits for stencil bits - UINT_32 depthBits; ///< Bits for depth - UINT_32 stencilBits; ///< Bits for stencil -} ELEM_FLT32TODEPTHPIXEL_OUTPUT; - -/** -**************************************************************************************************** -* ElemFlt32ToDepthPixel -* -* @brief -* Convert a FLT_32 value to a depth/stencil pixel value -* -* @return -* Return code -* -**************************************************************************************************** -*/ -ADDR_E_RETURNCODE ADDR_API ElemFlt32ToDepthPixel( - ADDR_HANDLE hLib, - const ELEM_FLT32TODEPTHPIXEL_INPUT* pIn, - ELEM_FLT32TODEPTHPIXEL_OUTPUT* pOut); - - - -/** -**************************************************************************************************** -* ELEM_FLT32TOCOLORPIXEL_INPUT -* -* @brief -* Input structure for addrFlt32ToColorPixel -* -**************************************************************************************************** -*/ -typedef struct _ELEM_FLT32TOCOLORPIXEL_INPUT -{ - UINT_32 size; ///< Size of this structure in bytes - - AddrColorFormat format; ///< Color buffer format - AddrSurfaceNumber surfNum; ///< Surface number - AddrSurfaceSwap surfSwap; ///< Surface swap - ADDR_FLT_32 comps[4]; ///< Component values (r/g/b/a) -} ELEM_FLT32TOCOLORPIXEL_INPUT; - -/** -**************************************************************************************************** -* ELEM_FLT32TOCOLORPIXEL_INPUT -* -* @brief -* Output structure for ElemFlt32ToColorPixel -* -**************************************************************************************************** -*/ -typedef struct _ELEM_FLT32TOCOLORPIXEL_OUTPUT -{ - UINT_32 size; ///< Size of this structure in bytes - - UINT_8* pPixel; ///< Real color value. Same data type as color buffer. - /// Client must provide enough storage for this type. 
-} ELEM_FLT32TOCOLORPIXEL_OUTPUT; - -/** -**************************************************************************************************** -* ElemFlt32ToColorPixel -* -* @brief -* Convert a FLT_32 value to a red/green/blue/alpha pixel value -* -* @return -* Return code -* -**************************************************************************************************** -*/ -ADDR_E_RETURNCODE ADDR_API ElemFlt32ToColorPixel( - ADDR_HANDLE hLib, - const ELEM_FLT32TOCOLORPIXEL_INPUT* pIn, - ELEM_FLT32TOCOLORPIXEL_OUTPUT* pOut); - - -/** -**************************************************************************************************** -* ADDR_CONVERT_TILEINFOTOHW_INPUT -* -* @brief -* Input structure for AddrConvertTileInfoToHW -* @note -* When reverse is TRUE, indices are igonred -**************************************************************************************************** -*/ -typedef struct _ADDR_CONVERT_TILEINFOTOHW_INPUT -{ - UINT_32 size; ///< Size of this structure in bytes - BOOL_32 reverse; ///< Convert control flag. - /// FALSE: convert from real value to HW value; - /// TRUE: convert from HW value to real value. 
- - /// r800 and later HWL parameters - ADDR_TILEINFO* pTileInfo; ///< Tile parameters with real value - - INT_32 tileIndex; ///< Tile index, MUST be -1 if you don't want to use it - /// while the global useTileIndex is set to 1 - INT_32 macroModeIndex; ///< Index in macro tile mode table if there is one (CI) - ///< README: When tileIndex is not -1, this must be valid - UINT_32 bpp; ///< Bits per pixel -} ADDR_CONVERT_TILEINFOTOHW_INPUT; - -/** -**************************************************************************************************** -* ADDR_CONVERT_TILEINFOTOHW_OUTPUT -* -* @brief -* Output structure for AddrConvertTileInfoToHW -**************************************************************************************************** -*/ -typedef struct _ADDR_CONVERT_TILEINFOTOHW_OUTPUT -{ - UINT_32 size; ///< Size of this structure in bytes - - /// r800 and later HWL parameters - ADDR_TILEINFO* pTileInfo; ///< Tile parameters with hardware register value - -} ADDR_CONVERT_TILEINFOTOHW_OUTPUT; - -/** -**************************************************************************************************** -* AddrConvertTileInfoToHW -* -* @brief -* Convert tile info from real value to hardware register value -**************************************************************************************************** -*/ -ADDR_E_RETURNCODE ADDR_API AddrConvertTileInfoToHW( - ADDR_HANDLE hLib, - const ADDR_CONVERT_TILEINFOTOHW_INPUT* pIn, - ADDR_CONVERT_TILEINFOTOHW_OUTPUT* pOut); - - - -/** -**************************************************************************************************** -* ADDR_CONVERT_TILEINDEX_INPUT -* -* @brief -* Input structure for AddrConvertTileIndex -**************************************************************************************************** -*/ -typedef struct _ADDR_CONVERT_TILEINDEX_INPUT -{ - UINT_32 size; ///< Size of this structure in bytes - - INT_32 tileIndex; ///< Tile index - INT_32 macroModeIndex; ///< Index in macro tile mode 
table if there is one (CI) - UINT_32 bpp; ///< Bits per pixel - BOOL_32 tileInfoHw; ///< Set to TRUE if client wants HW enum, otherwise actual -} ADDR_CONVERT_TILEINDEX_INPUT; - -/** -**************************************************************************************************** -* ADDR_CONVERT_TILEINDEX_OUTPUT -* -* @brief -* Output structure for AddrConvertTileIndex -**************************************************************************************************** -*/ -typedef struct _ADDR_CONVERT_TILEINDEX_OUTPUT -{ - UINT_32 size; ///< Size of this structure in bytes - - AddrTileMode tileMode; ///< Tile mode - AddrTileType tileType; ///< Tile type - ADDR_TILEINFO* pTileInfo; ///< Tile info - -} ADDR_CONVERT_TILEINDEX_OUTPUT; - -/** -**************************************************************************************************** -* AddrConvertTileIndex -* -* @brief -* Convert tile index to tile mode/type/info -**************************************************************************************************** -*/ -ADDR_E_RETURNCODE ADDR_API AddrConvertTileIndex( - ADDR_HANDLE hLib, - const ADDR_CONVERT_TILEINDEX_INPUT* pIn, - ADDR_CONVERT_TILEINDEX_OUTPUT* pOut); - -/** -**************************************************************************************************** -* ADDR_GET_MACROMODEINDEX_INPUT -* -* @brief -* Input structure for AddrGetMacroModeIndex -**************************************************************************************************** -*/ -typedef struct _ADDR_GET_MACROMODEINDEX_INPUT -{ - UINT_32 size; ///< Size of this structure in bytes - ADDR_SURFACE_FLAGS flags; ///< Surface flag - INT_32 tileIndex; ///< Tile index - UINT_32 bpp; ///< Bits per pixel - UINT_32 numFrags; ///< Number of color fragments -} ADDR_GET_MACROMODEINDEX_INPUT; - -/** -**************************************************************************************************** -* ADDR_GET_MACROMODEINDEX_OUTPUT -* -* @brief -* Output structure for 
AddrGetMacroModeIndex -**************************************************************************************************** -*/ -typedef struct _ADDR_GET_MACROMODEINDEX_OUTPUT -{ - UINT_32 size; ///< Size of this structure in bytes - INT_32 macroModeIndex; ///< Index in macro tile mode table if there is one (CI) -} ADDR_GET_MACROMODEINDEX_OUTPUT; - -/** -**************************************************************************************************** -* AddrGetMacroModeIndex -* -* @brief -* Get macro mode index based on input parameters -**************************************************************************************************** -*/ -ADDR_E_RETURNCODE ADDR_API AddrGetMacroModeIndex( - ADDR_HANDLE hLib, - const ADDR_GET_MACROMODEINDEX_INPUT* pIn, - ADDR_GET_MACROMODEINDEX_OUTPUT* pOut); - -/** -**************************************************************************************************** -* ADDR_CONVERT_TILEINDEX1_INPUT -* -* @brief -* Input structure for AddrConvertTileIndex1 (without macro mode index) -**************************************************************************************************** -*/ -typedef struct _ADDR_CONVERT_TILEINDEX1_INPUT -{ - UINT_32 size; ///< Size of this structure in bytes - - INT_32 tileIndex; ///< Tile index - UINT_32 bpp; ///< Bits per pixel - UINT_32 numSamples; ///< Number of samples - BOOL_32 tileInfoHw; ///< Set to TRUE if client wants HW enum, otherwise actual -} ADDR_CONVERT_TILEINDEX1_INPUT; - -/** -**************************************************************************************************** -* AddrConvertTileIndex1 -* -* @brief -* Convert tile index to tile mode/type/info -**************************************************************************************************** -*/ -ADDR_E_RETURNCODE ADDR_API AddrConvertTileIndex1( - ADDR_HANDLE hLib, - const ADDR_CONVERT_TILEINDEX1_INPUT* pIn, - ADDR_CONVERT_TILEINDEX_OUTPUT* pOut); - - - -/** 
-**************************************************************************************************** -* ADDR_GET_TILEINDEX_INPUT -* -* @brief -* Input structure for AddrGetTileIndex -**************************************************************************************************** -*/ -typedef struct _ADDR_GET_TILEINDEX_INPUT -{ - UINT_32 size; ///< Size of this structure in bytes - - AddrTileMode tileMode; ///< Tile mode - AddrTileType tileType; ///< Tile-type: disp/non-disp/... - ADDR_TILEINFO* pTileInfo; ///< Pointer to tile-info structure, can be NULL for linear/1D -} ADDR_GET_TILEINDEX_INPUT; - -/** -**************************************************************************************************** -* ADDR_GET_TILEINDEX_OUTPUT -* -* @brief -* Output structure for AddrGetTileIndex -**************************************************************************************************** -*/ -typedef struct _ADDR_GET_TILEINDEX_OUTPUT -{ - UINT_32 size; ///< Size of this structure in bytes - - INT_32 index; ///< index in table -} ADDR_GET_TILEINDEX_OUTPUT; - -/** -**************************************************************************************************** -* AddrGetTileIndex -* -* @brief -* Get the tiling mode index in table -**************************************************************************************************** -*/ -ADDR_E_RETURNCODE ADDR_API AddrGetTileIndex( - ADDR_HANDLE hLib, - const ADDR_GET_TILEINDEX_INPUT* pIn, - ADDR_GET_TILEINDEX_OUTPUT* pOut); - - - -/** -**************************************************************************************************** -* ADDR_PRT_INFO_INPUT -* -* @brief -* Input structure for AddrComputePrtInfo -**************************************************************************************************** -*/ -typedef struct _ADDR_PRT_INFO_INPUT -{ - AddrFormat format; ///< Surface format - UINT_32 baseMipWidth; ///< Base mipmap width - UINT_32 baseMipHeight; ///< Base mipmap height - UINT_32 baseMipDepth; 
///< Base mipmap depth - UINT_32 numFrags; ///< Number of fragments, -} ADDR_PRT_INFO_INPUT; - -/** -**************************************************************************************************** -* ADDR_PRT_INFO_OUTPUT -* -* @brief -* Input structure for AddrComputePrtInfo -**************************************************************************************************** -*/ -typedef struct _ADDR_PRT_INFO_OUTPUT -{ - UINT_32 prtTileWidth; - UINT_32 prtTileHeight; -} ADDR_PRT_INFO_OUTPUT; - -/** -**************************************************************************************************** -* AddrComputePrtInfo -* -* @brief -* Compute prt surface related information -**************************************************************************************************** -*/ -ADDR_E_RETURNCODE ADDR_API AddrComputePrtInfo( - ADDR_HANDLE hLib, - const ADDR_PRT_INFO_INPUT* pIn, - ADDR_PRT_INFO_OUTPUT* pOut); - - - -//////////////////////////////////////////////////////////////////////////////////////////////////// -// DCC key functions -//////////////////////////////////////////////////////////////////////////////////////////////////// - -/** -**************************************************************************************************** -* _ADDR_COMPUTE_DCCINFO_INPUT -* -* @brief -* Input structure of AddrComputeDccInfo -**************************************************************************************************** -*/ -typedef struct _ADDR_COMPUTE_DCCINFO_INPUT -{ - UINT_32 size; ///< Size of this structure in bytes - UINT_32 bpp; ///< BitPP of color surface - UINT_32 numSamples; ///< Sample number of color surface - UINT_64 colorSurfSize; ///< Size of color surface to which dcc key is bound - AddrTileMode tileMode; ///< Tile mode of color surface - ADDR_TILEINFO tileInfo; ///< Tile info of color surface - UINT_32 tileSwizzle; ///< Tile swizzle - INT_32 tileIndex; ///< Tile index of color surface, - ///< MUST be -1 if you don't want to use it - 
///< while the global useTileIndex is set to 1 - INT_32 macroModeIndex; ///< Index in macro tile mode table if there is one (CI) - ///< README: When tileIndex is not -1, this must be valid -} ADDR_COMPUTE_DCCINFO_INPUT; - -/** -**************************************************************************************************** -* ADDR_COMPUTE_DCCINFO_OUTPUT -* -* @brief -* Output structure of AddrComputeDccInfo -**************************************************************************************************** -*/ -typedef struct _ADDR_COMPUTE_DCCINFO_OUTPUT -{ - UINT_32 size; ///< Size of this structure in bytes - UINT_32 dccRamBaseAlign; ///< Base alignment of dcc key - UINT_64 dccRamSize; ///< Size of dcc key - UINT_64 dccFastClearSize; ///< Size of dcc key portion that can be fast cleared - BOOL_32 subLvlCompressible; ///< Whether sub resource is compressiable - BOOL_32 dccRamSizeAligned; ///< Whether the dcc key size is aligned -} ADDR_COMPUTE_DCCINFO_OUTPUT; - -/** -**************************************************************************************************** -* AddrComputeDccInfo -* -* @brief -* Compute DCC key size, base alignment -* info -**************************************************************************************************** -*/ -ADDR_E_RETURNCODE ADDR_API AddrComputeDccInfo( - ADDR_HANDLE hLib, - const ADDR_COMPUTE_DCCINFO_INPUT* pIn, - ADDR_COMPUTE_DCCINFO_OUTPUT* pOut); - - - -/** -**************************************************************************************************** -* ADDR_GET_MAX_ALINGMENTS_OUTPUT -* -* @brief -* Output structure of AddrGetMaxAlignments -**************************************************************************************************** -*/ -typedef struct _ADDR_GET_MAX_ALINGMENTS_OUTPUT -{ - UINT_32 size; ///< Size of this structure in bytes - UINT_32 baseAlign; ///< Maximum base alignment in bytes -} ADDR_GET_MAX_ALINGMENTS_OUTPUT; - -/** 
-**************************************************************************************************** -* AddrGetMaxAlignments -* -* @brief -* Gets maximnum alignments -**************************************************************************************************** -*/ -ADDR_E_RETURNCODE ADDR_API AddrGetMaxAlignments( - ADDR_HANDLE hLib, - ADDR_GET_MAX_ALINGMENTS_OUTPUT* pOut); - -/** -**************************************************************************************************** -* AddrGetMaxMetaAlignments -* -* @brief -* Gets maximnum alignments for metadata -**************************************************************************************************** -*/ -ADDR_E_RETURNCODE ADDR_API AddrGetMaxMetaAlignments( - ADDR_HANDLE hLib, - ADDR_GET_MAX_ALINGMENTS_OUTPUT* pOut); - -/** -**************************************************************************************************** -* Address library interface version 2 -* available from Gfx9 hardware -**************************************************************************************************** -* Addr2ComputeSurfaceInfo() -* Addr2ComputeSurfaceAddrFromCoord() -* Addr2ComputeSurfaceCoordFromAddr() - -* Addr2ComputeHtileInfo() -* Addr2ComputeHtileAddrFromCoord() -* Addr2ComputeHtileCoordFromAddr() -* -* Addr2ComputeCmaskInfo() -* Addr2ComputeCmaskAddrFromCoord() -* Addr2ComputeCmaskCoordFromAddr() -* -* Addr2ComputeFmaskInfo() -* Addr2ComputeFmaskAddrFromCoord() -* Addr2ComputeFmaskCoordFromAddr() -* -* Addr2ComputeDccInfo() -* -**/ - - -//////////////////////////////////////////////////////////////////////////////////////////////////// -// Surface functions for Gfx9 -//////////////////////////////////////////////////////////////////////////////////////////////////// - -/** -**************************************************************************************************** -* ADDR2_SURFACE_FLAGS -* -* @brief -* Surface flags 
-**************************************************************************************************** -*/ -typedef union _ADDR2_SURFACE_FLAGS -{ - struct - { - UINT_32 color : 1; ///< This resource is a color buffer, can be used with RTV - UINT_32 depth : 1; ///< Thie resource is a depth buffer, can be used with DSV - UINT_32 stencil : 1; ///< Thie resource is a stencil buffer, can be used with DSV - UINT_32 fmask : 1; ///< This is an fmask surface - UINT_32 overlay : 1; ///< This is an overlay surface - UINT_32 display : 1; ///< This resource is displable, can be used with DRV - UINT_32 prt : 1; ///< This is a partially resident texture - UINT_32 qbStereo : 1; ///< This is a quad buffer stereo surface - UINT_32 interleaved : 1; ///< Special flag for interleaved YUV surface padding - UINT_32 texture : 1; ///< This resource can be used with SRV - UINT_32 unordered : 1; ///< This resource can be used with UAV - UINT_32 rotated : 1; ///< This resource is rotated and displable - UINT_32 needEquation : 1; ///< This resource needs equation to be generated if possible - UINT_32 opt4space : 1; ///< This resource should be optimized for space - UINT_32 minimizeAlign : 1; ///< This resource should use minimum alignment - UINT_32 noMetadata : 1; ///< This resource has no metadata - UINT_32 metaRbUnaligned : 1; ///< This resource has rb unaligned metadata - UINT_32 metaPipeUnaligned : 1; ///< This resource has pipe unaligned metadata - UINT_32 reserved : 14; ///< Reserved bits - }; - - UINT_32 value; -} ADDR2_SURFACE_FLAGS; - -/** -**************************************************************************************************** -* ADDR2_COMPUTE_SURFACE_INFO_INPUT -* -* @brief -* Input structure for Addr2ComputeSurfaceInfo -**************************************************************************************************** -*/ -typedef struct _ADDR2_COMPUTE_SURFACE_INFO_INPUT -{ - UINT_32 size; ///< Size of this structure in bytes - - ADDR2_SURFACE_FLAGS flags; ///< Surface 
flags - AddrSwizzleMode swizzleMode; ///< Swizzle Mode for Gfx9 - AddrResourceType resourceType; ///< Surface type - AddrFormat format; ///< Surface format - UINT_32 bpp; ///< bits per pixel - UINT_32 width; ///< Width (of mip0), in pixels - UINT_32 height; ///< Height (of mip0), in pixels - UINT_32 numSlices; ///< Number surface slice/depth (of mip0), - UINT_32 numMipLevels; ///< Total mipmap levels. - UINT_32 numSamples; ///< Number of samples - UINT_32 numFrags; ///< Number of fragments, leave it zero or the same as - /// number of samples for normal AA; Set it to the - /// number of fragments for EQAA - UINT_32 pitchInElement; ///< Pitch in elements (blocks for compressed formats) - UINT_32 sliceAlign; ///< Required slice size in bytes -} ADDR2_COMPUTE_SURFACE_INFO_INPUT; - -/** -**************************************************************************************************** -* ADDR2_MIP_INFO -* -* @brief -* Structure that contains information for mip level -* -**************************************************************************************************** -*/ -typedef struct _ADDR2_MIP_INFO -{ - UINT_32 pitch; ///< Pitch in elements - UINT_32 height; ///< Padded height in elements - UINT_32 depth; ///< Padded depth - UINT_32 pixelPitch; ///< Pitch in pixels - UINT_32 pixelHeight; ///< Padded height in pixels - UINT_32 equationIndex; ///< Equation index in the equation table - UINT_64 offset; ///< Offset in bytes from mip base, should only be used - ///< to setup vam surface descriptor, can't be used - ///< to setup swizzle pattern - UINT_64 macroBlockOffset; ///< macro block offset in bytes from mip base - UINT_32 mipTailOffset; ///< mip tail offset in bytes - UINT_32 mipTailCoordX; ///< mip tail coord x - UINT_32 mipTailCoordY; ///< mip tail coord y - UINT_32 mipTailCoordZ; ///< mip tail coord z -} ADDR2_MIP_INFO; - -/** -**************************************************************************************************** -* 
ADDR2_COMPUTE_SURFACE_INFO_OUTPUT -* -* @brief -* Output structure for Addr2ComputeSurfInfo -* @note - Element: AddrLib unit for computing. e.g. BCn: 4x4 blocks; R32B32B32: 32bit with 3x pitch - Pixel: Original pixel -**************************************************************************************************** -*/ -typedef struct _ADDR2_COMPUTE_SURFACE_INFO_OUTPUT -{ - UINT_32 size; ///< Size of this structure in bytes - - UINT_32 pitch; ///< Pitch in elements (blocks for compressed formats) - UINT_32 height; ///< Padded height (of mip0) in elements - UINT_32 numSlices; ///< Padded depth for 3d resource - ///< or padded number of slices for 2d array resource - UINT_32 mipChainPitch; ///< Pitch (of total mip chain) in elements - UINT_32 mipChainHeight; ///< Padded height (of total mip chain) in elements - UINT_32 mipChainSlice; ///< Padded depth (of total mip chain) - UINT_64 sliceSize; ///< Slice (total mip chain) size in bytes - UINT_64 surfSize; ///< Surface (total mip chain) size in bytes - UINT_32 baseAlign; ///< Base address alignment - UINT_32 bpp; ///< Bits per elements - /// (e.g. 
blocks for BCn, 1/3 for 96bit) - UINT_32 pixelMipChainPitch; ///< Mip chain pitch in original pixels - UINT_32 pixelMipChainHeight; ///< Mip chain height in original pixels - UINT_32 pixelPitch; ///< Pitch in original pixels - UINT_32 pixelHeight; ///< Height in original pixels - UINT_32 pixelBits; ///< Original bits per pixel, passed from input - - UINT_32 blockWidth; ///< Width in element inside one block - UINT_32 blockHeight; ///< Height in element inside one block - UINT_32 blockSlices; ///< Slice number inside one block - ///< Prt tile is one block, its width/height/slice - ///< equals to blcok width/height/slice - - BOOL_32 epitchIsHeight; ///< Whether to use height to program epitch register - /// Stereo info - ADDR_QBSTEREOINFO* pStereoInfo; ///< Stereo info, needed if qbStereo flag is TRUE - /// Mip info - ADDR2_MIP_INFO* pMipInfo; ///< Pointer to mip information array - /// if it is not NULL, the array is assumed to - /// contain numMipLevels entries - - UINT_32 equationIndex; ///< Equation index in the equation table of mip0 - BOOL_32 mipChainInTail; ///< If whole mipchain falls into mip tail block - UINT_32 firstMipIdInTail; ///< The id of first mip in tail, if there is no mip - /// in tail, it will be set to number of mip levels -} ADDR2_COMPUTE_SURFACE_INFO_OUTPUT; - -/** -**************************************************************************************************** -* Addr2ComputeSurfaceInfo -* -* @brief -* Compute surface width/height/slices/alignments and suitable tiling mode -**************************************************************************************************** -*/ -ADDR_E_RETURNCODE ADDR_API Addr2ComputeSurfaceInfo( - ADDR_HANDLE hLib, - const ADDR2_COMPUTE_SURFACE_INFO_INPUT* pIn, - ADDR2_COMPUTE_SURFACE_INFO_OUTPUT* pOut); - - - -/** -**************************************************************************************************** -* ADDR2_COMPUTE_SURFACE_ADDRFROMCOORD_INPUT -* -* @brief -* Input structure for 
Addr2ComputeSurfaceAddrFromCoord -**************************************************************************************************** -*/ -typedef struct _ADDR2_COMPUTE_SURFACE_ADDRFROMCOORD_INPUT -{ - UINT_32 size; ///< Size of this structure in bytes - - UINT_32 x; ///< X coordinate - UINT_32 y; ///< Y coordinate - UINT_32 slice; ///< Slice index - UINT_32 sample; ///< Sample index, use fragment index for EQAA - UINT_32 mipId; ///< the mip ID in mip chain - - AddrSwizzleMode swizzleMode; ///< Swizzle mode for Gfx9 - ADDR2_SURFACE_FLAGS flags; ///< Surface flags - AddrResourceType resourceType; ///< Surface type - UINT_32 bpp; ///< Bits per pixel - UINT_32 unalignedWidth; ///< Surface original width (of mip0) - UINT_32 unalignedHeight; ///< Surface original height (of mip0) - UINT_32 numSlices; ///< Surface original slices (of mip0) - UINT_32 numMipLevels; ///< Total mipmap levels - UINT_32 numSamples; ///< Number of samples - UINT_32 numFrags; ///< Number of fragments, leave it zero or the same as - /// number of samples for normal AA; Set it to the - /// number of fragments for EQAA - - UINT_32 pipeBankXor; ///< Combined swizzle used to do bank/pipe rotation - UINT_32 pitchInElement; ///< Pitch in elements (blocks for compressed formats) -} ADDR2_COMPUTE_SURFACE_ADDRFROMCOORD_INPUT; - -/** -**************************************************************************************************** -* ADDR2_COMPUTE_SURFACE_ADDRFROMCOORD_OUTPUT -* -* @brief -* Output structure for Addr2ComputeSurfaceAddrFromCoord -**************************************************************************************************** -*/ -typedef struct _ADDR2_COMPUTE_SURFACE_ADDRFROMCOORD_OUTPUT -{ - UINT_32 size; ///< Size of this structure in bytes - - UINT_64 addr; ///< Byte address - UINT_32 bitPosition; ///< Bit position within surfaceAddr, 0-7. - /// For surface bpp < 8, e.g. FMT_1. 
- UINT_32 prtBlockIndex; ///< Index of a PRT tile (64K block) -} ADDR2_COMPUTE_SURFACE_ADDRFROMCOORD_OUTPUT; - -/** -**************************************************************************************************** -* Addr2ComputeSurfaceAddrFromCoord -* -* @brief -* Compute surface address from a given coordinate. -**************************************************************************************************** -*/ -ADDR_E_RETURNCODE ADDR_API Addr2ComputeSurfaceAddrFromCoord( - ADDR_HANDLE hLib, - const ADDR2_COMPUTE_SURFACE_ADDRFROMCOORD_INPUT* pIn, - ADDR2_COMPUTE_SURFACE_ADDRFROMCOORD_OUTPUT* pOut); - - - -/** -**************************************************************************************************** -* ADDR2_COMPUTE_SURFACE_COORDFROMADDR_INPUT -* -* @brief -* Input structure for Addr2ComputeSurfaceCoordFromAddr -**************************************************************************************************** -*/ -typedef struct _ADDR2_COMPUTE_SURFACE_COORDFROMADDR_INPUT -{ - UINT_32 size; ///< Size of this structure in bytes - - UINT_64 addr; ///< Address in bytes - UINT_32 bitPosition; ///< Bit position in addr. 0-7. for surface bpp < 8, - /// e.g. FMT_1; - - AddrSwizzleMode swizzleMode; ///< Swizzle mode for Gfx9 - ADDR2_SURFACE_FLAGS flags; ///< Surface flags - AddrResourceType resourceType; ///< Surface type - UINT_32 bpp; ///< Bits per pixel - UINT_32 unalignedWidth; ///< Surface original width (of mip0) - UINT_32 unalignedHeight; ///< Surface original height (of mip0) - UINT_32 numSlices; ///< Surface original slices (of mip0) - UINT_32 numMipLevels; ///< Total mipmap levels. 
- UINT_32 numSamples; ///< Number of samples - UINT_32 numFrags; ///< Number of fragments, leave it zero or the same as - /// number of samples for normal AA; Set it to the - /// number of fragments for EQAA - - UINT_32 pipeBankXor; ///< Combined swizzle used to do bank/pipe rotation - UINT_32 pitchInElement; ///< Pitch in elements (blocks for compressed formats) -} ADDR2_COMPUTE_SURFACE_COORDFROMADDR_INPUT; - -/** -**************************************************************************************************** -* ADDR2_COMPUTE_SURFACE_COORDFROMADDR_OUTPUT -* -* @brief -* Output structure for Addr2ComputeSurfaceCoordFromAddr -**************************************************************************************************** -*/ -typedef struct _ADDR2_COMPUTE_SURFACE_COORDFROMADDR_OUTPUT -{ - UINT_32 size; ///< Size of this structure in bytes - - UINT_32 x; ///< X coordinate - UINT_32 y; ///< Y coordinate - UINT_32 slice; ///< Index of slices - UINT_32 sample; ///< Index of samples, means fragment index for EQAA - UINT_32 mipId; ///< mipmap level id -} ADDR2_COMPUTE_SURFACE_COORDFROMADDR_OUTPUT; - -/** -**************************************************************************************************** -* Addr2ComputeSurfaceCoordFromAddr -* -* @brief -* Compute coordinate from a given surface address -**************************************************************************************************** -*/ -ADDR_E_RETURNCODE ADDR_API Addr2ComputeSurfaceCoordFromAddr( - ADDR_HANDLE hLib, - const ADDR2_COMPUTE_SURFACE_COORDFROMADDR_INPUT* pIn, - ADDR2_COMPUTE_SURFACE_COORDFROMADDR_OUTPUT* pOut); - - - -//////////////////////////////////////////////////////////////////////////////////////////////////// -// HTile functions for Gfx9 -//////////////////////////////////////////////////////////////////////////////////////////////////// - -/** -**************************************************************************************************** -* ADDR2_META_FLAGS -* -* 
@brief -* Metadata flags -**************************************************************************************************** -*/ -typedef union _ADDR2_META_FLAGS -{ - struct - { - UINT_32 pipeAligned : 1; ///< if Metadata being pipe aligned - UINT_32 rbAligned : 1; ///< if Metadata being RB aligned - UINT_32 linear : 1; ///< if Metadata linear, GFX9 does not suppord this! - UINT_32 reserved : 29; ///< Reserved bits - }; - - UINT_32 value; -} ADDR2_META_FLAGS; - -/** -**************************************************************************************************** -* ADDR2_META_MIP_INFO -* -* @brief -* Structure to store per mip metadata information -**************************************************************************************************** -*/ -typedef struct _ADDR2_META_MIP_INFO -{ - BOOL_32 inMiptail; - union - { - struct - { - UINT_32 startX; - UINT_32 startY; - UINT_32 startZ; - UINT_32 width; - UINT_32 height; - UINT_32 depth; - }; - - struct - { - UINT_32 offset; - UINT_32 sliceSize; - }; - }; -} ADDR2_META_MIP_INFO; - -/** -**************************************************************************************************** -* ADDR2_COMPUTE_HTILE_INFO_INPUT -* -* @brief -* Input structure of Addr2ComputeHtileInfo -**************************************************************************************************** -*/ -typedef struct _ADDR2_COMPUTE_HTILE_INFO_INPUT -{ - UINT_32 size; ///< Size of this structure in bytes - - ADDR2_META_FLAGS hTileFlags; ///< HTILE flags - ADDR2_SURFACE_FLAGS depthFlags; ///< Depth surface flags - AddrSwizzleMode swizzleMode; ///< Depth surface swizzle mode - UINT_32 unalignedWidth; ///< Depth surface original width (of mip0) - UINT_32 unalignedHeight; ///< Depth surface original height (of mip0) - UINT_32 numSlices; ///< Number of slices of depth surface (of mip0) - UINT_32 numMipLevels; ///< Total mipmap levels of color surface - UINT_32 firstMipIdInTail; -} ADDR2_COMPUTE_HTILE_INFO_INPUT; - -/** 
-**************************************************************************************************** -* ADDR2_COMPUTE_HTILE_INFO_OUTPUT -* -* @brief -* Output structure of Addr2ComputeHtileInfo -**************************************************************************************************** -*/ -typedef struct _ADDR2_COMPUTE_HTILE_INFO_OUTPUT -{ - UINT_32 size; ///< Size of this structure in bytes - - UINT_32 pitch; ///< Pitch in pixels of depth buffer represented in this - /// HTile buffer. This might be larger than original depth - /// buffer pitch when called with an unaligned pitch. - UINT_32 height; ///< Height in pixels, as above - UINT_32 baseAlign; ///< Base alignment - UINT_32 sliceSize; ///< Slice size, in bytes. - UINT_32 htileBytes; ///< Size of HTILE buffer, in bytes - UINT_32 metaBlkWidth; ///< Meta block width - UINT_32 metaBlkHeight; ///< Meta block height - UINT_32 metaBlkNumPerSlice; ///< Number of metablock within one slice - - ADDR2_META_MIP_INFO* pMipInfo; ///< HTILE mip information -} ADDR2_COMPUTE_HTILE_INFO_OUTPUT; - -/** -**************************************************************************************************** -* Addr2ComputeHtileInfo -* -* @brief -* Compute Htile pitch, height, base alignment and size in bytes -**************************************************************************************************** -*/ -ADDR_E_RETURNCODE ADDR_API Addr2ComputeHtileInfo( - ADDR_HANDLE hLib, - const ADDR2_COMPUTE_HTILE_INFO_INPUT* pIn, - ADDR2_COMPUTE_HTILE_INFO_OUTPUT* pOut); - - - -/** -**************************************************************************************************** -* ADDR2_COMPUTE_HTILE_ADDRFROMCOORD_INPUT -* -* @brief -* Input structure for Addr2ComputeHtileAddrFromCoord -**************************************************************************************************** -*/ -typedef struct _ADDR2_COMPUTE_HTILE_ADDRFROMCOORD_INPUT -{ - UINT_32 size; ///< Size of this structure in bytes - - UINT_32 x; ///< X 
coordinate - UINT_32 y; ///< Y coordinate - UINT_32 slice; ///< Index of slices - UINT_32 mipId; ///< mipmap level id - - ADDR2_META_FLAGS hTileFlags; ///< HTILE flags - ADDR2_SURFACE_FLAGS depthflags; ///< Depth surface flags - AddrSwizzleMode swizzleMode; ///< Depth surface swizzle mode - UINT_32 bpp; ///< Depth surface bits per pixel - UINT_32 unalignedWidth; ///< Depth surface original width (of mip0) - UINT_32 unalignedHeight; ///< Depth surface original height (of mip0) - UINT_32 numSlices; ///< Depth surface original depth (of mip0) - UINT_32 numMipLevels; ///< Depth surface total mipmap levels - UINT_32 numSamples; ///< Depth surface number of samples - UINT_32 pipeXor; ///< Pipe xor setting -} ADDR2_COMPUTE_HTILE_ADDRFROMCOORD_INPUT; - -/** -**************************************************************************************************** -* ADDR2_COMPUTE_HTILE_ADDRFROMCOORD_OUTPUT -* -* @brief -* Output structure for Addr2ComputeHtileAddrFromCoord -**************************************************************************************************** -*/ -typedef struct _ADDR2_COMPUTE_HTILE_ADDRFROMCOORD_OUTPUT -{ - UINT_32 size; ///< Size of this structure in bytes - - UINT_64 addr; ///< Address in bytes -} ADDR2_COMPUTE_HTILE_ADDRFROMCOORD_OUTPUT; - -/** -**************************************************************************************************** -* Addr2ComputeHtileAddrFromCoord -* -* @brief -* Compute Htile address according to coordinates (of depth buffer) -**************************************************************************************************** -*/ -ADDR_E_RETURNCODE ADDR_API Addr2ComputeHtileAddrFromCoord( - ADDR_HANDLE hLib, - const ADDR2_COMPUTE_HTILE_ADDRFROMCOORD_INPUT* pIn, - ADDR2_COMPUTE_HTILE_ADDRFROMCOORD_OUTPUT* pOut); - - - -/** -**************************************************************************************************** -* ADDR2_COMPUTE_HTILE_COORDFROMADDR_INPUT -* -* @brief -* Input structure for 
Addr2ComputeHtileCoordFromAddr -**************************************************************************************************** -*/ -typedef struct _ADDR2_COMPUTE_HTILE_COORDFROMADDR_INPUT -{ - UINT_32 size; ///< Size of this structure in bytes - - UINT_64 addr; ///< Address - - ADDR2_META_FLAGS hTileFlags; ///< HTILE flags - ADDR2_SURFACE_FLAGS depthFlags; ///< Depth surface flags - AddrSwizzleMode swizzleMode; ///< Depth surface swizzle mode - UINT_32 bpp; ///< Depth surface bits per pixel - UINT_32 unalignedWidth; ///< Depth surface original width (of mip0) - UINT_32 unalignedHeight; ///< Depth surface original height (of mip0) - UINT_32 numSlices; ///< Depth surface original depth (of mip0) - UINT_32 numMipLevels; ///< Depth surface total mipmap levels - UINT_32 numSamples; ///< Depth surface number of samples - UINT_32 pipeXor; ///< Pipe xor setting -} ADDR2_COMPUTE_HTILE_COORDFROMADDR_INPUT; - -/** -**************************************************************************************************** -* ADDR2_COMPUTE_HTILE_COORDFROMADDR_OUTPUT -* -* @brief -* Output structure for Addr2ComputeHtileCoordFromAddr -**************************************************************************************************** -*/ -typedef struct _ADDR2_COMPUTE_HTILE_COORDFROMADDR_OUTPUT -{ - UINT_32 size; ///< Size of this structure in bytes - - UINT_32 x; ///< X coordinate - UINT_32 y; ///< Y coordinate - UINT_32 slice; ///< Index of slices - UINT_32 mipId; ///< mipmap level id -} ADDR2_COMPUTE_HTILE_COORDFROMADDR_OUTPUT; - -/** -**************************************************************************************************** -* Addr2ComputeHtileCoordFromAddr -* -* @brief -* Compute coordinates within depth buffer (1st pixel of a micro tile) according to -* Htile address -**************************************************************************************************** -*/ -ADDR_E_RETURNCODE ADDR_API Addr2ComputeHtileCoordFromAddr( - ADDR_HANDLE hLib, - const 
ADDR2_COMPUTE_HTILE_COORDFROMADDR_INPUT* pIn, - ADDR2_COMPUTE_HTILE_COORDFROMADDR_OUTPUT* pOut); - - - -//////////////////////////////////////////////////////////////////////////////////////////////////// -// C-mask functions for Gfx9 -//////////////////////////////////////////////////////////////////////////////////////////////////// - -/** -**************************************************************************************************** -* ADDR2_COMPUTE_CMASK_INFO_INPUT -* -* @brief -* Input structure of Addr2ComputeCmaskInfo -**************************************************************************************************** -*/ -typedef struct _ADDR2_COMPUTE_CMASKINFO_INPUT -{ - UINT_32 size; ///< Size of this structure in bytes - - ADDR2_META_FLAGS cMaskFlags; ///< CMASK flags - ADDR2_SURFACE_FLAGS colorFlags; ///< Color surface flags - AddrResourceType resourceType; ///< Color surface type - AddrSwizzleMode swizzleMode; ///< FMask surface swizzle mode - UINT_32 unalignedWidth; ///< Color surface original width - UINT_32 unalignedHeight; ///< Color surface original height - UINT_32 numSlices; ///< Number of slices of color buffer -} ADDR2_COMPUTE_CMASK_INFO_INPUT; - -/** -**************************************************************************************************** -* ADDR2_COMPUTE_CMASK_INFO_OUTPUT -* -* @brief -* Output structure of Addr2ComputeCmaskInfo -**************************************************************************************************** -*/ -typedef struct _ADDR2_COMPUTE_CMASK_INFO_OUTPUT -{ - UINT_32 size; ///< Size of this structure in bytes - - UINT_32 pitch; ///< Pitch in pixels of color buffer which - /// this Cmask matches. The size might be larger than - /// original color buffer pitch when called with - /// an unaligned pitch. - UINT_32 height; ///< Height in pixels, as above - UINT_32 baseAlign; ///< Base alignment - UINT_32 sliceSize; ///< Slice size, in bytes. 
- UINT_32 cmaskBytes; ///< Size in bytes of CMask buffer - UINT_32 metaBlkWidth; ///< Meta block width - UINT_32 metaBlkHeight; ///< Meta block height - - UINT_32 metaBlkNumPerSlice; ///< Number of metablock within one slice -} ADDR2_COMPUTE_CMASK_INFO_OUTPUT; - -/** -**************************************************************************************************** -* Addr2ComputeCmaskInfo -* -* @brief -* Compute Cmask pitch, height, base alignment and size in bytes from color buffer -* info -**************************************************************************************************** -*/ -ADDR_E_RETURNCODE ADDR_API Addr2ComputeCmaskInfo( - ADDR_HANDLE hLib, - const ADDR2_COMPUTE_CMASK_INFO_INPUT* pIn, - ADDR2_COMPUTE_CMASK_INFO_OUTPUT* pOut); - - - -/** -**************************************************************************************************** -* ADDR2_COMPUTE_CMASK_ADDRFROMCOORD_INPUT -* -* @brief -* Input structure for Addr2ComputeCmaskAddrFromCoord -* -**************************************************************************************************** -*/ -typedef struct _ADDR2_COMPUTE_CMASK_ADDRFROMCOORD_INPUT -{ - UINT_32 size; ///< Size of this structure in bytes - - UINT_32 x; ///< X coordinate - UINT_32 y; ///< Y coordinate - UINT_32 slice; ///< Index of slices - - ADDR2_META_FLAGS cMaskFlags; ///< CMASK flags - ADDR2_SURFACE_FLAGS colorFlags; ///< Color surface flags - AddrResourceType resourceType; ///< Color surface type - AddrSwizzleMode swizzleMode; ///< FMask surface swizzle mode - - UINT_32 unalignedWidth; ///< Color surface original width (of mip0) - UINT_32 unalignedHeight; ///< Color surface original height (of mip0) - UINT_32 numSlices; ///< Color surface original slices (of mip0) - - UINT_32 numSamples; ///< Color surfae sample number - UINT_32 numFrags; ///< Color surface fragment number - - UINT_32 pipeXor; ///< pipe Xor setting -} ADDR2_COMPUTE_CMASK_ADDRFROMCOORD_INPUT; - -/** 
-**************************************************************************************************** -* ADDR2_COMPUTE_CMASK_ADDRFROMCOORD_OUTPUT -* -* @brief -* Output structure for Addr2ComputeCmaskAddrFromCoord -**************************************************************************************************** -*/ -typedef struct _ADDR2_COMPUTE_CMASK_ADDRFROMCOORD_OUTPUT -{ - UINT_32 size; ///< Size of this structure in bytes - - UINT_64 addr; ///< CMASK address in bytes - UINT_32 bitPosition; ///< Bit position within addr, 0 or 4 -} ADDR2_COMPUTE_CMASK_ADDRFROMCOORD_OUTPUT; - -/** -**************************************************************************************************** -* Addr2ComputeCmaskAddrFromCoord -* -* @brief -* Compute Cmask address according to coordinates (of MSAA color buffer) -**************************************************************************************************** -*/ -ADDR_E_RETURNCODE ADDR_API Addr2ComputeCmaskAddrFromCoord( - ADDR_HANDLE hLib, - const ADDR2_COMPUTE_CMASK_ADDRFROMCOORD_INPUT* pIn, - ADDR2_COMPUTE_CMASK_ADDRFROMCOORD_OUTPUT* pOut); - - - -/** -**************************************************************************************************** -* ADDR2_COMPUTE_CMASK_COORDFROMADDR_INPUT -* -* @brief -* Input structure for Addr2ComputeCmaskCoordFromAddr -**************************************************************************************************** -*/ -typedef struct _ADDR2_COMPUTE_CMASK_COORDFROMADDR_INPUT -{ - UINT_32 size; ///< Size of this structure in bytes - - UINT_64 addr; ///< CMASK address in bytes - UINT_32 bitPosition; ///< Bit position within addr, 0 or 4 - - ADDR2_META_FLAGS cMaskFlags; ///< CMASK flags - ADDR2_SURFACE_FLAGS colorFlags; ///< Color surface flags - AddrResourceType resourceType; ///< Color surface type - AddrSwizzleMode swizzleMode; ///< FMask surface swizzle mode - - UINT_32 unalignedWidth; ///< Color surface original width (of mip0) - UINT_32 unalignedHeight; ///< Color 
surface original height (of mip0) - UINT_32 numSlices; ///< Color surface original slices (of mip0) - UINT_32 numMipLevels; ///< Color surface total mipmap levels. -} ADDR2_COMPUTE_CMASK_COORDFROMADDR_INPUT; - -/** -**************************************************************************************************** -* ADDR2_COMPUTE_CMASK_COORDFROMADDR_OUTPUT -* -* @brief -* Output structure for Addr2ComputeCmaskCoordFromAddr -**************************************************************************************************** -*/ -typedef struct _ADDR2_COMPUTE_CMASK_COORDFROMADDR_OUTPUT -{ - UINT_32 size; ///< Size of this structure in bytes - - UINT_32 x; ///< X coordinate - UINT_32 y; ///< Y coordinate - UINT_32 slice; ///< Index of slices - UINT_32 mipId; ///< mipmap level id -} ADDR2_COMPUTE_CMASK_COORDFROMADDR_OUTPUT; - -/** -**************************************************************************************************** -* Addr2ComputeCmaskCoordFromAddr -* -* @brief -* Compute coordinates within color buffer (1st pixel of a micro tile) according to -* Cmask address -**************************************************************************************************** -*/ -ADDR_E_RETURNCODE ADDR_API Addr2ComputeCmaskCoordFromAddr( - ADDR_HANDLE hLib, - const ADDR2_COMPUTE_CMASK_COORDFROMADDR_INPUT* pIn, - ADDR2_COMPUTE_CMASK_COORDFROMADDR_OUTPUT* pOut); - - - -//////////////////////////////////////////////////////////////////////////////////////////////////// -// F-mask functions for Gfx9 -//////////////////////////////////////////////////////////////////////////////////////////////////// - -/** -**************************************************************************************************** -* ADDR2_FMASK_FLAGS -* -* @brief -* FMASK flags -**************************************************************************************************** -*/ -typedef union _ADDR2_FMASK_FLAGS -{ - struct - { - UINT_32 resolved : 1; ///< TRUE if this is a resolved fmask, 
used by H/W clients - /// by H/W clients. S/W should always set it to FALSE. - UINT_32 reserved : 31; ///< Reserved for future use. - }; - - UINT_32 value; -} ADDR2_FMASK_FLAGS; - -/** -**************************************************************************************************** -* ADDR2_COMPUTE_FMASK_INFO_INPUT -* -* @brief -* Input structure for Addr2ComputeFmaskInfo -**************************************************************************************************** -*/ -typedef struct _ADDR2_COMPUTE_FMASK_INFO_INPUT -{ - UINT_32 size; ///< Size of this structure in bytes - - AddrSwizzleMode swizzleMode; ///< FMask surface swizzle mode - UINT_32 unalignedWidth; ///< Color surface original width - UINT_32 unalignedHeight; ///< Color surface original height - UINT_32 numSlices; ///< Number of slices/depth - UINT_32 numSamples; ///< Number of samples - UINT_32 numFrags; ///< Number of fragments, leave it zero or the same as - /// number of samples for normal AA; Set it to the - /// number of fragments for EQAA - ADDR2_FMASK_FLAGS fMaskFlags; ///< FMASK flags -} ADDR2_COMPUTE_FMASK_INFO_INPUT; - -/** -**************************************************************************************************** -* ADDR2_COMPUTE_FMASK_INFO_OUTPUT -* -* @brief -* Output structure for Addr2ComputeFmaskInfo -**************************************************************************************************** -*/ -typedef struct _ADDR2_COMPUTE_FMASK_INFO_OUTPUT -{ - UINT_32 size; ///< Size of this structure in bytes - - UINT_32 pitch; ///< Pitch of fmask in pixels - UINT_32 height; ///< Height of fmask in pixels - UINT_32 baseAlign; ///< Base alignment - UINT_32 numSlices; ///< Slices of fmask - UINT_32 fmaskBytes; ///< Size of fmask in bytes - UINT_32 bpp; ///< Bits per pixel of FMASK is: number of bit planes - UINT_32 numSamples; ///< Number of samples - UINT_32 sliceSize; ///< Size of slice in bytes -} ADDR2_COMPUTE_FMASK_INFO_OUTPUT; - -/** 
-**************************************************************************************************** -* Addr2ComputeFmaskInfo -* -* @brief -* Compute Fmask pitch/height/slices/alignments and size in bytes -**************************************************************************************************** -*/ -ADDR_E_RETURNCODE ADDR_API Addr2ComputeFmaskInfo( - ADDR_HANDLE hLib, - const ADDR2_COMPUTE_FMASK_INFO_INPUT* pIn, - ADDR2_COMPUTE_FMASK_INFO_OUTPUT* pOut); - - - -/** -**************************************************************************************************** -* ADDR2_COMPUTE_FMASK_ADDRFROMCOORD_INPUT -* -* @brief -* Input structure for Addr2ComputeFmaskAddrFromCoord -**************************************************************************************************** -*/ -typedef struct _ADDR2_COMPUTE_FMASK_ADDRFROMCOORD_INPUT -{ - UINT_32 size; ///< Size of this structure in bytes - - AddrSwizzleMode swizzleMode; ///< FMask surface swizzle mode - UINT_32 x; ///< X coordinate - UINT_32 y; ///< Y coordinate - UINT_32 slice; ///< Slice index - UINT_32 sample; ///< Sample index (fragment index for EQAA) - UINT_32 plane; ///< Plane number - - UINT_32 unalignedWidth; ///< Color surface original width - UINT_32 unalignedHeight; ///< Color surface original height - UINT_32 numSamples; ///< Number of samples - UINT_32 numFrags; ///< Number of fragments, leave it zero or the same as - /// number of samples for normal AA; Set it to the - /// number of fragments for EQAA - UINT_32 tileSwizzle; ///< Combined swizzle used to do bank/pipe rotation - - ADDR2_FMASK_FLAGS fMaskFlags; ///< FMASK flags -} ADDR2_COMPUTE_FMASK_ADDRFROMCOORD_INPUT; - -/** -**************************************************************************************************** -* ADDR2_COMPUTE_FMASK_ADDRFROMCOORD_OUTPUT -* -* @brief -* Output structure for Addr2ComputeFmaskAddrFromCoord -**************************************************************************************************** -*/ 
-typedef struct _ADDR2_COMPUTE_FMASK_ADDRFROMCOORD_OUTPUT -{ - UINT_32 size; ///< Size of this structure in bytes - - UINT_64 addr; ///< Fmask address - UINT_32 bitPosition; ///< Bit position within fmaskAddr, 0-7. -} ADDR2_COMPUTE_FMASK_ADDRFROMCOORD_OUTPUT; - -/** -**************************************************************************************************** -* Addr2ComputeFmaskAddrFromCoord -* -* @brief -* Compute Fmask address according to coordinates (x,y,slice,sample,plane) -**************************************************************************************************** -*/ -ADDR_E_RETURNCODE ADDR_API Addr2ComputeFmaskAddrFromCoord( - ADDR_HANDLE hLib, - const ADDR2_COMPUTE_FMASK_ADDRFROMCOORD_INPUT* pIn, - ADDR2_COMPUTE_FMASK_ADDRFROMCOORD_OUTPUT* pOut); - - - -/** -**************************************************************************************************** -* ADDR2_COMPUTE_FMASK_COORDFROMADDR_INPUT -* -* @brief -* Input structure for Addr2ComputeFmaskCoordFromAddr -**************************************************************************************************** -*/ -typedef struct _ADDR2_COMPUTE_FMASK_COORDFROMADDR_INPUT -{ - UINT_32 size; ///< Size of this structure in bytes - - UINT_64 addr; ///< Address - UINT_32 bitPosition; ///< Bit position within addr, 0-7. 
- AddrSwizzleMode swizzleMode; ///< FMask surface swizzle mode - - UINT_32 unalignedWidth; ///< Color surface original width - UINT_32 unalignedHeight; ///< Color surface original height - UINT_32 numSamples; ///< Number of samples - UINT_32 numFrags; ///< Number of fragments - - UINT_32 tileSwizzle; ///< Combined swizzle used to do bank/pipe rotation - - ADDR2_FMASK_FLAGS fMaskFlags; ///< FMASK flags -} ADDR2_COMPUTE_FMASK_COORDFROMADDR_INPUT; - -/** -**************************************************************************************************** -* ADDR2_COMPUTE_FMASK_COORDFROMADDR_OUTPUT -* -* @brief -* Output structure for Addr2ComputeFmaskCoordFromAddr -**************************************************************************************************** -*/ -typedef struct _ADDR2_COMPUTE_FMASK_COORDFROMADDR_OUTPUT -{ - UINT_32 size; ///< Size of this structure in bytes - - UINT_32 x; ///< X coordinate - UINT_32 y; ///< Y coordinate - UINT_32 slice; ///< Slice index - UINT_32 sample; ///< Sample index (fragment index for EQAA) - UINT_32 plane; ///< Plane number -} ADDR2_COMPUTE_FMASK_COORDFROMADDR_OUTPUT; - -/** -**************************************************************************************************** -* Addr2ComputeFmaskCoordFromAddr -* -* @brief -* Compute FMASK coordinate from an given address -**************************************************************************************************** -*/ -ADDR_E_RETURNCODE ADDR_API Addr2ComputeFmaskCoordFromAddr( - ADDR_HANDLE hLib, - const ADDR2_COMPUTE_FMASK_COORDFROMADDR_INPUT* pIn, - ADDR2_COMPUTE_FMASK_COORDFROMADDR_OUTPUT* pOut); - - - -//////////////////////////////////////////////////////////////////////////////////////////////////// -// DCC key functions for Gfx9 -//////////////////////////////////////////////////////////////////////////////////////////////////// - -/** -**************************************************************************************************** -* 
_ADDR2_COMPUTE_DCCINFO_INPUT -* -* @brief -* Input structure of Addr2ComputeDccInfo -**************************************************************************************************** -*/ -typedef struct _ADDR2_COMPUTE_DCCINFO_INPUT -{ - UINT_32 size; ///< Size of this structure in bytes - - ADDR2_META_FLAGS dccKeyFlags; ///< DCC key flags - ADDR2_SURFACE_FLAGS colorFlags; ///< Color surface flags - AddrResourceType resourceType; ///< Color surface type - AddrSwizzleMode swizzleMode; ///< Color surface swizzle mode - UINT_32 bpp; ///< bits per pixel - UINT_32 unalignedWidth; ///< Color surface original width (of mip0) - UINT_32 unalignedHeight; ///< Color surface original height (of mip0) - UINT_32 numSlices; ///< Number of slices, of color surface (of mip0) - UINT_32 numFrags; ///< Fragment number of color surface - UINT_32 numMipLevels; ///< Total mipmap levels of color surface - UINT_32 dataSurfaceSize; ///< The padded size of all slices and mip levels - ///< useful in meta linear case - UINT_32 firstMipIdInTail; -} ADDR2_COMPUTE_DCCINFO_INPUT; - -/** -**************************************************************************************************** -* ADDR2_COMPUTE_DCCINFO_OUTPUT -* -* @brief -* Output structure of Addr2ComputeDccInfo -**************************************************************************************************** -*/ -typedef struct _ADDR2_COMPUTE_DCCINFO_OUTPUT -{ - UINT_32 size; ///< Size of this structure in bytes - - UINT_32 dccRamBaseAlign; ///< Base alignment of dcc key - UINT_32 dccRamSize; ///< Size of dcc key - - UINT_32 pitch; ///< DCC surface mip chain pitch - UINT_32 height; ///< DCC surface mip chain height - UINT_32 depth; ///< DCC surface mip chain depth - - UINT_32 compressBlkWidth; ///< DCC compress block width - UINT_32 compressBlkHeight; ///< DCC compress block height - UINT_32 compressBlkDepth; ///< DCC compress block depth - - UINT_32 metaBlkWidth; ///< DCC meta block width - UINT_32 metaBlkHeight; ///< DCC meta 
block height - UINT_32 metaBlkDepth; ///< DCC meta block depth - - UINT_32 metaBlkNumPerSlice; ///< Number of metablock within one slice - - union - { - UINT_32 fastClearSizePerSlice; ///< Size of DCC within a slice should be fast cleared - UINT_32 dccRamSliceSize; - }; - - ADDR2_META_MIP_INFO* pMipInfo; ///< DCC mip information -} ADDR2_COMPUTE_DCCINFO_OUTPUT; - -/** -**************************************************************************************************** -* Addr2ComputeDccInfo -* -* @brief -* Compute DCC key size, base alignment -* info -**************************************************************************************************** -*/ -ADDR_E_RETURNCODE ADDR_API Addr2ComputeDccInfo( - ADDR_HANDLE hLib, - const ADDR2_COMPUTE_DCCINFO_INPUT* pIn, - ADDR2_COMPUTE_DCCINFO_OUTPUT* pOut); - - -/** -**************************************************************************************************** -* ADDR2_COMPUTE_DCC_ADDRFROMCOORD_INPUT -* -* @brief -* Input structure for Addr2ComputeDccAddrFromCoord -* -**************************************************************************************************** -*/ -typedef struct _ADDR2_COMPUTE_DCC_ADDRFROMCOORD_INPUT -{ - UINT_32 size; ///< Size of this structure in bytes - - UINT_32 x; ///< X coordinate - UINT_32 y; ///< Y coordinate - UINT_32 slice; ///< Index of slices - UINT_32 sample; ///< Index of samples, means fragment index for EQAA - UINT_32 mipId; ///< mipmap level id - - ADDR2_META_FLAGS dccKeyFlags; ///< DCC flags - ADDR2_SURFACE_FLAGS colorFlags; ///< Color surface flags - AddrResourceType resourceType; ///< Color surface type - AddrSwizzleMode swizzleMode; ///< Color surface swizzle mode - UINT_32 bpp; ///< Color surface bits per pixel - UINT_32 unalignedWidth; ///< Color surface original width (of mip0) - UINT_32 unalignedHeight; ///< Color surface original height (of mip0) - UINT_32 numSlices; ///< Color surface original slices (of mip0) - UINT_32 numMipLevels; ///< Color surface mipmap 
levels - UINT_32 numFrags; ///< Color surface fragment number - - UINT_32 pipeXor; ///< pipe Xor setting -} ADDR2_COMPUTE_DCC_ADDRFROMCOORD_INPUT; - -/** -**************************************************************************************************** -* ADDR2_COMPUTE_DCC_ADDRFROMCOORD_OUTPUT -* -* @brief -* Output structure for Addr2ComputeDccAddrFromCoord -**************************************************************************************************** -*/ -typedef struct _ADDR2_COMPUTE_DCC_ADDRFROMCOORD_OUTPUT -{ - UINT_32 size; ///< Size of this structure in bytes - - UINT_64 addr; ///< DCC address in bytes -} ADDR2_COMPUTE_DCC_ADDRFROMCOORD_OUTPUT; - -/** -**************************************************************************************************** -* Addr2ComputeDccAddrFromCoord -* -* @brief -* Compute DCC address according to coordinates (of MSAA color buffer) -**************************************************************************************************** -*/ -ADDR_E_RETURNCODE ADDR_API Addr2ComputeDccAddrFromCoord( - ADDR_HANDLE hLib, - const ADDR2_COMPUTE_DCC_ADDRFROMCOORD_INPUT* pIn, - ADDR2_COMPUTE_DCC_ADDRFROMCOORD_OUTPUT* pOut); - -//////////////////////////////////////////////////////////////////////////////////////////////////// -// Misc functions for Gfx9 -//////////////////////////////////////////////////////////////////////////////////////////////////// - -/** -**************************************************************************************************** -* ADDR2_COMPUTE_PIPEBANKXOR_INPUT -* -* @brief -* Input structure of Addr2ComputePipebankXor -**************************************************************************************************** -*/ -typedef struct _ADDR2_COMPUTE_PIPEBANKXOR_INPUT -{ - UINT_32 size; ///< Size of this structure in bytes - UINT_32 surfIndex; ///< Input surface index - ADDR2_SURFACE_FLAGS flags; ///< Surface flag - AddrSwizzleMode swizzleMode; ///< Surface swizzle mode - AddrResourceType 
resourceType; ///< Surface resource type - AddrFormat format; ///< Surface format - UINT_32 numSamples; ///< Number of samples - UINT_32 numFrags; ///< Number of fragments, leave it zero or the same as - /// number of samples for normal AA; Set it to the - /// number of fragments for EQAA -} ADDR2_COMPUTE_PIPEBANKXOR_INPUT; - -/** -**************************************************************************************************** -* ADDR2_COMPUTE_PIPEBANKXOR_OUTPUT -* -* @brief -* Output structure of Addr2ComputePipebankXor -**************************************************************************************************** -*/ -typedef struct _ADDR2_COMPUTE_PIPEBANKXOR_OUTPUT -{ - UINT_32 size; ///< Size of this structure in bytes - UINT_32 pipeBankXor; ///< Pipe bank xor -} ADDR2_COMPUTE_PIPEBANKXOR_OUTPUT; - -/** -**************************************************************************************************** -* Addr2ComputePipeBankXor -* -* @brief -* Calculate a valid bank pipe xor value for client to use. 
-**************************************************************************************************** -*/ -ADDR_E_RETURNCODE ADDR_API Addr2ComputePipeBankXor( - ADDR_HANDLE hLib, - const ADDR2_COMPUTE_PIPEBANKXOR_INPUT* pIn, - ADDR2_COMPUTE_PIPEBANKXOR_OUTPUT* pOut); - -/** -**************************************************************************************************** -* ADDR2_COMPUTE_SLICE_PIPEBANKXOR_INPUT -* -* @brief -* Input structure of Addr2ComputeSlicePipeBankXor -**************************************************************************************************** -*/ -typedef struct _ADDR2_COMPUTE_SLICE_PIPEBANKXOR_INPUT -{ - UINT_32 size; ///< Size of this structure in bytes - AddrSwizzleMode swizzleMode; ///< Surface swizzle mode - AddrResourceType resourceType; ///< Surface resource type - UINT_32 basePipeBankXor; ///< Base pipe bank xor - UINT_32 slice; ///< Slice id - UINT_32 numSamples; ///< Number of samples -} ADDR2_COMPUTE_SLICE_PIPEBANKXOR_INPUT; - -/** -**************************************************************************************************** -* ADDR2_COMPUTE_SLICE_PIPEBANKXOR_OUTPUT -* -* @brief -* Output structure of Addr2ComputeSlicePipeBankXor -**************************************************************************************************** -*/ -typedef struct _ADDR2_COMPUTE_SLICE_PIPEBANKXOR_OUTPUT -{ - UINT_32 size; ///< Size of this structure in bytes - UINT_32 pipeBankXor; ///< Pipe bank xor -} ADDR2_COMPUTE_SLICE_PIPEBANKXOR_OUTPUT; - -/** -**************************************************************************************************** -* Addr2ComputeSlicePipeBankXor -* -* @brief -* Calculate slice pipe bank xor value based on base pipe bank xor and slice id. 
-**************************************************************************************************** -*/ -ADDR_E_RETURNCODE ADDR_API Addr2ComputeSlicePipeBankXor( - ADDR_HANDLE hLib, - const ADDR2_COMPUTE_SLICE_PIPEBANKXOR_INPUT* pIn, - ADDR2_COMPUTE_SLICE_PIPEBANKXOR_OUTPUT* pOut); - -/** -**************************************************************************************************** -* ADDR2_COMPUTE_SUBRESOURCE_OFFSET_FORSWIZZLEPATTERN_INPUT -* -* @brief -* Input structure of Addr2ComputeSubResourceOffsetForSwizzlePattern -**************************************************************************************************** -*/ -typedef struct _ADDR2_COMPUTE_SUBRESOURCE_OFFSET_FORSWIZZLEPATTERN_INPUT -{ - UINT_32 size; ///< Size of this structure in bytes - AddrSwizzleMode swizzleMode; ///< Surface swizzle mode - AddrResourceType resourceType; ///< Surface resource type - UINT_32 pipeBankXor; ///< Per resource xor - UINT_32 slice; ///< Slice id - UINT_64 sliceSize; ///< Slice size of a mip chain - UINT_64 macroBlockOffset; ///< Macro block offset, returned in ADDR2_MIP_INFO - UINT_32 mipTailOffset; ///< Mip tail offset, returned in ADDR2_MIP_INFO -} ADDR2_COMPUTE_SUBRESOURCE_OFFSET_FORSWIZZLEPATTERN_INPUT; - -/** -**************************************************************************************************** -* ADDR2_COMPUTE_SUBRESOURCE_OFFSET_FORSWIZZLEPATTERN_OUTPUT -* -* @brief -* Output structure of Addr2ComputeSubResourceOffsetForSwizzlePattern -**************************************************************************************************** -*/ -typedef struct _ADDR2_COMPUTE_SUBRESOURCE_OFFSET_FORSWIZZLEPATTERN_OUTPUT -{ - UINT_32 size; ///< Size of this structure in bytes - UINT_64 offset; ///< offset -} ADDR2_COMPUTE_SUBRESOURCE_OFFSET_FORSWIZZLEPATTERN_OUTPUT; - -/** -**************************************************************************************************** -* Addr2ComputeSubResourceOffsetForSwizzlePattern -* -* @brief -* Calculate 
sub resource offset to support swizzle pattern. -**************************************************************************************************** -*/ -ADDR_E_RETURNCODE ADDR_API Addr2ComputeSubResourceOffsetForSwizzlePattern( - ADDR_HANDLE hLib, - const ADDR2_COMPUTE_SUBRESOURCE_OFFSET_FORSWIZZLEPATTERN_INPUT* pIn, - ADDR2_COMPUTE_SUBRESOURCE_OFFSET_FORSWIZZLEPATTERN_OUTPUT* pOut); - -/** -**************************************************************************************************** -* ADDR2_BLOCK_SET -* -* @brief -* Bit field that defines block type -**************************************************************************************************** -*/ -typedef union _ADDR2_BLOCK_SET -{ - struct - { - UINT_32 micro : 1; // 256B block for 2D resource - UINT_32 macro4KB : 1; // 4KB for 2D/3D resource - UINT_32 macro64KB : 1; // 64KB for 2D/3D resource - UINT_32 var : 1; // VAR block - UINT_32 linear : 1; // Linear block - UINT_32 reserved : 27; - }; - - UINT_32 value; -} ADDR2_BLOCK_SET; - -/** -**************************************************************************************************** -* ADDR2_SWTYPE_SET -* -* @brief -* Bit field that defines swizzle type -**************************************************************************************************** -*/ -typedef union _ADDR2_SWTYPE_SET -{ - struct - { - UINT_32 sw_Z : 1; // SW_*_Z_* - UINT_32 sw_S : 1; // SW_*_S_* - UINT_32 sw_D : 1; // SW_*_D_* - UINT_32 sw_R : 1; // SW_*_R_* - UINT_32 reserved : 28; - }; - - UINT_32 value; -} ADDR2_SWTYPE_SET; - -/** -**************************************************************************************************** -* ADDR2_GET_PREFERRED_SURF_SETTING_INPUT -* -* @brief -* Input structure of Addr2GetPreferredSurfaceSetting -**************************************************************************************************** -*/ -typedef struct _ADDR2_GET_PREFERRED_SURF_SETTING_INPUT -{ - UINT_32 size; ///< Size of this structure in bytes - - 
ADDR2_SURFACE_FLAGS flags; ///< Surface flags - AddrResourceType resourceType; ///< Surface type - AddrFormat format; ///< Surface format - AddrResrouceLocation resourceLoction; ///< Surface heap choice - ADDR2_BLOCK_SET forbiddenBlock; ///< Client can use it to disable some block setting - ///< such as linear for DXTn, tiled for YUV - ADDR2_SWTYPE_SET preferredSwSet; ///< Client can use it to specify sw type(s) wanted - BOOL_32 noXor; ///< Do not use xor mode for this resource - UINT_32 bpp; ///< bits per pixel - UINT_32 width; ///< Width (of mip0), in pixels - UINT_32 height; ///< Height (of mip0), in pixels - UINT_32 numSlices; ///< Number surface slice/depth (of mip0), - UINT_32 numMipLevels; ///< Total mipmap levels. - UINT_32 numSamples; ///< Number of samples - UINT_32 numFrags; ///< Number of fragments, leave it zero or the same as - /// number of samples for normal AA; Set it to the - /// number of fragments for EQAA - UINT_32 maxAlign; ///< maximum base/size alignment requested by client - UINT_32 minSizeAlign; ///< memory allocated for surface in client driver will - /// be padded to multiple of this value (in bytes) -} ADDR2_GET_PREFERRED_SURF_SETTING_INPUT; - -/** -**************************************************************************************************** -* ADDR2_GET_PREFERRED_SURF_SETTING_OUTPUT -* -* @brief -* Output structure of Addr2GetPreferredSurfaceSetting -**************************************************************************************************** -*/ -typedef struct _ADDR2_GET_PREFERRED_SURF_SETTING_OUTPUT -{ - UINT_32 size; ///< Size of this structure in bytes - - AddrSwizzleMode swizzleMode; ///< Suggested swizzle mode to be used - AddrResourceType resourceType; ///< Suggested resource type to program HW - ADDR2_BLOCK_SET validBlockSet; ///< Valid block type bit conbination - BOOL_32 canXor; ///< If client can use xor on a valid macro block - /// type - ADDR2_SWTYPE_SET validSwTypeSet; ///< Valid swizzle type bit 
combination - ADDR2_SWTYPE_SET clientPreferredSwSet; ///< Client-preferred swizzle type bit combination -} ADDR2_GET_PREFERRED_SURF_SETTING_OUTPUT; - -/** -**************************************************************************************************** -* Addr2GetPreferredSurfaceSetting -* -* @brief -* Suggest a preferred setting for client driver to program HW register -**************************************************************************************************** -*/ -ADDR_E_RETURNCODE ADDR_API Addr2GetPreferredSurfaceSetting( - ADDR_HANDLE hLib, - const ADDR2_GET_PREFERRED_SURF_SETTING_INPUT* pIn, - ADDR2_GET_PREFERRED_SURF_SETTING_OUTPUT* pOut); - -/** -**************************************************************************************************** -* Addr2IsValidDisplaySwizzleMode -* -* @brief -* Return whether the swizzle mode is supported by DCE / DCN. -**************************************************************************************************** -*/ -ADDR_E_RETURNCODE ADDR_API Addr2IsValidDisplaySwizzleMode( - ADDR_HANDLE hLib, - AddrSwizzleMode swizzleMode, - UINT_32 bpp, - bool *result); - -#if defined(__cplusplus) -} -#endif - -#endif // __ADDR_INTERFACE_H__ diff -Nru mesa-18.3.3/src/amd/addrlib/addrtypes.h mesa-19.0.1/src/amd/addrlib/addrtypes.h --- mesa-18.3.3/src/amd/addrlib/addrtypes.h 2018-04-03 17:32:26.000000000 +0000 +++ mesa-19.0.1/src/amd/addrlib/addrtypes.h 1970-01-01 00:00:00.000000000 +0000 @@ -1,749 +0,0 @@ -/* - * Copyright © 2014 Advanced Micro Devices, Inc. - * All Rights Reserved. 
- * - * Permission is hereby granted, free of charge, to any person obtaining - * a copy of this software and associated documentation files (the - * "Software"), to deal in the Software without restriction, including - * without limitation the rights to use, copy, modify, merge, publish, - * distribute, sub license, and/or sell copies of the Software, and to - * permit persons to whom the Software is furnished to do so, subject to - * the following conditions: - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES - * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - * NON-INFRINGEMENT. IN NO EVENT SHALL THE COPYRIGHT HOLDERS, AUTHORS - * AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, - * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE - * USE OR OTHER DEALINGS IN THE SOFTWARE. - * - * The above copyright notice and this permission notice (including the - * next paragraph) shall be included in all copies or substantial portions - * of the Software. - */ - -/** -**************************************************************************************************** -* @file addrtypes.h -* @brief Contains the helper function and constants -**************************************************************************************************** -*/ -#ifndef __ADDR_TYPES_H__ -#define __ADDR_TYPES_H__ - -#if defined(__APPLE__) && !defined(HAVE_TSERVER) -// External definitions header maintained by Apple driver team, but not for diag team under Mac. 
-// Helps address compilation issues & reduces code covered by NDA -#include "addrExtDef.h" - -#else - -// Windows and/or Linux -#if !defined(VOID) -typedef void VOID; -#endif - -#if !defined(FLOAT) -typedef float FLOAT; -#endif - -#if !defined(CHAR) -typedef char CHAR; -#endif - -#if !defined(INT) -typedef int INT; -#endif - -#include // va_list...etc need this header - -#endif // defined (__APPLE__) && !defined(HAVE_TSERVER) - -/** -**************************************************************************************************** -* Calling conventions -**************************************************************************************************** -*/ -#ifndef ADDR_CDECL - #if defined(__GNUC__) - #define ADDR_CDECL __attribute__((cdecl)) - #else - #define ADDR_CDECL __cdecl - #endif -#endif - -#ifndef ADDR_STDCALL - #if defined(__GNUC__) - #if defined(__amd64__) || defined(__x86_64__) - #define ADDR_STDCALL - #else - #define ADDR_STDCALL __attribute__((stdcall)) - #endif - #else - #define ADDR_STDCALL __stdcall - #endif -#endif - -#ifndef ADDR_FASTCALL - #if defined(BRAHMA_ARM) - #define ADDR_FASTCALL - #elif defined(__GNUC__) - #if defined(__i386__) - #define ADDR_FASTCALL __attribute__((regparm(0))) - #else - #define ADDR_FASTCALL - #endif - #else - #define ADDR_FASTCALL __fastcall - #endif -#endif - -#ifndef GC_CDECL - #define GC_CDECL ADDR_CDECL -#endif - -#ifndef GC_STDCALL - #define GC_STDCALL ADDR_STDCALL -#endif - -#ifndef GC_FASTCALL - #define GC_FASTCALL ADDR_FASTCALL -#endif - - -#if defined(__GNUC__) - #define ADDR_INLINE static inline // inline needs to be static to link -#else - // win32, win64, other platforms - #define ADDR_INLINE __inline -#endif // #if defined(__GNUC__) - -#define ADDR_API ADDR_FASTCALL //default call convention is fast call - -/** -**************************************************************************************************** -* Global defines used by other modules 
-**************************************************************************************************** -*/ -#if !defined(TILEINDEX_INVALID) -#define TILEINDEX_INVALID -1 -#endif - -#if !defined(TILEINDEX_LINEAR_GENERAL) -#define TILEINDEX_LINEAR_GENERAL -2 -#endif - -#if !defined(TILEINDEX_LINEAR_ALIGNED) -#define TILEINDEX_LINEAR_ALIGNED 8 -#endif - -/** -**************************************************************************************************** -* Return codes -**************************************************************************************************** -*/ -typedef enum _ADDR_E_RETURNCODE -{ - // General Return - ADDR_OK = 0, - ADDR_ERROR = 1, - - // Specific Errors - ADDR_OUTOFMEMORY, - ADDR_INVALIDPARAMS, - ADDR_NOTSUPPORTED, - ADDR_NOTIMPLEMENTED, - ADDR_PARAMSIZEMISMATCH, - ADDR_INVALIDGBREGVALUES, - -} ADDR_E_RETURNCODE; - -/** -**************************************************************************************************** -* @brief -* Neutral enums that define tile modes for all H/W -* @note -* R600/R800 tiling mode can be cast to hw enums directly but never cast into HW enum from -* ADDR_TM_2D_TILED_XTHICK -* -**************************************************************************************************** -*/ -typedef enum _AddrTileMode -{ - ADDR_TM_LINEAR_GENERAL = 0, ///< Least restrictions, pitch: multiple of 8 if not buffer - ADDR_TM_LINEAR_ALIGNED = 1, ///< Requests pitch or slice to be multiple of 64 pixels - ADDR_TM_1D_TILED_THIN1 = 2, ///< Linear array of 8x8 tiles - ADDR_TM_1D_TILED_THICK = 3, ///< Linear array of 8x8x4 tiles - ADDR_TM_2D_TILED_THIN1 = 4, ///< A set of macro tiles consist of 8x8 tiles - ADDR_TM_2D_TILED_THIN2 = 5, ///< 600 HWL only, macro tile ratio is 1:4 - ADDR_TM_2D_TILED_THIN4 = 6, ///< 600 HWL only, macro tile ratio is 1:16 - ADDR_TM_2D_TILED_THICK = 7, ///< A set of macro tiles consist of 8x8x4 tiles - ADDR_TM_2B_TILED_THIN1 = 8, ///< 600 HWL only, with bank swap - ADDR_TM_2B_TILED_THIN2 = 9, ///< 
600 HWL only, with bank swap and ratio is 1:4 - ADDR_TM_2B_TILED_THIN4 = 10, ///< 600 HWL only, with bank swap and ratio is 1:16 - ADDR_TM_2B_TILED_THICK = 11, ///< 600 HWL only, with bank swap, consists of 8x8x4 tiles - ADDR_TM_3D_TILED_THIN1 = 12, ///< Macro tiling w/ pipe rotation between slices - ADDR_TM_3D_TILED_THICK = 13, ///< Macro tiling w/ pipe rotation bwtween slices, thick - ADDR_TM_3B_TILED_THIN1 = 14, ///< 600 HWL only, with bank swap - ADDR_TM_3B_TILED_THICK = 15, ///< 600 HWL only, with bank swap, thick - ADDR_TM_2D_TILED_XTHICK = 16, ///< Tile is 8x8x8, valid from NI - ADDR_TM_3D_TILED_XTHICK = 17, ///< Tile is 8x8x8, valid from NI - ADDR_TM_POWER_SAVE = 18, ///< Power save mode, only used by KMD on NI - ADDR_TM_PRT_TILED_THIN1 = 19, ///< No bank/pipe rotation or hashing beyond macrotile size - ADDR_TM_PRT_2D_TILED_THIN1 = 20, ///< Same as 2D_TILED_THIN1, PRT only - ADDR_TM_PRT_3D_TILED_THIN1 = 21, ///< Same as 3D_TILED_THIN1, PRT only - ADDR_TM_PRT_TILED_THICK = 22, ///< No bank/pipe rotation or hashing beyond macrotile size - ADDR_TM_PRT_2D_TILED_THICK = 23, ///< Same as 2D_TILED_THICK, PRT only - ADDR_TM_PRT_3D_TILED_THICK = 24, ///< Same as 3D_TILED_THICK, PRT only - ADDR_TM_UNKNOWN = 25, ///< Unkown tile mode, should be decided by address lib - ADDR_TM_COUNT = 26, ///< Must be the value of the last tile mode -} AddrTileMode; - -/** -**************************************************************************************************** -* @brief -* Neutral enums that define swizzle modes for Gfx9 ASIC -* @note -* -* ADDR_SW_LINEAR linear aligned addressing mode, for 1D/2D/3D resouce -* ADDR_SW_256B_* addressing block aligned size is 256B, for 2D/3D resouce -* ADDR_SW_4KB_* addressing block aligned size is 4KB, for 2D/3D resouce -* ADDR_SW_64KB_* addressing block aligned size is 64KB, for 2D/3D resouce -* ADDR_SW_VAR_* addressing block aligned size is ASIC specific, for 2D/3D resouce -* -* ADDR_SW_*_Z For 2D resouce, represents Z-order swizzle mode 
for depth/stencil/FMask - For 3D resouce, represents a swizzle mode similar to legacy thick tile mode -* ADDR_SW_*_S represents standard swizzle mode defined by MS -* ADDR_SW_*_D For 2D resouce, represents a swizzle mode for displayable resource -* For 3D resouce, represents a swizzle mode which places each slice in order & pixel - within slice is placed as 2D ADDR_SW_*_S. Don't use this combination if possible! -* ADDR_SW_*_R For 2D resouce only, represents a swizzle mode for rotated displayable resource -* -**************************************************************************************************** -*/ -typedef enum _AddrSwizzleMode -{ - ADDR_SW_LINEAR = 0, - ADDR_SW_256B_S = 1, - ADDR_SW_256B_D = 2, - ADDR_SW_256B_R = 3, - ADDR_SW_4KB_Z = 4, - ADDR_SW_4KB_S = 5, - ADDR_SW_4KB_D = 6, - ADDR_SW_4KB_R = 7, - ADDR_SW_64KB_Z = 8, - ADDR_SW_64KB_S = 9, - ADDR_SW_64KB_D = 10, - ADDR_SW_64KB_R = 11, - ADDR_SW_VAR_Z = 12, - ADDR_SW_VAR_S = 13, - ADDR_SW_VAR_D = 14, - ADDR_SW_VAR_R = 15, - ADDR_SW_64KB_Z_T = 16, - ADDR_SW_64KB_S_T = 17, - ADDR_SW_64KB_D_T = 18, - ADDR_SW_64KB_R_T = 19, - ADDR_SW_4KB_Z_X = 20, - ADDR_SW_4KB_S_X = 21, - ADDR_SW_4KB_D_X = 22, - ADDR_SW_4KB_R_X = 23, - ADDR_SW_64KB_Z_X = 24, - ADDR_SW_64KB_S_X = 25, - ADDR_SW_64KB_D_X = 26, - ADDR_SW_64KB_R_X = 27, - ADDR_SW_VAR_Z_X = 28, - ADDR_SW_VAR_S_X = 29, - ADDR_SW_VAR_D_X = 30, - ADDR_SW_VAR_R_X = 31, - ADDR_SW_LINEAR_GENERAL = 32, - ADDR_SW_MAX_TYPE = 33, - - // Used for represent block with identical size - ADDR_SW_256B = ADDR_SW_256B_S, - ADDR_SW_4KB = ADDR_SW_4KB_S_X, - ADDR_SW_64KB = ADDR_SW_64KB_S_X, - ADDR_SW_VAR = ADDR_SW_VAR_S_X, -} AddrSwizzleMode; - -/** -**************************************************************************************************** -* @brief -* Neutral enums that define image type -* @note -* this is new for address library interface version 2 -* -**************************************************************************************************** -*/ -typedef 
enum _AddrResourceType -{ - ADDR_RSRC_TEX_1D = 0, - ADDR_RSRC_TEX_2D = 1, - ADDR_RSRC_TEX_3D = 2, - ADDR_RSRC_MAX_TYPE = 3, -} AddrResourceType; - -/** -**************************************************************************************************** -* @brief -* Neutral enums that define resource heap location -* @note -* this is new for address library interface version 2 -* -**************************************************************************************************** -*/ -typedef enum _AddrResrouceLocation -{ - ADDR_RSRC_LOC_UNDEF = 0, // Resource heap is undefined/unknown - ADDR_RSRC_LOC_LOCAL = 1, // CPU visable and CPU invisable local heap - ADDR_RSRC_LOC_USWC = 2, // CPU write-combined non-cached nonlocal heap - ADDR_RSRC_LOC_CACHED = 3, // CPU cached nonlocal heap - ADDR_RSRC_LOC_INVIS = 4, // CPU invisable local heap only - ADDR_RSRC_LOC_MAX_TYPE = 5, -} AddrResrouceLocation; - -/** -**************************************************************************************************** -* @brief -* Neutral enums that define resource basic swizzle mode -* @note -* this is new for address library interface version 2 -* -**************************************************************************************************** -*/ -typedef enum _AddrSwType -{ - ADDR_SW_Z = 0, // Resource basic swizzle mode is ZOrder - ADDR_SW_S = 1, // Resource basic swizzle mode is Standard - ADDR_SW_D = 2, // Resource basic swizzle mode is Display - ADDR_SW_R = 3, // Resource basic swizzle mode is Rotated -} AddrSwType; - -/** -**************************************************************************************************** -* @brief -* Neutral enums that define mipmap major mode -* @note -* this is new for address library interface version 2 -* -**************************************************************************************************** -*/ -typedef enum _AddrMajorMode -{ - ADDR_MAJOR_X = 0, - ADDR_MAJOR_Y = 1, - ADDR_MAJOR_Z = 2, - ADDR_MAJOR_MAX_TYPE = 3, -} 
AddrMajorMode; - -/** -**************************************************************************************************** -* AddrFormat -* -* @brief -* Neutral enum for SurfaceFormat -* -**************************************************************************************************** -*/ -typedef enum _AddrFormat { - ADDR_FMT_INVALID = 0x00000000, - ADDR_FMT_8 = 0x00000001, - ADDR_FMT_4_4 = 0x00000002, - ADDR_FMT_3_3_2 = 0x00000003, - ADDR_FMT_RESERVED_4 = 0x00000004, - ADDR_FMT_16 = 0x00000005, - ADDR_FMT_16_FLOAT = 0x00000006, - ADDR_FMT_8_8 = 0x00000007, - ADDR_FMT_5_6_5 = 0x00000008, - ADDR_FMT_6_5_5 = 0x00000009, - ADDR_FMT_1_5_5_5 = 0x0000000a, - ADDR_FMT_4_4_4_4 = 0x0000000b, - ADDR_FMT_5_5_5_1 = 0x0000000c, - ADDR_FMT_32 = 0x0000000d, - ADDR_FMT_32_FLOAT = 0x0000000e, - ADDR_FMT_16_16 = 0x0000000f, - ADDR_FMT_16_16_FLOAT = 0x00000010, - ADDR_FMT_8_24 = 0x00000011, - ADDR_FMT_8_24_FLOAT = 0x00000012, - ADDR_FMT_24_8 = 0x00000013, - ADDR_FMT_24_8_FLOAT = 0x00000014, - ADDR_FMT_10_11_11 = 0x00000015, - ADDR_FMT_10_11_11_FLOAT = 0x00000016, - ADDR_FMT_11_11_10 = 0x00000017, - ADDR_FMT_11_11_10_FLOAT = 0x00000018, - ADDR_FMT_2_10_10_10 = 0x00000019, - ADDR_FMT_8_8_8_8 = 0x0000001a, - ADDR_FMT_10_10_10_2 = 0x0000001b, - ADDR_FMT_X24_8_32_FLOAT = 0x0000001c, - ADDR_FMT_32_32 = 0x0000001d, - ADDR_FMT_32_32_FLOAT = 0x0000001e, - ADDR_FMT_16_16_16_16 = 0x0000001f, - ADDR_FMT_16_16_16_16_FLOAT = 0x00000020, - ADDR_FMT_RESERVED_33 = 0x00000021, - ADDR_FMT_32_32_32_32 = 0x00000022, - ADDR_FMT_32_32_32_32_FLOAT = 0x00000023, - ADDR_FMT_RESERVED_36 = 0x00000024, - ADDR_FMT_1 = 0x00000025, - ADDR_FMT_1_REVERSED = 0x00000026, - ADDR_FMT_GB_GR = 0x00000027, - ADDR_FMT_BG_RG = 0x00000028, - ADDR_FMT_32_AS_8 = 0x00000029, - ADDR_FMT_32_AS_8_8 = 0x0000002a, - ADDR_FMT_5_9_9_9_SHAREDEXP = 0x0000002b, - ADDR_FMT_8_8_8 = 0x0000002c, - ADDR_FMT_16_16_16 = 0x0000002d, - ADDR_FMT_16_16_16_FLOAT = 0x0000002e, - ADDR_FMT_32_32_32 = 0x0000002f, - ADDR_FMT_32_32_32_FLOAT = 
0x00000030, - ADDR_FMT_BC1 = 0x00000031, - ADDR_FMT_BC2 = 0x00000032, - ADDR_FMT_BC3 = 0x00000033, - ADDR_FMT_BC4 = 0x00000034, - ADDR_FMT_BC5 = 0x00000035, - ADDR_FMT_BC6 = 0x00000036, - ADDR_FMT_BC7 = 0x00000037, - ADDR_FMT_32_AS_32_32_32_32 = 0x00000038, - ADDR_FMT_APC3 = 0x00000039, - ADDR_FMT_APC4 = 0x0000003a, - ADDR_FMT_APC5 = 0x0000003b, - ADDR_FMT_APC6 = 0x0000003c, - ADDR_FMT_APC7 = 0x0000003d, - ADDR_FMT_CTX1 = 0x0000003e, - ADDR_FMT_RESERVED_63 = 0x0000003f, - ADDR_FMT_ASTC_4x4 = 0x00000040, - ADDR_FMT_ASTC_5x4 = 0x00000041, - ADDR_FMT_ASTC_5x5 = 0x00000042, - ADDR_FMT_ASTC_6x5 = 0x00000043, - ADDR_FMT_ASTC_6x6 = 0x00000044, - ADDR_FMT_ASTC_8x5 = 0x00000045, - ADDR_FMT_ASTC_8x6 = 0x00000046, - ADDR_FMT_ASTC_8x8 = 0x00000047, - ADDR_FMT_ASTC_10x5 = 0x00000048, - ADDR_FMT_ASTC_10x6 = 0x00000049, - ADDR_FMT_ASTC_10x8 = 0x0000004a, - ADDR_FMT_ASTC_10x10 = 0x0000004b, - ADDR_FMT_ASTC_12x10 = 0x0000004c, - ADDR_FMT_ASTC_12x12 = 0x0000004d, - ADDR_FMT_ETC2_64BPP = 0x0000004e, - ADDR_FMT_ETC2_128BPP = 0x0000004f, -} AddrFormat; - -/** -**************************************************************************************************** -* AddrDepthFormat -* -* @brief -* Neutral enum for addrFlt32ToDepthPixel -* -**************************************************************************************************** -*/ -typedef enum _AddrDepthFormat -{ - ADDR_DEPTH_INVALID = 0x00000000, - ADDR_DEPTH_16 = 0x00000001, - ADDR_DEPTH_X8_24 = 0x00000002, - ADDR_DEPTH_8_24 = 0x00000003, - ADDR_DEPTH_X8_24_FLOAT = 0x00000004, - ADDR_DEPTH_8_24_FLOAT = 0x00000005, - ADDR_DEPTH_32_FLOAT = 0x00000006, - ADDR_DEPTH_X24_8_32_FLOAT = 0x00000007, - -} AddrDepthFormat; - -/** -**************************************************************************************************** -* AddrColorFormat -* -* @brief -* Neutral enum for ColorFormat -* -**************************************************************************************************** -*/ -typedef enum _AddrColorFormat -{ - 
ADDR_COLOR_INVALID = 0x00000000, - ADDR_COLOR_8 = 0x00000001, - ADDR_COLOR_4_4 = 0x00000002, - ADDR_COLOR_3_3_2 = 0x00000003, - ADDR_COLOR_RESERVED_4 = 0x00000004, - ADDR_COLOR_16 = 0x00000005, - ADDR_COLOR_16_FLOAT = 0x00000006, - ADDR_COLOR_8_8 = 0x00000007, - ADDR_COLOR_5_6_5 = 0x00000008, - ADDR_COLOR_6_5_5 = 0x00000009, - ADDR_COLOR_1_5_5_5 = 0x0000000a, - ADDR_COLOR_4_4_4_4 = 0x0000000b, - ADDR_COLOR_5_5_5_1 = 0x0000000c, - ADDR_COLOR_32 = 0x0000000d, - ADDR_COLOR_32_FLOAT = 0x0000000e, - ADDR_COLOR_16_16 = 0x0000000f, - ADDR_COLOR_16_16_FLOAT = 0x00000010, - ADDR_COLOR_8_24 = 0x00000011, - ADDR_COLOR_8_24_FLOAT = 0x00000012, - ADDR_COLOR_24_8 = 0x00000013, - ADDR_COLOR_24_8_FLOAT = 0x00000014, - ADDR_COLOR_10_11_11 = 0x00000015, - ADDR_COLOR_10_11_11_FLOAT = 0x00000016, - ADDR_COLOR_11_11_10 = 0x00000017, - ADDR_COLOR_11_11_10_FLOAT = 0x00000018, - ADDR_COLOR_2_10_10_10 = 0x00000019, - ADDR_COLOR_8_8_8_8 = 0x0000001a, - ADDR_COLOR_10_10_10_2 = 0x0000001b, - ADDR_COLOR_X24_8_32_FLOAT = 0x0000001c, - ADDR_COLOR_32_32 = 0x0000001d, - ADDR_COLOR_32_32_FLOAT = 0x0000001e, - ADDR_COLOR_16_16_16_16 = 0x0000001f, - ADDR_COLOR_16_16_16_16_FLOAT = 0x00000020, - ADDR_COLOR_RESERVED_33 = 0x00000021, - ADDR_COLOR_32_32_32_32 = 0x00000022, - ADDR_COLOR_32_32_32_32_FLOAT = 0x00000023, -} AddrColorFormat; - -/** -**************************************************************************************************** -* AddrSurfaceNumber -* -* @brief -* Neutral enum for SurfaceNumber -* -**************************************************************************************************** -*/ -typedef enum _AddrSurfaceNumber { - ADDR_NUMBER_UNORM = 0x00000000, - ADDR_NUMBER_SNORM = 0x00000001, - ADDR_NUMBER_USCALED = 0x00000002, - ADDR_NUMBER_SSCALED = 0x00000003, - ADDR_NUMBER_UINT = 0x00000004, - ADDR_NUMBER_SINT = 0x00000005, - ADDR_NUMBER_SRGB = 0x00000006, - ADDR_NUMBER_FLOAT = 0x00000007, -} AddrSurfaceNumber; - -/** 
-**************************************************************************************************** -* AddrSurfaceSwap -* -* @brief -* Neutral enum for SurfaceSwap -* -**************************************************************************************************** -*/ -typedef enum _AddrSurfaceSwap { - ADDR_SWAP_STD = 0x00000000, - ADDR_SWAP_ALT = 0x00000001, - ADDR_SWAP_STD_REV = 0x00000002, - ADDR_SWAP_ALT_REV = 0x00000003, -} AddrSurfaceSwap; - -/** -**************************************************************************************************** -* AddrHtileBlockSize -* -* @brief -* Size of HTILE blocks, valid values are 4 or 8 for now -**************************************************************************************************** -*/ -typedef enum _AddrHtileBlockSize -{ - ADDR_HTILE_BLOCKSIZE_4 = 4, - ADDR_HTILE_BLOCKSIZE_8 = 8, -} AddrHtileBlockSize; - - -/** -**************************************************************************************************** -* AddrPipeCfg -* -* @brief -* The pipe configuration field specifies both the number of pipes and -* how pipes are interleaved on the surface. -* The expression of number of pipes, the shader engine tile size, and packer tile size -* is encoded in a PIPE_CONFIG register field. -* In general the number of pipes usually matches the number of memory channels of the -* hardware configuration. -* For hw configurations w/ non-pow2 memory number of memory channels, it usually matches -* the number of ROP units(? TODO: which registers??) -* The enum value = hw enum + 1 which is to reserve 0 for requesting default. 
-**************************************************************************************************** -*/ -typedef enum _AddrPipeCfg -{ - ADDR_PIPECFG_INVALID = 0, - ADDR_PIPECFG_P2 = 1, /// 2 pipes, - ADDR_PIPECFG_P4_8x16 = 5, /// 4 pipes, - ADDR_PIPECFG_P4_16x16 = 6, - ADDR_PIPECFG_P4_16x32 = 7, - ADDR_PIPECFG_P4_32x32 = 8, - ADDR_PIPECFG_P8_16x16_8x16 = 9, /// 8 pipes - ADDR_PIPECFG_P8_16x32_8x16 = 10, - ADDR_PIPECFG_P8_32x32_8x16 = 11, - ADDR_PIPECFG_P8_16x32_16x16 = 12, - ADDR_PIPECFG_P8_32x32_16x16 = 13, - ADDR_PIPECFG_P8_32x32_16x32 = 14, - ADDR_PIPECFG_P8_32x64_32x32 = 15, - ADDR_PIPECFG_P16_32x32_8x16 = 17, /// 16 pipes - ADDR_PIPECFG_P16_32x32_16x16 = 18, - ADDR_PIPECFG_MAX = 19, -} AddrPipeCfg; - -/** -**************************************************************************************************** -* AddrTileType -* -* @brief -* Neutral enums that specifies micro tile type (MICRO_TILE_MODE) -**************************************************************************************************** -*/ -typedef enum _AddrTileType -{ - ADDR_DISPLAYABLE = 0, ///< Displayable tiling - ADDR_NON_DISPLAYABLE = 1, ///< Non-displayable tiling, a.k.a thin micro tiling - ADDR_DEPTH_SAMPLE_ORDER = 2, ///< Same as non-displayable plus depth-sample-order - ADDR_ROTATED = 3, ///< Rotated displayable tiling - ADDR_THICK = 4, ///< Thick micro-tiling, only valid for THICK and XTHICK -} AddrTileType; - -//////////////////////////////////////////////////////////////////////////////////////////////////// -// -// Type definitions: short system-independent names for address library types -// -//////////////////////////////////////////////////////////////////////////////////////////////////// - -#if !defined(__APPLE__) || defined(HAVE_TSERVER) - -#ifndef BOOL_32 // no bool type in C -/// @brief Boolean type, since none is defined in C -/// @ingroup type -#define BOOL_32 int -#endif - -#ifndef INT_32 -#define INT_32 int -#endif - -#ifndef UINT_32 -#define UINT_32 unsigned int 
-#endif - -#ifndef INT_16 -#define INT_16 short -#endif - -#ifndef UINT_16 -#define UINT_16 unsigned short -#endif - -#ifndef INT_8 -#define INT_8 char -#endif - -#ifndef UINT_8 -#define UINT_8 unsigned char -#endif - -#ifndef NULL -#define NULL 0 -#endif - -#ifndef TRUE -#define TRUE 1 -#endif - -#ifndef FALSE -#define FALSE 0 -#endif - -// -// 64-bit integer types depend on the compiler -// -#if defined( __GNUC__ ) || defined( __WATCOMC__ ) -#define INT_64 long long -#define UINT_64 unsigned long long - -#elif defined( _WIN32 ) -#define INT_64 __int64 -#define UINT_64 unsigned __int64 - -#else -#error Unsupported compiler and/or operating system for 64-bit integers - -/// @brief 64-bit signed integer type (compiler dependent) -/// @ingroup type -/// -/// The addrlib defines a 64-bit signed integer type for either -/// Gnu/Watcom compilers (which use the first syntax) or for -/// the Windows VCC compiler (which uses the second syntax). -#define INT_64 long long OR __int64 - -/// @brief 64-bit unsigned integer type (compiler dependent) -/// @ingroup type -/// -/// The addrlib defines a 64-bit unsigned integer type for either -/// Gnu/Watcom compilers (which use the first syntax) or for -/// the Windows VCC compiler (which uses the second syntax). -/// -#define UINT_64 unsigned long long OR unsigned __int64 -#endif - -#endif // #if !defined(__APPLE__) || defined(HAVE_TSERVER) - -// ADDR64X is used to print addresses in hex form on both Windows and Linux -// -#if defined( __GNUC__ ) || defined( __WATCOMC__ ) -#define ADDR64X "llx" -#define ADDR64D "lld" - -#elif defined( _WIN32 ) -#define ADDR64X "I64x" -#define ADDR64D "I64d" - -#else -#error Unsupported compiler and/or operating system for 64-bit integers - -/// @brief Addrlib device address 64-bit printf tag (compiler dependent) -/// @ingroup type -/// -/// This allows printf to display an ADDR_64 for either the Windows VCC compiler -/// (which used this value) or the Gnu/Watcom compilers (which use "llx". 
-/// An example of use is printf("addr 0x%"ADDR64X"\n", address); -/// -#define ADDR64X "llx" OR "I64x" -#define ADDR64D "lld" OR "I64d" -#endif - - -/// @brief Union for storing a 32-bit float or 32-bit integer -/// @ingroup type -/// -/// This union provides a simple way to convert between a 32-bit float -/// and a 32-bit integer. It also prevents the compiler from producing -/// code that alters NaN values when assiging or coying floats. -/// Therefore, all address library routines that pass or return 32-bit -/// floating point data do so by passing or returning a FLT_32. -/// -typedef union { - INT_32 i; - UINT_32 u; - float f; -} ADDR_FLT_32; - - -//////////////////////////////////////////////////////////////////////////////////////////////////// -// -// Macros for controlling linking and building on multiple systems -// -//////////////////////////////////////////////////////////////////////////////////////////////////// -#if defined(_MSC_VER) -#if defined(va_copy) -#undef va_copy //redefine va_copy to support VC2013 -#endif -#endif - -#if !defined(va_copy) -#define va_copy(dst, src) \ - ((void) memcpy(&(dst), &(src), sizeof(va_list))) -#endif - -#endif // __ADDR_TYPES_H__ - diff -Nru mesa-18.3.3/src/amd/addrlib/amdgpu_asic_addr.h mesa-19.0.1/src/amd/addrlib/amdgpu_asic_addr.h --- mesa-18.3.3/src/amd/addrlib/amdgpu_asic_addr.h 2018-12-07 18:58:04.000000000 +0000 +++ mesa-19.0.1/src/amd/addrlib/amdgpu_asic_addr.h 1970-01-01 00:00:00.000000000 +0000 @@ -1,138 +0,0 @@ -/* - * Copyright © 2017 Advanced Micro Devices, Inc. - * All Rights Reserved. 
- * - * Permission is hereby granted, free of charge, to any person obtaining - * a copy of this software and associated documentation files (the - * "Software"), to deal in the Software without restriction, including - * without limitation the rights to use, copy, modify, merge, publish, - * distribute, sub license, and/or sell copies of the Software, and to - * permit persons to whom the Software is furnished to do so, subject to - * the following conditions: - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES - * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - * NON-INFRINGEMENT. IN NO EVENT SHALL THE COPYRIGHT HOLDERS, AUTHORS - * AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, - * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE - * USE OR OTHER DEALINGS IN THE SOFTWARE. - * - * The above copyright notice and this permission notice (including the - * next paragraph) shall be included in all copies or substantial portions - * of the Software. 
- */ - -#ifndef _AMDGPU_ASIC_ADDR_H -#define _AMDGPU_ASIC_ADDR_H - -#define ATI_VENDOR_ID 0x1002 -#define AMD_VENDOR_ID 0x1022 - -// AMDGPU_VENDOR_IS_AMD(vendorId) -#define AMDGPU_VENDOR_IS_AMD(v) ((v == ATI_VENDOR_ID) || (v == AMD_VENDOR_ID)) - -#define FAMILY_UNKNOWN 0x00 -#define FAMILY_TN 0x69 -#define FAMILY_SI 0x6E -#define FAMILY_CI 0x78 -#define FAMILY_KV 0x7D -#define FAMILY_VI 0x82 -#define FAMILY_POLARIS 0x82 -#define FAMILY_CZ 0x87 -#define FAMILY_AI 0x8D -#define FAMILY_RV 0x8E - -// AMDGPU_FAMILY_IS(familyId, familyName) -#define FAMILY_IS(f, fn) (f == FAMILY_##fn) -#define FAMILY_IS_TN(f) FAMILY_IS(f, TN) -#define FAMILY_IS_SI(f) FAMILY_IS(f, SI) -#define FAMILY_IS_CI(f) FAMILY_IS(f, CI) -#define FAMILY_IS_KV(f) FAMILY_IS(f, KV) -#define FAMILY_IS_VI(f) FAMILY_IS(f, VI) -#define FAMILY_IS_POLARIS(f) FAMILY_IS(f, POLARIS) -#define FAMILY_IS_CZ(f) FAMILY_IS(f, CZ) -#define FAMILY_IS_AI(f) FAMILY_IS(f, AI) -#define FAMILY_IS_RV(f) FAMILY_IS(f, RV) - -#define AMDGPU_UNKNOWN 0xFF - -#define AMDGPU_TAHITI_RANGE 0x05, 0x14 -#define AMDGPU_PITCAIRN_RANGE 0x15, 0x28 -#define AMDGPU_CAPEVERDE_RANGE 0x29, 0x3C -#define AMDGPU_OLAND_RANGE 0x3C, 0x46 -#define AMDGPU_HAINAN_RANGE 0x46, 0xFF - -#define AMDGPU_BONAIRE_RANGE 0x14, 0x28 -#define AMDGPU_HAWAII_RANGE 0x28, 0x3C - -#define AMDGPU_SPECTRE_RANGE 0x01, 0x41 -#define AMDGPU_SPOOKY_RANGE 0x41, 0x81 -#define AMDGPU_KALINDI_RANGE 0x81, 0xA1 -#define AMDGPU_GODAVARI_RANGE 0xA1, 0xFF - -#define AMDGPU_ICELAND_RANGE 0x01, 0x14 -#define AMDGPU_TONGA_RANGE 0x14, 0x28 -#define AMDGPU_FIJI_RANGE 0x3C, 0x50 - -#define AMDGPU_POLARIS10_RANGE 0x50, 0x5A -#define AMDGPU_POLARIS11_RANGE 0x5A, 0x64 -#define AMDGPU_POLARIS12_RANGE 0x64, 0x6E -#define AMDGPU_VEGAM_RANGE 0x6E, 0xFF - -#define AMDGPU_CARRIZO_RANGE 0x01, 0x21 -#define AMDGPU_BRISTOL_RANGE 0x10, 0x21 -#define AMDGPU_STONEY_RANGE 0x61, 0xFF - -#define AMDGPU_VEGA10_RANGE 0x01, 0x14 -#define AMDGPU_VEGA12_RANGE 0x14, 0x28 -#define AMDGPU_VEGA20_RANGE 0x28, 0xFF - 
-#define AMDGPU_RAVEN_RANGE 0x01, 0x81 -#define AMDGPU_RAVEN2_RANGE 0x81, 0xFF - -#define AMDGPU_EXPAND_FIX(x) x -#define AMDGPU_RANGE_HELPER(val, min, max) ((val >= min) && (val < max)) -#define AMDGPU_IN_RANGE(val, ...) AMDGPU_EXPAND_FIX(AMDGPU_RANGE_HELPER(val, __VA_ARGS__)) - - -// ASICREV_IS(eRevisionId, revisionName) -#define ASICREV_IS(r, rn) AMDGPU_IN_RANGE(r, AMDGPU_##rn##_RANGE) -#define ASICREV_IS_TAHITI_P(r) ASICREV_IS(r, TAHITI) -#define ASICREV_IS_PITCAIRN_PM(r) ASICREV_IS(r, PITCAIRN) -#define ASICREV_IS_CAPEVERDE_M(r) ASICREV_IS(r, CAPEVERDE) -#define ASICREV_IS_OLAND_M(r) ASICREV_IS(r, OLAND) -#define ASICREV_IS_HAINAN_V(r) ASICREV_IS(r, HAINAN) - -#define ASICREV_IS_BONAIRE_M(r) ASICREV_IS(r, BONAIRE) -#define ASICREV_IS_HAWAII_P(r) ASICREV_IS(r, HAWAII) - -#define ASICREV_IS_SPECTRE(r) ASICREV_IS(r, SPECTRE) -#define ASICREV_IS_SPOOKY(r) ASICREV_IS(r, SPOOKY) -#define ASICREV_IS_KALINDI(r) ASICREV_IS(r, KALINDI) -#define ASICREV_IS_KALINDI_GODAVARI(r) ASICREV_IS(r, GODAVARI) - -#define ASICREV_IS_ICELAND_M(r) ASICREV_IS(r, ICELAND) -#define ASICREV_IS_TONGA_P(r) ASICREV_IS(r, TONGA) -#define ASICREV_IS_FIJI_P(r) ASICREV_IS(r, FIJI) - -#define ASICREV_IS_POLARIS10_P(r) ASICREV_IS(r, POLARIS10) -#define ASICREV_IS_POLARIS11_M(r) ASICREV_IS(r, POLARIS11) -#define ASICREV_IS_POLARIS12_V(r) ASICREV_IS(r, POLARIS12) -#define ASICREV_IS_VEGAM_P(r) ASICREV_IS(r, VEGAM) - -#define ASICREV_IS_CARRIZO(r) ASICREV_IS(r, CARRIZO) -#define ASICREV_IS_CARRIZO_BRISTOL(r) ASICREV_IS(r, BRISTOL) -#define ASICREV_IS_STONEY(r) ASICREV_IS(r, STONEY) - -#define ASICREV_IS_VEGA10_M(r) ASICREV_IS(r, VEGA10) -#define ASICREV_IS_VEGA10_P(r) ASICREV_IS(r, VEGA10) -#define ASICREV_IS_VEGA12_P(r) ASICREV_IS(r, VEGA12) -#define ASICREV_IS_VEGA12_p(r) ASICREV_IS(r, VEGA12) -#define ASICREV_IS_VEGA20_P(r) ASICREV_IS(r, VEGA20) - -#define ASICREV_IS_RAVEN(r) ASICREV_IS(r, RAVEN) -#define ASICREV_IS_RAVEN2(r) ASICREV_IS(r, RAVEN2) - -#endif // _AMDGPU_ASIC_ADDR_H diff -Nru 
mesa-18.3.3/src/amd/addrlib/core/addrcommon.h mesa-19.0.1/src/amd/addrlib/core/addrcommon.h --- mesa-18.3.3/src/amd/addrlib/core/addrcommon.h 2017-12-02 01:35:56.000000000 +0000 +++ mesa-19.0.1/src/amd/addrlib/core/addrcommon.h 1970-01-01 00:00:00.000000000 +0000 @@ -1,924 +0,0 @@ -/* - * Copyright © 2014 Advanced Micro Devices, Inc. - * All Rights Reserved. - * - * Permission is hereby granted, free of charge, to any person obtaining - * a copy of this software and associated documentation files (the - * "Software"), to deal in the Software without restriction, including - * without limitation the rights to use, copy, modify, merge, publish, - * distribute, sub license, and/or sell copies of the Software, and to - * permit persons to whom the Software is furnished to do so, subject to - * the following conditions: - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES - * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - * NON-INFRINGEMENT. IN NO EVENT SHALL THE COPYRIGHT HOLDERS, AUTHORS - * AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, - * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE - * USE OR OTHER DEALINGS IN THE SOFTWARE. - * - * The above copyright notice and this permission notice (including the - * next paragraph) shall be included in all copies or substantial portions - * of the Software. - */ - -/** -**************************************************************************************************** -* @file addrcommon.h -* @brief Contains the helper function and constants. 
-**************************************************************************************************** -*/ - -#ifndef __ADDR_COMMON_H__ -#define __ADDR_COMMON_H__ - -#include "addrinterface.h" - -#include -#include -#include - -#if !defined(DEBUG) -#ifdef NDEBUG -#define DEBUG 0 -#else -#define DEBUG 1 -#endif -#endif - -//////////////////////////////////////////////////////////////////////////////////////////////////// -// Platform specific debug break defines -//////////////////////////////////////////////////////////////////////////////////////////////////// -#if DEBUG - #if defined(__GNUC__) - #define ADDR_DBG_BREAK() assert(false) - #elif defined(__APPLE__) - #define ADDR_DBG_BREAK() { IOPanic("");} - #else - #define ADDR_DBG_BREAK() { __debugbreak(); } - #endif -#else - #define ADDR_DBG_BREAK() -#endif -//////////////////////////////////////////////////////////////////////////////////////////////////// - -//////////////////////////////////////////////////////////////////////////////////////////////////// -// Debug assertions used in AddrLib -//////////////////////////////////////////////////////////////////////////////////////////////////// -#if defined(_WIN32) && (_MSC_VER >= 1400) - #define ADDR_ANALYSIS_ASSUME(expr) __analysis_assume(expr) -#else - #define ADDR_ANALYSIS_ASSUME(expr) do { (void)(expr); } while (0) -#endif - -#define ADDR_ASSERT(__e) assert(__e) -#define ADDR_ASSERT_ALWAYS() ADDR_DBG_BREAK() -#define ADDR_UNHANDLED_CASE() ADDR_ASSERT(!"Unhandled case") -#define ADDR_NOT_IMPLEMENTED() ADDR_ASSERT(!"Not implemented"); -//////////////////////////////////////////////////////////////////////////////////////////////////// - -//////////////////////////////////////////////////////////////////////////////////////////////////// -// Debug print macro from legacy address library -//////////////////////////////////////////////////////////////////////////////////////////////////// -#if DEBUG - -#define ADDR_PRNT(a) Object::DebugPrint a - -/// @brief Macro 
for reporting informational messages -/// @ingroup util -/// -/// This macro optionally prints an informational message to stdout. -/// The first parameter is a condition -- if it is true, nothing is done. -/// The second pararmeter MUST be a parenthesis-enclosed list of arguments, -/// starting with a string. This is passed to printf() or an equivalent -/// in order to format the informational message. For example, -/// ADDR_INFO(0, ("test %d",3) ); prints out "test 3". -/// -#define ADDR_INFO(cond, a) \ -{ if (!(cond)) { ADDR_PRNT(a); } } - - -/// @brief Macro for reporting error warning messages -/// @ingroup util -/// -/// This macro optionally prints an error warning message to stdout, -/// followed by the file name and line number where the macro was called. -/// The first parameter is a condition -- if it is true, nothing is done. -/// The second pararmeter MUST be a parenthesis-enclosed list of arguments, -/// starting with a string. This is passed to printf() or an equivalent -/// in order to format the informational message. For example, -/// ADDR_WARN(0, ("test %d",3) ); prints out "test 3" followed by -/// a second line with the file name and line number. -/// -#define ADDR_WARN(cond, a) \ -{ if (!(cond)) \ - { ADDR_PRNT(a); \ - ADDR_PRNT((" WARNING in file %s, line %d\n", __FILE__, __LINE__)); \ -} } - - -/// @brief Macro for reporting fatal error conditions -/// @ingroup util -/// -/// This macro optionally stops execution of the current routine -/// after printing an error warning message to stdout, -/// followed by the file name and line number where the macro was called. -/// The first parameter is a condition -- if it is true, nothing is done. -/// The second pararmeter MUST be a parenthesis-enclosed list of arguments, -/// starting with a string. This is passed to printf() or an equivalent -/// in order to format the informational message. 
For example, -/// ADDR_EXIT(0, ("test %d",3) ); prints out "test 3" followed by -/// a second line with the file name and line number, then stops execution. -/// -#define ADDR_EXIT(cond, a) \ -{ if (!(cond)) \ - { ADDR_PRNT(a); ADDR_DBG_BREAK();\ -} } - -#else // DEBUG - -#define ADDRDPF 1 ? (void)0 : (void) - -#define ADDR_PRNT(a) - -#define ADDR_DBG_BREAK() - -#define ADDR_INFO(cond, a) - -#define ADDR_WARN(cond, a) - -#define ADDR_EXIT(cond, a) - -#endif // DEBUG -//////////////////////////////////////////////////////////////////////////////////////////////////// - -#define ADDR_C_ASSERT(__e) typedef char __ADDR_C_ASSERT__[(__e) ? 1 : -1] - -namespace Addr -{ - -namespace V1 -{ -//////////////////////////////////////////////////////////////////////////////////////////////////// -// Common constants -//////////////////////////////////////////////////////////////////////////////////////////////////// -static const UINT_32 MicroTileWidth = 8; ///< Micro tile width, for 1D and 2D tiling -static const UINT_32 MicroTileHeight = 8; ///< Micro tile height, for 1D and 2D tiling -static const UINT_32 ThickTileThickness = 4; ///< Micro tile thickness, for THICK modes -static const UINT_32 XThickTileThickness = 8; ///< Extra thick tiling thickness -static const UINT_32 PowerSaveTileBytes = 64; ///< Nuber of bytes per tile for power save 64 -static const UINT_32 CmaskCacheBits = 1024; ///< Number of bits for CMASK cache -static const UINT_32 CmaskElemBits = 4; ///< Number of bits for CMASK element -static const UINT_32 HtileCacheBits = 16384; ///< Number of bits for HTILE cache 512*32 - -static const UINT_32 MicroTilePixels = MicroTileWidth * MicroTileHeight; - -static const INT_32 TileIndexInvalid = TILEINDEX_INVALID; -static const INT_32 TileIndexLinearGeneral = TILEINDEX_LINEAR_GENERAL; -static const INT_32 TileIndexNoMacroIndex = -3; - -} // V1 - -namespace V2 -{ -//////////////////////////////////////////////////////////////////////////////////////////////////// -// 
Common constants -//////////////////////////////////////////////////////////////////////////////////////////////////// -static const UINT_32 MaxSurfaceHeight = 16384; - -} // V2 - -//////////////////////////////////////////////////////////////////////////////////////////////////// -// Common macros -//////////////////////////////////////////////////////////////////////////////////////////////////// -#define BITS_PER_BYTE 8 -#define BITS_TO_BYTES(x) ( ((x) + (BITS_PER_BYTE-1)) / BITS_PER_BYTE ) -#define BYTES_TO_BITS(x) ( (x) * BITS_PER_BYTE ) - -/// Helper macros to select a single bit from an int (undefined later in section) -#define _BIT(v,b) (((v) >> (b) ) & 1) - -/** -**************************************************************************************************** -* @brief Enums to identify AddrLib type -**************************************************************************************************** -*/ -enum LibClass -{ - BASE_ADDRLIB = 0x0, - R600_ADDRLIB = 0x6, - R800_ADDRLIB = 0x8, - SI_ADDRLIB = 0xa, - CI_ADDRLIB = 0xb, - AI_ADDRLIB = 0xd, -}; - -/** -**************************************************************************************************** -* ChipFamily -* -* @brief -* Neutral enums that specifies chip family. -* -**************************************************************************************************** -*/ -enum ChipFamily -{ - ADDR_CHIP_FAMILY_IVLD, ///< Invalid family - ADDR_CHIP_FAMILY_R6XX, - ADDR_CHIP_FAMILY_R7XX, - ADDR_CHIP_FAMILY_R8XX, - ADDR_CHIP_FAMILY_NI, - ADDR_CHIP_FAMILY_SI, - ADDR_CHIP_FAMILY_CI, - ADDR_CHIP_FAMILY_VI, - ADDR_CHIP_FAMILY_AI, -}; - -/** -**************************************************************************************************** -* ConfigFlags -* -* @brief -* This structure is used to set configuration flags. 
-**************************************************************************************************** -*/ -union ConfigFlags -{ - struct - { - /// These flags are set up internally thru AddrLib::Create() based on ADDR_CREATE_FLAGS - UINT_32 optimalBankSwap : 1; ///< New bank tiling for RV770 only - UINT_32 noCubeMipSlicesPad : 1; ///< Disables faces padding for cubemap mipmaps - UINT_32 fillSizeFields : 1; ///< If clients fill size fields in all input and - /// output structure - UINT_32 ignoreTileInfo : 1; ///< Don't use tile info structure - UINT_32 useTileIndex : 1; ///< Make tileIndex field in input valid - UINT_32 useCombinedSwizzle : 1; ///< Use combined swizzle - UINT_32 checkLast2DLevel : 1; ///< Check the last 2D mip sub level - UINT_32 useHtileSliceAlign : 1; ///< Do htile single slice alignment - UINT_32 allowLargeThickTile : 1; ///< Allow 64*thickness*bytesPerPixel > rowSize - UINT_32 disableLinearOpt : 1; ///< Disallow tile modes to be optimized to linear - UINT_32 reserved : 22; ///< Reserved bits for future use - }; - - UINT_32 value; -}; - -//////////////////////////////////////////////////////////////////////////////////////////////////// -// Misc helper functions -//////////////////////////////////////////////////////////////////////////////////////////////////// - -/** -**************************************************************************************************** -* AddrXorReduce -* -* @brief -* Xor the right-side numberOfBits bits of x. 
-**************************************************************************************************** -*/ -static inline UINT_32 XorReduce( - UINT_32 x, - UINT_32 numberOfBits) -{ - UINT_32 i; - UINT_32 result = x & 1; - - for (i=1; i>i) & 1); - } - - return result; -} - -/** -**************************************************************************************************** -* IsPow2 -* -* @brief -* Check if the size (UINT_32) is pow 2 -**************************************************************************************************** -*/ -static inline UINT_32 IsPow2( - UINT_32 dim) ///< [in] dimension of miplevel -{ - ADDR_ASSERT(dim > 0); - return !(dim & (dim - 1)); -} - -/** -**************************************************************************************************** -* IsPow2 -* -* @brief -* Check if the size (UINT_64) is pow 2 -**************************************************************************************************** -*/ -static inline UINT_64 IsPow2( - UINT_64 dim) ///< [in] dimension of miplevel -{ - ADDR_ASSERT(dim > 0); - return !(dim & (dim - 1)); -} - -/** -**************************************************************************************************** -* ByteAlign -* -* @brief -* Align UINT_32 "x" to "align" alignment, "align" should be power of 2 -**************************************************************************************************** -*/ -static inline UINT_32 PowTwoAlign( - UINT_32 x, - UINT_32 align) -{ - // - // Assert that x is a power of two. 
- // - ADDR_ASSERT(IsPow2(align)); - return (x + (align - 1)) & (~(align - 1)); -} - -/** -**************************************************************************************************** -* ByteAlign -* -* @brief -* Align UINT_64 "x" to "align" alignment, "align" should be power of 2 -**************************************************************************************************** -*/ -static inline UINT_64 PowTwoAlign( - UINT_64 x, - UINT_64 align) -{ - // - // Assert that x is a power of two. - // - ADDR_ASSERT(IsPow2(align)); - return (x + (align - 1)) & (~(align - 1)); -} - -/** -**************************************************************************************************** -* Min -* -* @brief -* Get the min value between two unsigned values -**************************************************************************************************** -*/ -static inline UINT_32 Min( - UINT_32 value1, - UINT_32 value2) -{ - return ((value1 < (value2)) ? (value1) : value2); -} - -/** -**************************************************************************************************** -* Min -* -* @brief -* Get the min value between two signed values -**************************************************************************************************** -*/ -static inline INT_32 Min( - INT_32 value1, - INT_32 value2) -{ - return ((value1 < (value2)) ? (value1) : value2); -} - -/** -**************************************************************************************************** -* Max -* -* @brief -* Get the max value between two unsigned values -**************************************************************************************************** -*/ -static inline UINT_32 Max( - UINT_32 value1, - UINT_32 value2) -{ - return ((value1 > (value2)) ? 
(value1) : value2); -} - -/** -**************************************************************************************************** -* Max -* -* @brief -* Get the max value between two signed values -**************************************************************************************************** -*/ -static inline INT_32 Max( - INT_32 value1, - INT_32 value2) -{ - return ((value1 > (value2)) ? (value1) : value2); -} - -/** -**************************************************************************************************** -* NextPow2 -* -* @brief -* Compute the mipmap's next level dim size -**************************************************************************************************** -*/ -static inline UINT_32 NextPow2( - UINT_32 dim) ///< [in] dimension of miplevel -{ - UINT_32 newDim = 1; - - if (dim > 0x7fffffff) - { - ADDR_ASSERT_ALWAYS(); - newDim = 0x80000000; - } - else - { - while (newDim < dim) - { - newDim <<= 1; - } - } - - return newDim; -} - -/** -**************************************************************************************************** -* Log2NonPow2 -* -* @brief -* Compute log of base 2 no matter the target is power of 2 or not -**************************************************************************************************** -*/ -static inline UINT_32 Log2NonPow2( - UINT_32 x) ///< [in] the value should calculate log based 2 -{ - UINT_32 y; - - y = 0; - while (x > 1) - { - x >>= 1; - y++; - } - - return y; -} - -/** -**************************************************************************************************** -* Log2 -* -* @brief -* Compute log of base 2 -**************************************************************************************************** -*/ -static inline UINT_32 Log2( - UINT_32 x) ///< [in] the value should calculate log based 2 -{ - // Assert that x is a power of two. 
- ADDR_ASSERT(IsPow2(x)); - - return Log2NonPow2(x); -} - -/** -**************************************************************************************************** -* QLog2 -* -* @brief -* Compute log of base 2 quickly (<= 16) -**************************************************************************************************** -*/ -static inline UINT_32 QLog2( - UINT_32 x) ///< [in] the value should calculate log based 2 -{ - ADDR_ASSERT(x <= 16); - - UINT_32 y = 0; - - switch (x) - { - case 1: - y = 0; - break; - case 2: - y = 1; - break; - case 4: - y = 2; - break; - case 8: - y = 3; - break; - case 16: - y = 4; - break; - default: - ADDR_ASSERT_ALWAYS(); - } - - return y; -} - -/** -**************************************************************************************************** -* SafeAssign -* -* @brief -* NULL pointer safe assignment -**************************************************************************************************** -*/ -static inline VOID SafeAssign( - UINT_32* pLVal, ///< [in] Pointer to left val - UINT_32 rVal) ///< [in] Right value -{ - if (pLVal) - { - *pLVal = rVal; - } -} - -/** -**************************************************************************************************** -* SafeAssign -* -* @brief -* NULL pointer safe assignment for 64bit values -**************************************************************************************************** -*/ -static inline VOID SafeAssign( - UINT_64* pLVal, ///< [in] Pointer to left val - UINT_64 rVal) ///< [in] Right value -{ - if (pLVal) - { - *pLVal = rVal; - } -} - -/** -**************************************************************************************************** -* SafeAssign -* -* @brief -* NULL pointer safe assignment for AddrTileMode -**************************************************************************************************** -*/ -static inline VOID SafeAssign( - AddrTileMode* pLVal, ///< [in] Pointer to left val - AddrTileMode rVal) ///< [in] Right value -{ 
- if (pLVal) - { - *pLVal = rVal; - } -} - -/** -**************************************************************************************************** -* RoundHalf -* -* @brief -* return (x + 1) / 2 -**************************************************************************************************** -*/ -static inline UINT_32 RoundHalf( - UINT_32 x) ///< [in] input value -{ - ADDR_ASSERT(x != 0); - -#if 1 - return (x >> 1) + (x & 1); -#else - return (x + 1) >> 1; -#endif -} - -/** -**************************************************************************************************** -* SumGeo -* -* @brief -* Calculate sum of a geometric progression whose ratio is 1/2 -**************************************************************************************************** -*/ -static inline UINT_32 SumGeo( - UINT_32 base, ///< [in] First term in the geometric progression - UINT_32 num) ///< [in] Number of terms to be added into sum -{ - ADDR_ASSERT(base > 0); - - UINT_32 sum = 0; - UINT_32 i = 0; - for (; (i < num) && (base > 1); i++) - { - sum += base; - base = RoundHalf(base); - } - sum += num - i; - - return sum; -} - -/** -**************************************************************************************************** -* GetBit -* -* @brief -* Extract bit N value (0 or 1) of a UINT32 value. -**************************************************************************************************** -*/ -static inline UINT_32 GetBit( - UINT_32 u32, ///< [in] UINT32 value - UINT_32 pos) ///< [in] bit position from LSB, valid range is [0..31] -{ - ADDR_ASSERT(pos <= 31); - - return (u32 >> pos) & 0x1; -} - -/** -**************************************************************************************************** -* GetBits -* -* @brief -* Copy 'bitsNum' bits from src start from srcStartPos into destination from dstStartPos -* srcStartPos: 0~31 for UINT_32 -* bitsNum : 1~32 for UINT_32 -* srcStartPos: 0~31 for UINT_32 -* src start position -* | -* src : b[31] b[30] b[29] ... 
... ... ... ... ... ... ... b[end]..b[beg] ... b[1] b[0] -* || Bits num || copy length || Bits num || -* dst : b[31] b[30] b[29] ... b[end]..b[beg] ... ... ... ... ... ... ... ... b[1] b[0] -* | -* dst start position -**************************************************************************************************** -*/ -static inline UINT_32 GetBits( - UINT_32 src, - UINT_32 srcStartPos, - UINT_32 bitsNum, - UINT_32 dstStartPos) -{ - ADDR_ASSERT((srcStartPos < 32) && (dstStartPos < 32) && (bitsNum > 0)); - ADDR_ASSERT((bitsNum + dstStartPos <= 32) && (bitsNum + srcStartPos <= 32)); - - return ((src >> srcStartPos) << (32 - bitsNum)) >> (32 - bitsNum - dstStartPos); -} - -/** -**************************************************************************************************** -* MortonGen2d -* -* @brief -* Generate 2D Morton interleave code with num lowest bits in each channel -**************************************************************************************************** -*/ -static inline UINT_32 MortonGen2d( - UINT_32 x, ///< [in] First channel - UINT_32 y, ///< [in] Second channel - UINT_32 num) ///< [in] Number of bits extracted from each channel -{ - UINT_32 mort = 0; - - for (UINT_32 i = 0; i < num; i++) - { - mort |= (GetBit(y, i) << (2 * i)); - mort |= (GetBit(x, i) << (2 * i + 1)); - } - - return mort; -} - -/** -**************************************************************************************************** -* MortonGen3d -* -* @brief -* Generate 3D Morton interleave code with num lowest bits in each channel -**************************************************************************************************** -*/ -static inline UINT_32 MortonGen3d( - UINT_32 x, ///< [in] First channel - UINT_32 y, ///< [in] Second channel - UINT_32 z, ///< [in] Third channel - UINT_32 num) ///< [in] Number of bits extracted from each channel -{ - UINT_32 mort = 0; - - for (UINT_32 i = 0; i < num; i++) - { - mort |= (GetBit(z, i) << (3 * i)); - mort |= (GetBit(y, 
i) << (3 * i + 1)); - mort |= (GetBit(x, i) << (3 * i + 2)); - } - - return mort; -} - -/** -**************************************************************************************************** -* ReverseBitVector -* -* @brief -* Return reversed lowest num bits of v: v[0]v[1]...v[num-2]v[num-1] -**************************************************************************************************** -*/ -static inline UINT_32 ReverseBitVector( - UINT_32 v, ///< [in] Reverse operation base value - UINT_32 num) ///< [in] Number of bits used in reverse operation -{ - UINT_32 reverse = 0; - - for (UINT_32 i = 0; i < num; i++) - { - reverse |= (GetBit(v, num - 1 - i) << i); - } - - return reverse; -} - -/** -**************************************************************************************************** -* FoldXor2d -* -* @brief -* Xor bit vector v[num-1]v[num-2]...v[1]v[0] with v[num]v[num+1]...v[2*num-2]v[2*num-1] -**************************************************************************************************** -*/ -static inline UINT_32 FoldXor2d( - UINT_32 v, ///< [in] Xor operation base value - UINT_32 num) ///< [in] Number of bits used in fold xor operation -{ - return (v & ((1 << num) - 1)) ^ ReverseBitVector(v >> num, num); -} - -/** -**************************************************************************************************** -* DeMort -* -* @brief -* Return v[0] | v[2] | v[4] | v[6]... 
| v[2*num - 2] -**************************************************************************************************** -*/ -static inline UINT_32 DeMort( - UINT_32 v, ///< [in] DeMort operation base value - UINT_32 num) ///< [in] Number of bits used in fold DeMort operation -{ - UINT_32 d = 0; - - for (UINT_32 i = 0; i < num; i++) - { - d |= ((v & (1 << (i << 1))) >> i); - } - - return d; -} - -/** -**************************************************************************************************** -* FoldXor3d -* -* @brief -* v[0]...v[num-1] ^ v[3*num-1]v[3*num-3]...v[num+2]v[num] ^ v[3*num-2]...v[num+1]v[num-1] -**************************************************************************************************** -*/ -static inline UINT_32 FoldXor3d( - UINT_32 v, ///< [in] Xor operation base value - UINT_32 num) ///< [in] Number of bits used in fold xor operation -{ - UINT_32 t = v & ((1 << num) - 1); - t ^= ReverseBitVector(DeMort(v >> num, num), num); - t ^= ReverseBitVector(DeMort(v >> (num + 1), num), num); - - return t; -} - -/** -**************************************************************************************************** -* InitChannel -* -* @brief -* Set channel initialization value via a return value -**************************************************************************************************** -*/ -static inline ADDR_CHANNEL_SETTING InitChannel( - UINT_32 valid, ///< [in] valid setting - UINT_32 channel, ///< [in] channel setting - UINT_32 index) ///< [in] index setting -{ - ADDR_CHANNEL_SETTING t; - t.valid = valid; - t.channel = channel; - t.index = index; - - return t; -} - -/** -**************************************************************************************************** -* InitChannel -* -* @brief -* Set channel initialization value via channel pointer -**************************************************************************************************** -*/ -static inline VOID InitChannel( - UINT_32 valid, ///< [in] valid setting - 
UINT_32 channel, ///< [in] channel setting - UINT_32 index, ///< [in] index setting - ADDR_CHANNEL_SETTING *pChanSet) ///< [out] channel setting to be initialized -{ - pChanSet->valid = valid; - pChanSet->channel = channel; - pChanSet->index = index; -} - - -/** -**************************************************************************************************** -* InitChannel -* -* @brief -* Set channel initialization value via another channel -**************************************************************************************************** -*/ -static inline VOID InitChannel( - ADDR_CHANNEL_SETTING *pChanDst, ///< [in] channel setting to be copied from - ADDR_CHANNEL_SETTING *pChanSrc) ///< [out] channel setting to be initialized -{ - pChanDst->valid = pChanSrc->valid; - pChanDst->channel = pChanSrc->channel; - pChanDst->index = pChanSrc->index; -} - -/** -**************************************************************************************************** -* GetMaxValidChannelIndex -* -* @brief -* Get max valid index for a specific channel -**************************************************************************************************** -*/ -static inline UINT_32 GetMaxValidChannelIndex( - const ADDR_CHANNEL_SETTING *pChanSet, ///< [in] channel setting to be initialized - UINT_32 searchCount,///< [in] number of channel setting to be searched - UINT_32 channel) ///< [in] channel to be searched -{ - UINT_32 index = 0; - - for (UINT_32 i = 0; i < searchCount; i++) - { - if (pChanSet[i].valid && (pChanSet[i].channel == channel)) - { - index = Max(index, static_cast(pChanSet[i].index)); - } - } - - return index; -} - -/** -**************************************************************************************************** -* GetCoordActiveMask -* -* @brief -* Get bit mask which indicates which positions in the equation match the target coord -**************************************************************************************************** -*/ -static inline 
UINT_32 GetCoordActiveMask( - const ADDR_CHANNEL_SETTING *pChanSet, ///< [in] channel setting to be initialized - UINT_32 searchCount,///< [in] number of channel setting to be searched - UINT_32 channel, ///< [in] channel to be searched - UINT_32 index) ///< [in] index to be searched -{ - UINT_32 mask = 0; - - for (UINT_32 i = 0; i < searchCount; i++) - { - if ((pChanSet[i].valid == TRUE) && - (pChanSet[i].channel == channel) && - (pChanSet[i].index == index)) - { - mask |= (1 << i); - } - } - - return mask; -} - -} // Addr - -#endif // __ADDR_COMMON_H__ - diff -Nru mesa-18.3.3/src/amd/addrlib/core/addrelemlib.cpp mesa-19.0.1/src/amd/addrlib/core/addrelemlib.cpp --- mesa-18.3.3/src/amd/addrlib/core/addrelemlib.cpp 2017-11-14 18:46:21.000000000 +0000 +++ mesa-19.0.1/src/amd/addrlib/core/addrelemlib.cpp 1970-01-01 00:00:00.000000000 +0000 @@ -1,1843 +0,0 @@ -/* - * Copyright © 2014 Advanced Micro Devices, Inc. - * All Rights Reserved. - * - * Permission is hereby granted, free of charge, to any person obtaining - * a copy of this software and associated documentation files (the - * "Software"), to deal in the Software without restriction, including - * without limitation the rights to use, copy, modify, merge, publish, - * distribute, sub license, and/or sell copies of the Software, and to - * permit persons to whom the Software is furnished to do so, subject to - * the following conditions: - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES - * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - * NON-INFRINGEMENT. IN NO EVENT SHALL THE COPYRIGHT HOLDERS, AUTHORS - * AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, - * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE - * USE OR OTHER DEALINGS IN THE SOFTWARE. 
- * - * The above copyright notice and this permission notice (including the - * next paragraph) shall be included in all copies or substantial portions - * of the Software. - */ - -/** -**************************************************************************************************** -* @file addrelemlib.cpp -* @brief Contains the class implementation for element/pixel related functions. -**************************************************************************************************** -*/ - -#include "addrelemlib.h" -#include "addrlib.h" - -namespace Addr -{ - -/** -**************************************************************************************************** -* ElemLib::ElemLib -* -* @brief -* constructor -* -* @return -* N/A -**************************************************************************************************** -*/ -ElemLib::ElemLib( - Lib* pAddrLib) ///< [in] Parent addrlib instance pointer - : - Object(pAddrLib->GetClient()), - m_pAddrLib(pAddrLib) -{ - switch (m_pAddrLib->GetChipFamily()) - { - case ADDR_CHIP_FAMILY_R6XX: - m_depthPlanarType = ADDR_DEPTH_PLANAR_R600; - m_fp16ExportNorm = 0; - break; - case ADDR_CHIP_FAMILY_R7XX: - m_depthPlanarType = ADDR_DEPTH_PLANAR_R600; - m_fp16ExportNorm = 1; - break; - case ADDR_CHIP_FAMILY_R8XX: - case ADDR_CHIP_FAMILY_NI: // Same as 8xx - m_depthPlanarType = ADDR_DEPTH_PLANAR_R800; - m_fp16ExportNorm = 1; - break; - default: - m_fp16ExportNorm = 1; - m_depthPlanarType = ADDR_DEPTH_PLANAR_R800; - } - - m_configFlags.value = 0; -} - -/** -**************************************************************************************************** -* ElemLib::~ElemLib -* -* @brief -* destructor -* -* @return -* N/A -**************************************************************************************************** -*/ -ElemLib::~ElemLib() -{ -} - -/** -**************************************************************************************************** -* ElemLib::Create -* -* @brief -* Creates and 
initializes AddrLib object. -* -* @return -* Returns point to ADDR_CREATEINFO if successful. -**************************************************************************************************** -*/ -ElemLib* ElemLib::Create( - const Lib* pAddrLib) ///< [in] Pointer of parent AddrLib instance -{ - ElemLib* pElemLib = NULL; - - if (pAddrLib) - { - VOID* pObj = Object::ClientAlloc(sizeof(ElemLib), pAddrLib->GetClient()); - if (pObj) - { - pElemLib = new(pObj) ElemLib(const_cast(pAddrLib)); - } - } - - return pElemLib; -} - -/************************************************************************************************** -* ElemLib::Flt32sToInt32s -* -* @brief -* Convert a ADDR_FLT_32 value to Int32 value -* -* @return -* N/A -**************************************************************************************************** -*/ -VOID ElemLib::Flt32sToInt32s( - ADDR_FLT_32 value, ///< [in] ADDR_FLT_32 value - UINT_32 bits, ///< [in] nubmer of bits in value - NumberType numberType, ///< [in] the type of number - UINT_32* pResult) ///< [out] Int32 value -{ - UINT_8 round = 128; //ADDR_ROUND_BY_HALF - UINT_32 uscale; - UINT_32 sign; - - //convert each component to an INT_32 - switch ( numberType ) - { - case ADDR_NO_NUMBER: //fall through - case ADDR_ZERO: //fall through - case ADDR_ONE: //fall through - case ADDR_EPSILON: //fall through - return; // these are zero-bit components, so don't set result - - case ADDR_UINT_BITS: // unsigned integer bit field, clamped to range - uscale = (1< uscale)) - { - *pResult = uscale; - } - else - { - *pResult = value.i; - } - return; - } - - // The algorithm used in the DB and TX differs at one value for 24-bit unorms - case ADDR_UNORM_R6XXDB: // unsigned repeating fraction - if ((bits==24) && (value.i == 0x33000000)) - { - *pResult = 1; - return; - } // Else treat like ADDR_UNORM_R6XX - - case ADDR_UNORM_R6XX: // unsigned repeating fraction - if (value.f <= 0) - { - *pResult = 0; // first clamp to [0..1] - } - else - { - if 
(value.f >= 1) - { - *pResult = (1<(f + (round/256.0f)); - } - #endif - else - { - ADDR_FLT_32 scaled; - ADDR_FLT_32 shifted; - UINT_64 truncated, rounded; - UINT_32 altShift; - UINT_32 mask = (1 << bits) - 1; - UINT_32 half = 1 << (bits - 1); - UINT_32 mant24 = (value.i & 0x7FFFFF) + 0x800000; - UINT_64 temp = mant24 - (mant24>>bits) - - static_cast((mant24 & mask) > half); - UINT_32 exp8 = value.i >> 23; - UINT_32 shift = 126 - exp8 + 24 - bits; - UINT_64 final; - - if (shift >= 32) // This is zero, even with maximum dither add - { - final = 0; - } - else - { - final = ((temp<<8) + (static_cast(round)<> (shift+8); - } - //ADDR_EXIT( *pResult == final, - // ("Float %x converted to %d-bit Unorm %x != bitwise %x", - // value.u, bits, (UINT_32)*pResult, (UINT_32)final) ); - if (final > mask) - { - final = mask; - } - - scaled.f = value.f * ((1<>23)&0xFF); - truncated = (altShift > 60) ? 0 : truncated >> altShift; - rounded = static_cast((round + truncated) >> 8); - //if (rounded > ((1<(rounded); //(INT_32)final; - } - } - } - - return; - - case ADDR_S8FLOAT32: // 32-bit IEEE float, passes through NaN values - *pResult = value.i; - return; - - // @@ FIX ROUNDING in this code, fix the denorm case - case ADDR_U4FLOATC: // Unsigned float, 4-bit exponent. 
bias 15, clamped [0..1] - sign = (value.i >> 31) & 1; - if ((value.i&0x7F800000) == 0x7F800000) // If NaN or INF: - { - if ((value.i&0x007FFFFF) != 0) // then if NaN - { - *pResult = 0; // return 0 - } - else - { - *pResult = (sign)?0:0xF00000; // else +INF->+1, -INF->0 - } - return; - } - if (value.f <= 0) - { - *pResult = 0; - } - else - { - if (value.f>=1) - { - *pResult = 0xF << (bits-4); - } - else - { - if ((value.i>>23) > 112 ) - { - // 24-bit float: normalized - // value.i += 1 << (22-bits+4); - // round the IEEE mantissa to mantissa size - // @@ NOTE: add code to support rounding - value.u &= 0x7FFFFFF; // mask off high 4 exponent bits - *pResult = value.i >> (23-bits+4);// shift off unused mantissa bits - } - else - { - // 24-bit float: denormalized - value.f = value.f / (1<<28) / (1<<28); - value.f = value.f / (1<<28) / (1<<28); // convert to IEEE denorm - // value.i += 1 << (22-bits+4); - // round the IEEE mantissa to mantissa size - // @@ NOTE: add code to support rounding - *pResult = value.i >> (23-bits+4); // shift off unused mantissa bits - } - } - } - - return; - - default: // invalid number mode - //ADDR_EXIT(0, ("Invalid AddrNumber %d", numberType) ); - break; - - } -} - -/** -**************************************************************************************************** -* ElemLib::Int32sToPixel -* -* @brief -* Pack 32-bit integer values into an uncompressed pixel, -* in the proper order -* -* @return -* N/A -* -* @note -* This entry point packes four 32-bit integer values into -* an uncompressed pixel. The pixel values are specifies in -* standard order, e.g. depth/stencil. This routine asserts -* if called on compressed pixel. 
-**************************************************************************************************** -*/ -VOID ElemLib::Int32sToPixel( - UINT_32 numComps, ///< [in] number of components - UINT_32* pComps, ///< [in] compnents - UINT_32* pCompBits, ///< [in] total bits in each component - UINT_32* pCompStart, ///< [in] the first bit position of each component - ComponentFlags properties, ///< [in] properties about byteAligned, exportNorm - UINT_32 resultBits, ///< [in] result bits: total bpp after decompression - UINT_8* pPixel) ///< [out] a depth/stencil pixel value -{ - UINT_32 i; - UINT_32 j; - UINT_32 start; - UINT_32 size; - UINT_32 byte; - UINT_32 value = 0; - UINT_32 compMask; - UINT_32 elemMask=0; - UINT_32 elementXor = 0; // address xor when reading bytes from elements - - - // @@ NOTE: assert if called on a compressed format! - - if (properties.byteAligned) // Components are all byte-sized - { - for (i = 0; i < numComps; i++) // Then for each component - { - // Copy the bytes of the component into the element - start = pCompStart[i] / 8; - size = pCompBits[i] / 8; - for (j = 0; j < size; j++) - { - pPixel[(j+start)^elementXor] = static_cast(pComps[i] >> (8*j)); - } - } - } - else // Element is 32-bits or less, components are bit fields - { - // First, extract each component in turn and combine it into a 32-bit value - for (i = 0; i < numComps; i++) - { - compMask = (1 << pCompBits[i]) - 1; - elemMask |= compMask << pCompStart[i]; - value |= (pComps[i] & compMask) << pCompStart[i]; - } - - // Mext, copy the masked value into the element - size = (resultBits + 7) / 8; - for (i = 0; i < size; i++) - { - byte = pPixel[i^elementXor] & ~(elemMask >> (8*i)); - pPixel[i^elementXor] = static_cast(byte | ((elemMask & value) >> (8*i))); - } - } -} - -/** -**************************************************************************************************** -* Flt32ToDepthPixel -* -* @brief -* Convert a FLT_32 value to a depth/stencil pixel value -* -* @return -* N/A 
-**************************************************************************************************** -*/ -VOID ElemLib::Flt32ToDepthPixel( - AddrDepthFormat format, ///< [in] Depth format - const ADDR_FLT_32 comps[2], ///< [in] two components of depth - UINT_8* pPixel ///< [out] depth pixel value - ) const -{ - UINT_32 i; - UINT_32 values[2]; - ComponentFlags properties; // byteAligned, exportNorm - UINT_32 resultBits = 0; // result bits: total bits per pixel after decompression - - PixelFormatInfo fmt; - - // get type for each component - PixGetDepthCompInfo(format, &fmt); - - //initialize properties - properties.byteAligned = TRUE; - properties.exportNorm = TRUE; - properties.floatComp = FALSE; - - //set properties and result bits - for (i = 0; i < 2; i++) - { - if ((fmt.compBit[i] & 7) || (fmt.compStart[i] & 7)) - { - properties.byteAligned = FALSE; - } - - if (resultBits < fmt.compStart[i] + fmt.compBit[i]) - { - resultBits = fmt.compStart[i] + fmt.compBit[i]; - } - - // Clear ADDR_EXPORT_NORM if can't be represented as 11-bit or smaller [-1..+1] format - if (fmt.compBit[i] > 11 || fmt.numType[i] >= ADDR_USCALED) - { - properties.exportNorm = FALSE; - } - - // Mark if there are any floating point components - if ((fmt.numType[i] == ADDR_U4FLOATC) || (fmt.numType[i] >= ADDR_S8FLOAT) ) - { - properties.floatComp = TRUE; - } - } - - // Convert the two input floats to integer values - for (i = 0; i < 2; i++) - { - Flt32sToInt32s(comps[i], fmt.compBit[i], fmt.numType[i], &values[i]); - } - - // Then pack the two integer components, in the proper order - Int32sToPixel(2, values, fmt.compBit, fmt.compStart, properties, resultBits, pPixel ); - -} - -/** -**************************************************************************************************** -* Flt32ToColorPixel -* -* @brief -* Convert a FLT_32 value to a red/green/blue/alpha pixel value -* -* @return -* N/A 
-**************************************************************************************************** -*/ -VOID ElemLib::Flt32ToColorPixel( - AddrColorFormat format, ///< [in] Color format - AddrSurfaceNumber surfNum, ///< [in] Surface number - AddrSurfaceSwap surfSwap, ///< [in] Surface swap - const ADDR_FLT_32 comps[4], ///< [in] four components of color - UINT_8* pPixel ///< [out] a red/green/blue/alpha pixel value - ) const -{ - PixelFormatInfo pixelInfo; - - UINT_32 i; - UINT_32 values[4]; - ComponentFlags properties; // byteAligned, exportNorm - UINT_32 resultBits = 0; // result bits: total bits per pixel after decompression - - memset(&pixelInfo, 0, sizeof(PixelFormatInfo)); - - PixGetColorCompInfo(format, surfNum, surfSwap, &pixelInfo); - - //initialize properties - properties.byteAligned = TRUE; - properties.exportNorm = TRUE; - properties.floatComp = FALSE; - - //set properties and result bits - for (i = 0; i < 4; i++) - { - if ( (pixelInfo.compBit[i] & 7) || (pixelInfo.compStart[i] & 7) ) - { - properties.byteAligned = FALSE; - } - - if (resultBits < pixelInfo.compStart[i] + pixelInfo.compBit[i]) - { - resultBits = pixelInfo.compStart[i] + pixelInfo.compBit[i]; - } - - if (m_fp16ExportNorm) - { - // Clear ADDR_EXPORT_NORM if can't be represented as 11-bit or smaller [-1..+1] format - // or if it's not FP and <=16 bits - if (((pixelInfo.compBit[i] > 11) || (pixelInfo.numType[i] >= ADDR_USCALED)) - && (pixelInfo.numType[i] !=ADDR_U4FLOATC)) - { - properties.exportNorm = FALSE; - } - } - else - { - // Clear ADDR_EXPORT_NORM if can't be represented as 11-bit or smaller [-1..+1] format - if (pixelInfo.compBit[i] > 11 || pixelInfo.numType[i] >= ADDR_USCALED) - { - properties.exportNorm = FALSE; - } - } - - // Mark if there are any floating point components - if ( (pixelInfo.numType[i] == ADDR_U4FLOATC) || - (pixelInfo.numType[i] >= ADDR_S8FLOAT) ) - { - properties.floatComp = TRUE; - } - } - - // Convert the four input floats to integer values - for (i = 0; i 
< 4; i++) - { - Flt32sToInt32s(comps[i], pixelInfo.compBit[i], pixelInfo.numType[i], &values[i]); - } - - // Then pack the four integer components, in the proper order - Int32sToPixel(4, values, &pixelInfo.compBit[0], &pixelInfo.compStart[0], - properties, resultBits, pPixel); -} - -/** -**************************************************************************************************** -* ElemLib::GetCompType -* -* @brief -* Fill per component info -* -* @return -* N/A -* -**************************************************************************************************** -*/ -VOID ElemLib::GetCompType( - AddrColorFormat format, ///< [in] surface format - AddrSurfaceNumber numType, ///< [in] number type - PixelFormatInfo* pInfo) ///< [in][out] per component info out -{ - BOOL_32 handled = FALSE; - - // Floating point formats override the number format - switch (format) - { - case ADDR_COLOR_16_FLOAT: // fall through for all pure floating point format - case ADDR_COLOR_16_16_FLOAT: - case ADDR_COLOR_16_16_16_16_FLOAT: - case ADDR_COLOR_32_FLOAT: - case ADDR_COLOR_32_32_FLOAT: - case ADDR_COLOR_32_32_32_32_FLOAT: - case ADDR_COLOR_10_11_11_FLOAT: - case ADDR_COLOR_11_11_10_FLOAT: - numType = ADDR_NUMBER_FLOAT; - break; - // Special handling for the depth formats - case ADDR_COLOR_8_24: // fall through for these 2 similar format - case ADDR_COLOR_24_8: - for (UINT_32 c = 0; c < 4; c++) - { - if (pInfo->compBit[c] == 8) - { - pInfo->numType[c] = ADDR_UINT_BITS; - } - else if (pInfo->compBit[c] == 24) - { - pInfo->numType[c] = ADDR_UNORM_R6XX; - } - else - { - pInfo->numType[c] = ADDR_NO_NUMBER; - } - } - handled = TRUE; - break; - case ADDR_COLOR_8_24_FLOAT: // fall through for these 3 similar format - case ADDR_COLOR_24_8_FLOAT: - case ADDR_COLOR_X24_8_32_FLOAT: - for (UINT_32 c = 0; c < 4; c++) - { - if (pInfo->compBit[c] == 8) - { - pInfo->numType[c] = ADDR_UINT_BITS; - } - else if (pInfo->compBit[c] == 24) - { - pInfo->numType[c] = ADDR_U4FLOATC; - } - else if 
(pInfo->compBit[c] == 32) - { - pInfo->numType[c] = ADDR_S8FLOAT32; - } - else - { - pInfo->numType[c] = ADDR_NO_NUMBER; - } - } - handled = TRUE; - break; - default: - break; - } - - if (!handled) - { - for (UINT_32 c = 0; c < 4; c++) - { - // Assign a number type for each component - AddrSurfaceNumber cnum; - - // First handle default component values - if (pInfo->compBit[c] == 0) - { - if (c < 3) - { - pInfo->numType[c] = ADDR_ZERO; // Default is zero for RGB - } - else if (numType == ADDR_NUMBER_UINT || numType == ADDR_NUMBER_SINT) - { - pInfo->numType[c] = ADDR_EPSILON; // Alpha INT_32 bits default is 0x01 - } - else - { - pInfo->numType[c] = ADDR_ONE; // Alpha normal default is float 1.0 - } - continue; - } - // Now handle small components - else if (pInfo->compBit[c] == 1) - { - if (numType == ADDR_NUMBER_UINT || numType == ADDR_NUMBER_SINT) - { - cnum = ADDR_NUMBER_UINT; - } - else - { - cnum = ADDR_NUMBER_UNORM; - } - } - else - { - cnum = numType; - } - - // If no default, set the number type fom num, compbits, and architecture - switch (cnum) - { - case ADDR_NUMBER_SRGB: - pInfo->numType[c] = (c < 3) ? ADDR_GAMMA8_R6XX : ADDR_UNORM_R6XX; - break; - case ADDR_NUMBER_UNORM: - pInfo->numType[c] = ADDR_UNORM_R6XX; - break; - case ADDR_NUMBER_SNORM: - pInfo->numType[c] = ADDR_SNORM_R6XX; - break; - case ADDR_NUMBER_USCALED: - pInfo->numType[c] = ADDR_USCALED; // @@ Do we need separate Pele routine? - break; - case ADDR_NUMBER_SSCALED: - pInfo->numType[c] = ADDR_SSCALED; // @@ Do we need separate Pele routine? 
- break; - case ADDR_NUMBER_FLOAT: - if (pInfo->compBit[c] == 32) - { - pInfo->numType[c] = ADDR_S8FLOAT32; - } - else if (pInfo->compBit[c] == 16) - { - pInfo->numType[c] = ADDR_S5FLOAT; - } - else if (pInfo->compBit[c] >= 10) - { - pInfo->numType[c] = ADDR_U5FLOAT; - } - else - { - ADDR_ASSERT_ALWAYS(); - } - break; - case ADDR_NUMBER_SINT: - pInfo->numType[c] = ADDR_SINT_BITS; - break; - case ADDR_NUMBER_UINT: - pInfo->numType[c] = ADDR_UINT_BITS; - break; - - default: - ADDR_ASSERT(!"Invalid number type"); - pInfo->numType[c] = ADDR_NO_NUMBER; - break; - } - } - } -} - -/** -**************************************************************************************************** -* ElemLib::GetCompSwap -* -* @brief -* Get components swapped for color surface -* -* @return -* N/A -* -**************************************************************************************************** -*/ -VOID ElemLib::GetCompSwap( - AddrSurfaceSwap swap, ///< [in] swap mode - PixelFormatInfo* pInfo) ///< [in,out] output per component info -{ - switch (pInfo->comps) - { - case 4: - switch (swap) - { - case ADDR_SWAP_ALT: - SwapComps( 0, 2, pInfo ); - break; // BGRA - case ADDR_SWAP_STD_REV: - SwapComps( 0, 3, pInfo ); - SwapComps( 1, 2, pInfo ); - break; // ABGR - case ADDR_SWAP_ALT_REV: - SwapComps( 0, 3, pInfo ); - SwapComps( 0, 2, pInfo ); - SwapComps( 0, 1, pInfo ); - break; // ARGB - default: - break; - } - break; - case 3: - switch (swap) - { - case ADDR_SWAP_ALT_REV: - SwapComps( 0, 3, pInfo ); - SwapComps( 0, 2, pInfo ); - break; // AGR - case ADDR_SWAP_STD_REV: - SwapComps( 0, 2, pInfo ); - break; // BGR - case ADDR_SWAP_ALT: - SwapComps( 2, 3, pInfo ); - break; // RGA - default: - break; // RGB - } - break; - case 2: - switch (swap) - { - case ADDR_SWAP_ALT_REV: - SwapComps( 0, 1, pInfo ); - SwapComps( 1, 3, pInfo ); - break; // AR - case ADDR_SWAP_STD_REV: - SwapComps( 0, 1, pInfo ); - break; // GR - case ADDR_SWAP_ALT: - SwapComps( 1, 3, pInfo ); - break; // RA - default: 
- break; // RG - } - break; - case 1: - switch (swap) - { - case ADDR_SWAP_ALT_REV: - SwapComps( 0, 3, pInfo ); - break; // A - case ADDR_SWAP_STD_REV: - SwapComps( 0, 2, pInfo ); - break; // B - case ADDR_SWAP_ALT: - SwapComps( 0, 1, pInfo ); - break; // G - default: - break; // R - } - break; - } -} - -/** -**************************************************************************************************** -* ElemLib::GetCompSwap -* -* @brief -* Get components swapped for color surface -* -* @return -* N/A -* -**************************************************************************************************** -*/ -VOID ElemLib::SwapComps( - UINT_32 c0, ///< [in] component index 0 - UINT_32 c1, ///< [in] component index 1 - PixelFormatInfo* pInfo) ///< [in,out] output per component info -{ - UINT_32 start; - UINT_32 bits; - - start = pInfo->compStart[c0]; - pInfo->compStart[c0] = pInfo->compStart[c1]; - pInfo->compStart[c1] = start; - - bits = pInfo->compBit[c0]; - pInfo->compBit[c0] = pInfo->compBit[c1]; - pInfo->compBit[c1] = bits; -} - -/** -**************************************************************************************************** -* ElemLib::PixGetColorCompInfo -* -* @brief -* Get per component info for color surface -* -* @return -* N/A -* -**************************************************************************************************** -*/ -VOID ElemLib::PixGetColorCompInfo( - AddrColorFormat format, ///< [in] surface format, read from register - AddrSurfaceNumber number, ///< [in] pixel number type - AddrSurfaceSwap swap, ///< [in] component swap mode - PixelFormatInfo* pInfo ///< [out] output per component info - ) const -{ - // 1. 
Get componet bits - switch (format) - { - case ADDR_COLOR_8: - GetCompBits(8, 0, 0, 0, pInfo); - break; - case ADDR_COLOR_1_5_5_5: - GetCompBits(5, 5, 5, 1, pInfo); - break; - case ADDR_COLOR_5_6_5: - GetCompBits(8, 6, 5, 0, pInfo); - break; - case ADDR_COLOR_6_5_5: - GetCompBits(5, 5, 6, 0, pInfo); - break; - case ADDR_COLOR_8_8: - GetCompBits(8, 8, 0, 0, pInfo); - break; - case ADDR_COLOR_4_4_4_4: - GetCompBits(4, 4, 4, 4, pInfo); - break; - case ADDR_COLOR_16: - GetCompBits(16, 0, 0, 0, pInfo); - break; - case ADDR_COLOR_8_8_8_8: - GetCompBits(8, 8, 8, 8, pInfo); - break; - case ADDR_COLOR_2_10_10_10: - GetCompBits(10, 10, 10, 2, pInfo); - break; - case ADDR_COLOR_10_11_11: - GetCompBits(11, 11, 10, 0, pInfo); - break; - case ADDR_COLOR_11_11_10: - GetCompBits(10, 11, 11, 0, pInfo); - break; - case ADDR_COLOR_16_16: - GetCompBits(16, 16, 0, 0, pInfo); - break; - case ADDR_COLOR_16_16_16_16: - GetCompBits(16, 16, 16, 16, pInfo); - break; - case ADDR_COLOR_16_FLOAT: - GetCompBits(16, 0, 0, 0, pInfo); - break; - case ADDR_COLOR_16_16_FLOAT: - GetCompBits(16, 16, 0, 0, pInfo); - break; - case ADDR_COLOR_32_FLOAT: - GetCompBits(32, 0, 0, 0, pInfo); - break; - case ADDR_COLOR_32_32_FLOAT: - GetCompBits(32, 32, 0, 0, pInfo); - break; - case ADDR_COLOR_16_16_16_16_FLOAT: - GetCompBits(16, 16, 16, 16, pInfo); - break; - case ADDR_COLOR_32_32_32_32_FLOAT: - GetCompBits(32, 32, 32, 32, pInfo); - break; - - case ADDR_COLOR_32: - GetCompBits(32, 0, 0, 0, pInfo); - break; - case ADDR_COLOR_32_32: - GetCompBits(32, 32, 0, 0, pInfo); - break; - case ADDR_COLOR_32_32_32_32: - GetCompBits(32, 32, 32, 32, pInfo); - break; - case ADDR_COLOR_10_10_10_2: - GetCompBits(2, 10, 10, 10, pInfo); - break; - case ADDR_COLOR_10_11_11_FLOAT: - GetCompBits(11, 11, 10, 0, pInfo); - break; - case ADDR_COLOR_11_11_10_FLOAT: - GetCompBits(10, 11, 11, 0, pInfo); - break; - case ADDR_COLOR_5_5_5_1: - GetCompBits(1, 5, 5, 5, pInfo); - break; - case ADDR_COLOR_3_3_2: - GetCompBits(2, 3, 3, 0, pInfo); 
- break; - case ADDR_COLOR_4_4: - GetCompBits(4, 4, 0, 0, pInfo); - break; - case ADDR_COLOR_8_24: - case ADDR_COLOR_8_24_FLOAT: // same bit count, fall through - GetCompBits(24, 8, 0, 0, pInfo); - break; - case ADDR_COLOR_24_8: - case ADDR_COLOR_24_8_FLOAT: // same bit count, fall through - GetCompBits(8, 24, 0, 0, pInfo); - break; - case ADDR_COLOR_X24_8_32_FLOAT: - GetCompBits(32, 8, 0, 0, pInfo); - break; - - case ADDR_COLOR_INVALID: - GetCompBits(0, 0, 0, 0, pInfo); - break; - default: - ADDR_ASSERT(0); - GetCompBits(0, 0, 0, 0, pInfo); - break; - } - - // 2. Get component number type - - GetCompType(format, number, pInfo); - - // 3. Swap components if needed - - GetCompSwap(swap, pInfo); -} - -/** -**************************************************************************************************** -* ElemLib::PixGetDepthCompInfo -* -* @brief -* Get per component info for depth surface -* -* @return -* N/A -* -**************************************************************************************************** -*/ -VOID ElemLib::PixGetDepthCompInfo( - AddrDepthFormat format, ///< [in] surface format, read from register - PixelFormatInfo* pInfo ///< [out] output per component bits and type - ) const -{ - if (m_depthPlanarType == ADDR_DEPTH_PLANAR_R800) - { - if (format == ADDR_DEPTH_8_24_FLOAT) - { - format = ADDR_DEPTH_X24_8_32_FLOAT; // Use this format to represent R800's D24FS8 - } - - if (format == ADDR_DEPTH_X8_24_FLOAT) - { - format = ADDR_DEPTH_32_FLOAT; - } - } - - switch (format) - { - case ADDR_DEPTH_16: - GetCompBits(16, 0, 0, 0, pInfo); - break; - case ADDR_DEPTH_8_24: - case ADDR_DEPTH_8_24_FLOAT: // similar format, fall through - GetCompBits(24, 8, 0, 0, pInfo); - break; - case ADDR_DEPTH_X8_24: - case ADDR_DEPTH_X8_24_FLOAT: // similar format, fall through - GetCompBits(24, 0, 0, 0, pInfo); - break; - case ADDR_DEPTH_32_FLOAT: - GetCompBits(32, 0, 0, 0, pInfo); - break; - case ADDR_DEPTH_X24_8_32_FLOAT: - GetCompBits(32, 8, 0, 0, pInfo); - break; 
- case ADDR_DEPTH_INVALID: - GetCompBits(0, 0, 0, 0, pInfo); - break; - default: - ADDR_ASSERT(0); - GetCompBits(0, 0, 0, 0, pInfo); - break; - } - - switch (format) - { - case ADDR_DEPTH_16: - pInfo->numType [0] = ADDR_UNORM_R6XX; - pInfo->numType [1] = ADDR_ZERO; - break; - case ADDR_DEPTH_8_24: - pInfo->numType [0] = ADDR_UNORM_R6XXDB; - pInfo->numType [1] = ADDR_UINT_BITS; - break; - case ADDR_DEPTH_8_24_FLOAT: - pInfo->numType [0] = ADDR_U4FLOATC; - pInfo->numType [1] = ADDR_UINT_BITS; - break; - case ADDR_DEPTH_X8_24: - pInfo->numType [0] = ADDR_UNORM_R6XXDB; - pInfo->numType [1] = ADDR_ZERO; - break; - case ADDR_DEPTH_X8_24_FLOAT: - pInfo->numType [0] = ADDR_U4FLOATC; - pInfo->numType [1] = ADDR_ZERO; - break; - case ADDR_DEPTH_32_FLOAT: - pInfo->numType [0] = ADDR_S8FLOAT32; - pInfo->numType [1] = ADDR_ZERO; - break; - case ADDR_DEPTH_X24_8_32_FLOAT: - pInfo->numType [0] = ADDR_S8FLOAT32; - pInfo->numType [1] = ADDR_UINT_BITS; - break; - default: - pInfo->numType [0] = ADDR_NO_NUMBER; - pInfo->numType [1] = ADDR_NO_NUMBER; - break; - } - - pInfo->numType [2] = ADDR_NO_NUMBER; - pInfo->numType [3] = ADDR_NO_NUMBER; -} - -/** -**************************************************************************************************** -* ElemLib::PixGetExportNorm -* -* @brief -* Check if fp16 export norm can be enabled. -* -* @return -* TRUE if this can be enabled. 
-* -**************************************************************************************************** -*/ -BOOL_32 ElemLib::PixGetExportNorm( - AddrColorFormat colorFmt, ///< [in] surface format, read from register - AddrSurfaceNumber numberFmt, ///< [in] pixel number type - AddrSurfaceSwap swap ///< [in] components swap type - ) const -{ - BOOL_32 enabled = TRUE; - - PixelFormatInfo formatInfo; - - PixGetColorCompInfo(colorFmt, numberFmt, swap, &formatInfo); - - for (UINT_32 c = 0; c < 4; c++) - { - if (m_fp16ExportNorm) - { - if (((formatInfo.compBit[c] > 11) || (formatInfo.numType[c] > ADDR_USCALED)) && - (formatInfo.numType[c] != ADDR_U4FLOATC) && - (formatInfo.numType[c] != ADDR_S5FLOAT) && - (formatInfo.numType[c] != ADDR_S5FLOATM) && - (formatInfo.numType[c] != ADDR_U5FLOAT) && - (formatInfo.numType[c] != ADDR_U3FLOATM)) - { - enabled = FALSE; - break; - } - } - else - { - if ((formatInfo.compBit[c] > 11) || (formatInfo.numType[c] > ADDR_USCALED)) - { - enabled = FALSE; - break; - } - } - } - - return enabled; -} - -/** -**************************************************************************************************** -* ElemLib::AdjustSurfaceInfo -* -* @brief -* Adjust bpp/base pitch/width/height according to elemMode and expandX/Y -* -* @return -* N/A -**************************************************************************************************** -*/ -VOID ElemLib::AdjustSurfaceInfo( - ElemMode elemMode, ///< [in] element mode - UINT_32 expandX, ///< [in] decompression expansion factor in X - UINT_32 expandY, ///< [in] decompression expansion factor in Y - UINT_32* pBpp, ///< [in,out] bpp - UINT_32* pBasePitch, ///< [in,out] base pitch - UINT_32* pWidth, ///< [in,out] width - UINT_32* pHeight) ///< [in,out] height -{ - UINT_32 packedBits; - UINT_32 basePitch; - UINT_32 width; - UINT_32 height; - UINT_32 bpp; - BOOL_32 bBCnFormat = FALSE; - - ADDR_ASSERT(pBpp != NULL); - ADDR_ASSERT(pWidth != NULL && pHeight != NULL && pBasePitch != NULL); - - if 
(pBpp) - { - bpp = *pBpp; - - switch (elemMode) - { - case ADDR_EXPANDED: - packedBits = bpp / expandX / expandY; - break; - case ADDR_PACKED_STD: // Different bit order - case ADDR_PACKED_REV: - packedBits = bpp * expandX * expandY; - break; - case ADDR_PACKED_GBGR: - case ADDR_PACKED_BGRG: - packedBits = bpp; // 32-bit packed ==> 2 32-bit result - break; - case ADDR_PACKED_BC1: // Fall through - case ADDR_PACKED_BC4: - packedBits = 64; - bBCnFormat = TRUE; - break; - case ADDR_PACKED_BC2: // Fall through - case ADDR_PACKED_BC3: // Fall through - case ADDR_PACKED_BC5: // Fall through - bBCnFormat = TRUE; - // fall through - case ADDR_PACKED_ASTC: - case ADDR_PACKED_ETC2_128BPP: - packedBits = 128; - break; - case ADDR_PACKED_ETC2_64BPP: - packedBits = 64; - break; - case ADDR_ROUND_BY_HALF: // Fall through - case ADDR_ROUND_TRUNCATE: // Fall through - case ADDR_ROUND_DITHER: // Fall through - case ADDR_UNCOMPRESSED: - packedBits = bpp; - break; - default: - packedBits = bpp; - ADDR_ASSERT_ALWAYS(); - break; - } - - *pBpp = packedBits; - } - - if (pWidth && pHeight && pBasePitch) - { - basePitch = *pBasePitch; - width = *pWidth; - height = *pHeight; - - if ((expandX > 1) || (expandY > 1)) - { - if (elemMode == ADDR_EXPANDED) - { - basePitch *= expandX; - width *= expandX; - height *= expandY; - } - else - { - // Evergreen family workaround - if (bBCnFormat && (m_pAddrLib->GetChipFamily() == ADDR_CHIP_FAMILY_R8XX)) - { - // For BCn we now pad it to POW2 at the beginning so it is safe to - // divide by 4 directly - basePitch = basePitch / expandX; - width = width / expandX; - height = height / expandY; -#if DEBUG - width = (width == 0) ? 1 : width; - height = (height == 0) ? 
1 : height; - - if ((*pWidth > PowTwoAlign(width, 8) * expandX) || - (*pHeight > PowTwoAlign(height, 8) * expandY)) // 8 is 1D tiling alignment - { - // if this assertion is hit we may have issues if app samples - // rightmost/bottommost pixels - ADDR_ASSERT_ALWAYS(); - } -#endif - } - else // Not BCn format we still keep old way (FMT_1? No real test yet) - { - basePitch = (basePitch + expandX - 1) / expandX; - width = (width + expandX - 1) / expandX; - height = (height + expandY - 1) / expandY; - } - } - - *pBasePitch = basePitch; // 0 is legal value for base pitch. - *pWidth = (width == 0) ? 1 : width; - *pHeight = (height == 0) ? 1 : height; - } //if (pWidth && pHeight && pBasePitch) - } -} - -/** -**************************************************************************************************** -* ElemLib::RestoreSurfaceInfo -* -* @brief -* Reverse operation of AdjustSurfaceInfo -* -* @return -* N/A -**************************************************************************************************** -*/ -VOID ElemLib::RestoreSurfaceInfo( - ElemMode elemMode, ///< [in] element mode - UINT_32 expandX, ///< [in] decompression expansion factor in X - UINT_32 expandY, ///< [out] decompression expansion factor in Y - UINT_32* pBpp, ///< [in,out] bpp - UINT_32* pWidth, ///< [in,out] width - UINT_32* pHeight) ///< [in,out] height -{ - UINT_32 originalBits; - UINT_32 width; - UINT_32 height; - UINT_32 bpp; - - BOOL_32 bBCnFormat = FALSE; - (void)bBCnFormat; - - ADDR_ASSERT(pBpp != NULL); - ADDR_ASSERT(pWidth != NULL && pHeight != NULL); - - if (pBpp) - { - bpp = *pBpp; - - switch (elemMode) - { - case ADDR_EXPANDED: - originalBits = bpp * expandX * expandY; - break; - case ADDR_PACKED_STD: // Different bit order - case ADDR_PACKED_REV: - originalBits = bpp / expandX / expandY; - break; - case ADDR_PACKED_GBGR: - case ADDR_PACKED_BGRG: - originalBits = bpp; // 32-bit packed ==> 2 32-bit result - break; - case ADDR_PACKED_BC1: // Fall through - case ADDR_PACKED_BC4: - 
originalBits = 64; - bBCnFormat = TRUE; - break; - case ADDR_PACKED_BC2: // Fall through - case ADDR_PACKED_BC3: // Fall through - case ADDR_PACKED_BC5: - bBCnFormat = TRUE; - // fall through - case ADDR_PACKED_ASTC: - case ADDR_PACKED_ETC2_128BPP: - originalBits = 128; - break; - case ADDR_PACKED_ETC2_64BPP: - originalBits = 64; - break; - case ADDR_ROUND_BY_HALF: // Fall through - case ADDR_ROUND_TRUNCATE: // Fall through - case ADDR_ROUND_DITHER: // Fall through - case ADDR_UNCOMPRESSED: - originalBits = bpp; - break; - default: - originalBits = bpp; - ADDR_ASSERT_ALWAYS(); - break; - } - - *pBpp = originalBits; - } - - if (pWidth && pHeight) - { - width = *pWidth; - height = *pHeight; - - if ((expandX > 1) || (expandY > 1)) - { - if (elemMode == ADDR_EXPANDED) - { - width /= expandX; - height /= expandY; - } - else - { - width *= expandX; - height *= expandY; - } - } - - *pWidth = (width == 0) ? 1 : width; - *pHeight = (height == 0) ? 1 : height; - } -} - -/** -**************************************************************************************************** -* ElemLib::GetBitsPerPixel -* -* @brief -* Compute the total bits per element according to a format -* code. For compressed formats, this is not the same as -* the number of bits per decompressed element. 
-* -* @return -* Bits per pixel -**************************************************************************************************** -*/ -UINT_32 ElemLib::GetBitsPerPixel( - AddrFormat format, ///< [in] surface format code - ElemMode* pElemMode, ///< [out] element mode - UINT_32* pExpandX, ///< [out] decompression expansion factor in X - UINT_32* pExpandY, ///< [out] decompression expansion factor in Y - UINT_32* pUnusedBits) ///< [out] bits unused -{ - UINT_32 bpp; - UINT_32 expandX = 1; - UINT_32 expandY = 1; - UINT_32 bitUnused = 0; - ElemMode elemMode = ADDR_UNCOMPRESSED; // default value - - switch (format) - { - case ADDR_FMT_8: - bpp = 8; - break; - case ADDR_FMT_1_5_5_5: - case ADDR_FMT_5_6_5: - case ADDR_FMT_6_5_5: - case ADDR_FMT_8_8: - case ADDR_FMT_4_4_4_4: - case ADDR_FMT_16: - case ADDR_FMT_16_FLOAT: - bpp = 16; - break; - case ADDR_FMT_GB_GR: // treat as FMT_8_8 - elemMode = ADDR_PACKED_GBGR; - bpp = 16; - break; - case ADDR_FMT_BG_RG: // treat as FMT_8_8 - elemMode = ADDR_PACKED_BGRG; - bpp = 16; - break; - case ADDR_FMT_8_8_8_8: - case ADDR_FMT_2_10_10_10: - case ADDR_FMT_10_11_11: - case ADDR_FMT_11_11_10: - case ADDR_FMT_16_16: - case ADDR_FMT_16_16_FLOAT: - case ADDR_FMT_32: - case ADDR_FMT_32_FLOAT: - case ADDR_FMT_24_8: - case ADDR_FMT_24_8_FLOAT: - bpp = 32; - break; - case ADDR_FMT_16_16_16_16: - case ADDR_FMT_16_16_16_16_FLOAT: - case ADDR_FMT_32_32: - case ADDR_FMT_32_32_FLOAT: - case ADDR_FMT_CTX1: - bpp = 64; - break; - case ADDR_FMT_32_32_32_32: - case ADDR_FMT_32_32_32_32_FLOAT: - bpp = 128; - break; - case ADDR_FMT_INVALID: - bpp = 0; - break; - case ADDR_FMT_1_REVERSED: - elemMode = ADDR_PACKED_REV; - expandX = 8; - bpp = 1; - break; - case ADDR_FMT_1: - elemMode = ADDR_PACKED_STD; - expandX = 8; - bpp = 1; - break; - case ADDR_FMT_4_4: - case ADDR_FMT_3_3_2: - bpp = 8; - break; - case ADDR_FMT_5_5_5_1: - bpp = 16; - break; - case ADDR_FMT_32_AS_8: - case ADDR_FMT_32_AS_8_8: - case ADDR_FMT_8_24: - case ADDR_FMT_8_24_FLOAT: - case 
ADDR_FMT_10_10_10_2: - case ADDR_FMT_10_11_11_FLOAT: - case ADDR_FMT_11_11_10_FLOAT: - case ADDR_FMT_5_9_9_9_SHAREDEXP: - bpp = 32; - break; - case ADDR_FMT_X24_8_32_FLOAT: - bpp = 64; - bitUnused = 24; - break; - case ADDR_FMT_8_8_8: - elemMode = ADDR_EXPANDED; - bpp = 24;//@@ 8; // read 3 elements per pixel - expandX = 3; - break; - case ADDR_FMT_16_16_16: - case ADDR_FMT_16_16_16_FLOAT: - elemMode = ADDR_EXPANDED; - bpp = 48;//@@ 16; // read 3 elements per pixel - expandX = 3; - break; - case ADDR_FMT_32_32_32_FLOAT: - case ADDR_FMT_32_32_32: - elemMode = ADDR_EXPANDED; - expandX = 3; - bpp = 96;//@@ 32; // read 3 elements per pixel - break; - case ADDR_FMT_BC1: - elemMode = ADDR_PACKED_BC1; - expandX = 4; - expandY = 4; - bpp = 64; - break; - case ADDR_FMT_BC4: - elemMode = ADDR_PACKED_BC4; - expandX = 4; - expandY = 4; - bpp = 64; - break; - case ADDR_FMT_BC2: - elemMode = ADDR_PACKED_BC2; - expandX = 4; - expandY = 4; - bpp = 128; - break; - case ADDR_FMT_BC3: - elemMode = ADDR_PACKED_BC3; - expandX = 4; - expandY = 4; - bpp = 128; - break; - case ADDR_FMT_BC5: - case ADDR_FMT_BC6: // reuse ADDR_PACKED_BC5 - case ADDR_FMT_BC7: // reuse ADDR_PACKED_BC5 - elemMode = ADDR_PACKED_BC5; - expandX = 4; - expandY = 4; - bpp = 128; - break; - - case ADDR_FMT_ETC2_64BPP: - elemMode = ADDR_PACKED_ETC2_64BPP; - expandX = 4; - expandY = 4; - bpp = 64; - break; - - case ADDR_FMT_ETC2_128BPP: - elemMode = ADDR_PACKED_ETC2_128BPP; - expandX = 4; - expandY = 4; - bpp = 128; - break; - - case ADDR_FMT_ASTC_4x4: - elemMode = ADDR_PACKED_ASTC; - expandX = 4; - expandY = 4; - bpp = 128; - break; - - case ADDR_FMT_ASTC_5x4: - elemMode = ADDR_PACKED_ASTC; - expandX = 5; - expandY = 4; - bpp = 128; - break; - - case ADDR_FMT_ASTC_5x5: - elemMode = ADDR_PACKED_ASTC; - expandX = 5; - expandY = 5; - bpp = 128; - break; - - case ADDR_FMT_ASTC_6x5: - elemMode = ADDR_PACKED_ASTC; - expandX = 6; - expandY = 5; - bpp = 128; - break; - - case ADDR_FMT_ASTC_6x6: - elemMode = ADDR_PACKED_ASTC; 
- expandX = 6; - expandY = 6; - bpp = 128; - break; - - case ADDR_FMT_ASTC_8x5: - elemMode = ADDR_PACKED_ASTC; - expandX = 8; - expandY = 5; - bpp = 128; - break; - - case ADDR_FMT_ASTC_8x6: - elemMode = ADDR_PACKED_ASTC; - expandX = 8; - expandY = 6; - bpp = 128; - break; - - case ADDR_FMT_ASTC_8x8: - elemMode = ADDR_PACKED_ASTC; - expandX = 8; - expandY = 8; - bpp = 128; - break; - - case ADDR_FMT_ASTC_10x5: - elemMode = ADDR_PACKED_ASTC; - expandX = 10; - expandY = 5; - bpp = 128; - break; - - case ADDR_FMT_ASTC_10x6: - elemMode = ADDR_PACKED_ASTC; - expandX = 10; - expandY = 6; - bpp = 128; - break; - - case ADDR_FMT_ASTC_10x8: - elemMode = ADDR_PACKED_ASTC; - expandX = 10; - expandY = 8; - bpp = 128; - break; - - case ADDR_FMT_ASTC_10x10: - elemMode = ADDR_PACKED_ASTC; - expandX = 10; - expandY = 10; - bpp = 128; - break; - - case ADDR_FMT_ASTC_12x10: - elemMode = ADDR_PACKED_ASTC; - expandX = 12; - expandY = 10; - bpp = 128; - break; - - case ADDR_FMT_ASTC_12x12: - elemMode = ADDR_PACKED_ASTC; - expandX = 12; - expandY = 12; - bpp = 128; - break; - - default: - bpp = 0; - ADDR_ASSERT_ALWAYS(); - break; - // @@ or should this be an error? - } - - SafeAssign(pExpandX, expandX); - SafeAssign(pExpandY, expandY); - SafeAssign(pUnusedBits, bitUnused); - SafeAssign(reinterpret_cast(pElemMode), elemMode); - - return bpp; -} - -/** -**************************************************************************************************** -* ElemLib::GetCompBits -* -* @brief -* Set each component's bit size and bit start. 
And set element mode and number type -* -* @return -* N/A -**************************************************************************************************** -*/ -VOID ElemLib::GetCompBits( - UINT_32 c0, ///< [in] bits of component 0 - UINT_32 c1, ///< [in] bits of component 1 - UINT_32 c2, ///< [in] bits of component 2 - UINT_32 c3, ///< [in] bits of component 3 - PixelFormatInfo* pInfo, ///< [out] per component info out - ElemMode elemMode) ///< [in] element mode -{ - pInfo->comps = 0; - - pInfo->compBit[0] = c0; - pInfo->compBit[1] = c1; - pInfo->compBit[2] = c2; - pInfo->compBit[3] = c3; - - pInfo->compStart[0] = 0; - pInfo->compStart[1] = c0; - pInfo->compStart[2] = c0+c1; - pInfo->compStart[3] = c0+c1+c2; - - pInfo->elemMode = elemMode; - // still needed since component swap may depend on number of components - for (INT i=0; i<4; i++) - { - if (pInfo->compBit[i] == 0) - { - pInfo->compStart[i] = 0; // all null components start at bit 0 - pInfo->numType[i] = ADDR_NO_NUMBER; // and have no number type - } - else - { - pInfo->comps++; - } - } -} - -/** -**************************************************************************************************** -* ElemLib::GetCompBits -* -* @brief -* Set the clear color (or clear depth/stencil) for a surface -* -* @note -* If clearColor is zero, a default clear value is used in place of comps[4]. 
-* If float32 is set, full precision is used, else the mantissa is reduced to 12-bits -* -* @return -* N/A -**************************************************************************************************** -*/ -VOID ElemLib::SetClearComps( - ADDR_FLT_32 comps[4], ///< [in,out] components - BOOL_32 clearColor, ///< [in] TRUE if clear color is set (CLEAR_COLOR) - BOOL_32 float32) ///< [in] TRUE if float32 component (BLEND_FLOAT32) -{ - INT_32 i; - - // Use default clearvalues if clearColor is disabled - if (clearColor == FALSE) - { - for (i=0; i<3; i++) - { - comps[i].f = 0.0; - } - comps[3].f = 1.0; - } - - // Otherwise use the (modified) clear value - else - { - for (i=0; i<4; i++) - { // If full precision, use clear value unchanged - if (float32) - { - // Do nothing - //comps[i] = comps[i]; - } - // Else if it is a NaN, use the standard NaN value - else if ((comps[i].u & 0x7FFFFFFF) > 0x7F800000) - { - comps[i].u = 0xFFC00000; - } - // Else reduce the mantissa precision - else - { - comps[i].u = comps[i].u & 0xFFFFF000; - } - } - } -} - -/** -**************************************************************************************************** -* ElemLib::IsBlockCompressed -* -* @brief -* TRUE if this is block compressed format -* -* @note -* -* @return -* BOOL_32 -**************************************************************************************************** -*/ -BOOL_32 ElemLib::IsBlockCompressed( - AddrFormat format) ///< [in] Format -{ - return (((format >= ADDR_FMT_BC1) && (format <= ADDR_FMT_BC7)) || - ((format >= ADDR_FMT_ASTC_4x4) && (format <= ADDR_FMT_ETC2_128BPP))); -} - - -/** -**************************************************************************************************** -* ElemLib::IsCompressed -* -* @brief -* TRUE if this is block compressed format or 1 bit format -* -* @note -* -* @return -* BOOL_32 -**************************************************************************************************** -*/ -BOOL_32 ElemLib::IsCompressed( - 
AddrFormat format) ///< [in] Format -{ - return IsBlockCompressed(format) || format == ADDR_FMT_BC1 || format == ADDR_FMT_BC7; -} - -/** -**************************************************************************************************** -* ElemLib::IsExpand3x -* -* @brief -* TRUE if this is 3x expand format -* -* @note -* -* @return -* BOOL_32 -**************************************************************************************************** -*/ -BOOL_32 ElemLib::IsExpand3x( - AddrFormat format) ///< [in] Format -{ - BOOL_32 is3x = FALSE; - - switch (format) - { - case ADDR_FMT_8_8_8: - case ADDR_FMT_16_16_16: - case ADDR_FMT_16_16_16_FLOAT: - case ADDR_FMT_32_32_32: - case ADDR_FMT_32_32_32_FLOAT: - is3x = TRUE; - break; - default: - break; - } - - return is3x; -} - -/** -**************************************************************************************************** -* ElemLib::IsMacroPixelPacked -* -* @brief -* TRUE if this is a macro-pixel-packed format. -* -* @note -* -* @return -* BOOL_32 -**************************************************************************************************** -*/ -BOOL_32 ElemLib::IsMacroPixelPacked( - AddrFormat format) ///< [in] Format -{ - BOOL_32 isMacroPixelPacked = FALSE; - - switch (format) - { - case ADDR_FMT_BG_RG: - case ADDR_FMT_GB_GR: - isMacroPixelPacked = TRUE; - break; - default: - break; - } - - return isMacroPixelPacked; -} - -} diff -Nru mesa-18.3.3/src/amd/addrlib/core/addrelemlib.h mesa-19.0.1/src/amd/addrlib/core/addrelemlib.h --- mesa-18.3.3/src/amd/addrlib/core/addrelemlib.h 2017-11-05 00:14:08.000000000 +0000 +++ mesa-19.0.1/src/amd/addrlib/core/addrelemlib.h 1970-01-01 00:00:00.000000000 +0000 @@ -1,279 +0,0 @@ -/* - * Copyright © 2014 Advanced Micro Devices, Inc. - * All Rights Reserved. 
- * - * Permission is hereby granted, free of charge, to any person obtaining - * a copy of this software and associated documentation files (the - * "Software"), to deal in the Software without restriction, including - * without limitation the rights to use, copy, modify, merge, publish, - * distribute, sub license, and/or sell copies of the Software, and to - * permit persons to whom the Software is furnished to do so, subject to - * the following conditions: - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES - * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - * NON-INFRINGEMENT. IN NO EVENT SHALL THE COPYRIGHT HOLDERS, AUTHORS - * AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, - * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE - * USE OR OTHER DEALINGS IN THE SOFTWARE. - * - * The above copyright notice and this permission notice (including the - * next paragraph) shall be included in all copies or substantial portions - * of the Software. - */ - -/** -**************************************************************************************************** -* @file addrelemlib.h -* @brief Contains the class for element/pixel related functions. 
-**************************************************************************************************** -*/ - -#ifndef __ELEM_LIB_H__ -#define __ELEM_LIB_H__ - -#include "addrinterface.h" -#include "addrobject.h" -#include "addrcommon.h" - -namespace Addr -{ - -class Lib; - -// The masks for property bits within the Properties INT_32 -union ComponentFlags -{ - struct - { - UINT_32 byteAligned : 1; ///< all components are byte aligned - UINT_32 exportNorm : 1; ///< components support R6xx NORM compression - UINT_32 floatComp : 1; ///< there is at least one floating point component - }; - - UINT_32 value; -}; - -// Copy from legacy lib's NumberType -enum NumberType -{ - // The following number types have the range [-1..1] - ADDR_NO_NUMBER, // This component doesn't exist and has no default value - ADDR_EPSILON, // Force component value to integer 0x00000001 - ADDR_ZERO, // Force component value to integer 0x00000000 - ADDR_ONE, // Force component value to floating point 1.0 - // Above values don't have any bits per component (keep ADDR_ONE the last of these) - - ADDR_UNORM, // Unsigned normalized (repeating fraction) full precision - ADDR_SNORM, // Signed normalized (repeating fraction) full precision - ADDR_GAMMA, // Gamma-corrected, full precision - - ADDR_UNORM_R5XXRB, // Unsigned normalized (repeating fraction) for r5xx RB - ADDR_SNORM_R5XXRB, // Signed normalized (repeating fraction) for r5xx RB - ADDR_GAMMA_R5XXRB, // Gamma-corrected for r5xx RB (note: unnormalized value) - ADDR_UNORM_R5XXBC, // Unsigned normalized (repeating fraction) for r5xx BC - ADDR_SNORM_R5XXBC, // Signed normalized (repeating fraction) for r5xx BC - ADDR_GAMMA_R5XXBC, // Gamma-corrected for r5xx BC (note: unnormalized value) - - ADDR_UNORM_R6XX, // Unsigned normalized (repeating fraction) for R6xx - ADDR_UNORM_R6XXDB, // Unorms for 24-bit depth: one value differs from ADDR_UNORM_R6XX - ADDR_SNORM_R6XX, // Signed normalized (repeating fraction) for R6xx - ADDR_GAMMA8_R6XX, // 
Gamma-corrected for r6xx - ADDR_GAMMA8_R7XX_TP, // Gamma-corrected for r7xx TP 12bit unorm 8.4. - - ADDR_U4FLOATC, // Unsigned float: 4-bit exponent, bias=15, no NaN, clamp [0..1] - ADDR_GAMMA_4SEG, // Gamma-corrected, four segment approximation - ADDR_U0FIXED, // Unsigned 0.N-bit fixed point - - // The following number types have large ranges (LEAVE ADDR_USCALED first or fix Finish routine) - ADDR_USCALED, // Unsigned integer converted to/from floating point - ADDR_SSCALED, // Signed integer converted to/from floating point - ADDR_USCALED_R5XXRB, // Unsigned integer to/from floating point for r5xx RB - ADDR_SSCALED_R5XXRB, // Signed integer to/from floating point for r5xx RB - ADDR_UINT_BITS, // Keep in unsigned integer form, clamped to specified range - ADDR_SINT_BITS, // Keep in signed integer form, clamped to specified range - ADDR_UINTBITS, // @@ remove Keep in unsigned integer form, use modulus to reduce bits - ADDR_SINTBITS, // @@ remove Keep in signed integer form, use modulus to reduce bits - - // The following number types and ADDR_U4FLOATC have exponents - // (LEAVE ADDR_S8FLOAT first or fix Finish routine) - ADDR_S8FLOAT, // Signed floating point with 8-bit exponent, bias=127 - ADDR_S8FLOAT32, // 32-bit IEEE float, passes through NaN values - ADDR_S5FLOAT, // Signed floating point with 5-bit exponent, bias=15 - ADDR_S5FLOATM, // Signed floating point with 5-bit exponent, bias=15, no NaN/Inf - ADDR_U5FLOAT, // Signed floating point with 5-bit exponent, bias=15 - ADDR_U3FLOATM, // Unsigned floating point with 3-bit exponent, bias=3 - - ADDR_S5FIXED, // Signed 5.N-bit fixed point, with rounding - - ADDR_END_NUMBER // Used for range comparisons -}; - -// Copy from legacy lib's AddrElement -enum ElemMode -{ - // These formats allow both packing an unpacking - ADDR_ROUND_BY_HALF, // add 1/2 and truncate when packing this element - ADDR_ROUND_TRUNCATE, // truncate toward 0 for sign/mag, else toward neg - ADDR_ROUND_DITHER, // Pack by dithering -- requires 
(x,y) position - - // These formats only allow unpacking, no packing - ADDR_UNCOMPRESSED, // Elements are not compressed: one data element per pixel/texel - ADDR_EXPANDED, // Elements are split up and stored in multiple data elements - ADDR_PACKED_STD, // Elements are compressed into ExpandX by ExpandY data elements - ADDR_PACKED_REV, // Like ADDR_PACKED, but X order of pixels is reverved - ADDR_PACKED_GBGR, // Elements are compressed 4:2:2 in G1B_G0R order (high to low) - ADDR_PACKED_BGRG, // Elements are compressed 4:2:2 in BG1_RG0 order (high to low) - ADDR_PACKED_BC1, // Each data element is uncompressed to a 4x4 pixel/texel array - ADDR_PACKED_BC2, // Each data element is uncompressed to a 4x4 pixel/texel array - ADDR_PACKED_BC3, // Each data element is uncompressed to a 4x4 pixel/texel array - ADDR_PACKED_BC4, // Each data element is uncompressed to a 4x4 pixel/texel array - ADDR_PACKED_BC5, // Each data element is uncompressed to a 4x4 pixel/texel array - ADDR_PACKED_ETC2_64BPP, // ETC2 formats that use 64bpp to represent each 4x4 block - ADDR_PACKED_ETC2_128BPP, // ETC2 formats that use 128bpp to represent each 4x4 block - ADDR_PACKED_ASTC, // Various ASTC formats, all are 128bpp with varying block sizes - - // These formats provide various kinds of compression - ADDR_ZPLANE_R5XX, // Compressed Zplane using r5xx architecture format - ADDR_ZPLANE_R6XX, // Compressed Zplane using r6xx architecture format - //@@ Fill in the compression modes - - ADDR_END_ELEMENT // Used for range comparisons -}; - -enum DepthPlanarType -{ - ADDR_DEPTH_PLANAR_NONE = 0, // No plane z/stencl - ADDR_DEPTH_PLANAR_R600 = 1, // R600 z and stencil planes are store within a tile - ADDR_DEPTH_PLANAR_R800 = 2, // R800 has separate z and stencil planes -}; - -/** -**************************************************************************************************** -* PixelFormatInfo -* -* @brief -* Per component info -* 
-**************************************************************************************************** -*/ -struct PixelFormatInfo -{ - UINT_32 compBit[4]; - NumberType numType[4]; - UINT_32 compStart[4]; - ElemMode elemMode; - UINT_32 comps; ///< Number of components -}; - -/** -**************************************************************************************************** -* @brief This class contains asic indepentent element related attributes and operations -**************************************************************************************************** -*/ -class ElemLib : public Object -{ -protected: - ElemLib(Lib* pAddrLib); - -public: - - /// Makes this class virtual - virtual ~ElemLib(); - - static ElemLib* Create( - const Lib* pAddrLib); - - /// The implementation is only for R6xx/R7xx, so make it virtual in case we need for R8xx - BOOL_32 PixGetExportNorm( - AddrColorFormat colorFmt, - AddrSurfaceNumber numberFmt, AddrSurfaceSwap swap) const; - - /// Below method are asic independent, so make them just static. - /// Remove static if we need different operation in hwl. 
- - VOID Flt32ToDepthPixel( - AddrDepthFormat format, const ADDR_FLT_32 comps[2], UINT_8 *pPixel) const; - - VOID Flt32ToColorPixel( - AddrColorFormat format, AddrSurfaceNumber surfNum, AddrSurfaceSwap surfSwap, - const ADDR_FLT_32 comps[4], UINT_8 *pPixel) const; - - static VOID Flt32sToInt32s( - ADDR_FLT_32 value, UINT_32 bits, NumberType numberType, UINT_32* pResult); - - static VOID Int32sToPixel( - UINT_32 numComps, UINT_32* pComps, UINT_32* pCompBits, UINT_32* pCompStart, - ComponentFlags properties, UINT_32 resultBits, UINT_8* pPixel); - - VOID PixGetColorCompInfo( - AddrColorFormat format, AddrSurfaceNumber number, AddrSurfaceSwap swap, - PixelFormatInfo* pInfo) const; - - VOID PixGetDepthCompInfo( - AddrDepthFormat format, PixelFormatInfo* pInfo) const; - - UINT_32 GetBitsPerPixel( - AddrFormat format, ElemMode* pElemMode = NULL, - UINT_32* pExpandX = NULL, UINT_32* pExpandY = NULL, UINT_32* pBitsUnused = NULL); - - static VOID SetClearComps( - ADDR_FLT_32 comps[4], BOOL_32 clearColor, BOOL_32 float32); - - VOID AdjustSurfaceInfo( - ElemMode elemMode, UINT_32 expandX, UINT_32 expandY, - UINT_32* pBpp, UINT_32* pBasePitch, UINT_32* pWidth, UINT_32* pHeight); - - VOID RestoreSurfaceInfo( - ElemMode elemMode, UINT_32 expandX, UINT_32 expandY, - UINT_32* pBpp, UINT_32* pWidth, UINT_32* pHeight); - - /// Checks if depth and stencil are planar inside a tile - BOOL_32 IsDepthStencilTilePlanar() - { - return (m_depthPlanarType == ADDR_DEPTH_PLANAR_R600) ? 
TRUE : FALSE; - } - - /// Sets m_configFlags, copied from AddrLib - VOID SetConfigFlags(ConfigFlags flags) - { - m_configFlags = flags; - } - - static BOOL_32 IsCompressed(AddrFormat format); - static BOOL_32 IsBlockCompressed(AddrFormat format); - static BOOL_32 IsExpand3x(AddrFormat format); - static BOOL_32 IsMacroPixelPacked(AddrFormat format); - -protected: - - static VOID GetCompBits( - UINT_32 c0, UINT_32 c1, UINT_32 c2, UINT_32 c3, - PixelFormatInfo* pInfo, - ElemMode elemMode = ADDR_ROUND_BY_HALF); - - static VOID GetCompType( - AddrColorFormat format, AddrSurfaceNumber numType, - PixelFormatInfo* pInfo); - - static VOID GetCompSwap( - AddrSurfaceSwap swap, PixelFormatInfo* pInfo); - - static VOID SwapComps( - UINT_32 c0, UINT_32 c1, PixelFormatInfo* pInfo); - -private: - - UINT_32 m_fp16ExportNorm; ///< If allow FP16 to be reported as EXPORT_NORM - DepthPlanarType m_depthPlanarType; - - ConfigFlags m_configFlags; ///< Copy of AddrLib's configFlags - Addr::Lib* const m_pAddrLib; ///< Pointer to parent addrlib instance -}; - -} //Addr - -#endif - diff -Nru mesa-18.3.3/src/amd/addrlib/core/addrlib1.cpp mesa-19.0.1/src/amd/addrlib/core/addrlib1.cpp --- mesa-18.3.3/src/amd/addrlib/core/addrlib1.cpp 2018-04-03 17:32:26.000000000 +0000 +++ mesa-19.0.1/src/amd/addrlib/core/addrlib1.cpp 1970-01-01 00:00:00.000000000 +0000 @@ -1,4076 +0,0 @@ -/* - * Copyright © 2016 Advanced Micro Devices, Inc. - * All Rights Reserved. 
- * - * Permission is hereby granted, free of charge, to any person obtaining - * a copy of this software and associated documentation files (the - * "Software"), to deal in the Software without restriction, including - * without limitation the rights to use, copy, modify, merge, publish, - * distribute, sub license, and/or sell copies of the Software, and to - * permit persons to whom the Software is furnished to do so, subject to - * the following conditions: - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES - * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - * NON-INFRINGEMENT. IN NO EVENT SHALL THE COPYRIGHT HOLDERS, AUTHORS - * AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, - * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE - * USE OR OTHER DEALINGS IN THE SOFTWARE. - * - * The above copyright notice and this permission notice (including the - * next paragraph) shall be included in all copies or substantial portions - * of the Software. - */ - -/** -**************************************************************************************************** -* @file addr1lib.cpp -* @brief Contains the implementation for the Addr::V1::Lib base class. 
-**************************************************************************************************** -*/ - -#include "addrinterface.h" -#include "addrlib1.h" -#include "addrcommon.h" - -namespace Addr -{ -namespace V1 -{ - -//////////////////////////////////////////////////////////////////////////////////////////////////// -// Static Const Member -//////////////////////////////////////////////////////////////////////////////////////////////////// - -const TileModeFlags Lib::ModeFlags[ADDR_TM_COUNT] = -{// T L 1 2 3 P Pr B - {1, 1, 0, 0, 0, 0, 0, 0}, // ADDR_TM_LINEAR_GENERAL - {1, 1, 0, 0, 0, 0, 0, 0}, // ADDR_TM_LINEAR_ALIGNED - {1, 0, 1, 0, 0, 0, 0, 0}, // ADDR_TM_1D_TILED_THIN1 - {4, 0, 1, 0, 0, 0, 0, 0}, // ADDR_TM_1D_TILED_THICK - {1, 0, 0, 1, 0, 0, 0, 0}, // ADDR_TM_2D_TILED_THIN1 - {1, 0, 0, 1, 0, 0, 0, 0}, // ADDR_TM_2D_TILED_THIN2 - {1, 0, 0, 1, 0, 0, 0, 0}, // ADDR_TM_2D_TILED_THIN4 - {4, 0, 0, 1, 0, 0, 0, 0}, // ADDR_TM_2D_TILED_THICK - {1, 0, 0, 1, 0, 0, 0, 1}, // ADDR_TM_2B_TILED_THIN1 - {1, 0, 0, 1, 0, 0, 0, 1}, // ADDR_TM_2B_TILED_THIN2 - {1, 0, 0, 1, 0, 0, 0, 1}, // ADDR_TM_2B_TILED_THIN4 - {4, 0, 0, 1, 0, 0, 0, 1}, // ADDR_TM_2B_TILED_THICK - {1, 0, 0, 1, 1, 0, 0, 0}, // ADDR_TM_3D_TILED_THIN1 - {4, 0, 0, 1, 1, 0, 0, 0}, // ADDR_TM_3D_TILED_THICK - {1, 0, 0, 1, 1, 0, 0, 1}, // ADDR_TM_3B_TILED_THIN1 - {4, 0, 0, 1, 1, 0, 0, 1}, // ADDR_TM_3B_TILED_THICK - {8, 0, 0, 1, 0, 0, 0, 0}, // ADDR_TM_2D_TILED_XTHICK - {8, 0, 0, 1, 1, 0, 0, 0}, // ADDR_TM_3D_TILED_XTHICK - {1, 0, 0, 0, 0, 0, 0, 0}, // ADDR_TM_POWER_SAVE - {1, 0, 0, 1, 0, 1, 1, 0}, // ADDR_TM_PRT_TILED_THIN1 - {1, 0, 0, 1, 0, 1, 0, 0}, // ADDR_TM_PRT_2D_TILED_THIN1 - {1, 0, 0, 1, 1, 1, 0, 0}, // ADDR_TM_PRT_3D_TILED_THIN1 - {4, 0, 0, 1, 0, 1, 1, 0}, // ADDR_TM_PRT_TILED_THICK - {4, 0, 0, 1, 0, 1, 0, 0}, // ADDR_TM_PRT_2D_TILED_THICK - {4, 0, 0, 1, 1, 1, 0, 0}, // ADDR_TM_PRT_3D_TILED_THICK - {0, 0, 0, 0, 0, 0, 0, 0}, // ADDR_TM_UNKNOWN -}; - 
-//////////////////////////////////////////////////////////////////////////////////////////////////// -// Constructor/Destructor -//////////////////////////////////////////////////////////////////////////////////////////////////// - -/** -**************************************************************************************************** -* Lib::AddrLib1 -* -* @brief -* Constructor for the AddrLib1 class -* -**************************************************************************************************** -*/ -Lib::Lib() - : - Addr::Lib() -{ -} - -/** -**************************************************************************************************** -* Lib::Lib -* -* @brief -* Constructor for the Addr::V1::Lib class with hClient as parameter -* -**************************************************************************************************** -*/ -Lib::Lib(const Client* pClient) - : - Addr::Lib(pClient) -{ -} - -/** -**************************************************************************************************** -* Lib::~AddrLib1 -* -* @brief -* Destructor for the AddrLib1 class -* -**************************************************************************************************** -*/ -Lib::~Lib() -{ -} - -/** -**************************************************************************************************** -* Lib::GetLib -* -* @brief -* Get AddrLib1 pointer -* -* @return -* An Addr::V1::Lib class pointer -**************************************************************************************************** -*/ -Lib* Lib::GetLib( - ADDR_HANDLE hLib) ///< [in] handle of ADDR_HANDLE -{ - Addr::Lib* pAddrLib = Addr::Lib::GetLib(hLib); - if ((pAddrLib != NULL) && - ((pAddrLib->GetChipFamily() == ADDR_CHIP_FAMILY_IVLD) || - (pAddrLib->GetChipFamily() > ADDR_CHIP_FAMILY_VI))) - { - // only valid and pre-VI AISC can use AddrLib1 function. 
- ADDR_ASSERT_ALWAYS(); - hLib = NULL; - } - return static_cast(hLib); -} - - -//////////////////////////////////////////////////////////////////////////////////////////////////// -// Surface Methods -//////////////////////////////////////////////////////////////////////////////////////////////////// - - -/** -**************************************************************************************************** -* Lib::ComputeSurfaceInfo -* -* @brief -* Interface function stub of AddrComputeSurfaceInfo. -* -* @return -* ADDR_E_RETURNCODE -**************************************************************************************************** -*/ -ADDR_E_RETURNCODE Lib::ComputeSurfaceInfo( - const ADDR_COMPUTE_SURFACE_INFO_INPUT* pIn, ///< [in] input structure - ADDR_COMPUTE_SURFACE_INFO_OUTPUT* pOut ///< [out] output structure - ) const -{ - ADDR_E_RETURNCODE returnCode = ADDR_OK; - - if (GetFillSizeFieldsFlags() == TRUE) - { - if ((pIn->size != sizeof(ADDR_COMPUTE_SURFACE_INFO_INPUT)) || - (pOut->size != sizeof(ADDR_COMPUTE_SURFACE_INFO_OUTPUT))) - { - returnCode = ADDR_PARAMSIZEMISMATCH; - } - } - - // We suggest client do sanity check but a check here is also good - if (pIn->bpp > 128) - { - returnCode = ADDR_INVALIDPARAMS; - } - - if ((pIn->tileMode == ADDR_TM_UNKNOWN) && (pIn->mipLevel > 0)) - { - returnCode = ADDR_INVALIDPARAMS; - } - - // Thick modes don't support multisample - if ((Thickness(pIn->tileMode) > 1) && (pIn->numSamples > 1)) - { - returnCode = ADDR_INVALIDPARAMS; - } - - if (returnCode == ADDR_OK) - { - // Get a local copy of input structure and only reference pIn for unadjusted values - ADDR_COMPUTE_SURFACE_INFO_INPUT localIn = *pIn; - ADDR_TILEINFO tileInfoNull = {0}; - - if (UseTileInfo()) - { - // If the original input has a valid ADDR_TILEINFO pointer then copy its contents. - // Otherwise the default 0's in tileInfoNull are used. 
- if (pIn->pTileInfo) - { - tileInfoNull = *pIn->pTileInfo; - } - localIn.pTileInfo = &tileInfoNull; - } - - localIn.numSamples = (pIn->numSamples == 0) ? 1 : pIn->numSamples; - - // Do mipmap check first - // If format is BCn, pre-pad dimension to power-of-two according to HWL - ComputeMipLevel(&localIn); - - if (m_configFlags.checkLast2DLevel) - { - // Save this level's original height in pixels - pOut->height = pIn->height; - } - - UINT_32 expandX = 1; - UINT_32 expandY = 1; - ElemMode elemMode; - - // Save outputs that may not go through HWL - pOut->pixelBits = localIn.bpp; - pOut->numSamples = localIn.numSamples; - pOut->last2DLevel = FALSE; - pOut->tcCompatible = FALSE; - -#if !ALT_TEST - if (localIn.numSamples > 1) - { - ADDR_ASSERT(localIn.mipLevel == 0); - } -#endif - - if (localIn.format != ADDR_FMT_INVALID) // Set format to INVALID will skip this conversion - { - // Get compression/expansion factors and element mode - // (which indicates compression/expansion - localIn.bpp = GetElemLib()->GetBitsPerPixel(localIn.format, - &elemMode, - &expandX, - &expandY); - - // Special flag for 96 bit surface. 96 (or 48 if we support) bit surface's width is - // pre-multiplied by 3 and bpp is divided by 3. So pitch alignment for linear- - // aligned does not meet 64-pixel in real. We keep special handling in hwl since hw - // restrictions are different. - // Also Mip 1+ needs an element pitch of 32 bits so we do not need this workaround - // but we use this flag to skip RestoreSurfaceInfo below - - if ((elemMode == ADDR_EXPANDED) && (expandX > 1)) - { - ADDR_ASSERT(IsLinear(localIn.tileMode)); - } - - GetElemLib()->AdjustSurfaceInfo(elemMode, - expandX, - expandY, - &localIn.bpp, - &localIn.basePitch, - &localIn.width, - &localIn.height); - - // Overwrite these parameters if we have a valid format - } - else if (localIn.bpp != 0) - { - localIn.width = (localIn.width != 0) ? localIn.width : 1; - localIn.height = (localIn.height != 0) ? 
localIn.height : 1; - } - else // Rule out some invalid parameters - { - ADDR_ASSERT_ALWAYS(); - - returnCode = ADDR_INVALIDPARAMS; - } - - // Check mipmap after surface expansion - if (returnCode == ADDR_OK) - { - returnCode = PostComputeMipLevel(&localIn, pOut); - } - - if (returnCode == ADDR_OK) - { - if (UseTileIndex(localIn.tileIndex)) - { - // Make sure pTileInfo is not NULL - ADDR_ASSERT(localIn.pTileInfo); - - UINT_32 numSamples = GetNumFragments(localIn.numSamples, localIn.numFrags); - - INT_32 macroModeIndex = TileIndexNoMacroIndex; - - if (localIn.tileIndex != TileIndexLinearGeneral) - { - // Try finding a macroModeIndex - macroModeIndex = HwlComputeMacroModeIndex(localIn.tileIndex, - localIn.flags, - localIn.bpp, - numSamples, - localIn.pTileInfo, - &localIn.tileMode, - &localIn.tileType); - } - - // If macroModeIndex is not needed, then call HwlSetupTileCfg to get tile info - if (macroModeIndex == TileIndexNoMacroIndex) - { - returnCode = HwlSetupTileCfg(localIn.bpp, - localIn.tileIndex, macroModeIndex, - localIn.pTileInfo, - &localIn.tileMode, &localIn.tileType); - } - // If macroModeIndex is invalid, then assert this is not macro tiled - else if (macroModeIndex == TileIndexInvalid) - { - ADDR_ASSERT(!IsMacroTiled(localIn.tileMode)); - } - - pOut->macroModeIndex = macroModeIndex; - } - } - - if (returnCode == ADDR_OK) - { - localIn.flags.dccPipeWorkaround = localIn.flags.dccCompatible; - - if (localIn.tileMode == ADDR_TM_UNKNOWN) - { - // HWL layer may override tile mode if necessary - HwlSelectTileMode(&localIn); - } - else - { - // HWL layer may override tile mode if necessary - HwlOverrideTileMode(&localIn); - - // Optimize tile mode if possible - OptimizeTileMode(&localIn); - } - } - - // Call main function to compute surface info - if (returnCode == ADDR_OK) - { - returnCode = HwlComputeSurfaceInfo(&localIn, pOut); - } - - if (returnCode == ADDR_OK) - { - // Since bpp might be changed we just pass it through - pOut->bpp = localIn.bpp; - - // Also 
original width/height/bpp - pOut->pixelPitch = pOut->pitch; - pOut->pixelHeight = pOut->height; - -#if DEBUG - if (localIn.flags.display) - { - ADDR_ASSERT((pOut->pitchAlign % 32) == 0); - } -#endif //DEBUG - - if (localIn.format != ADDR_FMT_INVALID) - { - // - // Note: For 96 bit surface, the pixelPitch returned might be an odd number, but it - // is okay to program texture pitch as HW's mip calculator would multiply 3 first, - // then do the appropriate paddings (linear alignment requirement and possible the - // nearest power-of-two for mipmaps), which results in the original pitch. - // - GetElemLib()->RestoreSurfaceInfo(elemMode, - expandX, - expandY, - &localIn.bpp, - &pOut->pixelPitch, - &pOut->pixelHeight); - } - - if (localIn.flags.qbStereo) - { - if (pOut->pStereoInfo) - { - ComputeQbStereoInfo(pOut); - } - } - - if (localIn.flags.volume) // For volume sliceSize equals to all z-slices - { - pOut->sliceSize = pOut->surfSize; - } - else // For array: sliceSize is likely to have slice-padding (the last one) - { - pOut->sliceSize = pOut->surfSize / pOut->depth; - - // array or cubemap - if (pIn->numSlices > 1) - { - // If this is the last slice then add the padding size to this slice - if (pIn->slice == (pIn->numSlices - 1)) - { - pOut->sliceSize += pOut->sliceSize * (pOut->depth - pIn->numSlices); - } - else if (m_configFlags.checkLast2DLevel) - { - // Reset last2DLevel flag if this is not the last array slice - pOut->last2DLevel = FALSE; - } - } - } - - pOut->pitchTileMax = pOut->pitch / 8 - 1; - pOut->heightTileMax = pOut->height / 8 - 1; - pOut->sliceTileMax = pOut->pitch * pOut->height / 64 - 1; - } - } - - ValidBaseAlignments(pOut->baseAlign); - - return returnCode; -} - -/** -**************************************************************************************************** -* Lib::ComputeSurfaceInfo -* -* @brief -* Interface function stub of AddrComputeSurfaceInfo. 
-* -* @return -* ADDR_E_RETURNCODE -**************************************************************************************************** -*/ -ADDR_E_RETURNCODE Lib::ComputeSurfaceAddrFromCoord( - const ADDR_COMPUTE_SURFACE_ADDRFROMCOORD_INPUT* pIn, ///< [in] input structure - ADDR_COMPUTE_SURFACE_ADDRFROMCOORD_OUTPUT* pOut ///< [out] output structure - ) const -{ - ADDR_E_RETURNCODE returnCode = ADDR_OK; - - if (GetFillSizeFieldsFlags() == TRUE) - { - if ((pIn->size != sizeof(ADDR_COMPUTE_SURFACE_ADDRFROMCOORD_INPUT)) || - (pOut->size != sizeof(ADDR_COMPUTE_SURFACE_ADDRFROMCOORD_OUTPUT))) - { - returnCode = ADDR_PARAMSIZEMISMATCH; - } - } - - if (returnCode == ADDR_OK) - { - ADDR_TILEINFO tileInfoNull; - ADDR_COMPUTE_SURFACE_ADDRFROMCOORD_INPUT input; - - if (UseTileIndex(pIn->tileIndex)) - { - input = *pIn; - // Use temp tile info for calcalation - input.pTileInfo = &tileInfoNull; - - const ADDR_SURFACE_FLAGS flags = {{0}}; - UINT_32 numSamples = GetNumFragments(pIn->numSamples, pIn->numFrags); - - // Try finding a macroModeIndex - INT_32 macroModeIndex = HwlComputeMacroModeIndex(input.tileIndex, - flags, - input.bpp, - numSamples, - input.pTileInfo, - &input.tileMode, - &input.tileType); - - // If macroModeIndex is not needed, then call HwlSetupTileCfg to get tile info - if (macroModeIndex == TileIndexNoMacroIndex) - { - returnCode = HwlSetupTileCfg(input.bpp, input.tileIndex, macroModeIndex, - input.pTileInfo, &input.tileMode, &input.tileType); - } - // If macroModeIndex is invalid, then assert this is not macro tiled - else if (macroModeIndex == TileIndexInvalid) - { - ADDR_ASSERT(!IsMacroTiled(input.tileMode)); - } - - // Change the input structure - pIn = &input; - } - - if (returnCode == ADDR_OK) - { - returnCode = HwlComputeSurfaceAddrFromCoord(pIn, pOut); - - if (returnCode == ADDR_OK) - { - pOut->prtBlockIndex = static_cast(pOut->addr / (64 * 1024)); - } - } - } - - return returnCode; -} - -/** 
-**************************************************************************************************** -* Lib::ComputeSurfaceCoordFromAddr -* -* @brief -* Interface function stub of ComputeSurfaceCoordFromAddr. -* -* @return -* ADDR_E_RETURNCODE -**************************************************************************************************** -*/ -ADDR_E_RETURNCODE Lib::ComputeSurfaceCoordFromAddr( - const ADDR_COMPUTE_SURFACE_COORDFROMADDR_INPUT* pIn, ///< [in] input structure - ADDR_COMPUTE_SURFACE_COORDFROMADDR_OUTPUT* pOut ///< [out] output structure - ) const -{ - ADDR_E_RETURNCODE returnCode = ADDR_OK; - - if (GetFillSizeFieldsFlags() == TRUE) - { - if ((pIn->size != sizeof(ADDR_COMPUTE_SURFACE_COORDFROMADDR_INPUT)) || - (pOut->size != sizeof(ADDR_COMPUTE_SURFACE_COORDFROMADDR_OUTPUT))) - { - returnCode = ADDR_PARAMSIZEMISMATCH; - } - } - - if (returnCode == ADDR_OK) - { - ADDR_TILEINFO tileInfoNull; - ADDR_COMPUTE_SURFACE_COORDFROMADDR_INPUT input; - - if (UseTileIndex(pIn->tileIndex)) - { - input = *pIn; - // Use temp tile info for calcalation - input.pTileInfo = &tileInfoNull; - - const ADDR_SURFACE_FLAGS flags = {{0}}; - UINT_32 numSamples = GetNumFragments(pIn->numSamples, pIn->numFrags); - - // Try finding a macroModeIndex - INT_32 macroModeIndex = HwlComputeMacroModeIndex(input.tileIndex, - flags, - input.bpp, - numSamples, - input.pTileInfo, - &input.tileMode, - &input.tileType); - - // If macroModeIndex is not needed, then call HwlSetupTileCfg to get tile info - if (macroModeIndex == TileIndexNoMacroIndex) - { - returnCode = HwlSetupTileCfg(input.bpp, input.tileIndex, macroModeIndex, - input.pTileInfo, &input.tileMode, &input.tileType); - } - // If macroModeIndex is invalid, then assert this is not macro tiled - else if (macroModeIndex == TileIndexInvalid) - { - ADDR_ASSERT(!IsMacroTiled(input.tileMode)); - } - - // Change the input structure - pIn = &input; - } - - if (returnCode == ADDR_OK) - { - returnCode = HwlComputeSurfaceCoordFromAddr(pIn, 
pOut); - } - } - - return returnCode; -} - -/** -**************************************************************************************************** -* Lib::ComputeSliceTileSwizzle -* -* @brief -* Interface function stub of ComputeSliceTileSwizzle. -* -* @return -* ADDR_E_RETURNCODE -**************************************************************************************************** -*/ -ADDR_E_RETURNCODE Lib::ComputeSliceTileSwizzle( - const ADDR_COMPUTE_SLICESWIZZLE_INPUT* pIn, ///< [in] input structure - ADDR_COMPUTE_SLICESWIZZLE_OUTPUT* pOut ///< [out] output structure - ) const -{ - ADDR_E_RETURNCODE returnCode = ADDR_OK; - - if (GetFillSizeFieldsFlags() == TRUE) - { - if ((pIn->size != sizeof(ADDR_COMPUTE_SLICESWIZZLE_INPUT)) || - (pOut->size != sizeof(ADDR_COMPUTE_SLICESWIZZLE_OUTPUT))) - { - returnCode = ADDR_PARAMSIZEMISMATCH; - } - } - - if (returnCode == ADDR_OK) - { - ADDR_TILEINFO tileInfoNull; - ADDR_COMPUTE_SLICESWIZZLE_INPUT input; - - if (UseTileIndex(pIn->tileIndex)) - { - input = *pIn; - // Use temp tile info for calcalation - input.pTileInfo = &tileInfoNull; - - returnCode = HwlSetupTileCfg(0, input.tileIndex, input.macroModeIndex, - input.pTileInfo, &input.tileMode); - // Change the input structure - pIn = &input; - } - - if (returnCode == ADDR_OK) - { - returnCode = HwlComputeSliceTileSwizzle(pIn, pOut); - } - } - - return returnCode; -} - -/** -**************************************************************************************************** -* Lib::ExtractBankPipeSwizzle -* -* @brief -* Interface function stub of AddrExtractBankPipeSwizzle. 
-* -* @return -* ADDR_E_RETURNCODE -**************************************************************************************************** -*/ -ADDR_E_RETURNCODE Lib::ExtractBankPipeSwizzle( - const ADDR_EXTRACT_BANKPIPE_SWIZZLE_INPUT* pIn, ///< [in] input structure - ADDR_EXTRACT_BANKPIPE_SWIZZLE_OUTPUT* pOut ///< [out] output structure - ) const -{ - ADDR_E_RETURNCODE returnCode = ADDR_OK; - - if (GetFillSizeFieldsFlags() == TRUE) - { - if ((pIn->size != sizeof(ADDR_EXTRACT_BANKPIPE_SWIZZLE_INPUT)) || - (pOut->size != sizeof(ADDR_EXTRACT_BANKPIPE_SWIZZLE_OUTPUT))) - { - returnCode = ADDR_PARAMSIZEMISMATCH; - } - } - - if (returnCode == ADDR_OK) - { - ADDR_TILEINFO tileInfoNull; - ADDR_EXTRACT_BANKPIPE_SWIZZLE_INPUT input; - - if (UseTileIndex(pIn->tileIndex)) - { - input = *pIn; - // Use temp tile info for calcalation - input.pTileInfo = &tileInfoNull; - - returnCode = HwlSetupTileCfg(0, input.tileIndex, input.macroModeIndex, input.pTileInfo); - // Change the input structure - pIn = &input; - } - - if (returnCode == ADDR_OK) - { - returnCode = HwlExtractBankPipeSwizzle(pIn, pOut); - } - } - - return returnCode; -} - -/** -**************************************************************************************************** -* Lib::CombineBankPipeSwizzle -* -* @brief -* Interface function stub of AddrCombineBankPipeSwizzle. 
-* -* @return -* ADDR_E_RETURNCODE -**************************************************************************************************** -*/ -ADDR_E_RETURNCODE Lib::CombineBankPipeSwizzle( - const ADDR_COMBINE_BANKPIPE_SWIZZLE_INPUT* pIn, ///< [in] input structure - ADDR_COMBINE_BANKPIPE_SWIZZLE_OUTPUT* pOut ///< [out] output structure - ) const -{ - ADDR_E_RETURNCODE returnCode = ADDR_OK; - - if (GetFillSizeFieldsFlags() == TRUE) - { - if ((pIn->size != sizeof(ADDR_COMPUTE_FMASK_INFO_INPUT)) || - (pOut->size != sizeof(ADDR_COMPUTE_FMASK_INFO_OUTPUT))) - { - returnCode = ADDR_PARAMSIZEMISMATCH; - } - } - - if (returnCode == ADDR_OK) - { - ADDR_TILEINFO tileInfoNull; - ADDR_COMBINE_BANKPIPE_SWIZZLE_INPUT input; - - if (UseTileIndex(pIn->tileIndex)) - { - input = *pIn; - // Use temp tile info for calcalation - input.pTileInfo = &tileInfoNull; - - returnCode = HwlSetupTileCfg(0, input.tileIndex, input.macroModeIndex, input.pTileInfo); - // Change the input structure - pIn = &input; - } - - if (returnCode == ADDR_OK) - { - returnCode = HwlCombineBankPipeSwizzle(pIn->bankSwizzle, - pIn->pipeSwizzle, - pIn->pTileInfo, - pIn->baseAddr, - &pOut->tileSwizzle); - } - } - - return returnCode; -} - -/** -**************************************************************************************************** -* Lib::ComputeBaseSwizzle -* -* @brief -* Interface function stub of AddrCompueBaseSwizzle. 
-* @return -* ADDR_E_RETURNCODE -**************************************************************************************************** -*/ -ADDR_E_RETURNCODE Lib::ComputeBaseSwizzle( - const ADDR_COMPUTE_BASE_SWIZZLE_INPUT* pIn, - ADDR_COMPUTE_BASE_SWIZZLE_OUTPUT* pOut) const -{ - ADDR_E_RETURNCODE returnCode = ADDR_OK; - - if (GetFillSizeFieldsFlags() == TRUE) - { - if ((pIn->size != sizeof(ADDR_COMPUTE_BASE_SWIZZLE_INPUT)) || - (pOut->size != sizeof(ADDR_COMPUTE_BASE_SWIZZLE_OUTPUT))) - { - returnCode = ADDR_PARAMSIZEMISMATCH; - } - } - - if (returnCode == ADDR_OK) - { - ADDR_TILEINFO tileInfoNull; - ADDR_COMPUTE_BASE_SWIZZLE_INPUT input; - - if (UseTileIndex(pIn->tileIndex)) - { - input = *pIn; - // Use temp tile info for calcalation - input.pTileInfo = &tileInfoNull; - - returnCode = HwlSetupTileCfg(0, input.tileIndex, input.macroModeIndex, input.pTileInfo); - // Change the input structure - pIn = &input; - } - - if (returnCode == ADDR_OK) - { - if (IsMacroTiled(pIn->tileMode)) - { - returnCode = HwlComputeBaseSwizzle(pIn, pOut); - } - else - { - pOut->tileSwizzle = 0; - } - } - } - - return returnCode; -} - -/** -**************************************************************************************************** -* Lib::ComputeFmaskInfo -* -* @brief -* Interface function stub of ComputeFmaskInfo. 
-* -* @return -* ADDR_E_RETURNCODE -**************************************************************************************************** -*/ -ADDR_E_RETURNCODE Lib::ComputeFmaskInfo( - const ADDR_COMPUTE_FMASK_INFO_INPUT* pIn, ///< [in] input structure - ADDR_COMPUTE_FMASK_INFO_OUTPUT* pOut ///< [out] output structure - ) -{ - ADDR_E_RETURNCODE returnCode = ADDR_OK; - - if (GetFillSizeFieldsFlags() == TRUE) - { - if ((pIn->size != sizeof(ADDR_COMPUTE_FMASK_INFO_INPUT)) || - (pOut->size != sizeof(ADDR_COMPUTE_FMASK_INFO_OUTPUT))) - { - returnCode = ADDR_PARAMSIZEMISMATCH; - } - } - - // No thick MSAA - if (Thickness(pIn->tileMode) > 1) - { - returnCode = ADDR_INVALIDPARAMS; - } - - if (returnCode == ADDR_OK) - { - ADDR_TILEINFO tileInfoNull; - ADDR_COMPUTE_FMASK_INFO_INPUT input; - - if (UseTileIndex(pIn->tileIndex)) - { - input = *pIn; - - if (pOut->pTileInfo) - { - // Use temp tile info for calcalation - input.pTileInfo = pOut->pTileInfo; - } - else - { - input.pTileInfo = &tileInfoNull; - } - - ADDR_SURFACE_FLAGS flags = {{0}}; - flags.fmask = 1; - - // Try finding a macroModeIndex - INT_32 macroModeIndex = HwlComputeMacroModeIndex(pIn->tileIndex, - flags, - HwlComputeFmaskBits(pIn, NULL), - pIn->numSamples, - input.pTileInfo, - &input.tileMode); - - // If macroModeIndex is not needed, then call HwlSetupTileCfg to get tile info - if (macroModeIndex == TileIndexNoMacroIndex) - { - returnCode = HwlSetupTileCfg(0, input.tileIndex, macroModeIndex, - input.pTileInfo, &input.tileMode); - } - - ADDR_ASSERT(macroModeIndex != TileIndexInvalid); - - // Change the input structure - pIn = &input; - } - - if (returnCode == ADDR_OK) - { - if (pIn->numSamples > 1) - { - returnCode = HwlComputeFmaskInfo(pIn, pOut); - } - else - { - memset(pOut, 0, sizeof(ADDR_COMPUTE_FMASK_INFO_OUTPUT)); - - returnCode = ADDR_INVALIDPARAMS; - } - } - } - - ValidBaseAlignments(pOut->baseAlign); - - return returnCode; -} - -/** 
-**************************************************************************************************** -* Lib::ComputeFmaskAddrFromCoord -* -* @brief -* Interface function stub of ComputeFmaskAddrFromCoord. -* -* @return -* ADDR_E_RETURNCODE -**************************************************************************************************** -*/ -ADDR_E_RETURNCODE Lib::ComputeFmaskAddrFromCoord( - const ADDR_COMPUTE_FMASK_ADDRFROMCOORD_INPUT* pIn, ///< [in] input structure - ADDR_COMPUTE_FMASK_ADDRFROMCOORD_OUTPUT* pOut ///< [out] output structure - ) const -{ - ADDR_E_RETURNCODE returnCode = ADDR_OK; - - if (GetFillSizeFieldsFlags() == TRUE) - { - if ((pIn->size != sizeof(ADDR_COMPUTE_FMASK_ADDRFROMCOORD_INPUT)) || - (pOut->size != sizeof(ADDR_COMPUTE_FMASK_ADDRFROMCOORD_OUTPUT))) - { - returnCode = ADDR_PARAMSIZEMISMATCH; - } - } - - if (returnCode == ADDR_OK) - { - ADDR_ASSERT(pIn->numSamples > 1); - - if (pIn->numSamples > 1) - { - returnCode = HwlComputeFmaskAddrFromCoord(pIn, pOut); - } - else - { - returnCode = ADDR_INVALIDPARAMS; - } - } - - return returnCode; -} - -/** -**************************************************************************************************** -* Lib::ComputeFmaskCoordFromAddr -* -* @brief -* Interface function stub of ComputeFmaskAddrFromCoord. 
-* -* @return -* ADDR_E_RETURNCODE -**************************************************************************************************** -*/ -ADDR_E_RETURNCODE Lib::ComputeFmaskCoordFromAddr( - const ADDR_COMPUTE_FMASK_COORDFROMADDR_INPUT* pIn, ///< [in] input structure - ADDR_COMPUTE_FMASK_COORDFROMADDR_OUTPUT* pOut ///< [out] output structure - ) const -{ - ADDR_E_RETURNCODE returnCode = ADDR_OK; - - if (GetFillSizeFieldsFlags() == TRUE) - { - if ((pIn->size != sizeof(ADDR_COMPUTE_FMASK_COORDFROMADDR_INPUT)) || - (pOut->size != sizeof(ADDR_COMPUTE_FMASK_COORDFROMADDR_OUTPUT))) - { - returnCode = ADDR_PARAMSIZEMISMATCH; - } - } - - if (returnCode == ADDR_OK) - { - ADDR_ASSERT(pIn->numSamples > 1); - - if (pIn->numSamples > 1) - { - returnCode = HwlComputeFmaskCoordFromAddr(pIn, pOut); - } - else - { - returnCode = ADDR_INVALIDPARAMS; - } - } - - return returnCode; -} - -/** -**************************************************************************************************** -* Lib::ConvertTileInfoToHW -* -* @brief -* Convert tile info from real value to HW register value in HW layer -* -* @return -* ADDR_E_RETURNCODE -**************************************************************************************************** -*/ -ADDR_E_RETURNCODE Lib::ConvertTileInfoToHW( - const ADDR_CONVERT_TILEINFOTOHW_INPUT* pIn, ///< [in] input structure - ADDR_CONVERT_TILEINFOTOHW_OUTPUT* pOut ///< [out] output structure - ) const -{ - ADDR_E_RETURNCODE returnCode = ADDR_OK; - - if (GetFillSizeFieldsFlags() == TRUE) - { - if ((pIn->size != sizeof(ADDR_CONVERT_TILEINFOTOHW_INPUT)) || - (pOut->size != sizeof(ADDR_CONVERT_TILEINFOTOHW_OUTPUT))) - { - returnCode = ADDR_PARAMSIZEMISMATCH; - } - } - - if (returnCode == ADDR_OK) - { - ADDR_TILEINFO tileInfoNull; - ADDR_CONVERT_TILEINFOTOHW_INPUT input; - // if pIn->reverse is TRUE, indices are ignored - if (pIn->reverse == FALSE && UseTileIndex(pIn->tileIndex)) - { - input = *pIn; - input.pTileInfo = &tileInfoNull; - - returnCode = 
HwlSetupTileCfg(input.bpp, input.tileIndex, - input.macroModeIndex, input.pTileInfo); - - pIn = &input; - } - - if (returnCode == ADDR_OK) - { - returnCode = HwlConvertTileInfoToHW(pIn, pOut); - } - } - - return returnCode; -} - -/** -**************************************************************************************************** -* Lib::ConvertTileIndex -* -* @brief -* Convert tile index to tile mode/type/info -* -* @return -* ADDR_E_RETURNCODE -**************************************************************************************************** -*/ -ADDR_E_RETURNCODE Lib::ConvertTileIndex( - const ADDR_CONVERT_TILEINDEX_INPUT* pIn, ///< [in] input structure - ADDR_CONVERT_TILEINDEX_OUTPUT* pOut ///< [out] output structure - ) const -{ - ADDR_E_RETURNCODE returnCode = ADDR_OK; - - if (GetFillSizeFieldsFlags() == TRUE) - { - if ((pIn->size != sizeof(ADDR_CONVERT_TILEINDEX_INPUT)) || - (pOut->size != sizeof(ADDR_CONVERT_TILEINDEX_OUTPUT))) - { - returnCode = ADDR_PARAMSIZEMISMATCH; - } - } - - if (returnCode == ADDR_OK) - { - - returnCode = HwlSetupTileCfg(pIn->bpp, pIn->tileIndex, pIn->macroModeIndex, - pOut->pTileInfo, &pOut->tileMode, &pOut->tileType); - - if (returnCode == ADDR_OK && pIn->tileInfoHw) - { - ADDR_CONVERT_TILEINFOTOHW_INPUT hwInput = {0}; - ADDR_CONVERT_TILEINFOTOHW_OUTPUT hwOutput = {0}; - - hwInput.pTileInfo = pOut->pTileInfo; - hwInput.tileIndex = -1; - hwOutput.pTileInfo = pOut->pTileInfo; - - returnCode = HwlConvertTileInfoToHW(&hwInput, &hwOutput); - } - } - - return returnCode; -} - -/** -**************************************************************************************************** -* Lib::GetMacroModeIndex -* -* @brief -* Get macro mode index based on input info -* -* @return -* ADDR_E_RETURNCODE -**************************************************************************************************** -*/ -ADDR_E_RETURNCODE Lib::GetMacroModeIndex( - const ADDR_GET_MACROMODEINDEX_INPUT* pIn, ///< [in] input structure - 
ADDR_GET_MACROMODEINDEX_OUTPUT* pOut ///< [out] output structure - ) const -{ - ADDR_E_RETURNCODE returnCode = ADDR_OK; - - if (GetFillSizeFieldsFlags()) - { - if ((pIn->size != sizeof(ADDR_GET_MACROMODEINDEX_INPUT)) || - (pOut->size != sizeof(ADDR_GET_MACROMODEINDEX_OUTPUT))) - { - returnCode = ADDR_PARAMSIZEMISMATCH; - } - } - - if (returnCode == ADDR_OK) - { - ADDR_TILEINFO tileInfo = {0}; - pOut->macroModeIndex = HwlComputeMacroModeIndex(pIn->tileIndex, pIn->flags, pIn->bpp, - pIn->numFrags, &tileInfo); - } - - return returnCode; -} - -/** -**************************************************************************************************** -* Lib::ConvertTileIndex1 -* -* @brief -* Convert tile index to tile mode/type/info -* -* @return -* ADDR_E_RETURNCODE -**************************************************************************************************** -*/ -ADDR_E_RETURNCODE Lib::ConvertTileIndex1( - const ADDR_CONVERT_TILEINDEX1_INPUT* pIn, ///< [in] input structure - ADDR_CONVERT_TILEINDEX_OUTPUT* pOut ///< [out] output structure - ) const -{ - ADDR_E_RETURNCODE returnCode = ADDR_OK; - - if (GetFillSizeFieldsFlags() == TRUE) - { - if ((pIn->size != sizeof(ADDR_CONVERT_TILEINDEX1_INPUT)) || - (pOut->size != sizeof(ADDR_CONVERT_TILEINDEX_OUTPUT))) - { - returnCode = ADDR_PARAMSIZEMISMATCH; - } - } - - if (returnCode == ADDR_OK) - { - ADDR_SURFACE_FLAGS flags = {{0}}; - - HwlComputeMacroModeIndex(pIn->tileIndex, flags, pIn->bpp, pIn->numSamples, - pOut->pTileInfo, &pOut->tileMode, &pOut->tileType); - - if (pIn->tileInfoHw) - { - ADDR_CONVERT_TILEINFOTOHW_INPUT hwInput = {0}; - ADDR_CONVERT_TILEINFOTOHW_OUTPUT hwOutput = {0}; - - hwInput.pTileInfo = pOut->pTileInfo; - hwInput.tileIndex = -1; - hwOutput.pTileInfo = pOut->pTileInfo; - - returnCode = HwlConvertTileInfoToHW(&hwInput, &hwOutput); - } - } - - return returnCode; -} - -/** -**************************************************************************************************** -* Lib::GetTileIndex -* -* 
@brief -* Get tile index from tile mode/type/info -* -* @return -* ADDR_E_RETURNCODE -**************************************************************************************************** -*/ -ADDR_E_RETURNCODE Lib::GetTileIndex( - const ADDR_GET_TILEINDEX_INPUT* pIn, ///< [in] input structure - ADDR_GET_TILEINDEX_OUTPUT* pOut ///< [out] output structure - ) const -{ - ADDR_E_RETURNCODE returnCode = ADDR_OK; - - if (GetFillSizeFieldsFlags() == TRUE) - { - if ((pIn->size != sizeof(ADDR_GET_TILEINDEX_INPUT)) || - (pOut->size != sizeof(ADDR_GET_TILEINDEX_OUTPUT))) - { - returnCode = ADDR_PARAMSIZEMISMATCH; - } - } - - if (returnCode == ADDR_OK) - { - returnCode = HwlGetTileIndex(pIn, pOut); - } - - return returnCode; -} - -/** -**************************************************************************************************** -* Lib::Thickness -* -* @brief -* Get tile mode thickness -* -* @return -* Tile mode thickness -**************************************************************************************************** -*/ -UINT_32 Lib::Thickness( - AddrTileMode tileMode) ///< [in] tile mode -{ - return ModeFlags[tileMode].thickness; -} - - - -//////////////////////////////////////////////////////////////////////////////////////////////////// -// CMASK/HTILE -//////////////////////////////////////////////////////////////////////////////////////////////////// - -/** -**************************************************************************************************** -* Lib::ComputeHtileInfo -* -* @brief -* Interface function stub of AddrComputeHtilenfo -* -* @return -* ADDR_E_RETURNCODE -**************************************************************************************************** -*/ -ADDR_E_RETURNCODE Lib::ComputeHtileInfo( - const ADDR_COMPUTE_HTILE_INFO_INPUT* pIn, ///< [in] input structure - ADDR_COMPUTE_HTILE_INFO_OUTPUT* pOut ///< [out] output structure - ) const -{ - ADDR_E_RETURNCODE returnCode = ADDR_OK; - - BOOL_32 isWidth8 = (pIn->blockWidth == 8) ? 
TRUE : FALSE; - BOOL_32 isHeight8 = (pIn->blockHeight == 8) ? TRUE : FALSE; - - if (GetFillSizeFieldsFlags() == TRUE) - { - if ((pIn->size != sizeof(ADDR_COMPUTE_HTILE_INFO_INPUT)) || - (pOut->size != sizeof(ADDR_COMPUTE_HTILE_INFO_OUTPUT))) - { - returnCode = ADDR_PARAMSIZEMISMATCH; - } - } - - if (returnCode == ADDR_OK) - { - ADDR_TILEINFO tileInfoNull; - ADDR_COMPUTE_HTILE_INFO_INPUT input; - - if (UseTileIndex(pIn->tileIndex)) - { - input = *pIn; - // Use temp tile info for calcalation - input.pTileInfo = &tileInfoNull; - - returnCode = HwlSetupTileCfg(0, input.tileIndex, input.macroModeIndex, input.pTileInfo); - - // Change the input structure - pIn = &input; - } - - if (returnCode == ADDR_OK) - { - if (pIn->flags.tcCompatible) - { - const UINT_32 sliceSize = pIn->pitch * pIn->height * 4 / (8 * 8); - const UINT_32 align = HwlGetPipes(pIn->pTileInfo) * pIn->pTileInfo->banks * m_pipeInterleaveBytes; - - if (pIn->numSlices > 1) - { - const UINT_32 surfBytes = (sliceSize * pIn->numSlices); - - pOut->sliceSize = sliceSize; - pOut->htileBytes = pIn->flags.skipTcCompatSizeAlign ? - surfBytes : PowTwoAlign(surfBytes, align); - pOut->sliceInterleaved = ((sliceSize % align) != 0) ? TRUE : FALSE; - } - else - { - pOut->sliceSize = pIn->flags.skipTcCompatSizeAlign ? - sliceSize : PowTwoAlign(sliceSize, align); - pOut->htileBytes = pOut->sliceSize; - pOut->sliceInterleaved = FALSE; - } - - pOut->nextMipLevelCompressible = ((sliceSize % align) == 0) ? 
TRUE : FALSE; - - pOut->pitch = pIn->pitch; - pOut->height = pIn->height; - pOut->baseAlign = align; - pOut->macroWidth = 0; - pOut->macroHeight = 0; - pOut->bpp = 32; - } - else - { - pOut->bpp = ComputeHtileInfo(pIn->flags, - pIn->pitch, - pIn->height, - pIn->numSlices, - pIn->isLinear, - isWidth8, - isHeight8, - pIn->pTileInfo, - &pOut->pitch, - &pOut->height, - &pOut->htileBytes, - &pOut->macroWidth, - &pOut->macroHeight, - &pOut->sliceSize, - &pOut->baseAlign); - } - } - } - - ValidMetaBaseAlignments(pOut->baseAlign); - - return returnCode; -} - -/** -**************************************************************************************************** -* Lib::ComputeCmaskInfo -* -* @brief -* Interface function stub of AddrComputeCmaskInfo -* -* @return -* ADDR_E_RETURNCODE -**************************************************************************************************** -*/ -ADDR_E_RETURNCODE Lib::ComputeCmaskInfo( - const ADDR_COMPUTE_CMASK_INFO_INPUT* pIn, ///< [in] input structure - ADDR_COMPUTE_CMASK_INFO_OUTPUT* pOut ///< [out] output structure - ) const -{ - ADDR_E_RETURNCODE returnCode = ADDR_OK; - - if (GetFillSizeFieldsFlags() == TRUE) - { - if ((pIn->size != sizeof(ADDR_COMPUTE_CMASK_INFO_INPUT)) || - (pOut->size != sizeof(ADDR_COMPUTE_CMASK_INFO_OUTPUT))) - { - returnCode = ADDR_PARAMSIZEMISMATCH; - } - } - - if (returnCode == ADDR_OK) - { - ADDR_TILEINFO tileInfoNull; - ADDR_COMPUTE_CMASK_INFO_INPUT input; - - if (UseTileIndex(pIn->tileIndex)) - { - input = *pIn; - // Use temp tile info for calcalation - input.pTileInfo = &tileInfoNull; - - returnCode = HwlSetupTileCfg(0, input.tileIndex, input.macroModeIndex, input.pTileInfo); - - // Change the input structure - pIn = &input; - } - - if (returnCode == ADDR_OK) - { - returnCode = ComputeCmaskInfo(pIn->flags, - pIn->pitch, - pIn->height, - pIn->numSlices, - pIn->isLinear, - pIn->pTileInfo, - &pOut->pitch, - &pOut->height, - &pOut->cmaskBytes, - &pOut->macroWidth, - &pOut->macroHeight, - 
&pOut->sliceSize, - &pOut->baseAlign, - &pOut->blockMax); - } - } - - ValidMetaBaseAlignments(pOut->baseAlign); - - return returnCode; -} - -/** -**************************************************************************************************** -* Lib::ComputeDccInfo -* -* @brief -* Interface function to compute DCC key info -* -* @return -* return code of HwlComputeDccInfo -**************************************************************************************************** -*/ -ADDR_E_RETURNCODE Lib::ComputeDccInfo( - const ADDR_COMPUTE_DCCINFO_INPUT* pIn, ///< [in] input structure - ADDR_COMPUTE_DCCINFO_OUTPUT* pOut ///< [out] output structure - ) const -{ - ADDR_E_RETURNCODE ret = ADDR_OK; - - if (GetFillSizeFieldsFlags() == TRUE) - { - if ((pIn->size != sizeof(ADDR_COMPUTE_DCCINFO_INPUT)) || - (pOut->size != sizeof(ADDR_COMPUTE_DCCINFO_OUTPUT))) - { - ret = ADDR_PARAMSIZEMISMATCH; - } - } - - if (ret == ADDR_OK) - { - ADDR_COMPUTE_DCCINFO_INPUT input; - - if (UseTileIndex(pIn->tileIndex)) - { - input = *pIn; - - ret = HwlSetupTileCfg(input.bpp, input.tileIndex, input.macroModeIndex, - &input.tileInfo, &input.tileMode); - - pIn = &input; - } - - if (ret == ADDR_OK) - { - ret = HwlComputeDccInfo(pIn, pOut); - - ValidMetaBaseAlignments(pOut->dccRamBaseAlign); - } - } - - return ret; -} - -/** -**************************************************************************************************** -* Lib::ComputeHtileAddrFromCoord -* -* @brief -* Interface function stub of AddrComputeHtileAddrFromCoord -* -* @return -* ADDR_E_RETURNCODE -**************************************************************************************************** -*/ -ADDR_E_RETURNCODE Lib::ComputeHtileAddrFromCoord( - const ADDR_COMPUTE_HTILE_ADDRFROMCOORD_INPUT* pIn, ///< [in] input structure - ADDR_COMPUTE_HTILE_ADDRFROMCOORD_OUTPUT* pOut ///< [out] output structure - ) const -{ - ADDR_E_RETURNCODE returnCode = ADDR_OK; - - BOOL_32 isWidth8 = (pIn->blockWidth == 8) ? 
TRUE : FALSE; - BOOL_32 isHeight8 = (pIn->blockHeight == 8) ? TRUE : FALSE; - - if (GetFillSizeFieldsFlags() == TRUE) - { - if ((pIn->size != sizeof(ADDR_COMPUTE_HTILE_ADDRFROMCOORD_INPUT)) || - (pOut->size != sizeof(ADDR_COMPUTE_HTILE_ADDRFROMCOORD_OUTPUT))) - { - returnCode = ADDR_PARAMSIZEMISMATCH; - } - } - - if (returnCode == ADDR_OK) - { - ADDR_TILEINFO tileInfoNull; - ADDR_COMPUTE_HTILE_ADDRFROMCOORD_INPUT input; - - if (UseTileIndex(pIn->tileIndex)) - { - input = *pIn; - // Use temp tile info for calcalation - input.pTileInfo = &tileInfoNull; - - returnCode = HwlSetupTileCfg(0, input.tileIndex, input.macroModeIndex, input.pTileInfo); - - // Change the input structure - pIn = &input; - } - - if (returnCode == ADDR_OK) - { - if (pIn->flags.tcCompatible) - { - HwlComputeHtileAddrFromCoord(pIn, pOut); - } - else - { - pOut->addr = HwlComputeXmaskAddrFromCoord(pIn->pitch, - pIn->height, - pIn->x, - pIn->y, - pIn->slice, - pIn->numSlices, - 1, - pIn->isLinear, - isWidth8, - isHeight8, - pIn->pTileInfo, - &pOut->bitPosition); - } - } - } - - return returnCode; - -} - -/** -**************************************************************************************************** -* Lib::ComputeHtileCoordFromAddr -* -* @brief -* Interface function stub of AddrComputeHtileCoordFromAddr -* -* @return -* ADDR_E_RETURNCODE -**************************************************************************************************** -*/ -ADDR_E_RETURNCODE Lib::ComputeHtileCoordFromAddr( - const ADDR_COMPUTE_HTILE_COORDFROMADDR_INPUT* pIn, ///< [in] input structure - ADDR_COMPUTE_HTILE_COORDFROMADDR_OUTPUT* pOut ///< [out] output structure - ) const -{ - ADDR_E_RETURNCODE returnCode = ADDR_OK; - - BOOL_32 isWidth8 = (pIn->blockWidth == 8) ? TRUE : FALSE; - BOOL_32 isHeight8 = (pIn->blockHeight == 8) ? 
TRUE : FALSE; - - if (GetFillSizeFieldsFlags() == TRUE) - { - if ((pIn->size != sizeof(ADDR_COMPUTE_HTILE_COORDFROMADDR_INPUT)) || - (pOut->size != sizeof(ADDR_COMPUTE_HTILE_COORDFROMADDR_OUTPUT))) - { - returnCode = ADDR_PARAMSIZEMISMATCH; - } - } - - if (returnCode == ADDR_OK) - { - ADDR_TILEINFO tileInfoNull; - ADDR_COMPUTE_HTILE_COORDFROMADDR_INPUT input; - - if (UseTileIndex(pIn->tileIndex)) - { - input = *pIn; - // Use temp tile info for calcalation - input.pTileInfo = &tileInfoNull; - - returnCode = HwlSetupTileCfg(0, input.tileIndex, input.macroModeIndex, input.pTileInfo); - - // Change the input structure - pIn = &input; - } - - if (returnCode == ADDR_OK) - { - HwlComputeXmaskCoordFromAddr(pIn->addr, - pIn->bitPosition, - pIn->pitch, - pIn->height, - pIn->numSlices, - 1, - pIn->isLinear, - isWidth8, - isHeight8, - pIn->pTileInfo, - &pOut->x, - &pOut->y, - &pOut->slice); - } - } - - return returnCode; -} - -/** -**************************************************************************************************** -* Lib::ComputeCmaskAddrFromCoord -* -* @brief -* Interface function stub of AddrComputeCmaskAddrFromCoord -* -* @return -* ADDR_E_RETURNCODE -**************************************************************************************************** -*/ -ADDR_E_RETURNCODE Lib::ComputeCmaskAddrFromCoord( - const ADDR_COMPUTE_CMASK_ADDRFROMCOORD_INPUT* pIn, ///< [in] input structure - ADDR_COMPUTE_CMASK_ADDRFROMCOORD_OUTPUT* pOut ///< [out] output structure - ) const -{ - ADDR_E_RETURNCODE returnCode = ADDR_OK; - - if (GetFillSizeFieldsFlags() == TRUE) - { - if ((pIn->size != sizeof(ADDR_COMPUTE_CMASK_ADDRFROMCOORD_INPUT)) || - (pOut->size != sizeof(ADDR_COMPUTE_CMASK_ADDRFROMCOORD_OUTPUT))) - { - returnCode = ADDR_PARAMSIZEMISMATCH; - } - } - - if (returnCode == ADDR_OK) - { - ADDR_TILEINFO tileInfoNull; - ADDR_COMPUTE_CMASK_ADDRFROMCOORD_INPUT input; - - if (UseTileIndex(pIn->tileIndex)) - { - input = *pIn; - // Use temp tile info for calcalation - 
input.pTileInfo = &tileInfoNull; - - returnCode = HwlSetupTileCfg(0, input.tileIndex, input.macroModeIndex, input.pTileInfo); - - // Change the input structure - pIn = &input; - } - - if (returnCode == ADDR_OK) - { - if (pIn->flags.tcCompatible == TRUE) - { - returnCode = HwlComputeCmaskAddrFromCoord(pIn, pOut); - } - else - { - pOut->addr = HwlComputeXmaskAddrFromCoord(pIn->pitch, - pIn->height, - pIn->x, - pIn->y, - pIn->slice, - pIn->numSlices, - 2, - pIn->isLinear, - FALSE, //this is cmask, isWidth8 is not needed - FALSE, //this is cmask, isHeight8 is not needed - pIn->pTileInfo, - &pOut->bitPosition); - } - - } - } - - return returnCode; -} - -/** -**************************************************************************************************** -* Lib::ComputeCmaskCoordFromAddr -* -* @brief -* Interface function stub of AddrComputeCmaskCoordFromAddr -* -* @return -* ADDR_E_RETURNCODE -**************************************************************************************************** -*/ -ADDR_E_RETURNCODE Lib::ComputeCmaskCoordFromAddr( - const ADDR_COMPUTE_CMASK_COORDFROMADDR_INPUT* pIn, ///< [in] input structure - ADDR_COMPUTE_CMASK_COORDFROMADDR_OUTPUT* pOut ///< [out] output structure - ) const -{ - ADDR_E_RETURNCODE returnCode = ADDR_OK; - - if (GetFillSizeFieldsFlags() == TRUE) - { - if ((pIn->size != sizeof(ADDR_COMPUTE_CMASK_COORDFROMADDR_INPUT)) || - (pOut->size != sizeof(ADDR_COMPUTE_CMASK_COORDFROMADDR_OUTPUT))) - { - returnCode = ADDR_PARAMSIZEMISMATCH; - } - } - - if (returnCode == ADDR_OK) - { - ADDR_TILEINFO tileInfoNull; - ADDR_COMPUTE_CMASK_COORDFROMADDR_INPUT input; - - if (UseTileIndex(pIn->tileIndex)) - { - input = *pIn; - // Use temp tile info for calcalation - input.pTileInfo = &tileInfoNull; - - returnCode = HwlSetupTileCfg(0, input.tileIndex, input.macroModeIndex, input.pTileInfo); - - // Change the input structure - pIn = &input; - } - - if (returnCode == ADDR_OK) - { - HwlComputeXmaskCoordFromAddr(pIn->addr, - pIn->bitPosition, - 
pIn->pitch, - pIn->height, - pIn->numSlices, - 2, - pIn->isLinear, - FALSE, - FALSE, - pIn->pTileInfo, - &pOut->x, - &pOut->y, - &pOut->slice); - } - } - - return returnCode; -} - -/** -**************************************************************************************************** -* Lib::ComputeTileDataWidthAndHeight -* -* @brief -* Compute the squared cache shape for per-tile data (CMASK and HTILE) -* -* @return -* N/A -* -* @note -* MacroWidth and macroHeight are measured in pixels -**************************************************************************************************** -*/ -VOID Lib::ComputeTileDataWidthAndHeight( - UINT_32 bpp, ///< [in] bits per pixel - UINT_32 cacheBits, ///< [in] bits of cache - ADDR_TILEINFO* pTileInfo, ///< [in] Tile info - UINT_32* pMacroWidth, ///< [out] macro tile width - UINT_32* pMacroHeight ///< [out] macro tile height - ) const -{ - UINT_32 height = 1; - UINT_32 width = cacheBits / bpp; - UINT_32 pipes = HwlGetPipes(pTileInfo); - - // Double height until the macro-tile is close to square - // Height can only be doubled if width is even - - while ((width > height * 2 * pipes) && !(width & 1)) - { - width /= 2; - height *= 2; - } - - *pMacroWidth = 8 * width; - *pMacroHeight = 8 * height * pipes; - - // Note: The above iterative comptuation is equivalent to the following - // - //int log2_height = ((log2(cacheBits)-log2(bpp)-log2(pipes))/2); - //int macroHeight = pow2( 3+log2(pipes)+log2_height ); -} - -/** -**************************************************************************************************** -* Lib::HwlComputeTileDataWidthAndHeightLinear -* -* @brief -* Compute the squared cache shape for per-tile data (CMASK and HTILE) for linear layout -* -* @return -* N/A -* -* @note -* MacroWidth and macroHeight are measured in pixels -**************************************************************************************************** -*/ -VOID Lib::HwlComputeTileDataWidthAndHeightLinear( - UINT_32* pMacroWidth, 
///< [out] macro tile width - UINT_32* pMacroHeight, ///< [out] macro tile height - UINT_32 bpp, ///< [in] bits per pixel - ADDR_TILEINFO* pTileInfo ///< [in] tile info - ) const -{ - ADDR_ASSERT(bpp != 4); // Cmask does not support linear layout prior to SI - *pMacroWidth = 8 * 512 / bpp; // Align width to 512-bit memory accesses - *pMacroHeight = 8 * m_pipes; // Align height to number of pipes -} - -/** -**************************************************************************************************** -* Lib::ComputeHtileInfo -* -* @brief -* Compute htile pitch,width, bytes per 2D slice -* -* @return -* Htile bpp i.e. How many bits for an 8x8 tile -* Also returns by output parameters: -* *Htile pitch, height, total size in bytes, macro-tile dimensions and slice size* -**************************************************************************************************** -*/ -UINT_32 Lib::ComputeHtileInfo( - ADDR_HTILE_FLAGS flags, ///< [in] htile flags - UINT_32 pitchIn, ///< [in] pitch input - UINT_32 heightIn, ///< [in] height input - UINT_32 numSlices, ///< [in] number of slices - BOOL_32 isLinear, ///< [in] if it is linear mode - BOOL_32 isWidth8, ///< [in] if htile block width is 8 - BOOL_32 isHeight8, ///< [in] if htile block height is 8 - ADDR_TILEINFO* pTileInfo, ///< [in] Tile info - UINT_32* pPitchOut, ///< [out] pitch output - UINT_32* pHeightOut, ///< [out] height output - UINT_64* pHtileBytes, ///< [out] bytes per 2D slice - UINT_32* pMacroWidth, ///< [out] macro-tile width in pixels - UINT_32* pMacroHeight, ///< [out] macro-tile width in pixels - UINT_64* pSliceSize, ///< [out] slice size in bytes - UINT_32* pBaseAlign ///< [out] base alignment - ) const -{ - - UINT_32 macroWidth; - UINT_32 macroHeight; - UINT_32 baseAlign; - UINT_64 surfBytes; - UINT_64 sliceBytes; - - numSlices = Max(1u, numSlices); - - const UINT_32 bpp = HwlComputeHtileBpp(isWidth8, isHeight8); - const UINT_32 cacheBits = HtileCacheBits; - - if (isLinear) - { - 
HwlComputeTileDataWidthAndHeightLinear(¯oWidth, - ¯oHeight, - bpp, - pTileInfo); - } - else - { - ComputeTileDataWidthAndHeight(bpp, - cacheBits, - pTileInfo, - ¯oWidth, - ¯oHeight); - } - - *pPitchOut = PowTwoAlign(pitchIn, macroWidth); - *pHeightOut = PowTwoAlign(heightIn, macroHeight); - - baseAlign = HwlComputeHtileBaseAlign(flags.tcCompatible, isLinear, pTileInfo); - - surfBytes = HwlComputeHtileBytes(*pPitchOut, - *pHeightOut, - bpp, - isLinear, - numSlices, - &sliceBytes, - baseAlign); - - *pHtileBytes = surfBytes; - - // - // Use SafeAssign since they are optional - // - SafeAssign(pMacroWidth, macroWidth); - - SafeAssign(pMacroHeight, macroHeight); - - SafeAssign(pSliceSize, sliceBytes); - - SafeAssign(pBaseAlign, baseAlign); - - return bpp; -} - -/** -**************************************************************************************************** -* Lib::ComputeCmaskBaseAlign -* -* @brief -* Compute cmask base alignment -* -* @return -* Cmask base alignment -**************************************************************************************************** -*/ -UINT_32 Lib::ComputeCmaskBaseAlign( - ADDR_CMASK_FLAGS flags, ///< [in] Cmask flags - ADDR_TILEINFO* pTileInfo ///< [in] Tile info - ) const -{ - UINT_32 baseAlign = m_pipeInterleaveBytes * HwlGetPipes(pTileInfo); - - if (flags.tcCompatible) - { - ADDR_ASSERT(pTileInfo != NULL); - if (pTileInfo) - { - baseAlign *= pTileInfo->banks; - } - } - - return baseAlign; -} - -/** -**************************************************************************************************** -* Lib::ComputeCmaskBytes -* -* @brief -* Compute cmask size in bytes -* -* @return -* Cmask size in bytes -**************************************************************************************************** -*/ -UINT_64 Lib::ComputeCmaskBytes( - UINT_32 pitch, ///< [in] pitch - UINT_32 height, ///< [in] height - UINT_32 numSlices ///< [in] number of slices - ) const -{ - return BITS_TO_BYTES(static_cast(pitch) * height * 
numSlices * CmaskElemBits) / - MicroTilePixels; -} - -/** -**************************************************************************************************** -* Lib::ComputeCmaskInfo -* -* @brief -* Compute cmask pitch,width, bytes per 2D slice -* -* @return -* BlockMax. Also by output parameters: Cmask pitch,height, total size in bytes, -* macro-tile dimensions -**************************************************************************************************** -*/ -ADDR_E_RETURNCODE Lib::ComputeCmaskInfo( - ADDR_CMASK_FLAGS flags, ///< [in] cmask flags - UINT_32 pitchIn, ///< [in] pitch input - UINT_32 heightIn, ///< [in] height input - UINT_32 numSlices, ///< [in] number of slices - BOOL_32 isLinear, ///< [in] is linear mode - ADDR_TILEINFO* pTileInfo, ///< [in] Tile info - UINT_32* pPitchOut, ///< [out] pitch output - UINT_32* pHeightOut, ///< [out] height output - UINT_64* pCmaskBytes, ///< [out] bytes per 2D slice - UINT_32* pMacroWidth, ///< [out] macro-tile width in pixels - UINT_32* pMacroHeight, ///< [out] macro-tile width in pixels - UINT_64* pSliceSize, ///< [out] slice size in bytes - UINT_32* pBaseAlign, ///< [out] base alignment - UINT_32* pBlockMax ///< [out] block max == slice / 128 / 128 - 1 - ) const -{ - UINT_32 macroWidth; - UINT_32 macroHeight; - UINT_32 baseAlign; - UINT_64 surfBytes; - UINT_64 sliceBytes; - - numSlices = Max(1u, numSlices); - - const UINT_32 bpp = CmaskElemBits; - const UINT_32 cacheBits = CmaskCacheBits; - - ADDR_E_RETURNCODE returnCode = ADDR_OK; - - if (isLinear) - { - HwlComputeTileDataWidthAndHeightLinear(¯oWidth, - ¯oHeight, - bpp, - pTileInfo); - } - else - { - ComputeTileDataWidthAndHeight(bpp, - cacheBits, - pTileInfo, - ¯oWidth, - ¯oHeight); - } - - *pPitchOut = (pitchIn + macroWidth - 1) & ~(macroWidth - 1); - *pHeightOut = (heightIn + macroHeight - 1) & ~(macroHeight - 1); - - - sliceBytes = ComputeCmaskBytes(*pPitchOut, - *pHeightOut, - 1); - - baseAlign = ComputeCmaskBaseAlign(flags, pTileInfo); - - while 
(sliceBytes % baseAlign) - { - *pHeightOut += macroHeight; - - sliceBytes = ComputeCmaskBytes(*pPitchOut, - *pHeightOut, - 1); - } - - surfBytes = sliceBytes * numSlices; - - *pCmaskBytes = surfBytes; - - // - // Use SafeAssign since they are optional - // - SafeAssign(pMacroWidth, macroWidth); - - SafeAssign(pMacroHeight, macroHeight); - - SafeAssign(pBaseAlign, baseAlign); - - SafeAssign(pSliceSize, sliceBytes); - - UINT_32 slice = (*pPitchOut) * (*pHeightOut); - UINT_32 blockMax = slice / 128 / 128 - 1; - -#if DEBUG - if (slice % (64*256) != 0) - { - ADDR_ASSERT_ALWAYS(); - } -#endif //DEBUG - - UINT_32 maxBlockMax = HwlGetMaxCmaskBlockMax(); - - if (blockMax > maxBlockMax) - { - blockMax = maxBlockMax; - returnCode = ADDR_INVALIDPARAMS; - } - - SafeAssign(pBlockMax, blockMax); - - return returnCode; -} - -/** -**************************************************************************************************** -* Lib::ComputeXmaskCoordYFromPipe -* -* @brief -* Compute the Y coord from pipe number for cmask/htile -* -* @return -* Y coordinate -* -**************************************************************************************************** -*/ -UINT_32 Lib::ComputeXmaskCoordYFromPipe( - UINT_32 pipe, ///< [in] pipe number - UINT_32 x ///< [in] x coordinate - ) const -{ - UINT_32 pipeBit0; - UINT_32 pipeBit1; - UINT_32 xBit0; - UINT_32 xBit1; - UINT_32 yBit0; - UINT_32 yBit1; - - UINT_32 y = 0; - - UINT_32 numPipes = m_pipes; // SI has its implementation - // - // Convert pipe + x to y coordinate. 
- // - switch (numPipes) - { - case 1: - // - // 1 pipe - // - // p0 = 0 - // - y = 0; - break; - case 2: - // - // 2 pipes - // - // p0 = x0 ^ y0 - // - // y0 = p0 ^ x0 - // - pipeBit0 = pipe & 0x1; - - xBit0 = x & 0x1; - - yBit0 = pipeBit0 ^ xBit0; - - y = yBit0; - break; - case 4: - // - // 4 pipes - // - // p0 = x1 ^ y0 - // p1 = x0 ^ y1 - // - // y0 = p0 ^ x1 - // y1 = p1 ^ x0 - // - pipeBit0 = pipe & 0x1; - pipeBit1 = (pipe & 0x2) >> 1; - - xBit0 = x & 0x1; - xBit1 = (x & 0x2) >> 1; - - yBit0 = pipeBit0 ^ xBit1; - yBit1 = pipeBit1 ^ xBit0; - - y = (yBit0 | - (yBit1 << 1)); - break; - case 8: - // - // 8 pipes - // - // r600 and r800 have different method - // - y = HwlComputeXmaskCoordYFrom8Pipe(pipe, x); - break; - default: - break; - } - return y; -} - -/** -**************************************************************************************************** -* Lib::HwlComputeXmaskCoordFromAddr -* -* @brief -* Compute the coord from an address of a cmask/htile -* -* @return -* N/A -* -* @note -* This method is reused by htile, so rename to Xmask -**************************************************************************************************** -*/ -VOID Lib::HwlComputeXmaskCoordFromAddr( - UINT_64 addr, ///< [in] address - UINT_32 bitPosition, ///< [in] bitPosition in a byte - UINT_32 pitch, ///< [in] pitch - UINT_32 height, ///< [in] height - UINT_32 numSlices, ///< [in] number of slices - UINT_32 factor, ///< [in] factor that indicates cmask or htile - BOOL_32 isLinear, ///< [in] linear or tiled HTILE layout - BOOL_32 isWidth8, ///< [in] TRUE if width is 8, FALSE means 4. It's register value - BOOL_32 isHeight8, ///< [in] TRUE if width is 8, FALSE means 4. 
It's register value - ADDR_TILEINFO* pTileInfo, ///< [in] Tile info - UINT_32* pX, ///< [out] x coord - UINT_32* pY, ///< [out] y coord - UINT_32* pSlice ///< [out] slice index - ) const -{ - UINT_32 pipe; - UINT_32 numPipes; - UINT_32 numGroupBits; - (void)numGroupBits; - UINT_32 numPipeBits; - UINT_32 macroTilePitch; - UINT_32 macroTileHeight; - - UINT_64 bitAddr; - - UINT_32 microTileCoordY; - - UINT_32 elemBits; - - UINT_32 pitchAligned = pitch; - UINT_32 heightAligned = height; - UINT_64 totalBytes; - - UINT_64 elemOffset; - - UINT_64 macroIndex; - UINT_32 microIndex; - - UINT_64 macroNumber; - UINT_32 microNumber; - - UINT_32 macroX; - UINT_32 macroY; - UINT_32 macroZ; - - UINT_32 microX; - UINT_32 microY; - - UINT_32 tilesPerMacro; - UINT_32 macrosPerPitch; - UINT_32 macrosPerSlice; - - // - // Extract pipe. - // - numPipes = HwlGetPipes(pTileInfo); - pipe = ComputePipeFromAddr(addr, numPipes); - - // - // Compute the number of group and pipe bits. - // - numGroupBits = Log2(m_pipeInterleaveBytes); - numPipeBits = Log2(numPipes); - - UINT_32 groupBits = 8 * m_pipeInterleaveBytes; - UINT_32 pipes = numPipes; - - - // - // Compute the micro tile size, in bits. And macro tile pitch and height. - // - if (factor == 2) //CMASK - { - ADDR_CMASK_FLAGS flags = {{0}}; - - elemBits = CmaskElemBits; - - ComputeCmaskInfo(flags, - pitch, - height, - numSlices, - isLinear, - pTileInfo, - &pitchAligned, - &heightAligned, - &totalBytes, - ¯oTilePitch, - ¯oTileHeight); - } - else //HTILE - { - ADDR_HTILE_FLAGS flags = {{0}}; - - if (factor != 1) - { - factor = 1; - } - - elemBits = HwlComputeHtileBpp(isWidth8, isHeight8); - - ComputeHtileInfo(flags, - pitch, - height, - numSlices, - isLinear, - isWidth8, - isHeight8, - pTileInfo, - &pitchAligned, - &heightAligned, - &totalBytes, - ¯oTilePitch, - ¯oTileHeight); - } - - // Should use aligned dims - // - pitch = pitchAligned; - height = heightAligned; - - - // - // Convert byte address to bit address. 
- // - bitAddr = BYTES_TO_BITS(addr) + bitPosition; - - - // - // Remove pipe bits from address. - // - - bitAddr = (bitAddr % groupBits) + ((bitAddr/groupBits/pipes)*groupBits); - - - elemOffset = bitAddr / elemBits; - - tilesPerMacro = (macroTilePitch/factor) * macroTileHeight / MicroTilePixels >> numPipeBits; - - macrosPerPitch = pitch / (macroTilePitch/factor); - macrosPerSlice = macrosPerPitch * height / macroTileHeight; - - macroIndex = elemOffset / factor / tilesPerMacro; - microIndex = static_cast(elemOffset % (tilesPerMacro * factor)); - - macroNumber = macroIndex * factor + microIndex % factor; - microNumber = microIndex / factor; - - macroX = static_cast((macroNumber % macrosPerPitch)); - macroY = static_cast((macroNumber % macrosPerSlice) / macrosPerPitch); - macroZ = static_cast((macroNumber / macrosPerSlice)); - - - microX = microNumber % (macroTilePitch / factor / MicroTileWidth); - microY = (microNumber / (macroTilePitch / factor / MicroTileHeight)); - - *pX = macroX * (macroTilePitch/factor) + microX * MicroTileWidth; - *pY = macroY * macroTileHeight + (microY * MicroTileHeight << numPipeBits); - *pSlice = macroZ; - - microTileCoordY = ComputeXmaskCoordYFromPipe(pipe, - *pX/MicroTileWidth); - - - // - // Assemble final coordinates. 
- // - *pY += microTileCoordY * MicroTileHeight; - -} - -/** -**************************************************************************************************** -* Lib::HwlComputeXmaskAddrFromCoord -* -* @brief -* Compute the address from an address of cmask (prior to si) -* -* @return -* Address in bytes -* -**************************************************************************************************** -*/ -UINT_64 Lib::HwlComputeXmaskAddrFromCoord( - UINT_32 pitch, ///< [in] pitch - UINT_32 height, ///< [in] height - UINT_32 x, ///< [in] x coord - UINT_32 y, ///< [in] y coord - UINT_32 slice, ///< [in] slice/depth index - UINT_32 numSlices, ///< [in] number of slices - UINT_32 factor, ///< [in] factor that indicates cmask(2) or htile(1) - BOOL_32 isLinear, ///< [in] linear or tiled HTILE layout - BOOL_32 isWidth8, ///< [in] TRUE if width is 8, FALSE means 4. It's register value - BOOL_32 isHeight8, ///< [in] TRUE if width is 8, FALSE means 4. It's register value - ADDR_TILEINFO* pTileInfo, ///< [in] Tile info - UINT_32* pBitPosition ///< [out] bit position inside a byte - ) const -{ - UINT_64 addr; - UINT_32 numGroupBits; - UINT_32 numPipeBits; - UINT_32 newPitch = 0; - UINT_32 newHeight = 0; - UINT_64 sliceBytes = 0; - UINT_64 totalBytes = 0; - UINT_64 sliceOffset; - UINT_32 pipe; - UINT_32 macroTileWidth; - UINT_32 macroTileHeight; - UINT_32 macroTilesPerRow; - UINT_32 macroTileBytes; - UINT_32 macroTileIndexX; - UINT_32 macroTileIndexY; - UINT_64 macroTileOffset; - UINT_32 pixelBytesPerRow; - UINT_32 pixelOffsetX; - UINT_32 pixelOffsetY; - UINT_32 pixelOffset; - UINT_64 totalOffset; - UINT_64 offsetLo; - UINT_64 offsetHi; - UINT_64 groupMask; - - - UINT_32 elemBits = 0; - - UINT_32 numPipes = m_pipes; // This function is accessed prior to si only - - if (factor == 2) //CMASK - { - elemBits = CmaskElemBits; - - // For asics before SI, cmask is always tiled - isLinear = FALSE; - } - else //HTILE - { - if (factor != 1) // Fix compile warning - { - factor = 
1; - } - - elemBits = HwlComputeHtileBpp(isWidth8, isHeight8); - } - - // - // Compute the number of group bits and pipe bits. - // - numGroupBits = Log2(m_pipeInterleaveBytes); - numPipeBits = Log2(numPipes); - - // - // Compute macro tile dimensions. - // - if (factor == 2) // CMASK - { - ADDR_CMASK_FLAGS flags = {{0}}; - - ComputeCmaskInfo(flags, - pitch, - height, - numSlices, - isLinear, - pTileInfo, - &newPitch, - &newHeight, - &totalBytes, - ¯oTileWidth, - ¯oTileHeight); - - sliceBytes = totalBytes / numSlices; - } - else // HTILE - { - ADDR_HTILE_FLAGS flags = {{0}}; - - ComputeHtileInfo(flags, - pitch, - height, - numSlices, - isLinear, - isWidth8, - isHeight8, - pTileInfo, - &newPitch, - &newHeight, - &totalBytes, - ¯oTileWidth, - ¯oTileHeight, - &sliceBytes); - } - - sliceOffset = slice * sliceBytes; - - // - // Get the pipe. Note that neither slice rotation nor pipe swizzling apply for CMASK. - // - pipe = ComputePipeFromCoord(x, - y, - 0, - ADDR_TM_2D_TILED_THIN1, - 0, - FALSE, - pTileInfo); - - // - // Compute the number of macro tiles per row. - // - macroTilesPerRow = newPitch / macroTileWidth; - - // - // Compute the number of bytes per macro tile. - // - macroTileBytes = BITS_TO_BYTES((macroTileWidth * macroTileHeight * elemBits) / MicroTilePixels); - - // - // Compute the offset to the macro tile containing the specified coordinate. - // - macroTileIndexX = x / macroTileWidth; - macroTileIndexY = y / macroTileHeight; - macroTileOffset = ((macroTileIndexY * macroTilesPerRow) + macroTileIndexX) * macroTileBytes; - - // - // Compute the pixel offset within the macro tile. - // - pixelBytesPerRow = BITS_TO_BYTES(macroTileWidth * elemBits) / MicroTileWidth; - - // - // The nibbles are interleaved (see below), so the part of the offset relative to the x - // coordinate repeats halfway across the row. 
(Not for HTILE) - // - if (factor == 2) - { - pixelOffsetX = (x % (macroTileWidth / 2)) / MicroTileWidth; - } - else - { - pixelOffsetX = (x % (macroTileWidth)) / MicroTileWidth * BITS_TO_BYTES(elemBits); - } - - // - // Compute the y offset within the macro tile. - // - pixelOffsetY = (((y % macroTileHeight) / MicroTileHeight) / numPipes) * pixelBytesPerRow; - - pixelOffset = pixelOffsetX + pixelOffsetY; - - // - // Combine the slice offset and macro tile offset with the pixel offset, accounting for the - // pipe bits in the middle of the address. - // - totalOffset = ((sliceOffset + macroTileOffset) >> numPipeBits) + pixelOffset; - - // - // Split the offset to put some bits below the pipe bits and some above. - // - groupMask = (1 << numGroupBits) - 1; - offsetLo = totalOffset & groupMask; - offsetHi = (totalOffset & ~groupMask) << numPipeBits; - - // - // Assemble the address from its components. - // - addr = offsetLo; - addr |= offsetHi; - // This is to remove warning with /analyze option - UINT_32 pipeBits = pipe << numGroupBits; - addr |= pipeBits; - - // - // Compute the bit position. The lower nibble is used when the x coordinate within the macro - // tile is less than half of the macro tile width, and the upper nibble is used when the x - // coordinate within the macro tile is greater than or equal to half the macro tile width. - // - *pBitPosition = ((x % macroTileWidth) < (macroTileWidth / factor)) ? 
0 : 4; - - return addr; -} - -//////////////////////////////////////////////////////////////////////////////////////////////////// -// Surface Addressing Shared -//////////////////////////////////////////////////////////////////////////////////////////////////// - -/** -**************************************************************************************************** -* Lib::ComputeSurfaceAddrFromCoordLinear -* -* @brief -* Compute address from coord for linear surface -* -* @return -* Address in bytes -* -**************************************************************************************************** -*/ -UINT_64 Lib::ComputeSurfaceAddrFromCoordLinear( - UINT_32 x, ///< [in] x coord - UINT_32 y, ///< [in] y coord - UINT_32 slice, ///< [in] slice/depth index - UINT_32 sample, ///< [in] sample index - UINT_32 bpp, ///< [in] bits per pixel - UINT_32 pitch, ///< [in] pitch - UINT_32 height, ///< [in] height - UINT_32 numSlices, ///< [in] number of slices - UINT_32* pBitPosition ///< [out] bit position inside a byte - ) const -{ - const UINT_64 sliceSize = static_cast(pitch) * height; - - UINT_64 sliceOffset = (slice + sample * numSlices)* sliceSize; - UINT_64 rowOffset = static_cast(y) * pitch; - UINT_64 pixOffset = x; - - UINT_64 addr = (sliceOffset + rowOffset + pixOffset) * bpp; - - *pBitPosition = static_cast(addr % 8); - addr /= 8; - - return addr; -} - -/** -**************************************************************************************************** -* Lib::ComputeSurfaceCoordFromAddrLinear -* -* @brief -* Compute the coord from an address of a linear surface -* -* @return -* N/A -**************************************************************************************************** -*/ -VOID Lib::ComputeSurfaceCoordFromAddrLinear( - UINT_64 addr, ///< [in] address - UINT_32 bitPosition, ///< [in] bitPosition in a byte - UINT_32 bpp, ///< [in] bits per pixel - UINT_32 pitch, ///< [in] pitch - UINT_32 height, ///< [in] height - UINT_32 numSlices, ///< 
[in] number of slices - UINT_32* pX, ///< [out] x coord - UINT_32* pY, ///< [out] y coord - UINT_32* pSlice, ///< [out] slice/depth index - UINT_32* pSample ///< [out] sample index - ) const -{ - const UINT_64 sliceSize = static_cast(pitch) * height; - const UINT_64 linearOffset = (BYTES_TO_BITS(addr) + bitPosition) / bpp; - - *pX = static_cast((linearOffset % sliceSize) % pitch); - *pY = static_cast((linearOffset % sliceSize) / pitch % height); - *pSlice = static_cast((linearOffset / sliceSize) % numSlices); - *pSample = static_cast((linearOffset / sliceSize) / numSlices); -} - -/** -**************************************************************************************************** -* Lib::ComputeSurfaceCoordFromAddrMicroTiled -* -* @brief -* Compute the coord from an address of a micro tiled surface -* -* @return -* N/A -**************************************************************************************************** -*/ -VOID Lib::ComputeSurfaceCoordFromAddrMicroTiled( - UINT_64 addr, ///< [in] address - UINT_32 bitPosition, ///< [in] bitPosition in a byte - UINT_32 bpp, ///< [in] bits per pixel - UINT_32 pitch, ///< [in] pitch - UINT_32 height, ///< [in] height - UINT_32 numSamples, ///< [in] number of samples - AddrTileMode tileMode, ///< [in] tile mode - UINT_32 tileBase, ///< [in] base offset within a tile - UINT_32 compBits, ///< [in] component bits actually needed(for planar surface) - UINT_32* pX, ///< [out] x coord - UINT_32* pY, ///< [out] y coord - UINT_32* pSlice, ///< [out] slice/depth index - UINT_32* pSample, ///< [out] sample index, - AddrTileType microTileType, ///< [in] micro tiling order - BOOL_32 isDepthSampleOrder ///< [in] TRUE if in depth sample order - ) const -{ - UINT_64 bitAddr; - UINT_32 microTileThickness; - UINT_32 microTileBits; - UINT_64 sliceBits; - UINT_64 rowBits; - UINT_32 sliceIndex; - UINT_32 microTileCoordX; - UINT_32 microTileCoordY; - UINT_32 pixelOffset; - UINT_32 pixelCoordX = 0; - UINT_32 pixelCoordY = 0; - UINT_32 
pixelCoordZ = 0; - UINT_32 pixelCoordS = 0; - - // - // Convert byte address to bit address. - // - bitAddr = BYTES_TO_BITS(addr) + bitPosition; - - // - // Compute the micro tile size, in bits. - // - switch (tileMode) - { - case ADDR_TM_1D_TILED_THICK: - microTileThickness = ThickTileThickness; - break; - default: - microTileThickness = 1; - break; - } - - microTileBits = MicroTilePixels * microTileThickness * bpp * numSamples; - - // - // Compute number of bits per slice and number of bits per row of micro tiles. - // - sliceBits = static_cast(pitch) * height * microTileThickness * bpp * numSamples; - - rowBits = (pitch / MicroTileWidth) * microTileBits; - - // - // Extract the slice index. - // - sliceIndex = static_cast(bitAddr / sliceBits); - bitAddr -= sliceIndex * sliceBits; - - // - // Extract the y coordinate of the micro tile. - // - microTileCoordY = static_cast(bitAddr / rowBits) * MicroTileHeight; - bitAddr -= (microTileCoordY / MicroTileHeight) * rowBits; - - // - // Extract the x coordinate of the micro tile. - // - microTileCoordX = static_cast(bitAddr / microTileBits) * MicroTileWidth; - - // - // Compute the pixel offset within the micro tile. - // - pixelOffset = static_cast(bitAddr % microTileBits); - - // - // Extract pixel coordinates from the offset. - // - HwlComputePixelCoordFromOffset(pixelOffset, - bpp, - numSamples, - tileMode, - tileBase, - compBits, - &pixelCoordX, - &pixelCoordY, - &pixelCoordZ, - &pixelCoordS, - microTileType, - isDepthSampleOrder); - - // - // Assemble final coordinates. 
- // - *pX = microTileCoordX + pixelCoordX; - *pY = microTileCoordY + pixelCoordY; - *pSlice = (sliceIndex * microTileThickness) + pixelCoordZ; - *pSample = pixelCoordS; - - if (microTileThickness > 1) - { - *pSample = 0; - } -} - -/** -**************************************************************************************************** -* Lib::ComputePipeFromAddr -* -* @brief -* Compute the pipe number from an address -* -* @return -* Pipe number -* -**************************************************************************************************** -*/ -UINT_32 Lib::ComputePipeFromAddr( - UINT_64 addr, ///< [in] address - UINT_32 numPipes ///< [in] number of banks - ) const -{ - UINT_32 pipe; - - UINT_32 groupBytes = m_pipeInterleaveBytes; //just different terms - - // R600 - // The LSBs of the address are arranged as follows: - // bank | pipe | group - // - // To get the pipe number, shift off the group bits and mask the pipe bits. - // - - // R800 - // The LSBs of the address are arranged as follows: - // bank | bankInterleave | pipe | pipeInterleave - // - // To get the pipe number, shift off the pipe interleave bits and mask the pipe bits. 
- // - - pipe = static_cast(addr >> Log2(groupBytes)) & (numPipes - 1); - - return pipe; -} - -/** -**************************************************************************************************** -* Lib::ComputeMicroTileEquation -* -* @brief -* Compute micro tile equation -* -* @return -* If equation can be computed -* -**************************************************************************************************** -*/ -ADDR_E_RETURNCODE Lib::ComputeMicroTileEquation( - UINT_32 log2BytesPP, ///< [in] log2 of bytes per pixel - AddrTileMode tileMode, ///< [in] tile mode - AddrTileType microTileType, ///< [in] pixel order in display/non-display mode - ADDR_EQUATION* pEquation ///< [out] equation - ) const -{ - ADDR_E_RETURNCODE retCode = ADDR_OK; - - for (UINT_32 i = 0; i < log2BytesPP; i++) - { - pEquation->addr[i].valid = 1; - pEquation->addr[i].channel = 0; - pEquation->addr[i].index = i; - } - - ADDR_CHANNEL_SETTING* pixelBit = &pEquation->addr[log2BytesPP]; - - ADDR_CHANNEL_SETTING x0 = InitChannel(1, 0, log2BytesPP + 0); - ADDR_CHANNEL_SETTING x1 = InitChannel(1, 0, log2BytesPP + 1); - ADDR_CHANNEL_SETTING x2 = InitChannel(1, 0, log2BytesPP + 2); - ADDR_CHANNEL_SETTING y0 = InitChannel(1, 1, 0); - ADDR_CHANNEL_SETTING y1 = InitChannel(1, 1, 1); - ADDR_CHANNEL_SETTING y2 = InitChannel(1, 1, 2); - ADDR_CHANNEL_SETTING z0 = InitChannel(1, 2, 0); - ADDR_CHANNEL_SETTING z1 = InitChannel(1, 2, 1); - ADDR_CHANNEL_SETTING z2 = InitChannel(1, 2, 2); - - UINT_32 thickness = Thickness(tileMode); - UINT_32 bpp = 1 << (log2BytesPP + 3); - - if (microTileType != ADDR_THICK) - { - if (microTileType == ADDR_DISPLAYABLE) - { - switch (bpp) - { - case 8: - pixelBit[0] = x0; - pixelBit[1] = x1; - pixelBit[2] = x2; - pixelBit[3] = y1; - pixelBit[4] = y0; - pixelBit[5] = y2; - break; - case 16: - pixelBit[0] = x0; - pixelBit[1] = x1; - pixelBit[2] = x2; - pixelBit[3] = y0; - pixelBit[4] = y1; - pixelBit[5] = y2; - break; - case 32: - pixelBit[0] = x0; - pixelBit[1] = x1; - 
pixelBit[2] = y0; - pixelBit[3] = x2; - pixelBit[4] = y1; - pixelBit[5] = y2; - break; - case 64: - pixelBit[0] = x0; - pixelBit[1] = y0; - pixelBit[2] = x1; - pixelBit[3] = x2; - pixelBit[4] = y1; - pixelBit[5] = y2; - break; - case 128: - pixelBit[0] = y0; - pixelBit[1] = x0; - pixelBit[2] = x1; - pixelBit[3] = x2; - pixelBit[4] = y1; - pixelBit[5] = y2; - break; - default: - ADDR_ASSERT_ALWAYS(); - break; - } - } - else if (microTileType == ADDR_NON_DISPLAYABLE || microTileType == ADDR_DEPTH_SAMPLE_ORDER) - { - pixelBit[0] = x0; - pixelBit[1] = y0; - pixelBit[2] = x1; - pixelBit[3] = y1; - pixelBit[4] = x2; - pixelBit[5] = y2; - } - else if (microTileType == ADDR_ROTATED) - { - ADDR_ASSERT(thickness == 1); - - switch (bpp) - { - case 8: - pixelBit[0] = y0; - pixelBit[1] = y1; - pixelBit[2] = y2; - pixelBit[3] = x1; - pixelBit[4] = x0; - pixelBit[5] = x2; - break; - case 16: - pixelBit[0] = y0; - pixelBit[1] = y1; - pixelBit[2] = y2; - pixelBit[3] = x0; - pixelBit[4] = x1; - pixelBit[5] = x2; - break; - case 32: - pixelBit[0] = y0; - pixelBit[1] = y1; - pixelBit[2] = x0; - pixelBit[3] = y2; - pixelBit[4] = x1; - pixelBit[5] = x2; - break; - case 64: - pixelBit[0] = y0; - pixelBit[1] = x0; - pixelBit[2] = y1; - pixelBit[3] = x1; - pixelBit[4] = x2; - pixelBit[5] = y2; - break; - default: - retCode = ADDR_NOTSUPPORTED; - break; - } - } - - if (thickness > 1) - { - pixelBit[6] = z0; - pixelBit[7] = z1; - pEquation->numBits = 8 + log2BytesPP; - } - else - { - pEquation->numBits = 6 + log2BytesPP; - } - } - else // ADDR_THICK - { - ADDR_ASSERT(thickness > 1); - - switch (bpp) - { - case 8: - case 16: - pixelBit[0] = x0; - pixelBit[1] = y0; - pixelBit[2] = x1; - pixelBit[3] = y1; - pixelBit[4] = z0; - pixelBit[5] = z1; - break; - case 32: - pixelBit[0] = x0; - pixelBit[1] = y0; - pixelBit[2] = x1; - pixelBit[3] = z0; - pixelBit[4] = y1; - pixelBit[5] = z1; - break; - case 64: - case 128: - pixelBit[0] = x0; - pixelBit[1] = y0; - pixelBit[2] = z0; - pixelBit[3] = x1; - 
pixelBit[4] = y1; - pixelBit[5] = z1; - break; - default: - ADDR_ASSERT_ALWAYS(); - break; - } - - pixelBit[6] = x2; - pixelBit[7] = y2; - pEquation->numBits = 8 + log2BytesPP; - } - - if (thickness == 8) - { - pixelBit[8] = z2; - pEquation->numBits = 9 + log2BytesPP; - } - - // stackedDepthSlices is used for addressing mode that a tile block contains multiple slices, - // which is not supported by our address lib - pEquation->stackedDepthSlices = FALSE; - - return retCode; -} - -/** -**************************************************************************************************** -* Lib::ComputePixelIndexWithinMicroTile -* -* @brief -* Compute the pixel index inside a micro tile of surface -* -* @return -* Pixel index -* -**************************************************************************************************** -*/ -UINT_32 Lib::ComputePixelIndexWithinMicroTile( - UINT_32 x, ///< [in] x coord - UINT_32 y, ///< [in] y coord - UINT_32 z, ///< [in] slice/depth index - UINT_32 bpp, ///< [in] bits per pixel - AddrTileMode tileMode, ///< [in] tile mode - AddrTileType microTileType ///< [in] pixel order in display/non-display mode - ) const -{ - UINT_32 pixelBit0 = 0; - UINT_32 pixelBit1 = 0; - UINT_32 pixelBit2 = 0; - UINT_32 pixelBit3 = 0; - UINT_32 pixelBit4 = 0; - UINT_32 pixelBit5 = 0; - UINT_32 pixelBit6 = 0; - UINT_32 pixelBit7 = 0; - UINT_32 pixelBit8 = 0; - UINT_32 pixelNumber; - - UINT_32 x0 = _BIT(x, 0); - UINT_32 x1 = _BIT(x, 1); - UINT_32 x2 = _BIT(x, 2); - UINT_32 y0 = _BIT(y, 0); - UINT_32 y1 = _BIT(y, 1); - UINT_32 y2 = _BIT(y, 2); - UINT_32 z0 = _BIT(z, 0); - UINT_32 z1 = _BIT(z, 1); - UINT_32 z2 = _BIT(z, 2); - - UINT_32 thickness = Thickness(tileMode); - - // Compute the pixel number within the micro tile. 
- - if (microTileType != ADDR_THICK) - { - if (microTileType == ADDR_DISPLAYABLE) - { - switch (bpp) - { - case 8: - pixelBit0 = x0; - pixelBit1 = x1; - pixelBit2 = x2; - pixelBit3 = y1; - pixelBit4 = y0; - pixelBit5 = y2; - break; - case 16: - pixelBit0 = x0; - pixelBit1 = x1; - pixelBit2 = x2; - pixelBit3 = y0; - pixelBit4 = y1; - pixelBit5 = y2; - break; - case 32: - pixelBit0 = x0; - pixelBit1 = x1; - pixelBit2 = y0; - pixelBit3 = x2; - pixelBit4 = y1; - pixelBit5 = y2; - break; - case 64: - pixelBit0 = x0; - pixelBit1 = y0; - pixelBit2 = x1; - pixelBit3 = x2; - pixelBit4 = y1; - pixelBit5 = y2; - break; - case 128: - pixelBit0 = y0; - pixelBit1 = x0; - pixelBit2 = x1; - pixelBit3 = x2; - pixelBit4 = y1; - pixelBit5 = y2; - break; - default: - ADDR_ASSERT_ALWAYS(); - break; - } - } - else if (microTileType == ADDR_NON_DISPLAYABLE || microTileType == ADDR_DEPTH_SAMPLE_ORDER) - { - pixelBit0 = x0; - pixelBit1 = y0; - pixelBit2 = x1; - pixelBit3 = y1; - pixelBit4 = x2; - pixelBit5 = y2; - } - else if (microTileType == ADDR_ROTATED) - { - ADDR_ASSERT(thickness == 1); - - switch (bpp) - { - case 8: - pixelBit0 = y0; - pixelBit1 = y1; - pixelBit2 = y2; - pixelBit3 = x1; - pixelBit4 = x0; - pixelBit5 = x2; - break; - case 16: - pixelBit0 = y0; - pixelBit1 = y1; - pixelBit2 = y2; - pixelBit3 = x0; - pixelBit4 = x1; - pixelBit5 = x2; - break; - case 32: - pixelBit0 = y0; - pixelBit1 = y1; - pixelBit2 = x0; - pixelBit3 = y2; - pixelBit4 = x1; - pixelBit5 = x2; - break; - case 64: - pixelBit0 = y0; - pixelBit1 = x0; - pixelBit2 = y1; - pixelBit3 = x1; - pixelBit4 = x2; - pixelBit5 = y2; - break; - default: - ADDR_ASSERT_ALWAYS(); - break; - } - } - - if (thickness > 1) - { - pixelBit6 = z0; - pixelBit7 = z1; - } - } - else // ADDR_THICK - { - ADDR_ASSERT(thickness > 1); - - switch (bpp) - { - case 8: - case 16: - pixelBit0 = x0; - pixelBit1 = y0; - pixelBit2 = x1; - pixelBit3 = y1; - pixelBit4 = z0; - pixelBit5 = z1; - break; - case 32: - pixelBit0 = x0; - pixelBit1 = y0; 
- pixelBit2 = x1; - pixelBit3 = z0; - pixelBit4 = y1; - pixelBit5 = z1; - break; - case 64: - case 128: - pixelBit0 = x0; - pixelBit1 = y0; - pixelBit2 = z0; - pixelBit3 = x1; - pixelBit4 = y1; - pixelBit5 = z1; - break; - default: - ADDR_ASSERT_ALWAYS(); - break; - } - - pixelBit6 = x2; - pixelBit7 = y2; - } - - if (thickness == 8) - { - pixelBit8 = z2; - } - - pixelNumber = ((pixelBit0 ) | - (pixelBit1 << 1) | - (pixelBit2 << 2) | - (pixelBit3 << 3) | - (pixelBit4 << 4) | - (pixelBit5 << 5) | - (pixelBit6 << 6) | - (pixelBit7 << 7) | - (pixelBit8 << 8)); - - return pixelNumber; -} - -/** -**************************************************************************************************** -* Lib::AdjustPitchAlignment -* -* @brief -* Adjusts pitch alignment for flipping surface -* -* @return -* N/A -* -**************************************************************************************************** -*/ -VOID Lib::AdjustPitchAlignment( - ADDR_SURFACE_FLAGS flags, ///< [in] Surface flags - UINT_32* pPitchAlign ///< [out] Pointer to pitch alignment - ) const -{ - // Display engine hardwires lower 5 bit of GRPH_PITCH to ZERO which means 32 pixel alignment - // Maybe it will be fixed in future but let's make it general for now. 
- if (flags.display || flags.overlay) - { - *pPitchAlign = PowTwoAlign(*pPitchAlign, 32); - - if(flags.display) - { - *pPitchAlign = Max(m_minPitchAlignPixels, *pPitchAlign); - } - } -} - -/** -**************************************************************************************************** -* Lib::PadDimensions -* -* @brief -* Helper function to pad dimensions -* -* @return -* N/A -* -**************************************************************************************************** -*/ -VOID Lib::PadDimensions( - AddrTileMode tileMode, ///< [in] tile mode - UINT_32 bpp, ///< [in] bits per pixel - ADDR_SURFACE_FLAGS flags, ///< [in] surface flags - UINT_32 numSamples, ///< [in] number of samples - ADDR_TILEINFO* pTileInfo, ///< [in,out] bank structure. - UINT_32 padDims, ///< [in] Dimensions to pad valid value 1,2,3 - UINT_32 mipLevel, ///< [in] MipLevel - UINT_32* pPitch, ///< [in,out] pitch in pixels - UINT_32* pPitchAlign, ///< [in,out] pitch align could be changed in HwlPadDimensions - UINT_32* pHeight, ///< [in,out] height in pixels - UINT_32 heightAlign, ///< [in] height alignment - UINT_32* pSlices, ///< [in,out] number of slices - UINT_32 sliceAlign ///< [in] number of slice alignment - ) const -{ - UINT_32 pitchAlign = *pPitchAlign; - UINT_32 thickness = Thickness(tileMode); - - ADDR_ASSERT(padDims <= 3); - - // - // Override padding for mip levels - // - if (mipLevel > 0) - { - if (flags.cube) - { - // for cubemap, we only pad when client call with 6 faces as an identity - if (*pSlices > 1) - { - padDims = 3; // we should pad cubemap sub levels when we treat it as 3d texture - } - else - { - padDims = 2; - } - } - } - - // Any possibilities that padDims is 0? 
- if (padDims == 0) - { - padDims = 3; - } - - if (IsPow2(pitchAlign)) - { - *pPitch = PowTwoAlign((*pPitch), pitchAlign); - } - else // add this code to pass unit test, r600 linear mode is not align bpp to pow2 for linear - { - *pPitch += pitchAlign - 1; - *pPitch /= pitchAlign; - *pPitch *= pitchAlign; - } - - if (padDims > 1) - { - if (IsPow2(heightAlign)) - { - *pHeight = PowTwoAlign((*pHeight), heightAlign); - } - else - { - *pHeight += heightAlign - 1; - *pHeight /= heightAlign; - *pHeight *= heightAlign; - } - } - - if (padDims > 2 || thickness > 1) - { - // for cubemap single face, we do not pad slices. - // if we pad it, the slice number should be set to 6 and current mip level > 1 - if (flags.cube && (!m_configFlags.noCubeMipSlicesPad || flags.cubeAsArray)) - { - *pSlices = NextPow2(*pSlices); - } - - // normal 3D texture or arrays or cubemap has a thick mode? (Just pass unit test) - if (thickness > 1) - { - *pSlices = PowTwoAlign((*pSlices), sliceAlign); - } - - } - - HwlPadDimensions(tileMode, - bpp, - flags, - numSamples, - pTileInfo, - mipLevel, - pPitch, - pPitchAlign, - *pHeight, - heightAlign); -} - - -/** -**************************************************************************************************** -* Lib::HwlPreHandleBaseLvl3xPitch -* -* @brief -* Pre-handler of 3x pitch (96 bit) adjustment -* -* @return -* Expected pitch -**************************************************************************************************** -*/ -UINT_32 Lib::HwlPreHandleBaseLvl3xPitch( - const ADDR_COMPUTE_SURFACE_INFO_INPUT* pIn, ///< [in] input - UINT_32 expPitch ///< [in] pitch - ) const -{ - ADDR_ASSERT(pIn->width == expPitch); - // - // If pitch is pre-multiplied by 3, we retrieve original one here to get correct miplevel size - // - if (ElemLib::IsExpand3x(pIn->format) && - pIn->mipLevel == 0 && - pIn->tileMode == ADDR_TM_LINEAR_ALIGNED) - { - expPitch /= 3; - expPitch = NextPow2(expPitch); - } - - return expPitch; -} - -/** 
-**************************************************************************************************** -* Lib::HwlPostHandleBaseLvl3xPitch -* -* @brief -* Post-handler of 3x pitch adjustment -* -* @return -* Expected pitch -**************************************************************************************************** -*/ -UINT_32 Lib::HwlPostHandleBaseLvl3xPitch( - const ADDR_COMPUTE_SURFACE_INFO_INPUT* pIn, ///< [in] input - UINT_32 expPitch ///< [in] pitch - ) const -{ - // - // 96 bits surface of sub levels require element pitch of 32 bits instead - // So we just return pitch in 32 bit pixels without timing 3 - // - if (ElemLib::IsExpand3x(pIn->format) && - pIn->mipLevel == 0 && - pIn->tileMode == ADDR_TM_LINEAR_ALIGNED) - { - expPitch *= 3; - } - - return expPitch; -} - - -/** -**************************************************************************************************** -* Lib::IsMacroTiled -* -* @brief -* Check if the tile mode is macro tiled -* -* @return -* TRUE if it is macro tiled (2D/2B/3D/3B) -**************************************************************************************************** -*/ -BOOL_32 Lib::IsMacroTiled( - AddrTileMode tileMode) ///< [in] tile mode -{ - return ModeFlags[tileMode].isMacro; -} - -/** -**************************************************************************************************** -* Lib::IsMacro3dTiled -* -* @brief -* Check if the tile mode is 3D macro tiled -* -* @return -* TRUE if it is 3D macro tiled -**************************************************************************************************** -*/ -BOOL_32 Lib::IsMacro3dTiled( - AddrTileMode tileMode) ///< [in] tile mode -{ - return ModeFlags[tileMode].isMacro3d; -} - -/** -**************************************************************************************************** -* Lib::IsMicroTiled -* -* @brief -* Check if the tile mode is micro tiled -* -* @return -* TRUE if micro tiled 
-**************************************************************************************************** -*/ -BOOL_32 Lib::IsMicroTiled( - AddrTileMode tileMode) ///< [in] tile mode -{ - return ModeFlags[tileMode].isMicro; -} - -/** -**************************************************************************************************** -* Lib::IsLinear -* -* @brief -* Check if the tile mode is linear -* -* @return -* TRUE if linear -**************************************************************************************************** -*/ -BOOL_32 Lib::IsLinear( - AddrTileMode tileMode) ///< [in] tile mode -{ - return ModeFlags[tileMode].isLinear; -} - -/** -**************************************************************************************************** -* Lib::IsPrtNoRotationTileMode -* -* @brief -* Return TRUE if it is prt tile without rotation -* @note -* This function just used by CI -**************************************************************************************************** -*/ -BOOL_32 Lib::IsPrtNoRotationTileMode( - AddrTileMode tileMode) -{ - return ModeFlags[tileMode].isPrtNoRotation; -} - -/** -**************************************************************************************************** -* Lib::IsPrtTileMode -* -* @brief -* Return TRUE if it is prt tile -* @note -* This function just used by CI -**************************************************************************************************** -*/ -BOOL_32 Lib::IsPrtTileMode( - AddrTileMode tileMode) -{ - return ModeFlags[tileMode].isPrt; -} - -/** -**************************************************************************************************** -* Lib::ComputeMipLevel -* -* @brief -* Compute mipmap level width/height/slices -* @return -* N/A -**************************************************************************************************** -*/ -VOID Lib::ComputeMipLevel( - ADDR_COMPUTE_SURFACE_INFO_INPUT* pIn ///< [in,out] Input structure - ) const -{ - // Check if HWL has handled - 
BOOL_32 hwlHandled = FALSE; - (void)hwlHandled; - - if (ElemLib::IsBlockCompressed(pIn->format)) - { - if (pIn->mipLevel == 0) - { - // DXTn's level 0 must be multiple of 4 - // But there are exceptions: - // 1. Internal surface creation in hostblt/vsblt/etc... - // 2. Runtime doesn't reject ATI1/ATI2 whose width/height are not multiple of 4 - pIn->width = PowTwoAlign(pIn->width, 4); - pIn->height = PowTwoAlign(pIn->height, 4); - } - } - - hwlHandled = HwlComputeMipLevel(pIn); -} - -/** -**************************************************************************************************** -* Lib::DegradeTo1D -* -* @brief -* Check if surface can be degraded to 1D -* @return -* TRUE if degraded -**************************************************************************************************** -*/ -BOOL_32 Lib::DegradeTo1D( - UINT_32 width, ///< surface width - UINT_32 height, ///< surface height - UINT_32 macroTilePitchAlign, ///< macro tile pitch align - UINT_32 macroTileHeightAlign ///< macro tile height align - ) -{ - BOOL_32 degrade = ((width < macroTilePitchAlign) || (height < macroTileHeightAlign)); - - // Check whether 2D tiling still has too much footprint - if (degrade == FALSE) - { - // Only check width and height as slices are aligned to thickness - UINT_64 unalignedSize = width * height; - - UINT_32 alignedPitch = PowTwoAlign(width, macroTilePitchAlign); - UINT_32 alignedHeight = PowTwoAlign(height, macroTileHeightAlign); - UINT_64 alignedSize = alignedPitch * alignedHeight; - - // alignedSize > 1.5 * unalignedSize - if (2 * alignedSize > 3 * unalignedSize) - { - degrade = TRUE; - } - } - - return degrade; -} - -/** -**************************************************************************************************** -* Lib::OptimizeTileMode -* -* @brief -* Check if base level's tile mode can be optimized (degraded) -* @return -* N/A -**************************************************************************************************** -*/ -VOID 
Lib::OptimizeTileMode( - ADDR_COMPUTE_SURFACE_INFO_INPUT* pInOut ///< [in, out] structure for surface info - ) const -{ - AddrTileMode tileMode = pInOut->tileMode; - - BOOL_32 doOpt = (pInOut->flags.opt4Space == TRUE) || - (pInOut->flags.minimizeAlignment == TRUE) || - (pInOut->maxBaseAlign != 0); - - BOOL_32 convertToPrt = FALSE; - - // Optimization can only be done on level 0 and samples <= 1 - if ((doOpt == TRUE) && - (pInOut->mipLevel == 0) && - (IsPrtTileMode(tileMode) == FALSE) && - (pInOut->flags.prt == FALSE)) - { - UINT_32 width = pInOut->width; - UINT_32 height = pInOut->height; - UINT_32 thickness = Thickness(tileMode); - BOOL_32 macroTiledOK = TRUE; - UINT_32 macroWidthAlign = 0; - UINT_32 macroHeightAlign = 0; - UINT_32 macroSizeAlign = 0; - - if (IsMacroTiled(tileMode)) - { - macroTiledOK = HwlGetAlignmentInfoMacroTiled(pInOut, - ¯oWidthAlign, - ¯oHeightAlign, - ¯oSizeAlign); - } - - if (macroTiledOK) - { - if ((pInOut->flags.display == FALSE) && - (pInOut->flags.opt4Space == TRUE) && - (pInOut->numSamples <= 1)) - { - // Check if linear mode is optimal - if ((pInOut->height == 1) && - (IsLinear(tileMode) == FALSE) && - (ElemLib::IsBlockCompressed(pInOut->format) == FALSE) && - (pInOut->flags.depth == FALSE) && - (pInOut->flags.stencil == FALSE) && - (m_configFlags.disableLinearOpt == FALSE) && - (pInOut->flags.disableLinearOpt == FALSE)) - { - tileMode = ADDR_TM_LINEAR_ALIGNED; - } - else if (IsMacroTiled(tileMode) && (pInOut->flags.tcCompatible == FALSE)) - { - if (DegradeTo1D(width, height, macroWidthAlign, macroHeightAlign)) - { - tileMode = (thickness == 1) ? - ADDR_TM_1D_TILED_THIN1 : ADDR_TM_1D_TILED_THICK; - } - else if ((thickness > 1) && (pInOut->flags.disallowLargeThickDegrade == 0)) - { - // As in the following HwlComputeSurfaceInfo, thick modes may be degraded to - // thinner modes, we should re-evaluate whether the corresponding - // thinner modes should be degraded. If so, we choose 1D thick mode instead. 
- tileMode = DegradeLargeThickTile(pInOut->tileMode, pInOut->bpp); - - if (tileMode != pInOut->tileMode) - { - // Get thickness again after large thick degrade - thickness = Thickness(tileMode); - - ADDR_COMPUTE_SURFACE_INFO_INPUT input = *pInOut; - input.tileMode = tileMode; - - macroTiledOK = HwlGetAlignmentInfoMacroTiled(&input, - ¯oWidthAlign, - ¯oHeightAlign, - ¯oSizeAlign); - - if (macroTiledOK && - DegradeTo1D(width, height, macroWidthAlign, macroHeightAlign)) - { - tileMode = ADDR_TM_1D_TILED_THICK; - } - } - } - } - } - - if (macroTiledOK) - { - if ((pInOut->flags.minimizeAlignment == TRUE) && - (pInOut->numSamples <= 1) && - (IsMacroTiled(tileMode) == TRUE)) - { - UINT_32 macroSize = PowTwoAlign(width, macroWidthAlign) * - PowTwoAlign(height, macroHeightAlign); - UINT_32 microSize = PowTwoAlign(width, MicroTileWidth) * - PowTwoAlign(height, MicroTileHeight); - - if (macroSize > microSize) - { - tileMode = (thickness == 1) ? - ADDR_TM_1D_TILED_THIN1 : ADDR_TM_1D_TILED_THICK; - } - } - - if ((pInOut->maxBaseAlign != 0) && - (IsMacroTiled(tileMode) == TRUE)) - { - if (macroSizeAlign > pInOut->maxBaseAlign) - { - if (pInOut->numSamples > 1) - { - ADDR_ASSERT(pInOut->maxBaseAlign >= Block64K); - - convertToPrt = TRUE; - } - else if (pInOut->maxBaseAlign < Block64K) - { - tileMode = (thickness == 1) ? 
- ADDR_TM_1D_TILED_THIN1 : ADDR_TM_1D_TILED_THICK; - } - else - { - convertToPrt = TRUE; - } - } - } - } - } - } - - if (convertToPrt) - { - if ((pInOut->flags.matchStencilTileCfg == TRUE) && (pInOut->numSamples <= 1)) - { - pInOut->tileMode = ADDR_TM_1D_TILED_THIN1; - } - else - { - HwlSetPrtTileMode(pInOut); - } - } - else if (tileMode != pInOut->tileMode) - { - pInOut->tileMode = tileMode; - } - - HwlOptimizeTileMode(pInOut); -} - -/** -**************************************************************************************************** -* Lib::DegradeLargeThickTile -* -* @brief -* Check if the thickness needs to be reduced if a tile is too large -* @return -* The degraded tile mode (unchanged if not degraded) -**************************************************************************************************** -*/ -AddrTileMode Lib::DegradeLargeThickTile( - AddrTileMode tileMode, - UINT_32 bpp) const -{ - // Override tilemode - // When tile_width (8) * tile_height (8) * thickness * element_bytes is > row_size, - // it is better to just use THIN mode in this case - UINT_32 thickness = Thickness(tileMode); - - if (thickness > 1 && m_configFlags.allowLargeThickTile == 0) - { - UINT_32 tileSize = MicroTilePixels * thickness * (bpp >> 3); - - if (tileSize > m_rowSize) - { - switch (tileMode) - { - case ADDR_TM_2D_TILED_XTHICK: - if ((tileSize >> 1) <= m_rowSize) - { - tileMode = ADDR_TM_2D_TILED_THICK; - break; - } - // else fall through - case ADDR_TM_2D_TILED_THICK: - tileMode = ADDR_TM_2D_TILED_THIN1; - break; - - case ADDR_TM_3D_TILED_XTHICK: - if ((tileSize >> 1) <= m_rowSize) - { - tileMode = ADDR_TM_3D_TILED_THICK; - break; - } - // else fall through - case ADDR_TM_3D_TILED_THICK: - tileMode = ADDR_TM_3D_TILED_THIN1; - break; - - case ADDR_TM_PRT_TILED_THICK: - tileMode = ADDR_TM_PRT_TILED_THIN1; - break; - - case ADDR_TM_PRT_2D_TILED_THICK: - tileMode = ADDR_TM_PRT_2D_TILED_THIN1; - break; - - case ADDR_TM_PRT_3D_TILED_THICK: - tileMode = 
ADDR_TM_PRT_3D_TILED_THIN1; - break; - - default: - break; - } - } - } - - return tileMode; -} - -/** -**************************************************************************************************** -* Lib::PostComputeMipLevel -* @brief -* Compute MipLevel info (including level 0) after surface adjustment -* @return -* ADDR_E_RETURNCODE -**************************************************************************************************** -*/ -ADDR_E_RETURNCODE Lib::PostComputeMipLevel( - ADDR_COMPUTE_SURFACE_INFO_INPUT* pIn, ///< [in,out] Input structure - ADDR_COMPUTE_SURFACE_INFO_OUTPUT* pOut ///< [out] Output structure - ) const -{ - // Mipmap including level 0 must be pow2 padded since either SI hw expects so or it is - // required by CFX for Hw Compatibility between NI and SI. Otherwise it is only needed for - // mipLevel > 0. Any h/w has different requirement should implement its own virtual function - - if (pIn->flags.pow2Pad) - { - pIn->width = NextPow2(pIn->width); - pIn->height = NextPow2(pIn->height); - pIn->numSlices = NextPow2(pIn->numSlices); - } - else if (pIn->mipLevel > 0) - { - pIn->width = NextPow2(pIn->width); - pIn->height = NextPow2(pIn->height); - - if (!pIn->flags.cube) - { - pIn->numSlices = NextPow2(pIn->numSlices); - } - - // for cubemap, we keep its value at first - } - - return ADDR_OK; -} - -/** -**************************************************************************************************** -* Lib::HwlSetupTileCfg -* -* @brief -* Map tile index to tile setting. 
-* @return -* ADDR_E_RETURNCODE -**************************************************************************************************** -*/ -ADDR_E_RETURNCODE Lib::HwlSetupTileCfg( - UINT_32 bpp, ///< Bits per pixel - INT_32 index, ///< [in] Tile index - INT_32 macroModeIndex, ///< [in] Index in macro tile mode table(CI) - ADDR_TILEINFO* pInfo, ///< [out] Tile Info - AddrTileMode* pMode, ///< [out] Tile mode - AddrTileType* pType ///< [out] Tile type - ) const -{ - return ADDR_NOTSUPPORTED; -} - -/** -**************************************************************************************************** -* Lib::HwlGetPipes -* -* @brief -* Get number pipes -* @return -* num pipes -**************************************************************************************************** -*/ -UINT_32 Lib::HwlGetPipes( - const ADDR_TILEINFO* pTileInfo ///< [in] Tile info - ) const -{ - //pTileInfo can be NULL when asic is 6xx and 8xx. - return m_pipes; -} - -/** -**************************************************************************************************** -* Lib::ComputeQbStereoInfo -* -* @brief -* Get quad buffer stereo information -* @return -* N/A -**************************************************************************************************** -*/ -VOID Lib::ComputeQbStereoInfo( - ADDR_COMPUTE_SURFACE_INFO_OUTPUT* pOut ///< [in,out] updated pOut+pStereoInfo - ) const -{ - ADDR_ASSERT(pOut->bpp >= 8); - ADDR_ASSERT((pOut->surfSize % pOut->baseAlign) == 0); - - // Save original height - pOut->pStereoInfo->eyeHeight = pOut->height; - - // Right offset - pOut->pStereoInfo->rightOffset = static_cast(pOut->surfSize); - - pOut->pStereoInfo->rightSwizzle = HwlComputeQbStereoRightSwizzle(pOut); - // Double height - pOut->height <<= 1; - pOut->pixelHeight <<= 1; - - // Double size - pOut->surfSize <<= 1; - - // Right start address meets the base align since it is guaranteed by AddrLib1 - - // 1D surface on SI may break this rule, but we can force it to meet by checking 
.qbStereo. -} - - -/** -**************************************************************************************************** -* Lib::ComputePrtInfo -* -* @brief -* Compute prt surface related info -* -* @return -* ADDR_E_RETURNCODE -**************************************************************************************************** -*/ -ADDR_E_RETURNCODE Lib::ComputePrtInfo( - const ADDR_PRT_INFO_INPUT* pIn, - ADDR_PRT_INFO_OUTPUT* pOut) const -{ - ADDR_ASSERT(pOut != NULL); - - ADDR_E_RETURNCODE returnCode = ADDR_OK; - - UINT_32 expandX = 1; - UINT_32 expandY = 1; - ElemMode elemMode; - - UINT_32 bpp = GetElemLib()->GetBitsPerPixel(pIn->format, - &elemMode, - &expandX, - &expandY); - - if (bpp <8 || bpp == 24 || bpp == 48 || bpp == 96) - { - returnCode = ADDR_INVALIDPARAMS; - } - - UINT_32 numFrags = pIn->numFrags; - ADDR_ASSERT(numFrags <= 8); - - UINT_32 tileWidth = 0; - UINT_32 tileHeight = 0; - if (returnCode == ADDR_OK) - { - // 3D texture without depth or 2d texture - if (pIn->baseMipDepth > 1 || pIn->baseMipHeight > 1) - { - if (bpp == 8) - { - tileWidth = 256; - tileHeight = 256; - } - else if (bpp == 16) - { - tileWidth = 256; - tileHeight = 128; - } - else if (bpp == 32) - { - tileWidth = 128; - tileHeight = 128; - } - else if (bpp == 64) - { - // assume it is BC1/4 - tileWidth = 512; - tileHeight = 256; - - if (elemMode == ADDR_UNCOMPRESSED) - { - tileWidth = 128; - tileHeight = 64; - } - } - else if (bpp == 128) - { - // assume it is BC2/3/5/6H/7 - tileWidth = 256; - tileHeight = 256; - - if (elemMode == ADDR_UNCOMPRESSED) - { - tileWidth = 64; - tileHeight = 64; - } - } - - if (numFrags == 2) - { - tileWidth = tileWidth / 2; - } - else if (numFrags == 4) - { - tileWidth = tileWidth / 2; - tileHeight = tileHeight / 2; - } - else if (numFrags == 8) - { - tileWidth = tileWidth / 4; - tileHeight = tileHeight / 2; - } - } - else // 1d - { - tileHeight = 1; - if (bpp == 8) - { - tileWidth = 65536; - } - else if (bpp == 16) - { - tileWidth = 32768; - } - 
else if (bpp == 32) - { - tileWidth = 16384; - } - else if (bpp == 64) - { - tileWidth = 8192; - } - else if (bpp == 128) - { - tileWidth = 4096; - } - } - } - - pOut->prtTileWidth = tileWidth; - pOut->prtTileHeight = tileHeight; - - return returnCode; -} - -} // V1 -} // Addr diff -Nru mesa-18.3.3/src/amd/addrlib/core/addrlib1.h mesa-19.0.1/src/amd/addrlib/core/addrlib1.h --- mesa-18.3.3/src/amd/addrlib/core/addrlib1.h 2017-11-05 00:14:08.000000000 +0000 +++ mesa-19.0.1/src/amd/addrlib/core/addrlib1.h 1970-01-01 00:00:00.000000000 +0000 @@ -1,545 +0,0 @@ -/* - * Copyright © 2016 Advanced Micro Devices, Inc. - * All Rights Reserved. - * - * Permission is hereby granted, free of charge, to any person obtaining - * a copy of this software and associated documentation files (the - * "Software"), to deal in the Software without restriction, including - * without limitation the rights to use, copy, modify, merge, publish, - * distribute, sub license, and/or sell copies of the Software, and to - * permit persons to whom the Software is furnished to do so, subject to - * the following conditions: - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES - * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - * NON-INFRINGEMENT. IN NO EVENT SHALL THE COPYRIGHT HOLDERS, AUTHORS - * AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, - * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE - * USE OR OTHER DEALINGS IN THE SOFTWARE. - * - * The above copyright notice and this permission notice (including the - * next paragraph) shall be included in all copies or substantial portions - * of the Software. - */ - -/** -**************************************************************************************************** -* @file addrlib1.h -* @brief Contains the Addr::V1::Lib class definition. 
-**************************************************************************************************** -*/ - -#ifndef __ADDR_LIB1_H__ -#define __ADDR_LIB1_H__ - -#include "addrlib.h" - -namespace Addr -{ -namespace V1 -{ - -/** -**************************************************************************************************** -* @brief Neutral enums that define bank swap size -**************************************************************************************************** -*/ -enum SampleSplitSize -{ - ADDR_SAMPLESPLIT_1KB = 1024, - ADDR_SAMPLESPLIT_2KB = 2048, - ADDR_SAMPLESPLIT_4KB = 4096, - ADDR_SAMPLESPLIT_8KB = 8192, -}; - -/** -**************************************************************************************************** -* @brief Flags for AddrTileMode -**************************************************************************************************** -*/ -struct TileModeFlags -{ - UINT_32 thickness : 4; - UINT_32 isLinear : 1; - UINT_32 isMicro : 1; - UINT_32 isMacro : 1; - UINT_32 isMacro3d : 1; - UINT_32 isPrt : 1; - UINT_32 isPrtNoRotation : 1; - UINT_32 isBankSwapped : 1; -}; - -static const UINT_32 Block64K = 0x10000; -static const UINT_32 PrtTileSize = Block64K; - -/** -**************************************************************************************************** -* @brief This class contains asic independent address lib functionalities -**************************************************************************************************** -*/ -class Lib : public Addr::Lib -{ -public: - virtual ~Lib(); - - static Lib* GetLib( - ADDR_HANDLE hLib); - - /// Returns tileIndex support - BOOL_32 UseTileIndex(INT_32 index) const - { - return m_configFlags.useTileIndex && (index != TileIndexInvalid); - } - - /// Returns combined swizzle support - BOOL_32 UseCombinedSwizzle() const - { - return m_configFlags.useCombinedSwizzle; - } - - // - // Interface stubs - // - ADDR_E_RETURNCODE ComputeSurfaceInfo( - const ADDR_COMPUTE_SURFACE_INFO_INPUT* 
pIn, - ADDR_COMPUTE_SURFACE_INFO_OUTPUT* pOut) const; - - ADDR_E_RETURNCODE ComputeSurfaceAddrFromCoord( - const ADDR_COMPUTE_SURFACE_ADDRFROMCOORD_INPUT* pIn, - ADDR_COMPUTE_SURFACE_ADDRFROMCOORD_OUTPUT* pOut) const; - - ADDR_E_RETURNCODE ComputeSurfaceCoordFromAddr( - const ADDR_COMPUTE_SURFACE_COORDFROMADDR_INPUT* pIn, - ADDR_COMPUTE_SURFACE_COORDFROMADDR_OUTPUT* pOut) const; - - ADDR_E_RETURNCODE ComputeSliceTileSwizzle( - const ADDR_COMPUTE_SLICESWIZZLE_INPUT* pIn, - ADDR_COMPUTE_SLICESWIZZLE_OUTPUT* pOut) const; - - ADDR_E_RETURNCODE ExtractBankPipeSwizzle( - const ADDR_EXTRACT_BANKPIPE_SWIZZLE_INPUT* pIn, - ADDR_EXTRACT_BANKPIPE_SWIZZLE_OUTPUT* pOut) const; - - ADDR_E_RETURNCODE CombineBankPipeSwizzle( - const ADDR_COMBINE_BANKPIPE_SWIZZLE_INPUT* pIn, - ADDR_COMBINE_BANKPIPE_SWIZZLE_OUTPUT* pOut) const; - - ADDR_E_RETURNCODE ComputeBaseSwizzle( - const ADDR_COMPUTE_BASE_SWIZZLE_INPUT* pIn, - ADDR_COMPUTE_BASE_SWIZZLE_OUTPUT* pOut) const; - - ADDR_E_RETURNCODE ComputeFmaskInfo( - const ADDR_COMPUTE_FMASK_INFO_INPUT* pIn, - ADDR_COMPUTE_FMASK_INFO_OUTPUT* pOut); - - ADDR_E_RETURNCODE ComputeFmaskAddrFromCoord( - const ADDR_COMPUTE_FMASK_ADDRFROMCOORD_INPUT* pIn, - ADDR_COMPUTE_FMASK_ADDRFROMCOORD_OUTPUT* pOut) const; - - ADDR_E_RETURNCODE ComputeFmaskCoordFromAddr( - const ADDR_COMPUTE_FMASK_COORDFROMADDR_INPUT* pIn, - ADDR_COMPUTE_FMASK_COORDFROMADDR_OUTPUT* pOut) const; - - ADDR_E_RETURNCODE ConvertTileInfoToHW( - const ADDR_CONVERT_TILEINFOTOHW_INPUT* pIn, - ADDR_CONVERT_TILEINFOTOHW_OUTPUT* pOut) const; - - ADDR_E_RETURNCODE ConvertTileIndex( - const ADDR_CONVERT_TILEINDEX_INPUT* pIn, - ADDR_CONVERT_TILEINDEX_OUTPUT* pOut) const; - - ADDR_E_RETURNCODE GetMacroModeIndex( - const ADDR_GET_MACROMODEINDEX_INPUT* pIn, - ADDR_GET_MACROMODEINDEX_OUTPUT* pOut) const; - - ADDR_E_RETURNCODE ConvertTileIndex1( - const ADDR_CONVERT_TILEINDEX1_INPUT* pIn, - ADDR_CONVERT_TILEINDEX_OUTPUT* pOut) const; - - ADDR_E_RETURNCODE GetTileIndex( - const ADDR_GET_TILEINDEX_INPUT* 
pIn, - ADDR_GET_TILEINDEX_OUTPUT* pOut) const; - - ADDR_E_RETURNCODE ComputeHtileInfo( - const ADDR_COMPUTE_HTILE_INFO_INPUT* pIn, - ADDR_COMPUTE_HTILE_INFO_OUTPUT* pOut) const; - - ADDR_E_RETURNCODE ComputeCmaskInfo( - const ADDR_COMPUTE_CMASK_INFO_INPUT* pIn, - ADDR_COMPUTE_CMASK_INFO_OUTPUT* pOut) const; - - ADDR_E_RETURNCODE ComputeDccInfo( - const ADDR_COMPUTE_DCCINFO_INPUT* pIn, - ADDR_COMPUTE_DCCINFO_OUTPUT* pOut) const; - - ADDR_E_RETURNCODE ComputeHtileAddrFromCoord( - const ADDR_COMPUTE_HTILE_ADDRFROMCOORD_INPUT* pIn, - ADDR_COMPUTE_HTILE_ADDRFROMCOORD_OUTPUT* pOut) const; - - ADDR_E_RETURNCODE ComputeCmaskAddrFromCoord( - const ADDR_COMPUTE_CMASK_ADDRFROMCOORD_INPUT* pIn, - ADDR_COMPUTE_CMASK_ADDRFROMCOORD_OUTPUT* pOut) const; - - ADDR_E_RETURNCODE ComputeHtileCoordFromAddr( - const ADDR_COMPUTE_HTILE_COORDFROMADDR_INPUT* pIn, - ADDR_COMPUTE_HTILE_COORDFROMADDR_OUTPUT* pOut) const; - - ADDR_E_RETURNCODE ComputeCmaskCoordFromAddr( - const ADDR_COMPUTE_CMASK_COORDFROMADDR_INPUT* pIn, - ADDR_COMPUTE_CMASK_COORDFROMADDR_OUTPUT* pOut) const; - - ADDR_E_RETURNCODE ComputePrtInfo( - const ADDR_PRT_INFO_INPUT* pIn, - ADDR_PRT_INFO_OUTPUT* pOut) const; -protected: - Lib(); // Constructor is protected - Lib(const Client* pClient); - - /// Pure Virtual function for Hwl computing surface info - virtual ADDR_E_RETURNCODE HwlComputeSurfaceInfo( - const ADDR_COMPUTE_SURFACE_INFO_INPUT* pIn, - ADDR_COMPUTE_SURFACE_INFO_OUTPUT* pOut) const = 0; - - /// Pure Virtual function for Hwl computing surface address from coord - virtual ADDR_E_RETURNCODE HwlComputeSurfaceAddrFromCoord( - const ADDR_COMPUTE_SURFACE_ADDRFROMCOORD_INPUT* pIn, - ADDR_COMPUTE_SURFACE_ADDRFROMCOORD_OUTPUT* pOut) const = 0; - - /// Pure Virtual function for Hwl computing surface coord from address - virtual ADDR_E_RETURNCODE HwlComputeSurfaceCoordFromAddr( - const ADDR_COMPUTE_SURFACE_COORDFROMADDR_INPUT* pIn, - ADDR_COMPUTE_SURFACE_COORDFROMADDR_OUTPUT* pOut) const = 0; - - /// Pure Virtual function 
for Hwl computing surface tile swizzle - virtual ADDR_E_RETURNCODE HwlComputeSliceTileSwizzle( - const ADDR_COMPUTE_SLICESWIZZLE_INPUT* pIn, - ADDR_COMPUTE_SLICESWIZZLE_OUTPUT* pOut) const = 0; - - /// Pure Virtual function for Hwl extracting bank/pipe swizzle from base256b - virtual ADDR_E_RETURNCODE HwlExtractBankPipeSwizzle( - const ADDR_EXTRACT_BANKPIPE_SWIZZLE_INPUT* pIn, - ADDR_EXTRACT_BANKPIPE_SWIZZLE_OUTPUT* pOut) const = 0; - - /// Pure Virtual function for Hwl combining bank/pipe swizzle - virtual ADDR_E_RETURNCODE HwlCombineBankPipeSwizzle( - UINT_32 bankSwizzle, UINT_32 pipeSwizzle, ADDR_TILEINFO* pTileInfo, - UINT_64 baseAddr, UINT_32* pTileSwizzle) const = 0; - - /// Pure Virtual function for Hwl computing base swizzle - virtual ADDR_E_RETURNCODE HwlComputeBaseSwizzle( - const ADDR_COMPUTE_BASE_SWIZZLE_INPUT* pIn, - ADDR_COMPUTE_BASE_SWIZZLE_OUTPUT* pOut) const = 0; - - /// Pure Virtual function for Hwl computing HTILE base align - virtual UINT_32 HwlComputeHtileBaseAlign( - BOOL_32 isTcCompatible, BOOL_32 isLinear, ADDR_TILEINFO* pTileInfo) const = 0; - - /// Pure Virtual function for Hwl computing HTILE bpp - virtual UINT_32 HwlComputeHtileBpp( - BOOL_32 isWidth8, BOOL_32 isHeight8) const = 0; - - /// Pure Virtual function for Hwl computing HTILE bytes - virtual UINT_64 HwlComputeHtileBytes( - UINT_32 pitch, UINT_32 height, UINT_32 bpp, - BOOL_32 isLinear, UINT_32 numSlices, UINT_64* pSliceBytes, UINT_32 baseAlign) const = 0; - - /// Pure Virtual function for Hwl computing FMASK info - virtual ADDR_E_RETURNCODE HwlComputeFmaskInfo( - const ADDR_COMPUTE_FMASK_INFO_INPUT* pIn, - ADDR_COMPUTE_FMASK_INFO_OUTPUT* pOut) = 0; - - /// Pure Virtual function for Hwl FMASK address from coord - virtual ADDR_E_RETURNCODE HwlComputeFmaskAddrFromCoord( - const ADDR_COMPUTE_FMASK_ADDRFROMCOORD_INPUT* pIn, - ADDR_COMPUTE_FMASK_ADDRFROMCOORD_OUTPUT* pOut) const = 0; - - /// Pure Virtual function for Hwl FMASK coord from address - virtual ADDR_E_RETURNCODE 
HwlComputeFmaskCoordFromAddr( - const ADDR_COMPUTE_FMASK_COORDFROMADDR_INPUT* pIn, - ADDR_COMPUTE_FMASK_COORDFROMADDR_OUTPUT* pOut) const = 0; - - /// Pure Virtual function for Hwl convert tile info from real value to HW value - virtual ADDR_E_RETURNCODE HwlConvertTileInfoToHW( - const ADDR_CONVERT_TILEINFOTOHW_INPUT* pIn, - ADDR_CONVERT_TILEINFOTOHW_OUTPUT* pOut) const = 0; - - /// Pure Virtual function for Hwl compute mipmap info - virtual BOOL_32 HwlComputeMipLevel( - ADDR_COMPUTE_SURFACE_INFO_INPUT* pIn) const = 0; - - /// Pure Virtual function for Hwl compute max cmask blockMax value - virtual BOOL_32 HwlGetMaxCmaskBlockMax() const = 0; - - /// Pure Virtual function for Hwl compute fmask bits - virtual UINT_32 HwlComputeFmaskBits( - const ADDR_COMPUTE_FMASK_INFO_INPUT* pIn, - UINT_32* pNumSamples) const = 0; - - /// Virtual function to get index (not pure then no need to implement this in all hwls - virtual ADDR_E_RETURNCODE HwlGetTileIndex( - const ADDR_GET_TILEINDEX_INPUT* pIn, - ADDR_GET_TILEINDEX_OUTPUT* pOut) const - { - return ADDR_NOTSUPPORTED; - } - - /// Virtual function for Hwl to compute Dcc info - virtual ADDR_E_RETURNCODE HwlComputeDccInfo( - const ADDR_COMPUTE_DCCINFO_INPUT* pIn, - ADDR_COMPUTE_DCCINFO_OUTPUT* pOut) const - { - return ADDR_NOTSUPPORTED; - } - - /// Virtual function to get cmask address for tc compatible cmask - virtual ADDR_E_RETURNCODE HwlComputeCmaskAddrFromCoord( - const ADDR_COMPUTE_CMASK_ADDRFROMCOORD_INPUT* pIn, - ADDR_COMPUTE_CMASK_ADDRFROMCOORD_OUTPUT* pOut) const - { - return ADDR_NOTSUPPORTED; - } - - /// Virtual function to get htile address for tc compatible htile - virtual ADDR_E_RETURNCODE HwlComputeHtileAddrFromCoord( - const ADDR_COMPUTE_HTILE_ADDRFROMCOORD_INPUT* pIn, - ADDR_COMPUTE_HTILE_ADDRFROMCOORD_OUTPUT* pOut) const - { - return ADDR_NOTSUPPORTED; - } - - // Compute attributes - - // HTILE - UINT_32 ComputeHtileInfo( - ADDR_HTILE_FLAGS flags, - UINT_32 pitchIn, UINT_32 heightIn, UINT_32 numSlices, - BOOL_32 
isLinear, BOOL_32 isWidth8, BOOL_32 isHeight8, - ADDR_TILEINFO* pTileInfo, - UINT_32* pPitchOut, UINT_32* pHeightOut, UINT_64* pHtileBytes, - UINT_32* pMacroWidth = NULL, UINT_32* pMacroHeight = NULL, - UINT_64* pSliceSize = NULL, UINT_32* pBaseAlign = NULL) const; - - // CMASK - ADDR_E_RETURNCODE ComputeCmaskInfo( - ADDR_CMASK_FLAGS flags, - UINT_32 pitchIn, UINT_32 heightIn, UINT_32 numSlices, BOOL_32 isLinear, - ADDR_TILEINFO* pTileInfo, UINT_32* pPitchOut, UINT_32* pHeightOut, UINT_64* pCmaskBytes, - UINT_32* pMacroWidth, UINT_32* pMacroHeight, UINT_64* pSliceSize = NULL, - UINT_32* pBaseAlign = NULL, UINT_32* pBlockMax = NULL) const; - - virtual VOID HwlComputeTileDataWidthAndHeightLinear( - UINT_32* pMacroWidth, UINT_32* pMacroHeight, - UINT_32 bpp, ADDR_TILEINFO* pTileInfo) const; - - // CMASK & HTILE addressing - virtual UINT_64 HwlComputeXmaskAddrFromCoord( - UINT_32 pitch, UINT_32 height, UINT_32 x, UINT_32 y, UINT_32 slice, - UINT_32 numSlices, UINT_32 factor, BOOL_32 isLinear, BOOL_32 isWidth8, - BOOL_32 isHeight8, ADDR_TILEINFO* pTileInfo, - UINT_32* bitPosition) const; - - virtual VOID HwlComputeXmaskCoordFromAddr( - UINT_64 addr, UINT_32 bitPosition, UINT_32 pitch, UINT_32 height, UINT_32 numSlices, - UINT_32 factor, BOOL_32 isLinear, BOOL_32 isWidth8, BOOL_32 isHeight8, - ADDR_TILEINFO* pTileInfo, UINT_32* pX, UINT_32* pY, UINT_32* pSlice) const; - - // Surface mipmap - VOID ComputeMipLevel( - ADDR_COMPUTE_SURFACE_INFO_INPUT* pIn) const; - - /// Pure Virtual function for Hwl to get macro tiled alignment info - virtual BOOL_32 HwlGetAlignmentInfoMacroTiled( - const ADDR_COMPUTE_SURFACE_INFO_INPUT* pIn, - UINT_32* pPitchAlign, UINT_32* pHeightAlign, UINT_32* pSizeAlign) const = 0; - - - virtual VOID HwlOverrideTileMode(ADDR_COMPUTE_SURFACE_INFO_INPUT* pInOut) const - { - // not supported in hwl layer - } - - virtual VOID HwlOptimizeTileMode(ADDR_COMPUTE_SURFACE_INFO_INPUT* pInOut) const - { - // not supported in hwl layer - } - - virtual VOID 
HwlSelectTileMode(ADDR_COMPUTE_SURFACE_INFO_INPUT* pInOut) const - { - // not supported in hwl layer - } - - AddrTileMode DegradeLargeThickTile(AddrTileMode tileMode, UINT_32 bpp) const; - - VOID PadDimensions( - AddrTileMode tileMode, UINT_32 bpp, ADDR_SURFACE_FLAGS flags, - UINT_32 numSamples, ADDR_TILEINFO* pTileInfo, UINT_32 padDims, UINT_32 mipLevel, - UINT_32* pPitch, UINT_32* pPitchAlign, UINT_32* pHeight, UINT_32 heightAlign, - UINT_32* pSlices, UINT_32 sliceAlign) const; - - virtual VOID HwlPadDimensions( - AddrTileMode tileMode, UINT_32 bpp, ADDR_SURFACE_FLAGS flags, - UINT_32 numSamples, ADDR_TILEINFO* pTileInfo, UINT_32 mipLevel, - UINT_32* pPitch, UINT_32* pPitchAlign, UINT_32 height, UINT_32 heightAlign) const - { - } - - // - // Addressing shared for linear/1D tiling - // - UINT_64 ComputeSurfaceAddrFromCoordLinear( - UINT_32 x, UINT_32 y, UINT_32 slice, UINT_32 sample, - UINT_32 bpp, UINT_32 pitch, UINT_32 height, UINT_32 numSlices, - UINT_32* pBitPosition) const; - - VOID ComputeSurfaceCoordFromAddrLinear( - UINT_64 addr, UINT_32 bitPosition, UINT_32 bpp, - UINT_32 pitch, UINT_32 height, UINT_32 numSlices, - UINT_32* pX, UINT_32* pY, UINT_32* pSlice, UINT_32* pSample) const; - - VOID ComputeSurfaceCoordFromAddrMicroTiled( - UINT_64 addr, UINT_32 bitPosition, - UINT_32 bpp, UINT_32 pitch, UINT_32 height, UINT_32 numSamples, - AddrTileMode tileMode, UINT_32 tileBase, UINT_32 compBits, - UINT_32* pX, UINT_32* pY, UINT_32* pSlice, UINT_32* pSample, - AddrTileType microTileType, BOOL_32 isDepthSampleOrder) const; - - ADDR_E_RETURNCODE ComputeMicroTileEquation( - UINT_32 bpp, AddrTileMode tileMode, - AddrTileType microTileType, ADDR_EQUATION* pEquation) const; - - UINT_32 ComputePixelIndexWithinMicroTile( - UINT_32 x, UINT_32 y, UINT_32 z, - UINT_32 bpp, AddrTileMode tileMode, AddrTileType microTileType) const; - - /// Pure Virtual function for Hwl computing coord from offset inside micro tile - virtual VOID HwlComputePixelCoordFromOffset( - UINT_32 
offset, UINT_32 bpp, UINT_32 numSamples, - AddrTileMode tileMode, UINT_32 tileBase, UINT_32 compBits, - UINT_32* pX, UINT_32* pY, UINT_32* pSlice, UINT_32* pSample, - AddrTileType microTileType, BOOL_32 isDepthSampleOrder) const = 0; - - // - // Addressing shared by all - // - virtual UINT_32 HwlGetPipes( - const ADDR_TILEINFO* pTileInfo) const; - - UINT_32 ComputePipeFromAddr( - UINT_64 addr, UINT_32 numPipes) const; - - virtual ADDR_E_RETURNCODE ComputePipeEquation( - UINT_32 log2BytesPP, UINT_32 threshX, UINT_32 threshY, ADDR_TILEINFO* pTileInfo, ADDR_EQUATION* pEquation) const - { - return ADDR_NOTSUPPORTED; - } - - /// Pure Virtual function for Hwl computing pipe from coord - virtual UINT_32 ComputePipeFromCoord( - UINT_32 x, UINT_32 y, UINT_32 slice, AddrTileMode tileMode, - UINT_32 pipeSwizzle, BOOL_32 flags, ADDR_TILEINFO* pTileInfo) const = 0; - - /// Pure Virtual function for Hwl computing coord Y for 8 pipe cmask/htile - virtual UINT_32 HwlComputeXmaskCoordYFrom8Pipe( - UINT_32 pipe, UINT_32 x) const = 0; - - // - // Misc helper - // - static const TileModeFlags ModeFlags[ADDR_TM_COUNT]; - - static UINT_32 Thickness( - AddrTileMode tileMode); - - // Checking tile mode - static BOOL_32 IsMacroTiled(AddrTileMode tileMode); - static BOOL_32 IsMacro3dTiled(AddrTileMode tileMode); - static BOOL_32 IsLinear(AddrTileMode tileMode); - static BOOL_32 IsMicroTiled(AddrTileMode tileMode); - static BOOL_32 IsPrtTileMode(AddrTileMode tileMode); - static BOOL_32 IsPrtNoRotationTileMode(AddrTileMode tileMode); - - /// Return TRUE if tile info is needed - BOOL_32 UseTileInfo() const - { - return !m_configFlags.ignoreTileInfo; - } - - /// Adjusts pitch alignment for flipping surface - VOID AdjustPitchAlignment( - ADDR_SURFACE_FLAGS flags, UINT_32* pPitchAlign) const; - - /// Overwrite tile config according to tile index - virtual ADDR_E_RETURNCODE HwlSetupTileCfg( - UINT_32 bpp, INT_32 index, INT_32 macroModeIndex, - ADDR_TILEINFO* pInfo, AddrTileMode* mode = NULL, 
AddrTileType* type = NULL) const; - - /// Overwrite macro tile config according to tile index - virtual INT_32 HwlComputeMacroModeIndex( - INT_32 index, ADDR_SURFACE_FLAGS flags, UINT_32 bpp, UINT_32 numSamples, - ADDR_TILEINFO* pTileInfo, AddrTileMode *pTileMode = NULL, AddrTileType *pTileType = NULL - ) const - { - return TileIndexNoMacroIndex; - } - - /// Pre-handler of 3x pitch (96 bit) adjustment - virtual UINT_32 HwlPreHandleBaseLvl3xPitch( - const ADDR_COMPUTE_SURFACE_INFO_INPUT* pIn, UINT_32 expPitch) const; - /// Post-handler of 3x pitch adjustment - virtual UINT_32 HwlPostHandleBaseLvl3xPitch( - const ADDR_COMPUTE_SURFACE_INFO_INPUT* pIn, UINT_32 expPitch) const; - /// Check miplevel after surface adjustment - ADDR_E_RETURNCODE PostComputeMipLevel( - ADDR_COMPUTE_SURFACE_INFO_INPUT* pIn, - ADDR_COMPUTE_SURFACE_INFO_OUTPUT* pOut) const; - - /// Quad buffer stereo support, has its implementation in ind. layer - VOID ComputeQbStereoInfo( - ADDR_COMPUTE_SURFACE_INFO_OUTPUT* pOut) const; - - /// Pure virutual function to compute stereo bank swizzle for right eye - virtual UINT_32 HwlComputeQbStereoRightSwizzle( - ADDR_COMPUTE_SURFACE_INFO_OUTPUT* pOut) const = 0; - - VOID OptimizeTileMode(ADDR_COMPUTE_SURFACE_INFO_INPUT* pInOut) const; - - /// Overwrite tile setting to PRT - virtual VOID HwlSetPrtTileMode(ADDR_COMPUTE_SURFACE_INFO_INPUT* pInOut) const - { - } - - static BOOL_32 DegradeTo1D( - UINT_32 width, UINT_32 height, - UINT_32 macroTilePitchAlign, UINT_32 macroTileHeightAlign); - -private: - // Disallow the copy constructor - Lib(const Lib& a); - - // Disallow the assignment operator - Lib& operator=(const Lib& a); - - UINT_32 ComputeCmaskBaseAlign( - ADDR_CMASK_FLAGS flags, ADDR_TILEINFO* pTileInfo) const; - - UINT_64 ComputeCmaskBytes( - UINT_32 pitch, UINT_32 height, UINT_32 numSlices) const; - - // - // CMASK/HTILE shared methods - // - VOID ComputeTileDataWidthAndHeight( - UINT_32 bpp, UINT_32 cacheBits, ADDR_TILEINFO* pTileInfo, - UINT_32* 
pMacroWidth, UINT_32* pMacroHeight) const; - - UINT_32 ComputeXmaskCoordYFromPipe( - UINT_32 pipe, UINT_32 x) const; -}; - -} // V1 -} // Addr - -#endif - diff -Nru mesa-18.3.3/src/amd/addrlib/core/addrlib2.cpp mesa-19.0.1/src/amd/addrlib/core/addrlib2.cpp --- mesa-18.3.3/src/amd/addrlib/core/addrlib2.cpp 2018-04-03 17:32:26.000000000 +0000 +++ mesa-19.0.1/src/amd/addrlib/core/addrlib2.cpp 1970-01-01 00:00:00.000000000 +0000 @@ -1,1891 +0,0 @@ -/* - * Copyright © 2017 Advanced Micro Devices, Inc. - * All Rights Reserved. - * - * Permission is hereby granted, free of charge, to any person obtaining - * a copy of this software and associated documentation files (the - * "Software"), to deal in the Software without restriction, including - * without limitation the rights to use, copy, modify, merge, publish, - * distribute, sub license, and/or sell copies of the Software, and to - * permit persons to whom the Software is furnished to do so, subject to - * the following conditions: - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES - * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - * NON-INFRINGEMENT. IN NO EVENT SHALL THE COPYRIGHT HOLDERS, AUTHORS - * AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, - * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE - * USE OR OTHER DEALINGS IN THE SOFTWARE. - * - * The above copyright notice and this permission notice (including the - * next paragraph) shall be included in all copies or substantial portions - * of the Software. - */ - -/** -************************************************************************************************************************ -* @file addrlib2.cpp -* @brief Contains the implementation for the AddrLib2 base class. 
-************************************************************************************************************************ -*/ - -#include "addrinterface.h" -#include "addrlib2.h" -#include "addrcommon.h" - -namespace Addr -{ -namespace V2 -{ - -//////////////////////////////////////////////////////////////////////////////////////////////////// -// Static Const Member -//////////////////////////////////////////////////////////////////////////////////////////////////// - -const Dim2d Lib::Block256_2d[] = {{16, 16}, {16, 8}, {8, 8}, {8, 4}, {4, 4}}; - -const Dim3d Lib::Block1K_3d[] = {{16, 8, 8}, {8, 8, 8}, {8, 8, 4}, {8, 4, 4}, {4, 4, 4}}; - -//////////////////////////////////////////////////////////////////////////////////////////////////// -// Constructor/Destructor -//////////////////////////////////////////////////////////////////////////////////////////////////// - -/** -************************************************************************************************************************ -* Lib::Lib -* -* @brief -* Constructor for the Addr::V2::Lib class -* -************************************************************************************************************************ -*/ -Lib::Lib() - : - Addr::Lib() -{ -} - -/** -************************************************************************************************************************ -* Lib::Lib -* -* @brief -* Constructor for the AddrLib2 class with hClient as parameter -* -************************************************************************************************************************ -*/ -Lib::Lib(const Client* pClient) - : - Addr::Lib(pClient) -{ -} - -/** -************************************************************************************************************************ -* Lib::~Lib -* -* @brief -* Destructor for the AddrLib2 class -* -************************************************************************************************************************ -*/ -Lib::~Lib() -{ -} - -/** 
-************************************************************************************************************************ -* Lib::GetLib -* -* @brief -* Get Addr::V2::Lib pointer -* -* @return -* An Addr::V2::Lib class pointer -************************************************************************************************************************ -*/ -Lib* Lib::GetLib( - ADDR_HANDLE hLib) ///< [in] handle of ADDR_HANDLE -{ - Addr::Lib* pAddrLib = Addr::Lib::GetLib(hLib); - if ((pAddrLib != NULL) && - (pAddrLib->GetChipFamily() <= ADDR_CHIP_FAMILY_VI)) - { - // only valid and GFX9+ AISC can use AddrLib2 function. - ADDR_ASSERT_ALWAYS(); - hLib = NULL; - } - return static_cast(hLib); -} - - -//////////////////////////////////////////////////////////////////////////////////////////////////// -// Surface Methods -//////////////////////////////////////////////////////////////////////////////////////////////////// - - -/** -************************************************************************************************************************ -* Lib::ComputeSurfaceInfo -* -* @brief -* Interface function stub of AddrComputeSurfaceInfo. -* -* @return -* ADDR_E_RETURNCODE -************************************************************************************************************************ -*/ -ADDR_E_RETURNCODE Lib::ComputeSurfaceInfo( - const ADDR2_COMPUTE_SURFACE_INFO_INPUT* pIn, ///< [in] input structure - ADDR2_COMPUTE_SURFACE_INFO_OUTPUT* pOut ///< [out] output structure - ) const -{ - ADDR_E_RETURNCODE returnCode = ADDR_OK; - - if (GetFillSizeFieldsFlags() == TRUE) - { - if ((pIn->size != sizeof(ADDR2_COMPUTE_SURFACE_INFO_INPUT)) || - (pOut->size != sizeof(ADDR2_COMPUTE_SURFACE_INFO_OUTPUT))) - { - returnCode = ADDR_PARAMSIZEMISMATCH; - } - } - - // Adjust coming parameters. 
- ADDR2_COMPUTE_SURFACE_INFO_INPUT localIn = *pIn; - localIn.width = Max(pIn->width, 1u); - localIn.height = Max(pIn->height, 1u); - localIn.numMipLevels = Max(pIn->numMipLevels, 1u); - localIn.numSlices = Max(pIn->numSlices, 1u); - localIn.numSamples = Max(pIn->numSamples, 1u); - localIn.numFrags = (localIn.numFrags == 0) ? localIn.numSamples : pIn->numFrags; - - UINT_32 expandX = 1; - UINT_32 expandY = 1; - ElemMode elemMode = ADDR_UNCOMPRESSED; - - if (returnCode == ADDR_OK) - { - // Set format to INVALID will skip this conversion - if (localIn.format != ADDR_FMT_INVALID) - { - // Get compression/expansion factors and element mode which indicates compression/expansion - localIn.bpp = GetElemLib()->GetBitsPerPixel(localIn.format, - &elemMode, - &expandX, - &expandY); - - // Special flag for 96 bit surface. 96 (or 48 if we support) bit surface's width is - // pre-multiplied by 3 and bpp is divided by 3. So pitch alignment for linear- - // aligned does not meet 64-pixel in real. We keep special handling in hwl since hw - // restrictions are different. 
- // Also Mip 1+ needs an element pitch of 32 bits so we do not need this workaround - // but we use this flag to skip RestoreSurfaceInfo below - - if ((elemMode == ADDR_EXPANDED) && (expandX > 1)) - { - ADDR_ASSERT(IsLinear(localIn.swizzleMode)); - } - - UINT_32 basePitch = 0; - GetElemLib()->AdjustSurfaceInfo(elemMode, - expandX, - expandY, - &localIn.bpp, - &basePitch, - &localIn.width, - &localIn.height); - - // Overwrite these parameters if we have a valid format - } - - if (localIn.bpp != 0) - { - localIn.width = Max(localIn.width, 1u); - localIn.height = Max(localIn.height, 1u); - } - else // Rule out some invalid parameters - { - ADDR_ASSERT_ALWAYS(); - - returnCode = ADDR_INVALIDPARAMS; - } - } - - if (returnCode == ADDR_OK) - { - returnCode = ComputeSurfaceInfoSanityCheck(&localIn); - } - - if (returnCode == ADDR_OK) - { - VerifyMipLevelInfo(pIn); - - if (IsLinear(pIn->swizzleMode)) - { - // linear mode - returnCode = ComputeSurfaceInfoLinear(&localIn, pOut); - } - else - { - // tiled mode - returnCode = ComputeSurfaceInfoTiled(&localIn, pOut); - } - - if (returnCode == ADDR_OK) - { - pOut->bpp = localIn.bpp; - pOut->pixelPitch = pOut->pitch; - pOut->pixelHeight = pOut->height; - pOut->pixelMipChainPitch = pOut->mipChainPitch; - pOut->pixelMipChainHeight = pOut->mipChainHeight; - pOut->pixelBits = localIn.bpp; - - if (localIn.format != ADDR_FMT_INVALID) - { - UINT_32 pixelBits = pOut->pixelBits; - - GetElemLib()->RestoreSurfaceInfo(elemMode, - expandX, - expandY, - &pOut->pixelBits, - &pOut->pixelPitch, - &pOut->pixelHeight); - - GetElemLib()->RestoreSurfaceInfo(elemMode, - expandX, - expandY, - &pixelBits, - &pOut->pixelMipChainPitch, - &pOut->pixelMipChainHeight); - - if ((localIn.numMipLevels > 1) && (pOut->pMipInfo != NULL)) - { - for (UINT_32 i = 0; i < localIn.numMipLevels; i++) - { - pOut->pMipInfo[i].pixelPitch = pOut->pMipInfo[i].pitch; - pOut->pMipInfo[i].pixelHeight = pOut->pMipInfo[i].height; - - GetElemLib()->RestoreSurfaceInfo(elemMode, - 
expandX, - expandY, - &pixelBits, - &pOut->pMipInfo[i].pixelPitch, - &pOut->pMipInfo[i].pixelHeight); - } - } - } - - if (localIn.flags.needEquation && (Log2(localIn.numFrags) == 0)) - { - pOut->equationIndex = GetEquationIndex(&localIn, pOut); - } - - if (localIn.flags.qbStereo) - { - if (pOut->pStereoInfo != NULL) - { - ComputeQbStereoInfo(pOut); - } - } - } - } - - ADDR_ASSERT(pOut->surfSize != 0); - - ValidBaseAlignments(pOut->baseAlign); - - return returnCode; -} - -/** -************************************************************************************************************************ -* Lib::ComputeSurfaceInfo -* -* @brief -* Interface function stub of AddrComputeSurfaceInfo. -* -* @return -* ADDR_E_RETURNCODE -************************************************************************************************************************ -*/ -ADDR_E_RETURNCODE Lib::ComputeSurfaceAddrFromCoord( - const ADDR2_COMPUTE_SURFACE_ADDRFROMCOORD_INPUT* pIn, ///< [in] input structure - ADDR2_COMPUTE_SURFACE_ADDRFROMCOORD_OUTPUT* pOut ///< [out] output structure - ) const -{ - ADDR_E_RETURNCODE returnCode = ADDR_OK; - - if (GetFillSizeFieldsFlags() == TRUE) - { - if ((pIn->size != sizeof(ADDR2_COMPUTE_SURFACE_ADDRFROMCOORD_INPUT)) || - (pOut->size != sizeof(ADDR2_COMPUTE_SURFACE_ADDRFROMCOORD_OUTPUT))) - { - returnCode = ADDR_PARAMSIZEMISMATCH; - } - } - - ADDR2_COMPUTE_SURFACE_ADDRFROMCOORD_INPUT localIn = *pIn; - localIn.unalignedWidth = Max(pIn->unalignedWidth, 1u); - localIn.unalignedHeight = Max(pIn->unalignedHeight, 1u); - localIn.numMipLevels = Max(pIn->numMipLevels, 1u); - localIn.numSlices = Max(pIn->numSlices, 1u); - localIn.numSamples = Max(pIn->numSamples, 1u); - localIn.numFrags = Max(pIn->numFrags, 1u); - - if ((localIn.bpp < 8) || - (localIn.bpp > 128) || - ((localIn.bpp % 8) != 0) || - (localIn.sample >= localIn.numSamples) || - (localIn.slice >= localIn.numSlices) || - (localIn.mipId >= localIn.numMipLevels) || - (IsTex3d(localIn.resourceType) && - 
(Valid3DMipSliceIdConstraint(localIn.numSlices, localIn.mipId, localIn.slice) == FALSE))) - { - returnCode = ADDR_INVALIDPARAMS; - } - - if (returnCode == ADDR_OK) - { - if (IsLinear(localIn.swizzleMode)) - { - returnCode = ComputeSurfaceAddrFromCoordLinear(&localIn, pOut); - } - else - { - returnCode = ComputeSurfaceAddrFromCoordTiled(&localIn, pOut); - } - - if (returnCode == ADDR_OK) - { - pOut->prtBlockIndex = static_cast(pOut->addr / (64 * 1024)); - } - } - - return returnCode; -} - -/** -************************************************************************************************************************ -* Lib::ComputeSurfaceCoordFromAddr -* -* @brief -* Interface function stub of ComputeSurfaceCoordFromAddr. -* -* @return -* ADDR_E_RETURNCODE -************************************************************************************************************************ -*/ -ADDR_E_RETURNCODE Lib::ComputeSurfaceCoordFromAddr( - const ADDR2_COMPUTE_SURFACE_COORDFROMADDR_INPUT* pIn, ///< [in] input structure - ADDR2_COMPUTE_SURFACE_COORDFROMADDR_OUTPUT* pOut ///< [out] output structure - ) const -{ - ADDR_E_RETURNCODE returnCode = ADDR_OK; - - if (GetFillSizeFieldsFlags() == TRUE) - { - if ((pIn->size != sizeof(ADDR2_COMPUTE_SURFACE_COORDFROMADDR_INPUT)) || - (pOut->size != sizeof(ADDR2_COMPUTE_SURFACE_COORDFROMADDR_OUTPUT))) - { - returnCode = ADDR_PARAMSIZEMISMATCH; - } - } - - if ((pIn->bpp < 8) || - (pIn->bpp > 128) || - ((pIn->bpp % 8) != 0) || - (pIn->bitPosition >= 8)) - { - returnCode = ADDR_INVALIDPARAMS; - } - - if (returnCode == ADDR_OK) - { - if (IsLinear(pIn->swizzleMode)) - { - returnCode = ComputeSurfaceCoordFromAddrLinear(pIn, pOut); - } - else - { - returnCode = ComputeSurfaceCoordFromAddrTiled(pIn, pOut); - } - } - - return returnCode; -} - - -//////////////////////////////////////////////////////////////////////////////////////////////////// -// CMASK/HTILE 
-//////////////////////////////////////////////////////////////////////////////////////////////////// - -/** -************************************************************************************************************************ -* Lib::ComputeHtileInfo -* -* @brief -* Interface function stub of AddrComputeHtilenfo -* -* @return -* ADDR_E_RETURNCODE -************************************************************************************************************************ -*/ -ADDR_E_RETURNCODE Lib::ComputeHtileInfo( - const ADDR2_COMPUTE_HTILE_INFO_INPUT* pIn, ///< [in] input structure - ADDR2_COMPUTE_HTILE_INFO_OUTPUT* pOut ///< [out] output structure - ) const -{ - ADDR_E_RETURNCODE returnCode; - - if ((GetFillSizeFieldsFlags() == TRUE) && - ((pIn->size != sizeof(ADDR2_COMPUTE_HTILE_INFO_INPUT)) || - (pOut->size != sizeof(ADDR2_COMPUTE_HTILE_INFO_OUTPUT)))) - { - returnCode = ADDR_INVALIDPARAMS; - } - else - { - returnCode = HwlComputeHtileInfo(pIn, pOut); - - ValidMetaBaseAlignments(pOut->baseAlign); - } - - return returnCode; -} - -/** -************************************************************************************************************************ -* Lib::ComputeHtileAddrFromCoord -* -* @brief -* Interface function stub of AddrComputeHtileAddrFromCoord -* -* @return -* ADDR_E_RETURNCODE -************************************************************************************************************************ -*/ -ADDR_E_RETURNCODE Lib::ComputeHtileAddrFromCoord( - const ADDR2_COMPUTE_HTILE_ADDRFROMCOORD_INPUT* pIn, ///< [in] input structure - ADDR2_COMPUTE_HTILE_ADDRFROMCOORD_OUTPUT* pOut) ///< [out] output structure -{ - ADDR_E_RETURNCODE returnCode; - - if ((GetFillSizeFieldsFlags() == TRUE) && - ((pIn->size != sizeof(ADDR2_COMPUTE_HTILE_ADDRFROMCOORD_INPUT)) || - (pOut->size != sizeof(ADDR2_COMPUTE_HTILE_ADDRFROMCOORD_OUTPUT)))) - { - returnCode = ADDR_INVALIDPARAMS; - } - else - { - returnCode = HwlComputeHtileAddrFromCoord(pIn, pOut); - } - - return 
returnCode; -} - -/** -************************************************************************************************************************ -* Lib::ComputeHtileCoordFromAddr -* -* @brief -* Interface function stub of AddrComputeHtileCoordFromAddr -* -* @return -* ADDR_E_RETURNCODE -************************************************************************************************************************ -*/ -ADDR_E_RETURNCODE Lib::ComputeHtileCoordFromAddr( - const ADDR2_COMPUTE_HTILE_COORDFROMADDR_INPUT* pIn, ///< [in] input structure - ADDR2_COMPUTE_HTILE_COORDFROMADDR_OUTPUT* pOut) ///< [out] output structure -{ - ADDR_E_RETURNCODE returnCode; - - if ((GetFillSizeFieldsFlags() == TRUE) && - ((pIn->size != sizeof(ADDR2_COMPUTE_HTILE_COORDFROMADDR_INPUT)) || - (pOut->size != sizeof(ADDR2_COMPUTE_HTILE_COORDFROMADDR_OUTPUT)))) - { - returnCode = ADDR_INVALIDPARAMS; - } - else - { - returnCode = HwlComputeHtileCoordFromAddr(pIn, pOut); - } - - return returnCode; -} - -/** -************************************************************************************************************************ -* Lib::ComputeCmaskInfo -* -* @brief -* Interface function stub of AddrComputeCmaskInfo -* -* @return -* ADDR_E_RETURNCODE -************************************************************************************************************************ -*/ -ADDR_E_RETURNCODE Lib::ComputeCmaskInfo( - const ADDR2_COMPUTE_CMASK_INFO_INPUT* pIn, ///< [in] input structure - ADDR2_COMPUTE_CMASK_INFO_OUTPUT* pOut ///< [out] output structure - ) const -{ - ADDR_E_RETURNCODE returnCode; - - if ((GetFillSizeFieldsFlags() == TRUE) && - ((pIn->size != sizeof(ADDR2_COMPUTE_CMASK_INFO_INPUT)) || - (pOut->size != sizeof(ADDR2_COMPUTE_CMASK_INFO_OUTPUT)))) - { - returnCode = ADDR_INVALIDPARAMS; - } - else if (pIn->cMaskFlags.linear) - { - returnCode = ADDR_INVALIDPARAMS; - } - else - { - returnCode = HwlComputeCmaskInfo(pIn, pOut); - - ValidMetaBaseAlignments(pOut->baseAlign); - } - - return 
returnCode; -} - -/** -************************************************************************************************************************ -* Lib::ComputeCmaskAddrFromCoord -* -* @brief -* Interface function stub of AddrComputeCmaskAddrFromCoord -* -* @return -* ADDR_E_RETURNCODE -************************************************************************************************************************ -*/ -ADDR_E_RETURNCODE Lib::ComputeCmaskAddrFromCoord( - const ADDR2_COMPUTE_CMASK_ADDRFROMCOORD_INPUT* pIn, ///< [in] input structure - ADDR2_COMPUTE_CMASK_ADDRFROMCOORD_OUTPUT* pOut) ///< [out] output structure -{ - ADDR_E_RETURNCODE returnCode; - - if ((GetFillSizeFieldsFlags() == TRUE) && - ((pIn->size != sizeof(ADDR2_COMPUTE_CMASK_ADDRFROMCOORD_INPUT)) || - (pOut->size != sizeof(ADDR2_COMPUTE_CMASK_ADDRFROMCOORD_OUTPUT)))) - { - returnCode = ADDR_INVALIDPARAMS; - } - else - { - returnCode = HwlComputeCmaskAddrFromCoord(pIn, pOut); - } - - return returnCode; -} - -/** -************************************************************************************************************************ -* Lib::ComputeCmaskCoordFromAddr -* -* @brief -* Interface function stub of AddrComputeCmaskCoordFromAddr -* -* @return -* ADDR_E_RETURNCODE -************************************************************************************************************************ -*/ -ADDR_E_RETURNCODE Lib::ComputeCmaskCoordFromAddr( - const ADDR2_COMPUTE_CMASK_COORDFROMADDR_INPUT* pIn, ///< [in] input structure - ADDR2_COMPUTE_CMASK_COORDFROMADDR_OUTPUT* pOut ///< [out] output structure - ) const -{ - ADDR_E_RETURNCODE returnCode = ADDR_NOTIMPLEMENTED; - - ADDR_NOT_IMPLEMENTED(); - - return returnCode; -} - -/** -************************************************************************************************************************ -* Lib::ComputeFmaskInfo -* -* @brief -* Interface function stub of ComputeFmaskInfo. 
-* -* @return -* ADDR_E_RETURNCODE -************************************************************************************************************************ -*/ -ADDR_E_RETURNCODE Lib::ComputeFmaskInfo( - const ADDR2_COMPUTE_FMASK_INFO_INPUT* pIn, ///< [in] input structure - ADDR2_COMPUTE_FMASK_INFO_OUTPUT* pOut ///< [out] output structure - ) -{ - ADDR_E_RETURNCODE returnCode; - - BOOL_32 valid = (IsZOrderSwizzle(pIn->swizzleMode) == TRUE) && - ((pIn->numSamples > 0) || (pIn->numFrags > 0)); - - if (GetFillSizeFieldsFlags()) - { - if ((pIn->size != sizeof(ADDR2_COMPUTE_FMASK_INFO_INPUT)) || - (pOut->size != sizeof(ADDR2_COMPUTE_FMASK_INFO_OUTPUT))) - { - valid = FALSE; - } - } - - if (valid == FALSE) - { - returnCode = ADDR_INVALIDPARAMS; - } - else - { - ADDR2_COMPUTE_SURFACE_INFO_INPUT localIn = {0}; - ADDR2_COMPUTE_SURFACE_INFO_OUTPUT localOut = {0}; - - localIn.size = sizeof(ADDR2_COMPUTE_SURFACE_INFO_INPUT); - localOut.size = sizeof(ADDR2_COMPUTE_SURFACE_INFO_OUTPUT); - - localIn.swizzleMode = pIn->swizzleMode; - localIn.numSlices = Max(pIn->numSlices, 1u); - localIn.width = Max(pIn->unalignedWidth, 1u); - localIn.height = Max(pIn->unalignedHeight, 1u); - localIn.bpp = GetFmaskBpp(pIn->numSamples, pIn->numFrags); - localIn.flags.fmask = 1; - localIn.numFrags = 1; - localIn.numSamples = 1; - localIn.resourceType = ADDR_RSRC_TEX_2D; - - if (localIn.bpp == 8) - { - localIn.format = ADDR_FMT_8; - } - else if (localIn.bpp == 16) - { - localIn.format = ADDR_FMT_16; - } - else if (localIn.bpp == 32) - { - localIn.format = ADDR_FMT_32; - } - else - { - localIn.format = ADDR_FMT_32_32; - } - - returnCode = ComputeSurfaceInfo(&localIn, &localOut); - - if (returnCode == ADDR_OK) - { - pOut->pitch = localOut.pitch; - pOut->height = localOut.height; - pOut->baseAlign = localOut.baseAlign; - pOut->numSlices = localOut.numSlices; - pOut->fmaskBytes = static_cast(localOut.surfSize); - pOut->sliceSize = static_cast(localOut.sliceSize); - pOut->bpp = localIn.bpp; - 
pOut->numSamples = 1; - } - } - - ValidBaseAlignments(pOut->baseAlign); - - return returnCode; -} - -/** -************************************************************************************************************************ -* Lib::ComputeFmaskAddrFromCoord -* -* @brief -* Interface function stub of ComputeFmaskAddrFromCoord. -* -* @return -* ADDR_E_RETURNCODE -************************************************************************************************************************ -*/ -ADDR_E_RETURNCODE Lib::ComputeFmaskAddrFromCoord( - const ADDR2_COMPUTE_FMASK_ADDRFROMCOORD_INPUT* pIn, ///< [in] input structure - ADDR2_COMPUTE_FMASK_ADDRFROMCOORD_OUTPUT* pOut ///< [out] output structure - ) const -{ - ADDR_E_RETURNCODE returnCode = ADDR_NOTIMPLEMENTED; - - ADDR_NOT_IMPLEMENTED(); - - return returnCode; -} - -/** -************************************************************************************************************************ -* Lib::ComputeFmaskCoordFromAddr -* -* @brief -* Interface function stub of ComputeFmaskAddrFromCoord. 
-* -* @return -* ADDR_E_RETURNCODE -************************************************************************************************************************ -*/ -ADDR_E_RETURNCODE Lib::ComputeFmaskCoordFromAddr( - const ADDR2_COMPUTE_FMASK_COORDFROMADDR_INPUT* pIn, ///< [in] input structure - ADDR2_COMPUTE_FMASK_COORDFROMADDR_OUTPUT* pOut ///< [out] output structure - ) const -{ - ADDR_E_RETURNCODE returnCode = ADDR_NOTIMPLEMENTED; - - ADDR_NOT_IMPLEMENTED(); - - return returnCode; -} - -/** -************************************************************************************************************************ -* Lib::ComputeDccInfo -* -* @brief -* Interface function to compute DCC key info -* -* @return -* return code of HwlComputeDccInfo -************************************************************************************************************************ -*/ -ADDR_E_RETURNCODE Lib::ComputeDccInfo( - const ADDR2_COMPUTE_DCCINFO_INPUT* pIn, ///< [in] input structure - ADDR2_COMPUTE_DCCINFO_OUTPUT* pOut ///< [out] output structure - ) const -{ - ADDR_E_RETURNCODE returnCode; - - if ((GetFillSizeFieldsFlags() == TRUE) && - ((pIn->size != sizeof(ADDR2_COMPUTE_DCCINFO_INPUT)) || - (pOut->size != sizeof(ADDR2_COMPUTE_DCCINFO_OUTPUT)))) - { - returnCode = ADDR_INVALIDPARAMS; - } - else - { - returnCode = HwlComputeDccInfo(pIn, pOut); - - ValidMetaBaseAlignments(pOut->dccRamBaseAlign); - } - - return returnCode; -} - -/** -************************************************************************************************************************ -* Lib::ComputeDccAddrFromCoord -* -* @brief -* Interface function stub of ComputeDccAddrFromCoord -* -* @return -* ADDR_E_RETURNCODE -************************************************************************************************************************ -*/ -ADDR_E_RETURNCODE Lib::ComputeDccAddrFromCoord( - const ADDR2_COMPUTE_DCC_ADDRFROMCOORD_INPUT* pIn, ///< [in] input structure - ADDR2_COMPUTE_DCC_ADDRFROMCOORD_OUTPUT* pOut) 
///< [out] output structure -{ - ADDR_E_RETURNCODE returnCode; - - if ((GetFillSizeFieldsFlags() == TRUE) && - ((pIn->size != sizeof(ADDR2_COMPUTE_DCC_ADDRFROMCOORD_INPUT)) || - (pOut->size != sizeof(ADDR2_COMPUTE_DCC_ADDRFROMCOORD_OUTPUT)))) - { - returnCode = ADDR_INVALIDPARAMS; - } - else - { - returnCode = HwlComputeDccAddrFromCoord(pIn, pOut); - } - - return returnCode; -} - -/** -************************************************************************************************************************ -* Lib::ComputePipeBankXor -* -* @brief -* Interface function stub of Addr2ComputePipeBankXor. -* -* @return -* ADDR_E_RETURNCODE -************************************************************************************************************************ -*/ -ADDR_E_RETURNCODE Lib::ComputePipeBankXor( - const ADDR2_COMPUTE_PIPEBANKXOR_INPUT* pIn, - ADDR2_COMPUTE_PIPEBANKXOR_OUTPUT* pOut) -{ - ADDR_E_RETURNCODE returnCode; - - if ((GetFillSizeFieldsFlags() == TRUE) && - ((pIn->size != sizeof(ADDR2_COMPUTE_PIPEBANKXOR_INPUT)) || - (pOut->size != sizeof(ADDR2_COMPUTE_PIPEBANKXOR_OUTPUT)))) - { - returnCode = ADDR_INVALIDPARAMS; - } - else if (IsXor(pIn->swizzleMode) == FALSE) - { - returnCode = ADDR_NOTSUPPORTED; - } - else - { - returnCode = HwlComputePipeBankXor(pIn, pOut); - } - - return returnCode; -} - -/** -************************************************************************************************************************ -* Lib::ComputeSlicePipeBankXor -* -* @brief -* Interface function stub of Addr2ComputeSlicePipeBankXor. 
-* -* @return -* ADDR_E_RETURNCODE -************************************************************************************************************************ -*/ -ADDR_E_RETURNCODE Lib::ComputeSlicePipeBankXor( - const ADDR2_COMPUTE_SLICE_PIPEBANKXOR_INPUT* pIn, - ADDR2_COMPUTE_SLICE_PIPEBANKXOR_OUTPUT* pOut) -{ - ADDR_E_RETURNCODE returnCode; - - if ((GetFillSizeFieldsFlags() == TRUE) && - ((pIn->size != sizeof(ADDR2_COMPUTE_SLICE_PIPEBANKXOR_INPUT)) || - (pOut->size != sizeof(ADDR2_COMPUTE_SLICE_PIPEBANKXOR_OUTPUT)))) - { - returnCode = ADDR_INVALIDPARAMS; - } - else if ((IsThin(pIn->resourceType, pIn->swizzleMode) == FALSE) || - (IsNonPrtXor(pIn->swizzleMode) == FALSE) || - (pIn->numSamples > 1)) - { - returnCode = ADDR_NOTSUPPORTED; - } - else - { - returnCode = HwlComputeSlicePipeBankXor(pIn, pOut); - } - - return returnCode; -} - -/** -************************************************************************************************************************ -* Lib::ComputeSubResourceOffsetForSwizzlePattern -* -* @brief -* Interface function stub of Addr2ComputeSubResourceOffsetForSwizzlePattern. 
-* -* @return -* ADDR_E_RETURNCODE -************************************************************************************************************************ -*/ -ADDR_E_RETURNCODE Lib::ComputeSubResourceOffsetForSwizzlePattern( - const ADDR2_COMPUTE_SUBRESOURCE_OFFSET_FORSWIZZLEPATTERN_INPUT* pIn, - ADDR2_COMPUTE_SUBRESOURCE_OFFSET_FORSWIZZLEPATTERN_OUTPUT* pOut) -{ - ADDR_E_RETURNCODE returnCode; - - if ((GetFillSizeFieldsFlags() == TRUE) && - ((pIn->size != sizeof(ADDR2_COMPUTE_SUBRESOURCE_OFFSET_FORSWIZZLEPATTERN_INPUT)) || - (pOut->size != sizeof(ADDR2_COMPUTE_SUBRESOURCE_OFFSET_FORSWIZZLEPATTERN_OUTPUT)))) - { - returnCode = ADDR_INVALIDPARAMS; - } - else - { - returnCode = HwlComputeSubResourceOffsetForSwizzlePattern(pIn, pOut); - } - - return returnCode; -} - -/** -************************************************************************************************************************ -* Lib::ExtractPipeBankXor -* -* @brief -* Internal function to extract bank and pipe xor bits from combined xor bits. 
-* -* @return -* ADDR_E_RETURNCODE -************************************************************************************************************************ -*/ -ADDR_E_RETURNCODE Lib::ExtractPipeBankXor( - UINT_32 pipeBankXor, - UINT_32 bankBits, - UINT_32 pipeBits, - UINT_32* pBankX, - UINT_32* pPipeX) -{ - ADDR_E_RETURNCODE returnCode; - - if (pipeBankXor < (1u << (pipeBits + bankBits))) - { - *pPipeX = pipeBankXor % (1 << pipeBits); - *pBankX = pipeBankXor >> pipeBits; - returnCode = ADDR_OK; - } - else - { - ADDR_ASSERT_ALWAYS(); - returnCode = ADDR_INVALIDPARAMS; - } - - return returnCode; -} - -/** -************************************************************************************************************************ -* Lib::ComputeSurfaceInfoSanityCheck -* -* @brief -* Internal function to do basic sanity check before compute surface info -* -* @return -* ADDR_E_RETURNCODE -************************************************************************************************************************ -*/ -ADDR_E_RETURNCODE Lib::ComputeSurfaceInfoSanityCheck( - const ADDR2_COMPUTE_SURFACE_INFO_INPUT* pIn ///< [in] input structure - ) const -{ - ADDR_E_RETURNCODE returnCode; - - if ((GetFillSizeFieldsFlags() == TRUE) && - (pIn->size != sizeof(ADDR2_COMPUTE_SURFACE_INFO_INPUT))) - { - returnCode = ADDR_INVALIDPARAMS; - } - else - { - returnCode = HwlComputeSurfaceInfoSanityCheck(pIn); - } - - return returnCode; -} - -/** -************************************************************************************************************************ -* Lib::ApplyCustomizedPitchHeight -* -* @brief -* Helper function to override hw required row pitch/slice pitch by customrized one -* -* @return -* ADDR_E_RETURNCODE -************************************************************************************************************************ -*/ -ADDR_E_RETURNCODE Lib::ApplyCustomizedPitchHeight( - const ADDR2_COMPUTE_SURFACE_INFO_INPUT* pIn, ///< [in] input structure - UINT_32 
elementBytes, ///< [in] element bytes per element - UINT_32 pitchAlignInElement, ///< [in] pitch alignment in element - UINT_32* pPitch, ///< [in/out] pitch - UINT_32* pHeight ///< [in/out] height - ) const -{ - ADDR_E_RETURNCODE returnCode = ADDR_OK; - - if (pIn->numMipLevels <= 1) - { - if (pIn->pitchInElement > 0) - { - if ((pIn->pitchInElement % pitchAlignInElement) != 0) - { - returnCode = ADDR_INVALIDPARAMS; - } - else if (pIn->pitchInElement < (*pPitch)) - { - returnCode = ADDR_INVALIDPARAMS; - } - else - { - *pPitch = pIn->pitchInElement; - } - } - - if (returnCode == ADDR_OK) - { - if (pIn->sliceAlign > 0) - { - UINT_32 customizedHeight = pIn->sliceAlign / elementBytes / (*pPitch); - - if (customizedHeight * elementBytes * (*pPitch) != pIn->sliceAlign) - { - returnCode = ADDR_INVALIDPARAMS; - } - else if ((pIn->numSlices > 1) && ((*pHeight) != customizedHeight)) - { - returnCode = ADDR_INVALIDPARAMS; - } - else - { - *pHeight = customizedHeight; - } - } - } - } - - return returnCode; -} - -/** -************************************************************************************************************************ -* Lib::ComputeSurfaceInfoLinear -* -* @brief -* Internal function to calculate alignment for linear swizzle surface -* -* @return -* ADDR_E_RETURNCODE -************************************************************************************************************************ -*/ -ADDR_E_RETURNCODE Lib::ComputeSurfaceInfoLinear( - const ADDR2_COMPUTE_SURFACE_INFO_INPUT* pIn, ///< [in] input structure - ADDR2_COMPUTE_SURFACE_INFO_OUTPUT* pOut ///< [out] output structure - ) const -{ - return HwlComputeSurfaceInfoLinear(pIn, pOut); -} - -/** -************************************************************************************************************************ -* Lib::ComputeSurfaceInfoTiled -* -* @brief -* Internal function to calculate alignment for tiled swizzle surface -* -* @return -* ADDR_E_RETURNCODE 
-************************************************************************************************************************ -*/ -ADDR_E_RETURNCODE Lib::ComputeSurfaceInfoTiled( - const ADDR2_COMPUTE_SURFACE_INFO_INPUT* pIn, ///< [in] input structure - ADDR2_COMPUTE_SURFACE_INFO_OUTPUT* pOut ///< [out] output structure - ) const -{ - return HwlComputeSurfaceInfoTiled(pIn, pOut); -} - -/** -************************************************************************************************************************ -* Lib::ComputeSurfaceAddrFromCoordLinear -* -* @brief -* Internal function to calculate address from coord for linear swizzle surface -* -* @return -* ADDR_E_RETURNCODE -************************************************************************************************************************ -*/ -ADDR_E_RETURNCODE Lib::ComputeSurfaceAddrFromCoordLinear( - const ADDR2_COMPUTE_SURFACE_ADDRFROMCOORD_INPUT* pIn, ///< [in] input structure - ADDR2_COMPUTE_SURFACE_ADDRFROMCOORD_OUTPUT* pOut ///< [out] output structure - ) const -{ - ADDR_E_RETURNCODE returnCode = ADDR_OK; - BOOL_32 valid = (pIn->numSamples <= 1) && (pIn->numFrags <= 1) && (pIn->pipeBankXor == 0); - - if (valid) - { - if (IsTex1d(pIn->resourceType)) - { - valid = (pIn->y == 0); - } - } - - if (valid) - { - ADDR2_COMPUTE_SURFACE_INFO_INPUT localIn = {0}; - ADDR2_COMPUTE_SURFACE_INFO_OUTPUT localOut = {0}; - ADDR2_MIP_INFO mipInfo[MaxMipLevels]; - - localIn.bpp = pIn->bpp; - localIn.flags = pIn->flags; - localIn.width = Max(pIn->unalignedWidth, 1u); - localIn.height = Max(pIn->unalignedHeight, 1u); - localIn.numSlices = Max(pIn->numSlices, 1u); - localIn.numMipLevels = Max(pIn->numMipLevels, 1u); - localIn.resourceType = pIn->resourceType; - - if (localIn.numMipLevels <= 1) - { - localIn.pitchInElement = pIn->pitchInElement; - } - - localOut.pMipInfo = mipInfo; - - returnCode = ComputeSurfaceInfoLinear(&localIn, &localOut); - - if (returnCode == ADDR_OK) - { - pOut->addr = (localOut.sliceSize * pIn->slice) + 
- mipInfo[pIn->mipId].offset + - (pIn->y * mipInfo[pIn->mipId].pitch + pIn->x) * (pIn->bpp >> 3); - pOut->bitPosition = 0; - } - else - { - valid = FALSE; - } - } - - if (valid == FALSE) - { - returnCode = ADDR_INVALIDPARAMS; - } - - return returnCode; -} - -/** -************************************************************************************************************************ -* Lib::ComputeSurfaceAddrFromCoordTiled -* -* @brief -* Internal function to calculate address from coord for tiled swizzle surface -* -* @return -* ADDR_E_RETURNCODE -************************************************************************************************************************ -*/ -ADDR_E_RETURNCODE Lib::ComputeSurfaceAddrFromCoordTiled( - const ADDR2_COMPUTE_SURFACE_ADDRFROMCOORD_INPUT* pIn, ///< [in] input structure - ADDR2_COMPUTE_SURFACE_ADDRFROMCOORD_OUTPUT* pOut ///< [out] output structure - ) const -{ - return HwlComputeSurfaceAddrFromCoordTiled(pIn, pOut); -} - -/** -************************************************************************************************************************ -* Lib::ComputeSurfaceCoordFromAddrLinear -* -* @brief -* Internal function to calculate coord from address for linear swizzle surface -* -* @return -* ADDR_E_RETURNCODE -************************************************************************************************************************ -*/ -ADDR_E_RETURNCODE Lib::ComputeSurfaceCoordFromAddrLinear( - const ADDR2_COMPUTE_SURFACE_COORDFROMADDR_INPUT* pIn, ///< [in] input structure - ADDR2_COMPUTE_SURFACE_COORDFROMADDR_OUTPUT* pOut ///< [out] output structure - ) const -{ - ADDR_E_RETURNCODE returnCode = ADDR_OK; - - BOOL_32 valid = (pIn->numSamples <= 1) && (pIn->numFrags <= 1); - - if (valid) - { - if (IsTex1d(pIn->resourceType)) - { - valid = (pIn->unalignedHeight == 1); - } - } - - if (valid) - { - ADDR2_COMPUTE_SURFACE_INFO_INPUT localIn = {0}; - ADDR2_COMPUTE_SURFACE_INFO_OUTPUT localOut = {0}; - localIn.bpp = pIn->bpp; - 
localIn.flags = pIn->flags; - localIn.width = Max(pIn->unalignedWidth, 1u); - localIn.height = Max(pIn->unalignedHeight, 1u); - localIn.numSlices = Max(pIn->numSlices, 1u); - localIn.numMipLevels = Max(pIn->numMipLevels, 1u); - localIn.resourceType = pIn->resourceType; - if (localIn.numMipLevels <= 1) - { - localIn.pitchInElement = pIn->pitchInElement; - } - returnCode = ComputeSurfaceInfoLinear(&localIn, &localOut); - - if (returnCode == ADDR_OK) - { - pOut->slice = static_cast(pIn->addr / localOut.sliceSize); - pOut->sample = 0; - - UINT_32 offsetInSlice = static_cast(pIn->addr % localOut.sliceSize); - UINT_32 elementBytes = pIn->bpp >> 3; - UINT_32 mipOffsetInSlice = 0; - UINT_32 mipSize = 0; - UINT_32 mipId = 0; - for (; mipId < pIn->numMipLevels ; mipId++) - { - if (IsTex1d(pIn->resourceType)) - { - mipSize = localOut.pitch * elementBytes; - } - else - { - UINT_32 currentMipHeight = (PowTwoAlign(localIn.height, (1 << mipId))) >> mipId; - mipSize = currentMipHeight * localOut.pitch * elementBytes; - } - - if (mipSize == 0) - { - valid = FALSE; - break; - } - else if ((mipSize + mipOffsetInSlice) > offsetInSlice) - { - break; - } - else - { - mipOffsetInSlice += mipSize; - if ((mipId == (pIn->numMipLevels - 1)) || - (mipOffsetInSlice >= localOut.sliceSize)) - { - valid = FALSE; - } - } - } - - if (valid) - { - pOut->mipId = mipId; - - UINT_32 elemOffsetInMip = (offsetInSlice - mipOffsetInSlice) / elementBytes; - if (IsTex1d(pIn->resourceType)) - { - if (elemOffsetInMip < localOut.pitch) - { - pOut->x = elemOffsetInMip; - pOut->y = 0; - } - else - { - valid = FALSE; - } - } - else - { - pOut->y = elemOffsetInMip / localOut.pitch; - pOut->x = elemOffsetInMip % localOut.pitch; - } - - if ((pOut->slice >= pIn->numSlices) || - (pOut->mipId >= pIn->numMipLevels) || - (pOut->x >= Max((pIn->unalignedWidth >> pOut->mipId), 1u)) || - (pOut->y >= Max((pIn->unalignedHeight >> pOut->mipId), 1u)) || - (IsTex3d(pIn->resourceType) && - (FALSE == 
Valid3DMipSliceIdConstraint(pIn->numSlices, - pOut->mipId, - pOut->slice)))) - { - valid = FALSE; - } - } - } - else - { - valid = FALSE; - } - } - - if (valid == FALSE) - { - returnCode = ADDR_INVALIDPARAMS; - } - - return returnCode; -} - -/** -************************************************************************************************************************ -* Lib::ComputeSurfaceCoordFromAddrTiled -* -* @brief -* Internal function to calculate coord from address for tiled swizzle surface -* -* @return -* ADDR_E_RETURNCODE -************************************************************************************************************************ -*/ -ADDR_E_RETURNCODE Lib::ComputeSurfaceCoordFromAddrTiled( - const ADDR2_COMPUTE_SURFACE_COORDFROMADDR_INPUT* pIn, ///< [in] input structure - ADDR2_COMPUTE_SURFACE_COORDFROMADDR_OUTPUT* pOut ///< [out] output structure - ) const -{ - ADDR_E_RETURNCODE returnCode = ADDR_NOTIMPLEMENTED; - - ADDR_NOT_IMPLEMENTED(); - - return returnCode; -} - -/** -************************************************************************************************************************ -* Lib::ComputeBlockDimensionForSurf -* -* @brief -* Internal function to get block width/height/depth in element from surface input params. 
-* -* @return -* ADDR_E_RETURNCODE -************************************************************************************************************************ -*/ -ADDR_E_RETURNCODE Lib::ComputeBlockDimensionForSurf( - UINT_32* pWidth, - UINT_32* pHeight, - UINT_32* pDepth, - UINT_32 bpp, - UINT_32 numSamples, - AddrResourceType resourceType, - AddrSwizzleMode swizzleMode) const -{ - ADDR_E_RETURNCODE returnCode = ComputeBlockDimension(pWidth, - pHeight, - pDepth, - bpp, - resourceType, - swizzleMode); - - if ((returnCode == ADDR_OK) && (numSamples > 1) && IsThin(resourceType, swizzleMode)) - { - const UINT_32 log2blkSize = GetBlockSizeLog2(swizzleMode); - const UINT_32 log2sample = Log2(numSamples); - const UINT_32 q = log2sample >> 1; - const UINT_32 r = log2sample & 1; - - if (log2blkSize & 1) - { - *pWidth >>= q; - *pHeight >>= (q + r); - } - else - { - *pWidth >>= (q + r); - *pHeight >>= q; - } - } - - return returnCode; -} - -/** -************************************************************************************************************************ -* Lib::ComputeBlockDimension -* -* @brief -* Internal function to get block width/height/depth in element without considering MSAA case -* -* @return -* ADDR_E_RETURNCODE -************************************************************************************************************************ -*/ -ADDR_E_RETURNCODE Lib::ComputeBlockDimension( - UINT_32* pWidth, - UINT_32* pHeight, - UINT_32* pDepth, - UINT_32 bpp, - AddrResourceType resourceType, - AddrSwizzleMode swizzleMode) const -{ - ADDR_E_RETURNCODE returnCode = ADDR_OK; - - UINT_32 eleBytes = bpp >> 3; - UINT_32 microBlockSizeTableIndex = Log2(eleBytes); - UINT_32 log2blkSize = GetBlockSizeLog2(swizzleMode); - - if (IsThin(resourceType, swizzleMode)) - { - UINT_32 log2blkSizeIn256B = log2blkSize - 8; - UINT_32 widthAmp = log2blkSizeIn256B / 2; - UINT_32 heightAmp = log2blkSizeIn256B - widthAmp; - - ADDR_ASSERT(microBlockSizeTableIndex < sizeof(Block256_2d) / 
sizeof(Block256_2d[0])); - - *pWidth = (Block256_2d[microBlockSizeTableIndex].w << widthAmp); - *pHeight = (Block256_2d[microBlockSizeTableIndex].h << heightAmp); - *pDepth = 1; - } - else if (IsThick(resourceType, swizzleMode)) - { - UINT_32 log2blkSizeIn1KB = log2blkSize - 10; - UINT_32 averageAmp = log2blkSizeIn1KB / 3; - UINT_32 restAmp = log2blkSizeIn1KB % 3; - - ADDR_ASSERT(microBlockSizeTableIndex < sizeof(Block1K_3d) / sizeof(Block1K_3d[0])); - - *pWidth = Block1K_3d[microBlockSizeTableIndex].w << averageAmp; - *pHeight = Block1K_3d[microBlockSizeTableIndex].h << (averageAmp + (restAmp / 2)); - *pDepth = Block1K_3d[microBlockSizeTableIndex].d << (averageAmp + ((restAmp != 0) ? 1 : 0)); - } - else - { - ADDR_ASSERT_ALWAYS(); - returnCode = ADDR_INVALIDPARAMS; - } - - return returnCode; -} - -/** -************************************************************************************************************************ -* Lib::GetMipTailDim -* -* @brief -* Internal function to get out max dimension of first level in mip tail -* -* @return -* Max Width/Height/Depth value of the first mip fitted in mip tail -************************************************************************************************************************ -*/ -Dim3d Lib::GetMipTailDim( - AddrResourceType resourceType, - AddrSwizzleMode swizzleMode, - UINT_32 blockWidth, - UINT_32 blockHeight, - UINT_32 blockDepth) const -{ - Dim3d out = {blockWidth, blockHeight, blockDepth}; - UINT_32 log2blkSize = GetBlockSizeLog2(swizzleMode); - - if (IsThick(resourceType, swizzleMode)) - { - UINT_32 dim = log2blkSize % 3; - - if (dim == 0) - { - out.h >>= 1; - } - else if (dim == 1) - { - out.w >>= 1; - } - else - { - out.d >>= 1; - } - } - else - { - if (log2blkSize & 1) - { - out.h >>= 1; - } - else - { - out.w >>= 1; - } - } - - return out; -} - -/** -************************************************************************************************************************ -* 
Lib::ComputeSurface2DMicroBlockOffset -* -* @brief -* Internal function to calculate micro block (256B) offset from coord for 2D resource -* -* @return -* micro block (256B) offset for 2D resource -************************************************************************************************************************ -*/ -UINT_32 Lib::ComputeSurface2DMicroBlockOffset( - const _ADDR2_COMPUTE_SURFACE_ADDRFROMCOORD_INPUT* pIn) const -{ - ADDR_ASSERT(IsThin(pIn->resourceType, pIn->swizzleMode)); - - UINT_32 log2ElementBytes = Log2(pIn->bpp >> 3); - UINT_32 microBlockOffset = 0; - if (IsStandardSwizzle(pIn->resourceType, pIn->swizzleMode)) - { - UINT_32 xBits = pIn->x << log2ElementBytes; - microBlockOffset = (xBits & 0xf) | ((pIn->y & 0x3) << 4); - if (log2ElementBytes < 3) - { - microBlockOffset |= (pIn->y & 0x4) << 4; - if (log2ElementBytes == 0) - { - microBlockOffset |= (pIn->y & 0x8) << 4; - } - else - { - microBlockOffset |= (xBits & 0x10) << 3; - } - } - else - { - microBlockOffset |= (xBits & 0x30) << 2; - } - } - else if (IsDisplaySwizzle(pIn->resourceType, pIn->swizzleMode)) - { - if (log2ElementBytes == 4) - { - microBlockOffset = (GetBit(pIn->x, 0) << 4) | - (GetBit(pIn->y, 0) << 5) | - (GetBit(pIn->x, 1) << 6) | - (GetBit(pIn->y, 1) << 7); - } - else - { - microBlockOffset = GetBits(pIn->x, 0, 3, log2ElementBytes) | - GetBits(pIn->y, 1, 2, 3 + log2ElementBytes) | - GetBits(pIn->x, 3, 1, 5 + log2ElementBytes) | - GetBits(pIn->y, 3, 1, 6 + log2ElementBytes); - microBlockOffset = GetBits(microBlockOffset, 0, 4, 0) | - (GetBit(pIn->y, 0) << 4) | - GetBits(microBlockOffset, 4, 3, 5); - } - } - else if (IsRotateSwizzle(pIn->swizzleMode)) - { - microBlockOffset = GetBits(pIn->y, 0, 3, log2ElementBytes) | - GetBits(pIn->x, 1, 2, 3 + log2ElementBytes) | - GetBits(pIn->x, 3, 1, 5 + log2ElementBytes) | - GetBits(pIn->y, 3, 1, 6 + log2ElementBytes); - microBlockOffset = GetBits(microBlockOffset, 0, 4, 0) | - (GetBit(pIn->x, 0) << 4) | - GetBits(microBlockOffset, 4, 3, 
5); - if (log2ElementBytes == 3) - { - microBlockOffset = GetBits(microBlockOffset, 0, 6, 0) | - GetBits(pIn->x, 1, 2, 6); - } - } - - return microBlockOffset; -} - -/** -************************************************************************************************************************ -* Lib::ComputeSurface3DMicroBlockOffset -* -* @brief -* Internal function to calculate micro block (1KB) offset from coord for 3D resource -* -* @return -* micro block (1KB) offset for 3D resource -************************************************************************************************************************ -*/ -UINT_32 Lib::ComputeSurface3DMicroBlockOffset( - const _ADDR2_COMPUTE_SURFACE_ADDRFROMCOORD_INPUT* pIn) const -{ - ADDR_ASSERT(IsThick(pIn->resourceType, pIn->swizzleMode)); - - UINT_32 log2ElementBytes = Log2(pIn->bpp >> 3); - UINT_32 microBlockOffset = 0; - if (IsStandardSwizzle(pIn->resourceType, pIn->swizzleMode)) - { - if (log2ElementBytes == 0) - { - microBlockOffset = ((pIn->slice & 4) >> 2) | ((pIn->y & 4) >> 1); - } - else if (log2ElementBytes == 1) - { - microBlockOffset = ((pIn->slice & 4) >> 2) | ((pIn->y & 4) >> 1); - } - else if (log2ElementBytes == 2) - { - microBlockOffset = ((pIn->y & 4) >> 2) | ((pIn->x & 4) >> 1); - } - else if (log2ElementBytes == 3) - { - microBlockOffset = (pIn->x & 6) >> 1; - } - else - { - microBlockOffset = pIn->x & 3; - } - - microBlockOffset <<= 8; - - UINT_32 xBits = pIn->x << log2ElementBytes; - microBlockOffset |= (xBits & 0xf) | ((pIn->y & 0x3) << 4) | ((pIn->slice & 0x3) << 6); - } - else if (IsZOrderSwizzle(pIn->swizzleMode)) - { - UINT_32 xh, yh, zh; - - if (log2ElementBytes == 0) - { - microBlockOffset = - (pIn->x & 1) | ((pIn->y & 1) << 1) | ((pIn->x & 2) << 1) | ((pIn->y & 2) << 2); - microBlockOffset = microBlockOffset | ((pIn->slice & 3) << 4) | ((pIn->x & 4) << 4); - - xh = pIn->x >> 3; - yh = pIn->y >> 2; - zh = pIn->slice >> 2; - } - else if (log2ElementBytes == 1) - { - microBlockOffset = - (pIn->x & 
1) | ((pIn->y & 1) << 1) | ((pIn->x & 2) << 1) | ((pIn->y & 2) << 2); - microBlockOffset = (microBlockOffset << 1) | ((pIn->slice & 3) << 5); - - xh = pIn->x >> 2; - yh = pIn->y >> 2; - zh = pIn->slice >> 2; - } - else if (log2ElementBytes == 2) - { - microBlockOffset = - (pIn->x & 1) | ((pIn->y & 1) << 1) | ((pIn->x & 2) << 1) | ((pIn->slice & 1) << 3); - microBlockOffset = (microBlockOffset << 2) | ((pIn->y & 2) << 5); - - xh = pIn->x >> 2; - yh = pIn->y >> 2; - zh = pIn->slice >> 1; - } - else if (log2ElementBytes == 3) - { - microBlockOffset = - (pIn->x & 1) | ((pIn->y & 1) << 1) | ((pIn->slice & 1) << 2) | ((pIn->x & 2) << 2); - microBlockOffset <<= 3; - - xh = pIn->x >> 2; - yh = pIn->y >> 1; - zh = pIn->slice >> 1; - } - else - { - microBlockOffset = - (((pIn->x & 1) | ((pIn->y & 1) << 1) | ((pIn->slice & 1) << 2)) << 4); - - xh = pIn->x >> 1; - yh = pIn->y >> 1; - zh = pIn->slice >> 1; - } - - microBlockOffset |= ((MortonGen3d(xh, yh, zh, 1) << 7) & 0x380); - } - - return microBlockOffset; -} - -/** -************************************************************************************************************************ -* Lib::GetPipeXorBits -* -* @brief -* Internal function to get bits number for pipe/se xor operation -* -* @return -* ADDR_E_RETURNCODE -************************************************************************************************************************ -*/ -UINT_32 Lib::GetPipeXorBits( - UINT_32 macroBlockBits) const -{ - ADDR_ASSERT(macroBlockBits >= m_pipeInterleaveLog2); - - // Total available xor bits - UINT_32 xorBits = macroBlockBits - m_pipeInterleaveLog2; - - // Pipe/Se xor bits - UINT_32 pipeBits = Min(xorBits, m_pipesLog2 + m_seLog2); - - return pipeBits; -} - -/** -************************************************************************************************************************ -* Lib::GetBankXorBits -* -* @brief -* Internal function to get bits number for pipe/se xor operation -* -* @return -* ADDR_E_RETURNCODE 
-************************************************************************************************************************ -*/ -UINT_32 Lib::GetBankXorBits( - UINT_32 macroBlockBits) const -{ - UINT_32 pipeBits = GetPipeXorBits(macroBlockBits); - - // Bank xor bits - UINT_32 bankBits = Min(macroBlockBits - pipeBits - m_pipeInterleaveLog2, m_banksLog2); - - return bankBits; -} - -/** -************************************************************************************************************************ -* Lib::Addr2GetPreferredSurfaceSetting -* -* @brief -* Internal function to get suggested surface information for cliet to use -* -* @return -* ADDR_E_RETURNCODE -************************************************************************************************************************ -*/ -ADDR_E_RETURNCODE Lib::Addr2GetPreferredSurfaceSetting( - const ADDR2_GET_PREFERRED_SURF_SETTING_INPUT* pIn, - ADDR2_GET_PREFERRED_SURF_SETTING_OUTPUT* pOut) const -{ - ADDR_E_RETURNCODE returnCode; - - if ((GetFillSizeFieldsFlags() == TRUE) && - ((pIn->size != sizeof(ADDR2_GET_PREFERRED_SURF_SETTING_INPUT)) || - (pOut->size != sizeof(ADDR2_GET_PREFERRED_SURF_SETTING_OUTPUT)))) - { - returnCode = ADDR_INVALIDPARAMS; - } - else - { - returnCode = HwlGetPreferredSurfaceSetting(pIn, pOut); - } - - return returnCode; -} - -/** -************************************************************************************************************************ -* Lib::ComputeBlock256Equation -* -* @brief -* Compute equation for block 256B -* -* @return -* If equation computed successfully -* -************************************************************************************************************************ -*/ -ADDR_E_RETURNCODE Lib::ComputeBlock256Equation( - AddrResourceType rsrcType, - AddrSwizzleMode swMode, - UINT_32 elementBytesLog2, - ADDR_EQUATION* pEquation) const -{ - ADDR_E_RETURNCODE ret; - - if (IsBlock256b(swMode)) - { - ret = HwlComputeBlock256Equation(rsrcType, swMode, 
elementBytesLog2, pEquation); - } - else - { - ADDR_ASSERT_ALWAYS(); - ret = ADDR_INVALIDPARAMS; - } - - return ret; -} - -/** -************************************************************************************************************************ -* Lib::ComputeThinEquation -* -* @brief -* Compute equation for 2D/3D resource which use THIN mode -* -* @return -* If equation computed successfully -* -************************************************************************************************************************ -*/ -ADDR_E_RETURNCODE Lib::ComputeThinEquation( - AddrResourceType rsrcType, - AddrSwizzleMode swMode, - UINT_32 elementBytesLog2, - ADDR_EQUATION* pEquation) const -{ - ADDR_E_RETURNCODE ret; - - if (IsThin(rsrcType, swMode)) - { - ret = HwlComputeThinEquation(rsrcType, swMode, elementBytesLog2, pEquation); - } - else - { - ADDR_ASSERT_ALWAYS(); - ret = ADDR_INVALIDPARAMS; - } - - return ret; -} - -/** -************************************************************************************************************************ -* Lib::ComputeThickEquation -* -* @brief -* Compute equation for 3D resource which use THICK mode -* -* @return -* If equation computed successfully -* -************************************************************************************************************************ -*/ -ADDR_E_RETURNCODE Lib::ComputeThickEquation( - AddrResourceType rsrcType, - AddrSwizzleMode swMode, - UINT_32 elementBytesLog2, - ADDR_EQUATION* pEquation) const -{ - ADDR_E_RETURNCODE ret; - - if (IsThick(rsrcType, swMode)) - { - ret = HwlComputeThickEquation(rsrcType, swMode, elementBytesLog2, pEquation); - } - else - { - ADDR_ASSERT_ALWAYS(); - ret = ADDR_INVALIDPARAMS; - } - - return ret; -} - -/** -************************************************************************************************************************ -* Lib::ComputeQbStereoInfo -* -* @brief -* Get quad buffer stereo information -* @return -* N/A 
-************************************************************************************************************************ -*/ -VOID Lib::ComputeQbStereoInfo( - ADDR2_COMPUTE_SURFACE_INFO_OUTPUT* pOut ///< [in,out] updated pOut+pStereoInfo - ) const -{ - ADDR_ASSERT(pOut->bpp >= 8); - ADDR_ASSERT((pOut->surfSize % pOut->baseAlign) == 0); - - // Save original height - pOut->pStereoInfo->eyeHeight = pOut->height; - - // Right offset - pOut->pStereoInfo->rightOffset = static_cast(pOut->surfSize); - - // Double height - pOut->height <<= 1; - - ADDR_ASSERT(pOut->height <= MaxSurfaceHeight); - - pOut->pixelHeight <<= 1; - - // Double size - pOut->surfSize <<= 1; -} - - -} // V2 -} // Addr - diff -Nru mesa-18.3.3/src/amd/addrlib/core/addrlib2.h mesa-19.0.1/src/amd/addrlib/core/addrlib2.h --- mesa-18.3.3/src/amd/addrlib/core/addrlib2.h 2018-04-03 17:32:26.000000000 +0000 +++ mesa-19.0.1/src/amd/addrlib/core/addrlib2.h 1970-01-01 00:00:00.000000000 +0000 @@ -1,793 +0,0 @@ -/* - * Copyright © 2017 Advanced Micro Devices, Inc. - * All Rights Reserved. - * - * Permission is hereby granted, free of charge, to any person obtaining - * a copy of this software and associated documentation files (the - * "Software"), to deal in the Software without restriction, including - * without limitation the rights to use, copy, modify, merge, publish, - * distribute, sub license, and/or sell copies of the Software, and to - * permit persons to whom the Software is furnished to do so, subject to - * the following conditions: - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES - * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - * NON-INFRINGEMENT. 
IN NO EVENT SHALL THE COPYRIGHT HOLDERS, AUTHORS - * AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, - * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE - * USE OR OTHER DEALINGS IN THE SOFTWARE. - * - * The above copyright notice and this permission notice (including the - * next paragraph) shall be included in all copies or substantial portions - * of the Software. - */ - -/** -************************************************************************************************************************ -* @file addrlib2.h -* @brief Contains the Addr::V2::Lib class definition. -************************************************************************************************************************ -*/ - -#ifndef __ADDR2_LIB2_H__ -#define __ADDR2_LIB2_H__ - -#include "addrlib.h" - -namespace Addr -{ -namespace V2 -{ - -/** -************************************************************************************************************************ -* @brief Flags for SwizzleModeTable -************************************************************************************************************************ -*/ -struct SwizzleModeFlags -{ - // Swizzle mode - UINT_32 isLinear : 1; // Linear - - // Block size - UINT_32 is256b : 1; // Block size is 256B - UINT_32 is4kb : 1; // Block size is 4KB - UINT_32 is64kb : 1; // Block size is 64KB - UINT_32 isVar : 1; // Block size is variable - - UINT_32 isZ : 1; // Z order swizzle mode - UINT_32 isStd : 1; // Standard swizzle mode - UINT_32 isDisp : 1; // Display swizzle mode - UINT_32 isRot : 1; // Rotate swizzle mode - - // XOR mode - UINT_32 isXor : 1; // XOR after swizzle if set - - UINT_32 isT : 1; // T mode - - UINT_32 isRtOpt : 1; // mode opt for render target -}; - -struct Dim2d -{ - UINT_32 w; - UINT_32 h; -}; - -struct Dim3d -{ - UINT_32 w; - UINT_32 h; - UINT_32 d; -}; - -/** 
-************************************************************************************************************************ -* @brief This class contains asic independent address lib functionalities -************************************************************************************************************************ -*/ -class Lib : public Addr::Lib -{ -public: - virtual ~Lib(); - - static Lib* GetLib( - ADDR_HANDLE hLib); - - // - // Interface stubs - // - - // For data surface - ADDR_E_RETURNCODE ComputeSurfaceInfo( - const ADDR2_COMPUTE_SURFACE_INFO_INPUT* pIn, - ADDR2_COMPUTE_SURFACE_INFO_OUTPUT* pOut) const; - - ADDR_E_RETURNCODE ComputeSurfaceAddrFromCoord( - const ADDR2_COMPUTE_SURFACE_ADDRFROMCOORD_INPUT* pIn, - ADDR2_COMPUTE_SURFACE_ADDRFROMCOORD_OUTPUT* pOut) const; - - ADDR_E_RETURNCODE ComputeSurfaceCoordFromAddr( - const ADDR2_COMPUTE_SURFACE_COORDFROMADDR_INPUT* pIn, - ADDR2_COMPUTE_SURFACE_COORDFROMADDR_OUTPUT* pOut) const; - - // For HTile - ADDR_E_RETURNCODE ComputeHtileInfo( - const ADDR2_COMPUTE_HTILE_INFO_INPUT* pIn, - ADDR2_COMPUTE_HTILE_INFO_OUTPUT* pOut) const; - - ADDR_E_RETURNCODE ComputeHtileAddrFromCoord( - const ADDR2_COMPUTE_HTILE_ADDRFROMCOORD_INPUT* pIn, - ADDR2_COMPUTE_HTILE_ADDRFROMCOORD_OUTPUT* pOut); - - ADDR_E_RETURNCODE ComputeHtileCoordFromAddr( - const ADDR2_COMPUTE_HTILE_COORDFROMADDR_INPUT* pIn, - ADDR2_COMPUTE_HTILE_COORDFROMADDR_OUTPUT* pOut); - - // For CMask - ADDR_E_RETURNCODE ComputeCmaskInfo( - const ADDR2_COMPUTE_CMASK_INFO_INPUT* pIn, - ADDR2_COMPUTE_CMASK_INFO_OUTPUT* pOut) const; - - ADDR_E_RETURNCODE ComputeCmaskAddrFromCoord( - const ADDR2_COMPUTE_CMASK_ADDRFROMCOORD_INPUT* pIn, - ADDR2_COMPUTE_CMASK_ADDRFROMCOORD_OUTPUT* pOut); - - ADDR_E_RETURNCODE ComputeCmaskCoordFromAddr( - const ADDR2_COMPUTE_CMASK_COORDFROMADDR_INPUT* pIn, - ADDR2_COMPUTE_CMASK_COORDFROMADDR_OUTPUT* pOut) const; - - // For FMask - ADDR_E_RETURNCODE ComputeFmaskInfo( - const ADDR2_COMPUTE_FMASK_INFO_INPUT* pIn, - 
ADDR2_COMPUTE_FMASK_INFO_OUTPUT* pOut); - - ADDR_E_RETURNCODE ComputeFmaskAddrFromCoord( - const ADDR2_COMPUTE_FMASK_ADDRFROMCOORD_INPUT* pIn, - ADDR2_COMPUTE_FMASK_ADDRFROMCOORD_OUTPUT* pOut) const; - - ADDR_E_RETURNCODE ComputeFmaskCoordFromAddr( - const ADDR2_COMPUTE_FMASK_COORDFROMADDR_INPUT* pIn, - ADDR2_COMPUTE_FMASK_COORDFROMADDR_OUTPUT* pOut) const; - - // For DCC key - ADDR_E_RETURNCODE ComputeDccInfo( - const ADDR2_COMPUTE_DCCINFO_INPUT* pIn, - ADDR2_COMPUTE_DCCINFO_OUTPUT* pOut) const; - - ADDR_E_RETURNCODE ComputeDccAddrFromCoord( - const ADDR2_COMPUTE_DCC_ADDRFROMCOORD_INPUT* pIn, - ADDR2_COMPUTE_DCC_ADDRFROMCOORD_OUTPUT* pOut); - - // Misc - ADDR_E_RETURNCODE ComputePipeBankXor( - const ADDR2_COMPUTE_PIPEBANKXOR_INPUT* pIn, - ADDR2_COMPUTE_PIPEBANKXOR_OUTPUT* pOut); - - ADDR_E_RETURNCODE ComputeSlicePipeBankXor( - const ADDR2_COMPUTE_SLICE_PIPEBANKXOR_INPUT* pIn, - ADDR2_COMPUTE_SLICE_PIPEBANKXOR_OUTPUT* pOut); - - ADDR_E_RETURNCODE ComputeSubResourceOffsetForSwizzlePattern( - const ADDR2_COMPUTE_SUBRESOURCE_OFFSET_FORSWIZZLEPATTERN_INPUT* pIn, - ADDR2_COMPUTE_SUBRESOURCE_OFFSET_FORSWIZZLEPATTERN_OUTPUT* pOut); - - ADDR_E_RETURNCODE Addr2GetPreferredSurfaceSetting( - const ADDR2_GET_PREFERRED_SURF_SETTING_INPUT* pIn, - ADDR2_GET_PREFERRED_SURF_SETTING_OUTPUT* pOut) const; - - virtual BOOL_32 IsValidDisplaySwizzleMode( - const ADDR2_COMPUTE_SURFACE_INFO_INPUT* pIn) const - { - ADDR_NOT_IMPLEMENTED(); - return ADDR_NOTIMPLEMENTED; - } - -protected: - Lib(); // Constructor is protected - Lib(const Client* pClient); - - static const UINT_32 MaxNumOfBpp = 5; - - static const Dim2d Block256_2d[MaxNumOfBpp]; - static const Dim3d Block1K_3d[MaxNumOfBpp]; - - static const UINT_32 PrtAlignment = 64 * 1024; - static const UINT_32 MaxMacroBits = 20; - - static const UINT_32 MaxMipLevels = 16; - - // Checking block size - BOOL_32 IsBlock256b(AddrSwizzleMode swizzleMode) const - { - return m_swizzleModeTable[swizzleMode].is256b; - } - - BOOL_32 
IsBlock4kb(AddrSwizzleMode swizzleMode) const - { - return m_swizzleModeTable[swizzleMode].is4kb; - } - - BOOL_32 IsBlock64kb(AddrSwizzleMode swizzleMode) const - { - return m_swizzleModeTable[swizzleMode].is64kb; - } - - BOOL_32 IsBlockVariable(AddrSwizzleMode swizzleMode) const - { - return m_swizzleModeTable[swizzleMode].isVar; - } - - // Checking swizzle mode - BOOL_32 IsLinear(AddrSwizzleMode swizzleMode) const - { - return m_swizzleModeTable[swizzleMode].isLinear; - } - - BOOL_32 IsRtOptSwizzle(AddrSwizzleMode swizzleMode) const - { - return m_swizzleModeTable[swizzleMode].isRtOpt; - } - - BOOL_32 IsZOrderSwizzle(AddrSwizzleMode swizzleMode) const - { - return m_swizzleModeTable[swizzleMode].isZ; - } - - BOOL_32 IsStandardSwizzle(AddrResourceType resourceType, AddrSwizzleMode swizzleMode) const - { - return HwlIsStandardSwizzle(resourceType, swizzleMode); - } - - BOOL_32 IsDisplaySwizzle(AddrResourceType resourceType, AddrSwizzleMode swizzleMode) const - { - return HwlIsDisplaySwizzle(resourceType, swizzleMode); - } - - BOOL_32 IsRotateSwizzle(AddrSwizzleMode swizzleMode) const - { - return m_swizzleModeTable[swizzleMode].isRot; - } - - BOOL_32 IsXor(AddrSwizzleMode swizzleMode) const - { - return m_swizzleModeTable[swizzleMode].isXor; - } - - BOOL_32 IsPrt(AddrSwizzleMode swizzleMode) const - { - return m_swizzleModeTable[swizzleMode].isT; - } - - BOOL_32 IsNonPrtXor(AddrSwizzleMode swizzleMode) const - { - return (IsXor(swizzleMode) && (IsPrt(swizzleMode) == FALSE)); - } - - // Checking resource type - static BOOL_32 IsTex1d(AddrResourceType resourceType) - { - return (resourceType == ADDR_RSRC_TEX_1D); - } - - static BOOL_32 IsTex2d(AddrResourceType resourceType) - { - return (resourceType == ADDR_RSRC_TEX_2D); - } - - static BOOL_32 IsTex3d(AddrResourceType resourceType) - { - return (resourceType == ADDR_RSRC_TEX_3D); - } - - BOOL_32 IsThick(AddrResourceType resourceType, AddrSwizzleMode swizzleMode) const - { - return HwlIsThick(resourceType, 
swizzleMode); - } - - BOOL_32 IsThin(AddrResourceType resourceType, AddrSwizzleMode swizzleMode) const - { - return HwlIsThin(resourceType, swizzleMode); - } - - UINT_32 GetBlockSizeLog2(AddrSwizzleMode swizzleMode) const - { - UINT_32 blockSizeLog2 = 0; - - if (IsBlock256b(swizzleMode) || IsLinear(swizzleMode)) - { - blockSizeLog2 = 8; - } - else if (IsBlock4kb(swizzleMode)) - { - blockSizeLog2 = 12; - } - else if (IsBlock64kb(swizzleMode)) - { - blockSizeLog2 = 16; - } - else if (IsBlockVariable(swizzleMode)) - { - blockSizeLog2 = m_blockVarSizeLog2; - } - else - { - ADDR_ASSERT_ALWAYS(); - } - - return blockSizeLog2; - } - - UINT_32 GetBlockSize(AddrSwizzleMode swizzleMode) const - { - return (1 << GetBlockSizeLog2(swizzleMode)); - } - - static UINT_32 GetFmaskBpp(UINT_32 sample, UINT_32 frag) - { - sample = (sample == 0) ? 1 : sample; - frag = (frag == 0) ? sample : frag; - - UINT_32 fmaskBpp = QLog2(frag); - - if (sample > frag) - { - fmaskBpp++; - } - - if (fmaskBpp == 3) - { - fmaskBpp = 4; - } - - fmaskBpp = Max(8u, fmaskBpp * sample); - - return fmaskBpp; - } - - virtual BOOL_32 HwlIsStandardSwizzle( - AddrResourceType resourceType, - AddrSwizzleMode swizzleMode) const - { - ADDR_NOT_IMPLEMENTED(); - return FALSE; - } - - virtual BOOL_32 HwlIsDisplaySwizzle( - AddrResourceType resourceType, - AddrSwizzleMode swizzleMode) const - { - ADDR_NOT_IMPLEMENTED(); - return FALSE; - } - - virtual BOOL_32 HwlIsThin( - AddrResourceType resourceType, - AddrSwizzleMode swizzleMode) const - { - ADDR_NOT_IMPLEMENTED(); - return FALSE; - } - - virtual BOOL_32 HwlIsThick( - AddrResourceType resourceType, - AddrSwizzleMode swizzleMode) const - { - ADDR_NOT_IMPLEMENTED(); - return FALSE; - } - - virtual ADDR_E_RETURNCODE HwlComputeHtileInfo( - const ADDR2_COMPUTE_HTILE_INFO_INPUT* pIn, - ADDR2_COMPUTE_HTILE_INFO_OUTPUT* pOut) const - { - ADDR_NOT_IMPLEMENTED(); - return ADDR_NOTSUPPORTED; - } - - virtual ADDR_E_RETURNCODE HwlComputeCmaskInfo( - const 
ADDR2_COMPUTE_CMASK_INFO_INPUT* pIn, - ADDR2_COMPUTE_CMASK_INFO_OUTPUT* pOut) const - { - ADDR_NOT_IMPLEMENTED(); - return ADDR_NOTSUPPORTED; - } - - virtual ADDR_E_RETURNCODE HwlComputeDccInfo( - const ADDR2_COMPUTE_DCCINFO_INPUT* pIn, - ADDR2_COMPUTE_DCCINFO_OUTPUT* pOut) const - { - ADDR_NOT_IMPLEMENTED(); - return ADDR_NOTSUPPORTED; - } - - virtual ADDR_E_RETURNCODE HwlComputeDccAddrFromCoord( - const ADDR2_COMPUTE_DCC_ADDRFROMCOORD_INPUT* pIn, - ADDR2_COMPUTE_DCC_ADDRFROMCOORD_OUTPUT* pOut) - { - ADDR_NOT_IMPLEMENTED(); - return ADDR_NOTSUPPORTED; - } - - virtual ADDR_E_RETURNCODE HwlComputeCmaskAddrFromCoord( - const ADDR2_COMPUTE_CMASK_ADDRFROMCOORD_INPUT* pIn, - ADDR2_COMPUTE_CMASK_ADDRFROMCOORD_OUTPUT* pOut) - { - ADDR_NOT_IMPLEMENTED(); - return ADDR_NOTSUPPORTED; - } - - virtual ADDR_E_RETURNCODE HwlComputeHtileAddrFromCoord( - const ADDR2_COMPUTE_HTILE_ADDRFROMCOORD_INPUT* pIn, - ADDR2_COMPUTE_HTILE_ADDRFROMCOORD_OUTPUT* pOut) - { - ADDR_NOT_IMPLEMENTED(); - return ADDR_NOTSUPPORTED; - } - - virtual ADDR_E_RETURNCODE HwlComputeHtileCoordFromAddr( - const ADDR2_COMPUTE_HTILE_COORDFROMADDR_INPUT* pIn, - ADDR2_COMPUTE_HTILE_COORDFROMADDR_OUTPUT* pOut) - { - ADDR_NOT_IMPLEMENTED(); - return ADDR_NOTSUPPORTED; - } - - virtual ADDR_E_RETURNCODE HwlComputeBlock256Equation( - AddrResourceType rsrcType, - AddrSwizzleMode swMode, - UINT_32 elementBytesLog2, - ADDR_EQUATION* pEquation) const - { - ADDR_NOT_IMPLEMENTED(); - return ADDR_NOTSUPPORTED; - } - - virtual ADDR_E_RETURNCODE HwlComputeThinEquation( - AddrResourceType rsrcType, - AddrSwizzleMode swMode, - UINT_32 elementBytesLog2, - ADDR_EQUATION* pEquation) const - { - ADDR_NOT_IMPLEMENTED(); - return ADDR_NOTSUPPORTED; - } - - virtual ADDR_E_RETURNCODE HwlComputeThickEquation( - AddrResourceType rsrcType, - AddrSwizzleMode swMode, - UINT_32 elementBytesLog2, - ADDR_EQUATION* pEquation) const - { - ADDR_NOT_IMPLEMENTED(); - return ADDR_NOTSUPPORTED; - } - - virtual UINT_32 HwlGetEquationIndex( - const 
ADDR2_COMPUTE_SURFACE_INFO_INPUT* pIn, - ADDR2_COMPUTE_SURFACE_INFO_OUTPUT* pOut) const - { - ADDR_NOT_IMPLEMENTED(); - return ADDR_INVALID_EQUATION_INDEX; - } - - UINT_32 GetEquationIndex( - const ADDR2_COMPUTE_SURFACE_INFO_INPUT* pIn, - ADDR2_COMPUTE_SURFACE_INFO_OUTPUT* pOut) const - { - return HwlGetEquationIndex(pIn, pOut); - } - - virtual ADDR_E_RETURNCODE HwlComputePipeBankXor( - const ADDR2_COMPUTE_PIPEBANKXOR_INPUT* pIn, - ADDR2_COMPUTE_PIPEBANKXOR_OUTPUT* pOut) const - { - ADDR_NOT_IMPLEMENTED(); - return ADDR_NOTSUPPORTED; - } - - virtual ADDR_E_RETURNCODE HwlComputeSlicePipeBankXor( - const ADDR2_COMPUTE_SLICE_PIPEBANKXOR_INPUT* pIn, - ADDR2_COMPUTE_SLICE_PIPEBANKXOR_OUTPUT* pOut) const - { - ADDR_NOT_IMPLEMENTED(); - return ADDR_NOTSUPPORTED; - } - - - virtual ADDR_E_RETURNCODE HwlComputeSubResourceOffsetForSwizzlePattern( - const ADDR2_COMPUTE_SUBRESOURCE_OFFSET_FORSWIZZLEPATTERN_INPUT* pIn, - ADDR2_COMPUTE_SUBRESOURCE_OFFSET_FORSWIZZLEPATTERN_OUTPUT* pOut) const - { - ADDR_NOT_IMPLEMENTED(); - return ADDR_NOTSUPPORTED; - } - - virtual ADDR_E_RETURNCODE HwlGetPreferredSurfaceSetting( - const ADDR2_GET_PREFERRED_SURF_SETTING_INPUT* pIn, - ADDR2_GET_PREFERRED_SURF_SETTING_OUTPUT* pOut) const - { - ADDR_NOT_IMPLEMENTED(); - return ADDR_NOTSUPPORTED; - } - - virtual ADDR_E_RETURNCODE HwlComputeSurfaceInfoSanityCheck( - const ADDR2_COMPUTE_SURFACE_INFO_INPUT* pIn) const - { - ADDR_NOT_IMPLEMENTED(); - return ADDR_NOTSUPPORTED; - } - - virtual ADDR_E_RETURNCODE HwlComputeSurfaceInfoTiled( - const ADDR2_COMPUTE_SURFACE_INFO_INPUT* pIn, - ADDR2_COMPUTE_SURFACE_INFO_OUTPUT* pOut) const - { - ADDR_NOT_IMPLEMENTED(); - return ADDR_NOTIMPLEMENTED; - } - - virtual ADDR_E_RETURNCODE HwlComputeSurfaceInfoLinear( - const ADDR2_COMPUTE_SURFACE_INFO_INPUT* pIn, - ADDR2_COMPUTE_SURFACE_INFO_OUTPUT* pOut) const - { - ADDR_NOT_IMPLEMENTED(); - return ADDR_NOTIMPLEMENTED; - } - - virtual ADDR_E_RETURNCODE HwlComputeSurfaceAddrFromCoordTiled( - const 
ADDR2_COMPUTE_SURFACE_ADDRFROMCOORD_INPUT* pIn, - ADDR2_COMPUTE_SURFACE_ADDRFROMCOORD_OUTPUT* pOut) const - { - ADDR_NOT_IMPLEMENTED(); - return ADDR_NOTIMPLEMENTED; - } - - ADDR_E_RETURNCODE ComputeBlock256Equation( - AddrResourceType rsrcType, - AddrSwizzleMode swMode, - UINT_32 elementBytesLog2, - ADDR_EQUATION* pEquation) const; - - ADDR_E_RETURNCODE ComputeThinEquation( - AddrResourceType rsrcType, - AddrSwizzleMode swMode, - UINT_32 elementBytesLog2, - ADDR_EQUATION* pEquation) const; - - ADDR_E_RETURNCODE ComputeThickEquation( - AddrResourceType rsrcType, - AddrSwizzleMode swMode, - UINT_32 elementBytesLog2, - ADDR_EQUATION* pEquation) const; - - ADDR_E_RETURNCODE ComputeSurfaceInfoSanityCheck( - const ADDR2_COMPUTE_SURFACE_INFO_INPUT* pIn) const; - - ADDR_E_RETURNCODE ComputeSurfaceInfoLinear( - const ADDR2_COMPUTE_SURFACE_INFO_INPUT* pIn, - ADDR2_COMPUTE_SURFACE_INFO_OUTPUT* pOut) const; - - ADDR_E_RETURNCODE ComputeSurfaceInfoTiled( - const ADDR2_COMPUTE_SURFACE_INFO_INPUT* pIn, - ADDR2_COMPUTE_SURFACE_INFO_OUTPUT* pOut) const; - - ADDR_E_RETURNCODE ComputeSurfaceAddrFromCoordLinear( - const ADDR2_COMPUTE_SURFACE_ADDRFROMCOORD_INPUT* pIn, - ADDR2_COMPUTE_SURFACE_ADDRFROMCOORD_OUTPUT* pOut) const; - - ADDR_E_RETURNCODE ComputeSurfaceAddrFromCoordTiled( - const ADDR2_COMPUTE_SURFACE_ADDRFROMCOORD_INPUT* pIn, - ADDR2_COMPUTE_SURFACE_ADDRFROMCOORD_OUTPUT* pOut) const; - - ADDR_E_RETURNCODE ComputeSurfaceCoordFromAddrLinear( - const ADDR2_COMPUTE_SURFACE_COORDFROMADDR_INPUT* pIn, - ADDR2_COMPUTE_SURFACE_COORDFROMADDR_OUTPUT* pOut) const; - - ADDR_E_RETURNCODE ComputeSurfaceCoordFromAddrTiled( - const ADDR2_COMPUTE_SURFACE_COORDFROMADDR_INPUT* pIn, - ADDR2_COMPUTE_SURFACE_COORDFROMADDR_OUTPUT* pOut) const; - - UINT_32 ComputeSurface2DMicroBlockOffset( - const _ADDR2_COMPUTE_SURFACE_ADDRFROMCOORD_INPUT* pIn) const; - - UINT_32 ComputeSurface3DMicroBlockOffset( - const _ADDR2_COMPUTE_SURFACE_ADDRFROMCOORD_INPUT* pIn) const; - - // Misc - ADDR_E_RETURNCODE 
ComputeBlockDimensionForSurf( - UINT_32* pWidth, - UINT_32* pHeight, - UINT_32* pDepth, - UINT_32 bpp, - UINT_32 numSamples, - AddrResourceType resourceType, - AddrSwizzleMode swizzleMode) const; - - ADDR_E_RETURNCODE ComputeBlockDimension( - UINT_32* pWidth, - UINT_32* pHeight, - UINT_32* pDepth, - UINT_32 bpp, - AddrResourceType resourceType, - AddrSwizzleMode swizzleMode) const; - - static UINT_64 ComputePadSize( - const Dim3d* pBlkDim, - UINT_32 width, - UINT_32 height, - UINT_32 numSlices, - Dim3d* pPadDim) - { - pPadDim->w = PowTwoAlign(width ,pBlkDim->w); - pPadDim->h = PowTwoAlign(height ,pBlkDim->h); - pPadDim->d = PowTwoAlign(numSlices, pBlkDim->d); - return static_cast(pPadDim->w) * pPadDim->h * pPadDim->d; - } - - static ADDR_E_RETURNCODE ExtractPipeBankXor( - UINT_32 pipeBankXor, - UINT_32 bankBits, - UINT_32 pipeBits, - UINT_32* pBankX, - UINT_32* pPipeX); - - static BOOL_32 Valid3DMipSliceIdConstraint( - UINT_32 numSlices, - UINT_32 mipId, - UINT_32 slice) - { - return (Max((numSlices >> mipId), 1u) > slice); - } - - Dim3d GetMipTailDim( - AddrResourceType resourceType, - AddrSwizzleMode swizzleMode, - UINT_32 blockWidth, - UINT_32 blockHeight, - UINT_32 blockDepth) const; - - BOOL_32 IsInMipTail( - AddrResourceType resourceType, - AddrSwizzleMode swizzleMode, - Dim3d mipTailDim, - UINT_32 width, - UINT_32 height, - UINT_32 depth) const - { - BOOL_32 inTail = ((width <= mipTailDim.w) && - (height <= mipTailDim.h) && - (IsThin(resourceType, swizzleMode) || (depth <= mipTailDim.d))); - - return inTail; - } - - static BOOL_32 IsLocalHeap(AddrResrouceLocation resourceType) - { - return ((resourceType == ADDR_RSRC_LOC_LOCAL) || - (resourceType == ADDR_RSRC_LOC_INVIS)); - } - - static BOOL_32 IsInvisibleHeap(AddrResrouceLocation resourceType) - { - return (resourceType == ADDR_RSRC_LOC_INVIS); - } - - static BOOL_32 IsNonlocalHeap(AddrResrouceLocation resourceType) - { - return ((resourceType == ADDR_RSRC_LOC_USWC) || - (resourceType == 
ADDR_RSRC_LOC_CACHED)); - } - - UINT_32 GetPipeLog2ForMetaAddressing(BOOL_32 pipeAligned, AddrSwizzleMode swizzleMode) const - { - UINT_32 numPipeLog2 = pipeAligned ? Min(m_pipesLog2 + m_seLog2, 5u) : 0; - - if (IsXor(swizzleMode)) - { - UINT_32 maxPipeLog2 = GetBlockSizeLog2(swizzleMode) - m_pipeInterleaveLog2; - - numPipeLog2 = Min(numPipeLog2, maxPipeLog2); - } - - return numPipeLog2; - } - - UINT_32 GetPipeNumForMetaAddressing(BOOL_32 pipeAligned, AddrSwizzleMode swizzleMode) const - { - return (1 << GetPipeLog2ForMetaAddressing(pipeAligned, swizzleMode)); - } - - VOID VerifyMipLevelInfo(const ADDR2_COMPUTE_SURFACE_INFO_INPUT* pIn) const - { -#if DEBUG - if (pIn->numMipLevels > 1) - { - UINT_32 actualMipLevels = 1; - switch (pIn->resourceType) - { - case ADDR_RSRC_TEX_3D: - // Fall through to share 2D case - actualMipLevels = Max(actualMipLevels, Log2NonPow2(pIn->numSlices) + 1); - case ADDR_RSRC_TEX_2D: - // Fall through to share 1D case - actualMipLevels = Max(actualMipLevels, Log2NonPow2(pIn->height) + 1); - case ADDR_RSRC_TEX_1D: - // Base 1D case - actualMipLevels = Max(actualMipLevels, Log2NonPow2(pIn->width) + 1); - break; - default: - ADDR_ASSERT_ALWAYS(); - break; - } - // Client pass wrong number of MipLevels to addrlib and result will be bad. - // Not sure if we should fail this calling instead of putting an assertion here. 
- ADDR_ASSERT(actualMipLevels >= pIn->numMipLevels); - } -#endif - } - - ADDR_E_RETURNCODE ApplyCustomerPipeBankXor( - AddrSwizzleMode swizzleMode, - UINT_32 pipeBankXor, - UINT_32 bankBits, - UINT_32 pipeBits, - UINT_32* pBlockOffset) const - { - ADDR_E_RETURNCODE returnCode = ADDR_OK; - - if (IsXor(swizzleMode)) - { - // Apply driver set bankPipeXor - UINT_32 bankX = 0; - UINT_32 pipeX = 0; - returnCode = ExtractPipeBankXor(pipeBankXor, bankBits, pipeBits, &bankX, &pipeX); - *pBlockOffset ^= (pipeX << m_pipeInterleaveLog2); - *pBlockOffset ^= (bankX << (m_pipeInterleaveLog2 + pipeBits)); - } - - return returnCode; - } - - UINT_32 GetPipeXorBits(UINT_32 macroBlockBits) const; - UINT_32 GetBankXorBits(UINT_32 macroBlockBits) const; - - ADDR_E_RETURNCODE ApplyCustomizedPitchHeight( - const ADDR2_COMPUTE_SURFACE_INFO_INPUT* pIn, - UINT_32 elementBytes, - UINT_32 pitchAlignInElement, - UINT_32* pPitch, - UINT_32* pHeight) const; - - VOID ComputeQbStereoInfo(ADDR2_COMPUTE_SURFACE_INFO_OUTPUT* pOut) const; - - UINT_32 m_se; ///< Number of shader engine - UINT_32 m_rbPerSe; ///< Number of render backend per shader engine - UINT_32 m_maxCompFrag; ///< Number of max compressed fragment - - UINT_32 m_banksLog2; ///< Number of bank Log2 - UINT_32 m_pipesLog2; ///< Number of pipe per shader engine Log2 - UINT_32 m_seLog2; ///< Number of shader engine Log2 - UINT_32 m_rbPerSeLog2; ///< Number of render backend per shader engine Log2 - UINT_32 m_maxCompFragLog2; ///< Number of max compressed fragment Log2 - - UINT_32 m_pipeInterleaveLog2; ///< Log2 of pipe interleave bytes - - UINT_32 m_blockVarSizeLog2; ///< Log2 of block var size - - SwizzleModeFlags m_swizzleModeTable[ADDR_SW_MAX_TYPE]; ///< Swizzle mode table - -private: - // Disallow the copy constructor - Lib(const Lib& a); - - // Disallow the assignment operator - Lib& operator=(const Lib& a); -}; - -} // V2 -} // Addr - -#endif - diff -Nru mesa-18.3.3/src/amd/addrlib/core/addrlib.cpp 
mesa-19.0.1/src/amd/addrlib/core/addrlib.cpp --- mesa-18.3.3/src/amd/addrlib/core/addrlib.cpp 2018-04-03 17:32:26.000000000 +0000 +++ mesa-19.0.1/src/amd/addrlib/core/addrlib.cpp 1970-01-01 00:00:00.000000000 +0000 @@ -1,644 +0,0 @@ -/* - * Copyright © 2014 Advanced Micro Devices, Inc. - * All Rights Reserved. - * - * Permission is hereby granted, free of charge, to any person obtaining - * a copy of this software and associated documentation files (the - * "Software"), to deal in the Software without restriction, including - * without limitation the rights to use, copy, modify, merge, publish, - * distribute, sub license, and/or sell copies of the Software, and to - * permit persons to whom the Software is furnished to do so, subject to - * the following conditions: - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES - * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - * NON-INFRINGEMENT. IN NO EVENT SHALL THE COPYRIGHT HOLDERS, AUTHORS - * AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, - * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE - * USE OR OTHER DEALINGS IN THE SOFTWARE. - * - * The above copyright notice and this permission notice (including the - * next paragraph) shall be included in all copies or substantial portions - * of the Software. - */ - -/** -**************************************************************************************************** -* @file addrlib.cpp -* @brief Contains the implementation for the Addr::Lib class. 
-**************************************************************************************************** -*/ - -#include "addrinterface.h" -#include "addrlib.h" -#include "addrcommon.h" - -#if defined(__APPLE__) - -UINT_32 div64_32(UINT_64 n, UINT_32 base) -{ - UINT_64 rem = n; - UINT_64 b = base; - UINT_64 res, d = 1; - UINT_32 high = rem >> 32; - - res = 0; - if (high >= base) - { - high /= base; - res = (UINT_64) high << 32; - rem -= (UINT_64) (high * base) << 32; - } - - while (((INT_64)b > 0) && (b < rem)) - { - b = b + b; - d = d + d; - } - - do - { - if (rem >= b) - { - rem -= b; - res += d; - } - b >>= 1; - d >>= 1; - } while (d); - - n = res; - return rem; -} - -extern "C" -UINT_32 __umoddi3(UINT_64 n, UINT_32 base) -{ - return div64_32(n, base); -} - -#endif // __APPLE__ - -namespace Addr -{ - -//////////////////////////////////////////////////////////////////////////////////////////////////// -// Constructor/Destructor -//////////////////////////////////////////////////////////////////////////////////////////////////// - -/** -**************************************************************************************************** -* Lib::Lib -* -* @brief -* Constructor for the AddrLib class -* -**************************************************************************************************** -*/ -Lib::Lib() : - m_class(BASE_ADDRLIB), - m_chipFamily(ADDR_CHIP_FAMILY_IVLD), - m_chipRevision(0), - m_version(ADDRLIB_VERSION), - m_pipes(0), - m_banks(0), - m_pipeInterleaveBytes(0), - m_rowSize(0), - m_minPitchAlignPixels(1), - m_maxSamples(8), - m_pElemLib(NULL) -{ - m_configFlags.value = 0; -} - -/** -**************************************************************************************************** -* Lib::Lib -* -* @brief -* Constructor for the AddrLib class with hClient as parameter -* -**************************************************************************************************** -*/ -Lib::Lib(const Client* pClient) : - Object(pClient), - 
m_class(BASE_ADDRLIB), - m_chipFamily(ADDR_CHIP_FAMILY_IVLD), - m_chipRevision(0), - m_version(ADDRLIB_VERSION), - m_pipes(0), - m_banks(0), - m_pipeInterleaveBytes(0), - m_rowSize(0), - m_minPitchAlignPixels(1), - m_maxSamples(8), - m_pElemLib(NULL) -{ - m_configFlags.value = 0; -} - -/** -**************************************************************************************************** -* Lib::~AddrLib -* -* @brief -* Destructor for the AddrLib class -* -**************************************************************************************************** -*/ -Lib::~Lib() -{ - if (m_pElemLib) - { - delete m_pElemLib; - m_pElemLib = NULL; - } -} - - -//////////////////////////////////////////////////////////////////////////////////////////////////// -// Initialization/Helper -//////////////////////////////////////////////////////////////////////////////////////////////////// - -/** -**************************************************************************************************** -* Lib::Create -* -* @brief -* Creates and initializes AddrLib object. 
-* -* @return -* ADDR_E_RETURNCODE -**************************************************************************************************** -*/ -ADDR_E_RETURNCODE Lib::Create( - const ADDR_CREATE_INPUT* pCreateIn, ///< [in] pointer to ADDR_CREATE_INPUT - ADDR_CREATE_OUTPUT* pCreateOut) ///< [out] pointer to ADDR_CREATE_OUTPUT -{ - Lib* pLib = NULL; - ADDR_E_RETURNCODE returnCode = ADDR_OK; - - if (pCreateIn->createFlags.fillSizeFields == TRUE) - { - if ((pCreateIn->size != sizeof(ADDR_CREATE_INPUT)) || - (pCreateOut->size != sizeof(ADDR_CREATE_OUTPUT))) - { - returnCode = ADDR_PARAMSIZEMISMATCH; - } - } - - if ((returnCode == ADDR_OK) && - (pCreateIn->callbacks.allocSysMem != NULL) && - (pCreateIn->callbacks.freeSysMem != NULL)) - { - Client client = { - pCreateIn->hClient, - pCreateIn->callbacks - }; - - switch (pCreateIn->chipEngine) - { - case CIASICIDGFXENGINE_SOUTHERNISLAND: - switch (pCreateIn->chipFamily) - { - case FAMILY_SI: - pLib = SiHwlInit(&client); - break; - case FAMILY_VI: - case FAMILY_CZ: // VI based fusion(carrizo) - case FAMILY_CI: - case FAMILY_KV: // CI based fusion - pLib = CiHwlInit(&client); - break; - default: - ADDR_ASSERT_ALWAYS(); - break; - } - break; - case CIASICIDGFXENGINE_ARCTICISLAND: - switch (pCreateIn->chipFamily) - { - case FAMILY_AI: - case FAMILY_RV: - pLib = Gfx9HwlInit(&client); - break; - default: - ADDR_ASSERT_ALWAYS(); - break; - } - break; - default: - ADDR_ASSERT_ALWAYS(); - break; - } - } - - if (pLib != NULL) - { - BOOL_32 initValid; - - // Pass createFlags to configFlags first since these flags may be overwritten - pLib->m_configFlags.noCubeMipSlicesPad = pCreateIn->createFlags.noCubeMipSlicesPad; - pLib->m_configFlags.fillSizeFields = pCreateIn->createFlags.fillSizeFields; - pLib->m_configFlags.useTileIndex = pCreateIn->createFlags.useTileIndex; - pLib->m_configFlags.useCombinedSwizzle = pCreateIn->createFlags.useCombinedSwizzle; - pLib->m_configFlags.checkLast2DLevel = pCreateIn->createFlags.checkLast2DLevel; - 
pLib->m_configFlags.useHtileSliceAlign = pCreateIn->createFlags.useHtileSliceAlign; - pLib->m_configFlags.allowLargeThickTile = pCreateIn->createFlags.allowLargeThickTile; - pLib->m_configFlags.disableLinearOpt = FALSE; - - pLib->SetChipFamily(pCreateIn->chipFamily, pCreateIn->chipRevision); - - pLib->SetMinPitchAlignPixels(pCreateIn->minPitchAlignPixels); - - // Global parameters initialized and remaining configFlags bits are set as well - initValid = pLib->HwlInitGlobalParams(pCreateIn); - - if (initValid) - { - pLib->m_pElemLib = ElemLib::Create(pLib); - } - else - { - pLib->m_pElemLib = NULL; // Don't go on allocating element lib - returnCode = ADDR_INVALIDGBREGVALUES; - } - - if (pLib->m_pElemLib == NULL) - { - delete pLib; - pLib = NULL; - ADDR_ASSERT_ALWAYS(); - } - else - { - pLib->m_pElemLib->SetConfigFlags(pLib->m_configFlags); - } - } - - pCreateOut->hLib = pLib; - - if ((pLib != NULL) && - (returnCode == ADDR_OK)) - { - pCreateOut->numEquations = - pLib->HwlGetEquationTableInfo(&pCreateOut->pEquationTable); - - pLib->SetMaxAlignments(); - - } - else if ((pLib == NULL) && - (returnCode == ADDR_OK)) - { - // Unknown failures, we return the general error code - returnCode = ADDR_ERROR; - } - - return returnCode; -} - -/** -**************************************************************************************************** -* Lib::SetChipFamily -* -* @brief -* Convert familyID defined in atiid.h to ChipFamily and set m_chipFamily/m_chipRevision -* @return -* N/A -**************************************************************************************************** -*/ -VOID Lib::SetChipFamily( - UINT_32 uChipFamily, ///< [in] chip family defined in atiih.h - UINT_32 uChipRevision) ///< [in] chip revision defined in "asic_family"_id.h -{ - ChipFamily family = HwlConvertChipFamily(uChipFamily, uChipRevision); - - ADDR_ASSERT(family != ADDR_CHIP_FAMILY_IVLD); - - m_chipFamily = family; - m_chipRevision = uChipRevision; -} - -/** 
-**************************************************************************************************** -* Lib::SetMinPitchAlignPixels -* -* @brief -* Set m_minPitchAlignPixels with input param -* -* @return -* N/A -**************************************************************************************************** -*/ -VOID Lib::SetMinPitchAlignPixels( - UINT_32 minPitchAlignPixels) ///< [in] minmum pitch alignment in pixels -{ - m_minPitchAlignPixels = (minPitchAlignPixels == 0) ? 1 : minPitchAlignPixels; -} - -/** -**************************************************************************************************** -* Lib::SetMaxAlignments -* -* @brief -* Set max alignments -* -* @return -* N/A -**************************************************************************************************** -*/ -VOID Lib::SetMaxAlignments() -{ - m_maxBaseAlign = HwlComputeMaxBaseAlignments(); - m_maxMetaBaseAlign = HwlComputeMaxMetaBaseAlignments(); -} - -/** -**************************************************************************************************** -* Lib::GetLib -* -* @brief -* Get AddrLib pointer -* -* @return -* An AddrLib class pointer -**************************************************************************************************** -*/ -Lib* Lib::GetLib( - ADDR_HANDLE hLib) ///< [in] handle of ADDR_HANDLE -{ - return static_cast(hLib); -} - -/** -**************************************************************************************************** -* Lib::GetMaxAlignments -* -* @brief -* Gets maximum alignments for data surface (include FMask) -* -* @return -* ADDR_E_RETURNCODE -**************************************************************************************************** -*/ -ADDR_E_RETURNCODE Lib::GetMaxAlignments( - ADDR_GET_MAX_ALINGMENTS_OUTPUT* pOut ///< [out] output structure - ) const -{ - ADDR_E_RETURNCODE returnCode = ADDR_OK; - - if (GetFillSizeFieldsFlags() == TRUE) - { - if (pOut->size != sizeof(ADDR_GET_MAX_ALINGMENTS_OUTPUT)) - { - 
returnCode = ADDR_PARAMSIZEMISMATCH; - } - } - - if (returnCode == ADDR_OK) - { - if (m_maxBaseAlign != 0) - { - pOut->baseAlign = m_maxBaseAlign; - } - else - { - returnCode = ADDR_NOTIMPLEMENTED; - } - } - - return returnCode; -} - -/** -**************************************************************************************************** -* Lib::GetMaxMetaAlignments -* -* @brief -* Gets maximum alignments for metadata (CMask, DCC and HTile) -* -* @return -* ADDR_E_RETURNCODE -**************************************************************************************************** -*/ -ADDR_E_RETURNCODE Lib::GetMaxMetaAlignments( - ADDR_GET_MAX_ALINGMENTS_OUTPUT* pOut ///< [out] output structure - ) const -{ - ADDR_E_RETURNCODE returnCode = ADDR_OK; - - if (GetFillSizeFieldsFlags() == TRUE) - { - if (pOut->size != sizeof(ADDR_GET_MAX_ALINGMENTS_OUTPUT)) - { - returnCode = ADDR_PARAMSIZEMISMATCH; - } - } - - if (returnCode == ADDR_OK) - { - if (m_maxMetaBaseAlign != 0) - { - pOut->baseAlign = m_maxMetaBaseAlign; - } - else - { - returnCode = ADDR_NOTIMPLEMENTED; - } - } - - return returnCode; -} - -/** -**************************************************************************************************** -* Lib::Bits2Number -* -* @brief -* Cat a array of binary bit to a number -* -* @return -* The number combined with the array of bits -**************************************************************************************************** -*/ -UINT_32 Lib::Bits2Number( - UINT_32 bitNum, ///< [in] how many bits - ...) 
///< [in] varaible bits value starting from MSB -{ - UINT_32 number = 0; - UINT_32 i; - va_list bits_ptr; - - va_start(bits_ptr, bitNum); - - for(i = 0; i < bitNum; i++) - { - number |= va_arg(bits_ptr, UINT_32); - number <<= 1; - } - - number >>= 1; - - va_end(bits_ptr); - - return number; -} - - -//////////////////////////////////////////////////////////////////////////////////////////////////// -// Element lib -//////////////////////////////////////////////////////////////////////////////////////////////////// - - -/** -**************************************************************************************************** -* Lib::Flt32ToColorPixel -* -* @brief -* Convert a FLT_32 value to a depth/stencil pixel value -* @return -* ADDR_E_RETURNCODE -**************************************************************************************************** -*/ -ADDR_E_RETURNCODE Lib::Flt32ToDepthPixel( - const ELEM_FLT32TODEPTHPIXEL_INPUT* pIn, - ELEM_FLT32TODEPTHPIXEL_OUTPUT* pOut) const -{ - ADDR_E_RETURNCODE returnCode = ADDR_OK; - - if (GetFillSizeFieldsFlags() == TRUE) - { - if ((pIn->size != sizeof(ELEM_FLT32TODEPTHPIXEL_INPUT)) || - (pOut->size != sizeof(ELEM_FLT32TODEPTHPIXEL_OUTPUT))) - { - returnCode = ADDR_PARAMSIZEMISMATCH; - } - } - - if (returnCode == ADDR_OK) - { - GetElemLib()->Flt32ToDepthPixel(pIn->format, pIn->comps, pOut->pPixel); - - UINT_32 depthBase = 0; - UINT_32 stencilBase = 0; - UINT_32 depthBits = 0; - UINT_32 stencilBits = 0; - - switch (pIn->format) - { - case ADDR_DEPTH_16: - depthBits = 16; - break; - case ADDR_DEPTH_X8_24: - case ADDR_DEPTH_8_24: - case ADDR_DEPTH_X8_24_FLOAT: - case ADDR_DEPTH_8_24_FLOAT: - depthBase = 8; - depthBits = 24; - stencilBits = 8; - break; - case ADDR_DEPTH_32_FLOAT: - depthBits = 32; - break; - case ADDR_DEPTH_X24_8_32_FLOAT: - depthBase = 8; - depthBits = 32; - stencilBits = 8; - break; - default: - break; - } - - // Overwrite base since R800 has no "tileBase" - if (GetElemLib()->IsDepthStencilTilePlanar() == 
FALSE) - { - depthBase = 0; - stencilBase = 0; - } - - depthBase *= 64; - stencilBase *= 64; - - pOut->stencilBase = stencilBase; - pOut->depthBase = depthBase; - pOut->depthBits = depthBits; - pOut->stencilBits = stencilBits; - } - - return returnCode; -} - -/** -**************************************************************************************************** -* Lib::Flt32ToColorPixel -* -* @brief -* Convert a FLT_32 value to a red/green/blue/alpha pixel value -* @return -* ADDR_E_RETURNCODE -**************************************************************************************************** -*/ -ADDR_E_RETURNCODE Lib::Flt32ToColorPixel( - const ELEM_FLT32TOCOLORPIXEL_INPUT* pIn, - ELEM_FLT32TOCOLORPIXEL_OUTPUT* pOut) const -{ - ADDR_E_RETURNCODE returnCode = ADDR_OK; - - if (GetFillSizeFieldsFlags() == TRUE) - { - if ((pIn->size != sizeof(ELEM_FLT32TOCOLORPIXEL_INPUT)) || - (pOut->size != sizeof(ELEM_FLT32TOCOLORPIXEL_OUTPUT))) - { - returnCode = ADDR_PARAMSIZEMISMATCH; - } - } - - if (returnCode == ADDR_OK) - { - GetElemLib()->Flt32ToColorPixel(pIn->format, - pIn->surfNum, - pIn->surfSwap, - pIn->comps, - pOut->pPixel); - } - - return returnCode; -} - - -/** -**************************************************************************************************** -* Lib::GetExportNorm -* -* @brief -* Check one format can be EXPORT_NUM -* @return -* TRUE if EXPORT_NORM can be used -**************************************************************************************************** -*/ -BOOL_32 Lib::GetExportNorm( - const ELEM_GETEXPORTNORM_INPUT* pIn) const -{ - ADDR_E_RETURNCODE returnCode = ADDR_OK; - - BOOL_32 enabled = FALSE; - - if (GetFillSizeFieldsFlags() == TRUE) - { - if (pIn->size != sizeof(ELEM_GETEXPORTNORM_INPUT)) - { - returnCode = ADDR_PARAMSIZEMISMATCH; - } - } - - if (returnCode == ADDR_OK) - { - enabled = GetElemLib()->PixGetExportNorm(pIn->format, pIn->num, pIn->swap); - } - - return enabled; -} - -} // Addr diff -Nru 
mesa-18.3.3/src/amd/addrlib/core/addrlib.h mesa-19.0.1/src/amd/addrlib/core/addrlib.h --- mesa-18.3.3/src/amd/addrlib/core/addrlib.h 2018-04-03 17:32:26.000000000 +0000 +++ mesa-19.0.1/src/amd/addrlib/core/addrlib.h 1970-01-01 00:00:00.000000000 +0000 @@ -1,413 +0,0 @@ -/* - * Copyright © 2014 Advanced Micro Devices, Inc. - * All Rights Reserved. - * - * Permission is hereby granted, free of charge, to any person obtaining - * a copy of this software and associated documentation files (the - * "Software"), to deal in the Software without restriction, including - * without limitation the rights to use, copy, modify, merge, publish, - * distribute, sub license, and/or sell copies of the Software, and to - * permit persons to whom the Software is furnished to do so, subject to - * the following conditions: - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES - * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - * NON-INFRINGEMENT. IN NO EVENT SHALL THE COPYRIGHT HOLDERS, AUTHORS - * AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, - * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE - * USE OR OTHER DEALINGS IN THE SOFTWARE. - * - * The above copyright notice and this permission notice (including the - * next paragraph) shall be included in all copies or substantial portions - * of the Software. - */ - -/** -**************************************************************************************************** -* @file addrlib.h -* @brief Contains the Addr::Lib base class definition. 
-**************************************************************************************************** -*/ - -#ifndef __ADDR_LIB_H__ -#define __ADDR_LIB_H__ - -#include "addrinterface.h" -#include "addrobject.h" -#include "addrelemlib.h" - -#include "amdgpu_asic_addr.h" - -#ifndef CIASICIDGFXENGINE_R600 -#define CIASICIDGFXENGINE_R600 0x00000006 -#endif - -#ifndef CIASICIDGFXENGINE_R800 -#define CIASICIDGFXENGINE_R800 0x00000008 -#endif - -#ifndef CIASICIDGFXENGINE_SOUTHERNISLAND -#define CIASICIDGFXENGINE_SOUTHERNISLAND 0x0000000A -#endif - -#ifndef CIASICIDGFXENGINE_ARCTICISLAND -#define CIASICIDGFXENGINE_ARCTICISLAND 0x0000000D -#endif - -namespace Addr -{ - -/** -**************************************************************************************************** -* @brief Neutral enums that define pipeinterleave -**************************************************************************************************** -*/ -enum PipeInterleave -{ - ADDR_PIPEINTERLEAVE_256B = 256, - ADDR_PIPEINTERLEAVE_512B = 512, - ADDR_PIPEINTERLEAVE_1KB = 1024, - ADDR_PIPEINTERLEAVE_2KB = 2048, -}; - -/** -**************************************************************************************************** -* @brief Neutral enums that define DRAM row size -**************************************************************************************************** -*/ -enum RowSize -{ - ADDR_ROWSIZE_1KB = 1024, - ADDR_ROWSIZE_2KB = 2048, - ADDR_ROWSIZE_4KB = 4096, - ADDR_ROWSIZE_8KB = 8192, -}; - -/** -**************************************************************************************************** -* @brief Neutral enums that define bank interleave -**************************************************************************************************** -*/ -enum BankInterleave -{ - ADDR_BANKINTERLEAVE_1 = 1, - ADDR_BANKINTERLEAVE_2 = 2, - ADDR_BANKINTERLEAVE_4 = 4, - ADDR_BANKINTERLEAVE_8 = 8, -}; - -/** 
-**************************************************************************************************** -* @brief Neutral enums that define shader engine tile size -**************************************************************************************************** -*/ -enum ShaderEngineTileSize -{ - ADDR_SE_TILESIZE_16 = 16, - ADDR_SE_TILESIZE_32 = 32, -}; - -/** -**************************************************************************************************** -* @brief Neutral enums that define bank swap size -**************************************************************************************************** -*/ -enum BankSwapSize -{ - ADDR_BANKSWAP_128B = 128, - ADDR_BANKSWAP_256B = 256, - ADDR_BANKSWAP_512B = 512, - ADDR_BANKSWAP_1KB = 1024, -}; - -/** -**************************************************************************************************** -* @brief Enums that define max compressed fragments config -**************************************************************************************************** -*/ -enum NumMaxCompressedFragmentsConfig -{ - ADDR_CONFIG_1_MAX_COMPRESSED_FRAGMENTS = 0x00000000, - ADDR_CONFIG_2_MAX_COMPRESSED_FRAGMENTS = 0x00000001, - ADDR_CONFIG_4_MAX_COMPRESSED_FRAGMENTS = 0x00000002, - ADDR_CONFIG_8_MAX_COMPRESSED_FRAGMENTS = 0x00000003, -}; - -/** -**************************************************************************************************** -* @brief Enums that define num pipes config -**************************************************************************************************** -*/ -enum NumPipesConfig -{ - ADDR_CONFIG_1_PIPE = 0x00000000, - ADDR_CONFIG_2_PIPE = 0x00000001, - ADDR_CONFIG_4_PIPE = 0x00000002, - ADDR_CONFIG_8_PIPE = 0x00000003, - ADDR_CONFIG_16_PIPE = 0x00000004, - ADDR_CONFIG_32_PIPE = 0x00000005, - ADDR_CONFIG_64_PIPE = 0x00000006, -}; - -/** -**************************************************************************************************** -* @brief Enums that define num banks config 
-**************************************************************************************************** -*/ -enum NumBanksConfig -{ - ADDR_CONFIG_1_BANK = 0x00000000, - ADDR_CONFIG_2_BANK = 0x00000001, - ADDR_CONFIG_4_BANK = 0x00000002, - ADDR_CONFIG_8_BANK = 0x00000003, - ADDR_CONFIG_16_BANK = 0x00000004, -}; - -/** -**************************************************************************************************** -* @brief Enums that define num rb per shader engine config -**************************************************************************************************** -*/ -enum NumRbPerShaderEngineConfig -{ - ADDR_CONFIG_1_RB_PER_SHADER_ENGINE = 0x00000000, - ADDR_CONFIG_2_RB_PER_SHADER_ENGINE = 0x00000001, - ADDR_CONFIG_4_RB_PER_SHADER_ENGINE = 0x00000002, -}; - -/** -**************************************************************************************************** -* @brief Enums that define num shader engines config -**************************************************************************************************** -*/ -enum NumShaderEnginesConfig -{ - ADDR_CONFIG_1_SHADER_ENGINE = 0x00000000, - ADDR_CONFIG_2_SHADER_ENGINE = 0x00000001, - ADDR_CONFIG_4_SHADER_ENGINE = 0x00000002, - ADDR_CONFIG_8_SHADER_ENGINE = 0x00000003, -}; - -/** -**************************************************************************************************** -* @brief Enums that define pipe interleave size config -**************************************************************************************************** -*/ -enum PipeInterleaveSizeConfig -{ - ADDR_CONFIG_PIPE_INTERLEAVE_256B = 0x00000000, - ADDR_CONFIG_PIPE_INTERLEAVE_512B = 0x00000001, - ADDR_CONFIG_PIPE_INTERLEAVE_1KB = 0x00000002, - ADDR_CONFIG_PIPE_INTERLEAVE_2KB = 0x00000003, -}; - -/** -**************************************************************************************************** -* @brief Enums that define row size config 
-**************************************************************************************************** -*/ -enum RowSizeConfig -{ - ADDR_CONFIG_1KB_ROW = 0x00000000, - ADDR_CONFIG_2KB_ROW = 0x00000001, - ADDR_CONFIG_4KB_ROW = 0x00000002, -}; - -/** -**************************************************************************************************** -* @brief Enums that define bank interleave size config -**************************************************************************************************** -*/ -enum BankInterleaveSizeConfig -{ - ADDR_CONFIG_BANK_INTERLEAVE_1 = 0x00000000, - ADDR_CONFIG_BANK_INTERLEAVE_2 = 0x00000001, - ADDR_CONFIG_BANK_INTERLEAVE_4 = 0x00000002, - ADDR_CONFIG_BANK_INTERLEAVE_8 = 0x00000003, -}; - -/** -**************************************************************************************************** -* @brief Enums that define engine tile size config -**************************************************************************************************** -*/ -enum ShaderEngineTileSizeConfig -{ - ADDR_CONFIG_SE_TILE_16 = 0x00000000, - ADDR_CONFIG_SE_TILE_32 = 0x00000001, -}; - -/** -**************************************************************************************************** -* @brief This class contains asic independent address lib functionalities -**************************************************************************************************** -*/ -class Lib : public Object -{ -public: - virtual ~Lib(); - - static ADDR_E_RETURNCODE Create( - const ADDR_CREATE_INPUT* pCreateInfo, ADDR_CREATE_OUTPUT* pCreateOut); - - /// Pair of Create - VOID Destroy() - { - delete this; - } - - static Lib* GetLib(ADDR_HANDLE hLib); - - /// Returns AddrLib version (from compiled binary instead include file) - UINT_32 GetVersion() - { - return m_version; - } - - /// Returns asic chip family name defined by AddrLib - ChipFamily GetChipFamily() - { - return m_chipFamily; - } - - ADDR_E_RETURNCODE Flt32ToDepthPixel( - const 
ELEM_FLT32TODEPTHPIXEL_INPUT* pIn, - ELEM_FLT32TODEPTHPIXEL_OUTPUT* pOut) const; - - ADDR_E_RETURNCODE Flt32ToColorPixel( - const ELEM_FLT32TOCOLORPIXEL_INPUT* pIn, - ELEM_FLT32TOCOLORPIXEL_OUTPUT* pOut) const; - - BOOL_32 GetExportNorm(const ELEM_GETEXPORTNORM_INPUT* pIn) const; - - ADDR_E_RETURNCODE GetMaxAlignments(ADDR_GET_MAX_ALINGMENTS_OUTPUT* pOut) const; - - ADDR_E_RETURNCODE GetMaxMetaAlignments(ADDR_GET_MAX_ALINGMENTS_OUTPUT* pOut) const; - -protected: - Lib(); // Constructor is protected - Lib(const Client* pClient); - - /// Pure virtual function to get max base alignments - virtual UINT_32 HwlComputeMaxBaseAlignments() const = 0; - - /// Gets maximum alignements for metadata - virtual UINT_32 HwlComputeMaxMetaBaseAlignments() const - { - ADDR_NOT_IMPLEMENTED(); - - return 0; - } - - VOID ValidBaseAlignments(UINT_32 alignment) const - { -#if DEBUG - ADDR_ASSERT(alignment <= m_maxBaseAlign); -#endif - } - - VOID ValidMetaBaseAlignments(UINT_32 metaAlignment) const - { -#if DEBUG - ADDR_ASSERT(metaAlignment <= m_maxMetaBaseAlign); -#endif - } - - // - // Initialization - // - /// Pure Virtual function for Hwl computing internal global parameters from h/w registers - virtual BOOL_32 HwlInitGlobalParams(const ADDR_CREATE_INPUT* pCreateIn) = 0; - - /// Pure Virtual function for Hwl converting chip family - virtual ChipFamily HwlConvertChipFamily(UINT_32 uChipFamily, UINT_32 uChipRevision) = 0; - - /// Get equation table pointer and number of equations - virtual UINT_32 HwlGetEquationTableInfo(const ADDR_EQUATION** ppEquationTable) const - { - *ppEquationTable = NULL; - - return 0; - } - - // - // Misc helper - // - static UINT_32 Bits2Number(UINT_32 bitNum, ...); - - static UINT_32 GetNumFragments(UINT_32 numSamples, UINT_32 numFrags) - { - return (numFrags != 0) ? 
numFrags : Max(1u, numSamples); - } - - /// Returns pointer of ElemLib - ElemLib* GetElemLib() const - { - return m_pElemLib; - } - - /// Returns fillSizeFields flag - UINT_32 GetFillSizeFieldsFlags() const - { - return m_configFlags.fillSizeFields; - } - -private: - // Disallow the copy constructor - Lib(const Lib& a); - - // Disallow the assignment operator - Lib& operator=(const Lib& a); - - VOID SetChipFamily(UINT_32 uChipFamily, UINT_32 uChipRevision); - - VOID SetMinPitchAlignPixels(UINT_32 minPitchAlignPixels); - - VOID SetMaxAlignments(); - -protected: - LibClass m_class; ///< Store class type (HWL type) - - ChipFamily m_chipFamily; ///< Chip family translated from the one in atiid.h - - UINT_32 m_chipRevision; ///< Revision id from xxx_id.h - - UINT_32 m_version; ///< Current version - - // - // Global parameters - // - ConfigFlags m_configFlags; ///< Global configuration flags. Note this is setup by - /// AddrLib instead of Client except forceLinearAligned - - UINT_32 m_pipes; ///< Number of pipes - UINT_32 m_banks; ///< Number of banks - /// For r800 this is MC_ARB_RAMCFG.NOOFBANK - /// Keep it here to do default parameter calculation - - UINT_32 m_pipeInterleaveBytes; - ///< Specifies the size of contiguous address space - /// within each tiling pipe when making linear - /// accesses. 
(Formerly Group Size) - - UINT_32 m_rowSize; ///< DRAM row size, in bytes - - UINT_32 m_minPitchAlignPixels; ///< Minimum pitch alignment in pixels - UINT_32 m_maxSamples; ///< Max numSamples - - UINT_32 m_maxBaseAlign; ///< Max base alignment for data surface - UINT_32 m_maxMetaBaseAlign; ///< Max base alignment for metadata - -private: - ElemLib* m_pElemLib; ///< Element Lib pointer -}; - -Lib* SiHwlInit (const Client* pClient); -Lib* CiHwlInit (const Client* pClient); -Lib* Gfx9HwlInit (const Client* pClient); - -} // Addr - -#endif diff -Nru mesa-18.3.3/src/amd/addrlib/core/addrobject.cpp mesa-19.0.1/src/amd/addrlib/core/addrobject.cpp --- mesa-18.3.3/src/amd/addrlib/core/addrobject.cpp 2017-11-05 00:14:08.000000000 +0000 +++ mesa-19.0.1/src/amd/addrlib/core/addrobject.cpp 1970-01-01 00:00:00.000000000 +0000 @@ -1,233 +0,0 @@ -/* - * Copyright © 2014 Advanced Micro Devices, Inc. - * All Rights Reserved. - * - * Permission is hereby granted, free of charge, to any person obtaining - * a copy of this software and associated documentation files (the - * "Software"), to deal in the Software without restriction, including - * without limitation the rights to use, copy, modify, merge, publish, - * distribute, sub license, and/or sell copies of the Software, and to - * permit persons to whom the Software is furnished to do so, subject to - * the following conditions: - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES - * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - * NON-INFRINGEMENT. IN NO EVENT SHALL THE COPYRIGHT HOLDERS, AUTHORS - * AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, - * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE - * USE OR OTHER DEALINGS IN THE SOFTWARE. 
- * - * The above copyright notice and this permission notice (including the - * next paragraph) shall be included in all copies or substantial portions - * of the Software. - */ - -/** -**************************************************************************************************** -* @file addrobject.cpp -* @brief Contains the Object base class implementation. -**************************************************************************************************** -*/ - -#include "addrinterface.h" -#include "addrobject.h" - -namespace Addr -{ - -/** -**************************************************************************************************** -* Object::Object -* -* @brief -* Constructor for the Object class. -**************************************************************************************************** -*/ -Object::Object() -{ - m_client.handle = NULL; - m_client.callbacks.allocSysMem = NULL; - m_client.callbacks.freeSysMem = NULL; - m_client.callbacks.debugPrint = NULL; -} - -/** -**************************************************************************************************** -* Object::Object -* -* @brief -* Constructor for the Object class. -**************************************************************************************************** -*/ -Object::Object(const Client* pClient) -{ - m_client = *pClient; -} - -/** -**************************************************************************************************** -* Object::~Object -* -* @brief -* Destructor for the Object class. 
-**************************************************************************************************** -*/ -Object::~Object() -{ -} - -/** -**************************************************************************************************** -* Object::ClientAlloc -* -* @brief -* Calls instanced allocSysMem inside Client -**************************************************************************************************** -*/ -VOID* Object::ClientAlloc( - size_t objSize, ///< [in] Size to allocate - const Client* pClient) ///< [in] Client pointer -{ - VOID* pObjMem = NULL; - - if (pClient->callbacks.allocSysMem != NULL) - { - ADDR_ALLOCSYSMEM_INPUT allocInput = {0}; - - allocInput.size = sizeof(ADDR_ALLOCSYSMEM_INPUT); - allocInput.flags.value = 0; - allocInput.sizeInBytes = static_cast(objSize); - allocInput.hClient = pClient->handle; - - pObjMem = pClient->callbacks.allocSysMem(&allocInput); - } - - return pObjMem; -} - -/** -**************************************************************************************************** -* Object::Alloc -* -* @brief -* A wrapper of ClientAlloc -**************************************************************************************************** -*/ -VOID* Object::Alloc( - size_t objSize ///< [in] Size to allocate - ) const -{ - return ClientAlloc(objSize, &m_client); -} - -/** -**************************************************************************************************** -* Object::ClientFree -* -* @brief -* Calls freeSysMem inside Client -**************************************************************************************************** -*/ -VOID Object::ClientFree( - VOID* pObjMem, ///< [in] User virtual address to free. 
- const Client* pClient) ///< [in] Client pointer -{ - if (pClient->callbacks.freeSysMem != NULL) - { - if (pObjMem != NULL) - { - ADDR_FREESYSMEM_INPUT freeInput = {0}; - - freeInput.size = sizeof(ADDR_FREESYSMEM_INPUT); - freeInput.hClient = pClient->handle; - freeInput.pVirtAddr = pObjMem; - - pClient->callbacks.freeSysMem(&freeInput); - } - } -} - -/** -**************************************************************************************************** -* Object::Free -* -* @brief -* A wrapper of ClientFree -**************************************************************************************************** -*/ -VOID Object::Free( - VOID* pObjMem ///< [in] User virtual address to free. - ) const -{ - ClientFree(pObjMem, &m_client); -} - -/** -**************************************************************************************************** -* Object::operator new -* -* @brief -* Placement new operator. (with pre-allocated memory pointer) -* -* @return -* Returns pre-allocated memory pointer. -**************************************************************************************************** -*/ -VOID* Object::operator new( - size_t objSize, ///< [in] Size to allocate - VOID* pMem) ///< [in] Pre-allocated pointer -{ - return pMem; -} - -/** -**************************************************************************************************** -* Object::operator delete -* -* @brief -* Frees Object object memory. -**************************************************************************************************** -*/ -VOID Object::operator delete( - VOID* pObjMem) ///< [in] User virtual address to free. 
-{ - Object* pObj = static_cast(pObjMem); - ClientFree(pObjMem, &pObj->m_client); -} - -/** -**************************************************************************************************** -* Object::DebugPrint -* -* @brief -* Print debug message -* -* @return -* N/A -**************************************************************************************************** -*/ -VOID Object::DebugPrint( - const CHAR* pDebugString, ///< [in] Debug string - ... - ) const -{ -#if DEBUG - if (m_client.callbacks.debugPrint != NULL) - { - ADDR_DEBUGPRINT_INPUT debugPrintInput = {0}; - - debugPrintInput.size = sizeof(ADDR_DEBUGPRINT_INPUT); - debugPrintInput.pDebugString = const_cast(pDebugString); - debugPrintInput.hClient = m_client.handle; - va_start(debugPrintInput.ap, pDebugString); - - m_client.callbacks.debugPrint(&debugPrintInput); - - va_end(debugPrintInput.ap); - } -#endif -} - -} // Addr diff -Nru mesa-18.3.3/src/amd/addrlib/core/addrobject.h mesa-19.0.1/src/amd/addrlib/core/addrobject.h --- mesa-18.3.3/src/amd/addrlib/core/addrobject.h 2017-11-05 00:14:08.000000000 +0000 +++ mesa-19.0.1/src/amd/addrlib/core/addrobject.h 1970-01-01 00:00:00.000000000 +0000 @@ -1,95 +0,0 @@ -/* - * Copyright © 2014 Advanced Micro Devices, Inc. - * All Rights Reserved. - * - * Permission is hereby granted, free of charge, to any person obtaining - * a copy of this software and associated documentation files (the - * "Software"), to deal in the Software without restriction, including - * without limitation the rights to use, copy, modify, merge, publish, - * distribute, sub license, and/or sell copies of the Software, and to - * permit persons to whom the Software is furnished to do so, subject to - * the following conditions: - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES - * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - * NON-INFRINGEMENT. 
IN NO EVENT SHALL THE COPYRIGHT HOLDERS, AUTHORS - * AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, - * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE - * USE OR OTHER DEALINGS IN THE SOFTWARE. - * - * The above copyright notice and this permission notice (including the - * next paragraph) shall be included in all copies or substantial portions - * of the Software. - */ - -/** -**************************************************************************************************** -* @file addrobject.h -* @brief Contains the Object base class definition. -**************************************************************************************************** -*/ - -#ifndef __ADDR_OBJECT_H__ -#define __ADDR_OBJECT_H__ - -#include "addrtypes.h" -#include "addrcommon.h" - -namespace Addr -{ - -/** -**************************************************************************************************** -* @brief This structure contains client specific data -**************************************************************************************************** -*/ -struct Client -{ - ADDR_CLIENT_HANDLE handle; - ADDR_CALLBACKS callbacks; -}; -/** -**************************************************************************************************** -* @brief This class is the base class for all ADDR class objects. -**************************************************************************************************** -*/ -class Object -{ -public: - Object(); - Object(const Client* pClient); - virtual ~Object(); - - VOID* operator new(size_t size, VOID* pMem); - VOID operator delete(VOID* pObj); - /// Microsoft compiler requires a matching delete implementation, which seems to be called when - /// bad_alloc is thrown. But currently C++ exception isn't allowed so a dummy implementation is - /// added to eliminate the warning. 
- VOID operator delete(VOID* pObj, VOID* pMem) { ADDR_ASSERT_ALWAYS(); } - - VOID* Alloc(size_t size) const; - VOID Free(VOID* pObj) const; - - VOID DebugPrint(const CHAR* pDebugString, ...) const; - - const Client* GetClient() const {return &m_client;} - -protected: - Client m_client; - - static VOID* ClientAlloc(size_t size, const Client* pClient); - static VOID ClientFree(VOID* pObj, const Client* pClient); - -private: - // disallow the copy constructor - Object(const Object& a); - - // disallow the assignment operator - Object& operator=(const Object& a); -}; - -} // Addr -#endif - diff -Nru mesa-18.3.3/src/amd/addrlib/gfx9/chip/gfx9_enum.h mesa-19.0.1/src/amd/addrlib/gfx9/chip/gfx9_enum.h --- mesa-18.3.3/src/amd/addrlib/gfx9/chip/gfx9_enum.h 2017-11-05 00:14:08.000000000 +0000 +++ mesa-19.0.1/src/amd/addrlib/gfx9/chip/gfx9_enum.h 1970-01-01 00:00:00.000000000 +0000 @@ -1,10535 +0,0 @@ -/* - * Copyright © 2017 Advanced Micro Devices, Inc. - * All Rights Reserved. - * - * Permission is hereby granted, free of charge, to any person obtaining - * a copy of this software and associated documentation files (the - * "Software"), to deal in the Software without restriction, including - * without limitation the rights to use, copy, modify, merge, publish, - * distribute, sub license, and/or sell copies of the Software, and to - * permit persons to whom the Software is furnished to do so, subject to - * the following conditions: - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES - * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - * NON-INFRINGEMENT. IN NO EVENT SHALL THE COPYRIGHT HOLDERS, AUTHORS - * AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, - * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE - * USE OR OTHER DEALINGS IN THE SOFTWARE. 
- * - * The above copyright notice and this permission notice (including the - * next paragraph) shall be included in all copies or substantial portions - * of the Software. - */ - -#if !defined (_vega10_ENUM_HEADER) -#define _vega10_ENUM_HEADER - - -#ifndef _DRIVER_BUILD -#ifndef GL_ZERO -#define GL__ZERO BLEND_ZERO -#define GL__ONE BLEND_ONE -#define GL__SRC_COLOR BLEND_SRC_COLOR -#define GL__ONE_MINUS_SRC_COLOR BLEND_ONE_MINUS_SRC_COLOR -#define GL__DST_COLOR BLEND_DST_COLOR -#define GL__ONE_MINUS_DST_COLOR BLEND_ONE_MINUS_DST_COLOR -#define GL__SRC_ALPHA BLEND_SRC_ALPHA -#define GL__ONE_MINUS_SRC_ALPHA BLEND_ONE_MINUS_SRC_ALPHA -#define GL__DST_ALPHA BLEND_DST_ALPHA -#define GL__ONE_MINUS_DST_ALPHA BLEND_ONE_MINUS_DST_ALPHA -#define GL__SRC_ALPHA_SATURATE BLEND_SRC_ALPHA_SATURATE -#define GL__CONSTANT_COLOR BLEND_CONSTANT_COLOR -#define GL__ONE_MINUS_CONSTANT_COLOR BLEND_ONE_MINUS_CONSTANT_COLOR -#define GL__CONSTANT_ALPHA BLEND_CONSTANT_ALPHA -#define GL__ONE_MINUS_CONSTANT_ALPHA BLEND_ONE_MINUS_CONSTANT_ALPHA -#endif -#endif - -/******************************************************* - * GDS DATA_TYPE Enums - *******************************************************/ - -#ifndef ENUMS_GDS_PERFCOUNT_SELECT_H -#define ENUMS_GDS_PERFCOUNT_SELECT_H -typedef enum GDS_PERFCOUNT_SELECT { - GDS_PERF_SEL_DS_ADDR_CONFL = 0, - GDS_PERF_SEL_DS_BANK_CONFL = 1, - GDS_PERF_SEL_WBUF_FLUSH = 2, - GDS_PERF_SEL_WR_COMP = 3, - GDS_PERF_SEL_WBUF_WR = 4, - GDS_PERF_SEL_RBUF_HIT = 5, - GDS_PERF_SEL_RBUF_MISS = 6, - GDS_PERF_SEL_SE0_SH0_NORET = 7, - GDS_PERF_SEL_SE0_SH0_RET = 8, - GDS_PERF_SEL_SE0_SH0_ORD_CNT = 9, - GDS_PERF_SEL_SE0_SH0_2COMP_REQ = 10, - GDS_PERF_SEL_SE0_SH0_ORD_WAVE_VALID = 11, - GDS_PERF_SEL_SE0_SH0_GDS_DATA_VALID = 12, - GDS_PERF_SEL_SE0_SH0_GDS_STALL_BY_ORD = 13, - GDS_PERF_SEL_SE0_SH0_GDS_WR_OP = 14, - GDS_PERF_SEL_SE0_SH0_GDS_RD_OP = 15, - GDS_PERF_SEL_SE0_SH0_GDS_ATOM_OP = 16, - GDS_PERF_SEL_SE0_SH0_GDS_REL_OP = 17, - GDS_PERF_SEL_SE0_SH0_GDS_CMPXCH_OP = 18, - 
GDS_PERF_SEL_SE0_SH0_GDS_BYTE_OP = 19, - GDS_PERF_SEL_SE0_SH0_GDS_SHORT_OP = 20, - GDS_PERF_SEL_SE0_SH1_NORET = 21, - GDS_PERF_SEL_SE0_SH1_RET = 22, - GDS_PERF_SEL_SE0_SH1_ORD_CNT = 23, - GDS_PERF_SEL_SE0_SH1_2COMP_REQ = 24, - GDS_PERF_SEL_SE0_SH1_ORD_WAVE_VALID = 25, - GDS_PERF_SEL_SE0_SH1_GDS_DATA_VALID = 26, - GDS_PERF_SEL_SE0_SH1_GDS_STALL_BY_ORD = 27, - GDS_PERF_SEL_SE0_SH1_GDS_WR_OP = 28, - GDS_PERF_SEL_SE0_SH1_GDS_RD_OP = 29, - GDS_PERF_SEL_SE0_SH1_GDS_ATOM_OP = 30, - GDS_PERF_SEL_SE0_SH1_GDS_REL_OP = 31, - GDS_PERF_SEL_SE0_SH1_GDS_CMPXCH_OP = 32, - GDS_PERF_SEL_SE0_SH1_GDS_BYTE_OP = 33, - GDS_PERF_SEL_SE0_SH1_GDS_SHORT_OP = 34, - GDS_PERF_SEL_SE1_SH0_NORET = 35, - GDS_PERF_SEL_SE1_SH0_RET = 36, - GDS_PERF_SEL_SE1_SH0_ORD_CNT = 37, - GDS_PERF_SEL_SE1_SH0_2COMP_REQ = 38, - GDS_PERF_SEL_SE1_SH0_ORD_WAVE_VALID = 39, - GDS_PERF_SEL_SE1_SH0_GDS_DATA_VALID = 40, - GDS_PERF_SEL_SE1_SH0_GDS_STALL_BY_ORD = 41, - GDS_PERF_SEL_SE1_SH0_GDS_WR_OP = 42, - GDS_PERF_SEL_SE1_SH0_GDS_RD_OP = 43, - GDS_PERF_SEL_SE1_SH0_GDS_ATOM_OP = 44, - GDS_PERF_SEL_SE1_SH0_GDS_REL_OP = 45, - GDS_PERF_SEL_SE1_SH0_GDS_CMPXCH_OP = 46, - GDS_PERF_SEL_SE1_SH0_GDS_BYTE_OP = 47, - GDS_PERF_SEL_SE1_SH0_GDS_SHORT_OP = 48, - GDS_PERF_SEL_SE1_SH1_NORET = 49, - GDS_PERF_SEL_SE1_SH1_RET = 50, - GDS_PERF_SEL_SE1_SH1_ORD_CNT = 51, - GDS_PERF_SEL_SE1_SH1_2COMP_REQ = 52, - GDS_PERF_SEL_SE1_SH1_ORD_WAVE_VALID = 53, - GDS_PERF_SEL_SE1_SH1_GDS_DATA_VALID = 54, - GDS_PERF_SEL_SE1_SH1_GDS_STALL_BY_ORD = 55, - GDS_PERF_SEL_SE1_SH1_GDS_WR_OP = 56, - GDS_PERF_SEL_SE1_SH1_GDS_RD_OP = 57, - GDS_PERF_SEL_SE1_SH1_GDS_ATOM_OP = 58, - GDS_PERF_SEL_SE1_SH1_GDS_REL_OP = 59, - GDS_PERF_SEL_SE1_SH1_GDS_CMPXCH_OP = 60, - GDS_PERF_SEL_SE1_SH1_GDS_BYTE_OP = 61, - GDS_PERF_SEL_SE1_SH1_GDS_SHORT_OP = 62, - GDS_PERF_SEL_SE2_SH0_NORET = 63, - GDS_PERF_SEL_SE2_SH0_RET = 64, - GDS_PERF_SEL_SE2_SH0_ORD_CNT = 65, - GDS_PERF_SEL_SE2_SH0_2COMP_REQ = 66, - GDS_PERF_SEL_SE2_SH0_ORD_WAVE_VALID = 67, - GDS_PERF_SEL_SE2_SH0_GDS_DATA_VALID = 
68, - GDS_PERF_SEL_SE2_SH0_GDS_STALL_BY_ORD = 69, - GDS_PERF_SEL_SE2_SH0_GDS_WR_OP = 70, - GDS_PERF_SEL_SE2_SH0_GDS_RD_OP = 71, - GDS_PERF_SEL_SE2_SH0_GDS_ATOM_OP = 72, - GDS_PERF_SEL_SE2_SH0_GDS_REL_OP = 73, - GDS_PERF_SEL_SE2_SH0_GDS_CMPXCH_OP = 74, - GDS_PERF_SEL_SE2_SH0_GDS_BYTE_OP = 75, - GDS_PERF_SEL_SE2_SH0_GDS_SHORT_OP = 76, - GDS_PERF_SEL_SE2_SH1_NORET = 77, - GDS_PERF_SEL_SE2_SH1_RET = 78, - GDS_PERF_SEL_SE2_SH1_ORD_CNT = 79, - GDS_PERF_SEL_SE2_SH1_2COMP_REQ = 80, - GDS_PERF_SEL_SE2_SH1_ORD_WAVE_VALID = 81, - GDS_PERF_SEL_SE2_SH1_GDS_DATA_VALID = 82, - GDS_PERF_SEL_SE2_SH1_GDS_STALL_BY_ORD = 83, - GDS_PERF_SEL_SE2_SH1_GDS_WR_OP = 84, - GDS_PERF_SEL_SE2_SH1_GDS_RD_OP = 85, - GDS_PERF_SEL_SE2_SH1_GDS_ATOM_OP = 86, - GDS_PERF_SEL_SE2_SH1_GDS_REL_OP = 87, - GDS_PERF_SEL_SE2_SH1_GDS_CMPXCH_OP = 88, - GDS_PERF_SEL_SE2_SH1_GDS_BYTE_OP = 89, - GDS_PERF_SEL_SE2_SH1_GDS_SHORT_OP = 90, - GDS_PERF_SEL_SE3_SH0_NORET = 91, - GDS_PERF_SEL_SE3_SH0_RET = 92, - GDS_PERF_SEL_SE3_SH0_ORD_CNT = 93, - GDS_PERF_SEL_SE3_SH0_2COMP_REQ = 94, - GDS_PERF_SEL_SE3_SH0_ORD_WAVE_VALID = 95, - GDS_PERF_SEL_SE3_SH0_GDS_DATA_VALID = 96, - GDS_PERF_SEL_SE3_SH0_GDS_STALL_BY_ORD = 97, - GDS_PERF_SEL_SE3_SH0_GDS_WR_OP = 98, - GDS_PERF_SEL_SE3_SH0_GDS_RD_OP = 99, - GDS_PERF_SEL_SE3_SH0_GDS_ATOM_OP = 100, - GDS_PERF_SEL_SE3_SH0_GDS_REL_OP = 101, - GDS_PERF_SEL_SE3_SH0_GDS_CMPXCH_OP = 102, - GDS_PERF_SEL_SE3_SH0_GDS_BYTE_OP = 103, - GDS_PERF_SEL_SE3_SH0_GDS_SHORT_OP = 104, - GDS_PERF_SEL_SE3_SH1_NORET = 105, - GDS_PERF_SEL_SE3_SH1_RET = 106, - GDS_PERF_SEL_SE3_SH1_ORD_CNT = 107, - GDS_PERF_SEL_SE3_SH1_2COMP_REQ = 108, - GDS_PERF_SEL_SE3_SH1_ORD_WAVE_VALID = 109, - GDS_PERF_SEL_SE3_SH1_GDS_DATA_VALID = 110, - GDS_PERF_SEL_SE3_SH1_GDS_STALL_BY_ORD = 111, - GDS_PERF_SEL_SE3_SH1_GDS_WR_OP = 112, - GDS_PERF_SEL_SE3_SH1_GDS_RD_OP = 113, - GDS_PERF_SEL_SE3_SH1_GDS_ATOM_OP = 114, - GDS_PERF_SEL_SE3_SH1_GDS_REL_OP = 115, - GDS_PERF_SEL_SE3_SH1_GDS_CMPXCH_OP = 116, - GDS_PERF_SEL_SE3_SH1_GDS_BYTE_OP = 117, 
- GDS_PERF_SEL_SE3_SH1_GDS_SHORT_OP = 118, - GDS_PERF_SEL_GWS_RELEASED = 119, - GDS_PERF_SEL_GWS_BYPASS = 120, -} GDS_PERFCOUNT_SELECT; -#endif /*ENUMS_GDS_PERFCOUNT_SELECT_H*/ - -/******************************************************* - * Chip Enums - *******************************************************/ - -/* - * SurfaceEndian enum - */ - -typedef enum SurfaceEndian { -ENDIAN_NONE = 0x00000000, -ENDIAN_8IN16 = 0x00000001, -ENDIAN_8IN32 = 0x00000002, -ENDIAN_8IN64 = 0x00000003, -} SurfaceEndian; - -/* - * ArrayMode enum - */ - -typedef enum ArrayMode { -ARRAY_LINEAR_GENERAL = 0x00000000, -ARRAY_LINEAR_ALIGNED = 0x00000001, -ARRAY_1D_TILED_THIN1 = 0x00000002, -ARRAY_1D_TILED_THICK = 0x00000003, -ARRAY_2D_TILED_THIN1 = 0x00000004, -ARRAY_PRT_TILED_THIN1 = 0x00000005, -ARRAY_PRT_2D_TILED_THIN1 = 0x00000006, -ARRAY_2D_TILED_THICK = 0x00000007, -ARRAY_2D_TILED_XTHICK = 0x00000008, -ARRAY_PRT_TILED_THICK = 0x00000009, -ARRAY_PRT_2D_TILED_THICK = 0x0000000a, -ARRAY_PRT_3D_TILED_THIN1 = 0x0000000b, -ARRAY_3D_TILED_THIN1 = 0x0000000c, -ARRAY_3D_TILED_THICK = 0x0000000d, -ARRAY_3D_TILED_XTHICK = 0x0000000e, -ARRAY_PRT_3D_TILED_THICK = 0x0000000f, -} ArrayMode; - -/* - * PipeTiling enum - */ - -typedef enum PipeTiling { -CONFIG_1_PIPE = 0x00000000, -CONFIG_2_PIPE = 0x00000001, -CONFIG_4_PIPE = 0x00000002, -CONFIG_8_PIPE = 0x00000003, -} PipeTiling; - -/* - * BankTiling enum - */ - -typedef enum BankTiling { -CONFIG_4_BANK = 0x00000000, -CONFIG_8_BANK = 0x00000001, -} BankTiling; - -/* - * GroupInterleave enum - */ - -typedef enum GroupInterleave { -CONFIG_256B_GROUP = 0x00000000, -CONFIG_512B_GROUP = 0x00000001, -} GroupInterleave; - -/* - * RowTiling enum - */ - -typedef enum RowTiling { -CONFIG_1KB_ROW = 0x00000000, -CONFIG_2KB_ROW = 0x00000001, -CONFIG_4KB_ROW = 0x00000002, -CONFIG_8KB_ROW = 0x00000003, -CONFIG_1KB_ROW_OPT = 0x00000004, -CONFIG_2KB_ROW_OPT = 0x00000005, -CONFIG_4KB_ROW_OPT = 0x00000006, -CONFIG_8KB_ROW_OPT = 0x00000007, -} RowTiling; - -/* - * 
BankSwapBytes enum - */ - -typedef enum BankSwapBytes { -CONFIG_128B_SWAPS = 0x00000000, -CONFIG_256B_SWAPS = 0x00000001, -CONFIG_512B_SWAPS = 0x00000002, -CONFIG_1KB_SWAPS = 0x00000003, -} BankSwapBytes; - -/* - * SampleSplitBytes enum - */ - -typedef enum SampleSplitBytes { -CONFIG_1KB_SPLIT = 0x00000000, -CONFIG_2KB_SPLIT = 0x00000001, -CONFIG_4KB_SPLIT = 0x00000002, -CONFIG_8KB_SPLIT = 0x00000003, -} SampleSplitBytes; - -/* - * NumPipes enum - */ - -typedef enum NumPipes { -ADDR_CONFIG_1_PIPE = 0x00000000, -ADDR_CONFIG_2_PIPE = 0x00000001, -ADDR_CONFIG_4_PIPE = 0x00000002, -ADDR_CONFIG_8_PIPE = 0x00000003, -ADDR_CONFIG_16_PIPE = 0x00000004, -ADDR_CONFIG_32_PIPE = 0x00000005, -} NumPipes; - -/* - * NumBanksConfig enum - */ - -typedef enum NumBanksConfig { -ADDR_CONFIG_1_BANK = 0x00000000, -ADDR_CONFIG_2_BANK = 0x00000001, -ADDR_CONFIG_4_BANK = 0x00000002, -ADDR_CONFIG_8_BANK = 0x00000003, -ADDR_CONFIG_16_BANK = 0x00000004, -} NumBanksConfig; - -/* - * PipeInterleaveSize enum - */ - -typedef enum PipeInterleaveSize { -ADDR_CONFIG_PIPE_INTERLEAVE_256B = 0x00000000, -ADDR_CONFIG_PIPE_INTERLEAVE_512B = 0x00000001, -ADDR_CONFIG_PIPE_INTERLEAVE_1KB = 0x00000002, -ADDR_CONFIG_PIPE_INTERLEAVE_2KB = 0x00000003, -} PipeInterleaveSize; - -/* - * BankInterleaveSize enum - */ - -typedef enum BankInterleaveSize { -ADDR_CONFIG_BANK_INTERLEAVE_1 = 0x00000000, -ADDR_CONFIG_BANK_INTERLEAVE_2 = 0x00000001, -ADDR_CONFIG_BANK_INTERLEAVE_4 = 0x00000002, -ADDR_CONFIG_BANK_INTERLEAVE_8 = 0x00000003, -} BankInterleaveSize; - -/* - * NumShaderEngines enum - */ - -typedef enum NumShaderEngines { -ADDR_CONFIG_1_SHADER_ENGINE = 0x00000000, -ADDR_CONFIG_2_SHADER_ENGINE = 0x00000001, -ADDR_CONFIG_4_SHADER_ENGINE = 0x00000002, -ADDR_CONFIG_8_SHADER_ENGINE = 0x00000003, -} NumShaderEngines; - -/* - * NumRbPerShaderEngine enum - */ - -typedef enum NumRbPerShaderEngine { -ADDR_CONFIG_1_RB_PER_SHADER_ENGINE = 0x00000000, -ADDR_CONFIG_2_RB_PER_SHADER_ENGINE = 0x00000001, 
-ADDR_CONFIG_4_RB_PER_SHADER_ENGINE = 0x00000002, -} NumRbPerShaderEngine; - -/* - * NumGPUs enum - */ - -typedef enum NumGPUs { -ADDR_CONFIG_1_GPU = 0x00000000, -ADDR_CONFIG_2_GPU = 0x00000001, -ADDR_CONFIG_4_GPU = 0x00000002, -ADDR_CONFIG_8_GPU = 0x00000003, -} NumGPUs; - -/* - * NumMaxCompressedFragments enum - */ - -typedef enum NumMaxCompressedFragments { -ADDR_CONFIG_1_MAX_COMPRESSED_FRAGMENTS = 0x00000000, -ADDR_CONFIG_2_MAX_COMPRESSED_FRAGMENTS = 0x00000001, -ADDR_CONFIG_4_MAX_COMPRESSED_FRAGMENTS = 0x00000002, -ADDR_CONFIG_8_MAX_COMPRESSED_FRAGMENTS = 0x00000003, -} NumMaxCompressedFragments; - -/* - * ShaderEngineTileSize enum - */ - -typedef enum ShaderEngineTileSize { -ADDR_CONFIG_SE_TILE_16 = 0x00000000, -ADDR_CONFIG_SE_TILE_32 = 0x00000001, -} ShaderEngineTileSize; - -/* - * MultiGPUTileSize enum - */ - -typedef enum MultiGPUTileSize { -ADDR_CONFIG_GPU_TILE_16 = 0x00000000, -ADDR_CONFIG_GPU_TILE_32 = 0x00000001, -ADDR_CONFIG_GPU_TILE_64 = 0x00000002, -ADDR_CONFIG_GPU_TILE_128 = 0x00000003, -} MultiGPUTileSize; - -/* - * RowSize enum - */ - -typedef enum RowSize { -ADDR_CONFIG_1KB_ROW = 0x00000000, -ADDR_CONFIG_2KB_ROW = 0x00000001, -ADDR_CONFIG_4KB_ROW = 0x00000002, -} RowSize; - -/* - * NumLowerPipes enum - */ - -typedef enum NumLowerPipes { -ADDR_CONFIG_1_LOWER_PIPES = 0x00000000, -ADDR_CONFIG_2_LOWER_PIPES = 0x00000001, -} NumLowerPipes; - -/* - * ColorTransform enum - */ - -typedef enum ColorTransform { -DCC_CT_AUTO = 0x00000000, -DCC_CT_NONE = 0x00000001, -ABGR_TO_A_BG_G_RB = 0x00000002, -BGRA_TO_BG_G_RB_A = 0x00000003, -} ColorTransform; - -/* - * CompareRef enum - */ - -typedef enum CompareRef { -REF_NEVER = 0x00000000, -REF_LESS = 0x00000001, -REF_EQUAL = 0x00000002, -REF_LEQUAL = 0x00000003, -REF_GREATER = 0x00000004, -REF_NOTEQUAL = 0x00000005, -REF_GEQUAL = 0x00000006, -REF_ALWAYS = 0x00000007, -} CompareRef; - -/* - * ReadSize enum - */ - -typedef enum ReadSize { -READ_256_BITS = 0x00000000, -READ_512_BITS = 0x00000001, -} ReadSize; - -/* 
- * DepthFormat enum - */ - -typedef enum DepthFormat { -DEPTH_INVALID = 0x00000000, -DEPTH_16 = 0x00000001, -DEPTH_X8_24 = 0x00000002, -DEPTH_8_24 = 0x00000003, -DEPTH_X8_24_FLOAT = 0x00000004, -DEPTH_8_24_FLOAT = 0x00000005, -DEPTH_32_FLOAT = 0x00000006, -DEPTH_X24_8_32_FLOAT = 0x00000007, -} DepthFormat; - -/* - * ZFormat enum - */ - -typedef enum ZFormat { -Z_INVALID = 0x00000000, -Z_16 = 0x00000001, -Z_24 = 0x00000002, -Z_32_FLOAT = 0x00000003, -} ZFormat; - -/* - * StencilFormat enum - */ - -typedef enum StencilFormat { -STENCIL_INVALID = 0x00000000, -STENCIL_8 = 0x00000001, -} StencilFormat; - -/* - * CmaskMode enum - */ - -typedef enum CmaskMode { -CMASK_CLEAR_NONE = 0x00000000, -CMASK_CLEAR_ONE = 0x00000001, -CMASK_CLEAR_ALL = 0x00000002, -CMASK_ANY_EXPANDED = 0x00000003, -CMASK_ALPHA0_FRAG1 = 0x00000004, -CMASK_ALPHA0_FRAG2 = 0x00000005, -CMASK_ALPHA0_FRAG4 = 0x00000006, -CMASK_ALPHA0_FRAGS = 0x00000007, -CMASK_ALPHA1_FRAG1 = 0x00000008, -CMASK_ALPHA1_FRAG2 = 0x00000009, -CMASK_ALPHA1_FRAG4 = 0x0000000a, -CMASK_ALPHA1_FRAGS = 0x0000000b, -CMASK_ALPHAX_FRAG1 = 0x0000000c, -CMASK_ALPHAX_FRAG2 = 0x0000000d, -CMASK_ALPHAX_FRAG4 = 0x0000000e, -CMASK_ALPHAX_FRAGS = 0x0000000f, -} CmaskMode; - -/* - * QuadExportFormat enum - */ - -typedef enum QuadExportFormat { -EXPORT_UNUSED = 0x00000000, -EXPORT_32_R = 0x00000001, -EXPORT_32_GR = 0x00000002, -EXPORT_32_AR = 0x00000003, -EXPORT_FP16_ABGR = 0x00000004, -EXPORT_UNSIGNED16_ABGR = 0x00000005, -EXPORT_SIGNED16_ABGR = 0x00000006, -EXPORT_32_ABGR = 0x00000007, -EXPORT_32BPP_8PIX = 0x00000008, -EXPORT_16_16_UNSIGNED_8PIX = 0x00000009, -EXPORT_16_16_SIGNED_8PIX = 0x0000000a, -EXPORT_16_16_FLOAT_8PIX = 0x0000000b, -} QuadExportFormat; - -/* - * QuadExportFormatOld enum - */ - -typedef enum QuadExportFormatOld { -EXPORT_4P_32BPC_ABGR = 0x00000000, -EXPORT_4P_16BPC_ABGR = 0x00000001, -EXPORT_4P_32BPC_GR = 0x00000002, -EXPORT_4P_32BPC_AR = 0x00000003, -EXPORT_2P_32BPC_ABGR = 0x00000004, -EXPORT_8P_32BPC_R = 0x00000005, -} 
QuadExportFormatOld; - -/* - * ColorFormat enum - */ - -typedef enum ColorFormat { -COLOR_INVALID = 0x00000000, -COLOR_8 = 0x00000001, -COLOR_16 = 0x00000002, -COLOR_8_8 = 0x00000003, -COLOR_32 = 0x00000004, -COLOR_16_16 = 0x00000005, -COLOR_10_11_11 = 0x00000006, -COLOR_11_11_10 = 0x00000007, -COLOR_10_10_10_2 = 0x00000008, -COLOR_2_10_10_10 = 0x00000009, -COLOR_8_8_8_8 = 0x0000000a, -COLOR_32_32 = 0x0000000b, -COLOR_16_16_16_16 = 0x0000000c, -COLOR_RESERVED_13 = 0x0000000d, -COLOR_32_32_32_32 = 0x0000000e, -COLOR_RESERVED_15 = 0x0000000f, -COLOR_5_6_5 = 0x00000010, -COLOR_1_5_5_5 = 0x00000011, -COLOR_5_5_5_1 = 0x00000012, -COLOR_4_4_4_4 = 0x00000013, -COLOR_8_24 = 0x00000014, -COLOR_24_8 = 0x00000015, -COLOR_X24_8_32_FLOAT = 0x00000016, -COLOR_RESERVED_23 = 0x00000017, -COLOR_RESERVED_24 = 0x00000018, -COLOR_RESERVED_25 = 0x00000019, -COLOR_RESERVED_26 = 0x0000001a, -COLOR_RESERVED_27 = 0x0000001b, -COLOR_RESERVED_28 = 0x0000001c, -COLOR_RESERVED_29 = 0x0000001d, -COLOR_RESERVED_30 = 0x0000001e, -COLOR_2_10_10_10_6E4 = 0x0000001f, -} ColorFormat; - -/* - * SurfaceFormat enum - */ - -typedef enum SurfaceFormat { -FMT_INVALID = 0x00000000, -FMT_8 = 0x00000001, -FMT_16 = 0x00000002, -FMT_8_8 = 0x00000003, -FMT_32 = 0x00000004, -FMT_16_16 = 0x00000005, -FMT_10_11_11 = 0x00000006, -FMT_11_11_10 = 0x00000007, -FMT_10_10_10_2 = 0x00000008, -FMT_2_10_10_10 = 0x00000009, -FMT_8_8_8_8 = 0x0000000a, -FMT_32_32 = 0x0000000b, -FMT_16_16_16_16 = 0x0000000c, -FMT_32_32_32 = 0x0000000d, -FMT_32_32_32_32 = 0x0000000e, -FMT_RESERVED_4 = 0x0000000f, -FMT_5_6_5 = 0x00000010, -FMT_1_5_5_5 = 0x00000011, -FMT_5_5_5_1 = 0x00000012, -FMT_4_4_4_4 = 0x00000013, -FMT_8_24 = 0x00000014, -FMT_24_8 = 0x00000015, -FMT_X24_8_32_FLOAT = 0x00000016, -FMT_RESERVED_33 = 0x00000017, -FMT_11_11_10_FLOAT = 0x00000018, -FMT_16_FLOAT = 0x00000019, -FMT_32_FLOAT = 0x0000001a, -FMT_16_16_FLOAT = 0x0000001b, -FMT_8_24_FLOAT = 0x0000001c, -FMT_24_8_FLOAT = 0x0000001d, -FMT_32_32_FLOAT = 0x0000001e, 
-FMT_10_11_11_FLOAT = 0x0000001f, -FMT_16_16_16_16_FLOAT = 0x00000020, -FMT_3_3_2 = 0x00000021, -FMT_6_5_5 = 0x00000022, -FMT_32_32_32_32_FLOAT = 0x00000023, -FMT_RESERVED_36 = 0x00000024, -FMT_1 = 0x00000025, -FMT_1_REVERSED = 0x00000026, -FMT_GB_GR = 0x00000027, -FMT_BG_RG = 0x00000028, -FMT_32_AS_8 = 0x00000029, -FMT_32_AS_8_8 = 0x0000002a, -FMT_5_9_9_9_SHAREDEXP = 0x0000002b, -FMT_8_8_8 = 0x0000002c, -FMT_16_16_16 = 0x0000002d, -FMT_16_16_16_FLOAT = 0x0000002e, -FMT_4_4 = 0x0000002f, -FMT_32_32_32_FLOAT = 0x00000030, -FMT_BC1 = 0x00000031, -FMT_BC2 = 0x00000032, -FMT_BC3 = 0x00000033, -FMT_BC4 = 0x00000034, -FMT_BC5 = 0x00000035, -FMT_BC6 = 0x00000036, -FMT_BC7 = 0x00000037, -FMT_32_AS_32_32_32_32 = 0x00000038, -FMT_APC3 = 0x00000039, -FMT_APC4 = 0x0000003a, -FMT_APC5 = 0x0000003b, -FMT_APC6 = 0x0000003c, -FMT_APC7 = 0x0000003d, -FMT_CTX1 = 0x0000003e, -FMT_RESERVED_63 = 0x0000003f, -} SurfaceFormat; - -/* - * BUF_DATA_FORMAT enum - */ - -typedef enum BUF_DATA_FORMAT { -BUF_DATA_FORMAT_INVALID = 0x00000000, -BUF_DATA_FORMAT_8 = 0x00000001, -BUF_DATA_FORMAT_16 = 0x00000002, -BUF_DATA_FORMAT_8_8 = 0x00000003, -BUF_DATA_FORMAT_32 = 0x00000004, -BUF_DATA_FORMAT_16_16 = 0x00000005, -BUF_DATA_FORMAT_10_11_11 = 0x00000006, -BUF_DATA_FORMAT_11_11_10 = 0x00000007, -BUF_DATA_FORMAT_10_10_10_2 = 0x00000008, -BUF_DATA_FORMAT_2_10_10_10 = 0x00000009, -BUF_DATA_FORMAT_8_8_8_8 = 0x0000000a, -BUF_DATA_FORMAT_32_32 = 0x0000000b, -BUF_DATA_FORMAT_16_16_16_16 = 0x0000000c, -BUF_DATA_FORMAT_32_32_32 = 0x0000000d, -BUF_DATA_FORMAT_32_32_32_32 = 0x0000000e, -BUF_DATA_FORMAT_RESERVED_15 = 0x0000000f, -} BUF_DATA_FORMAT; - -/* - * IMG_DATA_FORMAT enum - */ - -typedef enum IMG_DATA_FORMAT { -IMG_DATA_FORMAT_INVALID = 0x00000000, -IMG_DATA_FORMAT_8 = 0x00000001, -IMG_DATA_FORMAT_16 = 0x00000002, -IMG_DATA_FORMAT_8_8 = 0x00000003, -IMG_DATA_FORMAT_32 = 0x00000004, -IMG_DATA_FORMAT_16_16 = 0x00000005, -IMG_DATA_FORMAT_10_11_11 = 0x00000006, -IMG_DATA_FORMAT_11_11_10 = 0x00000007, 
-IMG_DATA_FORMAT_10_10_10_2 = 0x00000008, -IMG_DATA_FORMAT_2_10_10_10 = 0x00000009, -IMG_DATA_FORMAT_8_8_8_8 = 0x0000000a, -IMG_DATA_FORMAT_32_32 = 0x0000000b, -IMG_DATA_FORMAT_16_16_16_16 = 0x0000000c, -IMG_DATA_FORMAT_32_32_32 = 0x0000000d, -IMG_DATA_FORMAT_32_32_32_32 = 0x0000000e, -IMG_DATA_FORMAT_RESERVED_15 = 0x0000000f, -IMG_DATA_FORMAT_5_6_5 = 0x00000010, -IMG_DATA_FORMAT_1_5_5_5 = 0x00000011, -IMG_DATA_FORMAT_5_5_5_1 = 0x00000012, -IMG_DATA_FORMAT_4_4_4_4 = 0x00000013, -IMG_DATA_FORMAT_8_24 = 0x00000014, -IMG_DATA_FORMAT_24_8 = 0x00000015, -IMG_DATA_FORMAT_X24_8_32 = 0x00000016, -IMG_DATA_FORMAT_8_AS_8_8_8_8 = 0x00000017, -IMG_DATA_FORMAT_ETC2_RGB = 0x00000018, -IMG_DATA_FORMAT_ETC2_RGBA = 0x00000019, -IMG_DATA_FORMAT_ETC2_R = 0x0000001a, -IMG_DATA_FORMAT_ETC2_RG = 0x0000001b, -IMG_DATA_FORMAT_ETC2_RGBA1 = 0x0000001c, -IMG_DATA_FORMAT_RESERVED_29 = 0x0000001d, -IMG_DATA_FORMAT_RESERVED_30 = 0x0000001e, -IMG_DATA_FORMAT_6E4 = 0x0000001f, -IMG_DATA_FORMAT_GB_GR = 0x00000020, -IMG_DATA_FORMAT_BG_RG = 0x00000021, -IMG_DATA_FORMAT_5_9_9_9 = 0x00000022, -IMG_DATA_FORMAT_BC1 = 0x00000023, -IMG_DATA_FORMAT_BC2 = 0x00000024, -IMG_DATA_FORMAT_BC3 = 0x00000025, -IMG_DATA_FORMAT_BC4 = 0x00000026, -IMG_DATA_FORMAT_BC5 = 0x00000027, -IMG_DATA_FORMAT_BC6 = 0x00000028, -IMG_DATA_FORMAT_BC7 = 0x00000029, -IMG_DATA_FORMAT_16_AS_32_32 = 0x0000002a, -IMG_DATA_FORMAT_16_AS_16_16_16_16 = 0x0000002b, -IMG_DATA_FORMAT_16_AS_32_32_32_32 = 0x0000002c, -IMG_DATA_FORMAT_FMASK = 0x0000002d, -IMG_DATA_FORMAT_ASTC_2D_LDR = 0x0000002e, -IMG_DATA_FORMAT_ASTC_2D_HDR = 0x0000002f, -IMG_DATA_FORMAT_ASTC_2D_LDR_SRGB = 0x00000030, -IMG_DATA_FORMAT_ASTC_3D_LDR = 0x00000031, -IMG_DATA_FORMAT_ASTC_3D_HDR = 0x00000032, -IMG_DATA_FORMAT_ASTC_3D_LDR_SRGB = 0x00000033, -IMG_DATA_FORMAT_N_IN_16 = 0x00000034, -IMG_DATA_FORMAT_N_IN_16_16 = 0x00000035, -IMG_DATA_FORMAT_N_IN_16_16_16_16 = 0x00000036, -IMG_DATA_FORMAT_N_IN_16_AS_16_16_16_16 = 0x00000037, -IMG_DATA_FORMAT_RESERVED_56 = 0x00000038, 
-IMG_DATA_FORMAT_4_4 = 0x00000039, -IMG_DATA_FORMAT_6_5_5 = 0x0000003a, -IMG_DATA_FORMAT_RESERVED_59 = 0x0000003b, -IMG_DATA_FORMAT_RESERVED_60 = 0x0000003c, -IMG_DATA_FORMAT_8_AS_32 = 0x0000003d, -IMG_DATA_FORMAT_8_AS_32_32 = 0x0000003e, -IMG_DATA_FORMAT_32_AS_32_32_32_32 = 0x0000003f, -} IMG_DATA_FORMAT; - -/* - * BUF_NUM_FORMAT enum - */ - -typedef enum BUF_NUM_FORMAT { -BUF_NUM_FORMAT_UNORM = 0x00000000, -BUF_NUM_FORMAT_SNORM = 0x00000001, -BUF_NUM_FORMAT_USCALED = 0x00000002, -BUF_NUM_FORMAT_SSCALED = 0x00000003, -BUF_NUM_FORMAT_UINT = 0x00000004, -BUF_NUM_FORMAT_SINT = 0x00000005, -BUF_NUM_FORMAT_UNORM_UINT = 0x00000006, -BUF_NUM_FORMAT_FLOAT = 0x00000007, -} BUF_NUM_FORMAT; - -/* - * IMG_NUM_FORMAT enum - */ - -typedef enum IMG_NUM_FORMAT { -IMG_NUM_FORMAT_UNORM = 0x00000000, -IMG_NUM_FORMAT_SNORM = 0x00000001, -IMG_NUM_FORMAT_USCALED = 0x00000002, -IMG_NUM_FORMAT_SSCALED = 0x00000003, -IMG_NUM_FORMAT_UINT = 0x00000004, -IMG_NUM_FORMAT_SINT = 0x00000005, -IMG_NUM_FORMAT_UNORM_UINT = 0x00000006, -IMG_NUM_FORMAT_FLOAT = 0x00000007, -IMG_NUM_FORMAT_RESERVED_8 = 0x00000008, -IMG_NUM_FORMAT_SRGB = 0x00000009, -IMG_NUM_FORMAT_RESERVED_10 = 0x0000000a, -IMG_NUM_FORMAT_RESERVED_11 = 0x0000000b, -IMG_NUM_FORMAT_RESERVED_12 = 0x0000000c, -IMG_NUM_FORMAT_RESERVED_13 = 0x0000000d, -IMG_NUM_FORMAT_RESERVED_14 = 0x0000000e, -IMG_NUM_FORMAT_RESERVED_15 = 0x0000000f, -} IMG_NUM_FORMAT; - -/* - * IMG_NUM_FORMAT_FMASK enum - */ - -typedef enum IMG_NUM_FORMAT_FMASK { -IMG_NUM_FORMAT_FMASK_8_2_1 = 0x00000000, -IMG_NUM_FORMAT_FMASK_8_4_1 = 0x00000001, -IMG_NUM_FORMAT_FMASK_8_8_1 = 0x00000002, -IMG_NUM_FORMAT_FMASK_8_2_2 = 0x00000003, -IMG_NUM_FORMAT_FMASK_8_4_2 = 0x00000004, -IMG_NUM_FORMAT_FMASK_8_4_4 = 0x00000005, -IMG_NUM_FORMAT_FMASK_16_16_1 = 0x00000006, -IMG_NUM_FORMAT_FMASK_16_8_2 = 0x00000007, -IMG_NUM_FORMAT_FMASK_32_16_2 = 0x00000008, -IMG_NUM_FORMAT_FMASK_32_8_4 = 0x00000009, -IMG_NUM_FORMAT_FMASK_32_8_8 = 0x0000000a, -IMG_NUM_FORMAT_FMASK_64_16_4 = 0x0000000b, 
-IMG_NUM_FORMAT_FMASK_64_16_8 = 0x0000000c, -IMG_NUM_FORMAT_FMASK_RESERVED_13 = 0x0000000d, -IMG_NUM_FORMAT_FMASK_RESERVED_14 = 0x0000000e, -IMG_NUM_FORMAT_FMASK_RESERVED_15 = 0x0000000f, -} IMG_NUM_FORMAT_FMASK; - -/* - * IMG_NUM_FORMAT_N_IN_16 enum - */ - -typedef enum IMG_NUM_FORMAT_N_IN_16 { -IMG_NUM_FORMAT_N_IN_16_RESERVED_0 = 0x00000000, -IMG_NUM_FORMAT_N_IN_16_UNORM_10 = 0x00000001, -IMG_NUM_FORMAT_N_IN_16_UNORM_9 = 0x00000002, -IMG_NUM_FORMAT_N_IN_16_RESERVED_3 = 0x00000003, -IMG_NUM_FORMAT_N_IN_16_UINT_10 = 0x00000004, -IMG_NUM_FORMAT_N_IN_16_UINT_9 = 0x00000005, -IMG_NUM_FORMAT_N_IN_16_RESERVED_6 = 0x00000006, -IMG_NUM_FORMAT_N_IN_16_UNORM_UINT_10 = 0x00000007, -IMG_NUM_FORMAT_N_IN_16_UNORM_UINT_9 = 0x00000008, -IMG_NUM_FORMAT_N_IN_16_RESERVED_9 = 0x00000009, -IMG_NUM_FORMAT_N_IN_16_RESERVED_10 = 0x0000000a, -IMG_NUM_FORMAT_N_IN_16_RESERVED_11 = 0x0000000b, -IMG_NUM_FORMAT_N_IN_16_RESERVED_12 = 0x0000000c, -IMG_NUM_FORMAT_N_IN_16_RESERVED_13 = 0x0000000d, -IMG_NUM_FORMAT_N_IN_16_RESERVED_14 = 0x0000000e, -IMG_NUM_FORMAT_N_IN_16_RESERVED_15 = 0x0000000f, -} IMG_NUM_FORMAT_N_IN_16; - -/* - * IMG_NUM_FORMAT_ASTC_2D enum - */ - -typedef enum IMG_NUM_FORMAT_ASTC_2D { -IMG_NUM_FORMAT_ASTC_2D_4x4 = 0x00000000, -IMG_NUM_FORMAT_ASTC_2D_5x4 = 0x00000001, -IMG_NUM_FORMAT_ASTC_2D_5x5 = 0x00000002, -IMG_NUM_FORMAT_ASTC_2D_6x5 = 0x00000003, -IMG_NUM_FORMAT_ASTC_2D_6x6 = 0x00000004, -IMG_NUM_FORMAT_ASTC_2D_8x5 = 0x00000005, -IMG_NUM_FORMAT_ASTC_2D_8x6 = 0x00000006, -IMG_NUM_FORMAT_ASTC_2D_8x8 = 0x00000007, -IMG_NUM_FORMAT_ASTC_2D_10x5 = 0x00000008, -IMG_NUM_FORMAT_ASTC_2D_10x6 = 0x00000009, -IMG_NUM_FORMAT_ASTC_2D_10x8 = 0x0000000a, -IMG_NUM_FORMAT_ASTC_2D_10x10 = 0x0000000b, -IMG_NUM_FORMAT_ASTC_2D_12x10 = 0x0000000c, -IMG_NUM_FORMAT_ASTC_2D_12x12 = 0x0000000d, -IMG_NUM_FORMAT_ASTC_2D_RESERVED_14 = 0x0000000e, -IMG_NUM_FORMAT_ASTC_2D_RESERVED_15 = 0x0000000f, -} IMG_NUM_FORMAT_ASTC_2D; - -/* - * IMG_NUM_FORMAT_ASTC_3D enum - */ - -typedef enum IMG_NUM_FORMAT_ASTC_3D { 
-IMG_NUM_FORMAT_ASTC_3D_3x3x3 = 0x00000000, -IMG_NUM_FORMAT_ASTC_3D_4x3x3 = 0x00000001, -IMG_NUM_FORMAT_ASTC_3D_4x4x3 = 0x00000002, -IMG_NUM_FORMAT_ASTC_3D_4x4x4 = 0x00000003, -IMG_NUM_FORMAT_ASTC_3D_5x4x4 = 0x00000004, -IMG_NUM_FORMAT_ASTC_3D_5x5x4 = 0x00000005, -IMG_NUM_FORMAT_ASTC_3D_5x5x5 = 0x00000006, -IMG_NUM_FORMAT_ASTC_3D_6x5x5 = 0x00000007, -IMG_NUM_FORMAT_ASTC_3D_6x6x5 = 0x00000008, -IMG_NUM_FORMAT_ASTC_3D_6x6x6 = 0x00000009, -IMG_NUM_FORMAT_ASTC_3D_RESERVED_10 = 0x0000000a, -IMG_NUM_FORMAT_ASTC_3D_RESERVED_11 = 0x0000000b, -IMG_NUM_FORMAT_ASTC_3D_RESERVED_12 = 0x0000000c, -IMG_NUM_FORMAT_ASTC_3D_RESERVED_13 = 0x0000000d, -IMG_NUM_FORMAT_ASTC_3D_RESERVED_14 = 0x0000000e, -IMG_NUM_FORMAT_ASTC_3D_RESERVED_15 = 0x0000000f, -} IMG_NUM_FORMAT_ASTC_3D; - -/* - * TileType enum - */ - -typedef enum TileType { -ARRAY_COLOR_TILE = 0x00000000, -ARRAY_DEPTH_TILE = 0x00000001, -} TileType; - -/* - * NonDispTilingOrder enum - */ - -typedef enum NonDispTilingOrder { -ADDR_SURF_MICRO_TILING_DISPLAY = 0x00000000, -ADDR_SURF_MICRO_TILING_NON_DISPLAY = 0x00000001, -} NonDispTilingOrder; - -/* - * MicroTileMode enum - */ - -typedef enum MicroTileMode { -ADDR_SURF_DISPLAY_MICRO_TILING = 0x00000000, -ADDR_SURF_THIN_MICRO_TILING = 0x00000001, -ADDR_SURF_DEPTH_MICRO_TILING = 0x00000002, -ADDR_SURF_ROTATED_MICRO_TILING = 0x00000003, -ADDR_SURF_THICK_MICRO_TILING = 0x00000004, -} MicroTileMode; - -/* - * TileSplit enum - */ - -typedef enum TileSplit { -ADDR_SURF_TILE_SPLIT_64B = 0x00000000, -ADDR_SURF_TILE_SPLIT_128B = 0x00000001, -ADDR_SURF_TILE_SPLIT_256B = 0x00000002, -ADDR_SURF_TILE_SPLIT_512B = 0x00000003, -ADDR_SURF_TILE_SPLIT_1KB = 0x00000004, -ADDR_SURF_TILE_SPLIT_2KB = 0x00000005, -ADDR_SURF_TILE_SPLIT_4KB = 0x00000006, -} TileSplit; - -/* - * SampleSplit enum - */ - -typedef enum SampleSplit { -ADDR_SURF_SAMPLE_SPLIT_1 = 0x00000000, -ADDR_SURF_SAMPLE_SPLIT_2 = 0x00000001, -ADDR_SURF_SAMPLE_SPLIT_4 = 0x00000002, -ADDR_SURF_SAMPLE_SPLIT_8 = 0x00000003, -} SampleSplit; - 
-/* - * PipeConfig enum - */ - -typedef enum PipeConfig { -ADDR_SURF_P2 = 0x00000000, -ADDR_SURF_P2_RESERVED0 = 0x00000001, -ADDR_SURF_P2_RESERVED1 = 0x00000002, -ADDR_SURF_P2_RESERVED2 = 0x00000003, -ADDR_SURF_P4_8x16 = 0x00000004, -ADDR_SURF_P4_16x16 = 0x00000005, -ADDR_SURF_P4_16x32 = 0x00000006, -ADDR_SURF_P4_32x32 = 0x00000007, -ADDR_SURF_P8_16x16_8x16 = 0x00000008, -ADDR_SURF_P8_16x32_8x16 = 0x00000009, -ADDR_SURF_P8_32x32_8x16 = 0x0000000a, -ADDR_SURF_P8_16x32_16x16 = 0x0000000b, -ADDR_SURF_P8_32x32_16x16 = 0x0000000c, -ADDR_SURF_P8_32x32_16x32 = 0x0000000d, -ADDR_SURF_P8_32x64_32x32 = 0x0000000e, -ADDR_SURF_P8_RESERVED0 = 0x0000000f, -ADDR_SURF_P16_32x32_8x16 = 0x00000010, -ADDR_SURF_P16_32x32_16x16 = 0x00000011, -} PipeConfig; - -/* - * SeEnable enum - */ - -typedef enum SeEnable { -ADDR_CONFIG_DISABLE_SE = 0x00000000, -ADDR_CONFIG_ENABLE_SE = 0x00000001, -} SeEnable; - -/* - * NumBanks enum - */ - -typedef enum NumBanks { -ADDR_SURF_2_BANK = 0x00000000, -ADDR_SURF_4_BANK = 0x00000001, -ADDR_SURF_8_BANK = 0x00000002, -ADDR_SURF_16_BANK = 0x00000003, -} NumBanks; - -/* - * BankWidth enum - */ - -typedef enum BankWidth { -ADDR_SURF_BANK_WIDTH_1 = 0x00000000, -ADDR_SURF_BANK_WIDTH_2 = 0x00000001, -ADDR_SURF_BANK_WIDTH_4 = 0x00000002, -ADDR_SURF_BANK_WIDTH_8 = 0x00000003, -} BankWidth; - -/* - * BankHeight enum - */ - -typedef enum BankHeight { -ADDR_SURF_BANK_HEIGHT_1 = 0x00000000, -ADDR_SURF_BANK_HEIGHT_2 = 0x00000001, -ADDR_SURF_BANK_HEIGHT_4 = 0x00000002, -ADDR_SURF_BANK_HEIGHT_8 = 0x00000003, -} BankHeight; - -/* - * BankWidthHeight enum - */ - -typedef enum BankWidthHeight { -ADDR_SURF_BANK_WH_1 = 0x00000000, -ADDR_SURF_BANK_WH_2 = 0x00000001, -ADDR_SURF_BANK_WH_4 = 0x00000002, -ADDR_SURF_BANK_WH_8 = 0x00000003, -} BankWidthHeight; - -/* - * MacroTileAspect enum - */ - -typedef enum MacroTileAspect { -ADDR_SURF_MACRO_ASPECT_1 = 0x00000000, -ADDR_SURF_MACRO_ASPECT_2 = 0x00000001, -ADDR_SURF_MACRO_ASPECT_4 = 0x00000002, -ADDR_SURF_MACRO_ASPECT_8 = 
0x00000003, -} MacroTileAspect; - -/* - * GATCL1RequestType enum - */ - -typedef enum GATCL1RequestType { -GATCL1_TYPE_NORMAL = 0x00000000, -GATCL1_TYPE_SHOOTDOWN = 0x00000001, -GATCL1_TYPE_BYPASS = 0x00000002, -} GATCL1RequestType; - -/* - * UTCL1RequestType enum - */ - -typedef enum UTCL1RequestType { -UTCL1_TYPE_NORMAL = 0x00000000, -UTCL1_TYPE_SHOOTDOWN = 0x00000001, -UTCL1_TYPE_BYPASS = 0x00000002, -} UTCL1RequestType; - -/* - * UTCL1FaultType enum - */ - -typedef enum UTCL1FaultType { -UTCL1_XNACK_SUCCESS = 0x00000000, -UTCL1_XNACK_RETRY = 0x00000001, -UTCL1_XNACK_PRT = 0x00000002, -UTCL1_XNACK_NO_RETRY = 0x00000003, -} UTCL1FaultType; - -/* - * TCC_CACHE_POLICIES enum - */ - -typedef enum TCC_CACHE_POLICIES { -TCC_CACHE_POLICY_LRU = 0x00000000, -TCC_CACHE_POLICY_STREAM = 0x00000001, -} TCC_CACHE_POLICIES; - -/* - * MTYPE enum - */ - -typedef enum MTYPE { -MTYPE_NC = 0x00000000, -MTYPE_WC = 0x00000001, -MTYPE_CC = 0x00000002, -MTYPE_UC = 0x00000003, -} MTYPE; - -/* - * RMI_CID enum - */ - -typedef enum RMI_CID { -RMI_CID_CC = 0x00000000, -RMI_CID_FC = 0x00000001, -RMI_CID_CM = 0x00000002, -RMI_CID_DC = 0x00000003, -RMI_CID_Z = 0x00000004, -RMI_CID_S = 0x00000005, -RMI_CID_TILE = 0x00000006, -RMI_CID_ZPCPSD = 0x00000007, -} RMI_CID; - -/* - * PERFMON_COUNTER_MODE enum - */ - -typedef enum PERFMON_COUNTER_MODE { -PERFMON_COUNTER_MODE_ACCUM = 0x00000000, -PERFMON_COUNTER_MODE_ACTIVE_CYCLES = 0x00000001, -PERFMON_COUNTER_MODE_MAX = 0x00000002, -PERFMON_COUNTER_MODE_DIRTY = 0x00000003, -PERFMON_COUNTER_MODE_SAMPLE = 0x00000004, -PERFMON_COUNTER_MODE_CYCLES_SINCE_FIRST_EVENT = 0x00000005, -PERFMON_COUNTER_MODE_CYCLES_SINCE_LAST_EVENT = 0x00000006, -PERFMON_COUNTER_MODE_CYCLES_GE_HI = 0x00000007, -PERFMON_COUNTER_MODE_CYCLES_EQ_HI = 0x00000008, -PERFMON_COUNTER_MODE_INACTIVE_CYCLES = 0x00000009, -PERFMON_COUNTER_MODE_RESERVED = 0x0000000f, -} PERFMON_COUNTER_MODE; - -/* - * PERFMON_SPM_MODE enum - */ - -typedef enum PERFMON_SPM_MODE { -PERFMON_SPM_MODE_OFF = 
0x00000000, -PERFMON_SPM_MODE_16BIT_CLAMP = 0x00000001, -PERFMON_SPM_MODE_16BIT_NO_CLAMP = 0x00000002, -PERFMON_SPM_MODE_32BIT_CLAMP = 0x00000003, -PERFMON_SPM_MODE_32BIT_NO_CLAMP = 0x00000004, -PERFMON_SPM_MODE_RESERVED_5 = 0x00000005, -PERFMON_SPM_MODE_RESERVED_6 = 0x00000006, -PERFMON_SPM_MODE_RESERVED_7 = 0x00000007, -PERFMON_SPM_MODE_TEST_MODE_0 = 0x00000008, -PERFMON_SPM_MODE_TEST_MODE_1 = 0x00000009, -PERFMON_SPM_MODE_TEST_MODE_2 = 0x0000000a, -} PERFMON_SPM_MODE; - -/* - * SurfaceTiling enum - */ - -typedef enum SurfaceTiling { -ARRAY_LINEAR = 0x00000000, -ARRAY_TILED = 0x00000001, -} SurfaceTiling; - -/* - * SurfaceArray enum - */ - -typedef enum SurfaceArray { -ARRAY_1D = 0x00000000, -ARRAY_2D = 0x00000001, -ARRAY_3D = 0x00000002, -ARRAY_3D_SLICE = 0x00000003, -} SurfaceArray; - -/* - * ColorArray enum - */ - -typedef enum ColorArray { -ARRAY_2D_ALT_COLOR = 0x00000000, -ARRAY_2D_COLOR = 0x00000001, -ARRAY_3D_SLICE_COLOR = 0x00000003, -} ColorArray; - -/* - * DepthArray enum - */ - -typedef enum DepthArray { -ARRAY_2D_ALT_DEPTH = 0x00000000, -ARRAY_2D_DEPTH = 0x00000001, -} DepthArray; - -/* - * ENUM_NUM_SIMD_PER_CU enum - */ - -typedef enum ENUM_NUM_SIMD_PER_CU { -NUM_SIMD_PER_CU = 0x00000004, -} ENUM_NUM_SIMD_PER_CU; - -/* - * DSM_ENABLE_ERROR_INJECT enum - */ - -typedef enum DSM_ENABLE_ERROR_INJECT { -DSM_ENABLE_ERROR_INJECT_FED_IN = 0x00000000, -DSM_ENABLE_ERROR_INJECT_SINGLE = 0x00000001, -DSM_ENABLE_ERROR_INJECT_UNCORRECTABLE = 0x00000002, -DSM_ENABLE_ERROR_INJECT_UNCORRECTABLE_LIMITED = 0x00000003, -} DSM_ENABLE_ERROR_INJECT; - -/* - * DSM_SELECT_INJECT_DELAY enum - */ - -typedef enum DSM_SELECT_INJECT_DELAY { -DSM_SELECT_INJECT_DELAY_NO_DELAY = 0x00000000, -DSM_SELECT_INJECT_DELAY_DELAY_ERROR = 0x00000001, -} DSM_SELECT_INJECT_DELAY; - -/* - * DSM_DATA_SEL enum - */ - -typedef enum DSM_DATA_SEL { -DSM_DATA_SEL_DISABLE = 0x00000000, -DSM_DATA_SEL_0 = 0x00000001, -DSM_DATA_SEL_1 = 0x00000002, -DSM_DATA_SEL_BOTH = 0x00000003, -} DSM_DATA_SEL; - -/* - 
* DSM_SINGLE_WRITE enum - */ - -typedef enum DSM_SINGLE_WRITE { -DSM_SINGLE_WRITE_DIS = 0x00000000, -DSM_SINGLE_WRITE_EN = 0x00000001, -} DSM_SINGLE_WRITE; - -/* - * SWIZZLE_TYPE_ENUM enum - */ - -typedef enum SWIZZLE_TYPE_ENUM { -SW_Z = 0x00000000, -SW_S = 0x00000001, -SW_D = 0x00000002, -SW_R = 0x00000003, -SW_L = 0x00000004, -} SWIZZLE_TYPE_ENUM; - -/* - * TC_MICRO_TILE_MODE enum - */ - -typedef enum TC_MICRO_TILE_MODE { -MICRO_TILE_MODE_LINEAR = 0x00000000, -MICRO_TILE_MODE_ROTATED = 0x00000001, -MICRO_TILE_MODE_STD_2D = 0x00000002, -MICRO_TILE_MODE_STD_3D = 0x00000003, -MICRO_TILE_MODE_DISPLAY_2D = 0x00000004, -MICRO_TILE_MODE_DISPLAY_3D = 0x00000005, -MICRO_TILE_MODE_Z_2D = 0x00000006, -MICRO_TILE_MODE_Z_3D = 0x00000007, -} TC_MICRO_TILE_MODE; - -/* - * SWIZZLE_MODE_ENUM enum - */ - -typedef enum SWIZZLE_MODE_ENUM { -SW_LINEAR = 0x00000000, -SW_256B_S = 0x00000001, -SW_256B_D = 0x00000002, -SW_256B_R = 0x00000003, -SW_4KB_Z = 0x00000004, -SW_4KB_S = 0x00000005, -SW_4KB_D = 0x00000006, -SW_4KB_R = 0x00000007, -SW_64KB_Z = 0x00000008, -SW_64KB_S = 0x00000009, -SW_64KB_D = 0x0000000a, -SW_64KB_R = 0x0000000b, -SW_VAR_Z = 0x0000000c, -SW_VAR_S = 0x0000000d, -SW_VAR_D = 0x0000000e, -SW_VAR_R = 0x0000000f, -SW_RESERVED_16 = 0x00000010, -SW_RESERVED_17 = 0x00000011, -SW_RESERVED_18 = 0x00000012, -SW_RESERVED_19 = 0x00000013, -SW_4KB_Z_X = 0x00000014, -SW_4KB_S_X = 0x00000015, -SW_4KB_D_X = 0x00000016, -SW_4KB_R_X = 0x00000017, -SW_64KB_Z_X = 0x00000018, -SW_64KB_S_X = 0x00000019, -SW_64KB_D_X = 0x0000001a, -SW_64KB_R_X = 0x0000001b, -SW_VAR_Z_X = 0x0000001c, -SW_VAR_S_X = 0x0000001d, -SW_VAR_D_X = 0x0000001e, -SW_VAR_R_X = 0x0000001f, -} SWIZZLE_MODE_ENUM; - -/******************************************************* - * IH Enums - *******************************************************/ - -/* - * IH_PERF_SEL enum - */ - -typedef enum IH_PERF_SEL { -IH_PERF_SEL_CYCLE = 0x00000000, -IH_PERF_SEL_IDLE = 0x00000001, -IH_PERF_SEL_INPUT_IDLE = 0x00000002, 
-IH_PERF_SEL_BUFFER_IDLE = 0x00000003, -IH_PERF_SEL_RB0_FULL = 0x00000004, -IH_PERF_SEL_RB0_OVERFLOW = 0x00000005, -IH_PERF_SEL_RB0_WPTR_WRITEBACK = 0x00000006, -IH_PERF_SEL_RB0_WPTR_WRAP = 0x00000007, -IH_PERF_SEL_RB0_RPTR_WRAP = 0x00000008, -IH_PERF_SEL_MC_WR_IDLE = 0x00000009, -IH_PERF_SEL_MC_WR_COUNT = 0x0000000a, -IH_PERF_SEL_MC_WR_STALL = 0x0000000b, -IH_PERF_SEL_MC_WR_CLEAN_PENDING = 0x0000000c, -IH_PERF_SEL_MC_WR_CLEAN_STALL = 0x0000000d, -IH_PERF_SEL_BIF_LINE0_RISING = 0x0000000e, -IH_PERF_SEL_BIF_LINE0_FALLING = 0x0000000f, -IH_PERF_SEL_RB1_FULL = 0x00000010, -IH_PERF_SEL_RB1_OVERFLOW = 0x00000011, -Reserved18 = 0x00000012, -IH_PERF_SEL_RB1_WPTR_WRAP = 0x00000013, -IH_PERF_SEL_RB1_RPTR_WRAP = 0x00000014, -IH_PERF_SEL_RB2_FULL = 0x00000015, -IH_PERF_SEL_RB2_OVERFLOW = 0x00000016, -Reserved23 = 0x00000017, -IH_PERF_SEL_RB2_WPTR_WRAP = 0x00000018, -IH_PERF_SEL_RB2_RPTR_WRAP = 0x00000019, -Reserved26 = 0x0000001a, -Reserved27 = 0x0000001b, -Reserved28 = 0x0000001c, -Reserved29 = 0x0000001d, -IH_PERF_SEL_RB0_FULL_VF0 = 0x0000001e, -IH_PERF_SEL_RB0_FULL_VF1 = 0x0000001f, -IH_PERF_SEL_RB0_FULL_VF2 = 0x00000020, -IH_PERF_SEL_RB0_FULL_VF3 = 0x00000021, -IH_PERF_SEL_RB0_FULL_VF4 = 0x00000022, -IH_PERF_SEL_RB0_FULL_VF5 = 0x00000023, -IH_PERF_SEL_RB0_FULL_VF6 = 0x00000024, -IH_PERF_SEL_RB0_FULL_VF7 = 0x00000025, -IH_PERF_SEL_RB0_FULL_VF8 = 0x00000026, -IH_PERF_SEL_RB0_FULL_VF9 = 0x00000027, -IH_PERF_SEL_RB0_FULL_VF10 = 0x00000028, -IH_PERF_SEL_RB0_FULL_VF11 = 0x00000029, -IH_PERF_SEL_RB0_FULL_VF12 = 0x0000002a, -IH_PERF_SEL_RB0_FULL_VF13 = 0x0000002b, -IH_PERF_SEL_RB0_FULL_VF14 = 0x0000002c, -IH_PERF_SEL_RB0_FULL_VF15 = 0x0000002d, -IH_PERF_SEL_RB0_OVERFLOW_VF0 = 0x0000002e, -IH_PERF_SEL_RB0_OVERFLOW_VF1 = 0x0000002f, -IH_PERF_SEL_RB0_OVERFLOW_VF2 = 0x00000030, -IH_PERF_SEL_RB0_OVERFLOW_VF3 = 0x00000031, -IH_PERF_SEL_RB0_OVERFLOW_VF4 = 0x00000032, -IH_PERF_SEL_RB0_OVERFLOW_VF5 = 0x00000033, -IH_PERF_SEL_RB0_OVERFLOW_VF6 = 0x00000034, -IH_PERF_SEL_RB0_OVERFLOW_VF7 = 
0x00000035, -IH_PERF_SEL_RB0_OVERFLOW_VF8 = 0x00000036, -IH_PERF_SEL_RB0_OVERFLOW_VF9 = 0x00000037, -IH_PERF_SEL_RB0_OVERFLOW_VF10 = 0x00000038, -IH_PERF_SEL_RB0_OVERFLOW_VF11 = 0x00000039, -IH_PERF_SEL_RB0_OVERFLOW_VF12 = 0x0000003a, -IH_PERF_SEL_RB0_OVERFLOW_VF13 = 0x0000003b, -IH_PERF_SEL_RB0_OVERFLOW_VF14 = 0x0000003c, -IH_PERF_SEL_RB0_OVERFLOW_VF15 = 0x0000003d, -IH_PERF_SEL_RB0_WPTR_WRITEBACK_VF0 = 0x0000003e, -IH_PERF_SEL_RB0_WPTR_WRITEBACK_VF1 = 0x0000003f, -IH_PERF_SEL_RB0_WPTR_WRITEBACK_VF2 = 0x00000040, -IH_PERF_SEL_RB0_WPTR_WRITEBACK_VF3 = 0x00000041, -IH_PERF_SEL_RB0_WPTR_WRITEBACK_VF4 = 0x00000042, -IH_PERF_SEL_RB0_WPTR_WRITEBACK_VF5 = 0x00000043, -IH_PERF_SEL_RB0_WPTR_WRITEBACK_VF6 = 0x00000044, -IH_PERF_SEL_RB0_WPTR_WRITEBACK_VF7 = 0x00000045, -IH_PERF_SEL_RB0_WPTR_WRITEBACK_VF8 = 0x00000046, -IH_PERF_SEL_RB0_WPTR_WRITEBACK_VF9 = 0x00000047, -IH_PERF_SEL_RB0_WPTR_WRITEBACK_VF10 = 0x00000048, -IH_PERF_SEL_RB0_WPTR_WRITEBACK_VF11 = 0x00000049, -IH_PERF_SEL_RB0_WPTR_WRITEBACK_VF12 = 0x0000004a, -IH_PERF_SEL_RB0_WPTR_WRITEBACK_VF13 = 0x0000004b, -IH_PERF_SEL_RB0_WPTR_WRITEBACK_VF14 = 0x0000004c, -IH_PERF_SEL_RB0_WPTR_WRITEBACK_VF15 = 0x0000004d, -IH_PERF_SEL_RB0_WPTR_WRAP_VF0 = 0x0000004e, -IH_PERF_SEL_RB0_WPTR_WRAP_VF1 = 0x0000004f, -IH_PERF_SEL_RB0_WPTR_WRAP_VF2 = 0x00000050, -IH_PERF_SEL_RB0_WPTR_WRAP_VF3 = 0x00000051, -IH_PERF_SEL_RB0_WPTR_WRAP_VF4 = 0x00000052, -IH_PERF_SEL_RB0_WPTR_WRAP_VF5 = 0x00000053, -IH_PERF_SEL_RB0_WPTR_WRAP_VF6 = 0x00000054, -IH_PERF_SEL_RB0_WPTR_WRAP_VF7 = 0x00000055, -IH_PERF_SEL_RB0_WPTR_WRAP_VF8 = 0x00000056, -IH_PERF_SEL_RB0_WPTR_WRAP_VF9 = 0x00000057, -IH_PERF_SEL_RB0_WPTR_WRAP_VF10 = 0x00000058, -IH_PERF_SEL_RB0_WPTR_WRAP_VF11 = 0x00000059, -IH_PERF_SEL_RB0_WPTR_WRAP_VF12 = 0x0000005a, -IH_PERF_SEL_RB0_WPTR_WRAP_VF13 = 0x0000005b, -IH_PERF_SEL_RB0_WPTR_WRAP_VF14 = 0x0000005c, -IH_PERF_SEL_RB0_WPTR_WRAP_VF15 = 0x0000005d, -IH_PERF_SEL_RB0_RPTR_WRAP_VF0 = 0x0000005e, -IH_PERF_SEL_RB0_RPTR_WRAP_VF1 = 0x0000005f, 
-IH_PERF_SEL_RB0_RPTR_WRAP_VF2 = 0x00000060, -IH_PERF_SEL_RB0_RPTR_WRAP_VF3 = 0x00000061, -IH_PERF_SEL_RB0_RPTR_WRAP_VF4 = 0x00000062, -IH_PERF_SEL_RB0_RPTR_WRAP_VF5 = 0x00000063, -IH_PERF_SEL_RB0_RPTR_WRAP_VF6 = 0x00000064, -IH_PERF_SEL_RB0_RPTR_WRAP_VF7 = 0x00000065, -IH_PERF_SEL_RB0_RPTR_WRAP_VF8 = 0x00000066, -IH_PERF_SEL_RB0_RPTR_WRAP_VF9 = 0x00000067, -IH_PERF_SEL_RB0_RPTR_WRAP_VF10 = 0x00000068, -IH_PERF_SEL_RB0_RPTR_WRAP_VF11 = 0x00000069, -IH_PERF_SEL_RB0_RPTR_WRAP_VF12 = 0x0000006a, -IH_PERF_SEL_RB0_RPTR_WRAP_VF13 = 0x0000006b, -IH_PERF_SEL_RB0_RPTR_WRAP_VF14 = 0x0000006c, -IH_PERF_SEL_RB0_RPTR_WRAP_VF15 = 0x0000006d, -IH_PERF_SEL_BIF_LINE0_RISING_VF0 = 0x0000006e, -IH_PERF_SEL_BIF_LINE0_RISING_VF1 = 0x0000006f, -IH_PERF_SEL_BIF_LINE0_RISING_VF2 = 0x00000070, -IH_PERF_SEL_BIF_LINE0_RISING_VF3 = 0x00000071, -IH_PERF_SEL_BIF_LINE0_RISING_VF4 = 0x00000072, -IH_PERF_SEL_BIF_LINE0_RISING_VF5 = 0x00000073, -IH_PERF_SEL_BIF_LINE0_RISING_VF6 = 0x00000074, -IH_PERF_SEL_BIF_LINE0_RISING_VF7 = 0x00000075, -IH_PERF_SEL_BIF_LINE0_RISING_VF8 = 0x00000076, -IH_PERF_SEL_BIF_LINE0_RISING_VF9 = 0x00000077, -IH_PERF_SEL_BIF_LINE0_RISING_VF10 = 0x00000078, -IH_PERF_SEL_BIF_LINE0_RISING_VF11 = 0x00000079, -IH_PERF_SEL_BIF_LINE0_RISING_VF12 = 0x0000007a, -IH_PERF_SEL_BIF_LINE0_RISING_VF13 = 0x0000007b, -IH_PERF_SEL_BIF_LINE0_RISING_VF14 = 0x0000007c, -IH_PERF_SEL_BIF_LINE0_RISING_VF15 = 0x0000007d, -IH_PERF_SEL_BIF_LINE0_FALLING_VF0 = 0x0000007e, -IH_PERF_SEL_BIF_LINE0_FALLING_VF1 = 0x0000007f, -IH_PERF_SEL_BIF_LINE0_FALLING_VF2 = 0x00000080, -IH_PERF_SEL_BIF_LINE0_FALLING_VF3 = 0x00000081, -IH_PERF_SEL_BIF_LINE0_FALLING_VF4 = 0x00000082, -IH_PERF_SEL_BIF_LINE0_FALLING_VF5 = 0x00000083, -IH_PERF_SEL_BIF_LINE0_FALLING_VF6 = 0x00000084, -IH_PERF_SEL_BIF_LINE0_FALLING_VF7 = 0x00000085, -IH_PERF_SEL_BIF_LINE0_FALLING_VF8 = 0x00000086, -IH_PERF_SEL_BIF_LINE0_FALLING_VF9 = 0x00000087, -IH_PERF_SEL_BIF_LINE0_FALLING_VF10 = 0x00000088, -IH_PERF_SEL_BIF_LINE0_FALLING_VF11 = 0x00000089, 
-IH_PERF_SEL_BIF_LINE0_FALLING_VF12 = 0x0000008a, -IH_PERF_SEL_BIF_LINE0_FALLING_VF13 = 0x0000008b, -IH_PERF_SEL_BIF_LINE0_FALLING_VF14 = 0x0000008c, -IH_PERF_SEL_BIF_LINE0_FALLING_VF15 = 0x0000008d, -Reserved142 = 0x0000008e, -Reserved143 = 0x0000008f, -Reserved144 = 0x00000090, -Reserved145 = 0x00000091, -Reserved146 = 0x00000092, -Reserved147 = 0x00000093, -Reserved148 = 0x00000094, -Reserved149 = 0x00000095, -IH_PERF_SEL_CLIENT0_INT = 0x00000096, -IH_PERF_SEL_CLIENT1_INT = 0x00000097, -IH_PERF_SEL_CLIENT2_INT = 0x00000098, -IH_PERF_SEL_CLIENT3_INT = 0x00000099, -IH_PERF_SEL_CLIENT4_INT = 0x0000009a, -IH_PERF_SEL_CLIENT5_INT = 0x0000009b, -IH_PERF_SEL_CLIENT6_INT = 0x0000009c, -IH_PERF_SEL_CLIENT7_INT = 0x0000009d, -IH_PERF_SEL_CLIENT8_INT = 0x0000009e, -IH_PERF_SEL_CLIENT9_INT = 0x0000009f, -IH_PERF_SEL_CLIENT10_INT = 0x000000a0, -IH_PERF_SEL_CLIENT11_INT = 0x000000a1, -IH_PERF_SEL_CLIENT12_INT = 0x000000a2, -IH_PERF_SEL_CLIENT13_INT = 0x000000a3, -IH_PERF_SEL_CLIENT14_INT = 0x000000a4, -IH_PERF_SEL_CLIENT15_INT = 0x000000a5, -IH_PERF_SEL_CLIENT16_INT = 0x000000a6, -IH_PERF_SEL_CLIENT17_INT = 0x000000a7, -IH_PERF_SEL_CLIENT18_INT = 0x000000a8, -IH_PERF_SEL_CLIENT19_INT = 0x000000a9, -IH_PERF_SEL_CLIENT20_INT = 0x000000aa, -IH_PERF_SEL_CLIENT21_INT = 0x000000ab, -IH_PERF_SEL_CLIENT22_INT = 0x000000ac, -IH_PERF_SEL_CLIENT23_INT = 0x000000ad, -IH_PERF_SEL_CLIENT24_INT = 0x000000ae, -IH_PERF_SEL_CLIENT25_INT = 0x000000af, -IH_PERF_SEL_CLIENT26_INT = 0x000000b0, -IH_PERF_SEL_CLIENT27_INT = 0x000000b1, -IH_PERF_SEL_CLIENT28_INT = 0x000000b2, -IH_PERF_SEL_CLIENT29_INT = 0x000000b3, -IH_PERF_SEL_CLIENT30_INT = 0x000000b4, -IH_PERF_SEL_CLIENT31_INT = 0x000000b5, -Reserved182 = 0x000000b6, -Reserved183 = 0x000000b7, -Reserved184 = 0x000000b8, -Reserved185 = 0x000000b9, -Reserved186 = 0x000000ba, -Reserved187 = 0x000000bb, -Reserved188 = 0x000000bc, -Reserved189 = 0x000000bd, -Reserved190 = 0x000000be, -Reserved191 = 0x000000bf, -Reserved192 = 0x000000c0, -Reserved193 = 
0x000000c1, -Reserved194 = 0x000000c2, -Reserved195 = 0x000000c3, -Reserved196 = 0x000000c4, -Reserved197 = 0x000000c5, -Reserved198 = 0x000000c6, -Reserved199 = 0x000000c7, -Reserved200 = 0x000000c8, -Reserved201 = 0x000000c9, -Reserved202 = 0x000000ca, -Reserved203 = 0x000000cb, -Reserved204 = 0x000000cc, -Reserved205 = 0x000000cd, -Reserved206 = 0x000000ce, -Reserved207 = 0x000000cf, -Reserved208 = 0x000000d0, -Reserved209 = 0x000000d1, -Reserved210 = 0x000000d2, -Reserved211 = 0x000000d3, -Reserved212 = 0x000000d4, -Reserved213 = 0x000000d5, -Reserved214 = 0x000000d6, -Reserved215 = 0x000000d7, -Reserved216 = 0x000000d8, -Reserved217 = 0x000000d9, -Reserved218 = 0x000000da, -Reserved219 = 0x000000db, -IH_PERF_SEL_RB1_FULL_VF0 = 0x000000dc, -IH_PERF_SEL_RB1_FULL_VF1 = 0x000000dd, -IH_PERF_SEL_RB1_FULL_VF2 = 0x000000de, -IH_PERF_SEL_RB1_FULL_VF3 = 0x000000df, -IH_PERF_SEL_RB1_FULL_VF4 = 0x000000e0, -IH_PERF_SEL_RB1_FULL_VF5 = 0x000000e1, -IH_PERF_SEL_RB1_FULL_VF6 = 0x000000e2, -IH_PERF_SEL_RB1_FULL_VF7 = 0x000000e3, -IH_PERF_SEL_RB1_FULL_VF8 = 0x000000e4, -IH_PERF_SEL_RB1_FULL_VF9 = 0x000000e5, -IH_PERF_SEL_RB1_FULL_VF10 = 0x000000e6, -IH_PERF_SEL_RB1_FULL_VF11 = 0x000000e7, -IH_PERF_SEL_RB1_FULL_VF12 = 0x000000e8, -IH_PERF_SEL_RB1_FULL_VF13 = 0x000000e9, -IH_PERF_SEL_RB1_FULL_VF14 = 0x000000ea, -IH_PERF_SEL_RB1_FULL_VF15 = 0x000000eb, -IH_PERF_SEL_RB1_OVERFLOW_VF0 = 0x000000ec, -IH_PERF_SEL_RB1_OVERFLOW_VF1 = 0x000000ed, -IH_PERF_SEL_RB1_OVERFLOW_VF2 = 0x000000ee, -IH_PERF_SEL_RB1_OVERFLOW_VF3 = 0x000000ef, -IH_PERF_SEL_RB1_OVERFLOW_VF4 = 0x000000f0, -IH_PERF_SEL_RB1_OVERFLOW_VF5 = 0x000000f1, -IH_PERF_SEL_RB1_OVERFLOW_VF6 = 0x000000f2, -IH_PERF_SEL_RB1_OVERFLOW_VF7 = 0x000000f3, -IH_PERF_SEL_RB1_OVERFLOW_VF8 = 0x000000f4, -IH_PERF_SEL_RB1_OVERFLOW_VF9 = 0x000000f5, -IH_PERF_SEL_RB1_OVERFLOW_VF10 = 0x000000f6, -IH_PERF_SEL_RB1_OVERFLOW_VF11 = 0x000000f7, -IH_PERF_SEL_RB1_OVERFLOW_VF12 = 0x000000f8, -IH_PERF_SEL_RB1_OVERFLOW_VF13 = 0x000000f9, 
-IH_PERF_SEL_RB1_OVERFLOW_VF14 = 0x000000fa, -IH_PERF_SEL_RB1_OVERFLOW_VF15 = 0x000000fb, -Reserved252 = 0x000000fc, -Reserved253 = 0x000000fd, -Reserved254 = 0x000000fe, -Reserved255 = 0x000000ff, -Reserved256 = 0x00000100, -Reserved257 = 0x00000101, -Reserved258 = 0x00000102, -Reserved259 = 0x00000103, -Reserved260 = 0x00000104, -Reserved261 = 0x00000105, -Reserved262 = 0x00000106, -Reserved263 = 0x00000107, -Reserved264 = 0x00000108, -Reserved265 = 0x00000109, -Reserved266 = 0x0000010a, -Reserved267 = 0x0000010b, -IH_PERF_SEL_RB1_WPTR_WRAP_VF0 = 0x0000010c, -IH_PERF_SEL_RB1_WPTR_WRAP_VF1 = 0x0000010d, -IH_PERF_SEL_RB1_WPTR_WRAP_VF2 = 0x0000010e, -IH_PERF_SEL_RB1_WPTR_WRAP_VF3 = 0x0000010f, -IH_PERF_SEL_RB1_WPTR_WRAP_VF4 = 0x00000110, -IH_PERF_SEL_RB1_WPTR_WRAP_VF5 = 0x00000111, -IH_PERF_SEL_RB1_WPTR_WRAP_VF6 = 0x00000112, -IH_PERF_SEL_RB1_WPTR_WRAP_VF7 = 0x00000113, -IH_PERF_SEL_RB1_WPTR_WRAP_VF8 = 0x00000114, -IH_PERF_SEL_RB1_WPTR_WRAP_VF9 = 0x00000115, -IH_PERF_SEL_RB1_WPTR_WRAP_VF10 = 0x00000116, -IH_PERF_SEL_RB1_WPTR_WRAP_VF11 = 0x00000117, -IH_PERF_SEL_RB1_WPTR_WRAP_VF12 = 0x00000118, -IH_PERF_SEL_RB1_WPTR_WRAP_VF13 = 0x00000119, -IH_PERF_SEL_RB1_WPTR_WRAP_VF14 = 0x0000011a, -IH_PERF_SEL_RB1_WPTR_WRAP_VF15 = 0x0000011b, -IH_PERF_SEL_RB1_RPTR_WRAP_VF0 = 0x0000011c, -IH_PERF_SEL_RB1_RPTR_WRAP_VF1 = 0x0000011d, -IH_PERF_SEL_RB1_RPTR_WRAP_VF2 = 0x0000011e, -IH_PERF_SEL_RB1_RPTR_WRAP_VF3 = 0x0000011f, -IH_PERF_SEL_RB1_RPTR_WRAP_VF4 = 0x00000120, -IH_PERF_SEL_RB1_RPTR_WRAP_VF5 = 0x00000121, -IH_PERF_SEL_RB1_RPTR_WRAP_VF6 = 0x00000122, -IH_PERF_SEL_RB1_RPTR_WRAP_VF7 = 0x00000123, -IH_PERF_SEL_RB1_RPTR_WRAP_VF8 = 0x00000124, -IH_PERF_SEL_RB1_RPTR_WRAP_VF9 = 0x00000125, -IH_PERF_SEL_RB1_RPTR_WRAP_VF10 = 0x00000126, -IH_PERF_SEL_RB1_RPTR_WRAP_VF11 = 0x00000127, -IH_PERF_SEL_RB1_RPTR_WRAP_VF12 = 0x00000128, -IH_PERF_SEL_RB1_RPTR_WRAP_VF13 = 0x00000129, -IH_PERF_SEL_RB1_RPTR_WRAP_VF14 = 0x0000012a, -IH_PERF_SEL_RB1_RPTR_WRAP_VF15 = 0x0000012b, -Reserved300 = 
0x0000012c, -Reserved301 = 0x0000012d, -Reserved302 = 0x0000012e, -Reserved303 = 0x0000012f, -Reserved304 = 0x00000130, -Reserved305 = 0x00000131, -Reserved306 = 0x00000132, -Reserved307 = 0x00000133, -Reserved308 = 0x00000134, -Reserved309 = 0x00000135, -Reserved310 = 0x00000136, -Reserved311 = 0x00000137, -Reserved312 = 0x00000138, -Reserved313 = 0x00000139, -Reserved314 = 0x0000013a, -Reserved315 = 0x0000013b, -Reserved316 = 0x0000013c, -Reserved317 = 0x0000013d, -Reserved318 = 0x0000013e, -Reserved319 = 0x0000013f, -Reserved320 = 0x00000140, -Reserved321 = 0x00000141, -Reserved322 = 0x00000142, -Reserved323 = 0x00000143, -Reserved324 = 0x00000144, -Reserved325 = 0x00000145, -Reserved326 = 0x00000146, -Reserved327 = 0x00000147, -Reserved328 = 0x00000148, -Reserved329 = 0x00000149, -Reserved330 = 0x0000014a, -Reserved331 = 0x0000014b, -IH_PERF_SEL_RB2_FULL_VF0 = 0x0000014c, -IH_PERF_SEL_RB2_FULL_VF1 = 0x0000014d, -IH_PERF_SEL_RB2_FULL_VF2 = 0x0000014e, -IH_PERF_SEL_RB2_FULL_VF3 = 0x0000014f, -IH_PERF_SEL_RB2_FULL_VF4 = 0x00000150, -IH_PERF_SEL_RB2_FULL_VF5 = 0x00000151, -IH_PERF_SEL_RB2_FULL_VF6 = 0x00000152, -IH_PERF_SEL_RB2_FULL_VF7 = 0x00000153, -IH_PERF_SEL_RB2_FULL_VF8 = 0x00000154, -IH_PERF_SEL_RB2_FULL_VF9 = 0x00000155, -IH_PERF_SEL_RB2_FULL_VF10 = 0x00000156, -IH_PERF_SEL_RB2_FULL_VF11 = 0x00000157, -IH_PERF_SEL_RB2_FULL_VF12 = 0x00000158, -IH_PERF_SEL_RB2_FULL_VF13 = 0x00000159, -IH_PERF_SEL_RB2_FULL_VF14 = 0x0000015a, -IH_PERF_SEL_RB2_FULL_VF15 = 0x0000015b, -IH_PERF_SEL_RB2_OVERFLOW_VF0 = 0x0000015c, -IH_PERF_SEL_RB2_OVERFLOW_VF1 = 0x0000015d, -IH_PERF_SEL_RB2_OVERFLOW_VF2 = 0x0000015e, -IH_PERF_SEL_RB2_OVERFLOW_VF3 = 0x0000015f, -IH_PERF_SEL_RB2_OVERFLOW_VF4 = 0x00000160, -IH_PERF_SEL_RB2_OVERFLOW_VF5 = 0x00000161, -IH_PERF_SEL_RB2_OVERFLOW_VF6 = 0x00000162, -IH_PERF_SEL_RB2_OVERFLOW_VF7 = 0x00000163, -IH_PERF_SEL_RB2_OVERFLOW_VF8 = 0x00000164, -IH_PERF_SEL_RB2_OVERFLOW_VF9 = 0x00000165, -IH_PERF_SEL_RB2_OVERFLOW_VF10 = 0x00000166, 
-IH_PERF_SEL_RB2_OVERFLOW_VF11 = 0x00000167, -IH_PERF_SEL_RB2_OVERFLOW_VF12 = 0x00000168, -IH_PERF_SEL_RB2_OVERFLOW_VF13 = 0x00000169, -IH_PERF_SEL_RB2_OVERFLOW_VF14 = 0x0000016a, -IH_PERF_SEL_RB2_OVERFLOW_VF15 = 0x0000016b, -Reserved364 = 0x0000016c, -Reserved365 = 0x0000016d, -Reserved366 = 0x0000016e, -Reserved367 = 0x0000016f, -Reserved368 = 0x00000170, -Reserved369 = 0x00000171, -Reserved370 = 0x00000172, -Reserved371 = 0x00000173, -Reserved372 = 0x00000174, -Reserved373 = 0x00000175, -Reserved374 = 0x00000176, -Reserved375 = 0x00000177, -Reserved376 = 0x00000178, -Reserved377 = 0x00000179, -Reserved378 = 0x0000017a, -Reserved379 = 0x0000017b, -IH_PERF_SEL_RB2_WPTR_WRAP_VF0 = 0x0000017c, -IH_PERF_SEL_RB2_WPTR_WRAP_VF1 = 0x0000017d, -IH_PERF_SEL_RB2_WPTR_WRAP_VF2 = 0x0000017e, -IH_PERF_SEL_RB2_WPTR_WRAP_VF3 = 0x0000017f, -IH_PERF_SEL_RB2_WPTR_WRAP_VF4 = 0x00000180, -IH_PERF_SEL_RB2_WPTR_WRAP_VF5 = 0x00000181, -IH_PERF_SEL_RB2_WPTR_WRAP_VF6 = 0x00000182, -IH_PERF_SEL_RB2_WPTR_WRAP_VF7 = 0x00000183, -IH_PERF_SEL_RB2_WPTR_WRAP_VF8 = 0x00000184, -IH_PERF_SEL_RB2_WPTR_WRAP_VF9 = 0x00000185, -IH_PERF_SEL_RB2_WPTR_WRAP_VF10 = 0x00000186, -IH_PERF_SEL_RB2_WPTR_WRAP_VF11 = 0x00000187, -IH_PERF_SEL_RB2_WPTR_WRAP_VF12 = 0x00000188, -IH_PERF_SEL_RB2_WPTR_WRAP_VF13 = 0x00000189, -IH_PERF_SEL_RB2_WPTR_WRAP_VF14 = 0x0000018a, -IH_PERF_SEL_RB2_WPTR_WRAP_VF15 = 0x0000018b, -IH_PERF_SEL_RB2_RPTR_WRAP_VF0 = 0x0000018c, -IH_PERF_SEL_RB2_RPTR_WRAP_VF1 = 0x0000018d, -IH_PERF_SEL_RB2_RPTR_WRAP_VF2 = 0x0000018e, -IH_PERF_SEL_RB2_RPTR_WRAP_VF3 = 0x0000018f, -IH_PERF_SEL_RB2_RPTR_WRAP_VF4 = 0x00000190, -IH_PERF_SEL_RB2_RPTR_WRAP_VF5 = 0x00000191, -IH_PERF_SEL_RB2_RPTR_WRAP_VF6 = 0x00000192, -IH_PERF_SEL_RB2_RPTR_WRAP_VF7 = 0x00000193, -IH_PERF_SEL_RB2_RPTR_WRAP_VF8 = 0x00000194, -IH_PERF_SEL_RB2_RPTR_WRAP_VF9 = 0x00000195, -IH_PERF_SEL_RB2_RPTR_WRAP_VF10 = 0x00000196, -IH_PERF_SEL_RB2_RPTR_WRAP_VF11 = 0x00000197, -IH_PERF_SEL_RB2_RPTR_WRAP_VF12 = 0x00000198, 
-IH_PERF_SEL_RB2_RPTR_WRAP_VF13 = 0x00000199, -IH_PERF_SEL_RB2_RPTR_WRAP_VF14 = 0x0000019a, -IH_PERF_SEL_RB2_RPTR_WRAP_VF15 = 0x0000019b, -Reserved412 = 0x0000019c, -Reserved413 = 0x0000019d, -Reserved414 = 0x0000019e, -Reserved415 = 0x0000019f, -Reserved416 = 0x000001a0, -Reserved417 = 0x000001a1, -Reserved418 = 0x000001a2, -Reserved419 = 0x000001a3, -Reserved420 = 0x000001a4, -Reserved421 = 0x000001a5, -Reserved422 = 0x000001a6, -Reserved423 = 0x000001a7, -Reserved424 = 0x000001a8, -Reserved425 = 0x000001a9, -Reserved426 = 0x000001aa, -Reserved427 = 0x000001ab, -Reserved428 = 0x000001ac, -Reserved429 = 0x000001ad, -Reserved430 = 0x000001ae, -Reserved431 = 0x000001af, -Reserved432 = 0x000001b0, -Reserved433 = 0x000001b1, -Reserved434 = 0x000001b2, -Reserved435 = 0x000001b3, -Reserved436 = 0x000001b4, -Reserved437 = 0x000001b5, -Reserved438 = 0x000001b6, -Reserved439 = 0x000001b7, -Reserved440 = 0x000001b8, -Reserved441 = 0x000001b9, -Reserved442 = 0x000001ba, -Reserved443 = 0x000001bb, -Reserved444 = 0x000001bc, -Reserved445 = 0x000001bd, -Reserved446 = 0x000001be, -Reserved447 = 0x000001bf, -Reserved448 = 0x000001c0, -Reserved449 = 0x000001c1, -Reserved450 = 0x000001c2, -Reserved451 = 0x000001c3, -Reserved452 = 0x000001c4, -Reserved453 = 0x000001c5, -Reserved454 = 0x000001c6, -Reserved455 = 0x000001c7, -Reserved456 = 0x000001c8, -Reserved457 = 0x000001c9, -Reserved458 = 0x000001ca, -Reserved459 = 0x000001cb, -Reserved460 = 0x000001cc, -Reserved461 = 0x000001cd, -Reserved462 = 0x000001ce, -Reserved463 = 0x000001cf, -Reserved464 = 0x000001d0, -Reserved465 = 0x000001d1, -Reserved466 = 0x000001d2, -Reserved467 = 0x000001d3, -Reserved468 = 0x000001d4, -Reserved469 = 0x000001d5, -Reserved470 = 0x000001d6, -Reserved471 = 0x000001d7, -Reserved472 = 0x000001d8, -Reserved473 = 0x000001d9, -Reserved474 = 0x000001da, -Reserved475 = 0x000001db, -Reserved476 = 0x000001dc, -Reserved477 = 0x000001dd, -Reserved478 = 0x000001de, -Reserved479 = 0x000001df, -Reserved480 = 
0x000001e0, -Reserved481 = 0x000001e1, -Reserved482 = 0x000001e2, -Reserved483 = 0x000001e3, -Reserved484 = 0x000001e4, -Reserved485 = 0x000001e5, -Reserved486 = 0x000001e6, -Reserved487 = 0x000001e7, -Reserved488 = 0x000001e8, -Reserved489 = 0x000001e9, -Reserved490 = 0x000001ea, -Reserved491 = 0x000001eb, -Reserved492 = 0x000001ec, -Reserved493 = 0x000001ed, -Reserved494 = 0x000001ee, -Reserved495 = 0x000001ef, -Reserved496 = 0x000001f0, -Reserved497 = 0x000001f1, -Reserved498 = 0x000001f2, -Reserved499 = 0x000001f3, -Reserved500 = 0x000001f4, -Reserved501 = 0x000001f5, -Reserved502 = 0x000001f6, -Reserved503 = 0x000001f7, -Reserved504 = 0x000001f8, -Reserved505 = 0x000001f9, -Reserved506 = 0x000001fa, -Reserved507 = 0x000001fb, -Reserved508 = 0x000001fc, -Reserved509 = 0x000001fd, -Reserved510 = 0x000001fe, -Reserved511 = 0x000001ff, -} IH_PERF_SEL; - -/******************************************************* - * SEM Enums - *******************************************************/ - -/* - * SEM_PERF_SEL enum - */ - -typedef enum SEM_PERF_SEL { -SEM_PERF_SEL_CYCLE = 0x00000000, -SEM_PERF_SEL_IDLE = 0x00000001, -SEM_PERF_SEL_SDMA0_REQ_SIGNAL = 0x00000002, -SEM_PERF_SEL_SDMA1_REQ_SIGNAL = 0x00000003, -SEM_PERF_SEL_UVD_REQ_SIGNAL = 0x00000004, -SEM_PERF_SEL_VCE0_REQ_SIGNAL = 0x00000005, -SEM_PERF_SEL_ACP_REQ_SIGNAL = 0x00000006, -SEM_PERF_SEL_ISP_REQ_SIGNAL = 0x00000007, -SEM_PERF_SEL_VCE1_REQ_SIGNAL = 0x00000008, -SEM_PERF_SEL_VP8_REQ_SIGNAL = 0x00000009, -SEM_PERF_SEL_CPG_E0_REQ_SIGNAL = 0x0000000a, -SEM_PERF_SEL_CPG_E1_REQ_SIGNAL = 0x0000000b, -SEM_PERF_SEL_CPC1_IMME_E0_REQ_SIGNAL = 0x0000000c, -SEM_PERF_SEL_CPC1_IMME_E1_REQ_SIGNAL = 0x0000000d, -SEM_PERF_SEL_CPC1_IMME_E2_REQ_SIGNAL = 0x0000000e, -SEM_PERF_SEL_CPC1_IMME_E3_REQ_SIGNAL = 0x0000000f, -SEM_PERF_SEL_CPC2_IMME_E0_REQ_SIGNAL = 0x00000010, -SEM_PERF_SEL_CPC2_IMME_E1_REQ_SIGNAL = 0x00000011, -SEM_PERF_SEL_CPC2_IMME_E2_REQ_SIGNAL = 0x00000012, -SEM_PERF_SEL_CPC2_IMME_E3_REQ_SIGNAL = 0x00000013, 
-SEM_PERF_SEL_SDMA0_REQ_WAIT = 0x00000014, -SEM_PERF_SEL_SDMA1_REQ_WAIT = 0x00000015, -SEM_PERF_SEL_UVD_REQ_WAIT = 0x00000016, -SEM_PERF_SEL_VCE0_REQ_WAIT = 0x00000017, -SEM_PERF_SEL_ACP_REQ_WAIT = 0x00000018, -SEM_PERF_SEL_ISP_REQ_WAIT = 0x00000019, -SEM_PERF_SEL_VCE1_REQ_WAIT = 0x0000001a, -SEM_PERF_SEL_VP8_REQ_WAIT = 0x0000001b, -SEM_PERF_SEL_CPG_E0_REQ_WAIT = 0x0000001c, -SEM_PERF_SEL_CPG_E1_REQ_WAIT = 0x0000001d, -SEM_PERF_SEL_CPC1_IMME_E0_REQ_WAIT = 0x0000001e, -SEM_PERF_SEL_CPC1_IMME_E1_REQ_WAIT = 0x0000001f, -SEM_PERF_SEL_CPC1_IMME_E2_REQ_WAIT = 0x00000020, -SEM_PERF_SEL_CPC1_IMME_E3_REQ_WAIT = 0x00000021, -SEM_PERF_SEL_CPC2_IMME_E0_REQ_WAIT = 0x00000022, -SEM_PERF_SEL_CPC2_IMME_E1_REQ_WAIT = 0x00000023, -SEM_PERF_SEL_CPC2_IMME_E2_REQ_WAIT = 0x00000024, -SEM_PERF_SEL_CPC2_IMME_E3_REQ_WAIT = 0x00000025, -SEM_PERF_SEL_CPC1_OFFL_E0_REQ_WAIT = 0x00000026, -SEM_PERF_SEL_CPC1_OFFL_E1_REQ_WAIT = 0x00000027, -SEM_PERF_SEL_CPC1_OFFL_E2_REQ_WAIT = 0x00000028, -SEM_PERF_SEL_CPC1_OFFL_E3_REQ_WAIT = 0x00000029, -SEM_PERF_SEL_CPC1_OFFL_E4_REQ_WAIT = 0x0000002a, -SEM_PERF_SEL_CPC1_OFFL_E5_REQ_WAIT = 0x0000002b, -SEM_PERF_SEL_CPC1_OFFL_E6_REQ_WAIT = 0x0000002c, -SEM_PERF_SEL_CPC1_OFFL_E7_REQ_WAIT = 0x0000002d, -SEM_PERF_SEL_CPC1_OFFL_E8_REQ_WAIT = 0x0000002e, -SEM_PERF_SEL_CPC1_OFFL_E9_REQ_WAIT = 0x0000002f, -SEM_PERF_SEL_CPC1_OFFL_E10_REQ_WAIT = 0x00000030, -SEM_PERF_SEL_CPC1_OFFL_E11_REQ_WAIT = 0x00000031, -SEM_PERF_SEL_CPC1_OFFL_E12_REQ_WAIT = 0x00000032, -SEM_PERF_SEL_CPC1_OFFL_E13_REQ_WAIT = 0x00000033, -SEM_PERF_SEL_CPC1_OFFL_E14_REQ_WAIT = 0x00000034, -SEM_PERF_SEL_CPC1_OFFL_E15_REQ_WAIT = 0x00000035, -SEM_PERF_SEL_CPC1_OFFL_E16_REQ_WAIT = 0x00000036, -SEM_PERF_SEL_CPC1_OFFL_E17_REQ_WAIT = 0x00000037, -SEM_PERF_SEL_CPC1_OFFL_E18_REQ_WAIT = 0x00000038, -SEM_PERF_SEL_CPC1_OFFL_E19_REQ_WAIT = 0x00000039, -SEM_PERF_SEL_CPC1_OFFL_E20_REQ_WAIT = 0x0000003a, -SEM_PERF_SEL_CPC1_OFFL_E21_REQ_WAIT = 0x0000003b, -SEM_PERF_SEL_CPC1_OFFL_E22_REQ_WAIT = 0x0000003c, 
-SEM_PERF_SEL_CPC1_OFFL_E23_REQ_WAIT = 0x0000003d, -SEM_PERF_SEL_CPC1_OFFL_E24_REQ_WAIT = 0x0000003e, -SEM_PERF_SEL_CPC1_OFFL_E25_REQ_WAIT = 0x0000003f, -SEM_PERF_SEL_CPC1_OFFL_E26_REQ_WAIT = 0x00000040, -SEM_PERF_SEL_CPC1_OFFL_E27_REQ_WAIT = 0x00000041, -SEM_PERF_SEL_CPC1_OFFL_E28_REQ_WAIT = 0x00000042, -SEM_PERF_SEL_CPC1_OFFL_E29_REQ_WAIT = 0x00000043, -SEM_PERF_SEL_CPC1_OFFL_E30_REQ_WAIT = 0x00000044, -SEM_PERF_SEL_CPC1_OFFL_E31_REQ_WAIT = 0x00000045, -SEM_PERF_SEL_CPC2_OFFL_E0_REQ_WAIT = 0x00000046, -SEM_PERF_SEL_CPC2_OFFL_E1_REQ_WAIT = 0x00000047, -SEM_PERF_SEL_CPC2_OFFL_E2_REQ_WAIT = 0x00000048, -SEM_PERF_SEL_CPC2_OFFL_E3_REQ_WAIT = 0x00000049, -SEM_PERF_SEL_CPC2_OFFL_E4_REQ_WAIT = 0x0000004a, -SEM_PERF_SEL_CPC2_OFFL_E5_REQ_WAIT = 0x0000004b, -SEM_PERF_SEL_CPC2_OFFL_E6_REQ_WAIT = 0x0000004c, -SEM_PERF_SEL_CPC2_OFFL_E7_REQ_WAIT = 0x0000004d, -SEM_PERF_SEL_CPC2_OFFL_E8_REQ_WAIT = 0x0000004e, -SEM_PERF_SEL_CPC2_OFFL_E9_REQ_WAIT = 0x0000004f, -SEM_PERF_SEL_CPC2_OFFL_E10_REQ_WAIT = 0x00000050, -SEM_PERF_SEL_CPC2_OFFL_E11_REQ_WAIT = 0x00000051, -SEM_PERF_SEL_CPC2_OFFL_E12_REQ_WAIT = 0x00000052, -SEM_PERF_SEL_CPC2_OFFL_E13_REQ_WAIT = 0x00000053, -SEM_PERF_SEL_CPC2_OFFL_E14_REQ_WAIT = 0x00000054, -SEM_PERF_SEL_CPC2_OFFL_E15_REQ_WAIT = 0x00000055, -SEM_PERF_SEL_CPC2_OFFL_E16_REQ_WAIT = 0x00000056, -SEM_PERF_SEL_CPC2_OFFL_E17_REQ_WAIT = 0x00000057, -SEM_PERF_SEL_CPC2_OFFL_E18_REQ_WAIT = 0x00000058, -SEM_PERF_SEL_CPC2_OFFL_E19_REQ_WAIT = 0x00000059, -SEM_PERF_SEL_CPC2_OFFL_E20_REQ_WAIT = 0x0000005a, -SEM_PERF_SEL_CPC2_OFFL_E21_REQ_WAIT = 0x0000005b, -SEM_PERF_SEL_CPC2_OFFL_E22_REQ_WAIT = 0x0000005c, -SEM_PERF_SEL_CPC2_OFFL_E23_REQ_WAIT = 0x0000005d, -SEM_PERF_SEL_CPC2_OFFL_E24_REQ_WAIT = 0x0000005e, -SEM_PERF_SEL_CPC2_OFFL_E25_REQ_WAIT = 0x0000005f, -SEM_PERF_SEL_CPC2_OFFL_E26_REQ_WAIT = 0x00000060, -SEM_PERF_SEL_CPC2_OFFL_E27_REQ_WAIT = 0x00000061, -SEM_PERF_SEL_CPC2_OFFL_E28_REQ_WAIT = 0x00000062, -SEM_PERF_SEL_CPC2_OFFL_E29_REQ_WAIT = 0x00000063, 
-SEM_PERF_SEL_CPC2_OFFL_E30_REQ_WAIT = 0x00000064, -SEM_PERF_SEL_CPC2_OFFL_E31_REQ_WAIT = 0x00000065, -SEM_PERF_SEL_CPC1_OFFL_E0_POLL_WAIT = 0x00000066, -SEM_PERF_SEL_CPC1_OFFL_E1_POLL_WAIT = 0x00000067, -SEM_PERF_SEL_CPC1_OFFL_E2_POLL_WAIT = 0x00000068, -SEM_PERF_SEL_CPC1_OFFL_E3_POLL_WAIT = 0x00000069, -SEM_PERF_SEL_CPC1_OFFL_E4_POLL_WAIT = 0x0000006a, -SEM_PERF_SEL_CPC1_OFFL_E5_POLL_WAIT = 0x0000006b, -SEM_PERF_SEL_CPC1_OFFL_E6_POLL_WAIT = 0x0000006c, -SEM_PERF_SEL_CPC1_OFFL_E7_POLL_WAIT = 0x0000006d, -SEM_PERF_SEL_CPC1_OFFL_E8_POLL_WAIT = 0x0000006e, -SEM_PERF_SEL_CPC1_OFFL_E9_POLL_WAIT = 0x0000006f, -SEM_PERF_SEL_CPC1_OFFL_E10_POLL_WAIT = 0x00000070, -SEM_PERF_SEL_CPC1_OFFL_E11_POLL_WAIT = 0x00000071, -SEM_PERF_SEL_CPC1_OFFL_E12_POLL_WAIT = 0x00000072, -SEM_PERF_SEL_CPC1_OFFL_E13_POLL_WAIT = 0x00000073, -SEM_PERF_SEL_CPC1_OFFL_E14_POLL_WAIT = 0x00000074, -SEM_PERF_SEL_CPC1_OFFL_E15_POLL_WAIT = 0x00000075, -SEM_PERF_SEL_CPC1_OFFL_E16_POLL_WAIT = 0x00000076, -SEM_PERF_SEL_CPC1_OFFL_E17_POLL_WAIT = 0x00000077, -SEM_PERF_SEL_CPC1_OFFL_E18_POLL_WAIT = 0x00000078, -SEM_PERF_SEL_CPC1_OFFL_E19_POLL_WAIT = 0x00000079, -SEM_PERF_SEL_CPC1_OFFL_E20_POLL_WAIT = 0x0000007a, -SEM_PERF_SEL_CPC1_OFFL_E21_POLL_WAIT = 0x0000007b, -SEM_PERF_SEL_CPC1_OFFL_E22_POLL_WAIT = 0x0000007c, -SEM_PERF_SEL_CPC1_OFFL_E23_POLL_WAIT = 0x0000007d, -SEM_PERF_SEL_CPC1_OFFL_E24_POLL_WAIT = 0x0000007e, -SEM_PERF_SEL_CPC1_OFFL_E25_POLL_WAIT = 0x0000007f, -SEM_PERF_SEL_CPC1_OFFL_E26_POLL_WAIT = 0x00000080, -SEM_PERF_SEL_CPC1_OFFL_E27_POLL_WAIT = 0x00000081, -SEM_PERF_SEL_CPC1_OFFL_E28_POLL_WAIT = 0x00000082, -SEM_PERF_SEL_CPC1_OFFL_E29_POLL_WAIT = 0x00000083, -SEM_PERF_SEL_CPC1_OFFL_E30_POLL_WAIT = 0x00000084, -SEM_PERF_SEL_CPC1_OFFL_E31_POLL_WAIT = 0x00000085, -SEM_PERF_SEL_CPC2_OFFL_E0_POLL_WAIT = 0x00000086, -SEM_PERF_SEL_CPC2_OFFL_E1_POLL_WAIT = 0x00000087, -SEM_PERF_SEL_CPC2_OFFL_E2_POLL_WAIT = 0x00000088, -SEM_PERF_SEL_CPC2_OFFL_E3_POLL_WAIT = 0x00000089, -SEM_PERF_SEL_CPC2_OFFL_E4_POLL_WAIT = 
0x0000008a, -SEM_PERF_SEL_CPC2_OFFL_E5_POLL_WAIT = 0x0000008b, -SEM_PERF_SEL_CPC2_OFFL_E6_POLL_WAIT = 0x0000008c, -SEM_PERF_SEL_CPC2_OFFL_E7_POLL_WAIT = 0x0000008d, -SEM_PERF_SEL_CPC2_OFFL_E8_POLL_WAIT = 0x0000008e, -SEM_PERF_SEL_CPC2_OFFL_E9_POLL_WAIT = 0x0000008f, -SEM_PERF_SEL_CPC2_OFFL_E10_POLL_WAIT = 0x00000090, -SEM_PERF_SEL_CPC2_OFFL_E11_POLL_WAIT = 0x00000091, -SEM_PERF_SEL_CPC2_OFFL_E12_POLL_WAIT = 0x00000092, -SEM_PERF_SEL_CPC2_OFFL_E13_POLL_WAIT = 0x00000093, -SEM_PERF_SEL_CPC2_OFFL_E14_POLL_WAIT = 0x00000094, -SEM_PERF_SEL_CPC2_OFFL_E15_POLL_WAIT = 0x00000095, -SEM_PERF_SEL_CPC2_OFFL_E16_POLL_WAIT = 0x00000096, -SEM_PERF_SEL_CPC2_OFFL_E17_POLL_WAIT = 0x00000097, -SEM_PERF_SEL_CPC2_OFFL_E18_POLL_WAIT = 0x00000098, -SEM_PERF_SEL_CPC2_OFFL_E19_POLL_WAIT = 0x00000099, -SEM_PERF_SEL_CPC2_OFFL_E20_POLL_WAIT = 0x0000009a, -SEM_PERF_SEL_CPC2_OFFL_E21_POLL_WAIT = 0x0000009b, -SEM_PERF_SEL_CPC2_OFFL_E22_POLL_WAIT = 0x0000009c, -SEM_PERF_SEL_CPC2_OFFL_E23_POLL_WAIT = 0x0000009d, -SEM_PERF_SEL_CPC2_OFFL_E24_POLL_WAIT = 0x0000009e, -SEM_PERF_SEL_CPC2_OFFL_E25_POLL_WAIT = 0x0000009f, -SEM_PERF_SEL_CPC2_OFFL_E26_POLL_WAIT = 0x000000a0, -SEM_PERF_SEL_CPC2_OFFL_E27_POLL_WAIT = 0x000000a1, -SEM_PERF_SEL_CPC2_OFFL_E28_POLL_WAIT = 0x000000a2, -SEM_PERF_SEL_CPC2_OFFL_E29_POLL_WAIT = 0x000000a3, -SEM_PERF_SEL_CPC2_OFFL_E30_POLL_WAIT = 0x000000a4, -SEM_PERF_SEL_CPC2_OFFL_E31_POLL_WAIT = 0x000000a5, -SEM_PERF_SEL_MC_RD_REQ = 0x000000a6, -SEM_PERF_SEL_MC_RD_RET = 0x000000a7, -SEM_PERF_SEL_MC_WR_REQ = 0x000000a8, -SEM_PERF_SEL_MC_WR_RET = 0x000000a9, -SEM_PERF_SEL_ATC_REQ = 0x000000aa, -SEM_PERF_SEL_ATC_RET = 0x000000ab, -SEM_PERF_SEL_ATC_XNACK = 0x000000ac, -SEM_PERF_SEL_ATC_INVALIDATION = 0x000000ad, -} SEM_PERF_SEL; - -/******************************************************* - * SDMA Enums - *******************************************************/ - -/* - * SDMA_PERF_SEL enum - */ - -typedef enum SDMA_PERF_SEL { -SDMA_PERF_SEL_CYCLE = 0x00000000, -SDMA_PERF_SEL_IDLE = 
0x00000001, -SDMA_PERF_SEL_REG_IDLE = 0x00000002, -SDMA_PERF_SEL_RB_EMPTY = 0x00000003, -SDMA_PERF_SEL_RB_FULL = 0x00000004, -SDMA_PERF_SEL_RB_WPTR_WRAP = 0x00000005, -SDMA_PERF_SEL_RB_RPTR_WRAP = 0x00000006, -SDMA_PERF_SEL_RB_WPTR_POLL_READ = 0x00000007, -SDMA_PERF_SEL_RB_RPTR_WB = 0x00000008, -SDMA_PERF_SEL_RB_CMD_IDLE = 0x00000009, -SDMA_PERF_SEL_RB_CMD_FULL = 0x0000000a, -SDMA_PERF_SEL_IB_CMD_IDLE = 0x0000000b, -SDMA_PERF_SEL_IB_CMD_FULL = 0x0000000c, -SDMA_PERF_SEL_EX_IDLE = 0x0000000d, -SDMA_PERF_SEL_SRBM_REG_SEND = 0x0000000e, -SDMA_PERF_SEL_EX_IDLE_POLL_TIMER_EXPIRE = 0x0000000f, -SDMA_PERF_SEL_MC_WR_IDLE = 0x00000010, -SDMA_PERF_SEL_MC_WR_COUNT = 0x00000011, -SDMA_PERF_SEL_MC_RD_IDLE = 0x00000012, -SDMA_PERF_SEL_MC_RD_COUNT = 0x00000013, -SDMA_PERF_SEL_MC_RD_RET_STALL = 0x00000014, -SDMA_PERF_SEL_MC_RD_NO_POLL_IDLE = 0x00000015, -SDMA_PERF_SEL_DRM_IDLE = 0x00000016, -SDMA_PERF_SEL_DRM_REQ_STALL = 0x00000017, -SDMA_PERF_SEL_SEM_IDLE = 0x00000018, -SDMA_PERF_SEL_SEM_REQ_STALL = 0x00000019, -SDMA_PERF_SEL_SEM_REQ_COUNT = 0x0000001a, -SDMA_PERF_SEL_SEM_RESP_INCOMPLETE = 0x0000001b, -SDMA_PERF_SEL_SEM_RESP_FAIL = 0x0000001c, -SDMA_PERF_SEL_SEM_RESP_PASS = 0x0000001d, -SDMA_PERF_SEL_INT_IDLE = 0x0000001e, -SDMA_PERF_SEL_INT_REQ_STALL = 0x0000001f, -SDMA_PERF_SEL_INT_REQ_COUNT = 0x00000020, -SDMA_PERF_SEL_INT_RESP_ACCEPTED = 0x00000021, -SDMA_PERF_SEL_INT_RESP_RETRY = 0x00000022, -SDMA_PERF_SEL_NUM_PACKET = 0x00000023, -SDMA_PERF_SEL_DRM1_REQ_STALL = 0x00000024, -SDMA_PERF_SEL_CE_WREQ_IDLE = 0x00000025, -SDMA_PERF_SEL_CE_WR_IDLE = 0x00000026, -SDMA_PERF_SEL_CE_SPLIT_IDLE = 0x00000027, -SDMA_PERF_SEL_CE_RREQ_IDLE = 0x00000028, -SDMA_PERF_SEL_CE_OUT_IDLE = 0x00000029, -SDMA_PERF_SEL_CE_IN_IDLE = 0x0000002a, -SDMA_PERF_SEL_CE_DST_IDLE = 0x0000002b, -SDMA_PERF_SEL_CE_DRM_IDLE = 0x0000002c, -SDMA_PERF_SEL_CE_DRM1_IDLE = 0x0000002d, -SDMA_PERF_SEL_CE_AFIFO_FULL = 0x0000002e, -SDMA_PERF_SEL_CE_DRM_FULL = 0x0000002f, -SDMA_PERF_SEL_CE_DRM1_FULL = 0x00000030, 
-SDMA_PERF_SEL_CE_INFO_FULL = 0x00000031, -SDMA_PERF_SEL_CE_INFO1_FULL = 0x00000032, -SDMA_PERF_SEL_CE_RD_STALL = 0x00000033, -SDMA_PERF_SEL_CE_WR_STALL = 0x00000034, -SDMA_PERF_SEL_GFX_SELECT = 0x00000035, -SDMA_PERF_SEL_RLC0_SELECT = 0x00000036, -SDMA_PERF_SEL_RLC1_SELECT = 0x00000037, -SDMA_PERF_SEL_PAGE_SELECT = 0x00000038, -SDMA_PERF_SEL_CTX_CHANGE = 0x00000039, -SDMA_PERF_SEL_CTX_CHANGE_EXPIRED = 0x0000003a, -SDMA_PERF_SEL_CTX_CHANGE_EXCEPTION = 0x0000003b, -SDMA_PERF_SEL_DOORBELL = 0x0000003c, -SDMA_PERF_SEL_RD_BA_RTR = 0x0000003d, -SDMA_PERF_SEL_WR_BA_RTR = 0x0000003e, -SDMA_PERF_SEL_F32_L1_WR_VLD = 0x0000003f, -SDMA_PERF_SEL_CE_L1_WR_VLD = 0x00000040, -SDMA_PERF_SEL_CE_L1_STALL = 0x00000041, -SDMA_PERF_SEL_SDMA_INVACK_NFLUSH = 0x00000042, -SDMA_PERF_SEL_SDMA_INVACK_FLUSH = 0x00000043, -SDMA_PERF_SEL_ATCL2_INVREQ_NFLUSH = 0x00000044, -SDMA_PERF_SEL_ATCL2_INVREQ_FLUSH = 0x00000045, -SDMA_PERF_SEL_ATCL2_RET_XNACK = 0x00000046, -SDMA_PERF_SEL_ATCL2_RET_ACK = 0x00000047, -SDMA_PERF_SEL_ATCL2_FREE = 0x00000048, -SDMA_PERF_SEL_SDMA_ATCL2_SEND = 0x00000049, -SDMA_PERF_SEL_DMA_L1_WR_SEND = 0x0000004a, -SDMA_PERF_SEL_DMA_L1_RD_SEND = 0x0000004b, -SDMA_PERF_SEL_DMA_MC_WR_SEND = 0x0000004c, -SDMA_PERF_SEL_DMA_MC_RD_SEND = 0x0000004d, -SDMA_PERF_SEL_L1_WR_FIFO_IDLE = 0x0000004e, -SDMA_PERF_SEL_L1_RD_FIFO_IDLE = 0x0000004f, -SDMA_PERF_SEL_L1_WRL2_IDLE = 0x00000050, -SDMA_PERF_SEL_L1_RDL2_IDLE = 0x00000051, -SDMA_PERF_SEL_L1_WRMC_IDLE = 0x00000052, -SDMA_PERF_SEL_L1_RDMC_IDLE = 0x00000053, -SDMA_PERF_SEL_L1_WR_INV_IDLE = 0x00000054, -SDMA_PERF_SEL_L1_RD_INV_IDLE = 0x00000055, -SDMA_PERF_SEL_L1_WR_INV_EN = 0x00000056, -SDMA_PERF_SEL_L1_RD_INV_EN = 0x00000057, -SDMA_PERF_SEL_L1_WR_WAIT_INVADR = 0x00000058, -SDMA_PERF_SEL_L1_RD_WAIT_INVADR = 0x00000059, -SDMA_PERF_SEL_IS_INVREQ_ADDR_WR = 0x0000005a, -SDMA_PERF_SEL_IS_INVREQ_ADDR_RD = 0x0000005b, -SDMA_PERF_SEL_L1_WR_XNACK_TIMEOUT = 0x0000005c, -SDMA_PERF_SEL_L1_RD_XNACK_TIMEOUT = 0x0000005d, -SDMA_PERF_SEL_L1_INV_MIDDLE = 
0x0000005e, -SDMA_PERF_SEL_UTCL1_TAG_DELAY_COUNTER = 0x000000fe, -SDMA_PERF_SEL_MMHUB_TAG_DELAY_COUNTER = 0x000000ff, -} SDMA_PERF_SEL; - -/******************************************************* - * SMUIO Enums - *******************************************************/ - -/* - * ROM_SIGNATURE value - */ - -#define ROM_SIGNATURE 0x0000aa55 - -/******************************************************* - * GDS Enums - *******************************************************/ - -/******************************************************* - * CB Enums - *******************************************************/ - -/* - * SurfaceNumber enum - */ - -typedef enum SurfaceNumber { -NUMBER_UNORM = 0x00000000, -NUMBER_SNORM = 0x00000001, -NUMBER_USCALED = 0x00000002, -NUMBER_SSCALED = 0x00000003, -NUMBER_UINT = 0x00000004, -NUMBER_SINT = 0x00000005, -NUMBER_SRGB = 0x00000006, -NUMBER_FLOAT = 0x00000007, -} SurfaceNumber; - -/* - * SurfaceSwap enum - */ - -typedef enum SurfaceSwap { -SWAP_STD = 0x00000000, -SWAP_ALT = 0x00000001, -SWAP_STD_REV = 0x00000002, -SWAP_ALT_REV = 0x00000003, -} SurfaceSwap; - -/* - * CBMode enum - */ - -typedef enum CBMode { -CB_DISABLE = 0x00000000, -CB_NORMAL = 0x00000001, -CB_ELIMINATE_FAST_CLEAR = 0x00000002, -CB_RESOLVE = 0x00000003, -CB_DECOMPRESS = 0x00000004, -CB_FMASK_DECOMPRESS = 0x00000005, -CB_DCC_DECOMPRESS = 0x00000006, -} CBMode; - -/* - * RoundMode enum - */ - -typedef enum RoundMode { -ROUND_BY_HALF = 0x00000000, -ROUND_TRUNCATE = 0x00000001, -} RoundMode; - -/* - * SourceFormat enum - */ - -typedef enum SourceFormat { -EXPORT_4C_32BPC = 0x00000000, -EXPORT_4C_16BPC = 0x00000001, -EXPORT_2C_32BPC_GR = 0x00000002, -EXPORT_2C_32BPC_AR = 0x00000003, -} SourceFormat; - -/* - * BlendOp enum - */ - -typedef enum BlendOp { -BLEND_ZERO = 0x00000000, -BLEND_ONE = 0x00000001, -BLEND_SRC_COLOR = 0x00000002, -BLEND_ONE_MINUS_SRC_COLOR = 0x00000003, -BLEND_SRC_ALPHA = 0x00000004, -BLEND_ONE_MINUS_SRC_ALPHA = 0x00000005, -BLEND_DST_ALPHA = 0x00000006, 
-BLEND_ONE_MINUS_DST_ALPHA = 0x00000007, -BLEND_DST_COLOR = 0x00000008, -BLEND_ONE_MINUS_DST_COLOR = 0x00000009, -BLEND_SRC_ALPHA_SATURATE = 0x0000000a, -BLEND_BOTH_SRC_ALPHA = 0x0000000b, -BLEND_BOTH_INV_SRC_ALPHA = 0x0000000c, -BLEND_CONSTANT_COLOR = 0x0000000d, -BLEND_ONE_MINUS_CONSTANT_COLOR = 0x0000000e, -BLEND_SRC1_COLOR = 0x0000000f, -BLEND_INV_SRC1_COLOR = 0x00000010, -BLEND_SRC1_ALPHA = 0x00000011, -BLEND_INV_SRC1_ALPHA = 0x00000012, -BLEND_CONSTANT_ALPHA = 0x00000013, -BLEND_ONE_MINUS_CONSTANT_ALPHA = 0x00000014, -} BlendOp; - -/* - * CombFunc enum - */ - -typedef enum CombFunc { -COMB_DST_PLUS_SRC = 0x00000000, -COMB_SRC_MINUS_DST = 0x00000001, -COMB_MIN_DST_SRC = 0x00000002, -COMB_MAX_DST_SRC = 0x00000003, -COMB_DST_MINUS_SRC = 0x00000004, -} CombFunc; - -/* - * BlendOpt enum - */ - -typedef enum BlendOpt { -FORCE_OPT_AUTO = 0x00000000, -FORCE_OPT_DISABLE = 0x00000001, -FORCE_OPT_ENABLE_IF_SRC_A_0 = 0x00000002, -FORCE_OPT_ENABLE_IF_SRC_RGB_0 = 0x00000003, -FORCE_OPT_ENABLE_IF_SRC_ARGB_0 = 0x00000004, -FORCE_OPT_ENABLE_IF_SRC_A_1 = 0x00000005, -FORCE_OPT_ENABLE_IF_SRC_RGB_1 = 0x00000006, -FORCE_OPT_ENABLE_IF_SRC_ARGB_1 = 0x00000007, -} BlendOpt; - -/* - * CmaskCode enum - */ - -typedef enum CmaskCode { -CMASK_CLR00_F0 = 0x00000000, -CMASK_CLR00_F1 = 0x00000001, -CMASK_CLR00_F2 = 0x00000002, -CMASK_CLR00_FX = 0x00000003, -CMASK_CLR01_F0 = 0x00000004, -CMASK_CLR01_F1 = 0x00000005, -CMASK_CLR01_F2 = 0x00000006, -CMASK_CLR01_FX = 0x00000007, -CMASK_CLR10_F0 = 0x00000008, -CMASK_CLR10_F1 = 0x00000009, -CMASK_CLR10_F2 = 0x0000000a, -CMASK_CLR10_FX = 0x0000000b, -CMASK_CLR11_F0 = 0x0000000c, -CMASK_CLR11_F1 = 0x0000000d, -CMASK_CLR11_F2 = 0x0000000e, -CMASK_CLR11_FX = 0x0000000f, -} CmaskCode; - -/* - * CmaskAddr enum - */ - -typedef enum CmaskAddr { -CMASK_ADDR_TILED = 0x00000000, -CMASK_ADDR_LINEAR = 0x00000001, -CMASK_ADDR_COMPATIBLE = 0x00000002, -} CmaskAddr; - -/* - * MemArbMode enum - */ - -typedef enum MemArbMode { -MEM_ARB_MODE_FIXED = 0x00000000, 
-MEM_ARB_MODE_AGE = 0x00000001, -MEM_ARB_MODE_WEIGHT = 0x00000002, -MEM_ARB_MODE_BOTH = 0x00000003, -} MemArbMode; - -/* - * CBPerfSel enum - */ - -typedef enum CBPerfSel { -CB_PERF_SEL_NONE = 0x00000000, -CB_PERF_SEL_BUSY = 0x00000001, -CB_PERF_SEL_CORE_SCLK_VLD = 0x00000002, -CB_PERF_SEL_REG_SCLK0_VLD = 0x00000003, -CB_PERF_SEL_REG_SCLK1_VLD = 0x00000004, -CB_PERF_SEL_DRAWN_QUAD = 0x00000005, -CB_PERF_SEL_DRAWN_PIXEL = 0x00000006, -CB_PERF_SEL_DRAWN_QUAD_FRAGMENT = 0x00000007, -CB_PERF_SEL_DRAWN_TILE = 0x00000008, -CB_PERF_SEL_DB_CB_TILE_VALID_READY = 0x00000009, -CB_PERF_SEL_DB_CB_TILE_VALID_READYB = 0x0000000a, -CB_PERF_SEL_DB_CB_TILE_VALIDB_READY = 0x0000000b, -CB_PERF_SEL_DB_CB_TILE_VALIDB_READYB = 0x0000000c, -CB_PERF_SEL_CM_FC_TILE_VALID_READY = 0x0000000d, -CB_PERF_SEL_CM_FC_TILE_VALID_READYB = 0x0000000e, -CB_PERF_SEL_CM_FC_TILE_VALIDB_READY = 0x0000000f, -CB_PERF_SEL_CM_FC_TILE_VALIDB_READYB = 0x00000010, -CB_PERF_SEL_MERGE_TILE_ONLY_VALID_READY = 0x00000011, -CB_PERF_SEL_MERGE_TILE_ONLY_VALID_READYB = 0x00000012, -CB_PERF_SEL_DB_CB_LQUAD_VALID_READY = 0x00000013, -CB_PERF_SEL_DB_CB_LQUAD_VALID_READYB = 0x00000014, -CB_PERF_SEL_DB_CB_LQUAD_VALIDB_READY = 0x00000015, -CB_PERF_SEL_DB_CB_LQUAD_VALIDB_READYB = 0x00000016, -CB_PERF_SEL_LQUAD_NO_TILE = 0x00000017, -CB_PERF_SEL_LQUAD_FORMAT_IS_EXPORT_32_R = 0x00000018, -CB_PERF_SEL_LQUAD_FORMAT_IS_EXPORT_32_AR = 0x00000019, -CB_PERF_SEL_LQUAD_FORMAT_IS_EXPORT_32_GR = 0x0000001a, -CB_PERF_SEL_LQUAD_FORMAT_IS_EXPORT_32_ABGR = 0x0000001b, -CB_PERF_SEL_LQUAD_FORMAT_IS_EXPORT_FP16_ABGR = 0x0000001c, -CB_PERF_SEL_LQUAD_FORMAT_IS_EXPORT_SIGNED16_ABGR = 0x0000001d, -CB_PERF_SEL_LQUAD_FORMAT_IS_EXPORT_UNSIGNED16_ABGR = 0x0000001e, -CB_PERF_SEL_QUAD_KILLED_BY_EXTRA_PIXEL_EXPORT = 0x0000001f, -CB_PERF_SEL_QUAD_KILLED_BY_COLOR_INVALID = 0x00000020, -CB_PERF_SEL_QUAD_KILLED_BY_NULL_TARGET_SHADER_MASK = 0x00000021, -CB_PERF_SEL_QUAD_KILLED_BY_NULL_SAMPLE_MASK = 0x00000022, -CB_PERF_SEL_QUAD_KILLED_BY_DISCARD_PIXEL = 
0x00000023, -CB_PERF_SEL_FC_CLEAR_QUAD_VALID_READY = 0x00000024, -CB_PERF_SEL_FC_CLEAR_QUAD_VALID_READYB = 0x00000025, -CB_PERF_SEL_FC_CLEAR_QUAD_VALIDB_READY = 0x00000026, -CB_PERF_SEL_FC_CLEAR_QUAD_VALIDB_READYB = 0x00000027, -CB_PERF_SEL_FOP_IN_VALID_READY = 0x00000028, -CB_PERF_SEL_FOP_IN_VALID_READYB = 0x00000029, -CB_PERF_SEL_FOP_IN_VALIDB_READY = 0x0000002a, -CB_PERF_SEL_FOP_IN_VALIDB_READYB = 0x0000002b, -CB_PERF_SEL_FC_CC_QUADFRAG_VALID_READY = 0x0000002c, -CB_PERF_SEL_FC_CC_QUADFRAG_VALID_READYB = 0x0000002d, -CB_PERF_SEL_FC_CC_QUADFRAG_VALIDB_READY = 0x0000002e, -CB_PERF_SEL_FC_CC_QUADFRAG_VALIDB_READYB = 0x0000002f, -CB_PERF_SEL_CC_IB_SR_FRAG_VALID_READY = 0x00000030, -CB_PERF_SEL_CC_IB_SR_FRAG_VALID_READYB = 0x00000031, -CB_PERF_SEL_CC_IB_SR_FRAG_VALIDB_READY = 0x00000032, -CB_PERF_SEL_CC_IB_SR_FRAG_VALIDB_READYB = 0x00000033, -CB_PERF_SEL_CC_IB_TB_FRAG_VALID_READY = 0x00000034, -CB_PERF_SEL_CC_IB_TB_FRAG_VALID_READYB = 0x00000035, -CB_PERF_SEL_CC_IB_TB_FRAG_VALIDB_READY = 0x00000036, -CB_PERF_SEL_CC_IB_TB_FRAG_VALIDB_READYB = 0x00000037, -CB_PERF_SEL_CC_RB_BC_EVENFRAG_VALID_READY = 0x00000038, -CB_PERF_SEL_CC_RB_BC_EVENFRAG_VALID_READYB = 0x00000039, -CB_PERF_SEL_CC_RB_BC_EVENFRAG_VALIDB_READY = 0x0000003a, -CB_PERF_SEL_CC_RB_BC_EVENFRAG_VALIDB_READYB = 0x0000003b, -CB_PERF_SEL_CC_RB_BC_ODDFRAG_VALID_READY = 0x0000003c, -CB_PERF_SEL_CC_RB_BC_ODDFRAG_VALID_READYB = 0x0000003d, -CB_PERF_SEL_CC_RB_BC_ODDFRAG_VALIDB_READY = 0x0000003e, -CB_PERF_SEL_CC_RB_BC_ODDFRAG_VALIDB_READYB = 0x0000003f, -CB_PERF_SEL_CC_BC_CS_FRAG_VALID = 0x00000040, -CB_PERF_SEL_CM_CACHE_HIT = 0x00000041, -CB_PERF_SEL_CM_CACHE_TAG_MISS = 0x00000042, -CB_PERF_SEL_CM_CACHE_SECTOR_MISS = 0x00000043, -CB_PERF_SEL_CM_CACHE_REEVICTION_STALL = 0x00000044, -CB_PERF_SEL_CM_CACHE_EVICT_NONZERO_INFLIGHT_STALL = 0x00000045, -CB_PERF_SEL_CM_CACHE_REPLACE_PENDING_EVICT_STALL = 0x00000046, -CB_PERF_SEL_CM_CACHE_INFLIGHT_COUNTER_MAXIMUM_STALL = 0x00000047, -CB_PERF_SEL_CM_CACHE_READ_OUTPUT_STALL = 
0x00000048, -CB_PERF_SEL_CM_CACHE_WRITE_OUTPUT_STALL = 0x00000049, -CB_PERF_SEL_CM_CACHE_ACK_OUTPUT_STALL = 0x0000004a, -CB_PERF_SEL_CM_CACHE_STALL = 0x0000004b, -CB_PERF_SEL_CM_CACHE_FLUSH = 0x0000004c, -CB_PERF_SEL_CM_CACHE_TAGS_FLUSHED = 0x0000004d, -CB_PERF_SEL_CM_CACHE_SECTORS_FLUSHED = 0x0000004e, -CB_PERF_SEL_CM_CACHE_DIRTY_SECTORS_FLUSHED = 0x0000004f, -CB_PERF_SEL_FC_CACHE_HIT = 0x00000050, -CB_PERF_SEL_FC_CACHE_TAG_MISS = 0x00000051, -CB_PERF_SEL_FC_CACHE_SECTOR_MISS = 0x00000052, -CB_PERF_SEL_FC_CACHE_REEVICTION_STALL = 0x00000053, -CB_PERF_SEL_FC_CACHE_EVICT_NONZERO_INFLIGHT_STALL = 0x00000054, -CB_PERF_SEL_FC_CACHE_REPLACE_PENDING_EVICT_STALL = 0x00000055, -CB_PERF_SEL_FC_CACHE_INFLIGHT_COUNTER_MAXIMUM_STALL = 0x00000056, -CB_PERF_SEL_FC_CACHE_READ_OUTPUT_STALL = 0x00000057, -CB_PERF_SEL_FC_CACHE_WRITE_OUTPUT_STALL = 0x00000058, -CB_PERF_SEL_FC_CACHE_ACK_OUTPUT_STALL = 0x00000059, -CB_PERF_SEL_FC_CACHE_STALL = 0x0000005a, -CB_PERF_SEL_FC_CACHE_FLUSH = 0x0000005b, -CB_PERF_SEL_FC_CACHE_TAGS_FLUSHED = 0x0000005c, -CB_PERF_SEL_FC_CACHE_SECTORS_FLUSHED = 0x0000005d, -CB_PERF_SEL_FC_CACHE_DIRTY_SECTORS_FLUSHED = 0x0000005e, -CB_PERF_SEL_CC_CACHE_HIT = 0x0000005f, -CB_PERF_SEL_CC_CACHE_TAG_MISS = 0x00000060, -CB_PERF_SEL_CC_CACHE_SECTOR_MISS = 0x00000061, -CB_PERF_SEL_CC_CACHE_REEVICTION_STALL = 0x00000062, -CB_PERF_SEL_CC_CACHE_EVICT_NONZERO_INFLIGHT_STALL = 0x00000063, -CB_PERF_SEL_CC_CACHE_REPLACE_PENDING_EVICT_STALL = 0x00000064, -CB_PERF_SEL_CC_CACHE_INFLIGHT_COUNTER_MAXIMUM_STALL = 0x00000065, -CB_PERF_SEL_CC_CACHE_READ_OUTPUT_STALL = 0x00000066, -CB_PERF_SEL_CC_CACHE_WRITE_OUTPUT_STALL = 0x00000067, -CB_PERF_SEL_CC_CACHE_ACK_OUTPUT_STALL = 0x00000068, -CB_PERF_SEL_CC_CACHE_STALL = 0x00000069, -CB_PERF_SEL_CC_CACHE_FLUSH = 0x0000006a, -CB_PERF_SEL_CC_CACHE_TAGS_FLUSHED = 0x0000006b, -CB_PERF_SEL_CC_CACHE_SECTORS_FLUSHED = 0x0000006c, -CB_PERF_SEL_CC_CACHE_DIRTY_SECTORS_FLUSHED = 0x0000006d, -CB_PERF_SEL_CC_CACHE_WA_TO_RMW_CONVERSION = 0x0000006e, 
-CB_PERF_SEL_CC_CACHE_READS_SAVED_DUE_TO_DCC = 0x0000006f, -CB_PERF_SEL_CB_TAP_WRREQ_VALID_READY = 0x00000070, -CB_PERF_SEL_CB_TAP_WRREQ_VALID_READYB = 0x00000071, -CB_PERF_SEL_CB_TAP_WRREQ_VALIDB_READY = 0x00000072, -CB_PERF_SEL_CB_TAP_WRREQ_VALIDB_READYB = 0x00000073, -CB_PERF_SEL_CM_MC_WRITE_REQUEST = 0x00000074, -CB_PERF_SEL_FC_MC_WRITE_REQUEST = 0x00000075, -CB_PERF_SEL_CC_MC_WRITE_REQUEST = 0x00000076, -CB_PERF_SEL_CM_MC_WRITE_REQUESTS_IN_FLIGHT = 0x00000077, -CB_PERF_SEL_FC_MC_WRITE_REQUESTS_IN_FLIGHT = 0x00000078, -CB_PERF_SEL_CC_MC_WRITE_REQUESTS_IN_FLIGHT = 0x00000079, -CB_PERF_SEL_CB_TAP_RDREQ_VALID_READY = 0x0000007a, -CB_PERF_SEL_CB_TAP_RDREQ_VALID_READYB = 0x0000007b, -CB_PERF_SEL_CB_TAP_RDREQ_VALIDB_READY = 0x0000007c, -CB_PERF_SEL_CB_TAP_RDREQ_VALIDB_READYB = 0x0000007d, -CB_PERF_SEL_CM_MC_READ_REQUEST = 0x0000007e, -CB_PERF_SEL_FC_MC_READ_REQUEST = 0x0000007f, -CB_PERF_SEL_CC_MC_READ_REQUEST = 0x00000080, -CB_PERF_SEL_CM_MC_READ_REQUESTS_IN_FLIGHT = 0x00000081, -CB_PERF_SEL_FC_MC_READ_REQUESTS_IN_FLIGHT = 0x00000082, -CB_PERF_SEL_CC_MC_READ_REQUESTS_IN_FLIGHT = 0x00000083, -CB_PERF_SEL_CM_TQ_FULL = 0x00000084, -CB_PERF_SEL_CM_TQ_FIFO_TILE_RESIDENCY_STALL = 0x00000085, -CB_PERF_SEL_FC_QUAD_RDLAT_FIFO_FULL = 0x00000086, -CB_PERF_SEL_FC_TILE_RDLAT_FIFO_FULL = 0x00000087, -CB_PERF_SEL_FC_RDLAT_FIFO_QUAD_RESIDENCY_STALL = 0x00000088, -CB_PERF_SEL_FOP_FMASK_RAW_STALL = 0x00000089, -CB_PERF_SEL_FOP_FMASK_BYPASS_STALL = 0x0000008a, -CB_PERF_SEL_CC_SF_FULL = 0x0000008b, -CB_PERF_SEL_CC_RB_FULL = 0x0000008c, -CB_PERF_SEL_CC_EVENFIFO_QUAD_RESIDENCY_STALL = 0x0000008d, -CB_PERF_SEL_CC_ODDFIFO_QUAD_RESIDENCY_STALL = 0x0000008e, -CB_PERF_SEL_BLENDER_RAW_HAZARD_STALL = 0x0000008f, -CB_PERF_SEL_EVENT = 0x00000090, -CB_PERF_SEL_EVENT_CACHE_FLUSH_TS = 0x00000091, -CB_PERF_SEL_EVENT_CONTEXT_DONE = 0x00000092, -CB_PERF_SEL_EVENT_CACHE_FLUSH = 0x00000093, -CB_PERF_SEL_EVENT_CACHE_FLUSH_AND_INV_TS_EVENT = 0x00000094, -CB_PERF_SEL_EVENT_CACHE_FLUSH_AND_INV_EVENT = 
0x00000095, -CB_PERF_SEL_EVENT_FLUSH_AND_INV_CB_DATA_TS = 0x00000096, -CB_PERF_SEL_EVENT_FLUSH_AND_INV_CB_META = 0x00000097, -CB_PERF_SEL_CC_SURFACE_SYNC = 0x00000098, -CB_PERF_SEL_CMASK_READ_DATA_0xC = 0x00000099, -CB_PERF_SEL_CMASK_READ_DATA_0xD = 0x0000009a, -CB_PERF_SEL_CMASK_READ_DATA_0xE = 0x0000009b, -CB_PERF_SEL_CMASK_READ_DATA_0xF = 0x0000009c, -CB_PERF_SEL_CMASK_WRITE_DATA_0xC = 0x0000009d, -CB_PERF_SEL_CMASK_WRITE_DATA_0xD = 0x0000009e, -CB_PERF_SEL_CMASK_WRITE_DATA_0xE = 0x0000009f, -CB_PERF_SEL_CMASK_WRITE_DATA_0xF = 0x000000a0, -CB_PERF_SEL_TWO_PROBE_QUAD_FRAGMENT = 0x000000a1, -CB_PERF_SEL_EXPORT_32_ABGR_QUAD_FRAGMENT = 0x000000a2, -CB_PERF_SEL_DUAL_SOURCE_COLOR_QUAD_FRAGMENT = 0x000000a3, -CB_PERF_SEL_QUAD_HAS_1_FRAGMENT_BEFORE_UPDATE = 0x000000a4, -CB_PERF_SEL_QUAD_HAS_2_FRAGMENTS_BEFORE_UPDATE = 0x000000a5, -CB_PERF_SEL_QUAD_HAS_3_FRAGMENTS_BEFORE_UPDATE = 0x000000a6, -CB_PERF_SEL_QUAD_HAS_4_FRAGMENTS_BEFORE_UPDATE = 0x000000a7, -CB_PERF_SEL_QUAD_HAS_5_FRAGMENTS_BEFORE_UPDATE = 0x000000a8, -CB_PERF_SEL_QUAD_HAS_6_FRAGMENTS_BEFORE_UPDATE = 0x000000a9, -CB_PERF_SEL_QUAD_HAS_7_FRAGMENTS_BEFORE_UPDATE = 0x000000aa, -CB_PERF_SEL_QUAD_HAS_8_FRAGMENTS_BEFORE_UPDATE = 0x000000ab, -CB_PERF_SEL_QUAD_HAS_1_FRAGMENT_AFTER_UPDATE = 0x000000ac, -CB_PERF_SEL_QUAD_HAS_2_FRAGMENTS_AFTER_UPDATE = 0x000000ad, -CB_PERF_SEL_QUAD_HAS_3_FRAGMENTS_AFTER_UPDATE = 0x000000ae, -CB_PERF_SEL_QUAD_HAS_4_FRAGMENTS_AFTER_UPDATE = 0x000000af, -CB_PERF_SEL_QUAD_HAS_5_FRAGMENTS_AFTER_UPDATE = 0x000000b0, -CB_PERF_SEL_QUAD_HAS_6_FRAGMENTS_AFTER_UPDATE = 0x000000b1, -CB_PERF_SEL_QUAD_HAS_7_FRAGMENTS_AFTER_UPDATE = 0x000000b2, -CB_PERF_SEL_QUAD_HAS_8_FRAGMENTS_AFTER_UPDATE = 0x000000b3, -CB_PERF_SEL_QUAD_ADDED_1_FRAGMENT = 0x000000b4, -CB_PERF_SEL_QUAD_ADDED_2_FRAGMENTS = 0x000000b5, -CB_PERF_SEL_QUAD_ADDED_3_FRAGMENTS = 0x000000b6, -CB_PERF_SEL_QUAD_ADDED_4_FRAGMENTS = 0x000000b7, -CB_PERF_SEL_QUAD_ADDED_5_FRAGMENTS = 0x000000b8, -CB_PERF_SEL_QUAD_ADDED_6_FRAGMENTS = 0x000000b9, 
-CB_PERF_SEL_QUAD_ADDED_7_FRAGMENTS = 0x000000ba, -CB_PERF_SEL_QUAD_REMOVED_1_FRAGMENT = 0x000000bb, -CB_PERF_SEL_QUAD_REMOVED_2_FRAGMENTS = 0x000000bc, -CB_PERF_SEL_QUAD_REMOVED_3_FRAGMENTS = 0x000000bd, -CB_PERF_SEL_QUAD_REMOVED_4_FRAGMENTS = 0x000000be, -CB_PERF_SEL_QUAD_REMOVED_5_FRAGMENTS = 0x000000bf, -CB_PERF_SEL_QUAD_REMOVED_6_FRAGMENTS = 0x000000c0, -CB_PERF_SEL_QUAD_REMOVED_7_FRAGMENTS = 0x000000c1, -CB_PERF_SEL_QUAD_READS_FRAGMENT_0 = 0x000000c2, -CB_PERF_SEL_QUAD_READS_FRAGMENT_1 = 0x000000c3, -CB_PERF_SEL_QUAD_READS_FRAGMENT_2 = 0x000000c4, -CB_PERF_SEL_QUAD_READS_FRAGMENT_3 = 0x000000c5, -CB_PERF_SEL_QUAD_READS_FRAGMENT_4 = 0x000000c6, -CB_PERF_SEL_QUAD_READS_FRAGMENT_5 = 0x000000c7, -CB_PERF_SEL_QUAD_READS_FRAGMENT_6 = 0x000000c8, -CB_PERF_SEL_QUAD_READS_FRAGMENT_7 = 0x000000c9, -CB_PERF_SEL_QUAD_WRITES_FRAGMENT_0 = 0x000000ca, -CB_PERF_SEL_QUAD_WRITES_FRAGMENT_1 = 0x000000cb, -CB_PERF_SEL_QUAD_WRITES_FRAGMENT_2 = 0x000000cc, -CB_PERF_SEL_QUAD_WRITES_FRAGMENT_3 = 0x000000cd, -CB_PERF_SEL_QUAD_WRITES_FRAGMENT_4 = 0x000000ce, -CB_PERF_SEL_QUAD_WRITES_FRAGMENT_5 = 0x000000cf, -CB_PERF_SEL_QUAD_WRITES_FRAGMENT_6 = 0x000000d0, -CB_PERF_SEL_QUAD_WRITES_FRAGMENT_7 = 0x000000d1, -CB_PERF_SEL_QUAD_BLEND_OPT_DONT_READ_DST = 0x000000d2, -CB_PERF_SEL_QUAD_BLEND_OPT_BLEND_BYPASS = 0x000000d3, -CB_PERF_SEL_QUAD_BLEND_OPT_DISCARD_PIXELS = 0x000000d4, -CB_PERF_SEL_QUAD_DST_READ_COULD_HAVE_BEEN_OPTIMIZED = 0x000000d5, -CB_PERF_SEL_QUAD_BLENDING_COULD_HAVE_BEEN_BYPASSED = 0x000000d6, -CB_PERF_SEL_QUAD_COULD_HAVE_BEEN_DISCARDED = 0x000000d7, -CB_PERF_SEL_BLEND_OPT_PIXELS_RESULT_EQ_DEST = 0x000000d8, -CB_PERF_SEL_DRAWN_BUSY = 0x000000d9, -CB_PERF_SEL_TILE_TO_CMR_REGION_BUSY = 0x000000da, -CB_PERF_SEL_CMR_TO_FCR_REGION_BUSY = 0x000000db, -CB_PERF_SEL_FCR_TO_CCR_REGION_BUSY = 0x000000dc, -CB_PERF_SEL_CCR_TO_CCW_REGION_BUSY = 0x000000dd, -CB_PERF_SEL_FC_PF_SLOW_MODE_QUAD_EMPTY_HALF_DROPPED = 0x000000de, -CB_PERF_SEL_FC_SEQUENCER_CLEAR = 0x000000df, 
-CB_PERF_SEL_FC_SEQUENCER_ELIMINATE_FAST_CLEAR = 0x000000e0, -CB_PERF_SEL_FC_SEQUENCER_FMASK_DECOMPRESS = 0x000000e1, -CB_PERF_SEL_FC_SEQUENCER_FMASK_COMPRESSION_DISABLE = 0x000000e2, -CB_PERF_SEL_FC_KEYID_RDLAT_FIFO_FULL = 0x000000e3, -CB_PERF_SEL_FC_DOC_IS_STALLED = 0x000000e4, -CB_PERF_SEL_FC_DOC_MRTS_NOT_COMBINED = 0x000000e5, -CB_PERF_SEL_FC_DOC_MRTS_COMBINED = 0x000000e6, -CB_PERF_SEL_FC_DOC_QTILE_CAM_MISS = 0x000000e7, -CB_PERF_SEL_FC_DOC_QTILE_CAM_HIT = 0x000000e8, -CB_PERF_SEL_FC_DOC_CLINE_CAM_MISS = 0x000000e9, -CB_PERF_SEL_FC_DOC_CLINE_CAM_HIT = 0x000000ea, -CB_PERF_SEL_FC_DOC_QUAD_PTR_FIFO_IS_FULL = 0x000000eb, -CB_PERF_SEL_FC_DOC_OVERWROTE_1_SECTOR = 0x000000ec, -CB_PERF_SEL_FC_DOC_OVERWROTE_2_SECTORS = 0x000000ed, -CB_PERF_SEL_FC_DOC_OVERWROTE_3_SECTORS = 0x000000ee, -CB_PERF_SEL_FC_DOC_OVERWROTE_4_SECTORS = 0x000000ef, -CB_PERF_SEL_FC_DOC_TOTAL_OVERWRITTEN_SECTORS = 0x000000f0, -CB_PERF_SEL_FC_DCC_CACHE_HIT = 0x000000f1, -CB_PERF_SEL_FC_DCC_CACHE_TAG_MISS = 0x000000f2, -CB_PERF_SEL_FC_DCC_CACHE_SECTOR_MISS = 0x000000f3, -CB_PERF_SEL_FC_DCC_CACHE_REEVICTION_STALL = 0x000000f4, -CB_PERF_SEL_FC_DCC_CACHE_EVICT_NONZERO_INFLIGHT_STALL = 0x000000f5, -CB_PERF_SEL_FC_DCC_CACHE_REPLACE_PENDING_EVICT_STALL = 0x000000f6, -CB_PERF_SEL_FC_DCC_CACHE_INFLIGHT_COUNTER_MAXIMUM_STALL = 0x000000f7, -CB_PERF_SEL_FC_DCC_CACHE_READ_OUTPUT_STALL = 0x000000f8, -CB_PERF_SEL_FC_DCC_CACHE_WRITE_OUTPUT_STALL = 0x000000f9, -CB_PERF_SEL_FC_DCC_CACHE_ACK_OUTPUT_STALL = 0x000000fa, -CB_PERF_SEL_FC_DCC_CACHE_STALL = 0x000000fb, -CB_PERF_SEL_FC_DCC_CACHE_FLUSH = 0x000000fc, -CB_PERF_SEL_FC_DCC_CACHE_TAGS_FLUSHED = 0x000000fd, -CB_PERF_SEL_FC_DCC_CACHE_SECTORS_FLUSHED = 0x000000fe, -CB_PERF_SEL_FC_DCC_CACHE_DIRTY_SECTORS_FLUSHED = 0x000000ff, -CB_PERF_SEL_CC_DCC_BEYOND_TILE_SPLIT = 0x00000100, -CB_PERF_SEL_FC_MC_DCC_WRITE_REQUEST = 0x00000101, -CB_PERF_SEL_FC_MC_DCC_WRITE_REQUESTS_IN_FLIGHT = 0x00000102, -CB_PERF_SEL_FC_MC_DCC_READ_REQUEST = 0x00000103, 
-CB_PERF_SEL_FC_MC_DCC_READ_REQUESTS_IN_FLIGHT = 0x00000104, -CB_PERF_SEL_CC_DCC_RDREQ_STALL = 0x00000105, -CB_PERF_SEL_CC_DCC_DECOMPRESS_TIDS_IN = 0x00000106, -CB_PERF_SEL_CC_DCC_DECOMPRESS_TIDS_OUT = 0x00000107, -CB_PERF_SEL_CC_DCC_COMPRESS_TIDS_IN = 0x00000108, -CB_PERF_SEL_CC_DCC_COMPRESS_TIDS_OUT = 0x00000109, -CB_PERF_SEL_FC_DCC_KEY_VALUE__CLEAR = 0x0000010a, -CB_PERF_SEL_CC_DCC_KEY_VALUE__4_BLOCKS__2TO1 = 0x0000010b, -CB_PERF_SEL_CC_DCC_KEY_VALUE__3BLOCKS_2TO1__1BLOCK_2TO2 = 0x0000010c, -CB_PERF_SEL_CC_DCC_KEY_VALUE__2BLOCKS_2TO1__1BLOCK_2TO2__1BLOCK_2TO1 = 0x0000010d, -CB_PERF_SEL_CC_DCC_KEY_VALUE__1BLOCK_2TO1__1BLOCK_2TO2__2BLOCKS_2TO1 = 0x0000010e, -CB_PERF_SEL_CC_DCC_KEY_VALUE__1BLOCK_2TO2__3BLOCKS_2TO1 = 0x0000010f, -CB_PERF_SEL_CC_DCC_KEY_VALUE__2BLOCKS_2TO1__2BLOCKS_2TO2 = 0x00000110, -CB_PERF_SEL_CC_DCC_KEY_VALUE__1BLOCK_2TO1__2BLOCKS_2TO2__1BLOCK_2TO1 = 0x00000111, -CB_PERF_SEL_CC_DCC_KEY_VALUE__1BLOCK_2TO1__1BLOCK_2TO2__1BLOCK_2TO1__1BLOCK_2TO2 = 0x00000112, -CB_PERF_SEL_CC_DCC_KEY_VALUE__1BLOCK_2TO2__1BLOCK_2TO1__1BLOCK_2TO2__1BLOCK_2TO1 = 0x00000113, -CB_PERF_SEL_CC_DCC_KEY_VALUE__2BLOCKS_2TO2__2BLOCKS_2TO1 = 0x00000114, -CB_PERF_SEL_CC_DCC_KEY_VALUE__1BLOCK_2TO2__2BLOCKS_2TO1__1BLOCK_2TO2 = 0x00000115, -CB_PERF_SEL_CC_DCC_KEY_VALUE__1BLOCK_2TO1__3BLOCKS_2TO2 = 0x00000116, -CB_PERF_SEL_CC_DCC_KEY_VALUE__1BLOCK_2TO2__1BLOCK_2TO1__2BLOCKS_2TO2 = 0x00000117, -CB_PERF_SEL_CC_DCC_KEY_VALUE__2BLOCKS_2TO2__1BLOCK_2TO1__1BLOCK_2TO2 = 0x00000118, -CB_PERF_SEL_CC_DCC_KEY_VALUE__3BLOCKS_2TO2__1BLOCK_2TO1 = 0x00000119, -CB_PERF_SEL_CC_DCC_KEY_VALUE__2BLOCKS_4TO1 = 0x0000011a, -CB_PERF_SEL_CC_DCC_KEY_VALUE__1BLOCK_4TO1__1BLOCK_4TO2 = 0x0000011b, -CB_PERF_SEL_CC_DCC_KEY_VALUE__1BLOCK_4TO1__1BLOCK_4TO3 = 0x0000011c, -CB_PERF_SEL_CC_DCC_KEY_VALUE__1BLOCK_4TO1__1BLOCK_4TO4 = 0x0000011d, -CB_PERF_SEL_CC_DCC_KEY_VALUE__1BLOCK_4TO2__1BLOCK_4TO1 = 0x0000011e, -CB_PERF_SEL_CC_DCC_KEY_VALUE__2BLOCKS_4TO2 = 0x0000011f, 
-CB_PERF_SEL_CC_DCC_KEY_VALUE__1BLOCK_4TO2__1BLOCK_4TO3 = 0x00000120, -CB_PERF_SEL_CC_DCC_KEY_VALUE__1BLOCK_4TO2__1BLOCK_4TO4 = 0x00000121, -CB_PERF_SEL_CC_DCC_KEY_VALUE__1BLOCK_4TO3__1BLOCK_4TO1 = 0x00000122, -CB_PERF_SEL_CC_DCC_KEY_VALUE__1BLOCK_4TO3__1BLOCK_4TO2 = 0x00000123, -CB_PERF_SEL_CC_DCC_KEY_VALUE__2BLOCKS_4TO3 = 0x00000124, -CB_PERF_SEL_CC_DCC_KEY_VALUE__1BLOCK_4TO3__1BLOCK_4TO4 = 0x00000125, -CB_PERF_SEL_CC_DCC_KEY_VALUE__1BLOCK_4TO4__1BLOCK_4TO1 = 0x00000126, -CB_PERF_SEL_CC_DCC_KEY_VALUE__1BLOCK_4TO4__1BLOCK_4TO2 = 0x00000127, -CB_PERF_SEL_CC_DCC_KEY_VALUE__1BLOCK_4TO4__1BLOCK_4TO3 = 0x00000128, -CB_PERF_SEL_CC_DCC_KEY_VALUE__2BLOCKS_2TO1__1BLOCK_4TO1 = 0x00000129, -CB_PERF_SEL_CC_DCC_KEY_VALUE__2BLOCKS_2TO1__1BLOCK_4TO2 = 0x0000012a, -CB_PERF_SEL_CC_DCC_KEY_VALUE__2BLOCKS_2TO1__1BLOCK_4TO3 = 0x0000012b, -CB_PERF_SEL_CC_DCC_KEY_VALUE__2BLOCKS_2TO1__1BLOCK_4TO4 = 0x0000012c, -CB_PERF_SEL_CC_DCC_KEY_VALUE__1BLOCK_2TO1__1BLOCK_2TO2__1BLOCK_4TO1 = 0x0000012d, -CB_PERF_SEL_CC_DCC_KEY_VALUE__1BLOCK_2TO1__1BLOCK_2TO2__1BLOCK_4TO2 = 0x0000012e, -CB_PERF_SEL_CC_DCC_KEY_VALUE__1BLOCK_2TO1__1BLOCK_2TO2__1BLOCK_4TO3 = 0x0000012f, -CB_PERF_SEL_CC_DCC_KEY_VALUE__1BLOCK_2TO1__1BLOCK_2TO2__1BLOCK_4TO4 = 0x00000130, -CB_PERF_SEL_CC_DCC_KEY_VALUE__1BLOCK_2TO2__1BLOCK_2TO1__1BLOCK_4TO1 = 0x00000131, -CB_PERF_SEL_CC_DCC_KEY_VALUE__1BLOCK_2TO2__1BLOCK_2TO1__1BLOCK_4TO2 = 0x00000132, -CB_PERF_SEL_CC_DCC_KEY_VALUE__1BLOCK_2TO2__1BLOCK_2TO1__1BLOCK_4TO3 = 0x00000133, -CB_PERF_SEL_CC_DCC_KEY_VALUE__1BLOCK_2TO2__1BLOCK_2TO1__1BLOCK_4TO4 = 0x00000134, -CB_PERF_SEL_CC_DCC_KEY_VALUE__2BLOCKS_2TO2__1BLOCK_4TO1 = 0x00000135, -CB_PERF_SEL_CC_DCC_KEY_VALUE__2BLOCKS_2TO2__1BLOCK_4TO2 = 0x00000136, -CB_PERF_SEL_CC_DCC_KEY_VALUE__2BLOCKS_2TO2__1BLOCK_4TO3 = 0x00000137, -CB_PERF_SEL_CC_DCC_KEY_VALUE__1BLOCK_2TO1__1BLOCK_4TO1__1BLOCK_2TO1 = 0x00000138, -CB_PERF_SEL_CC_DCC_KEY_VALUE__1BLOCK_2TO1__1BLOCK_4TO2__1BLOCK_2TO1 = 0x00000139, 
-CB_PERF_SEL_CC_DCC_KEY_VALUE__1BLOCK_2TO1__1BLOCK_4TO3__1BLOCK_2TO1 = 0x0000013a, -CB_PERF_SEL_CC_DCC_KEY_VALUE__1BLOCK_2TO1__1BLOCK_4TO4__1BLOCK_2TO1 = 0x0000013b, -CB_PERF_SEL_CC_DCC_KEY_VALUE__1BLOCK_2TO2__1BLOCK_4TO1__1BLOCK_2TO1 = 0x0000013c, -CB_PERF_SEL_CC_DCC_KEY_VALUE__1BLOCK_2TO2__1BLOCK_4TO2__1BLOCK_2TO1 = 0x0000013d, -CB_PERF_SEL_CC_DCC_KEY_VALUE__1BLOCK_2TO2__1BLOCK_4TO3__1BLOCK_2TO1 = 0x0000013e, -CB_PERF_SEL_CC_DCC_KEY_VALUE__1BLOCK_2TO2__1BLOCK_4TO4__1BLOCK_2TO1 = 0x0000013f, -CB_PERF_SEL_CC_DCC_KEY_VALUE__1BLOCK_2TO1__1BLOCK_4TO1__1BLOCK_2TO2 = 0x00000140, -CB_PERF_SEL_CC_DCC_KEY_VALUE__1BLOCK_2TO1__1BLOCK_4TO2__1BLOCK_2TO2 = 0x00000141, -CB_PERF_SEL_CC_DCC_KEY_VALUE__1BLOCK_2TO1__1BLOCK_4TO3__1BLOCK_2TO2 = 0x00000142, -CB_PERF_SEL_CC_DCC_KEY_VALUE__1BLOCK_2TO1__1BLOCK_4TO4__1BLOCK_2TO2 = 0x00000143, -CB_PERF_SEL_CC_DCC_KEY_VALUE__1BLOCK_2TO2__1BLOCK_4TO1__1BLOCK_2TO2 = 0x00000144, -CB_PERF_SEL_CC_DCC_KEY_VALUE__1BLOCK_2TO2__1BLOCK_4TO2__1BLOCK_2TO2 = 0x00000145, -CB_PERF_SEL_CC_DCC_KEY_VALUE__1BLOCK_2TO2__1BLOCK_4TO3__1BLOCK_2TO2 = 0x00000146, -CB_PERF_SEL_CC_DCC_KEY_VALUE__1BLOCK_4TO1__2BLOCKS_2TO1 = 0x00000147, -CB_PERF_SEL_CC_DCC_KEY_VALUE__1BLOCK_4TO2__2BLOCKS_2TO1 = 0x00000148, -CB_PERF_SEL_CC_DCC_KEY_VALUE__1BLOCK_4TO3__2BLOCKS_2TO1 = 0x00000149, -CB_PERF_SEL_CC_DCC_KEY_VALUE__1BLOCK_4TO4__2BLOCKS_2TO1 = 0x0000014a, -CB_PERF_SEL_CC_DCC_KEY_VALUE__1BLOCK_4TO1__2BLOCKS_2TO2 = 0x0000014b, -CB_PERF_SEL_CC_DCC_KEY_VALUE__1BLOCK_4TO2__2BLOCKS_2TO2 = 0x0000014c, -CB_PERF_SEL_CC_DCC_KEY_VALUE__1BLOCK_4TO3__2BLOCKS_2TO2 = 0x0000014d, -CB_PERF_SEL_CC_DCC_KEY_VALUE__1BLOCK_4TO1__1BLOCK_2TO1__1BLOCK_2TO2 = 0x0000014e, -CB_PERF_SEL_CC_DCC_KEY_VALUE__1BLOCK_4TO2__1BLOCK_2TO1__1BLOCK_2TO2 = 0x0000014f, -CB_PERF_SEL_CC_DCC_KEY_VALUE__1BLOCK_4TO3__1BLOCK_2TO1__1BLOCK_2TO2 = 0x00000150, -CB_PERF_SEL_CC_DCC_KEY_VALUE__1BLOCK_4TO4__1BLOCK_2TO1__1BLOCK_2TO2 = 0x00000151, -CB_PERF_SEL_CC_DCC_KEY_VALUE__1BLOCK_4TO1__1BLOCK_2TO2__1BLOCK_2TO1 = 0x00000152, 
-CB_PERF_SEL_CC_DCC_KEY_VALUE__1BLOCK_4TO2__1BLOCK_2TO2__1BLOCK_2TO1 = 0x00000153, -CB_PERF_SEL_CC_DCC_KEY_VALUE__1BLOCK_4TO3__1BLOCK_2TO2__1BLOCK_2TO1 = 0x00000154, -CB_PERF_SEL_CC_DCC_KEY_VALUE__1BLOCK_4TO4__1BLOCK_2TO2__1BLOCK_2TO1 = 0x00000155, -CB_PERF_SEL_CC_DCC_KEY_VALUE__1BLOCK_2TO1__1BLOCK_6TO1 = 0x00000156, -CB_PERF_SEL_CC_DCC_KEY_VALUE__1BLOCK_2TO1__1BLOCK_6TO2 = 0x00000157, -CB_PERF_SEL_CC_DCC_KEY_VALUE__1BLOCK_2TO1__1BLOCK_6TO3 = 0x00000158, -CB_PERF_SEL_CC_DCC_KEY_VALUE__1BLOCK_2TO1__1BLOCK_6TO4 = 0x00000159, -CB_PERF_SEL_CC_DCC_KEY_VALUE__1BLOCK_2TO1__1BLOCK_6TO5 = 0x0000015a, -CB_PERF_SEL_CC_DCC_KEY_VALUE__1BLOCK_2TO1__1BLOCK_6TO6 = 0x0000015b, -CB_PERF_SEL_CC_DCC_KEY_VALUE__1BLOCK_2TO1__INV0 = 0x0000015c, -CB_PERF_SEL_CC_DCC_KEY_VALUE__1BLOCK_2TO1__INV1 = 0x0000015d, -CB_PERF_SEL_CC_DCC_KEY_VALUE__1BLOCK_2TO2__1BLOCK_6TO1 = 0x0000015e, -CB_PERF_SEL_CC_DCC_KEY_VALUE__1BLOCK_2TO2__1BLOCK_6TO2 = 0x0000015f, -CB_PERF_SEL_CC_DCC_KEY_VALUE__1BLOCK_2TO2__1BLOCK_6TO3 = 0x00000160, -CB_PERF_SEL_CC_DCC_KEY_VALUE__1BLOCK_2TO2__1BLOCK_6TO4 = 0x00000161, -CB_PERF_SEL_CC_DCC_KEY_VALUE__1BLOCK_2TO2__1BLOCK_6TO5 = 0x00000162, -CB_PERF_SEL_CC_DCC_KEY_VALUE__1BLOCK_2TO2__INV0 = 0x00000163, -CB_PERF_SEL_CC_DCC_KEY_VALUE__1BLOCK_2TO2__INV1 = 0x00000164, -CB_PERF_SEL_CC_DCC_KEY_VALUE__1BLOCK_6TO1__1BLOCK_2TO1 = 0x00000165, -CB_PERF_SEL_CC_DCC_KEY_VALUE__1BLOCK_6TO2__1BLOCK_2TO1 = 0x00000166, -CB_PERF_SEL_CC_DCC_KEY_VALUE__1BLOCK_6TO3__1BLOCK_2TO1 = 0x00000167, -CB_PERF_SEL_CC_DCC_KEY_VALUE__1BLOCK_6TO4__1BLOCK_2TO1 = 0x00000168, -CB_PERF_SEL_CC_DCC_KEY_VALUE__1BLOCK_6TO5__1BLOCK_2TO1 = 0x00000169, -CB_PERF_SEL_CC_DCC_KEY_VALUE__1BLOCK_6TO6__1BLOCK_2TO1 = 0x0000016a, -CB_PERF_SEL_CC_DCC_KEY_VALUE__INV0__1BLOCK_2TO1 = 0x0000016b, -CB_PERF_SEL_CC_DCC_KEY_VALUE__INV1__1BLOCK_2TO1 = 0x0000016c, -CB_PERF_SEL_CC_DCC_KEY_VALUE__1BLOCK_6TO1__1BLOCK_2TO2 = 0x0000016d, -CB_PERF_SEL_CC_DCC_KEY_VALUE__1BLOCK_6TO2__1BLOCK_2TO2 = 0x0000016e, 
-CB_PERF_SEL_CC_DCC_KEY_VALUE__1BLOCK_6TO3__1BLOCK_2TO2 = 0x0000016f, -CB_PERF_SEL_CC_DCC_KEY_VALUE__1BLOCK_6TO4__1BLOCK_2TO2 = 0x00000170, -CB_PERF_SEL_CC_DCC_KEY_VALUE__1BLOCK_6TO5__1BLOCK_2TO2 = 0x00000171, -CB_PERF_SEL_CC_DCC_KEY_VALUE__INV0__1BLOCK_2TO2 = 0x00000172, -CB_PERF_SEL_CC_DCC_KEY_VALUE__INV1__1BLOCK_2TO2 = 0x00000173, -CB_PERF_SEL_CC_DCC_KEY_VALUE__1BLOCK_8TO1 = 0x00000174, -CB_PERF_SEL_CC_DCC_KEY_VALUE__1BLOCK_8TO2 = 0x00000175, -CB_PERF_SEL_CC_DCC_KEY_VALUE__1BLOCK_8TO3 = 0x00000176, -CB_PERF_SEL_CC_DCC_KEY_VALUE__1BLOCK_8TO4 = 0x00000177, -CB_PERF_SEL_CC_DCC_KEY_VALUE__1BLOCK_8TO5 = 0x00000178, -CB_PERF_SEL_CC_DCC_KEY_VALUE__1BLOCK_8TO6 = 0x00000179, -CB_PERF_SEL_CC_DCC_KEY_VALUE__1BLOCK_8TO7 = 0x0000017a, -CB_PERF_SEL_CC_DCC_KEY_VALUE__UNCOMPRESSED = 0x0000017b, -CB_PERF_SEL_CC_DCC_COMPRESS_RATIO_2TO1 = 0x0000017c, -CB_PERF_SEL_CC_DCC_COMPRESS_RATIO_4TO1 = 0x0000017d, -CB_PERF_SEL_CC_DCC_COMPRESS_RATIO_4TO2 = 0x0000017e, -CB_PERF_SEL_CC_DCC_COMPRESS_RATIO_4TO3 = 0x0000017f, -CB_PERF_SEL_CC_DCC_COMPRESS_RATIO_6TO1 = 0x00000180, -CB_PERF_SEL_CC_DCC_COMPRESS_RATIO_6TO2 = 0x00000181, -CB_PERF_SEL_CC_DCC_COMPRESS_RATIO_6TO3 = 0x00000182, -CB_PERF_SEL_CC_DCC_COMPRESS_RATIO_6TO4 = 0x00000183, -CB_PERF_SEL_CC_DCC_COMPRESS_RATIO_6TO5 = 0x00000184, -CB_PERF_SEL_CC_DCC_COMPRESS_RATIO_8TO1 = 0x00000185, -CB_PERF_SEL_CC_DCC_COMPRESS_RATIO_8TO2 = 0x00000186, -CB_PERF_SEL_CC_DCC_COMPRESS_RATIO_8TO3 = 0x00000187, -CB_PERF_SEL_CC_DCC_COMPRESS_RATIO_8TO4 = 0x00000188, -CB_PERF_SEL_CC_DCC_COMPRESS_RATIO_8TO5 = 0x00000189, -CB_PERF_SEL_CC_DCC_COMPRESS_RATIO_8TO6 = 0x0000018a, -CB_PERF_SEL_CC_DCC_COMPRESS_RATIO_8TO7 = 0x0000018b, -CB_PERF_SEL_RBP_EXPORT_8PIX_LIT_BOTH = 0x0000018c, -CB_PERF_SEL_RBP_EXPORT_8PIX_LIT_LEFT = 0x0000018d, -CB_PERF_SEL_RBP_EXPORT_8PIX_LIT_RIGHT = 0x0000018e, -CB_PERF_SEL_RBP_SPLIT_MICROTILE = 0x0000018f, -CB_PERF_SEL_RBP_SPLIT_AA_SAMPLE_MASK = 0x00000190, -CB_PERF_SEL_RBP_SPLIT_PARTIAL_TARGET_MASK = 0x00000191, 
-CB_PERF_SEL_RBP_SPLIT_LINEAR_ADDRESSING = 0x00000192, -CB_PERF_SEL_RBP_SPLIT_AA_NO_FMASK_COMPRESS = 0x00000193, -CB_PERF_SEL_RBP_INSERT_MISSING_LAST_QUAD = 0x00000194, -} CBPerfSel; - -/* - * CBPerfOpFilterSel enum - */ - -typedef enum CBPerfOpFilterSel { -CB_PERF_OP_FILTER_SEL_WRITE_ONLY = 0x00000000, -CB_PERF_OP_FILTER_SEL_NEEDS_DESTINATION = 0x00000001, -CB_PERF_OP_FILTER_SEL_RESOLVE = 0x00000002, -CB_PERF_OP_FILTER_SEL_DECOMPRESS = 0x00000003, -CB_PERF_OP_FILTER_SEL_FMASK_DECOMPRESS = 0x00000004, -CB_PERF_OP_FILTER_SEL_ELIMINATE_FAST_CLEAR = 0x00000005, -} CBPerfOpFilterSel; - -/* - * CBPerfClearFilterSel enum - */ - -typedef enum CBPerfClearFilterSel { -CB_PERF_CLEAR_FILTER_SEL_NONCLEAR = 0x00000000, -CB_PERF_CLEAR_FILTER_SEL_CLEAR = 0x00000001, -} CBPerfClearFilterSel; - -/******************************************************* - * TC Enums - *******************************************************/ - -/* - * TC_OP_MASKS enum - */ - -typedef enum TC_OP_MASKS { -TC_OP_MASK_FLUSH_DENROM = 0x00000008, -TC_OP_MASK_64 = 0x00000020, -TC_OP_MASK_NO_RTN = 0x00000040, -} TC_OP_MASKS; - -/* - * TC_OP enum - */ - -typedef enum TC_OP { -TC_OP_READ = 0x00000000, -TC_OP_ATOMIC_FCMPSWAP_RTN_32 = 0x00000001, -TC_OP_ATOMIC_FMIN_RTN_32 = 0x00000002, -TC_OP_ATOMIC_FMAX_RTN_32 = 0x00000003, -TC_OP_RESERVED_FOP_RTN_32_0 = 0x00000004, -TC_OP_RESERVED_FOP_RTN_32_1 = 0x00000005, -TC_OP_RESERVED_FOP_RTN_32_2 = 0x00000006, -TC_OP_ATOMIC_SWAP_RTN_32 = 0x00000007, -TC_OP_ATOMIC_CMPSWAP_RTN_32 = 0x00000008, -TC_OP_ATOMIC_FCMPSWAP_FLUSH_DENORM_RTN_32 = 0x00000009, -TC_OP_ATOMIC_FMIN_FLUSH_DENORM_RTN_32 = 0x0000000a, -TC_OP_ATOMIC_FMAX_FLUSH_DENORM_RTN_32 = 0x0000000b, -TC_OP_PROBE_FILTER = 0x0000000c, -TC_OP_RESERVED_FOP_FLUSH_DENORM_RTN_32_1 = 0x0000000d, -TC_OP_RESERVED_FOP_FLUSH_DENORM_RTN_32_2 = 0x0000000e, -TC_OP_ATOMIC_ADD_RTN_32 = 0x0000000f, -TC_OP_ATOMIC_SUB_RTN_32 = 0x00000010, -TC_OP_ATOMIC_SMIN_RTN_32 = 0x00000011, -TC_OP_ATOMIC_UMIN_RTN_32 = 0x00000012, 
-TC_OP_ATOMIC_SMAX_RTN_32 = 0x00000013, -TC_OP_ATOMIC_UMAX_RTN_32 = 0x00000014, -TC_OP_ATOMIC_AND_RTN_32 = 0x00000015, -TC_OP_ATOMIC_OR_RTN_32 = 0x00000016, -TC_OP_ATOMIC_XOR_RTN_32 = 0x00000017, -TC_OP_ATOMIC_INC_RTN_32 = 0x00000018, -TC_OP_ATOMIC_DEC_RTN_32 = 0x00000019, -TC_OP_WBINVL1_VOL = 0x0000001a, -TC_OP_WBINVL1_SD = 0x0000001b, -TC_OP_RESERVED_NON_FLOAT_RTN_32_0 = 0x0000001c, -TC_OP_RESERVED_NON_FLOAT_RTN_32_1 = 0x0000001d, -TC_OP_RESERVED_NON_FLOAT_RTN_32_2 = 0x0000001e, -TC_OP_RESERVED_NON_FLOAT_RTN_32_3 = 0x0000001f, -TC_OP_WRITE = 0x00000020, -TC_OP_ATOMIC_FCMPSWAP_RTN_64 = 0x00000021, -TC_OP_ATOMIC_FMIN_RTN_64 = 0x00000022, -TC_OP_ATOMIC_FMAX_RTN_64 = 0x00000023, -TC_OP_RESERVED_FOP_RTN_64_0 = 0x00000024, -TC_OP_RESERVED_FOP_RTN_64_1 = 0x00000025, -TC_OP_RESERVED_FOP_RTN_64_2 = 0x00000026, -TC_OP_ATOMIC_SWAP_RTN_64 = 0x00000027, -TC_OP_ATOMIC_CMPSWAP_RTN_64 = 0x00000028, -TC_OP_ATOMIC_FCMPSWAP_FLUSH_DENORM_RTN_64 = 0x00000029, -TC_OP_ATOMIC_FMIN_FLUSH_DENORM_RTN_64 = 0x0000002a, -TC_OP_ATOMIC_FMAX_FLUSH_DENORM_RTN_64 = 0x0000002b, -TC_OP_WBINVL2_SD = 0x0000002c, -TC_OP_RESERVED_FOP_FLUSH_DENORM_RTN_64_0 = 0x0000002d, -TC_OP_RESERVED_FOP_FLUSH_DENORM_RTN_64_1 = 0x0000002e, -TC_OP_ATOMIC_ADD_RTN_64 = 0x0000002f, -TC_OP_ATOMIC_SUB_RTN_64 = 0x00000030, -TC_OP_ATOMIC_SMIN_RTN_64 = 0x00000031, -TC_OP_ATOMIC_UMIN_RTN_64 = 0x00000032, -TC_OP_ATOMIC_SMAX_RTN_64 = 0x00000033, -TC_OP_ATOMIC_UMAX_RTN_64 = 0x00000034, -TC_OP_ATOMIC_AND_RTN_64 = 0x00000035, -TC_OP_ATOMIC_OR_RTN_64 = 0x00000036, -TC_OP_ATOMIC_XOR_RTN_64 = 0x00000037, -TC_OP_ATOMIC_INC_RTN_64 = 0x00000038, -TC_OP_ATOMIC_DEC_RTN_64 = 0x00000039, -TC_OP_WBL2_NC = 0x0000003a, -TC_OP_WBL2_WC = 0x0000003b, -TC_OP_RESERVED_NON_FLOAT_RTN_64_1 = 0x0000003c, -TC_OP_RESERVED_NON_FLOAT_RTN_64_2 = 0x0000003d, -TC_OP_RESERVED_NON_FLOAT_RTN_64_3 = 0x0000003e, -TC_OP_RESERVED_NON_FLOAT_RTN_64_4 = 0x0000003f, -TC_OP_WBINVL1 = 0x00000040, -TC_OP_ATOMIC_FCMPSWAP_32 = 0x00000041, -TC_OP_ATOMIC_FMIN_32 = 0x00000042, 
-TC_OP_ATOMIC_FMAX_32 = 0x00000043, -TC_OP_RESERVED_FOP_32_0 = 0x00000044, -TC_OP_RESERVED_FOP_32_1 = 0x00000045, -TC_OP_RESERVED_FOP_32_2 = 0x00000046, -TC_OP_ATOMIC_SWAP_32 = 0x00000047, -TC_OP_ATOMIC_CMPSWAP_32 = 0x00000048, -TC_OP_ATOMIC_FCMPSWAP_FLUSH_DENORM_32 = 0x00000049, -TC_OP_ATOMIC_FMIN_FLUSH_DENORM_32 = 0x0000004a, -TC_OP_ATOMIC_FMAX_FLUSH_DENORM_32 = 0x0000004b, -TC_OP_INV_METADATA = 0x0000004c, -TC_OP_RESERVED_FOP_FLUSH_DENORM_32_1 = 0x0000004d, -TC_OP_RESERVED_FOP_FLUSH_DENORM_32_2 = 0x0000004e, -TC_OP_ATOMIC_ADD_32 = 0x0000004f, -TC_OP_ATOMIC_SUB_32 = 0x00000050, -TC_OP_ATOMIC_SMIN_32 = 0x00000051, -TC_OP_ATOMIC_UMIN_32 = 0x00000052, -TC_OP_ATOMIC_SMAX_32 = 0x00000053, -TC_OP_ATOMIC_UMAX_32 = 0x00000054, -TC_OP_ATOMIC_AND_32 = 0x00000055, -TC_OP_ATOMIC_OR_32 = 0x00000056, -TC_OP_ATOMIC_XOR_32 = 0x00000057, -TC_OP_ATOMIC_INC_32 = 0x00000058, -TC_OP_ATOMIC_DEC_32 = 0x00000059, -TC_OP_INVL2_NC = 0x0000005a, -TC_OP_NOP_RTN0 = 0x0000005b, -TC_OP_RESERVED_NON_FLOAT_32_1 = 0x0000005c, -TC_OP_RESERVED_NON_FLOAT_32_2 = 0x0000005d, -TC_OP_RESERVED_NON_FLOAT_32_3 = 0x0000005e, -TC_OP_RESERVED_NON_FLOAT_32_4 = 0x0000005f, -TC_OP_WBINVL2 = 0x00000060, -TC_OP_ATOMIC_FCMPSWAP_64 = 0x00000061, -TC_OP_ATOMIC_FMIN_64 = 0x00000062, -TC_OP_ATOMIC_FMAX_64 = 0x00000063, -TC_OP_RESERVED_FOP_64_0 = 0x00000064, -TC_OP_RESERVED_FOP_64_1 = 0x00000065, -TC_OP_RESERVED_FOP_64_2 = 0x00000066, -TC_OP_ATOMIC_SWAP_64 = 0x00000067, -TC_OP_ATOMIC_CMPSWAP_64 = 0x00000068, -TC_OP_ATOMIC_FCMPSWAP_FLUSH_DENORM_64 = 0x00000069, -TC_OP_ATOMIC_FMIN_FLUSH_DENORM_64 = 0x0000006a, -TC_OP_ATOMIC_FMAX_FLUSH_DENORM_64 = 0x0000006b, -TC_OP_RESERVED_FOP_FLUSH_DENORM_64_0 = 0x0000006c, -TC_OP_RESERVED_FOP_FLUSH_DENORM_64_1 = 0x0000006d, -TC_OP_RESERVED_FOP_FLUSH_DENORM_64_2 = 0x0000006e, -TC_OP_ATOMIC_ADD_64 = 0x0000006f, -TC_OP_ATOMIC_SUB_64 = 0x00000070, -TC_OP_ATOMIC_SMIN_64 = 0x00000071, -TC_OP_ATOMIC_UMIN_64 = 0x00000072, -TC_OP_ATOMIC_SMAX_64 = 0x00000073, -TC_OP_ATOMIC_UMAX_64 = 0x00000074, 
-TC_OP_ATOMIC_AND_64 = 0x00000075, -TC_OP_ATOMIC_OR_64 = 0x00000076, -TC_OP_ATOMIC_XOR_64 = 0x00000077, -TC_OP_ATOMIC_INC_64 = 0x00000078, -TC_OP_ATOMIC_DEC_64 = 0x00000079, -TC_OP_WBINVL2_NC = 0x0000007a, -TC_OP_NOP_ACK = 0x0000007b, -TC_OP_RESERVED_NON_FLOAT_64_1 = 0x0000007c, -TC_OP_RESERVED_NON_FLOAT_64_2 = 0x0000007d, -TC_OP_RESERVED_NON_FLOAT_64_3 = 0x0000007e, -TC_OP_RESERVED_NON_FLOAT_64_4 = 0x0000007f, -} TC_OP; - -/* - * TC_CHUB_REQ_CREDITS_ENUM enum - */ - -typedef enum TC_CHUB_REQ_CREDITS_ENUM { -TC_CHUB_REQ_CREDITS = 0x00000010, -} TC_CHUB_REQ_CREDITS_ENUM; - -/* - * CHUB_TC_RET_CREDITS_ENUM enum - */ - -typedef enum CHUB_TC_RET_CREDITS_ENUM { -CHUB_TC_RET_CREDITS = 0x00000020, -} CHUB_TC_RET_CREDITS_ENUM; - -/* - * TC_NACKS enum - */ - -typedef enum TC_NACKS { -TC_NACK_NO_FAULT = 0x00000000, -TC_NACK_PAGE_FAULT = 0x00000001, -TC_NACK_PROTECTION_FAULT = 0x00000002, -TC_NACK_DATA_ERROR = 0x00000003, -} TC_NACKS; - -/* - * TC_EA_CID enum - */ - -typedef enum TC_EA_CID { -TC_EA_CID_RT = 0x00000000, -TC_EA_CID_FMASK = 0x00000001, -TC_EA_CID_DCC = 0x00000002, -TC_EA_CID_TCPMETA = 0x00000003, -TC_EA_CID_Z = 0x00000004, -TC_EA_CID_STENCIL = 0x00000005, -TC_EA_CID_HTILE = 0x00000006, -TC_EA_CID_MISC = 0x00000007, -TC_EA_CID_TCP = 0x00000008, -TC_EA_CID_SQC = 0x00000009, -TC_EA_CID_CPF = 0x0000000a, -TC_EA_CID_CPG = 0x0000000b, -TC_EA_CID_IA = 0x0000000c, -TC_EA_CID_WD = 0x0000000d, -TC_EA_CID_PA = 0x0000000e, -TC_EA_CID_UTCL2_TPI = 0x0000000f, -} TC_EA_CID; - -/******************************************************* - * GC_CAC Enums - *******************************************************/ - -/******************************************************* - * RLC Enums - *******************************************************/ - -/******************************************************* - * SPI Enums - *******************************************************/ - -/* - * SPI_SAMPLE_CNTL enum - */ - -typedef enum SPI_SAMPLE_CNTL { -CENTROIDS_ONLY = 0x00000000, 
-CENTERS_ONLY = 0x00000001, -CENTROIDS_AND_CENTERS = 0x00000002, -UNDEF = 0x00000003, -} SPI_SAMPLE_CNTL; - -/* - * SPI_FOG_MODE enum - */ - -typedef enum SPI_FOG_MODE { -SPI_FOG_NONE = 0x00000000, -SPI_FOG_EXP = 0x00000001, -SPI_FOG_EXP2 = 0x00000002, -SPI_FOG_LINEAR = 0x00000003, -} SPI_FOG_MODE; - -/* - * SPI_PNT_SPRITE_OVERRIDE enum - */ - -typedef enum SPI_PNT_SPRITE_OVERRIDE { -SPI_PNT_SPRITE_SEL_0 = 0x00000000, -SPI_PNT_SPRITE_SEL_1 = 0x00000001, -SPI_PNT_SPRITE_SEL_S = 0x00000002, -SPI_PNT_SPRITE_SEL_T = 0x00000003, -SPI_PNT_SPRITE_SEL_NONE = 0x00000004, -} SPI_PNT_SPRITE_OVERRIDE; - -/* - * SPI_PERFCNT_SEL enum - */ - -typedef enum SPI_PERFCNT_SEL { -SPI_PERF_VS_WINDOW_VALID = 0x00000000, -SPI_PERF_VS_BUSY = 0x00000001, -SPI_PERF_VS_FIRST_WAVE = 0x00000002, -SPI_PERF_VS_LAST_WAVE = 0x00000003, -SPI_PERF_VS_LSHS_DEALLOC = 0x00000004, -SPI_PERF_VS_PC_STALL = 0x00000005, -SPI_PERF_VS_POS0_STALL = 0x00000006, -SPI_PERF_VS_POS1_STALL = 0x00000007, -SPI_PERF_VS_CRAWLER_STALL = 0x00000008, -SPI_PERF_VS_EVENT_WAVE = 0x00000009, -SPI_PERF_VS_WAVE = 0x0000000a, -SPI_PERF_VS_PERS_UPD_FULL0 = 0x0000000b, -SPI_PERF_VS_PERS_UPD_FULL1 = 0x0000000c, -SPI_PERF_VS_LATE_ALLOC_FULL = 0x0000000d, -SPI_PERF_VS_FIRST_SUBGRP = 0x0000000e, -SPI_PERF_VS_LAST_SUBGRP = 0x0000000f, -SPI_PERF_GS_WINDOW_VALID = 0x00000010, -SPI_PERF_GS_BUSY = 0x00000011, -SPI_PERF_GS_CRAWLER_STALL = 0x00000012, -SPI_PERF_GS_EVENT_WAVE = 0x00000013, -SPI_PERF_GS_WAVE = 0x00000014, -SPI_PERF_GS_PERS_UPD_FULL0 = 0x00000015, -SPI_PERF_GS_PERS_UPD_FULL1 = 0x00000016, -SPI_PERF_GS_FIRST_SUBGRP = 0x00000017, -SPI_PERF_GS_LAST_SUBGRP = 0x00000018, -SPI_PERF_ES_WINDOW_VALID = 0x00000019, -SPI_PERF_ES_BUSY = 0x0000001a, -SPI_PERF_ES_CRAWLER_STALL = 0x0000001b, -SPI_PERF_ES_FIRST_WAVE = 0x0000001c, -SPI_PERF_ES_LAST_WAVE = 0x0000001d, -SPI_PERF_ES_LSHS_DEALLOC = 0x0000001e, -SPI_PERF_ES_EVENT_WAVE = 0x0000001f, -SPI_PERF_ES_WAVE = 0x00000020, -SPI_PERF_ES_PERS_UPD_FULL0 = 0x00000021, -SPI_PERF_ES_PERS_UPD_FULL1 = 
0x00000022, -SPI_PERF_ES_FIRST_SUBGRP = 0x00000023, -SPI_PERF_ES_LAST_SUBGRP = 0x00000024, -SPI_PERF_HS_WINDOW_VALID = 0x00000025, -SPI_PERF_HS_BUSY = 0x00000026, -SPI_PERF_HS_CRAWLER_STALL = 0x00000027, -SPI_PERF_HS_FIRST_WAVE = 0x00000028, -SPI_PERF_HS_LAST_WAVE = 0x00000029, -SPI_PERF_HS_LSHS_DEALLOC = 0x0000002a, -SPI_PERF_HS_EVENT_WAVE = 0x0000002b, -SPI_PERF_HS_WAVE = 0x0000002c, -SPI_PERF_HS_PERS_UPD_FULL0 = 0x0000002d, -SPI_PERF_HS_PERS_UPD_FULL1 = 0x0000002e, -SPI_PERF_LS_WINDOW_VALID = 0x0000002f, -SPI_PERF_LS_BUSY = 0x00000030, -SPI_PERF_LS_CRAWLER_STALL = 0x00000031, -SPI_PERF_LS_FIRST_WAVE = 0x00000032, -SPI_PERF_LS_LAST_WAVE = 0x00000033, -SPI_PERF_OFFCHIP_LDS_STALL_LS = 0x00000034, -SPI_PERF_LS_EVENT_WAVE = 0x00000035, -SPI_PERF_LS_WAVE = 0x00000036, -SPI_PERF_LS_PERS_UPD_FULL0 = 0x00000037, -SPI_PERF_LS_PERS_UPD_FULL1 = 0x00000038, -SPI_PERF_CSG_WINDOW_VALID = 0x00000039, -SPI_PERF_CSG_BUSY = 0x0000003a, -SPI_PERF_CSG_NUM_THREADGROUPS = 0x0000003b, -SPI_PERF_CSG_CRAWLER_STALL = 0x0000003c, -SPI_PERF_CSG_EVENT_WAVE = 0x0000003d, -SPI_PERF_CSG_WAVE = 0x0000003e, -SPI_PERF_CSN_WINDOW_VALID = 0x0000003f, -SPI_PERF_CSN_BUSY = 0x00000040, -SPI_PERF_CSN_NUM_THREADGROUPS = 0x00000041, -SPI_PERF_CSN_CRAWLER_STALL = 0x00000042, -SPI_PERF_CSN_EVENT_WAVE = 0x00000043, -SPI_PERF_CSN_WAVE = 0x00000044, -SPI_PERF_PS_CTL_WINDOW_VALID = 0x00000045, -SPI_PERF_PS_CTL_BUSY = 0x00000046, -SPI_PERF_PS_CTL_ACTIVE = 0x00000047, -SPI_PERF_PS_CTL_DEALLOC_BIN0 = 0x00000048, -SPI_PERF_PS_CTL_FPOS_BIN1_STALL = 0x00000049, -SPI_PERF_PS_CTL_EVENT_WAVE = 0x0000004a, -SPI_PERF_PS_CTL_WAVE = 0x0000004b, -SPI_PERF_PS_CTL_OPT_WAVE = 0x0000004c, -SPI_PERF_PS_CTL_PASS_BIN0 = 0x0000004d, -SPI_PERF_PS_CTL_PASS_BIN1 = 0x0000004e, -SPI_PERF_PS_CTL_FPOS_BIN2 = 0x0000004f, -SPI_PERF_PS_CTL_PRIM_BIN0 = 0x00000050, -SPI_PERF_PS_CTL_PRIM_BIN1 = 0x00000051, -SPI_PERF_PS_CTL_CNF_BIN2 = 0x00000052, -SPI_PERF_PS_CTL_CNF_BIN3 = 0x00000053, -SPI_PERF_PS_CTL_CRAWLER_STALL = 0x00000054, 
-SPI_PERF_PS_CTL_LDS_RES_FULL = 0x00000055, -SPI_PERF_PS_PERS_UPD_FULL0 = 0x00000056, -SPI_PERF_PS_PERS_UPD_FULL1 = 0x00000057, -SPI_PERF_PIX_ALLOC_PEND_CNT = 0x00000058, -SPI_PERF_PIX_ALLOC_SCB_STALL = 0x00000059, -SPI_PERF_PIX_ALLOC_DB0_STALL = 0x0000005a, -SPI_PERF_PIX_ALLOC_DB1_STALL = 0x0000005b, -SPI_PERF_PIX_ALLOC_DB2_STALL = 0x0000005c, -SPI_PERF_PIX_ALLOC_DB3_STALL = 0x0000005d, -SPI_PERF_LDS0_PC_VALID = 0x0000005e, -SPI_PERF_LDS1_PC_VALID = 0x0000005f, -SPI_PERF_RA_PIPE_REQ_BIN2 = 0x00000060, -SPI_PERF_RA_TASK_REQ_BIN3 = 0x00000061, -SPI_PERF_RA_WR_CTL_FULL = 0x00000062, -SPI_PERF_RA_REQ_NO_ALLOC = 0x00000063, -SPI_PERF_RA_REQ_NO_ALLOC_PS = 0x00000064, -SPI_PERF_RA_REQ_NO_ALLOC_VS = 0x00000065, -SPI_PERF_RA_REQ_NO_ALLOC_GS = 0x00000066, -SPI_PERF_RA_REQ_NO_ALLOC_ES = 0x00000067, -SPI_PERF_RA_REQ_NO_ALLOC_HS = 0x00000068, -SPI_PERF_RA_REQ_NO_ALLOC_LS = 0x00000069, -SPI_PERF_RA_REQ_NO_ALLOC_CSG = 0x0000006a, -SPI_PERF_RA_REQ_NO_ALLOC_CSN = 0x0000006b, -SPI_PERF_RA_RES_STALL_PS = 0x0000006c, -SPI_PERF_RA_RES_STALL_VS = 0x0000006d, -SPI_PERF_RA_RES_STALL_GS = 0x0000006e, -SPI_PERF_RA_RES_STALL_ES = 0x0000006f, -SPI_PERF_RA_RES_STALL_HS = 0x00000070, -SPI_PERF_RA_RES_STALL_LS = 0x00000071, -SPI_PERF_RA_RES_STALL_CSG = 0x00000072, -SPI_PERF_RA_RES_STALL_CSN = 0x00000073, -SPI_PERF_RA_TMP_STALL_PS = 0x00000074, -SPI_PERF_RA_TMP_STALL_VS = 0x00000075, -SPI_PERF_RA_TMP_STALL_GS = 0x00000076, -SPI_PERF_RA_TMP_STALL_ES = 0x00000077, -SPI_PERF_RA_TMP_STALL_HS = 0x00000078, -SPI_PERF_RA_TMP_STALL_LS = 0x00000079, -SPI_PERF_RA_TMP_STALL_CSG = 0x0000007a, -SPI_PERF_RA_TMP_STALL_CSN = 0x0000007b, -SPI_PERF_RA_WAVE_SIMD_FULL_PS = 0x0000007c, -SPI_PERF_RA_WAVE_SIMD_FULL_VS = 0x0000007d, -SPI_PERF_RA_WAVE_SIMD_FULL_GS = 0x0000007e, -SPI_PERF_RA_WAVE_SIMD_FULL_ES = 0x0000007f, -SPI_PERF_RA_WAVE_SIMD_FULL_HS = 0x00000080, -SPI_PERF_RA_WAVE_SIMD_FULL_LS = 0x00000081, -SPI_PERF_RA_WAVE_SIMD_FULL_CSG = 0x00000082, -SPI_PERF_RA_WAVE_SIMD_FULL_CSN = 0x00000083, 
-SPI_PERF_RA_VGPR_SIMD_FULL_PS = 0x00000084, -SPI_PERF_RA_VGPR_SIMD_FULL_VS = 0x00000085, -SPI_PERF_RA_VGPR_SIMD_FULL_GS = 0x00000086, -SPI_PERF_RA_VGPR_SIMD_FULL_ES = 0x00000087, -SPI_PERF_RA_VGPR_SIMD_FULL_HS = 0x00000088, -SPI_PERF_RA_VGPR_SIMD_FULL_LS = 0x00000089, -SPI_PERF_RA_VGPR_SIMD_FULL_CSG = 0x0000008a, -SPI_PERF_RA_VGPR_SIMD_FULL_CSN = 0x0000008b, -SPI_PERF_RA_SGPR_SIMD_FULL_PS = 0x0000008c, -SPI_PERF_RA_SGPR_SIMD_FULL_VS = 0x0000008d, -SPI_PERF_RA_SGPR_SIMD_FULL_GS = 0x0000008e, -SPI_PERF_RA_SGPR_SIMD_FULL_ES = 0x0000008f, -SPI_PERF_RA_SGPR_SIMD_FULL_HS = 0x00000090, -SPI_PERF_RA_SGPR_SIMD_FULL_LS = 0x00000091, -SPI_PERF_RA_SGPR_SIMD_FULL_CSG = 0x00000092, -SPI_PERF_RA_SGPR_SIMD_FULL_CSN = 0x00000093, -SPI_PERF_RA_LDS_CU_FULL_PS = 0x00000094, -SPI_PERF_RA_LDS_CU_FULL_LS = 0x00000095, -SPI_PERF_RA_LDS_CU_FULL_ES = 0x00000096, -SPI_PERF_RA_LDS_CU_FULL_CSG = 0x00000097, -SPI_PERF_RA_LDS_CU_FULL_CSN = 0x00000098, -SPI_PERF_RA_BAR_CU_FULL_HS = 0x00000099, -SPI_PERF_RA_BAR_CU_FULL_CSG = 0x0000009a, -SPI_PERF_RA_BAR_CU_FULL_CSN = 0x0000009b, -SPI_PERF_RA_BULKY_CU_FULL_CSG = 0x0000009c, -SPI_PERF_RA_BULKY_CU_FULL_CSN = 0x0000009d, -SPI_PERF_RA_TGLIM_CU_FULL_CSG = 0x0000009e, -SPI_PERF_RA_TGLIM_CU_FULL_CSN = 0x0000009f, -SPI_PERF_RA_WVLIM_STALL_PS = 0x000000a0, -SPI_PERF_RA_WVLIM_STALL_VS = 0x000000a1, -SPI_PERF_RA_WVLIM_STALL_GS = 0x000000a2, -SPI_PERF_RA_WVLIM_STALL_ES = 0x000000a3, -SPI_PERF_RA_WVLIM_STALL_HS = 0x000000a4, -SPI_PERF_RA_WVLIM_STALL_LS = 0x000000a5, -SPI_PERF_RA_WVLIM_STALL_CSG = 0x000000a6, -SPI_PERF_RA_WVLIM_STALL_CSN = 0x000000a7, -SPI_PERF_RA_PS_LOCK_NA = 0x000000a8, -SPI_PERF_RA_VS_LOCK = 0x000000a9, -SPI_PERF_RA_GS_LOCK = 0x000000aa, -SPI_PERF_RA_ES_LOCK = 0x000000ab, -SPI_PERF_RA_HS_LOCK = 0x000000ac, -SPI_PERF_RA_LS_LOCK = 0x000000ad, -SPI_PERF_RA_CSG_LOCK = 0x000000ae, -SPI_PERF_RA_CSN_LOCK = 0x000000af, -SPI_PERF_RA_RSV_UPD = 0x000000b0, -SPI_PERF_EXP_ARB_COL_CNT = 0x000000b1, -SPI_PERF_EXP_ARB_PAR_CNT = 0x000000b2, 
-SPI_PERF_EXP_ARB_POS_CNT = 0x000000b3, -SPI_PERF_EXP_ARB_GDS_CNT = 0x000000b4, -SPI_PERF_CLKGATE_BUSY_STALL = 0x000000b5, -SPI_PERF_CLKGATE_ACTIVE_STALL = 0x000000b6, -SPI_PERF_CLKGATE_ALL_CLOCKS_ON = 0x000000b7, -SPI_PERF_CLKGATE_CGTT_DYN_ON = 0x000000b8, -SPI_PERF_CLKGATE_CGTT_REG_ON = 0x000000b9, -SPI_PERF_NUM_VS_POS_EXPORTS = 0x000000ba, -SPI_PERF_NUM_VS_PARAM_EXPORTS = 0x000000bb, -SPI_PERF_NUM_PS_COL_EXPORTS = 0x000000bc, -SPI_PERF_ES_GRP_FIFO_FULL = 0x000000bd, -SPI_PERF_GS_GRP_FIFO_FULL = 0x000000be, -SPI_PERF_HS_GRP_FIFO_FULL = 0x000000bf, -SPI_PERF_LS_GRP_FIFO_FULL = 0x000000c0, -SPI_PERF_VS_ALLOC_CNT = 0x000000c1, -SPI_PERF_VS_LATE_ALLOC_ACCUM = 0x000000c2, -SPI_PERF_PC_ALLOC_CNT = 0x000000c3, -SPI_PERF_PC_ALLOC_ACCUM = 0x000000c4, -} SPI_PERFCNT_SEL; - -/* - * SPI_SHADER_FORMAT enum - */ - -typedef enum SPI_SHADER_FORMAT { -SPI_SHADER_NONE = 0x00000000, -SPI_SHADER_1COMP = 0x00000001, -SPI_SHADER_2COMP = 0x00000002, -SPI_SHADER_4COMPRESS = 0x00000003, -SPI_SHADER_4COMP = 0x00000004, -} SPI_SHADER_FORMAT; - -/* - * SPI_SHADER_EX_FORMAT enum - */ - -typedef enum SPI_SHADER_EX_FORMAT { -SPI_SHADER_ZERO = 0x00000000, -SPI_SHADER_32_R = 0x00000001, -SPI_SHADER_32_GR = 0x00000002, -SPI_SHADER_32_AR = 0x00000003, -SPI_SHADER_FP16_ABGR = 0x00000004, -SPI_SHADER_UNORM16_ABGR = 0x00000005, -SPI_SHADER_SNORM16_ABGR = 0x00000006, -SPI_SHADER_UINT16_ABGR = 0x00000007, -SPI_SHADER_SINT16_ABGR = 0x00000008, -SPI_SHADER_32_ABGR = 0x00000009, -} SPI_SHADER_EX_FORMAT; - -/* - * CLKGATE_SM_MODE enum - */ - -typedef enum CLKGATE_SM_MODE { -ON_SEQ = 0x00000000, -OFF_SEQ = 0x00000001, -PROG_SEQ = 0x00000002, -READ_SEQ = 0x00000003, -SM_MODE_RESERVED = 0x00000004, -} CLKGATE_SM_MODE; - -/* - * CLKGATE_BASE_MODE enum - */ - -typedef enum CLKGATE_BASE_MODE { -MULT_8 = 0x00000000, -MULT_16 = 0x00000001, -} CLKGATE_BASE_MODE; - -/******************************************************* - * SQ Enums - *******************************************************/ - -/* - * SQ_TEX_CLAMP 
enum - */ - -typedef enum SQ_TEX_CLAMP { -SQ_TEX_WRAP = 0x00000000, -SQ_TEX_MIRROR = 0x00000001, -SQ_TEX_CLAMP_LAST_TEXEL = 0x00000002, -SQ_TEX_MIRROR_ONCE_LAST_TEXEL = 0x00000003, -SQ_TEX_CLAMP_HALF_BORDER = 0x00000004, -SQ_TEX_MIRROR_ONCE_HALF_BORDER = 0x00000005, -SQ_TEX_CLAMP_BORDER = 0x00000006, -SQ_TEX_MIRROR_ONCE_BORDER = 0x00000007, -} SQ_TEX_CLAMP; - -/* - * SQ_TEX_XY_FILTER enum - */ - -typedef enum SQ_TEX_XY_FILTER { -SQ_TEX_XY_FILTER_POINT = 0x00000000, -SQ_TEX_XY_FILTER_BILINEAR = 0x00000001, -SQ_TEX_XY_FILTER_ANISO_POINT = 0x00000002, -SQ_TEX_XY_FILTER_ANISO_BILINEAR = 0x00000003, -} SQ_TEX_XY_FILTER; - -/* - * SQ_TEX_Z_FILTER enum - */ - -typedef enum SQ_TEX_Z_FILTER { -SQ_TEX_Z_FILTER_NONE = 0x00000000, -SQ_TEX_Z_FILTER_POINT = 0x00000001, -SQ_TEX_Z_FILTER_LINEAR = 0x00000002, -} SQ_TEX_Z_FILTER; - -/* - * SQ_TEX_MIP_FILTER enum - */ - -typedef enum SQ_TEX_MIP_FILTER { -SQ_TEX_MIP_FILTER_NONE = 0x00000000, -SQ_TEX_MIP_FILTER_POINT = 0x00000001, -SQ_TEX_MIP_FILTER_LINEAR = 0x00000002, -SQ_TEX_MIP_FILTER_POINT_ANISO_ADJ = 0x00000003, -} SQ_TEX_MIP_FILTER; - -/* - * SQ_TEX_ANISO_RATIO enum - */ - -typedef enum SQ_TEX_ANISO_RATIO { -SQ_TEX_ANISO_RATIO_1 = 0x00000000, -SQ_TEX_ANISO_RATIO_2 = 0x00000001, -SQ_TEX_ANISO_RATIO_4 = 0x00000002, -SQ_TEX_ANISO_RATIO_8 = 0x00000003, -SQ_TEX_ANISO_RATIO_16 = 0x00000004, -} SQ_TEX_ANISO_RATIO; - -/* - * SQ_TEX_DEPTH_COMPARE enum - */ - -typedef enum SQ_TEX_DEPTH_COMPARE { -SQ_TEX_DEPTH_COMPARE_NEVER = 0x00000000, -SQ_TEX_DEPTH_COMPARE_LESS = 0x00000001, -SQ_TEX_DEPTH_COMPARE_EQUAL = 0x00000002, -SQ_TEX_DEPTH_COMPARE_LESSEQUAL = 0x00000003, -SQ_TEX_DEPTH_COMPARE_GREATER = 0x00000004, -SQ_TEX_DEPTH_COMPARE_NOTEQUAL = 0x00000005, -SQ_TEX_DEPTH_COMPARE_GREATEREQUAL = 0x00000006, -SQ_TEX_DEPTH_COMPARE_ALWAYS = 0x00000007, -} SQ_TEX_DEPTH_COMPARE; - -/* - * SQ_TEX_BORDER_COLOR enum - */ - -typedef enum SQ_TEX_BORDER_COLOR { -SQ_TEX_BORDER_COLOR_TRANS_BLACK = 0x00000000, -SQ_TEX_BORDER_COLOR_OPAQUE_BLACK = 0x00000001, 
-SQ_TEX_BORDER_COLOR_OPAQUE_WHITE = 0x00000002, -SQ_TEX_BORDER_COLOR_REGISTER = 0x00000003, -} SQ_TEX_BORDER_COLOR; - -/* - * SQ_RSRC_BUF_TYPE enum - */ - -typedef enum SQ_RSRC_BUF_TYPE { -SQ_RSRC_BUF = 0x00000000, -SQ_RSRC_BUF_RSVD_1 = 0x00000001, -SQ_RSRC_BUF_RSVD_2 = 0x00000002, -SQ_RSRC_BUF_RSVD_3 = 0x00000003, -} SQ_RSRC_BUF_TYPE; - -/* - * SQ_RSRC_IMG_TYPE enum - */ - -typedef enum SQ_RSRC_IMG_TYPE { -SQ_RSRC_IMG_RSVD_0 = 0x00000000, -SQ_RSRC_IMG_RSVD_1 = 0x00000001, -SQ_RSRC_IMG_RSVD_2 = 0x00000002, -SQ_RSRC_IMG_RSVD_3 = 0x00000003, -SQ_RSRC_IMG_RSVD_4 = 0x00000004, -SQ_RSRC_IMG_RSVD_5 = 0x00000005, -SQ_RSRC_IMG_RSVD_6 = 0x00000006, -SQ_RSRC_IMG_RSVD_7 = 0x00000007, -SQ_RSRC_IMG_1D = 0x00000008, -SQ_RSRC_IMG_2D = 0x00000009, -SQ_RSRC_IMG_3D = 0x0000000a, -SQ_RSRC_IMG_CUBE = 0x0000000b, -SQ_RSRC_IMG_1D_ARRAY = 0x0000000c, -SQ_RSRC_IMG_2D_ARRAY = 0x0000000d, -SQ_RSRC_IMG_2D_MSAA = 0x0000000e, -SQ_RSRC_IMG_2D_MSAA_ARRAY = 0x0000000f, -} SQ_RSRC_IMG_TYPE; - -/* - * SQ_RSRC_FLAT_TYPE enum - */ - -typedef enum SQ_RSRC_FLAT_TYPE { -SQ_RSRC_FLAT_RSVD_0 = 0x00000000, -SQ_RSRC_FLAT = 0x00000001, -SQ_RSRC_FLAT_RSVD_2 = 0x00000002, -SQ_RSRC_FLAT_RSVD_3 = 0x00000003, -} SQ_RSRC_FLAT_TYPE; - -/* - * SQ_IMG_FILTER_TYPE enum - */ - -typedef enum SQ_IMG_FILTER_TYPE { -SQ_IMG_FILTER_MODE_BLEND = 0x00000000, -SQ_IMG_FILTER_MODE_MIN = 0x00000001, -SQ_IMG_FILTER_MODE_MAX = 0x00000002, -} SQ_IMG_FILTER_TYPE; - -/* - * SQ_SEL_XYZW01 enum - */ - -typedef enum SQ_SEL_XYZW01 { -SQ_SEL_0 = 0x00000000, -SQ_SEL_1 = 0x00000001, -SQ_SEL_RESERVED_0 = 0x00000002, -SQ_SEL_RESERVED_1 = 0x00000003, -SQ_SEL_X = 0x00000004, -SQ_SEL_Y = 0x00000005, -SQ_SEL_Z = 0x00000006, -SQ_SEL_W = 0x00000007, -} SQ_SEL_XYZW01; - -/* - * SQ_WAVE_TYPE enum - */ - -typedef enum SQ_WAVE_TYPE { -SQ_WAVE_TYPE_PS = 0x00000000, -SQ_WAVE_TYPE_VS = 0x00000001, -SQ_WAVE_TYPE_GS = 0x00000002, -SQ_WAVE_TYPE_ES = 0x00000003, -SQ_WAVE_TYPE_HS = 0x00000004, -SQ_WAVE_TYPE_LS = 0x00000005, -SQ_WAVE_TYPE_CS = 0x00000006, 
-SQ_WAVE_TYPE_PS1 = 0x00000007, -} SQ_WAVE_TYPE; - -/* - * SQ_THREAD_TRACE_TOKEN_TYPE enum - */ - -typedef enum SQ_THREAD_TRACE_TOKEN_TYPE { -SQ_THREAD_TRACE_TOKEN_MISC = 0x00000000, -SQ_THREAD_TRACE_TOKEN_TIMESTAMP = 0x00000001, -SQ_THREAD_TRACE_TOKEN_REG = 0x00000002, -SQ_THREAD_TRACE_TOKEN_WAVE_START = 0x00000003, -SQ_THREAD_TRACE_TOKEN_WAVE_ALLOC = 0x00000004, -SQ_THREAD_TRACE_TOKEN_REG_CSPRIV = 0x00000005, -SQ_THREAD_TRACE_TOKEN_WAVE_END = 0x00000006, -SQ_THREAD_TRACE_TOKEN_EVENT = 0x00000007, -SQ_THREAD_TRACE_TOKEN_EVENT_CS = 0x00000008, -SQ_THREAD_TRACE_TOKEN_EVENT_GFX1 = 0x00000009, -SQ_THREAD_TRACE_TOKEN_INST = 0x0000000a, -SQ_THREAD_TRACE_TOKEN_INST_PC = 0x0000000b, -SQ_THREAD_TRACE_TOKEN_INST_USERDATA = 0x0000000c, -SQ_THREAD_TRACE_TOKEN_ISSUE = 0x0000000d, -SQ_THREAD_TRACE_TOKEN_PERF = 0x0000000e, -SQ_THREAD_TRACE_TOKEN_REG_CS = 0x0000000f, -} SQ_THREAD_TRACE_TOKEN_TYPE; - -/* - * SQ_THREAD_TRACE_MISC_TOKEN_TYPE enum - */ - -typedef enum SQ_THREAD_TRACE_MISC_TOKEN_TYPE { -SQ_THREAD_TRACE_MISC_TOKEN_TIME = 0x00000000, -SQ_THREAD_TRACE_MISC_TOKEN_TIME_RESET = 0x00000001, -SQ_THREAD_TRACE_MISC_TOKEN_PACKET_LOST = 0x00000002, -SQ_THREAD_TRACE_MISC_TOKEN_SURF_SYNC = 0x00000003, -SQ_THREAD_TRACE_MISC_TOKEN_TTRACE_STALL_BEGIN = 0x00000004, -SQ_THREAD_TRACE_MISC_TOKEN_TTRACE_STALL_END = 0x00000005, -SQ_THREAD_TRACE_MISC_TOKEN_SAVECTX = 0x00000006, -SQ_THREAD_TRACE_MISC_TOKEN_SHOOT_DOWN = 0x00000007, -} SQ_THREAD_TRACE_MISC_TOKEN_TYPE; - -/* - * SQ_THREAD_TRACE_INST_TYPE enum - */ - -typedef enum SQ_THREAD_TRACE_INST_TYPE { -SQ_THREAD_TRACE_INST_TYPE_SMEM_RD = 0x00000000, -SQ_THREAD_TRACE_INST_TYPE_SALU_32 = 0x00000001, -SQ_THREAD_TRACE_INST_TYPE_VMEM_RD = 0x00000002, -SQ_THREAD_TRACE_INST_TYPE_VMEM_WR = 0x00000003, -SQ_THREAD_TRACE_INST_TYPE_FLAT_WR = 0x00000004, -SQ_THREAD_TRACE_INST_TYPE_VALU_32 = 0x00000005, -SQ_THREAD_TRACE_INST_TYPE_LDS = 0x00000006, -SQ_THREAD_TRACE_INST_TYPE_PC = 0x00000007, -SQ_THREAD_TRACE_INST_TYPE_EXPREQ_GDS = 0x00000008, 
-SQ_THREAD_TRACE_INST_TYPE_EXPREQ_GFX = 0x00000009, -SQ_THREAD_TRACE_INST_TYPE_EXPGNT_PAR_COL = 0x0000000a, -SQ_THREAD_TRACE_INST_TYPE_EXPGNT_POS_GDS = 0x0000000b, -SQ_THREAD_TRACE_INST_TYPE_JUMP = 0x0000000c, -SQ_THREAD_TRACE_INST_TYPE_NEXT = 0x0000000d, -SQ_THREAD_TRACE_INST_TYPE_FLAT_RD = 0x0000000e, -SQ_THREAD_TRACE_INST_TYPE_OTHER_MSG = 0x0000000f, -SQ_THREAD_TRACE_INST_TYPE_SMEM_WR = 0x00000010, -SQ_THREAD_TRACE_INST_TYPE_SALU_64 = 0x00000011, -SQ_THREAD_TRACE_INST_TYPE_VALU_64 = 0x00000012, -SQ_THREAD_TRACE_INST_TYPE_SMEM_RD_REPLAY = 0x00000013, -SQ_THREAD_TRACE_INST_TYPE_SMEM_WR_REPLAY = 0x00000014, -SQ_THREAD_TRACE_INST_TYPE_VMEM_RD_REPLAY = 0x00000015, -SQ_THREAD_TRACE_INST_TYPE_VMEM_WR_REPLAY = 0x00000016, -SQ_THREAD_TRACE_INST_TYPE_FLAT_RD_REPLAY = 0x00000017, -SQ_THREAD_TRACE_INST_TYPE_FLAT_WR_REPLAY = 0x00000018, -SQ_THREAD_TRACE_INST_TYPE_FATAL_HALT = 0x00000019, -} SQ_THREAD_TRACE_INST_TYPE; - -/* - * SQ_THREAD_TRACE_REG_TYPE enum - */ - -typedef enum SQ_THREAD_TRACE_REG_TYPE { -SQ_THREAD_TRACE_REG_TYPE_EVENT = 0x00000000, -SQ_THREAD_TRACE_REG_TYPE_DRAW = 0x00000001, -SQ_THREAD_TRACE_REG_TYPE_DISPATCH = 0x00000002, -SQ_THREAD_TRACE_REG_TYPE_USERDATA = 0x00000003, -SQ_THREAD_TRACE_REG_TYPE_MARKER = 0x00000004, -SQ_THREAD_TRACE_REG_TYPE_GFXDEC = 0x00000005, -SQ_THREAD_TRACE_REG_TYPE_SHDEC = 0x00000006, -SQ_THREAD_TRACE_REG_TYPE_OTHER = 0x00000007, -} SQ_THREAD_TRACE_REG_TYPE; - -/* - * SQ_THREAD_TRACE_REG_OP enum - */ - -typedef enum SQ_THREAD_TRACE_REG_OP { -SQ_THREAD_TRACE_REG_OP_READ = 0x00000000, -SQ_THREAD_TRACE_REG_OP_WRITE = 0x00000001, -} SQ_THREAD_TRACE_REG_OP; - -/* - * SQ_THREAD_TRACE_MODE_SEL enum - */ - -typedef enum SQ_THREAD_TRACE_MODE_SEL { -SQ_THREAD_TRACE_MODE_OFF = 0x00000000, -SQ_THREAD_TRACE_MODE_ON = 0x00000001, -} SQ_THREAD_TRACE_MODE_SEL; - -/* - * SQ_THREAD_TRACE_CAPTURE_MODE enum - */ - -typedef enum SQ_THREAD_TRACE_CAPTURE_MODE { -SQ_THREAD_TRACE_CAPTURE_MODE_ALL = 0x00000000, -SQ_THREAD_TRACE_CAPTURE_MODE_SELECT = 
0x00000001, -SQ_THREAD_TRACE_CAPTURE_MODE_SELECT_DETAIL = 0x00000002, -} SQ_THREAD_TRACE_CAPTURE_MODE; - -/* - * SQ_THREAD_TRACE_VM_ID_MASK enum - */ - -typedef enum SQ_THREAD_TRACE_VM_ID_MASK { -SQ_THREAD_TRACE_VM_ID_MASK_SINGLE = 0x00000000, -SQ_THREAD_TRACE_VM_ID_MASK_ALL = 0x00000001, -SQ_THREAD_TRACE_VM_ID_MASK_SINGLE_DETAIL = 0x00000002, -} SQ_THREAD_TRACE_VM_ID_MASK; - -/* - * SQ_THREAD_TRACE_WAVE_MASK enum - */ - -typedef enum SQ_THREAD_TRACE_WAVE_MASK { -SQ_THREAD_TRACE_WAVE_MASK_NONE = 0x00000000, -SQ_THREAD_TRACE_WAVE_MASK_ALL = 0x00000001, -} SQ_THREAD_TRACE_WAVE_MASK; - -/* - * SQ_THREAD_TRACE_ISSUE enum - */ - -typedef enum SQ_THREAD_TRACE_ISSUE { -SQ_THREAD_TRACE_ISSUE_NULL = 0x00000000, -SQ_THREAD_TRACE_ISSUE_STALL = 0x00000001, -SQ_THREAD_TRACE_ISSUE_INST = 0x00000002, -SQ_THREAD_TRACE_ISSUE_IMMED = 0x00000003, -} SQ_THREAD_TRACE_ISSUE; - -/* - * SQ_THREAD_TRACE_ISSUE_MASK enum - */ - -typedef enum SQ_THREAD_TRACE_ISSUE_MASK { -SQ_THREAD_TRACE_ISSUE_MASK_ALL = 0x00000000, -SQ_THREAD_TRACE_ISSUE_MASK_STALLED = 0x00000001, -SQ_THREAD_TRACE_ISSUE_MASK_STALLED_AND_IMMED = 0x00000002, -SQ_THREAD_TRACE_ISSUE_MASK_IMMED = 0x00000003, -} SQ_THREAD_TRACE_ISSUE_MASK; - -/* - * SQ_PERF_SEL enum - */ - -typedef enum SQ_PERF_SEL { -SQ_PERF_SEL_NONE = 0x00000000, -SQ_PERF_SEL_ACCUM_PREV = 0x00000001, -SQ_PERF_SEL_CYCLES = 0x00000002, -SQ_PERF_SEL_BUSY_CYCLES = 0x00000003, -SQ_PERF_SEL_WAVES = 0x00000004, -SQ_PERF_SEL_LEVEL_WAVES = 0x00000005, -SQ_PERF_SEL_WAVES_EQ_64 = 0x00000006, -SQ_PERF_SEL_WAVES_LT_64 = 0x00000007, -SQ_PERF_SEL_WAVES_LT_48 = 0x00000008, -SQ_PERF_SEL_WAVES_LT_32 = 0x00000009, -SQ_PERF_SEL_WAVES_LT_16 = 0x0000000a, -SQ_PERF_SEL_WAVES_CU = 0x0000000b, -SQ_PERF_SEL_LEVEL_WAVES_CU = 0x0000000c, -SQ_PERF_SEL_BUSY_CU_CYCLES = 0x0000000d, -SQ_PERF_SEL_ITEMS = 0x0000000e, -SQ_PERF_SEL_QUADS = 0x0000000f, -SQ_PERF_SEL_EVENTS = 0x00000010, -SQ_PERF_SEL_SURF_SYNCS = 0x00000011, -SQ_PERF_SEL_TTRACE_REQS = 0x00000012, -SQ_PERF_SEL_TTRACE_INFLIGHT_REQS = 
0x00000013, -SQ_PERF_SEL_TTRACE_STALL = 0x00000014, -SQ_PERF_SEL_MSG_CNTR = 0x00000015, -SQ_PERF_SEL_MSG_PERF = 0x00000016, -SQ_PERF_SEL_MSG_GSCNT = 0x00000017, -SQ_PERF_SEL_MSG_INTERRUPT = 0x00000018, -SQ_PERF_SEL_INSTS = 0x00000019, -SQ_PERF_SEL_INSTS_VALU = 0x0000001a, -SQ_PERF_SEL_INSTS_VMEM_WR = 0x0000001b, -SQ_PERF_SEL_INSTS_VMEM_RD = 0x0000001c, -SQ_PERF_SEL_INSTS_VMEM = 0x0000001d, -SQ_PERF_SEL_INSTS_SALU = 0x0000001e, -SQ_PERF_SEL_INSTS_SMEM = 0x0000001f, -SQ_PERF_SEL_INSTS_FLAT = 0x00000020, -SQ_PERF_SEL_INSTS_FLAT_LDS_ONLY = 0x00000021, -SQ_PERF_SEL_INSTS_LDS = 0x00000022, -SQ_PERF_SEL_INSTS_GDS = 0x00000023, -SQ_PERF_SEL_INSTS_EXP = 0x00000024, -SQ_PERF_SEL_INSTS_EXP_GDS = 0x00000025, -SQ_PERF_SEL_INSTS_BRANCH = 0x00000026, -SQ_PERF_SEL_INSTS_SENDMSG = 0x00000027, -SQ_PERF_SEL_INSTS_VSKIPPED = 0x00000028, -SQ_PERF_SEL_INST_LEVEL_VMEM = 0x00000029, -SQ_PERF_SEL_INST_LEVEL_SMEM = 0x0000002a, -SQ_PERF_SEL_INST_LEVEL_LDS = 0x0000002b, -SQ_PERF_SEL_INST_LEVEL_GDS = 0x0000002c, -SQ_PERF_SEL_INST_LEVEL_EXP = 0x0000002d, -SQ_PERF_SEL_WAVE_CYCLES = 0x0000002e, -SQ_PERF_SEL_WAVE_READY = 0x0000002f, -SQ_PERF_SEL_WAIT_CNT_VM = 0x00000030, -SQ_PERF_SEL_WAIT_CNT_LGKM = 0x00000031, -SQ_PERF_SEL_WAIT_CNT_EXP = 0x00000032, -SQ_PERF_SEL_WAIT_CNT_ANY = 0x00000033, -SQ_PERF_SEL_WAIT_BARRIER = 0x00000034, -SQ_PERF_SEL_WAIT_EXP_ALLOC = 0x00000035, -SQ_PERF_SEL_WAIT_SLEEP = 0x00000036, -SQ_PERF_SEL_WAIT_SLEEP_XNACK = 0x00000037, -SQ_PERF_SEL_WAIT_OTHER = 0x00000038, -SQ_PERF_SEL_WAIT_ANY = 0x00000039, -SQ_PERF_SEL_WAIT_TTRACE = 0x0000003a, -SQ_PERF_SEL_WAIT_IFETCH = 0x0000003b, -SQ_PERF_SEL_WAIT_INST_ANY = 0x0000003c, -SQ_PERF_SEL_WAIT_INST_VMEM = 0x0000003d, -SQ_PERF_SEL_WAIT_INST_SCA = 0x0000003e, -SQ_PERF_SEL_WAIT_INST_LDS = 0x0000003f, -SQ_PERF_SEL_WAIT_INST_VALU = 0x00000040, -SQ_PERF_SEL_WAIT_INST_EXP_GDS = 0x00000041, -SQ_PERF_SEL_WAIT_INST_MISC = 0x00000042, -SQ_PERF_SEL_WAIT_INST_FLAT = 0x00000043, -SQ_PERF_SEL_ACTIVE_INST_ANY = 0x00000044, 
-SQ_PERF_SEL_ACTIVE_INST_VMEM = 0x00000045, -SQ_PERF_SEL_ACTIVE_INST_LDS = 0x00000046, -SQ_PERF_SEL_ACTIVE_INST_VALU = 0x00000047, -SQ_PERF_SEL_ACTIVE_INST_SCA = 0x00000048, -SQ_PERF_SEL_ACTIVE_INST_EXP_GDS = 0x00000049, -SQ_PERF_SEL_ACTIVE_INST_MISC = 0x0000004a, -SQ_PERF_SEL_ACTIVE_INST_FLAT = 0x0000004b, -SQ_PERF_SEL_INST_CYCLES_VMEM_WR = 0x0000004c, -SQ_PERF_SEL_INST_CYCLES_VMEM_RD = 0x0000004d, -SQ_PERF_SEL_INST_CYCLES_VMEM_ADDR = 0x0000004e, -SQ_PERF_SEL_INST_CYCLES_VMEM_DATA = 0x0000004f, -SQ_PERF_SEL_INST_CYCLES_VMEM_CMD = 0x00000050, -SQ_PERF_SEL_INST_CYCLES_EXP = 0x00000051, -SQ_PERF_SEL_INST_CYCLES_GDS = 0x00000052, -SQ_PERF_SEL_INST_CYCLES_SMEM = 0x00000053, -SQ_PERF_SEL_INST_CYCLES_SALU = 0x00000054, -SQ_PERF_SEL_THREAD_CYCLES_VALU = 0x00000055, -SQ_PERF_SEL_THREAD_CYCLES_VALU_MAX = 0x00000056, -SQ_PERF_SEL_IFETCH = 0x00000057, -SQ_PERF_SEL_IFETCH_LEVEL = 0x00000058, -SQ_PERF_SEL_CBRANCH_FORK = 0x00000059, -SQ_PERF_SEL_CBRANCH_FORK_SPLIT = 0x0000005a, -SQ_PERF_SEL_VALU_LDS_DIRECT_RD = 0x0000005b, -SQ_PERF_SEL_VALU_LDS_INTERP_OP = 0x0000005c, -SQ_PERF_SEL_LDS_BANK_CONFLICT = 0x0000005d, -SQ_PERF_SEL_LDS_ADDR_CONFLICT = 0x0000005e, -SQ_PERF_SEL_LDS_UNALIGNED_STALL = 0x0000005f, -SQ_PERF_SEL_LDS_MEM_VIOLATIONS = 0x00000060, -SQ_PERF_SEL_LDS_ATOMIC_RETURN = 0x00000061, -SQ_PERF_SEL_LDS_IDX_ACTIVE = 0x00000062, -SQ_PERF_SEL_VALU_DEP_STALL = 0x00000063, -SQ_PERF_SEL_VALU_STARVE = 0x00000064, -SQ_PERF_SEL_EXP_REQ_FIFO_FULL = 0x00000065, -SQ_PERF_SEL_LDS_DATA_FIFO_FULL = 0x00000066, -SQ_PERF_SEL_LDS_CMD_FIFO_FULL = 0x00000067, -SQ_PERF_SEL_VMEM_TA_ADDR_FIFO_FULL = 0x00000068, -SQ_PERF_SEL_VMEM_TA_CMD_FIFO_FULL = 0x00000069, -SQ_PERF_SEL_VMEM_EX_DATA_REG_BUSY = 0x0000006a, -SQ_PERF_SEL_VMEM_WR_TA_DATA_FIFO_FULL = 0x0000006b, -SQ_PERF_SEL_VALU_SRC_C_CONFLICT = 0x0000006c, -SQ_PERF_SEL_VMEM_RD_SRC_CD_CONFLICT = 0x0000006d, -SQ_PERF_SEL_VMEM_WR_SRC_CD_CONFLICT = 0x0000006e, -SQ_PERF_SEL_FLAT_SRC_CD_CONFLICT = 0x0000006f, -SQ_PERF_SEL_LDS_SRC_CD_CONFLICT = 
0x00000070, -SQ_PERF_SEL_SRC_CD_BUSY = 0x00000071, -SQ_PERF_SEL_PT_POWER_STALL = 0x00000072, -SQ_PERF_SEL_USER0 = 0x00000073, -SQ_PERF_SEL_USER1 = 0x00000074, -SQ_PERF_SEL_USER2 = 0x00000075, -SQ_PERF_SEL_USER3 = 0x00000076, -SQ_PERF_SEL_USER4 = 0x00000077, -SQ_PERF_SEL_USER5 = 0x00000078, -SQ_PERF_SEL_USER6 = 0x00000079, -SQ_PERF_SEL_USER7 = 0x0000007a, -SQ_PERF_SEL_USER8 = 0x0000007b, -SQ_PERF_SEL_USER9 = 0x0000007c, -SQ_PERF_SEL_USER10 = 0x0000007d, -SQ_PERF_SEL_USER11 = 0x0000007e, -SQ_PERF_SEL_USER12 = 0x0000007f, -SQ_PERF_SEL_USER13 = 0x00000080, -SQ_PERF_SEL_USER14 = 0x00000081, -SQ_PERF_SEL_USER15 = 0x00000082, -SQ_PERF_SEL_USER_LEVEL0 = 0x00000083, -SQ_PERF_SEL_USER_LEVEL1 = 0x00000084, -SQ_PERF_SEL_USER_LEVEL2 = 0x00000085, -SQ_PERF_SEL_USER_LEVEL3 = 0x00000086, -SQ_PERF_SEL_USER_LEVEL4 = 0x00000087, -SQ_PERF_SEL_USER_LEVEL5 = 0x00000088, -SQ_PERF_SEL_USER_LEVEL6 = 0x00000089, -SQ_PERF_SEL_USER_LEVEL7 = 0x0000008a, -SQ_PERF_SEL_USER_LEVEL8 = 0x0000008b, -SQ_PERF_SEL_USER_LEVEL9 = 0x0000008c, -SQ_PERF_SEL_USER_LEVEL10 = 0x0000008d, -SQ_PERF_SEL_USER_LEVEL11 = 0x0000008e, -SQ_PERF_SEL_USER_LEVEL12 = 0x0000008f, -SQ_PERF_SEL_USER_LEVEL13 = 0x00000090, -SQ_PERF_SEL_USER_LEVEL14 = 0x00000091, -SQ_PERF_SEL_USER_LEVEL15 = 0x00000092, -SQ_PERF_SEL_POWER_VALU = 0x00000093, -SQ_PERF_SEL_POWER_VALU0 = 0x00000094, -SQ_PERF_SEL_POWER_VALU1 = 0x00000095, -SQ_PERF_SEL_POWER_VALU2 = 0x00000096, -SQ_PERF_SEL_POWER_GPR_RD = 0x00000097, -SQ_PERF_SEL_POWER_GPR_WR = 0x00000098, -SQ_PERF_SEL_POWER_LDS_BUSY = 0x00000099, -SQ_PERF_SEL_POWER_ALU_BUSY = 0x0000009a, -SQ_PERF_SEL_POWER_TEX_BUSY = 0x0000009b, -SQ_PERF_SEL_ACCUM_PREV_HIRES = 0x0000009c, -SQ_PERF_SEL_WAVES_RESTORED = 0x0000009d, -SQ_PERF_SEL_WAVES_SAVED = 0x0000009e, -SQ_PERF_SEL_INSTS_SMEM_NORM = 0x0000009f, -SQ_PERF_SEL_ATC_INSTS_VMEM = 0x000000a0, -SQ_PERF_SEL_ATC_INST_LEVEL_VMEM = 0x000000a1, -SQ_PERF_SEL_ATC_XNACK_FIRST = 0x000000a2, -SQ_PERF_SEL_ATC_XNACK_ALL = 0x000000a3, -SQ_PERF_SEL_ATC_XNACK_FIFO_FULL = 
0x000000a4, -SQ_PERF_SEL_ATC_INSTS_SMEM = 0x000000a5, -SQ_PERF_SEL_ATC_INST_LEVEL_SMEM = 0x000000a6, -SQ_PERF_SEL_IFETCH_XNACK = 0x000000a7, -SQ_PERF_SEL_TLB_SHOOTDOWN = 0x000000a8, -SQ_PERF_SEL_TLB_SHOOTDOWN_CYCLES = 0x000000a9, -SQ_PERF_SEL_INSTS_VMEM_WR_REPLAY = 0x000000aa, -SQ_PERF_SEL_INSTS_VMEM_RD_REPLAY = 0x000000ab, -SQ_PERF_SEL_INSTS_VMEM_REPLAY = 0x000000ac, -SQ_PERF_SEL_INSTS_SMEM_REPLAY = 0x000000ad, -SQ_PERF_SEL_INSTS_SMEM_NORM_REPLAY = 0x000000ae, -SQ_PERF_SEL_INSTS_FLAT_REPLAY = 0x000000af, -SQ_PERF_SEL_ATC_INSTS_VMEM_REPLAY = 0x000000b0, -SQ_PERF_SEL_ATC_INSTS_SMEM_REPLAY = 0x000000b1, -SQ_PERF_SEL_UTCL1_TRANSLATION_MISS = 0x000000b2, -SQ_PERF_SEL_UTCL1_PERMISSION_MISS = 0x000000b3, -SQ_PERF_SEL_UTCL1_REQUEST = 0x000000b4, -SQ_PERF_SEL_UTCL1_STALL_MISSFIFO_FULL = 0x000000b5, -SQ_PERF_SEL_UTCL1_STALL_INFLIGHT_MAX = 0x000000b6, -SQ_PERF_SEL_UTCL1_STALL_LRU_INFLIGHT = 0x000000b7, -SQ_PERF_SEL_UTCL1_LFIFO_FULL = 0x000000b8, -SQ_PERF_SEL_UTCL1_STALL_LFIFO_NOT_RES = 0x000000b9, -SQ_PERF_SEL_UTCL1_STALL_UTCL2_REQ_OUT_OF_CREDITS = 0x000000ba, -SQ_PERF_SEL_DUMMY_END = 0x000000bb, -SQ_PERF_SEL_DUMMY_LAST = 0x000000ff, -SQC_PERF_SEL_ICACHE_INPUT_VALID_READY = 0x00000100, -SQC_PERF_SEL_ICACHE_INPUT_VALID_READYB = 0x00000101, -SQC_PERF_SEL_ICACHE_INPUT_VALIDB = 0x00000102, -SQC_PERF_SEL_DCACHE_INPUT_VALID_READY = 0x00000103, -SQC_PERF_SEL_DCACHE_INPUT_VALID_READYB = 0x00000104, -SQC_PERF_SEL_DCACHE_INPUT_VALIDB = 0x00000105, -SQC_PERF_SEL_TC_REQ = 0x00000106, -SQC_PERF_SEL_TC_INST_REQ = 0x00000107, -SQC_PERF_SEL_TC_DATA_READ_REQ = 0x00000108, -SQC_PERF_SEL_TC_DATA_WRITE_REQ = 0x00000109, -SQC_PERF_SEL_TC_DATA_ATOMIC_REQ = 0x0000010a, -SQC_PERF_SEL_TC_STALL = 0x0000010b, -SQC_PERF_SEL_TC_STARVE = 0x0000010c, -SQC_PERF_SEL_ICACHE_BUSY_CYCLES = 0x0000010d, -SQC_PERF_SEL_ICACHE_REQ = 0x0000010e, -SQC_PERF_SEL_ICACHE_HITS = 0x0000010f, -SQC_PERF_SEL_ICACHE_MISSES = 0x00000110, -SQC_PERF_SEL_ICACHE_MISSES_DUPLICATE = 0x00000111, -SQC_PERF_SEL_ICACHE_INVAL_INST = 
0x00000112, -SQC_PERF_SEL_ICACHE_INVAL_ASYNC = 0x00000113, -SQC_PERF_SEL_ICACHE_INPUT_STALL_ARB_NO_GRANT = 0x00000114, -SQC_PERF_SEL_ICACHE_INPUT_STALL_BANK_READYB = 0x00000115, -SQC_PERF_SEL_ICACHE_CACHE_STALLED = 0x00000116, -SQC_PERF_SEL_ICACHE_CACHE_STALL_INFLIGHT_NONZERO = 0x00000117, -SQC_PERF_SEL_ICACHE_CACHE_STALL_INFLIGHT_MAX = 0x00000118, -SQC_PERF_SEL_ICACHE_CACHE_STALL_OUTPUT = 0x00000119, -SQC_PERF_SEL_ICACHE_CACHE_STALL_OUTPUT_MISS_FIFO = 0x0000011a, -SQC_PERF_SEL_ICACHE_CACHE_STALL_OUTPUT_HIT_FIFO = 0x0000011b, -SQC_PERF_SEL_ICACHE_CACHE_STALL_OUTPUT_TC_IF = 0x0000011c, -SQC_PERF_SEL_ICACHE_STALL_OUTXBAR_ARB_NO_GRANT = 0x0000011d, -SQC_PERF_SEL_ICACHE_PREFETCH_1 = 0x0000011e, -SQC_PERF_SEL_ICACHE_PREFETCH_2 = 0x0000011f, -SQC_PERF_SEL_ICACHE_PREFETCH_FILTERED = 0x00000120, -SQC_PERF_SEL_DCACHE_BUSY_CYCLES = 0x00000121, -SQC_PERF_SEL_DCACHE_REQ = 0x00000122, -SQC_PERF_SEL_DCACHE_HITS = 0x00000123, -SQC_PERF_SEL_DCACHE_MISSES = 0x00000124, -SQC_PERF_SEL_DCACHE_MISSES_DUPLICATE = 0x00000125, -SQC_PERF_SEL_DCACHE_HIT_LRU_READ = 0x00000126, -SQC_PERF_SEL_DCACHE_MISS_EVICT_READ = 0x00000127, -SQC_PERF_SEL_DCACHE_WC_LRU_WRITE = 0x00000128, -SQC_PERF_SEL_DCACHE_WT_EVICT_WRITE = 0x00000129, -SQC_PERF_SEL_DCACHE_ATOMIC = 0x0000012a, -SQC_PERF_SEL_DCACHE_VOLATILE = 0x0000012b, -SQC_PERF_SEL_DCACHE_INVAL_INST = 0x0000012c, -SQC_PERF_SEL_DCACHE_INVAL_ASYNC = 0x0000012d, -SQC_PERF_SEL_DCACHE_INVAL_VOLATILE_INST = 0x0000012e, -SQC_PERF_SEL_DCACHE_INVAL_VOLATILE_ASYNC = 0x0000012f, -SQC_PERF_SEL_DCACHE_WB_INST = 0x00000130, -SQC_PERF_SEL_DCACHE_WB_ASYNC = 0x00000131, -SQC_PERF_SEL_DCACHE_WB_VOLATILE_INST = 0x00000132, -SQC_PERF_SEL_DCACHE_WB_VOLATILE_ASYNC = 0x00000133, -SQC_PERF_SEL_DCACHE_INPUT_STALL_ARB_NO_GRANT = 0x00000134, -SQC_PERF_SEL_DCACHE_INPUT_STALL_BANK_READYB = 0x00000135, -SQC_PERF_SEL_DCACHE_CACHE_STALLED = 0x00000136, -SQC_PERF_SEL_DCACHE_CACHE_STALL_INFLIGHT_MAX = 0x00000137, -SQC_PERF_SEL_DCACHE_CACHE_STALL_OUTPUT = 0x00000138, 
-SQC_PERF_SEL_DCACHE_CACHE_STALL_EVICT = 0x00000139, -SQC_PERF_SEL_DCACHE_CACHE_STALL_UNORDERED = 0x0000013a, -SQC_PERF_SEL_DCACHE_CACHE_STALL_ALLOC_UNAVAILABLE = 0x0000013b, -SQC_PERF_SEL_DCACHE_CACHE_STALL_FORCE_EVICT = 0x0000013c, -SQC_PERF_SEL_DCACHE_CACHE_STALL_MULTI_FLUSH = 0x0000013d, -SQC_PERF_SEL_DCACHE_CACHE_STALL_FLUSH_DONE = 0x0000013e, -SQC_PERF_SEL_DCACHE_CACHE_STALL_OUTPUT_MISS_FIFO = 0x0000013f, -SQC_PERF_SEL_DCACHE_CACHE_STALL_OUTPUT_HIT_FIFO = 0x00000140, -SQC_PERF_SEL_DCACHE_CACHE_STALL_OUTPUT_TC_IF = 0x00000141, -SQC_PERF_SEL_DCACHE_STALL_OUTXBAR_ARB_NO_GRANT = 0x00000142, -SQC_PERF_SEL_DCACHE_REQ_READ_1 = 0x00000143, -SQC_PERF_SEL_DCACHE_REQ_READ_2 = 0x00000144, -SQC_PERF_SEL_DCACHE_REQ_READ_4 = 0x00000145, -SQC_PERF_SEL_DCACHE_REQ_READ_8 = 0x00000146, -SQC_PERF_SEL_DCACHE_REQ_READ_16 = 0x00000147, -SQC_PERF_SEL_DCACHE_REQ_TIME = 0x00000148, -SQC_PERF_SEL_DCACHE_REQ_WRITE_1 = 0x00000149, -SQC_PERF_SEL_DCACHE_REQ_WRITE_2 = 0x0000014a, -SQC_PERF_SEL_DCACHE_REQ_WRITE_4 = 0x0000014b, -SQC_PERF_SEL_DCACHE_REQ_ATC_PROBE = 0x0000014c, -SQC_PERF_SEL_SQ_DCACHE_REQS = 0x0000014d, -SQC_PERF_SEL_DCACHE_FLAT_REQ = 0x0000014e, -SQC_PERF_SEL_DCACHE_NONFLAT_REQ = 0x0000014f, -SQC_PERF_SEL_ICACHE_INFLIGHT_LEVEL = 0x00000150, -SQC_PERF_SEL_DCACHE_INFLIGHT_LEVEL = 0x00000151, -SQC_PERF_SEL_TC_INFLIGHT_LEVEL = 0x00000152, -SQC_PERF_SEL_ICACHE_TC_INFLIGHT_LEVEL = 0x00000153, -SQC_PERF_SEL_DCACHE_TC_INFLIGHT_LEVEL = 0x00000154, -SQC_PERF_SEL_ICACHE_GATCL1_TRANSLATION_MISS = 0x00000155, -SQC_PERF_SEL_ICACHE_GATCL1_PERMISSION_MISS = 0x00000156, -SQC_PERF_SEL_ICACHE_GATCL1_REQUEST = 0x00000157, -SQC_PERF_SEL_ICACHE_GATCL1_STALL_INFLIGHT_MAX = 0x00000158, -SQC_PERF_SEL_ICACHE_GATCL1_STALL_LRU_INFLIGHT = 0x00000159, -SQC_PERF_SEL_ICACHE_GATCL1_LFIFO_FULL = 0x0000015a, -SQC_PERF_SEL_ICACHE_GATCL1_STALL_LFIFO_NOT_RES = 0x0000015b, -SQC_PERF_SEL_ICACHE_GATCL1_STALL_ATCL2_REQ_OUT_OF_CREDITS = 0x0000015c, -SQC_PERF_SEL_ICACHE_GATCL1_ATCL2_INFLIGHT = 0x0000015d, 
-SQC_PERF_SEL_ICACHE_GATCL1_STALL_MISSFIFO_FULL = 0x0000015e, -SQC_PERF_SEL_DCACHE_GATCL1_TRANSLATION_MISS = 0x0000015f, -SQC_PERF_SEL_DCACHE_GATCL1_PERMISSION_MISS = 0x00000160, -SQC_PERF_SEL_DCACHE_GATCL1_REQUEST = 0x00000161, -SQC_PERF_SEL_DCACHE_GATCL1_STALL_INFLIGHT_MAX = 0x00000162, -SQC_PERF_SEL_DCACHE_GATCL1_STALL_LRU_INFLIGHT = 0x00000163, -SQC_PERF_SEL_DCACHE_GATCL1_LFIFO_FULL = 0x00000164, -SQC_PERF_SEL_DCACHE_GATCL1_STALL_LFIFO_NOT_RES = 0x00000165, -SQC_PERF_SEL_DCACHE_GATCL1_STALL_ATCL2_REQ_OUT_OF_CREDITS = 0x00000166, -SQC_PERF_SEL_DCACHE_GATCL1_ATCL2_INFLIGHT = 0x00000167, -SQC_PERF_SEL_DCACHE_GATCL1_STALL_MISSFIFO_FULL = 0x00000168, -SQC_PERF_SEL_DCACHE_GATCL1_STALL_MULTI_MISS = 0x00000169, -SQC_PERF_SEL_DCACHE_GATCL1_HIT_FIFO_FULL = 0x0000016a, -SQC_PERF_SEL_DUMMY_LAST = 0x0000016b, -} SQ_PERF_SEL; - -/* - * SQ_CAC_POWER_SEL enum - */ - -typedef enum SQ_CAC_POWER_SEL { -SQ_CAC_POWER_VALU = 0x00000000, -SQ_CAC_POWER_VALU0 = 0x00000001, -SQ_CAC_POWER_VALU1 = 0x00000002, -SQ_CAC_POWER_VALU2 = 0x00000003, -SQ_CAC_POWER_GPR_RD = 0x00000004, -SQ_CAC_POWER_GPR_WR = 0x00000005, -SQ_CAC_POWER_LDS_BUSY = 0x00000006, -SQ_CAC_POWER_ALU_BUSY = 0x00000007, -SQ_CAC_POWER_TEX_BUSY = 0x00000008, -} SQ_CAC_POWER_SEL; - -/* - * SQ_IND_CMD_CMD enum - */ - -typedef enum SQ_IND_CMD_CMD { -SQ_IND_CMD_CMD_NULL = 0x00000000, -SQ_IND_CMD_CMD_SETHALT = 0x00000001, -SQ_IND_CMD_CMD_SAVECTX = 0x00000002, -SQ_IND_CMD_CMD_KILL = 0x00000003, -SQ_IND_CMD_CMD_DEBUG = 0x00000004, -SQ_IND_CMD_CMD_TRAP = 0x00000005, -SQ_IND_CMD_CMD_SET_SPI_PRIO = 0x00000006, -SQ_IND_CMD_CMD_SETFATALHALT = 0x00000007, -} SQ_IND_CMD_CMD; - -/* - * SQ_IND_CMD_MODE enum - */ - -typedef enum SQ_IND_CMD_MODE { -SQ_IND_CMD_MODE_SINGLE = 0x00000000, -SQ_IND_CMD_MODE_BROADCAST = 0x00000001, -SQ_IND_CMD_MODE_BROADCAST_QUEUE = 0x00000002, -SQ_IND_CMD_MODE_BROADCAST_PIPE = 0x00000003, -SQ_IND_CMD_MODE_BROADCAST_ME = 0x00000004, -} SQ_IND_CMD_MODE; - -/* - * SQ_EDC_INFO_SOURCE enum - */ - -typedef enum 
SQ_EDC_INFO_SOURCE { -SQ_EDC_INFO_SOURCE_INVALID = 0x00000000, -SQ_EDC_INFO_SOURCE_INST = 0x00000001, -SQ_EDC_INFO_SOURCE_SGPR = 0x00000002, -SQ_EDC_INFO_SOURCE_VGPR = 0x00000003, -SQ_EDC_INFO_SOURCE_LDS = 0x00000004, -SQ_EDC_INFO_SOURCE_GDS = 0x00000005, -SQ_EDC_INFO_SOURCE_TA = 0x00000006, -} SQ_EDC_INFO_SOURCE; - -/* - * SQ_ROUND_MODE enum - */ - -typedef enum SQ_ROUND_MODE { -SQ_ROUND_NEAREST_EVEN = 0x00000000, -SQ_ROUND_PLUS_INFINITY = 0x00000001, -SQ_ROUND_MINUS_INFINITY = 0x00000002, -SQ_ROUND_TO_ZERO = 0x00000003, -} SQ_ROUND_MODE; - -/* - * SQ_INTERRUPT_WORD_ENCODING enum - */ - -typedef enum SQ_INTERRUPT_WORD_ENCODING { -SQ_INTERRUPT_WORD_ENCODING_AUTO = 0x00000000, -SQ_INTERRUPT_WORD_ENCODING_INST = 0x00000001, -SQ_INTERRUPT_WORD_ENCODING_ERROR = 0x00000002, -} SQ_INTERRUPT_WORD_ENCODING; - -/* - * ENUM_SQ_EXPORT_RAT_INST enum - */ - -typedef enum ENUM_SQ_EXPORT_RAT_INST { -SQ_EXPORT_RAT_INST_NOP = 0x00000000, -SQ_EXPORT_RAT_INST_STORE_TYPED = 0x00000001, -SQ_EXPORT_RAT_INST_STORE_RAW = 0x00000002, -SQ_EXPORT_RAT_INST_STORE_RAW_FDENORM = 0x00000003, -SQ_EXPORT_RAT_INST_CMPXCHG_INT = 0x00000004, -SQ_EXPORT_RAT_INST_CMPXCHG_FLT = 0x00000005, -SQ_EXPORT_RAT_INST_CMPXCHG_FDENORM = 0x00000006, -SQ_EXPORT_RAT_INST_ADD = 0x00000007, -SQ_EXPORT_RAT_INST_SUB = 0x00000008, -SQ_EXPORT_RAT_INST_RSUB = 0x00000009, -SQ_EXPORT_RAT_INST_MIN_INT = 0x0000000a, -SQ_EXPORT_RAT_INST_MIN_UINT = 0x0000000b, -SQ_EXPORT_RAT_INST_MAX_INT = 0x0000000c, -SQ_EXPORT_RAT_INST_MAX_UINT = 0x0000000d, -SQ_EXPORT_RAT_INST_AND = 0x0000000e, -SQ_EXPORT_RAT_INST_OR = 0x0000000f, -SQ_EXPORT_RAT_INST_XOR = 0x00000010, -SQ_EXPORT_RAT_INST_MSKOR = 0x00000011, -SQ_EXPORT_RAT_INST_INC_UINT = 0x00000012, -SQ_EXPORT_RAT_INST_DEC_UINT = 0x00000013, -SQ_EXPORT_RAT_INST_STORE_DWORD = 0x00000014, -SQ_EXPORT_RAT_INST_STORE_SHORT = 0x00000015, -SQ_EXPORT_RAT_INST_STORE_BYTE = 0x00000016, -SQ_EXPORT_RAT_INST_NOP_RTN = 0x00000020, -SQ_EXPORT_RAT_INST_XCHG_RTN = 0x00000022, 
-SQ_EXPORT_RAT_INST_XCHG_FDENORM_RTN = 0x00000023, -SQ_EXPORT_RAT_INST_CMPXCHG_INT_RTN = 0x00000024, -SQ_EXPORT_RAT_INST_CMPXCHG_FLT_RTN = 0x00000025, -SQ_EXPORT_RAT_INST_CMPXCHG_FDENORM_RTN = 0x00000026, -SQ_EXPORT_RAT_INST_ADD_RTN = 0x00000027, -SQ_EXPORT_RAT_INST_SUB_RTN = 0x00000028, -SQ_EXPORT_RAT_INST_RSUB_RTN = 0x00000029, -SQ_EXPORT_RAT_INST_MIN_INT_RTN = 0x0000002a, -SQ_EXPORT_RAT_INST_MIN_UINT_RTN = 0x0000002b, -SQ_EXPORT_RAT_INST_MAX_INT_RTN = 0x0000002c, -SQ_EXPORT_RAT_INST_MAX_UINT_RTN = 0x0000002d, -SQ_EXPORT_RAT_INST_AND_RTN = 0x0000002e, -SQ_EXPORT_RAT_INST_OR_RTN = 0x0000002f, -SQ_EXPORT_RAT_INST_XOR_RTN = 0x00000030, -SQ_EXPORT_RAT_INST_MSKOR_RTN = 0x00000031, -SQ_EXPORT_RAT_INST_INC_UINT_RTN = 0x00000032, -SQ_EXPORT_RAT_INST_DEC_UINT_RTN = 0x00000033, -} ENUM_SQ_EXPORT_RAT_INST; - -/* - * SQ_IBUF_ST enum - */ - -typedef enum SQ_IBUF_ST { -SQ_IBUF_IB_IDLE = 0x00000000, -SQ_IBUF_IB_INI_WAIT_GNT = 0x00000001, -SQ_IBUF_IB_INI_WAIT_DRET = 0x00000002, -SQ_IBUF_IB_LE_4DW = 0x00000003, -SQ_IBUF_IB_WAIT_DRET = 0x00000004, -SQ_IBUF_IB_EMPTY_WAIT_DRET = 0x00000005, -SQ_IBUF_IB_DRET = 0x00000006, -SQ_IBUF_IB_EMPTY_WAIT_GNT = 0x00000007, -} SQ_IBUF_ST; - -/* - * SQ_INST_STR_ST enum - */ - -typedef enum SQ_INST_STR_ST { -SQ_INST_STR_IB_WAVE_NORML = 0x00000000, -SQ_INST_STR_IB_WAVE2ID_NORMAL_INST_AV = 0x00000001, -SQ_INST_STR_IB_WAVE_INTERNAL_INST_AV = 0x00000002, -SQ_INST_STR_IB_WAVE_INST_SKIP_AV = 0x00000003, -SQ_INST_STR_IB_WAVE_SETVSKIP_ST0 = 0x00000004, -SQ_INST_STR_IB_WAVE_SETVSKIP_ST1 = 0x00000005, -SQ_INST_STR_IB_WAVE_NOP_SLEEP_WAIT = 0x00000006, -SQ_INST_STR_IB_WAVE_PC_FROM_SGPR_MSG_WAIT = 0x00000007, -} SQ_INST_STR_ST; - -/* - * SQ_WAVE_IB_ECC_ST enum - */ - -typedef enum SQ_WAVE_IB_ECC_ST { -SQ_WAVE_IB_ECC_CLEAN = 0x00000000, -SQ_WAVE_IB_ECC_ERR_CONTINUE = 0x00000001, -SQ_WAVE_IB_ECC_ERR_HALT = 0x00000002, -SQ_WAVE_IB_ECC_WITH_ERR_MSG = 0x00000003, -} SQ_WAVE_IB_ECC_ST; - -/* - * SH_MEM_ADDRESS_MODE enum - */ - -typedef enum SH_MEM_ADDRESS_MODE { 
-SH_MEM_ADDRESS_MODE_64 = 0x00000000, -SH_MEM_ADDRESS_MODE_32 = 0x00000001, -} SH_MEM_ADDRESS_MODE; - -/* - * SH_MEM_ALIGNMENT_MODE enum - */ - -typedef enum SH_MEM_ALIGNMENT_MODE { -SH_MEM_ALIGNMENT_MODE_DWORD = 0x00000000, -SH_MEM_ALIGNMENT_MODE_DWORD_STRICT = 0x00000001, -SH_MEM_ALIGNMENT_MODE_STRICT = 0x00000002, -SH_MEM_ALIGNMENT_MODE_UNALIGNED = 0x00000003, -} SH_MEM_ALIGNMENT_MODE; - -/* - * SQ_THREAD_TRACE_WAVE_START_COUNT_PREFIX enum - */ - -typedef enum SQ_THREAD_TRACE_WAVE_START_COUNT_PREFIX { -SQ_THREAD_TRACE_WAVE_START_COUNT_PREFIX_WREXEC = 0x00000018, -SQ_THREAD_TRACE_WAVE_START_COUNT_PREFIX_RESTORE = 0x00000019, -} SQ_THREAD_TRACE_WAVE_START_COUNT_PREFIX; - -/* - * SQ_LB_CTR_SEL_VALUES enum - */ - -typedef enum SQ_LB_CTR_SEL_VALUES { -SQ_LB_CTR_SEL_ALU_CYCLES = 0x00000000, -SQ_LB_CTR_SEL_ALU_STALLS = 0x00000001, -SQ_LB_CTR_SEL_TEX_CYCLES = 0x00000002, -SQ_LB_CTR_SEL_TEX_STALLS = 0x00000003, -SQ_LB_CTR_SEL_SALU_CYCLES = 0x00000004, -SQ_LB_CTR_SEL_SCALAR_STALLS = 0x00000005, -SQ_LB_CTR_SEL_SMEM_CYCLES = 0x00000006, -SQ_LB_CTR_SEL_ICACHE_STALLS = 0x00000007, -SQ_LB_CTR_SEL_DCACHE_STALLS = 0x00000008, -SQ_LB_CTR_SEL_RESERVED0 = 0x00000009, -SQ_LB_CTR_SEL_RESERVED1 = 0x0000000a, -SQ_LB_CTR_SEL_RESERVED2 = 0x0000000b, -SQ_LB_CTR_SEL_RESERVED3 = 0x0000000c, -SQ_LB_CTR_SEL_RESERVED4 = 0x0000000d, -SQ_LB_CTR_SEL_RESERVED5 = 0x0000000e, -SQ_LB_CTR_SEL_RESERVED6 = 0x0000000f, -} SQ_LB_CTR_SEL_VALUES; - -/* - * SQ_WAVE_TYPE value - */ - -#define SQ_WAVE_TYPE_PS0 0x00000000 - -/* - * SQIND_PARTITIONS value - */ - -#define SQIND_GLOBAL_REGS_OFFSET 0x00000000 -#define SQIND_GLOBAL_REGS_SIZE 0x00000008 -#define SQIND_LOCAL_REGS_OFFSET 0x00000008 -#define SQIND_LOCAL_REGS_SIZE 0x00000008 -#define SQIND_WAVE_HWREGS_OFFSET 0x00000010 -#define SQIND_WAVE_HWREGS_SIZE 0x000001f0 -#define SQIND_WAVE_SGPRS_OFFSET 0x00000200 -#define SQIND_WAVE_SGPRS_SIZE 0x00000200 -#define SQIND_WAVE_VGPRS_OFFSET 0x00000400 -#define SQIND_WAVE_VGPRS_SIZE 0x00000100 - -/* - * SQ_GFXDEC 
value - */ - -#define SQ_GFXDEC_BEGIN 0x0000a000 -#define SQ_GFXDEC_END 0x0000c000 -#define SQ_GFXDEC_STATE_ID_SHIFT 0x0000000a - -/* - * SQDEC value - */ - -#define SQDEC_BEGIN 0x00002300 -#define SQDEC_END 0x000023ff - -/* - * SQPERFSDEC value - */ - -#define SQPERFSDEC_BEGIN 0x0000d9c0 -#define SQPERFSDEC_END 0x0000da40 - -/* - * SQPERFDDEC value - */ - -#define SQPERFDDEC_BEGIN 0x0000d1c0 -#define SQPERFDDEC_END 0x0000d240 - -/* - * SQGFXUDEC value - */ - -#define SQGFXUDEC_BEGIN 0x0000c330 -#define SQGFXUDEC_END 0x0000c380 - -/* - * SQPWRDEC value - */ - -#define SQPWRDEC_BEGIN 0x0000f08c -#define SQPWRDEC_END 0x0000f094 - -/* - * SQ_DISPATCHER value - */ - -#define SQ_DISPATCHER_GFX_MIN 0x00000010 -#define SQ_DISPATCHER_GFX_CNT_PER_RING 0x00000008 - -/* - * SQ_MAX value - */ - -#define SQ_MAX_PGM_SGPRS 0x00000068 -#define SQ_MAX_PGM_VGPRS 0x00000100 - -/* - * SQ_THREAD_TRACE_TIME_UNIT value - */ - -#define SQ_THREAD_TRACE_TIME_UNIT 0x00000004 - -/* - * SQ_EXCP_BITS value - */ - -#define SQ_EX_MODE_EXCP_VALU_BASE 0x00000000 -#define SQ_EX_MODE_EXCP_VALU_SIZE 0x00000007 -#define SQ_EX_MODE_EXCP_INVALID 0x00000000 -#define SQ_EX_MODE_EXCP_INPUT_DENORM 0x00000001 -#define SQ_EX_MODE_EXCP_DIV0 0x00000002 -#define SQ_EX_MODE_EXCP_OVERFLOW 0x00000003 -#define SQ_EX_MODE_EXCP_UNDERFLOW 0x00000004 -#define SQ_EX_MODE_EXCP_INEXACT 0x00000005 -#define SQ_EX_MODE_EXCP_INT_DIV0 0x00000006 -#define SQ_EX_MODE_EXCP_ADDR_WATCH0 0x00000007 -#define SQ_EX_MODE_EXCP_MEM_VIOL 0x00000008 - -/* - * SQ_EXCP_HI_BITS value - */ - -#define SQ_EX_MODE_EXCP_HI_ADDR_WATCH1 0x00000000 -#define SQ_EX_MODE_EXCP_HI_ADDR_WATCH2 0x00000001 -#define SQ_EX_MODE_EXCP_HI_ADDR_WATCH3 0x00000002 - -/* - * HW_INSERTED_INST_ID value - */ - -#define INST_ID_PRIV_START 0x80000000 -#define INST_ID_ECC_INTERRUPT_MSG 0xfffffff0 -#define INST_ID_TTRACE_NEW_PC_MSG 0xfffffff1 -#define INST_ID_HW_TRAP 0xfffffff2 -#define INST_ID_KILL_SEQ 0xfffffff3 -#define INST_ID_SPI_WREXEC 0xfffffff4 -#define 
INST_ID_HOST_REG_TRAP_MSG 0xfffffffe - -/* - * SIMM16_WAITCNT_PARTITIONS value - */ - -#define SIMM16_WAITCNT_VM_CNT_START 0x00000000 -#define SIMM16_WAITCNT_VM_CNT_SIZE 0x00000004 -#define SIMM16_WAITCNT_EXP_CNT_START 0x00000004 -#define SIMM16_WAITCNT_EXP_CNT_SIZE 0x00000003 -#define SIMM16_WAITCNT_LGKM_CNT_START 0x00000008 -#define SIMM16_WAITCNT_LGKM_CNT_SIZE 0x00000004 -#define SIMM16_WAITCNT_VM_CNT_HI_START 0x0000000e -#define SIMM16_WAITCNT_VM_CNT_HI_SIZE 0x00000002 - -/* - * SQ_EDC_FUE_CNTL_BITS value - */ - -#define SQ_EDC_FUE_CNTL_SQ 0x00000000 -#define SQ_EDC_FUE_CNTL_LDS 0x00000001 -#define SQ_EDC_FUE_CNTL_SIMD0 0x00000002 -#define SQ_EDC_FUE_CNTL_SIMD1 0x00000003 -#define SQ_EDC_FUE_CNTL_SIMD2 0x00000004 -#define SQ_EDC_FUE_CNTL_SIMD3 0x00000005 -#define SQ_EDC_FUE_CNTL_TA 0x00000006 -#define SQ_EDC_FUE_CNTL_TD 0x00000007 -#define SQ_EDC_FUE_CNTL_TCP 0x00000008 - -/******************************************************* - * COMP Enums - *******************************************************/ - -/* - * CSDATA_TYPE enum - */ - -typedef enum CSDATA_TYPE { -CSDATA_TYPE_TG = 0x00000000, -CSDATA_TYPE_STATE = 0x00000001, -CSDATA_TYPE_EVENT = 0x00000002, -CSDATA_TYPE_PRIVATE = 0x00000003, -} CSDATA_TYPE; - -/* - * CSDATA_TYPE_WIDTH value - */ - -#define CSDATA_TYPE_WIDTH 0x00000002 - -/* - * CSDATA_ADDR_WIDTH value - */ - -#define CSDATA_ADDR_WIDTH 0x00000007 - -/* - * CSDATA_DATA_WIDTH value - */ - -#define CSDATA_DATA_WIDTH 0x00000020 - -/******************************************************* - * VGT Enums - *******************************************************/ - -/* - * VGT_OUT_PRIM_TYPE enum - */ - -typedef enum VGT_OUT_PRIM_TYPE { -VGT_OUT_POINT = 0x00000000, -VGT_OUT_LINE = 0x00000001, -VGT_OUT_TRI = 0x00000002, -VGT_OUT_RECT_V0 = 0x00000003, -VGT_OUT_RECT_V1 = 0x00000004, -VGT_OUT_RECT_V2 = 0x00000005, -VGT_OUT_RECT_V3 = 0x00000006, -VGT_OUT_2D_RECT = 0x00000007, -VGT_TE_QUAD = 0x00000008, -VGT_TE_PRIM_INDEX_LINE = 0x00000009, 
-VGT_TE_PRIM_INDEX_TRI = 0x0000000a, -VGT_TE_PRIM_INDEX_QUAD = 0x0000000b, -VGT_OUT_LINE_ADJ = 0x0000000c, -VGT_OUT_TRI_ADJ = 0x0000000d, -VGT_OUT_PATCH = 0x0000000e, -} VGT_OUT_PRIM_TYPE; - -/* - * VGT_DI_PRIM_TYPE enum - */ - -typedef enum VGT_DI_PRIM_TYPE { -DI_PT_NONE = 0x00000000, -DI_PT_POINTLIST = 0x00000001, -DI_PT_LINELIST = 0x00000002, -DI_PT_LINESTRIP = 0x00000003, -DI_PT_TRILIST = 0x00000004, -DI_PT_TRIFAN = 0x00000005, -DI_PT_TRISTRIP = 0x00000006, -DI_PT_2D_RECTANGLE = 0x00000007, -DI_PT_UNUSED_1 = 0x00000008, -DI_PT_PATCH = 0x00000009, -DI_PT_LINELIST_ADJ = 0x0000000a, -DI_PT_LINESTRIP_ADJ = 0x0000000b, -DI_PT_TRILIST_ADJ = 0x0000000c, -DI_PT_TRISTRIP_ADJ = 0x0000000d, -DI_PT_UNUSED_3 = 0x0000000e, -DI_PT_UNUSED_4 = 0x0000000f, -DI_PT_TRI_WITH_WFLAGS = 0x00000010, -DI_PT_RECTLIST = 0x00000011, -DI_PT_LINELOOP = 0x00000012, -DI_PT_QUADLIST = 0x00000013, -DI_PT_QUADSTRIP = 0x00000014, -DI_PT_POLYGON = 0x00000015, -} VGT_DI_PRIM_TYPE; - -/* - * VGT_DI_SOURCE_SELECT enum - */ - -typedef enum VGT_DI_SOURCE_SELECT { -DI_SRC_SEL_DMA = 0x00000000, -DI_SRC_SEL_IMMEDIATE = 0x00000001, -DI_SRC_SEL_AUTO_INDEX = 0x00000002, -DI_SRC_SEL_RESERVED = 0x00000003, -} VGT_DI_SOURCE_SELECT; - -/* - * VGT_DI_MAJOR_MODE_SELECT enum - */ - -typedef enum VGT_DI_MAJOR_MODE_SELECT { -DI_MAJOR_MODE_0 = 0x00000000, -DI_MAJOR_MODE_1 = 0x00000001, -} VGT_DI_MAJOR_MODE_SELECT; - -/* - * VGT_DI_INDEX_SIZE enum - */ - -typedef enum VGT_DI_INDEX_SIZE { -DI_INDEX_SIZE_16_BIT = 0x00000000, -DI_INDEX_SIZE_32_BIT = 0x00000001, -DI_INDEX_SIZE_8_BIT = 0x00000002, -} VGT_DI_INDEX_SIZE; - -/* - * VGT_EVENT_TYPE enum - */ - -typedef enum VGT_EVENT_TYPE { -Reserved_0x00 = 0x00000000, -SAMPLE_STREAMOUTSTATS1 = 0x00000001, -SAMPLE_STREAMOUTSTATS2 = 0x00000002, -SAMPLE_STREAMOUTSTATS3 = 0x00000003, -CACHE_FLUSH_TS = 0x00000004, -CONTEXT_DONE = 0x00000005, -CACHE_FLUSH = 0x00000006, -CS_PARTIAL_FLUSH = 0x00000007, -VGT_STREAMOUT_SYNC = 0x00000008, -Reserved_0x09 = 0x00000009, -VGT_STREAMOUT_RESET = 
0x0000000a, -END_OF_PIPE_INCR_DE = 0x0000000b, -END_OF_PIPE_IB_END = 0x0000000c, -RST_PIX_CNT = 0x0000000d, -BREAK_BATCH = 0x0000000e, -VS_PARTIAL_FLUSH = 0x0000000f, -PS_PARTIAL_FLUSH = 0x00000010, -FLUSH_HS_OUTPUT = 0x00000011, -FLUSH_DFSM = 0x00000012, -RESET_TO_LOWEST_VGT = 0x00000013, -CACHE_FLUSH_AND_INV_TS_EVENT = 0x00000014, -ZPASS_DONE = 0x00000015, -CACHE_FLUSH_AND_INV_EVENT = 0x00000016, -PERFCOUNTER_START = 0x00000017, -PERFCOUNTER_STOP = 0x00000018, -PIPELINESTAT_START = 0x00000019, -PIPELINESTAT_STOP = 0x0000001a, -PERFCOUNTER_SAMPLE = 0x0000001b, -Available_0x1c = 0x0000001c, -Available_0x1d = 0x0000001d, -SAMPLE_PIPELINESTAT = 0x0000001e, -SO_VGTSTREAMOUT_FLUSH = 0x0000001f, -SAMPLE_STREAMOUTSTATS = 0x00000020, -RESET_VTX_CNT = 0x00000021, -BLOCK_CONTEXT_DONE = 0x00000022, -CS_CONTEXT_DONE = 0x00000023, -VGT_FLUSH = 0x00000024, -TGID_ROLLOVER = 0x00000025, -SQ_NON_EVENT = 0x00000026, -SC_SEND_DB_VPZ = 0x00000027, -BOTTOM_OF_PIPE_TS = 0x00000028, -FLUSH_SX_TS = 0x00000029, -DB_CACHE_FLUSH_AND_INV = 0x0000002a, -FLUSH_AND_INV_DB_DATA_TS = 0x0000002b, -FLUSH_AND_INV_DB_META = 0x0000002c, -FLUSH_AND_INV_CB_DATA_TS = 0x0000002d, -FLUSH_AND_INV_CB_META = 0x0000002e, -CS_DONE = 0x0000002f, -PS_DONE = 0x00000030, -FLUSH_AND_INV_CB_PIXEL_DATA = 0x00000031, -SX_CB_RAT_ACK_REQUEST = 0x00000032, -THREAD_TRACE_START = 0x00000033, -THREAD_TRACE_STOP = 0x00000034, -THREAD_TRACE_MARKER = 0x00000035, -THREAD_TRACE_FLUSH = 0x00000036, -THREAD_TRACE_FINISH = 0x00000037, -PIXEL_PIPE_STAT_CONTROL = 0x00000038, -PIXEL_PIPE_STAT_DUMP = 0x00000039, -PIXEL_PIPE_STAT_RESET = 0x0000003a, -CONTEXT_SUSPEND = 0x0000003b, -OFFCHIP_HS_DEALLOC = 0x0000003c, -ENABLE_NGG_PIPELINE = 0x0000003d, -ENABLE_LEGACY_PIPELINE = 0x0000003e, -Reserved_0x3f = 0x0000003f, -} VGT_EVENT_TYPE; - -/* - * VGT_DMA_SWAP_MODE enum - */ - -typedef enum VGT_DMA_SWAP_MODE { -VGT_DMA_SWAP_NONE = 0x00000000, -VGT_DMA_SWAP_16_BIT = 0x00000001, -VGT_DMA_SWAP_32_BIT = 0x00000002, -VGT_DMA_SWAP_WORD = 0x00000003, 
-} VGT_DMA_SWAP_MODE; - -/* - * VGT_INDEX_TYPE_MODE enum - */ - -typedef enum VGT_INDEX_TYPE_MODE { -VGT_INDEX_16 = 0x00000000, -VGT_INDEX_32 = 0x00000001, -VGT_INDEX_8 = 0x00000002, -} VGT_INDEX_TYPE_MODE; - -/* - * VGT_DMA_BUF_TYPE enum - */ - -typedef enum VGT_DMA_BUF_TYPE { -VGT_DMA_BUF_MEM = 0x00000000, -VGT_DMA_BUF_RING = 0x00000001, -VGT_DMA_BUF_SETUP = 0x00000002, -VGT_DMA_PTR_UPDATE = 0x00000003, -} VGT_DMA_BUF_TYPE; - -/* - * VGT_OUTPATH_SELECT enum - */ - -typedef enum VGT_OUTPATH_SELECT { -VGT_OUTPATH_VTX_REUSE = 0x00000000, -VGT_OUTPATH_TESS_EN = 0x00000001, -VGT_OUTPATH_PASSTHRU = 0x00000002, -VGT_OUTPATH_GS_BLOCK = 0x00000003, -VGT_OUTPATH_HS_BLOCK = 0x00000004, -VGT_OUTPATH_PRIM_GEN = 0x00000005, -} VGT_OUTPATH_SELECT; - -/* - * VGT_GRP_PRIM_TYPE enum - */ - -typedef enum VGT_GRP_PRIM_TYPE { -VGT_GRP_3D_POINT = 0x00000000, -VGT_GRP_3D_LINE = 0x00000001, -VGT_GRP_3D_TRI = 0x00000002, -VGT_GRP_3D_RECT = 0x00000003, -VGT_GRP_3D_QUAD = 0x00000004, -VGT_GRP_2D_COPY_RECT_V0 = 0x00000005, -VGT_GRP_2D_COPY_RECT_V1 = 0x00000006, -VGT_GRP_2D_COPY_RECT_V2 = 0x00000007, -VGT_GRP_2D_COPY_RECT_V3 = 0x00000008, -VGT_GRP_2D_FILL_RECT = 0x00000009, -VGT_GRP_2D_LINE = 0x0000000a, -VGT_GRP_2D_TRI = 0x0000000b, -VGT_GRP_PRIM_INDEX_LINE = 0x0000000c, -VGT_GRP_PRIM_INDEX_TRI = 0x0000000d, -VGT_GRP_PRIM_INDEX_QUAD = 0x0000000e, -VGT_GRP_3D_LINE_ADJ = 0x0000000f, -VGT_GRP_3D_TRI_ADJ = 0x00000010, -VGT_GRP_3D_PATCH = 0x00000011, -VGT_GRP_2D_RECT = 0x00000012, -} VGT_GRP_PRIM_TYPE; - -/* - * VGT_GRP_PRIM_ORDER enum - */ - -typedef enum VGT_GRP_PRIM_ORDER { -VGT_GRP_LIST = 0x00000000, -VGT_GRP_STRIP = 0x00000001, -VGT_GRP_FAN = 0x00000002, -VGT_GRP_LOOP = 0x00000003, -VGT_GRP_POLYGON = 0x00000004, -} VGT_GRP_PRIM_ORDER; - -/* - * VGT_GROUP_CONV_SEL enum - */ - -typedef enum VGT_GROUP_CONV_SEL { -VGT_GRP_INDEX_16 = 0x00000000, -VGT_GRP_INDEX_32 = 0x00000001, -VGT_GRP_UINT_16 = 0x00000002, -VGT_GRP_UINT_32 = 0x00000003, -VGT_GRP_SINT_16 = 0x00000004, -VGT_GRP_SINT_32 = 
0x00000005, -VGT_GRP_FLOAT_32 = 0x00000006, -VGT_GRP_AUTO_PRIM = 0x00000007, -VGT_GRP_FIX_1_23_TO_FLOAT = 0x00000008, -} VGT_GROUP_CONV_SEL; - -/* - * VGT_GS_MODE_TYPE enum - */ - -typedef enum VGT_GS_MODE_TYPE { -GS_OFF = 0x00000000, -GS_SCENARIO_A = 0x00000001, -GS_SCENARIO_B = 0x00000002, -GS_SCENARIO_G = 0x00000003, -GS_SCENARIO_C = 0x00000004, -SPRITE_EN = 0x00000005, -} VGT_GS_MODE_TYPE; - -/* - * VGT_GS_CUT_MODE enum - */ - -typedef enum VGT_GS_CUT_MODE { -GS_CUT_1024 = 0x00000000, -GS_CUT_512 = 0x00000001, -GS_CUT_256 = 0x00000002, -GS_CUT_128 = 0x00000003, -} VGT_GS_CUT_MODE; - -/* - * VGT_GS_OUTPRIM_TYPE enum - */ - -typedef enum VGT_GS_OUTPRIM_TYPE { -POINTLIST = 0x00000000, -LINESTRIP = 0x00000001, -TRISTRIP = 0x00000002, -RECTLIST = 0x00000003, -} VGT_GS_OUTPRIM_TYPE; - -/* - * VGT_CACHE_INVALID_MODE enum - */ - -typedef enum VGT_CACHE_INVALID_MODE { -VC_ONLY = 0x00000000, -TC_ONLY = 0x00000001, -VC_AND_TC = 0x00000002, -} VGT_CACHE_INVALID_MODE; - -/* - * VGT_TESS_TYPE enum - */ - -typedef enum VGT_TESS_TYPE { -TESS_ISOLINE = 0x00000000, -TESS_TRIANGLE = 0x00000001, -TESS_QUAD = 0x00000002, -} VGT_TESS_TYPE; - -/* - * VGT_TESS_PARTITION enum - */ - -typedef enum VGT_TESS_PARTITION { -PART_INTEGER = 0x00000000, -PART_POW2 = 0x00000001, -PART_FRAC_ODD = 0x00000002, -PART_FRAC_EVEN = 0x00000003, -} VGT_TESS_PARTITION; - -/* - * VGT_TESS_TOPOLOGY enum - */ - -typedef enum VGT_TESS_TOPOLOGY { -OUTPUT_POINT = 0x00000000, -OUTPUT_LINE = 0x00000001, -OUTPUT_TRIANGLE_CW = 0x00000002, -OUTPUT_TRIANGLE_CCW = 0x00000003, -} VGT_TESS_TOPOLOGY; - -/* - * VGT_RDREQ_POLICY enum - */ - -typedef enum VGT_RDREQ_POLICY { -VGT_POLICY_LRU = 0x00000000, -VGT_POLICY_STREAM = 0x00000001, -} VGT_RDREQ_POLICY; - -/* - * VGT_DIST_MODE enum - */ - -typedef enum VGT_DIST_MODE { -NO_DIST = 0x00000000, -PATCHES = 0x00000001, -DONUTS = 0x00000002, -TRAPEZOIDS = 0x00000003, -} VGT_DIST_MODE; - -/* - * VGT_STAGES_LS_EN enum - */ - -typedef enum VGT_STAGES_LS_EN { -LS_STAGE_OFF = 
0x00000000, -LS_STAGE_ON = 0x00000001, -CS_STAGE_ON = 0x00000002, -RESERVED_LS = 0x00000003, -} VGT_STAGES_LS_EN; - -/* - * VGT_STAGES_HS_EN enum - */ - -typedef enum VGT_STAGES_HS_EN { -HS_STAGE_OFF = 0x00000000, -HS_STAGE_ON = 0x00000001, -} VGT_STAGES_HS_EN; - -/* - * VGT_STAGES_ES_EN enum - */ - -typedef enum VGT_STAGES_ES_EN { -ES_STAGE_OFF = 0x00000000, -ES_STAGE_DS = 0x00000001, -ES_STAGE_REAL = 0x00000002, -RESERVED_ES = 0x00000003, -} VGT_STAGES_ES_EN; - -/* - * VGT_STAGES_GS_EN enum - */ - -typedef enum VGT_STAGES_GS_EN { -GS_STAGE_OFF = 0x00000000, -GS_STAGE_ON = 0x00000001, -} VGT_STAGES_GS_EN; - -/* - * VGT_STAGES_VS_EN enum - */ - -typedef enum VGT_STAGES_VS_EN { -VS_STAGE_REAL = 0x00000000, -VS_STAGE_DS = 0x00000001, -VS_STAGE_COPY_SHADER = 0x00000002, -RESERVED_VS = 0x00000003, -} VGT_STAGES_VS_EN; - -/* - * VGT_PERFCOUNT_SELECT enum - */ - -typedef enum VGT_PERFCOUNT_SELECT { -vgt_perf_VGT_SPI_ESTHREAD_EVENT_WINDOW_ACTIVE = 0x00000000, -vgt_perf_VGT_SPI_ESVERT_VALID = 0x00000001, -vgt_perf_VGT_SPI_ESVERT_EOV = 0x00000002, -vgt_perf_VGT_SPI_ESVERT_STALLED = 0x00000003, -vgt_perf_VGT_SPI_ESVERT_STARVED_BUSY = 0x00000004, -vgt_perf_VGT_SPI_ESVERT_STARVED_IDLE = 0x00000005, -vgt_perf_VGT_SPI_ESVERT_STATIC = 0x00000006, -vgt_perf_VGT_SPI_ESTHREAD_IS_EVENT = 0x00000007, -vgt_perf_VGT_SPI_ESTHREAD_SEND = 0x00000008, -vgt_perf_VGT_SPI_GSPRIM_VALID = 0x00000009, -vgt_perf_VGT_SPI_GSPRIM_EOV = 0x0000000a, -vgt_perf_VGT_SPI_GSPRIM_CONT = 0x0000000b, -vgt_perf_VGT_SPI_GSPRIM_STALLED = 0x0000000c, -vgt_perf_VGT_SPI_GSPRIM_STARVED_BUSY = 0x0000000d, -vgt_perf_VGT_SPI_GSPRIM_STARVED_IDLE = 0x0000000e, -vgt_perf_VGT_SPI_GSPRIM_STATIC = 0x0000000f, -vgt_perf_VGT_SPI_GSTHREAD_EVENT_WINDOW_ACTIVE = 0x00000010, -vgt_perf_VGT_SPI_GSTHREAD_IS_EVENT = 0x00000011, -vgt_perf_VGT_SPI_GSTHREAD_SEND = 0x00000012, -vgt_perf_VGT_SPI_VSTHREAD_EVENT_WINDOW_ACTIVE = 0x00000013, -vgt_perf_VGT_SPI_VSVERT_SEND = 0x00000014, -vgt_perf_VGT_SPI_VSVERT_EOV = 0x00000015, 
-vgt_perf_VGT_SPI_VSVERT_STALLED = 0x00000016, -vgt_perf_VGT_SPI_VSVERT_STARVED_BUSY = 0x00000017, -vgt_perf_VGT_SPI_VSVERT_STARVED_IDLE = 0x00000018, -vgt_perf_VGT_SPI_VSVERT_STATIC = 0x00000019, -vgt_perf_VGT_SPI_VSTHREAD_IS_EVENT = 0x0000001a, -vgt_perf_VGT_SPI_VSTHREAD_SEND = 0x0000001b, -vgt_perf_VGT_PA_EVENT_WINDOW_ACTIVE = 0x0000001c, -vgt_perf_VGT_PA_CLIPV_SEND = 0x0000001d, -vgt_perf_VGT_PA_CLIPV_FIRSTVERT = 0x0000001e, -vgt_perf_VGT_PA_CLIPV_STALLED = 0x0000001f, -vgt_perf_VGT_PA_CLIPV_STARVED_BUSY = 0x00000020, -vgt_perf_VGT_PA_CLIPV_STARVED_IDLE = 0x00000021, -vgt_perf_VGT_PA_CLIPV_STATIC = 0x00000022, -vgt_perf_VGT_PA_CLIPP_SEND = 0x00000023, -vgt_perf_VGT_PA_CLIPP_EOP = 0x00000024, -vgt_perf_VGT_PA_CLIPP_IS_EVENT = 0x00000025, -vgt_perf_VGT_PA_CLIPP_NULL_PRIM = 0x00000026, -vgt_perf_VGT_PA_CLIPP_NEW_VTX_VECT = 0x00000027, -vgt_perf_VGT_PA_CLIPP_STALLED = 0x00000028, -vgt_perf_VGT_PA_CLIPP_STARVED_BUSY = 0x00000029, -vgt_perf_VGT_PA_CLIPP_STARVED_IDLE = 0x0000002a, -vgt_perf_VGT_PA_CLIPP_STATIC = 0x0000002b, -vgt_perf_VGT_PA_CLIPS_SEND = 0x0000002c, -vgt_perf_VGT_PA_CLIPS_STALLED = 0x0000002d, -vgt_perf_VGT_PA_CLIPS_STARVED_BUSY = 0x0000002e, -vgt_perf_VGT_PA_CLIPS_STARVED_IDLE = 0x0000002f, -vgt_perf_VGT_PA_CLIPS_STATIC = 0x00000030, -vgt_perf_vsvert_ds_send = 0x00000031, -vgt_perf_vsvert_api_send = 0x00000032, -vgt_perf_hs_tif_stall = 0x00000033, -vgt_perf_hs_input_stall = 0x00000034, -vgt_perf_hs_interface_stall = 0x00000035, -vgt_perf_hs_tfm_stall = 0x00000036, -vgt_perf_te11_starved = 0x00000037, -vgt_perf_gs_event_stall = 0x00000038, -vgt_perf_vgt_pa_clipp_send_not_event = 0x00000039, -vgt_perf_vgt_pa_clipp_valid_prim = 0x0000003a, -vgt_perf_reused_es_indices = 0x0000003b, -vgt_perf_vs_cache_hits = 0x0000003c, -vgt_perf_gs_cache_hits = 0x0000003d, -vgt_perf_ds_cache_hits = 0x0000003e, -vgt_perf_total_cache_hits = 0x0000003f, -vgt_perf_vgt_busy = 0x00000040, -vgt_perf_vgt_gs_busy = 0x00000041, -vgt_perf_esvert_stalled_es_tbl = 0x00000042, 
-vgt_perf_esvert_stalled_gs_tbl = 0x00000043, -vgt_perf_esvert_stalled_gs_event = 0x00000044, -vgt_perf_esvert_stalled_gsprim = 0x00000045, -vgt_perf_gsprim_stalled_es_tbl = 0x00000046, -vgt_perf_gsprim_stalled_gs_tbl = 0x00000047, -vgt_perf_gsprim_stalled_gs_event = 0x00000048, -vgt_perf_gsprim_stalled_esvert = 0x00000049, -vgt_perf_esthread_stalled_es_rb_full = 0x0000004a, -vgt_perf_esthread_stalled_spi_bp = 0x0000004b, -vgt_perf_counters_avail_stalled = 0x0000004c, -vgt_perf_gs_rb_space_avail_stalled = 0x0000004d, -vgt_perf_gs_issue_rtr_stalled = 0x0000004e, -vgt_perf_gsthread_stalled = 0x0000004f, -vgt_perf_strmout_stalled = 0x00000050, -vgt_perf_wait_for_es_done_stalled = 0x00000051, -vgt_perf_cm_stalled_by_gog = 0x00000052, -vgt_perf_cm_reading_stalled = 0x00000053, -vgt_perf_cm_stalled_by_gsfetch_done = 0x00000054, -vgt_perf_gog_vs_tbl_stalled = 0x00000055, -vgt_perf_gog_out_indx_stalled = 0x00000056, -vgt_perf_gog_out_prim_stalled = 0x00000057, -vgt_perf_waveid_stalled = 0x00000058, -vgt_perf_gog_busy = 0x00000059, -vgt_perf_reused_vs_indices = 0x0000005a, -vgt_perf_sclk_reg_vld_event = 0x0000005b, -vgt_perf_vs_conflicting_indices = 0x0000005c, -vgt_perf_sclk_core_vld_event = 0x0000005d, -vgt_perf_hswave_stalled = 0x0000005e, -vgt_perf_sclk_gs_vld_event = 0x0000005f, -vgt_perf_VGT_SPI_LSVERT_VALID = 0x00000060, -vgt_perf_VGT_SPI_LSVERT_EOV = 0x00000061, -vgt_perf_VGT_SPI_LSVERT_STALLED = 0x00000062, -vgt_perf_VGT_SPI_LSVERT_STARVED_BUSY = 0x00000063, -vgt_perf_VGT_SPI_LSVERT_STARVED_IDLE = 0x00000064, -vgt_perf_VGT_SPI_LSVERT_STATIC = 0x00000065, -vgt_perf_VGT_SPI_LSWAVE_EVENT_WINDOW_ACTIVE = 0x00000066, -vgt_perf_VGT_SPI_LSWAVE_IS_EVENT = 0x00000067, -vgt_perf_VGT_SPI_LSWAVE_SEND = 0x00000068, -vgt_perf_VGT_SPI_HSVERT_VALID = 0x00000069, -vgt_perf_VGT_SPI_HSVERT_EOV = 0x0000006a, -vgt_perf_VGT_SPI_HSVERT_STALLED = 0x0000006b, -vgt_perf_VGT_SPI_HSVERT_STARVED_BUSY = 0x0000006c, -vgt_perf_VGT_SPI_HSVERT_STARVED_IDLE = 0x0000006d, 
-vgt_perf_VGT_SPI_HSVERT_STATIC = 0x0000006e, -vgt_perf_VGT_SPI_HSWAVE_EVENT_WINDOW_ACTIVE = 0x0000006f, -vgt_perf_VGT_SPI_HSWAVE_IS_EVENT = 0x00000070, -vgt_perf_VGT_SPI_HSWAVE_SEND = 0x00000071, -vgt_perf_ds_prims = 0x00000072, -vgt_perf_ds_RESERVED = 0x00000073, -vgt_perf_ls_thread_groups = 0x00000074, -vgt_perf_hs_thread_groups = 0x00000075, -vgt_perf_es_thread_groups = 0x00000076, -vgt_perf_vs_thread_groups = 0x00000077, -vgt_perf_ls_done_latency = 0x00000078, -vgt_perf_hs_done_latency = 0x00000079, -vgt_perf_es_done_latency = 0x0000007a, -vgt_perf_gs_done_latency = 0x0000007b, -vgt_perf_vgt_hs_busy = 0x0000007c, -vgt_perf_vgt_te11_busy = 0x0000007d, -vgt_perf_ls_flush = 0x0000007e, -vgt_perf_hs_flush = 0x0000007f, -vgt_perf_es_flush = 0x00000080, -vgt_perf_vgt_pa_clipp_eopg = 0x00000081, -vgt_perf_ls_done = 0x00000082, -vgt_perf_hs_done = 0x00000083, -vgt_perf_es_done = 0x00000084, -vgt_perf_gs_done = 0x00000085, -vgt_perf_vsfetch_done = 0x00000086, -vgt_perf_gs_done_received = 0x00000087, -vgt_perf_es_ring_high_water_mark = 0x00000088, -vgt_perf_gs_ring_high_water_mark = 0x00000089, -vgt_perf_vs_table_high_water_mark = 0x0000008a, -vgt_perf_hs_tgs_active_high_water_mark = 0x0000008b, -vgt_perf_pa_clipp_dealloc = 0x0000008c, -vgt_perf_cut_mem_flush_stalled = 0x0000008d, -vgt_perf_vsvert_work_received = 0x0000008e, -vgt_perf_vgt_pa_clipp_starved_after_work = 0x0000008f, -vgt_perf_te11_con_starved_after_work = 0x00000090, -vgt_perf_hs_waiting_on_ls_done_stall = 0x00000091, -vgt_spi_vsvert_valid = 0x00000092, -} VGT_PERFCOUNT_SELECT; - -/* - * IA_PERFCOUNT_SELECT enum - */ - -typedef enum IA_PERFCOUNT_SELECT { -ia_perf_GRP_INPUT_EVENT_WINDOW_ACTIVE = 0x00000000, -ia_perf_dma_data_fifo_full = 0x00000001, -ia_perf_RESERVED1 = 0x00000002, -ia_perf_RESERVED2 = 0x00000003, -ia_perf_RESERVED3 = 0x00000004, -ia_perf_RESERVED4 = 0x00000005, -ia_perf_RESERVED5 = 0x00000006, -ia_perf_MC_LAT_BIN_0 = 0x00000007, -ia_perf_MC_LAT_BIN_1 = 0x00000008, -ia_perf_MC_LAT_BIN_2 = 
0x00000009, -ia_perf_MC_LAT_BIN_3 = 0x0000000a, -ia_perf_MC_LAT_BIN_4 = 0x0000000b, -ia_perf_MC_LAT_BIN_5 = 0x0000000c, -ia_perf_MC_LAT_BIN_6 = 0x0000000d, -ia_perf_MC_LAT_BIN_7 = 0x0000000e, -ia_perf_ia_busy = 0x0000000f, -ia_perf_ia_sclk_reg_vld_event = 0x00000010, -ia_perf_RESERVED6 = 0x00000011, -ia_perf_ia_sclk_core_vld_event = 0x00000012, -ia_perf_RESERVED7 = 0x00000013, -ia_perf_ia_dma_return = 0x00000014, -ia_perf_ia_stalled = 0x00000015, -ia_perf_shift_starved_pipe0_event = 0x00000016, -ia_perf_shift_starved_pipe1_event = 0x00000017, -} IA_PERFCOUNT_SELECT; - -/* - * WD_PERFCOUNT_SELECT enum - */ - -typedef enum WD_PERFCOUNT_SELECT { -wd_perf_RBIU_FIFOS_EVENT_WINDOW_ACTIVE = 0x00000000, -wd_perf_RBIU_DR_FIFO_STARVED = 0x00000001, -wd_perf_RBIU_DR_FIFO_STALLED = 0x00000002, -wd_perf_RBIU_DI_FIFO_STARVED = 0x00000003, -wd_perf_RBIU_DI_FIFO_STALLED = 0x00000004, -wd_perf_wd_busy = 0x00000005, -wd_perf_wd_sclk_reg_vld_event = 0x00000006, -wd_perf_wd_sclk_input_vld_event = 0x00000007, -wd_perf_wd_sclk_core_vld_event = 0x00000008, -wd_perf_wd_stalled = 0x00000009, -wd_perf_inside_tf_bin_0 = 0x0000000a, -wd_perf_inside_tf_bin_1 = 0x0000000b, -wd_perf_inside_tf_bin_2 = 0x0000000c, -wd_perf_inside_tf_bin_3 = 0x0000000d, -wd_perf_inside_tf_bin_4 = 0x0000000e, -wd_perf_inside_tf_bin_5 = 0x0000000f, -wd_perf_inside_tf_bin_6 = 0x00000010, -wd_perf_inside_tf_bin_7 = 0x00000011, -wd_perf_inside_tf_bin_8 = 0x00000012, -wd_perf_tfreq_lat_bin_0 = 0x00000013, -wd_perf_tfreq_lat_bin_1 = 0x00000014, -wd_perf_tfreq_lat_bin_2 = 0x00000015, -wd_perf_tfreq_lat_bin_3 = 0x00000016, -wd_perf_tfreq_lat_bin_4 = 0x00000017, -wd_perf_tfreq_lat_bin_5 = 0x00000018, -wd_perf_tfreq_lat_bin_6 = 0x00000019, -wd_perf_tfreq_lat_bin_7 = 0x0000001a, -wd_starved_on_hs_done = 0x0000001b, -wd_perf_se0_hs_done_latency = 0x0000001c, -wd_perf_se1_hs_done_latency = 0x0000001d, -wd_perf_se2_hs_done_latency = 0x0000001e, -wd_perf_se3_hs_done_latency = 0x0000001f, -wd_perf_hs_done_se0 = 0x00000020, 
-wd_perf_hs_done_se1 = 0x00000021, -wd_perf_hs_done_se2 = 0x00000022, -wd_perf_hs_done_se3 = 0x00000023, -wd_perf_null_patches = 0x00000024, -} WD_PERFCOUNT_SELECT; - -/* - * WD_IA_DRAW_TYPE enum - */ - -typedef enum WD_IA_DRAW_TYPE { -WD_IA_DRAW_TYPE_DI_MM0 = 0x00000000, -WD_IA_DRAW_TYPE_REG_XFER = 0x00000001, -WD_IA_DRAW_TYPE_EVENT_INIT = 0x00000002, -WD_IA_DRAW_TYPE_EVENT_ADDR = 0x00000003, -WD_IA_DRAW_TYPE_MIN_INDX = 0x00000004, -WD_IA_DRAW_TYPE_MAX_INDX = 0x00000005, -WD_IA_DRAW_TYPE_INDX_OFF = 0x00000006, -WD_IA_DRAW_TYPE_IMM_DATA = 0x00000007, -} WD_IA_DRAW_TYPE; - -/* - * WD_IA_DRAW_REG_XFER enum - */ - -typedef enum WD_IA_DRAW_REG_XFER { -WD_IA_DRAW_REG_XFER_IA_MULTI_VGT_PARAM = 0x00000000, -WD_IA_DRAW_REG_XFER_VGT_MULTI_PRIM_IB_RESET_EN = 0x00000001, -} WD_IA_DRAW_REG_XFER; - -/* - * WD_IA_DRAW_SOURCE enum - */ - -typedef enum WD_IA_DRAW_SOURCE { -WD_IA_DRAW_SOURCE_DMA = 0x00000000, -WD_IA_DRAW_SOURCE_IMMD = 0x00000001, -WD_IA_DRAW_SOURCE_AUTO = 0x00000002, -WD_IA_DRAW_SOURCE_OPAQ = 0x00000003, -} WD_IA_DRAW_SOURCE; - -/* - * GS_THREADID_SIZE value - */ - -#define GSTHREADID_SIZE 0x00000002 - -/******************************************************* - * GB Enums - *******************************************************/ - -/* - * GB_EDC_DED_MODE enum - */ - -typedef enum GB_EDC_DED_MODE { -GB_EDC_DED_MODE_LOG = 0x00000000, -GB_EDC_DED_MODE_HALT = 0x00000001, -GB_EDC_DED_MODE_INT_HALT = 0x00000002, -} GB_EDC_DED_MODE; - -/* - * VALUE_GB_TILING_CONFIG_TABLE_SIZE value - */ - -#define GB_TILING_CONFIG_TABLE_SIZE 0x00000020 - -/* - * VALUE_GB_TILING_CONFIG_MACROTABLE_SIZE value - */ - -#define GB_TILING_CONFIG_MACROTABLE_SIZE 0x00000010 - -/******************************************************* - * TP Enums - *******************************************************/ - -/* - * TA_TC_ADDR_MODES enum - */ - -typedef enum TA_TC_ADDR_MODES { -TA_TC_ADDR_MODE_DEFAULT = 0x00000000, -TA_TC_ADDR_MODE_COMP0 = 0x00000001, -TA_TC_ADDR_MODE_COMP1 = 0x00000002, 
-TA_TC_ADDR_MODE_COMP2 = 0x00000003, -TA_TC_ADDR_MODE_COMP3 = 0x00000004, -TA_TC_ADDR_MODE_UNALIGNED = 0x00000005, -TA_TC_ADDR_MODE_BORDER_COLOR = 0x00000006, -} TA_TC_ADDR_MODES; - -/* - * TA_PERFCOUNT_SEL enum - */ - -typedef enum TA_PERFCOUNT_SEL { -TA_PERF_SEL_NULL = 0x00000000, -TA_PERF_SEL_sh_fifo_busy = 0x00000001, -TA_PERF_SEL_sh_fifo_cmd_busy = 0x00000002, -TA_PERF_SEL_sh_fifo_addr_busy = 0x00000003, -TA_PERF_SEL_sh_fifo_data_busy = 0x00000004, -TA_PERF_SEL_sh_fifo_data_sfifo_busy = 0x00000005, -TA_PERF_SEL_sh_fifo_data_tfifo_busy = 0x00000006, -TA_PERF_SEL_gradient_busy = 0x00000007, -TA_PERF_SEL_gradient_fifo_busy = 0x00000008, -TA_PERF_SEL_lod_busy = 0x00000009, -TA_PERF_SEL_lod_fifo_busy = 0x0000000a, -TA_PERF_SEL_addresser_busy = 0x0000000b, -TA_PERF_SEL_addresser_fifo_busy = 0x0000000c, -TA_PERF_SEL_aligner_busy = 0x0000000d, -TA_PERF_SEL_write_path_busy = 0x0000000e, -TA_PERF_SEL_ta_busy = 0x0000000f, -TA_PERF_SEL_sq_ta_cmd_cycles = 0x00000010, -TA_PERF_SEL_sp_ta_addr_cycles = 0x00000011, -TA_PERF_SEL_sp_ta_data_cycles = 0x00000012, -TA_PERF_SEL_ta_fa_data_state_cycles = 0x00000013, -TA_PERF_SEL_sh_fifo_addr_waiting_on_cmd_cycles = 0x00000014, -TA_PERF_SEL_sh_fifo_cmd_waiting_on_addr_cycles = 0x00000015, -TA_PERF_SEL_sh_fifo_addr_starved_while_busy_cycles = 0x00000016, -TA_PERF_SEL_sh_fifo_cmd_starved_while_busy_cycles = 0x00000017, -TA_PERF_SEL_sh_fifo_data_waiting_on_data_state_cycles = 0x00000018, -TA_PERF_SEL_sh_fifo_data_state_waiting_on_data_cycles = 0x00000019, -TA_PERF_SEL_sh_fifo_data_starved_while_busy_cycles = 0x0000001a, -TA_PERF_SEL_sh_fifo_data_state_starved_while_busy_cycles = 0x0000001b, -TA_PERF_SEL_RESERVED_28 = 0x0000001c, -TA_PERF_SEL_RESERVED_29 = 0x0000001d, -TA_PERF_SEL_sh_fifo_addr_cycles = 0x0000001e, -TA_PERF_SEL_sh_fifo_data_cycles = 0x0000001f, -TA_PERF_SEL_total_wavefronts = 0x00000020, -TA_PERF_SEL_gradient_cycles = 0x00000021, -TA_PERF_SEL_walker_cycles = 0x00000022, -TA_PERF_SEL_aligner_cycles = 0x00000023, 
-TA_PERF_SEL_image_wavefronts = 0x00000024, -TA_PERF_SEL_image_read_wavefronts = 0x00000025, -TA_PERF_SEL_image_write_wavefronts = 0x00000026, -TA_PERF_SEL_image_atomic_wavefronts = 0x00000027, -TA_PERF_SEL_image_total_cycles = 0x00000028, -TA_PERF_SEL_RESERVED_41 = 0x00000029, -TA_PERF_SEL_RESERVED_42 = 0x0000002a, -TA_PERF_SEL_RESERVED_43 = 0x0000002b, -TA_PERF_SEL_buffer_wavefronts = 0x0000002c, -TA_PERF_SEL_buffer_read_wavefronts = 0x0000002d, -TA_PERF_SEL_buffer_write_wavefronts = 0x0000002e, -TA_PERF_SEL_buffer_atomic_wavefronts = 0x0000002f, -TA_PERF_SEL_buffer_coalescable_wavefronts = 0x00000030, -TA_PERF_SEL_buffer_total_cycles = 0x00000031, -TA_PERF_SEL_buffer_coalescable_addr_multicycled_cycles = 0x00000032, -TA_PERF_SEL_buffer_coalescable_clamp_16kdword_multicycled_cycles = 0x00000033, -TA_PERF_SEL_buffer_coalesced_read_cycles = 0x00000034, -TA_PERF_SEL_buffer_coalesced_write_cycles = 0x00000035, -TA_PERF_SEL_addr_stalled_by_tc_cycles = 0x00000036, -TA_PERF_SEL_addr_stalled_by_td_cycles = 0x00000037, -TA_PERF_SEL_data_stalled_by_tc_cycles = 0x00000038, -TA_PERF_SEL_addresser_stalled_by_aligner_only_cycles = 0x00000039, -TA_PERF_SEL_addresser_stalled_cycles = 0x0000003a, -TA_PERF_SEL_aniso_stalled_by_addresser_only_cycles = 0x0000003b, -TA_PERF_SEL_aniso_stalled_cycles = 0x0000003c, -TA_PERF_SEL_deriv_stalled_by_aniso_only_cycles = 0x0000003d, -TA_PERF_SEL_deriv_stalled_cycles = 0x0000003e, -TA_PERF_SEL_aniso_gt1_cycle_quads = 0x0000003f, -TA_PERF_SEL_color_1_cycle_pixels = 0x00000040, -TA_PERF_SEL_color_2_cycle_pixels = 0x00000041, -TA_PERF_SEL_color_3_cycle_pixels = 0x00000042, -TA_PERF_SEL_color_4_cycle_pixels = 0x00000043, -TA_PERF_SEL_mip_1_cycle_pixels = 0x00000044, -TA_PERF_SEL_mip_2_cycle_pixels = 0x00000045, -TA_PERF_SEL_vol_1_cycle_pixels = 0x00000046, -TA_PERF_SEL_vol_2_cycle_pixels = 0x00000047, -TA_PERF_SEL_bilin_point_1_cycle_pixels = 0x00000048, -TA_PERF_SEL_mipmap_lod_0_samples = 0x00000049, -TA_PERF_SEL_mipmap_lod_1_samples = 0x0000004a, 
-TA_PERF_SEL_mipmap_lod_2_samples = 0x0000004b, -TA_PERF_SEL_mipmap_lod_3_samples = 0x0000004c, -TA_PERF_SEL_mipmap_lod_4_samples = 0x0000004d, -TA_PERF_SEL_mipmap_lod_5_samples = 0x0000004e, -TA_PERF_SEL_mipmap_lod_6_samples = 0x0000004f, -TA_PERF_SEL_mipmap_lod_7_samples = 0x00000050, -TA_PERF_SEL_mipmap_lod_8_samples = 0x00000051, -TA_PERF_SEL_mipmap_lod_9_samples = 0x00000052, -TA_PERF_SEL_mipmap_lod_10_samples = 0x00000053, -TA_PERF_SEL_mipmap_lod_11_samples = 0x00000054, -TA_PERF_SEL_mipmap_lod_12_samples = 0x00000055, -TA_PERF_SEL_mipmap_lod_13_samples = 0x00000056, -TA_PERF_SEL_mipmap_lod_14_samples = 0x00000057, -TA_PERF_SEL_mipmap_invalid_samples = 0x00000058, -TA_PERF_SEL_aniso_1_cycle_quads = 0x00000059, -TA_PERF_SEL_aniso_2_cycle_quads = 0x0000005a, -TA_PERF_SEL_aniso_4_cycle_quads = 0x0000005b, -TA_PERF_SEL_aniso_6_cycle_quads = 0x0000005c, -TA_PERF_SEL_aniso_8_cycle_quads = 0x0000005d, -TA_PERF_SEL_aniso_10_cycle_quads = 0x0000005e, -TA_PERF_SEL_aniso_12_cycle_quads = 0x0000005f, -TA_PERF_SEL_aniso_14_cycle_quads = 0x00000060, -TA_PERF_SEL_aniso_16_cycle_quads = 0x00000061, -TA_PERF_SEL_write_path_input_cycles = 0x00000062, -TA_PERF_SEL_write_path_output_cycles = 0x00000063, -TA_PERF_SEL_flat_wavefronts = 0x00000064, -TA_PERF_SEL_flat_read_wavefronts = 0x00000065, -TA_PERF_SEL_flat_write_wavefronts = 0x00000066, -TA_PERF_SEL_flat_atomic_wavefronts = 0x00000067, -TA_PERF_SEL_flat_coalesceable_wavefronts = 0x00000068, -TA_PERF_SEL_reg_sclk_vld = 0x00000069, -TA_PERF_SEL_local_cg_dyn_sclk_grp0_en = 0x0000006a, -TA_PERF_SEL_local_cg_dyn_sclk_grp1_en = 0x0000006b, -TA_PERF_SEL_local_cg_dyn_sclk_grp1_mems_en = 0x0000006c, -TA_PERF_SEL_local_cg_dyn_sclk_grp4_en = 0x0000006d, -TA_PERF_SEL_local_cg_dyn_sclk_grp5_en = 0x0000006e, -TA_PERF_SEL_xnack_on_phase0 = 0x0000006f, -TA_PERF_SEL_xnack_on_phase1 = 0x00000070, -TA_PERF_SEL_xnack_on_phase2 = 0x00000071, -TA_PERF_SEL_xnack_on_phase3 = 0x00000072, -TA_PERF_SEL_first_xnack_on_phase0 = 0x00000073, 
-TA_PERF_SEL_first_xnack_on_phase1 = 0x00000074, -TA_PERF_SEL_first_xnack_on_phase2 = 0x00000075, -TA_PERF_SEL_first_xnack_on_phase3 = 0x00000076, -} TA_PERFCOUNT_SEL; - -/* - * TD_PERFCOUNT_SEL enum - */ - -typedef enum TD_PERFCOUNT_SEL { -TD_PERF_SEL_none = 0x00000000, -TD_PERF_SEL_td_busy = 0x00000001, -TD_PERF_SEL_input_busy = 0x00000002, -TD_PERF_SEL_output_busy = 0x00000003, -TD_PERF_SEL_lerp_busy = 0x00000004, -TD_PERF_SEL_reg_sclk_vld = 0x00000005, -TD_PERF_SEL_local_cg_dyn_sclk_grp0_en = 0x00000006, -TD_PERF_SEL_local_cg_dyn_sclk_grp1_en = 0x00000007, -TD_PERF_SEL_local_cg_dyn_sclk_grp4_en = 0x00000008, -TD_PERF_SEL_local_cg_dyn_sclk_grp5_en = 0x00000009, -TD_PERF_SEL_tc_td_fifo_full = 0x0000000a, -TD_PERF_SEL_constant_state_full = 0x0000000b, -TD_PERF_SEL_sample_state_full = 0x0000000c, -TD_PERF_SEL_output_fifo_full = 0x0000000d, -TD_PERF_SEL_RESERVED_14 = 0x0000000e, -TD_PERF_SEL_tc_stall = 0x0000000f, -TD_PERF_SEL_pc_stall = 0x00000010, -TD_PERF_SEL_gds_stall = 0x00000011, -TD_PERF_SEL_RESERVED_18 = 0x00000012, -TD_PERF_SEL_RESERVED_19 = 0x00000013, -TD_PERF_SEL_gather4_wavefront = 0x00000014, -TD_PERF_SEL_gather4h_wavefront = 0x00000015, -TD_PERF_SEL_gather4h_packed_wavefront = 0x00000016, -TD_PERF_SEL_gather8h_packed_wavefront = 0x00000017, -TD_PERF_SEL_sample_c_wavefront = 0x00000018, -TD_PERF_SEL_load_wavefront = 0x00000019, -TD_PERF_SEL_atomic_wavefront = 0x0000001a, -TD_PERF_SEL_store_wavefront = 0x0000001b, -TD_PERF_SEL_ldfptr_wavefront = 0x0000001c, -TD_PERF_SEL_d16_en_wavefront = 0x0000001d, -TD_PERF_SEL_bypass_filter_wavefront = 0x0000001e, -TD_PERF_SEL_min_max_filter_wavefront = 0x0000001f, -TD_PERF_SEL_coalescable_wavefront = 0x00000020, -TD_PERF_SEL_coalesced_phase = 0x00000021, -TD_PERF_SEL_four_phase_wavefront = 0x00000022, -TD_PERF_SEL_eight_phase_wavefront = 0x00000023, -TD_PERF_SEL_sixteen_phase_wavefront = 0x00000024, -TD_PERF_SEL_four_phase_forward_wavefront = 0x00000025, -TD_PERF_SEL_write_ack_wavefront = 0x00000026, 
-TD_PERF_SEL_RESERVED_39 = 0x00000027, -TD_PERF_SEL_user_defined_border = 0x00000028, -TD_PERF_SEL_white_border = 0x00000029, -TD_PERF_SEL_opaque_black_border = 0x0000002a, -TD_PERF_SEL_RESERVED_43 = 0x0000002b, -TD_PERF_SEL_RESERVED_44 = 0x0000002c, -TD_PERF_SEL_nack = 0x0000002d, -TD_PERF_SEL_td_sp_traffic = 0x0000002e, -TD_PERF_SEL_consume_gds_traffic = 0x0000002f, -TD_PERF_SEL_addresscmd_poison = 0x00000030, -TD_PERF_SEL_data_poison = 0x00000031, -TD_PERF_SEL_start_cycle_0 = 0x00000032, -TD_PERF_SEL_start_cycle_1 = 0x00000033, -TD_PERF_SEL_start_cycle_2 = 0x00000034, -TD_PERF_SEL_start_cycle_3 = 0x00000035, -TD_PERF_SEL_null_cycle_output = 0x00000036, -TD_PERF_SEL_d16_data_packed = 0x00000037, -TD_PERF_SEL_texels_zeroed_out_by_blend_zero_prt = 0x00000038, -} TD_PERFCOUNT_SEL; - -/* - * TCP_PERFCOUNT_SELECT enum - */ - -typedef enum TCP_PERFCOUNT_SELECT { -TCP_PERF_SEL_TA_TCP_ADDR_STARVE_CYCLES = 0x00000000, -TCP_PERF_SEL_TA_TCP_DATA_STARVE_CYCLES = 0x00000001, -TCP_PERF_SEL_TCP_TA_ADDR_STALL_CYCLES = 0x00000002, -TCP_PERF_SEL_TCP_TA_DATA_STALL_CYCLES = 0x00000003, -TCP_PERF_SEL_TD_TCP_STALL_CYCLES = 0x00000004, -TCP_PERF_SEL_TCR_TCP_STALL_CYCLES = 0x00000005, -TCP_PERF_SEL_LOD_STALL_CYCLES = 0x00000006, -TCP_PERF_SEL_READ_TAGCONFLICT_STALL_CYCLES = 0x00000007, -TCP_PERF_SEL_WRITE_TAGCONFLICT_STALL_CYCLES = 0x00000008, -TCP_PERF_SEL_ATOMIC_TAGCONFLICT_STALL_CYCLES = 0x00000009, -TCP_PERF_SEL_ALLOC_STALL_CYCLES = 0x0000000a, -TCP_PERF_SEL_LFIFO_STALL_CYCLES = 0x0000000b, -TCP_PERF_SEL_RFIFO_STALL_CYCLES = 0x0000000c, -TCP_PERF_SEL_TCR_RDRET_STALL = 0x0000000d, -TCP_PERF_SEL_WRITE_CONFLICT_STALL = 0x0000000e, -TCP_PERF_SEL_HOLE_READ_STALL = 0x0000000f, -TCP_PERF_SEL_READCONFLICT_STALL_CYCLES = 0x00000010, -TCP_PERF_SEL_PENDING_STALL_CYCLES = 0x00000011, -TCP_PERF_SEL_READFIFO_STALL_CYCLES = 0x00000012, -TCP_PERF_SEL_TCP_LATENCY = 0x00000013, -TCP_PERF_SEL_TCC_READ_REQ_LATENCY = 0x00000014, -TCP_PERF_SEL_TCC_WRITE_REQ_LATENCY = 0x00000015, 
-TCP_PERF_SEL_TCC_WRITE_REQ_HOLE_LATENCY = 0x00000016, -TCP_PERF_SEL_TCC_READ_REQ = 0x00000017, -TCP_PERF_SEL_TCC_WRITE_REQ = 0x00000018, -TCP_PERF_SEL_TCC_ATOMIC_WITH_RET_REQ = 0x00000019, -TCP_PERF_SEL_TCC_ATOMIC_WITHOUT_RET_REQ = 0x0000001a, -TCP_PERF_SEL_TOTAL_LOCAL_READ = 0x0000001b, -TCP_PERF_SEL_TOTAL_GLOBAL_READ = 0x0000001c, -TCP_PERF_SEL_TOTAL_LOCAL_WRITE = 0x0000001d, -TCP_PERF_SEL_TOTAL_GLOBAL_WRITE = 0x0000001e, -TCP_PERF_SEL_TOTAL_ATOMIC_WITH_RET = 0x0000001f, -TCP_PERF_SEL_TOTAL_ATOMIC_WITHOUT_RET = 0x00000020, -TCP_PERF_SEL_TOTAL_WBINVL1 = 0x00000021, -TCP_PERF_SEL_IMG_READ_FMT_1 = 0x00000022, -TCP_PERF_SEL_IMG_READ_FMT_8 = 0x00000023, -TCP_PERF_SEL_IMG_READ_FMT_16 = 0x00000024, -TCP_PERF_SEL_IMG_READ_FMT_32 = 0x00000025, -TCP_PERF_SEL_IMG_READ_FMT_32_AS_8 = 0x00000026, -TCP_PERF_SEL_IMG_READ_FMT_32_AS_16 = 0x00000027, -TCP_PERF_SEL_IMG_READ_FMT_32_AS_128 = 0x00000028, -TCP_PERF_SEL_IMG_READ_FMT_64_2_CYCLE = 0x00000029, -TCP_PERF_SEL_IMG_READ_FMT_64_1_CYCLE = 0x0000002a, -TCP_PERF_SEL_IMG_READ_FMT_96 = 0x0000002b, -TCP_PERF_SEL_IMG_READ_FMT_128_4_CYCLE = 0x0000002c, -TCP_PERF_SEL_IMG_READ_FMT_128_1_CYCLE = 0x0000002d, -TCP_PERF_SEL_IMG_READ_FMT_BC1 = 0x0000002e, -TCP_PERF_SEL_IMG_READ_FMT_BC2 = 0x0000002f, -TCP_PERF_SEL_IMG_READ_FMT_BC3 = 0x00000030, -TCP_PERF_SEL_IMG_READ_FMT_BC4 = 0x00000031, -TCP_PERF_SEL_IMG_READ_FMT_BC5 = 0x00000032, -TCP_PERF_SEL_IMG_READ_FMT_BC6 = 0x00000033, -TCP_PERF_SEL_IMG_READ_FMT_BC7 = 0x00000034, -TCP_PERF_SEL_IMG_READ_FMT_I8 = 0x00000035, -TCP_PERF_SEL_IMG_READ_FMT_I16 = 0x00000036, -TCP_PERF_SEL_IMG_READ_FMT_I32 = 0x00000037, -TCP_PERF_SEL_IMG_READ_FMT_I32_AS_8 = 0x00000038, -TCP_PERF_SEL_IMG_READ_FMT_I32_AS_16 = 0x00000039, -TCP_PERF_SEL_IMG_READ_FMT_D8 = 0x0000003a, -TCP_PERF_SEL_IMG_READ_FMT_D16 = 0x0000003b, -TCP_PERF_SEL_IMG_READ_FMT_D32 = 0x0000003c, -TCP_PERF_SEL_IMG_WRITE_FMT_8 = 0x0000003d, -TCP_PERF_SEL_IMG_WRITE_FMT_16 = 0x0000003e, -TCP_PERF_SEL_IMG_WRITE_FMT_32 = 0x0000003f, 
-TCP_PERF_SEL_IMG_WRITE_FMT_64 = 0x00000040, -TCP_PERF_SEL_IMG_WRITE_FMT_128 = 0x00000041, -TCP_PERF_SEL_IMG_WRITE_FMT_D8 = 0x00000042, -TCP_PERF_SEL_IMG_WRITE_FMT_D16 = 0x00000043, -TCP_PERF_SEL_IMG_WRITE_FMT_D32 = 0x00000044, -TCP_PERF_SEL_IMG_ATOMIC_WITH_RET_FMT_32 = 0x00000045, -TCP_PERF_SEL_IMG_ATOMIC_WITHOUT_RET_FMT_32 = 0x00000046, -TCP_PERF_SEL_IMG_ATOMIC_WITH_RET_FMT_64 = 0x00000047, -TCP_PERF_SEL_IMG_ATOMIC_WITHOUT_RET_FMT_64 = 0x00000048, -TCP_PERF_SEL_BUF_READ_FMT_8 = 0x00000049, -TCP_PERF_SEL_BUF_READ_FMT_16 = 0x0000004a, -TCP_PERF_SEL_BUF_READ_FMT_32 = 0x0000004b, -TCP_PERF_SEL_BUF_WRITE_FMT_8 = 0x0000004c, -TCP_PERF_SEL_BUF_WRITE_FMT_16 = 0x0000004d, -TCP_PERF_SEL_BUF_WRITE_FMT_32 = 0x0000004e, -TCP_PERF_SEL_BUF_ATOMIC_WITH_RET_FMT_32 = 0x0000004f, -TCP_PERF_SEL_BUF_ATOMIC_WITHOUT_RET_FMT_32 = 0x00000050, -TCP_PERF_SEL_BUF_ATOMIC_WITH_RET_FMT_64 = 0x00000051, -TCP_PERF_SEL_BUF_ATOMIC_WITHOUT_RET_FMT_64 = 0x00000052, -TCP_PERF_SEL_ARR_LINEAR_GENERAL = 0x00000053, -TCP_PERF_SEL_ARR_LINEAR_ALIGNED = 0x00000054, -TCP_PERF_SEL_ARR_1D_THIN1 = 0x00000055, -TCP_PERF_SEL_ARR_1D_THICK = 0x00000056, -TCP_PERF_SEL_ARR_2D_THIN1 = 0x00000057, -TCP_PERF_SEL_ARR_2D_THICK = 0x00000058, -TCP_PERF_SEL_ARR_2D_XTHICK = 0x00000059, -TCP_PERF_SEL_ARR_3D_THIN1 = 0x0000005a, -TCP_PERF_SEL_ARR_3D_THICK = 0x0000005b, -TCP_PERF_SEL_ARR_3D_XTHICK = 0x0000005c, -TCP_PERF_SEL_DIM_1D = 0x0000005d, -TCP_PERF_SEL_DIM_2D = 0x0000005e, -TCP_PERF_SEL_DIM_3D = 0x0000005f, -TCP_PERF_SEL_DIM_1D_ARRAY = 0x00000060, -TCP_PERF_SEL_DIM_2D_ARRAY = 0x00000061, -TCP_PERF_SEL_DIM_2D_MSAA = 0x00000062, -TCP_PERF_SEL_DIM_2D_ARRAY_MSAA = 0x00000063, -TCP_PERF_SEL_DIM_CUBE_ARRAY = 0x00000064, -TCP_PERF_SEL_CP_TCP_INVALIDATE = 0x00000065, -TCP_PERF_SEL_TA_TCP_STATE_READ = 0x00000066, -TCP_PERF_SEL_TAGRAM0_REQ = 0x00000067, -TCP_PERF_SEL_TAGRAM1_REQ = 0x00000068, -TCP_PERF_SEL_TAGRAM2_REQ = 0x00000069, -TCP_PERF_SEL_TAGRAM3_REQ = 0x0000006a, -TCP_PERF_SEL_GATE_EN1 = 0x0000006b, -TCP_PERF_SEL_GATE_EN2 = 
0x0000006c, -TCP_PERF_SEL_CORE_REG_SCLK_VLD = 0x0000006d, -TCP_PERF_SEL_TCC_REQ = 0x0000006e, -TCP_PERF_SEL_TCC_NON_READ_REQ = 0x0000006f, -TCP_PERF_SEL_TCC_BYPASS_READ_REQ = 0x00000070, -TCP_PERF_SEL_TCC_MISS_EVICT_READ_REQ = 0x00000071, -TCP_PERF_SEL_TCC_VOLATILE_READ_REQ = 0x00000072, -TCP_PERF_SEL_TCC_VOLATILE_BYPASS_READ_REQ = 0x00000073, -TCP_PERF_SEL_TCC_VOLATILE_MISS_EVICT_READ_REQ = 0x00000074, -TCP_PERF_SEL_TCC_BYPASS_WRITE_REQ = 0x00000075, -TCP_PERF_SEL_TCC_MISS_EVICT_WRITE_REQ = 0x00000076, -TCP_PERF_SEL_TCC_VOLATILE_BYPASS_WRITE_REQ = 0x00000077, -TCP_PERF_SEL_TCC_VOLATILE_WRITE_REQ = 0x00000078, -TCP_PERF_SEL_TCC_VOLATILE_MISS_EVICT_WRITE_REQ = 0x00000079, -TCP_PERF_SEL_TCC_BYPASS_ATOMIC_REQ = 0x0000007a, -TCP_PERF_SEL_TCC_ATOMIC_REQ = 0x0000007b, -TCP_PERF_SEL_TCC_VOLATILE_ATOMIC_REQ = 0x0000007c, -TCP_PERF_SEL_TCC_DATA_BUS_BUSY = 0x0000007d, -TCP_PERF_SEL_TOTAL_ACCESSES = 0x0000007e, -TCP_PERF_SEL_TOTAL_READ = 0x0000007f, -TCP_PERF_SEL_TOTAL_HIT_LRU_READ = 0x00000080, -TCP_PERF_SEL_TOTAL_HIT_EVICT_READ = 0x00000081, -TCP_PERF_SEL_TOTAL_MISS_LRU_READ = 0x00000082, -TCP_PERF_SEL_TOTAL_MISS_EVICT_READ = 0x00000083, -TCP_PERF_SEL_TOTAL_NON_READ = 0x00000084, -TCP_PERF_SEL_TOTAL_WRITE = 0x00000085, -TCP_PERF_SEL_TOTAL_MISS_LRU_WRITE = 0x00000086, -TCP_PERF_SEL_TOTAL_MISS_EVICT_WRITE = 0x00000087, -TCP_PERF_SEL_TOTAL_WBINVL1_VOL = 0x00000088, -TCP_PERF_SEL_TOTAL_WRITEBACK_INVALIDATES = 0x00000089, -TCP_PERF_SEL_DISPLAY_MICROTILING = 0x0000008a, -TCP_PERF_SEL_THIN_MICROTILING = 0x0000008b, -TCP_PERF_SEL_DEPTH_MICROTILING = 0x0000008c, -TCP_PERF_SEL_ARR_PRT_THIN1 = 0x0000008d, -TCP_PERF_SEL_ARR_PRT_2D_THIN1 = 0x0000008e, -TCP_PERF_SEL_ARR_PRT_3D_THIN1 = 0x0000008f, -TCP_PERF_SEL_ARR_PRT_THICK = 0x00000090, -TCP_PERF_SEL_ARR_PRT_2D_THICK = 0x00000091, -TCP_PERF_SEL_ARR_PRT_3D_THICK = 0x00000092, -TCP_PERF_SEL_CP_TCP_INVALIDATE_VOL = 0x00000093, -TCP_PERF_SEL_SQ_TCP_INVALIDATE_VOL = 0x00000094, -TCP_PERF_SEL_UNALIGNED = 0x00000095, 
-TCP_PERF_SEL_ROTATED_MICROTILING = 0x00000096, -TCP_PERF_SEL_THICK_MICROTILING = 0x00000097, -TCP_PERF_SEL_ATC = 0x00000098, -TCP_PERF_SEL_POWER_STALL = 0x00000099, -TCP_PERF_SEL_RESERVED_154 = 0x0000009a, -TCP_PERF_SEL_TCC_LRU_REQ = 0x0000009b, -TCP_PERF_SEL_TCC_STREAM_REQ = 0x0000009c, -TCP_PERF_SEL_TCC_NC_READ_REQ = 0x0000009d, -TCP_PERF_SEL_TCC_NC_WRITE_REQ = 0x0000009e, -TCP_PERF_SEL_TCC_NC_ATOMIC_REQ = 0x0000009f, -TCP_PERF_SEL_TCC_UC_READ_REQ = 0x000000a0, -TCP_PERF_SEL_TCC_UC_WRITE_REQ = 0x000000a1, -TCP_PERF_SEL_TCC_UC_ATOMIC_REQ = 0x000000a2, -TCP_PERF_SEL_TCC_CC_READ_REQ = 0x000000a3, -TCP_PERF_SEL_TCC_CC_WRITE_REQ = 0x000000a4, -TCP_PERF_SEL_TCC_CC_ATOMIC_REQ = 0x000000a5, -TCP_PERF_SEL_TCC_DCC_REQ = 0x000000a6, -TCP_PERF_SEL_TCC_PHYSICAL_REQ = 0x000000a7, -TCP_PERF_SEL_UNORDERED_MTYPE_STALL = 0x000000a8, -TCP_PERF_SEL_VOLATILE = 0x000000a9, -TCP_PERF_SEL_TC_TA_XNACK_STALL = 0x000000aa, -TCP_PERF_SEL_UTCL1_SERIALIZATION_STALL = 0x000000ab, -TCP_PERF_SEL_SHOOTDOWN = 0x000000ac, -TCP_PERF_SEL_UTCL1_TRANSLATION_MISS = 0x000000ad, -TCP_PERF_SEL_UTCL1_PERMISSION_MISS = 0x000000ae, -TCP_PERF_SEL_UTCL1_REQUEST = 0x000000af, -TCP_PERF_SEL_UTCL1_STALL_INFLIGHT_MAX = 0x000000b0, -TCP_PERF_SEL_UTCL1_STALL_LRU_INFLIGHT = 0x000000b1, -TCP_PERF_SEL_UTCL1_LFIFO_FULL = 0x000000b2, -TCP_PERF_SEL_UTCL1_STALL_LFIFO_NOT_RES = 0x000000b3, -TCP_PERF_SEL_UTCL1_STALL_UTCL2_REQ_OUT_OF_CREDITS = 0x000000b4, -TCP_PERF_SEL_UTCL1_UTCL2_INFLIGHT = 0x000000b5, -TCP_PERF_SEL_UTCL1_STALL_MISSFIFO_FULL = 0x000000b6, -TCP_PERF_SEL_IMG_READ_FMT_ETC2_RGB = 0x000000b7, -TCP_PERF_SEL_IMG_READ_FMT_ETC2_RGBA = 0x000000b8, -TCP_PERF_SEL_IMG_READ_FMT_ETC2_RGBA1 = 0x000000b9, -TCP_PERF_SEL_IMG_READ_FMT_ETC2_R = 0x000000ba, -TCP_PERF_SEL_IMG_READ_FMT_ETC2_RG = 0x000000bb, -TCP_PERF_SEL_IMG_READ_FMT_8_AS_32 = 0x000000bc, -TCP_PERF_SEL_IMG_READ_FMT_8_AS_64 = 0x000000bd, -TCP_PERF_SEL_IMG_READ_FMT_16_AS_64 = 0x000000be, -TCP_PERF_SEL_IMG_READ_FMT_16_AS_128 = 0x000000bf, 
-TCP_PERF_SEL_IMG_WRITE_FMT_8_AS_32 = 0x000000c0, -TCP_PERF_SEL_IMG_WRITE_FMT_8_AS_64 = 0x000000c1, -TCP_PERF_SEL_IMG_WRITE_FMT_16_AS_64 = 0x000000c2, -TCP_PERF_SEL_IMG_WRITE_FMT_16_AS_128 = 0x000000c3, -} TCP_PERFCOUNT_SELECT; - -/* - * TCP_CACHE_POLICIES enum - */ - -typedef enum TCP_CACHE_POLICIES { -TCP_CACHE_POLICY_MISS_LRU = 0x00000000, -TCP_CACHE_POLICY_MISS_EVICT = 0x00000001, -TCP_CACHE_POLICY_HIT_LRU = 0x00000002, -TCP_CACHE_POLICY_HIT_EVICT = 0x00000003, -} TCP_CACHE_POLICIES; - -/* - * TCP_CACHE_STORE_POLICIES enum - */ - -typedef enum TCP_CACHE_STORE_POLICIES { -TCP_CACHE_STORE_POLICY_WT_LRU = 0x00000000, -TCP_CACHE_STORE_POLICY_WT_EVICT = 0x00000001, -} TCP_CACHE_STORE_POLICIES; - -/* - * TCP_WATCH_MODES enum - */ - -typedef enum TCP_WATCH_MODES { -TCP_WATCH_MODE_READ = 0x00000000, -TCP_WATCH_MODE_NONREAD = 0x00000001, -TCP_WATCH_MODE_ATOMIC = 0x00000002, -TCP_WATCH_MODE_ALL = 0x00000003, -} TCP_WATCH_MODES; - -/* - * TCP_DSM_DATA_SEL enum - */ - -typedef enum TCP_DSM_DATA_SEL { -TCP_DSM_DISABLE = 0x00000000, -TCP_DSM_SEL0 = 0x00000001, -TCP_DSM_SEL1 = 0x00000002, -TCP_DSM_SEL_BOTH = 0x00000003, -} TCP_DSM_DATA_SEL; - -/* - * TCP_DSM_SINGLE_WRITE enum - */ - -typedef enum TCP_DSM_SINGLE_WRITE { -TCP_DSM_SINGLE_WRITE_DIS = 0x00000000, -TCP_DSM_SINGLE_WRITE_EN = 0x00000001, -} TCP_DSM_SINGLE_WRITE; - -/* - * TCP_DSM_INJECT_SEL enum - */ - -typedef enum TCP_DSM_INJECT_SEL { -TCP_DSM_INJECT_SEL0 = 0x00000000, -TCP_DSM_INJECT_SEL1 = 0x00000001, -TCP_DSM_INJECT_SEL2 = 0x00000002, -TCP_DSM_INJECT_SEL3 = 0x00000003, -} TCP_DSM_INJECT_SEL; - -/******************************************************* - * TCC Enums - *******************************************************/ - -/* - * TCC_PERF_SEL enum - */ - -typedef enum TCC_PERF_SEL { -TCC_PERF_SEL_NONE = 0x00000000, -TCC_PERF_SEL_CYCLE = 0x00000001, -TCC_PERF_SEL_BUSY = 0x00000002, -TCC_PERF_SEL_REQ = 0x00000003, -TCC_PERF_SEL_STREAMING_REQ = 0x00000004, -TCC_PERF_SEL_EXE_REQ = 0x00000005, 
-TCC_PERF_SEL_COMPRESSED_REQ = 0x00000006, -TCC_PERF_SEL_COMPRESSED_0_REQ = 0x00000007, -TCC_PERF_SEL_METADATA_REQ = 0x00000008, -TCC_PERF_SEL_NC_VIRTUAL_REQ = 0x00000009, -TCC_PERF_SEL_UC_VIRTUAL_REQ = 0x0000000a, -TCC_PERF_SEL_CC_PHYSICAL_REQ = 0x0000000b, -TCC_PERF_SEL_PROBE = 0x0000000c, -TCC_PERF_SEL_PROBE_ALL = 0x0000000d, -TCC_PERF_SEL_READ = 0x0000000e, -TCC_PERF_SEL_WRITE = 0x0000000f, -TCC_PERF_SEL_ATOMIC = 0x00000010, -TCC_PERF_SEL_HIT = 0x00000011, -TCC_PERF_SEL_SECTOR_HIT = 0x00000012, -TCC_PERF_SEL_MISS = 0x00000013, -TCC_PERF_SEL_DEWRITE_ALLOCATE_HIT = 0x00000014, -TCC_PERF_SEL_FULLY_WRITTEN_HIT = 0x00000015, -TCC_PERF_SEL_WRITEBACK = 0x00000016, -TCC_PERF_SEL_LATENCY_FIFO_FULL = 0x00000017, -TCC_PERF_SEL_SRC_FIFO_FULL = 0x00000018, -TCC_PERF_SEL_HOLE_FIFO_FULL = 0x00000019, -TCC_PERF_SEL_EA_WRREQ = 0x0000001a, -TCC_PERF_SEL_EA_WRREQ_64B = 0x0000001b, -TCC_PERF_SEL_EA_WRREQ_PROBE_COMMAND = 0x0000001c, -TCC_PERF_SEL_EA_WR_UNCACHED_32B = 0x0000001d, -TCC_PERF_SEL_EA_WRREQ_STALL = 0x0000001e, -TCC_PERF_SEL_EA_WRREQ_CREDIT_STALL = 0x0000001f, -TCC_PERF_SEL_TOO_MANY_EA_WRREQS_STALL = 0x00000020, -TCC_PERF_SEL_EA_WRREQ_LEVEL = 0x00000021, -TCC_PERF_SEL_EA_ATOMIC = 0x00000022, -TCC_PERF_SEL_EA_ATOMIC_LEVEL = 0x00000023, -TCC_PERF_SEL_EA_RDREQ = 0x00000024, -TCC_PERF_SEL_EA_RDREQ_32B = 0x00000025, -TCC_PERF_SEL_EA_RD_UNCACHED_32B = 0x00000026, -TCC_PERF_SEL_EA_RD_MDC_32B = 0x00000027, -TCC_PERF_SEL_EA_RD_COMPRESSED_32B = 0x00000028, -TCC_PERF_SEL_EA_RDREQ_CREDIT_STALL = 0x00000029, -TCC_PERF_SEL_EA_RDREQ_LEVEL = 0x0000002a, -TCC_PERF_SEL_TAG_STALL = 0x0000002b, -TCC_PERF_SEL_TAG_WRITEBACK_FIFO_FULL_STALL = 0x0000002c, -TCC_PERF_SEL_TAG_MISS_NOTHING_REPLACEABLE_STALL = 0x0000002d, -TCC_PERF_SEL_TAG_UNCACHED_WRITE_ATOMIC_FIFO_FULL_STALL = 0x0000002e, -TCC_PERF_SEL_TAG_NO_UNCACHED_WRITE_ATOMIC_ENTRIES_STALL = 0x0000002f, -TCC_PERF_SEL_TAG_PROBE_STALL = 0x00000030, -TCC_PERF_SEL_TAG_PROBE_FILTER_STALL = 0x00000031, -TCC_PERF_SEL_READ_RETURN_TIMEOUT = 0x00000032, 
-TCC_PERF_SEL_WRITEBACK_READ_TIMEOUT = 0x00000033, -TCC_PERF_SEL_READ_RETURN_FULL_BUBBLE = 0x00000034, -TCC_PERF_SEL_BUBBLE = 0x00000035, -TCC_PERF_SEL_RETURN_ACK = 0x00000036, -TCC_PERF_SEL_RETURN_DATA = 0x00000037, -TCC_PERF_SEL_RETURN_HOLE = 0x00000038, -TCC_PERF_SEL_RETURN_ACK_HOLE = 0x00000039, -TCC_PERF_SEL_IB_REQ = 0x0000003a, -TCC_PERF_SEL_IB_STALL = 0x0000003b, -TCC_PERF_SEL_IB_TAG_STALL = 0x0000003c, -TCC_PERF_SEL_IB_MDC_STALL = 0x0000003d, -TCC_PERF_SEL_TCA_LEVEL = 0x0000003e, -TCC_PERF_SEL_HOLE_LEVEL = 0x0000003f, -TCC_PERF_SEL_EA_RDRET_NACK = 0x00000040, -TCC_PERF_SEL_EA_WRRET_NACK = 0x00000041, -TCC_PERF_SEL_NORMAL_WRITEBACK = 0x00000042, -TCC_PERF_SEL_TC_OP_WBL2_NC_WRITEBACK = 0x00000043, -TCC_PERF_SEL_TC_OP_WBL2_WC_WRITEBACK = 0x00000044, -TCC_PERF_SEL_TC_OP_WBINVL2_WRITEBACK = 0x00000045, -TCC_PERF_SEL_TC_OP_WBINVL2_NC_WRITEBACK = 0x00000046, -TCC_PERF_SEL_TC_OP_WBINVL2_SD_WRITEBACK = 0x00000047, -TCC_PERF_SEL_ALL_TC_OP_WB_WRITEBACK = 0x00000048, -TCC_PERF_SEL_NORMAL_EVICT = 0x00000049, -TCC_PERF_SEL_TC_OP_WBL2_NC_EVICT = 0x0000004a, -TCC_PERF_SEL_TC_OP_WBL2_WC_EVICT = 0x0000004b, -TCC_PERF_SEL_TC_OP_INVL2_NC_EVICT = 0x0000004c, -TCC_PERF_SEL_TC_OP_WBINVL2_EVICT = 0x0000004d, -TCC_PERF_SEL_TC_OP_WBINVL2_NC_EVICT = 0x0000004e, -TCC_PERF_SEL_TC_OP_WBINVL2_SD_EVICT = 0x0000004f, -TCC_PERF_SEL_ALL_TC_OP_INV_EVICT = 0x00000050, -TCC_PERF_SEL_PROBE_EVICT = 0x00000051, -TCC_PERF_SEL_TC_OP_WBL2_NC_CYCLE = 0x00000052, -TCC_PERF_SEL_TC_OP_WBL2_WC_CYCLE = 0x00000053, -TCC_PERF_SEL_TC_OP_INVL2_NC_CYCLE = 0x00000054, -TCC_PERF_SEL_TC_OP_WBINVL2_CYCLE = 0x00000055, -TCC_PERF_SEL_TC_OP_WBINVL2_NC_CYCLE = 0x00000056, -TCC_PERF_SEL_TC_OP_WBINVL2_SD_CYCLE = 0x00000057, -TCC_PERF_SEL_ALL_TC_OP_WB_OR_INV_CYCLE = 0x00000058, -TCC_PERF_SEL_TC_OP_WBL2_NC_START = 0x00000059, -TCC_PERF_SEL_TC_OP_WBL2_WC_START = 0x0000005a, -TCC_PERF_SEL_TC_OP_INVL2_NC_START = 0x0000005b, -TCC_PERF_SEL_TC_OP_WBINVL2_START = 0x0000005c, -TCC_PERF_SEL_TC_OP_WBINVL2_NC_START = 0x0000005d, 
-TCC_PERF_SEL_TC_OP_WBINVL2_SD_START = 0x0000005e, -TCC_PERF_SEL_ALL_TC_OP_WB_OR_INV_START = 0x0000005f, -TCC_PERF_SEL_TC_OP_WBL2_NC_FINISH = 0x00000060, -TCC_PERF_SEL_TC_OP_WBL2_WC_FINISH = 0x00000061, -TCC_PERF_SEL_TC_OP_INVL2_NC_FINISH = 0x00000062, -TCC_PERF_SEL_TC_OP_WBINVL2_FINISH = 0x00000063, -TCC_PERF_SEL_TC_OP_WBINVL2_NC_FINISH = 0x00000064, -TCC_PERF_SEL_TC_OP_WBINVL2_SD_FINISH = 0x00000065, -TCC_PERF_SEL_ALL_TC_OP_WB_OR_INV_FINISH = 0x00000066, -TCC_PERF_SEL_MDC_REQ = 0x00000067, -TCC_PERF_SEL_MDC_LEVEL = 0x00000068, -TCC_PERF_SEL_MDC_TAG_HIT = 0x00000069, -TCC_PERF_SEL_MDC_SECTOR_HIT = 0x0000006a, -TCC_PERF_SEL_MDC_SECTOR_MISS = 0x0000006b, -TCC_PERF_SEL_MDC_TAG_STALL = 0x0000006c, -TCC_PERF_SEL_MDC_TAG_REPLACEMENT_LINE_IN_USE_STALL = 0x0000006d, -TCC_PERF_SEL_MDC_TAG_DESECTORIZATION_FIFO_FULL_STALL = 0x0000006e, -TCC_PERF_SEL_MDC_TAG_WAITING_FOR_INVALIDATE_COMPLETION_STALL = 0x0000006f, -TCC_PERF_SEL_PROBE_FILTER_DISABLE_TRANSITION = 0x00000070, -TCC_PERF_SEL_PROBE_FILTER_DISABLED = 0x00000071, -TCC_PERF_SEL_CLIENT0_REQ = 0x00000080, -TCC_PERF_SEL_CLIENT1_REQ = 0x00000081, -TCC_PERF_SEL_CLIENT2_REQ = 0x00000082, -TCC_PERF_SEL_CLIENT3_REQ = 0x00000083, -TCC_PERF_SEL_CLIENT4_REQ = 0x00000084, -TCC_PERF_SEL_CLIENT5_REQ = 0x00000085, -TCC_PERF_SEL_CLIENT6_REQ = 0x00000086, -TCC_PERF_SEL_CLIENT7_REQ = 0x00000087, -TCC_PERF_SEL_CLIENT8_REQ = 0x00000088, -TCC_PERF_SEL_CLIENT9_REQ = 0x00000089, -TCC_PERF_SEL_CLIENT10_REQ = 0x0000008a, -TCC_PERF_SEL_CLIENT11_REQ = 0x0000008b, -TCC_PERF_SEL_CLIENT12_REQ = 0x0000008c, -TCC_PERF_SEL_CLIENT13_REQ = 0x0000008d, -TCC_PERF_SEL_CLIENT14_REQ = 0x0000008e, -TCC_PERF_SEL_CLIENT15_REQ = 0x0000008f, -TCC_PERF_SEL_CLIENT16_REQ = 0x00000090, -TCC_PERF_SEL_CLIENT17_REQ = 0x00000091, -TCC_PERF_SEL_CLIENT18_REQ = 0x00000092, -TCC_PERF_SEL_CLIENT19_REQ = 0x00000093, -TCC_PERF_SEL_CLIENT20_REQ = 0x00000094, -TCC_PERF_SEL_CLIENT21_REQ = 0x00000095, -TCC_PERF_SEL_CLIENT22_REQ = 0x00000096, -TCC_PERF_SEL_CLIENT23_REQ = 0x00000097, 
-TCC_PERF_SEL_CLIENT24_REQ = 0x00000098, -TCC_PERF_SEL_CLIENT25_REQ = 0x00000099, -TCC_PERF_SEL_CLIENT26_REQ = 0x0000009a, -TCC_PERF_SEL_CLIENT27_REQ = 0x0000009b, -TCC_PERF_SEL_CLIENT28_REQ = 0x0000009c, -TCC_PERF_SEL_CLIENT29_REQ = 0x0000009d, -TCC_PERF_SEL_CLIENT30_REQ = 0x0000009e, -TCC_PERF_SEL_CLIENT31_REQ = 0x0000009f, -TCC_PERF_SEL_CLIENT32_REQ = 0x000000a0, -TCC_PERF_SEL_CLIENT33_REQ = 0x000000a1, -TCC_PERF_SEL_CLIENT34_REQ = 0x000000a2, -TCC_PERF_SEL_CLIENT35_REQ = 0x000000a3, -TCC_PERF_SEL_CLIENT36_REQ = 0x000000a4, -TCC_PERF_SEL_CLIENT37_REQ = 0x000000a5, -TCC_PERF_SEL_CLIENT38_REQ = 0x000000a6, -TCC_PERF_SEL_CLIENT39_REQ = 0x000000a7, -TCC_PERF_SEL_CLIENT40_REQ = 0x000000a8, -TCC_PERF_SEL_CLIENT41_REQ = 0x000000a9, -TCC_PERF_SEL_CLIENT42_REQ = 0x000000aa, -TCC_PERF_SEL_CLIENT43_REQ = 0x000000ab, -TCC_PERF_SEL_CLIENT44_REQ = 0x000000ac, -TCC_PERF_SEL_CLIENT45_REQ = 0x000000ad, -TCC_PERF_SEL_CLIENT46_REQ = 0x000000ae, -TCC_PERF_SEL_CLIENT47_REQ = 0x000000af, -TCC_PERF_SEL_CLIENT48_REQ = 0x000000b0, -TCC_PERF_SEL_CLIENT49_REQ = 0x000000b1, -TCC_PERF_SEL_CLIENT50_REQ = 0x000000b2, -TCC_PERF_SEL_CLIENT51_REQ = 0x000000b3, -TCC_PERF_SEL_CLIENT52_REQ = 0x000000b4, -TCC_PERF_SEL_CLIENT53_REQ = 0x000000b5, -TCC_PERF_SEL_CLIENT54_REQ = 0x000000b6, -TCC_PERF_SEL_CLIENT55_REQ = 0x000000b7, -TCC_PERF_SEL_CLIENT56_REQ = 0x000000b8, -TCC_PERF_SEL_CLIENT57_REQ = 0x000000b9, -TCC_PERF_SEL_CLIENT58_REQ = 0x000000ba, -TCC_PERF_SEL_CLIENT59_REQ = 0x000000bb, -TCC_PERF_SEL_CLIENT60_REQ = 0x000000bc, -TCC_PERF_SEL_CLIENT61_REQ = 0x000000bd, -TCC_PERF_SEL_CLIENT62_REQ = 0x000000be, -TCC_PERF_SEL_CLIENT63_REQ = 0x000000bf, -TCC_PERF_SEL_CLIENT64_REQ = 0x000000c0, -TCC_PERF_SEL_CLIENT65_REQ = 0x000000c1, -TCC_PERF_SEL_CLIENT66_REQ = 0x000000c2, -TCC_PERF_SEL_CLIENT67_REQ = 0x000000c3, -TCC_PERF_SEL_CLIENT68_REQ = 0x000000c4, -TCC_PERF_SEL_CLIENT69_REQ = 0x000000c5, -TCC_PERF_SEL_CLIENT70_REQ = 0x000000c6, -TCC_PERF_SEL_CLIENT71_REQ = 0x000000c7, -TCC_PERF_SEL_CLIENT72_REQ = 
0x000000c8, -TCC_PERF_SEL_CLIENT73_REQ = 0x000000c9, -TCC_PERF_SEL_CLIENT74_REQ = 0x000000ca, -TCC_PERF_SEL_CLIENT75_REQ = 0x000000cb, -TCC_PERF_SEL_CLIENT76_REQ = 0x000000cc, -TCC_PERF_SEL_CLIENT77_REQ = 0x000000cd, -TCC_PERF_SEL_CLIENT78_REQ = 0x000000ce, -TCC_PERF_SEL_CLIENT79_REQ = 0x000000cf, -TCC_PERF_SEL_CLIENT80_REQ = 0x000000d0, -TCC_PERF_SEL_CLIENT81_REQ = 0x000000d1, -TCC_PERF_SEL_CLIENT82_REQ = 0x000000d2, -TCC_PERF_SEL_CLIENT83_REQ = 0x000000d3, -TCC_PERF_SEL_CLIENT84_REQ = 0x000000d4, -TCC_PERF_SEL_CLIENT85_REQ = 0x000000d5, -TCC_PERF_SEL_CLIENT86_REQ = 0x000000d6, -TCC_PERF_SEL_CLIENT87_REQ = 0x000000d7, -TCC_PERF_SEL_CLIENT88_REQ = 0x000000d8, -TCC_PERF_SEL_CLIENT89_REQ = 0x000000d9, -TCC_PERF_SEL_CLIENT90_REQ = 0x000000da, -TCC_PERF_SEL_CLIENT91_REQ = 0x000000db, -TCC_PERF_SEL_CLIENT92_REQ = 0x000000dc, -TCC_PERF_SEL_CLIENT93_REQ = 0x000000dd, -TCC_PERF_SEL_CLIENT94_REQ = 0x000000de, -TCC_PERF_SEL_CLIENT95_REQ = 0x000000df, -TCC_PERF_SEL_CLIENT96_REQ = 0x000000e0, -TCC_PERF_SEL_CLIENT97_REQ = 0x000000e1, -TCC_PERF_SEL_CLIENT98_REQ = 0x000000e2, -TCC_PERF_SEL_CLIENT99_REQ = 0x000000e3, -TCC_PERF_SEL_CLIENT100_REQ = 0x000000e4, -TCC_PERF_SEL_CLIENT101_REQ = 0x000000e5, -TCC_PERF_SEL_CLIENT102_REQ = 0x000000e6, -TCC_PERF_SEL_CLIENT103_REQ = 0x000000e7, -TCC_PERF_SEL_CLIENT104_REQ = 0x000000e8, -TCC_PERF_SEL_CLIENT105_REQ = 0x000000e9, -TCC_PERF_SEL_CLIENT106_REQ = 0x000000ea, -TCC_PERF_SEL_CLIENT107_REQ = 0x000000eb, -TCC_PERF_SEL_CLIENT108_REQ = 0x000000ec, -TCC_PERF_SEL_CLIENT109_REQ = 0x000000ed, -TCC_PERF_SEL_CLIENT110_REQ = 0x000000ee, -TCC_PERF_SEL_CLIENT111_REQ = 0x000000ef, -TCC_PERF_SEL_CLIENT112_REQ = 0x000000f0, -TCC_PERF_SEL_CLIENT113_REQ = 0x000000f1, -TCC_PERF_SEL_CLIENT114_REQ = 0x000000f2, -TCC_PERF_SEL_CLIENT115_REQ = 0x000000f3, -TCC_PERF_SEL_CLIENT116_REQ = 0x000000f4, -TCC_PERF_SEL_CLIENT117_REQ = 0x000000f5, -TCC_PERF_SEL_CLIENT118_REQ = 0x000000f6, -TCC_PERF_SEL_CLIENT119_REQ = 0x000000f7, -TCC_PERF_SEL_CLIENT120_REQ = 
0x000000f8, -TCC_PERF_SEL_CLIENT121_REQ = 0x000000f9, -TCC_PERF_SEL_CLIENT122_REQ = 0x000000fa, -TCC_PERF_SEL_CLIENT123_REQ = 0x000000fb, -TCC_PERF_SEL_CLIENT124_REQ = 0x000000fc, -TCC_PERF_SEL_CLIENT125_REQ = 0x000000fd, -TCC_PERF_SEL_CLIENT126_REQ = 0x000000fe, -TCC_PERF_SEL_CLIENT127_REQ = 0x000000ff, -} TCC_PERF_SEL; - -/* - * TCA_PERF_SEL enum - */ - -typedef enum TCA_PERF_SEL { -TCA_PERF_SEL_NONE = 0x00000000, -TCA_PERF_SEL_CYCLE = 0x00000001, -TCA_PERF_SEL_BUSY = 0x00000002, -TCA_PERF_SEL_FORCED_HOLE_TCC0 = 0x00000003, -TCA_PERF_SEL_FORCED_HOLE_TCC1 = 0x00000004, -TCA_PERF_SEL_FORCED_HOLE_TCC2 = 0x00000005, -TCA_PERF_SEL_FORCED_HOLE_TCC3 = 0x00000006, -TCA_PERF_SEL_FORCED_HOLE_TCC4 = 0x00000007, -TCA_PERF_SEL_FORCED_HOLE_TCC5 = 0x00000008, -TCA_PERF_SEL_FORCED_HOLE_TCC6 = 0x00000009, -TCA_PERF_SEL_FORCED_HOLE_TCC7 = 0x0000000a, -TCA_PERF_SEL_REQ_TCC0 = 0x0000000b, -TCA_PERF_SEL_REQ_TCC1 = 0x0000000c, -TCA_PERF_SEL_REQ_TCC2 = 0x0000000d, -TCA_PERF_SEL_REQ_TCC3 = 0x0000000e, -TCA_PERF_SEL_REQ_TCC4 = 0x0000000f, -TCA_PERF_SEL_REQ_TCC5 = 0x00000010, -TCA_PERF_SEL_REQ_TCC6 = 0x00000011, -TCA_PERF_SEL_REQ_TCC7 = 0x00000012, -TCA_PERF_SEL_CROSSBAR_DOUBLE_ARB_TCC0 = 0x00000013, -TCA_PERF_SEL_CROSSBAR_DOUBLE_ARB_TCC1 = 0x00000014, -TCA_PERF_SEL_CROSSBAR_DOUBLE_ARB_TCC2 = 0x00000015, -TCA_PERF_SEL_CROSSBAR_DOUBLE_ARB_TCC3 = 0x00000016, -TCA_PERF_SEL_CROSSBAR_DOUBLE_ARB_TCC4 = 0x00000017, -TCA_PERF_SEL_CROSSBAR_DOUBLE_ARB_TCC5 = 0x00000018, -TCA_PERF_SEL_CROSSBAR_DOUBLE_ARB_TCC6 = 0x00000019, -TCA_PERF_SEL_CROSSBAR_DOUBLE_ARB_TCC7 = 0x0000001a, -TCA_PERF_SEL_CROSSBAR_STALL_TCC0 = 0x0000001b, -TCA_PERF_SEL_CROSSBAR_STALL_TCC1 = 0x0000001c, -TCA_PERF_SEL_CROSSBAR_STALL_TCC2 = 0x0000001d, -TCA_PERF_SEL_CROSSBAR_STALL_TCC3 = 0x0000001e, -TCA_PERF_SEL_CROSSBAR_STALL_TCC4 = 0x0000001f, -TCA_PERF_SEL_CROSSBAR_STALL_TCC5 = 0x00000020, -TCA_PERF_SEL_CROSSBAR_STALL_TCC6 = 0x00000021, -TCA_PERF_SEL_CROSSBAR_STALL_TCC7 = 0x00000022, -} TCA_PERF_SEL; - 
-/******************************************************* - * GRBM Enums - *******************************************************/ - -/* - * GRBM_PERF_SEL enum - */ - -typedef enum GRBM_PERF_SEL { -GRBM_PERF_SEL_COUNT = 0x00000000, -GRBM_PERF_SEL_USER_DEFINED = 0x00000001, -GRBM_PERF_SEL_GUI_ACTIVE = 0x00000002, -GRBM_PERF_SEL_CP_BUSY = 0x00000003, -GRBM_PERF_SEL_CP_COHER_BUSY = 0x00000004, -GRBM_PERF_SEL_CP_DMA_BUSY = 0x00000005, -GRBM_PERF_SEL_CB_BUSY = 0x00000006, -GRBM_PERF_SEL_DB_BUSY = 0x00000007, -GRBM_PERF_SEL_PA_BUSY = 0x00000008, -GRBM_PERF_SEL_SC_BUSY = 0x00000009, -GRBM_PERF_SEL_RESERVED_6 = 0x0000000a, -GRBM_PERF_SEL_SPI_BUSY = 0x0000000b, -GRBM_PERF_SEL_SX_BUSY = 0x0000000c, -GRBM_PERF_SEL_TA_BUSY = 0x0000000d, -GRBM_PERF_SEL_CB_CLEAN = 0x0000000e, -GRBM_PERF_SEL_DB_CLEAN = 0x0000000f, -GRBM_PERF_SEL_RESERVED_5 = 0x00000010, -GRBM_PERF_SEL_VGT_BUSY = 0x00000011, -GRBM_PERF_SEL_RESERVED_4 = 0x00000012, -GRBM_PERF_SEL_RESERVED_3 = 0x00000013, -GRBM_PERF_SEL_RESERVED_2 = 0x00000014, -GRBM_PERF_SEL_RESERVED_1 = 0x00000015, -GRBM_PERF_SEL_RESERVED_0 = 0x00000016, -GRBM_PERF_SEL_IA_BUSY = 0x00000017, -GRBM_PERF_SEL_IA_NO_DMA_BUSY = 0x00000018, -GRBM_PERF_SEL_GDS_BUSY = 0x00000019, -GRBM_PERF_SEL_BCI_BUSY = 0x0000001a, -GRBM_PERF_SEL_RLC_BUSY = 0x0000001b, -GRBM_PERF_SEL_TC_BUSY = 0x0000001c, -GRBM_PERF_SEL_CPG_BUSY = 0x0000001d, -GRBM_PERF_SEL_CPC_BUSY = 0x0000001e, -GRBM_PERF_SEL_CPF_BUSY = 0x0000001f, -GRBM_PERF_SEL_WD_BUSY = 0x00000020, -GRBM_PERF_SEL_WD_NO_DMA_BUSY = 0x00000021, -GRBM_PERF_SEL_UTCL2_BUSY = 0x00000022, -GRBM_PERF_SEL_EA_BUSY = 0x00000023, -GRBM_PERF_SEL_RMI_BUSY = 0x00000024, -GRBM_PERF_SEL_CPAXI_BUSY = 0x00000025, -} GRBM_PERF_SEL; - -/* - * GRBM_SE0_PERF_SEL enum - */ - -typedef enum GRBM_SE0_PERF_SEL { -GRBM_SE0_PERF_SEL_COUNT = 0x00000000, -GRBM_SE0_PERF_SEL_USER_DEFINED = 0x00000001, -GRBM_SE0_PERF_SEL_CB_BUSY = 0x00000002, -GRBM_SE0_PERF_SEL_DB_BUSY = 0x00000003, -GRBM_SE0_PERF_SEL_SC_BUSY = 0x00000004, 
-GRBM_SE0_PERF_SEL_RESERVED_1 = 0x00000005, -GRBM_SE0_PERF_SEL_SPI_BUSY = 0x00000006, -GRBM_SE0_PERF_SEL_SX_BUSY = 0x00000007, -GRBM_SE0_PERF_SEL_TA_BUSY = 0x00000008, -GRBM_SE0_PERF_SEL_CB_CLEAN = 0x00000009, -GRBM_SE0_PERF_SEL_DB_CLEAN = 0x0000000a, -GRBM_SE0_PERF_SEL_RESERVED_0 = 0x0000000b, -GRBM_SE0_PERF_SEL_PA_BUSY = 0x0000000c, -GRBM_SE0_PERF_SEL_VGT_BUSY = 0x0000000d, -GRBM_SE0_PERF_SEL_BCI_BUSY = 0x0000000e, -GRBM_SE0_PERF_SEL_RMI_BUSY = 0x0000000f, -} GRBM_SE0_PERF_SEL; - -/* - * GRBM_SE1_PERF_SEL enum - */ - -typedef enum GRBM_SE1_PERF_SEL { -GRBM_SE1_PERF_SEL_COUNT = 0x00000000, -GRBM_SE1_PERF_SEL_USER_DEFINED = 0x00000001, -GRBM_SE1_PERF_SEL_CB_BUSY = 0x00000002, -GRBM_SE1_PERF_SEL_DB_BUSY = 0x00000003, -GRBM_SE1_PERF_SEL_SC_BUSY = 0x00000004, -GRBM_SE1_PERF_SEL_RESERVED_1 = 0x00000005, -GRBM_SE1_PERF_SEL_SPI_BUSY = 0x00000006, -GRBM_SE1_PERF_SEL_SX_BUSY = 0x00000007, -GRBM_SE1_PERF_SEL_TA_BUSY = 0x00000008, -GRBM_SE1_PERF_SEL_CB_CLEAN = 0x00000009, -GRBM_SE1_PERF_SEL_DB_CLEAN = 0x0000000a, -GRBM_SE1_PERF_SEL_RESERVED_0 = 0x0000000b, -GRBM_SE1_PERF_SEL_PA_BUSY = 0x0000000c, -GRBM_SE1_PERF_SEL_VGT_BUSY = 0x0000000d, -GRBM_SE1_PERF_SEL_BCI_BUSY = 0x0000000e, -GRBM_SE1_PERF_SEL_RMI_BUSY = 0x0000000f, -} GRBM_SE1_PERF_SEL; - -/* - * GRBM_SE2_PERF_SEL enum - */ - -typedef enum GRBM_SE2_PERF_SEL { -GRBM_SE2_PERF_SEL_COUNT = 0x00000000, -GRBM_SE2_PERF_SEL_USER_DEFINED = 0x00000001, -GRBM_SE2_PERF_SEL_CB_BUSY = 0x00000002, -GRBM_SE2_PERF_SEL_DB_BUSY = 0x00000003, -GRBM_SE2_PERF_SEL_SC_BUSY = 0x00000004, -GRBM_SE2_PERF_SEL_RESERVED_1 = 0x00000005, -GRBM_SE2_PERF_SEL_SPI_BUSY = 0x00000006, -GRBM_SE2_PERF_SEL_SX_BUSY = 0x00000007, -GRBM_SE2_PERF_SEL_TA_BUSY = 0x00000008, -GRBM_SE2_PERF_SEL_CB_CLEAN = 0x00000009, -GRBM_SE2_PERF_SEL_DB_CLEAN = 0x0000000a, -GRBM_SE2_PERF_SEL_RESERVED_0 = 0x0000000b, -GRBM_SE2_PERF_SEL_PA_BUSY = 0x0000000c, -GRBM_SE2_PERF_SEL_VGT_BUSY = 0x0000000d, -GRBM_SE2_PERF_SEL_BCI_BUSY = 0x0000000e, -GRBM_SE2_PERF_SEL_RMI_BUSY = 0x0000000f, -} 
GRBM_SE2_PERF_SEL; - -/* - * GRBM_SE3_PERF_SEL enum - */ - -typedef enum GRBM_SE3_PERF_SEL { -GRBM_SE3_PERF_SEL_COUNT = 0x00000000, -GRBM_SE3_PERF_SEL_USER_DEFINED = 0x00000001, -GRBM_SE3_PERF_SEL_CB_BUSY = 0x00000002, -GRBM_SE3_PERF_SEL_DB_BUSY = 0x00000003, -GRBM_SE3_PERF_SEL_SC_BUSY = 0x00000004, -GRBM_SE3_PERF_SEL_RESERVED_1 = 0x00000005, -GRBM_SE3_PERF_SEL_SPI_BUSY = 0x00000006, -GRBM_SE3_PERF_SEL_SX_BUSY = 0x00000007, -GRBM_SE3_PERF_SEL_TA_BUSY = 0x00000008, -GRBM_SE3_PERF_SEL_CB_CLEAN = 0x00000009, -GRBM_SE3_PERF_SEL_DB_CLEAN = 0x0000000a, -GRBM_SE3_PERF_SEL_RESERVED_0 = 0x0000000b, -GRBM_SE3_PERF_SEL_PA_BUSY = 0x0000000c, -GRBM_SE3_PERF_SEL_VGT_BUSY = 0x0000000d, -GRBM_SE3_PERF_SEL_BCI_BUSY = 0x0000000e, -GRBM_SE3_PERF_SEL_RMI_BUSY = 0x0000000f, -} GRBM_SE3_PERF_SEL; - -/******************************************************* - * CP Enums - *******************************************************/ - -/* - * CP_RING_ID enum - */ - -typedef enum CP_RING_ID { -RINGID0 = 0x00000000, -RINGID1 = 0x00000001, -RINGID2 = 0x00000002, -RINGID3 = 0x00000003, -} CP_RING_ID; - -/* - * CP_PIPE_ID enum - */ - -typedef enum CP_PIPE_ID { -PIPE_ID0 = 0x00000000, -PIPE_ID1 = 0x00000001, -PIPE_ID2 = 0x00000002, -PIPE_ID3 = 0x00000003, -} CP_PIPE_ID; - -/* - * CP_ME_ID enum - */ - -typedef enum CP_ME_ID { -ME_ID0 = 0x00000000, -ME_ID1 = 0x00000001, -ME_ID2 = 0x00000002, -ME_ID3 = 0x00000003, -} CP_ME_ID; - -/* - * SPM_PERFMON_STATE enum - */ - -typedef enum SPM_PERFMON_STATE { -STRM_PERFMON_STATE_DISABLE_AND_RESET = 0x00000000, -STRM_PERFMON_STATE_START_COUNTING = 0x00000001, -STRM_PERFMON_STATE_STOP_COUNTING = 0x00000002, -STRM_PERFMON_STATE_RESERVED_3 = 0x00000003, -STRM_PERFMON_STATE_DISABLE_AND_RESET_PHANTOM = 0x00000004, -STRM_PERFMON_STATE_COUNT_AND_DUMP_PHANTOM = 0x00000005, -} SPM_PERFMON_STATE; - -/* - * CP_PERFMON_STATE enum - */ - -typedef enum CP_PERFMON_STATE { -CP_PERFMON_STATE_DISABLE_AND_RESET = 0x00000000, -CP_PERFMON_STATE_START_COUNTING = 0x00000001, 
-CP_PERFMON_STATE_STOP_COUNTING = 0x00000002, -CP_PERFMON_STATE_RESERVED_3 = 0x00000003, -CP_PERFMON_STATE_DISABLE_AND_RESET_PHANTOM = 0x00000004, -CP_PERFMON_STATE_COUNT_AND_DUMP_PHANTOM = 0x00000005, -} CP_PERFMON_STATE; - -/* - * CP_PERFMON_ENABLE_MODE enum - */ - -typedef enum CP_PERFMON_ENABLE_MODE { -CP_PERFMON_ENABLE_MODE_ALWAYS_COUNT = 0x00000000, -CP_PERFMON_ENABLE_MODE_RESERVED_1 = 0x00000001, -CP_PERFMON_ENABLE_MODE_COUNT_CONTEXT_TRUE = 0x00000002, -CP_PERFMON_ENABLE_MODE_COUNT_CONTEXT_FALSE = 0x00000003, -} CP_PERFMON_ENABLE_MODE; - -/* - * CPG_PERFCOUNT_SEL enum - */ - -typedef enum CPG_PERFCOUNT_SEL { -CPG_PERF_SEL_ALWAYS_COUNT = 0x00000000, -CPG_PERF_SEL_RBIU_FIFO_FULL = 0x00000001, -CPG_PERF_SEL_CSF_RTS_BUT_MIU_NOT_RTR = 0x00000002, -CPG_PERF_SEL_CSF_ST_BASE_SIZE_FIFO_FULL = 0x00000003, -CPG_PERF_SEL_CP_GRBM_DWORDS_SENT = 0x00000004, -CPG_PERF_SEL_ME_PARSER_BUSY = 0x00000005, -CPG_PERF_SEL_COUNT_TYPE0_PACKETS = 0x00000006, -CPG_PERF_SEL_COUNT_TYPE3_PACKETS = 0x00000007, -CPG_PERF_SEL_CSF_FETCHING_CMD_BUFFERS = 0x00000008, -CPG_PERF_SEL_CP_GRBM_OUT_OF_CREDITS = 0x00000009, -CPG_PERF_SEL_CP_PFP_GRBM_OUT_OF_CREDITS = 0x0000000a, -CPG_PERF_SEL_CP_GDS_GRBM_OUT_OF_CREDITS = 0x0000000b, -CPG_PERF_SEL_RCIU_STALLED_ON_ME_READ = 0x0000000c, -CPG_PERF_SEL_RCIU_STALLED_ON_DMA_READ = 0x0000000d, -CPG_PERF_SEL_SSU_STALLED_ON_ACTIVE_CNTX = 0x0000000e, -CPG_PERF_SEL_SSU_STALLED_ON_CLEAN_SIGNALS = 0x0000000f, -CPG_PERF_SEL_QU_STALLED_ON_EOP_DONE_PULSE = 0x00000010, -CPG_PERF_SEL_QU_STALLED_ON_EOP_DONE_WR_CONFIRM = 0x00000011, -CPG_PERF_SEL_PFP_STALLED_ON_CSF_READY = 0x00000012, -CPG_PERF_SEL_PFP_STALLED_ON_MEQ_READY = 0x00000013, -CPG_PERF_SEL_PFP_STALLED_ON_RCIU_READY = 0x00000014, -CPG_PERF_SEL_PFP_STALLED_FOR_DATA_FROM_ROQ = 0x00000015, -CPG_PERF_SEL_ME_STALLED_FOR_DATA_FROM_PFP = 0x00000016, -CPG_PERF_SEL_ME_STALLED_FOR_DATA_FROM_STQ = 0x00000017, -CPG_PERF_SEL_ME_STALLED_ON_NO_AVAIL_GFX_CNTX = 0x00000018, -CPG_PERF_SEL_ME_STALLED_WRITING_TO_RCIU = 0x00000019, 
-CPG_PERF_SEL_ME_STALLED_WRITING_CONSTANTS = 0x0000001a, -CPG_PERF_SEL_ME_STALLED_ON_PARTIAL_FLUSH = 0x0000001b, -CPG_PERF_SEL_ME_WAIT_ON_CE_COUNTER = 0x0000001c, -CPG_PERF_SEL_ME_WAIT_ON_AVAIL_BUFFER = 0x0000001d, -CPG_PERF_SEL_SEMAPHORE_BUSY_POLLING_FOR_PASS = 0x0000001e, -CPG_PERF_SEL_LOAD_STALLED_ON_SET_COHERENCY = 0x0000001f, -CPG_PERF_SEL_DYNAMIC_CLK_VALID = 0x00000020, -CPG_PERF_SEL_REGISTER_CLK_VALID = 0x00000021, -CPG_PERF_SEL_MIU_WRITE_REQUEST_SENT = 0x00000022, -CPG_PERF_SEL_MIU_READ_REQUEST_SENT = 0x00000023, -CPG_PERF_SEL_CE_STALL_RAM_DUMP = 0x00000024, -CPG_PERF_SEL_CE_STALL_RAM_WRITE = 0x00000025, -CPG_PERF_SEL_CE_STALL_ON_INC_FIFO = 0x00000026, -CPG_PERF_SEL_CE_STALL_ON_WR_RAM_FIFO = 0x00000027, -CPG_PERF_SEL_CE_STALL_ON_DATA_FROM_MIU = 0x00000028, -CPG_PERF_SEL_CE_STALL_ON_DATA_FROM_ROQ = 0x00000029, -CPG_PERF_SEL_CE_STALL_ON_CE_BUFFER_FLAG = 0x0000002a, -CPG_PERF_SEL_CE_STALL_ON_DE_COUNTER = 0x0000002b, -CPG_PERF_SEL_TCIU_STALL_WAIT_ON_FREE = 0x0000002c, -CPG_PERF_SEL_TCIU_STALL_WAIT_ON_TAGS = 0x0000002d, -CPG_PERF_SEL_UTCL2IU_STALL_WAIT_ON_FREE = 0x0000002e, -CPG_PERF_SEL_UTCL2IU_STALL_WAIT_ON_TAGS = 0x0000002f, -CPG_PERF_SEL_UTCL1_STALL_ON_TRANSLATION = 0x00000030, -} CPG_PERFCOUNT_SEL; - -/* - * CPF_PERFCOUNT_SEL enum - */ - -typedef enum CPF_PERFCOUNT_SEL { -CPF_PERF_SEL_ALWAYS_COUNT = 0x00000000, -CPF_PERF_SEL_MIU_STALLED_WAITING_RDREQ_FREE = 0x00000001, -CPF_PERF_SEL_TCIU_STALLED_WAITING_ON_FREE = 0x00000002, -CPF_PERF_SEL_TCIU_STALLED_WAITING_ON_TAGS = 0x00000003, -CPF_PERF_SEL_CSF_BUSY_FOR_FETCHING_RING = 0x00000004, -CPF_PERF_SEL_CSF_BUSY_FOR_FETCHING_IB1 = 0x00000005, -CPF_PERF_SEL_CSF_BUSY_FOR_FETCHING_IB2 = 0x00000006, -CPF_PERF_SEL_CSF_BUSY_FOR_FECTHINC_STATE = 0x00000007, -CPF_PERF_SEL_MIU_BUSY_FOR_OUTSTANDING_TAGS = 0x00000008, -CPF_PERF_SEL_CSF_RTS_MIU_NOT_RTR = 0x00000009, -CPF_PERF_SEL_CSF_STATE_FIFO_NOT_RTR = 0x0000000a, -CPF_PERF_SEL_CSF_FETCHING_CMD_BUFFERS = 0x0000000b, -CPF_PERF_SEL_GRBM_DWORDS_SENT = 0x0000000c, 
-CPF_PERF_SEL_DYNAMIC_CLOCK_VALID = 0x0000000d, -CPF_PERF_SEL_REGISTER_CLOCK_VALID = 0x0000000e, -CPF_PERF_SEL_MIU_WRITE_REQUEST_SEND = 0x0000000f, -CPF_PERF_SEL_MIU_READ_REQUEST_SEND = 0x00000010, -CPF_PERF_SEL_UTCL2IU_STALL_WAIT_ON_FREE = 0x00000011, -CPF_PERF_SEL_UTCL2IU_STALL_WAIT_ON_TAGS = 0x00000012, -CPF_PERF_SEL_UTCL1_STALL_ON_TRANSLATION = 0x00000013, -CPF_PERF_SEL_RCIU_STALL_WAIT_ON_FREE = 0x00000014, -} CPF_PERFCOUNT_SEL; - -/* - * CPC_PERFCOUNT_SEL enum - */ - -typedef enum CPC_PERFCOUNT_SEL { -CPC_PERF_SEL_ALWAYS_COUNT = 0x00000000, -CPC_PERF_SEL_RCIU_STALL_WAIT_ON_FREE = 0x00000001, -CPC_PERF_SEL_RCIU_STALL_PRIV_VIOLATION = 0x00000002, -CPC_PERF_SEL_MIU_STALL_ON_RDREQ_FREE = 0x00000003, -CPC_PERF_SEL_MIU_STALL_ON_WRREQ_FREE = 0x00000004, -CPC_PERF_SEL_TCIU_STALL_WAIT_ON_FREE = 0x00000005, -CPC_PERF_SEL_ME1_STALL_WAIT_ON_RCIU_READY = 0x00000006, -CPC_PERF_SEL_ME1_STALL_WAIT_ON_RCIU_READY_PERF = 0x00000007, -CPC_PERF_SEL_ME1_STALL_WAIT_ON_RCIU_READ = 0x00000008, -CPC_PERF_SEL_ME1_STALL_WAIT_ON_MIU_READ = 0x00000009, -CPC_PERF_SEL_ME1_STALL_WAIT_ON_MIU_WRITE = 0x0000000a, -CPC_PERF_SEL_ME1_STALL_ON_DATA_FROM_ROQ = 0x0000000b, -CPC_PERF_SEL_ME1_STALL_ON_DATA_FROM_ROQ_PERF = 0x0000000c, -CPC_PERF_SEL_ME1_BUSY_FOR_PACKET_DECODE = 0x0000000d, -CPC_PERF_SEL_ME2_STALL_WAIT_ON_RCIU_READY = 0x0000000e, -CPC_PERF_SEL_ME2_STALL_WAIT_ON_RCIU_READY_PERF = 0x0000000f, -CPC_PERF_SEL_ME2_STALL_WAIT_ON_RCIU_READ = 0x00000010, -CPC_PERF_SEL_ME2_STALL_WAIT_ON_MIU_READ = 0x00000011, -CPC_PERF_SEL_ME2_STALL_WAIT_ON_MIU_WRITE = 0x00000012, -CPC_PERF_SEL_ME2_STALL_ON_DATA_FROM_ROQ = 0x00000013, -CPC_PERF_SEL_ME2_STALL_ON_DATA_FROM_ROQ_PERF = 0x00000014, -CPC_PERF_SEL_ME2_BUSY_FOR_PACKET_DECODE = 0x00000015, -CPC_PERF_SEL_UTCL2IU_STALL_WAIT_ON_FREE = 0x00000016, -CPC_PERF_SEL_UTCL2IU_STALL_WAIT_ON_TAGS = 0x00000017, -CPC_PERF_SEL_UTCL1_STALL_ON_TRANSLATION = 0x00000018, -} CPC_PERFCOUNT_SEL; - -/* - * CP_ALPHA_TAG_RAM_SEL enum - */ - -typedef enum CP_ALPHA_TAG_RAM_SEL { 
-CPG_TAG_RAM = 0x00000000, -CPC_TAG_RAM = 0x00000001, -CPF_TAG_RAM = 0x00000002, -RSV_TAG_RAM = 0x00000003, -} CP_ALPHA_TAG_RAM_SEL; - -/* - * SEM_RESPONSE value - */ - -#define SEM_ECC_ERROR 0x00000000 -#define SEM_TRANS_ERROR 0x00000001 -#define SEM_FAILED 0x00000002 -#define SEM_PASSED 0x00000003 - -/* - * IQ_RETRY_TYPE value - */ - -#define IQ_QUEUE_SLEEP 0x00000000 -#define IQ_OFFLOAD_RETRY 0x00000001 -#define IQ_SCH_WAVE_MSG 0x00000002 -#define IQ_SEM_REARM 0x00000003 -#define IQ_DEQUEUE_RETRY 0x00000004 - -/* - * IQ_INTR_TYPE value - */ - -#define IQ_INTR_TYPE_PQ 0x00000000 -#define IQ_INTR_TYPE_IB 0x00000001 -#define IQ_INTR_TYPE_MQD 0x00000002 - -/* - * VMID_SIZE value - */ - -#define VMID_SZ 0x00000004 - -/* - * CONFIG_SPACE value - */ - -#define CONFIG_SPACE_START 0x00002000 -#define CONFIG_SPACE_END 0x00009fff - -/* - * CONFIG_SPACE1 value - */ - -#define CONFIG_SPACE1_START 0x00002000 -#define CONFIG_SPACE1_END 0x00002bff - -/* - * CONFIG_SPACE2 value - */ - -#define CONFIG_SPACE2_START 0x00003000 -#define CONFIG_SPACE2_END 0x00009fff - -/* - * UCONFIG_SPACE value - */ - -#define UCONFIG_SPACE_START 0x0000c000 -#define UCONFIG_SPACE_END 0x0000ffff - -/* - * PERSISTENT_SPACE value - */ - -#define PERSISTENT_SPACE_START 0x00002c00 -#define PERSISTENT_SPACE_END 0x00002fff - -/* - * CONTEXT_SPACE value - */ - -#define CONTEXT_SPACE_START 0x0000a000 -#define CONTEXT_SPACE_END 0x0000bfff - -/******************************************************* - * SQ_UC Enums - *******************************************************/ - -/* - * VALUE_SQ_ENC_SOP1 value - */ - -#define SQ_ENC_SOP1_BITS 0xbe800000 -#define SQ_ENC_SOP1_MASK 0xff800000 -#define SQ_ENC_SOP1_FIELD 0x0000017d - -/* - * VALUE_SQ_ENC_SOPC value - */ - -#define SQ_ENC_SOPC_BITS 0xbf000000 -#define SQ_ENC_SOPC_MASK 0xff800000 -#define SQ_ENC_SOPC_FIELD 0x0000017e - -/* - * VALUE_SQ_ENC_SOPP value - */ - -#define SQ_ENC_SOPP_BITS 0xbf800000 -#define SQ_ENC_SOPP_MASK 0xff800000 -#define 
SQ_ENC_SOPP_FIELD 0x0000017f - -/* - * VALUE_SQ_ENC_SOPK value - */ - -#define SQ_ENC_SOPK_BITS 0xb0000000 -#define SQ_ENC_SOPK_MASK 0xf0000000 -#define SQ_ENC_SOPK_FIELD 0x0000000b - -/* - * VALUE_SQ_ENC_SOP2 value - */ - -#define SQ_ENC_SOP2_BITS 0x80000000 -#define SQ_ENC_SOP2_MASK 0xc0000000 -#define SQ_ENC_SOP2_FIELD 0x00000002 - -/* - * VALUE_SQ_ENC_SMEM value - */ - -#define SQ_ENC_SMEM_BITS 0xc0000000 -#define SQ_ENC_SMEM_MASK 0xfc000000 -#define SQ_ENC_SMEM_FIELD 0x00000030 - -/* - * VALUE_SQ_ENC_VOP1 value - */ - -#define SQ_ENC_VOP1_BITS 0x7e000000 -#define SQ_ENC_VOP1_MASK 0xfe000000 -#define SQ_ENC_VOP1_FIELD 0x0000003f - -/* - * VALUE_SQ_ENC_VOPC value - */ - -#define SQ_ENC_VOPC_BITS 0x7c000000 -#define SQ_ENC_VOPC_MASK 0xfe000000 -#define SQ_ENC_VOPC_FIELD 0x0000003e - -/* - * VALUE_SQ_ENC_VOP2 value - */ - -#define SQ_ENC_VOP2_BITS 0x00000000 -#define SQ_ENC_VOP2_MASK 0x80000000 -#define SQ_ENC_VOP2_FIELD 0x00000000 - -/* - * VALUE_SQ_ENC_VINTRP value - */ - -#define SQ_ENC_VINTRP_BITS 0xd4000000 -#define SQ_ENC_VINTRP_MASK 0xfc000000 -#define SQ_ENC_VINTRP_FIELD 0x00000035 - -/* - * VALUE_SQ_ENC_VOP3P value - */ - -#define SQ_ENC_VOP3P_BITS 0xd3800000 -#define SQ_ENC_VOP3P_MASK 0xff800000 -#define SQ_ENC_VOP3P_FIELD 0x000001a7 - -/* - * VALUE_SQ_ENC_VOP3 value - */ - -#define SQ_ENC_VOP3_BITS 0xd0000000 -#define SQ_ENC_VOP3_MASK 0xfc000000 -#define SQ_ENC_VOP3_FIELD 0x00000034 - -/* - * VALUE_SQ_ENC_DS value - */ - -#define SQ_ENC_DS_BITS 0xd8000000 -#define SQ_ENC_DS_MASK 0xfc000000 -#define SQ_ENC_DS_FIELD 0x00000036 - -/* - * VALUE_SQ_ENC_MUBUF value - */ - -#define SQ_ENC_MUBUF_BITS 0xe0000000 -#define SQ_ENC_MUBUF_MASK 0xfc000000 -#define SQ_ENC_MUBUF_FIELD 0x00000038 - -/* - * VALUE_SQ_ENC_MTBUF value - */ - -#define SQ_ENC_MTBUF_BITS 0xe8000000 -#define SQ_ENC_MTBUF_MASK 0xfc000000 -#define SQ_ENC_MTBUF_FIELD 0x0000003a - -/* - * VALUE_SQ_ENC_MIMG value - */ - -#define SQ_ENC_MIMG_BITS 0xf0000000 -#define SQ_ENC_MIMG_MASK 0xfc000000 
-#define SQ_ENC_MIMG_FIELD 0x0000003c - -/* - * VALUE_SQ_ENC_EXP value - */ - -#define SQ_ENC_EXP_BITS 0xc4000000 -#define SQ_ENC_EXP_MASK 0xfc000000 -#define SQ_ENC_EXP_FIELD 0x00000031 - -/* - * VALUE_SQ_ENC_FLAT value - */ - -#define SQ_ENC_FLAT_BITS 0xdc000000 -#define SQ_ENC_FLAT_MASK 0xfc000000 -#define SQ_ENC_FLAT_FIELD 0x00000037 - -/* - * VALUE_SQ_HWREG_ID_SHIFT value - */ - -#define SQ_HWREG_ID_SHIFT 0x00000000 - -/* - * VALUE_SQ_V_OP3P_COUNT value - */ - -#define SQ_V_OP3P_COUNT 0x00000080 - -/* - * VALUE_SQ_SENDMSG_SYSTEM_SHIFT value - */ - -#define SQ_SENDMSG_SYSTEM_SHIFT 0x00000004 - -/* - * VALUE_SQ_XLATE_VOP3_TO_VOP1_COUNT value - */ - -#define SQ_XLATE_VOP3_TO_VOP1_COUNT 0x00000080 - -/* - * VALUE_SQ_SRC_VGPR_BIT value - */ - -#define SQ_SRC_VGPR_BIT 0x00000100 - -/* - * VALUE_SQ_V_OP1_COUNT value - */ - -#define SQ_V_OP1_COUNT 0x00000080 - -/* - * VALUE_SQ_SENDMSG_STREAMID_SHIFT value - */ - -#define SQ_SENDMSG_STREAMID_SHIFT 0x00000008 - -/* - * VALUE_SQ_HWREG_ID_SIZE value - */ - -#define SQ_HWREG_ID_SIZE 0x00000006 - -/* - * VALUE_SQ_EXP_NUM_MRT value - */ - -#define SQ_EXP_NUM_MRT 0x00000008 - -/* - * VALUE_SQ_V_OP3_3IN_OFFSET value - */ - -#define SQ_V_OP3_3IN_OFFSET 0x000001c0 - -/* - * VALUE_SQ_SENDMSG_STREAMID_SIZE value - */ - -#define SQ_SENDMSG_STREAMID_SIZE 0x00000002 - -/* - * VALUE_SQ_HWREG_OFFSET_SHIFT value - */ - -#define SQ_HWREG_OFFSET_SHIFT 0x00000006 - -/* - * VALUE_SQ_SENDMSG_MSG_SIZE value - */ - -#define SQ_SENDMSG_MSG_SIZE 0x00000004 - -/* - * VALUE_SQ_HWREG_SIZE_SHIFT value - */ - -#define SQ_HWREG_SIZE_SHIFT 0x0000000b - -/* - * VALUE_SQ_SENDMSG_SYSTEM_SIZE value - */ - -#define SQ_SENDMSG_SYSTEM_SIZE 0x00000003 - -/* - * VALUE_SQ_SENDMSG_MSG_SHIFT value - */ - -#define SQ_SENDMSG_MSG_SHIFT 0x00000000 - -/* - * VALUE_SQ_SENDMSG_GSOP_SIZE value - */ - -#define SQ_SENDMSG_GSOP_SIZE 0x00000002 - -/* - * VALUE_SQ_SENDMSG_GSOP_SHIFT value - */ - -#define SQ_SENDMSG_GSOP_SHIFT 0x00000004 - -/* - * VALUE_SQ_NUM_TTMP value - */ 
- -#define SQ_NUM_TTMP 0x00000010 - -/* - * VALUE_SQ_XLATE_VOP3_TO_VOP3P_COUNT value - */ - -#define SQ_XLATE_VOP3_TO_VOP3P_COUNT 0x00000080 - -/* - * VALUE_SQ_EXP_NUM_POS value - */ - -#define SQ_EXP_NUM_POS 0x00000004 - -/* - * VALUE_SQ_XLATE_VOP3_TO_VOP3P_OFFSET value - */ - -#define SQ_XLATE_VOP3_TO_VOP3P_OFFSET 0x00000380 - -/* - * VALUE_SQ_WAITCNT_EXP_SIZE value - */ - -#define SQ_WAITCNT_EXP_SIZE 0x00000003 - -/* - * VALUE_SQ_V_OP2_COUNT value - */ - -#define SQ_V_OP2_COUNT 0x00000040 - -/* - * VALUE_SQ_HWREG_SIZE_SIZE value - */ - -#define SQ_HWREG_SIZE_SIZE 0x00000005 - -/* - * VALUE_SQ_WAITCNT_VM_SHIFT value - */ - -#define SQ_WAITCNT_VM_SHIFT 0x00000000 - -/* - * VALUE_SQ_V_OP3_3IN_COUNT value - */ - -#define SQ_V_OP3_3IN_COUNT 0x000000b0 - -/* - * VALUE_SQ_NUM_VGPR value - */ - -#define SQ_NUM_VGPR 0x00000100 - -/* - * VALUE_SQ_EXP_NUM_PARAM value - */ - -#define SQ_EXP_NUM_PARAM 0x00000020 - -/* - * VALUE_SQ_XLATE_VOP3_TO_VOPC_OFFSET value - */ - -#define SQ_XLATE_VOP3_TO_VOPC_OFFSET 0x00000000 - -/* - * VALUE_SQ_V_OP3_INTRP_COUNT value - */ - -#define SQ_V_OP3_INTRP_COUNT 0x0000000c - -/* - * VALUE_SQ_WAITCNT_LGKM_SHIFT value - */ - -#define SQ_WAITCNT_LGKM_SHIFT 0x00000008 - -/* - * VALUE_SQ_XLATE_VOP3_TO_VOP2_OFFSET value - */ - -#define SQ_XLATE_VOP3_TO_VOP2_OFFSET 0x00000100 - -/* - * VALUE_SQ_V_OP3_2IN_OFFSET value - */ - -#define SQ_V_OP3_2IN_OFFSET 0x00000280 - -/* - * VALUE_SQ_V_INTRP_COUNT value - */ - -#define SQ_V_INTRP_COUNT 0x00000004 - -/* - * VALUE_SQ_XLATE_VOP3_TO_VINTRP_OFFSET value - */ - -#define SQ_XLATE_VOP3_TO_VINTRP_OFFSET 0x00000270 - -/* - * VALUE_SQ_WAITCNT_LGKM_SIZE value - */ - -#define SQ_WAITCNT_LGKM_SIZE 0x00000004 - -/* - * VALUE_SQ_EXP_NUM_GDS value - */ - -#define SQ_EXP_NUM_GDS 0x00000005 - -/* - * VALUE_SQ_HWREG_OFFSET_SIZE value - */ - -#define SQ_HWREG_OFFSET_SIZE 0x00000005 - -/* - * VALUE_SQ_WAITCNT_VM_SIZE value - */ - -#define SQ_WAITCNT_VM_SIZE 0x00000004 - -/* - * VALUE_SQ_V_OP3_2IN_COUNT value - */ - 
-#define SQ_V_OP3_2IN_COUNT 0x00000080 - -/* - * VALUE_SQ_XLATE_VOP3_TO_VINTRP_COUNT value - */ - -#define SQ_XLATE_VOP3_TO_VINTRP_COUNT 0x00000004 - -/* - * VALUE_SQ_XLATE_VOP3_TO_VOPC_COUNT value - */ - -#define SQ_XLATE_VOP3_TO_VOPC_COUNT 0x00000100 - -/* - * VALUE_SQ_NUM_ATTR value - */ - -#define SQ_NUM_ATTR 0x00000021 - -/* - * VALUE_SQ_V_OPC_COUNT value - */ - -#define SQ_V_OPC_COUNT 0x00000100 - -/* - * VALUE_SQ_V_OP3_INTRP_OFFSET value - */ - -#define SQ_V_OP3_INTRP_OFFSET 0x00000274 - -/* - * VALUE_SQ_XLATE_VOP3_TO_VOP2_COUNT value - */ - -#define SQ_XLATE_VOP3_TO_VOP2_COUNT 0x00000040 - -/* - * VALUE_SQ_WAITCNT_EXP_SHIFT value - */ - -#define SQ_WAITCNT_EXP_SHIFT 0x00000004 - -/* - * VALUE_SQ_XLATE_VOP3_TO_VOP1_OFFSET value - */ - -#define SQ_XLATE_VOP3_TO_VOP1_OFFSET 0x00000140 - -/* - * VALUE_SQ_NUM_SGPR value - */ - -#define SQ_NUM_SGPR 0x00000066 - -/* - * VALUE_SQ_FLAT_SCRATCH_LOHI value - */ - -#define SQ_FLAT_SCRATCH_LO 0x00000066 -#define SQ_FLAT_SCRATCH_HI 0x00000067 - -/* - * VALUE_SQ_OP_VOP3 value - */ - -#define SQ_V_MAD_LEGACY_F32 0x000001c0 -#define SQ_V_MAD_F32 0x000001c1 -#define SQ_V_MAD_I32_I24 0x000001c2 -#define SQ_V_MAD_U32_U24 0x000001c3 -#define SQ_V_CUBEID_F32 0x000001c4 -#define SQ_V_CUBESC_F32 0x000001c5 -#define SQ_V_CUBETC_F32 0x000001c6 -#define SQ_V_CUBEMA_F32 0x000001c7 -#define SQ_V_BFE_U32 0x000001c8 -#define SQ_V_BFE_I32 0x000001c9 -#define SQ_V_BFI_B32 0x000001ca -#define SQ_V_FMA_F32 0x000001cb -#define SQ_V_FMA_F64 0x000001cc -#define SQ_V_LERP_U8 0x000001cd -#define SQ_V_ALIGNBIT_B32 0x000001ce -#define SQ_V_ALIGNBYTE_B32 0x000001cf -#define SQ_V_MIN3_F32 0x000001d0 -#define SQ_V_MIN3_I32 0x000001d1 -#define SQ_V_MIN3_U32 0x000001d2 -#define SQ_V_MAX3_F32 0x000001d3 -#define SQ_V_MAX3_I32 0x000001d4 -#define SQ_V_MAX3_U32 0x000001d5 -#define SQ_V_MED3_F32 0x000001d6 -#define SQ_V_MED3_I32 0x000001d7 -#define SQ_V_MED3_U32 0x000001d8 -#define SQ_V_SAD_U8 0x000001d9 -#define SQ_V_SAD_HI_U8 0x000001da -#define 
SQ_V_SAD_U16 0x000001db -#define SQ_V_SAD_U32 0x000001dc -#define SQ_V_CVT_PK_U8_F32 0x000001dd -#define SQ_V_DIV_FIXUP_F32 0x000001de -#define SQ_V_DIV_FIXUP_F64 0x000001df -#define SQ_V_DIV_SCALE_F32 0x000001e0 -#define SQ_V_DIV_SCALE_F64 0x000001e1 -#define SQ_V_DIV_FMAS_F32 0x000001e2 -#define SQ_V_DIV_FMAS_F64 0x000001e3 -#define SQ_V_MSAD_U8 0x000001e4 -#define SQ_V_QSAD_PK_U16_U8 0x000001e5 -#define SQ_V_MQSAD_PK_U16_U8 0x000001e6 -#define SQ_V_MQSAD_U32_U8 0x000001e7 -#define SQ_V_MAD_U64_U32 0x000001e8 -#define SQ_V_MAD_I64_I32 0x000001e9 -#define SQ_V_MAD_LEGACY_F16 0x000001ea -#define SQ_V_MAD_LEGACY_U16 0x000001eb -#define SQ_V_MAD_LEGACY_I16 0x000001ec -#define SQ_V_PERM_B32 0x000001ed -#define SQ_V_FMA_LEGACY_F16 0x000001ee -#define SQ_V_DIV_FIXUP_LEGACY_F16 0x000001ef -#define SQ_V_CVT_PKACCUM_U8_F32 0x000001f0 -#define SQ_V_MAD_U32_U16 0x000001f1 -#define SQ_V_MAD_I32_I16 0x000001f2 -#define SQ_V_XAD_U32 0x000001f3 -#define SQ_V_MIN3_F16 0x000001f4 -#define SQ_V_MIN3_I16 0x000001f5 -#define SQ_V_MIN3_U16 0x000001f6 -#define SQ_V_MAX3_F16 0x000001f7 -#define SQ_V_MAX3_I16 0x000001f8 -#define SQ_V_MAX3_U16 0x000001f9 -#define SQ_V_MED3_F16 0x000001fa -#define SQ_V_MED3_I16 0x000001fb -#define SQ_V_MED3_U16 0x000001fc -#define SQ_V_LSHL_ADD_U32 0x000001fd -#define SQ_V_ADD_LSHL_U32 0x000001fe -#define SQ_V_ADD3_U32 0x000001ff -#define SQ_V_LSHL_OR_B32 0x00000200 -#define SQ_V_AND_OR_B32 0x00000201 -#define SQ_V_OR3_B32 0x00000202 -#define SQ_V_MAD_F16 0x00000203 -#define SQ_V_MAD_U16 0x00000204 -#define SQ_V_MAD_I16 0x00000205 -#define SQ_V_FMA_F16 0x00000206 -#define SQ_V_DIV_FIXUP_F16 0x00000207 -#define SQ_V_INTERP_P1LL_F16 0x00000274 -#define SQ_V_INTERP_P1LV_F16 0x00000275 -#define SQ_V_INTERP_P2_LEGACY_F16 0x00000276 -#define SQ_V_INTERP_P2_F16 0x00000277 -#define SQ_V_ADD_F64 0x00000280 -#define SQ_V_MUL_F64 0x00000281 -#define SQ_V_MIN_F64 0x00000282 -#define SQ_V_MAX_F64 0x00000283 -#define SQ_V_LDEXP_F64 0x00000284 -#define SQ_V_MUL_LO_U32 
0x00000285 -#define SQ_V_MUL_HI_U32 0x00000286 -#define SQ_V_MUL_HI_I32 0x00000287 -#define SQ_V_LDEXP_F32 0x00000288 -#define SQ_V_READLANE_B32 0x00000289 -#define SQ_V_WRITELANE_B32 0x0000028a -#define SQ_V_BCNT_U32_B32 0x0000028b -#define SQ_V_MBCNT_LO_U32_B32 0x0000028c -#define SQ_V_MBCNT_HI_U32_B32 0x0000028d -#define SQ_V_MAC_LEGACY_F32 0x0000028e -#define SQ_V_LSHLREV_B64 0x0000028f -#define SQ_V_LSHRREV_B64 0x00000290 -#define SQ_V_ASHRREV_I64 0x00000291 -#define SQ_V_TRIG_PREOP_F64 0x00000292 -#define SQ_V_BFM_B32 0x00000293 -#define SQ_V_CVT_PKNORM_I16_F32 0x00000294 -#define SQ_V_CVT_PKNORM_U16_F32 0x00000295 -#define SQ_V_CVT_PKRTZ_F16_F32 0x00000296 -#define SQ_V_CVT_PK_U16_U32 0x00000297 -#define SQ_V_CVT_PK_I16_I32 0x00000298 -#define SQ_V_CVT_PKNORM_I16_F16 0x00000299 -#define SQ_V_CVT_PKNORM_U16_F16 0x0000029a -#define SQ_V_READLANE_REGRD_B32 0x0000029b -#define SQ_V_ADD_I32 0x0000029c -#define SQ_V_SUB_I32 0x0000029d -#define SQ_V_ADD_I16 0x0000029e -#define SQ_V_SUB_I16 0x0000029f -#define SQ_V_PACK_B32_F16 0x000002a0 - -/* - * VALUE_SQ_OP_VINTRP value - */ - -#define SQ_V_INTERP_P1_F32 0x00000000 -#define SQ_V_INTERP_P2_F32 0x00000001 -#define SQ_V_INTERP_MOV_F32 0x00000002 - -/* - * VALUE_SQ_SSRC_SPECIAL_VCCZ value - */ - -#define SQ_SRC_VCCZ 0x000000fb - -/* - * VALUE_SQ_TGT_INTERNAL value - */ - -#define SQ_EXP_GDS0 0x00000018 - -/* - * VALUE_SQ_OMOD value - */ - -#define SQ_OMOD_OFF 0x00000000 -#define SQ_OMOD_M2 0x00000001 -#define SQ_OMOD_M4 0x00000002 -#define SQ_OMOD_D2 0x00000003 - -/* - * VALUE_SQ_ATTR value - */ - -#define SQ_ATTR0 0x00000000 - -/* - * VALUE_SQ_TGT value - */ - -#define SQ_EXP_MRT0 0x00000000 -#define SQ_EXP_MRTZ 0x00000008 -#define SQ_EXP_NULL 0x00000009 -#define SQ_EXP_POS0 0x0000000c -#define SQ_EXP_PARAM0 0x00000020 - -/* - * VALUE_SQ_OPU_VOP3 value - */ - -#define SQ_V_OPC_OFFSET 0x00000000 -#define SQ_V_OP2_OFFSET 0x00000100 -#define SQ_V_OP1_OFFSET 0x00000140 -#define SQ_V_INTRP_OFFSET 0x00000270 -#define 
SQ_V_OP3P_OFFSET 0x00000380 - -/* - * VALUE_SQ_OP_SOPK value - */ - -#define SQ_S_MOVK_I32 0x00000000 -#define SQ_S_CMOVK_I32 0x00000001 -#define SQ_S_CMPK_EQ_I32 0x00000002 -#define SQ_S_CMPK_LG_I32 0x00000003 -#define SQ_S_CMPK_GT_I32 0x00000004 -#define SQ_S_CMPK_GE_I32 0x00000005 -#define SQ_S_CMPK_LT_I32 0x00000006 -#define SQ_S_CMPK_LE_I32 0x00000007 -#define SQ_S_CMPK_EQ_U32 0x00000008 -#define SQ_S_CMPK_LG_U32 0x00000009 -#define SQ_S_CMPK_GT_U32 0x0000000a -#define SQ_S_CMPK_GE_U32 0x0000000b -#define SQ_S_CMPK_LT_U32 0x0000000c -#define SQ_S_CMPK_LE_U32 0x0000000d -#define SQ_S_ADDK_I32 0x0000000e -#define SQ_S_MULK_I32 0x0000000f -#define SQ_S_CBRANCH_I_FORK 0x00000010 -#define SQ_S_GETREG_B32 0x00000011 -#define SQ_S_SETREG_B32 0x00000012 -#define SQ_S_GETREG_REGRD_B32 0x00000013 -#define SQ_S_SETREG_IMM32_B32 0x00000014 -#define SQ_S_CALL_B64 0x00000015 - -/* - * VALUE_SQ_COMPF value - */ - -#define SQ_F 0x00000000 -#define SQ_LT 0x00000001 -#define SQ_EQ 0x00000002 -#define SQ_LE 0x00000003 -#define SQ_GT 0x00000004 -#define SQ_LG 0x00000005 -#define SQ_GE 0x00000006 -#define SQ_O 0x00000007 -#define SQ_U 0x00000008 -#define SQ_NGE 0x00000009 -#define SQ_NLG 0x0000000a -#define SQ_NGT 0x0000000b -#define SQ_NLE 0x0000000c -#define SQ_NEQ 0x0000000d -#define SQ_NLT 0x0000000e -#define SQ_TRU 0x0000000f - -/* - * VALUE_SQ_DPP_CTRL value - */ - -#define SQ_DPP_QUAD_PERM 0x00000000 -#define SQ_DPP_ROW_SL1 0x00000101 -#define SQ_DPP_ROW_SL2 0x00000102 -#define SQ_DPP_ROW_SL3 0x00000103 -#define SQ_DPP_ROW_SL4 0x00000104 -#define SQ_DPP_ROW_SL5 0x00000105 -#define SQ_DPP_ROW_SL6 0x00000106 -#define SQ_DPP_ROW_SL7 0x00000107 -#define SQ_DPP_ROW_SL8 0x00000108 -#define SQ_DPP_ROW_SL9 0x00000109 -#define SQ_DPP_ROW_SL10 0x0000010a -#define SQ_DPP_ROW_SL11 0x0000010b -#define SQ_DPP_ROW_SL12 0x0000010c -#define SQ_DPP_ROW_SL13 0x0000010d -#define SQ_DPP_ROW_SL14 0x0000010e -#define SQ_DPP_ROW_SL15 0x0000010f -#define SQ_DPP_ROW_SR1 0x00000111 -#define 
SQ_DPP_ROW_SR2 0x00000112 -#define SQ_DPP_ROW_SR3 0x00000113 -#define SQ_DPP_ROW_SR4 0x00000114 -#define SQ_DPP_ROW_SR5 0x00000115 -#define SQ_DPP_ROW_SR6 0x00000116 -#define SQ_DPP_ROW_SR7 0x00000117 -#define SQ_DPP_ROW_SR8 0x00000118 -#define SQ_DPP_ROW_SR9 0x00000119 -#define SQ_DPP_ROW_SR10 0x0000011a -#define SQ_DPP_ROW_SR11 0x0000011b -#define SQ_DPP_ROW_SR12 0x0000011c -#define SQ_DPP_ROW_SR13 0x0000011d -#define SQ_DPP_ROW_SR14 0x0000011e -#define SQ_DPP_ROW_SR15 0x0000011f -#define SQ_DPP_ROW_RR1 0x00000121 -#define SQ_DPP_ROW_RR2 0x00000122 -#define SQ_DPP_ROW_RR3 0x00000123 -#define SQ_DPP_ROW_RR4 0x00000124 -#define SQ_DPP_ROW_RR5 0x00000125 -#define SQ_DPP_ROW_RR6 0x00000126 -#define SQ_DPP_ROW_RR7 0x00000127 -#define SQ_DPP_ROW_RR8 0x00000128 -#define SQ_DPP_ROW_RR9 0x00000129 -#define SQ_DPP_ROW_RR10 0x0000012a -#define SQ_DPP_ROW_RR11 0x0000012b -#define SQ_DPP_ROW_RR12 0x0000012c -#define SQ_DPP_ROW_RR13 0x0000012d -#define SQ_DPP_ROW_RR14 0x0000012e -#define SQ_DPP_ROW_RR15 0x0000012f -#define SQ_DPP_WF_SL1 0x00000130 -#define SQ_DPP_WF_RL1 0x00000134 -#define SQ_DPP_WF_SR1 0x00000138 -#define SQ_DPP_WF_RR1 0x0000013c -#define SQ_DPP_ROW_MIRROR 0x00000140 -#define SQ_DPP_ROW_HALF_MIRROR 0x00000141 -#define SQ_DPP_ROW_BCAST15 0x00000142 -#define SQ_DPP_ROW_BCAST31 0x00000143 - -/* - * VALUE_SQ_VCC_LOHI value - */ - -#define SQ_VCC_LO 0x0000006a -#define SQ_VCC_HI 0x0000006b - -/* - * VALUE_SQ_SSRC_SPECIAL_SCC value - */ - -#define SQ_SRC_SCC 0x000000fd - -/* - * VALUE_SQ_OP_SOP1 value - */ - -#define SQ_S_MOV_B32 0x00000000 -#define SQ_S_MOV_B64 0x00000001 -#define SQ_S_CMOV_B32 0x00000002 -#define SQ_S_CMOV_B64 0x00000003 -#define SQ_S_NOT_B32 0x00000004 -#define SQ_S_NOT_B64 0x00000005 -#define SQ_S_WQM_B32 0x00000006 -#define SQ_S_WQM_B64 0x00000007 -#define SQ_S_BREV_B32 0x00000008 -#define SQ_S_BREV_B64 0x00000009 -#define SQ_S_BCNT0_I32_B32 0x0000000a -#define SQ_S_BCNT0_I32_B64 0x0000000b -#define SQ_S_BCNT1_I32_B32 0x0000000c -#define 
SQ_S_BCNT1_I32_B64 0x0000000d -#define SQ_S_FF0_I32_B32 0x0000000e -#define SQ_S_FF0_I32_B64 0x0000000f -#define SQ_S_FF1_I32_B32 0x00000010 -#define SQ_S_FF1_I32_B64 0x00000011 -#define SQ_S_FLBIT_I32_B32 0x00000012 -#define SQ_S_FLBIT_I32_B64 0x00000013 -#define SQ_S_FLBIT_I32 0x00000014 -#define SQ_S_FLBIT_I32_I64 0x00000015 -#define SQ_S_SEXT_I32_I8 0x00000016 -#define SQ_S_SEXT_I32_I16 0x00000017 -#define SQ_S_BITSET0_B32 0x00000018 -#define SQ_S_BITSET0_B64 0x00000019 -#define SQ_S_BITSET1_B32 0x0000001a -#define SQ_S_BITSET1_B64 0x0000001b -#define SQ_S_GETPC_B64 0x0000001c -#define SQ_S_SETPC_B64 0x0000001d -#define SQ_S_SWAPPC_B64 0x0000001e -#define SQ_S_RFE_B64 0x0000001f -#define SQ_S_AND_SAVEEXEC_B64 0x00000020 -#define SQ_S_OR_SAVEEXEC_B64 0x00000021 -#define SQ_S_XOR_SAVEEXEC_B64 0x00000022 -#define SQ_S_ANDN2_SAVEEXEC_B64 0x00000023 -#define SQ_S_ORN2_SAVEEXEC_B64 0x00000024 -#define SQ_S_NAND_SAVEEXEC_B64 0x00000025 -#define SQ_S_NOR_SAVEEXEC_B64 0x00000026 -#define SQ_S_XNOR_SAVEEXEC_B64 0x00000027 -#define SQ_S_QUADMASK_B32 0x00000028 -#define SQ_S_QUADMASK_B64 0x00000029 -#define SQ_S_MOVRELS_B32 0x0000002a -#define SQ_S_MOVRELS_B64 0x0000002b -#define SQ_S_MOVRELD_B32 0x0000002c -#define SQ_S_MOVRELD_B64 0x0000002d -#define SQ_S_CBRANCH_JOIN 0x0000002e -#define SQ_S_MOV_REGRD_B32 0x0000002f -#define SQ_S_ABS_I32 0x00000030 -#define SQ_S_MOV_FED_B32 0x00000031 -#define SQ_S_SET_GPR_IDX_IDX 0x00000032 -#define SQ_S_ANDN1_SAVEEXEC_B64 0x00000033 -#define SQ_S_ORN1_SAVEEXEC_B64 0x00000034 -#define SQ_S_ANDN1_WREXEC_B64 0x00000035 -#define SQ_S_ANDN2_WREXEC_B64 0x00000036 -#define SQ_S_BITREPLICATE_B64_B32 0x00000037 - -/* - * VALUE_SQ_MSG value - */ - -#define SQ_MSG_INTERRUPT 0x00000001 -#define SQ_MSG_GS 0x00000002 -#define SQ_MSG_GS_DONE 0x00000003 -#define SQ_MSG_SAVEWAVE 0x00000004 -#define SQ_MSG_STALL_WAVE_GEN 0x00000005 -#define SQ_MSG_HALT_WAVES 0x00000006 -#define SQ_MSG_ORDERED_PS_DONE 0x00000007 -#define SQ_MSG_EARLY_PRIM_DEALLOC 
0x00000008 -#define SQ_MSG_GS_ALLOC_REQ 0x00000009 -#define SQ_MSG_SYSMSG 0x0000000f - -/* - * VALUE_SQ_OP_FLAT_GLBL value - */ - -#define SQ_GLOBAL_LOAD_UBYTE 0x00000010 -#define SQ_GLOBAL_LOAD_SBYTE 0x00000011 -#define SQ_GLOBAL_LOAD_USHORT 0x00000012 -#define SQ_GLOBAL_LOAD_SSHORT 0x00000013 -#define SQ_GLOBAL_LOAD_DWORD 0x00000014 -#define SQ_GLOBAL_LOAD_DWORDX2 0x00000015 -#define SQ_GLOBAL_LOAD_DWORDX3 0x00000016 -#define SQ_GLOBAL_LOAD_DWORDX4 0x00000017 -#define SQ_GLOBAL_STORE_BYTE 0x00000018 -#define SQ_GLOBAL_STORE_SHORT 0x0000001a -#define SQ_GLOBAL_STORE_DWORD 0x0000001c -#define SQ_GLOBAL_STORE_DWORDX2 0x0000001d -#define SQ_GLOBAL_STORE_DWORDX3 0x0000001e -#define SQ_GLOBAL_STORE_DWORDX4 0x0000001f -#define SQ_GLOBAL_ATOMIC_SWAP 0x00000040 -#define SQ_GLOBAL_ATOMIC_CMPSWAP 0x00000041 -#define SQ_GLOBAL_ATOMIC_ADD 0x00000042 -#define SQ_GLOBAL_ATOMIC_SUB 0x00000043 -#define SQ_GLOBAL_ATOMIC_SMIN 0x00000044 -#define SQ_GLOBAL_ATOMIC_UMIN 0x00000045 -#define SQ_GLOBAL_ATOMIC_SMAX 0x00000046 -#define SQ_GLOBAL_ATOMIC_UMAX 0x00000047 -#define SQ_GLOBAL_ATOMIC_AND 0x00000048 -#define SQ_GLOBAL_ATOMIC_OR 0x00000049 -#define SQ_GLOBAL_ATOMIC_XOR 0x0000004a -#define SQ_GLOBAL_ATOMIC_INC 0x0000004b -#define SQ_GLOBAL_ATOMIC_DEC 0x0000004c -#define SQ_GLOBAL_ATOMIC_SWAP_X2 0x00000060 -#define SQ_GLOBAL_ATOMIC_CMPSWAP_X2 0x00000061 -#define SQ_GLOBAL_ATOMIC_ADD_X2 0x00000062 -#define SQ_GLOBAL_ATOMIC_SUB_X2 0x00000063 -#define SQ_GLOBAL_ATOMIC_SMIN_X2 0x00000064 -#define SQ_GLOBAL_ATOMIC_UMIN_X2 0x00000065 -#define SQ_GLOBAL_ATOMIC_SMAX_X2 0x00000066 -#define SQ_GLOBAL_ATOMIC_UMAX_X2 0x00000067 -#define SQ_GLOBAL_ATOMIC_AND_X2 0x00000068 -#define SQ_GLOBAL_ATOMIC_OR_X2 0x00000069 -#define SQ_GLOBAL_ATOMIC_XOR_X2 0x0000006a -#define SQ_GLOBAL_ATOMIC_INC_X2 0x0000006b -#define SQ_GLOBAL_ATOMIC_DEC_X2 0x0000006c - -/* - * VALUE_SQ_VGPR value - */ - -#define SQ_VGPR0 0x00000000 - -/* - * VALUE_SQ_HW_REG value - */ - -#define SQ_HW_REG_MODE 0x00000001 -#define 
SQ_HW_REG_STATUS 0x00000002 -#define SQ_HW_REG_TRAPSTS 0x00000003 -#define SQ_HW_REG_HW_ID 0x00000004 -#define SQ_HW_REG_GPR_ALLOC 0x00000005 -#define SQ_HW_REG_LDS_ALLOC 0x00000006 -#define SQ_HW_REG_IB_STS 0x00000007 -#define SQ_HW_REG_PC_LO 0x00000008 -#define SQ_HW_REG_PC_HI 0x00000009 -#define SQ_HW_REG_INST_DW0 0x0000000a -#define SQ_HW_REG_INST_DW1 0x0000000b -#define SQ_HW_REG_IB_DBG0 0x0000000c -#define SQ_HW_REG_IB_DBG1 0x0000000d -#define SQ_HW_REG_FLUSH_IB 0x0000000e -#define SQ_HW_REG_SH_MEM_BASES 0x0000000f -#define SQ_HW_REG_SQ_SHADER_TBA_LO 0x00000010 -#define SQ_HW_REG_SQ_SHADER_TBA_HI 0x00000011 -#define SQ_HW_REG_SQ_SHADER_TMA_LO 0x00000012 -#define SQ_HW_REG_SQ_SHADER_TMA_HI 0x00000013 - -/* - * VALUE_SQ_OP_VOP1 value - */ - -#define SQ_V_NOP 0x00000000 -#define SQ_V_MOV_B32 0x00000001 -#define SQ_V_READFIRSTLANE_B32 0x00000002 -#define SQ_V_CVT_I32_F64 0x00000003 -#define SQ_V_CVT_F64_I32 0x00000004 -#define SQ_V_CVT_F32_I32 0x00000005 -#define SQ_V_CVT_F32_U32 0x00000006 -#define SQ_V_CVT_U32_F32 0x00000007 -#define SQ_V_CVT_I32_F32 0x00000008 -#define SQ_V_MOV_FED_B32 0x00000009 -#define SQ_V_CVT_F16_F32 0x0000000a -#define SQ_V_CVT_F32_F16 0x0000000b -#define SQ_V_CVT_RPI_I32_F32 0x0000000c -#define SQ_V_CVT_FLR_I32_F32 0x0000000d -#define SQ_V_CVT_OFF_F32_I4 0x0000000e -#define SQ_V_CVT_F32_F64 0x0000000f -#define SQ_V_CVT_F64_F32 0x00000010 -#define SQ_V_CVT_F32_UBYTE0 0x00000011 -#define SQ_V_CVT_F32_UBYTE1 0x00000012 -#define SQ_V_CVT_F32_UBYTE2 0x00000013 -#define SQ_V_CVT_F32_UBYTE3 0x00000014 -#define SQ_V_CVT_U32_F64 0x00000015 -#define SQ_V_CVT_F64_U32 0x00000016 -#define SQ_V_TRUNC_F64 0x00000017 -#define SQ_V_CEIL_F64 0x00000018 -#define SQ_V_RNDNE_F64 0x00000019 -#define SQ_V_FLOOR_F64 0x0000001a -#define SQ_V_FRACT_F32 0x0000001b -#define SQ_V_TRUNC_F32 0x0000001c -#define SQ_V_CEIL_F32 0x0000001d -#define SQ_V_RNDNE_F32 0x0000001e -#define SQ_V_FLOOR_F32 0x0000001f -#define SQ_V_EXP_F32 0x00000020 -#define SQ_V_LOG_F32 
0x00000021 -#define SQ_V_RCP_F32 0x00000022 -#define SQ_V_RCP_IFLAG_F32 0x00000023 -#define SQ_V_RSQ_F32 0x00000024 -#define SQ_V_RCP_F64 0x00000025 -#define SQ_V_RSQ_F64 0x00000026 -#define SQ_V_SQRT_F32 0x00000027 -#define SQ_V_SQRT_F64 0x00000028 -#define SQ_V_SIN_F32 0x00000029 -#define SQ_V_COS_F32 0x0000002a -#define SQ_V_NOT_B32 0x0000002b -#define SQ_V_BFREV_B32 0x0000002c -#define SQ_V_FFBH_U32 0x0000002d -#define SQ_V_FFBL_B32 0x0000002e -#define SQ_V_FFBH_I32 0x0000002f -#define SQ_V_FREXP_EXP_I32_F64 0x00000030 -#define SQ_V_FREXP_MANT_F64 0x00000031 -#define SQ_V_FRACT_F64 0x00000032 -#define SQ_V_FREXP_EXP_I32_F32 0x00000033 -#define SQ_V_FREXP_MANT_F32 0x00000034 -#define SQ_V_CLREXCP 0x00000035 -#define SQ_V_MOV_PRSV_B32 0x00000036 -#define SQ_V_CVT_F16_U16 0x00000039 -#define SQ_V_CVT_F16_I16 0x0000003a -#define SQ_V_CVT_U16_F16 0x0000003b -#define SQ_V_CVT_I16_F16 0x0000003c -#define SQ_V_RCP_F16 0x0000003d -#define SQ_V_SQRT_F16 0x0000003e -#define SQ_V_RSQ_F16 0x0000003f -#define SQ_V_LOG_F16 0x00000040 -#define SQ_V_EXP_F16 0x00000041 -#define SQ_V_FREXP_MANT_F16 0x00000042 -#define SQ_V_FREXP_EXP_I16_F16 0x00000043 -#define SQ_V_FLOOR_F16 0x00000044 -#define SQ_V_CEIL_F16 0x00000045 -#define SQ_V_TRUNC_F16 0x00000046 -#define SQ_V_RNDNE_F16 0x00000047 -#define SQ_V_FRACT_F16 0x00000048 -#define SQ_V_SIN_F16 0x00000049 -#define SQ_V_COS_F16 0x0000004a -#define SQ_V_EXP_LEGACY_F32 0x0000004b -#define SQ_V_LOG_LEGACY_F32 0x0000004c -#define SQ_V_CVT_NORM_I16_F16 0x0000004d -#define SQ_V_CVT_NORM_U16_F16 0x0000004e -#define SQ_V_SAT_PK_U8_I16 0x0000004f -#define SQ_V_WRITELANE_IMM32 0x00000050 -#define SQ_V_SWAP_B32 0x00000051 - -/* - * VALUE_SQ_OP_MUBUF value - */ - -#define SQ_BUFFER_LOAD_FORMAT_X 0x00000000 -#define SQ_BUFFER_LOAD_FORMAT_XY 0x00000001 -#define SQ_BUFFER_LOAD_FORMAT_XYZ 0x00000002 -#define SQ_BUFFER_LOAD_FORMAT_XYZW 0x00000003 -#define SQ_BUFFER_STORE_FORMAT_X 0x00000004 -#define SQ_BUFFER_STORE_FORMAT_XY 0x00000005 -#define 
SQ_BUFFER_STORE_FORMAT_XYZ 0x00000006 -#define SQ_BUFFER_STORE_FORMAT_XYZW 0x00000007 -#define SQ_BUFFER_LOAD_FORMAT_D16_X 0x00000008 -#define SQ_BUFFER_LOAD_FORMAT_D16_XY 0x00000009 -#define SQ_BUFFER_LOAD_FORMAT_D16_XYZ 0x0000000a -#define SQ_BUFFER_LOAD_FORMAT_D16_XYZW 0x0000000b -#define SQ_BUFFER_STORE_FORMAT_D16_X 0x0000000c -#define SQ_BUFFER_STORE_FORMAT_D16_XY 0x0000000d -#define SQ_BUFFER_STORE_FORMAT_D16_XYZ 0x0000000e -#define SQ_BUFFER_STORE_FORMAT_D16_XYZW 0x0000000f -#define SQ_BUFFER_LOAD_UBYTE 0x00000010 -#define SQ_BUFFER_LOAD_SBYTE 0x00000011 -#define SQ_BUFFER_LOAD_USHORT 0x00000012 -#define SQ_BUFFER_LOAD_SSHORT 0x00000013 -#define SQ_BUFFER_LOAD_DWORD 0x00000014 -#define SQ_BUFFER_LOAD_DWORDX2 0x00000015 -#define SQ_BUFFER_LOAD_DWORDX3 0x00000016 -#define SQ_BUFFER_LOAD_DWORDX4 0x00000017 -#define SQ_BUFFER_STORE_BYTE 0x00000018 -#define SQ_BUFFER_STORE_SHORT 0x0000001a -#define SQ_BUFFER_STORE_DWORD 0x0000001c -#define SQ_BUFFER_STORE_DWORDX2 0x0000001d -#define SQ_BUFFER_STORE_DWORDX3 0x0000001e -#define SQ_BUFFER_STORE_DWORDX4 0x0000001f -#define SQ_BUFFER_STORE_LDS_DWORD 0x0000003d -#define SQ_BUFFER_WBINVL1 0x0000003e -#define SQ_BUFFER_WBINVL1_VOL 0x0000003f -#define SQ_BUFFER_ATOMIC_SWAP 0x00000040 -#define SQ_BUFFER_ATOMIC_CMPSWAP 0x00000041 -#define SQ_BUFFER_ATOMIC_ADD 0x00000042 -#define SQ_BUFFER_ATOMIC_SUB 0x00000043 -#define SQ_BUFFER_ATOMIC_SMIN 0x00000044 -#define SQ_BUFFER_ATOMIC_UMIN 0x00000045 -#define SQ_BUFFER_ATOMIC_SMAX 0x00000046 -#define SQ_BUFFER_ATOMIC_UMAX 0x00000047 -#define SQ_BUFFER_ATOMIC_AND 0x00000048 -#define SQ_BUFFER_ATOMIC_OR 0x00000049 -#define SQ_BUFFER_ATOMIC_XOR 0x0000004a -#define SQ_BUFFER_ATOMIC_INC 0x0000004b -#define SQ_BUFFER_ATOMIC_DEC 0x0000004c -#define SQ_BUFFER_ATOMIC_SWAP_X2 0x00000060 -#define SQ_BUFFER_ATOMIC_CMPSWAP_X2 0x00000061 -#define SQ_BUFFER_ATOMIC_ADD_X2 0x00000062 -#define SQ_BUFFER_ATOMIC_SUB_X2 0x00000063 -#define SQ_BUFFER_ATOMIC_SMIN_X2 0x00000064 -#define 
SQ_BUFFER_ATOMIC_UMIN_X2 0x00000065 -#define SQ_BUFFER_ATOMIC_SMAX_X2 0x00000066 -#define SQ_BUFFER_ATOMIC_UMAX_X2 0x00000067 -#define SQ_BUFFER_ATOMIC_AND_X2 0x00000068 -#define SQ_BUFFER_ATOMIC_OR_X2 0x00000069 -#define SQ_BUFFER_ATOMIC_XOR_X2 0x0000006a -#define SQ_BUFFER_ATOMIC_INC_X2 0x0000006b -#define SQ_BUFFER_ATOMIC_DEC_X2 0x0000006c - -/* - * VALUE_SQ_TRAP value - */ - -#define SQ_TTMP0 0x0000006c -#define SQ_TTMP1 0x0000006d -#define SQ_TTMP2 0x0000006e -#define SQ_TTMP3 0x0000006f -#define SQ_TTMP4 0x00000070 -#define SQ_TTMP5 0x00000071 -#define SQ_TTMP6 0x00000072 -#define SQ_TTMP7 0x00000073 -#define SQ_TTMP8 0x00000074 -#define SQ_TTMP9 0x00000075 -#define SQ_TTMP10 0x00000076 -#define SQ_TTMP11 0x00000077 -#define SQ_TTMP12 0x00000078 -#define SQ_TTMP13 0x00000079 -#define SQ_TTMP14 0x0000007a -#define SQ_TTMP15 0x0000007b - -/* - * VALUE_SQ_OP_VOPC value - */ - -#define SQ_V_CMP_CLASS_F32 0x00000010 -#define SQ_V_CMPX_CLASS_F32 0x00000011 -#define SQ_V_CMP_CLASS_F64 0x00000012 -#define SQ_V_CMPX_CLASS_F64 0x00000013 -#define SQ_V_CMP_CLASS_F16 0x00000014 -#define SQ_V_CMPX_CLASS_F16 0x00000015 -#define SQ_V_CMP_F_F16 0x00000020 -#define SQ_V_CMP_LT_F16 0x00000021 -#define SQ_V_CMP_EQ_F16 0x00000022 -#define SQ_V_CMP_LE_F16 0x00000023 -#define SQ_V_CMP_GT_F16 0x00000024 -#define SQ_V_CMP_LG_F16 0x00000025 -#define SQ_V_CMP_GE_F16 0x00000026 -#define SQ_V_CMP_O_F16 0x00000027 -#define SQ_V_CMP_U_F16 0x00000028 -#define SQ_V_CMP_NGE_F16 0x00000029 -#define SQ_V_CMP_NLG_F16 0x0000002a -#define SQ_V_CMP_NGT_F16 0x0000002b -#define SQ_V_CMP_NLE_F16 0x0000002c -#define SQ_V_CMP_NEQ_F16 0x0000002d -#define SQ_V_CMP_NLT_F16 0x0000002e -#define SQ_V_CMP_TRU_F16 0x0000002f -#define SQ_V_CMPX_F_F16 0x00000030 -#define SQ_V_CMPX_LT_F16 0x00000031 -#define SQ_V_CMPX_EQ_F16 0x00000032 -#define SQ_V_CMPX_LE_F16 0x00000033 -#define SQ_V_CMPX_GT_F16 0x00000034 -#define SQ_V_CMPX_LG_F16 0x00000035 -#define SQ_V_CMPX_GE_F16 0x00000036 -#define SQ_V_CMPX_O_F16 
0x00000037 -#define SQ_V_CMPX_U_F16 0x00000038 -#define SQ_V_CMPX_NGE_F16 0x00000039 -#define SQ_V_CMPX_NLG_F16 0x0000003a -#define SQ_V_CMPX_NGT_F16 0x0000003b -#define SQ_V_CMPX_NLE_F16 0x0000003c -#define SQ_V_CMPX_NEQ_F16 0x0000003d -#define SQ_V_CMPX_NLT_F16 0x0000003e -#define SQ_V_CMPX_TRU_F16 0x0000003f -#define SQ_V_CMP_F_F32 0x00000040 -#define SQ_V_CMP_LT_F32 0x00000041 -#define SQ_V_CMP_EQ_F32 0x00000042 -#define SQ_V_CMP_LE_F32 0x00000043 -#define SQ_V_CMP_GT_F32 0x00000044 -#define SQ_V_CMP_LG_F32 0x00000045 -#define SQ_V_CMP_GE_F32 0x00000046 -#define SQ_V_CMP_O_F32 0x00000047 -#define SQ_V_CMP_U_F32 0x00000048 -#define SQ_V_CMP_NGE_F32 0x00000049 -#define SQ_V_CMP_NLG_F32 0x0000004a -#define SQ_V_CMP_NGT_F32 0x0000004b -#define SQ_V_CMP_NLE_F32 0x0000004c -#define SQ_V_CMP_NEQ_F32 0x0000004d -#define SQ_V_CMP_NLT_F32 0x0000004e -#define SQ_V_CMP_TRU_F32 0x0000004f -#define SQ_V_CMPX_F_F32 0x00000050 -#define SQ_V_CMPX_LT_F32 0x00000051 -#define SQ_V_CMPX_EQ_F32 0x00000052 -#define SQ_V_CMPX_LE_F32 0x00000053 -#define SQ_V_CMPX_GT_F32 0x00000054 -#define SQ_V_CMPX_LG_F32 0x00000055 -#define SQ_V_CMPX_GE_F32 0x00000056 -#define SQ_V_CMPX_O_F32 0x00000057 -#define SQ_V_CMPX_U_F32 0x00000058 -#define SQ_V_CMPX_NGE_F32 0x00000059 -#define SQ_V_CMPX_NLG_F32 0x0000005a -#define SQ_V_CMPX_NGT_F32 0x0000005b -#define SQ_V_CMPX_NLE_F32 0x0000005c -#define SQ_V_CMPX_NEQ_F32 0x0000005d -#define SQ_V_CMPX_NLT_F32 0x0000005e -#define SQ_V_CMPX_TRU_F32 0x0000005f -#define SQ_V_CMP_F_F64 0x00000060 -#define SQ_V_CMP_LT_F64 0x00000061 -#define SQ_V_CMP_EQ_F64 0x00000062 -#define SQ_V_CMP_LE_F64 0x00000063 -#define SQ_V_CMP_GT_F64 0x00000064 -#define SQ_V_CMP_LG_F64 0x00000065 -#define SQ_V_CMP_GE_F64 0x00000066 -#define SQ_V_CMP_O_F64 0x00000067 -#define SQ_V_CMP_U_F64 0x00000068 -#define SQ_V_CMP_NGE_F64 0x00000069 -#define SQ_V_CMP_NLG_F64 0x0000006a -#define SQ_V_CMP_NGT_F64 0x0000006b -#define SQ_V_CMP_NLE_F64 0x0000006c -#define SQ_V_CMP_NEQ_F64 0x0000006d 
-#define SQ_V_CMP_NLT_F64 0x0000006e -#define SQ_V_CMP_TRU_F64 0x0000006f -#define SQ_V_CMPX_F_F64 0x00000070 -#define SQ_V_CMPX_LT_F64 0x00000071 -#define SQ_V_CMPX_EQ_F64 0x00000072 -#define SQ_V_CMPX_LE_F64 0x00000073 -#define SQ_V_CMPX_GT_F64 0x00000074 -#define SQ_V_CMPX_LG_F64 0x00000075 -#define SQ_V_CMPX_GE_F64 0x00000076 -#define SQ_V_CMPX_O_F64 0x00000077 -#define SQ_V_CMPX_U_F64 0x00000078 -#define SQ_V_CMPX_NGE_F64 0x00000079 -#define SQ_V_CMPX_NLG_F64 0x0000007a -#define SQ_V_CMPX_NGT_F64 0x0000007b -#define SQ_V_CMPX_NLE_F64 0x0000007c -#define SQ_V_CMPX_NEQ_F64 0x0000007d -#define SQ_V_CMPX_NLT_F64 0x0000007e -#define SQ_V_CMPX_TRU_F64 0x0000007f -#define SQ_V_CMP_F_I16 0x000000a0 -#define SQ_V_CMP_LT_I16 0x000000a1 -#define SQ_V_CMP_EQ_I16 0x000000a2 -#define SQ_V_CMP_LE_I16 0x000000a3 -#define SQ_V_CMP_GT_I16 0x000000a4 -#define SQ_V_CMP_NE_I16 0x000000a5 -#define SQ_V_CMP_GE_I16 0x000000a6 -#define SQ_V_CMP_T_I16 0x000000a7 -#define SQ_V_CMP_F_U16 0x000000a8 -#define SQ_V_CMP_LT_U16 0x000000a9 -#define SQ_V_CMP_EQ_U16 0x000000aa -#define SQ_V_CMP_LE_U16 0x000000ab -#define SQ_V_CMP_GT_U16 0x000000ac -#define SQ_V_CMP_NE_U16 0x000000ad -#define SQ_V_CMP_GE_U16 0x000000ae -#define SQ_V_CMP_T_U16 0x000000af -#define SQ_V_CMPX_F_I16 0x000000b0 -#define SQ_V_CMPX_LT_I16 0x000000b1 -#define SQ_V_CMPX_EQ_I16 0x000000b2 -#define SQ_V_CMPX_LE_I16 0x000000b3 -#define SQ_V_CMPX_GT_I16 0x000000b4 -#define SQ_V_CMPX_NE_I16 0x000000b5 -#define SQ_V_CMPX_GE_I16 0x000000b6 -#define SQ_V_CMPX_T_I16 0x000000b7 -#define SQ_V_CMPX_F_U16 0x000000b8 -#define SQ_V_CMPX_LT_U16 0x000000b9 -#define SQ_V_CMPX_EQ_U16 0x000000ba -#define SQ_V_CMPX_LE_U16 0x000000bb -#define SQ_V_CMPX_GT_U16 0x000000bc -#define SQ_V_CMPX_NE_U16 0x000000bd -#define SQ_V_CMPX_GE_U16 0x000000be -#define SQ_V_CMPX_T_U16 0x000000bf -#define SQ_V_CMP_F_I32 0x000000c0 -#define SQ_V_CMP_LT_I32 0x000000c1 -#define SQ_V_CMP_EQ_I32 0x000000c2 -#define SQ_V_CMP_LE_I32 0x000000c3 -#define SQ_V_CMP_GT_I32 
0x000000c4 -#define SQ_V_CMP_NE_I32 0x000000c5 -#define SQ_V_CMP_GE_I32 0x000000c6 -#define SQ_V_CMP_T_I32 0x000000c7 -#define SQ_V_CMP_F_U32 0x000000c8 -#define SQ_V_CMP_LT_U32 0x000000c9 -#define SQ_V_CMP_EQ_U32 0x000000ca -#define SQ_V_CMP_LE_U32 0x000000cb -#define SQ_V_CMP_GT_U32 0x000000cc -#define SQ_V_CMP_NE_U32 0x000000cd -#define SQ_V_CMP_GE_U32 0x000000ce -#define SQ_V_CMP_T_U32 0x000000cf -#define SQ_V_CMPX_F_I32 0x000000d0 -#define SQ_V_CMPX_LT_I32 0x000000d1 -#define SQ_V_CMPX_EQ_I32 0x000000d2 -#define SQ_V_CMPX_LE_I32 0x000000d3 -#define SQ_V_CMPX_GT_I32 0x000000d4 -#define SQ_V_CMPX_NE_I32 0x000000d5 -#define SQ_V_CMPX_GE_I32 0x000000d6 -#define SQ_V_CMPX_T_I32 0x000000d7 -#define SQ_V_CMPX_F_U32 0x000000d8 -#define SQ_V_CMPX_LT_U32 0x000000d9 -#define SQ_V_CMPX_EQ_U32 0x000000da -#define SQ_V_CMPX_LE_U32 0x000000db -#define SQ_V_CMPX_GT_U32 0x000000dc -#define SQ_V_CMPX_NE_U32 0x000000dd -#define SQ_V_CMPX_GE_U32 0x000000de -#define SQ_V_CMPX_T_U32 0x000000df -#define SQ_V_CMP_F_I64 0x000000e0 -#define SQ_V_CMP_LT_I64 0x000000e1 -#define SQ_V_CMP_EQ_I64 0x000000e2 -#define SQ_V_CMP_LE_I64 0x000000e3 -#define SQ_V_CMP_GT_I64 0x000000e4 -#define SQ_V_CMP_NE_I64 0x000000e5 -#define SQ_V_CMP_GE_I64 0x000000e6 -#define SQ_V_CMP_T_I64 0x000000e7 -#define SQ_V_CMP_F_U64 0x000000e8 -#define SQ_V_CMP_LT_U64 0x000000e9 -#define SQ_V_CMP_EQ_U64 0x000000ea -#define SQ_V_CMP_LE_U64 0x000000eb -#define SQ_V_CMP_GT_U64 0x000000ec -#define SQ_V_CMP_NE_U64 0x000000ed -#define SQ_V_CMP_GE_U64 0x000000ee -#define SQ_V_CMP_T_U64 0x000000ef -#define SQ_V_CMPX_F_I64 0x000000f0 -#define SQ_V_CMPX_LT_I64 0x000000f1 -#define SQ_V_CMPX_EQ_I64 0x000000f2 -#define SQ_V_CMPX_LE_I64 0x000000f3 -#define SQ_V_CMPX_GT_I64 0x000000f4 -#define SQ_V_CMPX_NE_I64 0x000000f5 -#define SQ_V_CMPX_GE_I64 0x000000f6 -#define SQ_V_CMPX_T_I64 0x000000f7 -#define SQ_V_CMPX_F_U64 0x000000f8 -#define SQ_V_CMPX_LT_U64 0x000000f9 -#define SQ_V_CMPX_EQ_U64 0x000000fa -#define SQ_V_CMPX_LE_U64 
0x000000fb -#define SQ_V_CMPX_GT_U64 0x000000fc -#define SQ_V_CMPX_NE_U64 0x000000fd -#define SQ_V_CMPX_GE_U64 0x000000fe -#define SQ_V_CMPX_T_U64 0x000000ff - -/* - * VALUE_SQ_DPP_CTRL_R_1_15 value - */ - -#define SQ_R1 0x00000001 -#define SQ_R2 0x00000002 -#define SQ_R3 0x00000003 -#define SQ_R4 0x00000004 -#define SQ_R5 0x00000005 -#define SQ_R6 0x00000006 -#define SQ_R7 0x00000007 -#define SQ_R8 0x00000008 -#define SQ_R9 0x00000009 -#define SQ_R10 0x0000000a -#define SQ_R11 0x0000000b -#define SQ_R12 0x0000000c -#define SQ_R13 0x0000000d -#define SQ_R14 0x0000000e -#define SQ_R15 0x0000000f - -/* - * VALUE_SQ_SSRC_SPECIAL_LDS value - */ - -#define SQ_SRC_LDS_DIRECT 0x000000fe - -/* - * VALUE_SQ_OP_EXP value - */ - -#define SQ_EXP 0x00000000 - -/* - * VALUE_SQ_SDST_M0 value - */ - -#define SQ_M0 0x0000007c - -/* - * VALUE_SQ_OP_MIMG value - */ - -#define SQ_IMAGE_LOAD 0x00000000 -#define SQ_IMAGE_LOAD_MIP 0x00000001 -#define SQ_IMAGE_LOAD_PCK 0x00000002 -#define SQ_IMAGE_LOAD_PCK_SGN 0x00000003 -#define SQ_IMAGE_LOAD_MIP_PCK 0x00000004 -#define SQ_IMAGE_LOAD_MIP_PCK_SGN 0x00000005 -#define SQ_IMAGE_STORE 0x00000008 -#define SQ_IMAGE_STORE_MIP 0x00000009 -#define SQ_IMAGE_STORE_PCK 0x0000000a -#define SQ_IMAGE_STORE_MIP_PCK 0x0000000b -#define SQ_IMAGE_GET_RESINFO 0x0000000e -#define SQ_IMAGE_ATOMIC_SWAP 0x00000010 -#define SQ_IMAGE_ATOMIC_CMPSWAP 0x00000011 -#define SQ_IMAGE_ATOMIC_ADD 0x00000012 -#define SQ_IMAGE_ATOMIC_SUB 0x00000013 -#define SQ_IMAGE_ATOMIC_SMIN 0x00000014 -#define SQ_IMAGE_ATOMIC_UMIN 0x00000015 -#define SQ_IMAGE_ATOMIC_SMAX 0x00000016 -#define SQ_IMAGE_ATOMIC_UMAX 0x00000017 -#define SQ_IMAGE_ATOMIC_AND 0x00000018 -#define SQ_IMAGE_ATOMIC_OR 0x00000019 -#define SQ_IMAGE_ATOMIC_XOR 0x0000001a -#define SQ_IMAGE_ATOMIC_INC 0x0000001b -#define SQ_IMAGE_ATOMIC_DEC 0x0000001c -#define SQ_IMAGE_SAMPLE 0x00000020 -#define SQ_IMAGE_SAMPLE_CL 0x00000021 -#define SQ_IMAGE_SAMPLE_D 0x00000022 -#define SQ_IMAGE_SAMPLE_D_CL 0x00000023 -#define 
SQ_IMAGE_SAMPLE_L 0x00000024 -#define SQ_IMAGE_SAMPLE_B 0x00000025 -#define SQ_IMAGE_SAMPLE_B_CL 0x00000026 -#define SQ_IMAGE_SAMPLE_LZ 0x00000027 -#define SQ_IMAGE_SAMPLE_C 0x00000028 -#define SQ_IMAGE_SAMPLE_C_CL 0x00000029 -#define SQ_IMAGE_SAMPLE_C_D 0x0000002a -#define SQ_IMAGE_SAMPLE_C_D_CL 0x0000002b -#define SQ_IMAGE_SAMPLE_C_L 0x0000002c -#define SQ_IMAGE_SAMPLE_C_B 0x0000002d -#define SQ_IMAGE_SAMPLE_C_B_CL 0x0000002e -#define SQ_IMAGE_SAMPLE_C_LZ 0x0000002f -#define SQ_IMAGE_SAMPLE_O 0x00000030 -#define SQ_IMAGE_SAMPLE_CL_O 0x00000031 -#define SQ_IMAGE_SAMPLE_D_O 0x00000032 -#define SQ_IMAGE_SAMPLE_D_CL_O 0x00000033 -#define SQ_IMAGE_SAMPLE_L_O 0x00000034 -#define SQ_IMAGE_SAMPLE_B_O 0x00000035 -#define SQ_IMAGE_SAMPLE_B_CL_O 0x00000036 -#define SQ_IMAGE_SAMPLE_LZ_O 0x00000037 -#define SQ_IMAGE_SAMPLE_C_O 0x00000038 -#define SQ_IMAGE_SAMPLE_C_CL_O 0x00000039 -#define SQ_IMAGE_SAMPLE_C_D_O 0x0000003a -#define SQ_IMAGE_SAMPLE_C_D_CL_O 0x0000003b -#define SQ_IMAGE_SAMPLE_C_L_O 0x0000003c -#define SQ_IMAGE_SAMPLE_C_B_O 0x0000003d -#define SQ_IMAGE_SAMPLE_C_B_CL_O 0x0000003e -#define SQ_IMAGE_SAMPLE_C_LZ_O 0x0000003f -#define SQ_IMAGE_GATHER4 0x00000040 -#define SQ_IMAGE_GATHER4_CL 0x00000041 -#define SQ_IMAGE_GATHER4H 0x00000042 -#define SQ_IMAGE_GATHER4_L 0x00000044 -#define SQ_IMAGE_GATHER4_B 0x00000045 -#define SQ_IMAGE_GATHER4_B_CL 0x00000046 -#define SQ_IMAGE_GATHER4_LZ 0x00000047 -#define SQ_IMAGE_GATHER4_C 0x00000048 -#define SQ_IMAGE_GATHER4_C_CL 0x00000049 -#define SQ_IMAGE_GATHER4H_PCK 0x0000004a -#define SQ_IMAGE_GATHER8H_PCK 0x0000004b -#define SQ_IMAGE_GATHER4_C_L 0x0000004c -#define SQ_IMAGE_GATHER4_C_B 0x0000004d -#define SQ_IMAGE_GATHER4_C_B_CL 0x0000004e -#define SQ_IMAGE_GATHER4_C_LZ 0x0000004f -#define SQ_IMAGE_GATHER4_O 0x00000050 -#define SQ_IMAGE_GATHER4_CL_O 0x00000051 -#define SQ_IMAGE_GATHER4_L_O 0x00000054 -#define SQ_IMAGE_GATHER4_B_O 0x00000055 -#define SQ_IMAGE_GATHER4_B_CL_O 0x00000056 -#define SQ_IMAGE_GATHER4_LZ_O 0x00000057 
-#define SQ_IMAGE_GATHER4_C_O 0x00000058 -#define SQ_IMAGE_GATHER4_C_CL_O 0x00000059 -#define SQ_IMAGE_GATHER4_C_L_O 0x0000005c -#define SQ_IMAGE_GATHER4_C_B_O 0x0000005d -#define SQ_IMAGE_GATHER4_C_B_CL_O 0x0000005e -#define SQ_IMAGE_GATHER4_C_LZ_O 0x0000005f -#define SQ_IMAGE_GET_LOD 0x00000060 -#define SQ_IMAGE_SAMPLE_CD 0x00000068 -#define SQ_IMAGE_SAMPLE_CD_CL 0x00000069 -#define SQ_IMAGE_SAMPLE_C_CD 0x0000006a -#define SQ_IMAGE_SAMPLE_C_CD_CL 0x0000006b -#define SQ_IMAGE_SAMPLE_CD_O 0x0000006c -#define SQ_IMAGE_SAMPLE_CD_CL_O 0x0000006d -#define SQ_IMAGE_SAMPLE_C_CD_O 0x0000006e -#define SQ_IMAGE_SAMPLE_C_CD_CL_O 0x0000006f -#define SQ_IMAGE_RSRC256 0x0000007e -#define SQ_IMAGE_SAMPLER 0x0000007f - -/* - * VALUE_SQ_SSRC_SPECIAL_NOLIT value - */ - -#define SQ_SRC_64_INT 0x000000c0 -#define SQ_SRC_M_1_INT 0x000000c1 -#define SQ_SRC_M_2_INT 0x000000c2 -#define SQ_SRC_M_3_INT 0x000000c3 -#define SQ_SRC_M_4_INT 0x000000c4 -#define SQ_SRC_M_5_INT 0x000000c5 -#define SQ_SRC_M_6_INT 0x000000c6 -#define SQ_SRC_M_7_INT 0x000000c7 -#define SQ_SRC_M_8_INT 0x000000c8 -#define SQ_SRC_M_9_INT 0x000000c9 -#define SQ_SRC_M_10_INT 0x000000ca -#define SQ_SRC_M_11_INT 0x000000cb -#define SQ_SRC_M_12_INT 0x000000cc -#define SQ_SRC_M_13_INT 0x000000cd -#define SQ_SRC_M_14_INT 0x000000ce -#define SQ_SRC_M_15_INT 0x000000cf -#define SQ_SRC_M_16_INT 0x000000d0 -#define SQ_SRC_0_5 0x000000f0 -#define SQ_SRC_M_0_5 0x000000f1 -#define SQ_SRC_1 0x000000f2 -#define SQ_SRC_M_1 0x000000f3 -#define SQ_SRC_2 0x000000f4 -#define SQ_SRC_M_2 0x000000f5 -#define SQ_SRC_4 0x000000f6 -#define SQ_SRC_M_4 0x000000f7 -#define SQ_SRC_INV_2PI 0x000000f8 - -/* - * VALUE_SQ_SSRC_SPECIAL_DPP value - */ - -#define SQ_SRC_DPP 0x000000fa - -/* - * VALUE_SQ_SSRC_SPECIAL_APERTURE value - */ - -#define SQ_SRC_SHARED_BASE 0x000000eb -#define SQ_SRC_SHARED_LIMIT 0x000000ec -#define SQ_SRC_PRIVATE_BASE 0x000000ed -#define SQ_SRC_PRIVATE_LIMIT 0x000000ee - -/* - * VALUE_SQ_DPP_CTRL_L_1_15 value - */ - -#define SQ_L1 
0x00000001 -#define SQ_L2 0x00000002 -#define SQ_L3 0x00000003 -#define SQ_L4 0x00000004 -#define SQ_L5 0x00000005 -#define SQ_L6 0x00000006 -#define SQ_L7 0x00000007 -#define SQ_L8 0x00000008 -#define SQ_L9 0x00000009 -#define SQ_L10 0x0000000a -#define SQ_L11 0x0000000b -#define SQ_L12 0x0000000c -#define SQ_L13 0x0000000d -#define SQ_L14 0x0000000e -#define SQ_L15 0x0000000f - -/* - * VALUE_SQ_OP_SOP2 value - */ - -#define SQ_S_ADD_U32 0x00000000 -#define SQ_S_SUB_U32 0x00000001 -#define SQ_S_ADD_I32 0x00000002 -#define SQ_S_SUB_I32 0x00000003 -#define SQ_S_ADDC_U32 0x00000004 -#define SQ_S_SUBB_U32 0x00000005 -#define SQ_S_MIN_I32 0x00000006 -#define SQ_S_MIN_U32 0x00000007 -#define SQ_S_MAX_I32 0x00000008 -#define SQ_S_MAX_U32 0x00000009 -#define SQ_S_CSELECT_B32 0x0000000a -#define SQ_S_CSELECT_B64 0x0000000b -#define SQ_S_AND_B32 0x0000000c -#define SQ_S_AND_B64 0x0000000d -#define SQ_S_OR_B32 0x0000000e -#define SQ_S_OR_B64 0x0000000f -#define SQ_S_XOR_B32 0x00000010 -#define SQ_S_XOR_B64 0x00000011 -#define SQ_S_ANDN2_B32 0x00000012 -#define SQ_S_ANDN2_B64 0x00000013 -#define SQ_S_ORN2_B32 0x00000014 -#define SQ_S_ORN2_B64 0x00000015 -#define SQ_S_NAND_B32 0x00000016 -#define SQ_S_NAND_B64 0x00000017 -#define SQ_S_NOR_B32 0x00000018 -#define SQ_S_NOR_B64 0x00000019 -#define SQ_S_XNOR_B32 0x0000001a -#define SQ_S_XNOR_B64 0x0000001b -#define SQ_S_LSHL_B32 0x0000001c -#define SQ_S_LSHL_B64 0x0000001d -#define SQ_S_LSHR_B32 0x0000001e -#define SQ_S_LSHR_B64 0x0000001f -#define SQ_S_ASHR_I32 0x00000020 -#define SQ_S_ASHR_I64 0x00000021 -#define SQ_S_BFM_B32 0x00000022 -#define SQ_S_BFM_B64 0x00000023 -#define SQ_S_MUL_I32 0x00000024 -#define SQ_S_BFE_U32 0x00000025 -#define SQ_S_BFE_I32 0x00000026 -#define SQ_S_BFE_U64 0x00000027 -#define SQ_S_BFE_I64 0x00000028 -#define SQ_S_CBRANCH_G_FORK 0x00000029 -#define SQ_S_ABSDIFF_I32 0x0000002a -#define SQ_S_RFE_RESTORE_B64 0x0000002b -#define SQ_S_MUL_HI_U32 0x0000002c -#define SQ_S_MUL_HI_I32 0x0000002d -#define 
SQ_S_LSHL1_ADD_U32 0x0000002e -#define SQ_S_LSHL2_ADD_U32 0x0000002f -#define SQ_S_LSHL3_ADD_U32 0x00000030 -#define SQ_S_LSHL4_ADD_U32 0x00000031 -#define SQ_S_PACK_LL_B32_B16 0x00000032 -#define SQ_S_PACK_LH_B32_B16 0x00000033 -#define SQ_S_PACK_HH_B32_B16 0x00000034 - -/* - * VALUE_SQ_SDST_EXEC value - */ - -#define SQ_EXEC_LO 0x0000007e -#define SQ_EXEC_HI 0x0000007f - -/* - * VALUE_SQ_SSRC_SPECIAL_POPS_EXITING_WAVE_ID value - */ - -#define SQ_SRC_POPS_EXITING_WAVE_ID 0x000000ef - -/* - * VALUE_SQ_COMPI value - */ - -#define SQ_F 0x00000000 -#define SQ_LT 0x00000001 -#define SQ_EQ 0x00000002 -#define SQ_LE 0x00000003 -#define SQ_GT 0x00000004 -#define SQ_NE 0x00000005 -#define SQ_GE 0x00000006 -#define SQ_T 0x00000007 - -/* - * VALUE_SQ_SGPR value - */ - -#define SQ_SGPR0 0x00000000 - -/* - * VALUE_SQ_CHAN value - */ - -#define SQ_CHAN_X 0x00000000 -#define SQ_CHAN_Y 0x00000001 -#define SQ_CHAN_Z 0x00000002 -#define SQ_CHAN_W 0x00000003 - -/* - * VALUE_SQ_SSRC_SPECIAL_SDWA value - */ - -#define SQ_SRC_SDWA 0x000000f9 - -/* - * VALUE_SQ_SSRC_SPECIAL_LIT value - */ - -#define SQ_SRC_LITERAL 0x000000ff - -/* - * VALUE_SQ_DPP_BOUND_CTRL value - */ - -#define SQ_DPP_BOUND_OFF 0x00000000 -#define SQ_DPP_BOUND_ZERO 0x00000001 - -/* - * VALUE_SQ_GS_OP value - */ - -#define SQ_GS_OP_NOP 0x00000000 -#define SQ_GS_OP_CUT 0x00000001 -#define SQ_GS_OP_EMIT 0x00000002 -#define SQ_GS_OP_EMIT_CUT 0x00000003 - -/* - * VALUE_SQ_OP_MTBUF value - */ - -#define SQ_TBUFFER_LOAD_FORMAT_X 0x00000000 -#define SQ_TBUFFER_LOAD_FORMAT_XY 0x00000001 -#define SQ_TBUFFER_LOAD_FORMAT_XYZ 0x00000002 -#define SQ_TBUFFER_LOAD_FORMAT_XYZW 0x00000003 -#define SQ_TBUFFER_STORE_FORMAT_X 0x00000004 -#define SQ_TBUFFER_STORE_FORMAT_XY 0x00000005 -#define SQ_TBUFFER_STORE_FORMAT_XYZ 0x00000006 -#define SQ_TBUFFER_STORE_FORMAT_XYZW 0x00000007 -#define SQ_TBUFFER_LOAD_FORMAT_D16_X 0x00000008 -#define SQ_TBUFFER_LOAD_FORMAT_D16_XY 0x00000009 -#define SQ_TBUFFER_LOAD_FORMAT_D16_XYZ 0x0000000a -#define 
SQ_TBUFFER_LOAD_FORMAT_D16_XYZW 0x0000000b -#define SQ_TBUFFER_STORE_FORMAT_D16_X 0x0000000c -#define SQ_TBUFFER_STORE_FORMAT_D16_XY 0x0000000d -#define SQ_TBUFFER_STORE_FORMAT_D16_XYZ 0x0000000e -#define SQ_TBUFFER_STORE_FORMAT_D16_XYZW 0x0000000f - -/* - * VALUE_SQ_SSRC_SPECIAL_EXECZ value - */ - -#define SQ_SRC_EXECZ 0x000000fc - -/* - * VALUE_SQ_OP_VOP3P value - */ - -#define SQ_V_PK_MAD_I16 0x00000000 -#define SQ_V_PK_MUL_LO_U16 0x00000001 -#define SQ_V_PK_ADD_I16 0x00000002 -#define SQ_V_PK_SUB_I16 0x00000003 -#define SQ_V_PK_LSHLREV_B16 0x00000004 -#define SQ_V_PK_LSHRREV_B16 0x00000005 -#define SQ_V_PK_ASHRREV_I16 0x00000006 -#define SQ_V_PK_MAX_I16 0x00000007 -#define SQ_V_PK_MIN_I16 0x00000008 -#define SQ_V_PK_MAD_U16 0x00000009 -#define SQ_V_PK_ADD_U16 0x0000000a -#define SQ_V_PK_SUB_U16 0x0000000b -#define SQ_V_PK_MAX_U16 0x0000000c -#define SQ_V_PK_MIN_U16 0x0000000d -#define SQ_V_PK_MAD_F16 0x0000000e -#define SQ_V_PK_ADD_F16 0x0000000f -#define SQ_V_PK_MUL_F16 0x00000010 -#define SQ_V_PK_MIN_F16 0x00000011 -#define SQ_V_PK_MAX_F16 0x00000012 -#define SQ_V_MAD_MIX_F32 0x00000020 -#define SQ_V_MAD_MIXLO_F16 0x00000021 -#define SQ_V_MAD_MIXHI_F16 0x00000022 - -/* - * VALUE_SQ_SYSMSG_OP value - */ - -#define SQ_SYSMSG_OP_ECC_ERR_INTERRUPT 0x00000001 -#define SQ_SYSMSG_OP_REG_RD 0x00000002 -#define SQ_SYSMSG_OP_HOST_TRAP_ACK 0x00000003 -#define SQ_SYSMSG_OP_TTRACE_PC 0x00000004 -#define SQ_SYSMSG_OP_ILLEGAL_INST_INTERRUPT 0x00000005 -#define SQ_SYSMSG_OP_MEMVIOL_INTERRUPT 0x00000006 - -/* - * VALUE_SQ_VCC value - */ - -#define SQ_VCC_ALL 0x00000000 - -/* - * VALUE_SQ_OP_SMEM value - */ - -#define SQ_S_LOAD_DWORD 0x00000000 -#define SQ_S_LOAD_DWORDX2 0x00000001 -#define SQ_S_LOAD_DWORDX4 0x00000002 -#define SQ_S_LOAD_DWORDX8 0x00000003 -#define SQ_S_LOAD_DWORDX16 0x00000004 -#define SQ_S_SCRATCH_LOAD_DWORD 0x00000005 -#define SQ_S_SCRATCH_LOAD_DWORDX2 0x00000006 -#define SQ_S_SCRATCH_LOAD_DWORDX4 0x00000007 -#define SQ_S_BUFFER_LOAD_DWORD 0x00000008 
-#define SQ_S_BUFFER_LOAD_DWORDX2 0x00000009 -#define SQ_S_BUFFER_LOAD_DWORDX4 0x0000000a -#define SQ_S_BUFFER_LOAD_DWORDX8 0x0000000b -#define SQ_S_BUFFER_LOAD_DWORDX16 0x0000000c -#define SQ_S_STORE_DWORD 0x00000010 -#define SQ_S_STORE_DWORDX2 0x00000011 -#define SQ_S_STORE_DWORDX4 0x00000012 -#define SQ_S_SCRATCH_STORE_DWORD 0x00000015 -#define SQ_S_SCRATCH_STORE_DWORDX2 0x00000016 -#define SQ_S_SCRATCH_STORE_DWORDX4 0x00000017 -#define SQ_S_BUFFER_STORE_DWORD 0x00000018 -#define SQ_S_BUFFER_STORE_DWORDX2 0x00000019 -#define SQ_S_BUFFER_STORE_DWORDX4 0x0000001a -#define SQ_S_DCACHE_INV 0x00000020 -#define SQ_S_DCACHE_WB 0x00000021 -#define SQ_S_DCACHE_INV_VOL 0x00000022 -#define SQ_S_DCACHE_WB_VOL 0x00000023 -#define SQ_S_MEMTIME 0x00000024 -#define SQ_S_MEMREALTIME 0x00000025 -#define SQ_S_ATC_PROBE 0x00000026 -#define SQ_S_ATC_PROBE_BUFFER 0x00000027 -#define SQ_S_BUFFER_ATOMIC_SWAP 0x00000040 -#define SQ_S_BUFFER_ATOMIC_CMPSWAP 0x00000041 -#define SQ_S_BUFFER_ATOMIC_ADD 0x00000042 -#define SQ_S_BUFFER_ATOMIC_SUB 0x00000043 -#define SQ_S_BUFFER_ATOMIC_SMIN 0x00000044 -#define SQ_S_BUFFER_ATOMIC_UMIN 0x00000045 -#define SQ_S_BUFFER_ATOMIC_SMAX 0x00000046 -#define SQ_S_BUFFER_ATOMIC_UMAX 0x00000047 -#define SQ_S_BUFFER_ATOMIC_AND 0x00000048 -#define SQ_S_BUFFER_ATOMIC_OR 0x00000049 -#define SQ_S_BUFFER_ATOMIC_XOR 0x0000004a -#define SQ_S_BUFFER_ATOMIC_INC 0x0000004b -#define SQ_S_BUFFER_ATOMIC_DEC 0x0000004c -#define SQ_S_BUFFER_ATOMIC_SWAP_X2 0x00000060 -#define SQ_S_BUFFER_ATOMIC_CMPSWAP_X2 0x00000061 -#define SQ_S_BUFFER_ATOMIC_ADD_X2 0x00000062 -#define SQ_S_BUFFER_ATOMIC_SUB_X2 0x00000063 -#define SQ_S_BUFFER_ATOMIC_SMIN_X2 0x00000064 -#define SQ_S_BUFFER_ATOMIC_UMIN_X2 0x00000065 -#define SQ_S_BUFFER_ATOMIC_SMAX_X2 0x00000066 -#define SQ_S_BUFFER_ATOMIC_UMAX_X2 0x00000067 -#define SQ_S_BUFFER_ATOMIC_AND_X2 0x00000068 -#define SQ_S_BUFFER_ATOMIC_OR_X2 0x00000069 -#define SQ_S_BUFFER_ATOMIC_XOR_X2 0x0000006a -#define SQ_S_BUFFER_ATOMIC_INC_X2 0x0000006b 
-#define SQ_S_BUFFER_ATOMIC_DEC_X2 0x0000006c -#define SQ_S_ATOMIC_SWAP 0x00000080 -#define SQ_S_ATOMIC_CMPSWAP 0x00000081 -#define SQ_S_ATOMIC_ADD 0x00000082 -#define SQ_S_ATOMIC_SUB 0x00000083 -#define SQ_S_ATOMIC_SMIN 0x00000084 -#define SQ_S_ATOMIC_UMIN 0x00000085 -#define SQ_S_ATOMIC_SMAX 0x00000086 -#define SQ_S_ATOMIC_UMAX 0x00000087 -#define SQ_S_ATOMIC_AND 0x00000088 -#define SQ_S_ATOMIC_OR 0x00000089 -#define SQ_S_ATOMIC_XOR 0x0000008a -#define SQ_S_ATOMIC_INC 0x0000008b -#define SQ_S_ATOMIC_DEC 0x0000008c -#define SQ_S_ATOMIC_SWAP_X2 0x000000a0 -#define SQ_S_ATOMIC_CMPSWAP_X2 0x000000a1 -#define SQ_S_ATOMIC_ADD_X2 0x000000a2 -#define SQ_S_ATOMIC_SUB_X2 0x000000a3 -#define SQ_S_ATOMIC_SMIN_X2 0x000000a4 -#define SQ_S_ATOMIC_UMIN_X2 0x000000a5 -#define SQ_S_ATOMIC_SMAX_X2 0x000000a6 -#define SQ_S_ATOMIC_UMAX_X2 0x000000a7 -#define SQ_S_ATOMIC_AND_X2 0x000000a8 -#define SQ_S_ATOMIC_OR_X2 0x000000a9 -#define SQ_S_ATOMIC_XOR_X2 0x000000aa -#define SQ_S_ATOMIC_INC_X2 0x000000ab -#define SQ_S_ATOMIC_DEC_X2 0x000000ac - -/* - * VALUE_SQ_OP_DS value - */ - -#define SQ_DS_ADD_U32 0x00000000 -#define SQ_DS_SUB_U32 0x00000001 -#define SQ_DS_RSUB_U32 0x00000002 -#define SQ_DS_INC_U32 0x00000003 -#define SQ_DS_DEC_U32 0x00000004 -#define SQ_DS_MIN_I32 0x00000005 -#define SQ_DS_MAX_I32 0x00000006 -#define SQ_DS_MIN_U32 0x00000007 -#define SQ_DS_MAX_U32 0x00000008 -#define SQ_DS_AND_B32 0x00000009 -#define SQ_DS_OR_B32 0x0000000a -#define SQ_DS_XOR_B32 0x0000000b -#define SQ_DS_MSKOR_B32 0x0000000c -#define SQ_DS_WRITE_B32 0x0000000d -#define SQ_DS_WRITE2_B32 0x0000000e -#define SQ_DS_WRITE2ST64_B32 0x0000000f -#define SQ_DS_CMPST_B32 0x00000010 -#define SQ_DS_CMPST_F32 0x00000011 -#define SQ_DS_MIN_F32 0x00000012 -#define SQ_DS_MAX_F32 0x00000013 -#define SQ_DS_NOP 0x00000014 -#define SQ_DS_ADD_F32 0x00000015 -#define SQ_DS_WRITE_ADDTID_B32 0x0000001d -#define SQ_DS_WRITE_B8 0x0000001e -#define SQ_DS_WRITE_B16 0x0000001f -#define SQ_DS_ADD_RTN_U32 0x00000020 -#define 
SQ_DS_SUB_RTN_U32 0x00000021 -#define SQ_DS_RSUB_RTN_U32 0x00000022 -#define SQ_DS_INC_RTN_U32 0x00000023 -#define SQ_DS_DEC_RTN_U32 0x00000024 -#define SQ_DS_MIN_RTN_I32 0x00000025 -#define SQ_DS_MAX_RTN_I32 0x00000026 -#define SQ_DS_MIN_RTN_U32 0x00000027 -#define SQ_DS_MAX_RTN_U32 0x00000028 -#define SQ_DS_AND_RTN_B32 0x00000029 -#define SQ_DS_OR_RTN_B32 0x0000002a -#define SQ_DS_XOR_RTN_B32 0x0000002b -#define SQ_DS_MSKOR_RTN_B32 0x0000002c -#define SQ_DS_WRXCHG_RTN_B32 0x0000002d -#define SQ_DS_WRXCHG2_RTN_B32 0x0000002e -#define SQ_DS_WRXCHG2ST64_RTN_B32 0x0000002f -#define SQ_DS_CMPST_RTN_B32 0x00000030 -#define SQ_DS_CMPST_RTN_F32 0x00000031 -#define SQ_DS_MIN_RTN_F32 0x00000032 -#define SQ_DS_MAX_RTN_F32 0x00000033 -#define SQ_DS_WRAP_RTN_B32 0x00000034 -#define SQ_DS_ADD_RTN_F32 0x00000035 -#define SQ_DS_READ_B32 0x00000036 -#define SQ_DS_READ2_B32 0x00000037 -#define SQ_DS_READ2ST64_B32 0x00000038 -#define SQ_DS_READ_I8 0x00000039 -#define SQ_DS_READ_U8 0x0000003a -#define SQ_DS_READ_I16 0x0000003b -#define SQ_DS_READ_U16 0x0000003c -#define SQ_DS_SWIZZLE_B32 0x0000003d -#define SQ_DS_PERMUTE_B32 0x0000003e -#define SQ_DS_BPERMUTE_B32 0x0000003f -#define SQ_DS_ADD_U64 0x00000040 -#define SQ_DS_SUB_U64 0x00000041 -#define SQ_DS_RSUB_U64 0x00000042 -#define SQ_DS_INC_U64 0x00000043 -#define SQ_DS_DEC_U64 0x00000044 -#define SQ_DS_MIN_I64 0x00000045 -#define SQ_DS_MAX_I64 0x00000046 -#define SQ_DS_MIN_U64 0x00000047 -#define SQ_DS_MAX_U64 0x00000048 -#define SQ_DS_AND_B64 0x00000049 -#define SQ_DS_OR_B64 0x0000004a -#define SQ_DS_XOR_B64 0x0000004b -#define SQ_DS_MSKOR_B64 0x0000004c -#define SQ_DS_WRITE_B64 0x0000004d -#define SQ_DS_WRITE2_B64 0x0000004e -#define SQ_DS_WRITE2ST64_B64 0x0000004f -#define SQ_DS_CMPST_B64 0x00000050 -#define SQ_DS_CMPST_F64 0x00000051 -#define SQ_DS_MIN_F64 0x00000052 -#define SQ_DS_MAX_F64 0x00000053 -#define SQ_DS_ADD_RTN_U64 0x00000060 -#define SQ_DS_SUB_RTN_U64 0x00000061 -#define SQ_DS_RSUB_RTN_U64 0x00000062 -#define 
SQ_DS_INC_RTN_U64 0x00000063 -#define SQ_DS_DEC_RTN_U64 0x00000064 -#define SQ_DS_MIN_RTN_I64 0x00000065 -#define SQ_DS_MAX_RTN_I64 0x00000066 -#define SQ_DS_MIN_RTN_U64 0x00000067 -#define SQ_DS_MAX_RTN_U64 0x00000068 -#define SQ_DS_AND_RTN_B64 0x00000069 -#define SQ_DS_OR_RTN_B64 0x0000006a -#define SQ_DS_XOR_RTN_B64 0x0000006b -#define SQ_DS_MSKOR_RTN_B64 0x0000006c -#define SQ_DS_WRXCHG_RTN_B64 0x0000006d -#define SQ_DS_WRXCHG2_RTN_B64 0x0000006e -#define SQ_DS_WRXCHG2ST64_RTN_B64 0x0000006f -#define SQ_DS_CMPST_RTN_B64 0x00000070 -#define SQ_DS_CMPST_RTN_F64 0x00000071 -#define SQ_DS_MIN_RTN_F64 0x00000072 -#define SQ_DS_MAX_RTN_F64 0x00000073 -#define SQ_DS_READ_B64 0x00000076 -#define SQ_DS_READ2_B64 0x00000077 -#define SQ_DS_READ2ST64_B64 0x00000078 -#define SQ_DS_CONDXCHG32_RTN_B64 0x0000007e -#define SQ_DS_ADD_SRC2_U32 0x00000080 -#define SQ_DS_SUB_SRC2_U32 0x00000081 -#define SQ_DS_RSUB_SRC2_U32 0x00000082 -#define SQ_DS_INC_SRC2_U32 0x00000083 -#define SQ_DS_DEC_SRC2_U32 0x00000084 -#define SQ_DS_MIN_SRC2_I32 0x00000085 -#define SQ_DS_MAX_SRC2_I32 0x00000086 -#define SQ_DS_MIN_SRC2_U32 0x00000087 -#define SQ_DS_MAX_SRC2_U32 0x00000088 -#define SQ_DS_AND_SRC2_B32 0x00000089 -#define SQ_DS_OR_SRC2_B32 0x0000008a -#define SQ_DS_XOR_SRC2_B32 0x0000008b -#define SQ_DS_WRITE_SRC2_B32 0x0000008d -#define SQ_DS_MIN_SRC2_F32 0x00000092 -#define SQ_DS_MAX_SRC2_F32 0x00000093 -#define SQ_DS_ADD_SRC2_F32 0x00000095 -#define SQ_DS_GWS_SEMA_RELEASE_ALL 0x00000098 -#define SQ_DS_GWS_INIT 0x00000099 -#define SQ_DS_GWS_SEMA_V 0x0000009a -#define SQ_DS_GWS_SEMA_BR 0x0000009b -#define SQ_DS_GWS_SEMA_P 0x0000009c -#define SQ_DS_GWS_BARRIER 0x0000009d -#define SQ_DS_READ_ADDTID_B32 0x000000b6 -#define SQ_DS_CONSUME 0x000000bd -#define SQ_DS_APPEND 0x000000be -#define SQ_DS_ORDERED_COUNT 0x000000bf -#define SQ_DS_ADD_SRC2_U64 0x000000c0 -#define SQ_DS_SUB_SRC2_U64 0x000000c1 -#define SQ_DS_RSUB_SRC2_U64 0x000000c2 -#define SQ_DS_INC_SRC2_U64 0x000000c3 -#define 
SQ_DS_DEC_SRC2_U64 0x000000c4 -#define SQ_DS_MIN_SRC2_I64 0x000000c5 -#define SQ_DS_MAX_SRC2_I64 0x000000c6 -#define SQ_DS_MIN_SRC2_U64 0x000000c7 -#define SQ_DS_MAX_SRC2_U64 0x000000c8 -#define SQ_DS_AND_SRC2_B64 0x000000c9 -#define SQ_DS_OR_SRC2_B64 0x000000ca -#define SQ_DS_XOR_SRC2_B64 0x000000cb -#define SQ_DS_WRITE_SRC2_B64 0x000000cd -#define SQ_DS_MIN_SRC2_F64 0x000000d2 -#define SQ_DS_MAX_SRC2_F64 0x000000d3 -#define SQ_DS_WRITE_B96 0x000000de -#define SQ_DS_WRITE_B128 0x000000df -#define SQ_DS_CONDXCHG32_RTN_B128 0x000000fd -#define SQ_DS_READ_B96 0x000000fe -#define SQ_DS_READ_B128 0x000000ff - -/* - * VALUE_SQ_SDWA_SEL value - */ - -#define SQ_SDWA_BYTE_0 0x00000000 -#define SQ_SDWA_BYTE_1 0x00000001 -#define SQ_SDWA_BYTE_2 0x00000002 -#define SQ_SDWA_BYTE_3 0x00000003 -#define SQ_SDWA_WORD_0 0x00000004 -#define SQ_SDWA_WORD_1 0x00000005 -#define SQ_SDWA_DWORD 0x00000006 - -/* - * VALUE_SQ_OP_VOP2 value - */ - -#define SQ_V_CNDMASK_B32 0x00000000 -#define SQ_V_ADD_F32 0x00000001 -#define SQ_V_SUB_F32 0x00000002 -#define SQ_V_SUBREV_F32 0x00000003 -#define SQ_V_MUL_LEGACY_F32 0x00000004 -#define SQ_V_MUL_F32 0x00000005 -#define SQ_V_MUL_I32_I24 0x00000006 -#define SQ_V_MUL_HI_I32_I24 0x00000007 -#define SQ_V_MUL_U32_U24 0x00000008 -#define SQ_V_MUL_HI_U32_U24 0x00000009 -#define SQ_V_MIN_F32 0x0000000a -#define SQ_V_MAX_F32 0x0000000b -#define SQ_V_MIN_I32 0x0000000c -#define SQ_V_MAX_I32 0x0000000d -#define SQ_V_MIN_U32 0x0000000e -#define SQ_V_MAX_U32 0x0000000f -#define SQ_V_LSHRREV_B32 0x00000010 -#define SQ_V_ASHRREV_I32 0x00000011 -#define SQ_V_LSHLREV_B32 0x00000012 -#define SQ_V_AND_B32 0x00000013 -#define SQ_V_OR_B32 0x00000014 -#define SQ_V_XOR_B32 0x00000015 -#define SQ_V_MAC_F32 0x00000016 -#define SQ_V_MADMK_F32 0x00000017 -#define SQ_V_MADAK_F32 0x00000018 -#define SQ_V_ADD_CO_U32 0x00000019 -#define SQ_V_SUB_CO_U32 0x0000001a -#define SQ_V_SUBREV_CO_U32 0x0000001b -#define SQ_V_ADDC_CO_U32 0x0000001c -#define SQ_V_SUBB_CO_U32 0x0000001d 
-#define SQ_V_SUBBREV_CO_U32 0x0000001e -#define SQ_V_ADD_F16 0x0000001f -#define SQ_V_SUB_F16 0x00000020 -#define SQ_V_SUBREV_F16 0x00000021 -#define SQ_V_MUL_F16 0x00000022 -#define SQ_V_MAC_F16 0x00000023 -#define SQ_V_MADMK_F16 0x00000024 -#define SQ_V_MADAK_F16 0x00000025 -#define SQ_V_ADD_U16 0x00000026 -#define SQ_V_SUB_U16 0x00000027 -#define SQ_V_SUBREV_U16 0x00000028 -#define SQ_V_MUL_LO_U16 0x00000029 -#define SQ_V_LSHLREV_B16 0x0000002a -#define SQ_V_LSHRREV_B16 0x0000002b -#define SQ_V_ASHRREV_I16 0x0000002c -#define SQ_V_MAX_F16 0x0000002d -#define SQ_V_MIN_F16 0x0000002e -#define SQ_V_MAX_U16 0x0000002f -#define SQ_V_MAX_I16 0x00000030 -#define SQ_V_MIN_U16 0x00000031 -#define SQ_V_MIN_I16 0x00000032 -#define SQ_V_LDEXP_F16 0x00000033 -#define SQ_V_ADD_U32 0x00000034 -#define SQ_V_SUB_U32 0x00000035 -#define SQ_V_SUBREV_U32 0x00000036 - -/* - * VALUE_SQ_SRC_VGPR value - */ - -#define SQ_SRC_VGPR0 0x00000100 - -/* - * VALUE_SQ_OP_SOPP value - */ - -#define SQ_S_NOP 0x00000000 -#define SQ_S_ENDPGM 0x00000001 -#define SQ_S_BRANCH 0x00000002 -#define SQ_S_WAKEUP 0x00000003 -#define SQ_S_CBRANCH_SCC0 0x00000004 -#define SQ_S_CBRANCH_SCC1 0x00000005 -#define SQ_S_CBRANCH_VCCZ 0x00000006 -#define SQ_S_CBRANCH_VCCNZ 0x00000007 -#define SQ_S_CBRANCH_EXECZ 0x00000008 -#define SQ_S_CBRANCH_EXECNZ 0x00000009 -#define SQ_S_BARRIER 0x0000000a -#define SQ_S_SETKILL 0x0000000b -#define SQ_S_WAITCNT 0x0000000c -#define SQ_S_SETHALT 0x0000000d -#define SQ_S_SLEEP 0x0000000e -#define SQ_S_SETPRIO 0x0000000f -#define SQ_S_SENDMSG 0x00000010 -#define SQ_S_SENDMSGHALT 0x00000011 -#define SQ_S_TRAP 0x00000012 -#define SQ_S_ICACHE_INV 0x00000013 -#define SQ_S_INCPERFLEVEL 0x00000014 -#define SQ_S_DECPERFLEVEL 0x00000015 -#define SQ_S_TTRACEDATA 0x00000016 -#define SQ_S_CBRANCH_CDBGSYS 0x00000017 -#define SQ_S_CBRANCH_CDBGUSER 0x00000018 -#define SQ_S_CBRANCH_CDBGSYS_OR_USER 0x00000019 -#define SQ_S_CBRANCH_CDBGSYS_AND_USER 0x0000001a -#define SQ_S_ENDPGM_SAVED 0x0000001b 
-#define SQ_S_SET_GPR_IDX_OFF 0x0000001c -#define SQ_S_SET_GPR_IDX_MODE 0x0000001d -#define SQ_S_ENDPGM_ORDERED_PS_DONE 0x0000001e - -/* - * VALUE_SQ_XNACK_MASK_LOHI value - */ - -#define SQ_XNACK_MASK_LO 0x00000068 -#define SQ_XNACK_MASK_HI 0x00000069 - -/* - * VALUE_SQ_SDWA_UNUSED value - */ - -#define SQ_SDWA_UNUSED_PAD 0x00000000 -#define SQ_SDWA_UNUSED_SEXT 0x00000001 -#define SQ_SDWA_UNUSED_PRESERVE 0x00000002 - -/* - * VALUE_SQ_OP_FLAT value - */ - -#define SQ_FLAT_LOAD_UBYTE 0x00000010 -#define SQ_FLAT_LOAD_SBYTE 0x00000011 -#define SQ_FLAT_LOAD_USHORT 0x00000012 -#define SQ_FLAT_LOAD_SSHORT 0x00000013 -#define SQ_FLAT_LOAD_DWORD 0x00000014 -#define SQ_FLAT_LOAD_DWORDX2 0x00000015 -#define SQ_FLAT_LOAD_DWORDX3 0x00000016 -#define SQ_FLAT_LOAD_DWORDX4 0x00000017 -#define SQ_FLAT_STORE_BYTE 0x00000018 -#define SQ_FLAT_STORE_SHORT 0x0000001a -#define SQ_FLAT_STORE_DWORD 0x0000001c -#define SQ_FLAT_STORE_DWORDX2 0x0000001d -#define SQ_FLAT_STORE_DWORDX3 0x0000001e -#define SQ_FLAT_STORE_DWORDX4 0x0000001f -#define SQ_FLAT_ATOMIC_SWAP 0x00000040 -#define SQ_FLAT_ATOMIC_CMPSWAP 0x00000041 -#define SQ_FLAT_ATOMIC_ADD 0x00000042 -#define SQ_FLAT_ATOMIC_SUB 0x00000043 -#define SQ_FLAT_ATOMIC_SMIN 0x00000044 -#define SQ_FLAT_ATOMIC_UMIN 0x00000045 -#define SQ_FLAT_ATOMIC_SMAX 0x00000046 -#define SQ_FLAT_ATOMIC_UMAX 0x00000047 -#define SQ_FLAT_ATOMIC_AND 0x00000048 -#define SQ_FLAT_ATOMIC_OR 0x00000049 -#define SQ_FLAT_ATOMIC_XOR 0x0000004a -#define SQ_FLAT_ATOMIC_INC 0x0000004b -#define SQ_FLAT_ATOMIC_DEC 0x0000004c -#define SQ_FLAT_ATOMIC_SWAP_X2 0x00000060 -#define SQ_FLAT_ATOMIC_CMPSWAP_X2 0x00000061 -#define SQ_FLAT_ATOMIC_ADD_X2 0x00000062 -#define SQ_FLAT_ATOMIC_SUB_X2 0x00000063 -#define SQ_FLAT_ATOMIC_SMIN_X2 0x00000064 -#define SQ_FLAT_ATOMIC_UMIN_X2 0x00000065 -#define SQ_FLAT_ATOMIC_SMAX_X2 0x00000066 -#define SQ_FLAT_ATOMIC_UMAX_X2 0x00000067 -#define SQ_FLAT_ATOMIC_AND_X2 0x00000068 -#define SQ_FLAT_ATOMIC_OR_X2 0x00000069 -#define SQ_FLAT_ATOMIC_XOR_X2 
0x0000006a -#define SQ_FLAT_ATOMIC_INC_X2 0x0000006b -#define SQ_FLAT_ATOMIC_DEC_X2 0x0000006c - -/* - * VALUE_SQ_OP_SOPC value - */ - -#define SQ_S_CMP_EQ_I32 0x00000000 -#define SQ_S_CMP_LG_I32 0x00000001 -#define SQ_S_CMP_GT_I32 0x00000002 -#define SQ_S_CMP_GE_I32 0x00000003 -#define SQ_S_CMP_LT_I32 0x00000004 -#define SQ_S_CMP_LE_I32 0x00000005 -#define SQ_S_CMP_EQ_U32 0x00000006 -#define SQ_S_CMP_LG_U32 0x00000007 -#define SQ_S_CMP_GT_U32 0x00000008 -#define SQ_S_CMP_GE_U32 0x00000009 -#define SQ_S_CMP_LT_U32 0x0000000a -#define SQ_S_CMP_LE_U32 0x0000000b -#define SQ_S_BITCMP0_B32 0x0000000c -#define SQ_S_BITCMP1_B32 0x0000000d -#define SQ_S_BITCMP0_B64 0x0000000e -#define SQ_S_BITCMP1_B64 0x0000000f -#define SQ_S_SETVSKIP 0x00000010 -#define SQ_S_SET_GPR_IDX_ON 0x00000011 -#define SQ_S_CMP_EQ_U64 0x00000012 -#define SQ_S_CMP_LG_U64 0x00000013 - -/* - * VALUE_SQ_PARAM value - */ - -#define SQ_PARAM_P10 0x00000000 -#define SQ_PARAM_P20 0x00000001 -#define SQ_PARAM_P0 0x00000002 - -/* - * VALUE_SQ_OP_FLAT_SCRATCH value - */ - -#define SQ_SCRATCH_LOAD_UBYTE 0x00000010 -#define SQ_SCRATCH_LOAD_SBYTE 0x00000011 -#define SQ_SCRATCH_LOAD_USHORT 0x00000012 -#define SQ_SCRATCH_LOAD_SSHORT 0x00000013 -#define SQ_SCRATCH_LOAD_DWORD 0x00000014 -#define SQ_SCRATCH_LOAD_DWORDX2 0x00000015 -#define SQ_SCRATCH_LOAD_DWORDX3 0x00000016 -#define SQ_SCRATCH_LOAD_DWORDX4 0x00000017 -#define SQ_SCRATCH_STORE_BYTE 0x00000018 -#define SQ_SCRATCH_STORE_SHORT 0x0000001a -#define SQ_SCRATCH_STORE_DWORD 0x0000001c -#define SQ_SCRATCH_STORE_DWORDX2 0x0000001d -#define SQ_SCRATCH_STORE_DWORDX3 0x0000001e -#define SQ_SCRATCH_STORE_DWORDX4 0x0000001f - -/* - * VALUE_SQ_SEG value - */ - -#define SQ_FLAT 0x00000000 -#define SQ_SCRATCH 0x00000001 -#define SQ_GLOBAL 0x00000002 - -/* - * VALUE_SQ_SSRC_0_63_INLINES value - */ - -#define SQ_SRC_0 0x00000080 -#define SQ_SRC_1_INT 0x00000081 -#define SQ_SRC_2_INT 0x00000082 -#define SQ_SRC_3_INT 0x00000083 -#define SQ_SRC_4_INT 0x00000084 -#define 
SQ_SRC_5_INT 0x00000085 -#define SQ_SRC_6_INT 0x00000086 -#define SQ_SRC_7_INT 0x00000087 -#define SQ_SRC_8_INT 0x00000088 -#define SQ_SRC_9_INT 0x00000089 -#define SQ_SRC_10_INT 0x0000008a -#define SQ_SRC_11_INT 0x0000008b -#define SQ_SRC_12_INT 0x0000008c -#define SQ_SRC_13_INT 0x0000008d -#define SQ_SRC_14_INT 0x0000008e -#define SQ_SRC_15_INT 0x0000008f -#define SQ_SRC_16_INT 0x00000090 -#define SQ_SRC_17_INT 0x00000091 -#define SQ_SRC_18_INT 0x00000092 -#define SQ_SRC_19_INT 0x00000093 -#define SQ_SRC_20_INT 0x00000094 -#define SQ_SRC_21_INT 0x00000095 -#define SQ_SRC_22_INT 0x00000096 -#define SQ_SRC_23_INT 0x00000097 -#define SQ_SRC_24_INT 0x00000098 -#define SQ_SRC_25_INT 0x00000099 -#define SQ_SRC_26_INT 0x0000009a -#define SQ_SRC_27_INT 0x0000009b -#define SQ_SRC_28_INT 0x0000009c -#define SQ_SRC_29_INT 0x0000009d -#define SQ_SRC_30_INT 0x0000009e -#define SQ_SRC_31_INT 0x0000009f -#define SQ_SRC_32_INT 0x000000a0 -#define SQ_SRC_33_INT 0x000000a1 -#define SQ_SRC_34_INT 0x000000a2 -#define SQ_SRC_35_INT 0x000000a3 -#define SQ_SRC_36_INT 0x000000a4 -#define SQ_SRC_37_INT 0x000000a5 -#define SQ_SRC_38_INT 0x000000a6 -#define SQ_SRC_39_INT 0x000000a7 -#define SQ_SRC_40_INT 0x000000a8 -#define SQ_SRC_41_INT 0x000000a9 -#define SQ_SRC_42_INT 0x000000aa -#define SQ_SRC_43_INT 0x000000ab -#define SQ_SRC_44_INT 0x000000ac -#define SQ_SRC_45_INT 0x000000ad -#define SQ_SRC_46_INT 0x000000ae -#define SQ_SRC_47_INT 0x000000af -#define SQ_SRC_48_INT 0x000000b0 -#define SQ_SRC_49_INT 0x000000b1 -#define SQ_SRC_50_INT 0x000000b2 -#define SQ_SRC_51_INT 0x000000b3 -#define SQ_SRC_52_INT 0x000000b4 -#define SQ_SRC_53_INT 0x000000b5 -#define SQ_SRC_54_INT 0x000000b6 -#define SQ_SRC_55_INT 0x000000b7 -#define SQ_SRC_56_INT 0x000000b8 -#define SQ_SRC_57_INT 0x000000b9 -#define SQ_SRC_58_INT 0x000000ba -#define SQ_SRC_59_INT 0x000000bb -#define SQ_SRC_60_INT 0x000000bc -#define SQ_SRC_61_INT 0x000000bd -#define SQ_SRC_62_INT 0x000000be -#define SQ_SRC_63_INT 0x000000bf - -/* - 
* VALUE_SQ_CNT value - */ - -#define SQ_CNT1 0x00000000 -#define SQ_CNT2 0x00000001 -#define SQ_CNT3 0x00000002 -#define SQ_CNT4 0x00000003 - -/******************************************************* - * DIDT Enums - *******************************************************/ - -/******************************************************* - * SX Enums - *******************************************************/ - -/* - * SX_BLEND_OPT enum - */ - -typedef enum SX_BLEND_OPT { -BLEND_OPT_PRESERVE_NONE_IGNORE_ALL = 0x00000000, -BLEND_OPT_PRESERVE_ALL_IGNORE_NONE = 0x00000001, -BLEND_OPT_PRESERVE_C1_IGNORE_C0 = 0x00000002, -BLEND_OPT_PRESERVE_C0_IGNORE_C1 = 0x00000003, -BLEND_OPT_PRESERVE_A1_IGNORE_A0 = 0x00000004, -BLEND_OPT_PRESERVE_A0_IGNORE_A1 = 0x00000005, -BLEND_OPT_PRESERVE_NONE_IGNORE_A0 = 0x00000006, -BLEND_OPT_PRESERVE_NONE_IGNORE_NONE = 0x00000007, -} SX_BLEND_OPT; - -/* - * SX_OPT_COMB_FCN enum - */ - -typedef enum SX_OPT_COMB_FCN { -OPT_COMB_NONE = 0x00000000, -OPT_COMB_ADD = 0x00000001, -OPT_COMB_SUBTRACT = 0x00000002, -OPT_COMB_MIN = 0x00000003, -OPT_COMB_MAX = 0x00000004, -OPT_COMB_REVSUBTRACT = 0x00000005, -OPT_COMB_BLEND_DISABLED = 0x00000006, -OPT_COMB_SAFE_ADD = 0x00000007, -} SX_OPT_COMB_FCN; - -/* - * SX_DOWNCONVERT_FORMAT enum - */ - -typedef enum SX_DOWNCONVERT_FORMAT { -SX_RT_EXPORT_NO_CONVERSION = 0x00000000, -SX_RT_EXPORT_32_R = 0x00000001, -SX_RT_EXPORT_32_A = 0x00000002, -SX_RT_EXPORT_10_11_11 = 0x00000003, -SX_RT_EXPORT_2_10_10_10 = 0x00000004, -SX_RT_EXPORT_8_8_8_8 = 0x00000005, -SX_RT_EXPORT_5_6_5 = 0x00000006, -SX_RT_EXPORT_1_5_5_5 = 0x00000007, -SX_RT_EXPORT_4_4_4_4 = 0x00000008, -SX_RT_EXPORT_16_16_GR = 0x00000009, -SX_RT_EXPORT_16_16_AR = 0x0000000a, -} SX_DOWNCONVERT_FORMAT; - -/* - * SX_PERFCOUNTER_VALS enum - */ - -typedef enum SX_PERFCOUNTER_VALS { -SX_PERF_SEL_PA_IDLE_CYCLES = 0x00000000, -SX_PERF_SEL_PA_REQ = 0x00000001, -SX_PERF_SEL_PA_POS = 0x00000002, -SX_PERF_SEL_CLOCK = 0x00000003, -SX_PERF_SEL_GATE_EN1 = 0x00000004, 
-SX_PERF_SEL_GATE_EN2 = 0x00000005, -SX_PERF_SEL_GATE_EN3 = 0x00000006, -SX_PERF_SEL_GATE_EN4 = 0x00000007, -SX_PERF_SEL_SH_POS_STARVE = 0x00000008, -SX_PERF_SEL_SH_COLOR_STARVE = 0x00000009, -SX_PERF_SEL_SH_POS_STALL = 0x0000000a, -SX_PERF_SEL_SH_COLOR_STALL = 0x0000000b, -SX_PERF_SEL_DB0_PIXELS = 0x0000000c, -SX_PERF_SEL_DB0_HALF_QUADS = 0x0000000d, -SX_PERF_SEL_DB0_PIXEL_STALL = 0x0000000e, -SX_PERF_SEL_DB0_PIXEL_IDLE = 0x0000000f, -SX_PERF_SEL_DB0_PRED_PIXELS = 0x00000010, -SX_PERF_SEL_DB1_PIXELS = 0x00000011, -SX_PERF_SEL_DB1_HALF_QUADS = 0x00000012, -SX_PERF_SEL_DB1_PIXEL_STALL = 0x00000013, -SX_PERF_SEL_DB1_PIXEL_IDLE = 0x00000014, -SX_PERF_SEL_DB1_PRED_PIXELS = 0x00000015, -SX_PERF_SEL_DB2_PIXELS = 0x00000016, -SX_PERF_SEL_DB2_HALF_QUADS = 0x00000017, -SX_PERF_SEL_DB2_PIXEL_STALL = 0x00000018, -SX_PERF_SEL_DB2_PIXEL_IDLE = 0x00000019, -SX_PERF_SEL_DB2_PRED_PIXELS = 0x0000001a, -SX_PERF_SEL_DB3_PIXELS = 0x0000001b, -SX_PERF_SEL_DB3_HALF_QUADS = 0x0000001c, -SX_PERF_SEL_DB3_PIXEL_STALL = 0x0000001d, -SX_PERF_SEL_DB3_PIXEL_IDLE = 0x0000001e, -SX_PERF_SEL_DB3_PRED_PIXELS = 0x0000001f, -SX_PERF_SEL_COL_BUSY = 0x00000020, -SX_PERF_SEL_POS_BUSY = 0x00000021, -SX_PERF_SEL_DB0_A2M_DISCARD_QUADS = 0x00000022, -SX_PERF_SEL_DB0_MRT0_BLEND_BYPASS = 0x00000023, -SX_PERF_SEL_DB0_MRT0_DONT_RD_DEST = 0x00000024, -SX_PERF_SEL_DB0_MRT0_DISCARD_SRC = 0x00000025, -SX_PERF_SEL_DB0_MRT0_SINGLE_QUADS = 0x00000026, -SX_PERF_SEL_DB0_MRT0_DOUBLE_QUADS = 0x00000027, -SX_PERF_SEL_DB0_MRT1_BLEND_BYPASS = 0x00000028, -SX_PERF_SEL_DB0_MRT1_DONT_RD_DEST = 0x00000029, -SX_PERF_SEL_DB0_MRT1_DISCARD_SRC = 0x0000002a, -SX_PERF_SEL_DB0_MRT1_SINGLE_QUADS = 0x0000002b, -SX_PERF_SEL_DB0_MRT1_DOUBLE_QUADS = 0x0000002c, -SX_PERF_SEL_DB0_MRT2_BLEND_BYPASS = 0x0000002d, -SX_PERF_SEL_DB0_MRT2_DONT_RD_DEST = 0x0000002e, -SX_PERF_SEL_DB0_MRT2_DISCARD_SRC = 0x0000002f, -SX_PERF_SEL_DB0_MRT2_SINGLE_QUADS = 0x00000030, -SX_PERF_SEL_DB0_MRT2_DOUBLE_QUADS = 0x00000031, -SX_PERF_SEL_DB0_MRT3_BLEND_BYPASS = 
0x00000032, -SX_PERF_SEL_DB0_MRT3_DONT_RD_DEST = 0x00000033, -SX_PERF_SEL_DB0_MRT3_DISCARD_SRC = 0x00000034, -SX_PERF_SEL_DB0_MRT3_SINGLE_QUADS = 0x00000035, -SX_PERF_SEL_DB0_MRT3_DOUBLE_QUADS = 0x00000036, -SX_PERF_SEL_DB0_MRT4_BLEND_BYPASS = 0x00000037, -SX_PERF_SEL_DB0_MRT4_DONT_RD_DEST = 0x00000038, -SX_PERF_SEL_DB0_MRT4_DISCARD_SRC = 0x00000039, -SX_PERF_SEL_DB0_MRT4_SINGLE_QUADS = 0x0000003a, -SX_PERF_SEL_DB0_MRT4_DOUBLE_QUADS = 0x0000003b, -SX_PERF_SEL_DB0_MRT5_BLEND_BYPASS = 0x0000003c, -SX_PERF_SEL_DB0_MRT5_DONT_RD_DEST = 0x0000003d, -SX_PERF_SEL_DB0_MRT5_DISCARD_SRC = 0x0000003e, -SX_PERF_SEL_DB0_MRT5_SINGLE_QUADS = 0x0000003f, -SX_PERF_SEL_DB0_MRT5_DOUBLE_QUADS = 0x00000040, -SX_PERF_SEL_DB0_MRT6_BLEND_BYPASS = 0x00000041, -SX_PERF_SEL_DB0_MRT6_DONT_RD_DEST = 0x00000042, -SX_PERF_SEL_DB0_MRT6_DISCARD_SRC = 0x00000043, -SX_PERF_SEL_DB0_MRT6_SINGLE_QUADS = 0x00000044, -SX_PERF_SEL_DB0_MRT6_DOUBLE_QUADS = 0x00000045, -SX_PERF_SEL_DB0_MRT7_BLEND_BYPASS = 0x00000046, -SX_PERF_SEL_DB0_MRT7_DONT_RD_DEST = 0x00000047, -SX_PERF_SEL_DB0_MRT7_DISCARD_SRC = 0x00000048, -SX_PERF_SEL_DB0_MRT7_SINGLE_QUADS = 0x00000049, -SX_PERF_SEL_DB0_MRT7_DOUBLE_QUADS = 0x0000004a, -SX_PERF_SEL_DB1_A2M_DISCARD_QUADS = 0x0000004b, -SX_PERF_SEL_DB1_MRT0_BLEND_BYPASS = 0x0000004c, -SX_PERF_SEL_DB1_MRT0_DONT_RD_DEST = 0x0000004d, -SX_PERF_SEL_DB1_MRT0_DISCARD_SRC = 0x0000004e, -SX_PERF_SEL_DB1_MRT0_SINGLE_QUADS = 0x0000004f, -SX_PERF_SEL_DB1_MRT0_DOUBLE_QUADS = 0x00000050, -SX_PERF_SEL_DB1_MRT1_BLEND_BYPASS = 0x00000051, -SX_PERF_SEL_DB1_MRT1_DONT_RD_DEST = 0x00000052, -SX_PERF_SEL_DB1_MRT1_DISCARD_SRC = 0x00000053, -SX_PERF_SEL_DB1_MRT1_SINGLE_QUADS = 0x00000054, -SX_PERF_SEL_DB1_MRT1_DOUBLE_QUADS = 0x00000055, -SX_PERF_SEL_DB1_MRT2_BLEND_BYPASS = 0x00000056, -SX_PERF_SEL_DB1_MRT2_DONT_RD_DEST = 0x00000057, -SX_PERF_SEL_DB1_MRT2_DISCARD_SRC = 0x00000058, -SX_PERF_SEL_DB1_MRT2_SINGLE_QUADS = 0x00000059, -SX_PERF_SEL_DB1_MRT2_DOUBLE_QUADS = 0x0000005a, -SX_PERF_SEL_DB1_MRT3_BLEND_BYPASS 
= 0x0000005b, -SX_PERF_SEL_DB1_MRT3_DONT_RD_DEST = 0x0000005c, -SX_PERF_SEL_DB1_MRT3_DISCARD_SRC = 0x0000005d, -SX_PERF_SEL_DB1_MRT3_SINGLE_QUADS = 0x0000005e, -SX_PERF_SEL_DB1_MRT3_DOUBLE_QUADS = 0x0000005f, -SX_PERF_SEL_DB1_MRT4_BLEND_BYPASS = 0x00000060, -SX_PERF_SEL_DB1_MRT4_DONT_RD_DEST = 0x00000061, -SX_PERF_SEL_DB1_MRT4_DISCARD_SRC = 0x00000062, -SX_PERF_SEL_DB1_MRT4_SINGLE_QUADS = 0x00000063, -SX_PERF_SEL_DB1_MRT4_DOUBLE_QUADS = 0x00000064, -SX_PERF_SEL_DB1_MRT5_BLEND_BYPASS = 0x00000065, -SX_PERF_SEL_DB1_MRT5_DONT_RD_DEST = 0x00000066, -SX_PERF_SEL_DB1_MRT5_DISCARD_SRC = 0x00000067, -SX_PERF_SEL_DB1_MRT5_SINGLE_QUADS = 0x00000068, -SX_PERF_SEL_DB1_MRT5_DOUBLE_QUADS = 0x00000069, -SX_PERF_SEL_DB1_MRT6_BLEND_BYPASS = 0x0000006a, -SX_PERF_SEL_DB1_MRT6_DONT_RD_DEST = 0x0000006b, -SX_PERF_SEL_DB1_MRT6_DISCARD_SRC = 0x0000006c, -SX_PERF_SEL_DB1_MRT6_SINGLE_QUADS = 0x0000006d, -SX_PERF_SEL_DB1_MRT6_DOUBLE_QUADS = 0x0000006e, -SX_PERF_SEL_DB1_MRT7_BLEND_BYPASS = 0x0000006f, -SX_PERF_SEL_DB1_MRT7_DONT_RD_DEST = 0x00000070, -SX_PERF_SEL_DB1_MRT7_DISCARD_SRC = 0x00000071, -SX_PERF_SEL_DB1_MRT7_SINGLE_QUADS = 0x00000072, -SX_PERF_SEL_DB1_MRT7_DOUBLE_QUADS = 0x00000073, -SX_PERF_SEL_DB2_A2M_DISCARD_QUADS = 0x00000074, -SX_PERF_SEL_DB2_MRT0_BLEND_BYPASS = 0x00000075, -SX_PERF_SEL_DB2_MRT0_DONT_RD_DEST = 0x00000076, -SX_PERF_SEL_DB2_MRT0_DISCARD_SRC = 0x00000077, -SX_PERF_SEL_DB2_MRT0_SINGLE_QUADS = 0x00000078, -SX_PERF_SEL_DB2_MRT0_DOUBLE_QUADS = 0x00000079, -SX_PERF_SEL_DB2_MRT1_BLEND_BYPASS = 0x0000007a, -SX_PERF_SEL_DB2_MRT1_DONT_RD_DEST = 0x0000007b, -SX_PERF_SEL_DB2_MRT1_DISCARD_SRC = 0x0000007c, -SX_PERF_SEL_DB2_MRT1_SINGLE_QUADS = 0x0000007d, -SX_PERF_SEL_DB2_MRT1_DOUBLE_QUADS = 0x0000007e, -SX_PERF_SEL_DB2_MRT2_BLEND_BYPASS = 0x0000007f, -SX_PERF_SEL_DB2_MRT2_DONT_RD_DEST = 0x00000080, -SX_PERF_SEL_DB2_MRT2_DISCARD_SRC = 0x00000081, -SX_PERF_SEL_DB2_MRT2_SINGLE_QUADS = 0x00000082, -SX_PERF_SEL_DB2_MRT2_DOUBLE_QUADS = 0x00000083, 
-SX_PERF_SEL_DB2_MRT3_BLEND_BYPASS = 0x00000084, -SX_PERF_SEL_DB2_MRT3_DONT_RD_DEST = 0x00000085, -SX_PERF_SEL_DB2_MRT3_DISCARD_SRC = 0x00000086, -SX_PERF_SEL_DB2_MRT3_SINGLE_QUADS = 0x00000087, -SX_PERF_SEL_DB2_MRT3_DOUBLE_QUADS = 0x00000088, -SX_PERF_SEL_DB2_MRT4_BLEND_BYPASS = 0x00000089, -SX_PERF_SEL_DB2_MRT4_DONT_RD_DEST = 0x0000008a, -SX_PERF_SEL_DB2_MRT4_DISCARD_SRC = 0x0000008b, -SX_PERF_SEL_DB2_MRT4_SINGLE_QUADS = 0x0000008c, -SX_PERF_SEL_DB2_MRT4_DOUBLE_QUADS = 0x0000008d, -SX_PERF_SEL_DB2_MRT5_BLEND_BYPASS = 0x0000008e, -SX_PERF_SEL_DB2_MRT5_DONT_RD_DEST = 0x0000008f, -SX_PERF_SEL_DB2_MRT5_DISCARD_SRC = 0x00000090, -SX_PERF_SEL_DB2_MRT5_SINGLE_QUADS = 0x00000091, -SX_PERF_SEL_DB2_MRT5_DOUBLE_QUADS = 0x00000092, -SX_PERF_SEL_DB2_MRT6_BLEND_BYPASS = 0x00000093, -SX_PERF_SEL_DB2_MRT6_DONT_RD_DEST = 0x00000094, -SX_PERF_SEL_DB2_MRT6_DISCARD_SRC = 0x00000095, -SX_PERF_SEL_DB2_MRT6_SINGLE_QUADS = 0x00000096, -SX_PERF_SEL_DB2_MRT6_DOUBLE_QUADS = 0x00000097, -SX_PERF_SEL_DB2_MRT7_BLEND_BYPASS = 0x00000098, -SX_PERF_SEL_DB2_MRT7_DONT_RD_DEST = 0x00000099, -SX_PERF_SEL_DB2_MRT7_DISCARD_SRC = 0x0000009a, -SX_PERF_SEL_DB2_MRT7_SINGLE_QUADS = 0x0000009b, -SX_PERF_SEL_DB2_MRT7_DOUBLE_QUADS = 0x0000009c, -SX_PERF_SEL_DB3_A2M_DISCARD_QUADS = 0x0000009d, -SX_PERF_SEL_DB3_MRT0_BLEND_BYPASS = 0x0000009e, -SX_PERF_SEL_DB3_MRT0_DONT_RD_DEST = 0x0000009f, -SX_PERF_SEL_DB3_MRT0_DISCARD_SRC = 0x000000a0, -SX_PERF_SEL_DB3_MRT0_SINGLE_QUADS = 0x000000a1, -SX_PERF_SEL_DB3_MRT0_DOUBLE_QUADS = 0x000000a2, -SX_PERF_SEL_DB3_MRT1_BLEND_BYPASS = 0x000000a3, -SX_PERF_SEL_DB3_MRT1_DONT_RD_DEST = 0x000000a4, -SX_PERF_SEL_DB3_MRT1_DISCARD_SRC = 0x000000a5, -SX_PERF_SEL_DB3_MRT1_SINGLE_QUADS = 0x000000a6, -SX_PERF_SEL_DB3_MRT1_DOUBLE_QUADS = 0x000000a7, -SX_PERF_SEL_DB3_MRT2_BLEND_BYPASS = 0x000000a8, -SX_PERF_SEL_DB3_MRT2_DONT_RD_DEST = 0x000000a9, -SX_PERF_SEL_DB3_MRT2_DISCARD_SRC = 0x000000aa, -SX_PERF_SEL_DB3_MRT2_SINGLE_QUADS = 0x000000ab, -SX_PERF_SEL_DB3_MRT2_DOUBLE_QUADS = 
0x000000ac, -SX_PERF_SEL_DB3_MRT3_BLEND_BYPASS = 0x000000ad, -SX_PERF_SEL_DB3_MRT3_DONT_RD_DEST = 0x000000ae, -SX_PERF_SEL_DB3_MRT3_DISCARD_SRC = 0x000000af, -SX_PERF_SEL_DB3_MRT3_SINGLE_QUADS = 0x000000b0, -SX_PERF_SEL_DB3_MRT3_DOUBLE_QUADS = 0x000000b1, -SX_PERF_SEL_DB3_MRT4_BLEND_BYPASS = 0x000000b2, -SX_PERF_SEL_DB3_MRT4_DONT_RD_DEST = 0x000000b3, -SX_PERF_SEL_DB3_MRT4_DISCARD_SRC = 0x000000b4, -SX_PERF_SEL_DB3_MRT4_SINGLE_QUADS = 0x000000b5, -SX_PERF_SEL_DB3_MRT4_DOUBLE_QUADS = 0x000000b6, -SX_PERF_SEL_DB3_MRT5_BLEND_BYPASS = 0x000000b7, -SX_PERF_SEL_DB3_MRT5_DONT_RD_DEST = 0x000000b8, -SX_PERF_SEL_DB3_MRT5_DISCARD_SRC = 0x000000b9, -SX_PERF_SEL_DB3_MRT5_SINGLE_QUADS = 0x000000ba, -SX_PERF_SEL_DB3_MRT5_DOUBLE_QUADS = 0x000000bb, -SX_PERF_SEL_DB3_MRT6_BLEND_BYPASS = 0x000000bc, -SX_PERF_SEL_DB3_MRT6_DONT_RD_DEST = 0x000000bd, -SX_PERF_SEL_DB3_MRT6_DISCARD_SRC = 0x000000be, -SX_PERF_SEL_DB3_MRT6_SINGLE_QUADS = 0x000000bf, -SX_PERF_SEL_DB3_MRT6_DOUBLE_QUADS = 0x000000c0, -SX_PERF_SEL_DB3_MRT7_BLEND_BYPASS = 0x000000c1, -SX_PERF_SEL_DB3_MRT7_DONT_RD_DEST = 0x000000c2, -SX_PERF_SEL_DB3_MRT7_DISCARD_SRC = 0x000000c3, -SX_PERF_SEL_DB3_MRT7_SINGLE_QUADS = 0x000000c4, -SX_PERF_SEL_DB3_MRT7_DOUBLE_QUADS = 0x000000c5, -} SX_PERFCOUNTER_VALS; - -/******************************************************* - * DB Enums - *******************************************************/ - -/* - * ForceControl enum - */ - -typedef enum ForceControl { -FORCE_OFF = 0x00000000, -FORCE_ENABLE = 0x00000001, -FORCE_DISABLE = 0x00000002, -FORCE_RESERVED = 0x00000003, -} ForceControl; - -/* - * ZSamplePosition enum - */ - -typedef enum ZSamplePosition { -Z_SAMPLE_CENTER = 0x00000000, -Z_SAMPLE_CENTROID = 0x00000001, -} ZSamplePosition; - -/* - * ZOrder enum - */ - -typedef enum ZOrder { -LATE_Z = 0x00000000, -EARLY_Z_THEN_LATE_Z = 0x00000001, -RE_Z = 0x00000002, -EARLY_Z_THEN_RE_Z = 0x00000003, -} ZOrder; - -/* - * ZpassControl enum - */ - -typedef enum ZpassControl { -ZPASS_DISABLE = 
0x00000000, -ZPASS_SAMPLES = 0x00000001, -ZPASS_PIXELS = 0x00000002, -} ZpassControl; - -/* - * ZModeForce enum - */ - -typedef enum ZModeForce { -NO_FORCE = 0x00000000, -FORCE_EARLY_Z = 0x00000001, -FORCE_LATE_Z = 0x00000002, -FORCE_RE_Z = 0x00000003, -} ZModeForce; - -/* - * ZLimitSumm enum - */ - -typedef enum ZLimitSumm { -FORCE_SUMM_OFF = 0x00000000, -FORCE_SUMM_MINZ = 0x00000001, -FORCE_SUMM_MAXZ = 0x00000002, -FORCE_SUMM_BOTH = 0x00000003, -} ZLimitSumm; - -/* - * CompareFrag enum - */ - -typedef enum CompareFrag { -FRAG_NEVER = 0x00000000, -FRAG_LESS = 0x00000001, -FRAG_EQUAL = 0x00000002, -FRAG_LEQUAL = 0x00000003, -FRAG_GREATER = 0x00000004, -FRAG_NOTEQUAL = 0x00000005, -FRAG_GEQUAL = 0x00000006, -FRAG_ALWAYS = 0x00000007, -} CompareFrag; - -/* - * StencilOp enum - */ - -typedef enum StencilOp { -STENCIL_KEEP = 0x00000000, -STENCIL_ZERO = 0x00000001, -STENCIL_ONES = 0x00000002, -STENCIL_REPLACE_TEST = 0x00000003, -STENCIL_REPLACE_OP = 0x00000004, -STENCIL_ADD_CLAMP = 0x00000005, -STENCIL_SUB_CLAMP = 0x00000006, -STENCIL_INVERT = 0x00000007, -STENCIL_ADD_WRAP = 0x00000008, -STENCIL_SUB_WRAP = 0x00000009, -STENCIL_AND = 0x0000000a, -STENCIL_OR = 0x0000000b, -STENCIL_XOR = 0x0000000c, -STENCIL_NAND = 0x0000000d, -STENCIL_NOR = 0x0000000e, -STENCIL_XNOR = 0x0000000f, -} StencilOp; - -/* - * ConservativeZExport enum - */ - -typedef enum ConservativeZExport { -EXPORT_ANY_Z = 0x00000000, -EXPORT_LESS_THAN_Z = 0x00000001, -EXPORT_GREATER_THAN_Z = 0x00000002, -EXPORT_RESERVED = 0x00000003, -} ConservativeZExport; - -/* - * DbPSLControl enum - */ - -typedef enum DbPSLControl { -PSLC_AUTO = 0x00000000, -PSLC_ON_HANG_ONLY = 0x00000001, -PSLC_ASAP = 0x00000002, -PSLC_COUNTDOWN = 0x00000003, -} DbPSLControl; - -/* - * DbPRTFaultBehavior enum - */ - -typedef enum DbPRTFaultBehavior { -FAULT_ZERO = 0x00000000, -FAULT_ONE = 0x00000001, -FAULT_FAIL = 0x00000002, -FAULT_PASS = 0x00000003, -} DbPRTFaultBehavior; - -/* - * PerfCounter_Vals enum - */ - -typedef enum 
PerfCounter_Vals { -DB_PERF_SEL_SC_DB_tile_sends = 0x00000000, -DB_PERF_SEL_SC_DB_tile_busy = 0x00000001, -DB_PERF_SEL_SC_DB_tile_stalls = 0x00000002, -DB_PERF_SEL_SC_DB_tile_events = 0x00000003, -DB_PERF_SEL_SC_DB_tile_tiles = 0x00000004, -DB_PERF_SEL_SC_DB_tile_covered = 0x00000005, -DB_PERF_SEL_hiz_tc_read_starved = 0x00000006, -DB_PERF_SEL_hiz_tc_write_stall = 0x00000007, -DB_PERF_SEL_hiz_qtiles_culled = 0x00000008, -DB_PERF_SEL_his_qtiles_culled = 0x00000009, -DB_PERF_SEL_DB_SC_tile_sends = 0x0000000a, -DB_PERF_SEL_DB_SC_tile_busy = 0x0000000b, -DB_PERF_SEL_DB_SC_tile_stalls = 0x0000000c, -DB_PERF_SEL_DB_SC_tile_df_stalls = 0x0000000d, -DB_PERF_SEL_DB_SC_tile_tiles = 0x0000000e, -DB_PERF_SEL_DB_SC_tile_culled = 0x0000000f, -DB_PERF_SEL_DB_SC_tile_hier_kill = 0x00000010, -DB_PERF_SEL_DB_SC_tile_fast_ops = 0x00000011, -DB_PERF_SEL_DB_SC_tile_no_ops = 0x00000012, -DB_PERF_SEL_DB_SC_tile_tile_rate = 0x00000013, -DB_PERF_SEL_DB_SC_tile_ssaa_kill = 0x00000014, -DB_PERF_SEL_DB_SC_tile_fast_z_ops = 0x00000015, -DB_PERF_SEL_DB_SC_tile_fast_stencil_ops = 0x00000016, -DB_PERF_SEL_SC_DB_quad_sends = 0x00000017, -DB_PERF_SEL_SC_DB_quad_busy = 0x00000018, -DB_PERF_SEL_SC_DB_quad_squads = 0x00000019, -DB_PERF_SEL_SC_DB_quad_tiles = 0x0000001a, -DB_PERF_SEL_SC_DB_quad_pixels = 0x0000001b, -DB_PERF_SEL_SC_DB_quad_killed_tiles = 0x0000001c, -DB_PERF_SEL_DB_SC_quad_sends = 0x0000001d, -DB_PERF_SEL_DB_SC_quad_busy = 0x0000001e, -DB_PERF_SEL_DB_SC_quad_stalls = 0x0000001f, -DB_PERF_SEL_DB_SC_quad_tiles = 0x00000020, -DB_PERF_SEL_DB_SC_quad_lit_quad = 0x00000021, -DB_PERF_SEL_DB_CB_tile_sends = 0x00000022, -DB_PERF_SEL_DB_CB_tile_busy = 0x00000023, -DB_PERF_SEL_DB_CB_tile_stalls = 0x00000024, -DB_PERF_SEL_SX_DB_quad_sends = 0x00000025, -DB_PERF_SEL_SX_DB_quad_busy = 0x00000026, -DB_PERF_SEL_SX_DB_quad_stalls = 0x00000027, -DB_PERF_SEL_SX_DB_quad_quads = 0x00000028, -DB_PERF_SEL_SX_DB_quad_pixels = 0x00000029, -DB_PERF_SEL_SX_DB_quad_exports = 0x0000002a, 
-DB_PERF_SEL_SH_quads_outstanding_sum = 0x0000002b, -DB_PERF_SEL_DB_CB_lquad_sends = 0x0000002c, -DB_PERF_SEL_DB_CB_lquad_busy = 0x0000002d, -DB_PERF_SEL_DB_CB_lquad_stalls = 0x0000002e, -DB_PERF_SEL_DB_CB_lquad_quads = 0x0000002f, -DB_PERF_SEL_tile_rd_sends = 0x00000030, -DB_PERF_SEL_mi_tile_rd_outstanding_sum = 0x00000031, -DB_PERF_SEL_quad_rd_sends = 0x00000032, -DB_PERF_SEL_quad_rd_busy = 0x00000033, -DB_PERF_SEL_quad_rd_mi_stall = 0x00000034, -DB_PERF_SEL_quad_rd_rw_collision = 0x00000035, -DB_PERF_SEL_quad_rd_tag_stall = 0x00000036, -DB_PERF_SEL_quad_rd_32byte_reqs = 0x00000037, -DB_PERF_SEL_quad_rd_panic = 0x00000038, -DB_PERF_SEL_mi_quad_rd_outstanding_sum = 0x00000039, -DB_PERF_SEL_quad_rdret_sends = 0x0000003a, -DB_PERF_SEL_quad_rdret_busy = 0x0000003b, -DB_PERF_SEL_tile_wr_sends = 0x0000003c, -DB_PERF_SEL_tile_wr_acks = 0x0000003d, -DB_PERF_SEL_mi_tile_wr_outstanding_sum = 0x0000003e, -DB_PERF_SEL_quad_wr_sends = 0x0000003f, -DB_PERF_SEL_quad_wr_busy = 0x00000040, -DB_PERF_SEL_quad_wr_mi_stall = 0x00000041, -DB_PERF_SEL_quad_wr_coherency_stall = 0x00000042, -DB_PERF_SEL_quad_wr_acks = 0x00000043, -DB_PERF_SEL_mi_quad_wr_outstanding_sum = 0x00000044, -DB_PERF_SEL_Tile_Cache_misses = 0x00000045, -DB_PERF_SEL_Tile_Cache_hits = 0x00000046, -DB_PERF_SEL_Tile_Cache_flushes = 0x00000047, -DB_PERF_SEL_Tile_Cache_surface_stall = 0x00000048, -DB_PERF_SEL_Tile_Cache_starves = 0x00000049, -DB_PERF_SEL_Tile_Cache_mem_return_starve = 0x0000004a, -DB_PERF_SEL_tcp_dispatcher_reads = 0x0000004b, -DB_PERF_SEL_tcp_prefetcher_reads = 0x0000004c, -DB_PERF_SEL_tcp_preloader_reads = 0x0000004d, -DB_PERF_SEL_tcp_dispatcher_flushes = 0x0000004e, -DB_PERF_SEL_tcp_prefetcher_flushes = 0x0000004f, -DB_PERF_SEL_tcp_preloader_flushes = 0x00000050, -DB_PERF_SEL_Depth_Tile_Cache_sends = 0x00000051, -DB_PERF_SEL_Depth_Tile_Cache_busy = 0x00000052, -DB_PERF_SEL_Depth_Tile_Cache_starves = 0x00000053, -DB_PERF_SEL_Depth_Tile_Cache_dtile_locked = 0x00000054, 
-DB_PERF_SEL_Depth_Tile_Cache_alloc_stall = 0x00000055, -DB_PERF_SEL_Depth_Tile_Cache_misses = 0x00000056, -DB_PERF_SEL_Depth_Tile_Cache_hits = 0x00000057, -DB_PERF_SEL_Depth_Tile_Cache_flushes = 0x00000058, -DB_PERF_SEL_Depth_Tile_Cache_noop_tile = 0x00000059, -DB_PERF_SEL_Depth_Tile_Cache_detailed_noop = 0x0000005a, -DB_PERF_SEL_Depth_Tile_Cache_event = 0x0000005b, -DB_PERF_SEL_Depth_Tile_Cache_tile_frees = 0x0000005c, -DB_PERF_SEL_Depth_Tile_Cache_data_frees = 0x0000005d, -DB_PERF_SEL_Depth_Tile_Cache_mem_return_starve = 0x0000005e, -DB_PERF_SEL_Stencil_Cache_misses = 0x0000005f, -DB_PERF_SEL_Stencil_Cache_hits = 0x00000060, -DB_PERF_SEL_Stencil_Cache_flushes = 0x00000061, -DB_PERF_SEL_Stencil_Cache_starves = 0x00000062, -DB_PERF_SEL_Stencil_Cache_frees = 0x00000063, -DB_PERF_SEL_Z_Cache_separate_Z_misses = 0x00000064, -DB_PERF_SEL_Z_Cache_separate_Z_hits = 0x00000065, -DB_PERF_SEL_Z_Cache_separate_Z_flushes = 0x00000066, -DB_PERF_SEL_Z_Cache_separate_Z_starves = 0x00000067, -DB_PERF_SEL_Z_Cache_pmask_misses = 0x00000068, -DB_PERF_SEL_Z_Cache_pmask_hits = 0x00000069, -DB_PERF_SEL_Z_Cache_pmask_flushes = 0x0000006a, -DB_PERF_SEL_Z_Cache_pmask_starves = 0x0000006b, -DB_PERF_SEL_Z_Cache_frees = 0x0000006c, -DB_PERF_SEL_Plane_Cache_misses = 0x0000006d, -DB_PERF_SEL_Plane_Cache_hits = 0x0000006e, -DB_PERF_SEL_Plane_Cache_flushes = 0x0000006f, -DB_PERF_SEL_Plane_Cache_starves = 0x00000070, -DB_PERF_SEL_Plane_Cache_frees = 0x00000071, -DB_PERF_SEL_flush_expanded_stencil = 0x00000072, -DB_PERF_SEL_flush_compressed_stencil = 0x00000073, -DB_PERF_SEL_flush_single_stencil = 0x00000074, -DB_PERF_SEL_planes_flushed = 0x00000075, -DB_PERF_SEL_flush_1plane = 0x00000076, -DB_PERF_SEL_flush_2plane = 0x00000077, -DB_PERF_SEL_flush_3plane = 0x00000078, -DB_PERF_SEL_flush_4plane = 0x00000079, -DB_PERF_SEL_flush_5plane = 0x0000007a, -DB_PERF_SEL_flush_6plane = 0x0000007b, -DB_PERF_SEL_flush_7plane = 0x0000007c, -DB_PERF_SEL_flush_8plane = 0x0000007d, -DB_PERF_SEL_flush_9plane = 
0x0000007e, -DB_PERF_SEL_flush_10plane = 0x0000007f, -DB_PERF_SEL_flush_11plane = 0x00000080, -DB_PERF_SEL_flush_12plane = 0x00000081, -DB_PERF_SEL_flush_13plane = 0x00000082, -DB_PERF_SEL_flush_14plane = 0x00000083, -DB_PERF_SEL_flush_15plane = 0x00000084, -DB_PERF_SEL_flush_16plane = 0x00000085, -DB_PERF_SEL_flush_expanded_z = 0x00000086, -DB_PERF_SEL_earlyZ_waiting_for_postZ_done = 0x00000087, -DB_PERF_SEL_reZ_waiting_for_postZ_done = 0x00000088, -DB_PERF_SEL_dk_tile_sends = 0x00000089, -DB_PERF_SEL_dk_tile_busy = 0x0000008a, -DB_PERF_SEL_dk_tile_quad_starves = 0x0000008b, -DB_PERF_SEL_dk_tile_stalls = 0x0000008c, -DB_PERF_SEL_dk_squad_sends = 0x0000008d, -DB_PERF_SEL_dk_squad_busy = 0x0000008e, -DB_PERF_SEL_dk_squad_stalls = 0x0000008f, -DB_PERF_SEL_Op_Pipe_Busy = 0x00000090, -DB_PERF_SEL_Op_Pipe_MC_Read_stall = 0x00000091, -DB_PERF_SEL_qc_busy = 0x00000092, -DB_PERF_SEL_qc_xfc = 0x00000093, -DB_PERF_SEL_qc_conflicts = 0x00000094, -DB_PERF_SEL_qc_full_stall = 0x00000095, -DB_PERF_SEL_qc_in_preZ_tile_stalls_postZ = 0x00000096, -DB_PERF_SEL_qc_in_postZ_tile_stalls_preZ = 0x00000097, -DB_PERF_SEL_tsc_insert_summarize_stall = 0x00000098, -DB_PERF_SEL_tl_busy = 0x00000099, -DB_PERF_SEL_tl_dtc_read_starved = 0x0000009a, -DB_PERF_SEL_tl_z_fetch_stall = 0x0000009b, -DB_PERF_SEL_tl_stencil_stall = 0x0000009c, -DB_PERF_SEL_tl_z_decompress_stall = 0x0000009d, -DB_PERF_SEL_tl_stencil_locked_stall = 0x0000009e, -DB_PERF_SEL_tl_events = 0x0000009f, -DB_PERF_SEL_tl_summarize_squads = 0x000000a0, -DB_PERF_SEL_tl_flush_expand_squads = 0x000000a1, -DB_PERF_SEL_tl_expand_squads = 0x000000a2, -DB_PERF_SEL_tl_preZ_squads = 0x000000a3, -DB_PERF_SEL_tl_postZ_squads = 0x000000a4, -DB_PERF_SEL_tl_preZ_noop_squads = 0x000000a5, -DB_PERF_SEL_tl_postZ_noop_squads = 0x000000a6, -DB_PERF_SEL_tl_tile_ops = 0x000000a7, -DB_PERF_SEL_tl_in_xfc = 0x000000a8, -DB_PERF_SEL_tl_in_single_stencil_expand_stall = 0x000000a9, -DB_PERF_SEL_tl_in_fast_z_stall = 0x000000aa, -DB_PERF_SEL_tl_out_xfc = 
0x000000ab, -DB_PERF_SEL_tl_out_squads = 0x000000ac, -DB_PERF_SEL_zf_plane_multicycle = 0x000000ad, -DB_PERF_SEL_PostZ_Samples_passing_Z = 0x000000ae, -DB_PERF_SEL_PostZ_Samples_failing_Z = 0x000000af, -DB_PERF_SEL_PostZ_Samples_failing_S = 0x000000b0, -DB_PERF_SEL_PreZ_Samples_passing_Z = 0x000000b1, -DB_PERF_SEL_PreZ_Samples_failing_Z = 0x000000b2, -DB_PERF_SEL_PreZ_Samples_failing_S = 0x000000b3, -DB_PERF_SEL_ts_tc_update_stall = 0x000000b4, -DB_PERF_SEL_sc_kick_start = 0x000000b5, -DB_PERF_SEL_sc_kick_end = 0x000000b6, -DB_PERF_SEL_clock_reg_active = 0x000000b7, -DB_PERF_SEL_clock_main_active = 0x000000b8, -DB_PERF_SEL_clock_mem_export_active = 0x000000b9, -DB_PERF_SEL_esr_ps_out_busy = 0x000000ba, -DB_PERF_SEL_esr_ps_lqf_busy = 0x000000bb, -DB_PERF_SEL_esr_ps_lqf_stall = 0x000000bc, -DB_PERF_SEL_etr_out_send = 0x000000bd, -DB_PERF_SEL_etr_out_busy = 0x000000be, -DB_PERF_SEL_etr_out_ltile_probe_fifo_full_stall = 0x000000bf, -DB_PERF_SEL_etr_out_cb_tile_stall = 0x000000c0, -DB_PERF_SEL_etr_out_esr_stall = 0x000000c1, -DB_PERF_SEL_esr_ps_sqq_busy = 0x000000c2, -DB_PERF_SEL_esr_ps_sqq_stall = 0x000000c3, -DB_PERF_SEL_esr_eot_fwd_busy = 0x000000c4, -DB_PERF_SEL_esr_eot_fwd_holding_squad = 0x000000c5, -DB_PERF_SEL_esr_eot_fwd_forward = 0x000000c6, -DB_PERF_SEL_esr_sqq_zi_busy = 0x000000c7, -DB_PERF_SEL_esr_sqq_zi_stall = 0x000000c8, -DB_PERF_SEL_postzl_sq_pt_busy = 0x000000c9, -DB_PERF_SEL_postzl_sq_pt_stall = 0x000000ca, -DB_PERF_SEL_postzl_se_busy = 0x000000cb, -DB_PERF_SEL_postzl_se_stall = 0x000000cc, -DB_PERF_SEL_postzl_partial_launch = 0x000000cd, -DB_PERF_SEL_postzl_full_launch = 0x000000ce, -DB_PERF_SEL_postzl_partial_waiting = 0x000000cf, -DB_PERF_SEL_postzl_tile_mem_stall = 0x000000d0, -DB_PERF_SEL_postzl_tile_init_stall = 0x000000d1, -DB_PEFF_SEL_prezl_tile_mem_stall = 0x000000d2, -DB_PERF_SEL_prezl_tile_init_stall = 0x000000d3, -DB_PERF_SEL_dtt_sm_clash_stall = 0x000000d4, -DB_PERF_SEL_dtt_sm_slot_stall = 0x000000d5, -DB_PERF_SEL_dtt_sm_miss_stall = 
0x000000d6, -DB_PERF_SEL_mi_rdreq_busy = 0x000000d7, -DB_PERF_SEL_mi_rdreq_stall = 0x000000d8, -DB_PERF_SEL_mi_wrreq_busy = 0x000000d9, -DB_PERF_SEL_mi_wrreq_stall = 0x000000da, -DB_PERF_SEL_recomp_tile_to_1zplane_no_fastop = 0x000000db, -DB_PERF_SEL_dkg_tile_rate_tile = 0x000000dc, -DB_PERF_SEL_prezl_src_in_sends = 0x000000dd, -DB_PERF_SEL_prezl_src_in_stall = 0x000000de, -DB_PERF_SEL_prezl_src_in_squads = 0x000000df, -DB_PERF_SEL_prezl_src_in_squads_unrolled = 0x000000e0, -DB_PERF_SEL_prezl_src_in_tile_rate = 0x000000e1, -DB_PERF_SEL_prezl_src_in_tile_rate_unrolled = 0x000000e2, -DB_PERF_SEL_prezl_src_out_stall = 0x000000e3, -DB_PERF_SEL_postzl_src_in_sends = 0x000000e4, -DB_PERF_SEL_postzl_src_in_stall = 0x000000e5, -DB_PERF_SEL_postzl_src_in_squads = 0x000000e6, -DB_PERF_SEL_postzl_src_in_squads_unrolled = 0x000000e7, -DB_PERF_SEL_postzl_src_in_tile_rate = 0x000000e8, -DB_PERF_SEL_postzl_src_in_tile_rate_unrolled = 0x000000e9, -DB_PERF_SEL_postzl_src_out_stall = 0x000000ea, -DB_PERF_SEL_esr_ps_src_in_sends = 0x000000eb, -DB_PERF_SEL_esr_ps_src_in_stall = 0x000000ec, -DB_PERF_SEL_esr_ps_src_in_squads = 0x000000ed, -DB_PERF_SEL_esr_ps_src_in_squads_unrolled = 0x000000ee, -DB_PERF_SEL_esr_ps_src_in_tile_rate = 0x000000ef, -DB_PERF_SEL_esr_ps_src_in_tile_rate_unrolled = 0x000000f0, -DB_PERF_SEL_esr_ps_src_in_tile_rate_unrolled_to_pixel_rate = 0x000000f1, -DB_PERF_SEL_esr_ps_src_out_stall = 0x000000f2, -DB_PERF_SEL_depth_bounds_qtiles_culled = 0x000000f3, -DB_PERF_SEL_PreZ_Samples_failing_DB = 0x000000f4, -DB_PERF_SEL_PostZ_Samples_failing_DB = 0x000000f5, -DB_PERF_SEL_flush_compressed = 0x000000f6, -DB_PERF_SEL_flush_plane_le4 = 0x000000f7, -DB_PERF_SEL_tiles_z_fully_summarized = 0x000000f8, -DB_PERF_SEL_tiles_stencil_fully_summarized = 0x000000f9, -DB_PERF_SEL_tiles_z_clear_on_expclear = 0x000000fa, -DB_PERF_SEL_tiles_s_clear_on_expclear = 0x000000fb, -DB_PERF_SEL_tiles_decomp_on_expclear = 0x000000fc, -DB_PERF_SEL_tiles_compressed_to_decompressed = 0x000000fd, 
-DB_PERF_SEL_Op_Pipe_Prez_Busy = 0x000000fe, -DB_PERF_SEL_Op_Pipe_Postz_Busy = 0x000000ff, -DB_PERF_SEL_di_dt_stall = 0x00000100, -DB_PERF_SEL_DB_SC_quad_double_quad = 0x00000101, -DB_PERF_SEL_SX_DB_quad_export_quads = 0x00000102, -DB_PERF_SEL_SX_DB_quad_double_format = 0x00000103, -DB_PERF_SEL_SX_DB_quad_fast_format = 0x00000104, -DB_PERF_SEL_SX_DB_quad_slow_format = 0x00000105, -DB_PERF_SEL_DB_CB_lquad_export_quads = 0x00000106, -DB_PERF_SEL_DB_CB_lquad_double_format = 0x00000107, -DB_PERF_SEL_DB_CB_lquad_fast_format = 0x00000108, -DB_PERF_SEL_DB_CB_lquad_slow_format = 0x00000109, -DB_PERF_SEL_CB_DB_rdreq_sends = 0x0000010a, -DB_PERF_SEL_CB_DB_rdreq_prt_sends = 0x0000010b, -DB_PERF_SEL_CB_DB_wrreq_sends = 0x0000010c, -DB_PERF_SEL_CB_DB_wrreq_prt_sends = 0x0000010d, -DB_PERF_SEL_DB_CB_rdret_ack = 0x0000010e, -DB_PERF_SEL_DB_CB_rdret_nack = 0x0000010f, -DB_PERF_SEL_DB_CB_wrret_ack = 0x00000110, -DB_PERF_SEL_DB_CB_wrret_nack = 0x00000111, -DB_PERF_SEL_DFSM_squads_in = 0x00000112, -DB_PERF_SEL_DFSM_full_cleared_squads_out = 0x00000113, -DB_PERF_SEL_DFSM_quads_in = 0x00000114, -DB_PERF_SEL_DFSM_fully_cleared_quads_out = 0x00000115, -DB_PERF_SEL_DFSM_lit_pixels_in = 0x00000116, -DB_PERF_SEL_DFSM_fully_cleared_pixels_out = 0x00000117, -DB_PERF_SEL_DFSM_lit_samples_in = 0x00000118, -DB_PERF_SEL_DFSM_lit_samples_out = 0x00000119, -DB_PERF_SEL_DFSM_cycles_above_watermark = 0x0000011a, -DB_PERF_SEL_DFSM_cant_accept_squads_but_not_stalled_by_downstream = 0x0000011b, -DB_PERF_SEL_DFSM_stalled_by_downstream = 0x0000011c, -DB_PERF_SEL_DFSM_evicted_squads_above_watermark = 0x0000011d, -DB_PERF_SEL_DFSM_collisions_due_to_POPS_overflow = 0x0000011e, -DB_PERF_SEL_DFSM_collisions_detected_within_POPS_FIFO = 0x0000011f, -DB_PERF_SEL_DFSM_evicted_squads_due_to_prim_watermark = 0x00000120, -} PerfCounter_Vals; - -/* - * RingCounterControl enum - */ - -typedef enum RingCounterControl { -COUNTER_RING_SPLIT = 0x00000000, -COUNTER_RING_0 = 0x00000001, -COUNTER_RING_1 = 0x00000002, -} 
RingCounterControl; - -/* - * DbMemArbWatermarks enum - */ - -typedef enum DbMemArbWatermarks { -TRANSFERRED_64_BYTES = 0x00000000, -TRANSFERRED_128_BYTES = 0x00000001, -TRANSFERRED_256_BYTES = 0x00000002, -TRANSFERRED_512_BYTES = 0x00000003, -TRANSFERRED_1024_BYTES = 0x00000004, -TRANSFERRED_2048_BYTES = 0x00000005, -TRANSFERRED_4096_BYTES = 0x00000006, -TRANSFERRED_8192_BYTES = 0x00000007, -} DbMemArbWatermarks; - -/* - * DFSMFlushEvents enum - */ - -typedef enum DFSMFlushEvents { -DB_FLUSH_AND_INV_DB_DATA_TS = 0x00000000, -DB_FLUSH_AND_INV_DB_META = 0x00000001, -DB_CACHE_FLUSH = 0x00000002, -DB_CACHE_FLUSH_TS = 0x00000003, -DB_CACHE_FLUSH_AND_INV_EVENT = 0x00000004, -DB_CACHE_FLUSH_AND_INV_TS_EVENT = 0x00000005, -} DFSMFlushEvents; - -/* - * PixelPipeCounterId enum - */ - -typedef enum PixelPipeCounterId { -PIXEL_PIPE_OCCLUSION_COUNT_0 = 0x00000000, -PIXEL_PIPE_OCCLUSION_COUNT_1 = 0x00000001, -PIXEL_PIPE_OCCLUSION_COUNT_2 = 0x00000002, -PIXEL_PIPE_OCCLUSION_COUNT_3 = 0x00000003, -PIXEL_PIPE_SCREEN_MIN_EXTENTS_0 = 0x00000004, -PIXEL_PIPE_SCREEN_MAX_EXTENTS_0 = 0x00000005, -PIXEL_PIPE_SCREEN_MIN_EXTENTS_1 = 0x00000006, -PIXEL_PIPE_SCREEN_MAX_EXTENTS_1 = 0x00000007, -} PixelPipeCounterId; - -/* - * PixelPipeStride enum - */ - -typedef enum PixelPipeStride { -PIXEL_PIPE_STRIDE_32_BITS = 0x00000000, -PIXEL_PIPE_STRIDE_64_BITS = 0x00000001, -PIXEL_PIPE_STRIDE_128_BITS = 0x00000002, -PIXEL_PIPE_STRIDE_256_BITS = 0x00000003, -} PixelPipeStride; - -/******************************************************* - * TA Enums - *******************************************************/ - -/* - * TEX_BORDER_COLOR_TYPE enum - */ - -typedef enum TEX_BORDER_COLOR_TYPE { -TEX_BorderColor_TransparentBlack = 0x00000000, -TEX_BorderColor_OpaqueBlack = 0x00000001, -TEX_BorderColor_OpaqueWhite = 0x00000002, -TEX_BorderColor_Register = 0x00000003, -} TEX_BORDER_COLOR_TYPE; - -/* - * TEX_CHROMA_KEY enum - */ - -typedef enum TEX_CHROMA_KEY { -TEX_ChromaKey_Disabled = 0x00000000, 
-TEX_ChromaKey_Kill = 0x00000001, -TEX_ChromaKey_Blend = 0x00000002, -TEX_ChromaKey_RESERVED_3 = 0x00000003, -} TEX_CHROMA_KEY; - -/* - * TEX_CLAMP enum - */ - -typedef enum TEX_CLAMP { -TEX_Clamp_Repeat = 0x00000000, -TEX_Clamp_Mirror = 0x00000001, -TEX_Clamp_ClampToLast = 0x00000002, -TEX_Clamp_MirrorOnceToLast = 0x00000003, -TEX_Clamp_ClampHalfToBorder = 0x00000004, -TEX_Clamp_MirrorOnceHalfToBorder = 0x00000005, -TEX_Clamp_ClampToBorder = 0x00000006, -TEX_Clamp_MirrorOnceToBorder = 0x00000007, -} TEX_CLAMP; - -/* - * TEX_COORD_TYPE enum - */ - -typedef enum TEX_COORD_TYPE { -TEX_CoordType_Unnormalized = 0x00000000, -TEX_CoordType_Normalized = 0x00000001, -} TEX_COORD_TYPE; - -/* - * TEX_DEPTH_COMPARE_FUNCTION enum - */ - -typedef enum TEX_DEPTH_COMPARE_FUNCTION { -TEX_DepthCompareFunction_Never = 0x00000000, -TEX_DepthCompareFunction_Less = 0x00000001, -TEX_DepthCompareFunction_Equal = 0x00000002, -TEX_DepthCompareFunction_LessEqual = 0x00000003, -TEX_DepthCompareFunction_Greater = 0x00000004, -TEX_DepthCompareFunction_NotEqual = 0x00000005, -TEX_DepthCompareFunction_GreaterEqual = 0x00000006, -TEX_DepthCompareFunction_Always = 0x00000007, -} TEX_DEPTH_COMPARE_FUNCTION; - -/* - * TEX_DIM enum - */ - -typedef enum TEX_DIM { -TEX_Dim_1D = 0x00000000, -TEX_Dim_2D = 0x00000001, -TEX_Dim_3D = 0x00000002, -TEX_Dim_CubeMap = 0x00000003, -TEX_Dim_1DArray = 0x00000004, -TEX_Dim_2DArray = 0x00000005, -TEX_Dim_2D_MSAA = 0x00000006, -TEX_Dim_2DArray_MSAA = 0x00000007, -} TEX_DIM; - -/* - * TEX_FORMAT_COMP enum - */ - -typedef enum TEX_FORMAT_COMP { -TEX_FormatComp_Unsigned = 0x00000000, -TEX_FormatComp_Signed = 0x00000001, -TEX_FormatComp_UnsignedBiased = 0x00000002, -TEX_FormatComp_RESERVED_3 = 0x00000003, -} TEX_FORMAT_COMP; - -/* - * TEX_MAX_ANISO_RATIO enum - */ - -typedef enum TEX_MAX_ANISO_RATIO { -TEX_MaxAnisoRatio_1to1 = 0x00000000, -TEX_MaxAnisoRatio_2to1 = 0x00000001, -TEX_MaxAnisoRatio_4to1 = 0x00000002, -TEX_MaxAnisoRatio_8to1 = 0x00000003, 
-TEX_MaxAnisoRatio_16to1 = 0x00000004, -TEX_MaxAnisoRatio_RESERVED_5 = 0x00000005, -TEX_MaxAnisoRatio_RESERVED_6 = 0x00000006, -TEX_MaxAnisoRatio_RESERVED_7 = 0x00000007, -} TEX_MAX_ANISO_RATIO; - -/* - * TEX_MIP_FILTER enum - */ - -typedef enum TEX_MIP_FILTER { -TEX_MipFilter_None = 0x00000000, -TEX_MipFilter_Point = 0x00000001, -TEX_MipFilter_Linear = 0x00000002, -TEX_MipFilter_Point_Aniso_Adj = 0x00000003, -} TEX_MIP_FILTER; - -/* - * TEX_REQUEST_SIZE enum - */ - -typedef enum TEX_REQUEST_SIZE { -TEX_RequestSize_32B = 0x00000000, -TEX_RequestSize_64B = 0x00000001, -TEX_RequestSize_128B = 0x00000002, -TEX_RequestSize_2X64B = 0x00000003, -} TEX_REQUEST_SIZE; - -/* - * TEX_SAMPLER_TYPE enum - */ - -typedef enum TEX_SAMPLER_TYPE { -TEX_SamplerType_Invalid = 0x00000000, -TEX_SamplerType_Valid = 0x00000001, -} TEX_SAMPLER_TYPE; - -/* - * TEX_XY_FILTER enum - */ - -typedef enum TEX_XY_FILTER { -TEX_XYFilter_Point = 0x00000000, -TEX_XYFilter_Linear = 0x00000001, -TEX_XYFilter_AnisoPoint = 0x00000002, -TEX_XYFilter_AnisoLinear = 0x00000003, -} TEX_XY_FILTER; - -/* - * TEX_Z_FILTER enum - */ - -typedef enum TEX_Z_FILTER { -TEX_ZFilter_None = 0x00000000, -TEX_ZFilter_Point = 0x00000001, -TEX_ZFilter_Linear = 0x00000002, -TEX_ZFilter_RESERVED_3 = 0x00000003, -} TEX_Z_FILTER; - -/* - * VTX_CLAMP enum - */ - -typedef enum VTX_CLAMP { -VTX_Clamp_ClampToZero = 0x00000000, -VTX_Clamp_ClampToNAN = 0x00000001, -} VTX_CLAMP; - -/* - * VTX_FETCH_TYPE enum - */ - -typedef enum VTX_FETCH_TYPE { -VTX_FetchType_VertexData = 0x00000000, -VTX_FetchType_InstanceData = 0x00000001, -VTX_FetchType_NoIndexOffset = 0x00000002, -VTX_FetchType_RESERVED_3 = 0x00000003, -} VTX_FETCH_TYPE; - -/* - * VTX_FORMAT_COMP_ALL enum - */ - -typedef enum VTX_FORMAT_COMP_ALL { -VTX_FormatCompAll_Unsigned = 0x00000000, -VTX_FormatCompAll_Signed = 0x00000001, -} VTX_FORMAT_COMP_ALL; - -/* - * VTX_MEM_REQUEST_SIZE enum - */ - -typedef enum VTX_MEM_REQUEST_SIZE { -VTX_MemRequestSize_32B = 0x00000000, 
-VTX_MemRequestSize_64B = 0x00000001, -} VTX_MEM_REQUEST_SIZE; - -/* - * TVX_DATA_FORMAT enum - */ - -typedef enum TVX_DATA_FORMAT { -TVX_FMT_INVALID = 0x00000000, -TVX_FMT_8 = 0x00000001, -TVX_FMT_4_4 = 0x00000002, -TVX_FMT_3_3_2 = 0x00000003, -TVX_FMT_RESERVED_4 = 0x00000004, -TVX_FMT_16 = 0x00000005, -TVX_FMT_16_FLOAT = 0x00000006, -TVX_FMT_8_8 = 0x00000007, -TVX_FMT_5_6_5 = 0x00000008, -TVX_FMT_6_5_5 = 0x00000009, -TVX_FMT_1_5_5_5 = 0x0000000a, -TVX_FMT_4_4_4_4 = 0x0000000b, -TVX_FMT_5_5_5_1 = 0x0000000c, -TVX_FMT_32 = 0x0000000d, -TVX_FMT_32_FLOAT = 0x0000000e, -TVX_FMT_16_16 = 0x0000000f, -TVX_FMT_16_16_FLOAT = 0x00000010, -TVX_FMT_8_24 = 0x00000011, -TVX_FMT_8_24_FLOAT = 0x00000012, -TVX_FMT_24_8 = 0x00000013, -TVX_FMT_24_8_FLOAT = 0x00000014, -TVX_FMT_10_11_11 = 0x00000015, -TVX_FMT_10_11_11_FLOAT = 0x00000016, -TVX_FMT_11_11_10 = 0x00000017, -TVX_FMT_11_11_10_FLOAT = 0x00000018, -TVX_FMT_2_10_10_10 = 0x00000019, -TVX_FMT_8_8_8_8 = 0x0000001a, -TVX_FMT_10_10_10_2 = 0x0000001b, -TVX_FMT_X24_8_32_FLOAT = 0x0000001c, -TVX_FMT_32_32 = 0x0000001d, -TVX_FMT_32_32_FLOAT = 0x0000001e, -TVX_FMT_16_16_16_16 = 0x0000001f, -TVX_FMT_16_16_16_16_FLOAT = 0x00000020, -TVX_FMT_RESERVED_33 = 0x00000021, -TVX_FMT_32_32_32_32 = 0x00000022, -TVX_FMT_32_32_32_32_FLOAT = 0x00000023, -TVX_FMT_RESERVED_36 = 0x00000024, -TVX_FMT_1 = 0x00000025, -TVX_FMT_1_REVERSED = 0x00000026, -TVX_FMT_GB_GR = 0x00000027, -TVX_FMT_BG_RG = 0x00000028, -TVX_FMT_32_AS_8 = 0x00000029, -TVX_FMT_32_AS_8_8 = 0x0000002a, -TVX_FMT_5_9_9_9_SHAREDEXP = 0x0000002b, -TVX_FMT_8_8_8 = 0x0000002c, -TVX_FMT_16_16_16 = 0x0000002d, -TVX_FMT_16_16_16_FLOAT = 0x0000002e, -TVX_FMT_32_32_32 = 0x0000002f, -TVX_FMT_32_32_32_FLOAT = 0x00000030, -TVX_FMT_BC1 = 0x00000031, -TVX_FMT_BC2 = 0x00000032, -TVX_FMT_BC3 = 0x00000033, -TVX_FMT_BC4 = 0x00000034, -TVX_FMT_BC5 = 0x00000035, -TVX_FMT_APC0 = 0x00000036, -TVX_FMT_APC1 = 0x00000037, -TVX_FMT_APC2 = 0x00000038, -TVX_FMT_APC3 = 0x00000039, -TVX_FMT_APC4 = 0x0000003a, 
-TVX_FMT_APC5 = 0x0000003b, -TVX_FMT_APC6 = 0x0000003c, -TVX_FMT_APC7 = 0x0000003d, -TVX_FMT_CTX1 = 0x0000003e, -TVX_FMT_RESERVED_63 = 0x0000003f, -} TVX_DATA_FORMAT; - -/* - * TVX_DST_SEL enum - */ - -typedef enum TVX_DST_SEL { -TVX_DstSel_X = 0x00000000, -TVX_DstSel_Y = 0x00000001, -TVX_DstSel_Z = 0x00000002, -TVX_DstSel_W = 0x00000003, -TVX_DstSel_0f = 0x00000004, -TVX_DstSel_1f = 0x00000005, -TVX_DstSel_RESERVED_6 = 0x00000006, -TVX_DstSel_Mask = 0x00000007, -} TVX_DST_SEL; - -/* - * TVX_ENDIAN_SWAP enum - */ - -typedef enum TVX_ENDIAN_SWAP { -TVX_EndianSwap_None = 0x00000000, -TVX_EndianSwap_8in16 = 0x00000001, -TVX_EndianSwap_8in32 = 0x00000002, -TVX_EndianSwap_8in64 = 0x00000003, -} TVX_ENDIAN_SWAP; - -/* - * TVX_INST enum - */ - -typedef enum TVX_INST { -TVX_Inst_NormalVertexFetch = 0x00000000, -TVX_Inst_SemanticVertexFetch = 0x00000001, -TVX_Inst_RESERVED_2 = 0x00000002, -TVX_Inst_LD = 0x00000003, -TVX_Inst_GetTextureResInfo = 0x00000004, -TVX_Inst_GetNumberOfSamples = 0x00000005, -TVX_Inst_GetLOD = 0x00000006, -TVX_Inst_GetGradientsH = 0x00000007, -TVX_Inst_GetGradientsV = 0x00000008, -TVX_Inst_SetTextureOffsets = 0x00000009, -TVX_Inst_KeepGradients = 0x0000000a, -TVX_Inst_SetGradientsH = 0x0000000b, -TVX_Inst_SetGradientsV = 0x0000000c, -TVX_Inst_Pass = 0x0000000d, -TVX_Inst_GetBufferResInfo = 0x0000000e, -TVX_Inst_RESERVED_15 = 0x0000000f, -TVX_Inst_Sample = 0x00000010, -TVX_Inst_Sample_L = 0x00000011, -TVX_Inst_Sample_LB = 0x00000012, -TVX_Inst_Sample_LZ = 0x00000013, -TVX_Inst_Sample_G = 0x00000014, -TVX_Inst_Gather4 = 0x00000015, -TVX_Inst_Sample_G_LB = 0x00000016, -TVX_Inst_Gather4_O = 0x00000017, -TVX_Inst_Sample_C = 0x00000018, -TVX_Inst_Sample_C_L = 0x00000019, -TVX_Inst_Sample_C_LB = 0x0000001a, -TVX_Inst_Sample_C_LZ = 0x0000001b, -TVX_Inst_Sample_C_G = 0x0000001c, -TVX_Inst_Gather4_C = 0x0000001d, -TVX_Inst_Sample_C_G_LB = 0x0000001e, -TVX_Inst_Gather4_C_O = 0x0000001f, -} TVX_INST; - -/* - * TVX_NUM_FORMAT_ALL enum - */ - -typedef enum 
TVX_NUM_FORMAT_ALL { -TVX_NumFormatAll_Norm = 0x00000000, -TVX_NumFormatAll_Int = 0x00000001, -TVX_NumFormatAll_Scaled = 0x00000002, -TVX_NumFormatAll_RESERVED_3 = 0x00000003, -} TVX_NUM_FORMAT_ALL; - -/* - * TVX_SRC_SEL enum - */ - -typedef enum TVX_SRC_SEL { -TVX_SrcSel_X = 0x00000000, -TVX_SrcSel_Y = 0x00000001, -TVX_SrcSel_Z = 0x00000002, -TVX_SrcSel_W = 0x00000003, -TVX_SrcSel_0f = 0x00000004, -TVX_SrcSel_1f = 0x00000005, -} TVX_SRC_SEL; - -/* - * TVX_SRF_MODE_ALL enum - */ - -typedef enum TVX_SRF_MODE_ALL { -TVX_SRFModeAll_ZCMO = 0x00000000, -TVX_SRFModeAll_NZ = 0x00000001, -} TVX_SRF_MODE_ALL; - -/* - * TVX_TYPE enum - */ - -typedef enum TVX_TYPE { -TVX_Type_InvalidTextureResource = 0x00000000, -TVX_Type_InvalidVertexBuffer = 0x00000001, -TVX_Type_ValidTextureResource = 0x00000002, -TVX_Type_ValidVertexBuffer = 0x00000003, -} TVX_TYPE; - -/******************************************************* - * PA Enums - *******************************************************/ - -/* - * SU_PERFCNT_SEL enum - */ - -typedef enum SU_PERFCNT_SEL { -PERF_PAPC_PASX_REQ = 0x00000000, -PERF_PAPC_PASX_DISABLE_PIPE = 0x00000001, -PERF_PAPC_PASX_FIRST_VECTOR = 0x00000002, -PERF_PAPC_PASX_SECOND_VECTOR = 0x00000003, -PERF_PAPC_PASX_FIRST_DEAD = 0x00000004, -PERF_PAPC_PASX_SECOND_DEAD = 0x00000005, -PERF_PAPC_PASX_VTX_KILL_DISCARD = 0x00000006, -PERF_PAPC_PASX_VTX_NAN_DISCARD = 0x00000007, -PERF_PAPC_PA_INPUT_PRIM = 0x00000008, -PERF_PAPC_PA_INPUT_NULL_PRIM = 0x00000009, -PERF_PAPC_PA_INPUT_EVENT_FLAG = 0x0000000a, -PERF_PAPC_PA_INPUT_FIRST_PRIM_SLOT = 0x0000000b, -PERF_PAPC_PA_INPUT_END_OF_PACKET = 0x0000000c, -PERF_PAPC_PA_INPUT_EXTENDED_EVENT = 0x0000000d, -PERF_PAPC_CLPR_CULL_PRIM = 0x0000000e, -PERF_PAPC_CLPR_VVUCP_CULL_PRIM = 0x0000000f, -PERF_PAPC_CLPR_VV_CULL_PRIM = 0x00000010, -PERF_PAPC_CLPR_UCP_CULL_PRIM = 0x00000011, -PERF_PAPC_CLPR_VTX_KILL_CULL_PRIM = 0x00000012, -PERF_PAPC_CLPR_VTX_NAN_CULL_PRIM = 0x00000013, -PERF_PAPC_CLPR_CULL_TO_NULL_PRIM = 0x00000014, 
-PERF_PAPC_CLPR_VVUCP_CLIP_PRIM = 0x00000015, -PERF_PAPC_CLPR_VV_CLIP_PRIM = 0x00000016, -PERF_PAPC_CLPR_UCP_CLIP_PRIM = 0x00000017, -PERF_PAPC_CLPR_POINT_CLIP_CANDIDATE = 0x00000018, -PERF_PAPC_CLPR_CLIP_PLANE_CNT_1 = 0x00000019, -PERF_PAPC_CLPR_CLIP_PLANE_CNT_2 = 0x0000001a, -PERF_PAPC_CLPR_CLIP_PLANE_CNT_3 = 0x0000001b, -PERF_PAPC_CLPR_CLIP_PLANE_CNT_4 = 0x0000001c, -PERF_PAPC_CLPR_CLIP_PLANE_CNT_5_8 = 0x0000001d, -PERF_PAPC_CLPR_CLIP_PLANE_CNT_9_12 = 0x0000001e, -PERF_PAPC_CLPR_CLIP_PLANE_NEAR = 0x0000001f, -PERF_PAPC_CLPR_CLIP_PLANE_FAR = 0x00000020, -PERF_PAPC_CLPR_CLIP_PLANE_LEFT = 0x00000021, -PERF_PAPC_CLPR_CLIP_PLANE_RIGHT = 0x00000022, -PERF_PAPC_CLPR_CLIP_PLANE_TOP = 0x00000023, -PERF_PAPC_CLPR_CLIP_PLANE_BOTTOM = 0x00000024, -PERF_PAPC_CLPR_GSC_KILL_CULL_PRIM = 0x00000025, -PERF_PAPC_CLPR_RASTER_KILL_CULL_PRIM = 0x00000026, -PERF_PAPC_CLSM_NULL_PRIM = 0x00000027, -PERF_PAPC_CLSM_TOTALLY_VISIBLE_PRIM = 0x00000028, -PERF_PAPC_CLSM_CULL_TO_NULL_PRIM = 0x00000029, -PERF_PAPC_CLSM_OUT_PRIM_CNT_1 = 0x0000002a, -PERF_PAPC_CLSM_OUT_PRIM_CNT_2 = 0x0000002b, -PERF_PAPC_CLSM_OUT_PRIM_CNT_3 = 0x0000002c, -PERF_PAPC_CLSM_OUT_PRIM_CNT_4 = 0x0000002d, -PERF_PAPC_CLSM_OUT_PRIM_CNT_5_8 = 0x0000002e, -PERF_PAPC_CLSM_OUT_PRIM_CNT_9_13 = 0x0000002f, -PERF_PAPC_CLIPGA_VTE_KILL_PRIM = 0x00000030, -PERF_PAPC_SU_INPUT_PRIM = 0x00000031, -PERF_PAPC_SU_INPUT_CLIP_PRIM = 0x00000032, -PERF_PAPC_SU_INPUT_NULL_PRIM = 0x00000033, -PERF_PAPC_SU_INPUT_PRIM_DUAL = 0x00000034, -PERF_PAPC_SU_INPUT_CLIP_PRIM_DUAL = 0x00000035, -PERF_PAPC_SU_ZERO_AREA_CULL_PRIM = 0x00000036, -PERF_PAPC_SU_BACK_FACE_CULL_PRIM = 0x00000037, -PERF_PAPC_SU_FRONT_FACE_CULL_PRIM = 0x00000038, -PERF_PAPC_SU_POLYMODE_FACE_CULL = 0x00000039, -PERF_PAPC_SU_POLYMODE_BACK_CULL = 0x0000003a, -PERF_PAPC_SU_POLYMODE_FRONT_CULL = 0x0000003b, -PERF_PAPC_SU_POLYMODE_INVALID_FILL = 0x0000003c, -PERF_PAPC_SU_OUTPUT_PRIM = 0x0000003d, -PERF_PAPC_SU_OUTPUT_CLIP_PRIM = 0x0000003e, -PERF_PAPC_SU_OUTPUT_NULL_PRIM = 0x0000003f, 
-PERF_PAPC_SU_OUTPUT_EVENT_FLAG = 0x00000040, -PERF_PAPC_SU_OUTPUT_FIRST_PRIM_SLOT = 0x00000041, -PERF_PAPC_SU_OUTPUT_END_OF_PACKET = 0x00000042, -PERF_PAPC_SU_OUTPUT_POLYMODE_FACE = 0x00000043, -PERF_PAPC_SU_OUTPUT_POLYMODE_BACK = 0x00000044, -PERF_PAPC_SU_OUTPUT_POLYMODE_FRONT = 0x00000045, -PERF_PAPC_SU_OUT_CLIP_POLYMODE_FACE = 0x00000046, -PERF_PAPC_SU_OUT_CLIP_POLYMODE_BACK = 0x00000047, -PERF_PAPC_SU_OUT_CLIP_POLYMODE_FRONT = 0x00000048, -PERF_PAPC_SU_OUTPUT_PRIM_DUAL = 0x00000049, -PERF_PAPC_SU_OUTPUT_CLIP_PRIM_DUAL = 0x0000004a, -PERF_PAPC_SU_OUTPUT_POLYMODE_DUAL = 0x0000004b, -PERF_PAPC_SU_OUTPUT_CLIP_POLYMODE_DUAL = 0x0000004c, -PERF_PAPC_PASX_REQ_IDLE = 0x0000004d, -PERF_PAPC_PASX_REQ_BUSY = 0x0000004e, -PERF_PAPC_PASX_REQ_STALLED = 0x0000004f, -PERF_PAPC_PASX_REC_IDLE = 0x00000050, -PERF_PAPC_PASX_REC_BUSY = 0x00000051, -PERF_PAPC_PASX_REC_STARVED_SX = 0x00000052, -PERF_PAPC_PASX_REC_STALLED = 0x00000053, -PERF_PAPC_PASX_REC_STALLED_POS_MEM = 0x00000054, -PERF_PAPC_PASX_REC_STALLED_CCGSM_IN = 0x00000055, -PERF_PAPC_CCGSM_IDLE = 0x00000056, -PERF_PAPC_CCGSM_BUSY = 0x00000057, -PERF_PAPC_CCGSM_STALLED = 0x00000058, -PERF_PAPC_CLPRIM_IDLE = 0x00000059, -PERF_PAPC_CLPRIM_BUSY = 0x0000005a, -PERF_PAPC_CLPRIM_STALLED = 0x0000005b, -PERF_PAPC_CLPRIM_STARVED_CCGSM = 0x0000005c, -PERF_PAPC_CLIPSM_IDLE = 0x0000005d, -PERF_PAPC_CLIPSM_BUSY = 0x0000005e, -PERF_PAPC_CLIPSM_WAIT_CLIP_VERT_ENGH = 0x0000005f, -PERF_PAPC_CLIPSM_WAIT_HIGH_PRI_SEQ = 0x00000060, -PERF_PAPC_CLIPSM_WAIT_CLIPGA = 0x00000061, -PERF_PAPC_CLIPSM_WAIT_AVAIL_VTE_CLIP = 0x00000062, -PERF_PAPC_CLIPSM_WAIT_CLIP_OUTSM = 0x00000063, -PERF_PAPC_CLIPGA_IDLE = 0x00000064, -PERF_PAPC_CLIPGA_BUSY = 0x00000065, -PERF_PAPC_CLIPGA_STARVED_VTE_CLIP = 0x00000066, -PERF_PAPC_CLIPGA_STALLED = 0x00000067, -PERF_PAPC_CLIP_IDLE = 0x00000068, -PERF_PAPC_CLIP_BUSY = 0x00000069, -PERF_PAPC_SU_IDLE = 0x0000006a, -PERF_PAPC_SU_BUSY = 0x0000006b, -PERF_PAPC_SU_STARVED_CLIP = 0x0000006c, -PERF_PAPC_SU_STALLED_SC = 
0x0000006d, -PERF_PAPC_CL_DYN_SCLK_VLD = 0x0000006e, -PERF_PAPC_SU_DYN_SCLK_VLD = 0x0000006f, -PERF_PAPC_PA_REG_SCLK_VLD = 0x00000070, -PERF_PAPC_SU_MULTI_GPU_PRIM_FILTER_CULL = 0x00000071, -PERF_PAPC_PASX_SE0_REQ = 0x00000072, -PERF_PAPC_PASX_SE1_REQ = 0x00000073, -PERF_PAPC_PASX_SE0_FIRST_VECTOR = 0x00000074, -PERF_PAPC_PASX_SE0_SECOND_VECTOR = 0x00000075, -PERF_PAPC_PASX_SE1_FIRST_VECTOR = 0x00000076, -PERF_PAPC_PASX_SE1_SECOND_VECTOR = 0x00000077, -PERF_PAPC_SU_SE0_PRIM_FILTER_CULL = 0x00000078, -PERF_PAPC_SU_SE1_PRIM_FILTER_CULL = 0x00000079, -PERF_PAPC_SU_SE01_PRIM_FILTER_CULL = 0x0000007a, -PERF_PAPC_SU_SE0_OUTPUT_PRIM = 0x0000007b, -PERF_PAPC_SU_SE1_OUTPUT_PRIM = 0x0000007c, -PERF_PAPC_SU_SE01_OUTPUT_PRIM = 0x0000007d, -PERF_PAPC_SU_SE0_OUTPUT_NULL_PRIM = 0x0000007e, -PERF_PAPC_SU_SE1_OUTPUT_NULL_PRIM = 0x0000007f, -PERF_PAPC_SU_SE01_OUTPUT_NULL_PRIM = 0x00000080, -PERF_PAPC_SU_SE0_OUTPUT_FIRST_PRIM_SLOT = 0x00000081, -PERF_PAPC_SU_SE1_OUTPUT_FIRST_PRIM_SLOT = 0x00000082, -PERF_PAPC_SU_SE0_STALLED_SC = 0x00000083, -PERF_PAPC_SU_SE1_STALLED_SC = 0x00000084, -PERF_PAPC_SU_SE01_STALLED_SC = 0x00000085, -PERF_PAPC_CLSM_CLIPPING_PRIM = 0x00000086, -PERF_PAPC_SU_CULLED_PRIM = 0x00000087, -PERF_PAPC_SU_OUTPUT_EOPG = 0x00000088, -PERF_PAPC_SU_SE2_PRIM_FILTER_CULL = 0x00000089, -PERF_PAPC_SU_SE3_PRIM_FILTER_CULL = 0x0000008a, -PERF_PAPC_SU_SE2_OUTPUT_PRIM = 0x0000008b, -PERF_PAPC_SU_SE3_OUTPUT_PRIM = 0x0000008c, -PERF_PAPC_SU_SE2_OUTPUT_NULL_PRIM = 0x0000008d, -PERF_PAPC_SU_SE3_OUTPUT_NULL_PRIM = 0x0000008e, -PERF_PAPC_SU_SE0_OUTPUT_END_OF_PACKET = 0x0000008f, -PERF_PAPC_SU_SE1_OUTPUT_END_OF_PACKET = 0x00000090, -PERF_PAPC_SU_SE2_OUTPUT_END_OF_PACKET = 0x00000091, -PERF_PAPC_SU_SE3_OUTPUT_END_OF_PACKET = 0x00000092, -PERF_PAPC_SU_SE0_OUTPUT_EOPG = 0x00000093, -PERF_PAPC_SU_SE1_OUTPUT_EOPG = 0x00000094, -PERF_PAPC_SU_SE2_OUTPUT_EOPG = 0x00000095, -PERF_PAPC_SU_SE3_OUTPUT_EOPG = 0x00000096, -PERF_PAPC_SU_SE2_STALLED_SC = 0x00000097, -PERF_PAPC_SU_SE3_STALLED_SC = 
0x00000098, -} SU_PERFCNT_SEL; - -/* - * SC_PERFCNT_SEL enum - */ - -typedef enum SC_PERFCNT_SEL { -SC_SRPS_WINDOW_VALID = 0x00000000, -SC_PSSW_WINDOW_VALID = 0x00000001, -SC_TPQZ_WINDOW_VALID = 0x00000002, -SC_QZQP_WINDOW_VALID = 0x00000003, -SC_TRPK_WINDOW_VALID = 0x00000004, -SC_SRPS_WINDOW_VALID_BUSY = 0x00000005, -SC_PSSW_WINDOW_VALID_BUSY = 0x00000006, -SC_TPQZ_WINDOW_VALID_BUSY = 0x00000007, -SC_QZQP_WINDOW_VALID_BUSY = 0x00000008, -SC_TRPK_WINDOW_VALID_BUSY = 0x00000009, -SC_STARVED_BY_PA = 0x0000000a, -SC_STALLED_BY_PRIMFIFO = 0x0000000b, -SC_STALLED_BY_DB_TILE = 0x0000000c, -SC_STARVED_BY_DB_TILE = 0x0000000d, -SC_STALLED_BY_TILEORDERFIFO = 0x0000000e, -SC_STALLED_BY_TILEFIFO = 0x0000000f, -SC_STALLED_BY_DB_QUAD = 0x00000010, -SC_STARVED_BY_DB_QUAD = 0x00000011, -SC_STALLED_BY_QUADFIFO = 0x00000012, -SC_STALLED_BY_BCI = 0x00000013, -SC_STALLED_BY_SPI = 0x00000014, -SC_SCISSOR_DISCARD = 0x00000015, -SC_BB_DISCARD = 0x00000016, -SC_SUPERTILE_COUNT = 0x00000017, -SC_SUPERTILE_PER_PRIM_H0 = 0x00000018, -SC_SUPERTILE_PER_PRIM_H1 = 0x00000019, -SC_SUPERTILE_PER_PRIM_H2 = 0x0000001a, -SC_SUPERTILE_PER_PRIM_H3 = 0x0000001b, -SC_SUPERTILE_PER_PRIM_H4 = 0x0000001c, -SC_SUPERTILE_PER_PRIM_H5 = 0x0000001d, -SC_SUPERTILE_PER_PRIM_H6 = 0x0000001e, -SC_SUPERTILE_PER_PRIM_H7 = 0x0000001f, -SC_SUPERTILE_PER_PRIM_H8 = 0x00000020, -SC_SUPERTILE_PER_PRIM_H9 = 0x00000021, -SC_SUPERTILE_PER_PRIM_H10 = 0x00000022, -SC_SUPERTILE_PER_PRIM_H11 = 0x00000023, -SC_SUPERTILE_PER_PRIM_H12 = 0x00000024, -SC_SUPERTILE_PER_PRIM_H13 = 0x00000025, -SC_SUPERTILE_PER_PRIM_H14 = 0x00000026, -SC_SUPERTILE_PER_PRIM_H15 = 0x00000027, -SC_SUPERTILE_PER_PRIM_H16 = 0x00000028, -SC_TILE_PER_PRIM_H0 = 0x00000029, -SC_TILE_PER_PRIM_H1 = 0x0000002a, -SC_TILE_PER_PRIM_H2 = 0x0000002b, -SC_TILE_PER_PRIM_H3 = 0x0000002c, -SC_TILE_PER_PRIM_H4 = 0x0000002d, -SC_TILE_PER_PRIM_H5 = 0x0000002e, -SC_TILE_PER_PRIM_H6 = 0x0000002f, -SC_TILE_PER_PRIM_H7 = 0x00000030, -SC_TILE_PER_PRIM_H8 = 0x00000031, 
-SC_TILE_PER_PRIM_H9 = 0x00000032, -SC_TILE_PER_PRIM_H10 = 0x00000033, -SC_TILE_PER_PRIM_H11 = 0x00000034, -SC_TILE_PER_PRIM_H12 = 0x00000035, -SC_TILE_PER_PRIM_H13 = 0x00000036, -SC_TILE_PER_PRIM_H14 = 0x00000037, -SC_TILE_PER_PRIM_H15 = 0x00000038, -SC_TILE_PER_PRIM_H16 = 0x00000039, -SC_TILE_PER_SUPERTILE_H0 = 0x0000003a, -SC_TILE_PER_SUPERTILE_H1 = 0x0000003b, -SC_TILE_PER_SUPERTILE_H2 = 0x0000003c, -SC_TILE_PER_SUPERTILE_H3 = 0x0000003d, -SC_TILE_PER_SUPERTILE_H4 = 0x0000003e, -SC_TILE_PER_SUPERTILE_H5 = 0x0000003f, -SC_TILE_PER_SUPERTILE_H6 = 0x00000040, -SC_TILE_PER_SUPERTILE_H7 = 0x00000041, -SC_TILE_PER_SUPERTILE_H8 = 0x00000042, -SC_TILE_PER_SUPERTILE_H9 = 0x00000043, -SC_TILE_PER_SUPERTILE_H10 = 0x00000044, -SC_TILE_PER_SUPERTILE_H11 = 0x00000045, -SC_TILE_PER_SUPERTILE_H12 = 0x00000046, -SC_TILE_PER_SUPERTILE_H13 = 0x00000047, -SC_TILE_PER_SUPERTILE_H14 = 0x00000048, -SC_TILE_PER_SUPERTILE_H15 = 0x00000049, -SC_TILE_PER_SUPERTILE_H16 = 0x0000004a, -SC_TILE_PICKED_H1 = 0x0000004b, -SC_TILE_PICKED_H2 = 0x0000004c, -SC_TILE_PICKED_H3 = 0x0000004d, -SC_TILE_PICKED_H4 = 0x0000004e, -SC_QZ0_TILE_COUNT = 0x0000004f, -SC_QZ1_TILE_COUNT = 0x00000050, -SC_QZ2_TILE_COUNT = 0x00000051, -SC_QZ3_TILE_COUNT = 0x00000052, -SC_QZ0_TILE_COVERED_COUNT = 0x00000053, -SC_QZ1_TILE_COVERED_COUNT = 0x00000054, -SC_QZ2_TILE_COVERED_COUNT = 0x00000055, -SC_QZ3_TILE_COVERED_COUNT = 0x00000056, -SC_QZ0_TILE_NOT_COVERED_COUNT = 0x00000057, -SC_QZ1_TILE_NOT_COVERED_COUNT = 0x00000058, -SC_QZ2_TILE_NOT_COVERED_COUNT = 0x00000059, -SC_QZ3_TILE_NOT_COVERED_COUNT = 0x0000005a, -SC_QZ0_QUAD_PER_TILE_H0 = 0x0000005b, -SC_QZ0_QUAD_PER_TILE_H1 = 0x0000005c, -SC_QZ0_QUAD_PER_TILE_H2 = 0x0000005d, -SC_QZ0_QUAD_PER_TILE_H3 = 0x0000005e, -SC_QZ0_QUAD_PER_TILE_H4 = 0x0000005f, -SC_QZ0_QUAD_PER_TILE_H5 = 0x00000060, -SC_QZ0_QUAD_PER_TILE_H6 = 0x00000061, -SC_QZ0_QUAD_PER_TILE_H7 = 0x00000062, -SC_QZ0_QUAD_PER_TILE_H8 = 0x00000063, -SC_QZ0_QUAD_PER_TILE_H9 = 0x00000064, -SC_QZ0_QUAD_PER_TILE_H10 = 
0x00000065, -SC_QZ0_QUAD_PER_TILE_H11 = 0x00000066, -SC_QZ0_QUAD_PER_TILE_H12 = 0x00000067, -SC_QZ0_QUAD_PER_TILE_H13 = 0x00000068, -SC_QZ0_QUAD_PER_TILE_H14 = 0x00000069, -SC_QZ0_QUAD_PER_TILE_H15 = 0x0000006a, -SC_QZ0_QUAD_PER_TILE_H16 = 0x0000006b, -SC_QZ1_QUAD_PER_TILE_H0 = 0x0000006c, -SC_QZ1_QUAD_PER_TILE_H1 = 0x0000006d, -SC_QZ1_QUAD_PER_TILE_H2 = 0x0000006e, -SC_QZ1_QUAD_PER_TILE_H3 = 0x0000006f, -SC_QZ1_QUAD_PER_TILE_H4 = 0x00000070, -SC_QZ1_QUAD_PER_TILE_H5 = 0x00000071, -SC_QZ1_QUAD_PER_TILE_H6 = 0x00000072, -SC_QZ1_QUAD_PER_TILE_H7 = 0x00000073, -SC_QZ1_QUAD_PER_TILE_H8 = 0x00000074, -SC_QZ1_QUAD_PER_TILE_H9 = 0x00000075, -SC_QZ1_QUAD_PER_TILE_H10 = 0x00000076, -SC_QZ1_QUAD_PER_TILE_H11 = 0x00000077, -SC_QZ1_QUAD_PER_TILE_H12 = 0x00000078, -SC_QZ1_QUAD_PER_TILE_H13 = 0x00000079, -SC_QZ1_QUAD_PER_TILE_H14 = 0x0000007a, -SC_QZ1_QUAD_PER_TILE_H15 = 0x0000007b, -SC_QZ1_QUAD_PER_TILE_H16 = 0x0000007c, -SC_QZ2_QUAD_PER_TILE_H0 = 0x0000007d, -SC_QZ2_QUAD_PER_TILE_H1 = 0x0000007e, -SC_QZ2_QUAD_PER_TILE_H2 = 0x0000007f, -SC_QZ2_QUAD_PER_TILE_H3 = 0x00000080, -SC_QZ2_QUAD_PER_TILE_H4 = 0x00000081, -SC_QZ2_QUAD_PER_TILE_H5 = 0x00000082, -SC_QZ2_QUAD_PER_TILE_H6 = 0x00000083, -SC_QZ2_QUAD_PER_TILE_H7 = 0x00000084, -SC_QZ2_QUAD_PER_TILE_H8 = 0x00000085, -SC_QZ2_QUAD_PER_TILE_H9 = 0x00000086, -SC_QZ2_QUAD_PER_TILE_H10 = 0x00000087, -SC_QZ2_QUAD_PER_TILE_H11 = 0x00000088, -SC_QZ2_QUAD_PER_TILE_H12 = 0x00000089, -SC_QZ2_QUAD_PER_TILE_H13 = 0x0000008a, -SC_QZ2_QUAD_PER_TILE_H14 = 0x0000008b, -SC_QZ2_QUAD_PER_TILE_H15 = 0x0000008c, -SC_QZ2_QUAD_PER_TILE_H16 = 0x0000008d, -SC_QZ3_QUAD_PER_TILE_H0 = 0x0000008e, -SC_QZ3_QUAD_PER_TILE_H1 = 0x0000008f, -SC_QZ3_QUAD_PER_TILE_H2 = 0x00000090, -SC_QZ3_QUAD_PER_TILE_H3 = 0x00000091, -SC_QZ3_QUAD_PER_TILE_H4 = 0x00000092, -SC_QZ3_QUAD_PER_TILE_H5 = 0x00000093, -SC_QZ3_QUAD_PER_TILE_H6 = 0x00000094, -SC_QZ3_QUAD_PER_TILE_H7 = 0x00000095, -SC_QZ3_QUAD_PER_TILE_H8 = 0x00000096, -SC_QZ3_QUAD_PER_TILE_H9 = 0x00000097, 
-SC_QZ3_QUAD_PER_TILE_H10 = 0x00000098, -SC_QZ3_QUAD_PER_TILE_H11 = 0x00000099, -SC_QZ3_QUAD_PER_TILE_H12 = 0x0000009a, -SC_QZ3_QUAD_PER_TILE_H13 = 0x0000009b, -SC_QZ3_QUAD_PER_TILE_H14 = 0x0000009c, -SC_QZ3_QUAD_PER_TILE_H15 = 0x0000009d, -SC_QZ3_QUAD_PER_TILE_H16 = 0x0000009e, -SC_QZ0_QUAD_COUNT = 0x0000009f, -SC_QZ1_QUAD_COUNT = 0x000000a0, -SC_QZ2_QUAD_COUNT = 0x000000a1, -SC_QZ3_QUAD_COUNT = 0x000000a2, -SC_P0_HIZ_TILE_COUNT = 0x000000a3, -SC_P1_HIZ_TILE_COUNT = 0x000000a4, -SC_P2_HIZ_TILE_COUNT = 0x000000a5, -SC_P3_HIZ_TILE_COUNT = 0x000000a6, -SC_P0_HIZ_QUAD_PER_TILE_H0 = 0x000000a7, -SC_P0_HIZ_QUAD_PER_TILE_H1 = 0x000000a8, -SC_P0_HIZ_QUAD_PER_TILE_H2 = 0x000000a9, -SC_P0_HIZ_QUAD_PER_TILE_H3 = 0x000000aa, -SC_P0_HIZ_QUAD_PER_TILE_H4 = 0x000000ab, -SC_P0_HIZ_QUAD_PER_TILE_H5 = 0x000000ac, -SC_P0_HIZ_QUAD_PER_TILE_H6 = 0x000000ad, -SC_P0_HIZ_QUAD_PER_TILE_H7 = 0x000000ae, -SC_P0_HIZ_QUAD_PER_TILE_H8 = 0x000000af, -SC_P0_HIZ_QUAD_PER_TILE_H9 = 0x000000b0, -SC_P0_HIZ_QUAD_PER_TILE_H10 = 0x000000b1, -SC_P0_HIZ_QUAD_PER_TILE_H11 = 0x000000b2, -SC_P0_HIZ_QUAD_PER_TILE_H12 = 0x000000b3, -SC_P0_HIZ_QUAD_PER_TILE_H13 = 0x000000b4, -SC_P0_HIZ_QUAD_PER_TILE_H14 = 0x000000b5, -SC_P0_HIZ_QUAD_PER_TILE_H15 = 0x000000b6, -SC_P0_HIZ_QUAD_PER_TILE_H16 = 0x000000b7, -SC_P1_HIZ_QUAD_PER_TILE_H0 = 0x000000b8, -SC_P1_HIZ_QUAD_PER_TILE_H1 = 0x000000b9, -SC_P1_HIZ_QUAD_PER_TILE_H2 = 0x000000ba, -SC_P1_HIZ_QUAD_PER_TILE_H3 = 0x000000bb, -SC_P1_HIZ_QUAD_PER_TILE_H4 = 0x000000bc, -SC_P1_HIZ_QUAD_PER_TILE_H5 = 0x000000bd, -SC_P1_HIZ_QUAD_PER_TILE_H6 = 0x000000be, -SC_P1_HIZ_QUAD_PER_TILE_H7 = 0x000000bf, -SC_P1_HIZ_QUAD_PER_TILE_H8 = 0x000000c0, -SC_P1_HIZ_QUAD_PER_TILE_H9 = 0x000000c1, -SC_P1_HIZ_QUAD_PER_TILE_H10 = 0x000000c2, -SC_P1_HIZ_QUAD_PER_TILE_H11 = 0x000000c3, -SC_P1_HIZ_QUAD_PER_TILE_H12 = 0x000000c4, -SC_P1_HIZ_QUAD_PER_TILE_H13 = 0x000000c5, -SC_P1_HIZ_QUAD_PER_TILE_H14 = 0x000000c6, -SC_P1_HIZ_QUAD_PER_TILE_H15 = 0x000000c7, -SC_P1_HIZ_QUAD_PER_TILE_H16 = 0x000000c8, 
-SC_P2_HIZ_QUAD_PER_TILE_H0 = 0x000000c9, -SC_P2_HIZ_QUAD_PER_TILE_H1 = 0x000000ca, -SC_P2_HIZ_QUAD_PER_TILE_H2 = 0x000000cb, -SC_P2_HIZ_QUAD_PER_TILE_H3 = 0x000000cc, -SC_P2_HIZ_QUAD_PER_TILE_H4 = 0x000000cd, -SC_P2_HIZ_QUAD_PER_TILE_H5 = 0x000000ce, -SC_P2_HIZ_QUAD_PER_TILE_H6 = 0x000000cf, -SC_P2_HIZ_QUAD_PER_TILE_H7 = 0x000000d0, -SC_P2_HIZ_QUAD_PER_TILE_H8 = 0x000000d1, -SC_P2_HIZ_QUAD_PER_TILE_H9 = 0x000000d2, -SC_P2_HIZ_QUAD_PER_TILE_H10 = 0x000000d3, -SC_P2_HIZ_QUAD_PER_TILE_H11 = 0x000000d4, -SC_P2_HIZ_QUAD_PER_TILE_H12 = 0x000000d5, -SC_P2_HIZ_QUAD_PER_TILE_H13 = 0x000000d6, -SC_P2_HIZ_QUAD_PER_TILE_H14 = 0x000000d7, -SC_P2_HIZ_QUAD_PER_TILE_H15 = 0x000000d8, -SC_P2_HIZ_QUAD_PER_TILE_H16 = 0x000000d9, -SC_P3_HIZ_QUAD_PER_TILE_H0 = 0x000000da, -SC_P3_HIZ_QUAD_PER_TILE_H1 = 0x000000db, -SC_P3_HIZ_QUAD_PER_TILE_H2 = 0x000000dc, -SC_P3_HIZ_QUAD_PER_TILE_H3 = 0x000000dd, -SC_P3_HIZ_QUAD_PER_TILE_H4 = 0x000000de, -SC_P3_HIZ_QUAD_PER_TILE_H5 = 0x000000df, -SC_P3_HIZ_QUAD_PER_TILE_H6 = 0x000000e0, -SC_P3_HIZ_QUAD_PER_TILE_H7 = 0x000000e1, -SC_P3_HIZ_QUAD_PER_TILE_H8 = 0x000000e2, -SC_P3_HIZ_QUAD_PER_TILE_H9 = 0x000000e3, -SC_P3_HIZ_QUAD_PER_TILE_H10 = 0x000000e4, -SC_P3_HIZ_QUAD_PER_TILE_H11 = 0x000000e5, -SC_P3_HIZ_QUAD_PER_TILE_H12 = 0x000000e6, -SC_P3_HIZ_QUAD_PER_TILE_H13 = 0x000000e7, -SC_P3_HIZ_QUAD_PER_TILE_H14 = 0x000000e8, -SC_P3_HIZ_QUAD_PER_TILE_H15 = 0x000000e9, -SC_P3_HIZ_QUAD_PER_TILE_H16 = 0x000000ea, -SC_P0_HIZ_QUAD_COUNT = 0x000000eb, -SC_P1_HIZ_QUAD_COUNT = 0x000000ec, -SC_P2_HIZ_QUAD_COUNT = 0x000000ed, -SC_P3_HIZ_QUAD_COUNT = 0x000000ee, -SC_P0_DETAIL_QUAD_COUNT = 0x000000ef, -SC_P1_DETAIL_QUAD_COUNT = 0x000000f0, -SC_P2_DETAIL_QUAD_COUNT = 0x000000f1, -SC_P3_DETAIL_QUAD_COUNT = 0x000000f2, -SC_P0_DETAIL_QUAD_WITH_1_PIX = 0x000000f3, -SC_P0_DETAIL_QUAD_WITH_2_PIX = 0x000000f4, -SC_P0_DETAIL_QUAD_WITH_3_PIX = 0x000000f5, -SC_P0_DETAIL_QUAD_WITH_4_PIX = 0x000000f6, -SC_P1_DETAIL_QUAD_WITH_1_PIX = 0x000000f7, -SC_P1_DETAIL_QUAD_WITH_2_PIX = 
0x000000f8, -SC_P1_DETAIL_QUAD_WITH_3_PIX = 0x000000f9, -SC_P1_DETAIL_QUAD_WITH_4_PIX = 0x000000fa, -SC_P2_DETAIL_QUAD_WITH_1_PIX = 0x000000fb, -SC_P2_DETAIL_QUAD_WITH_2_PIX = 0x000000fc, -SC_P2_DETAIL_QUAD_WITH_3_PIX = 0x000000fd, -SC_P2_DETAIL_QUAD_WITH_4_PIX = 0x000000fe, -SC_P3_DETAIL_QUAD_WITH_1_PIX = 0x000000ff, -SC_P3_DETAIL_QUAD_WITH_2_PIX = 0x00000100, -SC_P3_DETAIL_QUAD_WITH_3_PIX = 0x00000101, -SC_P3_DETAIL_QUAD_WITH_4_PIX = 0x00000102, -SC_EARLYZ_QUAD_COUNT = 0x00000103, -SC_EARLYZ_QUAD_WITH_1_PIX = 0x00000104, -SC_EARLYZ_QUAD_WITH_2_PIX = 0x00000105, -SC_EARLYZ_QUAD_WITH_3_PIX = 0x00000106, -SC_EARLYZ_QUAD_WITH_4_PIX = 0x00000107, -SC_PKR_QUAD_PER_ROW_H1 = 0x00000108, -SC_PKR_QUAD_PER_ROW_H2 = 0x00000109, -SC_PKR_4X2_QUAD_SPLIT = 0x0000010a, -SC_PKR_4X2_FILL_QUAD = 0x0000010b, -SC_PKR_END_OF_VECTOR = 0x0000010c, -SC_PKR_CONTROL_XFER = 0x0000010d, -SC_PKR_DBHANG_FORCE_EOV = 0x0000010e, -SC_REG_SCLK_BUSY = 0x0000010f, -SC_GRP0_DYN_SCLK_BUSY = 0x00000110, -SC_GRP1_DYN_SCLK_BUSY = 0x00000111, -SC_GRP2_DYN_SCLK_BUSY = 0x00000112, -SC_GRP3_DYN_SCLK_BUSY = 0x00000113, -SC_GRP4_DYN_SCLK_BUSY = 0x00000114, -SC_PA0_SC_DATA_FIFO_RD = 0x00000115, -SC_PA0_SC_DATA_FIFO_WE = 0x00000116, -SC_PA1_SC_DATA_FIFO_RD = 0x00000117, -SC_PA1_SC_DATA_FIFO_WE = 0x00000118, -SC_PS_ARB_XFC_ALL_EVENT_OR_PRIM_CYCLES = 0x00000119, -SC_PS_ARB_XFC_ONLY_PRIM_CYCLES = 0x0000011a, -SC_PS_ARB_XFC_ONLY_ONE_INC_PER_PRIM = 0x0000011b, -SC_PS_ARB_STALLED_FROM_BELOW = 0x0000011c, -SC_PS_ARB_STARVED_FROM_ABOVE = 0x0000011d, -SC_PS_ARB_SC_BUSY = 0x0000011e, -SC_PS_ARB_PA_SC_BUSY = 0x0000011f, -SC_PA2_SC_DATA_FIFO_RD = 0x00000120, -SC_PA2_SC_DATA_FIFO_WE = 0x00000121, -SC_PA3_SC_DATA_FIFO_RD = 0x00000122, -SC_PA3_SC_DATA_FIFO_WE = 0x00000123, -SC_PA_SC_DEALLOC_0_0_WE = 0x00000124, -SC_PA_SC_DEALLOC_0_1_WE = 0x00000125, -SC_PA_SC_DEALLOC_1_0_WE = 0x00000126, -SC_PA_SC_DEALLOC_1_1_WE = 0x00000127, -SC_PA_SC_DEALLOC_2_0_WE = 0x00000128, -SC_PA_SC_DEALLOC_2_1_WE = 0x00000129, -SC_PA_SC_DEALLOC_3_0_WE 
= 0x0000012a, -SC_PA_SC_DEALLOC_3_1_WE = 0x0000012b, -SC_PA0_SC_EOP_WE = 0x0000012c, -SC_PA0_SC_EOPG_WE = 0x0000012d, -SC_PA0_SC_EVENT_WE = 0x0000012e, -SC_PA1_SC_EOP_WE = 0x0000012f, -SC_PA1_SC_EOPG_WE = 0x00000130, -SC_PA1_SC_EVENT_WE = 0x00000131, -SC_PA2_SC_EOP_WE = 0x00000132, -SC_PA2_SC_EOPG_WE = 0x00000133, -SC_PA2_SC_EVENT_WE = 0x00000134, -SC_PA3_SC_EOP_WE = 0x00000135, -SC_PA3_SC_EOPG_WE = 0x00000136, -SC_PA3_SC_EVENT_WE = 0x00000137, -SC_PS_ARB_OOO_THRESHOLD_SWITCH_TO_DESIRED_FIFO = 0x00000138, -SC_PS_ARB_OOO_FIFO_EMPTY_SWITCH = 0x00000139, -SC_PS_ARB_NULL_PRIM_BUBBLE_POP = 0x0000013a, -SC_PS_ARB_EOP_POP_SYNC_POP = 0x0000013b, -SC_PS_ARB_EVENT_SYNC_POP = 0x0000013c, -SC_SC_PS_ENG_MULTICYCLE_BUBBLE = 0x0000013d, -SC_PA0_SC_FPOV_WE = 0x0000013e, -SC_PA1_SC_FPOV_WE = 0x0000013f, -SC_PA2_SC_FPOV_WE = 0x00000140, -SC_PA3_SC_FPOV_WE = 0x00000141, -SC_PA0_SC_LPOV_WE = 0x00000142, -SC_PA1_SC_LPOV_WE = 0x00000143, -SC_PA2_SC_LPOV_WE = 0x00000144, -SC_PA3_SC_LPOV_WE = 0x00000145, -SC_SC_SPI_DEALLOC_0_0 = 0x00000146, -SC_SC_SPI_DEALLOC_0_1 = 0x00000147, -SC_SC_SPI_DEALLOC_0_2 = 0x00000148, -SC_SC_SPI_DEALLOC_1_0 = 0x00000149, -SC_SC_SPI_DEALLOC_1_1 = 0x0000014a, -SC_SC_SPI_DEALLOC_1_2 = 0x0000014b, -SC_SC_SPI_DEALLOC_2_0 = 0x0000014c, -SC_SC_SPI_DEALLOC_2_1 = 0x0000014d, -SC_SC_SPI_DEALLOC_2_2 = 0x0000014e, -SC_SC_SPI_DEALLOC_3_0 = 0x0000014f, -SC_SC_SPI_DEALLOC_3_1 = 0x00000150, -SC_SC_SPI_DEALLOC_3_2 = 0x00000151, -SC_SC_SPI_FPOV_0 = 0x00000152, -SC_SC_SPI_FPOV_1 = 0x00000153, -SC_SC_SPI_FPOV_2 = 0x00000154, -SC_SC_SPI_FPOV_3 = 0x00000155, -SC_SC_SPI_EVENT = 0x00000156, -SC_PS_TS_EVENT_FIFO_PUSH = 0x00000157, -SC_PS_TS_EVENT_FIFO_POP = 0x00000158, -SC_PS_CTX_DONE_FIFO_PUSH = 0x00000159, -SC_PS_CTX_DONE_FIFO_POP = 0x0000015a, -SC_MULTICYCLE_BUBBLE_FREEZE = 0x0000015b, -SC_EOP_SYNC_WINDOW = 0x0000015c, -SC_PA0_SC_NULL_WE = 0x0000015d, -SC_PA0_SC_NULL_DEALLOC_WE = 0x0000015e, -SC_PA0_SC_DATA_FIFO_EOPG_RD = 0x0000015f, -SC_PA0_SC_DATA_FIFO_EOP_RD = 0x00000160, 
-SC_PA0_SC_DEALLOC_0_RD = 0x00000161, -SC_PA0_SC_DEALLOC_1_RD = 0x00000162, -SC_PA1_SC_DATA_FIFO_EOPG_RD = 0x00000163, -SC_PA1_SC_DATA_FIFO_EOP_RD = 0x00000164, -SC_PA1_SC_DEALLOC_0_RD = 0x00000165, -SC_PA1_SC_DEALLOC_1_RD = 0x00000166, -SC_PA1_SC_NULL_WE = 0x00000167, -SC_PA1_SC_NULL_DEALLOC_WE = 0x00000168, -SC_PA2_SC_DATA_FIFO_EOPG_RD = 0x00000169, -SC_PA2_SC_DATA_FIFO_EOP_RD = 0x0000016a, -SC_PA2_SC_DEALLOC_0_RD = 0x0000016b, -SC_PA2_SC_DEALLOC_1_RD = 0x0000016c, -SC_PA2_SC_NULL_WE = 0x0000016d, -SC_PA2_SC_NULL_DEALLOC_WE = 0x0000016e, -SC_PA3_SC_DATA_FIFO_EOPG_RD = 0x0000016f, -SC_PA3_SC_DATA_FIFO_EOP_RD = 0x00000170, -SC_PA3_SC_DEALLOC_0_RD = 0x00000171, -SC_PA3_SC_DEALLOC_1_RD = 0x00000172, -SC_PA3_SC_NULL_WE = 0x00000173, -SC_PA3_SC_NULL_DEALLOC_WE = 0x00000174, -SC_PS_PA0_SC_FIFO_EMPTY = 0x00000175, -SC_PS_PA0_SC_FIFO_FULL = 0x00000176, -SC_PA0_PS_DATA_SEND = 0x00000177, -SC_PS_PA1_SC_FIFO_EMPTY = 0x00000178, -SC_PS_PA1_SC_FIFO_FULL = 0x00000179, -SC_PA1_PS_DATA_SEND = 0x0000017a, -SC_PS_PA2_SC_FIFO_EMPTY = 0x0000017b, -SC_PS_PA2_SC_FIFO_FULL = 0x0000017c, -SC_PA2_PS_DATA_SEND = 0x0000017d, -SC_PS_PA3_SC_FIFO_EMPTY = 0x0000017e, -SC_PS_PA3_SC_FIFO_FULL = 0x0000017f, -SC_PA3_PS_DATA_SEND = 0x00000180, -SC_BUSY_PROCESSING_MULTICYCLE_PRIM = 0x00000181, -SC_BUSY_CNT_NOT_ZERO = 0x00000182, -SC_BM_BUSY = 0x00000183, -SC_BACKEND_BUSY = 0x00000184, -SC_SCF_SCB_INTERFACE_BUSY = 0x00000185, -SC_SCB_BUSY = 0x00000186, -SC_STARVED_BY_PA_WITH_UNSELECTED_PA_NOT_EMPTY = 0x00000187, -SC_STARVED_BY_PA_WITH_UNSELECTED_PA_FULL = 0x00000188, -SC_PBB_BIN_HIST_NUM_PRIMS = 0x00000189, -SC_PBB_BATCH_HIST_NUM_PRIMS = 0x0000018a, -SC_PBB_BIN_HIST_NUM_CONTEXTS = 0x0000018b, -SC_PBB_BATCH_HIST_NUM_CONTEXTS = 0x0000018c, -SC_PBB_BIN_HIST_NUM_PERSISTENT_STATES = 0x0000018d, -SC_PBB_BATCH_HIST_NUM_PERSISTENT_STATES = 0x0000018e, -SC_PBB_BATCH_HIST_NUM_PS_WAVE_BREAKS = 0x0000018f, -SC_PBB_BATCH_HIST_NUM_TRIV_REJECTED_PRIMS = 0x00000190, -SC_PBB_BATCH_HIST_NUM_ROWS_PER_PRIM = 0x00000191, 
-SC_PBB_BATCH_HIST_NUM_COLUMNS_PER_ROW = 0x00000192, -SC_PBB_BUSY = 0x00000193, -SC_PBB_BUSY_AND_RTR = 0x00000194, -SC_PBB_STALLS_PA_DUE_TO_NO_TILES = 0x00000195, -SC_PBB_NUM_BINS = 0x00000196, -SC_PBB_END_OF_BIN = 0x00000197, -SC_PBB_END_OF_BATCH = 0x00000198, -SC_PBB_PRIMBIN_PROCESSED = 0x00000199, -SC_PBB_PRIM_ADDED_TO_BATCH = 0x0000019a, -SC_PBB_NONBINNED_PRIM = 0x0000019b, -SC_PBB_TOTAL_REAL_PRIMS_OUT_OF_PBB = 0x0000019c, -SC_PBB_TOTAL_NULL_PRIMS_OUT_OF_PBB = 0x0000019d, -SC_PBB_IDLE_CLK_DUE_TO_ROW_TO_COLUMN_TRANSITION = 0x0000019e, -SC_PBB_IDLE_CLK_DUE_TO_FALSE_POSITIVE_ON_ROW = 0x0000019f, -SC_PBB_IDLE_CLK_DUE_TO_FALSE_POSITIVE_ON_COLUMN = 0x000001a0, -SC_PBB_BATCH_BREAK_DUE_TO_PERSISTENT_STATE = 0x000001a1, -SC_PBB_BATCH_BREAK_DUE_TO_CONTEXT_STATE = 0x000001a2, -SC_PBB_BATCH_BREAK_DUE_TO_PRIM = 0x000001a3, -SC_PBB_BATCH_BREAK_DUE_TO_PC_STORAGE = 0x000001a4, -SC_PBB_BATCH_BREAK_DUE_TO_EVENT = 0x000001a5, -SC_PBB_BATCH_BREAK_DUE_TO_FPOV_LIMIT = 0x000001a6, -SC_POPS_INTRA_WAVE_OVERLAPS = 0x000001a7, -SC_POPS_FORCE_EOV = 0x000001a8, -SC_PKR_QUAD_OVERLAP_NOT_FOUND_IN_WAVE_TABLE = 0x000001a9, -SC_PKR_QUAD_OVERLAP_FOUND_IN_WAVE_TABLE = 0x000001aa, -} SC_PERFCNT_SEL; - -/* - * SePairXsel enum - */ - -typedef enum SePairXsel { -RASTER_CONFIG_SE_PAIR_XSEL_8_WIDE_TILE = 0x00000000, -RASTER_CONFIG_SE_PAIR_XSEL_16_WIDE_TILE = 0x00000001, -RASTER_CONFIG_SE_PAIR_XSEL_32_WIDE_TILE = 0x00000002, -RASTER_CONFIG_SE_PAIR_XSEL_64_WIDE_TILE = 0x00000003, -RASTER_CONFIG_SE_PAIR_XSEL_128_WIDE_TILE = 0x00000004, -} SePairXsel; - -/* - * SePairYsel enum - */ - -typedef enum SePairYsel { -RASTER_CONFIG_SE_PAIR_YSEL_8_WIDE_TILE = 0x00000000, -RASTER_CONFIG_SE_PAIR_YSEL_16_WIDE_TILE = 0x00000001, -RASTER_CONFIG_SE_PAIR_YSEL_32_WIDE_TILE = 0x00000002, -RASTER_CONFIG_SE_PAIR_YSEL_64_WIDE_TILE = 0x00000003, -RASTER_CONFIG_SE_PAIR_YSEL_128_WIDE_TILE = 0x00000004, -} SePairYsel; - -/* - * SePairMap enum - */ - -typedef enum SePairMap { -RASTER_CONFIG_SE_PAIR_MAP_0 = 0x00000000, 
-RASTER_CONFIG_SE_PAIR_MAP_1 = 0x00000001, -RASTER_CONFIG_SE_PAIR_MAP_2 = 0x00000002, -RASTER_CONFIG_SE_PAIR_MAP_3 = 0x00000003, -} SePairMap; - -/* - * SeXsel enum - */ - -typedef enum SeXsel { -RASTER_CONFIG_SE_XSEL_8_WIDE_TILE = 0x00000000, -RASTER_CONFIG_SE_XSEL_16_WIDE_TILE = 0x00000001, -RASTER_CONFIG_SE_XSEL_32_WIDE_TILE = 0x00000002, -RASTER_CONFIG_SE_XSEL_64_WIDE_TILE = 0x00000003, -RASTER_CONFIG_SE_XSEL_128_WIDE_TILE = 0x00000004, -} SeXsel; - -/* - * SeYsel enum - */ - -typedef enum SeYsel { -RASTER_CONFIG_SE_YSEL_8_WIDE_TILE = 0x00000000, -RASTER_CONFIG_SE_YSEL_16_WIDE_TILE = 0x00000001, -RASTER_CONFIG_SE_YSEL_32_WIDE_TILE = 0x00000002, -RASTER_CONFIG_SE_YSEL_64_WIDE_TILE = 0x00000003, -RASTER_CONFIG_SE_YSEL_128_WIDE_TILE = 0x00000004, -} SeYsel; - -/* - * SeMap enum - */ - -typedef enum SeMap { -RASTER_CONFIG_SE_MAP_0 = 0x00000000, -RASTER_CONFIG_SE_MAP_1 = 0x00000001, -RASTER_CONFIG_SE_MAP_2 = 0x00000002, -RASTER_CONFIG_SE_MAP_3 = 0x00000003, -} SeMap; - -/* - * ScXsel enum - */ - -typedef enum ScXsel { -RASTER_CONFIG_SC_XSEL_8_WIDE_TILE = 0x00000000, -RASTER_CONFIG_SC_XSEL_16_WIDE_TILE = 0x00000001, -RASTER_CONFIG_SC_XSEL_32_WIDE_TILE = 0x00000002, -RASTER_CONFIG_SC_XSEL_64_WIDE_TILE = 0x00000003, -} ScXsel; - -/* - * ScYsel enum - */ - -typedef enum ScYsel { -RASTER_CONFIG_SC_YSEL_8_WIDE_TILE = 0x00000000, -RASTER_CONFIG_SC_YSEL_16_WIDE_TILE = 0x00000001, -RASTER_CONFIG_SC_YSEL_32_WIDE_TILE = 0x00000002, -RASTER_CONFIG_SC_YSEL_64_WIDE_TILE = 0x00000003, -} ScYsel; - -/* - * ScMap enum - */ - -typedef enum ScMap { -RASTER_CONFIG_SC_MAP_0 = 0x00000000, -RASTER_CONFIG_SC_MAP_1 = 0x00000001, -RASTER_CONFIG_SC_MAP_2 = 0x00000002, -RASTER_CONFIG_SC_MAP_3 = 0x00000003, -} ScMap; - -/* - * PkrXsel2 enum - */ - -typedef enum PkrXsel2 { -RASTER_CONFIG_PKR_XSEL2_0 = 0x00000000, -RASTER_CONFIG_PKR_XSEL2_1 = 0x00000001, -RASTER_CONFIG_PKR_XSEL2_2 = 0x00000002, -RASTER_CONFIG_PKR_XSEL2_3 = 0x00000003, -} PkrXsel2; - -/* - * PkrXsel enum - */ - -typedef enum 
PkrXsel { -RASTER_CONFIG_PKR_XSEL_0 = 0x00000000, -RASTER_CONFIG_PKR_XSEL_1 = 0x00000001, -RASTER_CONFIG_PKR_XSEL_2 = 0x00000002, -RASTER_CONFIG_PKR_XSEL_3 = 0x00000003, -} PkrXsel; - -/* - * PkrYsel enum - */ - -typedef enum PkrYsel { -RASTER_CONFIG_PKR_YSEL_0 = 0x00000000, -RASTER_CONFIG_PKR_YSEL_1 = 0x00000001, -RASTER_CONFIG_PKR_YSEL_2 = 0x00000002, -RASTER_CONFIG_PKR_YSEL_3 = 0x00000003, -} PkrYsel; - -/* - * PkrMap enum - */ - -typedef enum PkrMap { -RASTER_CONFIG_PKR_MAP_0 = 0x00000000, -RASTER_CONFIG_PKR_MAP_1 = 0x00000001, -RASTER_CONFIG_PKR_MAP_2 = 0x00000002, -RASTER_CONFIG_PKR_MAP_3 = 0x00000003, -} PkrMap; - -/* - * RbXsel enum - */ - -typedef enum RbXsel { -RASTER_CONFIG_RB_XSEL_0 = 0x00000000, -RASTER_CONFIG_RB_XSEL_1 = 0x00000001, -} RbXsel; - -/* - * RbYsel enum - */ - -typedef enum RbYsel { -RASTER_CONFIG_RB_YSEL_0 = 0x00000000, -RASTER_CONFIG_RB_YSEL_1 = 0x00000001, -} RbYsel; - -/* - * RbXsel2 enum - */ - -typedef enum RbXsel2 { -RASTER_CONFIG_RB_XSEL2_0 = 0x00000000, -RASTER_CONFIG_RB_XSEL2_1 = 0x00000001, -RASTER_CONFIG_RB_XSEL2_2 = 0x00000002, -RASTER_CONFIG_RB_XSEL2_3 = 0x00000003, -} RbXsel2; - -/* - * RbMap enum - */ - -typedef enum RbMap { -RASTER_CONFIG_RB_MAP_0 = 0x00000000, -RASTER_CONFIG_RB_MAP_1 = 0x00000001, -RASTER_CONFIG_RB_MAP_2 = 0x00000002, -RASTER_CONFIG_RB_MAP_3 = 0x00000003, -} RbMap; - -/* - * BinningMode enum - */ - -typedef enum BinningMode { -BINNING_ALLOWED = 0x00000000, -FORCE_BINNING_ON = 0x00000001, -DISABLE_BINNING_USE_NEW_SC = 0x00000002, -DISABLE_BINNING_USE_LEGACY_SC = 0x00000003, -} BinningMode; - -/* - * BinEventCntl enum - */ - -typedef enum BinEventCntl { -BINNER_BREAK_BATCH = 0x00000000, -BINNER_PIPELINE = 0x00000001, -BINNER_DROP_ASSERT = 0x00000002, -} BinEventCntl; - -/* - * CovToShaderSel enum - */ - -typedef enum CovToShaderSel { -INPUT_COVERAGE = 0x00000000, -INPUT_INNER_COVERAGE = 0x00000001, -INPUT_DEPTH_COVERAGE = 0x00000002, -RAW = 0x00000003, -} CovToShaderSel; - 
-/******************************************************* - * RMI Enums - *******************************************************/ - -/* - * RMIPerfSel enum - */ - -typedef enum RMIPerfSel { -RMI_PERF_SEL_NONE = 0x00000000, -RMI_PERF_SEL_BUSY = 0x00000001, -RMI_PERF_SEL_REG_CLK_VLD = 0x00000002, -RMI_PERF_SEL_DYN_CLK_CMN_VLD = 0x00000003, -RMI_PERF_SEL_DYN_CLK_RB_VLD = 0x00000004, -RMI_PERF_SEL_DYN_CLK_PERF_VLD = 0x00000005, -RMI_PERF_SEL_PERF_WINDOW = 0x00000006, -RMI_PERF_SEL_EVENT_SEND = 0x00000007, -RMI_PERF_SEL_RMI_INVALIDATION_ATC_REQ_VMID0 = 0x00000008, -RMI_PERF_SEL_RMI_INVALIDATION_ATC_REQ_VMID1 = 0x00000009, -RMI_PERF_SEL_RMI_INVALIDATION_ATC_REQ_VMID2 = 0x0000000a, -RMI_PERF_SEL_RMI_INVALIDATION_ATC_REQ_VMID3 = 0x0000000b, -RMI_PERF_SEL_RMI_INVALIDATION_ATC_REQ_VMID4 = 0x0000000c, -RMI_PERF_SEL_RMI_INVALIDATION_ATC_REQ_VMID5 = 0x0000000d, -RMI_PERF_SEL_RMI_INVALIDATION_ATC_REQ_VMID6 = 0x0000000e, -RMI_PERF_SEL_RMI_INVALIDATION_ATC_REQ_VMID7 = 0x0000000f, -RMI_PERF_SEL_RMI_INVALIDATION_ATC_REQ_VMID8 = 0x00000010, -RMI_PERF_SEL_RMI_INVALIDATION_ATC_REQ_VMID9 = 0x00000011, -RMI_PERF_SEL_RMI_INVALIDATION_ATC_REQ_VMID10 = 0x00000012, -RMI_PERF_SEL_RMI_INVALIDATION_ATC_REQ_VMID11 = 0x00000013, -RMI_PERF_SEL_RMI_INVALIDATION_ATC_REQ_VMID12 = 0x00000014, -RMI_PERF_SEL_RMI_INVALIDATION_ATC_REQ_VMID13 = 0x00000015, -RMI_PERF_SEL_RMI_INVALIDATION_ATC_REQ_VMID14 = 0x00000016, -RMI_PERF_SEL_RMI_INVALIDATION_ATC_REQ_VMID15 = 0x00000017, -RMI_PERF_SEL_RMI_INVALIDATION_ATC_REQ_VMID_ALL = 0x00000018, -RMI_PERF_SEL_RMI_INVALIDATION_REQ_START_FINISH_VMID0 = 0x00000019, -RMI_PERF_SEL_RMI_INVALIDATION_REQ_START_FINISH_VMID1 = 0x0000001a, -RMI_PERF_SEL_RMI_INVALIDATION_REQ_START_FINISH_VMID2 = 0x0000001b, -RMI_PERF_SEL_RMI_INVALIDATION_REQ_START_FINISH_VMID3 = 0x0000001c, -RMI_PERF_SEL_RMI_INVALIDATION_REQ_START_FINISH_VMID4 = 0x0000001d, -RMI_PERF_SEL_RMI_INVALIDATION_REQ_START_FINISH_VMID5 = 0x0000001e, -RMI_PERF_SEL_RMI_INVALIDATION_REQ_START_FINISH_VMID6 = 0x0000001f, 
-RMI_PERF_SEL_RMI_INVALIDATION_REQ_START_FINISH_VMID7 = 0x00000020, -RMI_PERF_SEL_RMI_INVALIDATION_REQ_START_FINISH_VMID8 = 0x00000021, -RMI_PERF_SEL_RMI_INVALIDATION_REQ_START_FINISH_VMID9 = 0x00000022, -RMI_PERF_SEL_RMI_INVALIDATION_REQ_START_FINISH_VMID10 = 0x00000023, -RMI_PERF_SEL_RMI_INVALIDATION_REQ_START_FINISH_VMID11 = 0x00000024, -RMI_PERF_SEL_RMI_INVALIDATION_REQ_START_FINISH_VMID12 = 0x00000025, -RMI_PERF_SEL_RMI_INVALIDATION_REQ_START_FINISH_VMID13 = 0x00000026, -RMI_PERF_SEL_RMI_INVALIDATION_REQ_START_FINISH_VMID14 = 0x00000027, -RMI_PERF_SEL_RMI_INVALIDATION_REQ_START_FINISH_VMID15 = 0x00000028, -RMI_PERF_SEL_RMI_INVALIDATION_REQ_START_FINISH_VMID_ALL = 0x00000029, -RMI_PERF_SEL_UTCL1_TRANSLATION_MISS = 0x0000002a, -RMI_PERF_SEL_UTCL1_PERMISSION_MISS = 0x0000002b, -RMI_PERF_SEL_UTCL1_REQUEST = 0x0000002c, -RMI_PERF_SEL_UTCL1_STALL_INFLIGHT_MAX = 0x0000002d, -RMI_PERF_SEL_UTCL1_STALL_LRU_INFLIGHT = 0x0000002e, -RMI_PERF_SEL_UTCL1_LFIFO_FULL = 0x0000002f, -RMI_PERF_SEL_UTCL1_STALL_LFIFO_NOT_RES = 0x00000030, -RMI_PERF_SEL_UTCL1_STALL_UTCL2_REQ_OUT_OF_CREDITS = 0x00000031, -RMI_PERF_SEL_UTCL1_STALL_MISSFIFO_FULL = 0x00000032, -RMI_PERF_SEL_UTCL1_HIT_FIFO_FULL = 0x00000033, -RMI_PERF_SEL_UTCL1_STALL_MULTI_MISS = 0x00000034, -RMI_PERF_SEL_RB_RMI_WRREQ_ALL_CID = 0x00000035, -RMI_PERF_SEL_RB_RMI_WRREQ_BUSY = 0x00000036, -RMI_PERF_SEL_RB_RMI_WRREQ_CID0 = 0x00000037, -RMI_PERF_SEL_RB_RMI_WRREQ_CID1 = 0x00000038, -RMI_PERF_SEL_RB_RMI_WRREQ_CID2 = 0x00000039, -RMI_PERF_SEL_RB_RMI_WRREQ_CID3 = 0x0000003a, -RMI_PERF_SEL_RB_RMI_WRREQ_CID4 = 0x0000003b, -RMI_PERF_SEL_RB_RMI_WRREQ_CID5 = 0x0000003c, -RMI_PERF_SEL_RB_RMI_WRREQ_CID6 = 0x0000003d, -RMI_PERF_SEL_RB_RMI_WRREQ_CID7 = 0x0000003e, -RMI_PERF_SEL_RB_RMI_WRREQ_INFLIGHT_ALL_ORONE_CID = 0x0000003f, -RMI_PERF_SEL_RB_RMI_WRREQ_BURST_LENGTH_ALL_ORONE_CID = 0x00000040, -RMI_PERF_SEL_RB_RMI_WRREQ_BURST_ALL_ORONE_CID = 0x00000041, -RMI_PERF_SEL_RB_RMI_WRREQ_RESIDENCY = 0x00000042, 
-RMI_PERF_SEL_RMI_RB_WRRET_VALID_ALL_CID = 0x00000043, -RMI_PERF_SEL_RMI_RB_WRRET_VALID_CID0 = 0x00000044, -RMI_PERF_SEL_RMI_RB_WRRET_VALID_CID1 = 0x00000045, -RMI_PERF_SEL_RMI_RB_WRRET_VALID_CID2 = 0x00000046, -RMI_PERF_SEL_RMI_RB_WRRET_VALID_CID3 = 0x00000047, -RMI_PERF_SEL_RMI_RB_WRRET_VALID_CID4 = 0x00000048, -RMI_PERF_SEL_RMI_RB_WRRET_VALID_CID5 = 0x00000049, -RMI_PERF_SEL_RMI_RB_WRRET_VALID_CID6 = 0x0000004a, -RMI_PERF_SEL_RMI_RB_WRRET_VALID_CID7 = 0x0000004b, -RMI_PERF_SEL_RMI_RB_WRRET_VALID_NACK0 = 0x0000004c, -RMI_PERF_SEL_RMI_RB_WRRET_VALID_NACK1 = 0x0000004d, -RMI_PERF_SEL_RMI_RB_WRRET_VALID_NACK2 = 0x0000004e, -RMI_PERF_SEL_RMI_RB_WRRET_VALID_NACK3 = 0x0000004f, -RMI_PERF_SEL_RB_RMI_32BRDREQ_ALL_CID = 0x00000050, -RMI_PERF_SEL_RB_RMI_RDREQ_ALL_CID = 0x00000051, -RMI_PERF_SEL_RB_RMI_RDREQ_BUSY = 0x00000052, -RMI_PERF_SEL_RB_RMI_32BRDREQ_CID0 = 0x00000053, -RMI_PERF_SEL_RB_RMI_32BRDREQ_CID1 = 0x00000054, -RMI_PERF_SEL_RB_RMI_32BRDREQ_CID2 = 0x00000055, -RMI_PERF_SEL_RB_RMI_32BRDREQ_CID3 = 0x00000056, -RMI_PERF_SEL_RB_RMI_32BRDREQ_CID4 = 0x00000057, -RMI_PERF_SEL_RB_RMI_32BRDREQ_CID5 = 0x00000058, -RMI_PERF_SEL_RB_RMI_32BRDREQ_CID6 = 0x00000059, -RMI_PERF_SEL_RB_RMI_32BRDREQ_CID7 = 0x0000005a, -RMI_PERF_SEL_RB_RMI_RDREQ_CID0 = 0x0000005b, -RMI_PERF_SEL_RB_RMI_RDREQ_CID1 = 0x0000005c, -RMI_PERF_SEL_RB_RMI_RDREQ_CID2 = 0x0000005d, -RMI_PERF_SEL_RB_RMI_RDREQ_CID3 = 0x0000005e, -RMI_PERF_SEL_RB_RMI_RDREQ_CID4 = 0x0000005f, -RMI_PERF_SEL_RB_RMI_RDREQ_CID5 = 0x00000060, -RMI_PERF_SEL_RB_RMI_RDREQ_CID6 = 0x00000061, -RMI_PERF_SEL_RB_RMI_RDREQ_CID7 = 0x00000062, -RMI_PERF_SEL_RB_RMI_32BRDREQ_INFLIGHT_ALL_ORONE_CID = 0x00000063, -RMI_PERF_SEL_RB_RMI_RDREQ_BURST_LENGTH_ALL_ORONE_CID = 0x00000064, -RMI_PERF_SEL_RB_RMI_RDREQ_BURST_ALL_ORONE_CID = 0x00000065, -RMI_PERF_SEL_RB_RMI_RDREQ_RESIDENCY = 0x00000066, -RMI_PERF_SEL_RMI_RB_32BRDRET_VALID_ALL_CID = 0x00000067, -RMI_PERF_SEL_RMI_RB_32BRDRET_VALID_CID0 = 0x00000068, -RMI_PERF_SEL_RMI_RB_32BRDRET_VALID_CID1 = 
0x00000069, -RMI_PERF_SEL_RMI_RB_32BRDRET_VALID_CID2 = 0x0000006a, -RMI_PERF_SEL_RMI_RB_32BRDRET_VALID_CID3 = 0x0000006b, -RMI_PERF_SEL_RMI_RB_32BRDRET_VALID_CID4 = 0x0000006c, -RMI_PERF_SEL_RMI_RB_32BRDRET_VALID_CID5 = 0x0000006d, -RMI_PERF_SEL_RMI_RB_32BRDRET_VALID_CID6 = 0x0000006e, -RMI_PERF_SEL_RMI_RB_32BRDRET_VALID_CID7 = 0x0000006f, -RMI_PERF_SEL_RMI_RB_32BRDRET_VALID_NACK0 = 0x00000070, -RMI_PERF_SEL_RMI_RB_32BRDRET_VALID_NACK1 = 0x00000071, -RMI_PERF_SEL_RMI_RB_32BRDRET_VALID_NACK2 = 0x00000072, -RMI_PERF_SEL_RMI_RB_32BRDRET_VALID_NACK3 = 0x00000073, -RMI_PERF_SEL_RMI_TC_WRREQ_ALL_CID = 0x00000074, -RMI_PERF_SEL_RMI_TC_REQ_BUSY = 0x00000075, -RMI_PERF_SEL_RMI_TC_WRREQ_CID0 = 0x00000076, -RMI_PERF_SEL_RMI_TC_WRREQ_CID1 = 0x00000077, -RMI_PERF_SEL_RMI_TC_WRREQ_CID2 = 0x00000078, -RMI_PERF_SEL_RMI_TC_WRREQ_CID3 = 0x00000079, -RMI_PERF_SEL_RMI_TC_WRREQ_CID4 = 0x0000007a, -RMI_PERF_SEL_RMI_TC_WRREQ_CID5 = 0x0000007b, -RMI_PERF_SEL_RMI_TC_WRREQ_CID6 = 0x0000007c, -RMI_PERF_SEL_RMI_TC_WRREQ_CID7 = 0x0000007d, -RMI_PERF_SEL_RMI_TC_WRREQ_INFLIGHT_ALL_CID = 0x0000007e, -RMI_PERF_SEL_TC_RMI_WRRET_VALID_ALL_CID = 0x0000007f, -RMI_PERF_SEL_RMI_TC_RDREQ_ALL_CID = 0x00000080, -RMI_PERF_SEL_RMI_TC_RDREQ_CID0 = 0x00000081, -RMI_PERF_SEL_RMI_TC_RDREQ_CID1 = 0x00000082, -RMI_PERF_SEL_RMI_TC_RDREQ_CID2 = 0x00000083, -RMI_PERF_SEL_RMI_TC_RDREQ_CID3 = 0x00000084, -RMI_PERF_SEL_RMI_TC_RDREQ_CID4 = 0x00000085, -RMI_PERF_SEL_RMI_TC_RDREQ_CID5 = 0x00000086, -RMI_PERF_SEL_RMI_TC_RDREQ_CID6 = 0x00000087, -RMI_PERF_SEL_RMI_TC_RDREQ_CID7 = 0x00000088, -RMI_PERF_SEL_RMI_TC_RDREQ_INFLIGHT_ALL_CID = 0x00000089, -RMI_PERF_SEL_TC_RMI_RDRET_VALID_ALL_CID = 0x0000008a, -RMI_PERF_SEL_UTCL1_BUSY = 0x0000008b, -RMI_PERF_SEL_RMI_UTC_REQ = 0x0000008c, -RMI_PERF_SEL_RMI_UTC_BUSY = 0x0000008d, -RMI_PERF_SEL_UTCL1_UTCL2_REQ = 0x0000008e, -RMI_PERF_SEL_PROBE_UTCL1_XNACK_RETRY = 0x0000008f, -RMI_PERF_SEL_PROBE_UTCL1_ALL_FAULT = 0x00000090, -RMI_PERF_SEL_PROBE_UTCL1_PRT_FAULT = 0x00000091, 
-RMI_PERF_SEL_PROBE_UTCL1_VMID_BYPASS = 0x00000092, -RMI_PERF_SEL_PROBE_UTCL1_XNACK_NORETRY_FAULT = 0x00000093, -RMI_PERF_SEL_XNACK_FIFO_NUM_USED = 0x00000094, -RMI_PERF_SEL_LAT_FIFO_NUM_USED = 0x00000095, -RMI_PERF_SEL_LAT_FIFO_BLOCKING_REQ = 0x00000096, -RMI_PERF_SEL_LAT_FIFO_NONBLOCKING_REQ = 0x00000097, -RMI_PERF_SEL_XNACK_FIFO_FULL = 0x00000098, -RMI_PERF_SEL_XNACK_FIFO_BUSY = 0x00000099, -RMI_PERF_SEL_LAT_FIFO_FULL = 0x0000009a, -RMI_PERF_SEL_SKID_FIFO_DEPTH = 0x0000009b, -RMI_PERF_SEL_TCIW_INFLIGHT_COUNT = 0x0000009c, -RMI_PERF_SEL_PRT_FIFO_NUM_USED = 0x0000009d, -RMI_PERF_SEL_PRT_FIFO_REQ = 0x0000009e, -RMI_PERF_SEL_PRT_FIFO_BUSY = 0x0000009f, -RMI_PERF_SEL_TCIW_REQ = 0x000000a0, -RMI_PERF_SEL_TCIW_BUSY = 0x000000a1, -RMI_PERF_SEL_SKID_FIFO_REQ = 0x000000a2, -RMI_PERF_SEL_SKID_FIFO_BUSY = 0x000000a3, -RMI_PERF_SEL_DEMUX_TCIW_RESIDENCY_NACK0 = 0x000000a4, -RMI_PERF_SEL_DEMUX_TCIW_RESIDENCY_NACK1 = 0x000000a5, -RMI_PERF_SEL_DEMUX_TCIW_RESIDENCY_NACK2 = 0x000000a6, -RMI_PERF_SEL_DEMUX_TCIW_RESIDENCY_NACK3 = 0x000000a7, -RMI_PERF_SEL_XBAR_PROBEGEN_RTS_RTR = 0x000000a8, -RMI_PERF_SEL_XBAR_PROBEGEN_RTSB_RTR = 0x000000a9, -RMI_PERF_SEL_XBAR_PROBEGEN_RTS_RTRB = 0x000000aa, -RMI_PERF_SEL_XBAR_PROBEGEN_RTSB_RTRB = 0x000000ab, -RMI_PERF_SEL_DEMUX_TCIW_FORMATTER_RTS_RTR = 0x000000ac, -RMI_PERF_SEL_DEMUX_TCIW_FORMATTER_RTSB_RTR = 0x000000ad, -RMI_PERF_SEL_DEMUX_TCIW_FORMATTER_RTS_RTRB = 0x000000ae, -RMI_PERF_SEL_DEMUX_TCIW_FORMATTER_RTSB_RTRB = 0x000000af, -RMI_PERF_SEL_WRREQCONSUMER_XBAR_WRREQ_RTS_RTR = 0x000000b0, -RMI_PERF_SEL_WRREQCONSUMER_XBAR_WRREQ_RTSB_RTR = 0x000000b1, -RMI_PERF_SEL_WRREQCONSUMER_XBAR_WRREQ_RTS_RTRB = 0x000000b2, -RMI_PERF_SEL_WRREQCONSUMER_XBAR_WRREQ_RTSB_RTRB = 0x000000b3, -RMI_PERF_SEL_RDREQCONSUMER_XBAR_RDREQ_RTS_RTR = 0x000000b4, -RMI_PERF_SEL_RDREQCONSUMER_XBAR_RDREQ_RTSB_RTR = 0x000000b5, -RMI_PERF_SEL_RDREQCONSUMER_XBAR_RDREQ_RTS_RTRB = 0x000000b6, -RMI_PERF_SEL_RDREQCONSUMER_XBAR_RDREQ_RTSB_RTRB = 0x000000b7, 
-RMI_PERF_SEL_POP_DEMUX_RTS_RTR = 0x000000b8, -RMI_PERF_SEL_POP_DEMUX_RTSB_RTR = 0x000000b9, -RMI_PERF_SEL_POP_DEMUX_RTS_RTRB = 0x000000ba, -RMI_PERF_SEL_POP_DEMUX_RTSB_RTRB = 0x000000bb, -RMI_PERF_SEL_PROBEGEN_UTC_RTS_RTR = 0x000000bc, -RMI_PERF_SEL_PROBEGEN_UTC_RTSB_RTR = 0x000000bd, -RMI_PERF_SEL_PROBEGEN_UTC_RTS_RTRB = 0x000000be, -RMI_PERF_SEL_PROBEGEN_UTC_RTSB_RTRB = 0x000000bf, -RMI_PERF_SEL_UTC_POP_RTS_RTR = 0x000000c0, -RMI_PERF_SEL_UTC_POP_RTSB_RTR = 0x000000c1, -RMI_PERF_SEL_UTC_POP_RTS_RTRB = 0x000000c2, -RMI_PERF_SEL_UTC_POP_RTSB_RTRB = 0x000000c3, -RMI_PERF_SEL_POP_XNACK_RTS_RTR = 0x000000c4, -RMI_PERF_SEL_POP_XNACK_RTSB_RTR = 0x000000c5, -RMI_PERF_SEL_POP_XNACK_RTS_RTRB = 0x000000c6, -RMI_PERF_SEL_POP_XNACK_RTSB_RTRB = 0x000000c7, -RMI_PERF_SEL_XNACK_PROBEGEN_RTS_RTR = 0x000000c8, -RMI_PERF_SEL_XNACK_PROBEGEN_RTSB_RTR = 0x000000c9, -RMI_PERF_SEL_XNACK_PROBEGEN_RTS_RTRB = 0x000000ca, -RMI_PERF_SEL_XNACK_PROBEGEN_RTSB_RTRB = 0x000000cb, -RMI_PERF_SEL_PRTFIFO_RTNFORMATTER_RTS_RTR = 0x000000cc, -RMI_PERF_SEL_PRTFIFO_RTNFORMATTER_RTSB_RTR = 0x000000cd, -RMI_PERF_SEL_PRTFIFO_RTNFORMATTER_RTS_RTRB = 0x000000ce, -RMI_PERF_SEL_PRTFIFO_RTNFORMATTER_RTSB_RTRB = 0x000000cf, -RMI_PERF_SEL_SKID_FIFO_IN_RTS = 0x000000d0, -RMI_PERF_SEL_SKID_FIFO_IN_RTSB = 0x000000d1, -RMI_PERF_SEL_SKID_FIFO_OUT_RTS = 0x000000d2, -RMI_PERF_SEL_SKID_FIFO_OUT_RTSB = 0x000000d3, -RMI_PERF_SEL_XBAR_PROBEGEN_READ_RTS_RTR = 0x000000d4, -RMI_PERF_SEL_XBAR_PROBEGEN_WRITE_RTS_RTR = 0x000000d5, -RMI_PERF_SEL_XBAR_PROBEGEN_IN0_RTS_RTR = 0x000000d6, -RMI_PERF_SEL_XBAR_PROBEGEN_IN1_RTS_RTR = 0x000000d7, -RMI_PERF_SEL_XBAR_PROBEGEN_CB_RTS_RTR = 0x000000d8, -RMI_PERF_SEL_XBAR_PROBEGEN_DB_RTS_RTR = 0x000000d9, -RMI_PERF_SEL_REORDER_FIFO_REQ = 0x000000da, -RMI_PERF_SEL_REORDER_FIFO_BUSY = 0x000000db, -RMI_PERF_SEL_RMI_RB_EARLY_WRACK_ALL_CID = 0x000000dc, -RMI_PERF_SEL_RMI_RB_EARLY_WRACK_CID0 = 0x000000dd, -RMI_PERF_SEL_RMI_RB_EARLY_WRACK_CID1 = 0x000000de, -RMI_PERF_SEL_RMI_RB_EARLY_WRACK_CID2 = 
0x000000df, -RMI_PERF_SEL_RMI_RB_EARLY_WRACK_CID3 = 0x000000e0, -RMI_PERF_SEL_RMI_RB_EARLY_WRACK_CID4 = 0x000000e1, -RMI_PERF_SEL_RMI_RB_EARLY_WRACK_CID5 = 0x000000e2, -RMI_PERF_SEL_RMI_RB_EARLY_WRACK_CID6 = 0x000000e3, -RMI_PERF_SEL_RMI_RB_EARLY_WRACK_CID7 = 0x000000e4, -RMI_PERF_SEL_RMI_RB_EARLY_WRACK_NACK0 = 0x000000e5, -RMI_PERF_SEL_RMI_RB_EARLY_WRACK_NACK1 = 0x000000e6, -RMI_PERF_SEL_RMI_RB_EARLY_WRACK_NACK2 = 0x000000e7, -RMI_PERF_SEL_RMI_RB_EARLY_WRACK_NACK3 = 0x000000e8, -} RMIPerfSel; - - -#endif /*_vega10_ENUM_HEADER*/ - diff -Nru mesa-18.3.3/src/amd/addrlib/gfx9/coord.cpp mesa-19.0.1/src/amd/addrlib/gfx9/coord.cpp --- mesa-18.3.3/src/amd/addrlib/gfx9/coord.cpp 2017-11-14 18:46:21.000000000 +0000 +++ mesa-19.0.1/src/amd/addrlib/gfx9/coord.cpp 1970-01-01 00:00:00.000000000 +0000 @@ -1,707 +0,0 @@ -/* - * Copyright © 2017 Advanced Micro Devices, Inc. - * All Rights Reserved. - * - * Permission is hereby granted, free of charge, to any person obtaining - * a copy of this software and associated documentation files (the - * "Software"), to deal in the Software without restriction, including - * without limitation the rights to use, copy, modify, merge, publish, - * distribute, sub license, and/or sell copies of the Software, and to - * permit persons to whom the Software is furnished to do so, subject to - * the following conditions: - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES - * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - * NON-INFRINGEMENT. IN NO EVENT SHALL THE COPYRIGHT HOLDERS, AUTHORS - * AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, - * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE - * USE OR OTHER DEALINGS IN THE SOFTWARE. 
- * - * The above copyright notice and this permission notice (including the - * next paragraph) shall be included in all copies or substantial portions - * of the Software. - */ - -// Coordinate class implementation -#include "addrcommon.h" -#include "coord.h" - -Coordinate::Coordinate() -{ - dim = 'x'; - ord = 0; -} - -Coordinate::Coordinate(INT_8 c, INT_32 n) -{ - set(c, n); -} - -VOID Coordinate::set(INT_8 c, INT_32 n) -{ - dim = c; - ord = static_cast(n); -} - -UINT_32 Coordinate::ison(UINT_32 x, UINT_32 y, UINT_32 z, UINT_32 s, UINT_32 m) const -{ - UINT_32 bit = static_cast(1ull << static_cast(ord)); - UINT_32 out = 0; - - switch (dim) - { - case 'm': out = m & bit; break; - case 's': out = s & bit; break; - case 'x': out = x & bit; break; - case 'y': out = y & bit; break; - case 'z': out = z & bit; break; - } - return (out != 0) ? 1 : 0; -} - -INT_8 Coordinate::getdim() -{ - return dim; -} - -INT_8 Coordinate::getord() -{ - return ord; -} - -BOOL_32 Coordinate::operator==(const Coordinate& b) -{ - return (dim == b.dim) && (ord == b.ord); -} - -BOOL_32 Coordinate::operator<(const Coordinate& b) -{ - BOOL_32 ret; - - if (dim == b.dim) - { - ret = ord < b.ord; - } - else - { - if (dim == 's' || b.dim == 'm') - { - ret = TRUE; - } - else if (b.dim == 's' || dim == 'm') - { - ret = FALSE; - } - else if (ord == b.ord) - { - ret = dim < b.dim; - } - else - { - ret = ord < b.ord; - } - } - - return ret; -} - -BOOL_32 Coordinate::operator>(const Coordinate& b) -{ - BOOL_32 lt = *this < b; - BOOL_32 eq = *this == b; - return !lt && !eq; -} - -BOOL_32 Coordinate::operator<=(const Coordinate& b) -{ - return (*this < b) || (*this == b); -} - -BOOL_32 Coordinate::operator>=(const Coordinate& b) -{ - return !(*this < b); -} - -BOOL_32 Coordinate::operator!=(const Coordinate& b) -{ - return !(*this == b); -} - -Coordinate& Coordinate::operator++(INT_32) -{ - ord++; - return *this; -} - -// CoordTerm - -CoordTerm::CoordTerm() -{ - num_coords = 0; -} - -VOID 
CoordTerm::Clear() -{ - num_coords = 0; -} - -VOID CoordTerm::add(Coordinate& co) -{ - // This function adds a coordinate INT_32o the list - // It will prevent the same coordinate from appearing, - // and will keep the list ordered from smallest to largest - UINT_32 i; - - for (i = 0; i < num_coords; i++) - { - if (m_coord[i] == co) - { - break; - } - if (m_coord[i] > co) - { - for (UINT_32 j = num_coords; j > i; j--) - { - m_coord[j] = m_coord[j - 1]; - } - m_coord[i] = co; - num_coords++; - break; - } - } - - if (i == num_coords) - { - m_coord[num_coords] = co; - num_coords++; - } -} - -VOID CoordTerm::add(CoordTerm& cl) -{ - for (UINT_32 i = 0; i < cl.num_coords; i++) - { - add(cl.m_coord[i]); - } -} - -BOOL_32 CoordTerm::remove(Coordinate& co) -{ - BOOL_32 remove = FALSE; - for (UINT_32 i = 0; i < num_coords; i++) - { - if (m_coord[i] == co) - { - remove = TRUE; - num_coords--; - } - - if (remove) - { - m_coord[i] = m_coord[i + 1]; - } - } - return remove; -} - -BOOL_32 CoordTerm::Exists(Coordinate& co) -{ - BOOL_32 exists = FALSE; - for (UINT_32 i = 0; i < num_coords; i++) - { - if (m_coord[i] == co) - { - exists = TRUE; - break; - } - } - return exists; -} - -VOID CoordTerm::copyto(CoordTerm& cl) -{ - cl.num_coords = num_coords; - for (UINT_32 i = 0; i < num_coords; i++) - { - cl.m_coord[i] = m_coord[i]; - } -} - -UINT_32 CoordTerm::getsize() -{ - return num_coords; -} - -UINT_32 CoordTerm::getxor(UINT_32 x, UINT_32 y, UINT_32 z, UINT_32 s, UINT_32 m) const -{ - UINT_32 out = 0; - for (UINT_32 i = 0; i < num_coords; i++) - { - out = out ^ m_coord[i].ison(x, y, z, s, m); - } - return out; -} - -VOID CoordTerm::getsmallest(Coordinate& co) -{ - co = m_coord[0]; -} - -UINT_32 CoordTerm::Filter(INT_8 f, Coordinate& co, UINT_32 start, INT_8 axis) -{ - for (UINT_32 i = start; i < num_coords;) - { - if (((f == '<' && m_coord[i] < co) || - (f == '>' && m_coord[i] > co) || - (f == '=' && m_coord[i] == co)) && - (axis == '\0' || axis == m_coord[i].getdim())) - { - for 
(UINT_32 j = i; j < num_coords - 1; j++) - { - m_coord[j] = m_coord[j + 1]; - } - num_coords--; - } - else - { - i++; - } - } - return num_coords; -} - -Coordinate& CoordTerm::operator[](UINT_32 i) -{ - return m_coord[i]; -} - -BOOL_32 CoordTerm::operator==(const CoordTerm& b) -{ - BOOL_32 ret = TRUE; - - if (num_coords != b.num_coords) - { - ret = FALSE; - } - else - { - for (UINT_32 i = 0; i < num_coords; i++) - { - // Note: the lists will always be in order, so we can compare the two lists at time - if (m_coord[i] != b.m_coord[i]) - { - ret = FALSE; - break; - } - } - } - return ret; -} - -BOOL_32 CoordTerm::operator!=(const CoordTerm& b) -{ - return !(*this == b); -} - -BOOL_32 CoordTerm::exceedRange(UINT_32 xRange, UINT_32 yRange, UINT_32 zRange, UINT_32 sRange) -{ - BOOL_32 exceed = FALSE; - for (UINT_32 i = 0; (i < num_coords) && (exceed == FALSE); i++) - { - UINT_32 subject; - switch (m_coord[i].getdim()) - { - case 'x': - subject = xRange; - break; - case 'y': - subject = yRange; - break; - case 'z': - subject = zRange; - break; - case 's': - subject = sRange; - break; - case 'm': - subject = 0; - break; - default: - // Invalid input! 
- ADDR_ASSERT_ALWAYS(); - subject = 0; - break; - } - - exceed = ((1u << m_coord[i].getord()) <= subject); - } - - return exceed; -} - -// coordeq -CoordEq::CoordEq() -{ - m_numBits = 0; -} - -VOID CoordEq::remove(Coordinate& co) -{ - for (UINT_32 i = 0; i < m_numBits; i++) - { - m_eq[i].remove(co); - } -} - -BOOL_32 CoordEq::Exists(Coordinate& co) -{ - BOOL_32 exists = FALSE; - - for (UINT_32 i = 0; i < m_numBits; i++) - { - if (m_eq[i].Exists(co)) - { - exists = TRUE; - } - } - return exists; -} - -VOID CoordEq::resize(UINT_32 n) -{ - if (n > m_numBits) - { - for (UINT_32 i = m_numBits; i < n; i++) - { - m_eq[i].Clear(); - } - } - m_numBits = n; -} - -UINT_32 CoordEq::getsize() -{ - return m_numBits; -} - -UINT_64 CoordEq::solve(UINT_32 x, UINT_32 y, UINT_32 z, UINT_32 s, UINT_32 m) const -{ - UINT_64 out = 0; - for (UINT_32 i = 0; i < m_numBits; i++) - { - if (m_eq[i].getxor(x, y, z, s, m) != 0) - { - out |= (1ULL << i); - } - } - return out; -} - -VOID CoordEq::solveAddr( - UINT_64 addr, UINT_32 sliceInM, - UINT_32& x, UINT_32& y, UINT_32& z, UINT_32& s, UINT_32& m) const -{ - UINT_32 xBitsValid = 0; - UINT_32 yBitsValid = 0; - UINT_32 zBitsValid = 0; - UINT_32 sBitsValid = 0; - UINT_32 mBitsValid = 0; - - CoordEq temp = *this; - - x = y = z = s = m = 0; - - UINT_32 bitsLeft = 0; - - for (UINT_32 i = 0; i < temp.m_numBits; i++) - { - UINT_32 termSize = temp.m_eq[i].getsize(); - - if (termSize == 1) - { - INT_8 bit = (addr >> i) & 1; - INT_8 dim = temp.m_eq[i][0].getdim(); - INT_8 ord = temp.m_eq[i][0].getord(); - - ADDR_ASSERT((ord < 32) || (bit == 0)); - - switch (dim) - { - case 'x': - xBitsValid |= (1 << ord); - x |= (bit << ord); - break; - case 'y': - yBitsValid |= (1 << ord); - y |= (bit << ord); - break; - case 'z': - zBitsValid |= (1 << ord); - z |= (bit << ord); - break; - case 's': - sBitsValid |= (1 << ord); - s |= (bit << ord); - break; - case 'm': - mBitsValid |= (1 << ord); - m |= (bit << ord); - break; - default: - break; - } - - 
temp.m_eq[i].Clear(); - } - else if (termSize > 1) - { - bitsLeft++; - } - } - - if (bitsLeft > 0) - { - if (sliceInM != 0) - { - z = m / sliceInM; - zBitsValid = 0xffffffff; - } - - do - { - bitsLeft = 0; - - for (UINT_32 i = 0; i < temp.m_numBits; i++) - { - UINT_32 termSize = temp.m_eq[i].getsize(); - - if (termSize == 1) - { - INT_8 bit = (addr >> i) & 1; - INT_8 dim = temp.m_eq[i][0].getdim(); - INT_8 ord = temp.m_eq[i][0].getord(); - - ADDR_ASSERT((ord < 32) || (bit == 0)); - - switch (dim) - { - case 'x': - xBitsValid |= (1 << ord); - x |= (bit << ord); - break; - case 'y': - yBitsValid |= (1 << ord); - y |= (bit << ord); - break; - case 'z': - zBitsValid |= (1 << ord); - z |= (bit << ord); - break; - case 's': - ADDR_ASSERT_ALWAYS(); - break; - case 'm': - ADDR_ASSERT_ALWAYS(); - break; - default: - break; - } - - temp.m_eq[i].Clear(); - } - else if (termSize > 1) - { - CoordTerm tmpTerm = temp.m_eq[i]; - - for (UINT_32 j = 0; j < termSize; j++) - { - INT_8 dim = temp.m_eq[i][j].getdim(); - INT_8 ord = temp.m_eq[i][j].getord(); - - switch (dim) - { - case 'x': - if (xBitsValid & (1 << ord)) - { - UINT_32 v = (((x >> ord) & 1) << i); - addr ^= static_cast(v); - tmpTerm.remove(temp.m_eq[i][j]); - } - break; - case 'y': - if (yBitsValid & (1 << ord)) - { - UINT_32 v = (((y >> ord) & 1) << i); - addr ^= static_cast(v); - tmpTerm.remove(temp.m_eq[i][j]); - } - break; - case 'z': - if (zBitsValid & (1 << ord)) - { - UINT_32 v = (((z >> ord) & 1) << i); - addr ^= static_cast(v); - tmpTerm.remove(temp.m_eq[i][j]); - } - break; - case 's': - ADDR_ASSERT_ALWAYS(); - break; - case 'm': - ADDR_ASSERT_ALWAYS(); - break; - default: - break; - } - } - - temp.m_eq[i] = tmpTerm; - - bitsLeft++; - } - } - } while (bitsLeft > 0); - } -} - -VOID CoordEq::copy(CoordEq& o, UINT_32 start, UINT_32 num) -{ - o.m_numBits = (num == 0xFFFFFFFF) ? 
m_numBits : num; - for (UINT_32 i = 0; i < o.m_numBits; i++) - { - m_eq[start + i].copyto(o.m_eq[i]); - } -} - -VOID CoordEq::reverse(UINT_32 start, UINT_32 num) -{ - UINT_32 n = (num == 0xFFFFFFFF) ? m_numBits : num; - - for (UINT_32 i = 0; i < n / 2; i++) - { - CoordTerm temp; - m_eq[start + i].copyto(temp); - m_eq[start + n - 1 - i].copyto(m_eq[start + i]); - temp.copyto(m_eq[start + n - 1 - i]); - } -} - -VOID CoordEq::xorin(CoordEq& x, UINT_32 start) -{ - UINT_32 n = ((m_numBits - start) < x.m_numBits) ? (m_numBits - start) : x.m_numBits; - for (UINT_32 i = 0; i < n; i++) - { - m_eq[start + i].add(x.m_eq[i]); - } -} - -UINT_32 CoordEq::Filter(INT_8 f, Coordinate& co, UINT_32 start, INT_8 axis) -{ - for (UINT_32 i = start; i < m_numBits;) - { - UINT_32 m = m_eq[i].Filter(f, co, 0, axis); - if (m == 0) - { - for (UINT_32 j = i; j < m_numBits - 1; j++) - { - m_eq[j] = m_eq[j + 1]; - } - m_numBits--; - } - else - { - i++; - } - } - return m_numBits; -} - -VOID CoordEq::shift(INT_32 amount, INT_32 start) -{ - if (amount != 0) - { - INT_32 numBits = static_cast(m_numBits); - amount = -amount; - INT_32 inc = (amount < 0) ? -1 : 1; - INT_32 i = (amount < 0) ? numBits - 1 : start; - INT_32 end = (amount < 0) ? start - 1 : numBits; - for (; (inc > 0) ? i < end : i > end; i += inc) - { - if ((i + amount < start) || (i + amount >= numBits)) - { - m_eq[i].Clear(); - } - else - { - m_eq[i + amount].copyto(m_eq[i]); - } - } - } -} - -CoordTerm& CoordEq::operator[](UINT_32 i) -{ - return m_eq[i]; -} - -VOID CoordEq::mort2d(Coordinate& c0, Coordinate& c1, UINT_32 start, UINT_32 end) -{ - if (end == 0) - { - ADDR_ASSERT(m_numBits > 0); - end = m_numBits - 1; - } - for (UINT_32 i = start; i <= end; i++) - { - UINT_32 select = (i - start) % 2; - Coordinate& c = (select == 0) ? 
c0 : c1; - m_eq[i].add(c); - c++; - } -} - -VOID CoordEq::mort3d(Coordinate& c0, Coordinate& c1, Coordinate& c2, UINT_32 start, UINT_32 end) -{ - if (end == 0) - { - ADDR_ASSERT(m_numBits > 0); - end = m_numBits - 1; - } - for (UINT_32 i = start; i <= end; i++) - { - UINT_32 select = (i - start) % 3; - Coordinate& c = (select == 0) ? c0 : ((select == 1) ? c1 : c2); - m_eq[i].add(c); - c++; - } -} - -BOOL_32 CoordEq::operator==(const CoordEq& b) -{ - BOOL_32 ret = TRUE; - - if (m_numBits != b.m_numBits) - { - ret = FALSE; - } - else - { - for (UINT_32 i = 0; i < m_numBits; i++) - { - if (m_eq[i] != b.m_eq[i]) - { - ret = FALSE; - break; - } - } - } - return ret; -} - -BOOL_32 CoordEq::operator!=(const CoordEq& b) -{ - return !(*this == b); -} - diff -Nru mesa-18.3.3/src/amd/addrlib/gfx9/coord.h mesa-19.0.1/src/amd/addrlib/gfx9/coord.h --- mesa-18.3.3/src/amd/addrlib/gfx9/coord.h 2017-11-14 18:46:21.000000000 +0000 +++ mesa-19.0.1/src/amd/addrlib/gfx9/coord.h 1970-01-01 00:00:00.000000000 +0000 @@ -1,114 +0,0 @@ -/* - * Copyright © 2017 Advanced Micro Devices, Inc. - * All Rights Reserved. - * - * Permission is hereby granted, free of charge, to any person obtaining - * a copy of this software and associated documentation files (the - * "Software"), to deal in the Software without restriction, including - * without limitation the rights to use, copy, modify, merge, publish, - * distribute, sub license, and/or sell copies of the Software, and to - * permit persons to whom the Software is furnished to do so, subject to - * the following conditions: - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES - * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - * NON-INFRINGEMENT. 
IN NO EVENT SHALL THE COPYRIGHT HOLDERS, AUTHORS - * AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, - * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE - * USE OR OTHER DEALINGS IN THE SOFTWARE. - * - * The above copyright notice and this permission notice (including the - * next paragraph) shall be included in all copies or substantial portions - * of the Software. - */ - -// Class used to define a coordinate bit - -#ifndef __COORD_H -#define __COORD_H - -class Coordinate -{ -public: - Coordinate(); - Coordinate(INT_8 c, INT_32 n); - - VOID set(INT_8 c, INT_32 n); - UINT_32 ison(UINT_32 x, UINT_32 y, UINT_32 z = 0, UINT_32 s = 0, UINT_32 m = 0) const; - INT_8 getdim(); - INT_8 getord(); - - BOOL_32 operator==(const Coordinate& b); - BOOL_32 operator<(const Coordinate& b); - BOOL_32 operator>(const Coordinate& b); - BOOL_32 operator<=(const Coordinate& b); - BOOL_32 operator>=(const Coordinate& b); - BOOL_32 operator!=(const Coordinate& b); - Coordinate& operator++(INT_32); - -private: - INT_8 dim; - INT_8 ord; -}; - -class CoordTerm -{ -public: - CoordTerm(); - VOID Clear(); - VOID add(Coordinate& co); - VOID add(CoordTerm& cl); - BOOL_32 remove(Coordinate& co); - BOOL_32 Exists(Coordinate& co); - VOID copyto(CoordTerm& cl); - UINT_32 getsize(); - UINT_32 getxor(UINT_32 x, UINT_32 y, UINT_32 z = 0, UINT_32 s = 0, UINT_32 m = 0) const; - - VOID getsmallest(Coordinate& co); - UINT_32 Filter(INT_8 f, Coordinate& co, UINT_32 start = 0, INT_8 axis = '\0'); - Coordinate& operator[](UINT_32 i); - BOOL_32 operator==(const CoordTerm& b); - BOOL_32 operator!=(const CoordTerm& b); - BOOL_32 exceedRange(UINT_32 xRange, UINT_32 yRange = 0, UINT_32 zRange = 0, UINT_32 sRange = 0); - -private: - static const UINT_32 MaxCoords = 8; - UINT_32 num_coords; - Coordinate m_coord[MaxCoords]; -}; - -class CoordEq -{ -public: - CoordEq(); - VOID remove(Coordinate& co); - BOOL_32 
Exists(Coordinate& co); - VOID resize(UINT_32 n); - UINT_32 getsize(); - virtual UINT_64 solve(UINT_32 x, UINT_32 y, UINT_32 z = 0, UINT_32 s = 0, UINT_32 m = 0) const; - virtual VOID solveAddr(UINT_64 addr, UINT_32 sliceInM, - UINT_32& x, UINT_32& y, UINT_32& z, UINT_32& s, UINT_32& m) const; - - VOID copy(CoordEq& o, UINT_32 start = 0, UINT_32 num = 0xFFFFFFFF); - VOID reverse(UINT_32 start = 0, UINT_32 num = 0xFFFFFFFF); - VOID xorin(CoordEq& x, UINT_32 start = 0); - UINT_32 Filter(INT_8 f, Coordinate& co, UINT_32 start = 0, INT_8 axis = '\0'); - VOID shift(INT_32 amount, INT_32 start = 0); - virtual CoordTerm& operator[](UINT_32 i); - VOID mort2d(Coordinate& c0, Coordinate& c1, UINT_32 start = 0, UINT_32 end = 0); - VOID mort3d(Coordinate& c0, Coordinate& c1, Coordinate& c2, UINT_32 start = 0, UINT_32 end = 0); - - BOOL_32 operator==(const CoordEq& b); - BOOL_32 operator!=(const CoordEq& b); - -private: - static const UINT_32 MaxEqBits = 64; - UINT_32 m_numBits; - - CoordTerm m_eq[MaxEqBits]; -}; - -#endif - diff -Nru mesa-18.3.3/src/amd/addrlib/gfx9/gfx9addrlib.cpp mesa-19.0.1/src/amd/addrlib/gfx9/gfx9addrlib.cpp --- mesa-18.3.3/src/amd/addrlib/gfx9/gfx9addrlib.cpp 2018-12-07 18:58:04.000000000 +0000 +++ mesa-19.0.1/src/amd/addrlib/gfx9/gfx9addrlib.cpp 1970-01-01 00:00:00.000000000 +0000 @@ -1,5016 +0,0 @@ -/* - * Copyright © 2017 Advanced Micro Devices, Inc. - * All Rights Reserved. 
- * - * Permission is hereby granted, free of charge, to any person obtaining - * a copy of this software and associated documentation files (the - * "Software"), to deal in the Software without restriction, including - * without limitation the rights to use, copy, modify, merge, publish, - * distribute, sub license, and/or sell copies of the Software, and to - * permit persons to whom the Software is furnished to do so, subject to - * the following conditions: - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES - * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - * NON-INFRINGEMENT. IN NO EVENT SHALL THE COPYRIGHT HOLDERS, AUTHORS - * AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, - * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE - * USE OR OTHER DEALINGS IN THE SOFTWARE. - * - * The above copyright notice and this permission notice (including the - * next paragraph) shall be included in all copies or substantial portions - * of the Software. - */ - -/** -************************************************************************************************************************ -* @file gfx9addrlib.cpp -* @brief Contgfx9ns the implementation for the Gfx9Lib class. 
-************************************************************************************************************************ -*/ - -#include "gfx9addrlib.h" - -#include "gfx9_gb_reg.h" - -#include "amdgpu_asic_addr.h" - -#include "util/macros.h" - -//////////////////////////////////////////////////////////////////////////////////////////////////// -//////////////////////////////////////////////////////////////////////////////////////////////////// - -namespace Addr -{ - -/** -************************************************************************************************************************ -* Gfx9HwlInit -* -* @brief -* Creates an Gfx9Lib object. -* -* @return -* Returns an Gfx9Lib object pointer. -************************************************************************************************************************ -*/ -Addr::Lib* Gfx9HwlInit(const Client* pClient) -{ - return V2::Gfx9Lib::CreateObj(pClient); -} - -namespace V2 -{ - -//////////////////////////////////////////////////////////////////////////////////////////////////// -// Static Const Member -//////////////////////////////////////////////////////////////////////////////////////////////////// - -const SwizzleModeFlags Gfx9Lib::SwizzleModeTable[ADDR_SW_MAX_TYPE] = -{//Linear 256B 4KB 64KB Var Z Std Disp Rot XOR T RtOpt - {1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}, // ADDR_SW_LINEAR - {0, 1, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0}, // ADDR_SW_256B_S - {0, 1, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0}, // ADDR_SW_256B_D - {0, 1, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0}, // ADDR_SW_256B_R - - {0, 0, 1, 0, 0, 1, 0, 0, 0, 0, 0, 0}, // ADDR_SW_4KB_Z - {0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 0, 0}, // ADDR_SW_4KB_S - {0, 0, 1, 0, 0, 0, 0, 1, 0, 0, 0, 0}, // ADDR_SW_4KB_D - {0, 0, 1, 0, 0, 0, 0, 0, 1, 0, 0, 0}, // ADDR_SW_4KB_R - - {0, 0, 0, 1, 0, 1, 0, 0, 0, 0, 0, 0}, // ADDR_SW_64KB_Z - {0, 0, 0, 1, 0, 0, 1, 0, 0, 0, 0, 0}, // ADDR_SW_64KB_S - {0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 0}, // ADDR_SW_64KB_D - {0, 0, 0, 1, 0, 0, 0, 0, 1, 0, 0, 0}, // 
ADDR_SW_64KB_R - - {0, 0, 0, 0, 1, 1, 0, 0, 0, 0, 0, 0}, // ADDR_SW_VAR_Z - {0, 0, 0, 0, 1, 0, 1, 0, 0, 0, 0, 0}, // ADDR_SW_VAR_S - {0, 0, 0, 0, 1, 0, 0, 1, 0, 0, 0, 0}, // ADDR_SW_VAR_D - {0, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0}, // ADDR_SW_VAR_R - - {0, 0, 0, 1, 0, 1, 0, 0, 0, 1, 1, 0}, // ADDR_SW_64KB_Z_T - {0, 0, 0, 1, 0, 0, 1, 0, 0, 1, 1, 0}, // ADDR_SW_64KB_S_T - {0, 0, 0, 1, 0, 0, 0, 1, 0, 1, 1, 0}, // ADDR_SW_64KB_D_T - {0, 0, 0, 1, 0, 0, 0, 0, 1, 1, 1, 0}, // ADDR_SW_64KB_R_T - - {0, 0, 1, 0, 0, 1, 0, 0, 0, 1, 0, 0}, // ADDR_SW_4KB_Z_x - {0, 0, 1, 0, 0, 0, 1, 0, 0, 1, 0, 0}, // ADDR_SW_4KB_S_x - {0, 0, 1, 0, 0, 0, 0, 1, 0, 1, 0, 0}, // ADDR_SW_4KB_D_x - {0, 0, 1, 0, 0, 0, 0, 0, 1, 1, 0, 0}, // ADDR_SW_4KB_R_x - - {0, 0, 0, 1, 0, 1, 0, 0, 0, 1, 0, 0}, // ADDR_SW_64KB_Z_X - {0, 0, 0, 1, 0, 0, 1, 0, 0, 1, 0, 0}, // ADDR_SW_64KB_S_X - {0, 0, 0, 1, 0, 0, 0, 1, 0, 1, 0, 0}, // ADDR_SW_64KB_D_X - {0, 0, 0, 1, 0, 0, 0, 0, 1, 1, 0, 0}, // ADDR_SW_64KB_R_X - - {0, 0, 0, 0, 1, 1, 0, 0, 0, 1, 0, 0}, // ADDR_SW_VAR_Z_X - {0, 0, 0, 0, 1, 0, 1, 0, 0, 1, 0, 0}, // ADDR_SW_VAR_S_X - {0, 0, 0, 0, 1, 0, 0, 1, 0, 1, 0, 0}, // ADDR_SW_VAR_D_X - {0, 0, 0, 0, 1, 0, 0, 0, 1, 1, 0, 0}, // ADDR_SW_VAR_R_X - {1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}, // ADDR_SW_LINEAR_GENERAL -}; - -const UINT_32 Gfx9Lib::MipTailOffset256B[] = {2048, 1024, 512, 256, 128, 64, 32, 16, - 8, 6, 5, 4, 3, 2, 1, 0}; - -const Dim3d Gfx9Lib::Block256_3dS[] = {{16, 4, 4}, {8, 4, 4}, {4, 4, 4}, {2, 4, 4}, {1, 4, 4}}; - -const Dim3d Gfx9Lib::Block256_3dZ[] = {{8, 4, 8}, {4, 4, 8}, {4, 4, 4}, {4, 2, 4}, {2, 2, 4}}; - -/** -************************************************************************************************************************ -* Gfx9Lib::Gfx9Lib -* -* @brief -* Constructor -* -************************************************************************************************************************ -*/ -Gfx9Lib::Gfx9Lib(const Client* pClient) - : - Lib(pClient), - m_numEquations(0) -{ - m_class = 
AI_ADDRLIB; - memset(&m_settings, 0, sizeof(m_settings)); - memcpy(m_swizzleModeTable, SwizzleModeTable, sizeof(SwizzleModeTable)); -} - -/** -************************************************************************************************************************ -* Gfx9Lib::~Gfx9Lib -* -* @brief -* Destructor -************************************************************************************************************************ -*/ -Gfx9Lib::~Gfx9Lib() -{ -} - -/** -************************************************************************************************************************ -* Gfx9Lib::HwlComputeHtileInfo -* -* @brief -* Interface function stub of AddrComputeHtilenfo -* -* @return -* ADDR_E_RETURNCODE -************************************************************************************************************************ -*/ -ADDR_E_RETURNCODE Gfx9Lib::HwlComputeHtileInfo( - const ADDR2_COMPUTE_HTILE_INFO_INPUT* pIn, ///< [in] input structure - ADDR2_COMPUTE_HTILE_INFO_OUTPUT* pOut ///< [out] output structure - ) const -{ - UINT_32 numPipeTotal = GetPipeNumForMetaAddressing(pIn->hTileFlags.pipeAligned, - pIn->swizzleMode); - - UINT_32 numRbTotal = pIn->hTileFlags.rbAligned ? m_se * m_rbPerSe : 1; - - UINT_32 numCompressBlkPerMetaBlk, numCompressBlkPerMetaBlkLog2; - - if ((numPipeTotal == 1) && (numRbTotal == 1)) - { - numCompressBlkPerMetaBlkLog2 = 10; - } - else - { - if (m_settings.applyAliasFix) - { - numCompressBlkPerMetaBlkLog2 = m_seLog2 + m_rbPerSeLog2 + Max(10u, m_pipeInterleaveLog2); - } - else - { - numCompressBlkPerMetaBlkLog2 = m_seLog2 + m_rbPerSeLog2 + 10; - } - } - - numCompressBlkPerMetaBlk = 1 << numCompressBlkPerMetaBlkLog2; - - Dim3d metaBlkDim = {8, 8, 1}; - UINT_32 totalAmpBits = numCompressBlkPerMetaBlkLog2; - UINT_32 widthAmp = (pIn->numMipLevels > 1) ? 
(totalAmpBits >> 1) : RoundHalf(totalAmpBits); - UINT_32 heightAmp = totalAmpBits - widthAmp; - metaBlkDim.w <<= widthAmp; - metaBlkDim.h <<= heightAmp; - -#if DEBUG - Dim3d metaBlkDimDbg = {8, 8, 1}; - for (UINT_32 index = 0; index < numCompressBlkPerMetaBlkLog2; index++) - { - if ((metaBlkDimDbg.h < metaBlkDimDbg.w) || - ((pIn->numMipLevels > 1) && (metaBlkDimDbg.h == metaBlkDimDbg.w))) - { - metaBlkDimDbg.h <<= 1; - } - else - { - metaBlkDimDbg.w <<= 1; - } - } - ADDR_ASSERT((metaBlkDimDbg.w == metaBlkDim.w) && (metaBlkDimDbg.h == metaBlkDim.h)); -#endif - - UINT_32 numMetaBlkX; - UINT_32 numMetaBlkY; - UINT_32 numMetaBlkZ; - - GetMetaMipInfo(pIn->numMipLevels, &metaBlkDim, FALSE, pOut->pMipInfo, - pIn->unalignedWidth, pIn->unalignedHeight, pIn->numSlices, - &numMetaBlkX, &numMetaBlkY, &numMetaBlkZ); - - const UINT_32 metaBlkSize = numCompressBlkPerMetaBlk << 2; - UINT_32 align = numPipeTotal * numRbTotal * m_pipeInterleaveBytes; - - if ((IsXor(pIn->swizzleMode) == FALSE) && (numPipeTotal > 2)) - { - align *= (numPipeTotal >> 1); - } - - align = Max(align, metaBlkSize); - - if (m_settings.metaBaseAlignFix) - { - align = Max(align, GetBlockSize(pIn->swizzleMode)); - } - - if (m_settings.htileAlignFix) - { - const INT_32 metaBlkSizeLog2 = numCompressBlkPerMetaBlkLog2 + 2; - const INT_32 htileCachelineSizeLog2 = 11; - const INT_32 maxNumOfRbMaskBits = 1 + Log2(numPipeTotal) + Log2(numRbTotal); - - INT_32 rbMaskPadding = Max(0, htileCachelineSizeLog2 - (metaBlkSizeLog2 - maxNumOfRbMaskBits)); - - align <<= rbMaskPadding; - } - - pOut->pitch = numMetaBlkX * metaBlkDim.w; - pOut->height = numMetaBlkY * metaBlkDim.h; - pOut->sliceSize = numMetaBlkX * numMetaBlkY * metaBlkSize; - - pOut->metaBlkWidth = metaBlkDim.w; - pOut->metaBlkHeight = metaBlkDim.h; - pOut->metaBlkNumPerSlice = numMetaBlkX * numMetaBlkY; - - pOut->baseAlign = align; - pOut->htileBytes = PowTwoAlign(pOut->sliceSize * numMetaBlkZ, align); - - return ADDR_OK; -} - -/** 
-************************************************************************************************************************ -* Gfx9Lib::HwlComputeCmaskInfo -* -* @brief -* Interface function stub of AddrComputeCmaskInfo -* -* @return -* ADDR_E_RETURNCODE -************************************************************************************************************************ -*/ -ADDR_E_RETURNCODE Gfx9Lib::HwlComputeCmaskInfo( - const ADDR2_COMPUTE_CMASK_INFO_INPUT* pIn, ///< [in] input structure - ADDR2_COMPUTE_CMASK_INFO_OUTPUT* pOut ///< [out] output structure - ) const -{ -// TODO: Clarify with AddrLib team -// ADDR_ASSERT(pIn->resourceType == ADDR_RSRC_TEX_2D); - - UINT_32 numPipeTotal = GetPipeNumForMetaAddressing(pIn->cMaskFlags.pipeAligned, - pIn->swizzleMode); - - UINT_32 numRbTotal = pIn->cMaskFlags.rbAligned ? m_se * m_rbPerSe : 1; - - UINT_32 numCompressBlkPerMetaBlkLog2, numCompressBlkPerMetaBlk; - - if ((numPipeTotal == 1) && (numRbTotal == 1)) - { - numCompressBlkPerMetaBlkLog2 = 13; - } - else - { - if (m_settings.applyAliasFix) - { - numCompressBlkPerMetaBlkLog2 = m_seLog2 + m_rbPerSeLog2 + Max(10u, m_pipeInterleaveLog2); - } - else - { - numCompressBlkPerMetaBlkLog2 = m_seLog2 + m_rbPerSeLog2 + 10; - } - - numCompressBlkPerMetaBlkLog2 = Max(numCompressBlkPerMetaBlkLog2, 13u); - } - - numCompressBlkPerMetaBlk = 1 << numCompressBlkPerMetaBlkLog2; - - Dim2d metaBlkDim = {8, 8}; - UINT_32 totalAmpBits = numCompressBlkPerMetaBlkLog2; - UINT_32 heightAmp = totalAmpBits >> 1; - UINT_32 widthAmp = totalAmpBits - heightAmp; - metaBlkDim.w <<= widthAmp; - metaBlkDim.h <<= heightAmp; - -#if DEBUG - Dim2d metaBlkDimDbg = {8, 8}; - for (UINT_32 index = 0; index < numCompressBlkPerMetaBlkLog2; index++) - { - if (metaBlkDimDbg.h < metaBlkDimDbg.w) - { - metaBlkDimDbg.h <<= 1; - } - else - { - metaBlkDimDbg.w <<= 1; - } - } - ADDR_ASSERT((metaBlkDimDbg.w == metaBlkDim.w) && (metaBlkDimDbg.h == metaBlkDim.h)); -#endif - - UINT_32 numMetaBlkX = (pIn->unalignedWidth + 
metaBlkDim.w - 1) / metaBlkDim.w; - UINT_32 numMetaBlkY = (pIn->unalignedHeight + metaBlkDim.h - 1) / metaBlkDim.h; - UINT_32 numMetaBlkZ = Max(pIn->numSlices, 1u); - - UINT_32 sizeAlign = numPipeTotal * numRbTotal * m_pipeInterleaveBytes; - - if (m_settings.metaBaseAlignFix) - { - sizeAlign = Max(sizeAlign, GetBlockSize(pIn->swizzleMode)); - } - - pOut->pitch = numMetaBlkX * metaBlkDim.w; - pOut->height = numMetaBlkY * metaBlkDim.h; - pOut->sliceSize = (numMetaBlkX * numMetaBlkY * numCompressBlkPerMetaBlk) >> 1; - pOut->cmaskBytes = PowTwoAlign(pOut->sliceSize * numMetaBlkZ, sizeAlign); - pOut->baseAlign = Max(numCompressBlkPerMetaBlk >> 1, sizeAlign); - - pOut->metaBlkWidth = metaBlkDim.w; - pOut->metaBlkHeight = metaBlkDim.h; - - pOut->metaBlkNumPerSlice = numMetaBlkX * numMetaBlkY; - - return ADDR_OK; -} - -/** -************************************************************************************************************************ -* Gfx9Lib::GetMetaMipInfo -* -* @brief -* Get meta mip info -* -* @return -* N/A -************************************************************************************************************************ -*/ -VOID Gfx9Lib::GetMetaMipInfo( - UINT_32 numMipLevels, ///< [in] number of mip levels - Dim3d* pMetaBlkDim, ///< [in] meta block dimension - BOOL_32 dataThick, ///< [in] data surface is thick - ADDR2_META_MIP_INFO* pInfo, ///< [out] meta mip info - UINT_32 mip0Width, ///< [in] mip0 width - UINT_32 mip0Height, ///< [in] mip0 height - UINT_32 mip0Depth, ///< [in] mip0 depth - UINT_32* pNumMetaBlkX, ///< [out] number of metablock X in mipchain - UINT_32* pNumMetaBlkY, ///< [out] number of metablock Y in mipchain - UINT_32* pNumMetaBlkZ) ///< [out] number of metablock Z in mipchain - const -{ - UINT_32 numMetaBlkX = (mip0Width + pMetaBlkDim->w - 1) / pMetaBlkDim->w; - UINT_32 numMetaBlkY = (mip0Height + pMetaBlkDim->h - 1) / pMetaBlkDim->h; - UINT_32 numMetaBlkZ = (mip0Depth + pMetaBlkDim->d - 1) / pMetaBlkDim->d; - UINT_32 tailWidth 
= pMetaBlkDim->w; - UINT_32 tailHeight = pMetaBlkDim->h >> 1; - UINT_32 tailDepth = pMetaBlkDim->d; - BOOL_32 inTail = FALSE; - AddrMajorMode major = ADDR_MAJOR_MAX_TYPE; - - if (numMipLevels > 1) - { - if (dataThick && (numMetaBlkZ > numMetaBlkX) && (numMetaBlkZ > numMetaBlkY)) - { - // Z major - major = ADDR_MAJOR_Z; - } - else if (numMetaBlkX >= numMetaBlkY) - { - // X major - major = ADDR_MAJOR_X; - } - else - { - // Y major - major = ADDR_MAJOR_Y; - } - - inTail = ((mip0Width <= tailWidth) && - (mip0Height <= tailHeight) && - ((dataThick == FALSE) || (mip0Depth <= tailDepth))); - - if (inTail == FALSE) - { - UINT_32 orderLimit; - UINT_32 *pMipDim; - UINT_32 *pOrderDim; - - if (major == ADDR_MAJOR_Z) - { - // Z major - pMipDim = &numMetaBlkY; - pOrderDim = &numMetaBlkZ; - orderLimit = 4; - } - else if (major == ADDR_MAJOR_X) - { - // X major - pMipDim = &numMetaBlkY; - pOrderDim = &numMetaBlkX; - orderLimit = 4; - } - else - { - // Y major - pMipDim = &numMetaBlkX; - pOrderDim = &numMetaBlkY; - orderLimit = 2; - } - - if ((*pMipDim < 3) && (*pOrderDim > orderLimit) && (numMipLevels > 3)) - { - *pMipDim += 2; - } - else - { - *pMipDim += ((*pMipDim / 2) + (*pMipDim & 1)); - } - } - } - - if (pInfo != NULL) - { - UINT_32 mipWidth = mip0Width; - UINT_32 mipHeight = mip0Height; - UINT_32 mipDepth = mip0Depth; - Dim3d mipCoord = {0}; - - for (UINT_32 mip = 0; mip < numMipLevels; mip++) - { - if (inTail) - { - GetMetaMiptailInfo(&pInfo[mip], mipCoord, numMipLevels - mip, - pMetaBlkDim); - break; - } - else - { - mipWidth = PowTwoAlign(mipWidth, pMetaBlkDim->w); - mipHeight = PowTwoAlign(mipHeight, pMetaBlkDim->h); - mipDepth = PowTwoAlign(mipDepth, pMetaBlkDim->d); - - pInfo[mip].inMiptail = FALSE; - pInfo[mip].startX = mipCoord.w; - pInfo[mip].startY = mipCoord.h; - pInfo[mip].startZ = mipCoord.d; - pInfo[mip].width = mipWidth; - pInfo[mip].height = mipHeight; - pInfo[mip].depth = dataThick ? 
mipDepth : 1; - - if ((mip >= 3) || (mip & 1)) - { - switch (major) - { - case ADDR_MAJOR_X: - mipCoord.w += mipWidth; - break; - case ADDR_MAJOR_Y: - mipCoord.h += mipHeight; - break; - case ADDR_MAJOR_Z: - mipCoord.d += mipDepth; - break; - default: - break; - } - } - else - { - switch (major) - { - case ADDR_MAJOR_X: - mipCoord.h += mipHeight; - break; - case ADDR_MAJOR_Y: - mipCoord.w += mipWidth; - break; - case ADDR_MAJOR_Z: - mipCoord.h += mipHeight; - break; - default: - break; - } - } - - mipWidth = Max(mipWidth >> 1, 1u); - mipHeight = Max(mipHeight >> 1, 1u); - mipDepth = Max(mipDepth >> 1, 1u); - - inTail = ((mipWidth <= tailWidth) && - (mipHeight <= tailHeight) && - ((dataThick == FALSE) || (mipDepth <= tailDepth))); - } - } - } - - *pNumMetaBlkX = numMetaBlkX; - *pNumMetaBlkY = numMetaBlkY; - *pNumMetaBlkZ = numMetaBlkZ; -} - -/** -************************************************************************************************************************ -* Gfx9Lib::HwlComputeDccInfo -* -* @brief -* Interface function to compute DCC key info -* -* @return -* ADDR_E_RETURNCODE -************************************************************************************************************************ -*/ -ADDR_E_RETURNCODE Gfx9Lib::HwlComputeDccInfo( - const ADDR2_COMPUTE_DCCINFO_INPUT* pIn, ///< [in] input structure - ADDR2_COMPUTE_DCCINFO_OUTPUT* pOut ///< [out] output structure - ) const -{ - BOOL_32 dataLinear = IsLinear(pIn->swizzleMode); - BOOL_32 metaLinear = pIn->dccKeyFlags.linear; - BOOL_32 pipeAligned = pIn->dccKeyFlags.pipeAligned; - - if (dataLinear) - { - metaLinear = TRUE; - } - else if (metaLinear == TRUE) - { - pipeAligned = FALSE; - } - - UINT_32 numPipeTotal = GetPipeNumForMetaAddressing(pipeAligned, pIn->swizzleMode); - - if (metaLinear) - { - // Linear metadata supporting was removed for GFX9! No one can use this feature on GFX9. 
- ADDR_ASSERT_ALWAYS(); - - pOut->dccRamBaseAlign = numPipeTotal * m_pipeInterleaveBytes; - pOut->dccRamSize = PowTwoAlign((pIn->dataSurfaceSize / 256), pOut->dccRamBaseAlign); - } - else - { - BOOL_32 dataThick = IsThick(pIn->resourceType, pIn->swizzleMode); - - UINT_32 minMetaBlkSize = dataThick ? 65536 : 4096; - - UINT_32 numFrags = Max(pIn->numFrags, 1u); - UINT_32 numSlices = Max(pIn->numSlices, 1u); - - minMetaBlkSize /= numFrags; - - UINT_32 numCompressBlkPerMetaBlk = minMetaBlkSize; - - UINT_32 numRbTotal = pIn->dccKeyFlags.rbAligned ? m_se * m_rbPerSe : 1; - - if ((numPipeTotal > 1) || (numRbTotal > 1)) - { - const UINT_32 thinBlkSize = 1 << (m_settings.applyAliasFix ? Max(10u, m_pipeInterleaveLog2) : 10); - - numCompressBlkPerMetaBlk = - Max(numCompressBlkPerMetaBlk, m_se * m_rbPerSe * (dataThick ? 262144 : thinBlkSize)); - - if (numCompressBlkPerMetaBlk > 65536 * pIn->bpp) - { - numCompressBlkPerMetaBlk = 65536 * pIn->bpp; - } - } - - Dim3d compressBlkDim = GetDccCompressBlk(pIn->resourceType, pIn->swizzleMode, pIn->bpp); - Dim3d metaBlkDim = compressBlkDim; - - for (UINT_32 index = 1; index < numCompressBlkPerMetaBlk; index <<= 1) - { - if ((metaBlkDim.h < metaBlkDim.w) || - ((pIn->numMipLevels > 1) && (metaBlkDim.h == metaBlkDim.w))) - { - if ((dataThick == FALSE) || (metaBlkDim.h <= metaBlkDim.d)) - { - metaBlkDim.h <<= 1; - } - else - { - metaBlkDim.d <<= 1; - } - } - else - { - if ((dataThick == FALSE) || (metaBlkDim.w <= metaBlkDim.d)) - { - metaBlkDim.w <<= 1; - } - else - { - metaBlkDim.d <<= 1; - } - } - } - - UINT_32 numMetaBlkX; - UINT_32 numMetaBlkY; - UINT_32 numMetaBlkZ; - - GetMetaMipInfo(pIn->numMipLevels, &metaBlkDim, dataThick, pOut->pMipInfo, - pIn->unalignedWidth, pIn->unalignedHeight, numSlices, - &numMetaBlkX, &numMetaBlkY, &numMetaBlkZ); - - UINT_32 sizeAlign = numPipeTotal * numRbTotal * m_pipeInterleaveBytes; - - if (numFrags > m_maxCompFrag) - { - sizeAlign *= (numFrags / m_maxCompFrag); - } - - if (m_settings.metaBaseAlignFix) 
- { - sizeAlign = Max(sizeAlign, GetBlockSize(pIn->swizzleMode)); - } - - pOut->dccRamSize = numMetaBlkX * numMetaBlkY * numMetaBlkZ * - numCompressBlkPerMetaBlk * numFrags; - pOut->dccRamSize = PowTwoAlign(pOut->dccRamSize, sizeAlign); - pOut->dccRamBaseAlign = Max(numCompressBlkPerMetaBlk, sizeAlign); - - pOut->pitch = numMetaBlkX * metaBlkDim.w; - pOut->height = numMetaBlkY * metaBlkDim.h; - pOut->depth = numMetaBlkZ * metaBlkDim.d; - - pOut->compressBlkWidth = compressBlkDim.w; - pOut->compressBlkHeight = compressBlkDim.h; - pOut->compressBlkDepth = compressBlkDim.d; - - pOut->metaBlkWidth = metaBlkDim.w; - pOut->metaBlkHeight = metaBlkDim.h; - pOut->metaBlkDepth = metaBlkDim.d; - - pOut->metaBlkNumPerSlice = numMetaBlkX * numMetaBlkY; - pOut->fastClearSizePerSlice = - pOut->metaBlkNumPerSlice * numCompressBlkPerMetaBlk * Min(numFrags, m_maxCompFrag); - } - - return ADDR_OK; -} - -/** -************************************************************************************************************************ -* Gfx9Lib::HwlComputeMaxBaseAlignments -* -* @brief -* Gets maximum alignments -* @return -* maximum alignments -************************************************************************************************************************ -*/ -UINT_32 Gfx9Lib::HwlComputeMaxBaseAlignments() const -{ - return ComputeSurfaceBaseAlignTiled(ADDR_SW_64KB); -} - -/** -************************************************************************************************************************ -* Gfx9Lib::HwlComputeMaxMetaBaseAlignments -* -* @brief -* Gets maximum alignments for metadata -* @return -* maximum alignments for metadata -************************************************************************************************************************ -*/ -UINT_32 Gfx9Lib::HwlComputeMaxMetaBaseAlignments() const -{ - // Max base alignment for Htile - const UINT_32 maxNumPipeTotal = GetPipeNumForMetaAddressing(TRUE, ADDR_SW_64KB_Z); - const UINT_32 maxNumRbTotal = m_se * 
m_rbPerSe; - - // If applyAliasFix was set, the extra bits should be MAX(10u, m_pipeInterleaveLog2), - // but we never saw any ASIC whose m_pipeInterleaveLog2 != 8, so just put an assertion and simply the logic. - ADDR_ASSERT((m_settings.applyAliasFix == FALSE) || (m_pipeInterleaveLog2 <= 10u)); - const UINT_32 maxNumCompressBlkPerMetaBlk = 1u << (m_seLog2 + m_rbPerSeLog2 + 10u); - - UINT_32 maxBaseAlignHtile = maxNumPipeTotal * maxNumRbTotal * m_pipeInterleaveBytes; - - if (maxNumPipeTotal > 2) - { - maxBaseAlignHtile *= (maxNumPipeTotal >> 1); - } - - maxBaseAlignHtile = Max(maxNumCompressBlkPerMetaBlk << 2, maxBaseAlignHtile); - - if (m_settings.metaBaseAlignFix) - { - maxBaseAlignHtile = Max(maxBaseAlignHtile, GetBlockSize(ADDR_SW_64KB)); - } - - if (m_settings.htileAlignFix) - { - maxBaseAlignHtile *= maxNumPipeTotal; - } - - // Max base alignment for Cmask will not be larger than that for Htile, no need to calculate - - // Max base alignment for 2D Dcc will not be larger than that for 3D, no need to calculate - UINT_32 maxBaseAlignDcc3D = 65536; - - if ((maxNumPipeTotal > 1) || (maxNumRbTotal > 1)) - { - maxBaseAlignDcc3D = Min(m_se * m_rbPerSe * 262144, 65536 * 128u); - } - - // Max base alignment for Msaa Dcc - UINT_32 maxBaseAlignDccMsaa = maxNumPipeTotal * maxNumRbTotal * m_pipeInterleaveBytes * (8 / m_maxCompFrag); - - if (m_settings.metaBaseAlignFix) - { - maxBaseAlignDccMsaa = Max(maxBaseAlignDccMsaa, GetBlockSize(ADDR_SW_64KB)); - } - - return Max(maxBaseAlignHtile, Max(maxBaseAlignDccMsaa, maxBaseAlignDcc3D)); -} - -/** -************************************************************************************************************************ -* Gfx9Lib::HwlComputeCmaskAddrFromCoord -* -* @brief -* Interface function stub of AddrComputeCmaskAddrFromCoord -* -* @return -* ADDR_E_RETURNCODE -************************************************************************************************************************ -*/ -ADDR_E_RETURNCODE 
Gfx9Lib::HwlComputeCmaskAddrFromCoord( - const ADDR2_COMPUTE_CMASK_ADDRFROMCOORD_INPUT* pIn, ///< [in] input structure - ADDR2_COMPUTE_CMASK_ADDRFROMCOORD_OUTPUT* pOut) ///< [out] output structure -{ - ADDR2_COMPUTE_CMASK_INFO_INPUT input = {0}; - input.size = sizeof(input); - input.cMaskFlags = pIn->cMaskFlags; - input.colorFlags = pIn->colorFlags; - input.unalignedWidth = Max(pIn->unalignedWidth, 1u); - input.unalignedHeight = Max(pIn->unalignedHeight, 1u); - input.numSlices = Max(pIn->numSlices, 1u); - input.swizzleMode = pIn->swizzleMode; - input.resourceType = pIn->resourceType; - - ADDR2_COMPUTE_CMASK_INFO_OUTPUT output = {0}; - output.size = sizeof(output); - - ADDR_E_RETURNCODE returnCode = ComputeCmaskInfo(&input, &output); - - if (returnCode == ADDR_OK) - { - UINT_32 fmaskBpp = GetFmaskBpp(pIn->numSamples, pIn->numFrags); - UINT_32 fmaskElementBytesLog2 = Log2(fmaskBpp >> 3); - UINT_32 metaBlkWidthLog2 = Log2(output.metaBlkWidth); - UINT_32 metaBlkHeightLog2 = Log2(output.metaBlkHeight); - - MetaEqParams metaEqParams = {0, fmaskElementBytesLog2, 0, pIn->cMaskFlags, - Gfx9DataFmask, pIn->swizzleMode, pIn->resourceType, - metaBlkWidthLog2, metaBlkHeightLog2, 0, 3, 3, 0}; - - const CoordEq* pMetaEq = GetMetaEquation(metaEqParams); - - UINT_32 xb = pIn->x / output.metaBlkWidth; - UINT_32 yb = pIn->y / output.metaBlkHeight; - UINT_32 zb = pIn->slice; - - UINT_32 pitchInBlock = output.pitch / output.metaBlkWidth; - UINT_32 sliceSizeInBlock = (output.height / output.metaBlkHeight) * pitchInBlock; - UINT_32 blockIndex = zb * sliceSizeInBlock + yb * pitchInBlock + xb; - - UINT_64 address = pMetaEq->solve(pIn->x, pIn->y, pIn->slice, 0, blockIndex); - - pOut->addr = address >> 1; - pOut->bitPosition = static_cast((address & 1) << 2); - - - UINT_32 numPipeBits = GetPipeLog2ForMetaAddressing(pIn->cMaskFlags.pipeAligned, - pIn->swizzleMode); - - UINT_64 pipeXor = static_cast(pIn->pipeXor & ((1 << numPipeBits) - 1)); - - pOut->addr ^= (pipeXor << m_pipeInterleaveLog2); 
- } - - return returnCode; -} - -/** -************************************************************************************************************************ -* Gfx9Lib::HwlComputeHtileAddrFromCoord -* -* @brief -* Interface function stub of AddrComputeHtileAddrFromCoord -* -* @return -* ADDR_E_RETURNCODE -************************************************************************************************************************ -*/ -ADDR_E_RETURNCODE Gfx9Lib::HwlComputeHtileAddrFromCoord( - const ADDR2_COMPUTE_HTILE_ADDRFROMCOORD_INPUT* pIn, ///< [in] input structure - ADDR2_COMPUTE_HTILE_ADDRFROMCOORD_OUTPUT* pOut) ///< [out] output structure -{ - ADDR_E_RETURNCODE returnCode = ADDR_OK; - - if (pIn->numMipLevels > 1) - { - returnCode = ADDR_NOTIMPLEMENTED; - } - else - { - ADDR2_COMPUTE_HTILE_INFO_INPUT input = {0}; - input.size = sizeof(input); - input.hTileFlags = pIn->hTileFlags; - input.depthFlags = pIn->depthflags; - input.swizzleMode = pIn->swizzleMode; - input.unalignedWidth = Max(pIn->unalignedWidth, 1u); - input.unalignedHeight = Max(pIn->unalignedHeight, 1u); - input.numSlices = Max(pIn->numSlices, 1u); - input.numMipLevels = Max(pIn->numMipLevels, 1u); - - ADDR2_COMPUTE_HTILE_INFO_OUTPUT output = {0}; - output.size = sizeof(output); - - returnCode = ComputeHtileInfo(&input, &output); - - if (returnCode == ADDR_OK) - { - UINT_32 elementBytesLog2 = Log2(pIn->bpp >> 3); - UINT_32 metaBlkWidthLog2 = Log2(output.metaBlkWidth); - UINT_32 metaBlkHeightLog2 = Log2(output.metaBlkHeight); - UINT_32 numSamplesLog2 = Log2(pIn->numSamples); - - MetaEqParams metaEqParams = {0, elementBytesLog2, numSamplesLog2, pIn->hTileFlags, - Gfx9DataDepthStencil, pIn->swizzleMode, ADDR_RSRC_TEX_2D, - metaBlkWidthLog2, metaBlkHeightLog2, 0, 3, 3, 0}; - - const CoordEq* pMetaEq = GetMetaEquation(metaEqParams); - - UINT_32 xb = pIn->x / output.metaBlkWidth; - UINT_32 yb = pIn->y / output.metaBlkHeight; - UINT_32 zb = pIn->slice; - - UINT_32 pitchInBlock = output.pitch / 
output.metaBlkWidth; - UINT_32 sliceSizeInBlock = (output.height / output.metaBlkHeight) * pitchInBlock; - UINT_32 blockIndex = zb * sliceSizeInBlock + yb * pitchInBlock + xb; - - UINT_64 address = pMetaEq->solve(pIn->x, pIn->y, pIn->slice, 0, blockIndex); - - pOut->addr = address >> 1; - - UINT_32 numPipeBits = GetPipeLog2ForMetaAddressing(pIn->hTileFlags.pipeAligned, - pIn->swizzleMode); - - UINT_64 pipeXor = static_cast(pIn->pipeXor & ((1 << numPipeBits) - 1)); - - pOut->addr ^= (pipeXor << m_pipeInterleaveLog2); - } - } - - return returnCode; -} - -/** -************************************************************************************************************************ -* Gfx9Lib::HwlComputeHtileCoordFromAddr -* -* @brief -* Interface function stub of AddrComputeHtileCoordFromAddr -* -* @return -* ADDR_E_RETURNCODE -************************************************************************************************************************ -*/ -ADDR_E_RETURNCODE Gfx9Lib::HwlComputeHtileCoordFromAddr( - const ADDR2_COMPUTE_HTILE_COORDFROMADDR_INPUT* pIn, ///< [in] input structure - ADDR2_COMPUTE_HTILE_COORDFROMADDR_OUTPUT* pOut) ///< [out] output structure -{ - ADDR_E_RETURNCODE returnCode = ADDR_OK; - - if (pIn->numMipLevels > 1) - { - returnCode = ADDR_NOTIMPLEMENTED; - } - else - { - ADDR2_COMPUTE_HTILE_INFO_INPUT input = {0}; - input.size = sizeof(input); - input.hTileFlags = pIn->hTileFlags; - input.swizzleMode = pIn->swizzleMode; - input.unalignedWidth = Max(pIn->unalignedWidth, 1u); - input.unalignedHeight = Max(pIn->unalignedHeight, 1u); - input.numSlices = Max(pIn->numSlices, 1u); - input.numMipLevels = Max(pIn->numMipLevels, 1u); - - ADDR2_COMPUTE_HTILE_INFO_OUTPUT output = {0}; - output.size = sizeof(output); - - returnCode = ComputeHtileInfo(&input, &output); - - if (returnCode == ADDR_OK) - { - UINT_32 elementBytesLog2 = Log2(pIn->bpp >> 3); - UINT_32 metaBlkWidthLog2 = Log2(output.metaBlkWidth); - UINT_32 metaBlkHeightLog2 = 
Log2(output.metaBlkHeight); - UINT_32 numSamplesLog2 = Log2(pIn->numSamples); - - MetaEqParams metaEqParams = {0, elementBytesLog2, numSamplesLog2, pIn->hTileFlags, - Gfx9DataDepthStencil, pIn->swizzleMode, ADDR_RSRC_TEX_2D, - metaBlkWidthLog2, metaBlkHeightLog2, 0, 3, 3, 0}; - - const CoordEq* pMetaEq = GetMetaEquation(metaEqParams); - - UINT_32 numPipeBits = GetPipeLog2ForMetaAddressing(pIn->hTileFlags.pipeAligned, - pIn->swizzleMode); - - UINT_64 pipeXor = static_cast(pIn->pipeXor & ((1 << numPipeBits) - 1)); - - UINT_64 nibbleAddress = (pIn->addr ^ (pipeXor << m_pipeInterleaveLog2)) << 1; - - UINT_32 pitchInBlock = output.pitch / output.metaBlkWidth; - UINT_32 sliceSizeInBlock = (output.height / output.metaBlkHeight) * pitchInBlock; - - UINT_32 x, y, z, s, m; - pMetaEq->solveAddr(nibbleAddress, sliceSizeInBlock, x, y, z, s, m); - - pOut->slice = m / sliceSizeInBlock; - pOut->y = ((m % sliceSizeInBlock) / pitchInBlock) * output.metaBlkHeight + y; - pOut->x = (m % pitchInBlock) * output.metaBlkWidth + x; - } - } - - return returnCode; -} - -/** -************************************************************************************************************************ -* Gfx9Lib::HwlComputeDccAddrFromCoord -* -* @brief -* Interface function stub of AddrComputeDccAddrFromCoord -* -* @return -* ADDR_E_RETURNCODE -************************************************************************************************************************ -*/ -ADDR_E_RETURNCODE Gfx9Lib::HwlComputeDccAddrFromCoord( - const ADDR2_COMPUTE_DCC_ADDRFROMCOORD_INPUT* pIn, - ADDR2_COMPUTE_DCC_ADDRFROMCOORD_OUTPUT* pOut) -{ - ADDR_E_RETURNCODE returnCode = ADDR_OK; - - if ((pIn->numMipLevels > 1) || (pIn->mipId > 1) || pIn->dccKeyFlags.linear) - { - returnCode = ADDR_NOTIMPLEMENTED; - } - else - { - ADDR2_COMPUTE_DCCINFO_INPUT input = {0}; - input.size = sizeof(input); - input.dccKeyFlags = pIn->dccKeyFlags; - input.colorFlags = pIn->colorFlags; - input.swizzleMode = pIn->swizzleMode; - 
input.resourceType = pIn->resourceType; - input.bpp = pIn->bpp; - input.unalignedWidth = Max(pIn->unalignedWidth, 1u); - input.unalignedHeight = Max(pIn->unalignedHeight, 1u); - input.numSlices = Max(pIn->numSlices, 1u); - input.numFrags = Max(pIn->numFrags, 1u); - input.numMipLevels = Max(pIn->numMipLevels, 1u); - - ADDR2_COMPUTE_DCCINFO_OUTPUT output = {0}; - output.size = sizeof(output); - - returnCode = ComputeDccInfo(&input, &output); - - if (returnCode == ADDR_OK) - { - UINT_32 elementBytesLog2 = Log2(pIn->bpp >> 3); - UINT_32 numSamplesLog2 = Log2(pIn->numFrags); - UINT_32 metaBlkWidthLog2 = Log2(output.metaBlkWidth); - UINT_32 metaBlkHeightLog2 = Log2(output.metaBlkHeight); - UINT_32 metaBlkDepthLog2 = Log2(output.metaBlkDepth); - UINT_32 compBlkWidthLog2 = Log2(output.compressBlkWidth); - UINT_32 compBlkHeightLog2 = Log2(output.compressBlkHeight); - UINT_32 compBlkDepthLog2 = Log2(output.compressBlkDepth); - - MetaEqParams metaEqParams = {pIn->mipId, elementBytesLog2, numSamplesLog2, pIn->dccKeyFlags, - Gfx9DataColor, pIn->swizzleMode, pIn->resourceType, - metaBlkWidthLog2, metaBlkHeightLog2, metaBlkDepthLog2, - compBlkWidthLog2, compBlkHeightLog2, compBlkDepthLog2}; - - const CoordEq* pMetaEq = GetMetaEquation(metaEqParams); - - UINT_32 xb = pIn->x / output.metaBlkWidth; - UINT_32 yb = pIn->y / output.metaBlkHeight; - UINT_32 zb = pIn->slice / output.metaBlkDepth; - - UINT_32 pitchInBlock = output.pitch / output.metaBlkWidth; - UINT_32 sliceSizeInBlock = (output.height / output.metaBlkHeight) * pitchInBlock; - UINT_32 blockIndex = zb * sliceSizeInBlock + yb * pitchInBlock + xb; - - UINT_64 address = pMetaEq->solve(pIn->x, pIn->y, pIn->slice, pIn->sample, blockIndex); - - pOut->addr = address >> 1; - - UINT_32 numPipeBits = GetPipeLog2ForMetaAddressing(pIn->dccKeyFlags.pipeAligned, - pIn->swizzleMode); - - UINT_64 pipeXor = static_cast(pIn->pipeXor & ((1 << numPipeBits) - 1)); - - pOut->addr ^= (pipeXor << m_pipeInterleaveLog2); - } - } - - return 
returnCode; -} - -/** -************************************************************************************************************************ -* Gfx9Lib::HwlInitGlobalParams -* -* @brief -* Initializes global parameters -* -* @return -* TRUE if all settings are valid -* -************************************************************************************************************************ -*/ -BOOL_32 Gfx9Lib::HwlInitGlobalParams( - const ADDR_CREATE_INPUT* pCreateIn) ///< [in] create input -{ - BOOL_32 valid = TRUE; - - if (m_settings.isArcticIsland) - { - GB_ADDR_CONFIG gbAddrConfig; - - gbAddrConfig.u32All = pCreateIn->regValue.gbAddrConfig; - - // These values are copied from CModel code - switch (gbAddrConfig.bits.NUM_PIPES) - { - case ADDR_CONFIG_1_PIPE: - m_pipes = 1; - m_pipesLog2 = 0; - break; - case ADDR_CONFIG_2_PIPE: - m_pipes = 2; - m_pipesLog2 = 1; - break; - case ADDR_CONFIG_4_PIPE: - m_pipes = 4; - m_pipesLog2 = 2; - break; - case ADDR_CONFIG_8_PIPE: - m_pipes = 8; - m_pipesLog2 = 3; - break; - case ADDR_CONFIG_16_PIPE: - m_pipes = 16; - m_pipesLog2 = 4; - break; - case ADDR_CONFIG_32_PIPE: - m_pipes = 32; - m_pipesLog2 = 5; - break; - default: - ADDR_ASSERT_ALWAYS(); - break; - } - - switch (gbAddrConfig.bits.PIPE_INTERLEAVE_SIZE) - { - case ADDR_CONFIG_PIPE_INTERLEAVE_256B: - m_pipeInterleaveBytes = ADDR_PIPEINTERLEAVE_256B; - m_pipeInterleaveLog2 = 8; - break; - case ADDR_CONFIG_PIPE_INTERLEAVE_512B: - m_pipeInterleaveBytes = ADDR_PIPEINTERLEAVE_512B; - m_pipeInterleaveLog2 = 9; - break; - case ADDR_CONFIG_PIPE_INTERLEAVE_1KB: - m_pipeInterleaveBytes = ADDR_PIPEINTERLEAVE_1KB; - m_pipeInterleaveLog2 = 10; - break; - case ADDR_CONFIG_PIPE_INTERLEAVE_2KB: - m_pipeInterleaveBytes = ADDR_PIPEINTERLEAVE_2KB; - m_pipeInterleaveLog2 = 11; - break; - default: - ADDR_ASSERT_ALWAYS(); - break; - } - - // Addr::V2::Lib::ComputePipeBankXor()/ComputeSlicePipeBankXor() requires pipe interleave to be exactly 8 bits, - // and any larger value requires a 
post-process (left shift) on the output pipeBankXor bits. - ADDR_ASSERT(m_pipeInterleaveBytes == ADDR_PIPEINTERLEAVE_256B); - - switch (gbAddrConfig.bits.NUM_BANKS) - { - case ADDR_CONFIG_1_BANK: - m_banks = 1; - m_banksLog2 = 0; - break; - case ADDR_CONFIG_2_BANK: - m_banks = 2; - m_banksLog2 = 1; - break; - case ADDR_CONFIG_4_BANK: - m_banks = 4; - m_banksLog2 = 2; - break; - case ADDR_CONFIG_8_BANK: - m_banks = 8; - m_banksLog2 = 3; - break; - case ADDR_CONFIG_16_BANK: - m_banks = 16; - m_banksLog2 = 4; - break; - default: - ADDR_ASSERT_ALWAYS(); - break; - } - - switch (gbAddrConfig.bits.NUM_SHADER_ENGINES) - { - case ADDR_CONFIG_1_SHADER_ENGINE: - m_se = 1; - m_seLog2 = 0; - break; - case ADDR_CONFIG_2_SHADER_ENGINE: - m_se = 2; - m_seLog2 = 1; - break; - case ADDR_CONFIG_4_SHADER_ENGINE: - m_se = 4; - m_seLog2 = 2; - break; - case ADDR_CONFIG_8_SHADER_ENGINE: - m_se = 8; - m_seLog2 = 3; - break; - default: - ADDR_ASSERT_ALWAYS(); - break; - } - - switch (gbAddrConfig.bits.NUM_RB_PER_SE) - { - case ADDR_CONFIG_1_RB_PER_SHADER_ENGINE: - m_rbPerSe = 1; - m_rbPerSeLog2 = 0; - break; - case ADDR_CONFIG_2_RB_PER_SHADER_ENGINE: - m_rbPerSe = 2; - m_rbPerSeLog2 = 1; - break; - case ADDR_CONFIG_4_RB_PER_SHADER_ENGINE: - m_rbPerSe = 4; - m_rbPerSeLog2 = 2; - break; - default: - ADDR_ASSERT_ALWAYS(); - break; - } - - switch (gbAddrConfig.bits.MAX_COMPRESSED_FRAGS) - { - case ADDR_CONFIG_1_MAX_COMPRESSED_FRAGMENTS: - m_maxCompFrag = 1; - m_maxCompFragLog2 = 0; - break; - case ADDR_CONFIG_2_MAX_COMPRESSED_FRAGMENTS: - m_maxCompFrag = 2; - m_maxCompFragLog2 = 1; - break; - case ADDR_CONFIG_4_MAX_COMPRESSED_FRAGMENTS: - m_maxCompFrag = 4; - m_maxCompFragLog2 = 2; - break; - case ADDR_CONFIG_8_MAX_COMPRESSED_FRAGMENTS: - m_maxCompFrag = 8; - m_maxCompFragLog2 = 3; - break; - default: - ADDR_ASSERT_ALWAYS(); - break; - } - - m_blockVarSizeLog2 = pCreateIn->regValue.blockVarSizeLog2; - ADDR_ASSERT((m_blockVarSizeLog2 == 0) || - ((m_blockVarSizeLog2 >= 17u) && 
(m_blockVarSizeLog2 <= 20u))); - m_blockVarSizeLog2 = Min(Max(17u, m_blockVarSizeLog2), 20u); - - if ((m_rbPerSeLog2 == 1) && - (((m_pipesLog2 == 1) && ((m_seLog2 == 2) || (m_seLog2 == 3))) || - ((m_pipesLog2 == 2) && ((m_seLog2 == 1) || (m_seLog2 == 2))))) - { - ADDR_ASSERT(m_settings.isVega10 == FALSE); - ADDR_ASSERT(m_settings.isRaven == FALSE); - ADDR_ASSERT(m_settings.isVega20 == FALSE); - - if (m_settings.isVega12) - { - m_settings.htileCacheRbConflict = 1; - } - } - } - else - { - valid = FALSE; - ADDR_NOT_IMPLEMENTED(); - } - - if (valid) - { - InitEquationTable(); - } - - return valid; -} - -/** -************************************************************************************************************************ -* Gfx9Lib::HwlConvertChipFamily -* -* @brief -* Convert familyID defined in atiid.h to ChipFamily and set m_chipFamily/m_chipRevision -* @return -* ChipFamily -************************************************************************************************************************ -*/ -ChipFamily Gfx9Lib::HwlConvertChipFamily( - UINT_32 uChipFamily, ///< [in] chip family defined in atiih.h - UINT_32 uChipRevision) ///< [in] chip revision defined in "asic_family"_id.h -{ - ChipFamily family = ADDR_CHIP_FAMILY_AI; - - switch (uChipFamily) - { - case FAMILY_AI: - m_settings.isArcticIsland = 1; - m_settings.isVega10 = ASICREV_IS_VEGA10_P(uChipRevision); - m_settings.isVega12 = ASICREV_IS_VEGA12_P(uChipRevision); - m_settings.isVega20 = ASICREV_IS_VEGA20_P(uChipRevision); - m_settings.isDce12 = 1; - - if (m_settings.isVega10 == 0) - { - m_settings.htileAlignFix = 1; - m_settings.applyAliasFix = 1; - } - - m_settings.metaBaseAlignFix = 1; - - m_settings.depthPipeXorDisable = 1; - break; - case FAMILY_RV: - m_settings.isArcticIsland = 1; - m_settings.isRaven = ASICREV_IS_RAVEN(uChipRevision) || ASICREV_IS_RAVEN2(uChipRevision); - - if (m_settings.isRaven) - { - m_settings.isDcn1 = 1; - } - - m_settings.metaBaseAlignFix = 1; - - if 
(ASICREV_IS_RAVEN(uChipRevision)) - { - m_settings.depthPipeXorDisable = 1; - } - break; - - default: - ADDR_ASSERT(!"This should be a Fusion"); - break; - } - - return family; -} - -/** -************************************************************************************************************************ -* Gfx9Lib::InitRbEquation -* -* @brief -* Init RB equation -* @return -* N/A -************************************************************************************************************************ -*/ -VOID Gfx9Lib::GetRbEquation( - CoordEq* pRbEq, ///< [out] rb equation - UINT_32 numRbPerSeLog2, ///< [in] number of rb per shader engine - UINT_32 numSeLog2) ///< [in] number of shader engine - const -{ - // RB's are distributed on 16x16, except when we have 1 rb per se, in which case its 32x32 - UINT_32 rbRegion = (numRbPerSeLog2 == 0) ? 5 : 4; - Coordinate cx('x', rbRegion); - Coordinate cy('y', rbRegion); - - UINT_32 start = 0; - UINT_32 numRbTotalLog2 = numRbPerSeLog2 + numSeLog2; - - // Clear the rb equation - pRbEq->resize(0); - pRbEq->resize(numRbTotalLog2); - - if ((numSeLog2 > 0) && (numRbPerSeLog2 == 1)) - { - // Special case when more than 1 SE, and 2 RB per SE - (*pRbEq)[0].add(cx); - (*pRbEq)[0].add(cy); - cx++; - cy++; - - if (m_settings.applyAliasFix == false) - { - (*pRbEq)[0].add(cy); - } - - (*pRbEq)[0].add(cy); - start++; - } - - UINT_32 numBits = 2 * (numRbTotalLog2 - start); - - for (UINT_32 i = 0; i < numBits; i++) - { - UINT_32 idx = - start + (((start + i) >= numRbTotalLog2) ? 
(2 * (numRbTotalLog2 - start) - i - 1) : i); - - if ((i % 2) == 1) - { - (*pRbEq)[idx].add(cx); - cx++; - } - else - { - (*pRbEq)[idx].add(cy); - cy++; - } - } -} - -/** -************************************************************************************************************************ -* Gfx9Lib::GetDataEquation -* -* @brief -* Get data equation for fmask and Z -* @return -* N/A -************************************************************************************************************************ -*/ -VOID Gfx9Lib::GetDataEquation( - CoordEq* pDataEq, ///< [out] data surface equation - Gfx9DataType dataSurfaceType, ///< [in] data surface type - AddrSwizzleMode swizzleMode, ///< [in] data surface swizzle mode - AddrResourceType resourceType, ///< [in] data surface resource type - UINT_32 elementBytesLog2, ///< [in] data surface element bytes - UINT_32 numSamplesLog2) ///< [in] data surface sample count - const -{ - Coordinate cx('x', 0); - Coordinate cy('y', 0); - Coordinate cz('z', 0); - Coordinate cs('s', 0); - - // Clear the equation - pDataEq->resize(0); - pDataEq->resize(27); - - if (dataSurfaceType == Gfx9DataColor) - { - if (IsLinear(swizzleMode)) - { - Coordinate cm('m', 0); - - pDataEq->resize(49); - - for (UINT_32 i = 0; i < 49; i++) - { - (*pDataEq)[i].add(cm); - cm++; - } - } - else if (IsThick(resourceType, swizzleMode)) - { - // Color 3d_S and 3d_Z modes, 3d_D is same as color 2d - UINT_32 i; - if (IsStandardSwizzle(resourceType, swizzleMode)) - { - // Standard 3d swizzle - // Fill in bottom x bits - for (i = elementBytesLog2; i < 4; i++) - { - (*pDataEq)[i].add(cx); - cx++; - } - // Fill in 2 bits of y and then z - for (i = 4; i < 6; i++) - { - (*pDataEq)[i].add(cy); - cy++; - } - for (i = 6; i < 8; i++) - { - (*pDataEq)[i].add(cz); - cz++; - } - if (elementBytesLog2 < 2) - { - // fill in z & y bit - (*pDataEq)[8].add(cz); - (*pDataEq)[9].add(cy); - cz++; - cy++; - } - else if (elementBytesLog2 == 2) - { - // fill in y and x bit - 
(*pDataEq)[8].add(cy); - (*pDataEq)[9].add(cx); - cy++; - cx++; - } - else - { - // fill in 2 x bits - (*pDataEq)[8].add(cx); - cx++; - (*pDataEq)[9].add(cx); - cx++; - } - } - else - { - // Z 3d swizzle - UINT_32 m2dEnd = (elementBytesLog2 ==0) ? 3 : ((elementBytesLog2 < 4) ? 4 : 5); - UINT_32 numZs = (elementBytesLog2 == 0 || elementBytesLog2 == 4) ? - 2 : ((elementBytesLog2 == 1) ? 3 : 1); - pDataEq->mort2d(cx, cy, elementBytesLog2, m2dEnd); - for (i = m2dEnd + 1; i <= m2dEnd + numZs; i++) - { - (*pDataEq)[i].add(cz); - cz++; - } - if ((elementBytesLog2 == 0) || (elementBytesLog2 == 3)) - { - // add an x and z - (*pDataEq)[6].add(cx); - (*pDataEq)[7].add(cz); - cx++; - cz++; - } - else if (elementBytesLog2 == 2) - { - // add a y and z - (*pDataEq)[6].add(cy); - (*pDataEq)[7].add(cz); - cy++; - cz++; - } - // add y and x - (*pDataEq)[8].add(cy); - (*pDataEq)[9].add(cx); - cy++; - cx++; - } - // Fill in bit 10 and up - pDataEq->mort3d( cz, cy, cx, 10 ); - } - else if (IsThin(resourceType, swizzleMode)) - { - UINT_32 blockSizeLog2 = GetBlockSizeLog2(swizzleMode); - // Color 2D - UINT_32 microYBits = (8 - elementBytesLog2) / 2; - UINT_32 tileSplitStart = blockSizeLog2 - numSamplesLog2; - UINT_32 i; - // Fill in bottom x bits - for (i = elementBytesLog2; i < 4; i++) - { - (*pDataEq)[i].add(cx); - cx++; - } - // Fill in bottom y bits - for (i = 4; i < 4 + microYBits; i++) - { - (*pDataEq)[i].add(cy); - cy++; - } - // Fill in last of the micro_x bits - for (i = 4 + microYBits; i < 8; i++) - { - (*pDataEq)[i].add(cx); - cx++; - } - // Fill in x/y bits below sample split - pDataEq->mort2d(cy, cx, 8, tileSplitStart - 1); - // Fill in sample bits - for (i = 0; i < numSamplesLog2; i++) - { - cs.set('s', i); - (*pDataEq)[tileSplitStart + i].add(cs); - } - // Fill in x/y bits above sample split - if ((numSamplesLog2 & 1) ^ (blockSizeLog2 & 1)) - { - pDataEq->mort2d(cx, cy, blockSizeLog2); - } - else - { - pDataEq->mort2d(cy, cx, blockSizeLog2); - } - } - else - { - 
ADDR_ASSERT_ALWAYS(); - } - } - else - { - // Fmask or depth - UINT_32 sampleStart = elementBytesLog2; - UINT_32 pixelStart = elementBytesLog2 + numSamplesLog2; - UINT_32 ymajStart = 6 + numSamplesLog2; - - for (UINT_32 s = 0; s < numSamplesLog2; s++) - { - cs.set('s', s); - (*pDataEq)[sampleStart + s].add(cs); - } - - // Put in the x-major order pixel bits - pDataEq->mort2d(cx, cy, pixelStart, ymajStart - 1); - // Put in the y-major order pixel bits - pDataEq->mort2d(cy, cx, ymajStart); - } -} - -/** -************************************************************************************************************************ -* Gfx9Lib::GetPipeEquation -* -* @brief -* Get pipe equation -* @return -* N/A -************************************************************************************************************************ -*/ -VOID Gfx9Lib::GetPipeEquation( - CoordEq* pPipeEq, ///< [out] pipe equation - CoordEq* pDataEq, ///< [in] data equation - UINT_32 pipeInterleaveLog2, ///< [in] pipe interleave - UINT_32 numPipeLog2, ///< [in] number of pipes - UINT_32 numSamplesLog2, ///< [in] data surface sample count - Gfx9DataType dataSurfaceType, ///< [in] data surface type - AddrSwizzleMode swizzleMode, ///< [in] data surface swizzle mode - AddrResourceType resourceType ///< [in] data surface resource type - ) const -{ - UINT_32 blockSizeLog2 = GetBlockSizeLog2(swizzleMode); - CoordEq dataEq; - - pDataEq->copy(dataEq); - - if (dataSurfaceType == Gfx9DataColor) - { - INT_32 shift = static_cast(numSamplesLog2); - dataEq.shift(-shift, blockSizeLog2 - numSamplesLog2); - } - - dataEq.copy(*pPipeEq, pipeInterleaveLog2, numPipeLog2); - - // This section should only apply to z/stencil, maybe fmask - // If the pipe bit is below the comp block size, - // then keep moving up the address until we find a bit that is above - UINT_32 pipeStart = 0; - - if (dataSurfaceType != Gfx9DataColor) - { - Coordinate tileMin('x', 3); - - while (dataEq[pipeInterleaveLog2 + pipeStart][0] < tileMin) - 
{ - pipeStart++; - } - - // if pipe is 0, then the first pipe bit is above the comp block size, - // so we don't need to do anything - // Note, this if condition is not necessary, since if we execute the loop when pipe==0, - // we will get the same pipe equation - if (pipeStart != 0) - { - for (UINT_32 i = 0; i < numPipeLog2; i++) - { - // Copy the jth bit above pipe interleave to the current pipe equation bit - dataEq[pipeInterleaveLog2 + pipeStart + i].copyto((*pPipeEq)[i]); - } - } - } - - if (IsPrt(swizzleMode)) - { - // Clear out bits above the block size if prt's are enabled - dataEq.resize(blockSizeLog2); - dataEq.resize(48); - } - - if (IsXor(swizzleMode)) - { - CoordEq xorMask; - - if (IsThick(resourceType, swizzleMode)) - { - CoordEq xorMask2; - - dataEq.copy(xorMask2, pipeInterleaveLog2 + numPipeLog2, 2 * numPipeLog2); - - xorMask.resize(numPipeLog2); - - for (UINT_32 pipeIdx = 0; pipeIdx < numPipeLog2; pipeIdx++) - { - xorMask[pipeIdx].add(xorMask2[2 * pipeIdx]); - xorMask[pipeIdx].add(xorMask2[2 * pipeIdx + 1]); - } - } - else - { - // Xor in the bits above the pipe+gpu bits - dataEq.copy(xorMask, pipeInterleaveLog2 + pipeStart + numPipeLog2, numPipeLog2); - - if ((numSamplesLog2 == 0) && (IsPrt(swizzleMode) == FALSE)) - { - Coordinate co; - CoordEq xorMask2; - // if 1xaa and not prt, then xor in the z bits - xorMask2.resize(0); - xorMask2.resize(numPipeLog2); - for (UINT_32 pipeIdx = 0; pipeIdx < numPipeLog2; pipeIdx++) - { - co.set('z', numPipeLog2 - 1 - pipeIdx); - xorMask2[pipeIdx].add(co); - } - - pPipeEq->xorin(xorMask2); - } - } - - xorMask.reverse(); - pPipeEq->xorin(xorMask); - } -} -/** -************************************************************************************************************************ -* Gfx9Lib::GetMetaEquation -* -* @brief -* Get meta equation for cmask/htile/DCC -* @return -* Pointer to a calculated meta equation 
-************************************************************************************************************************ -*/ -const CoordEq* Gfx9Lib::GetMetaEquation( - const MetaEqParams& metaEqParams) -{ - UINT_32 cachedMetaEqIndex; - - for (cachedMetaEqIndex = 0; cachedMetaEqIndex < MaxCachedMetaEq; cachedMetaEqIndex++) - { - if (memcmp(&metaEqParams, - &m_cachedMetaEqKey[cachedMetaEqIndex], - static_cast(sizeof(metaEqParams))) == 0) - { - break; - } - } - - CoordEq* pMetaEq = NULL; - - if (cachedMetaEqIndex < MaxCachedMetaEq) - { - pMetaEq = &m_cachedMetaEq[cachedMetaEqIndex]; - } - else - { - m_cachedMetaEqKey[m_metaEqOverrideIndex] = metaEqParams; - - pMetaEq = &m_cachedMetaEq[m_metaEqOverrideIndex++]; - - m_metaEqOverrideIndex %= MaxCachedMetaEq; - - GenMetaEquation(pMetaEq, - metaEqParams.maxMip, - metaEqParams.elementBytesLog2, - metaEqParams.numSamplesLog2, - metaEqParams.metaFlag, - metaEqParams.dataSurfaceType, - metaEqParams.swizzleMode, - metaEqParams.resourceType, - metaEqParams.metaBlkWidthLog2, - metaEqParams.metaBlkHeightLog2, - metaEqParams.metaBlkDepthLog2, - metaEqParams.compBlkWidthLog2, - metaEqParams.compBlkHeightLog2, - metaEqParams.compBlkDepthLog2); - } - - return pMetaEq; -} - -/** -************************************************************************************************************************ -* Gfx9Lib::GenMetaEquation -* -* @brief -* Get meta equation for cmask/htile/DCC -* @return -* N/A -************************************************************************************************************************ -*/ -VOID Gfx9Lib::GenMetaEquation( - CoordEq* pMetaEq, ///< [out] meta equation - UINT_32 maxMip, ///< [in] max mip Id - UINT_32 elementBytesLog2, ///< [in] data surface element bytes - UINT_32 numSamplesLog2, ///< [in] data surface sample count - ADDR2_META_FLAGS metaFlag, ///< [in] meta falg - Gfx9DataType dataSurfaceType, ///< [in] data surface type - AddrSwizzleMode swizzleMode, ///< [in] data surface swizzle mode - 
AddrResourceType resourceType, ///< [in] data surface resource type - UINT_32 metaBlkWidthLog2, ///< [in] meta block width - UINT_32 metaBlkHeightLog2, ///< [in] meta block height - UINT_32 metaBlkDepthLog2, ///< [in] meta block depth - UINT_32 compBlkWidthLog2, ///< [in] compress block width - UINT_32 compBlkHeightLog2, ///< [in] compress block height - UINT_32 compBlkDepthLog2) ///< [in] compress block depth - const -{ - UINT_32 numPipeTotalLog2 = GetPipeLog2ForMetaAddressing(metaFlag.pipeAligned, swizzleMode); - UINT_32 pipeInterleaveLog2 = m_pipeInterleaveLog2; - - // Get the correct data address and rb equation - CoordEq dataEq; - GetDataEquation(&dataEq, dataSurfaceType, swizzleMode, resourceType, - elementBytesLog2, numSamplesLog2); - - // Get pipe and rb equations - CoordEq pipeEquation; - GetPipeEquation(&pipeEquation, &dataEq, pipeInterleaveLog2, numPipeTotalLog2, - numSamplesLog2, dataSurfaceType, swizzleMode, resourceType); - numPipeTotalLog2 = pipeEquation.getsize(); - - if (metaFlag.linear) - { - // Linear metadata supporting was removed for GFX9! No one can use this feature. - ADDR_ASSERT_ALWAYS(); - - ADDR_ASSERT(dataSurfaceType == Gfx9DataColor); - - dataEq.copy(*pMetaEq); - - if (IsLinear(swizzleMode)) - { - if (metaFlag.pipeAligned) - { - // Remove the pipe bits - INT_32 shift = static_cast(numPipeTotalLog2); - pMetaEq->shift(-shift, pipeInterleaveLog2); - } - // Divide by comp block size, which for linear (which is always color) is 256 B - pMetaEq->shift(-8); - - if (metaFlag.pipeAligned) - { - // Put pipe bits back in - pMetaEq->shift(numPipeTotalLog2, pipeInterleaveLog2); - - for (UINT_32 i = 0; i < numPipeTotalLog2; i++) - { - pipeEquation[i].copyto((*pMetaEq)[pipeInterleaveLog2 + i]); - } - } - } - - pMetaEq->shift(1); - } - else - { - UINT_32 maxCompFragLog2 = static_cast(m_maxCompFragLog2); - UINT_32 compFragLog2 = - ((dataSurfaceType == Gfx9DataColor) && (numSamplesLog2 > maxCompFragLog2)) ? 
- maxCompFragLog2 : numSamplesLog2; - - UINT_32 uncompFragLog2 = numSamplesLog2 - compFragLog2; - - // Make sure the metaaddr is cleared - pMetaEq->resize(0); - pMetaEq->resize(27); - - if (IsThick(resourceType, swizzleMode)) - { - Coordinate cx('x', 0); - Coordinate cy('y', 0); - Coordinate cz('z', 0); - - if (maxMip > 0) - { - pMetaEq->mort3d(cy, cx, cz); - } - else - { - pMetaEq->mort3d(cx, cy, cz); - } - } - else - { - Coordinate cx('x', 0); - Coordinate cy('y', 0); - Coordinate cs; - - if (maxMip > 0) - { - pMetaEq->mort2d(cy, cx, compFragLog2); - } - else - { - pMetaEq->mort2d(cx, cy, compFragLog2); - } - - //------------------------------------------------------------------------------------------------------------------------ - // Put the compressible fragments at the lsb - // the uncompressible frags will be at the msb of the micro address - //------------------------------------------------------------------------------------------------------------------------ - for (UINT_32 s = 0; s < compFragLog2; s++) - { - cs.set('s', s); - (*pMetaEq)[s].add(cs); - } - } - - // Keep a copy of the pipe equations - CoordEq origPipeEquation; - pipeEquation.copy(origPipeEquation); - - Coordinate co; - // filter out everything under the compressed block size - co.set('x', compBlkWidthLog2); - pMetaEq->Filter('<', co, 0, 'x'); - co.set('y', compBlkHeightLog2); - pMetaEq->Filter('<', co, 0, 'y'); - co.set('z', compBlkDepthLog2); - pMetaEq->Filter('<', co, 0, 'z'); - - // For non-color, filter out sample bits - if (dataSurfaceType != Gfx9DataColor) - { - co.set('x', 0); - pMetaEq->Filter('<', co, 0, 's'); - } - - // filter out everything above the metablock size - co.set('x', metaBlkWidthLog2 - 1); - pMetaEq->Filter('>', co, 0, 'x'); - co.set('y', metaBlkHeightLog2 - 1); - pMetaEq->Filter('>', co, 0, 'y'); - co.set('z', metaBlkDepthLog2 - 1); - pMetaEq->Filter('>', co, 0, 'z'); - - // filter out everything above the metablock size for the channel bits - co.set('x', 
metaBlkWidthLog2 - 1); - pipeEquation.Filter('>', co, 0, 'x'); - co.set('y', metaBlkHeightLog2 - 1); - pipeEquation.Filter('>', co, 0, 'y'); - co.set('z', metaBlkDepthLog2 - 1); - pipeEquation.Filter('>', co, 0, 'z'); - - // Make sure we still have the same number of channel bits - if (pipeEquation.getsize() != numPipeTotalLog2) - { - ADDR_ASSERT_ALWAYS(); - } - - // Loop through all channel and rb bits, - // and make sure these components exist in the metadata address - for (UINT_32 i = 0; i < numPipeTotalLog2; i++) - { - for (UINT_32 j = pipeEquation[i].getsize(); j > 0; j--) - { - if (pMetaEq->Exists(pipeEquation[i][j - 1]) == FALSE) - { - ADDR_ASSERT_ALWAYS(); - } - } - } - - const UINT_32 numSeLog2 = metaFlag.rbAligned ? m_seLog2 : 0; - const UINT_32 numRbPeSeLog2 = metaFlag.rbAligned ? m_rbPerSeLog2 : 0; - const UINT_32 numRbTotalLog2 = numRbPeSeLog2 + numSeLog2; - CoordEq origRbEquation; - - GetRbEquation(&origRbEquation, numRbPeSeLog2, numSeLog2); - - CoordEq rbEquation = origRbEquation; - - for (UINT_32 i = 0; i < numRbTotalLog2; i++) - { - for (UINT_32 j = rbEquation[i].getsize(); j > 0; j--) - { - if (pMetaEq->Exists(rbEquation[i][j - 1]) == FALSE) - { - ADDR_ASSERT_ALWAYS(); - } - } - } - - if (m_settings.applyAliasFix) - { - co.set('z', -1); - } - - // Loop through each rb id bit; if it is equal to any of the filtered channel bits, clear it - for (UINT_32 i = 0; i < numRbTotalLog2; i++) - { - for (UINT_32 j = 0; j < numPipeTotalLog2; j++) - { - BOOL_32 isRbEquationInPipeEquation = FALSE; - - if (m_settings.applyAliasFix) - { - CoordTerm filteredPipeEq; - filteredPipeEq = pipeEquation[j]; - - filteredPipeEq.Filter('>', co, 0, 'z'); - - isRbEquationInPipeEquation = (rbEquation[i] == filteredPipeEq); - } - else - { - isRbEquationInPipeEquation = (rbEquation[i] == pipeEquation[j]); - } - - if (isRbEquationInPipeEquation) - { - rbEquation[i].Clear(); - } - } - } - - bool rbAppendedWithPipeBits[1 << (MaxSeLog2 + MaxRbPerSeLog2)] = {}; - - // Loop through 
each bit of the channel, get the smallest coordinate, - // and remove it from the metaaddr, and rb_equation - for (UINT_32 i = 0; i < numPipeTotalLog2; i++) - { - pipeEquation[i].getsmallest(co); - - UINT_32 old_size = pMetaEq->getsize(); - pMetaEq->Filter('=', co); - UINT_32 new_size = pMetaEq->getsize(); - if (new_size != old_size-1) - { - ADDR_ASSERT_ALWAYS(); - } - pipeEquation.remove(co); - for (UINT_32 j = 0; j < numRbTotalLog2; j++) - { - if (rbEquation[j].remove(co)) - { - // if we actually removed something from this bit, then add the remaining - // channel bits, as these can be removed for this bit - for (UINT_32 k = 0; k < pipeEquation[i].getsize(); k++) - { - if (pipeEquation[i][k] != co) - { - rbEquation[j].add(pipeEquation[i][k]); - rbAppendedWithPipeBits[j] = true; - } - } - } - } - } - - // Loop through the rb bits and see what remain; - // filter out the smallest coordinate if it remains - UINT_32 rbBitsLeft = 0; - for (UINT_32 i = 0; i < numRbTotalLog2; i++) - { - BOOL_32 isRbEqAppended = FALSE; - - if (m_settings.applyAliasFix) - { - isRbEqAppended = (rbEquation[i].getsize() > (rbAppendedWithPipeBits[i] ? 
1 : 0)); - } - else - { - isRbEqAppended = (rbEquation[i].getsize() > 0); - } - - if (isRbEqAppended) - { - rbBitsLeft++; - rbEquation[i].getsmallest(co); - UINT_32 old_size = pMetaEq->getsize(); - pMetaEq->Filter('=', co); - UINT_32 new_size = pMetaEq->getsize(); - if (new_size != old_size - 1) - { - // assert warning - } - for (UINT_32 j = i + 1; j < numRbTotalLog2; j++) - { - if (rbEquation[j].remove(co)) - { - // if we actually removed something from this bit, then add the remaining - // rb bits, as these can be removed for this bit - for (UINT_32 k = 0; k < rbEquation[i].getsize(); k++) - { - if (rbEquation[i][k] != co) - { - rbEquation[j].add(rbEquation[i][k]); - rbAppendedWithPipeBits[j] |= rbAppendedWithPipeBits[i]; - } - } - } - } - } - } - - // capture the size of the metaaddr - UINT_32 metaSize = pMetaEq->getsize(); - // resize to 49 bits...make this a nibble address - pMetaEq->resize(49); - // Concatenate the macro address above the current address - for (UINT_32 i = metaSize, j = 0; i < 49; i++, j++) - { - co.set('m', j); - (*pMetaEq)[i].add(co); - } - - // Multiply by meta element size (in nibbles) - if (dataSurfaceType == Gfx9DataColor) - { - pMetaEq->shift(1); - } - else if (dataSurfaceType == Gfx9DataDepthStencil) - { - pMetaEq->shift(3); - } - - //------------------------------------------------------------------------------------------ - // Note the pipeInterleaveLog2+1 is because address is a nibble address - // Shift up from pipe interleave number of channel - // and rb bits left, and uncompressed fragments - //------------------------------------------------------------------------------------------ - - pMetaEq->shift(numPipeTotalLog2 + rbBitsLeft + uncompFragLog2, pipeInterleaveLog2 + 1); - - // Put in the channel bits - for (UINT_32 i = 0; i < numPipeTotalLog2; i++) - { - origPipeEquation[i].copyto((*pMetaEq)[pipeInterleaveLog2+1 + i]); - } - - // Put in remaining rb bits - for (UINT_32 i = 0, j = 0; j < rbBitsLeft; i = (i + 1) % 
numRbTotalLog2) - { - BOOL_32 isRbEqAppended = FALSE; - - if (m_settings.applyAliasFix) - { - isRbEqAppended = (rbEquation[i].getsize() > (rbAppendedWithPipeBits[i] ? 1 : 0)); - } - else - { - isRbEqAppended = (rbEquation[i].getsize() > 0); - } - - if (isRbEqAppended) - { - origRbEquation[i].copyto((*pMetaEq)[pipeInterleaveLog2 + 1 + numPipeTotalLog2 + j]); - // Mark any rb bit we add in to the rb mask - j++; - } - } - - //------------------------------------------------------------------------------------------ - // Put in the uncompressed fragment bits - //------------------------------------------------------------------------------------------ - for (UINT_32 i = 0; i < uncompFragLog2; i++) - { - co.set('s', compFragLog2 + i); - (*pMetaEq)[pipeInterleaveLog2 + 1 + numPipeTotalLog2 + rbBitsLeft + i].add(co); - } - } -} - -/** -************************************************************************************************************************ -* Gfx9Lib::IsEquationSupported -* -* @brief -* Check if equation is supported for given swizzle mode and resource type. -* -* @return -* TRUE if supported -************************************************************************************************************************ -*/ -BOOL_32 Gfx9Lib::IsEquationSupported( - AddrResourceType rsrcType, - AddrSwizzleMode swMode, - UINT_32 elementBytesLog2) const -{ - BOOL_32 supported = (elementBytesLog2 < MaxElementBytesLog2) && - (IsLinear(swMode) == FALSE) && - (((IsTex2d(rsrcType) == TRUE) && - ((elementBytesLog2 < 4) || - ((IsRotateSwizzle(swMode) == FALSE) && - (IsZOrderSwizzle(swMode) == FALSE)))) || - ((IsTex3d(rsrcType) == TRUE) && - (IsRotateSwizzle(swMode) == FALSE) && - (IsBlock256b(swMode) == FALSE))); - - return supported; -} - -/** -************************************************************************************************************************ -* Gfx9Lib::InitEquationTable -* -* @brief -* Initialize Equation table. 
-* -* @return -* N/A -************************************************************************************************************************ -*/ -VOID Gfx9Lib::InitEquationTable() -{ - memset(m_equationTable, 0, sizeof(m_equationTable)); - - // Loop all possible resource type (2D/3D) - for (UINT_32 rsrcTypeIdx = 0; rsrcTypeIdx < MaxRsrcType; rsrcTypeIdx++) - { - AddrResourceType rsrcType = static_cast(rsrcTypeIdx + ADDR_RSRC_TEX_2D); - - // Loop all possible swizzle mode - for (UINT_32 swModeIdx = 0; swModeIdx < MaxSwMode; swModeIdx++) - { - AddrSwizzleMode swMode = static_cast(swModeIdx); - - // Loop all possible bpp - for (UINT_32 bppIdx = 0; bppIdx < MaxElementBytesLog2; bppIdx++) - { - UINT_32 equationIndex = ADDR_INVALID_EQUATION_INDEX; - - // Check if the input is supported - if (IsEquationSupported(rsrcType, swMode, bppIdx)) - { - ADDR_EQUATION equation; - ADDR_E_RETURNCODE retCode; - - memset(&equation, 0, sizeof(ADDR_EQUATION)); - - // Generate the equation - if (IsBlock256b(swMode) && IsTex2d(rsrcType)) - { - retCode = ComputeBlock256Equation(rsrcType, swMode, bppIdx, &equation); - } - else if (IsThin(rsrcType, swMode)) - { - retCode = ComputeThinEquation(rsrcType, swMode, bppIdx, &equation); - } - else - { - retCode = ComputeThickEquation(rsrcType, swMode, bppIdx, &equation); - } - - // Only fill the equation into the table if the return code is ADDR_OK, - // otherwise if the return code is not ADDR_OK, it indicates this is not - // a valid input, we do nothing but just fill invalid equation index - // into the lookup table. 
- if (retCode == ADDR_OK) - { - equationIndex = m_numEquations; - ADDR_ASSERT(equationIndex < EquationTableSize); - - m_equationTable[equationIndex] = equation; - - m_numEquations++; - } - else - { - ADDR_ASSERT_ALWAYS(); - } - } - - // Fill the index into the lookup table, if the combination is not supported - // fill the invalid equation index - m_equationLookupTable[rsrcTypeIdx][swModeIdx][bppIdx] = equationIndex; - } - } - } -} - -/** -************************************************************************************************************************ -* Gfx9Lib::HwlGetEquationIndex -* -* @brief -* Interface function stub of GetEquationIndex -* -* @return -* ADDR_E_RETURNCODE -************************************************************************************************************************ -*/ -UINT_32 Gfx9Lib::HwlGetEquationIndex( - const ADDR2_COMPUTE_SURFACE_INFO_INPUT* pIn, - ADDR2_COMPUTE_SURFACE_INFO_OUTPUT* pOut - ) const -{ - AddrResourceType rsrcType = pIn->resourceType; - AddrSwizzleMode swMode = pIn->swizzleMode; - UINT_32 elementBytesLog2 = Log2(pIn->bpp >> 3); - UINT_32 index = ADDR_INVALID_EQUATION_INDEX; - - if (IsEquationSupported(rsrcType, swMode, elementBytesLog2)) - { - UINT_32 rsrcTypeIdx = static_cast(rsrcType) - 1; - UINT_32 swModeIdx = static_cast(swMode); - - index = m_equationLookupTable[rsrcTypeIdx][swModeIdx][elementBytesLog2]; - } - - if (pOut->pMipInfo != NULL) - { - for (UINT_32 i = 0; i < pIn->numMipLevels; i++) - { - pOut->pMipInfo[i].equationIndex = index; - } - } - - return index; -} - -/** -************************************************************************************************************************ -* Gfx9Lib::HwlComputeBlock256Equation -* -* @brief -* Interface function stub of ComputeBlock256Equation -* -* @return -* ADDR_E_RETURNCODE -************************************************************************************************************************ -*/ -ADDR_E_RETURNCODE 
Gfx9Lib::HwlComputeBlock256Equation( - AddrResourceType rsrcType, - AddrSwizzleMode swMode, - UINT_32 elementBytesLog2, - ADDR_EQUATION* pEquation) const -{ - ADDR_E_RETURNCODE ret = ADDR_OK; - - pEquation->numBits = 8; - - UINT_32 i = 0; - for (; i < elementBytesLog2; i++) - { - InitChannel(1, 0 , i, &pEquation->addr[i]); - } - - ADDR_CHANNEL_SETTING* pixelBit = &pEquation->addr[elementBytesLog2]; - - const UINT_32 maxBitsUsed = 4; - ADDR_CHANNEL_SETTING x[maxBitsUsed] = {}; - ADDR_CHANNEL_SETTING y[maxBitsUsed] = {}; - - for (i = 0; i < maxBitsUsed; i++) - { - InitChannel(1, 0, elementBytesLog2 + i, &x[i]); - InitChannel(1, 1, i, &y[i]); - } - - if (IsStandardSwizzle(rsrcType, swMode)) - { - switch (elementBytesLog2) - { - case 0: - pixelBit[0] = x[0]; - pixelBit[1] = x[1]; - pixelBit[2] = x[2]; - pixelBit[3] = x[3]; - pixelBit[4] = y[0]; - pixelBit[5] = y[1]; - pixelBit[6] = y[2]; - pixelBit[7] = y[3]; - break; - case 1: - pixelBit[0] = x[0]; - pixelBit[1] = x[1]; - pixelBit[2] = x[2]; - pixelBit[3] = y[0]; - pixelBit[4] = y[1]; - pixelBit[5] = y[2]; - pixelBit[6] = x[3]; - break; - case 2: - pixelBit[0] = x[0]; - pixelBit[1] = x[1]; - pixelBit[2] = y[0]; - pixelBit[3] = y[1]; - pixelBit[4] = y[2]; - pixelBit[5] = x[2]; - break; - case 3: - pixelBit[0] = x[0]; - pixelBit[1] = y[0]; - pixelBit[2] = y[1]; - pixelBit[3] = x[1]; - pixelBit[4] = x[2]; - break; - case 4: - pixelBit[0] = y[0]; - pixelBit[1] = y[1]; - pixelBit[2] = x[0]; - pixelBit[3] = x[1]; - break; - default: - ADDR_ASSERT_ALWAYS(); - ret = ADDR_INVALIDPARAMS; - break; - } - } - else if (IsDisplaySwizzle(rsrcType, swMode)) - { - switch (elementBytesLog2) - { - case 0: - pixelBit[0] = x[0]; - pixelBit[1] = x[1]; - pixelBit[2] = x[2]; - pixelBit[3] = y[1]; - pixelBit[4] = y[0]; - pixelBit[5] = y[2]; - pixelBit[6] = x[3]; - pixelBit[7] = y[3]; - break; - case 1: - pixelBit[0] = x[0]; - pixelBit[1] = x[1]; - pixelBit[2] = x[2]; - pixelBit[3] = y[0]; - pixelBit[4] = y[1]; - pixelBit[5] = y[2]; - 
pixelBit[6] = x[3]; - break; - case 2: - pixelBit[0] = x[0]; - pixelBit[1] = x[1]; - pixelBit[2] = y[0]; - pixelBit[3] = x[2]; - pixelBit[4] = y[1]; - pixelBit[5] = y[2]; - break; - case 3: - pixelBit[0] = x[0]; - pixelBit[1] = y[0]; - pixelBit[2] = x[1]; - pixelBit[3] = x[2]; - pixelBit[4] = y[1]; - break; - case 4: - pixelBit[0] = x[0]; - pixelBit[1] = y[0]; - pixelBit[2] = x[1]; - pixelBit[3] = y[1]; - break; - default: - ADDR_ASSERT_ALWAYS(); - ret = ADDR_INVALIDPARAMS; - break; - } - } - else if (IsRotateSwizzle(swMode)) - { - switch (elementBytesLog2) - { - case 0: - pixelBit[0] = y[0]; - pixelBit[1] = y[1]; - pixelBit[2] = y[2]; - pixelBit[3] = x[1]; - pixelBit[4] = x[0]; - pixelBit[5] = x[2]; - pixelBit[6] = x[3]; - pixelBit[7] = y[3]; - break; - case 1: - pixelBit[0] = y[0]; - pixelBit[1] = y[1]; - pixelBit[2] = y[2]; - pixelBit[3] = x[0]; - pixelBit[4] = x[1]; - pixelBit[5] = x[2]; - pixelBit[6] = x[3]; - break; - case 2: - pixelBit[0] = y[0]; - pixelBit[1] = y[1]; - pixelBit[2] = x[0]; - pixelBit[3] = y[2]; - pixelBit[4] = x[1]; - pixelBit[5] = x[2]; - break; - case 3: - pixelBit[0] = y[0]; - pixelBit[1] = x[0]; - pixelBit[2] = y[1]; - pixelBit[3] = x[1]; - pixelBit[4] = x[2]; - break; - default: - ADDR_ASSERT_ALWAYS(); - case 4: - ret = ADDR_INVALIDPARAMS; - break; - } - } - else - { - ADDR_ASSERT_ALWAYS(); - ret = ADDR_INVALIDPARAMS; - } - - // Post validation - if (ret == ADDR_OK) - { - MAYBE_UNUSED Dim2d microBlockDim = Block256_2d[elementBytesLog2]; - ADDR_ASSERT((2u << GetMaxValidChannelIndex(pEquation->addr, 8, 0)) == - (microBlockDim.w * (1 << elementBytesLog2))); - ADDR_ASSERT((2u << GetMaxValidChannelIndex(pEquation->addr, 8, 1)) == microBlockDim.h); - } - - return ret; -} - -/** -************************************************************************************************************************ -* Gfx9Lib::HwlComputeThinEquation -* -* @brief -* Interface function stub of ComputeThinEquation -* -* @return -* ADDR_E_RETURNCODE 
-************************************************************************************************************************ -*/ -ADDR_E_RETURNCODE Gfx9Lib::HwlComputeThinEquation( - AddrResourceType rsrcType, - AddrSwizzleMode swMode, - UINT_32 elementBytesLog2, - ADDR_EQUATION* pEquation) const -{ - ADDR_E_RETURNCODE ret = ADDR_OK; - - UINT_32 blockSizeLog2 = GetBlockSizeLog2(swMode); - - UINT_32 maxXorBits = blockSizeLog2; - if (IsNonPrtXor(swMode)) - { - // For non-prt-xor, maybe need to initialize some more bits for xor - // The highest xor bit used in equation will be max the following 3 items: - // 1. m_pipeInterleaveLog2 + 2 * pipeXorBits - // 2. m_pipeInterleaveLog2 + pipeXorBits + 2 * bankXorBits - // 3. blockSizeLog2 - - maxXorBits = Max(maxXorBits, m_pipeInterleaveLog2 + 2 * GetPipeXorBits(blockSizeLog2)); - maxXorBits = Max(maxXorBits, m_pipeInterleaveLog2 + - GetPipeXorBits(blockSizeLog2) + - 2 * GetBankXorBits(blockSizeLog2)); - } - - const UINT_32 maxBitsUsed = 14; - ADDR_ASSERT((2 * maxBitsUsed) >= maxXorBits); - ADDR_CHANNEL_SETTING x[maxBitsUsed] = {}; - ADDR_CHANNEL_SETTING y[maxBitsUsed] = {}; - - const UINT_32 extraXorBits = 16; - ADDR_ASSERT(extraXorBits >= maxXorBits - blockSizeLog2); - ADDR_CHANNEL_SETTING xorExtra[extraXorBits] = {}; - - for (UINT_32 i = 0; i < maxBitsUsed; i++) - { - InitChannel(1, 0, elementBytesLog2 + i, &x[i]); - InitChannel(1, 1, i, &y[i]); - } - - ADDR_CHANNEL_SETTING* pixelBit = pEquation->addr; - - for (UINT_32 i = 0; i < elementBytesLog2; i++) - { - InitChannel(1, 0 , i, &pixelBit[i]); - } - - UINT_32 xIdx = 0; - UINT_32 yIdx = 0; - UINT_32 lowBits = 0; - - if (IsZOrderSwizzle(swMode)) - { - if (elementBytesLog2 <= 3) - { - for (UINT_32 i = elementBytesLog2; i < 6; i++) - { - pixelBit[i] = (((i - elementBytesLog2) & 1) == 0) ? 
x[xIdx++] : y[yIdx++]; - } - - lowBits = 6; - } - else - { - ret = ADDR_INVALIDPARAMS; - } - } - else - { - ret = HwlComputeBlock256Equation(rsrcType, swMode, elementBytesLog2, pEquation); - - if (ret == ADDR_OK) - { - Dim2d microBlockDim = Block256_2d[elementBytesLog2]; - xIdx = Log2(microBlockDim.w); - yIdx = Log2(microBlockDim.h); - lowBits = 8; - } - } - - if (ret == ADDR_OK) - { - for (UINT_32 i = lowBits; i < blockSizeLog2; i++) - { - pixelBit[i] = ((i & 1) == 0) ? y[yIdx++] : x[xIdx++]; - } - - for (UINT_32 i = blockSizeLog2; i < maxXorBits; i++) - { - xorExtra[i - blockSizeLog2] = ((i & 1) == 0) ? y[yIdx++] : x[xIdx++]; - } - - if (IsXor(swMode)) - { - // Fill XOR bits - UINT_32 pipeStart = m_pipeInterleaveLog2; - UINT_32 pipeXorBits = GetPipeXorBits(blockSizeLog2); - - UINT_32 bankStart = pipeStart + pipeXorBits; - UINT_32 bankXorBits = GetBankXorBits(blockSizeLog2); - - for (UINT_32 i = 0; i < pipeXorBits; i++) - { - UINT_32 xor1BitPos = pipeStart + 2 * pipeXorBits - 1 - i; - ADDR_CHANNEL_SETTING* pXor1Src = (xor1BitPos < blockSizeLog2) ? - &pEquation->addr[xor1BitPos] : &xorExtra[xor1BitPos - blockSizeLog2]; - - InitChannel(&pEquation->xor1[pipeStart + i], pXor1Src); - } - - for (UINT_32 i = 0; i < bankXorBits; i++) - { - UINT_32 xor1BitPos = bankStart + 2 * bankXorBits - 1 - i; - ADDR_CHANNEL_SETTING* pXor1Src = (xor1BitPos < blockSizeLog2) ? 
- &pEquation->addr[xor1BitPos] : &xorExtra[xor1BitPos - blockSizeLog2]; - - InitChannel(&pEquation->xor1[bankStart + i], pXor1Src); - } - - if (IsPrt(swMode) == FALSE) - { - for (UINT_32 i = 0; i < pipeXorBits; i++) - { - InitChannel(1, 2, pipeXorBits - i - 1, &pEquation->xor2[pipeStart + i]); - } - - for (UINT_32 i = 0; i < bankXorBits; i++) - { - InitChannel(1, 2, bankXorBits - i - 1 + pipeXorBits, &pEquation->xor2[bankStart + i]); - } - } - } - - pEquation->numBits = blockSizeLog2; - } - - return ret; -} - -/** -************************************************************************************************************************ -* Gfx9Lib::HwlComputeThickEquation -* -* @brief -* Interface function stub of ComputeThickEquation -* -* @return -* ADDR_E_RETURNCODE -************************************************************************************************************************ -*/ -ADDR_E_RETURNCODE Gfx9Lib::HwlComputeThickEquation( - AddrResourceType rsrcType, - AddrSwizzleMode swMode, - UINT_32 elementBytesLog2, - ADDR_EQUATION* pEquation) const -{ - ADDR_E_RETURNCODE ret = ADDR_OK; - - ADDR_ASSERT(IsTex3d(rsrcType)); - - UINT_32 blockSizeLog2 = GetBlockSizeLog2(swMode); - - UINT_32 maxXorBits = blockSizeLog2; - if (IsNonPrtXor(swMode)) - { - // For non-prt-xor, maybe need to initialize some more bits for xor - // The highest xor bit used in equation will be max the following 3: - // 1. m_pipeInterleaveLog2 + 3 * pipeXorBits - // 2. m_pipeInterleaveLog2 + pipeXorBits + 3 * bankXorBits - // 3. 
blockSizeLog2 - - maxXorBits = Max(maxXorBits, m_pipeInterleaveLog2 + 3 * GetPipeXorBits(blockSizeLog2)); - maxXorBits = Max(maxXorBits, m_pipeInterleaveLog2 + - GetPipeXorBits(blockSizeLog2) + - 3 * GetBankXorBits(blockSizeLog2)); - } - - for (UINT_32 i = 0; i < elementBytesLog2; i++) - { - InitChannel(1, 0 , i, &pEquation->addr[i]); - } - - ADDR_CHANNEL_SETTING* pixelBit = &pEquation->addr[elementBytesLog2]; - - const UINT_32 maxBitsUsed = 12; - ADDR_ASSERT((3 * maxBitsUsed) >= maxXorBits); - ADDR_CHANNEL_SETTING x[maxBitsUsed] = {}; - ADDR_CHANNEL_SETTING y[maxBitsUsed] = {}; - ADDR_CHANNEL_SETTING z[maxBitsUsed] = {}; - - const UINT_32 extraXorBits = 24; - ADDR_ASSERT(extraXorBits >= maxXorBits - blockSizeLog2); - ADDR_CHANNEL_SETTING xorExtra[extraXorBits] = {}; - - for (UINT_32 i = 0; i < maxBitsUsed; i++) - { - InitChannel(1, 0, elementBytesLog2 + i, &x[i]); - InitChannel(1, 1, i, &y[i]); - InitChannel(1, 2, i, &z[i]); - } - - if (IsZOrderSwizzle(swMode)) - { - switch (elementBytesLog2) - { - case 0: - pixelBit[0] = x[0]; - pixelBit[1] = y[0]; - pixelBit[2] = x[1]; - pixelBit[3] = y[1]; - pixelBit[4] = z[0]; - pixelBit[5] = z[1]; - pixelBit[6] = x[2]; - pixelBit[7] = z[2]; - pixelBit[8] = y[2]; - pixelBit[9] = x[3]; - break; - case 1: - pixelBit[0] = x[0]; - pixelBit[1] = y[0]; - pixelBit[2] = x[1]; - pixelBit[3] = y[1]; - pixelBit[4] = z[0]; - pixelBit[5] = z[1]; - pixelBit[6] = z[2]; - pixelBit[7] = y[2]; - pixelBit[8] = x[2]; - break; - case 2: - pixelBit[0] = x[0]; - pixelBit[1] = y[0]; - pixelBit[2] = x[1]; - pixelBit[3] = z[0]; - pixelBit[4] = y[1]; - pixelBit[5] = z[1]; - pixelBit[6] = y[2]; - pixelBit[7] = x[2]; - break; - case 3: - pixelBit[0] = x[0]; - pixelBit[1] = y[0]; - pixelBit[2] = z[0]; - pixelBit[3] = x[1]; - pixelBit[4] = z[1]; - pixelBit[5] = y[1]; - pixelBit[6] = x[2]; - break; - case 4: - pixelBit[0] = x[0]; - pixelBit[1] = y[0]; - pixelBit[2] = z[0]; - pixelBit[3] = z[1]; - pixelBit[4] = y[1]; - pixelBit[5] = x[1]; - break; - default: 
- ADDR_ASSERT_ALWAYS(); - ret = ADDR_INVALIDPARAMS; - break; - } - } - else if (IsStandardSwizzle(rsrcType, swMode)) - { - switch (elementBytesLog2) - { - case 0: - pixelBit[0] = x[0]; - pixelBit[1] = x[1]; - pixelBit[2] = x[2]; - pixelBit[3] = x[3]; - pixelBit[4] = y[0]; - pixelBit[5] = y[1]; - pixelBit[6] = z[0]; - pixelBit[7] = z[1]; - pixelBit[8] = z[2]; - pixelBit[9] = y[2]; - break; - case 1: - pixelBit[0] = x[0]; - pixelBit[1] = x[1]; - pixelBit[2] = x[2]; - pixelBit[3] = y[0]; - pixelBit[4] = y[1]; - pixelBit[5] = z[0]; - pixelBit[6] = z[1]; - pixelBit[7] = z[2]; - pixelBit[8] = y[2]; - break; - case 2: - pixelBit[0] = x[0]; - pixelBit[1] = x[1]; - pixelBit[2] = y[0]; - pixelBit[3] = y[1]; - pixelBit[4] = z[0]; - pixelBit[5] = z[1]; - pixelBit[6] = y[2]; - pixelBit[7] = x[2]; - break; - case 3: - pixelBit[0] = x[0]; - pixelBit[1] = y[0]; - pixelBit[2] = y[1]; - pixelBit[3] = z[0]; - pixelBit[4] = z[1]; - pixelBit[5] = x[1]; - pixelBit[6] = x[2]; - break; - case 4: - pixelBit[0] = y[0]; - pixelBit[1] = y[1]; - pixelBit[2] = z[0]; - pixelBit[3] = z[1]; - pixelBit[4] = x[0]; - pixelBit[5] = x[1]; - break; - default: - ADDR_ASSERT_ALWAYS(); - ret = ADDR_INVALIDPARAMS; - break; - } - } - else - { - ADDR_ASSERT_ALWAYS(); - ret = ADDR_INVALIDPARAMS; - } - - if (ret == ADDR_OK) - { - Dim3d microBlockDim = Block1K_3d[elementBytesLog2]; - UINT_32 xIdx = Log2(microBlockDim.w); - UINT_32 yIdx = Log2(microBlockDim.h); - UINT_32 zIdx = Log2(microBlockDim.d); - - pixelBit = pEquation->addr; - - const UINT_32 lowBits = 10; - ADDR_ASSERT(pEquation->addr[lowBits - 1].valid == 1); - ADDR_ASSERT(pEquation->addr[lowBits].valid == 0); - - for (UINT_32 i = lowBits; i < blockSizeLog2; i++) - { - if ((i % 3) == 0) - { - pixelBit[i] = x[xIdx++]; - } - else if ((i % 3) == 1) - { - pixelBit[i] = z[zIdx++]; - } - else - { - pixelBit[i] = y[yIdx++]; - } - } - - for (UINT_32 i = blockSizeLog2; i < maxXorBits; i++) - { - if ((i % 3) == 0) - { - xorExtra[i - blockSizeLog2] = x[xIdx++]; - } 
- else if ((i % 3) == 1) - { - xorExtra[i - blockSizeLog2] = z[zIdx++]; - } - else - { - xorExtra[i - blockSizeLog2] = y[yIdx++]; - } - } - - if (IsXor(swMode)) - { - // Fill XOR bits - UINT_32 pipeStart = m_pipeInterleaveLog2; - UINT_32 pipeXorBits = GetPipeXorBits(blockSizeLog2); - for (UINT_32 i = 0; i < pipeXorBits; i++) - { - UINT_32 xor1BitPos = pipeStart + (3 * pipeXorBits) - 1 - (2 * i); - ADDR_CHANNEL_SETTING* pXor1Src = (xor1BitPos < blockSizeLog2) ? - &pEquation->addr[xor1BitPos] : &xorExtra[xor1BitPos - blockSizeLog2]; - - InitChannel(&pEquation->xor1[pipeStart + i], pXor1Src); - - UINT_32 xor2BitPos = pipeStart + (3 * pipeXorBits) - 2 - (2 * i); - ADDR_CHANNEL_SETTING* pXor2Src = (xor2BitPos < blockSizeLog2) ? - &pEquation->addr[xor2BitPos] : &xorExtra[xor2BitPos - blockSizeLog2]; - - InitChannel(&pEquation->xor2[pipeStart + i], pXor2Src); - } - - UINT_32 bankStart = pipeStart + pipeXorBits; - UINT_32 bankXorBits = GetBankXorBits(blockSizeLog2); - for (UINT_32 i = 0; i < bankXorBits; i++) - { - UINT_32 xor1BitPos = bankStart + (3 * bankXorBits) - 1 - (2 * i); - ADDR_CHANNEL_SETTING* pXor1Src = (xor1BitPos < blockSizeLog2) ? - &pEquation->addr[xor1BitPos] : &xorExtra[xor1BitPos - blockSizeLog2]; - - InitChannel(&pEquation->xor1[bankStart + i], pXor1Src); - - UINT_32 xor2BitPos = bankStart + (3 * bankXorBits) - 2 - (2 * i); - ADDR_CHANNEL_SETTING* pXor2Src = (xor2BitPos < blockSizeLog2) ? 
- &pEquation->addr[xor2BitPos] : &xorExtra[xor2BitPos - blockSizeLog2]; - - InitChannel(&pEquation->xor2[bankStart + i], pXor2Src); - } - } - - pEquation->numBits = blockSizeLog2; - } - - return ret; -} - -/** -************************************************************************************************************************ -* Gfx9Lib::IsValidDisplaySwizzleMode -* -* @brief -* Check if a swizzle mode is supported by display engine -* -* @return -* TRUE is swizzle mode is supported by display engine -************************************************************************************************************************ -*/ -BOOL_32 Gfx9Lib::IsValidDisplaySwizzleMode( - const ADDR2_COMPUTE_SURFACE_INFO_INPUT* pIn) const -{ - BOOL_32 support = FALSE; - - const AddrResourceType resourceType = pIn->resourceType; - (void)resourceType; - const AddrSwizzleMode swizzleMode = pIn->swizzleMode; - - if (m_settings.isDce12) - { - switch (swizzleMode) - { - case ADDR_SW_256B_D: - case ADDR_SW_256B_R: - support = (pIn->bpp == 32); - break; - - case ADDR_SW_LINEAR: - case ADDR_SW_4KB_D: - case ADDR_SW_4KB_R: - case ADDR_SW_64KB_D: - case ADDR_SW_64KB_R: - case ADDR_SW_VAR_D: - case ADDR_SW_VAR_R: - case ADDR_SW_4KB_D_X: - case ADDR_SW_4KB_R_X: - case ADDR_SW_64KB_D_X: - case ADDR_SW_64KB_R_X: - case ADDR_SW_VAR_D_X: - case ADDR_SW_VAR_R_X: - support = (pIn->bpp <= 64); - break; - - default: - break; - } - } - else if (m_settings.isDcn1) - { - switch (swizzleMode) - { - case ADDR_SW_4KB_D: - case ADDR_SW_64KB_D: - case ADDR_SW_VAR_D: - case ADDR_SW_64KB_D_T: - case ADDR_SW_4KB_D_X: - case ADDR_SW_64KB_D_X: - case ADDR_SW_VAR_D_X: - support = (pIn->bpp == 64); - break; - - case ADDR_SW_LINEAR: - case ADDR_SW_4KB_S: - case ADDR_SW_64KB_S: - case ADDR_SW_VAR_S: - case ADDR_SW_64KB_S_T: - case ADDR_SW_4KB_S_X: - case ADDR_SW_64KB_S_X: - case ADDR_SW_VAR_S_X: - support = (pIn->bpp <= 64); - break; - - default: - break; - } - } - else - { - ADDR_NOT_IMPLEMENTED(); - } - - return 
support; -} - -/** -************************************************************************************************************************ -* Gfx9Lib::HwlComputePipeBankXor -* -* @brief -* Generate a PipeBankXor value to be ORed into bits above pipeInterleaveBits of address -* -* @return -* PipeBankXor value -************************************************************************************************************************ -*/ -ADDR_E_RETURNCODE Gfx9Lib::HwlComputePipeBankXor( - const ADDR2_COMPUTE_PIPEBANKXOR_INPUT* pIn, - ADDR2_COMPUTE_PIPEBANKXOR_OUTPUT* pOut) const -{ - UINT_32 macroBlockBits = GetBlockSizeLog2(pIn->swizzleMode); - UINT_32 pipeBits = GetPipeXorBits(macroBlockBits); - UINT_32 bankBits = GetBankXorBits(macroBlockBits); - - UINT_32 pipeXor = 0; - UINT_32 bankXor = 0; - - const UINT_32 bankMask = (1 << bankBits) - 1; - const UINT_32 index = pIn->surfIndex & bankMask; - - const UINT_32 bpp = pIn->flags.fmask ? - GetFmaskBpp(pIn->numSamples, pIn->numFrags) : GetElemLib()->GetBitsPerPixel(pIn->format); - if (bankBits == 4) - { - static const UINT_32 BankXorSmallBpp[] = {0, 7, 4, 3, 8, 15, 12, 11, 1, 6, 5, 2, 9, 14, 13, 10}; - static const UINT_32 BankXorLargeBpp[] = {0, 7, 8, 15, 4, 3, 12, 11, 1, 6, 9, 14, 5, 2, 13, 10}; - - bankXor = (bpp <= 32) ? BankXorSmallBpp[index] : BankXorLargeBpp[index]; - } - else if (bankBits > 0) - { - UINT_32 bankIncrease = (1 << (bankBits - 1)) - 1; - bankIncrease = (bankIncrease == 0) ? 
1 : bankIncrease; - bankXor = (index * bankIncrease) & bankMask; - } - - pOut->pipeBankXor = (bankXor << pipeBits) | pipeXor; - - return ADDR_OK; -} - -/** -************************************************************************************************************************ -* Gfx9Lib::HwlComputeSlicePipeBankXor -* -* @brief -* Generate slice PipeBankXor value based on base PipeBankXor value and slice id -* -* @return -* PipeBankXor value -************************************************************************************************************************ -*/ -ADDR_E_RETURNCODE Gfx9Lib::HwlComputeSlicePipeBankXor( - const ADDR2_COMPUTE_SLICE_PIPEBANKXOR_INPUT* pIn, - ADDR2_COMPUTE_SLICE_PIPEBANKXOR_OUTPUT* pOut) const -{ - UINT_32 macroBlockBits = GetBlockSizeLog2(pIn->swizzleMode); - UINT_32 pipeBits = GetPipeXorBits(macroBlockBits); - UINT_32 bankBits = GetBankXorBits(macroBlockBits); - - UINT_32 pipeXor = ReverseBitVector(pIn->slice, pipeBits); - UINT_32 bankXor = ReverseBitVector(pIn->slice >> pipeBits, bankBits); - - pOut->pipeBankXor = pIn->basePipeBankXor ^ (pipeXor | (bankXor << pipeBits)); - - return ADDR_OK; -} - -/** -************************************************************************************************************************ -* Gfx9Lib::HwlComputeSubResourceOffsetForSwizzlePattern -* -* @brief -* Compute sub resource offset to support swizzle pattern -* -* @return -* Offset -************************************************************************************************************************ -*/ -ADDR_E_RETURNCODE Gfx9Lib::HwlComputeSubResourceOffsetForSwizzlePattern( - const ADDR2_COMPUTE_SUBRESOURCE_OFFSET_FORSWIZZLEPATTERN_INPUT* pIn, - ADDR2_COMPUTE_SUBRESOURCE_OFFSET_FORSWIZZLEPATTERN_OUTPUT* pOut) const -{ - ADDR_ASSERT(IsThin(pIn->resourceType, pIn->swizzleMode)); - - UINT_32 macroBlockBits = GetBlockSizeLog2(pIn->swizzleMode); - UINT_32 pipeBits = GetPipeXorBits(macroBlockBits); - UINT_32 bankBits = 
GetBankXorBits(macroBlockBits); - UINT_32 pipeXor = ReverseBitVector(pIn->slice, pipeBits); - UINT_32 bankXor = ReverseBitVector(pIn->slice >> pipeBits, bankBits); - UINT_32 pipeBankXor = ((pipeXor | (bankXor << pipeBits)) ^ (pIn->pipeBankXor)) << m_pipeInterleaveLog2; - - pOut->offset = pIn->slice * pIn->sliceSize + - pIn->macroBlockOffset + - (pIn->mipTailOffset ^ pipeBankXor) - - static_cast(pipeBankXor); - return ADDR_OK; -} - -/** -************************************************************************************************************************ -* Gfx9Lib::HwlComputeSurfaceInfoSanityCheck -* -* @brief -* Compute surface info sanity check -* -* @return -* Offset -************************************************************************************************************************ -*/ -ADDR_E_RETURNCODE Gfx9Lib::HwlComputeSurfaceInfoSanityCheck( - const ADDR2_COMPUTE_SURFACE_INFO_INPUT* pIn) const -{ - BOOL_32 invalid = FALSE; - - if ((pIn->bpp > 128) || (pIn->width == 0) || (pIn->numFrags > 8) || (pIn->numSamples > 16)) - { - invalid = TRUE; - } - else if ((pIn->swizzleMode >= ADDR_SW_MAX_TYPE) || - (pIn->resourceType >= ADDR_RSRC_MAX_TYPE)) - { - invalid = TRUE; - } - - BOOL_32 mipmap = (pIn->numMipLevels > 1); - BOOL_32 msaa = (pIn->numFrags > 1); - - ADDR2_SURFACE_FLAGS flags = pIn->flags; - BOOL_32 zbuffer = (flags.depth || flags.stencil); - BOOL_32 color = flags.color; - BOOL_32 display = flags.display || flags.rotated; - - AddrResourceType rsrcType = pIn->resourceType; - BOOL_32 tex3d = IsTex3d(rsrcType); - AddrSwizzleMode swizzle = pIn->swizzleMode; - BOOL_32 linear = IsLinear(swizzle); - BOOL_32 blk256B = IsBlock256b(swizzle); - BOOL_32 blkVar = IsBlockVariable(swizzle); - BOOL_32 isNonPrtXor = IsNonPrtXor(swizzle); - BOOL_32 prt = flags.prt; - BOOL_32 stereo = flags.qbStereo; - - if (invalid == FALSE) - { - if ((pIn->numFrags > 1) && - (GetBlockSize(swizzle) < (m_pipeInterleaveBytes * pIn->numFrags))) - { - // MSAA surface must have 
blk_bytes/pipe_interleave >= num_samples - invalid = TRUE; - } - } - - if (invalid == FALSE) - { - switch (rsrcType) - { - case ADDR_RSRC_TEX_1D: - invalid = msaa || zbuffer || display || (linear == FALSE) || stereo; - break; - case ADDR_RSRC_TEX_2D: - invalid = (msaa && mipmap) || (stereo && msaa) || (stereo && mipmap); - break; - case ADDR_RSRC_TEX_3D: - invalid = msaa || zbuffer || display || stereo; - break; - default: - invalid = TRUE; - break; - } - } - - if (invalid == FALSE) - { - if (display) - { - invalid = (IsValidDisplaySwizzleMode(pIn) == FALSE); - } - } - - if (invalid == FALSE) - { - if (linear) - { - invalid = ((ADDR_RSRC_TEX_1D != rsrcType) && prt) || - zbuffer || msaa || (pIn->bpp == 0) || ((pIn->bpp % 8) != 0); - } - else - { - if (blk256B || blkVar || isNonPrtXor) - { - invalid = prt; - if (blk256B) - { - invalid = invalid || zbuffer || tex3d || mipmap || msaa; - } - } - - if (invalid == FALSE) - { - if (IsZOrderSwizzle(swizzle)) - { - invalid = color && msaa; - } - else if (IsStandardSwizzle(rsrcType, swizzle)) - { - invalid = zbuffer; - } - else if (IsDisplaySwizzle(rsrcType, swizzle)) - { - invalid = zbuffer; - } - else if (IsRotateSwizzle(swizzle)) - { - invalid = zbuffer || (pIn->bpp > 64) || tex3d; - } - else - { - ADDR_ASSERT(!"invalid swizzle mode"); - invalid = TRUE; - } - } - } - } - - ADDR_ASSERT(invalid == FALSE); - - return invalid ? 
ADDR_INVALIDPARAMS : ADDR_OK; -} - -/** -************************************************************************************************************************ -* Gfx9Lib::HwlGetPreferredSurfaceSetting -* -* @brief -* Internal function to get suggested surface information for cliet to use -* -* @return -* ADDR_E_RETURNCODE -************************************************************************************************************************ -*/ -ADDR_E_RETURNCODE Gfx9Lib::HwlGetPreferredSurfaceSetting( - const ADDR2_GET_PREFERRED_SURF_SETTING_INPUT* pIn, - ADDR2_GET_PREFERRED_SURF_SETTING_OUTPUT* pOut) const -{ - // Macro define resource block type - enum AddrBlockType - { - AddrBlockMicro = 0, // Resource uses 256B block - AddrBlock4KB = 1, // Resource uses 4KB block - AddrBlock64KB = 2, // Resource uses 64KB block - AddrBlockVar = 3, // Resource uses var blcok - AddrBlockLinear = 4, // Resource uses linear swizzle mode - - AddrBlockMaxTiledType = AddrBlock64KB + 1, - }; - - enum AddrBlockSet - { - AddrBlockSetMicro = 1 << AddrBlockMicro, - AddrBlockSetMacro4KB = 1 << AddrBlock4KB, - AddrBlockSetMacro64KB = 1 << AddrBlock64KB, - AddrBlockSetVar = 1 << AddrBlockVar, - AddrBlockSetLinear = 1 << AddrBlockLinear, - - AddrBlockSetMacro = AddrBlockSetMacro4KB | AddrBlockSetMacro64KB, - }; - - enum AddrSwSet - { - AddrSwSetZ = 1 << ADDR_SW_Z, - AddrSwSetS = 1 << ADDR_SW_S, - AddrSwSetD = 1 << ADDR_SW_D, - AddrSwSetR = 1 << ADDR_SW_R, - - AddrSwSetAll = AddrSwSetZ | AddrSwSetS | AddrSwSetD | AddrSwSetR, - }; - - ADDR_E_RETURNCODE returnCode = ADDR_OK; - ElemLib* pElemLib = GetElemLib(); - - // Set format to INVALID will skip this conversion - UINT_32 expandX = 1; - UINT_32 expandY = 1; - UINT_32 bpp = pIn->bpp; - UINT_32 width = pIn->width; - UINT_32 height = pIn->height; - - if (pIn->format != ADDR_FMT_INVALID) - { - // Don't care for this case - ElemMode elemMode = ADDR_UNCOMPRESSED; - - // Get compression/expansion factors and element mode which indicates 
compression/expansion - bpp = pElemLib->GetBitsPerPixel(pIn->format, - &elemMode, - &expandX, - &expandY); - - UINT_32 basePitch = 0; - GetElemLib()->AdjustSurfaceInfo(elemMode, - expandX, - expandY, - &bpp, - &basePitch, - &width, - &height); - } - - UINT_32 numSamples = Max(pIn->numSamples, 1u); - UINT_32 numFrags = (pIn->numFrags == 0) ? numSamples : pIn->numFrags; - UINT_32 slice = Max(pIn->numSlices, 1u); - UINT_32 numMipLevels = Max(pIn->numMipLevels, 1u); - UINT_32 minSizeAlign = NextPow2(pIn->minSizeAlign); - - if (pIn->flags.fmask) - { - bpp = GetFmaskBpp(numSamples, numFrags); - numFrags = 1; - numSamples = 1; - pOut->resourceType = ADDR_RSRC_TEX_2D; - } - else - { - // The output may get changed for volume(3D) texture resource in future - pOut->resourceType = pIn->resourceType; - } - - if (bpp < 8) - { - ADDR_ASSERT_ALWAYS(); - - returnCode = ADDR_INVALIDPARAMS; - } - else if (IsTex1d(pOut->resourceType)) - { - pOut->swizzleMode = ADDR_SW_LINEAR; - pOut->validBlockSet.value = AddrBlockSetLinear; - pOut->canXor = FALSE; - } - else - { - ADDR2_BLOCK_SET blockSet; - blockSet.value = 0; - - ADDR2_SWTYPE_SET addrPreferredSwSet, addrValidSwSet, clientPreferredSwSet; - addrPreferredSwSet.value = AddrSwSetS; - addrValidSwSet = addrPreferredSwSet; - clientPreferredSwSet = pIn->preferredSwSet; - - if (clientPreferredSwSet.value == 0) - { - clientPreferredSwSet.value = AddrSwSetAll; - } - - // prt Xor and non-xor will have less height align requirement for stereo surface - BOOL_32 prtXor = (pIn->flags.prt || pIn->flags.qbStereo) && (pIn->noXor == FALSE); - BOOL_32 displayResource = FALSE; - - pOut->canXor = (pIn->flags.prt == FALSE) && (pIn->noXor == FALSE); - - // Filter out improper swType and blockSet by HW restriction - if (pIn->flags.fmask || pIn->flags.depth || pIn->flags.stencil) - { - ADDR_ASSERT(IsTex2d(pOut->resourceType)); - blockSet.value = AddrBlockSetMacro; - addrPreferredSwSet.value = AddrSwSetZ; - addrValidSwSet.value = AddrSwSetZ; - - if 
(pIn->flags.noMetadata == FALSE) - { - if (pIn->flags.depth && - pIn->flags.texture && - (((bpp == 16) && (numFrags >= 4)) || ((bpp == 32) && (numFrags >= 2)))) - { - // When _X/_T swizzle mode was used for MSAA depth texture, TC will get zplane - // equation from wrong address within memory range a tile covered and use the - // garbage data for compressed Z reading which finally leads to corruption. - pOut->canXor = FALSE; - prtXor = FALSE; - } - - if (m_settings.htileCacheRbConflict && - (pIn->flags.depth || pIn->flags.stencil) && - (slice > 1) && - (pIn->flags.metaRbUnaligned == FALSE) && - (pIn->flags.metaPipeUnaligned == FALSE)) - { - // Z_X 2D array with Rb/Pipe aligned HTile won't have metadata cache coherency - pOut->canXor = FALSE; - } - } - } - else if (ElemLib::IsBlockCompressed(pIn->format)) - { - // block compressed formats (BCx, ASTC, ETC2) must be either S or D modes. - // Not sure under what circumstances "_D" would be appropriate as these formats - // are not displayable. - blockSet.value = AddrBlockSetMacro; - - // This isn't to be used as texture and caller doesn't allow macro tiled. - if ((pIn->flags.texture == FALSE) && - (pIn->forbiddenBlock.macro4KB && pIn->forbiddenBlock.macro64KB)) - { - blockSet.value |= AddrBlockSetLinear; - } - - addrPreferredSwSet.value = AddrSwSetD; - addrValidSwSet.value = AddrSwSetS | AddrSwSetD; - } - else if (ElemLib::IsMacroPixelPacked(pIn->format)) - { - // macro pixel packed formats (BG_RG, GB_GR) does not support the Z modes. - // Its notclear under what circumstances the D or R modes would be appropriate - // since these formats are not displayable. 
- blockSet.value = AddrBlockSetLinear | AddrBlockSetMacro; - - addrPreferredSwSet.value = AddrSwSetS; - addrValidSwSet.value = AddrSwSetS | AddrSwSetD | AddrSwSetR; - } - else if (IsTex3d(pOut->resourceType)) - { - blockSet.value = AddrBlockSetLinear | AddrBlockSetMacro; - - if (pIn->flags.prt) - { - // PRT cannot use SW_D which gives an unexpected block dimension - addrPreferredSwSet.value = AddrSwSetZ; - addrValidSwSet.value = AddrSwSetZ | AddrSwSetS; - } - else if ((numMipLevels > 1) && (slice >= width) && (slice >= height)) - { - // When depth (Z) is the maximum dimension then must use one of the SW_*_S - // or SW_*_Z modes if mipmapping is desired on a 3D surface - addrPreferredSwSet.value = AddrSwSetZ; - addrValidSwSet.value = AddrSwSetZ | AddrSwSetS; - } - else if (pIn->flags.color) - { - addrPreferredSwSet.value = AddrSwSetD; - addrValidSwSet.value = AddrSwSetZ | AddrSwSetS | AddrSwSetD; - } - else - { - addrPreferredSwSet.value = AddrSwSetZ; - addrValidSwSet.value = AddrSwSetZ | AddrSwSetD; - if (bpp != 128) - { - addrValidSwSet.value |= AddrSwSetS; - } - } - } - else - { - addrPreferredSwSet.value = ((pIn->flags.display == TRUE) || - (pIn->flags.overlay == TRUE) || - (pIn->bpp == 128)) ? AddrSwSetD : AddrSwSetS; - - addrValidSwSet.value = AddrSwSetS | AddrSwSetD | AddrSwSetR; - - if (numMipLevels > 1) - { - ADDR_ASSERT(numFrags == 1); - blockSet.value = AddrBlockSetLinear | AddrBlockSetMacro; - } - else if ((numFrags > 1) || (numSamples > 1)) - { - ADDR_ASSERT(IsTex2d(pOut->resourceType)); - blockSet.value = AddrBlockSetMacro; - } - else - { - ADDR_ASSERT(IsTex2d(pOut->resourceType)); - blockSet.value = AddrBlockSetLinear | AddrBlockSetMicro | AddrBlockSetMacro; - - displayResource = pIn->flags.rotated || pIn->flags.display; - - if (displayResource) - { - addrPreferredSwSet.value = pIn->flags.rotated ? 
AddrSwSetR : AddrSwSetD; - - if (pIn->bpp > 64) - { - blockSet.value = 0; - } - else if (m_settings.isDce12) - { - if (pIn->bpp != 32) - { - blockSet.micro = FALSE; - } - - // DCE12 does not support display surface to be _T swizzle mode - prtXor = FALSE; - - addrValidSwSet.value = AddrSwSetD | AddrSwSetR; - } - else if (m_settings.isDcn1) - { - // _R is not supported by Dcn1 - if (pIn->bpp == 64) - { - addrPreferredSwSet.value = AddrSwSetD; - addrValidSwSet.value = AddrSwSetS | AddrSwSetD; - } - else - { - addrPreferredSwSet.value = AddrSwSetS; - addrValidSwSet.value = AddrSwSetS; - } - - blockSet.micro = FALSE; - } - else - { - ADDR_NOT_IMPLEMENTED(); - returnCode = ADDR_NOTSUPPORTED; - } - } - } - } - - ADDR_ASSERT((addrValidSwSet.value & addrPreferredSwSet.value) == addrPreferredSwSet.value); - - pOut->clientPreferredSwSet = clientPreferredSwSet; - - // Clamp client preferred set to valid set - clientPreferredSwSet.value &= addrValidSwSet.value; - - pOut->validSwTypeSet = addrValidSwSet; - - if (clientPreferredSwSet.value == 0) - { - // Client asks for an invalid swizzle type... 
- ADDR_ASSERT_ALWAYS(); - returnCode = ADDR_INVALIDPARAMS; - } - else - { - if (IsPow2(clientPreferredSwSet.value)) - { - // Only one swizzle type left, use it directly - addrPreferredSwSet.value = clientPreferredSwSet.value; - } - else if ((clientPreferredSwSet.value & addrPreferredSwSet.value) == 0) - { - // Client wants 2 or more a valid swizzle type but none of them is addrlib preferred - if (clientPreferredSwSet.sw_D) - { - addrPreferredSwSet.value = AddrSwSetD; - } - else if (clientPreferredSwSet.sw_Z) - { - addrPreferredSwSet.value = AddrSwSetZ; - } - else if (clientPreferredSwSet.sw_R) - { - addrPreferredSwSet.value = AddrSwSetR; - } - else - { - ADDR_ASSERT(clientPreferredSwSet.sw_S); - addrPreferredSwSet.value = AddrSwSetS; - } - } - - if ((numFrags > 1) && - (GetBlockSize(ADDR_SW_4KB) < (m_pipeInterleaveBytes * numFrags))) - { - // MSAA surface must have blk_bytes/pipe_interleave >= num_samples - blockSet.macro4KB = FALSE; - } - - if (pIn->flags.prt) - { - blockSet.value &= AddrBlockSetMacro64KB; - } - - // Apply customized forbidden setting - blockSet.value &= ~pIn->forbiddenBlock.value; - - if (pIn->maxAlign > 0) - { - if (pIn->maxAlign < GetBlockSize(ADDR_SW_64KB)) - { - blockSet.macro64KB = FALSE; - } - - if (pIn->maxAlign < GetBlockSize(ADDR_SW_4KB)) - { - blockSet.macro4KB = FALSE; - } - - if (pIn->maxAlign < GetBlockSize(ADDR_SW_256B)) - { - blockSet.micro = FALSE; - } - } - - Dim3d blkAlign[AddrBlockMaxTiledType] = {{0}, {0}, {0}}; - Dim3d paddedDim[AddrBlockMaxTiledType] = {{0}, {0}, {0}}; - UINT_64 padSize[AddrBlockMaxTiledType] = {0}; - - if (blockSet.micro) - { - returnCode = ComputeBlockDimensionForSurf(&blkAlign[AddrBlockMicro].w, - &blkAlign[AddrBlockMicro].h, - &blkAlign[AddrBlockMicro].d, - bpp, - numFrags, - pOut->resourceType, - ADDR_SW_256B); - - if (returnCode == ADDR_OK) - { - if (displayResource) - { - blkAlign[AddrBlockMicro].w = PowTwoAlign(blkAlign[AddrBlockMicro].w, 32); - } - else if ((blkAlign[AddrBlockMicro].w >= width) && 
(blkAlign[AddrBlockMicro].h >= height) && - (minSizeAlign <= GetBlockSize(ADDR_SW_256B))) - { - // If one 256B block can contain the surface, don't bother bigger block type - blockSet.macro4KB = FALSE; - blockSet.macro64KB = FALSE; - blockSet.var = FALSE; - } - - padSize[AddrBlockMicro] = ComputePadSize(&blkAlign[AddrBlockMicro], width, height, - slice, &paddedDim[AddrBlockMicro]); - } - } - - if ((returnCode == ADDR_OK) && blockSet.macro4KB) - { - returnCode = ComputeBlockDimensionForSurf(&blkAlign[AddrBlock4KB].w, - &blkAlign[AddrBlock4KB].h, - &blkAlign[AddrBlock4KB].d, - bpp, - numFrags, - pOut->resourceType, - ADDR_SW_4KB); - - if (returnCode == ADDR_OK) - { - if (displayResource) - { - blkAlign[AddrBlock4KB].w = PowTwoAlign(blkAlign[AddrBlock4KB].w, 32); - } - - padSize[AddrBlock4KB] = ComputePadSize(&blkAlign[AddrBlock4KB], width, height, - slice, &paddedDim[AddrBlock4KB]); - - ADDR_ASSERT(padSize[AddrBlock4KB] >= padSize[AddrBlockMicro]); - } - } - - if ((returnCode == ADDR_OK) && blockSet.macro64KB) - { - returnCode = ComputeBlockDimensionForSurf(&blkAlign[AddrBlock64KB].w, - &blkAlign[AddrBlock64KB].h, - &blkAlign[AddrBlock64KB].d, - bpp, - numFrags, - pOut->resourceType, - ADDR_SW_64KB); - - if (returnCode == ADDR_OK) - { - if (displayResource) - { - blkAlign[AddrBlock64KB].w = PowTwoAlign(blkAlign[AddrBlock64KB].w, 32); - } - - padSize[AddrBlock64KB] = ComputePadSize(&blkAlign[AddrBlock64KB], width, height, - slice, &paddedDim[AddrBlock64KB]); - - ADDR_ASSERT(padSize[AddrBlock64KB] >= padSize[AddrBlock4KB]); - ADDR_ASSERT(padSize[AddrBlock64KB] >= padSize[AddrBlockMicro]); - } - } - - if (returnCode == ADDR_OK) - { - UINT_64 minSizeAlignInElement = Max(minSizeAlign / (bpp >> 3), 1u); - - for (UINT_32 i = AddrBlockMicro; i < AddrBlockMaxTiledType; i++) - { - padSize[i] = PowTwoAlign(padSize[i], minSizeAlignInElement); - } - - // Use minimum block type which meets all conditions above if flag minimizeAlign was set - if (pIn->flags.minimizeAlign) - { - // 
If padded size of 64KB block is larger than padded size of 256B block or 4KB - // block, filter out 64KB block from candidate list - if (blockSet.macro64KB && - ((blockSet.micro && (padSize[AddrBlockMicro] < padSize[AddrBlock64KB])) || - (blockSet.macro4KB && (padSize[AddrBlock4KB] < padSize[AddrBlock64KB])))) - { - blockSet.macro64KB = FALSE; - } - - // If padded size of 4KB block is larger than padded size of 256B block, - // filter out 4KB block from candidate list - if (blockSet.macro4KB && - blockSet.micro && - (padSize[AddrBlockMicro] < padSize[AddrBlock4KB])) - { - blockSet.macro4KB = FALSE; - } - } - // Filter out 64KB/4KB block if a smaller block type has 2/3 or less memory footprint - else if (pIn->flags.opt4space) - { - UINT_64 threshold = blockSet.micro ? padSize[AddrBlockMicro] : - (blockSet.macro4KB ? padSize[AddrBlock4KB] : padSize[AddrBlock64KB]); - - threshold += threshold >> 1; - - if (blockSet.macro64KB && (padSize[AddrBlock64KB] > threshold)) - { - blockSet.macro64KB = FALSE; - } - - if (blockSet.macro4KB && (padSize[AddrBlock4KB] > threshold)) - { - blockSet.macro4KB = FALSE; - } - } - else - { - if (blockSet.macro64KB && - (padSize[AddrBlock64KB] >= static_cast(width) * height * slice * 2) && - ((blockSet.value & ~AddrBlockSetMacro64KB) != 0)) - { - // If 64KB block waste more than half memory on padding, filter it out from - // candidate list when it is not the only choice left - blockSet.macro64KB = FALSE; - } - } - - if (blockSet.value == 0) - { - // Bad things happen, client will not get any useful information from AddrLib. - // Maybe we should fill in some output earlier instead of outputing nothing? 
- ADDR_ASSERT_ALWAYS(); - returnCode = ADDR_INVALIDPARAMS; - } - else - { - pOut->validBlockSet = blockSet; - pOut->canXor = pOut->canXor && - (blockSet.macro4KB || blockSet.macro64KB || blockSet.var); - - if (blockSet.macro64KB || blockSet.macro4KB) - { - if (addrPreferredSwSet.value == AddrSwSetZ) - { - pOut->swizzleMode = blockSet.macro64KB ? ADDR_SW_64KB_Z : ADDR_SW_4KB_Z; - } - else if (addrPreferredSwSet.value == AddrSwSetS) - { - pOut->swizzleMode = blockSet.macro64KB ? ADDR_SW_64KB_S : ADDR_SW_4KB_S; - } - else if (addrPreferredSwSet.value == AddrSwSetD) - { - pOut->swizzleMode = blockSet.macro64KB ? ADDR_SW_64KB_D : ADDR_SW_4KB_D; - } - else - { - ADDR_ASSERT(addrPreferredSwSet.value == AddrSwSetR); - pOut->swizzleMode = blockSet.macro64KB ? ADDR_SW_64KB_R : ADDR_SW_4KB_R; - } - - if (prtXor && blockSet.macro64KB) - { - // Client wants PRTXOR, give back _T swizzle mode if 64KB is available - const UINT_32 prtGap = ADDR_SW_64KB_Z_T - ADDR_SW_64KB_Z; - pOut->swizzleMode = static_cast(pOut->swizzleMode + prtGap); - } - else if (pOut->canXor) - { - // Client wants XOR and this is allowed, return XOR version swizzle mode - const UINT_32 xorGap = ADDR_SW_4KB_Z_X - ADDR_SW_4KB_Z; - pOut->swizzleMode = static_cast(pOut->swizzleMode + xorGap); - } - } - else if (blockSet.micro) - { - if (addrPreferredSwSet.value == AddrSwSetS) - { - pOut->swizzleMode = ADDR_SW_256B_S; - } - else if (addrPreferredSwSet.value == AddrSwSetD) - { - pOut->swizzleMode = ADDR_SW_256B_D; - } - else - { - ADDR_ASSERT(addrPreferredSwSet.value == AddrSwSetR); - pOut->swizzleMode = ADDR_SW_256B_R; - } - } - else if (blockSet.linear) - { - // Fall into this branch doesn't mean linear is suitable, only no other choices! 
- pOut->swizzleMode = ADDR_SW_LINEAR; - } - else - { - ADDR_ASSERT(blockSet.var); - - // Designer consider VAR swizzle mode is usless for most cases - ADDR_UNHANDLED_CASE(); - - returnCode = ADDR_NOTSUPPORTED; - } - -#if DEBUG - // Post sanity check, at least AddrLib should accept the output generated by its own - if (pOut->swizzleMode != ADDR_SW_LINEAR) - { - ADDR2_COMPUTE_SURFACE_INFO_INPUT localIn = {0}; - localIn.flags = pIn->flags; - localIn.swizzleMode = pOut->swizzleMode; - localIn.resourceType = pOut->resourceType; - localIn.format = pIn->format; - localIn.bpp = bpp; - localIn.width = width; - localIn.height = height; - localIn.numSlices = slice; - localIn.numMipLevels = numMipLevels; - localIn.numSamples = numSamples; - localIn.numFrags = numFrags; - - HwlComputeSurfaceInfoSanityCheck(&localIn); - - } -#endif - } - } - } - } - - return returnCode; -} - -/** -************************************************************************************************************************ -* Gfx9Lib::ComputeStereoInfo -* -* @brief -* Compute height alignment and right eye pipeBankXor for stereo surface -* -* @return -* Error code -* -************************************************************************************************************************ -*/ -ADDR_E_RETURNCODE Gfx9Lib::ComputeStereoInfo( - const ADDR2_COMPUTE_SURFACE_INFO_INPUT* pIn, - ADDR2_COMPUTE_SURFACE_INFO_OUTPUT* pOut, - UINT_32* pHeightAlign - ) const -{ - ADDR_E_RETURNCODE returnCode = ADDR_OK; - - UINT_32 eqIndex = HwlGetEquationIndex(pIn, pOut); - - if (eqIndex < m_numEquations) - { - if (IsXor(pIn->swizzleMode)) - { - const UINT_32 blkSizeLog2 = GetBlockSizeLog2(pIn->swizzleMode); - const UINT_32 numPipeBits = GetPipeXorBits(blkSizeLog2); - const UINT_32 numBankBits = GetBankXorBits(blkSizeLog2); - const UINT_32 bppLog2 = Log2(pIn->bpp >> 3); - const UINT_32 maxYCoordBlock256 = Log2(Block256_2d[bppLog2].h) - 1; - MAYBE_UNUSED const ADDR_EQUATION *pEqToCheck = &m_equationTable[eqIndex]; - - 
ADDR_ASSERT(maxYCoordBlock256 == - GetMaxValidChannelIndex(&pEqToCheck->addr[0], GetBlockSizeLog2(ADDR_SW_256B), 1)); - - const UINT_32 maxYCoordInBaseEquation = - (blkSizeLog2 - GetBlockSizeLog2(ADDR_SW_256B)) / 2 + maxYCoordBlock256; - - ADDR_ASSERT(maxYCoordInBaseEquation == - GetMaxValidChannelIndex(&pEqToCheck->addr[0], blkSizeLog2, 1)); - - const UINT_32 maxYCoordInPipeXor = (numPipeBits == 0) ? 0 : maxYCoordBlock256 + numPipeBits; - - ADDR_ASSERT(maxYCoordInPipeXor == - GetMaxValidChannelIndex(&pEqToCheck->xor1[m_pipeInterleaveLog2], numPipeBits, 1)); - - const UINT_32 maxYCoordInBankXor = (numBankBits == 0) ? - 0 : maxYCoordBlock256 + (numPipeBits + 1) / 2 + numBankBits; - - ADDR_ASSERT(maxYCoordInBankXor == - GetMaxValidChannelIndex(&pEqToCheck->xor1[m_pipeInterleaveLog2 + numPipeBits], numBankBits, 1)); - - const UINT_32 maxYCoordInPipeBankXor = Max(maxYCoordInPipeXor, maxYCoordInBankXor); - - if (maxYCoordInPipeBankXor > maxYCoordInBaseEquation) - { - *pHeightAlign = 1u << maxYCoordInPipeBankXor; - - if (pOut->pStereoInfo != NULL) - { - pOut->pStereoInfo->rightSwizzle = 0; - - if ((PowTwoAlign(pIn->height, *pHeightAlign) % (*pHeightAlign * 2)) != 0) - { - if (maxYCoordInPipeXor == maxYCoordInPipeBankXor) - { - pOut->pStereoInfo->rightSwizzle |= (1u << 1); - } - - if (maxYCoordInBankXor == maxYCoordInPipeBankXor) - { - pOut->pStereoInfo->rightSwizzle |= - 1u << ((numPipeBits % 2) ? 
numPipeBits : numPipeBits + 1); - } - - ADDR_ASSERT(pOut->pStereoInfo->rightSwizzle == - GetCoordActiveMask(&pEqToCheck->xor1[m_pipeInterleaveLog2], - numPipeBits + numBankBits, 1, maxYCoordInPipeBankXor)); - } - } - } - } - } - else - { - ADDR_ASSERT_ALWAYS(); - returnCode = ADDR_ERROR; - } - - return returnCode; -} - -/** -************************************************************************************************************************ -* Gfx9Lib::HwlComputeSurfaceInfoTiled -* -* @brief -* Internal function to calculate alignment for tiled surface -* -* @return -* ADDR_E_RETURNCODE -************************************************************************************************************************ -*/ -ADDR_E_RETURNCODE Gfx9Lib::HwlComputeSurfaceInfoTiled( - const ADDR2_COMPUTE_SURFACE_INFO_INPUT* pIn, ///< [in] input structure - ADDR2_COMPUTE_SURFACE_INFO_OUTPUT* pOut ///< [out] output structure - ) const -{ - ADDR_E_RETURNCODE returnCode = ComputeBlockDimensionForSurf(&pOut->blockWidth, - &pOut->blockHeight, - &pOut->blockSlices, - pIn->bpp, - pIn->numFrags, - pIn->resourceType, - pIn->swizzleMode); - - if (returnCode == ADDR_OK) - { - UINT_32 pitchAlignInElement = pOut->blockWidth; - - if ((IsTex2d(pIn->resourceType) == TRUE) && - (pIn->flags.display || pIn->flags.rotated) && - (pIn->numMipLevels <= 1) && - (pIn->numSamples <= 1) && - (pIn->numFrags <= 1)) - { - // Display engine needs pitch align to be at least 32 pixels. 
- pitchAlignInElement = PowTwoAlign(pitchAlignInElement, 32); - } - - pOut->pitch = PowTwoAlign(pIn->width, pitchAlignInElement); - - if ((pIn->numMipLevels <= 1) && (pIn->pitchInElement > 0)) - { - if ((pIn->pitchInElement % pitchAlignInElement) != 0) - { - returnCode = ADDR_INVALIDPARAMS; - } - else if (pIn->pitchInElement < pOut->pitch) - { - returnCode = ADDR_INVALIDPARAMS; - } - else - { - pOut->pitch = pIn->pitchInElement; - } - } - - UINT_32 heightAlign = 0; - - if (pIn->flags.qbStereo) - { - returnCode = ComputeStereoInfo(pIn, pOut, &heightAlign); - } - - if (returnCode == ADDR_OK) - { - pOut->height = PowTwoAlign(pIn->height, pOut->blockHeight); - - if (heightAlign > 1) - { - pOut->height = PowTwoAlign(pOut->height, heightAlign); - } - - pOut->numSlices = PowTwoAlign(pIn->numSlices, pOut->blockSlices); - - pOut->epitchIsHeight = FALSE; - pOut->mipChainInTail = FALSE; - pOut->firstMipIdInTail = pIn->numMipLevels; - - pOut->mipChainPitch = pOut->pitch; - pOut->mipChainHeight = pOut->height; - pOut->mipChainSlice = pOut->numSlices; - - if (pIn->numMipLevels > 1) - { - pOut->firstMipIdInTail = GetMipChainInfo(pIn->resourceType, - pIn->swizzleMode, - pIn->bpp, - pIn->width, - pIn->height, - pIn->numSlices, - pOut->blockWidth, - pOut->blockHeight, - pOut->blockSlices, - pIn->numMipLevels, - pOut->pMipInfo); - - const UINT_32 endingMipId = Min(pOut->firstMipIdInTail, pIn->numMipLevels - 1); - - if (endingMipId == 0) - { - const Dim3d tailMaxDim = GetMipTailDim(pIn->resourceType, - pIn->swizzleMode, - pOut->blockWidth, - pOut->blockHeight, - pOut->blockSlices); - - pOut->epitchIsHeight = TRUE; - pOut->pitch = tailMaxDim.w; - pOut->height = tailMaxDim.h; - pOut->numSlices = IsThick(pIn->resourceType, pIn->swizzleMode) ? 
- tailMaxDim.d : pIn->numSlices; - pOut->mipChainInTail = TRUE; - } - else - { - UINT_32 mip0WidthInBlk = pOut->pitch / pOut->blockWidth; - UINT_32 mip0HeightInBlk = pOut->height / pOut->blockHeight; - - AddrMajorMode majorMode = GetMajorMode(pIn->resourceType, - pIn->swizzleMode, - mip0WidthInBlk, - mip0HeightInBlk, - pOut->numSlices / pOut->blockSlices); - if (majorMode == ADDR_MAJOR_Y) - { - UINT_32 mip1WidthInBlk = RoundHalf(mip0WidthInBlk); - - if ((mip1WidthInBlk == 1) && (endingMipId > 2)) - { - mip1WidthInBlk++; - } - - pOut->mipChainPitch += (mip1WidthInBlk * pOut->blockWidth); - - pOut->epitchIsHeight = FALSE; - } - else - { - UINT_32 mip1HeightInBlk = RoundHalf(mip0HeightInBlk); - - if ((mip1HeightInBlk == 1) && (endingMipId > 2)) - { - mip1HeightInBlk++; - } - - pOut->mipChainHeight += (mip1HeightInBlk * pOut->blockHeight); - - pOut->epitchIsHeight = TRUE; - } - } - - if (pOut->pMipInfo != NULL) - { - UINT_32 elementBytesLog2 = Log2(pIn->bpp >> 3); - - for (UINT_32 i = 0; i < pIn->numMipLevels; i++) - { - Dim3d mipStartPos = {0}; - UINT_32 mipTailOffsetInBytes = 0; - - mipStartPos = GetMipStartPos(pIn->resourceType, - pIn->swizzleMode, - pOut->pitch, - pOut->height, - pOut->numSlices, - pOut->blockWidth, - pOut->blockHeight, - pOut->blockSlices, - i, - elementBytesLog2, - &mipTailOffsetInBytes); - - UINT_32 pitchInBlock = - pOut->mipChainPitch / pOut->blockWidth; - UINT_32 sliceInBlock = - (pOut->mipChainHeight / pOut->blockHeight) * pitchInBlock; - UINT_64 blockIndex = - mipStartPos.d * sliceInBlock + mipStartPos.h * pitchInBlock + mipStartPos.w; - UINT_64 macroBlockOffset = - blockIndex << GetBlockSizeLog2(pIn->swizzleMode); - - pOut->pMipInfo[i].macroBlockOffset = macroBlockOffset; - pOut->pMipInfo[i].mipTailOffset = mipTailOffsetInBytes; - } - } - } - else if (pOut->pMipInfo != NULL) - { - pOut->pMipInfo[0].pitch = pOut->pitch; - pOut->pMipInfo[0].height = pOut->height; - pOut->pMipInfo[0].depth = IsTex3d(pIn->resourceType)? 
pOut->numSlices : 1; - pOut->pMipInfo[0].offset = 0; - } - - pOut->sliceSize = static_cast(pOut->mipChainPitch) * pOut->mipChainHeight * - (pIn->bpp >> 3) * pIn->numFrags; - pOut->surfSize = pOut->sliceSize * pOut->mipChainSlice; - pOut->baseAlign = ComputeSurfaceBaseAlignTiled(pIn->swizzleMode); - - if (pIn->flags.prt) - { - pOut->baseAlign = Max(pOut->baseAlign, PrtAlignment); - } - } - } - - return returnCode; -} - -/** -************************************************************************************************************************ -* Gfx9Lib::HwlComputeSurfaceInfoLinear -* -* @brief -* Internal function to calculate alignment for linear surface -* -* @return -* ADDR_E_RETURNCODE -************************************************************************************************************************ -*/ -ADDR_E_RETURNCODE Gfx9Lib::HwlComputeSurfaceInfoLinear( - const ADDR2_COMPUTE_SURFACE_INFO_INPUT* pIn, ///< [in] input structure - ADDR2_COMPUTE_SURFACE_INFO_OUTPUT* pOut ///< [out] output structure - ) const -{ - ADDR_E_RETURNCODE returnCode = ADDR_OK; - UINT_32 pitch = 0; - UINT_32 actualHeight = 0; - UINT_32 elementBytes = pIn->bpp >> 3; - const UINT_32 alignment = pIn->flags.prt ? 
PrtAlignment : 256; - - if (IsTex1d(pIn->resourceType)) - { - if (pIn->height > 1) - { - returnCode = ADDR_INVALIDPARAMS; - } - else - { - const UINT_32 pitchAlignInElement = alignment / elementBytes; - - pitch = PowTwoAlign(pIn->width, pitchAlignInElement); - actualHeight = pIn->numMipLevels; - - if (pIn->flags.prt == FALSE) - { - returnCode = ApplyCustomizedPitchHeight(pIn, elementBytes, pitchAlignInElement, - &pitch, &actualHeight); - } - - if (returnCode == ADDR_OK) - { - if (pOut->pMipInfo != NULL) - { - for (UINT_32 i = 0; i < pIn->numMipLevels; i++) - { - pOut->pMipInfo[i].offset = pitch * elementBytes * i; - pOut->pMipInfo[i].pitch = pitch; - pOut->pMipInfo[i].height = 1; - pOut->pMipInfo[i].depth = 1; - } - } - } - } - } - else - { - returnCode = ComputeSurfaceLinearPadding(pIn, &pitch, &actualHeight, pOut->pMipInfo); - } - - if ((pitch == 0) || (actualHeight == 0)) - { - returnCode = ADDR_INVALIDPARAMS; - } - - if (returnCode == ADDR_OK) - { - pOut->pitch = pitch; - pOut->height = pIn->height; - pOut->numSlices = pIn->numSlices; - pOut->mipChainPitch = pitch; - pOut->mipChainHeight = actualHeight; - pOut->mipChainSlice = pOut->numSlices; - pOut->epitchIsHeight = (pIn->numMipLevels > 1) ? TRUE : FALSE; - pOut->sliceSize = static_cast(pOut->pitch) * actualHeight * elementBytes; - pOut->surfSize = pOut->sliceSize * pOut->numSlices; - pOut->baseAlign = (pIn->swizzleMode == ADDR_SW_LINEAR_GENERAL) ? (pIn->bpp / 8) : alignment; - pOut->blockWidth = (pIn->swizzleMode == ADDR_SW_LINEAR_GENERAL) ? 
1 : (256 / elementBytes); - pOut->blockHeight = 1; - pOut->blockSlices = 1; - } - - // Post calculation validate - ADDR_ASSERT(pOut->sliceSize > 0); - - return returnCode; -} - -/** -************************************************************************************************************************ -* Gfx9Lib::GetMipChainInfo -* -* @brief -* Internal function to get out information about mip chain -* -* @return -* Smaller value between Id of first mip fitted in mip tail and max Id of mip being created -************************************************************************************************************************ -*/ -UINT_32 Gfx9Lib::GetMipChainInfo( - AddrResourceType resourceType, - AddrSwizzleMode swizzleMode, - UINT_32 bpp, - UINT_32 mip0Width, - UINT_32 mip0Height, - UINT_32 mip0Depth, - UINT_32 blockWidth, - UINT_32 blockHeight, - UINT_32 blockDepth, - UINT_32 numMipLevel, - ADDR2_MIP_INFO* pMipInfo) const -{ - const Dim3d tailMaxDim = - GetMipTailDim(resourceType, swizzleMode, blockWidth, blockHeight, blockDepth); - - UINT_32 mipPitch = mip0Width; - UINT_32 mipHeight = mip0Height; - UINT_32 mipDepth = IsTex3d(resourceType) ? 
mip0Depth : 1; - UINT_32 offset = 0; - UINT_32 firstMipIdInTail = numMipLevel; - BOOL_32 inTail = FALSE; - BOOL_32 finalDim = FALSE; - BOOL_32 is3dThick = IsThick(resourceType, swizzleMode); - BOOL_32 is3dThin = IsTex3d(resourceType) && (is3dThick == FALSE); - - for (UINT_32 mipId = 0; mipId < numMipLevel; mipId++) - { - if (inTail) - { - if (finalDim == FALSE) - { - UINT_32 mipSize; - - if (is3dThick) - { - mipSize = mipPitch * mipHeight * mipDepth * (bpp >> 3); - } - else - { - mipSize = mipPitch * mipHeight * (bpp >> 3); - } - - if (mipSize <= 256) - { - UINT_32 index = Log2(bpp >> 3); - - if (is3dThick) - { - mipPitch = Block256_3dZ[index].w; - mipHeight = Block256_3dZ[index].h; - mipDepth = Block256_3dZ[index].d; - } - else - { - mipPitch = Block256_2d[index].w; - mipHeight = Block256_2d[index].h; - } - - finalDim = TRUE; - } - } - } - else - { - inTail = IsInMipTail(resourceType, swizzleMode, tailMaxDim, - mipPitch, mipHeight, mipDepth); - - if (inTail) - { - firstMipIdInTail = mipId; - mipPitch = tailMaxDim.w; - mipHeight = tailMaxDim.h; - - if (is3dThick) - { - mipDepth = tailMaxDim.d; - } - } - else - { - mipPitch = PowTwoAlign(mipPitch, blockWidth); - mipHeight = PowTwoAlign(mipHeight, blockHeight); - - if (is3dThick) - { - mipDepth = PowTwoAlign(mipDepth, blockDepth); - } - } - } - - if (pMipInfo != NULL) - { - pMipInfo[mipId].pitch = mipPitch; - pMipInfo[mipId].height = mipHeight; - pMipInfo[mipId].depth = mipDepth; - pMipInfo[mipId].offset = offset; - } - - offset += (mipPitch * mipHeight * mipDepth * (bpp >> 3)); - - if (finalDim) - { - if (is3dThin) - { - mipDepth = Max(mipDepth >> 1, 1u); - } - } - else - { - mipPitch = Max(mipPitch >> 1, 1u); - mipHeight = Max(mipHeight >> 1, 1u); - - if (is3dThick || is3dThin) - { - mipDepth = Max(mipDepth >> 1, 1u); - } - } - } - - return firstMipIdInTail; -} - -/** -************************************************************************************************************************ -* 
Gfx9Lib::GetMetaMiptailInfo -* -* @brief -* Get mip tail coordinate information. -* -* @return -* N/A -************************************************************************************************************************ -*/ -VOID Gfx9Lib::GetMetaMiptailInfo( - ADDR2_META_MIP_INFO* pInfo, ///< [out] output structure to store per mip coord - Dim3d mipCoord, ///< [in] mip tail base coord - UINT_32 numMipInTail, ///< [in] number of mips in tail - Dim3d* pMetaBlkDim ///< [in] meta block width/height/depth - ) const -{ - BOOL_32 isThick = (pMetaBlkDim->d > 1); - UINT_32 mipWidth = pMetaBlkDim->w; - UINT_32 mipHeight = pMetaBlkDim->h >> 1; - UINT_32 mipDepth = pMetaBlkDim->d; - UINT_32 minInc; - - if (isThick) - { - minInc = (pMetaBlkDim->h >= 512) ? 128 : ((pMetaBlkDim->h == 256) ? 64 : 32); - } - else if (pMetaBlkDim->h >= 1024) - { - minInc = 256; - } - else if (pMetaBlkDim->h == 512) - { - minInc = 128; - } - else - { - minInc = 64; - } - - UINT_32 blk32MipId = 0xFFFFFFFF; - - for (UINT_32 mip = 0; mip < numMipInTail; mip++) - { - pInfo[mip].inMiptail = TRUE; - pInfo[mip].startX = mipCoord.w; - pInfo[mip].startY = mipCoord.h; - pInfo[mip].startZ = mipCoord.d; - pInfo[mip].width = mipWidth; - pInfo[mip].height = mipHeight; - pInfo[mip].depth = mipDepth; - - if (mipWidth <= 32) - { - if (blk32MipId == 0xFFFFFFFF) - { - blk32MipId = mip; - } - - mipCoord.w = pInfo[blk32MipId].startX; - mipCoord.h = pInfo[blk32MipId].startY; - mipCoord.d = pInfo[blk32MipId].startZ; - - switch (mip - blk32MipId) - { - case 0: - mipCoord.w += 32; // 16x16 - break; - case 1: - mipCoord.h += 32; // 8x8 - break; - case 2: - mipCoord.h += 32; // 4x4 - mipCoord.w += 16; - break; - case 3: - mipCoord.h += 32; // 2x2 - mipCoord.w += 32; - break; - case 4: - mipCoord.h += 32; // 1x1 - mipCoord.w += 48; - break; - // The following are for BC/ASTC formats - case 5: - mipCoord.h += 48; // 1/2 x 1/2 - break; - case 6: - mipCoord.h += 48; // 1/4 x 1/4 - mipCoord.w += 16; - break; - case 7: - 
mipCoord.h += 48; // 1/8 x 1/8 - mipCoord.w += 32; - break; - case 8: - mipCoord.h += 48; // 1/16 x 1/16 - mipCoord.w += 48; - break; - default: - ADDR_ASSERT_ALWAYS(); - break; - } - - mipWidth = ((mip - blk32MipId) == 0) ? 16 : 8; - mipHeight = mipWidth; - - if (isThick) - { - mipDepth = mipWidth; - } - } - else - { - if (mipWidth <= minInc) - { - // if we're below the minimal increment... - if (isThick) - { - // For 3d, just go in z direction - mipCoord.d += mipDepth; - } - else - { - // For 2d, first go across, then down - if ((mipWidth * 2) == minInc) - { - // if we're 2 mips below, that's when we go back in x, and down in y - mipCoord.w -= minInc; - mipCoord.h += minInc; - } - else - { - // otherwise, just go across in x - mipCoord.w += minInc; - } - } - } - else - { - // On even mip, go down, otherwise, go across - if (mip & 1) - { - mipCoord.w += mipWidth; - } - else - { - mipCoord.h += mipHeight; - } - } - // Divide the width by 2 - mipWidth >>= 1; - // After the first mip in tail, the mip is always a square - mipHeight = mipWidth; - // ...or for 3d, a cube - if (isThick) - { - mipDepth = mipWidth; - } - } - } -} - -/** -************************************************************************************************************************ -* Gfx9Lib::GetMipStartPos -* -* @brief -* Internal function to get out information about mip logical start position -* -* @return -* logical start position in macro block width/heith/depth of one mip level within one slice -************************************************************************************************************************ -*/ -Dim3d Gfx9Lib::GetMipStartPos( - AddrResourceType resourceType, - AddrSwizzleMode swizzleMode, - UINT_32 width, - UINT_32 height, - UINT_32 depth, - UINT_32 blockWidth, - UINT_32 blockHeight, - UINT_32 blockDepth, - UINT_32 mipId, - UINT_32 log2ElementBytes, - UINT_32* pMipTailBytesOffset) const -{ - Dim3d mipStartPos = {0}; - const Dim3d tailMaxDim = 
GetMipTailDim(resourceType, swizzleMode, blockWidth, blockHeight, blockDepth); - - // Report mip in tail if Mip0 is already in mip tail - BOOL_32 inMipTail = IsInMipTail(resourceType, swizzleMode, tailMaxDim, width, height, depth); - UINT_32 log2blkSize = GetBlockSizeLog2(swizzleMode); - UINT_32 mipIndexInTail = mipId; - - if (inMipTail == FALSE) - { - // Mip 0 dimension, unit in block - UINT_32 mipWidthInBlk = width / blockWidth; - UINT_32 mipHeightInBlk = height / blockHeight; - UINT_32 mipDepthInBlk = depth / blockDepth; - AddrMajorMode majorMode = GetMajorMode(resourceType, - swizzleMode, - mipWidthInBlk, - mipHeightInBlk, - mipDepthInBlk); - - UINT_32 endingMip = mipId + 1; - - for (UINT_32 i = 1; i <= mipId; i++) - { - if ((i == 1) || (i == 3)) - { - if (majorMode == ADDR_MAJOR_Y) - { - mipStartPos.w += mipWidthInBlk; - } - else - { - mipStartPos.h += mipHeightInBlk; - } - } - else - { - if (majorMode == ADDR_MAJOR_X) - { - mipStartPos.w += mipWidthInBlk; - } - else if (majorMode == ADDR_MAJOR_Y) - { - mipStartPos.h += mipHeightInBlk; - } - else - { - mipStartPos.d += mipDepthInBlk; - } - } - - BOOL_32 inTail = FALSE; - - if (IsThick(resourceType, swizzleMode)) - { - UINT_32 dim = log2blkSize % 3; - - if (dim == 0) - { - inTail = - (mipWidthInBlk <= 2) && (mipHeightInBlk == 1) && (mipDepthInBlk <= 2); - } - else if (dim == 1) - { - inTail = - (mipWidthInBlk == 1) && (mipHeightInBlk <= 2) && (mipDepthInBlk <= 2); - } - else - { - inTail = - (mipWidthInBlk <= 2) && (mipHeightInBlk <= 2) && (mipDepthInBlk == 1); - } - } - else - { - if (log2blkSize & 1) - { - inTail = (mipWidthInBlk <= 2) && (mipHeightInBlk == 1); - } - else - { - inTail = (mipWidthInBlk == 1) && (mipHeightInBlk <= 2); - } - } - - if (inTail) - { - endingMip = i; - break; - } - - mipWidthInBlk = RoundHalf(mipWidthInBlk); - mipHeightInBlk = RoundHalf(mipHeightInBlk); - mipDepthInBlk = RoundHalf(mipDepthInBlk); - } - - if (mipId >= endingMip) - { - inMipTail = TRUE; - mipIndexInTail = mipId - 
endingMip; - } - } - - if (inMipTail) - { - UINT_32 index = mipIndexInTail + MaxMacroBits - log2blkSize; - ADDR_ASSERT(index < sizeof(MipTailOffset256B) / sizeof(UINT_32)); - *pMipTailBytesOffset = MipTailOffset256B[index] << 8; - } - - return mipStartPos; -} - -/** -************************************************************************************************************************ -* Gfx9Lib::HwlComputeSurfaceAddrFromCoordTiled -* -* @brief -* Internal function to calculate address from coord for tiled swizzle surface -* -* @return -* ADDR_E_RETURNCODE -************************************************************************************************************************ -*/ -ADDR_E_RETURNCODE Gfx9Lib::HwlComputeSurfaceAddrFromCoordTiled( - const ADDR2_COMPUTE_SURFACE_ADDRFROMCOORD_INPUT* pIn, ///< [in] input structure - ADDR2_COMPUTE_SURFACE_ADDRFROMCOORD_OUTPUT* pOut ///< [out] output structure - ) const -{ - ADDR2_COMPUTE_SURFACE_INFO_INPUT localIn = {0}; - localIn.swizzleMode = pIn->swizzleMode; - localIn.flags = pIn->flags; - localIn.resourceType = pIn->resourceType; - localIn.bpp = pIn->bpp; - localIn.width = Max(pIn->unalignedWidth, 1u); - localIn.height = Max(pIn->unalignedHeight, 1u); - localIn.numSlices = Max(pIn->numSlices, 1u); - localIn.numMipLevels = Max(pIn->numMipLevels, 1u); - localIn.numSamples = Max(pIn->numSamples, 1u); - localIn.numFrags = Max(pIn->numFrags, 1u); - if (localIn.numMipLevels <= 1) - { - localIn.pitchInElement = pIn->pitchInElement; - } - - ADDR2_COMPUTE_SURFACE_INFO_OUTPUT localOut = {0}; - ADDR_E_RETURNCODE returnCode = ComputeSurfaceInfoTiled(&localIn, &localOut); - - BOOL_32 valid = (returnCode == ADDR_OK) && - (IsThin(pIn->resourceType, pIn->swizzleMode) || - IsThick(pIn->resourceType, pIn->swizzleMode)) && - ((pIn->pipeBankXor == 0) || (IsXor(pIn->swizzleMode))); - - if (valid) - { - UINT_32 log2ElementBytes = Log2(pIn->bpp >> 3); - Dim3d mipStartPos = {0}; - UINT_32 mipTailBytesOffset = 0; - - if (pIn->numMipLevels > 
1) - { - // Mip-map chain cannot be MSAA surface - ADDR_ASSERT((pIn->numSamples <= 1) && (pIn->numFrags<= 1)); - - mipStartPos = GetMipStartPos(pIn->resourceType, - pIn->swizzleMode, - localOut.pitch, - localOut.height, - localOut.numSlices, - localOut.blockWidth, - localOut.blockHeight, - localOut.blockSlices, - pIn->mipId, - log2ElementBytes, - &mipTailBytesOffset); - } - - UINT_32 interleaveOffset = 0; - UINT_32 pipeBits = 0; - UINT_32 pipeXor = 0; - UINT_32 bankBits = 0; - UINT_32 bankXor = 0; - - if (IsThin(pIn->resourceType, pIn->swizzleMode)) - { - UINT_32 blockOffset = 0; - UINT_32 log2blkSize = GetBlockSizeLog2(pIn->swizzleMode); - - if (IsZOrderSwizzle(pIn->swizzleMode)) - { - // Morton generation - if ((log2ElementBytes == 0) || (log2ElementBytes == 2)) - { - UINT_32 totalLowBits = 6 - log2ElementBytes; - UINT_32 mortBits = totalLowBits / 2; - UINT_32 lowBitsValue = MortonGen2d(pIn->y, pIn->x, mortBits); - // Are 9 bits enough? - UINT_32 highBitsValue = - MortonGen2d(pIn->x >> mortBits, pIn->y >> mortBits, 9) << totalLowBits; - blockOffset = lowBitsValue | highBitsValue; - ADDR_ASSERT(blockOffset == lowBitsValue + highBitsValue); - } - else - { - blockOffset = MortonGen2d(pIn->y, pIn->x, 13); - } - - // Fill LSBs with sample bits - if (pIn->numSamples > 1) - { - blockOffset *= pIn->numSamples; - blockOffset |= pIn->sample; - } - - // Shift according to BytesPP - blockOffset <<= log2ElementBytes; - } - else - { - // Micro block offset - UINT_32 microBlockOffset = ComputeSurface2DMicroBlockOffset(pIn); - blockOffset = microBlockOffset; - - // Micro block dimension - ADDR_ASSERT(log2ElementBytes < MaxNumOfBpp); - Dim2d microBlockDim = Block256_2d[log2ElementBytes]; - // Morton generation, does 12 bit enough? 
- blockOffset |= - MortonGen2d((pIn->x / microBlockDim.w), (pIn->y / microBlockDim.h), 12) << 8; - - // Sample bits start location - UINT_32 sampleStart = log2blkSize - Log2(pIn->numSamples); - // Join sample bits information to the highest Macro block bits - if (IsNonPrtXor(pIn->swizzleMode)) - { - // Non-prt-Xor : xor highest Macro block bits with sample bits - blockOffset = blockOffset ^ (pIn->sample << sampleStart); - } - else - { - // Non-Xor or prt-Xor: replace highest Macro block bits with sample bits - // after this op, the blockOffset only contains log2 Macro block size bits - blockOffset %= (1 << sampleStart); - blockOffset |= (pIn->sample << sampleStart); - ADDR_ASSERT((blockOffset >> log2blkSize) == 0); - } - } - - if (IsXor(pIn->swizzleMode)) - { - // Mask off bits above Macro block bits to keep page synonyms working for prt - if (IsPrt(pIn->swizzleMode)) - { - blockOffset &= ((1 << log2blkSize) - 1); - } - - // Preserve offset inside pipe interleave - interleaveOffset = blockOffset & ((1 << m_pipeInterleaveLog2) - 1); - blockOffset >>= m_pipeInterleaveLog2; - - // Pipe/Se xor bits - pipeBits = GetPipeXorBits(log2blkSize); - // Pipe xor - pipeXor = FoldXor2d(blockOffset, pipeBits); - blockOffset >>= pipeBits; - - // Bank xor bits - bankBits = GetBankXorBits(log2blkSize); - // Bank Xor - bankXor = FoldXor2d(blockOffset, bankBits); - blockOffset >>= bankBits; - - // Put all the part back together - blockOffset <<= bankBits; - blockOffset |= bankXor; - blockOffset <<= pipeBits; - blockOffset |= pipeXor; - blockOffset <<= m_pipeInterleaveLog2; - blockOffset |= interleaveOffset; - } - - ADDR_ASSERT((blockOffset | mipTailBytesOffset) == (blockOffset + mipTailBytesOffset)); - ADDR_ASSERT((mipTailBytesOffset == 0u) || (blockOffset < (1u << log2blkSize))); - - blockOffset |= mipTailBytesOffset; - - if (IsNonPrtXor(pIn->swizzleMode) && (pIn->numSamples <= 1)) - { - // Apply slice xor if not MSAA/PRT - blockOffset ^= (ReverseBitVector(pIn->slice, pipeBits) << 
m_pipeInterleaveLog2); - blockOffset ^= (ReverseBitVector(pIn->slice >> pipeBits, bankBits) << - (m_pipeInterleaveLog2 + pipeBits)); - } - - returnCode = ApplyCustomerPipeBankXor(pIn->swizzleMode, pIn->pipeBankXor, - bankBits, pipeBits, &blockOffset); - - blockOffset %= (1 << log2blkSize); - - UINT_32 pitchInMacroBlock = localOut.mipChainPitch / localOut.blockWidth; - UINT_32 paddedHeightInMacroBlock = localOut.mipChainHeight / localOut.blockHeight; - UINT_32 sliceSizeInMacroBlock = pitchInMacroBlock * paddedHeightInMacroBlock; - UINT_64 macroBlockIndex = - (pIn->slice + mipStartPos.d) * sliceSizeInMacroBlock + - ((pIn->y / localOut.blockHeight) + mipStartPos.h) * pitchInMacroBlock + - ((pIn->x / localOut.blockWidth) + mipStartPos.w); - - pOut->addr = blockOffset | (macroBlockIndex << log2blkSize); - } - else - { - UINT_32 log2blkSize = GetBlockSizeLog2(pIn->swizzleMode); - - Dim3d microBlockDim = Block1K_3d[log2ElementBytes]; - - UINT_32 blockOffset = MortonGen3d((pIn->x / microBlockDim.w), - (pIn->y / microBlockDim.h), - (pIn->slice / microBlockDim.d), - 8); - - blockOffset <<= 10; - blockOffset |= ComputeSurface3DMicroBlockOffset(pIn); - - if (IsXor(pIn->swizzleMode)) - { - // Mask off bits above Macro block bits to keep page synonyms working for prt - if (IsPrt(pIn->swizzleMode)) - { - blockOffset &= ((1 << log2blkSize) - 1); - } - - // Preserve offset inside pipe interleave - interleaveOffset = blockOffset & ((1 << m_pipeInterleaveLog2) - 1); - blockOffset >>= m_pipeInterleaveLog2; - - // Pipe/Se xor bits - pipeBits = GetPipeXorBits(log2blkSize); - // Pipe xor - pipeXor = FoldXor3d(blockOffset, pipeBits); - blockOffset >>= pipeBits; - - // Bank xor bits - bankBits = GetBankXorBits(log2blkSize); - // Bank Xor - bankXor = FoldXor3d(blockOffset, bankBits); - blockOffset >>= bankBits; - - // Put all the part back together - blockOffset <<= bankBits; - blockOffset |= bankXor; - blockOffset <<= pipeBits; - blockOffset |= pipeXor; - blockOffset <<= 
m_pipeInterleaveLog2; - blockOffset |= interleaveOffset; - } - - ADDR_ASSERT((blockOffset | mipTailBytesOffset) == (blockOffset + mipTailBytesOffset)); - ADDR_ASSERT((mipTailBytesOffset == 0u) || (blockOffset < (1u << log2blkSize))); - blockOffset |= mipTailBytesOffset; - - returnCode = ApplyCustomerPipeBankXor(pIn->swizzleMode, pIn->pipeBankXor, - bankBits, pipeBits, &blockOffset); - - blockOffset %= (1 << log2blkSize); - - UINT_32 xb = pIn->x / localOut.blockWidth + mipStartPos.w; - UINT_32 yb = pIn->y / localOut.blockHeight + mipStartPos.h; - UINT_32 zb = pIn->slice / localOut.blockSlices + + mipStartPos.d; - - UINT_32 pitchInBlock = localOut.mipChainPitch / localOut.blockWidth; - UINT_32 sliceSizeInBlock = - (localOut.mipChainHeight / localOut.blockHeight) * pitchInBlock; - UINT_64 blockIndex = zb * sliceSizeInBlock + yb * pitchInBlock + xb; - - pOut->addr = blockOffset | (blockIndex << log2blkSize); - } - } - else - { - returnCode = ADDR_INVALIDPARAMS; - } - - return returnCode; -} - -/** -************************************************************************************************************************ -* Gfx9Lib::ComputeSurfaceInfoLinear -* -* @brief -* Internal function to calculate padding for linear swizzle 2D/3D surface -* -* @return -* N/A -************************************************************************************************************************ -*/ -ADDR_E_RETURNCODE Gfx9Lib::ComputeSurfaceLinearPadding( - const ADDR2_COMPUTE_SURFACE_INFO_INPUT* pIn, ///< [in] input srtucture - UINT_32* pMipmap0PaddedWidth, ///< [out] padded width in element - UINT_32* pSlice0PaddedHeight, ///< [out] padded height for HW - ADDR2_MIP_INFO* pMipInfo ///< [out] per mip information - ) const -{ - ADDR_E_RETURNCODE returnCode = ADDR_OK; - - UINT_32 elementBytes = pIn->bpp >> 3; - UINT_32 pitchAlignInElement = 0; - - if (pIn->swizzleMode == ADDR_SW_LINEAR_GENERAL) - { - ADDR_ASSERT(pIn->numMipLevels <= 1); - ADDR_ASSERT(pIn->numSlices <= 1); - 
pitchAlignInElement = 1; - } - else - { - pitchAlignInElement = (256 / elementBytes); - } - - UINT_32 mipChainWidth = PowTwoAlign(pIn->width, pitchAlignInElement); - UINT_32 slice0PaddedHeight = pIn->height; - - returnCode = ApplyCustomizedPitchHeight(pIn, elementBytes, pitchAlignInElement, - &mipChainWidth, &slice0PaddedHeight); - - if (returnCode == ADDR_OK) - { - UINT_32 mipChainHeight = 0; - UINT_32 mipHeight = pIn->height; - - for (UINT_32 i = 0; i < pIn->numMipLevels; i++) - { - if (pMipInfo != NULL) - { - pMipInfo[i].offset = mipChainWidth * mipChainHeight * elementBytes; - pMipInfo[i].pitch = mipChainWidth; - pMipInfo[i].height = mipHeight; - pMipInfo[i].depth = 1; - } - - mipChainHeight += mipHeight; - mipHeight = RoundHalf(mipHeight); - mipHeight = Max(mipHeight, 1u); - } - - *pMipmap0PaddedWidth = mipChainWidth; - *pSlice0PaddedHeight = (pIn->numMipLevels > 1) ? mipChainHeight : slice0PaddedHeight; - } - - return returnCode; -} - -} // V2 -} // Addr diff -Nru mesa-18.3.3/src/amd/addrlib/gfx9/gfx9addrlib.h mesa-19.0.1/src/amd/addrlib/gfx9/gfx9addrlib.h --- mesa-18.3.3/src/amd/addrlib/gfx9/gfx9addrlib.h 2018-09-27 19:13:53.000000000 +0000 +++ mesa-19.0.1/src/amd/addrlib/gfx9/gfx9addrlib.h 1970-01-01 00:00:00.000000000 +0000 @@ -1,454 +0,0 @@ -/* - * Copyright © 2017 Advanced Micro Devices, Inc. - * All Rights Reserved. 
- * - * Permission is hereby granted, free of charge, to any person obtaining - * a copy of this software and associated documentation files (the - * "Software"), to deal in the Software without restriction, including - * without limitation the rights to use, copy, modify, merge, publish, - * distribute, sub license, and/or sell copies of the Software, and to - * permit persons to whom the Software is furnished to do so, subject to - * the following conditions: - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES - * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - * NON-INFRINGEMENT. IN NO EVENT SHALL THE COPYRIGHT HOLDERS, AUTHORS - * AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, - * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE - * USE OR OTHER DEALINGS IN THE SOFTWARE. - * - * The above copyright notice and this permission notice (including the - * next paragraph) shall be included in all copies or substantial portions - * of the Software. - */ - -/** -************************************************************************************************************************ -* @file gfx9addrlib.h -* @brief Contgfx9ns the Gfx9Lib class definition. -************************************************************************************************************************ -*/ - -#ifndef __GFX9_ADDR_LIB_H__ -#define __GFX9_ADDR_LIB_H__ - -#include "addrlib2.h" -#include "coord.h" - -namespace Addr -{ -namespace V2 -{ - -/** -************************************************************************************************************************ -* @brief GFX9 specific settings structure. 
-************************************************************************************************************************ -*/ -struct Gfx9ChipSettings -{ - struct - { - // Asic/Generation name - UINT_32 isArcticIsland : 1; - UINT_32 isVega10 : 1; - UINT_32 isRaven : 1; - UINT_32 isVega12 : 1; - UINT_32 isVega20 : 1; - - // Display engine IP version name - UINT_32 isDce12 : 1; - UINT_32 isDcn1 : 1; - - // Misc configuration bits - UINT_32 metaBaseAlignFix : 1; - UINT_32 depthPipeXorDisable : 1; - UINT_32 htileAlignFix : 1; - UINT_32 applyAliasFix : 1; - UINT_32 htileCacheRbConflict: 1; - UINT_32 reserved2 : 27; - }; -}; - -/** -************************************************************************************************************************ -* @brief GFX9 data surface type. -************************************************************************************************************************ -*/ -enum Gfx9DataType -{ - Gfx9DataColor, - Gfx9DataDepthStencil, - Gfx9DataFmask -}; - -/** -************************************************************************************************************************ -* @brief GFX9 meta equation parameters -************************************************************************************************************************ -*/ -struct MetaEqParams -{ - UINT_32 maxMip; - UINT_32 elementBytesLog2; - UINT_32 numSamplesLog2; - ADDR2_META_FLAGS metaFlag; - Gfx9DataType dataSurfaceType; - AddrSwizzleMode swizzleMode; - AddrResourceType resourceType; - UINT_32 metaBlkWidthLog2; - UINT_32 metaBlkHeightLog2; - UINT_32 metaBlkDepthLog2; - UINT_32 compBlkWidthLog2; - UINT_32 compBlkHeightLog2; - UINT_32 compBlkDepthLog2; -}; - -/** -************************************************************************************************************************ -* @brief This class is the GFX9 specific address library -* function set. 
-************************************************************************************************************************ -*/ -class Gfx9Lib : public Lib -{ -public: - /// Creates Gfx9Lib object - static Addr::Lib* CreateObj(const Client* pClient) - { - VOID* pMem = Object::ClientAlloc(sizeof(Gfx9Lib), pClient); - return (pMem != NULL) ? new (pMem) Gfx9Lib(pClient) : NULL; - } - -protected: - Gfx9Lib(const Client* pClient); - virtual ~Gfx9Lib(); - - virtual BOOL_32 HwlIsStandardSwizzle( - AddrResourceType resourceType, - AddrSwizzleMode swizzleMode) const - { - return m_swizzleModeTable[swizzleMode].isStd || - (IsTex3d(resourceType) && m_swizzleModeTable[swizzleMode].isDisp); - } - - virtual BOOL_32 HwlIsDisplaySwizzle( - AddrResourceType resourceType, - AddrSwizzleMode swizzleMode) const - { - return IsTex2d(resourceType) && m_swizzleModeTable[swizzleMode].isDisp; - } - - virtual BOOL_32 HwlIsThin( - AddrResourceType resourceType, - AddrSwizzleMode swizzleMode) const - { - return ((IsTex2d(resourceType) == TRUE) || - ((IsTex3d(resourceType) == TRUE) && - (m_swizzleModeTable[swizzleMode].isZ == FALSE) && - (m_swizzleModeTable[swizzleMode].isStd == FALSE))); - } - - virtual BOOL_32 HwlIsThick( - AddrResourceType resourceType, - AddrSwizzleMode swizzleMode) const - { - return (IsTex3d(resourceType) && - (m_swizzleModeTable[swizzleMode].isZ || m_swizzleModeTable[swizzleMode].isStd)); - } - - virtual ADDR_E_RETURNCODE HwlComputeHtileInfo( - const ADDR2_COMPUTE_HTILE_INFO_INPUT* pIn, - ADDR2_COMPUTE_HTILE_INFO_OUTPUT* pOut) const; - - virtual ADDR_E_RETURNCODE HwlComputeCmaskInfo( - const ADDR2_COMPUTE_CMASK_INFO_INPUT* pIn, - ADDR2_COMPUTE_CMASK_INFO_OUTPUT* pOut) const; - - virtual ADDR_E_RETURNCODE HwlComputeDccInfo( - const ADDR2_COMPUTE_DCCINFO_INPUT* pIn, - ADDR2_COMPUTE_DCCINFO_OUTPUT* pOut) const; - - virtual ADDR_E_RETURNCODE HwlComputeCmaskAddrFromCoord( - const ADDR2_COMPUTE_CMASK_ADDRFROMCOORD_INPUT* pIn, - ADDR2_COMPUTE_CMASK_ADDRFROMCOORD_OUTPUT* pOut); - 
- virtual ADDR_E_RETURNCODE HwlComputeHtileAddrFromCoord( - const ADDR2_COMPUTE_HTILE_ADDRFROMCOORD_INPUT* pIn, - ADDR2_COMPUTE_HTILE_ADDRFROMCOORD_OUTPUT* pOut); - - virtual ADDR_E_RETURNCODE HwlComputeHtileCoordFromAddr( - const ADDR2_COMPUTE_HTILE_COORDFROMADDR_INPUT* pIn, - ADDR2_COMPUTE_HTILE_COORDFROMADDR_OUTPUT* pOut); - - virtual ADDR_E_RETURNCODE HwlComputeDccAddrFromCoord( - const ADDR2_COMPUTE_DCC_ADDRFROMCOORD_INPUT* pIn, - ADDR2_COMPUTE_DCC_ADDRFROMCOORD_OUTPUT* pOut); - - virtual UINT_32 HwlGetEquationIndex( - const ADDR2_COMPUTE_SURFACE_INFO_INPUT* pIn, - ADDR2_COMPUTE_SURFACE_INFO_OUTPUT* pOut) const; - - virtual ADDR_E_RETURNCODE HwlComputeBlock256Equation( - AddrResourceType rsrcType, - AddrSwizzleMode swMode, - UINT_32 elementBytesLog2, - ADDR_EQUATION* pEquation) const; - - virtual ADDR_E_RETURNCODE HwlComputeThinEquation( - AddrResourceType rsrcType, - AddrSwizzleMode swMode, - UINT_32 elementBytesLog2, - ADDR_EQUATION* pEquation) const; - - virtual ADDR_E_RETURNCODE HwlComputeThickEquation( - AddrResourceType rsrcType, - AddrSwizzleMode swMode, - UINT_32 elementBytesLog2, - ADDR_EQUATION* pEquation) const; - - // Get equation table pointer and number of equations - virtual UINT_32 HwlGetEquationTableInfo(const ADDR_EQUATION** ppEquationTable) const - { - *ppEquationTable = m_equationTable; - - return m_numEquations; - } - - virtual BOOL_32 IsEquationSupported( - AddrResourceType rsrcType, - AddrSwizzleMode swMode, - UINT_32 elementBytesLog2) const; - - UINT_32 ComputeSurfaceBaseAlignTiled(AddrSwizzleMode swizzleMode) const - { - UINT_32 baseAlign; - - if (IsXor(swizzleMode)) - { - baseAlign = GetBlockSize(swizzleMode); - } - else - { - baseAlign = 256; - } - - return baseAlign; - } - - virtual ADDR_E_RETURNCODE HwlComputePipeBankXor( - const ADDR2_COMPUTE_PIPEBANKXOR_INPUT* pIn, - ADDR2_COMPUTE_PIPEBANKXOR_OUTPUT* pOut) const; - - virtual ADDR_E_RETURNCODE HwlComputeSlicePipeBankXor( - const ADDR2_COMPUTE_SLICE_PIPEBANKXOR_INPUT* pIn, - 
ADDR2_COMPUTE_SLICE_PIPEBANKXOR_OUTPUT* pOut) const; - - virtual ADDR_E_RETURNCODE HwlComputeSubResourceOffsetForSwizzlePattern( - const ADDR2_COMPUTE_SUBRESOURCE_OFFSET_FORSWIZZLEPATTERN_INPUT* pIn, - ADDR2_COMPUTE_SUBRESOURCE_OFFSET_FORSWIZZLEPATTERN_OUTPUT* pOut) const; - - virtual ADDR_E_RETURNCODE HwlGetPreferredSurfaceSetting( - const ADDR2_GET_PREFERRED_SURF_SETTING_INPUT* pIn, - ADDR2_GET_PREFERRED_SURF_SETTING_OUTPUT* pOut) const; - - virtual ADDR_E_RETURNCODE HwlComputeSurfaceInfoSanityCheck( - const ADDR2_COMPUTE_SURFACE_INFO_INPUT* pIn) const; - - virtual ADDR_E_RETURNCODE HwlComputeSurfaceInfoTiled( - const ADDR2_COMPUTE_SURFACE_INFO_INPUT* pIn, - ADDR2_COMPUTE_SURFACE_INFO_OUTPUT* pOut) const; - - virtual ADDR_E_RETURNCODE HwlComputeSurfaceInfoLinear( - const ADDR2_COMPUTE_SURFACE_INFO_INPUT* pIn, - ADDR2_COMPUTE_SURFACE_INFO_OUTPUT* pOut) const; - - virtual ADDR_E_RETURNCODE HwlComputeSurfaceAddrFromCoordTiled( - const ADDR2_COMPUTE_SURFACE_ADDRFROMCOORD_INPUT* pIn, - ADDR2_COMPUTE_SURFACE_ADDRFROMCOORD_OUTPUT* pOut) const; - - // Initialize equation table - VOID InitEquationTable(); - - ADDR_E_RETURNCODE ComputeStereoInfo( - const ADDR2_COMPUTE_SURFACE_INFO_INPUT* pIn, - ADDR2_COMPUTE_SURFACE_INFO_OUTPUT* pOut, - UINT_32* pHeightAlign) const; - - UINT_32 GetMipChainInfo( - AddrResourceType resourceType, - AddrSwizzleMode swizzleMode, - UINT_32 bpp, - UINT_32 mip0Width, - UINT_32 mip0Height, - UINT_32 mip0Depth, - UINT_32 blockWidth, - UINT_32 blockHeight, - UINT_32 blockDepth, - UINT_32 numMipLevel, - ADDR2_MIP_INFO* pMipInfo) const; - - VOID GetMetaMiptailInfo( - ADDR2_META_MIP_INFO* pInfo, - Dim3d mipCoord, - UINT_32 numMipInTail, - Dim3d* pMetaBlkDim) const; - - Dim3d GetMipStartPos( - AddrResourceType resourceType, - AddrSwizzleMode swizzleMode, - UINT_32 width, - UINT_32 height, - UINT_32 depth, - UINT_32 blockWidth, - UINT_32 blockHeight, - UINT_32 blockDepth, - UINT_32 mipId, - UINT_32 log2ElementBytes, - UINT_32* pMipTailBytesOffset) const; 
- - AddrMajorMode GetMajorMode( - AddrResourceType resourceType, - AddrSwizzleMode swizzleMode, - UINT_32 mip0WidthInBlk, - UINT_32 mip0HeightInBlk, - UINT_32 mip0DepthInBlk) const - { - BOOL_32 yMajor = (mip0WidthInBlk < mip0HeightInBlk); - BOOL_32 xMajor = (yMajor == FALSE); - - if (IsThick(resourceType, swizzleMode)) - { - yMajor = yMajor && (mip0HeightInBlk >= mip0DepthInBlk); - xMajor = xMajor && (mip0WidthInBlk >= mip0DepthInBlk); - } - - AddrMajorMode majorMode; - if (xMajor) - { - majorMode = ADDR_MAJOR_X; - } - else if (yMajor) - { - majorMode = ADDR_MAJOR_Y; - } - else - { - majorMode = ADDR_MAJOR_Z; - } - - return majorMode; - } - - Dim3d GetDccCompressBlk( - AddrResourceType resourceType, - AddrSwizzleMode swizzleMode, - UINT_32 bpp) const - { - UINT_32 index = Log2(bpp >> 3); - Dim3d compressBlkDim; - - if (IsThin(resourceType, swizzleMode)) - { - compressBlkDim.w = Block256_2d[index].w; - compressBlkDim.h = Block256_2d[index].h; - compressBlkDim.d = 1; - } - else if (IsStandardSwizzle(resourceType, swizzleMode)) - { - compressBlkDim = Block256_3dS[index]; - } - else - { - compressBlkDim = Block256_3dZ[index]; - } - - return compressBlkDim; - } - - - static const UINT_32 MaxSeLog2 = 3; - static const UINT_32 MaxRbPerSeLog2 = 2; - - static const Dim3d Block256_3dS[MaxNumOfBpp]; - static const Dim3d Block256_3dZ[MaxNumOfBpp]; - - static const UINT_32 MipTailOffset256B[]; - - static const SwizzleModeFlags SwizzleModeTable[ADDR_SW_MAX_TYPE]; - - // Max number of swizzle mode supported for equation - static const UINT_32 MaxSwMode = 32; - // Max number of resource type (2D/3D) supported for equation - static const UINT_32 MaxRsrcType = 2; - // Max number of bpp (8bpp/16bpp/32bpp/64bpp/128bpp) - static const UINT_32 MaxElementBytesLog2 = 5; - // Almost all swizzle mode + resource type support equation - static const UINT_32 EquationTableSize = MaxElementBytesLog2 * MaxSwMode * MaxRsrcType; - // Equation table - ADDR_EQUATION 
m_equationTable[EquationTableSize]; - - // Number of equation entries in the table - UINT_32 m_numEquations; - // Equation lookup table according to bpp and tile index - UINT_32 m_equationLookupTable[MaxRsrcType][MaxSwMode][MaxElementBytesLog2]; - - static const UINT_32 MaxCachedMetaEq = 2; - -private: - virtual UINT_32 HwlComputeMaxBaseAlignments() const; - - virtual UINT_32 HwlComputeMaxMetaBaseAlignments() const; - - virtual BOOL_32 HwlInitGlobalParams(const ADDR_CREATE_INPUT* pCreateIn); - - VOID GetRbEquation(CoordEq* pRbEq, UINT_32 rbPerSeLog2, UINT_32 seLog2) const; - - VOID GetDataEquation(CoordEq* pDataEq, Gfx9DataType dataSurfaceType, - AddrSwizzleMode swizzleMode, AddrResourceType resourceType, - UINT_32 elementBytesLog2, UINT_32 numSamplesLog2) const; - - VOID GetPipeEquation(CoordEq* pPipeEq, CoordEq* pDataEq, - UINT_32 pipeInterleaveLog2, UINT_32 numPipesLog2, - UINT_32 numSamplesLog2, Gfx9DataType dataSurfaceType, - AddrSwizzleMode swizzleMode, AddrResourceType resourceType) const; - - VOID GenMetaEquation(CoordEq* pMetaEq, UINT_32 maxMip, - UINT_32 elementBytesLog2, UINT_32 numSamplesLog2, - ADDR2_META_FLAGS metaFlag, Gfx9DataType dataSurfaceType, - AddrSwizzleMode swizzleMode, AddrResourceType resourceType, - UINT_32 metaBlkWidthLog2, UINT_32 metaBlkHeightLog2, - UINT_32 metaBlkDepthLog2, UINT_32 compBlkWidthLog2, - UINT_32 compBlkHeightLog2, UINT_32 compBlkDepthLog2) const; - - const CoordEq* GetMetaEquation(const MetaEqParams& metaEqParams); - - virtual ChipFamily HwlConvertChipFamily(UINT_32 uChipFamily, UINT_32 uChipRevision); - - VOID GetMetaMipInfo(UINT_32 numMipLevels, Dim3d* pMetaBlkDim, - BOOL_32 dataThick, ADDR2_META_MIP_INFO* pInfo, - UINT_32 mip0Width, UINT_32 mip0Height, UINT_32 mip0Depth, - UINT_32* pNumMetaBlkX, UINT_32* pNumMetaBlkY, UINT_32* pNumMetaBlkZ) const; - - BOOL_32 IsValidDisplaySwizzleMode(const ADDR2_COMPUTE_SURFACE_INFO_INPUT* pIn) const; - - ADDR_E_RETURNCODE ComputeSurfaceLinearPadding( - const 
ADDR2_COMPUTE_SURFACE_INFO_INPUT* pIn, - UINT_32* pMipmap0PaddedWidth, - UINT_32* pSlice0PaddedHeight, - ADDR2_MIP_INFO* pMipInfo = NULL) const; - - Gfx9ChipSettings m_settings; - - CoordEq m_cachedMetaEq[MaxCachedMetaEq]; - MetaEqParams m_cachedMetaEqKey[MaxCachedMetaEq]; - UINT_32 m_metaEqOverrideIndex; -}; - -} // V2 -} // Addr - -#endif - diff -Nru mesa-18.3.3/src/amd/addrlib/inc/addrinterface.h mesa-19.0.1/src/amd/addrlib/inc/addrinterface.h --- mesa-18.3.3/src/amd/addrlib/inc/addrinterface.h 1970-01-01 00:00:00.000000000 +0000 +++ mesa-19.0.1/src/amd/addrlib/inc/addrinterface.h 2019-03-31 23:16:37.000000000 +0000 @@ -0,0 +1,3715 @@ +/* + * Copyright © 2007-2018 Advanced Micro Devices, Inc. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining + * a copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES + * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NON-INFRINGEMENT. IN NO EVENT SHALL THE COPYRIGHT HOLDERS, AUTHORS + * AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE + * USE OR OTHER DEALINGS IN THE SOFTWARE. + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. 
+ */ + +/** +**************************************************************************************************** +* @file addrinterface.h +* @brief Contains the addrlib interfaces declaration and parameter defines +**************************************************************************************************** +*/ +#ifndef __ADDR_INTERFACE_H__ +#define __ADDR_INTERFACE_H__ + +#include "addrtypes.h" + +#if defined(__cplusplus) +extern "C" +{ +#endif + +#define ADDRLIB_VERSION_MAJOR 6 +#define ADDRLIB_VERSION_MINOR 2 +#define ADDRLIB_VERSION ((ADDRLIB_VERSION_MAJOR << 16) | ADDRLIB_VERSION_MINOR) + +/// Virtually all interface functions need ADDR_HANDLE as first parameter +typedef VOID* ADDR_HANDLE; + +/// Client handle used in callbacks +typedef VOID* ADDR_CLIENT_HANDLE; + +/** +* ///////////////////////////////////////////////////////////////////////////////////////////////// +* // Callback functions +* ///////////////////////////////////////////////////////////////////////////////////////////////// +* typedef VOID* (ADDR_API* ADDR_ALLOCSYSMEM)( +* const ADDR_ALLOCSYSMEM_INPUT* pInput); +* typedef ADDR_E_RETURNCODE (ADDR_API* ADDR_FREESYSMEM)( +* VOID* pVirtAddr); +* typedef ADDR_E_RETURNCODE (ADDR_API* ADDR_DEBUGPRINT)( +* const ADDR_DEBUGPRINT_INPUT* pInput); +* +* ///////////////////////////////////////////////////////////////////////////////////////////////// +* // Create/Destroy/Config functions +* ///////////////////////////////////////////////////////////////////////////////////////////////// +* AddrCreate() +* AddrDestroy() +* +* ///////////////////////////////////////////////////////////////////////////////////////////////// +* // Surface functions +* ///////////////////////////////////////////////////////////////////////////////////////////////// +* AddrComputeSurfaceInfo() +* AddrComputeSurfaceAddrFromCoord() +* AddrComputeSurfaceCoordFromAddr() +* +* ///////////////////////////////////////////////////////////////////////////////////////////////// 
+* // HTile functions +* ///////////////////////////////////////////////////////////////////////////////////////////////// +* AddrComputeHtileInfo() +* AddrComputeHtileAddrFromCoord() +* AddrComputeHtileCoordFromAddr() +* +* ///////////////////////////////////////////////////////////////////////////////////////////////// +* // C-mask functions +* ///////////////////////////////////////////////////////////////////////////////////////////////// +* AddrComputeCmaskInfo() +* AddrComputeCmaskAddrFromCoord() +* AddrComputeCmaskCoordFromAddr() +* +* ///////////////////////////////////////////////////////////////////////////////////////////////// +* // F-mask functions +* ///////////////////////////////////////////////////////////////////////////////////////////////// +* AddrComputeFmaskInfo() +* AddrComputeFmaskAddrFromCoord() +* AddrComputeFmaskCoordFromAddr() +* +* ///////////////////////////////////////////////////////////////////////////////////////////////// +* // Element/Utility functions +* ///////////////////////////////////////////////////////////////////////////////////////////////// +* ElemFlt32ToDepthPixel() +* ElemFlt32ToColorPixel() +* AddrExtractBankPipeSwizzle() +* AddrCombineBankPipeSwizzle() +* AddrComputeSliceSwizzle() +* AddrConvertTileInfoToHW() +* AddrConvertTileIndex() +* AddrConvertTileIndex1() +* AddrGetTileIndex() +* AddrComputeBaseSwizzle() +* AddrUseTileIndex() +* AddrUseCombinedSwizzle() +* +**/ + +//////////////////////////////////////////////////////////////////////////////////////////////////// +// Callback functions +//////////////////////////////////////////////////////////////////////////////////////////////////// + +/** +**************************************************************************************************** +* @brief channel setting structure +**************************************************************************************************** +*/ +typedef union _ADDR_CHANNEL_SETTING +{ + struct + { + UINT_8 valid : 1; ///< 
Indicate whehter this channel setting is valid + UINT_8 channel : 2; ///< 0 for x channel, 1 for y channel, 2 for z channel + UINT_8 index : 5; ///< Channel index + }; + UINT_8 value; ///< Value +} ADDR_CHANNEL_SETTING; + +/** +**************************************************************************************************** +* @brief address equation key structure +**************************************************************************************************** +*/ +typedef union _ADDR_EQUATION_KEY +{ + struct + { + UINT_32 log2ElementBytes : 3; ///< Log2 of Bytes per pixel + UINT_32 tileMode : 5; ///< Tile mode + UINT_32 microTileType : 3; ///< Micro tile type + UINT_32 pipeConfig : 5; ///< pipe config + UINT_32 numBanksLog2 : 3; ///< Number of banks log2 + UINT_32 bankWidth : 4; ///< Bank width + UINT_32 bankHeight : 4; ///< Bank height + UINT_32 macroAspectRatio : 3; ///< Macro tile aspect ratio + UINT_32 prt : 1; ///< SI only, indicate whether this equation is for prt + UINT_32 reserved : 1; ///< Reserved bit + } fields; + UINT_32 value; +} ADDR_EQUATION_KEY; + +/** +**************************************************************************************************** +* @brief address equation structure +**************************************************************************************************** +*/ +#define ADDR_MAX_EQUATION_BIT 20u + +// Invalid equation index +#define ADDR_INVALID_EQUATION_INDEX 0xFFFFFFFF + +typedef struct _ADDR_EQUATION +{ + ADDR_CHANNEL_SETTING addr[ADDR_MAX_EQUATION_BIT]; ///< addr setting + ///< each bit is result of addr ^ xor ^ xor2 + ADDR_CHANNEL_SETTING xor1[ADDR_MAX_EQUATION_BIT]; ///< xor setting + ADDR_CHANNEL_SETTING xor2[ADDR_MAX_EQUATION_BIT]; ///< xor2 setting + UINT_32 numBits; ///< The number of bits in equation + BOOL_32 stackedDepthSlices; ///< TRUE if depth slices are treated as being + ///< stacked vertically prior to swizzling +} ADDR_EQUATION; + +/** 
+**************************************************************************************************** +* @brief Alloc system memory flags. +* @note These flags are reserved for future use and if flags are added will minimize the impact +* of the client. +**************************************************************************************************** +*/ +typedef union _ADDR_ALLOCSYSMEM_FLAGS +{ + struct + { + UINT_32 reserved : 32; ///< Reserved for future use. + } fields; + UINT_32 value; + +} ADDR_ALLOCSYSMEM_FLAGS; + +/** +**************************************************************************************************** +* @brief Alloc system memory input structure +**************************************************************************************************** +*/ +typedef struct _ADDR_ALLOCSYSMEM_INPUT +{ + UINT_32 size; ///< Size of this structure in bytes + + ADDR_ALLOCSYSMEM_FLAGS flags; ///< System memory flags. + UINT_32 sizeInBytes; ///< System memory allocation size in bytes. + ADDR_CLIENT_HANDLE hClient; ///< Client handle +} ADDR_ALLOCSYSMEM_INPUT; + +/** +**************************************************************************************************** +* ADDR_ALLOCSYSMEM +* @brief +* Allocate system memory callback function. Returns valid pointer on success. 
+**************************************************************************************************** +*/ +typedef VOID* (ADDR_API* ADDR_ALLOCSYSMEM)( + const ADDR_ALLOCSYSMEM_INPUT* pInput); + +/** +**************************************************************************************************** +* @brief Free system memory input structure +**************************************************************************************************** +*/ +typedef struct _ADDR_FREESYSMEM_INPUT +{ + UINT_32 size; ///< Size of this structure in bytes + + VOID* pVirtAddr; ///< Virtual address + ADDR_CLIENT_HANDLE hClient; ///< Client handle +} ADDR_FREESYSMEM_INPUT; + +/** +**************************************************************************************************** +* ADDR_FREESYSMEM +* @brief +* Free system memory callback function. +* Returns ADDR_OK on success. +**************************************************************************************************** +*/ +typedef ADDR_E_RETURNCODE (ADDR_API* ADDR_FREESYSMEM)( + const ADDR_FREESYSMEM_INPUT* pInput); + +/** +**************************************************************************************************** +* @brief Print debug message input structure +**************************************************************************************************** +*/ +typedef struct _ADDR_DEBUGPRINT_INPUT +{ + UINT_32 size; ///< Size of this structure in bytes + + CHAR* pDebugString; ///< Debug print string + va_list ap; ///< Variable argument list + ADDR_CLIENT_HANDLE hClient; ///< Client handle +} ADDR_DEBUGPRINT_INPUT; + +/** +**************************************************************************************************** +* ADDR_DEBUGPRINT +* @brief +* Print debug message callback function. +* Returns ADDR_OK on success. 
+**************************************************************************************************** +*/ +typedef ADDR_E_RETURNCODE (ADDR_API* ADDR_DEBUGPRINT)( + const ADDR_DEBUGPRINT_INPUT* pInput); + +/** +**************************************************************************************************** +* ADDR_CALLBACKS +* +* @brief +* Address Library needs client to provide system memory alloc/free routines. +**************************************************************************************************** +*/ +typedef struct _ADDR_CALLBACKS +{ + ADDR_ALLOCSYSMEM allocSysMem; ///< Routine to allocate system memory + ADDR_FREESYSMEM freeSysMem; ///< Routine to free system memory + ADDR_DEBUGPRINT debugPrint; ///< Routine to print debug message +} ADDR_CALLBACKS; + +//////////////////////////////////////////////////////////////////////////////////////////////////// +// Create/Destroy functions +//////////////////////////////////////////////////////////////////////////////////////////////////// + +/** +**************************************************************************************************** +* ADDR_CREATE_FLAGS +* +* @brief +* This structure is used to pass some setup in creation of AddrLib +* @note +**************************************************************************************************** +*/ +typedef union _ADDR_CREATE_FLAGS +{ + struct + { + UINT_32 noCubeMipSlicesPad : 1; ///< Turn cubemap faces padding off + UINT_32 fillSizeFields : 1; ///< If clients fill size fields in all input and + /// output structure + UINT_32 useTileIndex : 1; ///< Make tileIndex field in input valid + UINT_32 useCombinedSwizzle : 1; ///< Use combined tile swizzle + UINT_32 checkLast2DLevel : 1; ///< Check the last 2D mip sub level + UINT_32 useHtileSliceAlign : 1; ///< Do htile single slice alignment + UINT_32 allowLargeThickTile : 1; ///< Allow 64*thickness*bytesPerPixel > rowSize + UINT_32 reserved : 25; ///< Reserved bits for future use + }; + + UINT_32 
value; +} ADDR_CREATE_FLAGS; + +/** +**************************************************************************************************** +* ADDR_REGISTER_VALUE +* +* @brief +* Data from registers to setup AddrLib global data, used in AddrCreate +**************************************************************************************************** +*/ +typedef struct _ADDR_REGISTER_VALUE +{ + UINT_32 gbAddrConfig; ///< For R8xx, use GB_ADDR_CONFIG register value. + /// For R6xx/R7xx, use GB_TILING_CONFIG. + /// But they can be treated as the same. + /// if this value is 0, use chip to set default value + UINT_32 backendDisables; ///< 1 bit per backend, starting with LSB. 1=disabled,0=enabled. + /// Register value of CC_RB_BACKEND_DISABLE.BACKEND_DISABLE + + /// R800 registers----------------------------------------------- + UINT_32 noOfBanks; ///< Number of h/w ram banks - For r800: MC_ARB_RAMCFG.NOOFBANK + /// No enums for this value in h/w header files + /// 0: 4 + /// 1: 8 + /// 2: 16 + UINT_32 noOfRanks; /// MC_ARB_RAMCFG.NOOFRANK + /// 0: 1 + /// 1: 2 + /// SI (R1000) registers----------------------------------------- + const UINT_32* pTileConfig; ///< Global tile setting tables + UINT_32 noOfEntries; ///< Number of entries in pTileConfig + + ///< CI registers------------------------------------------------- + const UINT_32* pMacroTileConfig; ///< Global macro tile mode table + UINT_32 noOfMacroEntries; ///< Number of entries in pMacroTileConfig + + ///< GFX9 HW parameters + UINT_32 blockVarSizeLog2; ///< SW_VAR_* block size +} ADDR_REGISTER_VALUE; + +/** +**************************************************************************************************** +* ADDR_CREATE_INPUT +* +* @brief +* Parameters use to create an AddrLib Object. Caller must provide all fields. 
+* +**************************************************************************************************** +*/ +typedef struct _ADDR_CREATE_INPUT +{ + UINT_32 size; ///< Size of this structure in bytes + + UINT_32 chipEngine; ///< Chip Engine + UINT_32 chipFamily; ///< Chip Family + UINT_32 chipRevision; ///< Chip Revision + ADDR_CALLBACKS callbacks; ///< Callbacks for sysmem alloc/free/print + ADDR_CREATE_FLAGS createFlags; ///< Flags to setup AddrLib + ADDR_REGISTER_VALUE regValue; ///< Data from registers to setup AddrLib global data + ADDR_CLIENT_HANDLE hClient; ///< Client handle + UINT_32 minPitchAlignPixels; ///< Minimum pitch alignment in pixels +} ADDR_CREATE_INPUT; + +/** +**************************************************************************************************** +* ADDR_CREATEINFO_OUTPUT +* +* @brief +* Return AddrLib handle to client driver +* +**************************************************************************************************** +*/ +typedef struct _ADDR_CREATE_OUTPUT +{ + UINT_32 size; ///< Size of this structure in bytes + + ADDR_HANDLE hLib; ///< Address lib handle + + UINT_32 numEquations; ///< Number of equations in the table + const ADDR_EQUATION* pEquationTable; ///< Pointer to the equation table +} ADDR_CREATE_OUTPUT; + +/** +**************************************************************************************************** +* AddrCreate +* +* @brief +* Create AddrLib object, must be called before any interface calls +* +* @return +* ADDR_OK if successful +**************************************************************************************************** +*/ +ADDR_E_RETURNCODE ADDR_API AddrCreate( + const ADDR_CREATE_INPUT* pAddrCreateIn, + ADDR_CREATE_OUTPUT* pAddrCreateOut); + +/** +**************************************************************************************************** +* AddrDestroy +* +* @brief +* Destroy AddrLib object, must be called to free internally allocated resources. 
+* +* @return +* ADDR_OK if successful +**************************************************************************************************** +*/ +ADDR_E_RETURNCODE ADDR_API AddrDestroy( + ADDR_HANDLE hLib); + +//////////////////////////////////////////////////////////////////////////////////////////////////// +// Surface functions +//////////////////////////////////////////////////////////////////////////////////////////////////// + +/** +**************************************************************************************************** +* @brief +* Bank/tiling parameters. On function input, these can be set as desired or +* left 0 for AddrLib to calculate/default. On function output, these are the actual +* parameters used. +* @note +* Valid bankWidth/bankHeight value: +* 1,2,4,8. They are factors instead of pixels or bytes. +* +* The bank number remains constant across each row of the +* macro tile as each pipe is selected, so the number of +* tiles in the x direction with the same bank number will +* be bank_width * num_pipes. +**************************************************************************************************** +*/ +typedef struct _ADDR_TILEINFO +{ + /// Any of these parameters can be set to 0 to use the HW default. + UINT_32 banks; ///< Number of banks, numerical value + UINT_32 bankWidth; ///< Number of tiles in the X direction in the same bank + UINT_32 bankHeight; ///< Number of tiles in the Y direction in the same bank + UINT_32 macroAspectRatio; ///< Macro tile aspect ratio. 1-1:1, 2-4:1, 4-16:1, 8-64:1 + UINT_32 tileSplitBytes; ///< Tile split size, in bytes + AddrPipeCfg pipeConfig; ///< Pipe Config = HW enum + 1 +} ADDR_TILEINFO; + +// Create a define to avoid client change. 
The removal of R800 is because we plan to implement SI +// within 800 HWL - An AddrPipeCfg is added in above data structure +typedef ADDR_TILEINFO ADDR_R800_TILEINFO; + +/** +**************************************************************************************************** +* @brief +* Information needed by quad buffer stereo support +**************************************************************************************************** +*/ +typedef struct _ADDR_QBSTEREOINFO +{ + UINT_32 eyeHeight; ///< Height (in pixel rows) to right eye + UINT_32 rightOffset; ///< Offset (in bytes) to right eye + UINT_32 rightSwizzle; ///< TileSwizzle for right eyes +} ADDR_QBSTEREOINFO; + +/** +**************************************************************************************************** +* ADDR_SURFACE_FLAGS +* +* @brief +* Surface flags +**************************************************************************************************** +*/ +typedef union _ADDR_SURFACE_FLAGS +{ + struct + { + UINT_32 color : 1; ///< Flag indicates this is a color buffer + UINT_32 depth : 1; ///< Flag indicates this is a depth/stencil buffer + UINT_32 stencil : 1; ///< Flag indicates this is a stencil buffer + UINT_32 texture : 1; ///< Flag indicates this is a texture + UINT_32 cube : 1; ///< Flag indicates this is a cubemap + UINT_32 volume : 1; ///< Flag indicates this is a volume texture + UINT_32 fmask : 1; ///< Flag indicates this is an fmask + UINT_32 cubeAsArray : 1; ///< Flag indicates if treat cubemap as arrays + UINT_32 compressZ : 1; ///< Flag indicates z buffer is compressed + UINT_32 overlay : 1; ///< Flag indicates this is an overlay surface + UINT_32 noStencil : 1; ///< Flag indicates this depth has no separate stencil + UINT_32 display : 1; ///< Flag indicates this should match display controller req. + UINT_32 opt4Space : 1; ///< Flag indicates this surface should be optimized for space + /// i.e. 
save some memory but may lose performance + UINT_32 prt : 1; ///< Flag for partially resident texture + UINT_32 qbStereo : 1; ///< Quad buffer stereo surface + UINT_32 pow2Pad : 1; ///< SI: Pad to pow2, must set for mipmap (include level0) + UINT_32 interleaved : 1; ///< Special flag for interleaved YUV surface padding + UINT_32 tcCompatible : 1; ///< Flag indicates surface needs to be shader readable + UINT_32 dispTileType : 1; ///< NI: force display Tiling for 128 bit shared resoruce + UINT_32 dccCompatible : 1; ///< VI: whether to make MSAA surface support dcc fast clear + UINT_32 dccPipeWorkaround : 1; ///< VI: whether to workaround the HW limit that + /// dcc can't be enabled if pipe config of tile mode + /// is different from that of ASIC, this flag + /// is address lib internal flag, client should ignore it + UINT_32 czDispCompatible : 1; ///< SI+: CZ family has a HW bug needs special alignment. + /// This flag indicates we need to follow the + /// alignment with CZ families or other ASICs under + /// PX configuration + CZ. + UINT_32 nonSplit : 1; ///< CI: depth texture should not be split + UINT_32 disableLinearOpt : 1; ///< Disable tile mode optimization to linear + UINT_32 needEquation : 1; ///< Make the surface tile setting equation compatible. + /// This flag indicates we need to override tile + /// mode to PRT_* tile mode to disable slice rotation, + /// which is needed by swizzle pattern equation. + UINT_32 skipIndicesOutput : 1; ///< Skipping indices in output. 
+ UINT_32 rotateDisplay : 1; ///< Rotate micro tile type + UINT_32 minimizeAlignment : 1; ///< Minimize alignment + UINT_32 preferEquation : 1; ///< Return equation index without adjusting tile mode + UINT_32 matchStencilTileCfg : 1; ///< Select tile index of stencil as well as depth surface + /// to make sure they share same tile config parameters + UINT_32 disallowLargeThickDegrade : 1; ///< Disallow large thick tile degrade + UINT_32 reserved : 1; ///< Reserved bits + }; + + UINT_32 value; +} ADDR_SURFACE_FLAGS; + +/** +**************************************************************************************************** +* ADDR_COMPUTE_SURFACE_INFO_INPUT +* +* @brief +* Input structure for AddrComputeSurfaceInfo +**************************************************************************************************** +*/ +typedef struct _ADDR_COMPUTE_SURFACE_INFO_INPUT +{ + UINT_32 size; ///< Size of this structure in bytes + + AddrTileMode tileMode; ///< Tile mode + AddrFormat format; ///< If format is set to valid one, bpp/width/height + /// might be overwritten + UINT_32 bpp; ///< Bits per pixel + UINT_32 numSamples; ///< Number of samples + UINT_32 width; ///< Width, in pixels + UINT_32 height; ///< Height, in pixels + UINT_32 numSlices; ///< Number of surface slices or depth + UINT_32 slice; ///< Slice index + UINT_32 mipLevel; ///< Current mipmap level + UINT_32 numMipLevels; ///< Number of mips in mip chain + ADDR_SURFACE_FLAGS flags; ///< Surface type flags + UINT_32 numFrags; ///< Number of fragments, leave it zero or the same as + /// number of samples for normal AA; Set it to the + /// number of fragments for EQAA + /// r800 and later HWL parameters + // Needed by 2D tiling, for linear and 1D tiling, just keep them 0's + ADDR_TILEINFO* pTileInfo; ///< 2D tile parameters. 
Set to 0 to default/calculate + AddrTileType tileType; ///< Micro tiling type, not needed when tileIndex != -1 + INT_32 tileIndex; ///< Tile index, MUST be -1 if you don't want to use it + /// while the global useTileIndex is set to 1 + UINT_32 basePitch; ///< Base level pitch in pixels, 0 means ignored, is a + /// must for mip levels from SI+. + /// Don't use pitch in blocks for compressed formats! + UINT_32 maxBaseAlign; ///< Max base alignment request from client + UINT_32 pitchAlign; ///< Pitch alignment request from client + UINT_32 heightAlign; ///< Height alignment request from client +} ADDR_COMPUTE_SURFACE_INFO_INPUT; + +/** +**************************************************************************************************** +* ADDR_COMPUTE_SURFACE_INFO_OUTPUT +* +* @brief +* Output structure for AddrComputeSurfInfo +* @note + Element: AddrLib unit for computing. e.g. BCn: 4x4 blocks; R32B32B32: 32bit with 3x pitch + Pixel: Original pixel +**************************************************************************************************** +*/ +typedef struct _ADDR_COMPUTE_SURFACE_INFO_OUTPUT +{ + UINT_32 size; ///< Size of this structure in bytes + + UINT_32 pitch; ///< Pitch in elements (in blocks for compressed formats) + UINT_32 height; ///< Height in elements (in blocks for compressed formats) + UINT_32 depth; ///< Number of slice/depth + UINT_64 surfSize; ///< Surface size in bytes + AddrTileMode tileMode; ///< Actual tile mode. May differ from that in input + UINT_32 baseAlign; ///< Base address alignment + UINT_32 pitchAlign; ///< Pitch alignment, in elements + UINT_32 heightAlign; ///< Height alignment, in elements + UINT_32 depthAlign; ///< Depth alignment, aligned to thickness, for 3d texture + UINT_32 bpp; ///< Bits per elements (e.g. 
blocks for BCn, 1/3 for 96bit) + UINT_32 pixelPitch; ///< Pitch in original pixels + UINT_32 pixelHeight; ///< Height in original pixels + UINT_32 pixelBits; ///< Original bits per pixel, passed from input + UINT_64 sliceSize; ///< Size of slice specified by input's slice + /// The result is controlled by surface flags & createFlags + /// By default this value equals to surfSize for volume + UINT_32 pitchTileMax; ///< PITCH_TILE_MAX value for h/w register + UINT_32 heightTileMax; ///< HEIGHT_TILE_MAX value for h/w register + UINT_32 sliceTileMax; ///< SLICE_TILE_MAX value for h/w register + + UINT_32 numSamples; ///< Pass the effective numSamples processed in this call + + /// r800 and later HWL parameters + ADDR_TILEINFO* pTileInfo; ///< Tile parameters used. Filled in if 0 on input + AddrTileType tileType; ///< Micro tiling type, only valid when tileIndex != -1 + INT_32 tileIndex; ///< Tile index, MAY be "downgraded" + + INT_32 macroModeIndex; ///< Index in macro tile mode table if there is one (CI) + /// Output flags + struct + { + /// Special information to work around SI mipmap swizzle bug UBTS #317508 + UINT_32 last2DLevel : 1; ///< TRUE if this is the last 2D(3D) tiled + ///< Only meaningful when create flag checkLast2DLevel is set + UINT_32 tcCompatible : 1; ///< If the surface can be shader compatible + UINT_32 dccUnsupport : 1; ///< If the surface can support DCC compressed rendering + UINT_32 prtTileIndex : 1; ///< SI only, indicate the returned tile index is for PRT + ///< If address lib return true for mip 0, client should set prt flag + ///< for child mips in subsequent compute surface info calls + UINT_32 reserved :28; ///< Reserved bits + }; + + UINT_32 equationIndex; ///< Equation index in the equation table; + + UINT_32 blockWidth; ///< Width in element inside one block(1D->Micro, 2D->Macro) + UINT_32 blockHeight; ///< Height in element inside one block(1D->Micro, 2D->Macro) + UINT_32 blockSlices; ///< Slice number inside one block(1D->Micro, 
2D->Macro) + + /// Stereo info + ADDR_QBSTEREOINFO* pStereoInfo;///< Stereo information, needed when .qbStereo flag is TRUE + + INT_32 stencilTileIdx; ///< stencil tile index output when matchStencilTileCfg was set +} ADDR_COMPUTE_SURFACE_INFO_OUTPUT; + +/** +**************************************************************************************************** +* AddrComputeSurfaceInfo +* +* @brief +* Compute surface width/height/depth/alignments and suitable tiling mode +**************************************************************************************************** +*/ +ADDR_E_RETURNCODE ADDR_API AddrComputeSurfaceInfo( + ADDR_HANDLE hLib, + const ADDR_COMPUTE_SURFACE_INFO_INPUT* pIn, + ADDR_COMPUTE_SURFACE_INFO_OUTPUT* pOut); + +/** +**************************************************************************************************** +* ADDR_COMPUTE_SURFACE_ADDRFROMCOORD_INPUT +* +* @brief +* Input structure for AddrComputeSurfaceAddrFromCoord +**************************************************************************************************** +*/ +typedef struct _ADDR_COMPUTE_SURFACE_ADDRFROMCOORD_INPUT +{ + UINT_32 size; ///< Size of this structure in bytes + + UINT_32 x; ///< X coordinate + UINT_32 y; ///< Y coordinate + UINT_32 slice; ///< Slice index + UINT_32 sample; ///< Sample index, use fragment index for EQAA + + UINT_32 bpp; ///< Bits per pixel + UINT_32 pitch; ///< Surface pitch, in pixels + UINT_32 height; ///< Surface height, in pixels + UINT_32 numSlices; ///< Surface depth + UINT_32 numSamples; ///< Number of samples + + AddrTileMode tileMode; ///< Tile mode + BOOL_32 isDepth; ///< TRUE if the surface uses depth sample ordering within + /// micro tile. 
Textures can also choose depth sample order + UINT_32 tileBase; ///< Base offset (in bits) inside micro tile which handles + /// the case that components are stored separately + UINT_32 compBits; ///< The component bits actually needed(for planar surface) + + UINT_32 numFrags; ///< Number of fragments, leave it zero or the same as + /// number of samples for normal AA; Set it to the + /// number of fragments for EQAA + /// r800 and later HWL parameters + // Used for 1D tiling above + AddrTileType tileType; ///< See defintion of AddrTileType + struct + { + UINT_32 ignoreSE : 1; ///< TRUE if shader engines are ignored. This is texture + /// only flag. Only non-RT texture can set this to TRUE + UINT_32 reserved :31; ///< Reserved for future use. + }; + // 2D tiling needs following structure + ADDR_TILEINFO* pTileInfo; ///< 2D tile parameters. Client must provide all data + INT_32 tileIndex; ///< Tile index, MUST be -1 if you don't want to use it + /// while the global useTileIndex is set to 1 + union + { + struct + { + UINT_32 bankSwizzle; ///< Bank swizzle + UINT_32 pipeSwizzle; ///< Pipe swizzle + }; + UINT_32 tileSwizzle; ///< Combined swizzle, if useCombinedSwizzle is TRUE + }; +} ADDR_COMPUTE_SURFACE_ADDRFROMCOORD_INPUT; + +/** +**************************************************************************************************** +* ADDR_COMPUTE_SURFACE_ADDRFROMCOORD_OUTPUT +* +* @brief +* Output structure for AddrComputeSurfaceAddrFromCoord +**************************************************************************************************** +*/ +typedef struct _ADDR_COMPUTE_SURFACE_ADDRFROMCOORD_OUTPUT +{ + UINT_32 size; ///< Size of this structure in bytes + + UINT_64 addr; ///< Byte address + UINT_32 bitPosition; ///< Bit position within surfaceAddr, 0-7. + /// For surface bpp < 8, e.g. FMT_1. 
+ UINT_32 prtBlockIndex; ///< Index of a PRT tile (64K block) +} ADDR_COMPUTE_SURFACE_ADDRFROMCOORD_OUTPUT; + +/** +**************************************************************************************************** +* AddrComputeSurfaceAddrFromCoord +* +* @brief +* Compute surface address from a given coordinate. +**************************************************************************************************** +*/ +ADDR_E_RETURNCODE ADDR_API AddrComputeSurfaceAddrFromCoord( + ADDR_HANDLE hLib, + const ADDR_COMPUTE_SURFACE_ADDRFROMCOORD_INPUT* pIn, + ADDR_COMPUTE_SURFACE_ADDRFROMCOORD_OUTPUT* pOut); + +/** +**************************************************************************************************** +* ADDR_COMPUTE_SURFACE_COORDFROMADDR_INPUT +* +* @brief +* Input structure for AddrComputeSurfaceCoordFromAddr +**************************************************************************************************** +*/ +typedef struct _ADDR_COMPUTE_SURFACE_COORDFROMADDR_INPUT +{ + UINT_32 size; ///< Size of this structure in bytes + + UINT_64 addr; ///< Address in bytes + UINT_32 bitPosition; ///< Bit position in addr. 0-7. for surface bpp < 8, + /// e.g. FMT_1; + UINT_32 bpp; ///< Bits per pixel + UINT_32 pitch; ///< Pitch, in pixels + UINT_32 height; ///< Height in pixels + UINT_32 numSlices; ///< Surface depth + UINT_32 numSamples; ///< Number of samples + + AddrTileMode tileMode; ///< Tile mode + BOOL_32 isDepth; ///< Surface uses depth sample ordering within micro tile. + /// Note: Textures can choose depth sample order as well. 
+ UINT_32 tileBase; ///< Base offset (in bits) inside micro tile which handles + /// the case that components are stored separately + UINT_32 compBits; ///< The component bits actually needed(for planar surface) + + UINT_32 numFrags; ///< Number of fragments, leave it zero or the same as + /// number of samples for normal AA; Set it to the + /// number of fragments for EQAA + /// r800 and later HWL parameters + // Used for 1D tiling above + AddrTileType tileType; ///< See defintion of AddrTileType + struct + { + UINT_32 ignoreSE : 1; ///< TRUE if shader engines are ignored. This is texture + /// only flag. Only non-RT texture can set this to TRUE + UINT_32 reserved :31; ///< Reserved for future use. + }; + // 2D tiling needs following structure + ADDR_TILEINFO* pTileInfo; ///< 2D tile parameters. Client must provide all data + INT_32 tileIndex; ///< Tile index, MUST be -1 if you don't want to use it + /// while the global useTileIndex is set to 1 + union + { + struct + { + UINT_32 bankSwizzle; ///< Bank swizzle + UINT_32 pipeSwizzle; ///< Pipe swizzle + }; + UINT_32 tileSwizzle; ///< Combined swizzle, if useCombinedSwizzle is TRUE + }; +} ADDR_COMPUTE_SURFACE_COORDFROMADDR_INPUT; + +/** +**************************************************************************************************** +* ADDR_COMPUTE_SURFACE_COORDFROMADDR_OUTPUT +* +* @brief +* Output structure for AddrComputeSurfaceCoordFromAddr +**************************************************************************************************** +*/ +typedef struct _ADDR_COMPUTE_SURFACE_COORDFROMADDR_OUTPUT +{ + UINT_32 size; ///< Size of this structure in bytes + + UINT_32 x; ///< X coordinate + UINT_32 y; ///< Y coordinate + UINT_32 slice; ///< Index of slices + UINT_32 sample; ///< Index of samples, means fragment index for EQAA +} ADDR_COMPUTE_SURFACE_COORDFROMADDR_OUTPUT; + +/** +**************************************************************************************************** +* 
AddrComputeSurfaceCoordFromAddr +* +* @brief +* Compute coordinate from a given surface address +**************************************************************************************************** +*/ +ADDR_E_RETURNCODE ADDR_API AddrComputeSurfaceCoordFromAddr( + ADDR_HANDLE hLib, + const ADDR_COMPUTE_SURFACE_COORDFROMADDR_INPUT* pIn, + ADDR_COMPUTE_SURFACE_COORDFROMADDR_OUTPUT* pOut); + +//////////////////////////////////////////////////////////////////////////////////////////////////// +// HTile functions +//////////////////////////////////////////////////////////////////////////////////////////////////// + +/** +**************************************************************************************************** +* ADDR_HTILE_FLAGS +* +* @brief +* HTILE flags +**************************************************************************************************** +*/ +typedef union _ADDR_HTILE_FLAGS +{ + struct + { + UINT_32 tcCompatible : 1; ///< Flag indicates surface needs to be shader readable + UINT_32 skipTcCompatSizeAlign : 1; ///< Flag indicates that addrLib will not align htile + /// size to 256xBankxPipe when computing tc-compatible + /// htile info. + UINT_32 reserved : 30; ///< Reserved bits + }; + + UINT_32 value; +} ADDR_HTILE_FLAGS; + +/** +**************************************************************************************************** +* ADDR_COMPUTE_HTILE_INFO_INPUT +* +* @brief +* Input structure of AddrComputeHtileInfo +**************************************************************************************************** +*/ +typedef struct _ADDR_COMPUTE_HTILE_INFO_INPUT +{ + UINT_32 size; ///< Size of this structure in bytes + + ADDR_HTILE_FLAGS flags; ///< HTILE flags + UINT_32 pitch; ///< Surface pitch, in pixels + UINT_32 height; ///< Surface height, in pixels + UINT_32 numSlices; ///< Number of slices + BOOL_32 isLinear; ///< Linear or tiled HTILE layout + AddrHtileBlockSize blockWidth; ///< 4 or 8. 
EG above only support 8 + AddrHtileBlockSize blockHeight; ///< 4 or 8. EG above only support 8 + ADDR_TILEINFO* pTileInfo; ///< Tile info + + INT_32 tileIndex; ///< Tile index, MUST be -1 if you don't want to use it + /// while the global useTileIndex is set to 1 + INT_32 macroModeIndex; ///< Index in macro tile mode table if there is one (CI) + ///< README: When tileIndex is not -1, this must be valid +} ADDR_COMPUTE_HTILE_INFO_INPUT; + +/** +**************************************************************************************************** +* ADDR_COMPUTE_HTILE_INFO_OUTPUT +* +* @brief +* Output structure of AddrComputeHtileInfo +**************************************************************************************************** +*/ +typedef struct _ADDR_COMPUTE_HTILE_INFO_OUTPUT +{ + UINT_32 size; ///< Size of this structure in bytes + + UINT_32 pitch; ///< Pitch in pixels of depth buffer represented in this + /// HTile buffer. This might be larger than original depth + /// buffer pitch when called with an unaligned pitch. + UINT_32 height; ///< Height in pixels, as above + UINT_64 htileBytes; ///< Size of HTILE buffer, in bytes + UINT_32 baseAlign; ///< Base alignment + UINT_32 bpp; ///< Bits per pixel for HTILE is how many bits for an 8x8 block! + UINT_32 macroWidth; ///< Macro width in pixels, actually squared cache shape + UINT_32 macroHeight; ///< Macro height in pixels + UINT_64 sliceSize; ///< Slice size, in bytes. + BOOL_32 sliceInterleaved; ///< Flag to indicate if different slice's htile is interleaved + /// Compute engine clear can't be used if htile is interleaved + BOOL_32 nextMipLevelCompressible; ///< Flag to indicate whether HTILE can be enabled in + /// next mip level, it also indicates if memory set based + /// fast clear can be used for current mip level. 
+} ADDR_COMPUTE_HTILE_INFO_OUTPUT; + +/** +**************************************************************************************************** +* AddrComputeHtileInfo +* +* @brief +* Compute Htile pitch, height, base alignment and size in bytes +**************************************************************************************************** +*/ +ADDR_E_RETURNCODE ADDR_API AddrComputeHtileInfo( + ADDR_HANDLE hLib, + const ADDR_COMPUTE_HTILE_INFO_INPUT* pIn, + ADDR_COMPUTE_HTILE_INFO_OUTPUT* pOut); + +/** +**************************************************************************************************** +* ADDR_COMPUTE_HTILE_ADDRFROMCOORD_INPUT +* +* @brief +* Input structure for AddrComputeHtileAddrFromCoord +**************************************************************************************************** +*/ +typedef struct _ADDR_COMPUTE_HTILE_ADDRFROMCOORD_INPUT +{ + UINT_32 size; ///< Size of this structure in bytes + + UINT_32 pitch; ///< Pitch, in pixels + UINT_32 height; ///< Height in pixels + UINT_32 x; ///< X coordinate + UINT_32 y; ///< Y coordinate + UINT_32 slice; ///< Index of slice + UINT_32 numSlices; ///< Number of slices + BOOL_32 isLinear; ///< Linear or tiled HTILE layout + ADDR_HTILE_FLAGS flags; ///< htile flags + AddrHtileBlockSize blockWidth; ///< 4 or 8. 1 means 8, 0 means 4. EG above only support 8 + AddrHtileBlockSize blockHeight; ///< 4 or 8. 1 means 8, 0 means 4. 
EG above only support 8 + ADDR_TILEINFO* pTileInfo; ///< Tile info + + INT_32 tileIndex; ///< Tile index, MUST be -1 if you don't want to use it + /// while the global useTileIndex is set to 1 + INT_32 macroModeIndex; ///< Index in macro tile mode table if there is one (CI) + ///< README: When tileIndex is not -1, this must be valid + UINT_32 bpp; ///< depth/stencil buffer bit per pixel size + UINT_32 zStencilAddr; ///< tcCompatible Z/Stencil surface address +} ADDR_COMPUTE_HTILE_ADDRFROMCOORD_INPUT; + +/** +**************************************************************************************************** +* ADDR_COMPUTE_HTILE_ADDRFROMCOORD_OUTPUT +* +* @brief +* Output structure for AddrComputeHtileAddrFromCoord +**************************************************************************************************** +*/ +typedef struct _ADDR_COMPUTE_HTILE_ADDRFROMCOORD_OUTPUT +{ + UINT_32 size; ///< Size of this structure in bytes + + UINT_64 addr; ///< Address in bytes + UINT_32 bitPosition; ///< Bit position, 0 or 4. CMASK and HTILE shares some lib method. 
+ /// So we keep bitPosition for HTILE as well +} ADDR_COMPUTE_HTILE_ADDRFROMCOORD_OUTPUT; + +/** +**************************************************************************************************** +* AddrComputeHtileAddrFromCoord +* +* @brief +* Compute Htile address according to coordinates (of depth buffer) +**************************************************************************************************** +*/ +ADDR_E_RETURNCODE ADDR_API AddrComputeHtileAddrFromCoord( + ADDR_HANDLE hLib, + const ADDR_COMPUTE_HTILE_ADDRFROMCOORD_INPUT* pIn, + ADDR_COMPUTE_HTILE_ADDRFROMCOORD_OUTPUT* pOut); + +/** +**************************************************************************************************** +* ADDR_COMPUTE_HTILE_COORDFROMADDR_INPUT +* +* @brief +* Input structure for AddrComputeHtileCoordFromAddr +**************************************************************************************************** +*/ +typedef struct _ADDR_COMPUTE_HTILE_COORDFROMADDR_INPUT +{ + UINT_32 size; ///< Size of this structure in bytes + + UINT_64 addr; ///< Address + UINT_32 bitPosition; ///< Bit position 0 or 4. CMASK and HTILE share some methods + /// so we keep bitPosition for HTILE as well + UINT_32 pitch; ///< Pitch, in pixels + UINT_32 height; ///< Height, in pixels + UINT_32 numSlices; ///< Number of slices + BOOL_32 isLinear; ///< Linear or tiled HTILE layout + AddrHtileBlockSize blockWidth; ///< 4 or 8. 1 means 8, 0 means 4. R8xx/R9xx only support 8 + AddrHtileBlockSize blockHeight; ///< 4 or 8. 1 means 8, 0 means 4. 
R8xx/R9xx only support 8 + ADDR_TILEINFO* pTileInfo; ///< Tile info + + INT_32 tileIndex; ///< Tile index, MUST be -1 if you don't want to use it + /// while the global useTileIndex is set to 1 + INT_32 macroModeIndex; ///< Index in macro tile mode table if there is one (CI) + ///< README: When tileIndex is not -1, this must be valid +} ADDR_COMPUTE_HTILE_COORDFROMADDR_INPUT; + +/** +**************************************************************************************************** +* ADDR_COMPUTE_HTILE_COORDFROMADDR_OUTPUT +* +* @brief +* Output structure for AddrComputeHtileCoordFromAddr +**************************************************************************************************** +*/ +typedef struct _ADDR_COMPUTE_HTILE_COORDFROMADDR_OUTPUT +{ + UINT_32 size; ///< Size of this structure in bytes + + UINT_32 x; ///< X coordinate + UINT_32 y; ///< Y coordinate + UINT_32 slice; ///< Slice index +} ADDR_COMPUTE_HTILE_COORDFROMADDR_OUTPUT; + +/** +**************************************************************************************************** +* AddrComputeHtileCoordFromAddr +* +* @brief +* Compute coordinates within depth buffer (1st pixel of a micro tile) according to +* Htile address +**************************************************************************************************** +*/ +ADDR_E_RETURNCODE ADDR_API AddrComputeHtileCoordFromAddr( + ADDR_HANDLE hLib, + const ADDR_COMPUTE_HTILE_COORDFROMADDR_INPUT* pIn, + ADDR_COMPUTE_HTILE_COORDFROMADDR_OUTPUT* pOut); + +//////////////////////////////////////////////////////////////////////////////////////////////////// +// C-mask functions +//////////////////////////////////////////////////////////////////////////////////////////////////// + +/** +**************************************************************************************************** +* ADDR_CMASK_FLAGS +* +* @brief +* CMASK flags +**************************************************************************************************** +*/ 
+typedef union _ADDR_CMASK_FLAGS +{ + struct + { + UINT_32 tcCompatible : 1; ///< Flag indicates surface needs to be shader readable + UINT_32 reserved :31; ///< Reserved bits + }; + + UINT_32 value; +} ADDR_CMASK_FLAGS; + +/** +**************************************************************************************************** +* ADDR_COMPUTE_CMASK_INFO_INPUT +* +* @brief +* Input structure of AddrComputeCmaskInfo +**************************************************************************************************** +*/ +typedef struct _ADDR_COMPUTE_CMASKINFO_INPUT +{ + UINT_32 size; ///< Size of this structure in bytes + + ADDR_CMASK_FLAGS flags; ///< CMASK flags + UINT_32 pitch; ///< Pitch, in pixels, of color buffer + UINT_32 height; ///< Height, in pixels, of color buffer + UINT_32 numSlices; ///< Number of slices, of color buffer + BOOL_32 isLinear; ///< Linear or tiled layout, Only SI can be linear + ADDR_TILEINFO* pTileInfo; ///< Tile info + + INT_32 tileIndex; ///< Tile index, MUST be -1 if you don't want to use it + /// while the global useTileIndex is set to 1 + INT_32 macroModeIndex; ///< Index in macro tile mode table if there is one (CI) + ///< README: When tileIndex is not -1, this must be valid +} ADDR_COMPUTE_CMASK_INFO_INPUT; + +/** +**************************************************************************************************** +* ADDR_COMPUTE_CMASK_INFO_OUTPUT +* +* @brief +* Output structure of AddrComputeCmaskInfo +**************************************************************************************************** +*/ +typedef struct _ADDR_COMPUTE_CMASK_INFO_OUTPUT +{ + UINT_32 size; ///< Size of this structure in bytes + + UINT_32 pitch; ///< Pitch in pixels of color buffer which + /// this Cmask matches. The size might be larger than + /// original color buffer pitch when called with + /// an unaligned pitch. 
+ UINT_32 height; ///< Height in pixels, as above + UINT_64 cmaskBytes; ///< Size in bytes of CMask buffer + UINT_32 baseAlign; ///< Base alignment + UINT_32 blockMax; ///< Cmask block size. Need this to set CB_COLORn_MASK register + UINT_32 macroWidth; ///< Macro width in pixels, actually squared cache shape + UINT_32 macroHeight; ///< Macro height in pixels + UINT_64 sliceSize; ///< Slice size, in bytes. +} ADDR_COMPUTE_CMASK_INFO_OUTPUT; + +/** +**************************************************************************************************** +* AddrComputeCmaskInfo +* +* @brief +* Compute Cmask pitch, height, base alignment and size in bytes from color buffer +* info +**************************************************************************************************** +*/ +ADDR_E_RETURNCODE ADDR_API AddrComputeCmaskInfo( + ADDR_HANDLE hLib, + const ADDR_COMPUTE_CMASK_INFO_INPUT* pIn, + ADDR_COMPUTE_CMASK_INFO_OUTPUT* pOut); + +/** +**************************************************************************************************** +* ADDR_COMPUTE_CMASK_ADDRFROMCOORD_INPUT +* +* @brief +* Input structure for AddrComputeCmaskAddrFromCoord +* +**************************************************************************************************** +*/ +typedef struct _ADDR_COMPUTE_CMASK_ADDRFROMCOORD_INPUT +{ + UINT_32 size; ///< Size of this structure in bytes + UINT_32 x; ///< X coordinate + UINT_32 y; ///< Y coordinate + UINT_64 fmaskAddr; ///< Fmask addr for tc compatible Cmask + UINT_32 slice; ///< Slice index + UINT_32 pitch; ///< Pitch in pixels, of color buffer + UINT_32 height; ///< Height in pixels, of color buffer + UINT_32 numSlices; ///< Number of slices + UINT_32 bpp; + BOOL_32 isLinear; ///< Linear or tiled layout, Only SI can be linear + ADDR_CMASK_FLAGS flags; ///< CMASK flags + ADDR_TILEINFO* pTileInfo; ///< Tile info + + INT_32 tileIndex; ///< Tile index, MUST be -1 if you don't want to use it + ///< while the global useTileIndex is set to 1 + 
INT_32 macroModeIndex; ///< Index in macro tile mode table if there is one (CI) + ///< README: When tileIndex is not -1, this must be valid +} ADDR_COMPUTE_CMASK_ADDRFROMCOORD_INPUT; + +/** +**************************************************************************************************** +* ADDR_COMPUTE_CMASK_ADDRFROMCOORD_OUTPUT +* +* @brief +* Output structure for AddrComputeCmaskAddrFromCoord +**************************************************************************************************** +*/ +typedef struct _ADDR_COMPUTE_CMASK_ADDRFROMCOORD_OUTPUT +{ + UINT_32 size; ///< Size of this structure in bytes + + UINT_64 addr; ///< CMASK address in bytes + UINT_32 bitPosition; ///< Bit position within addr, 0-7. CMASK is 4 bpp, + /// so the address may be located in bit 0 (0) or 4 (4) +} ADDR_COMPUTE_CMASK_ADDRFROMCOORD_OUTPUT; + +/** +**************************************************************************************************** +* AddrComputeCmaskAddrFromCoord +* +* @brief +* Compute Cmask address according to coordinates (of MSAA color buffer) +**************************************************************************************************** +*/ +ADDR_E_RETURNCODE ADDR_API AddrComputeCmaskAddrFromCoord( + ADDR_HANDLE hLib, + const ADDR_COMPUTE_CMASK_ADDRFROMCOORD_INPUT* pIn, + ADDR_COMPUTE_CMASK_ADDRFROMCOORD_OUTPUT* pOut); + +/** +**************************************************************************************************** +* ADDR_COMPUTE_CMASK_COORDFROMADDR_INPUT +* +* @brief +* Input structure for AddrComputeCmaskCoordFromAddr +**************************************************************************************************** +*/ +typedef struct _ADDR_COMPUTE_CMASK_COORDFROMADDR_INPUT +{ + UINT_32 size; ///< Size of this structure in bytes + + UINT_64 addr; ///< CMASK address in bytes + UINT_32 bitPosition; ///< Bit position within addr, 0-7. 
CMASK is 4 bpp, + /// so the address may be located in bit 0 (0) or 4 (4) + UINT_32 pitch; ///< Pitch, in pixels + UINT_32 height; ///< Height in pixels + UINT_32 numSlices; ///< Number of slices + BOOL_32 isLinear; ///< Linear or tiled layout, Only SI can be linear + ADDR_TILEINFO* pTileInfo; ///< Tile info + + INT_32 tileIndex; ///< Tile index, MUST be -1 if you don't want to use it + /// while the global useTileIndex is set to 1 + INT_32 macroModeIndex; ///< Index in macro tile mode table if there is one (CI) + ///< README: When tileIndex is not -1, this must be valid +} ADDR_COMPUTE_CMASK_COORDFROMADDR_INPUT; + +/** +**************************************************************************************************** +* ADDR_COMPUTE_CMASK_COORDFROMADDR_OUTPUT +* +* @brief +* Output structure for AddrComputeCmaskCoordFromAddr +**************************************************************************************************** +*/ +typedef struct _ADDR_COMPUTE_CMASK_COORDFROMADDR_OUTPUT +{ + UINT_32 size; ///< Size of this structure in bytes + + UINT_32 x; ///< X coordinate + UINT_32 y; ///< Y coordinate + UINT_32 slice; ///< Slice index +} ADDR_COMPUTE_CMASK_COORDFROMADDR_OUTPUT; + +/** +**************************************************************************************************** +* AddrComputeCmaskCoordFromAddr +* +* @brief +* Compute coordinates within color buffer (1st pixel of a micro tile) according to +* Cmask address +**************************************************************************************************** +*/ +ADDR_E_RETURNCODE ADDR_API AddrComputeCmaskCoordFromAddr( + ADDR_HANDLE hLib, + const ADDR_COMPUTE_CMASK_COORDFROMADDR_INPUT* pIn, + ADDR_COMPUTE_CMASK_COORDFROMADDR_OUTPUT* pOut); + +//////////////////////////////////////////////////////////////////////////////////////////////////// +// F-mask functions +//////////////////////////////////////////////////////////////////////////////////////////////////// + +/** 
+**************************************************************************************************** +* ADDR_COMPUTE_FMASK_INFO_INPUT +* +* @brief +* Input structure for AddrComputeFmaskInfo +**************************************************************************************************** +*/ +typedef struct _ADDR_COMPUTE_FMASK_INFO_INPUT +{ + UINT_32 size; ///< Size of this structure in bytes + + AddrTileMode tileMode; ///< Tile mode + UINT_32 pitch; ///< Surface pitch, in pixels + UINT_32 height; ///< Surface height, in pixels + UINT_32 numSlices; ///< Number of slice/depth + UINT_32 numSamples; ///< Number of samples + UINT_32 numFrags; ///< Number of fragments, leave it zero or the same as + /// number of samples for normal AA; Set it to the + /// number of fragments for EQAA + /// r800 and later HWL parameters + struct + { + UINT_32 resolved: 1; ///< TRUE if the surface is for resolved fmask, only used + /// by H/W clients. S/W should always set it to FALSE. + UINT_32 reserved: 31; ///< Reserved for future use. + }; + ADDR_TILEINFO* pTileInfo; ///< 2D tiling parameters. 
Clients must give valid data + INT_32 tileIndex; ///< Tile index, MUST be -1 if you don't want to use it + /// while the global useTileIndex is set to 1 +} ADDR_COMPUTE_FMASK_INFO_INPUT; + +/** +**************************************************************************************************** +* ADDR_COMPUTE_FMASK_INFO_OUTPUT +* +* @brief +* Output structure for AddrComputeFmaskInfo +**************************************************************************************************** +*/ +typedef struct _ADDR_COMPUTE_FMASK_INFO_OUTPUT +{ + UINT_32 size; ///< Size of this structure in bytes + + UINT_32 pitch; ///< Pitch of fmask in pixels + UINT_32 height; ///< Height of fmask in pixels + UINT_32 numSlices; ///< Slices of fmask + UINT_64 fmaskBytes; ///< Size of fmask in bytes + UINT_32 baseAlign; ///< Base address alignment + UINT_32 pitchAlign; ///< Pitch alignment + UINT_32 heightAlign; ///< Height alignment + UINT_32 bpp; ///< Bits per pixel of FMASK is: number of bit planes + UINT_32 numSamples; ///< Number of samples, used for dump, export this since input + /// may be changed in 9xx and above + /// r800 and later HWL parameters + ADDR_TILEINFO* pTileInfo; ///< Tile parameters used. 
Fmask can have different + /// bank_height from color buffer + INT_32 tileIndex; ///< Tile index, MUST be -1 if you don't want to use it + /// while the global useTileIndex is set to 1 + INT_32 macroModeIndex; ///< Index in macro tile mode table if there is one (CI) + UINT_64 sliceSize; ///< Size of slice in bytes +} ADDR_COMPUTE_FMASK_INFO_OUTPUT; + +/** +**************************************************************************************************** +* AddrComputeFmaskInfo +* +* @brief +* Compute Fmask pitch/height/depth/alignments and size in bytes +**************************************************************************************************** +*/ +ADDR_E_RETURNCODE ADDR_API AddrComputeFmaskInfo( + ADDR_HANDLE hLib, + const ADDR_COMPUTE_FMASK_INFO_INPUT* pIn, + ADDR_COMPUTE_FMASK_INFO_OUTPUT* pOut); + +/** +**************************************************************************************************** +* ADDR_COMPUTE_FMASK_ADDRFROMCOORD_INPUT +* +* @brief +* Input structure for AddrComputeFmaskAddrFromCoord +**************************************************************************************************** +*/ +typedef struct _ADDR_COMPUTE_FMASK_ADDRFROMCOORD_INPUT +{ + UINT_32 size; ///< Size of this structure in bytes + + UINT_32 x; ///< X coordinate + UINT_32 y; ///< Y coordinate + UINT_32 slice; ///< Slice index + UINT_32 plane; ///< Plane number + UINT_32 sample; ///< Sample index (fragment index for EQAA) + + UINT_32 pitch; ///< Surface pitch, in pixels + UINT_32 height; ///< Surface height, in pixels + UINT_32 numSamples; ///< Number of samples + UINT_32 numFrags; ///< Number of fragments, leave it zero or the same as + /// number of samples for normal AA; Set it to the + /// number of fragments for EQAA + + AddrTileMode tileMode; ///< Tile mode + union + { + struct + { + UINT_32 bankSwizzle; ///< Bank swizzle + UINT_32 pipeSwizzle; ///< Pipe swizzle + }; + UINT_32 tileSwizzle; ///< Combined swizzle, if useCombinedSwizzle is TRUE + }; + + 
/// r800 and later HWL parameters + struct + { + UINT_32 resolved: 1; ///< TRUE if this is a resolved fmask, used by H/W clients + UINT_32 ignoreSE: 1; ///< TRUE if shader engines are ignored. + UINT_32 reserved: 30; ///< Reserved for future use. + }; + ADDR_TILEINFO* pTileInfo; ///< 2D tiling parameters. Client must provide all data + +} ADDR_COMPUTE_FMASK_ADDRFROMCOORD_INPUT; + +/** +**************************************************************************************************** +* ADDR_COMPUTE_FMASK_ADDRFROMCOORD_OUTPUT +* +* @brief +* Output structure for AddrComputeFmaskAddrFromCoord +**************************************************************************************************** +*/ +typedef struct _ADDR_COMPUTE_FMASK_ADDRFROMCOORD_OUTPUT +{ + UINT_32 size; ///< Size of this structure in bytes + + UINT_64 addr; ///< Fmask address + UINT_32 bitPosition; ///< Bit position within fmaskAddr, 0-7. +} ADDR_COMPUTE_FMASK_ADDRFROMCOORD_OUTPUT; + +/** +**************************************************************************************************** +* AddrComputeFmaskAddrFromCoord +* +* @brief +* Compute Fmask address according to coordinates (x,y,slice,sample,plane) +**************************************************************************************************** +*/ +ADDR_E_RETURNCODE ADDR_API AddrComputeFmaskAddrFromCoord( + ADDR_HANDLE hLib, + const ADDR_COMPUTE_FMASK_ADDRFROMCOORD_INPUT* pIn, + ADDR_COMPUTE_FMASK_ADDRFROMCOORD_OUTPUT* pOut); + +/** +**************************************************************************************************** +* ADDR_COMPUTE_FMASK_COORDFROMADDR_INPUT +* +* @brief +* Input structure for AddrComputeFmaskCoordFromAddr +**************************************************************************************************** +*/ +typedef struct _ADDR_COMPUTE_FMASK_COORDFROMADDR_INPUT +{ + UINT_32 size; ///< Size of this structure in bytes + + UINT_64 addr; ///< Address + UINT_32 bitPosition; ///< Bit position within 
addr, 0-7. + + UINT_32 pitch; ///< Pitch, in pixels + UINT_32 height; ///< Height in pixels + UINT_32 numSamples; ///< Number of samples + UINT_32 numFrags; ///< Number of fragments + AddrTileMode tileMode; ///< Tile mode + union + { + struct + { + UINT_32 bankSwizzle; ///< Bank swizzle + UINT_32 pipeSwizzle; ///< Pipe swizzle + }; + UINT_32 tileSwizzle; ///< Combined swizzle, if useCombinedSwizzle is TRUE + }; + + /// r800 and later HWL parameters + struct + { + UINT_32 resolved: 1; ///< TRUE if this is a resolved fmask, used by HW components + UINT_32 ignoreSE: 1; ///< TRUE if shader engines are ignored. + UINT_32 reserved: 30; ///< Reserved for future use. + }; + ADDR_TILEINFO* pTileInfo; ///< 2D tile parameters. Client must provide all data + +} ADDR_COMPUTE_FMASK_COORDFROMADDR_INPUT; + +/** +**************************************************************************************************** +* ADDR_COMPUTE_FMASK_COORDFROMADDR_OUTPUT +* +* @brief +* Output structure for AddrComputeFmaskCoordFromAddr +**************************************************************************************************** +*/ +typedef struct _ADDR_COMPUTE_FMASK_COORDFROMADDR_OUTPUT +{ + UINT_32 size; ///< Size of this structure in bytes + + UINT_32 x; ///< X coordinate + UINT_32 y; ///< Y coordinate + UINT_32 slice; ///< Slice index + UINT_32 plane; ///< Plane number + UINT_32 sample; ///< Sample index (fragment index for EQAA) +} ADDR_COMPUTE_FMASK_COORDFROMADDR_OUTPUT; + +/** +**************************************************************************************************** +* AddrComputeFmaskCoordFromAddr +* +* @brief +* Compute FMASK coordinate from an given address +**************************************************************************************************** +*/ +ADDR_E_RETURNCODE ADDR_API AddrComputeFmaskCoordFromAddr( + ADDR_HANDLE hLib, + const ADDR_COMPUTE_FMASK_COORDFROMADDR_INPUT* pIn, + ADDR_COMPUTE_FMASK_COORDFROMADDR_OUTPUT* pOut); + 
+//////////////////////////////////////////////////////////////////////////////////////////////////// +// Element/utility functions +//////////////////////////////////////////////////////////////////////////////////////////////////// + +/** +**************************************************************************************************** +* AddrGetVersion +* +* @brief +* Get AddrLib version number +**************************************************************************************************** +*/ +UINT_32 ADDR_API AddrGetVersion(ADDR_HANDLE hLib); + +/** +**************************************************************************************************** +* AddrUseTileIndex +* +* @brief +* Return TRUE if tileIndex is enabled in this address library +**************************************************************************************************** +*/ +BOOL_32 ADDR_API AddrUseTileIndex(ADDR_HANDLE hLib); + +/** +**************************************************************************************************** +* AddrUseCombinedSwizzle +* +* @brief +* Return TRUE if combined swizzle is enabled in this address library +**************************************************************************************************** +*/ +BOOL_32 ADDR_API AddrUseCombinedSwizzle(ADDR_HANDLE hLib); + +/** +**************************************************************************************************** +* ADDR_EXTRACT_BANKPIPE_SWIZZLE_INPUT +* +* @brief +* Input structure of AddrExtractBankPipeSwizzle +**************************************************************************************************** +*/ +typedef struct _ADDR_EXTRACT_BANKPIPE_SWIZZLE_INPUT +{ + UINT_32 size; ///< Size of this structure in bytes + + UINT_32 base256b; ///< Base256b value + + /// r800 and later HWL parameters + ADDR_TILEINFO* pTileInfo; ///< 2D tile parameters. 
Client must provide all data + + INT_32 tileIndex; ///< Tile index, MUST be -1 if you don't want to use it + /// while the global useTileIndex is set to 1 + INT_32 macroModeIndex; ///< Index in macro tile mode table if there is one (CI) + ///< README: When tileIndex is not -1, this must be valid +} ADDR_EXTRACT_BANKPIPE_SWIZZLE_INPUT; + +/** +**************************************************************************************************** +* ADDR_EXTRACT_BANKPIPE_SWIZZLE_OUTPUT +* +* @brief +* Output structure of AddrExtractBankPipeSwizzle +**************************************************************************************************** +*/ +typedef struct _ADDR_EXTRACT_BANKPIPE_SWIZZLE_OUTPUT +{ + UINT_32 size; ///< Size of this structure in bytes + + UINT_32 bankSwizzle; ///< Bank swizzle + UINT_32 pipeSwizzle; ///< Pipe swizzle +} ADDR_EXTRACT_BANKPIPE_SWIZZLE_OUTPUT; + +/** +**************************************************************************************************** +* AddrExtractBankPipeSwizzle +* +* @brief +* Extract Bank and Pipe swizzle from base256b +* @return +* ADDR_OK if no error +**************************************************************************************************** +*/ +ADDR_E_RETURNCODE ADDR_API AddrExtractBankPipeSwizzle( + ADDR_HANDLE hLib, + const ADDR_EXTRACT_BANKPIPE_SWIZZLE_INPUT* pIn, + ADDR_EXTRACT_BANKPIPE_SWIZZLE_OUTPUT* pOut); + +/** +**************************************************************************************************** +* ADDR_COMBINE_BANKPIPE_SWIZZLE_INPUT +* +* @brief +* Input structure of AddrCombineBankPipeSwizzle +**************************************************************************************************** +*/ +typedef struct _ADDR_COMBINE_BANKPIPE_SWIZZLE_INPUT +{ + UINT_32 size; ///< Size of this structure in bytes + + UINT_32 bankSwizzle; ///< Bank swizzle + UINT_32 pipeSwizzle; ///< Pipe swizzle + UINT_64 baseAddr; ///< Base address (leave it zero for driver clients) + + /// r800 
and later HWL parameters + ADDR_TILEINFO* pTileInfo; ///< 2D tile parameters. Client must provide all data + + INT_32 tileIndex; ///< Tile index, MUST be -1 if you don't want to use it + /// while the global useTileIndex is set to 1 + INT_32 macroModeIndex; ///< Index in macro tile mode table if there is one (CI) + ///< README: When tileIndex is not -1, this must be valid +} ADDR_COMBINE_BANKPIPE_SWIZZLE_INPUT; + +/** +**************************************************************************************************** +* ADDR_COMBINE_BANKPIPE_SWIZZLE_OUTPUT +* +* @brief +* Output structure of AddrCombineBankPipeSwizzle +**************************************************************************************************** +*/ +typedef struct _ADDR_COMBINE_BANKPIPE_SWIZZLE_OUTPUT +{ + UINT_32 size; ///< Size of this structure in bytes + + UINT_32 tileSwizzle; ///< Combined swizzle +} ADDR_COMBINE_BANKPIPE_SWIZZLE_OUTPUT; + +/** +**************************************************************************************************** +* AddrCombineBankPipeSwizzle +* +* @brief +* Combine Bank and Pipe swizzle +* @return +* ADDR_OK if no error +* @note +* baseAddr here is full MCAddress instead of base256b +**************************************************************************************************** +*/ +ADDR_E_RETURNCODE ADDR_API AddrCombineBankPipeSwizzle( + ADDR_HANDLE hLib, + const ADDR_COMBINE_BANKPIPE_SWIZZLE_INPUT* pIn, + ADDR_COMBINE_BANKPIPE_SWIZZLE_OUTPUT* pOut); + +/** +**************************************************************************************************** +* ADDR_COMPUTE_SLICESWIZZLE_INPUT +* +* @brief +* Input structure of AddrComputeSliceSwizzle +**************************************************************************************************** +*/ +typedef struct _ADDR_COMPUTE_SLICESWIZZLE_INPUT +{ + UINT_32 size; ///< Size of this structure in bytes + + AddrTileMode tileMode; ///< Tile Mode + UINT_32 baseSwizzle; ///< Base tile swizzle + 
UINT_32 slice; ///< Slice index + UINT_64 baseAddr; ///< Base address, driver should leave it 0 in most cases + + /// r800 and later HWL parameters + ADDR_TILEINFO* pTileInfo; ///< 2D tile parameters. Actually banks needed here! + + INT_32 tileIndex; ///< Tile index, MUST be -1 if you don't want to use it + /// while the global useTileIndex is set to 1 + INT_32 macroModeIndex; ///< Index in macro tile mode table if there is one (CI) + ///< README: When tileIndex is not -1, this must be valid +} ADDR_COMPUTE_SLICESWIZZLE_INPUT; + +/** +**************************************************************************************************** +* ADDR_COMPUTE_SLICESWIZZLE_OUTPUT +* +* @brief +* Output structure of AddrComputeSliceSwizzle +**************************************************************************************************** +*/ +typedef struct _ADDR_COMPUTE_SLICESWIZZLE_OUTPUT +{ + UINT_32 size; ///< Size of this structure in bytes + + UINT_32 tileSwizzle; ///< Recalculated tileSwizzle value +} ADDR_COMPUTE_SLICESWIZZLE_OUTPUT; + +/** +**************************************************************************************************** +* AddrComputeSliceSwizzle +* +* @brief +* Extract Bank and Pipe swizzle from base256b +* @return +* ADDR_OK if no error +**************************************************************************************************** +*/ +ADDR_E_RETURNCODE ADDR_API AddrComputeSliceSwizzle( + ADDR_HANDLE hLib, + const ADDR_COMPUTE_SLICESWIZZLE_INPUT* pIn, + ADDR_COMPUTE_SLICESWIZZLE_OUTPUT* pOut); + +/** +**************************************************************************************************** +* AddrSwizzleGenOption +* +* @brief +* Which swizzle generating options: legacy or linear +**************************************************************************************************** +*/ +typedef enum _AddrSwizzleGenOption +{ + ADDR_SWIZZLE_GEN_DEFAULT = 0, ///< As is in client driver implemention for swizzle + 
ADDR_SWIZZLE_GEN_LINEAR = 1, ///< Using a linear increment of swizzle +} AddrSwizzleGenOption; + +/** +**************************************************************************************************** +* AddrSwizzleOption +* +* @brief +* Controls how swizzle is generated +**************************************************************************************************** +*/ +typedef union _ADDR_SWIZZLE_OPTION +{ + struct + { + UINT_32 genOption : 1; ///< The way swizzle is generated, see AddrSwizzleGenOption + UINT_32 reduceBankBit : 1; ///< TRUE if we need reduce swizzle bits + UINT_32 reserved :30; ///< Reserved bits + }; + + UINT_32 value; + +} ADDR_SWIZZLE_OPTION; + +/** +**************************************************************************************************** +* ADDR_COMPUTE_BASE_SWIZZLE_INPUT +* +* @brief +* Input structure of AddrComputeBaseSwizzle +**************************************************************************************************** +*/ +typedef struct _ADDR_COMPUTE_BASE_SWIZZLE_INPUT +{ + UINT_32 size; ///< Size of this structure in bytes + + ADDR_SWIZZLE_OPTION option; ///< Swizzle option + UINT_32 surfIndex; ///< Index of this surface type + AddrTileMode tileMode; ///< Tile Mode + + /// r800 and later HWL parameters + ADDR_TILEINFO* pTileInfo; ///< 2D tile parameters. Actually banks needed here! 
+ + INT_32 tileIndex; ///< Tile index, MUST be -1 if you don't want to use it + /// while the global useTileIndex is set to 1 + INT_32 macroModeIndex; ///< Index in macro tile mode table if there is one (CI) + ///< README: When tileIndex is not -1, this must be valid +} ADDR_COMPUTE_BASE_SWIZZLE_INPUT; + +/** +**************************************************************************************************** +* ADDR_COMPUTE_BASE_SWIZZLE_OUTPUT +* +* @brief +* Output structure of AddrComputeBaseSwizzle +**************************************************************************************************** +*/ +typedef struct _ADDR_COMPUTE_BASE_SWIZZLE_OUTPUT +{ + UINT_32 size; ///< Size of this structure in bytes + + UINT_32 tileSwizzle; ///< Combined swizzle +} ADDR_COMPUTE_BASE_SWIZZLE_OUTPUT; + +/** +**************************************************************************************************** +* AddrComputeBaseSwizzle +* +* @brief +* Return a Combined Bank and Pipe swizzle base on surface based on surface type/index +* @return +* ADDR_OK if no error +**************************************************************************************************** +*/ +ADDR_E_RETURNCODE ADDR_API AddrComputeBaseSwizzle( + ADDR_HANDLE hLib, + const ADDR_COMPUTE_BASE_SWIZZLE_INPUT* pIn, + ADDR_COMPUTE_BASE_SWIZZLE_OUTPUT* pOut); + +/** +**************************************************************************************************** +* ELEM_GETEXPORTNORM_INPUT +* +* @brief +* Input structure for ElemGetExportNorm +* +**************************************************************************************************** +*/ +typedef struct _ELEM_GETEXPORTNORM_INPUT +{ + UINT_32 size; ///< Size of this structure in bytes + + AddrColorFormat format; ///< Color buffer format; Client should use ColorFormat + AddrSurfaceNumber num; ///< Surface number type; Client should use NumberType + AddrSurfaceSwap swap; ///< Surface swap byte swap; Client should use SurfaceSwap + UINT_32 
numSamples; ///< Number of samples +} ELEM_GETEXPORTNORM_INPUT; + +/** +**************************************************************************************************** +* ElemGetExportNorm +* +* @brief +* Helper function to check one format can be EXPORT_NUM, which is a register +* CB_COLOR_INFO.SURFACE_FORMAT. FP16 can be reported as EXPORT_NORM for rv770 in r600 +* family +* @note +* The implementation is only for r600. +* 00 - EXPORT_FULL: PS exports are 4 pixels with 4 components with 32-bits-per-component. (two +* clocks per export) +* 01 - EXPORT_NORM: PS exports are 4 pixels with 4 components with 16-bits-per-component. (one +* clock per export) +* +**************************************************************************************************** +*/ +BOOL_32 ADDR_API ElemGetExportNorm( + ADDR_HANDLE hLib, + const ELEM_GETEXPORTNORM_INPUT* pIn); + +/** +**************************************************************************************************** +* ELEM_FLT32TODEPTHPIXEL_INPUT +* +* @brief +* Input structure for addrFlt32ToDepthPixel +* +**************************************************************************************************** +*/ +typedef struct _ELEM_FLT32TODEPTHPIXEL_INPUT +{ + UINT_32 size; ///< Size of this structure in bytes + + AddrDepthFormat format; ///< Depth buffer format + ADDR_FLT_32 comps[2]; ///< Component values (Z/stencil) +} ELEM_FLT32TODEPTHPIXEL_INPUT; + +/** +**************************************************************************************************** +* ELEM_FLT32TODEPTHPIXEL_INPUT +* +* @brief +* Output structure for ElemFlt32ToDepthPixel +* +**************************************************************************************************** +*/ +typedef struct _ELEM_FLT32TODEPTHPIXEL_OUTPUT +{ + UINT_32 size; ///< Size of this structure in bytes + + UINT_8* pPixel; ///< Real depth value. Same data type as depth buffer. + /// Client must provide enough storage for this type. 
+ UINT_32 depthBase; ///< Tile base in bits for depth bits + UINT_32 stencilBase; ///< Tile base in bits for stencil bits + UINT_32 depthBits; ///< Bits for depth + UINT_32 stencilBits; ///< Bits for stencil +} ELEM_FLT32TODEPTHPIXEL_OUTPUT; + +/** +**************************************************************************************************** +* ElemFlt32ToDepthPixel +* +* @brief +* Convert a FLT_32 value to a depth/stencil pixel value +* +* @return +* Return code +* +**************************************************************************************************** +*/ +ADDR_E_RETURNCODE ADDR_API ElemFlt32ToDepthPixel( + ADDR_HANDLE hLib, + const ELEM_FLT32TODEPTHPIXEL_INPUT* pIn, + ELEM_FLT32TODEPTHPIXEL_OUTPUT* pOut); + +/** +**************************************************************************************************** +* ELEM_FLT32TOCOLORPIXEL_INPUT +* +* @brief +* Input structure for addrFlt32ToColorPixel +* +**************************************************************************************************** +*/ +typedef struct _ELEM_FLT32TOCOLORPIXEL_INPUT +{ + UINT_32 size; ///< Size of this structure in bytes + + AddrColorFormat format; ///< Color buffer format + AddrSurfaceNumber surfNum; ///< Surface number + AddrSurfaceSwap surfSwap; ///< Surface swap + ADDR_FLT_32 comps[4]; ///< Component values (r/g/b/a) +} ELEM_FLT32TOCOLORPIXEL_INPUT; + +/** +**************************************************************************************************** +* ELEM_FLT32TOCOLORPIXEL_INPUT +* +* @brief +* Output structure for ElemFlt32ToColorPixel +* +**************************************************************************************************** +*/ +typedef struct _ELEM_FLT32TOCOLORPIXEL_OUTPUT +{ + UINT_32 size; ///< Size of this structure in bytes + + UINT_8* pPixel; ///< Real color value. Same data type as color buffer. + /// Client must provide enough storage for this type. 
+} ELEM_FLT32TOCOLORPIXEL_OUTPUT; + +/** +**************************************************************************************************** +* ElemFlt32ToColorPixel +* +* @brief +* Convert a FLT_32 value to a red/green/blue/alpha pixel value +* +* @return +* Return code +* +**************************************************************************************************** +*/ +ADDR_E_RETURNCODE ADDR_API ElemFlt32ToColorPixel( + ADDR_HANDLE hLib, + const ELEM_FLT32TOCOLORPIXEL_INPUT* pIn, + ELEM_FLT32TOCOLORPIXEL_OUTPUT* pOut); + +/** +**************************************************************************************************** +* ElemSize +* +* @brief +* Get bits-per-element for specified format +* +* @return +* Bits-per-element of specified format +* +**************************************************************************************************** +*/ +UINT_32 ADDR_API ElemSize( + ADDR_HANDLE hLib, + AddrFormat format); + +/** +**************************************************************************************************** +* ADDR_CONVERT_TILEINFOTOHW_INPUT +* +* @brief +* Input structure for AddrConvertTileInfoToHW +* @note +* When reverse is TRUE, indices are igonred +**************************************************************************************************** +*/ +typedef struct _ADDR_CONVERT_TILEINFOTOHW_INPUT +{ + UINT_32 size; ///< Size of this structure in bytes + BOOL_32 reverse; ///< Convert control flag. + /// FALSE: convert from real value to HW value; + /// TRUE: convert from HW value to real value. 
+ + /// r800 and later HWL parameters + ADDR_TILEINFO* pTileInfo; ///< Tile parameters with real value + + INT_32 tileIndex; ///< Tile index, MUST be -1 if you don't want to use it + /// while the global useTileIndex is set to 1 + INT_32 macroModeIndex; ///< Index in macro tile mode table if there is one (CI) + ///< README: When tileIndex is not -1, this must be valid + UINT_32 bpp; ///< Bits per pixel +} ADDR_CONVERT_TILEINFOTOHW_INPUT; + +/** +**************************************************************************************************** +* ADDR_CONVERT_TILEINFOTOHW_OUTPUT +* +* @brief +* Output structure for AddrConvertTileInfoToHW +**************************************************************************************************** +*/ +typedef struct _ADDR_CONVERT_TILEINFOTOHW_OUTPUT +{ + UINT_32 size; ///< Size of this structure in bytes + + /// r800 and later HWL parameters + ADDR_TILEINFO* pTileInfo; ///< Tile parameters with hardware register value + +} ADDR_CONVERT_TILEINFOTOHW_OUTPUT; + +/** +**************************************************************************************************** +* AddrConvertTileInfoToHW +* +* @brief +* Convert tile info from real value to hardware register value +**************************************************************************************************** +*/ +ADDR_E_RETURNCODE ADDR_API AddrConvertTileInfoToHW( + ADDR_HANDLE hLib, + const ADDR_CONVERT_TILEINFOTOHW_INPUT* pIn, + ADDR_CONVERT_TILEINFOTOHW_OUTPUT* pOut); + +/** +**************************************************************************************************** +* ADDR_CONVERT_TILEINDEX_INPUT +* +* @brief +* Input structure for AddrConvertTileIndex +**************************************************************************************************** +*/ +typedef struct _ADDR_CONVERT_TILEINDEX_INPUT +{ + UINT_32 size; ///< Size of this structure in bytes + + INT_32 tileIndex; ///< Tile index + INT_32 macroModeIndex; ///< Index in macro tile mode 
table if there is one (CI) + UINT_32 bpp; ///< Bits per pixel + BOOL_32 tileInfoHw; ///< Set to TRUE if client wants HW enum, otherwise actual +} ADDR_CONVERT_TILEINDEX_INPUT; + +/** +**************************************************************************************************** +* ADDR_CONVERT_TILEINDEX_OUTPUT +* +* @brief +* Output structure for AddrConvertTileIndex +**************************************************************************************************** +*/ +typedef struct _ADDR_CONVERT_TILEINDEX_OUTPUT +{ + UINT_32 size; ///< Size of this structure in bytes + + AddrTileMode tileMode; ///< Tile mode + AddrTileType tileType; ///< Tile type + ADDR_TILEINFO* pTileInfo; ///< Tile info + +} ADDR_CONVERT_TILEINDEX_OUTPUT; + +/** +**************************************************************************************************** +* AddrConvertTileIndex +* +* @brief +* Convert tile index to tile mode/type/info +**************************************************************************************************** +*/ +ADDR_E_RETURNCODE ADDR_API AddrConvertTileIndex( + ADDR_HANDLE hLib, + const ADDR_CONVERT_TILEINDEX_INPUT* pIn, + ADDR_CONVERT_TILEINDEX_OUTPUT* pOut); + +/** +**************************************************************************************************** +* ADDR_GET_MACROMODEINDEX_INPUT +* +* @brief +* Input structure for AddrGetMacroModeIndex +**************************************************************************************************** +*/ +typedef struct _ADDR_GET_MACROMODEINDEX_INPUT +{ + UINT_32 size; ///< Size of this structure in bytes + ADDR_SURFACE_FLAGS flags; ///< Surface flag + INT_32 tileIndex; ///< Tile index + UINT_32 bpp; ///< Bits per pixel + UINT_32 numFrags; ///< Number of color fragments +} ADDR_GET_MACROMODEINDEX_INPUT; + +/** +**************************************************************************************************** +* ADDR_GET_MACROMODEINDEX_OUTPUT +* +* @brief +* Output structure for 
AddrGetMacroModeIndex +**************************************************************************************************** +*/ +typedef struct _ADDR_GET_MACROMODEINDEX_OUTPUT +{ + UINT_32 size; ///< Size of this structure in bytes + INT_32 macroModeIndex; ///< Index in macro tile mode table if there is one (CI) +} ADDR_GET_MACROMODEINDEX_OUTPUT; + +/** +**************************************************************************************************** +* AddrGetMacroModeIndex +* +* @brief +* Get macro mode index based on input parameters +**************************************************************************************************** +*/ +ADDR_E_RETURNCODE ADDR_API AddrGetMacroModeIndex( + ADDR_HANDLE hLib, + const ADDR_GET_MACROMODEINDEX_INPUT* pIn, + ADDR_GET_MACROMODEINDEX_OUTPUT* pOut); + +/** +**************************************************************************************************** +* ADDR_CONVERT_TILEINDEX1_INPUT +* +* @brief +* Input structure for AddrConvertTileIndex1 (without macro mode index) +**************************************************************************************************** +*/ +typedef struct _ADDR_CONVERT_TILEINDEX1_INPUT +{ + UINT_32 size; ///< Size of this structure in bytes + + INT_32 tileIndex; ///< Tile index + UINT_32 bpp; ///< Bits per pixel + UINT_32 numSamples; ///< Number of samples + BOOL_32 tileInfoHw; ///< Set to TRUE if client wants HW enum, otherwise actual +} ADDR_CONVERT_TILEINDEX1_INPUT; + +/** +**************************************************************************************************** +* AddrConvertTileIndex1 +* +* @brief +* Convert tile index to tile mode/type/info +**************************************************************************************************** +*/ +ADDR_E_RETURNCODE ADDR_API AddrConvertTileIndex1( + ADDR_HANDLE hLib, + const ADDR_CONVERT_TILEINDEX1_INPUT* pIn, + ADDR_CONVERT_TILEINDEX_OUTPUT* pOut); + +/** 
+**************************************************************************************************** +* ADDR_GET_TILEINDEX_INPUT +* +* @brief +* Input structure for AddrGetTileIndex +**************************************************************************************************** +*/ +typedef struct _ADDR_GET_TILEINDEX_INPUT +{ + UINT_32 size; ///< Size of this structure in bytes + + AddrTileMode tileMode; ///< Tile mode + AddrTileType tileType; ///< Tile-type: disp/non-disp/... + ADDR_TILEINFO* pTileInfo; ///< Pointer to tile-info structure, can be NULL for linear/1D +} ADDR_GET_TILEINDEX_INPUT; + +/** +**************************************************************************************************** +* ADDR_GET_TILEINDEX_OUTPUT +* +* @brief +* Output structure for AddrGetTileIndex +**************************************************************************************************** +*/ +typedef struct _ADDR_GET_TILEINDEX_OUTPUT +{ + UINT_32 size; ///< Size of this structure in bytes + + INT_32 index; ///< index in table +} ADDR_GET_TILEINDEX_OUTPUT; + +/** +**************************************************************************************************** +* AddrGetTileIndex +* +* @brief +* Get the tiling mode index in table +**************************************************************************************************** +*/ +ADDR_E_RETURNCODE ADDR_API AddrGetTileIndex( + ADDR_HANDLE hLib, + const ADDR_GET_TILEINDEX_INPUT* pIn, + ADDR_GET_TILEINDEX_OUTPUT* pOut); + +/** +**************************************************************************************************** +* ADDR_PRT_INFO_INPUT +* +* @brief +* Input structure for AddrComputePrtInfo +**************************************************************************************************** +*/ +typedef struct _ADDR_PRT_INFO_INPUT +{ + AddrFormat format; ///< Surface format + UINT_32 baseMipWidth; ///< Base mipmap width + UINT_32 baseMipHeight; ///< Base mipmap height + UINT_32 baseMipDepth; ///< 
Base mipmap depth + UINT_32 numFrags; ///< Number of fragments, +} ADDR_PRT_INFO_INPUT; + +/** +**************************************************************************************************** +* ADDR_PRT_INFO_OUTPUT +* +* @brief +* Input structure for AddrComputePrtInfo +**************************************************************************************************** +*/ +typedef struct _ADDR_PRT_INFO_OUTPUT +{ + UINT_32 prtTileWidth; + UINT_32 prtTileHeight; +} ADDR_PRT_INFO_OUTPUT; + +/** +**************************************************************************************************** +* AddrComputePrtInfo +* +* @brief +* Compute prt surface related information +**************************************************************************************************** +*/ +ADDR_E_RETURNCODE ADDR_API AddrComputePrtInfo( + ADDR_HANDLE hLib, + const ADDR_PRT_INFO_INPUT* pIn, + ADDR_PRT_INFO_OUTPUT* pOut); + +//////////////////////////////////////////////////////////////////////////////////////////////////// +// DCC key functions +//////////////////////////////////////////////////////////////////////////////////////////////////// + +/** +**************************************************************************************************** +* _ADDR_COMPUTE_DCCINFO_INPUT +* +* @brief +* Input structure of AddrComputeDccInfo +**************************************************************************************************** +*/ +typedef struct _ADDR_COMPUTE_DCCINFO_INPUT +{ + UINT_32 size; ///< Size of this structure in bytes + UINT_32 bpp; ///< BitPP of color surface + UINT_32 numSamples; ///< Sample number of color surface + UINT_64 colorSurfSize; ///< Size of color surface to which dcc key is bound + AddrTileMode tileMode; ///< Tile mode of color surface + ADDR_TILEINFO tileInfo; ///< Tile info of color surface + UINT_32 tileSwizzle; ///< Tile swizzle + INT_32 tileIndex; ///< Tile index of color surface, + ///< MUST be -1 if you don't want to use it + ///< while 
the global useTileIndex is set to 1 + INT_32 macroModeIndex; ///< Index in macro tile mode table if there is one (CI) + ///< README: When tileIndex is not -1, this must be valid +} ADDR_COMPUTE_DCCINFO_INPUT; + +/** +**************************************************************************************************** +* ADDR_COMPUTE_DCCINFO_OUTPUT +* +* @brief +* Output structure of AddrComputeDccInfo +**************************************************************************************************** +*/ +typedef struct _ADDR_COMPUTE_DCCINFO_OUTPUT +{ + UINT_32 size; ///< Size of this structure in bytes + UINT_32 dccRamBaseAlign; ///< Base alignment of dcc key + UINT_64 dccRamSize; ///< Size of dcc key + UINT_64 dccFastClearSize; ///< Size of dcc key portion that can be fast cleared + BOOL_32 subLvlCompressible; ///< Whether sub resource is compressible + BOOL_32 dccRamSizeAligned; ///< Whether the dcc key size is aligned +} ADDR_COMPUTE_DCCINFO_OUTPUT; + +/** +**************************************************************************************************** +* AddrComputeDccInfo +* +* @brief +* Compute DCC key size, base alignment +* info +**************************************************************************************************** +*/ +ADDR_E_RETURNCODE ADDR_API AddrComputeDccInfo( + ADDR_HANDLE hLib, + const ADDR_COMPUTE_DCCINFO_INPUT* pIn, + ADDR_COMPUTE_DCCINFO_OUTPUT* pOut); + +/** +**************************************************************************************************** +* ADDR_GET_MAX_ALINGMENTS_OUTPUT +* +* @brief +* Output structure of AddrGetMaxAlignments +**************************************************************************************************** +*/ +typedef struct _ADDR_GET_MAX_ALINGMENTS_OUTPUT +{ + UINT_32 size; ///< Size of this structure in bytes + UINT_32 baseAlign; ///< Maximum base alignment in bytes +} ADDR_GET_MAX_ALINGMENTS_OUTPUT; + +/** 
+**************************************************************************************************** +* AddrGetMaxAlignments +* +* @brief +* Gets maximum alignments +**************************************************************************************************** +*/ +ADDR_E_RETURNCODE ADDR_API AddrGetMaxAlignments( + ADDR_HANDLE hLib, + ADDR_GET_MAX_ALINGMENTS_OUTPUT* pOut); + +/** +**************************************************************************************************** +* AddrGetMaxMetaAlignments +* +* @brief +* Gets maximum alignments for metadata +**************************************************************************************************** +*/ +ADDR_E_RETURNCODE ADDR_API AddrGetMaxMetaAlignments( + ADDR_HANDLE hLib, + ADDR_GET_MAX_ALINGMENTS_OUTPUT* pOut); + +/** +**************************************************************************************************** +* Address library interface version 2 +* available from Gfx9 hardware +**************************************************************************************************** +* Addr2ComputeSurfaceInfo() +* Addr2ComputeSurfaceAddrFromCoord() +* Addr2ComputeSurfaceCoordFromAddr() + +* Addr2ComputeHtileInfo() +* Addr2ComputeHtileAddrFromCoord() +* Addr2ComputeHtileCoordFromAddr() +* +* Addr2ComputeCmaskInfo() +* Addr2ComputeCmaskAddrFromCoord() +* Addr2ComputeCmaskCoordFromAddr() +* +* Addr2ComputeFmaskInfo() +* Addr2ComputeFmaskAddrFromCoord() +* Addr2ComputeFmaskCoordFromAddr() +* +* Addr2ComputeDccInfo() +* +**/ + +//////////////////////////////////////////////////////////////////////////////////////////////////// +// Surface functions for Gfx9 +//////////////////////////////////////////////////////////////////////////////////////////////////// + +/** +**************************************************************************************************** +* ADDR2_SURFACE_FLAGS +* +* @brief +* Surface flags 
+**************************************************************************************************** +*/ +typedef union _ADDR2_SURFACE_FLAGS +{ + struct + { + UINT_32 color : 1; ///< This resource is a color buffer, can be used with RTV + UINT_32 depth : 1; ///< This resource is a depth buffer, can be used with DSV + UINT_32 stencil : 1; ///< This resource is a stencil buffer, can be used with DSV + UINT_32 fmask : 1; ///< This is an fmask surface + UINT_32 overlay : 1; ///< This is an overlay surface + UINT_32 display : 1; ///< This resource is displayable, can be used with DRV + UINT_32 prt : 1; ///< This is a partially resident texture + UINT_32 qbStereo : 1; ///< This is a quad buffer stereo surface + UINT_32 interleaved : 1; ///< Special flag for interleaved YUV surface padding + UINT_32 texture : 1; ///< This resource can be used with SRV + UINT_32 unordered : 1; ///< This resource can be used with UAV + UINT_32 rotated : 1; ///< This resource is rotated and displayable + UINT_32 needEquation : 1; ///< This resource needs equation to be generated if possible + UINT_32 opt4space : 1; ///< This resource should be optimized for space + UINT_32 minimizeAlign : 1; ///< This resource should use minimum alignment + UINT_32 noMetadata : 1; ///< This resource has no metadata + UINT_32 metaRbUnaligned : 1; ///< This resource has rb unaligned metadata + UINT_32 metaPipeUnaligned : 1; ///< This resource has pipe unaligned metadata + UINT_32 view3dAs2dArray : 1; ///< This resource is a 3D resource viewed as 2D array + UINT_32 reserved : 13; ///< Reserved bits + }; + + UINT_32 value; +} ADDR2_SURFACE_FLAGS; + +/** +**************************************************************************************************** +* ADDR2_COMPUTE_SURFACE_INFO_INPUT +* +* @brief +* Input structure for Addr2ComputeSurfaceInfo +**************************************************************************************************** +*/ +typedef struct _ADDR2_COMPUTE_SURFACE_INFO_INPUT +{ + UINT_32 
size; ///< Size of this structure in bytes + + ADDR2_SURFACE_FLAGS flags; ///< Surface flags + AddrSwizzleMode swizzleMode; ///< Swizzle Mode for Gfx9 + AddrResourceType resourceType; ///< Surface type + AddrFormat format; ///< Surface format + UINT_32 bpp; ///< bits per pixel + UINT_32 width; ///< Width (of mip0), in pixels + UINT_32 height; ///< Height (of mip0), in pixels + UINT_32 numSlices; ///< Number surface slice/depth (of mip0), + UINT_32 numMipLevels; ///< Total mipmap levels. + UINT_32 numSamples; ///< Number of samples + UINT_32 numFrags; ///< Number of fragments, leave it zero or the same as + /// number of samples for normal AA; Set it to the + /// number of fragments for EQAA + UINT_32 pitchInElement; ///< Pitch in elements (blocks for compressed formats) + UINT_32 sliceAlign; ///< Required slice size in bytes +} ADDR2_COMPUTE_SURFACE_INFO_INPUT; + +/** +**************************************************************************************************** +* ADDR2_MIP_INFO +* +* @brief +* Structure that contains information for mip level +* +**************************************************************************************************** +*/ +typedef struct _ADDR2_MIP_INFO +{ + UINT_32 pitch; ///< Pitch in elements + UINT_32 height; ///< Padded height in elements + UINT_32 depth; ///< Padded depth + UINT_32 pixelPitch; ///< Pitch in pixels + UINT_32 pixelHeight; ///< Padded height in pixels + UINT_32 equationIndex; ///< Equation index in the equation table + UINT_64 offset; ///< Offset in bytes from mip base, should only be used + ///< to setup vam surface descriptor, can't be used + ///< to setup swizzle pattern + UINT_64 macroBlockOffset; ///< macro block offset in bytes from mip base + UINT_32 mipTailOffset; ///< mip tail offset in bytes + UINT_32 mipTailCoordX; ///< mip tail coord x + UINT_32 mipTailCoordY; ///< mip tail coord y + UINT_32 mipTailCoordZ; ///< mip tail coord z +} ADDR2_MIP_INFO; + +/** 
+**************************************************************************************************** +* ADDR2_COMPUTE_SURFACE_INFO_OUTPUT +* +* @brief +* Output structure for Addr2ComputeSurfInfo +* @note + Element: AddrLib unit for computing. e.g. BCn: 4x4 blocks; R32B32B32: 32bit with 3x pitch + Pixel: Original pixel +**************************************************************************************************** +*/ +typedef struct _ADDR2_COMPUTE_SURFACE_INFO_OUTPUT +{ + UINT_32 size; ///< Size of this structure in bytes + + UINT_32 pitch; ///< Pitch in elements (blocks for compressed formats) + UINT_32 height; ///< Padded height (of mip0) in elements + UINT_32 numSlices; ///< Padded depth for 3d resource + ///< or padded number of slices for 2d array resource + UINT_32 mipChainPitch; ///< Pitch (of total mip chain) in elements + UINT_32 mipChainHeight; ///< Padded height (of total mip chain) in elements + UINT_32 mipChainSlice; ///< Padded depth (of total mip chain) + UINT_64 sliceSize; ///< Slice (total mip chain) size in bytes + UINT_64 surfSize; ///< Surface (total mip chain) size in bytes + UINT_32 baseAlign; ///< Base address alignment + UINT_32 bpp; ///< Bits per elements + /// (e.g. 
blocks for BCn, 1/3 for 96bit) + UINT_32 pixelMipChainPitch; ///< Mip chain pitch in original pixels + UINT_32 pixelMipChainHeight; ///< Mip chain height in original pixels + UINT_32 pixelPitch; ///< Pitch in original pixels + UINT_32 pixelHeight; ///< Height in original pixels + UINT_32 pixelBits; ///< Original bits per pixel, passed from input + + UINT_32 blockWidth; ///< Width in element inside one block + UINT_32 blockHeight; ///< Height in element inside one block + UINT_32 blockSlices; ///< Slice number inside one block + ///< Prt tile is one block, its width/height/slice + ///< equals to blcok width/height/slice + + BOOL_32 epitchIsHeight; ///< Whether to use height to program epitch register + /// Stereo info + ADDR_QBSTEREOINFO* pStereoInfo; ///< Stereo info, needed if qbStereo flag is TRUE + /// Mip info + ADDR2_MIP_INFO* pMipInfo; ///< Pointer to mip information array + /// if it is not NULL, the array is assumed to + /// contain numMipLevels entries + + UINT_32 equationIndex; ///< Equation index in the equation table of mip0 + BOOL_32 mipChainInTail; ///< If whole mipchain falls into mip tail block + UINT_32 firstMipIdInTail; ///< The id of first mip in tail, if there is no mip + /// in tail, it will be set to number of mip levels +} ADDR2_COMPUTE_SURFACE_INFO_OUTPUT; + +/** +**************************************************************************************************** +* Addr2ComputeSurfaceInfo +* +* @brief +* Compute surface width/height/slices/alignments and suitable tiling mode +**************************************************************************************************** +*/ +ADDR_E_RETURNCODE ADDR_API Addr2ComputeSurfaceInfo( + ADDR_HANDLE hLib, + const ADDR2_COMPUTE_SURFACE_INFO_INPUT* pIn, + ADDR2_COMPUTE_SURFACE_INFO_OUTPUT* pOut); + +/** +**************************************************************************************************** +* ADDR2_COMPUTE_SURFACE_ADDRFROMCOORD_INPUT +* +* @brief +* Input structure for 
Addr2ComputeSurfaceAddrFromCoord +**************************************************************************************************** +*/ +typedef struct _ADDR2_COMPUTE_SURFACE_ADDRFROMCOORD_INPUT +{ + UINT_32 size; ///< Size of this structure in bytes + + UINT_32 x; ///< X coordinate + UINT_32 y; ///< Y coordinate + UINT_32 slice; ///< Slice index + UINT_32 sample; ///< Sample index, use fragment index for EQAA + UINT_32 mipId; ///< the mip ID in mip chain + + AddrSwizzleMode swizzleMode; ///< Swizzle mode for Gfx9 + ADDR2_SURFACE_FLAGS flags; ///< Surface flags + AddrResourceType resourceType; ///< Surface type + UINT_32 bpp; ///< Bits per pixel + UINT_32 unalignedWidth; ///< Surface original width (of mip0) + UINT_32 unalignedHeight; ///< Surface original height (of mip0) + UINT_32 numSlices; ///< Surface original slices (of mip0) + UINT_32 numMipLevels; ///< Total mipmap levels + UINT_32 numSamples; ///< Number of samples + UINT_32 numFrags; ///< Number of fragments, leave it zero or the same as + /// number of samples for normal AA; Set it to the + /// number of fragments for EQAA + + UINT_32 pipeBankXor; ///< Combined swizzle used to do bank/pipe rotation + UINT_32 pitchInElement; ///< Pitch in elements (blocks for compressed formats) +} ADDR2_COMPUTE_SURFACE_ADDRFROMCOORD_INPUT; + +/** +**************************************************************************************************** +* ADDR2_COMPUTE_SURFACE_ADDRFROMCOORD_OUTPUT +* +* @brief +* Output structure for Addr2ComputeSurfaceAddrFromCoord +**************************************************************************************************** +*/ +typedef struct _ADDR2_COMPUTE_SURFACE_ADDRFROMCOORD_OUTPUT +{ + UINT_32 size; ///< Size of this structure in bytes + + UINT_64 addr; ///< Byte address + UINT_32 bitPosition; ///< Bit position within surfaceAddr, 0-7. + /// For surface bpp < 8, e.g. FMT_1. 
+ UINT_32 prtBlockIndex; ///< Index of a PRT tile (64K block) +} ADDR2_COMPUTE_SURFACE_ADDRFROMCOORD_OUTPUT; + +/** +**************************************************************************************************** +* Addr2ComputeSurfaceAddrFromCoord +* +* @brief +* Compute surface address from a given coordinate. +**************************************************************************************************** +*/ +ADDR_E_RETURNCODE ADDR_API Addr2ComputeSurfaceAddrFromCoord( + ADDR_HANDLE hLib, + const ADDR2_COMPUTE_SURFACE_ADDRFROMCOORD_INPUT* pIn, + ADDR2_COMPUTE_SURFACE_ADDRFROMCOORD_OUTPUT* pOut); + +/** +**************************************************************************************************** +* ADDR2_COMPUTE_SURFACE_COORDFROMADDR_INPUT +* +* @brief +* Input structure for Addr2ComputeSurfaceCoordFromAddr +**************************************************************************************************** +*/ +typedef struct _ADDR2_COMPUTE_SURFACE_COORDFROMADDR_INPUT +{ + UINT_32 size; ///< Size of this structure in bytes + + UINT_64 addr; ///< Address in bytes + UINT_32 bitPosition; ///< Bit position in addr. 0-7. for surface bpp < 8, + /// e.g. FMT_1; + + AddrSwizzleMode swizzleMode; ///< Swizzle mode for Gfx9 + ADDR2_SURFACE_FLAGS flags; ///< Surface flags + AddrResourceType resourceType; ///< Surface type + UINT_32 bpp; ///< Bits per pixel + UINT_32 unalignedWidth; ///< Surface original width (of mip0) + UINT_32 unalignedHeight; ///< Surface original height (of mip0) + UINT_32 numSlices; ///< Surface original slices (of mip0) + UINT_32 numMipLevels; ///< Total mipmap levels. 
+ UINT_32 numSamples; ///< Number of samples + UINT_32 numFrags; ///< Number of fragments, leave it zero or the same as + /// number of samples for normal AA; Set it to the + /// number of fragments for EQAA + + UINT_32 pipeBankXor; ///< Combined swizzle used to do bank/pipe rotation + UINT_32 pitchInElement; ///< Pitch in elements (blocks for compressed formats) +} ADDR2_COMPUTE_SURFACE_COORDFROMADDR_INPUT; + +/** +**************************************************************************************************** +* ADDR2_COMPUTE_SURFACE_COORDFROMADDR_OUTPUT +* +* @brief +* Output structure for Addr2ComputeSurfaceCoordFromAddr +**************************************************************************************************** +*/ +typedef struct _ADDR2_COMPUTE_SURFACE_COORDFROMADDR_OUTPUT +{ + UINT_32 size; ///< Size of this structure in bytes + + UINT_32 x; ///< X coordinate + UINT_32 y; ///< Y coordinate + UINT_32 slice; ///< Index of slices + UINT_32 sample; ///< Index of samples, means fragment index for EQAA + UINT_32 mipId; ///< mipmap level id +} ADDR2_COMPUTE_SURFACE_COORDFROMADDR_OUTPUT; + +/** +**************************************************************************************************** +* Addr2ComputeSurfaceCoordFromAddr +* +* @brief +* Compute coordinate from a given surface address +**************************************************************************************************** +*/ +ADDR_E_RETURNCODE ADDR_API Addr2ComputeSurfaceCoordFromAddr( + ADDR_HANDLE hLib, + const ADDR2_COMPUTE_SURFACE_COORDFROMADDR_INPUT* pIn, + ADDR2_COMPUTE_SURFACE_COORDFROMADDR_OUTPUT* pOut); + +//////////////////////////////////////////////////////////////////////////////////////////////////// +// HTile functions for Gfx9 +//////////////////////////////////////////////////////////////////////////////////////////////////// + +/** +**************************************************************************************************** +* ADDR2_META_FLAGS +* +* @brief 
+* Metadata flags +**************************************************************************************************** +*/ +typedef union _ADDR2_META_FLAGS +{ + struct + { + UINT_32 pipeAligned : 1; ///< if Metadata being pipe aligned + UINT_32 rbAligned : 1; ///< if Metadata being RB aligned + UINT_32 linear : 1; ///< if Metadata linear, GFX9 does not support this! + UINT_32 reserved : 29; ///< Reserved bits + }; + + UINT_32 value; +} ADDR2_META_FLAGS; + +/** +**************************************************************************************************** +* ADDR2_META_MIP_INFO +* +* @brief +* Structure to store per mip metadata information +**************************************************************************************************** +*/ +typedef struct _ADDR2_META_MIP_INFO +{ + BOOL_32 inMiptail; + union + { + struct + { + UINT_32 startX; + UINT_32 startY; + UINT_32 startZ; + UINT_32 width; + UINT_32 height; + UINT_32 depth; + }; + + struct + { + UINT_32 offset; ///< Metadata offset within one slice, + /// the thickness of a slice is meta block depth. + UINT_32 sliceSize; ///< Metadata size within one slice, + /// the thickness of a slice is meta block depth. 
+ }; + }; +} ADDR2_META_MIP_INFO; + +/** +**************************************************************************************************** +* ADDR2_COMPUTE_HTILE_INFO_INPUT +* +* @brief +* Input structure of Addr2ComputeHtileInfo +**************************************************************************************************** +*/ +typedef struct _ADDR2_COMPUTE_HTILE_INFO_INPUT +{ + UINT_32 size; ///< Size of this structure in bytes + + ADDR2_META_FLAGS hTileFlags; ///< HTILE flags + ADDR2_SURFACE_FLAGS depthFlags; ///< Depth surface flags + AddrSwizzleMode swizzleMode; ///< Depth surface swizzle mode + UINT_32 unalignedWidth; ///< Depth surface original width (of mip0) + UINT_32 unalignedHeight; ///< Depth surface original height (of mip0) + UINT_32 numSlices; ///< Number of slices of depth surface (of mip0) + UINT_32 numMipLevels; ///< Total mipmap levels of color surface + UINT_32 firstMipIdInTail; /// Id of the first mip in tail, + /// if no mip is in tail, it should be set to + /// number of mip levels +} ADDR2_COMPUTE_HTILE_INFO_INPUT; + +/** +**************************************************************************************************** +* ADDR2_COMPUTE_HTILE_INFO_OUTPUT +* +* @brief +* Output structure of Addr2ComputeHtileInfo +**************************************************************************************************** +*/ +typedef struct _ADDR2_COMPUTE_HTILE_INFO_OUTPUT +{ + UINT_32 size; ///< Size of this structure in bytes + + UINT_32 pitch; ///< Pitch in pixels of depth buffer represented in this + /// HTile buffer. This might be larger than original depth + /// buffer pitch when called with an unaligned pitch. + UINT_32 height; ///< Height in pixels, as above + UINT_32 baseAlign; ///< Base alignment + UINT_32 sliceSize; ///< Slice size, in bytes. 
+ UINT_32 htileBytes; ///< Size of HTILE buffer, in bytes + UINT_32 metaBlkWidth; ///< Meta block width + UINT_32 metaBlkHeight; ///< Meta block height + UINT_32 metaBlkNumPerSlice; ///< Number of metablock within one slice + + ADDR2_META_MIP_INFO* pMipInfo; ///< HTILE mip information +} ADDR2_COMPUTE_HTILE_INFO_OUTPUT; + +/** +**************************************************************************************************** +* Addr2ComputeHtileInfo +* +* @brief +* Compute Htile pitch, height, base alignment and size in bytes +**************************************************************************************************** +*/ +ADDR_E_RETURNCODE ADDR_API Addr2ComputeHtileInfo( + ADDR_HANDLE hLib, + const ADDR2_COMPUTE_HTILE_INFO_INPUT* pIn, + ADDR2_COMPUTE_HTILE_INFO_OUTPUT* pOut); + +/** +**************************************************************************************************** +* ADDR2_COMPUTE_HTILE_ADDRFROMCOORD_INPUT +* +* @brief +* Input structure for Addr2ComputeHtileAddrFromCoord +**************************************************************************************************** +*/ +typedef struct _ADDR2_COMPUTE_HTILE_ADDRFROMCOORD_INPUT +{ + UINT_32 size; ///< Size of this structure in bytes + + UINT_32 x; ///< X coordinate + UINT_32 y; ///< Y coordinate + UINT_32 slice; ///< Index of slices + UINT_32 mipId; ///< mipmap level id + + ADDR2_META_FLAGS hTileFlags; ///< HTILE flags + ADDR2_SURFACE_FLAGS depthflags; ///< Depth surface flags + AddrSwizzleMode swizzleMode; ///< Depth surface swizzle mode + UINT_32 bpp; ///< Depth surface bits per pixel + UINT_32 unalignedWidth; ///< Depth surface original width (of mip0) + UINT_32 unalignedHeight; ///< Depth surface original height (of mip0) + UINT_32 numSlices; ///< Depth surface original depth (of mip0) + UINT_32 numMipLevels; ///< Depth surface total mipmap levels + UINT_32 numSamples; ///< Depth surface number of samples + UINT_32 pipeXor; ///< Pipe xor setting +} 
ADDR2_COMPUTE_HTILE_ADDRFROMCOORD_INPUT; + +/** +**************************************************************************************************** +* ADDR2_COMPUTE_HTILE_ADDRFROMCOORD_OUTPUT +* +* @brief +* Output structure for Addr2ComputeHtileAddrFromCoord +**************************************************************************************************** +*/ +typedef struct _ADDR2_COMPUTE_HTILE_ADDRFROMCOORD_OUTPUT +{ + UINT_32 size; ///< Size of this structure in bytes + + UINT_64 addr; ///< Address in bytes +} ADDR2_COMPUTE_HTILE_ADDRFROMCOORD_OUTPUT; + +/** +**************************************************************************************************** +* Addr2ComputeHtileAddrFromCoord +* +* @brief +* Compute Htile address according to coordinates (of depth buffer) +**************************************************************************************************** +*/ +ADDR_E_RETURNCODE ADDR_API Addr2ComputeHtileAddrFromCoord( + ADDR_HANDLE hLib, + const ADDR2_COMPUTE_HTILE_ADDRFROMCOORD_INPUT* pIn, + ADDR2_COMPUTE_HTILE_ADDRFROMCOORD_OUTPUT* pOut); + +/** +**************************************************************************************************** +* ADDR2_COMPUTE_HTILE_COORDFROMADDR_INPUT +* +* @brief +* Input structure for Addr2ComputeHtileCoordFromAddr +**************************************************************************************************** +*/ +typedef struct _ADDR2_COMPUTE_HTILE_COORDFROMADDR_INPUT +{ + UINT_32 size; ///< Size of this structure in bytes + + UINT_64 addr; ///< Address + + ADDR2_META_FLAGS hTileFlags; ///< HTILE flags + ADDR2_SURFACE_FLAGS depthFlags; ///< Depth surface flags + AddrSwizzleMode swizzleMode; ///< Depth surface swizzle mode + UINT_32 bpp; ///< Depth surface bits per pixel + UINT_32 unalignedWidth; ///< Depth surface original width (of mip0) + UINT_32 unalignedHeight; ///< Depth surface original height (of mip0) + UINT_32 numSlices; ///< Depth surface original depth (of mip0) + UINT_32 
numMipLevels; ///< Depth surface total mipmap levels + UINT_32 numSamples; ///< Depth surface number of samples + UINT_32 pipeXor; ///< Pipe xor setting +} ADDR2_COMPUTE_HTILE_COORDFROMADDR_INPUT; + +/** +**************************************************************************************************** +* ADDR2_COMPUTE_HTILE_COORDFROMADDR_OUTPUT +* +* @brief +* Output structure for Addr2ComputeHtileCoordFromAddr +**************************************************************************************************** +*/ +typedef struct _ADDR2_COMPUTE_HTILE_COORDFROMADDR_OUTPUT +{ + UINT_32 size; ///< Size of this structure in bytes + + UINT_32 x; ///< X coordinate + UINT_32 y; ///< Y coordinate + UINT_32 slice; ///< Index of slices + UINT_32 mipId; ///< mipmap level id +} ADDR2_COMPUTE_HTILE_COORDFROMADDR_OUTPUT; + +/** +**************************************************************************************************** +* Addr2ComputeHtileCoordFromAddr +* +* @brief +* Compute coordinates within depth buffer (1st pixel of a micro tile) according to +* Htile address +**************************************************************************************************** +*/ +ADDR_E_RETURNCODE ADDR_API Addr2ComputeHtileCoordFromAddr( + ADDR_HANDLE hLib, + const ADDR2_COMPUTE_HTILE_COORDFROMADDR_INPUT* pIn, + ADDR2_COMPUTE_HTILE_COORDFROMADDR_OUTPUT* pOut); + +//////////////////////////////////////////////////////////////////////////////////////////////////// +// C-mask functions for Gfx9 +//////////////////////////////////////////////////////////////////////////////////////////////////// + +/** +**************************************************************************************************** +* ADDR2_COMPUTE_CMASK_INFO_INPUT +* +* @brief +* Input structure of Addr2ComputeCmaskInfo +**************************************************************************************************** +*/ +typedef struct _ADDR2_COMPUTE_CMASKINFO_INPUT +{ + UINT_32 size; ///< Size of this 
structure in bytes + + ADDR2_META_FLAGS cMaskFlags; ///< CMASK flags + ADDR2_SURFACE_FLAGS colorFlags; ///< Color surface flags + AddrResourceType resourceType; ///< Color surface type + AddrSwizzleMode swizzleMode; ///< FMask surface swizzle mode + UINT_32 unalignedWidth; ///< Color surface original width + UINT_32 unalignedHeight; ///< Color surface original height + UINT_32 numSlices; ///< Number of slices of color buffer +} ADDR2_COMPUTE_CMASK_INFO_INPUT; + +/** +**************************************************************************************************** +* ADDR2_COMPUTE_CMASK_INFO_OUTPUT +* +* @brief +* Output structure of Addr2ComputeCmaskInfo +**************************************************************************************************** +*/ +typedef struct _ADDR2_COMPUTE_CMASK_INFO_OUTPUT +{ + UINT_32 size; ///< Size of this structure in bytes + + UINT_32 pitch; ///< Pitch in pixels of color buffer which + /// this Cmask matches. The size might be larger than + /// original color buffer pitch when called with + /// an unaligned pitch. + UINT_32 height; ///< Height in pixels, as above + UINT_32 baseAlign; ///< Base alignment + UINT_32 sliceSize; ///< Slice size, in bytes. 
+ UINT_32 cmaskBytes; ///< Size in bytes of CMask buffer + UINT_32 metaBlkWidth; ///< Meta block width + UINT_32 metaBlkHeight; ///< Meta block height + + UINT_32 metaBlkNumPerSlice; ///< Number of metablock within one slice +} ADDR2_COMPUTE_CMASK_INFO_OUTPUT; + +/** +**************************************************************************************************** +* Addr2ComputeCmaskInfo +* +* @brief +* Compute Cmask pitch, height, base alignment and size in bytes from color buffer +* info +**************************************************************************************************** +*/ +ADDR_E_RETURNCODE ADDR_API Addr2ComputeCmaskInfo( + ADDR_HANDLE hLib, + const ADDR2_COMPUTE_CMASK_INFO_INPUT* pIn, + ADDR2_COMPUTE_CMASK_INFO_OUTPUT* pOut); + +/** +**************************************************************************************************** +* ADDR2_COMPUTE_CMASK_ADDRFROMCOORD_INPUT +* +* @brief +* Input structure for Addr2ComputeCmaskAddrFromCoord +* +**************************************************************************************************** +*/ +typedef struct _ADDR2_COMPUTE_CMASK_ADDRFROMCOORD_INPUT +{ + UINT_32 size; ///< Size of this structure in bytes + + UINT_32 x; ///< X coordinate + UINT_32 y; ///< Y coordinate + UINT_32 slice; ///< Index of slices + + ADDR2_META_FLAGS cMaskFlags; ///< CMASK flags + ADDR2_SURFACE_FLAGS colorFlags; ///< Color surface flags + AddrResourceType resourceType; ///< Color surface type + AddrSwizzleMode swizzleMode; ///< FMask surface swizzle mode + + UINT_32 unalignedWidth; ///< Color surface original width (of mip0) + UINT_32 unalignedHeight; ///< Color surface original height (of mip0) + UINT_32 numSlices; ///< Color surface original slices (of mip0) + + UINT_32 numSamples; ///< Color surface sample number + UINT_32 numFrags; ///< Color surface fragment number + + UINT_32 pipeXor; ///< pipe Xor setting +} ADDR2_COMPUTE_CMASK_ADDRFROMCOORD_INPUT; + +/** 
+**************************************************************************************************** +* ADDR2_COMPUTE_CMASK_ADDRFROMCOORD_OUTPUT +* +* @brief +* Output structure for Addr2ComputeCmaskAddrFromCoord +**************************************************************************************************** +*/ +typedef struct _ADDR2_COMPUTE_CMASK_ADDRFROMCOORD_OUTPUT +{ + UINT_32 size; ///< Size of this structure in bytes + + UINT_64 addr; ///< CMASK address in bytes + UINT_32 bitPosition; ///< Bit position within addr, 0 or 4 +} ADDR2_COMPUTE_CMASK_ADDRFROMCOORD_OUTPUT; + +/** +**************************************************************************************************** +* Addr2ComputeCmaskAddrFromCoord +* +* @brief +* Compute Cmask address according to coordinates (of MSAA color buffer) +**************************************************************************************************** +*/ +ADDR_E_RETURNCODE ADDR_API Addr2ComputeCmaskAddrFromCoord( + ADDR_HANDLE hLib, + const ADDR2_COMPUTE_CMASK_ADDRFROMCOORD_INPUT* pIn, + ADDR2_COMPUTE_CMASK_ADDRFROMCOORD_OUTPUT* pOut); + +/** +**************************************************************************************************** +* ADDR2_COMPUTE_CMASK_COORDFROMADDR_INPUT +* +* @brief +* Input structure for Addr2ComputeCmaskCoordFromAddr +**************************************************************************************************** +*/ +typedef struct _ADDR2_COMPUTE_CMASK_COORDFROMADDR_INPUT +{ + UINT_32 size; ///< Size of this structure in bytes + + UINT_64 addr; ///< CMASK address in bytes + UINT_32 bitPosition; ///< Bit position within addr, 0 or 4 + + ADDR2_META_FLAGS cMaskFlags; ///< CMASK flags + ADDR2_SURFACE_FLAGS colorFlags; ///< Color surface flags + AddrResourceType resourceType; ///< Color surface type + AddrSwizzleMode swizzleMode; ///< FMask surface swizzle mode + + UINT_32 unalignedWidth; ///< Color surface original width (of mip0) + UINT_32 unalignedHeight; ///< Color surface 
original height (of mip0) + UINT_32 numSlices; ///< Color surface original slices (of mip0) + UINT_32 numMipLevels; ///< Color surface total mipmap levels. +} ADDR2_COMPUTE_CMASK_COORDFROMADDR_INPUT; + +/** +**************************************************************************************************** +* ADDR2_COMPUTE_CMASK_COORDFROMADDR_OUTPUT +* +* @brief +* Output structure for Addr2ComputeCmaskCoordFromAddr +**************************************************************************************************** +*/ +typedef struct _ADDR2_COMPUTE_CMASK_COORDFROMADDR_OUTPUT +{ + UINT_32 size; ///< Size of this structure in bytes + + UINT_32 x; ///< X coordinate + UINT_32 y; ///< Y coordinate + UINT_32 slice; ///< Index of slices + UINT_32 mipId; ///< mipmap level id +} ADDR2_COMPUTE_CMASK_COORDFROMADDR_OUTPUT; + +/** +**************************************************************************************************** +* Addr2ComputeCmaskCoordFromAddr +* +* @brief +* Compute coordinates within color buffer (1st pixel of a micro tile) according to +* Cmask address +**************************************************************************************************** +*/ +ADDR_E_RETURNCODE ADDR_API Addr2ComputeCmaskCoordFromAddr( + ADDR_HANDLE hLib, + const ADDR2_COMPUTE_CMASK_COORDFROMADDR_INPUT* pIn, + ADDR2_COMPUTE_CMASK_COORDFROMADDR_OUTPUT* pOut); + +//////////////////////////////////////////////////////////////////////////////////////////////////// +// F-mask functions for Gfx9 +//////////////////////////////////////////////////////////////////////////////////////////////////// + +/** +**************************************************************************************************** +* ADDR2_FMASK_FLAGS +* +* @brief +* FMASK flags +**************************************************************************************************** +*/ +typedef union _ADDR2_FMASK_FLAGS +{ + struct + { + UINT_32 resolved : 1; ///< TRUE if this is a resolved fmask, used by H/W 
clients + /// by H/W clients. S/W should always set it to FALSE. + UINT_32 reserved : 31; ///< Reserved for future use. + }; + + UINT_32 value; +} ADDR2_FMASK_FLAGS; + +/** +**************************************************************************************************** +* ADDR2_COMPUTE_FMASK_INFO_INPUT +* +* @brief +* Input structure for Addr2ComputeFmaskInfo +**************************************************************************************************** +*/ +typedef struct _ADDR2_COMPUTE_FMASK_INFO_INPUT +{ + UINT_32 size; ///< Size of this structure in bytes + + AddrSwizzleMode swizzleMode; ///< FMask surface swizzle mode + UINT_32 unalignedWidth; ///< Color surface original width + UINT_32 unalignedHeight; ///< Color surface original height + UINT_32 numSlices; ///< Number of slices/depth + UINT_32 numSamples; ///< Number of samples + UINT_32 numFrags; ///< Number of fragments, leave it zero or the same as + /// number of samples for normal AA; Set it to the + /// number of fragments for EQAA + ADDR2_FMASK_FLAGS fMaskFlags; ///< FMASK flags +} ADDR2_COMPUTE_FMASK_INFO_INPUT; + +/** +**************************************************************************************************** +* ADDR2_COMPUTE_FMASK_INFO_OUTPUT +* +* @brief +* Output structure for Addr2ComputeFmaskInfo +**************************************************************************************************** +*/ +typedef struct _ADDR2_COMPUTE_FMASK_INFO_OUTPUT +{ + UINT_32 size; ///< Size of this structure in bytes + + UINT_32 pitch; ///< Pitch of fmask in pixels + UINT_32 height; ///< Height of fmask in pixels + UINT_32 baseAlign; ///< Base alignment + UINT_32 numSlices; ///< Slices of fmask + UINT_32 fmaskBytes; ///< Size of fmask in bytes + UINT_32 bpp; ///< Bits per pixel of FMASK is: number of bit planes + UINT_32 numSamples; ///< Number of samples + UINT_32 sliceSize; ///< Size of slice in bytes +} ADDR2_COMPUTE_FMASK_INFO_OUTPUT; + +/** 
+**************************************************************************************************** +* Addr2ComputeFmaskInfo +* +* @brief +* Compute Fmask pitch/height/slices/alignments and size in bytes +**************************************************************************************************** +*/ +ADDR_E_RETURNCODE ADDR_API Addr2ComputeFmaskInfo( + ADDR_HANDLE hLib, + const ADDR2_COMPUTE_FMASK_INFO_INPUT* pIn, + ADDR2_COMPUTE_FMASK_INFO_OUTPUT* pOut); + +/** +**************************************************************************************************** +* ADDR2_COMPUTE_FMASK_ADDRFROMCOORD_INPUT +* +* @brief +* Input structure for Addr2ComputeFmaskAddrFromCoord +**************************************************************************************************** +*/ +typedef struct _ADDR2_COMPUTE_FMASK_ADDRFROMCOORD_INPUT +{ + UINT_32 size; ///< Size of this structure in bytes + + AddrSwizzleMode swizzleMode; ///< FMask surface swizzle mode + UINT_32 x; ///< X coordinate + UINT_32 y; ///< Y coordinate + UINT_32 slice; ///< Slice index + UINT_32 sample; ///< Sample index (fragment index for EQAA) + UINT_32 plane; ///< Plane number + + UINT_32 unalignedWidth; ///< Color surface original width + UINT_32 unalignedHeight; ///< Color surface original height + UINT_32 numSamples; ///< Number of samples + UINT_32 numFrags; ///< Number of fragments, leave it zero or the same as + /// number of samples for normal AA; Set it to the + /// number of fragments for EQAA + UINT_32 tileSwizzle; ///< Combined swizzle used to do bank/pipe rotation + + ADDR2_FMASK_FLAGS fMaskFlags; ///< FMASK flags +} ADDR2_COMPUTE_FMASK_ADDRFROMCOORD_INPUT; + +/** +**************************************************************************************************** +* ADDR2_COMPUTE_FMASK_ADDRFROMCOORD_OUTPUT +* +* @brief +* Output structure for Addr2ComputeFmaskAddrFromCoord +**************************************************************************************************** +*/ 
+typedef struct _ADDR2_COMPUTE_FMASK_ADDRFROMCOORD_OUTPUT +{ + UINT_32 size; ///< Size of this structure in bytes + + UINT_64 addr; ///< Fmask address + UINT_32 bitPosition; ///< Bit position within fmaskAddr, 0-7. +} ADDR2_COMPUTE_FMASK_ADDRFROMCOORD_OUTPUT; + +/** +**************************************************************************************************** +* Addr2ComputeFmaskAddrFromCoord +* +* @brief +* Compute Fmask address according to coordinates (x,y,slice,sample,plane) +**************************************************************************************************** +*/ +ADDR_E_RETURNCODE ADDR_API Addr2ComputeFmaskAddrFromCoord( + ADDR_HANDLE hLib, + const ADDR2_COMPUTE_FMASK_ADDRFROMCOORD_INPUT* pIn, + ADDR2_COMPUTE_FMASK_ADDRFROMCOORD_OUTPUT* pOut); + +/** +**************************************************************************************************** +* ADDR2_COMPUTE_FMASK_COORDFROMADDR_INPUT +* +* @brief +* Input structure for Addr2ComputeFmaskCoordFromAddr +**************************************************************************************************** +*/ +typedef struct _ADDR2_COMPUTE_FMASK_COORDFROMADDR_INPUT +{ + UINT_32 size; ///< Size of this structure in bytes + + UINT_64 addr; ///< Address + UINT_32 bitPosition; ///< Bit position within addr, 0-7. 
+ AddrSwizzleMode swizzleMode; ///< FMask surface swizzle mode + + UINT_32 unalignedWidth; ///< Color surface original width + UINT_32 unalignedHeight; ///< Color surface original height + UINT_32 numSamples; ///< Number of samples + UINT_32 numFrags; ///< Number of fragments + + UINT_32 tileSwizzle; ///< Combined swizzle used to do bank/pipe rotation + + ADDR2_FMASK_FLAGS fMaskFlags; ///< FMASK flags +} ADDR2_COMPUTE_FMASK_COORDFROMADDR_INPUT; + +/** +**************************************************************************************************** +* ADDR2_COMPUTE_FMASK_COORDFROMADDR_OUTPUT +* +* @brief +* Output structure for Addr2ComputeFmaskCoordFromAddr +**************************************************************************************************** +*/ +typedef struct _ADDR2_COMPUTE_FMASK_COORDFROMADDR_OUTPUT +{ + UINT_32 size; ///< Size of this structure in bytes + + UINT_32 x; ///< X coordinate + UINT_32 y; ///< Y coordinate + UINT_32 slice; ///< Slice index + UINT_32 sample; ///< Sample index (fragment index for EQAA) + UINT_32 plane; ///< Plane number +} ADDR2_COMPUTE_FMASK_COORDFROMADDR_OUTPUT; + +/** +**************************************************************************************************** +* Addr2ComputeFmaskCoordFromAddr +* +* @brief +* Compute FMASK coordinate from an given address +**************************************************************************************************** +*/ +ADDR_E_RETURNCODE ADDR_API Addr2ComputeFmaskCoordFromAddr( + ADDR_HANDLE hLib, + const ADDR2_COMPUTE_FMASK_COORDFROMADDR_INPUT* pIn, + ADDR2_COMPUTE_FMASK_COORDFROMADDR_OUTPUT* pOut); + +//////////////////////////////////////////////////////////////////////////////////////////////////// +// DCC key functions for Gfx9 +//////////////////////////////////////////////////////////////////////////////////////////////////// + +/** +**************************************************************************************************** +* 
_ADDR2_COMPUTE_DCCINFO_INPUT +* +* @brief +* Input structure of Addr2ComputeDccInfo +**************************************************************************************************** +*/ +typedef struct _ADDR2_COMPUTE_DCCINFO_INPUT +{ + UINT_32 size; ///< Size of this structure in bytes + + ADDR2_META_FLAGS dccKeyFlags; ///< DCC key flags + ADDR2_SURFACE_FLAGS colorFlags; ///< Color surface flags + AddrResourceType resourceType; ///< Color surface type + AddrSwizzleMode swizzleMode; ///< Color surface swizzle mode + UINT_32 bpp; ///< bits per pixel + UINT_32 unalignedWidth; ///< Color surface original width (of mip0) + UINT_32 unalignedHeight; ///< Color surface original height (of mip0) + UINT_32 numSlices; ///< Number of slices, of color surface (of mip0) + UINT_32 numFrags; ///< Fragment number of color surface + UINT_32 numMipLevels; ///< Total mipmap levels of color surface + UINT_32 dataSurfaceSize; ///< The padded size of all slices and mip levels + ///< useful in meta linear case + UINT_32 firstMipIdInTail; ///< The id of first mip in tail, if no mip is in tail, + /// it should be number of mip levels +} ADDR2_COMPUTE_DCCINFO_INPUT; + +/** +**************************************************************************************************** +* ADDR2_COMPUTE_DCCINFO_OUTPUT +* +* @brief +* Output structure of Addr2ComputeDccInfo +**************************************************************************************************** +*/ +typedef struct _ADDR2_COMPUTE_DCCINFO_OUTPUT +{ + UINT_32 size; ///< Size of this structure in bytes + + UINT_32 dccRamBaseAlign; ///< Base alignment of dcc key + UINT_32 dccRamSize; ///< Size of dcc key + + UINT_32 pitch; ///< DCC surface mip chain pitch + UINT_32 height; ///< DCC surface mip chain height + UINT_32 depth; ///< DCC surface mip chain depth + + UINT_32 compressBlkWidth; ///< DCC compress block width + UINT_32 compressBlkHeight; ///< DCC compress block height + UINT_32 compressBlkDepth; ///< DCC compress block 
depth + + UINT_32 metaBlkWidth; ///< DCC meta block width + UINT_32 metaBlkHeight; ///< DCC meta block height + UINT_32 metaBlkDepth; ///< DCC meta block depth + + UINT_32 metaBlkNumPerSlice; ///< Number of metablock within one slice + + union + { + UINT_32 fastClearSizePerSlice; ///< Size of DCC within a slice should be fast cleared + UINT_32 dccRamSliceSize; ///< DCC ram size per slice. For mipmap, it's + /// the slize size of a mip chain, the thickness of a + /// a slice is meta block depth + }; + + ADDR2_META_MIP_INFO* pMipInfo; ///< DCC mip information +} ADDR2_COMPUTE_DCCINFO_OUTPUT; + +/** +**************************************************************************************************** +* Addr2ComputeDccInfo +* +* @brief +* Compute DCC key size, base alignment +* info +**************************************************************************************************** +*/ +ADDR_E_RETURNCODE ADDR_API Addr2ComputeDccInfo( + ADDR_HANDLE hLib, + const ADDR2_COMPUTE_DCCINFO_INPUT* pIn, + ADDR2_COMPUTE_DCCINFO_OUTPUT* pOut); + +/** +**************************************************************************************************** +* ADDR2_COMPUTE_DCC_ADDRFROMCOORD_INPUT +* +* @brief +* Input structure for Addr2ComputeDccAddrFromCoord +* +**************************************************************************************************** +*/ +typedef struct _ADDR2_COMPUTE_DCC_ADDRFROMCOORD_INPUT +{ + UINT_32 size; ///< Size of this structure in bytes + + UINT_32 x; ///< X coordinate + UINT_32 y; ///< Y coordinate + UINT_32 slice; ///< Index of slices + UINT_32 sample; ///< Index of samples, means fragment index for EQAA + UINT_32 mipId; ///< mipmap level id + + ADDR2_META_FLAGS dccKeyFlags; ///< DCC flags + ADDR2_SURFACE_FLAGS colorFlags; ///< Color surface flags + AddrResourceType resourceType; ///< Color surface type + AddrSwizzleMode swizzleMode; ///< Color surface swizzle mode + UINT_32 bpp; ///< Color surface bits per pixel + UINT_32 unalignedWidth; ///< 
Color surface original width (of mip0) + UINT_32 unalignedHeight; ///< Color surface original height (of mip0) + UINT_32 numSlices; ///< Color surface original slices (of mip0) + UINT_32 numMipLevels; ///< Color surface mipmap levels + UINT_32 numFrags; ///< Color surface fragment number + + UINT_32 pipeXor; ///< pipe Xor setting +} ADDR2_COMPUTE_DCC_ADDRFROMCOORD_INPUT; + +/** +**************************************************************************************************** +* ADDR2_COMPUTE_DCC_ADDRFROMCOORD_OUTPUT +* +* @brief +* Output structure for Addr2ComputeDccAddrFromCoord +**************************************************************************************************** +*/ +typedef struct _ADDR2_COMPUTE_DCC_ADDRFROMCOORD_OUTPUT +{ + UINT_32 size; ///< Size of this structure in bytes + + UINT_64 addr; ///< DCC address in bytes +} ADDR2_COMPUTE_DCC_ADDRFROMCOORD_OUTPUT; + +/** +**************************************************************************************************** +* Addr2ComputeDccAddrFromCoord +* +* @brief +* Compute DCC address according to coordinates (of MSAA color buffer) +**************************************************************************************************** +*/ +ADDR_E_RETURNCODE ADDR_API Addr2ComputeDccAddrFromCoord( + ADDR_HANDLE hLib, + const ADDR2_COMPUTE_DCC_ADDRFROMCOORD_INPUT* pIn, + ADDR2_COMPUTE_DCC_ADDRFROMCOORD_OUTPUT* pOut); + +//////////////////////////////////////////////////////////////////////////////////////////////////// +// Misc functions for Gfx9 +//////////////////////////////////////////////////////////////////////////////////////////////////// + +/** +**************************************************************************************************** +* ADDR2_COMPUTE_PIPEBANKXOR_INPUT +* +* @brief +* Input structure of Addr2ComputePipebankXor +**************************************************************************************************** +*/ +typedef struct _ADDR2_COMPUTE_PIPEBANKXOR_INPUT 
+{ + UINT_32 size; ///< Size of this structure in bytes + UINT_32 surfIndex; ///< Input surface index + ADDR2_SURFACE_FLAGS flags; ///< Surface flag + AddrSwizzleMode swizzleMode; ///< Surface swizzle mode + AddrResourceType resourceType; ///< Surface resource type + AddrFormat format; ///< Surface format + UINT_32 numSamples; ///< Number of samples + UINT_32 numFrags; ///< Number of fragments, leave it zero or the same as + /// number of samples for normal AA; Set it to the + /// number of fragments for EQAA +} ADDR2_COMPUTE_PIPEBANKXOR_INPUT; + +/** +**************************************************************************************************** +* ADDR2_COMPUTE_PIPEBANKXOR_OUTPUT +* +* @brief +* Output structure of Addr2ComputePipebankXor +**************************************************************************************************** +*/ +typedef struct _ADDR2_COMPUTE_PIPEBANKXOR_OUTPUT +{ + UINT_32 size; ///< Size of this structure in bytes + UINT_32 pipeBankXor; ///< Pipe bank xor +} ADDR2_COMPUTE_PIPEBANKXOR_OUTPUT; + +/** +**************************************************************************************************** +* Addr2ComputePipeBankXor +* +* @brief +* Calculate a valid bank pipe xor value for client to use. 
+**************************************************************************************************** +*/ +ADDR_E_RETURNCODE ADDR_API Addr2ComputePipeBankXor( + ADDR_HANDLE hLib, + const ADDR2_COMPUTE_PIPEBANKXOR_INPUT* pIn, + ADDR2_COMPUTE_PIPEBANKXOR_OUTPUT* pOut); + +/** +**************************************************************************************************** +* ADDR2_COMPUTE_SLICE_PIPEBANKXOR_INPUT +* +* @brief +* Input structure of Addr2ComputeSlicePipeBankXor +**************************************************************************************************** +*/ +typedef struct _ADDR2_COMPUTE_SLICE_PIPEBANKXOR_INPUT +{ + UINT_32 size; ///< Size of this structure in bytes + AddrSwizzleMode swizzleMode; ///< Surface swizzle mode + AddrResourceType resourceType; ///< Surface resource type + UINT_32 basePipeBankXor; ///< Base pipe bank xor + UINT_32 slice; ///< Slice id + UINT_32 numSamples; ///< Number of samples +} ADDR2_COMPUTE_SLICE_PIPEBANKXOR_INPUT; + +/** +**************************************************************************************************** +* ADDR2_COMPUTE_SLICE_PIPEBANKXOR_OUTPUT +* +* @brief +* Output structure of Addr2ComputeSlicePipeBankXor +**************************************************************************************************** +*/ +typedef struct _ADDR2_COMPUTE_SLICE_PIPEBANKXOR_OUTPUT +{ + UINT_32 size; ///< Size of this structure in bytes + UINT_32 pipeBankXor; ///< Pipe bank xor +} ADDR2_COMPUTE_SLICE_PIPEBANKXOR_OUTPUT; + +/** +**************************************************************************************************** +* Addr2ComputeSlicePipeBankXor +* +* @brief +* Calculate slice pipe bank xor value based on base pipe bank xor and slice id. 
+**************************************************************************************************** +*/ +ADDR_E_RETURNCODE ADDR_API Addr2ComputeSlicePipeBankXor( + ADDR_HANDLE hLib, + const ADDR2_COMPUTE_SLICE_PIPEBANKXOR_INPUT* pIn, + ADDR2_COMPUTE_SLICE_PIPEBANKXOR_OUTPUT* pOut); + +/** +**************************************************************************************************** +* ADDR2_COMPUTE_SUBRESOURCE_OFFSET_FORSWIZZLEPATTERN_INPUT +* +* @brief +* Input structure of Addr2ComputeSubResourceOffsetForSwizzlePattern +**************************************************************************************************** +*/ +typedef struct _ADDR2_COMPUTE_SUBRESOURCE_OFFSET_FORSWIZZLEPATTERN_INPUT +{ + UINT_32 size; ///< Size of this structure in bytes + AddrSwizzleMode swizzleMode; ///< Surface swizzle mode + AddrResourceType resourceType; ///< Surface resource type + UINT_32 pipeBankXor; ///< Per resource xor + UINT_32 slice; ///< Slice id + UINT_64 sliceSize; ///< Slice size of a mip chain + UINT_64 macroBlockOffset; ///< Macro block offset, returned in ADDR2_MIP_INFO + UINT_32 mipTailOffset; ///< Mip tail offset, returned in ADDR2_MIP_INFO +} ADDR2_COMPUTE_SUBRESOURCE_OFFSET_FORSWIZZLEPATTERN_INPUT; + +/** +**************************************************************************************************** +* ADDR2_COMPUTE_SUBRESOURCE_OFFSET_FORSWIZZLEPATTERN_OUTPUT +* +* @brief +* Output structure of Addr2ComputeSubResourceOffsetForSwizzlePattern +**************************************************************************************************** +*/ +typedef struct _ADDR2_COMPUTE_SUBRESOURCE_OFFSET_FORSWIZZLEPATTERN_OUTPUT +{ + UINT_32 size; ///< Size of this structure in bytes + UINT_64 offset; ///< offset +} ADDR2_COMPUTE_SUBRESOURCE_OFFSET_FORSWIZZLEPATTERN_OUTPUT; + +/** +**************************************************************************************************** +* Addr2ComputeSubResourceOffsetForSwizzlePattern +* +* @brief +* Calculate 
sub resource offset to support swizzle pattern. +**************************************************************************************************** +*/ +ADDR_E_RETURNCODE ADDR_API Addr2ComputeSubResourceOffsetForSwizzlePattern( + ADDR_HANDLE hLib, + const ADDR2_COMPUTE_SUBRESOURCE_OFFSET_FORSWIZZLEPATTERN_INPUT* pIn, + ADDR2_COMPUTE_SUBRESOURCE_OFFSET_FORSWIZZLEPATTERN_OUTPUT* pOut); + +/** +**************************************************************************************************** +* ADDR2_BLOCK_SET +* +* @brief +* Bit field that defines block type +**************************************************************************************************** +*/ +typedef union _ADDR2_BLOCK_SET +{ + struct + { + UINT_32 micro : 1; // 256B block for 2D resource + UINT_32 macro4KB : 1; // 4KB for 2D/3D resource + UINT_32 macro64KB : 1; // 64KB for 2D/3D resource + UINT_32 var : 1; // VAR block + UINT_32 linear : 1; // Linear block + UINT_32 reserved : 27; + }; + + UINT_32 value; +} ADDR2_BLOCK_SET; + +/** +**************************************************************************************************** +* ADDR2_SWTYPE_SET +* +* @brief +* Bit field that defines swizzle type +**************************************************************************************************** +*/ +typedef union _ADDR2_SWTYPE_SET +{ + struct + { + UINT_32 sw_Z : 1; // SW_*_Z_* + UINT_32 sw_S : 1; // SW_*_S_* + UINT_32 sw_D : 1; // SW_*_D_* + UINT_32 sw_R : 1; // SW_*_R_* + UINT_32 reserved : 28; + }; + + UINT_32 value; +} ADDR2_SWTYPE_SET; + +/** +**************************************************************************************************** +* ADDR2_SWMODE_SET +* +* @brief +* Bit field that defines swizzle type +**************************************************************************************************** +*/ +typedef union _ADDR2_SWMODE_SET +{ + struct + { + UINT_32 swLinear : 1; + UINT_32 sw256B_S : 1; + UINT_32 sw256B_D : 1; + UINT_32 sw256B_R : 1; + UINT_32 sw4KB_Z : 
1; + UINT_32 sw4KB_S : 1; + UINT_32 sw4KB_D : 1; + UINT_32 sw4KB_R : 1; + UINT_32 sw64KB_Z : 1; + UINT_32 sw64KB_S : 1; + UINT_32 sw64KB_D : 1; + UINT_32 sw64KB_R : 1; + UINT_32 swVar_Z : 1; + UINT_32 swVar_S : 1; + UINT_32 swVar_D : 1; + UINT_32 swVar_R : 1; + UINT_32 sw64KB_Z_T : 1; + UINT_32 sw64KB_S_T : 1; + UINT_32 sw64KB_D_T : 1; + UINT_32 sw64KB_R_T : 1; + UINT_32 sw4KB_Z_X : 1; + UINT_32 sw4KB_S_X : 1; + UINT_32 sw4KB_D_X : 1; + UINT_32 sw4KB_R_X : 1; + UINT_32 sw64KB_Z_X : 1; + UINT_32 sw64KB_S_X : 1; + UINT_32 sw64KB_D_X : 1; + UINT_32 sw64KB_R_X : 1; + UINT_32 swVar_Z_X : 1; + UINT_32 swVar_S_X : 1; + UINT_32 swVar_D_X : 1; + UINT_32 swVar_R_X : 1; + }; + + UINT_32 value; +} ADDR2_SWMODE_SET; + +/** +**************************************************************************************************** +* ADDR2_GET_PREFERRED_SURF_SETTING_INPUT +* +* @brief +* Input structure of Addr2GetPreferredSurfaceSetting +**************************************************************************************************** +*/ +typedef struct _ADDR2_GET_PREFERRED_SURF_SETTING_INPUT +{ + UINT_32 size; ///< Size of this structure in bytes + + ADDR2_SURFACE_FLAGS flags; ///< Surface flags + AddrResourceType resourceType; ///< Surface type + AddrFormat format; ///< Surface format + AddrResrouceLocation resourceLoction; ///< Surface heap choice + ADDR2_BLOCK_SET forbiddenBlock; ///< Client can use it to disable some block setting + ///< such as linear for DXTn, tiled for YUV + ADDR2_SWTYPE_SET preferredSwSet; ///< Client can use it to specify sw type(s) wanted + BOOL_32 noXor; ///< Do not use xor mode for this resource + UINT_32 bpp; ///< bits per pixel + UINT_32 width; ///< Width (of mip0), in pixels + UINT_32 height; ///< Height (of mip0), in pixels + UINT_32 numSlices; ///< Number surface slice/depth (of mip0), + UINT_32 numMipLevels; ///< Total mipmap levels. 
+ UINT_32 numSamples; ///< Number of samples + UINT_32 numFrags; ///< Number of fragments, leave it zero or the same as + /// number of samples for normal AA; Set it to the + /// number of fragments for EQAA + UINT_32 maxAlign; ///< maximum base/size alignment requested by client + UINT_32 minSizeAlign; ///< memory allocated for surface in client driver will + /// be padded to multiple of this value (in bytes) +} ADDR2_GET_PREFERRED_SURF_SETTING_INPUT; + +/** +**************************************************************************************************** +* ADDR2_GET_PREFERRED_SURF_SETTING_OUTPUT +* +* @brief +* Output structure of Addr2GetPreferredSurfaceSetting +**************************************************************************************************** +*/ +typedef struct _ADDR2_GET_PREFERRED_SURF_SETTING_OUTPUT +{ + UINT_32 size; ///< Size of this structure in bytes + + AddrSwizzleMode swizzleMode; ///< Suggested swizzle mode to be used + AddrResourceType resourceType; ///< Suggested resource type to program HW + ADDR2_BLOCK_SET validBlockSet; ///< Valid block type bit conbination + BOOL_32 canXor; ///< If client can use xor on a valid macro block + /// type + ADDR2_SWTYPE_SET validSwTypeSet; ///< Valid swizzle type bit combination + ADDR2_SWTYPE_SET clientPreferredSwSet; ///< Client-preferred swizzle type bit combination + ADDR2_SWMODE_SET validSwModeSet; ///< Valid swizzle mode bit combination +} ADDR2_GET_PREFERRED_SURF_SETTING_OUTPUT; + +/** +**************************************************************************************************** +* Addr2GetPreferredSurfaceSetting +* +* @brief +* Suggest a preferred setting for client driver to program HW register +**************************************************************************************************** +*/ +ADDR_E_RETURNCODE ADDR_API Addr2GetPreferredSurfaceSetting( + ADDR_HANDLE hLib, + const ADDR2_GET_PREFERRED_SURF_SETTING_INPUT* pIn, + ADDR2_GET_PREFERRED_SURF_SETTING_OUTPUT* pOut); 
+ +/** +**************************************************************************************************** +* Addr2IsValidDisplaySwizzleMode +* +* @brief +* Return whether the swizzle mode is supported by DCE / DCN. +**************************************************************************************************** +*/ +ADDR_E_RETURNCODE ADDR_API Addr2IsValidDisplaySwizzleMode( + ADDR_HANDLE hLib, + AddrSwizzleMode swizzleMode, + UINT_32 bpp, + bool *result); + +#if defined(__cplusplus) +} +#endif + +#endif // __ADDR_INTERFACE_H__ diff -Nru mesa-18.3.3/src/amd/addrlib/inc/addrtypes.h mesa-19.0.1/src/amd/addrlib/inc/addrtypes.h --- mesa-18.3.3/src/amd/addrlib/inc/addrtypes.h 1970-01-01 00:00:00.000000000 +0000 +++ mesa-19.0.1/src/amd/addrlib/inc/addrtypes.h 2019-03-31 23:16:37.000000000 +0000 @@ -0,0 +1,746 @@ +/* + * Copyright © 2007-2018 Advanced Micro Devices, Inc. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining + * a copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES + * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NON-INFRINGEMENT. IN NO EVENT SHALL THE COPYRIGHT HOLDERS, AUTHORS + * AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE + * USE OR OTHER DEALINGS IN THE SOFTWARE. 
+ * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + */ + +/** +**************************************************************************************************** +* @file addrtypes.h +* @brief Contains the helper function and constants +**************************************************************************************************** +*/ +#ifndef __ADDR_TYPES_H__ +#define __ADDR_TYPES_H__ + +#if defined(__APPLE__) && !defined(HAVE_TSERVER) +// External definitions header maintained by Apple driver team, but not for diag team under Mac. +// Helps address compilation issues & reduces code covered by NDA +#include "addrExtDef.h" + +#else + +// Windows and/or Linux +#if !defined(VOID) +typedef void VOID; +#endif + +#if !defined(FLOAT) +typedef float FLOAT; +#endif + +#if !defined(CHAR) +typedef char CHAR; +#endif + +#if !defined(INT) +typedef int INT; +#endif + +#include // va_list...etc need this header + +#endif // defined (__APPLE__) && !defined(HAVE_TSERVER) + +/** +**************************************************************************************************** +* Calling conventions +**************************************************************************************************** +*/ +#ifndef ADDR_CDECL + #if defined(__GNUC__) + #define ADDR_CDECL __attribute__((cdecl)) + #else + #define ADDR_CDECL __cdecl + #endif +#endif + +#ifndef ADDR_STDCALL + #if defined(__GNUC__) + #if defined(__amd64__) || defined(__x86_64__) + #define ADDR_STDCALL + #else + #define ADDR_STDCALL __attribute__((stdcall)) + #endif + #else + #define ADDR_STDCALL __stdcall + #endif +#endif + +#ifndef ADDR_FASTCALL + #if defined(BRAHMA_ARM) + #define ADDR_FASTCALL + #elif defined(__GNUC__) + #if defined(__i386__) + #define ADDR_FASTCALL __attribute__((regparm(0))) + #else + #define ADDR_FASTCALL + #endif + #else + #define ADDR_FASTCALL __fastcall + #endif 
+#endif + +#ifndef GC_CDECL + #define GC_CDECL ADDR_CDECL +#endif + +#ifndef GC_STDCALL + #define GC_STDCALL ADDR_STDCALL +#endif + +#ifndef GC_FASTCALL + #define GC_FASTCALL ADDR_FASTCALL +#endif + +#if defined(__GNUC__) + #define ADDR_INLINE static inline // inline needs to be static to link +#else + // win32, win64, other platforms + #define ADDR_INLINE __inline +#endif // #if defined(__GNUC__) + +#define ADDR_API ADDR_FASTCALL //default call convention is fast call + +/** +**************************************************************************************************** +* Global defines used by other modules +**************************************************************************************************** +*/ +#if !defined(TILEINDEX_INVALID) +#define TILEINDEX_INVALID -1 +#endif + +#if !defined(TILEINDEX_LINEAR_GENERAL) +#define TILEINDEX_LINEAR_GENERAL -2 +#endif + +#if !defined(TILEINDEX_LINEAR_ALIGNED) +#define TILEINDEX_LINEAR_ALIGNED 8 +#endif + +/** +**************************************************************************************************** +* Return codes +**************************************************************************************************** +*/ +typedef enum _ADDR_E_RETURNCODE +{ + // General Return + ADDR_OK = 0, + ADDR_ERROR = 1, + + // Specific Errors + ADDR_OUTOFMEMORY, + ADDR_INVALIDPARAMS, + ADDR_NOTSUPPORTED, + ADDR_NOTIMPLEMENTED, + ADDR_PARAMSIZEMISMATCH, + ADDR_INVALIDGBREGVALUES, + +} ADDR_E_RETURNCODE; + +/** +**************************************************************************************************** +* @brief +* Neutral enums that define tile modes for all H/W +* @note +* R600/R800 tiling mode can be cast to hw enums directly but never cast into HW enum from +* ADDR_TM_2D_TILED_XTHICK +* +**************************************************************************************************** +*/ +typedef enum _AddrTileMode +{ + ADDR_TM_LINEAR_GENERAL = 0, ///< Least restrictions, pitch: multiple of 8 if not 
buffer + ADDR_TM_LINEAR_ALIGNED = 1, ///< Requests pitch or slice to be multiple of 64 pixels + ADDR_TM_1D_TILED_THIN1 = 2, ///< Linear array of 8x8 tiles + ADDR_TM_1D_TILED_THICK = 3, ///< Linear array of 8x8x4 tiles + ADDR_TM_2D_TILED_THIN1 = 4, ///< A set of macro tiles consist of 8x8 tiles + ADDR_TM_2D_TILED_THIN2 = 5, ///< 600 HWL only, macro tile ratio is 1:4 + ADDR_TM_2D_TILED_THIN4 = 6, ///< 600 HWL only, macro tile ratio is 1:16 + ADDR_TM_2D_TILED_THICK = 7, ///< A set of macro tiles consist of 8x8x4 tiles + ADDR_TM_2B_TILED_THIN1 = 8, ///< 600 HWL only, with bank swap + ADDR_TM_2B_TILED_THIN2 = 9, ///< 600 HWL only, with bank swap and ratio is 1:4 + ADDR_TM_2B_TILED_THIN4 = 10, ///< 600 HWL only, with bank swap and ratio is 1:16 + ADDR_TM_2B_TILED_THICK = 11, ///< 600 HWL only, with bank swap, consists of 8x8x4 tiles + ADDR_TM_3D_TILED_THIN1 = 12, ///< Macro tiling w/ pipe rotation between slices + ADDR_TM_3D_TILED_THICK = 13, ///< Macro tiling w/ pipe rotation bwtween slices, thick + ADDR_TM_3B_TILED_THIN1 = 14, ///< 600 HWL only, with bank swap + ADDR_TM_3B_TILED_THICK = 15, ///< 600 HWL only, with bank swap, thick + ADDR_TM_2D_TILED_XTHICK = 16, ///< Tile is 8x8x8, valid from NI + ADDR_TM_3D_TILED_XTHICK = 17, ///< Tile is 8x8x8, valid from NI + ADDR_TM_POWER_SAVE = 18, ///< Power save mode, only used by KMD on NI + ADDR_TM_PRT_TILED_THIN1 = 19, ///< No bank/pipe rotation or hashing beyond macrotile size + ADDR_TM_PRT_2D_TILED_THIN1 = 20, ///< Same as 2D_TILED_THIN1, PRT only + ADDR_TM_PRT_3D_TILED_THIN1 = 21, ///< Same as 3D_TILED_THIN1, PRT only + ADDR_TM_PRT_TILED_THICK = 22, ///< No bank/pipe rotation or hashing beyond macrotile size + ADDR_TM_PRT_2D_TILED_THICK = 23, ///< Same as 2D_TILED_THICK, PRT only + ADDR_TM_PRT_3D_TILED_THICK = 24, ///< Same as 3D_TILED_THICK, PRT only + ADDR_TM_UNKNOWN = 25, ///< Unkown tile mode, should be decided by address lib + ADDR_TM_COUNT = 26, ///< Must be the value of the last tile mode +} AddrTileMode; + +/** 
+**************************************************************************************************** +* @brief +* Neutral enums that define swizzle modes for Gfx9 ASIC +* @note +* +* ADDR_SW_LINEAR linear aligned addressing mode, for 1D/2D/3D resouce +* ADDR_SW_256B_* addressing block aligned size is 256B, for 2D/3D resouce +* ADDR_SW_4KB_* addressing block aligned size is 4KB, for 2D/3D resouce +* ADDR_SW_64KB_* addressing block aligned size is 64KB, for 2D/3D resouce +* ADDR_SW_VAR_* addressing block aligned size is ASIC specific, for 2D/3D resouce +* +* ADDR_SW_*_Z For 2D resouce, represents Z-order swizzle mode for depth/stencil/FMask + For 3D resouce, represents a swizzle mode similar to legacy thick tile mode +* ADDR_SW_*_S represents standard swizzle mode defined by MS +* ADDR_SW_*_D For 2D resouce, represents a swizzle mode for displayable resource +* For 3D resouce, represents a swizzle mode which places each slice in order & pixel + within slice is placed as 2D ADDR_SW_*_S. Don't use this combination if possible! 
+* ADDR_SW_*_R For 2D resouce only, represents a swizzle mode for rotated displayable resource +* +**************************************************************************************************** +*/ +typedef enum _AddrSwizzleMode +{ + ADDR_SW_LINEAR = 0, + ADDR_SW_256B_S = 1, + ADDR_SW_256B_D = 2, + ADDR_SW_256B_R = 3, + ADDR_SW_4KB_Z = 4, + ADDR_SW_4KB_S = 5, + ADDR_SW_4KB_D = 6, + ADDR_SW_4KB_R = 7, + ADDR_SW_64KB_Z = 8, + ADDR_SW_64KB_S = 9, + ADDR_SW_64KB_D = 10, + ADDR_SW_64KB_R = 11, + ADDR_SW_VAR_Z = 12, + ADDR_SW_VAR_S = 13, + ADDR_SW_VAR_D = 14, + ADDR_SW_VAR_R = 15, + ADDR_SW_64KB_Z_T = 16, + ADDR_SW_64KB_S_T = 17, + ADDR_SW_64KB_D_T = 18, + ADDR_SW_64KB_R_T = 19, + ADDR_SW_4KB_Z_X = 20, + ADDR_SW_4KB_S_X = 21, + ADDR_SW_4KB_D_X = 22, + ADDR_SW_4KB_R_X = 23, + ADDR_SW_64KB_Z_X = 24, + ADDR_SW_64KB_S_X = 25, + ADDR_SW_64KB_D_X = 26, + ADDR_SW_64KB_R_X = 27, + ADDR_SW_VAR_Z_X = 28, + ADDR_SW_VAR_S_X = 29, + ADDR_SW_VAR_D_X = 30, + ADDR_SW_VAR_R_X = 31, + ADDR_SW_LINEAR_GENERAL = 32, + ADDR_SW_MAX_TYPE = 33, + + // Used for represent block with identical size + ADDR_SW_256B = ADDR_SW_256B_S, + ADDR_SW_4KB = ADDR_SW_4KB_S_X, + ADDR_SW_64KB = ADDR_SW_64KB_S_X, + ADDR_SW_VAR = ADDR_SW_VAR_S_X, +} AddrSwizzleMode; + +/** +**************************************************************************************************** +* @brief +* Neutral enums that define image type +* @note +* this is new for address library interface version 2 +* +**************************************************************************************************** +*/ +typedef enum _AddrResourceType +{ + ADDR_RSRC_TEX_1D = 0, + ADDR_RSRC_TEX_2D = 1, + ADDR_RSRC_TEX_3D = 2, + ADDR_RSRC_MAX_TYPE = 3, +} AddrResourceType; + +/** +**************************************************************************************************** +* @brief +* Neutral enums that define resource heap location +* @note +* this is new for address library interface version 2 +* 
+**************************************************************************************************** +*/ +typedef enum _AddrResrouceLocation +{ + ADDR_RSRC_LOC_UNDEF = 0, // Resource heap is undefined/unknown + ADDR_RSRC_LOC_LOCAL = 1, // CPU visable and CPU invisable local heap + ADDR_RSRC_LOC_USWC = 2, // CPU write-combined non-cached nonlocal heap + ADDR_RSRC_LOC_CACHED = 3, // CPU cached nonlocal heap + ADDR_RSRC_LOC_INVIS = 4, // CPU invisable local heap only + ADDR_RSRC_LOC_MAX_TYPE = 5, +} AddrResrouceLocation; + +/** +**************************************************************************************************** +* @brief +* Neutral enums that define resource basic swizzle mode +* @note +* this is new for address library interface version 2 +* +**************************************************************************************************** +*/ +typedef enum _AddrSwType +{ + ADDR_SW_Z = 0, // Resource basic swizzle mode is ZOrder + ADDR_SW_S = 1, // Resource basic swizzle mode is Standard + ADDR_SW_D = 2, // Resource basic swizzle mode is Display + ADDR_SW_R = 3, // Resource basic swizzle mode is Rotated +} AddrSwType; + +/** +**************************************************************************************************** +* @brief +* Neutral enums that define mipmap major mode +* @note +* this is new for address library interface version 2 +* +**************************************************************************************************** +*/ +typedef enum _AddrMajorMode +{ + ADDR_MAJOR_X = 0, + ADDR_MAJOR_Y = 1, + ADDR_MAJOR_Z = 2, + ADDR_MAJOR_MAX_TYPE = 3, +} AddrMajorMode; + +/** +**************************************************************************************************** +* AddrFormat +* +* @brief +* Neutral enum for SurfaceFormat +* +**************************************************************************************************** +*/ +typedef enum _AddrFormat { + ADDR_FMT_INVALID = 0x00000000, + ADDR_FMT_8 = 0x00000001, + 
ADDR_FMT_4_4 = 0x00000002, + ADDR_FMT_3_3_2 = 0x00000003, + ADDR_FMT_RESERVED_4 = 0x00000004, + ADDR_FMT_16 = 0x00000005, + ADDR_FMT_16_FLOAT = ADDR_FMT_16, + ADDR_FMT_8_8 = 0x00000007, + ADDR_FMT_5_6_5 = 0x00000008, + ADDR_FMT_6_5_5 = 0x00000009, + ADDR_FMT_1_5_5_5 = 0x0000000a, + ADDR_FMT_4_4_4_4 = 0x0000000b, + ADDR_FMT_5_5_5_1 = 0x0000000c, + ADDR_FMT_32 = 0x0000000d, + ADDR_FMT_32_FLOAT = ADDR_FMT_32, + ADDR_FMT_16_16 = 0x0000000f, + ADDR_FMT_16_16_FLOAT = ADDR_FMT_16_16, + ADDR_FMT_8_24 = 0x00000011, + ADDR_FMT_8_24_FLOAT = ADDR_FMT_8_24, + ADDR_FMT_24_8 = 0x00000013, + ADDR_FMT_24_8_FLOAT = ADDR_FMT_24_8, + ADDR_FMT_10_11_11 = 0x00000015, + ADDR_FMT_10_11_11_FLOAT = ADDR_FMT_10_11_11, + ADDR_FMT_11_11_10 = 0x00000017, + ADDR_FMT_11_11_10_FLOAT = ADDR_FMT_11_11_10, + ADDR_FMT_2_10_10_10 = 0x00000019, + ADDR_FMT_8_8_8_8 = 0x0000001a, + ADDR_FMT_10_10_10_2 = 0x0000001b, + ADDR_FMT_X24_8_32_FLOAT = 0x0000001c, + ADDR_FMT_32_32 = 0x0000001d, + ADDR_FMT_32_32_FLOAT = ADDR_FMT_32_32, + ADDR_FMT_16_16_16_16 = 0x0000001f, + ADDR_FMT_16_16_16_16_FLOAT = ADDR_FMT_16_16_16_16, + ADDR_FMT_RESERVED_33 = 0x00000021, + ADDR_FMT_32_32_32_32 = 0x00000022, + ADDR_FMT_32_32_32_32_FLOAT = ADDR_FMT_32_32_32_32, + ADDR_FMT_RESERVED_36 = 0x00000024, + ADDR_FMT_1 = 0x00000025, + ADDR_FMT_1_REVERSED = 0x00000026, + ADDR_FMT_GB_GR = 0x00000027, + ADDR_FMT_BG_RG = 0x00000028, + ADDR_FMT_32_AS_8 = 0x00000029, + ADDR_FMT_32_AS_8_8 = 0x0000002a, + ADDR_FMT_5_9_9_9_SHAREDEXP = 0x0000002b, + ADDR_FMT_8_8_8 = 0x0000002c, + ADDR_FMT_16_16_16 = 0x0000002d, + ADDR_FMT_16_16_16_FLOAT = ADDR_FMT_16_16_16, + ADDR_FMT_32_32_32 = 0x0000002f, + ADDR_FMT_32_32_32_FLOAT = ADDR_FMT_32_32_32, + ADDR_FMT_BC1 = 0x00000031, + ADDR_FMT_BC2 = 0x00000032, + ADDR_FMT_BC3 = 0x00000033, + ADDR_FMT_BC4 = 0x00000034, + ADDR_FMT_BC5 = 0x00000035, + ADDR_FMT_BC6 = 0x00000036, + ADDR_FMT_BC7 = 0x00000037, + ADDR_FMT_32_AS_32_32_32_32 = 0x00000038, + ADDR_FMT_APC3 = 0x00000039, + ADDR_FMT_APC4 = 0x0000003a, + 
ADDR_FMT_APC5 = 0x0000003b, + ADDR_FMT_APC6 = 0x0000003c, + ADDR_FMT_APC7 = 0x0000003d, + ADDR_FMT_CTX1 = 0x0000003e, + ADDR_FMT_RESERVED_63 = 0x0000003f, + ADDR_FMT_ASTC_4x4 = 0x00000040, + ADDR_FMT_ASTC_5x4 = 0x00000041, + ADDR_FMT_ASTC_5x5 = 0x00000042, + ADDR_FMT_ASTC_6x5 = 0x00000043, + ADDR_FMT_ASTC_6x6 = 0x00000044, + ADDR_FMT_ASTC_8x5 = 0x00000045, + ADDR_FMT_ASTC_8x6 = 0x00000046, + ADDR_FMT_ASTC_8x8 = 0x00000047, + ADDR_FMT_ASTC_10x5 = 0x00000048, + ADDR_FMT_ASTC_10x6 = 0x00000049, + ADDR_FMT_ASTC_10x8 = 0x0000004a, + ADDR_FMT_ASTC_10x10 = 0x0000004b, + ADDR_FMT_ASTC_12x10 = 0x0000004c, + ADDR_FMT_ASTC_12x12 = 0x0000004d, + ADDR_FMT_ETC2_64BPP = 0x0000004e, + ADDR_FMT_ETC2_128BPP = 0x0000004f, +} AddrFormat; + +/** +**************************************************************************************************** +* AddrDepthFormat +* +* @brief +* Neutral enum for addrFlt32ToDepthPixel +* +**************************************************************************************************** +*/ +typedef enum _AddrDepthFormat +{ + ADDR_DEPTH_INVALID = 0x00000000, + ADDR_DEPTH_16 = 0x00000001, + ADDR_DEPTH_X8_24 = 0x00000002, + ADDR_DEPTH_8_24 = 0x00000003, + ADDR_DEPTH_X8_24_FLOAT = 0x00000004, + ADDR_DEPTH_8_24_FLOAT = 0x00000005, + ADDR_DEPTH_32_FLOAT = 0x00000006, + ADDR_DEPTH_X24_8_32_FLOAT = 0x00000007, + +} AddrDepthFormat; + +/** +**************************************************************************************************** +* AddrColorFormat +* +* @brief +* Neutral enum for ColorFormat +* +**************************************************************************************************** +*/ +typedef enum _AddrColorFormat +{ + ADDR_COLOR_INVALID = 0x00000000, + ADDR_COLOR_8 = 0x00000001, + ADDR_COLOR_4_4 = 0x00000002, + ADDR_COLOR_3_3_2 = 0x00000003, + ADDR_COLOR_RESERVED_4 = 0x00000004, + ADDR_COLOR_16 = 0x00000005, + ADDR_COLOR_16_FLOAT = 0x00000006, + ADDR_COLOR_8_8 = 0x00000007, + ADDR_COLOR_5_6_5 = 0x00000008, + ADDR_COLOR_6_5_5 = 
0x00000009, + ADDR_COLOR_1_5_5_5 = 0x0000000a, + ADDR_COLOR_4_4_4_4 = 0x0000000b, + ADDR_COLOR_5_5_5_1 = 0x0000000c, + ADDR_COLOR_32 = 0x0000000d, + ADDR_COLOR_32_FLOAT = 0x0000000e, + ADDR_COLOR_16_16 = 0x0000000f, + ADDR_COLOR_16_16_FLOAT = 0x00000010, + ADDR_COLOR_8_24 = 0x00000011, + ADDR_COLOR_8_24_FLOAT = 0x00000012, + ADDR_COLOR_24_8 = 0x00000013, + ADDR_COLOR_24_8_FLOAT = 0x00000014, + ADDR_COLOR_10_11_11 = 0x00000015, + ADDR_COLOR_10_11_11_FLOAT = 0x00000016, + ADDR_COLOR_11_11_10 = 0x00000017, + ADDR_COLOR_11_11_10_FLOAT = 0x00000018, + ADDR_COLOR_2_10_10_10 = 0x00000019, + ADDR_COLOR_8_8_8_8 = 0x0000001a, + ADDR_COLOR_10_10_10_2 = 0x0000001b, + ADDR_COLOR_X24_8_32_FLOAT = 0x0000001c, + ADDR_COLOR_32_32 = 0x0000001d, + ADDR_COLOR_32_32_FLOAT = 0x0000001e, + ADDR_COLOR_16_16_16_16 = 0x0000001f, + ADDR_COLOR_16_16_16_16_FLOAT = 0x00000020, + ADDR_COLOR_RESERVED_33 = 0x00000021, + ADDR_COLOR_32_32_32_32 = 0x00000022, + ADDR_COLOR_32_32_32_32_FLOAT = 0x00000023, +} AddrColorFormat; + +/** +**************************************************************************************************** +* AddrSurfaceNumber +* +* @brief +* Neutral enum for SurfaceNumber +* +**************************************************************************************************** +*/ +typedef enum _AddrSurfaceNumber { + ADDR_NUMBER_UNORM = 0x00000000, + ADDR_NUMBER_SNORM = 0x00000001, + ADDR_NUMBER_USCALED = 0x00000002, + ADDR_NUMBER_SSCALED = 0x00000003, + ADDR_NUMBER_UINT = 0x00000004, + ADDR_NUMBER_SINT = 0x00000005, + ADDR_NUMBER_SRGB = 0x00000006, + ADDR_NUMBER_FLOAT = 0x00000007, +} AddrSurfaceNumber; + +/** +**************************************************************************************************** +* AddrSurfaceSwap +* +* @brief +* Neutral enum for SurfaceSwap +* +**************************************************************************************************** +*/ +typedef enum _AddrSurfaceSwap { + ADDR_SWAP_STD = 0x00000000, + ADDR_SWAP_ALT = 0x00000001, + 
ADDR_SWAP_STD_REV = 0x00000002, + ADDR_SWAP_ALT_REV = 0x00000003, +} AddrSurfaceSwap; + +/** +**************************************************************************************************** +* AddrHtileBlockSize +* +* @brief +* Size of HTILE blocks, valid values are 4 or 8 for now +**************************************************************************************************** +*/ +typedef enum _AddrHtileBlockSize +{ + ADDR_HTILE_BLOCKSIZE_4 = 4, + ADDR_HTILE_BLOCKSIZE_8 = 8, +} AddrHtileBlockSize; + +/** +**************************************************************************************************** +* AddrPipeCfg +* +* @brief +* The pipe configuration field specifies both the number of pipes and +* how pipes are interleaved on the surface. +* The expression of number of pipes, the shader engine tile size, and packer tile size +* is encoded in a PIPE_CONFIG register field. +* In general the number of pipes usually matches the number of memory channels of the +* hardware configuration. +* For hw configurations w/ non-pow2 memory number of memory channels, it usually matches +* the number of ROP units(? TODO: which registers??) +* The enum value = hw enum + 1 which is to reserve 0 for requesting default. 
+**************************************************************************************************** +*/ +typedef enum _AddrPipeCfg +{ + ADDR_PIPECFG_INVALID = 0, + ADDR_PIPECFG_P2 = 1, /// 2 pipes, + ADDR_PIPECFG_P4_8x16 = 5, /// 4 pipes, + ADDR_PIPECFG_P4_16x16 = 6, + ADDR_PIPECFG_P4_16x32 = 7, + ADDR_PIPECFG_P4_32x32 = 8, + ADDR_PIPECFG_P8_16x16_8x16 = 9, /// 8 pipes + ADDR_PIPECFG_P8_16x32_8x16 = 10, + ADDR_PIPECFG_P8_32x32_8x16 = 11, + ADDR_PIPECFG_P8_16x32_16x16 = 12, + ADDR_PIPECFG_P8_32x32_16x16 = 13, + ADDR_PIPECFG_P8_32x32_16x32 = 14, + ADDR_PIPECFG_P8_32x64_32x32 = 15, + ADDR_PIPECFG_P16_32x32_8x16 = 17, /// 16 pipes + ADDR_PIPECFG_P16_32x32_16x16 = 18, + ADDR_PIPECFG_RESERVED = 19, /// reserved for internal use + ADDR_PIPECFG_MAX = 20, +} AddrPipeCfg; + +/** +**************************************************************************************************** +* AddrTileType +* +* @brief +* Neutral enums that specifies micro tile type (MICRO_TILE_MODE) +**************************************************************************************************** +*/ +typedef enum _AddrTileType +{ + ADDR_DISPLAYABLE = 0, ///< Displayable tiling + ADDR_NON_DISPLAYABLE = 1, ///< Non-displayable tiling, a.k.a thin micro tiling + ADDR_DEPTH_SAMPLE_ORDER = 2, ///< Same as non-displayable plus depth-sample-order + ADDR_ROTATED = 3, ///< Rotated displayable tiling + ADDR_THICK = 4, ///< Thick micro-tiling, only valid for THICK and XTHICK +} AddrTileType; + +//////////////////////////////////////////////////////////////////////////////////////////////////// +// +// Type definitions: short system-independent names for address library types +// +//////////////////////////////////////////////////////////////////////////////////////////////////// + +#if !defined(__APPLE__) || defined(HAVE_TSERVER) + +#ifndef BOOL_32 // no bool type in C +/// @brief Boolean type, since none is defined in C +/// @ingroup type +#define BOOL_32 int +#endif + +#ifndef INT_32 +#define INT_32 int 
+#endif + +#ifndef UINT_32 +#define UINT_32 unsigned int +#endif + +#ifndef INT_16 +#define INT_16 short +#endif + +#ifndef UINT_16 +#define UINT_16 unsigned short +#endif + +#ifndef INT_8 +#define INT_8 char +#endif + +#ifndef UINT_8 +#define UINT_8 unsigned char +#endif + +#ifndef NULL +#define NULL 0 +#endif + +#ifndef TRUE +#define TRUE 1 +#endif + +#ifndef FALSE +#define FALSE 0 +#endif + +// +// 64-bit integer types depend on the compiler +// +#if defined( __GNUC__ ) || defined( __WATCOMC__ ) +#define INT_64 long long +#define UINT_64 unsigned long long + +#elif defined( _WIN32 ) +#define INT_64 __int64 +#define UINT_64 unsigned __int64 + +#else +#error Unsupported compiler and/or operating system for 64-bit integers + +/// @brief 64-bit signed integer type (compiler dependent) +/// @ingroup type +/// +/// The addrlib defines a 64-bit signed integer type for either +/// Gnu/Watcom compilers (which use the first syntax) or for +/// the Windows VCC compiler (which uses the second syntax). +#define INT_64 long long OR __int64 + +/// @brief 64-bit unsigned integer type (compiler dependent) +/// @ingroup type +/// +/// The addrlib defines a 64-bit unsigned integer type for either +/// Gnu/Watcom compilers (which use the first syntax) or for +/// the Windows VCC compiler (which uses the second syntax). 
+/// +#define UINT_64 unsigned long long OR unsigned __int64 +#endif + +#endif // #if !defined(__APPLE__) || defined(HAVE_TSERVER) + +// ADDR64X is used to print addresses in hex form on both Windows and Linux +// +#if defined( __GNUC__ ) || defined( __WATCOMC__ ) +#define ADDR64X "llx" +#define ADDR64D "lld" + +#elif defined( _WIN32 ) +#define ADDR64X "I64x" +#define ADDR64D "I64d" + +#else +#error Unsupported compiler and/or operating system for 64-bit integers + +/// @brief Addrlib device address 64-bit printf tag (compiler dependent) +/// @ingroup type +/// +/// This allows printf to display an ADDR_64 for either the Windows VCC compiler +/// (which used this value) or the Gnu/Watcom compilers (which use "llx". +/// An example of use is printf("addr 0x%"ADDR64X"\n", address); +/// +#define ADDR64X "llx" OR "I64x" +#define ADDR64D "lld" OR "I64d" +#endif + +/// @brief Union for storing a 32-bit float or 32-bit integer +/// @ingroup type +/// +/// This union provides a simple way to convert between a 32-bit float +/// and a 32-bit integer. It also prevents the compiler from producing +/// code that alters NaN values when assiging or coying floats. +/// Therefore, all address library routines that pass or return 32-bit +/// floating point data do so by passing or returning a FLT_32. 
+/// +typedef union { + INT_32 i; + UINT_32 u; + float f; +} ADDR_FLT_32; + +//////////////////////////////////////////////////////////////////////////////////////////////////// +// +// Macros for controlling linking and building on multiple systems +// +//////////////////////////////////////////////////////////////////////////////////////////////////// +#if defined(_MSC_VER) +#if defined(va_copy) +#undef va_copy //redefine va_copy to support VC2013 +#endif +#endif + +#if !defined(va_copy) +#define va_copy(dst, src) \ + ((void) memcpy(&(dst), &(src), sizeof(va_list))) +#endif + +#endif // __ADDR_TYPES_H__ + diff -Nru mesa-18.3.3/src/amd/addrlib/inc/chip/gfx9/gfx9_gb_reg.h mesa-19.0.1/src/amd/addrlib/inc/chip/gfx9/gfx9_gb_reg.h --- mesa-18.3.3/src/amd/addrlib/inc/chip/gfx9/gfx9_gb_reg.h 2017-11-14 18:46:21.000000000 +0000 +++ mesa-19.0.1/src/amd/addrlib/inc/chip/gfx9/gfx9_gb_reg.h 1970-01-01 00:00:00.000000000 +0000 @@ -1,89 +0,0 @@ -#if !defined (__GFX9_GB_REG_H__) -#define __GFX9_GB_REG_H__ - -/* - * Copyright © 2017 Advanced Micro Devices, Inc. - * All Rights Reserved. - * - * Permission is hereby granted, free of charge, to any person obtaining - * a copy of this software and associated documentation files (the - * "Software"), to deal in the Software without restriction, including - * without limitation the rights to use, copy, modify, merge, publish, - * distribute, sub license, and/or sell copies of the Software, and to - * permit persons to whom the Software is furnished to do so, subject to - * the following conditions: - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES - * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - * NON-INFRINGEMENT. 
IN NO EVENT SHALL THE COPYRIGHT HOLDERS, AUTHORS - * AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, - * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE - * USE OR OTHER DEALINGS IN THE SOFTWARE. - * - * The above copyright notice and this permission notice (including the - * next paragraph) shall be included in all copies or substantial portions - * of the Software. - */ - -#include "util/u_endian.h" - -#if defined(PIPE_ARCH_LITTLE_ENDIAN) -#define LITTLEENDIAN_CPU -#elif defined(PIPE_ARCH_BIG_ENDIAN) -#define BIGENDIAN_CPU -#endif - -// -// Make sure the necessary endian defines are there. -// -#if defined(LITTLEENDIAN_CPU) -#elif defined(BIGENDIAN_CPU) -#else -#error "BIGENDIAN_CPU or LITTLEENDIAN_CPU must be defined" -#endif - -union GB_ADDR_CONFIG { - struct { -#if defined(LITTLEENDIAN_CPU) - unsigned int NUM_PIPES : 3; - unsigned int PIPE_INTERLEAVE_SIZE : 3; - unsigned int MAX_COMPRESSED_FRAGS : 2; - unsigned int BANK_INTERLEAVE_SIZE : 3; - unsigned int : 1; - unsigned int NUM_BANKS : 3; - unsigned int : 1; - unsigned int SHADER_ENGINE_TILE_SIZE : 3; - unsigned int NUM_SHADER_ENGINES : 2; - unsigned int NUM_GPUS : 3; - unsigned int MULTI_GPU_TILE_SIZE : 2; - unsigned int NUM_RB_PER_SE : 2; - unsigned int ROW_SIZE : 2; - unsigned int NUM_LOWER_PIPES : 1; - unsigned int SE_ENABLE : 1; -#elif defined(BIGENDIAN_CPU) - unsigned int SE_ENABLE : 1; - unsigned int NUM_LOWER_PIPES : 1; - unsigned int ROW_SIZE : 2; - unsigned int NUM_RB_PER_SE : 2; - unsigned int MULTI_GPU_TILE_SIZE : 2; - unsigned int NUM_GPUS : 3; - unsigned int NUM_SHADER_ENGINES : 2; - unsigned int SHADER_ENGINE_TILE_SIZE : 3; - unsigned int : 1; - unsigned int NUM_BANKS : 3; - unsigned int : 1; - unsigned int BANK_INTERLEAVE_SIZE : 3; - unsigned int MAX_COMPRESSED_FRAGS : 2; - unsigned int PIPE_INTERLEAVE_SIZE : 3; - unsigned int NUM_PIPES : 3; -#endif - } bitfields, bits; - unsigned int u32All; - 
signed int i32All; - float f32All; -}; - -#endif - diff -Nru mesa-18.3.3/src/amd/addrlib/inc/chip/r800/si_gb_reg.h mesa-19.0.1/src/amd/addrlib/inc/chip/r800/si_gb_reg.h --- mesa-18.3.3/src/amd/addrlib/inc/chip/r800/si_gb_reg.h 2017-11-14 18:46:21.000000000 +0000 +++ mesa-19.0.1/src/amd/addrlib/inc/chip/r800/si_gb_reg.h 1970-01-01 00:00:00.000000000 +0000 @@ -1,163 +0,0 @@ -#if !defined (__SI_GB_REG_H__) -#define __SI_GB_REG_H__ - -/* - * Copyright © 2014 Advanced Micro Devices, Inc. - * All Rights Reserved. - * - * Permission is hereby granted, free of charge, to any person obtaining - * a copy of this software and associated documentation files (the - * "Software"), to deal in the Software without restriction, including - * without limitation the rights to use, copy, modify, merge, publish, - * distribute, sub license, and/or sell copies of the Software, and to - * permit persons to whom the Software is furnished to do so, subject to - * the following conditions: - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES - * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - * NON-INFRINGEMENT. IN NO EVENT SHALL THE COPYRIGHT HOLDERS, AUTHORS - * AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, - * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE - * USE OR OTHER DEALINGS IN THE SOFTWARE. - * - * The above copyright notice and this permission notice (including the - * next paragraph) shall be included in all copies or substantial portions - * of the Software. - */ - -#include "util/u_endian.h" - -#if defined(PIPE_ARCH_LITTLE_ENDIAN) -#define LITTLEENDIAN_CPU -#elif defined(PIPE_ARCH_BIG_ENDIAN) -#define BIGENDIAN_CPU -#endif - -// -// Make sure the necessary endian defines are there. 
-// -#if defined(LITTLEENDIAN_CPU) -#elif defined(BIGENDIAN_CPU) -#else -#error "BIGENDIAN_CPU or LITTLEENDIAN_CPU must be defined" -#endif - -/* - * GB_ADDR_CONFIG struct - */ - -#if defined(LITTLEENDIAN_CPU) - - typedef struct _GB_ADDR_CONFIG_T { - unsigned int num_pipes : 3; - unsigned int : 1; - unsigned int pipe_interleave_size : 3; - unsigned int : 1; - unsigned int bank_interleave_size : 3; - unsigned int : 1; - unsigned int num_shader_engines : 2; - unsigned int : 2; - unsigned int shader_engine_tile_size : 3; - unsigned int : 1; - unsigned int num_gpus : 3; - unsigned int : 1; - unsigned int multi_gpu_tile_size : 2; - unsigned int : 2; - unsigned int row_size : 2; - unsigned int num_lower_pipes : 1; - unsigned int : 1; - } GB_ADDR_CONFIG_T; - -#elif defined(BIGENDIAN_CPU) - - typedef struct _GB_ADDR_CONFIG_T { - unsigned int : 1; - unsigned int num_lower_pipes : 1; - unsigned int row_size : 2; - unsigned int : 2; - unsigned int multi_gpu_tile_size : 2; - unsigned int : 1; - unsigned int num_gpus : 3; - unsigned int : 1; - unsigned int shader_engine_tile_size : 3; - unsigned int : 2; - unsigned int num_shader_engines : 2; - unsigned int : 1; - unsigned int bank_interleave_size : 3; - unsigned int : 1; - unsigned int pipe_interleave_size : 3; - unsigned int : 1; - unsigned int num_pipes : 3; - } GB_ADDR_CONFIG_T; - -#endif - -typedef union { - unsigned int val : 32; - GB_ADDR_CONFIG_T f; -} GB_ADDR_CONFIG; - -#if defined(LITTLEENDIAN_CPU) - - typedef struct _GB_TILE_MODE_T { - unsigned int micro_tile_mode : 2; - unsigned int array_mode : 4; - unsigned int pipe_config : 5; - unsigned int tile_split : 3; - unsigned int bank_width : 2; - unsigned int bank_height : 2; - unsigned int macro_tile_aspect : 2; - unsigned int num_banks : 2; - unsigned int micro_tile_mode_new : 3; - unsigned int sample_split : 2; - unsigned int : 5; - } GB_TILE_MODE_T; - - typedef struct _GB_MACROTILE_MODE_T { - unsigned int bank_width : 2; - unsigned int bank_height : 2; - unsigned 
int macro_tile_aspect : 2; - unsigned int num_banks : 2; - unsigned int : 24; - } GB_MACROTILE_MODE_T; - -#elif defined(BIGENDIAN_CPU) - - typedef struct _GB_TILE_MODE_T { - unsigned int : 5; - unsigned int sample_split : 2; - unsigned int micro_tile_mode_new : 3; - unsigned int num_banks : 2; - unsigned int macro_tile_aspect : 2; - unsigned int bank_height : 2; - unsigned int bank_width : 2; - unsigned int tile_split : 3; - unsigned int pipe_config : 5; - unsigned int array_mode : 4; - unsigned int micro_tile_mode : 2; - } GB_TILE_MODE_T; - - typedef struct _GB_MACROTILE_MODE_T { - unsigned int : 24; - unsigned int num_banks : 2; - unsigned int macro_tile_aspect : 2; - unsigned int bank_height : 2; - unsigned int bank_width : 2; - } GB_MACROTILE_MODE_T; - -#endif - -typedef union { - unsigned int val : 32; - GB_TILE_MODE_T f; -} GB_TILE_MODE; - -typedef union { - unsigned int val : 32; - GB_MACROTILE_MODE_T f; -} GB_MACROTILE_MODE; - -#endif - diff -Nru mesa-18.3.3/src/amd/addrlib/meson.build mesa-19.0.1/src/amd/addrlib/meson.build --- mesa-18.3.3/src/amd/addrlib/meson.build 2018-12-07 18:58:04.000000000 +0000 +++ mesa-19.0.1/src/amd/addrlib/meson.build 2019-03-31 23:16:37.000000000 +0000 @@ -19,35 +19,33 @@ # SOFTWARE. 
files_addrlib = files( - 'addrinterface.cpp', - 'addrinterface.h', - 'addrtypes.h', - 'core/addrcommon.h', - 'core/addrelemlib.cpp', - 'core/addrelemlib.h', - 'core/addrlib.cpp', - 'core/addrlib.h', - 'core/addrlib1.cpp', - 'core/addrlib1.h', - 'core/addrlib2.cpp', - 'core/addrlib2.h', - 'core/addrobject.cpp', - 'core/addrobject.h', - 'gfx9/chip/gfx9_enum.h', - 'gfx9/coord.cpp', - 'gfx9/coord.h', - 'gfx9/gfx9addrlib.cpp', - 'gfx9/gfx9addrlib.h', - 'amdgpu_asic_addr.h', - 'inc/chip/gfx9/gfx9_gb_reg.h', - 'inc/chip/r800/si_gb_reg.h', - 'r800/chip/si_ci_vi_merged_enum.h', - 'r800/ciaddrlib.cpp', - 'r800/ciaddrlib.h', - 'r800/egbaddrlib.cpp', - 'r800/egbaddrlib.h', - 'r800/siaddrlib.cpp', - 'r800/siaddrlib.h', + 'inc/addrinterface.h', + 'inc/addrtypes.h', + 'src/addrinterface.cpp', + 'src/core/addrcommon.h', + 'src/core/addrelemlib.cpp', + 'src/core/addrelemlib.h', + 'src/core/addrlib.cpp', + 'src/core/addrlib.h', + 'src/core/addrlib1.cpp', + 'src/core/addrlib1.h', + 'src/core/addrlib2.cpp', + 'src/core/addrlib2.h', + 'src/core/addrobject.cpp', + 'src/core/addrobject.h', + 'src/core/coord.cpp', + 'src/core/coord.h', + 'src/gfx9/gfx9addrlib.cpp', + 'src/gfx9/gfx9addrlib.h', + 'src/amdgpu_asic_addr.h', + 'src/chip/gfx9/gfx9_gb_reg.h', + 'src/chip/r800/si_gb_reg.h', + 'src/r800/ciaddrlib.cpp', + 'src/r800/ciaddrlib.h', + 'src/r800/egbaddrlib.cpp', + 'src/r800/egbaddrlib.h', + 'src/r800/siaddrlib.cpp', + 'src/r800/siaddrlib.h', ) libamdgpu_addrlib = static_library( @@ -55,7 +53,7 @@ files_addrlib, include_directories : [ include_directories( - 'core', 'inc/chip/gfx9', 'inc/chip/r800', 'gfx9/chip', 'r800/chip', + 'inc', 'src', 'src/core', 'src/chip/gfx9', 'src/chip/r800', ), inc_amd_common, inc_common, inc_src, ], diff -Nru mesa-18.3.3/src/amd/addrlib/r800/chip/si_ci_vi_merged_enum.h mesa-19.0.1/src/amd/addrlib/r800/chip/si_ci_vi_merged_enum.h --- mesa-18.3.3/src/amd/addrlib/r800/chip/si_ci_vi_merged_enum.h 2017-11-05 00:14:08.000000000 +0000 +++ 
mesa-19.0.1/src/amd/addrlib/r800/chip/si_ci_vi_merged_enum.h 1970-01-01 00:00:00.000000000 +0000 @@ -1,40 +0,0 @@ -/* - * Copyright © 2014 Advanced Micro Devices, Inc. - * All Rights Reserved. - * - * Permission is hereby granted, free of charge, to any person obtaining - * a copy of this software and associated documentation files (the - * "Software"), to deal in the Software without restriction, including - * without limitation the rights to use, copy, modify, merge, publish, - * distribute, sub license, and/or sell copies of the Software, and to - * permit persons to whom the Software is furnished to do so, subject to - * the following conditions: - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES - * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - * NON-INFRINGEMENT. IN NO EVENT SHALL THE COPYRIGHT HOLDERS, AUTHORS - * AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, - * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE - * USE OR OTHER DEALINGS IN THE SOFTWARE. - * - * The above copyright notice and this permission notice (including the - * next paragraph) shall be included in all copies or substantial portions - * of the Software. 
- */ -#if !defined (SI_CI_VI_MERGED_ENUM_HEADER) -#define SI_CI_VI_MERGED_ENUM_HEADER - -typedef enum PipeInterleaveSize { -ADDR_CONFIG_PIPE_INTERLEAVE_256B = 0x00000000, -ADDR_CONFIG_PIPE_INTERLEAVE_512B = 0x00000001, -} PipeInterleaveSize; - -typedef enum RowSize { -ADDR_CONFIG_1KB_ROW = 0x00000000, -ADDR_CONFIG_2KB_ROW = 0x00000001, -ADDR_CONFIG_4KB_ROW = 0x00000002, -} RowSize; - -#endif diff -Nru mesa-18.3.3/src/amd/addrlib/r800/ciaddrlib.cpp mesa-19.0.1/src/amd/addrlib/r800/ciaddrlib.cpp --- mesa-18.3.3/src/amd/addrlib/r800/ciaddrlib.cpp 2018-04-19 04:33:31.000000000 +0000 +++ mesa-19.0.1/src/amd/addrlib/r800/ciaddrlib.cpp 1970-01-01 00:00:00.000000000 +0000 @@ -1,2341 +0,0 @@ -/* - * Copyright © 2014 Advanced Micro Devices, Inc. - * All Rights Reserved. - * - * Permission is hereby granted, free of charge, to any person obtaining - * a copy of this software and associated documentation files (the - * "Software"), to deal in the Software without restriction, including - * without limitation the rights to use, copy, modify, merge, publish, - * distribute, sub license, and/or sell copies of the Software, and to - * permit persons to whom the Software is furnished to do so, subject to - * the following conditions: - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES - * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - * NON-INFRINGEMENT. IN NO EVENT SHALL THE COPYRIGHT HOLDERS, AUTHORS - * AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, - * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE - * USE OR OTHER DEALINGS IN THE SOFTWARE. - * - * The above copyright notice and this permission notice (including the - * next paragraph) shall be included in all copies or substantial portions - * of the Software. 
- */ - -/** -**************************************************************************************************** -* @file ciaddrlib.cpp -* @brief Contains the implementation for the CiLib class. -**************************************************************************************************** -*/ - -#include "ciaddrlib.h" - -#include "si_gb_reg.h" - -#include "amdgpu_asic_addr.h" - -//////////////////////////////////////////////////////////////////////////////////////////////////// -//////////////////////////////////////////////////////////////////////////////////////////////////// - -namespace Addr -{ - -/** -**************************************************************************************************** -* CiHwlInit -* -* @brief -* Creates an CiLib object. -* -* @return -* Returns an CiLib object pointer. -**************************************************************************************************** -*/ -Lib* CiHwlInit(const Client* pClient) -{ - return V1::CiLib::CreateObj(pClient); -} - -namespace V1 -{ - -/** -**************************************************************************************************** -* Mask -* -* @brief -* Gets a mask of "width" -* @return -* Bit mask -**************************************************************************************************** -*/ -static UINT_64 Mask( - UINT_32 width) ///< Width of bits -{ - UINT_64 ret; - - if (width >= sizeof(UINT_64)*8) - { - ret = ~((UINT_64) 0); - } - else - { - return (((UINT_64) 1) << width) - 1; - } - return ret; -} - -/** -**************************************************************************************************** -* GetBits -* -* @brief -* Gets bits within a range of [msb, lsb] -* @return -* Bits of this range -**************************************************************************************************** -*/ -static UINT_64 GetBits( - UINT_64 bits, ///< Source bits - UINT_32 msb, ///< Most signicant bit - UINT_32 lsb) ///< Least signicant bit -{ - 
UINT_64 ret = 0; - - if (msb >= lsb) - { - ret = (bits >> lsb) & (Mask(1 + msb - lsb)); - } - return ret; -} - -/** -**************************************************************************************************** -* RemoveBits -* -* @brief -* Removes bits within the range of [msb, lsb] -* @return -* Modified bits -**************************************************************************************************** -*/ -static UINT_64 RemoveBits( - UINT_64 bits, ///< Source bits - UINT_32 msb, ///< Most signicant bit - UINT_32 lsb) ///< Least signicant bit -{ - UINT_64 ret = bits; - - if (msb >= lsb) - { - ret = GetBits(bits, lsb - 1, 0) // low bits - | (GetBits(bits, 8 * sizeof(bits) - 1, msb + 1) << lsb); //high bits - } - return ret; -} - -/** -**************************************************************************************************** -* InsertBits -* -* @brief -* Inserts new bits into the range of [msb, lsb] -* @return -* Modified bits -**************************************************************************************************** -*/ -static UINT_64 InsertBits( - UINT_64 bits, ///< Source bits - UINT_64 newBits, ///< New bits to be inserted - UINT_32 msb, ///< Most signicant bit - UINT_32 lsb) ///< Least signicant bit -{ - UINT_64 ret = bits; - - if (msb >= lsb) - { - ret = GetBits(bits, lsb - 1, 0) // old low bitss - | (GetBits(newBits, msb - lsb, 0) << lsb) //new bits - | (GetBits(bits, 8 * sizeof(bits) - 1, lsb) << (msb + 1)); //old high bits - } - return ret; -} - -/** -**************************************************************************************************** -* CiLib::CiLib -* -* @brief -* Constructor -* -**************************************************************************************************** -*/ -CiLib::CiLib(const Client* pClient) - : - SiLib(pClient), - m_noOfMacroEntries(0), - m_allowNonDispThickModes(FALSE) -{ - m_class = CI_ADDRLIB; -} - -/** 
-**************************************************************************************************** -* CiLib::~CiLib -* -* @brief -* Destructor -**************************************************************************************************** -*/ -CiLib::~CiLib() -{ -} - -/** -**************************************************************************************************** -* CiLib::HwlComputeDccInfo -* -* @brief -* Compute DCC key size, base alignment -* @return -* ADDR_E_RETURNCODE -**************************************************************************************************** -*/ -ADDR_E_RETURNCODE CiLib::HwlComputeDccInfo( - const ADDR_COMPUTE_DCCINFO_INPUT* pIn, - ADDR_COMPUTE_DCCINFO_OUTPUT* pOut) const -{ - ADDR_E_RETURNCODE returnCode = ADDR_OK; - - if (m_settings.isVolcanicIslands && IsMacroTiled(pIn->tileMode)) - { - UINT_64 dccFastClearSize = pIn->colorSurfSize >> 8; - - ADDR_ASSERT(0 == (pIn->colorSurfSize & 0xff)); - - if (pIn->numSamples > 1) - { - UINT_32 tileSizePerSample = BITS_TO_BYTES(pIn->bpp * MicroTileWidth * MicroTileHeight); - UINT_32 samplesPerSplit = pIn->tileInfo.tileSplitBytes / tileSizePerSample; - - if (samplesPerSplit < pIn->numSamples) - { - UINT_32 numSplits = pIn->numSamples / samplesPerSplit; - UINT_32 fastClearBaseAlign = HwlGetPipes(&pIn->tileInfo) * m_pipeInterleaveBytes; - - ADDR_ASSERT(IsPow2(fastClearBaseAlign)); - - dccFastClearSize /= numSplits; - - if (0 != (dccFastClearSize & (fastClearBaseAlign - 1))) - { - // Disable dcc fast clear - // if key size of fisrt sample split is not pipe*interleave aligned - dccFastClearSize = 0; - } - } - } - - pOut->dccRamSize = pIn->colorSurfSize >> 8; - pOut->dccRamBaseAlign = pIn->tileInfo.banks * - HwlGetPipes(&pIn->tileInfo) * - m_pipeInterleaveBytes; - pOut->dccFastClearSize = dccFastClearSize; - pOut->dccRamSizeAligned = TRUE; - - ADDR_ASSERT(IsPow2(pOut->dccRamBaseAlign)); - - if (0 == (pOut->dccRamSize & (pOut->dccRamBaseAlign - 1))) - { - pOut->subLvlCompressible = 
TRUE; - } - else - { - UINT_64 dccRamSizeAlign = HwlGetPipes(&pIn->tileInfo) * m_pipeInterleaveBytes; - - if (pOut->dccRamSize == pOut->dccFastClearSize) - { - pOut->dccFastClearSize = PowTwoAlign(pOut->dccRamSize, dccRamSizeAlign); - } - if ((pOut->dccRamSize & (dccRamSizeAlign - 1)) != 0) - { - pOut->dccRamSizeAligned = FALSE; - } - pOut->dccRamSize = PowTwoAlign(pOut->dccRamSize, dccRamSizeAlign); - pOut->subLvlCompressible = FALSE; - } - } - else - { - returnCode = ADDR_NOTSUPPORTED; - } - - return returnCode; -} - -/** -**************************************************************************************************** -* CiLib::HwlComputeCmaskAddrFromCoord -* -* @brief -* Compute tc compatible Cmask address from fmask ram address -* -* @return -* ADDR_E_RETURNCODE -**************************************************************************************************** -*/ -ADDR_E_RETURNCODE CiLib::HwlComputeCmaskAddrFromCoord( - const ADDR_COMPUTE_CMASK_ADDRFROMCOORD_INPUT* pIn, ///< [in] fmask addr/bpp/tile input - ADDR_COMPUTE_CMASK_ADDRFROMCOORD_OUTPUT* pOut ///< [out] cmask address - ) const -{ - ADDR_E_RETURNCODE returnCode = ADDR_NOTSUPPORTED; - - if ((m_settings.isVolcanicIslands == TRUE) && - (pIn->flags.tcCompatible == TRUE)) - { - UINT_32 numOfPipes = HwlGetPipes(pIn->pTileInfo); - UINT_32 numOfBanks = pIn->pTileInfo->banks; - UINT_64 fmaskAddress = pIn->fmaskAddr; - UINT_32 elemBits = pIn->bpp; - UINT_32 blockByte = 64 * elemBits / 8; - UINT_64 metaNibbleAddress = HwlComputeMetadataNibbleAddress(fmaskAddress, - 0, - 0, - 4, // cmask 4 bits - elemBits, - blockByte, - m_pipeInterleaveBytes, - numOfPipes, - numOfBanks, - 1); - pOut->addr = (metaNibbleAddress >> 1); - pOut->bitPosition = (metaNibbleAddress % 2) ? 
4 : 0; - returnCode = ADDR_OK; - } - - return returnCode; -} - -/** -**************************************************************************************************** -* CiLib::HwlComputeHtileAddrFromCoord -* -* @brief -* Compute tc compatible Htile address from depth/stencil address -* -* @return -* ADDR_E_RETURNCODE -**************************************************************************************************** -*/ -ADDR_E_RETURNCODE CiLib::HwlComputeHtileAddrFromCoord( - const ADDR_COMPUTE_HTILE_ADDRFROMCOORD_INPUT* pIn, ///< [in] depth/stencil addr/bpp/tile input - ADDR_COMPUTE_HTILE_ADDRFROMCOORD_OUTPUT* pOut ///< [out] htile address - ) const -{ - ADDR_E_RETURNCODE returnCode = ADDR_NOTSUPPORTED; - - if ((m_settings.isVolcanicIslands == TRUE) && - (pIn->flags.tcCompatible == TRUE)) - { - UINT_32 numOfPipes = HwlGetPipes(pIn->pTileInfo); - UINT_32 numOfBanks = pIn->pTileInfo->banks; - UINT_64 zStencilAddr = pIn->zStencilAddr; - UINT_32 elemBits = pIn->bpp; - UINT_32 blockByte = 64 * elemBits / 8; - UINT_64 metaNibbleAddress = HwlComputeMetadataNibbleAddress(zStencilAddr, - 0, - 0, - 32, // htile 32 bits - elemBits, - blockByte, - m_pipeInterleaveBytes, - numOfPipes, - numOfBanks, - 1); - pOut->addr = (metaNibbleAddress >> 1); - pOut->bitPosition = 0; - returnCode = ADDR_OK; - } - - return returnCode; -} - -/** -**************************************************************************************************** -* CiLib::HwlConvertChipFamily -* -* @brief -* Convert familyID defined in atiid.h to ChipFamily and set m_chipFamily/m_chipRevision -* @return -* ChipFamily -**************************************************************************************************** -*/ -ChipFamily CiLib::HwlConvertChipFamily( - UINT_32 uChipFamily, ///< [in] chip family defined in atiih.h - UINT_32 uChipRevision) ///< [in] chip revision defined in "asic_family"_id.h -{ - ChipFamily family = ADDR_CHIP_FAMILY_CI; - - switch (uChipFamily) - { - case FAMILY_CI: - 
m_settings.isSeaIsland = 1; - m_settings.isBonaire = ASICREV_IS_BONAIRE_M(uChipRevision); - m_settings.isHawaii = ASICREV_IS_HAWAII_P(uChipRevision); - break; - case FAMILY_KV: - m_settings.isKaveri = 1; - m_settings.isSpectre = ASICREV_IS_SPECTRE(uChipRevision); - m_settings.isSpooky = ASICREV_IS_SPOOKY(uChipRevision); - m_settings.isKalindi = ASICREV_IS_KALINDI(uChipRevision); - break; - case FAMILY_VI: - m_settings.isVolcanicIslands = 1; - m_settings.isIceland = ASICREV_IS_ICELAND_M(uChipRevision); - m_settings.isTonga = ASICREV_IS_TONGA_P(uChipRevision); - m_settings.isFiji = ASICREV_IS_FIJI_P(uChipRevision); - m_settings.isPolaris10 = ASICREV_IS_POLARIS10_P(uChipRevision); - m_settings.isPolaris11 = ASICREV_IS_POLARIS11_M(uChipRevision); - m_settings.isPolaris12 = ASICREV_IS_POLARIS12_V(uChipRevision); - m_settings.isVegaM = ASICREV_IS_VEGAM_P(uChipRevision); - family = ADDR_CHIP_FAMILY_VI; - break; - case FAMILY_CZ: - m_settings.isCarrizo = 1; - m_settings.isVolcanicIslands = 1; - family = ADDR_CHIP_FAMILY_VI; - break; - default: - ADDR_ASSERT(!"This should be a unexpected Fusion"); - break; - } - - return family; -} - -/** -**************************************************************************************************** -* CiLib::HwlInitGlobalParams -* -* @brief -* Initializes global parameters -* -* @return -* TRUE if all settings are valid -* -**************************************************************************************************** -*/ -BOOL_32 CiLib::HwlInitGlobalParams( - const ADDR_CREATE_INPUT* pCreateIn) ///< [in] create input -{ - BOOL_32 valid = TRUE; - - const ADDR_REGISTER_VALUE* pRegValue = &pCreateIn->regValue; - - valid = DecodeGbRegs(pRegValue); - - // The following assignments for m_pipes is only for fail-safe, InitTileSettingTable should - // read the correct pipes from tile mode table - if (m_settings.isHawaii) - { - m_pipes = 16; - } - else if (m_settings.isBonaire || m_settings.isSpectre) - { - m_pipes = 4; - } - else // 
Treat other KV asics to be 2-pipe - { - m_pipes = 2; - } - - // @todo: VI - // Move this to VI code path once created - if (m_settings.isTonga || m_settings.isPolaris10) - { - m_pipes = 8; - } - else if (m_settings.isIceland) - { - m_pipes = 2; - } - else if (m_settings.isFiji) - { - m_pipes = 16; - } - else if (m_settings.isPolaris11 || m_settings.isPolaris12) - { - m_pipes = 4; - } - else if (m_settings.isVegaM) - { - m_pipes = 16; - } - - if (valid) - { - valid = InitTileSettingTable(pRegValue->pTileConfig, pRegValue->noOfEntries); - } - if (valid) - { - valid = InitMacroTileCfgTable(pRegValue->pMacroTileConfig, pRegValue->noOfMacroEntries); - } - - if (valid) - { - InitEquationTable(); - } - - return valid; -} - -/** -**************************************************************************************************** -* CiLib::HwlPostCheckTileIndex -* -* @brief -* Map a tile setting to index if curIndex is invalid, otherwise check if curIndex matches -* tile mode/type/info and change the index if needed -* @return -* Tile index. -**************************************************************************************************** -*/ -INT_32 CiLib::HwlPostCheckTileIndex( - const ADDR_TILEINFO* pInfo, ///< [in] Tile Info - AddrTileMode mode, ///< [in] Tile mode - AddrTileType type, ///< [in] Tile type - INT curIndex ///< [in] Current index assigned in HwlSetupTileInfo - ) const -{ - INT_32 index = curIndex; - - if (mode == ADDR_TM_LINEAR_GENERAL) - { - index = TileIndexLinearGeneral; - } - else - { - BOOL_32 macroTiled = IsMacroTiled(mode); - - // We need to find a new index if either of them is true - // 1. curIndex is invalid - // 2. tile mode is changed - // 3. 
tile info does not match for macro tiled - if ((index == TileIndexInvalid) || - (mode != m_tileTable[index].mode) || - (macroTiled && pInfo->pipeConfig != m_tileTable[index].info.pipeConfig)) - { - for (index = 0; index < static_cast(m_noOfEntries); index++) - { - if (macroTiled) - { - // macro tile modes need all to match - if ((pInfo->pipeConfig == m_tileTable[index].info.pipeConfig) && - (mode == m_tileTable[index].mode) && - (type == m_tileTable[index].type)) - { - // tileSplitBytes stored in m_tileTable is only valid for depth entries - if (type == ADDR_DEPTH_SAMPLE_ORDER) - { - if (Min(m_tileTable[index].info.tileSplitBytes, - m_rowSize) == pInfo->tileSplitBytes) - { - break; - } - } - else // other entries are determined by other 3 fields - { - break; - } - } - } - else if (mode == ADDR_TM_LINEAR_ALIGNED) - { - // linear mode only needs tile mode to match - if (mode == m_tileTable[index].mode) - { - break; - } - } - else - { - // micro tile modes only need tile mode and tile type to match - if (mode == m_tileTable[index].mode && - type == m_tileTable[index].type) - { - break; - } - } - } - } - } - - ADDR_ASSERT(index < static_cast(m_noOfEntries)); - - if (index >= static_cast(m_noOfEntries)) - { - index = TileIndexInvalid; - } - - return index; -} - -/** -**************************************************************************************************** -* CiLib::HwlSetupTileCfg -* -* @brief -* Map tile index to tile setting. 
-* @return -* ADDR_E_RETURNCODE -**************************************************************************************************** -*/ -ADDR_E_RETURNCODE CiLib::HwlSetupTileCfg( - UINT_32 bpp, ///< Bits per pixel - INT_32 index, ///< Tile index - INT_32 macroModeIndex, ///< Index in macro tile mode table(CI) - ADDR_TILEINFO* pInfo, ///< [out] Tile Info - AddrTileMode* pMode, ///< [out] Tile mode - AddrTileType* pType ///< [out] Tile type - ) const -{ - ADDR_E_RETURNCODE returnCode = ADDR_OK; - - // Global flag to control usage of tileIndex - if (UseTileIndex(index)) - { - if (index == TileIndexLinearGeneral) - { - pInfo->banks = 2; - pInfo->bankWidth = 1; - pInfo->bankHeight = 1; - pInfo->macroAspectRatio = 1; - pInfo->tileSplitBytes = 64; - pInfo->pipeConfig = ADDR_PIPECFG_P2; - } - else if (static_cast(index) >= m_noOfEntries) - { - returnCode = ADDR_INVALIDPARAMS; - } - else - { - const TileConfig* pCfgTable = GetTileSetting(index); - - if (pInfo != NULL) - { - if (IsMacroTiled(pCfgTable->mode)) - { - ADDR_ASSERT((macroModeIndex != TileIndexInvalid) && - (macroModeIndex != TileIndexNoMacroIndex)); - - UINT_32 tileSplit; - - *pInfo = m_macroTileTable[macroModeIndex]; - - if (pCfgTable->type == ADDR_DEPTH_SAMPLE_ORDER) - { - tileSplit = pCfgTable->info.tileSplitBytes; - } - else - { - if (bpp > 0) - { - UINT_32 thickness = Thickness(pCfgTable->mode); - UINT_32 tileBytes1x = BITS_TO_BYTES(bpp * MicroTilePixels * thickness); - // Non-depth entries store a split factor - UINT_32 sampleSplit = m_tileTable[index].info.tileSplitBytes; - tileSplit = Max(256u, sampleSplit * tileBytes1x); - } - else - { - // Return tileBytes instead if not enough info - tileSplit = pInfo->tileSplitBytes; - } - } - - // Clamp to row_size - pInfo->tileSplitBytes = Min(m_rowSize, tileSplit); - - pInfo->pipeConfig = pCfgTable->info.pipeConfig; - } - else // 1D and linear modes, we return default value stored in table - { - *pInfo = pCfgTable->info; - } - } - - if (pMode != NULL) - { - 
*pMode = pCfgTable->mode; - } - - if (pType != NULL) - { - *pType = pCfgTable->type; - } - } - } - - return returnCode; -} - -/** -**************************************************************************************************** -* CiLib::HwlComputeSurfaceInfo -* -* @brief -* Entry of CI's ComputeSurfaceInfo -* @return -* ADDR_E_RETURNCODE -**************************************************************************************************** -*/ -ADDR_E_RETURNCODE CiLib::HwlComputeSurfaceInfo( - const ADDR_COMPUTE_SURFACE_INFO_INPUT* pIn, ///< [in] input structure - ADDR_COMPUTE_SURFACE_INFO_OUTPUT* pOut ///< [out] output structure - ) const -{ - // If tileIndex is invalid, force macroModeIndex to be invalid, too - if (pIn->tileIndex == TileIndexInvalid) - { - pOut->macroModeIndex = TileIndexInvalid; - } - - ADDR_E_RETURNCODE retCode = SiLib::HwlComputeSurfaceInfo(pIn, pOut); - - if ((pIn->mipLevel > 0) && - (pOut->tcCompatible == TRUE) && - (pOut->tileMode != pIn->tileMode) && - (m_settings.isVolcanicIslands == TRUE)) - { - pOut->tcCompatible = CheckTcCompatibility(pOut->pTileInfo, pIn->bpp, pOut->tileMode, pOut->tileType, pOut); - } - - if (pOut->macroModeIndex == TileIndexNoMacroIndex) - { - pOut->macroModeIndex = TileIndexInvalid; - } - - if ((pIn->flags.matchStencilTileCfg == TRUE) && - (pIn->flags.depth == TRUE)) - { - pOut->stencilTileIdx = TileIndexInvalid; - - if ((MinDepth2DThinIndex <= pOut->tileIndex) && - (MaxDepth2DThinIndex >= pOut->tileIndex)) - { - BOOL_32 depthStencil2DTileConfigMatch = DepthStencilTileCfgMatch(pIn, pOut); - - if ((depthStencil2DTileConfigMatch == FALSE) && - (pOut->tcCompatible == TRUE)) - { - pOut->macroModeIndex = TileIndexInvalid; - - ADDR_COMPUTE_SURFACE_INFO_INPUT localIn = *pIn; - localIn.tileIndex = TileIndexInvalid; - localIn.pTileInfo = NULL; - localIn.flags.tcCompatible = FALSE; - - SiLib::HwlComputeSurfaceInfo(&localIn, pOut); - - ADDR_ASSERT((MinDepth2DThinIndex <= pOut->tileIndex) && (MaxDepth2DThinIndex >= 
pOut->tileIndex)); - - depthStencil2DTileConfigMatch = DepthStencilTileCfgMatch(pIn, pOut); - } - - if ((depthStencil2DTileConfigMatch == FALSE) && - (pIn->numSamples <= 1)) - { - pOut->macroModeIndex = TileIndexInvalid; - - ADDR_COMPUTE_SURFACE_INFO_INPUT localIn = *pIn; - localIn.tileMode = ADDR_TM_1D_TILED_THIN1; - localIn.tileIndex = TileIndexInvalid; - localIn.pTileInfo = NULL; - - retCode = SiLib::HwlComputeSurfaceInfo(&localIn, pOut); - } - } - - if (pOut->tileIndex == Depth1DThinIndex) - { - pOut->stencilTileIdx = Depth1DThinIndex; - } - } - - return retCode; -} - -/** -**************************************************************************************************** -* CiLib::HwlFmaskSurfaceInfo -* @brief -* Entry of r800's ComputeFmaskInfo -* @return -* ADDR_E_RETURNCODE -**************************************************************************************************** -*/ -ADDR_E_RETURNCODE CiLib::HwlComputeFmaskInfo( - const ADDR_COMPUTE_FMASK_INFO_INPUT* pIn, ///< [in] input structure - ADDR_COMPUTE_FMASK_INFO_OUTPUT* pOut ///< [out] output structure - ) -{ - ADDR_E_RETURNCODE retCode = ADDR_OK; - - ADDR_TILEINFO tileInfo = {0}; - ADDR_COMPUTE_FMASK_INFO_INPUT fmaskIn; - fmaskIn = *pIn; - - AddrTileMode tileMode = pIn->tileMode; - - // Use internal tile info if pOut does not have a valid pTileInfo - if (pOut->pTileInfo == NULL) - { - pOut->pTileInfo = &tileInfo; - } - - ADDR_ASSERT(tileMode == ADDR_TM_2D_TILED_THIN1 || - tileMode == ADDR_TM_3D_TILED_THIN1 || - tileMode == ADDR_TM_PRT_TILED_THIN1 || - tileMode == ADDR_TM_PRT_2D_TILED_THIN1 || - tileMode == ADDR_TM_PRT_3D_TILED_THIN1); - - ADDR_ASSERT(m_tileTable[14].mode == ADDR_TM_2D_TILED_THIN1); - ADDR_ASSERT(m_tileTable[15].mode == ADDR_TM_3D_TILED_THIN1); - - // The only valid tile modes for fmask are 2D_THIN1 and 3D_THIN1 plus non-displayable - INT_32 tileIndex = tileMode == ADDR_TM_2D_TILED_THIN1 ? 
14 : 15; - ADDR_SURFACE_FLAGS flags = {{0}}; - flags.fmask = 1; - - INT_32 macroModeIndex = TileIndexInvalid; - - UINT_32 numSamples = pIn->numSamples; - UINT_32 numFrags = pIn->numFrags == 0 ? numSamples : pIn->numFrags; - - UINT_32 bpp = QLog2(numFrags); - - // EQAA needs one more bit - if (numSamples > numFrags) - { - bpp++; - } - - if (bpp == 3) - { - bpp = 4; - } - - bpp = Max(8u, bpp * numSamples); - - macroModeIndex = HwlComputeMacroModeIndex(tileIndex, flags, bpp, numSamples, pOut->pTileInfo); - - fmaskIn.tileIndex = tileIndex; - fmaskIn.pTileInfo = pOut->pTileInfo; - pOut->macroModeIndex = macroModeIndex; - pOut->tileIndex = tileIndex; - - retCode = DispatchComputeFmaskInfo(&fmaskIn, pOut); - - if (retCode == ADDR_OK) - { - pOut->tileIndex = - HwlPostCheckTileIndex(pOut->pTileInfo, pIn->tileMode, ADDR_NON_DISPLAYABLE, - pOut->tileIndex); - } - - // Resets pTileInfo to NULL if the internal tile info is used - if (pOut->pTileInfo == &tileInfo) - { - pOut->pTileInfo = NULL; - } - - return retCode; -} - -/** -**************************************************************************************************** -* CiLib::HwlFmaskPreThunkSurfInfo -* -* @brief -* Some preparation before thunking a ComputeSurfaceInfo call for Fmask -* @return -* ADDR_E_RETURNCODE -**************************************************************************************************** -*/ -VOID CiLib::HwlFmaskPreThunkSurfInfo( - const ADDR_COMPUTE_FMASK_INFO_INPUT* pFmaskIn, ///< [in] Input of fmask info - const ADDR_COMPUTE_FMASK_INFO_OUTPUT* pFmaskOut, ///< [in] Output of fmask info - ADDR_COMPUTE_SURFACE_INFO_INPUT* pSurfIn, ///< [out] Input of thunked surface info - ADDR_COMPUTE_SURFACE_INFO_OUTPUT* pSurfOut ///< [out] Output of thunked surface info - ) const -{ - pSurfIn->tileIndex = pFmaskIn->tileIndex; - pSurfOut->macroModeIndex = pFmaskOut->macroModeIndex; -} - -/** -**************************************************************************************************** -* 
CiLib::HwlFmaskPostThunkSurfInfo -* -* @brief -* Copy hwl extra field after calling thunked ComputeSurfaceInfo -* @return -* ADDR_E_RETURNCODE -**************************************************************************************************** -*/ -VOID CiLib::HwlFmaskPostThunkSurfInfo( - const ADDR_COMPUTE_SURFACE_INFO_OUTPUT* pSurfOut, ///< [in] Output of surface info - ADDR_COMPUTE_FMASK_INFO_OUTPUT* pFmaskOut ///< [out] Output of fmask info - ) const -{ - pFmaskOut->tileIndex = pSurfOut->tileIndex; - pFmaskOut->macroModeIndex = pSurfOut->macroModeIndex; -} - -/** -**************************************************************************************************** -* CiLib::HwlDegradeThickTileMode -* -* @brief -* Degrades valid tile mode for thick modes if needed -* -* @return -* Suitable tile mode -**************************************************************************************************** -*/ -AddrTileMode CiLib::HwlDegradeThickTileMode( - AddrTileMode baseTileMode, ///< [in] base tile mode - UINT_32 numSlices, ///< [in] current number of slices - UINT_32* pBytesPerTile ///< [in,out] pointer to bytes per slice - ) const -{ - return baseTileMode; -} - -/** -**************************************************************************************************** -* CiLib::HwlOptimizeTileMode -* -* @brief -* Optimize tile mode on CI -* -* @return -* N/A -* -**************************************************************************************************** -*/ -VOID CiLib::HwlOptimizeTileMode( - ADDR_COMPUTE_SURFACE_INFO_INPUT* pInOut ///< [in,out] input output structure - ) const -{ - AddrTileMode tileMode = pInOut->tileMode; - - // Override 2D/3D macro tile mode to PRT_* tile mode if - // client driver requests this surface is equation compatible - if (IsMacroTiled(tileMode) == TRUE) - { - if ((pInOut->flags.needEquation == TRUE) && - (pInOut->numSamples <= 1) && - (IsPrtTileMode(tileMode) == FALSE)) - { - if ((pInOut->numSlices > 1) && 
((pInOut->maxBaseAlign == 0) || (pInOut->maxBaseAlign >= Block64K))) - { - UINT_32 thickness = Thickness(tileMode); - - if (thickness == 1) - { - tileMode = ADDR_TM_PRT_TILED_THIN1; - } - else - { - static const UINT_32 PrtTileBytes = 0x10000; - // First prt thick tile index in the tile mode table - static const UINT_32 PrtThickTileIndex = 22; - ADDR_TILEINFO tileInfo = {0}; - - HwlComputeMacroModeIndex(PrtThickTileIndex, - pInOut->flags, - pInOut->bpp, - pInOut->numSamples, - &tileInfo); - - UINT_32 macroTileBytes = ((pInOut->bpp) >> 3) * 64 * pInOut->numSamples * - thickness * HwlGetPipes(&tileInfo) * - tileInfo.banks * tileInfo.bankWidth * - tileInfo.bankHeight; - - if (macroTileBytes <= PrtTileBytes) - { - tileMode = ADDR_TM_PRT_TILED_THICK; - } - else - { - tileMode = ADDR_TM_PRT_TILED_THIN1; - } - } - } - } - - if (pInOut->maxBaseAlign != 0) - { - pInOut->flags.dccPipeWorkaround = FALSE; - } - } - - if (tileMode != pInOut->tileMode) - { - pInOut->tileMode = tileMode; - } -} - -/** -**************************************************************************************************** -* CiLib::HwlOverrideTileMode -* -* @brief -* Override THICK to THIN, for specific formats on CI -* -* @return -* N/A -* -**************************************************************************************************** -*/ -VOID CiLib::HwlOverrideTileMode( - ADDR_COMPUTE_SURFACE_INFO_INPUT* pInOut ///< [in,out] input output structure - ) const -{ - AddrTileMode tileMode = pInOut->tileMode; - AddrTileType tileType = pInOut->tileType; - - // currently, all CI/VI family do not - // support ADDR_TM_PRT_2D_TILED_THICK,ADDR_TM_PRT_3D_TILED_THICK and - // ADDR_TM_PRT_2D_TILED_THIN1, ADDR_TM_PRT_3D_TILED_THIN1 - switch (tileMode) - { - case ADDR_TM_PRT_2D_TILED_THICK: - case ADDR_TM_PRT_3D_TILED_THICK: - tileMode = ADDR_TM_PRT_TILED_THICK; - break; - case ADDR_TM_PRT_2D_TILED_THIN1: - case ADDR_TM_PRT_3D_TILED_THIN1: - tileMode = ADDR_TM_PRT_TILED_THIN1; - break; - default: - break; - } 
- - // UBTS#404321, we do not need such overriding, as THICK+THICK entries removed from the tile-mode table - if (!m_settings.isBonaire) - { - UINT_32 thickness = Thickness(tileMode); - - // tile_thickness = (array_mode == XTHICK) ? 8 : ((array_mode == THICK) ? 4 : 1) - if (thickness > 1) - { - switch (pInOut->format) - { - // see //gfxip/gcB/devel/cds/src/verif/tc/models/csim/tcp.cpp - // tcpError("Thick micro tiling is not supported for format... - case ADDR_FMT_X24_8_32_FLOAT: - case ADDR_FMT_32_AS_8: - case ADDR_FMT_32_AS_8_8: - case ADDR_FMT_32_AS_32_32_32_32: - - // packed formats - case ADDR_FMT_GB_GR: - case ADDR_FMT_BG_RG: - case ADDR_FMT_1_REVERSED: - case ADDR_FMT_1: - case ADDR_FMT_BC1: - case ADDR_FMT_BC2: - case ADDR_FMT_BC3: - case ADDR_FMT_BC4: - case ADDR_FMT_BC5: - case ADDR_FMT_BC6: - case ADDR_FMT_BC7: - switch (tileMode) - { - case ADDR_TM_1D_TILED_THICK: - tileMode = ADDR_TM_1D_TILED_THIN1; - break; - - case ADDR_TM_2D_TILED_XTHICK: - case ADDR_TM_2D_TILED_THICK: - tileMode = ADDR_TM_2D_TILED_THIN1; - break; - - case ADDR_TM_3D_TILED_XTHICK: - case ADDR_TM_3D_TILED_THICK: - tileMode = ADDR_TM_3D_TILED_THIN1; - break; - - case ADDR_TM_PRT_TILED_THICK: - tileMode = ADDR_TM_PRT_TILED_THIN1; - break; - - case ADDR_TM_PRT_2D_TILED_THICK: - tileMode = ADDR_TM_PRT_2D_TILED_THIN1; - break; - - case ADDR_TM_PRT_3D_TILED_THICK: - tileMode = ADDR_TM_PRT_3D_TILED_THIN1; - break; - - default: - break; - - } - - // Switch tile type from thick to thin - if (tileMode != pInOut->tileMode) - { - // see tileIndex: 13-18 - tileType = ADDR_NON_DISPLAYABLE; - } - - break; - default: - break; - } - } - } - - if (tileMode != pInOut->tileMode) - { - pInOut->tileMode = tileMode; - pInOut->tileType = tileType; - } -} - -/** -**************************************************************************************************** -* CiLib::HwlSelectTileMode -* -* @brief -* Select tile modes. 
-* -* @return -* N/A -* -**************************************************************************************************** -*/ -VOID CiLib::HwlSelectTileMode( - ADDR_COMPUTE_SURFACE_INFO_INPUT* pInOut ///< [in,out] input output structure - ) const -{ - AddrTileMode tileMode; - AddrTileType tileType; - - if (pInOut->flags.rotateDisplay) - { - tileMode = ADDR_TM_2D_TILED_THIN1; - tileType = ADDR_ROTATED; - } - else if (pInOut->flags.volume) - { - BOOL_32 bThin = (m_settings.isBonaire == TRUE) || - ((m_allowNonDispThickModes == TRUE) && (pInOut->flags.color == TRUE)); - - if (pInOut->numSlices >= 8) - { - tileMode = ADDR_TM_2D_TILED_XTHICK; - tileType = (bThin == TRUE) ? ADDR_NON_DISPLAYABLE : ADDR_THICK; - } - else if (pInOut->numSlices >= 4) - { - tileMode = ADDR_TM_2D_TILED_THICK; - tileType = (bThin == TRUE) ? ADDR_NON_DISPLAYABLE : ADDR_THICK; - } - else - { - tileMode = ADDR_TM_2D_TILED_THIN1; - tileType = ADDR_NON_DISPLAYABLE; - } - } - else - { - tileMode = ADDR_TM_2D_TILED_THIN1; - - if (pInOut->flags.depth || pInOut->flags.stencil) - { - tileType = ADDR_DEPTH_SAMPLE_ORDER; - } - else if ((pInOut->bpp <= 32) || - (pInOut->flags.display == TRUE) || - (pInOut->flags.overlay == TRUE)) - { - tileType = ADDR_DISPLAYABLE; - } - else - { - tileType = ADDR_NON_DISPLAYABLE; - } - } - - if (pInOut->flags.prt) - { - if (Thickness(tileMode) > 1) - { - tileMode = ADDR_TM_PRT_TILED_THICK; - tileType = (m_settings.isBonaire == TRUE) ? 
ADDR_NON_DISPLAYABLE : ADDR_THICK; - } - else - { - tileMode = ADDR_TM_PRT_TILED_THIN1; - } - } - - pInOut->tileMode = tileMode; - pInOut->tileType = tileType; - - if ((pInOut->flags.dccCompatible == FALSE) && - (pInOut->flags.tcCompatible == FALSE)) - { - pInOut->flags.opt4Space = TRUE; - pInOut->maxBaseAlign = Block64K; - } - - // Optimize tile mode if possible - OptimizeTileMode(pInOut); - - HwlOverrideTileMode(pInOut); -} - -/** -**************************************************************************************************** -* CiLib::HwlSetPrtTileMode -* -* @brief -* Set PRT tile mode. -* -* @return -* N/A -* -**************************************************************************************************** -*/ -VOID CiLib::HwlSetPrtTileMode( - ADDR_COMPUTE_SURFACE_INFO_INPUT* pInOut ///< [in,out] input output structure - ) const -{ - AddrTileMode tileMode = pInOut->tileMode; - AddrTileType tileType = pInOut->tileType; - - if (Thickness(tileMode) > 1) - { - tileMode = ADDR_TM_PRT_TILED_THICK; - tileType = (m_settings.isBonaire == TRUE) ? ADDR_NON_DISPLAYABLE : ADDR_THICK; - } - else - { - tileMode = ADDR_TM_PRT_TILED_THIN1; - tileType = (tileType == ADDR_THICK) ? 
ADDR_NON_DISPLAYABLE : tileType; - } - - pInOut->tileMode = tileMode; - pInOut->tileType = tileType; -} - -/** -**************************************************************************************************** -* CiLib::HwlSetupTileInfo -* -* @brief -* Setup default value of tile info for SI -**************************************************************************************************** -*/ -VOID CiLib::HwlSetupTileInfo( - AddrTileMode tileMode, ///< [in] Tile mode - ADDR_SURFACE_FLAGS flags, ///< [in] Surface type flags - UINT_32 bpp, ///< [in] Bits per pixel - UINT_32 pitch, ///< [in] Pitch in pixels - UINT_32 height, ///< [in] Height in pixels - UINT_32 numSamples, ///< [in] Number of samples - ADDR_TILEINFO* pTileInfoIn, ///< [in] Tile info input: NULL for default - ADDR_TILEINFO* pTileInfoOut, ///< [out] Tile info output - AddrTileType inTileType, ///< [in] Tile type - ADDR_COMPUTE_SURFACE_INFO_OUTPUT* pOut ///< [out] Output - ) const -{ - UINT_32 thickness = Thickness(tileMode); - ADDR_TILEINFO* pTileInfo = pTileInfoOut; - INT index = TileIndexInvalid; - INT macroModeIndex = TileIndexInvalid; - - // Fail-safe code - if (IsLinear(tileMode) == FALSE) - { - // Thick tile modes must use thick micro tile mode but Bonaire does not support due to - // old derived netlists (UBTS 404321) - if (thickness > 1) - { - if (m_settings.isBonaire) - { - inTileType = ADDR_NON_DISPLAYABLE; - } - else if ((m_allowNonDispThickModes == FALSE) || - (inTileType != ADDR_NON_DISPLAYABLE) || - // There is no PRT_THICK + THIN entry in tile mode table except Bonaire - (IsPrtTileMode(tileMode) == TRUE)) - { - inTileType = ADDR_THICK; - } - } - // 128 bpp tiling must be non-displayable. 
- // Fmask reuse color buffer's entry but bank-height field can be from another entry - // To simplify the logic, fmask entry should be picked from non-displayable ones - else if (bpp == 128 || flags.fmask) - { - inTileType = ADDR_NON_DISPLAYABLE; - } - // These two modes only have non-disp entries though they can be other micro tile modes - else if (tileMode == ADDR_TM_3D_TILED_THIN1 || tileMode == ADDR_TM_PRT_3D_TILED_THIN1) - { - inTileType = ADDR_NON_DISPLAYABLE; - } - - if (flags.depth || flags.stencil) - { - inTileType = ADDR_DEPTH_SAMPLE_ORDER; - } - } - - // tcCompatible flag is only meaningful for gfx8. - if (m_settings.isVolcanicIslands == FALSE) - { - flags.tcCompatible = FALSE; - } - - if (IsTileInfoAllZero(pTileInfo)) - { - // See table entries 0-4 - if (flags.depth || flags.stencil) - { - // tileSize = thickness * bpp * numSamples * 8 * 8 / 8 - UINT_32 tileSize = thickness * bpp * numSamples * 8; - - // Turn off tc compatible if row_size is smaller than tile size (tile split occurs). 
- if (m_rowSize < tileSize) - { - flags.tcCompatible = FALSE; - } - - if (flags.nonSplit | flags.tcCompatible | flags.needEquation) - { - // Texture readable depth surface should not be split - switch (tileSize) - { - case 64: - index = 0; - break; - case 128: - index = 1; - break; - case 256: - index = 2; - break; - case 512: - index = 3; - break; - default: - index = 4; - break; - } - } - else - { - // Depth and stencil need to use the same index, thus the pre-defined tile_split - // can meet the requirement to choose the same macro mode index - // uncompressed depth/stencil are not supported for now - switch (numSamples) - { - case 1: - index = 0; - break; - case 2: - case 4: - index = 1; - break; - case 8: - index = 2; - break; - default: - break; - } - } - } - - // See table entries 5-6 - if (inTileType == ADDR_DEPTH_SAMPLE_ORDER) - { - switch (tileMode) - { - case ADDR_TM_1D_TILED_THIN1: - index = 5; - break; - case ADDR_TM_PRT_TILED_THIN1: - index = 6; - break; - default: - break; - } - } - - // See table entries 8-12 - if (inTileType == ADDR_DISPLAYABLE) - { - switch (tileMode) - { - case ADDR_TM_1D_TILED_THIN1: - index = 9; - break; - case ADDR_TM_2D_TILED_THIN1: - index = 10; - break; - case ADDR_TM_PRT_TILED_THIN1: - index = 11; - break; - default: - break; - } - } - - // See table entries 13-18 - if (inTileType == ADDR_NON_DISPLAYABLE) - { - switch (tileMode) - { - case ADDR_TM_1D_TILED_THIN1: - index = 13; - break; - case ADDR_TM_2D_TILED_THIN1: - index = 14; - break; - case ADDR_TM_3D_TILED_THIN1: - index = 15; - break; - case ADDR_TM_PRT_TILED_THIN1: - index = 16; - break; - default: - break; - } - } - - // See table entries 19-26 - if (thickness > 1) - { - switch (tileMode) - { - case ADDR_TM_1D_TILED_THICK: - // special check for bonaire, for the compatablity between old KMD and new UMD - index = ((inTileType == ADDR_THICK) || m_settings.isBonaire) ? 
19 : 18; - break; - case ADDR_TM_2D_TILED_THICK: - // special check for bonaire, for the compatablity between old KMD and new UMD - index = ((inTileType == ADDR_THICK) || m_settings.isBonaire) ? 20 : 24; - break; - case ADDR_TM_3D_TILED_THICK: - index = 21; - break; - case ADDR_TM_PRT_TILED_THICK: - index = 22; - break; - case ADDR_TM_2D_TILED_XTHICK: - index = 25; - break; - case ADDR_TM_3D_TILED_XTHICK: - index = 26; - break; - default: - break; - } - } - - // See table entries 27-30 - if (inTileType == ADDR_ROTATED) - { - switch (tileMode) - { - case ADDR_TM_1D_TILED_THIN1: - index = 27; - break; - case ADDR_TM_2D_TILED_THIN1: - index = 28; - break; - case ADDR_TM_PRT_TILED_THIN1: - index = 29; - break; - case ADDR_TM_PRT_2D_TILED_THIN1: - index = 30; - break; - default: - break; - } - } - - if (m_pipes >= 8) - { - ADDR_ASSERT((index + 1) < static_cast(m_noOfEntries)); - // Only do this when tile mode table is updated. - if (((tileMode == ADDR_TM_PRT_TILED_THIN1) || (tileMode == ADDR_TM_PRT_TILED_THICK)) && - (m_tileTable[index + 1].mode == tileMode)) - { - static const UINT_32 PrtTileBytes = 0x10000; - ADDR_TILEINFO tileInfo = {0}; - - HwlComputeMacroModeIndex(index, flags, bpp, numSamples, &tileInfo); - - UINT_32 macroTileBytes = (bpp >> 3) * 64 * numSamples * thickness * - HwlGetPipes(&tileInfo) * tileInfo.banks * - tileInfo.bankWidth * tileInfo.bankHeight; - - if (macroTileBytes != PrtTileBytes) - { - // Switching to next tile mode entry to make sure macro tile size is 64KB - index += 1; - - tileInfo.pipeConfig = m_tileTable[index].info.pipeConfig; - - macroTileBytes = (bpp >> 3) * 64 * numSamples * thickness * - HwlGetPipes(&tileInfo) * tileInfo.banks * - tileInfo.bankWidth * tileInfo.bankHeight; - - ADDR_ASSERT(macroTileBytes == PrtTileBytes); - - flags.tcCompatible = FALSE; - pOut->dccUnsupport = TRUE; - } - } - } - } - else - { - // A pre-filled tile info is ready - index = pOut->tileIndex; - macroModeIndex = pOut->macroModeIndex; - - // pass tile type 
back for post tile index compute - pOut->tileType = inTileType; - - if (flags.depth || flags.stencil) - { - // tileSize = thickness * bpp * numSamples * 8 * 8 / 8 - UINT_32 tileSize = thickness * bpp * numSamples * 8; - - // Turn off tc compatible if row_size is smaller than tile size (tile split occurs). - if (m_rowSize < tileSize) - { - flags.tcCompatible = FALSE; - } - } - - UINT_32 numPipes = GetPipePerSurf(pTileInfo->pipeConfig); - - if (m_pipes != numPipes) - { - pOut->dccUnsupport = TRUE; - } - } - - // We only need to set up tile info if there is a valid index but macroModeIndex is invalid - if ((index != TileIndexInvalid) && (macroModeIndex == TileIndexInvalid)) - { - macroModeIndex = HwlComputeMacroModeIndex(index, flags, bpp, numSamples, pTileInfo); - - // Copy to pOut->tileType/tileIndex/macroModeIndex - pOut->tileIndex = index; - pOut->tileType = m_tileTable[index].type; // Or inTileType, the samea - pOut->macroModeIndex = macroModeIndex; - } - else if (tileMode == ADDR_TM_LINEAR_GENERAL) - { - pOut->tileIndex = TileIndexLinearGeneral; - - // Copy linear-aligned entry?? - *pTileInfo = m_tileTable[8].info; - } - else if (tileMode == ADDR_TM_LINEAR_ALIGNED) - { - pOut->tileIndex = 8; - *pTileInfo = m_tileTable[8].info; - } - - if (flags.tcCompatible) - { - flags.tcCompatible = CheckTcCompatibility(pTileInfo, bpp, tileMode, inTileType, pOut); - } - - pOut->tcCompatible = flags.tcCompatible; -} - -/** -**************************************************************************************************** -* CiLib::ReadGbTileMode -* -* @brief -* Convert GB_TILE_MODE HW value to ADDR_TILE_CONFIG. 
-**************************************************************************************************** -*/ -VOID CiLib::ReadGbTileMode( - UINT_32 regValue, ///< [in] GB_TILE_MODE register - TileConfig* pCfg ///< [out] output structure - ) const -{ - GB_TILE_MODE gbTileMode; - gbTileMode.val = regValue; - - pCfg->type = static_cast(gbTileMode.f.micro_tile_mode_new); - pCfg->info.pipeConfig = static_cast(gbTileMode.f.pipe_config + 1); - - if (pCfg->type == ADDR_DEPTH_SAMPLE_ORDER) - { - pCfg->info.tileSplitBytes = 64 << gbTileMode.f.tile_split; - } - else - { - pCfg->info.tileSplitBytes = 1 << gbTileMode.f.sample_split; - } - - UINT_32 regArrayMode = gbTileMode.f.array_mode; - - pCfg->mode = static_cast(regArrayMode); - - switch (regArrayMode) - { - case 5: - pCfg->mode = ADDR_TM_PRT_TILED_THIN1; - break; - case 6: - pCfg->mode = ADDR_TM_PRT_2D_TILED_THIN1; - break; - case 8: - pCfg->mode = ADDR_TM_2D_TILED_XTHICK; - break; - case 9: - pCfg->mode = ADDR_TM_PRT_TILED_THICK; - break; - case 0xa: - pCfg->mode = ADDR_TM_PRT_2D_TILED_THICK; - break; - case 0xb: - pCfg->mode = ADDR_TM_PRT_3D_TILED_THIN1; - break; - case 0xe: - pCfg->mode = ADDR_TM_3D_TILED_XTHICK; - break; - case 0xf: - pCfg->mode = ADDR_TM_PRT_3D_TILED_THICK; - break; - default: - break; - } - - // Fail-safe code for these always convert tile info, as the non-macro modes - // return the entry of tile mode table directly without looking up macro mode table - if (!IsMacroTiled(pCfg->mode)) - { - pCfg->info.banks = 2; - pCfg->info.bankWidth = 1; - pCfg->info.bankHeight = 1; - pCfg->info.macroAspectRatio = 1; - pCfg->info.tileSplitBytes = 64; - } -} - -/** -**************************************************************************************************** -* CiLib::InitTileSettingTable -* -* @brief -* Initialize the ADDR_TILE_CONFIG table. 
-* @return -* TRUE if tile table is correctly initialized -**************************************************************************************************** -*/ -BOOL_32 CiLib::InitTileSettingTable( - const UINT_32* pCfg, ///< [in] Pointer to table of tile configs - UINT_32 noOfEntries ///< [in] Numbe of entries in the table above - ) -{ - BOOL_32 initOk = TRUE; - - ADDR_ASSERT(noOfEntries <= TileTableSize); - - memset(m_tileTable, 0, sizeof(m_tileTable)); - - if (noOfEntries != 0) - { - m_noOfEntries = noOfEntries; - } - else - { - m_noOfEntries = TileTableSize; - } - - if (pCfg) // From Client - { - for (UINT_32 i = 0; i < m_noOfEntries; i++) - { - ReadGbTileMode(*(pCfg + i), &m_tileTable[i]); - } - } - else - { - ADDR_ASSERT_ALWAYS(); - initOk = FALSE; - } - - if (initOk) - { - ADDR_ASSERT(m_tileTable[TILEINDEX_LINEAR_ALIGNED].mode == ADDR_TM_LINEAR_ALIGNED); - - if (m_settings.isBonaire == FALSE) - { - // Check if entry 18 is "thick+thin" combination - if ((m_tileTable[18].mode == ADDR_TM_1D_TILED_THICK) && - (m_tileTable[18].type == ADDR_NON_DISPLAYABLE)) - { - m_allowNonDispThickModes = TRUE; - ADDR_ASSERT(m_tileTable[24].mode == ADDR_TM_2D_TILED_THICK); - } - } - else - { - m_allowNonDispThickModes = TRUE; - } - - // Assume the first entry is always programmed with full pipes - m_pipes = HwlGetPipes(&m_tileTable[0].info); - } - - return initOk; -} - -/** -**************************************************************************************************** -* CiLib::ReadGbMacroTileCfg -* -* @brief -* Convert GB_MACRO_TILE_CFG HW value to ADDR_TILE_CONFIG. 
-**************************************************************************************************** -*/ -VOID CiLib::ReadGbMacroTileCfg( - UINT_32 regValue, ///< [in] GB_MACRO_TILE_MODE register - ADDR_TILEINFO* pCfg ///< [out] output structure - ) const -{ - GB_MACROTILE_MODE gbTileMode; - gbTileMode.val = regValue; - - pCfg->bankHeight = 1 << gbTileMode.f.bank_height; - pCfg->bankWidth = 1 << gbTileMode.f.bank_width; - pCfg->banks = 1 << (gbTileMode.f.num_banks + 1); - pCfg->macroAspectRatio = 1 << gbTileMode.f.macro_tile_aspect; -} - -/** -**************************************************************************************************** -* CiLib::InitMacroTileCfgTable -* -* @brief -* Initialize the ADDR_MACRO_TILE_CONFIG table. -* @return -* TRUE if macro tile table is correctly initialized -**************************************************************************************************** -*/ -BOOL_32 CiLib::InitMacroTileCfgTable( - const UINT_32* pCfg, ///< [in] Pointer to table of tile configs - UINT_32 noOfMacroEntries ///< [in] Numbe of entries in the table above - ) -{ - BOOL_32 initOk = TRUE; - - ADDR_ASSERT(noOfMacroEntries <= MacroTileTableSize); - - memset(m_macroTileTable, 0, sizeof(m_macroTileTable)); - - if (noOfMacroEntries != 0) - { - m_noOfMacroEntries = noOfMacroEntries; - } - else - { - m_noOfMacroEntries = MacroTileTableSize; - } - - if (pCfg) // From Client - { - for (UINT_32 i = 0; i < m_noOfMacroEntries; i++) - { - ReadGbMacroTileCfg(*(pCfg + i), &m_macroTileTable[i]); - - m_macroTileTable[i].tileSplitBytes = 64 << (i % 8); - } - } - else - { - ADDR_ASSERT_ALWAYS(); - initOk = FALSE; - } - return initOk; -} - -/** -**************************************************************************************************** -* CiLib::HwlComputeMacroModeIndex -* -* @brief -* Computes macro tile mode index -* @return -* TRUE if macro tile table is correctly initialized 
-**************************************************************************************************** -*/ -INT_32 CiLib::HwlComputeMacroModeIndex( - INT_32 tileIndex, ///< [in] Tile mode index - ADDR_SURFACE_FLAGS flags, ///< [in] Surface flags - UINT_32 bpp, ///< [in] Bit per pixel - UINT_32 numSamples, ///< [in] Number of samples - ADDR_TILEINFO* pTileInfo, ///< [out] Pointer to ADDR_TILEINFO - AddrTileMode* pTileMode, ///< [out] Pointer to AddrTileMode - AddrTileType* pTileType ///< [out] Pointer to AddrTileType - ) const -{ - INT_32 macroModeIndex = TileIndexInvalid; - - AddrTileMode tileMode = m_tileTable[tileIndex].mode; - AddrTileType tileType = m_tileTable[tileIndex].type; - UINT_32 thickness = Thickness(tileMode); - - if (!IsMacroTiled(tileMode)) - { - *pTileInfo = m_tileTable[tileIndex].info; - macroModeIndex = TileIndexNoMacroIndex; - } - else - { - UINT_32 tileBytes1x = BITS_TO_BYTES(bpp * MicroTilePixels * thickness); - UINT_32 tileSplit; - - if (m_tileTable[tileIndex].type == ADDR_DEPTH_SAMPLE_ORDER) - { - // Depth entries store real tileSplitBytes - tileSplit = m_tileTable[tileIndex].info.tileSplitBytes; - } - else - { - // Non-depth entries store a split factor - UINT_32 sampleSplit = m_tileTable[tileIndex].info.tileSplitBytes; - UINT_32 colorTileSplit = Max(256u, sampleSplit * tileBytes1x); - - tileSplit = colorTileSplit; - } - - UINT_32 tileSplitC = Min(m_rowSize, tileSplit); - UINT_32 tileBytes; - - if (flags.fmask) - { - tileBytes = Min(tileSplitC, tileBytes1x); - } - else - { - tileBytes = Min(tileSplitC, numSamples * tileBytes1x); - } - - if (tileBytes < 64) - { - tileBytes = 64; - } - - macroModeIndex = Log2(tileBytes / 64); - - if (flags.prt || IsPrtTileMode(tileMode)) - { - macroModeIndex += PrtMacroModeOffset; - *pTileInfo = m_macroTileTable[macroModeIndex]; - } - else - { - *pTileInfo = m_macroTileTable[macroModeIndex]; - } - - pTileInfo->pipeConfig = m_tileTable[tileIndex].info.pipeConfig; - - pTileInfo->tileSplitBytes = tileSplitC; - } 
- - if (NULL != pTileMode) - { - *pTileMode = tileMode; - } - - if (NULL != pTileType) - { - *pTileType = tileType; - } - - return macroModeIndex; -} - -/** -**************************************************************************************************** -* CiLib::HwlComputeTileDataWidthAndHeightLinear -* -* @brief -* Compute the squared cache shape for per-tile data (CMASK and HTILE) for linear layout -* -* @note -* MacroWidth and macroHeight are measured in pixels -**************************************************************************************************** -*/ -VOID CiLib::HwlComputeTileDataWidthAndHeightLinear( - UINT_32* pMacroWidth, ///< [out] macro tile width - UINT_32* pMacroHeight, ///< [out] macro tile height - UINT_32 bpp, ///< [in] bits per pixel - ADDR_TILEINFO* pTileInfo ///< [in] tile info - ) const -{ - ADDR_ASSERT(pTileInfo != NULL); - - UINT_32 numTiles; - - switch (pTileInfo->pipeConfig) - { - case ADDR_PIPECFG_P16_32x32_8x16: - case ADDR_PIPECFG_P16_32x32_16x16: - case ADDR_PIPECFG_P8_32x64_32x32: - case ADDR_PIPECFG_P8_32x32_16x32: - case ADDR_PIPECFG_P8_32x32_16x16: - case ADDR_PIPECFG_P8_32x32_8x16: - case ADDR_PIPECFG_P4_32x32: - numTiles = 8; - break; - default: - numTiles = 4; - break; - } - - *pMacroWidth = numTiles * MicroTileWidth; - *pMacroHeight = numTiles * MicroTileHeight; -} - -/** -**************************************************************************************************** -* CiLib::HwlComputeMetadataNibbleAddress -* -* @brief -* calculate meta data address based on input information -* -* ¶meter -* uncompressedDataByteAddress - address of a pixel in color surface -* dataBaseByteAddress - base address of color surface -* metadataBaseByteAddress - base address of meta ram -* metadataBitSize - meta key size, 8 for DCC, 4 for cmask -* elementBitSize - element size of color surface -* blockByteSize - compression block size, 256 for DCC -* pipeInterleaveBytes - pipe interleave size -* numOfPipes - number of pipes -* 
numOfBanks - number of banks -* numOfSamplesPerSplit - number of samples per tile split -* @return -* meta data nibble address (nibble address is used to support DCC compatible cmask) -* -**************************************************************************************************** -*/ -UINT_64 CiLib::HwlComputeMetadataNibbleAddress( - UINT_64 uncompressedDataByteAddress, - UINT_64 dataBaseByteAddress, - UINT_64 metadataBaseByteAddress, - UINT_32 metadataBitSize, - UINT_32 elementBitSize, - UINT_32 blockByteSize, - UINT_32 pipeInterleaveBytes, - UINT_32 numOfPipes, - UINT_32 numOfBanks, - UINT_32 numOfSamplesPerSplit) const -{ - ///-------------------------------------------------------------------------------------------- - /// Get pipe interleave, bank and pipe bits - ///-------------------------------------------------------------------------------------------- - UINT_32 pipeInterleaveBits = Log2(pipeInterleaveBytes); - UINT_32 pipeBits = Log2(numOfPipes); - UINT_32 bankBits = Log2(numOfBanks); - - ///-------------------------------------------------------------------------------------------- - /// Clear pipe and bank swizzles - ///-------------------------------------------------------------------------------------------- - UINT_32 dataMacrotileBits = pipeInterleaveBits + pipeBits + bankBits; - UINT_32 metadataMacrotileBits = pipeInterleaveBits + pipeBits + bankBits; - - UINT_64 dataMacrotileClearMask = ~((1L << dataMacrotileBits) - 1); - UINT_64 metadataMacrotileClearMask = ~((1L << metadataMacrotileBits) - 1); - - UINT_64 dataBaseByteAddressNoSwizzle = dataBaseByteAddress & dataMacrotileClearMask; - UINT_64 metadataBaseByteAddressNoSwizzle = metadataBaseByteAddress & metadataMacrotileClearMask; - - ///-------------------------------------------------------------------------------------------- - /// Modify metadata base before adding in so that when final address is divided by data ratio, - /// the base address returns to where it should be - 
///-------------------------------------------------------------------------------------------- - ADDR_ASSERT((0 != metadataBitSize)); - UINT_64 metadataBaseShifted = metadataBaseByteAddressNoSwizzle * blockByteSize * 8 / - metadataBitSize; - UINT_64 offset = uncompressedDataByteAddress - - dataBaseByteAddressNoSwizzle + - metadataBaseShifted; - - ///-------------------------------------------------------------------------------------------- - /// Save bank data bits - ///-------------------------------------------------------------------------------------------- - UINT_32 lsb = pipeBits + pipeInterleaveBits; - UINT_32 msb = bankBits - 1 + lsb; - - UINT_64 bankDataBits = GetBits(offset, msb, lsb); - - ///-------------------------------------------------------------------------------------------- - /// Save pipe data bits - ///-------------------------------------------------------------------------------------------- - lsb = pipeInterleaveBits; - msb = pipeBits - 1 + lsb; - - UINT_64 pipeDataBits = GetBits(offset, msb, lsb); - - ///-------------------------------------------------------------------------------------------- - /// Remove pipe and bank bits - ///-------------------------------------------------------------------------------------------- - lsb = pipeInterleaveBits; - msb = dataMacrotileBits - 1; - - UINT_64 offsetWithoutPipeBankBits = RemoveBits(offset, msb, lsb); - - ADDR_ASSERT((0 != blockByteSize)); - UINT_64 blockInBankpipe = offsetWithoutPipeBankBits / blockByteSize; - - UINT_32 tileSize = 8 * 8 * elementBitSize/8 * numOfSamplesPerSplit; - UINT_32 blocksInTile = tileSize / blockByteSize; - - if (0 == blocksInTile) - { - lsb = 0; - } - else - { - lsb = Log2(blocksInTile); - } - msb = bankBits - 1 + lsb; - - UINT_64 blockInBankpipeWithBankBits = InsertBits(blockInBankpipe, bankDataBits, msb, lsb); - - /// NOTE *2 because we are converting to Nibble address in this step - UINT_64 metaAddressInPipe = blockInBankpipeWithBankBits * 2 * metadataBitSize / 
8; - - - ///-------------------------------------------------------------------------------------------- - /// Reinsert pipe bits back into the final address - ///-------------------------------------------------------------------------------------------- - lsb = pipeInterleaveBits + 1; ///<+1 due to Nibble address now gives interleave bits extra lsb. - msb = pipeBits - 1 + lsb; - UINT_64 metadataAddress = InsertBits(metaAddressInPipe, pipeDataBits, msb, lsb); - - return metadataAddress; -} - -/** -**************************************************************************************************** -* CiLib::HwlComputeSurfaceAlignmentsMacroTiled -* -* @brief -* Hardware layer function to compute alignment request for macro tile mode -* -**************************************************************************************************** -*/ -VOID CiLib::HwlComputeSurfaceAlignmentsMacroTiled( - AddrTileMode tileMode, ///< [in] tile mode - UINT_32 bpp, ///< [in] bits per pixel - ADDR_SURFACE_FLAGS flags, ///< [in] surface flags - UINT_32 mipLevel, ///< [in] mip level - UINT_32 numSamples, ///< [in] number of samples - ADDR_COMPUTE_SURFACE_INFO_OUTPUT* pOut ///< [in,out] Surface output - ) const -{ - // This is to workaround a H/W limitation that DCC doesn't work when pipe config is switched to - // P4. In theory, all asics that have such switching should be patched but we now only know what - // to pad for Fiji. - if ((m_settings.isFiji == TRUE) && - (flags.dccPipeWorkaround == TRUE) && - (flags.prt == FALSE) && - (mipLevel == 0) && - (tileMode == ADDR_TM_PRT_TILED_THIN1) && - (pOut->dccUnsupport == TRUE)) - { - pOut->pitchAlign = PowTwoAlign(pOut->pitchAlign, 256); - // In case the client still requests DCC usage. 
- pOut->dccUnsupport = FALSE; - } -} - -/** -**************************************************************************************************** -* CiLib::HwlPadDimensions -* -* @brief -* Helper function to pad dimensions -* -**************************************************************************************************** -*/ -VOID CiLib::HwlPadDimensions( - AddrTileMode tileMode, ///< [in] tile mode - UINT_32 bpp, ///< [in] bits per pixel - ADDR_SURFACE_FLAGS flags, ///< [in] surface flags - UINT_32 numSamples, ///< [in] number of samples - ADDR_TILEINFO* pTileInfo, ///< [in] tile info - UINT_32 mipLevel, ///< [in] mip level - UINT_32* pPitch, ///< [in,out] pitch in pixels - UINT_32* pPitchAlign, ///< [in,out] pitch alignment - UINT_32 height, ///< [in] height in pixels - UINT_32 heightAlign ///< [in] height alignment - ) const -{ - if ((m_settings.isVolcanicIslands == TRUE) && - (flags.dccCompatible == TRUE) && - (numSamples > 1) && - (mipLevel == 0) && - (IsMacroTiled(tileMode) == TRUE)) - { - UINT_32 tileSizePerSample = BITS_TO_BYTES(bpp * MicroTileWidth * MicroTileHeight); - UINT_32 samplesPerSplit = pTileInfo->tileSplitBytes / tileSizePerSample; - - if (samplesPerSplit < numSamples) - { - UINT_32 dccFastClearByteAlign = HwlGetPipes(pTileInfo) * m_pipeInterleaveBytes * 256; - UINT_32 bytesPerSplit = BITS_TO_BYTES((*pPitch) * height * bpp * samplesPerSplit); - - ADDR_ASSERT(IsPow2(dccFastClearByteAlign)); - - if (0 != (bytesPerSplit & (dccFastClearByteAlign - 1))) - { - UINT_32 dccFastClearPixelAlign = dccFastClearByteAlign / - BITS_TO_BYTES(bpp) / - samplesPerSplit; - UINT_32 macroTilePixelAlign = (*pPitchAlign) * heightAlign; - - if ((dccFastClearPixelAlign >= macroTilePixelAlign) && - ((dccFastClearPixelAlign % macroTilePixelAlign) == 0)) - { - UINT_32 dccFastClearPitchAlignInMacroTile = - dccFastClearPixelAlign / macroTilePixelAlign; - UINT_32 heightInMacroTile = height / heightAlign; - - while ((heightInMacroTile > 1) && - ((heightInMacroTile % 2) == 
0) && - (dccFastClearPitchAlignInMacroTile > 1) && - ((dccFastClearPitchAlignInMacroTile % 2) == 0)) - { - heightInMacroTile >>= 1; - dccFastClearPitchAlignInMacroTile >>= 1; - } - - UINT_32 dccFastClearPitchAlignInPixels = - (*pPitchAlign) * dccFastClearPitchAlignInMacroTile; - - if (IsPow2(dccFastClearPitchAlignInPixels)) - { - *pPitch = PowTwoAlign((*pPitch), dccFastClearPitchAlignInPixels); - } - else - { - *pPitch += (dccFastClearPitchAlignInPixels - 1); - *pPitch /= dccFastClearPitchAlignInPixels; - *pPitch *= dccFastClearPitchAlignInPixels; - } - - *pPitchAlign = dccFastClearPitchAlignInPixels; - } - } - } - } -} - -/** -**************************************************************************************************** -* CiLib::HwlComputeMaxBaseAlignments -* -* @brief -* Gets maximum alignments -* @return -* maximum alignments -**************************************************************************************************** -*/ -UINT_32 CiLib::HwlComputeMaxBaseAlignments() const -{ - const UINT_32 pipes = HwlGetPipes(&m_tileTable[0].info); - - // Initial size is 64 KiB for PRT. - UINT_32 maxBaseAlign = 64 * 1024; - - for (UINT_32 i = 0; i < m_noOfMacroEntries; i++) - { - // The maximum tile size is 16 byte-per-pixel and either 8-sample or 8-slice. 
- UINT_32 tileSize = m_macroTileTable[i].tileSplitBytes; - - UINT_32 baseAlign = tileSize * pipes * m_macroTileTable[i].banks * - m_macroTileTable[i].bankWidth * m_macroTileTable[i].bankHeight; - - if (baseAlign > maxBaseAlign) - { - maxBaseAlign = baseAlign; - } - } - - return maxBaseAlign; -} - -/** -**************************************************************************************************** -* CiLib::HwlComputeMaxMetaBaseAlignments -* -* @brief -* Gets maximum alignments for metadata -* @return -* maximum alignments for metadata -**************************************************************************************************** -*/ -UINT_32 CiLib::HwlComputeMaxMetaBaseAlignments() const -{ - UINT_32 maxBank = 1; - - for (UINT_32 i = 0; i < m_noOfMacroEntries; i++) - { - if ((m_settings.isVolcanicIslands) && IsMacroTiled(m_tileTable[i].mode)) - { - maxBank = Max(maxBank, m_macroTileTable[i].banks); - } - } - - return SiLib::HwlComputeMaxMetaBaseAlignments() * maxBank; -} - -/** -**************************************************************************************************** -* CiLib::DepthStencilTileCfgMatch -* -* @brief -* Try to find a tile index for stencil which makes its tile config parameters matches to depth -* @return -* TRUE if such tile index for stencil can be found -**************************************************************************************************** -*/ -BOOL_32 CiLib::DepthStencilTileCfgMatch( - const ADDR_COMPUTE_SURFACE_INFO_INPUT* pIn, ///< [in] input structure - ADDR_COMPUTE_SURFACE_INFO_OUTPUT* pOut ///< [out] output structure - ) const -{ - BOOL_32 depthStencil2DTileConfigMatch = FALSE; - - for (INT_32 stencilTileIndex = MinDepth2DThinIndex; - stencilTileIndex <= MaxDepth2DThinIndex; - stencilTileIndex++) - { - ADDR_TILEINFO tileInfo = {0}; - INT_32 stencilMacroIndex = HwlComputeMacroModeIndex(stencilTileIndex, - pIn->flags, - 8, - pIn->numSamples, - &tileInfo); - - if (stencilMacroIndex != TileIndexNoMacroIndex) - 
{ - if ((m_macroTileTable[stencilMacroIndex].banks == - m_macroTileTable[pOut->macroModeIndex].banks) && - (m_macroTileTable[stencilMacroIndex].bankWidth == - m_macroTileTable[pOut->macroModeIndex].bankWidth) && - (m_macroTileTable[stencilMacroIndex].bankHeight == - m_macroTileTable[pOut->macroModeIndex].bankHeight) && - (m_macroTileTable[stencilMacroIndex].macroAspectRatio == - m_macroTileTable[pOut->macroModeIndex].macroAspectRatio) && - (m_macroTileTable[stencilMacroIndex].pipeConfig == - m_macroTileTable[pOut->macroModeIndex].pipeConfig)) - { - if ((pOut->tcCompatible == FALSE) || - (tileInfo.tileSplitBytes >= MicroTileWidth * MicroTileHeight * pIn->numSamples)) - { - depthStencil2DTileConfigMatch = TRUE; - pOut->stencilTileIdx = stencilTileIndex; - break; - } - } - } - else - { - ADDR_ASSERT_ALWAYS(); - } - } - - return depthStencil2DTileConfigMatch; -} - -/** -**************************************************************************************************** -* CiLib::DepthStencilTileCfgMatch -* -* @brief -* Check if tc compatibility is available -* @return -* If tc compatibility is not available -**************************************************************************************************** -*/ -BOOL_32 CiLib::CheckTcCompatibility( - const ADDR_TILEINFO* pTileInfo, ///< [in] input tile info - UINT_32 bpp, ///< [in] Bits per pixel - AddrTileMode tileMode, ///< [in] input tile mode - AddrTileType tileType, ///< [in] input tile type - const ADDR_COMPUTE_SURFACE_INFO_OUTPUT* pOut ///< [in] output surf info - ) const -{ - BOOL_32 tcCompatible = TRUE; - - if (IsMacroTiled(tileMode)) - { - if (tileType != ADDR_DEPTH_SAMPLE_ORDER) - { - // Turn off tcCompatible for color surface if tileSplit happens. Depth/stencil - // tileSplit case was handled at tileIndex selecting time. 
- INT_32 tileIndex = pOut->tileIndex; - - if ((tileIndex == TileIndexInvalid) && (IsTileInfoAllZero(pTileInfo) == FALSE)) - { - tileIndex = HwlPostCheckTileIndex(pTileInfo, tileMode, tileType, tileIndex); - } - - if (tileIndex != TileIndexInvalid) - { - UINT_32 thickness = Thickness(tileMode); - - ADDR_ASSERT(static_cast(tileIndex) < TileTableSize); - // Non-depth entries store a split factor - UINT_32 sampleSplit = m_tileTable[tileIndex].info.tileSplitBytes; - UINT_32 tileBytes1x = BITS_TO_BYTES(bpp * MicroTilePixels * thickness); - UINT_32 colorTileSplit = Max(256u, sampleSplit * tileBytes1x); - - if (m_rowSize < colorTileSplit) - { - tcCompatible = FALSE; - } - } - } - } - else - { - // Client should not enable tc compatible for linear and 1D tile modes. - tcCompatible = FALSE; - } - - return tcCompatible; -} - -} // V1 -} // Addr diff -Nru mesa-18.3.3/src/amd/addrlib/r800/ciaddrlib.h mesa-19.0.1/src/amd/addrlib/r800/ciaddrlib.h --- mesa-18.3.3/src/amd/addrlib/r800/ciaddrlib.h 2018-04-03 17:32:26.000000000 +0000 +++ mesa-19.0.1/src/amd/addrlib/r800/ciaddrlib.h 1970-01-01 00:00:00.000000000 +0000 @@ -1,201 +0,0 @@ -/* - * Copyright © 2014 Advanced Micro Devices, Inc. - * All Rights Reserved. - * - * Permission is hereby granted, free of charge, to any person obtaining - * a copy of this software and associated documentation files (the - * "Software"), to deal in the Software without restriction, including - * without limitation the rights to use, copy, modify, merge, publish, - * distribute, sub license, and/or sell copies of the Software, and to - * permit persons to whom the Software is furnished to do so, subject to - * the following conditions: - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES - * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - * NON-INFRINGEMENT. 
IN NO EVENT SHALL THE COPYRIGHT HOLDERS, AUTHORS - * AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, - * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE - * USE OR OTHER DEALINGS IN THE SOFTWARE. - * - * The above copyright notice and this permission notice (including the - * next paragraph) shall be included in all copies or substantial portions - * of the Software. - */ - -/** -**************************************************************************************************** -* @file ciaddrlib.h -* @brief Contains the CiLib class definition. -**************************************************************************************************** -*/ - -#ifndef __CI_ADDR_LIB_H__ -#define __CI_ADDR_LIB_H__ - -#include "addrlib1.h" -#include "siaddrlib.h" - -namespace Addr -{ -namespace V1 -{ - -/** -**************************************************************************************************** -* @brief This class is the CI specific address library -* function set. -**************************************************************************************************** -*/ -class CiLib : public SiLib -{ -public: - /// Creates CiLib object - static Addr::Lib* CreateObj(const Client* pClient) - { - VOID* pMem = Object::ClientAlloc(sizeof(CiLib), pClient); - return (pMem != NULL) ? 
new (pMem) CiLib(pClient) : NULL; - } - -private: - CiLib(const Client* pClient); - virtual ~CiLib(); - -protected: - - // Hwl interface - defined in AddrLib1 - virtual ADDR_E_RETURNCODE HwlComputeSurfaceInfo( - const ADDR_COMPUTE_SURFACE_INFO_INPUT* pIn, - ADDR_COMPUTE_SURFACE_INFO_OUTPUT* pOut) const; - - virtual ADDR_E_RETURNCODE HwlComputeFmaskInfo( - const ADDR_COMPUTE_FMASK_INFO_INPUT* pIn, - ADDR_COMPUTE_FMASK_INFO_OUTPUT* pOut); - - virtual ChipFamily HwlConvertChipFamily( - UINT_32 uChipFamily, UINT_32 uChipRevision); - - virtual BOOL_32 HwlInitGlobalParams( - const ADDR_CREATE_INPUT* pCreateIn); - - virtual ADDR_E_RETURNCODE HwlSetupTileCfg( - UINT_32 bpp, INT_32 index, INT_32 macroModeIndex, ADDR_TILEINFO* pInfo, - AddrTileMode* pMode = 0, AddrTileType* pType = 0) const; - - virtual VOID HwlComputeTileDataWidthAndHeightLinear( - UINT_32* pMacroWidth, UINT_32* pMacroHeight, - UINT_32 bpp, ADDR_TILEINFO* pTileInfo) const; - - virtual INT_32 HwlComputeMacroModeIndex( - INT_32 tileIndex, ADDR_SURFACE_FLAGS flags, UINT_32 bpp, UINT_32 numSamples, - ADDR_TILEINFO* pTileInfo, AddrTileMode* pTileMode = NULL, AddrTileType* pTileType = NULL - ) const; - - // Sub-hwl interface - defined in EgBasedLib - virtual VOID HwlSetupTileInfo( - AddrTileMode tileMode, ADDR_SURFACE_FLAGS flags, - UINT_32 bpp, UINT_32 pitch, UINT_32 height, UINT_32 numSamples, - ADDR_TILEINFO* inputTileInfo, ADDR_TILEINFO* outputTileInfo, - AddrTileType inTileType, ADDR_COMPUTE_SURFACE_INFO_OUTPUT* pOut) const; - - virtual INT_32 HwlPostCheckTileIndex( - const ADDR_TILEINFO* pInfo, AddrTileMode mode, AddrTileType type, - INT curIndex = TileIndexInvalid) const; - - virtual VOID HwlFmaskPreThunkSurfInfo( - const ADDR_COMPUTE_FMASK_INFO_INPUT* pFmaskIn, - const ADDR_COMPUTE_FMASK_INFO_OUTPUT* pFmaskOut, - ADDR_COMPUTE_SURFACE_INFO_INPUT* pSurfIn, - ADDR_COMPUTE_SURFACE_INFO_OUTPUT* pSurfOut) const; - - virtual VOID HwlFmaskPostThunkSurfInfo( - const ADDR_COMPUTE_SURFACE_INFO_OUTPUT* pSurfOut, - 
ADDR_COMPUTE_FMASK_INFO_OUTPUT* pFmaskOut) const; - - virtual AddrTileMode HwlDegradeThickTileMode( - AddrTileMode baseTileMode, UINT_32 numSlices, UINT_32* pBytesPerTile) const; - - virtual VOID HwlOverrideTileMode(ADDR_COMPUTE_SURFACE_INFO_INPUT* pInOut) const; - - virtual VOID HwlOptimizeTileMode(ADDR_COMPUTE_SURFACE_INFO_INPUT* pInOut) const; - - virtual VOID HwlSelectTileMode(ADDR_COMPUTE_SURFACE_INFO_INPUT* pInOut) const; - - /// Overwrite tile setting to PRT - virtual VOID HwlSetPrtTileMode(ADDR_COMPUTE_SURFACE_INFO_INPUT* pInOut) const; - - virtual ADDR_E_RETURNCODE HwlComputeDccInfo( - const ADDR_COMPUTE_DCCINFO_INPUT* pIn, - ADDR_COMPUTE_DCCINFO_OUTPUT* pOut) const; - - virtual ADDR_E_RETURNCODE HwlComputeCmaskAddrFromCoord( - const ADDR_COMPUTE_CMASK_ADDRFROMCOORD_INPUT* pIn, - ADDR_COMPUTE_CMASK_ADDRFROMCOORD_OUTPUT* pOut) const; - - virtual ADDR_E_RETURNCODE HwlComputeHtileAddrFromCoord( - const ADDR_COMPUTE_HTILE_ADDRFROMCOORD_INPUT* pIn, - ADDR_COMPUTE_HTILE_ADDRFROMCOORD_OUTPUT* pOut) const; - - virtual UINT_32 HwlComputeMaxBaseAlignments() const; - - virtual UINT_32 HwlComputeMaxMetaBaseAlignments() const; - - virtual VOID HwlPadDimensions( - AddrTileMode tileMode, UINT_32 bpp, ADDR_SURFACE_FLAGS flags, - UINT_32 numSamples, ADDR_TILEINFO* pTileInfo, UINT_32 mipLevel, - UINT_32* pPitch, UINT_32 *PitchAlign, UINT_32 height, UINT_32 heightAlign) const; - - virtual VOID HwlComputeSurfaceAlignmentsMacroTiled( - AddrTileMode tileMode, UINT_32 bpp, ADDR_SURFACE_FLAGS flags, - UINT_32 mipLevel, UINT_32 numSamples, ADDR_COMPUTE_SURFACE_INFO_OUTPUT* pOut) const; - -private: - VOID ReadGbTileMode( - UINT_32 regValue, TileConfig* pCfg) const; - - VOID ReadGbMacroTileCfg( - UINT_32 regValue, ADDR_TILEINFO* pCfg) const; - - BOOL_32 InitTileSettingTable( - const UINT_32 *pSetting, UINT_32 noOfEntries); - - BOOL_32 InitMacroTileCfgTable( - const UINT_32 *pSetting, UINT_32 noOfEntries); - - UINT_64 HwlComputeMetadataNibbleAddress( - UINT_64 
uncompressedDataByteAddress, - UINT_64 dataBaseByteAddress, - UINT_64 metadataBaseByteAddress, - UINT_32 metadataBitSize, - UINT_32 elementBitSize, - UINT_32 blockByteSize, - UINT_32 pipeInterleaveBytes, - UINT_32 numOfPipes, - UINT_32 numOfBanks, - UINT_32 numOfSamplesPerSplit) const; - - BOOL_32 DepthStencilTileCfgMatch( - const ADDR_COMPUTE_SURFACE_INFO_INPUT* pIn, - ADDR_COMPUTE_SURFACE_INFO_OUTPUT* pOut) const; - - BOOL_32 CheckTcCompatibility(const ADDR_TILEINFO* pTileInfo, UINT_32 bpp, AddrTileMode tileMode, - AddrTileType tileType, const ADDR_COMPUTE_SURFACE_INFO_OUTPUT* pOut) const; - - static const UINT_32 MacroTileTableSize = 16; - static const UINT_32 PrtMacroModeOffset = MacroTileTableSize / 2; - static const INT_32 MinDepth2DThinIndex = 0; - static const INT_32 MaxDepth2DThinIndex = 4; - static const INT_32 Depth1DThinIndex = 5; - - ADDR_TILEINFO m_macroTileTable[MacroTileTableSize]; - UINT_32 m_noOfMacroEntries; - BOOL_32 m_allowNonDispThickModes; -}; - -} // V1 -} // Addr - -#endif - - diff -Nru mesa-18.3.3/src/amd/addrlib/r800/egbaddrlib.cpp mesa-19.0.1/src/amd/addrlib/r800/egbaddrlib.cpp --- mesa-18.3.3/src/amd/addrlib/r800/egbaddrlib.cpp 2018-12-07 18:58:04.000000000 +0000 +++ mesa-19.0.1/src/amd/addrlib/r800/egbaddrlib.cpp 1970-01-01 00:00:00.000000000 +0000 @@ -1,4168 +0,0 @@ -/* - * Copyright © 2014 Advanced Micro Devices, Inc. - * All Rights Reserved. 
- * - * Permission is hereby granted, free of charge, to any person obtaining - * a copy of this software and associated documentation files (the - * "Software"), to deal in the Software without restriction, including - * without limitation the rights to use, copy, modify, merge, publish, - * distribute, sub license, and/or sell copies of the Software, and to - * permit persons to whom the Software is furnished to do so, subject to - * the following conditions: - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES - * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - * NON-INFRINGEMENT. IN NO EVENT SHALL THE COPYRIGHT HOLDERS, AUTHORS - * AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, - * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE - * USE OR OTHER DEALINGS IN THE SOFTWARE. - * - * The above copyright notice and this permission notice (including the - * next paragraph) shall be included in all copies or substantial portions - * of the Software. - */ - -/** -**************************************************************************************************** -* @file egbaddrlib.cpp -* @brief Contains the EgBasedLib class implementation. 
-**************************************************************************************************** -*/ - -#include "egbaddrlib.h" - -#include "util/macros.h" - -namespace Addr -{ -namespace V1 -{ - -/** -**************************************************************************************************** -* EgBasedLib::EgBasedLib -* -* @brief -* Constructor -* -* @note -* -**************************************************************************************************** -*/ -EgBasedLib::EgBasedLib(const Client* pClient) - : - Lib(pClient), - m_ranks(0), - m_logicalBanks(0), - m_bankInterleave(1) -{ -} - -/** -**************************************************************************************************** -* EgBasedLib::~EgBasedLib -* -* @brief -* Destructor -**************************************************************************************************** -*/ -EgBasedLib::~EgBasedLib() -{ -} - -/** -**************************************************************************************************** -* EgBasedLib::DispatchComputeSurfaceInfo -* -* @brief -* Compute surface sizes include padded pitch,height,slices,total size in bytes, -* meanwhile output suitable tile mode and base alignment might be changed in this -* call as well. Results are returned through output parameters. -* -* @return -* TRUE if no error occurs -**************************************************************************************************** -*/ -BOOL_32 EgBasedLib::DispatchComputeSurfaceInfo( - const ADDR_COMPUTE_SURFACE_INFO_INPUT* pIn, ///< [in] input structure - ADDR_COMPUTE_SURFACE_INFO_OUTPUT* pOut ///< [out] output structure - ) const -{ - AddrTileMode tileMode = pIn->tileMode; - UINT_32 bpp = pIn->bpp; - UINT_32 numSamples = pIn->numSamples; - UINT_32 numFrags = ((pIn->numFrags == 0) ? 
numSamples : pIn->numFrags); - UINT_32 pitch = pIn->width; - UINT_32 height = pIn->height; - UINT_32 numSlices = pIn->numSlices; - UINT_32 mipLevel = pIn->mipLevel; - ADDR_SURFACE_FLAGS flags = pIn->flags; - - ADDR_TILEINFO tileInfoDef = {0}; - ADDR_TILEINFO* pTileInfo = &tileInfoDef; - UINT_32 padDims = 0; - BOOL_32 valid; - - if (pIn->flags.disallowLargeThickDegrade == 0) - { - tileMode = DegradeLargeThickTile(tileMode, bpp); - } - - // Only override numSamples for NI above - if (m_chipFamily >= ADDR_CHIP_FAMILY_NI) - { - if (numFrags != numSamples) // This means EQAA - { - // The real surface size needed is determined by number of fragments - numSamples = numFrags; - } - - // Save altered numSamples in pOut - pOut->numSamples = numSamples; - } - - // Caller makes sure pOut->pTileInfo is not NULL, see HwlComputeSurfaceInfo - ADDR_ASSERT(pOut->pTileInfo); - - if (pOut->pTileInfo != NULL) - { - pTileInfo = pOut->pTileInfo; - } - - // Set default values - if (pIn->pTileInfo != NULL) - { - if (pTileInfo != pIn->pTileInfo) - { - *pTileInfo = *pIn->pTileInfo; - } - } - else - { - memset(pTileInfo, 0, sizeof(ADDR_TILEINFO)); - } - - // For macro tile mode, we should calculate default tiling parameters - HwlSetupTileInfo(tileMode, - flags, - bpp, - pitch, - height, - numSamples, - pIn->pTileInfo, - pTileInfo, - pIn->tileType, - pOut); - - if (flags.cube) - { - if (mipLevel == 0) - { - padDims = 2; - } - - if (numSlices == 1) - { - // This is calculating one face, remove cube flag - flags.cube = 0; - } - } - - switch (tileMode) - { - case ADDR_TM_LINEAR_GENERAL://fall through - case ADDR_TM_LINEAR_ALIGNED: - valid = ComputeSurfaceInfoLinear(pIn, pOut, padDims); - break; - - case ADDR_TM_1D_TILED_THIN1://fall through - case ADDR_TM_1D_TILED_THICK: - valid = ComputeSurfaceInfoMicroTiled(pIn, pOut, padDims, tileMode); - break; - - case ADDR_TM_2D_TILED_THIN1: //fall through - case ADDR_TM_2D_TILED_THICK: //fall through - case ADDR_TM_3D_TILED_THIN1: //fall through - case 
ADDR_TM_3D_TILED_THICK: //fall through - case ADDR_TM_2D_TILED_XTHICK: //fall through - case ADDR_TM_3D_TILED_XTHICK: //fall through - case ADDR_TM_PRT_TILED_THIN1: //fall through - case ADDR_TM_PRT_2D_TILED_THIN1://fall through - case ADDR_TM_PRT_3D_TILED_THIN1://fall through - case ADDR_TM_PRT_TILED_THICK: //fall through - case ADDR_TM_PRT_2D_TILED_THICK://fall through - case ADDR_TM_PRT_3D_TILED_THICK: - valid = ComputeSurfaceInfoMacroTiled(pIn, pOut, padDims, tileMode); - break; - - default: - valid = FALSE; - ADDR_ASSERT_ALWAYS(); - break; - } - - return valid; -} - -/** -**************************************************************************************************** -* EgBasedLib::ComputeSurfaceInfoLinear -* -* @brief -* Compute linear surface sizes include padded pitch, height, slices, total size in -* bytes, meanwhile alignments as well. Since it is linear mode, so output tile mode -* will not be changed here. Results are returned through output parameters. -* -* @return -* TRUE if no error occurs -**************************************************************************************************** -*/ -BOOL_32 EgBasedLib::ComputeSurfaceInfoLinear( - const ADDR_COMPUTE_SURFACE_INFO_INPUT* pIn, ///< [in] Input structure - ADDR_COMPUTE_SURFACE_INFO_OUTPUT* pOut, ///< [out] Output structure - UINT_32 padDims ///< [in] Dimensions to padd - ) const -{ - UINT_32 expPitch = pIn->width; - UINT_32 expHeight = pIn->height; - UINT_32 expNumSlices = pIn->numSlices; - - // No linear MSAA on real H/W, keep this for TGL - UINT_32 numSamples = pOut->numSamples; - - const UINT_32 microTileThickness = 1; - - // - // Compute the surface alignments. 
- // - ComputeSurfaceAlignmentsLinear(pIn->tileMode, - pIn->bpp, - pIn->flags, - &pOut->baseAlign, - &pOut->pitchAlign, - &pOut->heightAlign); - - if ((pIn->tileMode == ADDR_TM_LINEAR_GENERAL) && pIn->flags.color && (pIn->height > 1)) - { -#if !ALT_TEST - // When linear_general surface is accessed in multiple lines, it requires 8 pixels in pitch - // alignment since PITCH_TILE_MAX is in unit of 8 pixels. - // It is OK if it is accessed per line. - ADDR_ASSERT((pIn->width % 8) == 0); -#endif - } - - pOut->depthAlign = microTileThickness; - - expPitch = HwlPreHandleBaseLvl3xPitch(pIn, expPitch); - - // - // Pad pitch and height to the required granularities. - // - PadDimensions(pIn->tileMode, - pIn->bpp, - pIn->flags, - numSamples, - pOut->pTileInfo, - padDims, - pIn->mipLevel, - &expPitch, &pOut->pitchAlign, - &expHeight, pOut->heightAlign, - &expNumSlices, microTileThickness); - - expPitch = HwlPostHandleBaseLvl3xPitch(pIn, expPitch); - - // - // Adjust per HWL - // - - UINT_64 logicalSliceSize; - - logicalSliceSize = HwlGetSizeAdjustmentLinear(pIn->tileMode, - pIn->bpp, - numSamples, - pOut->baseAlign, - pOut->pitchAlign, - &expPitch, - &expHeight, - &pOut->heightAlign); - - if ((pIn->pitchAlign != 0) || (pIn->heightAlign != 0)) - { - if (pIn->pitchAlign != 0) - { - ADDR_ASSERT((pIn->pitchAlign % pOut->pitchAlign) == 0); - pOut->pitchAlign = pIn->pitchAlign; - - if (IsPow2(pOut->pitchAlign)) - { - expPitch = PowTwoAlign(expPitch, pOut->pitchAlign); - } - else - { - expPitch += pOut->pitchAlign - 1; - expPitch /= pOut->pitchAlign; - expPitch *= pOut->pitchAlign; - } - } - - if (pIn->heightAlign != 0) - { - ADDR_ASSERT((pIn->heightAlign % pOut->heightAlign) == 0); - pOut->heightAlign = pIn->heightAlign; - - if (IsPow2(pOut->heightAlign)) - { - expHeight = PowTwoAlign(expHeight, pOut->heightAlign); - } - else - { - expHeight += pOut->heightAlign - 1; - expHeight /= pOut->heightAlign; - expHeight *= pOut->heightAlign; - } - } - - logicalSliceSize = 
BITS_TO_BYTES(expPitch * expHeight * pIn->bpp); - } - - pOut->pitch = expPitch; - pOut->height = expHeight; - pOut->depth = expNumSlices; - - pOut->surfSize = logicalSliceSize * expNumSlices; - - pOut->tileMode = pIn->tileMode; - - return TRUE; -} - -/** -**************************************************************************************************** -* EgBasedLib::ComputeSurfaceInfoMicroTiled -* -* @brief -* Compute 1D/Micro Tiled surface sizes include padded pitch, height, slices, total -* size in bytes, meanwhile alignments as well. Results are returned through output -* parameters. -* -* @return -* TRUE if no error occurs -**************************************************************************************************** -*/ -BOOL_32 EgBasedLib::ComputeSurfaceInfoMicroTiled( - const ADDR_COMPUTE_SURFACE_INFO_INPUT* pIn, ///< [in] Input structure - ADDR_COMPUTE_SURFACE_INFO_OUTPUT* pOut, ///< [out] Output structure - UINT_32 padDims, ///< [in] Dimensions to padd - AddrTileMode expTileMode ///< [in] Expected tile mode - ) const -{ - BOOL_32 valid = TRUE; - - UINT_32 microTileThickness; - UINT_32 expPitch = pIn->width; - UINT_32 expHeight = pIn->height; - UINT_32 expNumSlices = pIn->numSlices; - - // No 1D MSAA on real H/W, keep this for TGL - UINT_32 numSamples = pOut->numSamples; - - // - // Compute the micro tile thickness. - // - microTileThickness = Thickness(expTileMode); - - // - // Extra override for mip levels - // - if (pIn->mipLevel > 0) - { - // - // Reduce tiling mode from thick to thin if the number of slices is less than the - // micro tile thickness. - // - if ((expTileMode == ADDR_TM_1D_TILED_THICK) && - (expNumSlices < ThickTileThickness)) - { - expTileMode = HwlDegradeThickTileMode(ADDR_TM_1D_TILED_THICK, expNumSlices, NULL); - if (expTileMode != ADDR_TM_1D_TILED_THICK) - { - microTileThickness = 1; - } - } - } - - // - // Compute the surface restrictions. 
- // - ComputeSurfaceAlignmentsMicroTiled(expTileMode, - pIn->bpp, - pIn->flags, - pIn->mipLevel, - numSamples, - &pOut->baseAlign, - &pOut->pitchAlign, - &pOut->heightAlign); - - pOut->depthAlign = microTileThickness; - - // - // Pad pitch and height to the required granularities. - // Compute surface size. - // Return parameters. - // - PadDimensions(expTileMode, - pIn->bpp, - pIn->flags, - numSamples, - pOut->pTileInfo, - padDims, - pIn->mipLevel, - &expPitch, &pOut->pitchAlign, - &expHeight, pOut->heightAlign, - &expNumSlices, microTileThickness); - - // - // Get HWL specific pitch adjustment - // - UINT_64 logicalSliceSize = HwlGetSizeAdjustmentMicroTiled(microTileThickness, - pIn->bpp, - pIn->flags, - numSamples, - pOut->baseAlign, - pOut->pitchAlign, - &expPitch, - &expHeight); - - - pOut->pitch = expPitch; - pOut->height = expHeight; - pOut->depth = expNumSlices; - - pOut->surfSize = logicalSliceSize * expNumSlices; - - pOut->tileMode = expTileMode; - - return valid; -} - - -/** -**************************************************************************************************** -* EgBasedLib::ComputeSurfaceInfoMacroTiled -* -* @brief -* Compute 2D/macro tiled surface sizes include padded pitch, height, slices, total -* size in bytes, meanwhile output suitable tile mode and alignments might be changed -* in this call as well. Results are returned through output parameters. 
-* -* @return -* TRUE if no error occurs -**************************************************************************************************** -*/ -BOOL_32 EgBasedLib::ComputeSurfaceInfoMacroTiled( - const ADDR_COMPUTE_SURFACE_INFO_INPUT* pIn, ///< [in] Input structure - ADDR_COMPUTE_SURFACE_INFO_OUTPUT* pOut, ///< [out] Output structure - UINT_32 padDims, ///< [in] Dimensions to padd - AddrTileMode expTileMode ///< [in] Expected tile mode - ) const -{ - BOOL_32 valid = TRUE; - - AddrTileMode origTileMode = expTileMode; - UINT_32 microTileThickness; - - UINT_32 paddedPitch; - UINT_32 paddedHeight; - UINT_64 bytesPerSlice; - - UINT_32 expPitch = pIn->width; - UINT_32 expHeight = pIn->height; - UINT_32 expNumSlices = pIn->numSlices; - - UINT_32 numSamples = pOut->numSamples; - - // - // Compute the surface restrictions as base - // SanityCheckMacroTiled is called in ComputeSurfaceAlignmentsMacroTiled - // - valid = ComputeSurfaceAlignmentsMacroTiled(expTileMode, - pIn->bpp, - pIn->flags, - pIn->mipLevel, - numSamples, - pOut); - - if (valid) - { - // - // Compute the micro tile thickness. - // - microTileThickness = Thickness(expTileMode); - - // - // Find the correct tiling mode for mip levels - // - if (pIn->mipLevel > 0) - { - // - // Try valid tile mode - // - expTileMode = ComputeSurfaceMipLevelTileMode(expTileMode, - pIn->bpp, - expPitch, - expHeight, - expNumSlices, - numSamples, - pOut->blockWidth, - pOut->blockHeight, - pOut->pTileInfo); - - if (!IsMacroTiled(expTileMode)) // Downgraded to micro-tiled - { - return ComputeSurfaceInfoMicroTiled(pIn, pOut, padDims, expTileMode); - } - else if (microTileThickness != Thickness(expTileMode)) - { - // - // Re-compute if thickness changed since bank-height may be changed! 
- // - return ComputeSurfaceInfoMacroTiled(pIn, pOut, padDims, expTileMode); - } - } - - paddedPitch = expPitch; - paddedHeight = expHeight; - - // - // Re-cal alignment - // - if (expTileMode != origTileMode) // Tile mode is changed but still macro-tiled - { - valid = ComputeSurfaceAlignmentsMacroTiled(expTileMode, - pIn->bpp, - pIn->flags, - pIn->mipLevel, - numSamples, - pOut); - } - - // - // Do padding - // - PadDimensions(expTileMode, - pIn->bpp, - pIn->flags, - numSamples, - pOut->pTileInfo, - padDims, - pIn->mipLevel, - &paddedPitch, &pOut->pitchAlign, - &paddedHeight, pOut->heightAlign, - &expNumSlices, microTileThickness); - - if (pIn->flags.qbStereo && - (pOut->pStereoInfo != NULL)) - { - UINT_32 stereoHeightAlign = HwlStereoCheckRightOffsetPadding(pOut->pTileInfo); - - if (stereoHeightAlign != 0) - { - paddedHeight = PowTwoAlign(paddedHeight, stereoHeightAlign); - } - } - - if ((pIn->flags.needEquation == TRUE) && - (m_chipFamily == ADDR_CHIP_FAMILY_SI) && - (pIn->numMipLevels > 1) && - (pIn->mipLevel == 0)) - { - BOOL_32 convertTo1D = FALSE; - - ADDR_ASSERT(Thickness(expTileMode) == 1); - - for (UINT_32 i = 1; i < pIn->numMipLevels; i++) - { - UINT_32 mipPitch = Max(1u, paddedPitch >> i); - UINT_32 mipHeight = Max(1u, pIn->height >> i); - UINT_32 mipSlices = pIn->flags.volume ? 
- Max(1u, pIn->numSlices >> i) : pIn->numSlices; - expTileMode = ComputeSurfaceMipLevelTileMode(expTileMode, - pIn->bpp, - mipPitch, - mipHeight, - mipSlices, - numSamples, - pOut->blockWidth, - pOut->blockHeight, - pOut->pTileInfo); - - if (IsMacroTiled(expTileMode)) - { - if (PowTwoAlign(mipPitch, pOut->blockWidth) != - PowTwoAlign(mipPitch, pOut->pitchAlign)) - { - convertTo1D = TRUE; - break; - } - } - else - { - break; - } - } - - if (convertTo1D) - { - return ComputeSurfaceInfoMicroTiled(pIn, pOut, padDims, ADDR_TM_1D_TILED_THIN1); - } - } - - pOut->pitch = paddedPitch; - // Put this check right here to workaround special mipmap cases which the original height - // is needed. - // The original height is pre-stored in pOut->height in PostComputeMipLevel and - // pOut->pitch is needed in HwlCheckLastMacroTiledLvl, too. - if (m_configFlags.checkLast2DLevel && (numSamples == 1)) // Don't check MSAA - { - // Set a TRUE in pOut if next Level is the first 1D sub level - HwlCheckLastMacroTiledLvl(pIn, pOut); - } - pOut->height = paddedHeight; - - pOut->depth = expNumSlices; - - // - // Compute the size of a slice. - // - bytesPerSlice = BITS_TO_BYTES(static_cast(paddedPitch) * - paddedHeight * NextPow2(pIn->bpp) * numSamples); - - pOut->surfSize = bytesPerSlice * expNumSlices; - - pOut->tileMode = expTileMode; - - pOut->depthAlign = microTileThickness; - - } // if (valid) - - return valid; -} - -/** -**************************************************************************************************** -* EgBasedLib::ComputeSurfaceAlignmentsLinear -* -* @brief -* Compute linear surface alignment, calculation results are returned through -* output parameters. 
-* -* @return -* TRUE if no error occurs -**************************************************************************************************** -*/ -BOOL_32 EgBasedLib::ComputeSurfaceAlignmentsLinear( - AddrTileMode tileMode, ///< [in] tile mode - UINT_32 bpp, ///< [in] bits per pixel - ADDR_SURFACE_FLAGS flags, ///< [in] surface flags - UINT_32* pBaseAlign, ///< [out] base address alignment in bytes - UINT_32* pPitchAlign, ///< [out] pitch alignment in pixels - UINT_32* pHeightAlign ///< [out] height alignment in pixels - ) const -{ - BOOL_32 valid = TRUE; - - switch (tileMode) - { - case ADDR_TM_LINEAR_GENERAL: - // - // The required base alignment and pitch and height granularities is to 1 element. - // - *pBaseAlign = (bpp > 8) ? bpp / 8 : 1; - *pPitchAlign = 1; - *pHeightAlign = 1; - break; - case ADDR_TM_LINEAR_ALIGNED: - // - // The required alignment for base is the pipe interleave size. - // The required granularity for pitch is hwl dependent. - // The required granularity for height is one row. - // - *pBaseAlign = m_pipeInterleaveBytes; - *pPitchAlign = HwlGetPitchAlignmentLinear(bpp, flags); - *pHeightAlign = 1; - break; - default: - *pBaseAlign = 1; - *pPitchAlign = 1; - *pHeightAlign = 1; - ADDR_UNHANDLED_CASE(); - break; - } - - AdjustPitchAlignment(flags, pPitchAlign); - - return valid; -} - -/** -**************************************************************************************************** -* EgBasedLib::ComputeSurfaceAlignmentsMicroTiled -* -* @brief -* Compute 1D tiled surface alignment, calculation results are returned through -* output parameters. 
-* -* @return -* TRUE if no error occurs -**************************************************************************************************** -*/ -BOOL_32 EgBasedLib::ComputeSurfaceAlignmentsMicroTiled( - AddrTileMode tileMode, ///< [in] tile mode - UINT_32 bpp, ///< [in] bits per pixel - ADDR_SURFACE_FLAGS flags, ///< [in] surface flags - UINT_32 mipLevel, ///< [in] mip level - UINT_32 numSamples, ///< [in] number of samples - UINT_32* pBaseAlign, ///< [out] base address alignment in bytes - UINT_32* pPitchAlign, ///< [out] pitch alignment in pixels - UINT_32* pHeightAlign ///< [out] height alignment in pixels - ) const -{ - BOOL_32 valid = TRUE; - - // - // The required alignment for base is the pipe interleave size. - // - *pBaseAlign = m_pipeInterleaveBytes; - - *pPitchAlign = HwlGetPitchAlignmentMicroTiled(tileMode, bpp, flags, numSamples); - - *pHeightAlign = MicroTileHeight; - - AdjustPitchAlignment(flags, pPitchAlign); - - // Workaround 2 for 1D tiling - There is HW bug for Carrizo, - // where it requires the following alignments for 1D tiling. 
- if (flags.czDispCompatible && (mipLevel == 0)) - { - *pBaseAlign = PowTwoAlign(*pBaseAlign, 4096); //Base address MOD 4096 = 0 - *pPitchAlign = PowTwoAlign(*pPitchAlign, 512 / (BITS_TO_BYTES(bpp))); //(8 lines * pitch * bytes per pixel) MOD 4096 = 0 - } - // end Carrizo workaround for 1D tilling - - return valid; -} - - -/** -**************************************************************************************************** -* EgBasedLib::HwlReduceBankWidthHeight -* -* @brief -* Additional checks, reduce bankHeight/bankWidth if needed and possible -* tileSize*BANK_WIDTH*BANK_HEIGHT <= ROW_SIZE -* -* @return -* TRUE if no error occurs -**************************************************************************************************** -*/ -BOOL_32 EgBasedLib::HwlReduceBankWidthHeight( - UINT_32 tileSize, ///< [in] tile size - UINT_32 bpp, ///< [in] bits per pixel - ADDR_SURFACE_FLAGS flags, ///< [in] surface flags - UINT_32 numSamples, ///< [in] number of samples - UINT_32 bankHeightAlign, ///< [in] bank height alignment - UINT_32 pipes, ///< [in] pipes - ADDR_TILEINFO* pTileInfo ///< [in,out] bank structure. - ) const -{ - UINT_32 macroAspectAlign; - BOOL_32 valid = TRUE; - - if (tileSize * pTileInfo->bankWidth * pTileInfo->bankHeight > m_rowSize) - { - BOOL_32 stillGreater = TRUE; - - // Try reducing bankWidth first - if (stillGreater && pTileInfo->bankWidth > 1) - { - while (stillGreater && pTileInfo->bankWidth > 0) - { - pTileInfo->bankWidth >>= 1; - - if (pTileInfo->bankWidth == 0) - { - pTileInfo->bankWidth = 1; - break; - } - - stillGreater = - tileSize * pTileInfo->bankWidth * pTileInfo->bankHeight > m_rowSize; - } - - // bankWidth is reduced above, so we need to recalculate bankHeight and ratio - bankHeightAlign = Max(1u, - m_pipeInterleaveBytes * m_bankInterleave / - (tileSize * pTileInfo->bankWidth) - ); - - // We cannot increase bankHeight so just assert this case. 
- ADDR_ASSERT((pTileInfo->bankHeight % bankHeightAlign) == 0); - - if (numSamples == 1) - { - macroAspectAlign = Max(1u, - m_pipeInterleaveBytes * m_bankInterleave / - (tileSize * pipes * pTileInfo->bankWidth) - ); - pTileInfo->macroAspectRatio = PowTwoAlign(pTileInfo->macroAspectRatio, - macroAspectAlign); - } - } - - // Early quit bank_height degradation for "64" bit z buffer - if (flags.depth && bpp >= 64) - { - stillGreater = FALSE; - } - - // Then try reducing bankHeight - if (stillGreater && pTileInfo->bankHeight > bankHeightAlign) - { - while (stillGreater && pTileInfo->bankHeight > bankHeightAlign) - { - pTileInfo->bankHeight >>= 1; - - if (pTileInfo->bankHeight < bankHeightAlign) - { - pTileInfo->bankHeight = bankHeightAlign; - break; - } - - stillGreater = - tileSize * pTileInfo->bankWidth * pTileInfo->bankHeight > m_rowSize; - } - } - - valid = !stillGreater; - - // Generate a warning if we still fail to meet this constraint - if (valid == FALSE) - { - ADDR_WARN( - 0, ("TILE_SIZE(%d)*BANK_WIDTH(%d)*BANK_HEIGHT(%d) <= ROW_SIZE(%d)", - tileSize, pTileInfo->bankWidth, pTileInfo->bankHeight, m_rowSize)); - } - } - - return valid; -} - -/** -**************************************************************************************************** -* EgBasedLib::ComputeSurfaceAlignmentsMacroTiled -* -* @brief -* Compute 2D tiled surface alignment, calculation results are returned through -* output parameters. 
-* -* @return -* TRUE if no error occurs -**************************************************************************************************** -*/ -BOOL_32 EgBasedLib::ComputeSurfaceAlignmentsMacroTiled( - AddrTileMode tileMode, ///< [in] tile mode - UINT_32 bpp, ///< [in] bits per pixel - ADDR_SURFACE_FLAGS flags, ///< [in] surface flags - UINT_32 mipLevel, ///< [in] mip level - UINT_32 numSamples, ///< [in] number of samples - ADDR_COMPUTE_SURFACE_INFO_OUTPUT* pOut ///< [in,out] Surface output - ) const -{ - ADDR_TILEINFO* pTileInfo = pOut->pTileInfo; - - BOOL_32 valid = SanityCheckMacroTiled(pTileInfo); - - if (valid) - { - UINT_32 macroTileWidth; - UINT_32 macroTileHeight; - - UINT_32 tileSize; - UINT_32 bankHeightAlign; - UINT_32 macroAspectAlign; - - UINT_32 thickness = Thickness(tileMode); - UINT_32 pipes = HwlGetPipes(pTileInfo); - - // - // Align bank height first according to latest h/w spec - // - - // tile_size = MIN(tile_split, 64 * tile_thickness * element_bytes * num_samples) - tileSize = Min(pTileInfo->tileSplitBytes, - BITS_TO_BYTES(64 * thickness * bpp * numSamples)); - - // bank_height_align = - // MAX(1, (pipe_interleave_bytes * bank_interleave)/(tile_size*bank_width)) - bankHeightAlign = Max(1u, - m_pipeInterleaveBytes * m_bankInterleave / - (tileSize * pTileInfo->bankWidth) - ); - - pTileInfo->bankHeight = PowTwoAlign(pTileInfo->bankHeight, bankHeightAlign); - - // num_pipes * bank_width * macro_tile_aspect >= - // (pipe_interleave_size * bank_interleave) / tile_size - if (numSamples == 1) - { - // this restriction is only for mipmap (mipmap's numSamples must be 1) - macroAspectAlign = Max(1u, - m_pipeInterleaveBytes * m_bankInterleave / - (tileSize * pipes * pTileInfo->bankWidth) - ); - pTileInfo->macroAspectRatio = PowTwoAlign(pTileInfo->macroAspectRatio, macroAspectAlign); - } - - valid = HwlReduceBankWidthHeight(tileSize, - bpp, - flags, - numSamples, - bankHeightAlign, - pipes, - pTileInfo); - - // - // The required granularity for pitch 
is the macro tile width. - // - macroTileWidth = MicroTileWidth * pTileInfo->bankWidth * pipes * - pTileInfo->macroAspectRatio; - - pOut->pitchAlign = macroTileWidth; - pOut->blockWidth = macroTileWidth; - - AdjustPitchAlignment(flags, &pOut->pitchAlign); - - // - // The required granularity for height is the macro tile height. - // - macroTileHeight = MicroTileHeight * pTileInfo->bankHeight * pTileInfo->banks / - pTileInfo->macroAspectRatio; - - pOut->heightAlign = macroTileHeight; - pOut->blockHeight = macroTileHeight; - - // - // Compute base alignment - // - pOut->baseAlign = - pipes * pTileInfo->bankWidth * pTileInfo->banks * pTileInfo->bankHeight * tileSize; - - HwlComputeSurfaceAlignmentsMacroTiled(tileMode, bpp, flags, mipLevel, numSamples, pOut); - } - - return valid; -} - -/** -**************************************************************************************************** -* EgBasedLib::SanityCheckMacroTiled -* -* @brief -* Check if macro-tiled parameters are valid -* @return -* TRUE if valid -**************************************************************************************************** -*/ -BOOL_32 EgBasedLib::SanityCheckMacroTiled( - ADDR_TILEINFO* pTileInfo ///< [in] macro-tiled parameters - ) const -{ - BOOL_32 valid = TRUE; - MAYBE_UNUSED UINT_32 numPipes = HwlGetPipes(pTileInfo); - - switch (pTileInfo->banks) - { - case 2: //fall through - case 4: //fall through - case 8: //fall through - case 16: - break; - default: - valid = FALSE; - break; - - } - - if (valid) - { - switch (pTileInfo->bankWidth) - { - case 1: //fall through - case 2: //fall through - case 4: //fall through - case 8: - break; - default: - valid = FALSE; - break; - } - } - - if (valid) - { - switch (pTileInfo->bankHeight) - { - case 1: //fall through - case 2: //fall through - case 4: //fall through - case 8: - break; - default: - valid = FALSE; - break; - } - } - - if (valid) - { - switch (pTileInfo->macroAspectRatio) - { - case 1: //fall through - case 2: //fall 
through - case 4: //fall through - case 8: - break; - default: - valid = FALSE; - break; - } - } - - if (valid) - { - if (pTileInfo->banks < pTileInfo->macroAspectRatio) - { - // This will generate macro tile height <= 1 - valid = FALSE; - } - } - - if (valid) - { - if (pTileInfo->tileSplitBytes > m_rowSize) - { - ADDR_WARN(0, ("tileSplitBytes is bigger than row size")); - } - } - - if (valid) - { - valid = HwlSanityCheckMacroTiled(pTileInfo); - } - - ADDR_ASSERT(valid == TRUE); - - // Add this assert for guidance - ADDR_ASSERT(numPipes * pTileInfo->banks >= 4); - - return valid; -} - -/** -**************************************************************************************************** -* EgBasedLib::ComputeSurfaceMipLevelTileMode -* -* @brief -* Compute valid tile mode for surface mipmap sub-levels -* -* @return -* Suitable tile mode -**************************************************************************************************** -*/ -AddrTileMode EgBasedLib::ComputeSurfaceMipLevelTileMode( - AddrTileMode baseTileMode, ///< [in] base tile mode - UINT_32 bpp, ///< [in] bits per pixels - UINT_32 pitch, ///< [in] current level pitch - UINT_32 height, ///< [in] current level height - UINT_32 numSlices, ///< [in] current number of slices - UINT_32 numSamples, ///< [in] number of samples - UINT_32 pitchAlign, ///< [in] pitch alignment - UINT_32 heightAlign, ///< [in] height alignment - ADDR_TILEINFO* pTileInfo ///< [in] ptr to bank structure - ) const -{ - UINT_64 bytesPerSlice; - (void)bytesPerSlice; - UINT_32 bytesPerTile; - - AddrTileMode expTileMode = baseTileMode; - UINT_32 microTileThickness = Thickness(expTileMode); - UINT_32 interleaveSize = m_pipeInterleaveBytes * m_bankInterleave; - - // - // Compute the size of a slice. 
- // - bytesPerSlice = BITS_TO_BYTES(static_cast(pitch) * height * bpp * numSamples); - bytesPerTile = BITS_TO_BYTES(MicroTilePixels * microTileThickness * NextPow2(bpp) * numSamples); - - // - // Reduce tiling mode from thick to thin if the number of slices is less than the - // micro tile thickness. - // - if (numSlices < microTileThickness) - { - expTileMode = HwlDegradeThickTileMode(expTileMode, numSlices, &bytesPerTile); - } - - if (bytesPerTile > pTileInfo->tileSplitBytes) - { - bytesPerTile = pTileInfo->tileSplitBytes; - } - - UINT_32 threshold1 = - bytesPerTile * HwlGetPipes(pTileInfo) * pTileInfo->bankWidth * pTileInfo->macroAspectRatio; - - UINT_32 threshold2 = - bytesPerTile * pTileInfo->bankWidth * pTileInfo->bankHeight; - - // - // Reduce the tile mode from 2D/3D to 1D in following conditions - // - switch (expTileMode) - { - case ADDR_TM_2D_TILED_THIN1: //fall through - case ADDR_TM_3D_TILED_THIN1: - case ADDR_TM_PRT_TILED_THIN1: - case ADDR_TM_PRT_2D_TILED_THIN1: - case ADDR_TM_PRT_3D_TILED_THIN1: - if ((pitch < pitchAlign) || - (height < heightAlign) || - (interleaveSize > threshold1) || - (interleaveSize > threshold2)) - { - expTileMode = ADDR_TM_1D_TILED_THIN1; - } - break; - case ADDR_TM_2D_TILED_THICK: //fall through - case ADDR_TM_3D_TILED_THICK: - case ADDR_TM_2D_TILED_XTHICK: - case ADDR_TM_3D_TILED_XTHICK: - case ADDR_TM_PRT_TILED_THICK: - case ADDR_TM_PRT_2D_TILED_THICK: - case ADDR_TM_PRT_3D_TILED_THICK: - if ((pitch < pitchAlign) || - (height < heightAlign)) - { - expTileMode = ADDR_TM_1D_TILED_THICK; - } - break; - default: - break; - } - - return expTileMode; -} - -/** -**************************************************************************************************** -* EgBasedLib::HwlGetAlignmentInfoMacroTiled -* @brief -* Get alignment info for giving tile mode -* @return -* TRUE if getting alignment is OK -**************************************************************************************************** -*/ -BOOL_32 
EgBasedLib::HwlGetAlignmentInfoMacroTiled( - const ADDR_COMPUTE_SURFACE_INFO_INPUT* pIn, ///< [in] create surface info - UINT_32* pPitchAlign, ///< [out] pitch alignment - UINT_32* pHeightAlign, ///< [out] height alignment - UINT_32* pSizeAlign ///< [out] size alignment - ) const -{ - BOOL_32 valid = TRUE; - - ADDR_ASSERT(IsMacroTiled(pIn->tileMode)); - - UINT_32 numSamples = (pIn->numFrags == 0) ? pIn->numSamples : pIn->numFrags; - - ADDR_ASSERT(pIn->pTileInfo); - ADDR_TILEINFO tileInfo = *pIn->pTileInfo; - ADDR_COMPUTE_SURFACE_INFO_OUTPUT out = {0}; - out.pTileInfo = &tileInfo; - - if (UseTileIndex(pIn->tileIndex)) - { - out.tileIndex = pIn->tileIndex; - out.macroModeIndex = TileIndexInvalid; - } - - HwlSetupTileInfo(pIn->tileMode, - pIn->flags, - pIn->bpp, - pIn->width, - pIn->height, - numSamples, - &tileInfo, - &tileInfo, - pIn->tileType, - &out); - - valid = ComputeSurfaceAlignmentsMacroTiled(pIn->tileMode, - pIn->bpp, - pIn->flags, - pIn->mipLevel, - numSamples, - &out); - - if (valid) - { - *pPitchAlign = out.pitchAlign; - *pHeightAlign = out.heightAlign; - *pSizeAlign = out.baseAlign; - } - - return valid; -} - -/** -**************************************************************************************************** -* EgBasedLib::HwlDegradeThickTileMode -* -* @brief -* Degrades valid tile mode for thick modes if needed -* -* @return -* Suitable tile mode -**************************************************************************************************** -*/ -AddrTileMode EgBasedLib::HwlDegradeThickTileMode( - AddrTileMode baseTileMode, ///< [in] base tile mode - UINT_32 numSlices, ///< [in] current number of slices - UINT_32* pBytesPerTile ///< [in,out] pointer to bytes per slice - ) const -{ - ADDR_ASSERT(numSlices < Thickness(baseTileMode)); - // if pBytesPerTile is NULL, this is a don't-care.... - UINT_32 bytesPerTile = pBytesPerTile != NULL ? 
*pBytesPerTile : 64; - - AddrTileMode expTileMode = baseTileMode; - switch (baseTileMode) - { - case ADDR_TM_1D_TILED_THICK: - expTileMode = ADDR_TM_1D_TILED_THIN1; - bytesPerTile >>= 2; - break; - case ADDR_TM_2D_TILED_THICK: - expTileMode = ADDR_TM_2D_TILED_THIN1; - bytesPerTile >>= 2; - break; - case ADDR_TM_3D_TILED_THICK: - expTileMode = ADDR_TM_3D_TILED_THIN1; - bytesPerTile >>= 2; - break; - case ADDR_TM_2D_TILED_XTHICK: - if (numSlices < ThickTileThickness) - { - expTileMode = ADDR_TM_2D_TILED_THIN1; - bytesPerTile >>= 3; - } - else - { - expTileMode = ADDR_TM_2D_TILED_THICK; - bytesPerTile >>= 1; - } - break; - case ADDR_TM_3D_TILED_XTHICK: - if (numSlices < ThickTileThickness) - { - expTileMode = ADDR_TM_3D_TILED_THIN1; - bytesPerTile >>= 3; - } - else - { - expTileMode = ADDR_TM_3D_TILED_THICK; - bytesPerTile >>= 1; - } - break; - default: - ADDR_ASSERT_ALWAYS(); - break; - } - - if (pBytesPerTile != NULL) - { - *pBytesPerTile = bytesPerTile; - } - - return expTileMode; -} - -/** -**************************************************************************************************** -* EgBasedLib::DispatchComputeSurfaceAddrFromCoord -* -* @brief -* Compute surface address from given coord (x, y, slice,sample) -* -* @return -* Address in bytes -**************************************************************************************************** -*/ -UINT_64 EgBasedLib::DispatchComputeSurfaceAddrFromCoord( - const ADDR_COMPUTE_SURFACE_ADDRFROMCOORD_INPUT* pIn, ///< [in] input structure - ADDR_COMPUTE_SURFACE_ADDRFROMCOORD_OUTPUT* pOut ///< [out] output structure - ) const -{ - UINT_32 x = pIn->x; - UINT_32 y = pIn->y; - UINT_32 slice = pIn->slice; - UINT_32 sample = pIn->sample; - UINT_32 bpp = pIn->bpp; - UINT_32 pitch = pIn->pitch; - UINT_32 height = pIn->height; - UINT_32 numSlices = pIn->numSlices; - UINT_32 numSamples = ((pIn->numSamples == 0) ? 1 : pIn->numSamples); - UINT_32 numFrags = ((pIn->numFrags == 0) ? 
numSamples : pIn->numFrags); - AddrTileMode tileMode = pIn->tileMode; - AddrTileType microTileType = pIn->tileType; - BOOL_32 ignoreSE = pIn->ignoreSE; - BOOL_32 isDepthSampleOrder = pIn->isDepth; - ADDR_TILEINFO* pTileInfo = pIn->pTileInfo; - - UINT_32* pBitPosition = &pOut->bitPosition; - UINT_64 addr; - - // ADDR_DEPTH_SAMPLE_ORDER = non-disp + depth-sample-order - if (microTileType == ADDR_DEPTH_SAMPLE_ORDER) - { - isDepthSampleOrder = TRUE; - } - - if (m_chipFamily >= ADDR_CHIP_FAMILY_NI) - { - if (numFrags != numSamples) - { - numSamples = numFrags; - ADDR_ASSERT(sample < numSamples); - } - - /// @note - /// 128 bit/thick tiled surface doesn't support display tiling and - /// mipmap chain must have the same tileType, so please fill tileType correctly - if (IsLinear(pIn->tileMode) == FALSE) - { - if (bpp >= 128 || Thickness(tileMode) > 1) - { - ADDR_ASSERT(microTileType != ADDR_DISPLAYABLE); - } - } - } - - switch (tileMode) - { - case ADDR_TM_LINEAR_GENERAL://fall through - case ADDR_TM_LINEAR_ALIGNED: - addr = ComputeSurfaceAddrFromCoordLinear(x, - y, - slice, - sample, - bpp, - pitch, - height, - numSlices, - pBitPosition); - break; - case ADDR_TM_1D_TILED_THIN1://fall through - case ADDR_TM_1D_TILED_THICK: - addr = ComputeSurfaceAddrFromCoordMicroTiled(x, - y, - slice, - sample, - bpp, - pitch, - height, - numSamples, - tileMode, - microTileType, - isDepthSampleOrder, - pBitPosition); - break; - case ADDR_TM_2D_TILED_THIN1: //fall through - case ADDR_TM_2D_TILED_THICK: //fall through - case ADDR_TM_3D_TILED_THIN1: //fall through - case ADDR_TM_3D_TILED_THICK: //fall through - case ADDR_TM_2D_TILED_XTHICK: //fall through - case ADDR_TM_3D_TILED_XTHICK: //fall through - case ADDR_TM_PRT_TILED_THIN1: //fall through - case ADDR_TM_PRT_2D_TILED_THIN1://fall through - case ADDR_TM_PRT_3D_TILED_THIN1://fall through - case ADDR_TM_PRT_TILED_THICK: //fall through - case ADDR_TM_PRT_2D_TILED_THICK://fall through - case ADDR_TM_PRT_3D_TILED_THICK: - UINT_32 
pipeSwizzle; - UINT_32 bankSwizzle; - - if (m_configFlags.useCombinedSwizzle) - { - ExtractBankPipeSwizzle(pIn->tileSwizzle, pIn->pTileInfo, - &bankSwizzle, &pipeSwizzle); - } - else - { - pipeSwizzle = pIn->pipeSwizzle; - bankSwizzle = pIn->bankSwizzle; - } - - addr = ComputeSurfaceAddrFromCoordMacroTiled(x, - y, - slice, - sample, - bpp, - pitch, - height, - numSamples, - tileMode, - microTileType, - ignoreSE, - isDepthSampleOrder, - pipeSwizzle, - bankSwizzle, - pTileInfo, - pBitPosition); - break; - default: - addr = 0; - ADDR_ASSERT_ALWAYS(); - break; - } - - return addr; -} - -/** -**************************************************************************************************** -* EgBasedLib::ComputeMacroTileEquation -* -* @brief -* Computes the address equation in macro tile -* @return -* If equation can be computed -**************************************************************************************************** -*/ -ADDR_E_RETURNCODE EgBasedLib::ComputeMacroTileEquation( - UINT_32 log2BytesPP, ///< [in] log2 of bytes per pixel - AddrTileMode tileMode, ///< [in] tile mode - AddrTileType microTileType, ///< [in] micro tiling type - ADDR_TILEINFO* pTileInfo, ///< [in] bank structure - ADDR_EQUATION* pEquation ///< [out] Equation for addressing in macro tile - ) const -{ - ADDR_E_RETURNCODE retCode; - - // Element equation within a tile - retCode = ComputeMicroTileEquation(log2BytesPP, tileMode, microTileType, pEquation); - - if (retCode == ADDR_OK) - { - // Tile equesiton with signle pipe bank - UINT_32 numPipes = HwlGetPipes(pTileInfo); - UINT_32 numPipeBits = Log2(numPipes); - - for (UINT_32 i = 0; i < Log2(pTileInfo->bankWidth); i++) - { - pEquation->addr[pEquation->numBits].valid = 1; - pEquation->addr[pEquation->numBits].channel = 0; - pEquation->addr[pEquation->numBits].index = i + log2BytesPP + 3 + numPipeBits; - pEquation->numBits++; - } - - for (UINT_32 i = 0; i < Log2(pTileInfo->bankHeight); i++) - { - pEquation->addr[pEquation->numBits].valid 
= 1; - pEquation->addr[pEquation->numBits].channel = 1; - pEquation->addr[pEquation->numBits].index = i + 3; - pEquation->numBits++; - } - - ADDR_EQUATION equation; - memset(&equation, 0, sizeof(ADDR_EQUATION)); - - UINT_32 thresholdX = 32; - UINT_32 thresholdY = 32; - - if (IsPrtNoRotationTileMode(tileMode)) - { - UINT_32 macroTilePitch = - (MicroTileWidth * pTileInfo->bankWidth * numPipes) * pTileInfo->macroAspectRatio; - UINT_32 macroTileHeight = - (MicroTileHeight * pTileInfo->bankHeight * pTileInfo->banks) / - pTileInfo->macroAspectRatio; - thresholdX = Log2(macroTilePitch); - thresholdY = Log2(macroTileHeight); - } - - // Pipe equation - retCode = ComputePipeEquation(log2BytesPP, thresholdX, thresholdY, pTileInfo, &equation); - - if (retCode == ADDR_OK) - { - UINT_32 pipeBitStart = Log2(m_pipeInterleaveBytes); - - if (pEquation->numBits > pipeBitStart) - { - UINT_32 numLeftShift = pEquation->numBits - pipeBitStart; - - for (UINT_32 i = 0; i < numLeftShift; i++) - { - pEquation->addr[pEquation->numBits + equation.numBits - i - 1] = - pEquation->addr[pEquation->numBits - i - 1]; - pEquation->xor1[pEquation->numBits + equation.numBits - i - 1] = - pEquation->xor1[pEquation->numBits - i - 1]; - pEquation->xor2[pEquation->numBits + equation.numBits - i - 1] = - pEquation->xor2[pEquation->numBits - i - 1]; - } - } - - for (UINT_32 i = 0; i < equation.numBits; i++) - { - pEquation->addr[pipeBitStart + i] = equation.addr[i]; - pEquation->xor1[pipeBitStart + i] = equation.xor1[i]; - pEquation->xor2[pipeBitStart + i] = equation.xor2[i]; - pEquation->numBits++; - } - - // Bank equation - memset(&equation, 0, sizeof(ADDR_EQUATION)); - - retCode = ComputeBankEquation(log2BytesPP, thresholdX, thresholdY, - pTileInfo, &equation); - - if (retCode == ADDR_OK) - { - UINT_32 bankBitStart = pipeBitStart + numPipeBits + Log2(m_bankInterleave); - - if (pEquation->numBits > bankBitStart) - { - UINT_32 numLeftShift = pEquation->numBits - bankBitStart; - - for (UINT_32 i = 0; i < 
numLeftShift; i++) - { - pEquation->addr[pEquation->numBits + equation.numBits - i - 1] = - pEquation->addr[pEquation->numBits - i - 1]; - pEquation->xor1[pEquation->numBits + equation.numBits - i - 1] = - pEquation->xor1[pEquation->numBits - i - 1]; - pEquation->xor2[pEquation->numBits + equation.numBits - i - 1] = - pEquation->xor2[pEquation->numBits - i - 1]; - } - } - - for (UINT_32 i = 0; i < equation.numBits; i++) - { - pEquation->addr[bankBitStart + i] = equation.addr[i]; - pEquation->xor1[bankBitStart + i] = equation.xor1[i]; - pEquation->xor2[bankBitStart + i] = equation.xor2[i]; - pEquation->numBits++; - } - } - } - } - - return retCode; -} - -/** -**************************************************************************************************** -* EgBasedLib::ComputeSurfaceAddrFromCoordMicroTiled -* -* @brief -* Computes the surface address and bit position from a -* coordinate for 2D tilied (macro tiled) -* @return -* The byte address -**************************************************************************************************** -*/ -UINT_64 EgBasedLib::ComputeSurfaceAddrFromCoordMacroTiled( - UINT_32 x, ///< [in] x coordinate - UINT_32 y, ///< [in] y coordinate - UINT_32 slice, ///< [in] slice index - UINT_32 sample, ///< [in] sample index - UINT_32 bpp, ///< [in] bits per pixel - UINT_32 pitch, ///< [in] surface pitch, in pixels - UINT_32 height, ///< [in] surface height, in pixels - UINT_32 numSamples, ///< [in] number of samples - AddrTileMode tileMode, ///< [in] tile mode - AddrTileType microTileType, ///< [in] micro tiling type - BOOL_32 ignoreSE, ///< [in] TRUE if shader enginers can be ignored - BOOL_32 isDepthSampleOrder, ///< [in] TRUE if it depth sample ordering is used - UINT_32 pipeSwizzle, ///< [in] pipe swizzle - UINT_32 bankSwizzle, ///< [in] bank swizzle - ADDR_TILEINFO* pTileInfo, ///< [in] bank structure - /// **All fields to be valid on entry** - UINT_32* pBitPosition ///< [out] bit position, e.g. 
FMT_1 will use this - ) const -{ - UINT_64 addr; - - UINT_32 microTileBytes; - UINT_32 microTileBits; - UINT_32 sampleOffset; - UINT_32 pixelIndex; - UINT_32 pixelOffset; - UINT_32 elementOffset; - UINT_32 tileSplitSlice; - UINT_32 pipe; - UINT_32 bank; - UINT_64 sliceBytes; - UINT_64 sliceOffset; - UINT_32 macroTilePitch; - UINT_32 macroTileHeight; - UINT_32 macroTilesPerRow; - UINT_32 macroTilesPerSlice; - UINT_64 macroTileBytes; - UINT_32 macroTileIndexX; - UINT_32 macroTileIndexY; - UINT_64 macroTileOffset; - UINT_64 totalOffset; - UINT_64 pipeInterleaveMask; - UINT_64 bankInterleaveMask; - UINT_64 pipeInterleaveOffset; - UINT_32 bankInterleaveOffset; - UINT_64 offset; - UINT_32 tileRowIndex; - UINT_32 tileColumnIndex; - UINT_32 tileIndex; - UINT_32 tileOffset; - - UINT_32 microTileThickness = Thickness(tileMode); - - // - // Compute the number of group, pipe, and bank bits. - // - UINT_32 numPipes = HwlGetPipes(pTileInfo); - UINT_32 numPipeInterleaveBits = Log2(m_pipeInterleaveBytes); - UINT_32 numPipeBits = Log2(numPipes); - UINT_32 numBankInterleaveBits = Log2(m_bankInterleave); - UINT_32 numBankBits = Log2(pTileInfo->banks); - - // - // Compute the micro tile size. - // - microTileBits = MicroTilePixels * microTileThickness * bpp * numSamples; - - microTileBytes = microTileBits / 8; - // - // Compute the pixel index within the micro tile. - // - pixelIndex = ComputePixelIndexWithinMicroTile(x, - y, - slice, - bpp, - tileMode, - microTileType); - - // - // Compute the sample offset and pixel offset. - // - if (isDepthSampleOrder) - { - // - // For depth surfaces, samples are stored contiguously for each element, so the sample - // offset is the sample number times the element size. 
- // - sampleOffset = sample * bpp; - pixelOffset = pixelIndex * bpp * numSamples; - } - else - { - // - // For color surfaces, all elements for a particular sample are stored contiguously, so - // the sample offset is the sample number times the micro tile size divided yBit the number - // of samples. - // - sampleOffset = sample * (microTileBits / numSamples); - pixelOffset = pixelIndex * bpp; - } - - // - // Compute the element offset. - // - elementOffset = pixelOffset + sampleOffset; - - *pBitPosition = static_cast(elementOffset % 8); - - elementOffset /= 8; //bit-to-byte - - // - // Determine if tiles need to be split across slices. - // - // If the size of the micro tile is larger than the tile split size, then the tile will be - // split across multiple slices. - // - UINT_32 slicesPerTile = 1; - - if ((microTileBytes > pTileInfo->tileSplitBytes) && (microTileThickness == 1)) - { //don't support for thick mode - - // - // Compute the number of slices per tile. - // - slicesPerTile = microTileBytes / pTileInfo->tileSplitBytes; - - // - // Compute the tile split slice number for use in rotating the bank. - // - tileSplitSlice = elementOffset / pTileInfo->tileSplitBytes; - - // - // Adjust the element offset to account for the portion of the tile that is being moved to - // a new slice.. - // - elementOffset %= pTileInfo->tileSplitBytes; - - // - // Adjust the microTileBytes size to tileSplitBytes size since - // a new slice.. - // - microTileBytes = pTileInfo->tileSplitBytes; - } - else - { - tileSplitSlice = 0; - } - - // - // Compute macro tile pitch and height. - // - macroTilePitch = - (MicroTileWidth * pTileInfo->bankWidth * numPipes) * pTileInfo->macroAspectRatio; - macroTileHeight = - (MicroTileHeight * pTileInfo->bankHeight * pTileInfo->banks) / pTileInfo->macroAspectRatio; - - // - // Compute the number of bytes per macro tile. 
Note: bytes of the same bank/pipe actually - // - macroTileBytes = - static_cast(microTileBytes) * - (macroTilePitch / MicroTileWidth) * (macroTileHeight / MicroTileHeight) / - (numPipes * pTileInfo->banks); - - // - // Compute the number of macro tiles per row. - // - macroTilesPerRow = pitch / macroTilePitch; - - // - // Compute the offset to the macro tile containing the specified coordinate. - // - macroTileIndexX = x / macroTilePitch; - macroTileIndexY = y / macroTileHeight; - macroTileOffset = ((macroTileIndexY * macroTilesPerRow) + macroTileIndexX) * macroTileBytes; - - // - // Compute the number of macro tiles per slice. - // - macroTilesPerSlice = macroTilesPerRow * (height / macroTileHeight); - - // - // Compute the slice size. - // - sliceBytes = macroTilesPerSlice * macroTileBytes; - - // - // Compute the slice offset. - // - sliceOffset = sliceBytes * (tileSplitSlice + slicesPerTile * (slice / microTileThickness)); - - // - // Compute tile offest - // - tileRowIndex = (y / MicroTileHeight) % pTileInfo->bankHeight; - tileColumnIndex = ((x / MicroTileWidth) / numPipes) % pTileInfo->bankWidth; - tileIndex = (tileRowIndex * pTileInfo->bankWidth) + tileColumnIndex; - tileOffset = tileIndex * microTileBytes; - - // - // Combine the slice offset and macro tile offset with the pixel and sample offsets, accounting - // for the pipe and bank bits in the middle of the address. - // - totalOffset = sliceOffset + macroTileOffset + elementOffset + tileOffset; - - // - // Get the pipe and bank. - // - - // when the tileMode is PRT type, then adjust x and y coordinates - if (IsPrtNoRotationTileMode(tileMode)) - { - x = x % macroTilePitch; - y = y % macroTileHeight; - } - - pipe = ComputePipeFromCoord(x, - y, - slice, - tileMode, - pipeSwizzle, - ignoreSE, - pTileInfo); - - bank = ComputeBankFromCoord(x, - y, - slice, - tileMode, - bankSwizzle, - tileSplitSlice, - pTileInfo); - - - // - // Split the offset to put some bits below the pipe+bank bits and some above. 
- // - pipeInterleaveMask = (1 << numPipeInterleaveBits) - 1; - bankInterleaveMask = (1 << numBankInterleaveBits) - 1; - pipeInterleaveOffset = totalOffset & pipeInterleaveMask; - bankInterleaveOffset = static_cast((totalOffset >> numPipeInterleaveBits) & - bankInterleaveMask); - offset = totalOffset >> (numPipeInterleaveBits + numBankInterleaveBits); - - // - // Assemble the address from its components. - // - addr = pipeInterleaveOffset; - // This is to remove /analyze warnings - UINT_32 pipeBits = pipe << numPipeInterleaveBits; - UINT_32 bankInterleaveBits = bankInterleaveOffset << (numPipeInterleaveBits + numPipeBits); - UINT_32 bankBits = bank << (numPipeInterleaveBits + numPipeBits + - numBankInterleaveBits); - UINT_64 offsetBits = offset << (numPipeInterleaveBits + numPipeBits + - numBankInterleaveBits + numBankBits); - - addr |= pipeBits; - addr |= bankInterleaveBits; - addr |= bankBits; - addr |= offsetBits; - - return addr; -} - -/** -**************************************************************************************************** -* EgBasedLib::ComputeSurfaceAddrFromCoordMicroTiled -* -* @brief -* Computes the surface address and bit position from a coordinate for 1D tilied -* (micro tiled) -* @return -* The byte address -**************************************************************************************************** -*/ -UINT_64 EgBasedLib::ComputeSurfaceAddrFromCoordMicroTiled( - UINT_32 x, ///< [in] x coordinate - UINT_32 y, ///< [in] y coordinate - UINT_32 slice, ///< [in] slice index - UINT_32 sample, ///< [in] sample index - UINT_32 bpp, ///< [in] bits per pixel - UINT_32 pitch, ///< [in] pitch, in pixels - UINT_32 height, ///< [in] height, in pixels - UINT_32 numSamples, ///< [in] number of samples - AddrTileMode tileMode, ///< [in] tile mode - AddrTileType microTileType, ///< [in] micro tiling type - BOOL_32 isDepthSampleOrder, ///< [in] TRUE if depth sample ordering is used - UINT_32* pBitPosition ///< [out] bit position, e.g. 
FMT_1 will use this - ) const -{ - UINT_64 addr = 0; - - UINT_32 microTileBytes; - UINT_64 sliceBytes; - UINT_32 microTilesPerRow; - UINT_32 microTileIndexX; - UINT_32 microTileIndexY; - UINT_32 microTileIndexZ; - UINT_64 sliceOffset; - UINT_64 microTileOffset; - UINT_32 sampleOffset; - UINT_32 pixelIndex; - UINT_32 pixelOffset; - - UINT_32 microTileThickness = Thickness(tileMode); - - // - // Compute the micro tile size. - // - microTileBytes = BITS_TO_BYTES(MicroTilePixels * microTileThickness * bpp * numSamples); - - // - // Compute the slice size. - // - sliceBytes = - BITS_TO_BYTES(static_cast(pitch) * height * microTileThickness * bpp * numSamples); - - // - // Compute the number of micro tiles per row. - // - microTilesPerRow = pitch / MicroTileWidth; - - // - // Compute the micro tile index. - // - microTileIndexX = x / MicroTileWidth; - microTileIndexY = y / MicroTileHeight; - microTileIndexZ = slice / microTileThickness; - - // - // Compute the slice offset. - // - sliceOffset = static_cast(microTileIndexZ) * sliceBytes; - - // - // Compute the offset to the micro tile containing the specified coordinate. - // - microTileOffset = (static_cast(microTileIndexY) * microTilesPerRow + microTileIndexX) * - microTileBytes; - - // - // Compute the pixel index within the micro tile. - // - pixelIndex = ComputePixelIndexWithinMicroTile(x, - y, - slice, - bpp, - tileMode, - microTileType); - - // Compute the sample offset. - // - if (isDepthSampleOrder) - { - // - // For depth surfaces, samples are stored contiguously for each element, so the sample - // offset is the sample number times the element size. - // - sampleOffset = sample * bpp; - pixelOffset = pixelIndex * bpp * numSamples; - } - else - { - // - // For color surfaces, all elements for a particular sample are stored contiguously, so - // the sample offset is the sample number times the micro tile size divided yBit the number - // of samples. 
- // - sampleOffset = sample * (microTileBytes*8 / numSamples); - pixelOffset = pixelIndex * bpp; - } - - // - // Compute the bit position of the pixel. Each element is stored with one bit per sample. - // - - UINT_32 elemOffset = sampleOffset + pixelOffset; - - *pBitPosition = elemOffset % 8; - elemOffset /= 8; - - // - // Combine the slice offset, micro tile offset, sample offset, and pixel offsets. - // - addr = sliceOffset + microTileOffset + elemOffset; - - return addr; -} - -/** -**************************************************************************************************** -* EgBasedLib::HwlComputePixelCoordFromOffset -* -* @brief -* Compute pixel coordinate from offset inside a micro tile -* @return -* N/A -**************************************************************************************************** -*/ -VOID EgBasedLib::HwlComputePixelCoordFromOffset( - UINT_32 offset, ///< [in] offset inside micro tile in bits - UINT_32 bpp, ///< [in] bits per pixel - UINT_32 numSamples, ///< [in] number of samples - AddrTileMode tileMode, ///< [in] tile mode - UINT_32 tileBase, ///< [in] base offset within a tile - UINT_32 compBits, ///< [in] component bits actually needed(for planar surface) - UINT_32* pX, ///< [out] x coordinate - UINT_32* pY, ///< [out] y coordinate - UINT_32* pSlice, ///< [out] slice index - UINT_32* pSample, ///< [out] sample index - AddrTileType microTileType, ///< [in] micro tiling type - BOOL_32 isDepthSampleOrder ///< [in] TRUE if depth sample order in microtile is used - ) const -{ - UINT_32 x = 0; - UINT_32 y = 0; - UINT_32 z = 0; - UINT_32 thickness = Thickness(tileMode); - - // For planar surface, we adjust offset acoording to tile base - if ((bpp != compBits) && (compBits != 0) && isDepthSampleOrder) - { - offset -= tileBase; - - ADDR_ASSERT(microTileType == ADDR_NON_DISPLAYABLE || - microTileType == ADDR_DEPTH_SAMPLE_ORDER); - - bpp = compBits; - } - - UINT_32 sampleTileBits; - UINT_32 samplePixelBits; - UINT_32 pixelIndex; - - 
if (isDepthSampleOrder) - { - samplePixelBits = bpp * numSamples; - pixelIndex = offset / samplePixelBits; - *pSample = (offset % samplePixelBits) / bpp; - } - else - { - sampleTileBits = MicroTilePixels * bpp * thickness; - *pSample = offset / sampleTileBits; - pixelIndex = (offset % sampleTileBits) / bpp; - } - - if (microTileType != ADDR_THICK) - { - if (microTileType == ADDR_DISPLAYABLE) // displayable - { - switch (bpp) - { - case 8: - x = pixelIndex & 0x7; - y = Bits2Number(3, _BIT(pixelIndex,5),_BIT(pixelIndex,3),_BIT(pixelIndex,4)); - break; - case 16: - x = pixelIndex & 0x7; - y = Bits2Number(3, _BIT(pixelIndex,5),_BIT(pixelIndex,4),_BIT(pixelIndex,3)); - break; - case 32: - x = Bits2Number(3, _BIT(pixelIndex,3),_BIT(pixelIndex,1),_BIT(pixelIndex,0)); - y = Bits2Number(3, _BIT(pixelIndex,5),_BIT(pixelIndex,4),_BIT(pixelIndex,2)); - break; - case 64: - x = Bits2Number(3, _BIT(pixelIndex,3),_BIT(pixelIndex,2),_BIT(pixelIndex,0)); - y = Bits2Number(3, _BIT(pixelIndex,5),_BIT(pixelIndex,4),_BIT(pixelIndex,1)); - break; - case 128: - x = Bits2Number(3, _BIT(pixelIndex,3),_BIT(pixelIndex,2),_BIT(pixelIndex,1)); - y = Bits2Number(3, _BIT(pixelIndex,5),_BIT(pixelIndex,4),_BIT(pixelIndex,0)); - break; - default: - break; - } - } - else if (microTileType == ADDR_NON_DISPLAYABLE || microTileType == ADDR_DEPTH_SAMPLE_ORDER) - { - x = Bits2Number(3, _BIT(pixelIndex,4),_BIT(pixelIndex,2),_BIT(pixelIndex,0)); - y = Bits2Number(3, _BIT(pixelIndex,5),_BIT(pixelIndex,3),_BIT(pixelIndex,1)); - } - else if (microTileType == ADDR_ROTATED) - { - /* - 8-Bit Elements - element_index[5:0] = { x[2], x[0], x[1], y[2], y[1], y[0] } - - 16-Bit Elements - element_index[5:0] = { x[2], x[1], x[0], y[2], y[1], y[0] } - - 32-Bit Elements - element_index[5:0] = { x[2], x[1], y[2], x[0], y[1], y[0] } - - 64-Bit Elements - element_index[5:0] = { y[2], x[2], x[1], y[1], x[0], y[0] } - */ - switch(bpp) - { - case 8: - x = Bits2Number(3, _BIT(pixelIndex,5),_BIT(pixelIndex,3),_BIT(pixelIndex,4)); 
- y = pixelIndex & 0x7; - break; - case 16: - x = Bits2Number(3, _BIT(pixelIndex,5),_BIT(pixelIndex,4),_BIT(pixelIndex,3)); - y = pixelIndex & 0x7; - break; - case 32: - x = Bits2Number(3, _BIT(pixelIndex,5),_BIT(pixelIndex,4),_BIT(pixelIndex,2)); - y = Bits2Number(3, _BIT(pixelIndex,3),_BIT(pixelIndex,1),_BIT(pixelIndex,0)); - break; - case 64: - x = Bits2Number(3, _BIT(pixelIndex,4),_BIT(pixelIndex,3),_BIT(pixelIndex,1)); - y = Bits2Number(3, _BIT(pixelIndex,5),_BIT(pixelIndex,2),_BIT(pixelIndex,0)); - break; - default: - ADDR_ASSERT_ALWAYS(); - break; - } - } - - if (thickness > 1) // thick - { - z = Bits2Number(3, _BIT(pixelIndex,8),_BIT(pixelIndex,7),_BIT(pixelIndex,6)); - } - } - else - { - ADDR_ASSERT((m_chipFamily >= ADDR_CHIP_FAMILY_CI) && (thickness > 1)); - /* - 8-Bit Elements and 16-Bit Elements - element_index[7:0] = { y[2], x[2], z[1], z[0], y[1], x[1], y[0], x[0] } - - 32-Bit Elements - element_index[7:0] = { y[2], x[2], z[1], y[1], z[0], x[1], y[0], x[0] } - - 64-Bit Elements and 128-Bit Elements - element_index[7:0] = { y[2], x[2], z[1], y[1], x[1], z[0], y[0], x[0] } - - The equation to compute the element index for the extra thick tile: - element_index[8] = z[2] - */ - switch (bpp) - { - case 8: - case 16: // fall-through - x = Bits2Number(3, _BIT(pixelIndex,6),_BIT(pixelIndex,2),_BIT(pixelIndex,0)); - y = Bits2Number(3, _BIT(pixelIndex,7),_BIT(pixelIndex,3),_BIT(pixelIndex,1)); - z = Bits2Number(2, _BIT(pixelIndex,5),_BIT(pixelIndex,4)); - break; - case 32: - x = Bits2Number(3, _BIT(pixelIndex,6),_BIT(pixelIndex,2),_BIT(pixelIndex,0)); - y = Bits2Number(3, _BIT(pixelIndex,7),_BIT(pixelIndex,4),_BIT(pixelIndex,1)); - z = Bits2Number(2, _BIT(pixelIndex,5),_BIT(pixelIndex,3)); - break; - case 64: - case 128: // fall-through - x = Bits2Number(3, _BIT(pixelIndex,6),_BIT(pixelIndex,3),_BIT(pixelIndex,0)); - y = Bits2Number(3, _BIT(pixelIndex,7),_BIT(pixelIndex,4),_BIT(pixelIndex,1)); - z = Bits2Number(2, _BIT(pixelIndex,5),_BIT(pixelIndex,2)); - 
break; - default: - ADDR_ASSERT_ALWAYS(); - break; - } - - if (thickness == 8) - { - z += Bits2Number(3,_BIT(pixelIndex,8),0,0); - } - } - - *pX = x; - *pY = y; - *pSlice += z; -} - - -/** -**************************************************************************************************** -* EgBasedLib::DispatchComputeSurfaceCoordFromAddrDispatch -* -* @brief -* Compute (x,y,slice,sample) coordinates from surface address -* @return -* N/A -**************************************************************************************************** -*/ -VOID EgBasedLib::DispatchComputeSurfaceCoordFromAddr( - const ADDR_COMPUTE_SURFACE_COORDFROMADDR_INPUT* pIn, ///< [in] input structure - ADDR_COMPUTE_SURFACE_COORDFROMADDR_OUTPUT* pOut ///< [out] output structure - ) const -{ - UINT_64 addr = pIn->addr; - UINT_32 bitPosition = pIn->bitPosition; - UINT_32 bpp = pIn->bpp; - UINT_32 pitch = pIn->pitch; - UINT_32 height = pIn->height; - UINT_32 numSlices = pIn->numSlices; - UINT_32 numSamples = ((pIn->numSamples == 0) ? 1 : pIn->numSamples); - UINT_32 numFrags = ((pIn->numFrags == 0) ? 
numSamples : pIn->numFrags); - AddrTileMode tileMode = pIn->tileMode; - UINT_32 tileBase = pIn->tileBase; - UINT_32 compBits = pIn->compBits; - AddrTileType microTileType = pIn->tileType; - BOOL_32 ignoreSE = pIn->ignoreSE; - BOOL_32 isDepthSampleOrder = pIn->isDepth; - ADDR_TILEINFO* pTileInfo = pIn->pTileInfo; - - UINT_32* pX = &pOut->x; - UINT_32* pY = &pOut->y; - UINT_32* pSlice = &pOut->slice; - UINT_32* pSample = &pOut->sample; - - if (microTileType == ADDR_DEPTH_SAMPLE_ORDER) - { - isDepthSampleOrder = TRUE; - } - - if (m_chipFamily >= ADDR_CHIP_FAMILY_NI) - { - if (numFrags != numSamples) - { - numSamples = numFrags; - } - - /// @note - /// 128 bit/thick tiled surface doesn't support display tiling and - /// mipmap chain must have the same tileType, so please fill tileType correctly - if (IsLinear(pIn->tileMode) == FALSE) - { - if (bpp >= 128 || Thickness(tileMode) > 1) - { - ADDR_ASSERT(microTileType != ADDR_DISPLAYABLE); - } - } - } - - switch (tileMode) - { - case ADDR_TM_LINEAR_GENERAL://fall through - case ADDR_TM_LINEAR_ALIGNED: - ComputeSurfaceCoordFromAddrLinear(addr, - bitPosition, - bpp, - pitch, - height, - numSlices, - pX, - pY, - pSlice, - pSample); - break; - case ADDR_TM_1D_TILED_THIN1://fall through - case ADDR_TM_1D_TILED_THICK: - ComputeSurfaceCoordFromAddrMicroTiled(addr, - bitPosition, - bpp, - pitch, - height, - numSamples, - tileMode, - tileBase, - compBits, - pX, - pY, - pSlice, - pSample, - microTileType, - isDepthSampleOrder); - break; - case ADDR_TM_2D_TILED_THIN1: //fall through - case ADDR_TM_2D_TILED_THICK: //fall through - case ADDR_TM_3D_TILED_THIN1: //fall through - case ADDR_TM_3D_TILED_THICK: //fall through - case ADDR_TM_2D_TILED_XTHICK: //fall through - case ADDR_TM_3D_TILED_XTHICK: //fall through - case ADDR_TM_PRT_TILED_THIN1: //fall through - case ADDR_TM_PRT_2D_TILED_THIN1://fall through - case ADDR_TM_PRT_3D_TILED_THIN1://fall through - case ADDR_TM_PRT_TILED_THICK: //fall through - case 
ADDR_TM_PRT_2D_TILED_THICK://fall through - case ADDR_TM_PRT_3D_TILED_THICK: - UINT_32 pipeSwizzle; - UINT_32 bankSwizzle; - - if (m_configFlags.useCombinedSwizzle) - { - ExtractBankPipeSwizzle(pIn->tileSwizzle, pIn->pTileInfo, - &bankSwizzle, &pipeSwizzle); - } - else - { - pipeSwizzle = pIn->pipeSwizzle; - bankSwizzle = pIn->bankSwizzle; - } - - ComputeSurfaceCoordFromAddrMacroTiled(addr, - bitPosition, - bpp, - pitch, - height, - numSamples, - tileMode, - tileBase, - compBits, - microTileType, - ignoreSE, - isDepthSampleOrder, - pipeSwizzle, - bankSwizzle, - pTileInfo, - pX, - pY, - pSlice, - pSample); - break; - default: - ADDR_ASSERT_ALWAYS(); - } -} - - -/** -**************************************************************************************************** -* EgBasedLib::ComputeSurfaceCoordFromAddrMacroTiled -* -* @brief -* Compute surface coordinates from address for macro tiled surface -* @return -* N/A -**************************************************************************************************** -*/ -VOID EgBasedLib::ComputeSurfaceCoordFromAddrMacroTiled( - UINT_64 addr, ///< [in] byte address - UINT_32 bitPosition, ///< [in] bit position - UINT_32 bpp, ///< [in] bits per pixel - UINT_32 pitch, ///< [in] pitch in pixels - UINT_32 height, ///< [in] height in pixels - UINT_32 numSamples, ///< [in] number of samples - AddrTileMode tileMode, ///< [in] tile mode - UINT_32 tileBase, ///< [in] tile base offset - UINT_32 compBits, ///< [in] component bits (for planar surface) - AddrTileType microTileType, ///< [in] micro tiling type - BOOL_32 ignoreSE, ///< [in] TRUE if shader engines can be ignored - BOOL_32 isDepthSampleOrder, ///< [in] TRUE if depth sample order is used - UINT_32 pipeSwizzle, ///< [in] pipe swizzle - UINT_32 bankSwizzle, ///< [in] bank swizzle - ADDR_TILEINFO* pTileInfo, ///< [in] bank structure. 
- /// **All fields to be valid on entry** - UINT_32* pX, ///< [out] X coord - UINT_32* pY, ///< [out] Y coord - UINT_32* pSlice, ///< [out] slice index - UINT_32* pSample ///< [out] sample index - ) const -{ - UINT_32 mx; - UINT_32 my; - UINT_64 tileBits; - UINT_64 macroTileBits; - UINT_32 slices; - UINT_32 tileSlices; - UINT_64 elementOffset; - UINT_64 macroTileIndex; - UINT_32 tileIndex; - UINT_64 totalOffset; - - - UINT_32 bank; - UINT_32 pipe; - UINT_32 groupBits = m_pipeInterleaveBytes << 3; - UINT_32 pipes = HwlGetPipes(pTileInfo); - UINT_32 banks = pTileInfo->banks; - - UINT_32 bankInterleave = m_bankInterleave; - - UINT_64 addrBits = BYTES_TO_BITS(addr) + bitPosition; - - // - // remove bits for bank and pipe - // - totalOffset = (addrBits % groupBits) + - (((addrBits / groupBits / pipes) % bankInterleave) * groupBits) + - (((addrBits / groupBits / pipes) / bankInterleave) / banks) * groupBits * bankInterleave; - - UINT_32 microTileThickness = Thickness(tileMode); - - UINT_32 microTileBits = bpp * microTileThickness * MicroTilePixels * numSamples; - - UINT_32 microTileBytes = BITS_TO_BYTES(microTileBits); - // - // Determine if tiles need to be split across slices. - // - // If the size of the micro tile is larger than the tile split size, then the tile will be - // split across multiple slices. - // - UINT_32 slicesPerTile = 1; //_State->TileSlices - - if ((microTileBytes > pTileInfo->tileSplitBytes) && (microTileThickness == 1)) - { //don't support for thick mode - - // - // Compute the number of slices per tile. - // - slicesPerTile = microTileBytes / pTileInfo->tileSplitBytes; - } - - tileBits = microTileBits / slicesPerTile; // micro tile bits - - // in micro tiles because not MicroTileWidth timed. 
- UINT_32 macroWidth = pTileInfo->bankWidth * pipes * pTileInfo->macroAspectRatio; - // in micro tiles as well - UINT_32 macroHeight = pTileInfo->bankHeight * banks / pTileInfo->macroAspectRatio; - - UINT_32 pitchInMacroTiles = pitch / MicroTileWidth / macroWidth; - - macroTileBits = (macroWidth * macroHeight) * tileBits / (banks * pipes); - - macroTileIndex = totalOffset / macroTileBits; - - // pitchMacros * height / heightMacros; macroTilesPerSlice == _State->SliceMacros - UINT_32 macroTilesPerSlice = (pitch / (macroWidth * MicroTileWidth)) * height / - (macroHeight * MicroTileWidth); - - slices = static_cast(macroTileIndex / macroTilesPerSlice); - - *pSlice = static_cast(slices / slicesPerTile * microTileThickness); - - // - // calculate element offset and x[2:0], y[2:0], z[1:0] for thick - // - tileSlices = slices % slicesPerTile; - - elementOffset = tileSlices * tileBits; - elementOffset += totalOffset % tileBits; - - UINT_32 coordZ = 0; - - HwlComputePixelCoordFromOffset(static_cast(elementOffset), - bpp, - numSamples, - tileMode, - tileBase, - compBits, - pX, - pY, - &coordZ, - pSample, - microTileType, - isDepthSampleOrder); - - macroTileIndex = macroTileIndex % macroTilesPerSlice; - *pY += static_cast(macroTileIndex / pitchInMacroTiles * macroHeight * MicroTileHeight); - *pX += static_cast(macroTileIndex % pitchInMacroTiles * macroWidth * MicroTileWidth); - - *pSlice += coordZ; - - tileIndex = static_cast((totalOffset % macroTileBits) / tileBits); - - my = (tileIndex / pTileInfo->bankWidth) % pTileInfo->bankHeight * MicroTileHeight; - mx = (tileIndex % pTileInfo->bankWidth) * pipes * MicroTileWidth; - - *pY += my; - *pX += mx; - - bank = ComputeBankFromAddr(addr, banks, pipes); - pipe = ComputePipeFromAddr(addr, pipes); - - HwlComputeSurfaceCoord2DFromBankPipe(tileMode, - pX, - pY, - *pSlice, - bank, - pipe, - bankSwizzle, - pipeSwizzle, - tileSlices, - ignoreSE, - pTileInfo); -} - -/** 
-**************************************************************************************************** -* EgBasedLib::ComputeSurfaceCoord2DFromBankPipe -* -* @brief -* Compute surface x,y coordinates from bank/pipe info -* @return -* N/A -**************************************************************************************************** -*/ -VOID EgBasedLib::ComputeSurfaceCoord2DFromBankPipe( - AddrTileMode tileMode, ///< [in] tile mode - UINT_32 x, ///< [in] x coordinate - UINT_32 y, ///< [in] y coordinate - UINT_32 slice, ///< [in] slice index - UINT_32 bank, ///< [in] bank number - UINT_32 pipe, ///< [in] pipe number - UINT_32 bankSwizzle,///< [in] bank swizzle - UINT_32 pipeSwizzle,///< [in] pipe swizzle - UINT_32 tileSlices, ///< [in] slices in a micro tile - ADDR_TILEINFO* pTileInfo, ///< [in] bank structure. **All fields to be valid on entry** - CoordFromBankPipe* pOutput ///< [out] pointer to extracted x/y bits - ) const -{ - UINT_32 yBit3 = 0; - UINT_32 yBit4 = 0; - UINT_32 yBit5 = 0; - UINT_32 yBit6 = 0; - - UINT_32 xBit3 = 0; - UINT_32 xBit4 = 0; - UINT_32 xBit5 = 0; - - UINT_32 tileSplitRotation; - - UINT_32 numPipes = HwlGetPipes(pTileInfo); - - UINT_32 bankRotation = ComputeBankRotation(tileMode, - pTileInfo->banks, numPipes); - - UINT_32 pipeRotation = ComputePipeRotation(tileMode, numPipes); - - UINT_32 xBit = x / (MicroTileWidth * pTileInfo->bankWidth * numPipes); - UINT_32 yBit = y / (MicroTileHeight * pTileInfo->bankHeight); - - //calculate the bank and pipe before rotation and swizzle - - switch (tileMode) - { - case ADDR_TM_2D_TILED_THIN1: //fall through - case ADDR_TM_2D_TILED_THICK: //fall through - case ADDR_TM_2D_TILED_XTHICK: //fall through - case ADDR_TM_3D_TILED_THIN1: //fall through - case ADDR_TM_3D_TILED_THICK: //fall through - case ADDR_TM_3D_TILED_XTHICK: - tileSplitRotation = ((pTileInfo->banks / 2) + 1); - break; - default: - tileSplitRotation = 0; - break; - } - - UINT_32 microTileThickness = Thickness(tileMode); - - bank ^= 
tileSplitRotation * tileSlices; - if (pipeRotation == 0) - { - bank ^= bankRotation * (slice / microTileThickness) + bankSwizzle; - bank %= pTileInfo->banks; - pipe ^= pipeSwizzle; - } - else - { - bank ^= bankRotation * (slice / microTileThickness) / numPipes + bankSwizzle; - bank %= pTileInfo->banks; - pipe ^= pipeRotation * (slice / microTileThickness) + pipeSwizzle; - } - - if (pTileInfo->macroAspectRatio == 1) - { - switch (pTileInfo->banks) - { - case 2: - yBit3 = _BIT(bank, 0) ^ _BIT(xBit,0); - break; - case 4: - yBit4 = _BIT(bank, 0) ^ _BIT(xBit,0); - yBit3 = _BIT(bank, 1) ^ _BIT(xBit,1); - break; - case 8: - yBit3 = _BIT(bank, 2) ^ _BIT(xBit,2); - yBit5 = _BIT(bank, 0) ^ _BIT(xBit,0); - yBit4 = _BIT(bank, 1) ^ _BIT(xBit,1) ^ yBit5; - break; - case 16: - yBit3 = _BIT(bank, 3) ^ _BIT(xBit, 3); - yBit4 = _BIT(bank, 2) ^ _BIT(xBit, 2); - yBit6 = _BIT(bank, 0) ^ _BIT(xBit, 0); - yBit5 = _BIT(bank, 1) ^ _BIT(xBit, 1) ^ yBit6; - break; - default: - break; - } - - } - else if (pTileInfo->macroAspectRatio == 2) - { - switch (pTileInfo->banks) - { - case 2: //xBit3 = yBit3^b0 - xBit3 = _BIT(bank, 0) ^ _BIT(yBit,0); - break; - case 4: //xBit3=yBit4^b0; yBit3=xBit4^b1 - xBit3 = _BIT(bank, 0) ^ _BIT(yBit,1); - yBit3 = _BIT(bank, 1) ^ _BIT(xBit,1); - break; - case 8: //xBit4, xBit5, yBit5 are known - xBit3 = _BIT(bank, 0) ^ _BIT(yBit,2); - yBit3 = _BIT(bank, 2) ^ _BIT(xBit,2); - yBit4 = _BIT(bank, 1) ^ _BIT(xBit,1) ^ _BIT(yBit, 2); - break; - case 16://x4,x5,x6,y6 are known - xBit3 = _BIT(bank, 0) ^ _BIT(yBit, 3); //x3 = y6 ^ b0 - yBit3 = _BIT(bank, 3) ^ _BIT(xBit, 3); //y3 = x6 ^ b3 - yBit4 = _BIT(bank, 2) ^ _BIT(xBit, 2); //y4 = x5 ^ b2 - yBit5 = _BIT(bank, 1) ^ _BIT(xBit, 1) ^ _BIT(yBit, 3); //y5=x4^y6^b1 - break; - default: - break; - } - } - else if (pTileInfo->macroAspectRatio == 4) - { - switch (pTileInfo->banks) - { - case 4: //yBit3, yBit4 - xBit3 = _BIT(bank, 0) ^ _BIT(yBit,1); - xBit4 = _BIT(bank, 1) ^ _BIT(yBit,0); - break; - case 8: //xBit5, yBit4, yBit5 - 
xBit3 = _BIT(bank, 0) ^ _BIT(yBit,2); - yBit3 = _BIT(bank, 2) ^ _BIT(xBit,2); - xBit4 = _BIT(bank, 1) ^ _BIT(yBit,1) ^ _BIT(yBit,2); - break; - case 16: //xBit5, xBit6, yBit5, yBit6 - xBit3 = _BIT(bank, 0) ^ _BIT(yBit, 3);//x3 = b0 ^ y6 - xBit4 = _BIT(bank, 1) ^ _BIT(yBit, 2) ^ _BIT(yBit, 3);//x4 = b1 ^ y5 ^ y6; - yBit3 = _BIT(bank, 3) ^ _BIT(xBit, 3); //y3 = b3 ^ x6; - yBit4 = _BIT(bank, 2) ^ _BIT(xBit, 2); //y4 = b2 ^ x5; - break; - default: - break; - } - } - else if (pTileInfo->macroAspectRatio == 8) - { - switch (pTileInfo->banks) - { - case 8: //yBit3, yBit4, yBit5 - xBit3 = _BIT(bank, 0) ^ _BIT(yBit,2); //x3 = b0 ^ y5; - xBit4 = _BIT(bank, 1) ^ _BIT(yBit,1) ^ _BIT(yBit, 2);//x4 = b1 ^ y4 ^ y5; - xBit5 = _BIT(bank, 2) ^ _BIT(yBit,0); - break; - case 16: //xBit6, yBit4, yBit5, yBit6 - xBit3 = _BIT(bank, 0) ^ _BIT(yBit, 3);//x3 = y6 ^ b0 - xBit4 = _BIT(bank, 1) ^ _BIT(yBit, 2) ^ _BIT(yBit, 3);//x4 = y5 ^ y6 ^ b1 - xBit5 = _BIT(bank, 2) ^ _BIT(yBit, 1);//x5 = y4 ^ b2 - yBit3 = _BIT(bank, 3) ^ _BIT(xBit, 3); //y3 = x6 ^ b3 - break; - default: - break; - } - } - - pOutput->xBits = xBit; - pOutput->yBits = yBit; - - pOutput->xBit3 = xBit3; - pOutput->xBit4 = xBit4; - pOutput->xBit5 = xBit5; - pOutput->yBit3 = yBit3; - pOutput->yBit4 = yBit4; - pOutput->yBit5 = yBit5; - pOutput->yBit6 = yBit6; -} - -/** -**************************************************************************************************** -* EgBasedLib::HwlExtractBankPipeSwizzle -* @brief -* Entry of EgBasedLib ExtractBankPipeSwizzle -* @return -* ADDR_E_RETURNCODE -**************************************************************************************************** -*/ -ADDR_E_RETURNCODE EgBasedLib::HwlExtractBankPipeSwizzle( - const ADDR_EXTRACT_BANKPIPE_SWIZZLE_INPUT* pIn, ///< [in] input structure - ADDR_EXTRACT_BANKPIPE_SWIZZLE_OUTPUT* pOut ///< [out] output structure - ) const -{ - ExtractBankPipeSwizzle(pIn->base256b, - pIn->pTileInfo, - &pOut->bankSwizzle, - &pOut->pipeSwizzle); - - return 
ADDR_OK; -} - - -/** -**************************************************************************************************** -* EgBasedLib::HwlCombineBankPipeSwizzle -* @brief -* Combine bank/pipe swizzle -* @return -* ADDR_E_RETURNCODE -**************************************************************************************************** -*/ -ADDR_E_RETURNCODE EgBasedLib::HwlCombineBankPipeSwizzle( - UINT_32 bankSwizzle, ///< [in] bank swizzle - UINT_32 pipeSwizzle, ///< [in] pipe swizzle - ADDR_TILEINFO* pTileInfo, ///< [in] tile info - UINT_64 baseAddr, ///< [in] base address - UINT_32* pTileSwizzle ///< [out] combined swizzle - ) const -{ - ADDR_E_RETURNCODE retCode = ADDR_OK; - - if (pTileSwizzle) - { - *pTileSwizzle = GetBankPipeSwizzle(bankSwizzle, pipeSwizzle, baseAddr, pTileInfo); - } - else - { - retCode = ADDR_INVALIDPARAMS; - } - - return retCode; -} - -/** -**************************************************************************************************** -* EgBasedLib::HwlComputeBaseSwizzle -* @brief -* Compute base swizzle -* @return -* ADDR_E_RETURNCODE -**************************************************************************************************** -*/ -ADDR_E_RETURNCODE EgBasedLib::HwlComputeBaseSwizzle( - const ADDR_COMPUTE_BASE_SWIZZLE_INPUT* pIn, - ADDR_COMPUTE_BASE_SWIZZLE_OUTPUT* pOut - ) const -{ - UINT_32 bankSwizzle = 0; - UINT_32 pipeSwizzle = 0; - ADDR_TILEINFO* pTileInfo = pIn->pTileInfo; - - ADDR_ASSERT(IsMacroTiled(pIn->tileMode)); - ADDR_ASSERT(pIn->pTileInfo); - - /// This is a legacy misreading of h/w doc, use it as it doesn't hurt. 
- static const UINT_8 bankRotationArray[4][16] = { - { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 }, // ADDR_SURF_2_BANK - { 0, 1, 2, 3, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 }, // ADDR_SURF_4_BANK - { 0, 3, 6, 1, 4, 7, 2, 5, 0, 0, 0, 0, 0, 0, 0, 0 }, // ADDR_SURF_8_BANK - { 0, 7, 14, 5, 12, 3, 10, 1, 8, 15, 6, 13, 4, 11, 2, 9 }, // ADDR_SURF_16_BANK - }; - - UINT_32 pipes = HwlGetPipes(pTileInfo); - (void)pipes; - UINT_32 banks = pTileInfo ? pTileInfo->banks : 2; - UINT_32 hwNumBanks; - - // Uses less bank swizzle bits - if (pIn->option.reduceBankBit && banks > 2) - { - banks >>= 1; - } - - switch (banks) - { - case 2: - hwNumBanks = 0; - break; - case 4: - hwNumBanks = 1; - break; - case 8: - hwNumBanks = 2; - break; - case 16: - hwNumBanks = 3; - break; - default: - ADDR_ASSERT_ALWAYS(); - hwNumBanks = 0; - break; - } - - if (pIn->option.genOption == ADDR_SWIZZLE_GEN_LINEAR) - { - bankSwizzle = pIn->surfIndex & (banks - 1); - } - else // (pIn->option.genOption == ADDR_SWIZZLE_GEN_DEFAULT) - { - bankSwizzle = bankRotationArray[hwNumBanks][pIn->surfIndex & (banks - 1)]; - } - - if (IsMacro3dTiled(pIn->tileMode)) - { - pipeSwizzle = pIn->surfIndex & (HwlGetPipes(pTileInfo) - 1); - } - - return HwlCombineBankPipeSwizzle(bankSwizzle, pipeSwizzle, pTileInfo, 0, &pOut->tileSwizzle); -} - -/** -**************************************************************************************************** -* EgBasedLib::ExtractBankPipeSwizzle -* @brief -* Extract bank/pipe swizzle from base256b -* @return -* N/A -**************************************************************************************************** -*/ -VOID EgBasedLib::ExtractBankPipeSwizzle( - UINT_32 base256b, ///< [in] input base256b register value - ADDR_TILEINFO* pTileInfo, ///< [in] 2D tile parameters. 
Client must provide all data - UINT_32* pBankSwizzle, ///< [out] bank swizzle - UINT_32* pPipeSwizzle ///< [out] pipe swizzle - ) const -{ - UINT_32 bankSwizzle = 0; - UINT_32 pipeSwizzle = 0; - - if (base256b != 0) - { - UINT_32 numPipes = HwlGetPipes(pTileInfo); - UINT_32 bankBits = QLog2(pTileInfo->banks); - UINT_32 pipeBits = QLog2(numPipes); - UINT_32 groupBytes = m_pipeInterleaveBytes; - UINT_32 bankInterleave = m_bankInterleave; - - pipeSwizzle = - (base256b / (groupBytes >> 8)) & ((1<> 8) / numPipes / bankInterleave) & ((1 << bankBits) - 1); - } - - *pPipeSwizzle = pipeSwizzle; - *pBankSwizzle = bankSwizzle; -} - -/** -**************************************************************************************************** -* EgBasedLib::GetBankPipeSwizzle -* @brief -* Combine bank/pipe swizzle -* @return -* Base256b bits (only filled bank/pipe bits) -**************************************************************************************************** -*/ -UINT_32 EgBasedLib::GetBankPipeSwizzle( - UINT_32 bankSwizzle, ///< [in] bank swizzle - UINT_32 pipeSwizzle, ///< [in] pipe swizzle - UINT_64 baseAddr, ///< [in] base address - ADDR_TILEINFO* pTileInfo ///< [in] tile info - ) const -{ - UINT_32 pipeBits = QLog2(HwlGetPipes(pTileInfo)); - UINT_32 bankInterleaveBits = QLog2(m_bankInterleave); - UINT_32 tileSwizzle = pipeSwizzle + ((bankSwizzle << bankInterleaveBits) << pipeBits); - - baseAddr ^= tileSwizzle * m_pipeInterleaveBytes; - baseAddr >>= 8; - - return static_cast(baseAddr); -} - -/** -**************************************************************************************************** -* EgBasedLib::ComputeSliceTileSwizzle -* @brief -* Compute cubemap/3d texture faces/slices tile swizzle -* @return -* Tile swizzle -**************************************************************************************************** -*/ -UINT_32 EgBasedLib::ComputeSliceTileSwizzle( - AddrTileMode tileMode, ///< [in] Tile mode - UINT_32 baseSwizzle, ///< [in] Base swizzle - 
UINT_32 slice, ///< [in] Slice index, Cubemap face index, 0 means +X - UINT_64 baseAddr, ///< [in] Base address - ADDR_TILEINFO* pTileInfo ///< [in] Bank structure - ) const -{ - UINT_32 tileSwizzle = 0; - - if (IsMacroTiled(tileMode)) // Swizzle only for macro tile mode - { - UINT_32 firstSlice = slice / Thickness(tileMode); - - UINT_32 numPipes = HwlGetPipes(pTileInfo); - UINT_32 numBanks = pTileInfo->banks; - - UINT_32 pipeRotation; - UINT_32 bankRotation; - - UINT_32 bankSwizzle = 0; - UINT_32 pipeSwizzle = 0; - - pipeRotation = ComputePipeRotation(tileMode, numPipes); - bankRotation = ComputeBankRotation(tileMode, numBanks, numPipes); - - if (baseSwizzle != 0) - { - ExtractBankPipeSwizzle(baseSwizzle, - pTileInfo, - &bankSwizzle, - &pipeSwizzle); - } - - if (pipeRotation == 0) //2D mode - { - bankSwizzle += firstSlice * bankRotation; - bankSwizzle %= numBanks; - } - else //3D mode - { - pipeSwizzle += firstSlice * pipeRotation; - pipeSwizzle %= numPipes; - bankSwizzle += firstSlice * bankRotation / numPipes; - bankSwizzle %= numBanks; - } - - tileSwizzle = GetBankPipeSwizzle(bankSwizzle, - pipeSwizzle, - baseAddr, - pTileInfo); - } - - return tileSwizzle; -} - -/** -**************************************************************************************************** -* EgBasedLib::HwlComputeQbStereoRightSwizzle -* -* @brief -* Compute right eye swizzle -* @return -* swizzle -**************************************************************************************************** -*/ -UINT_32 EgBasedLib::HwlComputeQbStereoRightSwizzle( - ADDR_COMPUTE_SURFACE_INFO_OUTPUT* pInfo ///< [in] Surface info, must be valid - ) const -{ - UINT_32 bankBits = 0; - UINT_32 swizzle = 0; - - // The assumption is default swizzle for left eye is 0 - if (IsMacroTiled(pInfo->tileMode) && pInfo->pStereoInfo && pInfo->pTileInfo) - { - bankBits = ComputeBankFromCoord(0, pInfo->height, 0, - pInfo->tileMode, 0, 0, pInfo->pTileInfo); - - if (bankBits) - { - 
HwlCombineBankPipeSwizzle(bankBits, 0, pInfo->pTileInfo, 0, &swizzle); - } - } - - return swizzle; -} - -/** -**************************************************************************************************** -* EgBasedLib::ComputeBankFromCoord -* -* @brief -* Compute bank number from coordinates -* @return -* Bank number -**************************************************************************************************** -*/ -UINT_32 EgBasedLib::ComputeBankFromCoord( - UINT_32 x, ///< [in] x coordinate - UINT_32 y, ///< [in] y coordinate - UINT_32 slice, ///< [in] slice index - AddrTileMode tileMode, ///< [in] tile mode - UINT_32 bankSwizzle, ///< [in] bank swizzle - UINT_32 tileSplitSlice, ///< [in] If the size of the pixel offset is larger than the - /// tile split size, then the pixel will be moved to a separate - /// slice. This value equals pixelOffset / tileSplitBytes - /// in this case. Otherwise this is 0. - ADDR_TILEINFO* pTileInfo ///< [in] tile info - ) const -{ - UINT_32 pipes = HwlGetPipes(pTileInfo); - UINT_32 bankBit0 = 0; - UINT_32 bankBit1 = 0; - UINT_32 bankBit2 = 0; - UINT_32 bankBit3 = 0; - UINT_32 sliceRotation; - UINT_32 tileSplitRotation; - UINT_32 bank; - UINT_32 numBanks = pTileInfo->banks; - UINT_32 bankWidth = pTileInfo->bankWidth; - UINT_32 bankHeight = pTileInfo->bankHeight; - - UINT_32 tx = x / MicroTileWidth / (bankWidth * pipes); - UINT_32 ty = y / MicroTileHeight / bankHeight; - - UINT_32 x3 = _BIT(tx,0); - UINT_32 x4 = _BIT(tx,1); - UINT_32 x5 = _BIT(tx,2); - UINT_32 x6 = _BIT(tx,3); - UINT_32 y3 = _BIT(ty,0); - UINT_32 y4 = _BIT(ty,1); - UINT_32 y5 = _BIT(ty,2); - UINT_32 y6 = _BIT(ty,3); - - switch (numBanks) - { - case 16: - bankBit0 = x3 ^ y6; - bankBit1 = x4 ^ y5 ^ y6; - bankBit2 = x5 ^ y4; - bankBit3 = x6 ^ y3; - break; - case 8: - bankBit0 = x3 ^ y5; - bankBit1 = x4 ^ y4 ^ y5; - bankBit2 = x5 ^ y3; - break; - case 4: - bankBit0 = x3 ^ y4; - bankBit1 = x4 ^ y3; - break; - case 2: - bankBit0 = x3 ^ y3; - break; - default: - 
ADDR_ASSERT_ALWAYS(); - break; - } - - bank = bankBit0 | (bankBit1 << 1) | (bankBit2 << 2) | (bankBit3 << 3); - - //Bits2Number(4, bankBit3, bankBit2, bankBit1, bankBit0); - - bank = HwlPreAdjustBank((x / MicroTileWidth), bank, pTileInfo); - // - // Compute bank rotation for the slice. - // - UINT_32 microTileThickness = Thickness(tileMode); - - switch (tileMode) - { - case ADDR_TM_2D_TILED_THIN1: // fall through - case ADDR_TM_2D_TILED_THICK: // fall through - case ADDR_TM_2D_TILED_XTHICK: - sliceRotation = ((numBanks / 2) - 1) * (slice / microTileThickness); - break; - case ADDR_TM_3D_TILED_THIN1: // fall through - case ADDR_TM_3D_TILED_THICK: // fall through - case ADDR_TM_3D_TILED_XTHICK: - sliceRotation = - Max(1u, (pipes / 2) - 1) * (slice / microTileThickness) / pipes; - break; - default: - sliceRotation = 0; - break; - } - - - // - // Compute bank rotation for the tile split slice. - // - // The sample slice will be non-zero if samples must be split across multiple slices. - // This situation arises when the micro tile size multiplied yBit the number of samples exceeds - // the split size (set in GB_ADDR_CONFIG). - // - switch (tileMode) - { - case ADDR_TM_2D_TILED_THIN1: //fall through - case ADDR_TM_3D_TILED_THIN1: //fall through - case ADDR_TM_PRT_2D_TILED_THIN1: //fall through - case ADDR_TM_PRT_3D_TILED_THIN1: //fall through - tileSplitRotation = ((numBanks / 2) + 1) * tileSplitSlice; - break; - default: - tileSplitRotation = 0; - break; - } - - // - // Apply bank rotation for the slice and tile split slice. 
- // - bank ^= bankSwizzle + sliceRotation; - bank ^= tileSplitRotation; - - bank &= (numBanks - 1); - - return bank; -} - -/** -**************************************************************************************************** -* EgBasedLib::ComputeBankFromAddr -* -* @brief -* Compute the bank number from an address -* @return -* Bank number -**************************************************************************************************** -*/ -UINT_32 EgBasedLib::ComputeBankFromAddr( - UINT_64 addr, ///< [in] address - UINT_32 numBanks, ///< [in] number of banks - UINT_32 numPipes ///< [in] number of pipes - ) const -{ - UINT_32 bank; - - // - // The LSBs of the address are arranged as follows: - // bank | bankInterleave | pipe | pipeInterleave - // - // To get the bank number, shift off the pipe interleave, pipe, and bank interlave bits and - // mask the bank bits. - // - bank = static_cast( - (addr >> Log2(m_pipeInterleaveBytes * numPipes * m_bankInterleave)) & - (numBanks - 1) - ); - - return bank; -} - -/** -**************************************************************************************************** -* EgBasedLib::ComputePipeRotation -* -* @brief -* Compute pipe rotation value -* @return -* Pipe rotation -**************************************************************************************************** -*/ -UINT_32 EgBasedLib::ComputePipeRotation( - AddrTileMode tileMode, ///< [in] tile mode - UINT_32 numPipes ///< [in] number of pipes - ) const -{ - UINT_32 rotation; - - switch (tileMode) - { - case ADDR_TM_3D_TILED_THIN1: //fall through - case ADDR_TM_3D_TILED_THICK: //fall through - case ADDR_TM_3D_TILED_XTHICK: //fall through - case ADDR_TM_PRT_3D_TILED_THIN1: //fall through - case ADDR_TM_PRT_3D_TILED_THICK: - rotation = (numPipes < 4) ? 
1 : (numPipes / 2 - 1); - break; - default: - rotation = 0; - } - - return rotation; -} - - - -/** -**************************************************************************************************** -* EgBasedLib::ComputeBankRotation -* -* @brief -* Compute bank rotation value -* @return -* Bank rotation -**************************************************************************************************** -*/ -UINT_32 EgBasedLib::ComputeBankRotation( - AddrTileMode tileMode, ///< [in] tile mode - UINT_32 numBanks, ///< [in] number of banks - UINT_32 numPipes ///< [in] number of pipes - ) const -{ - UINT_32 rotation; - - switch (tileMode) - { - case ADDR_TM_2D_TILED_THIN1: // fall through - case ADDR_TM_2D_TILED_THICK: // fall through - case ADDR_TM_2D_TILED_XTHICK: - case ADDR_TM_PRT_2D_TILED_THIN1: - case ADDR_TM_PRT_2D_TILED_THICK: - // Rotate banks per Z-slice yBit 1 for 4-bank or 3 for 8-bank - rotation = numBanks / 2 - 1; - break; - case ADDR_TM_3D_TILED_THIN1: // fall through - case ADDR_TM_3D_TILED_THICK: // fall through - case ADDR_TM_3D_TILED_XTHICK: - case ADDR_TM_PRT_3D_TILED_THIN1: - case ADDR_TM_PRT_3D_TILED_THICK: - rotation = (numPipes < 4) ? 
1 : (numPipes / 2 - 1); // rotate pipes & banks - break; - default: - rotation = 0; - } - - return rotation; -} - - -/** -**************************************************************************************************** -* EgBasedLib::ComputeHtileBytes -* -* @brief -* Compute htile size in bytes -* -* @return -* Htile size in bytes -**************************************************************************************************** -*/ -UINT_64 EgBasedLib::ComputeHtileBytes( - UINT_32 pitch, ///< [in] pitch - UINT_32 height, ///< [in] height - UINT_32 bpp, ///< [in] bits per pixel - BOOL_32 isLinear, ///< [in] if it is linear mode - UINT_32 numSlices, ///< [in] number of slices - UINT_64* sliceBytes, ///< [out] bytes per slice - UINT_32 baseAlign ///< [in] base alignments - ) const -{ - UINT_64 surfBytes; - - const UINT_64 HtileCacheLineSize = BITS_TO_BYTES(HtileCacheBits); - - *sliceBytes = BITS_TO_BYTES(static_cast(pitch) * height * bpp / 64); - - if (m_configFlags.useHtileSliceAlign) - { - // Align the sliceSize to htilecachelinesize * pipes at first - *sliceBytes = PowTwoAlign(*sliceBytes, HtileCacheLineSize * m_pipes); - surfBytes = *sliceBytes * numSlices; - } - else - { - // Align the surfSize to htilecachelinesize * pipes at last - surfBytes = *sliceBytes * numSlices; - surfBytes = PowTwoAlign(surfBytes, HtileCacheLineSize * m_pipes); - } - - return surfBytes; -} - -/** -**************************************************************************************************** -* EgBasedLib::DispatchComputeFmaskInfo -* -* @brief -* Compute fmask sizes include padded pitch, height, slices, total size in bytes, -* meanwhile output suitable tile mode and alignments as well. Results are returned -* through output parameters. 
-* -* @return -* ADDR_E_RETURNCODE -**************************************************************************************************** -*/ -ADDR_E_RETURNCODE EgBasedLib::DispatchComputeFmaskInfo( - const ADDR_COMPUTE_FMASK_INFO_INPUT* pIn, ///< [in] input structure - ADDR_COMPUTE_FMASK_INFO_OUTPUT* pOut) ///< [out] output structure -{ - ADDR_E_RETURNCODE retCode = ADDR_OK; - - ADDR_COMPUTE_SURFACE_INFO_INPUT surfIn = {0}; - ADDR_COMPUTE_SURFACE_INFO_OUTPUT surfOut = {0}; - - // Setup input structure - surfIn.tileMode = pIn->tileMode; - surfIn.width = pIn->pitch; - surfIn.height = pIn->height; - surfIn.numSlices = pIn->numSlices; - surfIn.pTileInfo = pIn->pTileInfo; - surfIn.tileType = ADDR_NON_DISPLAYABLE; - surfIn.flags.fmask = 1; - - // Setup output structure - surfOut.pTileInfo = pOut->pTileInfo; - - // Setup hwl specific fields - HwlFmaskPreThunkSurfInfo(pIn, pOut, &surfIn, &surfOut); - - surfIn.bpp = HwlComputeFmaskBits(pIn, &surfIn.numSamples); - - // ComputeSurfaceInfo needs numSamples in surfOut as surface routines need adjusted numSamples - surfOut.numSamples = surfIn.numSamples; - - retCode = HwlComputeSurfaceInfo(&surfIn, &surfOut); - - // Save bpp field for surface dump support - surfOut.bpp = surfIn.bpp; - - if (retCode == ADDR_OK) - { - pOut->bpp = surfOut.bpp; - pOut->pitch = surfOut.pitch; - pOut->height = surfOut.height; - pOut->numSlices = surfOut.depth; - pOut->fmaskBytes = surfOut.surfSize; - pOut->baseAlign = surfOut.baseAlign; - pOut->pitchAlign = surfOut.pitchAlign; - pOut->heightAlign = surfOut.heightAlign; - - if (surfOut.depth > 1) - { - // For fmask, expNumSlices is stored in depth. 
- pOut->sliceSize = surfOut.surfSize / surfOut.depth; - } - else - { - pOut->sliceSize = surfOut.surfSize; - } - - // Save numSamples field for surface dump support - pOut->numSamples = surfOut.numSamples; - - HwlFmaskPostThunkSurfInfo(&surfOut, pOut); - } - - return retCode; -} - -/** -**************************************************************************************************** -* EgBasedLib::HwlFmaskSurfaceInfo -* @brief -* Entry of EgBasedLib ComputeFmaskInfo -* @return -* ADDR_E_RETURNCODE -**************************************************************************************************** -*/ -ADDR_E_RETURNCODE EgBasedLib::HwlComputeFmaskInfo( - const ADDR_COMPUTE_FMASK_INFO_INPUT* pIn, ///< [in] input structure - ADDR_COMPUTE_FMASK_INFO_OUTPUT* pOut ///< [out] output structure - ) -{ - ADDR_E_RETURNCODE retCode = ADDR_OK; - - ADDR_TILEINFO tileInfo = {0}; - - // Use internal tile info if pOut does not have a valid pTileInfo - if (pOut->pTileInfo == NULL) - { - pOut->pTileInfo = &tileInfo; - } - - retCode = DispatchComputeFmaskInfo(pIn, pOut); - - if (retCode == ADDR_OK) - { - pOut->tileIndex = - HwlPostCheckTileIndex(pOut->pTileInfo, pIn->tileMode, ADDR_NON_DISPLAYABLE, - pOut->tileIndex); - } - - // Resets pTileInfo to NULL if the internal tile info is used - if (pOut->pTileInfo == &tileInfo) - { - pOut->pTileInfo = NULL; - } - - return retCode; -} - -/** -**************************************************************************************************** -* EgBasedLib::HwlComputeFmaskAddrFromCoord -* @brief -* Entry of EgBasedLib ComputeFmaskAddrFromCoord -* @return -* ADDR_E_RETURNCODE -**************************************************************************************************** -*/ -ADDR_E_RETURNCODE EgBasedLib::HwlComputeFmaskAddrFromCoord( - const ADDR_COMPUTE_FMASK_ADDRFROMCOORD_INPUT* pIn, ///< [in] input structure - ADDR_COMPUTE_FMASK_ADDRFROMCOORD_OUTPUT* pOut ///< [out] output structure - ) const -{ - ADDR_E_RETURNCODE retCode = 
ADDR_OK; - - return retCode; -} - -/** -**************************************************************************************************** -* EgBasedLib::HwlComputeFmaskCoordFromAddr -* @brief -* Entry of EgBasedLib ComputeFmaskCoordFromAddr -* @return -* ADDR_E_RETURNCODE -**************************************************************************************************** -*/ -ADDR_E_RETURNCODE EgBasedLib::HwlComputeFmaskCoordFromAddr( - const ADDR_COMPUTE_FMASK_COORDFROMADDR_INPUT* pIn, ///< [in] input structure - ADDR_COMPUTE_FMASK_COORDFROMADDR_OUTPUT* pOut ///< [out] output structure - ) const -{ - ADDR_E_RETURNCODE retCode = ADDR_OK; - - return retCode; -} - -/** -**************************************************************************************************** -* EgBasedLib::ComputeFmaskNumPlanesFromNumSamples -* -* @brief -* Compute fmask number of planes from number of samples -* -* @return -* Number of planes -**************************************************************************************************** -*/ -UINT_32 EgBasedLib::ComputeFmaskNumPlanesFromNumSamples( - UINT_32 numSamples) ///< [in] number of samples -{ - UINT_32 numPlanes; - - // - // FMASK is stored such that each micro tile is composed of elements containing N bits, where - // N is the number of samples. There is a micro tile for each bit in the FMASK address, and - // micro tiles for each address bit, sometimes referred to as a plane, are stored sequentially. - // The FMASK for a 2-sample surface looks like a general surface with 2 bits per element. - // The FMASK for a 4-sample surface looks like a general surface with 4 bits per element and - // 2 samples. The FMASK for an 8-sample surface looks like a general surface with 8 bits per - // element and 4 samples. R6xx and R7xx only stored 3 planes for 8-sample FMASK surfaces. - // This was changed for R8xx to simplify the logic in the CB. 
- // - switch (numSamples) - { - case 2: - numPlanes = 1; - break; - case 4: - numPlanes = 2; - break; - case 8: - numPlanes = 4; - break; - default: - ADDR_UNHANDLED_CASE(); - numPlanes = 0; - break; - } - return numPlanes; -} - -/** -**************************************************************************************************** -* EgBasedLib::ComputeFmaskResolvedBppFromNumSamples -* -* @brief -* Compute resolved fmask effective bpp based on number of samples -* -* @return -* bpp -**************************************************************************************************** -*/ -UINT_32 EgBasedLib::ComputeFmaskResolvedBppFromNumSamples( - UINT_32 numSamples) ///< number of samples -{ - UINT_32 bpp; - - // - // Resolved FMASK surfaces are generated yBit the CB and read yBit the texture unit - // so that the texture unit can read compressed multi-sample color data. - // These surfaces store each index value packed per element. - // Each element contains at least num_samples * log2(num_samples) bits. - // Resolved FMASK surfaces are addressed as follows: - // 2-sample Addressed similarly to a color surface with 8 bits per element and 1 sample. - // 4-sample Addressed similarly to a color surface with 8 bits per element and 1 sample. - // 8-sample Addressed similarly to a color surface with 32 bits per element and 1 sample. 
- - switch (numSamples) - { - case 2: - bpp = 8; - break; - case 4: - bpp = 8; - break; - case 8: - bpp = 32; - break; - default: - ADDR_UNHANDLED_CASE(); - bpp = 0; - break; - } - return bpp; -} - -/** -**************************************************************************************************** -* EgBasedLib::IsTileInfoAllZero -* -* @brief -* Return TRUE if all field are zero -* @note -* Since NULL input is consider to be all zero -**************************************************************************************************** -*/ -BOOL_32 EgBasedLib::IsTileInfoAllZero( - const ADDR_TILEINFO* pTileInfo) -{ - BOOL_32 allZero = TRUE; - - if (pTileInfo) - { - if ((pTileInfo->banks != 0) || - (pTileInfo->bankWidth != 0) || - (pTileInfo->bankHeight != 0) || - (pTileInfo->macroAspectRatio != 0) || - (pTileInfo->tileSplitBytes != 0) || - (pTileInfo->pipeConfig != 0) - ) - { - allZero = FALSE; - } - } - - return allZero; -} - -/** -**************************************************************************************************** -* EgBasedLib::HwlTileInfoEqual -* -* @brief -* Return TRUE if all field are equal -* @note -* Only takes care of current HWL's data -**************************************************************************************************** -*/ -BOOL_32 EgBasedLib::HwlTileInfoEqual( - const ADDR_TILEINFO* pLeft, ///<[in] Left compare operand - const ADDR_TILEINFO* pRight ///<[in] Right compare operand - ) const -{ - BOOL_32 equal = FALSE; - - if (pLeft->banks == pRight->banks && - pLeft->bankWidth == pRight->bankWidth && - pLeft->bankHeight == pRight->bankHeight && - pLeft->macroAspectRatio == pRight->macroAspectRatio && - pLeft->tileSplitBytes == pRight->tileSplitBytes) - { - equal = TRUE; - } - - return equal; -} - -/** -**************************************************************************************************** -* EgBasedLib::HwlConvertTileInfoToHW -* @brief -* Entry of EgBasedLib ConvertTileInfoToHW -* @return -* 
ADDR_E_RETURNCODE -**************************************************************************************************** -*/ -ADDR_E_RETURNCODE EgBasedLib::HwlConvertTileInfoToHW( - const ADDR_CONVERT_TILEINFOTOHW_INPUT* pIn, ///< [in] input structure - ADDR_CONVERT_TILEINFOTOHW_OUTPUT* pOut ///< [out] output structure - ) const -{ - ADDR_E_RETURNCODE retCode = ADDR_OK; - - ADDR_TILEINFO *pTileInfoIn = pIn->pTileInfo; - ADDR_TILEINFO *pTileInfoOut = pOut->pTileInfo; - - if ((pTileInfoIn != NULL) && (pTileInfoOut != NULL)) - { - if (pIn->reverse == FALSE) - { - switch (pTileInfoIn->banks) - { - case 2: - pTileInfoOut->banks = 0; - break; - case 4: - pTileInfoOut->banks = 1; - break; - case 8: - pTileInfoOut->banks = 2; - break; - case 16: - pTileInfoOut->banks = 3; - break; - default: - ADDR_ASSERT_ALWAYS(); - retCode = ADDR_INVALIDPARAMS; - pTileInfoOut->banks = 0; - break; - } - - switch (pTileInfoIn->bankWidth) - { - case 1: - pTileInfoOut->bankWidth = 0; - break; - case 2: - pTileInfoOut->bankWidth = 1; - break; - case 4: - pTileInfoOut->bankWidth = 2; - break; - case 8: - pTileInfoOut->bankWidth = 3; - break; - default: - ADDR_ASSERT_ALWAYS(); - retCode = ADDR_INVALIDPARAMS; - pTileInfoOut->bankWidth = 0; - break; - } - - switch (pTileInfoIn->bankHeight) - { - case 1: - pTileInfoOut->bankHeight = 0; - break; - case 2: - pTileInfoOut->bankHeight = 1; - break; - case 4: - pTileInfoOut->bankHeight = 2; - break; - case 8: - pTileInfoOut->bankHeight = 3; - break; - default: - ADDR_ASSERT_ALWAYS(); - retCode = ADDR_INVALIDPARAMS; - pTileInfoOut->bankHeight = 0; - break; - } - - switch (pTileInfoIn->macroAspectRatio) - { - case 1: - pTileInfoOut->macroAspectRatio = 0; - break; - case 2: - pTileInfoOut->macroAspectRatio = 1; - break; - case 4: - pTileInfoOut->macroAspectRatio = 2; - break; - case 8: - pTileInfoOut->macroAspectRatio = 3; - break; - default: - ADDR_ASSERT_ALWAYS(); - retCode = ADDR_INVALIDPARAMS; - pTileInfoOut->macroAspectRatio = 0; - break; - } - - 
switch (pTileInfoIn->tileSplitBytes) - { - case 64: - pTileInfoOut->tileSplitBytes = 0; - break; - case 128: - pTileInfoOut->tileSplitBytes = 1; - break; - case 256: - pTileInfoOut->tileSplitBytes = 2; - break; - case 512: - pTileInfoOut->tileSplitBytes = 3; - break; - case 1024: - pTileInfoOut->tileSplitBytes = 4; - break; - case 2048: - pTileInfoOut->tileSplitBytes = 5; - break; - case 4096: - pTileInfoOut->tileSplitBytes = 6; - break; - default: - ADDR_ASSERT_ALWAYS(); - retCode = ADDR_INVALIDPARAMS; - pTileInfoOut->tileSplitBytes = 0; - break; - } - } - else - { - switch (pTileInfoIn->banks) - { - case 0: - pTileInfoOut->banks = 2; - break; - case 1: - pTileInfoOut->banks = 4; - break; - case 2: - pTileInfoOut->banks = 8; - break; - case 3: - pTileInfoOut->banks = 16; - break; - default: - ADDR_ASSERT_ALWAYS(); - retCode = ADDR_INVALIDPARAMS; - pTileInfoOut->banks = 2; - break; - } - - switch (pTileInfoIn->bankWidth) - { - case 0: - pTileInfoOut->bankWidth = 1; - break; - case 1: - pTileInfoOut->bankWidth = 2; - break; - case 2: - pTileInfoOut->bankWidth = 4; - break; - case 3: - pTileInfoOut->bankWidth = 8; - break; - default: - ADDR_ASSERT_ALWAYS(); - retCode = ADDR_INVALIDPARAMS; - pTileInfoOut->bankWidth = 1; - break; - } - - switch (pTileInfoIn->bankHeight) - { - case 0: - pTileInfoOut->bankHeight = 1; - break; - case 1: - pTileInfoOut->bankHeight = 2; - break; - case 2: - pTileInfoOut->bankHeight = 4; - break; - case 3: - pTileInfoOut->bankHeight = 8; - break; - default: - ADDR_ASSERT_ALWAYS(); - retCode = ADDR_INVALIDPARAMS; - pTileInfoOut->bankHeight = 1; - break; - } - - switch (pTileInfoIn->macroAspectRatio) - { - case 0: - pTileInfoOut->macroAspectRatio = 1; - break; - case 1: - pTileInfoOut->macroAspectRatio = 2; - break; - case 2: - pTileInfoOut->macroAspectRatio = 4; - break; - case 3: - pTileInfoOut->macroAspectRatio = 8; - break; - default: - ADDR_ASSERT_ALWAYS(); - retCode = ADDR_INVALIDPARAMS; - pTileInfoOut->macroAspectRatio = 1; - break; - } 
- - switch (pTileInfoIn->tileSplitBytes) - { - case 0: - pTileInfoOut->tileSplitBytes = 64; - break; - case 1: - pTileInfoOut->tileSplitBytes = 128; - break; - case 2: - pTileInfoOut->tileSplitBytes = 256; - break; - case 3: - pTileInfoOut->tileSplitBytes = 512; - break; - case 4: - pTileInfoOut->tileSplitBytes = 1024; - break; - case 5: - pTileInfoOut->tileSplitBytes = 2048; - break; - case 6: - pTileInfoOut->tileSplitBytes = 4096; - break; - default: - ADDR_ASSERT_ALWAYS(); - retCode = ADDR_INVALIDPARAMS; - pTileInfoOut->tileSplitBytes = 64; - break; - } - } - - if (pTileInfoIn != pTileInfoOut) - { - pTileInfoOut->pipeConfig = pTileInfoIn->pipeConfig; - } - } - else - { - ADDR_ASSERT_ALWAYS(); - retCode = ADDR_INVALIDPARAMS; - } - - return retCode; -} - -/** -**************************************************************************************************** -* EgBasedLib::HwlComputeSurfaceInfo -* @brief -* Entry of EgBasedLib ComputeSurfaceInfo -* @return -* ADDR_E_RETURNCODE -**************************************************************************************************** -*/ -ADDR_E_RETURNCODE EgBasedLib::HwlComputeSurfaceInfo( - const ADDR_COMPUTE_SURFACE_INFO_INPUT* pIn, ///< [in] input structure - ADDR_COMPUTE_SURFACE_INFO_OUTPUT* pOut ///< [out] output structure - ) const -{ - ADDR_E_RETURNCODE retCode = ADDR_OK; - - if (pIn->numSamples < pIn->numFrags) - { - retCode = ADDR_INVALIDPARAMS; - } - - ADDR_TILEINFO tileInfo = {0}; - - if (retCode == ADDR_OK) - { - // Uses internal tile info if pOut does not have a valid pTileInfo - if (pOut->pTileInfo == NULL) - { - pOut->pTileInfo = &tileInfo; - } - - if (DispatchComputeSurfaceInfo(pIn, pOut) == FALSE) - { - retCode = ADDR_INVALIDPARAMS; - } - - // In case client uses tile info as input and would like to calculate a correct size and - // alignment together with tile info as output when the tile info is not suppose to have any - // matching indices in tile mode tables. 
- if (pIn->flags.skipIndicesOutput == FALSE) - { - // Returns an index - pOut->tileIndex = HwlPostCheckTileIndex(pOut->pTileInfo, - pOut->tileMode, - pOut->tileType, - pOut->tileIndex); - - if (IsMacroTiled(pOut->tileMode) && (pOut->macroModeIndex == TileIndexInvalid)) - { - pOut->macroModeIndex = HwlComputeMacroModeIndex(pOut->tileIndex, - pIn->flags, - pIn->bpp, - pIn->numSamples, - pOut->pTileInfo); - } - } - - // Resets pTileInfo to NULL if the internal tile info is used - if (pOut->pTileInfo == &tileInfo) - { -#if DEBUG - // Client does not pass in a valid pTileInfo - if (IsMacroTiled(pOut->tileMode)) - { - // If a valid index is returned, then no pTileInfo is okay - ADDR_ASSERT((m_configFlags.useTileIndex == FALSE) || - (pOut->tileIndex != TileIndexInvalid)); - - if (IsTileInfoAllZero(pIn->pTileInfo) == FALSE) - { - // The initial value of pIn->pTileInfo is copied to tileInfo - // We do not expect any of these value to be changed nor any 0 of inputs - ADDR_ASSERT(tileInfo.banks == pIn->pTileInfo->banks); - ADDR_ASSERT(tileInfo.bankWidth == pIn->pTileInfo->bankWidth); - ADDR_ASSERT(tileInfo.bankHeight == pIn->pTileInfo->bankHeight); - ADDR_ASSERT(tileInfo.macroAspectRatio == pIn->pTileInfo->macroAspectRatio); - ADDR_ASSERT(tileInfo.tileSplitBytes == pIn->pTileInfo->tileSplitBytes); - } - } -#endif - pOut->pTileInfo = NULL; - } - } - - return retCode; -} - -/** -**************************************************************************************************** -* EgBasedLib::HwlComputeSurfaceAddrFromCoord -* @brief -* Entry of EgBasedLib ComputeSurfaceAddrFromCoord -* @return -* ADDR_E_RETURNCODE -**************************************************************************************************** -*/ -ADDR_E_RETURNCODE EgBasedLib::HwlComputeSurfaceAddrFromCoord( - const ADDR_COMPUTE_SURFACE_ADDRFROMCOORD_INPUT* pIn, ///< [in] input structure - ADDR_COMPUTE_SURFACE_ADDRFROMCOORD_OUTPUT* pOut ///< [out] output structure - ) const -{ - ADDR_E_RETURNCODE retCode = 
ADDR_OK; - - if ( -#if !ALT_TEST // Overflow test needs this out-of-boundary coord - (pIn->x > pIn->pitch) || - (pIn->y > pIn->height) || -#endif - (pIn->numSamples > m_maxSamples)) - { - retCode = ADDR_INVALIDPARAMS; - } - else - { - pOut->addr = DispatchComputeSurfaceAddrFromCoord(pIn, pOut); - } - - return retCode; -} - -/** -**************************************************************************************************** -* EgBasedLib::HwlComputeSurfaceCoordFromAddr -* @brief -* Entry of EgBasedLib ComputeSurfaceCoordFromAddr -* @return -* ADDR_E_RETURNCODE -**************************************************************************************************** -*/ -ADDR_E_RETURNCODE EgBasedLib::HwlComputeSurfaceCoordFromAddr( - const ADDR_COMPUTE_SURFACE_COORDFROMADDR_INPUT* pIn, ///< [in] input structure - ADDR_COMPUTE_SURFACE_COORDFROMADDR_OUTPUT* pOut ///< [out] output structure - ) const -{ - ADDR_E_RETURNCODE retCode = ADDR_OK; - - if ((pIn->bitPosition >= 8) || - (pIn->numSamples > m_maxSamples)) - { - retCode = ADDR_INVALIDPARAMS; - } - else - { - DispatchComputeSurfaceCoordFromAddr(pIn, pOut); - } - return retCode; -} - -/** -**************************************************************************************************** -* EgBasedLib::HwlComputeSliceTileSwizzle -* @brief -* Entry of EgBasedLib ComputeSurfaceCoordFromAddr -* @return -* ADDR_E_RETURNCODE -**************************************************************************************************** -*/ -ADDR_E_RETURNCODE EgBasedLib::HwlComputeSliceTileSwizzle( - const ADDR_COMPUTE_SLICESWIZZLE_INPUT* pIn, ///< [in] input structure - ADDR_COMPUTE_SLICESWIZZLE_OUTPUT* pOut ///< [out] output structure - ) const -{ - ADDR_E_RETURNCODE retCode = ADDR_OK; - - if (pIn->pTileInfo && (pIn->pTileInfo->banks > 0)) - { - - pOut->tileSwizzle = ComputeSliceTileSwizzle(pIn->tileMode, - pIn->baseSwizzle, - pIn->slice, - pIn->baseAddr, - pIn->pTileInfo); - } - else - { - retCode = ADDR_INVALIDPARAMS; - } - - 
return retCode; -} - -/** -**************************************************************************************************** -* EgBasedLib::HwlComputeHtileBpp -* -* @brief -* Compute htile bpp -* -* @return -* Htile bpp -**************************************************************************************************** -*/ -UINT_32 EgBasedLib::HwlComputeHtileBpp( - BOOL_32 isWidth8, ///< [in] TRUE if block width is 8 - BOOL_32 isHeight8 ///< [in] TRUE if block height is 8 - ) const -{ - // only support 8x8 mode - ADDR_ASSERT(isWidth8 && isHeight8); - return 32; -} - -/** -**************************************************************************************************** -* EgBasedLib::HwlComputeHtileBaseAlign -* -* @brief -* Compute htile base alignment -* -* @return -* Htile base alignment -**************************************************************************************************** -*/ -UINT_32 EgBasedLib::HwlComputeHtileBaseAlign( - BOOL_32 isTcCompatible, ///< [in] if TC compatible - BOOL_32 isLinear, ///< [in] if it is linear mode - ADDR_TILEINFO* pTileInfo ///< [in] Tile info - ) const -{ - UINT_32 baseAlign = m_pipeInterleaveBytes * HwlGetPipes(pTileInfo); - - if (isTcCompatible) - { - ADDR_ASSERT(pTileInfo != NULL); - if (pTileInfo) - { - baseAlign *= pTileInfo->banks; - } - } - - return baseAlign; -} - -/** -**************************************************************************************************** -* EgBasedLib::HwlGetPitchAlignmentMicroTiled -* -* @brief -* Compute 1D tiled surface pitch alignment, calculation results are returned through -* output parameters. 
-* -* @return -* pitch alignment -**************************************************************************************************** -*/ -UINT_32 EgBasedLib::HwlGetPitchAlignmentMicroTiled( - AddrTileMode tileMode, ///< [in] tile mode - UINT_32 bpp, ///< [in] bits per pixel - ADDR_SURFACE_FLAGS flags, ///< [in] surface flags - UINT_32 numSamples ///< [in] number of samples - ) const -{ - UINT_32 pitchAlign; - - UINT_32 microTileThickness = Thickness(tileMode); - - UINT_32 pixelsPerMicroTile; - UINT_32 pixelsPerPipeInterleave; - UINT_32 microTilesPerPipeInterleave; - - // - // Special workaround for depth/stencil buffer, use 8 bpp to meet larger requirement for - // stencil buffer since pitch alignment is related to bpp. - // For a depth only buffer do not set this. - // - // Note: this actually does not work for mipmap but mipmap depth texture is not really - // sampled with mipmap. - // - if (flags.depth && (flags.noStencil == FALSE)) - { - bpp = 8; - } - - pixelsPerMicroTile = MicroTilePixels * microTileThickness; - pixelsPerPipeInterleave = BYTES_TO_BITS(m_pipeInterleaveBytes) / (bpp * numSamples); - microTilesPerPipeInterleave = pixelsPerPipeInterleave / pixelsPerMicroTile; - - pitchAlign = Max(MicroTileWidth, microTilesPerPipeInterleave * MicroTileWidth); - - return pitchAlign; -} - -/** -**************************************************************************************************** -* EgBasedLib::HwlGetSizeAdjustmentMicroTiled -* -* @brief -* Adjust 1D tiled surface pitch and slice size -* -* @return -* Logical slice size in bytes -**************************************************************************************************** -*/ -UINT_64 EgBasedLib::HwlGetSizeAdjustmentMicroTiled( - UINT_32 thickness, ///< [in] thickness - UINT_32 bpp, ///< [in] bits per pixel - ADDR_SURFACE_FLAGS flags, ///< [in] surface flags - UINT_32 numSamples, ///< [in] number of samples - UINT_32 baseAlign, ///< [in] base alignment - UINT_32 pitchAlign, ///< [in] pitch 
alignment - UINT_32* pPitch, ///< [in,out] pointer to pitch - UINT_32* pHeight ///< [in,out] pointer to height - ) const -{ - UINT_64 logicalSliceSize; - MAYBE_UNUSED UINT_64 physicalSliceSize; - - UINT_32 pitch = *pPitch; - UINT_32 height = *pHeight; - - // Logical slice: pitch * height * bpp * numSamples (no 1D MSAA so actually numSamples == 1) - logicalSliceSize = BITS_TO_BYTES(static_cast(pitch) * height * bpp * numSamples); - - // Physical slice: multiplied by thickness - physicalSliceSize = logicalSliceSize * thickness; - - // - // R800 will always pad physical slice size to baseAlign which is pipe_interleave_bytes - // - ADDR_ASSERT((physicalSliceSize % baseAlign) == 0); - - return logicalSliceSize; -} - -/** -**************************************************************************************************** -* EgBasedLib::HwlStereoCheckRightOffsetPadding -* -* @brief -* check if the height needs extra padding for stereo right eye offset, to avoid swizzling -* -* @return -* TRUE is the extra padding is needed -* -**************************************************************************************************** -*/ -UINT_32 EgBasedLib::HwlStereoCheckRightOffsetPadding( - ADDR_TILEINFO* pTileInfo ///< Tiling info - ) const -{ - UINT_32 stereoHeightAlign = 0; - - if (pTileInfo->macroAspectRatio > 2) - { - // Since 3D rendering treats right eye surface starting from y == "eye height" while - // display engine treats it to be 0, so the bank bits may be different. - // Additional padding in height is required to make sure it's possible - // to achieve synonym by adjusting bank swizzle of right eye surface. 
- - static const UINT_32 StereoAspectRatio = 2; - stereoHeightAlign = pTileInfo->banks * - pTileInfo->bankHeight * - MicroTileHeight / - StereoAspectRatio; - } - - return stereoHeightAlign; -} - -} // V1 -} // Addr diff -Nru mesa-18.3.3/src/amd/addrlib/r800/egbaddrlib.h mesa-19.0.1/src/amd/addrlib/r800/egbaddrlib.h --- mesa-18.3.3/src/amd/addrlib/r800/egbaddrlib.h 2017-11-05 00:14:08.000000000 +0000 +++ mesa-19.0.1/src/amd/addrlib/r800/egbaddrlib.h 1970-01-01 00:00:00.000000000 +0000 @@ -1,430 +0,0 @@ -/* - * Copyright © 2014 Advanced Micro Devices, Inc. - * All Rights Reserved. - * - * Permission is hereby granted, free of charge, to any person obtaining - * a copy of this software and associated documentation files (the - * "Software"), to deal in the Software without restriction, including - * without limitation the rights to use, copy, modify, merge, publish, - * distribute, sub license, and/or sell copies of the Software, and to - * permit persons to whom the Software is furnished to do so, subject to - * the following conditions: - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES - * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - * NON-INFRINGEMENT. IN NO EVENT SHALL THE COPYRIGHT HOLDERS, AUTHORS - * AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, - * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE - * USE OR OTHER DEALINGS IN THE SOFTWARE. - * - * The above copyright notice and this permission notice (including the - * next paragraph) shall be included in all copies or substantial portions - * of the Software. - */ - -/** -**************************************************************************************************** -* @file egbaddrlib.h -* @brief Contains the EgBasedLib class definition. 
-**************************************************************************************************** -*/ - -#ifndef __EG_BASED_ADDR_LIB_H__ -#define __EG_BASED_ADDR_LIB_H__ - -#include "addrlib1.h" - -namespace Addr -{ -namespace V1 -{ -/// Structures for functions -struct CoordFromBankPipe -{ - UINT_32 xBits : 3; - UINT_32 yBits : 4; - - UINT_32 xBit3 : 1; - UINT_32 xBit4 : 1; - UINT_32 xBit5 : 1; - UINT_32 yBit3 : 1; - UINT_32 yBit4 : 1; - UINT_32 yBit5 : 1; - UINT_32 yBit6 : 1; -}; - -/** -**************************************************************************************************** -* @brief This class is the Evergreen based address library -* @note Abstract class -**************************************************************************************************** -*/ -class EgBasedLib : public Lib -{ -protected: - EgBasedLib(const Client* pClient); - virtual ~EgBasedLib(); - -public: - - /// Surface info functions - - // NOTE: DispatchComputeSurfaceInfo using TileInfo takes both an input and an output. - // On input: - // One or more fields may be 0 to be calculated/defaulted - pre-SI h/w. - // H/W using tile mode index only accepts none or all 0's - SI and newer h/w. - // It then returns the actual tiling configuration used. 
- // Other methods' TileInfo must be valid on entry - BOOL_32 DispatchComputeSurfaceInfo( - const ADDR_COMPUTE_SURFACE_INFO_INPUT* pIn, - ADDR_COMPUTE_SURFACE_INFO_OUTPUT* pOut) const; - - ADDR_E_RETURNCODE DispatchComputeFmaskInfo( - const ADDR_COMPUTE_FMASK_INFO_INPUT* pIn, - ADDR_COMPUTE_FMASK_INFO_OUTPUT* pOut); - -protected: - // Hwl interface - virtual ADDR_E_RETURNCODE HwlComputeSurfaceInfo( - const ADDR_COMPUTE_SURFACE_INFO_INPUT* pIn, - ADDR_COMPUTE_SURFACE_INFO_OUTPUT* pOut) const; - - virtual ADDR_E_RETURNCODE HwlComputeSurfaceAddrFromCoord( - const ADDR_COMPUTE_SURFACE_ADDRFROMCOORD_INPUT* pIn, - ADDR_COMPUTE_SURFACE_ADDRFROMCOORD_OUTPUT* pOut) const; - - virtual ADDR_E_RETURNCODE HwlComputeSurfaceCoordFromAddr( - const ADDR_COMPUTE_SURFACE_COORDFROMADDR_INPUT* pIn, - ADDR_COMPUTE_SURFACE_COORDFROMADDR_OUTPUT* pOut) const; - - virtual ADDR_E_RETURNCODE HwlComputeSliceTileSwizzle( - const ADDR_COMPUTE_SLICESWIZZLE_INPUT* pIn, - ADDR_COMPUTE_SLICESWIZZLE_OUTPUT* pOut) const; - - virtual ADDR_E_RETURNCODE HwlExtractBankPipeSwizzle( - const ADDR_EXTRACT_BANKPIPE_SWIZZLE_INPUT* pIn, - ADDR_EXTRACT_BANKPIPE_SWIZZLE_OUTPUT* pOut) const; - - virtual ADDR_E_RETURNCODE HwlCombineBankPipeSwizzle( - UINT_32 bankSwizzle, UINT_32 pipeSwizzle, ADDR_TILEINFO* pTileInfo, - UINT_64 baseAddr, UINT_32* pTileSwizzle) const; - - virtual ADDR_E_RETURNCODE HwlComputeBaseSwizzle( - const ADDR_COMPUTE_BASE_SWIZZLE_INPUT* pIn, - ADDR_COMPUTE_BASE_SWIZZLE_OUTPUT* pOut) const; - - virtual ADDR_E_RETURNCODE HwlConvertTileInfoToHW( - const ADDR_CONVERT_TILEINFOTOHW_INPUT* pIn, - ADDR_CONVERT_TILEINFOTOHW_OUTPUT* pOut) const; - - virtual UINT_32 HwlComputeHtileBpp( - BOOL_32 isWidth8, BOOL_32 isHeight8) const; - - virtual UINT_32 HwlComputeHtileBaseAlign( - BOOL_32 isTcCompatible, BOOL_32 isLinear, ADDR_TILEINFO* pTileInfo) const; - - virtual ADDR_E_RETURNCODE HwlComputeFmaskInfo( - const ADDR_COMPUTE_FMASK_INFO_INPUT* pIn, - ADDR_COMPUTE_FMASK_INFO_OUTPUT* pOut); - - virtual 
ADDR_E_RETURNCODE HwlComputeFmaskAddrFromCoord( - const ADDR_COMPUTE_FMASK_ADDRFROMCOORD_INPUT* pIn, - ADDR_COMPUTE_FMASK_ADDRFROMCOORD_OUTPUT* pOut) const; - - virtual ADDR_E_RETURNCODE HwlComputeFmaskCoordFromAddr( - const ADDR_COMPUTE_FMASK_COORDFROMADDR_INPUT* pIn, - ADDR_COMPUTE_FMASK_COORDFROMADDR_OUTPUT* pOut) const; - - virtual BOOL_32 HwlGetAlignmentInfoMacroTiled( - const ADDR_COMPUTE_SURFACE_INFO_INPUT* pIn, - UINT_32* pPitchAlign, UINT_32* pHeightAlign, UINT_32* pSizeAlign) const; - - virtual UINT_32 HwlComputeQbStereoRightSwizzle( - ADDR_COMPUTE_SURFACE_INFO_OUTPUT* pInfo) const; - - virtual VOID HwlComputePixelCoordFromOffset( - UINT_32 offset, UINT_32 bpp, UINT_32 numSamples, - AddrTileMode tileMode, UINT_32 tileBase, UINT_32 compBits, - UINT_32* pX, UINT_32* pY, UINT_32* pSlice, UINT_32* pSample, - AddrTileType microTileType, BOOL_32 isDepthSampleOrder) const; - - /// Return Cmask block max - virtual BOOL_32 HwlGetMaxCmaskBlockMax() const - { - return 0x3FFF; // 14 bits, 0n16383 - } - - // Sub-hwl interface - /// Pure virtual function to setup tile info (indices) if client requests to do so - virtual VOID HwlSetupTileInfo( - AddrTileMode tileMode, ADDR_SURFACE_FLAGS flags, - UINT_32 bpp, UINT_32 pitch, UINT_32 height, UINT_32 numSamples, - ADDR_TILEINFO* inputTileInfo, ADDR_TILEINFO* outputTileInfo, - AddrTileType inTileType, ADDR_COMPUTE_SURFACE_INFO_OUTPUT* pOut) const = 0; - - /// Pure virtual function to get pitch alignment for linear modes - virtual UINT_32 HwlGetPitchAlignmentLinear(UINT_32 bpp, ADDR_SURFACE_FLAGS flags) const = 0; - - /// Pure virtual function to get size adjustment for linear modes - virtual UINT_64 HwlGetSizeAdjustmentLinear( - AddrTileMode tileMode, - UINT_32 bpp, UINT_32 numSamples, UINT_32 baseAlign, UINT_32 pitchAlign, - UINT_32 *pPitch, UINT_32 *pHeight, UINT_32 *pHeightAlign) const = 0; - - virtual UINT_32 HwlGetPitchAlignmentMicroTiled( - AddrTileMode tileMode, UINT_32 bpp, ADDR_SURFACE_FLAGS flags, UINT_32 
numSamples) const; - - virtual UINT_64 HwlGetSizeAdjustmentMicroTiled( - UINT_32 thickness, UINT_32 bpp, ADDR_SURFACE_FLAGS flags, UINT_32 numSamples, - UINT_32 baseAlign, UINT_32 pitchAlign, - UINT_32 *pPitch, UINT_32 *pHeight) const; - - /// Pure virtual function to do extra sanity check - virtual BOOL_32 HwlSanityCheckMacroTiled( - ADDR_TILEINFO* pTileInfo) const = 0; - - /// Pure virtual function to check current level to be the last macro tiled one - virtual VOID HwlCheckLastMacroTiledLvl( - const ADDR_COMPUTE_SURFACE_INFO_INPUT* pIn, - ADDR_COMPUTE_SURFACE_INFO_OUTPUT* pOut) const = 0; - - /// Adjusts bank before bank is modified by rotation - virtual UINT_32 HwlPreAdjustBank( - UINT_32 tileX, UINT_32 bank, ADDR_TILEINFO* pTileInfo) const = 0; - - virtual VOID HwlComputeSurfaceCoord2DFromBankPipe( - AddrTileMode tileMode, UINT_32* pX, UINT_32* pY, UINT_32 slice, - UINT_32 bank, UINT_32 pipe, - UINT_32 bankSwizzle, UINT_32 pipeSwizzle, UINT_32 tileSlices, - BOOL_32 ignoreSE, - ADDR_TILEINFO* pTileInfo) const = 0; - - virtual BOOL_32 HwlTileInfoEqual( - const ADDR_TILEINFO* pLeft, const ADDR_TILEINFO* pRight) const; - - virtual AddrTileMode HwlDegradeThickTileMode( - AddrTileMode baseTileMode, UINT_32 numSlices, UINT_32* pBytesPerTile) const; - - virtual INT_32 HwlPostCheckTileIndex( - const ADDR_TILEINFO* pInfo, AddrTileMode mode, AddrTileType type, - INT curIndex = TileIndexInvalid) const - { - return TileIndexInvalid; - } - - virtual VOID HwlFmaskPreThunkSurfInfo( - const ADDR_COMPUTE_FMASK_INFO_INPUT* pFmaskIn, - const ADDR_COMPUTE_FMASK_INFO_OUTPUT* pFmaskOut, - ADDR_COMPUTE_SURFACE_INFO_INPUT* pSurfIn, - ADDR_COMPUTE_SURFACE_INFO_OUTPUT* pSurfOut) const - { - } - - virtual VOID HwlFmaskPostThunkSurfInfo( - const ADDR_COMPUTE_SURFACE_INFO_OUTPUT* pSurfOut, - ADDR_COMPUTE_FMASK_INFO_OUTPUT* pFmaskOut) const - { - } - - virtual UINT_32 HwlStereoCheckRightOffsetPadding(ADDR_TILEINFO* pTileInfo) const; - - virtual BOOL_32 HwlReduceBankWidthHeight( - UINT_32 
tileSize, UINT_32 bpp, ADDR_SURFACE_FLAGS flags, UINT_32 numSamples, - UINT_32 bankHeightAlign, UINT_32 pipes, - ADDR_TILEINFO* pTileInfo) const; - - // Protected non-virtual functions - - /// Mip level functions - AddrTileMode ComputeSurfaceMipLevelTileMode( - AddrTileMode baseTileMode, UINT_32 bpp, - UINT_32 pitch, UINT_32 height, UINT_32 numSlices, UINT_32 numSamples, - UINT_32 pitchAlign, UINT_32 heightAlign, - ADDR_TILEINFO* pTileInfo) const; - - /// Swizzle functions - VOID ExtractBankPipeSwizzle( - UINT_32 base256b, ADDR_TILEINFO* pTileInfo, - UINT_32* pBankSwizzle, UINT_32* pPipeSwizzle) const; - - UINT_32 GetBankPipeSwizzle( - UINT_32 bankSwizzle, UINT_32 pipeSwizzle, - UINT_64 baseAddr, ADDR_TILEINFO* pTileInfo) const; - - UINT_32 ComputeSliceTileSwizzle( - AddrTileMode tileMode, UINT_32 baseSwizzle, UINT_32 slice, UINT_64 baseAddr, - ADDR_TILEINFO* pTileInfo) const; - - /// Addressing functions - virtual ADDR_E_RETURNCODE ComputeBankEquation( - UINT_32 log2BytesPP, UINT_32 threshX, UINT_32 threshY, - ADDR_TILEINFO* pTileInfo, ADDR_EQUATION* pEquation) const - { - return ADDR_NOTSUPPORTED; - } - - UINT_32 ComputeBankFromCoord( - UINT_32 x, UINT_32 y, UINT_32 slice, - AddrTileMode tileMode, UINT_32 bankSwizzle, UINT_32 tileSpitSlice, - ADDR_TILEINFO* pTileInfo) const; - - UINT_32 ComputeBankFromAddr( - UINT_64 addr, UINT_32 numBanks, UINT_32 numPipes) const; - - UINT_32 ComputePipeRotation( - AddrTileMode tileMode, UINT_32 numPipes) const; - - UINT_32 ComputeBankRotation( - AddrTileMode tileMode, UINT_32 numBanks, - UINT_32 numPipes) const; - - VOID ComputeSurfaceCoord2DFromBankPipe( - AddrTileMode tileMode, UINT_32 x, UINT_32 y, UINT_32 slice, - UINT_32 bank, UINT_32 pipe, - UINT_32 bankSwizzle, UINT_32 pipeSwizzle, UINT_32 tileSlices, - ADDR_TILEINFO* pTileInfo, - CoordFromBankPipe *pOutput) const; - - /// Htile/Cmask functions - UINT_64 ComputeHtileBytes( - UINT_32 pitch, UINT_32 height, UINT_32 bpp, - BOOL_32 isLinear, UINT_32 numSlices, UINT_64* 
sliceBytes, UINT_32 baseAlign) const; - - ADDR_E_RETURNCODE ComputeMacroTileEquation( - UINT_32 log2BytesPP, AddrTileMode tileMode, AddrTileType microTileType, - ADDR_TILEINFO* pTileInfo, ADDR_EQUATION* pEquation) const; - - // Static functions - static BOOL_32 IsTileInfoAllZero(const ADDR_TILEINFO* pTileInfo); - static UINT_32 ComputeFmaskNumPlanesFromNumSamples(UINT_32 numSamples); - static UINT_32 ComputeFmaskResolvedBppFromNumSamples(UINT_32 numSamples); - - virtual VOID HwlComputeSurfaceAlignmentsMacroTiled( - AddrTileMode tileMode, UINT_32 bpp, ADDR_SURFACE_FLAGS flags, - UINT_32 mipLevel, UINT_32 numSamples, ADDR_COMPUTE_SURFACE_INFO_OUTPUT* pOut) const - { - } - -private: - - BOOL_32 ComputeSurfaceInfoLinear( - const ADDR_COMPUTE_SURFACE_INFO_INPUT* pIn, - ADDR_COMPUTE_SURFACE_INFO_OUTPUT* pOut, - UINT_32 padDims) const; - - BOOL_32 ComputeSurfaceInfoMicroTiled( - const ADDR_COMPUTE_SURFACE_INFO_INPUT* pIn, - ADDR_COMPUTE_SURFACE_INFO_OUTPUT* pOut, - UINT_32 padDims, - AddrTileMode expTileMode) const; - - BOOL_32 ComputeSurfaceInfoMacroTiled( - const ADDR_COMPUTE_SURFACE_INFO_INPUT* pIn, - ADDR_COMPUTE_SURFACE_INFO_OUTPUT* pOut, - UINT_32 padDims, - AddrTileMode expTileMode) const; - - BOOL_32 ComputeSurfaceAlignmentsLinear( - AddrTileMode tileMode, UINT_32 bpp, ADDR_SURFACE_FLAGS flags, - UINT_32* pBaseAlign, UINT_32* pPitchAlign, UINT_32* pHeightAlign) const; - - BOOL_32 ComputeSurfaceAlignmentsMicroTiled( - AddrTileMode tileMode, UINT_32 bpp, ADDR_SURFACE_FLAGS flags, - UINT_32 mipLevel, UINT_32 numSamples, - UINT_32* pBaseAlign, UINT_32* pPitchAlign, UINT_32* pHeightAlign) const; - - BOOL_32 ComputeSurfaceAlignmentsMacroTiled( - AddrTileMode tileMode, UINT_32 bpp, ADDR_SURFACE_FLAGS flags, - UINT_32 mipLevel, UINT_32 numSamples, - ADDR_COMPUTE_SURFACE_INFO_OUTPUT* pOut) const; - - /// Surface addressing functions - UINT_64 DispatchComputeSurfaceAddrFromCoord( - const ADDR_COMPUTE_SURFACE_ADDRFROMCOORD_INPUT* pIn, - 
ADDR_COMPUTE_SURFACE_ADDRFROMCOORD_OUTPUT* pOut) const; - - VOID DispatchComputeSurfaceCoordFromAddr( - const ADDR_COMPUTE_SURFACE_COORDFROMADDR_INPUT* pIn, - ADDR_COMPUTE_SURFACE_COORDFROMADDR_OUTPUT* pOut) const; - - UINT_64 ComputeSurfaceAddrFromCoordMicroTiled( - UINT_32 x, UINT_32 y, UINT_32 slice, UINT_32 sample, - UINT_32 bpp, UINT_32 pitch, UINT_32 height, UINT_32 numSamples, - AddrTileMode tileMode, - AddrTileType microTileType, BOOL_32 isDepthSampleOrder, - UINT_32* pBitPosition) const; - - UINT_64 ComputeSurfaceAddrFromCoordMacroTiled( - UINT_32 x, UINT_32 y, UINT_32 slice, UINT_32 sample, - UINT_32 bpp, UINT_32 pitch, UINT_32 height, UINT_32 numSamples, - AddrTileMode tileMode, - AddrTileType microTileType, BOOL_32 ignoreSE, BOOL_32 isDepthSampleOrder, - UINT_32 pipeSwizzle, UINT_32 bankSwizzle, - ADDR_TILEINFO* pTileInfo, - UINT_32* pBitPosition) const; - - VOID ComputeSurfaceCoordFromAddrMacroTiled( - UINT_64 addr, UINT_32 bitPosition, - UINT_32 bpp, UINT_32 pitch, UINT_32 height, UINT_32 numSamples, - AddrTileMode tileMode, UINT_32 tileBase, UINT_32 compBits, - AddrTileType microTileType, BOOL_32 ignoreSE, BOOL_32 isDepthSampleOrder, - UINT_32 pipeSwizzle, UINT_32 bankSwizzle, - ADDR_TILEINFO* pTileInfo, - UINT_32* pX, UINT_32* pY, UINT_32* pSlice, UINT_32* pSample) const; - - /// Fmask functions - UINT_64 DispatchComputeFmaskAddrFromCoord( - const ADDR_COMPUTE_FMASK_ADDRFROMCOORD_INPUT* pIn, - ADDR_COMPUTE_FMASK_ADDRFROMCOORD_OUTPUT* pOut) const; - - VOID DispatchComputeFmaskCoordFromAddr( - const ADDR_COMPUTE_FMASK_COORDFROMADDR_INPUT* pIn, - ADDR_COMPUTE_FMASK_COORDFROMADDR_OUTPUT* pOut) const; - - // FMASK related methods - private - UINT_64 ComputeFmaskAddrFromCoordMicroTiled( - UINT_32 x, UINT_32 y, UINT_32 slice, UINT_32 sample, UINT_32 plane, - UINT_32 pitch, UINT_32 height, UINT_32 numSamples, AddrTileMode tileMode, - BOOL_32 resolved, UINT_32* pBitPosition) const; - - VOID ComputeFmaskCoordFromAddrMicroTiled( - UINT_64 addr, UINT_32 
bitPosition, - UINT_32 pitch, UINT_32 height, UINT_32 numSamples, - AddrTileMode tileMode, BOOL_32 resolved, - UINT_32* pX, UINT_32* pY, UINT_32* pSlice, UINT_32* pSample, UINT_32* pPlane) const; - - VOID ComputeFmaskCoordFromAddrMacroTiled( - UINT_64 addr, UINT_32 bitPosition, - UINT_32 pitch, UINT_32 height, UINT_32 numSamples, AddrTileMode tileMode, - UINT_32 pipeSwizzle, UINT_32 bankSwizzle, - BOOL_32 ignoreSE, - ADDR_TILEINFO* pTileInfo, - BOOL_32 resolved, - UINT_32* pX, UINT_32* pY, UINT_32* pSlice, UINT_32* pSample, UINT_32* pPlane) const; - - UINT_64 ComputeFmaskAddrFromCoordMacroTiled( - UINT_32 x, UINT_32 y, UINT_32 slice, UINT_32 sample, UINT_32 plane, - UINT_32 pitch, UINT_32 height, UINT_32 numSamples, - AddrTileMode tileMode, UINT_32 pipeSwizzle, UINT_32 bankSwizzle, - BOOL_32 ignoreSE, - ADDR_TILEINFO* pTileInfo, - BOOL_32 resolved, - UINT_32* pBitPosition) const; - - /// Sanity check functions - BOOL_32 SanityCheckMacroTiled( - ADDR_TILEINFO* pTileInfo) const; - -protected: - UINT_32 m_ranks; ///< Number of ranks - MC_ARB_RAMCFG.NOOFRANK - UINT_32 m_logicalBanks; ///< Logical banks = m_banks * m_ranks if m_banks != 16 - UINT_32 m_bankInterleave; ///< Bank interleave, as a multiple of pipe interleave size -}; - -} // V1 -} // Addr - -#endif - diff -Nru mesa-18.3.3/src/amd/addrlib/r800/siaddrlib.cpp mesa-19.0.1/src/amd/addrlib/r800/siaddrlib.cpp --- mesa-18.3.3/src/amd/addrlib/r800/siaddrlib.cpp 2018-04-19 04:33:31.000000000 +0000 +++ mesa-19.0.1/src/amd/addrlib/r800/siaddrlib.cpp 1970-01-01 00:00:00.000000000 +0000 @@ -1,3872 +0,0 @@ -/* - * Copyright © 2014 Advanced Micro Devices, Inc. - * All Rights Reserved. 
- * - * Permission is hereby granted, free of charge, to any person obtaining - * a copy of this software and associated documentation files (the - * "Software"), to deal in the Software without restriction, including - * without limitation the rights to use, copy, modify, merge, publish, - * distribute, sub license, and/or sell copies of the Software, and to - * permit persons to whom the Software is furnished to do so, subject to - * the following conditions: - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES - * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - * NON-INFRINGEMENT. IN NO EVENT SHALL THE COPYRIGHT HOLDERS, AUTHORS - * AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, - * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE - * USE OR OTHER DEALINGS IN THE SOFTWARE. - * - * The above copyright notice and this permission notice (including the - * next paragraph) shall be included in all copies or substantial portions - * of the Software. - */ - -/** -**************************************************************************************************** -* @file siaddrlib.cpp -* @brief Contains the implementation for the SiLib class. -**************************************************************************************************** -*/ - -#include "siaddrlib.h" -#include "si_gb_reg.h" - -#include "amdgpu_asic_addr.h" - -//////////////////////////////////////////////////////////////////////////////////////////////////// -//////////////////////////////////////////////////////////////////////////////////////////////////// -namespace Addr -{ - -/** -**************************************************************************************************** -* SiHwlInit -* -* @brief -* Creates an SiLib object. -* -* @return -* Returns an SiLib object pointer. 
-**************************************************************************************************** -*/ -Lib* SiHwlInit(const Client* pClient) -{ - return V1::SiLib::CreateObj(pClient); -} - -namespace V1 -{ - -// We don't support MSAA for equation -const BOOL_32 SiLib::m_EquationSupport[SiLib::TileTableSize][SiLib::MaxNumElementBytes] = -{ - {TRUE, TRUE, TRUE, FALSE, FALSE}, // 0, non-AA compressed depth or any stencil - {FALSE, FALSE, FALSE, FALSE, FALSE}, // 1, 2xAA/4xAA compressed depth with or without stencil - {FALSE, FALSE, FALSE, FALSE, FALSE}, // 2, 8xAA compressed depth with or without stencil - {FALSE, TRUE, FALSE, FALSE, FALSE}, // 3, 16 bpp depth PRT (non-MSAA), don't support uncompressed depth - {TRUE, TRUE, TRUE, FALSE, FALSE}, // 4, 1D depth - {FALSE, FALSE, FALSE, FALSE, FALSE}, // 5, 16 bpp depth PRT (4xMSAA) - {FALSE, FALSE, TRUE, FALSE, FALSE}, // 6, 32 bpp depth PRT (non-MSAA) - {FALSE, FALSE, FALSE, FALSE, FALSE}, // 7, 32 bpp depth PRT (4xMSAA) - {TRUE, TRUE, TRUE, TRUE, TRUE }, // 8, Linear - {TRUE, TRUE, TRUE, TRUE, TRUE }, // 9, 1D display - {TRUE, FALSE, FALSE, FALSE, FALSE}, // 10, 8 bpp color (displayable) - {FALSE, TRUE, FALSE, FALSE, FALSE}, // 11, 16 bpp color (displayable) - {FALSE, FALSE, TRUE, TRUE, FALSE}, // 12, 32/64 bpp color (displayable) - {TRUE, TRUE, TRUE, TRUE, TRUE }, // 13, 1D thin - {TRUE, FALSE, FALSE, FALSE, FALSE}, // 14, 8 bpp color non-displayable - {FALSE, TRUE, FALSE, FALSE, FALSE}, // 15, 16 bpp color non-displayable - {FALSE, FALSE, TRUE, FALSE, FALSE}, // 16, 32 bpp color non-displayable - {FALSE, FALSE, FALSE, TRUE, TRUE }, // 17, 64/128 bpp color non-displayable - {TRUE, TRUE, TRUE, TRUE, TRUE }, // 18, 1D THICK - {FALSE, FALSE, FALSE, FALSE, FALSE}, // 19, 2D XTHICK - {FALSE, FALSE, FALSE, FALSE, FALSE}, // 20, 2D THICK - {TRUE, FALSE, FALSE, FALSE, FALSE}, // 21, 8 bpp 2D PRTs (non-MSAA) - {FALSE, TRUE, FALSE, FALSE, FALSE}, // 22, 16 bpp 2D PRTs (non-MSAA) - {FALSE, FALSE, TRUE, FALSE, FALSE}, // 23, 
32 bpp 2D PRTs (non-MSAA) - {FALSE, FALSE, FALSE, TRUE, FALSE}, // 24, 64 bpp 2D PRTs (non-MSAA) - {FALSE, FALSE, FALSE, FALSE, TRUE }, // 25, 128bpp 2D PRTs (non-MSAA) - {FALSE, FALSE, FALSE, FALSE, FALSE}, // 26, none - {FALSE, FALSE, FALSE, FALSE, FALSE}, // 27, none - {FALSE, FALSE, FALSE, FALSE, FALSE}, // 28, none - {FALSE, FALSE, FALSE, FALSE, FALSE}, // 29, none - {FALSE, FALSE, FALSE, FALSE, FALSE}, // 30, 64bpp 2D PRTs (4xMSAA) - {FALSE, FALSE, FALSE, FALSE, FALSE}, // 31, none -}; - -/** -**************************************************************************************************** -* SiLib::SiLib -* -* @brief -* Constructor -* -**************************************************************************************************** -*/ -SiLib::SiLib(const Client* pClient) - : - EgBasedLib(pClient), - m_noOfEntries(0), - m_numEquations(0) -{ - m_class = SI_ADDRLIB; - memset(&m_settings, 0, sizeof(m_settings)); -} - -/** -**************************************************************************************************** -* SiLib::~SiLib -* -* @brief -* Destructor -**************************************************************************************************** -*/ -SiLib::~SiLib() -{ -} - -/** -**************************************************************************************************** -* SiLib::HwlGetPipes -* -* @brief -* Get number pipes -* @return -* num pipes -**************************************************************************************************** -*/ -UINT_32 SiLib::HwlGetPipes( - const ADDR_TILEINFO* pTileInfo ///< [in] Tile info - ) const -{ - UINT_32 numPipes; - - if (pTileInfo) - { - numPipes = GetPipePerSurf(pTileInfo->pipeConfig); - } - else - { - ADDR_ASSERT_ALWAYS(); - numPipes = m_pipes; // Suppose we should still have a global pipes - } - - return numPipes; -} - -/** -**************************************************************************************************** -* SiLib::GetPipePerSurf -* @brief -* get pipe 
num base on inputing tileinfo->pipeconfig -* @return -* pipe number -**************************************************************************************************** -*/ -UINT_32 SiLib::GetPipePerSurf( - AddrPipeCfg pipeConfig ///< [in] pipe config - ) const -{ - UINT_32 numPipes = 0; - - switch (pipeConfig) - { - case ADDR_PIPECFG_P2: - numPipes = 2; - break; - case ADDR_PIPECFG_P4_8x16: - case ADDR_PIPECFG_P4_16x16: - case ADDR_PIPECFG_P4_16x32: - case ADDR_PIPECFG_P4_32x32: - numPipes = 4; - break; - case ADDR_PIPECFG_P8_16x16_8x16: - case ADDR_PIPECFG_P8_16x32_8x16: - case ADDR_PIPECFG_P8_32x32_8x16: - case ADDR_PIPECFG_P8_16x32_16x16: - case ADDR_PIPECFG_P8_32x32_16x16: - case ADDR_PIPECFG_P8_32x32_16x32: - case ADDR_PIPECFG_P8_32x64_32x32: - numPipes = 8; - break; - case ADDR_PIPECFG_P16_32x32_8x16: - case ADDR_PIPECFG_P16_32x32_16x16: - numPipes = 16; - break; - default: - ADDR_ASSERT(!"Invalid pipe config"); - numPipes = m_pipes; - } - return numPipes; -} - -/** -**************************************************************************************************** -* SiLib::ComputeBankEquation -* -* @brief -* Compute bank equation -* -* @return -* If equation can be computed -**************************************************************************************************** -*/ -ADDR_E_RETURNCODE SiLib::ComputeBankEquation( - UINT_32 log2BytesPP, ///< [in] log2 of bytes per pixel - UINT_32 threshX, ///< [in] threshold for x channel - UINT_32 threshY, ///< [in] threshold for y channel - ADDR_TILEINFO* pTileInfo, ///< [in] tile info - ADDR_EQUATION* pEquation ///< [out] bank equation - ) const -{ - ADDR_E_RETURNCODE retCode = ADDR_OK; - - UINT_32 pipes = HwlGetPipes(pTileInfo); - UINT_32 bankXStart = 3 + Log2(pipes) + Log2(pTileInfo->bankWidth); - UINT_32 bankYStart = 3 + Log2(pTileInfo->bankHeight); - - ADDR_CHANNEL_SETTING x3 = InitChannel(1, 0, log2BytesPP + bankXStart); - ADDR_CHANNEL_SETTING x4 = InitChannel(1, 0, log2BytesPP + bankXStart + 1); - 
ADDR_CHANNEL_SETTING x5 = InitChannel(1, 0, log2BytesPP + bankXStart + 2); - ADDR_CHANNEL_SETTING x6 = InitChannel(1, 0, log2BytesPP + bankXStart + 3); - ADDR_CHANNEL_SETTING y3 = InitChannel(1, 1, bankYStart); - ADDR_CHANNEL_SETTING y4 = InitChannel(1, 1, bankYStart + 1); - ADDR_CHANNEL_SETTING y5 = InitChannel(1, 1, bankYStart + 2); - ADDR_CHANNEL_SETTING y6 = InitChannel(1, 1, bankYStart + 3); - - x3.value = (threshX > bankXStart) ? x3.value : 0; - x4.value = (threshX > bankXStart + 1) ? x4.value : 0; - x5.value = (threshX > bankXStart + 2) ? x5.value : 0; - x6.value = (threshX > bankXStart + 3) ? x6.value : 0; - y3.value = (threshY > bankYStart) ? y3.value : 0; - y4.value = (threshY > bankYStart + 1) ? y4.value : 0; - y5.value = (threshY > bankYStart + 2) ? y5.value : 0; - y6.value = (threshY > bankYStart + 3) ? y6.value : 0; - - switch (pTileInfo->banks) - { - case 16: - if (pTileInfo->macroAspectRatio == 1) - { - pEquation->addr[0] = y6; - pEquation->xor1[0] = x3; - pEquation->addr[1] = y5; - pEquation->xor1[1] = y6; - pEquation->xor2[1] = x4; - pEquation->addr[2] = y4; - pEquation->xor1[2] = x5; - pEquation->addr[3] = y3; - pEquation->xor1[3] = x6; - } - else if (pTileInfo->macroAspectRatio == 2) - { - pEquation->addr[0] = x3; - pEquation->xor1[0] = y6; - pEquation->addr[1] = y5; - pEquation->xor1[1] = y6; - pEquation->xor2[1] = x4; - pEquation->addr[2] = y4; - pEquation->xor1[2] = x5; - pEquation->addr[3] = y3; - pEquation->xor1[3] = x6; - } - else if (pTileInfo->macroAspectRatio == 4) - { - pEquation->addr[0] = x3; - pEquation->xor1[0] = y6; - pEquation->addr[1] = x4; - pEquation->xor1[1] = y5; - pEquation->xor2[1] = y6; - pEquation->addr[2] = y4; - pEquation->xor1[2] = x5; - pEquation->addr[3] = y3; - pEquation->xor1[3] = x6; - } - else if (pTileInfo->macroAspectRatio == 8) - { - pEquation->addr[0] = x3; - pEquation->xor1[0] = y6; - pEquation->addr[1] = x4; - pEquation->xor1[1] = y5; - pEquation->xor2[1] = y6; - pEquation->addr[2] = x5; - 
pEquation->xor1[2] = y4; - pEquation->addr[3] = y3; - pEquation->xor1[3] = x6; - } - else - { - ADDR_ASSERT_ALWAYS(); - } - pEquation->numBits = 4; - break; - case 8: - if (pTileInfo->macroAspectRatio == 1) - { - pEquation->addr[0] = y5; - pEquation->xor1[0] = x3; - pEquation->addr[1] = y4; - pEquation->xor1[1] = y5; - pEquation->xor2[1] = x4; - pEquation->addr[2] = y3; - pEquation->xor1[2] = x5; - } - else if (pTileInfo->macroAspectRatio == 2) - { - pEquation->addr[0] = x3; - pEquation->xor1[0] = y5; - pEquation->addr[1] = y4; - pEquation->xor1[1] = y5; - pEquation->xor2[1] = x4; - pEquation->addr[2] = y3; - pEquation->xor1[2] = x5; - } - else if (pTileInfo->macroAspectRatio == 4) - { - pEquation->addr[0] = x3; - pEquation->xor1[0] = y5; - pEquation->addr[1] = x4; - pEquation->xor1[1] = y4; - pEquation->xor2[1] = y5; - pEquation->addr[2] = y3; - pEquation->xor1[2] = x5; - } - else - { - ADDR_ASSERT_ALWAYS(); - } - pEquation->numBits = 3; - break; - case 4: - if (pTileInfo->macroAspectRatio == 1) - { - pEquation->addr[0] = y4; - pEquation->xor1[0] = x3; - pEquation->addr[1] = y3; - pEquation->xor1[1] = x4; - } - else if (pTileInfo->macroAspectRatio == 2) - { - pEquation->addr[0] = x3; - pEquation->xor1[0] = y4; - pEquation->addr[1] = y3; - pEquation->xor1[1] = x4; - } - else - { - pEquation->addr[0] = x3; - pEquation->xor1[0] = y4; - pEquation->addr[1] = x4; - pEquation->xor1[1] = y3; - } - pEquation->numBits = 2; - break; - case 2: - if (pTileInfo->macroAspectRatio == 1) - { - pEquation->addr[0] = y3; - pEquation->xor1[0] = x3; - } - else - { - pEquation->addr[0] = x3; - pEquation->xor1[0] = y3; - } - pEquation->numBits = 1; - break; - default: - pEquation->numBits = 0; - retCode = ADDR_NOTSUPPORTED; - ADDR_ASSERT_ALWAYS(); - break; - } - - for (UINT_32 i = 0; i < pEquation->numBits; i++) - { - if (pEquation->addr[i].value == 0) - { - if (pEquation->xor1[i].value == 0) - { - // 00X -> X00 - pEquation->addr[i].value = pEquation->xor2[i].value; - 
pEquation->xor2[i].value = 0; - } - else - { - pEquation->addr[i].value = pEquation->xor1[i].value; - - if (pEquation->xor2[i].value != 0) - { - // 0XY -> XY0 - pEquation->xor1[i].value = pEquation->xor2[i].value; - pEquation->xor2[i].value = 0; - } - else - { - // 0X0 -> X00 - pEquation->xor1[i].value = 0; - } - } - } - else if (pEquation->xor1[i].value == 0) - { - if (pEquation->xor2[i].value != 0) - { - // X0Y -> XY0 - pEquation->xor1[i].value = pEquation->xor2[i].value; - pEquation->xor2[i].value = 0; - } - } - } - - if ((pTileInfo->bankWidth == 1) && - ((pTileInfo->pipeConfig == ADDR_PIPECFG_P4_32x32) || - (pTileInfo->pipeConfig == ADDR_PIPECFG_P8_32x64_32x32))) - { - retCode = ADDR_NOTSUPPORTED; - } - - return retCode; -} - -/** -**************************************************************************************************** -* SiLib::ComputePipeEquation -* -* @brief -* Compute pipe equation -* -* @return -* If equation can be computed -**************************************************************************************************** -*/ -ADDR_E_RETURNCODE SiLib::ComputePipeEquation( - UINT_32 log2BytesPP, ///< [in] Log2 of bytes per pixel - UINT_32 threshX, ///< [in] Threshold for X channel - UINT_32 threshY, ///< [in] Threshold for Y channel - ADDR_TILEINFO* pTileInfo, ///< [in] Tile info - ADDR_EQUATION* pEquation ///< [out] Pipe configure - ) const -{ - ADDR_E_RETURNCODE retCode = ADDR_OK; - - ADDR_CHANNEL_SETTING* pAddr = pEquation->addr; - ADDR_CHANNEL_SETTING* pXor1 = pEquation->xor1; - ADDR_CHANNEL_SETTING* pXor2 = pEquation->xor2; - - ADDR_CHANNEL_SETTING x3 = InitChannel(1, 0, 3 + log2BytesPP); - ADDR_CHANNEL_SETTING x4 = InitChannel(1, 0, 4 + log2BytesPP); - ADDR_CHANNEL_SETTING x5 = InitChannel(1, 0, 5 + log2BytesPP); - ADDR_CHANNEL_SETTING x6 = InitChannel(1, 0, 6 + log2BytesPP); - ADDR_CHANNEL_SETTING y3 = InitChannel(1, 1, 3); - ADDR_CHANNEL_SETTING y4 = InitChannel(1, 1, 4); - ADDR_CHANNEL_SETTING y5 = InitChannel(1, 1, 5); - 
ADDR_CHANNEL_SETTING y6 = InitChannel(1, 1, 6); - - x3.value = (threshX > 3) ? x3.value : 0; - x4.value = (threshX > 4) ? x4.value : 0; - x5.value = (threshX > 5) ? x5.value : 0; - x6.value = (threshX > 6) ? x6.value : 0; - y3.value = (threshY > 3) ? y3.value : 0; - y4.value = (threshY > 4) ? y4.value : 0; - y5.value = (threshY > 5) ? y5.value : 0; - y6.value = (threshY > 6) ? y6.value : 0; - - switch (pTileInfo->pipeConfig) - { - case ADDR_PIPECFG_P2: - pAddr[0] = x3; - pXor1[0] = y3; - pEquation->numBits = 1; - break; - case ADDR_PIPECFG_P4_8x16: - pAddr[0] = x4; - pXor1[0] = y3; - pAddr[1] = x3; - pXor1[1] = y4; - pEquation->numBits = 2; - break; - case ADDR_PIPECFG_P4_16x16: - pAddr[0] = x3; - pXor1[0] = y3; - pXor2[0] = x4; - pAddr[1] = x4; - pXor1[1] = y4; - pEquation->numBits = 2; - break; - case ADDR_PIPECFG_P4_16x32: - pAddr[0] = x3; - pXor1[0] = y3; - pXor2[0] = x4; - pAddr[1] = x4; - pXor1[1] = y5; - pEquation->numBits = 2; - break; - case ADDR_PIPECFG_P4_32x32: - pAddr[0] = x3; - pXor1[0] = y3; - pXor2[0] = x5; - pAddr[1] = x5; - pXor1[1] = y5; - pEquation->numBits = 2; - break; - case ADDR_PIPECFG_P8_16x16_8x16: - pAddr[0] = x4; - pXor1[0] = y3; - pXor2[0] = x5; - pAddr[1] = x3; - pXor1[1] = y5; - pEquation->numBits = 3; - break; - case ADDR_PIPECFG_P8_16x32_8x16: - pAddr[0] = x4; - pXor1[0] = y3; - pXor2[0] = x5; - pAddr[1] = x3; - pXor1[1] = y4; - pAddr[2] = x4; - pXor1[2] = y5; - pEquation->numBits = 3; - break; - case ADDR_PIPECFG_P8_16x32_16x16: - pAddr[0] = x3; - pXor1[0] = y3; - pXor2[0] = x4; - pAddr[1] = x5; - pXor1[1] = y4; - pAddr[2] = x4; - pXor1[2] = y5; - pEquation->numBits = 3; - break; - case ADDR_PIPECFG_P8_32x32_8x16: - pAddr[0] = x4; - pXor1[0] = y3; - pXor2[0] = x5; - pAddr[1] = x3; - pXor1[1] = y4; - pAddr[2] = x5; - pXor1[2] = y5; - pEquation->numBits = 3; - break; - case ADDR_PIPECFG_P8_32x32_16x16: - pAddr[0] = x3; - pXor1[0] = y3; - pXor2[0] = x4; - pAddr[1] = x4; - pXor1[1] = y4; - pAddr[2] = x5; - pXor1[2] = y5; - 
pEquation->numBits = 3; - break; - case ADDR_PIPECFG_P8_32x32_16x32: - pAddr[0] = x3; - pXor1[0] = y3; - pXor2[0] = x4; - pAddr[1] = x4; - pXor1[1] = y6; - pAddr[2] = x5; - pXor1[2] = y5; - pEquation->numBits = 3; - break; - case ADDR_PIPECFG_P8_32x64_32x32: - pAddr[0] = x3; - pXor1[0] = y3; - pXor2[0] = x5; - pAddr[1] = x6; - pXor1[1] = y5; - pAddr[2] = x5; - pXor1[2] = y6; - pEquation->numBits = 3; - break; - case ADDR_PIPECFG_P16_32x32_8x16: - pAddr[0] = x4; - pXor1[0] = y3; - pAddr[1] = x3; - pXor1[1] = y4; - pAddr[2] = x5; - pXor1[2] = y6; - pAddr[3] = x6; - pXor1[3] = y5; - pEquation->numBits = 4; - break; - case ADDR_PIPECFG_P16_32x32_16x16: - pAddr[0] = x3; - pXor1[0] = y3; - pXor2[0] = x4; - pAddr[1] = x4; - pXor1[1] = y4; - pAddr[2] = x5; - pXor1[2] = y6; - pAddr[3] = x6; - pXor1[3] = y5; - pEquation->numBits = 4; - break; - default: - ADDR_UNHANDLED_CASE(); - pEquation->numBits = 0; - retCode = ADDR_NOTSUPPORTED; - break; - } - - if (m_settings.isVegaM && (pEquation->numBits == 4)) - { - ADDR_CHANNEL_SETTING addeMsb = pAddr[0]; - ADDR_CHANNEL_SETTING xor1Msb = pXor1[0]; - ADDR_CHANNEL_SETTING xor2Msb = pXor2[0]; - - pAddr[0] = pAddr[1]; - pXor1[0] = pXor1[1]; - pXor2[0] = pXor2[1]; - - pAddr[1] = pAddr[2]; - pXor1[1] = pXor1[2]; - pXor2[1] = pXor2[2]; - - pAddr[2] = pAddr[3]; - pXor1[2] = pXor1[3]; - pXor2[2] = pXor2[3]; - - pAddr[3] = addeMsb; - pXor1[3] = xor1Msb; - pXor2[3] = xor2Msb; - } - - for (UINT_32 i = 0; i < pEquation->numBits; i++) - { - if (pAddr[i].value == 0) - { - if (pXor1[i].value == 0) - { - pAddr[i].value = pXor2[i].value; - } - else - { - pAddr[i].value = pXor1[i].value; - pXor1[i].value = 0; - } - } - } - - return retCode; -} - -/** -**************************************************************************************************** -* SiLib::ComputePipeFromCoord -* -* @brief -* Compute pipe number from coordinates -* @return -* Pipe number 
-**************************************************************************************************** -*/ -UINT_32 SiLib::ComputePipeFromCoord( - UINT_32 x, ///< [in] x coordinate - UINT_32 y, ///< [in] y coordinate - UINT_32 slice, ///< [in] slice index - AddrTileMode tileMode, ///< [in] tile mode - UINT_32 pipeSwizzle, ///< [in] pipe swizzle - BOOL_32 ignoreSE, ///< [in] TRUE if shader engines are ignored - ADDR_TILEINFO* pTileInfo ///< [in] Tile info - ) const -{ - UINT_32 pipe; - UINT_32 pipeBit0 = 0; - UINT_32 pipeBit1 = 0; - UINT_32 pipeBit2 = 0; - UINT_32 pipeBit3 = 0; - UINT_32 sliceRotation; - UINT_32 numPipes = 0; - - UINT_32 tx = x / MicroTileWidth; - UINT_32 ty = y / MicroTileHeight; - UINT_32 x3 = _BIT(tx,0); - UINT_32 x4 = _BIT(tx,1); - UINT_32 x5 = _BIT(tx,2); - UINT_32 x6 = _BIT(tx,3); - UINT_32 y3 = _BIT(ty,0); - UINT_32 y4 = _BIT(ty,1); - UINT_32 y5 = _BIT(ty,2); - UINT_32 y6 = _BIT(ty,3); - - switch (pTileInfo->pipeConfig) - { - case ADDR_PIPECFG_P2: - pipeBit0 = x3 ^ y3; - numPipes = 2; - break; - case ADDR_PIPECFG_P4_8x16: - pipeBit0 = x4 ^ y3; - pipeBit1 = x3 ^ y4; - numPipes = 4; - break; - case ADDR_PIPECFG_P4_16x16: - pipeBit0 = x3 ^ y3 ^ x4; - pipeBit1 = x4 ^ y4; - numPipes = 4; - break; - case ADDR_PIPECFG_P4_16x32: - pipeBit0 = x3 ^ y3 ^ x4; - pipeBit1 = x4 ^ y5; - numPipes = 4; - break; - case ADDR_PIPECFG_P4_32x32: - pipeBit0 = x3 ^ y3 ^ x5; - pipeBit1 = x5 ^ y5; - numPipes = 4; - break; - case ADDR_PIPECFG_P8_16x16_8x16: - pipeBit0 = x4 ^ y3 ^ x5; - pipeBit1 = x3 ^ y5; - numPipes = 8; - break; - case ADDR_PIPECFG_P8_16x32_8x16: - pipeBit0 = x4 ^ y3 ^ x5; - pipeBit1 = x3 ^ y4; - pipeBit2 = x4 ^ y5; - numPipes = 8; - break; - case ADDR_PIPECFG_P8_16x32_16x16: - pipeBit0 = x3 ^ y3 ^ x4; - pipeBit1 = x5 ^ y4; - pipeBit2 = x4 ^ y5; - numPipes = 8; - break; - case ADDR_PIPECFG_P8_32x32_8x16: - pipeBit0 = x4 ^ y3 ^ x5; - pipeBit1 = x3 ^ y4; - pipeBit2 = x5 ^ y5; - numPipes = 8; - break; - case ADDR_PIPECFG_P8_32x32_16x16: - pipeBit0 = x3 ^ 
y3 ^ x4; - pipeBit1 = x4 ^ y4; - pipeBit2 = x5 ^ y5; - numPipes = 8; - break; - case ADDR_PIPECFG_P8_32x32_16x32: - pipeBit0 = x3 ^ y3 ^ x4; - pipeBit1 = x4 ^ y6; - pipeBit2 = x5 ^ y5; - numPipes = 8; - break; - case ADDR_PIPECFG_P8_32x64_32x32: - pipeBit0 = x3 ^ y3 ^ x5; - pipeBit1 = x6 ^ y5; - pipeBit2 = x5 ^ y6; - numPipes = 8; - break; - case ADDR_PIPECFG_P16_32x32_8x16: - pipeBit0 = x4 ^ y3; - pipeBit1 = x3 ^ y4; - pipeBit2 = x5 ^ y6; - pipeBit3 = x6 ^ y5; - numPipes = 16; - break; - case ADDR_PIPECFG_P16_32x32_16x16: - pipeBit0 = x3 ^ y3 ^ x4; - pipeBit1 = x4 ^ y4; - pipeBit2 = x5 ^ y6; - pipeBit3 = x6 ^ y5; - numPipes = 16; - break; - default: - ADDR_UNHANDLED_CASE(); - break; - } - - if (m_settings.isVegaM && (numPipes == 16)) - { - UINT_32 pipeMsb = pipeBit0; - pipeBit0 = pipeBit1; - pipeBit1 = pipeBit2; - pipeBit2 = pipeBit3; - pipeBit3 = pipeMsb; - } - - pipe = pipeBit0 | (pipeBit1 << 1) | (pipeBit2 << 2) | (pipeBit3 << 3); - - UINT_32 microTileThickness = Thickness(tileMode); - - // - // Apply pipe rotation for the slice. 
- // - switch (tileMode) - { - case ADDR_TM_3D_TILED_THIN1: //fall through thin - case ADDR_TM_3D_TILED_THICK: //fall through thick - case ADDR_TM_3D_TILED_XTHICK: - sliceRotation = - Max(1, static_cast(numPipes / 2) - 1) * (slice / microTileThickness); - break; - default: - sliceRotation = 0; - break; - } - pipeSwizzle += sliceRotation; - pipeSwizzle &= (numPipes - 1); - - pipe = pipe ^ pipeSwizzle; - - return pipe; -} - -/** -**************************************************************************************************** -* SiLib::ComputeTileCoordFromPipeAndElemIdx -* -* @brief -* Compute (x,y) of a tile within a macro tile from address -* @return -* Pipe number -**************************************************************************************************** -*/ -VOID SiLib::ComputeTileCoordFromPipeAndElemIdx( - UINT_32 elemIdx, ///< [in] per pipe element index within a macro tile - UINT_32 pipe, ///< [in] pipe index - AddrPipeCfg pipeCfg, ///< [in] pipe config - UINT_32 pitchInMacroTile, ///< [in] surface pitch in macro tile - UINT_32 x, ///< [in] x coordinate of the (0,0) tile in a macro tile - UINT_32 y, ///< [in] y coordinate of the (0,0) tile in a macro tile - UINT_32* pX, ///< [out] x coordinate - UINT_32* pY ///< [out] y coordinate - ) const -{ - UINT_32 pipebit0 = _BIT(pipe,0); - UINT_32 pipebit1 = _BIT(pipe,1); - UINT_32 pipebit2 = _BIT(pipe,2); - UINT_32 pipebit3 = _BIT(pipe,3); - UINT_32 elemIdx0 = _BIT(elemIdx,0); - UINT_32 elemIdx1 = _BIT(elemIdx,1); - UINT_32 elemIdx2 = _BIT(elemIdx,2); - UINT_32 x3 = 0; - UINT_32 x4 = 0; - UINT_32 x5 = 0; - UINT_32 x6 = 0; - UINT_32 y3 = 0; - UINT_32 y4 = 0; - UINT_32 y5 = 0; - UINT_32 y6 = 0; - - switch(pipeCfg) - { - case ADDR_PIPECFG_P2: - x4 = elemIdx2; - y4 = elemIdx1 ^ x4; - y3 = elemIdx0 ^ x4; - x3 = pipebit0 ^ y3; - *pY = Bits2Number(2, y4, y3); - *pX = Bits2Number(2, x4, x3); - break; - case ADDR_PIPECFG_P4_8x16: - x4 = elemIdx1; - y4 = elemIdx0 ^ x4; - x3 = pipebit1 ^ y4; - y3 = pipebit0 ^ x4; - 
*pY = Bits2Number(2, y4, y3); - *pX = Bits2Number(2, x4, x3); - break; - case ADDR_PIPECFG_P4_16x16: - x4 = elemIdx1; - y3 = elemIdx0 ^ x4; - y4 = pipebit1 ^ x4; - x3 = pipebit0 ^ y3 ^ x4; - *pY = Bits2Number(2, y4, y3); - *pX = Bits2Number(2, x4, x3); - break; - case ADDR_PIPECFG_P4_16x32: - x3 = elemIdx0 ^ pipebit0; - y5 = _BIT(y,5); - x4 = pipebit1 ^ y5; - y3 = pipebit0 ^ x3 ^ x4; - y4 = elemIdx1 ^ x4; - *pY = Bits2Number(2, y4, y3); - *pX = Bits2Number(2, x4, x3); - break; - case ADDR_PIPECFG_P4_32x32: - x4 = elemIdx2; - y3 = elemIdx0 ^ x4; - y4 = elemIdx1 ^ x4; - if((pitchInMacroTile % 2) == 0) - { //even - y5 = _BIT(y,5); - x5 = pipebit1 ^ y5; - x3 = pipebit0 ^ y3 ^ x5; - *pY = Bits2Number(2, y4, y3); - *pX = Bits2Number(3, x5, x4, x3); - } - else - { //odd - x5 = _BIT(x,5); - x3 = pipebit0 ^ y3 ^ x5; - *pY = Bits2Number(2, y4, y3); - *pX = Bits2Number(2, x4, x3); - } - break; - case ADDR_PIPECFG_P8_16x16_8x16: - x4 = elemIdx0; - y5 = _BIT(y,5); - x5 = _BIT(x,5); - x3 = pipebit1 ^ y5; - y4 = pipebit2 ^ x4; - y3 = pipebit0 ^ x5 ^ x4; - *pY = Bits2Number(2, y4, y3); - *pX = Bits2Number(2, x4, x3); - break; - case ADDR_PIPECFG_P8_16x32_8x16: - x3 = elemIdx0; - y4 = pipebit1 ^ x3; - y5 = _BIT(y,5); - x5 = _BIT(x,5); - x4 = pipebit2 ^ y5; - y3 = pipebit0 ^ x4 ^ x5; - *pY = Bits2Number(2, y4, y3); - *pX = Bits2Number(2, x4, x3); - break; - case ADDR_PIPECFG_P8_32x32_8x16: - x4 = elemIdx1; - y4 = elemIdx0 ^ x4; - x3 = pipebit1 ^ y4; - if((pitchInMacroTile % 2) == 0) - { //even - y5 = _BIT(y,5); - x5 = _BIT(x,5); - x5 = pipebit2 ^ y5; - y3 = pipebit0 ^ x4 ^ x5; - *pY = Bits2Number(2, y4, y3); - *pX = Bits2Number(3, x5, x4, x3); - } - else - { //odd - x5 = _BIT(x,5); - y3 = pipebit0 ^ x4 ^ x5; - *pY = Bits2Number(2, y4, y3); - *pX = Bits2Number(2, x4, x3); - } - break; - case ADDR_PIPECFG_P8_16x32_16x16: - x3 = elemIdx0; - x5 = _BIT(x,5); - y5 = _BIT(y,5); - x4 = pipebit2 ^ y5; - y4 = pipebit1 ^ x5; - y3 = pipebit0 ^ x3 ^ x4; - *pY = Bits2Number(2, y4, y3); - *pX = 
Bits2Number(2, x4, x3); - break; - case ADDR_PIPECFG_P8_32x32_16x16: - x4 = elemIdx1; - y3 = elemIdx0 ^ x4; - x3 = y3^x4^pipebit0; - y4 = pipebit1 ^ x4; - if((pitchInMacroTile % 2) == 0) - { //even - y5 = _BIT(y,5); - x5 = pipebit2 ^ y5; - *pY = Bits2Number(2, y4, y3); - *pX = Bits2Number(3, x5, x4, x3); - } - else - { //odd - *pY = Bits2Number(2, y4, y3); - *pX = Bits2Number(2, x4, x3); - } - break; - case ADDR_PIPECFG_P8_32x32_16x32: - if((pitchInMacroTile % 2) == 0) - { //even - y5 = _BIT(y,5); - y6 = _BIT(y,6); - x4 = pipebit1 ^ y6; - y3 = elemIdx0 ^ x4; - y4 = elemIdx1 ^ x4; - x3 = pipebit0 ^ y3 ^ x4; - x5 = pipebit2 ^ y5; - *pY = Bits2Number(2, y4, y3); - *pX = Bits2Number(3, x5, x4, x3); - } - else - { //odd - y6 = _BIT(y,6); - x4 = pipebit1 ^ y6; - y3 = elemIdx0 ^ x4; - y4 = elemIdx1 ^ x4; - x3 = pipebit0 ^ y3 ^ x4; - *pY = Bits2Number(2, y4, y3); - *pX = Bits2Number(2, x4, x3); - } - break; - case ADDR_PIPECFG_P8_32x64_32x32: - x4 = elemIdx2; - y3 = elemIdx0 ^ x4; - y4 = elemIdx1 ^ x4; - if((pitchInMacroTile % 4) == 0) - { //multiple of 4 - y5 = _BIT(y,5); - y6 = _BIT(y,6); - x5 = pipebit2 ^ y6; - x6 = pipebit1 ^ y5; - x3 = pipebit0 ^ y3 ^ x5; - *pY = Bits2Number(2, y4, y3); - *pX = Bits2Number(4, x6, x5, x4, x3); - } - else - { - y6 = _BIT(y,6); - x5 = pipebit2 ^ y6; - x3 = pipebit0 ^ y3 ^ x5; - *pY = Bits2Number(2, y4, y3); - *pX = Bits2Number(3, x5, x4, x3); - } - break; - case ADDR_PIPECFG_P16_32x32_8x16: - x4 = elemIdx1; - y4 = elemIdx0 ^ x4; - y3 = pipebit0 ^ x4; - x3 = pipebit1 ^ y4; - if((pitchInMacroTile % 4) == 0) - { //multiple of 4 - y5 = _BIT(y,5); - y6 = _BIT(y,6); - x5 = pipebit2 ^ y6; - x6 = pipebit3 ^ y5; - *pY = Bits2Number(2, y4, y3); - *pX = Bits2Number(4, x6, x5,x4, x3); - } - else - { - y6 = _BIT(y,6); - x5 = pipebit2 ^ y6; - *pY = Bits2Number(2, y4, y3); - *pX = Bits2Number(3, x5, x4, x3); - } - break; - case ADDR_PIPECFG_P16_32x32_16x16: - x4 = elemIdx1; - y3 = elemIdx0 ^ x4; - y4 = pipebit1 ^ x4; - x3 = pipebit0 ^ y3 ^ x4; - 
if((pitchInMacroTile % 4) == 0) - { //multiple of 4 - y5 = _BIT(y,5); - y6 = _BIT(y,6); - x5 = pipebit2 ^ y6; - x6 = pipebit3 ^ y5; - *pY = Bits2Number(2, y4, y3); - *pX = Bits2Number(4, x6, x5, x4, x3); - } - else - { - y6 = _BIT(y,6); - x5 = pipebit2 ^ y6; - *pY = Bits2Number(2, y4, y3); - *pX = Bits2Number(3, x5, x4, x3); - } - break; - default: - ADDR_UNHANDLED_CASE(); - } -} - -/** -**************************************************************************************************** -* SiLib::TileCoordToMaskElementIndex -* -* @brief -* Compute element index from coordinates in tiles -* @return -* Element index -**************************************************************************************************** -*/ -UINT_32 SiLib::TileCoordToMaskElementIndex( - UINT_32 tx, ///< [in] x coord, in Tiles - UINT_32 ty, ///< [in] y coord, in Tiles - AddrPipeCfg pipeConfig, ///< [in] pipe config - UINT_32* macroShift, ///< [out] macro shift - UINT_32* elemIdxBits ///< [out] tile offset bits - ) const -{ - UINT_32 elemIdx = 0; - UINT_32 elemIdx0, elemIdx1, elemIdx2; - UINT_32 tx0, tx1; - UINT_32 ty0, ty1; - - tx0 = _BIT(tx,0); - tx1 = _BIT(tx,1); - ty0 = _BIT(ty,0); - ty1 = _BIT(ty,1); - - switch(pipeConfig) - { - case ADDR_PIPECFG_P2: - *macroShift = 3; - *elemIdxBits =3; - elemIdx2 = tx1; - elemIdx1 = tx1 ^ ty1; - elemIdx0 = tx1 ^ ty0; - elemIdx = Bits2Number(3,elemIdx2,elemIdx1,elemIdx0); - break; - case ADDR_PIPECFG_P4_8x16: - *macroShift = 2; - *elemIdxBits =2; - elemIdx1 = tx1; - elemIdx0 = tx1 ^ ty1; - elemIdx = Bits2Number(2,elemIdx1,elemIdx0); - break; - case ADDR_PIPECFG_P4_16x16: - *macroShift = 2; - *elemIdxBits =2; - elemIdx0 = tx1^ty0; - elemIdx1 = tx1; - elemIdx = Bits2Number(2, elemIdx1, elemIdx0); - break; - case ADDR_PIPECFG_P4_16x32: - *macroShift = 2; - *elemIdxBits =2; - elemIdx0 = tx1^ty0; - elemIdx1 = tx1^ty1; - elemIdx = Bits2Number(2, elemIdx1, elemIdx0); - break; - case ADDR_PIPECFG_P4_32x32: - *macroShift = 2; - *elemIdxBits =3; - elemIdx0 = 
tx1^ty0; - elemIdx1 = tx1^ty1; - elemIdx2 = tx1; - elemIdx = Bits2Number(3, elemIdx2, elemIdx1, elemIdx0); - break; - case ADDR_PIPECFG_P8_16x16_8x16: - *macroShift = 1; - *elemIdxBits =1; - elemIdx0 = tx1; - elemIdx = elemIdx0; - break; - case ADDR_PIPECFG_P8_16x32_8x16: - *macroShift = 1; - *elemIdxBits =1; - elemIdx0 = tx0; - elemIdx = elemIdx0; - break; - case ADDR_PIPECFG_P8_32x32_8x16: - *macroShift = 1; - *elemIdxBits =2; - elemIdx1 = tx1; - elemIdx0 = tx1^ty1; - elemIdx = Bits2Number(2, elemIdx1, elemIdx0); - break; - case ADDR_PIPECFG_P8_16x32_16x16: - *macroShift = 1; - *elemIdxBits =1; - elemIdx0 = tx0; - elemIdx = elemIdx0; - break; - case ADDR_PIPECFG_P8_32x32_16x16: - *macroShift = 1; - *elemIdxBits =2; - elemIdx0 = tx1^ty0; - elemIdx1 = tx1; - elemIdx = Bits2Number(2, elemIdx1, elemIdx0); - break; - case ADDR_PIPECFG_P8_32x32_16x32: - *macroShift = 1; - *elemIdxBits =2; - elemIdx0 = tx1^ty0; - elemIdx1 = tx1^ty1; - elemIdx = Bits2Number(2, elemIdx1, elemIdx0); - break; - case ADDR_PIPECFG_P8_32x64_32x32: - *macroShift = 1; - *elemIdxBits =3; - elemIdx0 = tx1^ty0; - elemIdx1 = tx1^ty1; - elemIdx2 = tx1; - elemIdx = Bits2Number(3, elemIdx2, elemIdx1, elemIdx0); - break; - case ADDR_PIPECFG_P16_32x32_8x16: - *macroShift = 0; - *elemIdxBits =2; - elemIdx0 = tx1^ty1; - elemIdx1 = tx1; - elemIdx = Bits2Number(2, elemIdx1, elemIdx0); - break; - case ADDR_PIPECFG_P16_32x32_16x16: - *macroShift = 0; - *elemIdxBits =2; - elemIdx0 = tx1^ty0; - elemIdx1 = tx1; - elemIdx = Bits2Number(2, elemIdx1, elemIdx0); - break; - default: - ADDR_UNHANDLED_CASE(); - break; - } - - return elemIdx; -} - -/** -**************************************************************************************************** -* SiLib::HwlComputeTileDataWidthAndHeightLinear -* -* @brief -* Compute the squared cache shape for per-tile data (CMASK and HTILE) for linear layout -* -* @return -* N/A -* -* @note -* MacroWidth and macroHeight are measured in pixels 
-**************************************************************************************************** -*/ -VOID SiLib::HwlComputeTileDataWidthAndHeightLinear( - UINT_32* pMacroWidth, ///< [out] macro tile width - UINT_32* pMacroHeight, ///< [out] macro tile height - UINT_32 bpp, ///< [in] bits per pixel - ADDR_TILEINFO* pTileInfo ///< [in] tile info - ) const -{ - ADDR_ASSERT(pTileInfo != NULL); - UINT_32 macroWidth; - UINT_32 macroHeight; - - /// In linear mode, the htile or cmask buffer must be padded out to 4 tiles - /// but for P8_32x64_32x32, it must be padded out to 8 tiles - /// Actually there are more pipe configs which need 8-tile padding but SI family - /// has a bug which is fixed in CI family - if ((pTileInfo->pipeConfig == ADDR_PIPECFG_P8_32x64_32x32) || - (pTileInfo->pipeConfig == ADDR_PIPECFG_P16_32x32_8x16) || - (pTileInfo->pipeConfig == ADDR_PIPECFG_P8_32x32_16x16)) - { - macroWidth = 8*MicroTileWidth; - macroHeight = 8*MicroTileHeight; - } - else - { - macroWidth = 4*MicroTileWidth; - macroHeight = 4*MicroTileHeight; - } - - *pMacroWidth = macroWidth; - *pMacroHeight = macroHeight; -} - -/** -**************************************************************************************************** -* SiLib::HwlComputeHtileBytes -* -* @brief -* Compute htile size in bytes -* -* @return -* Htile size in bytes -**************************************************************************************************** -*/ -UINT_64 SiLib::HwlComputeHtileBytes( - UINT_32 pitch, ///< [in] pitch - UINT_32 height, ///< [in] height - UINT_32 bpp, ///< [in] bits per pixel - BOOL_32 isLinear, ///< [in] if it is linear mode - UINT_32 numSlices, ///< [in] number of slices - UINT_64* pSliceBytes, ///< [out] bytes per slice - UINT_32 baseAlign ///< [in] base alignments - ) const -{ - return ComputeHtileBytes(pitch, height, bpp, isLinear, numSlices, pSliceBytes, baseAlign); -} - -/** 
-**************************************************************************************************** -* SiLib::HwlComputeXmaskAddrFromCoord -* -* @brief -* Compute address from coordinates for htile/cmask -* @return -* Byte address -**************************************************************************************************** -*/ -UINT_64 SiLib::HwlComputeXmaskAddrFromCoord( - UINT_32 pitch, ///< [in] pitch - UINT_32 height, ///< [in] height - UINT_32 x, ///< [in] x coord - UINT_32 y, ///< [in] y coord - UINT_32 slice, ///< [in] slice/depth index - UINT_32 numSlices, ///< [in] number of slices - UINT_32 factor, ///< [in] factor that indicates cmask(2) or htile(1) - BOOL_32 isLinear, ///< [in] linear or tiled HTILE layout - BOOL_32 isWidth8, ///< [in] TRUE if width is 8, FALSE means 4. It's register value - BOOL_32 isHeight8, ///< [in] TRUE if width is 8, FALSE means 4. It's register value - ADDR_TILEINFO* pTileInfo, ///< [in] Tile info - UINT_32* pBitPosition ///< [out] bit position inside a byte - ) const -{ - UINT_32 tx = x / MicroTileWidth; - UINT_32 ty = y / MicroTileHeight; - UINT_32 newPitch; - UINT_32 newHeight; - UINT_64 totalBytes; - UINT_32 macroWidth; - UINT_32 macroHeight; - UINT_64 pSliceBytes; - UINT_32 pBaseAlign; - UINT_32 tileNumPerPipe; - UINT_32 elemBits; - - if (factor == 2) //CMASK - { - ADDR_CMASK_FLAGS flags = {{0}}; - - tileNumPerPipe = 256; - - ComputeCmaskInfo(flags, - pitch, - height, - numSlices, - isLinear, - pTileInfo, - &newPitch, - &newHeight, - &totalBytes, - ¯oWidth, - ¯oHeight); - elemBits = CmaskElemBits; - } - else //HTile - { - ADDR_HTILE_FLAGS flags = {{0}}; - - tileNumPerPipe = 512; - - ComputeHtileInfo(flags, - pitch, - height, - numSlices, - isLinear, - TRUE, - TRUE, - pTileInfo, - &newPitch, - &newHeight, - &totalBytes, - ¯oWidth, - ¯oHeight, - &pSliceBytes, - &pBaseAlign); - elemBits = 32; - } - - const UINT_32 pitchInTile = newPitch / MicroTileWidth; - const UINT_32 heightInTile = newHeight / MicroTileWidth; - 
UINT_64 macroOffset; // Per pipe starting offset of the macro tile in which this tile lies. - UINT_64 microNumber; // Per pipe starting offset of the macro tile in which this tile lies. - UINT_32 microX; - UINT_32 microY; - UINT_64 microOffset; - UINT_32 microShift; - UINT_64 totalOffset; - UINT_32 elemIdxBits; - UINT_32 elemIdx = - TileCoordToMaskElementIndex(tx, ty, pTileInfo->pipeConfig, µShift, &elemIdxBits); - - UINT_32 numPipes = HwlGetPipes(pTileInfo); - - if (isLinear) - { //linear addressing - // Linear addressing is extremelly wasting memory if slice > 1, since each pipe has the full - // slice memory foot print instead of divided by numPipes. - microX = tx / 4; // Macro Tile is 4x4 - microY = ty / 4 ; - microNumber = static_cast(microX + microY * (pitchInTile / 4)) << microShift; - - UINT_32 sliceBits = pitchInTile * heightInTile; - - // do htile single slice alignment if the flag is true - if (m_configFlags.useHtileSliceAlign && (factor == 1)) //Htile - { - sliceBits = PowTwoAlign(sliceBits, BITS_TO_BYTES(HtileCacheBits) * numPipes / elemBits); - } - macroOffset = slice * (sliceBits / numPipes) * elemBits ; - } - else - { //tiled addressing - const UINT_32 macroWidthInTile = macroWidth / MicroTileWidth; // Now in unit of Tiles - const UINT_32 macroHeightInTile = macroHeight / MicroTileHeight; - const UINT_32 pitchInCL = pitchInTile / macroWidthInTile; - const UINT_32 heightInCL = heightInTile / macroHeightInTile; - - const UINT_32 macroX = x / macroWidth; - const UINT_32 macroY = y / macroHeight; - const UINT_32 macroNumber = macroX + macroY * pitchInCL + slice * pitchInCL * heightInCL; - - // Per pipe starting offset of the cache line in which this tile lies. 
- microX = (x % macroWidth) / MicroTileWidth / 4; // Macro Tile is 4x4 - microY = (y % macroHeight) / MicroTileHeight / 4 ; - microNumber = static_cast(microX + microY * (macroWidth / MicroTileWidth / 4)) << microShift; - - macroOffset = macroNumber * tileNumPerPipe * elemBits; - } - - if(elemIdxBits == microShift) - { - microNumber += elemIdx; - } - else - { - microNumber >>= elemIdxBits; - microNumber <<= elemIdxBits; - microNumber += elemIdx; - } - - microOffset = elemBits * microNumber; - totalOffset = microOffset + macroOffset; - - UINT_32 pipe = ComputePipeFromCoord(x, y, 0, ADDR_TM_2D_TILED_THIN1, 0, FALSE, pTileInfo); - UINT_64 addrInBits = totalOffset % (m_pipeInterleaveBytes * 8) + - pipe * (m_pipeInterleaveBytes * 8) + - totalOffset / (m_pipeInterleaveBytes * 8) * (m_pipeInterleaveBytes * 8) * numPipes; - *pBitPosition = static_cast(addrInBits) % 8; - UINT_64 addr = addrInBits / 8; - - return addr; -} - -/** -**************************************************************************************************** -* SiLib::HwlComputeXmaskCoordFromAddr -* -* @brief -* Compute the coord from an address of a cmask/htile -* -* @return -* N/A -* -* @note -* This method is reused by htile, so rename to Xmask -**************************************************************************************************** -*/ -VOID SiLib::HwlComputeXmaskCoordFromAddr( - UINT_64 addr, ///< [in] address - UINT_32 bitPosition, ///< [in] bitPosition in a byte - UINT_32 pitch, ///< [in] pitch - UINT_32 height, ///< [in] height - UINT_32 numSlices, ///< [in] number of slices - UINT_32 factor, ///< [in] factor that indicates cmask or htile - BOOL_32 isLinear, ///< [in] linear or tiled HTILE layout - BOOL_32 isWidth8, ///< [in] Not used by SI - BOOL_32 isHeight8, ///< [in] Not used by SI - ADDR_TILEINFO* pTileInfo, ///< [in] Tile info - UINT_32* pX, ///< [out] x coord - UINT_32* pY, ///< [out] y coord - UINT_32* pSlice ///< [out] slice index - ) const -{ - UINT_32 newPitch; - UINT_32 
newHeight; - UINT_64 totalBytes; - UINT_32 clWidth; - UINT_32 clHeight; - UINT_32 tileNumPerPipe; - UINT_64 sliceBytes; - - *pX = 0; - *pY = 0; - *pSlice = 0; - - if (factor == 2) //CMASK - { - ADDR_CMASK_FLAGS flags = {{0}}; - - tileNumPerPipe = 256; - - ComputeCmaskInfo(flags, - pitch, - height, - numSlices, - isLinear, - pTileInfo, - &newPitch, - &newHeight, - &totalBytes, - &clWidth, - &clHeight); - } - else //HTile - { - ADDR_HTILE_FLAGS flags = {{0}}; - - tileNumPerPipe = 512; - - ComputeHtileInfo(flags, - pitch, - height, - numSlices, - isLinear, - TRUE, - TRUE, - pTileInfo, - &newPitch, - &newHeight, - &totalBytes, - &clWidth, - &clHeight, - &sliceBytes); - } - - const UINT_32 pitchInTile = newPitch / MicroTileWidth; - const UINT_32 heightInTile = newHeight / MicroTileWidth; - const UINT_32 pitchInMacroTile = pitchInTile / 4; - UINT_32 macroShift; - UINT_32 elemIdxBits; - // get macroShift and elemIdxBits - TileCoordToMaskElementIndex(0, 0, pTileInfo->pipeConfig, ¯oShift, &elemIdxBits); - - const UINT_32 numPipes = HwlGetPipes(pTileInfo); - const UINT_32 pipe = (UINT_32)((addr / m_pipeInterleaveBytes) % numPipes); - // per pipe - UINT_64 localOffset = (addr % m_pipeInterleaveBytes) + - (addr / m_pipeInterleaveBytes / numPipes)* m_pipeInterleaveBytes; - - UINT_32 tileIndex; - if (factor == 2) //CMASK - { - tileIndex = (UINT_32)(localOffset * 2 + (bitPosition != 0)); - } - else - { - tileIndex = (UINT_32)(localOffset / 4); - } - - UINT_32 macroOffset; - if (isLinear) - { - UINT_32 sliceSizeInTile = pitchInTile * heightInTile; - - // do htile single slice alignment if the flag is true - if (m_configFlags.useHtileSliceAlign && (factor == 1)) //Htile - { - sliceSizeInTile = PowTwoAlign(sliceSizeInTile, static_cast(sliceBytes) / 64); - } - *pSlice = tileIndex / (sliceSizeInTile / numPipes); - macroOffset = tileIndex % (sliceSizeInTile / numPipes); - } - else - { - const UINT_32 clWidthInTile = clWidth / MicroTileWidth; // Now in unit of Tiles - const UINT_32 
clHeightInTile = clHeight / MicroTileHeight; - const UINT_32 pitchInCL = pitchInTile / clWidthInTile; - const UINT_32 heightInCL = heightInTile / clHeightInTile; - const UINT_32 clIndex = tileIndex / tileNumPerPipe; - - UINT_32 clX = clIndex % pitchInCL; - UINT_32 clY = (clIndex % (heightInCL * pitchInCL)) / pitchInCL; - - *pX = clX * clWidthInTile * MicroTileWidth; - *pY = clY * clHeightInTile * MicroTileHeight; - *pSlice = clIndex / (heightInCL * pitchInCL); - - macroOffset = tileIndex % tileNumPerPipe; - } - - UINT_32 elemIdx = macroOffset & 7; - macroOffset >>= elemIdxBits; - - if (elemIdxBits != macroShift) - { - macroOffset <<= (elemIdxBits - macroShift); - - UINT_32 pipebit1 = _BIT(pipe,1); - UINT_32 pipebit2 = _BIT(pipe,2); - UINT_32 pipebit3 = _BIT(pipe,3); - if (pitchInMacroTile % 2) - { //odd - switch (pTileInfo->pipeConfig) - { - case ADDR_PIPECFG_P4_32x32: - macroOffset |= pipebit1; - break; - case ADDR_PIPECFG_P8_32x32_8x16: - case ADDR_PIPECFG_P8_32x32_16x16: - case ADDR_PIPECFG_P8_32x32_16x32: - macroOffset |= pipebit2; - break; - default: - break; - } - - } - - if (pitchInMacroTile % 4) - { - if (pTileInfo->pipeConfig == ADDR_PIPECFG_P8_32x64_32x32) - { - macroOffset |= (pipebit1<<1); - } - if((pTileInfo->pipeConfig == ADDR_PIPECFG_P16_32x32_8x16) || - (pTileInfo->pipeConfig == ADDR_PIPECFG_P16_32x32_16x16)) - { - macroOffset |= (pipebit3<<1); - } - } - } - - UINT_32 macroX; - UINT_32 macroY; - - if (isLinear) - { - macroX = macroOffset % pitchInMacroTile; - macroY = macroOffset / pitchInMacroTile; - } - else - { - const UINT_32 clWidthInMacroTile = clWidth / (MicroTileWidth * 4); - macroX = macroOffset % clWidthInMacroTile; - macroY = macroOffset / clWidthInMacroTile; - } - - *pX += macroX * 4 * MicroTileWidth; - *pY += macroY * 4 * MicroTileHeight; - - UINT_32 microX; - UINT_32 microY; - ComputeTileCoordFromPipeAndElemIdx(elemIdx, pipe, pTileInfo->pipeConfig, pitchInMacroTile, - *pX, *pY, µX, µY); - - *pX += microX * MicroTileWidth; - *pY += 
microY * MicroTileWidth; -} - -/** -**************************************************************************************************** -* SiLib::HwlGetPitchAlignmentLinear -* @brief -* Get pitch alignment -* @return -* pitch alignment -**************************************************************************************************** -*/ -UINT_32 SiLib::HwlGetPitchAlignmentLinear( - UINT_32 bpp, ///< [in] bits per pixel - ADDR_SURFACE_FLAGS flags ///< [in] surface flags - ) const -{ - UINT_32 pitchAlign; - - // Interleaved access requires a 256B aligned pitch, so fall back to pre-SI alignment - if (flags.interleaved) - { - pitchAlign = Max(64u, m_pipeInterleaveBytes / BITS_TO_BYTES(bpp)); - - } - else - { - pitchAlign = Max(8u, 64 / BITS_TO_BYTES(bpp)); - } - - return pitchAlign; -} - -/** -**************************************************************************************************** -* SiLib::HwlGetSizeAdjustmentLinear -* -* @brief -* Adjust linear surface pitch and slice size -* -* @return -* Logical slice size in bytes -**************************************************************************************************** -*/ -UINT_64 SiLib::HwlGetSizeAdjustmentLinear( - AddrTileMode tileMode, ///< [in] tile mode - UINT_32 bpp, ///< [in] bits per pixel - UINT_32 numSamples, ///< [in] number of samples - UINT_32 baseAlign, ///< [in] base alignment - UINT_32 pitchAlign, ///< [in] pitch alignment - UINT_32* pPitch, ///< [in,out] pointer to pitch - UINT_32* pHeight, ///< [in,out] pointer to height - UINT_32* pHeightAlign ///< [in,out] pointer to height align - ) const -{ - UINT_64 sliceSize; - if (tileMode == ADDR_TM_LINEAR_GENERAL) - { - sliceSize = BITS_TO_BYTES(static_cast(*pPitch) * (*pHeight) * bpp * numSamples); - } - else - { - UINT_32 pitch = *pPitch; - UINT_32 height = *pHeight; - - UINT_32 pixelsPerPipeInterleave = m_pipeInterleaveBytes / BITS_TO_BYTES(bpp); - UINT_32 sliceAlignInPixel = pixelsPerPipeInterleave < 64 ? 
64 : pixelsPerPipeInterleave; - - // numSamples should be 1 in real cases (no MSAA for linear but TGL may pass non 1 value) - UINT_64 pixelPerSlice = static_cast(pitch) * height * numSamples; - - while (pixelPerSlice % sliceAlignInPixel) - { - pitch += pitchAlign; - pixelPerSlice = static_cast(pitch) * height * numSamples; - } - - *pPitch = pitch; - - UINT_32 heightAlign = 1; - - while ((pitch * heightAlign) % sliceAlignInPixel) - { - heightAlign++; - } - - *pHeightAlign = heightAlign; - - sliceSize = BITS_TO_BYTES(pixelPerSlice * bpp); - } - - return sliceSize; -} - -/** -**************************************************************************************************** -* SiLib::HwlPreHandleBaseLvl3xPitch -* -* @brief -* Pre-handler of 3x pitch (96 bit) adjustment -* -* @return -* Expected pitch -**************************************************************************************************** -*/ -UINT_32 SiLib::HwlPreHandleBaseLvl3xPitch( - const ADDR_COMPUTE_SURFACE_INFO_INPUT* pIn, ///< [in] input - UINT_32 expPitch ///< [in] pitch - ) const -{ - ADDR_ASSERT(pIn->width == expPitch); - - // From SI, if pow2Pad is 1 the pitch is expanded 3x first, then padded to pow2, so nothing to - // do here - if (pIn->flags.pow2Pad == FALSE) - { - Addr::V1::Lib::HwlPreHandleBaseLvl3xPitch(pIn, expPitch); - } - else - { - ADDR_ASSERT(IsPow2(expPitch)); - } - - return expPitch; -} - -/** -**************************************************************************************************** -* SiLib::HwlPostHandleBaseLvl3xPitch -* -* @brief -* Post-handler of 3x pitch adjustment -* -* @return -* Expected pitch -**************************************************************************************************** -*/ -UINT_32 SiLib::HwlPostHandleBaseLvl3xPitch( - const ADDR_COMPUTE_SURFACE_INFO_INPUT* pIn, ///< [in] input - UINT_32 expPitch ///< [in] pitch - ) const -{ - /** - * @note The pitch will be divided by 3 in the end so the value will look odd but h/w should - * be able 
to compute a correct pitch from it as h/w address library is doing the job. - */ - // From SI, the pitch is expanded 3x first, then padded to pow2, so no special handler here - if (pIn->flags.pow2Pad == FALSE) - { - Addr::V1::Lib::HwlPostHandleBaseLvl3xPitch(pIn, expPitch); - } - - return expPitch; -} - -/** -**************************************************************************************************** -* SiLib::HwlGetPitchAlignmentMicroTiled -* -* @brief -* Compute 1D tiled surface pitch alignment -* -* @return -* pitch alignment -**************************************************************************************************** -*/ -UINT_32 SiLib::HwlGetPitchAlignmentMicroTiled( - AddrTileMode tileMode, ///< [in] tile mode - UINT_32 bpp, ///< [in] bits per pixel - ADDR_SURFACE_FLAGS flags, ///< [in] surface flags - UINT_32 numSamples ///< [in] number of samples - ) const -{ - UINT_32 pitchAlign; - - if (flags.qbStereo) - { - pitchAlign = EgBasedLib::HwlGetPitchAlignmentMicroTiled(tileMode,bpp,flags,numSamples); - } - else - { - pitchAlign = 8; - } - - return pitchAlign; -} - -/** -**************************************************************************************************** -* SiLib::HwlGetSizeAdjustmentMicroTiled -* -* @brief -* Adjust 1D tiled surface pitch and slice size -* -* @return -* Logical slice size in bytes -**************************************************************************************************** -*/ -UINT_64 SiLib::HwlGetSizeAdjustmentMicroTiled( - UINT_32 thickness, ///< [in] thickness - UINT_32 bpp, ///< [in] bits per pixel - ADDR_SURFACE_FLAGS flags, ///< [in] surface flags - UINT_32 numSamples, ///< [in] number of samples - UINT_32 baseAlign, ///< [in] base alignment - UINT_32 pitchAlign, ///< [in] pitch alignment - UINT_32* pPitch, ///< [in,out] pointer to pitch - UINT_32* pHeight ///< [in,out] pointer to height - ) const -{ - UINT_64 logicalSliceSize; - UINT_64 physicalSliceSize; - - UINT_32 pitch = *pPitch; - UINT_32 
height = *pHeight; - - // Logical slice: pitch * height * bpp * numSamples (no 1D MSAA so actually numSamples == 1) - logicalSliceSize = BITS_TO_BYTES(static_cast(pitch) * height * bpp * numSamples); - - // Physical slice: multiplied by thickness - physicalSliceSize = logicalSliceSize * thickness; - - // Pitch alignment is always 8, so if slice size is not padded to base alignment - // (pipe_interleave_size), we need to increase pitch - while ((physicalSliceSize % baseAlign) != 0) - { - pitch += pitchAlign; - - logicalSliceSize = BITS_TO_BYTES(static_cast(pitch) * height * bpp * numSamples); - - physicalSliceSize = logicalSliceSize * thickness; - } - -#if !ALT_TEST - // - // Special workaround for depth/stencil buffer, use 8 bpp to align depth buffer again since - // the stencil plane may have larger pitch if the slice size is smaller than base alignment. - // - // Note: this actually does not work for mipmap but mipmap depth texture is not really - // sampled with mipmap. - // - if (flags.depth && (flags.noStencil == FALSE)) - { - ADDR_ASSERT(numSamples == 1); - - UINT_64 logicalSiceSizeStencil = static_cast(pitch) * height; // 1 byte stencil - - while ((logicalSiceSizeStencil % baseAlign) != 0) - { - pitch += pitchAlign; // Stencil plane's pitch alignment is the same as depth plane's - - logicalSiceSizeStencil = static_cast(pitch) * height; - } - - if (pitch != *pPitch) - { - // If this is a mipmap, this padded one cannot be sampled as a whole mipmap! 
- logicalSliceSize = logicalSiceSizeStencil * BITS_TO_BYTES(bpp); - } - } -#endif - *pPitch = pitch; - - // No adjust for pHeight - - return logicalSliceSize; -} - -/** -**************************************************************************************************** -* SiLib::HwlConvertChipFamily -* -* @brief -* Convert familyID defined in atiid.h to ChipFamily and set m_chipFamily/m_chipRevision -* @return -* ChipFamily -**************************************************************************************************** -*/ -ChipFamily SiLib::HwlConvertChipFamily( - UINT_32 uChipFamily, ///< [in] chip family defined in atiih.h - UINT_32 uChipRevision) ///< [in] chip revision defined in "asic_family"_id.h -{ - ChipFamily family = ADDR_CHIP_FAMILY_SI; - - switch (uChipFamily) - { - case FAMILY_SI: - m_settings.isSouthernIsland = 1; - m_settings.isTahiti = ASICREV_IS_TAHITI_P(uChipRevision); - m_settings.isPitCairn = ASICREV_IS_PITCAIRN_PM(uChipRevision); - m_settings.isCapeVerde = ASICREV_IS_CAPEVERDE_M(uChipRevision); - m_settings.isOland = ASICREV_IS_OLAND_M(uChipRevision); - m_settings.isHainan = ASICREV_IS_HAINAN_V(uChipRevision); - break; - default: - ADDR_ASSERT(!"This should be a Fusion"); - break; - } - - return family; -} - -/** -**************************************************************************************************** -* SiLib::HwlSetupTileInfo -* -* @brief -* Setup default value of tile info for SI -**************************************************************************************************** -*/ -VOID SiLib::HwlSetupTileInfo( - AddrTileMode tileMode, ///< [in] Tile mode - ADDR_SURFACE_FLAGS flags, ///< [in] Surface type flags - UINT_32 bpp, ///< [in] Bits per pixel - UINT_32 pitch, ///< [in] Pitch in pixels - UINT_32 height, ///< [in] Height in pixels - UINT_32 numSamples, ///< [in] Number of samples - ADDR_TILEINFO* pTileInfoIn, ///< [in] Tile info input: NULL for default - ADDR_TILEINFO* pTileInfoOut, ///< [out] Tile info output - 
AddrTileType inTileType, ///< [in] Tile type - ADDR_COMPUTE_SURFACE_INFO_OUTPUT* pOut ///< [out] Output - ) const -{ - UINT_32 thickness = Thickness(tileMode); - ADDR_TILEINFO* pTileInfo = pTileInfoOut; - INT index = TileIndexInvalid; - - // Fail-safe code - if (IsLinear(tileMode) == FALSE) - { - // 128 bpp/thick tiling must be non-displayable. - // Fmask reuse color buffer's entry but bank-height field can be from another entry - // To simplify the logic, fmask entry should be picked from non-displayable ones - if (bpp == 128 || thickness > 1 || flags.fmask || flags.prt) - { - inTileType = ADDR_NON_DISPLAYABLE; - } - - if (flags.depth || flags.stencil) - { - inTileType = ADDR_DEPTH_SAMPLE_ORDER; - } - } - - // Partial valid fields are not allowed for SI. - if (IsTileInfoAllZero(pTileInfo)) - { - if (IsMacroTiled(tileMode)) - { - if (flags.prt) - { - if (numSamples == 1) - { - if (flags.depth) - { - switch (bpp) - { - case 16: - index = 3; - break; - case 32: - index = 6; - break; - default: - ADDR_ASSERT_ALWAYS(); - break; - } - } - else - { - switch (bpp) - { - case 8: - index = 21; - break; - case 16: - index = 22; - break; - case 32: - index = 23; - break; - case 64: - index = 24; - break; - case 128: - index = 25; - break; - default: - break; - } - - if (thickness > 1) - { - ADDR_ASSERT(bpp != 128); - index += 5; - } - } - } - else - { - ADDR_ASSERT(numSamples == 4); - - if (flags.depth) - { - switch (bpp) - { - case 16: - index = 5; - break; - case 32: - index = 7; - break; - default: - ADDR_ASSERT_ALWAYS(); - break; - } - } - else - { - switch (bpp) - { - case 8: - index = 23; - break; - case 16: - index = 24; - break; - case 32: - index = 25; - break; - case 64: - index = 30; - break; - default: - ADDR_ASSERT_ALWAYS(); - break; - } - } - } - }//end of PRT part - // See table entries 0-7 - else if (flags.depth || flags.stencil) - { - if (flags.compressZ) - { - if (flags.stencil) - { - index = 0; - } - else - { - // optimal tile index for compressed 
depth/stencil. - switch (numSamples) - { - case 1: - index = 0; - break; - case 2: - case 4: - index = 1; - break; - case 8: - index = 2; - break; - default: - break; - } - } - } - else // unCompressZ - { - index = 3; - } - } - else //non PRT & non Depth & non Stencil - { - // See table entries 9-12 - if (inTileType == ADDR_DISPLAYABLE) - { - switch (bpp) - { - case 8: - index = 10; - break; - case 16: - index = 11; - break; - case 32: - index = 12; - break; - case 64: - index = 12; - break; - default: - break; - } - } - else - { - // See table entries 13-17 - if (thickness == 1) - { - if (flags.fmask) - { - UINT_32 fmaskPixelSize = bpp * numSamples; - - switch (fmaskPixelSize) - { - case 8: - index = 14; - break; - case 16: - index = 15; - break; - case 32: - index = 16; - break; - case 64: - index = 17; - break; - default: - ADDR_ASSERT_ALWAYS(); - } - } - else - { - switch (bpp) - { - case 8: - index = 14; - break; - case 16: - index = 15; - break; - case 32: - index = 16; - break; - case 64: - index = 17; - break; - case 128: - index = 17; - break; - default: - break; - } - } - } - else // thick tiling - entries 18-20 - { - switch (thickness) - { - case 4: - index = 20; - break; - case 8: - index = 19; - break; - default: - break; - } - } - } - } - } - else - { - if (tileMode == ADDR_TM_LINEAR_ALIGNED) - { - index = 8; - } - else if (tileMode == ADDR_TM_LINEAR_GENERAL) - { - index = TileIndexLinearGeneral; - } - else - { - if (flags.depth || flags.stencil) - { - index = 4; - } - else if (inTileType == ADDR_DISPLAYABLE) - { - index = 9; - } - else if (thickness == 1) - { - index = 13; - } - else - { - index = 18; - } - } - } - - if (index >= 0 && index <= 31) - { - *pTileInfo = m_tileTable[index].info; - pOut->tileType = m_tileTable[index].type; - } - - if (index == TileIndexLinearGeneral) - { - *pTileInfo = m_tileTable[8].info; - pOut->tileType = m_tileTable[8].type; - } - } - else - { - if (pTileInfoIn) - { - if (flags.stencil && pTileInfoIn->tileSplitBytes == 
0) - { - // Stencil always uses index 0 - *pTileInfo = m_tileTable[0].info; - } - } - // Pass through tile type - pOut->tileType = inTileType; - } - - pOut->tileIndex = index; - pOut->prtTileIndex = flags.prt; -} - -/** -**************************************************************************************************** -* SiLib::DecodeGbRegs -* -* @brief -* Decodes GB_ADDR_CONFIG and noOfBanks/noOfRanks -* -* @return -* TRUE if all settings are valid -* -**************************************************************************************************** -*/ -BOOL_32 SiLib::DecodeGbRegs( - const ADDR_REGISTER_VALUE* pRegValue) ///< [in] create input -{ - GB_ADDR_CONFIG reg; - BOOL_32 valid = TRUE; - - reg.val = pRegValue->gbAddrConfig; - - switch (reg.f.pipe_interleave_size) - { - case ADDR_CONFIG_PIPE_INTERLEAVE_256B: - m_pipeInterleaveBytes = ADDR_PIPEINTERLEAVE_256B; - break; - case ADDR_CONFIG_PIPE_INTERLEAVE_512B: - m_pipeInterleaveBytes = ADDR_PIPEINTERLEAVE_512B; - break; - default: - valid = FALSE; - ADDR_UNHANDLED_CASE(); - break; - } - - switch (reg.f.row_size) - { - case ADDR_CONFIG_1KB_ROW: - m_rowSize = ADDR_ROWSIZE_1KB; - break; - case ADDR_CONFIG_2KB_ROW: - m_rowSize = ADDR_ROWSIZE_2KB; - break; - case ADDR_CONFIG_4KB_ROW: - m_rowSize = ADDR_ROWSIZE_4KB; - break; - default: - valid = FALSE; - ADDR_UNHANDLED_CASE(); - break; - } - - switch (pRegValue->noOfBanks) - { - case 0: - m_banks = 4; - break; - case 1: - m_banks = 8; - break; - case 2: - m_banks = 16; - break; - default: - valid = FALSE; - ADDR_UNHANDLED_CASE(); - break; - } - - switch (pRegValue->noOfRanks) - { - case 0: - m_ranks = 1; - break; - case 1: - m_ranks = 2; - break; - default: - valid = FALSE; - ADDR_UNHANDLED_CASE(); - break; - } - - m_logicalBanks = m_banks * m_ranks; - - ADDR_ASSERT(m_logicalBanks <= 16); - - return valid; -} - -/** -**************************************************************************************************** -* SiLib::HwlInitGlobalParams -* -* @brief -* 
Initializes global parameters -* -* @return -* TRUE if all settings are valid -* -**************************************************************************************************** -*/ -BOOL_32 SiLib::HwlInitGlobalParams( - const ADDR_CREATE_INPUT* pCreateIn) ///< [in] create input -{ - BOOL_32 valid = TRUE; - const ADDR_REGISTER_VALUE* pRegValue = &pCreateIn->regValue; - - valid = DecodeGbRegs(pRegValue); - - if (valid) - { - if (m_settings.isTahiti || m_settings.isPitCairn) - { - m_pipes = 8; - } - else if (m_settings.isCapeVerde || m_settings.isOland) - { - m_pipes = 4; - } - else - { - // Hainan is 2-pipe (m_settings.isHainan == 1) - m_pipes = 2; - } - - valid = InitTileSettingTable(pRegValue->pTileConfig, pRegValue->noOfEntries); - - if (valid) - { - InitEquationTable(); - } - - m_maxSamples = 16; - } - - return valid; -} - -/** -**************************************************************************************************** -* SiLib::HwlConvertTileInfoToHW -* @brief -* Entry of si's ConvertTileInfoToHW -* @return -* ADDR_E_RETURNCODE -**************************************************************************************************** -*/ -ADDR_E_RETURNCODE SiLib::HwlConvertTileInfoToHW( - const ADDR_CONVERT_TILEINFOTOHW_INPUT* pIn, ///< [in] input structure - ADDR_CONVERT_TILEINFOTOHW_OUTPUT* pOut ///< [out] output structure - ) const -{ - ADDR_E_RETURNCODE retCode = ADDR_OK; - - retCode = EgBasedLib::HwlConvertTileInfoToHW(pIn, pOut); - - if (retCode == ADDR_OK) - { - if (pIn->reverse == FALSE) - { - if (pIn->pTileInfo->pipeConfig == ADDR_PIPECFG_INVALID) - { - retCode = ADDR_INVALIDPARAMS; - } - else - { - pOut->pTileInfo->pipeConfig = - static_cast(pIn->pTileInfo->pipeConfig - 1); - } - } - else - { - pOut->pTileInfo->pipeConfig = - static_cast(pIn->pTileInfo->pipeConfig + 1); - } - } - - return retCode; -} - -/** -**************************************************************************************************** -* 
SiLib::HwlComputeXmaskCoordYFrom8Pipe -* -* @brief -* Compute the Y coord which will be added to Xmask Y -* coord. -* @return -* Y coord -**************************************************************************************************** -*/ -UINT_32 SiLib::HwlComputeXmaskCoordYFrom8Pipe( - UINT_32 pipe, ///< [in] pipe id - UINT_32 x ///< [in] tile coord x, which is original x coord / 8 - ) const -{ - // This function should never be called since it is 6xx/8xx specfic. - // Keep this empty implementation to avoid any mis-use. - ADDR_ASSERT_ALWAYS(); - - return 0; -} - -/** -**************************************************************************************************** -* SiLib::HwlComputeSurfaceCoord2DFromBankPipe -* -* @brief -* Compute surface x,y coordinates from bank/pipe info -* @return -* N/A -**************************************************************************************************** -*/ -VOID SiLib::HwlComputeSurfaceCoord2DFromBankPipe( - AddrTileMode tileMode, ///< [in] tile mode - UINT_32* pX, ///< [in,out] x coordinate - UINT_32* pY, ///< [in,out] y coordinate - UINT_32 slice, ///< [in] slice index - UINT_32 bank, ///< [in] bank number - UINT_32 pipe, ///< [in] pipe number - UINT_32 bankSwizzle,///< [in] bank swizzle - UINT_32 pipeSwizzle,///< [in] pipe swizzle - UINT_32 tileSlices, ///< [in] slices in a micro tile - BOOL_32 ignoreSE, ///< [in] TRUE if shader engines are ignored - ADDR_TILEINFO* pTileInfo ///< [in] bank structure. 
**All fields to be valid on entry** - ) const -{ - UINT_32 xBit; - UINT_32 yBit; - UINT_32 yBit3 = 0; - UINT_32 yBit4 = 0; - UINT_32 yBit5 = 0; - UINT_32 yBit6 = 0; - - UINT_32 xBit3 = 0; - UINT_32 xBit4 = 0; - UINT_32 xBit5 = 0; - - UINT_32 numPipes = GetPipePerSurf(pTileInfo->pipeConfig); - - CoordFromBankPipe xyBits = {0}; - ComputeSurfaceCoord2DFromBankPipe(tileMode, *pX, *pY, slice, bank, pipe, - bankSwizzle, pipeSwizzle, tileSlices, pTileInfo, - &xyBits); - yBit3 = xyBits.yBit3; - yBit4 = xyBits.yBit4; - yBit5 = xyBits.yBit5; - yBit6 = xyBits.yBit6; - - xBit3 = xyBits.xBit3; - xBit4 = xyBits.xBit4; - xBit5 = xyBits.xBit5; - - yBit = xyBits.yBits; - - UINT_32 yBitTemp = 0; - - if ((pTileInfo->pipeConfig == ADDR_PIPECFG_P4_32x32) || - (pTileInfo->pipeConfig == ADDR_PIPECFG_P8_32x64_32x32)) - { - ADDR_ASSERT(pTileInfo->bankWidth == 1 && pTileInfo->macroAspectRatio > 1); - UINT_32 yBitToCheck = QLog2(pTileInfo->banks) - 1; - - ADDR_ASSERT(yBitToCheck <= 3); - - yBitTemp = _BIT(yBit, yBitToCheck); - - xBit3 = 0; - } - - yBit = Bits2Number(4, yBit6, yBit5, yBit4, yBit3); - xBit = Bits2Number(3, xBit5, xBit4, xBit3); - - *pY += yBit * pTileInfo->bankHeight * MicroTileHeight; - *pX += xBit * numPipes * pTileInfo->bankWidth * MicroTileWidth; - - //calculate the bank and pipe bits in x, y - UINT_32 xTile; //x in micro tile - UINT_32 x3 = 0; - UINT_32 x4 = 0; - UINT_32 x5 = 0; - UINT_32 x6 = 0; - UINT_32 y = *pY; - - UINT_32 pipeBit0 = _BIT(pipe,0); - UINT_32 pipeBit1 = _BIT(pipe,1); - UINT_32 pipeBit2 = _BIT(pipe,2); - - UINT_32 y3 = _BIT(y, 3); - UINT_32 y4 = _BIT(y, 4); - UINT_32 y5 = _BIT(y, 5); - UINT_32 y6 = _BIT(y, 6); - - // bankbit0 after ^x4^x5 - UINT_32 bankBit00 = _BIT(bank,0); - UINT_32 bankBit0 = 0; - - switch (pTileInfo->pipeConfig) - { - case ADDR_PIPECFG_P2: - x3 = pipeBit0 ^ y3; - break; - case ADDR_PIPECFG_P4_8x16: - x4 = pipeBit0 ^ y3; - x3 = pipeBit0 ^ y4; - break; - case ADDR_PIPECFG_P4_16x16: - x4 = pipeBit1 ^ y4; - x3 = pipeBit0 ^ y3 ^ x4; - 
break; - case ADDR_PIPECFG_P4_16x32: - x4 = pipeBit1 ^ y4; - x3 = pipeBit0 ^ y3 ^ x4; - break; - case ADDR_PIPECFG_P4_32x32: - x5 = pipeBit1 ^ y5; - x3 = pipeBit0 ^ y3 ^ x5; - bankBit0 = yBitTemp ^ x5; - x4 = bankBit00 ^ x5 ^ bankBit0; - *pX += x5 * 4 * 1 * 8; // x5 * num_pipes * bank_width * 8; - break; - case ADDR_PIPECFG_P8_16x16_8x16: - x3 = pipeBit1 ^ y5; - x4 = pipeBit2 ^ y4; - x5 = pipeBit0 ^ y3 ^ x4; - break; - case ADDR_PIPECFG_P8_16x32_8x16: - x3 = pipeBit1 ^ y4; - x4 = pipeBit2 ^ y5; - x5 = pipeBit0 ^ y3 ^ x4; - break; - case ADDR_PIPECFG_P8_32x32_8x16: - x3 = pipeBit1 ^ y4; - x5 = pipeBit2 ^ y5; - x4 = pipeBit0 ^ y3 ^ x5; - break; - case ADDR_PIPECFG_P8_16x32_16x16: - x4 = pipeBit2 ^ y5; - x5 = pipeBit1 ^ y4; - x3 = pipeBit0 ^ y3 ^ x4; - break; - case ADDR_PIPECFG_P8_32x32_16x16: - x5 = pipeBit2 ^ y5; - x4 = pipeBit1 ^ y4; - x3 = pipeBit0 ^ y3 ^ x4; - break; - case ADDR_PIPECFG_P8_32x32_16x32: - x5 = pipeBit2 ^ y5; - x4 = pipeBit1 ^ y6; - x3 = pipeBit0 ^ y3 ^ x4; - break; - case ADDR_PIPECFG_P8_32x64_32x32: - x6 = pipeBit1 ^ y5; - x5 = pipeBit2 ^ y6; - x3 = pipeBit0 ^ y3 ^ x5; - bankBit0 = yBitTemp ^ x6; - x4 = bankBit00 ^ x5 ^ bankBit0; - *pX += x6 * 8 * 1 * 8; // x6 * num_pipes * bank_width * 8; - break; - default: - ADDR_ASSERT_ALWAYS(); - } - - xTile = Bits2Number(3, x5, x4, x3); - - *pX += xTile << 3; -} - -/** -**************************************************************************************************** -* SiLib::HwlPreAdjustBank -* -* @brief -* Adjust bank before calculating address acoording to bank/pipe -* @return -* Adjusted bank -**************************************************************************************************** -*/ -UINT_32 SiLib::HwlPreAdjustBank( - UINT_32 tileX, ///< [in] x coordinate in unit of tile - UINT_32 bank, ///< [in] bank - ADDR_TILEINFO* pTileInfo ///< [in] tile info - ) const -{ - if (((pTileInfo->pipeConfig == ADDR_PIPECFG_P4_32x32) || - (pTileInfo->pipeConfig == ADDR_PIPECFG_P8_32x64_32x32)) && 
(pTileInfo->bankWidth == 1)) - { - UINT_32 bankBit0 = _BIT(bank, 0); - UINT_32 x4 = _BIT(tileX, 1); - UINT_32 x5 = _BIT(tileX, 2); - - bankBit0 = bankBit0 ^ x4 ^ x5; - bank |= bankBit0; - - ADDR_ASSERT(pTileInfo->macroAspectRatio > 1); - } - - return bank; -} - -/** -**************************************************************************************************** -* SiLib::HwlComputeSurfaceInfo -* -* @brief -* Entry of si's ComputeSurfaceInfo -* @return -* ADDR_E_RETURNCODE -**************************************************************************************************** -*/ -ADDR_E_RETURNCODE SiLib::HwlComputeSurfaceInfo( - const ADDR_COMPUTE_SURFACE_INFO_INPUT* pIn, ///< [in] input structure - ADDR_COMPUTE_SURFACE_INFO_OUTPUT* pOut ///< [out] output structure - ) const -{ - pOut->tileIndex = pIn->tileIndex; - - ADDR_E_RETURNCODE retCode = EgBasedLib::HwlComputeSurfaceInfo(pIn, pOut); - - UINT_32 tileIndex = static_cast(pOut->tileIndex); - - if (((pIn->flags.needEquation == TRUE) || - (pIn->flags.preferEquation == TRUE)) && - (pIn->numSamples <= 1) && - (tileIndex < TileTableSize)) - { - static const UINT_32 SiUncompressDepthTileIndex = 3; - - if ((pIn->numSlices > 1) && - (IsMacroTiled(pOut->tileMode) == TRUE) && - ((m_chipFamily == ADDR_CHIP_FAMILY_SI) || - (IsPrtTileMode(pOut->tileMode) == FALSE))) - { - pOut->equationIndex = ADDR_INVALID_EQUATION_INDEX; - } - else if ((pIn->flags.prt == FALSE) && - (m_uncompressDepthEqIndex != 0) && - (tileIndex == SiUncompressDepthTileIndex)) - { - pOut->equationIndex = m_uncompressDepthEqIndex + Log2(pIn->bpp >> 3); - } - else - { - - pOut->equationIndex = m_equationLookupTable[Log2(pIn->bpp >> 3)][tileIndex]; - } - - if (pOut->equationIndex != ADDR_INVALID_EQUATION_INDEX) - { - pOut->blockWidth = m_blockWidth[pOut->equationIndex]; - - pOut->blockHeight = m_blockHeight[pOut->equationIndex]; - - pOut->blockSlices = m_blockSlices[pOut->equationIndex]; - } - } - else - { - pOut->equationIndex = 
ADDR_INVALID_EQUATION_INDEX; - } - - return retCode; -} - -/** -**************************************************************************************************** -* SiLib::HwlComputeMipLevel -* @brief -* Compute MipLevel info (including level 0) -* @return -* TRUE if HWL's handled -**************************************************************************************************** -*/ -BOOL_32 SiLib::HwlComputeMipLevel( - ADDR_COMPUTE_SURFACE_INFO_INPUT* pIn ///< [in,out] Input structure - ) const -{ - // basePitch is calculated from level 0 so we only check this for mipLevel > 0 - if (pIn->mipLevel > 0) - { - // Note: Don't check expand 3x formats(96 bit) as the basePitch is not pow2 even if - // we explicity set pow2Pad flag. The 3x base pitch is padded to pow2 but after being - // divided by expandX factor (3) - to program texture pitch, the basePitch is never pow2. - if (ElemLib::IsExpand3x(pIn->format) == FALSE) - { - // Sublevel pitches are generated from base level pitch instead of width on SI - // If pow2Pad is 0, we don't assert - as this is not really used for a mip chain - ADDR_ASSERT((pIn->flags.pow2Pad == FALSE) || - ((pIn->basePitch != 0) && IsPow2(pIn->basePitch))); - } - - if (pIn->basePitch != 0) - { - pIn->width = Max(1u, pIn->basePitch >> pIn->mipLevel); - } - } - - // pow2Pad is done in PostComputeMipLevel - - return TRUE; -} - -/** -**************************************************************************************************** -* SiLib::HwlCheckLastMacroTiledLvl -* -* @brief -* Sets pOut->last2DLevel to TRUE if it is -* @note -* -**************************************************************************************************** -*/ -VOID SiLib::HwlCheckLastMacroTiledLvl( - const ADDR_COMPUTE_SURFACE_INFO_INPUT* pIn, ///< [in] Input structure - ADDR_COMPUTE_SURFACE_INFO_OUTPUT* pOut ///< [in,out] Output structure (used as input, too) - ) const -{ - // pow2Pad covers all mipmap cases - if (pIn->flags.pow2Pad) - { - 
ADDR_ASSERT(IsMacroTiled(pIn->tileMode)); - - UINT_32 nextPitch; - UINT_32 nextHeight; - UINT_32 nextSlices; - - AddrTileMode nextTileMode; - - if (pIn->mipLevel == 0 || pIn->basePitch == 0) - { - // Base level or fail-safe case (basePitch == 0) - nextPitch = pOut->pitch >> 1; - } - else - { - // Sub levels - nextPitch = pIn->basePitch >> (pIn->mipLevel + 1); - } - - // nextHeight must be shifted from this level's original height rather than a pow2 padded - // one but this requires original height stored somewhere (pOut->height) - ADDR_ASSERT(pOut->height != 0); - - // next level's height is just current level's >> 1 in pixels - nextHeight = pOut->height >> 1; - // Special format such as FMT_1 and FMT_32_32_32 can be linear only so we consider block - // compressed foramts - if (ElemLib::IsBlockCompressed(pIn->format)) - { - nextHeight = (nextHeight + 3) / 4; - } - nextHeight = NextPow2(nextHeight); - - // nextSlices may be 0 if this level's is 1 - if (pIn->flags.volume) - { - nextSlices = Max(1u, pIn->numSlices >> 1); - } - else - { - nextSlices = pIn->numSlices; - } - - nextTileMode = ComputeSurfaceMipLevelTileMode(pIn->tileMode, - pIn->bpp, - nextPitch, - nextHeight, - nextSlices, - pIn->numSamples, - pOut->blockWidth, - pOut->blockHeight, - pOut->pTileInfo); - - pOut->last2DLevel = IsMicroTiled(nextTileMode); - } -} - -/** -**************************************************************************************************** -* SiLib::HwlDegradeThickTileMode -* -* @brief -* Degrades valid tile mode for thick modes if needed -* -* @return -* Suitable tile mode -**************************************************************************************************** -*/ -AddrTileMode SiLib::HwlDegradeThickTileMode( - AddrTileMode baseTileMode, ///< base tile mode - UINT_32 numSlices, ///< current number of slices - UINT_32* pBytesPerTile ///< [in,out] pointer to bytes per slice - ) const -{ - return EgBasedLib::HwlDegradeThickTileMode(baseTileMode, numSlices, 
pBytesPerTile); -} - -/** -**************************************************************************************************** -* SiLib::HwlTileInfoEqual -* -* @brief -* Return TRUE if all field are equal -* @note -* Only takes care of current HWL's data -**************************************************************************************************** -*/ -BOOL_32 SiLib::HwlTileInfoEqual( - const ADDR_TILEINFO* pLeft, ///<[in] Left compare operand - const ADDR_TILEINFO* pRight ///<[in] Right compare operand - ) const -{ - BOOL_32 equal = FALSE; - - if (pLeft->pipeConfig == pRight->pipeConfig) - { - equal = EgBasedLib::HwlTileInfoEqual(pLeft, pRight); - } - - return equal; -} - -/** -**************************************************************************************************** -* SiLib::GetTileSettings -* -* @brief -* Get tile setting infos by index. -* @return -* Tile setting info. -**************************************************************************************************** -*/ -const TileConfig* SiLib::GetTileSetting( - UINT_32 index ///< [in] Tile index - ) const -{ - ADDR_ASSERT(index < m_noOfEntries); - return &m_tileTable[index]; -} - -/** -**************************************************************************************************** -* SiLib::HwlPostCheckTileIndex -* -* @brief -* Map a tile setting to index if curIndex is invalid, otherwise check if curIndex matches -* tile mode/type/info and change the index if needed -* @return -* Tile index. 
-**************************************************************************************************** -*/ -INT_32 SiLib::HwlPostCheckTileIndex( - const ADDR_TILEINFO* pInfo, ///< [in] Tile Info - AddrTileMode mode, ///< [in] Tile mode - AddrTileType type, ///< [in] Tile type - INT curIndex ///< [in] Current index assigned in HwlSetupTileInfo - ) const -{ - INT_32 index = curIndex; - - if (mode == ADDR_TM_LINEAR_GENERAL) - { - index = TileIndexLinearGeneral; - } - else - { - BOOL_32 macroTiled = IsMacroTiled(mode); - - // We need to find a new index if either of them is true - // 1. curIndex is invalid - // 2. tile mode is changed - // 3. tile info does not match for macro tiled - if ((index == TileIndexInvalid || - (mode != m_tileTable[index].mode) || - (macroTiled && (HwlTileInfoEqual(pInfo, &m_tileTable[index].info) == FALSE)))) - { - for (index = 0; index < static_cast(m_noOfEntries); index++) - { - if (macroTiled) - { - // macro tile modes need all to match - if (HwlTileInfoEqual(pInfo, &m_tileTable[index].info) && - (mode == m_tileTable[index].mode) && - (type == m_tileTable[index].type)) - { - break; - } - } - else if (mode == ADDR_TM_LINEAR_ALIGNED) - { - // linear mode only needs tile mode to match - if (mode == m_tileTable[index].mode) - { - break; - } - } - else - { - // micro tile modes only need tile mode and tile type to match - if (mode == m_tileTable[index].mode && - type == m_tileTable[index].type) - { - break; - } - } - } - } - } - - ADDR_ASSERT(index < static_cast(m_noOfEntries)); - - if (index >= static_cast(m_noOfEntries)) - { - index = TileIndexInvalid; - } - - return index; -} - -/** -**************************************************************************************************** -* SiLib::HwlSetupTileCfg -* -* @brief -* Map tile index to tile setting. 
-* @return -* ADDR_E_RETURNCODE -**************************************************************************************************** -*/ -ADDR_E_RETURNCODE SiLib::HwlSetupTileCfg( - UINT_32 bpp, ///< Bits per pixel - INT_32 index, ///< Tile index - INT_32 macroModeIndex, ///< Index in macro tile mode table(CI) - ADDR_TILEINFO* pInfo, ///< [out] Tile Info - AddrTileMode* pMode, ///< [out] Tile mode - AddrTileType* pType ///< [out] Tile type - ) const -{ - ADDR_E_RETURNCODE returnCode = ADDR_OK; - - // Global flag to control usage of tileIndex - if (UseTileIndex(index)) - { - if (index == TileIndexLinearGeneral) - { - if (pMode) - { - *pMode = ADDR_TM_LINEAR_GENERAL; - } - - if (pType) - { - *pType = ADDR_DISPLAYABLE; - } - - if (pInfo) - { - pInfo->banks = 2; - pInfo->bankWidth = 1; - pInfo->bankHeight = 1; - pInfo->macroAspectRatio = 1; - pInfo->tileSplitBytes = 64; - pInfo->pipeConfig = ADDR_PIPECFG_P2; - } - } - else if (static_cast(index) >= m_noOfEntries) - { - returnCode = ADDR_INVALIDPARAMS; - } - else - { - const TileConfig* pCfgTable = GetTileSetting(index); - - if (pInfo) - { - *pInfo = pCfgTable->info; - } - else - { - if (IsMacroTiled(pCfgTable->mode)) - { - returnCode = ADDR_INVALIDPARAMS; - } - } - - if (pMode) - { - *pMode = pCfgTable->mode; - } - - if (pType) - { - *pType = pCfgTable->type; - } - } - } - - return returnCode; -} - -/** -**************************************************************************************************** -* SiLib::ReadGbTileMode -* -* @brief -* Convert GB_TILE_MODE HW value to TileConfig. -* @return -* NA. 
-**************************************************************************************************** -*/ -VOID SiLib::ReadGbTileMode( - UINT_32 regValue, ///< [in] GB_TILE_MODE register - TileConfig* pCfg ///< [out] output structure - ) const -{ - GB_TILE_MODE gbTileMode; - gbTileMode.val = regValue; - - pCfg->type = static_cast(gbTileMode.f.micro_tile_mode); - pCfg->info.bankHeight = 1 << gbTileMode.f.bank_height; - pCfg->info.bankWidth = 1 << gbTileMode.f.bank_width; - pCfg->info.banks = 1 << (gbTileMode.f.num_banks + 1); - pCfg->info.macroAspectRatio = 1 << gbTileMode.f.macro_tile_aspect; - pCfg->info.tileSplitBytes = 64 << gbTileMode.f.tile_split; - pCfg->info.pipeConfig = static_cast(gbTileMode.f.pipe_config + 1); - - UINT_32 regArrayMode = gbTileMode.f.array_mode; - - pCfg->mode = static_cast(regArrayMode); - - if (regArrayMode == 8) //ARRAY_2D_TILED_XTHICK - { - pCfg->mode = ADDR_TM_2D_TILED_XTHICK; - } - else if (regArrayMode >= 14) //ARRAY_3D_TILED_XTHICK - { - pCfg->mode = static_cast(pCfg->mode + 3); - } -} - -/** -**************************************************************************************************** -* SiLib::InitTileSettingTable -* -* @brief -* Initialize the ADDR_TILE_CONFIG table. 
-* @return -* TRUE if tile table is correctly initialized -**************************************************************************************************** -*/ -BOOL_32 SiLib::InitTileSettingTable( - const UINT_32* pCfg, ///< [in] Pointer to table of tile configs - UINT_32 noOfEntries ///< [in] Numbe of entries in the table above - ) -{ - BOOL_32 initOk = TRUE; - - ADDR_ASSERT(noOfEntries <= TileTableSize); - - memset(m_tileTable, 0, sizeof(m_tileTable)); - - if (noOfEntries != 0) - { - m_noOfEntries = noOfEntries; - } - else - { - m_noOfEntries = TileTableSize; - } - - if (pCfg) // From Client - { - for (UINT_32 i = 0; i < m_noOfEntries; i++) - { - ReadGbTileMode(*(pCfg + i), &m_tileTable[i]); - } - } - else - { - ADDR_ASSERT_ALWAYS(); - initOk = FALSE; - } - - if (initOk) - { - ADDR_ASSERT(m_tileTable[TILEINDEX_LINEAR_ALIGNED].mode == ADDR_TM_LINEAR_ALIGNED); - } - - return initOk; -} - -/** -**************************************************************************************************** -* SiLib::HwlGetTileIndex -* -* @brief -* Return the virtual/real index for given mode/type/info -* @return -* ADDR_OK if successful. 
-**************************************************************************************************** -*/ -ADDR_E_RETURNCODE SiLib::HwlGetTileIndex( - const ADDR_GET_TILEINDEX_INPUT* pIn, - ADDR_GET_TILEINDEX_OUTPUT* pOut) const -{ - ADDR_E_RETURNCODE returnCode = ADDR_OK; - - pOut->index = HwlPostCheckTileIndex(pIn->pTileInfo, pIn->tileMode, pIn->tileType); - - return returnCode; -} - -/** -**************************************************************************************************** -* SiLib::HwlFmaskPreThunkSurfInfo -* -* @brief -* Some preparation before thunking a ComputeSurfaceInfo call for Fmask -* @return -* ADDR_E_RETURNCODE -**************************************************************************************************** -*/ -VOID SiLib::HwlFmaskPreThunkSurfInfo( - const ADDR_COMPUTE_FMASK_INFO_INPUT* pFmaskIn, ///< [in] Input of fmask info - const ADDR_COMPUTE_FMASK_INFO_OUTPUT* pFmaskOut, ///< [in] Output of fmask info - ADDR_COMPUTE_SURFACE_INFO_INPUT* pSurfIn, ///< [out] Input of thunked surface info - ADDR_COMPUTE_SURFACE_INFO_OUTPUT* pSurfOut ///< [out] Output of thunked surface info - ) const -{ - pSurfIn->tileIndex = pFmaskIn->tileIndex; -} - -/** -**************************************************************************************************** -* SiLib::HwlFmaskPostThunkSurfInfo -* -* @brief -* Copy hwl extra field after calling thunked ComputeSurfaceInfo -* @return -* ADDR_E_RETURNCODE -**************************************************************************************************** -*/ -VOID SiLib::HwlFmaskPostThunkSurfInfo( - const ADDR_COMPUTE_SURFACE_INFO_OUTPUT* pSurfOut, ///< [in] Output of surface info - ADDR_COMPUTE_FMASK_INFO_OUTPUT* pFmaskOut ///< [out] Output of fmask info - ) const -{ - pFmaskOut->macroModeIndex = TileIndexInvalid; - pFmaskOut->tileIndex = pSurfOut->tileIndex; -} - -/** -**************************************************************************************************** -* SiLib::HwlComputeFmaskBits -* 
@brief -* Computes fmask bits -* @return -* Fmask bits -**************************************************************************************************** -*/ -UINT_32 SiLib::HwlComputeFmaskBits( - const ADDR_COMPUTE_FMASK_INFO_INPUT* pIn, - UINT_32* pNumSamples - ) const -{ - UINT_32 numSamples = pIn->numSamples; - UINT_32 numFrags = GetNumFragments(numSamples, pIn->numFrags); - UINT_32 bpp; - - if (numFrags != numSamples) // EQAA - { - ADDR_ASSERT(numFrags <= 8); - - if (pIn->resolved == FALSE) - { - if (numFrags == 1) - { - bpp = 1; - numSamples = numSamples == 16 ? 16 : 8; - } - else if (numFrags == 2) - { - ADDR_ASSERT(numSamples >= 4); - - bpp = 2; - numSamples = numSamples; - } - else if (numFrags == 4) - { - ADDR_ASSERT(numSamples >= 4); - - bpp = 4; - numSamples = numSamples; - } - else // numFrags == 8 - { - ADDR_ASSERT(numSamples == 16); - - bpp = 4; - numSamples = numSamples; - } - } - else - { - if (numFrags == 1) - { - bpp = (numSamples == 16) ? 16 : 8; - numSamples = 1; - } - else if (numFrags == 2) - { - ADDR_ASSERT(numSamples >= 4); - - bpp = numSamples*2; - numSamples = 1; - } - else if (numFrags == 4) - { - ADDR_ASSERT(numSamples >= 4); - - bpp = numSamples*4; - numSamples = 1; - } - else // numFrags == 8 - { - ADDR_ASSERT(numSamples >= 16); - - bpp = 16*4; - numSamples = 1; - } - } - } - else // Normal AA - { - if (pIn->resolved == FALSE) - { - bpp = ComputeFmaskNumPlanesFromNumSamples(numSamples); - numSamples = numSamples == 2 ? 
8 : numSamples; - } - else - { - // The same as 8XX - bpp = ComputeFmaskResolvedBppFromNumSamples(numSamples); - numSamples = 1; // 1x sample - } - } - - SafeAssign(pNumSamples, numSamples); - - return bpp; -} - -/** -**************************************************************************************************** -* SiLib::HwlOptimizeTileMode -* -* @brief -* Optimize tile mode on SI -* -* @return -* N/A -* -**************************************************************************************************** -*/ -VOID SiLib::HwlOptimizeTileMode( - ADDR_COMPUTE_SURFACE_INFO_INPUT* pInOut ///< [in,out] input output structure - ) const -{ - AddrTileMode tileMode = pInOut->tileMode; - - if ((pInOut->flags.needEquation == TRUE) && - (IsMacroTiled(tileMode) == TRUE) && - (pInOut->numSamples <= 1)) - { - UINT_32 thickness = Thickness(tileMode); - - if (thickness > 1) - { - tileMode = ADDR_TM_1D_TILED_THICK; - } - else if (pInOut->numSlices > 1) - { - tileMode = ADDR_TM_1D_TILED_THIN1; - } - else - { - tileMode = ADDR_TM_2D_TILED_THIN1; - } - } - - if (tileMode != pInOut->tileMode) - { - pInOut->tileMode = tileMode; - } -} - -/** -**************************************************************************************************** -* SiLib::HwlOverrideTileMode -* -* @brief -* Override tile modes (for PRT only, avoid client passes in an invalid PRT mode for SI. 
-* -* @return -* N/A -* -**************************************************************************************************** -*/ -VOID SiLib::HwlOverrideTileMode( - ADDR_COMPUTE_SURFACE_INFO_INPUT* pInOut ///< [in,out] input output structure - ) const -{ - AddrTileMode tileMode = pInOut->tileMode; - - switch (tileMode) - { - case ADDR_TM_PRT_TILED_THIN1: - tileMode = ADDR_TM_2D_TILED_THIN1; - break; - - case ADDR_TM_PRT_TILED_THICK: - tileMode = ADDR_TM_2D_TILED_THICK; - break; - - case ADDR_TM_PRT_2D_TILED_THICK: - tileMode = ADDR_TM_2D_TILED_THICK; - break; - - case ADDR_TM_PRT_3D_TILED_THICK: - tileMode = ADDR_TM_3D_TILED_THICK; - break; - - default: - break; - } - - if (tileMode != pInOut->tileMode) - { - pInOut->tileMode = tileMode; - // Only PRT tile modes are overridden for now. Revisit this once new modes are added above. - pInOut->flags.prt = TRUE; - } -} - -/** -**************************************************************************************************** -* SiLib::HwlSetPrtTileMode -* -* @brief -* Set prt tile modes. -* -* @return -* N/A -* -**************************************************************************************************** -*/ -VOID SiLib::HwlSetPrtTileMode( - ADDR_COMPUTE_SURFACE_INFO_INPUT* pInOut ///< [in,out] input output structure - ) const -{ - pInOut->tileMode = ADDR_TM_2D_TILED_THIN1; - pInOut->tileType = (pInOut->tileType == ADDR_DEPTH_SAMPLE_ORDER) ? - ADDR_DEPTH_SAMPLE_ORDER : ADDR_NON_DISPLAYABLE; - pInOut->flags.prt = TRUE; -} - -/** -**************************************************************************************************** -* SiLib::HwlSelectTileMode -* -* @brief -* Select tile modes. 
-* -* @return -* N/A -* -**************************************************************************************************** -*/ -VOID SiLib::HwlSelectTileMode( - ADDR_COMPUTE_SURFACE_INFO_INPUT* pInOut ///< [in,out] input output structure - ) const -{ - AddrTileMode tileMode; - AddrTileType tileType; - - if (pInOut->flags.volume) - { - if (pInOut->numSlices >= 8) - { - tileMode = ADDR_TM_2D_TILED_XTHICK; - } - else if (pInOut->numSlices >= 4) - { - tileMode = ADDR_TM_2D_TILED_THICK; - } - else - { - tileMode = ADDR_TM_2D_TILED_THIN1; - } - tileType = ADDR_NON_DISPLAYABLE; - } - else - { - tileMode = ADDR_TM_2D_TILED_THIN1; - - if (pInOut->flags.depth || pInOut->flags.stencil) - { - tileType = ADDR_DEPTH_SAMPLE_ORDER; - } - else if ((pInOut->bpp <= 32) || - (pInOut->flags.display == TRUE) || - (pInOut->flags.overlay == TRUE)) - { - tileType = ADDR_DISPLAYABLE; - } - else - { - tileType = ADDR_NON_DISPLAYABLE; - } - } - - if (pInOut->flags.prt) - { - tileMode = ADDR_TM_2D_TILED_THIN1; - tileType = (tileType == ADDR_DISPLAYABLE) ? ADDR_NON_DISPLAYABLE : tileType; - } - - pInOut->tileMode = tileMode; - pInOut->tileType = tileType; - - // Optimize tile mode if possible - pInOut->flags.opt4Space = TRUE; - - // Optimize tile mode if possible - OptimizeTileMode(pInOut); - - HwlOverrideTileMode(pInOut); -} - -/** -**************************************************************************************************** -* SiLib::HwlComputeMaxBaseAlignments -* -* @brief -* Gets maximum alignments -* @return -* maximum alignments -**************************************************************************************************** -*/ -UINT_32 SiLib::HwlComputeMaxBaseAlignments() const -{ - const UINT_32 pipes = HwlGetPipes(&m_tileTable[0].info); - - // Initial size is 64 KiB for PRT. 
- UINT_32 maxBaseAlign = 64 * 1024; - - for (UINT_32 i = 0; i < m_noOfEntries; i++) - { - if ((IsMacroTiled(m_tileTable[i].mode) == TRUE) && - (IsPrtTileMode(m_tileTable[i].mode) == FALSE)) - { - // The maximum tile size is 16 byte-per-pixel and either 8-sample or 8-slice. - UINT_32 tileSize = Min(m_tileTable[i].info.tileSplitBytes, - MicroTilePixels * 8 * 16); - - UINT_32 baseAlign = tileSize * pipes * m_tileTable[i].info.banks * - m_tileTable[i].info.bankWidth * m_tileTable[i].info.bankHeight; - - if (baseAlign > maxBaseAlign) - { - maxBaseAlign = baseAlign; - } - } - } - - return maxBaseAlign; -} - -/** -**************************************************************************************************** -* SiLib::HwlComputeMaxMetaBaseAlignments -* -* @brief -* Gets maximum alignments for metadata -* @return -* maximum alignments for metadata -**************************************************************************************************** -*/ -UINT_32 SiLib::HwlComputeMaxMetaBaseAlignments() const -{ - UINT_32 maxPipe = 1; - - for (UINT_32 i = 0; i < m_noOfEntries; i++) - { - maxPipe = Max(maxPipe, HwlGetPipes(&m_tileTable[i].info)); - } - - return m_pipeInterleaveBytes * maxPipe; -} - -/** -**************************************************************************************************** -* SiLib::HwlComputeSurfaceAlignmentsMacroTiled -* -* @brief -* Hardware layer function to compute alignment request for macro tile mode -* -* @return -* N/A -* -**************************************************************************************************** -*/ -VOID SiLib::HwlComputeSurfaceAlignmentsMacroTiled( - AddrTileMode tileMode, ///< [in] tile mode - UINT_32 bpp, ///< [in] bits per pixel - ADDR_SURFACE_FLAGS flags, ///< [in] surface flags - UINT_32 mipLevel, ///< [in] mip level - UINT_32 numSamples, ///< [in] number of samples - ADDR_COMPUTE_SURFACE_INFO_OUTPUT* pOut ///< [in,out] Surface output - ) const -{ - if ((mipLevel == 0) && (flags.prt)) - { - UINT_32 
macroTileSize = pOut->blockWidth * pOut->blockHeight * numSamples * bpp / 8; - - if (macroTileSize < PrtTileSize) - { - UINT_32 numMacroTiles = PrtTileSize / macroTileSize; - - ADDR_ASSERT((PrtTileSize % macroTileSize) == 0); - - pOut->pitchAlign *= numMacroTiles; - pOut->baseAlign *= numMacroTiles; - } - } -} - -/** -**************************************************************************************************** -* SiLib::InitEquationTable -* -* @brief -* Initialize Equation table. -* -* @return -* N/A -**************************************************************************************************** -*/ -VOID SiLib::InitEquationTable() -{ - ADDR_EQUATION_KEY equationKeyTable[EquationTableSize]; - memset(equationKeyTable, 0, sizeof(equationKeyTable)); - - memset(m_equationTable, 0, sizeof(m_equationTable)); - - memset(m_blockWidth, 0, sizeof(m_blockWidth)); - - memset(m_blockHeight, 0, sizeof(m_blockHeight)); - - memset(m_blockSlices, 0, sizeof(m_blockSlices)); - - // Loop all possible bpp - for (UINT_32 log2ElementBytes = 0; log2ElementBytes < MaxNumElementBytes; log2ElementBytes++) - { - // Get bits per pixel - UINT_32 bpp = 1 << (log2ElementBytes + 3); - - // Loop all possible tile index - for (INT_32 tileIndex = 0; tileIndex < static_cast(m_noOfEntries); tileIndex++) - { - UINT_32 equationIndex = ADDR_INVALID_EQUATION_INDEX; - - TileConfig tileConfig = m_tileTable[tileIndex]; - - ADDR_SURFACE_FLAGS flags = {{0}}; - - // Compute tile info, hardcode numSamples to 1 because MSAA is not supported - // in swizzle pattern equation - HwlComputeMacroModeIndex(tileIndex, flags, bpp, 1, &tileConfig.info, NULL, NULL); - - // Check if the input is supported - if (IsEquationSupported(bpp, tileConfig, tileIndex, log2ElementBytes) == TRUE) - { - ADDR_EQUATION_KEY key = {{0}}; - - // Generate swizzle equation key from bpp and tile config - key.fields.log2ElementBytes = log2ElementBytes; - key.fields.tileMode = tileConfig.mode; - // Treat depth micro tile type and 
non-display micro tile type as the same key - // because they have the same equation actually - key.fields.microTileType = (tileConfig.type == ADDR_DEPTH_SAMPLE_ORDER) ? - ADDR_NON_DISPLAYABLE : tileConfig.type; - key.fields.pipeConfig = tileConfig.info.pipeConfig; - key.fields.numBanksLog2 = Log2(tileConfig.info.banks); - key.fields.bankWidth = tileConfig.info.bankWidth; - key.fields.bankHeight = tileConfig.info.bankHeight; - key.fields.macroAspectRatio = tileConfig.info.macroAspectRatio; - key.fields.prt = ((m_chipFamily == ADDR_CHIP_FAMILY_SI) && - ((1 << tileIndex) & SiPrtTileIndexMask)) ? 1 : 0; - - // Find in the table if the equation has been built based on the key - for (UINT_32 i = 0; i < m_numEquations; i++) - { - if (key.value == equationKeyTable[i].value) - { - equationIndex = i; - break; - } - } - - // If found, just fill the index into the lookup table and no need - // to generate the equation again. Otherwise, generate the equation. - if (equationIndex == ADDR_INVALID_EQUATION_INDEX) - { - ADDR_EQUATION equation; - ADDR_E_RETURNCODE retCode; - - memset(&equation, 0, sizeof(ADDR_EQUATION)); - - // Generate the equation - if (IsMicroTiled(tileConfig.mode)) - { - retCode = ComputeMicroTileEquation(log2ElementBytes, - tileConfig.mode, - tileConfig.type, - &equation); - } - else - { - retCode = ComputeMacroTileEquation(log2ElementBytes, - tileConfig.mode, - tileConfig.type, - &tileConfig.info, - &equation); - } - // Only fill the equation into the table if the return code is ADDR_OK, - // otherwise if the return code is not ADDR_OK, it indicates this is not - // a valid input, we do nothing but just fill invalid equation index - // into the lookup table. 
- if (retCode == ADDR_OK) - { - equationIndex = m_numEquations; - ADDR_ASSERT(equationIndex < EquationTableSize); - - m_blockSlices[equationIndex] = Thickness(tileConfig.mode); - - if (IsMicroTiled(tileConfig.mode)) - { - m_blockWidth[equationIndex] = MicroTileWidth; - m_blockHeight[equationIndex] = MicroTileHeight; - } - else - { - const ADDR_TILEINFO* pTileInfo = &tileConfig.info; - - m_blockWidth[equationIndex] = - HwlGetPipes(pTileInfo) * MicroTileWidth * pTileInfo->bankWidth * - pTileInfo->macroAspectRatio; - m_blockHeight[equationIndex] = - MicroTileHeight * pTileInfo->bankHeight * pTileInfo->banks / - pTileInfo->macroAspectRatio; - - if (key.fields.prt) - { - UINT_32 macroTileSize = - m_blockWidth[equationIndex] * m_blockHeight[equationIndex] * - bpp / 8; - - if (macroTileSize < PrtTileSize) - { - UINT_32 numMacroTiles = PrtTileSize / macroTileSize; - - ADDR_ASSERT(macroTileSize == (1u << equation.numBits)); - ADDR_ASSERT((PrtTileSize % macroTileSize) == 0); - - UINT_32 numBits = Log2(numMacroTiles); - - UINT_32 xStart = Log2(m_blockWidth[equationIndex]) + - log2ElementBytes; - - m_blockWidth[equationIndex] *= numMacroTiles; - - for (UINT_32 i = 0; i < numBits; i++) - { - equation.addr[equation.numBits + i].valid = 1; - equation.addr[equation.numBits + i].index = xStart + i; - } - - equation.numBits += numBits; - } - } - } - - equationKeyTable[equationIndex] = key; - m_equationTable[equationIndex] = equation; - - m_numEquations++; - } - } - } - - // Fill the index into the lookup table, if the combination is not supported - // fill the invalid equation index - m_equationLookupTable[log2ElementBytes][tileIndex] = equationIndex; - } - - if (m_chipFamily == ADDR_CHIP_FAMILY_SI) - { - // For tile index 3 which is shared between PRT depth and uncompressed depth - m_uncompressDepthEqIndex = m_numEquations; - - for (UINT_32 log2ElemBytes = 0; log2ElemBytes < MaxNumElementBytes; log2ElemBytes++) - { - TileConfig tileConfig = m_tileTable[3]; - ADDR_EQUATION equation; 
- ADDR_E_RETURNCODE retCode; - - memset(&equation, 0, sizeof(ADDR_EQUATION)); - - retCode = ComputeMacroTileEquation(log2ElemBytes, - tileConfig.mode, - tileConfig.type, - &tileConfig.info, - &equation); - - if (retCode == ADDR_OK) - { - UINT_32 equationIndex = m_numEquations; - ADDR_ASSERT(equationIndex < EquationTableSize); - - m_blockSlices[equationIndex] = 1; - - const ADDR_TILEINFO* pTileInfo = &tileConfig.info; - - m_blockWidth[equationIndex] = - HwlGetPipes(pTileInfo) * MicroTileWidth * pTileInfo->bankWidth * - pTileInfo->macroAspectRatio; - m_blockHeight[equationIndex] = - MicroTileHeight * pTileInfo->bankHeight * pTileInfo->banks / - pTileInfo->macroAspectRatio; - - m_equationTable[equationIndex] = equation; - - m_numEquations++; - } - } - } - } -} - -/** -**************************************************************************************************** -* SiLib::IsEquationSupported -* -* @brief -* Check if it is supported for given bpp and tile config to generate a equation. 
-* -* @return -* TRUE if supported -**************************************************************************************************** -*/ -BOOL_32 SiLib::IsEquationSupported( - UINT_32 bpp, ///< Bits per pixel - TileConfig tileConfig, ///< Tile config - INT_32 tileIndex, ///< Tile index - UINT_32 elementBytesLog2 ///< Log2 of element bytes - ) const -{ - BOOL_32 supported = TRUE; - - // Linear tile mode is not supported in swizzle pattern equation - if (IsLinear(tileConfig.mode)) - { - supported = FALSE; - } - // These tile modes are for Tex2DArray and Tex3D which has depth (num_slice > 1) use, - // which is not supported in swizzle pattern equation due to slice rotation - else if ((tileConfig.mode == ADDR_TM_2D_TILED_THICK) || - (tileConfig.mode == ADDR_TM_2D_TILED_XTHICK) || - (tileConfig.mode == ADDR_TM_3D_TILED_THIN1) || - (tileConfig.mode == ADDR_TM_3D_TILED_THICK) || - (tileConfig.mode == ADDR_TM_3D_TILED_XTHICK)) - { - supported = FALSE; - } - // Only 8bpp(stencil), 16bpp and 32bpp is supported for depth - else if ((tileConfig.type == ADDR_DEPTH_SAMPLE_ORDER) && (bpp > 32)) - { - supported = FALSE; - } - // Tile split is not supported in swizzle pattern equation - else if (IsMacroTiled(tileConfig.mode)) - { - UINT_32 thickness = Thickness(tileConfig.mode); - if (((bpp >> 3) * MicroTilePixels * thickness) > tileConfig.info.tileSplitBytes) - { - supported = FALSE; - } - - if ((supported == TRUE) && (m_chipFamily == ADDR_CHIP_FAMILY_SI)) - { - supported = m_EquationSupport[tileIndex][elementBytesLog2]; - } - } - - return supported; -} - -} // V1 -} // Addr diff -Nru mesa-18.3.3/src/amd/addrlib/r800/siaddrlib.h mesa-19.0.1/src/amd/addrlib/r800/siaddrlib.h --- mesa-18.3.3/src/amd/addrlib/r800/siaddrlib.h 2018-04-19 04:33:31.000000000 +0000 +++ mesa-19.0.1/src/amd/addrlib/r800/siaddrlib.h 1970-01-01 00:00:00.000000000 +0000 @@ -1,348 +0,0 @@ -/* - * Copyright © 2014 Advanced Micro Devices, Inc. - * All Rights Reserved. 
- * - * Permission is hereby granted, free of charge, to any person obtaining - * a copy of this software and associated documentation files (the - * "Software"), to deal in the Software without restriction, including - * without limitation the rights to use, copy, modify, merge, publish, - * distribute, sub license, and/or sell copies of the Software, and to - * permit persons to whom the Software is furnished to do so, subject to - * the following conditions: - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES - * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - * NON-INFRINGEMENT. IN NO EVENT SHALL THE COPYRIGHT HOLDERS, AUTHORS - * AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, - * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE - * USE OR OTHER DEALINGS IN THE SOFTWARE. - * - * The above copyright notice and this permission notice (including the - * next paragraph) shall be included in all copies or substantial portions - * of the Software. - */ - -/** -**************************************************************************************************** -* @file siaddrlib.h -* @brief Contains the R800Lib class definition. 
-**************************************************************************************************** -*/ - -#ifndef __SI_ADDR_LIB_H__ -#define __SI_ADDR_LIB_H__ - -#include "addrlib1.h" -#include "egbaddrlib.h" - -namespace Addr -{ -namespace V1 -{ - -/** -**************************************************************************************************** -* @brief Describes the information in tile mode table -**************************************************************************************************** -*/ -struct TileConfig -{ - AddrTileMode mode; - AddrTileType type; - ADDR_TILEINFO info; -}; - -/** -**************************************************************************************************** -* @brief SI specific settings structure. -**************************************************************************************************** -*/ -struct SiChipSettings -{ - UINT_32 isSouthernIsland : 1; - UINT_32 isTahiti : 1; - UINT_32 isPitCairn : 1; - UINT_32 isCapeVerde : 1; - // Oland/Hainan are of GFXIP 6.0, similar with SI - UINT_32 isOland : 1; - UINT_32 isHainan : 1; - - // CI - UINT_32 isSeaIsland : 1; - UINT_32 isBonaire : 1; - UINT_32 isKaveri : 1; - UINT_32 isSpectre : 1; - UINT_32 isSpooky : 1; - UINT_32 isKalindi : 1; - // Hawaii is GFXIP 7.2 - UINT_32 isHawaii : 1; - - // VI - UINT_32 isVolcanicIslands : 1; - UINT_32 isIceland : 1; - UINT_32 isTonga : 1; - UINT_32 isFiji : 1; - UINT_32 isPolaris10 : 1; - UINT_32 isPolaris11 : 1; - UINT_32 isPolaris12 : 1; - UINT_32 isVegaM : 1; - // VI fusion - UINT_32 isCarrizo : 1; -}; - -/** -**************************************************************************************************** -* @brief This class is the SI specific address library -* function set. 
-**************************************************************************************************** -*/ -class SiLib : public EgBasedLib -{ -public: - /// Creates SiLib object - static Addr::Lib* CreateObj(const Client* pClient) - { - VOID* pMem = Object::ClientAlloc(sizeof(SiLib), pClient); - return (pMem != NULL) ? new (pMem) SiLib(pClient) : NULL; - } - -protected: - SiLib(const Client* pClient); - virtual ~SiLib(); - - // Hwl interface - defined in AddrLib1 - virtual ADDR_E_RETURNCODE HwlComputeSurfaceInfo( - const ADDR_COMPUTE_SURFACE_INFO_INPUT* pIn, - ADDR_COMPUTE_SURFACE_INFO_OUTPUT* pOut) const; - - virtual ADDR_E_RETURNCODE HwlConvertTileInfoToHW( - const ADDR_CONVERT_TILEINFOTOHW_INPUT* pIn, - ADDR_CONVERT_TILEINFOTOHW_OUTPUT* pOut) const; - - virtual UINT_64 HwlComputeXmaskAddrFromCoord( - UINT_32 pitch, UINT_32 height, UINT_32 x, UINT_32 y, UINT_32 slice, UINT_32 numSlices, - UINT_32 factor, BOOL_32 isLinear, BOOL_32 isWidth8, BOOL_32 isHeight8, - ADDR_TILEINFO* pTileInfo, UINT_32* pBitPosition) const; - - virtual VOID HwlComputeXmaskCoordFromAddr( - UINT_64 addr, UINT_32 bitPosition, UINT_32 pitch, UINT_32 height, UINT_32 numSlices, - UINT_32 factor, BOOL_32 isLinear, BOOL_32 isWidth8, BOOL_32 isHeight8, - ADDR_TILEINFO* pTileInfo, UINT_32* pX, UINT_32* pY, UINT_32* pSlice) const; - - virtual ADDR_E_RETURNCODE HwlGetTileIndex( - const ADDR_GET_TILEINDEX_INPUT* pIn, - ADDR_GET_TILEINDEX_OUTPUT* pOut) const; - - virtual BOOL_32 HwlComputeMipLevel( - ADDR_COMPUTE_SURFACE_INFO_INPUT* pIn) const; - - virtual ChipFamily HwlConvertChipFamily( - UINT_32 uChipFamily, UINT_32 uChipRevision); - - virtual BOOL_32 HwlInitGlobalParams( - const ADDR_CREATE_INPUT* pCreateIn); - - virtual ADDR_E_RETURNCODE HwlSetupTileCfg( - UINT_32 bpp, INT_32 index, INT_32 macroModeIndex, - ADDR_TILEINFO* pInfo, AddrTileMode* pMode = 0, AddrTileType* pType = 0) const; - - virtual VOID HwlComputeTileDataWidthAndHeightLinear( - UINT_32* pMacroWidth, UINT_32* pMacroHeight, - UINT_32 
bpp, ADDR_TILEINFO* pTileInfo) const; - - virtual UINT_64 HwlComputeHtileBytes( - UINT_32 pitch, UINT_32 height, UINT_32 bpp, - BOOL_32 isLinear, UINT_32 numSlices, UINT_64* pSliceBytes, UINT_32 baseAlign) const; - - virtual ADDR_E_RETURNCODE ComputeBankEquation( - UINT_32 log2BytesPP, UINT_32 threshX, UINT_32 threshY, - ADDR_TILEINFO* pTileInfo, ADDR_EQUATION* pEquation) const; - - virtual ADDR_E_RETURNCODE ComputePipeEquation( - UINT_32 log2BytesPP, UINT_32 threshX, UINT_32 threshY, - ADDR_TILEINFO* pTileInfo, ADDR_EQUATION* pEquation) const; - - virtual UINT_32 ComputePipeFromCoord( - UINT_32 x, UINT_32 y, UINT_32 slice, - AddrTileMode tileMode, UINT_32 pipeSwizzle, BOOL_32 ignoreSE, - ADDR_TILEINFO* pTileInfo) const; - - virtual UINT_32 HwlGetPipes(const ADDR_TILEINFO* pTileInfo) const; - - /// Pre-handler of 3x pitch (96 bit) adjustment - virtual UINT_32 HwlPreHandleBaseLvl3xPitch( - const ADDR_COMPUTE_SURFACE_INFO_INPUT* pIn, UINT_32 expPitch) const; - /// Post-handler of 3x pitch adjustment - virtual UINT_32 HwlPostHandleBaseLvl3xPitch( - const ADDR_COMPUTE_SURFACE_INFO_INPUT* pIn, UINT_32 expPitch) const; - - /// Dummy function to finalize the inheritance - virtual UINT_32 HwlComputeXmaskCoordYFrom8Pipe( - UINT_32 pipe, UINT_32 x) const; - - // Sub-hwl interface - defined in EgBasedLib - virtual VOID HwlSetupTileInfo( - AddrTileMode tileMode, ADDR_SURFACE_FLAGS flags, - UINT_32 bpp, UINT_32 pitch, UINT_32 height, UINT_32 numSamples, - ADDR_TILEINFO* inputTileInfo, ADDR_TILEINFO* outputTileInfo, - AddrTileType inTileType, ADDR_COMPUTE_SURFACE_INFO_OUTPUT* pOut) const; - - virtual UINT_32 HwlGetPitchAlignmentMicroTiled( - AddrTileMode tileMode, UINT_32 bpp, ADDR_SURFACE_FLAGS flags, UINT_32 numSamples) const; - - virtual UINT_64 HwlGetSizeAdjustmentMicroTiled( - UINT_32 thickness, UINT_32 bpp, ADDR_SURFACE_FLAGS flags, UINT_32 numSamples, - UINT_32 baseAlign, UINT_32 pitchAlign, - UINT_32 *pPitch, UINT_32 *pHeight) const; - - virtual VOID 
HwlCheckLastMacroTiledLvl( - const ADDR_COMPUTE_SURFACE_INFO_INPUT* pIn, ADDR_COMPUTE_SURFACE_INFO_OUTPUT* pOut) const; - - virtual BOOL_32 HwlTileInfoEqual( - const ADDR_TILEINFO* pLeft, const ADDR_TILEINFO* pRight) const; - - virtual AddrTileMode HwlDegradeThickTileMode( - AddrTileMode baseTileMode, UINT_32 numSlices, UINT_32* pBytesPerTile) const; - - virtual VOID HwlOverrideTileMode(ADDR_COMPUTE_SURFACE_INFO_INPUT* pInOut) const; - - virtual VOID HwlOptimizeTileMode(ADDR_COMPUTE_SURFACE_INFO_INPUT* pInOut) const; - - virtual VOID HwlSelectTileMode(ADDR_COMPUTE_SURFACE_INFO_INPUT* pInOut) const; - - /// Overwrite tile setting to PRT - virtual VOID HwlSetPrtTileMode(ADDR_COMPUTE_SURFACE_INFO_INPUT* pInOut) const; - - virtual BOOL_32 HwlSanityCheckMacroTiled( - ADDR_TILEINFO* pTileInfo) const - { - return TRUE; - } - - virtual UINT_32 HwlGetPitchAlignmentLinear(UINT_32 bpp, ADDR_SURFACE_FLAGS flags) const; - - virtual UINT_64 HwlGetSizeAdjustmentLinear( - AddrTileMode tileMode, - UINT_32 bpp, UINT_32 numSamples, UINT_32 baseAlign, UINT_32 pitchAlign, - UINT_32 *pPitch, UINT_32 *pHeight, UINT_32 *pHeightAlign) const; - - virtual VOID HwlComputeSurfaceCoord2DFromBankPipe( - AddrTileMode tileMode, UINT_32* pX, UINT_32* pY, UINT_32 slice, - UINT_32 bank, UINT_32 pipe, - UINT_32 bankSwizzle, UINT_32 pipeSwizzle, UINT_32 tileSlices, - BOOL_32 ignoreSE, - ADDR_TILEINFO* pTileInfo) const; - - virtual UINT_32 HwlPreAdjustBank( - UINT_32 tileX, UINT_32 bank, ADDR_TILEINFO* pTileInfo) const; - - virtual INT_32 HwlPostCheckTileIndex( - const ADDR_TILEINFO* pInfo, AddrTileMode mode, AddrTileType type, - INT curIndex = TileIndexInvalid) const; - - virtual VOID HwlFmaskPreThunkSurfInfo( - const ADDR_COMPUTE_FMASK_INFO_INPUT* pFmaskIn, - const ADDR_COMPUTE_FMASK_INFO_OUTPUT* pFmaskOut, - ADDR_COMPUTE_SURFACE_INFO_INPUT* pSurfIn, - ADDR_COMPUTE_SURFACE_INFO_OUTPUT* pSurfOut) const; - - virtual VOID HwlFmaskPostThunkSurfInfo( - const ADDR_COMPUTE_SURFACE_INFO_OUTPUT* pSurfOut, - 
ADDR_COMPUTE_FMASK_INFO_OUTPUT* pFmaskOut) const; - - virtual UINT_32 HwlComputeFmaskBits( - const ADDR_COMPUTE_FMASK_INFO_INPUT* pIn, - UINT_32* pNumSamples) const; - - virtual BOOL_32 HwlReduceBankWidthHeight( - UINT_32 tileSize, UINT_32 bpp, ADDR_SURFACE_FLAGS flags, UINT_32 numSamples, - UINT_32 bankHeightAlign, UINT_32 pipes, - ADDR_TILEINFO* pTileInfo) const - { - return TRUE; - } - - virtual UINT_32 HwlComputeMaxBaseAlignments() const; - - virtual UINT_32 HwlComputeMaxMetaBaseAlignments() const; - - virtual VOID HwlComputeSurfaceAlignmentsMacroTiled( - AddrTileMode tileMode, UINT_32 bpp, ADDR_SURFACE_FLAGS flags, - UINT_32 mipLevel, UINT_32 numSamples, ADDR_COMPUTE_SURFACE_INFO_OUTPUT* pOut) const; - - // Get equation table pointer and number of equations - virtual UINT_32 HwlGetEquationTableInfo(const ADDR_EQUATION** ppEquationTable) const - { - *ppEquationTable = m_equationTable; - - return m_numEquations; - } - - // Check if it is supported for given bpp and tile config to generate an equation - BOOL_32 IsEquationSupported( - UINT_32 bpp, TileConfig tileConfig, INT_32 tileIndex, UINT_32 elementBytesLog2) const; - - // Protected non-virtual functions - VOID ComputeTileCoordFromPipeAndElemIdx( - UINT_32 elemIdx, UINT_32 pipe, AddrPipeCfg pipeCfg, UINT_32 pitchInMacroTile, - UINT_32 x, UINT_32 y, UINT_32* pX, UINT_32* pY) const; - - UINT_32 TileCoordToMaskElementIndex( - UINT_32 tx, UINT_32 ty, AddrPipeCfg pipeConfig, - UINT_32 *macroShift, UINT_32 *elemIdxBits) const; - - BOOL_32 DecodeGbRegs( - const ADDR_REGISTER_VALUE* pRegValue); - - const TileConfig* GetTileSetting( - UINT_32 index) const; - - // Initialize equation table - VOID InitEquationTable(); - - UINT_32 GetPipePerSurf(AddrPipeCfg pipeConfig) const; - - static const UINT_32 TileTableSize = 32; - TileConfig m_tileTable[TileTableSize]; - UINT_32 m_noOfEntries; - - // Max number of bpp (8bpp/16bpp/32bpp/64bpp/128bpp) - static const UINT_32 MaxNumElementBytes = 5; - - static const BOOL_32 
m_EquationSupport[TileTableSize][MaxNumElementBytes]; - - // Prt tile mode index mask - static const UINT_32 SiPrtTileIndexMask = ((1 << 3) | (1 << 5) | (1 << 6) | (1 << 7) | - (1 << 21) | (1 << 22) | (1 << 23) | (1 << 24) | - (1 << 25) | (1 << 30)); - - // More than half slots in tile mode table can't support equation - static const UINT_32 EquationTableSize = (MaxNumElementBytes * TileTableSize) / 2; - // Equation table - ADDR_EQUATION m_equationTable[EquationTableSize]; - UINT_32 m_numMacroBits[EquationTableSize]; - UINT_32 m_blockWidth[EquationTableSize]; - UINT_32 m_blockHeight[EquationTableSize]; - UINT_32 m_blockSlices[EquationTableSize]; - // Number of equation entries in the table - UINT_32 m_numEquations; - // Equation lookup table according to bpp and tile index - UINT_32 m_equationLookupTable[MaxNumElementBytes][TileTableSize]; - - UINT_32 m_uncompressDepthEqIndex; - - SiChipSettings m_settings; - -private: - - VOID ReadGbTileMode(UINT_32 regValue, TileConfig* pCfg) const; - BOOL_32 InitTileSettingTable(const UINT_32 *pSetting, UINT_32 noOfEntries); -}; - -} // V1 -} // Addr - -#endif - diff -Nru mesa-18.3.3/src/amd/addrlib/src/addrinterface.cpp mesa-19.0.1/src/amd/addrlib/src/addrinterface.cpp --- mesa-18.3.3/src/amd/addrlib/src/addrinterface.cpp 1970-01-01 00:00:00.000000000 +0000 +++ mesa-19.0.1/src/amd/addrlib/src/addrinterface.cpp 2019-03-31 23:16:37.000000000 +0000 @@ -0,0 +1,1740 @@ +/* + * Copyright © 2007-2018 Advanced Micro Devices, Inc. + * All Rights Reserved. 
+ * + * Permission is hereby granted, free of charge, to any person obtaining + * a copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES + * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NON-INFRINGEMENT. IN NO EVENT SHALL THE COPYRIGHT HOLDERS, AUTHORS + * AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE + * USE OR OTHER DEALINGS IN THE SOFTWARE. + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. 
+ */ + +/** +**************************************************************************************************** +* @file addrinterface.cpp +* @brief Contains the addrlib interface functions +**************************************************************************************************** +*/ +#include "addrinterface.h" +#include "addrlib1.h" +#include "addrlib2.h" + +#include "addrcommon.h" + +#include "util/macros.h" + +using namespace Addr; + +//////////////////////////////////////////////////////////////////////////////////////////////////// +// Create/Destroy/Config functions +//////////////////////////////////////////////////////////////////////////////////////////////////// + +/** +**************************************************************************************************** +* AddrCreate +* +* @brief +* Create address lib object +* +* @return +* ADDR_OK if successful, otherwise an error code of ADDR_E_RETURNCODE +**************************************************************************************************** +*/ +ADDR_E_RETURNCODE ADDR_API AddrCreate( + const ADDR_CREATE_INPUT* pAddrCreateIn, ///< [in] infomation for creating address lib object + ADDR_CREATE_OUTPUT* pAddrCreateOut) ///< [out] address lib handle +{ + ADDR_E_RETURNCODE returnCode = ADDR_OK; + + { + returnCode = Lib::Create(pAddrCreateIn, pAddrCreateOut); + } + + return returnCode; +} + +/** +**************************************************************************************************** +* AddrDestroy +* +* @brief +* Destroy address lib object +* +* @return +* ADDR_OK if successful, otherwise an error code of ADDR_E_RETURNCODE +**************************************************************************************************** +*/ +ADDR_E_RETURNCODE ADDR_API AddrDestroy( + ADDR_HANDLE hLib) ///< address lib handle +{ + ADDR_E_RETURNCODE returnCode = ADDR_OK; + + if (hLib) + { + Lib* pLib = Lib::GetLib(hLib); + pLib->Destroy(); + } + else + { + returnCode = ADDR_ERROR; + } + 
+ return returnCode; +} + +//////////////////////////////////////////////////////////////////////////////////////////////////// +// Surface functions +//////////////////////////////////////////////////////////////////////////////////////////////////// + +/** +**************************************************************************************************** +* AddrComputeSurfaceInfo +* +* @brief +* Calculate surface width/height/depth/alignments and suitable tiling mode +* +* @return +* ADDR_OK if successful, otherwise an error code of ADDR_E_RETURNCODE +**************************************************************************************************** +*/ +ADDR_E_RETURNCODE ADDR_API AddrComputeSurfaceInfo( + ADDR_HANDLE hLib, ///< address lib handle + const ADDR_COMPUTE_SURFACE_INFO_INPUT* pIn, ///< [in] surface information + ADDR_COMPUTE_SURFACE_INFO_OUTPUT* pOut) ///< [out] surface parameters and alignments +{ + V1::Lib* pLib = V1::Lib::GetLib(hLib); + + ADDR_E_RETURNCODE returnCode = ADDR_OK; + + if (pLib != NULL) + { + returnCode = pLib->ComputeSurfaceInfo(pIn, pOut); + } + else + { + returnCode = ADDR_ERROR; + } + + return returnCode; +} + +/** +**************************************************************************************************** +* AddrComputeSurfaceAddrFromCoord +* +* @brief +* Compute surface address according to coordinates +* +* @return +* ADDR_OK if successful, otherwise an error code of ADDR_E_RETURNCODE +**************************************************************************************************** +*/ +ADDR_E_RETURNCODE ADDR_API AddrComputeSurfaceAddrFromCoord( + ADDR_HANDLE hLib, ///< address lib handle + const ADDR_COMPUTE_SURFACE_ADDRFROMCOORD_INPUT* pIn, ///< [in] surface info and coordinates + ADDR_COMPUTE_SURFACE_ADDRFROMCOORD_OUTPUT* pOut) ///< [out] surface address +{ + V1::Lib* pLib = V1::Lib::GetLib(hLib); + + ADDR_E_RETURNCODE returnCode = ADDR_OK; + + if (pLib != NULL) + { + returnCode = 
pLib->ComputeSurfaceAddrFromCoord(pIn, pOut); + } + else + { + returnCode = ADDR_ERROR; + } + + return returnCode; +} + +/** +**************************************************************************************************** +* AddrComputeSurfaceCoordFromAddr +* +* @brief +* Compute coordinates according to surface address +* +* @return +* ADDR_OK if successful, otherwise an error code of ADDR_E_RETURNCODE +**************************************************************************************************** +*/ +ADDR_E_RETURNCODE ADDR_API AddrComputeSurfaceCoordFromAddr( + ADDR_HANDLE hLib, ///< address lib handle + const ADDR_COMPUTE_SURFACE_COORDFROMADDR_INPUT* pIn, ///< [in] surface info and address + ADDR_COMPUTE_SURFACE_COORDFROMADDR_OUTPUT* pOut) ///< [out] coordinates +{ + V1::Lib* pLib = V1::Lib::GetLib(hLib); + + ADDR_E_RETURNCODE returnCode = ADDR_OK; + + if (pLib != NULL) + { + returnCode = pLib->ComputeSurfaceCoordFromAddr(pIn, pOut); + } + else + { + returnCode = ADDR_ERROR; + } + + return returnCode; +} + +//////////////////////////////////////////////////////////////////////////////////////////////////// +// HTile functions +//////////////////////////////////////////////////////////////////////////////////////////////////// + +/** +**************************************************************************************************** +* AddrComputeHtileInfo +* +* @brief +* Compute Htile pitch, height, base alignment and size in bytes +* +* @return +* ADDR_OK if successful, otherwise an error code of ADDR_E_RETURNCODE +**************************************************************************************************** +*/ +ADDR_E_RETURNCODE ADDR_API AddrComputeHtileInfo( + ADDR_HANDLE hLib, ///< address lib handle + const ADDR_COMPUTE_HTILE_INFO_INPUT* pIn, ///< [in] Htile information + ADDR_COMPUTE_HTILE_INFO_OUTPUT* pOut) ///< [out] Htile pitch, height and size in bytes +{ + V1::Lib* pLib = V1::Lib::GetLib(hLib); + + ADDR_E_RETURNCODE returnCode = 
ADDR_OK; + + if (pLib != NULL) + { + returnCode = pLib->ComputeHtileInfo(pIn, pOut); + } + else + { + returnCode = ADDR_ERROR; + } + + return returnCode; +} + +/** +**************************************************************************************************** +* AddrComputeHtileAddrFromCoord +* +* @brief +* Compute Htile address according to coordinates (of depth buffer) +* +* @return +* ADDR_OK if successful, otherwise an error code of ADDR_E_RETURNCODE +**************************************************************************************************** +*/ +ADDR_E_RETURNCODE ADDR_API AddrComputeHtileAddrFromCoord( + ADDR_HANDLE hLib, ///< address lib handle + const ADDR_COMPUTE_HTILE_ADDRFROMCOORD_INPUT* pIn, ///< [in] Htile info and coordinates + ADDR_COMPUTE_HTILE_ADDRFROMCOORD_OUTPUT* pOut) ///< [out] Htile address +{ + V1::Lib* pLib = V1::Lib::GetLib(hLib); + + ADDR_E_RETURNCODE returnCode = ADDR_OK; + + if (pLib != NULL) + { + returnCode = pLib->ComputeHtileAddrFromCoord(pIn, pOut); + } + else + { + returnCode = ADDR_ERROR; + } + + return returnCode; +} + +/** +**************************************************************************************************** +* AddrComputeHtileCoordFromAddr +* +* @brief +* Compute coordinates within depth buffer (1st pixel of a micro tile) according to +* Htile address +* +* @return +* ADDR_OK if successful, otherwise an error code of ADDR_E_RETURNCODE +**************************************************************************************************** +*/ +ADDR_E_RETURNCODE ADDR_API AddrComputeHtileCoordFromAddr( + ADDR_HANDLE hLib, ///< address lib handle + const ADDR_COMPUTE_HTILE_COORDFROMADDR_INPUT* pIn, ///< [in] Htile info and address + ADDR_COMPUTE_HTILE_COORDFROMADDR_OUTPUT* pOut) ///< [out] Htile coordinates +{ + V1::Lib* pLib = V1::Lib::GetLib(hLib); + + ADDR_E_RETURNCODE returnCode = ADDR_OK; + + if (pLib != NULL) + { + returnCode = pLib->ComputeHtileCoordFromAddr(pIn, pOut); + } + else + { + returnCode = 
ADDR_ERROR; + } + + return returnCode; +} + +//////////////////////////////////////////////////////////////////////////////////////////////////// +// C-mask functions +//////////////////////////////////////////////////////////////////////////////////////////////////// + +/** +**************************************************************************************************** +* AddrComputeCmaskInfo +* +* @brief +* Compute Cmask pitch, height, base alignment and size in bytes from color buffer +* info +* +* @return +* ADDR_OK if successful, otherwise an error code of ADDR_E_RETURNCODE +**************************************************************************************************** +*/ +ADDR_E_RETURNCODE ADDR_API AddrComputeCmaskInfo( + ADDR_HANDLE hLib, ///< address lib handle + const ADDR_COMPUTE_CMASK_INFO_INPUT* pIn, ///< [in] Cmask pitch and height + ADDR_COMPUTE_CMASK_INFO_OUTPUT* pOut) ///< [out] Cmask pitch, height and size in bytes +{ + V1::Lib* pLib = V1::Lib::GetLib(hLib); + + ADDR_E_RETURNCODE returnCode = ADDR_OK; + + if (pLib != NULL) + { + returnCode = pLib->ComputeCmaskInfo(pIn, pOut); + } + else + { + returnCode = ADDR_ERROR; + } + + return returnCode; +} + +/** +**************************************************************************************************** +* AddrComputeCmaskAddrFromCoord +* +* @brief +* Compute Cmask address according to coordinates (of MSAA color buffer) +* +* @return +* ADDR_OK if successful, otherwise an error code of ADDR_E_RETURNCODE +**************************************************************************************************** +*/ +ADDR_E_RETURNCODE ADDR_API AddrComputeCmaskAddrFromCoord( + ADDR_HANDLE hLib, ///< address lib handle + const ADDR_COMPUTE_CMASK_ADDRFROMCOORD_INPUT* pIn, ///< [in] Cmask info and coordinates + ADDR_COMPUTE_CMASK_ADDRFROMCOORD_OUTPUT* pOut) ///< [out] Cmask address +{ + V1::Lib* pLib = V1::Lib::GetLib(hLib); + + ADDR_E_RETURNCODE returnCode = ADDR_OK; + + if (pLib != NULL) + { + 
returnCode = pLib->ComputeCmaskAddrFromCoord(pIn, pOut); + } + else + { + returnCode = ADDR_ERROR; + } + + return returnCode; +} + +/** +**************************************************************************************************** +* AddrComputeCmaskCoordFromAddr +* +* @brief +* Compute coordinates within color buffer (1st pixel of a micro tile) according to +* Cmask address +* +* @return +* ADDR_OK if successful, otherwise an error code of ADDR_E_RETURNCODE +**************************************************************************************************** +*/ +ADDR_E_RETURNCODE ADDR_API AddrComputeCmaskCoordFromAddr( + ADDR_HANDLE hLib, ///< address lib handle + const ADDR_COMPUTE_CMASK_COORDFROMADDR_INPUT* pIn, ///< [in] Cmask info and address + ADDR_COMPUTE_CMASK_COORDFROMADDR_OUTPUT* pOut) ///< [out] Cmask coordinates +{ + V1::Lib* pLib = V1::Lib::GetLib(hLib); + + ADDR_E_RETURNCODE returnCode = ADDR_OK; + + if (pLib != NULL) + { + returnCode = pLib->ComputeCmaskCoordFromAddr(pIn, pOut); + } + else + { + returnCode = ADDR_ERROR; + } + + return returnCode; +} + +//////////////////////////////////////////////////////////////////////////////////////////////////// +// F-mask functions +//////////////////////////////////////////////////////////////////////////////////////////////////// + +/** +**************************************************************************************************** +* AddrComputeFmaskInfo +* +* @brief +* Compute Fmask pitch/height/depth/alignments and size in bytes +* +* @return +* ADDR_OK if successful, otherwise an error code of ADDR_E_RETURNCODE +**************************************************************************************************** +*/ +ADDR_E_RETURNCODE ADDR_API AddrComputeFmaskInfo( + ADDR_HANDLE hLib, ///< address lib handle + const ADDR_COMPUTE_FMASK_INFO_INPUT* pIn, ///< [in] Fmask information + ADDR_COMPUTE_FMASK_INFO_OUTPUT* pOut) ///< [out] Fmask pitch and height +{ + V1::Lib* pLib = 
V1::Lib::GetLib(hLib); + + ADDR_E_RETURNCODE returnCode = ADDR_OK; + + if (pLib != NULL) + { + returnCode = pLib->ComputeFmaskInfo(pIn, pOut); + } + else + { + returnCode = ADDR_ERROR; + } + + return returnCode; +} + +/** +**************************************************************************************************** +* AddrComputeFmaskAddrFromCoord +* +* @brief +* Compute Fmask address according to coordinates (x,y,slice,sample,plane) +* +* @return +* ADDR_OK if successful, otherwise an error code of ADDR_E_RETURNCODE +**************************************************************************************************** +*/ +ADDR_E_RETURNCODE ADDR_API AddrComputeFmaskAddrFromCoord( + ADDR_HANDLE hLib, ///< address lib handle + const ADDR_COMPUTE_FMASK_ADDRFROMCOORD_INPUT* pIn, ///< [in] Fmask info and coordinates + ADDR_COMPUTE_FMASK_ADDRFROMCOORD_OUTPUT* pOut) ///< [out] Fmask address +{ + V1::Lib* pLib = V1::Lib::GetLib(hLib); + + ADDR_E_RETURNCODE returnCode = ADDR_OK; + + if (pLib != NULL) + { + returnCode = pLib->ComputeFmaskAddrFromCoord(pIn, pOut); + } + else + { + returnCode = ADDR_ERROR; + } + + return returnCode; +} + +/** +**************************************************************************************************** +* AddrComputeFmaskCoordFromAddr +* +* @brief +* Compute coordinates (x,y,slice,sample,plane) according to Fmask address +* +* @return +* ADDR_OK if successful, otherwise an error code of ADDR_E_RETURNCODE +**************************************************************************************************** +*/ +ADDR_E_RETURNCODE ADDR_API AddrComputeFmaskCoordFromAddr( + ADDR_HANDLE hLib, ///< address lib handle + const ADDR_COMPUTE_FMASK_COORDFROMADDR_INPUT* pIn, ///< [in] Fmask info and address + ADDR_COMPUTE_FMASK_COORDFROMADDR_OUTPUT* pOut) ///< [out] Fmask coordinates +{ + V1::Lib* pLib = V1::Lib::GetLib(hLib); + + ADDR_E_RETURNCODE returnCode = ADDR_OK; + + if (pLib != NULL) + { + returnCode = 
pLib->ComputeFmaskCoordFromAddr(pIn, pOut); + } + else + { + returnCode = ADDR_ERROR; + } + + return returnCode; +} + +//////////////////////////////////////////////////////////////////////////////////////////////////// +// DCC key functions +//////////////////////////////////////////////////////////////////////////////////////////////////// + +/** +**************************************************************************************************** +* AddrComputeDccInfo +* +* @brief +* Compute DCC key size, base alignment based on color surface size, tile info or tile index +* +**************************************************************************************************** +*/ +ADDR_E_RETURNCODE ADDR_API AddrComputeDccInfo( + ADDR_HANDLE hLib, ///< handle of addrlib + const ADDR_COMPUTE_DCCINFO_INPUT* pIn, ///< [in] input + ADDR_COMPUTE_DCCINFO_OUTPUT* pOut) ///< [out] output +{ + ADDR_E_RETURNCODE returnCode; + + V1::Lib* pLib = V1::Lib::GetLib(hLib); + + if (pLib != NULL) + { + returnCode = pLib->ComputeDccInfo(pIn, pOut); + } + else + { + returnCode = ADDR_ERROR; + } + + return returnCode; +} + +/////////////////////////////////////////////////////////////////////////////// +// Below functions are element related or helper functions +/////////////////////////////////////////////////////////////////////////////// + +/** +**************************************************************************************************** +* AddrGetVersion +* +* @brief +* Get AddrLib version number. Client may check this return value against ADDRLIB_VERSION +* defined in addrinterface.h to see if there is a mismatch. 
+**************************************************************************************************** +*/ +UINT_32 ADDR_API AddrGetVersion(ADDR_HANDLE hLib) +{ + UINT_32 version = 0; + + Addr::Lib* pLib = Lib::GetLib(hLib); + + ADDR_ASSERT(pLib != NULL); + + if (pLib) + { + version = pLib->GetVersion(); + } + + return version; +} + +/** +**************************************************************************************************** +* AddrUseTileIndex +* +* @brief +* Return TRUE if tileIndex is enabled in this address library +**************************************************************************************************** +*/ +BOOL_32 ADDR_API AddrUseTileIndex(ADDR_HANDLE hLib) +{ + BOOL_32 useTileIndex = FALSE; + + V1::Lib* pLib = V1::Lib::GetLib(hLib); + + ADDR_ASSERT(pLib != NULL); + + if (pLib) + { + useTileIndex = pLib->UseTileIndex(0); + } + + return useTileIndex; +} + +/** +**************************************************************************************************** +* AddrUseCombinedSwizzle +* +* @brief +* Return TRUE if combined swizzle is enabled in this address library +**************************************************************************************************** +*/ +BOOL_32 ADDR_API AddrUseCombinedSwizzle(ADDR_HANDLE hLib) +{ + BOOL_32 useCombinedSwizzle = FALSE; + + V1::Lib* pLib = V1::Lib::GetLib(hLib); + + ADDR_ASSERT(pLib != NULL); + + if (pLib) + { + useCombinedSwizzle = pLib->UseCombinedSwizzle(); + } + + return useCombinedSwizzle; +} + +/** +**************************************************************************************************** +* AddrExtractBankPipeSwizzle +* +* @brief +* Extract Bank and Pipe swizzle from base256b +* @return +* ADDR_OK if successful, otherwise an error code of ADDR_E_RETURNCODE +**************************************************************************************************** +*/ +ADDR_E_RETURNCODE ADDR_API AddrExtractBankPipeSwizzle( + ADDR_HANDLE hLib, ///< addrlib handle + const 
ADDR_EXTRACT_BANKPIPE_SWIZZLE_INPUT* pIn, ///< [in] input structure + ADDR_EXTRACT_BANKPIPE_SWIZZLE_OUTPUT* pOut) ///< [out] output structure +{ + ADDR_E_RETURNCODE returnCode = ADDR_OK; + + V1::Lib* pLib = V1::Lib::GetLib(hLib); + + if (pLib != NULL) + { + returnCode = pLib->ExtractBankPipeSwizzle(pIn, pOut); + } + else + { + returnCode = ADDR_ERROR; + } + + return returnCode; +} + +/** +**************************************************************************************************** +* AddrCombineBankPipeSwizzle +* +* @brief +* Combine Bank and Pipe swizzle +* @return +* ADDR_E_RETURNCODE +**************************************************************************************************** +*/ +ADDR_E_RETURNCODE ADDR_API AddrCombineBankPipeSwizzle( + ADDR_HANDLE hLib, + const ADDR_COMBINE_BANKPIPE_SWIZZLE_INPUT* pIn, + ADDR_COMBINE_BANKPIPE_SWIZZLE_OUTPUT* pOut) +{ + ADDR_E_RETURNCODE returnCode = ADDR_OK; + + V1::Lib* pLib = V1::Lib::GetLib(hLib); + + if (pLib != NULL) + { + returnCode = pLib->CombineBankPipeSwizzle(pIn, pOut); + } + else + { + returnCode = ADDR_ERROR; + } + + return returnCode; +} + +/** +**************************************************************************************************** +* AddrComputeSliceSwizzle +* +* @brief +* Compute a swizzle for slice from a base swizzle +* @return +* ADDR_OK if no error +**************************************************************************************************** +*/ +ADDR_E_RETURNCODE ADDR_API AddrComputeSliceSwizzle( + ADDR_HANDLE hLib, + const ADDR_COMPUTE_SLICESWIZZLE_INPUT* pIn, + ADDR_COMPUTE_SLICESWIZZLE_OUTPUT* pOut) +{ + ADDR_E_RETURNCODE returnCode = ADDR_OK; + + V1::Lib* pLib = V1::Lib::GetLib(hLib); + + if (pLib != NULL) + { + returnCode = pLib->ComputeSliceTileSwizzle(pIn, pOut); + } + else + { + returnCode = ADDR_ERROR; + } + + return returnCode; +} + +/** +**************************************************************************************************** +* AddrComputeBaseSwizzle 
+* +* @brief +* Return a Combined Bank and Pipe swizzle base on surface based on surface type/index +* @return +* ADDR_OK if no error +**************************************************************************************************** +*/ +ADDR_E_RETURNCODE ADDR_API AddrComputeBaseSwizzle( + ADDR_HANDLE hLib, + const ADDR_COMPUTE_BASE_SWIZZLE_INPUT* pIn, + ADDR_COMPUTE_BASE_SWIZZLE_OUTPUT* pOut) +{ + ADDR_E_RETURNCODE returnCode = ADDR_OK; + + V1::Lib* pLib = V1::Lib::GetLib(hLib); + + if (pLib != NULL) + { + returnCode = pLib->ComputeBaseSwizzle(pIn, pOut); + } + else + { + returnCode = ADDR_ERROR; + } + + return returnCode; +} + +/** +**************************************************************************************************** +* ElemFlt32ToDepthPixel +* +* @brief +* Convert a FLT_32 value to a depth/stencil pixel value +* +* @return +* ADDR_OK if successful, otherwise an error code of ADDR_E_RETURNCODE +* +**************************************************************************************************** +*/ +ADDR_E_RETURNCODE ADDR_API ElemFlt32ToDepthPixel( + ADDR_HANDLE hLib, ///< addrlib handle + const ELEM_FLT32TODEPTHPIXEL_INPUT* pIn, ///< [in] per-component value + ELEM_FLT32TODEPTHPIXEL_OUTPUT* pOut) ///< [out] final pixel value +{ + ADDR_E_RETURNCODE returnCode = ADDR_OK; + + Lib* pLib = Lib::GetLib(hLib); + + if (pLib != NULL) + { + pLib->Flt32ToDepthPixel(pIn, pOut); + } + else + { + returnCode = ADDR_ERROR; + } + + return returnCode; +} + +/** +**************************************************************************************************** +* ElemFlt32ToColorPixel +* +* @brief +* Convert a FLT_32 value to a red/green/blue/alpha pixel value +* +* @return +* ADDR_OK if successful, otherwise an error code of ADDR_E_RETURNCODE +* +**************************************************************************************************** +*/ +ADDR_E_RETURNCODE ADDR_API ElemFlt32ToColorPixel( + ADDR_HANDLE hLib, ///< addrlib handle + const 
ELEM_FLT32TOCOLORPIXEL_INPUT* pIn, ///< [in] format, surface number and swap value + ELEM_FLT32TOCOLORPIXEL_OUTPUT* pOut) ///< [out] final pixel value +{ + ADDR_E_RETURNCODE returnCode = ADDR_OK; + + Lib* pLib = Lib::GetLib(hLib); + + if (pLib != NULL) + { + pLib->Flt32ToColorPixel(pIn, pOut); + } + else + { + returnCode = ADDR_ERROR; + } + + return returnCode; +} + +/** +**************************************************************************************************** +* ElemGetExportNorm +* +* @brief +* Helper function to check one format can be EXPORT_NUM, +* which is a register CB_COLOR_INFO.SURFACE_FORMAT. +* FP16 can be reported as EXPORT_NORM for rv770 in r600 +* family +* +**************************************************************************************************** +*/ +BOOL_32 ADDR_API ElemGetExportNorm( + ADDR_HANDLE hLib, ///< addrlib handle + const ELEM_GETEXPORTNORM_INPUT* pIn) ///< [in] input structure +{ + Addr::Lib* pLib = Lib::GetLib(hLib); + BOOL_32 enabled = FALSE; + + MAYBE_UNUSED ADDR_E_RETURNCODE returnCode = ADDR_OK; + + if (pLib != NULL) + { + enabled = pLib->GetExportNorm(pIn); + } + else + { + returnCode = ADDR_ERROR; + } + + ADDR_ASSERT(returnCode == ADDR_OK); + + return enabled; +} + +/** +**************************************************************************************************** +* ElemSize +* +* @brief +* Get bits-per-element for specified format +* +* @return +* Bits-per-element of specified format +* +**************************************************************************************************** +*/ +UINT_32 ADDR_API ElemSize( + ADDR_HANDLE hLib, + AddrFormat format) +{ + UINT_32 bpe = 0; + + Addr::Lib* pLib = Lib::GetLib(hLib); + + if (pLib != NULL) + { + bpe = pLib->GetBpe(format); + } + + return bpe; +} + +/** +**************************************************************************************************** +* AddrConvertTileInfoToHW +* +* @brief +* Convert tile info from real value to hardware register 
value +* +* @return +* ADDR_OK if successful, otherwise an error code of ADDR_E_RETURNCODE +**************************************************************************************************** +*/ +ADDR_E_RETURNCODE ADDR_API AddrConvertTileInfoToHW( + ADDR_HANDLE hLib, ///< address lib handle + const ADDR_CONVERT_TILEINFOTOHW_INPUT* pIn, ///< [in] tile info with real value + ADDR_CONVERT_TILEINFOTOHW_OUTPUT* pOut) ///< [out] tile info with HW register value +{ + V1::Lib* pLib = V1::Lib::GetLib(hLib); + + ADDR_E_RETURNCODE returnCode = ADDR_OK; + + if (pLib != NULL) + { + returnCode = pLib->ConvertTileInfoToHW(pIn, pOut); + } + else + { + returnCode = ADDR_ERROR; + } + + return returnCode; +} + +/** +**************************************************************************************************** +* AddrConvertTileIndex +* +* @brief +* Convert tile index to tile mode/type/info +* +* @return +* ADDR_OK if successful, otherwise an error code of ADDR_E_RETURNCODE +**************************************************************************************************** +*/ +ADDR_E_RETURNCODE ADDR_API AddrConvertTileIndex( + ADDR_HANDLE hLib, ///< address lib handle + const ADDR_CONVERT_TILEINDEX_INPUT* pIn, ///< [in] input - tile index + ADDR_CONVERT_TILEINDEX_OUTPUT* pOut) ///< [out] tile mode/type/info +{ + V1::Lib* pLib = V1::Lib::GetLib(hLib); + + ADDR_E_RETURNCODE returnCode = ADDR_OK; + + if (pLib != NULL) + { + returnCode = pLib->ConvertTileIndex(pIn, pOut); + } + else + { + returnCode = ADDR_ERROR; + } + + return returnCode; +} + +/** +**************************************************************************************************** +* AddrGetMacroModeIndex +* +* @brief +* Get macro mode index based on input parameters +* +* @return +* ADDR_OK if successful, otherwise an error code of ADDR_E_RETURNCODE +**************************************************************************************************** +*/ +ADDR_E_RETURNCODE ADDR_API AddrGetMacroModeIndex( + 
ADDR_HANDLE hLib, ///< address lib handle + const ADDR_GET_MACROMODEINDEX_INPUT* pIn, ///< [in] input + ADDR_GET_MACROMODEINDEX_OUTPUT* pOut) ///< [out] macro mode index +{ + V1::Lib* pLib = V1::Lib::GetLib(hLib); + + ADDR_E_RETURNCODE returnCode; + + if (pLib != NULL) + { + returnCode = pLib->GetMacroModeIndex(pIn, pOut); + } + else + { + returnCode = ADDR_ERROR; + } + + return returnCode; +} + +/** +**************************************************************************************************** +* AddrConvertTileIndex1 +* +* @brief +* Convert tile index to tile mode/type/info +* +* @return +* ADDR_OK if successful, otherwise an error code of ADDR_E_RETURNCODE +**************************************************************************************************** +*/ +ADDR_E_RETURNCODE ADDR_API AddrConvertTileIndex1( + ADDR_HANDLE hLib, ///< address lib handle + const ADDR_CONVERT_TILEINDEX1_INPUT* pIn, ///< [in] input - tile index + ADDR_CONVERT_TILEINDEX_OUTPUT* pOut) ///< [out] tile mode/type/info +{ + V1::Lib* pLib = V1::Lib::GetLib(hLib); + + ADDR_E_RETURNCODE returnCode = ADDR_OK; + + if (pLib != NULL) + { + returnCode = pLib->ConvertTileIndex1(pIn, pOut); + } + else + { + returnCode = ADDR_ERROR; + } + + return returnCode; +} + +/** +**************************************************************************************************** +* AddrGetTileIndex +* +* @brief +* Get tile index from tile mode/type/info +* +* @return +* ADDR_OK if successful, otherwise an error code of ADDR_E_RETURNCODE +* +* @note +* Only meaningful for SI (and above) +**************************************************************************************************** +*/ +ADDR_E_RETURNCODE ADDR_API AddrGetTileIndex( + ADDR_HANDLE hLib, + const ADDR_GET_TILEINDEX_INPUT* pIn, + ADDR_GET_TILEINDEX_OUTPUT* pOut) +{ + V1::Lib* pLib = V1::Lib::GetLib(hLib); + + ADDR_E_RETURNCODE returnCode = ADDR_OK; + + if (pLib != NULL) + { + returnCode = pLib->GetTileIndex(pIn, pOut); + } + else + { + 
returnCode = ADDR_ERROR; + } + + return returnCode; +} + +/** +**************************************************************************************************** +* AddrComputePrtInfo +* +* @brief +* Interface function for ComputePrtInfo +* +**************************************************************************************************** +*/ +ADDR_E_RETURNCODE ADDR_API AddrComputePrtInfo( + ADDR_HANDLE hLib, + const ADDR_PRT_INFO_INPUT* pIn, + ADDR_PRT_INFO_OUTPUT* pOut) +{ + ADDR_E_RETURNCODE returnCode = ADDR_OK; + + V1::Lib* pLib = V1::Lib::GetLib(hLib); + + if (pLib != NULL) + { + returnCode = pLib->ComputePrtInfo(pIn, pOut); + } + else + { + returnCode = ADDR_ERROR; + } + + return returnCode; +} + +/** +**************************************************************************************************** +* AddrGetMaxAlignments +* +* @brief +* Convert maximum alignments +* +* @return +* ADDR_OK if successful, otherwise an error code of ADDR_E_RETURNCODE +**************************************************************************************************** +*/ +ADDR_E_RETURNCODE ADDR_API AddrGetMaxAlignments( + ADDR_HANDLE hLib, ///< address lib handle + ADDR_GET_MAX_ALINGMENTS_OUTPUT* pOut) ///< [out] output structure +{ + Addr::Lib* pLib = Lib::GetLib(hLib); + + ADDR_E_RETURNCODE returnCode = ADDR_OK; + + if (pLib != NULL) + { + returnCode = pLib->GetMaxAlignments(pOut); + } + else + { + returnCode = ADDR_ERROR; + } + + return returnCode; +} + +/** +**************************************************************************************************** +* AddrGetMaxMetaAlignments +* +* @brief +* Convert maximum alignments for metadata +* +* @return +* ADDR_OK if successful, otherwise an error code of ADDR_E_RETURNCODE +**************************************************************************************************** +*/ +ADDR_E_RETURNCODE ADDR_API AddrGetMaxMetaAlignments( + ADDR_HANDLE hLib, ///< address lib handle + ADDR_GET_MAX_ALINGMENTS_OUTPUT* pOut) ///< 
[out] output structure +{ + Addr::Lib* pLib = Lib::GetLib(hLib); + + ADDR_E_RETURNCODE returnCode = ADDR_OK; + + if (pLib != NULL) + { + returnCode = pLib->GetMaxMetaAlignments(pOut); + } + else + { + returnCode = ADDR_ERROR; + } + + return returnCode; +} + +//////////////////////////////////////////////////////////////////////////////////////////////////// +// Surface functions for Addr2 +//////////////////////////////////////////////////////////////////////////////////////////////////// + +/** +**************************************************************************************************** +* Addr2ComputeSurfaceInfo +* +* @brief +* Calculate surface width/height/depth/alignments and suitable tiling mode +* +* @return +* ADDR_OK if successful, otherwise an error code of ADDR_E_RETURNCODE +**************************************************************************************************** +*/ +ADDR_E_RETURNCODE ADDR_API Addr2ComputeSurfaceInfo( + ADDR_HANDLE hLib, ///< address lib handle + const ADDR2_COMPUTE_SURFACE_INFO_INPUT* pIn, ///< [in] surface information + ADDR2_COMPUTE_SURFACE_INFO_OUTPUT* pOut) ///< [out] surface parameters and alignments +{ + V2::Lib* pLib = V2::Lib::GetLib(hLib); + + ADDR_E_RETURNCODE returnCode = ADDR_OK; + + if (pLib != NULL) + { + returnCode = pLib->ComputeSurfaceInfo(pIn, pOut); + } + else + { + returnCode = ADDR_ERROR; + } + + return returnCode; +} + +/** +**************************************************************************************************** +* Addr2ComputeSurfaceAddrFromCoord +* +* @brief +* Compute surface address according to coordinates +* +* @return +* ADDR_OK if successful, otherwise an error code of ADDR_E_RETURNCODE +**************************************************************************************************** +*/ +ADDR_E_RETURNCODE ADDR_API Addr2ComputeSurfaceAddrFromCoord( + ADDR_HANDLE hLib, ///< address lib handle + const ADDR2_COMPUTE_SURFACE_ADDRFROMCOORD_INPUT* pIn, ///< [in] surface info and 
coordinates + ADDR2_COMPUTE_SURFACE_ADDRFROMCOORD_OUTPUT* pOut) ///< [out] surface address +{ + V2::Lib* pLib = V2::Lib::GetLib(hLib); + + ADDR_E_RETURNCODE returnCode = ADDR_OK; + + if (pLib != NULL) + { + returnCode = pLib->ComputeSurfaceAddrFromCoord(pIn, pOut); + } + else + { + returnCode = ADDR_ERROR; + } + + return returnCode; +} + +/** +**************************************************************************************************** +* Addr2ComputeSurfaceCoordFromAddr +* +* @brief +* Compute coordinates according to surface address +* +* @return +* ADDR_OK if successful, otherwise an error code of ADDR_E_RETURNCODE +**************************************************************************************************** +*/ +ADDR_E_RETURNCODE ADDR_API Addr2ComputeSurfaceCoordFromAddr( + ADDR_HANDLE hLib, ///< address lib handle + const ADDR2_COMPUTE_SURFACE_COORDFROMADDR_INPUT* pIn, ///< [in] surface info and address + ADDR2_COMPUTE_SURFACE_COORDFROMADDR_OUTPUT* pOut) ///< [out] coordinates +{ + V2::Lib* pLib = V2::Lib::GetLib(hLib); + + ADDR_E_RETURNCODE returnCode = ADDR_OK; + + if (pLib != NULL) + { + returnCode = pLib->ComputeSurfaceCoordFromAddr(pIn, pOut); + } + else + { + returnCode = ADDR_ERROR; + } + + return returnCode; +} + +//////////////////////////////////////////////////////////////////////////////////////////////////// +// HTile functions for Addr2 +//////////////////////////////////////////////////////////////////////////////////////////////////// + +/** +**************************************************************************************************** +* Addr2ComputeHtileInfo +* +* @brief +* Compute Htile pitch, height, base alignment and size in bytes +* +* @return +* ADDR_OK if successful, otherwise an error code of ADDR_E_RETURNCODE +**************************************************************************************************** +*/ +ADDR_E_RETURNCODE ADDR_API Addr2ComputeHtileInfo( + ADDR_HANDLE hLib, ///< address lib handle + const 
ADDR2_COMPUTE_HTILE_INFO_INPUT* pIn, ///< [in] Htile information + ADDR2_COMPUTE_HTILE_INFO_OUTPUT* pOut) ///< [out] Htile pitch, height and size in bytes +{ + V2::Lib* pLib = V2::Lib::GetLib(hLib); + + ADDR_E_RETURNCODE returnCode = ADDR_OK; + + if (pLib != NULL) + { + returnCode = pLib->ComputeHtileInfo(pIn, pOut); + } + else + { + returnCode = ADDR_ERROR; + } + + return returnCode; +} + +/** +**************************************************************************************************** +* Addr2ComputeHtileAddrFromCoord +* +* @brief +* Compute Htile address according to coordinates (of depth buffer) +* +* @return +* ADDR_OK if successful, otherwise an error code of ADDR_E_RETURNCODE +**************************************************************************************************** +*/ +ADDR_E_RETURNCODE ADDR_API Addr2ComputeHtileAddrFromCoord( + ADDR_HANDLE hLib, ///< address lib handle + const ADDR2_COMPUTE_HTILE_ADDRFROMCOORD_INPUT* pIn, ///< [in] Htile info and coordinates + ADDR2_COMPUTE_HTILE_ADDRFROMCOORD_OUTPUT* pOut) ///< [out] Htile address +{ + V2::Lib* pLib = V2::Lib::GetLib(hLib); + + ADDR_E_RETURNCODE returnCode = ADDR_OK; + + if (pLib != NULL) + { + returnCode = pLib->ComputeHtileAddrFromCoord(pIn, pOut); + } + else + { + returnCode = ADDR_ERROR; + } + + return returnCode; +} + +/** +**************************************************************************************************** +* Addr2ComputeHtileCoordFromAddr +* +* @brief +* Compute coordinates within depth buffer (1st pixel of a micro tile) according to +* Htile address +* +* @return +* ADDR_OK if successful, otherwise an error code of ADDR_E_RETURNCODE +**************************************************************************************************** +*/ +ADDR_E_RETURNCODE ADDR_API Addr2ComputeHtileCoordFromAddr( + ADDR_HANDLE hLib, ///< address lib handle + const ADDR2_COMPUTE_HTILE_COORDFROMADDR_INPUT* pIn, ///< [in] Htile info and address + 
ADDR2_COMPUTE_HTILE_COORDFROMADDR_OUTPUT* pOut) ///< [out] Htile coordinates +{ + V2::Lib* pLib = V2::Lib::GetLib(hLib); + + ADDR_E_RETURNCODE returnCode = ADDR_OK; + + if (pLib != NULL) + { + returnCode = pLib->ComputeHtileCoordFromAddr(pIn, pOut); + } + else + { + returnCode = ADDR_ERROR; + } + + return returnCode; +} + +//////////////////////////////////////////////////////////////////////////////////////////////////// +// C-mask functions for Addr2 +//////////////////////////////////////////////////////////////////////////////////////////////////// + +/** +**************************************************************************************************** +* Addr2ComputeCmaskInfo +* +* @brief +* Compute Cmask pitch, height, base alignment and size in bytes from color buffer +* info +* +* @return +* ADDR_OK if successful, otherwise an error code of ADDR_E_RETURNCODE +**************************************************************************************************** +*/ +ADDR_E_RETURNCODE ADDR_API Addr2ComputeCmaskInfo( + ADDR_HANDLE hLib, ///< address lib handle + const ADDR2_COMPUTE_CMASK_INFO_INPUT* pIn, ///< [in] Cmask pitch and height + ADDR2_COMPUTE_CMASK_INFO_OUTPUT* pOut) ///< [out] Cmask pitch, height and size in bytes +{ + V2::Lib* pLib = V2::Lib::GetLib(hLib); + + ADDR_E_RETURNCODE returnCode = ADDR_OK; + + if (pLib != NULL) + { + returnCode = pLib->ComputeCmaskInfo(pIn, pOut); + } + else + { + returnCode = ADDR_ERROR; + } + + return returnCode; +} + +/** +**************************************************************************************************** +* Addr2ComputeCmaskAddrFromCoord +* +* @brief +* Compute Cmask address according to coordinates (of MSAA color buffer) +* +* @return +* ADDR_OK if successful, otherwise an error code of ADDR_E_RETURNCODE +**************************************************************************************************** +*/ +ADDR_E_RETURNCODE ADDR_API Addr2ComputeCmaskAddrFromCoord( + ADDR_HANDLE hLib, ///< address 
lib handle + const ADDR2_COMPUTE_CMASK_ADDRFROMCOORD_INPUT* pIn, ///< [in] Cmask info and coordinates + ADDR2_COMPUTE_CMASK_ADDRFROMCOORD_OUTPUT* pOut) ///< [out] Cmask address +{ + V2::Lib* pLib = V2::Lib::GetLib(hLib); + + ADDR_E_RETURNCODE returnCode = ADDR_OK; + + if (pLib != NULL) + { + returnCode = pLib->ComputeCmaskAddrFromCoord(pIn, pOut); + } + else + { + returnCode = ADDR_ERROR; + } + + return returnCode; +} + +/** +**************************************************************************************************** +* Addr2ComputeCmaskCoordFromAddr +* +* @brief +* Compute coordinates within color buffer (1st pixel of a micro tile) according to +* Cmask address +* +* @return +* ADDR_OK if successful, otherwise an error code of ADDR_E_RETURNCODE +**************************************************************************************************** +*/ +ADDR_E_RETURNCODE ADDR_API Addr2ComputeCmaskCoordFromAddr( + ADDR_HANDLE hLib, ///< address lib handle + const ADDR2_COMPUTE_CMASK_COORDFROMADDR_INPUT* pIn, ///< [in] Cmask info and address + ADDR2_COMPUTE_CMASK_COORDFROMADDR_OUTPUT* pOut) ///< [out] Cmask coordinates +{ + V2::Lib* pLib = V2::Lib::GetLib(hLib); + + ADDR_E_RETURNCODE returnCode = ADDR_OK; + + if (pLib != NULL) + { + returnCode = pLib->ComputeCmaskCoordFromAddr(pIn, pOut); + } + else + { + returnCode = ADDR_ERROR; + } + + return returnCode; +} + +//////////////////////////////////////////////////////////////////////////////////////////////////// +// F-mask functions for Addr2 +//////////////////////////////////////////////////////////////////////////////////////////////////// + +/** +**************************************************************************************************** +* Addr2ComputeFmaskInfo +* +* @brief +* Compute Fmask pitch/height/depth/alignments and size in bytes +* +* @return +* ADDR_OK if successful, otherwise an error code of ADDR_E_RETURNCODE 
+**************************************************************************************************** +*/ +ADDR_E_RETURNCODE ADDR_API Addr2ComputeFmaskInfo( + ADDR_HANDLE hLib, ///< address lib handle + const ADDR2_COMPUTE_FMASK_INFO_INPUT* pIn, ///< [in] Fmask information + ADDR2_COMPUTE_FMASK_INFO_OUTPUT* pOut) ///< [out] Fmask pitch and height +{ + V2::Lib* pLib = V2::Lib::GetLib(hLib); + + ADDR_E_RETURNCODE returnCode = ADDR_OK; + + if (pLib != NULL) + { + returnCode = pLib->ComputeFmaskInfo(pIn, pOut); + } + else + { + returnCode = ADDR_ERROR; + } + + return returnCode; +} + +/** +**************************************************************************************************** +* Addr2ComputeFmaskAddrFromCoord +* +* @brief +* Compute Fmask address according to coordinates (x,y,slice,sample,plane) +* +* @return +* ADDR_OK if successful, otherwise an error code of ADDR_E_RETURNCODE +**************************************************************************************************** +*/ +ADDR_E_RETURNCODE ADDR_API Addr2ComputeFmaskAddrFromCoord( + ADDR_HANDLE hLib, ///< address lib handle + const ADDR2_COMPUTE_FMASK_ADDRFROMCOORD_INPUT* pIn, ///< [in] Fmask info and coordinates + ADDR2_COMPUTE_FMASK_ADDRFROMCOORD_OUTPUT* pOut) ///< [out] Fmask address +{ + V2::Lib* pLib = V2::Lib::GetLib(hLib); + + ADDR_E_RETURNCODE returnCode = ADDR_OK; + + if (pLib != NULL) + { + returnCode = pLib->ComputeFmaskAddrFromCoord(pIn, pOut); + } + else + { + returnCode = ADDR_ERROR; + } + + return returnCode; +} + +/** +**************************************************************************************************** +* Addr2ComputeFmaskCoordFromAddr +* +* @brief +* Compute coordinates (x,y,slice,sample,plane) according to Fmask address +* +* @return +* ADDR_OK if successful, otherwise an error code of ADDR_E_RETURNCODE +**************************************************************************************************** +*/ +ADDR_E_RETURNCODE ADDR_API 
Addr2ComputeFmaskCoordFromAddr( + ADDR_HANDLE hLib, ///< address lib handle + const ADDR2_COMPUTE_FMASK_COORDFROMADDR_INPUT* pIn, ///< [in] Fmask info and address + ADDR2_COMPUTE_FMASK_COORDFROMADDR_OUTPUT* pOut) ///< [out] Fmask coordinates +{ + V2::Lib* pLib = V2::Lib::GetLib(hLib); + + ADDR_E_RETURNCODE returnCode = ADDR_OK; + + if (pLib != NULL) + { + returnCode = pLib->ComputeFmaskCoordFromAddr(pIn, pOut); + } + else + { + returnCode = ADDR_ERROR; + } + + return returnCode; +} + +//////////////////////////////////////////////////////////////////////////////////////////////////// +// DCC key functions for Addr2 +//////////////////////////////////////////////////////////////////////////////////////////////////// + +/** +**************************************************************************************************** +* Addr2ComputeDccInfo +* +* @brief +* Compute DCC key size, base alignment based on color surface size, tile info or tile index +* +**************************************************************************************************** +*/ +ADDR_E_RETURNCODE ADDR_API Addr2ComputeDccInfo( + ADDR_HANDLE hLib, ///< handle of addrlib + const ADDR2_COMPUTE_DCCINFO_INPUT* pIn, ///< [in] input + ADDR2_COMPUTE_DCCINFO_OUTPUT* pOut) ///< [out] output +{ + ADDR_E_RETURNCODE returnCode; + + V2::Lib* pLib = V2::Lib::GetLib(hLib); + + if (pLib != NULL) + { + returnCode = pLib->ComputeDccInfo(pIn, pOut); + } + else + { + returnCode = ADDR_ERROR; + } + + return returnCode; +} + +/** +**************************************************************************************************** +* Addr2ComputeDccAddrFromCoord +* +* @brief +* Compute DCC key address according to coordinates +* +* @return +* ADDR_OK if successful, otherwise an error code of ADDR_E_RETURNCODE +**************************************************************************************************** +*/ +ADDR_E_RETURNCODE ADDR_API Addr2ComputeDccAddrFromCoord( + ADDR_HANDLE hLib, ///< address lib 
handle + const ADDR2_COMPUTE_DCC_ADDRFROMCOORD_INPUT* pIn, ///< [in] Dcc info and coordinates + ADDR2_COMPUTE_DCC_ADDRFROMCOORD_OUTPUT* pOut) ///< [out] Dcc address +{ + V2::Lib* pLib = V2::Lib::GetLib(hLib); + + ADDR_E_RETURNCODE returnCode = ADDR_OK; + + if (pLib != NULL) + { + returnCode = pLib->ComputeDccAddrFromCoord(pIn, pOut); + } + else + { + returnCode = ADDR_ERROR; + } + + return returnCode; +} + +/** +**************************************************************************************************** +* Addr2ComputePipeBankXor +* +* @brief +* Calculate a valid bank pipe xor value for client to use. +**************************************************************************************************** +*/ +ADDR_E_RETURNCODE ADDR_API Addr2ComputePipeBankXor( + ADDR_HANDLE hLib, ///< handle of addrlib + const ADDR2_COMPUTE_PIPEBANKXOR_INPUT* pIn, ///< [in] input + ADDR2_COMPUTE_PIPEBANKXOR_OUTPUT* pOut) ///< [out] output +{ + ADDR_E_RETURNCODE returnCode; + + V2::Lib* pLib = V2::Lib::GetLib(hLib); + + if (pLib != NULL) + { + returnCode = pLib->ComputePipeBankXor(pIn, pOut); + } + else + { + returnCode = ADDR_ERROR; + } + + return returnCode; +} + +/** +**************************************************************************************************** +* Addr2ComputeSlicePipeBankXor +* +* @brief +* Calculate slice pipe bank xor value based on base pipe bank xor and slice id. 
+**************************************************************************************************** +*/ +ADDR_E_RETURNCODE ADDR_API Addr2ComputeSlicePipeBankXor( + ADDR_HANDLE hLib, ///< handle of addrlib + const ADDR2_COMPUTE_SLICE_PIPEBANKXOR_INPUT* pIn, ///< [in] input + ADDR2_COMPUTE_SLICE_PIPEBANKXOR_OUTPUT* pOut) ///< [out] output +{ + ADDR_E_RETURNCODE returnCode; + + V2::Lib* pLib = V2::Lib::GetLib(hLib); + + if (pLib != NULL) + { + returnCode = pLib->ComputeSlicePipeBankXor(pIn, pOut); + } + else + { + returnCode = ADDR_ERROR; + } + + return returnCode; +} + +/** +**************************************************************************************************** +* Addr2ComputeSubResourceOffsetForSwizzlePattern +* +* @brief +* Calculate sub resource offset for swizzle pattern. +**************************************************************************************************** +*/ +ADDR_E_RETURNCODE ADDR_API Addr2ComputeSubResourceOffsetForSwizzlePattern( + ADDR_HANDLE hLib, ///< handle of addrlib + const ADDR2_COMPUTE_SUBRESOURCE_OFFSET_FORSWIZZLEPATTERN_INPUT* pIn, ///< [in] input + ADDR2_COMPUTE_SUBRESOURCE_OFFSET_FORSWIZZLEPATTERN_OUTPUT* pOut) ///< [out] output +{ + ADDR_E_RETURNCODE returnCode; + + V2::Lib* pLib = V2::Lib::GetLib(hLib); + + if (pLib != NULL) + { + returnCode = pLib->ComputeSubResourceOffsetForSwizzlePattern(pIn, pOut); + } + else + { + returnCode = ADDR_ERROR; + } + + return returnCode; +} + +/** +**************************************************************************************************** +* Addr2GetPreferredSurfaceSetting +* +* @brief +* Suggest a preferred setting for client driver to program HW register +**************************************************************************************************** +*/ +ADDR_E_RETURNCODE ADDR_API Addr2GetPreferredSurfaceSetting( + ADDR_HANDLE hLib, ///< handle of addrlib + const ADDR2_GET_PREFERRED_SURF_SETTING_INPUT* pIn, ///< [in] input + ADDR2_GET_PREFERRED_SURF_SETTING_OUTPUT* 
pOut) ///< [out] output +{ + ADDR_E_RETURNCODE returnCode; + + V2::Lib* pLib = V2::Lib::GetLib(hLib); + + if (pLib != NULL) + { + returnCode = pLib->Addr2GetPreferredSurfaceSetting(pIn, pOut); + } + else + { + returnCode = ADDR_ERROR; + } + + return returnCode; +} + +/** +**************************************************************************************************** +* Addr2IsValidDisplaySwizzleMode +* +* @brief +* Return whether the swizzle mode is supported by DCE / DCN. +**************************************************************************************************** +*/ +ADDR_E_RETURNCODE ADDR_API Addr2IsValidDisplaySwizzleMode( + ADDR_HANDLE hLib, + AddrSwizzleMode swizzleMode, + UINT_32 bpp, + bool *result) +{ + ADDR_E_RETURNCODE returnCode; + + V2::Lib* pLib = V2::Lib::GetLib(hLib); + + if (pLib != NULL) + { + ADDR2_COMPUTE_SURFACE_INFO_INPUT in; + in.swizzleMode = swizzleMode; + in.bpp = bpp; + + *result = pLib->IsValidDisplaySwizzleMode(&in); + returnCode = ADDR_OK; + } + else + { + returnCode = ADDR_ERROR; + } + + return returnCode; +} diff -Nru mesa-18.3.3/src/amd/addrlib/src/amdgpu_asic_addr.h mesa-19.0.1/src/amd/addrlib/src/amdgpu_asic_addr.h --- mesa-18.3.3/src/amd/addrlib/src/amdgpu_asic_addr.h 1970-01-01 00:00:00.000000000 +0000 +++ mesa-19.0.1/src/amd/addrlib/src/amdgpu_asic_addr.h 2019-03-31 23:16:37.000000000 +0000 @@ -0,0 +1,137 @@ +/* + * Copyright © 2017-2018 Advanced Micro Devices, Inc. + * All Rights Reserved. 
+ * + * Permission is hereby granted, free of charge, to any person obtaining + * a copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES + * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NON-INFRINGEMENT. IN NO EVENT SHALL THE COPYRIGHT HOLDERS, AUTHORS + * AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE + * USE OR OTHER DEALINGS IN THE SOFTWARE. + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. 
+ */ + +#ifndef _AMDGPU_ASIC_ADDR_H +#define _AMDGPU_ASIC_ADDR_H + +#define ATI_VENDOR_ID 0x1002 +#define AMD_VENDOR_ID 0x1022 + +// AMDGPU_VENDOR_IS_AMD(vendorId) +#define AMDGPU_VENDOR_IS_AMD(v) ((v == ATI_VENDOR_ID) || (v == AMD_VENDOR_ID)) + +#define FAMILY_UNKNOWN 0x00 +#define FAMILY_TN 0x69 +#define FAMILY_SI 0x6E +#define FAMILY_CI 0x78 +#define FAMILY_KV 0x7D +#define FAMILY_VI 0x82 +#define FAMILY_POLARIS 0x82 +#define FAMILY_CZ 0x87 +#define FAMILY_AI 0x8D +#define FAMILY_RV 0x8E + +// AMDGPU_FAMILY_IS(familyId, familyName) +#define FAMILY_IS(f, fn) (f == FAMILY_##fn) +#define FAMILY_IS_TN(f) FAMILY_IS(f, TN) +#define FAMILY_IS_SI(f) FAMILY_IS(f, SI) +#define FAMILY_IS_CI(f) FAMILY_IS(f, CI) +#define FAMILY_IS_KV(f) FAMILY_IS(f, KV) +#define FAMILY_IS_VI(f) FAMILY_IS(f, VI) +#define FAMILY_IS_POLARIS(f) FAMILY_IS(f, POLARIS) +#define FAMILY_IS_CZ(f) FAMILY_IS(f, CZ) +#define FAMILY_IS_AI(f) FAMILY_IS(f, AI) +#define FAMILY_IS_RV(f) FAMILY_IS(f, RV) + +#define AMDGPU_UNKNOWN 0xFF + +#define AMDGPU_TAHITI_RANGE 0x05, 0x14 +#define AMDGPU_PITCAIRN_RANGE 0x15, 0x28 +#define AMDGPU_CAPEVERDE_RANGE 0x29, 0x3C +#define AMDGPU_OLAND_RANGE 0x3C, 0x46 +#define AMDGPU_HAINAN_RANGE 0x46, 0xFF + +#define AMDGPU_BONAIRE_RANGE 0x14, 0x28 +#define AMDGPU_HAWAII_RANGE 0x28, 0x3C + +#define AMDGPU_SPECTRE_RANGE 0x01, 0x41 +#define AMDGPU_SPOOKY_RANGE 0x41, 0x81 +#define AMDGPU_KALINDI_RANGE 0x81, 0xA1 +#define AMDGPU_GODAVARI_RANGE 0xA1, 0xFF + +#define AMDGPU_ICELAND_RANGE 0x01, 0x14 +#define AMDGPU_TONGA_RANGE 0x14, 0x28 +#define AMDGPU_FIJI_RANGE 0x3C, 0x50 + +#define AMDGPU_POLARIS10_RANGE 0x50, 0x5A +#define AMDGPU_POLARIS11_RANGE 0x5A, 0x64 +#define AMDGPU_POLARIS12_RANGE 0x64, 0x6E +#define AMDGPU_VEGAM_RANGE 0x6E, 0xFF + +#define AMDGPU_CARRIZO_RANGE 0x01, 0x21 +#define AMDGPU_BRISTOL_RANGE 0x10, 0x21 +#define AMDGPU_STONEY_RANGE 0x61, 0xFF + +#define AMDGPU_VEGA10_RANGE 0x01, 0x14 +#define AMDGPU_VEGA12_RANGE 0x14, 0x28 +#define AMDGPU_VEGA20_RANGE 0x28, 0xFF + 
+#define AMDGPU_RAVEN_RANGE 0x01, 0x81 +#define AMDGPU_RAVEN2_RANGE 0x81, 0xFF + +#define AMDGPU_EXPAND_FIX(x) x +#define AMDGPU_RANGE_HELPER(val, min, max) ((val >= min) && (val < max)) +#define AMDGPU_IN_RANGE(val, ...) AMDGPU_EXPAND_FIX(AMDGPU_RANGE_HELPER(val, __VA_ARGS__)) + +// ASICREV_IS(eRevisionId, revisionName) +#define ASICREV_IS(r, rn) AMDGPU_IN_RANGE(r, AMDGPU_##rn##_RANGE) +#define ASICREV_IS_TAHITI_P(r) ASICREV_IS(r, TAHITI) +#define ASICREV_IS_PITCAIRN_PM(r) ASICREV_IS(r, PITCAIRN) +#define ASICREV_IS_CAPEVERDE_M(r) ASICREV_IS(r, CAPEVERDE) +#define ASICREV_IS_OLAND_M(r) ASICREV_IS(r, OLAND) +#define ASICREV_IS_HAINAN_V(r) ASICREV_IS(r, HAINAN) + +#define ASICREV_IS_BONAIRE_M(r) ASICREV_IS(r, BONAIRE) +#define ASICREV_IS_HAWAII_P(r) ASICREV_IS(r, HAWAII) + +#define ASICREV_IS_SPECTRE(r) ASICREV_IS(r, SPECTRE) +#define ASICREV_IS_SPOOKY(r) ASICREV_IS(r, SPOOKY) +#define ASICREV_IS_KALINDI(r) ASICREV_IS(r, KALINDI) +#define ASICREV_IS_KALINDI_GODAVARI(r) ASICREV_IS(r, GODAVARI) + +#define ASICREV_IS_ICELAND_M(r) ASICREV_IS(r, ICELAND) +#define ASICREV_IS_TONGA_P(r) ASICREV_IS(r, TONGA) +#define ASICREV_IS_FIJI_P(r) ASICREV_IS(r, FIJI) + +#define ASICREV_IS_POLARIS10_P(r) ASICREV_IS(r, POLARIS10) +#define ASICREV_IS_POLARIS11_M(r) ASICREV_IS(r, POLARIS11) +#define ASICREV_IS_POLARIS12_V(r) ASICREV_IS(r, POLARIS12) +#define ASICREV_IS_VEGAM_P(r) ASICREV_IS(r, VEGAM) + +#define ASICREV_IS_CARRIZO(r) ASICREV_IS(r, CARRIZO) +#define ASICREV_IS_CARRIZO_BRISTOL(r) ASICREV_IS(r, BRISTOL) +#define ASICREV_IS_STONEY(r) ASICREV_IS(r, STONEY) + +#define ASICREV_IS_VEGA10_M(r) ASICREV_IS(r, VEGA10) +#define ASICREV_IS_VEGA10_P(r) ASICREV_IS(r, VEGA10) +#define ASICREV_IS_VEGA12_P(r) ASICREV_IS(r, VEGA12) +#define ASICREV_IS_VEGA12_p(r) ASICREV_IS(r, VEGA12) +#define ASICREV_IS_VEGA20_P(r) ASICREV_IS(r, VEGA20) + +#define ASICREV_IS_RAVEN(r) ASICREV_IS(r, RAVEN) +#define ASICREV_IS_RAVEN2(r) ASICREV_IS(r, RAVEN2) + +#endif // _AMDGPU_ASIC_ADDR_H diff -Nru 
mesa-18.3.3/src/amd/addrlib/src/chip/gfx9/gfx9_gb_reg.h mesa-19.0.1/src/amd/addrlib/src/chip/gfx9/gfx9_gb_reg.h --- mesa-18.3.3/src/amd/addrlib/src/chip/gfx9/gfx9_gb_reg.h 1970-01-01 00:00:00.000000000 +0000 +++ mesa-19.0.1/src/amd/addrlib/src/chip/gfx9/gfx9_gb_reg.h 2019-03-31 23:16:37.000000000 +0000 @@ -0,0 +1,89 @@ +#if !defined (__GFX9_GB_REG_H__) +#define __GFX9_GB_REG_H__ + +/* + * Copyright © 2007-2018 Advanced Micro Devices, Inc. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining + * a copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES + * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NON-INFRINGEMENT. IN NO EVENT SHALL THE COPYRIGHT HOLDERS, AUTHORS + * AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE + * USE OR OTHER DEALINGS IN THE SOFTWARE. + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + */ + +#include "util/u_endian.h" + +#if defined(PIPE_ARCH_LITTLE_ENDIAN) +#define LITTLEENDIAN_CPU +#elif defined(PIPE_ARCH_BIG_ENDIAN) +#define BIGENDIAN_CPU +#endif + +// +// Make sure the necessary endian defines are there. 
+// +#if defined(LITTLEENDIAN_CPU) +#elif defined(BIGENDIAN_CPU) +#else +#error "BIGENDIAN_CPU or LITTLEENDIAN_CPU must be defined" +#endif + +union GB_ADDR_CONFIG { + struct { +#if defined(LITTLEENDIAN_CPU) + unsigned int NUM_PIPES : 3; + unsigned int PIPE_INTERLEAVE_SIZE : 3; + unsigned int MAX_COMPRESSED_FRAGS : 2; + unsigned int BANK_INTERLEAVE_SIZE : 3; + unsigned int : 1; + unsigned int NUM_BANKS : 3; + unsigned int : 1; + unsigned int SHADER_ENGINE_TILE_SIZE : 3; + unsigned int NUM_SHADER_ENGINES : 2; + unsigned int NUM_GPUS : 3; + unsigned int MULTI_GPU_TILE_SIZE : 2; + unsigned int NUM_RB_PER_SE : 2; + unsigned int ROW_SIZE : 2; + unsigned int NUM_LOWER_PIPES : 1; + unsigned int SE_ENABLE : 1; +#elif defined(BIGENDIAN_CPU) + unsigned int SE_ENABLE : 1; + unsigned int NUM_LOWER_PIPES : 1; + unsigned int ROW_SIZE : 2; + unsigned int NUM_RB_PER_SE : 2; + unsigned int MULTI_GPU_TILE_SIZE : 2; + unsigned int NUM_GPUS : 3; + unsigned int NUM_SHADER_ENGINES : 2; + unsigned int SHADER_ENGINE_TILE_SIZE : 3; + unsigned int : 1; + unsigned int NUM_BANKS : 3; + unsigned int : 1; + unsigned int BANK_INTERLEAVE_SIZE : 3; + unsigned int MAX_COMPRESSED_FRAGS : 2; + unsigned int PIPE_INTERLEAVE_SIZE : 3; + unsigned int NUM_PIPES : 3; +#endif + } bitfields, bits; + unsigned int u32All; + signed int i32All; + float f32All; +}; + +#endif + diff -Nru mesa-18.3.3/src/amd/addrlib/src/chip/r800/si_gb_reg.h mesa-19.0.1/src/amd/addrlib/src/chip/r800/si_gb_reg.h --- mesa-18.3.3/src/amd/addrlib/src/chip/r800/si_gb_reg.h 1970-01-01 00:00:00.000000000 +0000 +++ mesa-19.0.1/src/amd/addrlib/src/chip/r800/si_gb_reg.h 2019-03-31 23:16:37.000000000 +0000 @@ -0,0 +1,163 @@ +#if !defined (__SI_GB_REG_H__) +#define __SI_GB_REG_H__ + +/* + * Copyright © 2007-2018 Advanced Micro Devices, Inc. + * All Rights Reserved. 
+ * + * Permission is hereby granted, free of charge, to any person obtaining + * a copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES + * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NON-INFRINGEMENT. IN NO EVENT SHALL THE COPYRIGHT HOLDERS, AUTHORS + * AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE + * USE OR OTHER DEALINGS IN THE SOFTWARE. + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + */ + +#include "util/u_endian.h" + +#if defined(PIPE_ARCH_LITTLE_ENDIAN) +#define LITTLEENDIAN_CPU +#elif defined(PIPE_ARCH_BIG_ENDIAN) +#define BIGENDIAN_CPU +#endif + +// +// Make sure the necessary endian defines are there. 
+// +#if defined(LITTLEENDIAN_CPU) +#elif defined(BIGENDIAN_CPU) +#else +#error "BIGENDIAN_CPU or LITTLEENDIAN_CPU must be defined" +#endif + +/* + * GB_ADDR_CONFIG struct + */ + +#if defined(LITTLEENDIAN_CPU) + + typedef struct _GB_ADDR_CONFIG_T { + unsigned int num_pipes : 3; + unsigned int : 1; + unsigned int pipe_interleave_size : 3; + unsigned int : 1; + unsigned int bank_interleave_size : 3; + unsigned int : 1; + unsigned int num_shader_engines : 2; + unsigned int : 2; + unsigned int shader_engine_tile_size : 3; + unsigned int : 1; + unsigned int num_gpus : 3; + unsigned int : 1; + unsigned int multi_gpu_tile_size : 2; + unsigned int : 2; + unsigned int row_size : 2; + unsigned int num_lower_pipes : 1; + unsigned int : 1; + } GB_ADDR_CONFIG_T; + +#elif defined(BIGENDIAN_CPU) + + typedef struct _GB_ADDR_CONFIG_T { + unsigned int : 1; + unsigned int num_lower_pipes : 1; + unsigned int row_size : 2; + unsigned int : 2; + unsigned int multi_gpu_tile_size : 2; + unsigned int : 1; + unsigned int num_gpus : 3; + unsigned int : 1; + unsigned int shader_engine_tile_size : 3; + unsigned int : 2; + unsigned int num_shader_engines : 2; + unsigned int : 1; + unsigned int bank_interleave_size : 3; + unsigned int : 1; + unsigned int pipe_interleave_size : 3; + unsigned int : 1; + unsigned int num_pipes : 3; + } GB_ADDR_CONFIG_T; + +#endif + +typedef union { + unsigned int val : 32; + GB_ADDR_CONFIG_T f; +} GB_ADDR_CONFIG; + +#if defined(LITTLEENDIAN_CPU) + + typedef struct _GB_TILE_MODE_T { + unsigned int micro_tile_mode : 2; + unsigned int array_mode : 4; + unsigned int pipe_config : 5; + unsigned int tile_split : 3; + unsigned int bank_width : 2; + unsigned int bank_height : 2; + unsigned int macro_tile_aspect : 2; + unsigned int num_banks : 2; + unsigned int micro_tile_mode_new : 3; + unsigned int sample_split : 2; + unsigned int : 5; + } GB_TILE_MODE_T; + + typedef struct _GB_MACROTILE_MODE_T { + unsigned int bank_width : 2; + unsigned int bank_height : 2; + unsigned 
int macro_tile_aspect : 2; + unsigned int num_banks : 2; + unsigned int : 24; + } GB_MACROTILE_MODE_T; + +#elif defined(BIGENDIAN_CPU) + + typedef struct _GB_TILE_MODE_T { + unsigned int : 5; + unsigned int sample_split : 2; + unsigned int micro_tile_mode_new : 3; + unsigned int num_banks : 2; + unsigned int macro_tile_aspect : 2; + unsigned int bank_height : 2; + unsigned int bank_width : 2; + unsigned int tile_split : 3; + unsigned int pipe_config : 5; + unsigned int array_mode : 4; + unsigned int micro_tile_mode : 2; + } GB_TILE_MODE_T; + + typedef struct _GB_MACROTILE_MODE_T { + unsigned int : 24; + unsigned int num_banks : 2; + unsigned int macro_tile_aspect : 2; + unsigned int bank_height : 2; + unsigned int bank_width : 2; + } GB_MACROTILE_MODE_T; + +#endif + +typedef union { + unsigned int val : 32; + GB_TILE_MODE_T f; +} GB_TILE_MODE; + +typedef union { + unsigned int val : 32; + GB_MACROTILE_MODE_T f; +} GB_MACROTILE_MODE; + +#endif + diff -Nru mesa-18.3.3/src/amd/addrlib/src/core/addrcommon.h mesa-19.0.1/src/amd/addrlib/src/core/addrcommon.h --- mesa-18.3.3/src/amd/addrlib/src/core/addrcommon.h 1970-01-01 00:00:00.000000000 +0000 +++ mesa-19.0.1/src/amd/addrlib/src/core/addrcommon.h 2019-03-31 23:16:37.000000000 +0000 @@ -0,0 +1,922 @@ +/* + * Copyright © 2007-2018 Advanced Micro Devices, Inc. + * All Rights Reserved. 
+ * + * Permission is hereby granted, free of charge, to any person obtaining + * a copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES + * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NON-INFRINGEMENT. IN NO EVENT SHALL THE COPYRIGHT HOLDERS, AUTHORS + * AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE + * USE OR OTHER DEALINGS IN THE SOFTWARE. + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + */ + +/** +**************************************************************************************************** +* @file addrcommon.h +* @brief Contains the helper function and constants. 
+**************************************************************************************************** +*/ + +#ifndef __ADDR_COMMON_H__ +#define __ADDR_COMMON_H__ + +#include "addrinterface.h" + +#include +#include +#include + +#if !defined(DEBUG) +#ifdef NDEBUG +#define DEBUG 0 +#else +#define DEBUG 1 +#endif +#endif + +//////////////////////////////////////////////////////////////////////////////////////////////////// +// Platform specific debug break defines +//////////////////////////////////////////////////////////////////////////////////////////////////// +#if DEBUG + #if defined(__GNUC__) + #define ADDR_DBG_BREAK() assert(false) + #elif defined(__APPLE__) + #define ADDR_DBG_BREAK() { IOPanic("");} + #else + #define ADDR_DBG_BREAK() { __debugbreak(); } + #endif +#else + #define ADDR_DBG_BREAK() +#endif +//////////////////////////////////////////////////////////////////////////////////////////////////// + +//////////////////////////////////////////////////////////////////////////////////////////////////// +// Debug assertions used in AddrLib +//////////////////////////////////////////////////////////////////////////////////////////////////// +#if defined(_WIN32) && (_MSC_VER >= 1400) + #define ADDR_ANALYSIS_ASSUME(expr) __analysis_assume(expr) +#else + #define ADDR_ANALYSIS_ASSUME(expr) do { (void)(expr); } while (0) +#endif + +#define ADDR_ASSERT(__e) assert(__e) +#define ADDR_ASSERT_ALWAYS() ADDR_DBG_BREAK() +#define ADDR_UNHANDLED_CASE() ADDR_ASSERT(!"Unhandled case") +#define ADDR_NOT_IMPLEMENTED() ADDR_ASSERT(!"Not implemented"); +//////////////////////////////////////////////////////////////////////////////////////////////////// + +//////////////////////////////////////////////////////////////////////////////////////////////////// +// Debug print macro from legacy address library +//////////////////////////////////////////////////////////////////////////////////////////////////// +#if DEBUG + +#define ADDR_PRNT(a) Object::DebugPrint a + +/// @brief Macro 
for reporting informational messages +/// @ingroup util +/// +/// This macro optionally prints an informational message to stdout. +/// The first parameter is a condition -- if it is true, nothing is done. +/// The second pararmeter MUST be a parenthesis-enclosed list of arguments, +/// starting with a string. This is passed to printf() or an equivalent +/// in order to format the informational message. For example, +/// ADDR_INFO(0, ("test %d",3) ); prints out "test 3". +/// +#define ADDR_INFO(cond, a) \ +{ if (!(cond)) { ADDR_PRNT(a); } } + +/// @brief Macro for reporting error warning messages +/// @ingroup util +/// +/// This macro optionally prints an error warning message to stdout, +/// followed by the file name and line number where the macro was called. +/// The first parameter is a condition -- if it is true, nothing is done. +/// The second pararmeter MUST be a parenthesis-enclosed list of arguments, +/// starting with a string. This is passed to printf() or an equivalent +/// in order to format the informational message. For example, +/// ADDR_WARN(0, ("test %d",3) ); prints out "test 3" followed by +/// a second line with the file name and line number. +/// +#define ADDR_WARN(cond, a) \ +{ if (!(cond)) \ + { ADDR_PRNT(a); \ + ADDR_PRNT((" WARNING in file %s, line %d\n", __FILE__, __LINE__)); \ +} } + +/// @brief Macro for reporting fatal error conditions +/// @ingroup util +/// +/// This macro optionally stops execution of the current routine +/// after printing an error warning message to stdout, +/// followed by the file name and line number where the macro was called. +/// The first parameter is a condition -- if it is true, nothing is done. +/// The second pararmeter MUST be a parenthesis-enclosed list of arguments, +/// starting with a string. This is passed to printf() or an equivalent +/// in order to format the informational message. 
For example, +/// ADDR_EXIT(0, ("test %d",3) ); prints out "test 3" followed by +/// a second line with the file name and line number, then stops execution. +/// +#define ADDR_EXIT(cond, a) \ +{ if (!(cond)) \ + { ADDR_PRNT(a); ADDR_DBG_BREAK();\ +} } + +#else // DEBUG + +#define ADDRDPF 1 ? (void)0 : (void) + +#define ADDR_PRNT(a) + +#define ADDR_DBG_BREAK() + +#define ADDR_INFO(cond, a) + +#define ADDR_WARN(cond, a) + +#define ADDR_EXIT(cond, a) + +#endif // DEBUG +//////////////////////////////////////////////////////////////////////////////////////////////////// + +#define ADDR_C_ASSERT(__e) typedef char __ADDR_C_ASSERT__[(__e) ? 1 : -1] + +namespace Addr +{ + +namespace V1 +{ +//////////////////////////////////////////////////////////////////////////////////////////////////// +// Common constants +//////////////////////////////////////////////////////////////////////////////////////////////////// +static const UINT_32 MicroTileWidth = 8; ///< Micro tile width, for 1D and 2D tiling +static const UINT_32 MicroTileHeight = 8; ///< Micro tile height, for 1D and 2D tiling +static const UINT_32 ThickTileThickness = 4; ///< Micro tile thickness, for THICK modes +static const UINT_32 XThickTileThickness = 8; ///< Extra thick tiling thickness +static const UINT_32 PowerSaveTileBytes = 64; ///< Nuber of bytes per tile for power save 64 +static const UINT_32 CmaskCacheBits = 1024; ///< Number of bits for CMASK cache +static const UINT_32 CmaskElemBits = 4; ///< Number of bits for CMASK element +static const UINT_32 HtileCacheBits = 16384; ///< Number of bits for HTILE cache 512*32 + +static const UINT_32 MicroTilePixels = MicroTileWidth * MicroTileHeight; + +static const INT_32 TileIndexInvalid = TILEINDEX_INVALID; +static const INT_32 TileIndexLinearGeneral = TILEINDEX_LINEAR_GENERAL; +static const INT_32 TileIndexNoMacroIndex = -3; + +} // V1 + +namespace V2 +{ +//////////////////////////////////////////////////////////////////////////////////////////////////// +// 
Common constants +//////////////////////////////////////////////////////////////////////////////////////////////////// +static const UINT_32 MaxSurfaceHeight = 16384; + +} // V2 + +//////////////////////////////////////////////////////////////////////////////////////////////////// +// Common macros +//////////////////////////////////////////////////////////////////////////////////////////////////// +#define BITS_PER_BYTE 8 +#define BITS_TO_BYTES(x) ( ((x) + (BITS_PER_BYTE-1)) / BITS_PER_BYTE ) +#define BYTES_TO_BITS(x) ( (x) * BITS_PER_BYTE ) + +/// Helper macros to select a single bit from an int (undefined later in section) +#define _BIT(v,b) (((v) >> (b) ) & 1) + +/** +**************************************************************************************************** +* @brief Enums to identify AddrLib type +**************************************************************************************************** +*/ +enum LibClass +{ + BASE_ADDRLIB = 0x0, + R600_ADDRLIB = 0x6, + R800_ADDRLIB = 0x8, + SI_ADDRLIB = 0xa, + CI_ADDRLIB = 0xb, + AI_ADDRLIB = 0xd, +}; + +/** +**************************************************************************************************** +* ChipFamily +* +* @brief +* Neutral enums that specifies chip family. +* +**************************************************************************************************** +*/ +enum ChipFamily +{ + ADDR_CHIP_FAMILY_IVLD, ///< Invalid family + ADDR_CHIP_FAMILY_R6XX, + ADDR_CHIP_FAMILY_R7XX, + ADDR_CHIP_FAMILY_R8XX, + ADDR_CHIP_FAMILY_NI, + ADDR_CHIP_FAMILY_SI, + ADDR_CHIP_FAMILY_CI, + ADDR_CHIP_FAMILY_VI, + ADDR_CHIP_FAMILY_AI, +}; + +/** +**************************************************************************************************** +* ConfigFlags +* +* @brief +* This structure is used to set configuration flags. 
+**************************************************************************************************** +*/ +union ConfigFlags +{ + struct + { + /// These flags are set up internally thru AddrLib::Create() based on ADDR_CREATE_FLAGS + UINT_32 optimalBankSwap : 1; ///< New bank tiling for RV770 only + UINT_32 noCubeMipSlicesPad : 1; ///< Disables faces padding for cubemap mipmaps + UINT_32 fillSizeFields : 1; ///< If clients fill size fields in all input and + /// output structure + UINT_32 ignoreTileInfo : 1; ///< Don't use tile info structure + UINT_32 useTileIndex : 1; ///< Make tileIndex field in input valid + UINT_32 useCombinedSwizzle : 1; ///< Use combined swizzle + UINT_32 checkLast2DLevel : 1; ///< Check the last 2D mip sub level + UINT_32 useHtileSliceAlign : 1; ///< Do htile single slice alignment + UINT_32 allowLargeThickTile : 1; ///< Allow 64*thickness*bytesPerPixel > rowSize + UINT_32 disableLinearOpt : 1; ///< Disallow tile modes to be optimized to linear + UINT_32 use32bppFor422Fmt : 1; ///< View 422 formats as 32 bits per pixel element + UINT_32 reserved : 21; ///< Reserved bits for future use + }; + + UINT_32 value; +}; + +//////////////////////////////////////////////////////////////////////////////////////////////////// +// Misc helper functions +//////////////////////////////////////////////////////////////////////////////////////////////////// + +/** +**************************************************************************************************** +* AddrXorReduce +* +* @brief +* Xor the right-side numberOfBits bits of x. 
+**************************************************************************************************** +*/ +static inline UINT_32 XorReduce( + UINT_32 x, + UINT_32 numberOfBits) +{ + UINT_32 i; + UINT_32 result = x & 1; + + for (i=1; i>i) & 1); + } + + return result; +} + +/** +**************************************************************************************************** +* IsPow2 +* +* @brief +* Check if the size (UINT_32) is pow 2 +**************************************************************************************************** +*/ +static inline UINT_32 IsPow2( + UINT_32 dim) ///< [in] dimension of miplevel +{ + ADDR_ASSERT(dim > 0); + return !(dim & (dim - 1)); +} + +/** +**************************************************************************************************** +* IsPow2 +* +* @brief +* Check if the size (UINT_64) is pow 2 +**************************************************************************************************** +*/ +static inline UINT_64 IsPow2( + UINT_64 dim) ///< [in] dimension of miplevel +{ + ADDR_ASSERT(dim > 0); + return !(dim & (dim - 1)); +} + +/** +**************************************************************************************************** +* ByteAlign +* +* @brief +* Align UINT_32 "x" to "align" alignment, "align" should be power of 2 +**************************************************************************************************** +*/ +static inline UINT_32 PowTwoAlign( + UINT_32 x, + UINT_32 align) +{ + // + // Assert that x is a power of two. 
+ // + ADDR_ASSERT(IsPow2(align)); + return (x + (align - 1)) & (~(align - 1)); +} + +/** +**************************************************************************************************** +* ByteAlign +* +* @brief +* Align UINT_64 "x" to "align" alignment, "align" should be power of 2 +**************************************************************************************************** +*/ +static inline UINT_64 PowTwoAlign( + UINT_64 x, + UINT_64 align) +{ + // + // Assert that x is a power of two. + // + ADDR_ASSERT(IsPow2(align)); + return (x + (align - 1)) & (~(align - 1)); +} + +/** +**************************************************************************************************** +* Min +* +* @brief +* Get the min value between two unsigned values +**************************************************************************************************** +*/ +static inline UINT_32 Min( + UINT_32 value1, + UINT_32 value2) +{ + return ((value1 < (value2)) ? (value1) : value2); +} + +/** +**************************************************************************************************** +* Min +* +* @brief +* Get the min value between two signed values +**************************************************************************************************** +*/ +static inline INT_32 Min( + INT_32 value1, + INT_32 value2) +{ + return ((value1 < (value2)) ? (value1) : value2); +} + +/** +**************************************************************************************************** +* Max +* +* @brief +* Get the max value between two unsigned values +**************************************************************************************************** +*/ +static inline UINT_32 Max( + UINT_32 value1, + UINT_32 value2) +{ + return ((value1 > (value2)) ? 
(value1) : value2); +} + +/** +**************************************************************************************************** +* Max +* +* @brief +* Get the max value between two signed values +**************************************************************************************************** +*/ +static inline INT_32 Max( + INT_32 value1, + INT_32 value2) +{ + return ((value1 > (value2)) ? (value1) : value2); +} + +/** +**************************************************************************************************** +* NextPow2 +* +* @brief +* Compute the mipmap's next level dim size +**************************************************************************************************** +*/ +static inline UINT_32 NextPow2( + UINT_32 dim) ///< [in] dimension of miplevel +{ + UINT_32 newDim = 1; + + if (dim > 0x7fffffff) + { + ADDR_ASSERT_ALWAYS(); + newDim = 0x80000000; + } + else + { + while (newDim < dim) + { + newDim <<= 1; + } + } + + return newDim; +} + +/** +**************************************************************************************************** +* Log2NonPow2 +* +* @brief +* Compute log of base 2 no matter the target is power of 2 or not +**************************************************************************************************** +*/ +static inline UINT_32 Log2NonPow2( + UINT_32 x) ///< [in] the value should calculate log based 2 +{ + UINT_32 y; + + y = 0; + while (x > 1) + { + x >>= 1; + y++; + } + + return y; +} + +/** +**************************************************************************************************** +* Log2 +* +* @brief +* Compute log of base 2 +**************************************************************************************************** +*/ +static inline UINT_32 Log2( + UINT_32 x) ///< [in] the value should calculate log based 2 +{ + // Assert that x is a power of two. 
+ ADDR_ASSERT(IsPow2(x)); + + return Log2NonPow2(x); +} + +/** +**************************************************************************************************** +* QLog2 +* +* @brief +* Compute log of base 2 quickly (<= 16) +**************************************************************************************************** +*/ +static inline UINT_32 QLog2( + UINT_32 x) ///< [in] the value should calculate log based 2 +{ + ADDR_ASSERT(x <= 16); + + UINT_32 y = 0; + + switch (x) + { + case 1: + y = 0; + break; + case 2: + y = 1; + break; + case 4: + y = 2; + break; + case 8: + y = 3; + break; + case 16: + y = 4; + break; + default: + ADDR_ASSERT_ALWAYS(); + } + + return y; +} + +/** +**************************************************************************************************** +* SafeAssign +* +* @brief +* NULL pointer safe assignment +**************************************************************************************************** +*/ +static inline VOID SafeAssign( + UINT_32* pLVal, ///< [in] Pointer to left val + UINT_32 rVal) ///< [in] Right value +{ + if (pLVal) + { + *pLVal = rVal; + } +} + +/** +**************************************************************************************************** +* SafeAssign +* +* @brief +* NULL pointer safe assignment for 64bit values +**************************************************************************************************** +*/ +static inline VOID SafeAssign( + UINT_64* pLVal, ///< [in] Pointer to left val + UINT_64 rVal) ///< [in] Right value +{ + if (pLVal) + { + *pLVal = rVal; + } +} + +/** +**************************************************************************************************** +* SafeAssign +* +* @brief +* NULL pointer safe assignment for AddrTileMode +**************************************************************************************************** +*/ +static inline VOID SafeAssign( + AddrTileMode* pLVal, ///< [in] Pointer to left val + AddrTileMode rVal) ///< [in] Right value +{ 
+ if (pLVal) + { + *pLVal = rVal; + } +} + +/** +**************************************************************************************************** +* RoundHalf +* +* @brief +* return (x + 1) / 2 +**************************************************************************************************** +*/ +static inline UINT_32 RoundHalf( + UINT_32 x) ///< [in] input value +{ + ADDR_ASSERT(x != 0); + +#if 1 + return (x >> 1) + (x & 1); +#else + return (x + 1) >> 1; +#endif +} + +/** +**************************************************************************************************** +* SumGeo +* +* @brief +* Calculate sum of a geometric progression whose ratio is 1/2 +**************************************************************************************************** +*/ +static inline UINT_32 SumGeo( + UINT_32 base, ///< [in] First term in the geometric progression + UINT_32 num) ///< [in] Number of terms to be added into sum +{ + ADDR_ASSERT(base > 0); + + UINT_32 sum = 0; + UINT_32 i = 0; + for (; (i < num) && (base > 1); i++) + { + sum += base; + base = RoundHalf(base); + } + sum += num - i; + + return sum; +} + +/** +**************************************************************************************************** +* GetBit +* +* @brief +* Extract bit N value (0 or 1) of a UINT32 value. +**************************************************************************************************** +*/ +static inline UINT_32 GetBit( + UINT_32 u32, ///< [in] UINT32 value + UINT_32 pos) ///< [in] bit position from LSB, valid range is [0..31] +{ + ADDR_ASSERT(pos <= 31); + + return (u32 >> pos) & 0x1; +} + +/** +**************************************************************************************************** +* GetBits +* +* @brief +* Copy 'bitsNum' bits from src start from srcStartPos into destination from dstStartPos +* srcStartPos: 0~31 for UINT_32 +* bitsNum : 1~32 for UINT_32 +* srcStartPos: 0~31 for UINT_32 +* src start position +* | +* src : b[31] b[30] b[29] ... 
... ... ... ... ... ... ... b[end]..b[beg] ... b[1] b[0] +* || Bits num || copy length || Bits num || +* dst : b[31] b[30] b[29] ... b[end]..b[beg] ... ... ... ... ... ... ... ... b[1] b[0] +* | +* dst start position +**************************************************************************************************** +*/ +static inline UINT_32 GetBits( + UINT_32 src, + UINT_32 srcStartPos, + UINT_32 bitsNum, + UINT_32 dstStartPos) +{ + ADDR_ASSERT((srcStartPos < 32) && (dstStartPos < 32) && (bitsNum > 0)); + ADDR_ASSERT((bitsNum + dstStartPos <= 32) && (bitsNum + srcStartPos <= 32)); + + return ((src >> srcStartPos) << (32 - bitsNum)) >> (32 - bitsNum - dstStartPos); +} + +/** +**************************************************************************************************** +* MortonGen2d +* +* @brief +* Generate 2D Morton interleave code with num lowest bits in each channel +**************************************************************************************************** +*/ +static inline UINT_32 MortonGen2d( + UINT_32 x, ///< [in] First channel + UINT_32 y, ///< [in] Second channel + UINT_32 num) ///< [in] Number of bits extracted from each channel +{ + UINT_32 mort = 0; + + for (UINT_32 i = 0; i < num; i++) + { + mort |= (GetBit(y, i) << (2 * i)); + mort |= (GetBit(x, i) << (2 * i + 1)); + } + + return mort; +} + +/** +**************************************************************************************************** +* MortonGen3d +* +* @brief +* Generate 3D Morton interleave code with num lowest bits in each channel +**************************************************************************************************** +*/ +static inline UINT_32 MortonGen3d( + UINT_32 x, ///< [in] First channel + UINT_32 y, ///< [in] Second channel + UINT_32 z, ///< [in] Third channel + UINT_32 num) ///< [in] Number of bits extracted from each channel +{ + UINT_32 mort = 0; + + for (UINT_32 i = 0; i < num; i++) + { + mort |= (GetBit(z, i) << (3 * i)); + mort |= (GetBit(y, 
i) << (3 * i + 1)); + mort |= (GetBit(x, i) << (3 * i + 2)); + } + + return mort; +} + +/** +**************************************************************************************************** +* ReverseBitVector +* +* @brief +* Return reversed lowest num bits of v: v[0]v[1]...v[num-2]v[num-1] +**************************************************************************************************** +*/ +static inline UINT_32 ReverseBitVector( + UINT_32 v, ///< [in] Reverse operation base value + UINT_32 num) ///< [in] Number of bits used in reverse operation +{ + UINT_32 reverse = 0; + + for (UINT_32 i = 0; i < num; i++) + { + reverse |= (GetBit(v, num - 1 - i) << i); + } + + return reverse; +} + +/** +**************************************************************************************************** +* FoldXor2d +* +* @brief +* Xor bit vector v[num-1]v[num-2]...v[1]v[0] with v[num]v[num+1]...v[2*num-2]v[2*num-1] +**************************************************************************************************** +*/ +static inline UINT_32 FoldXor2d( + UINT_32 v, ///< [in] Xor operation base value + UINT_32 num) ///< [in] Number of bits used in fold xor operation +{ + return (v & ((1 << num) - 1)) ^ ReverseBitVector(v >> num, num); +} + +/** +**************************************************************************************************** +* DeMort +* +* @brief +* Return v[0] | v[2] | v[4] | v[6]... 
| v[2*num - 2] +**************************************************************************************************** +*/ +static inline UINT_32 DeMort( + UINT_32 v, ///< [in] DeMort operation base value + UINT_32 num) ///< [in] Number of bits used in fold DeMort operation +{ + UINT_32 d = 0; + + for (UINT_32 i = 0; i < num; i++) + { + d |= ((v & (1 << (i << 1))) >> i); + } + + return d; +} + +/** +**************************************************************************************************** +* FoldXor3d +* +* @brief +* v[0]...v[num-1] ^ v[3*num-1]v[3*num-3]...v[num+2]v[num] ^ v[3*num-2]...v[num+1]v[num-1] +**************************************************************************************************** +*/ +static inline UINT_32 FoldXor3d( + UINT_32 v, ///< [in] Xor operation base value + UINT_32 num) ///< [in] Number of bits used in fold xor operation +{ + UINT_32 t = v & ((1 << num) - 1); + t ^= ReverseBitVector(DeMort(v >> num, num), num); + t ^= ReverseBitVector(DeMort(v >> (num + 1), num), num); + + return t; +} + +/** +**************************************************************************************************** +* InitChannel +* +* @brief +* Set channel initialization value via a return value +**************************************************************************************************** +*/ +static inline ADDR_CHANNEL_SETTING InitChannel( + UINT_32 valid, ///< [in] valid setting + UINT_32 channel, ///< [in] channel setting + UINT_32 index) ///< [in] index setting +{ + ADDR_CHANNEL_SETTING t; + t.valid = valid; + t.channel = channel; + t.index = index; + + return t; +} + +/** +**************************************************************************************************** +* InitChannel +* +* @brief +* Set channel initialization value via channel pointer +**************************************************************************************************** +*/ +static inline VOID InitChannel( + UINT_32 valid, ///< [in] valid setting + 
UINT_32 channel, ///< [in] channel setting + UINT_32 index, ///< [in] index setting + ADDR_CHANNEL_SETTING *pChanSet) ///< [out] channel setting to be initialized +{ + pChanSet->valid = valid; + pChanSet->channel = channel; + pChanSet->index = index; +} + +/** +**************************************************************************************************** +* InitChannel +* +* @brief +* Set channel initialization value via another channel +**************************************************************************************************** +*/ +static inline VOID InitChannel( + ADDR_CHANNEL_SETTING *pChanDst, ///< [in] channel setting to be copied from + ADDR_CHANNEL_SETTING *pChanSrc) ///< [out] channel setting to be initialized +{ + pChanDst->valid = pChanSrc->valid; + pChanDst->channel = pChanSrc->channel; + pChanDst->index = pChanSrc->index; +} + +/** +**************************************************************************************************** +* GetMaxValidChannelIndex +* +* @brief +* Get max valid index for a specific channel +**************************************************************************************************** +*/ +static inline UINT_32 GetMaxValidChannelIndex( + const ADDR_CHANNEL_SETTING *pChanSet, ///< [in] channel setting to be initialized + UINT_32 searchCount,///< [in] number of channel setting to be searched + UINT_32 channel) ///< [in] channel to be searched +{ + UINT_32 index = 0; + + for (UINT_32 i = 0; i < searchCount; i++) + { + if (pChanSet[i].valid && (pChanSet[i].channel == channel)) + { + index = Max(index, static_cast(pChanSet[i].index)); + } + } + + return index; +} + +/** +**************************************************************************************************** +* GetCoordActiveMask +* +* @brief +* Get bit mask which indicates which positions in the equation match the target coord +**************************************************************************************************** +*/ +static inline 
UINT_32 GetCoordActiveMask( + const ADDR_CHANNEL_SETTING *pChanSet, ///< [in] channel setting to be initialized + UINT_32 searchCount,///< [in] number of channel setting to be searched + UINT_32 channel, ///< [in] channel to be searched + UINT_32 index) ///< [in] index to be searched +{ + UINT_32 mask = 0; + + for (UINT_32 i = 0; i < searchCount; i++) + { + if ((pChanSet[i].valid == TRUE) && + (pChanSet[i].channel == channel) && + (pChanSet[i].index == index)) + { + mask |= (1 << i); + } + } + + return mask; +} + +} // Addr + +#endif // __ADDR_COMMON_H__ + diff -Nru mesa-18.3.3/src/amd/addrlib/src/core/addrelemlib.cpp mesa-19.0.1/src/amd/addrlib/src/core/addrelemlib.cpp --- mesa-18.3.3/src/amd/addrlib/src/core/addrelemlib.cpp 1970-01-01 00:00:00.000000000 +0000 +++ mesa-19.0.1/src/amd/addrlib/src/core/addrelemlib.cpp 2019-03-31 23:16:37.000000000 +0000 @@ -0,0 +1,1830 @@ +/* + * Copyright © 2007-2018 Advanced Micro Devices, Inc. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining + * a copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES + * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NON-INFRINGEMENT. IN NO EVENT SHALL THE COPYRIGHT HOLDERS, AUTHORS + * AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE + * USE OR OTHER DEALINGS IN THE SOFTWARE. 
+ * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + */ + +/** +**************************************************************************************************** +* @file addrelemlib.cpp +* @brief Contains the class implementation for element/pixel related functions. +**************************************************************************************************** +*/ + +#include "addrelemlib.h" +#include "addrlib.h" + +namespace Addr +{ + +/** +**************************************************************************************************** +* ElemLib::ElemLib +* +* @brief +* constructor +* +* @return +* N/A +**************************************************************************************************** +*/ +ElemLib::ElemLib( + Lib* pAddrLib) ///< [in] Parent addrlib instance pointer + : + Object(pAddrLib->GetClient()), + m_pAddrLib(pAddrLib) +{ + switch (m_pAddrLib->GetChipFamily()) + { + case ADDR_CHIP_FAMILY_R6XX: + m_depthPlanarType = ADDR_DEPTH_PLANAR_R600; + m_fp16ExportNorm = 0; + break; + case ADDR_CHIP_FAMILY_R7XX: + m_depthPlanarType = ADDR_DEPTH_PLANAR_R600; + m_fp16ExportNorm = 1; + break; + case ADDR_CHIP_FAMILY_R8XX: + case ADDR_CHIP_FAMILY_NI: // Same as 8xx + m_depthPlanarType = ADDR_DEPTH_PLANAR_R800; + m_fp16ExportNorm = 1; + break; + default: + m_fp16ExportNorm = 1; + m_depthPlanarType = ADDR_DEPTH_PLANAR_R800; + break; + } + + m_configFlags.value = 0; +} + +/** +**************************************************************************************************** +* ElemLib::~ElemLib +* +* @brief +* destructor +* +* @return +* N/A +**************************************************************************************************** +*/ +ElemLib::~ElemLib() +{ +} + +/** +**************************************************************************************************** +* ElemLib::Create +* +* @brief +* Creates 
and initializes AddrLib object. +* +* @return +* Returns point to ADDR_CREATEINFO if successful. +**************************************************************************************************** +*/ +ElemLib* ElemLib::Create( + const Lib* pAddrLib) ///< [in] Pointer of parent AddrLib instance +{ + ElemLib* pElemLib = NULL; + + if (pAddrLib) + { + VOID* pObj = Object::ClientAlloc(sizeof(ElemLib), pAddrLib->GetClient()); + if (pObj) + { + pElemLib = new(pObj) ElemLib(const_cast(pAddrLib)); + } + } + + return pElemLib; +} + +/************************************************************************************************** +* ElemLib::Flt32sToInt32s +* +* @brief +* Convert a ADDR_FLT_32 value to Int32 value +* +* @return +* N/A +**************************************************************************************************** +*/ +VOID ElemLib::Flt32sToInt32s( + ADDR_FLT_32 value, ///< [in] ADDR_FLT_32 value + UINT_32 bits, ///< [in] nubmer of bits in value + NumberType numberType, ///< [in] the type of number + UINT_32* pResult) ///< [out] Int32 value +{ + UINT_8 round = 128; //ADDR_ROUND_BY_HALF + UINT_32 uscale; + UINT_32 sign; + + //convert each component to an INT_32 + switch ( numberType ) + { + case ADDR_NO_NUMBER: //fall through + case ADDR_ZERO: //fall through + case ADDR_ONE: //fall through + case ADDR_EPSILON: //fall through + return; // these are zero-bit components, so don't set result + + case ADDR_UINT_BITS: // unsigned integer bit field, clamped to range + uscale = (1< uscale)) + { + *pResult = uscale; + } + else + { + *pResult = value.i; + } + return; + } + + // The algorithm used in the DB and TX differs at one value for 24-bit unorms + case ADDR_UNORM_R6XXDB: // unsigned repeating fraction + if ((bits==24) && (value.i == 0x33000000)) + { + *pResult = 1; + return; + } // Else treat like ADDR_UNORM_R6XX + + case ADDR_UNORM_R6XX: // unsigned repeating fraction + if (value.f <= 0) + { + *pResult = 0; // first clamp to [0..1] + } + else + { + if 
(value.f >= 1) + { + *pResult = (1<(f + (round/256.0f)); + } + #endif + else + { + ADDR_FLT_32 scaled; + ADDR_FLT_32 shifted; + UINT_64 truncated, rounded; + UINT_32 altShift; + UINT_32 mask = (1 << bits) - 1; + UINT_32 half = 1 << (bits - 1); + UINT_32 mant24 = (value.i & 0x7FFFFF) + 0x800000; + UINT_64 temp = mant24 - (mant24>>bits) - + static_cast((mant24 & mask) > half); + UINT_32 exp8 = value.i >> 23; + UINT_32 shift = 126 - exp8 + 24 - bits; + UINT_64 final; + + if (shift >= 32) // This is zero, even with maximum dither add + { + final = 0; + } + else + { + final = ((temp<<8) + (static_cast(round)<> (shift+8); + } + //ADDR_EXIT( *pResult == final, + // ("Float %x converted to %d-bit Unorm %x != bitwise %x", + // value.u, bits, (UINT_32)*pResult, (UINT_32)final) ); + if (final > mask) + { + final = mask; + } + + scaled.f = value.f * ((1<>23)&0xFF); + truncated = (altShift > 60) ? 0 : truncated >> altShift; + rounded = static_cast((round + truncated) >> 8); + //if (rounded > ((1<(rounded); //(INT_32)final; + } + } + } + + return; + + case ADDR_S8FLOAT32: // 32-bit IEEE float, passes through NaN values + *pResult = value.i; + return; + + // @@ FIX ROUNDING in this code, fix the denorm case + case ADDR_U4FLOATC: // Unsigned float, 4-bit exponent. 
bias 15, clamped [0..1] + sign = (value.i >> 31) & 1; + if ((value.i&0x7F800000) == 0x7F800000) // If NaN or INF: + { + if ((value.i&0x007FFFFF) != 0) // then if NaN + { + *pResult = 0; // return 0 + } + else + { + *pResult = (sign)?0:0xF00000; // else +INF->+1, -INF->0 + } + return; + } + if (value.f <= 0) + { + *pResult = 0; + } + else + { + if (value.f>=1) + { + *pResult = 0xF << (bits-4); + } + else + { + if ((value.i>>23) > 112 ) + { + // 24-bit float: normalized + // value.i += 1 << (22-bits+4); + // round the IEEE mantissa to mantissa size + // @@ NOTE: add code to support rounding + value.u &= 0x7FFFFFF; // mask off high 4 exponent bits + *pResult = value.i >> (23-bits+4);// shift off unused mantissa bits + } + else + { + // 24-bit float: denormalized + value.f = value.f / (1<<28) / (1<<28); + value.f = value.f / (1<<28) / (1<<28); // convert to IEEE denorm + // value.i += 1 << (22-bits+4); + // round the IEEE mantissa to mantissa size + // @@ NOTE: add code to support rounding + *pResult = value.i >> (23-bits+4); // shift off unused mantissa bits + } + } + } + + return; + + default: // invalid number mode + //ADDR_EXIT(0, ("Invalid AddrNumber %d", numberType) ); + break; + + } +} + +/** +**************************************************************************************************** +* ElemLib::Int32sToPixel +* +* @brief +* Pack 32-bit integer values into an uncompressed pixel, +* in the proper order +* +* @return +* N/A +* +* @note +* This entry point packes four 32-bit integer values into +* an uncompressed pixel. The pixel values are specifies in +* standard order, e.g. depth/stencil. This routine asserts +* if called on compressed pixel. 
+**************************************************************************************************** +*/ +VOID ElemLib::Int32sToPixel( + UINT_32 numComps, ///< [in] number of components + UINT_32* pComps, ///< [in] compnents + UINT_32* pCompBits, ///< [in] total bits in each component + UINT_32* pCompStart, ///< [in] the first bit position of each component + ComponentFlags properties, ///< [in] properties about byteAligned, exportNorm + UINT_32 resultBits, ///< [in] result bits: total bpp after decompression + UINT_8* pPixel) ///< [out] a depth/stencil pixel value +{ + UINT_32 i; + UINT_32 j; + UINT_32 start; + UINT_32 size; + UINT_32 byte; + UINT_32 value = 0; + UINT_32 compMask; + UINT_32 elemMask=0; + UINT_32 elementXor = 0; // address xor when reading bytes from elements + + // @@ NOTE: assert if called on a compressed format! + + if (properties.byteAligned) // Components are all byte-sized + { + for (i = 0; i < numComps; i++) // Then for each component + { + // Copy the bytes of the component into the element + start = pCompStart[i] / 8; + size = pCompBits[i] / 8; + for (j = 0; j < size; j++) + { + pPixel[(j+start)^elementXor] = static_cast(pComps[i] >> (8*j)); + } + } + } + else // Element is 32-bits or less, components are bit fields + { + // First, extract each component in turn and combine it into a 32-bit value + for (i = 0; i < numComps; i++) + { + compMask = (1 << pCompBits[i]) - 1; + elemMask |= compMask << pCompStart[i]; + value |= (pComps[i] & compMask) << pCompStart[i]; + } + + // Mext, copy the masked value into the element + size = (resultBits + 7) / 8; + for (i = 0; i < size; i++) + { + byte = pPixel[i^elementXor] & ~(elemMask >> (8*i)); + pPixel[i^elementXor] = static_cast(byte | ((elemMask & value) >> (8*i))); + } + } +} + +/** +**************************************************************************************************** +* Flt32ToDepthPixel +* +* @brief +* Convert a FLT_32 value to a depth/stencil pixel value +* +* @return +* N/A 
+**************************************************************************************************** +*/ +VOID ElemLib::Flt32ToDepthPixel( + AddrDepthFormat format, ///< [in] Depth format + const ADDR_FLT_32 comps[2], ///< [in] two components of depth + UINT_8* pPixel ///< [out] depth pixel value + ) const +{ + UINT_32 i; + UINT_32 values[2]; + ComponentFlags properties; // byteAligned, exportNorm + UINT_32 resultBits = 0; // result bits: total bits per pixel after decompression + + PixelFormatInfo fmt; + + // get type for each component + PixGetDepthCompInfo(format, &fmt); + + //initialize properties + properties.byteAligned = TRUE; + properties.exportNorm = TRUE; + properties.floatComp = FALSE; + + //set properties and result bits + for (i = 0; i < 2; i++) + { + if ((fmt.compBit[i] & 7) || (fmt.compStart[i] & 7)) + { + properties.byteAligned = FALSE; + } + + if (resultBits < fmt.compStart[i] + fmt.compBit[i]) + { + resultBits = fmt.compStart[i] + fmt.compBit[i]; + } + + // Clear ADDR_EXPORT_NORM if can't be represented as 11-bit or smaller [-1..+1] format + if (fmt.compBit[i] > 11 || fmt.numType[i] >= ADDR_USCALED) + { + properties.exportNorm = FALSE; + } + + // Mark if there are any floating point components + if ((fmt.numType[i] == ADDR_U4FLOATC) || (fmt.numType[i] >= ADDR_S8FLOAT) ) + { + properties.floatComp = TRUE; + } + } + + // Convert the two input floats to integer values + for (i = 0; i < 2; i++) + { + Flt32sToInt32s(comps[i], fmt.compBit[i], fmt.numType[i], &values[i]); + } + + // Then pack the two integer components, in the proper order + Int32sToPixel(2, values, fmt.compBit, fmt.compStart, properties, resultBits, pPixel ); + +} + +/** +**************************************************************************************************** +* Flt32ToColorPixel +* +* @brief +* Convert a FLT_32 value to a red/green/blue/alpha pixel value +* +* @return +* N/A 
+**************************************************************************************************** +*/ +VOID ElemLib::Flt32ToColorPixel( + AddrColorFormat format, ///< [in] Color format + AddrSurfaceNumber surfNum, ///< [in] Surface number + AddrSurfaceSwap surfSwap, ///< [in] Surface swap + const ADDR_FLT_32 comps[4], ///< [in] four components of color + UINT_8* pPixel ///< [out] a red/green/blue/alpha pixel value + ) const +{ + PixelFormatInfo pixelInfo; + + UINT_32 i; + UINT_32 values[4]; + ComponentFlags properties; // byteAligned, exportNorm + UINT_32 resultBits = 0; // result bits: total bits per pixel after decompression + + memset(&pixelInfo, 0, sizeof(PixelFormatInfo)); + + PixGetColorCompInfo(format, surfNum, surfSwap, &pixelInfo); + + //initialize properties + properties.byteAligned = TRUE; + properties.exportNorm = TRUE; + properties.floatComp = FALSE; + + //set properties and result bits + for (i = 0; i < 4; i++) + { + if ( (pixelInfo.compBit[i] & 7) || (pixelInfo.compStart[i] & 7) ) + { + properties.byteAligned = FALSE; + } + + if (resultBits < pixelInfo.compStart[i] + pixelInfo.compBit[i]) + { + resultBits = pixelInfo.compStart[i] + pixelInfo.compBit[i]; + } + + if (m_fp16ExportNorm) + { + // Clear ADDR_EXPORT_NORM if can't be represented as 11-bit or smaller [-1..+1] format + // or if it's not FP and <=16 bits + if (((pixelInfo.compBit[i] > 11) || (pixelInfo.numType[i] >= ADDR_USCALED)) + && (pixelInfo.numType[i] !=ADDR_U4FLOATC)) + { + properties.exportNorm = FALSE; + } + } + else + { + // Clear ADDR_EXPORT_NORM if can't be represented as 11-bit or smaller [-1..+1] format + if (pixelInfo.compBit[i] > 11 || pixelInfo.numType[i] >= ADDR_USCALED) + { + properties.exportNorm = FALSE; + } + } + + // Mark if there are any floating point components + if ( (pixelInfo.numType[i] == ADDR_U4FLOATC) || + (pixelInfo.numType[i] >= ADDR_S8FLOAT) ) + { + properties.floatComp = TRUE; + } + } + + // Convert the four input floats to integer values + for (i = 0; i 
< 4; i++) + { + Flt32sToInt32s(comps[i], pixelInfo.compBit[i], pixelInfo.numType[i], &values[i]); + } + + // Then pack the four integer components, in the proper order + Int32sToPixel(4, values, &pixelInfo.compBit[0], &pixelInfo.compStart[0], + properties, resultBits, pPixel); +} + +/** +**************************************************************************************************** +* ElemLib::GetCompType +* +* @brief +* Fill per component info +* +* @return +* N/A +* +**************************************************************************************************** +*/ +VOID ElemLib::GetCompType( + AddrColorFormat format, ///< [in] surface format + AddrSurfaceNumber numType, ///< [in] number type + PixelFormatInfo* pInfo) ///< [in][out] per component info out +{ + BOOL_32 handled = FALSE; + + // Floating point formats override the number format + switch (format) + { + case ADDR_COLOR_16_FLOAT: // fall through for all pure floating point format + case ADDR_COLOR_16_16_FLOAT: + case ADDR_COLOR_16_16_16_16_FLOAT: + case ADDR_COLOR_32_FLOAT: + case ADDR_COLOR_32_32_FLOAT: + case ADDR_COLOR_32_32_32_32_FLOAT: + case ADDR_COLOR_10_11_11_FLOAT: + case ADDR_COLOR_11_11_10_FLOAT: + numType = ADDR_NUMBER_FLOAT; + break; + // Special handling for the depth formats + case ADDR_COLOR_8_24: // fall through for these 2 similar format + case ADDR_COLOR_24_8: + for (UINT_32 c = 0; c < 4; c++) + { + if (pInfo->compBit[c] == 8) + { + pInfo->numType[c] = ADDR_UINT_BITS; + } + else if (pInfo->compBit[c] == 24) + { + pInfo->numType[c] = ADDR_UNORM_R6XX; + } + else + { + pInfo->numType[c] = ADDR_NO_NUMBER; + } + } + handled = TRUE; + break; + case ADDR_COLOR_8_24_FLOAT: // fall through for these 3 similar format + case ADDR_COLOR_24_8_FLOAT: + case ADDR_COLOR_X24_8_32_FLOAT: + for (UINT_32 c = 0; c < 4; c++) + { + if (pInfo->compBit[c] == 8) + { + pInfo->numType[c] = ADDR_UINT_BITS; + } + else if (pInfo->compBit[c] == 24) + { + pInfo->numType[c] = ADDR_U4FLOATC; + } + else if 
(pInfo->compBit[c] == 32) + { + pInfo->numType[c] = ADDR_S8FLOAT32; + } + else + { + pInfo->numType[c] = ADDR_NO_NUMBER; + } + } + handled = TRUE; + break; + default: + break; + } + + if (!handled) + { + for (UINT_32 c = 0; c < 4; c++) + { + // Assign a number type for each component + AddrSurfaceNumber cnum; + + // First handle default component values + if (pInfo->compBit[c] == 0) + { + if (c < 3) + { + pInfo->numType[c] = ADDR_ZERO; // Default is zero for RGB + } + else if (numType == ADDR_NUMBER_UINT || numType == ADDR_NUMBER_SINT) + { + pInfo->numType[c] = ADDR_EPSILON; // Alpha INT_32 bits default is 0x01 + } + else + { + pInfo->numType[c] = ADDR_ONE; // Alpha normal default is float 1.0 + } + continue; + } + // Now handle small components + else if (pInfo->compBit[c] == 1) + { + if (numType == ADDR_NUMBER_UINT || numType == ADDR_NUMBER_SINT) + { + cnum = ADDR_NUMBER_UINT; + } + else + { + cnum = ADDR_NUMBER_UNORM; + } + } + else + { + cnum = numType; + } + + // If no default, set the number type fom num, compbits, and architecture + switch (cnum) + { + case ADDR_NUMBER_SRGB: + pInfo->numType[c] = (c < 3) ? ADDR_GAMMA8_R6XX : ADDR_UNORM_R6XX; + break; + case ADDR_NUMBER_UNORM: + pInfo->numType[c] = ADDR_UNORM_R6XX; + break; + case ADDR_NUMBER_SNORM: + pInfo->numType[c] = ADDR_SNORM_R6XX; + break; + case ADDR_NUMBER_USCALED: + pInfo->numType[c] = ADDR_USCALED; // @@ Do we need separate Pele routine? + break; + case ADDR_NUMBER_SSCALED: + pInfo->numType[c] = ADDR_SSCALED; // @@ Do we need separate Pele routine? 
+ break; + case ADDR_NUMBER_FLOAT: + if (pInfo->compBit[c] == 32) + { + pInfo->numType[c] = ADDR_S8FLOAT32; + } + else if (pInfo->compBit[c] == 16) + { + pInfo->numType[c] = ADDR_S5FLOAT; + } + else if (pInfo->compBit[c] >= 10) + { + pInfo->numType[c] = ADDR_U5FLOAT; + } + else + { + ADDR_ASSERT_ALWAYS(); + } + break; + case ADDR_NUMBER_SINT: + pInfo->numType[c] = ADDR_SINT_BITS; + break; + case ADDR_NUMBER_UINT: + pInfo->numType[c] = ADDR_UINT_BITS; + break; + + default: + ADDR_ASSERT(!"Invalid number type"); + pInfo->numType[c] = ADDR_NO_NUMBER; + break; + } + } + } +} + +/** +**************************************************************************************************** +* ElemLib::GetCompSwap +* +* @brief +* Get components swapped for color surface +* +* @return +* N/A +* +**************************************************************************************************** +*/ +VOID ElemLib::GetCompSwap( + AddrSurfaceSwap swap, ///< [in] swap mode + PixelFormatInfo* pInfo) ///< [in,out] output per component info +{ + switch (pInfo->comps) + { + case 4: + switch (swap) + { + case ADDR_SWAP_ALT: + SwapComps( 0, 2, pInfo ); + break; // BGRA + case ADDR_SWAP_STD_REV: + SwapComps( 0, 3, pInfo ); + SwapComps( 1, 2, pInfo ); + break; // ABGR + case ADDR_SWAP_ALT_REV: + SwapComps( 0, 3, pInfo ); + SwapComps( 0, 2, pInfo ); + SwapComps( 0, 1, pInfo ); + break; // ARGB + default: + break; + } + break; + case 3: + switch (swap) + { + case ADDR_SWAP_ALT_REV: + SwapComps( 0, 3, pInfo ); + SwapComps( 0, 2, pInfo ); + break; // AGR + case ADDR_SWAP_STD_REV: + SwapComps( 0, 2, pInfo ); + break; // BGR + case ADDR_SWAP_ALT: + SwapComps( 2, 3, pInfo ); + break; // RGA + default: + break; // RGB + } + break; + case 2: + switch (swap) + { + case ADDR_SWAP_ALT_REV: + SwapComps( 0, 1, pInfo ); + SwapComps( 1, 3, pInfo ); + break; // AR + case ADDR_SWAP_STD_REV: + SwapComps( 0, 1, pInfo ); + break; // GR + case ADDR_SWAP_ALT: + SwapComps( 1, 3, pInfo ); + break; // RA + default: 
+ break; // RG + } + break; + case 1: + switch (swap) + { + case ADDR_SWAP_ALT_REV: + SwapComps( 0, 3, pInfo ); + break; // A + case ADDR_SWAP_STD_REV: + SwapComps( 0, 2, pInfo ); + break; // B + case ADDR_SWAP_ALT: + SwapComps( 0, 1, pInfo ); + break; // G + default: + break; // R + } + break; + } +} + +/** +**************************************************************************************************** +* ElemLib::GetCompSwap +* +* @brief +* Get components swapped for color surface +* +* @return +* N/A +* +**************************************************************************************************** +*/ +VOID ElemLib::SwapComps( + UINT_32 c0, ///< [in] component index 0 + UINT_32 c1, ///< [in] component index 1 + PixelFormatInfo* pInfo) ///< [in,out] output per component info +{ + UINT_32 start; + UINT_32 bits; + + start = pInfo->compStart[c0]; + pInfo->compStart[c0] = pInfo->compStart[c1]; + pInfo->compStart[c1] = start; + + bits = pInfo->compBit[c0]; + pInfo->compBit[c0] = pInfo->compBit[c1]; + pInfo->compBit[c1] = bits; +} + +/** +**************************************************************************************************** +* ElemLib::PixGetColorCompInfo +* +* @brief +* Get per component info for color surface +* +* @return +* N/A +* +**************************************************************************************************** +*/ +VOID ElemLib::PixGetColorCompInfo( + AddrColorFormat format, ///< [in] surface format, read from register + AddrSurfaceNumber number, ///< [in] pixel number type + AddrSurfaceSwap swap, ///< [in] component swap mode + PixelFormatInfo* pInfo ///< [out] output per component info + ) const +{ + // 1. 
Get componet bits + switch (format) + { + case ADDR_COLOR_8: + GetCompBits(8, 0, 0, 0, pInfo); + break; + case ADDR_COLOR_1_5_5_5: + GetCompBits(5, 5, 5, 1, pInfo); + break; + case ADDR_COLOR_5_6_5: + GetCompBits(8, 6, 5, 0, pInfo); + break; + case ADDR_COLOR_6_5_5: + GetCompBits(5, 5, 6, 0, pInfo); + break; + case ADDR_COLOR_8_8: + GetCompBits(8, 8, 0, 0, pInfo); + break; + case ADDR_COLOR_4_4_4_4: + GetCompBits(4, 4, 4, 4, pInfo); + break; + case ADDR_COLOR_16: + GetCompBits(16, 0, 0, 0, pInfo); + break; + case ADDR_COLOR_8_8_8_8: + GetCompBits(8, 8, 8, 8, pInfo); + break; + case ADDR_COLOR_2_10_10_10: + GetCompBits(10, 10, 10, 2, pInfo); + break; + case ADDR_COLOR_10_11_11: + GetCompBits(11, 11, 10, 0, pInfo); + break; + case ADDR_COLOR_11_11_10: + GetCompBits(10, 11, 11, 0, pInfo); + break; + case ADDR_COLOR_16_16: + GetCompBits(16, 16, 0, 0, pInfo); + break; + case ADDR_COLOR_16_16_16_16: + GetCompBits(16, 16, 16, 16, pInfo); + break; + case ADDR_COLOR_16_FLOAT: + GetCompBits(16, 0, 0, 0, pInfo); + break; + case ADDR_COLOR_16_16_FLOAT: + GetCompBits(16, 16, 0, 0, pInfo); + break; + case ADDR_COLOR_32_FLOAT: + GetCompBits(32, 0, 0, 0, pInfo); + break; + case ADDR_COLOR_32_32_FLOAT: + GetCompBits(32, 32, 0, 0, pInfo); + break; + case ADDR_COLOR_16_16_16_16_FLOAT: + GetCompBits(16, 16, 16, 16, pInfo); + break; + case ADDR_COLOR_32_32_32_32_FLOAT: + GetCompBits(32, 32, 32, 32, pInfo); + break; + + case ADDR_COLOR_32: + GetCompBits(32, 0, 0, 0, pInfo); + break; + case ADDR_COLOR_32_32: + GetCompBits(32, 32, 0, 0, pInfo); + break; + case ADDR_COLOR_32_32_32_32: + GetCompBits(32, 32, 32, 32, pInfo); + break; + case ADDR_COLOR_10_10_10_2: + GetCompBits(2, 10, 10, 10, pInfo); + break; + case ADDR_COLOR_10_11_11_FLOAT: + GetCompBits(11, 11, 10, 0, pInfo); + break; + case ADDR_COLOR_11_11_10_FLOAT: + GetCompBits(10, 11, 11, 0, pInfo); + break; + case ADDR_COLOR_5_5_5_1: + GetCompBits(1, 5, 5, 5, pInfo); + break; + case ADDR_COLOR_3_3_2: + GetCompBits(2, 3, 3, 0, pInfo); 
+ break; + case ADDR_COLOR_4_4: + GetCompBits(4, 4, 0, 0, pInfo); + break; + case ADDR_COLOR_8_24: + case ADDR_COLOR_8_24_FLOAT: // same bit count, fall through + GetCompBits(24, 8, 0, 0, pInfo); + break; + case ADDR_COLOR_24_8: + case ADDR_COLOR_24_8_FLOAT: // same bit count, fall through + GetCompBits(8, 24, 0, 0, pInfo); + break; + case ADDR_COLOR_X24_8_32_FLOAT: + GetCompBits(32, 8, 0, 0, pInfo); + break; + + case ADDR_COLOR_INVALID: + GetCompBits(0, 0, 0, 0, pInfo); + break; + default: + ADDR_ASSERT(0); + GetCompBits(0, 0, 0, 0, pInfo); + break; + } + + // 2. Get component number type + + GetCompType(format, number, pInfo); + + // 3. Swap components if needed + + GetCompSwap(swap, pInfo); +} + +/** +**************************************************************************************************** +* ElemLib::PixGetDepthCompInfo +* +* @brief +* Get per component info for depth surface +* +* @return +* N/A +* +**************************************************************************************************** +*/ +VOID ElemLib::PixGetDepthCompInfo( + AddrDepthFormat format, ///< [in] surface format, read from register + PixelFormatInfo* pInfo ///< [out] output per component bits and type + ) const +{ + if (m_depthPlanarType == ADDR_DEPTH_PLANAR_R800) + { + if (format == ADDR_DEPTH_8_24_FLOAT) + { + format = ADDR_DEPTH_X24_8_32_FLOAT; // Use this format to represent R800's D24FS8 + } + + if (format == ADDR_DEPTH_X8_24_FLOAT) + { + format = ADDR_DEPTH_32_FLOAT; + } + } + + switch (format) + { + case ADDR_DEPTH_16: + GetCompBits(16, 0, 0, 0, pInfo); + break; + case ADDR_DEPTH_8_24: + case ADDR_DEPTH_8_24_FLOAT: // similar format, fall through + GetCompBits(24, 8, 0, 0, pInfo); + break; + case ADDR_DEPTH_X8_24: + case ADDR_DEPTH_X8_24_FLOAT: // similar format, fall through + GetCompBits(24, 0, 0, 0, pInfo); + break; + case ADDR_DEPTH_32_FLOAT: + GetCompBits(32, 0, 0, 0, pInfo); + break; + case ADDR_DEPTH_X24_8_32_FLOAT: + GetCompBits(32, 8, 0, 0, pInfo); + break; 
+ case ADDR_DEPTH_INVALID: + GetCompBits(0, 0, 0, 0, pInfo); + break; + default: + ADDR_ASSERT(0); + GetCompBits(0, 0, 0, 0, pInfo); + break; + } + + switch (format) + { + case ADDR_DEPTH_16: + pInfo->numType [0] = ADDR_UNORM_R6XX; + pInfo->numType [1] = ADDR_ZERO; + break; + case ADDR_DEPTH_8_24: + pInfo->numType [0] = ADDR_UNORM_R6XXDB; + pInfo->numType [1] = ADDR_UINT_BITS; + break; + case ADDR_DEPTH_8_24_FLOAT: + pInfo->numType [0] = ADDR_U4FLOATC; + pInfo->numType [1] = ADDR_UINT_BITS; + break; + case ADDR_DEPTH_X8_24: + pInfo->numType [0] = ADDR_UNORM_R6XXDB; + pInfo->numType [1] = ADDR_ZERO; + break; + case ADDR_DEPTH_X8_24_FLOAT: + pInfo->numType [0] = ADDR_U4FLOATC; + pInfo->numType [1] = ADDR_ZERO; + break; + case ADDR_DEPTH_32_FLOAT: + pInfo->numType [0] = ADDR_S8FLOAT32; + pInfo->numType [1] = ADDR_ZERO; + break; + case ADDR_DEPTH_X24_8_32_FLOAT: + pInfo->numType [0] = ADDR_S8FLOAT32; + pInfo->numType [1] = ADDR_UINT_BITS; + break; + default: + pInfo->numType [0] = ADDR_NO_NUMBER; + pInfo->numType [1] = ADDR_NO_NUMBER; + break; + } + + pInfo->numType [2] = ADDR_NO_NUMBER; + pInfo->numType [3] = ADDR_NO_NUMBER; +} + +/** +**************************************************************************************************** +* ElemLib::PixGetExportNorm +* +* @brief +* Check if fp16 export norm can be enabled. +* +* @return +* TRUE if this can be enabled. 
+* +**************************************************************************************************** +*/ +BOOL_32 ElemLib::PixGetExportNorm( + AddrColorFormat colorFmt, ///< [in] surface format, read from register + AddrSurfaceNumber numberFmt, ///< [in] pixel number type + AddrSurfaceSwap swap ///< [in] components swap type + ) const +{ + BOOL_32 enabled = TRUE; + + PixelFormatInfo formatInfo; + + PixGetColorCompInfo(colorFmt, numberFmt, swap, &formatInfo); + + for (UINT_32 c = 0; c < 4; c++) + { + if (m_fp16ExportNorm) + { + if (((formatInfo.compBit[c] > 11) || (formatInfo.numType[c] > ADDR_USCALED)) && + (formatInfo.numType[c] != ADDR_U4FLOATC) && + (formatInfo.numType[c] != ADDR_S5FLOAT) && + (formatInfo.numType[c] != ADDR_S5FLOATM) && + (formatInfo.numType[c] != ADDR_U5FLOAT) && + (formatInfo.numType[c] != ADDR_U3FLOATM)) + { + enabled = FALSE; + break; + } + } + else + { + if ((formatInfo.compBit[c] > 11) || (formatInfo.numType[c] > ADDR_USCALED)) + { + enabled = FALSE; + break; + } + } + } + + return enabled; +} + +/** +**************************************************************************************************** +* ElemLib::AdjustSurfaceInfo +* +* @brief +* Adjust bpp/base pitch/width/height according to elemMode and expandX/Y +* +* @return +* N/A +**************************************************************************************************** +*/ +VOID ElemLib::AdjustSurfaceInfo( + ElemMode elemMode, ///< [in] element mode + UINT_32 expandX, ///< [in] decompression expansion factor in X + UINT_32 expandY, ///< [in] decompression expansion factor in Y + UINT_32* pBpp, ///< [in,out] bpp + UINT_32* pBasePitch, ///< [in,out] base pitch + UINT_32* pWidth, ///< [in,out] width + UINT_32* pHeight) ///< [in,out] height +{ + UINT_32 packedBits; + UINT_32 basePitch; + UINT_32 width; + UINT_32 height; + UINT_32 bpp; + BOOL_32 bBCnFormat = FALSE; + + ADDR_ASSERT(pBpp != NULL); + ADDR_ASSERT(pWidth != NULL && pHeight != NULL && pBasePitch != NULL); + + if 
(pBpp) + { + bpp = *pBpp; + + switch (elemMode) + { + case ADDR_EXPANDED: + packedBits = bpp / expandX / expandY; + break; + case ADDR_PACKED_STD: // Different bit order + case ADDR_PACKED_REV: + packedBits = bpp * expandX * expandY; + break; + case ADDR_PACKED_GBGR: + case ADDR_PACKED_BGRG: + packedBits = bpp; // 32-bit packed ==> 2 32-bit result + break; + case ADDR_PACKED_BC1: // Fall through + case ADDR_PACKED_BC4: + packedBits = 64; + bBCnFormat = TRUE; + break; + case ADDR_PACKED_BC2: // Fall through + case ADDR_PACKED_BC3: // Fall through + case ADDR_PACKED_BC5: // Fall through + bBCnFormat = TRUE; + // fall through + case ADDR_PACKED_ASTC: + case ADDR_PACKED_ETC2_128BPP: + packedBits = 128; + break; + case ADDR_PACKED_ETC2_64BPP: + packedBits = 64; + break; + case ADDR_ROUND_BY_HALF: // Fall through + case ADDR_ROUND_TRUNCATE: // Fall through + case ADDR_ROUND_DITHER: // Fall through + case ADDR_UNCOMPRESSED: + packedBits = bpp; + break; + default: + packedBits = bpp; + ADDR_ASSERT_ALWAYS(); + break; + } + + *pBpp = packedBits; + } + + if (pWidth && pHeight && pBasePitch) + { + basePitch = *pBasePitch; + width = *pWidth; + height = *pHeight; + + if ((expandX > 1) || (expandY > 1)) + { + if (elemMode == ADDR_EXPANDED) + { + basePitch *= expandX; + width *= expandX; + height *= expandY; + } + else + { + // Evergreen family workaround + if (bBCnFormat && (m_pAddrLib->GetChipFamily() == ADDR_CHIP_FAMILY_R8XX)) + { + // For BCn we now pad it to POW2 at the beginning so it is safe to + // divide by 4 directly + basePitch = basePitch / expandX; + width = width / expandX; + height = height / expandY; +#if DEBUG + width = (width == 0) ? 1 : width; + height = (height == 0) ? 
1 : height; + + if ((*pWidth > PowTwoAlign(width, 8) * expandX) || + (*pHeight > PowTwoAlign(height, 8) * expandY)) // 8 is 1D tiling alignment + { + // if this assertion is hit we may have issues if app samples + // rightmost/bottommost pixels + ADDR_ASSERT_ALWAYS(); + } +#endif + } + else // Not BCn format we still keep old way (FMT_1? No real test yet) + { + basePitch = (basePitch + expandX - 1) / expandX; + width = (width + expandX - 1) / expandX; + height = (height + expandY - 1) / expandY; + } + } + + *pBasePitch = basePitch; // 0 is legal value for base pitch. + *pWidth = (width == 0) ? 1 : width; + *pHeight = (height == 0) ? 1 : height; + } //if (pWidth && pHeight && pBasePitch) + } +} + +/** +**************************************************************************************************** +* ElemLib::RestoreSurfaceInfo +* +* @brief +* Reverse operation of AdjustSurfaceInfo +* +* @return +* N/A +**************************************************************************************************** +*/ +VOID ElemLib::RestoreSurfaceInfo( + ElemMode elemMode, ///< [in] element mode + UINT_32 expandX, ///< [in] decompression expansion factor in X + UINT_32 expandY, ///< [out] decompression expansion factor in Y + UINT_32* pBpp, ///< [in,out] bpp + UINT_32* pWidth, ///< [in,out] width + UINT_32* pHeight) ///< [in,out] height +{ + UINT_32 originalBits; + UINT_32 width; + UINT_32 height; + UINT_32 bpp; + + BOOL_32 bBCnFormat = FALSE; + (void)bBCnFormat; + + ADDR_ASSERT(pBpp != NULL); + ADDR_ASSERT(pWidth != NULL && pHeight != NULL); + + if (pBpp) + { + bpp = *pBpp; + + switch (elemMode) + { + case ADDR_EXPANDED: + originalBits = bpp * expandX * expandY; + break; + case ADDR_PACKED_STD: // Different bit order + case ADDR_PACKED_REV: + originalBits = bpp / expandX / expandY; + break; + case ADDR_PACKED_GBGR: + case ADDR_PACKED_BGRG: + originalBits = bpp; // 32-bit packed ==> 2 32-bit result + break; + case ADDR_PACKED_BC1: // Fall through + case ADDR_PACKED_BC4: + 
originalBits = 64; + bBCnFormat = TRUE; + break; + case ADDR_PACKED_BC2: // Fall through + case ADDR_PACKED_BC3: // Fall through + case ADDR_PACKED_BC5: + bBCnFormat = TRUE; + // fall through + case ADDR_PACKED_ASTC: + case ADDR_PACKED_ETC2_128BPP: + originalBits = 128; + break; + case ADDR_PACKED_ETC2_64BPP: + originalBits = 64; + break; + case ADDR_ROUND_BY_HALF: // Fall through + case ADDR_ROUND_TRUNCATE: // Fall through + case ADDR_ROUND_DITHER: // Fall through + case ADDR_UNCOMPRESSED: + originalBits = bpp; + break; + default: + originalBits = bpp; + ADDR_ASSERT_ALWAYS(); + break; + } + + *pBpp = originalBits; + } + + if (pWidth && pHeight) + { + width = *pWidth; + height = *pHeight; + + if ((expandX > 1) || (expandY > 1)) + { + if (elemMode == ADDR_EXPANDED) + { + width /= expandX; + height /= expandY; + } + else + { + width *= expandX; + height *= expandY; + } + } + + *pWidth = (width == 0) ? 1 : width; + *pHeight = (height == 0) ? 1 : height; + } +} + +/** +**************************************************************************************************** +* ElemLib::GetBitsPerPixel +* +* @brief +* Compute the total bits per element according to a format +* code. For compressed formats, this is not the same as +* the number of bits per decompressed element. 
+* +* @return +* Bits per pixel +**************************************************************************************************** +*/ +UINT_32 ElemLib::GetBitsPerPixel( + AddrFormat format, ///< [in] surface format code + ElemMode* pElemMode, ///< [out] element mode + UINT_32* pExpandX, ///< [out] decompression expansion factor in X + UINT_32* pExpandY, ///< [out] decompression expansion factor in Y + UINT_32* pUnusedBits) ///< [out] bits unused +{ + UINT_32 bpp; + UINT_32 expandX = 1; + UINT_32 expandY = 1; + UINT_32 bitUnused = 0; + ElemMode elemMode = ADDR_UNCOMPRESSED; // default value + + switch (format) + { + case ADDR_FMT_8: + bpp = 8; + break; + case ADDR_FMT_1_5_5_5: + case ADDR_FMT_5_6_5: + case ADDR_FMT_6_5_5: + case ADDR_FMT_8_8: + case ADDR_FMT_4_4_4_4: + case ADDR_FMT_16: + bpp = 16; + break; + case ADDR_FMT_GB_GR: + elemMode = ADDR_PACKED_GBGR; + bpp = m_configFlags.use32bppFor422Fmt ? 32 : 16; + expandX = m_configFlags.use32bppFor422Fmt ? 2 : 1; + break; + case ADDR_FMT_BG_RG: + elemMode = ADDR_PACKED_BGRG; + bpp = m_configFlags.use32bppFor422Fmt ? 32 : 16; + expandX = m_configFlags.use32bppFor422Fmt ? 
2 : 1; + break; + case ADDR_FMT_8_8_8_8: + case ADDR_FMT_2_10_10_10: + case ADDR_FMT_10_11_11: + case ADDR_FMT_11_11_10: + case ADDR_FMT_16_16: + case ADDR_FMT_32: + case ADDR_FMT_24_8: + bpp = 32; + break; + case ADDR_FMT_16_16_16_16: + case ADDR_FMT_32_32: + case ADDR_FMT_CTX1: + bpp = 64; + break; + case ADDR_FMT_32_32_32_32: + bpp = 128; + break; + case ADDR_FMT_INVALID: + bpp = 0; + break; + case ADDR_FMT_1_REVERSED: + elemMode = ADDR_PACKED_REV; + expandX = 8; + bpp = 1; + break; + case ADDR_FMT_1: + elemMode = ADDR_PACKED_STD; + expandX = 8; + bpp = 1; + break; + case ADDR_FMT_4_4: + case ADDR_FMT_3_3_2: + bpp = 8; + break; + case ADDR_FMT_5_5_5_1: + bpp = 16; + break; + case ADDR_FMT_32_AS_8: + case ADDR_FMT_32_AS_8_8: + case ADDR_FMT_8_24: + case ADDR_FMT_10_10_10_2: + case ADDR_FMT_5_9_9_9_SHAREDEXP: + bpp = 32; + break; + case ADDR_FMT_X24_8_32_FLOAT: + bpp = 64; + bitUnused = 24; + break; + case ADDR_FMT_8_8_8: + elemMode = ADDR_EXPANDED; + bpp = 24;//@@ 8; // read 3 elements per pixel + expandX = 3; + break; + case ADDR_FMT_16_16_16: + elemMode = ADDR_EXPANDED; + bpp = 48;//@@ 16; // read 3 elements per pixel + expandX = 3; + break; + case ADDR_FMT_32_32_32: + elemMode = ADDR_EXPANDED; + expandX = 3; + bpp = 96;//@@ 32; // read 3 elements per pixel + break; + case ADDR_FMT_BC1: + elemMode = ADDR_PACKED_BC1; + expandX = 4; + expandY = 4; + bpp = 64; + break; + case ADDR_FMT_BC4: + elemMode = ADDR_PACKED_BC4; + expandX = 4; + expandY = 4; + bpp = 64; + break; + case ADDR_FMT_BC2: + elemMode = ADDR_PACKED_BC2; + expandX = 4; + expandY = 4; + bpp = 128; + break; + case ADDR_FMT_BC3: + elemMode = ADDR_PACKED_BC3; + expandX = 4; + expandY = 4; + bpp = 128; + break; + case ADDR_FMT_BC5: + case ADDR_FMT_BC6: // reuse ADDR_PACKED_BC5 + case ADDR_FMT_BC7: // reuse ADDR_PACKED_BC5 + elemMode = ADDR_PACKED_BC5; + expandX = 4; + expandY = 4; + bpp = 128; + break; + + case ADDR_FMT_ETC2_64BPP: + elemMode = ADDR_PACKED_ETC2_64BPP; + expandX = 4; + expandY = 4; + bpp 
= 64; + break; + + case ADDR_FMT_ETC2_128BPP: + elemMode = ADDR_PACKED_ETC2_128BPP; + expandX = 4; + expandY = 4; + bpp = 128; + break; + + case ADDR_FMT_ASTC_4x4: + elemMode = ADDR_PACKED_ASTC; + expandX = 4; + expandY = 4; + bpp = 128; + break; + + case ADDR_FMT_ASTC_5x4: + elemMode = ADDR_PACKED_ASTC; + expandX = 5; + expandY = 4; + bpp = 128; + break; + + case ADDR_FMT_ASTC_5x5: + elemMode = ADDR_PACKED_ASTC; + expandX = 5; + expandY = 5; + bpp = 128; + break; + + case ADDR_FMT_ASTC_6x5: + elemMode = ADDR_PACKED_ASTC; + expandX = 6; + expandY = 5; + bpp = 128; + break; + + case ADDR_FMT_ASTC_6x6: + elemMode = ADDR_PACKED_ASTC; + expandX = 6; + expandY = 6; + bpp = 128; + break; + + case ADDR_FMT_ASTC_8x5: + elemMode = ADDR_PACKED_ASTC; + expandX = 8; + expandY = 5; + bpp = 128; + break; + + case ADDR_FMT_ASTC_8x6: + elemMode = ADDR_PACKED_ASTC; + expandX = 8; + expandY = 6; + bpp = 128; + break; + + case ADDR_FMT_ASTC_8x8: + elemMode = ADDR_PACKED_ASTC; + expandX = 8; + expandY = 8; + bpp = 128; + break; + + case ADDR_FMT_ASTC_10x5: + elemMode = ADDR_PACKED_ASTC; + expandX = 10; + expandY = 5; + bpp = 128; + break; + + case ADDR_FMT_ASTC_10x6: + elemMode = ADDR_PACKED_ASTC; + expandX = 10; + expandY = 6; + bpp = 128; + break; + + case ADDR_FMT_ASTC_10x8: + elemMode = ADDR_PACKED_ASTC; + expandX = 10; + expandY = 8; + bpp = 128; + break; + + case ADDR_FMT_ASTC_10x10: + elemMode = ADDR_PACKED_ASTC; + expandX = 10; + expandY = 10; + bpp = 128; + break; + + case ADDR_FMT_ASTC_12x10: + elemMode = ADDR_PACKED_ASTC; + expandX = 12; + expandY = 10; + bpp = 128; + break; + + case ADDR_FMT_ASTC_12x12: + elemMode = ADDR_PACKED_ASTC; + expandX = 12; + expandY = 12; + bpp = 128; + break; + + default: + bpp = 0; + ADDR_ASSERT_ALWAYS(); + break; + // @@ or should this be an error? 
+ } + + SafeAssign(pExpandX, expandX); + SafeAssign(pExpandY, expandY); + SafeAssign(pUnusedBits, bitUnused); + SafeAssign(reinterpret_cast(pElemMode), elemMode); + + return bpp; +} + +/** +**************************************************************************************************** +* ElemLib::GetCompBits +* +* @brief +* Set each component's bit size and bit start. And set element mode and number type +* +* @return +* N/A +**************************************************************************************************** +*/ +VOID ElemLib::GetCompBits( + UINT_32 c0, ///< [in] bits of component 0 + UINT_32 c1, ///< [in] bits of component 1 + UINT_32 c2, ///< [in] bits of component 2 + UINT_32 c3, ///< [in] bits of component 3 + PixelFormatInfo* pInfo, ///< [out] per component info out + ElemMode elemMode) ///< [in] element mode +{ + pInfo->comps = 0; + + pInfo->compBit[0] = c0; + pInfo->compBit[1] = c1; + pInfo->compBit[2] = c2; + pInfo->compBit[3] = c3; + + pInfo->compStart[0] = 0; + pInfo->compStart[1] = c0; + pInfo->compStart[2] = c0+c1; + pInfo->compStart[3] = c0+c1+c2; + + pInfo->elemMode = elemMode; + // still needed since component swap may depend on number of components + for (INT i=0; i<4; i++) + { + if (pInfo->compBit[i] == 0) + { + pInfo->compStart[i] = 0; // all null components start at bit 0 + pInfo->numType[i] = ADDR_NO_NUMBER; // and have no number type + } + else + { + pInfo->comps++; + } + } +} + +/** +**************************************************************************************************** +* ElemLib::GetCompBits +* +* @brief +* Set the clear color (or clear depth/stencil) for a surface +* +* @note +* If clearColor is zero, a default clear value is used in place of comps[4]. 
+* If float32 is set, full precision is used, else the mantissa is reduced to 12-bits +* +* @return +* N/A +**************************************************************************************************** +*/ +VOID ElemLib::SetClearComps( + ADDR_FLT_32 comps[4], ///< [in,out] components + BOOL_32 clearColor, ///< [in] TRUE if clear color is set (CLEAR_COLOR) + BOOL_32 float32) ///< [in] TRUE if float32 component (BLEND_FLOAT32) +{ + INT_32 i; + + // Use default clearvalues if clearColor is disabled + if (clearColor == FALSE) + { + for (i=0; i<3; i++) + { + comps[i].f = 0.0; + } + comps[3].f = 1.0; + } + + // Otherwise use the (modified) clear value + else + { + for (i=0; i<4; i++) + { // If full precision, use clear value unchanged + if (float32) + { + // Do nothing + //comps[i] = comps[i]; + } + // Else if it is a NaN, use the standard NaN value + else if ((comps[i].u & 0x7FFFFFFF) > 0x7F800000) + { + comps[i].u = 0xFFC00000; + } + // Else reduce the mantissa precision + else + { + comps[i].u = comps[i].u & 0xFFFFF000; + } + } + } +} + +/** +**************************************************************************************************** +* ElemLib::IsBlockCompressed +* +* @brief +* TRUE if this is block compressed format +* +* @note +* +* @return +* BOOL_32 +**************************************************************************************************** +*/ +BOOL_32 ElemLib::IsBlockCompressed( + AddrFormat format) ///< [in] Format +{ + return (((format >= ADDR_FMT_BC1) && (format <= ADDR_FMT_BC7)) || + ((format >= ADDR_FMT_ASTC_4x4) && (format <= ADDR_FMT_ETC2_128BPP))); +} + +/** +**************************************************************************************************** +* ElemLib::IsCompressed +* +* @brief +* TRUE if this is block compressed format or 1 bit format +* +* @note +* +* @return +* BOOL_32 +**************************************************************************************************** +*/ +BOOL_32 ElemLib::IsCompressed( + 
AddrFormat format) ///< [in] Format +{ + return IsBlockCompressed(format) || format == ADDR_FMT_BC1 || format == ADDR_FMT_BC7; +} + +/** +**************************************************************************************************** +* ElemLib::IsExpand3x +* +* @brief +* TRUE if this is 3x expand format +* +* @note +* +* @return +* BOOL_32 +**************************************************************************************************** +*/ +BOOL_32 ElemLib::IsExpand3x( + AddrFormat format) ///< [in] Format +{ + BOOL_32 is3x = FALSE; + + switch (format) + { + case ADDR_FMT_8_8_8: + case ADDR_FMT_16_16_16: + case ADDR_FMT_32_32_32: + is3x = TRUE; + break; + default: + break; + } + + return is3x; +} + +/** +**************************************************************************************************** +* ElemLib::IsMacroPixelPacked +* +* @brief +* TRUE if this is a macro-pixel-packed format. +* +* @note +* +* @return +* BOOL_32 +**************************************************************************************************** +*/ +BOOL_32 ElemLib::IsMacroPixelPacked( + AddrFormat format) ///< [in] Format +{ + BOOL_32 isMacroPixelPacked = FALSE; + + switch (format) + { + case ADDR_FMT_BG_RG: + case ADDR_FMT_GB_GR: + isMacroPixelPacked = TRUE; + break; + default: + break; + } + + return isMacroPixelPacked; +} + +} diff -Nru mesa-18.3.3/src/amd/addrlib/src/core/addrelemlib.h mesa-19.0.1/src/amd/addrlib/src/core/addrelemlib.h --- mesa-18.3.3/src/amd/addrlib/src/core/addrelemlib.h 1970-01-01 00:00:00.000000000 +0000 +++ mesa-19.0.1/src/amd/addrlib/src/core/addrelemlib.h 2019-03-31 23:16:37.000000000 +0000 @@ -0,0 +1,279 @@ +/* + * Copyright © 2007-2018 Advanced Micro Devices, Inc. + * All Rights Reserved. 
+ * + * Permission is hereby granted, free of charge, to any person obtaining + * a copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES + * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NON-INFRINGEMENT. IN NO EVENT SHALL THE COPYRIGHT HOLDERS, AUTHORS + * AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE + * USE OR OTHER DEALINGS IN THE SOFTWARE. + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + */ + +/** +**************************************************************************************************** +* @file addrelemlib.h +* @brief Contains the class for element/pixel related functions. 
+**************************************************************************************************** +*/ + +#ifndef __ELEM_LIB_H__ +#define __ELEM_LIB_H__ + +#include "addrinterface.h" +#include "addrobject.h" +#include "addrcommon.h" + +namespace Addr +{ + +class Lib; + +// The masks for property bits within the Properties INT_32 +union ComponentFlags +{ + struct + { + UINT_32 byteAligned : 1; ///< all components are byte aligned + UINT_32 exportNorm : 1; ///< components support R6xx NORM compression + UINT_32 floatComp : 1; ///< there is at least one floating point component + }; + + UINT_32 value; +}; + +// Copy from legacy lib's NumberType +enum NumberType +{ + // The following number types have the range [-1..1] + ADDR_NO_NUMBER, // This component doesn't exist and has no default value + ADDR_EPSILON, // Force component value to integer 0x00000001 + ADDR_ZERO, // Force component value to integer 0x00000000 + ADDR_ONE, // Force component value to floating point 1.0 + // Above values don't have any bits per component (keep ADDR_ONE the last of these) + + ADDR_UNORM, // Unsigned normalized (repeating fraction) full precision + ADDR_SNORM, // Signed normalized (repeating fraction) full precision + ADDR_GAMMA, // Gamma-corrected, full precision + + ADDR_UNORM_R5XXRB, // Unsigned normalized (repeating fraction) for r5xx RB + ADDR_SNORM_R5XXRB, // Signed normalized (repeating fraction) for r5xx RB + ADDR_GAMMA_R5XXRB, // Gamma-corrected for r5xx RB (note: unnormalized value) + ADDR_UNORM_R5XXBC, // Unsigned normalized (repeating fraction) for r5xx BC + ADDR_SNORM_R5XXBC, // Signed normalized (repeating fraction) for r5xx BC + ADDR_GAMMA_R5XXBC, // Gamma-corrected for r5xx BC (note: unnormalized value) + + ADDR_UNORM_R6XX, // Unsigned normalized (repeating fraction) for R6xx + ADDR_UNORM_R6XXDB, // Unorms for 24-bit depth: one value differs from ADDR_UNORM_R6XX + ADDR_SNORM_R6XX, // Signed normalized (repeating fraction) for R6xx + ADDR_GAMMA8_R6XX, // 
Gamma-corrected for r6xx + ADDR_GAMMA8_R7XX_TP, // Gamma-corrected for r7xx TP 12bit unorm 8.4. + + ADDR_U4FLOATC, // Unsigned float: 4-bit exponent, bias=15, no NaN, clamp [0..1] + ADDR_GAMMA_4SEG, // Gamma-corrected, four segment approximation + ADDR_U0FIXED, // Unsigned 0.N-bit fixed point + + // The following number types have large ranges (LEAVE ADDR_USCALED first or fix Finish routine) + ADDR_USCALED, // Unsigned integer converted to/from floating point + ADDR_SSCALED, // Signed integer converted to/from floating point + ADDR_USCALED_R5XXRB, // Unsigned integer to/from floating point for r5xx RB + ADDR_SSCALED_R5XXRB, // Signed integer to/from floating point for r5xx RB + ADDR_UINT_BITS, // Keep in unsigned integer form, clamped to specified range + ADDR_SINT_BITS, // Keep in signed integer form, clamped to specified range + ADDR_UINTBITS, // @@ remove Keep in unsigned integer form, use modulus to reduce bits + ADDR_SINTBITS, // @@ remove Keep in signed integer form, use modulus to reduce bits + + // The following number types and ADDR_U4FLOATC have exponents + // (LEAVE ADDR_S8FLOAT first or fix Finish routine) + ADDR_S8FLOAT, // Signed floating point with 8-bit exponent, bias=127 + ADDR_S8FLOAT32, // 32-bit IEEE float, passes through NaN values + ADDR_S5FLOAT, // Signed floating point with 5-bit exponent, bias=15 + ADDR_S5FLOATM, // Signed floating point with 5-bit exponent, bias=15, no NaN/Inf + ADDR_U5FLOAT, // Signed floating point with 5-bit exponent, bias=15 + ADDR_U3FLOATM, // Unsigned floating point with 3-bit exponent, bias=3 + + ADDR_S5FIXED, // Signed 5.N-bit fixed point, with rounding + + ADDR_END_NUMBER // Used for range comparisons +}; + +// Copy from legacy lib's AddrElement +enum ElemMode +{ + // These formats allow both packing an unpacking + ADDR_ROUND_BY_HALF, // add 1/2 and truncate when packing this element + ADDR_ROUND_TRUNCATE, // truncate toward 0 for sign/mag, else toward neg + ADDR_ROUND_DITHER, // Pack by dithering -- requires 
(x,y) position + + // These formats only allow unpacking, no packing + ADDR_UNCOMPRESSED, // Elements are not compressed: one data element per pixel/texel + ADDR_EXPANDED, // Elements are split up and stored in multiple data elements + ADDR_PACKED_STD, // Elements are compressed into ExpandX by ExpandY data elements + ADDR_PACKED_REV, // Like ADDR_PACKED, but X order of pixels is reverved + ADDR_PACKED_GBGR, // Elements are compressed 4:2:2 in G1B_G0R order (high to low) + ADDR_PACKED_BGRG, // Elements are compressed 4:2:2 in BG1_RG0 order (high to low) + ADDR_PACKED_BC1, // Each data element is uncompressed to a 4x4 pixel/texel array + ADDR_PACKED_BC2, // Each data element is uncompressed to a 4x4 pixel/texel array + ADDR_PACKED_BC3, // Each data element is uncompressed to a 4x4 pixel/texel array + ADDR_PACKED_BC4, // Each data element is uncompressed to a 4x4 pixel/texel array + ADDR_PACKED_BC5, // Each data element is uncompressed to a 4x4 pixel/texel array + ADDR_PACKED_ETC2_64BPP, // ETC2 formats that use 64bpp to represent each 4x4 block + ADDR_PACKED_ETC2_128BPP, // ETC2 formats that use 128bpp to represent each 4x4 block + ADDR_PACKED_ASTC, // Various ASTC formats, all are 128bpp with varying block sizes + + // These formats provide various kinds of compression + ADDR_ZPLANE_R5XX, // Compressed Zplane using r5xx architecture format + ADDR_ZPLANE_R6XX, // Compressed Zplane using r6xx architecture format + //@@ Fill in the compression modes + + ADDR_END_ELEMENT // Used for range comparisons +}; + +enum DepthPlanarType +{ + ADDR_DEPTH_PLANAR_NONE = 0, // No plane z/stencl + ADDR_DEPTH_PLANAR_R600 = 1, // R600 z and stencil planes are store within a tile + ADDR_DEPTH_PLANAR_R800 = 2, // R800 has separate z and stencil planes +}; + +/** +**************************************************************************************************** +* PixelFormatInfo +* +* @brief +* Per component info +* 
+**************************************************************************************************** +*/ +struct PixelFormatInfo +{ + UINT_32 compBit[4]; + NumberType numType[4]; + UINT_32 compStart[4]; + ElemMode elemMode; + UINT_32 comps; ///< Number of components +}; + +/** +**************************************************************************************************** +* @brief This class contains asic indepentent element related attributes and operations +**************************************************************************************************** +*/ +class ElemLib : public Object +{ +protected: + ElemLib(Lib* pAddrLib); + +public: + + /// Makes this class virtual + virtual ~ElemLib(); + + static ElemLib* Create( + const Lib* pAddrLib); + + /// The implementation is only for R6xx/R7xx, so make it virtual in case we need for R8xx + BOOL_32 PixGetExportNorm( + AddrColorFormat colorFmt, + AddrSurfaceNumber numberFmt, AddrSurfaceSwap swap) const; + + /// Below method are asic independent, so make them just static. + /// Remove static if we need different operation in hwl. 
+ + VOID Flt32ToDepthPixel( + AddrDepthFormat format, const ADDR_FLT_32 comps[2], UINT_8 *pPixel) const; + + VOID Flt32ToColorPixel( + AddrColorFormat format, AddrSurfaceNumber surfNum, AddrSurfaceSwap surfSwap, + const ADDR_FLT_32 comps[4], UINT_8 *pPixel) const; + + static VOID Flt32sToInt32s( + ADDR_FLT_32 value, UINT_32 bits, NumberType numberType, UINT_32* pResult); + + static VOID Int32sToPixel( + UINT_32 numComps, UINT_32* pComps, UINT_32* pCompBits, UINT_32* pCompStart, + ComponentFlags properties, UINT_32 resultBits, UINT_8* pPixel); + + VOID PixGetColorCompInfo( + AddrColorFormat format, AddrSurfaceNumber number, AddrSurfaceSwap swap, + PixelFormatInfo* pInfo) const; + + VOID PixGetDepthCompInfo( + AddrDepthFormat format, PixelFormatInfo* pInfo) const; + + UINT_32 GetBitsPerPixel( + AddrFormat format, ElemMode* pElemMode = NULL, + UINT_32* pExpandX = NULL, UINT_32* pExpandY = NULL, UINT_32* pBitsUnused = NULL); + + static VOID SetClearComps( + ADDR_FLT_32 comps[4], BOOL_32 clearColor, BOOL_32 float32); + + VOID AdjustSurfaceInfo( + ElemMode elemMode, UINT_32 expandX, UINT_32 expandY, + UINT_32* pBpp, UINT_32* pBasePitch, UINT_32* pWidth, UINT_32* pHeight); + + VOID RestoreSurfaceInfo( + ElemMode elemMode, UINT_32 expandX, UINT_32 expandY, + UINT_32* pBpp, UINT_32* pWidth, UINT_32* pHeight); + + /// Checks if depth and stencil are planar inside a tile + BOOL_32 IsDepthStencilTilePlanar() + { + return (m_depthPlanarType == ADDR_DEPTH_PLANAR_R600) ? 
TRUE : FALSE; + } + + /// Sets m_configFlags, copied from AddrLib + VOID SetConfigFlags(ConfigFlags flags) + { + m_configFlags = flags; + } + + static BOOL_32 IsCompressed(AddrFormat format); + static BOOL_32 IsBlockCompressed(AddrFormat format); + static BOOL_32 IsExpand3x(AddrFormat format); + static BOOL_32 IsMacroPixelPacked(AddrFormat format); + +protected: + + static VOID GetCompBits( + UINT_32 c0, UINT_32 c1, UINT_32 c2, UINT_32 c3, + PixelFormatInfo* pInfo, + ElemMode elemMode = ADDR_ROUND_BY_HALF); + + static VOID GetCompType( + AddrColorFormat format, AddrSurfaceNumber numType, + PixelFormatInfo* pInfo); + + static VOID GetCompSwap( + AddrSurfaceSwap swap, PixelFormatInfo* pInfo); + + static VOID SwapComps( + UINT_32 c0, UINT_32 c1, PixelFormatInfo* pInfo); + +private: + + UINT_32 m_fp16ExportNorm; ///< If allow FP16 to be reported as EXPORT_NORM + DepthPlanarType m_depthPlanarType; + + ConfigFlags m_configFlags; ///< Copy of AddrLib's configFlags + Addr::Lib* const m_pAddrLib; ///< Pointer to parent addrlib instance +}; + +} //Addr + +#endif + diff -Nru mesa-18.3.3/src/amd/addrlib/src/core/addrlib1.cpp mesa-19.0.1/src/amd/addrlib/src/core/addrlib1.cpp --- mesa-18.3.3/src/amd/addrlib/src/core/addrlib1.cpp 1970-01-01 00:00:00.000000000 +0000 +++ mesa-19.0.1/src/amd/addrlib/src/core/addrlib1.cpp 2019-03-31 23:16:37.000000000 +0000 @@ -0,0 +1,4061 @@ +/* + * Copyright © 2007-2018 Advanced Micro Devices, Inc. + * All Rights Reserved. 
+ * + * Permission is hereby granted, free of charge, to any person obtaining + * a copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES + * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NON-INFRINGEMENT. IN NO EVENT SHALL THE COPYRIGHT HOLDERS, AUTHORS + * AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE + * USE OR OTHER DEALINGS IN THE SOFTWARE. + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + */ + +/** +**************************************************************************************************** +* @file addr1lib.cpp +* @brief Contains the implementation for the Addr::V1::Lib base class. 
+**************************************************************************************************** +*/ + +#include "addrinterface.h" +#include "addrlib1.h" +#include "addrcommon.h" + +namespace Addr +{ +namespace V1 +{ + +//////////////////////////////////////////////////////////////////////////////////////////////////// +// Static Const Member +//////////////////////////////////////////////////////////////////////////////////////////////////// + +const TileModeFlags Lib::ModeFlags[ADDR_TM_COUNT] = +{// T L 1 2 3 P Pr B + {1, 1, 0, 0, 0, 0, 0, 0}, // ADDR_TM_LINEAR_GENERAL + {1, 1, 0, 0, 0, 0, 0, 0}, // ADDR_TM_LINEAR_ALIGNED + {1, 0, 1, 0, 0, 0, 0, 0}, // ADDR_TM_1D_TILED_THIN1 + {4, 0, 1, 0, 0, 0, 0, 0}, // ADDR_TM_1D_TILED_THICK + {1, 0, 0, 1, 0, 0, 0, 0}, // ADDR_TM_2D_TILED_THIN1 + {1, 0, 0, 1, 0, 0, 0, 0}, // ADDR_TM_2D_TILED_THIN2 + {1, 0, 0, 1, 0, 0, 0, 0}, // ADDR_TM_2D_TILED_THIN4 + {4, 0, 0, 1, 0, 0, 0, 0}, // ADDR_TM_2D_TILED_THICK + {1, 0, 0, 1, 0, 0, 0, 1}, // ADDR_TM_2B_TILED_THIN1 + {1, 0, 0, 1, 0, 0, 0, 1}, // ADDR_TM_2B_TILED_THIN2 + {1, 0, 0, 1, 0, 0, 0, 1}, // ADDR_TM_2B_TILED_THIN4 + {4, 0, 0, 1, 0, 0, 0, 1}, // ADDR_TM_2B_TILED_THICK + {1, 0, 0, 1, 1, 0, 0, 0}, // ADDR_TM_3D_TILED_THIN1 + {4, 0, 0, 1, 1, 0, 0, 0}, // ADDR_TM_3D_TILED_THICK + {1, 0, 0, 1, 1, 0, 0, 1}, // ADDR_TM_3B_TILED_THIN1 + {4, 0, 0, 1, 1, 0, 0, 1}, // ADDR_TM_3B_TILED_THICK + {8, 0, 0, 1, 0, 0, 0, 0}, // ADDR_TM_2D_TILED_XTHICK + {8, 0, 0, 1, 1, 0, 0, 0}, // ADDR_TM_3D_TILED_XTHICK + {1, 0, 0, 0, 0, 0, 0, 0}, // ADDR_TM_POWER_SAVE + {1, 0, 0, 1, 0, 1, 1, 0}, // ADDR_TM_PRT_TILED_THIN1 + {1, 0, 0, 1, 0, 1, 0, 0}, // ADDR_TM_PRT_2D_TILED_THIN1 + {1, 0, 0, 1, 1, 1, 0, 0}, // ADDR_TM_PRT_3D_TILED_THIN1 + {4, 0, 0, 1, 0, 1, 1, 0}, // ADDR_TM_PRT_TILED_THICK + {4, 0, 0, 1, 0, 1, 0, 0}, // ADDR_TM_PRT_2D_TILED_THICK + {4, 0, 0, 1, 1, 1, 0, 0}, // ADDR_TM_PRT_3D_TILED_THICK + {0, 0, 0, 0, 0, 0, 0, 0}, // ADDR_TM_UNKNOWN +}; + 
+//////////////////////////////////////////////////////////////////////////////////////////////////// +// Constructor/Destructor +//////////////////////////////////////////////////////////////////////////////////////////////////// + +/** +**************************************************************************************************** +* Lib::AddrLib1 +* +* @brief +* Constructor for the AddrLib1 class +* +**************************************************************************************************** +*/ +Lib::Lib() + : + Addr::Lib() +{ +} + +/** +**************************************************************************************************** +* Lib::Lib +* +* @brief +* Constructor for the Addr::V1::Lib class with hClient as parameter +* +**************************************************************************************************** +*/ +Lib::Lib(const Client* pClient) + : + Addr::Lib(pClient) +{ +} + +/** +**************************************************************************************************** +* Lib::~AddrLib1 +* +* @brief +* Destructor for the AddrLib1 class +* +**************************************************************************************************** +*/ +Lib::~Lib() +{ +} + +/** +**************************************************************************************************** +* Lib::GetLib +* +* @brief +* Get AddrLib1 pointer +* +* @return +* An Addr::V1::Lib class pointer +**************************************************************************************************** +*/ +Lib* Lib::GetLib( + ADDR_HANDLE hLib) ///< [in] handle of ADDR_HANDLE +{ + Addr::Lib* pAddrLib = Addr::Lib::GetLib(hLib); + if ((pAddrLib != NULL) && + ((pAddrLib->GetChipFamily() == ADDR_CHIP_FAMILY_IVLD) || + (pAddrLib->GetChipFamily() > ADDR_CHIP_FAMILY_VI))) + { + // only valid and pre-VI ASIC can use AddrLib1 function. 
+ ADDR_ASSERT_ALWAYS(); + hLib = NULL; + } + return static_cast(hLib); +} + +//////////////////////////////////////////////////////////////////////////////////////////////////// +// Surface Methods +//////////////////////////////////////////////////////////////////////////////////////////////////// + +/** +**************************************************************************************************** +* Lib::ComputeSurfaceInfo +* +* @brief +* Interface function stub of AddrComputeSurfaceInfo. +* +* @return +* ADDR_E_RETURNCODE +**************************************************************************************************** +*/ +ADDR_E_RETURNCODE Lib::ComputeSurfaceInfo( + const ADDR_COMPUTE_SURFACE_INFO_INPUT* pIn, ///< [in] input structure + ADDR_COMPUTE_SURFACE_INFO_OUTPUT* pOut ///< [out] output structure + ) const +{ + ADDR_E_RETURNCODE returnCode = ADDR_OK; + + if (GetFillSizeFieldsFlags() == TRUE) + { + if ((pIn->size != sizeof(ADDR_COMPUTE_SURFACE_INFO_INPUT)) || + (pOut->size != sizeof(ADDR_COMPUTE_SURFACE_INFO_OUTPUT))) + { + returnCode = ADDR_PARAMSIZEMISMATCH; + } + } + + // We suggest client do sanity check but a check here is also good + if (pIn->bpp > 128) + { + returnCode = ADDR_INVALIDPARAMS; + } + + if ((pIn->tileMode == ADDR_TM_UNKNOWN) && (pIn->mipLevel > 0)) + { + returnCode = ADDR_INVALIDPARAMS; + } + + // Thick modes don't support multisample + if ((Thickness(pIn->tileMode) > 1) && (pIn->numSamples > 1)) + { + returnCode = ADDR_INVALIDPARAMS; + } + + if (returnCode == ADDR_OK) + { + // Get a local copy of input structure and only reference pIn for unadjusted values + ADDR_COMPUTE_SURFACE_INFO_INPUT localIn = *pIn; + ADDR_TILEINFO tileInfoNull = {0}; + + if (UseTileInfo()) + { + // If the original input has a valid ADDR_TILEINFO pointer then copy its contents. + // Otherwise the default 0's in tileInfoNull are used. 
+ if (pIn->pTileInfo) + { + tileInfoNull = *pIn->pTileInfo; + } + localIn.pTileInfo = &tileInfoNull; + } + + localIn.numSamples = (pIn->numSamples == 0) ? 1 : pIn->numSamples; + + // Do mipmap check first + // If format is BCn, pre-pad dimension to power-of-two according to HWL + ComputeMipLevel(&localIn); + + if (m_configFlags.checkLast2DLevel) + { + // Save this level's original height in pixels + pOut->height = pIn->height; + } + + UINT_32 expandX = 1; + UINT_32 expandY = 1; + ElemMode elemMode; + + // Save outputs that may not go through HWL + pOut->pixelBits = localIn.bpp; + pOut->numSamples = localIn.numSamples; + pOut->last2DLevel = FALSE; + pOut->tcCompatible = FALSE; + +#if !ALT_TEST + if (localIn.numSamples > 1) + { + ADDR_ASSERT(localIn.mipLevel == 0); + } +#endif + + if (localIn.format != ADDR_FMT_INVALID) // Set format to INVALID will skip this conversion + { + // Get compression/expansion factors and element mode + // (which indicates compression/expansion + localIn.bpp = GetElemLib()->GetBitsPerPixel(localIn.format, + &elemMode, + &expandX, + &expandY); + + // Special flag for 96 bit surface. 96 (or 48 if we support) bit surface's width is + // pre-multiplied by 3 and bpp is divided by 3. So pitch alignment for linear- + // aligned does not meet 64-pixel in real. We keep special handling in hwl since hw + // restrictions are different. + // Also Mip 1+ needs an element pitch of 32 bits so we do not need this workaround + // but we use this flag to skip RestoreSurfaceInfo below + + if ((elemMode == ADDR_EXPANDED) && (expandX > 1)) + { + ADDR_ASSERT(IsLinear(localIn.tileMode)); + } + + GetElemLib()->AdjustSurfaceInfo(elemMode, + expandX, + expandY, + &localIn.bpp, + &localIn.basePitch, + &localIn.width, + &localIn.height); + + // Overwrite these parameters if we have a valid format + } + else if (localIn.bpp != 0) + { + localIn.width = (localIn.width != 0) ? localIn.width : 1; + localIn.height = (localIn.height != 0) ? 
localIn.height : 1; + } + else // Rule out some invalid parameters + { + ADDR_ASSERT_ALWAYS(); + + returnCode = ADDR_INVALIDPARAMS; + } + + // Check mipmap after surface expansion + if (returnCode == ADDR_OK) + { + returnCode = PostComputeMipLevel(&localIn, pOut); + } + + if (returnCode == ADDR_OK) + { + if (UseTileIndex(localIn.tileIndex)) + { + // Make sure pTileInfo is not NULL + ADDR_ASSERT(localIn.pTileInfo); + + UINT_32 numSamples = GetNumFragments(localIn.numSamples, localIn.numFrags); + + INT_32 macroModeIndex = TileIndexNoMacroIndex; + + if (localIn.tileIndex != TileIndexLinearGeneral) + { + // Try finding a macroModeIndex + macroModeIndex = HwlComputeMacroModeIndex(localIn.tileIndex, + localIn.flags, + localIn.bpp, + numSamples, + localIn.pTileInfo, + &localIn.tileMode, + &localIn.tileType); + } + + // If macroModeIndex is not needed, then call HwlSetupTileCfg to get tile info + if (macroModeIndex == TileIndexNoMacroIndex) + { + returnCode = HwlSetupTileCfg(localIn.bpp, + localIn.tileIndex, macroModeIndex, + localIn.pTileInfo, + &localIn.tileMode, &localIn.tileType); + } + // If macroModeIndex is invalid, then assert this is not macro tiled + else if (macroModeIndex == TileIndexInvalid) + { + ADDR_ASSERT(!IsMacroTiled(localIn.tileMode)); + } + + pOut->macroModeIndex = macroModeIndex; + } + } + + if (returnCode == ADDR_OK) + { + localIn.flags.dccPipeWorkaround = localIn.flags.dccCompatible; + + if (localIn.tileMode == ADDR_TM_UNKNOWN) + { + // HWL layer may override tile mode if necessary + HwlSelectTileMode(&localIn); + } + else + { + // HWL layer may override tile mode if necessary + HwlOverrideTileMode(&localIn); + + // Optimize tile mode if possible + OptimizeTileMode(&localIn); + } + } + + // Call main function to compute surface info + if (returnCode == ADDR_OK) + { + returnCode = HwlComputeSurfaceInfo(&localIn, pOut); + } + + if (returnCode == ADDR_OK) + { + // Since bpp might be changed we just pass it through + pOut->bpp = localIn.bpp; + + // Also 
original width/height/bpp + pOut->pixelPitch = pOut->pitch; + pOut->pixelHeight = pOut->height; + +#if DEBUG + if (localIn.flags.display) + { + ADDR_ASSERT((pOut->pitchAlign % 32) == 0); + } +#endif //DEBUG + + if (localIn.format != ADDR_FMT_INVALID) + { + // + // Note: For 96 bit surface, the pixelPitch returned might be an odd number, but it + // is okay to program texture pitch as HW's mip calculator would multiply 3 first, + // then do the appropriate paddings (linear alignment requirement and possible the + // nearest power-of-two for mipmaps), which results in the original pitch. + // + GetElemLib()->RestoreSurfaceInfo(elemMode, + expandX, + expandY, + &localIn.bpp, + &pOut->pixelPitch, + &pOut->pixelHeight); + } + + if (localIn.flags.qbStereo) + { + if (pOut->pStereoInfo) + { + ComputeQbStereoInfo(pOut); + } + } + + if (localIn.flags.volume) // For volume sliceSize equals to all z-slices + { + pOut->sliceSize = pOut->surfSize; + } + else // For array: sliceSize is likely to have slice-padding (the last one) + { + pOut->sliceSize = pOut->surfSize / pOut->depth; + + // array or cubemap + if (pIn->numSlices > 1) + { + // If this is the last slice then add the padding size to this slice + if (pIn->slice == (pIn->numSlices - 1)) + { + pOut->sliceSize += pOut->sliceSize * (pOut->depth - pIn->numSlices); + } + else if (m_configFlags.checkLast2DLevel) + { + // Reset last2DLevel flag if this is not the last array slice + pOut->last2DLevel = FALSE; + } + } + } + + pOut->pitchTileMax = pOut->pitch / 8 - 1; + pOut->heightTileMax = pOut->height / 8 - 1; + pOut->sliceTileMax = pOut->pitch * pOut->height / 64 - 1; + } + } + + ValidBaseAlignments(pOut->baseAlign); + + return returnCode; +} + +/** +**************************************************************************************************** +* Lib::ComputeSurfaceInfo +* +* @brief +* Interface function stub of AddrComputeSurfaceInfo. 
+* +* @return +* ADDR_E_RETURNCODE +**************************************************************************************************** +*/ +ADDR_E_RETURNCODE Lib::ComputeSurfaceAddrFromCoord( + const ADDR_COMPUTE_SURFACE_ADDRFROMCOORD_INPUT* pIn, ///< [in] input structure + ADDR_COMPUTE_SURFACE_ADDRFROMCOORD_OUTPUT* pOut ///< [out] output structure + ) const +{ + ADDR_E_RETURNCODE returnCode = ADDR_OK; + + if (GetFillSizeFieldsFlags() == TRUE) + { + if ((pIn->size != sizeof(ADDR_COMPUTE_SURFACE_ADDRFROMCOORD_INPUT)) || + (pOut->size != sizeof(ADDR_COMPUTE_SURFACE_ADDRFROMCOORD_OUTPUT))) + { + returnCode = ADDR_PARAMSIZEMISMATCH; + } + } + + if (returnCode == ADDR_OK) + { + ADDR_TILEINFO tileInfoNull; + ADDR_COMPUTE_SURFACE_ADDRFROMCOORD_INPUT input; + + if (UseTileIndex(pIn->tileIndex)) + { + input = *pIn; + // Use temp tile info for calcalation + input.pTileInfo = &tileInfoNull; + + const ADDR_SURFACE_FLAGS flags = {{0}}; + UINT_32 numSamples = GetNumFragments(pIn->numSamples, pIn->numFrags); + + // Try finding a macroModeIndex + INT_32 macroModeIndex = HwlComputeMacroModeIndex(input.tileIndex, + flags, + input.bpp, + numSamples, + input.pTileInfo, + &input.tileMode, + &input.tileType); + + // If macroModeIndex is not needed, then call HwlSetupTileCfg to get tile info + if (macroModeIndex == TileIndexNoMacroIndex) + { + returnCode = HwlSetupTileCfg(input.bpp, input.tileIndex, macroModeIndex, + input.pTileInfo, &input.tileMode, &input.tileType); + } + // If macroModeIndex is invalid, then assert this is not macro tiled + else if (macroModeIndex == TileIndexInvalid) + { + ADDR_ASSERT(!IsMacroTiled(input.tileMode)); + } + + // Change the input structure + pIn = &input; + } + + if (returnCode == ADDR_OK) + { + returnCode = HwlComputeSurfaceAddrFromCoord(pIn, pOut); + + if (returnCode == ADDR_OK) + { + pOut->prtBlockIndex = static_cast(pOut->addr / (64 * 1024)); + } + } + } + + return returnCode; +} + +/** 
+**************************************************************************************************** +* Lib::ComputeSurfaceCoordFromAddr +* +* @brief +* Interface function stub of ComputeSurfaceCoordFromAddr. +* +* @return +* ADDR_E_RETURNCODE +**************************************************************************************************** +*/ +ADDR_E_RETURNCODE Lib::ComputeSurfaceCoordFromAddr( + const ADDR_COMPUTE_SURFACE_COORDFROMADDR_INPUT* pIn, ///< [in] input structure + ADDR_COMPUTE_SURFACE_COORDFROMADDR_OUTPUT* pOut ///< [out] output structure + ) const +{ + ADDR_E_RETURNCODE returnCode = ADDR_OK; + + if (GetFillSizeFieldsFlags() == TRUE) + { + if ((pIn->size != sizeof(ADDR_COMPUTE_SURFACE_COORDFROMADDR_INPUT)) || + (pOut->size != sizeof(ADDR_COMPUTE_SURFACE_COORDFROMADDR_OUTPUT))) + { + returnCode = ADDR_PARAMSIZEMISMATCH; + } + } + + if (returnCode == ADDR_OK) + { + ADDR_TILEINFO tileInfoNull; + ADDR_COMPUTE_SURFACE_COORDFROMADDR_INPUT input; + + if (UseTileIndex(pIn->tileIndex)) + { + input = *pIn; + // Use temp tile info for calcalation + input.pTileInfo = &tileInfoNull; + + const ADDR_SURFACE_FLAGS flags = {{0}}; + UINT_32 numSamples = GetNumFragments(pIn->numSamples, pIn->numFrags); + + // Try finding a macroModeIndex + INT_32 macroModeIndex = HwlComputeMacroModeIndex(input.tileIndex, + flags, + input.bpp, + numSamples, + input.pTileInfo, + &input.tileMode, + &input.tileType); + + // If macroModeIndex is not needed, then call HwlSetupTileCfg to get tile info + if (macroModeIndex == TileIndexNoMacroIndex) + { + returnCode = HwlSetupTileCfg(input.bpp, input.tileIndex, macroModeIndex, + input.pTileInfo, &input.tileMode, &input.tileType); + } + // If macroModeIndex is invalid, then assert this is not macro tiled + else if (macroModeIndex == TileIndexInvalid) + { + ADDR_ASSERT(!IsMacroTiled(input.tileMode)); + } + + // Change the input structure + pIn = &input; + } + + if (returnCode == ADDR_OK) + { + returnCode = HwlComputeSurfaceCoordFromAddr(pIn, 
pOut); + } + } + + return returnCode; +} + +/** +**************************************************************************************************** +* Lib::ComputeSliceTileSwizzle +* +* @brief +* Interface function stub of ComputeSliceTileSwizzle. +* +* @return +* ADDR_E_RETURNCODE +**************************************************************************************************** +*/ +ADDR_E_RETURNCODE Lib::ComputeSliceTileSwizzle( + const ADDR_COMPUTE_SLICESWIZZLE_INPUT* pIn, ///< [in] input structure + ADDR_COMPUTE_SLICESWIZZLE_OUTPUT* pOut ///< [out] output structure + ) const +{ + ADDR_E_RETURNCODE returnCode = ADDR_OK; + + if (GetFillSizeFieldsFlags() == TRUE) + { + if ((pIn->size != sizeof(ADDR_COMPUTE_SLICESWIZZLE_INPUT)) || + (pOut->size != sizeof(ADDR_COMPUTE_SLICESWIZZLE_OUTPUT))) + { + returnCode = ADDR_PARAMSIZEMISMATCH; + } + } + + if (returnCode == ADDR_OK) + { + ADDR_TILEINFO tileInfoNull; + ADDR_COMPUTE_SLICESWIZZLE_INPUT input; + + if (UseTileIndex(pIn->tileIndex)) + { + input = *pIn; + // Use temp tile info for calcalation + input.pTileInfo = &tileInfoNull; + + returnCode = HwlSetupTileCfg(0, input.tileIndex, input.macroModeIndex, + input.pTileInfo, &input.tileMode); + // Change the input structure + pIn = &input; + } + + if (returnCode == ADDR_OK) + { + returnCode = HwlComputeSliceTileSwizzle(pIn, pOut); + } + } + + return returnCode; +} + +/** +**************************************************************************************************** +* Lib::ExtractBankPipeSwizzle +* +* @brief +* Interface function stub of AddrExtractBankPipeSwizzle. 
+* +* @return +* ADDR_E_RETURNCODE +**************************************************************************************************** +*/ +ADDR_E_RETURNCODE Lib::ExtractBankPipeSwizzle( + const ADDR_EXTRACT_BANKPIPE_SWIZZLE_INPUT* pIn, ///< [in] input structure + ADDR_EXTRACT_BANKPIPE_SWIZZLE_OUTPUT* pOut ///< [out] output structure + ) const +{ + ADDR_E_RETURNCODE returnCode = ADDR_OK; + + if (GetFillSizeFieldsFlags() == TRUE) + { + if ((pIn->size != sizeof(ADDR_EXTRACT_BANKPIPE_SWIZZLE_INPUT)) || + (pOut->size != sizeof(ADDR_EXTRACT_BANKPIPE_SWIZZLE_OUTPUT))) + { + returnCode = ADDR_PARAMSIZEMISMATCH; + } + } + + if (returnCode == ADDR_OK) + { + ADDR_TILEINFO tileInfoNull; + ADDR_EXTRACT_BANKPIPE_SWIZZLE_INPUT input; + + if (UseTileIndex(pIn->tileIndex)) + { + input = *pIn; + // Use temp tile info for calcalation + input.pTileInfo = &tileInfoNull; + + returnCode = HwlSetupTileCfg(0, input.tileIndex, input.macroModeIndex, input.pTileInfo); + // Change the input structure + pIn = &input; + } + + if (returnCode == ADDR_OK) + { + returnCode = HwlExtractBankPipeSwizzle(pIn, pOut); + } + } + + return returnCode; +} + +/** +**************************************************************************************************** +* Lib::CombineBankPipeSwizzle +* +* @brief +* Interface function stub of AddrCombineBankPipeSwizzle. 
+* +* @return +* ADDR_E_RETURNCODE +**************************************************************************************************** +*/ +ADDR_E_RETURNCODE Lib::CombineBankPipeSwizzle( + const ADDR_COMBINE_BANKPIPE_SWIZZLE_INPUT* pIn, ///< [in] input structure + ADDR_COMBINE_BANKPIPE_SWIZZLE_OUTPUT* pOut ///< [out] output structure + ) const +{ + ADDR_E_RETURNCODE returnCode = ADDR_OK; + + if (GetFillSizeFieldsFlags() == TRUE) + { + if ((pIn->size != sizeof(ADDR_COMPUTE_FMASK_INFO_INPUT)) || + (pOut->size != sizeof(ADDR_COMPUTE_FMASK_INFO_OUTPUT))) + { + returnCode = ADDR_PARAMSIZEMISMATCH; + } + } + + if (returnCode == ADDR_OK) + { + ADDR_TILEINFO tileInfoNull; + ADDR_COMBINE_BANKPIPE_SWIZZLE_INPUT input; + + if (UseTileIndex(pIn->tileIndex)) + { + input = *pIn; + // Use temp tile info for calcalation + input.pTileInfo = &tileInfoNull; + + returnCode = HwlSetupTileCfg(0, input.tileIndex, input.macroModeIndex, input.pTileInfo); + // Change the input structure + pIn = &input; + } + + if (returnCode == ADDR_OK) + { + returnCode = HwlCombineBankPipeSwizzle(pIn->bankSwizzle, + pIn->pipeSwizzle, + pIn->pTileInfo, + pIn->baseAddr, + &pOut->tileSwizzle); + } + } + + return returnCode; +} + +/** +**************************************************************************************************** +* Lib::ComputeBaseSwizzle +* +* @brief +* Interface function stub of AddrCompueBaseSwizzle. 
+* @return +* ADDR_E_RETURNCODE +**************************************************************************************************** +*/ +ADDR_E_RETURNCODE Lib::ComputeBaseSwizzle( + const ADDR_COMPUTE_BASE_SWIZZLE_INPUT* pIn, + ADDR_COMPUTE_BASE_SWIZZLE_OUTPUT* pOut) const +{ + ADDR_E_RETURNCODE returnCode = ADDR_OK; + + if (GetFillSizeFieldsFlags() == TRUE) + { + if ((pIn->size != sizeof(ADDR_COMPUTE_BASE_SWIZZLE_INPUT)) || + (pOut->size != sizeof(ADDR_COMPUTE_BASE_SWIZZLE_OUTPUT))) + { + returnCode = ADDR_PARAMSIZEMISMATCH; + } + } + + if (returnCode == ADDR_OK) + { + ADDR_TILEINFO tileInfoNull; + ADDR_COMPUTE_BASE_SWIZZLE_INPUT input; + + if (UseTileIndex(pIn->tileIndex)) + { + input = *pIn; + // Use temp tile info for calcalation + input.pTileInfo = &tileInfoNull; + + returnCode = HwlSetupTileCfg(0, input.tileIndex, input.macroModeIndex, input.pTileInfo); + // Change the input structure + pIn = &input; + } + + if (returnCode == ADDR_OK) + { + if (IsMacroTiled(pIn->tileMode)) + { + returnCode = HwlComputeBaseSwizzle(pIn, pOut); + } + else + { + pOut->tileSwizzle = 0; + } + } + } + + return returnCode; +} + +/** +**************************************************************************************************** +* Lib::ComputeFmaskInfo +* +* @brief +* Interface function stub of ComputeFmaskInfo. 
+* +* @return +* ADDR_E_RETURNCODE +**************************************************************************************************** +*/ +ADDR_E_RETURNCODE Lib::ComputeFmaskInfo( + const ADDR_COMPUTE_FMASK_INFO_INPUT* pIn, ///< [in] input structure + ADDR_COMPUTE_FMASK_INFO_OUTPUT* pOut ///< [out] output structure + ) +{ + ADDR_E_RETURNCODE returnCode = ADDR_OK; + + if (GetFillSizeFieldsFlags() == TRUE) + { + if ((pIn->size != sizeof(ADDR_COMPUTE_FMASK_INFO_INPUT)) || + (pOut->size != sizeof(ADDR_COMPUTE_FMASK_INFO_OUTPUT))) + { + returnCode = ADDR_PARAMSIZEMISMATCH; + } + } + + // No thick MSAA + if (Thickness(pIn->tileMode) > 1) + { + returnCode = ADDR_INVALIDPARAMS; + } + + if (returnCode == ADDR_OK) + { + ADDR_TILEINFO tileInfoNull; + ADDR_COMPUTE_FMASK_INFO_INPUT input; + + if (UseTileIndex(pIn->tileIndex)) + { + input = *pIn; + + if (pOut->pTileInfo) + { + // Use temp tile info for calcalation + input.pTileInfo = pOut->pTileInfo; + } + else + { + input.pTileInfo = &tileInfoNull; + } + + ADDR_SURFACE_FLAGS flags = {{0}}; + flags.fmask = 1; + + // Try finding a macroModeIndex + INT_32 macroModeIndex = HwlComputeMacroModeIndex(pIn->tileIndex, + flags, + HwlComputeFmaskBits(pIn, NULL), + pIn->numSamples, + input.pTileInfo, + &input.tileMode); + + // If macroModeIndex is not needed, then call HwlSetupTileCfg to get tile info + if (macroModeIndex == TileIndexNoMacroIndex) + { + returnCode = HwlSetupTileCfg(0, input.tileIndex, macroModeIndex, + input.pTileInfo, &input.tileMode); + } + + ADDR_ASSERT(macroModeIndex != TileIndexInvalid); + + // Change the input structure + pIn = &input; + } + + if (returnCode == ADDR_OK) + { + if (pIn->numSamples > 1) + { + returnCode = HwlComputeFmaskInfo(pIn, pOut); + } + else + { + memset(pOut, 0, sizeof(ADDR_COMPUTE_FMASK_INFO_OUTPUT)); + + returnCode = ADDR_INVALIDPARAMS; + } + } + } + + ValidBaseAlignments(pOut->baseAlign); + + return returnCode; +} + +/** 
+**************************************************************************************************** +* Lib::ComputeFmaskAddrFromCoord +* +* @brief +* Interface function stub of ComputeFmaskAddrFromCoord. +* +* @return +* ADDR_E_RETURNCODE +**************************************************************************************************** +*/ +ADDR_E_RETURNCODE Lib::ComputeFmaskAddrFromCoord( + const ADDR_COMPUTE_FMASK_ADDRFROMCOORD_INPUT* pIn, ///< [in] input structure + ADDR_COMPUTE_FMASK_ADDRFROMCOORD_OUTPUT* pOut ///< [out] output structure + ) const +{ + ADDR_E_RETURNCODE returnCode = ADDR_OK; + + if (GetFillSizeFieldsFlags() == TRUE) + { + if ((pIn->size != sizeof(ADDR_COMPUTE_FMASK_ADDRFROMCOORD_INPUT)) || + (pOut->size != sizeof(ADDR_COMPUTE_FMASK_ADDRFROMCOORD_OUTPUT))) + { + returnCode = ADDR_PARAMSIZEMISMATCH; + } + } + + if (returnCode == ADDR_OK) + { + ADDR_ASSERT(pIn->numSamples > 1); + + if (pIn->numSamples > 1) + { + returnCode = HwlComputeFmaskAddrFromCoord(pIn, pOut); + } + else + { + returnCode = ADDR_INVALIDPARAMS; + } + } + + return returnCode; +} + +/** +**************************************************************************************************** +* Lib::ComputeFmaskCoordFromAddr +* +* @brief +* Interface function stub of ComputeFmaskAddrFromCoord. 
+* +* @return +* ADDR_E_RETURNCODE +**************************************************************************************************** +*/ +ADDR_E_RETURNCODE Lib::ComputeFmaskCoordFromAddr( + const ADDR_COMPUTE_FMASK_COORDFROMADDR_INPUT* pIn, ///< [in] input structure + ADDR_COMPUTE_FMASK_COORDFROMADDR_OUTPUT* pOut ///< [out] output structure + ) const +{ + ADDR_E_RETURNCODE returnCode = ADDR_OK; + + if (GetFillSizeFieldsFlags() == TRUE) + { + if ((pIn->size != sizeof(ADDR_COMPUTE_FMASK_COORDFROMADDR_INPUT)) || + (pOut->size != sizeof(ADDR_COMPUTE_FMASK_COORDFROMADDR_OUTPUT))) + { + returnCode = ADDR_PARAMSIZEMISMATCH; + } + } + + if (returnCode == ADDR_OK) + { + ADDR_ASSERT(pIn->numSamples > 1); + + if (pIn->numSamples > 1) + { + returnCode = HwlComputeFmaskCoordFromAddr(pIn, pOut); + } + else + { + returnCode = ADDR_INVALIDPARAMS; + } + } + + return returnCode; +} + +/** +**************************************************************************************************** +* Lib::ConvertTileInfoToHW +* +* @brief +* Convert tile info from real value to HW register value in HW layer +* +* @return +* ADDR_E_RETURNCODE +**************************************************************************************************** +*/ +ADDR_E_RETURNCODE Lib::ConvertTileInfoToHW( + const ADDR_CONVERT_TILEINFOTOHW_INPUT* pIn, ///< [in] input structure + ADDR_CONVERT_TILEINFOTOHW_OUTPUT* pOut ///< [out] output structure + ) const +{ + ADDR_E_RETURNCODE returnCode = ADDR_OK; + + if (GetFillSizeFieldsFlags() == TRUE) + { + if ((pIn->size != sizeof(ADDR_CONVERT_TILEINFOTOHW_INPUT)) || + (pOut->size != sizeof(ADDR_CONVERT_TILEINFOTOHW_OUTPUT))) + { + returnCode = ADDR_PARAMSIZEMISMATCH; + } + } + + if (returnCode == ADDR_OK) + { + ADDR_TILEINFO tileInfoNull; + ADDR_CONVERT_TILEINFOTOHW_INPUT input; + // if pIn->reverse is TRUE, indices are ignored + if (pIn->reverse == FALSE && UseTileIndex(pIn->tileIndex)) + { + input = *pIn; + input.pTileInfo = &tileInfoNull; + + returnCode = 
HwlSetupTileCfg(input.bpp, input.tileIndex, + input.macroModeIndex, input.pTileInfo); + + pIn = &input; + } + + if (returnCode == ADDR_OK) + { + returnCode = HwlConvertTileInfoToHW(pIn, pOut); + } + } + + return returnCode; +} + +/** +**************************************************************************************************** +* Lib::ConvertTileIndex +* +* @brief +* Convert tile index to tile mode/type/info +* +* @return +* ADDR_E_RETURNCODE +**************************************************************************************************** +*/ +ADDR_E_RETURNCODE Lib::ConvertTileIndex( + const ADDR_CONVERT_TILEINDEX_INPUT* pIn, ///< [in] input structure + ADDR_CONVERT_TILEINDEX_OUTPUT* pOut ///< [out] output structure + ) const +{ + ADDR_E_RETURNCODE returnCode = ADDR_OK; + + if (GetFillSizeFieldsFlags() == TRUE) + { + if ((pIn->size != sizeof(ADDR_CONVERT_TILEINDEX_INPUT)) || + (pOut->size != sizeof(ADDR_CONVERT_TILEINDEX_OUTPUT))) + { + returnCode = ADDR_PARAMSIZEMISMATCH; + } + } + + if (returnCode == ADDR_OK) + { + + returnCode = HwlSetupTileCfg(pIn->bpp, pIn->tileIndex, pIn->macroModeIndex, + pOut->pTileInfo, &pOut->tileMode, &pOut->tileType); + + if (returnCode == ADDR_OK && pIn->tileInfoHw) + { + ADDR_CONVERT_TILEINFOTOHW_INPUT hwInput = {0}; + ADDR_CONVERT_TILEINFOTOHW_OUTPUT hwOutput = {0}; + + hwInput.pTileInfo = pOut->pTileInfo; + hwInput.tileIndex = -1; + hwOutput.pTileInfo = pOut->pTileInfo; + + returnCode = HwlConvertTileInfoToHW(&hwInput, &hwOutput); + } + } + + return returnCode; +} + +/** +**************************************************************************************************** +* Lib::GetMacroModeIndex +* +* @brief +* Get macro mode index based on input info +* +* @return +* ADDR_E_RETURNCODE +**************************************************************************************************** +*/ +ADDR_E_RETURNCODE Lib::GetMacroModeIndex( + const ADDR_GET_MACROMODEINDEX_INPUT* pIn, ///< [in] input structure + 
ADDR_GET_MACROMODEINDEX_OUTPUT* pOut ///< [out] output structure + ) const +{ + ADDR_E_RETURNCODE returnCode = ADDR_OK; + + if (GetFillSizeFieldsFlags()) + { + if ((pIn->size != sizeof(ADDR_GET_MACROMODEINDEX_INPUT)) || + (pOut->size != sizeof(ADDR_GET_MACROMODEINDEX_OUTPUT))) + { + returnCode = ADDR_PARAMSIZEMISMATCH; + } + } + + if (returnCode == ADDR_OK) + { + ADDR_TILEINFO tileInfo = {0}; + pOut->macroModeIndex = HwlComputeMacroModeIndex(pIn->tileIndex, pIn->flags, pIn->bpp, + pIn->numFrags, &tileInfo); + } + + return returnCode; +} + +/** +**************************************************************************************************** +* Lib::ConvertTileIndex1 +* +* @brief +* Convert tile index to tile mode/type/info +* +* @return +* ADDR_E_RETURNCODE +**************************************************************************************************** +*/ +ADDR_E_RETURNCODE Lib::ConvertTileIndex1( + const ADDR_CONVERT_TILEINDEX1_INPUT* pIn, ///< [in] input structure + ADDR_CONVERT_TILEINDEX_OUTPUT* pOut ///< [out] output structure + ) const +{ + ADDR_E_RETURNCODE returnCode = ADDR_OK; + + if (GetFillSizeFieldsFlags() == TRUE) + { + if ((pIn->size != sizeof(ADDR_CONVERT_TILEINDEX1_INPUT)) || + (pOut->size != sizeof(ADDR_CONVERT_TILEINDEX_OUTPUT))) + { + returnCode = ADDR_PARAMSIZEMISMATCH; + } + } + + if (returnCode == ADDR_OK) + { + ADDR_SURFACE_FLAGS flags = {{0}}; + + HwlComputeMacroModeIndex(pIn->tileIndex, flags, pIn->bpp, pIn->numSamples, + pOut->pTileInfo, &pOut->tileMode, &pOut->tileType); + + if (pIn->tileInfoHw) + { + ADDR_CONVERT_TILEINFOTOHW_INPUT hwInput = {0}; + ADDR_CONVERT_TILEINFOTOHW_OUTPUT hwOutput = {0}; + + hwInput.pTileInfo = pOut->pTileInfo; + hwInput.tileIndex = -1; + hwOutput.pTileInfo = pOut->pTileInfo; + + returnCode = HwlConvertTileInfoToHW(&hwInput, &hwOutput); + } + } + + return returnCode; +} + +/** +**************************************************************************************************** +* Lib::GetTileIndex +* +* 
@brief +* Get tile index from tile mode/type/info +* +* @return +* ADDR_E_RETURNCODE +**************************************************************************************************** +*/ +ADDR_E_RETURNCODE Lib::GetTileIndex( + const ADDR_GET_TILEINDEX_INPUT* pIn, ///< [in] input structure + ADDR_GET_TILEINDEX_OUTPUT* pOut ///< [out] output structure + ) const +{ + ADDR_E_RETURNCODE returnCode = ADDR_OK; + + if (GetFillSizeFieldsFlags() == TRUE) + { + if ((pIn->size != sizeof(ADDR_GET_TILEINDEX_INPUT)) || + (pOut->size != sizeof(ADDR_GET_TILEINDEX_OUTPUT))) + { + returnCode = ADDR_PARAMSIZEMISMATCH; + } + } + + if (returnCode == ADDR_OK) + { + returnCode = HwlGetTileIndex(pIn, pOut); + } + + return returnCode; +} + +/** +**************************************************************************************************** +* Lib::Thickness +* +* @brief +* Get tile mode thickness +* +* @return +* Tile mode thickness +**************************************************************************************************** +*/ +UINT_32 Lib::Thickness( + AddrTileMode tileMode) ///< [in] tile mode +{ + return ModeFlags[tileMode].thickness; +} + +//////////////////////////////////////////////////////////////////////////////////////////////////// +// CMASK/HTILE +//////////////////////////////////////////////////////////////////////////////////////////////////// + +/** +**************************************************************************************************** +* Lib::ComputeHtileInfo +* +* @brief +* Interface function stub of AddrComputeHtilenfo +* +* @return +* ADDR_E_RETURNCODE +**************************************************************************************************** +*/ +ADDR_E_RETURNCODE Lib::ComputeHtileInfo( + const ADDR_COMPUTE_HTILE_INFO_INPUT* pIn, ///< [in] input structure + ADDR_COMPUTE_HTILE_INFO_OUTPUT* pOut ///< [out] output structure + ) const +{ + ADDR_E_RETURNCODE returnCode = ADDR_OK; + + BOOL_32 isWidth8 = (pIn->blockWidth == 8) ? 
TRUE : FALSE; + BOOL_32 isHeight8 = (pIn->blockHeight == 8) ? TRUE : FALSE; + + if (GetFillSizeFieldsFlags() == TRUE) + { + if ((pIn->size != sizeof(ADDR_COMPUTE_HTILE_INFO_INPUT)) || + (pOut->size != sizeof(ADDR_COMPUTE_HTILE_INFO_OUTPUT))) + { + returnCode = ADDR_PARAMSIZEMISMATCH; + } + } + + if (returnCode == ADDR_OK) + { + ADDR_TILEINFO tileInfoNull; + ADDR_COMPUTE_HTILE_INFO_INPUT input; + + if (UseTileIndex(pIn->tileIndex)) + { + input = *pIn; + // Use temp tile info for calcalation + input.pTileInfo = &tileInfoNull; + + returnCode = HwlSetupTileCfg(0, input.tileIndex, input.macroModeIndex, input.pTileInfo); + + // Change the input structure + pIn = &input; + } + + if (returnCode == ADDR_OK) + { + if (pIn->flags.tcCompatible) + { + const UINT_32 sliceSize = pIn->pitch * pIn->height * 4 / (8 * 8); + const UINT_32 align = HwlGetPipes(pIn->pTileInfo) * pIn->pTileInfo->banks * m_pipeInterleaveBytes; + + if (pIn->numSlices > 1) + { + const UINT_32 surfBytes = (sliceSize * pIn->numSlices); + + pOut->sliceSize = sliceSize; + pOut->htileBytes = pIn->flags.skipTcCompatSizeAlign ? + surfBytes : PowTwoAlign(surfBytes, align); + pOut->sliceInterleaved = ((sliceSize % align) != 0) ? TRUE : FALSE; + } + else + { + pOut->sliceSize = pIn->flags.skipTcCompatSizeAlign ? + sliceSize : PowTwoAlign(sliceSize, align); + pOut->htileBytes = pOut->sliceSize; + pOut->sliceInterleaved = FALSE; + } + + pOut->nextMipLevelCompressible = ((sliceSize % align) == 0) ? 
TRUE : FALSE; + + pOut->pitch = pIn->pitch; + pOut->height = pIn->height; + pOut->baseAlign = align; + pOut->macroWidth = 0; + pOut->macroHeight = 0; + pOut->bpp = 32; + } + else + { + pOut->bpp = ComputeHtileInfo(pIn->flags, + pIn->pitch, + pIn->height, + pIn->numSlices, + pIn->isLinear, + isWidth8, + isHeight8, + pIn->pTileInfo, + &pOut->pitch, + &pOut->height, + &pOut->htileBytes, + &pOut->macroWidth, + &pOut->macroHeight, + &pOut->sliceSize, + &pOut->baseAlign); + } + } + } + + ValidMetaBaseAlignments(pOut->baseAlign); + + return returnCode; +} + +/** +**************************************************************************************************** +* Lib::ComputeCmaskInfo +* +* @brief +* Interface function stub of AddrComputeCmaskInfo +* +* @return +* ADDR_E_RETURNCODE +**************************************************************************************************** +*/ +ADDR_E_RETURNCODE Lib::ComputeCmaskInfo( + const ADDR_COMPUTE_CMASK_INFO_INPUT* pIn, ///< [in] input structure + ADDR_COMPUTE_CMASK_INFO_OUTPUT* pOut ///< [out] output structure + ) const +{ + ADDR_E_RETURNCODE returnCode = ADDR_OK; + + if (GetFillSizeFieldsFlags() == TRUE) + { + if ((pIn->size != sizeof(ADDR_COMPUTE_CMASK_INFO_INPUT)) || + (pOut->size != sizeof(ADDR_COMPUTE_CMASK_INFO_OUTPUT))) + { + returnCode = ADDR_PARAMSIZEMISMATCH; + } + } + + if (returnCode == ADDR_OK) + { + ADDR_TILEINFO tileInfoNull; + ADDR_COMPUTE_CMASK_INFO_INPUT input; + + if (UseTileIndex(pIn->tileIndex)) + { + input = *pIn; + // Use temp tile info for calcalation + input.pTileInfo = &tileInfoNull; + + returnCode = HwlSetupTileCfg(0, input.tileIndex, input.macroModeIndex, input.pTileInfo); + + // Change the input structure + pIn = &input; + } + + if (returnCode == ADDR_OK) + { + returnCode = ComputeCmaskInfo(pIn->flags, + pIn->pitch, + pIn->height, + pIn->numSlices, + pIn->isLinear, + pIn->pTileInfo, + &pOut->pitch, + &pOut->height, + &pOut->cmaskBytes, + &pOut->macroWidth, + &pOut->macroHeight, + 
&pOut->sliceSize, + &pOut->baseAlign, + &pOut->blockMax); + } + } + + ValidMetaBaseAlignments(pOut->baseAlign); + + return returnCode; +} + +/** +**************************************************************************************************** +* Lib::ComputeDccInfo +* +* @brief +* Interface function to compute DCC key info +* +* @return +* return code of HwlComputeDccInfo +**************************************************************************************************** +*/ +ADDR_E_RETURNCODE Lib::ComputeDccInfo( + const ADDR_COMPUTE_DCCINFO_INPUT* pIn, ///< [in] input structure + ADDR_COMPUTE_DCCINFO_OUTPUT* pOut ///< [out] output structure + ) const +{ + ADDR_E_RETURNCODE ret = ADDR_OK; + + if (GetFillSizeFieldsFlags() == TRUE) + { + if ((pIn->size != sizeof(ADDR_COMPUTE_DCCINFO_INPUT)) || + (pOut->size != sizeof(ADDR_COMPUTE_DCCINFO_OUTPUT))) + { + ret = ADDR_PARAMSIZEMISMATCH; + } + } + + if (ret == ADDR_OK) + { + ADDR_COMPUTE_DCCINFO_INPUT input; + + if (UseTileIndex(pIn->tileIndex)) + { + input = *pIn; + + ret = HwlSetupTileCfg(input.bpp, input.tileIndex, input.macroModeIndex, + &input.tileInfo, &input.tileMode); + + pIn = &input; + } + + if (ret == ADDR_OK) + { + ret = HwlComputeDccInfo(pIn, pOut); + + ValidMetaBaseAlignments(pOut->dccRamBaseAlign); + } + } + + return ret; +} + +/** +**************************************************************************************************** +* Lib::ComputeHtileAddrFromCoord +* +* @brief +* Interface function stub of AddrComputeHtileAddrFromCoord +* +* @return +* ADDR_E_RETURNCODE +**************************************************************************************************** +*/ +ADDR_E_RETURNCODE Lib::ComputeHtileAddrFromCoord( + const ADDR_COMPUTE_HTILE_ADDRFROMCOORD_INPUT* pIn, ///< [in] input structure + ADDR_COMPUTE_HTILE_ADDRFROMCOORD_OUTPUT* pOut ///< [out] output structure + ) const +{ + ADDR_E_RETURNCODE returnCode = ADDR_OK; + + BOOL_32 isWidth8 = (pIn->blockWidth == 8) ? 
TRUE : FALSE; + BOOL_32 isHeight8 = (pIn->blockHeight == 8) ? TRUE : FALSE; + + if (GetFillSizeFieldsFlags() == TRUE) + { + if ((pIn->size != sizeof(ADDR_COMPUTE_HTILE_ADDRFROMCOORD_INPUT)) || + (pOut->size != sizeof(ADDR_COMPUTE_HTILE_ADDRFROMCOORD_OUTPUT))) + { + returnCode = ADDR_PARAMSIZEMISMATCH; + } + } + + if (returnCode == ADDR_OK) + { + ADDR_TILEINFO tileInfoNull; + ADDR_COMPUTE_HTILE_ADDRFROMCOORD_INPUT input; + + if (UseTileIndex(pIn->tileIndex)) + { + input = *pIn; + // Use temp tile info for calcalation + input.pTileInfo = &tileInfoNull; + + returnCode = HwlSetupTileCfg(0, input.tileIndex, input.macroModeIndex, input.pTileInfo); + + // Change the input structure + pIn = &input; + } + + if (returnCode == ADDR_OK) + { + if (pIn->flags.tcCompatible) + { + HwlComputeHtileAddrFromCoord(pIn, pOut); + } + else + { + pOut->addr = HwlComputeXmaskAddrFromCoord(pIn->pitch, + pIn->height, + pIn->x, + pIn->y, + pIn->slice, + pIn->numSlices, + 1, + pIn->isLinear, + isWidth8, + isHeight8, + pIn->pTileInfo, + &pOut->bitPosition); + } + } + } + + return returnCode; + +} + +/** +**************************************************************************************************** +* Lib::ComputeHtileCoordFromAddr +* +* @brief +* Interface function stub of AddrComputeHtileCoordFromAddr +* +* @return +* ADDR_E_RETURNCODE +**************************************************************************************************** +*/ +ADDR_E_RETURNCODE Lib::ComputeHtileCoordFromAddr( + const ADDR_COMPUTE_HTILE_COORDFROMADDR_INPUT* pIn, ///< [in] input structure + ADDR_COMPUTE_HTILE_COORDFROMADDR_OUTPUT* pOut ///< [out] output structure + ) const +{ + ADDR_E_RETURNCODE returnCode = ADDR_OK; + + BOOL_32 isWidth8 = (pIn->blockWidth == 8) ? TRUE : FALSE; + BOOL_32 isHeight8 = (pIn->blockHeight == 8) ? 
TRUE : FALSE; + + if (GetFillSizeFieldsFlags() == TRUE) + { + if ((pIn->size != sizeof(ADDR_COMPUTE_HTILE_COORDFROMADDR_INPUT)) || + (pOut->size != sizeof(ADDR_COMPUTE_HTILE_COORDFROMADDR_OUTPUT))) + { + returnCode = ADDR_PARAMSIZEMISMATCH; + } + } + + if (returnCode == ADDR_OK) + { + ADDR_TILEINFO tileInfoNull; + ADDR_COMPUTE_HTILE_COORDFROMADDR_INPUT input; + + if (UseTileIndex(pIn->tileIndex)) + { + input = *pIn; + // Use temp tile info for calcalation + input.pTileInfo = &tileInfoNull; + + returnCode = HwlSetupTileCfg(0, input.tileIndex, input.macroModeIndex, input.pTileInfo); + + // Change the input structure + pIn = &input; + } + + if (returnCode == ADDR_OK) + { + HwlComputeXmaskCoordFromAddr(pIn->addr, + pIn->bitPosition, + pIn->pitch, + pIn->height, + pIn->numSlices, + 1, + pIn->isLinear, + isWidth8, + isHeight8, + pIn->pTileInfo, + &pOut->x, + &pOut->y, + &pOut->slice); + } + } + + return returnCode; +} + +/** +**************************************************************************************************** +* Lib::ComputeCmaskAddrFromCoord +* +* @brief +* Interface function stub of AddrComputeCmaskAddrFromCoord +* +* @return +* ADDR_E_RETURNCODE +**************************************************************************************************** +*/ +ADDR_E_RETURNCODE Lib::ComputeCmaskAddrFromCoord( + const ADDR_COMPUTE_CMASK_ADDRFROMCOORD_INPUT* pIn, ///< [in] input structure + ADDR_COMPUTE_CMASK_ADDRFROMCOORD_OUTPUT* pOut ///< [out] output structure + ) const +{ + ADDR_E_RETURNCODE returnCode = ADDR_OK; + + if (GetFillSizeFieldsFlags() == TRUE) + { + if ((pIn->size != sizeof(ADDR_COMPUTE_CMASK_ADDRFROMCOORD_INPUT)) || + (pOut->size != sizeof(ADDR_COMPUTE_CMASK_ADDRFROMCOORD_OUTPUT))) + { + returnCode = ADDR_PARAMSIZEMISMATCH; + } + } + + if (returnCode == ADDR_OK) + { + ADDR_TILEINFO tileInfoNull; + ADDR_COMPUTE_CMASK_ADDRFROMCOORD_INPUT input; + + if (UseTileIndex(pIn->tileIndex)) + { + input = *pIn; + // Use temp tile info for calcalation + 
input.pTileInfo = &tileInfoNull; + + returnCode = HwlSetupTileCfg(0, input.tileIndex, input.macroModeIndex, input.pTileInfo); + + // Change the input structure + pIn = &input; + } + + if (returnCode == ADDR_OK) + { + if (pIn->flags.tcCompatible == TRUE) + { + returnCode = HwlComputeCmaskAddrFromCoord(pIn, pOut); + } + else + { + pOut->addr = HwlComputeXmaskAddrFromCoord(pIn->pitch, + pIn->height, + pIn->x, + pIn->y, + pIn->slice, + pIn->numSlices, + 2, + pIn->isLinear, + FALSE, //this is cmask, isWidth8 is not needed + FALSE, //this is cmask, isHeight8 is not needed + pIn->pTileInfo, + &pOut->bitPosition); + } + + } + } + + return returnCode; +} + +/** +**************************************************************************************************** +* Lib::ComputeCmaskCoordFromAddr +* +* @brief +* Interface function stub of AddrComputeCmaskCoordFromAddr +* +* @return +* ADDR_E_RETURNCODE +**************************************************************************************************** +*/ +ADDR_E_RETURNCODE Lib::ComputeCmaskCoordFromAddr( + const ADDR_COMPUTE_CMASK_COORDFROMADDR_INPUT* pIn, ///< [in] input structure + ADDR_COMPUTE_CMASK_COORDFROMADDR_OUTPUT* pOut ///< [out] output structure + ) const +{ + ADDR_E_RETURNCODE returnCode = ADDR_OK; + + if (GetFillSizeFieldsFlags() == TRUE) + { + if ((pIn->size != sizeof(ADDR_COMPUTE_CMASK_COORDFROMADDR_INPUT)) || + (pOut->size != sizeof(ADDR_COMPUTE_CMASK_COORDFROMADDR_OUTPUT))) + { + returnCode = ADDR_PARAMSIZEMISMATCH; + } + } + + if (returnCode == ADDR_OK) + { + ADDR_TILEINFO tileInfoNull; + ADDR_COMPUTE_CMASK_COORDFROMADDR_INPUT input; + + if (UseTileIndex(pIn->tileIndex)) + { + input = *pIn; + // Use temp tile info for calcalation + input.pTileInfo = &tileInfoNull; + + returnCode = HwlSetupTileCfg(0, input.tileIndex, input.macroModeIndex, input.pTileInfo); + + // Change the input structure + pIn = &input; + } + + if (returnCode == ADDR_OK) + { + HwlComputeXmaskCoordFromAddr(pIn->addr, + pIn->bitPosition, + 
pIn->pitch, + pIn->height, + pIn->numSlices, + 2, + pIn->isLinear, + FALSE, + FALSE, + pIn->pTileInfo, + &pOut->x, + &pOut->y, + &pOut->slice); + } + } + + return returnCode; +} + +/** +**************************************************************************************************** +* Lib::ComputeTileDataWidthAndHeight +* +* @brief +* Compute the squared cache shape for per-tile data (CMASK and HTILE) +* +* @return +* N/A +* +* @note +* MacroWidth and macroHeight are measured in pixels +**************************************************************************************************** +*/ +VOID Lib::ComputeTileDataWidthAndHeight( + UINT_32 bpp, ///< [in] bits per pixel + UINT_32 cacheBits, ///< [in] bits of cache + ADDR_TILEINFO* pTileInfo, ///< [in] Tile info + UINT_32* pMacroWidth, ///< [out] macro tile width + UINT_32* pMacroHeight ///< [out] macro tile height + ) const +{ + UINT_32 height = 1; + UINT_32 width = cacheBits / bpp; + UINT_32 pipes = HwlGetPipes(pTileInfo); + + // Double height until the macro-tile is close to square + // Height can only be doubled if width is even + + while ((width > height * 2 * pipes) && !(width & 1)) + { + width /= 2; + height *= 2; + } + + *pMacroWidth = 8 * width; + *pMacroHeight = 8 * height * pipes; + + // Note: The above iterative comptuation is equivalent to the following + // + //int log2_height = ((log2(cacheBits)-log2(bpp)-log2(pipes))/2); + //int macroHeight = pow2( 3+log2(pipes)+log2_height ); +} + +/** +**************************************************************************************************** +* Lib::HwlComputeTileDataWidthAndHeightLinear +* +* @brief +* Compute the squared cache shape for per-tile data (CMASK and HTILE) for linear layout +* +* @return +* N/A +* +* @note +* MacroWidth and macroHeight are measured in pixels +**************************************************************************************************** +*/ +VOID Lib::HwlComputeTileDataWidthAndHeightLinear( + UINT_32* pMacroWidth, 
///< [out] macro tile width + UINT_32* pMacroHeight, ///< [out] macro tile height + UINT_32 bpp, ///< [in] bits per pixel + ADDR_TILEINFO* pTileInfo ///< [in] tile info + ) const +{ + ADDR_ASSERT(bpp != 4); // Cmask does not support linear layout prior to SI + *pMacroWidth = 8 * 512 / bpp; // Align width to 512-bit memory accesses + *pMacroHeight = 8 * m_pipes; // Align height to number of pipes +} + +/** +**************************************************************************************************** +* Lib::ComputeHtileInfo +* +* @brief +* Compute htile pitch,width, bytes per 2D slice +* +* @return +* Htile bpp i.e. How many bits for an 8x8 tile +* Also returns by output parameters: +* *Htile pitch, height, total size in bytes, macro-tile dimensions and slice size* +**************************************************************************************************** +*/ +UINT_32 Lib::ComputeHtileInfo( + ADDR_HTILE_FLAGS flags, ///< [in] htile flags + UINT_32 pitchIn, ///< [in] pitch input + UINT_32 heightIn, ///< [in] height input + UINT_32 numSlices, ///< [in] number of slices + BOOL_32 isLinear, ///< [in] if it is linear mode + BOOL_32 isWidth8, ///< [in] if htile block width is 8 + BOOL_32 isHeight8, ///< [in] if htile block height is 8 + ADDR_TILEINFO* pTileInfo, ///< [in] Tile info + UINT_32* pPitchOut, ///< [out] pitch output + UINT_32* pHeightOut, ///< [out] height output + UINT_64* pHtileBytes, ///< [out] bytes per 2D slice + UINT_32* pMacroWidth, ///< [out] macro-tile width in pixels + UINT_32* pMacroHeight, ///< [out] macro-tile width in pixels + UINT_64* pSliceSize, ///< [out] slice size in bytes + UINT_32* pBaseAlign ///< [out] base alignment + ) const +{ + + UINT_32 macroWidth; + UINT_32 macroHeight; + UINT_32 baseAlign; + UINT_64 surfBytes; + UINT_64 sliceBytes; + + numSlices = Max(1u, numSlices); + + const UINT_32 bpp = HwlComputeHtileBpp(isWidth8, isHeight8); + const UINT_32 cacheBits = HtileCacheBits; + + if (isLinear) + { + 
HwlComputeTileDataWidthAndHeightLinear(¯oWidth, + ¯oHeight, + bpp, + pTileInfo); + } + else + { + ComputeTileDataWidthAndHeight(bpp, + cacheBits, + pTileInfo, + ¯oWidth, + ¯oHeight); + } + + *pPitchOut = PowTwoAlign(pitchIn, macroWidth); + *pHeightOut = PowTwoAlign(heightIn, macroHeight); + + baseAlign = HwlComputeHtileBaseAlign(flags.tcCompatible, isLinear, pTileInfo); + + surfBytes = HwlComputeHtileBytes(*pPitchOut, + *pHeightOut, + bpp, + isLinear, + numSlices, + &sliceBytes, + baseAlign); + + *pHtileBytes = surfBytes; + + // + // Use SafeAssign since they are optional + // + SafeAssign(pMacroWidth, macroWidth); + + SafeAssign(pMacroHeight, macroHeight); + + SafeAssign(pSliceSize, sliceBytes); + + SafeAssign(pBaseAlign, baseAlign); + + return bpp; +} + +/** +**************************************************************************************************** +* Lib::ComputeCmaskBaseAlign +* +* @brief +* Compute cmask base alignment +* +* @return +* Cmask base alignment +**************************************************************************************************** +*/ +UINT_32 Lib::ComputeCmaskBaseAlign( + ADDR_CMASK_FLAGS flags, ///< [in] Cmask flags + ADDR_TILEINFO* pTileInfo ///< [in] Tile info + ) const +{ + UINT_32 baseAlign = m_pipeInterleaveBytes * HwlGetPipes(pTileInfo); + + if (flags.tcCompatible) + { + ADDR_ASSERT(pTileInfo != NULL); + if (pTileInfo) + { + baseAlign *= pTileInfo->banks; + } + } + + return baseAlign; +} + +/** +**************************************************************************************************** +* Lib::ComputeCmaskBytes +* +* @brief +* Compute cmask size in bytes +* +* @return +* Cmask size in bytes +**************************************************************************************************** +*/ +UINT_64 Lib::ComputeCmaskBytes( + UINT_32 pitch, ///< [in] pitch + UINT_32 height, ///< [in] height + UINT_32 numSlices ///< [in] number of slices + ) const +{ + return BITS_TO_BYTES(static_cast(pitch) * height * 
numSlices * CmaskElemBits) / + MicroTilePixels; +} + +/** +**************************************************************************************************** +* Lib::ComputeCmaskInfo +* +* @brief +* Compute cmask pitch,width, bytes per 2D slice +* +* @return +* BlockMax. Also by output parameters: Cmask pitch,height, total size in bytes, +* macro-tile dimensions +**************************************************************************************************** +*/ +ADDR_E_RETURNCODE Lib::ComputeCmaskInfo( + ADDR_CMASK_FLAGS flags, ///< [in] cmask flags + UINT_32 pitchIn, ///< [in] pitch input + UINT_32 heightIn, ///< [in] height input + UINT_32 numSlices, ///< [in] number of slices + BOOL_32 isLinear, ///< [in] is linear mode + ADDR_TILEINFO* pTileInfo, ///< [in] Tile info + UINT_32* pPitchOut, ///< [out] pitch output + UINT_32* pHeightOut, ///< [out] height output + UINT_64* pCmaskBytes, ///< [out] bytes per 2D slice + UINT_32* pMacroWidth, ///< [out] macro-tile width in pixels + UINT_32* pMacroHeight, ///< [out] macro-tile width in pixels + UINT_64* pSliceSize, ///< [out] slice size in bytes + UINT_32* pBaseAlign, ///< [out] base alignment + UINT_32* pBlockMax ///< [out] block max == slice / 128 / 128 - 1 + ) const +{ + UINT_32 macroWidth; + UINT_32 macroHeight; + UINT_32 baseAlign; + UINT_64 surfBytes; + UINT_64 sliceBytes; + + numSlices = Max(1u, numSlices); + + const UINT_32 bpp = CmaskElemBits; + const UINT_32 cacheBits = CmaskCacheBits; + + ADDR_E_RETURNCODE returnCode = ADDR_OK; + + if (isLinear) + { + HwlComputeTileDataWidthAndHeightLinear(¯oWidth, + ¯oHeight, + bpp, + pTileInfo); + } + else + { + ComputeTileDataWidthAndHeight(bpp, + cacheBits, + pTileInfo, + ¯oWidth, + ¯oHeight); + } + + *pPitchOut = (pitchIn + macroWidth - 1) & ~(macroWidth - 1); + *pHeightOut = (heightIn + macroHeight - 1) & ~(macroHeight - 1); + + sliceBytes = ComputeCmaskBytes(*pPitchOut, + *pHeightOut, + 1); + + baseAlign = ComputeCmaskBaseAlign(flags, pTileInfo); + + while 
(sliceBytes % baseAlign) + { + *pHeightOut += macroHeight; + + sliceBytes = ComputeCmaskBytes(*pPitchOut, + *pHeightOut, + 1); + } + + surfBytes = sliceBytes * numSlices; + + *pCmaskBytes = surfBytes; + + // + // Use SafeAssign since they are optional + // + SafeAssign(pMacroWidth, macroWidth); + + SafeAssign(pMacroHeight, macroHeight); + + SafeAssign(pBaseAlign, baseAlign); + + SafeAssign(pSliceSize, sliceBytes); + + UINT_32 slice = (*pPitchOut) * (*pHeightOut); + UINT_32 blockMax = slice / 128 / 128 - 1; + +#if DEBUG + if (slice % (64*256) != 0) + { + ADDR_ASSERT_ALWAYS(); + } +#endif //DEBUG + + UINT_32 maxBlockMax = HwlGetMaxCmaskBlockMax(); + + if (blockMax > maxBlockMax) + { + blockMax = maxBlockMax; + returnCode = ADDR_INVALIDPARAMS; + } + + SafeAssign(pBlockMax, blockMax); + + return returnCode; +} + +/** +**************************************************************************************************** +* Lib::ComputeXmaskCoordYFromPipe +* +* @brief +* Compute the Y coord from pipe number for cmask/htile +* +* @return +* Y coordinate +* +**************************************************************************************************** +*/ +UINT_32 Lib::ComputeXmaskCoordYFromPipe( + UINT_32 pipe, ///< [in] pipe number + UINT_32 x ///< [in] x coordinate + ) const +{ + UINT_32 pipeBit0; + UINT_32 pipeBit1; + UINT_32 xBit0; + UINT_32 xBit1; + UINT_32 yBit0; + UINT_32 yBit1; + + UINT_32 y = 0; + + UINT_32 numPipes = m_pipes; // SI has its implementation + // + // Convert pipe + x to y coordinate. 
+ // + switch (numPipes) + { + case 1: + // + // 1 pipe + // + // p0 = 0 + // + y = 0; + break; + case 2: + // + // 2 pipes + // + // p0 = x0 ^ y0 + // + // y0 = p0 ^ x0 + // + pipeBit0 = pipe & 0x1; + + xBit0 = x & 0x1; + + yBit0 = pipeBit0 ^ xBit0; + + y = yBit0; + break; + case 4: + // + // 4 pipes + // + // p0 = x1 ^ y0 + // p1 = x0 ^ y1 + // + // y0 = p0 ^ x1 + // y1 = p1 ^ x0 + // + pipeBit0 = pipe & 0x1; + pipeBit1 = (pipe & 0x2) >> 1; + + xBit0 = x & 0x1; + xBit1 = (x & 0x2) >> 1; + + yBit0 = pipeBit0 ^ xBit1; + yBit1 = pipeBit1 ^ xBit0; + + y = (yBit0 | + (yBit1 << 1)); + break; + case 8: + // + // 8 pipes + // + // r600 and r800 have different method + // + y = HwlComputeXmaskCoordYFrom8Pipe(pipe, x); + break; + default: + break; + } + return y; +} + +/** +**************************************************************************************************** +* Lib::HwlComputeXmaskCoordFromAddr +* +* @brief +* Compute the coord from an address of a cmask/htile +* +* @return +* N/A +* +* @note +* This method is reused by htile, so rename to Xmask +**************************************************************************************************** +*/ +VOID Lib::HwlComputeXmaskCoordFromAddr( + UINT_64 addr, ///< [in] address + UINT_32 bitPosition, ///< [in] bitPosition in a byte + UINT_32 pitch, ///< [in] pitch + UINT_32 height, ///< [in] height + UINT_32 numSlices, ///< [in] number of slices + UINT_32 factor, ///< [in] factor that indicates cmask or htile + BOOL_32 isLinear, ///< [in] linear or tiled HTILE layout + BOOL_32 isWidth8, ///< [in] TRUE if width is 8, FALSE means 4. It's register value + BOOL_32 isHeight8, ///< [in] TRUE if width is 8, FALSE means 4. 
It's register value + ADDR_TILEINFO* pTileInfo, ///< [in] Tile info + UINT_32* pX, ///< [out] x coord + UINT_32* pY, ///< [out] y coord + UINT_32* pSlice ///< [out] slice index + ) const +{ + UINT_32 pipe; + UINT_32 numPipes; + UINT_32 numGroupBits; + (void)numGroupBits; + UINT_32 numPipeBits; + UINT_32 macroTilePitch; + UINT_32 macroTileHeight; + + UINT_64 bitAddr; + + UINT_32 microTileCoordY; + + UINT_32 elemBits; + + UINT_32 pitchAligned = pitch; + UINT_32 heightAligned = height; + UINT_64 totalBytes; + + UINT_64 elemOffset; + + UINT_64 macroIndex; + UINT_32 microIndex; + + UINT_64 macroNumber; + UINT_32 microNumber; + + UINT_32 macroX; + UINT_32 macroY; + UINT_32 macroZ; + + UINT_32 microX; + UINT_32 microY; + + UINT_32 tilesPerMacro; + UINT_32 macrosPerPitch; + UINT_32 macrosPerSlice; + + // + // Extract pipe. + // + numPipes = HwlGetPipes(pTileInfo); + pipe = ComputePipeFromAddr(addr, numPipes); + + // + // Compute the number of group and pipe bits. + // + numGroupBits = Log2(m_pipeInterleaveBytes); + numPipeBits = Log2(numPipes); + + UINT_32 groupBits = 8 * m_pipeInterleaveBytes; + UINT_32 pipes = numPipes; + + // + // Compute the micro tile size, in bits. And macro tile pitch and height. + // + if (factor == 2) //CMASK + { + ADDR_CMASK_FLAGS flags = {{0}}; + + elemBits = CmaskElemBits; + + ComputeCmaskInfo(flags, + pitch, + height, + numSlices, + isLinear, + pTileInfo, + &pitchAligned, + &heightAligned, + &totalBytes, + ¯oTilePitch, + ¯oTileHeight); + } + else //HTILE + { + ADDR_HTILE_FLAGS flags = {{0}}; + + if (factor != 1) + { + factor = 1; + } + + elemBits = HwlComputeHtileBpp(isWidth8, isHeight8); + + ComputeHtileInfo(flags, + pitch, + height, + numSlices, + isLinear, + isWidth8, + isHeight8, + pTileInfo, + &pitchAligned, + &heightAligned, + &totalBytes, + ¯oTilePitch, + ¯oTileHeight); + } + + // Should use aligned dims + // + pitch = pitchAligned; + height = heightAligned; + + // + // Convert byte address to bit address. 
+ // + bitAddr = BYTES_TO_BITS(addr) + bitPosition; + + // + // Remove pipe bits from address. + // + + bitAddr = (bitAddr % groupBits) + ((bitAddr/groupBits/pipes)*groupBits); + + elemOffset = bitAddr / elemBits; + + tilesPerMacro = (macroTilePitch/factor) * macroTileHeight / MicroTilePixels >> numPipeBits; + + macrosPerPitch = pitch / (macroTilePitch/factor); + macrosPerSlice = macrosPerPitch * height / macroTileHeight; + + macroIndex = elemOffset / factor / tilesPerMacro; + microIndex = static_cast(elemOffset % (tilesPerMacro * factor)); + + macroNumber = macroIndex * factor + microIndex % factor; + microNumber = microIndex / factor; + + macroX = static_cast((macroNumber % macrosPerPitch)); + macroY = static_cast((macroNumber % macrosPerSlice) / macrosPerPitch); + macroZ = static_cast((macroNumber / macrosPerSlice)); + + microX = microNumber % (macroTilePitch / factor / MicroTileWidth); + microY = (microNumber / (macroTilePitch / factor / MicroTileHeight)); + + *pX = macroX * (macroTilePitch/factor) + microX * MicroTileWidth; + *pY = macroY * macroTileHeight + (microY * MicroTileHeight << numPipeBits); + *pSlice = macroZ; + + microTileCoordY = ComputeXmaskCoordYFromPipe(pipe, + *pX/MicroTileWidth); + + // + // Assemble final coordinates. 
+ // + *pY += microTileCoordY * MicroTileHeight; + +} + +/** +**************************************************************************************************** +* Lib::HwlComputeXmaskAddrFromCoord +* +* @brief +* Compute the address from an address of cmask (prior to si) +* +* @return +* Address in bytes +* +**************************************************************************************************** +*/ +UINT_64 Lib::HwlComputeXmaskAddrFromCoord( + UINT_32 pitch, ///< [in] pitch + UINT_32 height, ///< [in] height + UINT_32 x, ///< [in] x coord + UINT_32 y, ///< [in] y coord + UINT_32 slice, ///< [in] slice/depth index + UINT_32 numSlices, ///< [in] number of slices + UINT_32 factor, ///< [in] factor that indicates cmask(2) or htile(1) + BOOL_32 isLinear, ///< [in] linear or tiled HTILE layout + BOOL_32 isWidth8, ///< [in] TRUE if width is 8, FALSE means 4. It's register value + BOOL_32 isHeight8, ///< [in] TRUE if width is 8, FALSE means 4. It's register value + ADDR_TILEINFO* pTileInfo, ///< [in] Tile info + UINT_32* pBitPosition ///< [out] bit position inside a byte + ) const +{ + UINT_64 addr; + UINT_32 numGroupBits; + UINT_32 numPipeBits; + UINT_32 newPitch = 0; + UINT_32 newHeight = 0; + UINT_64 sliceBytes = 0; + UINT_64 totalBytes = 0; + UINT_64 sliceOffset; + UINT_32 pipe; + UINT_32 macroTileWidth; + UINT_32 macroTileHeight; + UINT_32 macroTilesPerRow; + UINT_32 macroTileBytes; + UINT_32 macroTileIndexX; + UINT_32 macroTileIndexY; + UINT_64 macroTileOffset; + UINT_32 pixelBytesPerRow; + UINT_32 pixelOffsetX; + UINT_32 pixelOffsetY; + UINT_32 pixelOffset; + UINT_64 totalOffset; + UINT_64 offsetLo; + UINT_64 offsetHi; + UINT_64 groupMask; + + UINT_32 elemBits = 0; + + UINT_32 numPipes = m_pipes; // This function is accessed prior to si only + + if (factor == 2) //CMASK + { + elemBits = CmaskElemBits; + + // For asics before SI, cmask is always tiled + isLinear = FALSE; + } + else //HTILE + { + if (factor != 1) // Fix compile warning + { + factor = 
1; + } + + elemBits = HwlComputeHtileBpp(isWidth8, isHeight8); + } + + // + // Compute the number of group bits and pipe bits. + // + numGroupBits = Log2(m_pipeInterleaveBytes); + numPipeBits = Log2(numPipes); + + // + // Compute macro tile dimensions. + // + if (factor == 2) // CMASK + { + ADDR_CMASK_FLAGS flags = {{0}}; + + ComputeCmaskInfo(flags, + pitch, + height, + numSlices, + isLinear, + pTileInfo, + &newPitch, + &newHeight, + &totalBytes, + ¯oTileWidth, + ¯oTileHeight); + + sliceBytes = totalBytes / numSlices; + } + else // HTILE + { + ADDR_HTILE_FLAGS flags = {{0}}; + + ComputeHtileInfo(flags, + pitch, + height, + numSlices, + isLinear, + isWidth8, + isHeight8, + pTileInfo, + &newPitch, + &newHeight, + &totalBytes, + ¯oTileWidth, + ¯oTileHeight, + &sliceBytes); + } + + sliceOffset = slice * sliceBytes; + + // + // Get the pipe. Note that neither slice rotation nor pipe swizzling apply for CMASK. + // + pipe = ComputePipeFromCoord(x, + y, + 0, + ADDR_TM_2D_TILED_THIN1, + 0, + FALSE, + pTileInfo); + + // + // Compute the number of macro tiles per row. + // + macroTilesPerRow = newPitch / macroTileWidth; + + // + // Compute the number of bytes per macro tile. + // + macroTileBytes = BITS_TO_BYTES((macroTileWidth * macroTileHeight * elemBits) / MicroTilePixels); + + // + // Compute the offset to the macro tile containing the specified coordinate. + // + macroTileIndexX = x / macroTileWidth; + macroTileIndexY = y / macroTileHeight; + macroTileOffset = ((macroTileIndexY * macroTilesPerRow) + macroTileIndexX) * macroTileBytes; + + // + // Compute the pixel offset within the macro tile. + // + pixelBytesPerRow = BITS_TO_BYTES(macroTileWidth * elemBits) / MicroTileWidth; + + // + // The nibbles are interleaved (see below), so the part of the offset relative to the x + // coordinate repeats halfway across the row. 
(Not for HTILE) + // + if (factor == 2) + { + pixelOffsetX = (x % (macroTileWidth / 2)) / MicroTileWidth; + } + else + { + pixelOffsetX = (x % (macroTileWidth)) / MicroTileWidth * BITS_TO_BYTES(elemBits); + } + + // + // Compute the y offset within the macro tile. + // + pixelOffsetY = (((y % macroTileHeight) / MicroTileHeight) / numPipes) * pixelBytesPerRow; + + pixelOffset = pixelOffsetX + pixelOffsetY; + + // + // Combine the slice offset and macro tile offset with the pixel offset, accounting for the + // pipe bits in the middle of the address. + // + totalOffset = ((sliceOffset + macroTileOffset) >> numPipeBits) + pixelOffset; + + // + // Split the offset to put some bits below the pipe bits and some above. + // + groupMask = (1 << numGroupBits) - 1; + offsetLo = totalOffset & groupMask; + offsetHi = (totalOffset & ~groupMask) << numPipeBits; + + // + // Assemble the address from its components. + // + addr = offsetLo; + addr |= offsetHi; + // This is to remove warning with /analyze option + UINT_32 pipeBits = pipe << numGroupBits; + addr |= pipeBits; + + // + // Compute the bit position. The lower nibble is used when the x coordinate within the macro + // tile is less than half of the macro tile width, and the upper nibble is used when the x + // coordinate within the macro tile is greater than or equal to half the macro tile width. + // + *pBitPosition = ((x % macroTileWidth) < (macroTileWidth / factor)) ? 
0 : 4; + + return addr; +} + +//////////////////////////////////////////////////////////////////////////////////////////////////// +// Surface Addressing Shared +//////////////////////////////////////////////////////////////////////////////////////////////////// + +/** +**************************************************************************************************** +* Lib::ComputeSurfaceAddrFromCoordLinear +* +* @brief +* Compute address from coord for linear surface +* +* @return +* Address in bytes +* +**************************************************************************************************** +*/ +UINT_64 Lib::ComputeSurfaceAddrFromCoordLinear( + UINT_32 x, ///< [in] x coord + UINT_32 y, ///< [in] y coord + UINT_32 slice, ///< [in] slice/depth index + UINT_32 sample, ///< [in] sample index + UINT_32 bpp, ///< [in] bits per pixel + UINT_32 pitch, ///< [in] pitch + UINT_32 height, ///< [in] height + UINT_32 numSlices, ///< [in] number of slices + UINT_32* pBitPosition ///< [out] bit position inside a byte + ) const +{ + const UINT_64 sliceSize = static_cast(pitch) * height; + + UINT_64 sliceOffset = (slice + sample * numSlices)* sliceSize; + UINT_64 rowOffset = static_cast(y) * pitch; + UINT_64 pixOffset = x; + + UINT_64 addr = (sliceOffset + rowOffset + pixOffset) * bpp; + + *pBitPosition = static_cast(addr % 8); + addr /= 8; + + return addr; +} + +/** +**************************************************************************************************** +* Lib::ComputeSurfaceCoordFromAddrLinear +* +* @brief +* Compute the coord from an address of a linear surface +* +* @return +* N/A +**************************************************************************************************** +*/ +VOID Lib::ComputeSurfaceCoordFromAddrLinear( + UINT_64 addr, ///< [in] address + UINT_32 bitPosition, ///< [in] bitPosition in a byte + UINT_32 bpp, ///< [in] bits per pixel + UINT_32 pitch, ///< [in] pitch + UINT_32 height, ///< [in] height + UINT_32 numSlices, ///< 
[in] number of slices + UINT_32* pX, ///< [out] x coord + UINT_32* pY, ///< [out] y coord + UINT_32* pSlice, ///< [out] slice/depth index + UINT_32* pSample ///< [out] sample index + ) const +{ + const UINT_64 sliceSize = static_cast(pitch) * height; + const UINT_64 linearOffset = (BYTES_TO_BITS(addr) + bitPosition) / bpp; + + *pX = static_cast((linearOffset % sliceSize) % pitch); + *pY = static_cast((linearOffset % sliceSize) / pitch % height); + *pSlice = static_cast((linearOffset / sliceSize) % numSlices); + *pSample = static_cast((linearOffset / sliceSize) / numSlices); +} + +/** +**************************************************************************************************** +* Lib::ComputeSurfaceCoordFromAddrMicroTiled +* +* @brief +* Compute the coord from an address of a micro tiled surface +* +* @return +* N/A +**************************************************************************************************** +*/ +VOID Lib::ComputeSurfaceCoordFromAddrMicroTiled( + UINT_64 addr, ///< [in] address + UINT_32 bitPosition, ///< [in] bitPosition in a byte + UINT_32 bpp, ///< [in] bits per pixel + UINT_32 pitch, ///< [in] pitch + UINT_32 height, ///< [in] height + UINT_32 numSamples, ///< [in] number of samples + AddrTileMode tileMode, ///< [in] tile mode + UINT_32 tileBase, ///< [in] base offset within a tile + UINT_32 compBits, ///< [in] component bits actually needed(for planar surface) + UINT_32* pX, ///< [out] x coord + UINT_32* pY, ///< [out] y coord + UINT_32* pSlice, ///< [out] slice/depth index + UINT_32* pSample, ///< [out] sample index, + AddrTileType microTileType, ///< [in] micro tiling order + BOOL_32 isDepthSampleOrder ///< [in] TRUE if in depth sample order + ) const +{ + UINT_64 bitAddr; + UINT_32 microTileThickness; + UINT_32 microTileBits; + UINT_64 sliceBits; + UINT_64 rowBits; + UINT_32 sliceIndex; + UINT_32 microTileCoordX; + UINT_32 microTileCoordY; + UINT_32 pixelOffset; + UINT_32 pixelCoordX = 0; + UINT_32 pixelCoordY = 0; + UINT_32 
pixelCoordZ = 0; + UINT_32 pixelCoordS = 0; + + // + // Convert byte address to bit address. + // + bitAddr = BYTES_TO_BITS(addr) + bitPosition; + + // + // Compute the micro tile size, in bits. + // + switch (tileMode) + { + case ADDR_TM_1D_TILED_THICK: + microTileThickness = ThickTileThickness; + break; + default: + microTileThickness = 1; + break; + } + + microTileBits = MicroTilePixels * microTileThickness * bpp * numSamples; + + // + // Compute number of bits per slice and number of bits per row of micro tiles. + // + sliceBits = static_cast(pitch) * height * microTileThickness * bpp * numSamples; + + rowBits = (pitch / MicroTileWidth) * microTileBits; + + // + // Extract the slice index. + // + sliceIndex = static_cast(bitAddr / sliceBits); + bitAddr -= sliceIndex * sliceBits; + + // + // Extract the y coordinate of the micro tile. + // + microTileCoordY = static_cast(bitAddr / rowBits) * MicroTileHeight; + bitAddr -= (microTileCoordY / MicroTileHeight) * rowBits; + + // + // Extract the x coordinate of the micro tile. + // + microTileCoordX = static_cast(bitAddr / microTileBits) * MicroTileWidth; + + // + // Compute the pixel offset within the micro tile. + // + pixelOffset = static_cast(bitAddr % microTileBits); + + // + // Extract pixel coordinates from the offset. + // + HwlComputePixelCoordFromOffset(pixelOffset, + bpp, + numSamples, + tileMode, + tileBase, + compBits, + &pixelCoordX, + &pixelCoordY, + &pixelCoordZ, + &pixelCoordS, + microTileType, + isDepthSampleOrder); + + // + // Assemble final coordinates. 
+ // + *pX = microTileCoordX + pixelCoordX; + *pY = microTileCoordY + pixelCoordY; + *pSlice = (sliceIndex * microTileThickness) + pixelCoordZ; + *pSample = pixelCoordS; + + if (microTileThickness > 1) + { + *pSample = 0; + } +} + +/** +**************************************************************************************************** +* Lib::ComputePipeFromAddr +* +* @brief +* Compute the pipe number from an address +* +* @return +* Pipe number +* +**************************************************************************************************** +*/ +UINT_32 Lib::ComputePipeFromAddr( + UINT_64 addr, ///< [in] address + UINT_32 numPipes ///< [in] number of banks + ) const +{ + UINT_32 pipe; + + UINT_32 groupBytes = m_pipeInterleaveBytes; //just different terms + + // R600 + // The LSBs of the address are arranged as follows: + // bank | pipe | group + // + // To get the pipe number, shift off the group bits and mask the pipe bits. + // + + // R800 + // The LSBs of the address are arranged as follows: + // bank | bankInterleave | pipe | pipeInterleave + // + // To get the pipe number, shift off the pipe interleave bits and mask the pipe bits. 
+ // + + pipe = static_cast(addr >> Log2(groupBytes)) & (numPipes - 1); + + return pipe; +} + +/** +**************************************************************************************************** +* Lib::ComputeMicroTileEquation +* +* @brief +* Compute micro tile equation +* +* @return +* If equation can be computed +* +**************************************************************************************************** +*/ +ADDR_E_RETURNCODE Lib::ComputeMicroTileEquation( + UINT_32 log2BytesPP, ///< [in] log2 of bytes per pixel + AddrTileMode tileMode, ///< [in] tile mode + AddrTileType microTileType, ///< [in] pixel order in display/non-display mode + ADDR_EQUATION* pEquation ///< [out] equation + ) const +{ + ADDR_E_RETURNCODE retCode = ADDR_OK; + + for (UINT_32 i = 0; i < log2BytesPP; i++) + { + pEquation->addr[i].valid = 1; + pEquation->addr[i].channel = 0; + pEquation->addr[i].index = i; + } + + ADDR_CHANNEL_SETTING* pixelBit = &pEquation->addr[log2BytesPP]; + + ADDR_CHANNEL_SETTING x0 = InitChannel(1, 0, log2BytesPP + 0); + ADDR_CHANNEL_SETTING x1 = InitChannel(1, 0, log2BytesPP + 1); + ADDR_CHANNEL_SETTING x2 = InitChannel(1, 0, log2BytesPP + 2); + ADDR_CHANNEL_SETTING y0 = InitChannel(1, 1, 0); + ADDR_CHANNEL_SETTING y1 = InitChannel(1, 1, 1); + ADDR_CHANNEL_SETTING y2 = InitChannel(1, 1, 2); + ADDR_CHANNEL_SETTING z0 = InitChannel(1, 2, 0); + ADDR_CHANNEL_SETTING z1 = InitChannel(1, 2, 1); + ADDR_CHANNEL_SETTING z2 = InitChannel(1, 2, 2); + + UINT_32 thickness = Thickness(tileMode); + UINT_32 bpp = 1 << (log2BytesPP + 3); + + if (microTileType != ADDR_THICK) + { + if (microTileType == ADDR_DISPLAYABLE) + { + switch (bpp) + { + case 8: + pixelBit[0] = x0; + pixelBit[1] = x1; + pixelBit[2] = x2; + pixelBit[3] = y1; + pixelBit[4] = y0; + pixelBit[5] = y2; + break; + case 16: + pixelBit[0] = x0; + pixelBit[1] = x1; + pixelBit[2] = x2; + pixelBit[3] = y0; + pixelBit[4] = y1; + pixelBit[5] = y2; + break; + case 32: + pixelBit[0] = x0; + pixelBit[1] = x1; + 
pixelBit[2] = y0; + pixelBit[3] = x2; + pixelBit[4] = y1; + pixelBit[5] = y2; + break; + case 64: + pixelBit[0] = x0; + pixelBit[1] = y0; + pixelBit[2] = x1; + pixelBit[3] = x2; + pixelBit[4] = y1; + pixelBit[5] = y2; + break; + case 128: + pixelBit[0] = y0; + pixelBit[1] = x0; + pixelBit[2] = x1; + pixelBit[3] = x2; + pixelBit[4] = y1; + pixelBit[5] = y2; + break; + default: + ADDR_ASSERT_ALWAYS(); + break; + } + } + else if (microTileType == ADDR_NON_DISPLAYABLE || microTileType == ADDR_DEPTH_SAMPLE_ORDER) + { + pixelBit[0] = x0; + pixelBit[1] = y0; + pixelBit[2] = x1; + pixelBit[3] = y1; + pixelBit[4] = x2; + pixelBit[5] = y2; + } + else if (microTileType == ADDR_ROTATED) + { + ADDR_ASSERT(thickness == 1); + + switch (bpp) + { + case 8: + pixelBit[0] = y0; + pixelBit[1] = y1; + pixelBit[2] = y2; + pixelBit[3] = x1; + pixelBit[4] = x0; + pixelBit[5] = x2; + break; + case 16: + pixelBit[0] = y0; + pixelBit[1] = y1; + pixelBit[2] = y2; + pixelBit[3] = x0; + pixelBit[4] = x1; + pixelBit[5] = x2; + break; + case 32: + pixelBit[0] = y0; + pixelBit[1] = y1; + pixelBit[2] = x0; + pixelBit[3] = y2; + pixelBit[4] = x1; + pixelBit[5] = x2; + break; + case 64: + pixelBit[0] = y0; + pixelBit[1] = x0; + pixelBit[2] = y1; + pixelBit[3] = x1; + pixelBit[4] = x2; + pixelBit[5] = y2; + break; + default: + retCode = ADDR_NOTSUPPORTED; + break; + } + } + + if (thickness > 1) + { + pixelBit[6] = z0; + pixelBit[7] = z1; + pEquation->numBits = 8 + log2BytesPP; + } + else + { + pEquation->numBits = 6 + log2BytesPP; + } + } + else // ADDR_THICK + { + ADDR_ASSERT(thickness > 1); + + switch (bpp) + { + case 8: + case 16: + pixelBit[0] = x0; + pixelBit[1] = y0; + pixelBit[2] = x1; + pixelBit[3] = y1; + pixelBit[4] = z0; + pixelBit[5] = z1; + break; + case 32: + pixelBit[0] = x0; + pixelBit[1] = y0; + pixelBit[2] = x1; + pixelBit[3] = z0; + pixelBit[4] = y1; + pixelBit[5] = z1; + break; + case 64: + case 128: + pixelBit[0] = x0; + pixelBit[1] = y0; + pixelBit[2] = z0; + pixelBit[3] = x1; + 
pixelBit[4] = y1; + pixelBit[5] = z1; + break; + default: + ADDR_ASSERT_ALWAYS(); + break; + } + + pixelBit[6] = x2; + pixelBit[7] = y2; + pEquation->numBits = 8 + log2BytesPP; + } + + if (thickness == 8) + { + pixelBit[8] = z2; + pEquation->numBits = 9 + log2BytesPP; + } + + // stackedDepthSlices is used for addressing mode that a tile block contains multiple slices, + // which is not supported by our address lib + pEquation->stackedDepthSlices = FALSE; + + return retCode; +} + +/** +**************************************************************************************************** +* Lib::ComputePixelIndexWithinMicroTile +* +* @brief +* Compute the pixel index inside a micro tile of surface +* +* @return +* Pixel index +* +**************************************************************************************************** +*/ +UINT_32 Lib::ComputePixelIndexWithinMicroTile( + UINT_32 x, ///< [in] x coord + UINT_32 y, ///< [in] y coord + UINT_32 z, ///< [in] slice/depth index + UINT_32 bpp, ///< [in] bits per pixel + AddrTileMode tileMode, ///< [in] tile mode + AddrTileType microTileType ///< [in] pixel order in display/non-display mode + ) const +{ + UINT_32 pixelBit0 = 0; + UINT_32 pixelBit1 = 0; + UINT_32 pixelBit2 = 0; + UINT_32 pixelBit3 = 0; + UINT_32 pixelBit4 = 0; + UINT_32 pixelBit5 = 0; + UINT_32 pixelBit6 = 0; + UINT_32 pixelBit7 = 0; + UINT_32 pixelBit8 = 0; + UINT_32 pixelNumber; + + UINT_32 x0 = _BIT(x, 0); + UINT_32 x1 = _BIT(x, 1); + UINT_32 x2 = _BIT(x, 2); + UINT_32 y0 = _BIT(y, 0); + UINT_32 y1 = _BIT(y, 1); + UINT_32 y2 = _BIT(y, 2); + UINT_32 z0 = _BIT(z, 0); + UINT_32 z1 = _BIT(z, 1); + UINT_32 z2 = _BIT(z, 2); + + UINT_32 thickness = Thickness(tileMode); + + // Compute the pixel number within the micro tile. 
+ + if (microTileType != ADDR_THICK) + { + if (microTileType == ADDR_DISPLAYABLE) + { + switch (bpp) + { + case 8: + pixelBit0 = x0; + pixelBit1 = x1; + pixelBit2 = x2; + pixelBit3 = y1; + pixelBit4 = y0; + pixelBit5 = y2; + break; + case 16: + pixelBit0 = x0; + pixelBit1 = x1; + pixelBit2 = x2; + pixelBit3 = y0; + pixelBit4 = y1; + pixelBit5 = y2; + break; + case 32: + pixelBit0 = x0; + pixelBit1 = x1; + pixelBit2 = y0; + pixelBit3 = x2; + pixelBit4 = y1; + pixelBit5 = y2; + break; + case 64: + pixelBit0 = x0; + pixelBit1 = y0; + pixelBit2 = x1; + pixelBit3 = x2; + pixelBit4 = y1; + pixelBit5 = y2; + break; + case 128: + pixelBit0 = y0; + pixelBit1 = x0; + pixelBit2 = x1; + pixelBit3 = x2; + pixelBit4 = y1; + pixelBit5 = y2; + break; + default: + ADDR_ASSERT_ALWAYS(); + break; + } + } + else if (microTileType == ADDR_NON_DISPLAYABLE || microTileType == ADDR_DEPTH_SAMPLE_ORDER) + { + pixelBit0 = x0; + pixelBit1 = y0; + pixelBit2 = x1; + pixelBit3 = y1; + pixelBit4 = x2; + pixelBit5 = y2; + } + else if (microTileType == ADDR_ROTATED) + { + ADDR_ASSERT(thickness == 1); + + switch (bpp) + { + case 8: + pixelBit0 = y0; + pixelBit1 = y1; + pixelBit2 = y2; + pixelBit3 = x1; + pixelBit4 = x0; + pixelBit5 = x2; + break; + case 16: + pixelBit0 = y0; + pixelBit1 = y1; + pixelBit2 = y2; + pixelBit3 = x0; + pixelBit4 = x1; + pixelBit5 = x2; + break; + case 32: + pixelBit0 = y0; + pixelBit1 = y1; + pixelBit2 = x0; + pixelBit3 = y2; + pixelBit4 = x1; + pixelBit5 = x2; + break; + case 64: + pixelBit0 = y0; + pixelBit1 = x0; + pixelBit2 = y1; + pixelBit3 = x1; + pixelBit4 = x2; + pixelBit5 = y2; + break; + default: + ADDR_ASSERT_ALWAYS(); + break; + } + } + + if (thickness > 1) + { + pixelBit6 = z0; + pixelBit7 = z1; + } + } + else // ADDR_THICK + { + ADDR_ASSERT(thickness > 1); + + switch (bpp) + { + case 8: + case 16: + pixelBit0 = x0; + pixelBit1 = y0; + pixelBit2 = x1; + pixelBit3 = y1; + pixelBit4 = z0; + pixelBit5 = z1; + break; + case 32: + pixelBit0 = x0; + pixelBit1 = y0; 
+ pixelBit2 = x1; + pixelBit3 = z0; + pixelBit4 = y1; + pixelBit5 = z1; + break; + case 64: + case 128: + pixelBit0 = x0; + pixelBit1 = y0; + pixelBit2 = z0; + pixelBit3 = x1; + pixelBit4 = y1; + pixelBit5 = z1; + break; + default: + ADDR_ASSERT_ALWAYS(); + break; + } + + pixelBit6 = x2; + pixelBit7 = y2; + } + + if (thickness == 8) + { + pixelBit8 = z2; + } + + pixelNumber = ((pixelBit0 ) | + (pixelBit1 << 1) | + (pixelBit2 << 2) | + (pixelBit3 << 3) | + (pixelBit4 << 4) | + (pixelBit5 << 5) | + (pixelBit6 << 6) | + (pixelBit7 << 7) | + (pixelBit8 << 8)); + + return pixelNumber; +} + +/** +**************************************************************************************************** +* Lib::AdjustPitchAlignment +* +* @brief +* Adjusts pitch alignment for flipping surface +* +* @return +* N/A +* +**************************************************************************************************** +*/ +VOID Lib::AdjustPitchAlignment( + ADDR_SURFACE_FLAGS flags, ///< [in] Surface flags + UINT_32* pPitchAlign ///< [out] Pointer to pitch alignment + ) const +{ + // Display engine hardwires lower 5 bit of GRPH_PITCH to ZERO which means 32 pixel alignment + // Maybe it will be fixed in future but let's make it general for now. 
+ if (flags.display || flags.overlay) + { + *pPitchAlign = PowTwoAlign(*pPitchAlign, 32); + + if(flags.display) + { + *pPitchAlign = Max(m_minPitchAlignPixels, *pPitchAlign); + } + } +} + +/** +**************************************************************************************************** +* Lib::PadDimensions +* +* @brief +* Helper function to pad dimensions +* +* @return +* N/A +* +**************************************************************************************************** +*/ +VOID Lib::PadDimensions( + AddrTileMode tileMode, ///< [in] tile mode + UINT_32 bpp, ///< [in] bits per pixel + ADDR_SURFACE_FLAGS flags, ///< [in] surface flags + UINT_32 numSamples, ///< [in] number of samples + ADDR_TILEINFO* pTileInfo, ///< [in,out] bank structure. + UINT_32 padDims, ///< [in] Dimensions to pad valid value 1,2,3 + UINT_32 mipLevel, ///< [in] MipLevel + UINT_32* pPitch, ///< [in,out] pitch in pixels + UINT_32* pPitchAlign, ///< [in,out] pitch align could be changed in HwlPadDimensions + UINT_32* pHeight, ///< [in,out] height in pixels + UINT_32 heightAlign, ///< [in] height alignment + UINT_32* pSlices, ///< [in,out] number of slices + UINT_32 sliceAlign ///< [in] number of slice alignment + ) const +{ + UINT_32 pitchAlign = *pPitchAlign; + UINT_32 thickness = Thickness(tileMode); + + ADDR_ASSERT(padDims <= 3); + + // + // Override padding for mip levels + // + if (mipLevel > 0) + { + if (flags.cube) + { + // for cubemap, we only pad when client call with 6 faces as an identity + if (*pSlices > 1) + { + padDims = 3; // we should pad cubemap sub levels when we treat it as 3d texture + } + else + { + padDims = 2; + } + } + } + + // Any possibilities that padDims is 0? 
+ if (padDims == 0) + { + padDims = 3; + } + + if (IsPow2(pitchAlign)) + { + *pPitch = PowTwoAlign((*pPitch), pitchAlign); + } + else // add this code to pass unit test, r600 linear mode is not align bpp to pow2 for linear + { + *pPitch += pitchAlign - 1; + *pPitch /= pitchAlign; + *pPitch *= pitchAlign; + } + + if (padDims > 1) + { + if (IsPow2(heightAlign)) + { + *pHeight = PowTwoAlign((*pHeight), heightAlign); + } + else + { + *pHeight += heightAlign - 1; + *pHeight /= heightAlign; + *pHeight *= heightAlign; + } + } + + if (padDims > 2 || thickness > 1) + { + // for cubemap single face, we do not pad slices. + // if we pad it, the slice number should be set to 6 and current mip level > 1 + if (flags.cube && (!m_configFlags.noCubeMipSlicesPad || flags.cubeAsArray)) + { + *pSlices = NextPow2(*pSlices); + } + + // normal 3D texture or arrays or cubemap has a thick mode? (Just pass unit test) + if (thickness > 1) + { + *pSlices = PowTwoAlign((*pSlices), sliceAlign); + } + + } + + HwlPadDimensions(tileMode, + bpp, + flags, + numSamples, + pTileInfo, + mipLevel, + pPitch, + pPitchAlign, + *pHeight, + heightAlign); +} + +/** +**************************************************************************************************** +* Lib::HwlPreHandleBaseLvl3xPitch +* +* @brief +* Pre-handler of 3x pitch (96 bit) adjustment +* +* @return +* Expected pitch +**************************************************************************************************** +*/ +UINT_32 Lib::HwlPreHandleBaseLvl3xPitch( + const ADDR_COMPUTE_SURFACE_INFO_INPUT* pIn, ///< [in] input + UINT_32 expPitch ///< [in] pitch + ) const +{ + ADDR_ASSERT(pIn->width == expPitch); + // + // If pitch is pre-multiplied by 3, we retrieve original one here to get correct miplevel size + // + if (ElemLib::IsExpand3x(pIn->format) && + pIn->mipLevel == 0 && + pIn->tileMode == ADDR_TM_LINEAR_ALIGNED) + { + expPitch /= 3; + expPitch = NextPow2(expPitch); + } + + return expPitch; +} + +/** 
+**************************************************************************************************** +* Lib::HwlPostHandleBaseLvl3xPitch +* +* @brief +* Post-handler of 3x pitch adjustment +* +* @return +* Expected pitch +**************************************************************************************************** +*/ +UINT_32 Lib::HwlPostHandleBaseLvl3xPitch( + const ADDR_COMPUTE_SURFACE_INFO_INPUT* pIn, ///< [in] input + UINT_32 expPitch ///< [in] pitch + ) const +{ + // + // 96 bits surface of sub levels require element pitch of 32 bits instead + // So we just return pitch in 32 bit pixels without timing 3 + // + if (ElemLib::IsExpand3x(pIn->format) && + pIn->mipLevel == 0 && + pIn->tileMode == ADDR_TM_LINEAR_ALIGNED) + { + expPitch *= 3; + } + + return expPitch; +} + +/** +**************************************************************************************************** +* Lib::IsMacroTiled +* +* @brief +* Check if the tile mode is macro tiled +* +* @return +* TRUE if it is macro tiled (2D/2B/3D/3B) +**************************************************************************************************** +*/ +BOOL_32 Lib::IsMacroTiled( + AddrTileMode tileMode) ///< [in] tile mode +{ + return ModeFlags[tileMode].isMacro; +} + +/** +**************************************************************************************************** +* Lib::IsMacro3dTiled +* +* @brief +* Check if the tile mode is 3D macro tiled +* +* @return +* TRUE if it is 3D macro tiled +**************************************************************************************************** +*/ +BOOL_32 Lib::IsMacro3dTiled( + AddrTileMode tileMode) ///< [in] tile mode +{ + return ModeFlags[tileMode].isMacro3d; +} + +/** +**************************************************************************************************** +* Lib::IsMicroTiled +* +* @brief +* Check if the tile mode is micro tiled +* +* @return +* TRUE if micro tiled 
+**************************************************************************************************** +*/ +BOOL_32 Lib::IsMicroTiled( + AddrTileMode tileMode) ///< [in] tile mode +{ + return ModeFlags[tileMode].isMicro; +} + +/** +**************************************************************************************************** +* Lib::IsLinear +* +* @brief +* Check if the tile mode is linear +* +* @return +* TRUE if linear +**************************************************************************************************** +*/ +BOOL_32 Lib::IsLinear( + AddrTileMode tileMode) ///< [in] tile mode +{ + return ModeFlags[tileMode].isLinear; +} + +/** +**************************************************************************************************** +* Lib::IsPrtNoRotationTileMode +* +* @brief +* Return TRUE if it is prt tile without rotation +* @note +* This function just used by CI +**************************************************************************************************** +*/ +BOOL_32 Lib::IsPrtNoRotationTileMode( + AddrTileMode tileMode) +{ + return ModeFlags[tileMode].isPrtNoRotation; +} + +/** +**************************************************************************************************** +* Lib::IsPrtTileMode +* +* @brief +* Return TRUE if it is prt tile +* @note +* This function just used by CI +**************************************************************************************************** +*/ +BOOL_32 Lib::IsPrtTileMode( + AddrTileMode tileMode) +{ + return ModeFlags[tileMode].isPrt; +} + +/** +**************************************************************************************************** +* Lib::ComputeMipLevel +* +* @brief +* Compute mipmap level width/height/slices +* @return +* N/A +**************************************************************************************************** +*/ +VOID Lib::ComputeMipLevel( + ADDR_COMPUTE_SURFACE_INFO_INPUT* pIn ///< [in,out] Input structure + ) const +{ + // Check if HWL has handled + 
BOOL_32 hwlHandled = FALSE; + (void)hwlHandled; + + if (ElemLib::IsBlockCompressed(pIn->format)) + { + if (pIn->mipLevel == 0) + { + // DXTn's level 0 must be multiple of 4 + // But there are exceptions: + // 1. Internal surface creation in hostblt/vsblt/etc... + // 2. Runtime doesn't reject ATI1/ATI2 whose width/height are not multiple of 4 + pIn->width = PowTwoAlign(pIn->width, 4); + pIn->height = PowTwoAlign(pIn->height, 4); + } + } + + hwlHandled = HwlComputeMipLevel(pIn); +} + +/** +**************************************************************************************************** +* Lib::DegradeTo1D +* +* @brief +* Check if surface can be degraded to 1D +* @return +* TRUE if degraded +**************************************************************************************************** +*/ +BOOL_32 Lib::DegradeTo1D( + UINT_32 width, ///< surface width + UINT_32 height, ///< surface height + UINT_32 macroTilePitchAlign, ///< macro tile pitch align + UINT_32 macroTileHeightAlign ///< macro tile height align + ) +{ + BOOL_32 degrade = ((width < macroTilePitchAlign) || (height < macroTileHeightAlign)); + + // Check whether 2D tiling still has too much footprint + if (degrade == FALSE) + { + // Only check width and height as slices are aligned to thickness + UINT_64 unalignedSize = width * height; + + UINT_32 alignedPitch = PowTwoAlign(width, macroTilePitchAlign); + UINT_32 alignedHeight = PowTwoAlign(height, macroTileHeightAlign); + UINT_64 alignedSize = alignedPitch * alignedHeight; + + // alignedSize > 1.5 * unalignedSize + if (2 * alignedSize > 3 * unalignedSize) + { + degrade = TRUE; + } + } + + return degrade; +} + +/** +**************************************************************************************************** +* Lib::OptimizeTileMode +* +* @brief +* Check if base level's tile mode can be optimized (degraded) +* @return +* N/A +**************************************************************************************************** +*/ +VOID 
Lib::OptimizeTileMode( + ADDR_COMPUTE_SURFACE_INFO_INPUT* pInOut ///< [in, out] structure for surface info + ) const +{ + AddrTileMode tileMode = pInOut->tileMode; + + BOOL_32 doOpt = (pInOut->flags.opt4Space == TRUE) || + (pInOut->flags.minimizeAlignment == TRUE) || + (pInOut->maxBaseAlign != 0); + + BOOL_32 convertToPrt = FALSE; + + // Optimization can only be done on level 0 and samples <= 1 + if ((doOpt == TRUE) && + (pInOut->mipLevel == 0) && + (IsPrtTileMode(tileMode) == FALSE) && + (pInOut->flags.prt == FALSE)) + { + UINT_32 width = pInOut->width; + UINT_32 height = pInOut->height; + UINT_32 thickness = Thickness(tileMode); + BOOL_32 macroTiledOK = TRUE; + UINT_32 macroWidthAlign = 0; + UINT_32 macroHeightAlign = 0; + UINT_32 macroSizeAlign = 0; + + if (IsMacroTiled(tileMode)) + { + macroTiledOK = HwlGetAlignmentInfoMacroTiled(pInOut, + ¯oWidthAlign, + ¯oHeightAlign, + ¯oSizeAlign); + } + + if (macroTiledOK) + { + if ((pInOut->flags.display == FALSE) && + (pInOut->flags.opt4Space == TRUE) && + (pInOut->numSamples <= 1)) + { + // Check if linear mode is optimal + if ((pInOut->height == 1) && + (IsLinear(tileMode) == FALSE) && + (ElemLib::IsBlockCompressed(pInOut->format) == FALSE) && + (pInOut->flags.depth == FALSE) && + (pInOut->flags.stencil == FALSE) && + (m_configFlags.disableLinearOpt == FALSE) && + (pInOut->flags.disableLinearOpt == FALSE)) + { + tileMode = ADDR_TM_LINEAR_ALIGNED; + } + else if (IsMacroTiled(tileMode) && (pInOut->flags.tcCompatible == FALSE)) + { + if (DegradeTo1D(width, height, macroWidthAlign, macroHeightAlign)) + { + tileMode = (thickness == 1) ? + ADDR_TM_1D_TILED_THIN1 : ADDR_TM_1D_TILED_THICK; + } + else if ((thickness > 1) && (pInOut->flags.disallowLargeThickDegrade == 0)) + { + // As in the following HwlComputeSurfaceInfo, thick modes may be degraded to + // thinner modes, we should re-evaluate whether the corresponding + // thinner modes should be degraded. If so, we choose 1D thick mode instead. 
+ tileMode = DegradeLargeThickTile(pInOut->tileMode, pInOut->bpp); + + if (tileMode != pInOut->tileMode) + { + // Get thickness again after large thick degrade + thickness = Thickness(tileMode); + + ADDR_COMPUTE_SURFACE_INFO_INPUT input = *pInOut; + input.tileMode = tileMode; + + macroTiledOK = HwlGetAlignmentInfoMacroTiled(&input, + ¯oWidthAlign, + ¯oHeightAlign, + ¯oSizeAlign); + + if (macroTiledOK && + DegradeTo1D(width, height, macroWidthAlign, macroHeightAlign)) + { + tileMode = ADDR_TM_1D_TILED_THICK; + } + } + } + } + } + + if (macroTiledOK) + { + if ((pInOut->flags.minimizeAlignment == TRUE) && + (pInOut->numSamples <= 1) && + (IsMacroTiled(tileMode) == TRUE)) + { + UINT_32 macroSize = PowTwoAlign(width, macroWidthAlign) * + PowTwoAlign(height, macroHeightAlign); + UINT_32 microSize = PowTwoAlign(width, MicroTileWidth) * + PowTwoAlign(height, MicroTileHeight); + + if (macroSize > microSize) + { + tileMode = (thickness == 1) ? + ADDR_TM_1D_TILED_THIN1 : ADDR_TM_1D_TILED_THICK; + } + } + + if ((pInOut->maxBaseAlign != 0) && + (IsMacroTiled(tileMode) == TRUE)) + { + if (macroSizeAlign > pInOut->maxBaseAlign) + { + if (pInOut->numSamples > 1) + { + ADDR_ASSERT(pInOut->maxBaseAlign >= Block64K); + + convertToPrt = TRUE; + } + else if (pInOut->maxBaseAlign < Block64K) + { + tileMode = (thickness == 1) ? 
+ ADDR_TM_1D_TILED_THIN1 : ADDR_TM_1D_TILED_THICK; + } + else + { + convertToPrt = TRUE; + } + } + } + } + } + } + + if (convertToPrt) + { + if ((pInOut->flags.matchStencilTileCfg == TRUE) && (pInOut->numSamples <= 1)) + { + pInOut->tileMode = ADDR_TM_1D_TILED_THIN1; + } + else + { + HwlSetPrtTileMode(pInOut); + } + } + else if (tileMode != pInOut->tileMode) + { + pInOut->tileMode = tileMode; + } + + HwlOptimizeTileMode(pInOut); +} + +/** +**************************************************************************************************** +* Lib::DegradeLargeThickTile +* +* @brief +* Check if the thickness needs to be reduced if a tile is too large +* @return +* The degraded tile mode (unchanged if not degraded) +**************************************************************************************************** +*/ +AddrTileMode Lib::DegradeLargeThickTile( + AddrTileMode tileMode, + UINT_32 bpp) const +{ + // Override tilemode + // When tile_width (8) * tile_height (8) * thickness * element_bytes is > row_size, + // it is better to just use THIN mode in this case + UINT_32 thickness = Thickness(tileMode); + + if (thickness > 1 && m_configFlags.allowLargeThickTile == 0) + { + UINT_32 tileSize = MicroTilePixels * thickness * (bpp >> 3); + + if (tileSize > m_rowSize) + { + switch (tileMode) + { + case ADDR_TM_2D_TILED_XTHICK: + if ((tileSize >> 1) <= m_rowSize) + { + tileMode = ADDR_TM_2D_TILED_THICK; + break; + } + // else fall through + case ADDR_TM_2D_TILED_THICK: + tileMode = ADDR_TM_2D_TILED_THIN1; + break; + + case ADDR_TM_3D_TILED_XTHICK: + if ((tileSize >> 1) <= m_rowSize) + { + tileMode = ADDR_TM_3D_TILED_THICK; + break; + } + // else fall through + case ADDR_TM_3D_TILED_THICK: + tileMode = ADDR_TM_3D_TILED_THIN1; + break; + + case ADDR_TM_PRT_TILED_THICK: + tileMode = ADDR_TM_PRT_TILED_THIN1; + break; + + case ADDR_TM_PRT_2D_TILED_THICK: + tileMode = ADDR_TM_PRT_2D_TILED_THIN1; + break; + + case ADDR_TM_PRT_3D_TILED_THICK: + tileMode = 
ADDR_TM_PRT_3D_TILED_THIN1; + break; + + default: + break; + } + } + } + + return tileMode; +} + +/** +**************************************************************************************************** +* Lib::PostComputeMipLevel +* @brief +* Compute MipLevel info (including level 0) after surface adjustment +* @return +* ADDR_E_RETURNCODE +**************************************************************************************************** +*/ +ADDR_E_RETURNCODE Lib::PostComputeMipLevel( + ADDR_COMPUTE_SURFACE_INFO_INPUT* pIn, ///< [in,out] Input structure + ADDR_COMPUTE_SURFACE_INFO_OUTPUT* pOut ///< [out] Output structure + ) const +{ + // Mipmap including level 0 must be pow2 padded since either SI hw expects so or it is + // required by CFX for Hw Compatibility between NI and SI. Otherwise it is only needed for + // mipLevel > 0. Any h/w has different requirement should implement its own virtual function + + if (pIn->flags.pow2Pad) + { + pIn->width = NextPow2(pIn->width); + pIn->height = NextPow2(pIn->height); + pIn->numSlices = NextPow2(pIn->numSlices); + } + else if (pIn->mipLevel > 0) + { + pIn->width = NextPow2(pIn->width); + pIn->height = NextPow2(pIn->height); + + if (!pIn->flags.cube) + { + pIn->numSlices = NextPow2(pIn->numSlices); + } + + // for cubemap, we keep its value at first + } + + return ADDR_OK; +} + +/** +**************************************************************************************************** +* Lib::HwlSetupTileCfg +* +* @brief +* Map tile index to tile setting. 
+* @return +* ADDR_E_RETURNCODE +**************************************************************************************************** +*/ +ADDR_E_RETURNCODE Lib::HwlSetupTileCfg( + UINT_32 bpp, ///< Bits per pixel + INT_32 index, ///< [in] Tile index + INT_32 macroModeIndex, ///< [in] Index in macro tile mode table(CI) + ADDR_TILEINFO* pInfo, ///< [out] Tile Info + AddrTileMode* pMode, ///< [out] Tile mode + AddrTileType* pType ///< [out] Tile type + ) const +{ + return ADDR_NOTSUPPORTED; +} + +/** +**************************************************************************************************** +* Lib::HwlGetPipes +* +* @brief +* Get number pipes +* @return +* num pipes +**************************************************************************************************** +*/ +UINT_32 Lib::HwlGetPipes( + const ADDR_TILEINFO* pTileInfo ///< [in] Tile info + ) const +{ + //pTileInfo can be NULL when asic is 6xx and 8xx. + return m_pipes; +} + +/** +**************************************************************************************************** +* Lib::ComputeQbStereoInfo +* +* @brief +* Get quad buffer stereo information +* @return +* N/A +**************************************************************************************************** +*/ +VOID Lib::ComputeQbStereoInfo( + ADDR_COMPUTE_SURFACE_INFO_OUTPUT* pOut ///< [in,out] updated pOut+pStereoInfo + ) const +{ + ADDR_ASSERT(pOut->bpp >= 8); + ADDR_ASSERT((pOut->surfSize % pOut->baseAlign) == 0); + + // Save original height + pOut->pStereoInfo->eyeHeight = pOut->height; + + // Right offset + pOut->pStereoInfo->rightOffset = static_cast(pOut->surfSize); + + pOut->pStereoInfo->rightSwizzle = HwlComputeQbStereoRightSwizzle(pOut); + // Double height + pOut->height <<= 1; + pOut->pixelHeight <<= 1; + + // Double size + pOut->surfSize <<= 1; + + // Right start address meets the base align since it is guaranteed by AddrLib1 + + // 1D surface on SI may break this rule, but we can force it to meet by checking 
.qbStereo. +} + +/** +**************************************************************************************************** +* Lib::ComputePrtInfo +* +* @brief +* Compute prt surface related info +* +* @return +* ADDR_E_RETURNCODE +**************************************************************************************************** +*/ +ADDR_E_RETURNCODE Lib::ComputePrtInfo( + const ADDR_PRT_INFO_INPUT* pIn, + ADDR_PRT_INFO_OUTPUT* pOut) const +{ + ADDR_ASSERT(pOut != NULL); + + ADDR_E_RETURNCODE returnCode = ADDR_OK; + + UINT_32 expandX = 1; + UINT_32 expandY = 1; + ElemMode elemMode; + + UINT_32 bpp = GetElemLib()->GetBitsPerPixel(pIn->format, + &elemMode, + &expandX, + &expandY); + + if (bpp <8 || bpp == 24 || bpp == 48 || bpp == 96) + { + returnCode = ADDR_INVALIDPARAMS; + } + + UINT_32 numFrags = pIn->numFrags; + ADDR_ASSERT(numFrags <= 8); + + UINT_32 tileWidth = 0; + UINT_32 tileHeight = 0; + if (returnCode == ADDR_OK) + { + // 3D texture without depth or 2d texture + if (pIn->baseMipDepth > 1 || pIn->baseMipHeight > 1) + { + if (bpp == 8) + { + tileWidth = 256; + tileHeight = 256; + } + else if (bpp == 16) + { + tileWidth = 256; + tileHeight = 128; + } + else if (bpp == 32) + { + tileWidth = 128; + tileHeight = 128; + } + else if (bpp == 64) + { + // assume it is BC1/4 + tileWidth = 512; + tileHeight = 256; + + if (elemMode == ADDR_UNCOMPRESSED) + { + tileWidth = 128; + tileHeight = 64; + } + } + else if (bpp == 128) + { + // assume it is BC2/3/5/6H/7 + tileWidth = 256; + tileHeight = 256; + + if (elemMode == ADDR_UNCOMPRESSED) + { + tileWidth = 64; + tileHeight = 64; + } + } + + if (numFrags == 2) + { + tileWidth = tileWidth / 2; + } + else if (numFrags == 4) + { + tileWidth = tileWidth / 2; + tileHeight = tileHeight / 2; + } + else if (numFrags == 8) + { + tileWidth = tileWidth / 4; + tileHeight = tileHeight / 2; + } + } + else // 1d + { + tileHeight = 1; + if (bpp == 8) + { + tileWidth = 65536; + } + else if (bpp == 16) + { + tileWidth = 32768; + } + else 
if (bpp == 32) + { + tileWidth = 16384; + } + else if (bpp == 64) + { + tileWidth = 8192; + } + else if (bpp == 128) + { + tileWidth = 4096; + } + } + } + + pOut->prtTileWidth = tileWidth; + pOut->prtTileHeight = tileHeight; + + return returnCode; +} + +} // V1 +} // Addr diff -Nru mesa-18.3.3/src/amd/addrlib/src/core/addrlib1.h mesa-19.0.1/src/amd/addrlib/src/core/addrlib1.h --- mesa-18.3.3/src/amd/addrlib/src/core/addrlib1.h 1970-01-01 00:00:00.000000000 +0000 +++ mesa-19.0.1/src/amd/addrlib/src/core/addrlib1.h 2019-03-31 23:16:37.000000000 +0000 @@ -0,0 +1,544 @@ +/* + * Copyright © 2007-2018 Advanced Micro Devices, Inc. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining + * a copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES + * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NON-INFRINGEMENT. IN NO EVENT SHALL THE COPYRIGHT HOLDERS, AUTHORS + * AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE + * USE OR OTHER DEALINGS IN THE SOFTWARE. + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. 
+ */ + +/** +**************************************************************************************************** +* @file addrlib1.h +* @brief Contains the Addr::V1::Lib class definition. +**************************************************************************************************** +*/ + +#ifndef __ADDR_LIB1_H__ +#define __ADDR_LIB1_H__ + +#include "addrlib.h" + +namespace Addr +{ +namespace V1 +{ + +/** +**************************************************************************************************** +* @brief Neutral enums that define bank swap size +**************************************************************************************************** +*/ +enum SampleSplitSize +{ + ADDR_SAMPLESPLIT_1KB = 1024, + ADDR_SAMPLESPLIT_2KB = 2048, + ADDR_SAMPLESPLIT_4KB = 4096, + ADDR_SAMPLESPLIT_8KB = 8192, +}; + +/** +**************************************************************************************************** +* @brief Flags for AddrTileMode +**************************************************************************************************** +*/ +struct TileModeFlags +{ + UINT_32 thickness : 4; + UINT_32 isLinear : 1; + UINT_32 isMicro : 1; + UINT_32 isMacro : 1; + UINT_32 isMacro3d : 1; + UINT_32 isPrt : 1; + UINT_32 isPrtNoRotation : 1; + UINT_32 isBankSwapped : 1; +}; + +static const UINT_32 Block64K = 0x10000; +static const UINT_32 PrtTileSize = Block64K; + +/** +**************************************************************************************************** +* @brief This class contains asic independent address lib functionalities +**************************************************************************************************** +*/ +class Lib : public Addr::Lib +{ +public: + virtual ~Lib(); + + static Lib* GetLib( + ADDR_HANDLE hLib); + + /// Returns tileIndex support + BOOL_32 UseTileIndex(INT_32 index) const + { + return m_configFlags.useTileIndex && (index != TileIndexInvalid); + } + + /// Returns combined swizzle support + BOOL_32 
UseCombinedSwizzle() const + { + return m_configFlags.useCombinedSwizzle; + } + + // + // Interface stubs + // + ADDR_E_RETURNCODE ComputeSurfaceInfo( + const ADDR_COMPUTE_SURFACE_INFO_INPUT* pIn, + ADDR_COMPUTE_SURFACE_INFO_OUTPUT* pOut) const; + + ADDR_E_RETURNCODE ComputeSurfaceAddrFromCoord( + const ADDR_COMPUTE_SURFACE_ADDRFROMCOORD_INPUT* pIn, + ADDR_COMPUTE_SURFACE_ADDRFROMCOORD_OUTPUT* pOut) const; + + ADDR_E_RETURNCODE ComputeSurfaceCoordFromAddr( + const ADDR_COMPUTE_SURFACE_COORDFROMADDR_INPUT* pIn, + ADDR_COMPUTE_SURFACE_COORDFROMADDR_OUTPUT* pOut) const; + + ADDR_E_RETURNCODE ComputeSliceTileSwizzle( + const ADDR_COMPUTE_SLICESWIZZLE_INPUT* pIn, + ADDR_COMPUTE_SLICESWIZZLE_OUTPUT* pOut) const; + + ADDR_E_RETURNCODE ExtractBankPipeSwizzle( + const ADDR_EXTRACT_BANKPIPE_SWIZZLE_INPUT* pIn, + ADDR_EXTRACT_BANKPIPE_SWIZZLE_OUTPUT* pOut) const; + + ADDR_E_RETURNCODE CombineBankPipeSwizzle( + const ADDR_COMBINE_BANKPIPE_SWIZZLE_INPUT* pIn, + ADDR_COMBINE_BANKPIPE_SWIZZLE_OUTPUT* pOut) const; + + ADDR_E_RETURNCODE ComputeBaseSwizzle( + const ADDR_COMPUTE_BASE_SWIZZLE_INPUT* pIn, + ADDR_COMPUTE_BASE_SWIZZLE_OUTPUT* pOut) const; + + ADDR_E_RETURNCODE ComputeFmaskInfo( + const ADDR_COMPUTE_FMASK_INFO_INPUT* pIn, + ADDR_COMPUTE_FMASK_INFO_OUTPUT* pOut); + + ADDR_E_RETURNCODE ComputeFmaskAddrFromCoord( + const ADDR_COMPUTE_FMASK_ADDRFROMCOORD_INPUT* pIn, + ADDR_COMPUTE_FMASK_ADDRFROMCOORD_OUTPUT* pOut) const; + + ADDR_E_RETURNCODE ComputeFmaskCoordFromAddr( + const ADDR_COMPUTE_FMASK_COORDFROMADDR_INPUT* pIn, + ADDR_COMPUTE_FMASK_COORDFROMADDR_OUTPUT* pOut) const; + + ADDR_E_RETURNCODE ConvertTileInfoToHW( + const ADDR_CONVERT_TILEINFOTOHW_INPUT* pIn, + ADDR_CONVERT_TILEINFOTOHW_OUTPUT* pOut) const; + + ADDR_E_RETURNCODE ConvertTileIndex( + const ADDR_CONVERT_TILEINDEX_INPUT* pIn, + ADDR_CONVERT_TILEINDEX_OUTPUT* pOut) const; + + ADDR_E_RETURNCODE GetMacroModeIndex( + const ADDR_GET_MACROMODEINDEX_INPUT* pIn, + ADDR_GET_MACROMODEINDEX_OUTPUT* pOut) const; + + 
ADDR_E_RETURNCODE ConvertTileIndex1( + const ADDR_CONVERT_TILEINDEX1_INPUT* pIn, + ADDR_CONVERT_TILEINDEX_OUTPUT* pOut) const; + + ADDR_E_RETURNCODE GetTileIndex( + const ADDR_GET_TILEINDEX_INPUT* pIn, + ADDR_GET_TILEINDEX_OUTPUT* pOut) const; + + ADDR_E_RETURNCODE ComputeHtileInfo( + const ADDR_COMPUTE_HTILE_INFO_INPUT* pIn, + ADDR_COMPUTE_HTILE_INFO_OUTPUT* pOut) const; + + ADDR_E_RETURNCODE ComputeCmaskInfo( + const ADDR_COMPUTE_CMASK_INFO_INPUT* pIn, + ADDR_COMPUTE_CMASK_INFO_OUTPUT* pOut) const; + + ADDR_E_RETURNCODE ComputeDccInfo( + const ADDR_COMPUTE_DCCINFO_INPUT* pIn, + ADDR_COMPUTE_DCCINFO_OUTPUT* pOut) const; + + ADDR_E_RETURNCODE ComputeHtileAddrFromCoord( + const ADDR_COMPUTE_HTILE_ADDRFROMCOORD_INPUT* pIn, + ADDR_COMPUTE_HTILE_ADDRFROMCOORD_OUTPUT* pOut) const; + + ADDR_E_RETURNCODE ComputeCmaskAddrFromCoord( + const ADDR_COMPUTE_CMASK_ADDRFROMCOORD_INPUT* pIn, + ADDR_COMPUTE_CMASK_ADDRFROMCOORD_OUTPUT* pOut) const; + + ADDR_E_RETURNCODE ComputeHtileCoordFromAddr( + const ADDR_COMPUTE_HTILE_COORDFROMADDR_INPUT* pIn, + ADDR_COMPUTE_HTILE_COORDFROMADDR_OUTPUT* pOut) const; + + ADDR_E_RETURNCODE ComputeCmaskCoordFromAddr( + const ADDR_COMPUTE_CMASK_COORDFROMADDR_INPUT* pIn, + ADDR_COMPUTE_CMASK_COORDFROMADDR_OUTPUT* pOut) const; + + ADDR_E_RETURNCODE ComputePrtInfo( + const ADDR_PRT_INFO_INPUT* pIn, + ADDR_PRT_INFO_OUTPUT* pOut) const; +protected: + Lib(); // Constructor is protected + Lib(const Client* pClient); + + /// Pure Virtual function for Hwl computing surface info + virtual ADDR_E_RETURNCODE HwlComputeSurfaceInfo( + const ADDR_COMPUTE_SURFACE_INFO_INPUT* pIn, + ADDR_COMPUTE_SURFACE_INFO_OUTPUT* pOut) const = 0; + + /// Pure Virtual function for Hwl computing surface address from coord + virtual ADDR_E_RETURNCODE HwlComputeSurfaceAddrFromCoord( + const ADDR_COMPUTE_SURFACE_ADDRFROMCOORD_INPUT* pIn, + ADDR_COMPUTE_SURFACE_ADDRFROMCOORD_OUTPUT* pOut) const = 0; + + /// Pure Virtual function for Hwl computing surface coord from address + virtual 
ADDR_E_RETURNCODE HwlComputeSurfaceCoordFromAddr( + const ADDR_COMPUTE_SURFACE_COORDFROMADDR_INPUT* pIn, + ADDR_COMPUTE_SURFACE_COORDFROMADDR_OUTPUT* pOut) const = 0; + + /// Pure Virtual function for Hwl computing surface tile swizzle + virtual ADDR_E_RETURNCODE HwlComputeSliceTileSwizzle( + const ADDR_COMPUTE_SLICESWIZZLE_INPUT* pIn, + ADDR_COMPUTE_SLICESWIZZLE_OUTPUT* pOut) const = 0; + + /// Pure Virtual function for Hwl extracting bank/pipe swizzle from base256b + virtual ADDR_E_RETURNCODE HwlExtractBankPipeSwizzle( + const ADDR_EXTRACT_BANKPIPE_SWIZZLE_INPUT* pIn, + ADDR_EXTRACT_BANKPIPE_SWIZZLE_OUTPUT* pOut) const = 0; + + /// Pure Virtual function for Hwl combining bank/pipe swizzle + virtual ADDR_E_RETURNCODE HwlCombineBankPipeSwizzle( + UINT_32 bankSwizzle, UINT_32 pipeSwizzle, ADDR_TILEINFO* pTileInfo, + UINT_64 baseAddr, UINT_32* pTileSwizzle) const = 0; + + /// Pure Virtual function for Hwl computing base swizzle + virtual ADDR_E_RETURNCODE HwlComputeBaseSwizzle( + const ADDR_COMPUTE_BASE_SWIZZLE_INPUT* pIn, + ADDR_COMPUTE_BASE_SWIZZLE_OUTPUT* pOut) const = 0; + + /// Pure Virtual function for Hwl computing HTILE base align + virtual UINT_32 HwlComputeHtileBaseAlign( + BOOL_32 isTcCompatible, BOOL_32 isLinear, ADDR_TILEINFO* pTileInfo) const = 0; + + /// Pure Virtual function for Hwl computing HTILE bpp + virtual UINT_32 HwlComputeHtileBpp( + BOOL_32 isWidth8, BOOL_32 isHeight8) const = 0; + + /// Pure Virtual function for Hwl computing HTILE bytes + virtual UINT_64 HwlComputeHtileBytes( + UINT_32 pitch, UINT_32 height, UINT_32 bpp, + BOOL_32 isLinear, UINT_32 numSlices, UINT_64* pSliceBytes, UINT_32 baseAlign) const = 0; + + /// Pure Virtual function for Hwl computing FMASK info + virtual ADDR_E_RETURNCODE HwlComputeFmaskInfo( + const ADDR_COMPUTE_FMASK_INFO_INPUT* pIn, + ADDR_COMPUTE_FMASK_INFO_OUTPUT* pOut) = 0; + + /// Pure Virtual function for Hwl FMASK address from coord + virtual ADDR_E_RETURNCODE HwlComputeFmaskAddrFromCoord( + const 
ADDR_COMPUTE_FMASK_ADDRFROMCOORD_INPUT* pIn, + ADDR_COMPUTE_FMASK_ADDRFROMCOORD_OUTPUT* pOut) const = 0; + + /// Pure Virtual function for Hwl FMASK coord from address + virtual ADDR_E_RETURNCODE HwlComputeFmaskCoordFromAddr( + const ADDR_COMPUTE_FMASK_COORDFROMADDR_INPUT* pIn, + ADDR_COMPUTE_FMASK_COORDFROMADDR_OUTPUT* pOut) const = 0; + + /// Pure Virtual function for Hwl convert tile info from real value to HW value + virtual ADDR_E_RETURNCODE HwlConvertTileInfoToHW( + const ADDR_CONVERT_TILEINFOTOHW_INPUT* pIn, + ADDR_CONVERT_TILEINFOTOHW_OUTPUT* pOut) const = 0; + + /// Pure Virtual function for Hwl compute mipmap info + virtual BOOL_32 HwlComputeMipLevel( + ADDR_COMPUTE_SURFACE_INFO_INPUT* pIn) const = 0; + + /// Pure Virtual function for Hwl compute max cmask blockMax value + virtual BOOL_32 HwlGetMaxCmaskBlockMax() const = 0; + + /// Pure Virtual function for Hwl compute fmask bits + virtual UINT_32 HwlComputeFmaskBits( + const ADDR_COMPUTE_FMASK_INFO_INPUT* pIn, + UINT_32* pNumSamples) const = 0; + + /// Virtual function to get index (not pure then no need to implement this in all hwls + virtual ADDR_E_RETURNCODE HwlGetTileIndex( + const ADDR_GET_TILEINDEX_INPUT* pIn, + ADDR_GET_TILEINDEX_OUTPUT* pOut) const + { + return ADDR_NOTSUPPORTED; + } + + /// Virtual function for Hwl to compute Dcc info + virtual ADDR_E_RETURNCODE HwlComputeDccInfo( + const ADDR_COMPUTE_DCCINFO_INPUT* pIn, + ADDR_COMPUTE_DCCINFO_OUTPUT* pOut) const + { + return ADDR_NOTSUPPORTED; + } + + /// Virtual function to get cmask address for tc compatible cmask + virtual ADDR_E_RETURNCODE HwlComputeCmaskAddrFromCoord( + const ADDR_COMPUTE_CMASK_ADDRFROMCOORD_INPUT* pIn, + ADDR_COMPUTE_CMASK_ADDRFROMCOORD_OUTPUT* pOut) const + { + return ADDR_NOTSUPPORTED; + } + + /// Virtual function to get htile address for tc compatible htile + virtual ADDR_E_RETURNCODE HwlComputeHtileAddrFromCoord( + const ADDR_COMPUTE_HTILE_ADDRFROMCOORD_INPUT* pIn, + ADDR_COMPUTE_HTILE_ADDRFROMCOORD_OUTPUT* pOut) 
const + { + return ADDR_NOTSUPPORTED; + } + + // Compute attributes + + // HTILE + UINT_32 ComputeHtileInfo( + ADDR_HTILE_FLAGS flags, + UINT_32 pitchIn, UINT_32 heightIn, UINT_32 numSlices, + BOOL_32 isLinear, BOOL_32 isWidth8, BOOL_32 isHeight8, + ADDR_TILEINFO* pTileInfo, + UINT_32* pPitchOut, UINT_32* pHeightOut, UINT_64* pHtileBytes, + UINT_32* pMacroWidth = NULL, UINT_32* pMacroHeight = NULL, + UINT_64* pSliceSize = NULL, UINT_32* pBaseAlign = NULL) const; + + // CMASK + ADDR_E_RETURNCODE ComputeCmaskInfo( + ADDR_CMASK_FLAGS flags, + UINT_32 pitchIn, UINT_32 heightIn, UINT_32 numSlices, BOOL_32 isLinear, + ADDR_TILEINFO* pTileInfo, UINT_32* pPitchOut, UINT_32* pHeightOut, UINT_64* pCmaskBytes, + UINT_32* pMacroWidth, UINT_32* pMacroHeight, UINT_64* pSliceSize = NULL, + UINT_32* pBaseAlign = NULL, UINT_32* pBlockMax = NULL) const; + + virtual VOID HwlComputeTileDataWidthAndHeightLinear( + UINT_32* pMacroWidth, UINT_32* pMacroHeight, + UINT_32 bpp, ADDR_TILEINFO* pTileInfo) const; + + // CMASK & HTILE addressing + virtual UINT_64 HwlComputeXmaskAddrFromCoord( + UINT_32 pitch, UINT_32 height, UINT_32 x, UINT_32 y, UINT_32 slice, + UINT_32 numSlices, UINT_32 factor, BOOL_32 isLinear, BOOL_32 isWidth8, + BOOL_32 isHeight8, ADDR_TILEINFO* pTileInfo, + UINT_32* bitPosition) const; + + virtual VOID HwlComputeXmaskCoordFromAddr( + UINT_64 addr, UINT_32 bitPosition, UINT_32 pitch, UINT_32 height, UINT_32 numSlices, + UINT_32 factor, BOOL_32 isLinear, BOOL_32 isWidth8, BOOL_32 isHeight8, + ADDR_TILEINFO* pTileInfo, UINT_32* pX, UINT_32* pY, UINT_32* pSlice) const; + + // Surface mipmap + VOID ComputeMipLevel( + ADDR_COMPUTE_SURFACE_INFO_INPUT* pIn) const; + + /// Pure Virtual function for Hwl to get macro tiled alignment info + virtual BOOL_32 HwlGetAlignmentInfoMacroTiled( + const ADDR_COMPUTE_SURFACE_INFO_INPUT* pIn, + UINT_32* pPitchAlign, UINT_32* pHeightAlign, UINT_32* pSizeAlign) const = 0; + + virtual VOID HwlOverrideTileMode(ADDR_COMPUTE_SURFACE_INFO_INPUT* 
pInOut) const + { + // not supported in hwl layer + } + + virtual VOID HwlOptimizeTileMode(ADDR_COMPUTE_SURFACE_INFO_INPUT* pInOut) const + { + // not supported in hwl layer + } + + virtual VOID HwlSelectTileMode(ADDR_COMPUTE_SURFACE_INFO_INPUT* pInOut) const + { + // not supported in hwl layer + } + + AddrTileMode DegradeLargeThickTile(AddrTileMode tileMode, UINT_32 bpp) const; + + VOID PadDimensions( + AddrTileMode tileMode, UINT_32 bpp, ADDR_SURFACE_FLAGS flags, + UINT_32 numSamples, ADDR_TILEINFO* pTileInfo, UINT_32 padDims, UINT_32 mipLevel, + UINT_32* pPitch, UINT_32* pPitchAlign, UINT_32* pHeight, UINT_32 heightAlign, + UINT_32* pSlices, UINT_32 sliceAlign) const; + + virtual VOID HwlPadDimensions( + AddrTileMode tileMode, UINT_32 bpp, ADDR_SURFACE_FLAGS flags, + UINT_32 numSamples, ADDR_TILEINFO* pTileInfo, UINT_32 mipLevel, + UINT_32* pPitch, UINT_32* pPitchAlign, UINT_32 height, UINT_32 heightAlign) const + { + } + + // + // Addressing shared for linear/1D tiling + // + UINT_64 ComputeSurfaceAddrFromCoordLinear( + UINT_32 x, UINT_32 y, UINT_32 slice, UINT_32 sample, + UINT_32 bpp, UINT_32 pitch, UINT_32 height, UINT_32 numSlices, + UINT_32* pBitPosition) const; + + VOID ComputeSurfaceCoordFromAddrLinear( + UINT_64 addr, UINT_32 bitPosition, UINT_32 bpp, + UINT_32 pitch, UINT_32 height, UINT_32 numSlices, + UINT_32* pX, UINT_32* pY, UINT_32* pSlice, UINT_32* pSample) const; + + VOID ComputeSurfaceCoordFromAddrMicroTiled( + UINT_64 addr, UINT_32 bitPosition, + UINT_32 bpp, UINT_32 pitch, UINT_32 height, UINT_32 numSamples, + AddrTileMode tileMode, UINT_32 tileBase, UINT_32 compBits, + UINT_32* pX, UINT_32* pY, UINT_32* pSlice, UINT_32* pSample, + AddrTileType microTileType, BOOL_32 isDepthSampleOrder) const; + + ADDR_E_RETURNCODE ComputeMicroTileEquation( + UINT_32 bpp, AddrTileMode tileMode, + AddrTileType microTileType, ADDR_EQUATION* pEquation) const; + + UINT_32 ComputePixelIndexWithinMicroTile( + UINT_32 x, UINT_32 y, UINT_32 z, + UINT_32 bpp, 
AddrTileMode tileMode, AddrTileType microTileType) const; + + /// Pure Virtual function for Hwl computing coord from offset inside micro tile + virtual VOID HwlComputePixelCoordFromOffset( + UINT_32 offset, UINT_32 bpp, UINT_32 numSamples, + AddrTileMode tileMode, UINT_32 tileBase, UINT_32 compBits, + UINT_32* pX, UINT_32* pY, UINT_32* pSlice, UINT_32* pSample, + AddrTileType microTileType, BOOL_32 isDepthSampleOrder) const = 0; + + // + // Addressing shared by all + // + virtual UINT_32 HwlGetPipes( + const ADDR_TILEINFO* pTileInfo) const; + + UINT_32 ComputePipeFromAddr( + UINT_64 addr, UINT_32 numPipes) const; + + virtual ADDR_E_RETURNCODE ComputePipeEquation( + UINT_32 log2BytesPP, UINT_32 threshX, UINT_32 threshY, ADDR_TILEINFO* pTileInfo, ADDR_EQUATION* pEquation) const + { + return ADDR_NOTSUPPORTED; + } + + /// Pure Virtual function for Hwl computing pipe from coord + virtual UINT_32 ComputePipeFromCoord( + UINT_32 x, UINT_32 y, UINT_32 slice, AddrTileMode tileMode, + UINT_32 pipeSwizzle, BOOL_32 flags, ADDR_TILEINFO* pTileInfo) const = 0; + + /// Pure Virtual function for Hwl computing coord Y for 8 pipe cmask/htile + virtual UINT_32 HwlComputeXmaskCoordYFrom8Pipe( + UINT_32 pipe, UINT_32 x) const = 0; + + // + // Misc helper + // + static const TileModeFlags ModeFlags[ADDR_TM_COUNT]; + + static UINT_32 Thickness( + AddrTileMode tileMode); + + // Checking tile mode + static BOOL_32 IsMacroTiled(AddrTileMode tileMode); + static BOOL_32 IsMacro3dTiled(AddrTileMode tileMode); + static BOOL_32 IsLinear(AddrTileMode tileMode); + static BOOL_32 IsMicroTiled(AddrTileMode tileMode); + static BOOL_32 IsPrtTileMode(AddrTileMode tileMode); + static BOOL_32 IsPrtNoRotationTileMode(AddrTileMode tileMode); + + /// Return TRUE if tile info is needed + BOOL_32 UseTileInfo() const + { + return !m_configFlags.ignoreTileInfo; + } + + /// Adjusts pitch alignment for flipping surface + VOID AdjustPitchAlignment( + ADDR_SURFACE_FLAGS flags, UINT_32* pPitchAlign) const; + + /// 
Overwrite tile config according to tile index + virtual ADDR_E_RETURNCODE HwlSetupTileCfg( + UINT_32 bpp, INT_32 index, INT_32 macroModeIndex, + ADDR_TILEINFO* pInfo, AddrTileMode* mode = NULL, AddrTileType* type = NULL) const; + + /// Overwrite macro tile config according to tile index + virtual INT_32 HwlComputeMacroModeIndex( + INT_32 index, ADDR_SURFACE_FLAGS flags, UINT_32 bpp, UINT_32 numSamples, + ADDR_TILEINFO* pTileInfo, AddrTileMode *pTileMode = NULL, AddrTileType *pTileType = NULL + ) const + { + return TileIndexNoMacroIndex; + } + + /// Pre-handler of 3x pitch (96 bit) adjustment + virtual UINT_32 HwlPreHandleBaseLvl3xPitch( + const ADDR_COMPUTE_SURFACE_INFO_INPUT* pIn, UINT_32 expPitch) const; + /// Post-handler of 3x pitch adjustment + virtual UINT_32 HwlPostHandleBaseLvl3xPitch( + const ADDR_COMPUTE_SURFACE_INFO_INPUT* pIn, UINT_32 expPitch) const; + /// Check miplevel after surface adjustment + ADDR_E_RETURNCODE PostComputeMipLevel( + ADDR_COMPUTE_SURFACE_INFO_INPUT* pIn, + ADDR_COMPUTE_SURFACE_INFO_OUTPUT* pOut) const; + + /// Quad buffer stereo support, has its implementation in ind. 
layer + VOID ComputeQbStereoInfo( + ADDR_COMPUTE_SURFACE_INFO_OUTPUT* pOut) const; + + /// Pure virutual function to compute stereo bank swizzle for right eye + virtual UINT_32 HwlComputeQbStereoRightSwizzle( + ADDR_COMPUTE_SURFACE_INFO_OUTPUT* pOut) const = 0; + + VOID OptimizeTileMode(ADDR_COMPUTE_SURFACE_INFO_INPUT* pInOut) const; + + /// Overwrite tile setting to PRT + virtual VOID HwlSetPrtTileMode(ADDR_COMPUTE_SURFACE_INFO_INPUT* pInOut) const + { + } + + static BOOL_32 DegradeTo1D( + UINT_32 width, UINT_32 height, + UINT_32 macroTilePitchAlign, UINT_32 macroTileHeightAlign); + +private: + // Disallow the copy constructor + Lib(const Lib& a); + + // Disallow the assignment operator + Lib& operator=(const Lib& a); + + UINT_32 ComputeCmaskBaseAlign( + ADDR_CMASK_FLAGS flags, ADDR_TILEINFO* pTileInfo) const; + + UINT_64 ComputeCmaskBytes( + UINT_32 pitch, UINT_32 height, UINT_32 numSlices) const; + + // + // CMASK/HTILE shared methods + // + VOID ComputeTileDataWidthAndHeight( + UINT_32 bpp, UINT_32 cacheBits, ADDR_TILEINFO* pTileInfo, + UINT_32* pMacroWidth, UINT_32* pMacroHeight) const; + + UINT_32 ComputeXmaskCoordYFromPipe( + UINT_32 pipe, UINT_32 x) const; +}; + +} // V1 +} // Addr + +#endif + diff -Nru mesa-18.3.3/src/amd/addrlib/src/core/addrlib2.cpp mesa-19.0.1/src/amd/addrlib/src/core/addrlib2.cpp --- mesa-18.3.3/src/amd/addrlib/src/core/addrlib2.cpp 1970-01-01 00:00:00.000000000 +0000 +++ mesa-19.0.1/src/amd/addrlib/src/core/addrlib2.cpp 2019-03-31 23:16:37.000000000 +0000 @@ -0,0 +1,1883 @@ +/* + * Copyright © 2007-2018 Advanced Micro Devices, Inc. + * All Rights Reserved. 
+ * + * Permission is hereby granted, free of charge, to any person obtaining + * a copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES + * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NON-INFRINGEMENT. IN NO EVENT SHALL THE COPYRIGHT HOLDERS, AUTHORS + * AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE + * USE OR OTHER DEALINGS IN THE SOFTWARE. + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + */ + +/** +************************************************************************************************************************ +* @file addrlib2.cpp +* @brief Contains the implementation for the AddrLib2 base class. 
+************************************************************************************************************************ +*/ + +#include "addrinterface.h" +#include "addrlib2.h" +#include "addrcommon.h" + +namespace Addr +{ +namespace V2 +{ + +//////////////////////////////////////////////////////////////////////////////////////////////////// +// Static Const Member +//////////////////////////////////////////////////////////////////////////////////////////////////// + +const Dim2d Lib::Block256_2d[] = {{16, 16}, {16, 8}, {8, 8}, {8, 4}, {4, 4}}; + +const Dim3d Lib::Block1K_3d[] = {{16, 8, 8}, {8, 8, 8}, {8, 8, 4}, {8, 4, 4}, {4, 4, 4}}; + +//////////////////////////////////////////////////////////////////////////////////////////////////// +// Constructor/Destructor +//////////////////////////////////////////////////////////////////////////////////////////////////// + +/** +************************************************************************************************************************ +* Lib::Lib +* +* @brief +* Constructor for the Addr::V2::Lib class +* +************************************************************************************************************************ +*/ +Lib::Lib() + : + Addr::Lib() +{ +} + +/** +************************************************************************************************************************ +* Lib::Lib +* +* @brief +* Constructor for the AddrLib2 class with hClient as parameter +* +************************************************************************************************************************ +*/ +Lib::Lib(const Client* pClient) + : + Addr::Lib(pClient) +{ +} + +/** +************************************************************************************************************************ +* Lib::~Lib +* +* @brief +* Destructor for the AddrLib2 class +* +************************************************************************************************************************ +*/ +Lib::~Lib() +{ +} + +/** 
+************************************************************************************************************************ +* Lib::GetLib +* +* @brief +* Get Addr::V2::Lib pointer +* +* @return +* An Addr::V2::Lib class pointer +************************************************************************************************************************ +*/ +Lib* Lib::GetLib( + ADDR_HANDLE hLib) ///< [in] handle of ADDR_HANDLE +{ + Addr::Lib* pAddrLib = Addr::Lib::GetLib(hLib); + if ((pAddrLib != NULL) && + (pAddrLib->GetChipFamily() <= ADDR_CHIP_FAMILY_VI)) + { + // only valid and GFX9+ ASIC can use AddrLib2 function. + ADDR_ASSERT_ALWAYS(); + hLib = NULL; + } + return static_cast(hLib); +} + +//////////////////////////////////////////////////////////////////////////////////////////////////// +// Surface Methods +//////////////////////////////////////////////////////////////////////////////////////////////////// + +/** +************************************************************************************************************************ +* Lib::ComputeSurfaceInfo +* +* @brief +* Interface function stub of AddrComputeSurfaceInfo. +* +* @return +* ADDR_E_RETURNCODE +************************************************************************************************************************ +*/ +ADDR_E_RETURNCODE Lib::ComputeSurfaceInfo( + const ADDR2_COMPUTE_SURFACE_INFO_INPUT* pIn, ///< [in] input structure + ADDR2_COMPUTE_SURFACE_INFO_OUTPUT* pOut ///< [out] output structure + ) const +{ + ADDR_E_RETURNCODE returnCode = ADDR_OK; + + if (GetFillSizeFieldsFlags() == TRUE) + { + if ((pIn->size != sizeof(ADDR2_COMPUTE_SURFACE_INFO_INPUT)) || + (pOut->size != sizeof(ADDR2_COMPUTE_SURFACE_INFO_OUTPUT))) + { + returnCode = ADDR_PARAMSIZEMISMATCH; + } + } + + // Adjust coming parameters. 
+ ADDR2_COMPUTE_SURFACE_INFO_INPUT localIn = *pIn; + localIn.width = Max(pIn->width, 1u); + localIn.height = Max(pIn->height, 1u); + localIn.numMipLevels = Max(pIn->numMipLevels, 1u); + localIn.numSlices = Max(pIn->numSlices, 1u); + localIn.numSamples = Max(pIn->numSamples, 1u); + localIn.numFrags = (localIn.numFrags == 0) ? localIn.numSamples : pIn->numFrags; + + UINT_32 expandX = 1; + UINT_32 expandY = 1; + ElemMode elemMode = ADDR_UNCOMPRESSED; + + if (returnCode == ADDR_OK) + { + // Set format to INVALID will skip this conversion + if (localIn.format != ADDR_FMT_INVALID) + { + // Get compression/expansion factors and element mode which indicates compression/expansion + localIn.bpp = GetElemLib()->GetBitsPerPixel(localIn.format, + &elemMode, + &expandX, + &expandY); + + // Special flag for 96 bit surface. 96 (or 48 if we support) bit surface's width is + // pre-multiplied by 3 and bpp is divided by 3. So pitch alignment for linear- + // aligned does not meet 64-pixel in real. We keep special handling in hwl since hw + // restrictions are different. 
+ // Also Mip 1+ needs an element pitch of 32 bits so we do not need this workaround + // but we use this flag to skip RestoreSurfaceInfo below + + if ((elemMode == ADDR_EXPANDED) && (expandX > 1)) + { + ADDR_ASSERT(IsLinear(localIn.swizzleMode)); + } + + UINT_32 basePitch = 0; + GetElemLib()->AdjustSurfaceInfo(elemMode, + expandX, + expandY, + &localIn.bpp, + &basePitch, + &localIn.width, + &localIn.height); + + // Overwrite these parameters if we have a valid format + } + + if (localIn.bpp != 0) + { + localIn.width = Max(localIn.width, 1u); + localIn.height = Max(localIn.height, 1u); + } + else // Rule out some invalid parameters + { + ADDR_ASSERT_ALWAYS(); + + returnCode = ADDR_INVALIDPARAMS; + } + } + + if (returnCode == ADDR_OK) + { + returnCode = ComputeSurfaceInfoSanityCheck(&localIn); + } + + if (returnCode == ADDR_OK) + { + VerifyMipLevelInfo(pIn); + + if (IsLinear(pIn->swizzleMode)) + { + // linear mode + returnCode = ComputeSurfaceInfoLinear(&localIn, pOut); + } + else + { + // tiled mode + returnCode = ComputeSurfaceInfoTiled(&localIn, pOut); + } + + if (returnCode == ADDR_OK) + { + pOut->bpp = localIn.bpp; + pOut->pixelPitch = pOut->pitch; + pOut->pixelHeight = pOut->height; + pOut->pixelMipChainPitch = pOut->mipChainPitch; + pOut->pixelMipChainHeight = pOut->mipChainHeight; + pOut->pixelBits = localIn.bpp; + + if (localIn.format != ADDR_FMT_INVALID) + { + UINT_32 pixelBits = pOut->pixelBits; + + GetElemLib()->RestoreSurfaceInfo(elemMode, + expandX, + expandY, + &pOut->pixelBits, + &pOut->pixelPitch, + &pOut->pixelHeight); + + GetElemLib()->RestoreSurfaceInfo(elemMode, + expandX, + expandY, + &pixelBits, + &pOut->pixelMipChainPitch, + &pOut->pixelMipChainHeight); + + if ((localIn.numMipLevels > 1) && (pOut->pMipInfo != NULL)) + { + for (UINT_32 i = 0; i < localIn.numMipLevels; i++) + { + pOut->pMipInfo[i].pixelPitch = pOut->pMipInfo[i].pitch; + pOut->pMipInfo[i].pixelHeight = pOut->pMipInfo[i].height; + + GetElemLib()->RestoreSurfaceInfo(elemMode, + 
expandX, + expandY, + &pixelBits, + &pOut->pMipInfo[i].pixelPitch, + &pOut->pMipInfo[i].pixelHeight); + } + } + } + + if (localIn.flags.needEquation && (Log2(localIn.numFrags) == 0)) + { + pOut->equationIndex = GetEquationIndex(&localIn, pOut); + } + + if (localIn.flags.qbStereo) + { + if (pOut->pStereoInfo != NULL) + { + ComputeQbStereoInfo(pOut); + } + } + } + } + + ADDR_ASSERT(pOut->surfSize != 0); + + ValidBaseAlignments(pOut->baseAlign); + + return returnCode; +} + +/** +************************************************************************************************************************ +* Lib::ComputeSurfaceInfo +* +* @brief +* Interface function stub of AddrComputeSurfaceInfo. +* +* @return +* ADDR_E_RETURNCODE +************************************************************************************************************************ +*/ +ADDR_E_RETURNCODE Lib::ComputeSurfaceAddrFromCoord( + const ADDR2_COMPUTE_SURFACE_ADDRFROMCOORD_INPUT* pIn, ///< [in] input structure + ADDR2_COMPUTE_SURFACE_ADDRFROMCOORD_OUTPUT* pOut ///< [out] output structure + ) const +{ + ADDR_E_RETURNCODE returnCode = ADDR_OK; + + if (GetFillSizeFieldsFlags() == TRUE) + { + if ((pIn->size != sizeof(ADDR2_COMPUTE_SURFACE_ADDRFROMCOORD_INPUT)) || + (pOut->size != sizeof(ADDR2_COMPUTE_SURFACE_ADDRFROMCOORD_OUTPUT))) + { + returnCode = ADDR_PARAMSIZEMISMATCH; + } + } + + ADDR2_COMPUTE_SURFACE_ADDRFROMCOORD_INPUT localIn = *pIn; + localIn.unalignedWidth = Max(pIn->unalignedWidth, 1u); + localIn.unalignedHeight = Max(pIn->unalignedHeight, 1u); + localIn.numMipLevels = Max(pIn->numMipLevels, 1u); + localIn.numSlices = Max(pIn->numSlices, 1u); + localIn.numSamples = Max(pIn->numSamples, 1u); + localIn.numFrags = Max(pIn->numFrags, 1u); + + if ((localIn.bpp < 8) || + (localIn.bpp > 128) || + ((localIn.bpp % 8) != 0) || + (localIn.sample >= localIn.numSamples) || + (localIn.slice >= localIn.numSlices) || + (localIn.mipId >= localIn.numMipLevels) || + (IsTex3d(localIn.resourceType) && + 
(Valid3DMipSliceIdConstraint(localIn.numSlices, localIn.mipId, localIn.slice) == FALSE))) + { + returnCode = ADDR_INVALIDPARAMS; + } + + if (returnCode == ADDR_OK) + { + if (IsLinear(localIn.swizzleMode)) + { + returnCode = ComputeSurfaceAddrFromCoordLinear(&localIn, pOut); + } + else + { + returnCode = ComputeSurfaceAddrFromCoordTiled(&localIn, pOut); + } + + if (returnCode == ADDR_OK) + { + pOut->prtBlockIndex = static_cast(pOut->addr / (64 * 1024)); + } + } + + return returnCode; +} + +/** +************************************************************************************************************************ +* Lib::ComputeSurfaceCoordFromAddr +* +* @brief +* Interface function stub of ComputeSurfaceCoordFromAddr. +* +* @return +* ADDR_E_RETURNCODE +************************************************************************************************************************ +*/ +ADDR_E_RETURNCODE Lib::ComputeSurfaceCoordFromAddr( + const ADDR2_COMPUTE_SURFACE_COORDFROMADDR_INPUT* pIn, ///< [in] input structure + ADDR2_COMPUTE_SURFACE_COORDFROMADDR_OUTPUT* pOut ///< [out] output structure + ) const +{ + ADDR_E_RETURNCODE returnCode = ADDR_OK; + + if (GetFillSizeFieldsFlags() == TRUE) + { + if ((pIn->size != sizeof(ADDR2_COMPUTE_SURFACE_COORDFROMADDR_INPUT)) || + (pOut->size != sizeof(ADDR2_COMPUTE_SURFACE_COORDFROMADDR_OUTPUT))) + { + returnCode = ADDR_PARAMSIZEMISMATCH; + } + } + + if ((pIn->bpp < 8) || + (pIn->bpp > 128) || + ((pIn->bpp % 8) != 0) || + (pIn->bitPosition >= 8)) + { + returnCode = ADDR_INVALIDPARAMS; + } + + if (returnCode == ADDR_OK) + { + if (IsLinear(pIn->swizzleMode)) + { + returnCode = ComputeSurfaceCoordFromAddrLinear(pIn, pOut); + } + else + { + returnCode = ComputeSurfaceCoordFromAddrTiled(pIn, pOut); + } + } + + return returnCode; +} + +//////////////////////////////////////////////////////////////////////////////////////////////////// +// CMASK/HTILE 
+//////////////////////////////////////////////////////////////////////////////////////////////////// + +/** +************************************************************************************************************************ +* Lib::ComputeHtileInfo +* +* @brief +* Interface function stub of AddrComputeHtilenfo +* +* @return +* ADDR_E_RETURNCODE +************************************************************************************************************************ +*/ +ADDR_E_RETURNCODE Lib::ComputeHtileInfo( + const ADDR2_COMPUTE_HTILE_INFO_INPUT* pIn, ///< [in] input structure + ADDR2_COMPUTE_HTILE_INFO_OUTPUT* pOut ///< [out] output structure + ) const +{ + ADDR_E_RETURNCODE returnCode; + + if ((GetFillSizeFieldsFlags() == TRUE) && + ((pIn->size != sizeof(ADDR2_COMPUTE_HTILE_INFO_INPUT)) || + (pOut->size != sizeof(ADDR2_COMPUTE_HTILE_INFO_OUTPUT)))) + { + returnCode = ADDR_INVALIDPARAMS; + } + else + { + returnCode = HwlComputeHtileInfo(pIn, pOut); + + ValidMetaBaseAlignments(pOut->baseAlign); + } + + return returnCode; +} + +/** +************************************************************************************************************************ +* Lib::ComputeHtileAddrFromCoord +* +* @brief +* Interface function stub of AddrComputeHtileAddrFromCoord +* +* @return +* ADDR_E_RETURNCODE +************************************************************************************************************************ +*/ +ADDR_E_RETURNCODE Lib::ComputeHtileAddrFromCoord( + const ADDR2_COMPUTE_HTILE_ADDRFROMCOORD_INPUT* pIn, ///< [in] input structure + ADDR2_COMPUTE_HTILE_ADDRFROMCOORD_OUTPUT* pOut) ///< [out] output structure +{ + ADDR_E_RETURNCODE returnCode; + + if ((GetFillSizeFieldsFlags() == TRUE) && + ((pIn->size != sizeof(ADDR2_COMPUTE_HTILE_ADDRFROMCOORD_INPUT)) || + (pOut->size != sizeof(ADDR2_COMPUTE_HTILE_ADDRFROMCOORD_OUTPUT)))) + { + returnCode = ADDR_INVALIDPARAMS; + } + else + { + returnCode = HwlComputeHtileAddrFromCoord(pIn, pOut); + } + + return 
returnCode; +} + +/** +************************************************************************************************************************ +* Lib::ComputeHtileCoordFromAddr +* +* @brief +* Interface function stub of AddrComputeHtileCoordFromAddr +* +* @return +* ADDR_E_RETURNCODE +************************************************************************************************************************ +*/ +ADDR_E_RETURNCODE Lib::ComputeHtileCoordFromAddr( + const ADDR2_COMPUTE_HTILE_COORDFROMADDR_INPUT* pIn, ///< [in] input structure + ADDR2_COMPUTE_HTILE_COORDFROMADDR_OUTPUT* pOut) ///< [out] output structure +{ + ADDR_E_RETURNCODE returnCode; + + if ((GetFillSizeFieldsFlags() == TRUE) && + ((pIn->size != sizeof(ADDR2_COMPUTE_HTILE_COORDFROMADDR_INPUT)) || + (pOut->size != sizeof(ADDR2_COMPUTE_HTILE_COORDFROMADDR_OUTPUT)))) + { + returnCode = ADDR_INVALIDPARAMS; + } + else + { + returnCode = HwlComputeHtileCoordFromAddr(pIn, pOut); + } + + return returnCode; +} + +/** +************************************************************************************************************************ +* Lib::ComputeCmaskInfo +* +* @brief +* Interface function stub of AddrComputeCmaskInfo +* +* @return +* ADDR_E_RETURNCODE +************************************************************************************************************************ +*/ +ADDR_E_RETURNCODE Lib::ComputeCmaskInfo( + const ADDR2_COMPUTE_CMASK_INFO_INPUT* pIn, ///< [in] input structure + ADDR2_COMPUTE_CMASK_INFO_OUTPUT* pOut ///< [out] output structure + ) const +{ + ADDR_E_RETURNCODE returnCode; + + if ((GetFillSizeFieldsFlags() == TRUE) && + ((pIn->size != sizeof(ADDR2_COMPUTE_CMASK_INFO_INPUT)) || + (pOut->size != sizeof(ADDR2_COMPUTE_CMASK_INFO_OUTPUT)))) + { + returnCode = ADDR_INVALIDPARAMS; + } + else if (pIn->cMaskFlags.linear) + { + returnCode = ADDR_INVALIDPARAMS; + } + else + { + returnCode = HwlComputeCmaskInfo(pIn, pOut); + + ValidMetaBaseAlignments(pOut->baseAlign); + } + + return 
returnCode; +} + +/** +************************************************************************************************************************ +* Lib::ComputeCmaskAddrFromCoord +* +* @brief +* Interface function stub of AddrComputeCmaskAddrFromCoord +* +* @return +* ADDR_E_RETURNCODE +************************************************************************************************************************ +*/ +ADDR_E_RETURNCODE Lib::ComputeCmaskAddrFromCoord( + const ADDR2_COMPUTE_CMASK_ADDRFROMCOORD_INPUT* pIn, ///< [in] input structure + ADDR2_COMPUTE_CMASK_ADDRFROMCOORD_OUTPUT* pOut) ///< [out] output structure +{ + ADDR_E_RETURNCODE returnCode; + + if ((GetFillSizeFieldsFlags() == TRUE) && + ((pIn->size != sizeof(ADDR2_COMPUTE_CMASK_ADDRFROMCOORD_INPUT)) || + (pOut->size != sizeof(ADDR2_COMPUTE_CMASK_ADDRFROMCOORD_OUTPUT)))) + { + returnCode = ADDR_INVALIDPARAMS; + } + else + { + returnCode = HwlComputeCmaskAddrFromCoord(pIn, pOut); + } + + return returnCode; +} + +/** +************************************************************************************************************************ +* Lib::ComputeCmaskCoordFromAddr +* +* @brief +* Interface function stub of AddrComputeCmaskCoordFromAddr +* +* @return +* ADDR_E_RETURNCODE +************************************************************************************************************************ +*/ +ADDR_E_RETURNCODE Lib::ComputeCmaskCoordFromAddr( + const ADDR2_COMPUTE_CMASK_COORDFROMADDR_INPUT* pIn, ///< [in] input structure + ADDR2_COMPUTE_CMASK_COORDFROMADDR_OUTPUT* pOut ///< [out] output structure + ) const +{ + ADDR_E_RETURNCODE returnCode = ADDR_NOTIMPLEMENTED; + + ADDR_NOT_IMPLEMENTED(); + + return returnCode; +} + +/** +************************************************************************************************************************ +* Lib::ComputeFmaskInfo +* +* @brief +* Interface function stub of ComputeFmaskInfo. 
+* +* @return +* ADDR_E_RETURNCODE +************************************************************************************************************************ +*/ +ADDR_E_RETURNCODE Lib::ComputeFmaskInfo( + const ADDR2_COMPUTE_FMASK_INFO_INPUT* pIn, ///< [in] input structure + ADDR2_COMPUTE_FMASK_INFO_OUTPUT* pOut ///< [out] output structure + ) +{ + ADDR_E_RETURNCODE returnCode; + + BOOL_32 valid = (IsZOrderSwizzle(pIn->swizzleMode) == TRUE) && + ((pIn->numSamples > 0) || (pIn->numFrags > 0)); + + if (GetFillSizeFieldsFlags()) + { + if ((pIn->size != sizeof(ADDR2_COMPUTE_FMASK_INFO_INPUT)) || + (pOut->size != sizeof(ADDR2_COMPUTE_FMASK_INFO_OUTPUT))) + { + valid = FALSE; + } + } + + if (valid == FALSE) + { + returnCode = ADDR_INVALIDPARAMS; + } + else + { + ADDR2_COMPUTE_SURFACE_INFO_INPUT localIn = {0}; + ADDR2_COMPUTE_SURFACE_INFO_OUTPUT localOut = {0}; + + localIn.size = sizeof(ADDR2_COMPUTE_SURFACE_INFO_INPUT); + localOut.size = sizeof(ADDR2_COMPUTE_SURFACE_INFO_OUTPUT); + + localIn.swizzleMode = pIn->swizzleMode; + localIn.numSlices = Max(pIn->numSlices, 1u); + localIn.width = Max(pIn->unalignedWidth, 1u); + localIn.height = Max(pIn->unalignedHeight, 1u); + localIn.bpp = GetFmaskBpp(pIn->numSamples, pIn->numFrags); + localIn.flags.fmask = 1; + localIn.numFrags = 1; + localIn.numSamples = 1; + localIn.resourceType = ADDR_RSRC_TEX_2D; + + if (localIn.bpp == 8) + { + localIn.format = ADDR_FMT_8; + } + else if (localIn.bpp == 16) + { + localIn.format = ADDR_FMT_16; + } + else if (localIn.bpp == 32) + { + localIn.format = ADDR_FMT_32; + } + else + { + localIn.format = ADDR_FMT_32_32; + } + + returnCode = ComputeSurfaceInfo(&localIn, &localOut); + + if (returnCode == ADDR_OK) + { + pOut->pitch = localOut.pitch; + pOut->height = localOut.height; + pOut->baseAlign = localOut.baseAlign; + pOut->numSlices = localOut.numSlices; + pOut->fmaskBytes = static_cast(localOut.surfSize); + pOut->sliceSize = static_cast(localOut.sliceSize); + pOut->bpp = localIn.bpp; + 
pOut->numSamples = 1; + } + } + + ValidBaseAlignments(pOut->baseAlign); + + return returnCode; +} + +/** +************************************************************************************************************************ +* Lib::ComputeFmaskAddrFromCoord +* +* @brief +* Interface function stub of ComputeFmaskAddrFromCoord. +* +* @return +* ADDR_E_RETURNCODE +************************************************************************************************************************ +*/ +ADDR_E_RETURNCODE Lib::ComputeFmaskAddrFromCoord( + const ADDR2_COMPUTE_FMASK_ADDRFROMCOORD_INPUT* pIn, ///< [in] input structure + ADDR2_COMPUTE_FMASK_ADDRFROMCOORD_OUTPUT* pOut ///< [out] output structure + ) const +{ + ADDR_E_RETURNCODE returnCode = ADDR_NOTIMPLEMENTED; + + ADDR_NOT_IMPLEMENTED(); + + return returnCode; +} + +/** +************************************************************************************************************************ +* Lib::ComputeFmaskCoordFromAddr +* +* @brief +* Interface function stub of ComputeFmaskAddrFromCoord. 
+* +* @return +* ADDR_E_RETURNCODE +************************************************************************************************************************ +*/ +ADDR_E_RETURNCODE Lib::ComputeFmaskCoordFromAddr( + const ADDR2_COMPUTE_FMASK_COORDFROMADDR_INPUT* pIn, ///< [in] input structure + ADDR2_COMPUTE_FMASK_COORDFROMADDR_OUTPUT* pOut ///< [out] output structure + ) const +{ + ADDR_E_RETURNCODE returnCode = ADDR_NOTIMPLEMENTED; + + ADDR_NOT_IMPLEMENTED(); + + return returnCode; +} + +/** +************************************************************************************************************************ +* Lib::ComputeDccInfo +* +* @brief +* Interface function to compute DCC key info +* +* @return +* return code of HwlComputeDccInfo +************************************************************************************************************************ +*/ +ADDR_E_RETURNCODE Lib::ComputeDccInfo( + const ADDR2_COMPUTE_DCCINFO_INPUT* pIn, ///< [in] input structure + ADDR2_COMPUTE_DCCINFO_OUTPUT* pOut ///< [out] output structure + ) const +{ + ADDR_E_RETURNCODE returnCode; + + if ((GetFillSizeFieldsFlags() == TRUE) && + ((pIn->size != sizeof(ADDR2_COMPUTE_DCCINFO_INPUT)) || + (pOut->size != sizeof(ADDR2_COMPUTE_DCCINFO_OUTPUT)))) + { + returnCode = ADDR_INVALIDPARAMS; + } + else + { + returnCode = HwlComputeDccInfo(pIn, pOut); + + ValidMetaBaseAlignments(pOut->dccRamBaseAlign); + } + + return returnCode; +} + +/** +************************************************************************************************************************ +* Lib::ComputeDccAddrFromCoord +* +* @brief +* Interface function stub of ComputeDccAddrFromCoord +* +* @return +* ADDR_E_RETURNCODE +************************************************************************************************************************ +*/ +ADDR_E_RETURNCODE Lib::ComputeDccAddrFromCoord( + const ADDR2_COMPUTE_DCC_ADDRFROMCOORD_INPUT* pIn, ///< [in] input structure + ADDR2_COMPUTE_DCC_ADDRFROMCOORD_OUTPUT* pOut) 
///< [out] output structure +{ + ADDR_E_RETURNCODE returnCode; + + if ((GetFillSizeFieldsFlags() == TRUE) && + ((pIn->size != sizeof(ADDR2_COMPUTE_DCC_ADDRFROMCOORD_INPUT)) || + (pOut->size != sizeof(ADDR2_COMPUTE_DCC_ADDRFROMCOORD_OUTPUT)))) + { + returnCode = ADDR_INVALIDPARAMS; + } + else + { + returnCode = HwlComputeDccAddrFromCoord(pIn, pOut); + } + + return returnCode; +} + +/** +************************************************************************************************************************ +* Lib::ComputePipeBankXor +* +* @brief +* Interface function stub of Addr2ComputePipeBankXor. +* +* @return +* ADDR_E_RETURNCODE +************************************************************************************************************************ +*/ +ADDR_E_RETURNCODE Lib::ComputePipeBankXor( + const ADDR2_COMPUTE_PIPEBANKXOR_INPUT* pIn, + ADDR2_COMPUTE_PIPEBANKXOR_OUTPUT* pOut) +{ + ADDR_E_RETURNCODE returnCode; + + if ((GetFillSizeFieldsFlags() == TRUE) && + ((pIn->size != sizeof(ADDR2_COMPUTE_PIPEBANKXOR_INPUT)) || + (pOut->size != sizeof(ADDR2_COMPUTE_PIPEBANKXOR_OUTPUT)))) + { + returnCode = ADDR_INVALIDPARAMS; + } + else + { + returnCode = HwlComputePipeBankXor(pIn, pOut); + } + + return returnCode; +} + +/** +************************************************************************************************************************ +* Lib::ComputeSlicePipeBankXor +* +* @brief +* Interface function stub of Addr2ComputeSlicePipeBankXor. 
+* +* @return +* ADDR_E_RETURNCODE +************************************************************************************************************************ +*/ +ADDR_E_RETURNCODE Lib::ComputeSlicePipeBankXor( + const ADDR2_COMPUTE_SLICE_PIPEBANKXOR_INPUT* pIn, + ADDR2_COMPUTE_SLICE_PIPEBANKXOR_OUTPUT* pOut) +{ + ADDR_E_RETURNCODE returnCode; + + if ((GetFillSizeFieldsFlags() == TRUE) && + ((pIn->size != sizeof(ADDR2_COMPUTE_SLICE_PIPEBANKXOR_INPUT)) || + (pOut->size != sizeof(ADDR2_COMPUTE_SLICE_PIPEBANKXOR_OUTPUT)))) + { + returnCode = ADDR_INVALIDPARAMS; + } + else if ((IsThin(pIn->resourceType, pIn->swizzleMode) == FALSE) || + (IsNonPrtXor(pIn->swizzleMode) == FALSE) || + (pIn->numSamples > 1)) + { + returnCode = ADDR_NOTSUPPORTED; + } + else + { + returnCode = HwlComputeSlicePipeBankXor(pIn, pOut); + } + + return returnCode; +} + +/** +************************************************************************************************************************ +* Lib::ComputeSubResourceOffsetForSwizzlePattern +* +* @brief +* Interface function stub of Addr2ComputeSubResourceOffsetForSwizzlePattern. 
+* +* @return +* ADDR_E_RETURNCODE +************************************************************************************************************************ +*/ +ADDR_E_RETURNCODE Lib::ComputeSubResourceOffsetForSwizzlePattern( + const ADDR2_COMPUTE_SUBRESOURCE_OFFSET_FORSWIZZLEPATTERN_INPUT* pIn, + ADDR2_COMPUTE_SUBRESOURCE_OFFSET_FORSWIZZLEPATTERN_OUTPUT* pOut) +{ + ADDR_E_RETURNCODE returnCode; + + if ((GetFillSizeFieldsFlags() == TRUE) && + ((pIn->size != sizeof(ADDR2_COMPUTE_SUBRESOURCE_OFFSET_FORSWIZZLEPATTERN_INPUT)) || + (pOut->size != sizeof(ADDR2_COMPUTE_SUBRESOURCE_OFFSET_FORSWIZZLEPATTERN_OUTPUT)))) + { + returnCode = ADDR_INVALIDPARAMS; + } + else + { + returnCode = HwlComputeSubResourceOffsetForSwizzlePattern(pIn, pOut); + } + + return returnCode; +} + +/** +************************************************************************************************************************ +* Lib::ExtractPipeBankXor +* +* @brief +* Internal function to extract bank and pipe xor bits from combined xor bits. 
+* +* @return +* ADDR_E_RETURNCODE +************************************************************************************************************************ +*/ +ADDR_E_RETURNCODE Lib::ExtractPipeBankXor( + UINT_32 pipeBankXor, + UINT_32 bankBits, + UINT_32 pipeBits, + UINT_32* pBankX, + UINT_32* pPipeX) +{ + ADDR_E_RETURNCODE returnCode; + + if (pipeBankXor < (1u << (pipeBits + bankBits))) + { + *pPipeX = pipeBankXor % (1 << pipeBits); + *pBankX = pipeBankXor >> pipeBits; + returnCode = ADDR_OK; + } + else + { + ADDR_ASSERT_ALWAYS(); + returnCode = ADDR_INVALIDPARAMS; + } + + return returnCode; +} + +/** +************************************************************************************************************************ +* Lib::ComputeSurfaceInfoSanityCheck +* +* @brief +* Internal function to do basic sanity check before compute surface info +* +* @return +* ADDR_E_RETURNCODE +************************************************************************************************************************ +*/ +ADDR_E_RETURNCODE Lib::ComputeSurfaceInfoSanityCheck( + const ADDR2_COMPUTE_SURFACE_INFO_INPUT* pIn ///< [in] input structure + ) const +{ + ADDR_E_RETURNCODE returnCode; + + if ((GetFillSizeFieldsFlags() == TRUE) && + (pIn->size != sizeof(ADDR2_COMPUTE_SURFACE_INFO_INPUT))) + { + returnCode = ADDR_INVALIDPARAMS; + } + else + { + returnCode = HwlComputeSurfaceInfoSanityCheck(pIn); + } + + return returnCode; +} + +/** +************************************************************************************************************************ +* Lib::ApplyCustomizedPitchHeight +* +* @brief +* Helper function to override hw required row pitch/slice pitch by customrized one +* +* @return +* ADDR_E_RETURNCODE +************************************************************************************************************************ +*/ +ADDR_E_RETURNCODE Lib::ApplyCustomizedPitchHeight( + const ADDR2_COMPUTE_SURFACE_INFO_INPUT* pIn, ///< [in] input structure + UINT_32 
elementBytes, ///< [in] element bytes per element + UINT_32 pitchAlignInElement, ///< [in] pitch alignment in element + UINT_32* pPitch, ///< [in/out] pitch + UINT_32* pHeight ///< [in/out] height + ) const +{ + ADDR_E_RETURNCODE returnCode = ADDR_OK; + + if (pIn->numMipLevels <= 1) + { + if (pIn->pitchInElement > 0) + { + if ((pIn->pitchInElement % pitchAlignInElement) != 0) + { + returnCode = ADDR_INVALIDPARAMS; + } + else if (pIn->pitchInElement < (*pPitch)) + { + returnCode = ADDR_INVALIDPARAMS; + } + else + { + *pPitch = pIn->pitchInElement; + } + } + + if (returnCode == ADDR_OK) + { + if (pIn->sliceAlign > 0) + { + UINT_32 customizedHeight = pIn->sliceAlign / elementBytes / (*pPitch); + + if (customizedHeight * elementBytes * (*pPitch) != pIn->sliceAlign) + { + returnCode = ADDR_INVALIDPARAMS; + } + else if ((pIn->numSlices > 1) && ((*pHeight) != customizedHeight)) + { + returnCode = ADDR_INVALIDPARAMS; + } + else + { + *pHeight = customizedHeight; + } + } + } + } + + return returnCode; +} + +/** +************************************************************************************************************************ +* Lib::ComputeSurfaceInfoLinear +* +* @brief +* Internal function to calculate alignment for linear swizzle surface +* +* @return +* ADDR_E_RETURNCODE +************************************************************************************************************************ +*/ +ADDR_E_RETURNCODE Lib::ComputeSurfaceInfoLinear( + const ADDR2_COMPUTE_SURFACE_INFO_INPUT* pIn, ///< [in] input structure + ADDR2_COMPUTE_SURFACE_INFO_OUTPUT* pOut ///< [out] output structure + ) const +{ + return HwlComputeSurfaceInfoLinear(pIn, pOut); +} + +/** +************************************************************************************************************************ +* Lib::ComputeSurfaceInfoTiled +* +* @brief +* Internal function to calculate alignment for tiled swizzle surface +* +* @return +* ADDR_E_RETURNCODE 
+************************************************************************************************************************ +*/ +ADDR_E_RETURNCODE Lib::ComputeSurfaceInfoTiled( + const ADDR2_COMPUTE_SURFACE_INFO_INPUT* pIn, ///< [in] input structure + ADDR2_COMPUTE_SURFACE_INFO_OUTPUT* pOut ///< [out] output structure + ) const +{ + return HwlComputeSurfaceInfoTiled(pIn, pOut); +} + +/** +************************************************************************************************************************ +* Lib::ComputeSurfaceAddrFromCoordLinear +* +* @brief +* Internal function to calculate address from coord for linear swizzle surface +* +* @return +* ADDR_E_RETURNCODE +************************************************************************************************************************ +*/ +ADDR_E_RETURNCODE Lib::ComputeSurfaceAddrFromCoordLinear( + const ADDR2_COMPUTE_SURFACE_ADDRFROMCOORD_INPUT* pIn, ///< [in] input structure + ADDR2_COMPUTE_SURFACE_ADDRFROMCOORD_OUTPUT* pOut ///< [out] output structure + ) const +{ + ADDR_E_RETURNCODE returnCode = ADDR_OK; + BOOL_32 valid = (pIn->numSamples <= 1) && (pIn->numFrags <= 1) && (pIn->pipeBankXor == 0); + + if (valid) + { + if (IsTex1d(pIn->resourceType)) + { + valid = (pIn->y == 0); + } + } + + if (valid) + { + ADDR2_COMPUTE_SURFACE_INFO_INPUT localIn = {0}; + ADDR2_COMPUTE_SURFACE_INFO_OUTPUT localOut = {0}; + ADDR2_MIP_INFO mipInfo[MaxMipLevels]; + + localIn.bpp = pIn->bpp; + localIn.flags = pIn->flags; + localIn.width = Max(pIn->unalignedWidth, 1u); + localIn.height = Max(pIn->unalignedHeight, 1u); + localIn.numSlices = Max(pIn->numSlices, 1u); + localIn.numMipLevels = Max(pIn->numMipLevels, 1u); + localIn.resourceType = pIn->resourceType; + + if (localIn.numMipLevels <= 1) + { + localIn.pitchInElement = pIn->pitchInElement; + } + + localOut.pMipInfo = mipInfo; + + returnCode = ComputeSurfaceInfoLinear(&localIn, &localOut); + + if (returnCode == ADDR_OK) + { + pOut->addr = (localOut.sliceSize * pIn->slice) + 
+ mipInfo[pIn->mipId].offset + + (pIn->y * mipInfo[pIn->mipId].pitch + pIn->x) * (pIn->bpp >> 3); + pOut->bitPosition = 0; + } + else + { + valid = FALSE; + } + } + + if (valid == FALSE) + { + returnCode = ADDR_INVALIDPARAMS; + } + + return returnCode; +} + +/** +************************************************************************************************************************ +* Lib::ComputeSurfaceAddrFromCoordTiled +* +* @brief +* Internal function to calculate address from coord for tiled swizzle surface +* +* @return +* ADDR_E_RETURNCODE +************************************************************************************************************************ +*/ +ADDR_E_RETURNCODE Lib::ComputeSurfaceAddrFromCoordTiled( + const ADDR2_COMPUTE_SURFACE_ADDRFROMCOORD_INPUT* pIn, ///< [in] input structure + ADDR2_COMPUTE_SURFACE_ADDRFROMCOORD_OUTPUT* pOut ///< [out] output structure + ) const +{ + return HwlComputeSurfaceAddrFromCoordTiled(pIn, pOut); +} + +/** +************************************************************************************************************************ +* Lib::ComputeSurfaceCoordFromAddrLinear +* +* @brief +* Internal function to calculate coord from address for linear swizzle surface +* +* @return +* ADDR_E_RETURNCODE +************************************************************************************************************************ +*/ +ADDR_E_RETURNCODE Lib::ComputeSurfaceCoordFromAddrLinear( + const ADDR2_COMPUTE_SURFACE_COORDFROMADDR_INPUT* pIn, ///< [in] input structure + ADDR2_COMPUTE_SURFACE_COORDFROMADDR_OUTPUT* pOut ///< [out] output structure + ) const +{ + ADDR_E_RETURNCODE returnCode = ADDR_OK; + + BOOL_32 valid = (pIn->numSamples <= 1) && (pIn->numFrags <= 1); + + if (valid) + { + if (IsTex1d(pIn->resourceType)) + { + valid = (pIn->unalignedHeight == 1); + } + } + + if (valid) + { + ADDR2_COMPUTE_SURFACE_INFO_INPUT localIn = {0}; + ADDR2_COMPUTE_SURFACE_INFO_OUTPUT localOut = {0}; + localIn.bpp = pIn->bpp; + 
localIn.flags = pIn->flags; + localIn.width = Max(pIn->unalignedWidth, 1u); + localIn.height = Max(pIn->unalignedHeight, 1u); + localIn.numSlices = Max(pIn->numSlices, 1u); + localIn.numMipLevels = Max(pIn->numMipLevels, 1u); + localIn.resourceType = pIn->resourceType; + if (localIn.numMipLevels <= 1) + { + localIn.pitchInElement = pIn->pitchInElement; + } + returnCode = ComputeSurfaceInfoLinear(&localIn, &localOut); + + if (returnCode == ADDR_OK) + { + pOut->slice = static_cast(pIn->addr / localOut.sliceSize); + pOut->sample = 0; + + UINT_32 offsetInSlice = static_cast(pIn->addr % localOut.sliceSize); + UINT_32 elementBytes = pIn->bpp >> 3; + UINT_32 mipOffsetInSlice = 0; + UINT_32 mipSize = 0; + UINT_32 mipId = 0; + for (; mipId < pIn->numMipLevels ; mipId++) + { + if (IsTex1d(pIn->resourceType)) + { + mipSize = localOut.pitch * elementBytes; + } + else + { + UINT_32 currentMipHeight = (PowTwoAlign(localIn.height, (1 << mipId))) >> mipId; + mipSize = currentMipHeight * localOut.pitch * elementBytes; + } + + if (mipSize == 0) + { + valid = FALSE; + break; + } + else if ((mipSize + mipOffsetInSlice) > offsetInSlice) + { + break; + } + else + { + mipOffsetInSlice += mipSize; + if ((mipId == (pIn->numMipLevels - 1)) || + (mipOffsetInSlice >= localOut.sliceSize)) + { + valid = FALSE; + } + } + } + + if (valid) + { + pOut->mipId = mipId; + + UINT_32 elemOffsetInMip = (offsetInSlice - mipOffsetInSlice) / elementBytes; + if (IsTex1d(pIn->resourceType)) + { + if (elemOffsetInMip < localOut.pitch) + { + pOut->x = elemOffsetInMip; + pOut->y = 0; + } + else + { + valid = FALSE; + } + } + else + { + pOut->y = elemOffsetInMip / localOut.pitch; + pOut->x = elemOffsetInMip % localOut.pitch; + } + + if ((pOut->slice >= pIn->numSlices) || + (pOut->mipId >= pIn->numMipLevels) || + (pOut->x >= Max((pIn->unalignedWidth >> pOut->mipId), 1u)) || + (pOut->y >= Max((pIn->unalignedHeight >> pOut->mipId), 1u)) || + (IsTex3d(pIn->resourceType) && + (FALSE == 
Valid3DMipSliceIdConstraint(pIn->numSlices, + pOut->mipId, + pOut->slice)))) + { + valid = FALSE; + } + } + } + else + { + valid = FALSE; + } + } + + if (valid == FALSE) + { + returnCode = ADDR_INVALIDPARAMS; + } + + return returnCode; +} + +/** +************************************************************************************************************************ +* Lib::ComputeSurfaceCoordFromAddrTiled +* +* @brief +* Internal function to calculate coord from address for tiled swizzle surface +* +* @return +* ADDR_E_RETURNCODE +************************************************************************************************************************ +*/ +ADDR_E_RETURNCODE Lib::ComputeSurfaceCoordFromAddrTiled( + const ADDR2_COMPUTE_SURFACE_COORDFROMADDR_INPUT* pIn, ///< [in] input structure + ADDR2_COMPUTE_SURFACE_COORDFROMADDR_OUTPUT* pOut ///< [out] output structure + ) const +{ + ADDR_E_RETURNCODE returnCode = ADDR_NOTIMPLEMENTED; + + ADDR_NOT_IMPLEMENTED(); + + return returnCode; +} + +/** +************************************************************************************************************************ +* Lib::ComputeBlockDimensionForSurf +* +* @brief +* Internal function to get block width/height/depth in element from surface input params. 
+* +* @return +* ADDR_E_RETURNCODE +************************************************************************************************************************ +*/ +ADDR_E_RETURNCODE Lib::ComputeBlockDimensionForSurf( + UINT_32* pWidth, + UINT_32* pHeight, + UINT_32* pDepth, + UINT_32 bpp, + UINT_32 numSamples, + AddrResourceType resourceType, + AddrSwizzleMode swizzleMode) const +{ + ADDR_E_RETURNCODE returnCode = ComputeBlockDimension(pWidth, + pHeight, + pDepth, + bpp, + resourceType, + swizzleMode); + + if ((returnCode == ADDR_OK) && (numSamples > 1) && IsThin(resourceType, swizzleMode)) + { + const UINT_32 log2blkSize = GetBlockSizeLog2(swizzleMode); + const UINT_32 log2sample = Log2(numSamples); + const UINT_32 q = log2sample >> 1; + const UINT_32 r = log2sample & 1; + + if (log2blkSize & 1) + { + *pWidth >>= q; + *pHeight >>= (q + r); + } + else + { + *pWidth >>= (q + r); + *pHeight >>= q; + } + } + + return returnCode; +} + +/** +************************************************************************************************************************ +* Lib::ComputeBlockDimension +* +* @brief +* Internal function to get block width/height/depth in element without considering MSAA case +* +* @return +* ADDR_E_RETURNCODE +************************************************************************************************************************ +*/ +ADDR_E_RETURNCODE Lib::ComputeBlockDimension( + UINT_32* pWidth, + UINT_32* pHeight, + UINT_32* pDepth, + UINT_32 bpp, + AddrResourceType resourceType, + AddrSwizzleMode swizzleMode) const +{ + ADDR_E_RETURNCODE returnCode = ADDR_OK; + + UINT_32 eleBytes = bpp >> 3; + UINT_32 microBlockSizeTableIndex = Log2(eleBytes); + UINT_32 log2blkSize = GetBlockSizeLog2(swizzleMode); + + if (IsThin(resourceType, swizzleMode)) + { + UINT_32 log2blkSizeIn256B = log2blkSize - 8; + UINT_32 widthAmp = log2blkSizeIn256B / 2; + UINT_32 heightAmp = log2blkSizeIn256B - widthAmp; + + ADDR_ASSERT(microBlockSizeTableIndex < sizeof(Block256_2d) / 
sizeof(Block256_2d[0])); + + *pWidth = (Block256_2d[microBlockSizeTableIndex].w << widthAmp); + *pHeight = (Block256_2d[microBlockSizeTableIndex].h << heightAmp); + *pDepth = 1; + } + else if (IsThick(resourceType, swizzleMode)) + { + UINT_32 log2blkSizeIn1KB = log2blkSize - 10; + UINT_32 averageAmp = log2blkSizeIn1KB / 3; + UINT_32 restAmp = log2blkSizeIn1KB % 3; + + ADDR_ASSERT(microBlockSizeTableIndex < sizeof(Block1K_3d) / sizeof(Block1K_3d[0])); + + *pWidth = Block1K_3d[microBlockSizeTableIndex].w << averageAmp; + *pHeight = Block1K_3d[microBlockSizeTableIndex].h << (averageAmp + (restAmp / 2)); + *pDepth = Block1K_3d[microBlockSizeTableIndex].d << (averageAmp + ((restAmp != 0) ? 1 : 0)); + } + else + { + ADDR_ASSERT_ALWAYS(); + returnCode = ADDR_INVALIDPARAMS; + } + + return returnCode; +} + +/** +************************************************************************************************************************ +* Lib::GetMipTailDim +* +* @brief +* Internal function to get out max dimension of first level in mip tail +* +* @return +* Max Width/Height/Depth value of the first mip fitted in mip tail +************************************************************************************************************************ +*/ +Dim3d Lib::GetMipTailDim( + AddrResourceType resourceType, + AddrSwizzleMode swizzleMode, + UINT_32 blockWidth, + UINT_32 blockHeight, + UINT_32 blockDepth) const +{ + Dim3d out = {blockWidth, blockHeight, blockDepth}; + UINT_32 log2blkSize = GetBlockSizeLog2(swizzleMode); + + if (IsThick(resourceType, swizzleMode)) + { + UINT_32 dim = log2blkSize % 3; + + if (dim == 0) + { + out.h >>= 1; + } + else if (dim == 1) + { + out.w >>= 1; + } + else + { + out.d >>= 1; + } + } + else + { + if (log2blkSize & 1) + { + out.h >>= 1; + } + else + { + out.w >>= 1; + } + } + + return out; +} + +/** +************************************************************************************************************************ +* 
Lib::ComputeSurface2DMicroBlockOffset +* +* @brief +* Internal function to calculate micro block (256B) offset from coord for 2D resource +* +* @return +* micro block (256B) offset for 2D resource +************************************************************************************************************************ +*/ +UINT_32 Lib::ComputeSurface2DMicroBlockOffset( + const _ADDR2_COMPUTE_SURFACE_ADDRFROMCOORD_INPUT* pIn) const +{ + ADDR_ASSERT(IsThin(pIn->resourceType, pIn->swizzleMode)); + + UINT_32 log2ElementBytes = Log2(pIn->bpp >> 3); + UINT_32 microBlockOffset = 0; + if (IsStandardSwizzle(pIn->resourceType, pIn->swizzleMode)) + { + UINT_32 xBits = pIn->x << log2ElementBytes; + microBlockOffset = (xBits & 0xf) | ((pIn->y & 0x3) << 4); + if (log2ElementBytes < 3) + { + microBlockOffset |= (pIn->y & 0x4) << 4; + if (log2ElementBytes == 0) + { + microBlockOffset |= (pIn->y & 0x8) << 4; + } + else + { + microBlockOffset |= (xBits & 0x10) << 3; + } + } + else + { + microBlockOffset |= (xBits & 0x30) << 2; + } + } + else if (IsDisplaySwizzle(pIn->resourceType, pIn->swizzleMode)) + { + if (log2ElementBytes == 4) + { + microBlockOffset = (GetBit(pIn->x, 0) << 4) | + (GetBit(pIn->y, 0) << 5) | + (GetBit(pIn->x, 1) << 6) | + (GetBit(pIn->y, 1) << 7); + } + else + { + microBlockOffset = GetBits(pIn->x, 0, 3, log2ElementBytes) | + GetBits(pIn->y, 1, 2, 3 + log2ElementBytes) | + GetBits(pIn->x, 3, 1, 5 + log2ElementBytes) | + GetBits(pIn->y, 3, 1, 6 + log2ElementBytes); + microBlockOffset = GetBits(microBlockOffset, 0, 4, 0) | + (GetBit(pIn->y, 0) << 4) | + GetBits(microBlockOffset, 4, 3, 5); + } + } + else if (IsRotateSwizzle(pIn->swizzleMode)) + { + microBlockOffset = GetBits(pIn->y, 0, 3, log2ElementBytes) | + GetBits(pIn->x, 1, 2, 3 + log2ElementBytes) | + GetBits(pIn->x, 3, 1, 5 + log2ElementBytes) | + GetBits(pIn->y, 3, 1, 6 + log2ElementBytes); + microBlockOffset = GetBits(microBlockOffset, 0, 4, 0) | + (GetBit(pIn->x, 0) << 4) | + GetBits(microBlockOffset, 4, 3, 
5); + if (log2ElementBytes == 3) + { + microBlockOffset = GetBits(microBlockOffset, 0, 6, 0) | + GetBits(pIn->x, 1, 2, 6); + } + } + + return microBlockOffset; +} + +/** +************************************************************************************************************************ +* Lib::ComputeSurface3DMicroBlockOffset +* +* @brief +* Internal function to calculate micro block (1KB) offset from coord for 3D resource +* +* @return +* micro block (1KB) offset for 3D resource +************************************************************************************************************************ +*/ +UINT_32 Lib::ComputeSurface3DMicroBlockOffset( + const _ADDR2_COMPUTE_SURFACE_ADDRFROMCOORD_INPUT* pIn) const +{ + ADDR_ASSERT(IsThick(pIn->resourceType, pIn->swizzleMode)); + + UINT_32 log2ElementBytes = Log2(pIn->bpp >> 3); + UINT_32 microBlockOffset = 0; + if (IsStandardSwizzle(pIn->resourceType, pIn->swizzleMode)) + { + if (log2ElementBytes == 0) + { + microBlockOffset = ((pIn->slice & 4) >> 2) | ((pIn->y & 4) >> 1); + } + else if (log2ElementBytes == 1) + { + microBlockOffset = ((pIn->slice & 4) >> 2) | ((pIn->y & 4) >> 1); + } + else if (log2ElementBytes == 2) + { + microBlockOffset = ((pIn->y & 4) >> 2) | ((pIn->x & 4) >> 1); + } + else if (log2ElementBytes == 3) + { + microBlockOffset = (pIn->x & 6) >> 1; + } + else + { + microBlockOffset = pIn->x & 3; + } + + microBlockOffset <<= 8; + + UINT_32 xBits = pIn->x << log2ElementBytes; + microBlockOffset |= (xBits & 0xf) | ((pIn->y & 0x3) << 4) | ((pIn->slice & 0x3) << 6); + } + else if (IsZOrderSwizzle(pIn->swizzleMode)) + { + UINT_32 xh, yh, zh; + + if (log2ElementBytes == 0) + { + microBlockOffset = + (pIn->x & 1) | ((pIn->y & 1) << 1) | ((pIn->x & 2) << 1) | ((pIn->y & 2) << 2); + microBlockOffset = microBlockOffset | ((pIn->slice & 3) << 4) | ((pIn->x & 4) << 4); + + xh = pIn->x >> 3; + yh = pIn->y >> 2; + zh = pIn->slice >> 2; + } + else if (log2ElementBytes == 1) + { + microBlockOffset = + (pIn->x & 
1) | ((pIn->y & 1) << 1) | ((pIn->x & 2) << 1) | ((pIn->y & 2) << 2); + microBlockOffset = (microBlockOffset << 1) | ((pIn->slice & 3) << 5); + + xh = pIn->x >> 2; + yh = pIn->y >> 2; + zh = pIn->slice >> 2; + } + else if (log2ElementBytes == 2) + { + microBlockOffset = + (pIn->x & 1) | ((pIn->y & 1) << 1) | ((pIn->x & 2) << 1) | ((pIn->slice & 1) << 3); + microBlockOffset = (microBlockOffset << 2) | ((pIn->y & 2) << 5); + + xh = pIn->x >> 2; + yh = pIn->y >> 2; + zh = pIn->slice >> 1; + } + else if (log2ElementBytes == 3) + { + microBlockOffset = + (pIn->x & 1) | ((pIn->y & 1) << 1) | ((pIn->slice & 1) << 2) | ((pIn->x & 2) << 2); + microBlockOffset <<= 3; + + xh = pIn->x >> 2; + yh = pIn->y >> 1; + zh = pIn->slice >> 1; + } + else + { + microBlockOffset = + (((pIn->x & 1) | ((pIn->y & 1) << 1) | ((pIn->slice & 1) << 2)) << 4); + + xh = pIn->x >> 1; + yh = pIn->y >> 1; + zh = pIn->slice >> 1; + } + + microBlockOffset |= ((MortonGen3d(xh, yh, zh, 1) << 7) & 0x380); + } + + return microBlockOffset; +} + +/** +************************************************************************************************************************ +* Lib::GetPipeXorBits +* +* @brief +* Internal function to get bits number for pipe/se xor operation +* +* @return +* ADDR_E_RETURNCODE +************************************************************************************************************************ +*/ +UINT_32 Lib::GetPipeXorBits( + UINT_32 macroBlockBits) const +{ + ADDR_ASSERT(macroBlockBits >= m_pipeInterleaveLog2); + + // Total available xor bits + UINT_32 xorBits = macroBlockBits - m_pipeInterleaveLog2; + + // Pipe/Se xor bits + UINT_32 pipeBits = Min(xorBits, m_pipesLog2 + m_seLog2); + + return pipeBits; +} + +/** +************************************************************************************************************************ +* Lib::GetBankXorBits +* +* @brief +* Internal function to get bits number for pipe/se xor operation +* +* @return +* ADDR_E_RETURNCODE 
+************************************************************************************************************************ +*/ +UINT_32 Lib::GetBankXorBits( + UINT_32 macroBlockBits) const +{ + UINT_32 pipeBits = GetPipeXorBits(macroBlockBits); + + // Bank xor bits + UINT_32 bankBits = Min(macroBlockBits - pipeBits - m_pipeInterleaveLog2, m_banksLog2); + + return bankBits; +} + +/** +************************************************************************************************************************ +* Lib::Addr2GetPreferredSurfaceSetting +* +* @brief +* Internal function to get suggested surface information for cliet to use +* +* @return +* ADDR_E_RETURNCODE +************************************************************************************************************************ +*/ +ADDR_E_RETURNCODE Lib::Addr2GetPreferredSurfaceSetting( + const ADDR2_GET_PREFERRED_SURF_SETTING_INPUT* pIn, + ADDR2_GET_PREFERRED_SURF_SETTING_OUTPUT* pOut) const +{ + ADDR_E_RETURNCODE returnCode; + + if ((GetFillSizeFieldsFlags() == TRUE) && + ((pIn->size != sizeof(ADDR2_GET_PREFERRED_SURF_SETTING_INPUT)) || + (pOut->size != sizeof(ADDR2_GET_PREFERRED_SURF_SETTING_OUTPUT)))) + { + returnCode = ADDR_INVALIDPARAMS; + } + else + { + returnCode = HwlGetPreferredSurfaceSetting(pIn, pOut); + } + + return returnCode; +} + +/** +************************************************************************************************************************ +* Lib::ComputeBlock256Equation +* +* @brief +* Compute equation for block 256B +* +* @return +* If equation computed successfully +* +************************************************************************************************************************ +*/ +ADDR_E_RETURNCODE Lib::ComputeBlock256Equation( + AddrResourceType rsrcType, + AddrSwizzleMode swMode, + UINT_32 elementBytesLog2, + ADDR_EQUATION* pEquation) const +{ + ADDR_E_RETURNCODE ret; + + if (IsBlock256b(swMode)) + { + ret = HwlComputeBlock256Equation(rsrcType, swMode, 
elementBytesLog2, pEquation); + } + else + { + ADDR_ASSERT_ALWAYS(); + ret = ADDR_INVALIDPARAMS; + } + + return ret; +} + +/** +************************************************************************************************************************ +* Lib::ComputeThinEquation +* +* @brief +* Compute equation for 2D/3D resource which use THIN mode +* +* @return +* If equation computed successfully +* +************************************************************************************************************************ +*/ +ADDR_E_RETURNCODE Lib::ComputeThinEquation( + AddrResourceType rsrcType, + AddrSwizzleMode swMode, + UINT_32 elementBytesLog2, + ADDR_EQUATION* pEquation) const +{ + ADDR_E_RETURNCODE ret; + + if (IsThin(rsrcType, swMode)) + { + ret = HwlComputeThinEquation(rsrcType, swMode, elementBytesLog2, pEquation); + } + else + { + ADDR_ASSERT_ALWAYS(); + ret = ADDR_INVALIDPARAMS; + } + + return ret; +} + +/** +************************************************************************************************************************ +* Lib::ComputeThickEquation +* +* @brief +* Compute equation for 3D resource which use THICK mode +* +* @return +* If equation computed successfully +* +************************************************************************************************************************ +*/ +ADDR_E_RETURNCODE Lib::ComputeThickEquation( + AddrResourceType rsrcType, + AddrSwizzleMode swMode, + UINT_32 elementBytesLog2, + ADDR_EQUATION* pEquation) const +{ + ADDR_E_RETURNCODE ret; + + if (IsThick(rsrcType, swMode)) + { + ret = HwlComputeThickEquation(rsrcType, swMode, elementBytesLog2, pEquation); + } + else + { + ADDR_ASSERT_ALWAYS(); + ret = ADDR_INVALIDPARAMS; + } + + return ret; +} + +/** +************************************************************************************************************************ +* Lib::ComputeQbStereoInfo +* +* @brief +* Get quad buffer stereo information +* @return +* N/A 
+************************************************************************************************************************ +*/ +VOID Lib::ComputeQbStereoInfo( + ADDR2_COMPUTE_SURFACE_INFO_OUTPUT* pOut ///< [in,out] updated pOut+pStereoInfo + ) const +{ + ADDR_ASSERT(pOut->bpp >= 8); + ADDR_ASSERT((pOut->surfSize % pOut->baseAlign) == 0); + + // Save original height + pOut->pStereoInfo->eyeHeight = pOut->height; + + // Right offset + pOut->pStereoInfo->rightOffset = static_cast(pOut->surfSize); + + // Double height + pOut->height <<= 1; + + ADDR_ASSERT(pOut->height <= MaxSurfaceHeight); + + pOut->pixelHeight <<= 1; + + // Double size + pOut->surfSize <<= 1; +} + +} // V2 +} // Addr + diff -Nru mesa-18.3.3/src/amd/addrlib/src/core/addrlib2.h mesa-19.0.1/src/amd/addrlib/src/core/addrlib2.h --- mesa-18.3.3/src/amd/addrlib/src/core/addrlib2.h 1970-01-01 00:00:00.000000000 +0000 +++ mesa-19.0.1/src/amd/addrlib/src/core/addrlib2.h 2019-03-31 23:16:37.000000000 +0000 @@ -0,0 +1,836 @@ +/* + * Copyright © 2007-2018 Advanced Micro Devices, Inc. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining + * a copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES + * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NON-INFRINGEMENT. 
IN NO EVENT SHALL THE COPYRIGHT HOLDERS, AUTHORS + * AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE + * USE OR OTHER DEALINGS IN THE SOFTWARE. + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + */ + +/** +************************************************************************************************************************ +* @file addrlib2.h +* @brief Contains the Addr::V2::Lib class definition. +************************************************************************************************************************ +*/ + +#ifndef __ADDR2_LIB2_H__ +#define __ADDR2_LIB2_H__ + +#include "addrlib.h" + +namespace Addr +{ +namespace V2 +{ + +/** +************************************************************************************************************************ +* @brief Flags for SwizzleModeTable +************************************************************************************************************************ +*/ +struct SwizzleModeFlags +{ + // Swizzle mode + UINT_32 isLinear : 1; // Linear + + // Block size + UINT_32 is256b : 1; // Block size is 256B + UINT_32 is4kb : 1; // Block size is 4KB + UINT_32 is64kb : 1; // Block size is 64KB + UINT_32 isVar : 1; // Block size is variable + + UINT_32 isZ : 1; // Z order swizzle mode + UINT_32 isStd : 1; // Standard swizzle mode + UINT_32 isDisp : 1; // Display swizzle mode + UINT_32 isRot : 1; // Rotate swizzle mode + + // XOR mode + UINT_32 isXor : 1; // XOR after swizzle if set + + UINT_32 isT : 1; // T mode + + UINT_32 isRtOpt : 1; // mode opt for render target +}; + +struct Dim2d +{ + UINT_32 w; + UINT_32 h; +}; + +struct Dim3d +{ + UINT_32 w; + UINT_32 h; + UINT_32 d; +}; + +// Macro define resource block type +enum AddrBlockType +{ + 
AddrBlockMicro = 0, // Resource uses 256B block + AddrBlock4KB = 1, // Resource uses 4KB block + AddrBlock64KB = 2, // Resource uses 64KB block + AddrBlockVar = 3, // Resource uses var block, only valid for GFX9 + AddrBlockLinear = 4, // Resource uses linear swizzle mode + + AddrBlockMaxTiledType = AddrBlock64KB + 1, +}; + +enum AddrBlockSet +{ + AddrBlockSetMicro = 1 << AddrBlockMicro, + AddrBlockSetMacro4KB = 1 << AddrBlock4KB, + AddrBlockSetMacro64KB = 1 << AddrBlock64KB, + AddrBlockSetVar = 1 << AddrBlockVar, + AddrBlockSetLinear = 1 << AddrBlockLinear, + + AddrBlockSetMacro = AddrBlockSetMacro4KB | AddrBlockSetMacro64KB, +}; + +enum AddrSwSet +{ + AddrSwSetZ = 1 << ADDR_SW_Z, + AddrSwSetS = 1 << ADDR_SW_S, + AddrSwSetD = 1 << ADDR_SW_D, + AddrSwSetR = 1 << ADDR_SW_R, + + AddrSwSetAll = AddrSwSetZ | AddrSwSetS | AddrSwSetD | AddrSwSetR, +}; + +/** +************************************************************************************************************************ +* @brief This class contains asic independent address lib functionalities +************************************************************************************************************************ +*/ +class Lib : public Addr::Lib +{ +public: + virtual ~Lib(); + + static Lib* GetLib( + ADDR_HANDLE hLib); + + // + // Interface stubs + // + + // For data surface + ADDR_E_RETURNCODE ComputeSurfaceInfo( + const ADDR2_COMPUTE_SURFACE_INFO_INPUT* pIn, + ADDR2_COMPUTE_SURFACE_INFO_OUTPUT* pOut) const; + + ADDR_E_RETURNCODE ComputeSurfaceAddrFromCoord( + const ADDR2_COMPUTE_SURFACE_ADDRFROMCOORD_INPUT* pIn, + ADDR2_COMPUTE_SURFACE_ADDRFROMCOORD_OUTPUT* pOut) const; + + ADDR_E_RETURNCODE ComputeSurfaceCoordFromAddr( + const ADDR2_COMPUTE_SURFACE_COORDFROMADDR_INPUT* pIn, + ADDR2_COMPUTE_SURFACE_COORDFROMADDR_OUTPUT* pOut) const; + + // For HTile + ADDR_E_RETURNCODE ComputeHtileInfo( + const ADDR2_COMPUTE_HTILE_INFO_INPUT* pIn, + ADDR2_COMPUTE_HTILE_INFO_OUTPUT* pOut) const; + + ADDR_E_RETURNCODE 
ComputeHtileAddrFromCoord( + const ADDR2_COMPUTE_HTILE_ADDRFROMCOORD_INPUT* pIn, + ADDR2_COMPUTE_HTILE_ADDRFROMCOORD_OUTPUT* pOut); + + ADDR_E_RETURNCODE ComputeHtileCoordFromAddr( + const ADDR2_COMPUTE_HTILE_COORDFROMADDR_INPUT* pIn, + ADDR2_COMPUTE_HTILE_COORDFROMADDR_OUTPUT* pOut); + + // For CMask + ADDR_E_RETURNCODE ComputeCmaskInfo( + const ADDR2_COMPUTE_CMASK_INFO_INPUT* pIn, + ADDR2_COMPUTE_CMASK_INFO_OUTPUT* pOut) const; + + ADDR_E_RETURNCODE ComputeCmaskAddrFromCoord( + const ADDR2_COMPUTE_CMASK_ADDRFROMCOORD_INPUT* pIn, + ADDR2_COMPUTE_CMASK_ADDRFROMCOORD_OUTPUT* pOut); + + ADDR_E_RETURNCODE ComputeCmaskCoordFromAddr( + const ADDR2_COMPUTE_CMASK_COORDFROMADDR_INPUT* pIn, + ADDR2_COMPUTE_CMASK_COORDFROMADDR_OUTPUT* pOut) const; + + // For FMask + ADDR_E_RETURNCODE ComputeFmaskInfo( + const ADDR2_COMPUTE_FMASK_INFO_INPUT* pIn, + ADDR2_COMPUTE_FMASK_INFO_OUTPUT* pOut); + + ADDR_E_RETURNCODE ComputeFmaskAddrFromCoord( + const ADDR2_COMPUTE_FMASK_ADDRFROMCOORD_INPUT* pIn, + ADDR2_COMPUTE_FMASK_ADDRFROMCOORD_OUTPUT* pOut) const; + + ADDR_E_RETURNCODE ComputeFmaskCoordFromAddr( + const ADDR2_COMPUTE_FMASK_COORDFROMADDR_INPUT* pIn, + ADDR2_COMPUTE_FMASK_COORDFROMADDR_OUTPUT* pOut) const; + + // For DCC key + ADDR_E_RETURNCODE ComputeDccInfo( + const ADDR2_COMPUTE_DCCINFO_INPUT* pIn, + ADDR2_COMPUTE_DCCINFO_OUTPUT* pOut) const; + + ADDR_E_RETURNCODE ComputeDccAddrFromCoord( + const ADDR2_COMPUTE_DCC_ADDRFROMCOORD_INPUT* pIn, + ADDR2_COMPUTE_DCC_ADDRFROMCOORD_OUTPUT* pOut); + + // Misc + ADDR_E_RETURNCODE ComputePipeBankXor( + const ADDR2_COMPUTE_PIPEBANKXOR_INPUT* pIn, + ADDR2_COMPUTE_PIPEBANKXOR_OUTPUT* pOut); + + ADDR_E_RETURNCODE ComputeSlicePipeBankXor( + const ADDR2_COMPUTE_SLICE_PIPEBANKXOR_INPUT* pIn, + ADDR2_COMPUTE_SLICE_PIPEBANKXOR_OUTPUT* pOut); + + ADDR_E_RETURNCODE ComputeSubResourceOffsetForSwizzlePattern( + const ADDR2_COMPUTE_SUBRESOURCE_OFFSET_FORSWIZZLEPATTERN_INPUT* pIn, + ADDR2_COMPUTE_SUBRESOURCE_OFFSET_FORSWIZZLEPATTERN_OUTPUT* pOut); + + 
ADDR_E_RETURNCODE Addr2GetPreferredSurfaceSetting( + const ADDR2_GET_PREFERRED_SURF_SETTING_INPUT* pIn, + ADDR2_GET_PREFERRED_SURF_SETTING_OUTPUT* pOut) const; + + virtual BOOL_32 IsValidDisplaySwizzleMode( + const ADDR2_COMPUTE_SURFACE_INFO_INPUT* pIn) const + { + ADDR_NOT_IMPLEMENTED(); + return ADDR_NOTIMPLEMENTED; + } + +protected: + Lib(); // Constructor is protected + Lib(const Client* pClient); + + static const UINT_32 MaxNumOfBpp = 5; + static const UINT_32 MaxNumOfAA = 4; + + static const Dim2d Block256_2d[MaxNumOfBpp]; + static const Dim3d Block1K_3d[MaxNumOfBpp]; + + static const UINT_32 PrtAlignment = 64 * 1024; + static const UINT_32 MaxMacroBits = 20; + + static const UINT_32 MaxMipLevels = 16; + + // Checking block size + BOOL_32 IsBlock256b(AddrSwizzleMode swizzleMode) const + { + return m_swizzleModeTable[swizzleMode].is256b; + } + + BOOL_32 IsBlock4kb(AddrSwizzleMode swizzleMode) const + { + return m_swizzleModeTable[swizzleMode].is4kb; + } + + BOOL_32 IsBlock64kb(AddrSwizzleMode swizzleMode) const + { + return m_swizzleModeTable[swizzleMode].is64kb; + } + + BOOL_32 IsBlockVariable(AddrSwizzleMode swizzleMode) const + { + return m_swizzleModeTable[swizzleMode].isVar; + } + + // Checking swizzle mode + BOOL_32 IsLinear(AddrSwizzleMode swizzleMode) const + { + return m_swizzleModeTable[swizzleMode].isLinear; + } + + BOOL_32 IsRtOptSwizzle(AddrSwizzleMode swizzleMode) const + { + return m_swizzleModeTable[swizzleMode].isRtOpt; + } + + BOOL_32 IsZOrderSwizzle(AddrSwizzleMode swizzleMode) const + { + return m_swizzleModeTable[swizzleMode].isZ; + } + + BOOL_32 IsStandardSwizzle(AddrSwizzleMode swizzleMode) const + { + return m_swizzleModeTable[swizzleMode].isStd; + } + + BOOL_32 IsDisplaySwizzle(AddrSwizzleMode swizzleMode) const + { + return m_swizzleModeTable[swizzleMode].isDisp; + } + + BOOL_32 IsRotateSwizzle(AddrSwizzleMode swizzleMode) const + { + return m_swizzleModeTable[swizzleMode].isRot; + } + + BOOL_32 IsStandardSwizzle(AddrResourceType 
resourceType, AddrSwizzleMode swizzleMode) const + { + return HwlIsStandardSwizzle(resourceType, swizzleMode); + } + + BOOL_32 IsDisplaySwizzle(AddrResourceType resourceType, AddrSwizzleMode swizzleMode) const + { + return HwlIsDisplaySwizzle(resourceType, swizzleMode); + } + + BOOL_32 IsXor(AddrSwizzleMode swizzleMode) const + { + return m_swizzleModeTable[swizzleMode].isXor; + } + + BOOL_32 IsPrt(AddrSwizzleMode swizzleMode) const + { + return m_swizzleModeTable[swizzleMode].isT; + } + + BOOL_32 IsNonPrtXor(AddrSwizzleMode swizzleMode) const + { + return (IsXor(swizzleMode) && (IsPrt(swizzleMode) == FALSE)); + } + + // Checking resource type + static BOOL_32 IsTex1d(AddrResourceType resourceType) + { + return (resourceType == ADDR_RSRC_TEX_1D); + } + + static BOOL_32 IsTex2d(AddrResourceType resourceType) + { + return (resourceType == ADDR_RSRC_TEX_2D); + } + + static BOOL_32 IsTex3d(AddrResourceType resourceType) + { + return (resourceType == ADDR_RSRC_TEX_3D); + } + + BOOL_32 IsThick(AddrResourceType resourceType, AddrSwizzleMode swizzleMode) const + { + return HwlIsThick(resourceType, swizzleMode); + } + + BOOL_32 IsThin(AddrResourceType resourceType, AddrSwizzleMode swizzleMode) const + { + return HwlIsThin(resourceType, swizzleMode); + } + + UINT_32 GetBlockSizeLog2(AddrSwizzleMode swizzleMode) const + { + UINT_32 blockSizeLog2 = 0; + + if (IsBlock256b(swizzleMode) || IsLinear(swizzleMode)) + { + blockSizeLog2 = 8; + } + else if (IsBlock4kb(swizzleMode)) + { + blockSizeLog2 = 12; + } + else if (IsBlock64kb(swizzleMode)) + { + blockSizeLog2 = 16; + } + else if (IsBlockVariable(swizzleMode)) + { + blockSizeLog2 = m_blockVarSizeLog2; + } + else + { + ADDR_ASSERT_ALWAYS(); + } + + return blockSizeLog2; + } + + UINT_32 GetBlockSize(AddrSwizzleMode swizzleMode) const + { + return (1 << GetBlockSizeLog2(swizzleMode)); + } + + static UINT_32 GetFmaskBpp(UINT_32 sample, UINT_32 frag) + { + sample = (sample == 0) ? 1 : sample; + frag = (frag == 0) ? 
sample : frag; + + UINT_32 fmaskBpp = QLog2(frag); + + if (sample > frag) + { + fmaskBpp++; + } + + if (fmaskBpp == 3) + { + fmaskBpp = 4; + } + + fmaskBpp = Max(8u, fmaskBpp * sample); + + return fmaskBpp; + } + + virtual BOOL_32 HwlIsStandardSwizzle( + AddrResourceType resourceType, + AddrSwizzleMode swizzleMode) const + { + ADDR_NOT_IMPLEMENTED(); + return FALSE; + } + + virtual BOOL_32 HwlIsDisplaySwizzle( + AddrResourceType resourceType, + AddrSwizzleMode swizzleMode) const + { + ADDR_NOT_IMPLEMENTED(); + return FALSE; + } + + virtual BOOL_32 HwlIsThin( + AddrResourceType resourceType, + AddrSwizzleMode swizzleMode) const + { + ADDR_NOT_IMPLEMENTED(); + return FALSE; + } + + virtual BOOL_32 HwlIsThick( + AddrResourceType resourceType, + AddrSwizzleMode swizzleMode) const + { + ADDR_NOT_IMPLEMENTED(); + return FALSE; + } + + virtual ADDR_E_RETURNCODE HwlComputeHtileInfo( + const ADDR2_COMPUTE_HTILE_INFO_INPUT* pIn, + ADDR2_COMPUTE_HTILE_INFO_OUTPUT* pOut) const + { + ADDR_NOT_IMPLEMENTED(); + return ADDR_NOTSUPPORTED; + } + + virtual ADDR_E_RETURNCODE HwlComputeCmaskInfo( + const ADDR2_COMPUTE_CMASK_INFO_INPUT* pIn, + ADDR2_COMPUTE_CMASK_INFO_OUTPUT* pOut) const + { + ADDR_NOT_IMPLEMENTED(); + return ADDR_NOTSUPPORTED; + } + + virtual ADDR_E_RETURNCODE HwlComputeDccInfo( + const ADDR2_COMPUTE_DCCINFO_INPUT* pIn, + ADDR2_COMPUTE_DCCINFO_OUTPUT* pOut) const + { + ADDR_NOT_IMPLEMENTED(); + return ADDR_NOTSUPPORTED; + } + + virtual ADDR_E_RETURNCODE HwlComputeDccAddrFromCoord( + const ADDR2_COMPUTE_DCC_ADDRFROMCOORD_INPUT* pIn, + ADDR2_COMPUTE_DCC_ADDRFROMCOORD_OUTPUT* pOut) + { + ADDR_NOT_IMPLEMENTED(); + return ADDR_NOTSUPPORTED; + } + + virtual ADDR_E_RETURNCODE HwlComputeCmaskAddrFromCoord( + const ADDR2_COMPUTE_CMASK_ADDRFROMCOORD_INPUT* pIn, + ADDR2_COMPUTE_CMASK_ADDRFROMCOORD_OUTPUT* pOut) + { + ADDR_NOT_IMPLEMENTED(); + return ADDR_NOTSUPPORTED; + } + + virtual ADDR_E_RETURNCODE HwlComputeHtileAddrFromCoord( + const ADDR2_COMPUTE_HTILE_ADDRFROMCOORD_INPUT* 
pIn, + ADDR2_COMPUTE_HTILE_ADDRFROMCOORD_OUTPUT* pOut) + { + ADDR_NOT_IMPLEMENTED(); + return ADDR_NOTSUPPORTED; + } + + virtual ADDR_E_RETURNCODE HwlComputeHtileCoordFromAddr( + const ADDR2_COMPUTE_HTILE_COORDFROMADDR_INPUT* pIn, + ADDR2_COMPUTE_HTILE_COORDFROMADDR_OUTPUT* pOut) + { + ADDR_NOT_IMPLEMENTED(); + return ADDR_NOTSUPPORTED; + } + + virtual ADDR_E_RETURNCODE HwlComputeBlock256Equation( + AddrResourceType rsrcType, + AddrSwizzleMode swMode, + UINT_32 elementBytesLog2, + ADDR_EQUATION* pEquation) const + { + ADDR_NOT_IMPLEMENTED(); + return ADDR_NOTSUPPORTED; + } + + virtual ADDR_E_RETURNCODE HwlComputeThinEquation( + AddrResourceType rsrcType, + AddrSwizzleMode swMode, + UINT_32 elementBytesLog2, + ADDR_EQUATION* pEquation) const + { + ADDR_NOT_IMPLEMENTED(); + return ADDR_NOTSUPPORTED; + } + + virtual ADDR_E_RETURNCODE HwlComputeThickEquation( + AddrResourceType rsrcType, + AddrSwizzleMode swMode, + UINT_32 elementBytesLog2, + ADDR_EQUATION* pEquation) const + { + ADDR_NOT_IMPLEMENTED(); + return ADDR_NOTSUPPORTED; + } + + virtual UINT_32 HwlGetEquationIndex( + const ADDR2_COMPUTE_SURFACE_INFO_INPUT* pIn, + ADDR2_COMPUTE_SURFACE_INFO_OUTPUT* pOut) const + { + ADDR_NOT_IMPLEMENTED(); + return ADDR_INVALID_EQUATION_INDEX; + } + + UINT_32 GetEquationIndex( + const ADDR2_COMPUTE_SURFACE_INFO_INPUT* pIn, + ADDR2_COMPUTE_SURFACE_INFO_OUTPUT* pOut) const + { + return HwlGetEquationIndex(pIn, pOut); + } + + virtual ADDR_E_RETURNCODE HwlComputePipeBankXor( + const ADDR2_COMPUTE_PIPEBANKXOR_INPUT* pIn, + ADDR2_COMPUTE_PIPEBANKXOR_OUTPUT* pOut) const + { + ADDR_NOT_IMPLEMENTED(); + return ADDR_NOTSUPPORTED; + } + + virtual ADDR_E_RETURNCODE HwlComputeSlicePipeBankXor( + const ADDR2_COMPUTE_SLICE_PIPEBANKXOR_INPUT* pIn, + ADDR2_COMPUTE_SLICE_PIPEBANKXOR_OUTPUT* pOut) const + { + ADDR_NOT_IMPLEMENTED(); + return ADDR_NOTSUPPORTED; + } + + virtual ADDR_E_RETURNCODE HwlComputeSubResourceOffsetForSwizzlePattern( + const 
ADDR2_COMPUTE_SUBRESOURCE_OFFSET_FORSWIZZLEPATTERN_INPUT* pIn, + ADDR2_COMPUTE_SUBRESOURCE_OFFSET_FORSWIZZLEPATTERN_OUTPUT* pOut) const + { + ADDR_NOT_IMPLEMENTED(); + return ADDR_NOTSUPPORTED; + } + + virtual ADDR_E_RETURNCODE HwlGetPreferredSurfaceSetting( + const ADDR2_GET_PREFERRED_SURF_SETTING_INPUT* pIn, + ADDR2_GET_PREFERRED_SURF_SETTING_OUTPUT* pOut) const + { + ADDR_NOT_IMPLEMENTED(); + return ADDR_NOTSUPPORTED; + } + + virtual ADDR_E_RETURNCODE HwlComputeSurfaceInfoSanityCheck( + const ADDR2_COMPUTE_SURFACE_INFO_INPUT* pIn) const + { + ADDR_NOT_IMPLEMENTED(); + return ADDR_NOTSUPPORTED; + } + + virtual ADDR_E_RETURNCODE HwlComputeSurfaceInfoTiled( + const ADDR2_COMPUTE_SURFACE_INFO_INPUT* pIn, + ADDR2_COMPUTE_SURFACE_INFO_OUTPUT* pOut) const + { + ADDR_NOT_IMPLEMENTED(); + return ADDR_NOTIMPLEMENTED; + } + + virtual ADDR_E_RETURNCODE HwlComputeSurfaceInfoLinear( + const ADDR2_COMPUTE_SURFACE_INFO_INPUT* pIn, + ADDR2_COMPUTE_SURFACE_INFO_OUTPUT* pOut) const + { + ADDR_NOT_IMPLEMENTED(); + return ADDR_NOTIMPLEMENTED; + } + + virtual ADDR_E_RETURNCODE HwlComputeSurfaceAddrFromCoordTiled( + const ADDR2_COMPUTE_SURFACE_ADDRFROMCOORD_INPUT* pIn, + ADDR2_COMPUTE_SURFACE_ADDRFROMCOORD_OUTPUT* pOut) const + { + ADDR_NOT_IMPLEMENTED(); + return ADDR_NOTIMPLEMENTED; + } + + ADDR_E_RETURNCODE ComputeBlock256Equation( + AddrResourceType rsrcType, + AddrSwizzleMode swMode, + UINT_32 elementBytesLog2, + ADDR_EQUATION* pEquation) const; + + ADDR_E_RETURNCODE ComputeThinEquation( + AddrResourceType rsrcType, + AddrSwizzleMode swMode, + UINT_32 elementBytesLog2, + ADDR_EQUATION* pEquation) const; + + ADDR_E_RETURNCODE ComputeThickEquation( + AddrResourceType rsrcType, + AddrSwizzleMode swMode, + UINT_32 elementBytesLog2, + ADDR_EQUATION* pEquation) const; + + ADDR_E_RETURNCODE ComputeSurfaceInfoSanityCheck( + const ADDR2_COMPUTE_SURFACE_INFO_INPUT* pIn) const; + + ADDR_E_RETURNCODE ComputeSurfaceInfoLinear( + const ADDR2_COMPUTE_SURFACE_INFO_INPUT* pIn, + 
ADDR2_COMPUTE_SURFACE_INFO_OUTPUT* pOut) const; + + ADDR_E_RETURNCODE ComputeSurfaceInfoTiled( + const ADDR2_COMPUTE_SURFACE_INFO_INPUT* pIn, + ADDR2_COMPUTE_SURFACE_INFO_OUTPUT* pOut) const; + + ADDR_E_RETURNCODE ComputeSurfaceAddrFromCoordLinear( + const ADDR2_COMPUTE_SURFACE_ADDRFROMCOORD_INPUT* pIn, + ADDR2_COMPUTE_SURFACE_ADDRFROMCOORD_OUTPUT* pOut) const; + + ADDR_E_RETURNCODE ComputeSurfaceAddrFromCoordTiled( + const ADDR2_COMPUTE_SURFACE_ADDRFROMCOORD_INPUT* pIn, + ADDR2_COMPUTE_SURFACE_ADDRFROMCOORD_OUTPUT* pOut) const; + + ADDR_E_RETURNCODE ComputeSurfaceCoordFromAddrLinear( + const ADDR2_COMPUTE_SURFACE_COORDFROMADDR_INPUT* pIn, + ADDR2_COMPUTE_SURFACE_COORDFROMADDR_OUTPUT* pOut) const; + + ADDR_E_RETURNCODE ComputeSurfaceCoordFromAddrTiled( + const ADDR2_COMPUTE_SURFACE_COORDFROMADDR_INPUT* pIn, + ADDR2_COMPUTE_SURFACE_COORDFROMADDR_OUTPUT* pOut) const; + + UINT_32 ComputeSurface2DMicroBlockOffset( + const _ADDR2_COMPUTE_SURFACE_ADDRFROMCOORD_INPUT* pIn) const; + + UINT_32 ComputeSurface3DMicroBlockOffset( + const _ADDR2_COMPUTE_SURFACE_ADDRFROMCOORD_INPUT* pIn) const; + + // Misc + ADDR_E_RETURNCODE ComputeBlockDimensionForSurf( + UINT_32* pWidth, + UINT_32* pHeight, + UINT_32* pDepth, + UINT_32 bpp, + UINT_32 numSamples, + AddrResourceType resourceType, + AddrSwizzleMode swizzleMode) const; + + ADDR_E_RETURNCODE ComputeBlockDimension( + UINT_32* pWidth, + UINT_32* pHeight, + UINT_32* pDepth, + UINT_32 bpp, + AddrResourceType resourceType, + AddrSwizzleMode swizzleMode) const; + + static UINT_64 ComputePadSize( + const Dim3d* pBlkDim, + UINT_32 width, + UINT_32 height, + UINT_32 numSlices, + Dim3d* pPadDim) + { + pPadDim->w = PowTwoAlign(width ,pBlkDim->w); + pPadDim->h = PowTwoAlign(height ,pBlkDim->h); + pPadDim->d = PowTwoAlign(numSlices, pBlkDim->d); + return static_cast(pPadDim->w) * pPadDim->h * pPadDim->d; + } + + static ADDR_E_RETURNCODE ExtractPipeBankXor( + UINT_32 pipeBankXor, + UINT_32 bankBits, + UINT_32 pipeBits, + UINT_32* pBankX, + 
UINT_32* pPipeX); + + static BOOL_32 Valid3DMipSliceIdConstraint( + UINT_32 numSlices, + UINT_32 mipId, + UINT_32 slice) + { + return (Max((numSlices >> mipId), 1u) > slice); + } + + Dim3d GetMipTailDim( + AddrResourceType resourceType, + AddrSwizzleMode swizzleMode, + UINT_32 blockWidth, + UINT_32 blockHeight, + UINT_32 blockDepth) const; + + BOOL_32 IsInMipTail( + AddrResourceType resourceType, + AddrSwizzleMode swizzleMode, + Dim3d mipTailDim, + UINT_32 width, + UINT_32 height, + UINT_32 depth) const + { + BOOL_32 inTail = ((width <= mipTailDim.w) && + (height <= mipTailDim.h) && + (IsThin(resourceType, swizzleMode) || (depth <= mipTailDim.d))); + + return inTail; + } + + static BOOL_32 IsLocalHeap(AddrResrouceLocation resourceType) + { + return ((resourceType == ADDR_RSRC_LOC_LOCAL) || + (resourceType == ADDR_RSRC_LOC_INVIS)); + } + + static BOOL_32 IsInvisibleHeap(AddrResrouceLocation resourceType) + { + return (resourceType == ADDR_RSRC_LOC_INVIS); + } + + static BOOL_32 IsNonlocalHeap(AddrResrouceLocation resourceType) + { + return ((resourceType == ADDR_RSRC_LOC_USWC) || + (resourceType == ADDR_RSRC_LOC_CACHED)); + } + + UINT_32 GetPipeLog2ForMetaAddressing(BOOL_32 pipeAligned, AddrSwizzleMode swizzleMode) const + { + UINT_32 numPipeLog2 = pipeAligned ? 
Min(m_pipesLog2 + m_seLog2, 5u) : 0; + + if (IsXor(swizzleMode)) + { + UINT_32 maxPipeLog2 = GetBlockSizeLog2(swizzleMode) - m_pipeInterleaveLog2; + + numPipeLog2 = Min(numPipeLog2, maxPipeLog2); + } + + return numPipeLog2; + } + + UINT_32 GetPipeNumForMetaAddressing(BOOL_32 pipeAligned, AddrSwizzleMode swizzleMode) const + { + return (1 << GetPipeLog2ForMetaAddressing(pipeAligned, swizzleMode)); + } + + VOID VerifyMipLevelInfo(const ADDR2_COMPUTE_SURFACE_INFO_INPUT* pIn) const + { +#if DEBUG + if (pIn->numMipLevels > 1) + { + UINT_32 actualMipLevels = 1; + switch (pIn->resourceType) + { + case ADDR_RSRC_TEX_3D: + // Fall through to share 2D case + actualMipLevels = Max(actualMipLevels, Log2NonPow2(pIn->numSlices) + 1); + case ADDR_RSRC_TEX_2D: + // Fall through to share 1D case + actualMipLevels = Max(actualMipLevels, Log2NonPow2(pIn->height) + 1); + case ADDR_RSRC_TEX_1D: + // Base 1D case + actualMipLevels = Max(actualMipLevels, Log2NonPow2(pIn->width) + 1); + break; + default: + ADDR_ASSERT_ALWAYS(); + break; + } + // Client pass wrong number of MipLevels to addrlib and result will be bad. + // Not sure if we should fail this calling instead of putting an assertion here. 
+ ADDR_ASSERT(actualMipLevels >= pIn->numMipLevels); + } +#endif + } + + ADDR_E_RETURNCODE ApplyCustomerPipeBankXor( + AddrSwizzleMode swizzleMode, + UINT_32 pipeBankXor, + UINT_32 bankBits, + UINT_32 pipeBits, + UINT_32* pBlockOffset) const + { + ADDR_E_RETURNCODE returnCode = ADDR_OK; + + if (IsXor(swizzleMode)) + { + // Apply driver set bankPipeXor + UINT_32 bankX = 0; + UINT_32 pipeX = 0; + returnCode = ExtractPipeBankXor(pipeBankXor, bankBits, pipeBits, &bankX, &pipeX); + *pBlockOffset ^= (pipeX << m_pipeInterleaveLog2); + *pBlockOffset ^= (bankX << (m_pipeInterleaveLog2 + pipeBits)); + } + + return returnCode; + } + + UINT_32 GetPipeXorBits(UINT_32 macroBlockBits) const; + UINT_32 GetBankXorBits(UINT_32 macroBlockBits) const; + + ADDR_E_RETURNCODE ApplyCustomizedPitchHeight( + const ADDR2_COMPUTE_SURFACE_INFO_INPUT* pIn, + UINT_32 elementBytes, + UINT_32 pitchAlignInElement, + UINT_32* pPitch, + UINT_32* pHeight) const; + + VOID ComputeQbStereoInfo(ADDR2_COMPUTE_SURFACE_INFO_OUTPUT* pOut) const; + + UINT_32 m_se; ///< Number of shader engine + UINT_32 m_rbPerSe; ///< Number of render backend per shader engine + UINT_32 m_maxCompFrag; ///< Number of max compressed fragment + + UINT_32 m_banksLog2; ///< Number of bank Log2 + UINT_32 m_pipesLog2; ///< Number of pipe per shader engine Log2 + UINT_32 m_seLog2; ///< Number of shader engine Log2 + UINT_32 m_rbPerSeLog2; ///< Number of render backend per shader engine Log2 + UINT_32 m_maxCompFragLog2; ///< Number of max compressed fragment Log2 + + UINT_32 m_pipeInterleaveLog2; ///< Log2 of pipe interleave bytes + + UINT_32 m_blockVarSizeLog2; ///< Log2 of block var size + + SwizzleModeFlags m_swizzleModeTable[ADDR_SW_MAX_TYPE]; ///< Swizzle mode table + +private: + // Disallow the copy constructor + Lib(const Lib& a); + + // Disallow the assignment operator + Lib& operator=(const Lib& a); +}; + +} // V2 +} // Addr + +#endif + diff -Nru mesa-18.3.3/src/amd/addrlib/src/core/addrlib.cpp 
mesa-19.0.1/src/amd/addrlib/src/core/addrlib.cpp --- mesa-18.3.3/src/amd/addrlib/src/core/addrlib.cpp 1970-01-01 00:00:00.000000000 +0000 +++ mesa-19.0.1/src/amd/addrlib/src/core/addrlib.cpp 2019-03-31 23:16:37.000000000 +0000 @@ -0,0 +1,655 @@ +/* + * Copyright © 2007-2018 Advanced Micro Devices, Inc. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining + * a copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES + * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NON-INFRINGEMENT. IN NO EVENT SHALL THE COPYRIGHT HOLDERS, AUTHORS + * AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE + * USE OR OTHER DEALINGS IN THE SOFTWARE. + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + */ + +/** +**************************************************************************************************** +* @file addrlib.cpp +* @brief Contains the implementation for the Addr::Lib class. 
+**************************************************************************************************** +*/ + +#include "addrinterface.h" +#include "addrlib.h" +#include "addrcommon.h" + +#if defined(__APPLE__) + +UINT_32 div64_32(UINT_64 n, UINT_32 base) +{ + UINT_64 rem = n; + UINT_64 b = base; + UINT_64 res, d = 1; + UINT_32 high = rem >> 32; + + res = 0; + if (high >= base) + { + high /= base; + res = (UINT_64) high << 32; + rem -= (UINT_64) (high * base) << 32; + } + + while (((INT_64)b > 0) && (b < rem)) + { + b = b + b; + d = d + d; + } + + do + { + if (rem >= b) + { + rem -= b; + res += d; + } + b >>= 1; + d >>= 1; + } while (d); + + n = res; + return rem; +} + +extern "C" +UINT_32 __umoddi3(UINT_64 n, UINT_32 base) +{ + return div64_32(n, base); +} + +#endif // __APPLE__ + +namespace Addr +{ + +//////////////////////////////////////////////////////////////////////////////////////////////////// +// Constructor/Destructor +//////////////////////////////////////////////////////////////////////////////////////////////////// + +/** +**************************************************************************************************** +* Lib::Lib +* +* @brief +* Constructor for the AddrLib class +* +**************************************************************************************************** +*/ +Lib::Lib() : + m_class(BASE_ADDRLIB), + m_chipFamily(ADDR_CHIP_FAMILY_IVLD), + m_chipRevision(0), + m_version(ADDRLIB_VERSION), + m_pipes(0), + m_banks(0), + m_pipeInterleaveBytes(0), + m_rowSize(0), + m_minPitchAlignPixels(1), + m_maxSamples(8), + m_pElemLib(NULL) +{ + m_configFlags.value = 0; +} + +/** +**************************************************************************************************** +* Lib::Lib +* +* @brief +* Constructor for the AddrLib class with hClient as parameter +* +**************************************************************************************************** +*/ +Lib::Lib(const Client* pClient) : + Object(pClient), + 
m_class(BASE_ADDRLIB), + m_chipFamily(ADDR_CHIP_FAMILY_IVLD), + m_chipRevision(0), + m_version(ADDRLIB_VERSION), + m_pipes(0), + m_banks(0), + m_pipeInterleaveBytes(0), + m_rowSize(0), + m_minPitchAlignPixels(1), + m_maxSamples(8), + m_pElemLib(NULL) +{ + m_configFlags.value = 0; +} + +/** +**************************************************************************************************** +* Lib::~AddrLib +* +* @brief +* Destructor for the AddrLib class +* +**************************************************************************************************** +*/ +Lib::~Lib() +{ + if (m_pElemLib) + { + delete m_pElemLib; + m_pElemLib = NULL; + } +} + +//////////////////////////////////////////////////////////////////////////////////////////////////// +// Initialization/Helper +//////////////////////////////////////////////////////////////////////////////////////////////////// + +/** +**************************************************************************************************** +* Lib::Create +* +* @brief +* Creates and initializes AddrLib object. 
+* +* @return +* ADDR_E_RETURNCODE +**************************************************************************************************** +*/ +ADDR_E_RETURNCODE Lib::Create( + const ADDR_CREATE_INPUT* pCreateIn, ///< [in] pointer to ADDR_CREATE_INPUT + ADDR_CREATE_OUTPUT* pCreateOut) ///< [out] pointer to ADDR_CREATE_OUTPUT +{ + Lib* pLib = NULL; + ADDR_E_RETURNCODE returnCode = ADDR_OK; + + if (pCreateIn->createFlags.fillSizeFields == TRUE) + { + if ((pCreateIn->size != sizeof(ADDR_CREATE_INPUT)) || + (pCreateOut->size != sizeof(ADDR_CREATE_OUTPUT))) + { + returnCode = ADDR_PARAMSIZEMISMATCH; + } + } + + if ((returnCode == ADDR_OK) && + (pCreateIn->callbacks.allocSysMem != NULL) && + (pCreateIn->callbacks.freeSysMem != NULL)) + { + Client client = { + pCreateIn->hClient, + pCreateIn->callbacks + }; + + switch (pCreateIn->chipEngine) + { + case CIASICIDGFXENGINE_SOUTHERNISLAND: + switch (pCreateIn->chipFamily) + { + case FAMILY_SI: + pLib = SiHwlInit(&client); + break; + case FAMILY_VI: + case FAMILY_CZ: + case FAMILY_CI: + case FAMILY_KV: // CI based fusion + pLib = CiHwlInit(&client); + break; + default: + ADDR_ASSERT_ALWAYS(); + break; + } + break; + case CIASICIDGFXENGINE_ARCTICISLAND: + switch (pCreateIn->chipFamily) + { + case FAMILY_AI: + case FAMILY_RV: + pLib = Gfx9HwlInit(&client); + break; + default: + ADDR_ASSERT_ALWAYS(); + break; + } + break; + default: + ADDR_ASSERT_ALWAYS(); + break; + } + } + + if (pLib != NULL) + { + BOOL_32 initValid; + + // Pass createFlags to configFlags first since these flags may be overwritten + pLib->m_configFlags.noCubeMipSlicesPad = pCreateIn->createFlags.noCubeMipSlicesPad; + pLib->m_configFlags.fillSizeFields = pCreateIn->createFlags.fillSizeFields; + pLib->m_configFlags.useTileIndex = pCreateIn->createFlags.useTileIndex; + pLib->m_configFlags.useCombinedSwizzle = pCreateIn->createFlags.useCombinedSwizzle; + pLib->m_configFlags.checkLast2DLevel = pCreateIn->createFlags.checkLast2DLevel; + 
pLib->m_configFlags.useHtileSliceAlign = pCreateIn->createFlags.useHtileSliceAlign; + pLib->m_configFlags.allowLargeThickTile = pCreateIn->createFlags.allowLargeThickTile; + pLib->m_configFlags.disableLinearOpt = FALSE; + + pLib->SetChipFamily(pCreateIn->chipFamily, pCreateIn->chipRevision); + + pLib->SetMinPitchAlignPixels(pCreateIn->minPitchAlignPixels); + + // Global parameters initialized and remaining configFlags bits are set as well + initValid = pLib->HwlInitGlobalParams(pCreateIn); + + if (initValid) + { + pLib->m_pElemLib = ElemLib::Create(pLib); + } + else + { + pLib->m_pElemLib = NULL; // Don't go on allocating element lib + returnCode = ADDR_INVALIDGBREGVALUES; + } + + if (pLib->m_pElemLib == NULL) + { + delete pLib; + pLib = NULL; + ADDR_ASSERT_ALWAYS(); + } + else + { + pLib->m_pElemLib->SetConfigFlags(pLib->m_configFlags); + } + } + + pCreateOut->hLib = pLib; + + if ((pLib != NULL) && + (returnCode == ADDR_OK)) + { + pCreateOut->numEquations = + pLib->HwlGetEquationTableInfo(&pCreateOut->pEquationTable); + + pLib->SetMaxAlignments(); + + } + else if ((pLib == NULL) && + (returnCode == ADDR_OK)) + { + // Unknown failures, we return the general error code + returnCode = ADDR_ERROR; + } + + return returnCode; +} + +/** +**************************************************************************************************** +* Lib::SetChipFamily +* +* @brief +* Convert familyID defined in atiid.h to ChipFamily and set m_chipFamily/m_chipRevision +* @return +* N/A +**************************************************************************************************** +*/ +VOID Lib::SetChipFamily( + UINT_32 uChipFamily, ///< [in] chip family defined in atiih.h + UINT_32 uChipRevision) ///< [in] chip revision defined in "asic_family"_id.h +{ + ChipFamily family = HwlConvertChipFamily(uChipFamily, uChipRevision); + + ADDR_ASSERT(family != ADDR_CHIP_FAMILY_IVLD); + + m_chipFamily = family; + m_chipRevision = uChipRevision; +} + +/** 
+**************************************************************************************************** +* Lib::SetMinPitchAlignPixels +* +* @brief +* Set m_minPitchAlignPixels with input param +* +* @return +* N/A +**************************************************************************************************** +*/ +VOID Lib::SetMinPitchAlignPixels( + UINT_32 minPitchAlignPixels) ///< [in] minmum pitch alignment in pixels +{ + m_minPitchAlignPixels = (minPitchAlignPixels == 0) ? 1 : minPitchAlignPixels; +} + +/** +**************************************************************************************************** +* Lib::SetMaxAlignments +* +* @brief +* Set max alignments +* +* @return +* N/A +**************************************************************************************************** +*/ +VOID Lib::SetMaxAlignments() +{ + m_maxBaseAlign = HwlComputeMaxBaseAlignments(); + m_maxMetaBaseAlign = HwlComputeMaxMetaBaseAlignments(); +} + +/** +**************************************************************************************************** +* Lib::GetLib +* +* @brief +* Get AddrLib pointer +* +* @return +* An AddrLib class pointer +**************************************************************************************************** +*/ +Lib* Lib::GetLib( + ADDR_HANDLE hLib) ///< [in] handle of ADDR_HANDLE +{ + return static_cast(hLib); +} + +/** +**************************************************************************************************** +* Lib::GetMaxAlignments +* +* @brief +* Gets maximum alignments for data surface (include FMask) +* +* @return +* ADDR_E_RETURNCODE +**************************************************************************************************** +*/ +ADDR_E_RETURNCODE Lib::GetMaxAlignments( + ADDR_GET_MAX_ALINGMENTS_OUTPUT* pOut ///< [out] output structure + ) const +{ + ADDR_E_RETURNCODE returnCode = ADDR_OK; + + if (GetFillSizeFieldsFlags() == TRUE) + { + if (pOut->size != sizeof(ADDR_GET_MAX_ALINGMENTS_OUTPUT)) + { + 
returnCode = ADDR_PARAMSIZEMISMATCH; + } + } + + if (returnCode == ADDR_OK) + { + if (m_maxBaseAlign != 0) + { + pOut->baseAlign = m_maxBaseAlign; + } + else + { + returnCode = ADDR_NOTIMPLEMENTED; + } + } + + return returnCode; +} + +/** +**************************************************************************************************** +* Lib::GetMaxMetaAlignments +* +* @brief +* Gets maximum alignments for metadata (CMask, DCC and HTile) +* +* @return +* ADDR_E_RETURNCODE +**************************************************************************************************** +*/ +ADDR_E_RETURNCODE Lib::GetMaxMetaAlignments( + ADDR_GET_MAX_ALINGMENTS_OUTPUT* pOut ///< [out] output structure + ) const +{ + ADDR_E_RETURNCODE returnCode = ADDR_OK; + + if (GetFillSizeFieldsFlags() == TRUE) + { + if (pOut->size != sizeof(ADDR_GET_MAX_ALINGMENTS_OUTPUT)) + { + returnCode = ADDR_PARAMSIZEMISMATCH; + } + } + + if (returnCode == ADDR_OK) + { + if (m_maxMetaBaseAlign != 0) + { + pOut->baseAlign = m_maxMetaBaseAlign; + } + else + { + returnCode = ADDR_NOTIMPLEMENTED; + } + } + + return returnCode; +} + +/** +**************************************************************************************************** +* Lib::Bits2Number +* +* @brief +* Cat a array of binary bit to a number +* +* @return +* The number combined with the array of bits +**************************************************************************************************** +*/ +UINT_32 Lib::Bits2Number( + UINT_32 bitNum, ///< [in] how many bits + ...) 
///< [in] varaible bits value starting from MSB +{ + UINT_32 number = 0; + UINT_32 i; + va_list bits_ptr; + + va_start(bits_ptr, bitNum); + + for(i = 0; i < bitNum; i++) + { + number |= va_arg(bits_ptr, UINT_32); + number <<= 1; + } + + number >>= 1; + + va_end(bits_ptr); + + return number; +} + +//////////////////////////////////////////////////////////////////////////////////////////////////// +// Element lib +//////////////////////////////////////////////////////////////////////////////////////////////////// + +/** +**************************************************************************************************** +* Lib::Flt32ToColorPixel +* +* @brief +* Convert a FLT_32 value to a depth/stencil pixel value +* @return +* ADDR_E_RETURNCODE +**************************************************************************************************** +*/ +ADDR_E_RETURNCODE Lib::Flt32ToDepthPixel( + const ELEM_FLT32TODEPTHPIXEL_INPUT* pIn, + ELEM_FLT32TODEPTHPIXEL_OUTPUT* pOut) const +{ + ADDR_E_RETURNCODE returnCode = ADDR_OK; + + if (GetFillSizeFieldsFlags() == TRUE) + { + if ((pIn->size != sizeof(ELEM_FLT32TODEPTHPIXEL_INPUT)) || + (pOut->size != sizeof(ELEM_FLT32TODEPTHPIXEL_OUTPUT))) + { + returnCode = ADDR_PARAMSIZEMISMATCH; + } + } + + if (returnCode == ADDR_OK) + { + GetElemLib()->Flt32ToDepthPixel(pIn->format, pIn->comps, pOut->pPixel); + + UINT_32 depthBase = 0; + UINT_32 stencilBase = 0; + UINT_32 depthBits = 0; + UINT_32 stencilBits = 0; + + switch (pIn->format) + { + case ADDR_DEPTH_16: + depthBits = 16; + break; + case ADDR_DEPTH_X8_24: + case ADDR_DEPTH_8_24: + case ADDR_DEPTH_X8_24_FLOAT: + case ADDR_DEPTH_8_24_FLOAT: + depthBase = 8; + depthBits = 24; + stencilBits = 8; + break; + case ADDR_DEPTH_32_FLOAT: + depthBits = 32; + break; + case ADDR_DEPTH_X24_8_32_FLOAT: + depthBase = 8; + depthBits = 32; + stencilBits = 8; + break; + default: + break; + } + + // Overwrite base since R800 has no "tileBase" + if (GetElemLib()->IsDepthStencilTilePlanar() == 
FALSE) + { + depthBase = 0; + stencilBase = 0; + } + + depthBase *= 64; + stencilBase *= 64; + + pOut->stencilBase = stencilBase; + pOut->depthBase = depthBase; + pOut->depthBits = depthBits; + pOut->stencilBits = stencilBits; + } + + return returnCode; +} + +/** +**************************************************************************************************** +* Lib::Flt32ToColorPixel +* +* @brief +* Convert a FLT_32 value to a red/green/blue/alpha pixel value +* @return +* ADDR_E_RETURNCODE +**************************************************************************************************** +*/ +ADDR_E_RETURNCODE Lib::Flt32ToColorPixel( + const ELEM_FLT32TOCOLORPIXEL_INPUT* pIn, + ELEM_FLT32TOCOLORPIXEL_OUTPUT* pOut) const +{ + ADDR_E_RETURNCODE returnCode = ADDR_OK; + + if (GetFillSizeFieldsFlags() == TRUE) + { + if ((pIn->size != sizeof(ELEM_FLT32TOCOLORPIXEL_INPUT)) || + (pOut->size != sizeof(ELEM_FLT32TOCOLORPIXEL_OUTPUT))) + { + returnCode = ADDR_PARAMSIZEMISMATCH; + } + } + + if (returnCode == ADDR_OK) + { + GetElemLib()->Flt32ToColorPixel(pIn->format, + pIn->surfNum, + pIn->surfSwap, + pIn->comps, + pOut->pPixel); + } + + return returnCode; +} + +/** +**************************************************************************************************** +* Lib::GetExportNorm +* +* @brief +* Check one format can be EXPORT_NUM +* @return +* TRUE if EXPORT_NORM can be used +**************************************************************************************************** +*/ +BOOL_32 Lib::GetExportNorm( + const ELEM_GETEXPORTNORM_INPUT* pIn) const +{ + ADDR_E_RETURNCODE returnCode = ADDR_OK; + + BOOL_32 enabled = FALSE; + + if (GetFillSizeFieldsFlags() == TRUE) + { + if (pIn->size != sizeof(ELEM_GETEXPORTNORM_INPUT)) + { + returnCode = ADDR_PARAMSIZEMISMATCH; + } + } + + if (returnCode == ADDR_OK) + { + enabled = GetElemLib()->PixGetExportNorm(pIn->format, pIn->num, pIn->swap); + } + + return enabled; +} + +/** 
+**************************************************************************************************** +* Lib::GetBpe +* +* @brief +* Get bits-per-element for specified format +* @return +* bits-per-element of specified format +**************************************************************************************************** +*/ +UINT_32 Lib::GetBpe(AddrFormat format) const +{ + return GetElemLib()->GetBitsPerPixel(format); +} + +} // Addr diff -Nru mesa-18.3.3/src/amd/addrlib/src/core/addrlib.h mesa-19.0.1/src/amd/addrlib/src/core/addrlib.h --- mesa-18.3.3/src/amd/addrlib/src/core/addrlib.h 1970-01-01 00:00:00.000000000 +0000 +++ mesa-19.0.1/src/amd/addrlib/src/core/addrlib.h 2019-03-31 23:16:37.000000000 +0000 @@ -0,0 +1,415 @@ +/* + * Copyright © 2007-2018 Advanced Micro Devices, Inc. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining + * a copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES + * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NON-INFRINGEMENT. IN NO EVENT SHALL THE COPYRIGHT HOLDERS, AUTHORS + * AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE + * USE OR OTHER DEALINGS IN THE SOFTWARE. + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. 
+ */ + +/** +**************************************************************************************************** +* @file addrlib.h +* @brief Contains the Addr::Lib base class definition. +**************************************************************************************************** +*/ + +#ifndef __ADDR_LIB_H__ +#define __ADDR_LIB_H__ + +#include "addrinterface.h" +#include "addrobject.h" +#include "addrelemlib.h" + +#include "amdgpu_asic_addr.h" + +#ifndef CIASICIDGFXENGINE_R600 +#define CIASICIDGFXENGINE_R600 0x00000006 +#endif + +#ifndef CIASICIDGFXENGINE_R800 +#define CIASICIDGFXENGINE_R800 0x00000008 +#endif + +#ifndef CIASICIDGFXENGINE_SOUTHERNISLAND +#define CIASICIDGFXENGINE_SOUTHERNISLAND 0x0000000A +#endif + +#ifndef CIASICIDGFXENGINE_ARCTICISLAND +#define CIASICIDGFXENGINE_ARCTICISLAND 0x0000000D +#endif + +namespace Addr +{ + +/** +**************************************************************************************************** +* @brief Neutral enums that define pipeinterleave +**************************************************************************************************** +*/ +enum PipeInterleave +{ + ADDR_PIPEINTERLEAVE_256B = 256, + ADDR_PIPEINTERLEAVE_512B = 512, + ADDR_PIPEINTERLEAVE_1KB = 1024, + ADDR_PIPEINTERLEAVE_2KB = 2048, +}; + +/** +**************************************************************************************************** +* @brief Neutral enums that define DRAM row size +**************************************************************************************************** +*/ +enum RowSize +{ + ADDR_ROWSIZE_1KB = 1024, + ADDR_ROWSIZE_2KB = 2048, + ADDR_ROWSIZE_4KB = 4096, + ADDR_ROWSIZE_8KB = 8192, +}; + +/** +**************************************************************************************************** +* @brief Neutral enums that define bank interleave +**************************************************************************************************** +*/ +enum BankInterleave +{ + ADDR_BANKINTERLEAVE_1 = 1, 
+ ADDR_BANKINTERLEAVE_2 = 2, + ADDR_BANKINTERLEAVE_4 = 4, + ADDR_BANKINTERLEAVE_8 = 8, +}; + +/** +**************************************************************************************************** +* @brief Neutral enums that define shader engine tile size +**************************************************************************************************** +*/ +enum ShaderEngineTileSize +{ + ADDR_SE_TILESIZE_16 = 16, + ADDR_SE_TILESIZE_32 = 32, +}; + +/** +**************************************************************************************************** +* @brief Neutral enums that define bank swap size +**************************************************************************************************** +*/ +enum BankSwapSize +{ + ADDR_BANKSWAP_128B = 128, + ADDR_BANKSWAP_256B = 256, + ADDR_BANKSWAP_512B = 512, + ADDR_BANKSWAP_1KB = 1024, +}; + +/** +**************************************************************************************************** +* @brief Enums that define max compressed fragments config +**************************************************************************************************** +*/ +enum NumMaxCompressedFragmentsConfig +{ + ADDR_CONFIG_1_MAX_COMPRESSED_FRAGMENTS = 0x00000000, + ADDR_CONFIG_2_MAX_COMPRESSED_FRAGMENTS = 0x00000001, + ADDR_CONFIG_4_MAX_COMPRESSED_FRAGMENTS = 0x00000002, + ADDR_CONFIG_8_MAX_COMPRESSED_FRAGMENTS = 0x00000003, +}; + +/** +**************************************************************************************************** +* @brief Enums that define num pipes config +**************************************************************************************************** +*/ +enum NumPipesConfig +{ + ADDR_CONFIG_1_PIPE = 0x00000000, + ADDR_CONFIG_2_PIPE = 0x00000001, + ADDR_CONFIG_4_PIPE = 0x00000002, + ADDR_CONFIG_8_PIPE = 0x00000003, + ADDR_CONFIG_16_PIPE = 0x00000004, + ADDR_CONFIG_32_PIPE = 0x00000005, + ADDR_CONFIG_64_PIPE = 0x00000006, +}; + +/** 
+**************************************************************************************************** +* @brief Enums that define num banks config +**************************************************************************************************** +*/ +enum NumBanksConfig +{ + ADDR_CONFIG_1_BANK = 0x00000000, + ADDR_CONFIG_2_BANK = 0x00000001, + ADDR_CONFIG_4_BANK = 0x00000002, + ADDR_CONFIG_8_BANK = 0x00000003, + ADDR_CONFIG_16_BANK = 0x00000004, +}; + +/** +**************************************************************************************************** +* @brief Enums that define num rb per shader engine config +**************************************************************************************************** +*/ +enum NumRbPerShaderEngineConfig +{ + ADDR_CONFIG_1_RB_PER_SHADER_ENGINE = 0x00000000, + ADDR_CONFIG_2_RB_PER_SHADER_ENGINE = 0x00000001, + ADDR_CONFIG_4_RB_PER_SHADER_ENGINE = 0x00000002, +}; + +/** +**************************************************************************************************** +* @brief Enums that define num shader engines config +**************************************************************************************************** +*/ +enum NumShaderEnginesConfig +{ + ADDR_CONFIG_1_SHADER_ENGINE = 0x00000000, + ADDR_CONFIG_2_SHADER_ENGINE = 0x00000001, + ADDR_CONFIG_4_SHADER_ENGINE = 0x00000002, + ADDR_CONFIG_8_SHADER_ENGINE = 0x00000003, +}; + +/** +**************************************************************************************************** +* @brief Enums that define pipe interleave size config +**************************************************************************************************** +*/ +enum PipeInterleaveSizeConfig +{ + ADDR_CONFIG_PIPE_INTERLEAVE_256B = 0x00000000, + ADDR_CONFIG_PIPE_INTERLEAVE_512B = 0x00000001, + ADDR_CONFIG_PIPE_INTERLEAVE_1KB = 0x00000002, + ADDR_CONFIG_PIPE_INTERLEAVE_2KB = 0x00000003, +}; + +/** 
+**************************************************************************************************** +* @brief Enums that define row size config +**************************************************************************************************** +*/ +enum RowSizeConfig +{ + ADDR_CONFIG_1KB_ROW = 0x00000000, + ADDR_CONFIG_2KB_ROW = 0x00000001, + ADDR_CONFIG_4KB_ROW = 0x00000002, +}; + +/** +**************************************************************************************************** +* @brief Enums that define bank interleave size config +**************************************************************************************************** +*/ +enum BankInterleaveSizeConfig +{ + ADDR_CONFIG_BANK_INTERLEAVE_1 = 0x00000000, + ADDR_CONFIG_BANK_INTERLEAVE_2 = 0x00000001, + ADDR_CONFIG_BANK_INTERLEAVE_4 = 0x00000002, + ADDR_CONFIG_BANK_INTERLEAVE_8 = 0x00000003, +}; + +/** +**************************************************************************************************** +* @brief Enums that define engine tile size config +**************************************************************************************************** +*/ +enum ShaderEngineTileSizeConfig +{ + ADDR_CONFIG_SE_TILE_16 = 0x00000000, + ADDR_CONFIG_SE_TILE_32 = 0x00000001, +}; + +/** +**************************************************************************************************** +* @brief This class contains asic independent address lib functionalities +**************************************************************************************************** +*/ +class Lib : public Object +{ +public: + virtual ~Lib(); + + static ADDR_E_RETURNCODE Create( + const ADDR_CREATE_INPUT* pCreateInfo, ADDR_CREATE_OUTPUT* pCreateOut); + + /// Pair of Create + VOID Destroy() + { + delete this; + } + + static Lib* GetLib(ADDR_HANDLE hLib); + + /// Returns AddrLib version (from compiled binary instead include file) + UINT_32 GetVersion() + { + return m_version; + } + + /// Returns asic chip family name defined 
by AddrLib + ChipFamily GetChipFamily() + { + return m_chipFamily; + } + + ADDR_E_RETURNCODE Flt32ToDepthPixel( + const ELEM_FLT32TODEPTHPIXEL_INPUT* pIn, + ELEM_FLT32TODEPTHPIXEL_OUTPUT* pOut) const; + + ADDR_E_RETURNCODE Flt32ToColorPixel( + const ELEM_FLT32TOCOLORPIXEL_INPUT* pIn, + ELEM_FLT32TOCOLORPIXEL_OUTPUT* pOut) const; + + BOOL_32 GetExportNorm(const ELEM_GETEXPORTNORM_INPUT* pIn) const; + + ADDR_E_RETURNCODE GetMaxAlignments(ADDR_GET_MAX_ALINGMENTS_OUTPUT* pOut) const; + + ADDR_E_RETURNCODE GetMaxMetaAlignments(ADDR_GET_MAX_ALINGMENTS_OUTPUT* pOut) const; + + UINT_32 GetBpe(AddrFormat format) const; + +protected: + Lib(); // Constructor is protected + Lib(const Client* pClient); + + /// Pure virtual function to get max base alignments + virtual UINT_32 HwlComputeMaxBaseAlignments() const = 0; + + /// Gets maximum alignements for metadata + virtual UINT_32 HwlComputeMaxMetaBaseAlignments() const + { + ADDR_NOT_IMPLEMENTED(); + + return 0; + } + + VOID ValidBaseAlignments(UINT_32 alignment) const + { +#if DEBUG + ADDR_ASSERT(alignment <= m_maxBaseAlign); +#endif + } + + VOID ValidMetaBaseAlignments(UINT_32 metaAlignment) const + { +#if DEBUG + ADDR_ASSERT(metaAlignment <= m_maxMetaBaseAlign); +#endif + } + + // + // Initialization + // + /// Pure Virtual function for Hwl computing internal global parameters from h/w registers + virtual BOOL_32 HwlInitGlobalParams(const ADDR_CREATE_INPUT* pCreateIn) = 0; + + /// Pure Virtual function for Hwl converting chip family + virtual ChipFamily HwlConvertChipFamily(UINT_32 uChipFamily, UINT_32 uChipRevision) = 0; + + /// Get equation table pointer and number of equations + virtual UINT_32 HwlGetEquationTableInfo(const ADDR_EQUATION** ppEquationTable) const + { + *ppEquationTable = NULL; + + return 0; + } + + // + // Misc helper + // + static UINT_32 Bits2Number(UINT_32 bitNum, ...); + + static UINT_32 GetNumFragments(UINT_32 numSamples, UINT_32 numFrags) + { + return (numFrags != 0) ? 
numFrags : Max(1u, numSamples); + } + + /// Returns pointer of ElemLib + ElemLib* GetElemLib() const + { + return m_pElemLib; + } + + /// Returns fillSizeFields flag + UINT_32 GetFillSizeFieldsFlags() const + { + return m_configFlags.fillSizeFields; + } + +private: + // Disallow the copy constructor + Lib(const Lib& a); + + // Disallow the assignment operator + Lib& operator=(const Lib& a); + + VOID SetChipFamily(UINT_32 uChipFamily, UINT_32 uChipRevision); + + VOID SetMinPitchAlignPixels(UINT_32 minPitchAlignPixels); + + VOID SetMaxAlignments(); + +protected: + LibClass m_class; ///< Store class type (HWL type) + + ChipFamily m_chipFamily; ///< Chip family translated from the one in atiid.h + + UINT_32 m_chipRevision; ///< Revision id from xxx_id.h + + UINT_32 m_version; ///< Current version + + // + // Global parameters + // + ConfigFlags m_configFlags; ///< Global configuration flags. Note this is setup by + /// AddrLib instead of Client except forceLinearAligned + + UINT_32 m_pipes; ///< Number of pipes + UINT_32 m_banks; ///< Number of banks + /// For r800 this is MC_ARB_RAMCFG.NOOFBANK + /// Keep it here to do default parameter calculation + + UINT_32 m_pipeInterleaveBytes; + ///< Specifies the size of contiguous address space + /// within each tiling pipe when making linear + /// accesses. 
(Formerly Group Size) + + UINT_32 m_rowSize; ///< DRAM row size, in bytes + + UINT_32 m_minPitchAlignPixels; ///< Minimum pitch alignment in pixels + UINT_32 m_maxSamples; ///< Max numSamples + + UINT_32 m_maxBaseAlign; ///< Max base alignment for data surface + UINT_32 m_maxMetaBaseAlign; ///< Max base alignment for metadata + +private: + ElemLib* m_pElemLib; ///< Element Lib pointer +}; + +Lib* SiHwlInit (const Client* pClient); +Lib* CiHwlInit (const Client* pClient); +Lib* Gfx9HwlInit (const Client* pClient); + +} // Addr + +#endif diff -Nru mesa-18.3.3/src/amd/addrlib/src/core/addrobject.cpp mesa-19.0.1/src/amd/addrlib/src/core/addrobject.cpp --- mesa-18.3.3/src/amd/addrlib/src/core/addrobject.cpp 1970-01-01 00:00:00.000000000 +0000 +++ mesa-19.0.1/src/amd/addrlib/src/core/addrobject.cpp 2019-03-31 23:16:37.000000000 +0000 @@ -0,0 +1,233 @@ +/* + * Copyright © 2007-2018 Advanced Micro Devices, Inc. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining + * a copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES + * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NON-INFRINGEMENT. IN NO EVENT SHALL THE COPYRIGHT HOLDERS, AUTHORS + * AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE + * USE OR OTHER DEALINGS IN THE SOFTWARE. 
+ * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + */ + +/** +**************************************************************************************************** +* @file addrobject.cpp +* @brief Contains the Object base class implementation. +**************************************************************************************************** +*/ + +#include "addrinterface.h" +#include "addrobject.h" + +namespace Addr +{ + +/** +**************************************************************************************************** +* Object::Object +* +* @brief +* Constructor for the Object class. +**************************************************************************************************** +*/ +Object::Object() +{ + m_client.handle = NULL; + m_client.callbacks.allocSysMem = NULL; + m_client.callbacks.freeSysMem = NULL; + m_client.callbacks.debugPrint = NULL; +} + +/** +**************************************************************************************************** +* Object::Object +* +* @brief +* Constructor for the Object class. +**************************************************************************************************** +*/ +Object::Object(const Client* pClient) +{ + m_client = *pClient; +} + +/** +**************************************************************************************************** +* Object::~Object +* +* @brief +* Destructor for the Object class. 
+**************************************************************************************************** +*/ +Object::~Object() +{ +} + +/** +**************************************************************************************************** +* Object::ClientAlloc +* +* @brief +* Calls instanced allocSysMem inside Client +**************************************************************************************************** +*/ +VOID* Object::ClientAlloc( + size_t objSize, ///< [in] Size to allocate + const Client* pClient) ///< [in] Client pointer +{ + VOID* pObjMem = NULL; + + if (pClient->callbacks.allocSysMem != NULL) + { + ADDR_ALLOCSYSMEM_INPUT allocInput = {0}; + + allocInput.size = sizeof(ADDR_ALLOCSYSMEM_INPUT); + allocInput.flags.value = 0; + allocInput.sizeInBytes = static_cast(objSize); + allocInput.hClient = pClient->handle; + + pObjMem = pClient->callbacks.allocSysMem(&allocInput); + } + + return pObjMem; +} + +/** +**************************************************************************************************** +* Object::Alloc +* +* @brief +* A wrapper of ClientAlloc +**************************************************************************************************** +*/ +VOID* Object::Alloc( + size_t objSize ///< [in] Size to allocate + ) const +{ + return ClientAlloc(objSize, &m_client); +} + +/** +**************************************************************************************************** +* Object::ClientFree +* +* @brief +* Calls freeSysMem inside Client +**************************************************************************************************** +*/ +VOID Object::ClientFree( + VOID* pObjMem, ///< [in] User virtual address to free. 
+ const Client* pClient) ///< [in] Client pointer +{ + if (pClient->callbacks.freeSysMem != NULL) + { + if (pObjMem != NULL) + { + ADDR_FREESYSMEM_INPUT freeInput = {0}; + + freeInput.size = sizeof(ADDR_FREESYSMEM_INPUT); + freeInput.hClient = pClient->handle; + freeInput.pVirtAddr = pObjMem; + + pClient->callbacks.freeSysMem(&freeInput); + } + } +} + +/** +**************************************************************************************************** +* Object::Free +* +* @brief +* A wrapper of ClientFree +**************************************************************************************************** +*/ +VOID Object::Free( + VOID* pObjMem ///< [in] User virtual address to free. + ) const +{ + ClientFree(pObjMem, &m_client); +} + +/** +**************************************************************************************************** +* Object::operator new +* +* @brief +* Placement new operator. (with pre-allocated memory pointer) +* +* @return +* Returns pre-allocated memory pointer. +**************************************************************************************************** +*/ +VOID* Object::operator new( + size_t objSize, ///< [in] Size to allocate + VOID* pMem) ///< [in] Pre-allocated pointer +{ + return pMem; +} + +/** +**************************************************************************************************** +* Object::operator delete +* +* @brief +* Frees Object object memory. +**************************************************************************************************** +*/ +VOID Object::operator delete( + VOID* pObjMem) ///< [in] User virtual address to free. 
+{ + Object* pObj = static_cast(pObjMem); + ClientFree(pObjMem, &pObj->m_client); +} + +/** +**************************************************************************************************** +* Object::DebugPrint +* +* @brief +* Print debug message +* +* @return +* N/A +**************************************************************************************************** +*/ +VOID Object::DebugPrint( + const CHAR* pDebugString, ///< [in] Debug string + ... + ) const +{ +#if DEBUG + if (m_client.callbacks.debugPrint != NULL) + { + ADDR_DEBUGPRINT_INPUT debugPrintInput = {0}; + + debugPrintInput.size = sizeof(ADDR_DEBUGPRINT_INPUT); + debugPrintInput.pDebugString = const_cast(pDebugString); + debugPrintInput.hClient = m_client.handle; + va_start(debugPrintInput.ap, pDebugString); + + m_client.callbacks.debugPrint(&debugPrintInput); + + va_end(debugPrintInput.ap); + } +#endif +} + +} // Addr diff -Nru mesa-18.3.3/src/amd/addrlib/src/core/addrobject.h mesa-19.0.1/src/amd/addrlib/src/core/addrobject.h --- mesa-18.3.3/src/amd/addrlib/src/core/addrobject.h 1970-01-01 00:00:00.000000000 +0000 +++ mesa-19.0.1/src/amd/addrlib/src/core/addrobject.h 2019-03-31 23:16:37.000000000 +0000 @@ -0,0 +1,95 @@ +/* + * Copyright © 2007-2018 Advanced Micro Devices, Inc. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining + * a copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES + * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NON-INFRINGEMENT. 
IN NO EVENT SHALL THE COPYRIGHT HOLDERS, AUTHORS + * AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE + * USE OR OTHER DEALINGS IN THE SOFTWARE. + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + */ + +/** +**************************************************************************************************** +* @file addrobject.h +* @brief Contains the Object base class definition. +**************************************************************************************************** +*/ + +#ifndef __ADDR_OBJECT_H__ +#define __ADDR_OBJECT_H__ + +#include "addrtypes.h" +#include "addrcommon.h" + +namespace Addr +{ + +/** +**************************************************************************************************** +* @brief This structure contains client specific data +**************************************************************************************************** +*/ +struct Client +{ + ADDR_CLIENT_HANDLE handle; + ADDR_CALLBACKS callbacks; +}; +/** +**************************************************************************************************** +* @brief This class is the base class for all ADDR class objects. +**************************************************************************************************** +*/ +class Object +{ +public: + Object(); + Object(const Client* pClient); + virtual ~Object(); + + VOID* operator new(size_t size, VOID* pMem); + VOID operator delete(VOID* pObj); + /// Microsoft compiler requires a matching delete implementation, which seems to be called when + /// bad_alloc is thrown. But currently C++ exception isn't allowed so a dummy implementation is + /// added to eliminate the warning. 
+ VOID operator delete(VOID* pObj, VOID* pMem) { ADDR_ASSERT_ALWAYS(); } + + VOID* Alloc(size_t size) const; + VOID Free(VOID* pObj) const; + + VOID DebugPrint(const CHAR* pDebugString, ...) const; + + const Client* GetClient() const {return &m_client;} + +protected: + Client m_client; + + static VOID* ClientAlloc(size_t size, const Client* pClient); + static VOID ClientFree(VOID* pObj, const Client* pClient); + +private: + // disallow the copy constructor + Object(const Object& a); + + // disallow the assignment operator + Object& operator=(const Object& a); +}; + +} // Addr +#endif + diff -Nru mesa-18.3.3/src/amd/addrlib/src/core/coord.cpp mesa-19.0.1/src/amd/addrlib/src/core/coord.cpp --- mesa-18.3.3/src/amd/addrlib/src/core/coord.cpp 1970-01-01 00:00:00.000000000 +0000 +++ mesa-19.0.1/src/amd/addrlib/src/core/coord.cpp 2019-03-31 23:16:37.000000000 +0000 @@ -0,0 +1,714 @@ +/* + * Copyright © 2007-2018 Advanced Micro Devices, Inc. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining + * a copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES + * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NON-INFRINGEMENT. IN NO EVENT SHALL THE COPYRIGHT HOLDERS, AUTHORS + * AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE + * USE OR OTHER DEALINGS IN THE SOFTWARE. 
+ * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + */ + +// Coordinate class implementation +#include "addrcommon.h" +#include "coord.h" + +namespace Addr +{ +namespace V2 +{ + +Coordinate::Coordinate() +{ + dim = 'x'; + ord = 0; +} + +Coordinate::Coordinate(INT_8 c, INT_32 n) +{ + set(c, n); +} + +VOID Coordinate::set(INT_8 c, INT_32 n) +{ + dim = c; + ord = static_cast(n); +} + +UINT_32 Coordinate::ison(UINT_32 x, UINT_32 y, UINT_32 z, UINT_32 s, UINT_32 m) const +{ + UINT_32 bit = static_cast(1ull << static_cast(ord)); + UINT_32 out = 0; + + switch (dim) + { + case 'm': out = m & bit; break; + case 's': out = s & bit; break; + case 'x': out = x & bit; break; + case 'y': out = y & bit; break; + case 'z': out = z & bit; break; + } + return (out != 0) ? 1 : 0; +} + +INT_8 Coordinate::getdim() +{ + return dim; +} + +INT_8 Coordinate::getord() +{ + return ord; +} + +BOOL_32 Coordinate::operator==(const Coordinate& b) +{ + return (dim == b.dim) && (ord == b.ord); +} + +BOOL_32 Coordinate::operator<(const Coordinate& b) +{ + BOOL_32 ret; + + if (dim == b.dim) + { + ret = ord < b.ord; + } + else + { + if (dim == 's' || b.dim == 'm') + { + ret = TRUE; + } + else if (b.dim == 's' || dim == 'm') + { + ret = FALSE; + } + else if (ord == b.ord) + { + ret = dim < b.dim; + } + else + { + ret = ord < b.ord; + } + } + + return ret; +} + +BOOL_32 Coordinate::operator>(const Coordinate& b) +{ + BOOL_32 lt = *this < b; + BOOL_32 eq = *this == b; + return !lt && !eq; +} + +BOOL_32 Coordinate::operator<=(const Coordinate& b) +{ + return (*this < b) || (*this == b); +} + +BOOL_32 Coordinate::operator>=(const Coordinate& b) +{ + return !(*this < b); +} + +BOOL_32 Coordinate::operator!=(const Coordinate& b) +{ + return !(*this == b); +} + +Coordinate& Coordinate::operator++(INT_32) +{ + ord++; + return *this; +} + +// CoordTerm + +CoordTerm::CoordTerm() +{ + 
num_coords = 0; +} + +VOID CoordTerm::Clear() +{ + num_coords = 0; +} + +VOID CoordTerm::add(Coordinate& co) +{ + // This function adds a coordinate INT_32o the list + // It will prevent the same coordinate from appearing, + // and will keep the list ordered from smallest to largest + UINT_32 i; + + for (i = 0; i < num_coords; i++) + { + if (m_coord[i] == co) + { + break; + } + if (m_coord[i] > co) + { + for (UINT_32 j = num_coords; j > i; j--) + { + m_coord[j] = m_coord[j - 1]; + } + m_coord[i] = co; + num_coords++; + break; + } + } + + if (i == num_coords) + { + m_coord[num_coords] = co; + num_coords++; + } +} + +VOID CoordTerm::add(CoordTerm& cl) +{ + for (UINT_32 i = 0; i < cl.num_coords; i++) + { + add(cl.m_coord[i]); + } +} + +BOOL_32 CoordTerm::remove(Coordinate& co) +{ + BOOL_32 remove = FALSE; + for (UINT_32 i = 0; i < num_coords; i++) + { + if (m_coord[i] == co) + { + remove = TRUE; + num_coords--; + } + + if (remove) + { + m_coord[i] = m_coord[i + 1]; + } + } + return remove; +} + +BOOL_32 CoordTerm::Exists(Coordinate& co) +{ + BOOL_32 exists = FALSE; + for (UINT_32 i = 0; i < num_coords; i++) + { + if (m_coord[i] == co) + { + exists = TRUE; + break; + } + } + return exists; +} + +VOID CoordTerm::copyto(CoordTerm& cl) +{ + cl.num_coords = num_coords; + for (UINT_32 i = 0; i < num_coords; i++) + { + cl.m_coord[i] = m_coord[i]; + } +} + +UINT_32 CoordTerm::getsize() +{ + return num_coords; +} + +UINT_32 CoordTerm::getxor(UINT_32 x, UINT_32 y, UINT_32 z, UINT_32 s, UINT_32 m) const +{ + UINT_32 out = 0; + for (UINT_32 i = 0; i < num_coords; i++) + { + out = out ^ m_coord[i].ison(x, y, z, s, m); + } + return out; +} + +VOID CoordTerm::getsmallest(Coordinate& co) +{ + co = m_coord[0]; +} + +UINT_32 CoordTerm::Filter(INT_8 f, Coordinate& co, UINT_32 start, INT_8 axis) +{ + for (UINT_32 i = start; i < num_coords;) + { + if (((f == '<' && m_coord[i] < co) || + (f == '>' && m_coord[i] > co) || + (f == '=' && m_coord[i] == co)) && + (axis == '\0' || axis == 
m_coord[i].getdim())) + { + for (UINT_32 j = i; j < num_coords - 1; j++) + { + m_coord[j] = m_coord[j + 1]; + } + num_coords--; + } + else + { + i++; + } + } + return num_coords; +} + +Coordinate& CoordTerm::operator[](UINT_32 i) +{ + return m_coord[i]; +} + +BOOL_32 CoordTerm::operator==(const CoordTerm& b) +{ + BOOL_32 ret = TRUE; + + if (num_coords != b.num_coords) + { + ret = FALSE; + } + else + { + for (UINT_32 i = 0; i < num_coords; i++) + { + // Note: the lists will always be in order, so we can compare the two lists at time + if (m_coord[i] != b.m_coord[i]) + { + ret = FALSE; + break; + } + } + } + return ret; +} + +BOOL_32 CoordTerm::operator!=(const CoordTerm& b) +{ + return !(*this == b); +} + +BOOL_32 CoordTerm::exceedRange(UINT_32 xRange, UINT_32 yRange, UINT_32 zRange, UINT_32 sRange) +{ + BOOL_32 exceed = FALSE; + for (UINT_32 i = 0; (i < num_coords) && (exceed == FALSE); i++) + { + UINT_32 subject; + switch (m_coord[i].getdim()) + { + case 'x': + subject = xRange; + break; + case 'y': + subject = yRange; + break; + case 'z': + subject = zRange; + break; + case 's': + subject = sRange; + break; + case 'm': + subject = 0; + break; + default: + // Invalid input! 
+ ADDR_ASSERT_ALWAYS(); + subject = 0; + break; + } + + exceed = ((1u << m_coord[i].getord()) <= subject); + } + + return exceed; +} + +// coordeq +CoordEq::CoordEq() +{ + m_numBits = 0; +} + +VOID CoordEq::remove(Coordinate& co) +{ + for (UINT_32 i = 0; i < m_numBits; i++) + { + m_eq[i].remove(co); + } +} + +BOOL_32 CoordEq::Exists(Coordinate& co) +{ + BOOL_32 exists = FALSE; + + for (UINT_32 i = 0; i < m_numBits; i++) + { + if (m_eq[i].Exists(co)) + { + exists = TRUE; + } + } + return exists; +} + +VOID CoordEq::resize(UINT_32 n) +{ + if (n > m_numBits) + { + for (UINT_32 i = m_numBits; i < n; i++) + { + m_eq[i].Clear(); + } + } + m_numBits = n; +} + +UINT_32 CoordEq::getsize() +{ + return m_numBits; +} + +UINT_64 CoordEq::solve(UINT_32 x, UINT_32 y, UINT_32 z, UINT_32 s, UINT_32 m) const +{ + UINT_64 out = 0; + for (UINT_32 i = 0; i < m_numBits; i++) + { + if (m_eq[i].getxor(x, y, z, s, m) != 0) + { + out |= (1ULL << i); + } + } + return out; +} + +VOID CoordEq::solveAddr( + UINT_64 addr, UINT_32 sliceInM, + UINT_32& x, UINT_32& y, UINT_32& z, UINT_32& s, UINT_32& m) const +{ + UINT_32 xBitsValid = 0; + UINT_32 yBitsValid = 0; + UINT_32 zBitsValid = 0; + UINT_32 sBitsValid = 0; + UINT_32 mBitsValid = 0; + + CoordEq temp = *this; + + x = y = z = s = m = 0; + + UINT_32 bitsLeft = 0; + + for (UINT_32 i = 0; i < temp.m_numBits; i++) + { + UINT_32 termSize = temp.m_eq[i].getsize(); + + if (termSize == 1) + { + INT_8 bit = (addr >> i) & 1; + INT_8 dim = temp.m_eq[i][0].getdim(); + INT_8 ord = temp.m_eq[i][0].getord(); + + ADDR_ASSERT((ord < 32) || (bit == 0)); + + switch (dim) + { + case 'x': + xBitsValid |= (1 << ord); + x |= (bit << ord); + break; + case 'y': + yBitsValid |= (1 << ord); + y |= (bit << ord); + break; + case 'z': + zBitsValid |= (1 << ord); + z |= (bit << ord); + break; + case 's': + sBitsValid |= (1 << ord); + s |= (bit << ord); + break; + case 'm': + mBitsValid |= (1 << ord); + m |= (bit << ord); + break; + default: + break; + } + + 
temp.m_eq[i].Clear(); + } + else if (termSize > 1) + { + bitsLeft++; + } + } + + if (bitsLeft > 0) + { + if (sliceInM != 0) + { + z = m / sliceInM; + zBitsValid = 0xffffffff; + } + + do + { + bitsLeft = 0; + + for (UINT_32 i = 0; i < temp.m_numBits; i++) + { + UINT_32 termSize = temp.m_eq[i].getsize(); + + if (termSize == 1) + { + INT_8 bit = (addr >> i) & 1; + INT_8 dim = temp.m_eq[i][0].getdim(); + INT_8 ord = temp.m_eq[i][0].getord(); + + ADDR_ASSERT((ord < 32) || (bit == 0)); + + switch (dim) + { + case 'x': + xBitsValid |= (1 << ord); + x |= (bit << ord); + break; + case 'y': + yBitsValid |= (1 << ord); + y |= (bit << ord); + break; + case 'z': + zBitsValid |= (1 << ord); + z |= (bit << ord); + break; + case 's': + ADDR_ASSERT_ALWAYS(); + break; + case 'm': + ADDR_ASSERT_ALWAYS(); + break; + default: + break; + } + + temp.m_eq[i].Clear(); + } + else if (termSize > 1) + { + CoordTerm tmpTerm = temp.m_eq[i]; + + for (UINT_32 j = 0; j < termSize; j++) + { + INT_8 dim = temp.m_eq[i][j].getdim(); + INT_8 ord = temp.m_eq[i][j].getord(); + + switch (dim) + { + case 'x': + if (xBitsValid & (1 << ord)) + { + UINT_32 v = (((x >> ord) & 1) << i); + addr ^= static_cast(v); + tmpTerm.remove(temp.m_eq[i][j]); + } + break; + case 'y': + if (yBitsValid & (1 << ord)) + { + UINT_32 v = (((y >> ord) & 1) << i); + addr ^= static_cast(v); + tmpTerm.remove(temp.m_eq[i][j]); + } + break; + case 'z': + if (zBitsValid & (1 << ord)) + { + UINT_32 v = (((z >> ord) & 1) << i); + addr ^= static_cast(v); + tmpTerm.remove(temp.m_eq[i][j]); + } + break; + case 's': + ADDR_ASSERT_ALWAYS(); + break; + case 'm': + ADDR_ASSERT_ALWAYS(); + break; + default: + break; + } + } + + temp.m_eq[i] = tmpTerm; + + bitsLeft++; + } + } + } while (bitsLeft > 0); + } +} + +VOID CoordEq::copy(CoordEq& o, UINT_32 start, UINT_32 num) +{ + o.m_numBits = (num == 0xFFFFFFFF) ? 
m_numBits : num; + for (UINT_32 i = 0; i < o.m_numBits; i++) + { + m_eq[start + i].copyto(o.m_eq[i]); + } +} + +VOID CoordEq::reverse(UINT_32 start, UINT_32 num) +{ + UINT_32 n = (num == 0xFFFFFFFF) ? m_numBits : num; + + for (UINT_32 i = 0; i < n / 2; i++) + { + CoordTerm temp; + m_eq[start + i].copyto(temp); + m_eq[start + n - 1 - i].copyto(m_eq[start + i]); + temp.copyto(m_eq[start + n - 1 - i]); + } +} + +VOID CoordEq::xorin(CoordEq& x, UINT_32 start) +{ + UINT_32 n = ((m_numBits - start) < x.m_numBits) ? (m_numBits - start) : x.m_numBits; + for (UINT_32 i = 0; i < n; i++) + { + m_eq[start + i].add(x.m_eq[i]); + } +} + +UINT_32 CoordEq::Filter(INT_8 f, Coordinate& co, UINT_32 start, INT_8 axis) +{ + for (UINT_32 i = start; i < m_numBits;) + { + UINT_32 m = m_eq[i].Filter(f, co, 0, axis); + if (m == 0) + { + for (UINT_32 j = i; j < m_numBits - 1; j++) + { + m_eq[j] = m_eq[j + 1]; + } + m_numBits--; + } + else + { + i++; + } + } + return m_numBits; +} + +VOID CoordEq::shift(INT_32 amount, INT_32 start) +{ + if (amount != 0) + { + INT_32 numBits = static_cast(m_numBits); + amount = -amount; + INT_32 inc = (amount < 0) ? -1 : 1; + INT_32 i = (amount < 0) ? numBits - 1 : start; + INT_32 end = (amount < 0) ? start - 1 : numBits; + for (; (inc > 0) ? i < end : i > end; i += inc) + { + if ((i + amount < start) || (i + amount >= numBits)) + { + m_eq[i].Clear(); + } + else + { + m_eq[i + amount].copyto(m_eq[i]); + } + } + } +} + +CoordTerm& CoordEq::operator[](UINT_32 i) +{ + return m_eq[i]; +} + +VOID CoordEq::mort2d(Coordinate& c0, Coordinate& c1, UINT_32 start, UINT_32 end) +{ + if (end == 0) + { + ADDR_ASSERT(m_numBits > 0); + end = m_numBits - 1; + } + for (UINT_32 i = start; i <= end; i++) + { + UINT_32 select = (i - start) % 2; + Coordinate& c = (select == 0) ? 
c0 : c1; + m_eq[i].add(c); + c++; + } +} + +VOID CoordEq::mort3d(Coordinate& c0, Coordinate& c1, Coordinate& c2, UINT_32 start, UINT_32 end) +{ + if (end == 0) + { + ADDR_ASSERT(m_numBits > 0); + end = m_numBits - 1; + } + for (UINT_32 i = start; i <= end; i++) + { + UINT_32 select = (i - start) % 3; + Coordinate& c = (select == 0) ? c0 : ((select == 1) ? c1 : c2); + m_eq[i].add(c); + c++; + } +} + +BOOL_32 CoordEq::operator==(const CoordEq& b) +{ + BOOL_32 ret = TRUE; + + if (m_numBits != b.m_numBits) + { + ret = FALSE; + } + else + { + for (UINT_32 i = 0; i < m_numBits; i++) + { + if (m_eq[i] != b.m_eq[i]) + { + ret = FALSE; + break; + } + } + } + return ret; +} + +BOOL_32 CoordEq::operator!=(const CoordEq& b) +{ + return !(*this == b); +} + +} // V2 +} // Addr diff -Nru mesa-18.3.3/src/amd/addrlib/src/core/coord.h mesa-19.0.1/src/amd/addrlib/src/core/coord.h --- mesa-18.3.3/src/amd/addrlib/src/core/coord.h 1970-01-01 00:00:00.000000000 +0000 +++ mesa-19.0.1/src/amd/addrlib/src/core/coord.h 2019-03-31 23:16:37.000000000 +0000 @@ -0,0 +1,122 @@ +/* + * Copyright © 2007-2018 Advanced Micro Devices, Inc. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining + * a copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES + * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NON-INFRINGEMENT. 
IN NO EVENT SHALL THE COPYRIGHT HOLDERS, AUTHORS + * AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE + * USE OR OTHER DEALINGS IN THE SOFTWARE. + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + */ + +// Class used to define a coordinate bit + +#ifndef __COORD_H +#define __COORD_H + +namespace Addr +{ +namespace V2 +{ + +class Coordinate +{ +public: + Coordinate(); + Coordinate(INT_8 c, INT_32 n); + + VOID set(INT_8 c, INT_32 n); + UINT_32 ison(UINT_32 x, UINT_32 y, UINT_32 z = 0, UINT_32 s = 0, UINT_32 m = 0) const; + INT_8 getdim(); + INT_8 getord(); + + BOOL_32 operator==(const Coordinate& b); + BOOL_32 operator<(const Coordinate& b); + BOOL_32 operator>(const Coordinate& b); + BOOL_32 operator<=(const Coordinate& b); + BOOL_32 operator>=(const Coordinate& b); + BOOL_32 operator!=(const Coordinate& b); + Coordinate& operator++(INT_32); + +private: + INT_8 dim; + INT_8 ord; +}; + +class CoordTerm +{ +public: + CoordTerm(); + VOID Clear(); + VOID add(Coordinate& co); + VOID add(CoordTerm& cl); + BOOL_32 remove(Coordinate& co); + BOOL_32 Exists(Coordinate& co); + VOID copyto(CoordTerm& cl); + UINT_32 getsize(); + UINT_32 getxor(UINT_32 x, UINT_32 y, UINT_32 z = 0, UINT_32 s = 0, UINT_32 m = 0) const; + + VOID getsmallest(Coordinate& co); + UINT_32 Filter(INT_8 f, Coordinate& co, UINT_32 start = 0, INT_8 axis = '\0'); + Coordinate& operator[](UINT_32 i); + BOOL_32 operator==(const CoordTerm& b); + BOOL_32 operator!=(const CoordTerm& b); + BOOL_32 exceedRange(UINT_32 xRange, UINT_32 yRange = 0, UINT_32 zRange = 0, UINT_32 sRange = 0); + +private: + static const UINT_32 MaxCoords = 8; + UINT_32 num_coords; + Coordinate m_coord[MaxCoords]; +}; + +class CoordEq +{ +public: + CoordEq(); + VOID 
remove(Coordinate& co); + BOOL_32 Exists(Coordinate& co); + VOID resize(UINT_32 n); + UINT_32 getsize(); + virtual UINT_64 solve(UINT_32 x, UINT_32 y, UINT_32 z = 0, UINT_32 s = 0, UINT_32 m = 0) const; + virtual VOID solveAddr(UINT_64 addr, UINT_32 sliceInM, + UINT_32& x, UINT_32& y, UINT_32& z, UINT_32& s, UINT_32& m) const; + + VOID copy(CoordEq& o, UINT_32 start = 0, UINT_32 num = 0xFFFFFFFF); + VOID reverse(UINT_32 start = 0, UINT_32 num = 0xFFFFFFFF); + VOID xorin(CoordEq& x, UINT_32 start = 0); + UINT_32 Filter(INT_8 f, Coordinate& co, UINT_32 start = 0, INT_8 axis = '\0'); + VOID shift(INT_32 amount, INT_32 start = 0); + virtual CoordTerm& operator[](UINT_32 i); + VOID mort2d(Coordinate& c0, Coordinate& c1, UINT_32 start = 0, UINT_32 end = 0); + VOID mort3d(Coordinate& c0, Coordinate& c1, Coordinate& c2, UINT_32 start = 0, UINT_32 end = 0); + + BOOL_32 operator==(const CoordEq& b); + BOOL_32 operator!=(const CoordEq& b); + +private: + static const UINT_32 MaxEqBits = 64; + UINT_32 m_numBits; + + CoordTerm m_eq[MaxEqBits]; +}; + +} // V2 +} // Addr + +#endif + diff -Nru mesa-18.3.3/src/amd/addrlib/src/gfx9/gfx9addrlib.cpp mesa-19.0.1/src/amd/addrlib/src/gfx9/gfx9addrlib.cpp --- mesa-18.3.3/src/amd/addrlib/src/gfx9/gfx9addrlib.cpp 1970-01-01 00:00:00.000000000 +0000 +++ mesa-19.0.1/src/amd/addrlib/src/gfx9/gfx9addrlib.cpp 2019-03-31 23:16:37.000000000 +0000 @@ -0,0 +1,4871 @@ +/* + * Copyright © 2007-2018 Advanced Micro Devices, Inc. + * All Rights Reserved. 
+ * + * Permission is hereby granted, free of charge, to any person obtaining + * a copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES + * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NON-INFRINGEMENT. IN NO EVENT SHALL THE COPYRIGHT HOLDERS, AUTHORS + * AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE + * USE OR OTHER DEALINGS IN THE SOFTWARE. + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + */ + +/** +************************************************************************************************************************ +* @file gfx9addrlib.cpp +* @brief Contgfx9ns the implementation for the Gfx9Lib class. 
+************************************************************************************************************************ +*/ + +#include "gfx9addrlib.h" + +#include "gfx9_gb_reg.h" + +#include "amdgpu_asic_addr.h" + +#include "util/macros.h" + +//////////////////////////////////////////////////////////////////////////////////////////////////// +//////////////////////////////////////////////////////////////////////////////////////////////////// + +namespace Addr +{ + +/** +************************************************************************************************************************ +* Gfx9HwlInit +* +* @brief +* Creates an Gfx9Lib object. +* +* @return +* Returns an Gfx9Lib object pointer. +************************************************************************************************************************ +*/ +Addr::Lib* Gfx9HwlInit(const Client* pClient) +{ + return V2::Gfx9Lib::CreateObj(pClient); +} + +namespace V2 +{ + +//////////////////////////////////////////////////////////////////////////////////////////////////// +// Static Const Member +//////////////////////////////////////////////////////////////////////////////////////////////////// + +const SwizzleModeFlags Gfx9Lib::SwizzleModeTable[ADDR_SW_MAX_TYPE] = +{//Linear 256B 4KB 64KB Var Z Std Disp Rot XOR T RtOpt + {1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}, // ADDR_SW_LINEAR + {0, 1, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0}, // ADDR_SW_256B_S + {0, 1, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0}, // ADDR_SW_256B_D + {0, 1, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0}, // ADDR_SW_256B_R + + {0, 0, 1, 0, 0, 1, 0, 0, 0, 0, 0, 0}, // ADDR_SW_4KB_Z + {0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 0, 0}, // ADDR_SW_4KB_S + {0, 0, 1, 0, 0, 0, 0, 1, 0, 0, 0, 0}, // ADDR_SW_4KB_D + {0, 0, 1, 0, 0, 0, 0, 0, 1, 0, 0, 0}, // ADDR_SW_4KB_R + + {0, 0, 0, 1, 0, 1, 0, 0, 0, 0, 0, 0}, // ADDR_SW_64KB_Z + {0, 0, 0, 1, 0, 0, 1, 0, 0, 0, 0, 0}, // ADDR_SW_64KB_S + {0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 0}, // ADDR_SW_64KB_D + {0, 0, 0, 1, 0, 0, 0, 0, 1, 0, 0, 0}, // 
ADDR_SW_64KB_R + + {0, 0, 0, 0, 1, 1, 0, 0, 0, 0, 0, 0}, // ADDR_SW_VAR_Z + {0, 0, 0, 0, 1, 0, 1, 0, 0, 0, 0, 0}, // ADDR_SW_VAR_S + {0, 0, 0, 0, 1, 0, 0, 1, 0, 0, 0, 0}, // ADDR_SW_VAR_D + {0, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0}, // ADDR_SW_VAR_R + + {0, 0, 0, 1, 0, 1, 0, 0, 0, 1, 1, 0}, // ADDR_SW_64KB_Z_T + {0, 0, 0, 1, 0, 0, 1, 0, 0, 1, 1, 0}, // ADDR_SW_64KB_S_T + {0, 0, 0, 1, 0, 0, 0, 1, 0, 1, 1, 0}, // ADDR_SW_64KB_D_T + {0, 0, 0, 1, 0, 0, 0, 0, 1, 1, 1, 0}, // ADDR_SW_64KB_R_T + + {0, 0, 1, 0, 0, 1, 0, 0, 0, 1, 0, 0}, // ADDR_SW_4KB_Z_x + {0, 0, 1, 0, 0, 0, 1, 0, 0, 1, 0, 0}, // ADDR_SW_4KB_S_x + {0, 0, 1, 0, 0, 0, 0, 1, 0, 1, 0, 0}, // ADDR_SW_4KB_D_x + {0, 0, 1, 0, 0, 0, 0, 0, 1, 1, 0, 0}, // ADDR_SW_4KB_R_x + + {0, 0, 0, 1, 0, 1, 0, 0, 0, 1, 0, 0}, // ADDR_SW_64KB_Z_X + {0, 0, 0, 1, 0, 0, 1, 0, 0, 1, 0, 0}, // ADDR_SW_64KB_S_X + {0, 0, 0, 1, 0, 0, 0, 1, 0, 1, 0, 0}, // ADDR_SW_64KB_D_X + {0, 0, 0, 1, 0, 0, 0, 0, 1, 1, 0, 0}, // ADDR_SW_64KB_R_X + + {0, 0, 0, 0, 1, 1, 0, 0, 0, 1, 0, 0}, // ADDR_SW_VAR_Z_X + {0, 0, 0, 0, 1, 0, 1, 0, 0, 1, 0, 0}, // ADDR_SW_VAR_S_X + {0, 0, 0, 0, 1, 0, 0, 1, 0, 1, 0, 0}, // ADDR_SW_VAR_D_X + {0, 0, 0, 0, 1, 0, 0, 0, 1, 1, 0, 0}, // ADDR_SW_VAR_R_X + {1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}, // ADDR_SW_LINEAR_GENERAL +}; + +const UINT_32 Gfx9Lib::MipTailOffset256B[] = {2048, 1024, 512, 256, 128, 64, 32, 16, + 8, 6, 5, 4, 3, 2, 1, 0}; + +const Dim3d Gfx9Lib::Block256_3dS[] = {{16, 4, 4}, {8, 4, 4}, {4, 4, 4}, {2, 4, 4}, {1, 4, 4}}; + +const Dim3d Gfx9Lib::Block256_3dZ[] = {{8, 4, 8}, {4, 4, 8}, {4, 4, 4}, {4, 2, 4}, {2, 2, 4}}; + +/** +************************************************************************************************************************ +* Gfx9Lib::Gfx9Lib +* +* @brief +* Constructor +* +************************************************************************************************************************ +*/ +Gfx9Lib::Gfx9Lib(const Client* pClient) + : + Lib(pClient), + m_numEquations(0) +{ + m_class = 
AI_ADDRLIB; + memset(&m_settings, 0, sizeof(m_settings)); + memcpy(m_swizzleModeTable, SwizzleModeTable, sizeof(SwizzleModeTable)); +} + +/** +************************************************************************************************************************ +* Gfx9Lib::~Gfx9Lib +* +* @brief +* Destructor +************************************************************************************************************************ +*/ +Gfx9Lib::~Gfx9Lib() +{ +} + +/** +************************************************************************************************************************ +* Gfx9Lib::HwlComputeHtileInfo +* +* @brief +* Interface function stub of AddrComputeHtilenfo +* +* @return +* ADDR_E_RETURNCODE +************************************************************************************************************************ +*/ +ADDR_E_RETURNCODE Gfx9Lib::HwlComputeHtileInfo( + const ADDR2_COMPUTE_HTILE_INFO_INPUT* pIn, ///< [in] input structure + ADDR2_COMPUTE_HTILE_INFO_OUTPUT* pOut ///< [out] output structure + ) const +{ + UINT_32 numPipeTotal = GetPipeNumForMetaAddressing(pIn->hTileFlags.pipeAligned, + pIn->swizzleMode); + + UINT_32 numRbTotal = pIn->hTileFlags.rbAligned ? m_se * m_rbPerSe : 1; + + UINT_32 numCompressBlkPerMetaBlk, numCompressBlkPerMetaBlkLog2; + + if ((numPipeTotal == 1) && (numRbTotal == 1)) + { + numCompressBlkPerMetaBlkLog2 = 10; + } + else + { + if (m_settings.applyAliasFix) + { + numCompressBlkPerMetaBlkLog2 = m_seLog2 + m_rbPerSeLog2 + Max(10u, m_pipeInterleaveLog2); + } + else + { + numCompressBlkPerMetaBlkLog2 = m_seLog2 + m_rbPerSeLog2 + 10; + } + } + + numCompressBlkPerMetaBlk = 1 << numCompressBlkPerMetaBlkLog2; + + Dim3d metaBlkDim = {8, 8, 1}; + UINT_32 totalAmpBits = numCompressBlkPerMetaBlkLog2; + UINT_32 widthAmp = (pIn->numMipLevels > 1) ? 
(totalAmpBits >> 1) : RoundHalf(totalAmpBits); + UINT_32 heightAmp = totalAmpBits - widthAmp; + metaBlkDim.w <<= widthAmp; + metaBlkDim.h <<= heightAmp; + +#if DEBUG + Dim3d metaBlkDimDbg = {8, 8, 1}; + for (UINT_32 index = 0; index < numCompressBlkPerMetaBlkLog2; index++) + { + if ((metaBlkDimDbg.h < metaBlkDimDbg.w) || + ((pIn->numMipLevels > 1) && (metaBlkDimDbg.h == metaBlkDimDbg.w))) + { + metaBlkDimDbg.h <<= 1; + } + else + { + metaBlkDimDbg.w <<= 1; + } + } + ADDR_ASSERT((metaBlkDimDbg.w == metaBlkDim.w) && (metaBlkDimDbg.h == metaBlkDim.h)); +#endif + + UINT_32 numMetaBlkX; + UINT_32 numMetaBlkY; + UINT_32 numMetaBlkZ; + + GetMetaMipInfo(pIn->numMipLevels, &metaBlkDim, FALSE, pOut->pMipInfo, + pIn->unalignedWidth, pIn->unalignedHeight, pIn->numSlices, + &numMetaBlkX, &numMetaBlkY, &numMetaBlkZ); + + const UINT_32 metaBlkSize = numCompressBlkPerMetaBlk << 2; + UINT_32 align = numPipeTotal * numRbTotal * m_pipeInterleaveBytes; + + if ((IsXor(pIn->swizzleMode) == FALSE) && (numPipeTotal > 2)) + { + align *= (numPipeTotal >> 1); + } + + align = Max(align, metaBlkSize); + + if (m_settings.metaBaseAlignFix) + { + align = Max(align, GetBlockSize(pIn->swizzleMode)); + } + + if (m_settings.htileAlignFix) + { + const INT_32 metaBlkSizeLog2 = numCompressBlkPerMetaBlkLog2 + 2; + const INT_32 htileCachelineSizeLog2 = 11; + const INT_32 maxNumOfRbMaskBits = 1 + Log2(numPipeTotal) + Log2(numRbTotal); + + INT_32 rbMaskPadding = Max(0, htileCachelineSizeLog2 - (metaBlkSizeLog2 - maxNumOfRbMaskBits)); + + align <<= rbMaskPadding; + } + + pOut->pitch = numMetaBlkX * metaBlkDim.w; + pOut->height = numMetaBlkY * metaBlkDim.h; + pOut->sliceSize = numMetaBlkX * numMetaBlkY * metaBlkSize; + + pOut->metaBlkWidth = metaBlkDim.w; + pOut->metaBlkHeight = metaBlkDim.h; + pOut->metaBlkNumPerSlice = numMetaBlkX * numMetaBlkY; + + pOut->baseAlign = align; + pOut->htileBytes = PowTwoAlign(pOut->sliceSize * numMetaBlkZ, align); + + return ADDR_OK; +} + +/** 
+************************************************************************************************************************ +* Gfx9Lib::HwlComputeCmaskInfo +* +* @brief +* Interface function stub of AddrComputeCmaskInfo +* +* @return +* ADDR_E_RETURNCODE +************************************************************************************************************************ +*/ +ADDR_E_RETURNCODE Gfx9Lib::HwlComputeCmaskInfo( + const ADDR2_COMPUTE_CMASK_INFO_INPUT* pIn, ///< [in] input structure + ADDR2_COMPUTE_CMASK_INFO_OUTPUT* pOut ///< [out] output structure + ) const +{ +// TODO: Clarify with AddrLib team +// ADDR_ASSERT(pIn->resourceType == ADDR_RSRC_TEX_2D); + + UINT_32 numPipeTotal = GetPipeNumForMetaAddressing(pIn->cMaskFlags.pipeAligned, + pIn->swizzleMode); + + UINT_32 numRbTotal = pIn->cMaskFlags.rbAligned ? m_se * m_rbPerSe : 1; + + UINT_32 numCompressBlkPerMetaBlkLog2, numCompressBlkPerMetaBlk; + + if ((numPipeTotal == 1) && (numRbTotal == 1)) + { + numCompressBlkPerMetaBlkLog2 = 13; + } + else + { + if (m_settings.applyAliasFix) + { + numCompressBlkPerMetaBlkLog2 = m_seLog2 + m_rbPerSeLog2 + Max(10u, m_pipeInterleaveLog2); + } + else + { + numCompressBlkPerMetaBlkLog2 = m_seLog2 + m_rbPerSeLog2 + 10; + } + + numCompressBlkPerMetaBlkLog2 = Max(numCompressBlkPerMetaBlkLog2, 13u); + } + + numCompressBlkPerMetaBlk = 1 << numCompressBlkPerMetaBlkLog2; + + Dim2d metaBlkDim = {8, 8}; + UINT_32 totalAmpBits = numCompressBlkPerMetaBlkLog2; + UINT_32 heightAmp = totalAmpBits >> 1; + UINT_32 widthAmp = totalAmpBits - heightAmp; + metaBlkDim.w <<= widthAmp; + metaBlkDim.h <<= heightAmp; + +#if DEBUG + Dim2d metaBlkDimDbg = {8, 8}; + for (UINT_32 index = 0; index < numCompressBlkPerMetaBlkLog2; index++) + { + if (metaBlkDimDbg.h < metaBlkDimDbg.w) + { + metaBlkDimDbg.h <<= 1; + } + else + { + metaBlkDimDbg.w <<= 1; + } + } + ADDR_ASSERT((metaBlkDimDbg.w == metaBlkDim.w) && (metaBlkDimDbg.h == metaBlkDim.h)); +#endif + + UINT_32 numMetaBlkX = (pIn->unalignedWidth + 
metaBlkDim.w - 1) / metaBlkDim.w; + UINT_32 numMetaBlkY = (pIn->unalignedHeight + metaBlkDim.h - 1) / metaBlkDim.h; + UINT_32 numMetaBlkZ = Max(pIn->numSlices, 1u); + + UINT_32 sizeAlign = numPipeTotal * numRbTotal * m_pipeInterleaveBytes; + + if (m_settings.metaBaseAlignFix) + { + sizeAlign = Max(sizeAlign, GetBlockSize(pIn->swizzleMode)); + } + + pOut->pitch = numMetaBlkX * metaBlkDim.w; + pOut->height = numMetaBlkY * metaBlkDim.h; + pOut->sliceSize = (numMetaBlkX * numMetaBlkY * numCompressBlkPerMetaBlk) >> 1; + pOut->cmaskBytes = PowTwoAlign(pOut->sliceSize * numMetaBlkZ, sizeAlign); + pOut->baseAlign = Max(numCompressBlkPerMetaBlk >> 1, sizeAlign); + + pOut->metaBlkWidth = metaBlkDim.w; + pOut->metaBlkHeight = metaBlkDim.h; + + pOut->metaBlkNumPerSlice = numMetaBlkX * numMetaBlkY; + + return ADDR_OK; +} + +/** +************************************************************************************************************************ +* Gfx9Lib::GetMetaMipInfo +* +* @brief +* Get meta mip info +* +* @return +* N/A +************************************************************************************************************************ +*/ +VOID Gfx9Lib::GetMetaMipInfo( + UINT_32 numMipLevels, ///< [in] number of mip levels + Dim3d* pMetaBlkDim, ///< [in] meta block dimension + BOOL_32 dataThick, ///< [in] data surface is thick + ADDR2_META_MIP_INFO* pInfo, ///< [out] meta mip info + UINT_32 mip0Width, ///< [in] mip0 width + UINT_32 mip0Height, ///< [in] mip0 height + UINT_32 mip0Depth, ///< [in] mip0 depth + UINT_32* pNumMetaBlkX, ///< [out] number of metablock X in mipchain + UINT_32* pNumMetaBlkY, ///< [out] number of metablock Y in mipchain + UINT_32* pNumMetaBlkZ) ///< [out] number of metablock Z in mipchain + const +{ + UINT_32 numMetaBlkX = (mip0Width + pMetaBlkDim->w - 1) / pMetaBlkDim->w; + UINT_32 numMetaBlkY = (mip0Height + pMetaBlkDim->h - 1) / pMetaBlkDim->h; + UINT_32 numMetaBlkZ = (mip0Depth + pMetaBlkDim->d - 1) / pMetaBlkDim->d; + UINT_32 tailWidth 
= pMetaBlkDim->w; + UINT_32 tailHeight = pMetaBlkDim->h >> 1; + UINT_32 tailDepth = pMetaBlkDim->d; + BOOL_32 inTail = FALSE; + AddrMajorMode major = ADDR_MAJOR_MAX_TYPE; + + if (numMipLevels > 1) + { + if (dataThick && (numMetaBlkZ > numMetaBlkX) && (numMetaBlkZ > numMetaBlkY)) + { + // Z major + major = ADDR_MAJOR_Z; + } + else if (numMetaBlkX >= numMetaBlkY) + { + // X major + major = ADDR_MAJOR_X; + } + else + { + // Y major + major = ADDR_MAJOR_Y; + } + + inTail = ((mip0Width <= tailWidth) && + (mip0Height <= tailHeight) && + ((dataThick == FALSE) || (mip0Depth <= tailDepth))); + + if (inTail == FALSE) + { + UINT_32 orderLimit; + UINT_32 *pMipDim; + UINT_32 *pOrderDim; + + if (major == ADDR_MAJOR_Z) + { + // Z major + pMipDim = &numMetaBlkY; + pOrderDim = &numMetaBlkZ; + orderLimit = 4; + } + else if (major == ADDR_MAJOR_X) + { + // X major + pMipDim = &numMetaBlkY; + pOrderDim = &numMetaBlkX; + orderLimit = 4; + } + else + { + // Y major + pMipDim = &numMetaBlkX; + pOrderDim = &numMetaBlkY; + orderLimit = 2; + } + + if ((*pMipDim < 3) && (*pOrderDim > orderLimit) && (numMipLevels > 3)) + { + *pMipDim += 2; + } + else + { + *pMipDim += ((*pMipDim / 2) + (*pMipDim & 1)); + } + } + } + + if (pInfo != NULL) + { + UINT_32 mipWidth = mip0Width; + UINT_32 mipHeight = mip0Height; + UINT_32 mipDepth = mip0Depth; + Dim3d mipCoord = {0}; + + for (UINT_32 mip = 0; mip < numMipLevels; mip++) + { + if (inTail) + { + GetMetaMiptailInfo(&pInfo[mip], mipCoord, numMipLevels - mip, + pMetaBlkDim); + break; + } + else + { + mipWidth = PowTwoAlign(mipWidth, pMetaBlkDim->w); + mipHeight = PowTwoAlign(mipHeight, pMetaBlkDim->h); + mipDepth = PowTwoAlign(mipDepth, pMetaBlkDim->d); + + pInfo[mip].inMiptail = FALSE; + pInfo[mip].startX = mipCoord.w; + pInfo[mip].startY = mipCoord.h; + pInfo[mip].startZ = mipCoord.d; + pInfo[mip].width = mipWidth; + pInfo[mip].height = mipHeight; + pInfo[mip].depth = dataThick ? 
mipDepth : 1; + + if ((mip >= 3) || (mip & 1)) + { + switch (major) + { + case ADDR_MAJOR_X: + mipCoord.w += mipWidth; + break; + case ADDR_MAJOR_Y: + mipCoord.h += mipHeight; + break; + case ADDR_MAJOR_Z: + mipCoord.d += mipDepth; + break; + default: + break; + } + } + else + { + switch (major) + { + case ADDR_MAJOR_X: + mipCoord.h += mipHeight; + break; + case ADDR_MAJOR_Y: + mipCoord.w += mipWidth; + break; + case ADDR_MAJOR_Z: + mipCoord.h += mipHeight; + break; + default: + break; + } + } + + mipWidth = Max(mipWidth >> 1, 1u); + mipHeight = Max(mipHeight >> 1, 1u); + mipDepth = Max(mipDepth >> 1, 1u); + + inTail = ((mipWidth <= tailWidth) && + (mipHeight <= tailHeight) && + ((dataThick == FALSE) || (mipDepth <= tailDepth))); + } + } + } + + *pNumMetaBlkX = numMetaBlkX; + *pNumMetaBlkY = numMetaBlkY; + *pNumMetaBlkZ = numMetaBlkZ; +} + +/** +************************************************************************************************************************ +* Gfx9Lib::HwlComputeDccInfo +* +* @brief +* Interface function to compute DCC key info +* +* @return +* ADDR_E_RETURNCODE +************************************************************************************************************************ +*/ +ADDR_E_RETURNCODE Gfx9Lib::HwlComputeDccInfo( + const ADDR2_COMPUTE_DCCINFO_INPUT* pIn, ///< [in] input structure + ADDR2_COMPUTE_DCCINFO_OUTPUT* pOut ///< [out] output structure + ) const +{ + BOOL_32 dataLinear = IsLinear(pIn->swizzleMode); + BOOL_32 metaLinear = pIn->dccKeyFlags.linear; + BOOL_32 pipeAligned = pIn->dccKeyFlags.pipeAligned; + + if (dataLinear) + { + metaLinear = TRUE; + } + else if (metaLinear == TRUE) + { + pipeAligned = FALSE; + } + + UINT_32 numPipeTotal = GetPipeNumForMetaAddressing(pipeAligned, pIn->swizzleMode); + + if (metaLinear) + { + // Linear metadata supporting was removed for GFX9! No one can use this feature on GFX9. 
+ ADDR_ASSERT_ALWAYS(); + + pOut->dccRamBaseAlign = numPipeTotal * m_pipeInterleaveBytes; + pOut->dccRamSize = PowTwoAlign((pIn->dataSurfaceSize / 256), pOut->dccRamBaseAlign); + } + else + { + BOOL_32 dataThick = IsThick(pIn->resourceType, pIn->swizzleMode); + + UINT_32 minMetaBlkSize = dataThick ? 65536 : 4096; + + UINT_32 numFrags = Max(pIn->numFrags, 1u); + UINT_32 numSlices = Max(pIn->numSlices, 1u); + + minMetaBlkSize /= numFrags; + + UINT_32 numCompressBlkPerMetaBlk = minMetaBlkSize; + + UINT_32 numRbTotal = pIn->dccKeyFlags.rbAligned ? m_se * m_rbPerSe : 1; + + if ((numPipeTotal > 1) || (numRbTotal > 1)) + { + const UINT_32 thinBlkSize = 1 << (m_settings.applyAliasFix ? Max(10u, m_pipeInterleaveLog2) : 10); + + numCompressBlkPerMetaBlk = + Max(numCompressBlkPerMetaBlk, m_se * m_rbPerSe * (dataThick ? 262144 : thinBlkSize)); + + if (numCompressBlkPerMetaBlk > 65536 * pIn->bpp) + { + numCompressBlkPerMetaBlk = 65536 * pIn->bpp; + } + } + + Dim3d compressBlkDim = GetDccCompressBlk(pIn->resourceType, pIn->swizzleMode, pIn->bpp); + Dim3d metaBlkDim = compressBlkDim; + + for (UINT_32 index = 1; index < numCompressBlkPerMetaBlk; index <<= 1) + { + if ((metaBlkDim.h < metaBlkDim.w) || + ((pIn->numMipLevels > 1) && (metaBlkDim.h == metaBlkDim.w))) + { + if ((dataThick == FALSE) || (metaBlkDim.h <= metaBlkDim.d)) + { + metaBlkDim.h <<= 1; + } + else + { + metaBlkDim.d <<= 1; + } + } + else + { + if ((dataThick == FALSE) || (metaBlkDim.w <= metaBlkDim.d)) + { + metaBlkDim.w <<= 1; + } + else + { + metaBlkDim.d <<= 1; + } + } + } + + UINT_32 numMetaBlkX; + UINT_32 numMetaBlkY; + UINT_32 numMetaBlkZ; + + GetMetaMipInfo(pIn->numMipLevels, &metaBlkDim, dataThick, pOut->pMipInfo, + pIn->unalignedWidth, pIn->unalignedHeight, numSlices, + &numMetaBlkX, &numMetaBlkY, &numMetaBlkZ); + + UINT_32 sizeAlign = numPipeTotal * numRbTotal * m_pipeInterleaveBytes; + + if (numFrags > m_maxCompFrag) + { + sizeAlign *= (numFrags / m_maxCompFrag); + } + + if (m_settings.metaBaseAlignFix) 
+ { + sizeAlign = Max(sizeAlign, GetBlockSize(pIn->swizzleMode)); + } + + pOut->dccRamSize = numMetaBlkX * numMetaBlkY * numMetaBlkZ * + numCompressBlkPerMetaBlk * numFrags; + pOut->dccRamSize = PowTwoAlign(pOut->dccRamSize, sizeAlign); + pOut->dccRamBaseAlign = Max(numCompressBlkPerMetaBlk, sizeAlign); + + pOut->pitch = numMetaBlkX * metaBlkDim.w; + pOut->height = numMetaBlkY * metaBlkDim.h; + pOut->depth = numMetaBlkZ * metaBlkDim.d; + + pOut->compressBlkWidth = compressBlkDim.w; + pOut->compressBlkHeight = compressBlkDim.h; + pOut->compressBlkDepth = compressBlkDim.d; + + pOut->metaBlkWidth = metaBlkDim.w; + pOut->metaBlkHeight = metaBlkDim.h; + pOut->metaBlkDepth = metaBlkDim.d; + + pOut->metaBlkNumPerSlice = numMetaBlkX * numMetaBlkY; + pOut->fastClearSizePerSlice = + pOut->metaBlkNumPerSlice * numCompressBlkPerMetaBlk * Min(numFrags, m_maxCompFrag); + } + + return ADDR_OK; +} + +/** +************************************************************************************************************************ +* Gfx9Lib::HwlComputeMaxBaseAlignments +* +* @brief +* Gets maximum alignments +* @return +* maximum alignments +************************************************************************************************************************ +*/ +UINT_32 Gfx9Lib::HwlComputeMaxBaseAlignments() const +{ + return ComputeSurfaceBaseAlignTiled(ADDR_SW_64KB); +} + +/** +************************************************************************************************************************ +* Gfx9Lib::HwlComputeMaxMetaBaseAlignments +* +* @brief +* Gets maximum alignments for metadata +* @return +* maximum alignments for metadata +************************************************************************************************************************ +*/ +UINT_32 Gfx9Lib::HwlComputeMaxMetaBaseAlignments() const +{ + // Max base alignment for Htile + const UINT_32 maxNumPipeTotal = GetPipeNumForMetaAddressing(TRUE, ADDR_SW_64KB_Z); + const UINT_32 maxNumRbTotal = m_se * 
m_rbPerSe; + + // If applyAliasFix was set, the extra bits should be MAX(10u, m_pipeInterleaveLog2), + // but we never saw any ASIC whose m_pipeInterleaveLog2 != 8, so just put an assertion and simply the logic. + ADDR_ASSERT((m_settings.applyAliasFix == FALSE) || (m_pipeInterleaveLog2 <= 10u)); + const UINT_32 maxNumCompressBlkPerMetaBlk = 1u << (m_seLog2 + m_rbPerSeLog2 + 10u); + + UINT_32 maxBaseAlignHtile = maxNumPipeTotal * maxNumRbTotal * m_pipeInterleaveBytes; + + if (maxNumPipeTotal > 2) + { + maxBaseAlignHtile *= (maxNumPipeTotal >> 1); + } + + maxBaseAlignHtile = Max(maxNumCompressBlkPerMetaBlk << 2, maxBaseAlignHtile); + + if (m_settings.metaBaseAlignFix) + { + maxBaseAlignHtile = Max(maxBaseAlignHtile, GetBlockSize(ADDR_SW_64KB)); + } + + if (m_settings.htileAlignFix) + { + maxBaseAlignHtile *= maxNumPipeTotal; + } + + // Max base alignment for Cmask will not be larger than that for Htile, no need to calculate + + // Max base alignment for 2D Dcc will not be larger than that for 3D, no need to calculate + UINT_32 maxBaseAlignDcc3D = 65536; + + if ((maxNumPipeTotal > 1) || (maxNumRbTotal > 1)) + { + maxBaseAlignDcc3D = Min(m_se * m_rbPerSe * 262144, 65536 * 128u); + } + + // Max base alignment for Msaa Dcc + UINT_32 maxBaseAlignDccMsaa = maxNumPipeTotal * maxNumRbTotal * m_pipeInterleaveBytes * (8 / m_maxCompFrag); + + if (m_settings.metaBaseAlignFix) + { + maxBaseAlignDccMsaa = Max(maxBaseAlignDccMsaa, GetBlockSize(ADDR_SW_64KB)); + } + + return Max(maxBaseAlignHtile, Max(maxBaseAlignDccMsaa, maxBaseAlignDcc3D)); +} + +/** +************************************************************************************************************************ +* Gfx9Lib::HwlComputeCmaskAddrFromCoord +* +* @brief +* Interface function stub of AddrComputeCmaskAddrFromCoord +* +* @return +* ADDR_E_RETURNCODE +************************************************************************************************************************ +*/ +ADDR_E_RETURNCODE 
Gfx9Lib::HwlComputeCmaskAddrFromCoord( + const ADDR2_COMPUTE_CMASK_ADDRFROMCOORD_INPUT* pIn, ///< [in] input structure + ADDR2_COMPUTE_CMASK_ADDRFROMCOORD_OUTPUT* pOut) ///< [out] output structure +{ + ADDR2_COMPUTE_CMASK_INFO_INPUT input = {0}; + input.size = sizeof(input); + input.cMaskFlags = pIn->cMaskFlags; + input.colorFlags = pIn->colorFlags; + input.unalignedWidth = Max(pIn->unalignedWidth, 1u); + input.unalignedHeight = Max(pIn->unalignedHeight, 1u); + input.numSlices = Max(pIn->numSlices, 1u); + input.swizzleMode = pIn->swizzleMode; + input.resourceType = pIn->resourceType; + + ADDR2_COMPUTE_CMASK_INFO_OUTPUT output = {0}; + output.size = sizeof(output); + + ADDR_E_RETURNCODE returnCode = ComputeCmaskInfo(&input, &output); + + if (returnCode == ADDR_OK) + { + UINT_32 fmaskBpp = GetFmaskBpp(pIn->numSamples, pIn->numFrags); + UINT_32 fmaskElementBytesLog2 = Log2(fmaskBpp >> 3); + UINT_32 metaBlkWidthLog2 = Log2(output.metaBlkWidth); + UINT_32 metaBlkHeightLog2 = Log2(output.metaBlkHeight); + + MetaEqParams metaEqParams = {0, fmaskElementBytesLog2, 0, pIn->cMaskFlags, + Gfx9DataFmask, pIn->swizzleMode, pIn->resourceType, + metaBlkWidthLog2, metaBlkHeightLog2, 0, 3, 3, 0}; + + const CoordEq* pMetaEq = GetMetaEquation(metaEqParams); + + UINT_32 xb = pIn->x / output.metaBlkWidth; + UINT_32 yb = pIn->y / output.metaBlkHeight; + UINT_32 zb = pIn->slice; + + UINT_32 pitchInBlock = output.pitch / output.metaBlkWidth; + UINT_32 sliceSizeInBlock = (output.height / output.metaBlkHeight) * pitchInBlock; + UINT_32 blockIndex = zb * sliceSizeInBlock + yb * pitchInBlock + xb; + + UINT_64 address = pMetaEq->solve(pIn->x, pIn->y, pIn->slice, 0, blockIndex); + + pOut->addr = address >> 1; + pOut->bitPosition = static_cast((address & 1) << 2); + + UINT_32 numPipeBits = GetPipeLog2ForMetaAddressing(pIn->cMaskFlags.pipeAligned, + pIn->swizzleMode); + + UINT_64 pipeXor = static_cast(pIn->pipeXor & ((1 << numPipeBits) - 1)); + + pOut->addr ^= (pipeXor << m_pipeInterleaveLog2); + 
} + + return returnCode; +} + +/** +************************************************************************************************************************ +* Gfx9Lib::HwlComputeHtileAddrFromCoord +* +* @brief +* Interface function stub of AddrComputeHtileAddrFromCoord +* +* @return +* ADDR_E_RETURNCODE +************************************************************************************************************************ +*/ +ADDR_E_RETURNCODE Gfx9Lib::HwlComputeHtileAddrFromCoord( + const ADDR2_COMPUTE_HTILE_ADDRFROMCOORD_INPUT* pIn, ///< [in] input structure + ADDR2_COMPUTE_HTILE_ADDRFROMCOORD_OUTPUT* pOut) ///< [out] output structure +{ + ADDR_E_RETURNCODE returnCode = ADDR_OK; + + if (pIn->numMipLevels > 1) + { + returnCode = ADDR_NOTIMPLEMENTED; + } + else + { + ADDR2_COMPUTE_HTILE_INFO_INPUT input = {0}; + input.size = sizeof(input); + input.hTileFlags = pIn->hTileFlags; + input.depthFlags = pIn->depthflags; + input.swizzleMode = pIn->swizzleMode; + input.unalignedWidth = Max(pIn->unalignedWidth, 1u); + input.unalignedHeight = Max(pIn->unalignedHeight, 1u); + input.numSlices = Max(pIn->numSlices, 1u); + input.numMipLevels = Max(pIn->numMipLevels, 1u); + + ADDR2_COMPUTE_HTILE_INFO_OUTPUT output = {0}; + output.size = sizeof(output); + + returnCode = ComputeHtileInfo(&input, &output); + + if (returnCode == ADDR_OK) + { + UINT_32 elementBytesLog2 = Log2(pIn->bpp >> 3); + UINT_32 metaBlkWidthLog2 = Log2(output.metaBlkWidth); + UINT_32 metaBlkHeightLog2 = Log2(output.metaBlkHeight); + UINT_32 numSamplesLog2 = Log2(pIn->numSamples); + + MetaEqParams metaEqParams = {0, elementBytesLog2, numSamplesLog2, pIn->hTileFlags, + Gfx9DataDepthStencil, pIn->swizzleMode, ADDR_RSRC_TEX_2D, + metaBlkWidthLog2, metaBlkHeightLog2, 0, 3, 3, 0}; + + const CoordEq* pMetaEq = GetMetaEquation(metaEqParams); + + UINT_32 xb = pIn->x / output.metaBlkWidth; + UINT_32 yb = pIn->y / output.metaBlkHeight; + UINT_32 zb = pIn->slice; + + UINT_32 pitchInBlock = output.pitch / 
output.metaBlkWidth; + UINT_32 sliceSizeInBlock = (output.height / output.metaBlkHeight) * pitchInBlock; + UINT_32 blockIndex = zb * sliceSizeInBlock + yb * pitchInBlock + xb; + + UINT_64 address = pMetaEq->solve(pIn->x, pIn->y, pIn->slice, 0, blockIndex); + + pOut->addr = address >> 1; + + UINT_32 numPipeBits = GetPipeLog2ForMetaAddressing(pIn->hTileFlags.pipeAligned, + pIn->swizzleMode); + + UINT_64 pipeXor = static_cast(pIn->pipeXor & ((1 << numPipeBits) - 1)); + + pOut->addr ^= (pipeXor << m_pipeInterleaveLog2); + } + } + + return returnCode; +} + +/** +************************************************************************************************************************ +* Gfx9Lib::HwlComputeHtileCoordFromAddr +* +* @brief +* Interface function stub of AddrComputeHtileCoordFromAddr +* +* @return +* ADDR_E_RETURNCODE +************************************************************************************************************************ +*/ +ADDR_E_RETURNCODE Gfx9Lib::HwlComputeHtileCoordFromAddr( + const ADDR2_COMPUTE_HTILE_COORDFROMADDR_INPUT* pIn, ///< [in] input structure + ADDR2_COMPUTE_HTILE_COORDFROMADDR_OUTPUT* pOut) ///< [out] output structure +{ + ADDR_E_RETURNCODE returnCode = ADDR_OK; + + if (pIn->numMipLevels > 1) + { + returnCode = ADDR_NOTIMPLEMENTED; + } + else + { + ADDR2_COMPUTE_HTILE_INFO_INPUT input = {0}; + input.size = sizeof(input); + input.hTileFlags = pIn->hTileFlags; + input.swizzleMode = pIn->swizzleMode; + input.unalignedWidth = Max(pIn->unalignedWidth, 1u); + input.unalignedHeight = Max(pIn->unalignedHeight, 1u); + input.numSlices = Max(pIn->numSlices, 1u); + input.numMipLevels = Max(pIn->numMipLevels, 1u); + + ADDR2_COMPUTE_HTILE_INFO_OUTPUT output = {0}; + output.size = sizeof(output); + + returnCode = ComputeHtileInfo(&input, &output); + + if (returnCode == ADDR_OK) + { + UINT_32 elementBytesLog2 = Log2(pIn->bpp >> 3); + UINT_32 metaBlkWidthLog2 = Log2(output.metaBlkWidth); + UINT_32 metaBlkHeightLog2 = 
Log2(output.metaBlkHeight); + UINT_32 numSamplesLog2 = Log2(pIn->numSamples); + + MetaEqParams metaEqParams = {0, elementBytesLog2, numSamplesLog2, pIn->hTileFlags, + Gfx9DataDepthStencil, pIn->swizzleMode, ADDR_RSRC_TEX_2D, + metaBlkWidthLog2, metaBlkHeightLog2, 0, 3, 3, 0}; + + const CoordEq* pMetaEq = GetMetaEquation(metaEqParams); + + UINT_32 numPipeBits = GetPipeLog2ForMetaAddressing(pIn->hTileFlags.pipeAligned, + pIn->swizzleMode); + + UINT_64 pipeXor = static_cast(pIn->pipeXor & ((1 << numPipeBits) - 1)); + + UINT_64 nibbleAddress = (pIn->addr ^ (pipeXor << m_pipeInterleaveLog2)) << 1; + + UINT_32 pitchInBlock = output.pitch / output.metaBlkWidth; + UINT_32 sliceSizeInBlock = (output.height / output.metaBlkHeight) * pitchInBlock; + + UINT_32 x, y, z, s, m; + pMetaEq->solveAddr(nibbleAddress, sliceSizeInBlock, x, y, z, s, m); + + pOut->slice = m / sliceSizeInBlock; + pOut->y = ((m % sliceSizeInBlock) / pitchInBlock) * output.metaBlkHeight + y; + pOut->x = (m % pitchInBlock) * output.metaBlkWidth + x; + } + } + + return returnCode; +} + +/** +************************************************************************************************************************ +* Gfx9Lib::HwlComputeDccAddrFromCoord +* +* @brief +* Interface function stub of AddrComputeDccAddrFromCoord +* +* @return +* ADDR_E_RETURNCODE +************************************************************************************************************************ +*/ +ADDR_E_RETURNCODE Gfx9Lib::HwlComputeDccAddrFromCoord( + const ADDR2_COMPUTE_DCC_ADDRFROMCOORD_INPUT* pIn, + ADDR2_COMPUTE_DCC_ADDRFROMCOORD_OUTPUT* pOut) +{ + ADDR_E_RETURNCODE returnCode = ADDR_OK; + + if ((pIn->numMipLevels > 1) || (pIn->mipId > 1) || pIn->dccKeyFlags.linear) + { + returnCode = ADDR_NOTIMPLEMENTED; + } + else + { + ADDR2_COMPUTE_DCCINFO_INPUT input = {0}; + input.size = sizeof(input); + input.dccKeyFlags = pIn->dccKeyFlags; + input.colorFlags = pIn->colorFlags; + input.swizzleMode = pIn->swizzleMode; + 
input.resourceType = pIn->resourceType; + input.bpp = pIn->bpp; + input.unalignedWidth = Max(pIn->unalignedWidth, 1u); + input.unalignedHeight = Max(pIn->unalignedHeight, 1u); + input.numSlices = Max(pIn->numSlices, 1u); + input.numFrags = Max(pIn->numFrags, 1u); + input.numMipLevels = Max(pIn->numMipLevels, 1u); + + ADDR2_COMPUTE_DCCINFO_OUTPUT output = {0}; + output.size = sizeof(output); + + returnCode = ComputeDccInfo(&input, &output); + + if (returnCode == ADDR_OK) + { + UINT_32 elementBytesLog2 = Log2(pIn->bpp >> 3); + UINT_32 numSamplesLog2 = Log2(pIn->numFrags); + UINT_32 metaBlkWidthLog2 = Log2(output.metaBlkWidth); + UINT_32 metaBlkHeightLog2 = Log2(output.metaBlkHeight); + UINT_32 metaBlkDepthLog2 = Log2(output.metaBlkDepth); + UINT_32 compBlkWidthLog2 = Log2(output.compressBlkWidth); + UINT_32 compBlkHeightLog2 = Log2(output.compressBlkHeight); + UINT_32 compBlkDepthLog2 = Log2(output.compressBlkDepth); + + MetaEqParams metaEqParams = {pIn->mipId, elementBytesLog2, numSamplesLog2, pIn->dccKeyFlags, + Gfx9DataColor, pIn->swizzleMode, pIn->resourceType, + metaBlkWidthLog2, metaBlkHeightLog2, metaBlkDepthLog2, + compBlkWidthLog2, compBlkHeightLog2, compBlkDepthLog2}; + + const CoordEq* pMetaEq = GetMetaEquation(metaEqParams); + + UINT_32 xb = pIn->x / output.metaBlkWidth; + UINT_32 yb = pIn->y / output.metaBlkHeight; + UINT_32 zb = pIn->slice / output.metaBlkDepth; + + UINT_32 pitchInBlock = output.pitch / output.metaBlkWidth; + UINT_32 sliceSizeInBlock = (output.height / output.metaBlkHeight) * pitchInBlock; + UINT_32 blockIndex = zb * sliceSizeInBlock + yb * pitchInBlock + xb; + + UINT_64 address = pMetaEq->solve(pIn->x, pIn->y, pIn->slice, pIn->sample, blockIndex); + + pOut->addr = address >> 1; + + UINT_32 numPipeBits = GetPipeLog2ForMetaAddressing(pIn->dccKeyFlags.pipeAligned, + pIn->swizzleMode); + + UINT_64 pipeXor = static_cast(pIn->pipeXor & ((1 << numPipeBits) - 1)); + + pOut->addr ^= (pipeXor << m_pipeInterleaveLog2); + } + } + + return 
returnCode; +} + +/** +************************************************************************************************************************ +* Gfx9Lib::HwlInitGlobalParams +* +* @brief +* Initializes global parameters +* +* @return +* TRUE if all settings are valid +* +************************************************************************************************************************ +*/ +BOOL_32 Gfx9Lib::HwlInitGlobalParams( + const ADDR_CREATE_INPUT* pCreateIn) ///< [in] create input +{ + BOOL_32 valid = TRUE; + + if (m_settings.isArcticIsland) + { + GB_ADDR_CONFIG gbAddrConfig; + + gbAddrConfig.u32All = pCreateIn->regValue.gbAddrConfig; + + // These values are copied from CModel code + switch (gbAddrConfig.bits.NUM_PIPES) + { + case ADDR_CONFIG_1_PIPE: + m_pipes = 1; + m_pipesLog2 = 0; + break; + case ADDR_CONFIG_2_PIPE: + m_pipes = 2; + m_pipesLog2 = 1; + break; + case ADDR_CONFIG_4_PIPE: + m_pipes = 4; + m_pipesLog2 = 2; + break; + case ADDR_CONFIG_8_PIPE: + m_pipes = 8; + m_pipesLog2 = 3; + break; + case ADDR_CONFIG_16_PIPE: + m_pipes = 16; + m_pipesLog2 = 4; + break; + case ADDR_CONFIG_32_PIPE: + m_pipes = 32; + m_pipesLog2 = 5; + break; + default: + ADDR_ASSERT_ALWAYS(); + break; + } + + switch (gbAddrConfig.bits.PIPE_INTERLEAVE_SIZE) + { + case ADDR_CONFIG_PIPE_INTERLEAVE_256B: + m_pipeInterleaveBytes = ADDR_PIPEINTERLEAVE_256B; + m_pipeInterleaveLog2 = 8; + break; + case ADDR_CONFIG_PIPE_INTERLEAVE_512B: + m_pipeInterleaveBytes = ADDR_PIPEINTERLEAVE_512B; + m_pipeInterleaveLog2 = 9; + break; + case ADDR_CONFIG_PIPE_INTERLEAVE_1KB: + m_pipeInterleaveBytes = ADDR_PIPEINTERLEAVE_1KB; + m_pipeInterleaveLog2 = 10; + break; + case ADDR_CONFIG_PIPE_INTERLEAVE_2KB: + m_pipeInterleaveBytes = ADDR_PIPEINTERLEAVE_2KB; + m_pipeInterleaveLog2 = 11; + break; + default: + ADDR_ASSERT_ALWAYS(); + break; + } + + // Addr::V2::Lib::ComputePipeBankXor()/ComputeSlicePipeBankXor() requires pipe interleave to be exactly 8 bits, + // and any larger value requires a 
post-process (left shift) on the output pipeBankXor bits. + ADDR_ASSERT(m_pipeInterleaveBytes == ADDR_PIPEINTERLEAVE_256B); + + switch (gbAddrConfig.bits.NUM_BANKS) + { + case ADDR_CONFIG_1_BANK: + m_banks = 1; + m_banksLog2 = 0; + break; + case ADDR_CONFIG_2_BANK: + m_banks = 2; + m_banksLog2 = 1; + break; + case ADDR_CONFIG_4_BANK: + m_banks = 4; + m_banksLog2 = 2; + break; + case ADDR_CONFIG_8_BANK: + m_banks = 8; + m_banksLog2 = 3; + break; + case ADDR_CONFIG_16_BANK: + m_banks = 16; + m_banksLog2 = 4; + break; + default: + ADDR_ASSERT_ALWAYS(); + break; + } + + switch (gbAddrConfig.bits.NUM_SHADER_ENGINES) + { + case ADDR_CONFIG_1_SHADER_ENGINE: + m_se = 1; + m_seLog2 = 0; + break; + case ADDR_CONFIG_2_SHADER_ENGINE: + m_se = 2; + m_seLog2 = 1; + break; + case ADDR_CONFIG_4_SHADER_ENGINE: + m_se = 4; + m_seLog2 = 2; + break; + case ADDR_CONFIG_8_SHADER_ENGINE: + m_se = 8; + m_seLog2 = 3; + break; + default: + ADDR_ASSERT_ALWAYS(); + break; + } + + switch (gbAddrConfig.bits.NUM_RB_PER_SE) + { + case ADDR_CONFIG_1_RB_PER_SHADER_ENGINE: + m_rbPerSe = 1; + m_rbPerSeLog2 = 0; + break; + case ADDR_CONFIG_2_RB_PER_SHADER_ENGINE: + m_rbPerSe = 2; + m_rbPerSeLog2 = 1; + break; + case ADDR_CONFIG_4_RB_PER_SHADER_ENGINE: + m_rbPerSe = 4; + m_rbPerSeLog2 = 2; + break; + default: + ADDR_ASSERT_ALWAYS(); + break; + } + + switch (gbAddrConfig.bits.MAX_COMPRESSED_FRAGS) + { + case ADDR_CONFIG_1_MAX_COMPRESSED_FRAGMENTS: + m_maxCompFrag = 1; + m_maxCompFragLog2 = 0; + break; + case ADDR_CONFIG_2_MAX_COMPRESSED_FRAGMENTS: + m_maxCompFrag = 2; + m_maxCompFragLog2 = 1; + break; + case ADDR_CONFIG_4_MAX_COMPRESSED_FRAGMENTS: + m_maxCompFrag = 4; + m_maxCompFragLog2 = 2; + break; + case ADDR_CONFIG_8_MAX_COMPRESSED_FRAGMENTS: + m_maxCompFrag = 8; + m_maxCompFragLog2 = 3; + break; + default: + ADDR_ASSERT_ALWAYS(); + break; + } + + m_blockVarSizeLog2 = pCreateIn->regValue.blockVarSizeLog2; + ADDR_ASSERT((m_blockVarSizeLog2 == 0) || + ((m_blockVarSizeLog2 >= 17u) && 
(m_blockVarSizeLog2 <= 20u))); + m_blockVarSizeLog2 = Min(Max(17u, m_blockVarSizeLog2), 20u); + + if ((m_rbPerSeLog2 == 1) && + (((m_pipesLog2 == 1) && ((m_seLog2 == 2) || (m_seLog2 == 3))) || + ((m_pipesLog2 == 2) && ((m_seLog2 == 1) || (m_seLog2 == 2))))) + { + ADDR_ASSERT(m_settings.isVega10 == FALSE); + ADDR_ASSERT(m_settings.isRaven == FALSE); + ADDR_ASSERT(m_settings.isVega20 == FALSE); + + if (m_settings.isVega12) + { + m_settings.htileCacheRbConflict = 1; + } + } + } + else + { + valid = FALSE; + ADDR_NOT_IMPLEMENTED(); + } + + if (valid) + { + InitEquationTable(); + } + + return valid; +} + +/** +************************************************************************************************************************ +* Gfx9Lib::HwlConvertChipFamily +* +* @brief +* Convert familyID defined in atiid.h to ChipFamily and set m_chipFamily/m_chipRevision +* @return +* ChipFamily +************************************************************************************************************************ +*/ +ChipFamily Gfx9Lib::HwlConvertChipFamily( + UINT_32 uChipFamily, ///< [in] chip family defined in atiih.h + UINT_32 uChipRevision) ///< [in] chip revision defined in "asic_family"_id.h +{ + ChipFamily family = ADDR_CHIP_FAMILY_AI; + + switch (uChipFamily) + { + case FAMILY_AI: + m_settings.isArcticIsland = 1; + m_settings.isVega10 = ASICREV_IS_VEGA10_P(uChipRevision); + m_settings.isVega12 = ASICREV_IS_VEGA12_P(uChipRevision); + m_settings.isVega20 = ASICREV_IS_VEGA20_P(uChipRevision); + m_settings.isDce12 = 1; + + if (m_settings.isVega10 == 0) + { + m_settings.htileAlignFix = 1; + m_settings.applyAliasFix = 1; + } + + m_settings.metaBaseAlignFix = 1; + + m_settings.depthPipeXorDisable = 1; + break; + case FAMILY_RV: + m_settings.isArcticIsland = 1; + + if (ASICREV_IS_RAVEN(uChipRevision)) + { + m_settings.isRaven = 1; + + m_settings.depthPipeXorDisable = 1; + } + + if (ASICREV_IS_RAVEN2(uChipRevision)) + { + m_settings.isRaven = 1; + } + + if (m_settings.isRaven 
== 0) + { + m_settings.htileAlignFix = 1; + m_settings.applyAliasFix = 1; + } + + m_settings.isDcn1 = m_settings.isRaven; + + m_settings.metaBaseAlignFix = 1; + break; + + default: + ADDR_ASSERT(!"This should be a Fusion"); + break; + } + + return family; +} + +/** +************************************************************************************************************************ +* Gfx9Lib::InitRbEquation +* +* @brief +* Init RB equation +* @return +* N/A +************************************************************************************************************************ +*/ +VOID Gfx9Lib::GetRbEquation( + CoordEq* pRbEq, ///< [out] rb equation + UINT_32 numRbPerSeLog2, ///< [in] number of rb per shader engine + UINT_32 numSeLog2) ///< [in] number of shader engine + const +{ + // RB's are distributed on 16x16, except when we have 1 rb per se, in which case its 32x32 + UINT_32 rbRegion = (numRbPerSeLog2 == 0) ? 5 : 4; + Coordinate cx('x', rbRegion); + Coordinate cy('y', rbRegion); + + UINT_32 start = 0; + UINT_32 numRbTotalLog2 = numRbPerSeLog2 + numSeLog2; + + // Clear the rb equation + pRbEq->resize(0); + pRbEq->resize(numRbTotalLog2); + + if ((numSeLog2 > 0) && (numRbPerSeLog2 == 1)) + { + // Special case when more than 1 SE, and 2 RB per SE + (*pRbEq)[0].add(cx); + (*pRbEq)[0].add(cy); + cx++; + cy++; + + if (m_settings.applyAliasFix == false) + { + (*pRbEq)[0].add(cy); + } + + (*pRbEq)[0].add(cy); + start++; + } + + UINT_32 numBits = 2 * (numRbTotalLog2 - start); + + for (UINT_32 i = 0; i < numBits; i++) + { + UINT_32 idx = + start + (((start + i) >= numRbTotalLog2) ? 
(2 * (numRbTotalLog2 - start) - i - 1) : i); + + if ((i % 2) == 1) + { + (*pRbEq)[idx].add(cx); + cx++; + } + else + { + (*pRbEq)[idx].add(cy); + cy++; + } + } +} + +/** +************************************************************************************************************************ +* Gfx9Lib::GetDataEquation +* +* @brief +* Get data equation for fmask and Z +* @return +* N/A +************************************************************************************************************************ +*/ +VOID Gfx9Lib::GetDataEquation( + CoordEq* pDataEq, ///< [out] data surface equation + Gfx9DataType dataSurfaceType, ///< [in] data surface type + AddrSwizzleMode swizzleMode, ///< [in] data surface swizzle mode + AddrResourceType resourceType, ///< [in] data surface resource type + UINT_32 elementBytesLog2, ///< [in] data surface element bytes + UINT_32 numSamplesLog2) ///< [in] data surface sample count + const +{ + Coordinate cx('x', 0); + Coordinate cy('y', 0); + Coordinate cz('z', 0); + Coordinate cs('s', 0); + + // Clear the equation + pDataEq->resize(0); + pDataEq->resize(27); + + if (dataSurfaceType == Gfx9DataColor) + { + if (IsLinear(swizzleMode)) + { + Coordinate cm('m', 0); + + pDataEq->resize(49); + + for (UINT_32 i = 0; i < 49; i++) + { + (*pDataEq)[i].add(cm); + cm++; + } + } + else if (IsThick(resourceType, swizzleMode)) + { + // Color 3d_S and 3d_Z modes, 3d_D is same as color 2d + UINT_32 i; + if (IsStandardSwizzle(resourceType, swizzleMode)) + { + // Standard 3d swizzle + // Fill in bottom x bits + for (i = elementBytesLog2; i < 4; i++) + { + (*pDataEq)[i].add(cx); + cx++; + } + // Fill in 2 bits of y and then z + for (i = 4; i < 6; i++) + { + (*pDataEq)[i].add(cy); + cy++; + } + for (i = 6; i < 8; i++) + { + (*pDataEq)[i].add(cz); + cz++; + } + if (elementBytesLog2 < 2) + { + // fill in z & y bit + (*pDataEq)[8].add(cz); + (*pDataEq)[9].add(cy); + cz++; + cy++; + } + else if (elementBytesLog2 == 2) + { + // fill in y and x bit + 
(*pDataEq)[8].add(cy); + (*pDataEq)[9].add(cx); + cy++; + cx++; + } + else + { + // fill in 2 x bits + (*pDataEq)[8].add(cx); + cx++; + (*pDataEq)[9].add(cx); + cx++; + } + } + else + { + // Z 3d swizzle + UINT_32 m2dEnd = (elementBytesLog2 ==0) ? 3 : ((elementBytesLog2 < 4) ? 4 : 5); + UINT_32 numZs = (elementBytesLog2 == 0 || elementBytesLog2 == 4) ? + 2 : ((elementBytesLog2 == 1) ? 3 : 1); + pDataEq->mort2d(cx, cy, elementBytesLog2, m2dEnd); + for (i = m2dEnd + 1; i <= m2dEnd + numZs; i++) + { + (*pDataEq)[i].add(cz); + cz++; + } + if ((elementBytesLog2 == 0) || (elementBytesLog2 == 3)) + { + // add an x and z + (*pDataEq)[6].add(cx); + (*pDataEq)[7].add(cz); + cx++; + cz++; + } + else if (elementBytesLog2 == 2) + { + // add a y and z + (*pDataEq)[6].add(cy); + (*pDataEq)[7].add(cz); + cy++; + cz++; + } + // add y and x + (*pDataEq)[8].add(cy); + (*pDataEq)[9].add(cx); + cy++; + cx++; + } + // Fill in bit 10 and up + pDataEq->mort3d( cz, cy, cx, 10 ); + } + else if (IsThin(resourceType, swizzleMode)) + { + UINT_32 blockSizeLog2 = GetBlockSizeLog2(swizzleMode); + // Color 2D + UINT_32 microYBits = (8 - elementBytesLog2) / 2; + UINT_32 tileSplitStart = blockSizeLog2 - numSamplesLog2; + UINT_32 i; + // Fill in bottom x bits + for (i = elementBytesLog2; i < 4; i++) + { + (*pDataEq)[i].add(cx); + cx++; + } + // Fill in bottom y bits + for (i = 4; i < 4 + microYBits; i++) + { + (*pDataEq)[i].add(cy); + cy++; + } + // Fill in last of the micro_x bits + for (i = 4 + microYBits; i < 8; i++) + { + (*pDataEq)[i].add(cx); + cx++; + } + // Fill in x/y bits below sample split + pDataEq->mort2d(cy, cx, 8, tileSplitStart - 1); + // Fill in sample bits + for (i = 0; i < numSamplesLog2; i++) + { + cs.set('s', i); + (*pDataEq)[tileSplitStart + i].add(cs); + } + // Fill in x/y bits above sample split + if ((numSamplesLog2 & 1) ^ (blockSizeLog2 & 1)) + { + pDataEq->mort2d(cx, cy, blockSizeLog2); + } + else + { + pDataEq->mort2d(cy, cx, blockSizeLog2); + } + } + else + { + 
ADDR_ASSERT_ALWAYS(); + } + } + else + { + // Fmask or depth + UINT_32 sampleStart = elementBytesLog2; + UINT_32 pixelStart = elementBytesLog2 + numSamplesLog2; + UINT_32 ymajStart = 6 + numSamplesLog2; + + for (UINT_32 s = 0; s < numSamplesLog2; s++) + { + cs.set('s', s); + (*pDataEq)[sampleStart + s].add(cs); + } + + // Put in the x-major order pixel bits + pDataEq->mort2d(cx, cy, pixelStart, ymajStart - 1); + // Put in the y-major order pixel bits + pDataEq->mort2d(cy, cx, ymajStart); + } +} + +/** +************************************************************************************************************************ +* Gfx9Lib::GetPipeEquation +* +* @brief +* Get pipe equation +* @return +* N/A +************************************************************************************************************************ +*/ +VOID Gfx9Lib::GetPipeEquation( + CoordEq* pPipeEq, ///< [out] pipe equation + CoordEq* pDataEq, ///< [in] data equation + UINT_32 pipeInterleaveLog2, ///< [in] pipe interleave + UINT_32 numPipeLog2, ///< [in] number of pipes + UINT_32 numSamplesLog2, ///< [in] data surface sample count + Gfx9DataType dataSurfaceType, ///< [in] data surface type + AddrSwizzleMode swizzleMode, ///< [in] data surface swizzle mode + AddrResourceType resourceType ///< [in] data surface resource type + ) const +{ + UINT_32 blockSizeLog2 = GetBlockSizeLog2(swizzleMode); + CoordEq dataEq; + + pDataEq->copy(dataEq); + + if (dataSurfaceType == Gfx9DataColor) + { + INT_32 shift = static_cast(numSamplesLog2); + dataEq.shift(-shift, blockSizeLog2 - numSamplesLog2); + } + + dataEq.copy(*pPipeEq, pipeInterleaveLog2, numPipeLog2); + + // This section should only apply to z/stencil, maybe fmask + // If the pipe bit is below the comp block size, + // then keep moving up the address until we find a bit that is above + UINT_32 pipeStart = 0; + + if (dataSurfaceType != Gfx9DataColor) + { + Coordinate tileMin('x', 3); + + while (dataEq[pipeInterleaveLog2 + pipeStart][0] < tileMin) + 
{ + pipeStart++; + } + + // if pipe is 0, then the first pipe bit is above the comp block size, + // so we don't need to do anything + // Note, this if condition is not necessary, since if we execute the loop when pipe==0, + // we will get the same pipe equation + if (pipeStart != 0) + { + for (UINT_32 i = 0; i < numPipeLog2; i++) + { + // Copy the jth bit above pipe interleave to the current pipe equation bit + dataEq[pipeInterleaveLog2 + pipeStart + i].copyto((*pPipeEq)[i]); + } + } + } + + if (IsPrt(swizzleMode)) + { + // Clear out bits above the block size if prt's are enabled + dataEq.resize(blockSizeLog2); + dataEq.resize(48); + } + + if (IsXor(swizzleMode)) + { + CoordEq xorMask; + + if (IsThick(resourceType, swizzleMode)) + { + CoordEq xorMask2; + + dataEq.copy(xorMask2, pipeInterleaveLog2 + numPipeLog2, 2 * numPipeLog2); + + xorMask.resize(numPipeLog2); + + for (UINT_32 pipeIdx = 0; pipeIdx < numPipeLog2; pipeIdx++) + { + xorMask[pipeIdx].add(xorMask2[2 * pipeIdx]); + xorMask[pipeIdx].add(xorMask2[2 * pipeIdx + 1]); + } + } + else + { + // Xor in the bits above the pipe+gpu bits + dataEq.copy(xorMask, pipeInterleaveLog2 + pipeStart + numPipeLog2, numPipeLog2); + + if ((numSamplesLog2 == 0) && (IsPrt(swizzleMode) == FALSE)) + { + Coordinate co; + CoordEq xorMask2; + // if 1xaa and not prt, then xor in the z bits + xorMask2.resize(0); + xorMask2.resize(numPipeLog2); + for (UINT_32 pipeIdx = 0; pipeIdx < numPipeLog2; pipeIdx++) + { + co.set('z', numPipeLog2 - 1 - pipeIdx); + xorMask2[pipeIdx].add(co); + } + + pPipeEq->xorin(xorMask2); + } + } + + xorMask.reverse(); + pPipeEq->xorin(xorMask); + } +} +/** +************************************************************************************************************************ +* Gfx9Lib::GetMetaEquation +* +* @brief +* Get meta equation for cmask/htile/DCC +* @return +* Pointer to a calculated meta equation 
+************************************************************************************************************************ +*/ +const CoordEq* Gfx9Lib::GetMetaEquation( + const MetaEqParams& metaEqParams) +{ + UINT_32 cachedMetaEqIndex; + + for (cachedMetaEqIndex = 0; cachedMetaEqIndex < MaxCachedMetaEq; cachedMetaEqIndex++) + { + if (memcmp(&metaEqParams, + &m_cachedMetaEqKey[cachedMetaEqIndex], + static_cast(sizeof(metaEqParams))) == 0) + { + break; + } + } + + CoordEq* pMetaEq = NULL; + + if (cachedMetaEqIndex < MaxCachedMetaEq) + { + pMetaEq = &m_cachedMetaEq[cachedMetaEqIndex]; + } + else + { + m_cachedMetaEqKey[m_metaEqOverrideIndex] = metaEqParams; + + pMetaEq = &m_cachedMetaEq[m_metaEqOverrideIndex++]; + + m_metaEqOverrideIndex %= MaxCachedMetaEq; + + GenMetaEquation(pMetaEq, + metaEqParams.maxMip, + metaEqParams.elementBytesLog2, + metaEqParams.numSamplesLog2, + metaEqParams.metaFlag, + metaEqParams.dataSurfaceType, + metaEqParams.swizzleMode, + metaEqParams.resourceType, + metaEqParams.metaBlkWidthLog2, + metaEqParams.metaBlkHeightLog2, + metaEqParams.metaBlkDepthLog2, + metaEqParams.compBlkWidthLog2, + metaEqParams.compBlkHeightLog2, + metaEqParams.compBlkDepthLog2); + } + + return pMetaEq; +} + +/** +************************************************************************************************************************ +* Gfx9Lib::GenMetaEquation +* +* @brief +* Get meta equation for cmask/htile/DCC +* @return +* N/A +************************************************************************************************************************ +*/ +VOID Gfx9Lib::GenMetaEquation( + CoordEq* pMetaEq, ///< [out] meta equation + UINT_32 maxMip, ///< [in] max mip Id + UINT_32 elementBytesLog2, ///< [in] data surface element bytes + UINT_32 numSamplesLog2, ///< [in] data surface sample count + ADDR2_META_FLAGS metaFlag, ///< [in] meta falg + Gfx9DataType dataSurfaceType, ///< [in] data surface type + AddrSwizzleMode swizzleMode, ///< [in] data surface swizzle mode + 
AddrResourceType resourceType, ///< [in] data surface resource type + UINT_32 metaBlkWidthLog2, ///< [in] meta block width + UINT_32 metaBlkHeightLog2, ///< [in] meta block height + UINT_32 metaBlkDepthLog2, ///< [in] meta block depth + UINT_32 compBlkWidthLog2, ///< [in] compress block width + UINT_32 compBlkHeightLog2, ///< [in] compress block height + UINT_32 compBlkDepthLog2) ///< [in] compress block depth + const +{ + UINT_32 numPipeTotalLog2 = GetPipeLog2ForMetaAddressing(metaFlag.pipeAligned, swizzleMode); + UINT_32 pipeInterleaveLog2 = m_pipeInterleaveLog2; + + // Get the correct data address and rb equation + CoordEq dataEq; + GetDataEquation(&dataEq, dataSurfaceType, swizzleMode, resourceType, + elementBytesLog2, numSamplesLog2); + + // Get pipe and rb equations + CoordEq pipeEquation; + GetPipeEquation(&pipeEquation, &dataEq, pipeInterleaveLog2, numPipeTotalLog2, + numSamplesLog2, dataSurfaceType, swizzleMode, resourceType); + numPipeTotalLog2 = pipeEquation.getsize(); + + if (metaFlag.linear) + { + // Linear metadata supporting was removed for GFX9! No one can use this feature. + ADDR_ASSERT_ALWAYS(); + + ADDR_ASSERT(dataSurfaceType == Gfx9DataColor); + + dataEq.copy(*pMetaEq); + + if (IsLinear(swizzleMode)) + { + if (metaFlag.pipeAligned) + { + // Remove the pipe bits + INT_32 shift = static_cast(numPipeTotalLog2); + pMetaEq->shift(-shift, pipeInterleaveLog2); + } + // Divide by comp block size, which for linear (which is always color) is 256 B + pMetaEq->shift(-8); + + if (metaFlag.pipeAligned) + { + // Put pipe bits back in + pMetaEq->shift(numPipeTotalLog2, pipeInterleaveLog2); + + for (UINT_32 i = 0; i < numPipeTotalLog2; i++) + { + pipeEquation[i].copyto((*pMetaEq)[pipeInterleaveLog2 + i]); + } + } + } + + pMetaEq->shift(1); + } + else + { + UINT_32 maxCompFragLog2 = static_cast(m_maxCompFragLog2); + UINT_32 compFragLog2 = + ((dataSurfaceType == Gfx9DataColor) && (numSamplesLog2 > maxCompFragLog2)) ? 
+ maxCompFragLog2 : numSamplesLog2; + + UINT_32 uncompFragLog2 = numSamplesLog2 - compFragLog2; + + // Make sure the metaaddr is cleared + pMetaEq->resize(0); + pMetaEq->resize(27); + + if (IsThick(resourceType, swizzleMode)) + { + Coordinate cx('x', 0); + Coordinate cy('y', 0); + Coordinate cz('z', 0); + + if (maxMip > 0) + { + pMetaEq->mort3d(cy, cx, cz); + } + else + { + pMetaEq->mort3d(cx, cy, cz); + } + } + else + { + Coordinate cx('x', 0); + Coordinate cy('y', 0); + Coordinate cs; + + if (maxMip > 0) + { + pMetaEq->mort2d(cy, cx, compFragLog2); + } + else + { + pMetaEq->mort2d(cx, cy, compFragLog2); + } + + //------------------------------------------------------------------------------------------------------------------------ + // Put the compressible fragments at the lsb + // the uncompressible frags will be at the msb of the micro address + //------------------------------------------------------------------------------------------------------------------------ + for (UINT_32 s = 0; s < compFragLog2; s++) + { + cs.set('s', s); + (*pMetaEq)[s].add(cs); + } + } + + // Keep a copy of the pipe equations + CoordEq origPipeEquation; + pipeEquation.copy(origPipeEquation); + + Coordinate co; + // filter out everything under the compressed block size + co.set('x', compBlkWidthLog2); + pMetaEq->Filter('<', co, 0, 'x'); + co.set('y', compBlkHeightLog2); + pMetaEq->Filter('<', co, 0, 'y'); + co.set('z', compBlkDepthLog2); + pMetaEq->Filter('<', co, 0, 'z'); + + // For non-color, filter out sample bits + if (dataSurfaceType != Gfx9DataColor) + { + co.set('x', 0); + pMetaEq->Filter('<', co, 0, 's'); + } + + // filter out everything above the metablock size + co.set('x', metaBlkWidthLog2 - 1); + pMetaEq->Filter('>', co, 0, 'x'); + co.set('y', metaBlkHeightLog2 - 1); + pMetaEq->Filter('>', co, 0, 'y'); + co.set('z', metaBlkDepthLog2 - 1); + pMetaEq->Filter('>', co, 0, 'z'); + + // filter out everything above the metablock size for the channel bits + co.set('x', 
metaBlkWidthLog2 - 1); + pipeEquation.Filter('>', co, 0, 'x'); + co.set('y', metaBlkHeightLog2 - 1); + pipeEquation.Filter('>', co, 0, 'y'); + co.set('z', metaBlkDepthLog2 - 1); + pipeEquation.Filter('>', co, 0, 'z'); + + // Make sure we still have the same number of channel bits + if (pipeEquation.getsize() != numPipeTotalLog2) + { + ADDR_ASSERT_ALWAYS(); + } + + // Loop through all channel and rb bits, + // and make sure these components exist in the metadata address + for (UINT_32 i = 0; i < numPipeTotalLog2; i++) + { + for (UINT_32 j = pipeEquation[i].getsize(); j > 0; j--) + { + if (pMetaEq->Exists(pipeEquation[i][j - 1]) == FALSE) + { + ADDR_ASSERT_ALWAYS(); + } + } + } + + const UINT_32 numSeLog2 = metaFlag.rbAligned ? m_seLog2 : 0; + const UINT_32 numRbPeSeLog2 = metaFlag.rbAligned ? m_rbPerSeLog2 : 0; + const UINT_32 numRbTotalLog2 = numRbPeSeLog2 + numSeLog2; + CoordEq origRbEquation; + + GetRbEquation(&origRbEquation, numRbPeSeLog2, numSeLog2); + + CoordEq rbEquation = origRbEquation; + + for (UINT_32 i = 0; i < numRbTotalLog2; i++) + { + for (UINT_32 j = rbEquation[i].getsize(); j > 0; j--) + { + if (pMetaEq->Exists(rbEquation[i][j - 1]) == FALSE) + { + ADDR_ASSERT_ALWAYS(); + } + } + } + + if (m_settings.applyAliasFix) + { + co.set('z', -1); + } + + // Loop through each rb id bit; if it is equal to any of the filtered channel bits, clear it + for (UINT_32 i = 0; i < numRbTotalLog2; i++) + { + for (UINT_32 j = 0; j < numPipeTotalLog2; j++) + { + BOOL_32 isRbEquationInPipeEquation = FALSE; + + if (m_settings.applyAliasFix) + { + CoordTerm filteredPipeEq; + filteredPipeEq = pipeEquation[j]; + + filteredPipeEq.Filter('>', co, 0, 'z'); + + isRbEquationInPipeEquation = (rbEquation[i] == filteredPipeEq); + } + else + { + isRbEquationInPipeEquation = (rbEquation[i] == pipeEquation[j]); + } + + if (isRbEquationInPipeEquation) + { + rbEquation[i].Clear(); + } + } + } + + bool rbAppendedWithPipeBits[1 << (MaxSeLog2 + MaxRbPerSeLog2)] = {}; + + // Loop through 
each bit of the channel, get the smallest coordinate, + // and remove it from the metaaddr, and rb_equation + for (UINT_32 i = 0; i < numPipeTotalLog2; i++) + { + pipeEquation[i].getsmallest(co); + + UINT_32 old_size = pMetaEq->getsize(); + pMetaEq->Filter('=', co); + UINT_32 new_size = pMetaEq->getsize(); + if (new_size != old_size-1) + { + ADDR_ASSERT_ALWAYS(); + } + pipeEquation.remove(co); + for (UINT_32 j = 0; j < numRbTotalLog2; j++) + { + if (rbEquation[j].remove(co)) + { + // if we actually removed something from this bit, then add the remaining + // channel bits, as these can be removed for this bit + for (UINT_32 k = 0; k < pipeEquation[i].getsize(); k++) + { + if (pipeEquation[i][k] != co) + { + rbEquation[j].add(pipeEquation[i][k]); + rbAppendedWithPipeBits[j] = true; + } + } + } + } + } + + // Loop through the rb bits and see what remain; + // filter out the smallest coordinate if it remains + UINT_32 rbBitsLeft = 0; + for (UINT_32 i = 0; i < numRbTotalLog2; i++) + { + BOOL_32 isRbEqAppended = FALSE; + + if (m_settings.applyAliasFix) + { + isRbEqAppended = (rbEquation[i].getsize() > (rbAppendedWithPipeBits[i] ? 
1 : 0)); + } + else + { + isRbEqAppended = (rbEquation[i].getsize() > 0); + } + + if (isRbEqAppended) + { + rbBitsLeft++; + rbEquation[i].getsmallest(co); + UINT_32 old_size = pMetaEq->getsize(); + pMetaEq->Filter('=', co); + UINT_32 new_size = pMetaEq->getsize(); + if (new_size != old_size - 1) + { + // assert warning + } + for (UINT_32 j = i + 1; j < numRbTotalLog2; j++) + { + if (rbEquation[j].remove(co)) + { + // if we actually removed something from this bit, then add the remaining + // rb bits, as these can be removed for this bit + for (UINT_32 k = 0; k < rbEquation[i].getsize(); k++) + { + if (rbEquation[i][k] != co) + { + rbEquation[j].add(rbEquation[i][k]); + rbAppendedWithPipeBits[j] |= rbAppendedWithPipeBits[i]; + } + } + } + } + } + } + + // capture the size of the metaaddr + UINT_32 metaSize = pMetaEq->getsize(); + // resize to 49 bits...make this a nibble address + pMetaEq->resize(49); + // Concatenate the macro address above the current address + for (UINT_32 i = metaSize, j = 0; i < 49; i++, j++) + { + co.set('m', j); + (*pMetaEq)[i].add(co); + } + + // Multiply by meta element size (in nibbles) + if (dataSurfaceType == Gfx9DataColor) + { + pMetaEq->shift(1); + } + else if (dataSurfaceType == Gfx9DataDepthStencil) + { + pMetaEq->shift(3); + } + + //------------------------------------------------------------------------------------------ + // Note the pipeInterleaveLog2+1 is because address is a nibble address + // Shift up from pipe interleave number of channel + // and rb bits left, and uncompressed fragments + //------------------------------------------------------------------------------------------ + + pMetaEq->shift(numPipeTotalLog2 + rbBitsLeft + uncompFragLog2, pipeInterleaveLog2 + 1); + + // Put in the channel bits + for (UINT_32 i = 0; i < numPipeTotalLog2; i++) + { + origPipeEquation[i].copyto((*pMetaEq)[pipeInterleaveLog2+1 + i]); + } + + // Put in remaining rb bits + for (UINT_32 i = 0, j = 0; j < rbBitsLeft; i = (i + 1) % 
numRbTotalLog2) + { + BOOL_32 isRbEqAppended = FALSE; + + if (m_settings.applyAliasFix) + { + isRbEqAppended = (rbEquation[i].getsize() > (rbAppendedWithPipeBits[i] ? 1 : 0)); + } + else + { + isRbEqAppended = (rbEquation[i].getsize() > 0); + } + + if (isRbEqAppended) + { + origRbEquation[i].copyto((*pMetaEq)[pipeInterleaveLog2 + 1 + numPipeTotalLog2 + j]); + // Mark any rb bit we add in to the rb mask + j++; + } + } + + //------------------------------------------------------------------------------------------ + // Put in the uncompressed fragment bits + //------------------------------------------------------------------------------------------ + for (UINT_32 i = 0; i < uncompFragLog2; i++) + { + co.set('s', compFragLog2 + i); + (*pMetaEq)[pipeInterleaveLog2 + 1 + numPipeTotalLog2 + rbBitsLeft + i].add(co); + } + } +} + +/** +************************************************************************************************************************ +* Gfx9Lib::IsEquationSupported +* +* @brief +* Check if equation is supported for given swizzle mode and resource type. +* +* @return +* TRUE if supported +************************************************************************************************************************ +*/ +BOOL_32 Gfx9Lib::IsEquationSupported( + AddrResourceType rsrcType, + AddrSwizzleMode swMode, + UINT_32 elementBytesLog2) const +{ + BOOL_32 supported = (elementBytesLog2 < MaxElementBytesLog2) && + (IsLinear(swMode) == FALSE) && + (((IsTex2d(rsrcType) == TRUE) && + ((elementBytesLog2 < 4) || + ((IsRotateSwizzle(swMode) == FALSE) && + (IsZOrderSwizzle(swMode) == FALSE)))) || + ((IsTex3d(rsrcType) == TRUE) && + (IsRotateSwizzle(swMode) == FALSE) && + (IsBlock256b(swMode) == FALSE))); + + return supported; +} + +/** +************************************************************************************************************************ +* Gfx9Lib::InitEquationTable +* +* @brief +* Initialize Equation table. 
+* +* @return +* N/A +************************************************************************************************************************ +*/ +VOID Gfx9Lib::InitEquationTable() +{ + memset(m_equationTable, 0, sizeof(m_equationTable)); + + // Loop all possible resource type (2D/3D) + for (UINT_32 rsrcTypeIdx = 0; rsrcTypeIdx < MaxRsrcType; rsrcTypeIdx++) + { + AddrResourceType rsrcType = static_cast(rsrcTypeIdx + ADDR_RSRC_TEX_2D); + + // Loop all possible swizzle mode + for (UINT_32 swModeIdx = 0; swModeIdx < MaxSwMode; swModeIdx++) + { + AddrSwizzleMode swMode = static_cast(swModeIdx); + + // Loop all possible bpp + for (UINT_32 bppIdx = 0; bppIdx < MaxElementBytesLog2; bppIdx++) + { + UINT_32 equationIndex = ADDR_INVALID_EQUATION_INDEX; + + // Check if the input is supported + if (IsEquationSupported(rsrcType, swMode, bppIdx)) + { + ADDR_EQUATION equation; + ADDR_E_RETURNCODE retCode; + + memset(&equation, 0, sizeof(ADDR_EQUATION)); + + // Generate the equation + if (IsBlock256b(swMode) && IsTex2d(rsrcType)) + { + retCode = ComputeBlock256Equation(rsrcType, swMode, bppIdx, &equation); + } + else if (IsThin(rsrcType, swMode)) + { + retCode = ComputeThinEquation(rsrcType, swMode, bppIdx, &equation); + } + else + { + retCode = ComputeThickEquation(rsrcType, swMode, bppIdx, &equation); + } + + // Only fill the equation into the table if the return code is ADDR_OK, + // otherwise if the return code is not ADDR_OK, it indicates this is not + // a valid input, we do nothing but just fill invalid equation index + // into the lookup table. 
+ if (retCode == ADDR_OK) + { + equationIndex = m_numEquations; + ADDR_ASSERT(equationIndex < EquationTableSize); + + m_equationTable[equationIndex] = equation; + + m_numEquations++; + } + else + { + ADDR_ASSERT_ALWAYS(); + } + } + + // Fill the index into the lookup table, if the combination is not supported + // fill the invalid equation index + m_equationLookupTable[rsrcTypeIdx][swModeIdx][bppIdx] = equationIndex; + } + } + } +} + +/** +************************************************************************************************************************ +* Gfx9Lib::HwlGetEquationIndex +* +* @brief +* Interface function stub of GetEquationIndex +* +* @return +* ADDR_E_RETURNCODE +************************************************************************************************************************ +*/ +UINT_32 Gfx9Lib::HwlGetEquationIndex( + const ADDR2_COMPUTE_SURFACE_INFO_INPUT* pIn, + ADDR2_COMPUTE_SURFACE_INFO_OUTPUT* pOut + ) const +{ + AddrResourceType rsrcType = pIn->resourceType; + AddrSwizzleMode swMode = pIn->swizzleMode; + UINT_32 elementBytesLog2 = Log2(pIn->bpp >> 3); + UINT_32 index = ADDR_INVALID_EQUATION_INDEX; + + if (IsEquationSupported(rsrcType, swMode, elementBytesLog2)) + { + UINT_32 rsrcTypeIdx = static_cast(rsrcType) - 1; + UINT_32 swModeIdx = static_cast(swMode); + + index = m_equationLookupTable[rsrcTypeIdx][swModeIdx][elementBytesLog2]; + } + + if (pOut->pMipInfo != NULL) + { + for (UINT_32 i = 0; i < pIn->numMipLevels; i++) + { + pOut->pMipInfo[i].equationIndex = index; + } + } + + return index; +} + +/** +************************************************************************************************************************ +* Gfx9Lib::HwlComputeBlock256Equation +* +* @brief +* Interface function stub of ComputeBlock256Equation +* +* @return +* ADDR_E_RETURNCODE +************************************************************************************************************************ +*/ +ADDR_E_RETURNCODE 
Gfx9Lib::HwlComputeBlock256Equation( + AddrResourceType rsrcType, + AddrSwizzleMode swMode, + UINT_32 elementBytesLog2, + ADDR_EQUATION* pEquation) const +{ + ADDR_E_RETURNCODE ret = ADDR_OK; + + pEquation->numBits = 8; + + UINT_32 i = 0; + for (; i < elementBytesLog2; i++) + { + InitChannel(1, 0 , i, &pEquation->addr[i]); + } + + ADDR_CHANNEL_SETTING* pixelBit = &pEquation->addr[elementBytesLog2]; + + const UINT_32 maxBitsUsed = 4; + ADDR_CHANNEL_SETTING x[maxBitsUsed] = {}; + ADDR_CHANNEL_SETTING y[maxBitsUsed] = {}; + + for (i = 0; i < maxBitsUsed; i++) + { + InitChannel(1, 0, elementBytesLog2 + i, &x[i]); + InitChannel(1, 1, i, &y[i]); + } + + if (IsStandardSwizzle(rsrcType, swMode)) + { + switch (elementBytesLog2) + { + case 0: + pixelBit[0] = x[0]; + pixelBit[1] = x[1]; + pixelBit[2] = x[2]; + pixelBit[3] = x[3]; + pixelBit[4] = y[0]; + pixelBit[5] = y[1]; + pixelBit[6] = y[2]; + pixelBit[7] = y[3]; + break; + case 1: + pixelBit[0] = x[0]; + pixelBit[1] = x[1]; + pixelBit[2] = x[2]; + pixelBit[3] = y[0]; + pixelBit[4] = y[1]; + pixelBit[5] = y[2]; + pixelBit[6] = x[3]; + break; + case 2: + pixelBit[0] = x[0]; + pixelBit[1] = x[1]; + pixelBit[2] = y[0]; + pixelBit[3] = y[1]; + pixelBit[4] = y[2]; + pixelBit[5] = x[2]; + break; + case 3: + pixelBit[0] = x[0]; + pixelBit[1] = y[0]; + pixelBit[2] = y[1]; + pixelBit[3] = x[1]; + pixelBit[4] = x[2]; + break; + case 4: + pixelBit[0] = y[0]; + pixelBit[1] = y[1]; + pixelBit[2] = x[0]; + pixelBit[3] = x[1]; + break; + default: + ADDR_ASSERT_ALWAYS(); + ret = ADDR_INVALIDPARAMS; + break; + } + } + else if (IsDisplaySwizzle(rsrcType, swMode)) + { + switch (elementBytesLog2) + { + case 0: + pixelBit[0] = x[0]; + pixelBit[1] = x[1]; + pixelBit[2] = x[2]; + pixelBit[3] = y[1]; + pixelBit[4] = y[0]; + pixelBit[5] = y[2]; + pixelBit[6] = x[3]; + pixelBit[7] = y[3]; + break; + case 1: + pixelBit[0] = x[0]; + pixelBit[1] = x[1]; + pixelBit[2] = x[2]; + pixelBit[3] = y[0]; + pixelBit[4] = y[1]; + pixelBit[5] = y[2]; + 
pixelBit[6] = x[3]; + break; + case 2: + pixelBit[0] = x[0]; + pixelBit[1] = x[1]; + pixelBit[2] = y[0]; + pixelBit[3] = x[2]; + pixelBit[4] = y[1]; + pixelBit[5] = y[2]; + break; + case 3: + pixelBit[0] = x[0]; + pixelBit[1] = y[0]; + pixelBit[2] = x[1]; + pixelBit[3] = x[2]; + pixelBit[4] = y[1]; + break; + case 4: + pixelBit[0] = x[0]; + pixelBit[1] = y[0]; + pixelBit[2] = x[1]; + pixelBit[3] = y[1]; + break; + default: + ADDR_ASSERT_ALWAYS(); + ret = ADDR_INVALIDPARAMS; + break; + } + } + else if (IsRotateSwizzle(swMode)) + { + switch (elementBytesLog2) + { + case 0: + pixelBit[0] = y[0]; + pixelBit[1] = y[1]; + pixelBit[2] = y[2]; + pixelBit[3] = x[1]; + pixelBit[4] = x[0]; + pixelBit[5] = x[2]; + pixelBit[6] = x[3]; + pixelBit[7] = y[3]; + break; + case 1: + pixelBit[0] = y[0]; + pixelBit[1] = y[1]; + pixelBit[2] = y[2]; + pixelBit[3] = x[0]; + pixelBit[4] = x[1]; + pixelBit[5] = x[2]; + pixelBit[6] = x[3]; + break; + case 2: + pixelBit[0] = y[0]; + pixelBit[1] = y[1]; + pixelBit[2] = x[0]; + pixelBit[3] = y[2]; + pixelBit[4] = x[1]; + pixelBit[5] = x[2]; + break; + case 3: + pixelBit[0] = y[0]; + pixelBit[1] = x[0]; + pixelBit[2] = y[1]; + pixelBit[3] = x[1]; + pixelBit[4] = x[2]; + break; + default: + ADDR_ASSERT_ALWAYS(); + case 4: + ret = ADDR_INVALIDPARAMS; + break; + } + } + else + { + ADDR_ASSERT_ALWAYS(); + ret = ADDR_INVALIDPARAMS; + } + + // Post validation + if (ret == ADDR_OK) + { + MAYBE_UNUSED Dim2d microBlockDim = Block256_2d[elementBytesLog2]; + ADDR_ASSERT((2u << GetMaxValidChannelIndex(pEquation->addr, 8, 0)) == + (microBlockDim.w * (1 << elementBytesLog2))); + ADDR_ASSERT((2u << GetMaxValidChannelIndex(pEquation->addr, 8, 1)) == microBlockDim.h); + } + + return ret; +} + +/** +************************************************************************************************************************ +* Gfx9Lib::HwlComputeThinEquation +* +* @brief +* Interface function stub of ComputeThinEquation +* +* @return +* ADDR_E_RETURNCODE 
+************************************************************************************************************************ +*/ +ADDR_E_RETURNCODE Gfx9Lib::HwlComputeThinEquation( + AddrResourceType rsrcType, + AddrSwizzleMode swMode, + UINT_32 elementBytesLog2, + ADDR_EQUATION* pEquation) const +{ + ADDR_E_RETURNCODE ret = ADDR_OK; + + UINT_32 blockSizeLog2 = GetBlockSizeLog2(swMode); + + UINT_32 maxXorBits = blockSizeLog2; + if (IsNonPrtXor(swMode)) + { + // For non-prt-xor, maybe need to initialize some more bits for xor + // The highest xor bit used in equation will be max the following 3 items: + // 1. m_pipeInterleaveLog2 + 2 * pipeXorBits + // 2. m_pipeInterleaveLog2 + pipeXorBits + 2 * bankXorBits + // 3. blockSizeLog2 + + maxXorBits = Max(maxXorBits, m_pipeInterleaveLog2 + 2 * GetPipeXorBits(blockSizeLog2)); + maxXorBits = Max(maxXorBits, m_pipeInterleaveLog2 + + GetPipeXorBits(blockSizeLog2) + + 2 * GetBankXorBits(blockSizeLog2)); + } + + const UINT_32 maxBitsUsed = 14; + ADDR_ASSERT((2 * maxBitsUsed) >= maxXorBits); + ADDR_CHANNEL_SETTING x[maxBitsUsed] = {}; + ADDR_CHANNEL_SETTING y[maxBitsUsed] = {}; + + const UINT_32 extraXorBits = 16; + ADDR_ASSERT(extraXorBits >= maxXorBits - blockSizeLog2); + ADDR_CHANNEL_SETTING xorExtra[extraXorBits] = {}; + + for (UINT_32 i = 0; i < maxBitsUsed; i++) + { + InitChannel(1, 0, elementBytesLog2 + i, &x[i]); + InitChannel(1, 1, i, &y[i]); + } + + ADDR_CHANNEL_SETTING* pixelBit = pEquation->addr; + + for (UINT_32 i = 0; i < elementBytesLog2; i++) + { + InitChannel(1, 0 , i, &pixelBit[i]); + } + + UINT_32 xIdx = 0; + UINT_32 yIdx = 0; + UINT_32 lowBits = 0; + + if (IsZOrderSwizzle(swMode)) + { + if (elementBytesLog2 <= 3) + { + for (UINT_32 i = elementBytesLog2; i < 6; i++) + { + pixelBit[i] = (((i - elementBytesLog2) & 1) == 0) ? 
x[xIdx++] : y[yIdx++]; + } + + lowBits = 6; + } + else + { + ret = ADDR_INVALIDPARAMS; + } + } + else + { + ret = HwlComputeBlock256Equation(rsrcType, swMode, elementBytesLog2, pEquation); + + if (ret == ADDR_OK) + { + Dim2d microBlockDim = Block256_2d[elementBytesLog2]; + xIdx = Log2(microBlockDim.w); + yIdx = Log2(microBlockDim.h); + lowBits = 8; + } + } + + if (ret == ADDR_OK) + { + for (UINT_32 i = lowBits; i < blockSizeLog2; i++) + { + pixelBit[i] = ((i & 1) == 0) ? y[yIdx++] : x[xIdx++]; + } + + for (UINT_32 i = blockSizeLog2; i < maxXorBits; i++) + { + xorExtra[i - blockSizeLog2] = ((i & 1) == 0) ? y[yIdx++] : x[xIdx++]; + } + + if (IsXor(swMode)) + { + // Fill XOR bits + UINT_32 pipeStart = m_pipeInterleaveLog2; + UINT_32 pipeXorBits = GetPipeXorBits(blockSizeLog2); + + UINT_32 bankStart = pipeStart + pipeXorBits; + UINT_32 bankXorBits = GetBankXorBits(blockSizeLog2); + + for (UINT_32 i = 0; i < pipeXorBits; i++) + { + UINT_32 xor1BitPos = pipeStart + 2 * pipeXorBits - 1 - i; + ADDR_CHANNEL_SETTING* pXor1Src = (xor1BitPos < blockSizeLog2) ? + &pEquation->addr[xor1BitPos] : &xorExtra[xor1BitPos - blockSizeLog2]; + + InitChannel(&pEquation->xor1[pipeStart + i], pXor1Src); + } + + for (UINT_32 i = 0; i < bankXorBits; i++) + { + UINT_32 xor1BitPos = bankStart + 2 * bankXorBits - 1 - i; + ADDR_CHANNEL_SETTING* pXor1Src = (xor1BitPos < blockSizeLog2) ? 
+ &pEquation->addr[xor1BitPos] : &xorExtra[xor1BitPos - blockSizeLog2]; + + InitChannel(&pEquation->xor1[bankStart + i], pXor1Src); + } + + if (IsPrt(swMode) == FALSE) + { + for (UINT_32 i = 0; i < pipeXorBits; i++) + { + InitChannel(1, 2, pipeXorBits - i - 1, &pEquation->xor2[pipeStart + i]); + } + + for (UINT_32 i = 0; i < bankXorBits; i++) + { + InitChannel(1, 2, bankXorBits - i - 1 + pipeXorBits, &pEquation->xor2[bankStart + i]); + } + } + } + + pEquation->numBits = blockSizeLog2; + } + + return ret; +} + +/** +************************************************************************************************************************ +* Gfx9Lib::HwlComputeThickEquation +* +* @brief +* Interface function stub of ComputeThickEquation +* +* @return +* ADDR_E_RETURNCODE +************************************************************************************************************************ +*/ +ADDR_E_RETURNCODE Gfx9Lib::HwlComputeThickEquation( + AddrResourceType rsrcType, + AddrSwizzleMode swMode, + UINT_32 elementBytesLog2, + ADDR_EQUATION* pEquation) const +{ + ADDR_E_RETURNCODE ret = ADDR_OK; + + ADDR_ASSERT(IsTex3d(rsrcType)); + + UINT_32 blockSizeLog2 = GetBlockSizeLog2(swMode); + + UINT_32 maxXorBits = blockSizeLog2; + if (IsNonPrtXor(swMode)) + { + // For non-prt-xor, maybe need to initialize some more bits for xor + // The highest xor bit used in equation will be max the following 3: + // 1. m_pipeInterleaveLog2 + 3 * pipeXorBits + // 2. m_pipeInterleaveLog2 + pipeXorBits + 3 * bankXorBits + // 3. 
blockSizeLog2 + + maxXorBits = Max(maxXorBits, m_pipeInterleaveLog2 + 3 * GetPipeXorBits(blockSizeLog2)); + maxXorBits = Max(maxXorBits, m_pipeInterleaveLog2 + + GetPipeXorBits(blockSizeLog2) + + 3 * GetBankXorBits(blockSizeLog2)); + } + + for (UINT_32 i = 0; i < elementBytesLog2; i++) + { + InitChannel(1, 0 , i, &pEquation->addr[i]); + } + + ADDR_CHANNEL_SETTING* pixelBit = &pEquation->addr[elementBytesLog2]; + + const UINT_32 maxBitsUsed = 12; + ADDR_ASSERT((3 * maxBitsUsed) >= maxXorBits); + ADDR_CHANNEL_SETTING x[maxBitsUsed] = {}; + ADDR_CHANNEL_SETTING y[maxBitsUsed] = {}; + ADDR_CHANNEL_SETTING z[maxBitsUsed] = {}; + + const UINT_32 extraXorBits = 24; + ADDR_ASSERT(extraXorBits >= maxXorBits - blockSizeLog2); + ADDR_CHANNEL_SETTING xorExtra[extraXorBits] = {}; + + for (UINT_32 i = 0; i < maxBitsUsed; i++) + { + InitChannel(1, 0, elementBytesLog2 + i, &x[i]); + InitChannel(1, 1, i, &y[i]); + InitChannel(1, 2, i, &z[i]); + } + + if (IsZOrderSwizzle(swMode)) + { + switch (elementBytesLog2) + { + case 0: + pixelBit[0] = x[0]; + pixelBit[1] = y[0]; + pixelBit[2] = x[1]; + pixelBit[3] = y[1]; + pixelBit[4] = z[0]; + pixelBit[5] = z[1]; + pixelBit[6] = x[2]; + pixelBit[7] = z[2]; + pixelBit[8] = y[2]; + pixelBit[9] = x[3]; + break; + case 1: + pixelBit[0] = x[0]; + pixelBit[1] = y[0]; + pixelBit[2] = x[1]; + pixelBit[3] = y[1]; + pixelBit[4] = z[0]; + pixelBit[5] = z[1]; + pixelBit[6] = z[2]; + pixelBit[7] = y[2]; + pixelBit[8] = x[2]; + break; + case 2: + pixelBit[0] = x[0]; + pixelBit[1] = y[0]; + pixelBit[2] = x[1]; + pixelBit[3] = z[0]; + pixelBit[4] = y[1]; + pixelBit[5] = z[1]; + pixelBit[6] = y[2]; + pixelBit[7] = x[2]; + break; + case 3: + pixelBit[0] = x[0]; + pixelBit[1] = y[0]; + pixelBit[2] = z[0]; + pixelBit[3] = x[1]; + pixelBit[4] = z[1]; + pixelBit[5] = y[1]; + pixelBit[6] = x[2]; + break; + case 4: + pixelBit[0] = x[0]; + pixelBit[1] = y[0]; + pixelBit[2] = z[0]; + pixelBit[3] = z[1]; + pixelBit[4] = y[1]; + pixelBit[5] = x[1]; + break; + default: 
+ ADDR_ASSERT_ALWAYS(); + ret = ADDR_INVALIDPARAMS; + break; + } + } + else if (IsStandardSwizzle(rsrcType, swMode)) + { + switch (elementBytesLog2) + { + case 0: + pixelBit[0] = x[0]; + pixelBit[1] = x[1]; + pixelBit[2] = x[2]; + pixelBit[3] = x[3]; + pixelBit[4] = y[0]; + pixelBit[5] = y[1]; + pixelBit[6] = z[0]; + pixelBit[7] = z[1]; + pixelBit[8] = z[2]; + pixelBit[9] = y[2]; + break; + case 1: + pixelBit[0] = x[0]; + pixelBit[1] = x[1]; + pixelBit[2] = x[2]; + pixelBit[3] = y[0]; + pixelBit[4] = y[1]; + pixelBit[5] = z[0]; + pixelBit[6] = z[1]; + pixelBit[7] = z[2]; + pixelBit[8] = y[2]; + break; + case 2: + pixelBit[0] = x[0]; + pixelBit[1] = x[1]; + pixelBit[2] = y[0]; + pixelBit[3] = y[1]; + pixelBit[4] = z[0]; + pixelBit[5] = z[1]; + pixelBit[6] = y[2]; + pixelBit[7] = x[2]; + break; + case 3: + pixelBit[0] = x[0]; + pixelBit[1] = y[0]; + pixelBit[2] = y[1]; + pixelBit[3] = z[0]; + pixelBit[4] = z[1]; + pixelBit[5] = x[1]; + pixelBit[6] = x[2]; + break; + case 4: + pixelBit[0] = y[0]; + pixelBit[1] = y[1]; + pixelBit[2] = z[0]; + pixelBit[3] = z[1]; + pixelBit[4] = x[0]; + pixelBit[5] = x[1]; + break; + default: + ADDR_ASSERT_ALWAYS(); + ret = ADDR_INVALIDPARAMS; + break; + } + } + else + { + ADDR_ASSERT_ALWAYS(); + ret = ADDR_INVALIDPARAMS; + } + + if (ret == ADDR_OK) + { + Dim3d microBlockDim = Block1K_3d[elementBytesLog2]; + UINT_32 xIdx = Log2(microBlockDim.w); + UINT_32 yIdx = Log2(microBlockDim.h); + UINT_32 zIdx = Log2(microBlockDim.d); + + pixelBit = pEquation->addr; + + const UINT_32 lowBits = 10; + ADDR_ASSERT(pEquation->addr[lowBits - 1].valid == 1); + ADDR_ASSERT(pEquation->addr[lowBits].valid == 0); + + for (UINT_32 i = lowBits; i < blockSizeLog2; i++) + { + if ((i % 3) == 0) + { + pixelBit[i] = x[xIdx++]; + } + else if ((i % 3) == 1) + { + pixelBit[i] = z[zIdx++]; + } + else + { + pixelBit[i] = y[yIdx++]; + } + } + + for (UINT_32 i = blockSizeLog2; i < maxXorBits; i++) + { + if ((i % 3) == 0) + { + xorExtra[i - blockSizeLog2] = x[xIdx++]; + } 
+ else if ((i % 3) == 1) + { + xorExtra[i - blockSizeLog2] = z[zIdx++]; + } + else + { + xorExtra[i - blockSizeLog2] = y[yIdx++]; + } + } + + if (IsXor(swMode)) + { + // Fill XOR bits + UINT_32 pipeStart = m_pipeInterleaveLog2; + UINT_32 pipeXorBits = GetPipeXorBits(blockSizeLog2); + for (UINT_32 i = 0; i < pipeXorBits; i++) + { + UINT_32 xor1BitPos = pipeStart + (3 * pipeXorBits) - 1 - (2 * i); + ADDR_CHANNEL_SETTING* pXor1Src = (xor1BitPos < blockSizeLog2) ? + &pEquation->addr[xor1BitPos] : &xorExtra[xor1BitPos - blockSizeLog2]; + + InitChannel(&pEquation->xor1[pipeStart + i], pXor1Src); + + UINT_32 xor2BitPos = pipeStart + (3 * pipeXorBits) - 2 - (2 * i); + ADDR_CHANNEL_SETTING* pXor2Src = (xor2BitPos < blockSizeLog2) ? + &pEquation->addr[xor2BitPos] : &xorExtra[xor2BitPos - blockSizeLog2]; + + InitChannel(&pEquation->xor2[pipeStart + i], pXor2Src); + } + + UINT_32 bankStart = pipeStart + pipeXorBits; + UINT_32 bankXorBits = GetBankXorBits(blockSizeLog2); + for (UINT_32 i = 0; i < bankXorBits; i++) + { + UINT_32 xor1BitPos = bankStart + (3 * bankXorBits) - 1 - (2 * i); + ADDR_CHANNEL_SETTING* pXor1Src = (xor1BitPos < blockSizeLog2) ? + &pEquation->addr[xor1BitPos] : &xorExtra[xor1BitPos - blockSizeLog2]; + + InitChannel(&pEquation->xor1[bankStart + i], pXor1Src); + + UINT_32 xor2BitPos = bankStart + (3 * bankXorBits) - 2 - (2 * i); + ADDR_CHANNEL_SETTING* pXor2Src = (xor2BitPos < blockSizeLog2) ? 
+ &pEquation->addr[xor2BitPos] : &xorExtra[xor2BitPos - blockSizeLog2]; + + InitChannel(&pEquation->xor2[bankStart + i], pXor2Src); + } + } + + pEquation->numBits = blockSizeLog2; + } + + return ret; +} + +/** +************************************************************************************************************************ +* Gfx9Lib::IsValidDisplaySwizzleMode +* +* @brief +* Check if a swizzle mode is supported by display engine +* +* @return +* TRUE is swizzle mode is supported by display engine +************************************************************************************************************************ +*/ +BOOL_32 Gfx9Lib::IsValidDisplaySwizzleMode( + const ADDR2_COMPUTE_SURFACE_INFO_INPUT* pIn) const +{ + BOOL_32 support = FALSE; + + const AddrResourceType resourceType = pIn->resourceType; + (void)resourceType; + const AddrSwizzleMode swizzleMode = pIn->swizzleMode; + + if (m_settings.isDce12) + { + switch (swizzleMode) + { + case ADDR_SW_256B_D: + case ADDR_SW_256B_R: + support = (pIn->bpp == 32); + break; + + case ADDR_SW_LINEAR: + case ADDR_SW_4KB_D: + case ADDR_SW_4KB_R: + case ADDR_SW_64KB_D: + case ADDR_SW_64KB_R: + case ADDR_SW_VAR_D: + case ADDR_SW_VAR_R: + case ADDR_SW_4KB_D_X: + case ADDR_SW_4KB_R_X: + case ADDR_SW_64KB_D_X: + case ADDR_SW_64KB_R_X: + case ADDR_SW_VAR_D_X: + case ADDR_SW_VAR_R_X: + support = (pIn->bpp <= 64); + break; + + default: + break; + } + } + else if (m_settings.isDcn1) + { + switch (swizzleMode) + { + case ADDR_SW_4KB_D: + case ADDR_SW_64KB_D: + case ADDR_SW_VAR_D: + case ADDR_SW_64KB_D_T: + case ADDR_SW_4KB_D_X: + case ADDR_SW_64KB_D_X: + case ADDR_SW_VAR_D_X: + support = (pIn->bpp == 64); + break; + + case ADDR_SW_LINEAR: + case ADDR_SW_4KB_S: + case ADDR_SW_64KB_S: + case ADDR_SW_VAR_S: + case ADDR_SW_64KB_S_T: + case ADDR_SW_4KB_S_X: + case ADDR_SW_64KB_S_X: + case ADDR_SW_VAR_S_X: + support = (pIn->bpp <= 64); + break; + + default: + break; + } + } + else + { + ADDR_NOT_IMPLEMENTED(); + } + + return 
support; +} + +/** +************************************************************************************************************************ +* Gfx9Lib::HwlComputePipeBankXor +* +* @brief +* Generate a PipeBankXor value to be ORed into bits above pipeInterleaveBits of address +* +* @return +* PipeBankXor value +************************************************************************************************************************ +*/ +ADDR_E_RETURNCODE Gfx9Lib::HwlComputePipeBankXor( + const ADDR2_COMPUTE_PIPEBANKXOR_INPUT* pIn, + ADDR2_COMPUTE_PIPEBANKXOR_OUTPUT* pOut) const +{ + if (IsXor(pIn->swizzleMode)) + { + UINT_32 macroBlockBits = GetBlockSizeLog2(pIn->swizzleMode); + UINT_32 pipeBits = GetPipeXorBits(macroBlockBits); + UINT_32 bankBits = GetBankXorBits(macroBlockBits); + + UINT_32 pipeXor = 0; + UINT_32 bankXor = 0; + + const UINT_32 bankMask = (1 << bankBits) - 1; + const UINT_32 index = pIn->surfIndex & bankMask; + + const UINT_32 bpp = pIn->flags.fmask ? + GetFmaskBpp(pIn->numSamples, pIn->numFrags) : GetElemLib()->GetBitsPerPixel(pIn->format); + if (bankBits == 4) + { + static const UINT_32 BankXorSmallBpp[] = {0, 7, 4, 3, 8, 15, 12, 11, 1, 6, 5, 2, 9, 14, 13, 10}; + static const UINT_32 BankXorLargeBpp[] = {0, 7, 8, 15, 4, 3, 12, 11, 1, 6, 9, 14, 5, 2, 13, 10}; + + bankXor = (bpp <= 32) ? BankXorSmallBpp[index] : BankXorLargeBpp[index]; + } + else if (bankBits > 0) + { + UINT_32 bankIncrease = (1 << (bankBits - 1)) - 1; + bankIncrease = (bankIncrease == 0) ? 
1 : bankIncrease; + bankXor = (index * bankIncrease) & bankMask; + } + + pOut->pipeBankXor = (bankXor << pipeBits) | pipeXor; + } + else + { + pOut->pipeBankXor = 0; + } + + return ADDR_OK; +} + +/** +************************************************************************************************************************ +* Gfx9Lib::HwlComputeSlicePipeBankXor +* +* @brief +* Generate slice PipeBankXor value based on base PipeBankXor value and slice id +* +* @return +* PipeBankXor value +************************************************************************************************************************ +*/ +ADDR_E_RETURNCODE Gfx9Lib::HwlComputeSlicePipeBankXor( + const ADDR2_COMPUTE_SLICE_PIPEBANKXOR_INPUT* pIn, + ADDR2_COMPUTE_SLICE_PIPEBANKXOR_OUTPUT* pOut) const +{ + UINT_32 macroBlockBits = GetBlockSizeLog2(pIn->swizzleMode); + UINT_32 pipeBits = GetPipeXorBits(macroBlockBits); + UINT_32 bankBits = GetBankXorBits(macroBlockBits); + + UINT_32 pipeXor = ReverseBitVector(pIn->slice, pipeBits); + UINT_32 bankXor = ReverseBitVector(pIn->slice >> pipeBits, bankBits); + + pOut->pipeBankXor = pIn->basePipeBankXor ^ (pipeXor | (bankXor << pipeBits)); + + return ADDR_OK; +} + +/** +************************************************************************************************************************ +* Gfx9Lib::HwlComputeSubResourceOffsetForSwizzlePattern +* +* @brief +* Compute sub resource offset to support swizzle pattern +* +* @return +* Offset +************************************************************************************************************************ +*/ +ADDR_E_RETURNCODE Gfx9Lib::HwlComputeSubResourceOffsetForSwizzlePattern( + const ADDR2_COMPUTE_SUBRESOURCE_OFFSET_FORSWIZZLEPATTERN_INPUT* pIn, + ADDR2_COMPUTE_SUBRESOURCE_OFFSET_FORSWIZZLEPATTERN_OUTPUT* pOut) const +{ + ADDR_ASSERT(IsThin(pIn->resourceType, pIn->swizzleMode)); + + UINT_32 macroBlockBits = GetBlockSizeLog2(pIn->swizzleMode); + UINT_32 pipeBits = GetPipeXorBits(macroBlockBits); + 
UINT_32 bankBits = GetBankXorBits(macroBlockBits); + UINT_32 pipeXor = ReverseBitVector(pIn->slice, pipeBits); + UINT_32 bankXor = ReverseBitVector(pIn->slice >> pipeBits, bankBits); + UINT_32 pipeBankXor = ((pipeXor | (bankXor << pipeBits)) ^ (pIn->pipeBankXor)) << m_pipeInterleaveLog2; + + pOut->offset = pIn->slice * pIn->sliceSize + + pIn->macroBlockOffset + + (pIn->mipTailOffset ^ pipeBankXor) - + static_cast(pipeBankXor); + return ADDR_OK; +} + +/** +************************************************************************************************************************ +* Gfx9Lib::HwlComputeSurfaceInfoSanityCheck +* +* @brief +* Compute surface info sanity check +* +* @return +* Offset +************************************************************************************************************************ +*/ +ADDR_E_RETURNCODE Gfx9Lib::HwlComputeSurfaceInfoSanityCheck( + const ADDR2_COMPUTE_SURFACE_INFO_INPUT* pIn) const +{ + BOOL_32 invalid = FALSE; + + if ((pIn->bpp > 128) || (pIn->width == 0) || (pIn->numFrags > 8) || (pIn->numSamples > 16)) + { + invalid = TRUE; + } + else if ((pIn->swizzleMode >= ADDR_SW_MAX_TYPE) || + (pIn->resourceType >= ADDR_RSRC_MAX_TYPE)) + { + invalid = TRUE; + } + + BOOL_32 mipmap = (pIn->numMipLevels > 1); + BOOL_32 msaa = (pIn->numFrags > 1); + + ADDR2_SURFACE_FLAGS flags = pIn->flags; + BOOL_32 zbuffer = (flags.depth || flags.stencil); + BOOL_32 color = flags.color; + BOOL_32 display = flags.display || flags.rotated; + + AddrResourceType rsrcType = pIn->resourceType; + BOOL_32 tex3d = IsTex3d(rsrcType); + BOOL_32 thin3d = tex3d && flags.view3dAs2dArray; + AddrSwizzleMode swizzle = pIn->swizzleMode; + BOOL_32 linear = IsLinear(swizzle); + BOOL_32 blk256B = IsBlock256b(swizzle); + BOOL_32 blkVar = IsBlockVariable(swizzle); + BOOL_32 isNonPrtXor = IsNonPrtXor(swizzle); + BOOL_32 prt = flags.prt; + BOOL_32 stereo = flags.qbStereo; + + if (invalid == FALSE) + { + if ((pIn->numFrags > 1) && + (GetBlockSize(swizzle) < 
(m_pipeInterleaveBytes * pIn->numFrags))) + { + // MSAA surface must have blk_bytes/pipe_interleave >= num_samples + invalid = TRUE; + } + } + + if (invalid == FALSE) + { + switch (rsrcType) + { + case ADDR_RSRC_TEX_1D: + invalid = msaa || zbuffer || display || (linear == FALSE) || stereo; + break; + case ADDR_RSRC_TEX_2D: + invalid = (msaa && mipmap) || (stereo && msaa) || (stereo && mipmap); + break; + case ADDR_RSRC_TEX_3D: + invalid = msaa || zbuffer || display || stereo; + break; + default: + invalid = TRUE; + break; + } + } + + if (invalid == FALSE) + { + if (display) + { + invalid = (IsValidDisplaySwizzleMode(pIn) == FALSE); + } + } + + if (invalid == FALSE) + { + if (linear) + { + invalid = ((ADDR_RSRC_TEX_1D != rsrcType) && prt) || + zbuffer || msaa || (pIn->bpp == 0) || ((pIn->bpp % 8) != 0); + } + else + { + if (blk256B || blkVar || isNonPrtXor) + { + invalid = prt; + if (blk256B) + { + invalid = invalid || zbuffer || tex3d || mipmap || msaa; + } + } + + if (invalid == FALSE) + { + if (IsZOrderSwizzle(swizzle)) + { + invalid = (color && msaa) || thin3d; + } + else if (IsStandardSwizzle(swizzle)) + { + invalid = zbuffer || thin3d; + } + else if (IsDisplaySwizzle(swizzle)) + { + invalid = zbuffer || (prt && (ADDR_RSRC_TEX_3D == rsrcType)); + } + else if (IsRotateSwizzle(swizzle)) + { + invalid = zbuffer || (pIn->bpp > 64) || tex3d; + } + else + { + ADDR_ASSERT(!"invalid swizzle mode"); + invalid = TRUE; + } + } + } + } + + ADDR_ASSERT(invalid == FALSE); + + return invalid ? 
ADDR_INVALIDPARAMS : ADDR_OK; +} + +/** +************************************************************************************************************************ +* Gfx9Lib::HwlGetPreferredSurfaceSetting +* +* @brief +* Internal function to get suggested surface information for cliet to use +* +* @return +* ADDR_E_RETURNCODE +************************************************************************************************************************ +*/ +ADDR_E_RETURNCODE Gfx9Lib::HwlGetPreferredSurfaceSetting( + const ADDR2_GET_PREFERRED_SURF_SETTING_INPUT* pIn, + ADDR2_GET_PREFERRED_SURF_SETTING_OUTPUT* pOut) const +{ + ADDR_E_RETURNCODE returnCode = ADDR_OK; + ElemLib* pElemLib = GetElemLib(); + + UINT_32 bpp = pIn->bpp; + UINT_32 width = pIn->width; + UINT_32 height = pIn->height; + UINT_32 numSamples = Max(pIn->numSamples, 1u); + UINT_32 numFrags = (pIn->numFrags == 0) ? numSamples : pIn->numFrags; + + if (pIn->flags.fmask) + { + bpp = GetFmaskBpp(numSamples, numFrags); + numFrags = 1; + numSamples = 1; + pOut->resourceType = ADDR_RSRC_TEX_2D; + } + else + { + // Set format to INVALID will skip this conversion + if (pIn->format != ADDR_FMT_INVALID) + { + UINT_32 expandX, expandY; + + // Don't care for this case + ElemMode elemMode = ADDR_UNCOMPRESSED; + + // Get compression/expansion factors and element mode which indicates compression/expansion + bpp = pElemLib->GetBitsPerPixel(pIn->format, + &elemMode, + &expandX, + &expandY); + + UINT_32 basePitch = 0; + GetElemLib()->AdjustSurfaceInfo(elemMode, + expandX, + expandY, + &bpp, + &basePitch, + &width, + &height); + } + + // The output may get changed for volume(3D) texture resource in future + pOut->resourceType = pIn->resourceType; + } + + const UINT_32 numSlices = Max(pIn->numSlices, 1u); + const UINT_32 numMipLevels = Max(pIn->numMipLevels, 1u); + const BOOL_32 msaa = (numFrags > 1) || (numSamples > 1); + const BOOL_32 displayRsrc = pIn->flags.display || pIn->flags.rotated; + + // Forbid swizzle mode(s) by 
client setting, for simplicity we never allow VAR swizzle mode for GFX9 + ADDR2_SWMODE_SET allowedSwModeSet = {}; + allowedSwModeSet.value |= pIn->forbiddenBlock.linear ? 0 : Gfx9LinearSwModeMask; + allowedSwModeSet.value |= pIn->forbiddenBlock.micro ? 0 : Gfx9Blk256BSwModeMask; + allowedSwModeSet.value |= pIn->forbiddenBlock.macro4KB ? 0 : Gfx9Blk4KBSwModeMask; + allowedSwModeSet.value |= pIn->forbiddenBlock.macro64KB ? 0 : Gfx9Blk64KBSwModeMask; + + if (pIn->preferredSwSet.value != 0) + { + allowedSwModeSet.value &= pIn->preferredSwSet.sw_Z ? ~0 : ~Gfx9ZSwModeMask; + allowedSwModeSet.value &= pIn->preferredSwSet.sw_S ? ~0 : ~Gfx9StandardSwModeMask; + allowedSwModeSet.value &= pIn->preferredSwSet.sw_D ? ~0 : ~Gfx9DisplaySwModeMask; + allowedSwModeSet.value &= pIn->preferredSwSet.sw_R ? ~0 : ~Gfx9RotateSwModeMask; + } + + if (pIn->noXor) + { + allowedSwModeSet.value &= ~Gfx9XorSwModeMask; + } + + if (pIn->maxAlign > 0) + { + if (pIn->maxAlign < GetBlockSize(ADDR_SW_64KB)) + { + allowedSwModeSet.value &= ~Gfx9Blk64KBSwModeMask; + } + + if (pIn->maxAlign < GetBlockSize(ADDR_SW_4KB)) + { + allowedSwModeSet.value &= ~Gfx9Blk4KBSwModeMask; + } + + if (pIn->maxAlign < GetBlockSize(ADDR_SW_256B)) + { + allowedSwModeSet.value &= ~Gfx9Blk256BSwModeMask; + } + } + + // Filter out invalid swizzle mode(s) by image attributes and HW restrictions + switch (pOut->resourceType) + { + case ADDR_RSRC_TEX_1D: + allowedSwModeSet.value &= Gfx9Rsrc1dSwModeMask; + break; + + case ADDR_RSRC_TEX_2D: + allowedSwModeSet.value &= pIn->flags.prt ? Gfx9Rsrc2dPrtSwModeMask : Gfx9Rsrc2dSwModeMask; + + if (bpp > 64) + { + allowedSwModeSet.value &= ~(Gfx9RotateSwModeMask | Gfx9ZSwModeMask); + } + break; + + case ADDR_RSRC_TEX_3D: + allowedSwModeSet.value &= pIn->flags.prt ? 
Gfx9Rsrc3dPrtSwModeMask : Gfx9Rsrc3dSwModeMask; + + if ((numMipLevels > 1) && (numSlices >= width) && (numSlices >= height)) + { + // SW_*_D for 3D mipmaps (maxmip > 0) is only supported for Xmajor or Ymajor mipmap + // When depth (Z) is the maximum dimension then must use one of the SW_*_S + // or SW_*_Z modes if mipmapping is desired on a 3D surface + allowedSwModeSet.value &= ~Gfx9DisplaySwModeMask; + } + + if ((bpp == 128) && pIn->flags.color) + { + allowedSwModeSet.value &= ~Gfx9StandardSwModeMask; + } + + if (pIn->flags.view3dAs2dArray) + { + allowedSwModeSet.value &= Gfx9Rsrc3dThinSwModeMask | Gfx9LinearSwModeMask; + } + break; + + default: + ADDR_ASSERT_ALWAYS(); + allowedSwModeSet.value = 0; + break; + } + + if (pIn->format == ADDR_FMT_32_32_32) + { + allowedSwModeSet.value &= Gfx9LinearSwModeMask; + } + + if (ElemLib::IsBlockCompressed(pIn->format)) + { + if (pIn->flags.texture) + { + allowedSwModeSet.value &= Gfx9StandardSwModeMask | Gfx9DisplaySwModeMask; + } + else + { + allowedSwModeSet.value &= Gfx9StandardSwModeMask | Gfx9DisplaySwModeMask | Gfx9LinearSwModeMask; + } + } + + if (ElemLib::IsMacroPixelPacked(pIn->format) || + (msaa && ((bpp > 32) || pIn->flags.color || pIn->flags.unordered))) + { + allowedSwModeSet.value &= ~Gfx9ZSwModeMask; + } + + if (pIn->flags.fmask || pIn->flags.depth || pIn->flags.stencil) + { + allowedSwModeSet.value &= Gfx9ZSwModeMask; + + if (pIn->flags.noMetadata == FALSE) + { + if (pIn->flags.depth && + pIn->flags.texture && + (((bpp == 16) && (numFrags >= 4)) || ((bpp == 32) && (numFrags >= 2)))) + { + // When _X/_T swizzle mode was used for MSAA depth texture, TC will get zplane + // equation from wrong address within memory range a tile covered and use the + // garbage data for compressed Z reading which finally leads to corruption. 
+ allowedSwModeSet.value &= ~Gfx9XorSwModeMask; + } + + if (m_settings.htileCacheRbConflict && + (pIn->flags.depth || pIn->flags.stencil) && + (numSlices > 1) && + (pIn->flags.metaRbUnaligned == FALSE) && + (pIn->flags.metaPipeUnaligned == FALSE)) + { + // Z_X 2D array with Rb/Pipe aligned HTile won't have metadata cache coherency + allowedSwModeSet.value &= ~Gfx9XSwModeMask; + } + } + } + + if (msaa) + { + allowedSwModeSet.value &= Gfx9MsaaSwModeMask; + } + + if ((numFrags > 1) && + (GetBlockSize(ADDR_SW_4KB) < (m_pipeInterleaveBytes * numFrags))) + { + // MSAA surface must have blk_bytes/pipe_interleave >= num_samples + allowedSwModeSet.value &= Gfx9Blk64KBSwModeMask; + } + + if (numMipLevels > 1) + { + allowedSwModeSet.value &= ~Gfx9Blk256BSwModeMask; + } + + if (displayRsrc) + { + if (m_settings.isDce12) + { + allowedSwModeSet.value &= (bpp == 32) ? Dce12Bpp32SwModeMask : Dce12NonBpp32SwModeMask; + } + else if (m_settings.isDcn1) + { + allowedSwModeSet.value &= (bpp == 64) ? Dcn1Bpp64SwModeMask : Dcn1NonBpp64SwModeMask; + } + else + { + ADDR_NOT_IMPLEMENTED(); + } + } + + if (allowedSwModeSet.value != 0) + { +#if DEBUG + // Post sanity check, at least AddrLib should accept the output generated by its own + ADDR2_COMPUTE_SURFACE_INFO_INPUT localIn = {}; + localIn.flags = pIn->flags; + localIn.resourceType = pOut->resourceType; + localIn.format = pIn->format; + localIn.bpp = bpp; + localIn.width = width; + localIn.height = height; + localIn.numSlices = numSlices; + localIn.numMipLevels = numMipLevels; + localIn.numSamples = numSamples; + localIn.numFrags = numFrags; + + UINT_32 validateSwModeSet = allowedSwModeSet.value; + for (UINT_32 i = 0; validateSwModeSet != 0; i++) + { + if (validateSwModeSet & 1) + { + localIn.swizzleMode = static_cast(i); + HwlComputeSurfaceInfoSanityCheck(&localIn); + } + + validateSwModeSet >>= 1; + } +#endif + + pOut->validSwModeSet = allowedSwModeSet; + pOut->canXor = (allowedSwModeSet.value & Gfx9XorSwModeMask) ? 
TRUE : FALSE; + pOut->validBlockSet = GetAllowedBlockSet(allowedSwModeSet); + pOut->validSwTypeSet = GetAllowedSwSet(allowedSwModeSet); + + pOut->clientPreferredSwSet = pIn->preferredSwSet; + + if (pOut->clientPreferredSwSet.value == 0) + { + pOut->clientPreferredSwSet.value = AddrSwSetAll; + } + + if (allowedSwModeSet.value == Gfx9LinearSwModeMask) + { + pOut->swizzleMode = ADDR_SW_LINEAR; + } + else + { + // Always ignore linear swizzle mode if there is other choice. + allowedSwModeSet.swLinear = 0; + + ADDR2_BLOCK_SET allowedBlockSet = GetAllowedBlockSet(allowedSwModeSet); + + // Determine block size if there is 2 or more block type candidates + if (IsPow2(allowedBlockSet.value) == FALSE) + { + const AddrSwizzleMode swMode[AddrBlockMaxTiledType] = {ADDR_SW_256B, ADDR_SW_4KB, ADDR_SW_64KB}; + Dim3d blkDim[AddrBlockMaxTiledType] = {{0}, {0}, {0}}; + Dim3d padDim[AddrBlockMaxTiledType] = {{0}, {0}, {0}}; + UINT_64 padSize[AddrBlockMaxTiledType] = {0}; + + const UINT_32 ratioLow = pIn->flags.minimizeAlign ? 1 : (pIn->flags.opt4space ? 3 : 2); + const UINT_32 ratioHi = pIn->flags.minimizeAlign ? 1 : (pIn->flags.opt4space ? 
2 : 1); + const UINT_64 sizeAlignInElement = Max(NextPow2(pIn->minSizeAlign) / (bpp >> 3), 1u); + UINT_32 minSizeBlk = AddrBlockMicro; + UINT_64 minSize = 0; + + for (UINT_32 i = AddrBlockMicro; i < AddrBlockMaxTiledType; i++) + { + if (allowedBlockSet.value & (1 << i)) + { + ComputeBlockDimensionForSurf(&blkDim[i].w, + &blkDim[i].h, + &blkDim[i].d, + bpp, + numFrags, + pOut->resourceType, + swMode[i]); + + if (displayRsrc) + { + blkDim[i].w = PowTwoAlign(blkDim[i].w, 32); + } + + padSize[i] = ComputePadSize(&blkDim[i], width, height, numSlices, &padDim[i]); + padSize[i] = PowTwoAlign(padSize[i], sizeAlignInElement); + + if ((minSize == 0) || + ((padSize[i] * ratioHi) <= (minSize * ratioLow))) + { + minSize = padSize[i]; + minSizeBlk = i; + } + } + } + + if ((allowedBlockSet.micro == TRUE) && + (width <= blkDim[AddrBlockMicro].w) && + (height <= blkDim[AddrBlockMicro].h) && + (NextPow2(pIn->minSizeAlign) <= GetBlockSize(ADDR_SW_256B))) + { + minSizeBlk = AddrBlockMicro; + } + + if (minSizeBlk == AddrBlockMicro) + { + allowedSwModeSet.value &= Gfx9Blk256BSwModeMask; + } + else if (minSizeBlk == AddrBlock4KB) + { + allowedSwModeSet.value &= Gfx9Blk4KBSwModeMask; + } + else + { + ADDR_ASSERT(minSizeBlk == AddrBlock64KB); + allowedSwModeSet.value &= Gfx9Blk64KBSwModeMask; + } + } + + // Block type should be determined. 
+ ADDR_ASSERT(IsPow2(GetAllowedBlockSet(allowedSwModeSet).value)); + + ADDR2_SWTYPE_SET allowedSwSet = GetAllowedSwSet(allowedSwModeSet); + + // Determine swizzle type if there is 2 or more swizzle type candidates + if (IsPow2(allowedSwSet.value) == FALSE) + { + if (ElemLib::IsBlockCompressed(pIn->format)) + { + if (allowedSwSet.sw_D) + { + allowedSwModeSet.value &= Gfx9DisplaySwModeMask; + } + else + { + ADDR_ASSERT(allowedSwSet.sw_S); + allowedSwModeSet.value &= Gfx9StandardSwModeMask; + } + } + else if (ElemLib::IsMacroPixelPacked(pIn->format)) + { + if (allowedSwSet.sw_S) + { + allowedSwModeSet.value &= Gfx9StandardSwModeMask; + } + else if (allowedSwSet.sw_D) + { + allowedSwModeSet.value &= Gfx9DisplaySwModeMask; + } + else + { + ADDR_ASSERT(allowedSwSet.sw_R); + allowedSwModeSet.value &= Gfx9RotateSwModeMask; + } + } + else if (pOut->resourceType == ADDR_RSRC_TEX_3D) + { + if (pIn->flags.color && allowedSwSet.sw_D) + { + allowedSwModeSet.value &= Gfx9DisplaySwModeMask; + } + else if (allowedSwSet.sw_Z) + { + allowedSwModeSet.value &= Gfx9ZSwModeMask; + } + else + { + ADDR_ASSERT(allowedSwSet.sw_S); + allowedSwModeSet.value &= Gfx9StandardSwModeMask; + } + } + else + { + if (pIn->flags.rotated && allowedSwSet.sw_R) + { + allowedSwModeSet.value &= Gfx9RotateSwModeMask; + } + else if (displayRsrc && allowedSwSet.sw_D) + { + allowedSwModeSet.value &= Gfx9DisplaySwModeMask; + } + else if (allowedSwSet.sw_S) + { + allowedSwModeSet.value &= Gfx9StandardSwModeMask; + } + else + { + ADDR_ASSERT(allowedSwSet.sw_Z); + allowedSwModeSet.value &= Gfx9ZSwModeMask; + } + } + } + + // Swizzle type should be determined. + ADDR_ASSERT(IsPow2(GetAllowedSwSet(allowedSwModeSet).value)); + + // Determine swizzle mode now - always select the "largest" swizzle mode for a given block type + + // swizzle type combination. 
For example, for AddrBlock64KB + ADDR_SW_S, select SW_64KB_S_X(25) if it's + // available, or otherwise select SW_64KB_S_T(17) if it's available, or otherwise select SW_64KB_S(9). + pOut->swizzleMode = static_cast(Log2NonPow2(allowedSwModeSet.value)); + } + } + else + { + // Invalid combination... + ADDR_ASSERT_ALWAYS(); + returnCode = ADDR_INVALIDPARAMS; + } + + return returnCode; +} + +/** +************************************************************************************************************************ +* Gfx9Lib::ComputeStereoInfo +* +* @brief +* Compute height alignment and right eye pipeBankXor for stereo surface +* +* @return +* Error code +* +************************************************************************************************************************ +*/ +ADDR_E_RETURNCODE Gfx9Lib::ComputeStereoInfo( + const ADDR2_COMPUTE_SURFACE_INFO_INPUT* pIn, + ADDR2_COMPUTE_SURFACE_INFO_OUTPUT* pOut, + UINT_32* pHeightAlign + ) const +{ + ADDR_E_RETURNCODE returnCode = ADDR_OK; + + UINT_32 eqIndex = HwlGetEquationIndex(pIn, pOut); + + if (eqIndex < m_numEquations) + { + if (IsXor(pIn->swizzleMode)) + { + const UINT_32 blkSizeLog2 = GetBlockSizeLog2(pIn->swizzleMode); + const UINT_32 numPipeBits = GetPipeXorBits(blkSizeLog2); + const UINT_32 numBankBits = GetBankXorBits(blkSizeLog2); + const UINT_32 bppLog2 = Log2(pIn->bpp >> 3); + const UINT_32 maxYCoordBlock256 = Log2(Block256_2d[bppLog2].h) - 1; + MAYBE_UNUSED const ADDR_EQUATION *pEqToCheck = &m_equationTable[eqIndex]; + + ADDR_ASSERT(maxYCoordBlock256 == + GetMaxValidChannelIndex(&pEqToCheck->addr[0], GetBlockSizeLog2(ADDR_SW_256B), 1)); + + const UINT_32 maxYCoordInBaseEquation = + (blkSizeLog2 - GetBlockSizeLog2(ADDR_SW_256B)) / 2 + maxYCoordBlock256; + + ADDR_ASSERT(maxYCoordInBaseEquation == + GetMaxValidChannelIndex(&pEqToCheck->addr[0], blkSizeLog2, 1)); + + const UINT_32 maxYCoordInPipeXor = (numPipeBits == 0) ? 
0 : maxYCoordBlock256 + numPipeBits; + + ADDR_ASSERT(maxYCoordInPipeXor == + GetMaxValidChannelIndex(&pEqToCheck->xor1[m_pipeInterleaveLog2], numPipeBits, 1)); + + const UINT_32 maxYCoordInBankXor = (numBankBits == 0) ? + 0 : maxYCoordBlock256 + (numPipeBits + 1) / 2 + numBankBits; + + ADDR_ASSERT(maxYCoordInBankXor == + GetMaxValidChannelIndex(&pEqToCheck->xor1[m_pipeInterleaveLog2 + numPipeBits], numBankBits, 1)); + + const UINT_32 maxYCoordInPipeBankXor = Max(maxYCoordInPipeXor, maxYCoordInBankXor); + + if (maxYCoordInPipeBankXor > maxYCoordInBaseEquation) + { + *pHeightAlign = 1u << maxYCoordInPipeBankXor; + + if (pOut->pStereoInfo != NULL) + { + pOut->pStereoInfo->rightSwizzle = 0; + + if ((PowTwoAlign(pIn->height, *pHeightAlign) % (*pHeightAlign * 2)) != 0) + { + if (maxYCoordInPipeXor == maxYCoordInPipeBankXor) + { + pOut->pStereoInfo->rightSwizzle |= (1u << 1); + } + + if (maxYCoordInBankXor == maxYCoordInPipeBankXor) + { + pOut->pStereoInfo->rightSwizzle |= + 1u << ((numPipeBits % 2) ? 
numPipeBits : numPipeBits + 1); + } + + ADDR_ASSERT(pOut->pStereoInfo->rightSwizzle == + GetCoordActiveMask(&pEqToCheck->xor1[m_pipeInterleaveLog2], + numPipeBits + numBankBits, 1, maxYCoordInPipeBankXor)); + } + } + } + } + } + else + { + ADDR_ASSERT_ALWAYS(); + returnCode = ADDR_ERROR; + } + + return returnCode; +} + +/** +************************************************************************************************************************ +* Gfx9Lib::HwlComputeSurfaceInfoTiled +* +* @brief +* Internal function to calculate alignment for tiled surface +* +* @return +* ADDR_E_RETURNCODE +************************************************************************************************************************ +*/ +ADDR_E_RETURNCODE Gfx9Lib::HwlComputeSurfaceInfoTiled( + const ADDR2_COMPUTE_SURFACE_INFO_INPUT* pIn, ///< [in] input structure + ADDR2_COMPUTE_SURFACE_INFO_OUTPUT* pOut ///< [out] output structure + ) const +{ + ADDR_E_RETURNCODE returnCode = ComputeBlockDimensionForSurf(&pOut->blockWidth, + &pOut->blockHeight, + &pOut->blockSlices, + pIn->bpp, + pIn->numFrags, + pIn->resourceType, + pIn->swizzleMode); + + if (returnCode == ADDR_OK) + { + UINT_32 pitchAlignInElement = pOut->blockWidth; + + if ((IsTex2d(pIn->resourceType) == TRUE) && + (pIn->flags.display || pIn->flags.rotated) && + (pIn->numMipLevels <= 1) && + (pIn->numSamples <= 1) && + (pIn->numFrags <= 1)) + { + // Display engine needs pitch align to be at least 32 pixels. 
+ pitchAlignInElement = PowTwoAlign(pitchAlignInElement, 32); + } + + pOut->pitch = PowTwoAlign(pIn->width, pitchAlignInElement); + + if ((pIn->numMipLevels <= 1) && (pIn->pitchInElement > 0)) + { + if ((pIn->pitchInElement % pitchAlignInElement) != 0) + { + returnCode = ADDR_INVALIDPARAMS; + } + else if (pIn->pitchInElement < pOut->pitch) + { + returnCode = ADDR_INVALIDPARAMS; + } + else + { + pOut->pitch = pIn->pitchInElement; + } + } + + UINT_32 heightAlign = 0; + + if (pIn->flags.qbStereo) + { + returnCode = ComputeStereoInfo(pIn, pOut, &heightAlign); + } + + if (returnCode == ADDR_OK) + { + pOut->height = PowTwoAlign(pIn->height, pOut->blockHeight); + + if (heightAlign > 1) + { + pOut->height = PowTwoAlign(pOut->height, heightAlign); + } + + pOut->numSlices = PowTwoAlign(pIn->numSlices, pOut->blockSlices); + + pOut->epitchIsHeight = FALSE; + pOut->mipChainInTail = FALSE; + pOut->firstMipIdInTail = pIn->numMipLevels; + + pOut->mipChainPitch = pOut->pitch; + pOut->mipChainHeight = pOut->height; + pOut->mipChainSlice = pOut->numSlices; + + if (pIn->numMipLevels > 1) + { + pOut->firstMipIdInTail = GetMipChainInfo(pIn->resourceType, + pIn->swizzleMode, + pIn->bpp, + pIn->width, + pIn->height, + pIn->numSlices, + pOut->blockWidth, + pOut->blockHeight, + pOut->blockSlices, + pIn->numMipLevels, + pOut->pMipInfo); + + const UINT_32 endingMipId = Min(pOut->firstMipIdInTail, pIn->numMipLevels - 1); + + if (endingMipId == 0) + { + const Dim3d tailMaxDim = GetMipTailDim(pIn->resourceType, + pIn->swizzleMode, + pOut->blockWidth, + pOut->blockHeight, + pOut->blockSlices); + + pOut->epitchIsHeight = TRUE; + pOut->pitch = tailMaxDim.w; + pOut->height = tailMaxDim.h; + pOut->numSlices = IsThick(pIn->resourceType, pIn->swizzleMode) ? 
+ tailMaxDim.d : pIn->numSlices; + pOut->mipChainInTail = TRUE; + } + else + { + UINT_32 mip0WidthInBlk = pOut->pitch / pOut->blockWidth; + UINT_32 mip0HeightInBlk = pOut->height / pOut->blockHeight; + + AddrMajorMode majorMode = GetMajorMode(pIn->resourceType, + pIn->swizzleMode, + mip0WidthInBlk, + mip0HeightInBlk, + pOut->numSlices / pOut->blockSlices); + if (majorMode == ADDR_MAJOR_Y) + { + UINT_32 mip1WidthInBlk = RoundHalf(mip0WidthInBlk); + + if ((mip1WidthInBlk == 1) && (endingMipId > 2)) + { + mip1WidthInBlk++; + } + + pOut->mipChainPitch += (mip1WidthInBlk * pOut->blockWidth); + + pOut->epitchIsHeight = FALSE; + } + else + { + UINT_32 mip1HeightInBlk = RoundHalf(mip0HeightInBlk); + + if ((mip1HeightInBlk == 1) && (endingMipId > 2)) + { + mip1HeightInBlk++; + } + + pOut->mipChainHeight += (mip1HeightInBlk * pOut->blockHeight); + + pOut->epitchIsHeight = TRUE; + } + } + + if (pOut->pMipInfo != NULL) + { + UINT_32 elementBytesLog2 = Log2(pIn->bpp >> 3); + + for (UINT_32 i = 0; i < pIn->numMipLevels; i++) + { + Dim3d mipStartPos = {0}; + UINT_32 mipTailOffsetInBytes = 0; + + mipStartPos = GetMipStartPos(pIn->resourceType, + pIn->swizzleMode, + pOut->pitch, + pOut->height, + pOut->numSlices, + pOut->blockWidth, + pOut->blockHeight, + pOut->blockSlices, + i, + elementBytesLog2, + &mipTailOffsetInBytes); + + UINT_32 pitchInBlock = + pOut->mipChainPitch / pOut->blockWidth; + UINT_32 sliceInBlock = + (pOut->mipChainHeight / pOut->blockHeight) * pitchInBlock; + UINT_64 blockIndex = + mipStartPos.d * sliceInBlock + mipStartPos.h * pitchInBlock + mipStartPos.w; + UINT_64 macroBlockOffset = + blockIndex << GetBlockSizeLog2(pIn->swizzleMode); + + pOut->pMipInfo[i].macroBlockOffset = macroBlockOffset; + pOut->pMipInfo[i].mipTailOffset = mipTailOffsetInBytes; + } + } + } + else if (pOut->pMipInfo != NULL) + { + pOut->pMipInfo[0].pitch = pOut->pitch; + pOut->pMipInfo[0].height = pOut->height; + pOut->pMipInfo[0].depth = IsTex3d(pIn->resourceType)? 
pOut->numSlices : 1; + pOut->pMipInfo[0].offset = 0; + } + + pOut->sliceSize = static_cast(pOut->mipChainPitch) * pOut->mipChainHeight * + (pIn->bpp >> 3) * pIn->numFrags; + pOut->surfSize = pOut->sliceSize * pOut->mipChainSlice; + pOut->baseAlign = ComputeSurfaceBaseAlignTiled(pIn->swizzleMode); + + if ((IsBlock256b(pIn->swizzleMode) == FALSE) && + (pIn->flags.color || pIn->flags.depth || pIn->flags.stencil || pIn->flags.fmask) && + (pIn->flags.texture == TRUE) && + (pIn->flags.noMetadata == FALSE) && + (pIn->flags.metaPipeUnaligned == FALSE)) + { + // Assume client requires pipe aligned metadata, which is TcCompatible and will be accessed by TC... + // Then we need extra padding for base surface. Otherwise, metadata and data surface for same pixel will + // be flushed to different pipes, but texture engine only uses pipe id of data surface to fetch both of + // them, which may cause invalid metadata to be fetched. + pOut->baseAlign = Max(pOut->baseAlign, m_pipeInterleaveBytes * m_pipes); + } + + if (pIn->flags.prt) + { + pOut->baseAlign = Max(pOut->baseAlign, PrtAlignment); + } + } + } + + return returnCode; +} + +/** +************************************************************************************************************************ +* Gfx9Lib::HwlComputeSurfaceInfoLinear +* +* @brief +* Internal function to calculate alignment for linear surface +* +* @return +* ADDR_E_RETURNCODE +************************************************************************************************************************ +*/ +ADDR_E_RETURNCODE Gfx9Lib::HwlComputeSurfaceInfoLinear( + const ADDR2_COMPUTE_SURFACE_INFO_INPUT* pIn, ///< [in] input structure + ADDR2_COMPUTE_SURFACE_INFO_OUTPUT* pOut ///< [out] output structure + ) const +{ + ADDR_E_RETURNCODE returnCode = ADDR_OK; + UINT_32 pitch = 0; + UINT_32 actualHeight = 0; + UINT_32 elementBytes = pIn->bpp >> 3; + const UINT_32 alignment = pIn->flags.prt ? 
PrtAlignment : 256; + + if (IsTex1d(pIn->resourceType)) + { + if (pIn->height > 1) + { + returnCode = ADDR_INVALIDPARAMS; + } + else + { + const UINT_32 pitchAlignInElement = alignment / elementBytes; + + pitch = PowTwoAlign(pIn->width, pitchAlignInElement); + actualHeight = pIn->numMipLevels; + + if (pIn->flags.prt == FALSE) + { + returnCode = ApplyCustomizedPitchHeight(pIn, elementBytes, pitchAlignInElement, + &pitch, &actualHeight); + } + + if (returnCode == ADDR_OK) + { + if (pOut->pMipInfo != NULL) + { + for (UINT_32 i = 0; i < pIn->numMipLevels; i++) + { + pOut->pMipInfo[i].offset = pitch * elementBytes * i; + pOut->pMipInfo[i].pitch = pitch; + pOut->pMipInfo[i].height = 1; + pOut->pMipInfo[i].depth = 1; + } + } + } + } + } + else + { + returnCode = ComputeSurfaceLinearPadding(pIn, &pitch, &actualHeight, pOut->pMipInfo); + } + + if ((pitch == 0) || (actualHeight == 0)) + { + returnCode = ADDR_INVALIDPARAMS; + } + + if (returnCode == ADDR_OK) + { + pOut->pitch = pitch; + pOut->height = pIn->height; + pOut->numSlices = pIn->numSlices; + pOut->mipChainPitch = pitch; + pOut->mipChainHeight = actualHeight; + pOut->mipChainSlice = pOut->numSlices; + pOut->epitchIsHeight = (pIn->numMipLevels > 1) ? TRUE : FALSE; + pOut->sliceSize = static_cast(pOut->pitch) * actualHeight * elementBytes; + pOut->surfSize = pOut->sliceSize * pOut->numSlices; + pOut->baseAlign = (pIn->swizzleMode == ADDR_SW_LINEAR_GENERAL) ? (pIn->bpp / 8) : alignment; + pOut->blockWidth = (pIn->swizzleMode == ADDR_SW_LINEAR_GENERAL) ? 
1 : (256 / elementBytes); + pOut->blockHeight = 1; + pOut->blockSlices = 1; + } + + // Post calculation validate + ADDR_ASSERT(pOut->sliceSize > 0); + + return returnCode; +} + +/** +************************************************************************************************************************ +* Gfx9Lib::GetMipChainInfo +* +* @brief +* Internal function to get out information about mip chain +* +* @return +* Smaller value between Id of first mip fitted in mip tail and max Id of mip being created +************************************************************************************************************************ +*/ +UINT_32 Gfx9Lib::GetMipChainInfo( + AddrResourceType resourceType, + AddrSwizzleMode swizzleMode, + UINT_32 bpp, + UINT_32 mip0Width, + UINT_32 mip0Height, + UINT_32 mip0Depth, + UINT_32 blockWidth, + UINT_32 blockHeight, + UINT_32 blockDepth, + UINT_32 numMipLevel, + ADDR2_MIP_INFO* pMipInfo) const +{ + const Dim3d tailMaxDim = + GetMipTailDim(resourceType, swizzleMode, blockWidth, blockHeight, blockDepth); + + UINT_32 mipPitch = mip0Width; + UINT_32 mipHeight = mip0Height; + UINT_32 mipDepth = IsTex3d(resourceType) ? 
mip0Depth : 1; + UINT_32 offset = 0; + UINT_32 firstMipIdInTail = numMipLevel; + BOOL_32 inTail = FALSE; + BOOL_32 finalDim = FALSE; + BOOL_32 is3dThick = IsThick(resourceType, swizzleMode); + BOOL_32 is3dThin = IsTex3d(resourceType) && (is3dThick == FALSE); + + for (UINT_32 mipId = 0; mipId < numMipLevel; mipId++) + { + if (inTail) + { + if (finalDim == FALSE) + { + UINT_32 mipSize; + + if (is3dThick) + { + mipSize = mipPitch * mipHeight * mipDepth * (bpp >> 3); + } + else + { + mipSize = mipPitch * mipHeight * (bpp >> 3); + } + + if (mipSize <= 256) + { + UINT_32 index = Log2(bpp >> 3); + + if (is3dThick) + { + mipPitch = Block256_3dZ[index].w; + mipHeight = Block256_3dZ[index].h; + mipDepth = Block256_3dZ[index].d; + } + else + { + mipPitch = Block256_2d[index].w; + mipHeight = Block256_2d[index].h; + } + + finalDim = TRUE; + } + } + } + else + { + inTail = IsInMipTail(resourceType, swizzleMode, tailMaxDim, + mipPitch, mipHeight, mipDepth); + + if (inTail) + { + firstMipIdInTail = mipId; + mipPitch = tailMaxDim.w; + mipHeight = tailMaxDim.h; + + if (is3dThick) + { + mipDepth = tailMaxDim.d; + } + } + else + { + mipPitch = PowTwoAlign(mipPitch, blockWidth); + mipHeight = PowTwoAlign(mipHeight, blockHeight); + + if (is3dThick) + { + mipDepth = PowTwoAlign(mipDepth, blockDepth); + } + } + } + + if (pMipInfo != NULL) + { + pMipInfo[mipId].pitch = mipPitch; + pMipInfo[mipId].height = mipHeight; + pMipInfo[mipId].depth = mipDepth; + pMipInfo[mipId].offset = offset; + } + + offset += (mipPitch * mipHeight * mipDepth * (bpp >> 3)); + + if (finalDim) + { + if (is3dThin) + { + mipDepth = Max(mipDepth >> 1, 1u); + } + } + else + { + mipPitch = Max(mipPitch >> 1, 1u); + mipHeight = Max(mipHeight >> 1, 1u); + + if (is3dThick || is3dThin) + { + mipDepth = Max(mipDepth >> 1, 1u); + } + } + } + + return firstMipIdInTail; +} + +/** +************************************************************************************************************************ +* 
Gfx9Lib::GetMetaMiptailInfo +* +* @brief +* Get mip tail coordinate information. +* +* @return +* N/A +************************************************************************************************************************ +*/ +VOID Gfx9Lib::GetMetaMiptailInfo( + ADDR2_META_MIP_INFO* pInfo, ///< [out] output structure to store per mip coord + Dim3d mipCoord, ///< [in] mip tail base coord + UINT_32 numMipInTail, ///< [in] number of mips in tail + Dim3d* pMetaBlkDim ///< [in] meta block width/height/depth + ) const +{ + BOOL_32 isThick = (pMetaBlkDim->d > 1); + UINT_32 mipWidth = pMetaBlkDim->w; + UINT_32 mipHeight = pMetaBlkDim->h >> 1; + UINT_32 mipDepth = pMetaBlkDim->d; + UINT_32 minInc; + + if (isThick) + { + minInc = (pMetaBlkDim->h >= 512) ? 128 : ((pMetaBlkDim->h == 256) ? 64 : 32); + } + else if (pMetaBlkDim->h >= 1024) + { + minInc = 256; + } + else if (pMetaBlkDim->h == 512) + { + minInc = 128; + } + else + { + minInc = 64; + } + + UINT_32 blk32MipId = 0xFFFFFFFF; + + for (UINT_32 mip = 0; mip < numMipInTail; mip++) + { + pInfo[mip].inMiptail = TRUE; + pInfo[mip].startX = mipCoord.w; + pInfo[mip].startY = mipCoord.h; + pInfo[mip].startZ = mipCoord.d; + pInfo[mip].width = mipWidth; + pInfo[mip].height = mipHeight; + pInfo[mip].depth = mipDepth; + + if (mipWidth <= 32) + { + if (blk32MipId == 0xFFFFFFFF) + { + blk32MipId = mip; + } + + mipCoord.w = pInfo[blk32MipId].startX; + mipCoord.h = pInfo[blk32MipId].startY; + mipCoord.d = pInfo[blk32MipId].startZ; + + switch (mip - blk32MipId) + { + case 0: + mipCoord.w += 32; // 16x16 + break; + case 1: + mipCoord.h += 32; // 8x8 + break; + case 2: + mipCoord.h += 32; // 4x4 + mipCoord.w += 16; + break; + case 3: + mipCoord.h += 32; // 2x2 + mipCoord.w += 32; + break; + case 4: + mipCoord.h += 32; // 1x1 + mipCoord.w += 48; + break; + // The following are for BC/ASTC formats + case 5: + mipCoord.h += 48; // 1/2 x 1/2 + break; + case 6: + mipCoord.h += 48; // 1/4 x 1/4 + mipCoord.w += 16; + break; + case 7: + 
mipCoord.h += 48; // 1/8 x 1/8 + mipCoord.w += 32; + break; + case 8: + mipCoord.h += 48; // 1/16 x 1/16 + mipCoord.w += 48; + break; + default: + ADDR_ASSERT_ALWAYS(); + break; + } + + mipWidth = ((mip - blk32MipId) == 0) ? 16 : 8; + mipHeight = mipWidth; + + if (isThick) + { + mipDepth = mipWidth; + } + } + else + { + if (mipWidth <= minInc) + { + // if we're below the minimal increment... + if (isThick) + { + // For 3d, just go in z direction + mipCoord.d += mipDepth; + } + else + { + // For 2d, first go across, then down + if ((mipWidth * 2) == minInc) + { + // if we're 2 mips below, that's when we go back in x, and down in y + mipCoord.w -= minInc; + mipCoord.h += minInc; + } + else + { + // otherwise, just go across in x + mipCoord.w += minInc; + } + } + } + else + { + // On even mip, go down, otherwise, go across + if (mip & 1) + { + mipCoord.w += mipWidth; + } + else + { + mipCoord.h += mipHeight; + } + } + // Divide the width by 2 + mipWidth >>= 1; + // After the first mip in tail, the mip is always a square + mipHeight = mipWidth; + // ...or for 3d, a cube + if (isThick) + { + mipDepth = mipWidth; + } + } + } +} + +/** +************************************************************************************************************************ +* Gfx9Lib::GetMipStartPos +* +* @brief +* Internal function to get out information about mip logical start position +* +* @return +* logical start position in macro block width/heith/depth of one mip level within one slice +************************************************************************************************************************ +*/ +Dim3d Gfx9Lib::GetMipStartPos( + AddrResourceType resourceType, + AddrSwizzleMode swizzleMode, + UINT_32 width, + UINT_32 height, + UINT_32 depth, + UINT_32 blockWidth, + UINT_32 blockHeight, + UINT_32 blockDepth, + UINT_32 mipId, + UINT_32 log2ElementBytes, + UINT_32* pMipTailBytesOffset) const +{ + Dim3d mipStartPos = {0}; + const Dim3d tailMaxDim = 
GetMipTailDim(resourceType, swizzleMode, blockWidth, blockHeight, blockDepth); + + // Report mip in tail if Mip0 is already in mip tail + BOOL_32 inMipTail = IsInMipTail(resourceType, swizzleMode, tailMaxDim, width, height, depth); + UINT_32 log2blkSize = GetBlockSizeLog2(swizzleMode); + UINT_32 mipIndexInTail = mipId; + + if (inMipTail == FALSE) + { + // Mip 0 dimension, unit in block + UINT_32 mipWidthInBlk = width / blockWidth; + UINT_32 mipHeightInBlk = height / blockHeight; + UINT_32 mipDepthInBlk = depth / blockDepth; + AddrMajorMode majorMode = GetMajorMode(resourceType, + swizzleMode, + mipWidthInBlk, + mipHeightInBlk, + mipDepthInBlk); + + UINT_32 endingMip = mipId + 1; + + for (UINT_32 i = 1; i <= mipId; i++) + { + if ((i == 1) || (i == 3)) + { + if (majorMode == ADDR_MAJOR_Y) + { + mipStartPos.w += mipWidthInBlk; + } + else + { + mipStartPos.h += mipHeightInBlk; + } + } + else + { + if (majorMode == ADDR_MAJOR_X) + { + mipStartPos.w += mipWidthInBlk; + } + else if (majorMode == ADDR_MAJOR_Y) + { + mipStartPos.h += mipHeightInBlk; + } + else + { + mipStartPos.d += mipDepthInBlk; + } + } + + BOOL_32 inTail = FALSE; + + if (IsThick(resourceType, swizzleMode)) + { + UINT_32 dim = log2blkSize % 3; + + if (dim == 0) + { + inTail = + (mipWidthInBlk <= 2) && (mipHeightInBlk == 1) && (mipDepthInBlk <= 2); + } + else if (dim == 1) + { + inTail = + (mipWidthInBlk == 1) && (mipHeightInBlk <= 2) && (mipDepthInBlk <= 2); + } + else + { + inTail = + (mipWidthInBlk <= 2) && (mipHeightInBlk <= 2) && (mipDepthInBlk == 1); + } + } + else + { + if (log2blkSize & 1) + { + inTail = (mipWidthInBlk <= 2) && (mipHeightInBlk == 1); + } + else + { + inTail = (mipWidthInBlk == 1) && (mipHeightInBlk <= 2); + } + } + + if (inTail) + { + endingMip = i; + break; + } + + mipWidthInBlk = RoundHalf(mipWidthInBlk); + mipHeightInBlk = RoundHalf(mipHeightInBlk); + mipDepthInBlk = RoundHalf(mipDepthInBlk); + } + + if (mipId >= endingMip) + { + inMipTail = TRUE; + mipIndexInTail = mipId - 
endingMip; + } + } + + if (inMipTail) + { + UINT_32 index = mipIndexInTail + MaxMacroBits - log2blkSize; + ADDR_ASSERT(index < sizeof(MipTailOffset256B) / sizeof(UINT_32)); + *pMipTailBytesOffset = MipTailOffset256B[index] << 8; + } + + return mipStartPos; +} + +/** +************************************************************************************************************************ +* Gfx9Lib::HwlComputeSurfaceAddrFromCoordTiled +* +* @brief +* Internal function to calculate address from coord for tiled swizzle surface +* +* @return +* ADDR_E_RETURNCODE +************************************************************************************************************************ +*/ +ADDR_E_RETURNCODE Gfx9Lib::HwlComputeSurfaceAddrFromCoordTiled( + const ADDR2_COMPUTE_SURFACE_ADDRFROMCOORD_INPUT* pIn, ///< [in] input structure + ADDR2_COMPUTE_SURFACE_ADDRFROMCOORD_OUTPUT* pOut ///< [out] output structure + ) const +{ + ADDR2_COMPUTE_SURFACE_INFO_INPUT localIn = {0}; + localIn.swizzleMode = pIn->swizzleMode; + localIn.flags = pIn->flags; + localIn.resourceType = pIn->resourceType; + localIn.bpp = pIn->bpp; + localIn.width = Max(pIn->unalignedWidth, 1u); + localIn.height = Max(pIn->unalignedHeight, 1u); + localIn.numSlices = Max(pIn->numSlices, 1u); + localIn.numMipLevels = Max(pIn->numMipLevels, 1u); + localIn.numSamples = Max(pIn->numSamples, 1u); + localIn.numFrags = Max(pIn->numFrags, 1u); + if (localIn.numMipLevels <= 1) + { + localIn.pitchInElement = pIn->pitchInElement; + } + + ADDR2_COMPUTE_SURFACE_INFO_OUTPUT localOut = {0}; + ADDR_E_RETURNCODE returnCode = ComputeSurfaceInfoTiled(&localIn, &localOut); + + BOOL_32 valid = (returnCode == ADDR_OK) && + (IsThin(pIn->resourceType, pIn->swizzleMode) || + IsThick(pIn->resourceType, pIn->swizzleMode)) && + ((pIn->pipeBankXor == 0) || (IsXor(pIn->swizzleMode))); + + if (valid) + { + UINT_32 log2ElementBytes = Log2(pIn->bpp >> 3); + Dim3d mipStartPos = {0}; + UINT_32 mipTailBytesOffset = 0; + + if (pIn->numMipLevels > 
1) + { + // Mip-map chain cannot be MSAA surface + ADDR_ASSERT((pIn->numSamples <= 1) && (pIn->numFrags<= 1)); + + mipStartPos = GetMipStartPos(pIn->resourceType, + pIn->swizzleMode, + localOut.pitch, + localOut.height, + localOut.numSlices, + localOut.blockWidth, + localOut.blockHeight, + localOut.blockSlices, + pIn->mipId, + log2ElementBytes, + &mipTailBytesOffset); + } + + UINT_32 interleaveOffset = 0; + UINT_32 pipeBits = 0; + UINT_32 pipeXor = 0; + UINT_32 bankBits = 0; + UINT_32 bankXor = 0; + + if (IsThin(pIn->resourceType, pIn->swizzleMode)) + { + UINT_32 blockOffset = 0; + UINT_32 log2blkSize = GetBlockSizeLog2(pIn->swizzleMode); + + if (IsZOrderSwizzle(pIn->swizzleMode)) + { + // Morton generation + if ((log2ElementBytes == 0) || (log2ElementBytes == 2)) + { + UINT_32 totalLowBits = 6 - log2ElementBytes; + UINT_32 mortBits = totalLowBits / 2; + UINT_32 lowBitsValue = MortonGen2d(pIn->y, pIn->x, mortBits); + // Are 9 bits enough? + UINT_32 highBitsValue = + MortonGen2d(pIn->x >> mortBits, pIn->y >> mortBits, 9) << totalLowBits; + blockOffset = lowBitsValue | highBitsValue; + ADDR_ASSERT(blockOffset == lowBitsValue + highBitsValue); + } + else + { + blockOffset = MortonGen2d(pIn->y, pIn->x, 13); + } + + // Fill LSBs with sample bits + if (pIn->numSamples > 1) + { + blockOffset *= pIn->numSamples; + blockOffset |= pIn->sample; + } + + // Shift according to BytesPP + blockOffset <<= log2ElementBytes; + } + else + { + // Micro block offset + UINT_32 microBlockOffset = ComputeSurface2DMicroBlockOffset(pIn); + blockOffset = microBlockOffset; + + // Micro block dimension + ADDR_ASSERT(log2ElementBytes < MaxNumOfBpp); + Dim2d microBlockDim = Block256_2d[log2ElementBytes]; + // Morton generation, does 12 bit enough? 
+ blockOffset |= + MortonGen2d((pIn->x / microBlockDim.w), (pIn->y / microBlockDim.h), 12) << 8; + + // Sample bits start location + UINT_32 sampleStart = log2blkSize - Log2(pIn->numSamples); + // Join sample bits information to the highest Macro block bits + if (IsNonPrtXor(pIn->swizzleMode)) + { + // Non-prt-Xor : xor highest Macro block bits with sample bits + blockOffset = blockOffset ^ (pIn->sample << sampleStart); + } + else + { + // Non-Xor or prt-Xor: replace highest Macro block bits with sample bits + // after this op, the blockOffset only contains log2 Macro block size bits + blockOffset %= (1 << sampleStart); + blockOffset |= (pIn->sample << sampleStart); + ADDR_ASSERT((blockOffset >> log2blkSize) == 0); + } + } + + if (IsXor(pIn->swizzleMode)) + { + // Mask off bits above Macro block bits to keep page synonyms working for prt + if (IsPrt(pIn->swizzleMode)) + { + blockOffset &= ((1 << log2blkSize) - 1); + } + + // Preserve offset inside pipe interleave + interleaveOffset = blockOffset & ((1 << m_pipeInterleaveLog2) - 1); + blockOffset >>= m_pipeInterleaveLog2; + + // Pipe/Se xor bits + pipeBits = GetPipeXorBits(log2blkSize); + // Pipe xor + pipeXor = FoldXor2d(blockOffset, pipeBits); + blockOffset >>= pipeBits; + + // Bank xor bits + bankBits = GetBankXorBits(log2blkSize); + // Bank Xor + bankXor = FoldXor2d(blockOffset, bankBits); + blockOffset >>= bankBits; + + // Put all the part back together + blockOffset <<= bankBits; + blockOffset |= bankXor; + blockOffset <<= pipeBits; + blockOffset |= pipeXor; + blockOffset <<= m_pipeInterleaveLog2; + blockOffset |= interleaveOffset; + } + + ADDR_ASSERT((blockOffset | mipTailBytesOffset) == (blockOffset + mipTailBytesOffset)); + ADDR_ASSERT((mipTailBytesOffset == 0u) || (blockOffset < (1u << log2blkSize))); + + blockOffset |= mipTailBytesOffset; + + if (IsNonPrtXor(pIn->swizzleMode) && (pIn->numSamples <= 1)) + { + // Apply slice xor if not MSAA/PRT + blockOffset ^= (ReverseBitVector(pIn->slice, pipeBits) << 
m_pipeInterleaveLog2); + blockOffset ^= (ReverseBitVector(pIn->slice >> pipeBits, bankBits) << + (m_pipeInterleaveLog2 + pipeBits)); + } + + returnCode = ApplyCustomerPipeBankXor(pIn->swizzleMode, pIn->pipeBankXor, + bankBits, pipeBits, &blockOffset); + + blockOffset %= (1 << log2blkSize); + + UINT_32 pitchInMacroBlock = localOut.mipChainPitch / localOut.blockWidth; + UINT_32 paddedHeightInMacroBlock = localOut.mipChainHeight / localOut.blockHeight; + UINT_32 sliceSizeInMacroBlock = pitchInMacroBlock * paddedHeightInMacroBlock; + UINT_64 macroBlockIndex = + (pIn->slice + mipStartPos.d) * sliceSizeInMacroBlock + + ((pIn->y / localOut.blockHeight) + mipStartPos.h) * pitchInMacroBlock + + ((pIn->x / localOut.blockWidth) + mipStartPos.w); + + pOut->addr = blockOffset | (macroBlockIndex << log2blkSize); + } + else + { + UINT_32 log2blkSize = GetBlockSizeLog2(pIn->swizzleMode); + + Dim3d microBlockDim = Block1K_3d[log2ElementBytes]; + + UINT_32 blockOffset = MortonGen3d((pIn->x / microBlockDim.w), + (pIn->y / microBlockDim.h), + (pIn->slice / microBlockDim.d), + 8); + + blockOffset <<= 10; + blockOffset |= ComputeSurface3DMicroBlockOffset(pIn); + + if (IsXor(pIn->swizzleMode)) + { + // Mask off bits above Macro block bits to keep page synonyms working for prt + if (IsPrt(pIn->swizzleMode)) + { + blockOffset &= ((1 << log2blkSize) - 1); + } + + // Preserve offset inside pipe interleave + interleaveOffset = blockOffset & ((1 << m_pipeInterleaveLog2) - 1); + blockOffset >>= m_pipeInterleaveLog2; + + // Pipe/Se xor bits + pipeBits = GetPipeXorBits(log2blkSize); + // Pipe xor + pipeXor = FoldXor3d(blockOffset, pipeBits); + blockOffset >>= pipeBits; + + // Bank xor bits + bankBits = GetBankXorBits(log2blkSize); + // Bank Xor + bankXor = FoldXor3d(blockOffset, bankBits); + blockOffset >>= bankBits; + + // Put all the part back together + blockOffset <<= bankBits; + blockOffset |= bankXor; + blockOffset <<= pipeBits; + blockOffset |= pipeXor; + blockOffset <<= 
m_pipeInterleaveLog2; + blockOffset |= interleaveOffset; + } + + ADDR_ASSERT((blockOffset | mipTailBytesOffset) == (blockOffset + mipTailBytesOffset)); + ADDR_ASSERT((mipTailBytesOffset == 0u) || (blockOffset < (1u << log2blkSize))); + blockOffset |= mipTailBytesOffset; + + returnCode = ApplyCustomerPipeBankXor(pIn->swizzleMode, pIn->pipeBankXor, + bankBits, pipeBits, &blockOffset); + + blockOffset %= (1 << log2blkSize); + + UINT_32 xb = pIn->x / localOut.blockWidth + mipStartPos.w; + UINT_32 yb = pIn->y / localOut.blockHeight + mipStartPos.h; + UINT_32 zb = pIn->slice / localOut.blockSlices + + mipStartPos.d; + + UINT_32 pitchInBlock = localOut.mipChainPitch / localOut.blockWidth; + UINT_32 sliceSizeInBlock = + (localOut.mipChainHeight / localOut.blockHeight) * pitchInBlock; + UINT_64 blockIndex = zb * sliceSizeInBlock + yb * pitchInBlock + xb; + + pOut->addr = blockOffset | (blockIndex << log2blkSize); + } + } + else + { + returnCode = ADDR_INVALIDPARAMS; + } + + return returnCode; +} + +/** +************************************************************************************************************************ +* Gfx9Lib::ComputeSurfaceInfoLinear +* +* @brief +* Internal function to calculate padding for linear swizzle 2D/3D surface +* +* @return +* N/A +************************************************************************************************************************ +*/ +ADDR_E_RETURNCODE Gfx9Lib::ComputeSurfaceLinearPadding( + const ADDR2_COMPUTE_SURFACE_INFO_INPUT* pIn, ///< [in] input srtucture + UINT_32* pMipmap0PaddedWidth, ///< [out] padded width in element + UINT_32* pSlice0PaddedHeight, ///< [out] padded height for HW + ADDR2_MIP_INFO* pMipInfo ///< [out] per mip information + ) const +{ + ADDR_E_RETURNCODE returnCode = ADDR_OK; + + UINT_32 elementBytes = pIn->bpp >> 3; + UINT_32 pitchAlignInElement = 0; + + if (pIn->swizzleMode == ADDR_SW_LINEAR_GENERAL) + { + ADDR_ASSERT(pIn->numMipLevels <= 1); + ADDR_ASSERT(pIn->numSlices <= 1); + 
pitchAlignInElement = 1; + } + else + { + pitchAlignInElement = (256 / elementBytes); + } + + UINT_32 mipChainWidth = PowTwoAlign(pIn->width, pitchAlignInElement); + UINT_32 slice0PaddedHeight = pIn->height; + + returnCode = ApplyCustomizedPitchHeight(pIn, elementBytes, pitchAlignInElement, + &mipChainWidth, &slice0PaddedHeight); + + if (returnCode == ADDR_OK) + { + UINT_32 mipChainHeight = 0; + UINT_32 mipHeight = pIn->height; + UINT_32 mipDepth = (pIn->resourceType == ADDR_RSRC_TEX_3D) ? pIn->numSlices : 1; + + for (UINT_32 i = 0; i < pIn->numMipLevels; i++) + { + if (pMipInfo != NULL) + { + pMipInfo[i].offset = mipChainWidth * mipChainHeight * elementBytes; + pMipInfo[i].pitch = mipChainWidth; + pMipInfo[i].height = mipHeight; + pMipInfo[i].depth = mipDepth; + } + + mipChainHeight += mipHeight; + mipHeight = RoundHalf(mipHeight); + mipHeight = Max(mipHeight, 1u); + } + + *pMipmap0PaddedWidth = mipChainWidth; + *pSlice0PaddedHeight = (pIn->numMipLevels > 1) ? mipChainHeight : slice0PaddedHeight; + } + + return returnCode; +} + +} // V2 +} // Addr diff -Nru mesa-18.3.3/src/amd/addrlib/src/gfx9/gfx9addrlib.h mesa-19.0.1/src/amd/addrlib/src/gfx9/gfx9addrlib.h --- mesa-18.3.3/src/amd/addrlib/src/gfx9/gfx9addrlib.h 1970-01-01 00:00:00.000000000 +0000 +++ mesa-19.0.1/src/amd/addrlib/src/gfx9/gfx9addrlib.h 2019-03-31 23:16:37.000000000 +0000 @@ -0,0 +1,629 @@ +/* + * Copyright © 2007-2018 Advanced Micro Devices, Inc. + * All Rights Reserved. 
+ * + * Permission is hereby granted, free of charge, to any person obtaining + * a copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES + * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NON-INFRINGEMENT. IN NO EVENT SHALL THE COPYRIGHT HOLDERS, AUTHORS + * AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE + * USE OR OTHER DEALINGS IN THE SOFTWARE. + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + */ + +/** +************************************************************************************************************************ +* @file gfx9addrlib.h +* @brief Contgfx9ns the Gfx9Lib class definition. +************************************************************************************************************************ +*/ + +#ifndef __GFX9_ADDR_LIB_H__ +#define __GFX9_ADDR_LIB_H__ + +#include "addrlib2.h" +#include "coord.h" + +namespace Addr +{ +namespace V2 +{ + +/** +************************************************************************************************************************ +* @brief GFX9 specific settings structure. 
+************************************************************************************************************************ +*/ +struct Gfx9ChipSettings +{ + struct + { + // Asic/Generation name + UINT_32 isArcticIsland : 1; + UINT_32 isVega10 : 1; + UINT_32 isRaven : 1; + UINT_32 isVega12 : 1; + UINT_32 isVega20 : 1; + UINT_32 reserved0 : 27; + + // Display engine IP version name + UINT_32 isDce12 : 1; + UINT_32 isDcn1 : 1; + + // Misc configuration bits + UINT_32 metaBaseAlignFix : 1; + UINT_32 depthPipeXorDisable : 1; + UINT_32 htileAlignFix : 1; + UINT_32 applyAliasFix : 1; + UINT_32 htileCacheRbConflict: 1; + UINT_32 reserved2 : 27; + }; +}; + +/** +************************************************************************************************************************ +* @brief GFX9 data surface type. +************************************************************************************************************************ +*/ +enum Gfx9DataType +{ + Gfx9DataColor, + Gfx9DataDepthStencil, + Gfx9DataFmask +}; + +const UINT_32 Gfx9LinearSwModeMask = (1u << ADDR_SW_LINEAR); + +const UINT_32 Gfx9Blk256BSwModeMask = (1u << ADDR_SW_256B_S) | + (1u << ADDR_SW_256B_D) | + (1u << ADDR_SW_256B_R); + +const UINT_32 Gfx9Blk4KBSwModeMask = (1u << ADDR_SW_4KB_Z) | + (1u << ADDR_SW_4KB_S) | + (1u << ADDR_SW_4KB_D) | + (1u << ADDR_SW_4KB_R) | + (1u << ADDR_SW_4KB_Z_X) | + (1u << ADDR_SW_4KB_S_X) | + (1u << ADDR_SW_4KB_D_X) | + (1u << ADDR_SW_4KB_R_X); + +const UINT_32 Gfx9Blk64KBSwModeMask = (1u << ADDR_SW_64KB_Z) | + (1u << ADDR_SW_64KB_S) | + (1u << ADDR_SW_64KB_D) | + (1u << ADDR_SW_64KB_R) | + (1u << ADDR_SW_64KB_Z_T) | + (1u << ADDR_SW_64KB_S_T) | + (1u << ADDR_SW_64KB_D_T) | + (1u << ADDR_SW_64KB_R_T) | + (1u << ADDR_SW_64KB_Z_X) | + (1u << ADDR_SW_64KB_S_X) | + (1u << ADDR_SW_64KB_D_X) | + (1u << ADDR_SW_64KB_R_X); + +const UINT_32 Gfx9BlkVarSwModeMask = (1u << ADDR_SW_VAR_Z) | + (1u << ADDR_SW_VAR_S) | + (1u << ADDR_SW_VAR_D) | + (1u << ADDR_SW_VAR_R) | + (1u << 
ADDR_SW_VAR_Z_X) | + (1u << ADDR_SW_VAR_S_X) | + (1u << ADDR_SW_VAR_D_X) | + (1u << ADDR_SW_VAR_R_X); + +const UINT_32 Gfx9ZSwModeMask = (1u << ADDR_SW_4KB_Z) | + (1u << ADDR_SW_64KB_Z) | + (1u << ADDR_SW_VAR_Z) | + (1u << ADDR_SW_64KB_Z_T) | + (1u << ADDR_SW_4KB_Z_X) | + (1u << ADDR_SW_64KB_Z_X) | + (1u << ADDR_SW_VAR_Z_X); + +const UINT_32 Gfx9StandardSwModeMask = (1u << ADDR_SW_256B_S) | + (1u << ADDR_SW_4KB_S) | + (1u << ADDR_SW_64KB_S) | + (1u << ADDR_SW_VAR_S) | + (1u << ADDR_SW_64KB_S_T) | + (1u << ADDR_SW_4KB_S_X) | + (1u << ADDR_SW_64KB_S_X) | + (1u << ADDR_SW_VAR_S_X); + +const UINT_32 Gfx9DisplaySwModeMask = (1u << ADDR_SW_256B_D) | + (1u << ADDR_SW_4KB_D) | + (1u << ADDR_SW_64KB_D) | + (1u << ADDR_SW_VAR_D) | + (1u << ADDR_SW_64KB_D_T) | + (1u << ADDR_SW_4KB_D_X) | + (1u << ADDR_SW_64KB_D_X) | + (1u << ADDR_SW_VAR_D_X); + +const UINT_32 Gfx9RotateSwModeMask = (1u << ADDR_SW_256B_R) | + (1u << ADDR_SW_4KB_R) | + (1u << ADDR_SW_64KB_R) | + (1u << ADDR_SW_VAR_R) | + (1u << ADDR_SW_64KB_R_T) | + (1u << ADDR_SW_4KB_R_X) | + (1u << ADDR_SW_64KB_R_X) | + (1u << ADDR_SW_VAR_R_X); + +const UINT_32 Gfx9XSwModeMask = (1u << ADDR_SW_4KB_Z_X) | + (1u << ADDR_SW_4KB_S_X) | + (1u << ADDR_SW_4KB_D_X) | + (1u << ADDR_SW_4KB_R_X) | + (1u << ADDR_SW_64KB_Z_X) | + (1u << ADDR_SW_64KB_S_X) | + (1u << ADDR_SW_64KB_D_X) | + (1u << ADDR_SW_64KB_R_X) | + (1u << ADDR_SW_VAR_Z_X) | + (1u << ADDR_SW_VAR_S_X) | + (1u << ADDR_SW_VAR_D_X) | + (1u << ADDR_SW_VAR_R_X); + +const UINT_32 Gfx9TSwModeMask = (1u << ADDR_SW_64KB_Z_T) | + (1u << ADDR_SW_64KB_S_T) | + (1u << ADDR_SW_64KB_D_T) | + (1u << ADDR_SW_64KB_R_T); + +const UINT_32 Gfx9XorSwModeMask = Gfx9XSwModeMask | + Gfx9TSwModeMask; + +const UINT_32 Gfx9AllSwModeMask = Gfx9LinearSwModeMask | + Gfx9ZSwModeMask | + Gfx9StandardSwModeMask | + Gfx9DisplaySwModeMask | + Gfx9RotateSwModeMask; + +const UINT_32 Gfx9Rsrc1dSwModeMask = Gfx9LinearSwModeMask; + +const UINT_32 Gfx9Rsrc2dSwModeMask = Gfx9AllSwModeMask; + +const UINT_32 
Gfx9Rsrc3dSwModeMask = Gfx9AllSwModeMask & ~Gfx9Blk256BSwModeMask & ~Gfx9RotateSwModeMask; + +const UINT_32 Gfx9Rsrc2dPrtSwModeMask = (Gfx9Blk4KBSwModeMask | Gfx9Blk64KBSwModeMask) & ~Gfx9XSwModeMask; + +const UINT_32 Gfx9Rsrc3dPrtSwModeMask = Gfx9Rsrc2dPrtSwModeMask & ~Gfx9RotateSwModeMask & ~Gfx9DisplaySwModeMask; + +const UINT_32 Gfx9Rsrc3dThinSwModeMask = Gfx9DisplaySwModeMask & ~Gfx9Blk256BSwModeMask; + +const UINT_32 Gfx9MsaaSwModeMask = Gfx9AllSwModeMask & ~Gfx9Blk256BSwModeMask & ~Gfx9LinearSwModeMask; + +const UINT_32 Dce12NonBpp32SwModeMask = (1u << ADDR_SW_LINEAR) | + (1u << ADDR_SW_4KB_D) | + (1u << ADDR_SW_4KB_R) | + (1u << ADDR_SW_64KB_D) | + (1u << ADDR_SW_64KB_R) | + (1u << ADDR_SW_VAR_D) | + (1u << ADDR_SW_VAR_R) | + (1u << ADDR_SW_4KB_D_X) | + (1u << ADDR_SW_4KB_R_X) | + (1u << ADDR_SW_64KB_D_X) | + (1u << ADDR_SW_64KB_R_X) | + (1u << ADDR_SW_VAR_D_X) | + (1u << ADDR_SW_VAR_R_X); + +const UINT_32 Dce12Bpp32SwModeMask = (1u << ADDR_SW_256B_D) | + (1u << ADDR_SW_256B_R) | + Dce12NonBpp32SwModeMask; + +const UINT_32 Dcn1NonBpp64SwModeMask = (1u << ADDR_SW_LINEAR) | + (1u << ADDR_SW_4KB_S) | + (1u << ADDR_SW_64KB_S) | + (1u << ADDR_SW_VAR_S) | + (1u << ADDR_SW_64KB_S_T) | + (1u << ADDR_SW_4KB_S_X) | + (1u << ADDR_SW_64KB_S_X) | + (1u << ADDR_SW_VAR_S_X); + +const UINT_32 Dcn1Bpp64SwModeMask = (1u << ADDR_SW_4KB_D) | + (1u << ADDR_SW_64KB_D) | + (1u << ADDR_SW_VAR_D) | + (1u << ADDR_SW_64KB_D_T) | + (1u << ADDR_SW_4KB_D_X) | + (1u << ADDR_SW_64KB_D_X) | + (1u << ADDR_SW_VAR_D_X) | + Dcn1NonBpp64SwModeMask; + +/** +************************************************************************************************************************ +* @brief GFX9 meta equation parameters +************************************************************************************************************************ +*/ +struct MetaEqParams +{ + UINT_32 maxMip; + UINT_32 elementBytesLog2; + UINT_32 numSamplesLog2; + ADDR2_META_FLAGS metaFlag; + Gfx9DataType dataSurfaceType; + 
AddrSwizzleMode swizzleMode; + AddrResourceType resourceType; + UINT_32 metaBlkWidthLog2; + UINT_32 metaBlkHeightLog2; + UINT_32 metaBlkDepthLog2; + UINT_32 compBlkWidthLog2; + UINT_32 compBlkHeightLog2; + UINT_32 compBlkDepthLog2; +}; + +/** +************************************************************************************************************************ +* @brief This class is the GFX9 specific address library +* function set. +************************************************************************************************************************ +*/ +class Gfx9Lib : public Lib +{ +public: + /// Creates Gfx9Lib object + static Addr::Lib* CreateObj(const Client* pClient) + { + VOID* pMem = Object::ClientAlloc(sizeof(Gfx9Lib), pClient); + return (pMem != NULL) ? new (pMem) Gfx9Lib(pClient) : NULL; + } + + virtual BOOL_32 IsValidDisplaySwizzleMode( + const ADDR2_COMPUTE_SURFACE_INFO_INPUT* pIn) const; + +protected: + Gfx9Lib(const Client* pClient); + virtual ~Gfx9Lib(); + + virtual BOOL_32 HwlIsStandardSwizzle( + AddrResourceType resourceType, + AddrSwizzleMode swizzleMode) const + { + return m_swizzleModeTable[swizzleMode].isStd || + (IsTex3d(resourceType) && m_swizzleModeTable[swizzleMode].isDisp); + } + + virtual BOOL_32 HwlIsDisplaySwizzle( + AddrResourceType resourceType, + AddrSwizzleMode swizzleMode) const + { + return IsTex2d(resourceType) && m_swizzleModeTable[swizzleMode].isDisp; + } + + virtual BOOL_32 HwlIsThin( + AddrResourceType resourceType, + AddrSwizzleMode swizzleMode) const + { + return ((IsTex2d(resourceType) == TRUE) || + ((IsTex3d(resourceType) == TRUE) && + (m_swizzleModeTable[swizzleMode].isZ == FALSE) && + (m_swizzleModeTable[swizzleMode].isStd == FALSE))); + } + + virtual BOOL_32 HwlIsThick( + AddrResourceType resourceType, + AddrSwizzleMode swizzleMode) const + { + return (IsTex3d(resourceType) && + (m_swizzleModeTable[swizzleMode].isZ || m_swizzleModeTable[swizzleMode].isStd)); + } + + virtual ADDR_E_RETURNCODE HwlComputeHtileInfo( 
+ const ADDR2_COMPUTE_HTILE_INFO_INPUT* pIn, + ADDR2_COMPUTE_HTILE_INFO_OUTPUT* pOut) const; + + virtual ADDR_E_RETURNCODE HwlComputeCmaskInfo( + const ADDR2_COMPUTE_CMASK_INFO_INPUT* pIn, + ADDR2_COMPUTE_CMASK_INFO_OUTPUT* pOut) const; + + virtual ADDR_E_RETURNCODE HwlComputeDccInfo( + const ADDR2_COMPUTE_DCCINFO_INPUT* pIn, + ADDR2_COMPUTE_DCCINFO_OUTPUT* pOut) const; + + virtual ADDR_E_RETURNCODE HwlComputeCmaskAddrFromCoord( + const ADDR2_COMPUTE_CMASK_ADDRFROMCOORD_INPUT* pIn, + ADDR2_COMPUTE_CMASK_ADDRFROMCOORD_OUTPUT* pOut); + + virtual ADDR_E_RETURNCODE HwlComputeHtileAddrFromCoord( + const ADDR2_COMPUTE_HTILE_ADDRFROMCOORD_INPUT* pIn, + ADDR2_COMPUTE_HTILE_ADDRFROMCOORD_OUTPUT* pOut); + + virtual ADDR_E_RETURNCODE HwlComputeHtileCoordFromAddr( + const ADDR2_COMPUTE_HTILE_COORDFROMADDR_INPUT* pIn, + ADDR2_COMPUTE_HTILE_COORDFROMADDR_OUTPUT* pOut); + + virtual ADDR_E_RETURNCODE HwlComputeDccAddrFromCoord( + const ADDR2_COMPUTE_DCC_ADDRFROMCOORD_INPUT* pIn, + ADDR2_COMPUTE_DCC_ADDRFROMCOORD_OUTPUT* pOut); + + virtual UINT_32 HwlGetEquationIndex( + const ADDR2_COMPUTE_SURFACE_INFO_INPUT* pIn, + ADDR2_COMPUTE_SURFACE_INFO_OUTPUT* pOut) const; + + virtual ADDR_E_RETURNCODE HwlComputeBlock256Equation( + AddrResourceType rsrcType, + AddrSwizzleMode swMode, + UINT_32 elementBytesLog2, + ADDR_EQUATION* pEquation) const; + + virtual ADDR_E_RETURNCODE HwlComputeThinEquation( + AddrResourceType rsrcType, + AddrSwizzleMode swMode, + UINT_32 elementBytesLog2, + ADDR_EQUATION* pEquation) const; + + virtual ADDR_E_RETURNCODE HwlComputeThickEquation( + AddrResourceType rsrcType, + AddrSwizzleMode swMode, + UINT_32 elementBytesLog2, + ADDR_EQUATION* pEquation) const; + + // Get equation table pointer and number of equations + virtual UINT_32 HwlGetEquationTableInfo(const ADDR_EQUATION** ppEquationTable) const + { + *ppEquationTable = m_equationTable; + + return m_numEquations; + } + + virtual BOOL_32 IsEquationSupported( + AddrResourceType rsrcType, + AddrSwizzleMode swMode, 
+ UINT_32 elementBytesLog2) const; + + UINT_32 ComputeSurfaceBaseAlignTiled(AddrSwizzleMode swizzleMode) const + { + UINT_32 baseAlign; + + if (IsXor(swizzleMode)) + { + baseAlign = GetBlockSize(swizzleMode); + } + else + { + baseAlign = 256; + } + + return baseAlign; + } + + virtual ADDR_E_RETURNCODE HwlComputePipeBankXor( + const ADDR2_COMPUTE_PIPEBANKXOR_INPUT* pIn, + ADDR2_COMPUTE_PIPEBANKXOR_OUTPUT* pOut) const; + + virtual ADDR_E_RETURNCODE HwlComputeSlicePipeBankXor( + const ADDR2_COMPUTE_SLICE_PIPEBANKXOR_INPUT* pIn, + ADDR2_COMPUTE_SLICE_PIPEBANKXOR_OUTPUT* pOut) const; + + virtual ADDR_E_RETURNCODE HwlComputeSubResourceOffsetForSwizzlePattern( + const ADDR2_COMPUTE_SUBRESOURCE_OFFSET_FORSWIZZLEPATTERN_INPUT* pIn, + ADDR2_COMPUTE_SUBRESOURCE_OFFSET_FORSWIZZLEPATTERN_OUTPUT* pOut) const; + + virtual ADDR_E_RETURNCODE HwlGetPreferredSurfaceSetting( + const ADDR2_GET_PREFERRED_SURF_SETTING_INPUT* pIn, + ADDR2_GET_PREFERRED_SURF_SETTING_OUTPUT* pOut) const; + + virtual ADDR_E_RETURNCODE HwlComputeSurfaceInfoSanityCheck( + const ADDR2_COMPUTE_SURFACE_INFO_INPUT* pIn) const; + + virtual ADDR_E_RETURNCODE HwlComputeSurfaceInfoTiled( + const ADDR2_COMPUTE_SURFACE_INFO_INPUT* pIn, + ADDR2_COMPUTE_SURFACE_INFO_OUTPUT* pOut) const; + + virtual ADDR_E_RETURNCODE HwlComputeSurfaceInfoLinear( + const ADDR2_COMPUTE_SURFACE_INFO_INPUT* pIn, + ADDR2_COMPUTE_SURFACE_INFO_OUTPUT* pOut) const; + + virtual ADDR_E_RETURNCODE HwlComputeSurfaceAddrFromCoordTiled( + const ADDR2_COMPUTE_SURFACE_ADDRFROMCOORD_INPUT* pIn, + ADDR2_COMPUTE_SURFACE_ADDRFROMCOORD_OUTPUT* pOut) const; + + // Initialize equation table + VOID InitEquationTable(); + + ADDR_E_RETURNCODE ComputeStereoInfo( + const ADDR2_COMPUTE_SURFACE_INFO_INPUT* pIn, + ADDR2_COMPUTE_SURFACE_INFO_OUTPUT* pOut, + UINT_32* pHeightAlign) const; + + UINT_32 GetMipChainInfo( + AddrResourceType resourceType, + AddrSwizzleMode swizzleMode, + UINT_32 bpp, + UINT_32 mip0Width, + UINT_32 mip0Height, + UINT_32 mip0Depth, + UINT_32 
blockWidth, + UINT_32 blockHeight, + UINT_32 blockDepth, + UINT_32 numMipLevel, + ADDR2_MIP_INFO* pMipInfo) const; + + VOID GetMetaMiptailInfo( + ADDR2_META_MIP_INFO* pInfo, + Dim3d mipCoord, + UINT_32 numMipInTail, + Dim3d* pMetaBlkDim) const; + + Dim3d GetMipStartPos( + AddrResourceType resourceType, + AddrSwizzleMode swizzleMode, + UINT_32 width, + UINT_32 height, + UINT_32 depth, + UINT_32 blockWidth, + UINT_32 blockHeight, + UINT_32 blockDepth, + UINT_32 mipId, + UINT_32 log2ElementBytes, + UINT_32* pMipTailBytesOffset) const; + + AddrMajorMode GetMajorMode( + AddrResourceType resourceType, + AddrSwizzleMode swizzleMode, + UINT_32 mip0WidthInBlk, + UINT_32 mip0HeightInBlk, + UINT_32 mip0DepthInBlk) const + { + BOOL_32 yMajor = (mip0WidthInBlk < mip0HeightInBlk); + BOOL_32 xMajor = (yMajor == FALSE); + + if (IsThick(resourceType, swizzleMode)) + { + yMajor = yMajor && (mip0HeightInBlk >= mip0DepthInBlk); + xMajor = xMajor && (mip0WidthInBlk >= mip0DepthInBlk); + } + + AddrMajorMode majorMode; + if (xMajor) + { + majorMode = ADDR_MAJOR_X; + } + else if (yMajor) + { + majorMode = ADDR_MAJOR_Y; + } + else + { + majorMode = ADDR_MAJOR_Z; + } + + return majorMode; + } + + Dim3d GetDccCompressBlk( + AddrResourceType resourceType, + AddrSwizzleMode swizzleMode, + UINT_32 bpp) const + { + UINT_32 index = Log2(bpp >> 3); + Dim3d compressBlkDim; + + if (IsThin(resourceType, swizzleMode)) + { + compressBlkDim.w = Block256_2d[index].w; + compressBlkDim.h = Block256_2d[index].h; + compressBlkDim.d = 1; + } + else if (IsStandardSwizzle(resourceType, swizzleMode)) + { + compressBlkDim = Block256_3dS[index]; + } + else + { + compressBlkDim = Block256_3dZ[index]; + } + + return compressBlkDim; + } + + static const UINT_32 MaxSeLog2 = 3; + static const UINT_32 MaxRbPerSeLog2 = 2; + + static const Dim3d Block256_3dS[MaxNumOfBpp]; + static const Dim3d Block256_3dZ[MaxNumOfBpp]; + + static const UINT_32 MipTailOffset256B[]; + + static const SwizzleModeFlags 
SwizzleModeTable[ADDR_SW_MAX_TYPE]; + + // Max number of swizzle mode supported for equation + static const UINT_32 MaxSwMode = 32; + // Max number of resource type (2D/3D) supported for equation + static const UINT_32 MaxRsrcType = 2; + // Max number of bpp (8bpp/16bpp/32bpp/64bpp/128bpp) + static const UINT_32 MaxElementBytesLog2 = 5; + // Almost all swizzle mode + resource type support equation + static const UINT_32 EquationTableSize = MaxElementBytesLog2 * MaxSwMode * MaxRsrcType; + // Equation table + ADDR_EQUATION m_equationTable[EquationTableSize]; + + // Number of equation entries in the table + UINT_32 m_numEquations; + // Equation lookup table according to bpp and tile index + UINT_32 m_equationLookupTable[MaxRsrcType][MaxSwMode][MaxElementBytesLog2]; + + static const UINT_32 MaxCachedMetaEq = 2; + +private: + virtual UINT_32 HwlComputeMaxBaseAlignments() const; + + virtual UINT_32 HwlComputeMaxMetaBaseAlignments() const; + + virtual BOOL_32 HwlInitGlobalParams(const ADDR_CREATE_INPUT* pCreateIn); + + VOID GetRbEquation(CoordEq* pRbEq, UINT_32 rbPerSeLog2, UINT_32 seLog2) const; + + VOID GetDataEquation(CoordEq* pDataEq, Gfx9DataType dataSurfaceType, + AddrSwizzleMode swizzleMode, AddrResourceType resourceType, + UINT_32 elementBytesLog2, UINT_32 numSamplesLog2) const; + + VOID GetPipeEquation(CoordEq* pPipeEq, CoordEq* pDataEq, + UINT_32 pipeInterleaveLog2, UINT_32 numPipesLog2, + UINT_32 numSamplesLog2, Gfx9DataType dataSurfaceType, + AddrSwizzleMode swizzleMode, AddrResourceType resourceType) const; + + VOID GenMetaEquation(CoordEq* pMetaEq, UINT_32 maxMip, + UINT_32 elementBytesLog2, UINT_32 numSamplesLog2, + ADDR2_META_FLAGS metaFlag, Gfx9DataType dataSurfaceType, + AddrSwizzleMode swizzleMode, AddrResourceType resourceType, + UINT_32 metaBlkWidthLog2, UINT_32 metaBlkHeightLog2, + UINT_32 metaBlkDepthLog2, UINT_32 compBlkWidthLog2, + UINT_32 compBlkHeightLog2, UINT_32 compBlkDepthLog2) const; + + const CoordEq* GetMetaEquation(const MetaEqParams& 
metaEqParams); + + virtual ChipFamily HwlConvertChipFamily(UINT_32 uChipFamily, UINT_32 uChipRevision); + + VOID GetMetaMipInfo(UINT_32 numMipLevels, Dim3d* pMetaBlkDim, + BOOL_32 dataThick, ADDR2_META_MIP_INFO* pInfo, + UINT_32 mip0Width, UINT_32 mip0Height, UINT_32 mip0Depth, + UINT_32* pNumMetaBlkX, UINT_32* pNumMetaBlkY, UINT_32* pNumMetaBlkZ) const; + + ADDR_E_RETURNCODE ComputeSurfaceLinearPadding( + const ADDR2_COMPUTE_SURFACE_INFO_INPUT* pIn, + UINT_32* pMipmap0PaddedWidth, + UINT_32* pSlice0PaddedHeight, + ADDR2_MIP_INFO* pMipInfo = NULL) const; + + static ADDR2_BLOCK_SET GetAllowedBlockSet(ADDR2_SWMODE_SET allowedSwModeSet) + { + ADDR2_BLOCK_SET allowedBlockSet = {}; + + allowedBlockSet.micro = (allowedSwModeSet.value & Gfx9Blk256BSwModeMask) ? TRUE : FALSE; + allowedBlockSet.macro4KB = (allowedSwModeSet.value & Gfx9Blk4KBSwModeMask) ? TRUE : FALSE; + allowedBlockSet.macro64KB = (allowedSwModeSet.value & Gfx9Blk64KBSwModeMask) ? TRUE : FALSE; + allowedBlockSet.var = (allowedSwModeSet.value & Gfx9BlkVarSwModeMask) ? TRUE : FALSE; + allowedBlockSet.linear = (allowedSwModeSet.value & Gfx9LinearSwModeMask) ? TRUE : FALSE; + + return allowedBlockSet; + } + + static ADDR2_SWTYPE_SET GetAllowedSwSet(ADDR2_SWMODE_SET allowedSwModeSet) + { + ADDR2_SWTYPE_SET allowedSwSet = {}; + + allowedSwSet.sw_Z = (allowedSwModeSet.value & Gfx9ZSwModeMask) ? TRUE : FALSE; + allowedSwSet.sw_S = (allowedSwModeSet.value & Gfx9StandardSwModeMask) ? TRUE : FALSE; + allowedSwSet.sw_D = (allowedSwModeSet.value & Gfx9DisplaySwModeMask) ? TRUE : FALSE; + allowedSwSet.sw_R = (allowedSwModeSet.value & Gfx9RotateSwModeMask) ? 
TRUE : FALSE; + + return allowedSwSet; + } + + Gfx9ChipSettings m_settings; + + CoordEq m_cachedMetaEq[MaxCachedMetaEq]; + MetaEqParams m_cachedMetaEqKey[MaxCachedMetaEq]; + UINT_32 m_metaEqOverrideIndex; +}; + +} // V2 +} // Addr + +#endif + diff -Nru mesa-18.3.3/src/amd/addrlib/src/r800/ciaddrlib.cpp mesa-19.0.1/src/amd/addrlib/src/r800/ciaddrlib.cpp --- mesa-18.3.3/src/amd/addrlib/src/r800/ciaddrlib.cpp 1970-01-01 00:00:00.000000000 +0000 +++ mesa-19.0.1/src/amd/addrlib/src/r800/ciaddrlib.cpp 2019-03-31 23:16:37.000000000 +0000 @@ -0,0 +1,2339 @@ +/* + * Copyright © 2007-2018 Advanced Micro Devices, Inc. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining + * a copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES + * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NON-INFRINGEMENT. IN NO EVENT SHALL THE COPYRIGHT HOLDERS, AUTHORS + * AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE + * USE OR OTHER DEALINGS IN THE SOFTWARE. + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + */ + +/** +**************************************************************************************************** +* @file ciaddrlib.cpp +* @brief Contains the implementation for the CiLib class. 
+**************************************************************************************************** +*/ + +#include "ciaddrlib.h" + +#include "si_gb_reg.h" + +#include "amdgpu_asic_addr.h" + +//////////////////////////////////////////////////////////////////////////////////////////////////// +//////////////////////////////////////////////////////////////////////////////////////////////////// + +namespace Addr +{ + +/** +**************************************************************************************************** +* CiHwlInit +* +* @brief +* Creates an CiLib object. +* +* @return +* Returns an CiLib object pointer. +**************************************************************************************************** +*/ +Lib* CiHwlInit(const Client* pClient) +{ + return V1::CiLib::CreateObj(pClient); +} + +namespace V1 +{ + +/** +**************************************************************************************************** +* Mask +* +* @brief +* Gets a mask of "width" +* @return +* Bit mask +**************************************************************************************************** +*/ +static UINT_64 Mask( + UINT_32 width) ///< Width of bits +{ + UINT_64 ret; + + if (width >= sizeof(UINT_64)*8) + { + ret = ~((UINT_64) 0); + } + else + { + return (((UINT_64) 1) << width) - 1; + } + return ret; +} + +/** +**************************************************************************************************** +* GetBits +* +* @brief +* Gets bits within a range of [msb, lsb] +* @return +* Bits of this range +**************************************************************************************************** +*/ +static UINT_64 GetBits( + UINT_64 bits, ///< Source bits + UINT_32 msb, ///< Most signicant bit + UINT_32 lsb) ///< Least signicant bit +{ + UINT_64 ret = 0; + + if (msb >= lsb) + { + ret = (bits >> lsb) & (Mask(1 + msb - lsb)); + } + return ret; +} + +/** 
+**************************************************************************************************** +* RemoveBits +* +* @brief +* Removes bits within the range of [msb, lsb] +* @return +* Modified bits +**************************************************************************************************** +*/ +static UINT_64 RemoveBits( + UINT_64 bits, ///< Source bits + UINT_32 msb, ///< Most signicant bit + UINT_32 lsb) ///< Least signicant bit +{ + UINT_64 ret = bits; + + if (msb >= lsb) + { + ret = GetBits(bits, lsb - 1, 0) // low bits + | (GetBits(bits, 8 * sizeof(bits) - 1, msb + 1) << lsb); //high bits + } + return ret; +} + +/** +**************************************************************************************************** +* InsertBits +* +* @brief +* Inserts new bits into the range of [msb, lsb] +* @return +* Modified bits +**************************************************************************************************** +*/ +static UINT_64 InsertBits( + UINT_64 bits, ///< Source bits + UINT_64 newBits, ///< New bits to be inserted + UINT_32 msb, ///< Most signicant bit + UINT_32 lsb) ///< Least signicant bit +{ + UINT_64 ret = bits; + + if (msb >= lsb) + { + ret = GetBits(bits, lsb - 1, 0) // old low bitss + | (GetBits(newBits, msb - lsb, 0) << lsb) //new bits + | (GetBits(bits, 8 * sizeof(bits) - 1, lsb) << (msb + 1)); //old high bits + } + return ret; +} + +/** +**************************************************************************************************** +* CiLib::CiLib +* +* @brief +* Constructor +* +**************************************************************************************************** +*/ +CiLib::CiLib(const Client* pClient) + : + SiLib(pClient), + m_noOfMacroEntries(0), + m_allowNonDispThickModes(FALSE) +{ + m_class = CI_ADDRLIB; +} + +/** +**************************************************************************************************** +* CiLib::~CiLib +* +* @brief +* Destructor 
+**************************************************************************************************** +*/ +CiLib::~CiLib() +{ +} + +/** +**************************************************************************************************** +* CiLib::HwlComputeDccInfo +* +* @brief +* Compute DCC key size, base alignment +* @return +* ADDR_E_RETURNCODE +**************************************************************************************************** +*/ +ADDR_E_RETURNCODE CiLib::HwlComputeDccInfo( + const ADDR_COMPUTE_DCCINFO_INPUT* pIn, + ADDR_COMPUTE_DCCINFO_OUTPUT* pOut) const +{ + ADDR_E_RETURNCODE returnCode = ADDR_OK; + + if (m_settings.isVolcanicIslands && IsMacroTiled(pIn->tileMode)) + { + UINT_64 dccFastClearSize = pIn->colorSurfSize >> 8; + + ADDR_ASSERT(0 == (pIn->colorSurfSize & 0xff)); + + if (pIn->numSamples > 1) + { + UINT_32 tileSizePerSample = BITS_TO_BYTES(pIn->bpp * MicroTileWidth * MicroTileHeight); + UINT_32 samplesPerSplit = pIn->tileInfo.tileSplitBytes / tileSizePerSample; + + if (samplesPerSplit < pIn->numSamples) + { + UINT_32 numSplits = pIn->numSamples / samplesPerSplit; + UINT_32 fastClearBaseAlign = HwlGetPipes(&pIn->tileInfo) * m_pipeInterleaveBytes; + + ADDR_ASSERT(IsPow2(fastClearBaseAlign)); + + dccFastClearSize /= numSplits; + + if (0 != (dccFastClearSize & (fastClearBaseAlign - 1))) + { + // Disable dcc fast clear + // if key size of fisrt sample split is not pipe*interleave aligned + dccFastClearSize = 0; + } + } + } + + pOut->dccRamSize = pIn->colorSurfSize >> 8; + pOut->dccRamBaseAlign = pIn->tileInfo.banks * + HwlGetPipes(&pIn->tileInfo) * + m_pipeInterleaveBytes; + pOut->dccFastClearSize = dccFastClearSize; + pOut->dccRamSizeAligned = TRUE; + + ADDR_ASSERT(IsPow2(pOut->dccRamBaseAlign)); + + if (0 == (pOut->dccRamSize & (pOut->dccRamBaseAlign - 1))) + { + pOut->subLvlCompressible = TRUE; + } + else + { + UINT_64 dccRamSizeAlign = HwlGetPipes(&pIn->tileInfo) * m_pipeInterleaveBytes; + + if (pOut->dccRamSize == 
pOut->dccFastClearSize) + { + pOut->dccFastClearSize = PowTwoAlign(pOut->dccRamSize, dccRamSizeAlign); + } + if ((pOut->dccRamSize & (dccRamSizeAlign - 1)) != 0) + { + pOut->dccRamSizeAligned = FALSE; + } + pOut->dccRamSize = PowTwoAlign(pOut->dccRamSize, dccRamSizeAlign); + pOut->subLvlCompressible = FALSE; + } + } + else + { + returnCode = ADDR_NOTSUPPORTED; + } + + return returnCode; +} + +/** +**************************************************************************************************** +* CiLib::HwlComputeCmaskAddrFromCoord +* +* @brief +* Compute tc compatible Cmask address from fmask ram address +* +* @return +* ADDR_E_RETURNCODE +**************************************************************************************************** +*/ +ADDR_E_RETURNCODE CiLib::HwlComputeCmaskAddrFromCoord( + const ADDR_COMPUTE_CMASK_ADDRFROMCOORD_INPUT* pIn, ///< [in] fmask addr/bpp/tile input + ADDR_COMPUTE_CMASK_ADDRFROMCOORD_OUTPUT* pOut ///< [out] cmask address + ) const +{ + ADDR_E_RETURNCODE returnCode = ADDR_NOTSUPPORTED; + + if ((m_settings.isVolcanicIslands == TRUE) && + (pIn->flags.tcCompatible == TRUE)) + { + UINT_32 numOfPipes = HwlGetPipes(pIn->pTileInfo); + UINT_32 numOfBanks = pIn->pTileInfo->banks; + UINT_64 fmaskAddress = pIn->fmaskAddr; + UINT_32 elemBits = pIn->bpp; + UINT_32 blockByte = 64 * elemBits / 8; + UINT_64 metaNibbleAddress = HwlComputeMetadataNibbleAddress(fmaskAddress, + 0, + 0, + 4, // cmask 4 bits + elemBits, + blockByte, + m_pipeInterleaveBytes, + numOfPipes, + numOfBanks, + 1); + pOut->addr = (metaNibbleAddress >> 1); + pOut->bitPosition = (metaNibbleAddress % 2) ? 
4 : 0; + returnCode = ADDR_OK; + } + + return returnCode; +} + +/** +**************************************************************************************************** +* CiLib::HwlComputeHtileAddrFromCoord +* +* @brief +* Compute tc compatible Htile address from depth/stencil address +* +* @return +* ADDR_E_RETURNCODE +**************************************************************************************************** +*/ +ADDR_E_RETURNCODE CiLib::HwlComputeHtileAddrFromCoord( + const ADDR_COMPUTE_HTILE_ADDRFROMCOORD_INPUT* pIn, ///< [in] depth/stencil addr/bpp/tile input + ADDR_COMPUTE_HTILE_ADDRFROMCOORD_OUTPUT* pOut ///< [out] htile address + ) const +{ + ADDR_E_RETURNCODE returnCode = ADDR_NOTSUPPORTED; + + if ((m_settings.isVolcanicIslands == TRUE) && + (pIn->flags.tcCompatible == TRUE)) + { + UINT_32 numOfPipes = HwlGetPipes(pIn->pTileInfo); + UINT_32 numOfBanks = pIn->pTileInfo->banks; + UINT_64 zStencilAddr = pIn->zStencilAddr; + UINT_32 elemBits = pIn->bpp; + UINT_32 blockByte = 64 * elemBits / 8; + UINT_64 metaNibbleAddress = HwlComputeMetadataNibbleAddress(zStencilAddr, + 0, + 0, + 32, // htile 32 bits + elemBits, + blockByte, + m_pipeInterleaveBytes, + numOfPipes, + numOfBanks, + 1); + pOut->addr = (metaNibbleAddress >> 1); + pOut->bitPosition = 0; + returnCode = ADDR_OK; + } + + return returnCode; +} + +/** +**************************************************************************************************** +* CiLib::HwlConvertChipFamily +* +* @brief +* Convert familyID defined in atiid.h to ChipFamily and set m_chipFamily/m_chipRevision +* @return +* ChipFamily +**************************************************************************************************** +*/ +ChipFamily CiLib::HwlConvertChipFamily( + UINT_32 uChipFamily, ///< [in] chip family defined in atiih.h + UINT_32 uChipRevision) ///< [in] chip revision defined in "asic_family"_id.h +{ + ChipFamily family = ADDR_CHIP_FAMILY_CI; + + switch (uChipFamily) + { + case FAMILY_CI: + 
m_settings.isSeaIsland = 1; + m_settings.isBonaire = ASICREV_IS_BONAIRE_M(uChipRevision); + m_settings.isHawaii = ASICREV_IS_HAWAII_P(uChipRevision); + break; + case FAMILY_KV: + m_settings.isKaveri = 1; + m_settings.isSpectre = ASICREV_IS_SPECTRE(uChipRevision); + m_settings.isSpooky = ASICREV_IS_SPOOKY(uChipRevision); + m_settings.isKalindi = ASICREV_IS_KALINDI(uChipRevision); + break; + case FAMILY_VI: + m_settings.isVolcanicIslands = 1; + m_settings.isIceland = ASICREV_IS_ICELAND_M(uChipRevision); + m_settings.isTonga = ASICREV_IS_TONGA_P(uChipRevision); + m_settings.isFiji = ASICREV_IS_FIJI_P(uChipRevision); + m_settings.isPolaris10 = ASICREV_IS_POLARIS10_P(uChipRevision); + m_settings.isPolaris11 = ASICREV_IS_POLARIS11_M(uChipRevision); + m_settings.isPolaris12 = ASICREV_IS_POLARIS12_V(uChipRevision); + m_settings.isVegaM = ASICREV_IS_VEGAM_P(uChipRevision); + family = ADDR_CHIP_FAMILY_VI; + break; + case FAMILY_CZ: + m_settings.isCarrizo = 1; + m_settings.isVolcanicIslands = 1; + family = ADDR_CHIP_FAMILY_VI; + break; + default: + ADDR_ASSERT(!"This should be a unexpected Fusion"); + break; + } + + return family; +} + +/** +**************************************************************************************************** +* CiLib::HwlInitGlobalParams +* +* @brief +* Initializes global parameters +* +* @return +* TRUE if all settings are valid +* +**************************************************************************************************** +*/ +BOOL_32 CiLib::HwlInitGlobalParams( + const ADDR_CREATE_INPUT* pCreateIn) ///< [in] create input +{ + BOOL_32 valid = TRUE; + + const ADDR_REGISTER_VALUE* pRegValue = &pCreateIn->regValue; + + valid = DecodeGbRegs(pRegValue); + + // The following assignments for m_pipes is only for fail-safe, InitTileSettingTable should + // read the correct pipes from tile mode table + if (m_settings.isHawaii) + { + m_pipes = 16; + } + else if (m_settings.isBonaire || m_settings.isSpectre) + { + m_pipes = 4; + } + else // 
Treat other KV asics to be 2-pipe + { + m_pipes = 2; + } + + // @todo: VI + // Move this to VI code path once created + if (m_settings.isTonga || m_settings.isPolaris10) + { + m_pipes = 8; + } + else if (m_settings.isIceland) + { + m_pipes = 2; + } + else if (m_settings.isFiji) + { + m_pipes = 16; + } + else if (m_settings.isPolaris11 || m_settings.isPolaris12) + { + m_pipes = 4; + } + else if (m_settings.isVegaM) + { + m_pipes = 16; + } + + if (valid) + { + valid = InitTileSettingTable(pRegValue->pTileConfig, pRegValue->noOfEntries); + } + if (valid) + { + valid = InitMacroTileCfgTable(pRegValue->pMacroTileConfig, pRegValue->noOfMacroEntries); + } + + if (valid) + { + InitEquationTable(); + } + + return valid; +} + +/** +**************************************************************************************************** +* CiLib::HwlPostCheckTileIndex +* +* @brief +* Map a tile setting to index if curIndex is invalid, otherwise check if curIndex matches +* tile mode/type/info and change the index if needed +* @return +* Tile index. +**************************************************************************************************** +*/ +INT_32 CiLib::HwlPostCheckTileIndex( + const ADDR_TILEINFO* pInfo, ///< [in] Tile Info + AddrTileMode mode, ///< [in] Tile mode + AddrTileType type, ///< [in] Tile type + INT curIndex ///< [in] Current index assigned in HwlSetupTileInfo + ) const +{ + INT_32 index = curIndex; + + if (mode == ADDR_TM_LINEAR_GENERAL) + { + index = TileIndexLinearGeneral; + } + else + { + BOOL_32 macroTiled = IsMacroTiled(mode); + + // We need to find a new index if either of them is true + // 1. curIndex is invalid + // 2. tile mode is changed + // 3. 
tile info does not match for macro tiled + if ((index == TileIndexInvalid) || + (mode != m_tileTable[index].mode) || + (macroTiled && pInfo->pipeConfig != m_tileTable[index].info.pipeConfig)) + { + for (index = 0; index < static_cast(m_noOfEntries); index++) + { + if (macroTiled) + { + // macro tile modes need all to match + if ((pInfo->pipeConfig == m_tileTable[index].info.pipeConfig) && + (mode == m_tileTable[index].mode) && + (type == m_tileTable[index].type)) + { + // tileSplitBytes stored in m_tileTable is only valid for depth entries + if (type == ADDR_DEPTH_SAMPLE_ORDER) + { + if (Min(m_tileTable[index].info.tileSplitBytes, + m_rowSize) == pInfo->tileSplitBytes) + { + break; + } + } + else // other entries are determined by other 3 fields + { + break; + } + } + } + else if (mode == ADDR_TM_LINEAR_ALIGNED) + { + // linear mode only needs tile mode to match + if (mode == m_tileTable[index].mode) + { + break; + } + } + else + { + // micro tile modes only need tile mode and tile type to match + if (mode == m_tileTable[index].mode && + type == m_tileTable[index].type) + { + break; + } + } + } + } + } + + ADDR_ASSERT(index < static_cast(m_noOfEntries)); + + if (index >= static_cast(m_noOfEntries)) + { + index = TileIndexInvalid; + } + + return index; +} + +/** +**************************************************************************************************** +* CiLib::HwlSetupTileCfg +* +* @brief +* Map tile index to tile setting. 
+* @return +* ADDR_E_RETURNCODE +**************************************************************************************************** +*/ +ADDR_E_RETURNCODE CiLib::HwlSetupTileCfg( + UINT_32 bpp, ///< Bits per pixel + INT_32 index, ///< Tile index + INT_32 macroModeIndex, ///< Index in macro tile mode table(CI) + ADDR_TILEINFO* pInfo, ///< [out] Tile Info + AddrTileMode* pMode, ///< [out] Tile mode + AddrTileType* pType ///< [out] Tile type + ) const +{ + ADDR_E_RETURNCODE returnCode = ADDR_OK; + + // Global flag to control usage of tileIndex + if (UseTileIndex(index)) + { + if (index == TileIndexLinearGeneral) + { + pInfo->banks = 2; + pInfo->bankWidth = 1; + pInfo->bankHeight = 1; + pInfo->macroAspectRatio = 1; + pInfo->tileSplitBytes = 64; + pInfo->pipeConfig = ADDR_PIPECFG_P2; + } + else if (static_cast(index) >= m_noOfEntries) + { + returnCode = ADDR_INVALIDPARAMS; + } + else + { + const TileConfig* pCfgTable = GetTileSetting(index); + + if (pInfo != NULL) + { + if (IsMacroTiled(pCfgTable->mode)) + { + ADDR_ASSERT((macroModeIndex != TileIndexInvalid) && + (macroModeIndex != TileIndexNoMacroIndex)); + + UINT_32 tileSplit; + + *pInfo = m_macroTileTable[macroModeIndex]; + + if (pCfgTable->type == ADDR_DEPTH_SAMPLE_ORDER) + { + tileSplit = pCfgTable->info.tileSplitBytes; + } + else + { + if (bpp > 0) + { + UINT_32 thickness = Thickness(pCfgTable->mode); + UINT_32 tileBytes1x = BITS_TO_BYTES(bpp * MicroTilePixels * thickness); + // Non-depth entries store a split factor + UINT_32 sampleSplit = m_tileTable[index].info.tileSplitBytes; + tileSplit = Max(256u, sampleSplit * tileBytes1x); + } + else + { + // Return tileBytes instead if not enough info + tileSplit = pInfo->tileSplitBytes; + } + } + + // Clamp to row_size + pInfo->tileSplitBytes = Min(m_rowSize, tileSplit); + + pInfo->pipeConfig = pCfgTable->info.pipeConfig; + } + else // 1D and linear modes, we return default value stored in table + { + *pInfo = pCfgTable->info; + } + } + + if (pMode != NULL) + { + 
*pMode = pCfgTable->mode; + } + + if (pType != NULL) + { + *pType = pCfgTable->type; + } + } + } + + return returnCode; +} + +/** +**************************************************************************************************** +* CiLib::HwlComputeSurfaceInfo +* +* @brief +* Entry of CI's ComputeSurfaceInfo +* @return +* ADDR_E_RETURNCODE +**************************************************************************************************** +*/ +ADDR_E_RETURNCODE CiLib::HwlComputeSurfaceInfo( + const ADDR_COMPUTE_SURFACE_INFO_INPUT* pIn, ///< [in] input structure + ADDR_COMPUTE_SURFACE_INFO_OUTPUT* pOut ///< [out] output structure + ) const +{ + // If tileIndex is invalid, force macroModeIndex to be invalid, too + if (pIn->tileIndex == TileIndexInvalid) + { + pOut->macroModeIndex = TileIndexInvalid; + } + + ADDR_E_RETURNCODE retCode = SiLib::HwlComputeSurfaceInfo(pIn, pOut); + + if ((pIn->mipLevel > 0) && + (pOut->tcCompatible == TRUE) && + (pOut->tileMode != pIn->tileMode) && + (m_settings.isVolcanicIslands == TRUE)) + { + pOut->tcCompatible = CheckTcCompatibility(pOut->pTileInfo, pIn->bpp, pOut->tileMode, pOut->tileType, pOut); + } + + if (pOut->macroModeIndex == TileIndexNoMacroIndex) + { + pOut->macroModeIndex = TileIndexInvalid; + } + + if ((pIn->flags.matchStencilTileCfg == TRUE) && + (pIn->flags.depth == TRUE)) + { + pOut->stencilTileIdx = TileIndexInvalid; + + if ((MinDepth2DThinIndex <= pOut->tileIndex) && + (MaxDepth2DThinIndex >= pOut->tileIndex)) + { + BOOL_32 depthStencil2DTileConfigMatch = DepthStencilTileCfgMatch(pIn, pOut); + + if ((depthStencil2DTileConfigMatch == FALSE) && + (pOut->tcCompatible == TRUE)) + { + pOut->macroModeIndex = TileIndexInvalid; + + ADDR_COMPUTE_SURFACE_INFO_INPUT localIn = *pIn; + localIn.tileIndex = TileIndexInvalid; + localIn.pTileInfo = NULL; + localIn.flags.tcCompatible = FALSE; + + SiLib::HwlComputeSurfaceInfo(&localIn, pOut); + + ADDR_ASSERT((MinDepth2DThinIndex <= pOut->tileIndex) && (MaxDepth2DThinIndex >= 
pOut->tileIndex)); + + depthStencil2DTileConfigMatch = DepthStencilTileCfgMatch(pIn, pOut); + } + + if ((depthStencil2DTileConfigMatch == FALSE) && + (pIn->numSamples <= 1)) + { + pOut->macroModeIndex = TileIndexInvalid; + + ADDR_COMPUTE_SURFACE_INFO_INPUT localIn = *pIn; + localIn.tileMode = ADDR_TM_1D_TILED_THIN1; + localIn.tileIndex = TileIndexInvalid; + localIn.pTileInfo = NULL; + + retCode = SiLib::HwlComputeSurfaceInfo(&localIn, pOut); + } + } + + if (pOut->tileIndex == Depth1DThinIndex) + { + pOut->stencilTileIdx = Depth1DThinIndex; + } + } + + return retCode; +} + +/** +**************************************************************************************************** +* CiLib::HwlFmaskSurfaceInfo +* @brief +* Entry of r800's ComputeFmaskInfo +* @return +* ADDR_E_RETURNCODE +**************************************************************************************************** +*/ +ADDR_E_RETURNCODE CiLib::HwlComputeFmaskInfo( + const ADDR_COMPUTE_FMASK_INFO_INPUT* pIn, ///< [in] input structure + ADDR_COMPUTE_FMASK_INFO_OUTPUT* pOut ///< [out] output structure + ) +{ + ADDR_E_RETURNCODE retCode = ADDR_OK; + + ADDR_TILEINFO tileInfo = {0}; + ADDR_COMPUTE_FMASK_INFO_INPUT fmaskIn; + fmaskIn = *pIn; + + AddrTileMode tileMode = pIn->tileMode; + + // Use internal tile info if pOut does not have a valid pTileInfo + if (pOut->pTileInfo == NULL) + { + pOut->pTileInfo = &tileInfo; + } + + ADDR_ASSERT(tileMode == ADDR_TM_2D_TILED_THIN1 || + tileMode == ADDR_TM_3D_TILED_THIN1 || + tileMode == ADDR_TM_PRT_TILED_THIN1 || + tileMode == ADDR_TM_PRT_2D_TILED_THIN1 || + tileMode == ADDR_TM_PRT_3D_TILED_THIN1); + + ADDR_ASSERT(m_tileTable[14].mode == ADDR_TM_2D_TILED_THIN1); + ADDR_ASSERT(m_tileTable[15].mode == ADDR_TM_3D_TILED_THIN1); + + // The only valid tile modes for fmask are 2D_THIN1 and 3D_THIN1 plus non-displayable + INT_32 tileIndex = tileMode == ADDR_TM_2D_TILED_THIN1 ? 
14 : 15; + ADDR_SURFACE_FLAGS flags = {{0}}; + flags.fmask = 1; + + INT_32 macroModeIndex = TileIndexInvalid; + + UINT_32 numSamples = pIn->numSamples; + UINT_32 numFrags = pIn->numFrags == 0 ? numSamples : pIn->numFrags; + + UINT_32 bpp = QLog2(numFrags); + + // EQAA needs one more bit + if (numSamples > numFrags) + { + bpp++; + } + + if (bpp == 3) + { + bpp = 4; + } + + bpp = Max(8u, bpp * numSamples); + + macroModeIndex = HwlComputeMacroModeIndex(tileIndex, flags, bpp, numSamples, pOut->pTileInfo); + + fmaskIn.tileIndex = tileIndex; + fmaskIn.pTileInfo = pOut->pTileInfo; + pOut->macroModeIndex = macroModeIndex; + pOut->tileIndex = tileIndex; + + retCode = DispatchComputeFmaskInfo(&fmaskIn, pOut); + + if (retCode == ADDR_OK) + { + pOut->tileIndex = + HwlPostCheckTileIndex(pOut->pTileInfo, pIn->tileMode, ADDR_NON_DISPLAYABLE, + pOut->tileIndex); + } + + // Resets pTileInfo to NULL if the internal tile info is used + if (pOut->pTileInfo == &tileInfo) + { + pOut->pTileInfo = NULL; + } + + return retCode; +} + +/** +**************************************************************************************************** +* CiLib::HwlFmaskPreThunkSurfInfo +* +* @brief +* Some preparation before thunking a ComputeSurfaceInfo call for Fmask +* @return +* ADDR_E_RETURNCODE +**************************************************************************************************** +*/ +VOID CiLib::HwlFmaskPreThunkSurfInfo( + const ADDR_COMPUTE_FMASK_INFO_INPUT* pFmaskIn, ///< [in] Input of fmask info + const ADDR_COMPUTE_FMASK_INFO_OUTPUT* pFmaskOut, ///< [in] Output of fmask info + ADDR_COMPUTE_SURFACE_INFO_INPUT* pSurfIn, ///< [out] Input of thunked surface info + ADDR_COMPUTE_SURFACE_INFO_OUTPUT* pSurfOut ///< [out] Output of thunked surface info + ) const +{ + pSurfIn->tileIndex = pFmaskIn->tileIndex; + pSurfOut->macroModeIndex = pFmaskOut->macroModeIndex; +} + +/** +**************************************************************************************************** +* 
CiLib::HwlFmaskPostThunkSurfInfo +* +* @brief +* Copy hwl extra field after calling thunked ComputeSurfaceInfo +* @return +* ADDR_E_RETURNCODE +**************************************************************************************************** +*/ +VOID CiLib::HwlFmaskPostThunkSurfInfo( + const ADDR_COMPUTE_SURFACE_INFO_OUTPUT* pSurfOut, ///< [in] Output of surface info + ADDR_COMPUTE_FMASK_INFO_OUTPUT* pFmaskOut ///< [out] Output of fmask info + ) const +{ + pFmaskOut->tileIndex = pSurfOut->tileIndex; + pFmaskOut->macroModeIndex = pSurfOut->macroModeIndex; +} + +/** +**************************************************************************************************** +* CiLib::HwlDegradeThickTileMode +* +* @brief +* Degrades valid tile mode for thick modes if needed +* +* @return +* Suitable tile mode +**************************************************************************************************** +*/ +AddrTileMode CiLib::HwlDegradeThickTileMode( + AddrTileMode baseTileMode, ///< [in] base tile mode + UINT_32 numSlices, ///< [in] current number of slices + UINT_32* pBytesPerTile ///< [in,out] pointer to bytes per slice + ) const +{ + return baseTileMode; +} + +/** +**************************************************************************************************** +* CiLib::HwlOptimizeTileMode +* +* @brief +* Optimize tile mode on CI +* +* @return +* N/A +* +**************************************************************************************************** +*/ +VOID CiLib::HwlOptimizeTileMode( + ADDR_COMPUTE_SURFACE_INFO_INPUT* pInOut ///< [in,out] input output structure + ) const +{ + AddrTileMode tileMode = pInOut->tileMode; + + // Override 2D/3D macro tile mode to PRT_* tile mode if + // client driver requests this surface is equation compatible + if (IsMacroTiled(tileMode) == TRUE) + { + if ((pInOut->flags.needEquation == TRUE) && + (pInOut->numSamples <= 1) && + (IsPrtTileMode(tileMode) == FALSE)) + { + if ((pInOut->numSlices > 1) && 
((pInOut->maxBaseAlign == 0) || (pInOut->maxBaseAlign >= Block64K))) + { + UINT_32 thickness = Thickness(tileMode); + + if (thickness == 1) + { + tileMode = ADDR_TM_PRT_TILED_THIN1; + } + else + { + static const UINT_32 PrtTileBytes = 0x10000; + // First prt thick tile index in the tile mode table + static const UINT_32 PrtThickTileIndex = 22; + ADDR_TILEINFO tileInfo = {0}; + + HwlComputeMacroModeIndex(PrtThickTileIndex, + pInOut->flags, + pInOut->bpp, + pInOut->numSamples, + &tileInfo); + + UINT_32 macroTileBytes = ((pInOut->bpp) >> 3) * 64 * pInOut->numSamples * + thickness * HwlGetPipes(&tileInfo) * + tileInfo.banks * tileInfo.bankWidth * + tileInfo.bankHeight; + + if (macroTileBytes <= PrtTileBytes) + { + tileMode = ADDR_TM_PRT_TILED_THICK; + } + else + { + tileMode = ADDR_TM_PRT_TILED_THIN1; + } + } + } + } + + if (pInOut->maxBaseAlign != 0) + { + pInOut->flags.dccPipeWorkaround = FALSE; + } + } + + if (tileMode != pInOut->tileMode) + { + pInOut->tileMode = tileMode; + } +} + +/** +**************************************************************************************************** +* CiLib::HwlOverrideTileMode +* +* @brief +* Override THICK to THIN, for specific formats on CI +* +* @return +* N/A +* +**************************************************************************************************** +*/ +VOID CiLib::HwlOverrideTileMode( + ADDR_COMPUTE_SURFACE_INFO_INPUT* pInOut ///< [in,out] input output structure + ) const +{ + AddrTileMode tileMode = pInOut->tileMode; + AddrTileType tileType = pInOut->tileType; + + // currently, all CI/VI family do not + // support ADDR_TM_PRT_2D_TILED_THICK,ADDR_TM_PRT_3D_TILED_THICK and + // ADDR_TM_PRT_2D_TILED_THIN1, ADDR_TM_PRT_3D_TILED_THIN1 + switch (tileMode) + { + case ADDR_TM_PRT_2D_TILED_THICK: + case ADDR_TM_PRT_3D_TILED_THICK: + tileMode = ADDR_TM_PRT_TILED_THICK; + break; + case ADDR_TM_PRT_2D_TILED_THIN1: + case ADDR_TM_PRT_3D_TILED_THIN1: + tileMode = ADDR_TM_PRT_TILED_THIN1; + break; + default: + break; + } 
+ + // UBTS#404321, we do not need such overriding, as THICK+THICK entries removed from the tile-mode table + if (!m_settings.isBonaire) + { + UINT_32 thickness = Thickness(tileMode); + + // tile_thickness = (array_mode == XTHICK) ? 8 : ((array_mode == THICK) ? 4 : 1) + if (thickness > 1) + { + switch (pInOut->format) + { + // tcpError("Thick micro tiling is not supported for format... + case ADDR_FMT_X24_8_32_FLOAT: + case ADDR_FMT_32_AS_8: + case ADDR_FMT_32_AS_8_8: + case ADDR_FMT_32_AS_32_32_32_32: + + // packed formats + case ADDR_FMT_GB_GR: + case ADDR_FMT_BG_RG: + case ADDR_FMT_1_REVERSED: + case ADDR_FMT_1: + case ADDR_FMT_BC1: + case ADDR_FMT_BC2: + case ADDR_FMT_BC3: + case ADDR_FMT_BC4: + case ADDR_FMT_BC5: + case ADDR_FMT_BC6: + case ADDR_FMT_BC7: + switch (tileMode) + { + case ADDR_TM_1D_TILED_THICK: + tileMode = ADDR_TM_1D_TILED_THIN1; + break; + + case ADDR_TM_2D_TILED_XTHICK: + case ADDR_TM_2D_TILED_THICK: + tileMode = ADDR_TM_2D_TILED_THIN1; + break; + + case ADDR_TM_3D_TILED_XTHICK: + case ADDR_TM_3D_TILED_THICK: + tileMode = ADDR_TM_3D_TILED_THIN1; + break; + + case ADDR_TM_PRT_TILED_THICK: + tileMode = ADDR_TM_PRT_TILED_THIN1; + break; + + case ADDR_TM_PRT_2D_TILED_THICK: + tileMode = ADDR_TM_PRT_2D_TILED_THIN1; + break; + + case ADDR_TM_PRT_3D_TILED_THICK: + tileMode = ADDR_TM_PRT_3D_TILED_THIN1; + break; + + default: + break; + + } + + // Switch tile type from thick to thin + if (tileMode != pInOut->tileMode) + { + // see tileIndex: 13-18 + tileType = ADDR_NON_DISPLAYABLE; + } + + break; + default: + break; + } + } + } + + if (tileMode != pInOut->tileMode) + { + pInOut->tileMode = tileMode; + pInOut->tileType = tileType; + } +} + +/** +**************************************************************************************************** +* CiLib::HwlSelectTileMode +* +* @brief +* Select tile modes. 
*
* @return
*   N/A
*
****************************************************************************************************
*/
VOID CiLib::HwlSelectTileMode(
    ADDR_COMPUTE_SURFACE_INFO_INPUT* pInOut     ///< [in,out] input output structure
    ) const
{
    AddrTileMode tileMode;
    AddrTileType tileType;

    if (pInOut->flags.rotateDisplay)
    {
        tileMode = ADDR_TM_2D_TILED_THIN1;
        tileType = ADDR_ROTATED;
    }
    else if (pInOut->flags.volume)
    {
        // Volume (thick) modes normally use the THICK micro tile type; on Bonaire, or when
        // non-displayable thick modes are allowed for color surfaces, use NON_DISPLAYABLE instead.
        BOOL_32 bThin = (m_settings.isBonaire == TRUE) ||
                        ((m_allowNonDispThickModes == TRUE) && (pInOut->flags.color == TRUE));

        // 8+ slices -> extra-thick, 4..7 slices -> thick, otherwise thin tiling.
        if (pInOut->numSlices >= 8)
        {
            tileMode = ADDR_TM_2D_TILED_XTHICK;
            tileType = (bThin == TRUE) ? ADDR_NON_DISPLAYABLE : ADDR_THICK;
        }
        else if (pInOut->numSlices >= 4)
        {
            tileMode = ADDR_TM_2D_TILED_THICK;
            tileType = (bThin == TRUE) ? ADDR_NON_DISPLAYABLE : ADDR_THICK;
        }
        else
        {
            tileMode = ADDR_TM_2D_TILED_THIN1;
            tileType = ADDR_NON_DISPLAYABLE;
        }
    }
    else
    {
        tileMode = ADDR_TM_2D_TILED_THIN1;

        if (pInOut->flags.depth || pInOut->flags.stencil)
        {
            tileType = ADDR_DEPTH_SAMPLE_ORDER;
        }
        else if ((pInOut->bpp <= 32) ||
                 (pInOut->flags.display == TRUE) ||
                 (pInOut->flags.overlay == TRUE))
        {
            tileType = ADDR_DISPLAYABLE;
        }
        else
        {
            tileType = ADDR_NON_DISPLAYABLE;
        }
    }

    // PRT surfaces must use a PRT tile mode; thick PRT is NON_DISPLAYABLE on Bonaire.
    if (pInOut->flags.prt)
    {
        if (Thickness(tileMode) > 1)
        {
            tileMode = ADDR_TM_PRT_TILED_THICK;
            tileType = (m_settings.isBonaire == TRUE) ? ADDR_NON_DISPLAYABLE : ADDR_THICK;
        }
        else
        {
            tileMode = ADDR_TM_PRT_TILED_THIN1;
        }
    }

    pInOut->tileMode = tileMode;
    pInOut->tileType = tileType;

    // No DCC/TC compatibility requested: optimize for space and cap max base alignment at 64KB.
    if ((pInOut->flags.dccCompatible == FALSE) &&
        (pInOut->flags.tcCompatible == FALSE))
    {
        pInOut->flags.opt4Space = TRUE;
        pInOut->maxBaseAlign = Block64K;
    }

    // Optimize tile mode if possible
    OptimizeTileMode(pInOut);

    HwlOverrideTileMode(pInOut);
}

/**
****************************************************************************************************
* CiLib::HwlSetPrtTileMode
*
* @brief
*   Set PRT tile mode.
*
* @return
*   N/A
*
****************************************************************************************************
*/
VOID CiLib::HwlSetPrtTileMode(
    ADDR_COMPUTE_SURFACE_INFO_INPUT* pInOut     ///< [in,out] input output structure
    ) const
{
    AddrTileMode tileMode = pInOut->tileMode;
    AddrTileType tileType = pInOut->tileType;

    if (Thickness(tileMode) > 1)
    {
        // Thick PRT: Bonaire has no PRT_THICK+THICK entry, so it uses NON_DISPLAYABLE.
        tileMode = ADDR_TM_PRT_TILED_THICK;
        tileType = (m_settings.isBonaire == TRUE) ? ADDR_NON_DISPLAYABLE : ADDR_THICK;
    }
    else
    {
        // Thin PRT keeps the incoming tile type unless it was THICK.
        tileMode = ADDR_TM_PRT_TILED_THIN1;
        tileType = (tileType == ADDR_THICK) ? ADDR_NON_DISPLAYABLE : tileType;
    }

    pInOut->tileMode = tileMode;
    pInOut->tileType = tileType;
}

/**
****************************************************************************************************
* CiLib::HwlSetupTileInfo
*
* @brief
*   Setup default value of tile info for SI
****************************************************************************************************
*/
VOID CiLib::HwlSetupTileInfo(
    AddrTileMode                        tileMode,       ///< [in] Tile mode
    ADDR_SURFACE_FLAGS                  flags,          ///< [in] Surface type flags
    UINT_32                             bpp,            ///< [in] Bits per pixel
    UINT_32                             pitch,          ///< [in] Pitch in pixels
    UINT_32                             height,         ///< [in] Height in pixels
    UINT_32                             numSamples,     ///< [in] Number of samples
    ADDR_TILEINFO*                      pTileInfoIn,    ///< [in] Tile info input: NULL for default
    ADDR_TILEINFO*                      pTileInfoOut,   ///< [out] Tile info output
    AddrTileType                        inTileType,     ///< [in] Tile type
    ADDR_COMPUTE_SURFACE_INFO_OUTPUT*   pOut            ///< [out] Output
    ) const
{
    // NOTE(review): pitch, height and pTileInfoIn are not referenced in this implementation.
    UINT_32 thickness = Thickness(tileMode);
    ADDR_TILEINFO* pTileInfo = pTileInfoOut;
    INT index = TileIndexInvalid;
    INT macroModeIndex = TileIndexInvalid;

    // Fail-safe code
    if (IsLinear(tileMode) == FALSE)
    {
        // Thick tile modes must use thick micro tile mode but Bonaire does not support due to
        // old derived netlists (UBTS 404321)
        if (thickness > 1)
        {
            if (m_settings.isBonaire)
            {
                inTileType = ADDR_NON_DISPLAYABLE;
            }
            else if ((m_allowNonDispThickModes == FALSE) ||
                     (inTileType != ADDR_NON_DISPLAYABLE) ||
                     // There is no PRT_THICK + THIN entry in tile mode table except Bonaire
                     (IsPrtTileMode(tileMode) == TRUE))
            {
                inTileType = ADDR_THICK;
            }
        }
        // 128 bpp tiling must be non-displayable.
        // Fmask reuse color buffer's entry but bank-height field can be from another entry
        // To simplify the logic, fmask entry should be picked from non-displayable ones
        else if (bpp == 128 || flags.fmask)
        {
            inTileType = ADDR_NON_DISPLAYABLE;
        }
        // These two modes only have non-disp entries though they can be other micro tile modes
        else if (tileMode == ADDR_TM_3D_TILED_THIN1 || tileMode == ADDR_TM_PRT_3D_TILED_THIN1)
        {
            inTileType = ADDR_NON_DISPLAYABLE;
        }

        // Depth/stencil always overrides the micro tile type.
        if (flags.depth || flags.stencil)
        {
            inTileType = ADDR_DEPTH_SAMPLE_ORDER;
        }
    }

    // tcCompatible flag is only meaningful for gfx8.
    if (m_settings.isVolcanicIslands == FALSE)
    {
        flags.tcCompatible = FALSE;
    }

    if (IsTileInfoAllZero(pTileInfo))
    {
        // No client-provided tile info: pick a tile mode table index from the surface properties.

        // See table entries 0-4
        if (flags.depth || flags.stencil)
        {
            // tileSize = thickness * bpp * numSamples * 8 * 8 / 8
            UINT_32 tileSize = thickness * bpp * numSamples * 8;

            // Turn off tc compatible if row_size is smaller than tile size (tile split occurs).
            if (m_rowSize < tileSize)
            {
                flags.tcCompatible = FALSE;
            }

            // Bitwise OR is intentional here: these are single-bit flag fields.
            if (flags.nonSplit | flags.tcCompatible | flags.needEquation)
            {
                // Texture readable depth surface should not be split
                switch (tileSize)
                {
                    case 64:
                        index = 0;
                        break;
                    case 128:
                        index = 1;
                        break;
                    case 256:
                        index = 2;
                        break;
                    case 512:
                        index = 3;
                        break;
                    default:
                        index = 4;
                        break;
                }
            }
            else
            {
                // Depth and stencil need to use the same index, thus the pre-defined tile_split
                // can meet the requirement to choose the same macro mode index
                // uncompressed depth/stencil are not supported for now
                switch (numSamples)
                {
                    case 1:
                        index = 0;
                        break;
                    case 2:
                    case 4:
                        index = 1;
                        break;
                    case 8:
                        index = 2;
                        break;
                    default:
                        break;
                }
            }
        }

        // See table entries 5-6
        if (inTileType == ADDR_DEPTH_SAMPLE_ORDER)
        {
            switch (tileMode)
            {
                case ADDR_TM_1D_TILED_THIN1:
                    index = 5;
                    break;
                case ADDR_TM_PRT_TILED_THIN1:
                    index = 6;
                    break;
                default:
                    break;
            }
        }

        // See table entries 8-12
        if (inTileType == ADDR_DISPLAYABLE)
        {
            switch (tileMode)
            {
                case ADDR_TM_1D_TILED_THIN1:
                    index = 9;
                    break;
                case ADDR_TM_2D_TILED_THIN1:
                    index = 10;
                    break;
                case ADDR_TM_PRT_TILED_THIN1:
                    index = 11;
                    break;
                default:
                    break;
            }
        }

        // See table entries 13-18
        if (inTileType == ADDR_NON_DISPLAYABLE)
        {
            switch (tileMode)
            {
                case ADDR_TM_1D_TILED_THIN1:
                    index = 13;
                    break;
                case ADDR_TM_2D_TILED_THIN1:
                    index = 14;
                    break;
                case ADDR_TM_3D_TILED_THIN1:
                    index = 15;
                    break;
                case ADDR_TM_PRT_TILED_THIN1:
                    index = 16;
                    break;
                default:
                    break;
            }
        }

        // See table entries 19-26
        if (thickness > 1)
        {
            switch (tileMode)
            {
                case ADDR_TM_1D_TILED_THICK:
                    // special check for bonaire, for the compatibility between old KMD and new UMD
                    index = ((inTileType == ADDR_THICK) || m_settings.isBonaire) ? 19 : 18;
                    break;
                case ADDR_TM_2D_TILED_THICK:
                    // special check for bonaire, for the compatibility between old KMD and new UMD
                    index = ((inTileType == ADDR_THICK) || m_settings.isBonaire) ? 20 : 24;
                    break;
                case ADDR_TM_3D_TILED_THICK:
                    index = 21;
                    break;
                case ADDR_TM_PRT_TILED_THICK:
                    index = 22;
                    break;
                case ADDR_TM_2D_TILED_XTHICK:
                    index = 25;
                    break;
                case ADDR_TM_3D_TILED_XTHICK:
                    index = 26;
                    break;
                default:
                    break;
            }
        }

        // See table entries 27-30
        if (inTileType == ADDR_ROTATED)
        {
            switch (tileMode)
            {
                case ADDR_TM_1D_TILED_THIN1:
                    index = 27;
                    break;
                case ADDR_TM_2D_TILED_THIN1:
                    index = 28;
                    break;
                case ADDR_TM_PRT_TILED_THIN1:
                    index = 29;
                    break;
                case ADDR_TM_PRT_2D_TILED_THIN1:
                    index = 30;
                    break;
                default:
                    break;
            }
        }

        if (m_pipes >= 8)
        {
            ADDR_ASSERT((index + 1) < static_cast<INT>(m_noOfEntries));
            // Only do this when tile mode table is updated.
            if (((tileMode == ADDR_TM_PRT_TILED_THIN1) || (tileMode == ADDR_TM_PRT_TILED_THICK)) &&
                (m_tileTable[index + 1].mode == tileMode))
            {
                // A PRT macro tile must be exactly 64KB.
                static const UINT_32 PrtTileBytes = 0x10000;
                ADDR_TILEINFO tileInfo = {0};

                HwlComputeMacroModeIndex(index, flags, bpp, numSamples, &tileInfo);

                UINT_32 macroTileBytes = (bpp >> 3) * 64 * numSamples * thickness *
                                         HwlGetPipes(&tileInfo) * tileInfo.banks *
                                         tileInfo.bankWidth * tileInfo.bankHeight;

                if (macroTileBytes != PrtTileBytes)
                {
                    // Switching to next tile mode entry to make sure macro tile size is 64KB
                    index += 1;

                    tileInfo.pipeConfig = m_tileTable[index].info.pipeConfig;

                    macroTileBytes = (bpp >> 3) * 64 * numSamples * thickness *
                                     HwlGetPipes(&tileInfo) * tileInfo.banks *
                                     tileInfo.bankWidth * tileInfo.bankHeight;

                    ADDR_ASSERT(macroTileBytes == PrtTileBytes);

                    flags.tcCompatible = FALSE;
                    pOut->dccUnsupport = TRUE;
                }
            }
        }
    }
    else
    {
        // A pre-filled tile info is ready
        index = pOut->tileIndex;
        macroModeIndex = pOut->macroModeIndex;

        // pass tile type back for post tile index compute
        pOut->tileType = inTileType;

        if (flags.depth || flags.stencil)
        {
            // tileSize = thickness * bpp * numSamples * 8 * 8 / 8
            UINT_32 tileSize = thickness * bpp * numSamples * 8;

            // Turn off tc compatible if row_size is smaller than tile size (tile split occurs).
            if (m_rowSize < tileSize)
            {
                flags.tcCompatible = FALSE;
            }
        }

        // DCC cannot be used if the surface's pipe config disagrees with the device's pipe count.
        UINT_32 numPipes = GetPipePerSurf(pTileInfo->pipeConfig);

        if (m_pipes != numPipes)
        {
            pOut->dccUnsupport = TRUE;
        }
    }

    // We only need to set up tile info if there is a valid index but macroModeIndex is invalid
    if ((index != TileIndexInvalid) && (macroModeIndex == TileIndexInvalid))
    {
        macroModeIndex = HwlComputeMacroModeIndex(index, flags, bpp, numSamples, pTileInfo);

        // Copy to pOut->tileType/tileIndex/macroModeIndex
        pOut->tileIndex = index;
        pOut->tileType = m_tileTable[index].type; // Or inTileType, the same
        pOut->macroModeIndex = macroModeIndex;
    }
    else if (tileMode == ADDR_TM_LINEAR_GENERAL)
    {
        pOut->tileIndex = TileIndexLinearGeneral;

        // Copy linear-aligned entry??
        *pTileInfo = m_tileTable[8].info;
    }
    else if (tileMode == ADDR_TM_LINEAR_ALIGNED)
    {
        pOut->tileIndex = 8;
        *pTileInfo = m_tileTable[8].info;
    }

    if (flags.tcCompatible)
    {
        flags.tcCompatible = CheckTcCompatibility(pTileInfo, bpp, tileMode, inTileType, pOut);
    }

    pOut->tcCompatible = flags.tcCompatible;
}

/**
****************************************************************************************************
* CiLib::ReadGbTileMode
*
* @brief
*   Convert GB_TILE_MODE HW value to ADDR_TILE_CONFIG.
+**************************************************************************************************** +*/ +VOID CiLib::ReadGbTileMode( + UINT_32 regValue, ///< [in] GB_TILE_MODE register + TileConfig* pCfg ///< [out] output structure + ) const +{ + GB_TILE_MODE gbTileMode; + gbTileMode.val = regValue; + + pCfg->type = static_cast(gbTileMode.f.micro_tile_mode_new); + pCfg->info.pipeConfig = static_cast(gbTileMode.f.pipe_config + 1); + + if (pCfg->type == ADDR_DEPTH_SAMPLE_ORDER) + { + pCfg->info.tileSplitBytes = 64 << gbTileMode.f.tile_split; + } + else + { + pCfg->info.tileSplitBytes = 1 << gbTileMode.f.sample_split; + } + + UINT_32 regArrayMode = gbTileMode.f.array_mode; + + pCfg->mode = static_cast(regArrayMode); + + switch (regArrayMode) + { + case 5: + pCfg->mode = ADDR_TM_PRT_TILED_THIN1; + break; + case 6: + pCfg->mode = ADDR_TM_PRT_2D_TILED_THIN1; + break; + case 8: + pCfg->mode = ADDR_TM_2D_TILED_XTHICK; + break; + case 9: + pCfg->mode = ADDR_TM_PRT_TILED_THICK; + break; + case 0xa: + pCfg->mode = ADDR_TM_PRT_2D_TILED_THICK; + break; + case 0xb: + pCfg->mode = ADDR_TM_PRT_3D_TILED_THIN1; + break; + case 0xe: + pCfg->mode = ADDR_TM_3D_TILED_XTHICK; + break; + case 0xf: + pCfg->mode = ADDR_TM_PRT_3D_TILED_THICK; + break; + default: + break; + } + + // Fail-safe code for these always convert tile info, as the non-macro modes + // return the entry of tile mode table directly without looking up macro mode table + if (!IsMacroTiled(pCfg->mode)) + { + pCfg->info.banks = 2; + pCfg->info.bankWidth = 1; + pCfg->info.bankHeight = 1; + pCfg->info.macroAspectRatio = 1; + pCfg->info.tileSplitBytes = 64; + } +} + +/** +**************************************************************************************************** +* CiLib::InitTileSettingTable +* +* @brief +* Initialize the ADDR_TILE_CONFIG table. 
+* @return +* TRUE if tile table is correctly initialized +**************************************************************************************************** +*/ +BOOL_32 CiLib::InitTileSettingTable( + const UINT_32* pCfg, ///< [in] Pointer to table of tile configs + UINT_32 noOfEntries ///< [in] Numbe of entries in the table above + ) +{ + BOOL_32 initOk = TRUE; + + ADDR_ASSERT(noOfEntries <= TileTableSize); + + memset(m_tileTable, 0, sizeof(m_tileTable)); + + if (noOfEntries != 0) + { + m_noOfEntries = noOfEntries; + } + else + { + m_noOfEntries = TileTableSize; + } + + if (pCfg) // From Client + { + for (UINT_32 i = 0; i < m_noOfEntries; i++) + { + ReadGbTileMode(*(pCfg + i), &m_tileTable[i]); + } + } + else + { + ADDR_ASSERT_ALWAYS(); + initOk = FALSE; + } + + if (initOk) + { + ADDR_ASSERT(m_tileTable[TILEINDEX_LINEAR_ALIGNED].mode == ADDR_TM_LINEAR_ALIGNED); + + if (m_settings.isBonaire == FALSE) + { + // Check if entry 18 is "thick+thin" combination + if ((m_tileTable[18].mode == ADDR_TM_1D_TILED_THICK) && + (m_tileTable[18].type == ADDR_NON_DISPLAYABLE)) + { + m_allowNonDispThickModes = TRUE; + ADDR_ASSERT(m_tileTable[24].mode == ADDR_TM_2D_TILED_THICK); + } + } + else + { + m_allowNonDispThickModes = TRUE; + } + + // Assume the first entry is always programmed with full pipes + m_pipes = HwlGetPipes(&m_tileTable[0].info); + } + + return initOk; +} + +/** +**************************************************************************************************** +* CiLib::ReadGbMacroTileCfg +* +* @brief +* Convert GB_MACRO_TILE_CFG HW value to ADDR_TILE_CONFIG. 
+**************************************************************************************************** +*/ +VOID CiLib::ReadGbMacroTileCfg( + UINT_32 regValue, ///< [in] GB_MACRO_TILE_MODE register + ADDR_TILEINFO* pCfg ///< [out] output structure + ) const +{ + GB_MACROTILE_MODE gbTileMode; + gbTileMode.val = regValue; + + pCfg->bankHeight = 1 << gbTileMode.f.bank_height; + pCfg->bankWidth = 1 << gbTileMode.f.bank_width; + pCfg->banks = 1 << (gbTileMode.f.num_banks + 1); + pCfg->macroAspectRatio = 1 << gbTileMode.f.macro_tile_aspect; +} + +/** +**************************************************************************************************** +* CiLib::InitMacroTileCfgTable +* +* @brief +* Initialize the ADDR_MACRO_TILE_CONFIG table. +* @return +* TRUE if macro tile table is correctly initialized +**************************************************************************************************** +*/ +BOOL_32 CiLib::InitMacroTileCfgTable( + const UINT_32* pCfg, ///< [in] Pointer to table of tile configs + UINT_32 noOfMacroEntries ///< [in] Numbe of entries in the table above + ) +{ + BOOL_32 initOk = TRUE; + + ADDR_ASSERT(noOfMacroEntries <= MacroTileTableSize); + + memset(m_macroTileTable, 0, sizeof(m_macroTileTable)); + + if (noOfMacroEntries != 0) + { + m_noOfMacroEntries = noOfMacroEntries; + } + else + { + m_noOfMacroEntries = MacroTileTableSize; + } + + if (pCfg) // From Client + { + for (UINT_32 i = 0; i < m_noOfMacroEntries; i++) + { + ReadGbMacroTileCfg(*(pCfg + i), &m_macroTileTable[i]); + + m_macroTileTable[i].tileSplitBytes = 64 << (i % 8); + } + } + else + { + ADDR_ASSERT_ALWAYS(); + initOk = FALSE; + } + return initOk; +} + +/** +**************************************************************************************************** +* CiLib::HwlComputeMacroModeIndex +* +* @brief +* Computes macro tile mode index +* @return +* TRUE if macro tile table is correctly initialized 
****************************************************************************************************
*/
INT_32 CiLib::HwlComputeMacroModeIndex(
    INT_32              tileIndex,      ///< [in] Tile mode index
    ADDR_SURFACE_FLAGS  flags,          ///< [in] Surface flags
    UINT_32             bpp,            ///< [in] Bit per pixel
    UINT_32             numSamples,     ///< [in] Number of samples
    ADDR_TILEINFO*      pTileInfo,      ///< [out] Pointer to ADDR_TILEINFO
    AddrTileMode*       pTileMode,      ///< [out] Pointer to AddrTileMode
    AddrTileType*       pTileType       ///< [out] Pointer to AddrTileType
    ) const
{
    INT_32 macroModeIndex = TileIndexInvalid;

    AddrTileMode tileMode = m_tileTable[tileIndex].mode;
    AddrTileType tileType = m_tileTable[tileIndex].type;
    UINT_32 thickness = Thickness(tileMode);

    if (!IsMacroTiled(tileMode))
    {
        // Non-macro modes copy the tile mode table entry directly; no macro table lookup.
        *pTileInfo = m_tileTable[tileIndex].info;
        macroModeIndex = TileIndexNoMacroIndex;
    }
    else
    {
        // Bytes in one micro tile for a single sample.
        UINT_32 tileBytes1x = BITS_TO_BYTES(bpp * MicroTilePixels * thickness);
        UINT_32 tileSplit;

        if (m_tileTable[tileIndex].type == ADDR_DEPTH_SAMPLE_ORDER)
        {
            // Depth entries store real tileSplitBytes
            tileSplit = m_tileTable[tileIndex].info.tileSplitBytes;
        }
        else
        {
            // Non-depth entries store a split factor
            UINT_32 sampleSplit = m_tileTable[tileIndex].info.tileSplitBytes;
            UINT_32 colorTileSplit = Max(256u, sampleSplit * tileBytes1x);

            tileSplit = colorTileSplit;
        }

        // The effective split can never exceed the row size.
        UINT_32 tileSplitC = Min(m_rowSize, tileSplit);
        UINT_32 tileBytes;

        if (flags.fmask)
        {
            // Fmask ignores the sample count for this computation.
            tileBytes = Min(tileSplitC, tileBytes1x);
        }
        else
        {
            tileBytes = Min(tileSplitC, numSamples * tileBytes1x);
        }

        if (tileBytes < 64)
        {
            tileBytes = 64;
        }

        // Index is log2 of the 64-byte multiple: 64B -> 0, 128B -> 1, ...
        macroModeIndex = Log2(tileBytes / 64);

        if (flags.prt || IsPrtTileMode(tileMode))
        {
            // PRT modes index a separate range of the macro table, offset by PrtMacroModeOffset.
            macroModeIndex += PrtMacroModeOffset;
            *pTileInfo = m_macroTileTable[macroModeIndex];
        }
        else
        {
            // Same lookup as above, without the PRT offset.
            *pTileInfo = m_macroTileTable[macroModeIndex];
        }

        // Pipe config and the clamped tile split always come from the tile mode entry/computation.
        pTileInfo->pipeConfig = m_tileTable[tileIndex].info.pipeConfig;

        pTileInfo->tileSplitBytes = tileSplitC;
    }

    if (NULL != pTileMode)
    {
        *pTileMode = tileMode;
    }

    if (NULL != pTileType)
    {
        *pTileType = tileType;
    }

    return macroModeIndex;
}

/**
****************************************************************************************************
* CiLib::HwlComputeTileDataWidthAndHeightLinear
*
* @brief
*   Compute the squared cache shape for per-tile data (CMASK and HTILE) for linear layout
*
* @note
*   MacroWidth and macroHeight are measured in pixels
****************************************************************************************************
*/
VOID CiLib::HwlComputeTileDataWidthAndHeightLinear(
    UINT_32*        pMacroWidth,    ///< [out] macro tile width
    UINT_32*        pMacroHeight,   ///< [out] macro tile height
    UINT_32         bpp,            ///< [in] bits per pixel
    ADDR_TILEINFO*  pTileInfo       ///< [in] tile info
    ) const
{
    // NOTE(review): bpp is not referenced in this implementation.
    ADDR_ASSERT(pTileInfo != NULL);

    UINT_32 numTiles;

    // 8-tile macro for the wider pipe configs listed, 4-tile macro otherwise.
    switch (pTileInfo->pipeConfig)
    {
        case ADDR_PIPECFG_P16_32x32_8x16:
        case ADDR_PIPECFG_P16_32x32_16x16:
        case ADDR_PIPECFG_P8_32x64_32x32:
        case ADDR_PIPECFG_P8_32x32_16x32:
        case ADDR_PIPECFG_P8_32x32_16x16:
        case ADDR_PIPECFG_P8_32x32_8x16:
        case ADDR_PIPECFG_P4_32x32:
            numTiles = 8;
            break;
        default:
            numTiles = 4;
            break;
    }

    *pMacroWidth = numTiles * MicroTileWidth;
    *pMacroHeight = numTiles * MicroTileHeight;
}

/**
****************************************************************************************************
* CiLib::HwlComputeMetadataNibbleAddress
*
* @brief
*   calculate meta data address based on input information
*
* &parameter
*   uncompressedDataByteAddress - address of a pixel in color surface
*   dataBaseByteAddress         - base address of color surface
*   metadataBaseByteAddress     - base address of meta ram
*   metadataBitSize             - meta key size, 8 for DCC, 4 for cmask
*   elementBitSize              - element size of color surface
*   blockByteSize               - compression block size, 256 for DCC
*   pipeInterleaveBytes         - pipe interleave size
*   numOfPipes                  - number of pipes
*
numOfBanks - number of banks +* numOfSamplesPerSplit - number of samples per tile split +* @return +* meta data nibble address (nibble address is used to support DCC compatible cmask) +* +**************************************************************************************************** +*/ +UINT_64 CiLib::HwlComputeMetadataNibbleAddress( + UINT_64 uncompressedDataByteAddress, + UINT_64 dataBaseByteAddress, + UINT_64 metadataBaseByteAddress, + UINT_32 metadataBitSize, + UINT_32 elementBitSize, + UINT_32 blockByteSize, + UINT_32 pipeInterleaveBytes, + UINT_32 numOfPipes, + UINT_32 numOfBanks, + UINT_32 numOfSamplesPerSplit) const +{ + ///-------------------------------------------------------------------------------------------- + /// Get pipe interleave, bank and pipe bits + ///-------------------------------------------------------------------------------------------- + UINT_32 pipeInterleaveBits = Log2(pipeInterleaveBytes); + UINT_32 pipeBits = Log2(numOfPipes); + UINT_32 bankBits = Log2(numOfBanks); + + ///-------------------------------------------------------------------------------------------- + /// Clear pipe and bank swizzles + ///-------------------------------------------------------------------------------------------- + UINT_32 dataMacrotileBits = pipeInterleaveBits + pipeBits + bankBits; + UINT_32 metadataMacrotileBits = pipeInterleaveBits + pipeBits + bankBits; + + UINT_64 dataMacrotileClearMask = ~((1L << dataMacrotileBits) - 1); + UINT_64 metadataMacrotileClearMask = ~((1L << metadataMacrotileBits) - 1); + + UINT_64 dataBaseByteAddressNoSwizzle = dataBaseByteAddress & dataMacrotileClearMask; + UINT_64 metadataBaseByteAddressNoSwizzle = metadataBaseByteAddress & metadataMacrotileClearMask; + + ///-------------------------------------------------------------------------------------------- + /// Modify metadata base before adding in so that when final address is divided by data ratio, + /// the base address returns to where it should be + 
///-------------------------------------------------------------------------------------------- + ADDR_ASSERT((0 != metadataBitSize)); + UINT_64 metadataBaseShifted = metadataBaseByteAddressNoSwizzle * blockByteSize * 8 / + metadataBitSize; + UINT_64 offset = uncompressedDataByteAddress - + dataBaseByteAddressNoSwizzle + + metadataBaseShifted; + + ///-------------------------------------------------------------------------------------------- + /// Save bank data bits + ///-------------------------------------------------------------------------------------------- + UINT_32 lsb = pipeBits + pipeInterleaveBits; + UINT_32 msb = bankBits - 1 + lsb; + + UINT_64 bankDataBits = GetBits(offset, msb, lsb); + + ///-------------------------------------------------------------------------------------------- + /// Save pipe data bits + ///-------------------------------------------------------------------------------------------- + lsb = pipeInterleaveBits; + msb = pipeBits - 1 + lsb; + + UINT_64 pipeDataBits = GetBits(offset, msb, lsb); + + ///-------------------------------------------------------------------------------------------- + /// Remove pipe and bank bits + ///-------------------------------------------------------------------------------------------- + lsb = pipeInterleaveBits; + msb = dataMacrotileBits - 1; + + UINT_64 offsetWithoutPipeBankBits = RemoveBits(offset, msb, lsb); + + ADDR_ASSERT((0 != blockByteSize)); + UINT_64 blockInBankpipe = offsetWithoutPipeBankBits / blockByteSize; + + UINT_32 tileSize = 8 * 8 * elementBitSize/8 * numOfSamplesPerSplit; + UINT_32 blocksInTile = tileSize / blockByteSize; + + if (0 == blocksInTile) + { + lsb = 0; + } + else + { + lsb = Log2(blocksInTile); + } + msb = bankBits - 1 + lsb; + + UINT_64 blockInBankpipeWithBankBits = InsertBits(blockInBankpipe, bankDataBits, msb, lsb); + + /// NOTE *2 because we are converting to Nibble address in this step + UINT_64 metaAddressInPipe = blockInBankpipeWithBankBits * 2 * metadataBitSize / 
8; + + ///-------------------------------------------------------------------------------------------- + /// Reinsert pipe bits back into the final address + ///-------------------------------------------------------------------------------------------- + lsb = pipeInterleaveBits + 1; ///<+1 due to Nibble address now gives interleave bits extra lsb. + msb = pipeBits - 1 + lsb; + UINT_64 metadataAddress = InsertBits(metaAddressInPipe, pipeDataBits, msb, lsb); + + return metadataAddress; +} + +/** +**************************************************************************************************** +* CiLib::HwlComputeSurfaceAlignmentsMacroTiled +* +* @brief +* Hardware layer function to compute alignment request for macro tile mode +* +**************************************************************************************************** +*/ +VOID CiLib::HwlComputeSurfaceAlignmentsMacroTiled( + AddrTileMode tileMode, ///< [in] tile mode + UINT_32 bpp, ///< [in] bits per pixel + ADDR_SURFACE_FLAGS flags, ///< [in] surface flags + UINT_32 mipLevel, ///< [in] mip level + UINT_32 numSamples, ///< [in] number of samples + ADDR_COMPUTE_SURFACE_INFO_OUTPUT* pOut ///< [in,out] Surface output + ) const +{ + // This is to workaround a H/W limitation that DCC doesn't work when pipe config is switched to + // P4. In theory, all asics that have such switching should be patched but we now only know what + // to pad for Fiji. + if ((m_settings.isFiji == TRUE) && + (flags.dccPipeWorkaround == TRUE) && + (flags.prt == FALSE) && + (mipLevel == 0) && + (tileMode == ADDR_TM_PRT_TILED_THIN1) && + (pOut->dccUnsupport == TRUE)) + { + pOut->pitchAlign = PowTwoAlign(pOut->pitchAlign, 256); + // In case the client still requests DCC usage. 
+ pOut->dccUnsupport = FALSE; + } +} + +/** +**************************************************************************************************** +* CiLib::HwlPadDimensions +* +* @brief +* Helper function to pad dimensions +* +**************************************************************************************************** +*/ +VOID CiLib::HwlPadDimensions( + AddrTileMode tileMode, ///< [in] tile mode + UINT_32 bpp, ///< [in] bits per pixel + ADDR_SURFACE_FLAGS flags, ///< [in] surface flags + UINT_32 numSamples, ///< [in] number of samples + ADDR_TILEINFO* pTileInfo, ///< [in] tile info + UINT_32 mipLevel, ///< [in] mip level + UINT_32* pPitch, ///< [in,out] pitch in pixels + UINT_32* pPitchAlign, ///< [in,out] pitch alignment + UINT_32 height, ///< [in] height in pixels + UINT_32 heightAlign ///< [in] height alignment + ) const +{ + if ((m_settings.isVolcanicIslands == TRUE) && + (flags.dccCompatible == TRUE) && + (numSamples > 1) && + (mipLevel == 0) && + (IsMacroTiled(tileMode) == TRUE)) + { + UINT_32 tileSizePerSample = BITS_TO_BYTES(bpp * MicroTileWidth * MicroTileHeight); + UINT_32 samplesPerSplit = pTileInfo->tileSplitBytes / tileSizePerSample; + + if (samplesPerSplit < numSamples) + { + UINT_32 dccFastClearByteAlign = HwlGetPipes(pTileInfo) * m_pipeInterleaveBytes * 256; + UINT_32 bytesPerSplit = BITS_TO_BYTES((*pPitch) * height * bpp * samplesPerSplit); + + ADDR_ASSERT(IsPow2(dccFastClearByteAlign)); + + if (0 != (bytesPerSplit & (dccFastClearByteAlign - 1))) + { + UINT_32 dccFastClearPixelAlign = dccFastClearByteAlign / + BITS_TO_BYTES(bpp) / + samplesPerSplit; + UINT_32 macroTilePixelAlign = (*pPitchAlign) * heightAlign; + + if ((dccFastClearPixelAlign >= macroTilePixelAlign) && + ((dccFastClearPixelAlign % macroTilePixelAlign) == 0)) + { + UINT_32 dccFastClearPitchAlignInMacroTile = + dccFastClearPixelAlign / macroTilePixelAlign; + UINT_32 heightInMacroTile = height / heightAlign; + + while ((heightInMacroTile > 1) && + ((heightInMacroTile % 2) == 
0) && + (dccFastClearPitchAlignInMacroTile > 1) && + ((dccFastClearPitchAlignInMacroTile % 2) == 0)) + { + heightInMacroTile >>= 1; + dccFastClearPitchAlignInMacroTile >>= 1; + } + + UINT_32 dccFastClearPitchAlignInPixels = + (*pPitchAlign) * dccFastClearPitchAlignInMacroTile; + + if (IsPow2(dccFastClearPitchAlignInPixels)) + { + *pPitch = PowTwoAlign((*pPitch), dccFastClearPitchAlignInPixels); + } + else + { + *pPitch += (dccFastClearPitchAlignInPixels - 1); + *pPitch /= dccFastClearPitchAlignInPixels; + *pPitch *= dccFastClearPitchAlignInPixels; + } + + *pPitchAlign = dccFastClearPitchAlignInPixels; + } + } + } + } +} + +/** +**************************************************************************************************** +* CiLib::HwlComputeMaxBaseAlignments +* +* @brief +* Gets maximum alignments +* @return +* maximum alignments +**************************************************************************************************** +*/ +UINT_32 CiLib::HwlComputeMaxBaseAlignments() const +{ + const UINT_32 pipes = HwlGetPipes(&m_tileTable[0].info); + + // Initial size is 64 KiB for PRT. + UINT_32 maxBaseAlign = 64 * 1024; + + for (UINT_32 i = 0; i < m_noOfMacroEntries; i++) + { + // The maximum tile size is 16 byte-per-pixel and either 8-sample or 8-slice. 
+ UINT_32 tileSize = m_macroTileTable[i].tileSplitBytes; + + UINT_32 baseAlign = tileSize * pipes * m_macroTileTable[i].banks * + m_macroTileTable[i].bankWidth * m_macroTileTable[i].bankHeight; + + if (baseAlign > maxBaseAlign) + { + maxBaseAlign = baseAlign; + } + } + + return maxBaseAlign; +} + +/** +**************************************************************************************************** +* CiLib::HwlComputeMaxMetaBaseAlignments +* +* @brief +* Gets maximum alignments for metadata +* @return +* maximum alignments for metadata +**************************************************************************************************** +*/ +UINT_32 CiLib::HwlComputeMaxMetaBaseAlignments() const +{ + UINT_32 maxBank = 1; + + for (UINT_32 i = 0; i < m_noOfMacroEntries; i++) + { + if ((m_settings.isVolcanicIslands) && IsMacroTiled(m_tileTable[i].mode)) + { + maxBank = Max(maxBank, m_macroTileTable[i].banks); + } + } + + return SiLib::HwlComputeMaxMetaBaseAlignments() * maxBank; +} + +/** +**************************************************************************************************** +* CiLib::DepthStencilTileCfgMatch +* +* @brief +* Try to find a tile index for stencil which makes its tile config parameters matches to depth +* @return +* TRUE if such tile index for stencil can be found +**************************************************************************************************** +*/ +BOOL_32 CiLib::DepthStencilTileCfgMatch( + const ADDR_COMPUTE_SURFACE_INFO_INPUT* pIn, ///< [in] input structure + ADDR_COMPUTE_SURFACE_INFO_OUTPUT* pOut ///< [out] output structure + ) const +{ + BOOL_32 depthStencil2DTileConfigMatch = FALSE; + + for (INT_32 stencilTileIndex = MinDepth2DThinIndex; + stencilTileIndex <= MaxDepth2DThinIndex; + stencilTileIndex++) + { + ADDR_TILEINFO tileInfo = {0}; + INT_32 stencilMacroIndex = HwlComputeMacroModeIndex(stencilTileIndex, + pIn->flags, + 8, + pIn->numSamples, + &tileInfo); + + if (stencilMacroIndex != TileIndexNoMacroIndex) + 
{ + if ((m_macroTileTable[stencilMacroIndex].banks == + m_macroTileTable[pOut->macroModeIndex].banks) && + (m_macroTileTable[stencilMacroIndex].bankWidth == + m_macroTileTable[pOut->macroModeIndex].bankWidth) && + (m_macroTileTable[stencilMacroIndex].bankHeight == + m_macroTileTable[pOut->macroModeIndex].bankHeight) && + (m_macroTileTable[stencilMacroIndex].macroAspectRatio == + m_macroTileTable[pOut->macroModeIndex].macroAspectRatio) && + (m_macroTileTable[stencilMacroIndex].pipeConfig == + m_macroTileTable[pOut->macroModeIndex].pipeConfig)) + { + if ((pOut->tcCompatible == FALSE) || + (tileInfo.tileSplitBytes >= MicroTileWidth * MicroTileHeight * pIn->numSamples)) + { + depthStencil2DTileConfigMatch = TRUE; + pOut->stencilTileIdx = stencilTileIndex; + break; + } + } + } + else + { + ADDR_ASSERT_ALWAYS(); + } + } + + return depthStencil2DTileConfigMatch; +} + +/** +**************************************************************************************************** +* CiLib::DepthStencilTileCfgMatch +* +* @brief +* Check if tc compatibility is available +* @return +* If tc compatibility is not available +**************************************************************************************************** +*/ +BOOL_32 CiLib::CheckTcCompatibility( + const ADDR_TILEINFO* pTileInfo, ///< [in] input tile info + UINT_32 bpp, ///< [in] Bits per pixel + AddrTileMode tileMode, ///< [in] input tile mode + AddrTileType tileType, ///< [in] input tile type + const ADDR_COMPUTE_SURFACE_INFO_OUTPUT* pOut ///< [in] output surf info + ) const +{ + BOOL_32 tcCompatible = TRUE; + + if (IsMacroTiled(tileMode)) + { + if (tileType != ADDR_DEPTH_SAMPLE_ORDER) + { + // Turn off tcCompatible for color surface if tileSplit happens. Depth/stencil + // tileSplit case was handled at tileIndex selecting time. 
+ INT_32 tileIndex = pOut->tileIndex; + + if ((tileIndex == TileIndexInvalid) && (IsTileInfoAllZero(pTileInfo) == FALSE)) + { + tileIndex = HwlPostCheckTileIndex(pTileInfo, tileMode, tileType, tileIndex); + } + + if (tileIndex != TileIndexInvalid) + { + UINT_32 thickness = Thickness(tileMode); + + ADDR_ASSERT(static_cast(tileIndex) < TileTableSize); + // Non-depth entries store a split factor + UINT_32 sampleSplit = m_tileTable[tileIndex].info.tileSplitBytes; + UINT_32 tileBytes1x = BITS_TO_BYTES(bpp * MicroTilePixels * thickness); + UINT_32 colorTileSplit = Max(256u, sampleSplit * tileBytes1x); + + if (m_rowSize < colorTileSplit) + { + tcCompatible = FALSE; + } + } + } + } + else + { + // Client should not enable tc compatible for linear and 1D tile modes. + tcCompatible = FALSE; + } + + return tcCompatible; +} + +} // V1 +} // Addr diff -Nru mesa-18.3.3/src/amd/addrlib/src/r800/ciaddrlib.h mesa-19.0.1/src/amd/addrlib/src/r800/ciaddrlib.h --- mesa-18.3.3/src/amd/addrlib/src/r800/ciaddrlib.h 1970-01-01 00:00:00.000000000 +0000 +++ mesa-19.0.1/src/amd/addrlib/src/r800/ciaddrlib.h 2019-03-31 23:16:37.000000000 +0000 @@ -0,0 +1,201 @@ +/* + * Copyright © 2007-2018 Advanced Micro Devices, Inc. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining + * a copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES + * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NON-INFRINGEMENT. 
IN NO EVENT SHALL THE COPYRIGHT HOLDERS, AUTHORS + * AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE + * USE OR OTHER DEALINGS IN THE SOFTWARE. + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + */ + +/** +**************************************************************************************************** +* @file ciaddrlib.h +* @brief Contains the CiLib class definition. +**************************************************************************************************** +*/ + +#ifndef __CI_ADDR_LIB_H__ +#define __CI_ADDR_LIB_H__ + +#include "addrlib1.h" +#include "siaddrlib.h" + +namespace Addr +{ +namespace V1 +{ + +/** +**************************************************************************************************** +* @brief This class is the CI specific address library +* function set. +**************************************************************************************************** +*/ +class CiLib : public SiLib +{ +public: + /// Creates CiLib object + static Addr::Lib* CreateObj(const Client* pClient) + { + VOID* pMem = Object::ClientAlloc(sizeof(CiLib), pClient); + return (pMem != NULL) ? 
new (pMem) CiLib(pClient) : NULL; + } + +private: + CiLib(const Client* pClient); + virtual ~CiLib(); + +protected: + + // Hwl interface - defined in AddrLib1 + virtual ADDR_E_RETURNCODE HwlComputeSurfaceInfo( + const ADDR_COMPUTE_SURFACE_INFO_INPUT* pIn, + ADDR_COMPUTE_SURFACE_INFO_OUTPUT* pOut) const; + + virtual ADDR_E_RETURNCODE HwlComputeFmaskInfo( + const ADDR_COMPUTE_FMASK_INFO_INPUT* pIn, + ADDR_COMPUTE_FMASK_INFO_OUTPUT* pOut); + + virtual ChipFamily HwlConvertChipFamily( + UINT_32 uChipFamily, UINT_32 uChipRevision); + + virtual BOOL_32 HwlInitGlobalParams( + const ADDR_CREATE_INPUT* pCreateIn); + + virtual ADDR_E_RETURNCODE HwlSetupTileCfg( + UINT_32 bpp, INT_32 index, INT_32 macroModeIndex, ADDR_TILEINFO* pInfo, + AddrTileMode* pMode = 0, AddrTileType* pType = 0) const; + + virtual VOID HwlComputeTileDataWidthAndHeightLinear( + UINT_32* pMacroWidth, UINT_32* pMacroHeight, + UINT_32 bpp, ADDR_TILEINFO* pTileInfo) const; + + virtual INT_32 HwlComputeMacroModeIndex( + INT_32 tileIndex, ADDR_SURFACE_FLAGS flags, UINT_32 bpp, UINT_32 numSamples, + ADDR_TILEINFO* pTileInfo, AddrTileMode* pTileMode = NULL, AddrTileType* pTileType = NULL + ) const; + + // Sub-hwl interface - defined in EgBasedLib + virtual VOID HwlSetupTileInfo( + AddrTileMode tileMode, ADDR_SURFACE_FLAGS flags, + UINT_32 bpp, UINT_32 pitch, UINT_32 height, UINT_32 numSamples, + ADDR_TILEINFO* inputTileInfo, ADDR_TILEINFO* outputTileInfo, + AddrTileType inTileType, ADDR_COMPUTE_SURFACE_INFO_OUTPUT* pOut) const; + + virtual INT_32 HwlPostCheckTileIndex( + const ADDR_TILEINFO* pInfo, AddrTileMode mode, AddrTileType type, + INT curIndex = TileIndexInvalid) const; + + virtual VOID HwlFmaskPreThunkSurfInfo( + const ADDR_COMPUTE_FMASK_INFO_INPUT* pFmaskIn, + const ADDR_COMPUTE_FMASK_INFO_OUTPUT* pFmaskOut, + ADDR_COMPUTE_SURFACE_INFO_INPUT* pSurfIn, + ADDR_COMPUTE_SURFACE_INFO_OUTPUT* pSurfOut) const; + + virtual VOID HwlFmaskPostThunkSurfInfo( + const ADDR_COMPUTE_SURFACE_INFO_OUTPUT* pSurfOut, + 
ADDR_COMPUTE_FMASK_INFO_OUTPUT* pFmaskOut) const; + + virtual AddrTileMode HwlDegradeThickTileMode( + AddrTileMode baseTileMode, UINT_32 numSlices, UINT_32* pBytesPerTile) const; + + virtual VOID HwlOverrideTileMode(ADDR_COMPUTE_SURFACE_INFO_INPUT* pInOut) const; + + virtual VOID HwlOptimizeTileMode(ADDR_COMPUTE_SURFACE_INFO_INPUT* pInOut) const; + + virtual VOID HwlSelectTileMode(ADDR_COMPUTE_SURFACE_INFO_INPUT* pInOut) const; + + /// Overwrite tile setting to PRT + virtual VOID HwlSetPrtTileMode(ADDR_COMPUTE_SURFACE_INFO_INPUT* pInOut) const; + + virtual ADDR_E_RETURNCODE HwlComputeDccInfo( + const ADDR_COMPUTE_DCCINFO_INPUT* pIn, + ADDR_COMPUTE_DCCINFO_OUTPUT* pOut) const; + + virtual ADDR_E_RETURNCODE HwlComputeCmaskAddrFromCoord( + const ADDR_COMPUTE_CMASK_ADDRFROMCOORD_INPUT* pIn, + ADDR_COMPUTE_CMASK_ADDRFROMCOORD_OUTPUT* pOut) const; + + virtual ADDR_E_RETURNCODE HwlComputeHtileAddrFromCoord( + const ADDR_COMPUTE_HTILE_ADDRFROMCOORD_INPUT* pIn, + ADDR_COMPUTE_HTILE_ADDRFROMCOORD_OUTPUT* pOut) const; + + virtual UINT_32 HwlComputeMaxBaseAlignments() const; + + virtual UINT_32 HwlComputeMaxMetaBaseAlignments() const; + + virtual VOID HwlPadDimensions( + AddrTileMode tileMode, UINT_32 bpp, ADDR_SURFACE_FLAGS flags, + UINT_32 numSamples, ADDR_TILEINFO* pTileInfo, UINT_32 mipLevel, + UINT_32* pPitch, UINT_32 *PitchAlign, UINT_32 height, UINT_32 heightAlign) const; + + virtual VOID HwlComputeSurfaceAlignmentsMacroTiled( + AddrTileMode tileMode, UINT_32 bpp, ADDR_SURFACE_FLAGS flags, + UINT_32 mipLevel, UINT_32 numSamples, ADDR_COMPUTE_SURFACE_INFO_OUTPUT* pOut) const; + +private: + + VOID ReadGbTileMode( + UINT_32 regValue, TileConfig* pCfg) const; + + VOID ReadGbMacroTileCfg( + UINT_32 regValue, ADDR_TILEINFO* pCfg) const; + +private: + BOOL_32 InitTileSettingTable( + const UINT_32 *pSetting, UINT_32 noOfEntries); + + BOOL_32 InitMacroTileCfgTable( + const UINT_32 *pSetting, UINT_32 noOfEntries); + + UINT_64 HwlComputeMetadataNibbleAddress( + UINT_64 
uncompressedDataByteAddress, + UINT_64 dataBaseByteAddress, + UINT_64 metadataBaseByteAddress, + UINT_32 metadataBitSize, + UINT_32 elementBitSize, + UINT_32 blockByteSize, + UINT_32 pipeInterleaveBytes, + UINT_32 numOfPipes, + UINT_32 numOfBanks, + UINT_32 numOfSamplesPerSplit) const; + + BOOL_32 DepthStencilTileCfgMatch( + const ADDR_COMPUTE_SURFACE_INFO_INPUT* pIn, + ADDR_COMPUTE_SURFACE_INFO_OUTPUT* pOut) const; + + BOOL_32 CheckTcCompatibility(const ADDR_TILEINFO* pTileInfo, UINT_32 bpp, AddrTileMode tileMode, + AddrTileType tileType, const ADDR_COMPUTE_SURFACE_INFO_OUTPUT* pOut) const; + + static const UINT_32 MacroTileTableSize = 16; + static const UINT_32 PrtMacroModeOffset = MacroTileTableSize / 2; + static const INT_32 MinDepth2DThinIndex = 0; + static const INT_32 MaxDepth2DThinIndex = 4; + static const INT_32 Depth1DThinIndex = 5; + + ADDR_TILEINFO m_macroTileTable[MacroTileTableSize]; + UINT_32 m_noOfMacroEntries; + BOOL_32 m_allowNonDispThickModes; +}; + +} // V1 +} // Addr + +#endif diff -Nru mesa-18.3.3/src/amd/addrlib/src/r800/egbaddrlib.cpp mesa-19.0.1/src/amd/addrlib/src/r800/egbaddrlib.cpp --- mesa-18.3.3/src/amd/addrlib/src/r800/egbaddrlib.cpp 1970-01-01 00:00:00.000000000 +0000 +++ mesa-19.0.1/src/amd/addrlib/src/r800/egbaddrlib.cpp 2019-03-31 23:16:37.000000000 +0000 @@ -0,0 +1,4156 @@ +/* + * Copyright © 2007-2018 Advanced Micro Devices, Inc. + * All Rights Reserved. 
+ * + * Permission is hereby granted, free of charge, to any person obtaining + * a copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES + * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NON-INFRINGEMENT. IN NO EVENT SHALL THE COPYRIGHT HOLDERS, AUTHORS + * AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE + * USE OR OTHER DEALINGS IN THE SOFTWARE. + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + */ + +/** +**************************************************************************************************** +* @file egbaddrlib.cpp +* @brief Contains the EgBasedLib class implementation. 
+**************************************************************************************************** +*/ + +#include "egbaddrlib.h" + +#include "util/macros.h" + +namespace Addr +{ +namespace V1 +{ + +/** +**************************************************************************************************** +* EgBasedLib::EgBasedLib +* +* @brief +* Constructor +* +* @note +* +**************************************************************************************************** +*/ +EgBasedLib::EgBasedLib(const Client* pClient) + : + Lib(pClient), + m_ranks(0), + m_logicalBanks(0), + m_bankInterleave(1) +{ +} + +/** +**************************************************************************************************** +* EgBasedLib::~EgBasedLib +* +* @brief +* Destructor +**************************************************************************************************** +*/ +EgBasedLib::~EgBasedLib() +{ +} + +/** +**************************************************************************************************** +* EgBasedLib::DispatchComputeSurfaceInfo +* +* @brief +* Compute surface sizes include padded pitch,height,slices,total size in bytes, +* meanwhile output suitable tile mode and base alignment might be changed in this +* call as well. Results are returned through output parameters. +* +* @return +* TRUE if no error occurs +**************************************************************************************************** +*/ +BOOL_32 EgBasedLib::DispatchComputeSurfaceInfo( + const ADDR_COMPUTE_SURFACE_INFO_INPUT* pIn, ///< [in] input structure + ADDR_COMPUTE_SURFACE_INFO_OUTPUT* pOut ///< [out] output structure + ) const +{ + AddrTileMode tileMode = pIn->tileMode; + UINT_32 bpp = pIn->bpp; + UINT_32 numSamples = pIn->numSamples; + UINT_32 numFrags = ((pIn->numFrags == 0) ? 
numSamples : pIn->numFrags); + UINT_32 pitch = pIn->width; + UINT_32 height = pIn->height; + UINT_32 numSlices = pIn->numSlices; + UINT_32 mipLevel = pIn->mipLevel; + ADDR_SURFACE_FLAGS flags = pIn->flags; + + ADDR_TILEINFO tileInfoDef = {0}; + ADDR_TILEINFO* pTileInfo = &tileInfoDef; + UINT_32 padDims = 0; + BOOL_32 valid; + + if (pIn->flags.disallowLargeThickDegrade == 0) + { + tileMode = DegradeLargeThickTile(tileMode, bpp); + } + + // Only override numSamples for NI above + if (m_chipFamily >= ADDR_CHIP_FAMILY_NI) + { + if (numFrags != numSamples) // This means EQAA + { + // The real surface size needed is determined by number of fragments + numSamples = numFrags; + } + + // Save altered numSamples in pOut + pOut->numSamples = numSamples; + } + + // Caller makes sure pOut->pTileInfo is not NULL, see HwlComputeSurfaceInfo + ADDR_ASSERT(pOut->pTileInfo); + + if (pOut->pTileInfo != NULL) + { + pTileInfo = pOut->pTileInfo; + } + + // Set default values + if (pIn->pTileInfo != NULL) + { + if (pTileInfo != pIn->pTileInfo) + { + *pTileInfo = *pIn->pTileInfo; + } + } + else + { + memset(pTileInfo, 0, sizeof(ADDR_TILEINFO)); + } + + // For macro tile mode, we should calculate default tiling parameters + HwlSetupTileInfo(tileMode, + flags, + bpp, + pitch, + height, + numSamples, + pIn->pTileInfo, + pTileInfo, + pIn->tileType, + pOut); + + if (flags.cube) + { + if (mipLevel == 0) + { + padDims = 2; + } + + if (numSlices == 1) + { + // This is calculating one face, remove cube flag + flags.cube = 0; + } + } + + switch (tileMode) + { + case ADDR_TM_LINEAR_GENERAL://fall through + case ADDR_TM_LINEAR_ALIGNED: + valid = ComputeSurfaceInfoLinear(pIn, pOut, padDims); + break; + + case ADDR_TM_1D_TILED_THIN1://fall through + case ADDR_TM_1D_TILED_THICK: + valid = ComputeSurfaceInfoMicroTiled(pIn, pOut, padDims, tileMode); + break; + + case ADDR_TM_2D_TILED_THIN1: //fall through + case ADDR_TM_2D_TILED_THICK: //fall through + case ADDR_TM_3D_TILED_THIN1: //fall through + case 
ADDR_TM_3D_TILED_THICK: //fall through + case ADDR_TM_2D_TILED_XTHICK: //fall through + case ADDR_TM_3D_TILED_XTHICK: //fall through + case ADDR_TM_PRT_TILED_THIN1: //fall through + case ADDR_TM_PRT_2D_TILED_THIN1://fall through + case ADDR_TM_PRT_3D_TILED_THIN1://fall through + case ADDR_TM_PRT_TILED_THICK: //fall through + case ADDR_TM_PRT_2D_TILED_THICK://fall through + case ADDR_TM_PRT_3D_TILED_THICK: + valid = ComputeSurfaceInfoMacroTiled(pIn, pOut, padDims, tileMode); + break; + + default: + valid = FALSE; + ADDR_ASSERT_ALWAYS(); + break; + } + + return valid; +} + +/** +**************************************************************************************************** +* EgBasedLib::ComputeSurfaceInfoLinear +* +* @brief +* Compute linear surface sizes include padded pitch, height, slices, total size in +* bytes, meanwhile alignments as well. Since it is linear mode, so output tile mode +* will not be changed here. Results are returned through output parameters. +* +* @return +* TRUE if no error occurs +**************************************************************************************************** +*/ +BOOL_32 EgBasedLib::ComputeSurfaceInfoLinear( + const ADDR_COMPUTE_SURFACE_INFO_INPUT* pIn, ///< [in] Input structure + ADDR_COMPUTE_SURFACE_INFO_OUTPUT* pOut, ///< [out] Output structure + UINT_32 padDims ///< [in] Dimensions to padd + ) const +{ + UINT_32 expPitch = pIn->width; + UINT_32 expHeight = pIn->height; + UINT_32 expNumSlices = pIn->numSlices; + + // No linear MSAA on real H/W, keep this for TGL + UINT_32 numSamples = pOut->numSamples; + + const UINT_32 microTileThickness = 1; + + // + // Compute the surface alignments. 
+ // + ComputeSurfaceAlignmentsLinear(pIn->tileMode, + pIn->bpp, + pIn->flags, + &pOut->baseAlign, + &pOut->pitchAlign, + &pOut->heightAlign); + + if ((pIn->tileMode == ADDR_TM_LINEAR_GENERAL) && pIn->flags.color && (pIn->height > 1)) + { +#if !ALT_TEST + // When linear_general surface is accessed in multiple lines, it requires 8 pixels in pitch + // alignment since PITCH_TILE_MAX is in unit of 8 pixels. + // It is OK if it is accessed per line. + ADDR_ASSERT((pIn->width % 8) == 0); +#endif + } + + pOut->depthAlign = microTileThickness; + + expPitch = HwlPreHandleBaseLvl3xPitch(pIn, expPitch); + + // + // Pad pitch and height to the required granularities. + // + PadDimensions(pIn->tileMode, + pIn->bpp, + pIn->flags, + numSamples, + pOut->pTileInfo, + padDims, + pIn->mipLevel, + &expPitch, &pOut->pitchAlign, + &expHeight, pOut->heightAlign, + &expNumSlices, microTileThickness); + + expPitch = HwlPostHandleBaseLvl3xPitch(pIn, expPitch); + + // + // Adjust per HWL + // + + UINT_64 logicalSliceSize; + + logicalSliceSize = HwlGetSizeAdjustmentLinear(pIn->tileMode, + pIn->bpp, + numSamples, + pOut->baseAlign, + pOut->pitchAlign, + &expPitch, + &expHeight, + &pOut->heightAlign); + + if ((pIn->pitchAlign != 0) || (pIn->heightAlign != 0)) + { + if (pIn->pitchAlign != 0) + { + ADDR_ASSERT((pIn->pitchAlign % pOut->pitchAlign) == 0); + pOut->pitchAlign = pIn->pitchAlign; + + if (IsPow2(pOut->pitchAlign)) + { + expPitch = PowTwoAlign(expPitch, pOut->pitchAlign); + } + else + { + expPitch += pOut->pitchAlign - 1; + expPitch /= pOut->pitchAlign; + expPitch *= pOut->pitchAlign; + } + } + + if (pIn->heightAlign != 0) + { + ADDR_ASSERT((pIn->heightAlign % pOut->heightAlign) == 0); + pOut->heightAlign = pIn->heightAlign; + + if (IsPow2(pOut->heightAlign)) + { + expHeight = PowTwoAlign(expHeight, pOut->heightAlign); + } + else + { + expHeight += pOut->heightAlign - 1; + expHeight /= pOut->heightAlign; + expHeight *= pOut->heightAlign; + } + } + + logicalSliceSize = 
BITS_TO_BYTES(expPitch * expHeight * pIn->bpp); + } + + pOut->pitch = expPitch; + pOut->height = expHeight; + pOut->depth = expNumSlices; + + pOut->surfSize = logicalSliceSize * expNumSlices; + + pOut->tileMode = pIn->tileMode; + + return TRUE; +} + +/** +**************************************************************************************************** +* EgBasedLib::ComputeSurfaceInfoMicroTiled +* +* @brief +* Compute 1D/Micro Tiled surface sizes include padded pitch, height, slices, total +* size in bytes, meanwhile alignments as well. Results are returned through output +* parameters. +* +* @return +* TRUE if no error occurs +**************************************************************************************************** +*/ +BOOL_32 EgBasedLib::ComputeSurfaceInfoMicroTiled( + const ADDR_COMPUTE_SURFACE_INFO_INPUT* pIn, ///< [in] Input structure + ADDR_COMPUTE_SURFACE_INFO_OUTPUT* pOut, ///< [out] Output structure + UINT_32 padDims, ///< [in] Dimensions to padd + AddrTileMode expTileMode ///< [in] Expected tile mode + ) const +{ + BOOL_32 valid = TRUE; + + UINT_32 microTileThickness; + UINT_32 expPitch = pIn->width; + UINT_32 expHeight = pIn->height; + UINT_32 expNumSlices = pIn->numSlices; + + // No 1D MSAA on real H/W, keep this for TGL + UINT_32 numSamples = pOut->numSamples; + + // + // Compute the micro tile thickness. + // + microTileThickness = Thickness(expTileMode); + + // + // Extra override for mip levels + // + if (pIn->mipLevel > 0) + { + // + // Reduce tiling mode from thick to thin if the number of slices is less than the + // micro tile thickness. + // + if ((expTileMode == ADDR_TM_1D_TILED_THICK) && + (expNumSlices < ThickTileThickness)) + { + expTileMode = HwlDegradeThickTileMode(ADDR_TM_1D_TILED_THICK, expNumSlices, NULL); + if (expTileMode != ADDR_TM_1D_TILED_THICK) + { + microTileThickness = 1; + } + } + } + + // + // Compute the surface restrictions. 
+ // + ComputeSurfaceAlignmentsMicroTiled(expTileMode, + pIn->bpp, + pIn->flags, + pIn->mipLevel, + numSamples, + &pOut->baseAlign, + &pOut->pitchAlign, + &pOut->heightAlign); + + pOut->depthAlign = microTileThickness; + + // + // Pad pitch and height to the required granularities. + // Compute surface size. + // Return parameters. + // + PadDimensions(expTileMode, + pIn->bpp, + pIn->flags, + numSamples, + pOut->pTileInfo, + padDims, + pIn->mipLevel, + &expPitch, &pOut->pitchAlign, + &expHeight, pOut->heightAlign, + &expNumSlices, microTileThickness); + + // + // Get HWL specific pitch adjustment + // + UINT_64 logicalSliceSize = HwlGetSizeAdjustmentMicroTiled(microTileThickness, + pIn->bpp, + pIn->flags, + numSamples, + pOut->baseAlign, + pOut->pitchAlign, + &expPitch, + &expHeight); + + pOut->pitch = expPitch; + pOut->height = expHeight; + pOut->depth = expNumSlices; + + pOut->surfSize = logicalSliceSize * expNumSlices; + + pOut->tileMode = expTileMode; + + return valid; +} + +/** +**************************************************************************************************** +* EgBasedLib::ComputeSurfaceInfoMacroTiled +* +* @brief +* Compute 2D/macro tiled surface sizes include padded pitch, height, slices, total +* size in bytes, meanwhile output suitable tile mode and alignments might be changed +* in this call as well. Results are returned through output parameters. 
+* +* @return +* TRUE if no error occurs +**************************************************************************************************** +*/ +BOOL_32 EgBasedLib::ComputeSurfaceInfoMacroTiled( + const ADDR_COMPUTE_SURFACE_INFO_INPUT* pIn, ///< [in] Input structure + ADDR_COMPUTE_SURFACE_INFO_OUTPUT* pOut, ///< [out] Output structure + UINT_32 padDims, ///< [in] Dimensions to padd + AddrTileMode expTileMode ///< [in] Expected tile mode + ) const +{ + BOOL_32 valid = TRUE; + + AddrTileMode origTileMode = expTileMode; + UINT_32 microTileThickness; + + UINT_32 paddedPitch; + UINT_32 paddedHeight; + UINT_64 bytesPerSlice; + + UINT_32 expPitch = pIn->width; + UINT_32 expHeight = pIn->height; + UINT_32 expNumSlices = pIn->numSlices; + + UINT_32 numSamples = pOut->numSamples; + + // + // Compute the surface restrictions as base + // SanityCheckMacroTiled is called in ComputeSurfaceAlignmentsMacroTiled + // + valid = ComputeSurfaceAlignmentsMacroTiled(expTileMode, + pIn->bpp, + pIn->flags, + pIn->mipLevel, + numSamples, + pOut); + + if (valid) + { + // + // Compute the micro tile thickness. + // + microTileThickness = Thickness(expTileMode); + + // + // Find the correct tiling mode for mip levels + // + if (pIn->mipLevel > 0) + { + // + // Try valid tile mode + // + expTileMode = ComputeSurfaceMipLevelTileMode(expTileMode, + pIn->bpp, + expPitch, + expHeight, + expNumSlices, + numSamples, + pOut->blockWidth, + pOut->blockHeight, + pOut->pTileInfo); + + if (!IsMacroTiled(expTileMode)) // Downgraded to micro-tiled + { + return ComputeSurfaceInfoMicroTiled(pIn, pOut, padDims, expTileMode); + } + else if (microTileThickness != Thickness(expTileMode)) + { + // + // Re-compute if thickness changed since bank-height may be changed! 
+ // + return ComputeSurfaceInfoMacroTiled(pIn, pOut, padDims, expTileMode); + } + } + + paddedPitch = expPitch; + paddedHeight = expHeight; + + // + // Re-cal alignment + // + if (expTileMode != origTileMode) // Tile mode is changed but still macro-tiled + { + valid = ComputeSurfaceAlignmentsMacroTiled(expTileMode, + pIn->bpp, + pIn->flags, + pIn->mipLevel, + numSamples, + pOut); + } + + // + // Do padding + // + PadDimensions(expTileMode, + pIn->bpp, + pIn->flags, + numSamples, + pOut->pTileInfo, + padDims, + pIn->mipLevel, + &paddedPitch, &pOut->pitchAlign, + &paddedHeight, pOut->heightAlign, + &expNumSlices, microTileThickness); + + if (pIn->flags.qbStereo && + (pOut->pStereoInfo != NULL)) + { + UINT_32 stereoHeightAlign = HwlStereoCheckRightOffsetPadding(pOut->pTileInfo); + + if (stereoHeightAlign != 0) + { + paddedHeight = PowTwoAlign(paddedHeight, stereoHeightAlign); + } + } + + if ((pIn->flags.needEquation == TRUE) && + (m_chipFamily == ADDR_CHIP_FAMILY_SI) && + (pIn->numMipLevels > 1) && + (pIn->mipLevel == 0)) + { + BOOL_32 convertTo1D = FALSE; + + ADDR_ASSERT(Thickness(expTileMode) == 1); + + for (UINT_32 i = 1; i < pIn->numMipLevels; i++) + { + UINT_32 mipPitch = Max(1u, paddedPitch >> i); + UINT_32 mipHeight = Max(1u, pIn->height >> i); + UINT_32 mipSlices = pIn->flags.volume ? 
+ Max(1u, pIn->numSlices >> i) : pIn->numSlices; + expTileMode = ComputeSurfaceMipLevelTileMode(expTileMode, + pIn->bpp, + mipPitch, + mipHeight, + mipSlices, + numSamples, + pOut->blockWidth, + pOut->blockHeight, + pOut->pTileInfo); + + if (IsMacroTiled(expTileMode)) + { + if (PowTwoAlign(mipPitch, pOut->blockWidth) != + PowTwoAlign(mipPitch, pOut->pitchAlign)) + { + convertTo1D = TRUE; + break; + } + } + else + { + break; + } + } + + if (convertTo1D) + { + return ComputeSurfaceInfoMicroTiled(pIn, pOut, padDims, ADDR_TM_1D_TILED_THIN1); + } + } + + pOut->pitch = paddedPitch; + // Put this check right here to workaround special mipmap cases which the original height + // is needed. + // The original height is pre-stored in pOut->height in PostComputeMipLevel and + // pOut->pitch is needed in HwlCheckLastMacroTiledLvl, too. + if (m_configFlags.checkLast2DLevel && (numSamples == 1)) // Don't check MSAA + { + // Set a TRUE in pOut if next Level is the first 1D sub level + HwlCheckLastMacroTiledLvl(pIn, pOut); + } + pOut->height = paddedHeight; + + pOut->depth = expNumSlices; + + // + // Compute the size of a slice. + // + bytesPerSlice = BITS_TO_BYTES(static_cast(paddedPitch) * + paddedHeight * NextPow2(pIn->bpp) * numSamples); + + pOut->surfSize = bytesPerSlice * expNumSlices; + + pOut->tileMode = expTileMode; + + pOut->depthAlign = microTileThickness; + + } // if (valid) + + return valid; +} + +/** +**************************************************************************************************** +* EgBasedLib::ComputeSurfaceAlignmentsLinear +* +* @brief +* Compute linear surface alignment, calculation results are returned through +* output parameters. 
+* +* @return +* TRUE if no error occurs +**************************************************************************************************** +*/ +BOOL_32 EgBasedLib::ComputeSurfaceAlignmentsLinear( + AddrTileMode tileMode, ///< [in] tile mode + UINT_32 bpp, ///< [in] bits per pixel + ADDR_SURFACE_FLAGS flags, ///< [in] surface flags + UINT_32* pBaseAlign, ///< [out] base address alignment in bytes + UINT_32* pPitchAlign, ///< [out] pitch alignment in pixels + UINT_32* pHeightAlign ///< [out] height alignment in pixels + ) const +{ + BOOL_32 valid = TRUE; + + switch (tileMode) + { + case ADDR_TM_LINEAR_GENERAL: + // + // The required base alignment and pitch and height granularities is to 1 element. + // + *pBaseAlign = (bpp > 8) ? bpp / 8 : 1; + *pPitchAlign = 1; + *pHeightAlign = 1; + break; + case ADDR_TM_LINEAR_ALIGNED: + // + // The required alignment for base is the pipe interleave size. + // The required granularity for pitch is hwl dependent. + // The required granularity for height is one row. + // + *pBaseAlign = m_pipeInterleaveBytes; + *pPitchAlign = HwlGetPitchAlignmentLinear(bpp, flags); + *pHeightAlign = 1; + break; + default: + *pBaseAlign = 1; + *pPitchAlign = 1; + *pHeightAlign = 1; + ADDR_UNHANDLED_CASE(); + break; + } + + AdjustPitchAlignment(flags, pPitchAlign); + + return valid; +} + +/** +**************************************************************************************************** +* EgBasedLib::ComputeSurfaceAlignmentsMicroTiled +* +* @brief +* Compute 1D tiled surface alignment, calculation results are returned through +* output parameters. 
+* +* @return +* TRUE if no error occurs +**************************************************************************************************** +*/ +BOOL_32 EgBasedLib::ComputeSurfaceAlignmentsMicroTiled( + AddrTileMode tileMode, ///< [in] tile mode + UINT_32 bpp, ///< [in] bits per pixel + ADDR_SURFACE_FLAGS flags, ///< [in] surface flags + UINT_32 mipLevel, ///< [in] mip level + UINT_32 numSamples, ///< [in] number of samples + UINT_32* pBaseAlign, ///< [out] base address alignment in bytes + UINT_32* pPitchAlign, ///< [out] pitch alignment in pixels + UINT_32* pHeightAlign ///< [out] height alignment in pixels + ) const +{ + BOOL_32 valid = TRUE; + + // + // The required alignment for base is the pipe interleave size. + // + *pBaseAlign = m_pipeInterleaveBytes; + + *pPitchAlign = HwlGetPitchAlignmentMicroTiled(tileMode, bpp, flags, numSamples); + + *pHeightAlign = MicroTileHeight; + + AdjustPitchAlignment(flags, pPitchAlign); + + // Workaround 2 for 1D tiling - There is HW bug for Carrizo, + // where it requires the following alignments for 1D tiling. 
+ if (flags.czDispCompatible && (mipLevel == 0)) + { + *pBaseAlign = PowTwoAlign(*pBaseAlign, 4096); //Base address MOD 4096 = 0 + *pPitchAlign = PowTwoAlign(*pPitchAlign, 512 / (BITS_TO_BYTES(bpp))); //(8 lines * pitch * bytes per pixel) MOD 4096 = 0 + } + // end Carrizo workaround for 1D tilling + + return valid; +} + +/** +**************************************************************************************************** +* EgBasedLib::HwlReduceBankWidthHeight +* +* @brief +* Additional checks, reduce bankHeight/bankWidth if needed and possible +* tileSize*BANK_WIDTH*BANK_HEIGHT <= ROW_SIZE +* +* @return +* TRUE if no error occurs +**************************************************************************************************** +*/ +BOOL_32 EgBasedLib::HwlReduceBankWidthHeight( + UINT_32 tileSize, ///< [in] tile size + UINT_32 bpp, ///< [in] bits per pixel + ADDR_SURFACE_FLAGS flags, ///< [in] surface flags + UINT_32 numSamples, ///< [in] number of samples + UINT_32 bankHeightAlign, ///< [in] bank height alignment + UINT_32 pipes, ///< [in] pipes + ADDR_TILEINFO* pTileInfo ///< [in,out] bank structure. + ) const +{ + UINT_32 macroAspectAlign; + BOOL_32 valid = TRUE; + + if (tileSize * pTileInfo->bankWidth * pTileInfo->bankHeight > m_rowSize) + { + BOOL_32 stillGreater = TRUE; + + // Try reducing bankWidth first + if (stillGreater && pTileInfo->bankWidth > 1) + { + while (stillGreater && pTileInfo->bankWidth > 0) + { + pTileInfo->bankWidth >>= 1; + + if (pTileInfo->bankWidth == 0) + { + pTileInfo->bankWidth = 1; + break; + } + + stillGreater = + tileSize * pTileInfo->bankWidth * pTileInfo->bankHeight > m_rowSize; + } + + // bankWidth is reduced above, so we need to recalculate bankHeight and ratio + bankHeightAlign = Max(1u, + m_pipeInterleaveBytes * m_bankInterleave / + (tileSize * pTileInfo->bankWidth) + ); + + // We cannot increase bankHeight so just assert this case. 
+ ADDR_ASSERT((pTileInfo->bankHeight % bankHeightAlign) == 0); + + if (numSamples == 1) + { + macroAspectAlign = Max(1u, + m_pipeInterleaveBytes * m_bankInterleave / + (tileSize * pipes * pTileInfo->bankWidth) + ); + pTileInfo->macroAspectRatio = PowTwoAlign(pTileInfo->macroAspectRatio, + macroAspectAlign); + } + } + + // Early quit bank_height degradation for "64" bit z buffer + if (flags.depth && bpp >= 64) + { + stillGreater = FALSE; + } + + // Then try reducing bankHeight + if (stillGreater && pTileInfo->bankHeight > bankHeightAlign) + { + while (stillGreater && pTileInfo->bankHeight > bankHeightAlign) + { + pTileInfo->bankHeight >>= 1; + + if (pTileInfo->bankHeight < bankHeightAlign) + { + pTileInfo->bankHeight = bankHeightAlign; + break; + } + + stillGreater = + tileSize * pTileInfo->bankWidth * pTileInfo->bankHeight > m_rowSize; + } + } + + valid = !stillGreater; + + // Generate a warning if we still fail to meet this constraint + if (valid == FALSE) + { + ADDR_WARN( + 0, ("TILE_SIZE(%d)*BANK_WIDTH(%d)*BANK_HEIGHT(%d) <= ROW_SIZE(%d)", + tileSize, pTileInfo->bankWidth, pTileInfo->bankHeight, m_rowSize)); + } + } + + return valid; +} + +/** +**************************************************************************************************** +* EgBasedLib::ComputeSurfaceAlignmentsMacroTiled +* +* @brief +* Compute 2D tiled surface alignment, calculation results are returned through +* output parameters. 
+* +* @return +* TRUE if no error occurs +**************************************************************************************************** +*/ +BOOL_32 EgBasedLib::ComputeSurfaceAlignmentsMacroTiled( + AddrTileMode tileMode, ///< [in] tile mode + UINT_32 bpp, ///< [in] bits per pixel + ADDR_SURFACE_FLAGS flags, ///< [in] surface flags + UINT_32 mipLevel, ///< [in] mip level + UINT_32 numSamples, ///< [in] number of samples + ADDR_COMPUTE_SURFACE_INFO_OUTPUT* pOut ///< [in,out] Surface output + ) const +{ + ADDR_TILEINFO* pTileInfo = pOut->pTileInfo; + + BOOL_32 valid = SanityCheckMacroTiled(pTileInfo); + + if (valid) + { + UINT_32 macroTileWidth; + UINT_32 macroTileHeight; + + UINT_32 tileSize; + UINT_32 bankHeightAlign; + UINT_32 macroAspectAlign; + + UINT_32 thickness = Thickness(tileMode); + UINT_32 pipes = HwlGetPipes(pTileInfo); + + // + // Align bank height first according to latest h/w spec + // + + // tile_size = MIN(tile_split, 64 * tile_thickness * element_bytes * num_samples) + tileSize = Min(pTileInfo->tileSplitBytes, + BITS_TO_BYTES(64 * thickness * bpp * numSamples)); + + // bank_height_align = + // MAX(1, (pipe_interleave_bytes * bank_interleave)/(tile_size*bank_width)) + bankHeightAlign = Max(1u, + m_pipeInterleaveBytes * m_bankInterleave / + (tileSize * pTileInfo->bankWidth) + ); + + pTileInfo->bankHeight = PowTwoAlign(pTileInfo->bankHeight, bankHeightAlign); + + // num_pipes * bank_width * macro_tile_aspect >= + // (pipe_interleave_size * bank_interleave) / tile_size + if (numSamples == 1) + { + // this restriction is only for mipmap (mipmap's numSamples must be 1) + macroAspectAlign = Max(1u, + m_pipeInterleaveBytes * m_bankInterleave / + (tileSize * pipes * pTileInfo->bankWidth) + ); + pTileInfo->macroAspectRatio = PowTwoAlign(pTileInfo->macroAspectRatio, macroAspectAlign); + } + + valid = HwlReduceBankWidthHeight(tileSize, + bpp, + flags, + numSamples, + bankHeightAlign, + pipes, + pTileInfo); + + // + // The required granularity for pitch 
is the macro tile width. + // + macroTileWidth = MicroTileWidth * pTileInfo->bankWidth * pipes * + pTileInfo->macroAspectRatio; + + pOut->pitchAlign = macroTileWidth; + pOut->blockWidth = macroTileWidth; + + AdjustPitchAlignment(flags, &pOut->pitchAlign); + + // + // The required granularity for height is the macro tile height. + // + macroTileHeight = MicroTileHeight * pTileInfo->bankHeight * pTileInfo->banks / + pTileInfo->macroAspectRatio; + + pOut->heightAlign = macroTileHeight; + pOut->blockHeight = macroTileHeight; + + // + // Compute base alignment + // + pOut->baseAlign = + pipes * pTileInfo->bankWidth * pTileInfo->banks * pTileInfo->bankHeight * tileSize; + + HwlComputeSurfaceAlignmentsMacroTiled(tileMode, bpp, flags, mipLevel, numSamples, pOut); + } + + return valid; +} + +/** +**************************************************************************************************** +* EgBasedLib::SanityCheckMacroTiled +* +* @brief +* Check if macro-tiled parameters are valid +* @return +* TRUE if valid +**************************************************************************************************** +*/ +BOOL_32 EgBasedLib::SanityCheckMacroTiled( + ADDR_TILEINFO* pTileInfo ///< [in] macro-tiled parameters + ) const +{ + BOOL_32 valid = TRUE; + MAYBE_UNUSED UINT_32 numPipes = HwlGetPipes(pTileInfo); + + switch (pTileInfo->banks) + { + case 2: //fall through + case 4: //fall through + case 8: //fall through + case 16: + break; + default: + valid = FALSE; + break; + + } + + if (valid) + { + switch (pTileInfo->bankWidth) + { + case 1: //fall through + case 2: //fall through + case 4: //fall through + case 8: + break; + default: + valid = FALSE; + break; + } + } + + if (valid) + { + switch (pTileInfo->bankHeight) + { + case 1: //fall through + case 2: //fall through + case 4: //fall through + case 8: + break; + default: + valid = FALSE; + break; + } + } + + if (valid) + { + switch (pTileInfo->macroAspectRatio) + { + case 1: //fall through + case 2: //fall 
through + case 4: //fall through + case 8: + break; + default: + valid = FALSE; + break; + } + } + + if (valid) + { + if (pTileInfo->banks < pTileInfo->macroAspectRatio) + { + // This will generate macro tile height <= 1 + valid = FALSE; + } + } + + if (valid) + { + if (pTileInfo->tileSplitBytes > m_rowSize) + { + ADDR_WARN(0, ("tileSplitBytes is bigger than row size")); + } + } + + if (valid) + { + valid = HwlSanityCheckMacroTiled(pTileInfo); + } + + ADDR_ASSERT(valid == TRUE); + + // Add this assert for guidance + ADDR_ASSERT(numPipes * pTileInfo->banks >= 4); + + return valid; +} + +/** +**************************************************************************************************** +* EgBasedLib::ComputeSurfaceMipLevelTileMode +* +* @brief +* Compute valid tile mode for surface mipmap sub-levels +* +* @return +* Suitable tile mode +**************************************************************************************************** +*/ +AddrTileMode EgBasedLib::ComputeSurfaceMipLevelTileMode( + AddrTileMode baseTileMode, ///< [in] base tile mode + UINT_32 bpp, ///< [in] bits per pixels + UINT_32 pitch, ///< [in] current level pitch + UINT_32 height, ///< [in] current level height + UINT_32 numSlices, ///< [in] current number of slices + UINT_32 numSamples, ///< [in] number of samples + UINT_32 pitchAlign, ///< [in] pitch alignment + UINT_32 heightAlign, ///< [in] height alignment + ADDR_TILEINFO* pTileInfo ///< [in] ptr to bank structure + ) const +{ + UINT_64 bytesPerSlice; + (void)bytesPerSlice; + UINT_32 bytesPerTile; + + AddrTileMode expTileMode = baseTileMode; + UINT_32 microTileThickness = Thickness(expTileMode); + UINT_32 interleaveSize = m_pipeInterleaveBytes * m_bankInterleave; + + // + // Compute the size of a slice. 
+ // + bytesPerSlice = BITS_TO_BYTES(static_cast(pitch) * height * bpp * numSamples); + bytesPerTile = BITS_TO_BYTES(MicroTilePixels * microTileThickness * NextPow2(bpp) * numSamples); + + // + // Reduce tiling mode from thick to thin if the number of slices is less than the + // micro tile thickness. + // + if (numSlices < microTileThickness) + { + expTileMode = HwlDegradeThickTileMode(expTileMode, numSlices, &bytesPerTile); + } + + if (bytesPerTile > pTileInfo->tileSplitBytes) + { + bytesPerTile = pTileInfo->tileSplitBytes; + } + + UINT_32 threshold1 = + bytesPerTile * HwlGetPipes(pTileInfo) * pTileInfo->bankWidth * pTileInfo->macroAspectRatio; + + UINT_32 threshold2 = + bytesPerTile * pTileInfo->bankWidth * pTileInfo->bankHeight; + + // + // Reduce the tile mode from 2D/3D to 1D in following conditions + // + switch (expTileMode) + { + case ADDR_TM_2D_TILED_THIN1: //fall through + case ADDR_TM_3D_TILED_THIN1: + case ADDR_TM_PRT_TILED_THIN1: + case ADDR_TM_PRT_2D_TILED_THIN1: + case ADDR_TM_PRT_3D_TILED_THIN1: + if ((pitch < pitchAlign) || + (height < heightAlign) || + (interleaveSize > threshold1) || + (interleaveSize > threshold2)) + { + expTileMode = ADDR_TM_1D_TILED_THIN1; + } + break; + case ADDR_TM_2D_TILED_THICK: //fall through + case ADDR_TM_3D_TILED_THICK: + case ADDR_TM_2D_TILED_XTHICK: + case ADDR_TM_3D_TILED_XTHICK: + case ADDR_TM_PRT_TILED_THICK: + case ADDR_TM_PRT_2D_TILED_THICK: + case ADDR_TM_PRT_3D_TILED_THICK: + if ((pitch < pitchAlign) || + (height < heightAlign)) + { + expTileMode = ADDR_TM_1D_TILED_THICK; + } + break; + default: + break; + } + + return expTileMode; +} + +/** +**************************************************************************************************** +* EgBasedLib::HwlGetAlignmentInfoMacroTiled +* @brief +* Get alignment info for giving tile mode +* @return +* TRUE if getting alignment is OK +**************************************************************************************************** +*/ +BOOL_32 
EgBasedLib::HwlGetAlignmentInfoMacroTiled( + const ADDR_COMPUTE_SURFACE_INFO_INPUT* pIn, ///< [in] create surface info + UINT_32* pPitchAlign, ///< [out] pitch alignment + UINT_32* pHeightAlign, ///< [out] height alignment + UINT_32* pSizeAlign ///< [out] size alignment + ) const +{ + BOOL_32 valid = TRUE; + + ADDR_ASSERT(IsMacroTiled(pIn->tileMode)); + + UINT_32 numSamples = (pIn->numFrags == 0) ? pIn->numSamples : pIn->numFrags; + + ADDR_ASSERT(pIn->pTileInfo); + ADDR_TILEINFO tileInfo = *pIn->pTileInfo; + ADDR_COMPUTE_SURFACE_INFO_OUTPUT out = {0}; + out.pTileInfo = &tileInfo; + + if (UseTileIndex(pIn->tileIndex)) + { + out.tileIndex = pIn->tileIndex; + out.macroModeIndex = TileIndexInvalid; + } + + HwlSetupTileInfo(pIn->tileMode, + pIn->flags, + pIn->bpp, + pIn->width, + pIn->height, + numSamples, + &tileInfo, + &tileInfo, + pIn->tileType, + &out); + + valid = ComputeSurfaceAlignmentsMacroTiled(pIn->tileMode, + pIn->bpp, + pIn->flags, + pIn->mipLevel, + numSamples, + &out); + + if (valid) + { + *pPitchAlign = out.pitchAlign; + *pHeightAlign = out.heightAlign; + *pSizeAlign = out.baseAlign; + } + + return valid; +} + +/** +**************************************************************************************************** +* EgBasedLib::HwlDegradeThickTileMode +* +* @brief +* Degrades valid tile mode for thick modes if needed +* +* @return +* Suitable tile mode +**************************************************************************************************** +*/ +AddrTileMode EgBasedLib::HwlDegradeThickTileMode( + AddrTileMode baseTileMode, ///< [in] base tile mode + UINT_32 numSlices, ///< [in] current number of slices + UINT_32* pBytesPerTile ///< [in,out] pointer to bytes per slice + ) const +{ + ADDR_ASSERT(numSlices < Thickness(baseTileMode)); + // if pBytesPerTile is NULL, this is a don't-care.... + UINT_32 bytesPerTile = pBytesPerTile != NULL ? 
*pBytesPerTile : 64; + + AddrTileMode expTileMode = baseTileMode; + switch (baseTileMode) + { + case ADDR_TM_1D_TILED_THICK: + expTileMode = ADDR_TM_1D_TILED_THIN1; + bytesPerTile >>= 2; + break; + case ADDR_TM_2D_TILED_THICK: + expTileMode = ADDR_TM_2D_TILED_THIN1; + bytesPerTile >>= 2; + break; + case ADDR_TM_3D_TILED_THICK: + expTileMode = ADDR_TM_3D_TILED_THIN1; + bytesPerTile >>= 2; + break; + case ADDR_TM_2D_TILED_XTHICK: + if (numSlices < ThickTileThickness) + { + expTileMode = ADDR_TM_2D_TILED_THIN1; + bytesPerTile >>= 3; + } + else + { + expTileMode = ADDR_TM_2D_TILED_THICK; + bytesPerTile >>= 1; + } + break; + case ADDR_TM_3D_TILED_XTHICK: + if (numSlices < ThickTileThickness) + { + expTileMode = ADDR_TM_3D_TILED_THIN1; + bytesPerTile >>= 3; + } + else + { + expTileMode = ADDR_TM_3D_TILED_THICK; + bytesPerTile >>= 1; + } + break; + default: + ADDR_ASSERT_ALWAYS(); + break; + } + + if (pBytesPerTile != NULL) + { + *pBytesPerTile = bytesPerTile; + } + + return expTileMode; +} + +/** +**************************************************************************************************** +* EgBasedLib::DispatchComputeSurfaceAddrFromCoord +* +* @brief +* Compute surface address from given coord (x, y, slice,sample) +* +* @return +* Address in bytes +**************************************************************************************************** +*/ +UINT_64 EgBasedLib::DispatchComputeSurfaceAddrFromCoord( + const ADDR_COMPUTE_SURFACE_ADDRFROMCOORD_INPUT* pIn, ///< [in] input structure + ADDR_COMPUTE_SURFACE_ADDRFROMCOORD_OUTPUT* pOut ///< [out] output structure + ) const +{ + UINT_32 x = pIn->x; + UINT_32 y = pIn->y; + UINT_32 slice = pIn->slice; + UINT_32 sample = pIn->sample; + UINT_32 bpp = pIn->bpp; + UINT_32 pitch = pIn->pitch; + UINT_32 height = pIn->height; + UINT_32 numSlices = pIn->numSlices; + UINT_32 numSamples = ((pIn->numSamples == 0) ? 1 : pIn->numSamples); + UINT_32 numFrags = ((pIn->numFrags == 0) ? 
numSamples : pIn->numFrags); + AddrTileMode tileMode = pIn->tileMode; + AddrTileType microTileType = pIn->tileType; + BOOL_32 ignoreSE = pIn->ignoreSE; + BOOL_32 isDepthSampleOrder = pIn->isDepth; + ADDR_TILEINFO* pTileInfo = pIn->pTileInfo; + + UINT_32* pBitPosition = &pOut->bitPosition; + UINT_64 addr; + + // ADDR_DEPTH_SAMPLE_ORDER = non-disp + depth-sample-order + if (microTileType == ADDR_DEPTH_SAMPLE_ORDER) + { + isDepthSampleOrder = TRUE; + } + + if (m_chipFamily >= ADDR_CHIP_FAMILY_NI) + { + if (numFrags != numSamples) + { + numSamples = numFrags; + ADDR_ASSERT(sample < numSamples); + } + + /// @note + /// 128 bit/thick tiled surface doesn't support display tiling and + /// mipmap chain must have the same tileType, so please fill tileType correctly + if (IsLinear(pIn->tileMode) == FALSE) + { + if (bpp >= 128 || Thickness(tileMode) > 1) + { + ADDR_ASSERT(microTileType != ADDR_DISPLAYABLE); + } + } + } + + switch (tileMode) + { + case ADDR_TM_LINEAR_GENERAL://fall through + case ADDR_TM_LINEAR_ALIGNED: + addr = ComputeSurfaceAddrFromCoordLinear(x, + y, + slice, + sample, + bpp, + pitch, + height, + numSlices, + pBitPosition); + break; + case ADDR_TM_1D_TILED_THIN1://fall through + case ADDR_TM_1D_TILED_THICK: + addr = ComputeSurfaceAddrFromCoordMicroTiled(x, + y, + slice, + sample, + bpp, + pitch, + height, + numSamples, + tileMode, + microTileType, + isDepthSampleOrder, + pBitPosition); + break; + case ADDR_TM_2D_TILED_THIN1: //fall through + case ADDR_TM_2D_TILED_THICK: //fall through + case ADDR_TM_3D_TILED_THIN1: //fall through + case ADDR_TM_3D_TILED_THICK: //fall through + case ADDR_TM_2D_TILED_XTHICK: //fall through + case ADDR_TM_3D_TILED_XTHICK: //fall through + case ADDR_TM_PRT_TILED_THIN1: //fall through + case ADDR_TM_PRT_2D_TILED_THIN1://fall through + case ADDR_TM_PRT_3D_TILED_THIN1://fall through + case ADDR_TM_PRT_TILED_THICK: //fall through + case ADDR_TM_PRT_2D_TILED_THICK://fall through + case ADDR_TM_PRT_3D_TILED_THICK: + UINT_32 
pipeSwizzle; + UINT_32 bankSwizzle; + + if (m_configFlags.useCombinedSwizzle) + { + ExtractBankPipeSwizzle(pIn->tileSwizzle, pIn->pTileInfo, + &bankSwizzle, &pipeSwizzle); + } + else + { + pipeSwizzle = pIn->pipeSwizzle; + bankSwizzle = pIn->bankSwizzle; + } + + addr = ComputeSurfaceAddrFromCoordMacroTiled(x, + y, + slice, + sample, + bpp, + pitch, + height, + numSamples, + tileMode, + microTileType, + ignoreSE, + isDepthSampleOrder, + pipeSwizzle, + bankSwizzle, + pTileInfo, + pBitPosition); + break; + default: + addr = 0; + ADDR_ASSERT_ALWAYS(); + break; + } + + return addr; +} + +/** +**************************************************************************************************** +* EgBasedLib::ComputeMacroTileEquation +* +* @brief +* Computes the address equation in macro tile +* @return +* If equation can be computed +**************************************************************************************************** +*/ +ADDR_E_RETURNCODE EgBasedLib::ComputeMacroTileEquation( + UINT_32 log2BytesPP, ///< [in] log2 of bytes per pixel + AddrTileMode tileMode, ///< [in] tile mode + AddrTileType microTileType, ///< [in] micro tiling type + ADDR_TILEINFO* pTileInfo, ///< [in] bank structure + ADDR_EQUATION* pEquation ///< [out] Equation for addressing in macro tile + ) const +{ + ADDR_E_RETURNCODE retCode; + + // Element equation within a tile + retCode = ComputeMicroTileEquation(log2BytesPP, tileMode, microTileType, pEquation); + + if (retCode == ADDR_OK) + { + // Tile equesiton with signle pipe bank + UINT_32 numPipes = HwlGetPipes(pTileInfo); + UINT_32 numPipeBits = Log2(numPipes); + + for (UINT_32 i = 0; i < Log2(pTileInfo->bankWidth); i++) + { + pEquation->addr[pEquation->numBits].valid = 1; + pEquation->addr[pEquation->numBits].channel = 0; + pEquation->addr[pEquation->numBits].index = i + log2BytesPP + 3 + numPipeBits; + pEquation->numBits++; + } + + for (UINT_32 i = 0; i < Log2(pTileInfo->bankHeight); i++) + { + pEquation->addr[pEquation->numBits].valid 
= 1; + pEquation->addr[pEquation->numBits].channel = 1; + pEquation->addr[pEquation->numBits].index = i + 3; + pEquation->numBits++; + } + + ADDR_EQUATION equation; + memset(&equation, 0, sizeof(ADDR_EQUATION)); + + UINT_32 thresholdX = 32; + UINT_32 thresholdY = 32; + + if (IsPrtNoRotationTileMode(tileMode)) + { + UINT_32 macroTilePitch = + (MicroTileWidth * pTileInfo->bankWidth * numPipes) * pTileInfo->macroAspectRatio; + UINT_32 macroTileHeight = + (MicroTileHeight * pTileInfo->bankHeight * pTileInfo->banks) / + pTileInfo->macroAspectRatio; + thresholdX = Log2(macroTilePitch); + thresholdY = Log2(macroTileHeight); + } + + // Pipe equation + retCode = ComputePipeEquation(log2BytesPP, thresholdX, thresholdY, pTileInfo, &equation); + + if (retCode == ADDR_OK) + { + UINT_32 pipeBitStart = Log2(m_pipeInterleaveBytes); + + if (pEquation->numBits > pipeBitStart) + { + UINT_32 numLeftShift = pEquation->numBits - pipeBitStart; + + for (UINT_32 i = 0; i < numLeftShift; i++) + { + pEquation->addr[pEquation->numBits + equation.numBits - i - 1] = + pEquation->addr[pEquation->numBits - i - 1]; + pEquation->xor1[pEquation->numBits + equation.numBits - i - 1] = + pEquation->xor1[pEquation->numBits - i - 1]; + pEquation->xor2[pEquation->numBits + equation.numBits - i - 1] = + pEquation->xor2[pEquation->numBits - i - 1]; + } + } + + for (UINT_32 i = 0; i < equation.numBits; i++) + { + pEquation->addr[pipeBitStart + i] = equation.addr[i]; + pEquation->xor1[pipeBitStart + i] = equation.xor1[i]; + pEquation->xor2[pipeBitStart + i] = equation.xor2[i]; + pEquation->numBits++; + } + + // Bank equation + memset(&equation, 0, sizeof(ADDR_EQUATION)); + + retCode = ComputeBankEquation(log2BytesPP, thresholdX, thresholdY, + pTileInfo, &equation); + + if (retCode == ADDR_OK) + { + UINT_32 bankBitStart = pipeBitStart + numPipeBits + Log2(m_bankInterleave); + + if (pEquation->numBits > bankBitStart) + { + UINT_32 numLeftShift = pEquation->numBits - bankBitStart; + + for (UINT_32 i = 0; i < 
numLeftShift; i++) + { + pEquation->addr[pEquation->numBits + equation.numBits - i - 1] = + pEquation->addr[pEquation->numBits - i - 1]; + pEquation->xor1[pEquation->numBits + equation.numBits - i - 1] = + pEquation->xor1[pEquation->numBits - i - 1]; + pEquation->xor2[pEquation->numBits + equation.numBits - i - 1] = + pEquation->xor2[pEquation->numBits - i - 1]; + } + } + + for (UINT_32 i = 0; i < equation.numBits; i++) + { + pEquation->addr[bankBitStart + i] = equation.addr[i]; + pEquation->xor1[bankBitStart + i] = equation.xor1[i]; + pEquation->xor2[bankBitStart + i] = equation.xor2[i]; + pEquation->numBits++; + } + } + } + } + + return retCode; +} + +/** +**************************************************************************************************** +* EgBasedLib::ComputeSurfaceAddrFromCoordMicroTiled +* +* @brief +* Computes the surface address and bit position from a +* coordinate for 2D tilied (macro tiled) +* @return +* The byte address +**************************************************************************************************** +*/ +UINT_64 EgBasedLib::ComputeSurfaceAddrFromCoordMacroTiled( + UINT_32 x, ///< [in] x coordinate + UINT_32 y, ///< [in] y coordinate + UINT_32 slice, ///< [in] slice index + UINT_32 sample, ///< [in] sample index + UINT_32 bpp, ///< [in] bits per pixel + UINT_32 pitch, ///< [in] surface pitch, in pixels + UINT_32 height, ///< [in] surface height, in pixels + UINT_32 numSamples, ///< [in] number of samples + AddrTileMode tileMode, ///< [in] tile mode + AddrTileType microTileType, ///< [in] micro tiling type + BOOL_32 ignoreSE, ///< [in] TRUE if shader enginers can be ignored + BOOL_32 isDepthSampleOrder, ///< [in] TRUE if it depth sample ordering is used + UINT_32 pipeSwizzle, ///< [in] pipe swizzle + UINT_32 bankSwizzle, ///< [in] bank swizzle + ADDR_TILEINFO* pTileInfo, ///< [in] bank structure + /// **All fields to be valid on entry** + UINT_32* pBitPosition ///< [out] bit position, e.g. 
FMT_1 will use this + ) const +{ + UINT_64 addr; + + UINT_32 microTileBytes; + UINT_32 microTileBits; + UINT_32 sampleOffset; + UINT_32 pixelIndex; + UINT_32 pixelOffset; + UINT_32 elementOffset; + UINT_32 tileSplitSlice; + UINT_32 pipe; + UINT_32 bank; + UINT_64 sliceBytes; + UINT_64 sliceOffset; + UINT_32 macroTilePitch; + UINT_32 macroTileHeight; + UINT_32 macroTilesPerRow; + UINT_32 macroTilesPerSlice; + UINT_64 macroTileBytes; + UINT_32 macroTileIndexX; + UINT_32 macroTileIndexY; + UINT_64 macroTileOffset; + UINT_64 totalOffset; + UINT_64 pipeInterleaveMask; + UINT_64 bankInterleaveMask; + UINT_64 pipeInterleaveOffset; + UINT_32 bankInterleaveOffset; + UINT_64 offset; + UINT_32 tileRowIndex; + UINT_32 tileColumnIndex; + UINT_32 tileIndex; + UINT_32 tileOffset; + + UINT_32 microTileThickness = Thickness(tileMode); + + // + // Compute the number of group, pipe, and bank bits. + // + UINT_32 numPipes = HwlGetPipes(pTileInfo); + UINT_32 numPipeInterleaveBits = Log2(m_pipeInterleaveBytes); + UINT_32 numPipeBits = Log2(numPipes); + UINT_32 numBankInterleaveBits = Log2(m_bankInterleave); + UINT_32 numBankBits = Log2(pTileInfo->banks); + + // + // Compute the micro tile size. + // + microTileBits = MicroTilePixels * microTileThickness * bpp * numSamples; + + microTileBytes = microTileBits / 8; + // + // Compute the pixel index within the micro tile. + // + pixelIndex = ComputePixelIndexWithinMicroTile(x, + y, + slice, + bpp, + tileMode, + microTileType); + + // + // Compute the sample offset and pixel offset. + // + if (isDepthSampleOrder) + { + // + // For depth surfaces, samples are stored contiguously for each element, so the sample + // offset is the sample number times the element size. 
+ // + sampleOffset = sample * bpp; + pixelOffset = pixelIndex * bpp * numSamples; + } + else + { + // + // For color surfaces, all elements for a particular sample are stored contiguously, so + // the sample offset is the sample number times the micro tile size divided yBit the number + // of samples. + // + sampleOffset = sample * (microTileBits / numSamples); + pixelOffset = pixelIndex * bpp; + } + + // + // Compute the element offset. + // + elementOffset = pixelOffset + sampleOffset; + + *pBitPosition = static_cast(elementOffset % 8); + + elementOffset /= 8; //bit-to-byte + + // + // Determine if tiles need to be split across slices. + // + // If the size of the micro tile is larger than the tile split size, then the tile will be + // split across multiple slices. + // + UINT_32 slicesPerTile = 1; + + if ((microTileBytes > pTileInfo->tileSplitBytes) && (microTileThickness == 1)) + { //don't support for thick mode + + // + // Compute the number of slices per tile. + // + slicesPerTile = microTileBytes / pTileInfo->tileSplitBytes; + + // + // Compute the tile split slice number for use in rotating the bank. + // + tileSplitSlice = elementOffset / pTileInfo->tileSplitBytes; + + // + // Adjust the element offset to account for the portion of the tile that is being moved to + // a new slice.. + // + elementOffset %= pTileInfo->tileSplitBytes; + + // + // Adjust the microTileBytes size to tileSplitBytes size since + // a new slice.. + // + microTileBytes = pTileInfo->tileSplitBytes; + } + else + { + tileSplitSlice = 0; + } + + // + // Compute macro tile pitch and height. + // + macroTilePitch = + (MicroTileWidth * pTileInfo->bankWidth * numPipes) * pTileInfo->macroAspectRatio; + macroTileHeight = + (MicroTileHeight * pTileInfo->bankHeight * pTileInfo->banks) / pTileInfo->macroAspectRatio; + + // + // Compute the number of bytes per macro tile. 
Note: bytes of the same bank/pipe actually + // + macroTileBytes = + static_cast(microTileBytes) * + (macroTilePitch / MicroTileWidth) * (macroTileHeight / MicroTileHeight) / + (numPipes * pTileInfo->banks); + + // + // Compute the number of macro tiles per row. + // + macroTilesPerRow = pitch / macroTilePitch; + + // + // Compute the offset to the macro tile containing the specified coordinate. + // + macroTileIndexX = x / macroTilePitch; + macroTileIndexY = y / macroTileHeight; + macroTileOffset = ((macroTileIndexY * macroTilesPerRow) + macroTileIndexX) * macroTileBytes; + + // + // Compute the number of macro tiles per slice. + // + macroTilesPerSlice = macroTilesPerRow * (height / macroTileHeight); + + // + // Compute the slice size. + // + sliceBytes = macroTilesPerSlice * macroTileBytes; + + // + // Compute the slice offset. + // + sliceOffset = sliceBytes * (tileSplitSlice + slicesPerTile * (slice / microTileThickness)); + + // + // Compute tile offest + // + tileRowIndex = (y / MicroTileHeight) % pTileInfo->bankHeight; + tileColumnIndex = ((x / MicroTileWidth) / numPipes) % pTileInfo->bankWidth; + tileIndex = (tileRowIndex * pTileInfo->bankWidth) + tileColumnIndex; + tileOffset = tileIndex * microTileBytes; + + // + // Combine the slice offset and macro tile offset with the pixel and sample offsets, accounting + // for the pipe and bank bits in the middle of the address. + // + totalOffset = sliceOffset + macroTileOffset + elementOffset + tileOffset; + + // + // Get the pipe and bank. + // + + // when the tileMode is PRT type, then adjust x and y coordinates + if (IsPrtNoRotationTileMode(tileMode)) + { + x = x % macroTilePitch; + y = y % macroTileHeight; + } + + pipe = ComputePipeFromCoord(x, + y, + slice, + tileMode, + pipeSwizzle, + ignoreSE, + pTileInfo); + + bank = ComputeBankFromCoord(x, + y, + slice, + tileMode, + bankSwizzle, + tileSplitSlice, + pTileInfo); + + // + // Split the offset to put some bits below the pipe+bank bits and some above. 
+ // + pipeInterleaveMask = (1 << numPipeInterleaveBits) - 1; + bankInterleaveMask = (1 << numBankInterleaveBits) - 1; + pipeInterleaveOffset = totalOffset & pipeInterleaveMask; + bankInterleaveOffset = static_cast((totalOffset >> numPipeInterleaveBits) & + bankInterleaveMask); + offset = totalOffset >> (numPipeInterleaveBits + numBankInterleaveBits); + + // + // Assemble the address from its components. + // + addr = pipeInterleaveOffset; + // This is to remove /analyze warnings + UINT_32 pipeBits = pipe << numPipeInterleaveBits; + UINT_32 bankInterleaveBits = bankInterleaveOffset << (numPipeInterleaveBits + numPipeBits); + UINT_32 bankBits = bank << (numPipeInterleaveBits + numPipeBits + + numBankInterleaveBits); + UINT_64 offsetBits = offset << (numPipeInterleaveBits + numPipeBits + + numBankInterleaveBits + numBankBits); + + addr |= pipeBits; + addr |= bankInterleaveBits; + addr |= bankBits; + addr |= offsetBits; + + return addr; +} + +/** +**************************************************************************************************** +* EgBasedLib::ComputeSurfaceAddrFromCoordMicroTiled +* +* @brief +* Computes the surface address and bit position from a coordinate for 1D tilied +* (micro tiled) +* @return +* The byte address +**************************************************************************************************** +*/ +UINT_64 EgBasedLib::ComputeSurfaceAddrFromCoordMicroTiled( + UINT_32 x, ///< [in] x coordinate + UINT_32 y, ///< [in] y coordinate + UINT_32 slice, ///< [in] slice index + UINT_32 sample, ///< [in] sample index + UINT_32 bpp, ///< [in] bits per pixel + UINT_32 pitch, ///< [in] pitch, in pixels + UINT_32 height, ///< [in] height, in pixels + UINT_32 numSamples, ///< [in] number of samples + AddrTileMode tileMode, ///< [in] tile mode + AddrTileType microTileType, ///< [in] micro tiling type + BOOL_32 isDepthSampleOrder, ///< [in] TRUE if depth sample ordering is used + UINT_32* pBitPosition ///< [out] bit position, e.g. 
FMT_1 will use this + ) const +{ + UINT_64 addr = 0; + + UINT_32 microTileBytes; + UINT_64 sliceBytes; + UINT_32 microTilesPerRow; + UINT_32 microTileIndexX; + UINT_32 microTileIndexY; + UINT_32 microTileIndexZ; + UINT_64 sliceOffset; + UINT_64 microTileOffset; + UINT_32 sampleOffset; + UINT_32 pixelIndex; + UINT_32 pixelOffset; + + UINT_32 microTileThickness = Thickness(tileMode); + + // + // Compute the micro tile size. + // + microTileBytes = BITS_TO_BYTES(MicroTilePixels * microTileThickness * bpp * numSamples); + + // + // Compute the slice size. + // + sliceBytes = + BITS_TO_BYTES(static_cast(pitch) * height * microTileThickness * bpp * numSamples); + + // + // Compute the number of micro tiles per row. + // + microTilesPerRow = pitch / MicroTileWidth; + + // + // Compute the micro tile index. + // + microTileIndexX = x / MicroTileWidth; + microTileIndexY = y / MicroTileHeight; + microTileIndexZ = slice / microTileThickness; + + // + // Compute the slice offset. + // + sliceOffset = static_cast(microTileIndexZ) * sliceBytes; + + // + // Compute the offset to the micro tile containing the specified coordinate. + // + microTileOffset = (static_cast(microTileIndexY) * microTilesPerRow + microTileIndexX) * + microTileBytes; + + // + // Compute the pixel index within the micro tile. + // + pixelIndex = ComputePixelIndexWithinMicroTile(x, + y, + slice, + bpp, + tileMode, + microTileType); + + // Compute the sample offset. + // + if (isDepthSampleOrder) + { + // + // For depth surfaces, samples are stored contiguously for each element, so the sample + // offset is the sample number times the element size. + // + sampleOffset = sample * bpp; + pixelOffset = pixelIndex * bpp * numSamples; + } + else + { + // + // For color surfaces, all elements for a particular sample are stored contiguously, so + // the sample offset is the sample number times the micro tile size divided yBit the number + // of samples. 
+ // + sampleOffset = sample * (microTileBytes*8 / numSamples); + pixelOffset = pixelIndex * bpp; + } + + // + // Compute the bit position of the pixel. Each element is stored with one bit per sample. + // + + UINT_32 elemOffset = sampleOffset + pixelOffset; + + *pBitPosition = elemOffset % 8; + elemOffset /= 8; + + // + // Combine the slice offset, micro tile offset, sample offset, and pixel offsets. + // + addr = sliceOffset + microTileOffset + elemOffset; + + return addr; +} + +/** +**************************************************************************************************** +* EgBasedLib::HwlComputePixelCoordFromOffset +* +* @brief +* Compute pixel coordinate from offset inside a micro tile +* @return +* N/A +**************************************************************************************************** +*/ +VOID EgBasedLib::HwlComputePixelCoordFromOffset( + UINT_32 offset, ///< [in] offset inside micro tile in bits + UINT_32 bpp, ///< [in] bits per pixel + UINT_32 numSamples, ///< [in] number of samples + AddrTileMode tileMode, ///< [in] tile mode + UINT_32 tileBase, ///< [in] base offset within a tile + UINT_32 compBits, ///< [in] component bits actually needed(for planar surface) + UINT_32* pX, ///< [out] x coordinate + UINT_32* pY, ///< [out] y coordinate + UINT_32* pSlice, ///< [out] slice index + UINT_32* pSample, ///< [out] sample index + AddrTileType microTileType, ///< [in] micro tiling type + BOOL_32 isDepthSampleOrder ///< [in] TRUE if depth sample order in microtile is used + ) const +{ + UINT_32 x = 0; + UINT_32 y = 0; + UINT_32 z = 0; + UINT_32 thickness = Thickness(tileMode); + + // For planar surface, we adjust offset acoording to tile base + if ((bpp != compBits) && (compBits != 0) && isDepthSampleOrder) + { + offset -= tileBase; + + ADDR_ASSERT(microTileType == ADDR_NON_DISPLAYABLE || + microTileType == ADDR_DEPTH_SAMPLE_ORDER); + + bpp = compBits; + } + + UINT_32 sampleTileBits; + UINT_32 samplePixelBits; + UINT_32 pixelIndex; + + 
if (isDepthSampleOrder) + { + samplePixelBits = bpp * numSamples; + pixelIndex = offset / samplePixelBits; + *pSample = (offset % samplePixelBits) / bpp; + } + else + { + sampleTileBits = MicroTilePixels * bpp * thickness; + *pSample = offset / sampleTileBits; + pixelIndex = (offset % sampleTileBits) / bpp; + } + + if (microTileType != ADDR_THICK) + { + if (microTileType == ADDR_DISPLAYABLE) // displayable + { + switch (bpp) + { + case 8: + x = pixelIndex & 0x7; + y = Bits2Number(3, _BIT(pixelIndex,5),_BIT(pixelIndex,3),_BIT(pixelIndex,4)); + break; + case 16: + x = pixelIndex & 0x7; + y = Bits2Number(3, _BIT(pixelIndex,5),_BIT(pixelIndex,4),_BIT(pixelIndex,3)); + break; + case 32: + x = Bits2Number(3, _BIT(pixelIndex,3),_BIT(pixelIndex,1),_BIT(pixelIndex,0)); + y = Bits2Number(3, _BIT(pixelIndex,5),_BIT(pixelIndex,4),_BIT(pixelIndex,2)); + break; + case 64: + x = Bits2Number(3, _BIT(pixelIndex,3),_BIT(pixelIndex,2),_BIT(pixelIndex,0)); + y = Bits2Number(3, _BIT(pixelIndex,5),_BIT(pixelIndex,4),_BIT(pixelIndex,1)); + break; + case 128: + x = Bits2Number(3, _BIT(pixelIndex,3),_BIT(pixelIndex,2),_BIT(pixelIndex,1)); + y = Bits2Number(3, _BIT(pixelIndex,5),_BIT(pixelIndex,4),_BIT(pixelIndex,0)); + break; + default: + break; + } + } + else if (microTileType == ADDR_NON_DISPLAYABLE || microTileType == ADDR_DEPTH_SAMPLE_ORDER) + { + x = Bits2Number(3, _BIT(pixelIndex,4),_BIT(pixelIndex,2),_BIT(pixelIndex,0)); + y = Bits2Number(3, _BIT(pixelIndex,5),_BIT(pixelIndex,3),_BIT(pixelIndex,1)); + } + else if (microTileType == ADDR_ROTATED) + { + /* + 8-Bit Elements + element_index[5:0] = { x[2], x[0], x[1], y[2], y[1], y[0] } + + 16-Bit Elements + element_index[5:0] = { x[2], x[1], x[0], y[2], y[1], y[0] } + + 32-Bit Elements + element_index[5:0] = { x[2], x[1], y[2], x[0], y[1], y[0] } + + 64-Bit Elements + element_index[5:0] = { y[2], x[2], x[1], y[1], x[0], y[0] } + */ + switch(bpp) + { + case 8: + x = Bits2Number(3, _BIT(pixelIndex,5),_BIT(pixelIndex,3),_BIT(pixelIndex,4)); 
+ y = pixelIndex & 0x7; + break; + case 16: + x = Bits2Number(3, _BIT(pixelIndex,5),_BIT(pixelIndex,4),_BIT(pixelIndex,3)); + y = pixelIndex & 0x7; + break; + case 32: + x = Bits2Number(3, _BIT(pixelIndex,5),_BIT(pixelIndex,4),_BIT(pixelIndex,2)); + y = Bits2Number(3, _BIT(pixelIndex,3),_BIT(pixelIndex,1),_BIT(pixelIndex,0)); + break; + case 64: + x = Bits2Number(3, _BIT(pixelIndex,4),_BIT(pixelIndex,3),_BIT(pixelIndex,1)); + y = Bits2Number(3, _BIT(pixelIndex,5),_BIT(pixelIndex,2),_BIT(pixelIndex,0)); + break; + default: + ADDR_ASSERT_ALWAYS(); + break; + } + } + + if (thickness > 1) // thick + { + z = Bits2Number(3, _BIT(pixelIndex,8),_BIT(pixelIndex,7),_BIT(pixelIndex,6)); + } + } + else + { + ADDR_ASSERT((m_chipFamily >= ADDR_CHIP_FAMILY_CI) && (thickness > 1)); + /* + 8-Bit Elements and 16-Bit Elements + element_index[7:0] = { y[2], x[2], z[1], z[0], y[1], x[1], y[0], x[0] } + + 32-Bit Elements + element_index[7:0] = { y[2], x[2], z[1], y[1], z[0], x[1], y[0], x[0] } + + 64-Bit Elements and 128-Bit Elements + element_index[7:0] = { y[2], x[2], z[1], y[1], x[1], z[0], y[0], x[0] } + + The equation to compute the element index for the extra thick tile: + element_index[8] = z[2] + */ + switch (bpp) + { + case 8: + case 16: // fall-through + x = Bits2Number(3, _BIT(pixelIndex,6),_BIT(pixelIndex,2),_BIT(pixelIndex,0)); + y = Bits2Number(3, _BIT(pixelIndex,7),_BIT(pixelIndex,3),_BIT(pixelIndex,1)); + z = Bits2Number(2, _BIT(pixelIndex,5),_BIT(pixelIndex,4)); + break; + case 32: + x = Bits2Number(3, _BIT(pixelIndex,6),_BIT(pixelIndex,2),_BIT(pixelIndex,0)); + y = Bits2Number(3, _BIT(pixelIndex,7),_BIT(pixelIndex,4),_BIT(pixelIndex,1)); + z = Bits2Number(2, _BIT(pixelIndex,5),_BIT(pixelIndex,3)); + break; + case 64: + case 128: // fall-through + x = Bits2Number(3, _BIT(pixelIndex,6),_BIT(pixelIndex,3),_BIT(pixelIndex,0)); + y = Bits2Number(3, _BIT(pixelIndex,7),_BIT(pixelIndex,4),_BIT(pixelIndex,1)); + z = Bits2Number(2, _BIT(pixelIndex,5),_BIT(pixelIndex,2)); + 
break; + default: + ADDR_ASSERT_ALWAYS(); + break; + } + + if (thickness == 8) + { + z += Bits2Number(3,_BIT(pixelIndex,8),0,0); + } + } + + *pX = x; + *pY = y; + *pSlice += z; +} + +/** +**************************************************************************************************** +* EgBasedLib::DispatchComputeSurfaceCoordFromAddrDispatch +* +* @brief +* Compute (x,y,slice,sample) coordinates from surface address +* @return +* N/A +**************************************************************************************************** +*/ +VOID EgBasedLib::DispatchComputeSurfaceCoordFromAddr( + const ADDR_COMPUTE_SURFACE_COORDFROMADDR_INPUT* pIn, ///< [in] input structure + ADDR_COMPUTE_SURFACE_COORDFROMADDR_OUTPUT* pOut ///< [out] output structure + ) const +{ + UINT_64 addr = pIn->addr; + UINT_32 bitPosition = pIn->bitPosition; + UINT_32 bpp = pIn->bpp; + UINT_32 pitch = pIn->pitch; + UINT_32 height = pIn->height; + UINT_32 numSlices = pIn->numSlices; + UINT_32 numSamples = ((pIn->numSamples == 0) ? 1 : pIn->numSamples); + UINT_32 numFrags = ((pIn->numFrags == 0) ? 
numSamples : pIn->numFrags); + AddrTileMode tileMode = pIn->tileMode; + UINT_32 tileBase = pIn->tileBase; + UINT_32 compBits = pIn->compBits; + AddrTileType microTileType = pIn->tileType; + BOOL_32 ignoreSE = pIn->ignoreSE; + BOOL_32 isDepthSampleOrder = pIn->isDepth; + ADDR_TILEINFO* pTileInfo = pIn->pTileInfo; + + UINT_32* pX = &pOut->x; + UINT_32* pY = &pOut->y; + UINT_32* pSlice = &pOut->slice; + UINT_32* pSample = &pOut->sample; + + if (microTileType == ADDR_DEPTH_SAMPLE_ORDER) + { + isDepthSampleOrder = TRUE; + } + + if (m_chipFamily >= ADDR_CHIP_FAMILY_NI) + { + if (numFrags != numSamples) + { + numSamples = numFrags; + } + + /// @note + /// 128 bit/thick tiled surface doesn't support display tiling and + /// mipmap chain must have the same tileType, so please fill tileType correctly + if (IsLinear(pIn->tileMode) == FALSE) + { + if (bpp >= 128 || Thickness(tileMode) > 1) + { + ADDR_ASSERT(microTileType != ADDR_DISPLAYABLE); + } + } + } + + switch (tileMode) + { + case ADDR_TM_LINEAR_GENERAL://fall through + case ADDR_TM_LINEAR_ALIGNED: + ComputeSurfaceCoordFromAddrLinear(addr, + bitPosition, + bpp, + pitch, + height, + numSlices, + pX, + pY, + pSlice, + pSample); + break; + case ADDR_TM_1D_TILED_THIN1://fall through + case ADDR_TM_1D_TILED_THICK: + ComputeSurfaceCoordFromAddrMicroTiled(addr, + bitPosition, + bpp, + pitch, + height, + numSamples, + tileMode, + tileBase, + compBits, + pX, + pY, + pSlice, + pSample, + microTileType, + isDepthSampleOrder); + break; + case ADDR_TM_2D_TILED_THIN1: //fall through + case ADDR_TM_2D_TILED_THICK: //fall through + case ADDR_TM_3D_TILED_THIN1: //fall through + case ADDR_TM_3D_TILED_THICK: //fall through + case ADDR_TM_2D_TILED_XTHICK: //fall through + case ADDR_TM_3D_TILED_XTHICK: //fall through + case ADDR_TM_PRT_TILED_THIN1: //fall through + case ADDR_TM_PRT_2D_TILED_THIN1://fall through + case ADDR_TM_PRT_3D_TILED_THIN1://fall through + case ADDR_TM_PRT_TILED_THICK: //fall through + case 
ADDR_TM_PRT_2D_TILED_THICK://fall through + case ADDR_TM_PRT_3D_TILED_THICK: + UINT_32 pipeSwizzle; + UINT_32 bankSwizzle; + + if (m_configFlags.useCombinedSwizzle) + { + ExtractBankPipeSwizzle(pIn->tileSwizzle, pIn->pTileInfo, + &bankSwizzle, &pipeSwizzle); + } + else + { + pipeSwizzle = pIn->pipeSwizzle; + bankSwizzle = pIn->bankSwizzle; + } + + ComputeSurfaceCoordFromAddrMacroTiled(addr, + bitPosition, + bpp, + pitch, + height, + numSamples, + tileMode, + tileBase, + compBits, + microTileType, + ignoreSE, + isDepthSampleOrder, + pipeSwizzle, + bankSwizzle, + pTileInfo, + pX, + pY, + pSlice, + pSample); + break; + default: + ADDR_ASSERT_ALWAYS(); + } +} + +/** +**************************************************************************************************** +* EgBasedLib::ComputeSurfaceCoordFromAddrMacroTiled +* +* @brief +* Compute surface coordinates from address for macro tiled surface +* @return +* N/A +**************************************************************************************************** +*/ +VOID EgBasedLib::ComputeSurfaceCoordFromAddrMacroTiled( + UINT_64 addr, ///< [in] byte address + UINT_32 bitPosition, ///< [in] bit position + UINT_32 bpp, ///< [in] bits per pixel + UINT_32 pitch, ///< [in] pitch in pixels + UINT_32 height, ///< [in] height in pixels + UINT_32 numSamples, ///< [in] number of samples + AddrTileMode tileMode, ///< [in] tile mode + UINT_32 tileBase, ///< [in] tile base offset + UINT_32 compBits, ///< [in] component bits (for planar surface) + AddrTileType microTileType, ///< [in] micro tiling type + BOOL_32 ignoreSE, ///< [in] TRUE if shader engines can be ignored + BOOL_32 isDepthSampleOrder, ///< [in] TRUE if depth sample order is used + UINT_32 pipeSwizzle, ///< [in] pipe swizzle + UINT_32 bankSwizzle, ///< [in] bank swizzle + ADDR_TILEINFO* pTileInfo, ///< [in] bank structure. 
+ /// **All fields to be valid on entry** + UINT_32* pX, ///< [out] X coord + UINT_32* pY, ///< [out] Y coord + UINT_32* pSlice, ///< [out] slice index + UINT_32* pSample ///< [out] sample index + ) const +{ + UINT_32 mx; + UINT_32 my; + UINT_64 tileBits; + UINT_64 macroTileBits; + UINT_32 slices; + UINT_32 tileSlices; + UINT_64 elementOffset; + UINT_64 macroTileIndex; + UINT_32 tileIndex; + UINT_64 totalOffset; + + UINT_32 bank; + UINT_32 pipe; + UINT_32 groupBits = m_pipeInterleaveBytes << 3; + UINT_32 pipes = HwlGetPipes(pTileInfo); + UINT_32 banks = pTileInfo->banks; + + UINT_32 bankInterleave = m_bankInterleave; + + UINT_64 addrBits = BYTES_TO_BITS(addr) + bitPosition; + + // + // remove bits for bank and pipe + // + totalOffset = (addrBits % groupBits) + + (((addrBits / groupBits / pipes) % bankInterleave) * groupBits) + + (((addrBits / groupBits / pipes) / bankInterleave) / banks) * groupBits * bankInterleave; + + UINT_32 microTileThickness = Thickness(tileMode); + + UINT_32 microTileBits = bpp * microTileThickness * MicroTilePixels * numSamples; + + UINT_32 microTileBytes = BITS_TO_BYTES(microTileBits); + // + // Determine if tiles need to be split across slices. + // + // If the size of the micro tile is larger than the tile split size, then the tile will be + // split across multiple slices. + // + UINT_32 slicesPerTile = 1; //_State->TileSlices + + if ((microTileBytes > pTileInfo->tileSplitBytes) && (microTileThickness == 1)) + { //don't support for thick mode + + // + // Compute the number of slices per tile. + // + slicesPerTile = microTileBytes / pTileInfo->tileSplitBytes; + } + + tileBits = microTileBits / slicesPerTile; // micro tile bits + + // in micro tiles because not MicroTileWidth timed. 
+ UINT_32 macroWidth = pTileInfo->bankWidth * pipes * pTileInfo->macroAspectRatio; + // in micro tiles as well + UINT_32 macroHeight = pTileInfo->bankHeight * banks / pTileInfo->macroAspectRatio; + + UINT_32 pitchInMacroTiles = pitch / MicroTileWidth / macroWidth; + + macroTileBits = (macroWidth * macroHeight) * tileBits / (banks * pipes); + + macroTileIndex = totalOffset / macroTileBits; + + // pitchMacros * height / heightMacros; macroTilesPerSlice == _State->SliceMacros + UINT_32 macroTilesPerSlice = (pitch / (macroWidth * MicroTileWidth)) * height / + (macroHeight * MicroTileWidth); + + slices = static_cast(macroTileIndex / macroTilesPerSlice); + + *pSlice = static_cast(slices / slicesPerTile * microTileThickness); + + // + // calculate element offset and x[2:0], y[2:0], z[1:0] for thick + // + tileSlices = slices % slicesPerTile; + + elementOffset = tileSlices * tileBits; + elementOffset += totalOffset % tileBits; + + UINT_32 coordZ = 0; + + HwlComputePixelCoordFromOffset(static_cast(elementOffset), + bpp, + numSamples, + tileMode, + tileBase, + compBits, + pX, + pY, + &coordZ, + pSample, + microTileType, + isDepthSampleOrder); + + macroTileIndex = macroTileIndex % macroTilesPerSlice; + *pY += static_cast(macroTileIndex / pitchInMacroTiles * macroHeight * MicroTileHeight); + *pX += static_cast(macroTileIndex % pitchInMacroTiles * macroWidth * MicroTileWidth); + + *pSlice += coordZ; + + tileIndex = static_cast((totalOffset % macroTileBits) / tileBits); + + my = (tileIndex / pTileInfo->bankWidth) % pTileInfo->bankHeight * MicroTileHeight; + mx = (tileIndex % pTileInfo->bankWidth) * pipes * MicroTileWidth; + + *pY += my; + *pX += mx; + + bank = ComputeBankFromAddr(addr, banks, pipes); + pipe = ComputePipeFromAddr(addr, pipes); + + HwlComputeSurfaceCoord2DFromBankPipe(tileMode, + pX, + pY, + *pSlice, + bank, + pipe, + bankSwizzle, + pipeSwizzle, + tileSlices, + ignoreSE, + pTileInfo); +} + +/** 
+**************************************************************************************************** +* EgBasedLib::ComputeSurfaceCoord2DFromBankPipe +* +* @brief +* Compute surface x,y coordinates from bank/pipe info +* @return +* N/A +**************************************************************************************************** +*/ +VOID EgBasedLib::ComputeSurfaceCoord2DFromBankPipe( + AddrTileMode tileMode, ///< [in] tile mode + UINT_32 x, ///< [in] x coordinate + UINT_32 y, ///< [in] y coordinate + UINT_32 slice, ///< [in] slice index + UINT_32 bank, ///< [in] bank number + UINT_32 pipe, ///< [in] pipe number + UINT_32 bankSwizzle,///< [in] bank swizzle + UINT_32 pipeSwizzle,///< [in] pipe swizzle + UINT_32 tileSlices, ///< [in] slices in a micro tile + ADDR_TILEINFO* pTileInfo, ///< [in] bank structure. **All fields to be valid on entry** + CoordFromBankPipe* pOutput ///< [out] pointer to extracted x/y bits + ) const +{ + UINT_32 yBit3 = 0; + UINT_32 yBit4 = 0; + UINT_32 yBit5 = 0; + UINT_32 yBit6 = 0; + + UINT_32 xBit3 = 0; + UINT_32 xBit4 = 0; + UINT_32 xBit5 = 0; + + UINT_32 tileSplitRotation; + + UINT_32 numPipes = HwlGetPipes(pTileInfo); + + UINT_32 bankRotation = ComputeBankRotation(tileMode, + pTileInfo->banks, numPipes); + + UINT_32 pipeRotation = ComputePipeRotation(tileMode, numPipes); + + UINT_32 xBit = x / (MicroTileWidth * pTileInfo->bankWidth * numPipes); + UINT_32 yBit = y / (MicroTileHeight * pTileInfo->bankHeight); + + //calculate the bank and pipe before rotation and swizzle + + switch (tileMode) + { + case ADDR_TM_2D_TILED_THIN1: //fall through + case ADDR_TM_2D_TILED_THICK: //fall through + case ADDR_TM_2D_TILED_XTHICK: //fall through + case ADDR_TM_3D_TILED_THIN1: //fall through + case ADDR_TM_3D_TILED_THICK: //fall through + case ADDR_TM_3D_TILED_XTHICK: + tileSplitRotation = ((pTileInfo->banks / 2) + 1); + break; + default: + tileSplitRotation = 0; + break; + } + + UINT_32 microTileThickness = Thickness(tileMode); + + bank ^= 
tileSplitRotation * tileSlices; + if (pipeRotation == 0) + { + bank ^= bankRotation * (slice / microTileThickness) + bankSwizzle; + bank %= pTileInfo->banks; + pipe ^= pipeSwizzle; + } + else + { + bank ^= bankRotation * (slice / microTileThickness) / numPipes + bankSwizzle; + bank %= pTileInfo->banks; + pipe ^= pipeRotation * (slice / microTileThickness) + pipeSwizzle; + } + + if (pTileInfo->macroAspectRatio == 1) + { + switch (pTileInfo->banks) + { + case 2: + yBit3 = _BIT(bank, 0) ^ _BIT(xBit,0); + break; + case 4: + yBit4 = _BIT(bank, 0) ^ _BIT(xBit,0); + yBit3 = _BIT(bank, 1) ^ _BIT(xBit,1); + break; + case 8: + yBit3 = _BIT(bank, 2) ^ _BIT(xBit,2); + yBit5 = _BIT(bank, 0) ^ _BIT(xBit,0); + yBit4 = _BIT(bank, 1) ^ _BIT(xBit,1) ^ yBit5; + break; + case 16: + yBit3 = _BIT(bank, 3) ^ _BIT(xBit, 3); + yBit4 = _BIT(bank, 2) ^ _BIT(xBit, 2); + yBit6 = _BIT(bank, 0) ^ _BIT(xBit, 0); + yBit5 = _BIT(bank, 1) ^ _BIT(xBit, 1) ^ yBit6; + break; + default: + break; + } + + } + else if (pTileInfo->macroAspectRatio == 2) + { + switch (pTileInfo->banks) + { + case 2: //xBit3 = yBit3^b0 + xBit3 = _BIT(bank, 0) ^ _BIT(yBit,0); + break; + case 4: //xBit3=yBit4^b0; yBit3=xBit4^b1 + xBit3 = _BIT(bank, 0) ^ _BIT(yBit,1); + yBit3 = _BIT(bank, 1) ^ _BIT(xBit,1); + break; + case 8: //xBit4, xBit5, yBit5 are known + xBit3 = _BIT(bank, 0) ^ _BIT(yBit,2); + yBit3 = _BIT(bank, 2) ^ _BIT(xBit,2); + yBit4 = _BIT(bank, 1) ^ _BIT(xBit,1) ^ _BIT(yBit, 2); + break; + case 16://x4,x5,x6,y6 are known + xBit3 = _BIT(bank, 0) ^ _BIT(yBit, 3); //x3 = y6 ^ b0 + yBit3 = _BIT(bank, 3) ^ _BIT(xBit, 3); //y3 = x6 ^ b3 + yBit4 = _BIT(bank, 2) ^ _BIT(xBit, 2); //y4 = x5 ^ b2 + yBit5 = _BIT(bank, 1) ^ _BIT(xBit, 1) ^ _BIT(yBit, 3); //y5=x4^y6^b1 + break; + default: + break; + } + } + else if (pTileInfo->macroAspectRatio == 4) + { + switch (pTileInfo->banks) + { + case 4: //yBit3, yBit4 + xBit3 = _BIT(bank, 0) ^ _BIT(yBit,1); + xBit4 = _BIT(bank, 1) ^ _BIT(yBit,0); + break; + case 8: //xBit5, yBit4, yBit5 + 
xBit3 = _BIT(bank, 0) ^ _BIT(yBit,2); + yBit3 = _BIT(bank, 2) ^ _BIT(xBit,2); + xBit4 = _BIT(bank, 1) ^ _BIT(yBit,1) ^ _BIT(yBit,2); + break; + case 16: //xBit5, xBit6, yBit5, yBit6 + xBit3 = _BIT(bank, 0) ^ _BIT(yBit, 3);//x3 = b0 ^ y6 + xBit4 = _BIT(bank, 1) ^ _BIT(yBit, 2) ^ _BIT(yBit, 3);//x4 = b1 ^ y5 ^ y6; + yBit3 = _BIT(bank, 3) ^ _BIT(xBit, 3); //y3 = b3 ^ x6; + yBit4 = _BIT(bank, 2) ^ _BIT(xBit, 2); //y4 = b2 ^ x5; + break; + default: + break; + } + } + else if (pTileInfo->macroAspectRatio == 8) + { + switch (pTileInfo->banks) + { + case 8: //yBit3, yBit4, yBit5 + xBit3 = _BIT(bank, 0) ^ _BIT(yBit,2); //x3 = b0 ^ y5; + xBit4 = _BIT(bank, 1) ^ _BIT(yBit,1) ^ _BIT(yBit, 2);//x4 = b1 ^ y4 ^ y5; + xBit5 = _BIT(bank, 2) ^ _BIT(yBit,0); + break; + case 16: //xBit6, yBit4, yBit5, yBit6 + xBit3 = _BIT(bank, 0) ^ _BIT(yBit, 3);//x3 = y6 ^ b0 + xBit4 = _BIT(bank, 1) ^ _BIT(yBit, 2) ^ _BIT(yBit, 3);//x4 = y5 ^ y6 ^ b1 + xBit5 = _BIT(bank, 2) ^ _BIT(yBit, 1);//x5 = y4 ^ b2 + yBit3 = _BIT(bank, 3) ^ _BIT(xBit, 3); //y3 = x6 ^ b3 + break; + default: + break; + } + } + + pOutput->xBits = xBit; + pOutput->yBits = yBit; + + pOutput->xBit3 = xBit3; + pOutput->xBit4 = xBit4; + pOutput->xBit5 = xBit5; + pOutput->yBit3 = yBit3; + pOutput->yBit4 = yBit4; + pOutput->yBit5 = yBit5; + pOutput->yBit6 = yBit6; +} + +/** +**************************************************************************************************** +* EgBasedLib::HwlExtractBankPipeSwizzle +* @brief +* Entry of EgBasedLib ExtractBankPipeSwizzle +* @return +* ADDR_E_RETURNCODE +**************************************************************************************************** +*/ +ADDR_E_RETURNCODE EgBasedLib::HwlExtractBankPipeSwizzle( + const ADDR_EXTRACT_BANKPIPE_SWIZZLE_INPUT* pIn, ///< [in] input structure + ADDR_EXTRACT_BANKPIPE_SWIZZLE_OUTPUT* pOut ///< [out] output structure + ) const +{ + ExtractBankPipeSwizzle(pIn->base256b, + pIn->pTileInfo, + &pOut->bankSwizzle, + &pOut->pipeSwizzle); + + return 
ADDR_OK; +} + +/** +**************************************************************************************************** +* EgBasedLib::HwlCombineBankPipeSwizzle +* @brief +* Combine bank/pipe swizzle +* @return +* ADDR_E_RETURNCODE +**************************************************************************************************** +*/ +ADDR_E_RETURNCODE EgBasedLib::HwlCombineBankPipeSwizzle( + UINT_32 bankSwizzle, ///< [in] bank swizzle + UINT_32 pipeSwizzle, ///< [in] pipe swizzle + ADDR_TILEINFO* pTileInfo, ///< [in] tile info + UINT_64 baseAddr, ///< [in] base address + UINT_32* pTileSwizzle ///< [out] combined swizzle + ) const +{ + ADDR_E_RETURNCODE retCode = ADDR_OK; + + if (pTileSwizzle) + { + *pTileSwizzle = GetBankPipeSwizzle(bankSwizzle, pipeSwizzle, baseAddr, pTileInfo); + } + else + { + retCode = ADDR_INVALIDPARAMS; + } + + return retCode; +} + +/** +**************************************************************************************************** +* EgBasedLib::HwlComputeBaseSwizzle +* @brief +* Compute base swizzle +* @return +* ADDR_E_RETURNCODE +**************************************************************************************************** +*/ +ADDR_E_RETURNCODE EgBasedLib::HwlComputeBaseSwizzle( + const ADDR_COMPUTE_BASE_SWIZZLE_INPUT* pIn, + ADDR_COMPUTE_BASE_SWIZZLE_OUTPUT* pOut + ) const +{ + UINT_32 bankSwizzle = 0; + UINT_32 pipeSwizzle = 0; + ADDR_TILEINFO* pTileInfo = pIn->pTileInfo; + + ADDR_ASSERT(IsMacroTiled(pIn->tileMode)); + ADDR_ASSERT(pIn->pTileInfo); + + /// This is a legacy misreading of h/w doc, use it as it doesn't hurt. 
+ static const UINT_8 bankRotationArray[4][16] = { + { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 }, // ADDR_SURF_2_BANK + { 0, 1, 2, 3, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 }, // ADDR_SURF_4_BANK + { 0, 3, 6, 1, 4, 7, 2, 5, 0, 0, 0, 0, 0, 0, 0, 0 }, // ADDR_SURF_8_BANK + { 0, 7, 14, 5, 12, 3, 10, 1, 8, 15, 6, 13, 4, 11, 2, 9 }, // ADDR_SURF_16_BANK + }; + + UINT_32 pipes = HwlGetPipes(pTileInfo); + (void)pipes; + UINT_32 banks = pTileInfo ? pTileInfo->banks : 2; + UINT_32 hwNumBanks; + + // Uses less bank swizzle bits + if (pIn->option.reduceBankBit && banks > 2) + { + banks >>= 1; + } + + switch (banks) + { + case 2: + hwNumBanks = 0; + break; + case 4: + hwNumBanks = 1; + break; + case 8: + hwNumBanks = 2; + break; + case 16: + hwNumBanks = 3; + break; + default: + ADDR_ASSERT_ALWAYS(); + hwNumBanks = 0; + break; + } + + if (pIn->option.genOption == ADDR_SWIZZLE_GEN_LINEAR) + { + bankSwizzle = pIn->surfIndex & (banks - 1); + } + else // (pIn->option.genOption == ADDR_SWIZZLE_GEN_DEFAULT) + { + bankSwizzle = bankRotationArray[hwNumBanks][pIn->surfIndex & (banks - 1)]; + } + + if (IsMacro3dTiled(pIn->tileMode)) + { + pipeSwizzle = pIn->surfIndex & (HwlGetPipes(pTileInfo) - 1); + } + + return HwlCombineBankPipeSwizzle(bankSwizzle, pipeSwizzle, pTileInfo, 0, &pOut->tileSwizzle); +} + +/** +**************************************************************************************************** +* EgBasedLib::ExtractBankPipeSwizzle +* @brief +* Extract bank/pipe swizzle from base256b +* @return +* N/A +**************************************************************************************************** +*/ +VOID EgBasedLib::ExtractBankPipeSwizzle( + UINT_32 base256b, ///< [in] input base256b register value + ADDR_TILEINFO* pTileInfo, ///< [in] 2D tile parameters. 
Client must provide all data + UINT_32* pBankSwizzle, ///< [out] bank swizzle + UINT_32* pPipeSwizzle ///< [out] pipe swizzle + ) const +{ + UINT_32 bankSwizzle = 0; + UINT_32 pipeSwizzle = 0; + + if (base256b != 0) + { + UINT_32 numPipes = HwlGetPipes(pTileInfo); + UINT_32 bankBits = QLog2(pTileInfo->banks); + UINT_32 pipeBits = QLog2(numPipes); + UINT_32 groupBytes = m_pipeInterleaveBytes; + UINT_32 bankInterleave = m_bankInterleave; + + pipeSwizzle = + (base256b / (groupBytes >> 8)) & ((1<> 8) / numPipes / bankInterleave) & ((1 << bankBits) - 1); + } + + *pPipeSwizzle = pipeSwizzle; + *pBankSwizzle = bankSwizzle; +} + +/** +**************************************************************************************************** +* EgBasedLib::GetBankPipeSwizzle +* @brief +* Combine bank/pipe swizzle +* @return +* Base256b bits (only filled bank/pipe bits) +**************************************************************************************************** +*/ +UINT_32 EgBasedLib::GetBankPipeSwizzle( + UINT_32 bankSwizzle, ///< [in] bank swizzle + UINT_32 pipeSwizzle, ///< [in] pipe swizzle + UINT_64 baseAddr, ///< [in] base address + ADDR_TILEINFO* pTileInfo ///< [in] tile info + ) const +{ + UINT_32 pipeBits = QLog2(HwlGetPipes(pTileInfo)); + UINT_32 bankInterleaveBits = QLog2(m_bankInterleave); + UINT_32 tileSwizzle = pipeSwizzle + ((bankSwizzle << bankInterleaveBits) << pipeBits); + + baseAddr ^= tileSwizzle * m_pipeInterleaveBytes; + baseAddr >>= 8; + + return static_cast(baseAddr); +} + +/** +**************************************************************************************************** +* EgBasedLib::ComputeSliceTileSwizzle +* @brief +* Compute cubemap/3d texture faces/slices tile swizzle +* @return +* Tile swizzle +**************************************************************************************************** +*/ +UINT_32 EgBasedLib::ComputeSliceTileSwizzle( + AddrTileMode tileMode, ///< [in] Tile mode + UINT_32 baseSwizzle, ///< [in] Base swizzle + 
UINT_32 slice, ///< [in] Slice index, Cubemap face index, 0 means +X + UINT_64 baseAddr, ///< [in] Base address + ADDR_TILEINFO* pTileInfo ///< [in] Bank structure + ) const +{ + UINT_32 tileSwizzle = 0; + + if (IsMacroTiled(tileMode)) // Swizzle only for macro tile mode + { + UINT_32 firstSlice = slice / Thickness(tileMode); + + UINT_32 numPipes = HwlGetPipes(pTileInfo); + UINT_32 numBanks = pTileInfo->banks; + + UINT_32 pipeRotation; + UINT_32 bankRotation; + + UINT_32 bankSwizzle = 0; + UINT_32 pipeSwizzle = 0; + + pipeRotation = ComputePipeRotation(tileMode, numPipes); + bankRotation = ComputeBankRotation(tileMode, numBanks, numPipes); + + if (baseSwizzle != 0) + { + ExtractBankPipeSwizzle(baseSwizzle, + pTileInfo, + &bankSwizzle, + &pipeSwizzle); + } + + if (pipeRotation == 0) //2D mode + { + bankSwizzle += firstSlice * bankRotation; + bankSwizzle %= numBanks; + } + else //3D mode + { + pipeSwizzle += firstSlice * pipeRotation; + pipeSwizzle %= numPipes; + bankSwizzle += firstSlice * bankRotation / numPipes; + bankSwizzle %= numBanks; + } + + tileSwizzle = GetBankPipeSwizzle(bankSwizzle, + pipeSwizzle, + baseAddr, + pTileInfo); + } + + return tileSwizzle; +} + +/** +**************************************************************************************************** +* EgBasedLib::HwlComputeQbStereoRightSwizzle +* +* @brief +* Compute right eye swizzle +* @return +* swizzle +**************************************************************************************************** +*/ +UINT_32 EgBasedLib::HwlComputeQbStereoRightSwizzle( + ADDR_COMPUTE_SURFACE_INFO_OUTPUT* pInfo ///< [in] Surface info, must be valid + ) const +{ + UINT_32 bankBits = 0; + UINT_32 swizzle = 0; + + // The assumption is default swizzle for left eye is 0 + if (IsMacroTiled(pInfo->tileMode) && pInfo->pStereoInfo && pInfo->pTileInfo) + { + bankBits = ComputeBankFromCoord(0, pInfo->height, 0, + pInfo->tileMode, 0, 0, pInfo->pTileInfo); + + if (bankBits) + { + 
HwlCombineBankPipeSwizzle(bankBits, 0, pInfo->pTileInfo, 0, &swizzle); + } + } + + return swizzle; +} + +/** +**************************************************************************************************** +* EgBasedLib::ComputeBankFromCoord +* +* @brief +* Compute bank number from coordinates +* @return +* Bank number +**************************************************************************************************** +*/ +UINT_32 EgBasedLib::ComputeBankFromCoord( + UINT_32 x, ///< [in] x coordinate + UINT_32 y, ///< [in] y coordinate + UINT_32 slice, ///< [in] slice index + AddrTileMode tileMode, ///< [in] tile mode + UINT_32 bankSwizzle, ///< [in] bank swizzle + UINT_32 tileSplitSlice, ///< [in] If the size of the pixel offset is larger than the + /// tile split size, then the pixel will be moved to a separate + /// slice. This value equals pixelOffset / tileSplitBytes + /// in this case. Otherwise this is 0. + ADDR_TILEINFO* pTileInfo ///< [in] tile info + ) const +{ + UINT_32 pipes = HwlGetPipes(pTileInfo); + UINT_32 bankBit0 = 0; + UINT_32 bankBit1 = 0; + UINT_32 bankBit2 = 0; + UINT_32 bankBit3 = 0; + UINT_32 sliceRotation; + UINT_32 tileSplitRotation; + UINT_32 bank; + UINT_32 numBanks = pTileInfo->banks; + UINT_32 bankWidth = pTileInfo->bankWidth; + UINT_32 bankHeight = pTileInfo->bankHeight; + + UINT_32 tx = x / MicroTileWidth / (bankWidth * pipes); + UINT_32 ty = y / MicroTileHeight / bankHeight; + + UINT_32 x3 = _BIT(tx,0); + UINT_32 x4 = _BIT(tx,1); + UINT_32 x5 = _BIT(tx,2); + UINT_32 x6 = _BIT(tx,3); + UINT_32 y3 = _BIT(ty,0); + UINT_32 y4 = _BIT(ty,1); + UINT_32 y5 = _BIT(ty,2); + UINT_32 y6 = _BIT(ty,3); + + switch (numBanks) + { + case 16: + bankBit0 = x3 ^ y6; + bankBit1 = x4 ^ y5 ^ y6; + bankBit2 = x5 ^ y4; + bankBit3 = x6 ^ y3; + break; + case 8: + bankBit0 = x3 ^ y5; + bankBit1 = x4 ^ y4 ^ y5; + bankBit2 = x5 ^ y3; + break; + case 4: + bankBit0 = x3 ^ y4; + bankBit1 = x4 ^ y3; + break; + case 2: + bankBit0 = x3 ^ y3; + break; + default: + 
ADDR_ASSERT_ALWAYS(); + break; + } + + bank = bankBit0 | (bankBit1 << 1) | (bankBit2 << 2) | (bankBit3 << 3); + + //Bits2Number(4, bankBit3, bankBit2, bankBit1, bankBit0); + + bank = HwlPreAdjustBank((x / MicroTileWidth), bank, pTileInfo); + // + // Compute bank rotation for the slice. + // + UINT_32 microTileThickness = Thickness(tileMode); + + switch (tileMode) + { + case ADDR_TM_2D_TILED_THIN1: // fall through + case ADDR_TM_2D_TILED_THICK: // fall through + case ADDR_TM_2D_TILED_XTHICK: + sliceRotation = ((numBanks / 2) - 1) * (slice / microTileThickness); + break; + case ADDR_TM_3D_TILED_THIN1: // fall through + case ADDR_TM_3D_TILED_THICK: // fall through + case ADDR_TM_3D_TILED_XTHICK: + sliceRotation = + Max(1u, (pipes / 2) - 1) * (slice / microTileThickness) / pipes; + break; + default: + sliceRotation = 0; + break; + } + + // + // Compute bank rotation for the tile split slice. + // + // The sample slice will be non-zero if samples must be split across multiple slices. + // This situation arises when the micro tile size multiplied yBit the number of samples exceeds + // the split size (set in GB_ADDR_CONFIG). + // + switch (tileMode) + { + case ADDR_TM_2D_TILED_THIN1: //fall through + case ADDR_TM_3D_TILED_THIN1: //fall through + case ADDR_TM_PRT_2D_TILED_THIN1: //fall through + case ADDR_TM_PRT_3D_TILED_THIN1: //fall through + tileSplitRotation = ((numBanks / 2) + 1) * tileSplitSlice; + break; + default: + tileSplitRotation = 0; + break; + } + + // + // Apply bank rotation for the slice and tile split slice. 
+ // + bank ^= bankSwizzle + sliceRotation; + bank ^= tileSplitRotation; + + bank &= (numBanks - 1); + + return bank; +} + +/** +**************************************************************************************************** +* EgBasedLib::ComputeBankFromAddr +* +* @brief +* Compute the bank number from an address +* @return +* Bank number +**************************************************************************************************** +*/ +UINT_32 EgBasedLib::ComputeBankFromAddr( + UINT_64 addr, ///< [in] address + UINT_32 numBanks, ///< [in] number of banks + UINT_32 numPipes ///< [in] number of pipes + ) const +{ + UINT_32 bank; + + // + // The LSBs of the address are arranged as follows: + // bank | bankInterleave | pipe | pipeInterleave + // + // To get the bank number, shift off the pipe interleave, pipe, and bank interlave bits and + // mask the bank bits. + // + bank = static_cast( + (addr >> Log2(m_pipeInterleaveBytes * numPipes * m_bankInterleave)) & + (numBanks - 1) + ); + + return bank; +} + +/** +**************************************************************************************************** +* EgBasedLib::ComputePipeRotation +* +* @brief +* Compute pipe rotation value +* @return +* Pipe rotation +**************************************************************************************************** +*/ +UINT_32 EgBasedLib::ComputePipeRotation( + AddrTileMode tileMode, ///< [in] tile mode + UINT_32 numPipes ///< [in] number of pipes + ) const +{ + UINT_32 rotation; + + switch (tileMode) + { + case ADDR_TM_3D_TILED_THIN1: //fall through + case ADDR_TM_3D_TILED_THICK: //fall through + case ADDR_TM_3D_TILED_XTHICK: //fall through + case ADDR_TM_PRT_3D_TILED_THIN1: //fall through + case ADDR_TM_PRT_3D_TILED_THICK: + rotation = (numPipes < 4) ? 
1 : (numPipes / 2 - 1); + break; + default: + rotation = 0; + } + + return rotation; +} + +/** +**************************************************************************************************** +* EgBasedLib::ComputeBankRotation +* +* @brief +* Compute bank rotation value +* @return +* Bank rotation +**************************************************************************************************** +*/ +UINT_32 EgBasedLib::ComputeBankRotation( + AddrTileMode tileMode, ///< [in] tile mode + UINT_32 numBanks, ///< [in] number of banks + UINT_32 numPipes ///< [in] number of pipes + ) const +{ + UINT_32 rotation; + + switch (tileMode) + { + case ADDR_TM_2D_TILED_THIN1: // fall through + case ADDR_TM_2D_TILED_THICK: // fall through + case ADDR_TM_2D_TILED_XTHICK: + case ADDR_TM_PRT_2D_TILED_THIN1: + case ADDR_TM_PRT_2D_TILED_THICK: + // Rotate banks per Z-slice yBit 1 for 4-bank or 3 for 8-bank + rotation = numBanks / 2 - 1; + break; + case ADDR_TM_3D_TILED_THIN1: // fall through + case ADDR_TM_3D_TILED_THICK: // fall through + case ADDR_TM_3D_TILED_XTHICK: + case ADDR_TM_PRT_3D_TILED_THIN1: + case ADDR_TM_PRT_3D_TILED_THICK: + rotation = (numPipes < 4) ? 
1 : (numPipes / 2 - 1); // rotate pipes & banks + break; + default: + rotation = 0; + } + + return rotation; +} + +/** +**************************************************************************************************** +* EgBasedLib::ComputeHtileBytes +* +* @brief +* Compute htile size in bytes +* +* @return +* Htile size in bytes +**************************************************************************************************** +*/ +UINT_64 EgBasedLib::ComputeHtileBytes( + UINT_32 pitch, ///< [in] pitch + UINT_32 height, ///< [in] height + UINT_32 bpp, ///< [in] bits per pixel + BOOL_32 isLinear, ///< [in] if it is linear mode + UINT_32 numSlices, ///< [in] number of slices + UINT_64* sliceBytes, ///< [out] bytes per slice + UINT_32 baseAlign ///< [in] base alignments + ) const +{ + UINT_64 surfBytes; + + const UINT_64 HtileCacheLineSize = BITS_TO_BYTES(HtileCacheBits); + + *sliceBytes = BITS_TO_BYTES(static_cast(pitch) * height * bpp / 64); + + if (m_configFlags.useHtileSliceAlign) + { + // Align the sliceSize to htilecachelinesize * pipes at first + *sliceBytes = PowTwoAlign(*sliceBytes, HtileCacheLineSize * m_pipes); + surfBytes = *sliceBytes * numSlices; + } + else + { + // Align the surfSize to htilecachelinesize * pipes at last + surfBytes = *sliceBytes * numSlices; + surfBytes = PowTwoAlign(surfBytes, HtileCacheLineSize * m_pipes); + } + + return surfBytes; +} + +/** +**************************************************************************************************** +* EgBasedLib::DispatchComputeFmaskInfo +* +* @brief +* Compute fmask sizes include padded pitch, height, slices, total size in bytes, +* meanwhile output suitable tile mode and alignments as well. Results are returned +* through output parameters. 
+* +* @return +* ADDR_E_RETURNCODE +**************************************************************************************************** +*/ +ADDR_E_RETURNCODE EgBasedLib::DispatchComputeFmaskInfo( + const ADDR_COMPUTE_FMASK_INFO_INPUT* pIn, ///< [in] input structure + ADDR_COMPUTE_FMASK_INFO_OUTPUT* pOut) ///< [out] output structure +{ + ADDR_E_RETURNCODE retCode = ADDR_OK; + + ADDR_COMPUTE_SURFACE_INFO_INPUT surfIn = {0}; + ADDR_COMPUTE_SURFACE_INFO_OUTPUT surfOut = {0}; + + // Setup input structure + surfIn.tileMode = pIn->tileMode; + surfIn.width = pIn->pitch; + surfIn.height = pIn->height; + surfIn.numSlices = pIn->numSlices; + surfIn.pTileInfo = pIn->pTileInfo; + surfIn.tileType = ADDR_NON_DISPLAYABLE; + surfIn.flags.fmask = 1; + + // Setup output structure + surfOut.pTileInfo = pOut->pTileInfo; + + // Setup hwl specific fields + HwlFmaskPreThunkSurfInfo(pIn, pOut, &surfIn, &surfOut); + + surfIn.bpp = HwlComputeFmaskBits(pIn, &surfIn.numSamples); + + // ComputeSurfaceInfo needs numSamples in surfOut as surface routines need adjusted numSamples + surfOut.numSamples = surfIn.numSamples; + + retCode = HwlComputeSurfaceInfo(&surfIn, &surfOut); + + // Save bpp field for surface dump support + surfOut.bpp = surfIn.bpp; + + if (retCode == ADDR_OK) + { + pOut->bpp = surfOut.bpp; + pOut->pitch = surfOut.pitch; + pOut->height = surfOut.height; + pOut->numSlices = surfOut.depth; + pOut->fmaskBytes = surfOut.surfSize; + pOut->baseAlign = surfOut.baseAlign; + pOut->pitchAlign = surfOut.pitchAlign; + pOut->heightAlign = surfOut.heightAlign; + + if (surfOut.depth > 1) + { + // For fmask, expNumSlices is stored in depth. 
+ pOut->sliceSize = surfOut.surfSize / surfOut.depth; + } + else + { + pOut->sliceSize = surfOut.surfSize; + } + + // Save numSamples field for surface dump support + pOut->numSamples = surfOut.numSamples; + + HwlFmaskPostThunkSurfInfo(&surfOut, pOut); + } + + return retCode; +} + +/** +**************************************************************************************************** +* EgBasedLib::HwlFmaskSurfaceInfo +* @brief +* Entry of EgBasedLib ComputeFmaskInfo +* @return +* ADDR_E_RETURNCODE +**************************************************************************************************** +*/ +ADDR_E_RETURNCODE EgBasedLib::HwlComputeFmaskInfo( + const ADDR_COMPUTE_FMASK_INFO_INPUT* pIn, ///< [in] input structure + ADDR_COMPUTE_FMASK_INFO_OUTPUT* pOut ///< [out] output structure + ) +{ + ADDR_E_RETURNCODE retCode = ADDR_OK; + + ADDR_TILEINFO tileInfo = {0}; + + // Use internal tile info if pOut does not have a valid pTileInfo + if (pOut->pTileInfo == NULL) + { + pOut->pTileInfo = &tileInfo; + } + + retCode = DispatchComputeFmaskInfo(pIn, pOut); + + if (retCode == ADDR_OK) + { + pOut->tileIndex = + HwlPostCheckTileIndex(pOut->pTileInfo, pIn->tileMode, ADDR_NON_DISPLAYABLE, + pOut->tileIndex); + } + + // Resets pTileInfo to NULL if the internal tile info is used + if (pOut->pTileInfo == &tileInfo) + { + pOut->pTileInfo = NULL; + } + + return retCode; +} + +/** +**************************************************************************************************** +* EgBasedLib::HwlComputeFmaskAddrFromCoord +* @brief +* Entry of EgBasedLib ComputeFmaskAddrFromCoord +* @return +* ADDR_E_RETURNCODE +**************************************************************************************************** +*/ +ADDR_E_RETURNCODE EgBasedLib::HwlComputeFmaskAddrFromCoord( + const ADDR_COMPUTE_FMASK_ADDRFROMCOORD_INPUT* pIn, ///< [in] input structure + ADDR_COMPUTE_FMASK_ADDRFROMCOORD_OUTPUT* pOut ///< [out] output structure + ) const +{ + ADDR_E_RETURNCODE retCode = 
ADDR_OK; + + return retCode; +} + +/** +**************************************************************************************************** +* EgBasedLib::HwlComputeFmaskCoordFromAddr +* @brief +* Entry of EgBasedLib ComputeFmaskCoordFromAddr +* @return +* ADDR_E_RETURNCODE +**************************************************************************************************** +*/ +ADDR_E_RETURNCODE EgBasedLib::HwlComputeFmaskCoordFromAddr( + const ADDR_COMPUTE_FMASK_COORDFROMADDR_INPUT* pIn, ///< [in] input structure + ADDR_COMPUTE_FMASK_COORDFROMADDR_OUTPUT* pOut ///< [out] output structure + ) const +{ + ADDR_E_RETURNCODE retCode = ADDR_OK; + + return retCode; +} + +/** +**************************************************************************************************** +* EgBasedLib::ComputeFmaskNumPlanesFromNumSamples +* +* @brief +* Compute fmask number of planes from number of samples +* +* @return +* Number of planes +**************************************************************************************************** +*/ +UINT_32 EgBasedLib::ComputeFmaskNumPlanesFromNumSamples( + UINT_32 numSamples) ///< [in] number of samples +{ + UINT_32 numPlanes; + + // + // FMASK is stored such that each micro tile is composed of elements containing N bits, where + // N is the number of samples. There is a micro tile for each bit in the FMASK address, and + // micro tiles for each address bit, sometimes referred to as a plane, are stored sequentially. + // The FMASK for a 2-sample surface looks like a general surface with 2 bits per element. + // The FMASK for a 4-sample surface looks like a general surface with 4 bits per element and + // 2 samples. The FMASK for an 8-sample surface looks like a general surface with 8 bits per + // element and 4 samples. R6xx and R7xx only stored 3 planes for 8-sample FMASK surfaces. + // This was changed for R8xx to simplify the logic in the CB. 
+ // + switch (numSamples) + { + case 2: + numPlanes = 1; + break; + case 4: + numPlanes = 2; + break; + case 8: + numPlanes = 4; + break; + default: + ADDR_UNHANDLED_CASE(); + numPlanes = 0; + break; + } + return numPlanes; +} + +/** +**************************************************************************************************** +* EgBasedLib::ComputeFmaskResolvedBppFromNumSamples +* +* @brief +* Compute resolved fmask effective bpp based on number of samples +* +* @return +* bpp +**************************************************************************************************** +*/ +UINT_32 EgBasedLib::ComputeFmaskResolvedBppFromNumSamples( + UINT_32 numSamples) ///< number of samples +{ + UINT_32 bpp; + + // + // Resolved FMASK surfaces are generated yBit the CB and read yBit the texture unit + // so that the texture unit can read compressed multi-sample color data. + // These surfaces store each index value packed per element. + // Each element contains at least num_samples * log2(num_samples) bits. + // Resolved FMASK surfaces are addressed as follows: + // 2-sample Addressed similarly to a color surface with 8 bits per element and 1 sample. + // 4-sample Addressed similarly to a color surface with 8 bits per element and 1 sample. + // 8-sample Addressed similarly to a color surface with 32 bits per element and 1 sample. 
+ + switch (numSamples) + { + case 2: + bpp = 8; + break; + case 4: + bpp = 8; + break; + case 8: + bpp = 32; + break; + default: + ADDR_UNHANDLED_CASE(); + bpp = 0; + break; + } + return bpp; +} + +/** +**************************************************************************************************** +* EgBasedLib::IsTileInfoAllZero +* +* @brief +* Return TRUE if all field are zero +* @note +* Since NULL input is consider to be all zero +**************************************************************************************************** +*/ +BOOL_32 EgBasedLib::IsTileInfoAllZero( + const ADDR_TILEINFO* pTileInfo) +{ + BOOL_32 allZero = TRUE; + + if (pTileInfo) + { + if ((pTileInfo->banks != 0) || + (pTileInfo->bankWidth != 0) || + (pTileInfo->bankHeight != 0) || + (pTileInfo->macroAspectRatio != 0) || + (pTileInfo->tileSplitBytes != 0) || + (pTileInfo->pipeConfig != 0) + ) + { + allZero = FALSE; + } + } + + return allZero; +} + +/** +**************************************************************************************************** +* EgBasedLib::HwlTileInfoEqual +* +* @brief +* Return TRUE if all field are equal +* @note +* Only takes care of current HWL's data +**************************************************************************************************** +*/ +BOOL_32 EgBasedLib::HwlTileInfoEqual( + const ADDR_TILEINFO* pLeft, ///<[in] Left compare operand + const ADDR_TILEINFO* pRight ///<[in] Right compare operand + ) const +{ + BOOL_32 equal = FALSE; + + if (pLeft->banks == pRight->banks && + pLeft->bankWidth == pRight->bankWidth && + pLeft->bankHeight == pRight->bankHeight && + pLeft->macroAspectRatio == pRight->macroAspectRatio && + pLeft->tileSplitBytes == pRight->tileSplitBytes) + { + equal = TRUE; + } + + return equal; +} + +/** +**************************************************************************************************** +* EgBasedLib::HwlConvertTileInfoToHW +* @brief +* Entry of EgBasedLib ConvertTileInfoToHW +* @return +* 
ADDR_E_RETURNCODE +**************************************************************************************************** +*/ +ADDR_E_RETURNCODE EgBasedLib::HwlConvertTileInfoToHW( + const ADDR_CONVERT_TILEINFOTOHW_INPUT* pIn, ///< [in] input structure + ADDR_CONVERT_TILEINFOTOHW_OUTPUT* pOut ///< [out] output structure + ) const +{ + ADDR_E_RETURNCODE retCode = ADDR_OK; + + ADDR_TILEINFO *pTileInfoIn = pIn->pTileInfo; + ADDR_TILEINFO *pTileInfoOut = pOut->pTileInfo; + + if ((pTileInfoIn != NULL) && (pTileInfoOut != NULL)) + { + if (pIn->reverse == FALSE) + { + switch (pTileInfoIn->banks) + { + case 2: + pTileInfoOut->banks = 0; + break; + case 4: + pTileInfoOut->banks = 1; + break; + case 8: + pTileInfoOut->banks = 2; + break; + case 16: + pTileInfoOut->banks = 3; + break; + default: + ADDR_ASSERT_ALWAYS(); + retCode = ADDR_INVALIDPARAMS; + pTileInfoOut->banks = 0; + break; + } + + switch (pTileInfoIn->bankWidth) + { + case 1: + pTileInfoOut->bankWidth = 0; + break; + case 2: + pTileInfoOut->bankWidth = 1; + break; + case 4: + pTileInfoOut->bankWidth = 2; + break; + case 8: + pTileInfoOut->bankWidth = 3; + break; + default: + ADDR_ASSERT_ALWAYS(); + retCode = ADDR_INVALIDPARAMS; + pTileInfoOut->bankWidth = 0; + break; + } + + switch (pTileInfoIn->bankHeight) + { + case 1: + pTileInfoOut->bankHeight = 0; + break; + case 2: + pTileInfoOut->bankHeight = 1; + break; + case 4: + pTileInfoOut->bankHeight = 2; + break; + case 8: + pTileInfoOut->bankHeight = 3; + break; + default: + ADDR_ASSERT_ALWAYS(); + retCode = ADDR_INVALIDPARAMS; + pTileInfoOut->bankHeight = 0; + break; + } + + switch (pTileInfoIn->macroAspectRatio) + { + case 1: + pTileInfoOut->macroAspectRatio = 0; + break; + case 2: + pTileInfoOut->macroAspectRatio = 1; + break; + case 4: + pTileInfoOut->macroAspectRatio = 2; + break; + case 8: + pTileInfoOut->macroAspectRatio = 3; + break; + default: + ADDR_ASSERT_ALWAYS(); + retCode = ADDR_INVALIDPARAMS; + pTileInfoOut->macroAspectRatio = 0; + break; + } + + 
switch (pTileInfoIn->tileSplitBytes) + { + case 64: + pTileInfoOut->tileSplitBytes = 0; + break; + case 128: + pTileInfoOut->tileSplitBytes = 1; + break; + case 256: + pTileInfoOut->tileSplitBytes = 2; + break; + case 512: + pTileInfoOut->tileSplitBytes = 3; + break; + case 1024: + pTileInfoOut->tileSplitBytes = 4; + break; + case 2048: + pTileInfoOut->tileSplitBytes = 5; + break; + case 4096: + pTileInfoOut->tileSplitBytes = 6; + break; + default: + ADDR_ASSERT_ALWAYS(); + retCode = ADDR_INVALIDPARAMS; + pTileInfoOut->tileSplitBytes = 0; + break; + } + } + else + { + switch (pTileInfoIn->banks) + { + case 0: + pTileInfoOut->banks = 2; + break; + case 1: + pTileInfoOut->banks = 4; + break; + case 2: + pTileInfoOut->banks = 8; + break; + case 3: + pTileInfoOut->banks = 16; + break; + default: + ADDR_ASSERT_ALWAYS(); + retCode = ADDR_INVALIDPARAMS; + pTileInfoOut->banks = 2; + break; + } + + switch (pTileInfoIn->bankWidth) + { + case 0: + pTileInfoOut->bankWidth = 1; + break; + case 1: + pTileInfoOut->bankWidth = 2; + break; + case 2: + pTileInfoOut->bankWidth = 4; + break; + case 3: + pTileInfoOut->bankWidth = 8; + break; + default: + ADDR_ASSERT_ALWAYS(); + retCode = ADDR_INVALIDPARAMS; + pTileInfoOut->bankWidth = 1; + break; + } + + switch (pTileInfoIn->bankHeight) + { + case 0: + pTileInfoOut->bankHeight = 1; + break; + case 1: + pTileInfoOut->bankHeight = 2; + break; + case 2: + pTileInfoOut->bankHeight = 4; + break; + case 3: + pTileInfoOut->bankHeight = 8; + break; + default: + ADDR_ASSERT_ALWAYS(); + retCode = ADDR_INVALIDPARAMS; + pTileInfoOut->bankHeight = 1; + break; + } + + switch (pTileInfoIn->macroAspectRatio) + { + case 0: + pTileInfoOut->macroAspectRatio = 1; + break; + case 1: + pTileInfoOut->macroAspectRatio = 2; + break; + case 2: + pTileInfoOut->macroAspectRatio = 4; + break; + case 3: + pTileInfoOut->macroAspectRatio = 8; + break; + default: + ADDR_ASSERT_ALWAYS(); + retCode = ADDR_INVALIDPARAMS; + pTileInfoOut->macroAspectRatio = 1; + break; + } 
+ + switch (pTileInfoIn->tileSplitBytes) + { + case 0: + pTileInfoOut->tileSplitBytes = 64; + break; + case 1: + pTileInfoOut->tileSplitBytes = 128; + break; + case 2: + pTileInfoOut->tileSplitBytes = 256; + break; + case 3: + pTileInfoOut->tileSplitBytes = 512; + break; + case 4: + pTileInfoOut->tileSplitBytes = 1024; + break; + case 5: + pTileInfoOut->tileSplitBytes = 2048; + break; + case 6: + pTileInfoOut->tileSplitBytes = 4096; + break; + default: + ADDR_ASSERT_ALWAYS(); + retCode = ADDR_INVALIDPARAMS; + pTileInfoOut->tileSplitBytes = 64; + break; + } + } + + if (pTileInfoIn != pTileInfoOut) + { + pTileInfoOut->pipeConfig = pTileInfoIn->pipeConfig; + } + } + else + { + ADDR_ASSERT_ALWAYS(); + retCode = ADDR_INVALIDPARAMS; + } + + return retCode; +} + +/** +**************************************************************************************************** +* EgBasedLib::HwlComputeSurfaceInfo +* @brief +* Entry of EgBasedLib ComputeSurfaceInfo +* @return +* ADDR_E_RETURNCODE +**************************************************************************************************** +*/ +ADDR_E_RETURNCODE EgBasedLib::HwlComputeSurfaceInfo( + const ADDR_COMPUTE_SURFACE_INFO_INPUT* pIn, ///< [in] input structure + ADDR_COMPUTE_SURFACE_INFO_OUTPUT* pOut ///< [out] output structure + ) const +{ + ADDR_E_RETURNCODE retCode = ADDR_OK; + + if (pIn->numSamples < pIn->numFrags) + { + retCode = ADDR_INVALIDPARAMS; + } + + ADDR_TILEINFO tileInfo = {0}; + + if (retCode == ADDR_OK) + { + // Uses internal tile info if pOut does not have a valid pTileInfo + if (pOut->pTileInfo == NULL) + { + pOut->pTileInfo = &tileInfo; + } + + if (DispatchComputeSurfaceInfo(pIn, pOut) == FALSE) + { + retCode = ADDR_INVALIDPARAMS; + } + + // In case client uses tile info as input and would like to calculate a correct size and + // alignment together with tile info as output when the tile info is not suppose to have any + // matching indices in tile mode tables. 
+ if (pIn->flags.skipIndicesOutput == FALSE) + { + // Returns an index + pOut->tileIndex = HwlPostCheckTileIndex(pOut->pTileInfo, + pOut->tileMode, + pOut->tileType, + pOut->tileIndex); + + if (IsMacroTiled(pOut->tileMode) && (pOut->macroModeIndex == TileIndexInvalid)) + { + pOut->macroModeIndex = HwlComputeMacroModeIndex(pOut->tileIndex, + pIn->flags, + pIn->bpp, + pIn->numSamples, + pOut->pTileInfo); + } + } + + // Resets pTileInfo to NULL if the internal tile info is used + if (pOut->pTileInfo == &tileInfo) + { +#if DEBUG + // Client does not pass in a valid pTileInfo + if (IsMacroTiled(pOut->tileMode)) + { + // If a valid index is returned, then no pTileInfo is okay + ADDR_ASSERT((m_configFlags.useTileIndex == FALSE) || + (pOut->tileIndex != TileIndexInvalid)); + + if (IsTileInfoAllZero(pIn->pTileInfo) == FALSE) + { + // The initial value of pIn->pTileInfo is copied to tileInfo + // We do not expect any of these value to be changed nor any 0 of inputs + ADDR_ASSERT(tileInfo.banks == pIn->pTileInfo->banks); + ADDR_ASSERT(tileInfo.bankWidth == pIn->pTileInfo->bankWidth); + ADDR_ASSERT(tileInfo.bankHeight == pIn->pTileInfo->bankHeight); + ADDR_ASSERT(tileInfo.macroAspectRatio == pIn->pTileInfo->macroAspectRatio); + ADDR_ASSERT(tileInfo.tileSplitBytes == pIn->pTileInfo->tileSplitBytes); + } + } +#endif + pOut->pTileInfo = NULL; + } + } + + return retCode; +} + +/** +**************************************************************************************************** +* EgBasedLib::HwlComputeSurfaceAddrFromCoord +* @brief +* Entry of EgBasedLib ComputeSurfaceAddrFromCoord +* @return +* ADDR_E_RETURNCODE +**************************************************************************************************** +*/ +ADDR_E_RETURNCODE EgBasedLib::HwlComputeSurfaceAddrFromCoord( + const ADDR_COMPUTE_SURFACE_ADDRFROMCOORD_INPUT* pIn, ///< [in] input structure + ADDR_COMPUTE_SURFACE_ADDRFROMCOORD_OUTPUT* pOut ///< [out] output structure + ) const +{ + ADDR_E_RETURNCODE retCode = 
ADDR_OK; + + if ( +#if !ALT_TEST // Overflow test needs this out-of-boundary coord + (pIn->x > pIn->pitch) || + (pIn->y > pIn->height) || +#endif + (pIn->numSamples > m_maxSamples)) + { + retCode = ADDR_INVALIDPARAMS; + } + else + { + pOut->addr = DispatchComputeSurfaceAddrFromCoord(pIn, pOut); + } + + return retCode; +} + +/** +**************************************************************************************************** +* EgBasedLib::HwlComputeSurfaceCoordFromAddr +* @brief +* Entry of EgBasedLib ComputeSurfaceCoordFromAddr +* @return +* ADDR_E_RETURNCODE +**************************************************************************************************** +*/ +ADDR_E_RETURNCODE EgBasedLib::HwlComputeSurfaceCoordFromAddr( + const ADDR_COMPUTE_SURFACE_COORDFROMADDR_INPUT* pIn, ///< [in] input structure + ADDR_COMPUTE_SURFACE_COORDFROMADDR_OUTPUT* pOut ///< [out] output structure + ) const +{ + ADDR_E_RETURNCODE retCode = ADDR_OK; + + if ((pIn->bitPosition >= 8) || + (pIn->numSamples > m_maxSamples)) + { + retCode = ADDR_INVALIDPARAMS; + } + else + { + DispatchComputeSurfaceCoordFromAddr(pIn, pOut); + } + return retCode; +} + +/** +**************************************************************************************************** +* EgBasedLib::HwlComputeSliceTileSwizzle +* @brief +* Entry of EgBasedLib ComputeSurfaceCoordFromAddr +* @return +* ADDR_E_RETURNCODE +**************************************************************************************************** +*/ +ADDR_E_RETURNCODE EgBasedLib::HwlComputeSliceTileSwizzle( + const ADDR_COMPUTE_SLICESWIZZLE_INPUT* pIn, ///< [in] input structure + ADDR_COMPUTE_SLICESWIZZLE_OUTPUT* pOut ///< [out] output structure + ) const +{ + ADDR_E_RETURNCODE retCode = ADDR_OK; + + if (pIn->pTileInfo && (pIn->pTileInfo->banks > 0)) + { + + pOut->tileSwizzle = ComputeSliceTileSwizzle(pIn->tileMode, + pIn->baseSwizzle, + pIn->slice, + pIn->baseAddr, + pIn->pTileInfo); + } + else + { + retCode = ADDR_INVALIDPARAMS; + } + + 
return retCode; +} + +/** +**************************************************************************************************** +* EgBasedLib::HwlComputeHtileBpp +* +* @brief +* Compute htile bpp +* +* @return +* Htile bpp +**************************************************************************************************** +*/ +UINT_32 EgBasedLib::HwlComputeHtileBpp( + BOOL_32 isWidth8, ///< [in] TRUE if block width is 8 + BOOL_32 isHeight8 ///< [in] TRUE if block height is 8 + ) const +{ + // only support 8x8 mode + ADDR_ASSERT(isWidth8 && isHeight8); + return 32; +} + +/** +**************************************************************************************************** +* EgBasedLib::HwlComputeHtileBaseAlign +* +* @brief +* Compute htile base alignment +* +* @return +* Htile base alignment +**************************************************************************************************** +*/ +UINT_32 EgBasedLib::HwlComputeHtileBaseAlign( + BOOL_32 isTcCompatible, ///< [in] if TC compatible + BOOL_32 isLinear, ///< [in] if it is linear mode + ADDR_TILEINFO* pTileInfo ///< [in] Tile info + ) const +{ + UINT_32 baseAlign = m_pipeInterleaveBytes * HwlGetPipes(pTileInfo); + + if (isTcCompatible) + { + ADDR_ASSERT(pTileInfo != NULL); + if (pTileInfo) + { + baseAlign *= pTileInfo->banks; + } + } + + return baseAlign; +} + +/** +**************************************************************************************************** +* EgBasedLib::HwlGetPitchAlignmentMicroTiled +* +* @brief +* Compute 1D tiled surface pitch alignment, calculation results are returned through +* output parameters. 
+* +* @return +* pitch alignment +**************************************************************************************************** +*/ +UINT_32 EgBasedLib::HwlGetPitchAlignmentMicroTiled( + AddrTileMode tileMode, ///< [in] tile mode + UINT_32 bpp, ///< [in] bits per pixel + ADDR_SURFACE_FLAGS flags, ///< [in] surface flags + UINT_32 numSamples ///< [in] number of samples + ) const +{ + UINT_32 pitchAlign; + + UINT_32 microTileThickness = Thickness(tileMode); + + UINT_32 pixelsPerMicroTile; + UINT_32 pixelsPerPipeInterleave; + UINT_32 microTilesPerPipeInterleave; + + // + // Special workaround for depth/stencil buffer, use 8 bpp to meet larger requirement for + // stencil buffer since pitch alignment is related to bpp. + // For a depth only buffer do not set this. + // + // Note: this actually does not work for mipmap but mipmap depth texture is not really + // sampled with mipmap. + // + if (flags.depth && (flags.noStencil == FALSE)) + { + bpp = 8; + } + + pixelsPerMicroTile = MicroTilePixels * microTileThickness; + pixelsPerPipeInterleave = BYTES_TO_BITS(m_pipeInterleaveBytes) / (bpp * numSamples); + microTilesPerPipeInterleave = pixelsPerPipeInterleave / pixelsPerMicroTile; + + pitchAlign = Max(MicroTileWidth, microTilesPerPipeInterleave * MicroTileWidth); + + return pitchAlign; +} + +/** +**************************************************************************************************** +* EgBasedLib::HwlGetSizeAdjustmentMicroTiled +* +* @brief +* Adjust 1D tiled surface pitch and slice size +* +* @return +* Logical slice size in bytes +**************************************************************************************************** +*/ +UINT_64 EgBasedLib::HwlGetSizeAdjustmentMicroTiled( + UINT_32 thickness, ///< [in] thickness + UINT_32 bpp, ///< [in] bits per pixel + ADDR_SURFACE_FLAGS flags, ///< [in] surface flags + UINT_32 numSamples, ///< [in] number of samples + UINT_32 baseAlign, ///< [in] base alignment + UINT_32 pitchAlign, ///< [in] pitch 
alignment + UINT_32* pPitch, ///< [in,out] pointer to pitch + UINT_32* pHeight ///< [in,out] pointer to height + ) const +{ + UINT_64 logicalSliceSize; + MAYBE_UNUSED UINT_64 physicalSliceSize; + + UINT_32 pitch = *pPitch; + UINT_32 height = *pHeight; + + // Logical slice: pitch * height * bpp * numSamples (no 1D MSAA so actually numSamples == 1) + logicalSliceSize = BITS_TO_BYTES(static_cast(pitch) * height * bpp * numSamples); + + // Physical slice: multiplied by thickness + physicalSliceSize = logicalSliceSize * thickness; + + // + // R800 will always pad physical slice size to baseAlign which is pipe_interleave_bytes + // + ADDR_ASSERT((physicalSliceSize % baseAlign) == 0); + + return logicalSliceSize; +} + +/** +**************************************************************************************************** +* EgBasedLib::HwlStereoCheckRightOffsetPadding +* +* @brief +* check if the height needs extra padding for stereo right eye offset, to avoid swizzling +* +* @return +* TRUE is the extra padding is needed +* +**************************************************************************************************** +*/ +UINT_32 EgBasedLib::HwlStereoCheckRightOffsetPadding( + ADDR_TILEINFO* pTileInfo ///< Tiling info + ) const +{ + UINT_32 stereoHeightAlign = 0; + + if (pTileInfo->macroAspectRatio > 2) + { + // Since 3D rendering treats right eye surface starting from y == "eye height" while + // display engine treats it to be 0, so the bank bits may be different. + // Additional padding in height is required to make sure it's possible + // to achieve synonym by adjusting bank swizzle of right eye surface. 
+ + static const UINT_32 StereoAspectRatio = 2; + stereoHeightAlign = pTileInfo->banks * + pTileInfo->bankHeight * + MicroTileHeight / + StereoAspectRatio; + } + + return stereoHeightAlign; +} + +} // V1 +} // Addr diff -Nru mesa-18.3.3/src/amd/addrlib/src/r800/egbaddrlib.h mesa-19.0.1/src/amd/addrlib/src/r800/egbaddrlib.h --- mesa-18.3.3/src/amd/addrlib/src/r800/egbaddrlib.h 1970-01-01 00:00:00.000000000 +0000 +++ mesa-19.0.1/src/amd/addrlib/src/r800/egbaddrlib.h 2019-03-31 23:16:37.000000000 +0000 @@ -0,0 +1,430 @@ +/* + * Copyright © 2007-2018 Advanced Micro Devices, Inc. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining + * a copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES + * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NON-INFRINGEMENT. IN NO EVENT SHALL THE COPYRIGHT HOLDERS, AUTHORS + * AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE + * USE OR OTHER DEALINGS IN THE SOFTWARE. + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + */ + +/** +**************************************************************************************************** +* @file egbaddrlib.h +* @brief Contains the EgBasedLib class definition. 
+**************************************************************************************************** +*/ + +#ifndef __EG_BASED_ADDR_LIB_H__ +#define __EG_BASED_ADDR_LIB_H__ + +#include "addrlib1.h" + +namespace Addr +{ +namespace V1 +{ +/// Structures for functions +struct CoordFromBankPipe +{ + UINT_32 xBits : 3; + UINT_32 yBits : 4; + + UINT_32 xBit3 : 1; + UINT_32 xBit4 : 1; + UINT_32 xBit5 : 1; + UINT_32 yBit3 : 1; + UINT_32 yBit4 : 1; + UINT_32 yBit5 : 1; + UINT_32 yBit6 : 1; +}; + +/** +**************************************************************************************************** +* @brief This class is the Evergreen based address library +* @note Abstract class +**************************************************************************************************** +*/ +class EgBasedLib : public Lib +{ +protected: + EgBasedLib(const Client* pClient); + virtual ~EgBasedLib(); + +public: + + /// Surface info functions + + // NOTE: DispatchComputeSurfaceInfo using TileInfo takes both an input and an output. + // On input: + // One or more fields may be 0 to be calculated/defaulted - pre-SI h/w. + // H/W using tile mode index only accepts none or all 0's - SI and newer h/w. + // It then returns the actual tiling configuration used. 
+ // Other methods' TileInfo must be valid on entry + BOOL_32 DispatchComputeSurfaceInfo( + const ADDR_COMPUTE_SURFACE_INFO_INPUT* pIn, + ADDR_COMPUTE_SURFACE_INFO_OUTPUT* pOut) const; + + ADDR_E_RETURNCODE DispatchComputeFmaskInfo( + const ADDR_COMPUTE_FMASK_INFO_INPUT* pIn, + ADDR_COMPUTE_FMASK_INFO_OUTPUT* pOut); + +protected: + // Hwl interface + virtual ADDR_E_RETURNCODE HwlComputeSurfaceInfo( + const ADDR_COMPUTE_SURFACE_INFO_INPUT* pIn, + ADDR_COMPUTE_SURFACE_INFO_OUTPUT* pOut) const; + + virtual ADDR_E_RETURNCODE HwlComputeSurfaceAddrFromCoord( + const ADDR_COMPUTE_SURFACE_ADDRFROMCOORD_INPUT* pIn, + ADDR_COMPUTE_SURFACE_ADDRFROMCOORD_OUTPUT* pOut) const; + + virtual ADDR_E_RETURNCODE HwlComputeSurfaceCoordFromAddr( + const ADDR_COMPUTE_SURFACE_COORDFROMADDR_INPUT* pIn, + ADDR_COMPUTE_SURFACE_COORDFROMADDR_OUTPUT* pOut) const; + + virtual ADDR_E_RETURNCODE HwlComputeSliceTileSwizzle( + const ADDR_COMPUTE_SLICESWIZZLE_INPUT* pIn, + ADDR_COMPUTE_SLICESWIZZLE_OUTPUT* pOut) const; + + virtual ADDR_E_RETURNCODE HwlExtractBankPipeSwizzle( + const ADDR_EXTRACT_BANKPIPE_SWIZZLE_INPUT* pIn, + ADDR_EXTRACT_BANKPIPE_SWIZZLE_OUTPUT* pOut) const; + + virtual ADDR_E_RETURNCODE HwlCombineBankPipeSwizzle( + UINT_32 bankSwizzle, UINT_32 pipeSwizzle, ADDR_TILEINFO* pTileInfo, + UINT_64 baseAddr, UINT_32* pTileSwizzle) const; + + virtual ADDR_E_RETURNCODE HwlComputeBaseSwizzle( + const ADDR_COMPUTE_BASE_SWIZZLE_INPUT* pIn, + ADDR_COMPUTE_BASE_SWIZZLE_OUTPUT* pOut) const; + + virtual ADDR_E_RETURNCODE HwlConvertTileInfoToHW( + const ADDR_CONVERT_TILEINFOTOHW_INPUT* pIn, + ADDR_CONVERT_TILEINFOTOHW_OUTPUT* pOut) const; + + virtual UINT_32 HwlComputeHtileBpp( + BOOL_32 isWidth8, BOOL_32 isHeight8) const; + + virtual UINT_32 HwlComputeHtileBaseAlign( + BOOL_32 isTcCompatible, BOOL_32 isLinear, ADDR_TILEINFO* pTileInfo) const; + + virtual ADDR_E_RETURNCODE HwlComputeFmaskInfo( + const ADDR_COMPUTE_FMASK_INFO_INPUT* pIn, + ADDR_COMPUTE_FMASK_INFO_OUTPUT* pOut); + + virtual 
ADDR_E_RETURNCODE HwlComputeFmaskAddrFromCoord( + const ADDR_COMPUTE_FMASK_ADDRFROMCOORD_INPUT* pIn, + ADDR_COMPUTE_FMASK_ADDRFROMCOORD_OUTPUT* pOut) const; + + virtual ADDR_E_RETURNCODE HwlComputeFmaskCoordFromAddr( + const ADDR_COMPUTE_FMASK_COORDFROMADDR_INPUT* pIn, + ADDR_COMPUTE_FMASK_COORDFROMADDR_OUTPUT* pOut) const; + + virtual BOOL_32 HwlGetAlignmentInfoMacroTiled( + const ADDR_COMPUTE_SURFACE_INFO_INPUT* pIn, + UINT_32* pPitchAlign, UINT_32* pHeightAlign, UINT_32* pSizeAlign) const; + + virtual UINT_32 HwlComputeQbStereoRightSwizzle( + ADDR_COMPUTE_SURFACE_INFO_OUTPUT* pInfo) const; + + virtual VOID HwlComputePixelCoordFromOffset( + UINT_32 offset, UINT_32 bpp, UINT_32 numSamples, + AddrTileMode tileMode, UINT_32 tileBase, UINT_32 compBits, + UINT_32* pX, UINT_32* pY, UINT_32* pSlice, UINT_32* pSample, + AddrTileType microTileType, BOOL_32 isDepthSampleOrder) const; + + /// Return Cmask block max + virtual BOOL_32 HwlGetMaxCmaskBlockMax() const + { + return 0x3FFF; // 14 bits, 0n16383 + } + + // Sub-hwl interface + /// Pure virtual function to setup tile info (indices) if client requests to do so + virtual VOID HwlSetupTileInfo( + AddrTileMode tileMode, ADDR_SURFACE_FLAGS flags, + UINT_32 bpp, UINT_32 pitch, UINT_32 height, UINT_32 numSamples, + ADDR_TILEINFO* inputTileInfo, ADDR_TILEINFO* outputTileInfo, + AddrTileType inTileType, ADDR_COMPUTE_SURFACE_INFO_OUTPUT* pOut) const = 0; + + /// Pure virtual function to get pitch alignment for linear modes + virtual UINT_32 HwlGetPitchAlignmentLinear(UINT_32 bpp, ADDR_SURFACE_FLAGS flags) const = 0; + + /// Pure virtual function to get size adjustment for linear modes + virtual UINT_64 HwlGetSizeAdjustmentLinear( + AddrTileMode tileMode, + UINT_32 bpp, UINT_32 numSamples, UINT_32 baseAlign, UINT_32 pitchAlign, + UINT_32 *pPitch, UINT_32 *pHeight, UINT_32 *pHeightAlign) const = 0; + + virtual UINT_32 HwlGetPitchAlignmentMicroTiled( + AddrTileMode tileMode, UINT_32 bpp, ADDR_SURFACE_FLAGS flags, UINT_32 
numSamples) const; + + virtual UINT_64 HwlGetSizeAdjustmentMicroTiled( + UINT_32 thickness, UINT_32 bpp, ADDR_SURFACE_FLAGS flags, UINT_32 numSamples, + UINT_32 baseAlign, UINT_32 pitchAlign, + UINT_32 *pPitch, UINT_32 *pHeight) const; + + /// Pure virtual function to do extra sanity check + virtual BOOL_32 HwlSanityCheckMacroTiled( + ADDR_TILEINFO* pTileInfo) const = 0; + + /// Pure virtual function to check current level to be the last macro tiled one + virtual VOID HwlCheckLastMacroTiledLvl( + const ADDR_COMPUTE_SURFACE_INFO_INPUT* pIn, + ADDR_COMPUTE_SURFACE_INFO_OUTPUT* pOut) const = 0; + + /// Adjusts bank before bank is modified by rotation + virtual UINT_32 HwlPreAdjustBank( + UINT_32 tileX, UINT_32 bank, ADDR_TILEINFO* pTileInfo) const = 0; + + virtual VOID HwlComputeSurfaceCoord2DFromBankPipe( + AddrTileMode tileMode, UINT_32* pX, UINT_32* pY, UINT_32 slice, + UINT_32 bank, UINT_32 pipe, + UINT_32 bankSwizzle, UINT_32 pipeSwizzle, UINT_32 tileSlices, + BOOL_32 ignoreSE, + ADDR_TILEINFO* pTileInfo) const = 0; + + virtual BOOL_32 HwlTileInfoEqual( + const ADDR_TILEINFO* pLeft, const ADDR_TILEINFO* pRight) const; + + virtual AddrTileMode HwlDegradeThickTileMode( + AddrTileMode baseTileMode, UINT_32 numSlices, UINT_32* pBytesPerTile) const; + + virtual INT_32 HwlPostCheckTileIndex( + const ADDR_TILEINFO* pInfo, AddrTileMode mode, AddrTileType type, + INT curIndex = TileIndexInvalid) const + { + return TileIndexInvalid; + } + + virtual VOID HwlFmaskPreThunkSurfInfo( + const ADDR_COMPUTE_FMASK_INFO_INPUT* pFmaskIn, + const ADDR_COMPUTE_FMASK_INFO_OUTPUT* pFmaskOut, + ADDR_COMPUTE_SURFACE_INFO_INPUT* pSurfIn, + ADDR_COMPUTE_SURFACE_INFO_OUTPUT* pSurfOut) const + { + } + + virtual VOID HwlFmaskPostThunkSurfInfo( + const ADDR_COMPUTE_SURFACE_INFO_OUTPUT* pSurfOut, + ADDR_COMPUTE_FMASK_INFO_OUTPUT* pFmaskOut) const + { + } + + virtual UINT_32 HwlStereoCheckRightOffsetPadding(ADDR_TILEINFO* pTileInfo) const; + + virtual BOOL_32 HwlReduceBankWidthHeight( + UINT_32 
tileSize, UINT_32 bpp, ADDR_SURFACE_FLAGS flags, UINT_32 numSamples, + UINT_32 bankHeightAlign, UINT_32 pipes, + ADDR_TILEINFO* pTileInfo) const; + + // Protected non-virtual functions + + /// Mip level functions + AddrTileMode ComputeSurfaceMipLevelTileMode( + AddrTileMode baseTileMode, UINT_32 bpp, + UINT_32 pitch, UINT_32 height, UINT_32 numSlices, UINT_32 numSamples, + UINT_32 pitchAlign, UINT_32 heightAlign, + ADDR_TILEINFO* pTileInfo) const; + + /// Swizzle functions + VOID ExtractBankPipeSwizzle( + UINT_32 base256b, ADDR_TILEINFO* pTileInfo, + UINT_32* pBankSwizzle, UINT_32* pPipeSwizzle) const; + + UINT_32 GetBankPipeSwizzle( + UINT_32 bankSwizzle, UINT_32 pipeSwizzle, + UINT_64 baseAddr, ADDR_TILEINFO* pTileInfo) const; + + UINT_32 ComputeSliceTileSwizzle( + AddrTileMode tileMode, UINT_32 baseSwizzle, UINT_32 slice, UINT_64 baseAddr, + ADDR_TILEINFO* pTileInfo) const; + + /// Addressing functions + virtual ADDR_E_RETURNCODE ComputeBankEquation( + UINT_32 log2BytesPP, UINT_32 threshX, UINT_32 threshY, + ADDR_TILEINFO* pTileInfo, ADDR_EQUATION* pEquation) const + { + return ADDR_NOTSUPPORTED; + } + + UINT_32 ComputeBankFromCoord( + UINT_32 x, UINT_32 y, UINT_32 slice, + AddrTileMode tileMode, UINT_32 bankSwizzle, UINT_32 tileSpitSlice, + ADDR_TILEINFO* pTileInfo) const; + + UINT_32 ComputeBankFromAddr( + UINT_64 addr, UINT_32 numBanks, UINT_32 numPipes) const; + + UINT_32 ComputePipeRotation( + AddrTileMode tileMode, UINT_32 numPipes) const; + + UINT_32 ComputeBankRotation( + AddrTileMode tileMode, UINT_32 numBanks, + UINT_32 numPipes) const; + + VOID ComputeSurfaceCoord2DFromBankPipe( + AddrTileMode tileMode, UINT_32 x, UINT_32 y, UINT_32 slice, + UINT_32 bank, UINT_32 pipe, + UINT_32 bankSwizzle, UINT_32 pipeSwizzle, UINT_32 tileSlices, + ADDR_TILEINFO* pTileInfo, + CoordFromBankPipe *pOutput) const; + + /// Htile/Cmask functions + UINT_64 ComputeHtileBytes( + UINT_32 pitch, UINT_32 height, UINT_32 bpp, + BOOL_32 isLinear, UINT_32 numSlices, UINT_64* 
sliceBytes, UINT_32 baseAlign) const; + + ADDR_E_RETURNCODE ComputeMacroTileEquation( + UINT_32 log2BytesPP, AddrTileMode tileMode, AddrTileType microTileType, + ADDR_TILEINFO* pTileInfo, ADDR_EQUATION* pEquation) const; + + // Static functions + static BOOL_32 IsTileInfoAllZero(const ADDR_TILEINFO* pTileInfo); + static UINT_32 ComputeFmaskNumPlanesFromNumSamples(UINT_32 numSamples); + static UINT_32 ComputeFmaskResolvedBppFromNumSamples(UINT_32 numSamples); + + virtual VOID HwlComputeSurfaceAlignmentsMacroTiled( + AddrTileMode tileMode, UINT_32 bpp, ADDR_SURFACE_FLAGS flags, + UINT_32 mipLevel, UINT_32 numSamples, ADDR_COMPUTE_SURFACE_INFO_OUTPUT* pOut) const + { + } + +private: + + BOOL_32 ComputeSurfaceInfoLinear( + const ADDR_COMPUTE_SURFACE_INFO_INPUT* pIn, + ADDR_COMPUTE_SURFACE_INFO_OUTPUT* pOut, + UINT_32 padDims) const; + + BOOL_32 ComputeSurfaceInfoMicroTiled( + const ADDR_COMPUTE_SURFACE_INFO_INPUT* pIn, + ADDR_COMPUTE_SURFACE_INFO_OUTPUT* pOut, + UINT_32 padDims, + AddrTileMode expTileMode) const; + + BOOL_32 ComputeSurfaceInfoMacroTiled( + const ADDR_COMPUTE_SURFACE_INFO_INPUT* pIn, + ADDR_COMPUTE_SURFACE_INFO_OUTPUT* pOut, + UINT_32 padDims, + AddrTileMode expTileMode) const; + + BOOL_32 ComputeSurfaceAlignmentsLinear( + AddrTileMode tileMode, UINT_32 bpp, ADDR_SURFACE_FLAGS flags, + UINT_32* pBaseAlign, UINT_32* pPitchAlign, UINT_32* pHeightAlign) const; + + BOOL_32 ComputeSurfaceAlignmentsMicroTiled( + AddrTileMode tileMode, UINT_32 bpp, ADDR_SURFACE_FLAGS flags, + UINT_32 mipLevel, UINT_32 numSamples, + UINT_32* pBaseAlign, UINT_32* pPitchAlign, UINT_32* pHeightAlign) const; + + BOOL_32 ComputeSurfaceAlignmentsMacroTiled( + AddrTileMode tileMode, UINT_32 bpp, ADDR_SURFACE_FLAGS flags, + UINT_32 mipLevel, UINT_32 numSamples, + ADDR_COMPUTE_SURFACE_INFO_OUTPUT* pOut) const; + + /// Surface addressing functions + UINT_64 DispatchComputeSurfaceAddrFromCoord( + const ADDR_COMPUTE_SURFACE_ADDRFROMCOORD_INPUT* pIn, + 
ADDR_COMPUTE_SURFACE_ADDRFROMCOORD_OUTPUT* pOut) const; + + VOID DispatchComputeSurfaceCoordFromAddr( + const ADDR_COMPUTE_SURFACE_COORDFROMADDR_INPUT* pIn, + ADDR_COMPUTE_SURFACE_COORDFROMADDR_OUTPUT* pOut) const; + + UINT_64 ComputeSurfaceAddrFromCoordMicroTiled( + UINT_32 x, UINT_32 y, UINT_32 slice, UINT_32 sample, + UINT_32 bpp, UINT_32 pitch, UINT_32 height, UINT_32 numSamples, + AddrTileMode tileMode, + AddrTileType microTileType, BOOL_32 isDepthSampleOrder, + UINT_32* pBitPosition) const; + + UINT_64 ComputeSurfaceAddrFromCoordMacroTiled( + UINT_32 x, UINT_32 y, UINT_32 slice, UINT_32 sample, + UINT_32 bpp, UINT_32 pitch, UINT_32 height, UINT_32 numSamples, + AddrTileMode tileMode, + AddrTileType microTileType, BOOL_32 ignoreSE, BOOL_32 isDepthSampleOrder, + UINT_32 pipeSwizzle, UINT_32 bankSwizzle, + ADDR_TILEINFO* pTileInfo, + UINT_32* pBitPosition) const; + + VOID ComputeSurfaceCoordFromAddrMacroTiled( + UINT_64 addr, UINT_32 bitPosition, + UINT_32 bpp, UINT_32 pitch, UINT_32 height, UINT_32 numSamples, + AddrTileMode tileMode, UINT_32 tileBase, UINT_32 compBits, + AddrTileType microTileType, BOOL_32 ignoreSE, BOOL_32 isDepthSampleOrder, + UINT_32 pipeSwizzle, UINT_32 bankSwizzle, + ADDR_TILEINFO* pTileInfo, + UINT_32* pX, UINT_32* pY, UINT_32* pSlice, UINT_32* pSample) const; + + /// Fmask functions + UINT_64 DispatchComputeFmaskAddrFromCoord( + const ADDR_COMPUTE_FMASK_ADDRFROMCOORD_INPUT* pIn, + ADDR_COMPUTE_FMASK_ADDRFROMCOORD_OUTPUT* pOut) const; + + VOID DispatchComputeFmaskCoordFromAddr( + const ADDR_COMPUTE_FMASK_COORDFROMADDR_INPUT* pIn, + ADDR_COMPUTE_FMASK_COORDFROMADDR_OUTPUT* pOut) const; + + // FMASK related methods - private + UINT_64 ComputeFmaskAddrFromCoordMicroTiled( + UINT_32 x, UINT_32 y, UINT_32 slice, UINT_32 sample, UINT_32 plane, + UINT_32 pitch, UINT_32 height, UINT_32 numSamples, AddrTileMode tileMode, + BOOL_32 resolved, UINT_32* pBitPosition) const; + + VOID ComputeFmaskCoordFromAddrMicroTiled( + UINT_64 addr, UINT_32 
bitPosition, + UINT_32 pitch, UINT_32 height, UINT_32 numSamples, + AddrTileMode tileMode, BOOL_32 resolved, + UINT_32* pX, UINT_32* pY, UINT_32* pSlice, UINT_32* pSample, UINT_32* pPlane) const; + + VOID ComputeFmaskCoordFromAddrMacroTiled( + UINT_64 addr, UINT_32 bitPosition, + UINT_32 pitch, UINT_32 height, UINT_32 numSamples, AddrTileMode tileMode, + UINT_32 pipeSwizzle, UINT_32 bankSwizzle, + BOOL_32 ignoreSE, + ADDR_TILEINFO* pTileInfo, + BOOL_32 resolved, + UINT_32* pX, UINT_32* pY, UINT_32* pSlice, UINT_32* pSample, UINT_32* pPlane) const; + + UINT_64 ComputeFmaskAddrFromCoordMacroTiled( + UINT_32 x, UINT_32 y, UINT_32 slice, UINT_32 sample, UINT_32 plane, + UINT_32 pitch, UINT_32 height, UINT_32 numSamples, + AddrTileMode tileMode, UINT_32 pipeSwizzle, UINT_32 bankSwizzle, + BOOL_32 ignoreSE, + ADDR_TILEINFO* pTileInfo, + BOOL_32 resolved, + UINT_32* pBitPosition) const; + + /// Sanity check functions + BOOL_32 SanityCheckMacroTiled( + ADDR_TILEINFO* pTileInfo) const; + +protected: + UINT_32 m_ranks; ///< Number of ranks - MC_ARB_RAMCFG.NOOFRANK + UINT_32 m_logicalBanks; ///< Logical banks = m_banks * m_ranks if m_banks != 16 + UINT_32 m_bankInterleave; ///< Bank interleave, as a multiple of pipe interleave size +}; + +} // V1 +} // Addr + +#endif + diff -Nru mesa-18.3.3/src/amd/addrlib/src/r800/siaddrlib.cpp mesa-19.0.1/src/amd/addrlib/src/r800/siaddrlib.cpp --- mesa-18.3.3/src/amd/addrlib/src/r800/siaddrlib.cpp 1970-01-01 00:00:00.000000000 +0000 +++ mesa-19.0.1/src/amd/addrlib/src/r800/siaddrlib.cpp 2019-03-31 23:16:37.000000000 +0000 @@ -0,0 +1,3872 @@ +/* + * Copyright © 2007-2018 Advanced Micro Devices, Inc. + * All Rights Reserved. 
+ * + * Permission is hereby granted, free of charge, to any person obtaining + * a copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES + * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NON-INFRINGEMENT. IN NO EVENT SHALL THE COPYRIGHT HOLDERS, AUTHORS + * AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE + * USE OR OTHER DEALINGS IN THE SOFTWARE. + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + */ + +/** +**************************************************************************************************** +* @file siaddrlib.cpp +* @brief Contains the implementation for the SiLib class. +**************************************************************************************************** +*/ + +#include "siaddrlib.h" +#include "si_gb_reg.h" + +#include "amdgpu_asic_addr.h" + +//////////////////////////////////////////////////////////////////////////////////////////////////// +//////////////////////////////////////////////////////////////////////////////////////////////////// +namespace Addr +{ + +/** +**************************************************************************************************** +* SiHwlInit +* +* @brief +* Creates an SiLib object. +* +* @return +* Returns an SiLib object pointer. 
+**************************************************************************************************** +*/ +Lib* SiHwlInit(const Client* pClient) +{ + return V1::SiLib::CreateObj(pClient); +} + +namespace V1 +{ + +// We don't support MSAA for equation +const BOOL_32 SiLib::m_EquationSupport[SiLib::TileTableSize][SiLib::MaxNumElementBytes] = +{ + {TRUE, TRUE, TRUE, FALSE, FALSE}, // 0, non-AA compressed depth or any stencil + {FALSE, FALSE, FALSE, FALSE, FALSE}, // 1, 2xAA/4xAA compressed depth with or without stencil + {FALSE, FALSE, FALSE, FALSE, FALSE}, // 2, 8xAA compressed depth with or without stencil + {FALSE, TRUE, FALSE, FALSE, FALSE}, // 3, 16 bpp depth PRT (non-MSAA), don't support uncompressed depth + {TRUE, TRUE, TRUE, FALSE, FALSE}, // 4, 1D depth + {FALSE, FALSE, FALSE, FALSE, FALSE}, // 5, 16 bpp depth PRT (4xMSAA) + {FALSE, FALSE, TRUE, FALSE, FALSE}, // 6, 32 bpp depth PRT (non-MSAA) + {FALSE, FALSE, FALSE, FALSE, FALSE}, // 7, 32 bpp depth PRT (4xMSAA) + {TRUE, TRUE, TRUE, TRUE, TRUE }, // 8, Linear + {TRUE, TRUE, TRUE, TRUE, TRUE }, // 9, 1D display + {TRUE, FALSE, FALSE, FALSE, FALSE}, // 10, 8 bpp color (displayable) + {FALSE, TRUE, FALSE, FALSE, FALSE}, // 11, 16 bpp color (displayable) + {FALSE, FALSE, TRUE, TRUE, FALSE}, // 12, 32/64 bpp color (displayable) + {TRUE, TRUE, TRUE, TRUE, TRUE }, // 13, 1D thin + {TRUE, FALSE, FALSE, FALSE, FALSE}, // 14, 8 bpp color non-displayable + {FALSE, TRUE, FALSE, FALSE, FALSE}, // 15, 16 bpp color non-displayable + {FALSE, FALSE, TRUE, FALSE, FALSE}, // 16, 32 bpp color non-displayable + {FALSE, FALSE, FALSE, TRUE, TRUE }, // 17, 64/128 bpp color non-displayable + {TRUE, TRUE, TRUE, TRUE, TRUE }, // 18, 1D THICK + {FALSE, FALSE, FALSE, FALSE, FALSE}, // 19, 2D XTHICK + {FALSE, FALSE, FALSE, FALSE, FALSE}, // 20, 2D THICK + {TRUE, FALSE, FALSE, FALSE, FALSE}, // 21, 8 bpp 2D PRTs (non-MSAA) + {FALSE, TRUE, FALSE, FALSE, FALSE}, // 22, 16 bpp 2D PRTs (non-MSAA) + {FALSE, FALSE, TRUE, FALSE, FALSE}, // 23, 
32 bpp 2D PRTs (non-MSAA) + {FALSE, FALSE, FALSE, TRUE, FALSE}, // 24, 64 bpp 2D PRTs (non-MSAA) + {FALSE, FALSE, FALSE, FALSE, TRUE }, // 25, 128bpp 2D PRTs (non-MSAA) + {FALSE, FALSE, FALSE, FALSE, FALSE}, // 26, none + {FALSE, FALSE, FALSE, FALSE, FALSE}, // 27, none + {FALSE, FALSE, FALSE, FALSE, FALSE}, // 28, none + {FALSE, FALSE, FALSE, FALSE, FALSE}, // 29, none + {FALSE, FALSE, FALSE, FALSE, FALSE}, // 30, 64bpp 2D PRTs (4xMSAA) + {FALSE, FALSE, FALSE, FALSE, FALSE}, // 31, none +}; + +/** +**************************************************************************************************** +* SiLib::SiLib +* +* @brief +* Constructor +* +**************************************************************************************************** +*/ +SiLib::SiLib(const Client* pClient) + : + EgBasedLib(pClient), + m_noOfEntries(0), + m_numEquations(0) +{ + m_class = SI_ADDRLIB; + memset(&m_settings, 0, sizeof(m_settings)); +} + +/** +**************************************************************************************************** +* SiLib::~SiLib +* +* @brief +* Destructor +**************************************************************************************************** +*/ +SiLib::~SiLib() +{ +} + +/** +**************************************************************************************************** +* SiLib::HwlGetPipes +* +* @brief +* Get number pipes +* @return +* num pipes +**************************************************************************************************** +*/ +UINT_32 SiLib::HwlGetPipes( + const ADDR_TILEINFO* pTileInfo ///< [in] Tile info + ) const +{ + UINT_32 numPipes; + + if (pTileInfo) + { + numPipes = GetPipePerSurf(pTileInfo->pipeConfig); + } + else + { + ADDR_ASSERT_ALWAYS(); + numPipes = m_pipes; // Suppose we should still have a global pipes + } + + return numPipes; +} + +/** +**************************************************************************************************** +* SiLib::GetPipePerSurf +* @brief +* get pipe 
num base on inputing tileinfo->pipeconfig +* @return +* pipe number +**************************************************************************************************** +*/ +UINT_32 SiLib::GetPipePerSurf( + AddrPipeCfg pipeConfig ///< [in] pipe config + ) const +{ + UINT_32 numPipes = 0; + + switch (pipeConfig) + { + case ADDR_PIPECFG_P2: + numPipes = 2; + break; + case ADDR_PIPECFG_P4_8x16: + case ADDR_PIPECFG_P4_16x16: + case ADDR_PIPECFG_P4_16x32: + case ADDR_PIPECFG_P4_32x32: + numPipes = 4; + break; + case ADDR_PIPECFG_P8_16x16_8x16: + case ADDR_PIPECFG_P8_16x32_8x16: + case ADDR_PIPECFG_P8_32x32_8x16: + case ADDR_PIPECFG_P8_16x32_16x16: + case ADDR_PIPECFG_P8_32x32_16x16: + case ADDR_PIPECFG_P8_32x32_16x32: + case ADDR_PIPECFG_P8_32x64_32x32: + numPipes = 8; + break; + case ADDR_PIPECFG_P16_32x32_8x16: + case ADDR_PIPECFG_P16_32x32_16x16: + numPipes = 16; + break; + default: + ADDR_ASSERT(!"Invalid pipe config"); + numPipes = m_pipes; + } + return numPipes; +} + +/** +**************************************************************************************************** +* SiLib::ComputeBankEquation +* +* @brief +* Compute bank equation +* +* @return +* If equation can be computed +**************************************************************************************************** +*/ +ADDR_E_RETURNCODE SiLib::ComputeBankEquation( + UINT_32 log2BytesPP, ///< [in] log2 of bytes per pixel + UINT_32 threshX, ///< [in] threshold for x channel + UINT_32 threshY, ///< [in] threshold for y channel + ADDR_TILEINFO* pTileInfo, ///< [in] tile info + ADDR_EQUATION* pEquation ///< [out] bank equation + ) const +{ + ADDR_E_RETURNCODE retCode = ADDR_OK; + + UINT_32 pipes = HwlGetPipes(pTileInfo); + UINT_32 bankXStart = 3 + Log2(pipes) + Log2(pTileInfo->bankWidth); + UINT_32 bankYStart = 3 + Log2(pTileInfo->bankHeight); + + ADDR_CHANNEL_SETTING x3 = InitChannel(1, 0, log2BytesPP + bankXStart); + ADDR_CHANNEL_SETTING x4 = InitChannel(1, 0, log2BytesPP + bankXStart + 1); + 
ADDR_CHANNEL_SETTING x5 = InitChannel(1, 0, log2BytesPP + bankXStart + 2); + ADDR_CHANNEL_SETTING x6 = InitChannel(1, 0, log2BytesPP + bankXStart + 3); + ADDR_CHANNEL_SETTING y3 = InitChannel(1, 1, bankYStart); + ADDR_CHANNEL_SETTING y4 = InitChannel(1, 1, bankYStart + 1); + ADDR_CHANNEL_SETTING y5 = InitChannel(1, 1, bankYStart + 2); + ADDR_CHANNEL_SETTING y6 = InitChannel(1, 1, bankYStart + 3); + + x3.value = (threshX > bankXStart) ? x3.value : 0; + x4.value = (threshX > bankXStart + 1) ? x4.value : 0; + x5.value = (threshX > bankXStart + 2) ? x5.value : 0; + x6.value = (threshX > bankXStart + 3) ? x6.value : 0; + y3.value = (threshY > bankYStart) ? y3.value : 0; + y4.value = (threshY > bankYStart + 1) ? y4.value : 0; + y5.value = (threshY > bankYStart + 2) ? y5.value : 0; + y6.value = (threshY > bankYStart + 3) ? y6.value : 0; + + switch (pTileInfo->banks) + { + case 16: + if (pTileInfo->macroAspectRatio == 1) + { + pEquation->addr[0] = y6; + pEquation->xor1[0] = x3; + pEquation->addr[1] = y5; + pEquation->xor1[1] = y6; + pEquation->xor2[1] = x4; + pEquation->addr[2] = y4; + pEquation->xor1[2] = x5; + pEquation->addr[3] = y3; + pEquation->xor1[3] = x6; + } + else if (pTileInfo->macroAspectRatio == 2) + { + pEquation->addr[0] = x3; + pEquation->xor1[0] = y6; + pEquation->addr[1] = y5; + pEquation->xor1[1] = y6; + pEquation->xor2[1] = x4; + pEquation->addr[2] = y4; + pEquation->xor1[2] = x5; + pEquation->addr[3] = y3; + pEquation->xor1[3] = x6; + } + else if (pTileInfo->macroAspectRatio == 4) + { + pEquation->addr[0] = x3; + pEquation->xor1[0] = y6; + pEquation->addr[1] = x4; + pEquation->xor1[1] = y5; + pEquation->xor2[1] = y6; + pEquation->addr[2] = y4; + pEquation->xor1[2] = x5; + pEquation->addr[3] = y3; + pEquation->xor1[3] = x6; + } + else if (pTileInfo->macroAspectRatio == 8) + { + pEquation->addr[0] = x3; + pEquation->xor1[0] = y6; + pEquation->addr[1] = x4; + pEquation->xor1[1] = y5; + pEquation->xor2[1] = y6; + pEquation->addr[2] = x5; + 
pEquation->xor1[2] = y4; + pEquation->addr[3] = y3; + pEquation->xor1[3] = x6; + } + else + { + ADDR_ASSERT_ALWAYS(); + } + pEquation->numBits = 4; + break; + case 8: + if (pTileInfo->macroAspectRatio == 1) + { + pEquation->addr[0] = y5; + pEquation->xor1[0] = x3; + pEquation->addr[1] = y4; + pEquation->xor1[1] = y5; + pEquation->xor2[1] = x4; + pEquation->addr[2] = y3; + pEquation->xor1[2] = x5; + } + else if (pTileInfo->macroAspectRatio == 2) + { + pEquation->addr[0] = x3; + pEquation->xor1[0] = y5; + pEquation->addr[1] = y4; + pEquation->xor1[1] = y5; + pEquation->xor2[1] = x4; + pEquation->addr[2] = y3; + pEquation->xor1[2] = x5; + } + else if (pTileInfo->macroAspectRatio == 4) + { + pEquation->addr[0] = x3; + pEquation->xor1[0] = y5; + pEquation->addr[1] = x4; + pEquation->xor1[1] = y4; + pEquation->xor2[1] = y5; + pEquation->addr[2] = y3; + pEquation->xor1[2] = x5; + } + else + { + ADDR_ASSERT_ALWAYS(); + } + pEquation->numBits = 3; + break; + case 4: + if (pTileInfo->macroAspectRatio == 1) + { + pEquation->addr[0] = y4; + pEquation->xor1[0] = x3; + pEquation->addr[1] = y3; + pEquation->xor1[1] = x4; + } + else if (pTileInfo->macroAspectRatio == 2) + { + pEquation->addr[0] = x3; + pEquation->xor1[0] = y4; + pEquation->addr[1] = y3; + pEquation->xor1[1] = x4; + } + else + { + pEquation->addr[0] = x3; + pEquation->xor1[0] = y4; + pEquation->addr[1] = x4; + pEquation->xor1[1] = y3; + } + pEquation->numBits = 2; + break; + case 2: + if (pTileInfo->macroAspectRatio == 1) + { + pEquation->addr[0] = y3; + pEquation->xor1[0] = x3; + } + else + { + pEquation->addr[0] = x3; + pEquation->xor1[0] = y3; + } + pEquation->numBits = 1; + break; + default: + pEquation->numBits = 0; + retCode = ADDR_NOTSUPPORTED; + ADDR_ASSERT_ALWAYS(); + break; + } + + for (UINT_32 i = 0; i < pEquation->numBits; i++) + { + if (pEquation->addr[i].value == 0) + { + if (pEquation->xor1[i].value == 0) + { + // 00X -> X00 + pEquation->addr[i].value = pEquation->xor2[i].value; + 
pEquation->xor2[i].value = 0; + } + else + { + pEquation->addr[i].value = pEquation->xor1[i].value; + + if (pEquation->xor2[i].value != 0) + { + // 0XY -> XY0 + pEquation->xor1[i].value = pEquation->xor2[i].value; + pEquation->xor2[i].value = 0; + } + else + { + // 0X0 -> X00 + pEquation->xor1[i].value = 0; + } + } + } + else if (pEquation->xor1[i].value == 0) + { + if (pEquation->xor2[i].value != 0) + { + // X0Y -> XY0 + pEquation->xor1[i].value = pEquation->xor2[i].value; + pEquation->xor2[i].value = 0; + } + } + } + + if ((pTileInfo->bankWidth == 1) && + ((pTileInfo->pipeConfig == ADDR_PIPECFG_P4_32x32) || + (pTileInfo->pipeConfig == ADDR_PIPECFG_P8_32x64_32x32))) + { + retCode = ADDR_NOTSUPPORTED; + } + + return retCode; +} + +/** +**************************************************************************************************** +* SiLib::ComputePipeEquation +* +* @brief +* Compute pipe equation +* +* @return +* If equation can be computed +**************************************************************************************************** +*/ +ADDR_E_RETURNCODE SiLib::ComputePipeEquation( + UINT_32 log2BytesPP, ///< [in] Log2 of bytes per pixel + UINT_32 threshX, ///< [in] Threshold for X channel + UINT_32 threshY, ///< [in] Threshold for Y channel + ADDR_TILEINFO* pTileInfo, ///< [in] Tile info + ADDR_EQUATION* pEquation ///< [out] Pipe configure + ) const +{ + ADDR_E_RETURNCODE retCode = ADDR_OK; + + ADDR_CHANNEL_SETTING* pAddr = pEquation->addr; + ADDR_CHANNEL_SETTING* pXor1 = pEquation->xor1; + ADDR_CHANNEL_SETTING* pXor2 = pEquation->xor2; + + ADDR_CHANNEL_SETTING x3 = InitChannel(1, 0, 3 + log2BytesPP); + ADDR_CHANNEL_SETTING x4 = InitChannel(1, 0, 4 + log2BytesPP); + ADDR_CHANNEL_SETTING x5 = InitChannel(1, 0, 5 + log2BytesPP); + ADDR_CHANNEL_SETTING x6 = InitChannel(1, 0, 6 + log2BytesPP); + ADDR_CHANNEL_SETTING y3 = InitChannel(1, 1, 3); + ADDR_CHANNEL_SETTING y4 = InitChannel(1, 1, 4); + ADDR_CHANNEL_SETTING y5 = InitChannel(1, 1, 5); + 
ADDR_CHANNEL_SETTING y6 = InitChannel(1, 1, 6); + + x3.value = (threshX > 3) ? x3.value : 0; + x4.value = (threshX > 4) ? x4.value : 0; + x5.value = (threshX > 5) ? x5.value : 0; + x6.value = (threshX > 6) ? x6.value : 0; + y3.value = (threshY > 3) ? y3.value : 0; + y4.value = (threshY > 4) ? y4.value : 0; + y5.value = (threshY > 5) ? y5.value : 0; + y6.value = (threshY > 6) ? y6.value : 0; + + switch (pTileInfo->pipeConfig) + { + case ADDR_PIPECFG_P2: + pAddr[0] = x3; + pXor1[0] = y3; + pEquation->numBits = 1; + break; + case ADDR_PIPECFG_P4_8x16: + pAddr[0] = x4; + pXor1[0] = y3; + pAddr[1] = x3; + pXor1[1] = y4; + pEquation->numBits = 2; + break; + case ADDR_PIPECFG_P4_16x16: + pAddr[0] = x3; + pXor1[0] = y3; + pXor2[0] = x4; + pAddr[1] = x4; + pXor1[1] = y4; + pEquation->numBits = 2; + break; + case ADDR_PIPECFG_P4_16x32: + pAddr[0] = x3; + pXor1[0] = y3; + pXor2[0] = x4; + pAddr[1] = x4; + pXor1[1] = y5; + pEquation->numBits = 2; + break; + case ADDR_PIPECFG_P4_32x32: + pAddr[0] = x3; + pXor1[0] = y3; + pXor2[0] = x5; + pAddr[1] = x5; + pXor1[1] = y5; + pEquation->numBits = 2; + break; + case ADDR_PIPECFG_P8_16x16_8x16: + pAddr[0] = x4; + pXor1[0] = y3; + pXor2[0] = x5; + pAddr[1] = x3; + pXor1[1] = y5; + pEquation->numBits = 3; + break; + case ADDR_PIPECFG_P8_16x32_8x16: + pAddr[0] = x4; + pXor1[0] = y3; + pXor2[0] = x5; + pAddr[1] = x3; + pXor1[1] = y4; + pAddr[2] = x4; + pXor1[2] = y5; + pEquation->numBits = 3; + break; + case ADDR_PIPECFG_P8_16x32_16x16: + pAddr[0] = x3; + pXor1[0] = y3; + pXor2[0] = x4; + pAddr[1] = x5; + pXor1[1] = y4; + pAddr[2] = x4; + pXor1[2] = y5; + pEquation->numBits = 3; + break; + case ADDR_PIPECFG_P8_32x32_8x16: + pAddr[0] = x4; + pXor1[0] = y3; + pXor2[0] = x5; + pAddr[1] = x3; + pXor1[1] = y4; + pAddr[2] = x5; + pXor1[2] = y5; + pEquation->numBits = 3; + break; + case ADDR_PIPECFG_P8_32x32_16x16: + pAddr[0] = x3; + pXor1[0] = y3; + pXor2[0] = x4; + pAddr[1] = x4; + pXor1[1] = y4; + pAddr[2] = x5; + pXor1[2] = y5; + 
pEquation->numBits = 3; + break; + case ADDR_PIPECFG_P8_32x32_16x32: + pAddr[0] = x3; + pXor1[0] = y3; + pXor2[0] = x4; + pAddr[1] = x4; + pXor1[1] = y6; + pAddr[2] = x5; + pXor1[2] = y5; + pEquation->numBits = 3; + break; + case ADDR_PIPECFG_P8_32x64_32x32: + pAddr[0] = x3; + pXor1[0] = y3; + pXor2[0] = x5; + pAddr[1] = x6; + pXor1[1] = y5; + pAddr[2] = x5; + pXor1[2] = y6; + pEquation->numBits = 3; + break; + case ADDR_PIPECFG_P16_32x32_8x16: + pAddr[0] = x4; + pXor1[0] = y3; + pAddr[1] = x3; + pXor1[1] = y4; + pAddr[2] = x5; + pXor1[2] = y6; + pAddr[3] = x6; + pXor1[3] = y5; + pEquation->numBits = 4; + break; + case ADDR_PIPECFG_P16_32x32_16x16: + pAddr[0] = x3; + pXor1[0] = y3; + pXor2[0] = x4; + pAddr[1] = x4; + pXor1[1] = y4; + pAddr[2] = x5; + pXor1[2] = y6; + pAddr[3] = x6; + pXor1[3] = y5; + pEquation->numBits = 4; + break; + default: + ADDR_UNHANDLED_CASE(); + pEquation->numBits = 0; + retCode = ADDR_NOTSUPPORTED; + break; + } + + if (m_settings.isVegaM && (pEquation->numBits == 4)) + { + ADDR_CHANNEL_SETTING addeMsb = pAddr[0]; + ADDR_CHANNEL_SETTING xor1Msb = pXor1[0]; + ADDR_CHANNEL_SETTING xor2Msb = pXor2[0]; + + pAddr[0] = pAddr[1]; + pXor1[0] = pXor1[1]; + pXor2[0] = pXor2[1]; + + pAddr[1] = pAddr[2]; + pXor1[1] = pXor1[2]; + pXor2[1] = pXor2[2]; + + pAddr[2] = pAddr[3]; + pXor1[2] = pXor1[3]; + pXor2[2] = pXor2[3]; + + pAddr[3] = addeMsb; + pXor1[3] = xor1Msb; + pXor2[3] = xor2Msb; + } + + for (UINT_32 i = 0; i < pEquation->numBits; i++) + { + if (pAddr[i].value == 0) + { + if (pXor1[i].value == 0) + { + pAddr[i].value = pXor2[i].value; + } + else + { + pAddr[i].value = pXor1[i].value; + pXor1[i].value = 0; + } + } + } + + return retCode; +} + +/** +**************************************************************************************************** +* SiLib::ComputePipeFromCoord +* +* @brief +* Compute pipe number from coordinates +* @return +* Pipe number 
+**************************************************************************************************** +*/ +UINT_32 SiLib::ComputePipeFromCoord( + UINT_32 x, ///< [in] x coordinate + UINT_32 y, ///< [in] y coordinate + UINT_32 slice, ///< [in] slice index + AddrTileMode tileMode, ///< [in] tile mode + UINT_32 pipeSwizzle, ///< [in] pipe swizzle + BOOL_32 ignoreSE, ///< [in] TRUE if shader engines are ignored + ADDR_TILEINFO* pTileInfo ///< [in] Tile info + ) const +{ + UINT_32 pipe; + UINT_32 pipeBit0 = 0; + UINT_32 pipeBit1 = 0; + UINT_32 pipeBit2 = 0; + UINT_32 pipeBit3 = 0; + UINT_32 sliceRotation; + UINT_32 numPipes = 0; + + UINT_32 tx = x / MicroTileWidth; + UINT_32 ty = y / MicroTileHeight; + UINT_32 x3 = _BIT(tx,0); + UINT_32 x4 = _BIT(tx,1); + UINT_32 x5 = _BIT(tx,2); + UINT_32 x6 = _BIT(tx,3); + UINT_32 y3 = _BIT(ty,0); + UINT_32 y4 = _BIT(ty,1); + UINT_32 y5 = _BIT(ty,2); + UINT_32 y6 = _BIT(ty,3); + + switch (pTileInfo->pipeConfig) + { + case ADDR_PIPECFG_P2: + pipeBit0 = x3 ^ y3; + numPipes = 2; + break; + case ADDR_PIPECFG_P4_8x16: + pipeBit0 = x4 ^ y3; + pipeBit1 = x3 ^ y4; + numPipes = 4; + break; + case ADDR_PIPECFG_P4_16x16: + pipeBit0 = x3 ^ y3 ^ x4; + pipeBit1 = x4 ^ y4; + numPipes = 4; + break; + case ADDR_PIPECFG_P4_16x32: + pipeBit0 = x3 ^ y3 ^ x4; + pipeBit1 = x4 ^ y5; + numPipes = 4; + break; + case ADDR_PIPECFG_P4_32x32: + pipeBit0 = x3 ^ y3 ^ x5; + pipeBit1 = x5 ^ y5; + numPipes = 4; + break; + case ADDR_PIPECFG_P8_16x16_8x16: + pipeBit0 = x4 ^ y3 ^ x5; + pipeBit1 = x3 ^ y5; + numPipes = 8; + break; + case ADDR_PIPECFG_P8_16x32_8x16: + pipeBit0 = x4 ^ y3 ^ x5; + pipeBit1 = x3 ^ y4; + pipeBit2 = x4 ^ y5; + numPipes = 8; + break; + case ADDR_PIPECFG_P8_16x32_16x16: + pipeBit0 = x3 ^ y3 ^ x4; + pipeBit1 = x5 ^ y4; + pipeBit2 = x4 ^ y5; + numPipes = 8; + break; + case ADDR_PIPECFG_P8_32x32_8x16: + pipeBit0 = x4 ^ y3 ^ x5; + pipeBit1 = x3 ^ y4; + pipeBit2 = x5 ^ y5; + numPipes = 8; + break; + case ADDR_PIPECFG_P8_32x32_16x16: + pipeBit0 = x3 ^ 
y3 ^ x4; + pipeBit1 = x4 ^ y4; + pipeBit2 = x5 ^ y5; + numPipes = 8; + break; + case ADDR_PIPECFG_P8_32x32_16x32: + pipeBit0 = x3 ^ y3 ^ x4; + pipeBit1 = x4 ^ y6; + pipeBit2 = x5 ^ y5; + numPipes = 8; + break; + case ADDR_PIPECFG_P8_32x64_32x32: + pipeBit0 = x3 ^ y3 ^ x5; + pipeBit1 = x6 ^ y5; + pipeBit2 = x5 ^ y6; + numPipes = 8; + break; + case ADDR_PIPECFG_P16_32x32_8x16: + pipeBit0 = x4 ^ y3; + pipeBit1 = x3 ^ y4; + pipeBit2 = x5 ^ y6; + pipeBit3 = x6 ^ y5; + numPipes = 16; + break; + case ADDR_PIPECFG_P16_32x32_16x16: + pipeBit0 = x3 ^ y3 ^ x4; + pipeBit1 = x4 ^ y4; + pipeBit2 = x5 ^ y6; + pipeBit3 = x6 ^ y5; + numPipes = 16; + break; + default: + ADDR_UNHANDLED_CASE(); + break; + } + + if (m_settings.isVegaM && (numPipes == 16)) + { + UINT_32 pipeMsb = pipeBit0; + pipeBit0 = pipeBit1; + pipeBit1 = pipeBit2; + pipeBit2 = pipeBit3; + pipeBit3 = pipeMsb; + } + + pipe = pipeBit0 | (pipeBit1 << 1) | (pipeBit2 << 2) | (pipeBit3 << 3); + + UINT_32 microTileThickness = Thickness(tileMode); + + // + // Apply pipe rotation for the slice. 
+ // + switch (tileMode) + { + case ADDR_TM_3D_TILED_THIN1: //fall through thin + case ADDR_TM_3D_TILED_THICK: //fall through thick + case ADDR_TM_3D_TILED_XTHICK: + sliceRotation = + Max(1, static_cast(numPipes / 2) - 1) * (slice / microTileThickness); + break; + default: + sliceRotation = 0; + break; + } + pipeSwizzle += sliceRotation; + pipeSwizzle &= (numPipes - 1); + + pipe = pipe ^ pipeSwizzle; + + return pipe; +} + +/** +**************************************************************************************************** +* SiLib::ComputeTileCoordFromPipeAndElemIdx +* +* @brief +* Compute (x,y) of a tile within a macro tile from address +* @return +* Pipe number +**************************************************************************************************** +*/ +VOID SiLib::ComputeTileCoordFromPipeAndElemIdx( + UINT_32 elemIdx, ///< [in] per pipe element index within a macro tile + UINT_32 pipe, ///< [in] pipe index + AddrPipeCfg pipeCfg, ///< [in] pipe config + UINT_32 pitchInMacroTile, ///< [in] surface pitch in macro tile + UINT_32 x, ///< [in] x coordinate of the (0,0) tile in a macro tile + UINT_32 y, ///< [in] y coordinate of the (0,0) tile in a macro tile + UINT_32* pX, ///< [out] x coordinate + UINT_32* pY ///< [out] y coordinate + ) const +{ + UINT_32 pipebit0 = _BIT(pipe,0); + UINT_32 pipebit1 = _BIT(pipe,1); + UINT_32 pipebit2 = _BIT(pipe,2); + UINT_32 pipebit3 = _BIT(pipe,3); + UINT_32 elemIdx0 = _BIT(elemIdx,0); + UINT_32 elemIdx1 = _BIT(elemIdx,1); + UINT_32 elemIdx2 = _BIT(elemIdx,2); + UINT_32 x3 = 0; + UINT_32 x4 = 0; + UINT_32 x5 = 0; + UINT_32 x6 = 0; + UINT_32 y3 = 0; + UINT_32 y4 = 0; + UINT_32 y5 = 0; + UINT_32 y6 = 0; + + switch(pipeCfg) + { + case ADDR_PIPECFG_P2: + x4 = elemIdx2; + y4 = elemIdx1 ^ x4; + y3 = elemIdx0 ^ x4; + x3 = pipebit0 ^ y3; + *pY = Bits2Number(2, y4, y3); + *pX = Bits2Number(2, x4, x3); + break; + case ADDR_PIPECFG_P4_8x16: + x4 = elemIdx1; + y4 = elemIdx0 ^ x4; + x3 = pipebit1 ^ y4; + y3 = pipebit0 ^ x4; + 
*pY = Bits2Number(2, y4, y3); + *pX = Bits2Number(2, x4, x3); + break; + case ADDR_PIPECFG_P4_16x16: + x4 = elemIdx1; + y3 = elemIdx0 ^ x4; + y4 = pipebit1 ^ x4; + x3 = pipebit0 ^ y3 ^ x4; + *pY = Bits2Number(2, y4, y3); + *pX = Bits2Number(2, x4, x3); + break; + case ADDR_PIPECFG_P4_16x32: + x3 = elemIdx0 ^ pipebit0; + y5 = _BIT(y,5); + x4 = pipebit1 ^ y5; + y3 = pipebit0 ^ x3 ^ x4; + y4 = elemIdx1 ^ x4; + *pY = Bits2Number(2, y4, y3); + *pX = Bits2Number(2, x4, x3); + break; + case ADDR_PIPECFG_P4_32x32: + x4 = elemIdx2; + y3 = elemIdx0 ^ x4; + y4 = elemIdx1 ^ x4; + if((pitchInMacroTile % 2) == 0) + { //even + y5 = _BIT(y,5); + x5 = pipebit1 ^ y5; + x3 = pipebit0 ^ y3 ^ x5; + *pY = Bits2Number(2, y4, y3); + *pX = Bits2Number(3, x5, x4, x3); + } + else + { //odd + x5 = _BIT(x,5); + x3 = pipebit0 ^ y3 ^ x5; + *pY = Bits2Number(2, y4, y3); + *pX = Bits2Number(2, x4, x3); + } + break; + case ADDR_PIPECFG_P8_16x16_8x16: + x4 = elemIdx0; + y5 = _BIT(y,5); + x5 = _BIT(x,5); + x3 = pipebit1 ^ y5; + y4 = pipebit2 ^ x4; + y3 = pipebit0 ^ x5 ^ x4; + *pY = Bits2Number(2, y4, y3); + *pX = Bits2Number(2, x4, x3); + break; + case ADDR_PIPECFG_P8_16x32_8x16: + x3 = elemIdx0; + y4 = pipebit1 ^ x3; + y5 = _BIT(y,5); + x5 = _BIT(x,5); + x4 = pipebit2 ^ y5; + y3 = pipebit0 ^ x4 ^ x5; + *pY = Bits2Number(2, y4, y3); + *pX = Bits2Number(2, x4, x3); + break; + case ADDR_PIPECFG_P8_32x32_8x16: + x4 = elemIdx1; + y4 = elemIdx0 ^ x4; + x3 = pipebit1 ^ y4; + if((pitchInMacroTile % 2) == 0) + { //even + y5 = _BIT(y,5); + x5 = _BIT(x,5); + x5 = pipebit2 ^ y5; + y3 = pipebit0 ^ x4 ^ x5; + *pY = Bits2Number(2, y4, y3); + *pX = Bits2Number(3, x5, x4, x3); + } + else + { //odd + x5 = _BIT(x,5); + y3 = pipebit0 ^ x4 ^ x5; + *pY = Bits2Number(2, y4, y3); + *pX = Bits2Number(2, x4, x3); + } + break; + case ADDR_PIPECFG_P8_16x32_16x16: + x3 = elemIdx0; + x5 = _BIT(x,5); + y5 = _BIT(y,5); + x4 = pipebit2 ^ y5; + y4 = pipebit1 ^ x5; + y3 = pipebit0 ^ x3 ^ x4; + *pY = Bits2Number(2, y4, y3); + *pX = 
Bits2Number(2, x4, x3); + break; + case ADDR_PIPECFG_P8_32x32_16x16: + x4 = elemIdx1; + y3 = elemIdx0 ^ x4; + x3 = y3^x4^pipebit0; + y4 = pipebit1 ^ x4; + if((pitchInMacroTile % 2) == 0) + { //even + y5 = _BIT(y,5); + x5 = pipebit2 ^ y5; + *pY = Bits2Number(2, y4, y3); + *pX = Bits2Number(3, x5, x4, x3); + } + else + { //odd + *pY = Bits2Number(2, y4, y3); + *pX = Bits2Number(2, x4, x3); + } + break; + case ADDR_PIPECFG_P8_32x32_16x32: + if((pitchInMacroTile % 2) == 0) + { //even + y5 = _BIT(y,5); + y6 = _BIT(y,6); + x4 = pipebit1 ^ y6; + y3 = elemIdx0 ^ x4; + y4 = elemIdx1 ^ x4; + x3 = pipebit0 ^ y3 ^ x4; + x5 = pipebit2 ^ y5; + *pY = Bits2Number(2, y4, y3); + *pX = Bits2Number(3, x5, x4, x3); + } + else + { //odd + y6 = _BIT(y,6); + x4 = pipebit1 ^ y6; + y3 = elemIdx0 ^ x4; + y4 = elemIdx1 ^ x4; + x3 = pipebit0 ^ y3 ^ x4; + *pY = Bits2Number(2, y4, y3); + *pX = Bits2Number(2, x4, x3); + } + break; + case ADDR_PIPECFG_P8_32x64_32x32: + x4 = elemIdx2; + y3 = elemIdx0 ^ x4; + y4 = elemIdx1 ^ x4; + if((pitchInMacroTile % 4) == 0) + { //multiple of 4 + y5 = _BIT(y,5); + y6 = _BIT(y,6); + x5 = pipebit2 ^ y6; + x6 = pipebit1 ^ y5; + x3 = pipebit0 ^ y3 ^ x5; + *pY = Bits2Number(2, y4, y3); + *pX = Bits2Number(4, x6, x5, x4, x3); + } + else + { + y6 = _BIT(y,6); + x5 = pipebit2 ^ y6; + x3 = pipebit0 ^ y3 ^ x5; + *pY = Bits2Number(2, y4, y3); + *pX = Bits2Number(3, x5, x4, x3); + } + break; + case ADDR_PIPECFG_P16_32x32_8x16: + x4 = elemIdx1; + y4 = elemIdx0 ^ x4; + y3 = pipebit0 ^ x4; + x3 = pipebit1 ^ y4; + if((pitchInMacroTile % 4) == 0) + { //multiple of 4 + y5 = _BIT(y,5); + y6 = _BIT(y,6); + x5 = pipebit2 ^ y6; + x6 = pipebit3 ^ y5; + *pY = Bits2Number(2, y4, y3); + *pX = Bits2Number(4, x6, x5,x4, x3); + } + else + { + y6 = _BIT(y,6); + x5 = pipebit2 ^ y6; + *pY = Bits2Number(2, y4, y3); + *pX = Bits2Number(3, x5, x4, x3); + } + break; + case ADDR_PIPECFG_P16_32x32_16x16: + x4 = elemIdx1; + y3 = elemIdx0 ^ x4; + y4 = pipebit1 ^ x4; + x3 = pipebit0 ^ y3 ^ x4; + 
if((pitchInMacroTile % 4) == 0) + { //multiple of 4 + y5 = _BIT(y,5); + y6 = _BIT(y,6); + x5 = pipebit2 ^ y6; + x6 = pipebit3 ^ y5; + *pY = Bits2Number(2, y4, y3); + *pX = Bits2Number(4, x6, x5, x4, x3); + } + else + { + y6 = _BIT(y,6); + x5 = pipebit2 ^ y6; + *pY = Bits2Number(2, y4, y3); + *pX = Bits2Number(3, x5, x4, x3); + } + break; + default: + ADDR_UNHANDLED_CASE(); + } +} + +/** +**************************************************************************************************** +* SiLib::TileCoordToMaskElementIndex +* +* @brief +* Compute element index from coordinates in tiles +* @return +* Element index +**************************************************************************************************** +*/ +UINT_32 SiLib::TileCoordToMaskElementIndex( + UINT_32 tx, ///< [in] x coord, in Tiles + UINT_32 ty, ///< [in] y coord, in Tiles + AddrPipeCfg pipeConfig, ///< [in] pipe config + UINT_32* macroShift, ///< [out] macro shift + UINT_32* elemIdxBits ///< [out] tile offset bits + ) const +{ + UINT_32 elemIdx = 0; + UINT_32 elemIdx0, elemIdx1, elemIdx2; + UINT_32 tx0, tx1; + UINT_32 ty0, ty1; + + tx0 = _BIT(tx,0); + tx1 = _BIT(tx,1); + ty0 = _BIT(ty,0); + ty1 = _BIT(ty,1); + + switch(pipeConfig) + { + case ADDR_PIPECFG_P2: + *macroShift = 3; + *elemIdxBits =3; + elemIdx2 = tx1; + elemIdx1 = tx1 ^ ty1; + elemIdx0 = tx1 ^ ty0; + elemIdx = Bits2Number(3,elemIdx2,elemIdx1,elemIdx0); + break; + case ADDR_PIPECFG_P4_8x16: + *macroShift = 2; + *elemIdxBits =2; + elemIdx1 = tx1; + elemIdx0 = tx1 ^ ty1; + elemIdx = Bits2Number(2,elemIdx1,elemIdx0); + break; + case ADDR_PIPECFG_P4_16x16: + *macroShift = 2; + *elemIdxBits =2; + elemIdx0 = tx1^ty0; + elemIdx1 = tx1; + elemIdx = Bits2Number(2, elemIdx1, elemIdx0); + break; + case ADDR_PIPECFG_P4_16x32: + *macroShift = 2; + *elemIdxBits =2; + elemIdx0 = tx1^ty0; + elemIdx1 = tx1^ty1; + elemIdx = Bits2Number(2, elemIdx1, elemIdx0); + break; + case ADDR_PIPECFG_P4_32x32: + *macroShift = 2; + *elemIdxBits =3; + elemIdx0 = 
tx1^ty0; + elemIdx1 = tx1^ty1; + elemIdx2 = tx1; + elemIdx = Bits2Number(3, elemIdx2, elemIdx1, elemIdx0); + break; + case ADDR_PIPECFG_P8_16x16_8x16: + *macroShift = 1; + *elemIdxBits =1; + elemIdx0 = tx1; + elemIdx = elemIdx0; + break; + case ADDR_PIPECFG_P8_16x32_8x16: + *macroShift = 1; + *elemIdxBits =1; + elemIdx0 = tx0; + elemIdx = elemIdx0; + break; + case ADDR_PIPECFG_P8_32x32_8x16: + *macroShift = 1; + *elemIdxBits =2; + elemIdx1 = tx1; + elemIdx0 = tx1^ty1; + elemIdx = Bits2Number(2, elemIdx1, elemIdx0); + break; + case ADDR_PIPECFG_P8_16x32_16x16: + *macroShift = 1; + *elemIdxBits =1; + elemIdx0 = tx0; + elemIdx = elemIdx0; + break; + case ADDR_PIPECFG_P8_32x32_16x16: + *macroShift = 1; + *elemIdxBits =2; + elemIdx0 = tx1^ty0; + elemIdx1 = tx1; + elemIdx = Bits2Number(2, elemIdx1, elemIdx0); + break; + case ADDR_PIPECFG_P8_32x32_16x32: + *macroShift = 1; + *elemIdxBits =2; + elemIdx0 = tx1^ty0; + elemIdx1 = tx1^ty1; + elemIdx = Bits2Number(2, elemIdx1, elemIdx0); + break; + case ADDR_PIPECFG_P8_32x64_32x32: + *macroShift = 1; + *elemIdxBits =3; + elemIdx0 = tx1^ty0; + elemIdx1 = tx1^ty1; + elemIdx2 = tx1; + elemIdx = Bits2Number(3, elemIdx2, elemIdx1, elemIdx0); + break; + case ADDR_PIPECFG_P16_32x32_8x16: + *macroShift = 0; + *elemIdxBits =2; + elemIdx0 = tx1^ty1; + elemIdx1 = tx1; + elemIdx = Bits2Number(2, elemIdx1, elemIdx0); + break; + case ADDR_PIPECFG_P16_32x32_16x16: + *macroShift = 0; + *elemIdxBits =2; + elemIdx0 = tx1^ty0; + elemIdx1 = tx1; + elemIdx = Bits2Number(2, elemIdx1, elemIdx0); + break; + default: + ADDR_UNHANDLED_CASE(); + break; + } + + return elemIdx; +} + +/** +**************************************************************************************************** +* SiLib::HwlComputeTileDataWidthAndHeightLinear +* +* @brief +* Compute the squared cache shape for per-tile data (CMASK and HTILE) for linear layout +* +* @return +* N/A +* +* @note +* MacroWidth and macroHeight are measured in pixels 
+**************************************************************************************************** +*/ +VOID SiLib::HwlComputeTileDataWidthAndHeightLinear( + UINT_32* pMacroWidth, ///< [out] macro tile width + UINT_32* pMacroHeight, ///< [out] macro tile height + UINT_32 bpp, ///< [in] bits per pixel + ADDR_TILEINFO* pTileInfo ///< [in] tile info + ) const +{ + ADDR_ASSERT(pTileInfo != NULL); + UINT_32 macroWidth; + UINT_32 macroHeight; + + /// In linear mode, the htile or cmask buffer must be padded out to 4 tiles + /// but for P8_32x64_32x32, it must be padded out to 8 tiles + /// Actually there are more pipe configs which need 8-tile padding but SI family + /// has a bug which is fixed in CI family + if ((pTileInfo->pipeConfig == ADDR_PIPECFG_P8_32x64_32x32) || + (pTileInfo->pipeConfig == ADDR_PIPECFG_P16_32x32_8x16) || + (pTileInfo->pipeConfig == ADDR_PIPECFG_P8_32x32_16x16)) + { + macroWidth = 8*MicroTileWidth; + macroHeight = 8*MicroTileHeight; + } + else + { + macroWidth = 4*MicroTileWidth; + macroHeight = 4*MicroTileHeight; + } + + *pMacroWidth = macroWidth; + *pMacroHeight = macroHeight; +} + +/** +**************************************************************************************************** +* SiLib::HwlComputeHtileBytes +* +* @brief +* Compute htile size in bytes +* +* @return +* Htile size in bytes +**************************************************************************************************** +*/ +UINT_64 SiLib::HwlComputeHtileBytes( + UINT_32 pitch, ///< [in] pitch + UINT_32 height, ///< [in] height + UINT_32 bpp, ///< [in] bits per pixel + BOOL_32 isLinear, ///< [in] if it is linear mode + UINT_32 numSlices, ///< [in] number of slices + UINT_64* pSliceBytes, ///< [out] bytes per slice + UINT_32 baseAlign ///< [in] base alignments + ) const +{ + return ComputeHtileBytes(pitch, height, bpp, isLinear, numSlices, pSliceBytes, baseAlign); +} + +/** 
+**************************************************************************************************** +* SiLib::HwlComputeXmaskAddrFromCoord +* +* @brief +* Compute address from coordinates for htile/cmask +* @return +* Byte address +**************************************************************************************************** +*/ +UINT_64 SiLib::HwlComputeXmaskAddrFromCoord( + UINT_32 pitch, ///< [in] pitch + UINT_32 height, ///< [in] height + UINT_32 x, ///< [in] x coord + UINT_32 y, ///< [in] y coord + UINT_32 slice, ///< [in] slice/depth index + UINT_32 numSlices, ///< [in] number of slices + UINT_32 factor, ///< [in] factor that indicates cmask(2) or htile(1) + BOOL_32 isLinear, ///< [in] linear or tiled HTILE layout + BOOL_32 isWidth8, ///< [in] TRUE if width is 8, FALSE means 4. It's register value + BOOL_32 isHeight8, ///< [in] TRUE if width is 8, FALSE means 4. It's register value + ADDR_TILEINFO* pTileInfo, ///< [in] Tile info + UINT_32* pBitPosition ///< [out] bit position inside a byte + ) const +{ + UINT_32 tx = x / MicroTileWidth; + UINT_32 ty = y / MicroTileHeight; + UINT_32 newPitch; + UINT_32 newHeight; + UINT_64 totalBytes; + UINT_32 macroWidth; + UINT_32 macroHeight; + UINT_64 pSliceBytes; + UINT_32 pBaseAlign; + UINT_32 tileNumPerPipe; + UINT_32 elemBits; + + if (factor == 2) //CMASK + { + ADDR_CMASK_FLAGS flags = {{0}}; + + tileNumPerPipe = 256; + + ComputeCmaskInfo(flags, + pitch, + height, + numSlices, + isLinear, + pTileInfo, + &newPitch, + &newHeight, + &totalBytes, + ¯oWidth, + ¯oHeight); + elemBits = CmaskElemBits; + } + else //HTile + { + ADDR_HTILE_FLAGS flags = {{0}}; + + tileNumPerPipe = 512; + + ComputeHtileInfo(flags, + pitch, + height, + numSlices, + isLinear, + TRUE, + TRUE, + pTileInfo, + &newPitch, + &newHeight, + &totalBytes, + ¯oWidth, + ¯oHeight, + &pSliceBytes, + &pBaseAlign); + elemBits = 32; + } + + const UINT_32 pitchInTile = newPitch / MicroTileWidth; + const UINT_32 heightInTile = newHeight / MicroTileWidth; + 
UINT_64 macroOffset; // Per pipe starting offset of the macro tile in which this tile lies. + UINT_64 microNumber; // Per pipe starting offset of the macro tile in which this tile lies. + UINT_32 microX; + UINT_32 microY; + UINT_64 microOffset; + UINT_32 microShift; + UINT_64 totalOffset; + UINT_32 elemIdxBits; + UINT_32 elemIdx = + TileCoordToMaskElementIndex(tx, ty, pTileInfo->pipeConfig, µShift, &elemIdxBits); + + UINT_32 numPipes = HwlGetPipes(pTileInfo); + + if (isLinear) + { //linear addressing + // Linear addressing is extremelly wasting memory if slice > 1, since each pipe has the full + // slice memory foot print instead of divided by numPipes. + microX = tx / 4; // Macro Tile is 4x4 + microY = ty / 4 ; + microNumber = static_cast(microX + microY * (pitchInTile / 4)) << microShift; + + UINT_32 sliceBits = pitchInTile * heightInTile; + + // do htile single slice alignment if the flag is true + if (m_configFlags.useHtileSliceAlign && (factor == 1)) //Htile + { + sliceBits = PowTwoAlign(sliceBits, BITS_TO_BYTES(HtileCacheBits) * numPipes / elemBits); + } + macroOffset = slice * (sliceBits / numPipes) * elemBits ; + } + else + { //tiled addressing + const UINT_32 macroWidthInTile = macroWidth / MicroTileWidth; // Now in unit of Tiles + const UINT_32 macroHeightInTile = macroHeight / MicroTileHeight; + const UINT_32 pitchInCL = pitchInTile / macroWidthInTile; + const UINT_32 heightInCL = heightInTile / macroHeightInTile; + + const UINT_32 macroX = x / macroWidth; + const UINT_32 macroY = y / macroHeight; + const UINT_32 macroNumber = macroX + macroY * pitchInCL + slice * pitchInCL * heightInCL; + + // Per pipe starting offset of the cache line in which this tile lies. 
+ microX = (x % macroWidth) / MicroTileWidth / 4; // Macro Tile is 4x4 + microY = (y % macroHeight) / MicroTileHeight / 4 ; + microNumber = static_cast(microX + microY * (macroWidth / MicroTileWidth / 4)) << microShift; + + macroOffset = macroNumber * tileNumPerPipe * elemBits; + } + + if(elemIdxBits == microShift) + { + microNumber += elemIdx; + } + else + { + microNumber >>= elemIdxBits; + microNumber <<= elemIdxBits; + microNumber += elemIdx; + } + + microOffset = elemBits * microNumber; + totalOffset = microOffset + macroOffset; + + UINT_32 pipe = ComputePipeFromCoord(x, y, 0, ADDR_TM_2D_TILED_THIN1, 0, FALSE, pTileInfo); + UINT_64 addrInBits = totalOffset % (m_pipeInterleaveBytes * 8) + + pipe * (m_pipeInterleaveBytes * 8) + + totalOffset / (m_pipeInterleaveBytes * 8) * (m_pipeInterleaveBytes * 8) * numPipes; + *pBitPosition = static_cast(addrInBits) % 8; + UINT_64 addr = addrInBits / 8; + + return addr; +} + +/** +**************************************************************************************************** +* SiLib::HwlComputeXmaskCoordFromAddr +* +* @brief +* Compute the coord from an address of a cmask/htile +* +* @return +* N/A +* +* @note +* This method is reused by htile, so rename to Xmask +**************************************************************************************************** +*/ +VOID SiLib::HwlComputeXmaskCoordFromAddr( + UINT_64 addr, ///< [in] address + UINT_32 bitPosition, ///< [in] bitPosition in a byte + UINT_32 pitch, ///< [in] pitch + UINT_32 height, ///< [in] height + UINT_32 numSlices, ///< [in] number of slices + UINT_32 factor, ///< [in] factor that indicates cmask or htile + BOOL_32 isLinear, ///< [in] linear or tiled HTILE layout + BOOL_32 isWidth8, ///< [in] Not used by SI + BOOL_32 isHeight8, ///< [in] Not used by SI + ADDR_TILEINFO* pTileInfo, ///< [in] Tile info + UINT_32* pX, ///< [out] x coord + UINT_32* pY, ///< [out] y coord + UINT_32* pSlice ///< [out] slice index + ) const +{ + UINT_32 newPitch; + UINT_32 
newHeight; + UINT_64 totalBytes; + UINT_32 clWidth; + UINT_32 clHeight; + UINT_32 tileNumPerPipe; + UINT_64 sliceBytes; + + *pX = 0; + *pY = 0; + *pSlice = 0; + + if (factor == 2) //CMASK + { + ADDR_CMASK_FLAGS flags = {{0}}; + + tileNumPerPipe = 256; + + ComputeCmaskInfo(flags, + pitch, + height, + numSlices, + isLinear, + pTileInfo, + &newPitch, + &newHeight, + &totalBytes, + &clWidth, + &clHeight); + } + else //HTile + { + ADDR_HTILE_FLAGS flags = {{0}}; + + tileNumPerPipe = 512; + + ComputeHtileInfo(flags, + pitch, + height, + numSlices, + isLinear, + TRUE, + TRUE, + pTileInfo, + &newPitch, + &newHeight, + &totalBytes, + &clWidth, + &clHeight, + &sliceBytes); + } + + const UINT_32 pitchInTile = newPitch / MicroTileWidth; + const UINT_32 heightInTile = newHeight / MicroTileWidth; + const UINT_32 pitchInMacroTile = pitchInTile / 4; + UINT_32 macroShift; + UINT_32 elemIdxBits; + // get macroShift and elemIdxBits + TileCoordToMaskElementIndex(0, 0, pTileInfo->pipeConfig, ¯oShift, &elemIdxBits); + + const UINT_32 numPipes = HwlGetPipes(pTileInfo); + const UINT_32 pipe = (UINT_32)((addr / m_pipeInterleaveBytes) % numPipes); + // per pipe + UINT_64 localOffset = (addr % m_pipeInterleaveBytes) + + (addr / m_pipeInterleaveBytes / numPipes)* m_pipeInterleaveBytes; + + UINT_32 tileIndex; + if (factor == 2) //CMASK + { + tileIndex = (UINT_32)(localOffset * 2 + (bitPosition != 0)); + } + else + { + tileIndex = (UINT_32)(localOffset / 4); + } + + UINT_32 macroOffset; + if (isLinear) + { + UINT_32 sliceSizeInTile = pitchInTile * heightInTile; + + // do htile single slice alignment if the flag is true + if (m_configFlags.useHtileSliceAlign && (factor == 1)) //Htile + { + sliceSizeInTile = PowTwoAlign(sliceSizeInTile, static_cast(sliceBytes) / 64); + } + *pSlice = tileIndex / (sliceSizeInTile / numPipes); + macroOffset = tileIndex % (sliceSizeInTile / numPipes); + } + else + { + const UINT_32 clWidthInTile = clWidth / MicroTileWidth; // Now in unit of Tiles + const UINT_32 
clHeightInTile = clHeight / MicroTileHeight; + const UINT_32 pitchInCL = pitchInTile / clWidthInTile; + const UINT_32 heightInCL = heightInTile / clHeightInTile; + const UINT_32 clIndex = tileIndex / tileNumPerPipe; + + UINT_32 clX = clIndex % pitchInCL; + UINT_32 clY = (clIndex % (heightInCL * pitchInCL)) / pitchInCL; + + *pX = clX * clWidthInTile * MicroTileWidth; + *pY = clY * clHeightInTile * MicroTileHeight; + *pSlice = clIndex / (heightInCL * pitchInCL); + + macroOffset = tileIndex % tileNumPerPipe; + } + + UINT_32 elemIdx = macroOffset & 7; + macroOffset >>= elemIdxBits; + + if (elemIdxBits != macroShift) + { + macroOffset <<= (elemIdxBits - macroShift); + + UINT_32 pipebit1 = _BIT(pipe,1); + UINT_32 pipebit2 = _BIT(pipe,2); + UINT_32 pipebit3 = _BIT(pipe,3); + if (pitchInMacroTile % 2) + { //odd + switch (pTileInfo->pipeConfig) + { + case ADDR_PIPECFG_P4_32x32: + macroOffset |= pipebit1; + break; + case ADDR_PIPECFG_P8_32x32_8x16: + case ADDR_PIPECFG_P8_32x32_16x16: + case ADDR_PIPECFG_P8_32x32_16x32: + macroOffset |= pipebit2; + break; + default: + break; + } + + } + + if (pitchInMacroTile % 4) + { + if (pTileInfo->pipeConfig == ADDR_PIPECFG_P8_32x64_32x32) + { + macroOffset |= (pipebit1<<1); + } + if((pTileInfo->pipeConfig == ADDR_PIPECFG_P16_32x32_8x16) || + (pTileInfo->pipeConfig == ADDR_PIPECFG_P16_32x32_16x16)) + { + macroOffset |= (pipebit3<<1); + } + } + } + + UINT_32 macroX; + UINT_32 macroY; + + if (isLinear) + { + macroX = macroOffset % pitchInMacroTile; + macroY = macroOffset / pitchInMacroTile; + } + else + { + const UINT_32 clWidthInMacroTile = clWidth / (MicroTileWidth * 4); + macroX = macroOffset % clWidthInMacroTile; + macroY = macroOffset / clWidthInMacroTile; + } + + *pX += macroX * 4 * MicroTileWidth; + *pY += macroY * 4 * MicroTileHeight; + + UINT_32 microX; + UINT_32 microY; + ComputeTileCoordFromPipeAndElemIdx(elemIdx, pipe, pTileInfo->pipeConfig, pitchInMacroTile, + *pX, *pY, µX, µY); + + *pX += microX * MicroTileWidth; + *pY += 
microY * MicroTileWidth; +} + +/** +**************************************************************************************************** +* SiLib::HwlGetPitchAlignmentLinear +* @brief +* Get pitch alignment +* @return +* pitch alignment +**************************************************************************************************** +*/ +UINT_32 SiLib::HwlGetPitchAlignmentLinear( + UINT_32 bpp, ///< [in] bits per pixel + ADDR_SURFACE_FLAGS flags ///< [in] surface flags + ) const +{ + UINT_32 pitchAlign; + + // Interleaved access requires a 256B aligned pitch, so fall back to pre-SI alignment + if (flags.interleaved) + { + pitchAlign = Max(64u, m_pipeInterleaveBytes / BITS_TO_BYTES(bpp)); + + } + else + { + pitchAlign = Max(8u, 64 / BITS_TO_BYTES(bpp)); + } + + return pitchAlign; +} + +/** +**************************************************************************************************** +* SiLib::HwlGetSizeAdjustmentLinear +* +* @brief +* Adjust linear surface pitch and slice size +* +* @return +* Logical slice size in bytes +**************************************************************************************************** +*/ +UINT_64 SiLib::HwlGetSizeAdjustmentLinear( + AddrTileMode tileMode, ///< [in] tile mode + UINT_32 bpp, ///< [in] bits per pixel + UINT_32 numSamples, ///< [in] number of samples + UINT_32 baseAlign, ///< [in] base alignment + UINT_32 pitchAlign, ///< [in] pitch alignment + UINT_32* pPitch, ///< [in,out] pointer to pitch + UINT_32* pHeight, ///< [in,out] pointer to height + UINT_32* pHeightAlign ///< [in,out] pointer to height align + ) const +{ + UINT_64 sliceSize; + if (tileMode == ADDR_TM_LINEAR_GENERAL) + { + sliceSize = BITS_TO_BYTES(static_cast(*pPitch) * (*pHeight) * bpp * numSamples); + } + else + { + UINT_32 pitch = *pPitch; + UINT_32 height = *pHeight; + + UINT_32 pixelsPerPipeInterleave = m_pipeInterleaveBytes / BITS_TO_BYTES(bpp); + UINT_32 sliceAlignInPixel = pixelsPerPipeInterleave < 64 ? 
64 : pixelsPerPipeInterleave; + + // numSamples should be 1 in real cases (no MSAA for linear but TGL may pass non 1 value) + UINT_64 pixelPerSlice = static_cast(pitch) * height * numSamples; + + while (pixelPerSlice % sliceAlignInPixel) + { + pitch += pitchAlign; + pixelPerSlice = static_cast(pitch) * height * numSamples; + } + + *pPitch = pitch; + + UINT_32 heightAlign = 1; + + while ((pitch * heightAlign) % sliceAlignInPixel) + { + heightAlign++; + } + + *pHeightAlign = heightAlign; + + sliceSize = BITS_TO_BYTES(pixelPerSlice * bpp); + } + + return sliceSize; +} + +/** +**************************************************************************************************** +* SiLib::HwlPreHandleBaseLvl3xPitch +* +* @brief +* Pre-handler of 3x pitch (96 bit) adjustment +* +* @return +* Expected pitch +**************************************************************************************************** +*/ +UINT_32 SiLib::HwlPreHandleBaseLvl3xPitch( + const ADDR_COMPUTE_SURFACE_INFO_INPUT* pIn, ///< [in] input + UINT_32 expPitch ///< [in] pitch + ) const +{ + ADDR_ASSERT(pIn->width == expPitch); + + // From SI, if pow2Pad is 1 the pitch is expanded 3x first, then padded to pow2, so nothing to + // do here + if (pIn->flags.pow2Pad == FALSE) + { + Addr::V1::Lib::HwlPreHandleBaseLvl3xPitch(pIn, expPitch); + } + else + { + ADDR_ASSERT(IsPow2(expPitch)); + } + + return expPitch; +} + +/** +**************************************************************************************************** +* SiLib::HwlPostHandleBaseLvl3xPitch +* +* @brief +* Post-handler of 3x pitch adjustment +* +* @return +* Expected pitch +**************************************************************************************************** +*/ +UINT_32 SiLib::HwlPostHandleBaseLvl3xPitch( + const ADDR_COMPUTE_SURFACE_INFO_INPUT* pIn, ///< [in] input + UINT_32 expPitch ///< [in] pitch + ) const +{ + /** + * @note The pitch will be divided by 3 in the end so the value will look odd but h/w should + * be able 
to compute a correct pitch from it as h/w address library is doing the job. + */ + // From SI, the pitch is expanded 3x first, then padded to pow2, so no special handler here + if (pIn->flags.pow2Pad == FALSE) + { + Addr::V1::Lib::HwlPostHandleBaseLvl3xPitch(pIn, expPitch); + } + + return expPitch; +} + +/** +**************************************************************************************************** +* SiLib::HwlGetPitchAlignmentMicroTiled +* +* @brief +* Compute 1D tiled surface pitch alignment +* +* @return +* pitch alignment +**************************************************************************************************** +*/ +UINT_32 SiLib::HwlGetPitchAlignmentMicroTiled( + AddrTileMode tileMode, ///< [in] tile mode + UINT_32 bpp, ///< [in] bits per pixel + ADDR_SURFACE_FLAGS flags, ///< [in] surface flags + UINT_32 numSamples ///< [in] number of samples + ) const +{ + UINT_32 pitchAlign; + + if (flags.qbStereo) + { + pitchAlign = EgBasedLib::HwlGetPitchAlignmentMicroTiled(tileMode,bpp,flags,numSamples); + } + else + { + pitchAlign = 8; + } + + return pitchAlign; +} + +/** +**************************************************************************************************** +* SiLib::HwlGetSizeAdjustmentMicroTiled +* +* @brief +* Adjust 1D tiled surface pitch and slice size +* +* @return +* Logical slice size in bytes +**************************************************************************************************** +*/ +UINT_64 SiLib::HwlGetSizeAdjustmentMicroTiled( + UINT_32 thickness, ///< [in] thickness + UINT_32 bpp, ///< [in] bits per pixel + ADDR_SURFACE_FLAGS flags, ///< [in] surface flags + UINT_32 numSamples, ///< [in] number of samples + UINT_32 baseAlign, ///< [in] base alignment + UINT_32 pitchAlign, ///< [in] pitch alignment + UINT_32* pPitch, ///< [in,out] pointer to pitch + UINT_32* pHeight ///< [in,out] pointer to height + ) const +{ + UINT_64 logicalSliceSize; + UINT_64 physicalSliceSize; + + UINT_32 pitch = *pPitch; + UINT_32 
height = *pHeight; + + // Logical slice: pitch * height * bpp * numSamples (no 1D MSAA so actually numSamples == 1) + logicalSliceSize = BITS_TO_BYTES(static_cast(pitch) * height * bpp * numSamples); + + // Physical slice: multiplied by thickness + physicalSliceSize = logicalSliceSize * thickness; + + // Pitch alignment is always 8, so if slice size is not padded to base alignment + // (pipe_interleave_size), we need to increase pitch + while ((physicalSliceSize % baseAlign) != 0) + { + pitch += pitchAlign; + + logicalSliceSize = BITS_TO_BYTES(static_cast(pitch) * height * bpp * numSamples); + + physicalSliceSize = logicalSliceSize * thickness; + } + +#if !ALT_TEST + // + // Special workaround for depth/stencil buffer, use 8 bpp to align depth buffer again since + // the stencil plane may have larger pitch if the slice size is smaller than base alignment. + // + // Note: this actually does not work for mipmap but mipmap depth texture is not really + // sampled with mipmap. + // + if (flags.depth && (flags.noStencil == FALSE)) + { + ADDR_ASSERT(numSamples == 1); + + UINT_64 logicalSiceSizeStencil = static_cast(pitch) * height; // 1 byte stencil + + while ((logicalSiceSizeStencil % baseAlign) != 0) + { + pitch += pitchAlign; // Stencil plane's pitch alignment is the same as depth plane's + + logicalSiceSizeStencil = static_cast(pitch) * height; + } + + if (pitch != *pPitch) + { + // If this is a mipmap, this padded one cannot be sampled as a whole mipmap! 
+ logicalSliceSize = logicalSiceSizeStencil * BITS_TO_BYTES(bpp); + } + } +#endif + *pPitch = pitch; + + // No adjust for pHeight + + return logicalSliceSize; +} + +/** +**************************************************************************************************** +* SiLib::HwlConvertChipFamily +* +* @brief +* Convert familyID defined in atiid.h to ChipFamily and set m_chipFamily/m_chipRevision +* @return +* ChipFamily +**************************************************************************************************** +*/ +ChipFamily SiLib::HwlConvertChipFamily( + UINT_32 uChipFamily, ///< [in] chip family defined in atiih.h + UINT_32 uChipRevision) ///< [in] chip revision defined in "asic_family"_id.h +{ + ChipFamily family = ADDR_CHIP_FAMILY_SI; + + switch (uChipFamily) + { + case FAMILY_SI: + m_settings.isSouthernIsland = 1; + m_settings.isTahiti = ASICREV_IS_TAHITI_P(uChipRevision); + m_settings.isPitCairn = ASICREV_IS_PITCAIRN_PM(uChipRevision); + m_settings.isCapeVerde = ASICREV_IS_CAPEVERDE_M(uChipRevision); + m_settings.isOland = ASICREV_IS_OLAND_M(uChipRevision); + m_settings.isHainan = ASICREV_IS_HAINAN_V(uChipRevision); + break; + default: + ADDR_ASSERT(!"This should be a Fusion"); + break; + } + + return family; +} + +/** +**************************************************************************************************** +* SiLib::HwlSetupTileInfo +* +* @brief +* Setup default value of tile info for SI +**************************************************************************************************** +*/ +VOID SiLib::HwlSetupTileInfo( + AddrTileMode tileMode, ///< [in] Tile mode + ADDR_SURFACE_FLAGS flags, ///< [in] Surface type flags + UINT_32 bpp, ///< [in] Bits per pixel + UINT_32 pitch, ///< [in] Pitch in pixels + UINT_32 height, ///< [in] Height in pixels + UINT_32 numSamples, ///< [in] Number of samples + ADDR_TILEINFO* pTileInfoIn, ///< [in] Tile info input: NULL for default + ADDR_TILEINFO* pTileInfoOut, ///< [out] Tile info output + 
AddrTileType inTileType, ///< [in] Tile type + ADDR_COMPUTE_SURFACE_INFO_OUTPUT* pOut ///< [out] Output + ) const +{ + UINT_32 thickness = Thickness(tileMode); + ADDR_TILEINFO* pTileInfo = pTileInfoOut; + INT index = TileIndexInvalid; + + // Fail-safe code + if (IsLinear(tileMode) == FALSE) + { + // 128 bpp/thick tiling must be non-displayable. + // Fmask reuse color buffer's entry but bank-height field can be from another entry + // To simplify the logic, fmask entry should be picked from non-displayable ones + if (bpp == 128 || thickness > 1 || flags.fmask || flags.prt) + { + inTileType = ADDR_NON_DISPLAYABLE; + } + + if (flags.depth || flags.stencil) + { + inTileType = ADDR_DEPTH_SAMPLE_ORDER; + } + } + + // Partial valid fields are not allowed for SI. + if (IsTileInfoAllZero(pTileInfo)) + { + if (IsMacroTiled(tileMode)) + { + if (flags.prt) + { + if (numSamples == 1) + { + if (flags.depth) + { + switch (bpp) + { + case 16: + index = 3; + break; + case 32: + index = 6; + break; + default: + ADDR_ASSERT_ALWAYS(); + break; + } + } + else + { + switch (bpp) + { + case 8: + index = 21; + break; + case 16: + index = 22; + break; + case 32: + index = 23; + break; + case 64: + index = 24; + break; + case 128: + index = 25; + break; + default: + break; + } + + if (thickness > 1) + { + ADDR_ASSERT(bpp != 128); + index += 5; + } + } + } + else + { + ADDR_ASSERT(numSamples == 4); + + if (flags.depth) + { + switch (bpp) + { + case 16: + index = 5; + break; + case 32: + index = 7; + break; + default: + ADDR_ASSERT_ALWAYS(); + break; + } + } + else + { + switch (bpp) + { + case 8: + index = 23; + break; + case 16: + index = 24; + break; + case 32: + index = 25; + break; + case 64: + index = 30; + break; + default: + ADDR_ASSERT_ALWAYS(); + break; + } + } + } + }//end of PRT part + // See table entries 0-7 + else if (flags.depth || flags.stencil) + { + if (flags.compressZ) + { + if (flags.stencil) + { + index = 0; + } + else + { + // optimal tile index for compressed 
depth/stencil. + switch (numSamples) + { + case 1: + index = 0; + break; + case 2: + case 4: + index = 1; + break; + case 8: + index = 2; + break; + default: + break; + } + } + } + else // unCompressZ + { + index = 3; + } + } + else //non PRT & non Depth & non Stencil + { + // See table entries 9-12 + if (inTileType == ADDR_DISPLAYABLE) + { + switch (bpp) + { + case 8: + index = 10; + break; + case 16: + index = 11; + break; + case 32: + index = 12; + break; + case 64: + index = 12; + break; + default: + break; + } + } + else + { + // See table entries 13-17 + if (thickness == 1) + { + if (flags.fmask) + { + UINT_32 fmaskPixelSize = bpp * numSamples; + + switch (fmaskPixelSize) + { + case 8: + index = 14; + break; + case 16: + index = 15; + break; + case 32: + index = 16; + break; + case 64: + index = 17; + break; + default: + ADDR_ASSERT_ALWAYS(); + } + } + else + { + switch (bpp) + { + case 8: + index = 14; + break; + case 16: + index = 15; + break; + case 32: + index = 16; + break; + case 64: + index = 17; + break; + case 128: + index = 17; + break; + default: + break; + } + } + } + else // thick tiling - entries 18-20 + { + switch (thickness) + { + case 4: + index = 20; + break; + case 8: + index = 19; + break; + default: + break; + } + } + } + } + } + else + { + if (tileMode == ADDR_TM_LINEAR_ALIGNED) + { + index = 8; + } + else if (tileMode == ADDR_TM_LINEAR_GENERAL) + { + index = TileIndexLinearGeneral; + } + else + { + if (flags.depth || flags.stencil) + { + index = 4; + } + else if (inTileType == ADDR_DISPLAYABLE) + { + index = 9; + } + else if (thickness == 1) + { + index = 13; + } + else + { + index = 18; + } + } + } + + if (index >= 0 && index <= 31) + { + *pTileInfo = m_tileTable[index].info; + pOut->tileType = m_tileTable[index].type; + } + + if (index == TileIndexLinearGeneral) + { + *pTileInfo = m_tileTable[8].info; + pOut->tileType = m_tileTable[8].type; + } + } + else + { + if (pTileInfoIn) + { + if (flags.stencil && pTileInfoIn->tileSplitBytes == 
0) + { + // Stencil always uses index 0 + *pTileInfo = m_tileTable[0].info; + } + } + // Pass through tile type + pOut->tileType = inTileType; + } + + pOut->tileIndex = index; + pOut->prtTileIndex = flags.prt; +} + +/** +**************************************************************************************************** +* SiLib::DecodeGbRegs +* +* @brief +* Decodes GB_ADDR_CONFIG and noOfBanks/noOfRanks +* +* @return +* TRUE if all settings are valid +* +**************************************************************************************************** +*/ +BOOL_32 SiLib::DecodeGbRegs( + const ADDR_REGISTER_VALUE* pRegValue) ///< [in] create input +{ + GB_ADDR_CONFIG reg; + BOOL_32 valid = TRUE; + + reg.val = pRegValue->gbAddrConfig; + + switch (reg.f.pipe_interleave_size) + { + case ADDR_CONFIG_PIPE_INTERLEAVE_256B: + m_pipeInterleaveBytes = ADDR_PIPEINTERLEAVE_256B; + break; + case ADDR_CONFIG_PIPE_INTERLEAVE_512B: + m_pipeInterleaveBytes = ADDR_PIPEINTERLEAVE_512B; + break; + default: + valid = FALSE; + ADDR_UNHANDLED_CASE(); + break; + } + + switch (reg.f.row_size) + { + case ADDR_CONFIG_1KB_ROW: + m_rowSize = ADDR_ROWSIZE_1KB; + break; + case ADDR_CONFIG_2KB_ROW: + m_rowSize = ADDR_ROWSIZE_2KB; + break; + case ADDR_CONFIG_4KB_ROW: + m_rowSize = ADDR_ROWSIZE_4KB; + break; + default: + valid = FALSE; + ADDR_UNHANDLED_CASE(); + break; + } + + switch (pRegValue->noOfBanks) + { + case 0: + m_banks = 4; + break; + case 1: + m_banks = 8; + break; + case 2: + m_banks = 16; + break; + default: + valid = FALSE; + ADDR_UNHANDLED_CASE(); + break; + } + + switch (pRegValue->noOfRanks) + { + case 0: + m_ranks = 1; + break; + case 1: + m_ranks = 2; + break; + default: + valid = FALSE; + ADDR_UNHANDLED_CASE(); + break; + } + + m_logicalBanks = m_banks * m_ranks; + + ADDR_ASSERT(m_logicalBanks <= 16); + + return valid; +} + +/** +**************************************************************************************************** +* SiLib::HwlInitGlobalParams +* +* @brief +* 
Initializes global parameters +* +* @return +* TRUE if all settings are valid +* +**************************************************************************************************** +*/ +BOOL_32 SiLib::HwlInitGlobalParams( + const ADDR_CREATE_INPUT* pCreateIn) ///< [in] create input +{ + BOOL_32 valid = TRUE; + const ADDR_REGISTER_VALUE* pRegValue = &pCreateIn->regValue; + + valid = DecodeGbRegs(pRegValue); + + if (valid) + { + if (m_settings.isTahiti || m_settings.isPitCairn) + { + m_pipes = 8; + } + else if (m_settings.isCapeVerde || m_settings.isOland) + { + m_pipes = 4; + } + else + { + // Hainan is 2-pipe (m_settings.isHainan == 1) + m_pipes = 2; + } + + valid = InitTileSettingTable(pRegValue->pTileConfig, pRegValue->noOfEntries); + + if (valid) + { + InitEquationTable(); + } + + m_maxSamples = 16; + } + + return valid; +} + +/** +**************************************************************************************************** +* SiLib::HwlConvertTileInfoToHW +* @brief +* Entry of si's ConvertTileInfoToHW +* @return +* ADDR_E_RETURNCODE +**************************************************************************************************** +*/ +ADDR_E_RETURNCODE SiLib::HwlConvertTileInfoToHW( + const ADDR_CONVERT_TILEINFOTOHW_INPUT* pIn, ///< [in] input structure + ADDR_CONVERT_TILEINFOTOHW_OUTPUT* pOut ///< [out] output structure + ) const +{ + ADDR_E_RETURNCODE retCode = ADDR_OK; + + retCode = EgBasedLib::HwlConvertTileInfoToHW(pIn, pOut); + + if (retCode == ADDR_OK) + { + if (pIn->reverse == FALSE) + { + if (pIn->pTileInfo->pipeConfig == ADDR_PIPECFG_INVALID) + { + retCode = ADDR_INVALIDPARAMS; + } + else + { + pOut->pTileInfo->pipeConfig = + static_cast(pIn->pTileInfo->pipeConfig - 1); + } + } + else + { + pOut->pTileInfo->pipeConfig = + static_cast(pIn->pTileInfo->pipeConfig + 1); + } + } + + return retCode; +} + +/** +**************************************************************************************************** +* 
SiLib::HwlComputeXmaskCoordYFrom8Pipe +* +* @brief +* Compute the Y coord which will be added to Xmask Y +* coord. +* @return +* Y coord +**************************************************************************************************** +*/ +UINT_32 SiLib::HwlComputeXmaskCoordYFrom8Pipe( + UINT_32 pipe, ///< [in] pipe id + UINT_32 x ///< [in] tile coord x, which is original x coord / 8 + ) const +{ + // This function should never be called since it is 6xx/8xx specfic. + // Keep this empty implementation to avoid any mis-use. + ADDR_ASSERT_ALWAYS(); + + return 0; +} + +/** +**************************************************************************************************** +* SiLib::HwlComputeSurfaceCoord2DFromBankPipe +* +* @brief +* Compute surface x,y coordinates from bank/pipe info +* @return +* N/A +**************************************************************************************************** +*/ +VOID SiLib::HwlComputeSurfaceCoord2DFromBankPipe( + AddrTileMode tileMode, ///< [in] tile mode + UINT_32* pX, ///< [in,out] x coordinate + UINT_32* pY, ///< [in,out] y coordinate + UINT_32 slice, ///< [in] slice index + UINT_32 bank, ///< [in] bank number + UINT_32 pipe, ///< [in] pipe number + UINT_32 bankSwizzle,///< [in] bank swizzle + UINT_32 pipeSwizzle,///< [in] pipe swizzle + UINT_32 tileSlices, ///< [in] slices in a micro tile + BOOL_32 ignoreSE, ///< [in] TRUE if shader engines are ignored + ADDR_TILEINFO* pTileInfo ///< [in] bank structure. 
**All fields to be valid on entry** + ) const +{ + UINT_32 xBit; + UINT_32 yBit; + UINT_32 yBit3 = 0; + UINT_32 yBit4 = 0; + UINT_32 yBit5 = 0; + UINT_32 yBit6 = 0; + + UINT_32 xBit3 = 0; + UINT_32 xBit4 = 0; + UINT_32 xBit5 = 0; + + UINT_32 numPipes = GetPipePerSurf(pTileInfo->pipeConfig); + + CoordFromBankPipe xyBits = {0}; + ComputeSurfaceCoord2DFromBankPipe(tileMode, *pX, *pY, slice, bank, pipe, + bankSwizzle, pipeSwizzle, tileSlices, pTileInfo, + &xyBits); + yBit3 = xyBits.yBit3; + yBit4 = xyBits.yBit4; + yBit5 = xyBits.yBit5; + yBit6 = xyBits.yBit6; + + xBit3 = xyBits.xBit3; + xBit4 = xyBits.xBit4; + xBit5 = xyBits.xBit5; + + yBit = xyBits.yBits; + + UINT_32 yBitTemp = 0; + + if ((pTileInfo->pipeConfig == ADDR_PIPECFG_P4_32x32) || + (pTileInfo->pipeConfig == ADDR_PIPECFG_P8_32x64_32x32)) + { + ADDR_ASSERT(pTileInfo->bankWidth == 1 && pTileInfo->macroAspectRatio > 1); + UINT_32 yBitToCheck = QLog2(pTileInfo->banks) - 1; + + ADDR_ASSERT(yBitToCheck <= 3); + + yBitTemp = _BIT(yBit, yBitToCheck); + + xBit3 = 0; + } + + yBit = Bits2Number(4, yBit6, yBit5, yBit4, yBit3); + xBit = Bits2Number(3, xBit5, xBit4, xBit3); + + *pY += yBit * pTileInfo->bankHeight * MicroTileHeight; + *pX += xBit * numPipes * pTileInfo->bankWidth * MicroTileWidth; + + //calculate the bank and pipe bits in x, y + UINT_32 xTile; //x in micro tile + UINT_32 x3 = 0; + UINT_32 x4 = 0; + UINT_32 x5 = 0; + UINT_32 x6 = 0; + UINT_32 y = *pY; + + UINT_32 pipeBit0 = _BIT(pipe,0); + UINT_32 pipeBit1 = _BIT(pipe,1); + UINT_32 pipeBit2 = _BIT(pipe,2); + + UINT_32 y3 = _BIT(y, 3); + UINT_32 y4 = _BIT(y, 4); + UINT_32 y5 = _BIT(y, 5); + UINT_32 y6 = _BIT(y, 6); + + // bankbit0 after ^x4^x5 + UINT_32 bankBit00 = _BIT(bank,0); + UINT_32 bankBit0 = 0; + + switch (pTileInfo->pipeConfig) + { + case ADDR_PIPECFG_P2: + x3 = pipeBit0 ^ y3; + break; + case ADDR_PIPECFG_P4_8x16: + x4 = pipeBit0 ^ y3; + x3 = pipeBit0 ^ y4; + break; + case ADDR_PIPECFG_P4_16x16: + x4 = pipeBit1 ^ y4; + x3 = pipeBit0 ^ y3 ^ x4; + 
break; + case ADDR_PIPECFG_P4_16x32: + x4 = pipeBit1 ^ y4; + x3 = pipeBit0 ^ y3 ^ x4; + break; + case ADDR_PIPECFG_P4_32x32: + x5 = pipeBit1 ^ y5; + x3 = pipeBit0 ^ y3 ^ x5; + bankBit0 = yBitTemp ^ x5; + x4 = bankBit00 ^ x5 ^ bankBit0; + *pX += x5 * 4 * 1 * 8; // x5 * num_pipes * bank_width * 8; + break; + case ADDR_PIPECFG_P8_16x16_8x16: + x3 = pipeBit1 ^ y5; + x4 = pipeBit2 ^ y4; + x5 = pipeBit0 ^ y3 ^ x4; + break; + case ADDR_PIPECFG_P8_16x32_8x16: + x3 = pipeBit1 ^ y4; + x4 = pipeBit2 ^ y5; + x5 = pipeBit0 ^ y3 ^ x4; + break; + case ADDR_PIPECFG_P8_32x32_8x16: + x3 = pipeBit1 ^ y4; + x5 = pipeBit2 ^ y5; + x4 = pipeBit0 ^ y3 ^ x5; + break; + case ADDR_PIPECFG_P8_16x32_16x16: + x4 = pipeBit2 ^ y5; + x5 = pipeBit1 ^ y4; + x3 = pipeBit0 ^ y3 ^ x4; + break; + case ADDR_PIPECFG_P8_32x32_16x16: + x5 = pipeBit2 ^ y5; + x4 = pipeBit1 ^ y4; + x3 = pipeBit0 ^ y3 ^ x4; + break; + case ADDR_PIPECFG_P8_32x32_16x32: + x5 = pipeBit2 ^ y5; + x4 = pipeBit1 ^ y6; + x3 = pipeBit0 ^ y3 ^ x4; + break; + case ADDR_PIPECFG_P8_32x64_32x32: + x6 = pipeBit1 ^ y5; + x5 = pipeBit2 ^ y6; + x3 = pipeBit0 ^ y3 ^ x5; + bankBit0 = yBitTemp ^ x6; + x4 = bankBit00 ^ x5 ^ bankBit0; + *pX += x6 * 8 * 1 * 8; // x6 * num_pipes * bank_width * 8; + break; + default: + ADDR_ASSERT_ALWAYS(); + } + + xTile = Bits2Number(3, x5, x4, x3); + + *pX += xTile << 3; +} + +/** +**************************************************************************************************** +* SiLib::HwlPreAdjustBank +* +* @brief +* Adjust bank before calculating address acoording to bank/pipe +* @return +* Adjusted bank +**************************************************************************************************** +*/ +UINT_32 SiLib::HwlPreAdjustBank( + UINT_32 tileX, ///< [in] x coordinate in unit of tile + UINT_32 bank, ///< [in] bank + ADDR_TILEINFO* pTileInfo ///< [in] tile info + ) const +{ + if (((pTileInfo->pipeConfig == ADDR_PIPECFG_P4_32x32) || + (pTileInfo->pipeConfig == ADDR_PIPECFG_P8_32x64_32x32)) && 
(pTileInfo->bankWidth == 1)) + { + UINT_32 bankBit0 = _BIT(bank, 0); + UINT_32 x4 = _BIT(tileX, 1); + UINT_32 x5 = _BIT(tileX, 2); + + bankBit0 = bankBit0 ^ x4 ^ x5; + bank |= bankBit0; + + ADDR_ASSERT(pTileInfo->macroAspectRatio > 1); + } + + return bank; +} + +/** +**************************************************************************************************** +* SiLib::HwlComputeSurfaceInfo +* +* @brief +* Entry of si's ComputeSurfaceInfo +* @return +* ADDR_E_RETURNCODE +**************************************************************************************************** +*/ +ADDR_E_RETURNCODE SiLib::HwlComputeSurfaceInfo( + const ADDR_COMPUTE_SURFACE_INFO_INPUT* pIn, ///< [in] input structure + ADDR_COMPUTE_SURFACE_INFO_OUTPUT* pOut ///< [out] output structure + ) const +{ + pOut->tileIndex = pIn->tileIndex; + + ADDR_E_RETURNCODE retCode = EgBasedLib::HwlComputeSurfaceInfo(pIn, pOut); + + UINT_32 tileIndex = static_cast(pOut->tileIndex); + + if (((pIn->flags.needEquation == TRUE) || + (pIn->flags.preferEquation == TRUE)) && + (pIn->numSamples <= 1) && + (tileIndex < TileTableSize)) + { + static const UINT_32 SiUncompressDepthTileIndex = 3; + + if ((pIn->numSlices > 1) && + (IsMacroTiled(pOut->tileMode) == TRUE) && + ((m_chipFamily == ADDR_CHIP_FAMILY_SI) || + (IsPrtTileMode(pOut->tileMode) == FALSE))) + { + pOut->equationIndex = ADDR_INVALID_EQUATION_INDEX; + } + else if ((pIn->flags.prt == FALSE) && + (m_uncompressDepthEqIndex != 0) && + (tileIndex == SiUncompressDepthTileIndex)) + { + pOut->equationIndex = m_uncompressDepthEqIndex + Log2(pIn->bpp >> 3); + } + else + { + + pOut->equationIndex = m_equationLookupTable[Log2(pIn->bpp >> 3)][tileIndex]; + } + + if (pOut->equationIndex != ADDR_INVALID_EQUATION_INDEX) + { + pOut->blockWidth = m_blockWidth[pOut->equationIndex]; + + pOut->blockHeight = m_blockHeight[pOut->equationIndex]; + + pOut->blockSlices = m_blockSlices[pOut->equationIndex]; + } + } + else + { + pOut->equationIndex = 
ADDR_INVALID_EQUATION_INDEX; + } + + return retCode; +} + +/** +**************************************************************************************************** +* SiLib::HwlComputeMipLevel +* @brief +* Compute MipLevel info (including level 0) +* @return +* TRUE if HWL's handled +**************************************************************************************************** +*/ +BOOL_32 SiLib::HwlComputeMipLevel( + ADDR_COMPUTE_SURFACE_INFO_INPUT* pIn ///< [in,out] Input structure + ) const +{ + // basePitch is calculated from level 0 so we only check this for mipLevel > 0 + if (pIn->mipLevel > 0) + { + // Note: Don't check expand 3x formats(96 bit) as the basePitch is not pow2 even if + // we explicity set pow2Pad flag. The 3x base pitch is padded to pow2 but after being + // divided by expandX factor (3) - to program texture pitch, the basePitch is never pow2. + if (ElemLib::IsExpand3x(pIn->format) == FALSE) + { + // Sublevel pitches are generated from base level pitch instead of width on SI + // If pow2Pad is 0, we don't assert - as this is not really used for a mip chain + ADDR_ASSERT((pIn->flags.pow2Pad == FALSE) || + ((pIn->basePitch != 0) && IsPow2(pIn->basePitch))); + } + + if (pIn->basePitch != 0) + { + pIn->width = Max(1u, pIn->basePitch >> pIn->mipLevel); + } + } + + // pow2Pad is done in PostComputeMipLevel + + return TRUE; +} + +/** +**************************************************************************************************** +* SiLib::HwlCheckLastMacroTiledLvl +* +* @brief +* Sets pOut->last2DLevel to TRUE if it is +* @note +* +**************************************************************************************************** +*/ +VOID SiLib::HwlCheckLastMacroTiledLvl( + const ADDR_COMPUTE_SURFACE_INFO_INPUT* pIn, ///< [in] Input structure + ADDR_COMPUTE_SURFACE_INFO_OUTPUT* pOut ///< [in,out] Output structure (used as input, too) + ) const +{ + // pow2Pad covers all mipmap cases + if (pIn->flags.pow2Pad) + { + 
ADDR_ASSERT(IsMacroTiled(pIn->tileMode)); + + UINT_32 nextPitch; + UINT_32 nextHeight; + UINT_32 nextSlices; + + AddrTileMode nextTileMode; + + if (pIn->mipLevel == 0 || pIn->basePitch == 0) + { + // Base level or fail-safe case (basePitch == 0) + nextPitch = pOut->pitch >> 1; + } + else + { + // Sub levels + nextPitch = pIn->basePitch >> (pIn->mipLevel + 1); + } + + // nextHeight must be shifted from this level's original height rather than a pow2 padded + // one but this requires original height stored somewhere (pOut->height) + ADDR_ASSERT(pOut->height != 0); + + // next level's height is just current level's >> 1 in pixels + nextHeight = pOut->height >> 1; + // Special format such as FMT_1 and FMT_32_32_32 can be linear only so we consider block + // compressed foramts + if (ElemLib::IsBlockCompressed(pIn->format)) + { + nextHeight = (nextHeight + 3) / 4; + } + nextHeight = NextPow2(nextHeight); + + // nextSlices may be 0 if this level's is 1 + if (pIn->flags.volume) + { + nextSlices = Max(1u, pIn->numSlices >> 1); + } + else + { + nextSlices = pIn->numSlices; + } + + nextTileMode = ComputeSurfaceMipLevelTileMode(pIn->tileMode, + pIn->bpp, + nextPitch, + nextHeight, + nextSlices, + pIn->numSamples, + pOut->blockWidth, + pOut->blockHeight, + pOut->pTileInfo); + + pOut->last2DLevel = IsMicroTiled(nextTileMode); + } +} + +/** +**************************************************************************************************** +* SiLib::HwlDegradeThickTileMode +* +* @brief +* Degrades valid tile mode for thick modes if needed +* +* @return +* Suitable tile mode +**************************************************************************************************** +*/ +AddrTileMode SiLib::HwlDegradeThickTileMode( + AddrTileMode baseTileMode, ///< base tile mode + UINT_32 numSlices, ///< current number of slices + UINT_32* pBytesPerTile ///< [in,out] pointer to bytes per slice + ) const +{ + return EgBasedLib::HwlDegradeThickTileMode(baseTileMode, numSlices, 
pBytesPerTile); +} + +/** +**************************************************************************************************** +* SiLib::HwlTileInfoEqual +* +* @brief +* Return TRUE if all field are equal +* @note +* Only takes care of current HWL's data +**************************************************************************************************** +*/ +BOOL_32 SiLib::HwlTileInfoEqual( + const ADDR_TILEINFO* pLeft, ///<[in] Left compare operand + const ADDR_TILEINFO* pRight ///<[in] Right compare operand + ) const +{ + BOOL_32 equal = FALSE; + + if (pLeft->pipeConfig == pRight->pipeConfig) + { + equal = EgBasedLib::HwlTileInfoEqual(pLeft, pRight); + } + + return equal; +} + +/** +**************************************************************************************************** +* SiLib::GetTileSettings +* +* @brief +* Get tile setting infos by index. +* @return +* Tile setting info. +**************************************************************************************************** +*/ +const TileConfig* SiLib::GetTileSetting( + UINT_32 index ///< [in] Tile index + ) const +{ + ADDR_ASSERT(index < m_noOfEntries); + return &m_tileTable[index]; +} + +/** +**************************************************************************************************** +* SiLib::HwlPostCheckTileIndex +* +* @brief +* Map a tile setting to index if curIndex is invalid, otherwise check if curIndex matches +* tile mode/type/info and change the index if needed +* @return +* Tile index. 
+**************************************************************************************************** +*/ +INT_32 SiLib::HwlPostCheckTileIndex( + const ADDR_TILEINFO* pInfo, ///< [in] Tile Info + AddrTileMode mode, ///< [in] Tile mode + AddrTileType type, ///< [in] Tile type + INT curIndex ///< [in] Current index assigned in HwlSetupTileInfo + ) const +{ + INT_32 index = curIndex; + + if (mode == ADDR_TM_LINEAR_GENERAL) + { + index = TileIndexLinearGeneral; + } + else + { + BOOL_32 macroTiled = IsMacroTiled(mode); + + // We need to find a new index if either of them is true + // 1. curIndex is invalid + // 2. tile mode is changed + // 3. tile info does not match for macro tiled + if ((index == TileIndexInvalid || + (mode != m_tileTable[index].mode) || + (macroTiled && (HwlTileInfoEqual(pInfo, &m_tileTable[index].info) == FALSE)))) + { + for (index = 0; index < static_cast(m_noOfEntries); index++) + { + if (macroTiled) + { + // macro tile modes need all to match + if (HwlTileInfoEqual(pInfo, &m_tileTable[index].info) && + (mode == m_tileTable[index].mode) && + (type == m_tileTable[index].type)) + { + break; + } + } + else if (mode == ADDR_TM_LINEAR_ALIGNED) + { + // linear mode only needs tile mode to match + if (mode == m_tileTable[index].mode) + { + break; + } + } + else + { + // micro tile modes only need tile mode and tile type to match + if (mode == m_tileTable[index].mode && + type == m_tileTable[index].type) + { + break; + } + } + } + } + } + + ADDR_ASSERT(index < static_cast(m_noOfEntries)); + + if (index >= static_cast(m_noOfEntries)) + { + index = TileIndexInvalid; + } + + return index; +} + +/** +**************************************************************************************************** +* SiLib::HwlSetupTileCfg +* +* @brief +* Map tile index to tile setting. 
+* @return +* ADDR_E_RETURNCODE +**************************************************************************************************** +*/ +ADDR_E_RETURNCODE SiLib::HwlSetupTileCfg( + UINT_32 bpp, ///< Bits per pixel + INT_32 index, ///< Tile index + INT_32 macroModeIndex, ///< Index in macro tile mode table(CI) + ADDR_TILEINFO* pInfo, ///< [out] Tile Info + AddrTileMode* pMode, ///< [out] Tile mode + AddrTileType* pType ///< [out] Tile type + ) const +{ + ADDR_E_RETURNCODE returnCode = ADDR_OK; + + // Global flag to control usage of tileIndex + if (UseTileIndex(index)) + { + if (index == TileIndexLinearGeneral) + { + if (pMode) + { + *pMode = ADDR_TM_LINEAR_GENERAL; + } + + if (pType) + { + *pType = ADDR_DISPLAYABLE; + } + + if (pInfo) + { + pInfo->banks = 2; + pInfo->bankWidth = 1; + pInfo->bankHeight = 1; + pInfo->macroAspectRatio = 1; + pInfo->tileSplitBytes = 64; + pInfo->pipeConfig = ADDR_PIPECFG_P2; + } + } + else if (static_cast(index) >= m_noOfEntries) + { + returnCode = ADDR_INVALIDPARAMS; + } + else + { + const TileConfig* pCfgTable = GetTileSetting(index); + + if (pInfo) + { + *pInfo = pCfgTable->info; + } + else + { + if (IsMacroTiled(pCfgTable->mode)) + { + returnCode = ADDR_INVALIDPARAMS; + } + } + + if (pMode) + { + *pMode = pCfgTable->mode; + } + + if (pType) + { + *pType = pCfgTable->type; + } + } + } + + return returnCode; +} + +/** +**************************************************************************************************** +* SiLib::ReadGbTileMode +* +* @brief +* Convert GB_TILE_MODE HW value to TileConfig. +* @return +* NA. 
+**************************************************************************************************** +*/ +VOID SiLib::ReadGbTileMode( + UINT_32 regValue, ///< [in] GB_TILE_MODE register + TileConfig* pCfg ///< [out] output structure + ) const +{ + GB_TILE_MODE gbTileMode; + gbTileMode.val = regValue; + + pCfg->type = static_cast(gbTileMode.f.micro_tile_mode); + pCfg->info.bankHeight = 1 << gbTileMode.f.bank_height; + pCfg->info.bankWidth = 1 << gbTileMode.f.bank_width; + pCfg->info.banks = 1 << (gbTileMode.f.num_banks + 1); + pCfg->info.macroAspectRatio = 1 << gbTileMode.f.macro_tile_aspect; + pCfg->info.tileSplitBytes = 64 << gbTileMode.f.tile_split; + pCfg->info.pipeConfig = static_cast(gbTileMode.f.pipe_config + 1); + + UINT_32 regArrayMode = gbTileMode.f.array_mode; + + pCfg->mode = static_cast(regArrayMode); + + if (regArrayMode == 8) //ARRAY_2D_TILED_XTHICK + { + pCfg->mode = ADDR_TM_2D_TILED_XTHICK; + } + else if (regArrayMode >= 14) //ARRAY_3D_TILED_XTHICK + { + pCfg->mode = static_cast(pCfg->mode + 3); + } +} + +/** +**************************************************************************************************** +* SiLib::InitTileSettingTable +* +* @brief +* Initialize the ADDR_TILE_CONFIG table. 
+* @return +* TRUE if tile table is correctly initialized +**************************************************************************************************** +*/ +BOOL_32 SiLib::InitTileSettingTable( + const UINT_32* pCfg, ///< [in] Pointer to table of tile configs + UINT_32 noOfEntries ///< [in] Numbe of entries in the table above + ) +{ + BOOL_32 initOk = TRUE; + + ADDR_ASSERT(noOfEntries <= TileTableSize); + + memset(m_tileTable, 0, sizeof(m_tileTable)); + + if (noOfEntries != 0) + { + m_noOfEntries = noOfEntries; + } + else + { + m_noOfEntries = TileTableSize; + } + + if (pCfg) // From Client + { + for (UINT_32 i = 0; i < m_noOfEntries; i++) + { + ReadGbTileMode(*(pCfg + i), &m_tileTable[i]); + } + } + else + { + ADDR_ASSERT_ALWAYS(); + initOk = FALSE; + } + + if (initOk) + { + ADDR_ASSERT(m_tileTable[TILEINDEX_LINEAR_ALIGNED].mode == ADDR_TM_LINEAR_ALIGNED); + } + + return initOk; +} + +/** +**************************************************************************************************** +* SiLib::HwlGetTileIndex +* +* @brief +* Return the virtual/real index for given mode/type/info +* @return +* ADDR_OK if successful. 
+**************************************************************************************************** +*/ +ADDR_E_RETURNCODE SiLib::HwlGetTileIndex( + const ADDR_GET_TILEINDEX_INPUT* pIn, + ADDR_GET_TILEINDEX_OUTPUT* pOut) const +{ + ADDR_E_RETURNCODE returnCode = ADDR_OK; + + pOut->index = HwlPostCheckTileIndex(pIn->pTileInfo, pIn->tileMode, pIn->tileType); + + return returnCode; +} + +/** +**************************************************************************************************** +* SiLib::HwlFmaskPreThunkSurfInfo +* +* @brief +* Some preparation before thunking a ComputeSurfaceInfo call for Fmask +* @return +* ADDR_E_RETURNCODE +**************************************************************************************************** +*/ +VOID SiLib::HwlFmaskPreThunkSurfInfo( + const ADDR_COMPUTE_FMASK_INFO_INPUT* pFmaskIn, ///< [in] Input of fmask info + const ADDR_COMPUTE_FMASK_INFO_OUTPUT* pFmaskOut, ///< [in] Output of fmask info + ADDR_COMPUTE_SURFACE_INFO_INPUT* pSurfIn, ///< [out] Input of thunked surface info + ADDR_COMPUTE_SURFACE_INFO_OUTPUT* pSurfOut ///< [out] Output of thunked surface info + ) const +{ + pSurfIn->tileIndex = pFmaskIn->tileIndex; +} + +/** +**************************************************************************************************** +* SiLib::HwlFmaskPostThunkSurfInfo +* +* @brief +* Copy hwl extra field after calling thunked ComputeSurfaceInfo +* @return +* ADDR_E_RETURNCODE +**************************************************************************************************** +*/ +VOID SiLib::HwlFmaskPostThunkSurfInfo( + const ADDR_COMPUTE_SURFACE_INFO_OUTPUT* pSurfOut, ///< [in] Output of surface info + ADDR_COMPUTE_FMASK_INFO_OUTPUT* pFmaskOut ///< [out] Output of fmask info + ) const +{ + pFmaskOut->macroModeIndex = TileIndexInvalid; + pFmaskOut->tileIndex = pSurfOut->tileIndex; +} + +/** +**************************************************************************************************** +* SiLib::HwlComputeFmaskBits +* 
@brief +* Computes fmask bits +* @return +* Fmask bits +**************************************************************************************************** +*/ +UINT_32 SiLib::HwlComputeFmaskBits( + const ADDR_COMPUTE_FMASK_INFO_INPUT* pIn, + UINT_32* pNumSamples + ) const +{ + UINT_32 numSamples = pIn->numSamples; + UINT_32 numFrags = GetNumFragments(numSamples, pIn->numFrags); + UINT_32 bpp; + + if (numFrags != numSamples) // EQAA + { + ADDR_ASSERT(numFrags <= 8); + + if (pIn->resolved == FALSE) + { + if (numFrags == 1) + { + bpp = 1; + numSamples = numSamples == 16 ? 16 : 8; + } + else if (numFrags == 2) + { + ADDR_ASSERT(numSamples >= 4); + + bpp = 2; + numSamples = numSamples; + } + else if (numFrags == 4) + { + ADDR_ASSERT(numSamples >= 4); + + bpp = 4; + numSamples = numSamples; + } + else // numFrags == 8 + { + ADDR_ASSERT(numSamples == 16); + + bpp = 4; + numSamples = numSamples; + } + } + else + { + if (numFrags == 1) + { + bpp = (numSamples == 16) ? 16 : 8; + numSamples = 1; + } + else if (numFrags == 2) + { + ADDR_ASSERT(numSamples >= 4); + + bpp = numSamples*2; + numSamples = 1; + } + else if (numFrags == 4) + { + ADDR_ASSERT(numSamples >= 4); + + bpp = numSamples*4; + numSamples = 1; + } + else // numFrags == 8 + { + ADDR_ASSERT(numSamples >= 16); + + bpp = 16*4; + numSamples = 1; + } + } + } + else // Normal AA + { + if (pIn->resolved == FALSE) + { + bpp = ComputeFmaskNumPlanesFromNumSamples(numSamples); + numSamples = numSamples == 2 ? 
8 : numSamples; + } + else + { + // The same as 8XX + bpp = ComputeFmaskResolvedBppFromNumSamples(numSamples); + numSamples = 1; // 1x sample + } + } + + SafeAssign(pNumSamples, numSamples); + + return bpp; +} + +/** +**************************************************************************************************** +* SiLib::HwlOptimizeTileMode +* +* @brief +* Optimize tile mode on SI +* +* @return +* N/A +* +**************************************************************************************************** +*/ +VOID SiLib::HwlOptimizeTileMode( + ADDR_COMPUTE_SURFACE_INFO_INPUT* pInOut ///< [in,out] input output structure + ) const +{ + AddrTileMode tileMode = pInOut->tileMode; + + if ((pInOut->flags.needEquation == TRUE) && + (IsMacroTiled(tileMode) == TRUE) && + (pInOut->numSamples <= 1)) + { + UINT_32 thickness = Thickness(tileMode); + + if (thickness > 1) + { + tileMode = ADDR_TM_1D_TILED_THICK; + } + else if (pInOut->numSlices > 1) + { + tileMode = ADDR_TM_1D_TILED_THIN1; + } + else + { + tileMode = ADDR_TM_2D_TILED_THIN1; + } + } + + if (tileMode != pInOut->tileMode) + { + pInOut->tileMode = tileMode; + } +} + +/** +**************************************************************************************************** +* SiLib::HwlOverrideTileMode +* +* @brief +* Override tile modes (for PRT only, avoid client passes in an invalid PRT mode for SI. 
+* +* @return +* N/A +* +**************************************************************************************************** +*/ +VOID SiLib::HwlOverrideTileMode( + ADDR_COMPUTE_SURFACE_INFO_INPUT* pInOut ///< [in,out] input output structure + ) const +{ + AddrTileMode tileMode = pInOut->tileMode; + + switch (tileMode) + { + case ADDR_TM_PRT_TILED_THIN1: + tileMode = ADDR_TM_2D_TILED_THIN1; + break; + + case ADDR_TM_PRT_TILED_THICK: + tileMode = ADDR_TM_2D_TILED_THICK; + break; + + case ADDR_TM_PRT_2D_TILED_THICK: + tileMode = ADDR_TM_2D_TILED_THICK; + break; + + case ADDR_TM_PRT_3D_TILED_THICK: + tileMode = ADDR_TM_3D_TILED_THICK; + break; + + default: + break; + } + + if (tileMode != pInOut->tileMode) + { + pInOut->tileMode = tileMode; + // Only PRT tile modes are overridden for now. Revisit this once new modes are added above. + pInOut->flags.prt = TRUE; + } +} + +/** +**************************************************************************************************** +* SiLib::HwlSetPrtTileMode +* +* @brief +* Set prt tile modes. +* +* @return +* N/A +* +**************************************************************************************************** +*/ +VOID SiLib::HwlSetPrtTileMode( + ADDR_COMPUTE_SURFACE_INFO_INPUT* pInOut ///< [in,out] input output structure + ) const +{ + pInOut->tileMode = ADDR_TM_2D_TILED_THIN1; + pInOut->tileType = (pInOut->tileType == ADDR_DEPTH_SAMPLE_ORDER) ? + ADDR_DEPTH_SAMPLE_ORDER : ADDR_NON_DISPLAYABLE; + pInOut->flags.prt = TRUE; +} + +/** +**************************************************************************************************** +* SiLib::HwlSelectTileMode +* +* @brief +* Select tile modes. 
+* +* @return +* N/A +* +**************************************************************************************************** +*/ +VOID SiLib::HwlSelectTileMode( + ADDR_COMPUTE_SURFACE_INFO_INPUT* pInOut ///< [in,out] input output structure + ) const +{ + AddrTileMode tileMode; + AddrTileType tileType; + + if (pInOut->flags.volume) + { + if (pInOut->numSlices >= 8) + { + tileMode = ADDR_TM_2D_TILED_XTHICK; + } + else if (pInOut->numSlices >= 4) + { + tileMode = ADDR_TM_2D_TILED_THICK; + } + else + { + tileMode = ADDR_TM_2D_TILED_THIN1; + } + tileType = ADDR_NON_DISPLAYABLE; + } + else + { + tileMode = ADDR_TM_2D_TILED_THIN1; + + if (pInOut->flags.depth || pInOut->flags.stencil) + { + tileType = ADDR_DEPTH_SAMPLE_ORDER; + } + else if ((pInOut->bpp <= 32) || + (pInOut->flags.display == TRUE) || + (pInOut->flags.overlay == TRUE)) + { + tileType = ADDR_DISPLAYABLE; + } + else + { + tileType = ADDR_NON_DISPLAYABLE; + } + } + + if (pInOut->flags.prt) + { + tileMode = ADDR_TM_2D_TILED_THIN1; + tileType = (tileType == ADDR_DISPLAYABLE) ? ADDR_NON_DISPLAYABLE : tileType; + } + + pInOut->tileMode = tileMode; + pInOut->tileType = tileType; + + // Optimize tile mode if possible + pInOut->flags.opt4Space = TRUE; + + // Optimize tile mode if possible + OptimizeTileMode(pInOut); + + HwlOverrideTileMode(pInOut); +} + +/** +**************************************************************************************************** +* SiLib::HwlComputeMaxBaseAlignments +* +* @brief +* Gets maximum alignments +* @return +* maximum alignments +**************************************************************************************************** +*/ +UINT_32 SiLib::HwlComputeMaxBaseAlignments() const +{ + const UINT_32 pipes = HwlGetPipes(&m_tileTable[0].info); + + // Initial size is 64 KiB for PRT. 
+ UINT_32 maxBaseAlign = 64 * 1024; + + for (UINT_32 i = 0; i < m_noOfEntries; i++) + { + if ((IsMacroTiled(m_tileTable[i].mode) == TRUE) && + (IsPrtTileMode(m_tileTable[i].mode) == FALSE)) + { + // The maximum tile size is 16 byte-per-pixel and either 8-sample or 8-slice. + UINT_32 tileSize = Min(m_tileTable[i].info.tileSplitBytes, + MicroTilePixels * 8 * 16); + + UINT_32 baseAlign = tileSize * pipes * m_tileTable[i].info.banks * + m_tileTable[i].info.bankWidth * m_tileTable[i].info.bankHeight; + + if (baseAlign > maxBaseAlign) + { + maxBaseAlign = baseAlign; + } + } + } + + return maxBaseAlign; +} + +/** +**************************************************************************************************** +* SiLib::HwlComputeMaxMetaBaseAlignments +* +* @brief +* Gets maximum alignments for metadata +* @return +* maximum alignments for metadata +**************************************************************************************************** +*/ +UINT_32 SiLib::HwlComputeMaxMetaBaseAlignments() const +{ + UINT_32 maxPipe = 1; + + for (UINT_32 i = 0; i < m_noOfEntries; i++) + { + maxPipe = Max(maxPipe, HwlGetPipes(&m_tileTable[i].info)); + } + + return m_pipeInterleaveBytes * maxPipe; +} + +/** +**************************************************************************************************** +* SiLib::HwlComputeSurfaceAlignmentsMacroTiled +* +* @brief +* Hardware layer function to compute alignment request for macro tile mode +* +* @return +* N/A +* +**************************************************************************************************** +*/ +VOID SiLib::HwlComputeSurfaceAlignmentsMacroTiled( + AddrTileMode tileMode, ///< [in] tile mode + UINT_32 bpp, ///< [in] bits per pixel + ADDR_SURFACE_FLAGS flags, ///< [in] surface flags + UINT_32 mipLevel, ///< [in] mip level + UINT_32 numSamples, ///< [in] number of samples + ADDR_COMPUTE_SURFACE_INFO_OUTPUT* pOut ///< [in,out] Surface output + ) const +{ + if ((mipLevel == 0) && (flags.prt)) + { + UINT_32 
macroTileSize = pOut->blockWidth * pOut->blockHeight * numSamples * bpp / 8; + + if (macroTileSize < PrtTileSize) + { + UINT_32 numMacroTiles = PrtTileSize / macroTileSize; + + ADDR_ASSERT((PrtTileSize % macroTileSize) == 0); + + pOut->pitchAlign *= numMacroTiles; + pOut->baseAlign *= numMacroTiles; + } + } +} + +/** +**************************************************************************************************** +* SiLib::InitEquationTable +* +* @brief +* Initialize Equation table. +* +* @return +* N/A +**************************************************************************************************** +*/ +VOID SiLib::InitEquationTable() +{ + ADDR_EQUATION_KEY equationKeyTable[EquationTableSize]; + memset(equationKeyTable, 0, sizeof(equationKeyTable)); + + memset(m_equationTable, 0, sizeof(m_equationTable)); + + memset(m_blockWidth, 0, sizeof(m_blockWidth)); + + memset(m_blockHeight, 0, sizeof(m_blockHeight)); + + memset(m_blockSlices, 0, sizeof(m_blockSlices)); + + // Loop all possible bpp + for (UINT_32 log2ElementBytes = 0; log2ElementBytes < MaxNumElementBytes; log2ElementBytes++) + { + // Get bits per pixel + UINT_32 bpp = 1 << (log2ElementBytes + 3); + + // Loop all possible tile index + for (INT_32 tileIndex = 0; tileIndex < static_cast(m_noOfEntries); tileIndex++) + { + UINT_32 equationIndex = ADDR_INVALID_EQUATION_INDEX; + + TileConfig tileConfig = m_tileTable[tileIndex]; + + ADDR_SURFACE_FLAGS flags = {{0}}; + + // Compute tile info, hardcode numSamples to 1 because MSAA is not supported + // in swizzle pattern equation + HwlComputeMacroModeIndex(tileIndex, flags, bpp, 1, &tileConfig.info, NULL, NULL); + + // Check if the input is supported + if (IsEquationSupported(bpp, tileConfig, tileIndex, log2ElementBytes) == TRUE) + { + ADDR_EQUATION_KEY key = {{0}}; + + // Generate swizzle equation key from bpp and tile config + key.fields.log2ElementBytes = log2ElementBytes; + key.fields.tileMode = tileConfig.mode; + // Treat depth micro tile type and 
non-display micro tile type as the same key + // because they have the same equation actually + key.fields.microTileType = (tileConfig.type == ADDR_DEPTH_SAMPLE_ORDER) ? + ADDR_NON_DISPLAYABLE : tileConfig.type; + key.fields.pipeConfig = tileConfig.info.pipeConfig; + key.fields.numBanksLog2 = Log2(tileConfig.info.banks); + key.fields.bankWidth = tileConfig.info.bankWidth; + key.fields.bankHeight = tileConfig.info.bankHeight; + key.fields.macroAspectRatio = tileConfig.info.macroAspectRatio; + key.fields.prt = ((m_chipFamily == ADDR_CHIP_FAMILY_SI) && + ((1 << tileIndex) & SiPrtTileIndexMask)) ? 1 : 0; + + // Find in the table if the equation has been built based on the key + for (UINT_32 i = 0; i < m_numEquations; i++) + { + if (key.value == equationKeyTable[i].value) + { + equationIndex = i; + break; + } + } + + // If found, just fill the index into the lookup table and no need + // to generate the equation again. Otherwise, generate the equation. + if (equationIndex == ADDR_INVALID_EQUATION_INDEX) + { + ADDR_EQUATION equation; + ADDR_E_RETURNCODE retCode; + + memset(&equation, 0, sizeof(ADDR_EQUATION)); + + // Generate the equation + if (IsMicroTiled(tileConfig.mode)) + { + retCode = ComputeMicroTileEquation(log2ElementBytes, + tileConfig.mode, + tileConfig.type, + &equation); + } + else + { + retCode = ComputeMacroTileEquation(log2ElementBytes, + tileConfig.mode, + tileConfig.type, + &tileConfig.info, + &equation); + } + // Only fill the equation into the table if the return code is ADDR_OK, + // otherwise if the return code is not ADDR_OK, it indicates this is not + // a valid input, we do nothing but just fill invalid equation index + // into the lookup table. 
+ if (retCode == ADDR_OK) + { + equationIndex = m_numEquations; + ADDR_ASSERT(equationIndex < EquationTableSize); + + m_blockSlices[equationIndex] = Thickness(tileConfig.mode); + + if (IsMicroTiled(tileConfig.mode)) + { + m_blockWidth[equationIndex] = MicroTileWidth; + m_blockHeight[equationIndex] = MicroTileHeight; + } + else + { + const ADDR_TILEINFO* pTileInfo = &tileConfig.info; + + m_blockWidth[equationIndex] = + HwlGetPipes(pTileInfo) * MicroTileWidth * pTileInfo->bankWidth * + pTileInfo->macroAspectRatio; + m_blockHeight[equationIndex] = + MicroTileHeight * pTileInfo->bankHeight * pTileInfo->banks / + pTileInfo->macroAspectRatio; + + if (key.fields.prt) + { + UINT_32 macroTileSize = + m_blockWidth[equationIndex] * m_blockHeight[equationIndex] * + bpp / 8; + + if (macroTileSize < PrtTileSize) + { + UINT_32 numMacroTiles = PrtTileSize / macroTileSize; + + ADDR_ASSERT(macroTileSize == (1u << equation.numBits)); + ADDR_ASSERT((PrtTileSize % macroTileSize) == 0); + + UINT_32 numBits = Log2(numMacroTiles); + + UINT_32 xStart = Log2(m_blockWidth[equationIndex]) + + log2ElementBytes; + + m_blockWidth[equationIndex] *= numMacroTiles; + + for (UINT_32 i = 0; i < numBits; i++) + { + equation.addr[equation.numBits + i].valid = 1; + equation.addr[equation.numBits + i].index = xStart + i; + } + + equation.numBits += numBits; + } + } + } + + equationKeyTable[equationIndex] = key; + m_equationTable[equationIndex] = equation; + + m_numEquations++; + } + } + } + + // Fill the index into the lookup table, if the combination is not supported + // fill the invalid equation index + m_equationLookupTable[log2ElementBytes][tileIndex] = equationIndex; + } + + if (m_chipFamily == ADDR_CHIP_FAMILY_SI) + { + // For tile index 3 which is shared between PRT depth and uncompressed depth + m_uncompressDepthEqIndex = m_numEquations; + + for (UINT_32 log2ElemBytes = 0; log2ElemBytes < MaxNumElementBytes; log2ElemBytes++) + { + TileConfig tileConfig = m_tileTable[3]; + ADDR_EQUATION equation; 
+ ADDR_E_RETURNCODE retCode; + + memset(&equation, 0, sizeof(ADDR_EQUATION)); + + retCode = ComputeMacroTileEquation(log2ElemBytes, + tileConfig.mode, + tileConfig.type, + &tileConfig.info, + &equation); + + if (retCode == ADDR_OK) + { + UINT_32 equationIndex = m_numEquations; + ADDR_ASSERT(equationIndex < EquationTableSize); + + m_blockSlices[equationIndex] = 1; + + const ADDR_TILEINFO* pTileInfo = &tileConfig.info; + + m_blockWidth[equationIndex] = + HwlGetPipes(pTileInfo) * MicroTileWidth * pTileInfo->bankWidth * + pTileInfo->macroAspectRatio; + m_blockHeight[equationIndex] = + MicroTileHeight * pTileInfo->bankHeight * pTileInfo->banks / + pTileInfo->macroAspectRatio; + + m_equationTable[equationIndex] = equation; + + m_numEquations++; + } + } + } + } +} + +/** +**************************************************************************************************** +* SiLib::IsEquationSupported +* +* @brief +* Check if it is supported for given bpp and tile config to generate a equation. 
+* +* @return +* TRUE if supported +**************************************************************************************************** +*/ +BOOL_32 SiLib::IsEquationSupported( + UINT_32 bpp, ///< Bits per pixel + TileConfig tileConfig, ///< Tile config + INT_32 tileIndex, ///< Tile index + UINT_32 elementBytesLog2 ///< Log2 of element bytes + ) const +{ + BOOL_32 supported = TRUE; + + // Linear tile mode is not supported in swizzle pattern equation + if (IsLinear(tileConfig.mode)) + { + supported = FALSE; + } + // These tile modes are for Tex2DArray and Tex3D which has depth (num_slice > 1) use, + // which is not supported in swizzle pattern equation due to slice rotation + else if ((tileConfig.mode == ADDR_TM_2D_TILED_THICK) || + (tileConfig.mode == ADDR_TM_2D_TILED_XTHICK) || + (tileConfig.mode == ADDR_TM_3D_TILED_THIN1) || + (tileConfig.mode == ADDR_TM_3D_TILED_THICK) || + (tileConfig.mode == ADDR_TM_3D_TILED_XTHICK)) + { + supported = FALSE; + } + // Only 8bpp(stencil), 16bpp and 32bpp is supported for depth + else if ((tileConfig.type == ADDR_DEPTH_SAMPLE_ORDER) && (bpp > 32)) + { + supported = FALSE; + } + // Tile split is not supported in swizzle pattern equation + else if (IsMacroTiled(tileConfig.mode)) + { + UINT_32 thickness = Thickness(tileConfig.mode); + if (((bpp >> 3) * MicroTilePixels * thickness) > tileConfig.info.tileSplitBytes) + { + supported = FALSE; + } + + if ((supported == TRUE) && (m_chipFamily == ADDR_CHIP_FAMILY_SI)) + { + supported = m_EquationSupport[tileIndex][elementBytesLog2]; + } + } + + return supported; +} + +} // V1 +} // Addr diff -Nru mesa-18.3.3/src/amd/addrlib/src/r800/siaddrlib.h mesa-19.0.1/src/amd/addrlib/src/r800/siaddrlib.h --- mesa-18.3.3/src/amd/addrlib/src/r800/siaddrlib.h 1970-01-01 00:00:00.000000000 +0000 +++ mesa-19.0.1/src/amd/addrlib/src/r800/siaddrlib.h 2019-03-31 23:16:37.000000000 +0000 @@ -0,0 +1,347 @@ +/* + * Copyright © 2007-2018 Advanced Micro Devices, Inc. + * All Rights Reserved. 
+ * + * Permission is hereby granted, free of charge, to any person obtaining + * a copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES + * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NON-INFRINGEMENT. IN NO EVENT SHALL THE COPYRIGHT HOLDERS, AUTHORS + * AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE + * USE OR OTHER DEALINGS IN THE SOFTWARE. + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + */ + +/** +**************************************************************************************************** +* @file siaddrlib.h +* @brief Contains the R800Lib class definition. 
+**************************************************************************************************** +*/ + +#ifndef __SI_ADDR_LIB_H__ +#define __SI_ADDR_LIB_H__ + +#include "addrlib1.h" +#include "egbaddrlib.h" + +namespace Addr +{ +namespace V1 +{ + +/** +**************************************************************************************************** +* @brief Describes the information in tile mode table +**************************************************************************************************** +*/ +struct TileConfig +{ + AddrTileMode mode; + AddrTileType type; + ADDR_TILEINFO info; +}; + +/** +**************************************************************************************************** +* @brief SI specific settings structure. +**************************************************************************************************** +*/ +struct SiChipSettings +{ + UINT_32 isSouthernIsland : 1; + UINT_32 isTahiti : 1; + UINT_32 isPitCairn : 1; + UINT_32 isCapeVerde : 1; + // Oland/Hainan are of GFXIP 6.0, similar with SI + UINT_32 isOland : 1; + UINT_32 isHainan : 1; + + // CI + UINT_32 isSeaIsland : 1; + UINT_32 isBonaire : 1; + UINT_32 isKaveri : 1; + UINT_32 isSpectre : 1; + UINT_32 isSpooky : 1; + UINT_32 isKalindi : 1; + // Hawaii is GFXIP 7.2 + UINT_32 isHawaii : 1; + + // VI + UINT_32 isVolcanicIslands : 1; + UINT_32 isIceland : 1; + UINT_32 isTonga : 1; + UINT_32 isFiji : 1; + UINT_32 isPolaris10 : 1; + UINT_32 isPolaris11 : 1; + UINT_32 isPolaris12 : 1; + UINT_32 isVegaM : 1; + UINT_32 isCarrizo : 1; +}; + +/** +**************************************************************************************************** +* @brief This class is the SI specific address library +* function set. 
+**************************************************************************************************** +*/ +class SiLib : public EgBasedLib +{ +public: + /// Creates SiLib object + static Addr::Lib* CreateObj(const Client* pClient) + { + VOID* pMem = Object::ClientAlloc(sizeof(SiLib), pClient); + return (pMem != NULL) ? new (pMem) SiLib(pClient) : NULL; + } + +protected: + SiLib(const Client* pClient); + virtual ~SiLib(); + + // Hwl interface - defined in AddrLib1 + virtual ADDR_E_RETURNCODE HwlComputeSurfaceInfo( + const ADDR_COMPUTE_SURFACE_INFO_INPUT* pIn, + ADDR_COMPUTE_SURFACE_INFO_OUTPUT* pOut) const; + + virtual ADDR_E_RETURNCODE HwlConvertTileInfoToHW( + const ADDR_CONVERT_TILEINFOTOHW_INPUT* pIn, + ADDR_CONVERT_TILEINFOTOHW_OUTPUT* pOut) const; + + virtual UINT_64 HwlComputeXmaskAddrFromCoord( + UINT_32 pitch, UINT_32 height, UINT_32 x, UINT_32 y, UINT_32 slice, UINT_32 numSlices, + UINT_32 factor, BOOL_32 isLinear, BOOL_32 isWidth8, BOOL_32 isHeight8, + ADDR_TILEINFO* pTileInfo, UINT_32* pBitPosition) const; + + virtual VOID HwlComputeXmaskCoordFromAddr( + UINT_64 addr, UINT_32 bitPosition, UINT_32 pitch, UINT_32 height, UINT_32 numSlices, + UINT_32 factor, BOOL_32 isLinear, BOOL_32 isWidth8, BOOL_32 isHeight8, + ADDR_TILEINFO* pTileInfo, UINT_32* pX, UINT_32* pY, UINT_32* pSlice) const; + + virtual ADDR_E_RETURNCODE HwlGetTileIndex( + const ADDR_GET_TILEINDEX_INPUT* pIn, + ADDR_GET_TILEINDEX_OUTPUT* pOut) const; + + virtual BOOL_32 HwlComputeMipLevel( + ADDR_COMPUTE_SURFACE_INFO_INPUT* pIn) const; + + virtual ChipFamily HwlConvertChipFamily( + UINT_32 uChipFamily, UINT_32 uChipRevision); + + virtual BOOL_32 HwlInitGlobalParams( + const ADDR_CREATE_INPUT* pCreateIn); + + virtual ADDR_E_RETURNCODE HwlSetupTileCfg( + UINT_32 bpp, INT_32 index, INT_32 macroModeIndex, + ADDR_TILEINFO* pInfo, AddrTileMode* pMode = 0, AddrTileType* pType = 0) const; + + virtual VOID HwlComputeTileDataWidthAndHeightLinear( + UINT_32* pMacroWidth, UINT_32* pMacroHeight, + UINT_32 
bpp, ADDR_TILEINFO* pTileInfo) const; + + virtual UINT_64 HwlComputeHtileBytes( + UINT_32 pitch, UINT_32 height, UINT_32 bpp, + BOOL_32 isLinear, UINT_32 numSlices, UINT_64* pSliceBytes, UINT_32 baseAlign) const; + + virtual ADDR_E_RETURNCODE ComputeBankEquation( + UINT_32 log2BytesPP, UINT_32 threshX, UINT_32 threshY, + ADDR_TILEINFO* pTileInfo, ADDR_EQUATION* pEquation) const; + + virtual ADDR_E_RETURNCODE ComputePipeEquation( + UINT_32 log2BytesPP, UINT_32 threshX, UINT_32 threshY, + ADDR_TILEINFO* pTileInfo, ADDR_EQUATION* pEquation) const; + + virtual UINT_32 ComputePipeFromCoord( + UINT_32 x, UINT_32 y, UINT_32 slice, + AddrTileMode tileMode, UINT_32 pipeSwizzle, BOOL_32 ignoreSE, + ADDR_TILEINFO* pTileInfo) const; + + virtual UINT_32 HwlGetPipes(const ADDR_TILEINFO* pTileInfo) const; + + /// Pre-handler of 3x pitch (96 bit) adjustment + virtual UINT_32 HwlPreHandleBaseLvl3xPitch( + const ADDR_COMPUTE_SURFACE_INFO_INPUT* pIn, UINT_32 expPitch) const; + /// Post-handler of 3x pitch adjustment + virtual UINT_32 HwlPostHandleBaseLvl3xPitch( + const ADDR_COMPUTE_SURFACE_INFO_INPUT* pIn, UINT_32 expPitch) const; + + /// Dummy function to finalize the inheritance + virtual UINT_32 HwlComputeXmaskCoordYFrom8Pipe( + UINT_32 pipe, UINT_32 x) const; + + // Sub-hwl interface - defined in EgBasedLib + virtual VOID HwlSetupTileInfo( + AddrTileMode tileMode, ADDR_SURFACE_FLAGS flags, + UINT_32 bpp, UINT_32 pitch, UINT_32 height, UINT_32 numSamples, + ADDR_TILEINFO* inputTileInfo, ADDR_TILEINFO* outputTileInfo, + AddrTileType inTileType, ADDR_COMPUTE_SURFACE_INFO_OUTPUT* pOut) const; + + virtual UINT_32 HwlGetPitchAlignmentMicroTiled( + AddrTileMode tileMode, UINT_32 bpp, ADDR_SURFACE_FLAGS flags, UINT_32 numSamples) const; + + virtual UINT_64 HwlGetSizeAdjustmentMicroTiled( + UINT_32 thickness, UINT_32 bpp, ADDR_SURFACE_FLAGS flags, UINT_32 numSamples, + UINT_32 baseAlign, UINT_32 pitchAlign, + UINT_32 *pPitch, UINT_32 *pHeight) const; + + virtual VOID 
HwlCheckLastMacroTiledLvl( + const ADDR_COMPUTE_SURFACE_INFO_INPUT* pIn, ADDR_COMPUTE_SURFACE_INFO_OUTPUT* pOut) const; + + virtual BOOL_32 HwlTileInfoEqual( + const ADDR_TILEINFO* pLeft, const ADDR_TILEINFO* pRight) const; + + virtual AddrTileMode HwlDegradeThickTileMode( + AddrTileMode baseTileMode, UINT_32 numSlices, UINT_32* pBytesPerTile) const; + + virtual VOID HwlOverrideTileMode(ADDR_COMPUTE_SURFACE_INFO_INPUT* pInOut) const; + + virtual VOID HwlOptimizeTileMode(ADDR_COMPUTE_SURFACE_INFO_INPUT* pInOut) const; + + virtual VOID HwlSelectTileMode(ADDR_COMPUTE_SURFACE_INFO_INPUT* pInOut) const; + + /// Overwrite tile setting to PRT + virtual VOID HwlSetPrtTileMode(ADDR_COMPUTE_SURFACE_INFO_INPUT* pInOut) const; + + virtual BOOL_32 HwlSanityCheckMacroTiled( + ADDR_TILEINFO* pTileInfo) const + { + return TRUE; + } + + virtual UINT_32 HwlGetPitchAlignmentLinear(UINT_32 bpp, ADDR_SURFACE_FLAGS flags) const; + + virtual UINT_64 HwlGetSizeAdjustmentLinear( + AddrTileMode tileMode, + UINT_32 bpp, UINT_32 numSamples, UINT_32 baseAlign, UINT_32 pitchAlign, + UINT_32 *pPitch, UINT_32 *pHeight, UINT_32 *pHeightAlign) const; + + virtual VOID HwlComputeSurfaceCoord2DFromBankPipe( + AddrTileMode tileMode, UINT_32* pX, UINT_32* pY, UINT_32 slice, + UINT_32 bank, UINT_32 pipe, + UINT_32 bankSwizzle, UINT_32 pipeSwizzle, UINT_32 tileSlices, + BOOL_32 ignoreSE, + ADDR_TILEINFO* pTileInfo) const; + + virtual UINT_32 HwlPreAdjustBank( + UINT_32 tileX, UINT_32 bank, ADDR_TILEINFO* pTileInfo) const; + + virtual INT_32 HwlPostCheckTileIndex( + const ADDR_TILEINFO* pInfo, AddrTileMode mode, AddrTileType type, + INT curIndex = TileIndexInvalid) const; + + virtual VOID HwlFmaskPreThunkSurfInfo( + const ADDR_COMPUTE_FMASK_INFO_INPUT* pFmaskIn, + const ADDR_COMPUTE_FMASK_INFO_OUTPUT* pFmaskOut, + ADDR_COMPUTE_SURFACE_INFO_INPUT* pSurfIn, + ADDR_COMPUTE_SURFACE_INFO_OUTPUT* pSurfOut) const; + + virtual VOID HwlFmaskPostThunkSurfInfo( + const ADDR_COMPUTE_SURFACE_INFO_OUTPUT* pSurfOut, + 
ADDR_COMPUTE_FMASK_INFO_OUTPUT* pFmaskOut) const; + + virtual UINT_32 HwlComputeFmaskBits( + const ADDR_COMPUTE_FMASK_INFO_INPUT* pIn, + UINT_32* pNumSamples) const; + + virtual BOOL_32 HwlReduceBankWidthHeight( + UINT_32 tileSize, UINT_32 bpp, ADDR_SURFACE_FLAGS flags, UINT_32 numSamples, + UINT_32 bankHeightAlign, UINT_32 pipes, + ADDR_TILEINFO* pTileInfo) const + { + return TRUE; + } + + virtual UINT_32 HwlComputeMaxBaseAlignments() const; + + virtual UINT_32 HwlComputeMaxMetaBaseAlignments() const; + + virtual VOID HwlComputeSurfaceAlignmentsMacroTiled( + AddrTileMode tileMode, UINT_32 bpp, ADDR_SURFACE_FLAGS flags, + UINT_32 mipLevel, UINT_32 numSamples, ADDR_COMPUTE_SURFACE_INFO_OUTPUT* pOut) const; + + // Get equation table pointer and number of equations + virtual UINT_32 HwlGetEquationTableInfo(const ADDR_EQUATION** ppEquationTable) const + { + *ppEquationTable = m_equationTable; + + return m_numEquations; + } + + // Check if it is supported for given bpp and tile config to generate an equation + BOOL_32 IsEquationSupported( + UINT_32 bpp, TileConfig tileConfig, INT_32 tileIndex, UINT_32 elementBytesLog2) const; + + // Protected non-virtual functions + VOID ComputeTileCoordFromPipeAndElemIdx( + UINT_32 elemIdx, UINT_32 pipe, AddrPipeCfg pipeCfg, UINT_32 pitchInMacroTile, + UINT_32 x, UINT_32 y, UINT_32* pX, UINT_32* pY) const; + + UINT_32 TileCoordToMaskElementIndex( + UINT_32 tx, UINT_32 ty, AddrPipeCfg pipeConfig, + UINT_32 *macroShift, UINT_32 *elemIdxBits) const; + + BOOL_32 DecodeGbRegs( + const ADDR_REGISTER_VALUE* pRegValue); + + const TileConfig* GetTileSetting( + UINT_32 index) const; + + // Initialize equation table + VOID InitEquationTable(); + + UINT_32 GetPipePerSurf(AddrPipeCfg pipeConfig) const; + + static const UINT_32 TileTableSize = 32; + TileConfig m_tileTable[TileTableSize]; + UINT_32 m_noOfEntries; + + // Max number of bpp (8bpp/16bpp/32bpp/64bpp/128bpp) + static const UINT_32 MaxNumElementBytes = 5; + + static const BOOL_32 
m_EquationSupport[TileTableSize][MaxNumElementBytes]; + + // Prt tile mode index mask + static const UINT_32 SiPrtTileIndexMask = ((1 << 3) | (1 << 5) | (1 << 6) | (1 << 7) | + (1 << 21) | (1 << 22) | (1 << 23) | (1 << 24) | + (1 << 25) | (1 << 30)); + + // More than half slots in tile mode table can't support equation + static const UINT_32 EquationTableSize = (MaxNumElementBytes * TileTableSize) / 2; + // Equation table + ADDR_EQUATION m_equationTable[EquationTableSize]; + UINT_32 m_numMacroBits[EquationTableSize]; + UINT_32 m_blockWidth[EquationTableSize]; + UINT_32 m_blockHeight[EquationTableSize]; + UINT_32 m_blockSlices[EquationTableSize]; + // Number of equation entries in the table + UINT_32 m_numEquations; + // Equation lookup table according to bpp and tile index + UINT_32 m_equationLookupTable[MaxNumElementBytes][TileTableSize]; + + UINT_32 m_uncompressDepthEqIndex; + + SiChipSettings m_settings; + +private: + + VOID ReadGbTileMode(UINT_32 regValue, TileConfig* pCfg) const; + BOOL_32 InitTileSettingTable(const UINT_32 *pSetting, UINT_32 noOfEntries); +}; + +} // V1 +} // Addr + +#endif + diff -Nru mesa-18.3.3/src/amd/Android.addrlib.mk mesa-19.0.1/src/amd/Android.addrlib.mk --- mesa-18.3.3/src/amd/Android.addrlib.mk 2017-12-02 01:35:56.000000000 +0000 +++ mesa-19.0.1/src/amd/Android.addrlib.mk 2019-03-31 23:16:37.000000000 +0000 @@ -33,12 +33,11 @@ LOCAL_C_INCLUDES := \ $(MESA_TOP)/src \ $(MESA_TOP)/src/amd/common \ - $(MESA_TOP)/src/amd/addrlib \ - $(MESA_TOP)/src/amd/addrlib/core \ - $(MESA_TOP)/src/amd/addrlib/inc/chip/gfx9 \ - $(MESA_TOP)/src/amd/addrlib/inc/chip/r800 \ - $(MESA_TOP)/src/amd/addrlib/gfx9/chip \ - $(MESA_TOP)/src/amd/addrlib/r800/chip + $(MESA_TOP)/src/amd/addrlib/inc \ + $(MESA_TOP)/src/amd/addrlib/src \ + $(MESA_TOP)/src/amd/addrlib/src/core \ + $(MESA_TOP)/src/amd/addrlib/src/chip/gfx9 \ + $(MESA_TOP)/src/amd/addrlib/src/chip/r800 LOCAL_EXPORT_C_INCLUDE_DIRS := \ $(LOCAL_PATH) \ diff -Nru mesa-18.3.3/src/amd/common/ac_debug.c 
mesa-19.0.1/src/amd/common/ac_debug.c --- mesa-18.3.3/src/amd/common/ac_debug.c 2018-04-11 19:02:35.000000000 +0000 +++ mesa-19.0.1/src/amd/common/ac_debug.c 2019-03-31 23:16:37.000000000 +0000 @@ -233,6 +233,7 @@ if (op == PKT3_SET_CONTEXT_REG || op == PKT3_SET_CONFIG_REG || op == PKT3_SET_UCONFIG_REG || + op == PKT3_SET_UCONFIG_REG_INDEX || op == PKT3_SET_SH_REG) fprintf(f, COLOR_CYAN "%s%s" COLOR_CYAN ":\n", name, predicate); @@ -252,6 +253,7 @@ ac_parse_set_reg_packet(f, count, SI_CONFIG_REG_OFFSET, ib); break; case PKT3_SET_UCONFIG_REG: + case PKT3_SET_UCONFIG_REG_INDEX: ac_parse_set_reg_packet(f, count, CIK_UCONFIG_REG_OFFSET, ib); break; case PKT3_SET_SH_REG: diff -Nru mesa-18.3.3/src/amd/common/ac_gpu_info.c mesa-19.0.1/src/amd/common/ac_gpu_info.c --- mesa-18.3.3/src/amd/common/ac_gpu_info.c 2018-12-07 18:58:04.000000000 +0000 +++ mesa-19.0.1/src/amd/common/ac_gpu_info.c 2019-03-31 23:16:37.000000000 +0000 @@ -455,7 +455,7 @@ ib_align = MAX2(ib_align, vcn_dec.ib_start_alignment); ib_align = MAX2(ib_align, vcn_enc.ib_start_alignment); ib_align = MAX2(ib_align, vcn_jpeg.ib_start_alignment); - assert(ib_align); + assert(ib_align); info->ib_start_alignment = ib_align; return true; diff -Nru mesa-18.3.3/src/amd/common/ac_llvm_build.c mesa-19.0.1/src/amd/common/ac_llvm_build.c --- mesa-18.3.3/src/amd/common/ac_llvm_build.c 2019-02-01 12:03:20.000000000 +0000 +++ mesa-19.0.1/src/amd/common/ac_llvm_build.c 2019-03-31 23:16:37.000000000 +0000 @@ -75,7 +75,7 @@ ctx->i16 = LLVMIntTypeInContext(ctx->context, 16); ctx->i32 = LLVMIntTypeInContext(ctx->context, 32); ctx->i64 = LLVMIntTypeInContext(ctx->context, 64); - ctx->intptr = HAVE_32BIT_POINTERS ? 
ctx->i32 : ctx->i64; + ctx->intptr = ctx->i32; ctx->f16 = LLVMHalfTypeInContext(ctx->context); ctx->f32 = LLVMFloatTypeInContext(ctx->context); ctx->f64 = LLVMDoubleTypeInContext(ctx->context); @@ -229,6 +229,15 @@ return LLVMBuildBitCast(ctx->builder, v, ac_to_integer_type(ctx, type), ""); } +LLVMValueRef +ac_to_integer_or_pointer(struct ac_llvm_context *ctx, LLVMValueRef v) +{ + LLVMTypeRef type = LLVMTypeOf(v); + if (LLVMGetTypeKind(type) == LLVMPointerTypeKind) + return v; + return ac_to_integer(ctx, v); +} + static LLVMTypeRef to_float_type_scalar(struct ac_llvm_context *ctx, LLVMTypeRef t) { if (t == ctx->i16 || t == ctx->f16) @@ -897,6 +906,37 @@ } LLVMValueRef +ac_build_fs_interp_f16(struct ac_llvm_context *ctx, + LLVMValueRef llvm_chan, + LLVMValueRef attr_number, + LLVMValueRef params, + LLVMValueRef i, + LLVMValueRef j) +{ + LLVMValueRef args[6]; + LLVMValueRef p1; + + args[0] = i; + args[1] = llvm_chan; + args[2] = attr_number; + args[3] = ctx->i1false; + args[4] = params; + + p1 = ac_build_intrinsic(ctx, "llvm.amdgcn.interp.p1.f16", + ctx->f32, args, 5, AC_FUNC_ATTR_READNONE); + + args[0] = p1; + args[1] = j; + args[2] = llvm_chan; + args[3] = attr_number; + args[4] = ctx->i1false; + args[5] = params; + + return ac_build_intrinsic(ctx, "llvm.amdgcn.interp.p2.f16", + ctx->f16, args, 6, AC_FUNC_ATTR_READNONE); +} + +LLVMValueRef ac_build_fs_interp_mov(struct ac_llvm_context *ctx, LLVMValueRef parameter, LLVMValueRef llvm_chan, @@ -915,6 +955,14 @@ } LLVMValueRef +ac_build_gep_ptr(struct ac_llvm_context *ctx, + LLVMValueRef base_ptr, + LLVMValueRef index) +{ + return LLVMBuildGEP(ctx->builder, base_ptr, &index, 1, ""); +} + +LLVMValueRef ac_build_gep0(struct ac_llvm_context *ctx, LLVMValueRef base_ptr, LLVMValueRef index) @@ -1161,6 +1209,47 @@ ac_get_load_intr_attribs(can_speculate)); } +static LLVMValueRef +ac_build_llvm8_buffer_load_common(struct ac_llvm_context *ctx, + LLVMValueRef rsrc, + LLVMValueRef vindex, + LLVMValueRef voffset, + LLVMValueRef 
soffset, + unsigned num_channels, + bool glc, + bool slc, + bool can_speculate, + bool use_format, + bool structurized) +{ + LLVMValueRef args[5]; + int idx = 0; + args[idx++] = LLVMBuildBitCast(ctx->builder, rsrc, ctx->v4i32, ""); + if (structurized) + args[idx++] = vindex ? vindex : ctx->i32_0; + args[idx++] = voffset ? voffset : ctx->i32_0; + args[idx++] = soffset ? soffset : ctx->i32_0; + args[idx++] = LLVMConstInt(ctx->i32, (glc ? 1 : 0) + (slc ? 2 : 0), 0); + unsigned func = CLAMP(num_channels, 1, 3) - 1; + + LLVMTypeRef types[] = {ctx->f32, ctx->v2f32, ctx->v4f32}; + const char *type_names[] = {"f32", "v2f32", "v4f32"}; + const char *indexing_kind = structurized ? "struct" : "raw"; + char name[256]; + + if (use_format) { + snprintf(name, sizeof(name), "llvm.amdgcn.%s.buffer.load.format.%s", + indexing_kind, type_names[func]); + } else { + snprintf(name, sizeof(name), "llvm.amdgcn.%s.buffer.load.%s", + indexing_kind, type_names[func]); + } + + return ac_build_intrinsic(ctx, name, types[func], args, + idx, + ac_get_load_intr_attribs(can_speculate)); +} + LLVMValueRef ac_build_buffer_load(struct ac_llvm_context *ctx, LLVMValueRef rsrc, @@ -1180,8 +1269,8 @@ if (soffset) offset = LLVMBuildAdd(ctx->builder, offset, soffset, ""); - /* TODO: VI and later generations can use SMEM with GLC=1.*/ - if (allow_smem && !glc && !slc) { + if (allow_smem && !slc && + (!glc || (HAVE_LLVM >= 0x0800 && ctx->chip_class >= VI))) { assert(vindex == NULL); LLVMValueRef result[8]; @@ -1191,11 +1280,19 @@ offset = LLVMBuildAdd(ctx->builder, offset, LLVMConstInt(ctx->i32, 4, 0), ""); } - LLVMValueRef args[2] = {rsrc, offset}; - result[i] = ac_build_intrinsic(ctx, "llvm.SI.load.const.v4i32", - ctx->f32, args, 2, + const char *intrname = + HAVE_LLVM >= 0x0800 ? "llvm.amdgcn.s.buffer.load.f32" + : "llvm.SI.load.const.v4i32"; + unsigned num_args = HAVE_LLVM >= 0x0800 ? 3 : 2; + LLVMValueRef args[3] = { + rsrc, + offset, + glc ? 
ctx->i32_1 : ctx->i32_0, + }; + result[i] = ac_build_intrinsic(ctx, intrname, + ctx->f32, args, num_args, AC_FUNC_ATTR_READNONE | - AC_FUNC_ATTR_LEGACY); + (HAVE_LLVM < 0x0800 ? AC_FUNC_ATTR_LEGACY : 0)); } if (num_channels == 1) return result[0]; @@ -1218,6 +1315,11 @@ bool glc, bool can_speculate) { + if (HAVE_LLVM >= 0x800) { + return ac_build_llvm8_buffer_load_common(ctx, rsrc, vindex, voffset, ctx->i32_0, + num_channels, glc, false, + can_speculate, true, true); + } return ac_build_buffer_load_common(ctx, rsrc, vindex, voffset, num_channels, glc, false, can_speculate, true); @@ -1231,6 +1333,12 @@ bool glc, bool can_speculate) { + if (HAVE_LLVM >= 0x800) { + return ac_build_llvm8_buffer_load_common(ctx, rsrc, vindex, voffset, ctx->i32_0, + num_channels, glc, false, + can_speculate, true, true); + } + LLVMValueRef elem_count = LLVMBuildExtractElement(ctx->builder, rsrc, LLVMConstInt(ctx->i32, 2, 0), ""); LLVMValueRef stride = LLVMBuildExtractElement(ctx->builder, rsrc, ctx->i32_1, ""); stride = LLVMBuildLShr(ctx->builder, stride, LLVMConstInt(ctx->i32, 16, 0), ""); @@ -1342,99 +1450,28 @@ int idx, LLVMValueRef val) { - LLVMValueRef tl, trbl, args[2]; + unsigned tl_lanes[4], trbl_lanes[4]; + LLVMValueRef tl, trbl; LLVMValueRef result; - if (HAVE_LLVM >= 0x0700) { - unsigned tl_lanes[4], trbl_lanes[4]; - - for (unsigned i = 0; i < 4; ++i) { - tl_lanes[i] = i & mask; - trbl_lanes[i] = (i & mask) + idx; - } - - tl = ac_build_quad_swizzle(ctx, val, - tl_lanes[0], tl_lanes[1], - tl_lanes[2], tl_lanes[3]); - trbl = ac_build_quad_swizzle(ctx, val, - trbl_lanes[0], trbl_lanes[1], - trbl_lanes[2], trbl_lanes[3]); - } else if (ctx->chip_class >= VI) { - LLVMValueRef thread_id, tl_tid, trbl_tid; - thread_id = ac_get_thread_id(ctx); - - tl_tid = LLVMBuildAnd(ctx->builder, thread_id, - LLVMConstInt(ctx->i32, mask, false), ""); - - trbl_tid = LLVMBuildAdd(ctx->builder, tl_tid, - LLVMConstInt(ctx->i32, idx, false), ""); - - args[0] = LLVMBuildMul(ctx->builder, tl_tid, - 
LLVMConstInt(ctx->i32, 4, false), ""); - args[1] = val; - tl = ac_build_intrinsic(ctx, - "llvm.amdgcn.ds.bpermute", ctx->i32, - args, 2, - AC_FUNC_ATTR_READNONE | - AC_FUNC_ATTR_CONVERGENT); - - args[0] = LLVMBuildMul(ctx->builder, trbl_tid, - LLVMConstInt(ctx->i32, 4, false), ""); - trbl = ac_build_intrinsic(ctx, - "llvm.amdgcn.ds.bpermute", ctx->i32, - args, 2, - AC_FUNC_ATTR_READNONE | - AC_FUNC_ATTR_CONVERGENT); - } else { - uint32_t masks[2] = {}; - - switch (mask) { - case AC_TID_MASK_TOP_LEFT: - masks[0] = 0x8000; - if (idx == 1) - masks[1] = 0x8055; - else - masks[1] = 0x80aa; - - break; - case AC_TID_MASK_TOP: - masks[0] = 0x8044; - masks[1] = 0x80ee; - break; - case AC_TID_MASK_LEFT: - masks[0] = 0x80a0; - masks[1] = 0x80f5; - break; - default: - assert(0); - } - - args[0] = val; - args[1] = LLVMConstInt(ctx->i32, masks[0], false); - - tl = ac_build_intrinsic(ctx, - "llvm.amdgcn.ds.swizzle", ctx->i32, - args, 2, - AC_FUNC_ATTR_READNONE | - AC_FUNC_ATTR_CONVERGENT); - - args[1] = LLVMConstInt(ctx->i32, masks[1], false); - trbl = ac_build_intrinsic(ctx, - "llvm.amdgcn.ds.swizzle", ctx->i32, - args, 2, - AC_FUNC_ATTR_READNONE | - AC_FUNC_ATTR_CONVERGENT); + for (unsigned i = 0; i < 4; ++i) { + tl_lanes[i] = i & mask; + trbl_lanes[i] = (i & mask) + idx; } + tl = ac_build_quad_swizzle(ctx, val, + tl_lanes[0], tl_lanes[1], + tl_lanes[2], tl_lanes[3]); + trbl = ac_build_quad_swizzle(ctx, val, + trbl_lanes[0], trbl_lanes[1], + trbl_lanes[2], trbl_lanes[3]); + tl = LLVMBuildBitCast(ctx->builder, tl, ctx->f32, ""); trbl = LLVMBuildBitCast(ctx->builder, trbl, ctx->f32, ""); result = LLVMBuildFSub(ctx->builder, trbl, tl, ""); - if (HAVE_LLVM >= 0x0700) { - result = ac_build_intrinsic(ctx, - "llvm.amdgcn.wqm.f32", ctx->f32, - &result, 1, 0); - } + result = ac_build_intrinsic(ctx, "llvm.amdgcn.wqm.f32", ctx->f32, + &result, 1, 0); return result; } @@ -1679,171 +1716,6 @@ unreachable("bad atomic op"); } -/* LLVM 6 and older */ -static LLVMValueRef 
ac_build_image_opcode_llvm6(struct ac_llvm_context *ctx, - struct ac_image_args *a) -{ - LLVMValueRef args[16]; - LLVMTypeRef retty = ctx->v4f32; - const char *name = NULL; - const char *atomic_subop = ""; - char intr_name[128], coords_type[64]; - - bool sample = a->opcode == ac_image_sample || - a->opcode == ac_image_gather4 || - a->opcode == ac_image_get_lod; - bool atomic = a->opcode == ac_image_atomic || - a->opcode == ac_image_atomic_cmpswap; - bool da = a->dim == ac_image_cube || - a->dim == ac_image_1darray || - a->dim == ac_image_2darray || - a->dim == ac_image_2darraymsaa; - if (a->opcode == ac_image_get_lod) - da = false; - - unsigned num_coords = - a->opcode != ac_image_get_resinfo ? ac_num_coords(a->dim) : 0; - LLVMValueRef addr; - unsigned num_addr = 0; - - if (a->opcode == ac_image_get_lod) { - switch (a->dim) { - case ac_image_1darray: - num_coords = 1; - break; - case ac_image_2darray: - case ac_image_cube: - num_coords = 2; - break; - default: - break; - } - } - - if (a->offset) - args[num_addr++] = ac_to_integer(ctx, a->offset); - if (a->bias) - args[num_addr++] = ac_to_integer(ctx, a->bias); - if (a->compare) - args[num_addr++] = ac_to_integer(ctx, a->compare); - if (a->derivs[0]) { - unsigned num_derivs = ac_num_derivs(a->dim); - for (unsigned i = 0; i < num_derivs; ++i) - args[num_addr++] = ac_to_integer(ctx, a->derivs[i]); - } - for (unsigned i = 0; i < num_coords; ++i) - args[num_addr++] = ac_to_integer(ctx, a->coords[i]); - if (a->lod) - args[num_addr++] = ac_to_integer(ctx, a->lod); - - unsigned pad_goal = util_next_power_of_two(num_addr); - while (num_addr < pad_goal) - args[num_addr++] = LLVMGetUndef(ctx->i32); - - addr = ac_build_gather_values(ctx, args, num_addr); - - unsigned num_args = 0; - if (atomic || a->opcode == ac_image_store || a->opcode == ac_image_store_mip) { - args[num_args++] = a->data[0]; - if (a->opcode == ac_image_atomic_cmpswap) - args[num_args++] = a->data[1]; - } - - unsigned coords_arg = num_args; - if (sample) - 
args[num_args++] = ac_to_float(ctx, addr); - else - args[num_args++] = ac_to_integer(ctx, addr); - - args[num_args++] = a->resource; - if (sample) - args[num_args++] = a->sampler; - if (!atomic) { - args[num_args++] = LLVMConstInt(ctx->i32, a->dmask, 0); - if (sample) - args[num_args++] = LLVMConstInt(ctx->i1, a->unorm, 0); - args[num_args++] = a->cache_policy & ac_glc ? ctx->i1true : ctx->i1false; - args[num_args++] = a->cache_policy & ac_slc ? ctx->i1true : ctx->i1false; - args[num_args++] = ctx->i1false; /* lwe */ - args[num_args++] = LLVMConstInt(ctx->i1, da, 0); - } else { - args[num_args++] = ctx->i1false; /* r128 */ - args[num_args++] = LLVMConstInt(ctx->i1, da, 0); - args[num_args++] = a->cache_policy & ac_slc ? ctx->i1true : ctx->i1false; - } - - switch (a->opcode) { - case ac_image_sample: - name = "llvm.amdgcn.image.sample"; - break; - case ac_image_gather4: - name = "llvm.amdgcn.image.gather4"; - break; - case ac_image_load: - name = "llvm.amdgcn.image.load"; - break; - case ac_image_load_mip: - name = "llvm.amdgcn.image.load.mip"; - break; - case ac_image_store: - name = "llvm.amdgcn.image.store"; - retty = ctx->voidt; - break; - case ac_image_store_mip: - name = "llvm.amdgcn.image.store.mip"; - retty = ctx->voidt; - break; - case ac_image_atomic: - case ac_image_atomic_cmpswap: - name = "llvm.amdgcn.image.atomic."; - retty = ctx->i32; - if (a->opcode == ac_image_atomic_cmpswap) { - atomic_subop = "cmpswap"; - } else { - atomic_subop = get_atomic_name(a->atomic); - } - break; - case ac_image_get_lod: - name = "llvm.amdgcn.image.getlod"; - break; - case ac_image_get_resinfo: - name = "llvm.amdgcn.image.getresinfo"; - break; - default: - unreachable("invalid image opcode"); - } - - ac_build_type_name_for_intr(LLVMTypeOf(args[coords_arg]), coords_type, - sizeof(coords_type)); - - if (atomic) { - snprintf(intr_name, sizeof(intr_name), "llvm.amdgcn.image.atomic.%s.%s", - atomic_subop, coords_type); - } else { - bool lod_suffix = - a->lod && (a->opcode == 
ac_image_sample || a->opcode == ac_image_gather4); - - snprintf(intr_name, sizeof(intr_name), "%s%s%s%s.v4f32.%s.v8i32", - name, - a->compare ? ".c" : "", - a->bias ? ".b" : - lod_suffix ? ".l" : - a->derivs[0] ? ".d" : - a->level_zero ? ".lz" : "", - a->offset ? ".o" : "", - coords_type); - } - - LLVMValueRef result = - ac_build_intrinsic(ctx, intr_name, retty, args, num_args, - a->attributes); - if (!sample && retty == ctx->v4f32) { - result = LLVMBuildBitCast(ctx->builder, result, - ctx->v4i32, ""); - } - return result; -} - LLVMValueRef ac_build_image_opcode(struct ac_llvm_context *ctx, struct ac_image_args *a) { @@ -1868,9 +1740,6 @@ (a->level_zero ? 1 : 0) + (a->derivs[0] ? 1 : 0) <= 1); - if (HAVE_LLVM < 0x0700) - return ac_build_image_opcode_llvm6(ctx, a); - if (a->opcode == ac_image_get_lod) { switch (dim) { case ac_image_1darray: @@ -2659,9 +2528,6 @@ LLVMTypeRef ac_array_in_const32_addr_space(LLVMTypeRef elem_type) { - if (!HAVE_32BIT_POINTERS) - return ac_array_in_const_addr_space(elem_type); - return LLVMPointerType(LLVMArrayType(elem_type, 0), AC_ADDR_SPACE_CONST_32BIT); } @@ -2807,8 +2673,7 @@ ctx->flow_depth--; } -static void if_cond_emit(struct ac_llvm_context *ctx, LLVMValueRef cond, - int label_id) +void ac_build_ifcc(struct ac_llvm_context *ctx, LLVMValueRef cond, int label_id) { struct ac_llvm_flow *flow = push_flow(ctx); LLVMBasicBlockRef if_block; @@ -2825,7 +2690,7 @@ { LLVMValueRef cond = LLVMBuildFCmp(ctx->builder, LLVMRealUNE, value, ctx->f32_0, ""); - if_cond_emit(ctx, cond, label_id); + ac_build_ifcc(ctx, cond, label_id); } void ac_build_uif(struct ac_llvm_context *ctx, LLVMValueRef value, @@ -2834,7 +2699,7 @@ LLVMValueRef cond = LLVMBuildICmp(ctx->builder, LLVMIntNE, ac_to_integer(ctx, value), ctx->i32_0, ""); - if_cond_emit(ctx, cond, label_id); + ac_build_ifcc(ctx, cond, label_id); } LLVMValueRef ac_build_alloca_undef(struct ac_llvm_context *ac, LLVMTypeRef type, @@ -3294,24 +3159,44 @@ } } -/* TODO: add inclusive and excluse scan 
functions for SI chip class. */ +/** + * \param maxprefix specifies that the result only needs to be correct for a + * prefix of this many threads + * + * TODO: add inclusive and excluse scan functions for SI chip class. + */ static LLVMValueRef -ac_build_scan(struct ac_llvm_context *ctx, nir_op op, LLVMValueRef src, LLVMValueRef identity) +ac_build_scan(struct ac_llvm_context *ctx, nir_op op, LLVMValueRef src, LLVMValueRef identity, + unsigned maxprefix) { LLVMValueRef result, tmp; result = src; + if (maxprefix <= 1) + return result; tmp = ac_build_dpp(ctx, identity, src, dpp_row_sr(1), 0xf, 0xf, false); result = ac_build_alu_op(ctx, result, tmp, op); + if (maxprefix <= 2) + return result; tmp = ac_build_dpp(ctx, identity, src, dpp_row_sr(2), 0xf, 0xf, false); result = ac_build_alu_op(ctx, result, tmp, op); + if (maxprefix <= 3) + return result; tmp = ac_build_dpp(ctx, identity, src, dpp_row_sr(3), 0xf, 0xf, false); result = ac_build_alu_op(ctx, result, tmp, op); + if (maxprefix <= 4) + return result; tmp = ac_build_dpp(ctx, identity, result, dpp_row_sr(4), 0xf, 0xe, false); result = ac_build_alu_op(ctx, result, tmp, op); + if (maxprefix <= 8) + return result; tmp = ac_build_dpp(ctx, identity, result, dpp_row_sr(8), 0xf, 0xc, false); result = ac_build_alu_op(ctx, result, tmp, op); + if (maxprefix <= 16) + return result; tmp = ac_build_dpp(ctx, identity, result, dpp_row_bcast15, 0xa, 0xf, false); result = ac_build_alu_op(ctx, result, tmp, op); + if (maxprefix <= 32) + return result; tmp = ac_build_dpp(ctx, identity, result, dpp_row_bcast31, 0xc, 0xf, false); result = ac_build_alu_op(ctx, result, tmp, op); return result; @@ -3320,14 +3205,24 @@ LLVMValueRef ac_build_inclusive_scan(struct ac_llvm_context *ctx, LLVMValueRef src, nir_op op) { - ac_build_optimization_barrier(ctx, &src); LLVMValueRef result; - LLVMValueRef identity = get_reduction_identity(ctx, op, - ac_get_type_size(LLVMTypeOf(src))); - result = LLVMBuildBitCast(ctx->builder, - 
ac_build_set_inactive(ctx, src, identity), - LLVMTypeOf(identity), ""); - result = ac_build_scan(ctx, op, result, identity); + + if (LLVMTypeOf(src) == ctx->i1 && op == nir_op_iadd) { + LLVMBuilderRef builder = ctx->builder; + src = LLVMBuildZExt(builder, src, ctx->i32, ""); + result = ac_build_ballot(ctx, src); + result = ac_build_mbcnt(ctx, result); + result = LLVMBuildAdd(builder, result, src, ""); + return result; + } + + ac_build_optimization_barrier(ctx, &src); + + LLVMValueRef identity = + get_reduction_identity(ctx, op, ac_get_type_size(LLVMTypeOf(src))); + result = LLVMBuildBitCast(ctx->builder, ac_build_set_inactive(ctx, src, identity), + LLVMTypeOf(identity), ""); + result = ac_build_scan(ctx, op, result, identity, 64); return ac_build_wwm(ctx, result); } @@ -3335,15 +3230,24 @@ LLVMValueRef ac_build_exclusive_scan(struct ac_llvm_context *ctx, LLVMValueRef src, nir_op op) { - ac_build_optimization_barrier(ctx, &src); LLVMValueRef result; - LLVMValueRef identity = get_reduction_identity(ctx, op, - ac_get_type_size(LLVMTypeOf(src))); - result = LLVMBuildBitCast(ctx->builder, - ac_build_set_inactive(ctx, src, identity), - LLVMTypeOf(identity), ""); + + if (LLVMTypeOf(src) == ctx->i1 && op == nir_op_iadd) { + LLVMBuilderRef builder = ctx->builder; + src = LLVMBuildZExt(builder, src, ctx->i32, ""); + result = ac_build_ballot(ctx, src); + result = ac_build_mbcnt(ctx, result); + return result; + } + + ac_build_optimization_barrier(ctx, &src); + + LLVMValueRef identity = + get_reduction_identity(ctx, op, ac_get_type_size(LLVMTypeOf(src))); + result = LLVMBuildBitCast(ctx->builder, ac_build_set_inactive(ctx, src, identity), + LLVMTypeOf(identity), ""); result = ac_build_dpp(ctx, identity, result, dpp_wf_sr1, 0xf, 0xf, false); - result = ac_build_scan(ctx, op, result, identity); + result = ac_build_scan(ctx, op, result, identity, 64); return ac_build_wwm(ctx, result); } @@ -3401,6 +3305,175 @@ } } +/** + * "Top half" of a scan that reduces per-wave values across 
an entire + * workgroup. + * + * The source value must be present in the highest lane of the wave, and the + * highest lane must be live. + */ +void +ac_build_wg_wavescan_top(struct ac_llvm_context *ctx, struct ac_wg_scan *ws) +{ + if (ws->maxwaves <= 1) + return; + + const LLVMValueRef i32_63 = LLVMConstInt(ctx->i32, 63, false); + LLVMBuilderRef builder = ctx->builder; + LLVMValueRef tid = ac_get_thread_id(ctx); + LLVMValueRef tmp; + + tmp = LLVMBuildICmp(builder, LLVMIntEQ, tid, i32_63, ""); + ac_build_ifcc(ctx, tmp, 1000); + LLVMBuildStore(builder, ws->src, LLVMBuildGEP(builder, ws->scratch, &ws->waveidx, 1, "")); + ac_build_endif(ctx, 1000); +} + +/** + * "Bottom half" of a scan that reduces per-wave values across an entire + * workgroup. + * + * The caller must place a barrier between the top and bottom halves. + */ +void +ac_build_wg_wavescan_bottom(struct ac_llvm_context *ctx, struct ac_wg_scan *ws) +{ + const LLVMTypeRef type = LLVMTypeOf(ws->src); + const LLVMValueRef identity = + get_reduction_identity(ctx, ws->op, ac_get_type_size(type)); + + if (ws->maxwaves <= 1) { + ws->result_reduce = ws->src; + ws->result_inclusive = ws->src; + ws->result_exclusive = identity; + return; + } + assert(ws->maxwaves <= 32); + + LLVMBuilderRef builder = ctx->builder; + LLVMValueRef tid = ac_get_thread_id(ctx); + LLVMBasicBlockRef bbs[2]; + LLVMValueRef phivalues_scan[2]; + LLVMValueRef tmp, tmp2; + + bbs[0] = LLVMGetInsertBlock(builder); + phivalues_scan[0] = LLVMGetUndef(type); + + if (ws->enable_reduce) + tmp = LLVMBuildICmp(builder, LLVMIntULT, tid, ws->numwaves, ""); + else if (ws->enable_inclusive) + tmp = LLVMBuildICmp(builder, LLVMIntULE, tid, ws->waveidx, ""); + else + tmp = LLVMBuildICmp(builder, LLVMIntULT, tid, ws->waveidx, ""); + ac_build_ifcc(ctx, tmp, 1001); + { + tmp = LLVMBuildLoad(builder, LLVMBuildGEP(builder, ws->scratch, &tid, 1, ""), ""); + + ac_build_optimization_barrier(ctx, &tmp); + + bbs[1] = LLVMGetInsertBlock(builder); + phivalues_scan[1] = 
ac_build_scan(ctx, ws->op, tmp, identity, ws->maxwaves); + } + ac_build_endif(ctx, 1001); + + const LLVMValueRef scan = ac_build_phi(ctx, type, 2, phivalues_scan, bbs); + + if (ws->enable_reduce) { + tmp = LLVMBuildSub(builder, ws->numwaves, ctx->i32_1, ""); + ws->result_reduce = ac_build_readlane(ctx, scan, tmp); + } + if (ws->enable_inclusive) + ws->result_inclusive = ac_build_readlane(ctx, scan, ws->waveidx); + if (ws->enable_exclusive) { + tmp = LLVMBuildSub(builder, ws->waveidx, ctx->i32_1, ""); + tmp = ac_build_readlane(ctx, scan, tmp); + tmp2 = LLVMBuildICmp(builder, LLVMIntEQ, ws->waveidx, ctx->i32_0, ""); + ws->result_exclusive = LLVMBuildSelect(builder, tmp2, identity, tmp, ""); + } +} + +/** + * Inclusive scan of a per-wave value across an entire workgroup. + * + * This implies an s_barrier instruction. + * + * Unlike ac_build_inclusive_scan, the caller \em must ensure that all threads + * of the workgroup are live. (This requirement cannot easily be relaxed in a + * useful manner because of the barrier in the algorithm.) + */ +void +ac_build_wg_wavescan(struct ac_llvm_context *ctx, struct ac_wg_scan *ws) +{ + ac_build_wg_wavescan_top(ctx, ws); + ac_build_s_barrier(ctx); + ac_build_wg_wavescan_bottom(ctx, ws); +} + +/** + * "Top half" of a scan that reduces per-thread values across an entire + * workgroup. + * + * All lanes must be active when this code runs. 
+ */ +void +ac_build_wg_scan_top(struct ac_llvm_context *ctx, struct ac_wg_scan *ws) +{ + if (ws->enable_exclusive) { + ws->extra = ac_build_exclusive_scan(ctx, ws->src, ws->op); + if (LLVMTypeOf(ws->src) == ctx->i1 && ws->op == nir_op_iadd) + ws->src = LLVMBuildZExt(ctx->builder, ws->src, ctx->i32, ""); + ws->src = ac_build_alu_op(ctx, ws->extra, ws->src, ws->op); + } else { + ws->src = ac_build_inclusive_scan(ctx, ws->src, ws->op); + } + + bool enable_inclusive = ws->enable_inclusive; + bool enable_exclusive = ws->enable_exclusive; + ws->enable_inclusive = false; + ws->enable_exclusive = ws->enable_exclusive || enable_inclusive; + ac_build_wg_wavescan_top(ctx, ws); + ws->enable_inclusive = enable_inclusive; + ws->enable_exclusive = enable_exclusive; +} + +/** + * "Bottom half" of a scan that reduces per-thread values across an entire + * workgroup. + * + * The caller must place a barrier between the top and bottom halves. + */ +void +ac_build_wg_scan_bottom(struct ac_llvm_context *ctx, struct ac_wg_scan *ws) +{ + bool enable_inclusive = ws->enable_inclusive; + bool enable_exclusive = ws->enable_exclusive; + ws->enable_inclusive = false; + ws->enable_exclusive = ws->enable_exclusive || enable_inclusive; + ac_build_wg_wavescan_bottom(ctx, ws); + ws->enable_inclusive = enable_inclusive; + ws->enable_exclusive = enable_exclusive; + + /* ws->result_reduce is already the correct value */ + if (ws->enable_inclusive) + ws->result_inclusive = ac_build_alu_op(ctx, ws->result_exclusive, ws->src, ws->op); + if (ws->enable_exclusive) + ws->result_exclusive = ac_build_alu_op(ctx, ws->result_exclusive, ws->extra, ws->op); +} + +/** + * A scan that reduces per-thread values across an entire workgroup. + * + * The caller must ensure that all lanes are active when this code runs + * (WWM is insufficient!), because there is an implied barrier. 
+ */ +void +ac_build_wg_scan(struct ac_llvm_context *ctx, struct ac_wg_scan *ws) +{ + ac_build_wg_scan_top(ctx, ws); + ac_build_s_barrier(ctx); + ac_build_wg_scan_bottom(ctx, ws); +} + LLVMValueRef ac_build_quad_swizzle(struct ac_llvm_context *ctx, LLVMValueRef src, unsigned lane0, unsigned lane1, unsigned lane2, unsigned lane3) diff -Nru mesa-18.3.3/src/amd/common/ac_llvm_build.h mesa-19.0.1/src/amd/common/ac_llvm_build.h --- mesa-18.3.3/src/amd/common/ac_llvm_build.h 2018-12-07 18:58:04.000000000 +0000 +++ mesa-19.0.1/src/amd/common/ac_llvm_build.h 2019-03-31 23:16:37.000000000 +0000 @@ -34,14 +34,12 @@ extern "C" { #endif -#define HAVE_32BIT_POINTERS (HAVE_LLVM >= 0x0700) - enum { - AC_ADDR_SPACE_FLAT = HAVE_LLVM >= 0x0700 ? 0 : 4, /* Slower than global. */ + AC_ADDR_SPACE_FLAT = 0, /* Slower than global. */ AC_ADDR_SPACE_GLOBAL = 1, - AC_ADDR_SPACE_GDS = HAVE_LLVM >= 0x0700 ? 2 : 5, + AC_ADDR_SPACE_GDS = 2, AC_ADDR_SPACE_LDS = 3, - AC_ADDR_SPACE_CONST = HAVE_LLVM >= 0x0700 ? 4 : 2, /* Global allowing SMEM. */ + AC_ADDR_SPACE_CONST = 4, /* Global allowing SMEM. 
*/ AC_ADDR_SPACE_CONST_32BIT = 6, /* same as CONST, but the pointer type has 32 bits */ }; @@ -128,6 +126,7 @@ LLVMTypeRef ac_to_integer_type(struct ac_llvm_context *ctx, LLVMTypeRef t); LLVMValueRef ac_to_integer(struct ac_llvm_context *ctx, LLVMValueRef v); +LLVMValueRef ac_to_integer_or_pointer(struct ac_llvm_context *ctx, LLVMValueRef v); LLVMTypeRef ac_to_float_type(struct ac_llvm_context *ctx, LLVMTypeRef t); LLVMValueRef ac_to_float(struct ac_llvm_context *ctx, LLVMValueRef v); @@ -218,6 +217,14 @@ LLVMValueRef j); LLVMValueRef +ac_build_fs_interp_f16(struct ac_llvm_context *ctx, + LLVMValueRef llvm_chan, + LLVMValueRef attr_number, + LLVMValueRef params, + LLVMValueRef i, + LLVMValueRef j); + +LLVMValueRef ac_build_fs_interp_mov(struct ac_llvm_context *ctx, LLVMValueRef parameter, LLVMValueRef llvm_chan, @@ -225,6 +232,11 @@ LLVMValueRef params); LLVMValueRef +ac_build_gep_ptr(struct ac_llvm_context *ctx, + LLVMValueRef base_ptr, + LLVMValueRef index); + +LLVMValueRef ac_build_gep0(struct ac_llvm_context *ctx, LLVMValueRef base_ptr, LLVMValueRef index); @@ -481,6 +493,7 @@ void ac_build_else(struct ac_llvm_context *ctx, int lable_id); void ac_build_endif(struct ac_llvm_context *ctx, int lable_id); void ac_build_endloop(struct ac_llvm_context *ctx, int lable_id); +void ac_build_ifcc(struct ac_llvm_context *ctx, LLVMValueRef cond, int label_id); void ac_build_if(struct ac_llvm_context *ctx, LLVMValueRef value, int lable_id); void ac_build_uif(struct ac_llvm_context *ctx, LLVMValueRef value, @@ -524,6 +537,42 @@ LLVMValueRef ac_build_reduce(struct ac_llvm_context *ctx, LLVMValueRef src, nir_op op, unsigned cluster_size); +/** + * Common arguments for a scan/reduce operation that accumulates per-wave + * values across an entire workgroup, while respecting the order of waves. + */ +struct ac_wg_scan { + bool enable_reduce; + bool enable_exclusive; + bool enable_inclusive; + nir_op op; + LLVMValueRef src; /* clobbered! 
*/ + LLVMValueRef result_reduce; + LLVMValueRef result_exclusive; + LLVMValueRef result_inclusive; + LLVMValueRef extra; + LLVMValueRef waveidx; + LLVMValueRef numwaves; /* only needed for "reduce" operations */ + + /* T addrspace(LDS) pointer to the same type as value, at least maxwaves entries */ + LLVMValueRef scratch; + unsigned maxwaves; +}; + +void +ac_build_wg_wavescan_top(struct ac_llvm_context *ctx, struct ac_wg_scan *ws); +void +ac_build_wg_wavescan_bottom(struct ac_llvm_context *ctx, struct ac_wg_scan *ws); +void +ac_build_wg_wavescan(struct ac_llvm_context *ctx, struct ac_wg_scan *ws); + +void +ac_build_wg_scan_top(struct ac_llvm_context *ctx, struct ac_wg_scan *ws); +void +ac_build_wg_scan_bottom(struct ac_llvm_context *ctx, struct ac_wg_scan *ws); +void +ac_build_wg_scan(struct ac_llvm_context *ctx, struct ac_wg_scan *ws); + LLVMValueRef ac_build_quad_swizzle(struct ac_llvm_context *ctx, LLVMValueRef src, unsigned lane0, unsigned lane1, unsigned lane2, unsigned lane3); diff -Nru mesa-18.3.3/src/amd/common/ac_llvm_helper.cpp mesa-19.0.1/src/amd/common/ac_llvm_helper.cpp --- mesa-18.3.3/src/amd/common/ac_llvm_helper.cpp 2018-12-07 18:58:04.000000000 +0000 +++ mesa-19.0.1/src/amd/common/ac_llvm_helper.cpp 2019-03-31 23:16:37.000000000 +0000 @@ -39,9 +39,6 @@ #include #include -#if HAVE_LLVM < 0x0700 -#include "llvm/Support/raw_ostream.h" -#endif void ac_add_attr_dereferenceable(LLVMValueRef val, uint64_t bytes) { @@ -132,9 +129,7 @@ llvm::TargetMachine *TM = reinterpret_cast(tm); if (TM->addPassesToEmitFile(p->passmgr, p->ostream, -#if HAVE_LLVM >= 0x0700 nullptr, -#endif llvm::TargetMachine::CGFT_ObjectFile)) { fprintf(stderr, "amd: TargetMachine can't emit a file of this type!\n"); delete p; @@ -170,7 +165,5 @@ void ac_enable_global_isel(LLVMTargetMachineRef tm) { -#if HAVE_LLVM >= 0x0700 reinterpret_cast(tm)->setGlobalISel(true); -#endif } diff -Nru mesa-18.3.3/src/amd/common/ac_llvm_util.c mesa-19.0.1/src/amd/common/ac_llvm_util.c --- 
mesa-18.3.3/src/amd/common/ac_llvm_util.c 2018-12-07 18:58:04.000000000 +0000 +++ mesa-19.0.1/src/amd/common/ac_llvm_util.c 2019-03-31 23:16:37.000000000 +0000 @@ -30,9 +30,7 @@ #include #include #include -#if HAVE_LLVM >= 0x0700 #include -#endif #include "c11/threads.h" #include "gallivm/lp_bld_misc.h" #include "util/u_math.h" @@ -132,11 +130,11 @@ case CHIP_RAVEN: return "gfx902"; case CHIP_VEGA12: - return HAVE_LLVM >= 0x0700 ? "gfx904" : "gfx902"; + return "gfx904"; case CHIP_VEGA20: - return HAVE_LLVM >= 0x0700 ? "gfx906" : "gfx902"; + return "gfx906"; case CHIP_RAVEN2: - return "gfx902"; /* TODO: use gfx909 when it's available */ + return HAVE_LLVM >= 0x0800 ? "gfx909" : "gfx902"; default: return ""; } @@ -153,7 +151,8 @@ LLVMTargetRef target = ac_get_llvm_target(triple); snprintf(features, sizeof(features), - "+DumpCode,+vgpr-spilling,-fp32-denormals,+fp64-denormals%s%s%s%s", + "+DumpCode,-fp32-denormals,+fp64-denormals%s%s%s%s%s", + HAVE_LLVM >= 0x0800 ? "" : ",+vgpr-spilling", tm_options & AC_TM_SISCHED ? ",+si-scheduler" : "", tm_options & AC_TM_FORCE_ENABLE_XNACK ? ",+xnack" : "", tm_options & AC_TM_FORCE_DISABLE_XNACK ? 
",-xnack" : "", @@ -302,7 +301,6 @@ bool ac_init_llvm_compiler(struct ac_llvm_compiler *compiler, - bool okay_to_leak_target_library_info, enum radeon_family family, enum ac_target_machine_options tm_options) { @@ -323,12 +321,10 @@ goto fail; } - if (okay_to_leak_target_library_info || (HAVE_LLVM >= 0x0700)) { - compiler->target_library_info = - ac_create_target_library_info(triple); - if (!compiler->target_library_info) - goto fail; - } + compiler->target_library_info = + ac_create_target_library_info(triple); + if (!compiler->target_library_info) + goto fail; compiler->passmgr = ac_create_passmgr(compiler->target_library_info, tm_options & AC_TM_CHECK_IR); @@ -346,11 +342,8 @@ { if (compiler->passmgr) LLVMDisposePassManager(compiler->passmgr); -#if HAVE_LLVM >= 0x0700 - /* This crashes on LLVM 5.0 and 6.0 and Ubuntu 18.04, so leak it there. */ if (compiler->target_library_info) ac_dispose_target_library_info(compiler->target_library_info); -#endif if (compiler->low_opt_tm) LLVMDisposeTargetMachine(compiler->low_opt_tm); if (compiler->tm) diff -Nru mesa-18.3.3/src/amd/common/ac_llvm_util.h mesa-19.0.1/src/amd/common/ac_llvm_util.h --- mesa-18.3.3/src/amd/common/ac_llvm_util.h 2018-09-27 19:13:53.000000000 +0000 +++ mesa-19.0.1/src/amd/common/ac_llvm_util.h 2019-03-31 23:16:37.000000000 +0000 @@ -134,7 +134,6 @@ bool ac_init_llvm_compiler(struct ac_llvm_compiler *compiler, - bool okay_to_leak_target_library_info, enum radeon_family family, enum ac_target_machine_options tm_options); void ac_destroy_llvm_compiler(struct ac_llvm_compiler *compiler); diff -Nru mesa-18.3.3/src/amd/common/ac_nir_to_llvm.c mesa-19.0.1/src/amd/common/ac_nir_to_llvm.c --- mesa-18.3.3/src/amd/common/ac_nir_to_llvm.c 2019-02-01 12:03:20.000000000 +0000 +++ mesa-19.0.1/src/amd/common/ac_nir_to_llvm.c 2019-03-31 23:16:37.000000000 +0000 @@ -270,8 +270,9 @@ { LLVMValueRef v = LLVMBuildICmp(ctx->builder, LLVMIntNE, src0, ctx->i32_0, ""); - return LLVMBuildSelect(ctx->builder, v, 
ac_to_integer(ctx, src1), - ac_to_integer(ctx, src2), ""); + return LLVMBuildSelect(ctx->builder, v, + ac_to_integer_or_pointer(ctx, src1), + ac_to_integer_or_pointer(ctx, src2), ""); } static LLVMValueRef emit_minmax_int(struct ac_llvm_context *ctx, @@ -428,12 +429,12 @@ { LLVMValueRef result; - if (HAVE_LLVM < 0x0700) { + if (HAVE_LLVM >= 0x0800) { LLVMValueRef icond = LLVMBuildICmp(ctx->builder, LLVMIntEQ, srcs[2], LLVMConstInt(ctx->i32, 32, false), ""); result = ac_build_bfe(ctx, srcs[0], srcs[1], srcs[2], is_signed); result = LLVMBuildSelect(ctx->builder, icond, srcs[0], result, ""); } else { - /* FIXME: LLVM 7 returns incorrect result when count is 0. + /* FIXME: LLVM 7+ returns incorrect result when count is 0. * https://bugs.freedesktop.org/show_bug.cgi?id=107276 */ LLVMValueRef zero = ctx->i32_0; @@ -686,34 +687,34 @@ LLVMTypeOf(src[0]), ""), ""); break; - case nir_op_ilt: + case nir_op_ilt32: result = emit_int_cmp(&ctx->ac, LLVMIntSLT, src[0], src[1]); break; - case nir_op_ine: + case nir_op_ine32: result = emit_int_cmp(&ctx->ac, LLVMIntNE, src[0], src[1]); break; - case nir_op_ieq: + case nir_op_ieq32: result = emit_int_cmp(&ctx->ac, LLVMIntEQ, src[0], src[1]); break; - case nir_op_ige: + case nir_op_ige32: result = emit_int_cmp(&ctx->ac, LLVMIntSGE, src[0], src[1]); break; - case nir_op_ult: + case nir_op_ult32: result = emit_int_cmp(&ctx->ac, LLVMIntULT, src[0], src[1]); break; - case nir_op_uge: + case nir_op_uge32: result = emit_int_cmp(&ctx->ac, LLVMIntUGE, src[0], src[1]); break; - case nir_op_feq: + case nir_op_feq32: result = emit_float_cmp(&ctx->ac, LLVMRealOEQ, src[0], src[1]); break; - case nir_op_fne: + case nir_op_fne32: result = emit_float_cmp(&ctx->ac, LLVMRealUNE, src[0], src[1]); break; - case nir_op_flt: + case nir_op_flt32: result = emit_float_cmp(&ctx->ac, LLVMRealOLT, src[0], src[1]); break; - case nir_op_fge: + case nir_op_fge32: result = emit_float_cmp(&ctx->ac, LLVMRealOGE, src[0], src[1]); break; case nir_op_fabs: @@ -915,7 
+916,7 @@ else result = LLVMBuildTrunc(ctx->ac.builder, src[0], def_type, ""); break; - case nir_op_bcsel: + case nir_op_b32csel: result = emit_bcsel(&ctx->ac, src[0], src[1], src[2]); break; case nir_op_find_lsb: @@ -940,16 +941,20 @@ src[1] = ac_to_integer(&ctx->ac, src[1]); result = emit_uint_carry(&ctx->ac, "llvm.usub.with.overflow.i32", src[0], src[1]); break; - case nir_op_b2f: + case nir_op_b2f16: + case nir_op_b2f32: + case nir_op_b2f64: result = emit_b2f(&ctx->ac, src[0], instr->dest.dest.ssa.bit_size); break; - case nir_op_f2b: + case nir_op_f2b32: result = emit_f2b(&ctx->ac, src[0]); break; - case nir_op_b2i: + case nir_op_b2i16: + case nir_op_b2i32: + case nir_op_b2i64: result = emit_b2i(&ctx->ac, src[0], instr->dest.dest.ssa.bit_size); break; - case nir_op_i2b: + case nir_op_i2b32: src[0] = ac_to_integer(&ctx->ac, src[0]); result = emit_i2b(&ctx->ac, src[0]); break; @@ -1095,7 +1100,7 @@ if (result) { assert(instr->dest.dest.is_ssa); - result = ac_to_integer(&ctx->ac, result); + result = ac_to_integer_or_pointer(&ctx->ac, result); ctx->ssa_defs[instr->dest.dest.ssa.index] = result; } } @@ -1458,6 +1463,30 @@ } } +static unsigned get_cache_policy(struct ac_nir_context *ctx, + enum gl_access_qualifier access, + bool may_store_unaligned, + bool writeonly_memory) +{ + unsigned cache_policy = 0; + + /* SI has a TC L1 bug causing corruption of 8bit/16bit stores. All + * store opcodes not aligned to a dword are affected. The only way to + * get unaligned stores is through shader images. + */ + if (((may_store_unaligned && ctx->ac.chip_class == SI) || + /* If this is write-only, don't keep data in L1 to prevent + * evicting L1 cache lines that may be needed by other + * instructions. 
+ */ + writeonly_memory || + access & (ACCESS_COHERENT | ACCESS_VOLATILE))) { + cache_policy |= ac_glc; + } + + return cache_policy; +} + static void visit_store_ssbo(struct ac_nir_context *ctx, nir_intrinsic_instr *instr) { @@ -1466,10 +1495,9 @@ int elem_size_bytes = ac_get_elem_bits(&ctx->ac, LLVMTypeOf(src_data)) / 8; unsigned writemask = nir_intrinsic_write_mask(instr); enum gl_access_qualifier access = nir_intrinsic_access(instr); - LLVMValueRef glc = ctx->ac.i1false; - - if (access & (ACCESS_VOLATILE | ACCESS_COHERENT)) - glc = ctx->ac.i1true; + bool writeonly_memory = access & ACCESS_NON_READABLE; + unsigned cache_policy = get_cache_policy(ctx, access, false, writeonly_memory); + LLVMValueRef glc = (cache_policy & ac_glc) ? ctx->ac.i1true : ctx->ac.i1false; LLVMValueRef rsrc = ctx->abi->load_ssbo(ctx->abi, get_src(ctx, instr->src[1]), true); @@ -1625,10 +1653,8 @@ int elem_size_bytes = instr->dest.ssa.bit_size / 8; int num_components = instr->num_components; enum gl_access_qualifier access = nir_intrinsic_access(instr); - LLVMValueRef glc = ctx->ac.i1false; - - if (access & (ACCESS_VOLATILE | ACCESS_COHERENT)) - glc = ctx->ac.i1true; + unsigned cache_policy = get_cache_policy(ctx, access, false, false); + LLVMValueRef glc = (cache_policy & ac_glc) ? 
ctx->ac.i1true : ctx->ac.i1false; LLVMValueRef offset = get_src(ctx, instr->src[1]); LLVMValueRef rsrc = ctx->abi->load_ssbo(ctx->abi, @@ -1641,7 +1667,7 @@ LLVMValueRef results[4]; for (int i = 0; i < num_components;) { int num_elems = num_components - i; - if (elem_size_bytes < 4) + if (elem_size_bytes < 4 && nir_intrinsic_align(instr) % 4 != 0) num_elems = 1; if (num_elems * elem_size_bytes > 16) num_elems = 16 / elem_size_bytes; @@ -1858,23 +1884,36 @@ nir_variable *var = nir_deref_instr_get_variable(nir_instr_as_deref(instr->src[0].ssa->parent_instr)); LLVMValueRef values[8]; - int idx = var->data.driver_location; + int idx = 0; int ve = instr->dest.ssa.num_components; - unsigned comp = var->data.location_frac; + unsigned comp = 0; LLVMValueRef indir_index; LLVMValueRef ret; unsigned const_index; - unsigned stride = var->data.compact ? 1 : 4; - bool vs_in = ctx->stage == MESA_SHADER_VERTEX && - var->data.mode == nir_var_shader_in; - - get_deref_offset(ctx, nir_instr_as_deref(instr->src[0].ssa->parent_instr), vs_in, NULL, NULL, - &const_index, &indir_index); + unsigned stride = 4; + int mode = nir_var_mem_shared; + + if (var) { + bool vs_in = ctx->stage == MESA_SHADER_VERTEX && + var->data.mode == nir_var_shader_in; + idx = var->data.driver_location; + comp = var->data.location_frac; + mode = var->data.mode; + + get_deref_offset(ctx, nir_instr_as_deref(instr->src[0].ssa->parent_instr), vs_in, NULL, NULL, + &const_index, &indir_index); + + if (var->data.compact) { + stride = 1; + const_index += comp; + comp = 0; + } + } if (instr->dest.ssa.bit_size == 64) ve *= 2; - switch (var->data.mode) { + switch (mode) { case nir_var_shader_in: if (ctx->stage == MESA_SHADER_TESS_CTRL || ctx->stage == MESA_SHADER_TESS_EVAL) { @@ -1911,7 +1950,7 @@ values[chan] = ctx->abi->inputs[idx + chan + const_index * stride]; } break; - case nir_var_local: + case nir_var_function_temp: for (unsigned chan = 0; chan < ve; chan++) { if (indir_index) { unsigned count = 
glsl_count_attribute_slots( @@ -1929,7 +1968,7 @@ } } break; - case nir_var_shared: { + case nir_var_mem_shared: { LLVMValueRef address = get_src(ctx, instr->src[0]); LLVMValueRef val = LLVMBuildLoad(ctx->ac.builder, address, ""); return LLVMBuildBitCast(ctx->ac.builder, val, @@ -1971,18 +2010,28 @@ visit_store_var(struct ac_nir_context *ctx, nir_intrinsic_instr *instr) { - nir_variable *var = nir_deref_instr_get_variable(nir_instr_as_deref(instr->src[0].ssa->parent_instr)); + nir_deref_instr *deref = nir_instr_as_deref(instr->src[0].ssa->parent_instr); + nir_variable *var = nir_deref_instr_get_variable(deref); LLVMValueRef temp_ptr, value; - int idx = var->data.driver_location; - unsigned comp = var->data.location_frac; + int idx = 0; + unsigned comp = 0; LLVMValueRef src = ac_to_float(&ctx->ac, get_src(ctx, instr->src[1])); int writemask = instr->const_index[0]; LLVMValueRef indir_index; unsigned const_index; - get_deref_offset(ctx, nir_instr_as_deref(instr->src[0].ssa->parent_instr), false, - NULL, NULL, &const_index, &indir_index); + if (var) { + get_deref_offset(ctx, deref, false, + NULL, NULL, &const_index, &indir_index); + idx = var->data.driver_location; + comp = var->data.location_frac; + + if (var->data.compact) { + const_index += comp; + comp = 0; + } + } if (ac_get_elem_bits(&ctx->ac, LLVMTypeOf(src)) == 64) { @@ -1995,7 +2044,7 @@ writemask = writemask << comp; - switch (var->data.mode) { + switch (deref->mode) { case nir_var_shader_out: if (ctx->stage == MESA_SHADER_TESS_CTRL) { @@ -2004,8 +2053,8 @@ unsigned const_index = 0; const bool is_patch = var->data.patch; - get_deref_offset(ctx, nir_instr_as_deref(instr->src[0].ssa->parent_instr), - false, NULL, is_patch ? NULL : &vertex_index, + get_deref_offset(ctx, deref, false, NULL, + is_patch ? 
NULL : &vertex_index, &const_index, &indir_index); ctx->abi->store_tcs_outputs(ctx->abi, var, @@ -2043,7 +2092,7 @@ } } break; - case nir_var_local: + case nir_var_function_temp: for (unsigned chan = 0; chan < 8; chan++) { if (!(writemask & (1 << chan))) continue; @@ -2068,11 +2117,11 @@ } } break; - case nir_var_shared: { + case nir_var_mem_shared: { int writemask = instr->const_index[0]; LLVMValueRef address = get_src(ctx, instr->src[0]); LLVMValueRef val = get_src(ctx, instr->src[1]); - if (util_is_power_of_two_nonzero(writemask)) { + if (writemask == (1u << ac_get_llvm_num_components(val)) - 1) { val = LLVMBuildBitCast( ctx->ac.builder, val, LLVMGetElementType(LLVMTypeOf(address)), ""); @@ -2198,10 +2247,10 @@ return sample_index; } -static nir_variable *get_image_variable(const nir_intrinsic_instr *instr) +static nir_deref_instr *get_image_deref(const nir_intrinsic_instr *instr) { assert(instr->src[0].is_ssa); - return nir_deref_instr_get_variable(nir_instr_as_deref(instr->src[0].ssa->parent_instr)); + return nir_instr_as_deref(instr->src[0].ssa->parent_instr); } static LLVMValueRef get_image_descriptor(struct ac_nir_context *ctx, @@ -2216,7 +2265,7 @@ const nir_intrinsic_instr *instr, struct ac_image_args *args) { - const struct glsl_type *type = glsl_without_array(get_image_variable(instr)->type); + const struct glsl_type *type = get_image_deref(instr)->type; LLVMValueRef src0 = get_src(ctx, instr->src[1]); LLVMValueRef masks[] = { @@ -2235,7 +2284,7 @@ bool gfx9_1d = ctx->ac.chip_class >= GFX9 && dim == GLSL_SAMPLER_DIM_1D; count = image_type_to_components_count(dim, is_array); - if (is_ms) { + if (is_ms && instr->intrinsic == nir_intrinsic_image_deref_load) { LLVMValueRef fmask_load_address[3]; int chan; @@ -2325,10 +2374,13 @@ const nir_intrinsic_instr *instr) { LLVMValueRef res; - const nir_variable *var = get_image_variable(instr); - const struct glsl_type *type = var->type; + const nir_deref_instr *image_deref = get_image_deref(instr); + const struct 
glsl_type *type = image_deref->type; + const nir_variable *var = nir_deref_instr_get_variable(image_deref); + struct ac_image_args args = {}; - type = glsl_without_array(type); + args.cache_policy = + get_cache_policy(ctx, var->data.image.access, false, false); const enum glsl_sampler_dim dim = glsl_get_sampler_dim(type); if (dim == GLSL_SAMPLER_DIM_BUF) { @@ -2340,16 +2392,16 @@ vindex = LLVMBuildExtractElement(ctx->ac.builder, get_src(ctx, instr->src[1]), ctx->ac.i32_0, ""); - /* TODO: set "glc" and "can_speculate" when OpenGL needs it. */ + /* TODO: set "can_speculate" when OpenGL needs it. */ res = ac_build_buffer_load_format(&ctx->ac, rsrc, vindex, ctx->ac.i32_0, num_channels, - false, false); + !!(args.cache_policy & ac_glc), + false); res = ac_build_expand_to_vec4(&ctx->ac, res, num_channels); res = ac_trim_vector(&ctx->ac, res, instr->dest.ssa.num_components); res = ac_to_integer(&ctx->ac, res); } else { - struct ac_image_args args = {}; args.opcode = ac_image_load; get_image_coords(ctx, instr, &args); args.resource = get_image_descriptor(ctx, instr, AC_DESC_IMAGE, false); @@ -2357,8 +2409,6 @@ glsl_sampler_type_is_array(type)); args.dmask = 15; args.attributes = AC_FUNC_ATTR_READONLY; - if (var->data.image.access & (ACCESS_VOLATILE | ACCESS_COHERENT)) - args.cache_policy |= ac_glc; res = ac_build_image_opcode(&ctx->ac, &args); } @@ -2369,13 +2419,15 @@ nir_intrinsic_instr *instr) { LLVMValueRef params[8]; - const nir_variable *var = get_image_variable(instr); - const struct glsl_type *type = glsl_without_array(var->type); + const nir_deref_instr *image_deref = get_image_deref(instr); + const struct glsl_type *type = image_deref->type; + const nir_variable *var = nir_deref_instr_get_variable(image_deref); const enum glsl_sampler_dim dim = glsl_get_sampler_dim(type); - LLVMValueRef glc = ctx->ac.i1false; - bool force_glc = ctx->ac.chip_class == SI; - if (force_glc) - glc = ctx->ac.i1true; + bool writeonly_memory = var->data.image.access & 
ACCESS_NON_READABLE; + struct ac_image_args args = {}; + + args.cache_policy = get_cache_policy(ctx, var->data.image.access, true, + writeonly_memory); if (dim == GLSL_SAMPLER_DIM_BUF) { char name[48]; @@ -2393,14 +2445,19 @@ ctx->ac.i32_0, ""); /* vindex */ params[3] = ctx->ac.i32_0; /* voffset */ snprintf(name, sizeof(name), "%s.%s", - "llvm.amdgcn.buffer.store.format", + HAVE_LLVM >= 0x800 ? "llvm.amdgcn.struct.buffer.store.format" + : "llvm.amdgcn.buffer.store.format", types[CLAMP(src_channels, 1, 3) - 1]); - params[4] = glc; /* glc */ - params[5] = ctx->ac.i1false; /* slc */ + if (HAVE_LLVM >= 0x800) { + params[4] = ctx->ac.i32_0; /* soffset */ + params[5] = (args.cache_policy & ac_glc) ? ctx->ac.i32_1 : ctx->ac.i32_0; + } else { + params[4] = LLVMConstInt(ctx->ac.i1, !!(args.cache_policy & ac_glc), 0); + params[5] = ctx->ac.i1false; /* slc */ + } ac_build_intrinsic(&ctx->ac, name, ctx->ac.voidt, params, 6, 0); } else { - struct ac_image_args args = {}; args.opcode = ac_image_store; args.data[0] = ac_to_float(&ctx->ac, get_src(ctx, instr->src[3])); get_image_coords(ctx, instr, &args); @@ -2408,8 +2465,6 @@ args.dim = get_ac_image_dim(&ctx->ac, glsl_get_sampler_dim(type), glsl_sampler_type_is_array(type)); args.dmask = 15; - if (force_glc || (var->data.image.access & (ACCESS_VOLATILE | ACCESS_COHERENT))) - args.cache_policy |= ac_glc; ac_build_image_opcode(&ctx->ac, &args); } @@ -2421,13 +2476,12 @@ { LLVMValueRef params[7]; int param_count = 0; - const nir_variable *var = get_image_variable(instr); + const struct glsl_type *type = get_image_deref(instr)->type; bool cmpswap = instr->intrinsic == nir_intrinsic_image_deref_atomic_comp_swap; const char *atomic_name; - char intrinsic_name[41]; + char intrinsic_name[64]; enum ac_atomic_op atomic_subop; - const struct glsl_type *type = glsl_without_array(var->type); MAYBE_UNUSED int length; bool is_unsigned = glsl_get_sampler_result_type(type) == GLSL_TYPE_UINT; @@ -2478,10 +2532,18 @@ params[param_count++] = 
LLVMBuildExtractElement(ctx->ac.builder, get_src(ctx, instr->src[1]), ctx->ac.i32_0, ""); /* vindex */ params[param_count++] = ctx->ac.i32_0; /* voffset */ - params[param_count++] = ctx->ac.i1false; /* slc */ + if (HAVE_LLVM >= 0x800) { + params[param_count++] = ctx->ac.i32_0; /* soffset */ + params[param_count++] = ctx->ac.i32_0; /* slc */ + + length = snprintf(intrinsic_name, sizeof(intrinsic_name), + "llvm.amdgcn.struct.buffer.atomic.%s.i32", atomic_name); + } else { + params[param_count++] = ctx->ac.i1false; /* slc */ - length = snprintf(intrinsic_name, sizeof(intrinsic_name), - "llvm.amdgcn.buffer.atomic.%s", atomic_name); + length = snprintf(intrinsic_name, sizeof(intrinsic_name), + "llvm.amdgcn.buffer.atomic.%s", atomic_name); + } assert(length < sizeof(intrinsic_name)); return ac_build_intrinsic(&ctx->ac, intrinsic_name, ctx->ac.i32, @@ -2505,8 +2567,7 @@ static LLVMValueRef visit_image_samples(struct ac_nir_context *ctx, const nir_intrinsic_instr *instr) { - const nir_variable *var = get_image_variable(instr); - const struct glsl_type *type = glsl_without_array(var->type); + const struct glsl_type *type = get_image_deref(instr)->type; struct ac_image_args args = { 0 }; args.dim = get_ac_sampler_dim(&ctx->ac, glsl_get_sampler_dim(type), @@ -2524,8 +2585,7 @@ const nir_intrinsic_instr *instr) { LLVMValueRef res; - const nir_variable *var = get_image_variable(instr); - const struct glsl_type *type = glsl_without_array(var->type); + const struct glsl_type *type = get_image_deref(instr)->type; if (glsl_get_sampler_dim(type) == GLSL_SAMPLER_DIM_BUF) return get_buffer_size(ctx, get_image_descriptor(ctx, instr, AC_DESC_BUFFER, false), true); @@ -2878,12 +2938,10 @@ } - LLVMValueRef array_idx = ctx->ac.i32_0; + LLVMValueRef attrib_idx = ctx->ac.i32_0; while(deref_instr->deref_type != nir_deref_type_var) { if (deref_instr->deref_type == nir_deref_type_array) { - unsigned array_size = glsl_get_aoa_size(deref_instr->type); - if (!array_size) - array_size = 1; + 
unsigned array_size = glsl_count_attribute_slots(deref_instr->type, false); LLVMValueRef offset; nir_const_value *const_value = nir_src_as_const_value(deref_instr->arr.index); @@ -2896,23 +2954,26 @@ LLVMConstInt(ctx->ac.i32, array_size, false), ""); } - array_idx = LLVMBuildAdd(ctx->ac.builder, array_idx, offset, ""); + attrib_idx = LLVMBuildAdd(ctx->ac.builder, attrib_idx, offset, ""); + deref_instr = nir_src_as_deref(deref_instr->parent); + } else if (deref_instr->deref_type == nir_deref_type_struct) { + LLVMValueRef offset; + unsigned sidx = deref_instr->strct.index; deref_instr = nir_src_as_deref(deref_instr->parent); + offset = LLVMConstInt(ctx->ac.i32, glsl_get_record_location_offset(deref_instr->type, sidx), false); + attrib_idx = LLVMBuildAdd(ctx->ac.builder, attrib_idx, offset, ""); } else { unreachable("Unsupported deref type"); } } - unsigned input_array_size = glsl_get_aoa_size(var->type); - if (!input_array_size) - input_array_size = 1; - + unsigned attrib_size = glsl_count_attribute_slots(var->type, false); for (chan = 0; chan < 4; chan++) { - LLVMValueRef gather = LLVMGetUndef(LLVMVectorType(ctx->ac.f32, input_array_size)); + LLVMValueRef gather = LLVMGetUndef(LLVMVectorType(ctx->ac.f32, attrib_size)); LLVMValueRef llvm_chan = LLVMConstInt(ctx->ac.i32, chan, false); - for (unsigned idx = 0; idx < input_array_size; ++idx) { + for (unsigned idx = 0; idx < attrib_size; ++idx) { LLVMValueRef v, attr_number; attr_number = LLVMConstInt(ctx->ac.i32, input_base + idx, false); @@ -2935,7 +2996,7 @@ LLVMConstInt(ctx->ac.i32, idx, false), ""); } - result[chan] = LLVMBuildExtractElement(ctx->ac.builder, gather, array_idx, ""); + result[chan] = LLVMBuildExtractElement(ctx->ac.builder, gather, attrib_idx, ""); } return ac_build_varying_gather_values(&ctx->ac, result, instr->num_components, @@ -3032,7 +3093,8 @@ ctx->abi->frag_pos[2], ac_build_fdiv(&ctx->ac, ctx->ac.f32_1, ctx->abi->frag_pos[3]) }; - result = ac_build_gather_values(&ctx->ac, values, 4); + result = 
ac_to_integer(&ctx->ac, + ac_build_gather_values(&ctx->ac, values, 4)); break; } case nir_intrinsic_load_front_face: @@ -3257,6 +3319,27 @@ } } +static LLVMValueRef get_bindless_index_from_uniform(struct ac_nir_context *ctx, + unsigned base_index, + unsigned constant_index, + LLVMValueRef dynamic_index) +{ + LLVMValueRef offset = LLVMConstInt(ctx->ac.i32, base_index * 4, 0); + LLVMValueRef index = LLVMBuildAdd(ctx->ac.builder, dynamic_index, + LLVMConstInt(ctx->ac.i32, constant_index, 0), ""); + + /* Bindless uniforms are 64bit so multiple index by 8 */ + index = LLVMBuildMul(ctx->ac.builder, index, LLVMConstInt(ctx->ac.i32, 8, 0), ""); + offset = LLVMBuildAdd(ctx->ac.builder, offset, index, ""); + + LLVMValueRef ubo_index = ctx->abi->load_ubo(ctx->abi, ctx->ac.i32_0); + + LLVMValueRef ret = ac_build_buffer_load(&ctx->ac, ubo_index, 1, NULL, offset, + NULL, 0, false, false, true, true); + + return LLVMBuildBitCast(ctx->ac.builder, ret, ctx->ac.i32, ""); +} + static LLVMValueRef get_sampler_desc(struct ac_nir_context *ctx, nir_deref_instr *deref_instr, enum ac_descriptor_type desc_type, @@ -3275,30 +3358,49 @@ base_index = tex_instr->sampler_index; } else { while(deref_instr->deref_type != nir_deref_type_var) { - unsigned array_size = glsl_get_aoa_size(deref_instr->type); - if (!array_size) - array_size = 1; - - assert(deref_instr->deref_type == nir_deref_type_array); - nir_const_value *const_value = nir_src_as_const_value(deref_instr->arr.index); - if (const_value) { - constant_index += array_size * const_value->u32[0]; + if (deref_instr->deref_type == nir_deref_type_array) { + unsigned array_size = glsl_get_aoa_size(deref_instr->type); + if (!array_size) + array_size = 1; + + nir_const_value *const_value = nir_src_as_const_value(deref_instr->arr.index); + if (const_value) { + constant_index += array_size * const_value->u32[0]; + } else { + LLVMValueRef indirect = get_src(ctx, deref_instr->arr.index); + + indirect = LLVMBuildMul(ctx->ac.builder, indirect, + 
LLVMConstInt(ctx->ac.i32, array_size, false), ""); + + if (!index) + index = indirect; + else + index = LLVMBuildAdd(ctx->ac.builder, index, indirect, ""); + } + + deref_instr = nir_src_as_deref(deref_instr->parent); + } else if (deref_instr->deref_type == nir_deref_type_struct) { + unsigned sidx = deref_instr->strct.index; + deref_instr = nir_src_as_deref(deref_instr->parent); + constant_index += glsl_get_record_location_offset(deref_instr->type, sidx); } else { - LLVMValueRef indirect = get_src(ctx, deref_instr->arr.index); - - indirect = LLVMBuildMul(ctx->ac.builder, indirect, - LLVMConstInt(ctx->ac.i32, array_size, false), ""); - - if (!index) - index = indirect; - else - index = LLVMBuildAdd(ctx->ac.builder, index, indirect, ""); + unreachable("Unsupported deref type"); } - - deref_instr = nir_src_as_deref(deref_instr->parent); } descriptor_set = deref_instr->var->data.descriptor_set; - base_index = deref_instr->var->data.binding; + + if (deref_instr->var->data.bindless) { + /* For now just assert on unhandled variable types */ + assert(deref_instr->var->data.mode == nir_var_uniform); + + base_index = deref_instr->var->data.driver_location; + bindless = true; + + index = index ? 
index : ctx->ac.i32_0; + index = get_bindless_index_from_uniform(ctx, base_index, + constant_index, index); + } else + base_index = deref_instr->var->data.binding; } return ctx->abi->load_sampler_desc(ctx->abi, @@ -3731,10 +3833,77 @@ } } +static LLVMTypeRef +glsl_base_to_llvm_type(struct ac_llvm_context *ac, + enum glsl_base_type type) +{ + switch (type) { + case GLSL_TYPE_INT: + case GLSL_TYPE_UINT: + case GLSL_TYPE_BOOL: + case GLSL_TYPE_SUBROUTINE: + return ac->i32; + case GLSL_TYPE_INT16: + case GLSL_TYPE_UINT16: + return ac->i16; + case GLSL_TYPE_FLOAT: + return ac->f32; + case GLSL_TYPE_FLOAT16: + return ac->f16; + case GLSL_TYPE_INT64: + case GLSL_TYPE_UINT64: + return ac->i64; + case GLSL_TYPE_DOUBLE: + return ac->f64; + default: + unreachable("unknown GLSL type"); + } +} + +static LLVMTypeRef +glsl_to_llvm_type(struct ac_llvm_context *ac, + const struct glsl_type *type) +{ + if (glsl_type_is_scalar(type)) { + return glsl_base_to_llvm_type(ac, glsl_get_base_type(type)); + } + + if (glsl_type_is_vector(type)) { + return LLVMVectorType( + glsl_base_to_llvm_type(ac, glsl_get_base_type(type)), + glsl_get_vector_elements(type)); + } + + if (glsl_type_is_matrix(type)) { + return LLVMArrayType( + glsl_to_llvm_type(ac, glsl_get_column_type(type)), + glsl_get_matrix_columns(type)); + } + + if (glsl_type_is_array(type)) { + return LLVMArrayType( + glsl_to_llvm_type(ac, glsl_get_array_element(type)), + glsl_get_length(type)); + } + + assert(glsl_type_is_struct(type)); + + LLVMTypeRef member_types[glsl_get_length(type)]; + + for (unsigned i = 0; i < glsl_get_length(type); i++) { + member_types[i] = + glsl_to_llvm_type(ac, + glsl_get_struct_field(type, i)); + } + + return LLVMStructTypeInContext(ac->context, member_types, + glsl_get_length(type), false); +} + static void visit_deref(struct ac_nir_context *ctx, nir_deref_instr *instr) { - if (instr->mode != nir_var_shared) + if (instr->mode != nir_var_mem_shared) return; LLVMValueRef result = NULL; @@ -3752,6 +3921,27 
@@ result = ac_build_gep0(&ctx->ac, get_src(ctx, instr->parent), get_src(ctx, instr->arr.index)); break; + case nir_deref_type_ptr_as_array: + result = ac_build_gep_ptr(&ctx->ac, get_src(ctx, instr->parent), + get_src(ctx, instr->arr.index)); + break; + case nir_deref_type_cast: { + result = get_src(ctx, instr->parent); + + LLVMTypeRef pointee_type = glsl_to_llvm_type(&ctx->ac, instr->type); + LLVMTypeRef type = LLVMPointerType(pointee_type, AC_ADDR_SPACE_LDS); + + if (LLVMTypeOf(result) != type) { + if (LLVMGetTypeKind(LLVMTypeOf(result)) == LLVMVectorTypeKind) { + result = LLVMBuildBitCast(ctx->ac.builder, result, + type, ""); + } else { + result = LLVMBuildIntToPtr(ctx->ac.builder, result, + type, ""); + } + } + break; + } default: unreachable("Unhandled deref_instr deref type"); } @@ -3900,68 +4090,6 @@ } } -static LLVMTypeRef -glsl_base_to_llvm_type(struct ac_llvm_context *ac, - enum glsl_base_type type) -{ - switch (type) { - case GLSL_TYPE_INT: - case GLSL_TYPE_UINT: - case GLSL_TYPE_BOOL: - case GLSL_TYPE_SUBROUTINE: - return ac->i32; - case GLSL_TYPE_FLOAT: /* TODO handle mediump */ - return ac->f32; - case GLSL_TYPE_INT64: - case GLSL_TYPE_UINT64: - return ac->i64; - case GLSL_TYPE_DOUBLE: - return ac->f64; - default: - unreachable("unknown GLSL type"); - } -} - -static LLVMTypeRef -glsl_to_llvm_type(struct ac_llvm_context *ac, - const struct glsl_type *type) -{ - if (glsl_type_is_scalar(type)) { - return glsl_base_to_llvm_type(ac, glsl_get_base_type(type)); - } - - if (glsl_type_is_vector(type)) { - return LLVMVectorType( - glsl_base_to_llvm_type(ac, glsl_get_base_type(type)), - glsl_get_vector_elements(type)); - } - - if (glsl_type_is_matrix(type)) { - return LLVMArrayType( - glsl_to_llvm_type(ac, glsl_get_column_type(type)), - glsl_get_matrix_columns(type)); - } - - if (glsl_type_is_array(type)) { - return LLVMArrayType( - glsl_to_llvm_type(ac, glsl_get_array_element(type)), - glsl_get_length(type)); - } - - assert(glsl_type_is_struct(type)); - - 
LLVMTypeRef member_types[glsl_get_length(type)]; - - for (unsigned i = 0; i < glsl_get_length(type); i++) { - member_types[i] = - glsl_to_llvm_type(ac, - glsl_get_struct_field(type, i)); - } - - return LLVMStructTypeInContext(ac->context, member_types, - glsl_get_length(type), false); -} - static void setup_locals(struct ac_nir_context *ctx, struct nir_function *func) @@ -4031,13 +4159,13 @@ setup_locals(&ctx, func); - if (nir->info.stage == MESA_SHADER_COMPUTE) + if (gl_shader_stage_is_compute(nir->info.stage)) setup_shared(&ctx, nir); visit_cf_list(&ctx, &func->impl->body); phi_post_pass(&ctx); - if (nir->info.stage != MESA_SHADER_COMPUTE) + if (!gl_shader_stage_is_compute(nir->info.stage)) ctx.abi->emit_outputs(ctx.abi, AC_LLVM_MAX_OUTPUTS, ctx.abi->outputs); @@ -4080,7 +4208,168 @@ * See the following thread for more details of the problem: * https://lists.freedesktop.org/archives/mesa-dev/2017-July/162106.html */ - indirect_mask |= nir_var_local; + indirect_mask |= nir_var_function_temp; nir_lower_indirect_derefs(nir, indirect_mask); } + +static unsigned +get_inst_tessfactor_writemask(nir_intrinsic_instr *intrin) +{ + if (intrin->intrinsic != nir_intrinsic_store_deref) + return 0; + + nir_variable *var = + nir_deref_instr_get_variable(nir_src_as_deref(intrin->src[0])); + + if (var->data.mode != nir_var_shader_out) + return 0; + + unsigned writemask = 0; + const int location = var->data.location; + unsigned first_component = var->data.location_frac; + unsigned num_comps = intrin->dest.ssa.num_components; + + if (location == VARYING_SLOT_TESS_LEVEL_INNER) + writemask = ((1 << (num_comps + 1)) - 1) << first_component; + else if (location == VARYING_SLOT_TESS_LEVEL_OUTER) + writemask = (((1 << (num_comps + 1)) - 1) << first_component) << 4; + + return writemask; +} + +static void +scan_tess_ctrl(nir_cf_node *cf_node, unsigned *upper_block_tf_writemask, + unsigned *cond_block_tf_writemask, + bool *tessfactors_are_def_in_all_invocs, bool is_nested_cf) +{ + switch 
(cf_node->type) { + case nir_cf_node_block: { + nir_block *block = nir_cf_node_as_block(cf_node); + nir_foreach_instr(instr, block) { + if (instr->type != nir_instr_type_intrinsic) + continue; + + nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(instr); + if (intrin->intrinsic == nir_intrinsic_barrier) { + + /* If we find a barrier in nested control flow put this in the + * too hard basket. In GLSL this is not possible but it is in + * SPIR-V. + */ + if (is_nested_cf) { + *tessfactors_are_def_in_all_invocs = false; + return; + } + + /* The following case must be prevented: + * gl_TessLevelInner = ...; + * barrier(); + * if (gl_InvocationID == 1) + * gl_TessLevelInner = ...; + * + * If you consider disjoint code segments separated by barriers, each + * such segment that writes tess factor channels should write the same + * channels in all codepaths within that segment. + */ + if (upper_block_tf_writemask || cond_block_tf_writemask) { + /* Accumulate the result: */ + *tessfactors_are_def_in_all_invocs &= + !(*cond_block_tf_writemask & ~(*upper_block_tf_writemask)); + + /* Analyze the next code segment from scratch. 
*/ + *upper_block_tf_writemask = 0; + *cond_block_tf_writemask = 0; + } + } else + *upper_block_tf_writemask |= get_inst_tessfactor_writemask(intrin); + } + + break; + } + case nir_cf_node_if: { + unsigned then_tessfactor_writemask = 0; + unsigned else_tessfactor_writemask = 0; + + nir_if *if_stmt = nir_cf_node_as_if(cf_node); + foreach_list_typed(nir_cf_node, nested_node, node, &if_stmt->then_list) { + scan_tess_ctrl(nested_node, &then_tessfactor_writemask, + cond_block_tf_writemask, + tessfactors_are_def_in_all_invocs, true); + } + + foreach_list_typed(nir_cf_node, nested_node, node, &if_stmt->else_list) { + scan_tess_ctrl(nested_node, &else_tessfactor_writemask, + cond_block_tf_writemask, + tessfactors_are_def_in_all_invocs, true); + } + + if (then_tessfactor_writemask || else_tessfactor_writemask) { + /* If both statements write the same tess factor channels, + * we can say that the upper block writes them too. + */ + *upper_block_tf_writemask |= then_tessfactor_writemask & + else_tessfactor_writemask; + *cond_block_tf_writemask |= then_tessfactor_writemask | + else_tessfactor_writemask; + } + + break; + } + case nir_cf_node_loop: { + nir_loop *loop = nir_cf_node_as_loop(cf_node); + foreach_list_typed(nir_cf_node, nested_node, node, &loop->body) { + scan_tess_ctrl(nested_node, cond_block_tf_writemask, + cond_block_tf_writemask, + tessfactors_are_def_in_all_invocs, true); + } + + break; + } + default: + unreachable("unknown cf node type"); + } +} + +bool +ac_are_tessfactors_def_in_all_invocs(const struct nir_shader *nir) +{ + assert(nir->info.stage == MESA_SHADER_TESS_CTRL); + + /* The pass works as follows: + * If all codepaths write tess factors, we can say that all + * invocations define tess factors. + * + * Each tess factor channel is tracked separately. + */ + unsigned main_block_tf_writemask = 0; /* if main block writes tess factors */ + unsigned cond_block_tf_writemask = 0; /* if cond block writes tess factors */ + + /* Initial value = true. 
Here the pass will accumulate results from + * multiple segments surrounded by barriers. If tess factors aren't + * written at all, it's a shader bug and we don't care if this will be + * true. + */ + bool tessfactors_are_def_in_all_invocs = true; + + nir_foreach_function(function, nir) { + if (function->impl) { + foreach_list_typed(nir_cf_node, node, node, &function->impl->body) { + scan_tess_ctrl(node, &main_block_tf_writemask, + &cond_block_tf_writemask, + &tessfactors_are_def_in_all_invocs, + false); + } + } + } + + /* Accumulate the result for the last code segment separated by a + * barrier. + */ + if (main_block_tf_writemask || cond_block_tf_writemask) { + tessfactors_are_def_in_all_invocs &= + !(cond_block_tf_writemask & ~main_block_tf_writemask); + } + + return tessfactors_are_def_in_all_invocs; +} diff -Nru mesa-18.3.3/src/amd/common/ac_nir_to_llvm.h mesa-19.0.1/src/amd/common/ac_nir_to_llvm.h --- mesa-18.3.3/src/amd/common/ac_nir_to_llvm.h 2018-03-13 20:41:43.000000000 +0000 +++ mesa-19.0.1/src/amd/common/ac_nir_to_llvm.h 2019-03-31 23:16:37.000000000 +0000 @@ -47,6 +47,8 @@ void ac_lower_indirect_derefs(struct nir_shader *nir, enum chip_class); +bool ac_are_tessfactors_def_in_all_invocs(const struct nir_shader *nir); + void ac_nir_translate(struct ac_llvm_context *ac, struct ac_shader_abi *abi, struct nir_shader *nir); diff -Nru mesa-18.3.3/src/amd/common/ac_surface.c mesa-19.0.1/src/amd/common/ac_surface.c --- mesa-18.3.3/src/amd/common/ac_surface.c 2018-12-07 18:58:04.000000000 +0000 +++ mesa-19.0.1/src/amd/common/ac_surface.c 2019-03-31 23:16:37.000000000 +0000 @@ -27,7 +27,7 @@ #include "ac_surface.h" #include "amd_family.h" -#include "addrlib/amdgpu_asic_addr.h" +#include "addrlib/src/amdgpu_asic_addr.h" #include "ac_gpu_info.h" #include "util/macros.h" #include "util/u_atomic.h" @@ -39,7 +39,7 @@ #include #include -#include "addrlib/addrinterface.h" +#include "addrlib/inc/addrinterface.h" #ifndef CIASICIDGFXENGINE_SOUTHERNISLAND #define 
CIASICIDGFXENGINE_SOUTHERNISLAND 0x0000000A @@ -1038,8 +1038,7 @@ static int gfx9_get_preferred_swizzle_mode(ADDR_HANDLE addrlib, ADDR2_COMPUTE_SURFACE_INFO_INPUT *in, - bool is_fmask, unsigned flags, - AddrSwizzleMode *swizzle_mode) + bool is_fmask, AddrSwizzleMode *swizzle_mode) { ADDR_E_RETURNCODE ret; ADDR2_GET_PREFERRED_SURF_SETTING_INPUT sin = {0}; @@ -1064,16 +1063,6 @@ sin.numSamples = in->numSamples; sin.numFrags = in->numFrags; - if (flags & RADEON_SURF_SCANOUT) { - sin.preferredSwSet.sw_D = 1; - /* Raven only allows S for displayable surfaces with < 64 bpp, so - * allow it as fallback */ - sin.preferredSwSet.sw_S = 1; - } else if (in->flags.depth || in->flags.stencil || is_fmask) - sin.preferredSwSet.sw_Z = 1; - else - sin.preferredSwSet.sw_S = 1; - if (is_fmask) { sin.flags.display = 0; sin.flags.color = 0; @@ -1273,8 +1262,7 @@ fout.size = sizeof(ADDR2_COMPUTE_FMASK_INFO_OUTPUT); ret = gfx9_get_preferred_swizzle_mode(addrlib, in, - true, surf->flags, - &fin.swizzleMode); + true, &fin.swizzleMode); if (ret != ADDR_OK) return ret; @@ -1424,11 +1412,13 @@ AddrSurfInfoIn.bpp = surf->bpe * 8; } - AddrSurfInfoIn.flags.color = !(surf->flags & RADEON_SURF_Z_OR_SBUFFER); + bool is_color_surface = !(surf->flags & RADEON_SURF_Z_OR_SBUFFER); + AddrSurfInfoIn.flags.color = is_color_surface && + !(surf->flags & RADEON_SURF_NO_RENDER_TARGET); AddrSurfInfoIn.flags.depth = (surf->flags & RADEON_SURF_ZBUFFER) != 0; AddrSurfInfoIn.flags.display = get_display_flag(config, surf); /* flags.texture currently refers to TC-compatible HTILE */ - AddrSurfInfoIn.flags.texture = AddrSurfInfoIn.flags.color || + AddrSurfInfoIn.flags.texture = is_color_surface || surf->flags & RADEON_SURF_TC_COMPATIBLE_HTILE; AddrSurfInfoIn.flags.opt4space = 1; @@ -1476,8 +1466,7 @@ } r = gfx9_get_preferred_swizzle_mode(addrlib, &AddrSurfInfoIn, - false, surf->flags, - &AddrSurfInfoIn.swizzleMode); + false, &AddrSurfInfoIn.swizzleMode); if (r) return r; break; @@ -1513,8 +1502,7 @@ if 
(!AddrSurfInfoIn.flags.depth) { r = gfx9_get_preferred_swizzle_mode(addrlib, &AddrSurfInfoIn, - false, surf->flags, - &AddrSurfInfoIn.swizzleMode); + false, &AddrSurfInfoIn.swizzleMode); if (r) return r; } else @@ -1530,10 +1518,12 @@ /* Query whether the surface is displayable. */ bool displayable = false; - r = Addr2IsValidDisplaySwizzleMode(addrlib, surf->u.gfx9.surf.swizzle_mode, + if (!config->is_3d && !config->is_cube) { + r = Addr2IsValidDisplaySwizzleMode(addrlib, surf->u.gfx9.surf.swizzle_mode, surf->bpe * 8, &displayable); - if (r) - return r; + if (r) + return r; + } surf->is_displayable = displayable; switch (surf->u.gfx9.surf.swizzle_mode) { @@ -1594,10 +1584,6 @@ assert(0); } - /* Temporary workaround to prevent VM faults and hangs. */ - if (info->family == CHIP_VEGA12) - surf->fmask_size *= 8; - return 0; } diff -Nru mesa-18.3.3/src/amd/common/ac_surface.h mesa-19.0.1/src/amd/common/ac_surface.h --- mesa-18.3.3/src/amd/common/ac_surface.h 2018-09-27 19:13:53.000000000 +0000 +++ mesa-19.0.1/src/amd/common/ac_surface.h 2019-03-31 23:16:37.000000000 +0000 @@ -68,6 +68,7 @@ #define RADEON_SURF_IMPORTED (1 << 24) #define RADEON_SURF_OPTIMIZE_FOR_SPACE (1 << 25) #define RADEON_SURF_SHAREABLE (1 << 26) +#define RADEON_SURF_NO_RENDER_TARGET (1 << 27) struct legacy_surf_level { uint64_t offset; diff -Nru mesa-18.3.3/src/amd/common/sid.h mesa-19.0.1/src/amd/common/sid.h --- mesa-18.3.3/src/amd/common/sid.h 2018-12-07 18:58:04.000000000 +0000 +++ mesa-19.0.1/src/amd/common/sid.h 2019-03-31 23:16:37.000000000 +0000 @@ -133,11 +133,11 @@ #define S_370_WR_ONE_ADDR(x) (((unsigned)(x) & 0x1) << 16) #define S_370_DST_SEL(x) (((unsigned)(x) & 0xf) << 8) #define V_370_MEM_MAPPED_REGISTER 0 -#define V_370_MEMORY_SYNC 1 +#define V_370_MEM_GRBM 1 /* sync across GRBM */ #define V_370_TC_L2 2 #define V_370_GDS 3 #define V_370_RESERVED 4 -#define V_370_MEM_ASYNC 5 +#define V_370_MEM 5 /* not on SI */ #define R_371_DST_ADDR_LO 0x371 #define R_372_DST_ADDR_HI 0x372 #define 
PKT3_DRAW_INDEX_INDIRECT_MULTI 0x38 @@ -211,12 +211,14 @@ #define PKT3_SET_SH_REG 0x76 #define PKT3_SET_SH_REG_OFFSET 0x77 #define PKT3_SET_UCONFIG_REG 0x79 /* new for CIK */ +#define PKT3_SET_UCONFIG_REG_INDEX 0x7A /* new for GFX9, CP ucode version >= 26 */ #define PKT3_LOAD_CONST_RAM 0x80 #define PKT3_WRITE_CONST_RAM 0x81 #define PKT3_DUMP_CONST_RAM 0x83 #define PKT3_INCREMENT_CE_COUNTER 0x84 #define PKT3_INCREMENT_DE_COUNTER 0x85 #define PKT3_WAIT_ON_CE_COUNTER 0x86 +#define PKT3_LOAD_CONTEXT_REG 0x9F /* new for VI */ #define PKT_TYPE_S(x) (((unsigned)(x) & 0x3) << 30) #define PKT_TYPE_G(x) (((x) >> 30) & 0x3) @@ -2435,6 +2437,9 @@ #define S_008F30_FILTER_MODE(x) (((unsigned)(x) & 0x03) << 29) #define G_008F30_FILTER_MODE(x) (((x) >> 29) & 0x03) #define C_008F30_FILTER_MODE 0x9FFFFFFF +#define V_008F30_SQ_IMG_FILTER_MODE_BLEND 0x00 +#define V_008F30_SQ_IMG_FILTER_MODE_MIN 0x01 +#define V_008F30_SQ_IMG_FILTER_MODE_MAX 0x02 /* VI */ #define S_008F30_COMPAT_MODE(x) (((unsigned)(x) & 0x1) << 31) #define G_008F30_COMPAT_MODE(x) (((x) >> 31) & 0x1) diff -Nru mesa-18.3.3/src/amd/common/sid_tables.py mesa-19.0.1/src/amd/common/sid_tables.py --- mesa-18.3.3/src/amd/common/sid_tables.py 2018-09-27 19:13:53.000000000 +0000 +++ mesa-19.0.1/src/amd/common/sid_tables.py 2019-03-31 23:16:37.000000000 +0000 @@ -1,4 +1,4 @@ -from __future__ import print_function +from __future__ import print_function, division, unicode_literals CopyRight = ''' /* diff -Nru mesa-18.3.3/src/amd/Makefile.addrlib.am mesa-19.0.1/src/amd/Makefile.addrlib.am --- mesa-18.3.3/src/amd/Makefile.addrlib.am 2018-12-07 18:58:04.000000000 +0000 +++ mesa-19.0.1/src/amd/Makefile.addrlib.am 2019-03-31 23:16:37.000000000 +0000 @@ -26,12 +26,11 @@ -I$(top_srcdir)/src/ \ -I$(top_srcdir)/include \ -I$(srcdir)/common \ - -I$(srcdir)/addrlib \ - -I$(srcdir)/addrlib/core \ - -I$(srcdir)/addrlib/inc/chip/gfx9 \ - -I$(srcdir)/addrlib/inc/chip/r800 \ - -I$(srcdir)/addrlib/gfx9/chip \ - -I$(srcdir)/addrlib/r800/chip + 
-I$(srcdir)/addrlib/inc \ + -I$(srcdir)/addrlib/src \ + -I$(srcdir)/addrlib/src/core \ + -I$(srcdir)/addrlib/src/chip/gfx9 \ + -I$(srcdir)/addrlib/src/chip/r800 addrlib_libamdgpu_addrlib_la_CXXFLAGS = \ $(VISIBILITY_CXXFLAGS) $(CXX11_CXXFLAGS) diff -Nru mesa-18.3.3/src/amd/Makefile.sources mesa-19.0.1/src/amd/Makefile.sources --- mesa-18.3.3/src/amd/Makefile.sources 2018-03-13 20:41:43.000000000 +0000 +++ mesa-19.0.1/src/amd/Makefile.sources 2019-03-31 23:16:37.000000000 +0000 @@ -5,35 +5,33 @@ common/amd_kernel_code_t.h ADDRLIB_FILES = \ - addrlib/addrinterface.cpp \ - addrlib/addrinterface.h \ - addrlib/addrtypes.h \ - addrlib/amdgpu_asic_addr.h \ - addrlib/core/addrcommon.h \ - addrlib/core/addrelemlib.cpp \ - addrlib/core/addrelemlib.h \ - addrlib/core/addrlib.cpp \ - addrlib/core/addrlib.h \ - addrlib/core/addrlib1.cpp \ - addrlib/core/addrlib1.h \ - addrlib/core/addrlib2.cpp \ - addrlib/core/addrlib2.h \ - addrlib/core/addrobject.cpp \ - addrlib/core/addrobject.h \ - addrlib/gfx9/chip/gfx9_enum.h \ - addrlib/gfx9/coord.cpp \ - addrlib/gfx9/coord.h \ - addrlib/gfx9/gfx9addrlib.cpp \ - addrlib/gfx9/gfx9addrlib.h \ - addrlib/inc/chip/gfx9/gfx9_gb_reg.h \ - addrlib/inc/chip/r800/si_gb_reg.h \ - addrlib/r800/chip/si_ci_vi_merged_enum.h \ - addrlib/r800/ciaddrlib.cpp \ - addrlib/r800/ciaddrlib.h \ - addrlib/r800/egbaddrlib.cpp \ - addrlib/r800/egbaddrlib.h \ - addrlib/r800/siaddrlib.cpp \ - addrlib/r800/siaddrlib.h + addrlib/inc/addrinterface.h \ + addrlib/inc/addrtypes.h \ + addrlib/src/addrinterface.cpp \ + addrlib/src/amdgpu_asic_addr.h \ + addrlib/src/core/addrcommon.h \ + addrlib/src/core/addrelemlib.cpp \ + addrlib/src/core/addrelemlib.h \ + addrlib/src/core/addrlib.cpp \ + addrlib/src/core/addrlib.h \ + addrlib/src/core/addrlib1.cpp \ + addrlib/src/core/addrlib1.h \ + addrlib/src/core/addrlib2.cpp \ + addrlib/src/core/addrlib2.h \ + addrlib/src/core/addrobject.cpp \ + addrlib/src/core/addrobject.h \ + addrlib/src/core/coord.cpp \ + addrlib/src/core/coord.h \ 
+ addrlib/src/gfx9/gfx9addrlib.cpp \ + addrlib/src/gfx9/gfx9addrlib.h \ + addrlib/src/chip/gfx9/gfx9_gb_reg.h \ + addrlib/src/chip/r800/si_gb_reg.h \ + addrlib/src/r800/ciaddrlib.cpp \ + addrlib/src/r800/ciaddrlib.h \ + addrlib/src/r800/egbaddrlib.cpp \ + addrlib/src/r800/egbaddrlib.h \ + addrlib/src/r800/siaddrlib.cpp \ + addrlib/src/r800/siaddrlib.h AMD_COMPILER_FILES = \ common/ac_binary.c \ diff -Nru mesa-18.3.3/src/amd/vulkan/Makefile.sources mesa-19.0.1/src/amd/vulkan/Makefile.sources --- mesa-18.3.3/src/amd/vulkan/Makefile.sources 2018-09-27 19:13:53.000000000 +0000 +++ mesa-19.0.1/src/amd/vulkan/Makefile.sources 2019-03-31 23:16:37.000000000 +0000 @@ -50,6 +50,7 @@ radv_meta_copy.c \ radv_meta_decompress.c \ radv_meta_fast_clear.c \ + radv_meta_fmask_expand.c \ radv_meta_resolve.c \ radv_meta_resolve_cs.c \ radv_meta_resolve_fs.c \ diff -Nru mesa-18.3.3/src/amd/vulkan/meson.build mesa-19.0.1/src/amd/vulkan/meson.build --- mesa-18.3.3/src/amd/vulkan/meson.build 2018-12-07 18:58:04.000000000 +0000 +++ mesa-19.0.1/src/amd/vulkan/meson.build 2019-03-31 23:16:37.000000000 +0000 @@ -78,6 +78,7 @@ 'radv_meta_copy.c', 'radv_meta_decompress.c', 'radv_meta_fast_clear.c', + 'radv_meta_fmask_expand.c', 'radv_meta_resolve.c', 'radv_meta_resolve_cs.c', 'radv_meta_resolve_fs.c', diff -Nru mesa-18.3.3/src/amd/vulkan/radv_android.c mesa-19.0.1/src/amd/vulkan/radv_android.c --- mesa-18.3.3/src/amd/vulkan/radv_android.c 2018-12-07 18:58:04.000000000 +0000 +++ mesa-19.0.1/src/amd/vulkan/radv_android.c 2019-03-31 23:16:37.000000000 +0000 @@ -111,7 +111,7 @@ VkResult result; if (gralloc_info->handle->numFds != 1) { - return vk_errorf(device->instance, VK_ERROR_INVALID_EXTERNAL_HANDLE_KHR, + return vk_errorf(device->instance, VK_ERROR_INVALID_EXTERNAL_HANDLE, "VkNativeBufferANDROID::handle::numFds is %d, " "expected 1", gralloc_info->handle->numFds); } @@ -126,7 +126,7 @@ const VkImportMemoryFdInfoKHR import_info = { .sType = VK_STRUCTURE_TYPE_IMPORT_MEMORY_FD_INFO_KHR, - 
.handleType = VK_EXTERNAL_MEMORY_HANDLE_TYPE_OPAQUE_FD_BIT_KHR, + .handleType = VK_EXTERNAL_MEMORY_HANDLE_TYPE_OPAQUE_FD_BIT, .fd = dup(dma_buf), }; @@ -230,16 +230,16 @@ * dEQP-VK.wsi.android.swapchain.*.image_usage to fail. */ - const VkPhysicalDeviceImageFormatInfo2KHR image_format_info = { - .sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_IMAGE_FORMAT_INFO_2_KHR, + const VkPhysicalDeviceImageFormatInfo2 image_format_info = { + .sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_IMAGE_FORMAT_INFO_2, .format = format, .type = VK_IMAGE_TYPE_2D, .tiling = VK_IMAGE_TILING_OPTIMAL, .usage = imageUsage, }; - VkImageFormatProperties2KHR image_format_props = { - .sType = VK_STRUCTURE_TYPE_IMAGE_FORMAT_PROPERTIES_2_KHR, + VkImageFormatProperties2 image_format_props = { + .sType = VK_STRUCTURE_TYPE_IMAGE_FORMAT_PROPERTIES_2, }; /* Check that requested format and usage are supported. */ @@ -303,7 +303,7 @@ semaphore_result = radv_ImportSemaphoreFdKHR(device, &(VkImportSemaphoreFdInfoKHR) { .sType = VK_STRUCTURE_TYPE_IMPORT_SEMAPHORE_FD_INFO_KHR, - .flags = VK_SEMAPHORE_IMPORT_TEMPORARY_BIT_KHR, + .flags = VK_SEMAPHORE_IMPORT_TEMPORARY_BIT, .fd = semaphore_fd, .semaphore = semaphore, }); @@ -314,7 +314,7 @@ fence_result = radv_ImportFenceFdKHR(device, &(VkImportFenceFdInfoKHR) { .sType = VK_STRUCTURE_TYPE_IMPORT_FENCE_FD_INFO_KHR, - .flags = VK_FENCE_IMPORT_TEMPORARY_BIT_KHR, + .flags = VK_FENCE_IMPORT_TEMPORARY_BIT, .fd = fence_fd, .fence = fence, }); @@ -351,7 +351,7 @@ result = radv_GetSemaphoreFdKHR(radv_device_to_handle(queue->device), &(VkSemaphoreGetFdInfoKHR) { .sType = VK_STRUCTURE_TYPE_SEMAPHORE_GET_FD_INFO_KHR, - .handleType = VK_EXTERNAL_SEMAPHORE_HANDLE_TYPE_SYNC_FD_BIT_KHR, + .handleType = VK_EXTERNAL_SEMAPHORE_HANDLE_TYPE_SYNC_FD_BIT, .semaphore = pWaitSemaphores[i], }, &tmp_fd); if (result != VK_SUCCESS) { diff -Nru mesa-18.3.3/src/amd/vulkan/radv_cmd_buffer.c mesa-19.0.1/src/amd/vulkan/radv_cmd_buffer.c --- mesa-18.3.3/src/amd/vulkan/radv_cmd_buffer.c 2019-02-01 
12:03:20.000000000 +0000 +++ mesa-19.0.1/src/amd/vulkan/radv_cmd_buffer.c 2019-03-31 23:16:37.000000000 +0000 @@ -57,8 +57,7 @@ VkImageLayout dst_layout, uint32_t src_family, uint32_t dst_family, - const VkImageSubresourceRange *range, - VkImageAspectFlags pending_clears); + const VkImageSubresourceRange *range); const struct radv_dynamic_state default_dynamic_state = { .viewport = { @@ -333,18 +332,21 @@ cmd_buffer->descriptors[i].push_dirty = false; } - if (cmd_buffer->device->physical_device->rad_info.chip_class >= GFX9) { + if (cmd_buffer->device->physical_device->rad_info.chip_class >= GFX9 && + cmd_buffer->queue_family_index == RADV_QUEUE_GENERAL) { unsigned num_db = cmd_buffer->device->physical_device->rad_info.num_render_backends; - unsigned eop_bug_offset; + unsigned fence_offset, eop_bug_offset; void *fence_ptr; - radv_cmd_buffer_upload_alloc(cmd_buffer, 8, 0, - &cmd_buffer->gfx9_fence_offset, + radv_cmd_buffer_upload_alloc(cmd_buffer, 8, 8, &fence_offset, &fence_ptr); - cmd_buffer->gfx9_fence_bo = cmd_buffer->upload.upload_bo; + + cmd_buffer->gfx9_fence_va = + radv_buffer_get_va(cmd_buffer->upload.upload_bo); + cmd_buffer->gfx9_fence_va += fence_offset; /* Allocate a buffer for the EOP bug on GFX9. 
*/ - radv_cmd_buffer_upload_alloc(cmd_buffer, 16 * num_db, 0, + radv_cmd_buffer_upload_alloc(cmd_buffer, 16 * num_db, 8, &eop_bug_offset, &fence_ptr); cmd_buffer->gfx9_eop_bug_va = radv_buffer_get_va(cmd_buffer->upload.upload_bo); @@ -373,7 +375,8 @@ RADEON_DOMAIN_GTT, RADEON_FLAG_CPU_ACCESS| RADEON_FLAG_NO_INTERPROCESS_SHARING | - RADEON_FLAG_32BIT); + RADEON_FLAG_32BIT, + RADV_BO_PRIORITY_UPLOAD_BUFFER); if (!bo) { cmd_buffer->record_result = VK_ERROR_OUT_OF_DEVICE_MEMORY; @@ -414,6 +417,8 @@ unsigned *out_offset, void **ptr) { + assert(util_is_power_of_two_nonzero(alignment)); + uint64_t offset = align(cmd_buffer->upload.offset, alignment); if (offset + size > cmd_buffer->upload.size) { if (!radv_cmd_buffer_resize_upload_buf(cmd_buffer, size)) @@ -454,7 +459,7 @@ radeon_check_space(cmd_buffer->device->ws, cs, 4 + count); radeon_emit(cs, PKT3(PKT3_WRITE_DATA, 2 + count, 0)); - radeon_emit(cs, S_370_DST_SEL(V_370_MEM_ASYNC) | + radeon_emit(cs, S_370_DST_SEL(V_370_MEM) | S_370_WR_CONFIRM(1) | S_370_ENGINE_SEL(V_370_ME)); radeon_emit(cs, va); @@ -487,24 +492,16 @@ enum radv_cmd_flush_bits flags) { if (cmd_buffer->device->instance->debug_flags & RADV_DEBUG_SYNC_SHADERS) { - uint32_t *ptr = NULL; - uint64_t va = 0; - assert(flags & (RADV_CMD_FLAG_PS_PARTIAL_FLUSH | RADV_CMD_FLAG_CS_PARTIAL_FLUSH)); - if (cmd_buffer->device->physical_device->rad_info.chip_class == GFX9) { - va = radv_buffer_get_va(cmd_buffer->gfx9_fence_bo) + - cmd_buffer->gfx9_fence_offset; - ptr = &cmd_buffer->gfx9_fence_idx; - } - radeon_check_space(cmd_buffer->device->ws, cmd_buffer->cs, 4); /* Force wait for graphics or compute engines to be idle. */ si_cs_emit_cache_flush(cmd_buffer->cs, cmd_buffer->device->physical_device->rad_info.chip_class, - ptr, va, + &cmd_buffer->gfx9_fence_idx, + cmd_buffer->gfx9_fence_va, radv_cmd_buffer_uses_mec(cmd_buffer), flags, cmd_buffer->gfx9_eop_bug_va); } @@ -595,8 +592,7 @@ if (loc->sgpr_idx == -1) return; - assert(loc->num_sgprs == (HAVE_32BIT_POINTERS ? 
1 : 2)); - assert(!loc->indirect); + assert(loc->num_sgprs == 1); radv_emit_shader_pointer(cmd_buffer->device, cmd_buffer->cs, base_reg + loc->sgpr_idx * 4, va, false); @@ -625,14 +621,12 @@ struct radv_userdata_info *loc = &locs->descriptor_sets[start]; unsigned sh_offset = sh_base + loc->sgpr_idx * 4; - radv_emit_shader_pointer_head(cs, sh_offset, count, - HAVE_32BIT_POINTERS); + radv_emit_shader_pointer_head(cs, sh_offset, count, true); for (int i = 0; i < count; i++) { struct radv_descriptor_set *set = descriptors_state->sets[start + i]; - radv_emit_shader_pointer_body(device, cs, set->va, - HAVE_32BIT_POINTERS); + radv_emit_shader_pointer_body(device, cs, set->va, true); } } } @@ -664,6 +658,8 @@ radeon_emit(cmd_buffer->cs, PKT3(PKT3_EVENT_WRITE, 0, 0)); radeon_emit(cmd_buffer->cs, EVENT_TYPE(V_028A90_FLUSH_DFSM) | EVENT_INDEX(0)); } + + cmd_buffer->state.context_roll_without_scissor_emitted = true; } static void @@ -860,10 +856,13 @@ sx_blend_opt_control |= S_02875C_MRT0_COLOR_OPT_DISABLE(1) << (i * 4); sx_blend_opt_control |= S_02875C_MRT0_ALPHA_OPT_DISABLE(1) << (i * 4); } + /* TODO: avoid redundantly setting context registers */ radeon_set_context_reg_seq(cmd_buffer->cs, R_028754_SX_PS_DOWNCONVERT, 3); radeon_emit(cmd_buffer->cs, sx_ps_downconvert); radeon_emit(cmd_buffer->cs, sx_blend_opt_epsilon); radeon_emit(cmd_buffer->cs, sx_blend_opt_control); + + cmd_buffer->state.context_roll_without_scissor_emitted = true; } static void @@ -887,6 +886,15 @@ radeon_emit_array(cmd_buffer->cs, pipeline->cs.buf, pipeline->cs.cdw); + if (!cmd_buffer->state.emitted_pipeline || + cmd_buffer->state.emitted_pipeline->ctx_cs.cdw != pipeline->ctx_cs.cdw || + cmd_buffer->state.emitted_pipeline->ctx_cs_hash != pipeline->ctx_cs_hash || + memcmp(cmd_buffer->state.emitted_pipeline->ctx_cs.buf, + pipeline->ctx_cs.buf, pipeline->ctx_cs.cdw * 4)) { + radeon_emit_array(cmd_buffer->cs, pipeline->ctx_cs.buf, pipeline->ctx_cs.cdw); + 
cmd_buffer->state.context_roll_without_scissor_emitted = true; + } + for (unsigned i = 0; i < MESA_SHADER_COMPUTE; i++) { if (!pipeline->shaders[i]) continue; @@ -923,6 +931,8 @@ cmd_buffer->state.dynamic.scissor.scissors, cmd_buffer->state.dynamic.viewport.viewports, cmd_buffer->state.emitted_pipeline->graphics.can_use_guardband); + + cmd_buffer->state.context_roll_without_scissor_emitted = false; } static void @@ -1062,6 +1072,11 @@ radeon_set_context_reg(cmd_buffer->cs, R_028C94_CB_COLOR0_DCC_BASE + index * 0x3c, cb->cb_dcc_base); } } + + if (radv_image_has_dcc(image)) { + /* Drawing with DCC enabled also compresses colorbuffers. */ + radv_update_dcc_metadata(cmd_buffer, image, true); + } } static void @@ -1215,6 +1230,8 @@ radv_update_zrange_precision(cmd_buffer, &att->ds, image, layout, false); } + + cmd_buffer->state.context_roll_without_scissor_emitted = true; } /** @@ -1242,7 +1259,7 @@ ++reg_count; radeon_emit(cs, PKT3(PKT3_WRITE_DATA, 2 + reg_count, 0)); - radeon_emit(cs, S_370_DST_SEL(V_370_MEM_ASYNC) | + radeon_emit(cs, S_370_DST_SEL(V_370_MEM) | S_370_WR_CONFIRM(1) | S_370_ENGINE_SEL(V_370_PFP)); radeon_emit(cs, va); @@ -1266,7 +1283,7 @@ va += image->offset + image->tc_compat_zrange_offset; radeon_emit(cs, PKT3(PKT3_WRITE_DATA, 3, 0)); - radeon_emit(cs, S_370_DST_SEL(V_370_MEM_ASYNC) | + radeon_emit(cs, S_370_DST_SEL(V_370_MEM) | S_370_WR_CONFIRM(1) | S_370_ENGINE_SEL(V_370_PFP)); radeon_emit(cs, va); @@ -1279,7 +1296,6 @@ struct radv_image *image, VkClearDepthStencilValue ds_clear_value) { - struct radeon_cmdbuf *cs = cmd_buffer->cs; uint64_t va = radv_buffer_get_va(image->bo); va += image->offset + image->tc_compat_zrange_offset; uint32_t cond_val; @@ -1341,17 +1357,27 @@ if (aspects & VK_IMAGE_ASPECT_DEPTH_BIT) ++reg_count; - radeon_emit(cs, PKT3(PKT3_COPY_DATA, 4, 0)); - radeon_emit(cs, COPY_DATA_SRC_SEL(COPY_DATA_SRC_MEM) | - COPY_DATA_DST_SEL(COPY_DATA_REG) | - (reg_count == 2 ? 
COPY_DATA_COUNT_SEL : 0)); - radeon_emit(cs, va); - radeon_emit(cs, va >> 32); - radeon_emit(cs, (R_028028_DB_STENCIL_CLEAR + 4 * reg_offset) >> 2); - radeon_emit(cs, 0); + uint32_t reg = R_028028_DB_STENCIL_CLEAR + 4 * reg_offset; - radeon_emit(cs, PKT3(PKT3_PFP_SYNC_ME, 0, 0)); - radeon_emit(cs, 0); + if (cmd_buffer->device->physical_device->has_load_ctx_reg_pkt) { + radeon_emit(cs, PKT3(PKT3_LOAD_CONTEXT_REG, 3, 0)); + radeon_emit(cs, va); + radeon_emit(cs, va >> 32); + radeon_emit(cs, (reg - SI_CONTEXT_REG_OFFSET) >> 2); + radeon_emit(cs, reg_count); + } else { + radeon_emit(cs, PKT3(PKT3_COPY_DATA, 4, 0)); + radeon_emit(cs, COPY_DATA_SRC_SEL(COPY_DATA_SRC_MEM) | + COPY_DATA_DST_SEL(COPY_DATA_REG) | + (reg_count == 2 ? COPY_DATA_COUNT_SEL : 0)); + radeon_emit(cs, va); + radeon_emit(cs, va >> 32); + radeon_emit(cs, reg >> 2); + radeon_emit(cs, 0); + + radeon_emit(cs, PKT3(PKT3_PFP_SYNC_ME, 0, 0)); + radeon_emit(cs, 0); + } } /* @@ -1360,9 +1386,31 @@ * cmask eliminate is required. */ void -radv_set_dcc_need_cmask_elim_pred(struct radv_cmd_buffer *cmd_buffer, - struct radv_image *image, - bool value) +radv_update_fce_metadata(struct radv_cmd_buffer *cmd_buffer, + struct radv_image *image, bool value) +{ + uint64_t pred_val = value; + uint64_t va = radv_buffer_get_va(image->bo); + va += image->offset + image->fce_pred_offset; + + assert(radv_image_has_dcc(image)); + + radeon_emit(cmd_buffer->cs, PKT3(PKT3_WRITE_DATA, 4, 0)); + radeon_emit(cmd_buffer->cs, S_370_DST_SEL(V_370_MEM) | + S_370_WR_CONFIRM(1) | + S_370_ENGINE_SEL(V_370_PFP)); + radeon_emit(cmd_buffer->cs, va); + radeon_emit(cmd_buffer->cs, va >> 32); + radeon_emit(cmd_buffer->cs, pred_val); + radeon_emit(cmd_buffer->cs, pred_val >> 32); +} + +/** + * Update the DCC predicate to reflect the compression state. 
+ */ +void +radv_update_dcc_metadata(struct radv_cmd_buffer *cmd_buffer, + struct radv_image *image, bool value) { uint64_t pred_val = value; uint64_t va = radv_buffer_get_va(image->bo); @@ -1371,7 +1419,7 @@ assert(radv_image_has_dcc(image)); radeon_emit(cmd_buffer->cs, PKT3(PKT3_WRITE_DATA, 4, 0)); - radeon_emit(cmd_buffer->cs, S_370_DST_SEL(V_370_MEM_ASYNC) | + radeon_emit(cmd_buffer->cs, S_370_DST_SEL(V_370_MEM) | S_370_WR_CONFIRM(1) | S_370_ENGINE_SEL(V_370_PFP)); radeon_emit(cmd_buffer->cs, va); @@ -1409,6 +1457,8 @@ radeon_set_context_reg_seq(cs, R_028C8C_CB_COLOR0_CLEAR_WORD0 + cb_idx * 0x3c, 2); radeon_emit(cs, color_values[0]); radeon_emit(cs, color_values[1]); + + cmd_buffer->state.context_roll_without_scissor_emitted = true; } /** @@ -1427,7 +1477,7 @@ assert(radv_image_has_cmask(image) || radv_image_has_dcc(image)); radeon_emit(cs, PKT3(PKT3_WRITE_DATA, 4, 0)); - radeon_emit(cs, S_370_DST_SEL(V_370_MEM_ASYNC) | + radeon_emit(cs, S_370_DST_SEL(V_370_MEM) | S_370_WR_CONFIRM(1) | S_370_ENGINE_SEL(V_370_PFP)); radeon_emit(cs, va); @@ -1471,17 +1521,25 @@ uint32_t reg = R_028C8C_CB_COLOR0_CLEAR_WORD0 + cb_idx * 0x3c; - radeon_emit(cs, PKT3(PKT3_COPY_DATA, 4, cmd_buffer->state.predicating)); - radeon_emit(cs, COPY_DATA_SRC_SEL(COPY_DATA_SRC_MEM) | - COPY_DATA_DST_SEL(COPY_DATA_REG) | - COPY_DATA_COUNT_SEL); - radeon_emit(cs, va); - radeon_emit(cs, va >> 32); - radeon_emit(cs, reg >> 2); - radeon_emit(cs, 0); + if (cmd_buffer->device->physical_device->has_load_ctx_reg_pkt) { + radeon_emit(cs, PKT3(PKT3_LOAD_CONTEXT_REG, 3, cmd_buffer->state.predicating)); + radeon_emit(cs, va); + radeon_emit(cs, va >> 32); + radeon_emit(cs, (reg - SI_CONTEXT_REG_OFFSET) >> 2); + radeon_emit(cs, 2); + } else { + radeon_emit(cs, PKT3(PKT3_COPY_DATA, 4, cmd_buffer->state.predicating)); + radeon_emit(cs, COPY_DATA_SRC_SEL(COPY_DATA_SRC_MEM) | + COPY_DATA_DST_SEL(COPY_DATA_REG) | + COPY_DATA_COUNT_SEL); + radeon_emit(cs, va); + radeon_emit(cs, va >> 32); + radeon_emit(cs, reg >> 
2); + radeon_emit(cs, 0); - radeon_emit(cs, PKT3(PKT3_PFP_SYNC_ME, 0, cmd_buffer->state.predicating)); - radeon_emit(cs, 0); + radeon_emit(cs, PKT3(PKT3_PFP_SYNC_ME, 0, cmd_buffer->state.predicating)); + radeon_emit(cs, 0); + } } static void @@ -1490,6 +1548,7 @@ int i; struct radv_framebuffer *framebuffer = cmd_buffer->state.framebuffer; const struct radv_subpass *subpass = cmd_buffer->state.subpass; + unsigned num_bpp64_colorbufs = 0; /* this may happen for inherited secondary recording */ if (!framebuffer) @@ -1513,6 +1572,9 @@ radv_emit_fb_color_state(cmd_buffer, i, att, image, layout); radv_load_color_clear_metadata(cmd_buffer, image, i); + + if (image->surface.bpe >= 8) + num_bpp64_colorbufs++; } if(subpass->depth_stencil_attachment.attachment != VK_ATTACHMENT_UNUSED) { @@ -1548,6 +1610,23 @@ S_028208_BR_X(framebuffer->width) | S_028208_BR_Y(framebuffer->height)); + if (cmd_buffer->device->physical_device->rad_info.chip_class >= VI) { + uint8_t watermark = 4; /* Default value for VI. */ + + /* For optimal DCC performance. */ + if (cmd_buffer->device->physical_device->rad_info.chip_class >= GFX9) { + if (num_bpp64_colorbufs >= 5) { + watermark = 8; + } else { + watermark = 6; + } + } + + radeon_set_context_reg(cmd_buffer->cs, R_028424_CB_DCC_CONTROL, + S_028424_OVERWRITE_COMBINER_MRT_SHARING_DISABLE(1) | + S_028424_OVERWRITE_COMBINER_WATERMARK(watermark)); + } + if (cmd_buffer->device->dfsm_allowed) { radeon_emit(cmd_buffer->cs, PKT3(PKT3_EVENT_WRITE, 0, 0)); radeon_emit(cmd_buffer->cs, EVENT_TYPE(V_028A90_BREAK_BATCH) | EVENT_INDEX(0)); @@ -1641,6 +1720,8 @@ } radeon_set_context_reg(cmd_buffer->cs, R_028004_DB_COUNT_CONTROL, db_count_control); + + cmd_buffer->state.context_roll_without_scissor_emitted = true; } static void @@ -1702,8 +1783,7 @@ { struct radv_descriptor_state *descriptors_state = radv_get_descriptors_state(cmd_buffer, bind_point); - uint8_t ptr_size = HAVE_32BIT_POINTERS ? 
1 : 2; - uint32_t size = MAX_SETS * 4 * ptr_size; + uint32_t size = MAX_SETS * 4; uint32_t offset; void *ptr; @@ -1712,14 +1792,12 @@ return; for (unsigned i = 0; i < MAX_SETS; i++) { - uint32_t *uptr = ((uint32_t *)ptr) + i * ptr_size; + uint32_t *uptr = ((uint32_t *)ptr) + i; uint64_t set_va = 0; struct radv_descriptor_set *set = descriptors_state->sets[i]; if (descriptors_state->valid & (1u << i)) set_va = set->va; uptr[0] = set_va & 0xffffffff; - if (ptr_size == 2) - uptr[1] = set_va >> 32; } uint64_t va = radv_buffer_get_va(cmd_buffer->upload.upload_bo); @@ -2014,10 +2092,60 @@ radv_flush_constants(cmd_buffer, VK_SHADER_STAGE_ALL_GRAPHICS); } +struct radv_draw_info { + /** + * Number of vertices. + */ + uint32_t count; + + /** + * Index of the first vertex. + */ + int32_t vertex_offset; + + /** + * First instance id. + */ + uint32_t first_instance; + + /** + * Number of instances. + */ + uint32_t instance_count; + + /** + * First index (indexed draws only). + */ + uint32_t first_index; + + /** + * Whether it's an indexed draw. + */ + bool indexed; + + /** + * Indirect draw parameters resource. + */ + struct radv_buffer *indirect; + uint64_t indirect_offset; + uint32_t stride; + + /** + * Draw count parameters resource. + */ + struct radv_buffer *count_buffer; + uint64_t count_buffer_offset; + + /** + * Stream output parameters resource. + */ + struct radv_buffer *strmout_buffer; + uint64_t strmout_buffer_offset; +}; + static void -radv_emit_draw_registers(struct radv_cmd_buffer *cmd_buffer, bool indexed_draw, - bool instanced_draw, bool indirect_draw, - uint32_t draw_vertex_count) +radv_emit_draw_registers(struct radv_cmd_buffer *cmd_buffer, + const struct radv_draw_info *draw_info) { struct radeon_info *info = &cmd_buffer->device->physical_device->rad_info; struct radv_cmd_state *state = &cmd_buffer->state; @@ -2027,8 +2155,9 @@ /* Draw state. 
*/ ia_multi_vgt_param = - si_get_ia_multi_vgt_param(cmd_buffer, instanced_draw, - indirect_draw, draw_vertex_count); + si_get_ia_multi_vgt_param(cmd_buffer, draw_info->instance_count > 1, + draw_info->indirect, + draw_info->indirect ? 0 : draw_info->count); if (state->last_ia_multi_vgt_param != ia_multi_vgt_param) { if (info->chip_class >= GFX9) { @@ -2048,7 +2177,7 @@ /* Primitive restart. */ primitive_reset_en = - indexed_draw && state->pipeline->graphics.prim_restart_enable; + draw_info->indexed && state->pipeline->graphics.prim_restart_enable; if (primitive_reset_en != state->last_primitive_reset_en) { state->last_primitive_reset_en = primitive_reset_en; @@ -2074,6 +2203,27 @@ state->last_primitive_reset_index = primitive_reset_index; } } + + if (draw_info->strmout_buffer) { + uint64_t va = radv_buffer_get_va(draw_info->strmout_buffer->bo); + + va += draw_info->strmout_buffer->offset + + draw_info->strmout_buffer_offset; + + radeon_set_context_reg(cs, R_028B30_VGT_STRMOUT_DRAW_OPAQUE_VERTEX_STRIDE, + draw_info->stride); + + radeon_emit(cs, PKT3(PKT3_COPY_DATA, 4, 0)); + radeon_emit(cs, COPY_DATA_SRC_SEL(COPY_DATA_SRC_MEM) | + COPY_DATA_DST_SEL(COPY_DATA_REG) | + COPY_DATA_WR_CONFIRM); + radeon_emit(cs, va); + radeon_emit(cs, va >> 32); + radeon_emit(cs, R_028B2C_VGT_STRMOUT_DRAW_OPAQUE_BUFFER_FILLED_SIZE >> 2); + radeon_emit(cs, 0); /* unused */ + + radv_cs_add_buffer(cmd_buffer->device->ws, cs, draw_info->strmout_buffer->bo); + } } static void radv_stage_flush(struct radv_cmd_buffer *cmd_buffer, @@ -2254,11 +2404,21 @@ range.baseArrayLayer = view->base_layer; range.layerCount = cmd_buffer->state.framebuffer->layers; + if (cmd_buffer->state.subpass && cmd_buffer->state.subpass->view_mask) { + /* If the current subpass uses multiview, the driver might have + * performed a fast color/depth clear to the whole image + * (including all layers). 
To make sure the driver will + * decompress the image correctly (if needed), we have to + * account for the "real" number of layers. If the view mask is + * sparse, this will decompress more layers than needed. + */ + range.layerCount = util_last_bit(cmd_buffer->state.subpass->view_mask); + } + radv_handle_image_transition(cmd_buffer, view->image, cmd_buffer->state.attachments[idx].current_layout, - att.layout, 0, 0, &range, - cmd_buffer->state.attachments[idx].pending_clear_aspects); + att.layout, 0, 0, &range); cmd_buffer->state.attachments[idx].current_layout = att.layout; @@ -2717,7 +2877,7 @@ void radv_CmdPushDescriptorSetWithTemplateKHR( VkCommandBuffer commandBuffer, - VkDescriptorUpdateTemplateKHR descriptorUpdateTemplate, + VkDescriptorUpdateTemplate descriptorUpdateTemplate, VkPipelineLayout _layout, uint32_t set, const void* pData) @@ -2789,6 +2949,8 @@ if (!pipeline || pipeline == cmd_buffer->state.emitted_compute_pipeline) return; + assert(!pipeline->ctx_cs.cdw); + cmd_buffer->state.emitted_compute_pipeline = pipeline; radeon_check_space(cmd_buffer->device->ws, cmd_buffer->cs, pipeline->cs.cdw); @@ -2880,6 +3042,11 @@ assert(firstViewport < MAX_VIEWPORTS); assert(total_count >= 1 && total_count <= MAX_VIEWPORTS); + if (!memcmp(state->dynamic.viewport.viewports + firstViewport, + pViewports, viewportCount * sizeof(*pViewports))) { + return; + } + memcpy(state->dynamic.viewport.viewports + firstViewport, pViewports, viewportCount * sizeof(*pViewports)); @@ -2899,6 +3066,11 @@ assert(firstScissor < MAX_SCISSORS); assert(total_count >= 1 && total_count <= MAX_SCISSORS); + if (!memcmp(state->dynamic.scissor.scissors + firstScissor, pScissors, + scissorCount * sizeof(*pScissors))) { + return; + } + memcpy(state->dynamic.scissor.scissors + firstScissor, pScissors, scissorCount * sizeof(*pScissors)); @@ -2910,6 +3082,10 @@ float lineWidth) { RADV_FROM_HANDLE(radv_cmd_buffer, cmd_buffer, commandBuffer); + + if (cmd_buffer->state.dynamic.line_width == lineWidth) 
+ return; + cmd_buffer->state.dynamic.line_width = lineWidth; cmd_buffer->state.dirty |= RADV_CMD_DIRTY_DYNAMIC_LINE_WIDTH; } @@ -2921,12 +3097,19 @@ float depthBiasSlopeFactor) { RADV_FROM_HANDLE(radv_cmd_buffer, cmd_buffer, commandBuffer); + struct radv_cmd_state *state = &cmd_buffer->state; + + if (state->dynamic.depth_bias.bias == depthBiasConstantFactor && + state->dynamic.depth_bias.clamp == depthBiasClamp && + state->dynamic.depth_bias.slope == depthBiasSlopeFactor) { + return; + } - cmd_buffer->state.dynamic.depth_bias.bias = depthBiasConstantFactor; - cmd_buffer->state.dynamic.depth_bias.clamp = depthBiasClamp; - cmd_buffer->state.dynamic.depth_bias.slope = depthBiasSlopeFactor; + state->dynamic.depth_bias.bias = depthBiasConstantFactor; + state->dynamic.depth_bias.clamp = depthBiasClamp; + state->dynamic.depth_bias.slope = depthBiasSlopeFactor; - cmd_buffer->state.dirty |= RADV_CMD_DIRTY_DYNAMIC_DEPTH_BIAS; + state->dirty |= RADV_CMD_DIRTY_DYNAMIC_DEPTH_BIAS; } void radv_CmdSetBlendConstants( @@ -2934,11 +3117,14 @@ const float blendConstants[4]) { RADV_FROM_HANDLE(radv_cmd_buffer, cmd_buffer, commandBuffer); + struct radv_cmd_state *state = &cmd_buffer->state; + + if (!memcmp(state->dynamic.blend_constants, blendConstants, sizeof(float) * 4)) + return; - memcpy(cmd_buffer->state.dynamic.blend_constants, - blendConstants, sizeof(float) * 4); + memcpy(state->dynamic.blend_constants, blendConstants, sizeof(float) * 4); - cmd_buffer->state.dirty |= RADV_CMD_DIRTY_DYNAMIC_BLEND_CONSTANTS; + state->dirty |= RADV_CMD_DIRTY_DYNAMIC_BLEND_CONSTANTS; } void radv_CmdSetDepthBounds( @@ -2947,11 +3133,17 @@ float maxDepthBounds) { RADV_FROM_HANDLE(radv_cmd_buffer, cmd_buffer, commandBuffer); + struct radv_cmd_state *state = &cmd_buffer->state; - cmd_buffer->state.dynamic.depth_bounds.min = minDepthBounds; - cmd_buffer->state.dynamic.depth_bounds.max = maxDepthBounds; + if (state->dynamic.depth_bounds.min == minDepthBounds && + state->dynamic.depth_bounds.max == 
maxDepthBounds) { + return; + } + + state->dynamic.depth_bounds.min = minDepthBounds; + state->dynamic.depth_bounds.max = maxDepthBounds; - cmd_buffer->state.dirty |= RADV_CMD_DIRTY_DYNAMIC_DEPTH_BOUNDS; + state->dirty |= RADV_CMD_DIRTY_DYNAMIC_DEPTH_BOUNDS; } void radv_CmdSetStencilCompareMask( @@ -2960,13 +3152,21 @@ uint32_t compareMask) { RADV_FROM_HANDLE(radv_cmd_buffer, cmd_buffer, commandBuffer); + struct radv_cmd_state *state = &cmd_buffer->state; + bool front_same = state->dynamic.stencil_compare_mask.front == compareMask; + bool back_same = state->dynamic.stencil_compare_mask.back == compareMask; + + if ((!(faceMask & VK_STENCIL_FACE_FRONT_BIT) || front_same) && + (!(faceMask & VK_STENCIL_FACE_BACK_BIT) || back_same)) { + return; + } if (faceMask & VK_STENCIL_FACE_FRONT_BIT) - cmd_buffer->state.dynamic.stencil_compare_mask.front = compareMask; + state->dynamic.stencil_compare_mask.front = compareMask; if (faceMask & VK_STENCIL_FACE_BACK_BIT) - cmd_buffer->state.dynamic.stencil_compare_mask.back = compareMask; + state->dynamic.stencil_compare_mask.back = compareMask; - cmd_buffer->state.dirty |= RADV_CMD_DIRTY_DYNAMIC_STENCIL_COMPARE_MASK; + state->dirty |= RADV_CMD_DIRTY_DYNAMIC_STENCIL_COMPARE_MASK; } void radv_CmdSetStencilWriteMask( @@ -2975,13 +3175,21 @@ uint32_t writeMask) { RADV_FROM_HANDLE(radv_cmd_buffer, cmd_buffer, commandBuffer); + struct radv_cmd_state *state = &cmd_buffer->state; + bool front_same = state->dynamic.stencil_write_mask.front == writeMask; + bool back_same = state->dynamic.stencil_write_mask.back == writeMask; + + if ((!(faceMask & VK_STENCIL_FACE_FRONT_BIT) || front_same) && + (!(faceMask & VK_STENCIL_FACE_BACK_BIT) || back_same)) { + return; + } if (faceMask & VK_STENCIL_FACE_FRONT_BIT) - cmd_buffer->state.dynamic.stencil_write_mask.front = writeMask; + state->dynamic.stencil_write_mask.front = writeMask; if (faceMask & VK_STENCIL_FACE_BACK_BIT) - cmd_buffer->state.dynamic.stencil_write_mask.back = writeMask; + 
state->dynamic.stencil_write_mask.back = writeMask; - cmd_buffer->state.dirty |= RADV_CMD_DIRTY_DYNAMIC_STENCIL_WRITE_MASK; + state->dirty |= RADV_CMD_DIRTY_DYNAMIC_STENCIL_WRITE_MASK; } void radv_CmdSetStencilReference( @@ -2990,6 +3198,14 @@ uint32_t reference) { RADV_FROM_HANDLE(radv_cmd_buffer, cmd_buffer, commandBuffer); + struct radv_cmd_state *state = &cmd_buffer->state; + bool front_same = state->dynamic.stencil_reference.front == reference; + bool back_same = state->dynamic.stencil_reference.back == reference; + + if ((!(faceMask & VK_STENCIL_FACE_FRONT_BIT) || front_same) && + (!(faceMask & VK_STENCIL_FACE_BACK_BIT) || back_same)) { + return; + } if (faceMask & VK_STENCIL_FACE_FRONT_BIT) cmd_buffer->state.dynamic.stencil_reference.front = reference; @@ -3012,6 +3228,11 @@ assert(firstDiscardRectangle < MAX_DISCARD_RECTANGLES); assert(total_count >= 1 && total_count <= MAX_DISCARD_RECTANGLES); + if (!memcmp(state->dynamic.discard_rectangle.rectangles + firstDiscardRectangle, + pDiscardRectangles, discardRectangleCount * sizeof(*pDiscardRectangles))) { + return; + } + typed_memcpy(&state->dynamic.discard_rectangle.rectangles[firstDiscardRectangle], pDiscardRectangles, discardRectangleCount); @@ -3177,7 +3398,7 @@ void radv_TrimCommandPool( VkDevice device, VkCommandPool commandPool, - VkCommandPoolTrimFlagsKHR flags) + VkCommandPoolTrimFlags flags) { RADV_FROM_HANDLE(radv_cmd_pool, pool, commandPool); @@ -3341,57 +3562,6 @@ } } -struct radv_draw_info { - /** - * Number of vertices. - */ - uint32_t count; - - /** - * Index of the first vertex. - */ - int32_t vertex_offset; - - /** - * First instance id. - */ - uint32_t first_instance; - - /** - * Number of instances. - */ - uint32_t instance_count; - - /** - * First index (indexed draws only). - */ - uint32_t first_index; - - /** - * Whether it's an indexed draw. - */ - bool indexed; - - /** - * Indirect draw parameters resource. 
- */ - struct radv_buffer *indirect; - uint64_t indirect_offset; - uint32_t stride; - - /** - * Draw count parameters resource. - */ - struct radv_buffer *count_buffer; - uint64_t count_buffer_offset; - - /** - * Stream output parameters resource. - */ - struct radv_buffer *strmout_buffer; - uint64_t strmout_buffer_offset; -}; - static void radv_emit_draw_packets(struct radv_cmd_buffer *cmd_buffer, const struct radv_draw_info *info) @@ -3400,27 +3570,6 @@ struct radeon_winsys *ws = cmd_buffer->device->ws; struct radeon_cmdbuf *cs = cmd_buffer->cs; - if (info->strmout_buffer) { - uint64_t va = radv_buffer_get_va(info->strmout_buffer->bo); - - va += info->strmout_buffer->offset + - info->strmout_buffer_offset; - - radeon_set_context_reg(cs, R_028B30_VGT_STRMOUT_DRAW_OPAQUE_VERTEX_STRIDE, - info->stride); - - radeon_emit(cs, PKT3(PKT3_COPY_DATA, 4, 0)); - radeon_emit(cs, COPY_DATA_SRC_SEL(COPY_DATA_SRC_MEM) | - COPY_DATA_DST_SEL(COPY_DATA_REG) | - COPY_DATA_WR_CONFIRM); - radeon_emit(cs, va); - radeon_emit(cs, va >> 32); - radeon_emit(cs, R_028B2C_VGT_STRMOUT_DRAW_OPAQUE_BUFFER_FILLED_SIZE >> 2); - radeon_emit(cs, 0); /* unused */ - - radv_cs_add_buffer(ws, cs, info->strmout_buffer->bo); - } - if (info->indirect) { uint64_t va = radv_buffer_get_va(info->indirect->bo); uint64_t count_va = 0; @@ -3539,31 +3688,30 @@ * any context registers. */ static bool radv_need_late_scissor_emission(struct radv_cmd_buffer *cmd_buffer, - bool indexed_draw) + const struct radv_draw_info *info) { struct radv_cmd_state *state = &cmd_buffer->state; if (!cmd_buffer->device->physical_device->has_scissor_bug) return false; + if (cmd_buffer->state.context_roll_without_scissor_emitted || info->strmout_buffer) + return true; + uint32_t used_states = cmd_buffer->state.pipeline->graphics.needed_dynamic_state | ~RADV_CMD_DIRTY_DYNAMIC_ALL; /* Index, vertex and streamout buffers don't change context regs, and - * pipeline is handled later. + * pipeline is already handled. 
*/ used_states &= ~(RADV_CMD_DIRTY_INDEX_BUFFER | RADV_CMD_DIRTY_VERTEX_BUFFER | RADV_CMD_DIRTY_STREAMOUT_BUFFER | RADV_CMD_DIRTY_PIPELINE); - /* Assume all state changes except these two can imply context rolls. */ if (cmd_buffer->state.dirty & used_states) return true; - if (cmd_buffer->state.emitted_pipeline != cmd_buffer->state.pipeline) - return true; - - if (indexed_draw && state->pipeline->graphics.prim_restart_enable && + if (info->indexed && state->pipeline->graphics.prim_restart_enable && (state->index_type ? 0xffffffffu : 0xffffu) != state->last_primitive_reset_index) return true; @@ -3574,7 +3722,7 @@ radv_emit_all_graphics_states(struct radv_cmd_buffer *cmd_buffer, const struct radv_draw_info *info) { - bool late_scissor_emission = radv_need_late_scissor_emission(cmd_buffer, info->indexed); + bool late_scissor_emission; if ((cmd_buffer->state.dirty & RADV_CMD_DIRTY_FRAMEBUFFER) || cmd_buffer->state.emitted_pipeline != cmd_buffer->state.pipeline) @@ -3583,6 +3731,12 @@ if (cmd_buffer->state.dirty & RADV_CMD_DIRTY_PIPELINE) radv_emit_graphics_pipeline(cmd_buffer); + /* This should be before the cmd_buffer->state.dirty is cleared + * (excluding RADV_CMD_DIRTY_PIPELINE) and after + * cmd_buffer->state.context_roll_without_scissor_emitted is set. */ + late_scissor_emission = + radv_need_late_scissor_emission(cmd_buffer, info); + if (cmd_buffer->state.dirty & RADV_CMD_DIRTY_FRAMEBUFFER) radv_emit_framebuffer_state(cmd_buffer); @@ -3602,9 +3756,7 @@ radv_cmd_buffer_flush_dynamic_state(cmd_buffer); - radv_emit_draw_registers(cmd_buffer, info->indexed, - info->instance_count > 1, info->indirect, - info->indirect ? 0 : info->count); + radv_emit_draw_registers(cmd_buffer, info); if (late_scissor_emission) radv_emit_scissor(cmd_buffer); @@ -3626,6 +3778,19 @@ radeon_check_space(cmd_buffer->device->ws, cmd_buffer->cs, 4096); + if (likely(!info->indirect)) { + /* SI-CI treat instance_count==0 as instance_count==1. 
There is + * no workaround for indirect draws, but we can at least skip + * direct draws. + */ + if (unlikely(!info->instance_count)) + return; + + /* Handle count == 0. */ + if (unlikely(!info->count && !info->strmout_buffer)) + return; + } + /* Use optimal packet order based on whether we need to sync the * pipeline. */ @@ -3995,7 +4160,6 @@ } if (loc->sgpr_idx != -1) { - assert(!loc->indirect); assert(loc->num_sgprs == 3); radeon_set_sh_reg_seq(cs, R_00B900_COMPUTE_USER_DATA_0 + @@ -4237,14 +4401,12 @@ VkImageLayout dst_layout, unsigned src_queue_mask, unsigned dst_queue_mask, - const VkImageSubresourceRange *range, - VkImageAspectFlags pending_clears) + const VkImageSubresourceRange *range) { if (!radv_image_has_htile(image)) return; - if (src_layout == VK_IMAGE_LAYOUT_UNDEFINED && - radv_layout_has_htile(image, dst_layout, dst_queue_mask)) { + if (src_layout == VK_IMAGE_LAYOUT_UNDEFINED) { /* TODO: merge with the clear if applicable */ radv_initialize_htile(cmd_buffer, image, range, 0); } else if (!radv_layout_is_htile_compressed(image, src_layout, src_queue_mask) && @@ -4281,6 +4443,27 @@ state->flush_bits |= RADV_CMD_FLAG_FLUSH_AND_INV_CB_META; } +void radv_initialize_fmask(struct radv_cmd_buffer *cmd_buffer, + struct radv_image *image) +{ + struct radv_cmd_state *state = &cmd_buffer->state; + static const uint32_t fmask_clear_values[4] = { + 0x00000000, + 0x02020202, + 0xE4E4E4E4, + 0x76543210 + }; + uint32_t log2_samples = util_logbase2(image->info.samples); + uint32_t value = fmask_clear_values[log2_samples]; + + state->flush_bits |= RADV_CMD_FLAG_FLUSH_AND_INV_CB | + RADV_CMD_FLAG_FLUSH_AND_INV_CB_META; + + state->flush_bits |= radv_clear_fmask(cmd_buffer, image, value); + + state->flush_bits |= RADV_CMD_FLAG_FLUSH_AND_INV_CB_META; +} + void radv_initialize_dcc(struct radv_cmd_buffer *cmd_buffer, struct radv_image *image, uint32_t value) { @@ -4316,6 +4499,10 @@ radv_initialise_cmask(cmd_buffer, image, value); } + if (radv_image_has_fmask(image)) { + 
radv_initialize_fmask(cmd_buffer, image); + } + if (radv_image_has_dcc(image)) { uint32_t value = 0xffffffffu; /* Fully expanded mode. */ bool need_decompress_pass = false; @@ -4328,8 +4515,8 @@ radv_initialize_dcc(cmd_buffer, image, value); - radv_set_dcc_need_cmask_elim_pred(cmd_buffer, image, - need_decompress_pass); + radv_update_fce_metadata(cmd_buffer, image, + need_decompress_pass); } if (radv_image_has_cmask(image) || radv_image_has_dcc(image)) { @@ -4371,6 +4558,13 @@ !radv_layout_can_fast_clear(image, dst_layout, dst_queue_mask)) { radv_fast_clear_flush_image_inplace(cmd_buffer, image, range); } + + if (radv_image_has_fmask(image)) { + if (src_layout != VK_IMAGE_LAYOUT_GENERAL && + dst_layout == VK_IMAGE_LAYOUT_GENERAL) { + radv_expand_fmask_image_inplace(cmd_buffer, image, range); + } + } } } @@ -4380,8 +4574,7 @@ VkImageLayout dst_layout, uint32_t src_family, uint32_t dst_family, - const VkImageSubresourceRange *range, - VkImageAspectFlags pending_clears) + const VkImageSubresourceRange *range) { if (image->exclusive && src_family != dst_family) { /* This is an acquire or a release operation and there will be @@ -4411,7 +4604,7 @@ radv_handle_depth_image_transition(cmd_buffer, image, src_layout, dst_layout, src_queue_mask, dst_queue_mask, - range, pending_clears); + range); } else { radv_handle_color_image_transition(cmd_buffer, image, src_layout, dst_layout, @@ -4448,7 +4641,7 @@ MAYBE_UNUSED unsigned cdw_max = radeon_check_space(cmd_buffer->device->ws, cs, 7); - si_emit_wait_fence(cs, va, 1, 0xffffffff); + radv_cp_wait_mem(cs, WAIT_REG_MEM_EQUAL, va, 1, 0xffffffff); assert(cmd_buffer->cs->cdw <= cdw_max); } @@ -4485,14 +4678,15 @@ pImageMemoryBarriers[i].newLayout, pImageMemoryBarriers[i].srcQueueFamilyIndex, pImageMemoryBarriers[i].dstQueueFamilyIndex, - &pImageMemoryBarriers[i].subresourceRange, - 0); + &pImageMemoryBarriers[i].subresourceRange); } /* Make sure CP DMA is idle because the driver might have performed a * DMA operation for copying or 
filling buffers/images. */ - si_cp_dma_wait_for_idle(cmd_buffer); + if (info->srcStageMask & (VK_PIPELINE_STAGE_TRANSFER_BIT | + VK_PIPELINE_STAGE_BOTTOM_OF_PIPE_BIT)) + si_cp_dma_wait_for_idle(cmd_buffer); cmd_buffer->state.flush_bits |= dst_flush_bits; } @@ -4549,14 +4743,16 @@ /* Make sure CP DMA is idle because the driver might have performed a * DMA operation for copying or filling buffers/images. */ - si_cp_dma_wait_for_idle(cmd_buffer); + if (stageMask & (VK_PIPELINE_STAGE_TRANSFER_BIT | + VK_PIPELINE_STAGE_BOTTOM_OF_PIPE_BIT)) + si_cp_dma_wait_for_idle(cmd_buffer); /* TODO: Emit EOS events for syncing PS/CS stages. */ if (!(stageMask & ~top_of_pipe_flags)) { /* Just need to sync the PFP engine. */ radeon_emit(cs, PKT3(PKT3_WRITE_DATA, 3, 0)); - radeon_emit(cs, S_370_DST_SEL(V_370_MEM_ASYNC) | + radeon_emit(cs, S_370_DST_SEL(V_370_MEM) | S_370_WR_CONFIRM(1) | S_370_ENGINE_SEL(V_370_PFP)); radeon_emit(cs, va); @@ -4565,7 +4761,7 @@ } else if (!(stageMask & ~post_index_fetch_flags)) { /* Sync ME because PFP reads index and indirect buffers. 
*/ radeon_emit(cs, PKT3(PKT3_WRITE_DATA, 3, 0)); - radeon_emit(cs, S_370_DST_SEL(V_370_MEM_ASYNC) | + radeon_emit(cs, S_370_DST_SEL(V_370_MEM) | S_370_WR_CONFIRM(1) | S_370_ENGINE_SEL(V_370_ME)); radeon_emit(cs, va); @@ -4577,7 +4773,7 @@ cmd_buffer->device->physical_device->rad_info.chip_class, radv_cmd_buffer_uses_mec(cmd_buffer), V_028A90_BOTTOM_OF_PIPE_TS, 0, - EOP_DATA_SEL_VALUE_32BIT, va, 2, value, + EOP_DATA_SEL_VALUE_32BIT, va, value, cmd_buffer->gfx9_eop_bug_va); } @@ -4709,7 +4905,7 @@ enabled_mask |= 1 << idx; } - cmd_buffer->state.streamout.enabled_mask = enabled_mask; + cmd_buffer->state.streamout.enabled_mask |= enabled_mask; cmd_buffer->state.dirty |= RADV_CMD_DIRTY_STREAMOUT_BUFFER; } @@ -4729,6 +4925,8 @@ S_028B94_STREAMOUT_3_EN(so->streamout_enabled)); radeon_emit(cs, so->hw_enabled_mask & so->enabled_stream_buffers_mask); + + cmd_buffer->state.context_roll_without_scissor_emitted = true; } static void @@ -4805,6 +5003,8 @@ radeon_emit(cs, sb[i].size >> 2); /* BUFFER_SIZE (in DW) */ radeon_emit(cs, so->stride_in_dw[i]); /* VTX_STRIDE (in DW) */ + cmd_buffer->state.context_roll_without_scissor_emitted = true; + if (counter_buffer_idx >= 0 && pCounterBuffers && pCounterBuffers[counter_buffer_idx]) { /* The array of counter buffers is optional. */ RADV_FROM_HANDLE(radv_buffer, buffer, pCounterBuffers[counter_buffer_idx]); @@ -4885,6 +5085,8 @@ * that the primitives-emitted query won't increment. 
*/ radeon_set_context_reg(cs, R_028AD0_VGT_STRMOUT_BUFFER_SIZE_0 + 16*i, 0); + + cmd_buffer->state.context_roll_without_scissor_emitted = true; } radv_set_streamout_enable(cmd_buffer, false); diff -Nru mesa-18.3.3/src/amd/vulkan/radv_debug.c mesa-19.0.1/src/amd/vulkan/radv_debug.c --- mesa-18.3.3/src/amd/vulkan/radv_debug.c 2018-09-27 19:13:53.000000000 +0000 +++ mesa-19.0.1/src/amd/vulkan/radv_debug.c 2019-03-31 23:16:37.000000000 +0000 @@ -63,7 +63,8 @@ device->trace_bo = ws->buffer_create(ws, TRACE_BO_SIZE, 8, RADEON_DOMAIN_VRAM, RADEON_FLAG_CPU_ACCESS| - RADEON_FLAG_NO_INTERPROCESS_SHARING); + RADEON_FLAG_NO_INTERPROCESS_SHARING, + RADV_BO_PRIORITY_UPLOAD_BUFFER); if (!device->trace_bo) return false; diff -Nru mesa-18.3.3/src/amd/vulkan/radv_debug.h mesa-19.0.1/src/amd/vulkan/radv_debug.h --- mesa-18.3.3/src/amd/vulkan/radv_debug.h 2018-09-27 19:13:53.000000000 +0000 +++ mesa-19.0.1/src/amd/vulkan/radv_debug.h 2019-03-31 23:16:37.000000000 +0000 @@ -50,15 +50,15 @@ RADV_DEBUG_STARTUP = 0x100000, RADV_DEBUG_CHECKIR = 0x200000, RADV_DEBUG_NOTHREADLLVM = 0x400000, + RADV_DEBUG_NOBINNING = 0x800000, }; enum { RADV_PERFTEST_NO_BATCHCHAIN = 0x1, RADV_PERFTEST_SISCHED = 0x2, RADV_PERFTEST_LOCAL_BOS = 0x4, - RADV_PERFTEST_BINNING = 0x8, - RADV_PERFTEST_OUT_OF_ORDER = 0x10, - RADV_PERFTEST_DCC_MSAA = 0x20, + RADV_PERFTEST_OUT_OF_ORDER = 0x8, + RADV_PERFTEST_DCC_MSAA = 0x10, }; bool diff -Nru mesa-18.3.3/src/amd/vulkan/radv_descriptor_set.c mesa-19.0.1/src/amd/vulkan/radv_descriptor_set.c --- mesa-18.3.3/src/amd/vulkan/radv_descriptor_set.c 2018-12-07 18:58:04.000000000 +0000 +++ mesa-19.0.1/src/amd/vulkan/radv_descriptor_set.c 2019-03-31 23:16:37.000000000 +0000 @@ -84,7 +84,9 @@ uint32_t immutable_sampler_count = 0; for (uint32_t j = 0; j < pCreateInfo->bindingCount; j++) { max_binding = MAX2(max_binding, pCreateInfo->pBindings[j].binding); - if (pCreateInfo->pBindings[j].pImmutableSamplers) + if ((pCreateInfo->pBindings[j].descriptorType == 
VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER || + pCreateInfo->pBindings[j].descriptorType == VK_DESCRIPTOR_TYPE_SAMPLER) && + pCreateInfo->pBindings[j].pImmutableSamplers) immutable_sampler_count += pCreateInfo->pBindings[j].descriptorCount; } @@ -182,7 +184,9 @@ set_layout->has_variable_descriptors = true; } - if (binding->pImmutableSamplers) { + if ((binding->descriptorType == VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER || + binding->descriptorType == VK_DESCRIPTOR_TYPE_SAMPLER) && + binding->pImmutableSamplers) { set_layout->binding[b].immutable_samplers_offset = samplers_offset; set_layout->binding[b].immutable_samplers_equal = has_equal_immutable_samplers(binding->pImmutableSamplers, binding->descriptorCount); @@ -345,6 +349,7 @@ layout->num_sets = pCreateInfo->setLayoutCount; unsigned dynamic_offset_count = 0; + uint16_t dynamic_shader_stages = 0; _mesa_sha1_init(&ctx); @@ -356,6 +361,7 @@ layout->set[set].dynamic_offset_start = dynamic_offset_count; for (uint32_t b = 0; b < set_layout->binding_count; b++) { dynamic_offset_count += set_layout->binding[b].array_size * set_layout->binding[b].dynamic_offset_count; + dynamic_shader_stages |= set_layout->dynamic_shader_stages; if (set_layout->binding[b].immutable_samplers_offset) _mesa_sha1_update(&ctx, radv_immutable_samplers(set_layout, set_layout->binding + b), set_layout->binding[b].array_size * 4 * sizeof(uint32_t)); @@ -365,6 +371,7 @@ } layout->dynamic_offset_count = dynamic_offset_count; + layout->dynamic_shader_stages = dynamic_shader_stages; layout->push_constant_size = 0; for (unsigned i = 0; i < pCreateInfo->pushConstantRangeCount; ++i) { @@ -412,7 +419,7 @@ if (pool->host_memory_base) { if (pool->host_memory_end - pool->host_memory_ptr < mem_size) - return vk_error(device->instance, VK_ERROR_OUT_OF_POOL_MEMORY_KHR); + return vk_error(device->instance, VK_ERROR_OUT_OF_POOL_MEMORY); set = (struct radv_descriptor_set*)pool->host_memory_ptr; pool->host_memory_ptr += mem_size; @@ -437,7 +444,7 @@ if 
(!pool->host_memory_base && pool->entry_count == pool->max_entry_count) { vk_free2(&device->alloc, NULL, set); - return vk_error(device->instance, VK_ERROR_OUT_OF_POOL_MEMORY_KHR); + return vk_error(device->instance, VK_ERROR_OUT_OF_POOL_MEMORY); } /* try to allocate linearly first, so that we don't spend @@ -466,7 +473,7 @@ if (pool->size - offset < layout_size) { vk_free2(&device->alloc, NULL, set); - return vk_error(device->instance, VK_ERROR_OUT_OF_POOL_MEMORY_KHR); + return vk_error(device->instance, VK_ERROR_OUT_OF_POOL_MEMORY); } set->bo = pool->bo; set->mapped_ptr = (uint32_t*)(pool->mapped_ptr + offset); @@ -478,7 +485,7 @@ pool->entries[index].set = set; pool->entry_count++; } else - return vk_error(device->instance, VK_ERROR_OUT_OF_POOL_MEMORY_KHR); + return vk_error(device->instance, VK_ERROR_OUT_OF_POOL_MEMORY); } if (layout->has_immutable_samplers) { @@ -595,7 +602,8 @@ RADEON_DOMAIN_VRAM, RADEON_FLAG_NO_INTERPROCESS_SHARING | RADEON_FLAG_READ_ONLY | - RADEON_FLAG_32BIT); + RADEON_FLAG_32BIT, + RADV_BO_PRIORITY_DESCRIPTOR); pool->mapped_ptr = (uint8_t*)device->ws->buffer_map(pool->bo); } pool->size = bo_size; @@ -961,9 +969,11 @@ } src_ptr += src_binding_layout->size / 4; dst_ptr += dst_binding_layout->size / 4; - dst_buffer_list[j] = src_buffer_list[j]; - ++src_buffer_list; - ++dst_buffer_list; + + if (src_binding_layout->type != VK_DESCRIPTOR_TYPE_SAMPLER) { + /* Sampler descriptors don't have a buffer list. 
*/ + dst_buffer_list[j] = src_buffer_list[j]; + } } } } @@ -982,9 +992,9 @@ } VkResult radv_CreateDescriptorUpdateTemplate(VkDevice _device, - const VkDescriptorUpdateTemplateCreateInfoKHR *pCreateInfo, + const VkDescriptorUpdateTemplateCreateInfo *pCreateInfo, const VkAllocationCallbacks *pAllocator, - VkDescriptorUpdateTemplateKHR *pDescriptorUpdateTemplate) + VkDescriptorUpdateTemplate *pDescriptorUpdateTemplate) { RADV_FROM_HANDLE(radv_device, device, _device); RADV_FROM_HANDLE(radv_descriptor_set_layout, set_layout, pCreateInfo->descriptorSetLayout); @@ -1002,7 +1012,7 @@ templ->bind_point = pCreateInfo->pipelineBindPoint; for (i = 0; i < entry_count; i++) { - const VkDescriptorUpdateTemplateEntryKHR *entry = &pCreateInfo->pDescriptorUpdateEntries[i]; + const VkDescriptorUpdateTemplateEntry *entry = &pCreateInfo->pDescriptorUpdateEntries[i]; const struct radv_descriptor_set_binding_layout *binding_layout = set_layout->binding + entry->dstBinding; const uint32_t buffer_offset = binding_layout->buffer_offset + entry->dstArrayElement; @@ -1015,7 +1025,7 @@ switch (entry->descriptorType) { case VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER_DYNAMIC: case VK_DESCRIPTOR_TYPE_STORAGE_BUFFER_DYNAMIC: - assert(pCreateInfo->templateType == VK_DESCRIPTOR_UPDATE_TEMPLATE_TYPE_DESCRIPTOR_SET_KHR); + assert(pCreateInfo->templateType == VK_DESCRIPTOR_UPDATE_TEMPLATE_TYPE_DESCRIPTOR_SET); dst_offset = binding_layout->dynamic_offset_offset + entry->dstArrayElement; dst_stride = 0; /* Not used */ break; @@ -1055,7 +1065,7 @@ } void radv_DestroyDescriptorUpdateTemplate(VkDevice _device, - VkDescriptorUpdateTemplateKHR descriptorUpdateTemplate, + VkDescriptorUpdateTemplate descriptorUpdateTemplate, const VkAllocationCallbacks *pAllocator) { RADV_FROM_HANDLE(radv_device, device, _device); @@ -1070,7 +1080,7 @@ void radv_update_descriptor_set_with_template(struct radv_device *device, struct radv_cmd_buffer *cmd_buffer, struct radv_descriptor_set *set, - VkDescriptorUpdateTemplateKHR 
descriptorUpdateTemplate, + VkDescriptorUpdateTemplate descriptorUpdateTemplate, const void *pData) { RADV_FROM_HANDLE(radv_descriptor_update_template, templ, descriptorUpdateTemplate); @@ -1137,7 +1147,7 @@ void radv_UpdateDescriptorSetWithTemplate(VkDevice _device, VkDescriptorSet descriptorSet, - VkDescriptorUpdateTemplateKHR descriptorUpdateTemplate, + VkDescriptorUpdateTemplate descriptorUpdateTemplate, const void *pData) { RADV_FROM_HANDLE(radv_device, device, _device); diff -Nru mesa-18.3.3/src/amd/vulkan/radv_descriptor_set.h mesa-19.0.1/src/amd/vulkan/radv_descriptor_set.h --- mesa-18.3.3/src/amd/vulkan/radv_descriptor_set.h 2018-04-24 14:37:08.000000000 +0000 +++ mesa-19.0.1/src/amd/vulkan/radv_descriptor_set.h 2019-03-31 23:16:37.000000000 +0000 @@ -85,6 +85,7 @@ uint32_t num_sets; uint32_t push_constant_size; uint32_t dynamic_offset_count; + uint16_t dynamic_shader_stages; unsigned char sha1[20]; }; diff -Nru mesa-18.3.3/src/amd/vulkan/radv_device.c mesa-19.0.1/src/amd/vulkan/radv_device.c --- mesa-18.3.3/src/amd/vulkan/radv_device.c 2019-02-01 12:03:20.000000000 +0000 +++ mesa-19.0.1/src/amd/vulkan/radv_device.c 2019-03-31 23:16:37.000000000 +0000 @@ -45,10 +45,10 @@ #include "sid.h" #include "git_sha1.h" #include "gfx9d.h" -#include "addrlib/gfx9/chip/gfx9_enum.h" #include "util/build_id.h" #include "util/debug.h" #include "util/mesa-sha1.h" +#include "compiler/glsl_types.h" static int radv_device_get_cache_uuid(enum radeon_family family, void *uuid) @@ -123,19 +123,30 @@ snprintf(name, name_len, "%s%s", chip_string, llvm_string); } +static uint64_t +radv_get_visible_vram_size(struct radv_physical_device *device) +{ + return MIN2(device->rad_info.vram_size, device->rad_info.vram_vis_size); +} + +static uint64_t +radv_get_vram_size(struct radv_physical_device *device) +{ + return device->rad_info.vram_size - radv_get_visible_vram_size(device); +} + static void radv_physical_device_init_mem_types(struct radv_physical_device *device) { 
STATIC_ASSERT(RADV_MEM_HEAP_COUNT <= VK_MAX_MEMORY_HEAPS); - uint64_t visible_vram_size = MIN2(device->rad_info.vram_size, - device->rad_info.vram_vis_size); - + uint64_t visible_vram_size = radv_get_visible_vram_size(device); + uint64_t vram_size = radv_get_vram_size(device); int vram_index = -1, visible_vram_index = -1, gart_index = -1; device->memory_properties.memoryHeapCount = 0; - if (device->rad_info.vram_size - visible_vram_size > 0) { + if (vram_size > 0) { vram_index = device->memory_properties.memoryHeapCount++; device->memory_properties.memoryHeaps[vram_index] = (VkMemoryHeap) { - .size = device->rad_info.vram_size - visible_vram_size, + .size = vram_size, .flags = VK_MEMORY_HEAP_DEVICE_LOCAL_BIT, }; } @@ -271,8 +282,6 @@ device->_loader_data.loaderMagic = ICD_LOADER_MAGIC; device->instance = instance; - assert(strlen(path) < ARRAY_SIZE(device->path)); - strncpy(device->path, path, ARRAY_SIZE(device->path)); device->ws = radv_amdgpu_winsys_create(fd, instance->debug_flags, instance->perftest_flags); @@ -329,7 +338,7 @@ device->rad_info.chip_class > GFX9) fprintf(stderr, "WARNING: radv is not a conformant vulkan implementation, testing use only.\n"); - radv_get_driver_uuid(&device->device_uuid); + radv_get_driver_uuid(&device->driver_uuid); radv_get_device_uuid(&device->rad_info, &device->device_uuid); if (device->rad_info.family == CHIP_STONEY || @@ -361,6 +370,11 @@ device->dcc_msaa_allowed = (device->instance->perftest_flags & RADV_PERFTEST_DCC_MSAA); + /* TODO: Figure out how to use LOAD_CONTEXT_REG on SI/CIK. 
*/ + device->has_load_ctx_reg_pkt = device->rad_info.chip_class >= GFX9 || + (device->rad_info.chip_class >= VI && + device->rad_info.me_fw_feature >= 41); + radv_physical_device_init_mem_types(device); radv_fill_device_extension_table(device, &device->supported_extensions); @@ -451,6 +465,7 @@ {"startup", RADV_DEBUG_STARTUP}, {"checkir", RADV_DEBUG_CHECKIR}, {"nothreadllvm", RADV_DEBUG_NOTHREADLLVM}, + {"nobinning", RADV_DEBUG_NOBINNING}, {NULL, 0} }; @@ -465,7 +480,6 @@ {"nobatchchain", RADV_PERFTEST_NO_BATCHCHAIN}, {"sisched", RADV_PERFTEST_SISCHED}, {"localbos", RADV_PERFTEST_LOCAL_BOS}, - {"binning", RADV_PERFTEST_BINNING}, {"dccmsaa", RADV_PERFTEST_DCC_MSAA}, {NULL, 0} }; @@ -597,6 +611,7 @@ VG(VALGRIND_DESTROY_MEMPOOL(instance)); + _mesa_glsl_release_types(); _mesa_locale_fini(); vk_debug_report_instance_destroy(&instance->debug_report_callbacks); @@ -726,8 +741,7 @@ .alphaToOne = true, .multiViewport = true, .samplerAnisotropy = true, - .textureCompressionETC2 = pdevice->rad_info.chip_class >= GFX9 || - pdevice->rad_info.family == CHIP_STONEY, + .textureCompressionETC2 = radv_device_supports_etc(pdevice), .textureCompressionASTC_LDR = false, .textureCompressionBC = true, .occlusionQueryPrecise = true, @@ -737,7 +751,7 @@ .shaderTessellationAndGeometryPointSize = true, .shaderImageGatherExtended = true, .shaderStorageImageExtendedFormats = true, - .shaderStorageImageMultisample = false, + .shaderStorageImageMultisample = pdevice->rad_info.chip_class >= VI, .shaderUniformBufferArrayDynamicIndexing = true, .shaderSampledImageArrayDynamicIndexing = true, .shaderStorageBufferArrayDynamicIndexing = true, @@ -748,7 +762,7 @@ .shaderCullDistance = true, .shaderFloat64 = true, .shaderInt64 = true, - .shaderInt16 = pdevice->rad_info.chip_class >= GFX9 && HAVE_LLVM >= 0x700, + .shaderInt16 = pdevice->rad_info.chip_class >= GFX9, .sparseBinding = true, .variableMultisampleRate = true, .inheritedQueries = true, @@ -757,19 +771,19 @@ void radv_GetPhysicalDeviceFeatures2( 
VkPhysicalDevice physicalDevice, - VkPhysicalDeviceFeatures2KHR *pFeatures) + VkPhysicalDeviceFeatures2 *pFeatures) { RADV_FROM_HANDLE(radv_physical_device, pdevice, physicalDevice); vk_foreach_struct(ext, pFeatures->pNext) { switch (ext->sType) { - case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_VARIABLE_POINTER_FEATURES_KHR: { - VkPhysicalDeviceVariablePointerFeaturesKHR *features = (void *)ext; + case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_VARIABLE_POINTER_FEATURES: { + VkPhysicalDeviceVariablePointerFeatures *features = (void *)ext; features->variablePointersStorageBuffer = true; - features->variablePointers = false; + features->variablePointers = true; break; } - case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_MULTIVIEW_FEATURES_KHR: { - VkPhysicalDeviceMultiviewFeaturesKHR *features = (VkPhysicalDeviceMultiviewFeaturesKHR*)ext; + case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_MULTIVIEW_FEATURES: { + VkPhysicalDeviceMultiviewFeatures *features = (VkPhysicalDeviceMultiviewFeatures*)ext; features->multiview = true; features->multiviewGeometryShader = true; features->multiviewTessellationShader = true; @@ -790,11 +804,11 @@ case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_16BIT_STORAGE_FEATURES: { VkPhysicalDevice16BitStorageFeatures *features = (VkPhysicalDevice16BitStorageFeatures*)ext; - bool enabled = HAVE_LLVM >= 0x0700 && pdevice->rad_info.chip_class >= VI; + bool enabled = pdevice->rad_info.chip_class >= VI; features->storageBuffer16BitAccess = enabled; features->uniformAndStorageBuffer16BitAccess = enabled; features->storagePushConstant16 = enabled; - features->storageInputOutput16 = enabled; + features->storageInputOutput16 = enabled && HAVE_LLVM >= 0x900; break; } case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_SAMPLER_YCBCR_CONVERSION_FEATURES: { @@ -849,6 +863,18 @@ features->geometryStreams = true; break; } + case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_SCALAR_BLOCK_LAYOUT_FEATURES_EXT: { + VkPhysicalDeviceScalarBlockLayoutFeaturesEXT *features = + (VkPhysicalDeviceScalarBlockLayoutFeaturesEXT *)ext; + 
features->scalarBlockLayout = pdevice->rad_info.chip_class >= CIK; + break; + } + case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_MEMORY_PRIORITY_FEATURES_EXT: { + VkPhysicalDeviceMemoryPriorityFeaturesEXT *features = + (VkPhysicalDeviceMemoryPriorityFeaturesEXT *)ext; + features->memoryPriority = VK_TRUE; + break; + } default: break; } @@ -970,7 +996,7 @@ .sampledImageIntegerSampleCounts = VK_SAMPLE_COUNT_1_BIT, .sampledImageDepthSampleCounts = sample_counts, .sampledImageStencilSampleCounts = sample_counts, - .storageImageSampleCounts = VK_SAMPLE_COUNT_1_BIT, + .storageImageSampleCounts = pdevice->rad_info.chip_class >= VI ? sample_counts : VK_SAMPLE_COUNT_1_BIT, .maxSampleMaskWords = 1, .timestampComputeAndGraphics = true, .timestampPeriod = 1000000.0 / pdevice->rad_info.clock_crystal_freq, @@ -978,7 +1004,7 @@ .maxCullDistances = 8, .maxCombinedClipAndCullDistances = 8, .discreteQueuePriorities = 2, - .pointSizeRange = { 0.125, 255.875 }, + .pointSizeRange = { 0.0, 8192.0 }, .lineWidthRange = { 0.0, 7.9921875 }, .pointSizeGranularity = (1.0 / 8.0), .lineWidthGranularity = (1.0 / 128.0), @@ -1005,7 +1031,7 @@ void radv_GetPhysicalDeviceProperties2( VkPhysicalDevice physicalDevice, - VkPhysicalDeviceProperties2KHR *pProperties) + VkPhysicalDeviceProperties2 *pProperties) { RADV_FROM_HANDLE(radv_physical_device, pdevice, physicalDevice); radv_GetPhysicalDeviceProperties(physicalDevice, &pProperties->properties); @@ -1018,23 +1044,23 @@ properties->maxPushDescriptors = MAX_PUSH_DESCRIPTORS; break; } - case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_ID_PROPERTIES_KHR: { - VkPhysicalDeviceIDPropertiesKHR *properties = (VkPhysicalDeviceIDPropertiesKHR*)ext; + case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_ID_PROPERTIES: { + VkPhysicalDeviceIDProperties *properties = (VkPhysicalDeviceIDProperties*)ext; memcpy(properties->driverUUID, pdevice->driver_uuid, VK_UUID_SIZE); memcpy(properties->deviceUUID, pdevice->device_uuid, VK_UUID_SIZE); properties->deviceLUIDValid = false; break; } - case 
VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_MULTIVIEW_PROPERTIES_KHR: { - VkPhysicalDeviceMultiviewPropertiesKHR *properties = (VkPhysicalDeviceMultiviewPropertiesKHR*)ext; + case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_MULTIVIEW_PROPERTIES: { + VkPhysicalDeviceMultiviewProperties *properties = (VkPhysicalDeviceMultiviewProperties*)ext; properties->maxMultiviewViewCount = MAX_VIEWS; properties->maxMultiviewInstanceIndex = INT_MAX; break; } - case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_POINT_CLIPPING_PROPERTIES_KHR: { - VkPhysicalDevicePointClippingPropertiesKHR *properties = - (VkPhysicalDevicePointClippingPropertiesKHR*)ext; - properties->pointClippingBehavior = VK_POINT_CLIPPING_BEHAVIOR_ALL_CLIP_PLANES_KHR; + case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_POINT_CLIPPING_PROPERTIES: { + VkPhysicalDevicePointClippingProperties *properties = + (VkPhysicalDevicePointClippingProperties*)ext; + properties->pointClippingBehavior = VK_POINT_CLIPPING_BEHAVIOR_ALL_CLIP_PLANES; break; } case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_DISCARD_RECTANGLE_PROPERTIES_EXT: { @@ -1296,7 +1322,7 @@ { RADV_FROM_HANDLE(radv_physical_device, pdevice, physicalDevice); if (!pQueueFamilyProperties) { - return radv_get_physical_device_queue_family_properties(pdevice, pCount, NULL); + radv_get_physical_device_queue_family_properties(pdevice, pCount, NULL); return; } VkQueueFamilyProperties *properties[] = { @@ -1311,11 +1337,11 @@ void radv_GetPhysicalDeviceQueueFamilyProperties2( VkPhysicalDevice physicalDevice, uint32_t* pCount, - VkQueueFamilyProperties2KHR *pQueueFamilyProperties) + VkQueueFamilyProperties2 *pQueueFamilyProperties) { RADV_FROM_HANDLE(radv_physical_device, pdevice, physicalDevice); if (!pQueueFamilyProperties) { - return radv_get_physical_device_queue_family_properties(pdevice, pCount, NULL); + radv_get_physical_device_queue_family_properties(pdevice, pCount, NULL); return; } VkQueueFamilyProperties *properties[] = { @@ -1336,17 +1362,89 @@ *pMemoryProperties = physical_device->memory_properties; } +static 
void +radv_get_memory_budget_properties(VkPhysicalDevice physicalDevice, + VkPhysicalDeviceMemoryBudgetPropertiesEXT *memoryBudget) +{ + RADV_FROM_HANDLE(radv_physical_device, device, physicalDevice); + VkPhysicalDeviceMemoryProperties *memory_properties = &device->memory_properties; + uint64_t visible_vram_size = radv_get_visible_vram_size(device); + uint64_t vram_size = radv_get_vram_size(device); + uint64_t gtt_size = device->rad_info.gart_size; + uint64_t heap_budget, heap_usage; + + /* For all memory heaps, the computation of budget is as follow: + * heap_budget = heap_size - global_heap_usage + app_heap_usage + * + * The Vulkan spec 1.1.97 says that the budget should include any + * currently allocated device memory. + * + * Note that the application heap usages are not really accurate (eg. + * in presence of shared buffers). + */ + if (vram_size) { + heap_usage = device->ws->query_value(device->ws, + RADEON_ALLOCATED_VRAM); + + heap_budget = vram_size - + device->ws->query_value(device->ws, RADEON_VRAM_USAGE) + + heap_usage; + + memoryBudget->heapBudget[RADV_MEM_HEAP_VRAM] = heap_budget; + memoryBudget->heapUsage[RADV_MEM_HEAP_VRAM] = heap_usage; + } + + if (visible_vram_size) { + heap_usage = device->ws->query_value(device->ws, + RADEON_ALLOCATED_VRAM_VIS); + + heap_budget = visible_vram_size - + device->ws->query_value(device->ws, RADEON_VRAM_VIS_USAGE) + + heap_usage; + + memoryBudget->heapBudget[RADV_MEM_HEAP_VRAM_CPU_ACCESS] = heap_budget; + memoryBudget->heapUsage[RADV_MEM_HEAP_VRAM_CPU_ACCESS] = heap_usage; + } + + if (gtt_size) { + heap_usage = device->ws->query_value(device->ws, + RADEON_ALLOCATED_GTT); + + heap_budget = gtt_size - + device->ws->query_value(device->ws, RADEON_GTT_USAGE) + + heap_usage; + + memoryBudget->heapBudget[RADV_MEM_HEAP_GTT] = heap_budget; + memoryBudget->heapUsage[RADV_MEM_HEAP_GTT] = heap_usage; + } + + /* The heapBudget and heapUsage values must be zero for array elements + * greater than or equal to + * 
VkPhysicalDeviceMemoryProperties::memoryHeapCount. + */ + for (uint32_t i = memory_properties->memoryHeapCount; i < VK_MAX_MEMORY_HEAPS; i++) { + memoryBudget->heapBudget[i] = 0; + memoryBudget->heapUsage[i] = 0; + } +} + void radv_GetPhysicalDeviceMemoryProperties2( VkPhysicalDevice physicalDevice, - VkPhysicalDeviceMemoryProperties2KHR *pMemoryProperties) + VkPhysicalDeviceMemoryProperties2 *pMemoryProperties) { - return radv_GetPhysicalDeviceMemoryProperties(physicalDevice, - &pMemoryProperties->memoryProperties); + radv_GetPhysicalDeviceMemoryProperties(physicalDevice, + &pMemoryProperties->memoryProperties); + + VkPhysicalDeviceMemoryBudgetPropertiesEXT *memory_budget = + vk_find_struct(pMemoryProperties->pNext, + PHYSICAL_DEVICE_MEMORY_BUDGET_PROPERTIES_EXT); + if (memory_budget) + radv_get_memory_budget_properties(physicalDevice, memory_budget); } VkResult radv_GetMemoryHostPointerPropertiesEXT( VkDevice _device, - VkExternalMemoryHandleTypeFlagBitsKHR handleType, + VkExternalMemoryHandleTypeFlagBits handleType, const void *pHostPointer, VkMemoryHostPointerPropertiesEXT *pMemoryHostPointerProperties) { @@ -1367,7 +1465,7 @@ return VK_SUCCESS; } default: - return VK_ERROR_INVALID_EXTERNAL_HANDLE_KHR; + return VK_ERROR_INVALID_EXTERNAL_HANDLE; } } @@ -1632,9 +1730,7 @@ } device->pbb_allowed = device->physical_device->rad_info.chip_class >= GFX9 && - ((device->instance->perftest_flags & RADV_PERFTEST_BINNING) || - device->physical_device->rad_info.family == CHIP_RAVEN || - device->physical_device->rad_info.family == CHIP_RAVEN2); + !(device->instance->debug_flags & RADV_DEBUG_NOBINNING); /* Disabled and not implemented for now. 
*/ device->dfsm_allowed = device->pbb_allowed && @@ -1881,136 +1977,138 @@ uint32_t tess_offchip_ring_size, struct radeon_winsys_bo *tess_rings_bo) { - uint64_t esgs_va = 0, gsvs_va = 0; - uint64_t tess_va = 0, tess_offchip_va = 0; uint32_t *desc = &map[4]; - if (esgs_ring_bo) - esgs_va = radv_buffer_get_va(esgs_ring_bo); - if (gsvs_ring_bo) - gsvs_va = radv_buffer_get_va(gsvs_ring_bo); + if (esgs_ring_bo) { + uint64_t esgs_va = radv_buffer_get_va(esgs_ring_bo); + + /* stride 0, num records - size, add tid, swizzle, elsize4, + index stride 64 */ + desc[0] = esgs_va; + desc[1] = S_008F04_BASE_ADDRESS_HI(esgs_va >> 32) | + S_008F04_STRIDE(0) | + S_008F04_SWIZZLE_ENABLE(true); + desc[2] = esgs_ring_size; + desc[3] = S_008F0C_DST_SEL_X(V_008F0C_SQ_SEL_X) | + S_008F0C_DST_SEL_Y(V_008F0C_SQ_SEL_Y) | + S_008F0C_DST_SEL_Z(V_008F0C_SQ_SEL_Z) | + S_008F0C_DST_SEL_W(V_008F0C_SQ_SEL_W) | + S_008F0C_NUM_FORMAT(V_008F0C_BUF_NUM_FORMAT_FLOAT) | + S_008F0C_DATA_FORMAT(V_008F0C_BUF_DATA_FORMAT_32) | + S_008F0C_ELEMENT_SIZE(1) | + S_008F0C_INDEX_STRIDE(3) | + S_008F0C_ADD_TID_ENABLE(true); + + /* GS entry for ES->GS ring */ + /* stride 0, num records - size, elsize0, + index stride 0 */ + desc[4] = esgs_va; + desc[5] = S_008F04_BASE_ADDRESS_HI(esgs_va >> 32)| + S_008F04_STRIDE(0) | + S_008F04_SWIZZLE_ENABLE(false); + desc[6] = esgs_ring_size; + desc[7] = S_008F0C_DST_SEL_X(V_008F0C_SQ_SEL_X) | + S_008F0C_DST_SEL_Y(V_008F0C_SQ_SEL_Y) | + S_008F0C_DST_SEL_Z(V_008F0C_SQ_SEL_Z) | + S_008F0C_DST_SEL_W(V_008F0C_SQ_SEL_W) | + S_008F0C_NUM_FORMAT(V_008F0C_BUF_NUM_FORMAT_FLOAT) | + S_008F0C_DATA_FORMAT(V_008F0C_BUF_DATA_FORMAT_32) | + S_008F0C_ELEMENT_SIZE(0) | + S_008F0C_INDEX_STRIDE(0) | + S_008F0C_ADD_TID_ENABLE(false); + } + + desc += 8; + + if (gsvs_ring_bo) { + uint64_t gsvs_va = radv_buffer_get_va(gsvs_ring_bo); + + /* VS entry for GS->VS ring */ + /* stride 0, num records - size, elsize0, + index stride 0 */ + desc[0] = gsvs_va; + desc[1] = S_008F04_BASE_ADDRESS_HI(gsvs_va >> 32)| + 
S_008F04_STRIDE(0) | + S_008F04_SWIZZLE_ENABLE(false); + desc[2] = gsvs_ring_size; + desc[3] = S_008F0C_DST_SEL_X(V_008F0C_SQ_SEL_X) | + S_008F0C_DST_SEL_Y(V_008F0C_SQ_SEL_Y) | + S_008F0C_DST_SEL_Z(V_008F0C_SQ_SEL_Z) | + S_008F0C_DST_SEL_W(V_008F0C_SQ_SEL_W) | + S_008F0C_NUM_FORMAT(V_008F0C_BUF_NUM_FORMAT_FLOAT) | + S_008F0C_DATA_FORMAT(V_008F0C_BUF_DATA_FORMAT_32) | + S_008F0C_ELEMENT_SIZE(0) | + S_008F0C_INDEX_STRIDE(0) | + S_008F0C_ADD_TID_ENABLE(false); + + /* stride gsvs_itemsize, num records 64 + elsize 4, index stride 16 */ + /* shader will patch stride and desc[2] */ + desc[4] = gsvs_va; + desc[5] = S_008F04_BASE_ADDRESS_HI(gsvs_va >> 32)| + S_008F04_STRIDE(0) | + S_008F04_SWIZZLE_ENABLE(true); + desc[6] = 0; + desc[7] = S_008F0C_DST_SEL_X(V_008F0C_SQ_SEL_X) | + S_008F0C_DST_SEL_Y(V_008F0C_SQ_SEL_Y) | + S_008F0C_DST_SEL_Z(V_008F0C_SQ_SEL_Z) | + S_008F0C_DST_SEL_W(V_008F0C_SQ_SEL_W) | + S_008F0C_NUM_FORMAT(V_008F0C_BUF_NUM_FORMAT_FLOAT) | + S_008F0C_DATA_FORMAT(V_008F0C_BUF_DATA_FORMAT_32) | + S_008F0C_ELEMENT_SIZE(1) | + S_008F0C_INDEX_STRIDE(1) | + S_008F0C_ADD_TID_ENABLE(true); + } + + desc += 8; + if (tess_rings_bo) { - tess_va = radv_buffer_get_va(tess_rings_bo); - tess_offchip_va = tess_va + tess_offchip_ring_offset; + uint64_t tess_va = radv_buffer_get_va(tess_rings_bo); + uint64_t tess_offchip_va = tess_va + tess_offchip_ring_offset; + + desc[0] = tess_va; + desc[1] = S_008F04_BASE_ADDRESS_HI(tess_va >> 32) | + S_008F04_STRIDE(0) | + S_008F04_SWIZZLE_ENABLE(false); + desc[2] = tess_factor_ring_size; + desc[3] = S_008F0C_DST_SEL_X(V_008F0C_SQ_SEL_X) | + S_008F0C_DST_SEL_Y(V_008F0C_SQ_SEL_Y) | + S_008F0C_DST_SEL_Z(V_008F0C_SQ_SEL_Z) | + S_008F0C_DST_SEL_W(V_008F0C_SQ_SEL_W) | + S_008F0C_NUM_FORMAT(V_008F0C_BUF_NUM_FORMAT_FLOAT) | + S_008F0C_DATA_FORMAT(V_008F0C_BUF_DATA_FORMAT_32) | + S_008F0C_ELEMENT_SIZE(0) | + S_008F0C_INDEX_STRIDE(0) | + S_008F0C_ADD_TID_ENABLE(false); + + desc[4] = tess_offchip_va; + desc[5] = 
S_008F04_BASE_ADDRESS_HI(tess_offchip_va >> 32) | + S_008F04_STRIDE(0) | + S_008F04_SWIZZLE_ENABLE(false); + desc[6] = tess_offchip_ring_size; + desc[7] = S_008F0C_DST_SEL_X(V_008F0C_SQ_SEL_X) | + S_008F0C_DST_SEL_Y(V_008F0C_SQ_SEL_Y) | + S_008F0C_DST_SEL_Z(V_008F0C_SQ_SEL_Z) | + S_008F0C_DST_SEL_W(V_008F0C_SQ_SEL_W) | + S_008F0C_NUM_FORMAT(V_008F0C_BUF_NUM_FORMAT_FLOAT) | + S_008F0C_DATA_FORMAT(V_008F0C_BUF_DATA_FORMAT_32) | + S_008F0C_ELEMENT_SIZE(0) | + S_008F0C_INDEX_STRIDE(0) | + S_008F0C_ADD_TID_ENABLE(false); } - /* stride 0, num records - size, add tid, swizzle, elsize4, - index stride 64 */ - desc[0] = esgs_va; - desc[1] = S_008F04_BASE_ADDRESS_HI(esgs_va >> 32) | - S_008F04_STRIDE(0) | - S_008F04_SWIZZLE_ENABLE(true); - desc[2] = esgs_ring_size; - desc[3] = S_008F0C_DST_SEL_X(V_008F0C_SQ_SEL_X) | - S_008F0C_DST_SEL_Y(V_008F0C_SQ_SEL_Y) | - S_008F0C_DST_SEL_Z(V_008F0C_SQ_SEL_Z) | - S_008F0C_DST_SEL_W(V_008F0C_SQ_SEL_W) | - S_008F0C_NUM_FORMAT(V_008F0C_BUF_NUM_FORMAT_FLOAT) | - S_008F0C_DATA_FORMAT(V_008F0C_BUF_DATA_FORMAT_32) | - S_008F0C_ELEMENT_SIZE(1) | - S_008F0C_INDEX_STRIDE(3) | - S_008F0C_ADD_TID_ENABLE(true); - - desc += 4; - /* GS entry for ES->GS ring */ - /* stride 0, num records - size, elsize0, - index stride 0 */ - desc[0] = esgs_va; - desc[1] = S_008F04_BASE_ADDRESS_HI(esgs_va >> 32)| - S_008F04_STRIDE(0) | - S_008F04_SWIZZLE_ENABLE(false); - desc[2] = esgs_ring_size; - desc[3] = S_008F0C_DST_SEL_X(V_008F0C_SQ_SEL_X) | - S_008F0C_DST_SEL_Y(V_008F0C_SQ_SEL_Y) | - S_008F0C_DST_SEL_Z(V_008F0C_SQ_SEL_Z) | - S_008F0C_DST_SEL_W(V_008F0C_SQ_SEL_W) | - S_008F0C_NUM_FORMAT(V_008F0C_BUF_NUM_FORMAT_FLOAT) | - S_008F0C_DATA_FORMAT(V_008F0C_BUF_DATA_FORMAT_32) | - S_008F0C_ELEMENT_SIZE(0) | - S_008F0C_INDEX_STRIDE(0) | - S_008F0C_ADD_TID_ENABLE(false); - - desc += 4; - /* VS entry for GS->VS ring */ - /* stride 0, num records - size, elsize0, - index stride 0 */ - desc[0] = gsvs_va; - desc[1] = S_008F04_BASE_ADDRESS_HI(gsvs_va >> 32)| - 
S_008F04_STRIDE(0) | - S_008F04_SWIZZLE_ENABLE(false); - desc[2] = gsvs_ring_size; - desc[3] = S_008F0C_DST_SEL_X(V_008F0C_SQ_SEL_X) | - S_008F0C_DST_SEL_Y(V_008F0C_SQ_SEL_Y) | - S_008F0C_DST_SEL_Z(V_008F0C_SQ_SEL_Z) | - S_008F0C_DST_SEL_W(V_008F0C_SQ_SEL_W) | - S_008F0C_NUM_FORMAT(V_008F0C_BUF_NUM_FORMAT_FLOAT) | - S_008F0C_DATA_FORMAT(V_008F0C_BUF_DATA_FORMAT_32) | - S_008F0C_ELEMENT_SIZE(0) | - S_008F0C_INDEX_STRIDE(0) | - S_008F0C_ADD_TID_ENABLE(false); - desc += 4; - - /* stride gsvs_itemsize, num records 64 - elsize 4, index stride 16 */ - /* shader will patch stride and desc[2] */ - desc[0] = gsvs_va; - desc[1] = S_008F04_BASE_ADDRESS_HI(gsvs_va >> 32)| - S_008F04_STRIDE(0) | - S_008F04_SWIZZLE_ENABLE(true); - desc[2] = 0; - desc[3] = S_008F0C_DST_SEL_X(V_008F0C_SQ_SEL_X) | - S_008F0C_DST_SEL_Y(V_008F0C_SQ_SEL_Y) | - S_008F0C_DST_SEL_Z(V_008F0C_SQ_SEL_Z) | - S_008F0C_DST_SEL_W(V_008F0C_SQ_SEL_W) | - S_008F0C_NUM_FORMAT(V_008F0C_BUF_NUM_FORMAT_FLOAT) | - S_008F0C_DATA_FORMAT(V_008F0C_BUF_DATA_FORMAT_32) | - S_008F0C_ELEMENT_SIZE(1) | - S_008F0C_INDEX_STRIDE(1) | - S_008F0C_ADD_TID_ENABLE(true); - desc += 4; - - desc[0] = tess_va; - desc[1] = S_008F04_BASE_ADDRESS_HI(tess_va >> 32) | - S_008F04_STRIDE(0) | - S_008F04_SWIZZLE_ENABLE(false); - desc[2] = tess_factor_ring_size; - desc[3] = S_008F0C_DST_SEL_X(V_008F0C_SQ_SEL_X) | - S_008F0C_DST_SEL_Y(V_008F0C_SQ_SEL_Y) | - S_008F0C_DST_SEL_Z(V_008F0C_SQ_SEL_Z) | - S_008F0C_DST_SEL_W(V_008F0C_SQ_SEL_W) | - S_008F0C_NUM_FORMAT(V_008F0C_BUF_NUM_FORMAT_FLOAT) | - S_008F0C_DATA_FORMAT(V_008F0C_BUF_DATA_FORMAT_32) | - S_008F0C_ELEMENT_SIZE(0) | - S_008F0C_INDEX_STRIDE(0) | - S_008F0C_ADD_TID_ENABLE(false); - desc += 4; - - desc[0] = tess_offchip_va; - desc[1] = S_008F04_BASE_ADDRESS_HI(tess_offchip_va >> 32) | - S_008F04_STRIDE(0) | - S_008F04_SWIZZLE_ENABLE(false); - desc[2] = tess_offchip_ring_size; - desc[3] = S_008F0C_DST_SEL_X(V_008F0C_SQ_SEL_X) | - S_008F0C_DST_SEL_Y(V_008F0C_SQ_SEL_Y) | - 
S_008F0C_DST_SEL_Z(V_008F0C_SQ_SEL_Z) | - S_008F0C_DST_SEL_W(V_008F0C_SQ_SEL_W) | - S_008F0C_NUM_FORMAT(V_008F0C_BUF_NUM_FORMAT_FLOAT) | - S_008F0C_DATA_FORMAT(V_008F0C_BUF_DATA_FORMAT_32) | - S_008F0C_ELEMENT_SIZE(0) | - S_008F0C_INDEX_STRIDE(0) | - S_008F0C_ADD_TID_ENABLE(false); - desc += 4; - - /* add sample positions after all rings */ - memcpy(desc, queue->device->sample_locations_1x, 8); - desc += 2; - memcpy(desc, queue->device->sample_locations_2x, 16); - desc += 4; - memcpy(desc, queue->device->sample_locations_4x, 32); desc += 8; - memcpy(desc, queue->device->sample_locations_8x, 64); - desc += 16; - memcpy(desc, queue->device->sample_locations_16x, 128); + + if (add_sample_positions) { + /* add sample positions after all rings */ + memcpy(desc, queue->device->sample_locations_1x, 8); + desc += 2; + memcpy(desc, queue->device->sample_locations_2x, 16); + desc += 4; + memcpy(desc, queue->device->sample_locations_4x, 32); + desc += 8; + memcpy(desc, queue->device->sample_locations_8x, 64); + } } static unsigned @@ -2044,16 +2142,15 @@ max_offchip_buffers = max_offchip_buffers_per_se * device->physical_device->rad_info.max_se; - switch (device->tess_offchip_block_dw_size) { - default: - assert(0); - /* fall through */ - case 8192: - offchip_granularity = V_03093C_X_8K_DWORDS; - break; - case 4096: + /* Hawaii has a bug with offchip buffers > 256 that can be worked + * around by setting 4K granularity. 
+ */ + if (device->tess_offchip_block_dw_size == 4096) { + assert(device->physical_device->rad_info.family == CHIP_HAWAII); offchip_granularity = V_03093C_X_4K_DWORDS; - break; + } else { + assert(device->tess_offchip_block_dw_size == 8192); + offchip_granularity = V_03093C_X_8K_DWORDS; } switch (device->physical_device->rad_info.chip_class) { @@ -2288,7 +2385,8 @@ scratch_size, 4096, RADEON_DOMAIN_VRAM, - ring_bo_flags); + ring_bo_flags, + RADV_BO_PRIORITY_SCRATCH); if (!scratch_bo) goto fail; } else @@ -2299,7 +2397,8 @@ compute_scratch_size, 4096, RADEON_DOMAIN_VRAM, - ring_bo_flags); + ring_bo_flags, + RADV_BO_PRIORITY_SCRATCH); if (!compute_scratch_bo) goto fail; @@ -2311,7 +2410,8 @@ esgs_ring_size, 4096, RADEON_DOMAIN_VRAM, - ring_bo_flags); + ring_bo_flags, + RADV_BO_PRIORITY_SCRATCH); if (!esgs_ring_bo) goto fail; } else { @@ -2324,7 +2424,8 @@ gsvs_ring_size, 4096, RADEON_DOMAIN_VRAM, - ring_bo_flags); + ring_bo_flags, + RADV_BO_PRIORITY_SCRATCH); if (!gsvs_ring_bo) goto fail; } else { @@ -2337,7 +2438,8 @@ tess_offchip_ring_offset + tess_offchip_ring_size, 256, RADEON_DOMAIN_VRAM, - ring_bo_flags); + ring_bo_flags, + RADV_BO_PRIORITY_SCRATCH); if (!tess_rings_bo) goto fail; } else { @@ -2354,7 +2456,7 @@ tess_rings_bo || add_sample_positions) { size = 112; /* 2 dword + 2 padding + 4 dword * 6 */ if (add_sample_positions) - size += 256; /* 32+16+8+4+2+1 samples * 4 * 2 = 248 bytes. 
*/ + size += 128; /* 64+32+16+8 = 120 bytes */ } else if (scratch_bo) size = 8; /* 2 dword */ @@ -2365,12 +2467,36 @@ RADEON_DOMAIN_VRAM, RADEON_FLAG_CPU_ACCESS | RADEON_FLAG_NO_INTERPROCESS_SHARING | - RADEON_FLAG_READ_ONLY); + RADEON_FLAG_READ_ONLY, + RADV_BO_PRIORITY_DESCRIPTOR); if (!descriptor_bo) goto fail; } else descriptor_bo = queue->descriptor_bo; + if (descriptor_bo != queue->descriptor_bo) { + uint32_t *map = (uint32_t*)queue->device->ws->buffer_map(descriptor_bo); + + if (scratch_bo) { + uint64_t scratch_va = radv_buffer_get_va(scratch_bo); + uint32_t rsrc1 = S_008F04_BASE_ADDRESS_HI(scratch_va >> 32) | + S_008F04_SWIZZLE_ENABLE(1); + map[0] = scratch_va; + map[1] = rsrc1; + } + + if (esgs_ring_bo || gsvs_ring_bo || tess_rings_bo || add_sample_positions) + fill_geom_tess_rings(queue, map, add_sample_positions, + esgs_ring_size, esgs_ring_bo, + gsvs_ring_size, gsvs_ring_bo, + tess_factor_ring_size, + tess_offchip_ring_offset, + tess_offchip_ring_size, + tess_rings_bo); + + queue->device->ws->buffer_unmap(descriptor_bo); + } + for(int i = 0; i < 3; ++i) { struct radeon_cmdbuf *cs = NULL; cs = queue->device->ws->cs_create(queue->device->ws, @@ -2395,30 +2521,6 @@ break; } - if (descriptor_bo != queue->descriptor_bo) { - uint32_t *map = (uint32_t*)queue->device->ws->buffer_map(descriptor_bo); - - if (scratch_bo) { - uint64_t scratch_va = radv_buffer_get_va(scratch_bo); - uint32_t rsrc1 = S_008F04_BASE_ADDRESS_HI(scratch_va >> 32) | - S_008F04_SWIZZLE_ENABLE(1); - map[0] = scratch_va; - map[1] = rsrc1; - } - - if (esgs_ring_bo || gsvs_ring_bo || tess_rings_bo || - add_sample_positions) - fill_geom_tess_rings(queue, map, add_sample_positions, - esgs_ring_size, esgs_ring_bo, - gsvs_ring_size, gsvs_ring_bo, - tess_factor_ring_size, - tess_offchip_ring_offset, - tess_offchip_ring_size, - tess_rings_bo); - - queue->device->ws->buffer_unmap(descriptor_bo); - } - if (esgs_ring_bo || gsvs_ring_bo || tess_rings_bo) { radeon_emit(cs, PKT3(PKT3_EVENT_WRITE, 0, 0)); 
radeon_emit(cs, EVENT_TYPE(V_028A90_VS_PARTIAL_FLUSH) | EVENT_INDEX(4)); @@ -2694,7 +2796,7 @@ struct radeon_winsys_fence *base_fence = fence ? fence->fence : NULL; struct radeon_winsys_ctx *ctx = queue->hw_ctx; int ret; - uint32_t max_cs_submission = queue->device->trace_bo ? 1 : UINT32_MAX; + uint32_t max_cs_submission = queue->device->trace_bo ? 1 : RADV_MAX_IBS_PER_SUBMIT; uint32_t scratch_size = 0; uint32_t compute_scratch_size = 0; uint32_t esgs_ring_size = 0, gsvs_ring_size = 0; @@ -2970,10 +3072,10 @@ const VkImportMemoryFdInfoKHR *import_info = vk_find_struct_const(pAllocateInfo->pNext, IMPORT_MEMORY_FD_INFO_KHR); - const VkMemoryDedicatedAllocateInfoKHR *dedicate_info = - vk_find_struct_const(pAllocateInfo->pNext, MEMORY_DEDICATED_ALLOCATE_INFO_KHR); - const VkExportMemoryAllocateInfoKHR *export_info = - vk_find_struct_const(pAllocateInfo->pNext, EXPORT_MEMORY_ALLOCATE_INFO_KHR); + const VkMemoryDedicatedAllocateInfo *dedicate_info = + vk_find_struct_const(pAllocateInfo->pNext, MEMORY_DEDICATED_ALLOCATE_INFO); + const VkExportMemoryAllocateInfo *export_info = + vk_find_struct_const(pAllocateInfo->pNext, EXPORT_MEMORY_ALLOCATE_INFO); const VkImportMemoryHostPointerInfoEXT *host_ptr_info = vk_find_struct_const(pAllocateInfo->pNext, IMPORT_MEMORY_HOST_POINTER_INFO_EXT); @@ -2996,17 +3098,27 @@ mem->buffer = NULL; } + float priority_float = 0.5; + const struct VkMemoryPriorityAllocateInfoEXT *priority_ext = + vk_find_struct_const(pAllocateInfo->pNext, + MEMORY_PRIORITY_ALLOCATE_INFO_EXT); + if (priority_ext) + priority_float = priority_ext->priority; + + unsigned priority = MIN2(RADV_BO_PRIORITY_APPLICATION_MAX - 1, + (int)(priority_float * RADV_BO_PRIORITY_APPLICATION_MAX)); + mem->user_ptr = NULL; if (import_info) { assert(import_info->handleType == - VK_EXTERNAL_MEMORY_HANDLE_TYPE_OPAQUE_FD_BIT_KHR || + VK_EXTERNAL_MEMORY_HANDLE_TYPE_OPAQUE_FD_BIT || import_info->handleType == VK_EXTERNAL_MEMORY_HANDLE_TYPE_DMA_BUF_BIT_EXT); mem->bo = 
device->ws->buffer_from_fd(device->ws, import_info->fd, - NULL, NULL); + priority, NULL, NULL); if (!mem->bo) { - result = VK_ERROR_INVALID_EXTERNAL_HANDLE_KHR; + result = VK_ERROR_INVALID_EXTERNAL_HANDLE; goto fail; } else { close(import_info->fd); @@ -3015,9 +3127,10 @@ assert(host_ptr_info->handleType == VK_EXTERNAL_MEMORY_HANDLE_TYPE_HOST_ALLOCATION_BIT_EXT); assert(mem_type_index == RADV_MEM_TYPE_GTT_CACHED); mem->bo = device->ws->buffer_from_ptr(device->ws, host_ptr_info->pHostPointer, - pAllocateInfo->allocationSize); + pAllocateInfo->allocationSize, + priority); if (!mem->bo) { - result = VK_ERROR_INVALID_EXTERNAL_HANDLE_KHR; + result = VK_ERROR_INVALID_EXTERNAL_HANDLE; goto fail; } else { mem->user_ptr = host_ptr_info->pHostPointer; @@ -3042,7 +3155,7 @@ flags |= RADEON_FLAG_NO_INTERPROCESS_SHARING; mem->bo = device->ws->buffer_create(device->ws, alloc_size, device->physical_device->rad_info.max_alignment, - domain, flags); + domain, flags, priority); if (!mem->bo) { result = VK_ERROR_OUT_OF_DEVICE_MEMORY; @@ -3174,17 +3287,17 @@ void radv_GetBufferMemoryRequirements2( VkDevice device, - const VkBufferMemoryRequirementsInfo2KHR* pInfo, - VkMemoryRequirements2KHR* pMemoryRequirements) + const VkBufferMemoryRequirementsInfo2 *pInfo, + VkMemoryRequirements2 *pMemoryRequirements) { radv_GetBufferMemoryRequirements(device, pInfo->buffer, &pMemoryRequirements->memoryRequirements); RADV_FROM_HANDLE(radv_buffer, buffer, pInfo->buffer); vk_foreach_struct(ext, pMemoryRequirements->pNext) { switch (ext->sType) { - case VK_STRUCTURE_TYPE_MEMORY_DEDICATED_REQUIREMENTS_KHR: { - VkMemoryDedicatedRequirementsKHR *req = - (VkMemoryDedicatedRequirementsKHR *) ext; + case VK_STRUCTURE_TYPE_MEMORY_DEDICATED_REQUIREMENTS: { + VkMemoryDedicatedRequirements *req = + (VkMemoryDedicatedRequirements *) ext; req->requiresDedicatedAllocation = buffer->shareable; req->prefersDedicatedAllocation = req->requiresDedicatedAllocation; break; @@ -3211,8 +3324,8 @@ void 
radv_GetImageMemoryRequirements2( VkDevice device, - const VkImageMemoryRequirementsInfo2KHR* pInfo, - VkMemoryRequirements2KHR* pMemoryRequirements) + const VkImageMemoryRequirementsInfo2 *pInfo, + VkMemoryRequirements2 *pMemoryRequirements) { radv_GetImageMemoryRequirements(device, pInfo->image, &pMemoryRequirements->memoryRequirements); @@ -3221,9 +3334,9 @@ vk_foreach_struct(ext, pMemoryRequirements->pNext) { switch (ext->sType) { - case VK_STRUCTURE_TYPE_MEMORY_DEDICATED_REQUIREMENTS_KHR: { - VkMemoryDedicatedRequirementsKHR *req = - (VkMemoryDedicatedRequirementsKHR *) ext; + case VK_STRUCTURE_TYPE_MEMORY_DEDICATED_REQUIREMENTS: { + VkMemoryDedicatedRequirements *req = + (VkMemoryDedicatedRequirements *) ext; req->requiresDedicatedAllocation = image->shareable; req->prefersDedicatedAllocation = req->requiresDedicatedAllocation; break; @@ -3245,9 +3358,9 @@ void radv_GetImageSparseMemoryRequirements2( VkDevice device, - const VkImageSparseMemoryRequirementsInfo2KHR* pInfo, + const VkImageSparseMemoryRequirementsInfo2 *pInfo, uint32_t* pSparseMemoryRequirementCount, - VkSparseImageMemoryRequirements2KHR* pSparseMemoryRequirements) + VkSparseImageMemoryRequirements2 *pSparseMemoryRequirements) { stub(); } @@ -3262,7 +3375,7 @@ VkResult radv_BindBufferMemory2(VkDevice device, uint32_t bindInfoCount, - const VkBindBufferMemoryInfoKHR *pBindInfos) + const VkBindBufferMemoryInfo *pBindInfos) { for (uint32_t i = 0; i < bindInfoCount; ++i) { RADV_FROM_HANDLE(radv_device_memory, mem, pBindInfos[i].memory); @@ -3284,8 +3397,8 @@ VkDeviceMemory memory, VkDeviceSize memoryOffset) { - const VkBindBufferMemoryInfoKHR info = { - .sType = VK_STRUCTURE_TYPE_BIND_BUFFER_MEMORY_INFO_KHR, + const VkBindBufferMemoryInfo info = { + .sType = VK_STRUCTURE_TYPE_BIND_BUFFER_MEMORY_INFO, .buffer = buffer, .memory = memory, .memoryOffset = memoryOffset @@ -3296,7 +3409,7 @@ VkResult radv_BindImageMemory2(VkDevice device, uint32_t bindInfoCount, - const VkBindImageMemoryInfoKHR 
*pBindInfos) + const VkBindImageMemoryInfo *pBindInfos) { for (uint32_t i = 0; i < bindInfoCount; ++i) { RADV_FROM_HANDLE(radv_device_memory, mem, pBindInfos[i].memory); @@ -3320,8 +3433,8 @@ VkDeviceMemory memory, VkDeviceSize memoryOffset) { - const VkBindImageMemoryInfoKHR info = { - .sType = VK_STRUCTURE_TYPE_BIND_BUFFER_MEMORY_INFO_KHR, + const VkBindImageMemoryInfo info = { + .sType = VK_STRUCTURE_TYPE_BIND_BUFFER_MEMORY_INFO, .image = image, .memory = memory, .memoryOffset = memoryOffset @@ -3446,9 +3559,9 @@ VkFence* pFence) { RADV_FROM_HANDLE(radv_device, device, _device); - const VkExportFenceCreateInfoKHR *export = - vk_find_struct_const(pCreateInfo->pNext, EXPORT_FENCE_CREATE_INFO_KHR); - VkExternalFenceHandleTypeFlagsKHR handleTypes = + const VkExportFenceCreateInfo *export = + vk_find_struct_const(pCreateInfo->pNext, EXPORT_FENCE_CREATE_INFO); + VkExternalFenceHandleTypeFlags handleTypes = export ? export->handleTypes : 0; struct radv_fence *fence = vk_alloc2(&device->alloc, pAllocator, @@ -3735,9 +3848,9 @@ VkSemaphore* pSemaphore) { RADV_FROM_HANDLE(radv_device, device, _device); - const VkExportSemaphoreCreateInfoKHR *export = - vk_find_struct_const(pCreateInfo->pNext, EXPORT_SEMAPHORE_CREATE_INFO_KHR); - VkExternalSemaphoreHandleTypeFlagsKHR handleTypes = + const VkExportSemaphoreCreateInfo *export = + vk_find_struct_const(pCreateInfo->pNext, EXPORT_SEMAPHORE_CREATE_INFO); + VkExternalSemaphoreHandleTypeFlags handleTypes = export ? 
export->handleTypes : 0; struct radv_semaphore *sem = vk_alloc2(&device->alloc, pAllocator, @@ -3802,7 +3915,8 @@ event->bo = device->ws->buffer_create(device->ws, 8, 8, RADEON_DOMAIN_GTT, - RADEON_FLAG_VA_UNCACHED | RADEON_FLAG_CPU_ACCESS | RADEON_FLAG_NO_INTERPROCESS_SHARING); + RADEON_FLAG_VA_UNCACHED | RADEON_FLAG_CPU_ACCESS | RADEON_FLAG_NO_INTERPROCESS_SHARING, + RADV_BO_PRIORITY_FENCE); if (!event->bo) { vk_free2(&device->alloc, pAllocator, event); return vk_error(device->instance, VK_ERROR_OUT_OF_DEVICE_MEMORY); @@ -3883,12 +3997,13 @@ buffer->flags = pCreateInfo->flags; buffer->shareable = vk_find_struct_const(pCreateInfo->pNext, - EXTERNAL_MEMORY_BUFFER_CREATE_INFO_KHR) != NULL; + EXTERNAL_MEMORY_BUFFER_CREATE_INFO) != NULL; if (pCreateInfo->flags & VK_BUFFER_CREATE_SPARSE_BINDING_BIT) { buffer->bo = device->ws->buffer_create(device->ws, align64(buffer->size, 4096), - 4096, 0, RADEON_FLAG_VIRTUAL); + 4096, 0, RADEON_FLAG_VIRTUAL, + RADV_BO_PRIORITY_VIRTUAL); if (!buffer->bo) { vk_free2(&device->alloc, pAllocator, buffer); return vk_error(device->instance, VK_ERROR_OUT_OF_DEVICE_MEMORY); @@ -4529,11 +4644,11 @@ { switch (mode) { case VK_SAMPLER_REDUCTION_MODE_WEIGHTED_AVERAGE_EXT: - return SQ_IMG_FILTER_MODE_BLEND; + return V_008F30_SQ_IMG_FILTER_MODE_BLEND; case VK_SAMPLER_REDUCTION_MODE_MIN_EXT: - return SQ_IMG_FILTER_MODE_MIN; + return V_008F30_SQ_IMG_FILTER_MODE_MIN; case VK_SAMPLER_REDUCTION_MODE_MAX_EXT: - return SQ_IMG_FILTER_MODE_MAX; + return V_008F30_SQ_IMG_FILTER_MODE_MAX; default: break; } @@ -4562,7 +4677,7 @@ uint32_t max_aniso = radv_get_max_anisotropy(device, pCreateInfo); uint32_t max_aniso_ratio = radv_tex_aniso_filter(max_aniso); bool is_vi = (device->physical_device->rad_info.chip_class >= VI); - unsigned filter_mode = SQ_IMG_FILTER_MODE_BLEND; + unsigned filter_mode = V_008F30_SQ_IMG_FILTER_MODE_BLEND; const struct VkSamplerReductionModeCreateInfoEXT *sampler_reduction = vk_find_struct_const(pCreateInfo->pNext, @@ -4686,7 +4801,7 @@ /* 
At the moment, we support only the below handle types. */ assert(pGetFdInfo->handleType == - VK_EXTERNAL_MEMORY_HANDLE_TYPE_OPAQUE_FD_BIT_KHR || + VK_EXTERNAL_MEMORY_HANDLE_TYPE_OPAQUE_FD_BIT || pGetFdInfo->handleType == VK_EXTERNAL_MEMORY_HANDLE_TYPE_DMA_BUF_BIT_EXT); @@ -4697,7 +4812,7 @@ } VkResult radv_GetMemoryFdPropertiesKHR(VkDevice _device, - VkExternalMemoryHandleTypeFlagBitsKHR handleType, + VkExternalMemoryHandleTypeFlagBits handleType, int fd, VkMemoryFdPropertiesKHR *pMemoryFdProperties) { @@ -4716,7 +4831,7 @@ * * So opaque handle types fall into the default "unsupported" case. */ - return vk_error(device->instance, VK_ERROR_INVALID_EXTERNAL_HANDLE_KHR); + return vk_error(device->instance, VK_ERROR_INVALID_EXTERNAL_HANDLE); } } @@ -4727,7 +4842,7 @@ uint32_t syncobj_handle = 0; int ret = device->ws->import_syncobj(device->ws, fd, &syncobj_handle); if (ret != 0) - return vk_error(device->instance, VK_ERROR_INVALID_EXTERNAL_HANDLE_KHR); + return vk_error(device->instance, VK_ERROR_INVALID_EXTERNAL_HANDLE); if (*syncobj) device->ws->destroy_syncobj(device->ws, *syncobj); @@ -4748,7 +4863,7 @@ if (!syncobj_handle) { int ret = device->ws->create_syncobj(device->ws, &syncobj_handle); if (ret) { - return vk_error(device->instance, VK_ERROR_INVALID_EXTERNAL_HANDLE_KHR); + return vk_error(device->instance, VK_ERROR_INVALID_EXTERNAL_HANDLE); } } @@ -4757,7 +4872,7 @@ } else { int ret = device->ws->import_syncobj_from_sync_file(device->ws, syncobj_handle, fd); if (ret != 0) - return vk_error(device->instance, VK_ERROR_INVALID_EXTERNAL_HANDLE_KHR); + return vk_error(device->instance, VK_ERROR_INVALID_EXTERNAL_HANDLE); } *syncobj = syncobj_handle; @@ -4774,16 +4889,16 @@ RADV_FROM_HANDLE(radv_semaphore, sem, pImportSemaphoreFdInfo->semaphore); uint32_t *syncobj_dst = NULL; - if (pImportSemaphoreFdInfo->flags & VK_SEMAPHORE_IMPORT_TEMPORARY_BIT_KHR) { + if (pImportSemaphoreFdInfo->flags & VK_SEMAPHORE_IMPORT_TEMPORARY_BIT) { syncobj_dst = &sem->temp_syncobj; } else 
{ syncobj_dst = &sem->syncobj; } switch(pImportSemaphoreFdInfo->handleType) { - case VK_EXTERNAL_SEMAPHORE_HANDLE_TYPE_OPAQUE_FD_BIT_KHR: + case VK_EXTERNAL_SEMAPHORE_HANDLE_TYPE_OPAQUE_FD_BIT: return radv_import_opaque_fd(device, pImportSemaphoreFdInfo->fd, syncobj_dst); - case VK_EXTERNAL_SEMAPHORE_HANDLE_TYPE_SYNC_FD_BIT_KHR: + case VK_EXTERNAL_SEMAPHORE_HANDLE_TYPE_SYNC_FD_BIT: return radv_import_sync_fd(device, pImportSemaphoreFdInfo->fd, syncobj_dst); default: unreachable("Unhandled semaphore handle type"); @@ -4805,10 +4920,10 @@ syncobj_handle = sem->syncobj; switch(pGetFdInfo->handleType) { - case VK_EXTERNAL_SEMAPHORE_HANDLE_TYPE_OPAQUE_FD_BIT_KHR: + case VK_EXTERNAL_SEMAPHORE_HANDLE_TYPE_OPAQUE_FD_BIT: ret = device->ws->export_syncobj(device->ws, syncobj_handle, pFd); break; - case VK_EXTERNAL_SEMAPHORE_HANDLE_TYPE_SYNC_FD_BIT_KHR: + case VK_EXTERNAL_SEMAPHORE_HANDLE_TYPE_SYNC_FD_BIT: ret = device->ws->export_syncobj_to_sync_file(device->ws, syncobj_handle, pFd); if (!ret) { if (sem->temp_syncobj) { @@ -4824,30 +4939,30 @@ } if (ret) - return vk_error(device->instance, VK_ERROR_INVALID_EXTERNAL_HANDLE_KHR); + return vk_error(device->instance, VK_ERROR_INVALID_EXTERNAL_HANDLE); return VK_SUCCESS; } void radv_GetPhysicalDeviceExternalSemaphoreProperties( VkPhysicalDevice physicalDevice, - const VkPhysicalDeviceExternalSemaphoreInfoKHR* pExternalSemaphoreInfo, - VkExternalSemaphorePropertiesKHR* pExternalSemaphoreProperties) + const VkPhysicalDeviceExternalSemaphoreInfo *pExternalSemaphoreInfo, + VkExternalSemaphoreProperties *pExternalSemaphoreProperties) { RADV_FROM_HANDLE(radv_physical_device, pdevice, physicalDevice); /* Require has_syncobj_wait_for_submit for the syncobj signal ioctl introduced at virtually the same time */ if (pdevice->rad_info.has_syncobj_wait_for_submit && - (pExternalSemaphoreInfo->handleType == VK_EXTERNAL_SEMAPHORE_HANDLE_TYPE_OPAQUE_FD_BIT_KHR || - pExternalSemaphoreInfo->handleType == 
VK_EXTERNAL_SEMAPHORE_HANDLE_TYPE_SYNC_FD_BIT_KHR)) { - pExternalSemaphoreProperties->exportFromImportedHandleTypes = VK_EXTERNAL_SEMAPHORE_HANDLE_TYPE_OPAQUE_FD_BIT_KHR | VK_EXTERNAL_SEMAPHORE_HANDLE_TYPE_SYNC_FD_BIT_KHR; - pExternalSemaphoreProperties->compatibleHandleTypes = VK_EXTERNAL_SEMAPHORE_HANDLE_TYPE_OPAQUE_FD_BIT_KHR | VK_EXTERNAL_SEMAPHORE_HANDLE_TYPE_SYNC_FD_BIT_KHR; - pExternalSemaphoreProperties->externalSemaphoreFeatures = VK_EXTERNAL_SEMAPHORE_FEATURE_EXPORTABLE_BIT_KHR | - VK_EXTERNAL_SEMAPHORE_FEATURE_IMPORTABLE_BIT_KHR; - } else if (pExternalSemaphoreInfo->handleType == VK_EXTERNAL_SEMAPHORE_HANDLE_TYPE_OPAQUE_FD_BIT_KHR) { - pExternalSemaphoreProperties->exportFromImportedHandleTypes = VK_EXTERNAL_SEMAPHORE_HANDLE_TYPE_OPAQUE_FD_BIT_KHR; - pExternalSemaphoreProperties->compatibleHandleTypes = VK_EXTERNAL_SEMAPHORE_HANDLE_TYPE_OPAQUE_FD_BIT_KHR; - pExternalSemaphoreProperties->externalSemaphoreFeatures = VK_EXTERNAL_SEMAPHORE_FEATURE_EXPORTABLE_BIT_KHR | - VK_EXTERNAL_SEMAPHORE_FEATURE_IMPORTABLE_BIT_KHR; + (pExternalSemaphoreInfo->handleType == VK_EXTERNAL_SEMAPHORE_HANDLE_TYPE_OPAQUE_FD_BIT || + pExternalSemaphoreInfo->handleType == VK_EXTERNAL_SEMAPHORE_HANDLE_TYPE_SYNC_FD_BIT)) { + pExternalSemaphoreProperties->exportFromImportedHandleTypes = VK_EXTERNAL_SEMAPHORE_HANDLE_TYPE_OPAQUE_FD_BIT | VK_EXTERNAL_SEMAPHORE_HANDLE_TYPE_SYNC_FD_BIT; + pExternalSemaphoreProperties->compatibleHandleTypes = VK_EXTERNAL_SEMAPHORE_HANDLE_TYPE_OPAQUE_FD_BIT | VK_EXTERNAL_SEMAPHORE_HANDLE_TYPE_SYNC_FD_BIT; + pExternalSemaphoreProperties->externalSemaphoreFeatures = VK_EXTERNAL_SEMAPHORE_FEATURE_EXPORTABLE_BIT | + VK_EXTERNAL_SEMAPHORE_FEATURE_IMPORTABLE_BIT; + } else if (pExternalSemaphoreInfo->handleType == VK_EXTERNAL_SEMAPHORE_HANDLE_TYPE_OPAQUE_FD_BIT) { + pExternalSemaphoreProperties->exportFromImportedHandleTypes = VK_EXTERNAL_SEMAPHORE_HANDLE_TYPE_OPAQUE_FD_BIT; + pExternalSemaphoreProperties->compatibleHandleTypes = 
VK_EXTERNAL_SEMAPHORE_HANDLE_TYPE_OPAQUE_FD_BIT; + pExternalSemaphoreProperties->externalSemaphoreFeatures = VK_EXTERNAL_SEMAPHORE_FEATURE_EXPORTABLE_BIT | + VK_EXTERNAL_SEMAPHORE_FEATURE_IMPORTABLE_BIT; } else { pExternalSemaphoreProperties->exportFromImportedHandleTypes = 0; pExternalSemaphoreProperties->compatibleHandleTypes = 0; @@ -4863,16 +4978,16 @@ uint32_t *syncobj_dst = NULL; - if (pImportFenceFdInfo->flags & VK_FENCE_IMPORT_TEMPORARY_BIT_KHR) { + if (pImportFenceFdInfo->flags & VK_FENCE_IMPORT_TEMPORARY_BIT) { syncobj_dst = &fence->temp_syncobj; } else { syncobj_dst = &fence->syncobj; } switch(pImportFenceFdInfo->handleType) { - case VK_EXTERNAL_FENCE_HANDLE_TYPE_OPAQUE_FD_BIT_KHR: + case VK_EXTERNAL_FENCE_HANDLE_TYPE_OPAQUE_FD_BIT: return radv_import_opaque_fd(device, pImportFenceFdInfo->fd, syncobj_dst); - case VK_EXTERNAL_FENCE_HANDLE_TYPE_SYNC_FD_BIT_KHR: + case VK_EXTERNAL_FENCE_HANDLE_TYPE_SYNC_FD_BIT: return radv_import_sync_fd(device, pImportFenceFdInfo->fd, syncobj_dst); default: unreachable("Unhandled fence handle type"); @@ -4894,10 +5009,10 @@ syncobj_handle = fence->syncobj; switch(pGetFdInfo->handleType) { - case VK_EXTERNAL_FENCE_HANDLE_TYPE_OPAQUE_FD_BIT_KHR: + case VK_EXTERNAL_FENCE_HANDLE_TYPE_OPAQUE_FD_BIT: ret = device->ws->export_syncobj(device->ws, syncobj_handle, pFd); break; - case VK_EXTERNAL_FENCE_HANDLE_TYPE_SYNC_FD_BIT_KHR: + case VK_EXTERNAL_FENCE_HANDLE_TYPE_SYNC_FD_BIT: ret = device->ws->export_syncobj_to_sync_file(device->ws, syncobj_handle, pFd); if (!ret) { if (fence->temp_syncobj) { @@ -4913,24 +5028,24 @@ } if (ret) - return vk_error(device->instance, VK_ERROR_INVALID_EXTERNAL_HANDLE_KHR); + return vk_error(device->instance, VK_ERROR_INVALID_EXTERNAL_HANDLE); return VK_SUCCESS; } void radv_GetPhysicalDeviceExternalFenceProperties( VkPhysicalDevice physicalDevice, - const VkPhysicalDeviceExternalFenceInfoKHR* pExternalFenceInfo, - VkExternalFencePropertiesKHR* pExternalFenceProperties) + const 
VkPhysicalDeviceExternalFenceInfo *pExternalFenceInfo, + VkExternalFenceProperties *pExternalFenceProperties) { RADV_FROM_HANDLE(radv_physical_device, pdevice, physicalDevice); if (pdevice->rad_info.has_syncobj_wait_for_submit && - (pExternalFenceInfo->handleType == VK_EXTERNAL_FENCE_HANDLE_TYPE_OPAQUE_FD_BIT_KHR || - pExternalFenceInfo->handleType == VK_EXTERNAL_FENCE_HANDLE_TYPE_SYNC_FD_BIT_KHR)) { - pExternalFenceProperties->exportFromImportedHandleTypes = VK_EXTERNAL_FENCE_HANDLE_TYPE_OPAQUE_FD_BIT_KHR | VK_EXTERNAL_FENCE_HANDLE_TYPE_SYNC_FD_BIT_KHR; - pExternalFenceProperties->compatibleHandleTypes = VK_EXTERNAL_FENCE_HANDLE_TYPE_OPAQUE_FD_BIT_KHR | VK_EXTERNAL_FENCE_HANDLE_TYPE_SYNC_FD_BIT_KHR; - pExternalFenceProperties->externalFenceFeatures = VK_EXTERNAL_FENCE_FEATURE_EXPORTABLE_BIT_KHR | - VK_EXTERNAL_SEMAPHORE_FEATURE_IMPORTABLE_BIT_KHR; + (pExternalFenceInfo->handleType == VK_EXTERNAL_FENCE_HANDLE_TYPE_OPAQUE_FD_BIT || + pExternalFenceInfo->handleType == VK_EXTERNAL_FENCE_HANDLE_TYPE_SYNC_FD_BIT)) { + pExternalFenceProperties->exportFromImportedHandleTypes = VK_EXTERNAL_FENCE_HANDLE_TYPE_OPAQUE_FD_BIT | VK_EXTERNAL_FENCE_HANDLE_TYPE_SYNC_FD_BIT; + pExternalFenceProperties->compatibleHandleTypes = VK_EXTERNAL_FENCE_HANDLE_TYPE_OPAQUE_FD_BIT | VK_EXTERNAL_FENCE_HANDLE_TYPE_SYNC_FD_BIT; + pExternalFenceProperties->externalFenceFeatures = VK_EXTERNAL_FENCE_FEATURE_EXPORTABLE_BIT | + VK_EXTERNAL_SEMAPHORE_FEATURE_IMPORTABLE_BIT; } else { pExternalFenceProperties->exportFromImportedHandleTypes = 0; pExternalFenceProperties->compatibleHandleTypes = 0; diff -Nru mesa-18.3.3/src/amd/vulkan/radv_extensions.py mesa-19.0.1/src/amd/vulkan/radv_extensions.py --- mesa-18.3.3/src/amd/vulkan/radv_extensions.py 2019-01-13 21:16:37.000000000 +0000 +++ mesa-19.0.1/src/amd/vulkan/radv_extensions.py 2019-03-31 23:16:37.000000000 +0000 @@ -31,7 +31,7 @@ from mako.template import Template -MAX_API_VERSION = '1.1.70' +MAX_API_VERSION = '1.1.90' class Extension: def 
__init__(self, name, ext_version, enable): @@ -51,7 +51,7 @@ # and dEQP-VK.api.info.device fail due to the duplicated strings. EXTENSIONS = [ Extension('VK_ANDROID_native_buffer', 5, 'ANDROID && device->rad_info.has_syncobj_wait_for_submit'), - Extension('VK_KHR_16bit_storage', 1, 'HAVE_LLVM >= 0x0700'), + Extension('VK_KHR_16bit_storage', 1, True), Extension('VK_KHR_bind_memory2', 1, True), Extension('VK_KHR_create_renderpass2', 1, True), Extension('VK_KHR_dedicated_allocation', 1, True), @@ -105,8 +105,11 @@ Extension('VK_EXT_external_memory_dma_buf', 1, True), Extension('VK_EXT_external_memory_host', 1, 'device->rad_info.has_userptr'), Extension('VK_EXT_global_priority', 1, 'device->rad_info.has_ctx_priority'), - Extension('VK_EXT_pci_bus_info', 1, False), + Extension('VK_EXT_memory_budget', 1, True), + Extension('VK_EXT_memory_priority', 1, True), + Extension('VK_EXT_pci_bus_info', 2, True), Extension('VK_EXT_sampler_filter_minmax', 1, 'device->rad_info.chip_class >= CIK'), + Extension('VK_EXT_scalar_block_layout', 1, 'device->rad_info.chip_class >= CIK'), Extension('VK_EXT_shader_viewport_index_layer', 1, True), Extension('VK_EXT_shader_stencil_export', 1, True), Extension('VK_EXT_transform_feedback', 1, True), @@ -182,6 +185,32 @@ ext = ext_name_map[ext_name] ext.type = ext_elem.attrib['type'] + ext.promotedto = ext_elem.attrib.get('promotedto', None) + try: + ext.requires = ext_elem.attrib['requires'].split(',') + except KeyError: + ext.requires = [] + + def extra_deps(ext): + if ext.type == 'instance': + check = 'instance->enabled_extensions.{}'.format(ext.name[3:]) + if ext.promotedto is not None: + # the xml contains values like VK_VERSION_1_1, but we need to + # translate them to VK_API_VERSION_1_1 for the apiVersion check + api_ver = ext.promotedto.replace('VK_VER', 'VK_API_VER') + check = '({} || instance->apiVersion >= {})'.format(check, api_ver) + return set([check]) + + deps = set() + for dep in ext.requires: + deps |= extra_deps(ext_name_map[dep]) 
+ + return deps + + for ext in EXTENSIONS: + if ext.type == 'device': + for dep in extra_deps(ext): + ext.enable += ' && ' + dep _TEMPLATE_H = Template(COPYRIGHT + """ #ifndef RADV_EXTENSIONS_H @@ -276,6 +305,7 @@ void radv_fill_device_extension_table(const struct radv_physical_device *device, struct radv_device_extension_table* table) { + const struct radv_instance *instance = device->instance; %for ext in device_extensions: table->${ext.name[3:]} = ${ext.enable}; %endfor @@ -292,7 +322,7 @@ radv_physical_device_api_version(struct radv_physical_device *dev) { if (!ANDROID && dev->rad_info.has_syncobj_wait_for_submit) - return VK_MAKE_VERSION(1, 1, 70); + return ${MAX_API_VERSION.c_vk_version()}; return VK_MAKE_VERSION(1, 0, 68); } """) diff -Nru mesa-18.3.3/src/amd/vulkan/radv_formats.c mesa-19.0.1/src/amd/vulkan/radv_formats.c --- mesa-18.3.3/src/amd/vulkan/radv_formats.c 2018-12-07 18:58:04.000000000 +0000 +++ mesa-19.0.1/src/amd/vulkan/radv_formats.c 2019-03-31 23:16:37.000000000 +0000 @@ -595,6 +595,14 @@ } } +bool +radv_device_supports_etc(struct radv_physical_device *physical_device) +{ + return physical_device->rad_info.family == CHIP_VEGA10 || + physical_device->rad_info.family == CHIP_RAVEN || + physical_device->rad_info.family == CHIP_STONEY; +} + static void radv_physical_device_get_format_properties(struct radv_physical_device *physical_device, VkFormat format, @@ -612,9 +620,7 @@ } if (desc->layout == VK_FORMAT_LAYOUT_ETC && - physical_device->rad_info.family != CHIP_VEGA10 && - physical_device->rad_info.family != CHIP_RAVEN && - physical_device->rad_info.family != CHIP_STONEY) { + !radv_device_supports_etc(physical_device)) { out_properties->linearTilingFeatures = linear; out_properties->optimalTilingFeatures = tiled; out_properties->bufferFeatures = buffer; @@ -639,8 +645,8 @@ tiled |= VK_FORMAT_FEATURE_SAMPLED_IMAGE_BIT; tiled |= VK_FORMAT_FEATURE_BLIT_SRC_BIT | VK_FORMAT_FEATURE_BLIT_DST_BIT; - tiled |= VK_FORMAT_FEATURE_TRANSFER_SRC_BIT_KHR | - 
VK_FORMAT_FEATURE_TRANSFER_DST_BIT_KHR; + tiled |= VK_FORMAT_FEATURE_TRANSFER_SRC_BIT | + VK_FORMAT_FEATURE_TRANSFER_DST_BIT; if (radv_is_filter_minmax_format_supported(format)) tiled |= VK_FORMAT_FEATURE_SAMPLED_IMAGE_FILTER_MINMAX_BIT_EXT; @@ -684,8 +690,8 @@ } } if (tiled && !scaled) { - tiled |= VK_FORMAT_FEATURE_TRANSFER_SRC_BIT_KHR | - VK_FORMAT_FEATURE_TRANSFER_DST_BIT_KHR; + tiled |= VK_FORMAT_FEATURE_TRANSFER_SRC_BIT | + VK_FORMAT_FEATURE_TRANSFER_DST_BIT; } /* Tiled formatting does not support NPOT pixel sizes */ @@ -694,8 +700,8 @@ } if (linear && !scaled) { - linear |= VK_FORMAT_FEATURE_TRANSFER_SRC_BIT_KHR | - VK_FORMAT_FEATURE_TRANSFER_DST_BIT_KHR; + linear |= VK_FORMAT_FEATURE_TRANSFER_SRC_BIT | + VK_FORMAT_FEATURE_TRANSFER_DST_BIT; } if (format == VK_FORMAT_R32_UINT || format == VK_FORMAT_R32_SINT) { @@ -1026,7 +1032,7 @@ void radv_GetPhysicalDeviceFormatProperties2( VkPhysicalDevice physicalDevice, VkFormat format, - VkFormatProperties2KHR* pFormatProperties) + VkFormatProperties2* pFormatProperties) { RADV_FROM_HANDLE(radv_physical_device, physical_device, physicalDevice); @@ -1036,7 +1042,7 @@ } static VkResult radv_get_image_format_properties(struct radv_physical_device *physical_device, - const VkPhysicalDeviceImageFormatInfo2KHR *info, + const VkPhysicalDeviceImageFormatInfo2 *info, VkImageFormatProperties *pImageFormatProperties) { @@ -1093,8 +1099,7 @@ info->type == VK_IMAGE_TYPE_2D && (format_feature_flags & (VK_FORMAT_FEATURE_COLOR_ATTACHMENT_BIT | VK_FORMAT_FEATURE_DEPTH_STENCIL_ATTACHMENT_BIT)) && - !(info->flags & VK_IMAGE_CREATE_CUBE_COMPATIBLE_BIT) && - !(info->usage & VK_IMAGE_USAGE_STORAGE_BIT)) { + !(info->flags & VK_IMAGE_CREATE_CUBE_COMPATIBLE_BIT)) { sampleCounts |= VK_SAMPLE_COUNT_2_BIT | VK_SAMPLE_COUNT_4_BIT | VK_SAMPLE_COUNT_8_BIT; } @@ -1112,6 +1117,18 @@ maxMipLevels = 1; } + + /* We can't create 3d compressed 128bpp images that can be rendered to on GFX9 */ + if (physical_device->rad_info.chip_class >= GFX9 && + 
info->type == VK_IMAGE_TYPE_3D && + vk_format_get_blocksizebits(info->format) == 128 && + vk_format_is_compressed(info->format) && + (info->flags & VK_IMAGE_CREATE_BLOCK_TEXEL_VIEW_COMPATIBLE_BIT) && + ((info->flags & VK_IMAGE_CREATE_EXTENDED_USAGE_BIT) || + (info->usage & VK_FORMAT_FEATURE_COLOR_ATTACHMENT_BIT))) { + goto unsupported; + } + if (info->usage & VK_IMAGE_USAGE_SAMPLED_BIT) { if (!(format_feature_flags & VK_FORMAT_FEATURE_SAMPLED_IMAGE_BIT)) { goto unsupported; @@ -1191,8 +1208,8 @@ { RADV_FROM_HANDLE(radv_physical_device, physical_device, physicalDevice); - const VkPhysicalDeviceImageFormatInfo2KHR info = { - .sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_IMAGE_FORMAT_INFO_2_KHR, + const VkPhysicalDeviceImageFormatInfo2 info = { + .sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_IMAGE_FORMAT_INFO_2, .pNext = NULL, .format = format, .type = type, @@ -1206,20 +1223,20 @@ } static void -get_external_image_format_properties(const VkPhysicalDeviceImageFormatInfo2KHR *pImageFormatInfo, - VkExternalMemoryHandleTypeFlagBitsKHR handleType, - VkExternalMemoryPropertiesKHR *external_properties) -{ - VkExternalMemoryFeatureFlagBitsKHR flags = 0; - VkExternalMemoryHandleTypeFlagsKHR export_flags = 0; - VkExternalMemoryHandleTypeFlagsKHR compat_flags = 0; +get_external_image_format_properties(const VkPhysicalDeviceImageFormatInfo2 *pImageFormatInfo, + VkExternalMemoryHandleTypeFlagBits handleType, + VkExternalMemoryProperties *external_properties) +{ + VkExternalMemoryFeatureFlagBits flags = 0; + VkExternalMemoryHandleTypeFlags export_flags = 0; + VkExternalMemoryHandleTypeFlags compat_flags = 0; switch (handleType) { - case VK_EXTERNAL_MEMORY_HANDLE_TYPE_OPAQUE_FD_BIT_KHR: + case VK_EXTERNAL_MEMORY_HANDLE_TYPE_OPAQUE_FD_BIT: case VK_EXTERNAL_MEMORY_HANDLE_TYPE_DMA_BUF_BIT_EXT: switch (pImageFormatInfo->type) { case VK_IMAGE_TYPE_2D: - flags = 
VK_EXTERNAL_MEMORY_FEATURE_DEDICATED_ONLY_BIT_KHR|VK_EXTERNAL_MEMORY_FEATURE_EXPORTABLE_BIT_KHR|VK_EXTERNAL_MEMORY_FEATURE_IMPORTABLE_BIT_KHR; - compat_flags = export_flags = VK_EXTERNAL_MEMORY_HANDLE_TYPE_OPAQUE_FD_BIT_KHR | + flags = VK_EXTERNAL_MEMORY_FEATURE_DEDICATED_ONLY_BIT|VK_EXTERNAL_MEMORY_FEATURE_EXPORTABLE_BIT|VK_EXTERNAL_MEMORY_FEATURE_IMPORTABLE_BIT; + compat_flags = export_flags = VK_EXTERNAL_MEMORY_HANDLE_TYPE_OPAQUE_FD_BIT | VK_EXTERNAL_MEMORY_HANDLE_TYPE_DMA_BUF_BIT_EXT; break; default: @@ -1227,14 +1244,14 @@ } break; case VK_EXTERNAL_MEMORY_HANDLE_TYPE_HOST_ALLOCATION_BIT_EXT: - flags = VK_EXTERNAL_MEMORY_FEATURE_IMPORTABLE_BIT_KHR; + flags = VK_EXTERNAL_MEMORY_FEATURE_IMPORTABLE_BIT; compat_flags = VK_EXTERNAL_MEMORY_HANDLE_TYPE_HOST_ALLOCATION_BIT_EXT; break; default: break; } - *external_properties = (VkExternalMemoryPropertiesKHR) { + *external_properties = (VkExternalMemoryProperties) { .externalMemoryFeatures = flags, .exportFromImportedHandleTypes = export_flags, .compatibleHandleTypes = compat_flags, @@ -1243,12 +1260,12 @@ VkResult radv_GetPhysicalDeviceImageFormatProperties2( VkPhysicalDevice physicalDevice, - const VkPhysicalDeviceImageFormatInfo2KHR *base_info, - VkImageFormatProperties2KHR *base_props) + const VkPhysicalDeviceImageFormatInfo2 *base_info, + VkImageFormatProperties2 *base_props) { RADV_FROM_HANDLE(radv_physical_device, physical_device, physicalDevice); - const VkPhysicalDeviceExternalImageFormatInfoKHR *external_info = NULL; - VkExternalImageFormatPropertiesKHR *external_props = NULL; + const VkPhysicalDeviceExternalImageFormatInfo *external_info = NULL; + VkExternalImageFormatProperties *external_props = NULL; VkResult result; result = radv_get_image_format_properties(physical_device, base_info, @@ -1259,7 +1276,7 @@ /* Extract input structs */ vk_foreach_struct_const(s, base_info->pNext) { switch (s->sType) { - case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_EXTERNAL_IMAGE_FORMAT_INFO_KHR: + case 
VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_EXTERNAL_IMAGE_FORMAT_INFO: external_info = (const void *) s; break; default: @@ -1270,7 +1287,7 @@ /* Extract output structs */ vk_foreach_struct(s, base_props->pNext) { switch (s->sType) { - case VK_STRUCTURE_TYPE_EXTERNAL_IMAGE_FORMAT_PROPERTIES_KHR: + case VK_STRUCTURE_TYPE_EXTERNAL_IMAGE_FORMAT_PROPERTIES: external_props = (void *) s; break; default: @@ -1278,26 +1295,26 @@ } } - /* From the Vulkan 1.0.42 spec: + /* From the Vulkan 1.0.97 spec: * - * If handleType is 0, vkGetPhysicalDeviceImageFormatProperties2KHR will - * behave as if VkPhysicalDeviceExternalImageFormatInfoKHR was not - * present and VkExternalImageFormatPropertiesKHR will be ignored. + * If handleType is 0, vkGetPhysicalDeviceImageFormatProperties2 will + * behave as if VkPhysicalDeviceExternalImageFormatInfo was not + * present and VkExternalImageFormatProperties will be ignored. */ if (external_info && external_info->handleType != 0) { switch (external_info->handleType) { - case VK_EXTERNAL_MEMORY_HANDLE_TYPE_OPAQUE_FD_BIT_KHR: + case VK_EXTERNAL_MEMORY_HANDLE_TYPE_OPAQUE_FD_BIT: case VK_EXTERNAL_MEMORY_HANDLE_TYPE_DMA_BUF_BIT_EXT: case VK_EXTERNAL_MEMORY_HANDLE_TYPE_HOST_ALLOCATION_BIT_EXT: get_external_image_format_properties(base_info, external_info->handleType, &external_props->externalMemoryProperties); break; default: - /* From the Vulkan 1.0.42 spec: + /* From the Vulkan 1.0.97 spec: * * If handleType is not compatible with the [parameters] specified - * in VkPhysicalDeviceImageFormatInfo2KHR, then - * vkGetPhysicalDeviceImageFormatProperties2KHR returns + * in VkPhysicalDeviceImageFormatInfo2, then + * vkGetPhysicalDeviceImageFormatProperties2 returns * VK_ERROR_FORMAT_NOT_SUPPORTED. 
*/ result = vk_errorf(physical_device->instance, VK_ERROR_FORMAT_NOT_SUPPORTED, @@ -1311,10 +1328,10 @@ fail: if (result == VK_ERROR_FORMAT_NOT_SUPPORTED) { - /* From the Vulkan 1.0.42 spec: + /* From the Vulkan 1.0.97 spec: * * If the combination of parameters to - * vkGetPhysicalDeviceImageFormatProperties2KHR is not supported by + * vkGetPhysicalDeviceImageFormatProperties2 is not supported by * the implementation for use in vkCreateImage, then all members of * imageFormatProperties will be filled with zero. */ @@ -1340,9 +1357,9 @@ void radv_GetPhysicalDeviceSparseImageFormatProperties2( VkPhysicalDevice physicalDevice, - const VkPhysicalDeviceSparseImageFormatInfo2KHR* pFormatInfo, + const VkPhysicalDeviceSparseImageFormatInfo2 *pFormatInfo, uint32_t *pPropertyCount, - VkSparseImageFormatProperties2KHR* pProperties) + VkSparseImageFormatProperties2 *pProperties) { /* Sparse images are not yet supported. */ *pPropertyCount = 0; @@ -1350,28 +1367,28 @@ void radv_GetPhysicalDeviceExternalBufferProperties( VkPhysicalDevice physicalDevice, - const VkPhysicalDeviceExternalBufferInfoKHR *pExternalBufferInfo, - VkExternalBufferPropertiesKHR *pExternalBufferProperties) + const VkPhysicalDeviceExternalBufferInfo *pExternalBufferInfo, + VkExternalBufferProperties *pExternalBufferProperties) { - VkExternalMemoryFeatureFlagBitsKHR flags = 0; - VkExternalMemoryHandleTypeFlagsKHR export_flags = 0; - VkExternalMemoryHandleTypeFlagsKHR compat_flags = 0; + VkExternalMemoryFeatureFlagBits flags = 0; + VkExternalMemoryHandleTypeFlags export_flags = 0; + VkExternalMemoryHandleTypeFlags compat_flags = 0; switch(pExternalBufferInfo->handleType) { - case VK_EXTERNAL_MEMORY_HANDLE_TYPE_OPAQUE_FD_BIT_KHR: + case VK_EXTERNAL_MEMORY_HANDLE_TYPE_OPAQUE_FD_BIT: case VK_EXTERNAL_MEMORY_HANDLE_TYPE_DMA_BUF_BIT_EXT: - flags = VK_EXTERNAL_MEMORY_FEATURE_EXPORTABLE_BIT_KHR | - VK_EXTERNAL_MEMORY_FEATURE_IMPORTABLE_BIT_KHR; - compat_flags = export_flags = 
VK_EXTERNAL_MEMORY_HANDLE_TYPE_OPAQUE_FD_BIT_KHR | + flags = VK_EXTERNAL_MEMORY_FEATURE_EXPORTABLE_BIT | + VK_EXTERNAL_MEMORY_FEATURE_IMPORTABLE_BIT; + compat_flags = export_flags = VK_EXTERNAL_MEMORY_HANDLE_TYPE_OPAQUE_FD_BIT | VK_EXTERNAL_MEMORY_HANDLE_TYPE_DMA_BUF_BIT_EXT; break; case VK_EXTERNAL_MEMORY_HANDLE_TYPE_HOST_ALLOCATION_BIT_EXT: - flags = VK_EXTERNAL_MEMORY_FEATURE_IMPORTABLE_BIT_KHR; + flags = VK_EXTERNAL_MEMORY_FEATURE_IMPORTABLE_BIT; compat_flags = VK_EXTERNAL_MEMORY_HANDLE_TYPE_HOST_ALLOCATION_BIT_EXT; break; default: break; } - pExternalBufferProperties->externalMemoryProperties = (VkExternalMemoryPropertiesKHR) { + pExternalBufferProperties->externalMemoryProperties = (VkExternalMemoryProperties) { .externalMemoryFeatures = flags, .exportFromImportedHandleTypes = export_flags, .compatibleHandleTypes = compat_flags, diff -Nru mesa-18.3.3/src/amd/vulkan/radv_image.c mesa-19.0.1/src/amd/vulkan/radv_image.c --- mesa-18.3.3/src/amd/vulkan/radv_image.c 2019-02-01 12:03:20.000000000 +0000 +++ mesa-19.0.1/src/amd/vulkan/radv_image.c 2019-03-31 23:16:37.000000000 +0000 @@ -73,7 +73,7 @@ return false; if ((pCreateInfo->usage & VK_IMAGE_USAGE_STORAGE_BIT) || - (pCreateInfo->flags & VK_IMAGE_CREATE_EXTENDED_USAGE_BIT_KHR)) + (pCreateInfo->flags & VK_IMAGE_CREATE_EXTENDED_USAGE_BIT)) return false; if (pCreateInfo->tiling == VK_IMAGE_TILING_LINEAR) @@ -122,13 +122,12 @@ static bool radv_use_dcc_for_image(struct radv_device *device, + const struct radv_image *image, const struct radv_image_create_info *create_info, const VkImageCreateInfo *pCreateInfo) { bool dcc_compatible_formats; bool blendable; - bool shareable = vk_find_struct_const(pCreateInfo->pNext, - EXTERNAL_MEMORY_IMAGE_CREATE_INFO_KHR) != NULL; /* DCC (Delta Color Compression) is only available for GFX8+. 
*/ if (device->physical_device->rad_info.chip_class < VI) @@ -139,12 +138,12 @@ /* FIXME: DCC is broken for shareable images starting with GFX9 */ if (device->physical_device->rad_info.chip_class >= GFX9 && - shareable) + image->shareable) return false; /* TODO: Enable DCC for storage images. */ if ((pCreateInfo->usage & VK_IMAGE_USAGE_STORAGE_BIT) || - (pCreateInfo->flags & VK_IMAGE_CREATE_EXTENDED_USAGE_BIT_KHR)) + (pCreateInfo->flags & VK_IMAGE_CREATE_EXTENDED_USAGE_BIT)) return false; if (pCreateInfo->tiling == VK_IMAGE_TILING_LINEAR) @@ -198,6 +197,7 @@ static int radv_init_surface(struct radv_device *device, + const struct radv_image *image, struct radeon_surf *surface, const struct radv_image_create_info *create_info) { @@ -249,9 +249,15 @@ if (is_stencil) surface->flags |= RADEON_SURF_SBUFFER; + if (device->physical_device->rad_info.chip_class >= GFX9 && + pCreateInfo->imageType == VK_IMAGE_TYPE_3D && + vk_format_get_blocksizebits(pCreateInfo->format) == 128 && + vk_format_is_compressed(pCreateInfo->format)) + surface->flags |= RADEON_SURF_NO_RENDER_TARGET; + surface->flags |= RADEON_SURF_OPTIMIZE_FOR_SPACE; - if (!radv_use_dcc_for_image(device, create_info, pCreateInfo)) + if (!radv_use_dcc_for_image(device, image, create_info, pCreateInfo)) surface->flags |= RADEON_SURF_DISABLE_DCC; if (create_info->scanout) @@ -857,8 +863,9 @@ image->dcc_offset = align64(image->size, image->surface.dcc_alignment); /* + 16 for storing the clear values + dcc pred */ image->clear_value_offset = image->dcc_offset + image->surface.dcc_size; - image->dcc_pred_offset = image->clear_value_offset + 8; - image->size = image->dcc_offset + image->surface.dcc_size + 16; + image->fce_pred_offset = image->clear_value_offset + 8; + image->dcc_pred_offset = image->clear_value_offset + 16; + image->size = image->dcc_offset + image->surface.dcc_size + 24; image->alignment = MAX2(image->alignment, image->surface.dcc_alignment); } @@ -931,8 +938,8 @@ static inline bool 
radv_image_can_enable_htile(struct radv_image *image) { - return image->info.levels == 1 && - vk_format_is_depth(image->vk_format) && + return radv_image_has_htile(image) && + image->info.levels == 1 && image->info.width * image->info.height >= 8 * 8; } @@ -977,19 +984,19 @@ image->exclusive = pCreateInfo->sharingMode == VK_SHARING_MODE_EXCLUSIVE; if (pCreateInfo->sharingMode == VK_SHARING_MODE_CONCURRENT) { for (uint32_t i = 0; i < pCreateInfo->queueFamilyIndexCount; ++i) - if (pCreateInfo->pQueueFamilyIndices[i] == VK_QUEUE_FAMILY_EXTERNAL_KHR) + if (pCreateInfo->pQueueFamilyIndices[i] == VK_QUEUE_FAMILY_EXTERNAL) image->queue_family_mask |= (1u << RADV_MAX_QUEUE_FAMILIES) - 1u; else image->queue_family_mask |= 1u << pCreateInfo->pQueueFamilyIndices[i]; } image->shareable = vk_find_struct_const(pCreateInfo->pNext, - EXTERNAL_MEMORY_IMAGE_CREATE_INFO_KHR) != NULL; + EXTERNAL_MEMORY_IMAGE_CREATE_INFO) != NULL; if (!vk_format_is_depth_or_stencil(pCreateInfo->format) && !create_info->scanout && !image->shareable) { image->info.surf_index = &device->image_mrt_offset_counter; } - radv_init_surface(device, &image->surface, create_info); + radv_init_surface(device, image, &image->surface, create_info); device->ws->surface_init(device->ws, &image->info, &image->surface); @@ -1039,7 +1046,7 @@ image->offset = 0; image->bo = device->ws->buffer_create(device->ws, image->size, image->alignment, - 0, RADEON_FLAG_VIRTUAL); + 0, RADEON_FLAG_VIRTUAL, RADV_BO_PRIORITY_VIRTUAL); if (!image->bo) { vk_free2(&device->alloc, alloc, image); return vk_error(device->instance, VK_ERROR_OUT_OF_DEVICE_MEMORY); @@ -1257,7 +1264,7 @@ { if (!image->exclusive) return image->queue_family_mask; - if (family == VK_QUEUE_FAMILY_EXTERNAL_KHR) + if (family == VK_QUEUE_FAMILY_EXTERNAL) return (1u << RADV_MAX_QUEUE_FAMILIES) - 1u; if (family == VK_QUEUE_FAMILY_IGNORED) return 1u << queue_family; diff -Nru mesa-18.3.3/src/amd/vulkan/radv_llvm_helper.cpp mesa-19.0.1/src/amd/vulkan/radv_llvm_helper.cpp --- 
mesa-18.3.3/src/amd/vulkan/radv_llvm_helper.cpp 2018-09-27 19:13:53.000000000 +0000 +++ mesa-19.0.1/src/amd/vulkan/radv_llvm_helper.cpp 2019-03-31 23:16:37.000000000 +0000 @@ -40,7 +40,6 @@ bool init(void) { if (!ac_init_llvm_compiler(&llvm_info, - true, family, tm_options)) return false; @@ -99,7 +98,6 @@ } bool radv_init_llvm_compiler(struct ac_llvm_compiler *info, - bool okay_to_leak_target_library_info, bool thread_compiler, enum radeon_family family, enum ac_target_machine_options tm_options) @@ -125,7 +123,6 @@ } if (!ac_init_llvm_compiler(info, - okay_to_leak_target_library_info, family, tm_options)) return false; diff -Nru mesa-18.3.3/src/amd/vulkan/radv_meta_blit.c mesa-19.0.1/src/amd/vulkan/radv_meta_blit.c --- mesa-18.3.3/src/amd/vulkan/radv_meta_blit.c 2018-12-07 18:58:04.000000000 +0000 +++ mesa-19.0.1/src/amd/vulkan/radv_meta_blit.c 2019-03-31 23:16:37.000000000 +0000 @@ -849,54 +849,60 @@ .subpass = 0, }; - switch(aspect) { - case VK_IMAGE_ASPECT_COLOR_BIT: - vk_pipeline_info.pColorBlendState = &(VkPipelineColorBlendStateCreateInfo) { - .sType = VK_STRUCTURE_TYPE_PIPELINE_COLOR_BLEND_STATE_CREATE_INFO, - .attachmentCount = 1, - .pAttachments = (VkPipelineColorBlendAttachmentState []) { - { .colorWriteMask = - VK_COLOR_COMPONENT_A_BIT | - VK_COLOR_COMPONENT_R_BIT | - VK_COLOR_COMPONENT_G_BIT | - VK_COLOR_COMPONENT_B_BIT }, + VkPipelineColorBlendStateCreateInfo color_blend_info = { + .sType = VK_STRUCTURE_TYPE_PIPELINE_COLOR_BLEND_STATE_CREATE_INFO, + .attachmentCount = 1, + .pAttachments = (VkPipelineColorBlendAttachmentState []) { + { + .colorWriteMask = VK_COLOR_COMPONENT_A_BIT | + VK_COLOR_COMPONENT_R_BIT | + VK_COLOR_COMPONENT_G_BIT | + VK_COLOR_COMPONENT_B_BIT }, } }; + + VkPipelineDepthStencilStateCreateInfo depth_info = { + .sType = VK_STRUCTURE_TYPE_PIPELINE_DEPTH_STENCIL_STATE_CREATE_INFO, + .depthTestEnable = true, + .depthWriteEnable = true, + .depthCompareOp = VK_COMPARE_OP_ALWAYS, + }; + + VkPipelineDepthStencilStateCreateInfo 
stencil_info = { + .sType = VK_STRUCTURE_TYPE_PIPELINE_DEPTH_STENCIL_STATE_CREATE_INFO, + .depthTestEnable = false, + .depthWriteEnable = false, + .stencilTestEnable = true, + .front = { + .failOp = VK_STENCIL_OP_REPLACE, + .passOp = VK_STENCIL_OP_REPLACE, + .depthFailOp = VK_STENCIL_OP_REPLACE, + .compareOp = VK_COMPARE_OP_ALWAYS, + .compareMask = 0xff, + .writeMask = 0xff, + .reference = 0 + }, + .back = { + .failOp = VK_STENCIL_OP_REPLACE, + .passOp = VK_STENCIL_OP_REPLACE, + .depthFailOp = VK_STENCIL_OP_REPLACE, + .compareOp = VK_COMPARE_OP_ALWAYS, + .compareMask = 0xff, + .writeMask = 0xff, + .reference = 0 + }, + .depthCompareOp = VK_COMPARE_OP_ALWAYS, + }; + + switch(aspect) { + case VK_IMAGE_ASPECT_COLOR_BIT: + vk_pipeline_info.pColorBlendState = &color_blend_info; break; case VK_IMAGE_ASPECT_DEPTH_BIT: - vk_pipeline_info.pDepthStencilState = &(VkPipelineDepthStencilStateCreateInfo) { - .sType = VK_STRUCTURE_TYPE_PIPELINE_DEPTH_STENCIL_STATE_CREATE_INFO, - .depthTestEnable = true, - .depthWriteEnable = true, - .depthCompareOp = VK_COMPARE_OP_ALWAYS, - }; + vk_pipeline_info.pDepthStencilState = &depth_info; break; case VK_IMAGE_ASPECT_STENCIL_BIT: - vk_pipeline_info.pDepthStencilState = &(VkPipelineDepthStencilStateCreateInfo) { - .sType = VK_STRUCTURE_TYPE_PIPELINE_DEPTH_STENCIL_STATE_CREATE_INFO, - .depthTestEnable = false, - .depthWriteEnable = false, - .stencilTestEnable = true, - .front = { - .failOp = VK_STENCIL_OP_REPLACE, - .passOp = VK_STENCIL_OP_REPLACE, - .depthFailOp = VK_STENCIL_OP_REPLACE, - .compareOp = VK_COMPARE_OP_ALWAYS, - .compareMask = 0xff, - .writeMask = 0xff, - .reference = 0 - }, - .back = { - .failOp = VK_STENCIL_OP_REPLACE, - .passOp = VK_STENCIL_OP_REPLACE, - .depthFailOp = VK_STENCIL_OP_REPLACE, - .compareOp = VK_COMPARE_OP_ALWAYS, - .compareMask = 0xff, - .writeMask = 0xff, - .reference = 0 - }, - .depthCompareOp = VK_COMPARE_OP_ALWAYS, - }; + vk_pipeline_info.pDepthStencilState = &stencil_info; break; default: 
unreachable("Unhandled aspect"); diff -Nru mesa-18.3.3/src/amd/vulkan/radv_meta_buffer.c mesa-19.0.1/src/amd/vulkan/radv_meta_buffer.c --- mesa-18.3.3/src/amd/vulkan/radv_meta_buffer.c 2018-12-07 18:58:04.000000000 +0000 +++ mesa-19.0.1/src/amd/vulkan/radv_meta_buffer.c 2019-03-31 23:16:37.000000000 +0000 @@ -15,8 +15,8 @@ b.shader->info.cs.local_size[1] = 1; b.shader->info.cs.local_size[2] = 1; - nir_ssa_def *invoc_id = nir_load_system_value(&b, nir_intrinsic_load_local_invocation_id, 0); - nir_ssa_def *wg_id = nir_load_system_value(&b, nir_intrinsic_load_work_group_id, 0); + nir_ssa_def *invoc_id = nir_load_local_invocation_id(&b); + nir_ssa_def *wg_id = nir_load_work_group_id(&b); nir_ssa_def *block_size = nir_imm_ivec4(&b, b.shader->info.cs.local_size[0], b.shader->info.cs.local_size[1], @@ -67,8 +67,8 @@ b.shader->info.cs.local_size[1] = 1; b.shader->info.cs.local_size[2] = 1; - nir_ssa_def *invoc_id = nir_load_system_value(&b, nir_intrinsic_load_local_invocation_id, 0); - nir_ssa_def *wg_id = nir_load_system_value(&b, nir_intrinsic_load_work_group_id, 0); + nir_ssa_def *invoc_id = nir_load_local_invocation_id(&b); + nir_ssa_def *wg_id = nir_load_work_group_id(&b); nir_ssa_def *block_size = nir_imm_ivec4(&b, b.shader->info.cs.local_size[0], b.shader->info.cs.local_size[1], @@ -522,7 +522,7 @@ radeon_emit(cmd_buffer->cs, PKT3(PKT3_WRITE_DATA, 2 + words, 0)); radeon_emit(cmd_buffer->cs, S_370_DST_SEL(mec ? 
- V_370_MEM_ASYNC : V_370_MEMORY_SYNC) | + V_370_MEM : V_370_MEM_GRBM) | S_370_WR_CONFIRM(1) | S_370_ENGINE_SEL(V_370_ME)); radeon_emit(cmd_buffer->cs, va); diff -Nru mesa-18.3.3/src/amd/vulkan/radv_meta_bufimage.c mesa-19.0.1/src/amd/vulkan/radv_meta_bufimage.c --- mesa-18.3.3/src/amd/vulkan/radv_meta_bufimage.c 2018-12-07 18:58:04.000000000 +0000 +++ mesa-19.0.1/src/amd/vulkan/radv_meta_bufimage.c 2019-03-31 23:16:37.000000000 +0000 @@ -60,8 +60,8 @@ output_img->data.descriptor_set = 0; output_img->data.binding = 1; - nir_ssa_def *invoc_id = nir_load_system_value(&b, nir_intrinsic_load_local_invocation_id, 0); - nir_ssa_def *wg_id = nir_load_system_value(&b, nir_intrinsic_load_work_group_id, 0); + nir_ssa_def *invoc_id = nir_load_local_invocation_id(&b); + nir_ssa_def *wg_id = nir_load_work_group_id(&b); nir_ssa_def *block_size = nir_imm_ivec4(&b, b.shader->info.cs.local_size[0], b.shader->info.cs.local_size[1], @@ -289,8 +289,8 @@ output_img->data.descriptor_set = 0; output_img->data.binding = 1; - nir_ssa_def *invoc_id = nir_load_system_value(&b, nir_intrinsic_load_local_invocation_id, 0); - nir_ssa_def *wg_id = nir_load_system_value(&b, nir_intrinsic_load_work_group_id, 0); + nir_ssa_def *invoc_id = nir_load_local_invocation_id(&b); + nir_ssa_def *wg_id = nir_load_work_group_id(&b); nir_ssa_def *block_size = nir_imm_ivec4(&b, b.shader->info.cs.local_size[0], b.shader->info.cs.local_size[1], @@ -511,8 +511,8 @@ output_img->data.descriptor_set = 0; output_img->data.binding = 1; - nir_ssa_def *invoc_id = nir_load_system_value(&b, nir_intrinsic_load_local_invocation_id, 0); - nir_ssa_def *wg_id = nir_load_system_value(&b, nir_intrinsic_load_work_group_id, 0); + nir_ssa_def *invoc_id = nir_load_local_invocation_id(&b); + nir_ssa_def *wg_id = nir_load_work_group_id(&b); nir_ssa_def *block_size = nir_imm_ivec4(&b, b.shader->info.cs.local_size[0], b.shader->info.cs.local_size[1], @@ -719,8 +719,8 @@ output_img->data.descriptor_set = 0; output_img->data.binding = 1; - 
nir_ssa_def *invoc_id = nir_load_system_value(&b, nir_intrinsic_load_local_invocation_id, 0); - nir_ssa_def *wg_id = nir_load_system_value(&b, nir_intrinsic_load_work_group_id, 0); + nir_ssa_def *invoc_id = nir_load_local_invocation_id(&b); + nir_ssa_def *wg_id = nir_load_work_group_id(&b); nir_ssa_def *block_size = nir_imm_ivec4(&b, b.shader->info.cs.local_size[0], b.shader->info.cs.local_size[1], @@ -932,8 +932,8 @@ output_img->data.descriptor_set = 0; output_img->data.binding = 1; - nir_ssa_def *invoc_id = nir_load_system_value(&b, nir_intrinsic_load_local_invocation_id, 0); - nir_ssa_def *wg_id = nir_load_system_value(&b, nir_intrinsic_load_work_group_id, 0); + nir_ssa_def *invoc_id = nir_load_local_invocation_id(&b); + nir_ssa_def *wg_id = nir_load_work_group_id(&b); nir_ssa_def *block_size = nir_imm_ivec4(&b, b.shader->info.cs.local_size[0], b.shader->info.cs.local_size[1], @@ -1139,8 +1139,8 @@ output_img->data.descriptor_set = 0; output_img->data.binding = 0; - nir_ssa_def *invoc_id = nir_load_system_value(&b, nir_intrinsic_load_local_invocation_id, 0); - nir_ssa_def *wg_id = nir_load_system_value(&b, nir_intrinsic_load_work_group_id, 0); + nir_ssa_def *invoc_id = nir_load_local_invocation_id(&b); + nir_ssa_def *wg_id = nir_load_work_group_id(&b); nir_ssa_def *block_size = nir_imm_ivec4(&b, b.shader->info.cs.local_size[0], b.shader->info.cs.local_size[1], @@ -1331,8 +1331,8 @@ output_img->data.descriptor_set = 0; output_img->data.binding = 0; - nir_ssa_def *invoc_id = nir_load_system_value(&b, nir_intrinsic_load_local_invocation_id, 0); - nir_ssa_def *wg_id = nir_load_system_value(&b, nir_intrinsic_load_work_group_id, 0); + nir_ssa_def *invoc_id = nir_load_local_invocation_id(&b); + nir_ssa_def *wg_id = nir_load_work_group_id(&b); nir_ssa_def *block_size = nir_imm_ivec4(&b, b.shader->info.cs.local_size[0], b.shader->info.cs.local_size[1], @@ -1593,7 +1593,7 @@ }, NULL, buffer); radv_BindBufferMemory2(radv_device_to_handle(device), 1, - 
(VkBindBufferMemoryInfoKHR[]) { + (VkBindBufferMemoryInfo[]) { { .sType = VK_STRUCTURE_TYPE_BIND_BUFFER_MEMORY_INFO, .buffer = *buffer, diff -Nru mesa-18.3.3/src/amd/vulkan/radv_meta.c mesa-19.0.1/src/amd/vulkan/radv_meta.c --- mesa-18.3.3/src/amd/vulkan/radv_meta.c 2018-12-07 18:58:04.000000000 +0000 +++ mesa-19.0.1/src/amd/vulkan/radv_meta.c 2019-03-31 23:16:37.000000000 +0000 @@ -389,8 +389,15 @@ result = radv_device_init_meta_resolve_fragment_state(device, on_demand); if (result != VK_SUCCESS) goto fail_resolve_fragment; + + result = radv_device_init_meta_fmask_expand_state(device); + if (result != VK_SUCCESS) + goto fail_fmask_expand; + return VK_SUCCESS; +fail_fmask_expand: + radv_device_finish_meta_resolve_fragment_state(device); fail_resolve_fragment: radv_device_finish_meta_resolve_compute_state(device); fail_resolve_compute: @@ -431,6 +438,7 @@ radv_device_finish_meta_fast_clear_flush_state(device); radv_device_finish_meta_resolve_compute_state(device); radv_device_finish_meta_resolve_fragment_state(device); + radv_device_finish_meta_fmask_expand_state(device); radv_store_meta_pipeline(device); radv_pipeline_cache_finish(&device->meta_state.cache); diff -Nru mesa-18.3.3/src/amd/vulkan/radv_meta_clear.c mesa-19.0.1/src/amd/vulkan/radv_meta_clear.c --- mesa-18.3.3/src/amd/vulkan/radv_meta_clear.c 2018-12-07 18:58:04.000000000 +0000 +++ mesa-19.0.1/src/amd/vulkan/radv_meta_clear.c 2019-03-31 23:16:37.000000000 +0000 @@ -81,8 +81,8 @@ "v_layer"); vs_out_layer->data.location = VARYING_SLOT_LAYER; vs_out_layer->data.interpolation = INTERP_MODE_FLAT; - nir_ssa_def *inst_id = nir_load_system_value(&vs_b, nir_intrinsic_load_instance_id, 0); - nir_ssa_def *base_instance = nir_load_system_value(&vs_b, nir_intrinsic_load_base_instance, 0); + nir_ssa_def *inst_id = nir_load_instance_id(&vs_b); + nir_ssa_def *base_instance = nir_load_base_instance(&vs_b); nir_ssa_def *layer_id = nir_iadd(&vs_b, inst_id, base_instance); nir_store_var(&vs_b, vs_out_layer, layer_id, 0x1); 
@@ -303,6 +303,22 @@ return result; } +static void +finish_meta_clear_htile_mask_state(struct radv_device *device) +{ + struct radv_meta_state *state = &device->meta_state; + + radv_DestroyPipeline(radv_device_to_handle(device), + state->clear_htile_mask_pipeline, + &state->alloc); + radv_DestroyPipelineLayout(radv_device_to_handle(device), + state->clear_htile_mask_p_layout, + &state->alloc); + radv_DestroyDescriptorSetLayout(radv_device_to_handle(device), + state->clear_htile_mask_ds_layout, + &state->alloc); +} + void radv_device_finish_meta_clear_state(struct radv_device *device) { @@ -339,6 +355,8 @@ radv_DestroyPipelineLayout(radv_device_to_handle(device), state->clear_depth_p_layout, &state->alloc); + + finish_meta_clear_htile_mask_state(device); } static void @@ -352,14 +370,29 @@ const struct radv_framebuffer *fb = cmd_buffer->state.framebuffer; const uint32_t subpass_att = clear_att->colorAttachment; const uint32_t pass_att = subpass->color_attachments[subpass_att].attachment; - const struct radv_image_view *iview = fb->attachments[pass_att].attachment; - const uint32_t samples = iview->image->info.samples; - const uint32_t samples_log2 = ffs(samples) - 1; - unsigned fs_key = radv_format_meta_fs_key(iview->vk_format); + const struct radv_image_view *iview = fb ? fb->attachments[pass_att].attachment : NULL; + uint32_t samples, samples_log2; + VkFormat format; + unsigned fs_key; VkClearColorValue clear_value = clear_att->clearValue.color; VkCommandBuffer cmd_buffer_h = radv_cmd_buffer_to_handle(cmd_buffer); VkPipeline pipeline; + /* When a framebuffer is bound to the current command buffer, get the + * number of samples from it. Otherwise, get the number of samples from + * the render pass because it's likely a secondary command buffer. 
+ */ + if (iview) { + samples = iview->image->info.samples; + format = iview->vk_format; + } else { + samples = cmd_buffer->state.pass->attachments[pass_att].samples; + format = cmd_buffer->state.pass->attachments[pass_att].format; + } + + samples_log2 = ffs(samples) - 1; + fs_key = radv_format_meta_fs_key(format); + if (fs_key == -1) { radv_finishme("color clears incomplete"); return; @@ -470,8 +503,8 @@ "v_layer"); vs_out_layer->data.location = VARYING_SLOT_LAYER; vs_out_layer->data.interpolation = INTERP_MODE_FLAT; - nir_ssa_def *inst_id = nir_load_system_value(&vs_b, nir_intrinsic_load_instance_id, 0); - nir_ssa_def *base_instance = nir_load_system_value(&vs_b, nir_intrinsic_load_base_instance, 0); + nir_ssa_def *inst_id = nir_load_instance_id(&vs_b); + nir_ssa_def *base_instance = nir_load_base_instance(&vs_b); nir_ssa_def *layer_id = nir_iadd(&vs_b, inst_id, base_instance); nir_store_var(&vs_b, vs_out_layer, layer_id, 0x1); @@ -599,6 +632,9 @@ const VkClearRect *clear_rect, VkClearDepthStencilValue clear_value) { + if (!iview) + return false; + uint32_t queue_mask = radv_image_queue_family_mask(iview->image, cmd_buffer->queue_family_index, cmd_buffer->queue_family_index); @@ -615,7 +651,7 @@ iview->base_mip == 0 && iview->base_layer == 0 && radv_layout_is_htile_compressed(iview->image, layout, queue_mask) && - !radv_image_extent_compare(iview->image, &iview->extent)) + radv_image_extent_compare(iview->image, &iview->extent)) return true; return false; } @@ -677,7 +713,8 @@ static void emit_depthstencil_clear(struct radv_cmd_buffer *cmd_buffer, const VkClearAttachment *clear_att, - const VkClearRect *clear_rect) + const VkClearRect *clear_rect, + uint32_t view_mask) { struct radv_device *device = cmd_buffer->device; struct radv_meta_state *meta_state = &device->meta_state; @@ -686,11 +723,22 @@ const uint32_t pass_att = subpass->depth_stencil_attachment.attachment; VkClearDepthStencilValue clear_value = clear_att->clearValue.depthStencil; VkImageAspectFlags 
aspects = clear_att->aspectMask; - const struct radv_image_view *iview = fb->attachments[pass_att].attachment; - const uint32_t samples = iview->image->info.samples; - const uint32_t samples_log2 = ffs(samples) - 1; + const struct radv_image_view *iview = fb ? fb->attachments[pass_att].attachment : NULL; + uint32_t samples, samples_log2; VkCommandBuffer cmd_buffer_h = radv_cmd_buffer_to_handle(cmd_buffer); + /* When a framebuffer is bound to the current command buffer, get the + * number of samples from it. Otherwise, get the number of samples from + * the render pass because it's likely a secondary command buffer. + */ + if (iview) { + samples = iview->image->info.samples; + } else { + samples = cmd_buffer->state.pass->attachments[pass_att].samples; + } + + samples_log2 = ffs(samples) - 1; + assert(pass_att != VK_ATTACHMENT_UNUSED); if (!(aspects & VK_IMAGE_ASPECT_DEPTH_BIT)) @@ -738,7 +786,13 @@ radv_CmdSetScissor(radv_cmd_buffer_to_handle(cmd_buffer), 0, 1, &clear_rect->rect); - radv_CmdDraw(cmd_buffer_h, 3, clear_rect->layerCount, 0, clear_rect->baseArrayLayer); + if (view_mask) { + unsigned i; + for_each_bit(i, view_mask) + radv_CmdDraw(cmd_buffer_h, 3, 1, 0, i); + } else { + radv_CmdDraw(cmd_buffer_h, 3, clear_rect->layerCount, 0, clear_rect->baseArrayLayer); + } if (aspects & VK_IMAGE_ASPECT_STENCIL_BIT) { radv_CmdSetStencilReference(cmd_buffer_h, VK_STENCIL_FACE_FRONT_BIT, @@ -746,94 +800,396 @@ } } +static uint32_t +clear_htile_mask(struct radv_cmd_buffer *cmd_buffer, + struct radeon_winsys_bo *bo, uint64_t offset, uint64_t size, + uint32_t htile_value, uint32_t htile_mask) +{ + struct radv_device *device = cmd_buffer->device; + struct radv_meta_state *state = &device->meta_state; + uint64_t block_count = round_up_u64(size, 1024); + struct radv_meta_saved_state saved_state; + + radv_meta_save(&saved_state, cmd_buffer, + RADV_META_SAVE_COMPUTE_PIPELINE | + RADV_META_SAVE_CONSTANTS | + RADV_META_SAVE_DESCRIPTORS); + + struct radv_buffer dst_buffer = { + .bo 
= bo, + .offset = offset, + .size = size + }; + + radv_CmdBindPipeline(radv_cmd_buffer_to_handle(cmd_buffer), + VK_PIPELINE_BIND_POINT_COMPUTE, + state->clear_htile_mask_pipeline); + + radv_meta_push_descriptor_set(cmd_buffer, VK_PIPELINE_BIND_POINT_COMPUTE, + state->clear_htile_mask_p_layout, + 0, /* set */ + 1, /* descriptorWriteCount */ + (VkWriteDescriptorSet[]) { + { + .sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET, + .dstBinding = 0, + .dstArrayElement = 0, + .descriptorCount = 1, + .descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, + .pBufferInfo = &(VkDescriptorBufferInfo) { + .buffer = radv_buffer_to_handle(&dst_buffer), + .offset = 0, + .range = size + } + } + }); + + const unsigned constants[2] = { + htile_value & htile_mask, + ~htile_mask, + }; + + radv_CmdPushConstants(radv_cmd_buffer_to_handle(cmd_buffer), + state->clear_htile_mask_p_layout, + VK_SHADER_STAGE_COMPUTE_BIT, 0, 8, + constants); + + radv_CmdDispatch(radv_cmd_buffer_to_handle(cmd_buffer), block_count, 1, 1); + + radv_meta_restore(&saved_state, cmd_buffer); + + return RADV_CMD_FLAG_CS_PARTIAL_FLUSH | + RADV_CMD_FLAG_INV_VMEM_L1 | + RADV_CMD_FLAG_WRITEBACK_GLOBAL_L2; +} + +static uint32_t +radv_get_htile_fast_clear_value(const struct radv_image *image, + VkClearDepthStencilValue value) +{ + uint32_t clear_value; + + if (!image->surface.has_stencil) { + clear_value = value.depth ? 0xfffffff0 : 0; + } else { + clear_value = value.depth ? 0xfffc0000 : 0; + } + + return clear_value; +} + +static uint32_t +radv_get_htile_mask(const struct radv_image *image, VkImageAspectFlags aspects) +{ + uint32_t mask = 0; + + if (!image->surface.has_stencil) { + /* All the HTILE buffer is used when there is no stencil. 
*/ + mask = UINT32_MAX; + } else { + if (aspects & VK_IMAGE_ASPECT_DEPTH_BIT) + mask |= 0xfffffc0f; + if (aspects & VK_IMAGE_ASPECT_STENCIL_BIT) + mask |= 0x000003f0; + } + + return mask; +} + static bool -emit_fast_htile_clear(struct radv_cmd_buffer *cmd_buffer, - const VkClearAttachment *clear_att, - const VkClearRect *clear_rect, - enum radv_cmd_flush_bits *pre_flush, - enum radv_cmd_flush_bits *post_flush) +radv_is_fast_clear_depth_allowed(VkClearDepthStencilValue value) { - const struct radv_subpass *subpass = cmd_buffer->state.subpass; - const uint32_t pass_att = subpass->depth_stencil_attachment.attachment; - VkImageLayout image_layout = subpass->depth_stencil_attachment.layout; - const struct radv_framebuffer *fb = cmd_buffer->state.framebuffer; - const struct radv_image_view *iview = fb->attachments[pass_att].attachment; - VkClearDepthStencilValue clear_value = clear_att->clearValue.depthStencil; - VkImageAspectFlags aspects = clear_att->aspectMask; - uint32_t clear_word, flush_bits; + return value.depth == 1.0f || value.depth == 0.0f; +} + +static bool +radv_is_fast_clear_stencil_allowed(VkClearDepthStencilValue value) +{ + return value.stencil == 0; +} + +/** + * Determine if the given image can be fast cleared. + */ +static bool +radv_image_can_fast_clear(struct radv_device *device, struct radv_image *image) +{ + if (device->instance->debug_flags & RADV_DEBUG_NO_FAST_CLEARS) + return false; + + if (vk_format_is_color(image->vk_format)) { + if (!radv_image_has_cmask(image) && !radv_image_has_dcc(image)) + return false; + + /* RB+ doesn't work with CMASK fast clear on Stoney. */ + if (!radv_image_has_dcc(image) && + device->physical_device->rad_info.family == CHIP_STONEY) + return false; + } else { + if (!radv_image_has_htile(image)) + return false; + } - if (!radv_image_has_htile(iview->image)) + /* Do not fast clears 3D images. 
*/ + if (image->type == VK_IMAGE_TYPE_3D) return false; - if (cmd_buffer->device->instance->debug_flags & RADV_DEBUG_NO_FAST_CLEARS) + return true; +} + +/** + * Determine if the given image view can be fast cleared. + */ +static bool +radv_image_view_can_fast_clear(struct radv_device *device, + const struct radv_image_view *iview) +{ + struct radv_image *image; + + if (!iview) return false; + image = iview->image; - if (!radv_layout_is_htile_compressed(iview->image, image_layout, radv_image_queue_family_mask(iview->image, cmd_buffer->queue_family_index, cmd_buffer->queue_family_index))) - goto fail; + /* Only fast clear if the image itself can be fast cleared. */ + if (!radv_image_can_fast_clear(device, image)) + return false; - /* don't fast clear 3D */ - if (iview->image->type == VK_IMAGE_TYPE_3D) - goto fail; + /* Only fast clear if all layers are bound. */ + if (iview->base_layer > 0 || + iview->layer_count != image->info.array_size) + return false; - /* all layers are bound */ - if (iview->base_layer > 0) - goto fail; - if (iview->image->info.array_size != iview->layer_count) - goto fail; + /* Only fast clear if the view covers the whole image. 
*/ + if (!radv_image_extent_compare(image, &iview->extent)) + return false; - if (!radv_image_extent_compare(iview->image, &iview->extent)) - goto fail; + return true; +} + +static bool +radv_can_fast_clear_depth(struct radv_cmd_buffer *cmd_buffer, + const struct radv_image_view *iview, + VkImageLayout image_layout, + VkImageAspectFlags aspects, + const VkClearRect *clear_rect, + const VkClearDepthStencilValue clear_value, + uint32_t view_mask) +{ + if (!radv_image_view_can_fast_clear(cmd_buffer->device, iview)) + return false; + + if (!radv_layout_is_htile_compressed(iview->image, image_layout, radv_image_queue_family_mask(iview->image, cmd_buffer->queue_family_index, cmd_buffer->queue_family_index))) + return false; if (clear_rect->rect.offset.x || clear_rect->rect.offset.y || clear_rect->rect.extent.width != iview->image->info.width || clear_rect->rect.extent.height != iview->image->info.height) - goto fail; + return false; - if (clear_rect->baseArrayLayer != 0) - goto fail; - if (clear_rect->layerCount != iview->image->info.array_size) - goto fail; + if (view_mask && (iview->image->info.array_size >= 32 || + (1u << iview->image->info.array_size) - 1u != view_mask)) + return false; + if (!view_mask && clear_rect->baseArrayLayer != 0) + return false; + if (!view_mask && clear_rect->layerCount != iview->image->info.array_size) + return false; - if ((clear_value.depth != 0.0 && clear_value.depth != 1.0) || !(aspects & VK_IMAGE_ASPECT_DEPTH_BIT)) - goto fail; + if (cmd_buffer->device->physical_device->rad_info.chip_class < GFX9 && + (!(aspects & VK_IMAGE_ASPECT_DEPTH_BIT) || + ((vk_format_aspects(iview->image->vk_format) & VK_IMAGE_ASPECT_STENCIL_BIT) && + !(aspects & VK_IMAGE_ASPECT_STENCIL_BIT)))) + return false; - /* GFX8 only supports 32-bit depth surfaces but we can enable TC-compat - * HTILE for 16-bit surfaces if no Z planes are compressed. Though, - * fast HTILE clears don't seem to work. 
- */ - if (cmd_buffer->device->physical_device->rad_info.chip_class == VI && - iview->image->vk_format == VK_FORMAT_D16_UNORM) - goto fail; + if (((aspects & VK_IMAGE_ASPECT_DEPTH_BIT) && + !radv_is_fast_clear_depth_allowed(clear_value)) || + ((aspects & VK_IMAGE_ASPECT_STENCIL_BIT) && + !radv_is_fast_clear_stencil_allowed(clear_value))) + return false; - if (vk_format_aspects(iview->image->vk_format) & VK_IMAGE_ASPECT_STENCIL_BIT) { - if (clear_value.stencil != 0 || !(aspects & VK_IMAGE_ASPECT_STENCIL_BIT)) - goto fail; - clear_word = clear_value.depth ? 0xfffc0000 : 0; - } else - clear_word = clear_value.depth ? 0xfffffff0 : 0; + return true; +} + +static void +radv_fast_clear_depth(struct radv_cmd_buffer *cmd_buffer, + const struct radv_image_view *iview, + const VkClearAttachment *clear_att, + enum radv_cmd_flush_bits *pre_flush, + enum radv_cmd_flush_bits *post_flush) +{ + VkClearDepthStencilValue clear_value = clear_att->clearValue.depthStencil; + VkImageAspectFlags aspects = clear_att->aspectMask; + uint32_t clear_word, flush_bits; + uint32_t htile_mask; + + clear_word = radv_get_htile_fast_clear_value(iview->image, clear_value); + htile_mask = radv_get_htile_mask(iview->image, aspects); if (pre_flush) { cmd_buffer->state.flush_bits |= (RADV_CMD_FLAG_FLUSH_AND_INV_DB | RADV_CMD_FLAG_FLUSH_AND_INV_DB_META) & ~ *pre_flush; *pre_flush |= cmd_buffer->state.flush_bits; - } else - cmd_buffer->state.flush_bits |= RADV_CMD_FLAG_FLUSH_AND_INV_DB | - RADV_CMD_FLAG_FLUSH_AND_INV_DB_META; + } - flush_bits = radv_fill_buffer(cmd_buffer, iview->image->bo, - iview->image->offset + iview->image->htile_offset, - iview->image->surface.htile_size, clear_word); + if (htile_mask == UINT_MAX) { + /* Clear the whole HTILE buffer. */ + flush_bits = radv_fill_buffer(cmd_buffer, iview->image->bo, + iview->image->offset + iview->image->htile_offset, + iview->image->surface.htile_size, clear_word); + } else { + /* Only clear depth or stencil bytes in the HTILE buffer. 
*/ + assert(cmd_buffer->device->physical_device->rad_info.chip_class >= GFX9); + flush_bits = clear_htile_mask(cmd_buffer, iview->image->bo, + iview->image->offset + iview->image->htile_offset, + iview->image->surface.htile_size, clear_word, + htile_mask); + } radv_update_ds_clear_metadata(cmd_buffer, iview->image, clear_value, aspects); if (post_flush) { *post_flush |= flush_bits; - } else { - cmd_buffer->state.flush_bits |= flush_bits; } +} - return true; +static nir_shader * +build_clear_htile_mask_shader() +{ + nir_builder b; + + nir_builder_init_simple_shader(&b, NULL, MESA_SHADER_COMPUTE, NULL); + b.shader->info.name = ralloc_strdup(b.shader, "meta_clear_htile_mask"); + b.shader->info.cs.local_size[0] = 64; + b.shader->info.cs.local_size[1] = 1; + b.shader->info.cs.local_size[2] = 1; + + nir_ssa_def *invoc_id = nir_load_local_invocation_id(&b); + nir_ssa_def *wg_id = nir_load_work_group_id(&b); + nir_ssa_def *block_size = nir_imm_ivec4(&b, + b.shader->info.cs.local_size[0], + b.shader->info.cs.local_size[1], + b.shader->info.cs.local_size[2], 0); + + nir_ssa_def *global_id = nir_iadd(&b, nir_imul(&b, wg_id, block_size), invoc_id); + + nir_ssa_def *offset = nir_imul(&b, global_id, nir_imm_int(&b, 16)); + offset = nir_channel(&b, offset, 0); + + nir_intrinsic_instr *buf = + nir_intrinsic_instr_create(b.shader, + nir_intrinsic_vulkan_resource_index); + + buf->src[0] = nir_src_for_ssa(nir_imm_int(&b, 0)); + nir_intrinsic_set_desc_set(buf, 0); + nir_intrinsic_set_binding(buf, 0); + nir_ssa_dest_init(&buf->instr, &buf->dest, 1, 32, NULL); + nir_builder_instr_insert(&b, &buf->instr); + + nir_intrinsic_instr *constants = + nir_intrinsic_instr_create(b.shader, + nir_intrinsic_load_push_constant); + nir_intrinsic_set_base(constants, 0); + nir_intrinsic_set_range(constants, 8); + constants->src[0] = nir_src_for_ssa(nir_imm_int(&b, 0)); + constants->num_components = 2; + nir_ssa_dest_init(&constants->instr, &constants->dest, 2, 32, "constants"); + 
nir_builder_instr_insert(&b, &constants->instr); + + nir_intrinsic_instr *load = + nir_intrinsic_instr_create(b.shader, nir_intrinsic_load_ssbo); + load->src[0] = nir_src_for_ssa(&buf->dest.ssa); + load->src[1] = nir_src_for_ssa(offset); + nir_ssa_dest_init(&load->instr, &load->dest, 4, 32, NULL); + load->num_components = 4; + nir_builder_instr_insert(&b, &load->instr); + + /* data = (data & ~htile_mask) | (htile_value & htile_mask) */ + nir_ssa_def *data = + nir_iand(&b, &load->dest.ssa, + nir_channel(&b, &constants->dest.ssa, 1)); + data = nir_ior(&b, data, nir_channel(&b, &constants->dest.ssa, 0)); + + nir_intrinsic_instr *store = + nir_intrinsic_instr_create(b.shader, nir_intrinsic_store_ssbo); + store->src[0] = nir_src_for_ssa(data); + store->src[1] = nir_src_for_ssa(&buf->dest.ssa); + store->src[2] = nir_src_for_ssa(offset); + nir_intrinsic_set_write_mask(store, 0xf); + store->num_components = 4; + nir_builder_instr_insert(&b, &store->instr); + + return b.shader; +} + +static VkResult +init_meta_clear_htile_mask_state(struct radv_device *device) +{ + struct radv_meta_state *state = &device->meta_state; + struct radv_shader_module cs = { .nir = NULL }; + VkResult result; + + cs.nir = build_clear_htile_mask_shader(); + + VkDescriptorSetLayoutCreateInfo ds_layout_info = { + .sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_LAYOUT_CREATE_INFO, + .flags = VK_DESCRIPTOR_SET_LAYOUT_CREATE_PUSH_DESCRIPTOR_BIT_KHR, + .bindingCount = 1, + .pBindings = (VkDescriptorSetLayoutBinding[]) { + { + .binding = 0, + .descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, + .descriptorCount = 1, + .stageFlags = VK_SHADER_STAGE_COMPUTE_BIT, + .pImmutableSamplers = NULL + }, + } + }; + + result = radv_CreateDescriptorSetLayout(radv_device_to_handle(device), + &ds_layout_info, &state->alloc, + &state->clear_htile_mask_ds_layout); + if (result != VK_SUCCESS) + goto fail; + + VkPipelineLayoutCreateInfo p_layout_info = { + .sType = VK_STRUCTURE_TYPE_PIPELINE_LAYOUT_CREATE_INFO, + .setLayoutCount 
= 1, + .pSetLayouts = &state->clear_htile_mask_ds_layout, + .pushConstantRangeCount = 1, + .pPushConstantRanges = &(VkPushConstantRange){ + VK_SHADER_STAGE_COMPUTE_BIT, 0, 8, + }, + }; + + result = radv_CreatePipelineLayout(radv_device_to_handle(device), + &p_layout_info, &state->alloc, + &state->clear_htile_mask_p_layout); + if (result != VK_SUCCESS) + goto fail; + + VkPipelineShaderStageCreateInfo shader_stage = { + .sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO, + .stage = VK_SHADER_STAGE_COMPUTE_BIT, + .module = radv_shader_module_to_handle(&cs), + .pName = "main", + .pSpecializationInfo = NULL, + }; + + VkComputePipelineCreateInfo pipeline_info = { + .sType = VK_STRUCTURE_TYPE_COMPUTE_PIPELINE_CREATE_INFO, + .stage = shader_stage, + .flags = 0, + .layout = state->clear_htile_mask_p_layout, + }; + + result = radv_CreateComputePipelines(radv_device_to_handle(device), + radv_pipeline_cache_to_handle(&state->cache), + 1, &pipeline_info, NULL, + &state->clear_htile_mask_pipeline); + + ralloc_free(cs.nir); + return result; fail: - return false; + ralloc_free(cs.nir); + return result; } VkResult @@ -870,6 +1226,10 @@ if (res != VK_SUCCESS) goto fail; + res = init_meta_clear_htile_mask_state(device); + if (res != VK_SUCCESS) + goto fail; + if (on_demand) return VK_SUCCESS; @@ -961,9 +1321,21 @@ } uint32_t +radv_clear_fmask(struct radv_cmd_buffer *cmd_buffer, + struct radv_image *image, uint32_t value) +{ + return radv_fill_buffer(cmd_buffer, image->bo, + image->offset + image->fmask.offset, + image->fmask.size, value); +} + +uint32_t radv_clear_dcc(struct radv_cmd_buffer *cmd_buffer, struct radv_image *image, uint32_t value) { + /* Mark the image as being compressed. 
*/ + radv_update_dcc_metadata(cmd_buffer, image, true); + return radv_fill_buffer(cmd_buffer, image->bo, image->offset + image->dcc_offset, image->surface.dcc_size, value); @@ -1047,88 +1419,42 @@ } static bool -emit_fast_color_clear(struct radv_cmd_buffer *cmd_buffer, - const VkClearAttachment *clear_att, - const VkClearRect *clear_rect, - enum radv_cmd_flush_bits *pre_flush, - enum radv_cmd_flush_bits *post_flush, - uint32_t view_mask) +radv_can_fast_clear_color(struct radv_cmd_buffer *cmd_buffer, + const struct radv_image_view *iview, + VkImageLayout image_layout, + const VkClearRect *clear_rect, + VkClearColorValue clear_value, + uint32_t view_mask) { - const struct radv_subpass *subpass = cmd_buffer->state.subpass; - const uint32_t subpass_att = clear_att->colorAttachment; - const uint32_t pass_att = subpass->color_attachments[subpass_att].attachment; - VkImageLayout image_layout = subpass->color_attachments[subpass_att].layout; - const struct radv_framebuffer *fb = cmd_buffer->state.framebuffer; - const struct radv_image_view *iview = fb->attachments[pass_att].attachment; - VkClearColorValue clear_value = clear_att->clearValue.color; - uint32_t clear_color[2], flush_bits = 0; - uint32_t cmask_clear_value; - bool ret; + uint32_t clear_color[2]; - if (!radv_image_has_cmask(iview->image) && !radv_image_has_dcc(iview->image)) - return false; - - if (cmd_buffer->device->instance->debug_flags & RADV_DEBUG_NO_FAST_CLEARS) + if (!radv_image_view_can_fast_clear(cmd_buffer->device, iview)) return false; if (!radv_layout_can_fast_clear(iview->image, image_layout, radv_image_queue_family_mask(iview->image, cmd_buffer->queue_family_index, cmd_buffer->queue_family_index))) - goto fail; - - /* don't fast clear 3D */ - if (iview->image->type == VK_IMAGE_TYPE_3D) - goto fail; - - /* all layers are bound */ - if (iview->base_layer > 0) - goto fail; - if (iview->image->info.array_size != iview->layer_count) - goto fail; - - if (iview->image->info.levels > 1) - goto fail; - - if 
(!radv_image_extent_compare(iview->image, &iview->extent)) - goto fail; + return false; if (clear_rect->rect.offset.x || clear_rect->rect.offset.y || clear_rect->rect.extent.width != iview->image->info.width || clear_rect->rect.extent.height != iview->image->info.height) - goto fail; + return false; if (view_mask && (iview->image->info.array_size >= 32 || (1u << iview->image->info.array_size) - 1u != view_mask)) - goto fail; + return false; if (!view_mask && clear_rect->baseArrayLayer != 0) - goto fail; + return false; if (!view_mask && clear_rect->layerCount != iview->image->info.array_size) - goto fail; - - /* RB+ doesn't work with CMASK fast clear on Stoney. */ - if (!radv_image_has_dcc(iview->image) && - cmd_buffer->device->physical_device->rad_info.family == CHIP_STONEY) - goto fail; + return false; /* DCC */ - ret = radv_format_pack_clear_color(iview->vk_format, - clear_color, &clear_value); - if (ret == false) - goto fail; - - if (pre_flush) { - cmd_buffer->state.flush_bits |= (RADV_CMD_FLAG_FLUSH_AND_INV_CB | - RADV_CMD_FLAG_FLUSH_AND_INV_CB_META) & ~ *pre_flush; - *pre_flush |= cmd_buffer->state.flush_bits; - } else - cmd_buffer->state.flush_bits |= RADV_CMD_FLAG_FLUSH_AND_INV_CB | - RADV_CMD_FLAG_FLUSH_AND_INV_CB_META; - - cmask_clear_value = radv_get_cmask_fast_clear_value(iview->image); + if (!radv_format_pack_clear_color(iview->vk_format, + clear_color, &clear_value)) + return false; - /* clear cmask buffer */ if (radv_image_has_dcc(iview->image)) { - uint32_t reset_value; bool can_avoid_fast_clear_elim; - bool need_decompress_pass = false; + uint32_t reset_value; vi_get_fast_clear_parameters(iview->vk_format, &clear_value, &reset_value, @@ -1143,10 +1469,48 @@ * CB flushes but that shouldn't matter. 
*/ if (!can_avoid_fast_clear_elim) - goto fail; + return false; + } + } + + return true; +} + + +static void +radv_fast_clear_color(struct radv_cmd_buffer *cmd_buffer, + const struct radv_image_view *iview, + const VkClearAttachment *clear_att, + uint32_t subpass_att, + enum radv_cmd_flush_bits *pre_flush, + enum radv_cmd_flush_bits *post_flush) +{ + VkClearColorValue clear_value = clear_att->clearValue.color; + uint32_t clear_color[2], flush_bits = 0; + uint32_t cmask_clear_value; - assert(radv_image_has_cmask(iview->image)); + if (pre_flush) { + cmd_buffer->state.flush_bits |= (RADV_CMD_FLAG_FLUSH_AND_INV_CB | + RADV_CMD_FLAG_FLUSH_AND_INV_CB_META) & ~ *pre_flush; + *pre_flush |= cmd_buffer->state.flush_bits; + } + + /* DCC */ + radv_format_pack_clear_color(iview->vk_format, clear_color, &clear_value); + + cmask_clear_value = radv_get_cmask_fast_clear_value(iview->image); + /* clear cmask buffer */ + if (radv_image_has_dcc(iview->image)) { + uint32_t reset_value; + bool can_avoid_fast_clear_elim; + bool need_decompress_pass = false; + + vi_get_fast_clear_parameters(iview->vk_format, + &clear_value, &reset_value, + &can_avoid_fast_clear_elim); + + if (radv_image_has_cmask(iview->image)) { flush_bits = radv_clear_cmask(cmd_buffer, iview->image, cmask_clear_value); @@ -1158,8 +1522,8 @@ flush_bits |= radv_clear_dcc(cmd_buffer, iview->image, reset_value); - radv_set_dcc_need_cmask_elim_pred(cmd_buffer, iview->image, - need_decompress_pass); + radv_update_fce_metadata(cmd_buffer, iview->image, + need_decompress_pass); } else { flush_bits = radv_clear_cmask(cmd_buffer, iview->image, cmask_clear_value); @@ -1167,16 +1531,10 @@ if (post_flush) { *post_flush |= flush_bits; - } else { - cmd_buffer->state.flush_bits |= flush_bits; } radv_update_color_clear_metadata(cmd_buffer, iview->image, subpass_att, clear_color); - - return true; -fail: - return false; } /** @@ -1190,16 +1548,46 @@ enum radv_cmd_flush_bits *post_flush, uint32_t view_mask) { - if (clear_att->aspectMask & 
VK_IMAGE_ASPECT_COLOR_BIT) { - if (!emit_fast_color_clear(cmd_buffer, clear_att, clear_rect, - pre_flush, post_flush, view_mask)) + const struct radv_framebuffer *fb = cmd_buffer->state.framebuffer; + const struct radv_subpass *subpass = cmd_buffer->state.subpass; + VkImageAspectFlags aspects = clear_att->aspectMask; + + if (aspects & VK_IMAGE_ASPECT_COLOR_BIT) { + const uint32_t subpass_att = clear_att->colorAttachment; + const uint32_t pass_att = subpass->color_attachments[subpass_att].attachment; + VkImageLayout image_layout = subpass->color_attachments[subpass_att].layout; + const struct radv_image_view *iview = fb ? fb->attachments[pass_att].attachment : NULL; + VkClearColorValue clear_value = clear_att->clearValue.color; + + if (radv_can_fast_clear_color(cmd_buffer, iview, image_layout, + clear_rect, clear_value, view_mask)) { + radv_fast_clear_color(cmd_buffer, iview, clear_att, + subpass_att, pre_flush, + post_flush); + } else { emit_color_clear(cmd_buffer, clear_att, clear_rect, view_mask); + } } else { - assert(clear_att->aspectMask & (VK_IMAGE_ASPECT_DEPTH_BIT | - VK_IMAGE_ASPECT_STENCIL_BIT)); - if (!emit_fast_htile_clear(cmd_buffer, clear_att, clear_rect, - pre_flush, post_flush)) - emit_depthstencil_clear(cmd_buffer, clear_att, clear_rect); + const uint32_t pass_att = subpass->depth_stencil_attachment.attachment; + if (pass_att == VK_ATTACHMENT_UNUSED) + return; + + VkImageLayout image_layout = subpass->depth_stencil_attachment.layout; + const struct radv_image_view *iview = fb ? 
fb->attachments[pass_att].attachment : NULL; + VkClearDepthStencilValue clear_value = clear_att->clearValue.depthStencil; + + assert(aspects & (VK_IMAGE_ASPECT_DEPTH_BIT | + VK_IMAGE_ASPECT_STENCIL_BIT)); + + if (radv_can_fast_clear_depth(cmd_buffer, iview, image_layout, + aspects, clear_rect, clear_value, + view_mask)) { + radv_fast_clear_depth(cmd_buffer, iview, clear_att, + pre_flush, post_flush); + } else { + emit_depthstencil_clear(cmd_buffer, clear_att, clear_rect, + view_mask); + } } } @@ -1441,6 +1829,75 @@ radv_DestroyFramebuffer(device_h, fb, &cmd_buffer->pool->alloc); } + +/** + * Return TRUE if a fast color or depth clear has been performed. + */ +static bool +radv_fast_clear_range(struct radv_cmd_buffer *cmd_buffer, + struct radv_image *image, + VkFormat format, + VkImageLayout image_layout, + const VkImageSubresourceRange *range, + const VkClearValue *clear_val) +{ + struct radv_image_view iview; + + radv_image_view_init(&iview, cmd_buffer->device, + &(VkImageViewCreateInfo) { + .sType = VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO, + .image = radv_image_to_handle(image), + .viewType = radv_meta_get_view_type(image), + .format = image->vk_format, + .subresourceRange = { + .aspectMask = range->aspectMask, + .baseMipLevel = range->baseMipLevel, + .levelCount = range->levelCount, + .baseArrayLayer = range->baseArrayLayer, + .layerCount = range->layerCount, + }, + }); + + VkClearRect clear_rect = { + .rect = { + .offset = { 0, 0 }, + .extent = { + radv_minify(image->info.width, range->baseMipLevel), + radv_minify(image->info.height, range->baseMipLevel), + }, + }, + .baseArrayLayer = range->baseArrayLayer, + .layerCount = range->layerCount, + }; + + VkClearAttachment clear_att = { + .aspectMask = range->aspectMask, + .colorAttachment = 0, + .clearValue = *clear_val, + }; + + if (vk_format_is_color(format)) { + if (radv_can_fast_clear_color(cmd_buffer, &iview, + image_layout, &clear_rect, + clear_att.clearValue.color, 0)) { + radv_fast_clear_color(cmd_buffer, 
&iview, &clear_att, + clear_att.colorAttachment, + NULL, NULL); + return true; + } + } else { + if (radv_can_fast_clear_depth(cmd_buffer, &iview, image_layout, + range->aspectMask, &clear_rect, + clear_att.clearValue.depthStencil, 0)) { + radv_fast_clear_depth(cmd_buffer, &iview, &clear_att, + NULL, NULL); + return true; + } + } + + return false; +} + static void radv_cmd_clear_image(struct radv_cmd_buffer *cmd_buffer, struct radv_image *image, @@ -1468,18 +1925,31 @@ internal_clear_value.color.uint32[0] = (r << 4) | (g & 0xf); } + if (format == VK_FORMAT_R32G32B32_UINT || + format == VK_FORMAT_R32G32B32_SINT || + format == VK_FORMAT_R32G32B32_SFLOAT) + cs = true; + for (uint32_t r = 0; r < range_count; r++) { const VkImageSubresourceRange *range = &ranges[r]; + + /* Try to perform a fast clear first, otherwise fallback to + * the legacy path. + */ + if (!cs && + radv_fast_clear_range(cmd_buffer, image, format, + image_layout, range, + &internal_clear_value)) { + continue; + } + for (uint32_t l = 0; l < radv_get_levelCount(image, range); ++l) { const uint32_t layer_count = image->type == VK_IMAGE_TYPE_3D ? 
radv_minify(image->info.depth, range->baseMipLevel + l) : radv_get_layerCount(image, range); for (uint32_t s = 0; s < layer_count; ++s) { - if (cs || - (format == VK_FORMAT_R32G32B32_UINT || - format == VK_FORMAT_R32G32B32_SINT || - format == VK_FORMAT_R32G32B32_SFLOAT)) { + if (cs) { struct radv_meta_blit2d_surf surf; surf.format = format; surf.image = image; diff -Nru mesa-18.3.3/src/amd/vulkan/radv_meta_copy.c mesa-19.0.1/src/amd/vulkan/radv_meta_copy.c --- mesa-18.3.3/src/amd/vulkan/radv_meta_copy.c 2018-12-07 18:58:04.000000000 +0000 +++ mesa-19.0.1/src/amd/vulkan/radv_meta_copy.c 2019-03-31 23:16:37.000000000 +0000 @@ -107,6 +107,22 @@ }; } +static bool +image_is_renderable(struct radv_device *device, struct radv_image *image) +{ + if (image->vk_format == VK_FORMAT_R32G32B32_UINT || + image->vk_format == VK_FORMAT_R32G32B32_SINT || + image->vk_format == VK_FORMAT_R32G32B32_SFLOAT) + return false; + + if (device->physical_device->rad_info.chip_class >= GFX9 && + image->type == VK_IMAGE_TYPE_3D && + vk_format_get_blocksizebits(image->vk_format) == 128 && + vk_format_is_compressed(image->vk_format)) + return false; + return true; +} + static void meta_copy_buffer_to_image(struct radv_cmd_buffer *cmd_buffer, struct radv_buffer* buffer, @@ -196,9 +212,7 @@ /* Perform Blit */ if (cs || - (img_bsurf.image->vk_format == VK_FORMAT_R32G32B32_UINT || - img_bsurf.image->vk_format == VK_FORMAT_R32G32B32_SINT || - img_bsurf.image->vk_format == VK_FORMAT_R32G32B32_SFLOAT)) { + !image_is_renderable(cmd_buffer->device, img_bsurf.image)) { radv_meta_buffer_to_image_cs(cmd_buffer, &buf_bsurf, &img_bsurf, 1, &rect); } else { radv_meta_blit2d(cmd_buffer, NULL, &buf_bsurf, &img_bsurf, 1, &rect); @@ -483,9 +497,7 @@ /* Perform Blit */ if (cs || - (b_src.format == VK_FORMAT_R32G32B32_UINT || - b_src.format == VK_FORMAT_R32G32B32_SINT || - b_src.format == VK_FORMAT_R32G32B32_SFLOAT)) { + !image_is_renderable(cmd_buffer->device, b_dst.image)) { radv_meta_image_to_image_cs(cmd_buffer, 
&b_src, &b_dst, 1, &rect); } else { radv_meta_blit2d(cmd_buffer, &b_src, NULL, &b_dst, 1, &rect); diff -Nru mesa-18.3.3/src/amd/vulkan/radv_meta_decompress.c mesa-19.0.1/src/amd/vulkan/radv_meta_decompress.c --- mesa-18.3.3/src/amd/vulkan/radv_meta_decompress.c 2018-12-07 18:58:04.000000000 +0000 +++ mesa-19.0.1/src/amd/vulkan/radv_meta_decompress.c 2019-03-31 23:16:37.000000000 +0000 @@ -308,34 +308,6 @@ return res; } -static void -emit_depth_decomp(struct radv_cmd_buffer *cmd_buffer, - const VkExtent2D *depth_decomp_extent, - VkPipeline pipeline_h) -{ - VkCommandBuffer cmd_buffer_h = radv_cmd_buffer_to_handle(cmd_buffer); - - radv_CmdBindPipeline(cmd_buffer_h, VK_PIPELINE_BIND_POINT_GRAPHICS, - pipeline_h); - - radv_CmdSetViewport(radv_cmd_buffer_to_handle(cmd_buffer), 0, 1, &(VkViewport) { - .x = 0, - .y = 0, - .width = depth_decomp_extent->width, - .height = depth_decomp_extent->height, - .minDepth = 0.0f, - .maxDepth = 1.0f - }); - - radv_CmdSetScissor(radv_cmd_buffer_to_handle(cmd_buffer), 0, 1, &(VkRect2D) { - .offset = { 0, 0 }, - .extent = *depth_decomp_extent, - }); - - radv_CmdDraw(cmd_buffer_h, 3, 1, 0, 0); -} - - enum radv_depth_op { DEPTH_DECOMPRESS, DEPTH_RESUMMARIZE, @@ -388,6 +360,23 @@ unreachable("unknown operation"); } + radv_CmdBindPipeline(cmd_buffer_h, VK_PIPELINE_BIND_POINT_GRAPHICS, + pipeline_h); + + radv_CmdSetViewport(cmd_buffer_h, 0, 1, &(VkViewport) { + .x = 0, + .y = 0, + .width = width, + .height = height, + .minDepth = 0.0f, + .maxDepth = 1.0f + }); + + radv_CmdSetScissor(cmd_buffer_h, 0, 1, &(VkRect2D) { + .offset = { 0, 0 }, + .extent = { width, height }, + }); + for (uint32_t layer = 0; layer < radv_get_layerCount(image, subresourceRange); layer++) { struct radv_image_view iview; @@ -442,7 +431,7 @@ }, VK_SUBPASS_CONTENTS_INLINE); - emit_depth_decomp(cmd_buffer, &(VkExtent2D){width, height}, pipeline_h); + radv_CmdDraw(cmd_buffer_h, 3, 1, 0, 0); radv_CmdEndRenderPass(cmd_buffer_h); radv_DestroyFramebuffer(device_h, fb_h, diff 
-Nru mesa-18.3.3/src/amd/vulkan/radv_meta_fast_clear.c mesa-19.0.1/src/amd/vulkan/radv_meta_fast_clear.c --- mesa-18.3.3/src/amd/vulkan/radv_meta_fast_clear.c 2018-12-07 18:58:04.000000000 +0000 +++ mesa-19.0.1/src/amd/vulkan/radv_meta_fast_clear.c 2019-03-31 23:16:37.000000000 +0000 @@ -58,8 +58,8 @@ output_img->data.descriptor_set = 0; output_img->data.binding = 1; - nir_ssa_def *invoc_id = nir_load_system_value(&b, nir_intrinsic_load_local_invocation_id, 0); - nir_ssa_def *wg_id = nir_load_system_value(&b, nir_intrinsic_load_work_group_id, 0); + nir_ssa_def *invoc_id = nir_load_local_invocation_id(&b); + nir_ssa_def *wg_id = nir_load_work_group_id(&b); nir_ssa_def *block_size = nir_imm_ivec4(&b, b.shader->info.cs.local_size[0], b.shader->info.cs.local_size[1], @@ -550,43 +550,15 @@ } static void -emit_fast_clear_flush(struct radv_cmd_buffer *cmd_buffer, - const VkExtent2D *resolve_extent, - VkPipeline pipeline) -{ - VkCommandBuffer cmd_buffer_h = radv_cmd_buffer_to_handle(cmd_buffer); - - radv_CmdBindPipeline(cmd_buffer_h, VK_PIPELINE_BIND_POINT_GRAPHICS, - pipeline); - - radv_CmdSetViewport(radv_cmd_buffer_to_handle(cmd_buffer), 0, 1, &(VkViewport) { - .x = 0, - .y = 0, - .width = resolve_extent->width, - .height = resolve_extent->height, - .minDepth = 0.0f, - .maxDepth = 1.0f - }); - - radv_CmdSetScissor(radv_cmd_buffer_to_handle(cmd_buffer), 0, 1, &(VkRect2D) { - .offset = (VkOffset2D) { 0, 0 }, - .extent = (VkExtent2D) { resolve_extent->width, resolve_extent->height }, - }); - - radv_CmdDraw(cmd_buffer_h, 3, 1, 0, 0); - cmd_buffer->state.flush_bits |= (RADV_CMD_FLAG_FLUSH_AND_INV_CB | - RADV_CMD_FLAG_FLUSH_AND_INV_CB_META); -} - -static void radv_emit_set_predication_state_from_image(struct radv_cmd_buffer *cmd_buffer, - struct radv_image *image, bool value) + struct radv_image *image, + uint64_t pred_offset, bool value) { uint64_t va = 0; if (value) { va = radv_buffer_get_va(image->bo) + image->offset; - va += image->dcc_pred_offset; + va += pred_offset; } 
si_emit_set_predication_state(cmd_buffer, true, va); @@ -629,12 +601,33 @@ pipeline = cmd_buffer->device->meta_state.fast_clear_flush.cmask_eliminate_pipeline; } - if (!decompress_dcc && radv_image_has_dcc(image)) { + if (radv_image_has_dcc(image)) { + uint64_t pred_offset = decompress_dcc ? image->dcc_pred_offset : + image->fce_pred_offset; + old_predicating = cmd_buffer->state.predicating; - radv_emit_set_predication_state_from_image(cmd_buffer, image, true); + radv_emit_set_predication_state_from_image(cmd_buffer, image, pred_offset, true); cmd_buffer->state.predicating = true; } + + radv_CmdBindPipeline(cmd_buffer_h, VK_PIPELINE_BIND_POINT_GRAPHICS, + pipeline); + + radv_CmdSetViewport(cmd_buffer_h, 0, 1, &(VkViewport) { + .x = 0, + .y = 0, + .width = image->info.width, + .height = image->info.height, + .minDepth = 0.0f, + .maxDepth = 1.0f + }); + + radv_CmdSetScissor(cmd_buffer_h, 0, 1, &(VkRect2D) { + .offset = (VkOffset2D) { 0, 0 }, + .extent = (VkExtent2D) { image->info.width, image->info.height }, + }); + for (uint32_t layer = 0; layer < layer_count; ++layer) { struct radv_image_view iview; @@ -688,24 +681,24 @@ }, VK_SUBPASS_CONTENTS_INLINE); - emit_fast_clear_flush(cmd_buffer, - &(VkExtent2D) { image->info.width, image->info.height }, - pipeline); + radv_CmdDraw(cmd_buffer_h, 3, 1, 0, 0); + + cmd_buffer->state.flush_bits |= RADV_CMD_FLAG_FLUSH_AND_INV_CB | + RADV_CMD_FLAG_FLUSH_AND_INV_CB_META; + radv_CmdEndRenderPass(cmd_buffer_h); radv_DestroyFramebuffer(device_h, fb_h, &cmd_buffer->pool->alloc); } - if (!decompress_dcc && radv_image_has_dcc(image)) { - cmd_buffer->state.predicating = old_predicating; + if (radv_image_has_dcc(image)) { + uint64_t pred_offset = decompress_dcc ? 
image->dcc_pred_offset : + image->fce_pred_offset; - radv_emit_set_predication_state_from_image(cmd_buffer, image, false); + cmd_buffer->state.predicating = old_predicating; - /* Clear the image's fast-clear eliminate predicate because - * FMASK and DCC also imply a fast-clear eliminate. - */ - radv_set_dcc_need_cmask_elim_pred(cmd_buffer, image, false); + radv_emit_set_predication_state_from_image(cmd_buffer, image, pred_offset, false); if (cmd_buffer->state.predication_type != -1) { /* Restore previous conditional rendering user state. */ @@ -714,6 +707,18 @@ cmd_buffer->state.predication_va); } } + + if (radv_image_has_dcc(image)) { + /* Clear the image's fast-clear eliminate predicate because + * FMASK and DCC also imply a fast-clear eliminate. + */ + radv_update_fce_metadata(cmd_buffer, image, false); + + /* Mark the image as being decompressed. */ + if (decompress_dcc) + radv_update_dcc_metadata(cmd_buffer, image, false); + } + radv_meta_restore(&saved_state, cmd_buffer); } @@ -808,6 +813,9 @@ radv_unaligned_dispatch(cmd_buffer, image->info.width, image->info.height, 1); + /* Mark this image as actually being decompressed. 
*/ + radv_update_dcc_metadata(cmd_buffer, image, false); + /* The fill buffer below does its own saving */ radv_meta_restore(&saved_state, cmd_buffer); diff -Nru mesa-18.3.3/src/amd/vulkan/radv_meta_fmask_expand.c mesa-19.0.1/src/amd/vulkan/radv_meta_fmask_expand.c --- mesa-18.3.3/src/amd/vulkan/radv_meta_fmask_expand.c 1970-01-01 00:00:00.000000000 +0000 +++ mesa-19.0.1/src/amd/vulkan/radv_meta_fmask_expand.c 2019-03-31 23:16:37.000000000 +0000 @@ -0,0 +1,306 @@ +/* + * Copyright © 2019 Valve Corporation + * Copyright © 2018 Red Hat + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. 
+ */ + +#include "radv_meta.h" +#include "radv_private.h" + +static nir_shader * +build_fmask_expand_compute_shader(struct radv_device *device, int samples) +{ + nir_builder b; + char name[64]; + const struct glsl_type *input_img_type = + glsl_sampler_type(GLSL_SAMPLER_DIM_MS, false, false, + GLSL_TYPE_FLOAT); + const struct glsl_type *output_img_type = + glsl_sampler_type(GLSL_SAMPLER_DIM_MS, false, false, + GLSL_TYPE_FLOAT); + + snprintf(name, 64, "meta_fmask_expand_cs-%d", samples); + + nir_builder_init_simple_shader(&b, NULL, MESA_SHADER_COMPUTE, NULL); + b.shader->info.name = ralloc_strdup(b.shader, name); + b.shader->info.cs.local_size[0] = 16; + b.shader->info.cs.local_size[1] = 16; + b.shader->info.cs.local_size[2] = 1; + + nir_variable *input_img = nir_variable_create(b.shader, nir_var_uniform, + input_img_type, "s_tex"); + input_img->data.descriptor_set = 0; + input_img->data.binding = 0; + + nir_variable *output_img = nir_variable_create(b.shader, nir_var_uniform, + output_img_type, "out_img"); + output_img->data.descriptor_set = 0; + output_img->data.binding = 1; + + nir_ssa_def *invoc_id = nir_load_local_invocation_id(&b); + nir_ssa_def *wg_id = nir_load_work_group_id(&b); + nir_ssa_def *block_size = nir_imm_ivec4(&b, + b.shader->info.cs.local_size[0], + b.shader->info.cs.local_size[1], + b.shader->info.cs.local_size[2], 0); + + nir_ssa_def *global_id = nir_iadd(&b, nir_imul(&b, wg_id, block_size), invoc_id); + + nir_ssa_def *input_img_deref = &nir_build_deref_var(&b, input_img)->dest.ssa; + nir_ssa_def *output_img_deref = &nir_build_deref_var(&b, output_img)->dest.ssa; + + nir_tex_instr *tex_instr[8]; + for (uint32_t i = 0; i < samples; i++) { + tex_instr[i] = nir_tex_instr_create(b.shader, 3); + + nir_tex_instr *tex = tex_instr[i]; + tex->sampler_dim = GLSL_SAMPLER_DIM_MS; + tex->op = nir_texop_txf_ms; + tex->src[0].src_type = nir_tex_src_coord; + tex->src[0].src = nir_src_for_ssa(nir_channels(&b, global_id, 0x3)); + tex->src[1].src_type = 
nir_tex_src_ms_index; + tex->src[1].src = nir_src_for_ssa(nir_imm_int(&b, i)); + tex->src[2].src_type = nir_tex_src_texture_deref; + tex->src[2].src = nir_src_for_ssa(input_img_deref); + tex->dest_type = nir_type_float; + tex->is_array = false; + tex->coord_components = 2; + + nir_ssa_dest_init(&tex->instr, &tex->dest, 4, 32, "tex"); + nir_builder_instr_insert(&b, &tex->instr); + } + + for (uint32_t i = 0; i < samples; i++) { + nir_ssa_def *outval = &tex_instr[i]->dest.ssa; + + nir_intrinsic_instr *store = + nir_intrinsic_instr_create(b.shader, + nir_intrinsic_image_deref_store); + store->num_components = 4; + store->src[0] = nir_src_for_ssa(output_img_deref); + store->src[1] = nir_src_for_ssa(global_id); + store->src[2] = nir_src_for_ssa(nir_imm_int(&b, i)); + store->src[3] = nir_src_for_ssa(outval); + nir_builder_instr_insert(&b, &store->instr); + } + + return b.shader; +} + +void +radv_expand_fmask_image_inplace(struct radv_cmd_buffer *cmd_buffer, + struct radv_image *image, + const VkImageSubresourceRange *subresourceRange) +{ + struct radv_device *device = cmd_buffer->device; + struct radv_meta_saved_state saved_state; + const uint32_t samples = image->info.samples; + const uint32_t samples_log2 = ffs(samples) - 1; + + radv_meta_save(&saved_state, cmd_buffer, + RADV_META_SAVE_COMPUTE_PIPELINE | + RADV_META_SAVE_DESCRIPTORS); + + VkPipeline pipeline = device->meta_state.fmask_expand.pipeline[samples_log2]; + + radv_CmdBindPipeline(radv_cmd_buffer_to_handle(cmd_buffer), + VK_PIPELINE_BIND_POINT_COMPUTE, pipeline); + + for (unsigned l = 0; l < subresourceRange->layerCount; l++) { + struct radv_image_view iview; + + radv_image_view_init(&iview, device, + &(VkImageViewCreateInfo) { + .sType = VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO, + .image = radv_image_to_handle(image), + .viewType = radv_meta_get_view_type(image), + .format = image->vk_format, + .subresourceRange = { + .aspectMask = VK_IMAGE_ASPECT_COLOR_BIT, + .baseMipLevel = 0, + .levelCount = 1, + 
.baseArrayLayer = subresourceRange->baseArrayLayer + l, + .layerCount = 1, + }, + }); + + radv_meta_push_descriptor_set(cmd_buffer, + VK_PIPELINE_BIND_POINT_COMPUTE, + cmd_buffer->device->meta_state.fmask_expand.p_layout, + 0, /* set */ + 2, /* descriptorWriteCount */ + (VkWriteDescriptorSet[]) { + { + .sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET, + .dstBinding = 0, + .dstArrayElement = 0, + .descriptorCount = 1, + .descriptorType = VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE, + .pImageInfo = (VkDescriptorImageInfo[]) { + { + .sampler = VK_NULL_HANDLE, + .imageView = radv_image_view_to_handle(&iview), + .imageLayout = VK_IMAGE_LAYOUT_GENERAL + }, + } + }, + { + .sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET, + .dstBinding = 1, + .dstArrayElement = 0, + .descriptorCount = 1, + .descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_IMAGE, + .pImageInfo = (VkDescriptorImageInfo[]) { + { + .sampler = VK_NULL_HANDLE, + .imageView = radv_image_view_to_handle(&iview), + .imageLayout = VK_IMAGE_LAYOUT_GENERAL + }, + } + } + }); + + radv_unaligned_dispatch(cmd_buffer, image->info.width, image->info.height, 1); + } + + radv_meta_restore(&saved_state, cmd_buffer); + + cmd_buffer->state.flush_bits |= RADV_CMD_FLAG_CS_PARTIAL_FLUSH | + RADV_CMD_FLAG_INV_GLOBAL_L2; + + /* Re-initialize FMASK in fully expanded mode. 
*/ + radv_initialize_fmask(cmd_buffer, image); +} + +void radv_device_finish_meta_fmask_expand_state(struct radv_device *device) +{ + struct radv_meta_state *state = &device->meta_state; + + for (uint32_t i = 0; i < MAX_SAMPLES_LOG2; ++i) { + radv_DestroyPipeline(radv_device_to_handle(device), + state->fmask_expand.pipeline[i], + &state->alloc); + } + radv_DestroyPipelineLayout(radv_device_to_handle(device), + state->fmask_expand.p_layout, + &state->alloc); + + radv_DestroyDescriptorSetLayout(radv_device_to_handle(device), + state->fmask_expand.ds_layout, + &state->alloc); +} + +static VkResult +create_fmask_expand_pipeline(struct radv_device *device, + int samples, + VkPipeline *pipeline) +{ + struct radv_meta_state *state = &device->meta_state; + struct radv_shader_module cs = { .nir = NULL }; + VkResult result; + + cs.nir = build_fmask_expand_compute_shader(device, samples); + + VkPipelineShaderStageCreateInfo pipeline_shader_stage = { + .sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO, + .stage = VK_SHADER_STAGE_COMPUTE_BIT, + .module = radv_shader_module_to_handle(&cs), + .pName = "main", + .pSpecializationInfo = NULL, + }; + + VkComputePipelineCreateInfo vk_pipeline_info = { + .sType = VK_STRUCTURE_TYPE_COMPUTE_PIPELINE_CREATE_INFO, + .stage = pipeline_shader_stage, + .flags = 0, + .layout = state->fmask_expand.p_layout, + }; + + result = radv_CreateComputePipelines(radv_device_to_handle(device), + radv_pipeline_cache_to_handle(&state->cache), + 1, &vk_pipeline_info, NULL, + pipeline); + + ralloc_free(cs.nir); + return result; +} + +VkResult +radv_device_init_meta_fmask_expand_state(struct radv_device *device) +{ + struct radv_meta_state *state = &device->meta_state; + VkResult result; + + VkDescriptorSetLayoutCreateInfo ds_create_info = { + .sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_LAYOUT_CREATE_INFO, + .flags = VK_DESCRIPTOR_SET_LAYOUT_CREATE_PUSH_DESCRIPTOR_BIT_KHR, + .bindingCount = 2, + .pBindings = (VkDescriptorSetLayoutBinding[]) { + { + 
.binding = 0, + .descriptorType = VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE, + .descriptorCount = 1, + .stageFlags = VK_SHADER_STAGE_COMPUTE_BIT, + .pImmutableSamplers = NULL + }, + { + .binding = 1, + .descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_IMAGE, + .descriptorCount = 1, + .stageFlags = VK_SHADER_STAGE_COMPUTE_BIT, + .pImmutableSamplers = NULL + }, + } + }; + + result = radv_CreateDescriptorSetLayout(radv_device_to_handle(device), + &ds_create_info, &state->alloc, + &state->fmask_expand.ds_layout); + if (result != VK_SUCCESS) + goto fail; + + VkPipelineLayoutCreateInfo color_create_info = { + .sType = VK_STRUCTURE_TYPE_PIPELINE_LAYOUT_CREATE_INFO, + .setLayoutCount = 1, + .pSetLayouts = &state->fmask_expand.ds_layout, + .pushConstantRangeCount = 0, + .pPushConstantRanges = NULL, + }; + + result = radv_CreatePipelineLayout(radv_device_to_handle(device), + &color_create_info, &state->alloc, + &state->fmask_expand.p_layout); + if (result != VK_SUCCESS) + goto fail; + + for (uint32_t i = 0; i < MAX_SAMPLES_LOG2; i++) { + uint32_t samples = 1 << i; + result = create_fmask_expand_pipeline(device, samples, + &state->fmask_expand.pipeline[i]); + if (result != VK_SUCCESS) + goto fail; + } + + return VK_SUCCESS; +fail: + radv_device_finish_meta_fmask_expand_state(device); + return result; +} diff -Nru mesa-18.3.3/src/amd/vulkan/radv_meta.h mesa-19.0.1/src/amd/vulkan/radv_meta.h --- mesa-18.3.3/src/amd/vulkan/radv_meta.h 2018-12-07 18:58:04.000000000 +0000 +++ mesa-19.0.1/src/amd/vulkan/radv_meta.h 2019-03-31 23:16:37.000000000 +0000 @@ -88,6 +88,9 @@ VkResult radv_device_init_meta_resolve_fragment_state(struct radv_device *device, bool on_demand); void radv_device_finish_meta_resolve_fragment_state(struct radv_device *device); +VkResult radv_device_init_meta_fmask_expand_state(struct radv_device *device); +void radv_device_finish_meta_fmask_expand_state(struct radv_device *device); + void radv_meta_save(struct radv_meta_saved_state *saved_state, struct radv_cmd_buffer 
*cmd_buffer, uint32_t flags); @@ -174,6 +177,9 @@ void radv_decompress_dcc(struct radv_cmd_buffer *cmd_buffer, struct radv_image *image, const VkImageSubresourceRange *subresourceRange); +void radv_expand_fmask_image_inplace(struct radv_cmd_buffer *cmd_buffer, + struct radv_image *image, + const VkImageSubresourceRange *subresourceRange); void radv_meta_resolve_compute_image(struct radv_cmd_buffer *cmd_buffer, struct radv_image *src_image, @@ -201,6 +207,8 @@ uint32_t radv_clear_cmask(struct radv_cmd_buffer *cmd_buffer, struct radv_image *image, uint32_t value); +uint32_t radv_clear_fmask(struct radv_cmd_buffer *cmd_buffer, + struct radv_image *image, uint32_t value); uint32_t radv_clear_dcc(struct radv_cmd_buffer *cmd_buffer, struct radv_image *image, uint32_t value); diff -Nru mesa-18.3.3/src/amd/vulkan/radv_meta_resolve.c mesa-19.0.1/src/amd/vulkan/radv_meta_resolve.c --- mesa-18.3.3/src/amd/vulkan/radv_meta_resolve.c 2018-12-07 18:58:04.000000000 +0000 +++ mesa-19.0.1/src/amd/vulkan/radv_meta_resolve.c 2019-03-31 23:16:37.000000000 +0000 @@ -456,14 +456,6 @@ } assert(dest_image->info.samples == 1); - if (src_image->info.samples >= 16) { - /* See commit aa3f9aaf31e9056a255f9e0472ebdfdaa60abe54 for the - * glBlitFramebuffer workaround for samples >= 16. 
- */ - radv_finishme("vkCmdResolveImage: need interpolation workaround when " - "samples >= 16"); - } - if (src_image->info.array_size > 1) radv_finishme("vkCmdResolveImage: multisample array images"); diff -Nru mesa-18.3.3/src/amd/vulkan/radv_meta_resolve_cs.c mesa-19.0.1/src/amd/vulkan/radv_meta_resolve_cs.c --- mesa-18.3.3/src/amd/vulkan/radv_meta_resolve_cs.c 2018-12-07 18:58:04.000000000 +0000 +++ mesa-19.0.1/src/amd/vulkan/radv_meta_resolve_cs.c 2019-03-31 23:16:37.000000000 +0000 @@ -99,8 +99,8 @@ img_type, "out_img"); output_img->data.descriptor_set = 0; output_img->data.binding = 1; - nir_ssa_def *invoc_id = nir_load_system_value(&b, nir_intrinsic_load_local_invocation_id, 0); - nir_ssa_def *wg_id = nir_load_system_value(&b, nir_intrinsic_load_work_group_id, 0); + nir_ssa_def *invoc_id = nir_load_local_invocation_id(&b); + nir_ssa_def *wg_id = nir_load_work_group_id(&b); nir_ssa_def *block_size = nir_imm_ivec4(&b, b.shader->info.cs.local_size[0], b.shader->info.cs.local_size[1], diff -Nru mesa-18.3.3/src/amd/vulkan/radv_nir_to_llvm.c mesa-19.0.1/src/amd/vulkan/radv_nir_to_llvm.c --- mesa-18.3.3/src/amd/vulkan/radv_nir_to_llvm.c 2019-02-01 12:03:20.000000000 +0000 +++ mesa-19.0.1/src/amd/vulkan/radv_nir_to_llvm.c 2019-03-31 23:16:37.000000000 +0000 @@ -33,9 +33,7 @@ #include #include #include -#if HAVE_LLVM >= 0x0700 #include -#endif #include "sid.h" #include "gfx9d.h" @@ -94,6 +92,7 @@ gl_shader_stage stage; LLVMValueRef inputs[RADEON_LLVM_MAX_INPUTS * 4]; + uint64_t float16_shaded_mask; uint64_t input_mask; uint64_t output_mask; @@ -435,7 +434,6 @@ struct arg_info { LLVMTypeRef types[MAX_ARGS]; LLVMValueRef *assign[MAX_ARGS]; - unsigned array_params_mask; uint8_t count; uint8_t sgpr_count; uint8_t num_sgprs_used; @@ -466,13 +464,6 @@ } } -static inline void -add_array_arg(struct arg_info *info, LLVMTypeRef type, LLVMValueRef *param_ptr) -{ - info->array_params_mask |= (1 << info->count); - add_arg(info, ARG_SGPR, type, param_ptr); -} - static void 
assign_arguments(LLVMValueRef main_function, struct arg_info *info) { @@ -511,10 +502,11 @@ LLVMSetFunctionCallConv(main_function, RADEON_LLVM_AMDGPU_CS); for (unsigned i = 0; i < args->sgpr_count; ++i) { + LLVMValueRef P = LLVMGetParam(main_function, i); + ac_add_function_attr(ctx, main_function, i + 1, AC_FUNC_ATTR_INREG); - if (args->array_params_mask & (1 << i)) { - LLVMValueRef P = LLVMGetParam(main_function, i); + if (LLVMGetTypeKind(LLVMTypeOf(P)) == LLVMPointerTypeKind) { ac_add_function_attr(ctx, main_function, i + 1, AC_FUNC_ATTR_NOALIAS); ac_add_attr_dereferenceable(P, UINT64_MAX); } @@ -555,11 +547,10 @@ static void set_loc(struct radv_userdata_info *ud_info, uint8_t *sgpr_idx, - uint8_t num_sgprs, bool indirect) + uint8_t num_sgprs) { ud_info->sgpr_idx = *sgpr_idx; ud_info->num_sgprs = num_sgprs; - ud_info->indirect = indirect; *sgpr_idx += num_sgprs; } @@ -571,31 +562,28 @@ &ctx->shader_info->user_sgprs_locs.shader_data[idx]; assert(ud_info); - set_loc(ud_info, sgpr_idx, num_sgprs, false); + set_loc(ud_info, sgpr_idx, num_sgprs); } static void set_loc_shader_ptr(struct radv_shader_context *ctx, int idx, uint8_t *sgpr_idx) { - bool use_32bit_pointers = HAVE_32BIT_POINTERS && - idx != AC_UD_SCRATCH_RING_OFFSETS; + bool use_32bit_pointers = idx != AC_UD_SCRATCH_RING_OFFSETS; set_loc_shader(ctx, idx, sgpr_idx, use_32bit_pointers ? 1 : 2); } static void -set_loc_desc(struct radv_shader_context *ctx, int idx, uint8_t *sgpr_idx, - bool indirect) +set_loc_desc(struct radv_shader_context *ctx, int idx, uint8_t *sgpr_idx) { struct radv_userdata_locations *locs = &ctx->shader_info->user_sgprs_locs; struct radv_userdata_info *ud_info = &locs->descriptor_sets[idx]; assert(ud_info); - set_loc(ud_info, sgpr_idx, HAVE_32BIT_POINTERS ? 
1 : 2, indirect); + set_loc(ud_info, sgpr_idx, 1); - if (!indirect) - locs->descriptor_sets_enabled |= 1 << idx; + locs->descriptor_sets_enabled |= 1 << idx; } struct user_sgpr_info { @@ -633,7 +621,7 @@ uint8_t count = 0; if (ctx->shader_info->info.vs.has_vertex_buffers) - count += HAVE_32BIT_POINTERS ? 1 : 2; + count++; count += ctx->shader_info->info.vs.needs_draw_id ? 3 : 2; return count; @@ -702,51 +690,46 @@ user_sgpr_count++; if (ctx->shader_info->info.loads_push_constants) - user_sgpr_count += HAVE_32BIT_POINTERS ? 1 : 2; + user_sgpr_count++; + + if (ctx->streamout_buffers) + user_sgpr_count++; uint32_t available_sgprs = ctx->options->chip_class >= GFX9 && stage != MESA_SHADER_COMPUTE ? 32 : 16; uint32_t remaining_sgprs = available_sgprs - user_sgpr_count; uint32_t num_desc_set = util_bitcount(ctx->shader_info->info.desc_set_used_mask); - if (remaining_sgprs / (HAVE_32BIT_POINTERS ? 1 : 2) < num_desc_set) { + if (remaining_sgprs < num_desc_set) { user_sgpr_info->indirect_all_descriptor_sets = true; } } static void declare_global_input_sgprs(struct radv_shader_context *ctx, - gl_shader_stage stage, - bool has_previous_stage, - gl_shader_stage previous_stage, const struct user_sgpr_info *user_sgpr_info, struct arg_info *args, LLVMValueRef *desc_sets) { LLVMTypeRef type = ac_array_in_const32_addr_space(ctx->ac.i8); - unsigned num_sets = ctx->options->layout ? 
- ctx->options->layout->num_sets : 0; - unsigned stage_mask = 1 << stage; - - if (has_previous_stage) - stage_mask |= 1 << previous_stage; /* 1 for each descriptor set */ if (!user_sgpr_info->indirect_all_descriptor_sets) { - for (unsigned i = 0; i < num_sets; ++i) { - if ((ctx->shader_info->info.desc_set_used_mask & (1 << i)) && - ctx->options->layout->set[i].layout->shader_stages & stage_mask) { - add_array_arg(args, type, - &ctx->descriptor_sets[i]); - } + uint32_t mask = ctx->shader_info->info.desc_set_used_mask; + + while (mask) { + int i = u_bit_scan(&mask); + + add_arg(args, ARG_SGPR, type, &ctx->descriptor_sets[i]); } } else { - add_array_arg(args, ac_array_in_const32_addr_space(type), desc_sets); + add_arg(args, ARG_SGPR, ac_array_in_const32_addr_space(type), + desc_sets); } if (ctx->shader_info->info.loads_push_constants) { /* 1 for push constants and dynamic descriptors */ - add_array_arg(args, type, &ctx->abi.push_constants); + add_arg(args, ARG_SGPR, type, &ctx->abi.push_constants); } if (ctx->shader_info->info.so.num_outputs) { @@ -835,41 +818,31 @@ } static void -set_global_input_locs(struct radv_shader_context *ctx, gl_shader_stage stage, - bool has_previous_stage, gl_shader_stage previous_stage, +set_global_input_locs(struct radv_shader_context *ctx, const struct user_sgpr_info *user_sgpr_info, LLVMValueRef desc_sets, uint8_t *user_sgpr_idx) { - unsigned num_sets = ctx->options->layout ? 
- ctx->options->layout->num_sets : 0; - unsigned stage_mask = 1 << stage; - - if (has_previous_stage) - stage_mask |= 1 << previous_stage; + uint32_t mask = ctx->shader_info->info.desc_set_used_mask; if (!user_sgpr_info->indirect_all_descriptor_sets) { - for (unsigned i = 0; i < num_sets; ++i) { - if ((ctx->shader_info->info.desc_set_used_mask & (1 << i)) && - ctx->options->layout->set[i].layout->shader_stages & stage_mask) { - set_loc_desc(ctx, i, user_sgpr_idx, false); - } else - ctx->descriptor_sets[i] = NULL; + while (mask) { + int i = u_bit_scan(&mask); + + set_loc_desc(ctx, i, user_sgpr_idx); } } else { set_loc_shader_ptr(ctx, AC_UD_INDIRECT_DESCRIPTOR_SETS, user_sgpr_idx); - for (unsigned i = 0; i < num_sets; ++i) { - if ((ctx->shader_info->info.desc_set_used_mask & (1 << i)) && - ctx->options->layout->set[i].layout->shader_stages & stage_mask) { - ctx->descriptor_sets[i] = - ac_build_load_to_sgpr(&ctx->ac, - desc_sets, - LLVMConstInt(ctx->ac.i32, i, false)); + while (mask) { + int i = u_bit_scan(&mask); + + ctx->descriptor_sets[i] = + ac_build_load_to_sgpr(&ctx->ac, desc_sets, + LLVMConstInt(ctx->ac.i32, i, false)); - } else - ctx->descriptor_sets[i] = NULL; } + ctx->shader_info->need_indirect_descriptor_sets = true; } @@ -955,9 +928,8 @@ switch (stage) { case MESA_SHADER_COMPUTE: - declare_global_input_sgprs(ctx, stage, has_previous_stage, - previous_stage, &user_sgpr_info, - &args, &desc_sets); + declare_global_input_sgprs(ctx, &user_sgpr_info, &args, + &desc_sets); if (ctx->shader_info->info.cs.uses_grid_size) { add_arg(&args, ARG_SGPR, ctx->ac.v3i32, @@ -978,9 +950,9 @@ &ctx->abi.local_invocation_ids); break; case MESA_SHADER_VERTEX: - declare_global_input_sgprs(ctx, stage, has_previous_stage, - previous_stage, &user_sgpr_info, - &args, &desc_sets); + declare_global_input_sgprs(ctx, &user_sgpr_info, &args, + &desc_sets); + declare_vs_specific_input_sgprs(ctx, stage, has_previous_stage, previous_stage, &args); @@ -1011,11 +983,9 @@ add_arg(&args, 
ARG_SGPR, ctx->ac.i32, NULL); // unknown add_arg(&args, ARG_SGPR, ctx->ac.i32, NULL); // unknown - declare_global_input_sgprs(ctx, stage, - has_previous_stage, - previous_stage, - &user_sgpr_info, &args, + declare_global_input_sgprs(ctx, &user_sgpr_info, &args, &desc_sets); + declare_vs_specific_input_sgprs(ctx, stage, has_previous_stage, previous_stage, &args); @@ -1031,10 +1001,7 @@ declare_vs_input_vgprs(ctx, &args); } else { - declare_global_input_sgprs(ctx, stage, - has_previous_stage, - previous_stage, - &user_sgpr_info, &args, + declare_global_input_sgprs(ctx, &user_sgpr_info, &args, &desc_sets); if (needs_view_index) @@ -1051,9 +1018,8 @@ } break; case MESA_SHADER_TESS_EVAL: - declare_global_input_sgprs(ctx, stage, has_previous_stage, - previous_stage, &user_sgpr_info, - &args, &desc_sets); + declare_global_input_sgprs(ctx, &user_sgpr_info, &args, + &desc_sets); if (needs_view_index) add_arg(&args, ARG_SGPR, ctx->ac.i32, @@ -1084,10 +1050,7 @@ add_arg(&args, ARG_SGPR, ctx->ac.i32, NULL); // unknown add_arg(&args, ARG_SGPR, ctx->ac.i32, NULL); // unknown - declare_global_input_sgprs(ctx, stage, - has_previous_stage, - previous_stage, - &user_sgpr_info, &args, + declare_global_input_sgprs(ctx, &user_sgpr_info, &args, &desc_sets); if (previous_stage != MESA_SHADER_TESS_EVAL) { @@ -1118,10 +1081,7 @@ declare_tes_input_vgprs(ctx, &args); } } else { - declare_global_input_sgprs(ctx, stage, - has_previous_stage, - previous_stage, - &user_sgpr_info, &args, + declare_global_input_sgprs(ctx, &user_sgpr_info, &args, &desc_sets); if (needs_view_index) @@ -1149,9 +1109,8 @@ } break; case MESA_SHADER_FRAGMENT: - declare_global_input_sgprs(ctx, stage, has_previous_stage, - previous_stage, &user_sgpr_info, - &args, &desc_sets); + declare_global_input_sgprs(ctx, &user_sgpr_info, &args, + &desc_sets); add_arg(&args, ARG_SGPR, ctx->ac.i32, &ctx->abi.prim_mask); add_arg(&args, ARG_VGPR, ctx->ac.v2i32, &ctx->persp_sample); @@ -1210,8 +1169,7 @@ if (has_previous_stage) 
user_sgpr_idx = 0; - set_global_input_locs(ctx, stage, has_previous_stage, previous_stage, - &user_sgpr_info, desc_sets, &user_sgpr_idx); + set_global_input_locs(ctx, &user_sgpr_info, desc_sets, &user_sgpr_idx); switch (stage) { case MESA_SHADER_COMPUTE: @@ -1484,7 +1442,7 @@ { struct radv_shader_context *ctx = radv_shader_context_from_abi(abi); const unsigned location = var->data.location; - const unsigned component = var->data.location_frac; + unsigned component = var->data.location_frac; const bool is_patch = var->data.patch; const bool is_compact = var->data.compact; LLVMValueRef dw_addr; @@ -1502,10 +1460,14 @@ } param = shader_io_get_unique_index(location); - if (location == VARYING_SLOT_CLIP_DIST0 && - is_compact && const_index > 3) { - const_index -= 3; - param++; + if ((location == VARYING_SLOT_CLIP_DIST0 || location == VARYING_SLOT_CLIP_DIST1) && is_compact) { + const_index += component; + component = 0; + + if (const_index >= 4) { + const_index -= 4; + param++; + } } if (!is_patch) { @@ -1572,9 +1534,13 @@ LLVMValueRef result; unsigned param = shader_io_get_unique_index(location); - if (location == VARYING_SLOT_CLIP_DIST0 && is_compact && const_index > 3) { - const_index -= 3; - param++; + if ((location == VARYING_SLOT_CLIP_DIST0 || location == VARYING_SLOT_CLIP_DIST1) && is_compact) { + const_index += component; + component = 0; + if (const_index >= 4) { + const_index -= 4; + param++; + } } buf_addr = get_tcs_tes_buffer_address_params(ctx, param, const_index, @@ -1693,9 +1659,6 @@ case 8: sample_pos_offset = 7; break; - case 16: - sample_pos_offset = 15; - break; default: break; } @@ -2097,6 +2060,7 @@ unsigned attr, LLVMValueRef interp_param, LLVMValueRef prim_mask, + bool float16, LLVMValueRef result[4]) { LLVMValueRef attr_number; @@ -2129,7 +2093,12 @@ for (chan = 0; chan < 4; chan++) { LLVMValueRef llvm_chan = LLVMConstInt(ctx->ac.i32, chan, false); - if (interp) { + if (interp && float16) { + result[chan] = ac_build_fs_interp_f16(&ctx->ac, + 
llvm_chan, + attr_number, + prim_mask, i, j); + } else if (interp) { result[chan] = ac_build_fs_interp(&ctx->ac, llvm_chan, attr_number, @@ -2141,7 +2110,30 @@ attr_number, prim_mask); result[chan] = LLVMBuildBitCast(ctx->ac.builder, result[chan], ctx->ac.i32, ""); - result[chan] = LLVMBuildTruncOrBitCast(ctx->ac.builder, result[chan], LLVMTypeOf(interp_param), ""); + result[chan] = LLVMBuildTruncOrBitCast(ctx->ac.builder, result[chan], float16 ? ctx->ac.i16 : ctx->ac.i32, ""); + } + } +} + +static void mark_16bit_fs_input(struct radv_shader_context *ctx, + const struct glsl_type *type, + int location) +{ + if (glsl_type_is_scalar(type) || glsl_type_is_vector(type) || glsl_type_is_matrix(type)) { + unsigned attrib_count = glsl_count_attribute_slots(type, false); + if (glsl_type_is_16bit(type)) { + ctx->float16_shaded_mask |= ((1ull << attrib_count) - 1) << location; + } + } else if (glsl_type_is_array(type)) { + unsigned stride = glsl_count_attribute_slots(glsl_get_array_element(type), false); + for (unsigned i = 0; i < glsl_get_length(type); ++i) { + mark_16bit_fs_input(ctx, glsl_get_array_element(type), location + i * stride); + } + } else { + assert(glsl_type_is_struct(type)); + for (unsigned i = 0; i < glsl_get_length(type); i++) { + mark_16bit_fs_input(ctx, glsl_get_struct_field(type, i), location); + location += glsl_count_attribute_slots(glsl_get_struct_field(type, i), false); } } } @@ -2156,9 +2148,20 @@ uint64_t mask; variable->data.driver_location = idx * 4; + + + if (variable->data.compact) { + unsigned component_count = variable->data.location_frac + + glsl_get_length(variable->type); + attrib_count = (component_count + 3) / 4; + } else + mark_16bit_fs_input(ctx, variable->type, idx); + mask = ((1ull << attrib_count) - 1) << variable->data.location; - if (glsl_get_base_type(glsl_without_array(variable->type)) == GLSL_TYPE_FLOAT) { + if (glsl_get_base_type(glsl_without_array(variable->type)) == GLSL_TYPE_FLOAT || + 
glsl_get_base_type(glsl_without_array(variable->type)) == GLSL_TYPE_FLOAT16 || + glsl_get_base_type(glsl_without_array(variable->type)) == GLSL_TYPE_STRUCT) { unsigned interp_type; if (variable->data.sample) interp_type = INTERP_SAMPLE; @@ -2169,22 +2172,12 @@ interp = lookup_interp_param(&ctx->abi, variable->data.interpolation, interp_type); } - bool is_16bit = glsl_type_is_16bit(glsl_without_array(variable->type)); - LLVMTypeRef type = is_16bit ? ctx->ac.i16 : ctx->ac.i32; if (interp == NULL) - interp = LLVMGetUndef(type); + interp = LLVMGetUndef(ctx->ac.i32); for (unsigned i = 0; i < attrib_count; ++i) ctx->inputs[ac_llvm_reg_index_soa(idx + i, 0)] = interp; - if (idx == VARYING_SLOT_CLIP_DIST0) { - /* Do not account for the number of components inside the array - * of clip/cull distances because this might wrongly set other - * bits like primitive ID or layer. - */ - mask = 1ull << VARYING_SLOT_CLIP_DIST0; - } - ctx->input_mask |= mask; } @@ -2246,11 +2239,14 @@ if (i >= VARYING_SLOT_VAR0 || i == VARYING_SLOT_PNTC || i == VARYING_SLOT_PRIMITIVE_ID || i == VARYING_SLOT_LAYER) { interp_param = *inputs; - interp_fs_input(ctx, index, interp_param, ctx->abi.prim_mask, + bool float16 = (ctx->float16_shaded_mask >> i) & 1; + interp_fs_input(ctx, index, interp_param, ctx->abi.prim_mask, float16, inputs); if (LLVMIsUndef(interp_param)) ctx->shader_info->fs.flat_shaded_mask |= 1u << index; + if (float16) + ctx->shader_info->fs.float16_shaded_mask |= 1u << index; if (i >= VARYING_SLOT_VAR0) ctx->abi.fs_input_attr_indices[i - VARYING_SLOT_VAR0] = index; ++index; @@ -2262,7 +2258,7 @@ interp_param = *inputs; interp_fs_input(ctx, index, interp_param, - ctx->abi.prim_mask, inputs); + ctx->abi.prim_mask, false, inputs); ++index; } } else if (i == VARYING_SLOT_POS) { @@ -2296,6 +2292,12 @@ if (stage == MESA_SHADER_TESS_CTRL) return; + if (variable->data.compact) { + unsigned component_count = variable->data.location_frac + + glsl_get_length(variable->type); + attrib_count = 
(component_count + 3) / 4; + } + mask_attribs = ((1ull << attrib_count) - 1) << idx; if (stage == MESA_SHADER_VERTEX || stage == MESA_SHADER_TESS_EVAL || @@ -2311,8 +2313,6 @@ ctx->shader_info->tes.outinfo.cull_dist_mask = (1 << shader->info.cull_distance_array_size) - 1; ctx->shader_info->tes.outinfo.cull_dist_mask <<= shader->info.clip_distance_array_size; } - - mask_attribs = 1ull << idx; } } @@ -2411,7 +2411,7 @@ if (is_16bit) { for (unsigned chan = 0; chan < 4; chan++) values[chan] = LLVMBuildZExt(ctx->ac.builder, - values[chan], + ac_to_integer(&ctx->ac, values[chan]), ctx->ac.i32, ""); } break; @@ -2422,7 +2422,7 @@ if (is_16bit) { for (unsigned chan = 0; chan < 4; chan++) values[chan] = LLVMBuildSExt(ctx->ac.builder, - values[chan], + ac_to_integer(&ctx->ac, values[chan]), ctx->ac.i32, ""); } break; @@ -2475,12 +2475,8 @@ } else memcpy(&args->out[0], values, sizeof(values[0]) * 4); - for (unsigned i = 0; i < 4; ++i) { - if (!(args->enabled_channels & (1 << i))) - continue; - + for (unsigned i = 0; i < 4; ++i) args->out[i] = ac_to_float(&ctx->ac, args->out[i]); - } } static void @@ -2661,51 +2657,41 @@ memset(outinfo->vs_output_param_offset, AC_EXP_PARAM_UNDEFINED, sizeof(outinfo->vs_output_param_offset)); - if (ctx->output_mask & (1ull << VARYING_SLOT_CLIP_DIST0)) { - unsigned output_usage_mask, length; - LLVMValueRef slots[8]; - unsigned j; - - if (ctx->stage == MESA_SHADER_VERTEX && - !ctx->is_gs_copy_shader) { - output_usage_mask = - ctx->shader_info->info.vs.output_usage_mask[VARYING_SLOT_CLIP_DIST0]; - } else if (ctx->stage == MESA_SHADER_TESS_EVAL) { - output_usage_mask = - ctx->shader_info->info.tes.output_usage_mask[VARYING_SLOT_CLIP_DIST0]; - } else { - assert(ctx->is_gs_copy_shader); - output_usage_mask = - ctx->shader_info->info.gs.output_usage_mask[VARYING_SLOT_CLIP_DIST0]; - } + for(unsigned location = VARYING_SLOT_CLIP_DIST0; location <= VARYING_SLOT_CLIP_DIST1; ++location) { + if (ctx->output_mask & (1ull << location)) { + unsigned 
output_usage_mask, length; + LLVMValueRef slots[4]; + unsigned j; + + if (ctx->stage == MESA_SHADER_VERTEX && + !ctx->is_gs_copy_shader) { + output_usage_mask = + ctx->shader_info->info.vs.output_usage_mask[location]; + } else if (ctx->stage == MESA_SHADER_TESS_EVAL) { + output_usage_mask = + ctx->shader_info->info.tes.output_usage_mask[location]; + } else { + assert(ctx->is_gs_copy_shader); + output_usage_mask = + ctx->shader_info->info.gs.output_usage_mask[location]; + } - length = util_last_bit(output_usage_mask); + length = util_last_bit(output_usage_mask); - i = VARYING_SLOT_CLIP_DIST0; - for (j = 0; j < length; j++) - slots[j] = ac_to_float(&ctx->ac, radv_load_output(ctx, i, j)); + for (j = 0; j < length; j++) + slots[j] = ac_to_float(&ctx->ac, radv_load_output(ctx, location, j)); - for (i = length; i < 8; i++) - slots[i] = LLVMGetUndef(ctx->ac.f32); + for (i = length; i < 4; i++) + slots[i] = LLVMGetUndef(ctx->ac.f32); - if (length > 4) { - target = V_008DFC_SQ_EXP_POS + 3; - si_llvm_init_export_args(ctx, &slots[4], 0xf, target, &args); + target = V_008DFC_SQ_EXP_POS + 2 + (location - VARYING_SLOT_CLIP_DIST0); + si_llvm_init_export_args(ctx, &slots[0], 0xf, target, &args); memcpy(&pos_args[target - V_008DFC_SQ_EXP_POS], - &args, sizeof(args)); - } + &args, sizeof(args)); - target = V_008DFC_SQ_EXP_POS + 2; - si_llvm_init_export_args(ctx, &slots[0], 0xf, target, &args); - memcpy(&pos_args[target - V_008DFC_SQ_EXP_POS], - &args, sizeof(args)); - - /* Export the clip/cull distances values to the next stage. */ - radv_export_param(ctx, param_count, &slots[0], 0xf); - outinfo->vs_output_param_offset[VARYING_SLOT_CLIP_DIST0] = param_count++; - if (length > 4) { - radv_export_param(ctx, param_count, &slots[4], 0xf); - outinfo->vs_output_param_offset[VARYING_SLOT_CLIP_DIST1] = param_count++; + /* Export the clip/cull distances values to the next stage. 
*/ + radv_export_param(ctx, param_count, &slots[0], 0xf); + outinfo->vs_output_param_offset[location] = param_count++; } } @@ -2866,28 +2852,14 @@ LLVMValueRef lds_base = NULL; for (unsigned i = 0; i < AC_LLVM_MAX_OUTPUTS; ++i) { - unsigned output_usage_mask; int param_index; - int length = 4; if (!(ctx->output_mask & (1ull << i))) continue; - if (ctx->stage == MESA_SHADER_VERTEX) { - output_usage_mask = - ctx->shader_info->info.vs.output_usage_mask[i]; - } else { - assert(ctx->stage == MESA_SHADER_TESS_EVAL); - output_usage_mask = - ctx->shader_info->info.tes.output_usage_mask[i]; - } - - if (i == VARYING_SLOT_CLIP_DIST0) - length = util_last_bit(output_usage_mask); - param_index = shader_io_get_unique_index(i); - max_output_written = MAX2(param_index + (length > 4), max_output_written); + max_output_written = MAX2(param_index, max_output_written); } outinfo->esgs_itemsize = (max_output_written + 1) * 16; @@ -2908,7 +2880,6 @@ LLVMValueRef *out_ptr = &ctx->abi.outputs[i * 4]; unsigned output_usage_mask; int param_index; - int length = 4; if (!(ctx->output_mask & (1ull << i))) continue; @@ -2922,9 +2893,6 @@ ctx->shader_info->info.tes.output_usage_mask[i]; } - if (i == VARYING_SLOT_CLIP_DIST0) - length = util_last_bit(output_usage_mask); - param_index = shader_io_get_unique_index(i); if (lds_base) { @@ -2933,7 +2901,7 @@ ""); } - for (j = 0; j < length; j++) { + for (j = 0; j < 4; j++) { if (!(output_usage_mask & (1 << j))) continue; @@ -2970,22 +2938,16 @@ vertex_dw_stride, ""); for (unsigned i = 0; i < AC_LLVM_MAX_OUTPUTS; ++i) { - unsigned output_usage_mask = - ctx->shader_info->info.vs.output_usage_mask[i]; LLVMValueRef *out_ptr = &ctx->abi.outputs[i * 4]; - int length = 4; if (!(ctx->output_mask & (1ull << i))) continue; - if (i == VARYING_SLOT_CLIP_DIST0) - length = util_last_bit(output_usage_mask); - int param = shader_io_get_unique_index(i); LLVMValueRef dw_addr = LLVMBuildAdd(ctx->ac.builder, base_dw_addr, LLVMConstInt(ctx->ac.i32, param * 4, false), ""); 
- for (unsigned j = 0; j < length; j++) { + for (unsigned j = 0; j < 4; j++) { LLVMValueRef value = LLVMBuildLoad(ctx->ac.builder, out_ptr[j], ""); value = ac_to_integer(&ctx->ac, value); value = LLVMBuildZExtOrBitCast(ctx->ac.builder, value, ctx->ac.i32, ""); @@ -3511,7 +3473,7 @@ ctx.abi.load_sampler_desc = radv_get_sampler_desc; ctx.abi.load_resource = radv_load_resource; ctx.abi.clamp_shadow_reference = false; - ctx.abi.gfx9_stride_size_workaround = ctx.ac.chip_class == GFX9; + ctx.abi.gfx9_stride_size_workaround = ctx.ac.chip_class == GFX9 && HAVE_LLVM < 0x800; if (shader_count >= 2) ac_init_exec_full_mask(&ctx.ac); diff -Nru mesa-18.3.3/src/amd/vulkan/radv_pass.c mesa-19.0.1/src/amd/vulkan/radv_pass.c --- mesa-18.3.3/src/amd/vulkan/radv_pass.c 2018-09-27 19:13:53.000000000 +0000 +++ mesa-19.0.1/src/amd/vulkan/radv_pass.c 2019-03-31 23:16:37.000000000 +0000 @@ -38,7 +38,7 @@ struct radv_render_pass *pass; size_t size; size_t attachments_offset; - VkRenderPassMultiviewCreateInfoKHR *multiview_info = NULL; + VkRenderPassMultiviewCreateInfo *multiview_info = NULL; assert(pCreateInfo->sType == VK_STRUCTURE_TYPE_RENDER_PASS_CREATE_INFO); @@ -59,8 +59,8 @@ vk_foreach_struct(ext, pCreateInfo->pNext) { switch(ext->sType) { - case VK_STRUCTURE_TYPE_RENDER_PASS_MULTIVIEW_CREATE_INFO_KHR: - multiview_info = ( VkRenderPassMultiviewCreateInfoKHR*)ext; + case VK_STRUCTURE_TYPE_RENDER_PASS_MULTIVIEW_CREATE_INFO: + multiview_info = (VkRenderPassMultiviewCreateInfo*)ext; break; default: break; @@ -180,7 +180,17 @@ } for (unsigned i = 0; i < pCreateInfo->dependencyCount; ++i) { + uint32_t src = pCreateInfo->pDependencies[i].srcSubpass; uint32_t dst = pCreateInfo->pDependencies[i].dstSubpass; + + /* Ignore subpass self-dependencies as they allow the app to + * call vkCmdPipelineBarrier() inside the render pass and the + * driver should only do the barrier when called, not when + * starting the render pass. 
+ */ + if (src == dst) + continue; + if (dst == VK_SUBPASS_EXTERNAL) { pass->end_barrier.src_stage_mask = pCreateInfo->pDependencies[i].srcStageMask; pass->end_barrier.src_access_mask = pCreateInfo->pDependencies[i].srcAccessMask; @@ -337,7 +347,17 @@ } for (unsigned i = 0; i < pCreateInfo->dependencyCount; ++i) { + uint32_t src = pCreateInfo->pDependencies[i].srcSubpass; uint32_t dst = pCreateInfo->pDependencies[i].dstSubpass; + + /* Ignore subpass self-dependencies as they allow the app to + * call vkCmdPipelineBarrier() inside the render pass and the + * driver should only do the barrier when called, not when + * starting the render pass. + */ + if (src == dst) + continue; + if (dst == VK_SUBPASS_EXTERNAL) { pass->end_barrier.src_stage_mask = pCreateInfo->pDependencies[i].srcStageMask; pass->end_barrier.src_access_mask = pCreateInfo->pDependencies[i].srcAccessMask; diff -Nru mesa-18.3.3/src/amd/vulkan/radv_pipeline.c mesa-19.0.1/src/amd/vulkan/radv_pipeline.c --- mesa-18.3.3/src/amd/vulkan/radv_pipeline.c 2019-02-01 12:03:20.000000000 +0000 +++ mesa-19.0.1/src/amd/vulkan/radv_pipeline.c 2019-03-31 23:16:37.000000000 +0000 @@ -524,6 +524,14 @@ col_format |= cf << (4 * i); } + if (!col_format && blend->need_src_alpha & (1 << 0)) { + /* When a subpass doesn't have any color attachments, write the + * alpha channel of MRT0 when alpha coverage is enabled because + * the depth attachment needs it. + */ + col_format |= V_028714_SPI_SHADER_32_ABGR; + } + /* If the i-th target format is set, all previous target formats must * be non-zero to avoid hangs. 
*/ @@ -681,13 +689,15 @@ else blend.cb_color_control |= S_028808_ROP3(V_028808_ROP3_COPY); - blend.db_alpha_to_mask = S_028B70_ALPHA_TO_MASK_OFFSET0(2) | - S_028B70_ALPHA_TO_MASK_OFFSET1(2) | - S_028B70_ALPHA_TO_MASK_OFFSET2(2) | - S_028B70_ALPHA_TO_MASK_OFFSET3(2); + blend.db_alpha_to_mask = S_028B70_ALPHA_TO_MASK_OFFSET0(3) | + S_028B70_ALPHA_TO_MASK_OFFSET1(1) | + S_028B70_ALPHA_TO_MASK_OFFSET2(0) | + S_028B70_ALPHA_TO_MASK_OFFSET3(2) | + S_028B70_OFFSET_ROUND(1); if (vkms && vkms->alphaToCoverageEnable) { blend.db_alpha_to_mask |= S_028B70_ALPHA_TO_MASK_ENABLE(1); + blend.need_src_alpha |= 0x1; } blend.cb_target_mask = 0; @@ -1705,11 +1715,11 @@ } bool ccw = tes->info.tes.ccw; - const VkPipelineTessellationDomainOriginStateCreateInfoKHR *domain_origin_state = + const VkPipelineTessellationDomainOriginStateCreateInfo *domain_origin_state = vk_find_struct_const(pCreateInfo->pTessellationState, - PIPELINE_TESSELLATION_DOMAIN_ORIGIN_STATE_CREATE_INFO_KHR); + PIPELINE_TESSELLATION_DOMAIN_ORIGIN_STATE_CREATE_INFO); - if (domain_origin_state && domain_origin_state->domainOrigin != VK_TESSELLATION_DOMAIN_ORIGIN_UPPER_LEFT_KHR) + if (domain_origin_state && domain_origin_state->domainOrigin != VK_TESSELLATION_DOMAIN_ORIGIN_UPPER_LEFT) ccw = !ccw; if (tes->info.tes.point_mode) @@ -1814,6 +1824,10 @@ nir_lower_io_arrays_to_elements(ordered_shaders[i], ordered_shaders[i - 1]); + if (nir_link_opt_varyings(ordered_shaders[i], + ordered_shaders[i - 1])) + radv_optimize_nir(ordered_shaders[i - 1], false, false); + nir_remove_dead_variables(ordered_shaders[i], nir_var_shader_out); nir_remove_dead_variables(ordered_shaders[i - 1], @@ -2083,6 +2097,10 @@ radv_link_shaders(pipeline, nir); for (int i = 0; i < MESA_SHADER_STAGES; ++i) { + if (nir[i]) { + NIR_PASS_V(nir[i], nir_lower_bool_to_int32); + } + if (radv_can_dump_shader(device, modules[i], false)) nir_print_shader(nir[i], stderr); } @@ -2517,7 +2535,7 @@ } static void -radv_pipeline_generate_binning_state(struct 
radeon_cmdbuf *cs, +radv_pipeline_generate_binning_state(struct radeon_cmdbuf *ctx_cs, struct radv_pipeline *pipeline, const VkGraphicsPipelineCreateInfo *pCreateInfo) { @@ -2567,15 +2585,15 @@ S_028C44_OPTIMAL_BIN_SELECTION(1); } - radeon_set_context_reg(cs, R_028C44_PA_SC_BINNER_CNTL_0, + radeon_set_context_reg(ctx_cs, R_028C44_PA_SC_BINNER_CNTL_0, pa_sc_binner_cntl_0); - radeon_set_context_reg(cs, R_028060_DB_DFSM_CONTROL, + radeon_set_context_reg(ctx_cs, R_028060_DB_DFSM_CONTROL, db_dfsm_control); } static void -radv_pipeline_generate_depth_stencil_state(struct radeon_cmdbuf *cs, +radv_pipeline_generate_depth_stencil_state(struct radeon_cmdbuf *ctx_cs, struct radv_pipeline *pipeline, const VkGraphicsPipelineCreateInfo *pCreateInfo, const struct radv_graphics_pipeline_create_info *extra) @@ -2648,35 +2666,35 @@ db_render_override |= S_02800C_DISABLE_VIEWPORT_CLAMP(1); } - radeon_set_context_reg(cs, R_028800_DB_DEPTH_CONTROL, db_depth_control); - radeon_set_context_reg(cs, R_02842C_DB_STENCIL_CONTROL, db_stencil_control); + radeon_set_context_reg(ctx_cs, R_028800_DB_DEPTH_CONTROL, db_depth_control); + radeon_set_context_reg(ctx_cs, R_02842C_DB_STENCIL_CONTROL, db_stencil_control); - radeon_set_context_reg(cs, R_028000_DB_RENDER_CONTROL, db_render_control); - radeon_set_context_reg(cs, R_02800C_DB_RENDER_OVERRIDE, db_render_override); - radeon_set_context_reg(cs, R_028010_DB_RENDER_OVERRIDE2, db_render_override2); + radeon_set_context_reg(ctx_cs, R_028000_DB_RENDER_CONTROL, db_render_control); + radeon_set_context_reg(ctx_cs, R_02800C_DB_RENDER_OVERRIDE, db_render_override); + radeon_set_context_reg(ctx_cs, R_028010_DB_RENDER_OVERRIDE2, db_render_override2); } static void -radv_pipeline_generate_blend_state(struct radeon_cmdbuf *cs, +radv_pipeline_generate_blend_state(struct radeon_cmdbuf *ctx_cs, struct radv_pipeline *pipeline, const struct radv_blend_state *blend) { - radeon_set_context_reg_seq(cs, R_028780_CB_BLEND0_CONTROL, 8); - radeon_emit_array(cs, 
blend->cb_blend_control, + radeon_set_context_reg_seq(ctx_cs, R_028780_CB_BLEND0_CONTROL, 8); + radeon_emit_array(ctx_cs, blend->cb_blend_control, 8); - radeon_set_context_reg(cs, R_028808_CB_COLOR_CONTROL, blend->cb_color_control); - radeon_set_context_reg(cs, R_028B70_DB_ALPHA_TO_MASK, blend->db_alpha_to_mask); + radeon_set_context_reg(ctx_cs, R_028808_CB_COLOR_CONTROL, blend->cb_color_control); + radeon_set_context_reg(ctx_cs, R_028B70_DB_ALPHA_TO_MASK, blend->db_alpha_to_mask); if (pipeline->device->physical_device->has_rbplus) { - radeon_set_context_reg_seq(cs, R_028760_SX_MRT0_BLEND_OPT, 8); - radeon_emit_array(cs, blend->sx_mrt_blend_opt, 8); + radeon_set_context_reg_seq(ctx_cs, R_028760_SX_MRT0_BLEND_OPT, 8); + radeon_emit_array(ctx_cs, blend->sx_mrt_blend_opt, 8); } - radeon_set_context_reg(cs, R_028714_SPI_SHADER_COL_FORMAT, blend->spi_shader_col_format); + radeon_set_context_reg(ctx_cs, R_028714_SPI_SHADER_COL_FORMAT, blend->spi_shader_col_format); - radeon_set_context_reg(cs, R_028238_CB_TARGET_MASK, blend->cb_target_mask); - radeon_set_context_reg(cs, R_02823C_CB_SHADER_MASK, blend->cb_shader_mask); + radeon_set_context_reg(ctx_cs, R_028238_CB_TARGET_MASK, blend->cb_target_mask); + radeon_set_context_reg(ctx_cs, R_02823C_CB_SHADER_MASK, blend->cb_shader_mask); pipeline->graphics.col_format = blend->spi_shader_col_format; pipeline->graphics.cb_target_mask = blend->cb_target_mask; @@ -2694,23 +2712,23 @@ } static void -radv_pipeline_generate_raster_state(struct radeon_cmdbuf *cs, +radv_pipeline_generate_raster_state(struct radeon_cmdbuf *ctx_cs, struct radv_pipeline *pipeline, const VkGraphicsPipelineCreateInfo *pCreateInfo) { const VkPipelineRasterizationStateCreateInfo *vkraster = pCreateInfo->pRasterizationState; const VkConservativeRasterizationModeEXT mode = radv_get_conservative_raster_mode(vkraster); - uint32_t pa_sc_conservative_rast = 0; + uint32_t pa_sc_conservative_rast = S_028C4C_NULL_SQUAD_AA_MASK_ENABLE(1); - radeon_set_context_reg(cs, 
R_028810_PA_CL_CLIP_CNTL, + radeon_set_context_reg(ctx_cs, R_028810_PA_CL_CLIP_CNTL, S_028810_DX_CLIP_SPACE_DEF(1) | // vulkan uses DX conventions. S_028810_ZCLIP_NEAR_DISABLE(vkraster->depthClampEnable ? 1 : 0) | S_028810_ZCLIP_FAR_DISABLE(vkraster->depthClampEnable ? 1 : 0) | S_028810_DX_RASTERIZATION_KILL(vkraster->rasterizerDiscardEnable ? 1 : 0) | S_028810_DX_LINEAR_ATTR_CLIP_ENA(1)); - radeon_set_context_reg(cs, R_0286D4_SPI_INTERP_CONTROL_0, + radeon_set_context_reg(ctx_cs, R_0286D4_SPI_INTERP_CONTROL_0, S_0286D4_FLAT_SHADE_ENA(1) | S_0286D4_PNT_SPRITE_ENA(1) | S_0286D4_PNT_SPRITE_OVRD_X(V_0286D4_SPI_PNT_SPRITE_SEL_S) | @@ -2719,12 +2737,12 @@ S_0286D4_PNT_SPRITE_OVRD_W(V_0286D4_SPI_PNT_SPRITE_SEL_1) | S_0286D4_PNT_SPRITE_TOP_1(0)); /* vulkan is top to bottom - 1.0 at bottom */ - radeon_set_context_reg(cs, R_028BE4_PA_SU_VTX_CNTL, + radeon_set_context_reg(ctx_cs, R_028BE4_PA_SU_VTX_CNTL, S_028BE4_PIX_CENTER(1) | // TODO verify S_028BE4_ROUND_MODE(V_028BE4_X_ROUND_TO_EVEN) | S_028BE4_QUANT_MODE(V_028BE4_X_16_8_FIXED_POINT_1_256TH)); - radeon_set_context_reg(cs, R_028814_PA_SU_SC_MODE_CNTL, + radeon_set_context_reg(ctx_cs, R_028814_PA_SU_SC_MODE_CNTL, S_028814_FACE(vkraster->frontFace) | S_028814_CULL_FRONT(!!(vkraster->cullMode & VK_CULL_MODE_FRONT_BIT)) | S_028814_CULL_BACK(!!(vkraster->cullMode & VK_CULL_MODE_BACK_BIT)) | @@ -2765,37 +2783,37 @@ } } - radeon_set_context_reg(cs, R_028C4C_PA_SC_CONSERVATIVE_RASTERIZATION_CNTL, + radeon_set_context_reg(ctx_cs, R_028C4C_PA_SC_CONSERVATIVE_RASTERIZATION_CNTL, pa_sc_conservative_rast); } static void -radv_pipeline_generate_multisample_state(struct radeon_cmdbuf *cs, +radv_pipeline_generate_multisample_state(struct radeon_cmdbuf *ctx_cs, struct radv_pipeline *pipeline) { struct radv_multisample_state *ms = &pipeline->graphics.ms; - radeon_set_context_reg_seq(cs, R_028C38_PA_SC_AA_MASK_X0Y0_X1Y0, 2); - radeon_emit(cs, ms->pa_sc_aa_mask[0]); - radeon_emit(cs, ms->pa_sc_aa_mask[1]); + 
radeon_set_context_reg_seq(ctx_cs, R_028C38_PA_SC_AA_MASK_X0Y0_X1Y0, 2); + radeon_emit(ctx_cs, ms->pa_sc_aa_mask[0]); + radeon_emit(ctx_cs, ms->pa_sc_aa_mask[1]); - radeon_set_context_reg(cs, R_028804_DB_EQAA, ms->db_eqaa); - radeon_set_context_reg(cs, R_028A4C_PA_SC_MODE_CNTL_1, ms->pa_sc_mode_cntl_1); + radeon_set_context_reg(ctx_cs, R_028804_DB_EQAA, ms->db_eqaa); + radeon_set_context_reg(ctx_cs, R_028A4C_PA_SC_MODE_CNTL_1, ms->pa_sc_mode_cntl_1); /* The exclusion bits can be set to improve rasterization efficiency * if no sample lies on the pixel boundary (-8 sample offset). It's * currently always TRUE because the driver doesn't support 16 samples. */ bool exclusion = pipeline->device->physical_device->rad_info.chip_class >= CIK; - radeon_set_context_reg(cs, R_02882C_PA_SU_PRIM_FILTER_CNTL, + radeon_set_context_reg(ctx_cs, R_02882C_PA_SU_PRIM_FILTER_CNTL, S_02882C_XMAX_RIGHT_EXCLUSION(exclusion) | S_02882C_YMAX_BOTTOM_EXCLUSION(exclusion)); } static void -radv_pipeline_generate_vgt_gs_mode(struct radeon_cmdbuf *cs, - const struct radv_pipeline *pipeline) +radv_pipeline_generate_vgt_gs_mode(struct radeon_cmdbuf *ctx_cs, + struct radv_pipeline *pipeline) { const struct radv_vs_output_info *outinfo = get_vs_output_info(pipeline); @@ -2813,12 +2831,13 @@ vgt_primitiveid_en = true; } - radeon_set_context_reg(cs, R_028A84_VGT_PRIMITIVEID_EN, vgt_primitiveid_en); - radeon_set_context_reg(cs, R_028A40_VGT_GS_MODE, vgt_gs_mode); + radeon_set_context_reg(ctx_cs, R_028A84_VGT_PRIMITIVEID_EN, vgt_primitiveid_en); + radeon_set_context_reg(ctx_cs, R_028A40_VGT_GS_MODE, vgt_gs_mode); } static void -radv_pipeline_generate_hw_vs(struct radeon_cmdbuf *cs, +radv_pipeline_generate_hw_vs(struct radeon_cmdbuf *ctx_cs, + struct radeon_cmdbuf *cs, struct radv_pipeline *pipeline, struct radv_shader_variant *shader) { @@ -2839,10 +2858,10 @@ outinfo->writes_layer || outinfo->writes_viewport_index; - radeon_set_context_reg(cs, R_0286C4_SPI_VS_OUT_CONFIG, + radeon_set_context_reg(ctx_cs, 
R_0286C4_SPI_VS_OUT_CONFIG, S_0286C4_VS_EXPORT_COUNT(MAX2(1, outinfo->param_exports) - 1)); - radeon_set_context_reg(cs, R_02870C_SPI_SHADER_POS_FORMAT, + radeon_set_context_reg(ctx_cs, R_02870C_SPI_SHADER_POS_FORMAT, S_02870C_POS0_EXPORT_FORMAT(V_02870C_SPI_SHADER_4COMP) | S_02870C_POS1_EXPORT_FORMAT(outinfo->pos_exports > 1 ? V_02870C_SPI_SHADER_4COMP : @@ -2854,13 +2873,13 @@ V_02870C_SPI_SHADER_4COMP : V_02870C_SPI_SHADER_NONE)); - radeon_set_context_reg(cs, R_028818_PA_CL_VTE_CNTL, + radeon_set_context_reg(ctx_cs, R_028818_PA_CL_VTE_CNTL, S_028818_VTX_W0_FMT(1) | S_028818_VPORT_X_SCALE_ENA(1) | S_028818_VPORT_X_OFFSET_ENA(1) | S_028818_VPORT_Y_SCALE_ENA(1) | S_028818_VPORT_Y_OFFSET_ENA(1) | S_028818_VPORT_Z_SCALE_ENA(1) | S_028818_VPORT_Z_OFFSET_ENA(1)); - radeon_set_context_reg(cs, R_02881C_PA_CL_VS_OUT_CNTL, + radeon_set_context_reg(ctx_cs, R_02881C_PA_CL_VS_OUT_CNTL, S_02881C_USE_VTX_POINT_SIZE(outinfo->writes_pointsize) | S_02881C_USE_VTX_RENDER_TARGET_INDX(outinfo->writes_layer) | S_02881C_USE_VTX_VIEWPORT_INDX(outinfo->writes_viewport_index) | @@ -2872,7 +2891,7 @@ clip_dist_mask); if (pipeline->device->physical_device->rad_info.chip_class <= VI) - radeon_set_context_reg(cs, R_028AB4_VGT_REUSE_OFF, + radeon_set_context_reg(ctx_cs, R_028AB4_VGT_REUSE_OFF, outinfo->writes_viewport_index); } @@ -2940,7 +2959,8 @@ } static void -radv_pipeline_generate_vertex_shader(struct radeon_cmdbuf *cs, +radv_pipeline_generate_vertex_shader(struct radeon_cmdbuf *ctx_cs, + struct radeon_cmdbuf *cs, struct radv_pipeline *pipeline, const struct radv_tessellation_state *tess) { @@ -2956,11 +2976,12 @@ else if (vs->info.vs.as_es) radv_pipeline_generate_hw_es(cs, pipeline, vs); else - radv_pipeline_generate_hw_vs(cs, pipeline, vs); + radv_pipeline_generate_hw_vs(ctx_cs, cs, pipeline, vs); } static void -radv_pipeline_generate_tess_shaders(struct radeon_cmdbuf *cs, +radv_pipeline_generate_tess_shaders(struct radeon_cmdbuf *ctx_cs, + struct radeon_cmdbuf *cs, struct 
radv_pipeline *pipeline, const struct radv_tessellation_state *tess) { @@ -2976,24 +2997,25 @@ if (tes->info.tes.as_es) radv_pipeline_generate_hw_es(cs, pipeline, tes); else - radv_pipeline_generate_hw_vs(cs, pipeline, tes); + radv_pipeline_generate_hw_vs(ctx_cs, cs, pipeline, tes); } radv_pipeline_generate_hw_hs(cs, pipeline, tcs, tess); - radeon_set_context_reg(cs, R_028B6C_VGT_TF_PARAM, + radeon_set_context_reg(ctx_cs, R_028B6C_VGT_TF_PARAM, tess->tf_param); if (pipeline->device->physical_device->rad_info.chip_class >= CIK) - radeon_set_context_reg_idx(cs, R_028B58_VGT_LS_HS_CONFIG, 2, + radeon_set_context_reg_idx(ctx_cs, R_028B58_VGT_LS_HS_CONFIG, 2, tess->ls_hs_config); else - radeon_set_context_reg(cs, R_028B58_VGT_LS_HS_CONFIG, + radeon_set_context_reg(ctx_cs, R_028B58_VGT_LS_HS_CONFIG, tess->ls_hs_config); } static void -radv_pipeline_generate_geometry_shader(struct radeon_cmdbuf *cs, +radv_pipeline_generate_geometry_shader(struct radeon_cmdbuf *ctx_cs, + struct radeon_cmdbuf *cs, struct radv_pipeline *pipeline, const struct radv_gs_state *gs_state) { @@ -3014,32 +3036,32 @@ offset = num_components[0] * gs_max_out_vertices; - radeon_set_context_reg_seq(cs, R_028A60_VGT_GSVS_RING_OFFSET_1, 3); - radeon_emit(cs, offset); + radeon_set_context_reg_seq(ctx_cs, R_028A60_VGT_GSVS_RING_OFFSET_1, 3); + radeon_emit(ctx_cs, offset); if (max_stream >= 1) offset += num_components[1] * gs_max_out_vertices; - radeon_emit(cs, offset); + radeon_emit(ctx_cs, offset); if (max_stream >= 2) offset += num_components[2] * gs_max_out_vertices; - radeon_emit(cs, offset); + radeon_emit(ctx_cs, offset); if (max_stream >= 3) offset += num_components[3] * gs_max_out_vertices; - radeon_set_context_reg(cs, R_028AB0_VGT_GSVS_RING_ITEMSIZE, offset); + radeon_set_context_reg(ctx_cs, R_028AB0_VGT_GSVS_RING_ITEMSIZE, offset); - radeon_set_context_reg(cs, R_028B38_VGT_GS_MAX_VERT_OUT, gs->info.gs.vertices_out); + radeon_set_context_reg(ctx_cs, R_028B38_VGT_GS_MAX_VERT_OUT, 
gs->info.gs.vertices_out); - radeon_set_context_reg_seq(cs, R_028B5C_VGT_GS_VERT_ITEMSIZE, 4); - radeon_emit(cs, num_components[0]); - radeon_emit(cs, (max_stream >= 1) ? num_components[1] : 0); - radeon_emit(cs, (max_stream >= 2) ? num_components[2] : 0); - radeon_emit(cs, (max_stream >= 3) ? num_components[3] : 0); + radeon_set_context_reg_seq(ctx_cs, R_028B5C_VGT_GS_VERT_ITEMSIZE, 4); + radeon_emit(ctx_cs, num_components[0]); + radeon_emit(ctx_cs, (max_stream >= 1) ? num_components[1] : 0); + radeon_emit(ctx_cs, (max_stream >= 2) ? num_components[2] : 0); + radeon_emit(ctx_cs, (max_stream >= 3) ? num_components[3] : 0); uint32_t gs_num_invocations = gs->info.gs.invocations; - radeon_set_context_reg(cs, R_028B90_VGT_GS_INSTANCE_CNT, + radeon_set_context_reg(ctx_cs, R_028B90_VGT_GS_INSTANCE_CNT, S_028B90_CNT(MIN2(gs_num_invocations, 127)) | S_028B90_ENABLE(gs_num_invocations > 0)); - radeon_set_context_reg(cs, R_028AAC_VGT_ESGS_RING_ITEMSIZE, + radeon_set_context_reg(ctx_cs, R_028AAC_VGT_ESGS_RING_ITEMSIZE, gs_state->vgt_esgs_ring_itemsize); va = radv_buffer_get_va(gs->bo) + gs->bo_offset; @@ -3053,8 +3075,8 @@ radeon_emit(cs, gs->rsrc1); radeon_emit(cs, gs->rsrc2 | S_00B22C_LDS_SIZE(gs_state->lds_size)); - radeon_set_context_reg(cs, R_028A44_VGT_GS_ONCHIP_CNTL, gs_state->vgt_gs_onchip_cntl); - radeon_set_context_reg(cs, R_028A94_VGT_GS_MAX_PRIMS_PER_SUBGROUP, gs_state->vgt_gs_max_prims_per_subgroup); + radeon_set_context_reg(ctx_cs, R_028A44_VGT_GS_ONCHIP_CNTL, gs_state->vgt_gs_onchip_cntl); + radeon_set_context_reg(ctx_cs, R_028A94_VGT_GS_MAX_PRIMS_PER_SUBGROUP, gs_state->vgt_gs_max_prims_per_subgroup); } else { radeon_set_sh_reg_seq(cs, R_00B220_SPI_SHADER_PGM_LO_GS, 4); radeon_emit(cs, va >> 8); @@ -3063,16 +3085,20 @@ radeon_emit(cs, gs->rsrc2); } - radv_pipeline_generate_hw_vs(cs, pipeline, pipeline->gs_copy_shader); + radv_pipeline_generate_hw_vs(ctx_cs, cs, pipeline, pipeline->gs_copy_shader); } -static uint32_t offset_to_ps_input(uint32_t offset, bool 
flat_shade) +static uint32_t offset_to_ps_input(uint32_t offset, bool flat_shade, bool float16) { uint32_t ps_input_cntl; if (offset <= AC_EXP_PARAM_OFFSET_31) { ps_input_cntl = S_028644_OFFSET(offset); if (flat_shade) ps_input_cntl |= S_028644_FLAT_SHADE(1); + if (float16) { + ps_input_cntl |= S_028644_FP16_INTERP_MODE(1) | + S_028644_ATTR0_VALID(1); + } } else { /* The input is a DEFAULT_VAL constant. */ assert(offset >= AC_EXP_PARAM_DEFAULT_VAL_0000 && @@ -3085,8 +3111,8 @@ } static void -radv_pipeline_generate_ps_inputs(struct radeon_cmdbuf *cs, - struct radv_pipeline *pipeline) +radv_pipeline_generate_ps_inputs(struct radeon_cmdbuf *ctx_cs, + struct radv_pipeline *pipeline) { struct radv_shader_variant *ps = pipeline->shaders[MESA_SHADER_FRAGMENT]; const struct radv_vs_output_info *outinfo = get_vs_output_info(pipeline); @@ -3097,7 +3123,7 @@ if (ps->info.info.ps.prim_id_input) { unsigned vs_offset = outinfo->vs_output_param_offset[VARYING_SLOT_PRIMITIVE_ID]; if (vs_offset != AC_EXP_PARAM_UNDEFINED) { - ps_input_cntl[ps_offset] = offset_to_ps_input(vs_offset, true); + ps_input_cntl[ps_offset] = offset_to_ps_input(vs_offset, true, false); ++ps_offset; } } @@ -3107,9 +3133,9 @@ ps->info.info.needs_multiview_view_index) { unsigned vs_offset = outinfo->vs_output_param_offset[VARYING_SLOT_LAYER]; if (vs_offset != AC_EXP_PARAM_UNDEFINED) - ps_input_cntl[ps_offset] = offset_to_ps_input(vs_offset, true); + ps_input_cntl[ps_offset] = offset_to_ps_input(vs_offset, true, false); else - ps_input_cntl[ps_offset] = offset_to_ps_input(AC_EXP_PARAM_DEFAULT_VAL_0000, true); + ps_input_cntl[ps_offset] = offset_to_ps_input(AC_EXP_PARAM_DEFAULT_VAL_0000, true, false); ++ps_offset; } @@ -3125,14 +3151,14 @@ vs_offset = outinfo->vs_output_param_offset[VARYING_SLOT_CLIP_DIST0]; if (vs_offset != AC_EXP_PARAM_UNDEFINED) { - ps_input_cntl[ps_offset] = offset_to_ps_input(vs_offset, false); + ps_input_cntl[ps_offset] = offset_to_ps_input(vs_offset, false, false); ++ps_offset; } vs_offset 
= outinfo->vs_output_param_offset[VARYING_SLOT_CLIP_DIST1]; if (vs_offset != AC_EXP_PARAM_UNDEFINED && ps->info.info.ps.num_input_clips_culls > 4) { - ps_input_cntl[ps_offset] = offset_to_ps_input(vs_offset, false); + ps_input_cntl[ps_offset] = offset_to_ps_input(vs_offset, false, false); ++ps_offset; } } @@ -3140,6 +3166,7 @@ for (unsigned i = 0; i < 32 && (1u << i) <= ps->info.fs.input_mask; ++i) { unsigned vs_offset; bool flat_shade; + bool float16; if (!(ps->info.fs.input_mask & (1u << i))) continue; @@ -3151,15 +3178,16 @@ } flat_shade = !!(ps->info.fs.flat_shaded_mask & (1u << ps_offset)); + float16 = !!(ps->info.fs.float16_shaded_mask & (1u << ps_offset)); - ps_input_cntl[ps_offset] = offset_to_ps_input(vs_offset, flat_shade); + ps_input_cntl[ps_offset] = offset_to_ps_input(vs_offset, flat_shade, float16); ++ps_offset; } if (ps_offset) { - radeon_set_context_reg_seq(cs, R_028644_SPI_PS_INPUT_CNTL_0, ps_offset); + radeon_set_context_reg_seq(ctx_cs, R_028644_SPI_PS_INPUT_CNTL_0, ps_offset); for (unsigned i = 0; i < ps_offset; i++) { - radeon_emit(cs, ps_input_cntl[i]); + radeon_emit(ctx_cs, ps_input_cntl[i]); } } } @@ -3179,11 +3207,11 @@ bool disable_rbplus = device->physical_device->has_rbplus && !device->physical_device->rbplus_allowed; - /* Do not enable the gl_SampleMask fragment shader output if MSAA is - * disabled. + /* It shouldn't be needed to export gl_SampleMask when MSAA is disabled + * but this appears to break Project Cars (DXVK). 
See + * https://bugs.freedesktop.org/show_bug.cgi?id=109401 */ - bool mask_export_enable = ms->num_samples > 1 && - ps->info.info.ps.writes_sample_mask; + bool mask_export_enable = ps->info.info.ps.writes_sample_mask; return S_02880C_Z_EXPORT_ENABLE(ps->info.info.ps.writes_z) | S_02880C_STENCIL_TEST_VAL_EXPORT_ENABLE(ps->info.info.ps.writes_stencil) | @@ -3197,7 +3225,8 @@ } static void -radv_pipeline_generate_fragment_shader(struct radeon_cmdbuf *cs, +radv_pipeline_generate_fragment_shader(struct radeon_cmdbuf *ctx_cs, + struct radeon_cmdbuf *cs, struct radv_pipeline *pipeline) { struct radv_shader_variant *ps; @@ -3213,22 +3242,22 @@ radeon_emit(cs, ps->rsrc1); radeon_emit(cs, ps->rsrc2); - radeon_set_context_reg(cs, R_02880C_DB_SHADER_CONTROL, + radeon_set_context_reg(ctx_cs, R_02880C_DB_SHADER_CONTROL, radv_compute_db_shader_control(pipeline->device, pipeline, ps)); - radeon_set_context_reg(cs, R_0286CC_SPI_PS_INPUT_ENA, + radeon_set_context_reg(ctx_cs, R_0286CC_SPI_PS_INPUT_ENA, ps->config.spi_ps_input_ena); - radeon_set_context_reg(cs, R_0286D0_SPI_PS_INPUT_ADDR, + radeon_set_context_reg(ctx_cs, R_0286D0_SPI_PS_INPUT_ADDR, ps->config.spi_ps_input_addr); - radeon_set_context_reg(cs, R_0286D8_SPI_PS_IN_CONTROL, + radeon_set_context_reg(ctx_cs, R_0286D8_SPI_PS_IN_CONTROL, S_0286D8_NUM_INTERP(ps->info.fs.num_interp)); - radeon_set_context_reg(cs, R_0286E0_SPI_BARYC_CNTL, pipeline->graphics.spi_baryc_cntl); + radeon_set_context_reg(ctx_cs, R_0286E0_SPI_BARYC_CNTL, pipeline->graphics.spi_baryc_cntl); - radeon_set_context_reg(cs, R_028710_SPI_SHADER_Z_FORMAT, + radeon_set_context_reg(ctx_cs, R_028710_SPI_SHADER_Z_FORMAT, ac_get_spi_shader_z_format(ps->info.info.ps.writes_z, ps->info.info.ps.writes_stencil, ps->info.info.ps.writes_sample_mask)); @@ -3241,7 +3270,7 @@ } static void -radv_pipeline_generate_vgt_vertex_reuse(struct radeon_cmdbuf *cs, +radv_pipeline_generate_vgt_vertex_reuse(struct radeon_cmdbuf *ctx_cs, struct radv_pipeline *pipeline) { if 
(pipeline->device->physical_device->rad_info.family < CHIP_POLARIS10) @@ -3252,7 +3281,7 @@ radv_get_shader(pipeline, MESA_SHADER_TESS_EVAL)->info.tes.spacing == TESS_SPACING_FRACTIONAL_ODD) { vtx_reuse_depth = 14; } - radeon_set_context_reg(cs, R_028C58_VGT_VERTEX_REUSE_BLOCK_CNTL, + radeon_set_context_reg(ctx_cs, R_028C58_VGT_VERTEX_REUSE_BLOCK_CNTL, S_028C58_VTX_REUSE_DEPTH(vtx_reuse_depth)); } @@ -3322,38 +3351,46 @@ const struct radv_gs_state *gs, unsigned prim, unsigned gs_out) { - pipeline->cs.buf = malloc(4 * 256); - pipeline->cs.max_dw = 256; + struct radeon_cmdbuf *ctx_cs = &pipeline->ctx_cs; + struct radeon_cmdbuf *cs = &pipeline->cs; - radv_pipeline_generate_depth_stencil_state(&pipeline->cs, pipeline, pCreateInfo, extra); - radv_pipeline_generate_blend_state(&pipeline->cs, pipeline, blend); - radv_pipeline_generate_raster_state(&pipeline->cs, pipeline, pCreateInfo); - radv_pipeline_generate_multisample_state(&pipeline->cs, pipeline); - radv_pipeline_generate_vgt_gs_mode(&pipeline->cs, pipeline); - radv_pipeline_generate_vertex_shader(&pipeline->cs, pipeline, tess); - radv_pipeline_generate_tess_shaders(&pipeline->cs, pipeline, tess); - radv_pipeline_generate_geometry_shader(&pipeline->cs, pipeline, gs); - radv_pipeline_generate_fragment_shader(&pipeline->cs, pipeline); - radv_pipeline_generate_ps_inputs(&pipeline->cs, pipeline); - radv_pipeline_generate_vgt_vertex_reuse(&pipeline->cs, pipeline); - radv_pipeline_generate_binning_state(&pipeline->cs, pipeline, pCreateInfo); + cs->max_dw = 64; + ctx_cs->max_dw = 256; + cs->buf = malloc(4 * (cs->max_dw + ctx_cs->max_dw)); + ctx_cs->buf = cs->buf + cs->max_dw; + + radv_pipeline_generate_depth_stencil_state(ctx_cs, pipeline, pCreateInfo, extra); + radv_pipeline_generate_blend_state(ctx_cs, pipeline, blend); + radv_pipeline_generate_raster_state(ctx_cs, pipeline, pCreateInfo); + radv_pipeline_generate_multisample_state(ctx_cs, pipeline); + radv_pipeline_generate_vgt_gs_mode(ctx_cs, pipeline); + 
radv_pipeline_generate_vertex_shader(ctx_cs, cs, pipeline, tess); + radv_pipeline_generate_tess_shaders(ctx_cs, cs, pipeline, tess); + radv_pipeline_generate_geometry_shader(ctx_cs, cs, pipeline, gs); + radv_pipeline_generate_fragment_shader(ctx_cs, cs, pipeline); + radv_pipeline_generate_ps_inputs(ctx_cs, pipeline); + radv_pipeline_generate_vgt_vertex_reuse(ctx_cs, pipeline); + radv_pipeline_generate_binning_state(ctx_cs, pipeline, pCreateInfo); - radeon_set_context_reg(&pipeline->cs, R_0286E8_SPI_TMPRING_SIZE, + radeon_set_context_reg(ctx_cs, R_0286E8_SPI_TMPRING_SIZE, S_0286E8_WAVES(pipeline->max_waves) | S_0286E8_WAVESIZE(pipeline->scratch_bytes_per_wave >> 10)); - radeon_set_context_reg(&pipeline->cs, R_028B54_VGT_SHADER_STAGES_EN, radv_compute_vgt_shader_stages_en(pipeline)); + radeon_set_context_reg(ctx_cs, R_028B54_VGT_SHADER_STAGES_EN, radv_compute_vgt_shader_stages_en(pipeline)); if (pipeline->device->physical_device->rad_info.chip_class >= CIK) { - radeon_set_uconfig_reg_idx(&pipeline->cs, R_030908_VGT_PRIMITIVE_TYPE, 1, prim); + radeon_set_uconfig_reg_idx(cs, R_030908_VGT_PRIMITIVE_TYPE, 1, prim); } else { - radeon_set_config_reg(&pipeline->cs, R_008958_VGT_PRIMITIVE_TYPE, prim); + radeon_set_config_reg(cs, R_008958_VGT_PRIMITIVE_TYPE, prim); } - radeon_set_context_reg(&pipeline->cs, R_028A6C_VGT_GS_OUT_PRIM_TYPE, gs_out); + radeon_set_context_reg(ctx_cs, R_028A6C_VGT_GS_OUT_PRIM_TYPE, gs_out); + + radeon_set_context_reg(ctx_cs, R_02820C_PA_SC_CLIPRECT_RULE, radv_compute_cliprect_rule(pCreateInfo)); - radeon_set_context_reg(&pipeline->cs, R_02820C_PA_SC_CLIPRECT_RULE, radv_compute_cliprect_rule(pCreateInfo)); + pipeline->ctx_cs_hash = _mesa_hash_data(ctx_cs->buf, ctx_cs->cdw * 4); - assert(pipeline->cs.cdw <= pipeline->cs.max_dw); + assert(ctx_cs->cdw <= ctx_cs->max_dw); + assert(cs->cdw <= cs->max_dw); } static struct radv_ia_multi_vgt_param_helpers diff -Nru mesa-18.3.3/src/amd/vulkan/radv_private.h mesa-19.0.1/src/amd/vulkan/radv_private.h --- 
mesa-18.3.3/src/amd/vulkan/radv_private.h 2018-12-07 18:58:04.000000000 +0000 +++ mesa-19.0.1/src/amd/vulkan/radv_private.h 2019-03-31 23:16:37.000000000 +0000 @@ -285,7 +285,6 @@ struct radeon_winsys *ws; struct radeon_info rad_info; - char path[20]; char name[VK_MAX_PHYSICAL_DEVICE_NAME_SIZE]; uint8_t driver_uuid[VK_UUID_SIZE]; uint8_t device_uuid[VK_UUID_SIZE]; @@ -307,6 +306,9 @@ /* Whether DCC should be enabled for MSAA textures. */ bool dcc_msaa_allowed; + /* Whether LOAD_CONTEXT_REG packets are supported. */ + bool has_load_ctx_reg_pkt; + /* This is the drivers on-disk cache used as a fallback as opposed to * the pipeline cache defined by apps. */ @@ -457,6 +459,12 @@ VkPipelineLayout clear_color_p_layout; VkPipelineLayout clear_depth_p_layout; + + /* Optimized compute fast HTILE clear for stencil or depth only. */ + VkPipeline clear_htile_mask_pipeline; + VkPipelineLayout clear_htile_mask_p_layout; + VkDescriptorSetLayout clear_htile_mask_ds_layout; + struct { VkRenderPass render_pass[NUM_META_FS_KEYS][RADV_META_DST_LAYOUT_COUNT]; @@ -597,6 +605,12 @@ VkPipeline pipeline_statistics_query_pipeline; VkPipeline tfb_query_pipeline; } query; + + struct { + VkDescriptorSetLayout ds_layout; + VkPipelineLayout p_layout; + VkPipeline pipeline[MAX_SAMPLES_LOG2]; + } fmask_expand; }; /* queue types */ @@ -1044,6 +1058,8 @@ /* Conditional rendering info. 
*/ int predication_type; /* -1: disabled, 0: normal, 1: inverted */ uint64_t predication_va; + + bool context_roll_without_scissor_emitted; }; struct radv_cmd_pool { @@ -1103,8 +1119,7 @@ VkResult record_result; - uint32_t gfx9_fence_offset; - struct radeon_winsys_bo *gfx9_fence_bo; + uint64_t gfx9_fence_va; uint32_t gfx9_fence_idx; uint64_t gfx9_eop_bug_va; @@ -1139,13 +1154,11 @@ unsigned event, unsigned event_flags, unsigned data_sel, uint64_t va, - uint32_t old_fence, uint32_t new_fence, uint64_t gfx9_eop_bug_va); -void si_emit_wait_fence(struct radeon_cmdbuf *cs, - uint64_t va, uint32_t ref, - uint32_t mask); +void radv_cp_wait_mem(struct radeon_cmdbuf *cs, uint32_t op, uint64_t va, + uint32_t ref, uint32_t mask); void si_cs_emit_cache_flush(struct radeon_cmdbuf *cs, enum chip_class chip_class, uint32_t *fence_ptr, uint64_t va, @@ -1198,9 +1211,12 @@ int cb_idx, uint32_t color_values[2]); -void radv_set_dcc_need_cmask_elim_pred(struct radv_cmd_buffer *cmd_buffer, - struct radv_image *image, - bool value); +void radv_update_fce_metadata(struct radv_cmd_buffer *cmd_buffer, + struct radv_image *image, bool value); + +void radv_update_dcc_metadata(struct radv_cmd_buffer *cmd_buffer, + struct radv_image *image, bool value); + uint32_t radv_fill_buffer(struct radv_cmd_buffer *cmd_buffer, struct radeon_winsys_bo *bo, uint64_t offset, uint64_t size, uint32_t value); @@ -1238,7 +1254,7 @@ struct radeon_cmdbuf *cs, uint32_t sh_offset, uint64_t va, bool global) { - bool use_32bit_pointers = HAVE_32BIT_POINTERS && !global; + bool use_32bit_pointers = !global; radv_emit_shader_pointer_head(cs, sh_offset, 1, use_32bit_pointers); radv_emit_shader_pointer_body(device, cs, va, use_32bit_pointers); @@ -1352,6 +1368,8 @@ VkShaderStageFlags active_stages; struct radeon_cmdbuf cs; + uint32_t ctx_cs_hash; + struct radeon_cmdbuf ctx_cs; struct radv_vertex_elements_info vertex_elements; @@ -1447,6 +1465,7 @@ bool radv_is_colorbuffer_format_supported(VkFormat format, bool *blendable); 
bool radv_dcc_formats_compatible(VkFormat format1, VkFormat format2); +bool radv_device_supports_etc(struct radv_physical_device *physical_device); struct radv_fmask_info { uint64_t offset; @@ -1496,6 +1515,7 @@ struct radv_fmask_info fmask; struct radv_cmask_info cmask; uint64_t clear_value_offset; + uint64_t fce_pred_offset; uint64_t dcc_pred_offset; /* @@ -1873,7 +1893,7 @@ radv_update_descriptor_set_with_template(struct radv_device *device, struct radv_cmd_buffer *cmd_buffer, struct radv_descriptor_set *set, - VkDescriptorUpdateTemplateKHR descriptorUpdateTemplate, + VkDescriptorUpdateTemplate descriptorUpdateTemplate, const void *pData); void radv_meta_push_descriptor_set(struct radv_cmd_buffer *cmd_buffer, @@ -1886,6 +1906,9 @@ void radv_initialize_dcc(struct radv_cmd_buffer *cmd_buffer, struct radv_image *image, uint32_t value); +void radv_initialize_fmask(struct radv_cmd_buffer *cmd_buffer, + struct radv_image *image); + struct radv_fence { struct radeon_winsys_fence *fence; struct wsi_fence *fence_wsi; @@ -1967,7 +1990,7 @@ RADV_DEFINE_NONDISP_HANDLE_CASTS(radv_descriptor_pool, VkDescriptorPool) RADV_DEFINE_NONDISP_HANDLE_CASTS(radv_descriptor_set, VkDescriptorSet) RADV_DEFINE_NONDISP_HANDLE_CASTS(radv_descriptor_set_layout, VkDescriptorSetLayout) -RADV_DEFINE_NONDISP_HANDLE_CASTS(radv_descriptor_update_template, VkDescriptorUpdateTemplateKHR) +RADV_DEFINE_NONDISP_HANDLE_CASTS(radv_descriptor_update_template, VkDescriptorUpdateTemplate) RADV_DEFINE_NONDISP_HANDLE_CASTS(radv_device_memory, VkDeviceMemory) RADV_DEFINE_NONDISP_HANDLE_CASTS(radv_fence, VkFence) RADV_DEFINE_NONDISP_HANDLE_CASTS(radv_event, VkEvent) diff -Nru mesa-18.3.3/src/amd/vulkan/radv_query.c mesa-19.0.1/src/amd/vulkan/radv_query.c --- mesa-18.3.3/src/amd/vulkan/radv_query.c 2018-12-07 18:58:04.000000000 +0000 +++ mesa-19.0.1/src/amd/vulkan/radv_query.c 2019-03-31 23:16:37.000000000 +0000 @@ -51,6 +51,12 @@ return num_db; } + +static nir_ssa_def *nir_test_flag(nir_builder *b, nir_ssa_def 
*flags, uint32_t flag) +{ + return nir_i2b(b, nir_iand(b, flags, nir_imm_int(b, flag))); +} + static void radv_break_on_count(nir_builder *b, nir_variable *var, nir_ssa_def *count) { nir_ssa_def *counter = nir_load_var(b, var); @@ -132,7 +138,7 @@ nir_variable *outer_counter = nir_local_variable_create(b.impl, glsl_int_type(), "outer_counter"); nir_variable *start = nir_local_variable_create(b.impl, glsl_uint64_t_type(), "start"); nir_variable *end = nir_local_variable_create(b.impl, glsl_uint64_t_type(), "end"); - nir_variable *available = nir_local_variable_create(b.impl, glsl_int_type(), "available"); + nir_variable *available = nir_local_variable_create(b.impl, glsl_bool_type(), "available"); unsigned db_count = get_max_db(device); nir_ssa_def *flags = radv_load_push_int(&b, 0, "flags"); @@ -153,8 +159,8 @@ nir_ssa_dest_init(&src_buf->instr, &src_buf->dest, 1, 32, NULL); nir_builder_instr_insert(&b, &src_buf->instr); - nir_ssa_def *invoc_id = nir_load_system_value(&b, nir_intrinsic_load_local_invocation_id, 0); - nir_ssa_def *wg_id = nir_load_system_value(&b, nir_intrinsic_load_work_group_id, 0); + nir_ssa_def *invoc_id = nir_load_local_invocation_id(&b); + nir_ssa_def *wg_id = nir_load_work_group_id(&b); nir_ssa_def *block_size = nir_imm_ivec4(&b, b.shader->info.cs.local_size[0], b.shader->info.cs.local_size[1], @@ -170,7 +176,7 @@ nir_store_var(&b, result, nir_imm_int64(&b, 0), 0x1); nir_store_var(&b, outer_counter, nir_imm_int(&b, 0), 0x1); - nir_store_var(&b, available, nir_imm_int(&b, 1), 0x1); + nir_store_var(&b, available, nir_imm_true(&b), 0x1); nir_loop *outer_loop = nir_loop_create(b.shader); nir_builder_cf_insert(&b, &outer_loop->cf_node); @@ -208,18 +214,17 @@ b.cursor = nir_after_cf_list(&update_if->else_list); - nir_store_var(&b, available, nir_imm_int(&b, 0), 0x1); + nir_store_var(&b, available, nir_imm_false(&b), 0x1); b.cursor = nir_after_cf_node(&outer_loop->cf_node); /* Store the result if complete or if partial results have been requested. 
*/ - nir_ssa_def *result_is_64bit = nir_iand(&b, flags, - nir_imm_int(&b, VK_QUERY_RESULT_64_BIT)); + nir_ssa_def *result_is_64bit = nir_test_flag(&b, flags, VK_QUERY_RESULT_64_BIT); nir_ssa_def *result_size = nir_bcsel(&b, result_is_64bit, nir_imm_int(&b, 8), nir_imm_int(&b, 4)); nir_if *store_if = nir_if_create(b.shader); - store_if->condition = nir_src_for_ssa(nir_ior(&b, nir_iand(&b, flags, nir_imm_int(&b, VK_QUERY_RESULT_PARTIAL_BIT)), nir_load_var(&b, available))); + store_if->condition = nir_src_for_ssa(nir_ior(&b, nir_test_flag(&b, flags, VK_QUERY_RESULT_PARTIAL_BIT), nir_load_var(&b, available))); nir_cf_node_insert(b.cursor, &store_if->cf_node); b.cursor = nir_after_cf_list(&store_if->then_list); @@ -253,13 +258,13 @@ /* Store the availability bit if requested. */ nir_if *availability_if = nir_if_create(b.shader); - availability_if->condition = nir_src_for_ssa(nir_iand(&b, flags, nir_imm_int(&b, VK_QUERY_RESULT_WITH_AVAILABILITY_BIT))); + availability_if->condition = nir_src_for_ssa(nir_test_flag(&b, flags, VK_QUERY_RESULT_WITH_AVAILABILITY_BIT)); nir_cf_node_insert(b.cursor, &availability_if->cf_node); b.cursor = nir_after_cf_list(&availability_if->then_list); store = nir_intrinsic_instr_create(b.shader, nir_intrinsic_store_ssbo); - store->src[0] = nir_src_for_ssa(nir_load_var(&b, available)); + store->src[0] = nir_src_for_ssa(nir_b2i32(&b, nir_load_var(&b, available))); store->src[1] = nir_src_for_ssa(&dst_buf->dest.ssa); store->src[2] = nir_src_for_ssa(nir_iadd(&b, result_size, output_base)); nir_intrinsic_set_write_mask(store, 0x1); @@ -291,11 +296,11 @@ * uint64_t dst_offset = dst_base; * uint32_t elem_size = flags & VK_QUERY_RESULT_64_BIT ? 
8 : 4; * uint32_t elem_count = stats_mask >> 16; - * uint32_t available = src_buf[avail_offset + 4 * global_id.x]; + * uint32_t available32 = src_buf[avail_offset + 4 * global_id.x]; * if (flags & VK_QUERY_RESULT_WITH_AVAILABILITY_BIT) { - * dst_buf[dst_offset + elem_count * elem_size] = available; + * dst_buf[dst_offset + elem_count * elem_size] = available32; * } - * if (available) { + * if ((bool)available32) { * // repeat 11 times: * if (stats_mask & (1 << 0)) { * uint64_t start = src_buf[src_offset + 8 * indices[0]]; @@ -343,8 +348,8 @@ nir_ssa_dest_init(&src_buf->instr, &src_buf->dest, 1, 32, NULL); nir_builder_instr_insert(&b, &src_buf->instr); - nir_ssa_def *invoc_id = nir_load_system_value(&b, nir_intrinsic_load_local_invocation_id, 0); - nir_ssa_def *wg_id = nir_load_system_value(&b, nir_intrinsic_load_work_group_id, 0); + nir_ssa_def *invoc_id = nir_load_local_invocation_id(&b); + nir_ssa_def *wg_id = nir_load_work_group_id(&b); nir_ssa_def *block_size = nir_imm_ivec4(&b, b.shader->info.cs.local_size[0], b.shader->info.cs.local_size[1], @@ -367,23 +372,22 @@ nir_ssa_dest_init(&load->instr, &load->dest, 1, 32, NULL); load->num_components = 1; nir_builder_instr_insert(&b, &load->instr); - nir_ssa_def *available = &load->dest.ssa; + nir_ssa_def *available32 = &load->dest.ssa; - nir_ssa_def *result_is_64bit = nir_iand(&b, flags, - nir_imm_int(&b, VK_QUERY_RESULT_64_BIT)); + nir_ssa_def *result_is_64bit = nir_test_flag(&b, flags, VK_QUERY_RESULT_64_BIT); nir_ssa_def *elem_size = nir_bcsel(&b, result_is_64bit, nir_imm_int(&b, 8), nir_imm_int(&b, 4)); nir_ssa_def *elem_count = nir_ushr(&b, stats_mask, nir_imm_int(&b, 16)); /* Store the availability bit if requested. 
*/ nir_if *availability_if = nir_if_create(b.shader); - availability_if->condition = nir_src_for_ssa(nir_iand(&b, flags, nir_imm_int(&b, VK_QUERY_RESULT_WITH_AVAILABILITY_BIT))); + availability_if->condition = nir_src_for_ssa(nir_test_flag(&b, flags, VK_QUERY_RESULT_WITH_AVAILABILITY_BIT)); nir_cf_node_insert(b.cursor, &availability_if->cf_node); b.cursor = nir_after_cf_list(&availability_if->then_list); nir_intrinsic_instr *store = nir_intrinsic_instr_create(b.shader, nir_intrinsic_store_ssbo); - store->src[0] = nir_src_for_ssa(available); + store->src[0] = nir_src_for_ssa(available32); store->src[1] = nir_src_for_ssa(&dst_buf->dest.ssa); store->src[2] = nir_src_for_ssa(nir_iadd(&b, output_base, nir_imul(&b, elem_count, elem_size))); nir_intrinsic_set_write_mask(store, 0x1); @@ -393,7 +397,7 @@ b.cursor = nir_after_cf_node(&availability_if->cf_node); nir_if *available_if = nir_if_create(b.shader); - available_if->condition = nir_src_for_ssa(available); + available_if->condition = nir_src_for_ssa(nir_i2b(&b, available32)); nir_cf_node_insert(b.cursor, &available_if->cf_node); b.cursor = nir_after_cf_list(&available_if->then_list); @@ -401,7 +405,7 @@ nir_store_var(&b, output_offset, output_base, 0x1); for (int i = 0; i < 11; ++i) { nir_if *store_if = nir_if_create(b.shader); - store_if->condition = nir_src_for_ssa(nir_iand(&b, stats_mask, nir_imm_int(&b, 1u << i))); + store_if->condition = nir_src_for_ssa(nir_test_flag(&b, stats_mask, 1u << i)); nir_cf_node_insert(b.cursor, &store_if->cf_node); b.cursor = nir_after_cf_list(&store_if->then_list); @@ -463,8 +467,7 @@ b.cursor = nir_after_cf_list(&available_if->else_list); available_if = nir_if_create(b.shader); - available_if->condition = nir_src_for_ssa(nir_iand(&b, flags, - nir_imm_int(&b, VK_QUERY_RESULT_PARTIAL_BIT))); + available_if->condition = nir_src_for_ssa(nir_test_flag(&b, flags, VK_QUERY_RESULT_PARTIAL_BIT)); nir_cf_node_insert(b.cursor, &available_if->cf_node); b.cursor = 
nir_after_cf_list(&available_if->then_list); @@ -563,12 +566,12 @@ glsl_vector_type(GLSL_TYPE_UINT64, 2), "result"); nir_variable *available = - nir_local_variable_create(b.impl, glsl_int_type(), "available"); + nir_local_variable_create(b.impl, glsl_bool_type(), "available"); nir_store_var(&b, result, nir_vec2(&b, nir_imm_int64(&b, 0), nir_imm_int64(&b, 0)), 0x3); - nir_store_var(&b, available, nir_imm_int(&b, 0), 0x1); + nir_store_var(&b, available, nir_imm_false(&b), 0x1); nir_ssa_def *flags = radv_load_push_int(&b, 0, "flags"); @@ -590,8 +593,8 @@ nir_builder_instr_insert(&b, &src_buf->instr); /* Compute global ID. */ - nir_ssa_def *invoc_id = nir_load_system_value(&b, nir_intrinsic_load_local_invocation_id, 0); - nir_ssa_def *wg_id = nir_load_system_value(&b, nir_intrinsic_load_work_group_id, 0); + nir_ssa_def *invoc_id = nir_load_local_invocation_id(&b); + nir_ssa_def *wg_id = nir_load_work_group_id(&b); nir_ssa_def *block_size = nir_imm_ivec4(&b, b.shader->info.cs.local_size[0], b.shader->info.cs.local_size[1], @@ -627,8 +630,8 @@ avails[1] = nir_iand(&b, nir_channel(&b, &load2->dest.ssa, 1), nir_channel(&b, &load2->dest.ssa, 3)); nir_ssa_def *result_is_available = - nir_iand(&b, nir_iand(&b, avails[0], avails[1]), - nir_imm_int(&b, 0x80000000)); + nir_i2b(&b, nir_iand(&b, nir_iand(&b, avails[0], avails[1]), + nir_imm_int(&b, 0x80000000))); /* Only compute result if available. */ nir_if *available_if = nir_if_create(b.shader); @@ -661,13 +664,13 @@ nir_store_var(&b, result, nir_vec2(&b, num_primitive_written, primitive_storage_needed), 0x3); - nir_store_var(&b, available, nir_imm_int(&b, 1), 0x1); + nir_store_var(&b, available, nir_imm_true(&b), 0x1); b.cursor = nir_after_cf_node(&available_if->cf_node); /* Determine if result is 64 or 32 bit. 
*/ nir_ssa_def *result_is_64bit = - nir_iand(&b, flags, nir_imm_int(&b, VK_QUERY_RESULT_64_BIT)); + nir_test_flag(&b, flags, VK_QUERY_RESULT_64_BIT); nir_ssa_def *result_size = nir_bcsel(&b, result_is_64bit, nir_imm_int(&b, 16), nir_imm_int(&b, 8)); @@ -675,8 +678,7 @@ /* Store the result if complete or partial results have been requested. */ nir_if *store_if = nir_if_create(b.shader); store_if->condition = - nir_src_for_ssa(nir_ior(&b, nir_iand(&b, flags, - nir_imm_int(&b, VK_QUERY_RESULT_PARTIAL_BIT)), + nir_src_for_ssa(nir_ior(&b, nir_test_flag(&b, flags, VK_QUERY_RESULT_PARTIAL_BIT), nir_load_var(&b, available))); nir_cf_node_insert(b.cursor, &store_if->cf_node); @@ -714,14 +716,13 @@ /* Store the availability bit if requested. */ nir_if *availability_if = nir_if_create(b.shader); availability_if->condition = - nir_src_for_ssa(nir_iand(&b, flags, - nir_imm_int(&b, VK_QUERY_RESULT_WITH_AVAILABILITY_BIT))); + nir_src_for_ssa(nir_test_flag(&b, flags, VK_QUERY_RESULT_WITH_AVAILABILITY_BIT)); nir_cf_node_insert(b.cursor, &availability_if->cf_node); b.cursor = nir_after_cf_list(&availability_if->then_list); store = nir_intrinsic_instr_create(b.shader, nir_intrinsic_store_ssbo); - store->src[0] = nir_src_for_ssa(nir_load_var(&b, available)); + store->src[0] = nir_src_for_ssa(nir_b2i32(&b, nir_load_var(&b, available))); store->src[1] = nir_src_for_ssa(&dst_buf->dest.ssa); store->src[2] = nir_src_for_ssa(nir_iadd(&b, result_size, output_base)); nir_intrinsic_set_write_mask(store, 0x1); @@ -1012,9 +1013,6 @@ radv_unaligned_dispatch(cmd_buffer, count, 1, 1); - cmd_buffer->state.flush_bits |= RADV_CMD_FLAG_INV_GLOBAL_L2 | - RADV_CMD_FLAG_INV_VMEM_L1 | - RADV_CMD_FLAG_CS_PARTIAL_FLUSH; /* Restore conditional rendering. 
*/ cmd_buffer->state.predicating = old_predicating; @@ -1063,7 +1061,8 @@ pool->size += 4 * pCreateInfo->queryCount; pool->bo = device->ws->buffer_create(device->ws, pool->size, - 64, RADEON_DOMAIN_GTT, RADEON_FLAG_NO_INTERPROCESS_SHARING); + 64, RADEON_DOMAIN_GTT, RADEON_FLAG_NO_INTERPROCESS_SHARING, + RADV_BO_PRIORITY_QUERY_POOL); if (!pool->bo) { vk_free2(&device->alloc, pAllocator, pool); @@ -1296,14 +1295,11 @@ unsigned query = firstQuery + i; uint64_t src_va = va + query * pool->stride + pool->stride - 4; + radeon_check_space(cmd_buffer->device->ws, cs, 7); + /* Waits on the upper word of the last DB entry */ - radeon_emit(cs, PKT3(PKT3_WAIT_REG_MEM, 5, 0)); - radeon_emit(cs, WAIT_REG_MEM_GREATER_OR_EQUAL | WAIT_REG_MEM_MEM_SPACE(1)); - radeon_emit(cs, src_va); - radeon_emit(cs, src_va >> 32); - radeon_emit(cs, 0x80000000); /* reference value */ - radeon_emit(cs, 0xffffffff); /* mask */ - radeon_emit(cs, 4); /* poll interval */ + radv_cp_wait_mem(cs, WAIT_REG_MEM_GREATER_OR_EQUAL, + src_va, 0x80000000, 0xffffffff); } } radv_query_shader(cmd_buffer, &cmd_buffer->device->meta_state.query.occlusion_query_pipeline, @@ -1322,7 +1318,8 @@ uint64_t avail_va = va + pool->availability_offset + 4 * query; /* This waits on the ME. All copies below are done on the ME */ - si_emit_wait_fence(cs, avail_va, 1, 0xffffffff); + radv_cp_wait_mem(cs, WAIT_REG_MEM_EQUAL, + avail_va, 1, 0xffffffff); } } radv_query_shader(cmd_buffer, &cmd_buffer->device->meta_state.query.pipeline_statistics_query_pipeline, @@ -1344,13 +1341,10 @@ /* Wait on the high 32 bits of the timestamp in * case the low part is 0xffffffff. 
*/ - radeon_emit(cs, PKT3(PKT3_WAIT_REG_MEM, 5, false)); - radeon_emit(cs, WAIT_REG_MEM_NOT_EQUAL | WAIT_REG_MEM_MEM_SPACE(1)); - radeon_emit(cs, local_src_va + 4); - radeon_emit(cs, (local_src_va + 4) >> 32); - radeon_emit(cs, TIMESTAMP_NOT_READY >> 32); - radeon_emit(cs, 0xffffffff); - radeon_emit(cs, 4); + radv_cp_wait_mem(cs, WAIT_REG_MEM_NOT_EQUAL, + local_src_va + 4, + TIMESTAMP_NOT_READY >> 32, + 0xffffffff); } if (flags & VK_QUERY_RESULT_WITH_AVAILABILITY_BIT) { uint64_t avail_dest_va = dest_va + elem_size; @@ -1383,16 +1377,13 @@ unsigned query = firstQuery + i; uint64_t src_va = va + query * pool->stride; + radeon_check_space(cmd_buffer->device->ws, cs, 7 * 4); + /* Wait on the upper word of all results. */ for (unsigned j = 0; j < 4; j++, src_va += 8) { - radeon_emit(cs, PKT3(PKT3_WAIT_REG_MEM, 5, 0)); - radeon_emit(cs, WAIT_REG_MEM_GREATER_OR_EQUAL | - WAIT_REG_MEM_MEM_SPACE(1)); - radeon_emit(cs, (src_va + 4)); - radeon_emit(cs, (src_va + 4) >> 32); - radeon_emit(cs, 0x80000000); /* reference value */ - radeon_emit(cs, 0xffffffff); /* mask */ - radeon_emit(cs, 4); /* poll interval */ + radv_cp_wait_mem(cs, WAIT_REG_MEM_GREATER_OR_EQUAL, + src_va + 4, 0x80000000, + 0xffffffff); } } } @@ -1461,7 +1452,6 @@ * because we use a CP dma clear. 
*/ si_emit_cache_flush(cmd_buffer); - cmd_buffer->pending_reset_query = false; } } } @@ -1580,7 +1570,7 @@ radv_cmd_buffer_uses_mec(cmd_buffer), V_028A90_BOTTOM_OF_PIPE_TS, 0, EOP_DATA_SEL_VALUE_32BIT, - avail_va, 0, 1, + avail_va, 1, cmd_buffer->gfx9_eop_bug_va); break; case VK_QUERY_TYPE_TRANSFORM_FEEDBACK_STREAM_EXT: @@ -1703,7 +1693,7 @@ radeon_emit(cs, PKT3(PKT3_COPY_DATA, 4, 0)); radeon_emit(cs, COPY_DATA_COUNT_SEL | COPY_DATA_WR_CONFIRM | COPY_DATA_SRC_SEL(COPY_DATA_TIMESTAMP) | - COPY_DATA_DST_SEL(V_370_MEM_ASYNC)); + COPY_DATA_DST_SEL(V_370_MEM)); radeon_emit(cs, 0); radeon_emit(cs, 0); radeon_emit(cs, query_va); @@ -1715,7 +1705,7 @@ mec, V_028A90_BOTTOM_OF_PIPE_TS, 0, EOP_DATA_SEL_TIMESTAMP, - query_va, 0, 0, + query_va, 0, cmd_buffer->gfx9_eop_bug_va); break; } diff -Nru mesa-18.3.3/src/amd/vulkan/radv_radeon_winsys.h mesa-19.0.1/src/amd/vulkan/radv_radeon_winsys.h --- mesa-18.3.3/src/amd/vulkan/radv_radeon_winsys.h 2018-12-07 18:58:04.000000000 +0000 +++ mesa-19.0.1/src/amd/vulkan/radv_radeon_winsys.h 2019-03-31 23:16:37.000000000 +0000 @@ -84,6 +84,9 @@ }; enum radeon_value_id { + RADEON_ALLOCATED_VRAM, + RADEON_ALLOCATED_VRAM_VIS, + RADEON_ALLOCATED_GTT, RADEON_TIMESTAMP, RADEON_NUM_BYTES_MOVED, RADEON_NUM_EVICTIONS, @@ -164,6 +167,7 @@ struct radeon_winsys_bo { uint64_t va; bool is_local; + bool vram_cpu_access; }; struct radv_winsys_sem_counts { uint32_t syncobj_count; @@ -184,6 +188,27 @@ unsigned count; }; +/* Kernel effectively allows 0-31. This sets some priorities for fixed + * functionality buffers */ +enum { + RADV_BO_PRIORITY_APPLICATION_MAX = 28, + + /* virtual buffers have 0 priority since the priority is not used. */ + RADV_BO_PRIORITY_VIRTUAL = 0, + + /* This should be considerably lower than most of the stuff below, + * but how much lower is hard to say since we don't know application + * assignments. Put it pretty high since it is GTT anyway. 
*/ + RADV_BO_PRIORITY_QUERY_POOL = 29, + + RADV_BO_PRIORITY_DESCRIPTOR = 30, + RADV_BO_PRIORITY_UPLOAD_BUFFER = 30, + RADV_BO_PRIORITY_FENCE = 30, + RADV_BO_PRIORITY_SHADER = 31, + RADV_BO_PRIORITY_SCRATCH = 31, + RADV_BO_PRIORITY_CS = 31, +}; + struct radeon_winsys { void (*destroy)(struct radeon_winsys *ws); @@ -202,17 +227,20 @@ uint64_t size, unsigned alignment, enum radeon_bo_domain domain, - enum radeon_bo_flag flags); + enum radeon_bo_flag flags, + unsigned priority); void (*buffer_destroy)(struct radeon_winsys_bo *bo); void *(*buffer_map)(struct radeon_winsys_bo *bo); struct radeon_winsys_bo *(*buffer_from_ptr)(struct radeon_winsys *ws, void *pointer, - uint64_t size); + uint64_t size, + unsigned priority); struct radeon_winsys_bo *(*buffer_from_fd)(struct radeon_winsys *ws, int fd, + unsigned priority, unsigned *stride, unsigned *offset); bool (*buffer_get_fd)(struct radeon_winsys *ws, diff -Nru mesa-18.3.3/src/amd/vulkan/radv_shader.c mesa-19.0.1/src/amd/vulkan/radv_shader.c --- mesa-18.3.3/src/amd/vulkan/radv_shader.c 2018-12-07 18:58:04.000000000 +0000 +++ mesa-19.0.1/src/amd/vulkan/radv_shader.c 2019-03-31 23:16:37.000000000 +0000 @@ -126,8 +126,8 @@ do { progress = false; - NIR_PASS(progress, shader, nir_split_array_vars, nir_var_local); - NIR_PASS(progress, shader, nir_shrink_vec_array_vars, nir_var_local); + NIR_PASS(progress, shader, nir_split_array_vars, nir_var_function_temp); + NIR_PASS(progress, shader, nir_shrink_vec_array_vars, nir_var_function_temp); NIR_PASS_V(shader, nir_lower_vars_to_ssa); NIR_PASS_V(shader, nir_lower_pack); @@ -159,7 +159,7 @@ NIR_PASS(progress, shader, nir_opt_if); NIR_PASS(progress, shader, nir_opt_dead_cf); NIR_PASS(progress, shader, nir_opt_cse); - NIR_PASS(progress, shader, nir_opt_peephole_select, 8); + NIR_PASS(progress, shader, nir_opt_peephole_select, 8, true); NIR_PASS(progress, shader, nir_opt_algebraic); NIR_PASS(progress, shader, nir_opt_constant_folding); NIR_PASS(progress, shader, nir_opt_undef); @@ 
-219,33 +219,39 @@ } } const struct spirv_to_nir_options spirv_options = { + .lower_ubo_ssbo_access_to_offsets = true, .caps = { + .descriptor_array_dynamic_indexing = true, .device_group = true, .draw_parameters = true, .float64 = true, + .gcn_shader = true, + .geometry_streams = true, .image_read_without_format = true, .image_write_without_format = true, - .tessellation = true, - .int64 = true, .int16 = true, + .int64 = true, .multiview = true, + .runtime_descriptor_array = true, + .shader_viewport_index_layer = true, + .stencil_export = true, + .storage_16bit = true, + .storage_image_ms = true, .subgroup_arithmetic = true, .subgroup_ballot = true, .subgroup_basic = true, .subgroup_quad = true, .subgroup_shuffle = true, .subgroup_vote = true, - .variable_pointers = true, - .gcn_shader = true, - .trinary_minmax = true, - .shader_viewport_index_layer = true, - .descriptor_array_dynamic_indexing = true, - .runtime_descriptor_array = true, - .stencil_export = true, - .storage_16bit = true, - .geometry_streams = true, + .tessellation = true, .transform_feedback = true, + .trinary_minmax = true, + .variable_pointers = true, }, + .ubo_ptr_type = glsl_vector_type(GLSL_TYPE_UINT, 2), + .ssbo_ptr_type = glsl_vector_type(GLSL_TYPE_UINT, 2), + .push_const_ptr_type = glsl_uint_type(), + .shared_ptr_type = glsl_uint_type(), }; entry_point = spirv_to_nir(spirv, module->size / 4, spec_entries, num_spec_entries, @@ -261,10 +267,10 @@ * inline functions. That way they get properly initialized at the top * of the function and not at the top of its caller. 
*/ - NIR_PASS_V(nir, nir_lower_constant_initializers, nir_var_local); + NIR_PASS_V(nir, nir_lower_constant_initializers, nir_var_function_temp); NIR_PASS_V(nir, nir_lower_returns); NIR_PASS_V(nir, nir_inline_functions); - NIR_PASS_V(nir, nir_copy_prop); + NIR_PASS_V(nir, nir_opt_deref); /* Pick off the single entrypoint that we want */ foreach_list_typed_safe(nir_function, func, node, &nir->functions) { @@ -323,7 +329,7 @@ nir_split_var_copies(nir); nir_lower_global_vars_to_local(nir); - nir_remove_dead_variables(nir, nir_var_local); + nir_remove_dead_variables(nir, nir_var_function_temp); nir_lower_subgroups(nir, &(struct nir_lower_subgroups_options) { .subgroup_size = 64, .ballot_bit_size = 64, @@ -389,7 +395,8 @@ RADEON_DOMAIN_VRAM, RADEON_FLAG_NO_INTERPROCESS_SHARING | (device->physical_device->cpdma_prefetch_writes_memory ? - 0 : RADEON_FLAG_READ_ONLY)); + 0 : RADEON_FLAG_READ_ONLY), + RADV_BO_PRIORITY_SHADER); slab->ptr = (char*)device->ws->buffer_map(slab->bo); list_inithead(&slab->shaders); @@ -548,9 +555,15 @@ * * "mesa" is the prefix for error messages. 
*/ - const char *argv[3] = { "mesa", "-simplifycfg-sink-common=false", - "-amdgpu-skip-threshold=1" }; - LLVMParseCommandLineOptions(3, argv, NULL); + if (HAVE_LLVM >= 0x0800) { + const char *argv[2] = { "mesa", "-simplifycfg-sink-common=false" }; + LLVMParseCommandLineOptions(2, argv, NULL); + + } else { + const char *argv[3] = { "mesa", "-simplifycfg-sink-common=false", + "-amdgpu-skip-threshold=1" }; + LLVMParseCommandLineOptions(3, argv, NULL); + } } static once_flag radv_init_llvm_target_once_flag = ONCE_FLAG_INIT; @@ -600,7 +613,7 @@ thread_compiler = !(device->instance->debug_flags & RADV_DEBUG_NOTHREADLLVM); radv_init_llvm_once(); - radv_init_llvm_compiler(&ac_llvm, false, + radv_init_llvm_compiler(&ac_llvm, thread_compiler, chip_family, tm_options); if (gs_copy_shader) { @@ -860,6 +873,7 @@ buf = _mesa_string_buffer_create(NULL, 1024); _mesa_string_buffer_printf(buf, "%s:\n", radv_get_shader_name(variant, stage)); + _mesa_string_buffer_printf(buf, "%s\n\n", variant->llvm_ir_string); _mesa_string_buffer_printf(buf, "%s\n\n", variant->disasm_string); generate_shader_stats(device, variant, stage, buf); diff -Nru mesa-18.3.3/src/amd/vulkan/radv_shader.h mesa-19.0.1/src/amd/vulkan/radv_shader.h --- mesa-18.3.3/src/amd/vulkan/radv_shader.h 2018-12-07 18:58:04.000000000 +0000 +++ mesa-19.0.1/src/amd/vulkan/radv_shader.h 2019-03-31 23:16:37.000000000 +0000 @@ -213,7 +213,6 @@ struct radv_userdata_info { int8_t sgpr_idx; uint8_t num_sgprs; - bool indirect; }; struct radv_userdata_locations { @@ -258,6 +257,7 @@ unsigned num_interp; uint32_t input_mask; uint32_t flat_shaded_mask; + uint32_t float16_shaded_mask; bool can_discard; bool early_fragment_test; } fs; @@ -402,6 +402,8 @@ return 1; if (slot == VARYING_SLOT_CLIP_DIST0) return 2; + if (slot == VARYING_SLOT_CLIP_DIST1) + return 3; /* 3 is reserved for clip dist as well */ if (slot >= VARYING_SLOT_VAR0 && slot <= VARYING_SLOT_VAR31) return 4 + (slot - VARYING_SLOT_VAR0); diff -Nru 
mesa-18.3.3/src/amd/vulkan/radv_shader_helper.h mesa-19.0.1/src/amd/vulkan/radv_shader_helper.h --- mesa-18.3.3/src/amd/vulkan/radv_shader_helper.h 2018-09-27 19:13:53.000000000 +0000 +++ mesa-19.0.1/src/amd/vulkan/radv_shader_helper.h 2019-03-31 23:16:37.000000000 +0000 @@ -27,7 +27,6 @@ #endif bool radv_init_llvm_compiler(struct ac_llvm_compiler *info, - bool okay_to_leak_target_library_info, bool thread_compiler, enum radeon_family family, enum ac_target_machine_options tm_options); diff -Nru mesa-18.3.3/src/amd/vulkan/radv_shader_info.c mesa-19.0.1/src/amd/vulkan/radv_shader_info.c --- mesa-18.3.3/src/amd/vulkan/radv_shader_info.c 2018-12-07 18:58:04.000000000 +0000 +++ mesa-19.0.1/src/amd/vulkan/radv_shader_info.c 2019-03-31 23:16:37.000000000 +0000 @@ -101,7 +101,7 @@ case MESA_SHADER_VERTEX: { nir_variable *var = nir_deref_instr_get_variable(nir_instr_as_deref(instr->src[0].ssa->parent_instr)); - if (var->data.mode == nir_var_shader_in) { + if (var && var->data.mode == nir_var_shader_in) { unsigned idx = var->data.location; uint8_t mask = nir_ssa_def_components_read(&instr->dest.ssa); @@ -129,11 +129,9 @@ get_deref_offset(deref_instr, &const_offset); - if (idx == VARYING_SLOT_CLIP_DIST0) { - /* Special case for clip/cull distances because there are - * combined into a single array that contains both. - */ - output_usage_mask[idx] |= 1 << const_offset; + if (var->data.compact) { + const_offset += comp; + output_usage_mask[idx + const_offset / 4] |= 1 << (const_offset % 4); return; } @@ -150,7 +148,7 @@ { nir_variable *var = nir_deref_instr_get_variable(nir_instr_as_deref(instr->src[0].ssa->parent_instr)); - if (var->data.mode == nir_var_shader_out) { + if (var && var->data.mode == nir_var_shader_out) { unsigned idx = var->data.location; switch (nir->info.stage) { @@ -174,13 +172,9 @@ type = glsl_get_array_element(var->type); unsigned slots = - var->data.compact ? DIV_ROUND_UP(glsl_get_length(type), 4) + var->data.compact ? 
DIV_ROUND_UP(var->data.location_frac + glsl_get_length(type), 4) : glsl_count_attribute_slots(type, false); - if (idx == VARYING_SLOT_CLIP_DIST0) - slots = (nir->info.clip_distance_array_size + - nir->info.cull_distance_array_size > 4) ? 2 : 1; - mark_tess_output(info, var->data.patch, param, slots); break; } @@ -270,15 +264,15 @@ } mark_sampler_desc(var, info); - if (nir_intrinsic_image_deref_store || - nir_intrinsic_image_deref_atomic_add || - nir_intrinsic_image_deref_atomic_min || - nir_intrinsic_image_deref_atomic_max || - nir_intrinsic_image_deref_atomic_and || - nir_intrinsic_image_deref_atomic_or || - nir_intrinsic_image_deref_atomic_xor || - nir_intrinsic_image_deref_atomic_exchange || - nir_intrinsic_image_deref_atomic_comp_swap) { + if (instr->intrinsic == nir_intrinsic_image_deref_store || + instr->intrinsic == nir_intrinsic_image_deref_atomic_add || + instr->intrinsic == nir_intrinsic_image_deref_atomic_min || + instr->intrinsic == nir_intrinsic_image_deref_atomic_max || + instr->intrinsic == nir_intrinsic_image_deref_atomic_and || + instr->intrinsic == nir_intrinsic_image_deref_atomic_or || + instr->intrinsic == nir_intrinsic_image_deref_atomic_xor || + instr->intrinsic == nir_intrinsic_image_deref_atomic_exchange || + instr->intrinsic == nir_intrinsic_image_deref_atomic_comp_swap) { if (nir->info.stage == MESA_SHADER_FRAGMENT) info->ps.writes_memory = true; } @@ -374,7 +368,8 @@ info->ps.layer_input = true; break; case VARYING_SLOT_CLIP_DIST0: - info->ps.num_input_clips_culls = attrib_count; + case VARYING_SLOT_CLIP_DIST1: + info->ps.num_input_clips_culls += attrib_count; break; default: break; @@ -409,8 +404,8 @@ int idx = var->data.location; unsigned param = shader_io_get_unique_index(idx); int num_slots = glsl_count_attribute_slots(var->type, false); - if (idx == VARYING_SLOT_CLIP_DIST0) - num_slots = (nir->info.clip_distance_array_size + nir->info.cull_distance_array_size > 4) ? 
2 : 1; + if (var->data.compact) + num_slots = DIV_ROUND_UP(var->data.location_frac + glsl_get_length(var->type), 4); mark_ls_output(info, param, num_slots); } @@ -512,8 +507,10 @@ struct nir_function *func = (struct nir_function *)exec_list_get_head_const(&nir->functions); - if (options->layout && options->layout->dynamic_offset_count) + if (options->layout && options->layout->dynamic_offset_count && + (options->layout->dynamic_shader_stages & mesa_to_vk_shader_stage(nir->info.stage))) { info->loads_push_constants = true; + } nir_foreach_variable(variable, &nir->inputs) gather_info_input_decl(nir, variable, info); diff -Nru mesa-18.3.3/src/amd/vulkan/si_cmd_buffer.c mesa-19.0.1/src/amd/vulkan/si_cmd_buffer.c --- mesa-18.3.3/src/amd/vulkan/si_cmd_buffer.c 2018-12-07 18:58:04.000000000 +0000 +++ mesa-19.0.1/src/amd/vulkan/si_cmd_buffer.c 2019-03-31 23:16:37.000000000 +0000 @@ -278,8 +278,7 @@ radeon_set_sh_reg(cs, R_00B21C_SPI_SHADER_PGM_RSRC3_GS, S_00B21C_CU_EN(0xffff) | S_00B21C_WAVE_LIMIT(0x3F)); - if (physical_device->rad_info.num_good_compute_units / - (physical_device->rad_info.max_se * physical_device->rad_info.max_sh_per_se) <= 4) { + if (physical_device->rad_info.num_good_cu_per_sh <= 4) { /* Too few available compute units per SH. Disallowing * VS to run on CU0 could hurt us more than late VS * allocation would help. 
@@ -306,9 +305,6 @@ if (physical_device->rad_info.chip_class >= VI) { uint32_t vgt_tess_distribution; - radeon_set_context_reg(cs, R_028424_CB_DCC_CONTROL, - S_028424_OVERWRITE_COMBINER_MRT_SHARING_DISABLE(1) | - S_028424_OVERWRITE_COMBINER_WATERMARK(4)); vgt_tess_distribution = S_028B50_ACCUM_ISOLINE(32) | S_028B50_ACCUM_TRI(11) | @@ -403,7 +399,8 @@ RADEON_DOMAIN_GTT, RADEON_FLAG_CPU_ACCESS| RADEON_FLAG_NO_INTERPROCESS_SHARING | - RADEON_FLAG_READ_ONLY); + RADEON_FLAG_READ_ONLY, + RADV_BO_PRIORITY_CS); if (!device->gfx_init) goto fail; @@ -664,7 +661,6 @@ unsigned event, unsigned event_flags, unsigned data_sel, uint64_t va, - uint32_t old_fence, uint32_t new_fence, uint64_t gfx9_eop_bug_va) { @@ -711,7 +707,7 @@ radeon_emit(cs, op); radeon_emit(cs, va); radeon_emit(cs, ((va >> 32) & 0xffff) | sel); - radeon_emit(cs, old_fence); /* immediate data */ + radeon_emit(cs, 0); /* immediate data */ radeon_emit(cs, 0); /* unused */ } @@ -725,12 +721,15 @@ } void -si_emit_wait_fence(struct radeon_cmdbuf *cs, - uint64_t va, uint32_t ref, - uint32_t mask) +radv_cp_wait_mem(struct radeon_cmdbuf *cs, uint32_t op, uint64_t va, + uint32_t ref, uint32_t mask) { + assert(op == WAIT_REG_MEM_EQUAL || + op == WAIT_REG_MEM_NOT_EQUAL || + op == WAIT_REG_MEM_GREATER_OR_EQUAL); + radeon_emit(cs, PKT3(PKT3_WAIT_REG_MEM, 5, false)); - radeon_emit(cs, WAIT_REG_MEM_EQUAL | WAIT_REG_MEM_MEM_SPACE(1)); + radeon_emit(cs, op | WAIT_REG_MEM_MEM_SPACE(1)); radeon_emit(cs, va); radeon_emit(cs, va >> 32); radeon_emit(cs, ref); /* reference value */ @@ -802,7 +801,7 @@ V_028A90_FLUSH_AND_INV_CB_DATA_TS, 0, EOP_DATA_SEL_DISCARD, - 0, 0, 0, + 0, 0, gfx9_eop_bug_va); } } @@ -869,13 +868,14 @@ RADV_CMD_FLAG_INV_VMEM_L1); } assert(flush_cnt); - uint32_t old_fence = (*flush_cnt)++; + (*flush_cnt)++; si_cs_emit_write_event_eop(cs, chip_class, false, cb_db_event, tc_flags, EOP_DATA_SEL_VALUE_32BIT, - flush_va, old_fence, *flush_cnt, + flush_va, *flush_cnt, gfx9_eop_bug_va); - si_emit_wait_fence(cs, flush_va, 
*flush_cnt, 0xffffffff); + radv_cp_wait_mem(cs, WAIT_REG_MEM_EQUAL, flush_va, + *flush_cnt, 0xffffffff); } /* VGT state sync */ @@ -971,18 +971,12 @@ if (!cmd_buffer->state.flush_bits) return; - enum chip_class chip_class = cmd_buffer->device->physical_device->rad_info.chip_class; radeon_check_space(cmd_buffer->device->ws, cmd_buffer->cs, 128); - uint32_t *ptr = NULL; - uint64_t va = 0; - if (chip_class == GFX9) { - va = radv_buffer_get_va(cmd_buffer->gfx9_fence_bo) + cmd_buffer->gfx9_fence_offset; - ptr = &cmd_buffer->gfx9_fence_idx; - } si_cs_emit_cache_flush(cmd_buffer->cs, cmd_buffer->device->physical_device->rad_info.chip_class, - ptr, va, + &cmd_buffer->gfx9_fence_idx, + cmd_buffer->gfx9_fence_va, radv_cmd_buffer_uses_mec(cmd_buffer), cmd_buffer->state.flush_bits, cmd_buffer->gfx9_eop_bug_va); @@ -992,6 +986,11 @@ radv_cmd_buffer_trace_emit(cmd_buffer); cmd_buffer->state.flush_bits = 0; + + /* If the driver used a compute shader for resetting a query pool, it + * should be finished at this point. 
+ */ + cmd_buffer->pending_reset_query = false; } /* sets the CP predication state using a boolean stored at va */ diff -Nru mesa-18.3.3/src/amd/vulkan/vk_format_table.py mesa-19.0.1/src/amd/vulkan/vk_format_table.py --- mesa-18.3.3/src/amd/vulkan/vk_format_table.py 2018-09-27 19:13:53.000000000 +0000 +++ mesa-19.0.1/src/amd/vulkan/vk_format_table.py 2019-03-31 23:16:37.000000000 +0000 @@ -146,10 +146,6 @@ print("const struct vk_format_description *") print("vk_format_description(VkFormat format)") print("{") - print(" if (format > VK_FORMAT_END_RANGE) {") - print(" return NULL;") - print(" }") - print() print(" switch (format) {") for format in formats: print(" case %s:" % format.name) diff -Nru mesa-18.3.3/src/amd/vulkan/winsys/amdgpu/radv_amdgpu_bo.c mesa-19.0.1/src/amd/vulkan/winsys/amdgpu/radv_amdgpu_bo.c --- mesa-18.3.3/src/amd/vulkan/winsys/amdgpu/radv_amdgpu_bo.c 2018-12-07 18:58:04.000000000 +0000 +++ mesa-19.0.1/src/amd/vulkan/winsys/amdgpu/radv_amdgpu_bo.c 2019-03-31 23:16:37.000000000 +0000 @@ -249,6 +249,7 @@ static void radv_amdgpu_winsys_bo_destroy(struct radeon_winsys_bo *_bo) { struct radv_amdgpu_winsys_bo *bo = radv_amdgpu_winsys_bo(_bo); + struct radv_amdgpu_winsys *ws = bo->ws; if (p_atomic_dec_return(&bo->ref_count)) return; @@ -269,6 +270,17 @@ 0, AMDGPU_VA_OP_UNMAP); amdgpu_bo_free(bo->bo); } + + if (bo->initial_domain & RADEON_DOMAIN_VRAM) + p_atomic_add(&ws->allocated_vram, + -align64(bo->size, ws->info.gart_page_size)); + if (bo->base.vram_cpu_access) + p_atomic_add(&ws->allocated_vram_vis, + -align64(bo->size, ws->info.gart_page_size)); + if (bo->initial_domain & RADEON_DOMAIN_GTT) + p_atomic_add(&ws->allocated_gtt, + -align64(bo->size, ws->info.gart_page_size)); + amdgpu_va_range_free(bo->va_handle); FREE(bo); } @@ -290,7 +302,8 @@ uint64_t size, unsigned alignment, enum radeon_bo_domain initial_domain, - unsigned flags) + unsigned flags, + unsigned priority) { struct radv_amdgpu_winsys *ws = radv_amdgpu_winsys(_ws); struct 
radv_amdgpu_winsys_bo *bo; @@ -344,8 +357,10 @@ if (initial_domain & RADEON_DOMAIN_GTT) request.preferred_heap |= AMDGPU_GEM_DOMAIN_GTT; - if (flags & RADEON_FLAG_CPU_ACCESS) + if (flags & RADEON_FLAG_CPU_ACCESS) { + bo->base.vram_cpu_access = initial_domain & RADEON_DOMAIN_VRAM; request.flags |= AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED; + } if (flags & RADEON_FLAG_NO_CPU_ACCESS) request.flags |= AMDGPU_GEM_CREATE_NO_CPU_ACCESS; if (flags & RADEON_FLAG_GTT_WC) @@ -378,6 +393,21 @@ bo->bo = buf_handle; bo->initial_domain = initial_domain; bo->is_shared = false; + bo->priority = priority; + + r = amdgpu_bo_export(buf_handle, amdgpu_bo_handle_type_kms, &bo->bo_handle); + assert(!r); + + if (initial_domain & RADEON_DOMAIN_VRAM) + p_atomic_add(&ws->allocated_vram, + align64(bo->size, ws->info.gart_page_size)); + if (bo->base.vram_cpu_access) + p_atomic_add(&ws->allocated_vram_vis, + align64(bo->size, ws->info.gart_page_size)); + if (initial_domain & RADEON_DOMAIN_GTT) + p_atomic_add(&ws->allocated_gtt, + align64(bo->size, ws->info.gart_page_size)); + radv_amdgpu_add_buffer_to_global_list(bo); return (struct radeon_winsys_bo *)bo; error_va_map: @@ -410,16 +440,40 @@ amdgpu_bo_cpu_unmap(bo->bo); } +static uint64_t +radv_amdgpu_get_optimal_vm_alignment(struct radv_amdgpu_winsys *ws, + uint64_t size, unsigned alignment) +{ + uint64_t vm_alignment = alignment; + + /* Increase the VM alignment for faster address translation. */ + if (size >= ws->info.pte_fragment_size) + vm_alignment = MAX2(vm_alignment, ws->info.pte_fragment_size); + + /* Gfx9: Increase the VM alignment to the most significant bit set + * in the size for faster address translation. + */ + if (ws->info.chip_class >= GFX9) { + unsigned msb = util_last_bit64(size); /* 0 = no bit is set */ + uint64_t msb_alignment = msb ? 
1ull << (msb - 1) : 0; + + vm_alignment = MAX2(vm_alignment, msb_alignment); + } + return vm_alignment; +} + static struct radeon_winsys_bo * radv_amdgpu_winsys_bo_from_ptr(struct radeon_winsys *_ws, void *pointer, - uint64_t size) + uint64_t size, + unsigned priority) { struct radv_amdgpu_winsys *ws = radv_amdgpu_winsys(_ws); amdgpu_bo_handle buf_handle; struct radv_amdgpu_winsys_bo *bo; uint64_t va; amdgpu_va_handle va_handle; + uint64_t vm_alignment; bo = CALLOC_STRUCT(radv_amdgpu_winsys_bo); if (!bo) @@ -428,8 +482,14 @@ if (amdgpu_create_bo_from_user_mem(ws->dev, pointer, size, &buf_handle)) goto error; + /* Using the optimal VM alignment also fixes GPU hangs for buffers that + * are imported. + */ + vm_alignment = radv_amdgpu_get_optimal_vm_alignment(ws, size, + ws->info.gart_page_size); + if (amdgpu_va_range_alloc(ws->dev, amdgpu_gpu_va_range_general, - size, 1 << 12, 0, &va, &va_handle, + size, vm_alignment, 0, &va, &va_handle, AMDGPU_VA_RANGE_HIGH)) goto error_va_alloc; @@ -444,6 +504,13 @@ bo->ws = ws; bo->bo = buf_handle; bo->initial_domain = RADEON_DOMAIN_GTT; + bo->priority = priority; + + MAYBE_UNUSED int r = amdgpu_bo_export(buf_handle, amdgpu_bo_handle_type_kms, &bo->bo_handle); + assert(!r); + + p_atomic_add(&ws->allocated_gtt, + align64(bo->size, ws->info.gart_page_size)); radv_amdgpu_add_buffer_to_global_list(bo); return (struct radeon_winsys_bo *)bo; @@ -461,7 +528,8 @@ static struct radeon_winsys_bo * radv_amdgpu_winsys_bo_from_fd(struct radeon_winsys *_ws, - int fd, unsigned *stride, + int fd, unsigned priority, + unsigned *stride, unsigned *offset) { struct radv_amdgpu_winsys *ws = radv_amdgpu_winsys(_ws); @@ -508,7 +576,19 @@ bo->size = result.alloc_size; bo->is_shared = true; bo->ws = ws; + bo->priority = priority; bo->ref_count = 1; + + r = amdgpu_bo_export(result.buf_handle, amdgpu_bo_handle_type_kms, &bo->bo_handle); + assert(!r); + + if (bo->initial_domain & RADEON_DOMAIN_VRAM) + p_atomic_add(&ws->allocated_vram, + align64(bo->size, 
ws->info.gart_page_size)); + if (bo->initial_domain & RADEON_DOMAIN_GTT) + p_atomic_add(&ws->allocated_gtt, + align64(bo->size, ws->info.gart_page_size)); + radv_amdgpu_add_buffer_to_global_list(bo); return (struct radeon_winsys_bo *)bo; error_va_map: diff -Nru mesa-18.3.3/src/amd/vulkan/winsys/amdgpu/radv_amdgpu_bo.h mesa-19.0.1/src/amd/vulkan/winsys/amdgpu/radv_amdgpu_bo.h --- mesa-18.3.3/src/amd/vulkan/winsys/amdgpu/radv_amdgpu_bo.h 2017-11-07 20:47:52.000000000 +0000 +++ mesa-19.0.1/src/amd/vulkan/winsys/amdgpu/radv_amdgpu_bo.h 2019-03-31 23:16:37.000000000 +0000 @@ -45,6 +45,7 @@ uint64_t size; struct radv_amdgpu_winsys *ws; bool is_virtual; + uint8_t priority; int ref_count; union { @@ -53,6 +54,7 @@ amdgpu_bo_handle bo; enum radeon_bo_domain initial_domain; bool is_shared; + uint32_t bo_handle; struct list_head global_list_item; }; /* virtual bo */ diff -Nru mesa-18.3.3/src/amd/vulkan/winsys/amdgpu/radv_amdgpu_cs.c mesa-19.0.1/src/amd/vulkan/winsys/amdgpu/radv_amdgpu_cs.c --- mesa-18.3.3/src/amd/vulkan/winsys/amdgpu/radv_amdgpu_cs.c 2018-12-07 18:58:04.000000000 +0000 +++ mesa-19.0.1/src/amd/vulkan/winsys/amdgpu/radv_amdgpu_cs.c 2019-03-31 23:16:37.000000000 +0000 @@ -50,7 +50,7 @@ uint8_t *ib_mapped; unsigned max_num_buffers; unsigned num_buffers; - amdgpu_bo_handle *handles; + struct drm_amdgpu_bo_list_entry *handles; struct radeon_winsys_bo **old_ib_buffers; unsigned num_old_ib_buffers; @@ -92,17 +92,71 @@ } } +struct radv_amdgpu_cs_request { + /** Specify flags with additional information */ + uint64_t flags; + + /** Specify HW IP block type to which to send the IB. */ + unsigned ip_type; + + /** IP instance index if there are several IPs of the same type. */ + unsigned ip_instance; + + /** + * Specify ring index of the IP. We could have several rings + * in the same IP. E.g. 0 for SDMA0 and 1 for SDMA1. + */ + uint32_t ring; + + /** + * List handle with resources used by this request. This is a raw + * bo list handle used by the kernel. 
+ */ + uint32_t resources; + + /** + * Number of dependencies this Command submission needs to + * wait for before starting execution. + */ + uint32_t number_of_dependencies; + + /** + * Array of dependencies which need to be met before + * execution can start. + */ + struct amdgpu_cs_fence *dependencies; + + /** Number of IBs to submit in the field ibs. */ + uint32_t number_of_ibs; + + /** + * IBs to submit. Those IBs will be submit together as single entity + */ + struct amdgpu_cs_ib_info *ibs; + + /** + * The returned sequence number for the command submission + */ + uint64_t seq_no; + + /** + * The fence information + */ + struct amdgpu_cs_fence_info fence_info; +}; + + static int radv_amdgpu_signal_sems(struct radv_amdgpu_ctx *ctx, uint32_t ip_type, uint32_t ring, struct radv_winsys_sem_info *sem_info); static int radv_amdgpu_cs_submit(struct radv_amdgpu_ctx *ctx, - struct amdgpu_cs_request *request, + struct radv_amdgpu_cs_request *request, struct radv_winsys_sem_info *sem_info); static void radv_amdgpu_request_to_fence(struct radv_amdgpu_ctx *ctx, struct radv_amdgpu_fence *fence, - struct amdgpu_cs_request *req) + struct radv_amdgpu_cs_request *req) { fence->fence.context = ctx->ctx; fence->fence.ip_type = req->ip_type; @@ -243,7 +297,8 @@ RADEON_DOMAIN_GTT, RADEON_FLAG_CPU_ACCESS | RADEON_FLAG_NO_INTERPROCESS_SHARING | - RADEON_FLAG_READ_ONLY); + RADEON_FLAG_READ_ONLY, + RADV_BO_PRIORITY_CS); if (!cs->ib_buffer) { free(cs); return NULL; @@ -295,15 +350,6 @@ /* The maximum size in dwords has been reached, * try to allocate a new one. */ - if (cs->num_old_cs_buffers + 1 >= AMDGPU_CS_MAX_IBS_PER_SUBMIT) { - /* TODO: Allow to submit more than 4 IBs. 
*/ - fprintf(stderr, "amdgpu: Maximum number of IBs " - "per submit reached.\n"); - cs->failed = true; - cs->base.cdw = 0; - return; - } - cs->old_cs_buffers = realloc(cs->old_cs_buffers, (cs->num_old_cs_buffers + 1) * sizeof(*cs->old_cs_buffers)); @@ -367,7 +413,8 @@ RADEON_DOMAIN_GTT, RADEON_FLAG_CPU_ACCESS | RADEON_FLAG_NO_INTERPROCESS_SHARING | - RADEON_FLAG_READ_ONLY); + RADEON_FLAG_READ_ONLY, + RADV_BO_PRIORITY_CS); if (!cs->ib_buffer) { cs->base.cdw = 0; @@ -421,8 +468,8 @@ cs->failed = false; for (unsigned i = 0; i < cs->num_buffers; ++i) { - unsigned hash = ((uintptr_t)cs->handles[i] >> 6) & - (ARRAY_SIZE(cs->buffer_hash_table) - 1); + unsigned hash = cs->handles[i].bo_handle & + (ARRAY_SIZE(cs->buffer_hash_table) - 1); cs->buffer_hash_table[hash] = -1; } @@ -457,19 +504,19 @@ } static int radv_amdgpu_cs_find_buffer(struct radv_amdgpu_cs *cs, - amdgpu_bo_handle bo) + uint32_t bo) { - unsigned hash = ((uintptr_t)bo >> 6) & (ARRAY_SIZE(cs->buffer_hash_table) - 1); + unsigned hash = bo & (ARRAY_SIZE(cs->buffer_hash_table) - 1); int index = cs->buffer_hash_table[hash]; if (index == -1) return -1; - if (cs->handles[index] == bo) + if (cs->handles[index].bo_handle == bo) return index; for (unsigned i = 0; i < cs->num_buffers; ++i) { - if (cs->handles[i] == bo) { + if (cs->handles[i].bo_handle == bo) { cs->buffer_hash_table[hash] = i; return i; } @@ -479,7 +526,7 @@ } static void radv_amdgpu_cs_add_buffer_internal(struct radv_amdgpu_cs *cs, - amdgpu_bo_handle bo) + uint32_t bo, uint8_t priority) { unsigned hash; int index = radv_amdgpu_cs_find_buffer(cs, bo); @@ -489,13 +536,14 @@ if (cs->num_buffers == cs->max_num_buffers) { unsigned new_count = MAX2(1, cs->max_num_buffers * 2); - cs->handles = realloc(cs->handles, new_count * sizeof(amdgpu_bo_handle)); + cs->handles = realloc(cs->handles, new_count * sizeof(struct drm_amdgpu_bo_list_entry)); cs->max_num_buffers = new_count; } - cs->handles[cs->num_buffers] = bo; + cs->handles[cs->num_buffers].bo_handle = bo; + 
cs->handles[cs->num_buffers].bo_priority = priority; - hash = ((uintptr_t)bo >> 6) & (ARRAY_SIZE(cs->buffer_hash_table) - 1); + hash = bo & (ARRAY_SIZE(cs->buffer_hash_table) - 1); cs->buffer_hash_table[hash] = cs->num_buffers; ++cs->num_buffers; @@ -553,7 +601,7 @@ if (bo->base.is_local) return; - radv_amdgpu_cs_add_buffer_internal(cs, bo->bo); + radv_amdgpu_cs_add_buffer_internal(cs, bo->bo_handle, bo->priority); } static void radv_amdgpu_cs_execute_secondary(struct radeon_cmdbuf *_parent, @@ -563,7 +611,9 @@ struct radv_amdgpu_cs *child = radv_amdgpu_cs(_child); for (unsigned i = 0; i < child->num_buffers; ++i) { - radv_amdgpu_cs_add_buffer_internal(parent, child->handles[i]); + radv_amdgpu_cs_add_buffer_internal(parent, + child->handles[i].bo_handle, + child->handles[i].bo_priority); } for (unsigned i = 0; i < child->num_virtual_buffers; ++i) { @@ -594,13 +644,13 @@ unsigned num_extra_bo, struct radeon_cmdbuf *extra_cs, const struct radv_winsys_bo_list *radv_bo_list, - amdgpu_bo_list_handle *bo_list) + uint32_t *bo_list) { int r = 0; if (ws->debug_all_bos) { struct radv_amdgpu_winsys_bo *bo; - amdgpu_bo_handle *handles; + struct drm_amdgpu_bo_list_entry *handles; unsigned num = 0; pthread_mutex_lock(&ws->global_bo_list_lock); @@ -613,12 +663,13 @@ LIST_FOR_EACH_ENTRY(bo, &ws->global_bo_list, global_list_item) { assert(num < ws->num_buffers); - handles[num++] = bo->bo; + handles[num].bo_handle = bo->bo_handle; + handles[num].bo_priority = bo->priority; + num++; } - r = amdgpu_bo_list_create(ws->dev, ws->num_buffers, - handles, NULL, - bo_list); + r = amdgpu_bo_list_create_raw(ws->dev, ws->num_buffers, + handles, bo_list); free(handles); pthread_mutex_unlock(&ws->global_bo_list_lock); } else if (count == 1 && !num_extra_bo && !extra_cs && !radv_bo_list && @@ -628,8 +679,8 @@ *bo_list = 0; return 0; } - r = amdgpu_bo_list_create(ws->dev, cs->num_buffers, cs->handles, - NULL, bo_list); + r = amdgpu_bo_list_create_raw(ws->dev, cs->num_buffers, cs->handles, + 
bo_list); } else { unsigned total_buffer_count = num_extra_bo; unsigned unique_bo_count = num_extra_bo; @@ -652,14 +703,15 @@ *bo_list = 0; return 0; } - amdgpu_bo_handle *handles = malloc(sizeof(amdgpu_bo_handle) * total_buffer_count); + struct drm_amdgpu_bo_list_entry *handles = malloc(sizeof(struct drm_amdgpu_bo_list_entry) * total_buffer_count); if (!handles) { free(handles); return -ENOMEM; } for (unsigned i = 0; i < num_extra_bo; i++) { - handles[i] = extra_bo_array[i]->bo; + handles[i].bo_handle = extra_bo_array[i]->bo_handle; + handles[i].bo_priority = extra_bo_array[i]->priority; } for (unsigned i = 0; i < count + !!extra_cs; ++i) { @@ -674,7 +726,7 @@ continue; if (unique_bo_count == 0 && !cs->num_virtual_buffers) { - memcpy(handles, cs->handles, cs->num_buffers * sizeof(amdgpu_bo_handle)); + memcpy(handles, cs->handles, cs->num_buffers * sizeof(struct drm_amdgpu_bo_list_entry)); unique_bo_count = cs->num_buffers; continue; } @@ -682,7 +734,7 @@ for (unsigned j = 0; j < cs->num_buffers; ++j) { bool found = false; for (unsigned k = 0; k < unique_bo_so_far; ++k) { - if (handles[k] == cs->handles[j]) { + if (handles[k].bo_handle == cs->handles[j].bo_handle) { found = true; break; } @@ -698,13 +750,14 @@ struct radv_amdgpu_winsys_bo *bo = virtual_bo->bos[k]; bool found = false; for (unsigned m = 0; m < unique_bo_count; ++m) { - if (handles[m] == bo->bo) { + if (handles[m].bo_handle == bo->bo_handle) { found = true; break; } } if (!found) { - handles[unique_bo_count] = bo->bo; + handles[unique_bo_count].bo_handle = bo->bo_handle; + handles[unique_bo_count].bo_priority = bo->priority; ++unique_bo_count; } } @@ -717,21 +770,22 @@ struct radv_amdgpu_winsys_bo *bo = radv_amdgpu_winsys_bo(radv_bo_list->bos[i]); bool found = false; for (unsigned j = 0; j < unique_bo_so_far; ++j) { - if (bo->bo == handles[j]) { + if (bo->bo_handle == handles[j].bo_handle) { found = true; break; } } if (!found) { - handles[unique_bo_count] = bo->bo; + 
handles[unique_bo_count].bo_handle = bo->bo_handle; + handles[unique_bo_count].bo_priority = bo->priority; ++unique_bo_count; } } } if (unique_bo_count > 0) { - r = amdgpu_bo_list_create(ws->dev, unique_bo_count, handles, - NULL, bo_list); + r = amdgpu_bo_list_create_raw(ws->dev, unique_bo_count, handles, + bo_list); } else { *bo_list = 0; } @@ -753,7 +807,7 @@ } static void radv_assign_last_submit(struct radv_amdgpu_ctx *ctx, - struct amdgpu_cs_request *request) + struct radv_amdgpu_cs_request *request) { radv_amdgpu_request_to_fence(ctx, &ctx->last_submission[request->ip_type][request->ring], @@ -774,8 +828,8 @@ struct radv_amdgpu_ctx *ctx = radv_amdgpu_ctx(_ctx); struct radv_amdgpu_fence *fence = (struct radv_amdgpu_fence *)_fence; struct radv_amdgpu_cs *cs0 = radv_amdgpu_cs(cs_array[0]); - amdgpu_bo_list_handle bo_list; - struct amdgpu_cs_request request = {0}; + uint32_t bo_list; + struct radv_amdgpu_cs_request request = {0}; struct amdgpu_cs_ib_info ibs[2]; unsigned number_of_ibs = 1; @@ -837,8 +891,7 @@ "see dmesg for more information.\n"); } - if (bo_list) - amdgpu_bo_list_destroy(bo_list); + amdgpu_bo_list_destroy_raw(ctx->ws->dev, bo_list); if (r) return r; @@ -864,67 +917,72 @@ int r; struct radv_amdgpu_ctx *ctx = radv_amdgpu_ctx(_ctx); struct radv_amdgpu_fence *fence = (struct radv_amdgpu_fence *)_fence; - amdgpu_bo_list_handle bo_list; - struct amdgpu_cs_request request; - bool emit_signal_sem = sem_info->cs_emit_signal; + uint32_t bo_list; + struct radv_amdgpu_cs_request request = {}; + struct amdgpu_cs_ib_info *ibs; + struct radv_amdgpu_cs *cs0; + unsigned number_of_ibs; + assert(cs_count); + cs0 = radv_amdgpu_cs(cs_array[0]); - for (unsigned i = 0; i < cs_count;) { - struct radv_amdgpu_cs *cs0 = radv_amdgpu_cs(cs_array[i]); - struct amdgpu_cs_ib_info ibs[AMDGPU_CS_MAX_IBS_PER_SUBMIT]; - struct radeon_cmdbuf *preamble_cs = i ? 
continue_preamble_cs : initial_preamble_cs; - unsigned cnt = MIN2(AMDGPU_CS_MAX_IBS_PER_SUBMIT - !!preamble_cs, - cs_count - i); + /* Compute the number of IBs for this submit. */ + number_of_ibs = cs_count + !!initial_preamble_cs; - memset(&request, 0, sizeof(request)); + /* Create a buffer object list. */ + r = radv_amdgpu_create_bo_list(cs0->ws, &cs_array[0], cs_count, NULL, 0, + initial_preamble_cs, radv_bo_list, + &bo_list); + if (r) { + fprintf(stderr, "amdgpu: buffer list creation failed " + "for the fallback submission (%d)\n", r); + return r; + } - r = radv_amdgpu_create_bo_list(cs0->ws, &cs_array[i], cnt, NULL, 0, - preamble_cs, radv_bo_list, &bo_list); - if (r) { - fprintf(stderr, "amdgpu: buffer list creation failed " - "for the fallback submission (%d)\n", r); - return r; - } + ibs = malloc(number_of_ibs * sizeof(*ibs)); + if (!ibs) { + amdgpu_bo_list_destroy_raw(ctx->ws->dev, bo_list); + return -ENOMEM; + } - request.ip_type = cs0->hw_ip; - request.ring = queue_idx; - request.resources = bo_list; - request.number_of_ibs = cnt + !!preamble_cs; - request.ibs = ibs; - request.fence_info = radv_set_cs_fence(ctx, cs0->hw_ip, queue_idx); + /* Configure the CS request. */ + if (initial_preamble_cs) + ibs[0] = radv_amdgpu_cs(initial_preamble_cs)->ib; - if (preamble_cs) { - ibs[0] = radv_amdgpu_cs(preamble_cs)->ib; - } + for (unsigned i = 0; i < cs_count; i++) { + struct radv_amdgpu_cs *cs = radv_amdgpu_cs(cs_array[i]); - for (unsigned j = 0; j < cnt; ++j) { - struct radv_amdgpu_cs *cs = radv_amdgpu_cs(cs_array[i + j]); - ibs[j + !!preamble_cs] = cs->ib; + ibs[i + !!initial_preamble_cs] = cs->ib; - if (cs->is_chained) { - *cs->ib_size_ptr -= 4; - cs->is_chained = false; - } + if (cs->is_chained) { + *cs->ib_size_ptr -= 4; + cs->is_chained = false; } + } - sem_info->cs_emit_signal = (i == cs_count - cnt) ? 
emit_signal_sem : false; - r = radv_amdgpu_cs_submit(ctx, &request, sem_info); - if (r) { - if (r == -ENOMEM) - fprintf(stderr, "amdgpu: Not enough memory for command submission.\n"); - else - fprintf(stderr, "amdgpu: The CS has been rejected, " - "see dmesg for more information.\n"); - } + request.ip_type = cs0->hw_ip; + request.ring = queue_idx; + request.resources = bo_list; + request.number_of_ibs = number_of_ibs; + request.ibs = ibs; + request.fence_info = radv_set_cs_fence(ctx, cs0->hw_ip, queue_idx); + + /* Submit the CS. */ + r = radv_amdgpu_cs_submit(ctx, &request, sem_info); + if (r) { + if (r == -ENOMEM) + fprintf(stderr, "amdgpu: Not enough memory for command submission.\n"); + else + fprintf(stderr, "amdgpu: The CS has been rejected, " + "see dmesg for more information.\n"); + } - if (bo_list) - amdgpu_bo_list_destroy(bo_list); + amdgpu_bo_list_destroy_raw(ctx->ws->dev, bo_list); + free(ibs); - if (r) - return r; + if (r) + return r; - i += cnt; - } if (fence) radv_amdgpu_request_to_fence(ctx, fence, &request); @@ -948,8 +1006,8 @@ struct radv_amdgpu_fence *fence = (struct radv_amdgpu_fence *)_fence; struct radv_amdgpu_cs *cs0 = radv_amdgpu_cs(cs_array[0]); struct radeon_winsys *ws = (struct radeon_winsys*)cs0->ws; - amdgpu_bo_list_handle bo_list; - struct amdgpu_cs_request request; + uint32_t bo_list; + struct radv_amdgpu_cs_request request; uint32_t pad_word = 0xffff1000U; bool emit_signal_sem = sem_info->cs_emit_signal; @@ -959,30 +1017,46 @@ assert(cs_count); for (unsigned i = 0; i < cs_count;) { - struct amdgpu_cs_ib_info ibs[AMDGPU_CS_MAX_IBS_PER_SUBMIT] = {0}; - unsigned number_of_ibs = 1; - struct radeon_winsys_bo *bos[AMDGPU_CS_MAX_IBS_PER_SUBMIT] = {0}; + struct amdgpu_cs_ib_info *ibs; + struct radeon_winsys_bo **bos; struct radeon_cmdbuf *preamble_cs = i ? 
continue_preamble_cs : initial_preamble_cs; struct radv_amdgpu_cs *cs = radv_amdgpu_cs(cs_array[i]); + unsigned number_of_ibs; uint32_t *ptr; unsigned cnt = 0; unsigned size = 0; unsigned pad_words = 0; - if (cs->num_old_cs_buffers > 0) { + /* Compute the number of IBs for this submit. */ + number_of_ibs = cs->num_old_cs_buffers + 1; + + ibs = malloc(number_of_ibs * sizeof(*ibs)); + if (!ibs) + return -ENOMEM; + + bos = malloc(number_of_ibs * sizeof(*bos)); + if (!bos) { + free(ibs); + return -ENOMEM; + } + + if (number_of_ibs > 1) { /* Special path when the maximum size in dwords has * been reached because we need to handle more than one * IB per submit. */ - unsigned new_cs_count = cs->num_old_cs_buffers + 1; - struct radeon_cmdbuf *new_cs_array[AMDGPU_CS_MAX_IBS_PER_SUBMIT]; + struct radeon_cmdbuf **new_cs_array; unsigned idx = 0; + new_cs_array = malloc(cs->num_old_cs_buffers * + sizeof(*new_cs_array)); + assert(new_cs_array); + for (unsigned j = 0; j < cs->num_old_cs_buffers; j++) new_cs_array[idx++] = &cs->old_cs_buffers[j]; new_cs_array[idx++] = cs_array[i]; - for (unsigned j = 0; j < new_cs_count; j++) { + for (unsigned j = 0; j < number_of_ibs; j++) { struct radeon_cmdbuf *rcs = new_cs_array[j]; bool needs_preamble = preamble_cs && j == 0; unsigned size = 0; @@ -1002,7 +1076,8 @@ RADEON_DOMAIN_GTT, RADEON_FLAG_CPU_ACCESS | RADEON_FLAG_NO_INTERPROCESS_SHARING | - RADEON_FLAG_READ_ONLY); + RADEON_FLAG_READ_ONLY, + RADV_BO_PRIORITY_CS); ptr = ws->buffer_map(bos[j]); if (needs_preamble) { @@ -1020,8 +1095,8 @@ ibs[j].ib_mc_address = radv_buffer_get_va(bos[j]); } - number_of_ibs = new_cs_count; cnt++; + free(new_cs_array); } else { if (preamble_cs) size += preamble_cs->cdw; @@ -1041,7 +1116,8 @@ RADEON_DOMAIN_GTT, RADEON_FLAG_CPU_ACCESS | RADEON_FLAG_NO_INTERPROCESS_SHARING | - RADEON_FLAG_READ_ONLY); + RADEON_FLAG_READ_ONLY, + RADV_BO_PRIORITY_CS); ptr = ws->buffer_map(bos[0]); if (preamble_cs) { @@ -1070,6 +1146,8 @@ if (r) { fprintf(stderr, "amdgpu: buffer 
list creation failed " "for the sysmem submission (%d)\n", r); + free(ibs); + free(bos); return r; } @@ -1092,13 +1170,15 @@ "see dmesg for more information.\n"); } - if (bo_list) - amdgpu_bo_list_destroy(bo_list); + amdgpu_bo_list_destroy_raw(ctx->ws->dev, bo_list); for (unsigned j = 0; j < number_of_ibs; j++) { ws->buffer_destroy(bos[j]); } + free(ibs); + free(bos); + if (r) return r; @@ -1131,7 +1211,7 @@ if (!cs->ws->use_ib_bos) { ret = radv_amdgpu_winsys_cs_submit_sysmem(_ctx, queue_idx, sem_info, bo_list, cs_array, cs_count, initial_preamble_cs, continue_preamble_cs, _fence); - } else if (can_patch && cs_count > AMDGPU_CS_MAX_IBS_PER_SUBMIT && cs->ws->batchchain) { + } else if (can_patch && cs->ws->batchchain) { ret = radv_amdgpu_winsys_cs_submit_chained(_ctx, queue_idx, sem_info, bo_list, cs_array, cs_count, initial_preamble_cs, continue_preamble_cs, _fence); } else { @@ -1230,8 +1310,9 @@ assert(AMDGPU_HW_IP_NUM * MAX_RINGS_PER_TYPE * sizeof(uint64_t) <= 4096); ctx->fence_bo = ws->base.buffer_create(&ws->base, 4096, 8, RADEON_DOMAIN_GTT, - RADEON_FLAG_CPU_ACCESS| - RADEON_FLAG_NO_INTERPROCESS_SHARING); + RADEON_FLAG_CPU_ACCESS | + RADEON_FLAG_NO_INTERPROCESS_SHARING, + RADV_BO_PRIORITY_CS); if (ctx->fence_bo) ctx->fence_map = (uint64_t*)ws->base.buffer_map(ctx->fence_bo); if (ctx->fence_map) @@ -1318,7 +1399,7 @@ } static int radv_amdgpu_cs_submit(struct radv_amdgpu_ctx *ctx, - struct amdgpu_cs_request *request, + struct radv_amdgpu_cs_request *request, struct radv_winsys_sem_info *sem_info) { int r; @@ -1420,7 +1501,7 @@ num_chunks++; } - r = amdgpu_cs_submit_raw(ctx->ws->dev, + r = amdgpu_cs_submit_raw2(ctx->ws->dev, ctx->ctx, request->resources, num_chunks, diff -Nru mesa-18.3.3/src/amd/vulkan/winsys/amdgpu/radv_amdgpu_surface.c mesa-19.0.1/src/amd/vulkan/winsys/amdgpu/radv_amdgpu_surface.c --- mesa-18.3.3/src/amd/vulkan/winsys/amdgpu/radv_amdgpu_surface.c 2018-12-07 18:58:04.000000000 +0000 +++ 
mesa-19.0.1/src/amd/vulkan/winsys/amdgpu/radv_amdgpu_surface.c 2019-03-31 23:16:37.000000000 +0000 @@ -29,7 +29,6 @@ #include #include "radv_private.h" -#include "addrlib/addrinterface.h" #include "util/bitset.h" #include "radv_amdgpu_winsys.h" #include "radv_amdgpu_surface.h" diff -Nru mesa-18.3.3/src/amd/vulkan/winsys/amdgpu/radv_amdgpu_winsys.c mesa-19.0.1/src/amd/vulkan/winsys/amdgpu/radv_amdgpu_winsys.c --- mesa-18.3.3/src/amd/vulkan/winsys/amdgpu/radv_amdgpu_winsys.c 2018-09-27 19:13:53.000000000 +0000 +++ mesa-19.0.1/src/amd/vulkan/winsys/amdgpu/radv_amdgpu_winsys.c 2019-03-31 23:16:37.000000000 +0000 @@ -72,6 +72,12 @@ uint64_t retval = 0; switch (value) { + case RADEON_ALLOCATED_VRAM: + return ws->allocated_vram; + case RADEON_ALLOCATED_VRAM_VIS: + return ws->allocated_vram_vis; + case RADEON_ALLOCATED_GTT: + return ws->allocated_gtt; case RADEON_TIMESTAMP: amdgpu_query_info(ws->dev, AMDGPU_INFO_TIMESTAMP, 8, &retval); return retval; diff -Nru mesa-18.3.3/src/amd/vulkan/winsys/amdgpu/radv_amdgpu_winsys.h mesa-19.0.1/src/amd/vulkan/winsys/amdgpu/radv_amdgpu_winsys.h --- mesa-18.3.3/src/amd/vulkan/winsys/amdgpu/radv_amdgpu_winsys.h 2018-07-29 21:30:58.000000000 +0000 +++ mesa-19.0.1/src/amd/vulkan/winsys/amdgpu/radv_amdgpu_winsys.h 2019-03-31 23:16:37.000000000 +0000 @@ -30,7 +30,7 @@ #include "radv_radeon_winsys.h" #include "ac_gpu_info.h" -#include "addrlib/addrinterface.h" +#include "addrlib/inc/addrinterface.h" #include #include "util/list.h" #include @@ -52,6 +52,10 @@ pthread_mutex_t global_bo_list_lock; struct list_head global_bo_list; + + uint64_t allocated_vram; + uint64_t allocated_vram_vis; + uint64_t allocated_gtt; }; static inline struct radv_amdgpu_winsys * diff -Nru mesa-18.3.3/src/amd/vulkan/winsys/amdgpu/radv_amdgpu_winsys_public.h mesa-19.0.1/src/amd/vulkan/winsys/amdgpu/radv_amdgpu_winsys_public.h --- mesa-18.3.3/src/amd/vulkan/winsys/amdgpu/radv_amdgpu_winsys_public.h 2017-11-05 00:14:08.000000000 +0000 +++ 
mesa-19.0.1/src/amd/vulkan/winsys/amdgpu/radv_amdgpu_winsys_public.h 2019-03-31 23:16:37.000000000 +0000 @@ -29,6 +29,13 @@ #ifndef RADV_AMDGPU_WINSYS_PUBLIC_H #define RADV_AMDGPU_WINSYS_PUBLIC_H +/* The number of IBs per submit isn't infinite, it depends on the ring type + * (ie. some initial setup needed for a submit) and the number of IBs (4 DW). + * This limit is arbitrary but should be safe for now. Ideally, we should get + * this limit from the KMD. +*/ +#define RADV_MAX_IBS_PER_SUBMIT 192 + struct radeon_winsys *radv_amdgpu_winsys_create(int fd, uint64_t debug_flags, uint64_t perftest_flags); diff -Nru mesa-18.3.3/src/broadcom/cle/v3d_packet_v33.xml mesa-19.0.1/src/broadcom/cle/v3d_packet_v33.xml --- mesa-18.3.3/src/broadcom/cle/v3d_packet_v33.xml 2018-09-27 19:13:53.000000000 +0000 +++ mesa-19.0.1/src/broadcom/cle/v3d_packet_v33.xml 2019-03-31 23:16:37.000000000 +0000 @@ -99,12 +99,12 @@ - - - - - - + + + + + + @@ -174,6 +174,16 @@ + + + + + + + + + @@ -147,6 +149,7 @@ + - - + + @@ -909,6 +940,12 @@ EGLint *num_config + EGLBoolean eglClientSignalSyncEXT + EGLDisplay dpy + EGLSync sync + const EGLAttrib *attrib_list + + EGLint eglClientWaitSync EGLDisplay dpy EGLSync sync @@ -1191,9 +1228,21 @@ EGLNativeDisplayType display_id + char *eglGetDisplayDriverConfig + EGLDisplay dpy + + + const char *eglGetDisplayDriverName + EGLDisplay dpy + + EGLint eglGetError + EGLClientBuffer eglGetNativeClientBufferANDROID + const struct AHardwareBuffer *buffer + + EGLBoolean eglGetOutputLayersEXT EGLDisplay dpy const EGLAttrib *attrib_list @@ -1312,6 +1361,41 @@ EGLnsecsANDROID time + EGLBoolean eglGetCompositorTimingSupportedANDROID + EGLDisplay dpy + EGLSurface surface + EGLint name + + + EGLBoolean eglGetCompositorTimingANDROID + EGLDisplay dpy + EGLSurface surface + EGLint numTimestamps + const EGLint *names + EGLnsecsANDROID *values + + + EGLBoolean eglGetNextFrameIdANDROID + EGLDisplay dpy + EGLSurface surface + EGLuint64KHR *frameId + + + EGLBoolean 
eglGetFrameTimestampSupportedANDROID + EGLDisplay dpy + EGLSurface surface + EGLint timestamp + + + EGLBoolean eglGetFrameTimestampsANDROID + EGLDisplay dpy + EGLSurface surface + EGLuint64KHR frameId + EGLint numTimestamps + const EGLint *timestamps + EGLnsecsANDROID *values + + EGLenum eglQueryAPI @@ -1567,7 +1651,7 @@ EGLBoolean eglStreamConsumerGLTextureExternalAttribsNV EGLDisplay dpy EGLStreamKHR stream - EGLAttrib *attrib_list + const EGLAttrib *attrib_list EGLBoolean eglStreamConsumerOutputEXT @@ -1587,6 +1671,11 @@ const EGLAttrib *attrib_list + EGLBoolean eglStreamFlushNV + EGLDisplay dpy + EGLStreamKHR stream + + EGLBoolean eglSurfaceAttrib EGLDisplay dpy EGLSurface surface @@ -1641,6 +1730,12 @@ EGLSurface surface + EGLBoolean eglUnsignalSyncEXT + EGLDisplay dpy + EGLSync sync + const EGLAttrib *attrib_list + + EGLBoolean eglWaitClient @@ -1986,6 +2081,11 @@ + + + + + @@ -2010,6 +2110,30 @@ + + + + + + + + + + + + + + + + + + + + + + + + @@ -2057,6 +2181,13 @@ + + + + + + + @@ -2079,6 +2210,7 @@ + @@ -2131,6 +2263,11 @@ + + + + + @@ -2174,6 +2311,12 @@ + + + + + + @@ -2276,6 +2419,11 @@ + + + + + @@ -2690,6 +2838,7 @@ + @@ -2711,6 +2860,12 @@ + + + + + + @@ -2737,6 +2892,11 @@ + + + + + @@ -2831,6 +2991,11 @@ + + + + + @@ -2965,12 +3130,12 @@ - - - - - - + + + + + + diff -Nru mesa-18.3.3/src/egl/generate/genCommon.py mesa-19.0.1/src/egl/generate/genCommon.py --- mesa-18.3.3/src/egl/generate/genCommon.py 2017-11-05 00:14:08.000000000 +0000 +++ mesa-19.0.1/src/egl/generate/genCommon.py 1970-01-01 00:00:00.000000000 +0000 @@ -1,223 +0,0 @@ -#!/usr/bin/env python - -# (C) Copyright 2015, NVIDIA CORPORATION. -# All Rights Reserved. 
-# -# Permission is hereby granted, free of charge, to any person obtaining a -# copy of this software and associated documentation files (the "Software"), -# to deal in the Software without restriction, including without limitation -# on the rights to use, copy, modify, merge, publish, distribute, sub -# license, and/or sell copies of the Software, and to permit persons to whom -# the Software is furnished to do so, subject to the following conditions: -# -# The above copyright notice and this permission notice (including the next -# paragraph) shall be included in all copies or substantial portions of the -# Software. -# -# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -# FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL -# IBM AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING -# FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS -# IN THE SOFTWARE. -# -# Authors: -# Kyle Brenneman - -import collections -import re -import sys -import xml.etree.cElementTree as etree - -MAPI_TABLE_NUM_DYNAMIC = 4096 - -_LIBRARY_FEATURE_NAMES = { - # libGL and libGLdiapatch both include every function. 
- "gl" : None, - "gldispatch" : None, - "opengl" : frozenset(( "GL_VERSION_1_0", "GL_VERSION_1_1", - "GL_VERSION_1_2", "GL_VERSION_1_3", "GL_VERSION_1_4", "GL_VERSION_1_5", - "GL_VERSION_2_0", "GL_VERSION_2_1", "GL_VERSION_3_0", "GL_VERSION_3_1", - "GL_VERSION_3_2", "GL_VERSION_3_3", "GL_VERSION_4_0", "GL_VERSION_4_1", - "GL_VERSION_4_2", "GL_VERSION_4_3", "GL_VERSION_4_4", "GL_VERSION_4_5", - )), - "glesv1" : frozenset(("GL_VERSION_ES_CM_1_0", "GL_OES_point_size_array")), - "glesv2" : frozenset(("GL_ES_VERSION_2_0", "GL_ES_VERSION_3_0", - "GL_ES_VERSION_3_1" "GL_ES_VERSION_3_2", - )), -} - -def getFunctions(xmlFiles): - """ - Reads an XML file and returns all of the functions defined in it. - - xmlFile should be the path to Khronos's gl.xml file. The return value is a - sequence of FunctionDesc objects, ordered by slot number. - """ - roots = [ etree.parse(xmlFile).getroot() for xmlFile in xmlFiles ] - return getFunctionsFromRoots(roots) - -def getFunctionsFromRoots(roots): - functions = {} - for root in roots: - for func in _getFunctionList(root): - functions[func.name] = func - functions = functions.values() - - # Sort the function list by name. - functions = sorted(functions, key=lambda f: f.name) - - # Assign a slot number to each function. This isn't strictly necessary, - # since you can just look at the index in the list, but it makes it easier - # to include the slot when formatting output. - for i in range(len(functions)): - functions[i] = functions[i]._replace(slot=i) - - return functions - -def getExportNamesFromRoots(target, roots): - """ - Goes through the tags from gl.xml and returns a set of OpenGL - functions that a library should export. - - target should be one of "gl", "gldispatch", "opengl", "glesv1", or - "glesv2". 
- """ - featureNames = _LIBRARY_FEATURE_NAMES[target] - if featureNames is None: - return set(func.name for func in getFunctionsFromRoots(roots)) - - names = set() - for root in roots: - features = [] - for featElem in root.findall("feature"): - if featElem.get("name") in featureNames: - features.append(featElem) - for featElem in root.findall("extensions/extension"): - if featElem.get("name") in featureNames: - features.append(featElem) - for featElem in features: - for commandElem in featElem.findall("require/command"): - names.add(commandElem.get("name")) - return names - -class FunctionArg(collections.namedtuple("FunctionArg", "type name")): - @property - def dec(self): - """ - Returns a "TYPE NAME" string, suitable for a function prototype. - """ - rv = str(self.type) - if not rv.endswith("*"): - rv += " " - rv += self.name - return rv - -class FunctionDesc(collections.namedtuple("FunctionDesc", "name rt args slot")): - def hasReturn(self): - """ - Returns true if the function returns a value. - """ - return (self.rt != "void") - - @property - def decArgs(self): - """ - Returns a string with the types and names of the arguments, as you - would use in a function declaration. - """ - if not self.args: - return "void" - else: - return ", ".join(arg.dec for arg in self.args) - - @property - def callArgs(self): - """ - Returns a string with the names of the arguments, as you would use in a - function call. - """ - return ", ".join(arg.name for arg in self.args) - - @property - def basename(self): - assert self.name.startswith("gl") - return self.name[2:] - -def _getFunctionList(root): - for elem in root.findall("commands/command"): - yield _parseCommandElem(elem) - -def _parseCommandElem(elem): - protoElem = elem.find("proto") - (rt, name) = _parseProtoElem(protoElem) - - args = [] - for ch in elem.findall("param"): - # tags have the same format as a tag. 
- args.append(FunctionArg(*_parseProtoElem(ch))) - func = FunctionDesc(name, rt, tuple(args), slot=None) - - return func - -def _parseProtoElem(elem): - # If I just remove the tags and string the text together, I'll get valid C code. - text = _flattenText(elem) - text = text.strip() - m = re.match(r"^(.+)\b(\w+)(?:\s*\[\s*(\d*)\s*\])?$", text, re.S) - if m: - typename = _fixupTypeName(m.group(1)) - name = m.group(2) - if m.group(3): - # HACK: glPathGlyphIndexRangeNV defines an argument like this: - # GLuint baseAndCount[2] - # Convert it to a pointer and hope for the best. - typename += "*" - return (typename, name) - else: - raise ValueError("Can't parse element %r -> %r" % (elem, text)) - -def _flattenText(elem): - """ - Returns the text in an element and all child elements, with the tags - removed. - """ - text = "" - if elem.text is not None: - text = elem.text - for ch in elem: - text += _flattenText(ch) - if ch.tail is not None: - text += ch.tail - return text - -def _fixupTypeName(typeName): - """ - Converts a typename into a more consistent format. - """ - - rv = typeName.strip() - - # Replace "GLvoid" with just plain "void". - rv = re.sub(r"\bGLvoid\b", "void", rv) - - # Remove the vendor suffixes from types that have a suffix-less version. - rv = re.sub(r"\b(GLhalf|GLintptr|GLsizeiptr|GLint64|GLuint64)(?:ARB|EXT|NV|ATI)\b", r"\1", rv) - - rv = re.sub(r"\bGLvoid\b", "void", rv) - - # Clear out any leading and trailing whitespace. - rv = rv.strip() - - # Remove any whitespace before a '*' - rv = re.sub(r"\s+\*", r"*", rv) - - # Change "foo*" to "foo *" - rv = re.sub(r"([^\*])\*", r"\1 *", rv) - - # Condense all whitespace into a single space. 
- rv = re.sub(r"\s+", " ", rv) - - return rv - diff -Nru mesa-18.3.3/src/egl/generate/gen_egl_dispatch.py mesa-19.0.1/src/egl/generate/gen_egl_dispatch.py --- mesa-18.3.3/src/egl/generate/gen_egl_dispatch.py 2017-11-05 00:14:08.000000000 +0000 +++ mesa-19.0.1/src/egl/generate/gen_egl_dispatch.py 2019-03-31 23:16:37.000000000 +0000 @@ -34,25 +34,23 @@ import argparse import collections -import imp +import eglFunctionList import sys import textwrap +import os +NEWAPI = os.path.join(os.path.dirname(__file__), "..", "..", "mapi", "new") +sys.path.insert(0, NEWAPI) import genCommon def main(): parser = argparse.ArgumentParser() parser.add_argument("target", choices=("header", "source"), help="Whether to build the source or header file.") - parser.add_argument("func_list_file", help="The function list .py file.") parser.add_argument("xml_files", nargs="+", help="The XML files with the EGL function lists.") args = parser.parse_args() - # The function list is a Python module, but it's specified on the command - # line. 
- eglFunctionList = imp.load_source("eglFunctionList", args.func_list_file) - xmlFunctions = genCommon.getFunctions(args.xml_files) xmlByName = dict((f.name, f) for f in xmlFunctions) functions = [] diff -Nru mesa-18.3.3/src/egl/main/eglapi.c mesa-19.0.1/src/egl/main/eglapi.c --- mesa-18.3.3/src/egl/main/eglapi.c 2018-12-07 18:58:04.000000000 +0000 +++ mesa-19.0.1/src/egl/main/eglapi.c 2019-03-31 23:16:37.000000000 +0000 @@ -90,6 +90,8 @@ #include "c11/threads.h" #include "util/macros.h" +#include "eglapi.h" +#include "egldefines.h" #include "eglglobals.h" #include "eglcontext.h" #include "egldisplay.h" @@ -526,6 +528,7 @@ _eglAppendExtension(&exts, "EGL_MESA_configless_context"); _EGL_CHECK_EXTENSION(MESA_drm_image); _EGL_CHECK_EXTENSION(MESA_image_dma_buf_export); + _EGL_CHECK_EXTENSION(MESA_query_driver); _EGL_CHECK_EXTENSION(NOK_swap_region); _EGL_CHECK_EXTENSION(NOK_texture_from_pixmap); @@ -2647,6 +2650,38 @@ RETURN_EGL_SUCCESS(disp, EGL_TRUE); } +static char * EGLAPIENTRY +eglGetDisplayDriverConfig(EGLDisplay dpy) +{ + _EGLDisplay *disp = _eglLockDisplay(dpy); + _EGLDriver *drv; + char *ret; + + _EGL_FUNC_START(disp, EGL_NONE, NULL, NULL); + _EGL_CHECK_DISPLAY(disp, NULL, drv); + + assert(disp->Extensions.MESA_query_driver); + + ret = drv->API.QueryDriverConfig(disp); + RETURN_EGL_EVAL(disp, ret); +} + +static const char * EGLAPIENTRY +eglGetDisplayDriverName(EGLDisplay dpy) +{ + _EGLDisplay *disp = _eglLockDisplay(dpy); + _EGLDriver *drv; + const char *ret; + + _EGL_FUNC_START(disp, EGL_NONE, NULL, NULL); + _EGL_CHECK_DISPLAY(disp, NULL, drv); + + assert(disp->Extensions.MESA_query_driver); + + ret = drv->API.QueryDriverName(disp); + RETURN_EGL_EVAL(disp, ret); +} + __eglMustCastToProperFunctionPointerType EGLAPIENTRY eglGetProcAddress(const char *procname) { diff -Nru mesa-18.3.3/src/egl/main/eglapi.h mesa-19.0.1/src/egl/main/eglapi.h --- mesa-18.3.3/src/egl/main/eglapi.h 2018-02-08 14:40:56.000000000 +0000 +++ mesa-19.0.1/src/egl/main/eglapi.h 2019-03-31 
23:16:37.000000000 +0000 @@ -31,6 +31,7 @@ #ifndef EGLAPI_INCLUDED #define EGLAPI_INCLUDED +#include "egltypedefs.h" #ifdef __cplusplus extern "C" { @@ -54,6 +55,8 @@ /* driver funcs */ EGLBoolean (*Initialize)(_EGLDriver *, _EGLDisplay *dpy); EGLBoolean (*Terminate)(_EGLDriver *, _EGLDisplay *dpy); + const char *(*QueryDriverName)(_EGLDisplay *dpy); + char *(*QueryDriverConfig)(_EGLDisplay *dpy); /* config funcs */ EGLBoolean (*GetConfigs)(_EGLDriver *drv, _EGLDisplay *dpy, diff -Nru mesa-18.3.3/src/egl/main/eglcontext.c mesa-19.0.1/src/egl/main/eglcontext.c --- mesa-18.3.3/src/egl/main/eglcontext.c 2018-12-07 18:58:04.000000000 +0000 +++ mesa-19.0.1/src/egl/main/eglcontext.c 2019-03-31 23:16:37.000000000 +0000 @@ -37,6 +37,7 @@ #include "eglcurrent.h" #include "eglsurface.h" #include "egllog.h" +#include "util/macros.h" /** diff -Nru mesa-18.3.3/src/egl/main/eglcurrent.c mesa-19.0.1/src/egl/main/eglcurrent.c --- mesa-18.3.3/src/egl/main/eglcurrent.c 2017-11-05 00:14:08.000000000 +0000 +++ mesa-19.0.1/src/egl/main/eglcurrent.c 2019-03-31 23:16:37.000000000 +0000 @@ -310,20 +310,28 @@ mtx_unlock(_eglGlobal.Mutex); - if (callback != NULL) { - char *buf = NULL; + char *message_buf = NULL; + if (message != NULL) { + va_start(args, message); + if (vasprintf(&message_buf, message, args) < 0) + message_buf = NULL; + va_end(args); + } - if (message != NULL) { - va_start(args, message); - if (vasprintf(&buf, message, args) < 0) - buf = NULL; + if (callback != NULL) { + callback(error, funcName, type, thr->Label, thr->CurrentObjectLabel, + message_buf); + } - va_end(args); + if (type == EGL_DEBUG_MSG_CRITICAL_KHR || type == EGL_DEBUG_MSG_ERROR_KHR) { + char *func_message_buf = NULL; + /* Note: _eglError() is often called with msg == thr->currentFuncName */ + if (message_buf && funcName && strcmp(message_buf, funcName) != 0) { + if (asprintf(&func_message_buf, "%s: %s", funcName, message_buf) < 0) + func_message_buf = NULL; } - callback(error, funcName, type, thr->Label, 
thr->CurrentObjectLabel, buf); - free(buf); + _eglInternalError(error, func_message_buf ? func_message_buf : funcName); + free(func_message_buf); } - - if (type == EGL_DEBUG_MSG_CRITICAL_KHR || type == EGL_DEBUG_MSG_ERROR_KHR) - _eglInternalError(error, funcName); + free(message_buf); } diff -Nru mesa-18.3.3/src/egl/main/egldefines.h mesa-19.0.1/src/egl/main/egldefines.h --- mesa-18.3.3/src/egl/main/egldefines.h 2017-11-05 00:14:08.000000000 +0000 +++ mesa-19.0.1/src/egl/main/egldefines.h 2019-03-31 23:16:37.000000000 +0000 @@ -34,8 +34,6 @@ #ifndef EGLDEFINES_INCLUDED #define EGLDEFINES_INCLUDED -#include "util/macros.h" - #ifdef __cplusplus extern "C" { #endif diff -Nru mesa-18.3.3/src/egl/main/egldevice.c mesa-19.0.1/src/egl/main/egldevice.c --- mesa-18.3.3/src/egl/main/egldevice.c 2018-12-07 18:58:04.000000000 +0000 +++ mesa-19.0.1/src/egl/main/egldevice.c 2019-03-31 23:16:37.000000000 +0000 @@ -202,18 +202,6 @@ }; } -/* Ideally we'll have an extension which passes the render node, - * instead of the card one + magic. - * - * Then we can move this in _eglQueryDeviceStringEXT below. Until then - * keep it separate. 
- */ -const char * -_eglGetDRMDeviceRenderNode(_EGLDevice *dev) -{ - return dev->device->nodes[DRM_NODE_RENDER]; -} - EGLBoolean _eglQueryDeviceAttribEXT(_EGLDevice *dev, EGLint attribute, EGLAttrib *value) diff -Nru mesa-18.3.3/src/egl/main/egldevice.h mesa-19.0.1/src/egl/main/egldevice.h --- mesa-18.3.3/src/egl/main/egldevice.h 2018-12-07 18:58:04.000000000 +0000 +++ mesa-19.0.1/src/egl/main/egldevice.h 2019-03-31 23:16:37.000000000 +0000 @@ -31,9 +31,9 @@ #include +#include #include "egltypedefs.h" - #ifdef __cplusplus extern "C" { #endif @@ -68,9 +68,6 @@ EGLBoolean _eglDeviceSupports(_EGLDevice *dev, _EGLDeviceExtension ext); -const char * -_eglGetDRMDeviceRenderNode(_EGLDevice *dev); - EGLBoolean _eglQueryDeviceAttribEXT(_EGLDevice *dev, EGLint attribute, EGLAttrib *value); diff -Nru mesa-18.3.3/src/egl/main/egldisplay.h mesa-19.0.1/src/egl/main/egldisplay.h --- mesa-18.3.3/src/egl/main/egldisplay.h 2018-12-07 18:58:04.000000000 +0000 +++ mesa-19.0.1/src/egl/main/egldisplay.h 2019-03-31 23:16:37.000000000 +0000 @@ -138,6 +138,7 @@ EGLBoolean MESA_drm_image; EGLBoolean MESA_image_dma_buf_export; + EGLBoolean MESA_query_driver; EGLBoolean NOK_swap_region; EGLBoolean NOK_texture_from_pixmap; diff -Nru mesa-18.3.3/src/egl/main/eglentrypoint.h mesa-19.0.1/src/egl/main/eglentrypoint.h --- mesa-18.3.3/src/egl/main/eglentrypoint.h 2018-12-07 18:58:04.000000000 +0000 +++ mesa-19.0.1/src/egl/main/eglentrypoint.h 2019-03-31 23:16:37.000000000 +0000 @@ -42,6 +42,8 @@ EGL_ENTRYPOINT(eglGetCurrentDisplay) EGL_ENTRYPOINT(eglGetCurrentSurface) EGL_ENTRYPOINT(eglGetDisplay) +EGL_ENTRYPOINT(eglGetDisplayDriverConfig) +EGL_ENTRYPOINT(eglGetDisplayDriverName) EGL_ENTRYPOINT(eglGetError) EGL_ENTRYPOINT(eglGetPlatformDisplay) EGL_ENTRYPOINT(eglGetPlatformDisplayEXT) diff -Nru mesa-18.3.3/src/egl/main/eglglobals.c mesa-19.0.1/src/egl/main/eglglobals.c --- mesa-18.3.3/src/egl/main/eglglobals.c 2018-12-07 18:58:04.000000000 +0000 +++ mesa-19.0.1/src/egl/main/eglglobals.c 2019-03-31 
23:16:37.000000000 +0000 @@ -40,6 +40,8 @@ #include "egldriver.h" #include "egllog.h" +#include "util/macros.h" + #ifdef HAVE_MINCORE #include #include diff -Nru mesa-18.3.3/src/egl/main/eglsurface.c mesa-19.0.1/src/egl/main/eglsurface.c --- mesa-18.3.3/src/egl/main/eglsurface.c 2018-12-07 18:58:04.000000000 +0000 +++ mesa-19.0.1/src/egl/main/eglsurface.c 2019-03-31 23:16:37.000000000 +0000 @@ -36,6 +36,7 @@ #include #include #include +#include "egldefines.h" #include "egldisplay.h" #include "egldriver.h" #include "eglcontext.h" @@ -44,6 +45,7 @@ #include "egllog.h" #include "eglsurface.h" +#include "util/macros.h" /** * Parse the list of surface attributes and return the proper error code. diff -Nru mesa-18.3.3/src/egl/Makefile.am mesa-19.0.1/src/egl/Makefile.am --- mesa-18.3.3/src/egl/Makefile.am 2018-12-07 18:58:04.000000000 +0000 +++ mesa-19.0.1/src/egl/Makefile.am 2019-03-31 23:16:37.000000000 +0000 @@ -119,8 +119,7 @@ -I$(top_srcdir)/src/egl/drivers/dri2 \ -I$(top_srcdir)/src/gbm/backends/dri \ -I$(top_builddir)/src/egl/wayland/wayland-drm \ - -I$(top_srcdir)/src/egl/wayland/wayland-drm \ - -DDEFAULT_DRIVER_DIR=\"$(DRI_DRIVER_SEARCH_DIR)\" + -I$(top_srcdir)/src/egl/wayland/wayland-drm nodist_libEGL_common_la_SOURCES = \ $(dri2_backend_GENERATED_FILES) @@ -137,20 +136,26 @@ $(LIBDRM_LIBS) \ $(CLOCK_LIB) -GLVND_GEN_DEPS = generate/gen_egl_dispatch.py \ - generate/egl.xml generate/eglFunctionList.py generate/genCommon.py \ +# dummy rule to keep dist happy +$(top_scrdir)/src/mapi/new/genCommon.py: + +GLVND_GEN_EGL_DEPS = \ + generate/gen_egl_dispatch.py \ + generate/eglFunctionList.py \ + generate/egl.xml \ generate/egl_other.xml +GLVND_GEN_DEPS = $(top_scrdir)/src/mapi/new/genCommon.py \ + $(GLVND_GEN_EGL_DEPS) + PYTHON_GEN = $(AM_V_GEN)$(PYTHON) $(PYTHON_FLAGS) g_egldispatchstubs.c: $(GLVND_GEN_DEPS) $(PYTHON_GEN) $(top_srcdir)/src/egl/generate/gen_egl_dispatch.py source \ - $(top_srcdir)/src/egl/generate/eglFunctionList.py \ 
$(top_srcdir)/src/egl/generate/egl.xml \ $(top_srcdir)/src/egl/generate/egl_other.xml > $@ g_egldispatchstubs.h: $(GLVND_GEN_DEPS) $(PYTHON_GEN) $(top_srcdir)/src/egl/generate/gen_egl_dispatch.py header \ - $(top_srcdir)/src/egl/generate/eglFunctionList.py \ $(top_srcdir)/src/egl/generate/egl.xml \ $(top_srcdir)/src/egl/generate/egl_other.xml > $@ @@ -229,6 +234,6 @@ drivers/haiku \ main/egl.def \ main/README.txt \ - $(GLVND_GEN_DEPS) \ + $(GLVND_GEN_EGL_DEPS) \ main/50_mesa.json \ meson.build diff -Nru mesa-18.3.3/src/egl/meson.build mesa-19.0.1/src/egl/meson.build --- mesa-18.3.3/src/egl/meson.build 2018-12-07 18:58:04.000000000 +0000 +++ mesa-19.0.1/src/egl/meson.build 2019-03-31 23:16:37.000000000 +0000 @@ -1,4 +1,4 @@ -# Copyright © 2017 Intel Corporation +# Copyright © 2017 Intel Corporation # Permission is hereby granted, free of charge, to any person obtaining a copy # of this software and associated documentation files (the "Software"), to deal @@ -62,28 +62,28 @@ g_egldispatchstubs_c = custom_target( 'g_egldispatchstubs.c', input : [ - 'generate/gen_egl_dispatch.py', 'generate/eglFunctionList.py', + 'generate/gen_egl_dispatch.py', 'generate/egl.xml', 'generate/egl_other.xml' ], output : 'g_egldispatchstubs.c', command : [ - prog_python, '@INPUT0@', 'source', '@INPUT1@', '@INPUT2@', '@INPUT3@' + prog_python, '@INPUT0@', 'source', '@INPUT1@', '@INPUT2@', ], - depend_files : files('generate/genCommon.py'), + depend_files : [ files('generate/eglFunctionList.py'), genCommon_py, ], capture : true, ) g_egldispatchstubs_h = custom_target( 'g_egldispatchstubs.h', input : [ - 'generate/gen_egl_dispatch.py', 'generate/eglFunctionList.py', + 'generate/gen_egl_dispatch.py', 'generate/egl.xml', 'generate/egl_other.xml' ], output : 'g_egldispatchstubs.h', command : [ - prog_python, '@INPUT0@', 'header', '@INPUT1@', '@INPUT2@', '@INPUT3@' + prog_python, '@INPUT0@', 'header', '@INPUT1@', '@INPUT2@', ], - depend_files : files('generate/genCommon.py'), + depend_files : [ 
files('generate/eglFunctionList.py'), genCommon_py, ], capture : true, ) @@ -93,13 +93,11 @@ 'drivers/dri2/egl_dri2.h', 'drivers/dri2/egl_dri2_fallbacks.h', ) - c_args_for_egl += [ - '-DDEFAULT_DRIVER_DIR="@0@"'.format(dri_search_path), - ] + link_for_egl += [libloader, libxmlconfig] + incs_for_egl += inc_loader if with_platform_x11 files_egl += files('drivers/dri2/platform_x11.c') - incs_for_egl += inc_loader if with_dri3 files_egl += files('drivers/dri2/platform_x11_dri3.c') link_for_egl += libloader_dri3_helper @@ -108,13 +106,12 @@ endif if with_platform_drm files_egl += files('drivers/dri2/platform_drm.c') - link_for_egl += [libloader, libgbm, libxmlconfig] - incs_for_egl += [inc_loader, inc_gbm, include_directories('../gbm/main')] + link_for_egl += libgbm + incs_for_egl += [inc_gbm, include_directories('../gbm/main')] deps_for_egl += dep_libdrm endif if with_platform_surfaceless files_egl += files('drivers/dri2/platform_surfaceless.c') - incs_for_egl += [inc_loader] endif if with_platform_wayland deps_for_egl += [dep_wayland_client, dep_wayland_server, dep_wayland_egl_headers] @@ -130,7 +127,6 @@ if with_platform_android deps_for_egl += dep_android files_egl += files('drivers/dri2/platform_android.c') - incs_for_egl += [inc_loader] endif elif with_platform_haiku incs_for_egl += inc_haikugl @@ -169,7 +165,7 @@ '-D_EGL_NATIVE_PLATFORM=_EGL_PLATFORM_@0@'.format(egl_native_platform.to_upper()), ], include_directories : incs_for_egl, - link_with : [link_for_egl, libloader, libxmlconfig, libglapi, libmesa_util], + link_with : [link_for_egl, libglapi, libmesa_util], link_args : [ld_args_bsymbolic, ld_args_gc_sections], dependencies : [deps_for_egl, dep_dl, dep_libdrm, dep_clock, dep_thread], install : true, @@ -203,11 +199,13 @@ test('egl-symbols-check', find_program('egl-symbols-check'), env : env_test, - args : libegl + args : libegl, + suite : ['egl'], ) endif test('egl-entrypoint-check', find_program('egl-entrypoint-check'), - env : [ 'srcdir=' + 
meson.current_source_dir() ] + env : ['srcdir=' + meson.current_source_dir()], + suite : ['egl'], ) endif diff -Nru mesa-18.3.3/src/egl/wayland/wayland-drm/wayland-drm.c mesa-19.0.1/src/egl/wayland/wayland-drm/wayland-drm.c --- mesa-18.3.3/src/egl/wayland/wayland-drm/wayland-drm.c 2018-01-06 23:02:18.000000000 +0000 +++ mesa-19.0.1/src/egl/wayland/wayland-drm/wayland-drm.c 2019-03-31 23:16:37.000000000 +0000 @@ -111,6 +111,8 @@ uint32_t stride, uint32_t format) { switch (format) { + case WL_DRM_FORMAT_ABGR2101010: + case WL_DRM_FORMAT_XBGR2101010: case WL_DRM_FORMAT_ARGB2101010: case WL_DRM_FORMAT_XRGB2101010: case WL_DRM_FORMAT_ARGB8888: @@ -210,10 +212,31 @@ wl_resource_set_implementation(resource, &drm_interface, data, NULL); wl_resource_post_event(resource, WL_DRM_DEVICE, drm->device_name); - wl_resource_post_event(resource, WL_DRM_FORMAT, - WL_DRM_FORMAT_ARGB2101010); - wl_resource_post_event(resource, WL_DRM_FORMAT, - WL_DRM_FORMAT_XRGB2101010); + + if (drm->callbacks.is_format_supported(drm->user_data, + WL_DRM_FORMAT_ARGB2101010)) { + wl_resource_post_event(resource, WL_DRM_FORMAT, + WL_DRM_FORMAT_ARGB2101010); + } + + if (drm->callbacks.is_format_supported(drm->user_data, + WL_DRM_FORMAT_XRGB2101010)) { + wl_resource_post_event(resource, WL_DRM_FORMAT, + WL_DRM_FORMAT_XRGB2101010); + } + + if (drm->callbacks.is_format_supported(drm->user_data, + WL_DRM_FORMAT_ABGR2101010)) { + wl_resource_post_event(resource, WL_DRM_FORMAT, + WL_DRM_FORMAT_ABGR2101010); + } + + if (drm->callbacks.is_format_supported(drm->user_data, + WL_DRM_FORMAT_XBGR2101010)) { + wl_resource_post_event(resource, WL_DRM_FORMAT, + WL_DRM_FORMAT_XBGR2101010); + } + wl_resource_post_event(resource, WL_DRM_FORMAT, WL_DRM_FORMAT_ARGB8888); wl_resource_post_event(resource, WL_DRM_FORMAT, diff -Nru mesa-18.3.3/src/egl/wayland/wayland-drm/wayland-drm.h mesa-19.0.1/src/egl/wayland/wayland-drm/wayland-drm.h --- mesa-18.3.3/src/egl/wayland/wayland-drm/wayland-drm.h 2017-11-14 18:46:21.000000000 
+0000 +++ mesa-19.0.1/src/egl/wayland/wayland-drm/wayland-drm.h 2019-03-31 23:16:37.000000000 +0000 @@ -14,6 +14,8 @@ struct wl_drm_buffer *buffer); void (*release_buffer)(void *user_data, struct wl_drm_buffer *buffer); + + bool (*is_format_supported)(void *user_data, uint32_t format); }; diff -Nru mesa-18.3.3/src/freedreno/.dir-locals.el mesa-19.0.1/src/freedreno/.dir-locals.el --- mesa-18.3.3/src/freedreno/.dir-locals.el 1970-01-01 00:00:00.000000000 +0000 +++ mesa-19.0.1/src/freedreno/.dir-locals.el 2019-03-31 23:16:37.000000000 +0000 @@ -0,0 +1,8 @@ +((prog-mode + (indent-tabs-mode . t) + (tab-width . 4) + (c-basic-offset . 4) + (c-file-style . "k&r") + (fill-column . 78) + ) + ) diff -Nru mesa-18.3.3/src/freedreno/drm/freedreno_bo.c mesa-19.0.1/src/freedreno/drm/freedreno_bo.c --- mesa-18.3.3/src/freedreno/drm/freedreno_bo.c 1970-01-01 00:00:00.000000000 +0000 +++ mesa-19.0.1/src/freedreno/drm/freedreno_bo.c 2019-03-31 23:16:37.000000000 +0000 @@ -0,0 +1,368 @@ +/* + * Copyright (C) 2012-2018 Rob Clark + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + * Authors: + * Rob Clark + */ + +#include "os/os_mman.h" + +#include "freedreno_drmif.h" +#include "freedreno_priv.h" + +pthread_mutex_t table_lock = PTHREAD_MUTEX_INITIALIZER; +void bo_del(struct fd_bo *bo); + +/* set buffer name, and add to table, call w/ table_lock held: */ +static void set_name(struct fd_bo *bo, uint32_t name) +{ + bo->name = name; + /* add ourself into the handle table: */ + _mesa_hash_table_insert(bo->dev->name_table, &bo->name, bo); +} + +/* lookup a buffer, call w/ table_lock held: */ +static struct fd_bo * lookup_bo(struct hash_table *tbl, uint32_t key) +{ + struct fd_bo *bo = NULL; + struct hash_entry *entry = _mesa_hash_table_search(tbl, &key); + if (entry) { + /* found, incr refcnt and return: */ + bo = fd_bo_ref(entry->data); + + /* don't break the bucket if this bo was found in one */ + list_delinit(&bo->list); + } + return bo; +} + +/* allocate a new buffer object, call w/ table_lock held */ +static struct fd_bo * bo_from_handle(struct fd_device *dev, + uint32_t size, uint32_t handle) +{ + struct fd_bo *bo; + + bo = dev->funcs->bo_from_handle(dev, size, handle); + if (!bo) { + struct drm_gem_close req = { + .handle = handle, + }; + drmIoctl(dev->fd, DRM_IOCTL_GEM_CLOSE, &req); + return NULL; + } + bo->dev = fd_device_ref(dev); + bo->size = size; + bo->handle = handle; + p_atomic_set(&bo->refcnt, 1); + list_inithead(&bo->list); + /* add ourself into the handle table: */ + _mesa_hash_table_insert(dev->handle_table, &bo->handle, bo); + return bo; +} + +static struct fd_bo * +bo_new(struct fd_device *dev, uint32_t size, uint32_t flags, + struct fd_bo_cache *cache) +{ + struct fd_bo *bo = NULL; + uint32_t handle; + int ret; + + bo = fd_bo_cache_alloc(cache, 
&size, flags); + if (bo) + return bo; + + ret = dev->funcs->bo_new_handle(dev, size, flags, &handle); + if (ret) + return NULL; + + pthread_mutex_lock(&table_lock); + bo = bo_from_handle(dev, size, handle); + pthread_mutex_unlock(&table_lock); + + VG_BO_ALLOC(bo); + + return bo; +} + +struct fd_bo * +_fd_bo_new(struct fd_device *dev, uint32_t size, uint32_t flags) +{ + struct fd_bo *bo = bo_new(dev, size, flags, &dev->bo_cache); + if (bo) + bo->bo_reuse = BO_CACHE; + return bo; +} + +void +_fd_bo_set_name(struct fd_bo *bo, const char *fmt, va_list ap) +{ + bo->funcs->set_name(bo, fmt, ap); +} + +/* internal function to allocate bo's that use the ringbuffer cache + * instead of the normal bo_cache. The purpose is, because cmdstream + * bo's get vmap'd on the kernel side, and that is expensive, we want + * to re-use cmdstream bo's for cmdstream and not unrelated purposes. + */ +struct fd_bo * +fd_bo_new_ring(struct fd_device *dev, uint32_t size, uint32_t flags) +{ + struct fd_bo *bo = bo_new(dev, size, flags, &dev->ring_cache); + if (bo) + bo->bo_reuse = RING_CACHE; + fd_bo_set_name(bo, "cmdstream"); + return bo; +} + +struct fd_bo * +fd_bo_from_handle(struct fd_device *dev, uint32_t handle, uint32_t size) +{ + struct fd_bo *bo = NULL; + + pthread_mutex_lock(&table_lock); + + bo = lookup_bo(dev->handle_table, handle); + if (bo) + goto out_unlock; + + bo = bo_from_handle(dev, size, handle); + + VG_BO_ALLOC(bo); + +out_unlock: + pthread_mutex_unlock(&table_lock); + + return bo; +} + +struct fd_bo * +fd_bo_from_dmabuf(struct fd_device *dev, int fd) +{ + int ret, size; + uint32_t handle; + struct fd_bo *bo; + + pthread_mutex_lock(&table_lock); + ret = drmPrimeFDToHandle(dev->fd, fd, &handle); + if (ret) { + pthread_mutex_unlock(&table_lock); + return NULL; + } + + bo = lookup_bo(dev->handle_table, handle); + if (bo) + goto out_unlock; + + /* lseek() to get bo size */ + size = lseek(fd, 0, SEEK_END); + lseek(fd, 0, SEEK_CUR); + + bo = bo_from_handle(dev, size, handle); + 
+ VG_BO_ALLOC(bo); + +out_unlock: + pthread_mutex_unlock(&table_lock); + + return bo; +} + +struct fd_bo * fd_bo_from_name(struct fd_device *dev, uint32_t name) +{ + struct drm_gem_open req = { + .name = name, + }; + struct fd_bo *bo; + + pthread_mutex_lock(&table_lock); + + /* check name table first, to see if bo is already open: */ + bo = lookup_bo(dev->name_table, name); + if (bo) + goto out_unlock; + + if (drmIoctl(dev->fd, DRM_IOCTL_GEM_OPEN, &req)) { + ERROR_MSG("gem-open failed: %s", strerror(errno)); + goto out_unlock; + } + + bo = lookup_bo(dev->handle_table, req.handle); + if (bo) + goto out_unlock; + + bo = bo_from_handle(dev, req.size, req.handle); + if (bo) { + set_name(bo, name); + VG_BO_ALLOC(bo); + } + +out_unlock: + pthread_mutex_unlock(&table_lock); + + return bo; +} + +uint64_t fd_bo_get_iova(struct fd_bo *bo) +{ + if (!bo->iova) + bo->iova = bo->funcs->iova(bo); + return bo->iova; +} + +void fd_bo_put_iova(struct fd_bo *bo) +{ + /* currently a no-op */ +} + +struct fd_bo * fd_bo_ref(struct fd_bo *bo) +{ + p_atomic_inc(&bo->refcnt); + return bo; +} + +void fd_bo_del(struct fd_bo *bo) +{ + struct fd_device *dev = bo->dev; + + if (!atomic_dec_and_test(&bo->refcnt)) + return; + + pthread_mutex_lock(&table_lock); + + if ((bo->bo_reuse == BO_CACHE) && (fd_bo_cache_free(&dev->bo_cache, bo) == 0)) + goto out; + if ((bo->bo_reuse == RING_CACHE) && (fd_bo_cache_free(&dev->ring_cache, bo) == 0)) + goto out; + + bo_del(bo); + fd_device_del_locked(dev); +out: + pthread_mutex_unlock(&table_lock); +} + +/* Called under table_lock */ +void bo_del(struct fd_bo *bo) +{ + VG_BO_FREE(bo); + + if (bo->map) + os_munmap(bo->map, bo->size); + + /* TODO probably bo's in bucket list get removed from + * handle table?? 
+ */ + + if (bo->handle) { + struct drm_gem_close req = { + .handle = bo->handle, + }; + _mesa_hash_table_remove_key(bo->dev->handle_table, &bo->handle); + if (bo->name) + _mesa_hash_table_remove_key(bo->dev->name_table, &bo->name); + drmIoctl(bo->dev->fd, DRM_IOCTL_GEM_CLOSE, &req); + } + + bo->funcs->destroy(bo); +} + +int fd_bo_get_name(struct fd_bo *bo, uint32_t *name) +{ + if (!bo->name) { + struct drm_gem_flink req = { + .handle = bo->handle, + }; + int ret; + + ret = drmIoctl(bo->dev->fd, DRM_IOCTL_GEM_FLINK, &req); + if (ret) { + return ret; + } + + pthread_mutex_lock(&table_lock); + set_name(bo, req.name); + pthread_mutex_unlock(&table_lock); + bo->bo_reuse = NO_CACHE; + } + + *name = bo->name; + + return 0; +} + +uint32_t fd_bo_handle(struct fd_bo *bo) +{ + return bo->handle; +} + +int fd_bo_dmabuf(struct fd_bo *bo) +{ + int ret, prime_fd; + + ret = drmPrimeHandleToFD(bo->dev->fd, bo->handle, DRM_CLOEXEC, + &prime_fd); + if (ret) { + ERROR_MSG("failed to get dmabuf fd: %d", ret); + return ret; + } + + bo->bo_reuse = NO_CACHE; + + return prime_fd; +} + +uint32_t fd_bo_size(struct fd_bo *bo) +{ + return bo->size; +} + +void * fd_bo_map(struct fd_bo *bo) +{ + if (!bo->map) { + uint64_t offset; + int ret; + + ret = bo->funcs->offset(bo, &offset); + if (ret) { + return NULL; + } + + bo->map = os_mmap(0, bo->size, PROT_READ | PROT_WRITE, MAP_SHARED, + bo->dev->fd, offset); + if (bo->map == MAP_FAILED) { + ERROR_MSG("mmap failed: %s", strerror(errno)); + bo->map = NULL; + } + } + return bo->map; +} + +/* a bit odd to take the pipe as an arg, but it's a, umm, quirk of kgsl.. 
*/ +int fd_bo_cpu_prep(struct fd_bo *bo, struct fd_pipe *pipe, uint32_t op) +{ + return bo->funcs->cpu_prep(bo, pipe, op); +} + +void fd_bo_cpu_fini(struct fd_bo *bo) +{ + bo->funcs->cpu_fini(bo); +} diff -Nru mesa-18.3.3/src/freedreno/drm/freedreno_bo_cache.c mesa-19.0.1/src/freedreno/drm/freedreno_bo_cache.c --- mesa-18.3.3/src/freedreno/drm/freedreno_bo_cache.c 1970-01-01 00:00:00.000000000 +0000 +++ mesa-19.0.1/src/freedreno/drm/freedreno_bo_cache.c 2019-03-31 23:16:37.000000000 +0000 @@ -0,0 +1,218 @@ +/* + * Copyright (C) 2012-2018 Rob Clark + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ * + * Authors: + * Rob Clark + */ + +#include "freedreno_drmif.h" +#include "freedreno_priv.h" + +void bo_del(struct fd_bo *bo); +extern pthread_mutex_t table_lock; + +static void +add_bucket(struct fd_bo_cache *cache, int size) +{ + unsigned int i = cache->num_buckets; + + assert(i < ARRAY_SIZE(cache->cache_bucket)); + + list_inithead(&cache->cache_bucket[i].list); + cache->cache_bucket[i].size = size; + cache->num_buckets++; +} + +/** + * @coarse: if true, only power-of-two bucket sizes, otherwise + * fill in for a bit smoother size curve.. + */ +void +fd_bo_cache_init(struct fd_bo_cache *cache, int coarse) +{ + unsigned long size, cache_max_size = 64 * 1024 * 1024; + + /* OK, so power of two buckets was too wasteful of memory. + * Give 3 other sizes between each power of two, to hopefully + * cover things accurately enough. (The alternative is + * probably to just go for exact matching of sizes, and assume + * that for things like composited window resize the tiled + * width/height alignment and rounding of sizes to pages will + * get us useful cache hit rates anyway) + */ + add_bucket(cache, 4096); + add_bucket(cache, 4096 * 2); + if (!coarse) + add_bucket(cache, 4096 * 3); + + /* Initialize the linked lists for BO reuse cache. */ + for (size = 4 * 4096; size <= cache_max_size; size *= 2) { + add_bucket(cache, size); + if (!coarse) { + add_bucket(cache, size + size * 1 / 4); + add_bucket(cache, size + size * 2 / 4); + add_bucket(cache, size + size * 3 / 4); + } + } +} + +/* Frees older cached buffers. 
Called under table_lock */ +void +fd_bo_cache_cleanup(struct fd_bo_cache *cache, time_t time) +{ + int i; + + if (cache->time == time) + return; + + for (i = 0; i < cache->num_buckets; i++) { + struct fd_bo_bucket *bucket = &cache->cache_bucket[i]; + struct fd_bo *bo; + + while (!LIST_IS_EMPTY(&bucket->list)) { + bo = LIST_ENTRY(struct fd_bo, bucket->list.next, list); + + /* keep things in cache for at least 1 second: */ + if (time && ((time - bo->free_time) <= 1)) + break; + + VG_BO_OBTAIN(bo); + list_del(&bo->list); + bo_del(bo); + } + } + + cache->time = time; +} + +static struct fd_bo_bucket * get_bucket(struct fd_bo_cache *cache, uint32_t size) +{ + int i; + + /* hmm, this is what intel does, but I suppose we could calculate our + * way to the correct bucket size rather than looping.. + */ + for (i = 0; i < cache->num_buckets; i++) { + struct fd_bo_bucket *bucket = &cache->cache_bucket[i]; + if (bucket->size >= size) { + return bucket; + } + } + + return NULL; +} + +static int is_idle(struct fd_bo *bo) +{ + return fd_bo_cpu_prep(bo, NULL, + DRM_FREEDRENO_PREP_READ | + DRM_FREEDRENO_PREP_WRITE | + DRM_FREEDRENO_PREP_NOSYNC) == 0; +} + +static struct fd_bo *find_in_bucket(struct fd_bo_bucket *bucket, uint32_t flags) +{ + struct fd_bo *bo = NULL; + + /* TODO .. if we had an ALLOC_FOR_RENDER flag like intel, we could + * skip the busy check.. if it is only going to be a render target + * then we probably don't need to stall.. + * + * NOTE that intel takes ALLOC_FOR_RENDER bo's from the list tail + * (MRU, since likely to be in GPU cache), rather than head (LRU).. + */ + pthread_mutex_lock(&table_lock); + if (!LIST_IS_EMPTY(&bucket->list)) { + bo = LIST_ENTRY(struct fd_bo, bucket->list.next, list); + /* TODO check for compatible flags? 
*/ + if (is_idle(bo)) { + list_del(&bo->list); + } else { + bo = NULL; + } + } + pthread_mutex_unlock(&table_lock); + + return bo; +} + +/* NOTE: size is potentially rounded up to bucket size: */ +struct fd_bo * +fd_bo_cache_alloc(struct fd_bo_cache *cache, uint32_t *size, uint32_t flags) +{ + struct fd_bo *bo = NULL; + struct fd_bo_bucket *bucket; + + *size = align(*size, 4096); + bucket = get_bucket(cache, *size); + + /* see if we can be green and recycle: */ +retry: + if (bucket) { + *size = bucket->size; + bo = find_in_bucket(bucket, flags); + if (bo) { + VG_BO_OBTAIN(bo); + if (bo->funcs->madvise(bo, TRUE) <= 0) { + /* we've lost the backing pages, delete and try again: */ + pthread_mutex_lock(&table_lock); + bo_del(bo); + pthread_mutex_unlock(&table_lock); + goto retry; + } + p_atomic_set(&bo->refcnt, 1); + fd_device_ref(bo->dev); + return bo; + } + } + + return NULL; +} + +int +fd_bo_cache_free(struct fd_bo_cache *cache, struct fd_bo *bo) +{ + struct fd_bo_bucket *bucket = get_bucket(cache, bo->size); + + /* see if we can be green and recycle: */ + if (bucket) { + struct timespec time; + + bo->funcs->madvise(bo, FALSE); + + clock_gettime(CLOCK_MONOTONIC, &time); + + bo->free_time = time.tv_sec; + VG_BO_RELEASE(bo); + list_addtail(&bo->list, &bucket->list); + fd_bo_cache_cleanup(cache, time.tv_sec); + + /* bo's in the bucket cache don't have a ref and + * don't hold a ref to the dev: + */ + fd_device_del_locked(bo->dev); + + return 0; + } + + return -1; +} diff -Nru mesa-18.3.3/src/freedreno/drm/freedreno_device.c mesa-19.0.1/src/freedreno/drm/freedreno_device.c --- mesa-18.3.3/src/freedreno/drm/freedreno_device.c 1970-01-01 00:00:00.000000000 +0000 +++ mesa-19.0.1/src/freedreno/drm/freedreno_device.c 2019-03-31 23:16:37.000000000 +0000 @@ -0,0 +1,156 @@ +/* + * Copyright (C) 2012-2018 Rob Clark + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to 
deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + * Authors: + * Rob Clark + */ + +#include +#include +#include + +#include "freedreno_drmif.h" +#include "freedreno_priv.h" + +static pthread_mutex_t table_lock = PTHREAD_MUTEX_INITIALIZER; + +static uint32_t +u32_hash(const void *key) +{ + return _mesa_hash_data(key, sizeof(uint32_t)); +} + +static bool +u32_equals(const void *key1, const void *key2) +{ + return *(const uint32_t *)key1 == *(const uint32_t *)key2; +} + + +struct fd_device * kgsl_device_new(int fd); +struct fd_device * msm_device_new(int fd); + +struct fd_device * fd_device_new(int fd) +{ + struct fd_device *dev; + drmVersionPtr version; + + /* figure out if we are kgsl or msm drm driver: */ + version = drmGetVersion(fd); + if (!version) { + ERROR_MSG("cannot get version: %s", strerror(errno)); + return NULL; + } + + if (!strcmp(version->name, "msm")) { + DEBUG_MSG("msm DRM device"); + if (version->version_major != 1) { + ERROR_MSG("unsupported version: %u.%u.%u", version->version_major, + version->version_minor, version->version_patchlevel); + dev = 
NULL; + goto out; + } + + dev = msm_device_new(fd); + dev->version = version->version_minor; +#if HAVE_FREEDRENO_KGSL + } else if (!strcmp(version->name, "kgsl")) { + DEBUG_MSG("kgsl DRM device"); + dev = kgsl_device_new(fd); +#endif + } else { + ERROR_MSG("unknown device: %s", version->name); + dev = NULL; + } + +out: + drmFreeVersion(version); + + if (!dev) + return NULL; + + p_atomic_set(&dev->refcnt, 1); + dev->fd = fd; + dev->handle_table = _mesa_hash_table_create(NULL, u32_hash, u32_equals); + dev->name_table = _mesa_hash_table_create(NULL, u32_hash, u32_equals); + fd_bo_cache_init(&dev->bo_cache, FALSE); + fd_bo_cache_init(&dev->ring_cache, TRUE); + + return dev; +} + +/* like fd_device_new() but creates it's own private dup() of the fd + * which is close()d when the device is finalized. + */ +struct fd_device * fd_device_new_dup(int fd) +{ + int dup_fd = dup(fd); + struct fd_device *dev = fd_device_new(dup_fd); + if (dev) + dev->closefd = 1; + else + close(dup_fd); + return dev; +} + +struct fd_device * fd_device_ref(struct fd_device *dev) +{ + p_atomic_inc(&dev->refcnt); + return dev; +} + +static void fd_device_del_impl(struct fd_device *dev) +{ + int close_fd = dev->closefd ? 
dev->fd : -1; + fd_bo_cache_cleanup(&dev->bo_cache, 0); + _mesa_hash_table_destroy(dev->handle_table, NULL); + _mesa_hash_table_destroy(dev->name_table, NULL); + dev->funcs->destroy(dev); + if (close_fd >= 0) + close(close_fd); +} + +void fd_device_del_locked(struct fd_device *dev) +{ + if (!atomic_dec_and_test(&dev->refcnt)) + return; + fd_device_del_impl(dev); +} + +void fd_device_del(struct fd_device *dev) +{ + if (!atomic_dec_and_test(&dev->refcnt)) + return; + pthread_mutex_lock(&table_lock); + fd_device_del_impl(dev); + pthread_mutex_unlock(&table_lock); +} + +int fd_device_fd(struct fd_device *dev) +{ + return dev->fd; +} + +enum fd_version fd_device_version(struct fd_device *dev) +{ + return dev->version; +} diff -Nru mesa-18.3.3/src/freedreno/drm/freedreno_drmif.h mesa-19.0.1/src/freedreno/drm/freedreno_drmif.h --- mesa-18.3.3/src/freedreno/drm/freedreno_drmif.h 1970-01-01 00:00:00.000000000 +0000 +++ mesa-19.0.1/src/freedreno/drm/freedreno_drmif.h 2019-03-31 23:16:37.000000000 +0000 @@ -0,0 +1,166 @@ +/* + * Copyright (C) 2012-2018 Rob Clark + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + * Authors: + * Rob Clark + */ + +#ifndef FREEDRENO_DRMIF_H_ +#define FREEDRENO_DRMIF_H_ + +#include + +#include "util/u_debug.h" + +struct fd_bo; +struct fd_pipe; +struct fd_device; + +enum fd_pipe_id { + FD_PIPE_3D = 1, + FD_PIPE_2D = 2, + /* some devices have two 2d blocks.. not really sure how to + * use that yet, so just ignoring the 2nd 2d pipe for now + */ + FD_PIPE_MAX +}; + +enum fd_param_id { + FD_DEVICE_ID, + FD_GMEM_SIZE, + FD_GPU_ID, + FD_CHIP_ID, + FD_MAX_FREQ, + FD_TIMESTAMP, + FD_NR_RINGS, /* # of rings == # of distinct priority levels */ +}; + +/* bo flags: */ +#define DRM_FREEDRENO_GEM_TYPE_SMI 0x00000001 +#define DRM_FREEDRENO_GEM_TYPE_KMEM 0x00000002 +#define DRM_FREEDRENO_GEM_TYPE_MEM_MASK 0x0000000f +#define DRM_FREEDRENO_GEM_CACHE_NONE 0x00000000 +#define DRM_FREEDRENO_GEM_CACHE_WCOMBINE 0x00100000 +#define DRM_FREEDRENO_GEM_CACHE_WTHROUGH 0x00200000 +#define DRM_FREEDRENO_GEM_CACHE_WBACK 0x00400000 +#define DRM_FREEDRENO_GEM_CACHE_WBACKWA 0x00800000 +#define DRM_FREEDRENO_GEM_CACHE_MASK 0x00f00000 +#define DRM_FREEDRENO_GEM_GPUREADONLY 0x01000000 +#define DRM_FREEDRENO_GEM_SCANOUT 0x02000000 + +/* bo access flags: (keep aligned to MSM_PREP_x) */ +#define DRM_FREEDRENO_PREP_READ 0x01 +#define DRM_FREEDRENO_PREP_WRITE 0x02 +#define DRM_FREEDRENO_PREP_NOSYNC 0x04 + +/* device functions: + */ + +struct fd_device * fd_device_new(int fd); +struct fd_device * fd_device_new_dup(int fd); +struct fd_device * fd_device_ref(struct fd_device *dev); +void fd_device_del(struct fd_device *dev); +int fd_device_fd(struct fd_device *dev); + +enum fd_version { + FD_VERSION_MADVISE = 1, /* kernel supports madvise */ + FD_VERSION_UNLIMITED_CMDS = 1, /* submits w/ >4 cmd buffers (growable 
ringbuffer) */ + FD_VERSION_FENCE_FD = 2, /* submit command supports in/out fences */ + FD_VERSION_SUBMIT_QUEUES = 3, /* submit queues and multiple priority levels */ + FD_VERSION_BO_IOVA = 3, /* supports fd_bo_get/put_iova() */ + FD_VERSION_SOFTPIN = 4, /* adds softpin, bo name, and dump flag */ +}; +enum fd_version fd_device_version(struct fd_device *dev); + +/* pipe functions: + */ + +struct fd_pipe * fd_pipe_new(struct fd_device *dev, enum fd_pipe_id id); +struct fd_pipe * fd_pipe_new2(struct fd_device *dev, enum fd_pipe_id id, uint32_t prio); +struct fd_pipe * fd_pipe_ref(struct fd_pipe *pipe); +void fd_pipe_del(struct fd_pipe *pipe); +int fd_pipe_get_param(struct fd_pipe *pipe, enum fd_param_id param, + uint64_t *value); +int fd_pipe_wait(struct fd_pipe *pipe, uint32_t timestamp); +/* timeout in nanosec */ +int fd_pipe_wait_timeout(struct fd_pipe *pipe, uint32_t timestamp, + uint64_t timeout); + + +/* buffer-object functions: + */ + +struct fd_bo * _fd_bo_new(struct fd_device *dev, + uint32_t size, uint32_t flags); +void _fd_bo_set_name(struct fd_bo *bo, const char *fmt, va_list ap); + +static inline void +fd_bo_set_name(struct fd_bo *bo, const char *fmt, ...) _util_printf_format(2, 3); + +static inline void +fd_bo_set_name(struct fd_bo *bo, const char *fmt, ...) +{ +#ifndef NDEBUG + va_list ap; + va_start(ap, fmt); + _fd_bo_set_name(bo, fmt, ap); + va_end(ap); +#endif +} + +static inline struct fd_bo * +fd_bo_new(struct fd_device *dev, uint32_t size, uint32_t flags, + const char *fmt, ...) _util_printf_format(4, 5); + +static inline struct fd_bo * +fd_bo_new(struct fd_device *dev, uint32_t size, uint32_t flags, + const char *fmt, ...) 
+{ + struct fd_bo *bo = _fd_bo_new(dev, size, flags); +#ifndef NDEBUG + if (fmt) { + va_list ap; + va_start(ap, fmt); + _fd_bo_set_name(bo, fmt, ap); + va_end(ap); + } +#endif + return bo; +} + +struct fd_bo *fd_bo_from_handle(struct fd_device *dev, + uint32_t handle, uint32_t size); +struct fd_bo * fd_bo_from_name(struct fd_device *dev, uint32_t name); +struct fd_bo * fd_bo_from_dmabuf(struct fd_device *dev, int fd); +uint64_t fd_bo_get_iova(struct fd_bo *bo); +void fd_bo_put_iova(struct fd_bo *bo); +struct fd_bo * fd_bo_ref(struct fd_bo *bo); +void fd_bo_del(struct fd_bo *bo); +int fd_bo_get_name(struct fd_bo *bo, uint32_t *name); +uint32_t fd_bo_handle(struct fd_bo *bo); +int fd_bo_dmabuf(struct fd_bo *bo); +uint32_t fd_bo_size(struct fd_bo *bo); +void * fd_bo_map(struct fd_bo *bo); +int fd_bo_cpu_prep(struct fd_bo *bo, struct fd_pipe *pipe, uint32_t op); +void fd_bo_cpu_fini(struct fd_bo *bo); + +#endif /* FREEDRENO_DRMIF_H_ */ diff -Nru mesa-18.3.3/src/freedreno/drm/freedreno_pipe.c mesa-19.0.1/src/freedreno/drm/freedreno_pipe.c --- mesa-18.3.3/src/freedreno/drm/freedreno_pipe.c 1970-01-01 00:00:00.000000000 +0000 +++ mesa-19.0.1/src/freedreno/drm/freedreno_pipe.c 2019-03-31 23:16:37.000000000 +0000 @@ -0,0 +1,100 @@ +/* + * Copyright (C) 2012-2018 Rob Clark + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. 
+ * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + * Authors: + * Rob Clark + */ + +#include "freedreno_drmif.h" +#include "freedreno_priv.h" + +/** + * priority of zero is highest priority, and higher numeric values are + * lower priorities + */ +struct fd_pipe * +fd_pipe_new2(struct fd_device *dev, enum fd_pipe_id id, uint32_t prio) +{ + struct fd_pipe *pipe; + uint64_t val; + + if (id > FD_PIPE_MAX) { + ERROR_MSG("invalid pipe id: %d", id); + return NULL; + } + + if ((prio != 1) && (fd_device_version(dev) < FD_VERSION_SUBMIT_QUEUES)) { + ERROR_MSG("invalid priority!"); + return NULL; + } + + pipe = dev->funcs->pipe_new(dev, id, prio); + if (!pipe) { + ERROR_MSG("allocation failed"); + return NULL; + } + + pipe->dev = dev; + pipe->id = id; + p_atomic_set(&pipe->refcnt, 1); + + fd_pipe_get_param(pipe, FD_GPU_ID, &val); + pipe->gpu_id = val; + + return pipe; +} + +struct fd_pipe * +fd_pipe_new(struct fd_device *dev, enum fd_pipe_id id) +{ + return fd_pipe_new2(dev, id, 1); +} + +struct fd_pipe * fd_pipe_ref(struct fd_pipe *pipe) +{ + p_atomic_inc(&pipe->refcnt); + return pipe; +} + +void fd_pipe_del(struct fd_pipe *pipe) +{ + if (!atomic_dec_and_test(&pipe->refcnt)) + return; + pipe->funcs->destroy(pipe); +} + +int fd_pipe_get_param(struct fd_pipe *pipe, + enum fd_param_id param, uint64_t *value) +{ + return pipe->funcs->get_param(pipe, param, value); +} + +int fd_pipe_wait(struct fd_pipe *pipe, uint32_t timestamp) +{ + return fd_pipe_wait_timeout(pipe, timestamp, ~0); +} + +int fd_pipe_wait_timeout(struct fd_pipe *pipe, 
uint32_t timestamp, + uint64_t timeout) +{ + return pipe->funcs->wait(pipe, timestamp, timeout); +} diff -Nru mesa-18.3.3/src/freedreno/drm/freedreno_priv.h mesa-19.0.1/src/freedreno/drm/freedreno_priv.h --- mesa-18.3.3/src/freedreno/drm/freedreno_priv.h 1970-01-01 00:00:00.000000000 +0000 +++ mesa-19.0.1/src/freedreno/drm/freedreno_priv.h 2019-03-31 23:16:37.000000000 +0000 @@ -0,0 +1,259 @@ +/* + * Copyright (C) 2012-2018 Rob Clark + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ * + * Authors: + * Rob Clark + */ + +#ifndef FREEDRENO_PRIV_H_ +#define FREEDRENO_PRIV_H_ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include + +#include "util/hash_table.h" +#include "util/list.h" +#include "util/u_debug.h" +#include "util/u_atomic.h" +#include "util/u_math.h" +#include "util/u_debug.h" + +#include "freedreno_drmif.h" +#include "freedreno_ringbuffer.h" + +#define atomic_dec_and_test(x) (__sync_add_and_fetch (x, -1) == 0) + +struct fd_device_funcs { + int (*bo_new_handle)(struct fd_device *dev, uint32_t size, + uint32_t flags, uint32_t *handle); + struct fd_bo * (*bo_from_handle)(struct fd_device *dev, + uint32_t size, uint32_t handle); + struct fd_pipe * (*pipe_new)(struct fd_device *dev, enum fd_pipe_id id, + unsigned prio); + void (*destroy)(struct fd_device *dev); +}; + +struct fd_bo_bucket { + uint32_t size; + struct list_head list; +}; + +struct fd_bo_cache { + struct fd_bo_bucket cache_bucket[14 * 4]; + int num_buckets; + time_t time; +}; + +struct fd_device { + int fd; + enum fd_version version; + int32_t refcnt; + + /* tables to keep track of bo's, to avoid "evil-twin" fd_bo objects: + * + * handle_table: maps handle to fd_bo + * name_table: maps flink name to fd_bo + * + * We end up needing two tables, because DRM_IOCTL_GEM_OPEN always + * returns a new handle. So we need to figure out if the bo is already + * open in the process first, before calling gem-open. 
+ */ + struct hash_table *handle_table, *name_table; + + const struct fd_device_funcs *funcs; + + struct fd_bo_cache bo_cache; + struct fd_bo_cache ring_cache; + + int closefd; /* call close(fd) upon destruction */ + + /* just for valgrind: */ + int bo_size; +}; + +void fd_bo_cache_init(struct fd_bo_cache *cache, int coarse); +void fd_bo_cache_cleanup(struct fd_bo_cache *cache, time_t time); +struct fd_bo * fd_bo_cache_alloc(struct fd_bo_cache *cache, + uint32_t *size, uint32_t flags); +int fd_bo_cache_free(struct fd_bo_cache *cache, struct fd_bo *bo); + +/* for where @table_lock is already held: */ +void fd_device_del_locked(struct fd_device *dev); + +struct fd_pipe_funcs { + struct fd_ringbuffer * (*ringbuffer_new_object)(struct fd_pipe *pipe, uint32_t size); + struct fd_submit * (*submit_new)(struct fd_pipe *pipe); + int (*get_param)(struct fd_pipe *pipe, enum fd_param_id param, uint64_t *value); + int (*wait)(struct fd_pipe *pipe, uint32_t timestamp, uint64_t timeout); + void (*destroy)(struct fd_pipe *pipe); +}; + +struct fd_pipe { + struct fd_device *dev; + enum fd_pipe_id id; + uint32_t gpu_id; + int32_t refcnt; + const struct fd_pipe_funcs *funcs; +}; + +struct fd_submit_funcs { + struct fd_ringbuffer * (*new_ringbuffer)(struct fd_submit *submit, + uint32_t size, enum fd_ringbuffer_flags flags); + int (*flush)(struct fd_submit *submit, int in_fence_fd, + int *out_fence_fd, uint32_t *out_fence); + void (*destroy)(struct fd_submit *submit); +}; + +struct fd_submit { + struct fd_pipe *pipe; + const struct fd_submit_funcs *funcs; +}; + +struct fd_ringbuffer_funcs { + void (*grow)(struct fd_ringbuffer *ring, uint32_t size); + void (*emit_reloc)(struct fd_ringbuffer *ring, + const struct fd_reloc *reloc); + uint32_t (*emit_reloc_ring)(struct fd_ringbuffer *ring, + struct fd_ringbuffer *target, uint32_t cmd_idx); + uint32_t (*cmd_count)(struct fd_ringbuffer *ring); + void (*destroy)(struct fd_ringbuffer *ring); +}; + +struct fd_bo_funcs { + int (*offset)(struct 
fd_bo *bo, uint64_t *offset); + int (*cpu_prep)(struct fd_bo *bo, struct fd_pipe *pipe, uint32_t op); + void (*cpu_fini)(struct fd_bo *bo); + int (*madvise)(struct fd_bo *bo, int willneed); + uint64_t (*iova)(struct fd_bo *bo); + void (*set_name)(struct fd_bo *bo, const char *fmt, va_list ap); + void (*destroy)(struct fd_bo *bo); +}; + +struct fd_bo { + struct fd_device *dev; + uint32_t size; + uint32_t handle; + uint32_t name; + int32_t refcnt; + uint64_t iova; + void *map; + const struct fd_bo_funcs *funcs; + + enum { + NO_CACHE = 0, + BO_CACHE = 1, + RING_CACHE = 2, + } bo_reuse; + + struct list_head list; /* bucket-list entry */ + time_t free_time; /* time when added to bucket-list */ +}; + +struct fd_bo *fd_bo_new_ring(struct fd_device *dev, + uint32_t size, uint32_t flags); + +#define enable_debug 0 /* TODO make dynamic */ + +#define INFO_MSG(fmt, ...) \ + do { debug_printf("[I] "fmt " (%s:%d)\n", \ + ##__VA_ARGS__, __FUNCTION__, __LINE__); } while (0) +#define DEBUG_MSG(fmt, ...) \ + do if (enable_debug) { debug_printf("[D] "fmt " (%s:%d)\n", \ + ##__VA_ARGS__, __FUNCTION__, __LINE__); } while (0) +#define WARN_MSG(fmt, ...) \ + do { debug_printf("[W] "fmt " (%s:%d)\n", \ + ##__VA_ARGS__, __FUNCTION__, __LINE__); } while (0) +#define ERROR_MSG(fmt, ...) 
\ + do { debug_printf("[E] " fmt " (%s:%d)\n", \ + ##__VA_ARGS__, __FUNCTION__, __LINE__); } while (0) + +#define U642VOID(x) ((void *)(unsigned long)(x)) +#define VOID2U64(x) ((uint64_t)(unsigned long)(x)) + +#if HAVE_VALGRIND +# include + +/* + * For tracking the backing memory (if valgrind enabled, we force a mmap + * for the purposes of tracking) + */ +static inline void VG_BO_ALLOC(struct fd_bo *bo) +{ + if (bo && RUNNING_ON_VALGRIND) { + VALGRIND_MALLOCLIKE_BLOCK(fd_bo_map(bo), bo->size, 0, 1); + } +} + +static inline void VG_BO_FREE(struct fd_bo *bo) +{ + VALGRIND_FREELIKE_BLOCK(bo->map, 0); +} + +/* + * For tracking bo structs that are in the buffer-cache, so that valgrind + * doesn't attribute ownership to the first one to allocate the recycled + * bo. + * + * Note that the list_head in fd_bo is used to track the buffers in cache + * so disable error reporting on the range while they are in cache so + * valgrind doesn't squawk about list traversal. + * + */ +static inline void VG_BO_RELEASE(struct fd_bo *bo) +{ + if (RUNNING_ON_VALGRIND) { + VALGRIND_DISABLE_ADDR_ERROR_REPORTING_IN_RANGE(bo, bo->dev->bo_size); + VALGRIND_MAKE_MEM_NOACCESS(bo, bo->dev->bo_size); + VALGRIND_FREELIKE_BLOCK(bo->map, 0); + } +} +static inline void VG_BO_OBTAIN(struct fd_bo *bo) +{ + if (RUNNING_ON_VALGRIND) { + VALGRIND_MAKE_MEM_DEFINED(bo, bo->dev->bo_size); + VALGRIND_ENABLE_ADDR_ERROR_REPORTING_IN_RANGE(bo, bo->dev->bo_size); + VALGRIND_MALLOCLIKE_BLOCK(bo->map, bo->size, 0, 1); + } +} +#else +static inline void VG_BO_ALLOC(struct fd_bo *bo) {} +static inline void VG_BO_FREE(struct fd_bo *bo) {} +static inline void VG_BO_RELEASE(struct fd_bo *bo) {} +static inline void VG_BO_OBTAIN(struct fd_bo *bo) {} +#endif + +#define FD_DEFINE_CAST(parent, child) \ +static inline struct child * to_ ## child (struct parent *x) \ +{ return (struct child *)x; } + + +#endif /* FREEDRENO_PRIV_H_ */ diff -Nru mesa-18.3.3/src/freedreno/drm/freedreno_ringbuffer.c 
mesa-19.0.1/src/freedreno/drm/freedreno_ringbuffer.c --- mesa-18.3.3/src/freedreno/drm/freedreno_ringbuffer.c 1970-01-01 00:00:00.000000000 +0000 +++ mesa-19.0.1/src/freedreno/drm/freedreno_ringbuffer.c 2019-03-31 23:16:37.000000000 +0000 @@ -0,0 +1,114 @@ +/* + * Copyright (C) 2012-2018 Rob Clark + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ * + * Authors: + * Rob Clark + */ + +#include + +#include "freedreno_drmif.h" +#include "freedreno_ringbuffer.h" +#include "freedreno_priv.h" + +struct fd_submit * +fd_submit_new(struct fd_pipe *pipe) +{ + return pipe->funcs->submit_new(pipe); +} + +void +fd_submit_del(struct fd_submit *submit) +{ + return submit->funcs->destroy(submit); +} + +int +fd_submit_flush(struct fd_submit *submit, int in_fence_fd, int *out_fence_fd, + uint32_t *out_fence) +{ + return submit->funcs->flush(submit, in_fence_fd, out_fence_fd, out_fence); +} + +struct fd_ringbuffer * +fd_submit_new_ringbuffer(struct fd_submit *submit, uint32_t size, + enum fd_ringbuffer_flags flags) +{ + debug_assert(!(flags & _FD_RINGBUFFER_OBJECT)); + if (flags & FD_RINGBUFFER_STREAMING) { + debug_assert(!(flags & FD_RINGBUFFER_GROWABLE)); + debug_assert(!(flags & FD_RINGBUFFER_PRIMARY)); + } + return submit->funcs->new_ringbuffer(submit, size, flags); +} + +struct fd_ringbuffer * +fd_ringbuffer_new_object(struct fd_pipe *pipe, uint32_t size) +{ + return pipe->funcs->ringbuffer_new_object(pipe, size); +} + +void fd_ringbuffer_del(struct fd_ringbuffer *ring) +{ + if (!atomic_dec_and_test(&ring->refcnt)) + return; + + ring->funcs->destroy(ring); +} + +struct fd_ringbuffer * +fd_ringbuffer_ref(struct fd_ringbuffer *ring) +{ + p_atomic_inc(&ring->refcnt); + return ring; +} + +void fd_ringbuffer_grow(struct fd_ringbuffer *ring, uint32_t ndwords) +{ + assert(ring->funcs->grow); /* unsupported on kgsl */ + + /* there is an upper bound on IB size, which appears to be 0x100000 */ + if (ring->size < 0x100000) + ring->size *= 2; + + ring->funcs->grow(ring, ring->size); +} + +void fd_ringbuffer_reloc(struct fd_ringbuffer *ring, + const struct fd_reloc *reloc) +{ + ring->funcs->emit_reloc(ring, reloc); +} + +uint32_t fd_ringbuffer_cmd_count(struct fd_ringbuffer *ring) +{ + if (!ring->funcs->cmd_count) + return 1; + return ring->funcs->cmd_count(ring); +} + +uint32_t +fd_ringbuffer_emit_reloc_ring_full(struct 
fd_ringbuffer *ring, + struct fd_ringbuffer *target, uint32_t cmd_idx) +{ + return ring->funcs->emit_reloc_ring(ring, target, cmd_idx); +} diff -Nru mesa-18.3.3/src/freedreno/drm/freedreno_ringbuffer.h mesa-19.0.1/src/freedreno/drm/freedreno_ringbuffer.h --- mesa-18.3.3/src/freedreno/drm/freedreno_ringbuffer.h 1970-01-01 00:00:00.000000000 +0000 +++ mesa-19.0.1/src/freedreno/drm/freedreno_ringbuffer.h 2019-03-31 23:16:37.000000000 +0000 @@ -0,0 +1,160 @@ +/* + * Copyright (C) 2012-2018 Rob Clark + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + * Authors: + * Rob Clark + */ + +#ifndef FREEDRENO_RINGBUFFER_H_ +#define FREEDRENO_RINGBUFFER_H_ + +#include "util/u_debug.h" + +#include "freedreno_drmif.h" + +struct fd_submit; +struct fd_ringbuffer; + +enum fd_ringbuffer_flags { + + /* Primary ringbuffer for a submit, ie. an IB1 level rb + * which kernel must setup RB->IB1 CP_INDIRECT_BRANCH + * packets. 
+ */ + FD_RINGBUFFER_PRIMARY = 0x1, + + /* Hint that the stateobj will be used for streaming state + * that is used once or a few times and then discarded. + * + * For sub-allocation, non streaming stateobj's should be + * sub-allocated from a page size buffer, so one long lived + * state obj doesn't prevent other pages from being freed. + * (Ie. it would be no worse than allocating a page sized + * bo for each small non-streaming stateobj). + * + * But streaming stateobj's could be sub-allocated from a + * larger buffer to reduce the alloc/del overhead. + */ + FD_RINGBUFFER_STREAMING = 0x2, + + /* Indicates that "growable" cmdstream can be used, + * consisting of multiple physical cmdstream buffers + */ + FD_RINGBUFFER_GROWABLE = 0x4, + + /* Internal use only: */ + _FD_RINGBUFFER_OBJECT = 0x8, +}; + +/* A submit object manages/tracks all the state buildup for a "submit" + * ioctl to the kernel. Additionally, with the exception of long-lived + * non-STREAMING stateobj rb's, rb's are allocated from the submit. + */ +struct fd_submit * fd_submit_new(struct fd_pipe *pipe); + +/* NOTE: all ringbuffer's create from the submit should be unref'd + * before destroying the submit. + */ +void fd_submit_del(struct fd_submit *submit); + +/* Allocate a new rb from the submit. */ +struct fd_ringbuffer * fd_submit_new_ringbuffer(struct fd_submit *submit, + uint32_t size, enum fd_ringbuffer_flags flags); + +/* in_fence_fd: -1 for no in-fence, else fence fd + * out_fence_fd: NULL for no output-fence requested, else ptr to return out-fence + */ +int fd_submit_flush(struct fd_submit *submit, + int in_fence_fd, int *out_fence_fd, + uint32_t *out_fence); + +struct fd_ringbuffer_funcs; + +/* the ringbuffer object is not opaque so that OUT_RING() type stuff + * can be inlined. Note that users should not make assumptions about + * the size of this struct. 
+ */ +struct fd_ringbuffer { + uint32_t *cur, *end, *start; + const struct fd_ringbuffer_funcs *funcs; + +// size or end coudl probably go away + int size; + int32_t refcnt; + enum fd_ringbuffer_flags flags; +}; + +/* Allocate a new long-lived state object, not associated with + * a submit: + */ +struct fd_ringbuffer * fd_ringbuffer_new_object(struct fd_pipe *pipe, + uint32_t size); + +struct fd_ringbuffer *fd_ringbuffer_ref(struct fd_ringbuffer *ring); +void fd_ringbuffer_del(struct fd_ringbuffer *ring); + +void fd_ringbuffer_grow(struct fd_ringbuffer *ring, uint32_t ndwords); + +static inline void fd_ringbuffer_emit(struct fd_ringbuffer *ring, + uint32_t data) +{ + (*ring->cur++) = data; +} + +struct fd_reloc { + struct fd_bo *bo; +#define FD_RELOC_READ 0x0001 +#define FD_RELOC_WRITE 0x0002 +#define FD_RELOC_DUMP 0x0004 + uint32_t flags; + uint32_t offset; + uint32_t or; + int32_t shift; + uint32_t orhi; /* used for a5xx+ */ +}; + +/* NOTE: relocs are 2 dwords on a5xx+ */ + +void fd_ringbuffer_reloc(struct fd_ringbuffer *ring, const struct fd_reloc *reloc); +uint32_t fd_ringbuffer_cmd_count(struct fd_ringbuffer *ring); +uint32_t fd_ringbuffer_emit_reloc_ring_full(struct fd_ringbuffer *ring, + struct fd_ringbuffer *target, uint32_t cmd_idx); + +static inline uint32_t +offset_bytes(void *end, void *start) +{ + return ((char *)end) - ((char *)start); +} + +static inline uint32_t +fd_ringbuffer_size(struct fd_ringbuffer *ring) +{ + /* only really needed for stateobj ringbuffers, and won't really + * do what you expect for growable rb's.. 
so lets just restrict + * this to stateobj's for now: + */ + debug_assert(!(ring->flags & FD_RINGBUFFER_GROWABLE)); + return offset_bytes(ring->cur, ring->start); +} + + +#endif /* FREEDRENO_RINGBUFFER_H_ */ diff -Nru mesa-18.3.3/src/freedreno/drm/meson.build mesa-19.0.1/src/freedreno/drm/meson.build --- mesa-18.3.3/src/freedreno/drm/meson.build 1970-01-01 00:00:00.000000000 +0000 +++ mesa-19.0.1/src/freedreno/drm/meson.build 2019-03-31 23:16:37.000000000 +0000 @@ -0,0 +1,54 @@ +# Copyright © 2018 Rob Clark + +# Permission is hereby granted, free of charge, to any person obtaining a copy +# of this software and associated documentation files (the "Software"), to deal +# in the Software without restriction, including without limitation the rights +# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +# copies of the Software, and to permit persons to whom the Software is +# furnished to do so, subject to the following conditions: + +# The above copyright notice and this permission notice shall be included in +# all copies or substantial portions of the Software. + +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +# SOFTWARE. 
+ +libfreedreno_drm_files = files( + 'freedreno_bo.c', + 'freedreno_bo_cache.c', + 'freedreno_device.c', + 'freedreno_drmif.h', + 'freedreno_pipe.c', + 'freedreno_priv.h', + 'freedreno_ringbuffer.c', + 'freedreno_ringbuffer.h', + 'msm_bo.c', + 'msm_device.c', + 'msm_drm.h', + 'msm_pipe.c', + 'msm_priv.h', + 'msm_ringbuffer.c', + 'msm_ringbuffer_sp.c', +) + +libfreedreno_drm = static_library( + 'freedreno_drm', + libfreedreno_drm_files, + include_directories : [ + inc_freedreno, + inc_common, + ], + c_args : [c_vis_args, no_override_init_args], + cpp_args : [cpp_vis_args], + dependencies : [ + dep_libdrm, + dep_valgrind, + ], + build_by_default : false, +) + diff -Nru mesa-18.3.3/src/freedreno/drm/msm_bo.c mesa-19.0.1/src/freedreno/drm/msm_bo.c --- mesa-18.3.3/src/freedreno/drm/msm_bo.c 1970-01-01 00:00:00.000000000 +0000 +++ mesa-19.0.1/src/freedreno/drm/msm_bo.c 2019-03-31 23:16:37.000000000 +0000 @@ -0,0 +1,197 @@ +/* + * Copyright (C) 2012-2018 Rob Clark + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + * Authors: + * Rob Clark + */ + +#include "msm_priv.h" + +static int bo_allocate(struct msm_bo *msm_bo) +{ + struct fd_bo *bo = &msm_bo->base; + if (!msm_bo->offset) { + struct drm_msm_gem_info req = { + .handle = bo->handle, + .info = MSM_INFO_GET_OFFSET, + }; + int ret; + + /* if the buffer is already backed by pages then this + * doesn't actually do anything (other than giving us + * the offset) + */ + ret = drmCommandWriteRead(bo->dev->fd, DRM_MSM_GEM_INFO, + &req, sizeof(req)); + if (ret) { + ERROR_MSG("alloc failed: %s", strerror(errno)); + return ret; + } + + msm_bo->offset = req.value; + } + + return 0; +} + +static int msm_bo_offset(struct fd_bo *bo, uint64_t *offset) +{ + struct msm_bo *msm_bo = to_msm_bo(bo); + int ret = bo_allocate(msm_bo); + if (ret) + return ret; + *offset = msm_bo->offset; + return 0; +} + +static int msm_bo_cpu_prep(struct fd_bo *bo, struct fd_pipe *pipe, uint32_t op) +{ + struct drm_msm_gem_cpu_prep req = { + .handle = bo->handle, + .op = op, + }; + + get_abs_timeout(&req.timeout, 5000000000); + + return drmCommandWrite(bo->dev->fd, DRM_MSM_GEM_CPU_PREP, &req, sizeof(req)); +} + +static void msm_bo_cpu_fini(struct fd_bo *bo) +{ + struct drm_msm_gem_cpu_fini req = { + .handle = bo->handle, + }; + + drmCommandWrite(bo->dev->fd, DRM_MSM_GEM_CPU_FINI, &req, sizeof(req)); +} + +static int msm_bo_madvise(struct fd_bo *bo, int willneed) +{ + struct drm_msm_gem_madvise req = { + .handle = bo->handle, + .madv = willneed ? 
MSM_MADV_WILLNEED : MSM_MADV_DONTNEED, + }; + int ret; + + /* older kernels do not support this: */ + if (bo->dev->version < FD_VERSION_MADVISE) + return willneed; + + ret = drmCommandWriteRead(bo->dev->fd, DRM_MSM_GEM_MADVISE, &req, sizeof(req)); + if (ret) + return ret; + + return req.retained; +} + +static uint64_t msm_bo_iova(struct fd_bo *bo) +{ + struct drm_msm_gem_info req = { + .handle = bo->handle, + .info = MSM_INFO_GET_IOVA, + }; + int ret; + + ret = drmCommandWriteRead(bo->dev->fd, DRM_MSM_GEM_INFO, &req, sizeof(req)); + debug_assert(ret == 0); + + return req.value; +} + +static void msm_bo_set_name(struct fd_bo *bo, const char *fmt, va_list ap) +{ + struct drm_msm_gem_info req = { + .handle = bo->handle, + .info = MSM_INFO_SET_NAME, + }; + char buf[32]; + int sz; + + if (bo->dev->version < FD_VERSION_SOFTPIN) + return; + + sz = vsnprintf(buf, sizeof(buf), fmt, ap); + + req.value = VOID2U64(buf); + req.len = MIN2(sz, sizeof(buf)); + + drmCommandWrite(bo->dev->fd, DRM_MSM_GEM_INFO, &req, sizeof(req)); +} + +static void msm_bo_destroy(struct fd_bo *bo) +{ + struct msm_bo *msm_bo = to_msm_bo(bo); + free(msm_bo); +} + +static const struct fd_bo_funcs funcs = { + .offset = msm_bo_offset, + .cpu_prep = msm_bo_cpu_prep, + .cpu_fini = msm_bo_cpu_fini, + .madvise = msm_bo_madvise, + .iova = msm_bo_iova, + .set_name = msm_bo_set_name, + .destroy = msm_bo_destroy, +}; + +/* allocate a buffer handle: */ +int msm_bo_new_handle(struct fd_device *dev, + uint32_t size, uint32_t flags, uint32_t *handle) +{ + struct drm_msm_gem_new req = { + .size = size, + .flags = MSM_BO_WC, // TODO figure out proper flags.. 
+ }; + int ret; + + if (flags & DRM_FREEDRENO_GEM_SCANOUT) + req.flags |= MSM_BO_SCANOUT; + + if (flags & DRM_FREEDRENO_GEM_GPUREADONLY) + req.flags |= MSM_BO_GPU_READONLY; + + ret = drmCommandWriteRead(dev->fd, DRM_MSM_GEM_NEW, + &req, sizeof(req)); + if (ret) + return ret; + + *handle = req.handle; + + return 0; +} + +/* allocate a new buffer object */ +struct fd_bo * msm_bo_from_handle(struct fd_device *dev, + uint32_t size, uint32_t handle) +{ + struct msm_bo *msm_bo; + struct fd_bo *bo; + + msm_bo = calloc(1, sizeof(*msm_bo)); + if (!msm_bo) + return NULL; + + bo = &msm_bo->base; + bo->funcs = &funcs; + + return bo; +} diff -Nru mesa-18.3.3/src/freedreno/drm/msm_device.c mesa-19.0.1/src/freedreno/drm/msm_device.c --- mesa-18.3.3/src/freedreno/drm/msm_device.c 1970-01-01 00:00:00.000000000 +0000 +++ mesa-19.0.1/src/freedreno/drm/msm_device.c 2019-03-31 23:16:37.000000000 +0000 @@ -0,0 +1,61 @@ +/* + * Copyright (C) 2012-2018 Rob Clark + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + * Authors: + * Rob Clark + */ + +#include +#include +#include + +#include "msm_priv.h" + +static void msm_device_destroy(struct fd_device *dev) +{ + struct msm_device *msm_dev = to_msm_device(dev); + free(msm_dev); +} + +static const struct fd_device_funcs funcs = { + .bo_new_handle = msm_bo_new_handle, + .bo_from_handle = msm_bo_from_handle, + .pipe_new = msm_pipe_new, + .destroy = msm_device_destroy, +}; + +struct fd_device * msm_device_new(int fd) +{ + struct msm_device *msm_dev; + struct fd_device *dev; + + msm_dev = calloc(1, sizeof(*msm_dev)); + if (!msm_dev) + return NULL; + + dev = &msm_dev->base; + dev->funcs = &funcs; + + dev->bo_size = sizeof(struct msm_bo); + + return dev; +} diff -Nru mesa-18.3.3/src/freedreno/drm/msm_drm.h mesa-19.0.1/src/freedreno/drm/msm_drm.h --- mesa-18.3.3/src/freedreno/drm/msm_drm.h 1970-01-01 00:00:00.000000000 +0000 +++ mesa-19.0.1/src/freedreno/drm/msm_drm.h 2019-03-31 23:16:37.000000000 +0000 @@ -0,0 +1,321 @@ +/* + * Copyright (C) 2013 Red Hat + * Author: Rob Clark + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. 
+ * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#ifndef __MSM_DRM_H__ +#define __MSM_DRM_H__ + +#include "drm.h" + +#if defined(__cplusplus) +extern "C" { +#endif + +/* Please note that modifications to all structs defined here are + * subject to backwards-compatibility constraints: + * 1) Do not use pointers, use __u64 instead for 32 bit / 64 bit + * user/kernel compatibility + * 2) Keep fields aligned to their size + * 3) Because of how drm_ioctl() works, we can add new fields at + * the end of an ioctl if some care is taken: drm_ioctl() will + * zero out the new fields at the tail of the ioctl, so a zero + * value should have a backwards compatible meaning. And for + * output params, userspace won't see the newly added output + * fields.. so that has to be somehow ok. + */ + +#define MSM_PIPE_NONE 0x00 +#define MSM_PIPE_2D0 0x01 +#define MSM_PIPE_2D1 0x02 +#define MSM_PIPE_3D0 0x10 + +/* The pipe-id just uses the lower bits, so can be OR'd with flags in + * the upper 16 bits (which could be extended further, if needed, maybe + * we extend/overload the pipe-id some day to deal with multiple rings, + * but even then I don't think we need the full lower 16 bits). + */ +#define MSM_PIPE_ID_MASK 0xffff +#define MSM_PIPE_ID(x) ((x) & MSM_PIPE_ID_MASK) +#define MSM_PIPE_FLAGS(x) ((x) & ~MSM_PIPE_ID_MASK) + +/* timeouts are specified in clock-monotonic absolute times (to simplify + * restarting interrupted ioctls). The following struct is logically the + * same as 'struct timespec' but 32/64b ABI safe. 
+ */ +struct drm_msm_timespec { + __s64 tv_sec; /* seconds */ + __s64 tv_nsec; /* nanoseconds */ +}; + +#define MSM_PARAM_GPU_ID 0x01 +#define MSM_PARAM_GMEM_SIZE 0x02 +#define MSM_PARAM_CHIP_ID 0x03 +#define MSM_PARAM_MAX_FREQ 0x04 +#define MSM_PARAM_TIMESTAMP 0x05 +#define MSM_PARAM_GMEM_BASE 0x06 +#define MSM_PARAM_NR_RINGS 0x07 + +struct drm_msm_param { + __u32 pipe; /* in, MSM_PIPE_x */ + __u32 param; /* in, MSM_PARAM_x */ + __u64 value; /* out (get_param) or in (set_param) */ +}; + +/* + * GEM buffers: + */ + +#define MSM_BO_SCANOUT 0x00000001 /* scanout capable */ +#define MSM_BO_GPU_READONLY 0x00000002 +#define MSM_BO_CACHE_MASK 0x000f0000 +/* cache modes */ +#define MSM_BO_CACHED 0x00010000 +#define MSM_BO_WC 0x00020000 +#define MSM_BO_UNCACHED 0x00040000 + +#define MSM_BO_FLAGS (MSM_BO_SCANOUT | \ + MSM_BO_GPU_READONLY | \ + MSM_BO_CACHED | \ + MSM_BO_WC | \ + MSM_BO_UNCACHED) + +struct drm_msm_gem_new { + __u64 size; /* in */ + __u32 flags; /* in, mask of MSM_BO_x */ + __u32 handle; /* out */ +}; + +/* Get or set GEM buffer info. The requested value can be passed + * directly in 'value', or for data larger than 64b 'value' is a + * pointer to userspace buffer, with 'len' specifying the number of + * bytes copied into that buffer. 
For info returned by pointer, + * calling the GEM_INFO ioctl with null 'value' will return the + * required buffer size in 'len' + */ +#define MSM_INFO_GET_OFFSET 0x00 /* get mmap() offset, returned by value */ +#define MSM_INFO_GET_IOVA 0x01 /* get iova, returned by value */ +#define MSM_INFO_SET_NAME 0x02 /* set the debug name (by pointer) */ +#define MSM_INFO_GET_NAME 0x03 /* get debug name, returned by pointer */ + +struct drm_msm_gem_info { + __u32 handle; /* in */ + __u32 info; /* in - one of MSM_INFO_* */ + __u64 value; /* in or out */ + __u32 len; /* in or out */ + __u32 pad; +}; + +#define MSM_PREP_READ 0x01 +#define MSM_PREP_WRITE 0x02 +#define MSM_PREP_NOSYNC 0x04 + +#define MSM_PREP_FLAGS (MSM_PREP_READ | MSM_PREP_WRITE | MSM_PREP_NOSYNC) + +struct drm_msm_gem_cpu_prep { + __u32 handle; /* in */ + __u32 op; /* in, mask of MSM_PREP_x */ + struct drm_msm_timespec timeout; /* in */ +}; + +struct drm_msm_gem_cpu_fini { + __u32 handle; /* in */ +}; + +/* + * Cmdstream Submission: + */ + +/* The value written into the cmdstream is logically: + * + * ((relocbuf->gpuaddr + reloc_offset) << shift) | or + * + * When we have GPU's w/ >32bit ptrs, it should be possible to deal + * with this by emit'ing two reloc entries with appropriate shift + * values. Or a new MSM_SUBMIT_CMD_x type would also be an option. + * + * NOTE that reloc's must be sorted by order of increasing submit_offset, + * otherwise EINVAL. + */ +struct drm_msm_gem_submit_reloc { + __u32 submit_offset; /* in, offset from submit_bo */ + __u32 or; /* in, value OR'd with result */ + __s32 shift; /* in, amount of left shift (can be negative) */ + __u32 reloc_idx; /* in, index of reloc_bo buffer */ + __u64 reloc_offset; /* in, offset from start of reloc_bo */ +}; + +/* submit-types: + * BUF - this cmd buffer is executed normally. + * IB_TARGET_BUF - this cmd buffer is an IB target. 
Reloc's are + * processed normally, but the kernel does not setup an IB to + * this buffer in the first-level ringbuffer + * CTX_RESTORE_BUF - only executed if there has been a GPU context + * switch since the last SUBMIT ioctl + */ +#define MSM_SUBMIT_CMD_BUF 0x0001 +#define MSM_SUBMIT_CMD_IB_TARGET_BUF 0x0002 +#define MSM_SUBMIT_CMD_CTX_RESTORE_BUF 0x0003 +struct drm_msm_gem_submit_cmd { + __u32 type; /* in, one of MSM_SUBMIT_CMD_x */ + __u32 submit_idx; /* in, index of submit_bo cmdstream buffer */ + __u32 submit_offset; /* in, offset into submit_bo */ + __u32 size; /* in, cmdstream size */ + __u32 pad; + __u32 nr_relocs; /* in, number of submit_reloc's */ + __u64 relocs; /* in, ptr to array of submit_reloc's */ +}; + +/* Each buffer referenced elsewhere in the cmdstream submit (ie. the + * cmdstream buffer(s) themselves or reloc entries) has one (and only + * one) entry in the submit->bos[] table. + * + * As a optimization, the current buffer (gpu virtual address) can be + * passed back through the 'presumed' field. If on a subsequent reloc, + * userspace passes back a 'presumed' address that is still valid, + * then patching the cmdstream for this entry is skipped. This can + * avoid kernel needing to map/access the cmdstream bo in the common + * case. 
+ */ +#define MSM_SUBMIT_BO_READ 0x0001 +#define MSM_SUBMIT_BO_WRITE 0x0002 +#define MSM_SUBMIT_BO_DUMP 0x0004 + +#define MSM_SUBMIT_BO_FLAGS (MSM_SUBMIT_BO_READ | \ + MSM_SUBMIT_BO_WRITE | \ + MSM_SUBMIT_BO_DUMP) + +struct drm_msm_gem_submit_bo { + __u32 flags; /* in, mask of MSM_SUBMIT_BO_x */ + __u32 handle; /* in, GEM handle */ + __u64 presumed; /* in/out, presumed buffer address */ +}; + +/* Valid submit ioctl flags: */ +#define MSM_SUBMIT_NO_IMPLICIT 0x80000000 /* disable implicit sync */ +#define MSM_SUBMIT_FENCE_FD_IN 0x40000000 /* enable input fence_fd */ +#define MSM_SUBMIT_FENCE_FD_OUT 0x20000000 /* enable output fence_fd */ +#define MSM_SUBMIT_SUDO 0x10000000 /* run submitted cmds from RB */ +#define MSM_SUBMIT_FLAGS ( \ + MSM_SUBMIT_NO_IMPLICIT | \ + MSM_SUBMIT_FENCE_FD_IN | \ + MSM_SUBMIT_FENCE_FD_OUT | \ + MSM_SUBMIT_SUDO | \ + 0) + +/* Each cmdstream submit consists of a table of buffers involved, and + * one or more cmdstream buffers. This allows for conditional execution + * (context-restore), and IB buffers needed for per tile/bin draw cmds. + */ +struct drm_msm_gem_submit { + __u32 flags; /* MSM_PIPE_x | MSM_SUBMIT_x */ + __u32 fence; /* out */ + __u32 nr_bos; /* in, number of submit_bo's */ + __u32 nr_cmds; /* in, number of submit_cmd's */ + __u64 bos; /* in, ptr to array of submit_bo's */ + __u64 cmds; /* in, ptr to array of submit_cmd's */ + __s32 fence_fd; /* in/out fence fd (see MSM_SUBMIT_FENCE_FD_IN/OUT) */ + __u32 queueid; /* in, submitqueue id */ +}; + +/* The normal way to synchronize with the GPU is just to CPU_PREP on + * a buffer if you need to access it from the CPU (other cmdstream + * submission from same or other contexts, PAGE_FLIP ioctl, etc, all + * handle the required synchronization under the hood). This ioctl + * mainly just exists as a way to implement the gallium pipe_fence + * APIs without requiring a dummy bo to synchronize on. 
+ */ +struct drm_msm_wait_fence { + __u32 fence; /* in */ + __u32 pad; + struct drm_msm_timespec timeout; /* in */ + __u32 queueid; /* in, submitqueue id */ +}; + +/* madvise provides a way to tell the kernel in case a buffers contents + * can be discarded under memory pressure, which is useful for userspace + * bo cache where we want to optimistically hold on to buffer allocate + * and potential mmap, but allow the pages to be discarded under memory + * pressure. + * + * Typical usage would involve madvise(DONTNEED) when buffer enters BO + * cache, and madvise(WILLNEED) if trying to recycle buffer from BO cache. + * In the WILLNEED case, 'retained' indicates to userspace whether the + * backing pages still exist. + */ +#define MSM_MADV_WILLNEED 0 /* backing pages are needed, status returned in 'retained' */ +#define MSM_MADV_DONTNEED 1 /* backing pages not needed */ +#define __MSM_MADV_PURGED 2 /* internal state */ + +struct drm_msm_gem_madvise { + __u32 handle; /* in, GEM handle */ + __u32 madv; /* in, MSM_MADV_x */ + __u32 retained; /* out, whether backing store still exists */ +}; + +/* + * Draw queues allow the user to set specific submission parameter. Command + * submissions specify a specific submitqueue to use. 
ID 0 is reserved for + * backwards compatibility as a "default" submitqueue + */ + +#define MSM_SUBMITQUEUE_FLAGS (0) + +struct drm_msm_submitqueue { + __u32 flags; /* in, MSM_SUBMITQUEUE_x */ + __u32 prio; /* in, Priority level */ + __u32 id; /* out, identifier */ +}; + +#define DRM_MSM_GET_PARAM 0x00 +/* placeholder: +#define DRM_MSM_SET_PARAM 0x01 + */ +#define DRM_MSM_GEM_NEW 0x02 +#define DRM_MSM_GEM_INFO 0x03 +#define DRM_MSM_GEM_CPU_PREP 0x04 +#define DRM_MSM_GEM_CPU_FINI 0x05 +#define DRM_MSM_GEM_SUBMIT 0x06 +#define DRM_MSM_WAIT_FENCE 0x07 +#define DRM_MSM_GEM_MADVISE 0x08 +/* placeholder: +#define DRM_MSM_GEM_SVM_NEW 0x09 + */ +#define DRM_MSM_SUBMITQUEUE_NEW 0x0A +#define DRM_MSM_SUBMITQUEUE_CLOSE 0x0B + +#define DRM_IOCTL_MSM_GET_PARAM DRM_IOWR(DRM_COMMAND_BASE + DRM_MSM_GET_PARAM, struct drm_msm_param) +#define DRM_IOCTL_MSM_GEM_NEW DRM_IOWR(DRM_COMMAND_BASE + DRM_MSM_GEM_NEW, struct drm_msm_gem_new) +#define DRM_IOCTL_MSM_GEM_INFO DRM_IOWR(DRM_COMMAND_BASE + DRM_MSM_GEM_INFO, struct drm_msm_gem_info) +#define DRM_IOCTL_MSM_GEM_CPU_PREP DRM_IOW (DRM_COMMAND_BASE + DRM_MSM_GEM_CPU_PREP, struct drm_msm_gem_cpu_prep) +#define DRM_IOCTL_MSM_GEM_CPU_FINI DRM_IOW (DRM_COMMAND_BASE + DRM_MSM_GEM_CPU_FINI, struct drm_msm_gem_cpu_fini) +#define DRM_IOCTL_MSM_GEM_SUBMIT DRM_IOWR(DRM_COMMAND_BASE + DRM_MSM_GEM_SUBMIT, struct drm_msm_gem_submit) +#define DRM_IOCTL_MSM_WAIT_FENCE DRM_IOW (DRM_COMMAND_BASE + DRM_MSM_WAIT_FENCE, struct drm_msm_wait_fence) +#define DRM_IOCTL_MSM_GEM_MADVISE DRM_IOWR(DRM_COMMAND_BASE + DRM_MSM_GEM_MADVISE, struct drm_msm_gem_madvise) +#define DRM_IOCTL_MSM_SUBMITQUEUE_NEW DRM_IOWR(DRM_COMMAND_BASE + DRM_MSM_SUBMITQUEUE_NEW, struct drm_msm_submitqueue) +#define DRM_IOCTL_MSM_SUBMITQUEUE_CLOSE DRM_IOW (DRM_COMMAND_BASE + DRM_MSM_SUBMITQUEUE_CLOSE, __u32) + +#if defined(__cplusplus) +} +#endif + +#endif /* __MSM_DRM_H__ */ diff -Nru mesa-18.3.3/src/freedreno/drm/msm_pipe.c mesa-19.0.1/src/freedreno/drm/msm_pipe.c --- 
mesa-18.3.3/src/freedreno/drm/msm_pipe.c 1970-01-01 00:00:00.000000000 +0000 +++ mesa-19.0.1/src/freedreno/drm/msm_pipe.c 2019-03-31 23:16:37.000000000 +0000 @@ -0,0 +1,220 @@ +/* + * Copyright (C) 2012-2018 Rob Clark + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ * + * Authors: + * Rob Clark + */ + +#include "util/slab.h" + +#include "msm_priv.h" + +static int query_param(struct fd_pipe *pipe, uint32_t param, + uint64_t *value) +{ + struct msm_pipe *msm_pipe = to_msm_pipe(pipe); + struct drm_msm_param req = { + .pipe = msm_pipe->pipe, + .param = param, + }; + int ret; + + ret = drmCommandWriteRead(pipe->dev->fd, DRM_MSM_GET_PARAM, + &req, sizeof(req)); + if (ret) + return ret; + + *value = req.value; + + return 0; +} + +static int msm_pipe_get_param(struct fd_pipe *pipe, + enum fd_param_id param, uint64_t *value) +{ + struct msm_pipe *msm_pipe = to_msm_pipe(pipe); + switch(param) { + case FD_DEVICE_ID: // XXX probably get rid of this.. + case FD_GPU_ID: + *value = msm_pipe->gpu_id; + return 0; + case FD_GMEM_SIZE: + *value = msm_pipe->gmem; + return 0; + case FD_CHIP_ID: + *value = msm_pipe->chip_id; + return 0; + case FD_MAX_FREQ: + return query_param(pipe, MSM_PARAM_MAX_FREQ, value); + case FD_TIMESTAMP: + return query_param(pipe, MSM_PARAM_TIMESTAMP, value); + case FD_NR_RINGS: + return query_param(pipe, MSM_PARAM_NR_RINGS, value); + default: + ERROR_MSG("invalid param id: %d", param); + return -1; + } +} + +static int msm_pipe_wait(struct fd_pipe *pipe, uint32_t timestamp, + uint64_t timeout) +{ + struct fd_device *dev = pipe->dev; + struct drm_msm_wait_fence req = { + .fence = timestamp, + .queueid = to_msm_pipe(pipe)->queue_id, + }; + int ret; + + get_abs_timeout(&req.timeout, timeout); + + ret = drmCommandWrite(dev->fd, DRM_MSM_WAIT_FENCE, &req, sizeof(req)); + if (ret) { + ERROR_MSG("wait-fence failed! 
%d (%s)", ret, strerror(errno)); + return ret; + } + + return 0; +} + +static int open_submitqueue(struct fd_pipe *pipe, uint32_t prio) +{ + struct drm_msm_submitqueue req = { + .flags = 0, + .prio = prio, + }; + uint64_t nr_rings = 1; + int ret; + + if (fd_device_version(pipe->dev) < FD_VERSION_SUBMIT_QUEUES) { + to_msm_pipe(pipe)->queue_id = 0; + return 0; + } + + msm_pipe_get_param(pipe, FD_NR_RINGS, &nr_rings); + + req.prio = MIN2(req.prio, MAX2(nr_rings, 1) - 1); + + ret = drmCommandWriteRead(pipe->dev->fd, DRM_MSM_SUBMITQUEUE_NEW, + &req, sizeof(req)); + if (ret) { + ERROR_MSG("could not create submitqueue! %d (%s)", ret, strerror(errno)); + return ret; + } + + to_msm_pipe(pipe)->queue_id = req.id; + return 0; +} + +static void close_submitqueue(struct fd_pipe *pipe, uint32_t queue_id) +{ + if (fd_device_version(pipe->dev) < FD_VERSION_SUBMIT_QUEUES) + return; + + drmCommandWrite(pipe->dev->fd, DRM_MSM_SUBMITQUEUE_CLOSE, + &queue_id, sizeof(queue_id)); +} + +static void msm_pipe_destroy(struct fd_pipe *pipe) +{ + struct msm_pipe *msm_pipe = to_msm_pipe(pipe); + close_submitqueue(pipe, msm_pipe->queue_id); + free(msm_pipe); +} + +static const struct fd_pipe_funcs sp_funcs = { + .ringbuffer_new_object = msm_ringbuffer_sp_new_object, + .submit_new = msm_submit_sp_new, + .get_param = msm_pipe_get_param, + .wait = msm_pipe_wait, + .destroy = msm_pipe_destroy, +}; + +static const struct fd_pipe_funcs legacy_funcs = { + .ringbuffer_new_object = msm_ringbuffer_new_object, + .submit_new = msm_submit_new, + .get_param = msm_pipe_get_param, + .wait = msm_pipe_wait, + .destroy = msm_pipe_destroy, +}; + +static uint64_t get_param(struct fd_pipe *pipe, uint32_t param) +{ + uint64_t value; + int ret = query_param(pipe, param, &value); + if (ret) { + ERROR_MSG("get-param failed! 
%d (%s)", ret, strerror(errno)); + return 0; + } + return value; +} + +struct fd_pipe * msm_pipe_new(struct fd_device *dev, + enum fd_pipe_id id, uint32_t prio) +{ + static const uint32_t pipe_id[] = { + [FD_PIPE_3D] = MSM_PIPE_3D0, + [FD_PIPE_2D] = MSM_PIPE_2D0, + }; + struct msm_pipe *msm_pipe = NULL; + struct fd_pipe *pipe = NULL; + + msm_pipe = calloc(1, sizeof(*msm_pipe)); + if (!msm_pipe) { + ERROR_MSG("allocation failed"); + goto fail; + } + + pipe = &msm_pipe->base; + + if (fd_device_version(dev) >= FD_VERSION_SOFTPIN) { + pipe->funcs = &sp_funcs; + } else { + pipe->funcs = &legacy_funcs; + } + + /* initialize before get_param(): */ + pipe->dev = dev; + msm_pipe->pipe = pipe_id[id]; + + /* these params should be supported since the first version of drm/msm: */ + msm_pipe->gpu_id = get_param(pipe, MSM_PARAM_GPU_ID); + msm_pipe->gmem = get_param(pipe, MSM_PARAM_GMEM_SIZE); + msm_pipe->chip_id = get_param(pipe, MSM_PARAM_CHIP_ID); + + if (! msm_pipe->gpu_id) + goto fail; + + INFO_MSG("Pipe Info:"); + INFO_MSG(" GPU-id: %d", msm_pipe->gpu_id); + INFO_MSG(" Chip-id: 0x%08x", msm_pipe->chip_id); + INFO_MSG(" GMEM size: 0x%08x", msm_pipe->gmem); + + if (open_submitqueue(pipe, prio)) + goto fail; + + return pipe; +fail: + if (pipe) + fd_pipe_del(pipe); + return NULL; +} diff -Nru mesa-18.3.3/src/freedreno/drm/msm_priv.h mesa-19.0.1/src/freedreno/drm/msm_priv.h --- mesa-18.3.3/src/freedreno/drm/msm_priv.h 1970-01-01 00:00:00.000000000 +0000 +++ mesa-19.0.1/src/freedreno/drm/msm_priv.h 2019-03-31 23:16:37.000000000 +0000 @@ -0,0 +1,140 @@ +/* + * Copyright (C) 2012-2018 Rob Clark + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * 
Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + * Authors: + * Rob Clark + */ + +#ifndef MSM_PRIV_H_ +#define MSM_PRIV_H_ + +#include "freedreno_priv.h" + +#ifndef __user +# define __user +#endif + +#include "msm_drm.h" + +struct msm_device { + struct fd_device base; + struct fd_bo_cache ring_cache; +}; +FD_DEFINE_CAST(fd_device, msm_device); + +struct fd_device * msm_device_new(int fd); + +struct msm_pipe { + struct fd_pipe base; + uint32_t pipe; + uint32_t gpu_id; + uint32_t gmem; + uint32_t chip_id; + uint32_t queue_id; +}; +FD_DEFINE_CAST(fd_pipe, msm_pipe); + +struct fd_pipe * msm_pipe_new(struct fd_device *dev, + enum fd_pipe_id id, uint32_t prio); + +struct fd_ringbuffer * msm_ringbuffer_new_object(struct fd_pipe *pipe, uint32_t size); +struct fd_ringbuffer * msm_ringbuffer_sp_new_object(struct fd_pipe *pipe, uint32_t size); + +struct fd_submit * msm_submit_new(struct fd_pipe *pipe); +struct fd_submit * msm_submit_sp_new(struct fd_pipe *pipe); + +struct msm_bo { + struct fd_bo base; + uint64_t offset; + /* to avoid excess hashtable lookups, cache the ring this bo was + * last emitted on (since that will probably also be the next ring + * it is emitted on) + */ + unsigned current_submit_seqno; + uint32_t idx; +}; +FD_DEFINE_CAST(fd_bo, msm_bo); + +int msm_bo_new_handle(struct 
fd_device *dev, + uint32_t size, uint32_t flags, uint32_t *handle); +struct fd_bo * msm_bo_from_handle(struct fd_device *dev, + uint32_t size, uint32_t handle); + +static inline void +msm_dump_submit(struct drm_msm_gem_submit *req) +{ + for (unsigned i = 0; i < req->nr_bos; i++) { + struct drm_msm_gem_submit_bo *bos = U642VOID(req->bos); + struct drm_msm_gem_submit_bo *bo = &bos[i]; + ERROR_MSG(" bos[%d]: handle=%u, flags=%x", i, bo->handle, bo->flags); + } + for (unsigned i = 0; i < req->nr_cmds; i++) { + struct drm_msm_gem_submit_cmd *cmds = U642VOID(req->cmds); + struct drm_msm_gem_submit_cmd *cmd = &cmds[i]; + struct drm_msm_gem_submit_reloc *relocs = U642VOID(cmd->relocs); + ERROR_MSG(" cmd[%d]: type=%u, submit_idx=%u, submit_offset=%u, size=%u", + i, cmd->type, cmd->submit_idx, cmd->submit_offset, cmd->size); + for (unsigned j = 0; j < cmd->nr_relocs; j++) { + struct drm_msm_gem_submit_reloc *r = &relocs[j]; + ERROR_MSG(" reloc[%d]: submit_offset=%u, or=%08x, shift=%d, reloc_idx=%u" + ", reloc_offset=%"PRIu64, j, r->submit_offset, r->or, r->shift, + r->reloc_idx, r->reloc_offset); + } + } +} + +static inline void get_abs_timeout(struct drm_msm_timespec *tv, uint64_t ns) +{ + struct timespec t; + uint32_t s = ns / 1000000000; + clock_gettime(CLOCK_MONOTONIC, &t); + tv->tv_sec = t.tv_sec + s; + tv->tv_nsec = t.tv_nsec + ns - (s * 1000000000); +} + +/* + * Stupid/simple growable array implementation: + */ + +static inline void * +grow(void *ptr, uint16_t nr, uint16_t *max, uint16_t sz) +{ + if ((nr + 1) > *max) { + if ((*max * 2) < (nr + 1)) + *max = nr + 5; + else + *max = *max * 2; + ptr = realloc(ptr, *max * sz); + } + return ptr; +} + +#define DECLARE_ARRAY(type, name) \ + unsigned short nr_ ## name, max_ ## name; \ + type * name; + +#define APPEND(x, name) ({ \ + (x)->name = grow((x)->name, (x)->nr_ ## name, &(x)->max_ ## name, sizeof((x)->name[0])); \ + (x)->nr_ ## name ++; \ +}) + +#endif /* MSM_PRIV_H_ */ diff -Nru 
mesa-18.3.3/src/freedreno/drm/msm_ringbuffer.c mesa-19.0.1/src/freedreno/drm/msm_ringbuffer.c --- mesa-18.3.3/src/freedreno/drm/msm_ringbuffer.c 1970-01-01 00:00:00.000000000 +0000 +++ mesa-19.0.1/src/freedreno/drm/msm_ringbuffer.c 2019-03-31 23:16:37.000000000 +0000 @@ -0,0 +1,722 @@ +/* + * Copyright (C) 2012-2018 Rob Clark + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ * + * Authors: + * Rob Clark + */ + +#include +#include + +#include "util/hash_table.h" +#include "util/set.h" +#include "util/slab.h" + +#include "drm/freedreno_ringbuffer.h" +#include "msm_priv.h" + +/* The legacy implementation of submit/ringbuffer, which still does the + * traditional reloc and cmd tracking + */ + + +#define INIT_SIZE 0x1000 + +static pthread_mutex_t idx_lock = PTHREAD_MUTEX_INITIALIZER; + + +struct msm_submit { + struct fd_submit base; + + DECLARE_ARRAY(struct drm_msm_gem_submit_bo, submit_bos); + DECLARE_ARRAY(struct fd_bo *, bos); + + unsigned seqno; + + /* maps fd_bo to idx in bos table: */ + struct hash_table *bo_table; + + struct slab_mempool ring_pool; + + /* hash-set of associated rings: */ + struct set *ring_set; + + struct fd_ringbuffer *primary; + + /* Allow for sub-allocation of stateobj ring buffers (ie. sharing + * the same underlying bo).. + * + * We also rely on previous stateobj having been fully constructed + * so we can reclaim extra space at it's end. + */ + struct fd_ringbuffer *suballoc_ring; +}; +FD_DEFINE_CAST(fd_submit, msm_submit); + +/* for FD_RINGBUFFER_GROWABLE rb's, tracks the 'finalized' cmdstream buffers + * and sizes. Ie. a finalized buffer can have no more commands appended to + * it. 
+ */ +struct msm_cmd { + struct fd_bo *ring_bo; + unsigned size; + DECLARE_ARRAY(struct drm_msm_gem_submit_reloc, relocs); +}; + +static struct msm_cmd * +cmd_new(struct fd_bo *ring_bo) +{ + struct msm_cmd *cmd = malloc(sizeof(*cmd)); + cmd->ring_bo = fd_bo_ref(ring_bo); + cmd->size = 0; + cmd->nr_relocs = cmd->max_relocs = 0; + cmd->relocs = NULL; + return cmd; +} + +static void +cmd_free(struct msm_cmd *cmd) +{ + fd_bo_del(cmd->ring_bo); + free(cmd->relocs); + free(cmd); +} + +/* for _FD_RINGBUFFER_OBJECT rb's we need to track the bo's and flags to + * later copy into the submit when the stateobj rb is later referenced by + * a regular rb: + */ +struct msm_reloc_bo { + struct fd_bo *bo; + unsigned flags; +}; + +struct msm_ringbuffer { + struct fd_ringbuffer base; + + /* for FD_RINGBUFFER_STREAMING rb's which are sub-allocated */ + unsigned offset; + + union { + /* for _FD_RINGBUFFER_OBJECT case: */ + struct { + struct fd_pipe *pipe; + DECLARE_ARRAY(struct msm_reloc_bo, reloc_bos); + struct set *ring_set; + }; + /* for other cases: */ + struct { + struct fd_submit *submit; + DECLARE_ARRAY(struct msm_cmd *, cmds); + }; + } u; + + struct msm_cmd *cmd; /* current cmd */ + struct fd_bo *ring_bo; +}; +FD_DEFINE_CAST(fd_ringbuffer, msm_ringbuffer); + +static void finalize_current_cmd(struct fd_ringbuffer *ring); +static struct fd_ringbuffer * msm_ringbuffer_init( + struct msm_ringbuffer *msm_ring, + uint32_t size, enum fd_ringbuffer_flags flags); + +/* add (if needed) bo to submit and return index: */ +static uint32_t +append_bo(struct msm_submit *submit, struct fd_bo *bo, uint32_t flags) +{ + struct msm_bo *msm_bo = to_msm_bo(bo); + uint32_t idx; + pthread_mutex_lock(&idx_lock); + if (likely(msm_bo->current_submit_seqno == submit->seqno)) { + idx = msm_bo->idx; + } else { + uint32_t hash = _mesa_hash_pointer(bo); + struct hash_entry *entry; + + entry = _mesa_hash_table_search_pre_hashed(submit->bo_table, hash, bo); + if (entry) { + /* found */ + idx = 
(uint32_t)(uintptr_t)entry->data; + } else { + idx = APPEND(submit, submit_bos); + idx = APPEND(submit, bos); + + submit->submit_bos[idx].flags = 0; + submit->submit_bos[idx].handle = bo->handle; + submit->submit_bos[idx].presumed = 0; + + submit->bos[idx] = fd_bo_ref(bo); + + _mesa_hash_table_insert_pre_hashed(submit->bo_table, hash, bo, + (void *)(uintptr_t)idx); + } + msm_bo->current_submit_seqno = submit->seqno; + msm_bo->idx = idx; + } + pthread_mutex_unlock(&idx_lock); + if (flags & FD_RELOC_READ) + submit->submit_bos[idx].flags |= MSM_SUBMIT_BO_READ; + if (flags & FD_RELOC_WRITE) + submit->submit_bos[idx].flags |= MSM_SUBMIT_BO_WRITE; + return idx; +} + +static void +append_ring(struct set *set, struct fd_ringbuffer *ring) +{ + uint32_t hash = _mesa_hash_pointer(ring); + + if (!_mesa_set_search_pre_hashed(set, hash, ring)) { + fd_ringbuffer_ref(ring); + _mesa_set_add_pre_hashed(set, hash, ring); + } +} + +static void +msm_submit_suballoc_ring_bo(struct fd_submit *submit, + struct msm_ringbuffer *msm_ring, uint32_t size) +{ + struct msm_submit *msm_submit = to_msm_submit(submit); + unsigned suballoc_offset = 0; + struct fd_bo *suballoc_bo = NULL; + + if (msm_submit->suballoc_ring) { + struct msm_ringbuffer *suballoc_ring = + to_msm_ringbuffer(msm_submit->suballoc_ring); + + suballoc_bo = suballoc_ring->ring_bo; + suballoc_offset = fd_ringbuffer_size(msm_submit->suballoc_ring) + + suballoc_ring->offset; + + suballoc_offset = align(suballoc_offset, 0x10); + + if ((size + suballoc_offset) > suballoc_bo->size) { + suballoc_bo = NULL; + } + } + + if (!suballoc_bo) { + // TODO possibly larger size for streaming bo? 
+ msm_ring->ring_bo = fd_bo_new_ring( + submit->pipe->dev, 0x8000, 0); + msm_ring->offset = 0; + } else { + msm_ring->ring_bo = fd_bo_ref(suballoc_bo); + msm_ring->offset = suballoc_offset; + } + + struct fd_ringbuffer *old_suballoc_ring = msm_submit->suballoc_ring; + + msm_submit->suballoc_ring = fd_ringbuffer_ref(&msm_ring->base); + + if (old_suballoc_ring) + fd_ringbuffer_del(old_suballoc_ring); +} + +static struct fd_ringbuffer * +msm_submit_new_ringbuffer(struct fd_submit *submit, uint32_t size, + enum fd_ringbuffer_flags flags) +{ + struct msm_submit *msm_submit = to_msm_submit(submit); + struct msm_ringbuffer *msm_ring; + + msm_ring = slab_alloc_st(&msm_submit->ring_pool); + + msm_ring->u.submit = submit; + + /* NOTE: needs to be before _suballoc_ring_bo() since it could + * increment the refcnt of the current ring + */ + msm_ring->base.refcnt = 1; + + if (flags & FD_RINGBUFFER_STREAMING) { + msm_submit_suballoc_ring_bo(submit, msm_ring, size); + } else { + if (flags & FD_RINGBUFFER_GROWABLE) + size = INIT_SIZE; + + msm_ring->offset = 0; + msm_ring->ring_bo = fd_bo_new_ring(submit->pipe->dev, size, 0); + } + + if (!msm_ringbuffer_init(msm_ring, size, flags)) + return NULL; + + if (flags & FD_RINGBUFFER_PRIMARY) { + debug_assert(!msm_submit->primary); + msm_submit->primary = fd_ringbuffer_ref(&msm_ring->base); + } + + return &msm_ring->base; +} + +static struct drm_msm_gem_submit_reloc * +handle_stateobj_relocs(struct msm_submit *submit, struct msm_ringbuffer *ring) +{ + struct msm_cmd *cmd = ring->cmd; + struct drm_msm_gem_submit_reloc *relocs; + + relocs = malloc(cmd->nr_relocs * sizeof(*relocs)); + + for (unsigned i = 0; i < cmd->nr_relocs; i++) { + unsigned idx = cmd->relocs[i].reloc_idx; + struct fd_bo *bo = ring->u.reloc_bos[idx].bo; + unsigned flags = 0; + + if (ring->u.reloc_bos[idx].flags & MSM_SUBMIT_BO_READ) + flags |= FD_RELOC_READ; + if (ring->u.reloc_bos[idx].flags & MSM_SUBMIT_BO_WRITE) + flags |= FD_RELOC_WRITE; + + relocs[i] = cmd->relocs[i]; 
+ relocs[i].reloc_idx = append_bo(submit, bo, flags); + } + + return relocs; +} + +static int +msm_submit_flush(struct fd_submit *submit, int in_fence_fd, + int *out_fence_fd, uint32_t *out_fence) +{ + struct msm_submit *msm_submit = to_msm_submit(submit); + struct msm_pipe *msm_pipe = to_msm_pipe(submit->pipe); + struct drm_msm_gem_submit req = { + .flags = msm_pipe->pipe, + .queueid = msm_pipe->queue_id, + }; + int ret; + + debug_assert(msm_submit->primary); + + finalize_current_cmd(msm_submit->primary); + append_ring(msm_submit->ring_set, msm_submit->primary); + + unsigned nr_cmds = 0; + unsigned nr_objs = 0; + + set_foreach(msm_submit->ring_set, entry) { + struct fd_ringbuffer *ring = (void *)entry->key; + if (ring->flags & _FD_RINGBUFFER_OBJECT) { + nr_cmds += 1; + nr_objs += 1; + } else { + if (ring != msm_submit->primary) + finalize_current_cmd(ring); + nr_cmds += to_msm_ringbuffer(ring)->u.nr_cmds; + } + } + + void *obj_relocs[nr_objs]; + struct drm_msm_gem_submit_cmd cmds[nr_cmds]; + unsigned i = 0, o = 0; + + set_foreach(msm_submit->ring_set, entry) { + struct fd_ringbuffer *ring = (void *)entry->key; + struct msm_ringbuffer *msm_ring = to_msm_ringbuffer(ring); + + debug_assert(i < nr_cmds); + + // TODO handle relocs: + if (ring->flags & _FD_RINGBUFFER_OBJECT) { + + debug_assert(o < nr_objs); + + void *relocs = handle_stateobj_relocs(msm_submit, msm_ring); + obj_relocs[o++] = relocs; + + cmds[i].type = MSM_SUBMIT_CMD_IB_TARGET_BUF; + cmds[i].submit_idx = + append_bo(msm_submit, msm_ring->ring_bo, FD_RELOC_READ); + cmds[i].submit_offset = msm_ring->offset; + cmds[i].size = offset_bytes(ring->cur, ring->start); + cmds[i].pad = 0; + cmds[i].nr_relocs = msm_ring->cmd->nr_relocs; + cmds[i].relocs = VOID2U64(relocs); + + i++; + } else { + for (unsigned j = 0; j < msm_ring->u.nr_cmds; j++) { + if (ring->flags & FD_RINGBUFFER_PRIMARY) { + cmds[i].type = MSM_SUBMIT_CMD_BUF; + } else { + cmds[i].type = MSM_SUBMIT_CMD_IB_TARGET_BUF; + } + cmds[i].submit_idx = 
append_bo(msm_submit, + msm_ring->u.cmds[j]->ring_bo, FD_RELOC_READ); + cmds[i].submit_offset = msm_ring->offset; + cmds[i].size = msm_ring->u.cmds[j]->size; + cmds[i].pad = 0; + cmds[i].nr_relocs = msm_ring->u.cmds[j]->nr_relocs; + cmds[i].relocs = VOID2U64(msm_ring->u.cmds[j]->relocs); + + i++; + } + } + } + + if (in_fence_fd != -1) { + req.flags |= MSM_SUBMIT_FENCE_FD_IN | MSM_SUBMIT_NO_IMPLICIT; + req.fence_fd = in_fence_fd; + } + + if (out_fence_fd) { + req.flags |= MSM_SUBMIT_FENCE_FD_OUT; + } + + /* needs to be after get_cmd() as that could create bos/cmds table: */ + req.bos = VOID2U64(msm_submit->submit_bos), + req.nr_bos = msm_submit->nr_submit_bos; + req.cmds = VOID2U64(cmds), + req.nr_cmds = nr_cmds; + + DEBUG_MSG("nr_cmds=%u, nr_bos=%u", req.nr_cmds, req.nr_bos); + + ret = drmCommandWriteRead(submit->pipe->dev->fd, DRM_MSM_GEM_SUBMIT, + &req, sizeof(req)); + if (ret) { + ERROR_MSG("submit failed: %d (%s)", ret, strerror(errno)); + msm_dump_submit(&req); + } else if (!ret) { + if (out_fence) + *out_fence = req.fence; + + if (out_fence_fd) + *out_fence_fd = req.fence_fd; + } + + for (unsigned o = 0; o < nr_objs; o++) + free(obj_relocs[o]); + + return ret; +} + +static void +unref_rings(struct set_entry *entry) +{ + struct fd_ringbuffer *ring = (void *)entry->key; + fd_ringbuffer_del(ring); +} + +static void +msm_submit_destroy(struct fd_submit *submit) +{ + struct msm_submit *msm_submit = to_msm_submit(submit); + + if (msm_submit->primary) + fd_ringbuffer_del(msm_submit->primary); + if (msm_submit->suballoc_ring) + fd_ringbuffer_del(msm_submit->suballoc_ring); + + _mesa_hash_table_destroy(msm_submit->bo_table, NULL); + _mesa_set_destroy(msm_submit->ring_set, unref_rings); + + // TODO it would be nice to have a way to debug_assert() if all + // rb's haven't been free'd back to the slab, because that is + // an indication that we are leaking bo's + slab_destroy(&msm_submit->ring_pool); + + for (unsigned i = 0; i < msm_submit->nr_bos; i++) + 
fd_bo_del(msm_submit->bos[i]); + + free(msm_submit->submit_bos); + free(msm_submit->bos); + free(msm_submit); +} + +static const struct fd_submit_funcs submit_funcs = { + .new_ringbuffer = msm_submit_new_ringbuffer, + .flush = msm_submit_flush, + .destroy = msm_submit_destroy, +}; + +struct fd_submit * +msm_submit_new(struct fd_pipe *pipe) +{ + struct msm_submit *msm_submit = calloc(1, sizeof(*msm_submit)); + struct fd_submit *submit; + static unsigned submit_cnt = 0; + + msm_submit->seqno = ++submit_cnt; + msm_submit->bo_table = _mesa_hash_table_create(NULL, + _mesa_hash_pointer, _mesa_key_pointer_equal); + msm_submit->ring_set = _mesa_set_create(NULL, + _mesa_hash_pointer, _mesa_key_pointer_equal); + // TODO tune size: + slab_create(&msm_submit->ring_pool, sizeof(struct msm_ringbuffer), 16); + + submit = &msm_submit->base; + submit->pipe = pipe; + submit->funcs = &submit_funcs; + + return submit; +} + + +static void +finalize_current_cmd(struct fd_ringbuffer *ring) +{ + struct msm_ringbuffer *msm_ring = to_msm_ringbuffer(ring); + + debug_assert(!(ring->flags & _FD_RINGBUFFER_OBJECT)); + + if (!msm_ring->cmd) + return; + + debug_assert(msm_ring->cmd->ring_bo == msm_ring->ring_bo); + + unsigned idx = APPEND(&msm_ring->u, cmds); + + msm_ring->u.cmds[idx] = msm_ring->cmd; + msm_ring->cmd = NULL; + + msm_ring->u.cmds[idx]->size = offset_bytes(ring->cur, ring->start); +} + +static void +msm_ringbuffer_grow(struct fd_ringbuffer *ring, uint32_t size) +{ + struct msm_ringbuffer *msm_ring = to_msm_ringbuffer(ring); + struct fd_pipe *pipe = msm_ring->u.submit->pipe; + + debug_assert(ring->flags & FD_RINGBUFFER_GROWABLE); + + finalize_current_cmd(ring); + + fd_bo_del(msm_ring->ring_bo); + msm_ring->ring_bo = fd_bo_new_ring(pipe->dev, size, 0); + msm_ring->cmd = cmd_new(msm_ring->ring_bo); + + ring->start = fd_bo_map(msm_ring->ring_bo); + ring->end = &(ring->start[size/4]); + ring->cur = ring->start; + ring->size = size; +} + +static void +msm_ringbuffer_emit_reloc(struct 
fd_ringbuffer *ring, + const struct fd_reloc *reloc) +{ + struct msm_ringbuffer *msm_ring = to_msm_ringbuffer(ring); + struct fd_pipe *pipe; + unsigned reloc_idx; + + if (ring->flags & _FD_RINGBUFFER_OBJECT) { + unsigned idx = APPEND(&msm_ring->u, reloc_bos); + + msm_ring->u.reloc_bos[idx].bo = fd_bo_ref(reloc->bo); + msm_ring->u.reloc_bos[idx].flags = reloc->flags; + + /* this gets fixed up at submit->flush() time, since this state- + * object rb can be used with many different submits + */ + reloc_idx = idx; + + pipe = msm_ring->u.pipe; + } else { + struct msm_submit *msm_submit = + to_msm_submit(msm_ring->u.submit); + + reloc_idx = append_bo(msm_submit, reloc->bo, reloc->flags); + + pipe = msm_ring->u.submit->pipe; + } + + struct drm_msm_gem_submit_reloc *r; + unsigned idx = APPEND(msm_ring->cmd, relocs); + + r = &msm_ring->cmd->relocs[idx]; + + r->reloc_idx = reloc_idx; + r->reloc_offset = reloc->offset; + r->or = reloc->or; + r->shift = reloc->shift; + r->submit_offset = offset_bytes(ring->cur, ring->start) + + msm_ring->offset; + + ring->cur++; + + if (pipe->gpu_id >= 500) { + idx = APPEND(msm_ring->cmd, relocs); + r = &msm_ring->cmd->relocs[idx]; + + r->reloc_idx = reloc_idx; + r->reloc_offset = reloc->offset; + r->or = reloc->orhi; + r->shift = reloc->shift - 32; + r->submit_offset = offset_bytes(ring->cur, ring->start) + + msm_ring->offset; + + ring->cur++; + } +} + +static void +append_stateobj_rings(struct msm_submit *submit, struct fd_ringbuffer *target) +{ + struct msm_ringbuffer *msm_target = to_msm_ringbuffer(target); + + debug_assert(target->flags & _FD_RINGBUFFER_OBJECT); + + set_foreach(msm_target->u.ring_set, entry) { + struct fd_ringbuffer *ring = (void *)entry->key; + + append_ring(submit->ring_set, ring); + + if (ring->flags & _FD_RINGBUFFER_OBJECT) { + append_stateobj_rings(submit, ring); + } + } +} + +static uint32_t +msm_ringbuffer_emit_reloc_ring(struct fd_ringbuffer *ring, + struct fd_ringbuffer *target, uint32_t cmd_idx) +{ + struct 
msm_ringbuffer *msm_target = to_msm_ringbuffer(target); + struct msm_ringbuffer *msm_ring = to_msm_ringbuffer(ring); + struct fd_bo *bo; + uint32_t size; + + if ((target->flags & FD_RINGBUFFER_GROWABLE) && + (cmd_idx < msm_target->u.nr_cmds)) { + bo = msm_target->u.cmds[cmd_idx]->ring_bo; + size = msm_target->u.cmds[cmd_idx]->size; + } else { + bo = msm_target->ring_bo; + size = offset_bytes(target->cur, target->start); + } + + msm_ringbuffer_emit_reloc(ring, &(struct fd_reloc){ + .bo = bo, + .flags = FD_RELOC_READ, + .offset = msm_target->offset, + }); + + if ((target->flags & _FD_RINGBUFFER_OBJECT) && + !(ring->flags & _FD_RINGBUFFER_OBJECT)) { + struct msm_submit *msm_submit = to_msm_submit(msm_ring->u.submit); + + append_stateobj_rings(msm_submit, target); + } + + if (ring->flags & _FD_RINGBUFFER_OBJECT) { + append_ring(msm_ring->u.ring_set, target); + } else { + struct msm_submit *msm_submit = to_msm_submit(msm_ring->u.submit); + append_ring(msm_submit->ring_set, target); + } + + return size; +} + +static uint32_t +msm_ringbuffer_cmd_count(struct fd_ringbuffer *ring) +{ + if (ring->flags & FD_RINGBUFFER_GROWABLE) + return to_msm_ringbuffer(ring)->u.nr_cmds + 1; + return 1; +} + +static void +msm_ringbuffer_destroy(struct fd_ringbuffer *ring) +{ + struct msm_ringbuffer *msm_ring = to_msm_ringbuffer(ring); + + fd_bo_del(msm_ring->ring_bo); + if (msm_ring->cmd) + cmd_free(msm_ring->cmd); + + if (ring->flags & _FD_RINGBUFFER_OBJECT) { + for (unsigned i = 0; i < msm_ring->u.nr_reloc_bos; i++) { + fd_bo_del(msm_ring->u.reloc_bos[i].bo); + } + + _mesa_set_destroy(msm_ring->u.ring_set, unref_rings); + + free(msm_ring->u.reloc_bos); + free(msm_ring); + } else { + struct fd_submit *submit = msm_ring->u.submit; + + for (unsigned i = 0; i < msm_ring->u.nr_cmds; i++) { + cmd_free(msm_ring->u.cmds[i]); + } + + free(msm_ring->u.cmds); + slab_free_st(&to_msm_submit(submit)->ring_pool, msm_ring); + } +} + +static const struct fd_ringbuffer_funcs ring_funcs = { + .grow = 
msm_ringbuffer_grow, + .emit_reloc = msm_ringbuffer_emit_reloc, + .emit_reloc_ring = msm_ringbuffer_emit_reloc_ring, + .cmd_count = msm_ringbuffer_cmd_count, + .destroy = msm_ringbuffer_destroy, +}; + +static inline struct fd_ringbuffer * +msm_ringbuffer_init(struct msm_ringbuffer *msm_ring, uint32_t size, + enum fd_ringbuffer_flags flags) +{ + struct fd_ringbuffer *ring = &msm_ring->base; + + debug_assert(msm_ring->ring_bo); + + uint8_t *base = fd_bo_map(msm_ring->ring_bo); + ring->start = (void *)(base + msm_ring->offset); + ring->end = &(ring->start[size/4]); + ring->cur = ring->start; + + ring->size = size; + ring->flags = flags; + + ring->funcs = &ring_funcs; + + msm_ring->u.cmds = NULL; + msm_ring->u.nr_cmds = msm_ring->u.max_cmds = 0; + + msm_ring->cmd = cmd_new(msm_ring->ring_bo); + + return ring; +} + +struct fd_ringbuffer * +msm_ringbuffer_new_object(struct fd_pipe *pipe, uint32_t size) +{ + struct msm_ringbuffer *msm_ring = malloc(sizeof(*msm_ring)); + + msm_ring->u.pipe = pipe; + msm_ring->offset = 0; + msm_ring->ring_bo = fd_bo_new_ring(pipe->dev, size, 0); + msm_ring->base.refcnt = 1; + + msm_ring->u.reloc_bos = NULL; + msm_ring->u.nr_reloc_bos = msm_ring->u.max_reloc_bos = 0; + + msm_ring->u.ring_set = _mesa_set_create(NULL, + _mesa_hash_pointer, _mesa_key_pointer_equal); + + return msm_ringbuffer_init(msm_ring, size, _FD_RINGBUFFER_OBJECT); +} diff -Nru mesa-18.3.3/src/freedreno/drm/msm_ringbuffer_sp.c mesa-19.0.1/src/freedreno/drm/msm_ringbuffer_sp.c --- mesa-18.3.3/src/freedreno/drm/msm_ringbuffer_sp.c 1970-01-01 00:00:00.000000000 +0000 +++ mesa-19.0.1/src/freedreno/drm/msm_ringbuffer_sp.c 2019-03-31 23:16:37.000000000 +0000 @@ -0,0 +1,568 @@ +/* + * Copyright (C) 2018 Rob Clark + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, 
modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + * Authors: + * Rob Clark + */ + +#include +#include + +#include "util/hash_table.h" +#include "util/slab.h" + +#include "drm/freedreno_ringbuffer.h" +#include "msm_priv.h" + +/* A "softpin" implementation of submit/ringbuffer, which lowers CPU overhead + * by avoiding the additional tracking necessary to build cmds/relocs tables + * (but still builds a bos table) + */ + + +#define INIT_SIZE 0x1000 + +static pthread_mutex_t idx_lock = PTHREAD_MUTEX_INITIALIZER; + + +struct msm_submit_sp { + struct fd_submit base; + + DECLARE_ARRAY(struct drm_msm_gem_submit_bo, submit_bos); + DECLARE_ARRAY(struct fd_bo *, bos); + + unsigned seqno; + + /* maps fd_bo to idx in bos table: */ + struct hash_table *bo_table; + + struct slab_mempool ring_pool; + + struct fd_ringbuffer *primary; + + /* Allow for sub-allocation of stateobj ring buffers (ie. sharing + * the same underlying bo).. + * + * We also rely on previous stateobj having been fully constructed + * so we can reclaim extra space at it's end. 
+ */ + struct fd_ringbuffer *suballoc_ring; +}; +FD_DEFINE_CAST(fd_submit, msm_submit_sp); + +/* for FD_RINGBUFFER_GROWABLE rb's, tracks the 'finalized' cmdstream buffers + * and sizes. Ie. a finalized buffer can have no more commands appended to + * it. + */ +struct msm_cmd_sp { + struct fd_bo *ring_bo; + unsigned size; +}; + +/* for _FD_RINGBUFFER_OBJECT rb's we need to track the bo's and flags to + * later copy into the submit when the stateobj rb is later referenced by + * a regular rb: + */ +struct msm_reloc_bo_sp { + struct fd_bo *bo; + unsigned flags; +}; + +struct msm_ringbuffer_sp { + struct fd_ringbuffer base; + + /* for FD_RINGBUFFER_STREAMING rb's which are sub-allocated */ + unsigned offset; + +// TODO check disasm.. hopefully compilers CSE can realize that +// reloc_bos and cmds are at the same offsets and optimize some +// divergent cases into single case + union { + /* for _FD_RINGBUFFER_OBJECT case: */ + struct { + struct fd_pipe *pipe; + DECLARE_ARRAY(struct msm_reloc_bo_sp, reloc_bos); + }; + /* for other cases: */ + struct { + struct fd_submit *submit; + DECLARE_ARRAY(struct msm_cmd_sp, cmds); + }; + } u; + + struct fd_bo *ring_bo; +}; +FD_DEFINE_CAST(fd_ringbuffer, msm_ringbuffer_sp); + +static void finalize_current_cmd(struct fd_ringbuffer *ring); +static struct fd_ringbuffer * msm_ringbuffer_sp_init( + struct msm_ringbuffer_sp *msm_ring, + uint32_t size, enum fd_ringbuffer_flags flags); + +/* add (if needed) bo to submit and return index: */ +static uint32_t +append_bo(struct msm_submit_sp *submit, struct fd_bo *bo, uint32_t flags) +{ + struct msm_bo *msm_bo = to_msm_bo(bo); + uint32_t idx; + pthread_mutex_lock(&idx_lock); + if (likely(msm_bo->current_submit_seqno == submit->seqno)) { + idx = msm_bo->idx; + } else { + uint32_t hash = _mesa_hash_pointer(bo); + struct hash_entry *entry; + + entry = _mesa_hash_table_search_pre_hashed(submit->bo_table, hash, bo); + if (entry) { + /* found */ + idx = (uint32_t)(uintptr_t)entry->data; + } else { + 
idx = APPEND(submit, submit_bos); + idx = APPEND(submit, bos); + + submit->submit_bos[idx].flags = 0; + submit->submit_bos[idx].handle = bo->handle; + submit->submit_bos[idx].presumed = 0; + + submit->bos[idx] = fd_bo_ref(bo); + + _mesa_hash_table_insert_pre_hashed(submit->bo_table, hash, bo, + (void *)(uintptr_t)idx); + } + msm_bo->current_submit_seqno = submit->seqno; + msm_bo->idx = idx; + } + pthread_mutex_unlock(&idx_lock); + if (flags & FD_RELOC_READ) + submit->submit_bos[idx].flags |= MSM_SUBMIT_BO_READ; + if (flags & FD_RELOC_WRITE) + submit->submit_bos[idx].flags |= MSM_SUBMIT_BO_WRITE; + if (flags & FD_RELOC_DUMP) + submit->submit_bos[idx].flags |= MSM_SUBMIT_BO_DUMP; + return idx; +} + +static void +msm_submit_suballoc_ring_bo(struct fd_submit *submit, + struct msm_ringbuffer_sp *msm_ring, uint32_t size) +{ + struct msm_submit_sp *msm_submit = to_msm_submit_sp(submit); + unsigned suballoc_offset = 0; + struct fd_bo *suballoc_bo = NULL; + + if (msm_submit->suballoc_ring) { + struct msm_ringbuffer_sp *suballoc_ring = + to_msm_ringbuffer_sp(msm_submit->suballoc_ring); + + suballoc_bo = suballoc_ring->ring_bo; + suballoc_offset = fd_ringbuffer_size(msm_submit->suballoc_ring) + + suballoc_ring->offset; + + suballoc_offset = align(suballoc_offset, 0x10); + + if ((size + suballoc_offset) > suballoc_bo->size) { + suballoc_bo = NULL; + } + } + + if (!suballoc_bo) { + // TODO possibly larger size for streaming bo? 
+ msm_ring->ring_bo = fd_bo_new_ring(submit->pipe->dev, + 0x8000, DRM_FREEDRENO_GEM_GPUREADONLY); + msm_ring->offset = 0; + } else { + msm_ring->ring_bo = fd_bo_ref(suballoc_bo); + msm_ring->offset = suballoc_offset; + } + + struct fd_ringbuffer *old_suballoc_ring = msm_submit->suballoc_ring; + + msm_submit->suballoc_ring = fd_ringbuffer_ref(&msm_ring->base); + + if (old_suballoc_ring) + fd_ringbuffer_del(old_suballoc_ring); +} + +static struct fd_ringbuffer * +msm_submit_sp_new_ringbuffer(struct fd_submit *submit, uint32_t size, + enum fd_ringbuffer_flags flags) +{ + struct msm_submit_sp *msm_submit = to_msm_submit_sp(submit); + struct msm_ringbuffer_sp *msm_ring; + + msm_ring = slab_alloc_st(&msm_submit->ring_pool); + + msm_ring->u.submit = submit; + + /* NOTE: needs to be before _suballoc_ring_bo() since it could + * increment the refcnt of the current ring + */ + msm_ring->base.refcnt = 1; + + if (flags & FD_RINGBUFFER_STREAMING) { + msm_submit_suballoc_ring_bo(submit, msm_ring, size); + } else { + if (flags & FD_RINGBUFFER_GROWABLE) + size = INIT_SIZE; + + msm_ring->offset = 0; + msm_ring->ring_bo = fd_bo_new_ring(submit->pipe->dev, size, + DRM_FREEDRENO_GEM_GPUREADONLY); + } + + if (!msm_ringbuffer_sp_init(msm_ring, size, flags)) + return NULL; + + if (flags & FD_RINGBUFFER_PRIMARY) { + debug_assert(!msm_submit->primary); + msm_submit->primary = fd_ringbuffer_ref(&msm_ring->base); + } + + return &msm_ring->base; +} + +static int +msm_submit_sp_flush(struct fd_submit *submit, int in_fence_fd, + int *out_fence_fd, uint32_t *out_fence) +{ + struct msm_submit_sp *msm_submit = to_msm_submit_sp(submit); + struct msm_pipe *msm_pipe = to_msm_pipe(submit->pipe); + struct drm_msm_gem_submit req = { + .flags = msm_pipe->pipe, + .queueid = msm_pipe->queue_id, + }; + int ret; + + debug_assert(msm_submit->primary); + finalize_current_cmd(msm_submit->primary); + + struct msm_ringbuffer_sp *primary = to_msm_ringbuffer_sp(msm_submit->primary); + struct drm_msm_gem_submit_cmd 
cmds[primary->u.nr_cmds]; + + for (unsigned i = 0; i < primary->u.nr_cmds; i++) { + cmds[i].type = MSM_SUBMIT_CMD_BUF; + cmds[i].submit_idx = append_bo(msm_submit, + primary->u.cmds[i].ring_bo, FD_RELOC_READ | FD_RELOC_DUMP); + cmds[i].submit_offset = primary->offset; + cmds[i].size = primary->u.cmds[i].size; + cmds[i].pad = 0; + cmds[i].nr_relocs = 0; + } + + if (in_fence_fd != -1) { + req.flags |= MSM_SUBMIT_FENCE_FD_IN | MSM_SUBMIT_NO_IMPLICIT; + req.fence_fd = in_fence_fd; + } + + if (out_fence_fd) { + req.flags |= MSM_SUBMIT_FENCE_FD_OUT; + } + + /* needs to be after get_cmd() as that could create bos/cmds table: */ + req.bos = VOID2U64(msm_submit->submit_bos), + req.nr_bos = msm_submit->nr_submit_bos; + req.cmds = VOID2U64(cmds), + req.nr_cmds = primary->u.nr_cmds; + + DEBUG_MSG("nr_cmds=%u, nr_bos=%u", req.nr_cmds, req.nr_bos); + + ret = drmCommandWriteRead(submit->pipe->dev->fd, DRM_MSM_GEM_SUBMIT, + &req, sizeof(req)); + if (ret) { + ERROR_MSG("submit failed: %d (%s)", ret, strerror(errno)); + msm_dump_submit(&req); + } else if (!ret) { + if (out_fence) + *out_fence = req.fence; + + if (out_fence_fd) + *out_fence_fd = req.fence_fd; + } + + return ret; +} + +static void +msm_submit_sp_destroy(struct fd_submit *submit) +{ + struct msm_submit_sp *msm_submit = to_msm_submit_sp(submit); + + if (msm_submit->primary) + fd_ringbuffer_del(msm_submit->primary); + if (msm_submit->suballoc_ring) + fd_ringbuffer_del(msm_submit->suballoc_ring); + + _mesa_hash_table_destroy(msm_submit->bo_table, NULL); + + // TODO it would be nice to have a way to debug_assert() if all + // rb's haven't been free'd back to the slab, because that is + // an indication that we are leaking bo's + slab_destroy(&msm_submit->ring_pool); + + for (unsigned i = 0; i < msm_submit->nr_bos; i++) + fd_bo_del(msm_submit->bos[i]); + + free(msm_submit->submit_bos); + free(msm_submit->bos); + free(msm_submit); +} + +static const struct fd_submit_funcs submit_funcs = { + .new_ringbuffer = 
msm_submit_sp_new_ringbuffer, + .flush = msm_submit_sp_flush, + .destroy = msm_submit_sp_destroy, +}; + +struct fd_submit * +msm_submit_sp_new(struct fd_pipe *pipe) +{ + struct msm_submit_sp *msm_submit = calloc(1, sizeof(*msm_submit)); + struct fd_submit *submit; + static unsigned submit_cnt = 0; + + msm_submit->seqno = ++submit_cnt; + msm_submit->bo_table = _mesa_hash_table_create(NULL, + _mesa_hash_pointer, _mesa_key_pointer_equal); + // TODO tune size: + slab_create(&msm_submit->ring_pool, sizeof(struct msm_ringbuffer_sp), 16); + + submit = &msm_submit->base; + submit->pipe = pipe; + submit->funcs = &submit_funcs; + + return submit; +} + + +static void +finalize_current_cmd(struct fd_ringbuffer *ring) +{ + debug_assert(!(ring->flags & _FD_RINGBUFFER_OBJECT)); + + struct msm_ringbuffer_sp *msm_ring = to_msm_ringbuffer_sp(ring); + unsigned idx = APPEND(&msm_ring->u, cmds); + + msm_ring->u.cmds[idx].ring_bo = fd_bo_ref(msm_ring->ring_bo); + msm_ring->u.cmds[idx].size = offset_bytes(ring->cur, ring->start); +} + +static void +msm_ringbuffer_sp_grow(struct fd_ringbuffer *ring, uint32_t size) +{ + struct msm_ringbuffer_sp *msm_ring = to_msm_ringbuffer_sp(ring); + struct fd_pipe *pipe = msm_ring->u.submit->pipe; + + debug_assert(ring->flags & FD_RINGBUFFER_GROWABLE); + + finalize_current_cmd(ring); + + fd_bo_del(msm_ring->ring_bo); + msm_ring->ring_bo = fd_bo_new_ring(pipe->dev, size, + DRM_FREEDRENO_GEM_GPUREADONLY); + + ring->start = fd_bo_map(msm_ring->ring_bo); + ring->end = &(ring->start[size/4]); + ring->cur = ring->start; + ring->size = size; +} + +static void +msm_ringbuffer_sp_emit_reloc(struct fd_ringbuffer *ring, + const struct fd_reloc *reloc) +{ + struct msm_ringbuffer_sp *msm_ring = to_msm_ringbuffer_sp(ring); + struct fd_pipe *pipe; + + if (ring->flags & _FD_RINGBUFFER_OBJECT) { + unsigned idx = APPEND(&msm_ring->u, reloc_bos); + + msm_ring->u.reloc_bos[idx].bo = fd_bo_ref(reloc->bo); + msm_ring->u.reloc_bos[idx].flags = reloc->flags; + + pipe = 
msm_ring->u.pipe; + } else { + struct msm_submit_sp *msm_submit = + to_msm_submit_sp(msm_ring->u.submit); + + append_bo(msm_submit, reloc->bo, reloc->flags); + + pipe = msm_ring->u.submit->pipe; + } + + uint64_t iova = fd_bo_get_iova(reloc->bo) + reloc->offset; + uint32_t dword = iova; + int shift = reloc->shift; + + if (shift < 0) + dword >>= -shift; + else + dword <<= shift; + + (*ring->cur++) = dword | reloc->or; + + if (pipe->gpu_id >= 500) { + dword = iova >> 32; + shift -= 32; + + if (shift < 0) + dword >>= -shift; + else + dword <<= shift; + + (*ring->cur++) = dword | reloc->orhi; + } +} + +static uint32_t +msm_ringbuffer_sp_emit_reloc_ring(struct fd_ringbuffer *ring, + struct fd_ringbuffer *target, uint32_t cmd_idx) +{ + struct msm_ringbuffer_sp *msm_target = to_msm_ringbuffer_sp(target); + struct fd_bo *bo; + uint32_t size; + + if ((target->flags & FD_RINGBUFFER_GROWABLE) && + (cmd_idx < msm_target->u.nr_cmds)) { + bo = msm_target->u.cmds[cmd_idx].ring_bo; + size = msm_target->u.cmds[cmd_idx].size; + } else { + bo = msm_target->ring_bo; + size = offset_bytes(target->cur, target->start); + } + + msm_ringbuffer_sp_emit_reloc(ring, &(struct fd_reloc){ + .bo = bo, + .flags = FD_RELOC_READ | FD_RELOC_DUMP, + .offset = msm_target->offset, + }); + + if (!(target->flags & _FD_RINGBUFFER_OBJECT)) + return size; + + struct msm_ringbuffer_sp *msm_ring = to_msm_ringbuffer_sp(ring); + + if (ring->flags & _FD_RINGBUFFER_OBJECT) { + for (unsigned i = 0; i < msm_target->u.nr_reloc_bos; i++) { + unsigned idx = APPEND(&msm_ring->u, reloc_bos); + + msm_ring->u.reloc_bos[idx].bo = + fd_bo_ref(msm_target->u.reloc_bos[i].bo); + msm_ring->u.reloc_bos[idx].flags = + msm_target->u.reloc_bos[i].flags; + } + } else { + // TODO it would be nice to know whether we have already + // seen this target before. 
But hopefully we hit the + // append_bo() fast path enough for this to not matter: + struct msm_submit_sp *msm_submit = to_msm_submit_sp(msm_ring->u.submit); + + for (unsigned i = 0; i < msm_target->u.nr_reloc_bos; i++) { + append_bo(msm_submit, msm_target->u.reloc_bos[i].bo, + msm_target->u.reloc_bos[i].flags); + } + } + + return size; +} + +static uint32_t +msm_ringbuffer_sp_cmd_count(struct fd_ringbuffer *ring) +{ + if (ring->flags & FD_RINGBUFFER_GROWABLE) + return to_msm_ringbuffer_sp(ring)->u.nr_cmds + 1; + return 1; +} + +static void +msm_ringbuffer_sp_destroy(struct fd_ringbuffer *ring) +{ + struct msm_ringbuffer_sp *msm_ring = to_msm_ringbuffer_sp(ring); + + fd_bo_del(msm_ring->ring_bo); + + if (ring->flags & _FD_RINGBUFFER_OBJECT) { + for (unsigned i = 0; i < msm_ring->u.nr_reloc_bos; i++) { + fd_bo_del(msm_ring->u.reloc_bos[i].bo); + } + + free(msm_ring); + } else { + struct fd_submit *submit = msm_ring->u.submit; + + for (unsigned i = 0; i < msm_ring->u.nr_cmds; i++) { + fd_bo_del(msm_ring->u.cmds[i].ring_bo); + } + + slab_free_st(&to_msm_submit_sp(submit)->ring_pool, msm_ring); + } +} + +static const struct fd_ringbuffer_funcs ring_funcs = { + .grow = msm_ringbuffer_sp_grow, + .emit_reloc = msm_ringbuffer_sp_emit_reloc, + .emit_reloc_ring = msm_ringbuffer_sp_emit_reloc_ring, + .cmd_count = msm_ringbuffer_sp_cmd_count, + .destroy = msm_ringbuffer_sp_destroy, +}; + +static inline struct fd_ringbuffer * +msm_ringbuffer_sp_init(struct msm_ringbuffer_sp *msm_ring, uint32_t size, + enum fd_ringbuffer_flags flags) +{ + struct fd_ringbuffer *ring = &msm_ring->base; + + debug_assert(msm_ring->ring_bo); + + uint8_t *base = fd_bo_map(msm_ring->ring_bo); + ring->start = (void *)(base + msm_ring->offset); + ring->end = &(ring->start[size/4]); + ring->cur = ring->start; + + ring->size = size; + ring->flags = flags; + + ring->funcs = &ring_funcs; + + // TODO initializing these could probably be conditional on flags + // since unneed for FD_RINGBUFFER_STAGING case.. 
+ msm_ring->u.cmds = NULL; + msm_ring->u.nr_cmds = msm_ring->u.max_cmds = 0; + + msm_ring->u.reloc_bos = NULL; + msm_ring->u.nr_reloc_bos = msm_ring->u.max_reloc_bos = 0; + + return ring; +} + +struct fd_ringbuffer * +msm_ringbuffer_sp_new_object(struct fd_pipe *pipe, uint32_t size) +{ + struct msm_ringbuffer_sp *msm_ring = malloc(sizeof(*msm_ring)); + + msm_ring->u.pipe = pipe; + msm_ring->offset = 0; + msm_ring->ring_bo = fd_bo_new_ring(pipe->dev, size, + DRM_FREEDRENO_GEM_GPUREADONLY); + msm_ring->base.refcnt = 1; + + return msm_ringbuffer_sp_init(msm_ring, size, _FD_RINGBUFFER_OBJECT); +} diff -Nru mesa-18.3.3/src/freedreno/ir3/disasm-a3xx.c mesa-19.0.1/src/freedreno/ir3/disasm-a3xx.c --- mesa-18.3.3/src/freedreno/ir3/disasm-a3xx.c 1970-01-01 00:00:00.000000000 +0000 +++ mesa-19.0.1/src/freedreno/ir3/disasm-a3xx.c 2019-03-31 23:16:37.000000000 +0000 @@ -0,0 +1,1100 @@ +/* + * Copyright (c) 2013 Rob Clark + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#include +#include +#include +#include +#include +#include + +#include + +#include "instr-a3xx.h" + +/* bitmask of debug flags */ +enum debug_t { + PRINT_RAW = 0x1, /* dump raw hexdump */ + PRINT_VERBOSE = 0x2, +}; + +static enum debug_t debug; + +#define printf debug_printf + +static const char *levels[] = { + "", + "\t", + "\t\t", + "\t\t\t", + "\t\t\t\t", + "\t\t\t\t\t", + "\t\t\t\t\t\t", + "\t\t\t\t\t\t\t", + "\t\t\t\t\t\t\t\t", + "\t\t\t\t\t\t\t\t\t", + "x", + "x", + "x", + "x", + "x", + "x", +}; + +static const char *component = "xyzw"; + +static const char *type[] = { + [TYPE_F16] = "f16", + [TYPE_F32] = "f32", + [TYPE_U16] = "u16", + [TYPE_U32] = "u32", + [TYPE_S16] = "s16", + [TYPE_S32] = "s32", + [TYPE_U8] = "u8", + [TYPE_S8] = "s8", +}; + +struct disasm_ctx { + FILE *out; + int level; + unsigned gpu_id; + + /* current instruction repeat flag: */ + unsigned repeat; +}; + +static void print_reg(struct disasm_ctx *ctx, reg_t reg, bool full, bool r, + bool c, bool im, bool neg, bool abs, bool addr_rel) +{ + const char type = c ? 'c' : 'r'; + + // XXX I prefer - and || for neg/abs, but preserving format used + // by libllvm-a3xx for easy diffing.. + + if (abs && neg) + fprintf(ctx->out, "(absneg)"); + else if (neg) + fprintf(ctx->out, "(neg)"); + else if (abs) + fprintf(ctx->out, "(abs)"); + + if (r) + fprintf(ctx->out, "(r)"); + + if (im) { + fprintf(ctx->out, "%d", reg.iim_val); + } else if (addr_rel) { + /* I would just use %+d but trying to make it diff'able with + * libllvm-a3xx... + */ + if (reg.iim_val < 0) + fprintf(ctx->out, "%s%c", full ? "" : "h", type, -reg.iim_val); + else if (reg.iim_val > 0) + fprintf(ctx->out, "%s%c", full ? 
"" : "h", type, reg.iim_val); + else + fprintf(ctx->out, "%s%c", full ? "" : "h", type); + } else if ((reg.num == REG_A0) && !c) { + fprintf(ctx->out, "a0.%c", component[reg.comp]); + } else if ((reg.num == REG_P0) && !c) { + fprintf(ctx->out, "p0.%c", component[reg.comp]); + } else { + fprintf(ctx->out, "%s%c%d.%c", full ? "" : "h", type, reg.num, component[reg.comp]); + } +} + + +static void print_reg_dst(struct disasm_ctx *ctx, reg_t reg, bool full, bool addr_rel) +{ + print_reg(ctx, reg, full, false, false, false, false, false, addr_rel); +} + +static void print_reg_src(struct disasm_ctx *ctx, reg_t reg, bool full, bool r, + bool c, bool im, bool neg, bool abs, bool addr_rel) +{ + print_reg(ctx, reg, full, r, c, im, neg, abs, addr_rel); +} + +/* TODO switch to using reginfo struct everywhere, since more readable + * than passing a bunch of bools to print_reg_src + */ + +struct reginfo { + reg_t reg; + bool full; + bool r; + bool c; + bool im; + bool neg; + bool abs; + bool addr_rel; +}; + +static void print_src(struct disasm_ctx *ctx, struct reginfo *info) +{ + print_reg_src(ctx, info->reg, info->full, info->r, info->c, info->im, + info->neg, info->abs, info->addr_rel); +} + +//static void print_dst(struct disasm_ctx *ctx, struct reginfo *info) +//{ +// print_reg_dst(ctx, info->reg, info->full, info->addr_rel); +//} + +static void print_instr_cat0(struct disasm_ctx *ctx, instr_t *instr) +{ + instr_cat0_t *cat0 = &instr->cat0; + + switch (cat0->opc) { + case OPC_KILL: + fprintf(ctx->out, " %sp0.%c", cat0->inv ? "!" : "", + component[cat0->comp]); + break; + case OPC_BR: + fprintf(ctx->out, " %sp0.%c, #%d", cat0->inv ? "!" 
: "", + component[cat0->comp], cat0->a3xx.immed); + break; + case OPC_JUMP: + case OPC_CALL: + fprintf(ctx->out, " #%d", cat0->a3xx.immed); + break; + } + + if ((debug & PRINT_VERBOSE) && (cat0->dummy2|cat0->dummy3|cat0->dummy4)) + fprintf(ctx->out, "\t{0: %x,%x,%x}", cat0->dummy2, cat0->dummy3, cat0->dummy4); +} + +static void print_instr_cat1(struct disasm_ctx *ctx, instr_t *instr) +{ + instr_cat1_t *cat1 = &instr->cat1; + + if (cat1->ul) + fprintf(ctx->out, "(ul)"); + + if (cat1->src_type == cat1->dst_type) { + if ((cat1->src_type == TYPE_S16) && (((reg_t)cat1->dst).num == REG_A0)) { + /* special case (nmemonic?): */ + fprintf(ctx->out, "mova"); + } else { + fprintf(ctx->out, "mov.%s%s", type[cat1->src_type], type[cat1->dst_type]); + } + } else { + fprintf(ctx->out, "cov.%s%s", type[cat1->src_type], type[cat1->dst_type]); + } + + fprintf(ctx->out, " "); + + if (cat1->even) + fprintf(ctx->out, "(even)"); + + if (cat1->pos_inf) + fprintf(ctx->out, "(pos_infinity)"); + + print_reg_dst(ctx, (reg_t)(cat1->dst), type_size(cat1->dst_type) == 32, + cat1->dst_rel); + + fprintf(ctx->out, ", "); + + /* ugg, have to special case this.. vs print_reg().. */ + if (cat1->src_im) { + if (type_float(cat1->src_type)) + fprintf(ctx->out, "(%f)", cat1->fim_val); + else if (type_uint(cat1->src_type)) + fprintf(ctx->out, "0x%08x", cat1->uim_val); + else + fprintf(ctx->out, "%d", cat1->iim_val); + } else if (cat1->src_rel && !cat1->src_c) { + /* I would just use %+d but trying to make it diff'able with + * libllvm-a3xx... + */ + char type = cat1->src_rel_c ? 
'c' : 'r'; + if (cat1->off < 0) + fprintf(ctx->out, "%c", type, -cat1->off); + else if (cat1->off > 0) + fprintf(ctx->out, "%c", type, cat1->off); + else + fprintf(ctx->out, "%c", type); + } else { + print_reg_src(ctx, (reg_t)(cat1->src), type_size(cat1->src_type) == 32, + cat1->src_r, cat1->src_c, cat1->src_im, false, false, false); + } + + if ((debug & PRINT_VERBOSE) && (cat1->must_be_0)) + fprintf(ctx->out, "\t{1: %x}", cat1->must_be_0); +} + +static void print_instr_cat2(struct disasm_ctx *ctx, instr_t *instr) +{ + instr_cat2_t *cat2 = &instr->cat2; + static const char *cond[] = { + "lt", + "le", + "gt", + "ge", + "eq", + "ne", + "?6?", + }; + + switch (_OPC(2, cat2->opc)) { + case OPC_CMPS_F: + case OPC_CMPS_U: + case OPC_CMPS_S: + case OPC_CMPV_F: + case OPC_CMPV_U: + case OPC_CMPV_S: + fprintf(ctx->out, ".%s", cond[cat2->cond]); + break; + } + + fprintf(ctx->out, " "); + if (cat2->ei) + fprintf(ctx->out, "(ei)"); + print_reg_dst(ctx, (reg_t)(cat2->dst), cat2->full ^ cat2->dst_half, false); + fprintf(ctx->out, ", "); + + unsigned src1_r = cat2->repeat ? cat2->src1_r : 0; + if (cat2->c1.src1_c) { + print_reg_src(ctx, (reg_t)(cat2->c1.src1), cat2->full, src1_r, + cat2->c1.src1_c, cat2->src1_im, cat2->src1_neg, + cat2->src1_abs, false); + } else if (cat2->rel1.src1_rel) { + print_reg_src(ctx, (reg_t)(cat2->rel1.src1), cat2->full, src1_r, + cat2->rel1.src1_c, cat2->src1_im, cat2->src1_neg, + cat2->src1_abs, cat2->rel1.src1_rel); + } else { + print_reg_src(ctx, (reg_t)(cat2->src1), cat2->full, src1_r, + false, cat2->src1_im, cat2->src1_neg, + cat2->src1_abs, false); + } + + unsigned src2_r = cat2->repeat ? 
cat2->src2_r : 0; + switch (_OPC(2, cat2->opc)) { + case OPC_ABSNEG_F: + case OPC_ABSNEG_S: + case OPC_CLZ_B: + case OPC_CLZ_S: + case OPC_SIGN_F: + case OPC_FLOOR_F: + case OPC_CEIL_F: + case OPC_RNDNE_F: + case OPC_RNDAZ_F: + case OPC_TRUNC_F: + case OPC_NOT_B: + case OPC_BFREV_B: + case OPC_SETRM: + case OPC_CBITS_B: + /* these only have one src reg */ + break; + default: + fprintf(ctx->out, ", "); + if (cat2->c2.src2_c) { + print_reg_src(ctx, (reg_t)(cat2->c2.src2), cat2->full, src2_r, + cat2->c2.src2_c, cat2->src2_im, cat2->src2_neg, + cat2->src2_abs, false); + } else if (cat2->rel2.src2_rel) { + print_reg_src(ctx, (reg_t)(cat2->rel2.src2), cat2->full, src2_r, + cat2->rel2.src2_c, cat2->src2_im, cat2->src2_neg, + cat2->src2_abs, cat2->rel2.src2_rel); + } else { + print_reg_src(ctx, (reg_t)(cat2->src2), cat2->full, src2_r, + false, cat2->src2_im, cat2->src2_neg, + cat2->src2_abs, false); + } + break; + } +} + +static void print_instr_cat3(struct disasm_ctx *ctx, instr_t *instr) +{ + instr_cat3_t *cat3 = &instr->cat3; + bool full = instr_cat3_full(cat3); + + fprintf(ctx->out, " "); + print_reg_dst(ctx, (reg_t)(cat3->dst), full ^ cat3->dst_half, false); + fprintf(ctx->out, ", "); + unsigned src1_r = cat3->repeat ? cat3->src1_r : 0; + if (cat3->c1.src1_c) { + print_reg_src(ctx, (reg_t)(cat3->c1.src1), full, + src1_r, cat3->c1.src1_c, false, cat3->src1_neg, + false, false); + } else if (cat3->rel1.src1_rel) { + print_reg_src(ctx, (reg_t)(cat3->rel1.src1), full, + src1_r, cat3->rel1.src1_c, false, cat3->src1_neg, + false, cat3->rel1.src1_rel); + } else { + print_reg_src(ctx, (reg_t)(cat3->src1), full, + src1_r, false, false, cat3->src1_neg, + false, false); + } + fprintf(ctx->out, ", "); + unsigned src2_r = cat3->repeat ? 
cat3->src2_r : 0; + print_reg_src(ctx, (reg_t)cat3->src2, full, + src2_r, cat3->src2_c, false, cat3->src2_neg, + false, false); + fprintf(ctx->out, ", "); + if (cat3->c2.src3_c) { + print_reg_src(ctx, (reg_t)(cat3->c2.src3), full, + cat3->src3_r, cat3->c2.src3_c, false, cat3->src3_neg, + false, false); + } else if (cat3->rel2.src3_rel) { + print_reg_src(ctx, (reg_t)(cat3->rel2.src3), full, + cat3->src3_r, cat3->rel2.src3_c, false, cat3->src3_neg, + false, cat3->rel2.src3_rel); + } else { + print_reg_src(ctx, (reg_t)(cat3->src3), full, + cat3->src3_r, false, false, cat3->src3_neg, + false, false); + } +} + +static void print_instr_cat4(struct disasm_ctx *ctx, instr_t *instr) +{ + instr_cat4_t *cat4 = &instr->cat4; + + fprintf(ctx->out, " "); + print_reg_dst(ctx, (reg_t)(cat4->dst), cat4->full ^ cat4->dst_half, false); + fprintf(ctx->out, ", "); + + if (cat4->c.src_c) { + print_reg_src(ctx, (reg_t)(cat4->c.src), cat4->full, + cat4->src_r, cat4->c.src_c, cat4->src_im, + cat4->src_neg, cat4->src_abs, false); + } else if (cat4->rel.src_rel) { + print_reg_src(ctx, (reg_t)(cat4->rel.src), cat4->full, + cat4->src_r, cat4->rel.src_c, cat4->src_im, + cat4->src_neg, cat4->src_abs, cat4->rel.src_rel); + } else { + print_reg_src(ctx, (reg_t)(cat4->src), cat4->full, + cat4->src_r, false, cat4->src_im, + cat4->src_neg, cat4->src_abs, false); + } + + if ((debug & PRINT_VERBOSE) && (cat4->dummy1|cat4->dummy2)) + fprintf(ctx->out, "\t{4: %x,%x}", cat4->dummy1, cat4->dummy2); +} + +static void print_instr_cat5(struct disasm_ctx *ctx, instr_t *instr) +{ + static const struct { + bool src1, src2, samp, tex; + } info[0x1f] = { + [opc_op(OPC_ISAM)] = { true, false, true, true, }, + [opc_op(OPC_ISAML)] = { true, true, true, true, }, + [opc_op(OPC_ISAMM)] = { true, false, true, true, }, + [opc_op(OPC_SAM)] = { true, false, true, true, }, + [opc_op(OPC_SAMB)] = { true, true, true, true, }, + [opc_op(OPC_SAML)] = { true, true, true, true, }, + [opc_op(OPC_SAMGQ)] = { true, false, true, true, 
}, + [opc_op(OPC_GETLOD)] = { true, false, true, true, }, + [opc_op(OPC_CONV)] = { true, true, true, true, }, + [opc_op(OPC_CONVM)] = { true, true, true, true, }, + [opc_op(OPC_GETSIZE)] = { true, false, false, true, }, + [opc_op(OPC_GETBUF)] = { false, false, false, true, }, + [opc_op(OPC_GETPOS)] = { true, false, false, true, }, + [opc_op(OPC_GETINFO)] = { false, false, false, true, }, + [opc_op(OPC_DSX)] = { true, false, false, false, }, + [opc_op(OPC_DSY)] = { true, false, false, false, }, + [opc_op(OPC_GATHER4R)] = { true, false, true, true, }, + [opc_op(OPC_GATHER4G)] = { true, false, true, true, }, + [opc_op(OPC_GATHER4B)] = { true, false, true, true, }, + [opc_op(OPC_GATHER4A)] = { true, false, true, true, }, + [opc_op(OPC_SAMGP0)] = { true, false, true, true, }, + [opc_op(OPC_SAMGP1)] = { true, false, true, true, }, + [opc_op(OPC_SAMGP2)] = { true, false, true, true, }, + [opc_op(OPC_SAMGP3)] = { true, false, true, true, }, + [opc_op(OPC_DSXPP_1)] = { true, false, false, false, }, + [opc_op(OPC_DSYPP_1)] = { true, false, false, false, }, + [opc_op(OPC_RGETPOS)] = { false, false, false, false, }, + [opc_op(OPC_RGETINFO)] = { false, false, false, false, }, + }; + instr_cat5_t *cat5 = &instr->cat5; + int i; + + if (cat5->is_3d) fprintf(ctx->out, ".3d"); + if (cat5->is_a) fprintf(ctx->out, ".a"); + if (cat5->is_o) fprintf(ctx->out, ".o"); + if (cat5->is_p) fprintf(ctx->out, ".p"); + if (cat5->is_s) fprintf(ctx->out, ".s"); + if (cat5->is_s2en) fprintf(ctx->out, ".s2en"); + + fprintf(ctx->out, " "); + + switch (_OPC(5, cat5->opc)) { + case OPC_DSXPP_1: + case OPC_DSYPP_1: + break; + default: + fprintf(ctx->out, "(%s)", type[cat5->type]); + break; + } + + fprintf(ctx->out, "("); + for (i = 0; i < 4; i++) + if (cat5->wrmask & (1 << i)) + fprintf(ctx->out, "%c", "xyzw"[i]); + fprintf(ctx->out, ")"); + + print_reg_dst(ctx, (reg_t)(cat5->dst), type_size(cat5->type) == 32, false); + + if (info[cat5->opc].src1) { + fprintf(ctx->out, ", "); + print_reg_src(ctx, 
(reg_t)(cat5->src1), cat5->full, false, false, false, + false, false, false); + } + + if (cat5->is_s2en) { + fprintf(ctx->out, ", "); + print_reg_src(ctx, (reg_t)(cat5->s2en.src2), cat5->full, false, false, false, + false, false, false); + fprintf(ctx->out, ", "); + print_reg_src(ctx, (reg_t)(cat5->s2en.src3), false, false, false, false, + false, false, false); + } else { + if (cat5->is_o || info[cat5->opc].src2) { + fprintf(ctx->out, ", "); + print_reg_src(ctx, (reg_t)(cat5->norm.src2), cat5->full, + false, false, false, false, false, false); + } + if (info[cat5->opc].samp) + fprintf(ctx->out, ", s#%d", cat5->norm.samp); + if (info[cat5->opc].tex) + fprintf(ctx->out, ", t#%d", cat5->norm.tex); + } + + if (debug & PRINT_VERBOSE) { + if (cat5->is_s2en) { + if ((debug & PRINT_VERBOSE) && (cat5->s2en.dummy1|cat5->s2en.dummy2|cat5->dummy2)) + fprintf(ctx->out, "\t{5: %x,%x,%x}", cat5->s2en.dummy1, cat5->s2en.dummy2, cat5->dummy2); + } else { + if ((debug & PRINT_VERBOSE) && (cat5->norm.dummy1|cat5->dummy2)) + fprintf(ctx->out, "\t{5: %x,%x}", cat5->norm.dummy1, cat5->dummy2); + } + } +} + +static void print_instr_cat6_a3xx(struct disasm_ctx *ctx, instr_t *instr) +{ + instr_cat6_t *cat6 = &instr->cat6; + char sd = 0, ss = 0; /* dst/src address space */ + bool nodst = false; + struct reginfo dst, src1, src2; + int src1off = 0, dstoff = 0; + + memset(&dst, 0, sizeof(dst)); + memset(&src1, 0, sizeof(src1)); + memset(&src2, 0, sizeof(src2)); + + switch (_OPC(6, cat6->opc)) { + case OPC_RESINFO: + case OPC_RESFMT: + dst.full = type_size(cat6->type) == 32; + src1.full = type_size(cat6->type) == 32; + src2.full = type_size(cat6->type) == 32; + break; + case OPC_L2G: + case OPC_G2L: + dst.full = true; + src1.full = true; + src2.full = true; + break; + case OPC_STG: + case OPC_STL: + case OPC_STP: + case OPC_STI: + case OPC_STLW: + case OPC_STIB: + dst.full = true; + src1.full = type_size(cat6->type) == 32; + src2.full = type_size(cat6->type) == 32; + break; + default: + 
dst.full = type_size(cat6->type) == 32; + src1.full = true; + src2.full = true; + break; + } + + switch (_OPC(6, cat6->opc)) { + case OPC_PREFETCH: + break; + case OPC_RESINFO: + fprintf(ctx->out, ".%dd", cat6->ldgb.d + 1); + break; + case OPC_LDGB: + fprintf(ctx->out, ".%s", cat6->ldgb.typed ? "typed" : "untyped"); + fprintf(ctx->out, ".%dd", cat6->ldgb.d + 1); + fprintf(ctx->out, ".%s", type[cat6->type]); + fprintf(ctx->out, ".%d", cat6->ldgb.type_size + 1); + break; + case OPC_STGB: + case OPC_STIB: + fprintf(ctx->out, ".%s", cat6->stgb.typed ? "typed" : "untyped"); + fprintf(ctx->out, ".%dd", cat6->stgb.d + 1); + fprintf(ctx->out, ".%s", type[cat6->type]); + fprintf(ctx->out, ".%d", cat6->stgb.type_size + 1); + break; + case OPC_ATOMIC_ADD: + case OPC_ATOMIC_SUB: + case OPC_ATOMIC_XCHG: + case OPC_ATOMIC_INC: + case OPC_ATOMIC_DEC: + case OPC_ATOMIC_CMPXCHG: + case OPC_ATOMIC_MIN: + case OPC_ATOMIC_MAX: + case OPC_ATOMIC_AND: + case OPC_ATOMIC_OR: + case OPC_ATOMIC_XOR: + ss = cat6->g ? 'g' : 'l'; + fprintf(ctx->out, ".%s", cat6->ldgb.typed ? "typed" : "untyped"); + fprintf(ctx->out, ".%dd", cat6->ldgb.d + 1); + fprintf(ctx->out, ".%s", type[cat6->type]); + fprintf(ctx->out, ".%d", cat6->ldgb.type_size + 1); + fprintf(ctx->out, ".%c", ss); + break; + default: + dst.im = cat6->g && !cat6->dst_off; + fprintf(ctx->out, ".%s", type[cat6->type]); + break; + } + fprintf(ctx->out, " "); + + switch (_OPC(6, cat6->opc)) { + case OPC_STG: + sd = 'g'; + break; + case OPC_STP: + sd = 'p'; + break; + case OPC_STL: + case OPC_STLW: + sd = 'l'; + break; + + case OPC_LDG: + case OPC_LDC: + ss = 'g'; + break; + case OPC_LDP: + ss = 'p'; + break; + case OPC_LDL: + case OPC_LDLW: + case OPC_LDLV: + ss = 'l'; + break; + + case OPC_L2G: + ss = 'l'; + sd = 'g'; + break; + + case OPC_G2L: + ss = 'g'; + sd = 'l'; + break; + + case OPC_PREFETCH: + ss = 'g'; + nodst = true; + break; + + case OPC_STI: + dst.full = false; // XXX or inverts?? 
+ break; + } + + if ((_OPC(6, cat6->opc) == OPC_STGB) || (_OPC(6, cat6->opc) == OPC_STIB)) { + struct reginfo src3; + + memset(&src3, 0, sizeof(src3)); + + src1.reg = (reg_t)(cat6->stgb.src1); + src2.reg = (reg_t)(cat6->stgb.src2); + src2.im = cat6->stgb.src2_im; + src3.reg = (reg_t)(cat6->stgb.src3); + src3.im = cat6->stgb.src3_im; + src3.full = true; + + fprintf(ctx->out, "g[%u], ", cat6->stgb.dst_ssbo); + print_src(ctx, &src1); + fprintf(ctx->out, ", "); + print_src(ctx, &src2); + fprintf(ctx->out, ", "); + print_src(ctx, &src3); + + if (debug & PRINT_VERBOSE) + fprintf(ctx->out, " (pad0=%x, pad3=%x)", cat6->stgb.pad0, cat6->stgb.pad3); + + return; + } + + if (is_atomic(_OPC(6, cat6->opc))) { + + src1.reg = (reg_t)(cat6->ldgb.src1); + src1.im = cat6->ldgb.src1_im; + src2.reg = (reg_t)(cat6->ldgb.src2); + src2.im = cat6->ldgb.src2_im; + dst.reg = (reg_t)(cat6->ldgb.dst); + + print_src(ctx, &dst); + fprintf(ctx->out, ", "); + if (ss == 'g') { + struct reginfo src3; + memset(&src3, 0, sizeof(src3)); + + src3.reg = (reg_t)(cat6->ldgb.src3); + src3.full = true; + + /* For images, the ".typed" variant is used and src2 is + * the ivecN coordinates, ie ivec2 for 2d. + * + * For SSBOs, the ".untyped" variant is used and src2 is + * a simple dword offset.. src3 appears to be + * uvec2(offset * 4, 0). Not sure the point of that. + */ + + fprintf(ctx->out, "g[%u], ", cat6->ldgb.src_ssbo); + print_src(ctx, &src1); /* value */ + fprintf(ctx->out, ", "); + print_src(ctx, &src2); /* offset/coords */ + fprintf(ctx->out, ", "); + print_src(ctx, &src3); /* 64b byte offset.. 
*/ + + if (debug & PRINT_VERBOSE) { + fprintf(ctx->out, " (pad0=%x, pad3=%x, mustbe0=%x)", cat6->ldgb.pad0, + cat6->ldgb.pad3, cat6->ldgb.mustbe0); + } + } else { /* ss == 'l' */ + fprintf(ctx->out, "l["); + print_src(ctx, &src1); /* simple byte offset */ + fprintf(ctx->out, "], "); + print_src(ctx, &src2); /* value */ + + if (debug & PRINT_VERBOSE) { + fprintf(ctx->out, " (src3=%x, pad0=%x, pad3=%x, mustbe0=%x)", + cat6->ldgb.src3, cat6->ldgb.pad0, + cat6->ldgb.pad3, cat6->ldgb.mustbe0); + } + } + + return; + } else if (_OPC(6, cat6->opc) == OPC_RESINFO) { + dst.reg = (reg_t)(cat6->ldgb.dst); + + print_src(ctx, &dst); + fprintf(ctx->out, ", "); + fprintf(ctx->out, "g[%u]", cat6->ldgb.src_ssbo); + + return; + } else if (_OPC(6, cat6->opc) == OPC_LDGB) { + + src1.reg = (reg_t)(cat6->ldgb.src1); + src1.im = cat6->ldgb.src1_im; + src2.reg = (reg_t)(cat6->ldgb.src2); + src2.im = cat6->ldgb.src2_im; + dst.reg = (reg_t)(cat6->ldgb.dst); + + print_src(ctx, &dst); + fprintf(ctx->out, ", "); + fprintf(ctx->out, "g[%u], ", cat6->ldgb.src_ssbo); + print_src(ctx, &src1); + fprintf(ctx->out, ", "); + print_src(ctx, &src2); + + if (debug & PRINT_VERBOSE) + fprintf(ctx->out, " (pad0=%x, pad3=%x, mustbe0=%x)", cat6->ldgb.pad0, cat6->ldgb.pad3, cat6->ldgb.mustbe0); + + return; + } + if (cat6->dst_off) { + dst.reg = (reg_t)(cat6->c.dst); + dstoff = cat6->c.off; + } else { + dst.reg = (reg_t)(cat6->d.dst); + } + + if (cat6->src_off) { + src1.reg = (reg_t)(cat6->a.src1); + src1.im = cat6->a.src1_im; + src2.reg = (reg_t)(cat6->a.src2); + src2.im = cat6->a.src2_im; + src1off = cat6->a.off; + } else { + src1.reg = (reg_t)(cat6->b.src1); + src1.im = cat6->b.src1_im; + src2.reg = (reg_t)(cat6->b.src2); + src2.im = cat6->b.src2_im; + } + + if (!nodst) { + if (sd) + fprintf(ctx->out, "%c[", sd); + /* note: dst might actually be a src (ie. 
address to store to) */ + print_src(ctx, &dst); + if (dstoff) + fprintf(ctx->out, "%+d", dstoff); + if (sd) + fprintf(ctx->out, "]"); + fprintf(ctx->out, ", "); + } + + if (ss) + fprintf(ctx->out, "%c[", ss); + + /* can have a larger than normal immed, so hack: */ + if (src1.im) { + fprintf(ctx->out, "%u", src1.reg.dummy13); + } else { + print_src(ctx, &src1); + } + + if (src1off) + fprintf(ctx->out, "%+d", src1off); + if (ss) + fprintf(ctx->out, "]"); + + switch (_OPC(6, cat6->opc)) { + case OPC_RESINFO: + case OPC_RESFMT: + break; + default: + fprintf(ctx->out, ", "); + print_src(ctx, &src2); + break; + } +} + +static void print_instr_cat6_a6xx(struct disasm_ctx *ctx, instr_t *instr) +{ + instr_cat6_a6xx_t *cat6 = &instr->cat6_a6xx; + struct reginfo src1, src2; + char ss = 0; + + memset(&src1, 0, sizeof(src1)); + memset(&src2, 0, sizeof(src2)); + + fprintf(ctx->out, ".%s", cat6->typed ? "typed" : "untyped"); + fprintf(ctx->out, ".%dd", cat6->d + 1); + fprintf(ctx->out, ".%s", type[cat6->type]); + fprintf(ctx->out, ".%u ", cat6->type_size + 1); + + /* NOTE: blob seems to use old encoding for ldl/stl (local memory) */ + ss = 'g'; + + fprintf(ctx->out, "%c[%u", ss, cat6->ssbo); + fprintf(ctx->out, "] + "); + src1.reg = (reg_t)(cat6->src1); + src1.full = true; // XXX + print_src(ctx, &src1); + fprintf(ctx->out, ", "); + + src2.reg = (reg_t)(cat6->src2); + src2.full = true; // XXX + print_src(ctx, &src2); + + if (debug & PRINT_VERBOSE) { + fprintf(ctx->out, " (pad1=%x, pad2=%x, pad3=%x, pad4=%x)", cat6->pad1, + cat6->pad2, cat6->pad3, cat6->pad4); + } +} + +static void print_instr_cat6(struct disasm_ctx *ctx, instr_t *instr) +{ + // TODO not sure if this is the best way to figure + // out if new vs old encoding, but it kinda seems + // to work: + if ((ctx->gpu_id >= 600) && (instr->cat6.opc == 0)) { + print_instr_cat6_a6xx(ctx, instr); + if (debug & PRINT_VERBOSE) + fprintf(ctx->out, " NEW"); + } else { + print_instr_cat6_a3xx(ctx, instr); + if (debug & PRINT_VERBOSE) 
+ fprintf(ctx->out, " LEGACY"); + } +} +static void print_instr_cat7(struct disasm_ctx *ctx, instr_t *instr) +{ + instr_cat7_t *cat7 = &instr->cat7; + + if (cat7->g) + fprintf(ctx->out, ".g"); + if (cat7->l) + fprintf(ctx->out, ".l"); + + if (_OPC(7, cat7->opc) == OPC_FENCE) { + if (cat7->r) + fprintf(ctx->out, ".r"); + if (cat7->w) + fprintf(ctx->out, ".w"); + } +} + +/* size of largest OPC field of all the instruction categories: */ +#define NOPC_BITS 6 + +static const struct opc_info { + uint16_t cat; + uint16_t opc; + const char *name; + void (*print)(struct disasm_ctx *ctx, instr_t *instr); +} opcs[1 << (3+NOPC_BITS)] = { +#define OPC(cat, opc, name) [(opc)] = { (cat), (opc), #name, print_instr_cat##cat } + /* category 0: */ + OPC(0, OPC_NOP, nop), + OPC(0, OPC_BR, br), + OPC(0, OPC_JUMP, jump), + OPC(0, OPC_CALL, call), + OPC(0, OPC_RET, ret), + OPC(0, OPC_KILL, kill), + OPC(0, OPC_END, end), + OPC(0, OPC_EMIT, emit), + OPC(0, OPC_CUT, cut), + OPC(0, OPC_CHMASK, chmask), + OPC(0, OPC_CHSH, chsh), + OPC(0, OPC_FLOW_REV, flow_rev), + + /* category 1: */ + OPC(1, OPC_MOV, ), + + /* category 2: */ + OPC(2, OPC_ADD_F, add.f), + OPC(2, OPC_MIN_F, min.f), + OPC(2, OPC_MAX_F, max.f), + OPC(2, OPC_MUL_F, mul.f), + OPC(2, OPC_SIGN_F, sign.f), + OPC(2, OPC_CMPS_F, cmps.f), + OPC(2, OPC_ABSNEG_F, absneg.f), + OPC(2, OPC_CMPV_F, cmpv.f), + OPC(2, OPC_FLOOR_F, floor.f), + OPC(2, OPC_CEIL_F, ceil.f), + OPC(2, OPC_RNDNE_F, rndne.f), + OPC(2, OPC_RNDAZ_F, rndaz.f), + OPC(2, OPC_TRUNC_F, trunc.f), + OPC(2, OPC_ADD_U, add.u), + OPC(2, OPC_ADD_S, add.s), + OPC(2, OPC_SUB_U, sub.u), + OPC(2, OPC_SUB_S, sub.s), + OPC(2, OPC_CMPS_U, cmps.u), + OPC(2, OPC_CMPS_S, cmps.s), + OPC(2, OPC_MIN_U, min.u), + OPC(2, OPC_MIN_S, min.s), + OPC(2, OPC_MAX_U, max.u), + OPC(2, OPC_MAX_S, max.s), + OPC(2, OPC_ABSNEG_S, absneg.s), + OPC(2, OPC_AND_B, and.b), + OPC(2, OPC_OR_B, or.b), + OPC(2, OPC_NOT_B, not.b), + OPC(2, OPC_XOR_B, xor.b), + OPC(2, OPC_CMPV_U, cmpv.u), + OPC(2, OPC_CMPV_S, cmpv.s), 
+ OPC(2, OPC_MUL_U, mul.u), + OPC(2, OPC_MUL_S, mul.s), + OPC(2, OPC_MULL_U, mull.u), + OPC(2, OPC_BFREV_B, bfrev.b), + OPC(2, OPC_CLZ_S, clz.s), + OPC(2, OPC_CLZ_B, clz.b), + OPC(2, OPC_SHL_B, shl.b), + OPC(2, OPC_SHR_B, shr.b), + OPC(2, OPC_ASHR_B, ashr.b), + OPC(2, OPC_BARY_F, bary.f), + OPC(2, OPC_MGEN_B, mgen.b), + OPC(2, OPC_GETBIT_B, getbit.b), + OPC(2, OPC_SETRM, setrm), + OPC(2, OPC_CBITS_B, cbits.b), + OPC(2, OPC_SHB, shb), + OPC(2, OPC_MSAD, msad), + + /* category 3: */ + OPC(3, OPC_MAD_U16, mad.u16), + OPC(3, OPC_MADSH_U16, madsh.u16), + OPC(3, OPC_MAD_S16, mad.s16), + OPC(3, OPC_MADSH_M16, madsh.m16), + OPC(3, OPC_MAD_U24, mad.u24), + OPC(3, OPC_MAD_S24, mad.s24), + OPC(3, OPC_MAD_F16, mad.f16), + OPC(3, OPC_MAD_F32, mad.f32), + OPC(3, OPC_SEL_B16, sel.b16), + OPC(3, OPC_SEL_B32, sel.b32), + OPC(3, OPC_SEL_S16, sel.s16), + OPC(3, OPC_SEL_S32, sel.s32), + OPC(3, OPC_SEL_F16, sel.f16), + OPC(3, OPC_SEL_F32, sel.f32), + OPC(3, OPC_SAD_S16, sad.s16), + OPC(3, OPC_SAD_S32, sad.s32), + + /* category 4: */ + OPC(4, OPC_RCP, rcp), + OPC(4, OPC_RSQ, rsq), + OPC(4, OPC_LOG2, log2), + OPC(4, OPC_EXP2, exp2), + OPC(4, OPC_SIN, sin), + OPC(4, OPC_COS, cos), + OPC(4, OPC_SQRT, sqrt), + + /* category 5: */ + OPC(5, OPC_ISAM, isam), + OPC(5, OPC_ISAML, isaml), + OPC(5, OPC_ISAMM, isamm), + OPC(5, OPC_SAM, sam), + OPC(5, OPC_SAMB, samb), + OPC(5, OPC_SAML, saml), + OPC(5, OPC_SAMGQ, samgq), + OPC(5, OPC_GETLOD, getlod), + OPC(5, OPC_CONV, conv), + OPC(5, OPC_CONVM, convm), + OPC(5, OPC_GETSIZE, getsize), + OPC(5, OPC_GETBUF, getbuf), + OPC(5, OPC_GETPOS, getpos), + OPC(5, OPC_GETINFO, getinfo), + OPC(5, OPC_DSX, dsx), + OPC(5, OPC_DSY, dsy), + OPC(5, OPC_GATHER4R, gather4r), + OPC(5, OPC_GATHER4G, gather4g), + OPC(5, OPC_GATHER4B, gather4b), + OPC(5, OPC_GATHER4A, gather4a), + OPC(5, OPC_SAMGP0, samgp0), + OPC(5, OPC_SAMGP1, samgp1), + OPC(5, OPC_SAMGP2, samgp2), + OPC(5, OPC_SAMGP3, samgp3), + OPC(5, OPC_DSXPP_1, dsxpp.1), + OPC(5, OPC_DSYPP_1, dsypp.1), + OPC(5, 
OPC_RGETPOS, rgetpos), + OPC(5, OPC_RGETINFO, rgetinfo), + + + /* category 6: */ + OPC(6, OPC_LDG, ldg), + OPC(6, OPC_LDL, ldl), + OPC(6, OPC_LDP, ldp), + OPC(6, OPC_STG, stg), + OPC(6, OPC_STL, stl), + OPC(6, OPC_STP, stp), + OPC(6, OPC_STI, sti), + OPC(6, OPC_G2L, g2l), + OPC(6, OPC_L2G, l2g), + OPC(6, OPC_PREFETCH, prefetch), + OPC(6, OPC_LDLW, ldlw), + OPC(6, OPC_STLW, stlw), + OPC(6, OPC_RESFMT, resfmt), + OPC(6, OPC_RESINFO, resinfo), + OPC(6, OPC_ATOMIC_ADD, atomic.add), + OPC(6, OPC_ATOMIC_SUB, atomic.sub), + OPC(6, OPC_ATOMIC_XCHG, atomic.xchg), + OPC(6, OPC_ATOMIC_INC, atomic.inc), + OPC(6, OPC_ATOMIC_DEC, atomic.dec), + OPC(6, OPC_ATOMIC_CMPXCHG, atomic.cmpxchg), + OPC(6, OPC_ATOMIC_MIN, atomic.min), + OPC(6, OPC_ATOMIC_MAX, atomic.max), + OPC(6, OPC_ATOMIC_AND, atomic.and), + OPC(6, OPC_ATOMIC_OR, atomic.or), + OPC(6, OPC_ATOMIC_XOR, atomic.xor), + OPC(6, OPC_LDGB, ldgb), + OPC(6, OPC_STGB, stgb), + OPC(6, OPC_STIB, stib), + OPC(6, OPC_LDC, ldc), + OPC(6, OPC_LDLV, ldlv), + + OPC(7, OPC_BAR, bar), + OPC(7, OPC_FENCE, fence), + +#undef OPC +}; + +#define GETINFO(instr) (&(opcs[((instr)->opc_cat << NOPC_BITS) | instr_opc(instr, ctx->gpu_id)])) + +// XXX hack.. probably should move this table somewhere common: +#include "ir3.h" +const char *ir3_instr_name(struct ir3_instruction *instr) +{ + if (opc_cat(instr->opc) == -1) return "??meta??"; + return opcs[instr->opc].name; +} + +static bool print_instr(struct disasm_ctx *ctx, uint32_t *dwords, int n) +{ + instr_t *instr = (instr_t *)dwords; + uint32_t opc = instr_opc(instr, ctx->gpu_id); + const char *name; + + if (debug & PRINT_VERBOSE) + fprintf(ctx->out, "%s%04d[%08xx_%08xx] ", levels[ctx->level], n, dwords[1], dwords[0]); + + /* NOTE: order flags are printed is a bit fugly.. but for now I + * try to match the order in llvm-a3xx disassembler for easy + * diff'ing.. 
+ */ + + ctx->repeat = instr_repeat(instr); + + if (instr->sync) + fprintf(ctx->out, "(sy)"); + if (instr->ss && ((instr->opc_cat <= 4) || (instr->opc_cat == 7))) + fprintf(ctx->out, "(ss)"); + if (instr->jmp_tgt) + fprintf(ctx->out, "(jp)"); + if (instr_sat(instr)) + fprintf(ctx->out, "(sat)"); + if (ctx->repeat) { + fprintf(ctx->out, "(rpt%d)", ctx->repeat); + } else if ((instr->opc_cat == 2) && (instr->cat2.src1_r || instr->cat2.src2_r)) { + unsigned nop = (instr->cat2.src2_r * 2) + instr->cat2.src1_r; + fprintf(ctx->out, "(nop%d)", nop); + } else if ((instr->opc_cat == 3) && (instr->cat3.src1_r || instr->cat3.src2_r)) { + unsigned nop = (instr->cat3.src2_r * 2) + instr->cat3.src1_r; + fprintf(ctx->out, "(nop%d)", nop); + } + if (instr->ul && ((2 <= instr->opc_cat) && (instr->opc_cat <= 4))) + fprintf(ctx->out, "(ul)"); + + name = GETINFO(instr)->name; + + if (name) { + fprintf(ctx->out, "%s", name); + GETINFO(instr)->print(ctx, instr); + } else { + fprintf(ctx->out, "unknown(%d,%d)", instr->opc_cat, opc); + } + + fprintf(ctx->out, "\n"); + + return (instr->opc_cat == 0) && (opc == OPC_END); +} + +int disasm_a3xx(uint32_t *dwords, int sizedwords, int level, FILE *out, unsigned gpu_id) +{ + struct disasm_ctx ctx; + int i; + + assert((sizedwords % 2) == 0); + + memset(&ctx, 0, sizeof(ctx)); + ctx.out = out; + ctx.level = level; + ctx.gpu_id = gpu_id; + + for (i = 0; i < sizedwords; i += 2) + print_instr(&ctx, &dwords[i], i/2); + + return 0; +} diff -Nru mesa-18.3.3/src/freedreno/ir3/instr-a3xx.h mesa-19.0.1/src/freedreno/ir3/instr-a3xx.h --- mesa-18.3.3/src/freedreno/ir3/instr-a3xx.h 1970-01-01 00:00:00.000000000 +0000 +++ mesa-19.0.1/src/freedreno/ir3/instr-a3xx.h 2019-03-31 23:16:37.000000000 +0000 @@ -0,0 +1,917 @@ +/* + * Copyright (c) 2013 Rob Clark + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, 
including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#ifndef INSTR_A3XX_H_ +#define INSTR_A3XX_H_ + +#define PACKED __attribute__((__packed__)) + +#include +#include +#include +#include + +/* size of largest OPC field of all the instruction categories: */ +#define NOPC_BITS 6 + +#define _OPC(cat, opc) (((cat) << NOPC_BITS) | opc) + +typedef enum { + /* category 0: */ + OPC_NOP = _OPC(0, 0), + OPC_BR = _OPC(0, 1), + OPC_JUMP = _OPC(0, 2), + OPC_CALL = _OPC(0, 3), + OPC_RET = _OPC(0, 4), + OPC_KILL = _OPC(0, 5), + OPC_END = _OPC(0, 6), + OPC_EMIT = _OPC(0, 7), + OPC_CUT = _OPC(0, 8), + OPC_CHMASK = _OPC(0, 9), + OPC_CHSH = _OPC(0, 10), + OPC_FLOW_REV = _OPC(0, 11), + + /* category 1: */ + OPC_MOV = _OPC(1, 0), + + /* category 2: */ + OPC_ADD_F = _OPC(2, 0), + OPC_MIN_F = _OPC(2, 1), + OPC_MAX_F = _OPC(2, 2), + OPC_MUL_F = _OPC(2, 3), + OPC_SIGN_F = _OPC(2, 4), + OPC_CMPS_F = _OPC(2, 5), + OPC_ABSNEG_F = _OPC(2, 6), + OPC_CMPV_F = _OPC(2, 7), + /* 8 - invalid */ + OPC_FLOOR_F = _OPC(2, 9), + OPC_CEIL_F = _OPC(2, 10), + OPC_RNDNE_F = _OPC(2, 11), + OPC_RNDAZ_F = _OPC(2, 12), + OPC_TRUNC_F = _OPC(2, 13), + /* 
14-15 - invalid */ + OPC_ADD_U = _OPC(2, 16), + OPC_ADD_S = _OPC(2, 17), + OPC_SUB_U = _OPC(2, 18), + OPC_SUB_S = _OPC(2, 19), + OPC_CMPS_U = _OPC(2, 20), + OPC_CMPS_S = _OPC(2, 21), + OPC_MIN_U = _OPC(2, 22), + OPC_MIN_S = _OPC(2, 23), + OPC_MAX_U = _OPC(2, 24), + OPC_MAX_S = _OPC(2, 25), + OPC_ABSNEG_S = _OPC(2, 26), + /* 27 - invalid */ + OPC_AND_B = _OPC(2, 28), + OPC_OR_B = _OPC(2, 29), + OPC_NOT_B = _OPC(2, 30), + OPC_XOR_B = _OPC(2, 31), + /* 32 - invalid */ + OPC_CMPV_U = _OPC(2, 33), + OPC_CMPV_S = _OPC(2, 34), + /* 35-47 - invalid */ + OPC_MUL_U = _OPC(2, 48), + OPC_MUL_S = _OPC(2, 49), + OPC_MULL_U = _OPC(2, 50), + OPC_BFREV_B = _OPC(2, 51), + OPC_CLZ_S = _OPC(2, 52), + OPC_CLZ_B = _OPC(2, 53), + OPC_SHL_B = _OPC(2, 54), + OPC_SHR_B = _OPC(2, 55), + OPC_ASHR_B = _OPC(2, 56), + OPC_BARY_F = _OPC(2, 57), + OPC_MGEN_B = _OPC(2, 58), + OPC_GETBIT_B = _OPC(2, 59), + OPC_SETRM = _OPC(2, 60), + OPC_CBITS_B = _OPC(2, 61), + OPC_SHB = _OPC(2, 62), + OPC_MSAD = _OPC(2, 63), + + /* category 3: */ + OPC_MAD_U16 = _OPC(3, 0), + OPC_MADSH_U16 = _OPC(3, 1), + OPC_MAD_S16 = _OPC(3, 2), + OPC_MADSH_M16 = _OPC(3, 3), /* should this be .s16? 
*/ + OPC_MAD_U24 = _OPC(3, 4), + OPC_MAD_S24 = _OPC(3, 5), + OPC_MAD_F16 = _OPC(3, 6), + OPC_MAD_F32 = _OPC(3, 7), + OPC_SEL_B16 = _OPC(3, 8), + OPC_SEL_B32 = _OPC(3, 9), + OPC_SEL_S16 = _OPC(3, 10), + OPC_SEL_S32 = _OPC(3, 11), + OPC_SEL_F16 = _OPC(3, 12), + OPC_SEL_F32 = _OPC(3, 13), + OPC_SAD_S16 = _OPC(3, 14), + OPC_SAD_S32 = _OPC(3, 15), + + /* category 4: */ + OPC_RCP = _OPC(4, 0), + OPC_RSQ = _OPC(4, 1), + OPC_LOG2 = _OPC(4, 2), + OPC_EXP2 = _OPC(4, 3), + OPC_SIN = _OPC(4, 4), + OPC_COS = _OPC(4, 5), + OPC_SQRT = _OPC(4, 6), + // 7-63 - invalid + + /* category 5: */ + OPC_ISAM = _OPC(5, 0), + OPC_ISAML = _OPC(5, 1), + OPC_ISAMM = _OPC(5, 2), + OPC_SAM = _OPC(5, 3), + OPC_SAMB = _OPC(5, 4), + OPC_SAML = _OPC(5, 5), + OPC_SAMGQ = _OPC(5, 6), + OPC_GETLOD = _OPC(5, 7), + OPC_CONV = _OPC(5, 8), + OPC_CONVM = _OPC(5, 9), + OPC_GETSIZE = _OPC(5, 10), + OPC_GETBUF = _OPC(5, 11), + OPC_GETPOS = _OPC(5, 12), + OPC_GETINFO = _OPC(5, 13), + OPC_DSX = _OPC(5, 14), + OPC_DSY = _OPC(5, 15), + OPC_GATHER4R = _OPC(5, 16), + OPC_GATHER4G = _OPC(5, 17), + OPC_GATHER4B = _OPC(5, 18), + OPC_GATHER4A = _OPC(5, 19), + OPC_SAMGP0 = _OPC(5, 20), + OPC_SAMGP1 = _OPC(5, 21), + OPC_SAMGP2 = _OPC(5, 22), + OPC_SAMGP3 = _OPC(5, 23), + OPC_DSXPP_1 = _OPC(5, 24), + OPC_DSYPP_1 = _OPC(5, 25), + OPC_RGETPOS = _OPC(5, 26), + OPC_RGETINFO = _OPC(5, 27), + + /* category 6: */ + OPC_LDG = _OPC(6, 0), /* load-global */ + OPC_LDL = _OPC(6, 1), + OPC_LDP = _OPC(6, 2), + OPC_STG = _OPC(6, 3), /* store-global */ + OPC_STL = _OPC(6, 4), + OPC_STP = _OPC(6, 5), + OPC_STI = _OPC(6, 6), + OPC_G2L = _OPC(6, 7), + OPC_L2G = _OPC(6, 8), + OPC_PREFETCH = _OPC(6, 9), + OPC_LDLW = _OPC(6, 10), + OPC_STLW = _OPC(6, 11), + OPC_RESFMT = _OPC(6, 14), + OPC_RESINFO = _OPC(6, 15), + OPC_ATOMIC_ADD = _OPC(6, 16), + OPC_ATOMIC_SUB = _OPC(6, 17), + OPC_ATOMIC_XCHG = _OPC(6, 18), + OPC_ATOMIC_INC = _OPC(6, 19), + OPC_ATOMIC_DEC = _OPC(6, 20), + OPC_ATOMIC_CMPXCHG = _OPC(6, 21), + OPC_ATOMIC_MIN = _OPC(6, 22), + 
OPC_ATOMIC_MAX = _OPC(6, 23), + OPC_ATOMIC_AND = _OPC(6, 24), + OPC_ATOMIC_OR = _OPC(6, 25), + OPC_ATOMIC_XOR = _OPC(6, 26), + OPC_LDGB = _OPC(6, 27), + OPC_STGB = _OPC(6, 28), + OPC_STIB = _OPC(6, 29), + OPC_LDC = _OPC(6, 30), + OPC_LDLV = _OPC(6, 31), + + /* category 7: */ + OPC_BAR = _OPC(7, 0), + OPC_FENCE = _OPC(7, 1), + + /* meta instructions (category -1): */ + /* placeholder instr to mark shader inputs: */ + OPC_META_INPUT = _OPC(-1, 0), + /* The "fan-in" and "fan-out" instructions are used for keeping + * track of instructions that write to multiple dst registers + * (fan-out) like texture sample instructions, or read multiple + * consecutive scalar registers (fan-in) (bary.f, texture samp) + */ + OPC_META_FO = _OPC(-1, 2), + OPC_META_FI = _OPC(-1, 3), + +} opc_t; + +#define opc_cat(opc) ((int)((opc) >> NOPC_BITS)) +#define opc_op(opc) ((unsigned)((opc) & ((1 << NOPC_BITS) - 1))) + +typedef enum { + TYPE_F16 = 0, + TYPE_F32 = 1, + TYPE_U16 = 2, + TYPE_U32 = 3, + TYPE_S16 = 4, + TYPE_S32 = 5, + TYPE_U8 = 6, + TYPE_S8 = 7, // XXX I assume? 
+} type_t; + +static inline uint32_t type_size(type_t type) +{ + switch (type) { + case TYPE_F32: + case TYPE_U32: + case TYPE_S32: + return 32; + case TYPE_F16: + case TYPE_U16: + case TYPE_S16: + return 16; + case TYPE_U8: + case TYPE_S8: + return 8; + default: + assert(0); /* invalid type */ + return 0; + } +} + +static inline int type_float(type_t type) +{ + return (type == TYPE_F32) || (type == TYPE_F16); +} + +static inline int type_uint(type_t type) +{ + return (type == TYPE_U32) || (type == TYPE_U16) || (type == TYPE_U8); +} + +static inline int type_sint(type_t type) +{ + return (type == TYPE_S32) || (type == TYPE_S16) || (type == TYPE_S8); +} + +typedef union PACKED { + /* normal gpr or const src register: */ + struct PACKED { + uint32_t comp : 2; + uint32_t num : 10; + }; + /* for immediate val: */ + int32_t iim_val : 11; + /* to make compiler happy: */ + uint32_t dummy32; + uint32_t dummy10 : 10; + int32_t idummy10 : 10; + uint32_t dummy11 : 11; + uint32_t dummy12 : 12; + uint32_t dummy13 : 13; + uint32_t dummy8 : 8; +} reg_t; + +/* special registers: */ +#define REG_A0 61 /* address register */ +#define REG_P0 62 /* predicate register */ + +static inline int reg_special(reg_t reg) +{ + return (reg.num == REG_A0) || (reg.num == REG_P0); +} + +typedef struct PACKED { + /* dword0: */ + union PACKED { + struct PACKED { + int16_t immed : 16; + uint32_t dummy1 : 16; + } a3xx; + struct PACKED { + int32_t immed : 20; + uint32_t dummy1 : 12; + } a4xx; + struct PACKED { + int32_t immed : 32; + } a5xx; + }; + + /* dword1: */ + uint32_t dummy2 : 8; + uint32_t repeat : 3; + uint32_t dummy3 : 1; + uint32_t ss : 1; + uint32_t dummy4 : 7; + uint32_t inv : 1; + uint32_t comp : 2; + uint32_t opc : 4; + uint32_t jmp_tgt : 1; + uint32_t sync : 1; + uint32_t opc_cat : 3; +} instr_cat0_t; + +typedef struct PACKED { + /* dword0: */ + union PACKED { + /* for normal src register: */ + struct PACKED { + uint32_t src : 11; + /* at least low bit of pad must be zero or it will + * 
look like a address relative src + */ + uint32_t pad : 21; + }; + /* for address relative: */ + struct PACKED { + int32_t off : 10; + uint32_t src_rel_c : 1; + uint32_t src_rel : 1; + uint32_t unknown : 20; + }; + /* for immediate: */ + int32_t iim_val; + uint32_t uim_val; + float fim_val; + }; + + /* dword1: */ + uint32_t dst : 8; + uint32_t repeat : 3; + uint32_t src_r : 1; + uint32_t ss : 1; + uint32_t ul : 1; + uint32_t dst_type : 3; + uint32_t dst_rel : 1; + uint32_t src_type : 3; + uint32_t src_c : 1; + uint32_t src_im : 1; + uint32_t even : 1; + uint32_t pos_inf : 1; + uint32_t must_be_0 : 2; + uint32_t jmp_tgt : 1; + uint32_t sync : 1; + uint32_t opc_cat : 3; +} instr_cat1_t; + +typedef struct PACKED { + /* dword0: */ + union PACKED { + struct PACKED { + uint32_t src1 : 11; + uint32_t must_be_zero1: 2; + uint32_t src1_im : 1; /* immediate */ + uint32_t src1_neg : 1; /* negate */ + uint32_t src1_abs : 1; /* absolute value */ + }; + struct PACKED { + uint32_t src1 : 10; + uint32_t src1_c : 1; /* relative-const */ + uint32_t src1_rel : 1; /* relative address */ + uint32_t must_be_zero : 1; + uint32_t dummy : 3; + } rel1; + struct PACKED { + uint32_t src1 : 12; + uint32_t src1_c : 1; /* const */ + uint32_t dummy : 3; + } c1; + }; + + union PACKED { + struct PACKED { + uint32_t src2 : 11; + uint32_t must_be_zero2: 2; + uint32_t src2_im : 1; /* immediate */ + uint32_t src2_neg : 1; /* negate */ + uint32_t src2_abs : 1; /* absolute value */ + }; + struct PACKED { + uint32_t src2 : 10; + uint32_t src2_c : 1; /* relative-const */ + uint32_t src2_rel : 1; /* relative address */ + uint32_t must_be_zero : 1; + uint32_t dummy : 3; + } rel2; + struct PACKED { + uint32_t src2 : 12; + uint32_t src2_c : 1; /* const */ + uint32_t dummy : 3; + } c2; + }; + + /* dword1: */ + uint32_t dst : 8; + uint32_t repeat : 2; + uint32_t sat : 1; + uint32_t src1_r : 1; /* doubles as nop0 if repeat==0 */ + uint32_t ss : 1; + uint32_t ul : 1; /* dunno */ + uint32_t dst_half : 1; /* or 
widen/narrow.. ie. dst hrN <-> rN */ + uint32_t ei : 1; + uint32_t cond : 3; + uint32_t src2_r : 1; /* doubles as nop1 if repeat==0 */ + uint32_t full : 1; /* not half */ + uint32_t opc : 6; + uint32_t jmp_tgt : 1; + uint32_t sync : 1; + uint32_t opc_cat : 3; +} instr_cat2_t; + +typedef struct PACKED { + /* dword0: */ + union PACKED { + struct PACKED { + uint32_t src1 : 11; + uint32_t must_be_zero1: 2; + uint32_t src2_c : 1; + uint32_t src1_neg : 1; + uint32_t src2_r : 1; /* doubles as nop1 if repeat==0 */ + }; + struct PACKED { + uint32_t src1 : 10; + uint32_t src1_c : 1; + uint32_t src1_rel : 1; + uint32_t must_be_zero : 1; + uint32_t dummy : 3; + } rel1; + struct PACKED { + uint32_t src1 : 12; + uint32_t src1_c : 1; + uint32_t dummy : 3; + } c1; + }; + + union PACKED { + struct PACKED { + uint32_t src3 : 11; + uint32_t must_be_zero2: 2; + uint32_t src3_r : 1; + uint32_t src2_neg : 1; + uint32_t src3_neg : 1; + }; + struct PACKED { + uint32_t src3 : 10; + uint32_t src3_c : 1; + uint32_t src3_rel : 1; + uint32_t must_be_zero : 1; + uint32_t dummy : 3; + } rel2; + struct PACKED { + uint32_t src3 : 12; + uint32_t src3_c : 1; + uint32_t dummy : 3; + } c2; + }; + + /* dword1: */ + uint32_t dst : 8; + uint32_t repeat : 2; + uint32_t sat : 1; + uint32_t src1_r : 1; /* doubles as nop0 if repeat==0 */ + uint32_t ss : 1; + uint32_t ul : 1; + uint32_t dst_half : 1; /* or widen/narrow.. ie. dst hrN <-> rN */ + uint32_t src2 : 8; + uint32_t opc : 4; + uint32_t jmp_tgt : 1; + uint32_t sync : 1; + uint32_t opc_cat : 3; +} instr_cat3_t; + +static inline bool instr_cat3_full(instr_cat3_t *cat3) +{ + switch (_OPC(3, cat3->opc)) { + case OPC_MAD_F16: + case OPC_MAD_U16: + case OPC_MAD_S16: + case OPC_SEL_B16: + case OPC_SEL_S16: + case OPC_SEL_F16: + case OPC_SAD_S16: + case OPC_SAD_S32: // really?? 
+ return false; + default: + return true; + } +} + +typedef struct PACKED { + /* dword0: */ + union PACKED { + struct PACKED { + uint32_t src : 11; + uint32_t must_be_zero1: 2; + uint32_t src_im : 1; /* immediate */ + uint32_t src_neg : 1; /* negate */ + uint32_t src_abs : 1; /* absolute value */ + }; + struct PACKED { + uint32_t src : 10; + uint32_t src_c : 1; /* relative-const */ + uint32_t src_rel : 1; /* relative address */ + uint32_t must_be_zero : 1; + uint32_t dummy : 3; + } rel; + struct PACKED { + uint32_t src : 12; + uint32_t src_c : 1; /* const */ + uint32_t dummy : 3; + } c; + }; + uint32_t dummy1 : 16; /* seem to be ignored */ + + /* dword1: */ + uint32_t dst : 8; + uint32_t repeat : 2; + uint32_t sat : 1; + uint32_t src_r : 1; + uint32_t ss : 1; + uint32_t ul : 1; + uint32_t dst_half : 1; /* or widen/narrow.. ie. dst hrN <-> rN */ + uint32_t dummy2 : 5; /* seem to be ignored */ + uint32_t full : 1; /* not half */ + uint32_t opc : 6; + uint32_t jmp_tgt : 1; + uint32_t sync : 1; + uint32_t opc_cat : 3; +} instr_cat4_t; + +typedef struct PACKED { + /* dword0: */ + union PACKED { + /* normal case: */ + struct PACKED { + uint32_t full : 1; /* not half */ + uint32_t src1 : 8; + uint32_t src2 : 8; + uint32_t dummy1 : 4; /* seem to be ignored */ + uint32_t samp : 4; + uint32_t tex : 7; + } norm; + /* s2en case: */ + struct PACKED { + uint32_t full : 1; /* not half */ + uint32_t src1 : 8; + uint32_t src2 : 11; + uint32_t dummy1 : 1; + uint32_t src3 : 8; + uint32_t dummy2 : 3; + } s2en; + /* same in either case: */ + // XXX I think, confirm this + struct PACKED { + uint32_t full : 1; /* not half */ + uint32_t src1 : 8; + uint32_t pad : 23; + }; + }; + + /* dword1: */ + uint32_t dst : 8; + uint32_t wrmask : 4; /* write-mask */ + uint32_t type : 3; + uint32_t dummy2 : 1; /* seems to be ignored */ + uint32_t is_3d : 1; + + uint32_t is_a : 1; + uint32_t is_s : 1; + uint32_t is_s2en : 1; + uint32_t is_o : 1; + uint32_t is_p : 1; + + uint32_t opc : 5; + uint32_t 
jmp_tgt : 1; + uint32_t sync : 1; + uint32_t opc_cat : 3; +} instr_cat5_t; + +/* dword0 encoding for src_off: [src1 + off], src2: */ +typedef struct PACKED { + /* dword0: */ + uint32_t mustbe1 : 1; + int32_t off : 13; + uint32_t src1 : 8; + uint32_t src1_im : 1; + uint32_t src2_im : 1; + uint32_t src2 : 8; + + /* dword1: */ + uint32_t dword1; +} instr_cat6a_t; + +/* dword0 encoding for !src_off: [src1], src2 */ +typedef struct PACKED { + /* dword0: */ + uint32_t mustbe0 : 1; + uint32_t src1 : 13; + uint32_t ignore0 : 8; + uint32_t src1_im : 1; + uint32_t src2_im : 1; + uint32_t src2 : 8; + + /* dword1: */ + uint32_t dword1; +} instr_cat6b_t; + +/* dword1 encoding for dst_off: */ +typedef struct PACKED { + /* dword0: */ + uint32_t dword0; + + /* note: there is some weird stuff going on where sometimes + * cat6->a.off is involved.. but that seems like a bug in + * the blob, since it is used even if !cat6->src_off + * It would make sense for there to be some more bits to + * bring us to 11 bits worth of offset, but not sure.. + */ + int32_t off : 8; + uint32_t mustbe1 : 1; + uint32_t dst : 8; + uint32_t pad1 : 15; +} instr_cat6c_t; + +/* dword1 encoding for !dst_off: */ +typedef struct PACKED { + /* dword0: */ + uint32_t dword0; + + uint32_t dst : 8; + uint32_t mustbe0 : 1; + uint32_t idx : 8; + uint32_t pad0 : 15; +} instr_cat6d_t; + +/* ldgb and atomics.. 
+ * + * ldgb: pad0=0, pad3=1 + * atomic .g: pad0=1, pad3=1 + * .l: pad0=1, pad3=0 + */ +typedef struct PACKED { + /* dword0: */ + uint32_t pad0 : 1; + uint32_t src3 : 8; + uint32_t d : 2; + uint32_t typed : 1; + uint32_t type_size : 2; + uint32_t src1 : 8; + uint32_t src1_im : 1; + uint32_t src2_im : 1; + uint32_t src2 : 8; + + /* dword1: */ + uint32_t dst : 8; + uint32_t mustbe0 : 1; + uint32_t src_ssbo : 8; + uint32_t pad2 : 3; // type + uint32_t g : 1; + uint32_t pad3 : 1; + uint32_t pad4 : 10; // opc/jmp_tgt/sync/opc_cat +} instr_cat6ldgb_t; + +/* stgb, pad0=0, pad3=2 + */ +typedef struct PACKED { + /* dword0: */ + uint32_t mustbe1 : 1; // ??? + uint32_t src1 : 8; + uint32_t d : 2; + uint32_t typed : 1; + uint32_t type_size : 2; + uint32_t pad0 : 9; + uint32_t src2_im : 1; + uint32_t src2 : 8; + + /* dword1: */ + uint32_t src3 : 8; + uint32_t src3_im : 1; + uint32_t dst_ssbo : 8; + uint32_t pad2 : 3; // type + uint32_t pad3 : 2; + uint32_t pad4 : 10; // opc/jmp_tgt/sync/opc_cat +} instr_cat6stgb_t; + +typedef union PACKED { + instr_cat6a_t a; + instr_cat6b_t b; + instr_cat6c_t c; + instr_cat6d_t d; + instr_cat6ldgb_t ldgb; + instr_cat6stgb_t stgb; + struct PACKED { + /* dword0: */ + uint32_t src_off : 1; + uint32_t pad1 : 31; + + /* dword1: */ + uint32_t pad2 : 8; + uint32_t dst_off : 1; + uint32_t pad3 : 8; + uint32_t type : 3; + uint32_t g : 1; /* or in some cases it means dst immed */ + uint32_t pad4 : 1; + uint32_t opc : 5; + uint32_t jmp_tgt : 1; + uint32_t sync : 1; + uint32_t opc_cat : 3; + }; +} instr_cat6_t; + +/** + * For atomic ops (which return a value): + * + * pad1=1, pad2=c, pad3=0, pad4=3 + * src1 - vecN offset/coords + * src2.x - is actually dest register + * src2.y - is 'data' except for cmpxchg where src2.y is 'compare' + * and src2.z is 'data' + * + * For stib (which does not return a value): + * pad1=0, pad2=c, pad3=0, pad4=2 + * src1 - vecN offset/coords + * src2 - value to store + * + * for ldc (load from UBO using descriptor): + * 
pad1=0, pad2=8, pad3=0, pad4=2 + */ +typedef struct PACKED { + /* dword0: */ + uint32_t pad1 : 9; + uint32_t d : 2; + uint32_t typed : 1; + uint32_t type_size : 2; + uint32_t opc : 5; + uint32_t pad2 : 5; + uint32_t src1 : 8; /* coordinate/offset */ + + /* dword1: */ + uint32_t src2 : 8; + uint32_t pad3 : 1; //mustbe0 ?? or zero means imm vs reg for ssbo?? + uint32_t ssbo : 8; /* ssbo/image binding point */ + uint32_t type : 3; + uint32_t pad4 : 7; + uint32_t jmp_tgt : 1; + uint32_t sync : 1; + uint32_t opc_cat : 3; +} instr_cat6_a6xx_t; + +typedef struct PACKED { + /* dword0: */ + uint32_t pad1 : 32; + + /* dword1: */ + uint32_t pad2 : 12; + uint32_t ss : 1; /* maybe in the encoding, but blob only uses (sy) */ + uint32_t pad3 : 6; + uint32_t w : 1; /* write */ + uint32_t r : 1; /* read */ + uint32_t l : 1; /* local */ + uint32_t g : 1; /* global */ + uint32_t opc : 4; /* presumed, but only a couple known OPCs */ + uint32_t jmp_tgt : 1; /* (jp) */ + uint32_t sync : 1; /* (sy) */ + uint32_t opc_cat : 3; +} instr_cat7_t; + +typedef union PACKED { + instr_cat0_t cat0; + instr_cat1_t cat1; + instr_cat2_t cat2; + instr_cat3_t cat3; + instr_cat4_t cat4; + instr_cat5_t cat5; + instr_cat6_t cat6; + instr_cat6_a6xx_t cat6_a6xx; + instr_cat7_t cat7; + struct PACKED { + /* dword0: */ + uint32_t pad1 : 32; + + /* dword1: */ + uint32_t pad2 : 12; + uint32_t ss : 1; /* cat1-cat4 (cat0??) and cat7 (?) */ + uint32_t ul : 1; /* cat2-cat4 (and cat1 in blob.. which may be bug??) 
*/ + uint32_t pad3 : 13; + uint32_t jmp_tgt : 1; + uint32_t sync : 1; + uint32_t opc_cat : 3; + + }; +} instr_t; + +static inline uint32_t instr_repeat(instr_t *instr) +{ + switch (instr->opc_cat) { + case 0: return instr->cat0.repeat; + case 1: return instr->cat1.repeat; + case 2: return instr->cat2.repeat; + case 3: return instr->cat3.repeat; + case 4: return instr->cat4.repeat; + default: return 0; + } +} + +static inline bool instr_sat(instr_t *instr) +{ + switch (instr->opc_cat) { + case 2: return instr->cat2.sat; + case 3: return instr->cat3.sat; + case 4: return instr->cat4.sat; + default: return false; + } +} + +static inline uint32_t instr_opc(instr_t *instr, unsigned gpu_id) +{ + switch (instr->opc_cat) { + case 0: return instr->cat0.opc; + case 1: return 0; + case 2: return instr->cat2.opc; + case 3: return instr->cat3.opc; + case 4: return instr->cat4.opc; + case 5: return instr->cat5.opc; + case 6: + // TODO not sure if this is the best way to figure + // out if new vs old encoding, but it kinda seems + // to work: + if ((gpu_id >= 600) && (instr->cat6.opc == 0)) + return instr->cat6_a6xx.opc; + return instr->cat6.opc; + case 7: return instr->cat7.opc; + default: return 0; + } +} + +static inline bool is_mad(opc_t opc) +{ + switch (opc) { + case OPC_MAD_U16: + case OPC_MAD_S16: + case OPC_MAD_U24: + case OPC_MAD_S24: + case OPC_MAD_F16: + case OPC_MAD_F32: + return true; + default: + return false; + } +} + +static inline bool is_madsh(opc_t opc) +{ + switch (opc) { + case OPC_MADSH_U16: + case OPC_MADSH_M16: + return true; + default: + return false; + } +} + +static inline bool is_atomic(opc_t opc) +{ + switch (opc) { + case OPC_ATOMIC_ADD: + case OPC_ATOMIC_SUB: + case OPC_ATOMIC_XCHG: + case OPC_ATOMIC_INC: + case OPC_ATOMIC_DEC: + case OPC_ATOMIC_CMPXCHG: + case OPC_ATOMIC_MIN: + case OPC_ATOMIC_MAX: + case OPC_ATOMIC_AND: + case OPC_ATOMIC_OR: + case OPC_ATOMIC_XOR: + return true; + default: + return false; + } +} + +static inline bool 
is_ssbo(opc_t opc) +{ + switch (opc) { + case OPC_RESFMT: + case OPC_RESINFO: + case OPC_LDGB: + case OPC_STGB: + case OPC_STIB: + return true; + default: + return false; + } +} + +int disasm_a3xx(uint32_t *dwords, int sizedwords, int level, FILE *out, unsigned gpu_id); + +#endif /* INSTR_A3XX_H_ */ diff -Nru mesa-18.3.3/src/freedreno/ir3/ir3.c mesa-19.0.1/src/freedreno/ir3/ir3.c --- mesa-18.3.3/src/freedreno/ir3/ir3.c 1970-01-01 00:00:00.000000000 +0000 +++ mesa-19.0.1/src/freedreno/ir3/ir3.c 2019-03-31 23:16:37.000000000 +0000 @@ -0,0 +1,941 @@ +/* + * Copyright (c) 2012 Rob Clark + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ */ + +#include "ir3.h" + +#include +#include +#include +#include +#include +#include + +#include "util/bitscan.h" +#include "util/ralloc.h" +#include "util/u_math.h" + +#include "instr-a3xx.h" + +/* simple allocator to carve allocations out of an up-front allocated heap, + * so that we can free everything easily in one shot. + */ +void * ir3_alloc(struct ir3 *shader, int sz) +{ + return rzalloc_size(shader, sz); /* TODO: don't use rzalloc */ +} + +struct ir3 * ir3_create(struct ir3_compiler *compiler, + unsigned nin, unsigned nout) +{ + struct ir3 *shader = rzalloc(compiler, struct ir3); + + shader->compiler = compiler; + shader->ninputs = nin; + shader->inputs = ir3_alloc(shader, sizeof(shader->inputs[0]) * nin); + + shader->noutputs = nout; + shader->outputs = ir3_alloc(shader, sizeof(shader->outputs[0]) * nout); + + list_inithead(&shader->block_list); + list_inithead(&shader->array_list); + + return shader; +} + +void ir3_destroy(struct ir3 *shader) +{ + ralloc_free(shader); +} + +#define iassert(cond) do { \ + if (!(cond)) { \ + debug_assert(cond); \ + return -1; \ + } } while (0) + +#define iassert_type(reg, full) do { \ + if ((full)) { \ + iassert(!((reg)->flags & IR3_REG_HALF)); \ + } else { \ + iassert((reg)->flags & IR3_REG_HALF); \ + } } while (0); + +static uint32_t reg(struct ir3_register *reg, struct ir3_info *info, + uint32_t repeat, uint32_t valid_flags) +{ + reg_t val = { .dummy32 = 0 }; + + if (reg->flags & ~valid_flags) { + debug_printf("INVALID FLAGS: %x vs %x\n", + reg->flags, valid_flags); + } + + if (!(reg->flags & IR3_REG_R)) + repeat = 0; + + if (reg->flags & IR3_REG_IMMED) { + val.iim_val = reg->iim_val; + } else { + unsigned components; + int16_t max; + + if (reg->flags & IR3_REG_RELATIV) { + components = reg->size; + val.idummy10 = reg->array.offset; + max = (reg->array.offset + repeat + components - 1) >> 2; + } else { + components = util_last_bit(reg->wrmask); + val.comp = reg->num & 0x3; + val.num = reg->num >> 2; + max = (reg->num + 
repeat + components - 1) >> 2; + } + + if (reg->flags & IR3_REG_CONST) { + info->max_const = MAX2(info->max_const, max); + } else if (val.num == 63) { + /* ignore writes to dummy register r63.x */ + } else if (max < 48) { + if (reg->flags & IR3_REG_HALF) { + if (info->gpu_id >= 600) { + /* starting w/ a6xx, half regs conflict with full regs: */ + info->max_reg = MAX2(info->max_reg, (max+1)/2); + } else { + info->max_half_reg = MAX2(info->max_half_reg, max); + } + } else { + info->max_reg = MAX2(info->max_reg, max); + } + } + } + + return val.dummy32; +} + +static int emit_cat0(struct ir3_instruction *instr, void *ptr, + struct ir3_info *info) +{ + instr_cat0_t *cat0 = ptr; + + if (info->gpu_id >= 500) { + cat0->a5xx.immed = instr->cat0.immed; + } else if (info->gpu_id >= 400) { + cat0->a4xx.immed = instr->cat0.immed; + } else { + cat0->a3xx.immed = instr->cat0.immed; + } + cat0->repeat = instr->repeat; + cat0->ss = !!(instr->flags & IR3_INSTR_SS); + cat0->inv = instr->cat0.inv; + cat0->comp = instr->cat0.comp; + cat0->opc = instr->opc; + cat0->jmp_tgt = !!(instr->flags & IR3_INSTR_JP); + cat0->sync = !!(instr->flags & IR3_INSTR_SY); + cat0->opc_cat = 0; + + return 0; +} + +static int emit_cat1(struct ir3_instruction *instr, void *ptr, + struct ir3_info *info) +{ + struct ir3_register *dst = instr->regs[0]; + struct ir3_register *src = instr->regs[1]; + instr_cat1_t *cat1 = ptr; + + iassert(instr->regs_count == 2); + iassert_type(dst, type_size(instr->cat1.dst_type) == 32); + if (!(src->flags & IR3_REG_IMMED)) + iassert_type(src, type_size(instr->cat1.src_type) == 32); + + if (src->flags & IR3_REG_IMMED) { + cat1->iim_val = src->iim_val; + cat1->src_im = 1; + } else if (src->flags & IR3_REG_RELATIV) { + cat1->off = reg(src, info, instr->repeat, + IR3_REG_R | IR3_REG_CONST | IR3_REG_HALF | IR3_REG_RELATIV); + cat1->src_rel = 1; + cat1->src_rel_c = !!(src->flags & IR3_REG_CONST); + } else { + cat1->src = reg(src, info, instr->repeat, + IR3_REG_R | IR3_REG_CONST | 
IR3_REG_HALF); + cat1->src_c = !!(src->flags & IR3_REG_CONST); + } + + cat1->dst = reg(dst, info, instr->repeat, + IR3_REG_RELATIV | IR3_REG_EVEN | + IR3_REG_R | IR3_REG_POS_INF | IR3_REG_HALF); + cat1->repeat = instr->repeat; + cat1->src_r = !!(src->flags & IR3_REG_R); + cat1->ss = !!(instr->flags & IR3_INSTR_SS); + cat1->ul = !!(instr->flags & IR3_INSTR_UL); + cat1->dst_type = instr->cat1.dst_type; + cat1->dst_rel = !!(dst->flags & IR3_REG_RELATIV); + cat1->src_type = instr->cat1.src_type; + cat1->even = !!(dst->flags & IR3_REG_EVEN); + cat1->pos_inf = !!(dst->flags & IR3_REG_POS_INF); + cat1->jmp_tgt = !!(instr->flags & IR3_INSTR_JP); + cat1->sync = !!(instr->flags & IR3_INSTR_SY); + cat1->opc_cat = 1; + + return 0; +} + +static int emit_cat2(struct ir3_instruction *instr, void *ptr, + struct ir3_info *info) +{ + struct ir3_register *dst = instr->regs[0]; + struct ir3_register *src1 = instr->regs[1]; + struct ir3_register *src2 = instr->regs[2]; + instr_cat2_t *cat2 = ptr; + unsigned absneg = ir3_cat2_absneg(instr->opc); + + iassert((instr->regs_count == 2) || (instr->regs_count == 3)); + + if (src1->flags & IR3_REG_RELATIV) { + iassert(src1->array.offset < (1 << 10)); + cat2->rel1.src1 = reg(src1, info, instr->repeat, + IR3_REG_RELATIV | IR3_REG_CONST | IR3_REG_R | + IR3_REG_HALF | absneg); + cat2->rel1.src1_c = !!(src1->flags & IR3_REG_CONST); + cat2->rel1.src1_rel = 1; + } else if (src1->flags & IR3_REG_CONST) { + iassert(src1->num < (1 << 12)); + cat2->c1.src1 = reg(src1, info, instr->repeat, + IR3_REG_CONST | IR3_REG_R | IR3_REG_HALF); + cat2->c1.src1_c = 1; + } else { + iassert(src1->num < (1 << 11)); + cat2->src1 = reg(src1, info, instr->repeat, + IR3_REG_IMMED | IR3_REG_R | IR3_REG_HALF | + absneg); + } + cat2->src1_im = !!(src1->flags & IR3_REG_IMMED); + cat2->src1_neg = !!(src1->flags & (IR3_REG_FNEG | IR3_REG_SNEG | IR3_REG_BNOT)); + cat2->src1_abs = !!(src1->flags & (IR3_REG_FABS | IR3_REG_SABS)); + cat2->src1_r = !!(src1->flags & IR3_REG_R); + + if 
(src2) { + iassert((src2->flags & IR3_REG_IMMED) || + !((src1->flags ^ src2->flags) & IR3_REG_HALF)); + + if (src2->flags & IR3_REG_RELATIV) { + iassert(src2->array.offset < (1 << 10)); + cat2->rel2.src2 = reg(src2, info, instr->repeat, + IR3_REG_RELATIV | IR3_REG_CONST | IR3_REG_R | + IR3_REG_HALF | absneg); + cat2->rel2.src2_c = !!(src2->flags & IR3_REG_CONST); + cat2->rel2.src2_rel = 1; + } else if (src2->flags & IR3_REG_CONST) { + iassert(src2->num < (1 << 12)); + cat2->c2.src2 = reg(src2, info, instr->repeat, + IR3_REG_CONST | IR3_REG_R | IR3_REG_HALF); + cat2->c2.src2_c = 1; + } else { + iassert(src2->num < (1 << 11)); + cat2->src2 = reg(src2, info, instr->repeat, + IR3_REG_IMMED | IR3_REG_R | IR3_REG_HALF | + absneg); + } + + cat2->src2_im = !!(src2->flags & IR3_REG_IMMED); + cat2->src2_neg = !!(src2->flags & (IR3_REG_FNEG | IR3_REG_SNEG | IR3_REG_BNOT)); + cat2->src2_abs = !!(src2->flags & (IR3_REG_FABS | IR3_REG_SABS)); + cat2->src2_r = !!(src2->flags & IR3_REG_R); + } + + cat2->dst = reg(dst, info, instr->repeat, + IR3_REG_R | IR3_REG_EI | IR3_REG_HALF); + cat2->repeat = instr->repeat; + cat2->sat = !!(instr->flags & IR3_INSTR_SAT); + cat2->ss = !!(instr->flags & IR3_INSTR_SS); + cat2->ul = !!(instr->flags & IR3_INSTR_UL); + cat2->dst_half = !!((src1->flags ^ dst->flags) & IR3_REG_HALF); + cat2->ei = !!(dst->flags & IR3_REG_EI); + cat2->cond = instr->cat2.condition; + cat2->full = ! 
(src1->flags & IR3_REG_HALF); + cat2->opc = instr->opc; + cat2->jmp_tgt = !!(instr->flags & IR3_INSTR_JP); + cat2->sync = !!(instr->flags & IR3_INSTR_SY); + cat2->opc_cat = 2; + + return 0; +} + +static int emit_cat3(struct ir3_instruction *instr, void *ptr, + struct ir3_info *info) +{ + struct ir3_register *dst = instr->regs[0]; + struct ir3_register *src1 = instr->regs[1]; + struct ir3_register *src2 = instr->regs[2]; + struct ir3_register *src3 = instr->regs[3]; + unsigned absneg = ir3_cat3_absneg(instr->opc); + instr_cat3_t *cat3 = ptr; + uint32_t src_flags = 0; + + switch (instr->opc) { + case OPC_MAD_F16: + case OPC_MAD_U16: + case OPC_MAD_S16: + case OPC_SEL_B16: + case OPC_SEL_S16: + case OPC_SEL_F16: + case OPC_SAD_S16: + case OPC_SAD_S32: // really?? + src_flags |= IR3_REG_HALF; + break; + default: + break; + } + + iassert(instr->regs_count == 4); + iassert(!((src1->flags ^ src_flags) & IR3_REG_HALF)); + iassert(!((src2->flags ^ src_flags) & IR3_REG_HALF)); + iassert(!((src3->flags ^ src_flags) & IR3_REG_HALF)); + + if (src1->flags & IR3_REG_RELATIV) { + iassert(src1->array.offset < (1 << 10)); + cat3->rel1.src1 = reg(src1, info, instr->repeat, + IR3_REG_RELATIV | IR3_REG_CONST | IR3_REG_R | + IR3_REG_HALF | absneg); + cat3->rel1.src1_c = !!(src1->flags & IR3_REG_CONST); + cat3->rel1.src1_rel = 1; + } else if (src1->flags & IR3_REG_CONST) { + iassert(src1->num < (1 << 12)); + cat3->c1.src1 = reg(src1, info, instr->repeat, + IR3_REG_CONST | IR3_REG_R | IR3_REG_HALF); + cat3->c1.src1_c = 1; + } else { + iassert(src1->num < (1 << 11)); + cat3->src1 = reg(src1, info, instr->repeat, + IR3_REG_R | IR3_REG_HALF | absneg); + } + + cat3->src1_neg = !!(src1->flags & (IR3_REG_FNEG | IR3_REG_SNEG | IR3_REG_BNOT)); + cat3->src1_r = !!(src1->flags & IR3_REG_R); + + cat3->src2 = reg(src2, info, instr->repeat, + IR3_REG_CONST | IR3_REG_R | IR3_REG_HALF | absneg); + cat3->src2_c = !!(src2->flags & IR3_REG_CONST); + cat3->src2_neg = !!(src2->flags & (IR3_REG_FNEG | 
IR3_REG_SNEG | IR3_REG_BNOT)); + cat3->src2_r = !!(src2->flags & IR3_REG_R); + + + if (src3->flags & IR3_REG_RELATIV) { + iassert(src3->array.offset < (1 << 10)); + cat3->rel2.src3 = reg(src3, info, instr->repeat, + IR3_REG_RELATIV | IR3_REG_CONST | IR3_REG_R | + IR3_REG_HALF | absneg); + cat3->rel2.src3_c = !!(src3->flags & IR3_REG_CONST); + cat3->rel2.src3_rel = 1; + } else if (src3->flags & IR3_REG_CONST) { + iassert(src3->num < (1 << 12)); + cat3->c2.src3 = reg(src3, info, instr->repeat, + IR3_REG_CONST | IR3_REG_R | IR3_REG_HALF); + cat3->c2.src3_c = 1; + } else { + iassert(src3->num < (1 << 11)); + cat3->src3 = reg(src3, info, instr->repeat, + IR3_REG_R | IR3_REG_HALF | absneg); + } + + cat3->src3_neg = !!(src3->flags & (IR3_REG_FNEG | IR3_REG_SNEG | IR3_REG_BNOT)); + cat3->src3_r = !!(src3->flags & IR3_REG_R); + + cat3->dst = reg(dst, info, instr->repeat, IR3_REG_R | IR3_REG_HALF); + cat3->repeat = instr->repeat; + cat3->sat = !!(instr->flags & IR3_INSTR_SAT); + cat3->ss = !!(instr->flags & IR3_INSTR_SS); + cat3->ul = !!(instr->flags & IR3_INSTR_UL); + cat3->dst_half = !!((src_flags ^ dst->flags) & IR3_REG_HALF); + cat3->opc = instr->opc; + cat3->jmp_tgt = !!(instr->flags & IR3_INSTR_JP); + cat3->sync = !!(instr->flags & IR3_INSTR_SY); + cat3->opc_cat = 3; + + return 0; +} + +static int emit_cat4(struct ir3_instruction *instr, void *ptr, + struct ir3_info *info) +{ + struct ir3_register *dst = instr->regs[0]; + struct ir3_register *src = instr->regs[1]; + instr_cat4_t *cat4 = ptr; + + iassert(instr->regs_count == 2); + + if (src->flags & IR3_REG_RELATIV) { + iassert(src->array.offset < (1 << 10)); + cat4->rel.src = reg(src, info, instr->repeat, + IR3_REG_RELATIV | IR3_REG_CONST | IR3_REG_FNEG | + IR3_REG_FABS | IR3_REG_R | IR3_REG_HALF); + cat4->rel.src_c = !!(src->flags & IR3_REG_CONST); + cat4->rel.src_rel = 1; + } else if (src->flags & IR3_REG_CONST) { + iassert(src->num < (1 << 12)); + cat4->c.src = reg(src, info, instr->repeat, + IR3_REG_CONST | 
IR3_REG_FNEG | IR3_REG_FABS | + IR3_REG_R | IR3_REG_HALF); + cat4->c.src_c = 1; + } else { + iassert(src->num < (1 << 11)); + cat4->src = reg(src, info, instr->repeat, + IR3_REG_IMMED | IR3_REG_FNEG | IR3_REG_FABS | + IR3_REG_R | IR3_REG_HALF); + } + + cat4->src_im = !!(src->flags & IR3_REG_IMMED); + cat4->src_neg = !!(src->flags & IR3_REG_FNEG); + cat4->src_abs = !!(src->flags & IR3_REG_FABS); + cat4->src_r = !!(src->flags & IR3_REG_R); + + cat4->dst = reg(dst, info, instr->repeat, IR3_REG_R | IR3_REG_HALF); + cat4->repeat = instr->repeat; + cat4->sat = !!(instr->flags & IR3_INSTR_SAT); + cat4->ss = !!(instr->flags & IR3_INSTR_SS); + cat4->ul = !!(instr->flags & IR3_INSTR_UL); + cat4->dst_half = !!((src->flags ^ dst->flags) & IR3_REG_HALF); + cat4->full = ! (src->flags & IR3_REG_HALF); + cat4->opc = instr->opc; + cat4->jmp_tgt = !!(instr->flags & IR3_INSTR_JP); + cat4->sync = !!(instr->flags & IR3_INSTR_SY); + cat4->opc_cat = 4; + + return 0; +} + +static int emit_cat5(struct ir3_instruction *instr, void *ptr, + struct ir3_info *info) +{ + struct ir3_register *dst = instr->regs[0]; + struct ir3_register *src1 = instr->regs[1]; + struct ir3_register *src2 = instr->regs[2]; + struct ir3_register *src3 = instr->regs[3]; + instr_cat5_t *cat5 = ptr; + + iassert_type(dst, type_size(instr->cat5.type) == 32) + + assume(src1 || !src2); + assume(src2 || !src3); + + if (src1) { + cat5->full = ! 
(src1->flags & IR3_REG_HALF); + cat5->src1 = reg(src1, info, instr->repeat, IR3_REG_HALF); + } + + if (instr->flags & IR3_INSTR_S2EN) { + if (src2) { + iassert(!((src1->flags ^ src2->flags) & IR3_REG_HALF)); + cat5->s2en.src2 = reg(src2, info, instr->repeat, IR3_REG_HALF); + } + if (src3) { + iassert(src3->flags & IR3_REG_HALF); + cat5->s2en.src3 = reg(src3, info, instr->repeat, IR3_REG_HALF); + } + iassert(!(instr->cat5.samp | instr->cat5.tex)); + } else { + iassert(!src3); + if (src2) { + iassert(!((src1->flags ^ src2->flags) & IR3_REG_HALF)); + cat5->norm.src2 = reg(src2, info, instr->repeat, IR3_REG_HALF); + } + cat5->norm.samp = instr->cat5.samp; + cat5->norm.tex = instr->cat5.tex; + } + + cat5->dst = reg(dst, info, instr->repeat, IR3_REG_R | IR3_REG_HALF); + cat5->wrmask = dst->wrmask; + cat5->type = instr->cat5.type; + cat5->is_3d = !!(instr->flags & IR3_INSTR_3D); + cat5->is_a = !!(instr->flags & IR3_INSTR_A); + cat5->is_s = !!(instr->flags & IR3_INSTR_S); + cat5->is_s2en = !!(instr->flags & IR3_INSTR_S2EN); + cat5->is_o = !!(instr->flags & IR3_INSTR_O); + cat5->is_p = !!(instr->flags & IR3_INSTR_P); + cat5->opc = instr->opc; + cat5->jmp_tgt = !!(instr->flags & IR3_INSTR_JP); + cat5->sync = !!(instr->flags & IR3_INSTR_SY); + cat5->opc_cat = 5; + + return 0; +} + +static int emit_cat6(struct ir3_instruction *instr, void *ptr, + struct ir3_info *info) +{ + struct ir3_register *dst, *src1, *src2; + instr_cat6_t *cat6 = ptr; + bool type_full = type_size(instr->cat6.type) == 32; + + cat6->type = instr->cat6.type; + cat6->opc = instr->opc; + cat6->jmp_tgt = !!(instr->flags & IR3_INSTR_JP); + cat6->sync = !!(instr->flags & IR3_INSTR_SY); + cat6->g = !!(instr->flags & IR3_INSTR_G); + cat6->opc_cat = 6; + + switch (instr->opc) { + case OPC_RESINFO: + case OPC_RESFMT: + iassert_type(instr->regs[0], type_full); /* dst */ + iassert_type(instr->regs[1], type_full); /* src1 */ + break; + case OPC_L2G: + case OPC_G2L: + iassert_type(instr->regs[0], true); /* dst */ + 
iassert_type(instr->regs[1], true); /* src1 */ + break; + case OPC_STG: + case OPC_STL: + case OPC_STP: + case OPC_STI: + case OPC_STLW: + case OPC_STIB: + /* no dst, so regs[0] is dummy */ + iassert_type(instr->regs[1], true); /* dst */ + iassert_type(instr->regs[2], type_full); /* src1 */ + iassert_type(instr->regs[3], true); /* src2 */ + break; + default: + iassert_type(instr->regs[0], type_full); /* dst */ + iassert_type(instr->regs[1], true); /* src1 */ + if (instr->regs_count > 2) + iassert_type(instr->regs[2], true); /* src1 */ + break; + } + + /* the "dst" for a store instruction is (from the perspective + * of data flow in the shader, ie. register use/def, etc) in + * fact a register that is read by the instruction, rather + * than written: + */ + if (is_store(instr)) { + iassert(instr->regs_count >= 3); + + dst = instr->regs[1]; + src1 = instr->regs[2]; + src2 = (instr->regs_count >= 4) ? instr->regs[3] : NULL; + } else { + iassert(instr->regs_count >= 2); + + dst = instr->regs[0]; + src1 = instr->regs[1]; + src2 = (instr->regs_count >= 3) ? instr->regs[2] : NULL; + } + + /* TODO we need a more comprehensive list about which instructions + * can be encoded which way. Or possibly use IR3_INSTR_0 flag to + * indicate to use the src_off encoding even if offset is zero + * (but then what to do about dst_off?) + */ + if (is_atomic(instr->opc)) { + instr_cat6ldgb_t *ldgb = ptr; + + /* maybe these two bits both determine the instruction encoding? 
*/ + cat6->src_off = false; + + ldgb->d = instr->cat6.d - 1; + ldgb->typed = instr->cat6.typed; + ldgb->type_size = instr->cat6.iim_val - 1; + + ldgb->dst = reg(dst, info, instr->repeat, IR3_REG_R | IR3_REG_HALF); + + if (ldgb->g) { + struct ir3_register *src3 = instr->regs[3]; + struct ir3_register *src4 = instr->regs[4]; + + /* first src is src_ssbo: */ + iassert(src1->flags & IR3_REG_IMMED); + ldgb->src_ssbo = src1->uim_val; + + ldgb->src1 = reg(src2, info, instr->repeat, IR3_REG_IMMED); + ldgb->src1_im = !!(src2->flags & IR3_REG_IMMED); + ldgb->src2 = reg(src3, info, instr->repeat, IR3_REG_IMMED); + ldgb->src2_im = !!(src3->flags & IR3_REG_IMMED); + + ldgb->src3 = reg(src4, info, instr->repeat, 0); + ldgb->pad0 = 0x1; + ldgb->pad3 = 0x1; + } else { + ldgb->src1 = reg(src1, info, instr->repeat, IR3_REG_IMMED); + ldgb->src1_im = !!(src1->flags & IR3_REG_IMMED); + ldgb->src2 = reg(src2, info, instr->repeat, IR3_REG_IMMED); + ldgb->src2_im = !!(src2->flags & IR3_REG_IMMED); + ldgb->pad0 = 0x1; + ldgb->pad3 = 0x0; + } + + return 0; + } else if (instr->opc == OPC_LDGB) { + struct ir3_register *src3 = instr->regs[3]; + instr_cat6ldgb_t *ldgb = ptr; + + /* maybe these two bits both determine the instruction encoding? 
*/ + cat6->src_off = false; + + ldgb->d = instr->cat6.d - 1; + ldgb->typed = instr->cat6.typed; + ldgb->type_size = instr->cat6.iim_val - 1; + + ldgb->dst = reg(dst, info, instr->repeat, IR3_REG_R | IR3_REG_HALF); + + /* first src is src_ssbo: */ + iassert(src1->flags & IR3_REG_IMMED); + ldgb->src_ssbo = src1->uim_val; + + /* then next two are src1/src2: */ + ldgb->src1 = reg(src2, info, instr->repeat, IR3_REG_IMMED); + ldgb->src1_im = !!(src2->flags & IR3_REG_IMMED); + ldgb->src2 = reg(src3, info, instr->repeat, IR3_REG_IMMED); + ldgb->src2_im = !!(src3->flags & IR3_REG_IMMED); + + ldgb->pad0 = 0x0; + ldgb->pad3 = 0x1; + + return 0; + } else if (instr->opc == OPC_RESINFO) { + instr_cat6ldgb_t *ldgb = ptr; + + ldgb->d = instr->cat6.d - 1; + + ldgb->dst = reg(dst, info, instr->repeat, IR3_REG_R | IR3_REG_HALF); + + /* first src is src_ssbo: */ + iassert(src1->flags & IR3_REG_IMMED); + ldgb->src_ssbo = src1->uim_val; + + return 0; + } else if ((instr->opc == OPC_STGB) || (instr->opc == OPC_STIB)) { + struct ir3_register *src3 = instr->regs[4]; + instr_cat6stgb_t *stgb = ptr; + + /* maybe these two bits both determine the instruction encoding? 
*/ + cat6->src_off = true; + stgb->pad3 = 0x2; + + stgb->d = instr->cat6.d - 1; + stgb->typed = instr->cat6.typed; + stgb->type_size = instr->cat6.iim_val - 1; + + /* first src is dst_ssbo: */ + iassert(dst->flags & IR3_REG_IMMED); + stgb->dst_ssbo = dst->uim_val; + + /* then src1/src2/src3: */ + stgb->src1 = reg(src1, info, instr->repeat, 0); + stgb->src2 = reg(src2, info, instr->repeat, IR3_REG_IMMED); + stgb->src2_im = !!(src2->flags & IR3_REG_IMMED); + stgb->src3 = reg(src3, info, instr->repeat, IR3_REG_IMMED); + stgb->src3_im = !!(src3->flags & IR3_REG_IMMED); + + return 0; + } else if (instr->cat6.src_offset || (instr->opc == OPC_LDG) || + (instr->opc == OPC_LDL)) { + instr_cat6a_t *cat6a = ptr; + + cat6->src_off = true; + + cat6a->src1 = reg(src1, info, instr->repeat, IR3_REG_IMMED); + cat6a->src1_im = !!(src1->flags & IR3_REG_IMMED); + if (src2) { + cat6a->src2 = reg(src2, info, instr->repeat, IR3_REG_IMMED); + cat6a->src2_im = !!(src2->flags & IR3_REG_IMMED); + } + cat6a->off = instr->cat6.src_offset; + } else { + instr_cat6b_t *cat6b = ptr; + + cat6->src_off = false; + + cat6b->src1 = reg(src1, info, instr->repeat, IR3_REG_IMMED | IR3_REG_HALF); + cat6b->src1_im = !!(src1->flags & IR3_REG_IMMED); + if (src2) { + cat6b->src2 = reg(src2, info, instr->repeat, IR3_REG_IMMED); + cat6b->src2_im = !!(src2->flags & IR3_REG_IMMED); + } + } + + if (instr->cat6.dst_offset || (instr->opc == OPC_STG) || + (instr->opc == OPC_STL)) { + instr_cat6c_t *cat6c = ptr; + cat6->dst_off = true; + cat6c->dst = reg(dst, info, instr->repeat, IR3_REG_R | IR3_REG_HALF); + cat6c->off = instr->cat6.dst_offset; + } else { + instr_cat6d_t *cat6d = ptr; + cat6->dst_off = false; + cat6d->dst = reg(dst, info, instr->repeat, IR3_REG_R | IR3_REG_HALF); + } + + return 0; +} + +static int emit_cat7(struct ir3_instruction *instr, void *ptr, + struct ir3_info *info) +{ + instr_cat7_t *cat7 = ptr; + + cat7->ss = !!(instr->flags & IR3_INSTR_SS); + cat7->w = instr->cat7.w; + cat7->r = 
instr->cat7.r; + cat7->l = instr->cat7.l; + cat7->g = instr->cat7.g; + cat7->opc = instr->opc; + cat7->jmp_tgt = !!(instr->flags & IR3_INSTR_JP); + cat7->sync = !!(instr->flags & IR3_INSTR_SY); + cat7->opc_cat = 7; + + return 0; +} + +static int (*emit[])(struct ir3_instruction *instr, void *ptr, + struct ir3_info *info) = { + emit_cat0, emit_cat1, emit_cat2, emit_cat3, emit_cat4, emit_cat5, emit_cat6, + emit_cat7, +}; + +void * ir3_assemble(struct ir3 *shader, struct ir3_info *info, + uint32_t gpu_id) +{ + uint32_t *ptr, *dwords; + + info->gpu_id = gpu_id; + info->max_reg = -1; + info->max_half_reg = -1; + info->max_const = -1; + info->instrs_count = 0; + info->sizedwords = 0; + info->ss = info->sy = 0; + + list_for_each_entry (struct ir3_block, block, &shader->block_list, node) { + list_for_each_entry (struct ir3_instruction, instr, &block->instr_list, node) { + info->sizedwords += 2; + } + } + + /* need an integer number of instruction "groups" (sets of 16 + * instructions on a4xx or sets of 4 instructions on a3xx), + * so pad out w/ NOPs if needed: (NOTE each instruction is 64bits) + */ + if (gpu_id >= 400) { + info->sizedwords = align(info->sizedwords, 16 * 2); + } else { + info->sizedwords = align(info->sizedwords, 4 * 2); + } + + ptr = dwords = calloc(4, info->sizedwords); + + list_for_each_entry (struct ir3_block, block, &shader->block_list, node) { + list_for_each_entry (struct ir3_instruction, instr, &block->instr_list, node) { + int ret = emit[opc_cat(instr->opc)](instr, dwords, info); + if (ret) + goto fail; + info->instrs_count += 1 + instr->repeat; + dwords += 2; + + if (instr->flags & IR3_INSTR_SS) + info->ss++; + + if (instr->flags & IR3_INSTR_SY) + info->sy++; + } + } + + return ptr; + +fail: + free(ptr); + return NULL; +} + +static struct ir3_register * reg_create(struct ir3 *shader, + int num, int flags) +{ + struct ir3_register *reg = + ir3_alloc(shader, sizeof(struct ir3_register)); + reg->wrmask = 1; + reg->flags = flags; + reg->num = num; + 
return reg; +} + +static void insert_instr(struct ir3_block *block, + struct ir3_instruction *instr) +{ + struct ir3 *shader = block->shader; +#ifdef DEBUG + instr->serialno = ++shader->instr_count; +#endif + list_addtail(&instr->node, &block->instr_list); + + if (is_input(instr)) + array_insert(shader, shader->baryfs, instr); +} + +struct ir3_block * ir3_block_create(struct ir3 *shader) +{ + struct ir3_block *block = ir3_alloc(shader, sizeof(*block)); +#ifdef DEBUG + block->serialno = ++shader->block_count; +#endif + block->shader = shader; + list_inithead(&block->node); + list_inithead(&block->instr_list); + return block; +} + +static struct ir3_instruction *instr_create(struct ir3_block *block, int nreg) +{ + struct ir3_instruction *instr; + unsigned sz = sizeof(*instr) + (nreg * sizeof(instr->regs[0])); + char *ptr = ir3_alloc(block->shader, sz); + + instr = (struct ir3_instruction *)ptr; + ptr += sizeof(*instr); + instr->regs = (struct ir3_register **)ptr; + +#ifdef DEBUG + instr->regs_max = nreg; +#endif + + return instr; +} + +struct ir3_instruction * ir3_instr_create2(struct ir3_block *block, + opc_t opc, int nreg) +{ + struct ir3_instruction *instr = instr_create(block, nreg); + instr->block = block; + instr->opc = opc; + insert_instr(block, instr); + return instr; +} + +struct ir3_instruction * ir3_instr_create(struct ir3_block *block, opc_t opc) +{ + /* NOTE: we could be slightly more clever, at least for non-meta, + * and choose # of regs based on category. 
+ */ + return ir3_instr_create2(block, opc, 4); +} + +struct ir3_instruction * ir3_instr_clone(struct ir3_instruction *instr) +{ + struct ir3_instruction *new_instr = instr_create(instr->block, + instr->regs_count); + struct ir3_register **regs; + unsigned i; + + regs = new_instr->regs; + *new_instr = *instr; + new_instr->regs = regs; + + insert_instr(instr->block, new_instr); + + /* clone registers: */ + new_instr->regs_count = 0; + for (i = 0; i < instr->regs_count; i++) { + struct ir3_register *reg = instr->regs[i]; + struct ir3_register *new_reg = + ir3_reg_create(new_instr, reg->num, reg->flags); + *new_reg = *reg; + } + + return new_instr; +} + +/* Add a false dependency to instruction, to ensure it is scheduled first: */ +void ir3_instr_add_dep(struct ir3_instruction *instr, struct ir3_instruction *dep) +{ + array_insert(instr, instr->deps, dep); +} + +struct ir3_register * ir3_reg_create(struct ir3_instruction *instr, + int num, int flags) +{ + struct ir3 *shader = instr->block->shader; + struct ir3_register *reg = reg_create(shader, num, flags); +#ifdef DEBUG + debug_assert(instr->regs_count < instr->regs_max); +#endif + instr->regs[instr->regs_count++] = reg; + return reg; +} + +struct ir3_register * ir3_reg_clone(struct ir3 *shader, + struct ir3_register *reg) +{ + struct ir3_register *new_reg = reg_create(shader, 0, 0); + *new_reg = *reg; + return new_reg; +} + +void +ir3_instr_set_address(struct ir3_instruction *instr, + struct ir3_instruction *addr) +{ + if (instr->address != addr) { + struct ir3 *ir = instr->block->shader; + instr->address = addr; + array_insert(ir, ir->indirects, instr); + } +} + +void +ir3_block_clear_mark(struct ir3_block *block) +{ + list_for_each_entry (struct ir3_instruction, instr, &block->instr_list, node) + instr->flags &= ~IR3_INSTR_MARK; +} + +void +ir3_clear_mark(struct ir3 *ir) +{ + list_for_each_entry (struct ir3_block, block, &ir->block_list, node) { + ir3_block_clear_mark(block); + } +} + +/* note: this will destroy 
instr->depth, don't do it until after sched! */ +unsigned +ir3_count_instructions(struct ir3 *ir) +{ + unsigned cnt = 0; + list_for_each_entry (struct ir3_block, block, &ir->block_list, node) { + list_for_each_entry (struct ir3_instruction, instr, &block->instr_list, node) { + instr->ip = cnt++; + } + block->start_ip = list_first_entry(&block->instr_list, struct ir3_instruction, node)->ip; + block->end_ip = list_last_entry(&block->instr_list, struct ir3_instruction, node)->ip; + } + return cnt; +} + +struct ir3_array * +ir3_lookup_array(struct ir3 *ir, unsigned id) +{ + list_for_each_entry (struct ir3_array, arr, &ir->array_list, node) + if (arr->id == id) + return arr; + return NULL; +} diff -Nru mesa-18.3.3/src/freedreno/ir3/ir3_compiler.c mesa-19.0.1/src/freedreno/ir3/ir3_compiler.c --- mesa-18.3.3/src/freedreno/ir3/ir3_compiler.c 1970-01-01 00:00:00.000000000 +0000 +++ mesa-19.0.1/src/freedreno/ir3/ir3_compiler.c 2019-03-31 23:16:37.000000000 +0000 @@ -0,0 +1,71 @@ +/* + * Copyright (C) 2015 Rob Clark + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + * Authors: + * Rob Clark + */ + +#include "util/ralloc.h" + +#include "ir3_compiler.h" + +static const struct debug_named_value shader_debug_options[] = { + {"vs", IR3_DBG_SHADER_VS, "Print shader disasm for vertex shaders"}, + {"fs", IR3_DBG_SHADER_FS, "Print shader disasm for fragment shaders"}, + {"cs", IR3_DBG_SHADER_CS, "Print shader disasm for compute shaders"}, + {"disasm", IR3_DBG_DISASM, "Dump NIR and adreno shader disassembly"}, + {"optmsgs", IR3_DBG_OPTMSGS,"Enable optimizer debug messages"}, + DEBUG_NAMED_VALUE_END +}; + +DEBUG_GET_ONCE_FLAGS_OPTION(ir3_shader_debug, "IR3_SHADER_DEBUG", shader_debug_options, 0) + +enum ir3_shader_debug ir3_shader_debug = 0; + +struct ir3_compiler * ir3_compiler_create(struct fd_device *dev, uint32_t gpu_id) +{ + struct ir3_compiler *compiler = rzalloc(NULL, struct ir3_compiler); + + ir3_shader_debug = debug_get_option_ir3_shader_debug(); + + compiler->dev = dev; + compiler->gpu_id = gpu_id; + compiler->set = ir3_ra_alloc_reg_set(compiler); + + if (compiler->gpu_id >= 400) { + /* need special handling for "flat" */ + compiler->flat_bypass = true; + compiler->levels_add_one = false; + compiler->unminify_coords = false; + compiler->txf_ms_with_isaml = false; + compiler->array_index_add_half = true; + } else { + /* no special handling for "flat" */ + compiler->flat_bypass = false; + compiler->levels_add_one = true; + compiler->unminify_coords = true; + compiler->txf_ms_with_isaml = true; + compiler->array_index_add_half = false; + } + + return compiler; +} diff -Nru mesa-18.3.3/src/freedreno/ir3/ir3_compiler.h mesa-19.0.1/src/freedreno/ir3/ir3_compiler.h --- mesa-18.3.3/src/freedreno/ir3/ir3_compiler.h 1970-01-01 00:00:00.000000000 +0000 +++ 
mesa-19.0.1/src/freedreno/ir3/ir3_compiler.h 2019-03-31 23:16:37.000000000 +0000 @@ -0,0 +1,96 @@ +/* + * Copyright (C) 2013 Rob Clark + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ * + * Authors: + * Rob Clark + */ + +#ifndef IR3_COMPILER_H_ +#define IR3_COMPILER_H_ + +#include "ir3_shader.h" + +struct ir3_ra_reg_set; + +struct ir3_compiler { + struct fd_device *dev; + uint32_t gpu_id; + struct ir3_ra_reg_set *set; + uint32_t shader_count; + + /* + * Configuration options for things that are handled differently on + * different generations: + */ + + /* a4xx (and later) drops SP_FS_FLAT_SHAD_MODE_REG_* for flat-interpolate + * so we need to use ldlv.u32 to load the varying directly: + */ + bool flat_bypass; + + /* on a3xx, we need to add one to # of array levels: + */ + bool levels_add_one; + + /* on a3xx, we need to scale up integer coords for isaml based + * on LoD: + */ + bool unminify_coords; + + /* on a3xx do txf_ms w/ isaml and scaled coords: */ + bool txf_ms_with_isaml; + + /* on a4xx, for array textures we need to add 0.5 to the array + * index coordinate: + */ + bool array_index_add_half; +}; + +struct ir3_compiler * ir3_compiler_create(struct fd_device *dev, uint32_t gpu_id); + +int ir3_compile_shader_nir(struct ir3_compiler *compiler, + struct ir3_shader_variant *so); + +enum ir3_shader_debug { + IR3_DBG_SHADER_VS = 0x01, + IR3_DBG_SHADER_FS = 0x02, + IR3_DBG_SHADER_CS = 0x04, + IR3_DBG_DISASM = 0x08, + IR3_DBG_OPTMSGS = 0x10, +}; + +extern enum ir3_shader_debug ir3_shader_debug; + +static inline bool +shader_debug_enabled(gl_shader_stage type) +{ + switch (type) { + case MESA_SHADER_VERTEX: return !!(ir3_shader_debug & IR3_DBG_SHADER_VS); + case MESA_SHADER_FRAGMENT: return !!(ir3_shader_debug & IR3_DBG_SHADER_FS); + case MESA_SHADER_COMPUTE: return !!(ir3_shader_debug & IR3_DBG_SHADER_CS); + default: + debug_assert(0); + return false; + } +} + +#endif /* IR3_COMPILER_H_ */ diff -Nru mesa-18.3.3/src/freedreno/ir3/ir3_compiler_nir.c mesa-19.0.1/src/freedreno/ir3/ir3_compiler_nir.c --- mesa-18.3.3/src/freedreno/ir3/ir3_compiler_nir.c 1970-01-01 00:00:00.000000000 +0000 +++ mesa-19.0.1/src/freedreno/ir3/ir3_compiler_nir.c 2019-03-31 
23:16:37.000000000 +0000 @@ -0,0 +1,3217 @@ +/* + * Copyright (C) 2015 Rob Clark + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ * + * Authors: + * Rob Clark + */ + +#include + +#include "util/u_string.h" +#include "util/u_memory.h" +#include "util/u_math.h" + +#include "ir3_compiler.h" +#include "ir3_shader.h" +#include "ir3_nir.h" + +#include "instr-a3xx.h" +#include "ir3.h" +#include "ir3_context.h" + + +static struct ir3_instruction * +create_indirect_load(struct ir3_context *ctx, unsigned arrsz, int n, + struct ir3_instruction *address, struct ir3_instruction *collect) +{ + struct ir3_block *block = ctx->block; + struct ir3_instruction *mov; + struct ir3_register *src; + + mov = ir3_instr_create(block, OPC_MOV); + mov->cat1.src_type = TYPE_U32; + mov->cat1.dst_type = TYPE_U32; + ir3_reg_create(mov, 0, 0); + src = ir3_reg_create(mov, 0, IR3_REG_SSA | IR3_REG_RELATIV); + src->instr = collect; + src->size = arrsz; + src->array.offset = n; + + ir3_instr_set_address(mov, address); + + return mov; +} + +static struct ir3_instruction * +create_input_compmask(struct ir3_context *ctx, unsigned n, unsigned compmask) +{ + struct ir3_instruction *in; + + in = ir3_instr_create(ctx->in_block, OPC_META_INPUT); + in->inout.block = ctx->in_block; + ir3_reg_create(in, n, 0); + + in->regs[0]->wrmask = compmask; + + return in; +} + +static struct ir3_instruction * +create_input(struct ir3_context *ctx, unsigned n) +{ + return create_input_compmask(ctx, n, 0x1); +} + +static struct ir3_instruction * +create_frag_input(struct ir3_context *ctx, bool use_ldlv) +{ + struct ir3_block *block = ctx->block; + struct ir3_instruction *instr; + /* actual inloc is assigned and fixed up later: */ + struct ir3_instruction *inloc = create_immed(block, 0); + + if (use_ldlv) { + instr = ir3_LDLV(block, inloc, 0, create_immed(block, 1), 0); + instr->cat6.type = TYPE_U32; + instr->cat6.iim_val = 1; + } else { + instr = ir3_BARY_F(block, inloc, 0, ctx->frag_vcoord, 0); + instr->regs[2]->wrmask = 0x3; + } + + return instr; +} + +static struct ir3_instruction * +create_driver_param(struct ir3_context *ctx, enum 
ir3_driver_param dp) +{ + /* first four vec4 sysval's reserved for UBOs: */ + /* NOTE: dp is in scalar, but there can be >4 dp components: */ + unsigned n = ctx->so->constbase.driver_param; + unsigned r = regid(n + dp / 4, dp % 4); + return create_uniform(ctx->block, r); +} + +/* + * Adreno uses uint rather than having dedicated bool type, + * which (potentially) requires some conversion, in particular + * when using output of an bool instr to int input, or visa + * versa. + * + * | Adreno | NIR | + * -------+---------+-------+- + * true | 1 | ~0 | + * false | 0 | 0 | + * + * To convert from an adreno bool (uint) to nir, use: + * + * absneg.s dst, (neg)src + * + * To convert back in the other direction: + * + * absneg.s dst, (abs)arc + * + * The CP step can clean up the absneg.s that cancel each other + * out, and with a slight bit of extra cleverness (to recognize + * the instructions which produce either a 0 or 1) can eliminate + * the absneg.s's completely when an instruction that wants + * 0/1 consumes the result. For example, when a nir 'bcsel' + * consumes the result of 'feq'. So we should be able to get by + * without a boolean resolve step, and without incuring any + * extra penalty in instruction count. 
+ */ + +/* NIR bool -> native (adreno): */ +static struct ir3_instruction * +ir3_b2n(struct ir3_block *block, struct ir3_instruction *instr) +{ + return ir3_ABSNEG_S(block, instr, IR3_REG_SABS); +} + +/* native (adreno) -> NIR bool: */ +static struct ir3_instruction * +ir3_n2b(struct ir3_block *block, struct ir3_instruction *instr) +{ + return ir3_ABSNEG_S(block, instr, IR3_REG_SNEG); +} + +/* + * alu/sfu instructions: + */ + +static struct ir3_instruction * +create_cov(struct ir3_context *ctx, struct ir3_instruction *src, + unsigned src_bitsize, nir_op op) +{ + type_t src_type, dst_type; + + switch (op) { + case nir_op_f2f32: + case nir_op_f2f16_rtne: + case nir_op_f2f16_rtz: + case nir_op_f2f16: + case nir_op_f2i32: + case nir_op_f2i16: + case nir_op_f2i8: + case nir_op_f2u32: + case nir_op_f2u16: + case nir_op_f2u8: + switch (src_bitsize) { + case 32: + src_type = TYPE_F32; + break; + case 16: + src_type = TYPE_F16; + break; + default: + ir3_context_error(ctx, "invalid src bit size: %u", src_bitsize); + } + break; + + case nir_op_i2f32: + case nir_op_i2f16: + case nir_op_i2i32: + case nir_op_i2i16: + case nir_op_i2i8: + switch (src_bitsize) { + case 32: + src_type = TYPE_S32; + break; + case 16: + src_type = TYPE_S16; + break; + case 8: + src_type = TYPE_S8; + break; + default: + ir3_context_error(ctx, "invalid src bit size: %u", src_bitsize); + } + break; + + case nir_op_u2f32: + case nir_op_u2f16: + case nir_op_u2u32: + case nir_op_u2u16: + case nir_op_u2u8: + switch (src_bitsize) { + case 32: + src_type = TYPE_U32; + break; + case 16: + src_type = TYPE_U16; + break; + case 8: + src_type = TYPE_U8; + break; + default: + ir3_context_error(ctx, "invalid src bit size: %u", src_bitsize); + } + break; + + default: + ir3_context_error(ctx, "invalid conversion op: %u", op); + } + + switch (op) { + case nir_op_f2f32: + case nir_op_i2f32: + case nir_op_u2f32: + dst_type = TYPE_F32; + break; + + case nir_op_f2f16_rtne: + case nir_op_f2f16_rtz: + case nir_op_f2f16: + /* 
TODO how to handle rounding mode? */ + case nir_op_i2f16: + case nir_op_u2f16: + dst_type = TYPE_F16; + break; + + case nir_op_f2i32: + case nir_op_i2i32: + dst_type = TYPE_S32; + break; + + case nir_op_f2i16: + case nir_op_i2i16: + dst_type = TYPE_S16; + break; + + case nir_op_f2i8: + case nir_op_i2i8: + dst_type = TYPE_S8; + break; + + case nir_op_f2u32: + case nir_op_u2u32: + dst_type = TYPE_U32; + break; + + case nir_op_f2u16: + case nir_op_u2u16: + dst_type = TYPE_U16; + break; + + case nir_op_f2u8: + case nir_op_u2u8: + dst_type = TYPE_U8; + break; + + default: + ir3_context_error(ctx, "invalid conversion op: %u", op); + } + + return ir3_COV(ctx->block, src, src_type, dst_type); +} + +static void +emit_alu(struct ir3_context *ctx, nir_alu_instr *alu) +{ + const nir_op_info *info = &nir_op_infos[alu->op]; + struct ir3_instruction **dst, *src[info->num_inputs]; + unsigned bs[info->num_inputs]; /* bit size */ + struct ir3_block *b = ctx->block; + unsigned dst_sz, wrmask; + + if (alu->dest.dest.is_ssa) { + dst_sz = alu->dest.dest.ssa.num_components; + wrmask = (1 << dst_sz) - 1; + } else { + dst_sz = alu->dest.dest.reg.reg->num_components; + wrmask = alu->dest.write_mask; + } + + dst = ir3_get_dst(ctx, &alu->dest.dest, dst_sz); + + /* Vectors are special in that they have non-scalarized writemasks, + * and just take the first swizzle channel for each argument in + * order into each writemask channel. 
+ */ + if ((alu->op == nir_op_vec2) || + (alu->op == nir_op_vec3) || + (alu->op == nir_op_vec4)) { + + for (int i = 0; i < info->num_inputs; i++) { + nir_alu_src *asrc = &alu->src[i]; + + compile_assert(ctx, !asrc->abs); + compile_assert(ctx, !asrc->negate); + + src[i] = ir3_get_src(ctx, &asrc->src)[asrc->swizzle[0]]; + if (!src[i]) + src[i] = create_immed(ctx->block, 0); + dst[i] = ir3_MOV(b, src[i], TYPE_U32); + } + + put_dst(ctx, &alu->dest.dest); + return; + } + + /* We also get mov's with more than one component for mov's so + * handle those specially: + */ + if ((alu->op == nir_op_imov) || (alu->op == nir_op_fmov)) { + type_t type = (alu->op == nir_op_imov) ? TYPE_U32 : TYPE_F32; + nir_alu_src *asrc = &alu->src[0]; + struct ir3_instruction *const *src0 = ir3_get_src(ctx, &asrc->src); + + for (unsigned i = 0; i < dst_sz; i++) { + if (wrmask & (1 << i)) { + dst[i] = ir3_MOV(b, src0[asrc->swizzle[i]], type); + } else { + dst[i] = NULL; + } + } + + put_dst(ctx, &alu->dest.dest); + return; + } + + /* General case: We can just grab the one used channel per src. 
*/ + for (int i = 0; i < info->num_inputs; i++) { + unsigned chan = ffs(alu->dest.write_mask) - 1; + nir_alu_src *asrc = &alu->src[i]; + + compile_assert(ctx, !asrc->abs); + compile_assert(ctx, !asrc->negate); + + src[i] = ir3_get_src(ctx, &asrc->src)[asrc->swizzle[chan]]; + bs[i] = nir_src_bit_size(asrc->src); + + compile_assert(ctx, src[i]); + } + + switch (alu->op) { + case nir_op_f2f32: + case nir_op_f2f16_rtne: + case nir_op_f2f16_rtz: + case nir_op_f2f16: + case nir_op_f2i32: + case nir_op_f2i16: + case nir_op_f2i8: + case nir_op_f2u32: + case nir_op_f2u16: + case nir_op_f2u8: + case nir_op_i2f32: + case nir_op_i2f16: + case nir_op_i2i32: + case nir_op_i2i16: + case nir_op_i2i8: + case nir_op_u2f32: + case nir_op_u2f16: + case nir_op_u2u32: + case nir_op_u2u16: + case nir_op_u2u8: + dst[0] = create_cov(ctx, src[0], bs[0], alu->op); + break; + case nir_op_f2b32: + dst[0] = ir3_CMPS_F(b, src[0], 0, create_immed(b, fui(0.0)), 0); + dst[0]->cat2.condition = IR3_COND_NE; + dst[0] = ir3_n2b(b, dst[0]); + break; + case nir_op_b2f16: + case nir_op_b2f32: + dst[0] = ir3_COV(b, ir3_b2n(b, src[0]), TYPE_U32, TYPE_F32); + break; + case nir_op_b2i8: + case nir_op_b2i16: + case nir_op_b2i32: + dst[0] = ir3_b2n(b, src[0]); + break; + case nir_op_i2b32: + dst[0] = ir3_CMPS_S(b, src[0], 0, create_immed(b, 0), 0); + dst[0]->cat2.condition = IR3_COND_NE; + dst[0] = ir3_n2b(b, dst[0]); + break; + + case nir_op_fneg: + dst[0] = ir3_ABSNEG_F(b, src[0], IR3_REG_FNEG); + break; + case nir_op_fabs: + dst[0] = ir3_ABSNEG_F(b, src[0], IR3_REG_FABS); + break; + case nir_op_fmax: + dst[0] = ir3_MAX_F(b, src[0], 0, src[1], 0); + break; + case nir_op_fmin: + dst[0] = ir3_MIN_F(b, src[0], 0, src[1], 0); + break; + case nir_op_fsat: + /* if there is just a single use of the src, and it supports + * (sat) bit, we can just fold the (sat) flag back to the + * src instruction and create a mov. This is easier for cp + * to eliminate. 
+ * + * TODO probably opc_cat==4 is ok too + */ + if (alu->src[0].src.is_ssa && + (list_length(&alu->src[0].src.ssa->uses) == 1) && + ((opc_cat(src[0]->opc) == 2) || (opc_cat(src[0]->opc) == 3))) { + src[0]->flags |= IR3_INSTR_SAT; + dst[0] = ir3_MOV(b, src[0], TYPE_U32); + } else { + /* otherwise generate a max.f that saturates.. blob does + * similar (generating a cat2 mov using max.f) + */ + dst[0] = ir3_MAX_F(b, src[0], 0, src[0], 0); + dst[0]->flags |= IR3_INSTR_SAT; + } + break; + case nir_op_fmul: + dst[0] = ir3_MUL_F(b, src[0], 0, src[1], 0); + break; + case nir_op_fadd: + dst[0] = ir3_ADD_F(b, src[0], 0, src[1], 0); + break; + case nir_op_fsub: + dst[0] = ir3_ADD_F(b, src[0], 0, src[1], IR3_REG_FNEG); + break; + case nir_op_ffma: + dst[0] = ir3_MAD_F32(b, src[0], 0, src[1], 0, src[2], 0); + break; + case nir_op_fddx: + dst[0] = ir3_DSX(b, src[0], 0); + dst[0]->cat5.type = TYPE_F32; + break; + case nir_op_fddy: + dst[0] = ir3_DSY(b, src[0], 0); + dst[0]->cat5.type = TYPE_F32; + break; + break; + case nir_op_flt32: + dst[0] = ir3_CMPS_F(b, src[0], 0, src[1], 0); + dst[0]->cat2.condition = IR3_COND_LT; + dst[0] = ir3_n2b(b, dst[0]); + break; + case nir_op_fge32: + dst[0] = ir3_CMPS_F(b, src[0], 0, src[1], 0); + dst[0]->cat2.condition = IR3_COND_GE; + dst[0] = ir3_n2b(b, dst[0]); + break; + case nir_op_feq32: + dst[0] = ir3_CMPS_F(b, src[0], 0, src[1], 0); + dst[0]->cat2.condition = IR3_COND_EQ; + dst[0] = ir3_n2b(b, dst[0]); + break; + case nir_op_fne32: + dst[0] = ir3_CMPS_F(b, src[0], 0, src[1], 0); + dst[0]->cat2.condition = IR3_COND_NE; + dst[0] = ir3_n2b(b, dst[0]); + break; + case nir_op_fceil: + dst[0] = ir3_CEIL_F(b, src[0], 0); + break; + case nir_op_ffloor: + dst[0] = ir3_FLOOR_F(b, src[0], 0); + break; + case nir_op_ftrunc: + dst[0] = ir3_TRUNC_F(b, src[0], 0); + break; + case nir_op_fround_even: + dst[0] = ir3_RNDNE_F(b, src[0], 0); + break; + case nir_op_fsign: + dst[0] = ir3_SIGN_F(b, src[0], 0); + break; + + case nir_op_fsin: + dst[0] = 
ir3_SIN(b, src[0], 0); + break; + case nir_op_fcos: + dst[0] = ir3_COS(b, src[0], 0); + break; + case nir_op_frsq: + dst[0] = ir3_RSQ(b, src[0], 0); + break; + case nir_op_frcp: + dst[0] = ir3_RCP(b, src[0], 0); + break; + case nir_op_flog2: + dst[0] = ir3_LOG2(b, src[0], 0); + break; + case nir_op_fexp2: + dst[0] = ir3_EXP2(b, src[0], 0); + break; + case nir_op_fsqrt: + dst[0] = ir3_SQRT(b, src[0], 0); + break; + + case nir_op_iabs: + dst[0] = ir3_ABSNEG_S(b, src[0], IR3_REG_SABS); + break; + case nir_op_iadd: + dst[0] = ir3_ADD_U(b, src[0], 0, src[1], 0); + break; + case nir_op_iand: + dst[0] = ir3_AND_B(b, src[0], 0, src[1], 0); + break; + case nir_op_imax: + dst[0] = ir3_MAX_S(b, src[0], 0, src[1], 0); + break; + case nir_op_umax: + dst[0] = ir3_MAX_U(b, src[0], 0, src[1], 0); + break; + case nir_op_imin: + dst[0] = ir3_MIN_S(b, src[0], 0, src[1], 0); + break; + case nir_op_umin: + dst[0] = ir3_MIN_U(b, src[0], 0, src[1], 0); + break; + case nir_op_imul: + /* + * dst = (al * bl) + (ah * bl << 16) + (al * bh << 16) + * mull.u tmp0, a, b ; mul low, i.e. al * bl + * madsh.m16 tmp1, a, b, tmp0 ; mul-add shift high mix, i.e. ah * bl << 16 + * madsh.m16 dst, b, a, tmp1 ; i.e. al * bh << 16 + */ + dst[0] = ir3_MADSH_M16(b, src[1], 0, src[0], 0, + ir3_MADSH_M16(b, src[0], 0, src[1], 0, + ir3_MULL_U(b, src[0], 0, src[1], 0), 0), 0); + break; + case nir_op_ineg: + dst[0] = ir3_ABSNEG_S(b, src[0], IR3_REG_SNEG); + break; + case nir_op_inot: + dst[0] = ir3_NOT_B(b, src[0], 0); + break; + case nir_op_ior: + dst[0] = ir3_OR_B(b, src[0], 0, src[1], 0); + break; + case nir_op_ishl: + dst[0] = ir3_SHL_B(b, src[0], 0, src[1], 0); + break; + case nir_op_ishr: + dst[0] = ir3_ASHR_B(b, src[0], 0, src[1], 0); + break; + case nir_op_isign: { + /* maybe this would be sane to lower in nir.. 
*/ + struct ir3_instruction *neg, *pos; + + neg = ir3_CMPS_S(b, src[0], 0, create_immed(b, 0), 0); + neg->cat2.condition = IR3_COND_LT; + + pos = ir3_CMPS_S(b, src[0], 0, create_immed(b, 0), 0); + pos->cat2.condition = IR3_COND_GT; + + dst[0] = ir3_SUB_U(b, pos, 0, neg, 0); + + break; + } + case nir_op_isub: + dst[0] = ir3_SUB_U(b, src[0], 0, src[1], 0); + break; + case nir_op_ixor: + dst[0] = ir3_XOR_B(b, src[0], 0, src[1], 0); + break; + case nir_op_ushr: + dst[0] = ir3_SHR_B(b, src[0], 0, src[1], 0); + break; + case nir_op_ilt32: + dst[0] = ir3_CMPS_S(b, src[0], 0, src[1], 0); + dst[0]->cat2.condition = IR3_COND_LT; + dst[0] = ir3_n2b(b, dst[0]); + break; + case nir_op_ige32: + dst[0] = ir3_CMPS_S(b, src[0], 0, src[1], 0); + dst[0]->cat2.condition = IR3_COND_GE; + dst[0] = ir3_n2b(b, dst[0]); + break; + case nir_op_ieq32: + dst[0] = ir3_CMPS_S(b, src[0], 0, src[1], 0); + dst[0]->cat2.condition = IR3_COND_EQ; + dst[0] = ir3_n2b(b, dst[0]); + break; + case nir_op_ine32: + dst[0] = ir3_CMPS_S(b, src[0], 0, src[1], 0); + dst[0]->cat2.condition = IR3_COND_NE; + dst[0] = ir3_n2b(b, dst[0]); + break; + case nir_op_ult32: + dst[0] = ir3_CMPS_U(b, src[0], 0, src[1], 0); + dst[0]->cat2.condition = IR3_COND_LT; + dst[0] = ir3_n2b(b, dst[0]); + break; + case nir_op_uge32: + dst[0] = ir3_CMPS_U(b, src[0], 0, src[1], 0); + dst[0]->cat2.condition = IR3_COND_GE; + dst[0] = ir3_n2b(b, dst[0]); + break; + + case nir_op_b32csel: { + struct ir3_instruction *cond = ir3_b2n(b, src[0]); + compile_assert(ctx, bs[1] == bs[2]); + /* the boolean condition is 32b even if src[1] and src[2] are + * half-precision, but sel.b16 wants all three src's to be the + * same type. 
+ */ + if (bs[1] < 32) + cond = ir3_COV(b, cond, TYPE_U32, TYPE_U16); + dst[0] = ir3_SEL_B32(b, src[1], 0, cond, 0, src[2], 0); + break; + } + case nir_op_bit_count: + dst[0] = ir3_CBITS_B(b, src[0], 0); + break; + case nir_op_ifind_msb: { + struct ir3_instruction *cmp; + dst[0] = ir3_CLZ_S(b, src[0], 0); + cmp = ir3_CMPS_S(b, dst[0], 0, create_immed(b, 0), 0); + cmp->cat2.condition = IR3_COND_GE; + dst[0] = ir3_SEL_B32(b, + ir3_SUB_U(b, create_immed(b, 31), 0, dst[0], 0), 0, + cmp, 0, dst[0], 0); + break; + } + case nir_op_ufind_msb: + dst[0] = ir3_CLZ_B(b, src[0], 0); + dst[0] = ir3_SEL_B32(b, + ir3_SUB_U(b, create_immed(b, 31), 0, dst[0], 0), 0, + src[0], 0, dst[0], 0); + break; + case nir_op_find_lsb: + dst[0] = ir3_BFREV_B(b, src[0], 0); + dst[0] = ir3_CLZ_B(b, dst[0], 0); + break; + case nir_op_bitfield_reverse: + dst[0] = ir3_BFREV_B(b, src[0], 0); + break; + + default: + ir3_context_error(ctx, "Unhandled ALU op: %s\n", + nir_op_infos[alu->op].name); + break; + } + + put_dst(ctx, &alu->dest.dest); +} + +/* handles direct/indirect UBO reads: */ +static void +emit_intrinsic_load_ubo(struct ir3_context *ctx, nir_intrinsic_instr *intr, + struct ir3_instruction **dst) +{ + struct ir3_block *b = ctx->block; + struct ir3_instruction *base_lo, *base_hi, *addr, *src0, *src1; + nir_const_value *const_offset; + /* UBO addresses are the first driver params: */ + unsigned ubo = regid(ctx->so->constbase.ubo, 0); + const unsigned ptrsz = ir3_pointer_size(ctx); + + int off = 0; + + /* First src is ubo index, which could either be an immed or not: */ + src0 = ir3_get_src(ctx, &intr->src[0])[0]; + if (is_same_type_mov(src0) && + (src0->regs[1]->flags & IR3_REG_IMMED)) { + base_lo = create_uniform(b, ubo + (src0->regs[1]->iim_val * ptrsz)); + base_hi = create_uniform(b, ubo + (src0->regs[1]->iim_val * ptrsz) + 1); + } else { + base_lo = create_uniform_indirect(b, ubo, ir3_get_addr(ctx, src0, 4)); + base_hi = create_uniform_indirect(b, ubo + 1, ir3_get_addr(ctx, src0, 4)); + } 
+ + /* note: on 32bit gpu's base_hi is ignored and DCE'd */ + addr = base_lo; + + const_offset = nir_src_as_const_value(intr->src[1]); + if (const_offset) { + off += const_offset->u32[0]; + } else { + /* For load_ubo_indirect, second src is indirect offset: */ + src1 = ir3_get_src(ctx, &intr->src[1])[0]; + + /* and add offset to addr: */ + addr = ir3_ADD_S(b, addr, 0, src1, 0); + } + + /* if offset is to large to encode in the ldg, split it out: */ + if ((off + (intr->num_components * 4)) > 1024) { + /* split out the minimal amount to improve the odds that + * cp can fit the immediate in the add.s instruction: + */ + unsigned off2 = off + (intr->num_components * 4) - 1024; + addr = ir3_ADD_S(b, addr, 0, create_immed(b, off2), 0); + off -= off2; + } + + if (ptrsz == 2) { + struct ir3_instruction *carry; + + /* handle 32b rollover, ie: + * if (addr < base_lo) + * base_hi++ + */ + carry = ir3_CMPS_U(b, addr, 0, base_lo, 0); + carry->cat2.condition = IR3_COND_LT; + base_hi = ir3_ADD_S(b, base_hi, 0, carry, 0); + + addr = ir3_create_collect(ctx, (struct ir3_instruction*[]){ addr, base_hi }, 2); + } + + for (int i = 0; i < intr->num_components; i++) { + struct ir3_instruction *load = + ir3_LDG(b, addr, 0, create_immed(b, 1), 0); + load->cat6.type = TYPE_U32; + load->cat6.src_offset = off + i * 4; /* byte offset */ + dst[i] = load; + } +} + +/* src[] = { buffer_index, offset }. No const_index */ +static void +emit_intrinsic_load_ssbo(struct ir3_context *ctx, nir_intrinsic_instr *intr, + struct ir3_instruction **dst) +{ + struct ir3_block *b = ctx->block; + struct ir3_instruction *ldgb, *src0, *src1, *offset; + nir_const_value *const_offset; + + /* can this be non-const buffer_index? how do we handle that? */ + const_offset = nir_src_as_const_value(intr->src[0]); + compile_assert(ctx, const_offset); + + offset = ir3_get_src(ctx, &intr->src[1])[0]; + + /* src0 is uvec2(offset*4, 0), src1 is offset.. 
nir already *= 4: */ + src0 = ir3_create_collect(ctx, (struct ir3_instruction*[]){ + offset, + create_immed(b, 0), + }, 2); + src1 = ir3_SHR_B(b, offset, 0, create_immed(b, 2), 0); + + ldgb = ir3_LDGB(b, create_immed(b, const_offset->u32[0]), 0, + src0, 0, src1, 0); + ldgb->regs[0]->wrmask = MASK(intr->num_components); + ldgb->cat6.iim_val = intr->num_components; + ldgb->cat6.d = 4; + ldgb->cat6.type = TYPE_U32; + ldgb->barrier_class = IR3_BARRIER_BUFFER_R; + ldgb->barrier_conflict = IR3_BARRIER_BUFFER_W; + + ir3_split_dest(b, dst, ldgb, 0, intr->num_components); +} + +/* src[] = { value, block_index, offset }. const_index[] = { write_mask } */ +static void +emit_intrinsic_store_ssbo(struct ir3_context *ctx, nir_intrinsic_instr *intr) +{ + struct ir3_block *b = ctx->block; + struct ir3_instruction *stgb, *src0, *src1, *src2, *offset; + nir_const_value *const_offset; + /* TODO handle wrmask properly, see _store_shared().. but I think + * it is more a PITA than that, since blob ends up loading the + * masked components and writing them back out. + */ + unsigned wrmask = intr->const_index[0]; + unsigned ncomp = ffs(~wrmask) - 1; + + /* can this be non-const buffer_index? how do we handle that? */ + const_offset = nir_src_as_const_value(intr->src[1]); + compile_assert(ctx, const_offset); + + offset = ir3_get_src(ctx, &intr->src[2])[0]; + + /* src0 is value, src1 is offset, src2 is uvec2(offset*4, 0).. 
+ * nir already *= 4: + */ + src0 = ir3_create_collect(ctx, ir3_get_src(ctx, &intr->src[0]), ncomp); + src1 = ir3_SHR_B(b, offset, 0, create_immed(b, 2), 0); + src2 = ir3_create_collect(ctx, (struct ir3_instruction*[]){ + offset, + create_immed(b, 0), + }, 2); + + stgb = ir3_STGB(b, create_immed(b, const_offset->u32[0]), 0, + src0, 0, src1, 0, src2, 0); + stgb->cat6.iim_val = ncomp; + stgb->cat6.d = 4; + stgb->cat6.type = TYPE_U32; + stgb->barrier_class = IR3_BARRIER_BUFFER_W; + stgb->barrier_conflict = IR3_BARRIER_BUFFER_R | IR3_BARRIER_BUFFER_W; + + array_insert(b, b->keeps, stgb); +} + +/* src[] = { block_index } */ +static void +emit_intrinsic_ssbo_size(struct ir3_context *ctx, nir_intrinsic_instr *intr, + struct ir3_instruction **dst) +{ + /* SSBO size stored as a const starting at ssbo_sizes: */ + unsigned blk_idx = nir_src_as_const_value(intr->src[0])->u32[0]; + unsigned idx = regid(ctx->so->constbase.ssbo_sizes, 0) + + ctx->so->const_layout.ssbo_size.off[blk_idx]; + + debug_assert(ctx->so->const_layout.ssbo_size.mask & (1 << blk_idx)); + + dst[0] = create_uniform(ctx->block, idx); +} + +/* + * SSBO atomic intrinsics + * + * All of the SSBO atomic memory operations read a value from memory, + * compute a new value using one of the operations below, write the new + * value to memory, and return the original value read. + * + * All operations take 3 sources except CompSwap that takes 4. These + * sources represent: + * + * 0: The SSBO buffer index. + * 1: The offset into the SSBO buffer of the variable that the atomic + * operation will operate on. + * 2: The data parameter to the atomic function (i.e. the value to add + * in ssbo_atomic_add, etc). + * 3: For CompSwap only: the second data parameter. 
+ */ +static struct ir3_instruction * +emit_intrinsic_atomic_ssbo(struct ir3_context *ctx, nir_intrinsic_instr *intr) +{ + struct ir3_block *b = ctx->block; + struct ir3_instruction *atomic, *ssbo, *src0, *src1, *src2, *offset; + nir_const_value *const_offset; + type_t type = TYPE_U32; + + /* can this be non-const buffer_index? how do we handle that? */ + const_offset = nir_src_as_const_value(intr->src[0]); + compile_assert(ctx, const_offset); + ssbo = create_immed(b, const_offset->u32[0]); + + offset = ir3_get_src(ctx, &intr->src[1])[0]; + + /* src0 is data (or uvec2(data, compare)) + * src1 is offset + * src2 is uvec2(offset*4, 0) (appears to be 64b byte offset) + * + * Note that nir already multiplies the offset by four + */ + src0 = ir3_get_src(ctx, &intr->src[2])[0]; + src1 = ir3_SHR_B(b, offset, 0, create_immed(b, 2), 0); + src2 = ir3_create_collect(ctx, (struct ir3_instruction*[]){ + offset, + create_immed(b, 0), + }, 2); + + switch (intr->intrinsic) { + case nir_intrinsic_ssbo_atomic_add: + atomic = ir3_ATOMIC_ADD_G(b, ssbo, 0, src0, 0, src1, 0, src2, 0); + break; + case nir_intrinsic_ssbo_atomic_imin: + atomic = ir3_ATOMIC_MIN_G(b, ssbo, 0, src0, 0, src1, 0, src2, 0); + type = TYPE_S32; + break; + case nir_intrinsic_ssbo_atomic_umin: + atomic = ir3_ATOMIC_MIN_G(b, ssbo, 0, src0, 0, src1, 0, src2, 0); + break; + case nir_intrinsic_ssbo_atomic_imax: + atomic = ir3_ATOMIC_MAX_G(b, ssbo, 0, src0, 0, src1, 0, src2, 0); + type = TYPE_S32; + break; + case nir_intrinsic_ssbo_atomic_umax: + atomic = ir3_ATOMIC_MAX_G(b, ssbo, 0, src0, 0, src1, 0, src2, 0); + break; + case nir_intrinsic_ssbo_atomic_and: + atomic = ir3_ATOMIC_AND_G(b, ssbo, 0, src0, 0, src1, 0, src2, 0); + break; + case nir_intrinsic_ssbo_atomic_or: + atomic = ir3_ATOMIC_OR_G(b, ssbo, 0, src0, 0, src1, 0, src2, 0); + break; + case nir_intrinsic_ssbo_atomic_xor: + atomic = ir3_ATOMIC_XOR_G(b, ssbo, 0, src0, 0, src1, 0, src2, 0); + break; + case nir_intrinsic_ssbo_atomic_exchange: + atomic = 
ir3_ATOMIC_XCHG_G(b, ssbo, 0, src0, 0, src1, 0, src2, 0); + break; + case nir_intrinsic_ssbo_atomic_comp_swap: + /* for cmpxchg, src0 is [ui]vec2(data, compare): */ + src0 = ir3_create_collect(ctx, (struct ir3_instruction*[]){ + ir3_get_src(ctx, &intr->src[3])[0], + src0, + }, 2); + atomic = ir3_ATOMIC_CMPXCHG_G(b, ssbo, 0, src0, 0, src1, 0, src2, 0); + break; + default: + unreachable("boo"); + } + + atomic->cat6.iim_val = 1; + atomic->cat6.d = 4; + atomic->cat6.type = type; + atomic->barrier_class = IR3_BARRIER_BUFFER_W; + atomic->barrier_conflict = IR3_BARRIER_BUFFER_R | IR3_BARRIER_BUFFER_W; + + /* even if nothing consume the result, we can't DCE the instruction: */ + array_insert(b, b->keeps, atomic); + + return atomic; +} + +/* src[] = { offset }. const_index[] = { base } */ +static void +emit_intrinsic_load_shared(struct ir3_context *ctx, nir_intrinsic_instr *intr, + struct ir3_instruction **dst) +{ + struct ir3_block *b = ctx->block; + struct ir3_instruction *ldl, *offset; + unsigned base; + + offset = ir3_get_src(ctx, &intr->src[0])[0]; + base = nir_intrinsic_base(intr); + + ldl = ir3_LDL(b, offset, 0, create_immed(b, intr->num_components), 0); + ldl->cat6.src_offset = base; + ldl->cat6.type = utype_dst(intr->dest); + ldl->regs[0]->wrmask = MASK(intr->num_components); + + ldl->barrier_class = IR3_BARRIER_SHARED_R; + ldl->barrier_conflict = IR3_BARRIER_SHARED_W; + + ir3_split_dest(b, dst, ldl, 0, intr->num_components); +} + +/* src[] = { value, offset }. 
const_index[] = { base, write_mask } */ +static void +emit_intrinsic_store_shared(struct ir3_context *ctx, nir_intrinsic_instr *intr) +{ + struct ir3_block *b = ctx->block; + struct ir3_instruction *stl, *offset; + struct ir3_instruction * const *value; + unsigned base, wrmask; + + value = ir3_get_src(ctx, &intr->src[0]); + offset = ir3_get_src(ctx, &intr->src[1])[0]; + + base = nir_intrinsic_base(intr); + wrmask = nir_intrinsic_write_mask(intr); + + /* Combine groups of consecutive enabled channels in one write + * message. We use ffs to find the first enabled channel and then ffs on + * the bit-inverse, down-shifted writemask to determine the length of + * the block of enabled bits. + * + * (trick stolen from i965's fs_visitor::nir_emit_cs_intrinsic()) + */ + while (wrmask) { + unsigned first_component = ffs(wrmask) - 1; + unsigned length = ffs(~(wrmask >> first_component)) - 1; + + stl = ir3_STL(b, offset, 0, + ir3_create_collect(ctx, &value[first_component], length), 0, + create_immed(b, length), 0); + stl->cat6.dst_offset = first_component + base; + stl->cat6.type = utype_src(intr->src[0]); + stl->barrier_class = IR3_BARRIER_SHARED_W; + stl->barrier_conflict = IR3_BARRIER_SHARED_R | IR3_BARRIER_SHARED_W; + + array_insert(b, b->keeps, stl); + + /* Clear the bits in the writemask that we just wrote, then try + * again to see if more channels are left. + */ + wrmask &= (15 << (first_component + length)); + } +} + +/* + * CS shared variable atomic intrinsics + * + * All of the shared variable atomic memory operations read a value from + * memory, compute a new value using one of the operations below, write the + * new value to memory, and return the original value read. + * + * All operations take 2 sources except CompSwap that takes 3. These + * sources represent: + * + * 0: The offset into the shared variable storage region that the atomic + * operation will operate on. + * 1: The data parameter to the atomic function (i.e. 
the value to add + * in shared_atomic_add, etc). + * 2: For CompSwap only: the second data parameter. + */ +static struct ir3_instruction * +emit_intrinsic_atomic_shared(struct ir3_context *ctx, nir_intrinsic_instr *intr) +{ + struct ir3_block *b = ctx->block; + struct ir3_instruction *atomic, *src0, *src1; + type_t type = TYPE_U32; + + src0 = ir3_get_src(ctx, &intr->src[0])[0]; /* offset */ + src1 = ir3_get_src(ctx, &intr->src[1])[0]; /* value */ + + switch (intr->intrinsic) { + case nir_intrinsic_shared_atomic_add: + atomic = ir3_ATOMIC_ADD(b, src0, 0, src1, 0); + break; + case nir_intrinsic_shared_atomic_imin: + atomic = ir3_ATOMIC_MIN(b, src0, 0, src1, 0); + type = TYPE_S32; + break; + case nir_intrinsic_shared_atomic_umin: + atomic = ir3_ATOMIC_MIN(b, src0, 0, src1, 0); + break; + case nir_intrinsic_shared_atomic_imax: + atomic = ir3_ATOMIC_MAX(b, src0, 0, src1, 0); + type = TYPE_S32; + break; + case nir_intrinsic_shared_atomic_umax: + atomic = ir3_ATOMIC_MAX(b, src0, 0, src1, 0); + break; + case nir_intrinsic_shared_atomic_and: + atomic = ir3_ATOMIC_AND(b, src0, 0, src1, 0); + break; + case nir_intrinsic_shared_atomic_or: + atomic = ir3_ATOMIC_OR(b, src0, 0, src1, 0); + break; + case nir_intrinsic_shared_atomic_xor: + atomic = ir3_ATOMIC_XOR(b, src0, 0, src1, 0); + break; + case nir_intrinsic_shared_atomic_exchange: + atomic = ir3_ATOMIC_XCHG(b, src0, 0, src1, 0); + break; + case nir_intrinsic_shared_atomic_comp_swap: + /* for cmpxchg, src1 is [ui]vec2(data, compare): */ + src1 = ir3_create_collect(ctx, (struct ir3_instruction*[]){ + ir3_get_src(ctx, &intr->src[2])[0], + src1, + }, 2); + atomic = ir3_ATOMIC_CMPXCHG(b, src0, 0, src1, 0); + break; + default: + unreachable("boo"); + } + + atomic->cat6.iim_val = 1; + atomic->cat6.d = 1; + atomic->cat6.type = type; + atomic->barrier_class = IR3_BARRIER_SHARED_W; + atomic->barrier_conflict = IR3_BARRIER_SHARED_R | IR3_BARRIER_SHARED_W; + + /* even if nothing consume the result, we can't DCE the instruction: */ + 
array_insert(b, b->keeps, atomic); + + return atomic; +} + +/* Images get mapped into SSBO/image state (for store/atomic) and texture + * state block (for load). To simplify things, invert the image id and + * map it from end of state block, ie. image 0 becomes num-1, image 1 + * becomes num-2, etc. This potentially avoids needing to re-emit texture + * state when switching shaders. + * + * TODO is max # of samplers and SSBOs the same. This shouldn't be hard- + * coded. Also, since all the gl shader stages (ie. everything but CS) + * share the same SSBO/image state block, this might require some more + * logic if we supported images in anything other than FS.. + */ +static unsigned +get_image_slot(struct ir3_context *ctx, nir_deref_instr *deref) +{ + unsigned int loc = 0; + unsigned inner_size = 1; + + while (deref->deref_type != nir_deref_type_var) { + assert(deref->deref_type == nir_deref_type_array); + nir_const_value *const_index = nir_src_as_const_value(deref->arr.index); + assert(const_index); + + /* Go to the next instruction */ + deref = nir_deref_instr_parent(deref); + + assert(glsl_type_is_array(deref->type)); + const unsigned array_len = glsl_get_length(deref->type); + loc += MIN2(const_index->u32[0], array_len - 1) * inner_size; + + /* Update the inner size */ + inner_size *= array_len; + } + + loc += deref->var->data.driver_location; + + /* TODO figure out real limit per generation, and don't hardcode: */ + const unsigned max_samplers = 16; + return max_samplers - loc - 1; +} + +/* see tex_info() for equiv logic for texture instructions.. it would be + * nice if this could be better unified.. 
+ */ +static unsigned +get_image_coords(const nir_variable *var, unsigned *flagsp) +{ + const struct glsl_type *type = glsl_without_array(var->type); + unsigned coords, flags = 0; + + switch (glsl_get_sampler_dim(type)) { + case GLSL_SAMPLER_DIM_1D: + case GLSL_SAMPLER_DIM_BUF: + coords = 1; + break; + case GLSL_SAMPLER_DIM_2D: + case GLSL_SAMPLER_DIM_RECT: + case GLSL_SAMPLER_DIM_EXTERNAL: + case GLSL_SAMPLER_DIM_MS: + coords = 2; + break; + case GLSL_SAMPLER_DIM_3D: + case GLSL_SAMPLER_DIM_CUBE: + flags |= IR3_INSTR_3D; + coords = 3; + break; + default: + unreachable("bad sampler dim"); + return 0; + } + + if (glsl_sampler_type_is_array(type)) { + /* note: unlike tex_info(), adjust # of coords to include array idx: */ + coords++; + flags |= IR3_INSTR_A; + } + + if (flagsp) + *flagsp = flags; + + return coords; +} + +static type_t +get_image_type(const nir_variable *var) +{ + switch (glsl_get_sampler_result_type(glsl_without_array(var->type))) { + case GLSL_TYPE_UINT: + return TYPE_U32; + case GLSL_TYPE_INT: + return TYPE_S32; + case GLSL_TYPE_FLOAT: + return TYPE_F32; + default: + unreachable("bad sampler type."); + return 0; + } +} + +static struct ir3_instruction * +get_image_offset(struct ir3_context *ctx, const nir_variable *var, + struct ir3_instruction * const *coords, bool byteoff) +{ + struct ir3_block *b = ctx->block; + struct ir3_instruction *offset; + unsigned ncoords = get_image_coords(var, NULL); + + /* to calculate the byte offset (yes, uggg) we need (up to) three + * const values to know the bytes per pixel, and y and z stride: + */ + unsigned cb = regid(ctx->so->constbase.image_dims, 0) + + ctx->so->const_layout.image_dims.off[var->data.driver_location]; + + debug_assert(ctx->so->const_layout.image_dims.mask & + (1 << var->data.driver_location)); + + /* offset = coords.x * bytes_per_pixel: */ + offset = ir3_MUL_S(b, coords[0], 0, create_uniform(b, cb + 0), 0); + if (ncoords > 1) { + /* offset += coords.y * y_pitch: */ + offset = ir3_MAD_S24(b, 
create_uniform(b, cb + 1), 0, + coords[1], 0, offset, 0); + } + if (ncoords > 2) { + /* offset += coords.z * z_pitch: */ + offset = ir3_MAD_S24(b, create_uniform(b, cb + 2), 0, + coords[2], 0, offset, 0); + } + + if (!byteoff) { + /* Some cases, like atomics, seem to use dword offset instead + * of byte offsets.. blob just puts an extra shr.b in there + * in those cases: + */ + offset = ir3_SHR_B(b, offset, 0, create_immed(b, 2), 0); + } + + return ir3_create_collect(ctx, (struct ir3_instruction*[]){ + offset, + create_immed(b, 0), + }, 2); +} + +/* src[] = { deref, coord, sample_index }. const_index[] = {} */ +static void +emit_intrinsic_load_image(struct ir3_context *ctx, nir_intrinsic_instr *intr, + struct ir3_instruction **dst) +{ + struct ir3_block *b = ctx->block; + const nir_variable *var = nir_intrinsic_get_var(intr, 0); + struct ir3_instruction *sam; + struct ir3_instruction * const *src0 = ir3_get_src(ctx, &intr->src[1]); + struct ir3_instruction *coords[4]; + unsigned flags, ncoords = get_image_coords(var, &flags); + unsigned tex_idx = get_image_slot(ctx, nir_src_as_deref(intr->src[0])); + type_t type = get_image_type(var); + + /* hmm, this seems a bit odd, but it is what blob does and (at least + * a5xx) just faults on bogus addresses otherwise: + */ + if (flags & IR3_INSTR_3D) { + flags &= ~IR3_INSTR_3D; + flags |= IR3_INSTR_A; + } + + for (unsigned i = 0; i < ncoords; i++) + coords[i] = src0[i]; + + if (ncoords == 1) + coords[ncoords++] = create_immed(b, 0); + + sam = ir3_SAM(b, OPC_ISAM, type, 0b1111, flags, + tex_idx, tex_idx, ir3_create_collect(ctx, coords, ncoords), NULL); + + sam->barrier_class = IR3_BARRIER_IMAGE_R; + sam->barrier_conflict = IR3_BARRIER_IMAGE_W; + + ir3_split_dest(b, dst, sam, 0, 4); +} + +/* Returns the number of components for the different image formats + * supported by the GLES 3.1 spec, plus those added by the + * GL_NV_image_formats extension. 
+ */ +static unsigned +get_num_components_for_glformat(GLuint format) +{ + switch (format) { + case GL_R32F: + case GL_R32I: + case GL_R32UI: + case GL_R16F: + case GL_R16I: + case GL_R16UI: + case GL_R16: + case GL_R16_SNORM: + case GL_R8I: + case GL_R8UI: + case GL_R8: + case GL_R8_SNORM: + return 1; + + case GL_RG32F: + case GL_RG32I: + case GL_RG32UI: + case GL_RG16F: + case GL_RG16I: + case GL_RG16UI: + case GL_RG16: + case GL_RG16_SNORM: + case GL_RG8I: + case GL_RG8UI: + case GL_RG8: + case GL_RG8_SNORM: + return 2; + + case GL_R11F_G11F_B10F: + return 3; + + case GL_RGBA32F: + case GL_RGBA32I: + case GL_RGBA32UI: + case GL_RGBA16F: + case GL_RGBA16I: + case GL_RGBA16UI: + case GL_RGBA16: + case GL_RGBA16_SNORM: + case GL_RGBA8I: + case GL_RGBA8UI: + case GL_RGBA8: + case GL_RGBA8_SNORM: + case GL_RGB10_A2UI: + case GL_RGB10_A2: + return 4; + + case GL_NONE: + /* Omitting the image format qualifier is allowed on desktop GL + * profiles. Assuming 4 components is always safe. + */ + return 4; + + default: + /* Return 4 components also for all other formats we don't know + * about. The format should have been validated already by + * the higher level API, but drop a debug message just in case. + */ + debug_printf("Unhandled GL format %u while emitting imageStore()\n", + format); + return 4; + } +} + +/* src[] = { deref, coord, sample_index, value }. 
const_index[] = {} */ +static void +emit_intrinsic_store_image(struct ir3_context *ctx, nir_intrinsic_instr *intr) +{ + struct ir3_block *b = ctx->block; + const nir_variable *var = nir_intrinsic_get_var(intr, 0); + struct ir3_instruction *stib, *offset; + struct ir3_instruction * const *value = ir3_get_src(ctx, &intr->src[3]); + struct ir3_instruction * const *coords = ir3_get_src(ctx, &intr->src[1]); + unsigned ncoords = get_image_coords(var, NULL); + unsigned tex_idx = get_image_slot(ctx, nir_src_as_deref(intr->src[0])); + unsigned ncomp = get_num_components_for_glformat(var->data.image.format); + + /* src0 is value + * src1 is coords + * src2 is 64b byte offset + */ + + offset = get_image_offset(ctx, var, coords, true); + + /* NOTE: stib seems to take byte offset, but stgb.typed can be used + * too and takes a dword offset.. not quite sure yet why blob uses + * one over the other in various cases. + */ + + stib = ir3_STIB(b, create_immed(b, tex_idx), 0, + ir3_create_collect(ctx, value, ncomp), 0, + ir3_create_collect(ctx, coords, ncoords), 0, + offset, 0); + stib->cat6.iim_val = ncomp; + stib->cat6.d = ncoords; + stib->cat6.type = get_image_type(var); + stib->cat6.typed = true; + stib->barrier_class = IR3_BARRIER_IMAGE_W; + stib->barrier_conflict = IR3_BARRIER_IMAGE_R | IR3_BARRIER_IMAGE_W; + + array_insert(b, b->keeps, stib); +} + +static void +emit_intrinsic_image_size(struct ir3_context *ctx, nir_intrinsic_instr *intr, + struct ir3_instruction **dst) +{ + struct ir3_block *b = ctx->block; + const nir_variable *var = nir_intrinsic_get_var(intr, 0); + unsigned tex_idx = get_image_slot(ctx, nir_src_as_deref(intr->src[0])); + struct ir3_instruction *sam, *lod; + unsigned flags, ncoords = get_image_coords(var, &flags); + + lod = create_immed(b, 0); + sam = ir3_SAM(b, OPC_GETSIZE, TYPE_U32, 0b1111, flags, + tex_idx, tex_idx, lod, NULL); + + /* Array size actually ends up in .w rather than .z. 
This doesn't + * matter for miplevel 0, but for higher mips the value in z is + * minified whereas w stays. Also, the value in TEX_CONST_3_DEPTH is + * returned, which means that we have to add 1 to it for arrays for + * a3xx. + * + * Note use a temporary dst and then copy, since the size of the dst + * array that is passed in is based on nir's understanding of the + * result size, not the hardware's + */ + struct ir3_instruction *tmp[4]; + + ir3_split_dest(b, tmp, sam, 0, 4); + + /* get_size instruction returns size in bytes instead of texels + * for imageBuffer, so we need to divide it by the pixel size + * of the image format. + * + * TODO: This is at least true on a5xx. Check other gens. + */ + enum glsl_sampler_dim dim = + glsl_get_sampler_dim(glsl_without_array(var->type)); + if (dim == GLSL_SAMPLER_DIM_BUF) { + /* Since all the possible values the divisor can take are + * power-of-two (4, 8, or 16), the division is implemented + * as a shift-right. + * During shader setup, the log2 of the image format's + * bytes-per-pixel should have been emitted in 2nd slot of + * image_dims. See ir3_shader::emit_image_dims(). + */ + unsigned cb = regid(ctx->so->constbase.image_dims, 0) + + ctx->so->const_layout.image_dims.off[var->data.driver_location]; + struct ir3_instruction *aux = create_uniform(b, cb + 1); + + tmp[0] = ir3_SHR_B(b, tmp[0], 0, aux, 0); + } + + for (unsigned i = 0; i < ncoords; i++) + dst[i] = tmp[i]; + + if (flags & IR3_INSTR_A) { + if (ctx->compiler->levels_add_one) { + dst[ncoords-1] = ir3_ADD_U(b, tmp[3], 0, create_immed(b, 1), 0); + } else { + dst[ncoords-1] = ir3_MOV(b, tmp[3], TYPE_U32); + } + } +} + +/* src[] = { deref, coord, sample_index, value, compare }. 
const_index[] = {} */ +static struct ir3_instruction * +emit_intrinsic_atomic_image(struct ir3_context *ctx, nir_intrinsic_instr *intr) +{ + struct ir3_block *b = ctx->block; + const nir_variable *var = nir_intrinsic_get_var(intr, 0); + struct ir3_instruction *atomic, *image, *src0, *src1, *src2; + struct ir3_instruction * const *coords = ir3_get_src(ctx, &intr->src[1]); + unsigned ncoords = get_image_coords(var, NULL); + + image = create_immed(b, get_image_slot(ctx, nir_src_as_deref(intr->src[0]))); + + /* src0 is value (or uvec2(value, compare)) + * src1 is coords + * src2 is 64b byte offset + */ + src0 = ir3_get_src(ctx, &intr->src[3])[0]; + src1 = ir3_create_collect(ctx, coords, ncoords); + src2 = get_image_offset(ctx, var, coords, false); + + switch (intr->intrinsic) { + case nir_intrinsic_image_deref_atomic_add: + atomic = ir3_ATOMIC_ADD_G(b, image, 0, src0, 0, src1, 0, src2, 0); + break; + case nir_intrinsic_image_deref_atomic_min: + atomic = ir3_ATOMIC_MIN_G(b, image, 0, src0, 0, src1, 0, src2, 0); + break; + case nir_intrinsic_image_deref_atomic_max: + atomic = ir3_ATOMIC_MAX_G(b, image, 0, src0, 0, src1, 0, src2, 0); + break; + case nir_intrinsic_image_deref_atomic_and: + atomic = ir3_ATOMIC_AND_G(b, image, 0, src0, 0, src1, 0, src2, 0); + break; + case nir_intrinsic_image_deref_atomic_or: + atomic = ir3_ATOMIC_OR_G(b, image, 0, src0, 0, src1, 0, src2, 0); + break; + case nir_intrinsic_image_deref_atomic_xor: + atomic = ir3_ATOMIC_XOR_G(b, image, 0, src0, 0, src1, 0, src2, 0); + break; + case nir_intrinsic_image_deref_atomic_exchange: + atomic = ir3_ATOMIC_XCHG_G(b, image, 0, src0, 0, src1, 0, src2, 0); + break; + case nir_intrinsic_image_deref_atomic_comp_swap: + /* for cmpxchg, src0 is [ui]vec2(data, compare): */ + src0 = ir3_create_collect(ctx, (struct ir3_instruction*[]){ + ir3_get_src(ctx, &intr->src[4])[0], + src0, + }, 2); + atomic = ir3_ATOMIC_CMPXCHG_G(b, image, 0, src0, 0, src1, 0, src2, 0); + break; + default: + unreachable("boo"); + } + + 
atomic->cat6.iim_val = 1; + atomic->cat6.d = ncoords; + atomic->cat6.type = get_image_type(var); + atomic->cat6.typed = true; + atomic->barrier_class = IR3_BARRIER_IMAGE_W; + atomic->barrier_conflict = IR3_BARRIER_IMAGE_R | IR3_BARRIER_IMAGE_W; + + /* even if nothing consume the result, we can't DCE the instruction: */ + array_insert(b, b->keeps, atomic); + + return atomic; +} + +static void +emit_intrinsic_barrier(struct ir3_context *ctx, nir_intrinsic_instr *intr) +{ + struct ir3_block *b = ctx->block; + struct ir3_instruction *barrier; + + switch (intr->intrinsic) { + case nir_intrinsic_barrier: + barrier = ir3_BAR(b); + barrier->cat7.g = true; + barrier->cat7.l = true; + barrier->flags = IR3_INSTR_SS | IR3_INSTR_SY; + barrier->barrier_class = IR3_BARRIER_EVERYTHING; + break; + case nir_intrinsic_memory_barrier: + barrier = ir3_FENCE(b); + barrier->cat7.g = true; + barrier->cat7.r = true; + barrier->cat7.w = true; + barrier->barrier_class = IR3_BARRIER_IMAGE_W | + IR3_BARRIER_BUFFER_W; + barrier->barrier_conflict = + IR3_BARRIER_IMAGE_R | IR3_BARRIER_IMAGE_W | + IR3_BARRIER_BUFFER_R | IR3_BARRIER_BUFFER_W; + break; + case nir_intrinsic_memory_barrier_atomic_counter: + case nir_intrinsic_memory_barrier_buffer: + barrier = ir3_FENCE(b); + barrier->cat7.g = true; + barrier->cat7.r = true; + barrier->cat7.w = true; + barrier->barrier_class = IR3_BARRIER_BUFFER_W; + barrier->barrier_conflict = IR3_BARRIER_BUFFER_R | + IR3_BARRIER_BUFFER_W; + break; + case nir_intrinsic_memory_barrier_image: + // TODO double check if this should have .g set + barrier = ir3_FENCE(b); + barrier->cat7.g = true; + barrier->cat7.r = true; + barrier->cat7.w = true; + barrier->barrier_class = IR3_BARRIER_IMAGE_W; + barrier->barrier_conflict = IR3_BARRIER_IMAGE_R | + IR3_BARRIER_IMAGE_W; + break; + case nir_intrinsic_memory_barrier_shared: + barrier = ir3_FENCE(b); + barrier->cat7.g = true; + barrier->cat7.l = true; + barrier->cat7.r = true; + barrier->cat7.w = true; + barrier->barrier_class 
= IR3_BARRIER_SHARED_W; + barrier->barrier_conflict = IR3_BARRIER_SHARED_R | + IR3_BARRIER_SHARED_W; + break; + case nir_intrinsic_group_memory_barrier: + barrier = ir3_FENCE(b); + barrier->cat7.g = true; + barrier->cat7.l = true; + barrier->cat7.r = true; + barrier->cat7.w = true; + barrier->barrier_class = IR3_BARRIER_SHARED_W | + IR3_BARRIER_IMAGE_W | + IR3_BARRIER_BUFFER_W; + barrier->barrier_conflict = + IR3_BARRIER_SHARED_R | IR3_BARRIER_SHARED_W | + IR3_BARRIER_IMAGE_R | IR3_BARRIER_IMAGE_W | + IR3_BARRIER_BUFFER_R | IR3_BARRIER_BUFFER_W; + break; + default: + unreachable("boo"); + } + + /* make sure barrier doesn't get DCE'd */ + array_insert(b, b->keeps, barrier); +} + +static void add_sysval_input_compmask(struct ir3_context *ctx, + gl_system_value slot, unsigned compmask, + struct ir3_instruction *instr) +{ + struct ir3_shader_variant *so = ctx->so; + unsigned r = regid(so->inputs_count, 0); + unsigned n = so->inputs_count++; + + so->inputs[n].sysval = true; + so->inputs[n].slot = slot; + so->inputs[n].compmask = compmask; + so->inputs[n].regid = r; + so->inputs[n].interpolate = INTERP_MODE_FLAT; + so->total_in++; + + ctx->ir->ninputs = MAX2(ctx->ir->ninputs, r + 1); + ctx->ir->inputs[r] = instr; +} + +static void add_sysval_input(struct ir3_context *ctx, gl_system_value slot, + struct ir3_instruction *instr) +{ + add_sysval_input_compmask(ctx, slot, 0x1, instr); +} + +static void +emit_intrinsic(struct ir3_context *ctx, nir_intrinsic_instr *intr) +{ + const nir_intrinsic_info *info = &nir_intrinsic_infos[intr->intrinsic]; + struct ir3_instruction **dst; + struct ir3_instruction * const *src; + struct ir3_block *b = ctx->block; + nir_const_value *const_offset; + int idx, comp; + + if (info->has_dest) { + unsigned n = nir_intrinsic_dest_components(intr); + dst = ir3_get_dst(ctx, &intr->dest, n); + } else { + dst = NULL; + } + + switch (intr->intrinsic) { + case nir_intrinsic_load_uniform: + idx = nir_intrinsic_base(intr); + const_offset = 
nir_src_as_const_value(intr->src[0]); + if (const_offset) { + idx += const_offset->u32[0]; + for (int i = 0; i < intr->num_components; i++) { + unsigned n = idx * 4 + i; + dst[i] = create_uniform(b, n); + } + } else { + src = ir3_get_src(ctx, &intr->src[0]); + for (int i = 0; i < intr->num_components; i++) { + int n = idx * 4 + i; + dst[i] = create_uniform_indirect(b, n, + ir3_get_addr(ctx, src[0], 4)); + } + /* NOTE: if relative addressing is used, we set + * constlen in the compiler (to worst-case value) + * since we don't know in the assembler what the max + * addr reg value can be: + */ + ctx->so->constlen = ctx->s->num_uniforms; + } + break; + case nir_intrinsic_load_ubo: + emit_intrinsic_load_ubo(ctx, intr, dst); + break; + case nir_intrinsic_load_input: + idx = nir_intrinsic_base(intr); + comp = nir_intrinsic_component(intr); + const_offset = nir_src_as_const_value(intr->src[0]); + if (const_offset) { + idx += const_offset->u32[0]; + for (int i = 0; i < intr->num_components; i++) { + unsigned n = idx * 4 + i + comp; + dst[i] = ctx->ir->inputs[n]; + } + } else { + src = ir3_get_src(ctx, &intr->src[0]); + struct ir3_instruction *collect = + ir3_create_collect(ctx, ctx->ir->inputs, ctx->ir->ninputs); + struct ir3_instruction *addr = ir3_get_addr(ctx, src[0], 4); + for (int i = 0; i < intr->num_components; i++) { + unsigned n = idx * 4 + i + comp; + dst[i] = create_indirect_load(ctx, ctx->ir->ninputs, + n, addr, collect); + } + } + break; + case nir_intrinsic_load_ssbo: + emit_intrinsic_load_ssbo(ctx, intr, dst); + break; + case nir_intrinsic_store_ssbo: + emit_intrinsic_store_ssbo(ctx, intr); + break; + case nir_intrinsic_get_buffer_size: + emit_intrinsic_ssbo_size(ctx, intr, dst); + break; + case nir_intrinsic_ssbo_atomic_add: + case nir_intrinsic_ssbo_atomic_imin: + case nir_intrinsic_ssbo_atomic_umin: + case nir_intrinsic_ssbo_atomic_imax: + case nir_intrinsic_ssbo_atomic_umax: + case nir_intrinsic_ssbo_atomic_and: + case nir_intrinsic_ssbo_atomic_or: + case 
nir_intrinsic_ssbo_atomic_xor: + case nir_intrinsic_ssbo_atomic_exchange: + case nir_intrinsic_ssbo_atomic_comp_swap: + dst[0] = emit_intrinsic_atomic_ssbo(ctx, intr); + break; + case nir_intrinsic_load_shared: + emit_intrinsic_load_shared(ctx, intr, dst); + break; + case nir_intrinsic_store_shared: + emit_intrinsic_store_shared(ctx, intr); + break; + case nir_intrinsic_shared_atomic_add: + case nir_intrinsic_shared_atomic_imin: + case nir_intrinsic_shared_atomic_umin: + case nir_intrinsic_shared_atomic_imax: + case nir_intrinsic_shared_atomic_umax: + case nir_intrinsic_shared_atomic_and: + case nir_intrinsic_shared_atomic_or: + case nir_intrinsic_shared_atomic_xor: + case nir_intrinsic_shared_atomic_exchange: + case nir_intrinsic_shared_atomic_comp_swap: + dst[0] = emit_intrinsic_atomic_shared(ctx, intr); + break; + case nir_intrinsic_image_deref_load: + emit_intrinsic_load_image(ctx, intr, dst); + break; + case nir_intrinsic_image_deref_store: + emit_intrinsic_store_image(ctx, intr); + break; + case nir_intrinsic_image_deref_size: + emit_intrinsic_image_size(ctx, intr, dst); + break; + case nir_intrinsic_image_deref_atomic_add: + case nir_intrinsic_image_deref_atomic_min: + case nir_intrinsic_image_deref_atomic_max: + case nir_intrinsic_image_deref_atomic_and: + case nir_intrinsic_image_deref_atomic_or: + case nir_intrinsic_image_deref_atomic_xor: + case nir_intrinsic_image_deref_atomic_exchange: + case nir_intrinsic_image_deref_atomic_comp_swap: + dst[0] = emit_intrinsic_atomic_image(ctx, intr); + break; + case nir_intrinsic_barrier: + case nir_intrinsic_memory_barrier: + case nir_intrinsic_group_memory_barrier: + case nir_intrinsic_memory_barrier_atomic_counter: + case nir_intrinsic_memory_barrier_buffer: + case nir_intrinsic_memory_barrier_image: + case nir_intrinsic_memory_barrier_shared: + emit_intrinsic_barrier(ctx, intr); + /* note that blk ptr no longer valid, make that obvious: */ + b = NULL; + break; + case nir_intrinsic_store_output: + idx = 
nir_intrinsic_base(intr); + comp = nir_intrinsic_component(intr); + const_offset = nir_src_as_const_value(intr->src[1]); + compile_assert(ctx, const_offset != NULL); + idx += const_offset->u32[0]; + + src = ir3_get_src(ctx, &intr->src[0]); + for (int i = 0; i < intr->num_components; i++) { + unsigned n = idx * 4 + i + comp; + ctx->ir->outputs[n] = src[i]; + } + break; + case nir_intrinsic_load_base_vertex: + case nir_intrinsic_load_first_vertex: + if (!ctx->basevertex) { + ctx->basevertex = create_driver_param(ctx, IR3_DP_VTXID_BASE); + add_sysval_input(ctx, SYSTEM_VALUE_FIRST_VERTEX, ctx->basevertex); + } + dst[0] = ctx->basevertex; + break; + case nir_intrinsic_load_vertex_id_zero_base: + case nir_intrinsic_load_vertex_id: + if (!ctx->vertex_id) { + gl_system_value sv = (intr->intrinsic == nir_intrinsic_load_vertex_id) ? + SYSTEM_VALUE_VERTEX_ID : SYSTEM_VALUE_VERTEX_ID_ZERO_BASE; + ctx->vertex_id = create_input(ctx, 0); + add_sysval_input(ctx, sv, ctx->vertex_id); + } + dst[0] = ctx->vertex_id; + break; + case nir_intrinsic_load_instance_id: + if (!ctx->instance_id) { + ctx->instance_id = create_input(ctx, 0); + add_sysval_input(ctx, SYSTEM_VALUE_INSTANCE_ID, + ctx->instance_id); + } + dst[0] = ctx->instance_id; + break; + case nir_intrinsic_load_sample_id: + case nir_intrinsic_load_sample_id_no_per_sample: + if (!ctx->samp_id) { + ctx->samp_id = create_input(ctx, 0); + ctx->samp_id->regs[0]->flags |= IR3_REG_HALF; + add_sysval_input(ctx, SYSTEM_VALUE_SAMPLE_ID, + ctx->samp_id); + } + dst[0] = ir3_COV(b, ctx->samp_id, TYPE_U16, TYPE_U32); + break; + case nir_intrinsic_load_sample_mask_in: + if (!ctx->samp_mask_in) { + ctx->samp_mask_in = create_input(ctx, 0); + add_sysval_input(ctx, SYSTEM_VALUE_SAMPLE_MASK_IN, + ctx->samp_mask_in); + } + dst[0] = ctx->samp_mask_in; + break; + case nir_intrinsic_load_user_clip_plane: + idx = nir_intrinsic_ucp_id(intr); + for (int i = 0; i < intr->num_components; i++) { + unsigned n = idx * 4 + i; + dst[i] = 
create_driver_param(ctx, IR3_DP_UCP0_X + n); + } + break; + case nir_intrinsic_load_front_face: + if (!ctx->frag_face) { + ctx->so->frag_face = true; + ctx->frag_face = create_input(ctx, 0); + add_sysval_input(ctx, SYSTEM_VALUE_FRONT_FACE, ctx->frag_face); + ctx->frag_face->regs[0]->flags |= IR3_REG_HALF; + } + /* for fragface, we get -1 for back and 0 for front. However this is + * the inverse of what nir expects (where ~0 is true). + */ + dst[0] = ir3_COV(b, ctx->frag_face, TYPE_S16, TYPE_S32); + dst[0] = ir3_NOT_B(b, dst[0], 0); + break; + case nir_intrinsic_load_local_invocation_id: + if (!ctx->local_invocation_id) { + ctx->local_invocation_id = create_input_compmask(ctx, 0, 0x7); + add_sysval_input_compmask(ctx, SYSTEM_VALUE_LOCAL_INVOCATION_ID, + 0x7, ctx->local_invocation_id); + } + ir3_split_dest(b, dst, ctx->local_invocation_id, 0, 3); + break; + case nir_intrinsic_load_work_group_id: + if (!ctx->work_group_id) { + ctx->work_group_id = create_input_compmask(ctx, 0, 0x7); + add_sysval_input_compmask(ctx, SYSTEM_VALUE_WORK_GROUP_ID, + 0x7, ctx->work_group_id); + ctx->work_group_id->regs[0]->flags |= IR3_REG_HIGH; + } + ir3_split_dest(b, dst, ctx->work_group_id, 0, 3); + break; + case nir_intrinsic_load_num_work_groups: + for (int i = 0; i < intr->num_components; i++) { + dst[i] = create_driver_param(ctx, IR3_DP_NUM_WORK_GROUPS_X + i); + } + break; + case nir_intrinsic_load_local_group_size: + for (int i = 0; i < intr->num_components; i++) { + dst[i] = create_driver_param(ctx, IR3_DP_LOCAL_GROUP_SIZE_X + i); + } + break; + case nir_intrinsic_discard_if: + case nir_intrinsic_discard: { + struct ir3_instruction *cond, *kill; + + if (intr->intrinsic == nir_intrinsic_discard_if) { + /* conditional discard: */ + src = ir3_get_src(ctx, &intr->src[0]); + cond = ir3_b2n(b, src[0]); + } else { + /* unconditional discard: */ + cond = create_immed(b, 1); + } + + /* NOTE: only cmps.*.* can write p0.x: */ + cond = ir3_CMPS_S(b, cond, 0, create_immed(b, 0), 0); + 
cond->cat2.condition = IR3_COND_NE; + + /* condition always goes in predicate register: */ + cond->regs[0]->num = regid(REG_P0, 0); + + kill = ir3_KILL(b, cond, 0); + array_insert(ctx->ir, ctx->ir->predicates, kill); + + array_insert(b, b->keeps, kill); + ctx->so->has_kill = true; + + break; + } + default: + ir3_context_error(ctx, "Unhandled intrinsic type: %s\n", + nir_intrinsic_infos[intr->intrinsic].name); + break; + } + + if (info->has_dest) + put_dst(ctx, &intr->dest); +} + +static void +emit_load_const(struct ir3_context *ctx, nir_load_const_instr *instr) +{ + struct ir3_instruction **dst = ir3_get_dst_ssa(ctx, &instr->def, + instr->def.num_components); + type_t type = (instr->def.bit_size < 32) ? TYPE_U16 : TYPE_U32; + + for (int i = 0; i < instr->def.num_components; i++) + dst[i] = create_immed_typed(ctx->block, instr->value.u32[i], type); +} + +static void +emit_undef(struct ir3_context *ctx, nir_ssa_undef_instr *undef) +{ + struct ir3_instruction **dst = ir3_get_dst_ssa(ctx, &undef->def, + undef->def.num_components); + type_t type = (undef->def.bit_size < 32) ? TYPE_U16 : TYPE_U32; + + /* backend doesn't want undefined instructions, so just plug + * in 0.0.. + */ + for (int i = 0; i < undef->def.num_components; i++) + dst[i] = create_immed_typed(ctx->block, fui(0.0), type); +} + +/* + * texture fetch/sample instructions: + */ + +static void +tex_info(nir_tex_instr *tex, unsigned *flagsp, unsigned *coordsp) +{ + unsigned coords, flags = 0; + + /* note: would use tex->coord_components.. except txs.. 
also, + * since array index goes after shadow ref, we don't want to + * count it: + */ + switch (tex->sampler_dim) { + case GLSL_SAMPLER_DIM_1D: + case GLSL_SAMPLER_DIM_BUF: + coords = 1; + break; + case GLSL_SAMPLER_DIM_2D: + case GLSL_SAMPLER_DIM_RECT: + case GLSL_SAMPLER_DIM_EXTERNAL: + case GLSL_SAMPLER_DIM_MS: + coords = 2; + break; + case GLSL_SAMPLER_DIM_3D: + case GLSL_SAMPLER_DIM_CUBE: + coords = 3; + flags |= IR3_INSTR_3D; + break; + default: + unreachable("bad sampler_dim"); + } + + if (tex->is_shadow && tex->op != nir_texop_lod) + flags |= IR3_INSTR_S; + + if (tex->is_array && tex->op != nir_texop_lod) + flags |= IR3_INSTR_A; + + *flagsp = flags; + *coordsp = coords; +} + +static void +emit_tex(struct ir3_context *ctx, nir_tex_instr *tex) +{ + struct ir3_block *b = ctx->block; + struct ir3_instruction **dst, *sam, *src0[12], *src1[4]; + struct ir3_instruction * const *coord, * const *off, * const *ddx, * const *ddy; + struct ir3_instruction *lod, *compare, *proj, *sample_index; + bool has_bias = false, has_lod = false, has_proj = false, has_off = false; + unsigned i, coords, flags; + unsigned nsrc0 = 0, nsrc1 = 0; + type_t type; + opc_t opc = 0; + + coord = off = ddx = ddy = NULL; + lod = proj = compare = sample_index = NULL; + + /* TODO: might just be one component for gathers? 
*/ + dst = ir3_get_dst(ctx, &tex->dest, 4); + + for (unsigned i = 0; i < tex->num_srcs; i++) { + switch (tex->src[i].src_type) { + case nir_tex_src_coord: + coord = ir3_get_src(ctx, &tex->src[i].src); + break; + case nir_tex_src_bias: + lod = ir3_get_src(ctx, &tex->src[i].src)[0]; + has_bias = true; + break; + case nir_tex_src_lod: + lod = ir3_get_src(ctx, &tex->src[i].src)[0]; + has_lod = true; + break; + case nir_tex_src_comparator: /* shadow comparator */ + compare = ir3_get_src(ctx, &tex->src[i].src)[0]; + break; + case nir_tex_src_projector: + proj = ir3_get_src(ctx, &tex->src[i].src)[0]; + has_proj = true; + break; + case nir_tex_src_offset: + off = ir3_get_src(ctx, &tex->src[i].src); + has_off = true; + break; + case nir_tex_src_ddx: + ddx = ir3_get_src(ctx, &tex->src[i].src); + break; + case nir_tex_src_ddy: + ddy = ir3_get_src(ctx, &tex->src[i].src); + break; + case nir_tex_src_ms_index: + sample_index = ir3_get_src(ctx, &tex->src[i].src)[0]; + break; + default: + ir3_context_error(ctx, "Unhandled NIR tex src type: %d\n", + tex->src[i].src_type); + return; + } + } + + switch (tex->op) { + case nir_texop_tex: opc = has_lod ? OPC_SAML : OPC_SAM; break; + case nir_texop_txb: opc = OPC_SAMB; break; + case nir_texop_txl: opc = OPC_SAML; break; + case nir_texop_txd: opc = OPC_SAMGQ; break; + case nir_texop_txf: opc = OPC_ISAML; break; + case nir_texop_lod: opc = OPC_GETLOD; break; + case nir_texop_tg4: + /* NOTE: a4xx might need to emulate gather w/ txf (this is + * what blob does, seems gather is broken?), and a3xx did + * not support it (but probably could also emulate). 
+ */ + switch (tex->component) { + case 0: opc = OPC_GATHER4R; break; + case 1: opc = OPC_GATHER4G; break; + case 2: opc = OPC_GATHER4B; break; + case 3: opc = OPC_GATHER4A; break; + } + break; + case nir_texop_txf_ms: opc = OPC_ISAMM; break; + case nir_texop_txs: + case nir_texop_query_levels: + case nir_texop_texture_samples: + case nir_texop_samples_identical: + case nir_texop_txf_ms_mcs: + ir3_context_error(ctx, "Unhandled NIR tex type: %d\n", tex->op); + return; + } + + tex_info(tex, &flags, &coords); + + /* + * lay out the first argument in the proper order: + * - actual coordinates first + * - shadow reference + * - array index + * - projection w + * - starting at offset 4, dpdx.xy, dpdy.xy + * + * bias/lod go into the second arg + */ + + /* insert tex coords: */ + for (i = 0; i < coords; i++) + src0[i] = coord[i]; + + nsrc0 = i; + + /* NOTE a3xx (and possibly a4xx?) might be different, using isaml + * with scaled x coord according to requested sample: + */ + if (tex->op == nir_texop_txf_ms) { + if (ctx->compiler->txf_ms_with_isaml) { + /* the samples are laid out in x dimension as + * 0 1 2 3 + * x_ms = (x << ms) + sample_index; + */ + struct ir3_instruction *ms; + ms = create_immed(b, (ctx->samples >> (2 * tex->texture_index)) & 3); + + src0[0] = ir3_SHL_B(b, src0[0], 0, ms, 0); + src0[0] = ir3_ADD_U(b, src0[0], 0, sample_index, 0); + + opc = OPC_ISAML; + } else { + src0[nsrc0++] = sample_index; + } + } + + /* scale up integer coords for TXF based on the LOD */ + if (ctx->compiler->unminify_coords && (opc == OPC_ISAML)) { + assert(has_lod); + for (i = 0; i < coords; i++) + src0[i] = ir3_SHL_B(b, src0[i], 0, lod, 0); + } + + if (coords == 1) { + /* hw doesn't do 1d, so we treat it as 2d with + * height of 1, and patch up the y coord. + * TODO: y coord should be (int)0 in some cases.. 
+ */ + src0[nsrc0++] = create_immed(b, fui(0.5)); + } + + if (tex->is_shadow && tex->op != nir_texop_lod) + src0[nsrc0++] = compare; + + if (tex->is_array && tex->op != nir_texop_lod) { + struct ir3_instruction *idx = coord[coords]; + + /* the array coord for cube arrays needs 0.5 added to it */ + if (ctx->compiler->array_index_add_half && (opc != OPC_ISAML)) + idx = ir3_ADD_F(b, idx, 0, create_immed(b, fui(0.5)), 0); + + src0[nsrc0++] = idx; + } + + if (has_proj) { + src0[nsrc0++] = proj; + flags |= IR3_INSTR_P; + } + + /* pad to 4, then ddx/ddy: */ + if (tex->op == nir_texop_txd) { + while (nsrc0 < 4) + src0[nsrc0++] = create_immed(b, fui(0.0)); + for (i = 0; i < coords; i++) + src0[nsrc0++] = ddx[i]; + if (coords < 2) + src0[nsrc0++] = create_immed(b, fui(0.0)); + for (i = 0; i < coords; i++) + src0[nsrc0++] = ddy[i]; + if (coords < 2) + src0[nsrc0++] = create_immed(b, fui(0.0)); + } + + /* + * second argument (if applicable): + * - offsets + * - lod + * - bias + */ + if (has_off | has_lod | has_bias) { + if (has_off) { + unsigned off_coords = coords; + if (tex->sampler_dim == GLSL_SAMPLER_DIM_CUBE) + off_coords--; + for (i = 0; i < off_coords; i++) + src1[nsrc1++] = off[i]; + if (off_coords < 2) + src1[nsrc1++] = create_immed(b, fui(0.0)); + flags |= IR3_INSTR_O; + } + + if (has_lod | has_bias) + src1[nsrc1++] = lod; + } + + switch (tex->dest_type) { + case nir_type_invalid: + case nir_type_float: + type = TYPE_F32; + break; + case nir_type_int: + type = TYPE_S32; + break; + case nir_type_uint: + case nir_type_bool: + type = TYPE_U32; + break; + default: + unreachable("bad dest_type"); + } + + if (opc == OPC_GETLOD) + type = TYPE_U32; + + unsigned tex_idx = tex->texture_index; + + ctx->max_texture_index = MAX2(ctx->max_texture_index, tex_idx); + + struct ir3_instruction *col0 = ir3_create_collect(ctx, src0, nsrc0); + struct ir3_instruction *col1 = ir3_create_collect(ctx, src1, nsrc1); + + sam = ir3_SAM(b, opc, type, 0b1111, flags, + tex_idx, tex_idx, col0, 
col1); + + if ((ctx->astc_srgb & (1 << tex_idx)) && !nir_tex_instr_is_query(tex)) { + /* only need first 3 components: */ + sam->regs[0]->wrmask = 0x7; + ir3_split_dest(b, dst, sam, 0, 3); + + /* we need to sample the alpha separately with a non-ASTC + * texture state: + */ + sam = ir3_SAM(b, opc, type, 0b1000, flags, + tex_idx, tex_idx, col0, col1); + + array_insert(ctx->ir, ctx->ir->astc_srgb, sam); + + /* fixup .w component: */ + ir3_split_dest(b, &dst[3], sam, 3, 1); + } else { + /* normal (non-workaround) case: */ + ir3_split_dest(b, dst, sam, 0, 4); + } + + /* GETLOD returns results in 4.8 fixed point */ + if (opc == OPC_GETLOD) { + struct ir3_instruction *factor = create_immed(b, fui(1.0 / 256)); + + compile_assert(ctx, tex->dest_type == nir_type_float); + for (i = 0; i < 2; i++) { + dst[i] = ir3_MUL_F(b, ir3_COV(b, dst[i], TYPE_U32, TYPE_F32), 0, + factor, 0); + } + } + + put_dst(ctx, &tex->dest); +} + +static void +emit_tex_query_levels(struct ir3_context *ctx, nir_tex_instr *tex) +{ + struct ir3_block *b = ctx->block; + struct ir3_instruction **dst, *sam; + + dst = ir3_get_dst(ctx, &tex->dest, 1); + + sam = ir3_SAM(b, OPC_GETINFO, TYPE_U32, 0b0100, 0, + tex->texture_index, tex->texture_index, NULL, NULL); + + /* even though there is only one component, since it ends + * up in .z rather than .x, we need a split_dest() + */ + ir3_split_dest(b, dst, sam, 0, 3); + + /* The # of levels comes from getinfo.z. We need to add 1 to it, since + * the value in TEX_CONST_0 is zero-based. + */ + if (ctx->compiler->levels_add_one) + dst[0] = ir3_ADD_U(b, dst[0], 0, create_immed(b, 1), 0); + + put_dst(ctx, &tex->dest); +} + +static void +emit_tex_txs(struct ir3_context *ctx, nir_tex_instr *tex) +{ + struct ir3_block *b = ctx->block; + struct ir3_instruction **dst, *sam; + struct ir3_instruction *lod; + unsigned flags, coords; + + tex_info(tex, &flags, &coords); + + /* Actually we want the number of dimensions, not coordinates. This + * distinction only matters for cubes. 
+ */ + if (tex->sampler_dim == GLSL_SAMPLER_DIM_CUBE) + coords = 2; + + dst = ir3_get_dst(ctx, &tex->dest, 4); + + compile_assert(ctx, tex->num_srcs == 1); + compile_assert(ctx, tex->src[0].src_type == nir_tex_src_lod); + + lod = ir3_get_src(ctx, &tex->src[0].src)[0]; + + sam = ir3_SAM(b, OPC_GETSIZE, TYPE_U32, 0b1111, flags, + tex->texture_index, tex->texture_index, lod, NULL); + + ir3_split_dest(b, dst, sam, 0, 4); + + /* Array size actually ends up in .w rather than .z. This doesn't + * matter for miplevel 0, but for higher mips the value in z is + * minified whereas w stays. Also, the value in TEX_CONST_3_DEPTH is + * returned, which means that we have to add 1 to it for arrays. + */ + if (tex->is_array) { + if (ctx->compiler->levels_add_one) { + dst[coords] = ir3_ADD_U(b, dst[3], 0, create_immed(b, 1), 0); + } else { + dst[coords] = ir3_MOV(b, dst[3], TYPE_U32); + } + } + + put_dst(ctx, &tex->dest); +} + +static void +emit_jump(struct ir3_context *ctx, nir_jump_instr *jump) +{ + switch (jump->type) { + case nir_jump_break: + case nir_jump_continue: + case nir_jump_return: + /* I *think* we can simply just ignore this, and use the + * successor block link to figure out where we need to + * jump to for break/continue + */ + break; + default: + ir3_context_error(ctx, "Unhandled NIR jump type: %d\n", jump->type); + break; + } +} + +static void +emit_instr(struct ir3_context *ctx, nir_instr *instr) +{ + switch (instr->type) { + case nir_instr_type_alu: + emit_alu(ctx, nir_instr_as_alu(instr)); + break; + case nir_instr_type_deref: + /* ignored, handled as part of the intrinsic they are src to */ + break; + case nir_instr_type_intrinsic: + emit_intrinsic(ctx, nir_instr_as_intrinsic(instr)); + break; + case nir_instr_type_load_const: + emit_load_const(ctx, nir_instr_as_load_const(instr)); + break; + case nir_instr_type_ssa_undef: + emit_undef(ctx, nir_instr_as_ssa_undef(instr)); + break; + case nir_instr_type_tex: { + nir_tex_instr *tex = nir_instr_as_tex(instr); + 
/* couple tex instructions get special-cased: + */ + switch (tex->op) { + case nir_texop_txs: + emit_tex_txs(ctx, tex); + break; + case nir_texop_query_levels: + emit_tex_query_levels(ctx, tex); + break; + default: + emit_tex(ctx, tex); + break; + } + break; + } + case nir_instr_type_jump: + emit_jump(ctx, nir_instr_as_jump(instr)); + break; + case nir_instr_type_phi: + /* we have converted phi webs to regs in NIR by now */ + ir3_context_error(ctx, "Unexpected NIR instruction type: %d\n", instr->type); + break; + case nir_instr_type_call: + case nir_instr_type_parallel_copy: + ir3_context_error(ctx, "Unhandled NIR instruction type: %d\n", instr->type); + break; + } +} + +static struct ir3_block * +get_block(struct ir3_context *ctx, const nir_block *nblock) +{ + struct ir3_block *block; + struct hash_entry *hentry; + unsigned i; + + hentry = _mesa_hash_table_search(ctx->block_ht, nblock); + if (hentry) + return hentry->data; + + block = ir3_block_create(ctx->ir); + block->nblock = nblock; + _mesa_hash_table_insert(ctx->block_ht, nblock, block); + + block->predecessors_count = nblock->predecessors->entries; + block->predecessors = ralloc_array_size(block, + sizeof(block->predecessors[0]), block->predecessors_count); + i = 0; + set_foreach(nblock->predecessors, sentry) { + block->predecessors[i++] = get_block(ctx, sentry->key); + } + + return block; +} + +static void +emit_block(struct ir3_context *ctx, nir_block *nblock) +{ + struct ir3_block *block = get_block(ctx, nblock); + + for (int i = 0; i < ARRAY_SIZE(block->successors); i++) { + if (nblock->successors[i]) { + block->successors[i] = + get_block(ctx, nblock->successors[i]); + } + } + + ctx->block = block; + list_addtail(&block->node, &ctx->ir->block_list); + + /* re-emit addr register in each block if needed: */ + for (int i = 0; i < ARRAY_SIZE(ctx->addr_ht); i++) { + _mesa_hash_table_destroy(ctx->addr_ht[i], NULL); + ctx->addr_ht[i] = NULL; + } + + nir_foreach_instr(instr, nblock) { + ctx->cur_instr = instr; 
+ emit_instr(ctx, instr); + ctx->cur_instr = NULL; + if (ctx->error) + return; + } +} + +static void emit_cf_list(struct ir3_context *ctx, struct exec_list *list); + +static void +emit_if(struct ir3_context *ctx, nir_if *nif) +{ + struct ir3_instruction *condition = ir3_get_src(ctx, &nif->condition)[0]; + + ctx->block->condition = + ir3_get_predicate(ctx, ir3_b2n(condition->block, condition)); + + emit_cf_list(ctx, &nif->then_list); + emit_cf_list(ctx, &nif->else_list); +} + +static void +emit_loop(struct ir3_context *ctx, nir_loop *nloop) +{ + emit_cf_list(ctx, &nloop->body); +} + +static void +stack_push(struct ir3_context *ctx) +{ + ctx->stack++; + ctx->max_stack = MAX2(ctx->max_stack, ctx->stack); +} + +static void +stack_pop(struct ir3_context *ctx) +{ + compile_assert(ctx, ctx->stack > 0); + ctx->stack--; +} + +static void +emit_cf_list(struct ir3_context *ctx, struct exec_list *list) +{ + foreach_list_typed(nir_cf_node, node, node, list) { + switch (node->type) { + case nir_cf_node_block: + emit_block(ctx, nir_cf_node_as_block(node)); + break; + case nir_cf_node_if: + stack_push(ctx); + emit_if(ctx, nir_cf_node_as_if(node)); + stack_pop(ctx); + break; + case nir_cf_node_loop: + stack_push(ctx); + emit_loop(ctx, nir_cf_node_as_loop(node)); + stack_pop(ctx); + break; + case nir_cf_node_function: + ir3_context_error(ctx, "TODO\n"); + break; + } + } +} + +/* emit stream-out code. At this point, the current block is the original + * (nir) end block, and nir ensures that all flow control paths terminate + * into the end block. We re-purpose the original end block to generate + * the 'if (vtxcnt < maxvtxcnt)' condition, then append the conditional + * block holding stream-out write instructions, followed by the new end + * block: + * + * blockOrigEnd { + * p0.x = (vtxcnt < maxvtxcnt) + * // succs: blockStreamOut, blockNewEnd + * } + * blockStreamOut { + * ... stream-out instructions ... 
+ * // succs: blockNewEnd + * } + * blockNewEnd { + * } + */ +static void +emit_stream_out(struct ir3_context *ctx) +{ + struct ir3_shader_variant *v = ctx->so; + struct ir3 *ir = ctx->ir; + struct ir3_stream_output_info *strmout = + &ctx->so->shader->stream_output; + struct ir3_block *orig_end_block, *stream_out_block, *new_end_block; + struct ir3_instruction *vtxcnt, *maxvtxcnt, *cond; + struct ir3_instruction *bases[IR3_MAX_SO_BUFFERS]; + + /* create vtxcnt input in input block at top of shader, + * so that it is seen as live over the entire duration + * of the shader: + */ + vtxcnt = create_input(ctx, 0); + add_sysval_input(ctx, SYSTEM_VALUE_VERTEX_CNT, vtxcnt); + + maxvtxcnt = create_driver_param(ctx, IR3_DP_VTXCNT_MAX); + + /* at this point, we are at the original 'end' block, + * re-purpose this block to stream-out condition, then + * append stream-out block and new-end block + */ + orig_end_block = ctx->block; + +// TODO these blocks need to update predecessors.. +// maybe w/ store_global intrinsic, we could do this +// stuff in nir->nir pass + + stream_out_block = ir3_block_create(ir); + list_addtail(&stream_out_block->node, &ir->block_list); + + new_end_block = ir3_block_create(ir); + list_addtail(&new_end_block->node, &ir->block_list); + + orig_end_block->successors[0] = stream_out_block; + orig_end_block->successors[1] = new_end_block; + stream_out_block->successors[0] = new_end_block; + + /* setup 'if (vtxcnt < maxvtxcnt)' condition: */ + cond = ir3_CMPS_S(ctx->block, vtxcnt, 0, maxvtxcnt, 0); + cond->regs[0]->num = regid(REG_P0, 0); + cond->cat2.condition = IR3_COND_LT; + + /* condition goes on previous block to the conditional, + * since it is used to pick which of the two successor + * paths to take: + */ + orig_end_block->condition = cond; + + /* switch to stream_out_block to generate the stream-out + * instructions: + */ + ctx->block = stream_out_block; + + /* Calculate base addresses based on vtxcnt. 
Instructions + * generated for bases not used in following loop will be + * stripped out in the backend. + */ + for (unsigned i = 0; i < IR3_MAX_SO_BUFFERS; i++) { + unsigned stride = strmout->stride[i]; + struct ir3_instruction *base, *off; + + base = create_uniform(ctx->block, regid(v->constbase.tfbo, i)); + + /* 24-bit should be enough: */ + off = ir3_MUL_U(ctx->block, vtxcnt, 0, + create_immed(ctx->block, stride * 4), 0); + + bases[i] = ir3_ADD_S(ctx->block, off, 0, base, 0); + } + + /* Generate the per-output store instructions: */ + for (unsigned i = 0; i < strmout->num_outputs; i++) { + for (unsigned j = 0; j < strmout->output[i].num_components; j++) { + unsigned c = j + strmout->output[i].start_component; + struct ir3_instruction *base, *out, *stg; + + base = bases[strmout->output[i].output_buffer]; + out = ctx->ir->outputs[regid(strmout->output[i].register_index, c)]; + + stg = ir3_STG(ctx->block, base, 0, out, 0, + create_immed(ctx->block, 1), 0); + stg->cat6.type = TYPE_U32; + stg->cat6.dst_offset = (strmout->output[i].dst_offset + j) * 4; + + array_insert(ctx->block, ctx->block->keeps, stg); + } + } + + /* and finally switch to the new_end_block: */ + ctx->block = new_end_block; +} + +static void +emit_function(struct ir3_context *ctx, nir_function_impl *impl) +{ + nir_metadata_require(impl, nir_metadata_block_index); + + compile_assert(ctx, ctx->stack == 0); + + emit_cf_list(ctx, &impl->body); + emit_block(ctx, impl->end_block); + + compile_assert(ctx, ctx->stack == 0); + + /* at this point, we should have a single empty block, + * into which we emit the 'end' instruction. + */ + compile_assert(ctx, list_empty(&ctx->block->instr_list)); + + /* If stream-out (aka transform-feedback) enabled, emit the + * stream-out instructions, followed by a new empty block (into + * which the 'end' instruction lands). 
+ * + * NOTE: it is done in this order, rather than inserting before + * we emit end_block, because NIR guarantees that all blocks + * flow into end_block, and that end_block has no successors. + * So by re-purposing end_block as the first block of stream- + * out, we guarantee that all exit paths flow into the stream- + * out instructions. + */ + if ((ctx->compiler->gpu_id < 500) && + (ctx->so->shader->stream_output.num_outputs > 0) && + !ctx->so->binning_pass) { + debug_assert(ctx->so->type == MESA_SHADER_VERTEX); + emit_stream_out(ctx); + } + + ir3_END(ctx->block); +} + +static struct ir3_instruction * +create_frag_coord(struct ir3_context *ctx, unsigned comp) +{ + struct ir3_block *block = ctx->block; + struct ir3_instruction *instr; + + if (!ctx->frag_coord) { + ctx->frag_coord = create_input_compmask(ctx, 0, 0xf); + /* defer add_sysval_input() until after all inputs created */ + } + + ir3_split_dest(block, &instr, ctx->frag_coord, comp, 1); + + switch (comp) { + case 0: /* .x */ + case 1: /* .y */ + /* for frag_coord, we get unsigned values.. 
we need + * to subtract (integer) 8 and divide by 16 (right- + * shift by 4) then convert to float: + * + * sub.s tmp, src, 8 + * shr.b tmp, tmp, 4 + * mov.u32f32 dst, tmp + * + */ + instr = ir3_SUB_S(block, instr, 0, + create_immed(block, 8), 0); + instr = ir3_SHR_B(block, instr, 0, + create_immed(block, 4), 0); + instr = ir3_COV(block, instr, TYPE_U32, TYPE_F32); + + return instr; + case 2: /* .z */ + case 3: /* .w */ + default: + /* seems that we can use these as-is: */ + return instr; + } +} + +static void +setup_input(struct ir3_context *ctx, nir_variable *in) +{ + struct ir3_shader_variant *so = ctx->so; + unsigned ncomp = glsl_get_components(in->type); + unsigned n = in->data.driver_location; + unsigned frac = in->data.location_frac; + unsigned slot = in->data.location; + + /* skip unread inputs, we could end up with (for example), unsplit + * matrix/etc inputs in the case they are not read, so just silently + * skip these. + */ + if (ncomp > 4) + return; + + so->inputs[n].slot = slot; + so->inputs[n].compmask = (1 << (ncomp + frac)) - 1; + so->inputs_count = MAX2(so->inputs_count, n + 1); + so->inputs[n].interpolate = in->data.interpolation; + + if (ctx->so->type == MESA_SHADER_FRAGMENT) { + for (int i = 0; i < ncomp; i++) { + struct ir3_instruction *instr = NULL; + unsigned idx = (n * 4) + i + frac; + + if (slot == VARYING_SLOT_POS) { + so->inputs[n].bary = false; + so->frag_coord = true; + instr = create_frag_coord(ctx, i); + } else if (slot == VARYING_SLOT_PNTC) { + /* see for example st_nir_fixup_varying_slots().. this is + * maybe a bit mesa/st specific. But we need things to line + * up for this in fdN_program: + * unsigned texmask = 1 << (slot - VARYING_SLOT_VAR0); + * if (emit->sprite_coord_enable & texmask) { + * ... 
+ * } + */ + so->inputs[n].slot = VARYING_SLOT_VAR8; + so->inputs[n].bary = true; + instr = create_frag_input(ctx, false); + } else { + bool use_ldlv = false; + + /* detect the special case for front/back colors where + * we need to do flat vs smooth shading depending on + * rast state: + */ + if (in->data.interpolation == INTERP_MODE_NONE) { + switch (slot) { + case VARYING_SLOT_COL0: + case VARYING_SLOT_COL1: + case VARYING_SLOT_BFC0: + case VARYING_SLOT_BFC1: + so->inputs[n].rasterflat = true; + break; + default: + break; + } + } + + if (ctx->compiler->flat_bypass) { + if ((so->inputs[n].interpolate == INTERP_MODE_FLAT) || + (so->inputs[n].rasterflat && ctx->so->key.rasterflat)) + use_ldlv = true; + } + + so->inputs[n].bary = true; + + instr = create_frag_input(ctx, use_ldlv); + } + + compile_assert(ctx, idx < ctx->ir->ninputs); + + ctx->ir->inputs[idx] = instr; + } + } else if (ctx->so->type == MESA_SHADER_VERTEX) { + for (int i = 0; i < ncomp; i++) { + unsigned idx = (n * 4) + i + frac; + compile_assert(ctx, idx < ctx->ir->ninputs); + ctx->ir->inputs[idx] = create_input(ctx, idx); + } + } else { + ir3_context_error(ctx, "unknown shader type: %d\n", ctx->so->type); + } + + if (so->inputs[n].bary || (ctx->so->type == MESA_SHADER_VERTEX)) { + so->total_in += ncomp; + } +} + +static void +setup_output(struct ir3_context *ctx, nir_variable *out) +{ + struct ir3_shader_variant *so = ctx->so; + unsigned ncomp = glsl_get_components(out->type); + unsigned n = out->data.driver_location; + unsigned frac = out->data.location_frac; + unsigned slot = out->data.location; + unsigned comp = 0; + + if (ctx->so->type == MESA_SHADER_FRAGMENT) { + switch (slot) { + case FRAG_RESULT_DEPTH: + comp = 2; /* tgsi will write to .z component */ + so->writes_pos = true; + break; + case FRAG_RESULT_COLOR: + so->color0_mrt = 1; + break; + default: + if (slot >= FRAG_RESULT_DATA0) + break; + ir3_context_error(ctx, "unknown FS output name: %s\n", + gl_frag_result_name(slot)); + } + } else if 
(ctx->so->type == MESA_SHADER_VERTEX) { + switch (slot) { + case VARYING_SLOT_POS: + so->writes_pos = true; + break; + case VARYING_SLOT_PSIZ: + so->writes_psize = true; + break; + case VARYING_SLOT_COL0: + case VARYING_SLOT_COL1: + case VARYING_SLOT_BFC0: + case VARYING_SLOT_BFC1: + case VARYING_SLOT_FOGC: + case VARYING_SLOT_CLIP_DIST0: + case VARYING_SLOT_CLIP_DIST1: + case VARYING_SLOT_CLIP_VERTEX: + break; + default: + if (slot >= VARYING_SLOT_VAR0) + break; + if ((VARYING_SLOT_TEX0 <= slot) && (slot <= VARYING_SLOT_TEX7)) + break; + ir3_context_error(ctx, "unknown VS output name: %s\n", + gl_varying_slot_name(slot)); + } + } else { + ir3_context_error(ctx, "unknown shader type: %d\n", ctx->so->type); + } + + compile_assert(ctx, n < ARRAY_SIZE(so->outputs)); + + so->outputs[n].slot = slot; + so->outputs[n].regid = regid(n, comp); + so->outputs_count = MAX2(so->outputs_count, n + 1); + + for (int i = 0; i < ncomp; i++) { + unsigned idx = (n * 4) + i + frac; + compile_assert(ctx, idx < ctx->ir->noutputs); + ctx->ir->outputs[idx] = create_immed(ctx->block, fui(0.0)); + } + + /* if varying packing doesn't happen, we could end up in a situation + * with "holes" in the output, and since the per-generation code that + * sets up varying linkage registers doesn't expect to have more than + * one varying per vec4 slot, pad the holes. + * + * Note that this should probably generate a performance warning of + * some sort. + */ + for (int i = 0; i < frac; i++) { + unsigned idx = (n * 4) + i; + if (!ctx->ir->outputs[idx]) { + ctx->ir->outputs[idx] = create_immed(ctx->block, fui(0.0)); + } + } +} + +static int +max_drvloc(struct exec_list *vars) +{ + int drvloc = -1; + nir_foreach_variable(var, vars) { + drvloc = MAX2(drvloc, (int)var->data.driver_location); + } + return drvloc; +} + +static const unsigned max_sysvals[] = { + [MESA_SHADER_FRAGMENT] = 24, // TODO + [MESA_SHADER_VERTEX] = 16, + [MESA_SHADER_COMPUTE] = 16, // TODO how many do we actually need? 
+ [MESA_SHADER_KERNEL] = 16, // TODO how many do we actually need? +}; + +static void +emit_instructions(struct ir3_context *ctx) +{ + unsigned ninputs, noutputs; + nir_function_impl *fxn = nir_shader_get_entrypoint(ctx->s); + + ninputs = (max_drvloc(&ctx->s->inputs) + 1) * 4; + noutputs = (max_drvloc(&ctx->s->outputs) + 1) * 4; + + /* we need to leave room for sysvals: + */ + ninputs += max_sysvals[ctx->so->type]; + + ctx->ir = ir3_create(ctx->compiler, ninputs, noutputs); + + /* Create inputs in first block: */ + ctx->block = get_block(ctx, nir_start_block(fxn)); + ctx->in_block = ctx->block; + list_addtail(&ctx->block->node, &ctx->ir->block_list); + + ninputs -= max_sysvals[ctx->so->type]; + + /* for fragment shader, the vcoord input register is used as the + * base for bary.f varying fetch instrs: + */ + struct ir3_instruction *vcoord = NULL; + if (ctx->so->type == MESA_SHADER_FRAGMENT) { + struct ir3_instruction *xy[2]; + + vcoord = create_input_compmask(ctx, 0, 0x3); + ir3_split_dest(ctx->block, xy, vcoord, 0, 2); + + ctx->frag_vcoord = ir3_create_collect(ctx, xy, 2); + } + + /* Setup inputs: */ + nir_foreach_variable(var, &ctx->s->inputs) { + setup_input(ctx, var); + } + + /* Defer add_sysval_input() stuff until after setup_inputs(), + * because sysvals need to be appended after varyings: + */ + if (vcoord) { + add_sysval_input_compmask(ctx, SYSTEM_VALUE_VARYING_COORD, + 0x3, vcoord); + } + + if (ctx->frag_coord) { + add_sysval_input_compmask(ctx, SYSTEM_VALUE_FRAG_COORD, + 0xf, ctx->frag_coord); + } + + /* Setup outputs: */ + nir_foreach_variable(var, &ctx->s->outputs) { + setup_output(ctx, var); + } + + /* Setup registers (which should only be arrays): */ + nir_foreach_register(reg, &ctx->s->registers) { + ir3_declare_array(ctx, reg); + } + + /* NOTE: need to do something more clever when we support >1 fxn */ + nir_foreach_register(reg, &fxn->registers) { + ir3_declare_array(ctx, reg); + } + /* And emit the body: */ + ctx->impl = fxn; + emit_function(ctx, 
fxn); +} + +/* from NIR perspective, we actually have varying inputs. But the varying + * inputs, from an IR standpoint, are just bary.f/ldlv instructions. The + * only actual inputs are the sysvals. + */ +static void +fixup_frag_inputs(struct ir3_context *ctx) +{ + struct ir3_shader_variant *so = ctx->so; + struct ir3 *ir = ctx->ir; + unsigned i = 0; + + /* sysvals should appear at the end of the inputs, drop everything else: */ + while ((i < so->inputs_count) && !so->inputs[i].sysval) + i++; + + /* at IR level, inputs are always blocks of 4 scalars: */ + i *= 4; + + ir->inputs = &ir->inputs[i]; + ir->ninputs -= i; +} + +/* Fixup tex sampler state for astc/srgb workaround instructions. We + * need to assign the tex state indexes for these after we know the + * max tex index. + */ +static void +fixup_astc_srgb(struct ir3_context *ctx) +{ + struct ir3_shader_variant *so = ctx->so; + /* indexed by original tex idx, value is newly assigned alpha sampler + * state tex idx. Zero is invalid since there is at least one sampler + * if we get here. 
+ */ + unsigned alt_tex_state[16] = {0}; + unsigned tex_idx = ctx->max_texture_index + 1; + unsigned idx = 0; + + so->astc_srgb.base = tex_idx; + + for (unsigned i = 0; i < ctx->ir->astc_srgb_count; i++) { + struct ir3_instruction *sam = ctx->ir->astc_srgb[i]; + + compile_assert(ctx, sam->cat5.tex < ARRAY_SIZE(alt_tex_state)); + + if (alt_tex_state[sam->cat5.tex] == 0) { + /* assign new alternate/alpha tex state slot: */ + alt_tex_state[sam->cat5.tex] = tex_idx++; + so->astc_srgb.orig_idx[idx++] = sam->cat5.tex; + so->astc_srgb.count++; + } + + sam->cat5.tex = alt_tex_state[sam->cat5.tex]; + } +} + +static void +fixup_binning_pass(struct ir3_context *ctx) +{ + struct ir3_shader_variant *so = ctx->so; + struct ir3 *ir = ctx->ir; + unsigned i, j; + + for (i = 0, j = 0; i < so->outputs_count; i++) { + unsigned slot = so->outputs[i].slot; + + /* throw away everything but first position/psize */ + if ((slot == VARYING_SLOT_POS) || (slot == VARYING_SLOT_PSIZ)) { + if (i != j) { + so->outputs[j] = so->outputs[i]; + ir->outputs[(j*4)+0] = ir->outputs[(i*4)+0]; + ir->outputs[(j*4)+1] = ir->outputs[(i*4)+1]; + ir->outputs[(j*4)+2] = ir->outputs[(i*4)+2]; + ir->outputs[(j*4)+3] = ir->outputs[(i*4)+3]; + } + j++; + } + } + so->outputs_count = j; + ir->noutputs = j * 4; +} + +int +ir3_compile_shader_nir(struct ir3_compiler *compiler, + struct ir3_shader_variant *so) +{ + struct ir3_context *ctx; + struct ir3 *ir; + struct ir3_instruction **inputs; + unsigned i, actual_in, inloc; + int ret = 0, max_bary; + + assert(!so->ir); + + ctx = ir3_context_init(compiler, so); + if (!ctx) { + DBG("INIT failed!"); + ret = -1; + goto out; + } + + emit_instructions(ctx); + + if (ctx->error) { + DBG("EMIT failed!"); + ret = -1; + goto out; + } + + ir = so->ir = ctx->ir; + + /* keep track of the inputs from TGSI perspective.. 
*/ + inputs = ir->inputs; + + /* but fixup actual inputs for frag shader: */ + if (so->type == MESA_SHADER_FRAGMENT) + fixup_frag_inputs(ctx); + + /* at this point, for binning pass, throw away unneeded outputs: */ + if (so->binning_pass && (ctx->compiler->gpu_id < 600)) + fixup_binning_pass(ctx); + + /* if we want half-precision outputs, mark the output registers + * as half: + */ + if (so->key.half_precision) { + for (i = 0; i < ir->noutputs; i++) { + struct ir3_instruction *out = ir->outputs[i]; + + if (!out) + continue; + + /* if frag shader writes z, that needs to be full precision: */ + if (so->outputs[i/4].slot == FRAG_RESULT_DEPTH) + continue; + + out->regs[0]->flags |= IR3_REG_HALF; + /* output could be a fanout (ie. texture fetch output) + * in which case we need to propagate the half-reg flag + * up to the definer so that RA sees it: + */ + if (out->opc == OPC_META_FO) { + out = out->regs[1]->instr; + out->regs[0]->flags |= IR3_REG_HALF; + } + + if (out->opc == OPC_MOV) { + out->cat1.dst_type = half_type(out->cat1.dst_type); + } + } + } + + if (ir3_shader_debug & IR3_DBG_OPTMSGS) { + printf("BEFORE CP:\n"); + ir3_print(ir); + } + + ir3_cp(ir, so); + + /* at this point, for binning pass, throw away unneeded outputs: + * Note that for a6xx and later, we do this after ir3_cp to ensure + * that the uniform/constant layout for BS and VS matches, so that + * we can re-use same VS_CONST state group. + */ + if (so->binning_pass && (ctx->compiler->gpu_id >= 600)) + fixup_binning_pass(ctx); + + /* Insert mov if there's same instruction for each output. + * eg. 
dEQP-GLES31.functional.shaders.opaque_type_indexing.sampler.const_expression.vertex.sampler2dshadow + */ + for (int i = ir->noutputs - 1; i >= 0; i--) { + if (!ir->outputs[i]) + continue; + for (unsigned j = 0; j < i; j++) { + if (ir->outputs[i] == ir->outputs[j]) { + ir->outputs[i] = + ir3_MOV(ir->outputs[i]->block, ir->outputs[i], TYPE_F32); + } + } + } + + if (ir3_shader_debug & IR3_DBG_OPTMSGS) { + printf("BEFORE GROUPING:\n"); + ir3_print(ir); + } + + ir3_sched_add_deps(ir); + + /* Group left/right neighbors, inserting mov's where needed to + * solve conflicts: + */ + ir3_group(ir); + + if (ir3_shader_debug & IR3_DBG_OPTMSGS) { + printf("AFTER GROUPING:\n"); + ir3_print(ir); + } + + ir3_depth(ir); + + if (ir3_shader_debug & IR3_DBG_OPTMSGS) { + printf("AFTER DEPTH:\n"); + ir3_print(ir); + } + + ret = ir3_sched(ir); + if (ret) { + DBG("SCHED failed!"); + goto out; + } + + if (ir3_shader_debug & IR3_DBG_OPTMSGS) { + printf("AFTER SCHED:\n"); + ir3_print(ir); + } + + ret = ir3_ra(ir, so->type, so->frag_coord, so->frag_face); + if (ret) { + DBG("RA failed!"); + goto out; + } + + if (ir3_shader_debug & IR3_DBG_OPTMSGS) { + printf("AFTER RA:\n"); + ir3_print(ir); + } + + /* fixup input/outputs: */ + for (i = 0; i < so->outputs_count; i++) { + /* sometimes we get outputs that don't write the .x coord, like: + * + * decl_var shader_out INTERP_MODE_NONE float Color (VARYING_SLOT_VAR9.z, 1, 0) + * + * Presumably the result of varying packing and then eliminating + * some unneeded varyings? Just skip head to the first valid + * component of the output. 
+ */ + for (unsigned j = 0; j < 4; j++) { + struct ir3_instruction *instr = ir->outputs[(i*4) + j]; + if (instr) { + so->outputs[i].regid = instr->regs[0]->num; + break; + } + } + } + + /* Note that some or all channels of an input may be unused: */ + actual_in = 0; + inloc = 0; + for (i = 0; i < so->inputs_count; i++) { + unsigned j, reg = regid(63,0), compmask = 0, maxcomp = 0; + so->inputs[i].ncomp = 0; + so->inputs[i].inloc = inloc; + for (j = 0; j < 4; j++) { + struct ir3_instruction *in = inputs[(i*4) + j]; + if (in && !(in->flags & IR3_INSTR_UNUSED)) { + compmask |= (1 << j); + reg = in->regs[0]->num - j; + actual_in++; + so->inputs[i].ncomp++; + if ((so->type == MESA_SHADER_FRAGMENT) && so->inputs[i].bary) { + /* assign inloc: */ + assert(in->regs[1]->flags & IR3_REG_IMMED); + in->regs[1]->iim_val = inloc + j; + maxcomp = j + 1; + } + } + } + if ((so->type == MESA_SHADER_FRAGMENT) && compmask && so->inputs[i].bary) { + so->varying_in++; + so->inputs[i].compmask = (1 << maxcomp) - 1; + inloc += maxcomp; + } else if (!so->inputs[i].sysval) { + so->inputs[i].compmask = compmask; + } + so->inputs[i].regid = reg; + } + + if (ctx->astc_srgb) + fixup_astc_srgb(ctx); + + /* We need to do legalize after (for frag shader's) the "bary.f" + * offsets (inloc) have been assigned. 
+ */ + ir3_legalize(ir, &so->num_samp, &so->has_ssbo, &max_bary); + + if (ir3_shader_debug & IR3_DBG_OPTMSGS) { + printf("AFTER LEGALIZE:\n"); + ir3_print(ir); + } + + so->branchstack = ctx->max_stack; + + /* Note that actual_in counts inputs that are not bary.f'd for FS: */ + if (so->type == MESA_SHADER_VERTEX) + so->total_in = actual_in; + else + so->total_in = max_bary + 1; + +out: + if (ret) { + if (so->ir) + ir3_destroy(so->ir); + so->ir = NULL; + } + ir3_context_free(ctx); + + return ret; +} diff -Nru mesa-18.3.3/src/freedreno/ir3/ir3_context.c mesa-19.0.1/src/freedreno/ir3/ir3_context.c --- mesa-18.3.3/src/freedreno/ir3/ir3_context.c 1970-01-01 00:00:00.000000000 +0000 +++ mesa-19.0.1/src/freedreno/ir3/ir3_context.c 2019-03-31 23:16:37.000000000 +0000 @@ -0,0 +1,594 @@ +/* + * Copyright (C) 2015-2018 Rob Clark + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ * + * Authors: + * Rob Clark + */ + +#include "util/u_math.h" + +#include "ir3_compiler.h" +#include "ir3_context.h" +#include "ir3_shader.h" +#include "ir3_nir.h" + +struct ir3_context * +ir3_context_init(struct ir3_compiler *compiler, + struct ir3_shader_variant *so) +{ + struct ir3_context *ctx = rzalloc(NULL, struct ir3_context); + + if (compiler->gpu_id >= 400) { + if (so->type == MESA_SHADER_VERTEX) { + ctx->astc_srgb = so->key.vastc_srgb; + } else if (so->type == MESA_SHADER_FRAGMENT) { + ctx->astc_srgb = so->key.fastc_srgb; + } + + } else { + if (so->type == MESA_SHADER_VERTEX) { + ctx->samples = so->key.vsamples; + } else if (so->type == MESA_SHADER_FRAGMENT) { + ctx->samples = so->key.fsamples; + } + } + + ctx->compiler = compiler; + ctx->so = so; + ctx->def_ht = _mesa_hash_table_create(ctx, + _mesa_hash_pointer, _mesa_key_pointer_equal); + ctx->block_ht = _mesa_hash_table_create(ctx, + _mesa_hash_pointer, _mesa_key_pointer_equal); + + /* TODO: maybe generate some sort of bitmask of what key + * lowers vs what shader has (ie. no need to lower + * texture clamp lowering if no texture sample instrs).. + * although should be done further up the stack to avoid + * creating duplicate variants.. 
+ */ + + if (ir3_key_lowers_nir(&so->key)) { + nir_shader *s = nir_shader_clone(ctx, so->shader->nir); + ctx->s = ir3_optimize_nir(so->shader, s, &so->key); + } else { + /* fast-path for shader key that lowers nothing in NIR: */ + ctx->s = nir_shader_clone(ctx, so->shader->nir); + } + + /* this needs to be the last pass run, so do this here instead of + * in ir3_optimize_nir(): + */ + NIR_PASS_V(ctx->s, nir_lower_bool_to_int32); + NIR_PASS_V(ctx->s, nir_lower_locals_to_regs); + NIR_PASS_V(ctx->s, nir_convert_from_ssa, true); + + if (ir3_shader_debug & IR3_DBG_DISASM) { + DBG("dump nir%dv%d: type=%d, k={cts=%u,hp=%u}", + so->shader->id, so->id, so->type, + so->key.color_two_side, so->key.half_precision); + nir_print_shader(ctx->s, stdout); + } + + if (shader_debug_enabled(so->type)) { + fprintf(stderr, "NIR (final form) for %s shader:\n", + _mesa_shader_stage_to_string(so->type)); + nir_print_shader(ctx->s, stderr); + } + + ir3_nir_scan_driver_consts(ctx->s, &so->const_layout); + + so->num_uniforms = ctx->s->num_uniforms; + so->num_ubos = ctx->s->info.num_ubos; + + /* Layout of constant registers, each section aligned to vec4. Note + * that pointer size (ubo, etc) changes depending on generation. + * + * user consts + * UBO addresses + * SSBO sizes + * if (vertex shader) { + * driver params (IR3_DP_*) + * if (stream_output.num_outputs > 0) + * stream-out addresses + * } + * immediates + * + * Immediates go last mostly because they are inserted in the CP pass + * after the nir -> ir3 frontend. 
+ */ + unsigned constoff = align(ctx->s->num_uniforms, 4); + unsigned ptrsz = ir3_pointer_size(ctx); + + memset(&so->constbase, ~0, sizeof(so->constbase)); + + if (so->num_ubos > 0) { + so->constbase.ubo = constoff; + constoff += align(ctx->s->info.num_ubos * ptrsz, 4) / 4; + } + + if (so->const_layout.ssbo_size.count > 0) { + unsigned cnt = so->const_layout.ssbo_size.count; + so->constbase.ssbo_sizes = constoff; + constoff += align(cnt, 4) / 4; + } + + if (so->const_layout.image_dims.count > 0) { + unsigned cnt = so->const_layout.image_dims.count; + so->constbase.image_dims = constoff; + constoff += align(cnt, 4) / 4; + } + + unsigned num_driver_params = 0; + if (so->type == MESA_SHADER_VERTEX) { + num_driver_params = IR3_DP_VS_COUNT; + } else if (so->type == MESA_SHADER_COMPUTE) { + num_driver_params = IR3_DP_CS_COUNT; + } + + so->constbase.driver_param = constoff; + constoff += align(num_driver_params, 4) / 4; + + if ((so->type == MESA_SHADER_VERTEX) && + (compiler->gpu_id < 500) && + so->shader->stream_output.num_outputs > 0) { + so->constbase.tfbo = constoff; + constoff += align(IR3_MAX_SO_BUFFERS * ptrsz, 4) / 4; + } + + so->constbase.immediate = constoff; + + return ctx; +} + +void +ir3_context_free(struct ir3_context *ctx) +{ + ralloc_free(ctx); +} + +/* + * Misc helpers + */ + +/* allocate a n element value array (to be populated by caller) and + * insert in def_ht + */ +struct ir3_instruction ** +ir3_get_dst_ssa(struct ir3_context *ctx, nir_ssa_def *dst, unsigned n) +{ + struct ir3_instruction **value = + ralloc_array(ctx->def_ht, struct ir3_instruction *, n); + _mesa_hash_table_insert(ctx->def_ht, dst, value); + return value; +} + +struct ir3_instruction ** +ir3_get_dst(struct ir3_context *ctx, nir_dest *dst, unsigned n) +{ + struct ir3_instruction **value; + + if (dst->is_ssa) { + value = ir3_get_dst_ssa(ctx, &dst->ssa, n); + } else { + value = ralloc_array(ctx, struct ir3_instruction *, n); + } + + /* NOTE: in non-ssa case, we don't really need to 
store last_dst + * but this helps us catch cases where put_dst() call is forgotten + */ + compile_assert(ctx, !ctx->last_dst); + ctx->last_dst = value; + ctx->last_dst_n = n; + + return value; +} + +struct ir3_instruction * const * +ir3_get_src(struct ir3_context *ctx, nir_src *src) +{ + if (src->is_ssa) { + struct hash_entry *entry; + entry = _mesa_hash_table_search(ctx->def_ht, src->ssa); + compile_assert(ctx, entry); + return entry->data; + } else { + nir_register *reg = src->reg.reg; + struct ir3_array *arr = ir3_get_array(ctx, reg); + unsigned num_components = arr->r->num_components; + struct ir3_instruction *addr = NULL; + struct ir3_instruction **value = + ralloc_array(ctx, struct ir3_instruction *, num_components); + + if (src->reg.indirect) + addr = ir3_get_addr(ctx, ir3_get_src(ctx, src->reg.indirect)[0], + reg->num_components); + + for (unsigned i = 0; i < num_components; i++) { + unsigned n = src->reg.base_offset * reg->num_components + i; + compile_assert(ctx, n < arr->length); + value[i] = ir3_create_array_load(ctx, arr, n, addr); + } + + return value; + } +} + +void +put_dst(struct ir3_context *ctx, nir_dest *dst) +{ + unsigned bit_size = nir_dest_bit_size(*dst); + + if (bit_size < 32) { + for (unsigned i = 0; i < ctx->last_dst_n; i++) { + struct ir3_instruction *dst = ctx->last_dst[i]; + dst->regs[0]->flags |= IR3_REG_HALF; + if (ctx->last_dst[i]->opc == OPC_META_FO) + dst->regs[1]->instr->regs[0]->flags |= IR3_REG_HALF; + } + } + + if (!dst->is_ssa) { + nir_register *reg = dst->reg.reg; + struct ir3_array *arr = ir3_get_array(ctx, reg); + unsigned num_components = ctx->last_dst_n; + struct ir3_instruction *addr = NULL; + + if (dst->reg.indirect) + addr = ir3_get_addr(ctx, ir3_get_src(ctx, dst->reg.indirect)[0], + reg->num_components); + + for (unsigned i = 0; i < num_components; i++) { + unsigned n = dst->reg.base_offset * reg->num_components + i; + compile_assert(ctx, n < arr->length); + if (!ctx->last_dst[i]) + continue; + 
ir3_create_array_store(ctx, arr, n, ctx->last_dst[i], addr); + } + + ralloc_free(ctx->last_dst); + } + ctx->last_dst = NULL; + ctx->last_dst_n = 0; +} + +struct ir3_instruction * +ir3_create_collect(struct ir3_context *ctx, struct ir3_instruction *const *arr, + unsigned arrsz) +{ + struct ir3_block *block = ctx->block; + struct ir3_instruction *collect; + + if (arrsz == 0) + return NULL; + + unsigned flags = arr[0]->regs[0]->flags & IR3_REG_HALF; + + collect = ir3_instr_create2(block, OPC_META_FI, 1 + arrsz); + ir3_reg_create(collect, 0, flags); /* dst */ + for (unsigned i = 0; i < arrsz; i++) { + struct ir3_instruction *elem = arr[i]; + + /* Since arrays are pre-colored in RA, we can't assume that + * things will end up in the right place. (Ie. if a collect + * joins elements from two different arrays.) So insert an + * extra mov. + * + * We could possibly skip this if all the collected elements + * are contiguous elements in a single array.. not sure how + * likely that is to happen. + * + * Fixes a problem with glamor shaders, that in effect do + * something like: + * + * if (foo) + * texcoord = .. + * else + * texcoord = .. + * color = texture2D(tex, texcoord); + * + * In this case, texcoord will end up as nir registers (which + * translate to ir3 array's of length 1. And we can't assume + * the two (or more) arrays will get allocated in consecutive + * scalar registers. + * + */ + if (elem->regs[0]->flags & IR3_REG_ARRAY) { + type_t type = (flags & IR3_REG_HALF) ? 
TYPE_U16 : TYPE_U32; + elem = ir3_MOV(block, elem, type); + } + + compile_assert(ctx, (elem->regs[0]->flags & IR3_REG_HALF) == flags); + ir3_reg_create(collect, 0, IR3_REG_SSA | flags)->instr = elem; + } + + return collect; +} + +/* helper for instructions that produce multiple consecutive scalar + * outputs which need to have a split/fanout meta instruction inserted + */ +void +ir3_split_dest(struct ir3_block *block, struct ir3_instruction **dst, + struct ir3_instruction *src, unsigned base, unsigned n) +{ + struct ir3_instruction *prev = NULL; + + if ((n == 1) && (src->regs[0]->wrmask == 0x1)) { + dst[0] = src; + return; + } + + for (int i = 0, j = 0; i < n; i++) { + struct ir3_instruction *split = ir3_instr_create(block, OPC_META_FO); + ir3_reg_create(split, 0, IR3_REG_SSA); + ir3_reg_create(split, 0, IR3_REG_SSA)->instr = src; + split->fo.off = i + base; + + if (prev) { + split->cp.left = prev; + split->cp.left_cnt++; + prev->cp.right = split; + prev->cp.right_cnt++; + } + prev = split; + + if (src->regs[0]->wrmask & (1 << (i + base))) + dst[j++] = split; + } +} + +void +ir3_context_error(struct ir3_context *ctx, const char *format, ...) +{ + struct hash_table *errors = NULL; + va_list ap; + va_start(ap, format); + if (ctx->cur_instr) { + errors = _mesa_hash_table_create(NULL, + _mesa_hash_pointer, + _mesa_key_pointer_equal); + char *msg = ralloc_vasprintf(errors, format, ap); + _mesa_hash_table_insert(errors, ctx->cur_instr, msg); + } else { + _debug_vprintf(format, ap); + } + va_end(ap); + nir_print_shader_annotated(ctx->s, stdout, errors); + ralloc_free(errors); + ctx->error = true; + debug_assert(0); +} + +static struct ir3_instruction * +create_addr(struct ir3_block *block, struct ir3_instruction *src, int align) +{ + struct ir3_instruction *instr, *immed; + + /* TODO in at least some cases, the backend could probably be + * made clever enough to propagate IR3_REG_HALF.. 
+ */ + instr = ir3_COV(block, src, TYPE_U32, TYPE_S16); + instr->regs[0]->flags |= IR3_REG_HALF; + + switch(align){ + case 1: + /* src *= 1: */ + break; + case 2: + /* src *= 2 => src <<= 1: */ + immed = create_immed(block, 1); + immed->regs[0]->flags |= IR3_REG_HALF; + + instr = ir3_SHL_B(block, instr, 0, immed, 0); + instr->regs[0]->flags |= IR3_REG_HALF; + instr->regs[1]->flags |= IR3_REG_HALF; + break; + case 3: + /* src *= 3: */ + immed = create_immed(block, 3); + immed->regs[0]->flags |= IR3_REG_HALF; + + instr = ir3_MULL_U(block, instr, 0, immed, 0); + instr->regs[0]->flags |= IR3_REG_HALF; + instr->regs[1]->flags |= IR3_REG_HALF; + break; + case 4: + /* src *= 4 => src <<= 2: */ + immed = create_immed(block, 2); + immed->regs[0]->flags |= IR3_REG_HALF; + + instr = ir3_SHL_B(block, instr, 0, immed, 0); + instr->regs[0]->flags |= IR3_REG_HALF; + instr->regs[1]->flags |= IR3_REG_HALF; + break; + default: + unreachable("bad align"); + return NULL; + } + + instr = ir3_MOV(block, instr, TYPE_S16); + instr->regs[0]->num = regid(REG_A0, 0); + instr->regs[0]->flags |= IR3_REG_HALF; + instr->regs[1]->flags |= IR3_REG_HALF; + + return instr; +} + +/* caches addr values to avoid generating multiple cov/shl/mova + * sequences for each use of a given NIR level src as address + */ +struct ir3_instruction * +ir3_get_addr(struct ir3_context *ctx, struct ir3_instruction *src, int align) +{ + struct ir3_instruction *addr; + unsigned idx = align - 1; + + compile_assert(ctx, idx < ARRAY_SIZE(ctx->addr_ht)); + + if (!ctx->addr_ht[idx]) { + ctx->addr_ht[idx] = _mesa_hash_table_create(ctx, + _mesa_hash_pointer, _mesa_key_pointer_equal); + } else { + struct hash_entry *entry; + entry = _mesa_hash_table_search(ctx->addr_ht[idx], src); + if (entry) + return entry->data; + } + + addr = create_addr(ctx->block, src, align); + _mesa_hash_table_insert(ctx->addr_ht[idx], src, addr); + + return addr; +} + +struct ir3_instruction * +ir3_get_predicate(struct ir3_context *ctx, struct 
ir3_instruction *src) +{ + struct ir3_block *b = ctx->block; + struct ir3_instruction *cond; + + /* NOTE: only cmps.*.* can write p0.x: */ + cond = ir3_CMPS_S(b, src, 0, create_immed(b, 0), 0); + cond->cat2.condition = IR3_COND_NE; + + /* condition always goes in predicate register: */ + cond->regs[0]->num = regid(REG_P0, 0); + + return cond; +} + +/* + * Array helpers + */ + +void +ir3_declare_array(struct ir3_context *ctx, nir_register *reg) +{ + struct ir3_array *arr = rzalloc(ctx, struct ir3_array); + arr->id = ++ctx->num_arrays; + /* NOTE: sometimes we get non array regs, for example for arrays of + * length 1. See fs-const-array-of-struct-of-array.shader_test. So + * treat a non-array as if it was an array of length 1. + * + * It would be nice if there was a nir pass to convert arrays of + * length 1 to ssa. + */ + arr->length = reg->num_components * MAX2(1, reg->num_array_elems); + compile_assert(ctx, arr->length > 0); + arr->r = reg; + list_addtail(&arr->node, &ctx->ir->array_list); +} + +struct ir3_array * +ir3_get_array(struct ir3_context *ctx, nir_register *reg) +{ + list_for_each_entry (struct ir3_array, arr, &ctx->ir->array_list, node) { + if (arr->r == reg) + return arr; + } + ir3_context_error(ctx, "bogus reg: %s\n", reg->name); + return NULL; +} + +/* relative (indirect) if address!=NULL */ +struct ir3_instruction * +ir3_create_array_load(struct ir3_context *ctx, struct ir3_array *arr, int n, + struct ir3_instruction *address) +{ + struct ir3_block *block = ctx->block; + struct ir3_instruction *mov; + struct ir3_register *src; + + mov = ir3_instr_create(block, OPC_MOV); + mov->cat1.src_type = TYPE_U32; + mov->cat1.dst_type = TYPE_U32; + mov->barrier_class = IR3_BARRIER_ARRAY_R; + mov->barrier_conflict = IR3_BARRIER_ARRAY_W; + ir3_reg_create(mov, 0, 0); + src = ir3_reg_create(mov, 0, IR3_REG_ARRAY | + COND(address, IR3_REG_RELATIV)); + src->instr = arr->last_write; + src->size = arr->length; + src->array.id = arr->id; + src->array.offset = n; + + if 
(address) + ir3_instr_set_address(mov, address); + + return mov; +} + +/* relative (indirect) if address!=NULL */ +void +ir3_create_array_store(struct ir3_context *ctx, struct ir3_array *arr, int n, + struct ir3_instruction *src, struct ir3_instruction *address) +{ + struct ir3_block *block = ctx->block; + struct ir3_instruction *mov; + struct ir3_register *dst; + + /* if not relative store, don't create an extra mov, since that + * ends up being difficult for cp to remove. + */ + if (!address) { + dst = src->regs[0]; + + src->barrier_class |= IR3_BARRIER_ARRAY_W; + src->barrier_conflict |= IR3_BARRIER_ARRAY_R | IR3_BARRIER_ARRAY_W; + + dst->flags |= IR3_REG_ARRAY; + dst->instr = arr->last_write; + dst->size = arr->length; + dst->array.id = arr->id; + dst->array.offset = n; + + arr->last_write = src; + + array_insert(block, block->keeps, src); + + return; + } + + mov = ir3_instr_create(block, OPC_MOV); + mov->cat1.src_type = TYPE_U32; + mov->cat1.dst_type = TYPE_U32; + mov->barrier_class = IR3_BARRIER_ARRAY_W; + mov->barrier_conflict = IR3_BARRIER_ARRAY_R | IR3_BARRIER_ARRAY_W; + dst = ir3_reg_create(mov, 0, IR3_REG_ARRAY | + COND(address, IR3_REG_RELATIV)); + dst->instr = arr->last_write; + dst->size = arr->length; + dst->array.id = arr->id; + dst->array.offset = n; + ir3_reg_create(mov, 0, IR3_REG_SSA)->instr = src; + + if (address) + ir3_instr_set_address(mov, address); + + arr->last_write = mov; + + /* the array store may only matter to something in an earlier + * block (ie. loops), but since arrays are not in SSA, depth + * pass won't know this.. 
so keep all array stores: + */ + array_insert(block, block->keeps, mov); +} diff -Nru mesa-18.3.3/src/freedreno/ir3/ir3_context.h mesa-19.0.1/src/freedreno/ir3/ir3_context.h --- mesa-18.3.3/src/freedreno/ir3/ir3_context.h 1970-01-01 00:00:00.000000000 +0000 +++ mesa-19.0.1/src/freedreno/ir3/ir3_context.h 2019-03-31 23:16:37.000000000 +0000 @@ -0,0 +1,183 @@ +/* + * Copyright (C) 2015-2018 Rob Clark + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + * Authors: + * Rob Clark + */ + +#ifndef IR3_CONTEXT_H_ +#define IR3_CONTEXT_H_ + +#include "ir3_nir.h" +#include "ir3.h" + +/* for conditionally setting boolean flag(s): */ +#define COND(bool, val) ((bool) ? (val) : 0) + +#define DBG(fmt, ...) \ + do { debug_printf("%s:%d: "fmt "\n", \ + __FUNCTION__, __LINE__, ##__VA_ARGS__); } while (0) + +/** + * The context for compilation of a single shader. 
+ */ +struct ir3_context { + struct ir3_compiler *compiler; + + struct nir_shader *s; + + struct nir_instr *cur_instr; /* current instruction, just for debug */ + + struct ir3 *ir; + struct ir3_shader_variant *so; + + struct ir3_block *block; /* the current block */ + struct ir3_block *in_block; /* block created for shader inputs */ + + nir_function_impl *impl; + + /* For fragment shaders, varyings are not actual shader inputs, + * instead the hw passes a varying-coord which is used with + * bary.f. + * + * But NIR doesn't know that, it still declares varyings as + * inputs. So we do all the input tracking normally and fix + * things up after compile_instructions() + * + * NOTE that frag_vcoord is the hardware position (possibly it + * is actually an index or tag or some such.. it is *not* + * values that can be directly used for gl_FragCoord..) + */ + struct ir3_instruction *frag_vcoord; + + /* for fragment shaders, for gl_FrontFacing and gl_FragCoord: */ + struct ir3_instruction *frag_face, *frag_coord; + + /* For vertex shaders, keep track of the system values sources */ + struct ir3_instruction *vertex_id, *basevertex, *instance_id; + + /* For fragment shaders: */ + struct ir3_instruction *samp_id, *samp_mask_in; + + /* Compute shader inputs: */ + struct ir3_instruction *local_invocation_id, *work_group_id; + + /* mapping from nir_register to defining instruction: */ + struct hash_table *def_ht; + + unsigned num_arrays; + + /* Tracking for max level of flowcontrol (branchstack) needed + * by a5xx+: + */ + unsigned stack, max_stack; + + /* a common pattern for indirect addressing is to request the + * same address register multiple times. 
To avoid generating + * duplicate instruction sequences (which our backend does not + * try to clean up, since that should be done as the NIR stage) + * we cache the address value generated for a given src value: + * + * Note that we have to cache these per alignment, since same + * src used for an array of vec1 cannot be also used for an + * array of vec4. + */ + struct hash_table *addr_ht[4]; + + /* last dst array, for indirect we need to insert a var-store. + */ + struct ir3_instruction **last_dst; + unsigned last_dst_n; + + /* maps nir_block to ir3_block, mostly for the purposes of + * figuring out the blocks successors + */ + struct hash_table *block_ht; + + /* on a4xx, bitmask of samplers which need astc+srgb workaround: */ + unsigned astc_srgb; + + unsigned samples; /* bitmask of x,y sample shifts */ + + unsigned max_texture_index; + + /* set if we encounter something we can't handle yet, so we + * can bail cleanly and fallback to TGSI compiler f/e + */ + bool error; +}; + +struct ir3_context * ir3_context_init(struct ir3_compiler *compiler, + struct ir3_shader_variant *so); +void ir3_context_free(struct ir3_context *ctx); + +/* gpu pointer size in units of 32bit registers/slots */ +static inline +unsigned ir3_pointer_size(struct ir3_context *ctx) +{ + return (ctx->compiler->gpu_id >= 500) ? 
2 : 1; +} + +struct ir3_instruction ** ir3_get_dst_ssa(struct ir3_context *ctx, nir_ssa_def *dst, unsigned n); +struct ir3_instruction ** ir3_get_dst(struct ir3_context *ctx, nir_dest *dst, unsigned n); +struct ir3_instruction * const * ir3_get_src(struct ir3_context *ctx, nir_src *src); +void put_dst(struct ir3_context *ctx, nir_dest *dst); +struct ir3_instruction * ir3_create_collect(struct ir3_context *ctx, + struct ir3_instruction *const *arr, unsigned arrsz); +void ir3_split_dest(struct ir3_block *block, struct ir3_instruction **dst, + struct ir3_instruction *src, unsigned base, unsigned n); + +void ir3_context_error(struct ir3_context *ctx, const char *format, ...); + +#define compile_assert(ctx, cond) do { \ + if (!(cond)) ir3_context_error((ctx), "failed assert: "#cond"\n"); \ + } while (0) + +struct ir3_instruction * ir3_get_addr(struct ir3_context *ctx, + struct ir3_instruction *src, int align); +struct ir3_instruction * ir3_get_predicate(struct ir3_context *ctx, + struct ir3_instruction *src); + +void ir3_declare_array(struct ir3_context *ctx, nir_register *reg); +struct ir3_array * ir3_get_array(struct ir3_context *ctx, nir_register *reg); +struct ir3_instruction *ir3_create_array_load(struct ir3_context *ctx, + struct ir3_array *arr, int n, struct ir3_instruction *address); +void ir3_create_array_store(struct ir3_context *ctx, struct ir3_array *arr, int n, + struct ir3_instruction *src, struct ir3_instruction *address); + +static inline type_t utype_for_size(unsigned bit_size) +{ + switch (bit_size) { + case 32: return TYPE_U32; + case 16: return TYPE_U16; + case 8: return TYPE_U8; + default: unreachable("bad bitsize"); return ~0; + } +} + +static inline type_t utype_src(nir_src src) +{ return utype_for_size(nir_src_bit_size(src)); } + +static inline type_t utype_dst(nir_dest dst) +{ return utype_for_size(nir_dest_bit_size(dst)); } + +#endif /* IR3_CONTEXT_H_ */ diff -Nru mesa-18.3.3/src/freedreno/ir3/ir3_cp.c mesa-19.0.1/src/freedreno/ir3/ir3_cp.c --- 
mesa-18.3.3/src/freedreno/ir3/ir3_cp.c 1970-01-01 00:00:00.000000000 +0000 +++ mesa-19.0.1/src/freedreno/ir3/ir3_cp.c 2019-03-31 23:16:37.000000000 +0000 @@ -0,0 +1,653 @@ +/* + * Copyright (C) 2014 Rob Clark + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + * Authors: + * Rob Clark + */ + +#include + +#include "ir3.h" +#include "ir3_shader.h" + +/* + * Copy Propagate: + */ + +struct ir3_cp_ctx { + struct ir3 *shader; + struct ir3_shader_variant *so; + unsigned immediate_idx; +}; + +/* is it a type preserving mov, with ok flags? 
*/ +static bool is_eligible_mov(struct ir3_instruction *instr, bool allow_flags) +{ + if (is_same_type_mov(instr)) { + struct ir3_register *dst = instr->regs[0]; + struct ir3_register *src = instr->regs[1]; + struct ir3_instruction *src_instr = ssa(src); + + /* only if mov src is SSA (not const/immed): */ + if (!src_instr) + return false; + + /* no indirect: */ + if (dst->flags & IR3_REG_RELATIV) + return false; + if (src->flags & IR3_REG_RELATIV) + return false; + + if (src->flags & IR3_REG_ARRAY) + return false; + + if (!allow_flags) + if (src->flags & (IR3_REG_FABS | IR3_REG_FNEG | + IR3_REG_SABS | IR3_REG_SNEG | IR3_REG_BNOT)) + return false; + + /* TODO: remove this hack: */ + if (src_instr->opc == OPC_META_FO) + return false; + + return true; + } + return false; +} + +static unsigned cp_flags(unsigned flags) +{ + /* only considering these flags (at least for now): */ + flags &= (IR3_REG_CONST | IR3_REG_IMMED | + IR3_REG_FNEG | IR3_REG_FABS | + IR3_REG_SNEG | IR3_REG_SABS | + IR3_REG_BNOT | IR3_REG_RELATIV); + return flags; +} + +static bool valid_flags(struct ir3_instruction *instr, unsigned n, + unsigned flags) +{ + unsigned valid_flags; + flags = cp_flags(flags); + + /* If destination is indirect, then source cannot be.. at least + * I don't think so.. + */ + if ((instr->regs[0]->flags & IR3_REG_RELATIV) && + (flags & IR3_REG_RELATIV)) + return false; + + /* TODO it seems to *mostly* work to cp RELATIV, except we get some + * intermittent piglit variable-indexing fails. Newer blob driver + * doesn't seem to cp these. Possibly this is hw workaround? 
Not + * sure, but until that is understood better, lets just switch off + * cp for indirect src's: + */ + if (flags & IR3_REG_RELATIV) + return false; + + switch (opc_cat(instr->opc)) { + case 1: + valid_flags = IR3_REG_IMMED | IR3_REG_CONST | IR3_REG_RELATIV; + if (flags & ~valid_flags) + return false; + break; + case 2: + valid_flags = ir3_cat2_absneg(instr->opc) | + IR3_REG_CONST | IR3_REG_RELATIV; + + if (ir3_cat2_int(instr->opc)) + valid_flags |= IR3_REG_IMMED; + + if (flags & ~valid_flags) + return false; + + if (flags & (IR3_REG_CONST | IR3_REG_IMMED)) { + unsigned m = (n ^ 1) + 1; + /* cannot deal w/ const in both srcs: + * (note that some cat2 actually only have a single src) + */ + if (m < instr->regs_count) { + struct ir3_register *reg = instr->regs[m]; + if ((flags & IR3_REG_CONST) && (reg->flags & IR3_REG_CONST)) + return false; + if ((flags & IR3_REG_IMMED) && (reg->flags & IR3_REG_IMMED)) + return false; + } + /* cannot be const + ABS|NEG: */ + if (flags & (IR3_REG_FABS | IR3_REG_FNEG | + IR3_REG_SABS | IR3_REG_SNEG | IR3_REG_BNOT)) + return false; + } + break; + case 3: + valid_flags = ir3_cat3_absneg(instr->opc) | + IR3_REG_CONST | IR3_REG_RELATIV; + + if (flags & ~valid_flags) + return false; + + if (flags & (IR3_REG_CONST | IR3_REG_RELATIV)) { + /* cannot deal w/ const/relativ in 2nd src: */ + if (n == 1) + return false; + } + + if (flags & IR3_REG_CONST) { + /* cannot be const + ABS|NEG: */ + if (flags & (IR3_REG_FABS | IR3_REG_FNEG | + IR3_REG_SABS | IR3_REG_SNEG | IR3_REG_BNOT)) + return false; + } + break; + case 4: + /* seems like blob compiler avoids const as src.. 
*/ + /* TODO double check if this is still the case on a4xx */ + if (flags & (IR3_REG_CONST | IR3_REG_IMMED)) + return false; + if (flags & (IR3_REG_SABS | IR3_REG_SNEG)) + return false; + break; + case 5: + /* no flags allowed */ + if (flags) + return false; + break; + case 6: + valid_flags = IR3_REG_IMMED; + if (flags & ~valid_flags) + return false; + + if (flags & IR3_REG_IMMED) { + /* doesn't seem like we can have immediate src for store + * instructions: + * + * TODO this restriction could also apply to load instructions, + * but for load instructions this arg is the address (and not + * really sure any good way to test a hard-coded immed addr src) + */ + if (is_store(instr) && (n == 1)) + return false; + + if ((instr->opc == OPC_LDL) && (n != 1)) + return false; + + if ((instr->opc == OPC_STL) && (n != 2)) + return false; + + /* disallow CP into anything but the SSBO slot argument for + * atomics: + */ + if (is_atomic(instr->opc) && (n != 0)) + return false; + + if (is_atomic(instr->opc) && !(instr->flags & IR3_INSTR_G)) + return false; + } + + break; + } + + return true; +} + +/* propagate register flags from src to dst.. negates need special + * handling to cancel each other out. 
+ */ +static void combine_flags(unsigned *dstflags, struct ir3_instruction *src) +{ + unsigned srcflags = src->regs[1]->flags; + + /* if what we are combining into already has (abs) flags, + * we can drop (neg) from src: + */ + if (*dstflags & IR3_REG_FABS) + srcflags &= ~IR3_REG_FNEG; + if (*dstflags & IR3_REG_SABS) + srcflags &= ~IR3_REG_SNEG; + + if (srcflags & IR3_REG_FABS) + *dstflags |= IR3_REG_FABS; + if (srcflags & IR3_REG_SABS) + *dstflags |= IR3_REG_SABS; + if (srcflags & IR3_REG_FNEG) + *dstflags ^= IR3_REG_FNEG; + if (srcflags & IR3_REG_SNEG) + *dstflags ^= IR3_REG_SNEG; + if (srcflags & IR3_REG_BNOT) + *dstflags ^= IR3_REG_BNOT; + + *dstflags &= ~IR3_REG_SSA; + *dstflags |= srcflags & IR3_REG_SSA; + *dstflags |= srcflags & IR3_REG_CONST; + *dstflags |= srcflags & IR3_REG_IMMED; + *dstflags |= srcflags & IR3_REG_RELATIV; + *dstflags |= srcflags & IR3_REG_ARRAY; + + /* if src of the src is boolean we can drop the (abs) since we know + * the source value is already a postitive integer. This cleans + * up the absnegs that get inserted when converting between nir and + * native boolean (see ir3_b2n/n2b) + */ + struct ir3_instruction *srcsrc = ssa(src->regs[1]); + if (srcsrc && is_bool(srcsrc)) + *dstflags &= ~IR3_REG_SABS; +} + +static struct ir3_register * +lower_immed(struct ir3_cp_ctx *ctx, struct ir3_register *reg, unsigned new_flags) +{ + unsigned swiz, idx, i; + + reg = ir3_reg_clone(ctx->shader, reg); + + /* in some cases, there are restrictions on (abs)/(neg) plus const.. 
+ * so just evaluate those and clear the flags: + */ + if (new_flags & IR3_REG_SABS) { + reg->iim_val = abs(reg->iim_val); + new_flags &= ~IR3_REG_SABS; + } + + if (new_flags & IR3_REG_FABS) { + reg->fim_val = fabs(reg->fim_val); + new_flags &= ~IR3_REG_FABS; + } + + if (new_flags & IR3_REG_SNEG) { + reg->iim_val = -reg->iim_val; + new_flags &= ~IR3_REG_SNEG; + } + + if (new_flags & IR3_REG_FNEG) { + reg->fim_val = -reg->fim_val; + new_flags &= ~IR3_REG_FNEG; + } + + /* Reallocate for 4 more elements whenever it's necessary */ + if (ctx->immediate_idx == ctx->so->immediates_size * 4) { + ctx->so->immediates_size += 4; + ctx->so->immediates = realloc (ctx->so->immediates, + ctx->so->immediates_size * sizeof (ctx->so->immediates[0])); + } + + for (i = 0; i < ctx->immediate_idx; i++) { + swiz = i % 4; + idx = i / 4; + + if (ctx->so->immediates[idx].val[swiz] == reg->uim_val) { + break; + } + } + + if (i == ctx->immediate_idx) { + /* need to generate a new immediate: */ + swiz = i % 4; + idx = i / 4; + ctx->so->immediates[idx].val[swiz] = reg->uim_val; + ctx->so->immediates_count = idx + 1; + ctx->immediate_idx++; + } + + new_flags &= ~IR3_REG_IMMED; + new_flags |= IR3_REG_CONST; + reg->flags = new_flags; + reg->num = i + (4 * ctx->so->constbase.immediate); + + return reg; +} + +static void +unuse(struct ir3_instruction *instr) +{ + debug_assert(instr->use_count > 0); + + if (--instr->use_count == 0) { + struct ir3_block *block = instr->block; + + instr->barrier_class = 0; + instr->barrier_conflict = 0; + + /* we don't want to remove anything in keeps (which could + * be things like array store's) + */ + for (unsigned i = 0; i < block->keeps_count; i++) { + debug_assert(block->keeps[i] != instr); + } + } +} + +/** + * Handle cp for a given src register. 
This additionally handles + * the cases of collapsing immedate/const (which replace the src + * register with a non-ssa src) or collapsing mov's from relative + * src (which needs to also fixup the address src reference by the + * instruction). + */ +static void +reg_cp(struct ir3_cp_ctx *ctx, struct ir3_instruction *instr, + struct ir3_register *reg, unsigned n) +{ + struct ir3_instruction *src = ssa(reg); + + if (is_eligible_mov(src, true)) { + /* simple case, no immed/const/relativ, only mov's w/ ssa src: */ + struct ir3_register *src_reg = src->regs[1]; + unsigned new_flags = reg->flags; + + combine_flags(&new_flags, src); + + if (valid_flags(instr, n, new_flags)) { + if (new_flags & IR3_REG_ARRAY) { + debug_assert(!(reg->flags & IR3_REG_ARRAY)); + reg->array = src_reg->array; + } + reg->flags = new_flags; + reg->instr = ssa(src_reg); + + instr->barrier_class |= src->barrier_class; + instr->barrier_conflict |= src->barrier_conflict; + + unuse(src); + reg->instr->use_count++; + } + + } else if (is_same_type_mov(src) && + /* cannot collapse const/immed/etc into meta instrs: */ + !is_meta(instr)) { + /* immed/const/etc cases, which require some special handling: */ + struct ir3_register *src_reg = src->regs[1]; + unsigned new_flags = reg->flags; + + combine_flags(&new_flags, src); + + if (!valid_flags(instr, n, new_flags)) { + /* See if lowering an immediate to const would help. */ + if (valid_flags(instr, n, (new_flags & ~IR3_REG_IMMED) | IR3_REG_CONST)) { + debug_assert(new_flags & IR3_REG_IMMED); + instr->regs[n + 1] = lower_immed(ctx, src_reg, new_flags); + return; + } + + /* special case for "normal" mad instructions, we can + * try swapping the first two args if that fits better. + * + * the "plain" MAD's (ie. 
the ones that don't shift first + * src prior to multiply) can swap their first two srcs if + * src[0] is !CONST and src[1] is CONST: + */ + if ((n == 1) && is_mad(instr->opc) && + !(instr->regs[0 + 1]->flags & (IR3_REG_CONST | IR3_REG_RELATIV)) && + valid_flags(instr, 0, new_flags & ~IR3_REG_IMMED)) { + /* swap src[0] and src[1]: */ + struct ir3_register *tmp; + tmp = instr->regs[0 + 1]; + instr->regs[0 + 1] = instr->regs[1 + 1]; + instr->regs[1 + 1] = tmp; + + n = 0; + } else { + return; + } + } + + /* Here we handle the special case of mov from + * CONST and/or RELATIV. These need to be handled + * specially, because in the case of move from CONST + * there is no src ir3_instruction so we need to + * replace the ir3_register. And in the case of + * RELATIV we need to handle the address register + * dependency. + */ + if (src_reg->flags & IR3_REG_CONST) { + /* an instruction cannot reference two different + * address registers: + */ + if ((src_reg->flags & IR3_REG_RELATIV) && + conflicts(instr->address, reg->instr->address)) + return; + + /* This seems to be a hw bug, or something where the timings + * just somehow don't work out. This restriction may only + * apply if the first src is also CONST. + */ + if ((opc_cat(instr->opc) == 3) && (n == 2) && + (src_reg->flags & IR3_REG_RELATIV) && + (src_reg->array.offset == 0)) + return; + + src_reg = ir3_reg_clone(instr->block->shader, src_reg); + src_reg->flags = new_flags; + instr->regs[n+1] = src_reg; + + if (src_reg->flags & IR3_REG_RELATIV) + ir3_instr_set_address(instr, reg->instr->address); + + return; + } + + if ((src_reg->flags & IR3_REG_RELATIV) && + !conflicts(instr->address, reg->instr->address)) { + src_reg = ir3_reg_clone(instr->block->shader, src_reg); + src_reg->flags = new_flags; + instr->regs[n+1] = src_reg; + ir3_instr_set_address(instr, reg->instr->address); + + return; + } + + /* NOTE: seems we can only do immed integers, so don't + * need to care about float. 
But we do need to handle + * abs/neg *before* checking that the immediate requires + * few enough bits to encode: + * + * TODO: do we need to do something to avoid accidentally + * catching a float immed? + */ + if (src_reg->flags & IR3_REG_IMMED) { + int32_t iim_val = src_reg->iim_val; + + debug_assert((opc_cat(instr->opc) == 1) || + (opc_cat(instr->opc) == 6) || + ir3_cat2_int(instr->opc) || + (is_mad(instr->opc) && (n == 0))); + + if (new_flags & IR3_REG_SABS) + iim_val = abs(iim_val); + + if (new_flags & IR3_REG_SNEG) + iim_val = -iim_val; + + if (new_flags & IR3_REG_BNOT) + iim_val = ~iim_val; + + /* other than category 1 (mov) we can only encode up to 10 bits: */ + if ((instr->opc == OPC_MOV) || + !((iim_val & ~0x3ff) && (-iim_val & ~0x3ff))) { + new_flags &= ~(IR3_REG_SABS | IR3_REG_SNEG | IR3_REG_BNOT); + src_reg = ir3_reg_clone(instr->block->shader, src_reg); + src_reg->flags = new_flags; + src_reg->iim_val = iim_val; + instr->regs[n+1] = src_reg; + } else if (valid_flags(instr, n, (new_flags & ~IR3_REG_IMMED) | IR3_REG_CONST)) { + /* See if lowering an immediate to const would help. */ + instr->regs[n+1] = lower_immed(ctx, src_reg, new_flags); + } + + return; + } + } +} + +/* Handle special case of eliminating output mov, and similar cases where + * there isn't a normal "consuming" instruction. In this case we cannot + * collapse flags (ie. 
output mov from const, or w/ abs/neg flags, cannot + * be eliminated) + */ +static struct ir3_instruction * +eliminate_output_mov(struct ir3_instruction *instr) +{ + if (is_eligible_mov(instr, false)) { + struct ir3_register *reg = instr->regs[1]; + if (!(reg->flags & IR3_REG_ARRAY)) { + struct ir3_instruction *src_instr = ssa(reg); + debug_assert(src_instr); + return src_instr; + } + } + return instr; +} + +/** + * Find instruction src's which are mov's that can be collapsed, replacing + * the mov dst with the mov src + */ +static void +instr_cp(struct ir3_cp_ctx *ctx, struct ir3_instruction *instr) +{ + struct ir3_register *reg; + + if (instr->regs_count == 0) + return; + + if (ir3_instr_check_mark(instr)) + return; + + /* walk down the graph from each src: */ + foreach_src_n(reg, n, instr) { + struct ir3_instruction *src = ssa(reg); + + if (!src) + continue; + + instr_cp(ctx, src); + + /* TODO non-indirect access we could figure out which register + * we actually want and allow cp.. + */ + if (reg->flags & IR3_REG_ARRAY) + continue; + + /* Don't CP absneg into meta instructions, that won't end well: */ + if (is_meta(instr) && (src->opc != OPC_MOV)) + continue; + + reg_cp(ctx, instr, reg, n); + } + + if (instr->regs[0]->flags & IR3_REG_ARRAY) { + struct ir3_instruction *src = ssa(instr->regs[0]); + if (src) + instr_cp(ctx, src); + } + + if (instr->address) { + instr_cp(ctx, instr->address); + ir3_instr_set_address(instr, eliminate_output_mov(instr->address)); + } + + /* we can end up with extra cmps.s from frontend, which uses a + * + * cmps.s p0.x, cond, 0 + * + * as a way to mov into the predicate register. But frequently 'cond' + * is itself a cmps.s/cmps.f/cmps.u. So detect this special case and + * just re-write the instruction writing predicate register to get rid + * of the double cmps. 
+ */ + if ((instr->opc == OPC_CMPS_S) && + (instr->regs[0]->num == regid(REG_P0, 0)) && + ssa(instr->regs[1]) && + (instr->regs[2]->flags & IR3_REG_IMMED) && + (instr->regs[2]->iim_val == 0)) { + struct ir3_instruction *cond = ssa(instr->regs[1]); + switch (cond->opc) { + case OPC_CMPS_S: + case OPC_CMPS_F: + case OPC_CMPS_U: + instr->opc = cond->opc; + instr->flags = cond->flags; + instr->cat2 = cond->cat2; + instr->address = cond->address; + instr->regs[1] = cond->regs[1]; + instr->regs[2] = cond->regs[2]; + instr->barrier_class |= cond->barrier_class; + instr->barrier_conflict |= cond->barrier_conflict; + unuse(cond); + break; + default: + break; + } + } +} + +void +ir3_cp(struct ir3 *ir, struct ir3_shader_variant *so) +{ + struct ir3_cp_ctx ctx = { + .shader = ir, + .so = so, + }; + + /* This is a bit annoying, and probably wouldn't be necessary if we + * tracked a reverse link from producing instruction to consumer. + * But we need to know when we've eliminated the last consumer of + * a mov, so we need to do a pass to first count consumers of a + * mov. 
+ */ + list_for_each_entry (struct ir3_block, block, &ir->block_list, node) { + list_for_each_entry (struct ir3_instruction, instr, &block->instr_list, node) { + struct ir3_instruction *src; + + /* by the way, we don't account for false-dep's, so the CP + * pass should always happen before false-dep's are inserted + */ + debug_assert(instr->deps_count == 0); + + foreach_ssa_src(src, instr) { + src->use_count++; + } + } + } + + ir3_clear_mark(ir); + + for (unsigned i = 0; i < ir->noutputs; i++) { + if (ir->outputs[i]) { + instr_cp(&ctx, ir->outputs[i]); + ir->outputs[i] = eliminate_output_mov(ir->outputs[i]); + } + } + + list_for_each_entry (struct ir3_block, block, &ir->block_list, node) { + if (block->condition) { + instr_cp(&ctx, block->condition); + block->condition = eliminate_output_mov(block->condition); + } + + for (unsigned i = 0; i < block->keeps_count; i++) { + instr_cp(&ctx, block->keeps[i]); + block->keeps[i] = eliminate_output_mov(block->keeps[i]); + } + } +} diff -Nru mesa-18.3.3/src/freedreno/ir3/ir3_depth.c mesa-19.0.1/src/freedreno/ir3/ir3_depth.c --- mesa-18.3.3/src/freedreno/ir3/ir3_depth.c 1970-01-01 00:00:00.000000000 +0000 +++ mesa-19.0.1/src/freedreno/ir3/ir3_depth.c 2019-03-31 23:16:37.000000000 +0000 @@ -0,0 +1,272 @@ +/* + * Copyright (C) 2014 Rob Clark + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. 
+ * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + * Authors: + * Rob Clark + */ + +#include "util/u_math.h" + +#include "ir3.h" + +/* + * Instruction Depth: + * + * Calculates weighted instruction depth, ie. the sum of # of needed + * instructions plus delay slots back to original input (ie INPUT or + * CONST). That is to say, an instructions depth is: + * + * depth(instr) { + * d = 0; + * // for each src register: + * foreach (src in instr->regs[1..n]) + * d = max(d, delayslots(src->instr, n) + depth(src->instr)); + * return d + 1; + * } + * + * After an instruction's depth is calculated, it is inserted into the + * blocks depth sorted list, which is used by the scheduling pass. + */ + +/* generally don't count false dependencies, since this can just be + * something like a barrier, or SSBO store. The exception is array + * dependencies if the assigner is an array write and the consumer + * reads the same array. 
+ */ +static bool +ignore_dep(struct ir3_instruction *assigner, + struct ir3_instruction *consumer, unsigned n) +{ + if (!__is_false_dep(consumer, n)) + return false; + + if (assigner->barrier_class & IR3_BARRIER_ARRAY_W) { + struct ir3_register *dst = assigner->regs[0]; + struct ir3_register *src; + + debug_assert(dst->flags & IR3_REG_ARRAY); + + foreach_src(src, consumer) { + if ((src->flags & IR3_REG_ARRAY) && + (dst->array.id == src->array.id)) { + return false; + } + } + } + + return true; +} + +/* calculate required # of delay slots between the instruction that + * assigns a value and the one that consumes + */ +int ir3_delayslots(struct ir3_instruction *assigner, + struct ir3_instruction *consumer, unsigned n) +{ + if (ignore_dep(assigner, consumer, n)) + return 0; + + /* worst case is cat1-3 (alu) -> cat4/5 needing 6 cycles, normal + * alu -> alu needs 3 cycles, cat4 -> alu and texture fetch + * handled with sync bits + */ + + if (is_meta(assigner)) + return 0; + + if (writes_addr(assigner)) + return 6; + + /* handled via sync flags: */ + if (is_sfu(assigner) || is_tex(assigner) || is_mem(assigner)) + return 0; + + /* assigner must be alu: */ + if (is_flow(consumer) || is_sfu(consumer) || is_tex(consumer) || + is_mem(consumer)) { + return 6; + } else if ((is_mad(consumer->opc) || is_madsh(consumer->opc)) && + (n == 3)) { + /* special case, 3rd src to cat3 not required on first cycle */ + return 1; + } else { + return 3; + } +} + +void +ir3_insert_by_depth(struct ir3_instruction *instr, struct list_head *list) +{ + /* remove from existing spot in list: */ + list_delinit(&instr->node); + + /* find where to re-insert instruction: */ + list_for_each_entry (struct ir3_instruction, pos, list, node) { + if (pos->depth > instr->depth) { + list_add(&instr->node, &pos->node); + return; + } + } + /* if we get here, we didn't find an insertion spot: */ + list_addtail(&instr->node, list); +} + +static void +ir3_instr_depth(struct ir3_instruction *instr, unsigned boost, 
bool falsedep) +{ + struct ir3_instruction *src; + + /* don't mark falsedep's as used, but otherwise process them normally: */ + if (!falsedep) + instr->flags &= ~IR3_INSTR_UNUSED; + + if (ir3_instr_check_mark(instr)) + return; + + instr->depth = 0; + + foreach_ssa_src_n(src, i, instr) { + unsigned sd; + + /* visit child to compute it's depth: */ + ir3_instr_depth(src, boost, __is_false_dep(instr, i)); + + /* for array writes, no need to delay on previous write: */ + if (i == 0) + continue; + + sd = ir3_delayslots(src, instr, i) + src->depth; + sd += boost; + + instr->depth = MAX2(instr->depth, sd); + } + + if (!is_meta(instr)) + instr->depth++; + + ir3_insert_by_depth(instr, &instr->block->instr_list); +} + +static bool +remove_unused_by_block(struct ir3_block *block) +{ + bool progress = false; + list_for_each_entry_safe (struct ir3_instruction, instr, &block->instr_list, node) { + if (instr->opc == OPC_END) + continue; + if (instr->flags & IR3_INSTR_UNUSED) { + if (instr->opc == OPC_META_FO) { + struct ir3_instruction *src = ssa(instr->regs[1]); + /* leave inputs alone.. we can't optimize out components of + * an input, since the hw is still going to be writing all + * of the components, and we could end up in a situation + * where multiple inputs overlap. + */ + if ((src->opc != OPC_META_INPUT) && + (src->regs[0]->wrmask > 1)) { + src->regs[0]->wrmask &= ~(1 << instr->fo.off); + + /* prune no-longer needed right-neighbors. We could + * probably do the same for left-neighbors (ie. 
tex + * fetch that only need .yw components), but that + * makes RA a bit more confusing than it already is + */ + struct ir3_instruction *n = instr; + while (n && n->cp.right) + n = n->cp.right; + while (n->flags & IR3_INSTR_UNUSED) { + n = n->cp.left; + if (!n) + break; + n->cp.right = NULL; + } + } + } + list_delinit(&instr->node); + progress = true; + } + } + return progress; +} + +static bool +compute_depth_and_remove_unused(struct ir3 *ir) +{ + unsigned i; + bool progress = false; + + ir3_clear_mark(ir); + + /* initially mark everything as unused, we'll clear the flag as we + * visit the instructions: + */ + list_for_each_entry (struct ir3_block, block, &ir->block_list, node) { + list_for_each_entry (struct ir3_instruction, instr, &block->instr_list, node) { + instr->flags |= IR3_INSTR_UNUSED; + } + } + + for (i = 0; i < ir->noutputs; i++) + if (ir->outputs[i]) + ir3_instr_depth(ir->outputs[i], 0, false); + + list_for_each_entry (struct ir3_block, block, &ir->block_list, node) { + for (i = 0; i < block->keeps_count; i++) + ir3_instr_depth(block->keeps[i], 0, false); + + /* We also need to account for if-condition: */ + if (block->condition) + ir3_instr_depth(block->condition, 6, false); + } + + /* mark un-used instructions: */ + list_for_each_entry (struct ir3_block, block, &ir->block_list, node) { + progress |= remove_unused_by_block(block); + } + + /* note that we can end up with unused indirects, but we should + * not end up with unused predicates. 
+ */ + for (i = 0; i < ir->indirects_count; i++) { + struct ir3_instruction *instr = ir->indirects[i]; + if (instr && (instr->flags & IR3_INSTR_UNUSED)) + ir->indirects[i] = NULL; + } + + /* cleanup unused inputs: */ + for (i = 0; i < ir->ninputs; i++) { + struct ir3_instruction *in = ir->inputs[i]; + if (in && (in->flags & IR3_INSTR_UNUSED)) + ir->inputs[i] = NULL; + } + + return progress; +} + +void +ir3_depth(struct ir3 *ir) +{ + bool progress; + do { + progress = compute_depth_and_remove_unused(ir); + } while (progress); +} diff -Nru mesa-18.3.3/src/freedreno/ir3/ir3_group.c mesa-19.0.1/src/freedreno/ir3/ir3_group.c --- mesa-18.3.3/src/freedreno/ir3/ir3_group.c 1970-01-01 00:00:00.000000000 +0000 +++ mesa-19.0.1/src/freedreno/ir3/ir3_group.c 2019-03-31 23:16:37.000000000 +0000 @@ -0,0 +1,274 @@ +/* + * Copyright (C) 2014 Rob Clark + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ * + * Authors: + * Rob Clark + */ + +#include "ir3.h" + +/* + * Find/group instruction neighbors: + */ + +/* bleh.. we need to do the same group_n() thing for both inputs/outputs + * (where we have a simple instr[] array), and fanin nodes (where we have + * an extra indirection via reg->instr). + */ +struct group_ops { + struct ir3_instruction *(*get)(void *arr, int idx); + void (*insert_mov)(void *arr, int idx, struct ir3_instruction *instr); +}; + +static struct ir3_instruction *arr_get(void *arr, int idx) +{ + return ((struct ir3_instruction **)arr)[idx]; +} +static void arr_insert_mov_out(void *arr, int idx, struct ir3_instruction *instr) +{ + ((struct ir3_instruction **)arr)[idx] = + ir3_MOV(instr->block, instr, TYPE_F32); +} +static void arr_insert_mov_in(void *arr, int idx, struct ir3_instruction *instr) +{ + /* so, we can't insert a mov in front of a meta:in.. and the downstream + * instruction already has a pointer to 'instr'. So we cheat a bit and + * morph the meta:in instruction into a mov and insert a new meta:in + * in front. 
+ */ + struct ir3_instruction *in; + + debug_assert(instr->regs_count == 1); + + in = ir3_instr_create(instr->block, OPC_META_INPUT); + in->inout.block = instr->block; + ir3_reg_create(in, instr->regs[0]->num, 0); + + /* create src reg for meta:in and fixup to now be a mov: */ + ir3_reg_create(instr, 0, IR3_REG_SSA)->instr = in; + instr->opc = OPC_MOV; + instr->cat1.src_type = TYPE_F32; + instr->cat1.dst_type = TYPE_F32; + + ((struct ir3_instruction **)arr)[idx] = in; +} +static struct group_ops arr_ops_out = { arr_get, arr_insert_mov_out }; +static struct group_ops arr_ops_in = { arr_get, arr_insert_mov_in }; + +static struct ir3_instruction *instr_get(void *arr, int idx) +{ + return ssa(((struct ir3_instruction *)arr)->regs[idx+1]); +} +static void +instr_insert_mov(void *arr, int idx, struct ir3_instruction *instr) +{ + ((struct ir3_instruction *)arr)->regs[idx+1]->instr = + ir3_MOV(instr->block, instr, TYPE_F32); +} +static struct group_ops instr_ops = { instr_get, instr_insert_mov }; + +/* verify that cur != instr, but cur is also not in instr's neighbor-list: */ +static bool +in_neighbor_list(struct ir3_instruction *instr, struct ir3_instruction *cur, int pos) +{ + int idx = 0; + + if (!instr) + return false; + + if (instr == cur) + return true; + + for (instr = ir3_neighbor_first(instr); instr; instr = instr->cp.right) + if ((idx++ != pos) && (instr == cur)) + return true; + + return false; +} + +static void +group_n(struct group_ops *ops, void *arr, unsigned n) +{ + unsigned i, j; + + /* first pass, figure out what has conflicts and needs a mov + * inserted. Do this up front, before starting to setup + * left/right neighbor pointers. Trying to do it in a single + * pass could result in a situation where we can't even setup + * the mov's right neighbor ptr if the next instr also needs + * a mov. + */ +restart: + for (i = 0; i < n; i++) { + struct ir3_instruction *instr = ops->get(arr, i); + if (instr) { + struct ir3_instruction *left = (i > 0) ? 
ops->get(arr, i - 1) : NULL; + struct ir3_instruction *right = (i < (n-1)) ? ops->get(arr, i + 1) : NULL; + bool conflict; + + /* check for left/right neighbor conflicts: */ + conflict = conflicts(instr->cp.left, left) || + conflicts(instr->cp.right, right); + + /* Mixing array elements and higher register classes + * (ie. groups) doesn't really work out in RA. See: + * + * https://trello.com/c/DqeDkeVf/156-bug-with-stk-70frag + */ + if (instr->regs[0]->flags & IR3_REG_ARRAY) + conflict = true; + + /* we also can't have an instr twice in the group: */ + for (j = i + 1; (j < n) && !conflict; j++) + if (in_neighbor_list(ops->get(arr, j), instr, i)) + conflict = true; + + if (conflict) { + ops->insert_mov(arr, i, instr); + /* inserting the mov may have caused a conflict + * against the previous: + */ + goto restart; + } + } + } + + /* second pass, now that we've inserted mov's, fixup left/right + * neighbors. This is guaranteed to succeed, since by definition + * the newly inserted mov's cannot conflict with anything. + */ + for (i = 0; i < n; i++) { + struct ir3_instruction *instr = ops->get(arr, i); + if (instr) { + struct ir3_instruction *left = (i > 0) ? ops->get(arr, i - 1) : NULL; + struct ir3_instruction *right = (i < (n-1)) ? ops->get(arr, i + 1) : NULL; + + debug_assert(!conflicts(instr->cp.left, left)); + if (left) { + instr->cp.left_cnt++; + instr->cp.left = left; + } + + debug_assert(!conflicts(instr->cp.right, right)); + if (right) { + instr->cp.right_cnt++; + instr->cp.right = right; + } + } + } +} + +static void +instr_find_neighbors(struct ir3_instruction *instr) +{ + struct ir3_instruction *src; + + if (ir3_instr_check_mark(instr)) + return; + + if (instr->opc == OPC_META_FI) + group_n(&instr_ops, instr, instr->regs_count - 1); + + foreach_ssa_src(src, instr) + instr_find_neighbors(src); +} + +/* a bit of sadness.. we can't have "holes" in inputs from PoV of + * register assignment, they still need to be grouped together. 
So + * we need to insert dummy/padding instruction for grouping, and + * then take it back out again before anyone notices. + */ +static void +pad_and_group_input(struct ir3_instruction **input, unsigned n) +{ + int i, mask = 0; + struct ir3_block *block = NULL; + + for (i = n - 1; i >= 0; i--) { + struct ir3_instruction *instr = input[i]; + if (instr) { + block = instr->block; + } else if (block) { + instr = ir3_NOP(block); + ir3_reg_create(instr, 0, IR3_REG_SSA); /* dummy dst */ + input[i] = instr; + mask |= (1 << i); + } + } + + group_n(&arr_ops_in, input, n); + + for (i = 0; i < n; i++) { + if (mask & (1 << i)) + input[i] = NULL; + } +} + +static void +find_neighbors(struct ir3 *ir) +{ + unsigned i; + + /* shader inputs/outputs themselves must be contiguous as well: + * + * NOTE: group inputs first, since we only insert mov's + * *before* the conflicted instr (and that would go badly + * for inputs). By doing inputs first, we should never + * have a conflict on inputs.. pushing any conflict to + * resolve to the outputs, for stuff like: + * + * MOV OUT[n], IN[m].wzyx + * + * NOTE: we assume here inputs/outputs are grouped in vec4. 
+ * This logic won't quite cut it if we don't align smaller + * on vec4 boundaries + */ + for (i = 0; i < ir->ninputs; i += 4) + pad_and_group_input(&ir->inputs[i], 4); + for (i = 0; i < ir->noutputs; i += 4) + group_n(&arr_ops_out, &ir->outputs[i], 4); + + for (i = 0; i < ir->noutputs; i++) { + if (ir->outputs[i]) { + struct ir3_instruction *instr = ir->outputs[i]; + instr_find_neighbors(instr); + } + } + + list_for_each_entry (struct ir3_block, block, &ir->block_list, node) { + for (i = 0; i < block->keeps_count; i++) { + struct ir3_instruction *instr = block->keeps[i]; + instr_find_neighbors(instr); + } + + /* We also need to account for if-condition: */ + if (block->condition) + instr_find_neighbors(block->condition); + } +} + +void +ir3_group(struct ir3 *ir) +{ + ir3_clear_mark(ir); + find_neighbors(ir); +} diff -Nru mesa-18.3.3/src/freedreno/ir3/ir3.h mesa-19.0.1/src/freedreno/ir3/ir3.h --- mesa-18.3.3/src/freedreno/ir3/ir3.h 1970-01-01 00:00:00.000000000 +0000 +++ mesa-19.0.1/src/freedreno/ir3/ir3.h 2019-03-31 23:16:37.000000000 +0000 @@ -0,0 +1,1447 @@ +/* + * Copyright (c) 2013 Rob Clark + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#ifndef IR3_H_ +#define IR3_H_ + +#include +#include + +#include "compiler/shader_enums.h" + +#include "util/u_debug.h" +#include "util/list.h" + +#include "instr-a3xx.h" + +/* low level intermediate representation of an adreno shader program */ + +struct ir3_compiler; +struct ir3; +struct ir3_instruction; +struct ir3_block; + +struct ir3_info { + uint32_t gpu_id; + uint16_t sizedwords; + uint16_t instrs_count; /* expanded to account for rpt's */ + /* NOTE: max_reg, etc, does not include registers not touched + * by the shader (ie. vertex fetched via VFD_DECODE but not + * touched by shader) + */ + int8_t max_reg; /* highest GPR # used by shader */ + int8_t max_half_reg; + int16_t max_const; + + /* number of sync bits: */ + uint16_t ss, sy; +}; + +struct ir3_register { + enum { + IR3_REG_CONST = 0x001, + IR3_REG_IMMED = 0x002, + IR3_REG_HALF = 0x004, + /* high registers are used for some things in compute shaders, + * for example. Seems to be for things that are global to all + * threads in a wave, so possibly these are global/shared by + * all the threads in the wave? + */ + IR3_REG_HIGH = 0x008, + IR3_REG_RELATIV= 0x010, + IR3_REG_R = 0x020, + /* Most instructions, it seems, can do float abs/neg but not + * integer. The CP pass needs to know what is intended (int or + * float) in order to do the right thing. For this reason the + * abs/neg flags are split out into float and int variants. In + * addition, .b (bitwise) operations, the negate is actually a + * bitwise not, so split that out into a new flag to make it + * more clear. 
+ */ + IR3_REG_FNEG = 0x040, + IR3_REG_FABS = 0x080, + IR3_REG_SNEG = 0x100, + IR3_REG_SABS = 0x200, + IR3_REG_BNOT = 0x400, + IR3_REG_EVEN = 0x800, + IR3_REG_POS_INF= 0x1000, + /* (ei) flag, end-input? Set on last bary, presumably to signal + * that the shader needs no more input: + */ + IR3_REG_EI = 0x2000, + /* meta-flags, for intermediate stages of IR, ie. + * before register assignment is done: + */ + IR3_REG_SSA = 0x4000, /* 'instr' is ptr to assigning instr */ + IR3_REG_ARRAY = 0x8000, + + } flags; + + /* normal registers: + * the component is in the low two bits of the reg #, so + * rN.x becomes: (N << 2) | x + */ + int num; + union { + /* immediate: */ + int32_t iim_val; + uint32_t uim_val; + float fim_val; + /* relative: */ + struct { + uint16_t id; + int16_t offset; + } array; + }; + + /* For IR3_REG_SSA, src registers contain ptr back to assigning + * instruction. + * + * For IR3_REG_ARRAY, the pointer is back to the last dependent + * array access (although the net effect is the same, it points + * back to a previous instruction that we depend on). + */ + struct ir3_instruction *instr; + + union { + /* used for cat5 instructions, but also for internal/IR level + * tracking of what registers are read/written by an instruction. + * wrmask may be a bad name since it is used to represent both + * src and dst that touch multiple adjacent registers. + */ + unsigned wrmask; + /* for relative addressing, 32bits for array size is too small, + * but otoh we don't need to deal with disjoint sets, so instead + * use a simple size field (number of scalar components). 
+ */ + unsigned size; + }; +}; + +/* + * Stupid/simple growable array implementation: + */ +#define DECLARE_ARRAY(type, name) \ + unsigned name ## _count, name ## _sz; \ + type * name; + +#define array_insert(ctx, arr, val) do { \ + if (arr ## _count == arr ## _sz) { \ + arr ## _sz = MAX2(2 * arr ## _sz, 16); \ + arr = reralloc_size(ctx, arr, arr ## _sz * sizeof(arr[0])); \ + } \ + arr[arr ##_count++] = val; \ + } while (0) + +struct ir3_instruction { + struct ir3_block *block; + opc_t opc; + enum { + /* (sy) flag is set on first instruction, and after sample + * instructions (probably just on RAW hazard). + */ + IR3_INSTR_SY = 0x001, + /* (ss) flag is set on first instruction, and first instruction + * to depend on the result of "long" instructions (RAW hazard): + * + * rcp, rsq, log2, exp2, sin, cos, sqrt + * + * It seems to synchronize until all in-flight instructions are + * completed, for example: + * + * rsq hr1.w, hr1.w + * add.f hr2.z, (neg)hr2.z, hc0.y + * mul.f hr2.w, (neg)hr2.y, (neg)hr2.y + * rsq hr2.x, hr2.x + * (rpt1)nop + * mad.f16 hr2.w, hr2.z, hr2.z, hr2.w + * nop + * mad.f16 hr2.w, (neg)hr0.w, (neg)hr0.w, hr2.w + * (ss)(rpt2)mul.f hr1.x, (r)hr1.x, hr1.w + * (rpt2)mul.f hr0.x, (neg)(r)hr0.x, hr2.x + * + * The last mul.f does not have (ss) set, presumably because the + * (ss) on the previous instruction does the job. + * + * The blob driver also seems to set it on WAR hazards, although + * not really clear if this is needed or just blob compiler being + * sloppy. 
So far I haven't found a case where removing the (ss) + * causes problems for WAR hazard, but I could just be getting + * lucky: + * + * rcp r1.y, r3.y + * (ss)(rpt2)mad.f32 r3.y, (r)c9.x, r1.x, (r)r3.z + * + */ + IR3_INSTR_SS = 0x002, + /* (jp) flag is set on jump targets: + */ + IR3_INSTR_JP = 0x004, + IR3_INSTR_UL = 0x008, + IR3_INSTR_3D = 0x010, + IR3_INSTR_A = 0x020, + IR3_INSTR_O = 0x040, + IR3_INSTR_P = 0x080, + IR3_INSTR_S = 0x100, + IR3_INSTR_S2EN = 0x200, + IR3_INSTR_G = 0x400, + IR3_INSTR_SAT = 0x800, + /* meta-flags, for intermediate stages of IR, ie. + * before register assignment is done: + */ + IR3_INSTR_MARK = 0x1000, + IR3_INSTR_UNUSED= 0x2000, + } flags; + int repeat; +#ifdef DEBUG + unsigned regs_max; +#endif + unsigned regs_count; + struct ir3_register **regs; + union { + struct { + char inv; + char comp; + int immed; + struct ir3_block *target; + } cat0; + struct { + type_t src_type, dst_type; + } cat1; + struct { + enum { + IR3_COND_LT = 0, + IR3_COND_LE = 1, + IR3_COND_GT = 2, + IR3_COND_GE = 3, + IR3_COND_EQ = 4, + IR3_COND_NE = 5, + } condition; + } cat2; + struct { + unsigned samp, tex; + type_t type; + } cat5; + struct { + type_t type; + int src_offset; + int dst_offset; + int iim_val : 3; /* for ldgb/stgb, # of components */ + int d : 3; + bool typed : 1; + } cat6; + struct { + unsigned w : 1; /* write */ + unsigned r : 1; /* read */ + unsigned l : 1; /* local */ + unsigned g : 1; /* global */ + } cat7; + /* for meta-instructions, just used to hold extra data + * before instruction scheduling, etc + */ + struct { + int off; /* component/offset */ + } fo; + struct { + struct ir3_block *block; + } inout; + }; + + /* transient values used during various algorithms: */ + union { + /* The instruction depth is the max dependency distance to output. + * + * You can also think of it as the "cost", if we did any sort of + * optimization for register footprint. Ie. 
a value that is just + * result of moving a const to a reg would have a low cost, so to + * it could make sense to duplicate the instruction at various + * points where the result is needed to reduce register footprint. + */ + unsigned depth; + /* When we get to the RA stage, we no longer need depth, but + * we do need instruction's position/name: + */ + struct { + uint16_t ip; + uint16_t name; + }; + }; + + /* used for per-pass extra instruction data. + */ + void *data; + + /* Used during CP and RA stages. For fanin and shader inputs/ + * outputs where we need a sequence of consecutive registers, + * keep track of each src instructions left (ie 'n-1') and right + * (ie 'n+1') neighbor. The front-end must insert enough mov's + * to ensure that each instruction has at most one left and at + * most one right neighbor. During the copy-propagation pass, + * we only remove mov's when we can preserve this constraint. + * And during the RA stage, we use the neighbor information to + * allocate a block of registers in one shot. + * + * TODO: maybe just add something like: + * struct ir3_instruction_ref { + * struct ir3_instruction *instr; + * unsigned cnt; + * } + * + * Or can we get away without the refcnt stuff? It seems like + * it should be overkill.. the problem is if, potentially after + * already eliminating some mov's, if you have a single mov that + * needs to be grouped with it's neighbors in two different + * places (ex. shader output and a fanin). + */ + struct { + struct ir3_instruction *left, *right; + uint16_t left_cnt, right_cnt; + } cp; + + /* an instruction can reference at most one address register amongst + * it's src/dst registers. Beyond that, you need to insert mov's. + * + * NOTE: do not write this directly, use ir3_instr_set_address() + */ + struct ir3_instruction *address; + + /* Tracking for additional dependent instructions. Used to handle + * barriers, WAR hazards for arrays/SSBOs/etc. 
+ */ + DECLARE_ARRAY(struct ir3_instruction *, deps); + + /* + * From PoV of instruction scheduling, not execution (ie. ignores global/ + * local distinction): + * shared image atomic SSBO everything + * barrier()/ - R/W R/W R/W R/W X + * groupMemoryBarrier() + * memoryBarrier() - R/W R/W + * (but only images declared coherent?) + * memoryBarrierAtomic() - R/W + * memoryBarrierBuffer() - R/W + * memoryBarrierImage() - R/W + * memoryBarrierShared() - R/W + * + * TODO I think for SSBO/image/shared, in cases where we can determine + * which variable is accessed, we don't need to care about accesses to + * different variables (unless declared coherent??) + */ + enum { + IR3_BARRIER_EVERYTHING = 1 << 0, + IR3_BARRIER_SHARED_R = 1 << 1, + IR3_BARRIER_SHARED_W = 1 << 2, + IR3_BARRIER_IMAGE_R = 1 << 3, + IR3_BARRIER_IMAGE_W = 1 << 4, + IR3_BARRIER_BUFFER_R = 1 << 5, + IR3_BARRIER_BUFFER_W = 1 << 6, + IR3_BARRIER_ARRAY_R = 1 << 7, + IR3_BARRIER_ARRAY_W = 1 << 8, + } barrier_class, barrier_conflict; + + /* Entry in ir3_block's instruction list: */ + struct list_head node; + + int use_count; /* currently just updated/used by cp */ + +#ifdef DEBUG + uint32_t serialno; +#endif +}; + +static inline struct ir3_instruction * +ir3_neighbor_first(struct ir3_instruction *instr) +{ + int cnt = 0; + while (instr->cp.left) { + instr = instr->cp.left; + if (++cnt > 0xffff) { + debug_assert(0); + break; + } + } + return instr; +} + +static inline int ir3_neighbor_count(struct ir3_instruction *instr) +{ + int num = 1; + + debug_assert(!instr->cp.left); + + while (instr->cp.right) { + num++; + instr = instr->cp.right; + if (num > 0xffff) { + debug_assert(0); + break; + } + } + + return num; +} + +struct ir3 { + struct ir3_compiler *compiler; + + unsigned ninputs, noutputs; + struct ir3_instruction **inputs; + struct ir3_instruction **outputs; + + /* Track bary.f (and ldlv) instructions.. 
this is needed in + * scheduling to ensure that all varying fetches happen before + * any potential kill instructions. The hw gets grumpy if all + * threads in a group are killed before the last bary.f gets + * a chance to signal end of input (ei). + */ + DECLARE_ARRAY(struct ir3_instruction *, baryfs); + + /* Track all indirect instructions (read and write). To avoid + * deadlock scenario where an address register gets scheduled, + * but other dependent src instructions cannot be scheduled due + * to dependency on a *different* address register value, the + * scheduler needs to ensure that all dependencies other than + * the instruction other than the address register are scheduled + * before the one that writes the address register. Having a + * convenient list of instructions that reference some address + * register simplifies this. + */ + DECLARE_ARRAY(struct ir3_instruction *, indirects); + + /* and same for instructions that consume predicate register: */ + DECLARE_ARRAY(struct ir3_instruction *, predicates); + + /* Track texture sample instructions which need texture state + * patched in (for astc-srgb workaround): + */ + DECLARE_ARRAY(struct ir3_instruction *, astc_srgb); + + /* List of blocks: */ + struct list_head block_list; + + /* List of ir3_array's: */ + struct list_head array_list; + +#ifdef DEBUG + unsigned block_count, instr_count; +#endif +}; + +struct ir3_array { + struct list_head node; + unsigned length; + unsigned id; + + struct nir_register *r; + + /* To avoid array write's from getting DCE'd, keep track of the + * most recent write. Any array access depends on the most + * recent write. This way, nothing depends on writes after the + * last read. 
But all the writes that happen before that have + * something depending on them + */ + struct ir3_instruction *last_write; + + /* extra stuff used in RA pass: */ + unsigned base; /* base vreg name */ + unsigned reg; /* base physical reg */ + uint16_t start_ip, end_ip; +}; + +struct ir3_array * ir3_lookup_array(struct ir3 *ir, unsigned id); + +struct ir3_block { + struct list_head node; + struct ir3 *shader; + + const struct nir_block *nblock; + + struct list_head instr_list; /* list of ir3_instruction */ + + /* each block has either one or two successors.. in case of + * two successors, 'condition' decides which one to follow. + * A block preceding an if/else has two successors. + */ + struct ir3_instruction *condition; + struct ir3_block *successors[2]; + + unsigned predecessors_count; + struct ir3_block **predecessors; + + uint16_t start_ip, end_ip; + + /* Track instructions which do not write a register but other- + * wise must not be discarded (such as kill, stg, etc) + */ + DECLARE_ARRAY(struct ir3_instruction *, keeps); + + /* used for per-pass extra block data. Mainly used right + * now in RA step to track livein/liveout. 
+ */ + void *data; + +#ifdef DEBUG + uint32_t serialno; +#endif +}; + +static inline uint32_t +block_id(struct ir3_block *block) +{ +#ifdef DEBUG + return block->serialno; +#else + return (uint32_t)(unsigned long)block; +#endif +} + +struct ir3 * ir3_create(struct ir3_compiler *compiler, + unsigned nin, unsigned nout); +void ir3_destroy(struct ir3 *shader); +void * ir3_assemble(struct ir3 *shader, + struct ir3_info *info, uint32_t gpu_id); +void * ir3_alloc(struct ir3 *shader, int sz); + +struct ir3_block * ir3_block_create(struct ir3 *shader); + +struct ir3_instruction * ir3_instr_create(struct ir3_block *block, opc_t opc); +struct ir3_instruction * ir3_instr_create2(struct ir3_block *block, + opc_t opc, int nreg); +struct ir3_instruction * ir3_instr_clone(struct ir3_instruction *instr); +void ir3_instr_add_dep(struct ir3_instruction *instr, struct ir3_instruction *dep); +const char *ir3_instr_name(struct ir3_instruction *instr); + +struct ir3_register * ir3_reg_create(struct ir3_instruction *instr, + int num, int flags); +struct ir3_register * ir3_reg_clone(struct ir3 *shader, + struct ir3_register *reg); + +void ir3_instr_set_address(struct ir3_instruction *instr, + struct ir3_instruction *addr); + +static inline bool ir3_instr_check_mark(struct ir3_instruction *instr) +{ + if (instr->flags & IR3_INSTR_MARK) + return true; /* already visited */ + instr->flags |= IR3_INSTR_MARK; + return false; +} + +void ir3_block_clear_mark(struct ir3_block *block); +void ir3_clear_mark(struct ir3 *shader); + +unsigned ir3_count_instructions(struct ir3 *ir); + +static inline int ir3_instr_regno(struct ir3_instruction *instr, + struct ir3_register *reg) +{ + unsigned i; + for (i = 0; i < instr->regs_count; i++) + if (reg == instr->regs[i]) + return i; + return -1; +} + + +#define MAX_ARRAYS 16 + +/* comp: + * 0 - x + * 1 - y + * 2 - z + * 3 - w + */ +static inline uint32_t regid(int num, int comp) +{ + return (num << 2) | (comp & 0x3); +} + +static inline uint32_t reg_num(struct 
ir3_register *reg) +{ + return reg->num >> 2; +} + +static inline uint32_t reg_comp(struct ir3_register *reg) +{ + return reg->num & 0x3; +} + +static inline bool is_flow(struct ir3_instruction *instr) +{ + return (opc_cat(instr->opc) == 0); +} + +static inline bool is_kill(struct ir3_instruction *instr) +{ + return instr->opc == OPC_KILL; +} + +static inline bool is_nop(struct ir3_instruction *instr) +{ + return instr->opc == OPC_NOP; +} + +/* Is it a non-transformative (ie. not type changing) mov? This can + * also include absneg.s/absneg.f, which for the most part can be + * treated as a mov (single src argument). + */ +static inline bool is_same_type_mov(struct ir3_instruction *instr) +{ + struct ir3_register *dst; + + switch (instr->opc) { + case OPC_MOV: + if (instr->cat1.src_type != instr->cat1.dst_type) + return false; + break; + case OPC_ABSNEG_F: + case OPC_ABSNEG_S: + if (instr->flags & IR3_INSTR_SAT) + return false; + break; + default: + return false; + } + + dst = instr->regs[0]; + + /* mov's that write to a0.x or p0.x are special: */ + if (dst->num == regid(REG_P0, 0)) + return false; + if (dst->num == regid(REG_A0, 0)) + return false; + + if (dst->flags & (IR3_REG_RELATIV | IR3_REG_ARRAY)) + return false; + + return true; +} + +static inline bool is_alu(struct ir3_instruction *instr) +{ + return (1 <= opc_cat(instr->opc)) && (opc_cat(instr->opc) <= 3); +} + +static inline bool is_sfu(struct ir3_instruction *instr) +{ + return (opc_cat(instr->opc) == 4); +} + +static inline bool is_tex(struct ir3_instruction *instr) +{ + return (opc_cat(instr->opc) == 5); +} + +static inline bool is_mem(struct ir3_instruction *instr) +{ + return (opc_cat(instr->opc) == 6); +} + +static inline bool is_barrier(struct ir3_instruction *instr) +{ + return (opc_cat(instr->opc) == 7); +} + +static inline bool +is_store(struct ir3_instruction *instr) +{ + /* these instructions, the "destination" register is + * actually a source, the address to store to. 
+ */ + switch (instr->opc) { + case OPC_STG: + case OPC_STGB: + case OPC_STIB: + case OPC_STP: + case OPC_STL: + case OPC_STLW: + case OPC_L2G: + case OPC_G2L: + return true; + default: + return false; + } +} + +static inline bool is_load(struct ir3_instruction *instr) +{ + switch (instr->opc) { + case OPC_LDG: + case OPC_LDGB: + case OPC_LDL: + case OPC_LDP: + case OPC_L2G: + case OPC_LDLW: + case OPC_LDC: + case OPC_LDLV: + /* probably some others too.. */ + return true; + default: + return false; + } +} + +static inline bool is_input(struct ir3_instruction *instr) +{ + /* in some cases, ldlv is used to fetch varying without + * interpolation.. fortunately inloc is the first src + * register in either case + */ + switch (instr->opc) { + case OPC_LDLV: + case OPC_BARY_F: + return true; + default: + return false; + } +} + +static inline bool is_bool(struct ir3_instruction *instr) +{ + switch (instr->opc) { + case OPC_CMPS_F: + case OPC_CMPS_S: + case OPC_CMPS_U: + return true; + default: + return false; + } +} + +static inline bool is_meta(struct ir3_instruction *instr) +{ + /* TODO how should we count PHI (and maybe fan-in/out) which + * might actually contribute some instructions to the final + * result? 
+ */ + return (opc_cat(instr->opc) == -1); +} + +static inline bool writes_addr(struct ir3_instruction *instr) +{ + if (instr->regs_count > 0) { + struct ir3_register *dst = instr->regs[0]; + return reg_num(dst) == REG_A0; + } + return false; +} + +static inline bool writes_pred(struct ir3_instruction *instr) +{ + if (instr->regs_count > 0) { + struct ir3_register *dst = instr->regs[0]; + return reg_num(dst) == REG_P0; + } + return false; +} + +/* returns defining instruction for reg */ +/* TODO better name */ +static inline struct ir3_instruction *ssa(struct ir3_register *reg) +{ + if (reg->flags & (IR3_REG_SSA | IR3_REG_ARRAY)) { + return reg->instr; + } + return NULL; +} + +static inline bool conflicts(struct ir3_instruction *a, + struct ir3_instruction *b) +{ + return (a && b) && (a != b); +} + +static inline bool reg_gpr(struct ir3_register *r) +{ + if (r->flags & (IR3_REG_CONST | IR3_REG_IMMED)) + return false; + if ((reg_num(r) == REG_A0) || (reg_num(r) == REG_P0)) + return false; + return true; +} + +static inline type_t half_type(type_t type) +{ + switch (type) { + case TYPE_F32: return TYPE_F16; + case TYPE_U32: return TYPE_U16; + case TYPE_S32: return TYPE_S16; + case TYPE_F16: + case TYPE_U16: + case TYPE_S16: + return type; + default: + assert(0); + return ~0; + } +} + +/* some cat2 instructions (ie. 
those which are not float) can embed an + * immediate: + */ +static inline bool ir3_cat2_int(opc_t opc) +{ + switch (opc) { + case OPC_ADD_U: + case OPC_ADD_S: + case OPC_SUB_U: + case OPC_SUB_S: + case OPC_CMPS_U: + case OPC_CMPS_S: + case OPC_MIN_U: + case OPC_MIN_S: + case OPC_MAX_U: + case OPC_MAX_S: + case OPC_CMPV_U: + case OPC_CMPV_S: + case OPC_MUL_U: + case OPC_MUL_S: + case OPC_MULL_U: + case OPC_CLZ_S: + case OPC_ABSNEG_S: + case OPC_AND_B: + case OPC_OR_B: + case OPC_NOT_B: + case OPC_XOR_B: + case OPC_BFREV_B: + case OPC_CLZ_B: + case OPC_SHL_B: + case OPC_SHR_B: + case OPC_ASHR_B: + case OPC_MGEN_B: + case OPC_GETBIT_B: + case OPC_CBITS_B: + case OPC_BARY_F: + return true; + + default: + return false; + } +} + + +/* map cat2 instruction to valid abs/neg flags: */ +static inline unsigned ir3_cat2_absneg(opc_t opc) +{ + switch (opc) { + case OPC_ADD_F: + case OPC_MIN_F: + case OPC_MAX_F: + case OPC_MUL_F: + case OPC_SIGN_F: + case OPC_CMPS_F: + case OPC_ABSNEG_F: + case OPC_CMPV_F: + case OPC_FLOOR_F: + case OPC_CEIL_F: + case OPC_RNDNE_F: + case OPC_RNDAZ_F: + case OPC_TRUNC_F: + case OPC_BARY_F: + return IR3_REG_FABS | IR3_REG_FNEG; + + case OPC_ADD_U: + case OPC_ADD_S: + case OPC_SUB_U: + case OPC_SUB_S: + case OPC_CMPS_U: + case OPC_CMPS_S: + case OPC_MIN_U: + case OPC_MIN_S: + case OPC_MAX_U: + case OPC_MAX_S: + case OPC_CMPV_U: + case OPC_CMPV_S: + case OPC_MUL_U: + case OPC_MUL_S: + case OPC_MULL_U: + case OPC_CLZ_S: + return 0; + + case OPC_ABSNEG_S: + return IR3_REG_SABS | IR3_REG_SNEG; + + case OPC_AND_B: + case OPC_OR_B: + case OPC_NOT_B: + case OPC_XOR_B: + case OPC_BFREV_B: + case OPC_CLZ_B: + case OPC_SHL_B: + case OPC_SHR_B: + case OPC_ASHR_B: + case OPC_MGEN_B: + case OPC_GETBIT_B: + case OPC_CBITS_B: + return IR3_REG_BNOT; + + default: + return 0; + } +} + +/* map cat3 instructions to valid abs/neg flags: */ +static inline unsigned ir3_cat3_absneg(opc_t opc) +{ + switch (opc) { + case OPC_MAD_F16: + case OPC_MAD_F32: + case OPC_SEL_F16: 
+ case OPC_SEL_F32: + return IR3_REG_FNEG; + + case OPC_MAD_U16: + case OPC_MADSH_U16: + case OPC_MAD_S16: + case OPC_MADSH_M16: + case OPC_MAD_U24: + case OPC_MAD_S24: + case OPC_SEL_S16: + case OPC_SEL_S32: + case OPC_SAD_S16: + case OPC_SAD_S32: + /* neg *may* work on 3rd src.. */ + + case OPC_SEL_B16: + case OPC_SEL_B32: + + default: + return 0; + } +} + +#define MASK(n) ((1 << (n)) - 1) + +/* iterator for an instructions's sources (reg), also returns src #: */ +#define foreach_src_n(__srcreg, __n, __instr) \ + if ((__instr)->regs_count) \ + for (unsigned __cnt = (__instr)->regs_count - 1, __n = 0; __n < __cnt; __n++) \ + if ((__srcreg = (__instr)->regs[__n + 1])) + +/* iterator for an instructions's sources (reg): */ +#define foreach_src(__srcreg, __instr) \ + foreach_src_n(__srcreg, __i, __instr) + +static inline unsigned __ssa_src_cnt(struct ir3_instruction *instr) +{ + unsigned cnt = instr->regs_count + instr->deps_count; + if (instr->address) + cnt++; + return cnt; +} + +static inline struct ir3_instruction * __ssa_src_n(struct ir3_instruction *instr, unsigned n) +{ + if (n == (instr->regs_count + instr->deps_count)) + return instr->address; + if (n >= instr->regs_count) + return instr->deps[n - instr->regs_count]; + return ssa(instr->regs[n]); +} + +static inline bool __is_false_dep(struct ir3_instruction *instr, unsigned n) +{ + if (n == (instr->regs_count + instr->deps_count)) + return false; + if (n >= instr->regs_count) + return true; + return false; +} + +#define __src_cnt(__instr) ((__instr)->address ? 
(__instr)->regs_count : (__instr)->regs_count - 1) + +/* iterator for an instruction's SSA sources (instr), also returns src #: */ +#define foreach_ssa_src_n(__srcinst, __n, __instr) \ + for (unsigned __cnt = __ssa_src_cnt(__instr), __n = 0; __n < __cnt; __n++) \ + if ((__srcinst = __ssa_src_n(__instr, __n))) + +/* iterator for an instruction's SSA sources (instr): */ +#define foreach_ssa_src(__srcinst, __instr) \ + foreach_ssa_src_n(__srcinst, __i, __instr) + + +/* dump: */ +void ir3_print(struct ir3 *ir); +void ir3_print_instr(struct ir3_instruction *instr); + +/* depth calculation: */ +int ir3_delayslots(struct ir3_instruction *assigner, + struct ir3_instruction *consumer, unsigned n); +void ir3_insert_by_depth(struct ir3_instruction *instr, struct list_head *list); +void ir3_depth(struct ir3 *ir); + +/* copy-propagate: */ +struct ir3_shader_variant; +void ir3_cp(struct ir3 *ir, struct ir3_shader_variant *so); + +/* group neighbors and insert mov's to resolve conflicts: */ +void ir3_group(struct ir3 *ir); + +/* scheduling: */ +void ir3_sched_add_deps(struct ir3 *ir); +int ir3_sched(struct ir3 *ir); + +/* register assignment: */ +struct ir3_ra_reg_set * ir3_ra_alloc_reg_set(struct ir3_compiler *compiler); +int ir3_ra(struct ir3 *ir3, gl_shader_stage type, + bool frag_coord, bool frag_face); + +/* legalize: */ +void ir3_legalize(struct ir3 *ir, int *num_samp, bool *has_ssbo, int *max_bary); + +/* ************************************************************************* */ +/* instruction helpers */ + +static inline struct ir3_instruction * +create_immed_typed(struct ir3_block *block, uint32_t val, type_t type) +{ + struct ir3_instruction *mov; + unsigned flags = (type_size(type) < 32) ? 
IR3_REG_HALF : 0; + + mov = ir3_instr_create(block, OPC_MOV); + mov->cat1.src_type = type; + mov->cat1.dst_type = type; + ir3_reg_create(mov, 0, flags); + ir3_reg_create(mov, 0, IR3_REG_IMMED)->uim_val = val; + + return mov; +} + +static inline struct ir3_instruction * +create_immed(struct ir3_block *block, uint32_t val) +{ + return create_immed_typed(block, val, TYPE_U32); +} + +static inline struct ir3_instruction * +create_uniform(struct ir3_block *block, unsigned n) +{ + struct ir3_instruction *mov; + + mov = ir3_instr_create(block, OPC_MOV); + /* TODO get types right? */ + mov->cat1.src_type = TYPE_F32; + mov->cat1.dst_type = TYPE_F32; + ir3_reg_create(mov, 0, 0); + ir3_reg_create(mov, n, IR3_REG_CONST); + + return mov; +} + +static inline struct ir3_instruction * +create_uniform_indirect(struct ir3_block *block, int n, + struct ir3_instruction *address) +{ + struct ir3_instruction *mov; + + mov = ir3_instr_create(block, OPC_MOV); + mov->cat1.src_type = TYPE_U32; + mov->cat1.dst_type = TYPE_U32; + ir3_reg_create(mov, 0, 0); + ir3_reg_create(mov, 0, IR3_REG_CONST | IR3_REG_RELATIV)->array.offset = n; + + ir3_instr_set_address(mov, address); + + return mov; +} + +/* creates SSA src of correct type (ie. 
half vs full precision) */ +static inline struct ir3_register * __ssa_src(struct ir3_instruction *instr, + struct ir3_instruction *src, unsigned flags) +{ + struct ir3_register *reg; + if (src->regs[0]->flags & IR3_REG_HALF) + flags |= IR3_REG_HALF; + reg = ir3_reg_create(instr, 0, IR3_REG_SSA | flags); + reg->instr = src; + return reg; +} + +static inline struct ir3_instruction * +ir3_MOV(struct ir3_block *block, struct ir3_instruction *src, type_t type) +{ + struct ir3_instruction *instr = ir3_instr_create(block, OPC_MOV); + ir3_reg_create(instr, 0, 0); /* dst */ + if (src->regs[0]->flags & IR3_REG_ARRAY) { + struct ir3_register *src_reg = __ssa_src(instr, src, IR3_REG_ARRAY); + src_reg->array = src->regs[0]->array; + } else { + __ssa_src(instr, src, 0); + } + debug_assert(!(src->regs[0]->flags & IR3_REG_RELATIV)); + instr->cat1.src_type = type; + instr->cat1.dst_type = type; + return instr; +} + +static inline struct ir3_instruction * +ir3_COV(struct ir3_block *block, struct ir3_instruction *src, + type_t src_type, type_t dst_type) +{ + struct ir3_instruction *instr = ir3_instr_create(block, OPC_MOV); + unsigned dst_flags = (type_size(dst_type) < 32) ? IR3_REG_HALF : 0; + unsigned src_flags = (type_size(src_type) < 32) ? 
IR3_REG_HALF : 0; + + debug_assert((src->regs[0]->flags & IR3_REG_HALF) == src_flags); + + ir3_reg_create(instr, 0, dst_flags); /* dst */ + __ssa_src(instr, src, 0); + instr->cat1.src_type = src_type; + instr->cat1.dst_type = dst_type; + debug_assert(!(src->regs[0]->flags & IR3_REG_ARRAY)); + return instr; +} + +static inline struct ir3_instruction * +ir3_NOP(struct ir3_block *block) +{ + return ir3_instr_create(block, OPC_NOP); +} + +#define INSTR0(name) \ +static inline struct ir3_instruction * \ +ir3_##name(struct ir3_block *block) \ +{ \ + struct ir3_instruction *instr = \ + ir3_instr_create(block, OPC_##name); \ + return instr; \ +} + +#define INSTR1(name) \ +static inline struct ir3_instruction * \ +ir3_##name(struct ir3_block *block, \ + struct ir3_instruction *a, unsigned aflags) \ +{ \ + struct ir3_instruction *instr = \ + ir3_instr_create(block, OPC_##name); \ + ir3_reg_create(instr, 0, 0); /* dst */ \ + __ssa_src(instr, a, aflags); \ + return instr; \ +} + +#define INSTR2(name) \ +static inline struct ir3_instruction * \ +ir3_##name(struct ir3_block *block, \ + struct ir3_instruction *a, unsigned aflags, \ + struct ir3_instruction *b, unsigned bflags) \ +{ \ + struct ir3_instruction *instr = \ + ir3_instr_create(block, OPC_##name); \ + ir3_reg_create(instr, 0, 0); /* dst */ \ + __ssa_src(instr, a, aflags); \ + __ssa_src(instr, b, bflags); \ + return instr; \ +} + +#define INSTR3(name) \ +static inline struct ir3_instruction * \ +ir3_##name(struct ir3_block *block, \ + struct ir3_instruction *a, unsigned aflags, \ + struct ir3_instruction *b, unsigned bflags, \ + struct ir3_instruction *c, unsigned cflags) \ +{ \ + struct ir3_instruction *instr = \ + ir3_instr_create(block, OPC_##name); \ + ir3_reg_create(instr, 0, 0); /* dst */ \ + __ssa_src(instr, a, aflags); \ + __ssa_src(instr, b, bflags); \ + __ssa_src(instr, c, cflags); \ + return instr; \ +} + +#define INSTR4(name) \ +static inline struct ir3_instruction * \ +ir3_##name(struct ir3_block *block, \ + 
struct ir3_instruction *a, unsigned aflags, \ + struct ir3_instruction *b, unsigned bflags, \ + struct ir3_instruction *c, unsigned cflags, \ + struct ir3_instruction *d, unsigned dflags) \ +{ \ + struct ir3_instruction *instr = \ + ir3_instr_create2(block, OPC_##name, 5); \ + ir3_reg_create(instr, 0, 0); /* dst */ \ + __ssa_src(instr, a, aflags); \ + __ssa_src(instr, b, bflags); \ + __ssa_src(instr, c, cflags); \ + __ssa_src(instr, d, dflags); \ + return instr; \ +} + +#define INSTR4F(f, name) \ +static inline struct ir3_instruction * \ +ir3_##name##_##f(struct ir3_block *block, \ + struct ir3_instruction *a, unsigned aflags, \ + struct ir3_instruction *b, unsigned bflags, \ + struct ir3_instruction *c, unsigned cflags, \ + struct ir3_instruction *d, unsigned dflags) \ +{ \ + struct ir3_instruction *instr = \ + ir3_instr_create2(block, OPC_##name, 5); \ + ir3_reg_create(instr, 0, 0); /* dst */ \ + __ssa_src(instr, a, aflags); \ + __ssa_src(instr, b, bflags); \ + __ssa_src(instr, c, cflags); \ + __ssa_src(instr, d, dflags); \ + instr->flags |= IR3_INSTR_##f; \ + return instr; \ +} + +/* cat0 instructions: */ +INSTR0(BR) +INSTR0(JUMP) +INSTR1(KILL) +INSTR0(END) + +/* cat2 instructions, most 2 src but some 1 src: */ +INSTR2(ADD_F) +INSTR2(MIN_F) +INSTR2(MAX_F) +INSTR2(MUL_F) +INSTR1(SIGN_F) +INSTR2(CMPS_F) +INSTR1(ABSNEG_F) +INSTR2(CMPV_F) +INSTR1(FLOOR_F) +INSTR1(CEIL_F) +INSTR1(RNDNE_F) +INSTR1(RNDAZ_F) +INSTR1(TRUNC_F) +INSTR2(ADD_U) +INSTR2(ADD_S) +INSTR2(SUB_U) +INSTR2(SUB_S) +INSTR2(CMPS_U) +INSTR2(CMPS_S) +INSTR2(MIN_U) +INSTR2(MIN_S) +INSTR2(MAX_U) +INSTR2(MAX_S) +INSTR1(ABSNEG_S) +INSTR2(AND_B) +INSTR2(OR_B) +INSTR1(NOT_B) +INSTR2(XOR_B) +INSTR2(CMPV_U) +INSTR2(CMPV_S) +INSTR2(MUL_U) +INSTR2(MUL_S) +INSTR2(MULL_U) +INSTR1(BFREV_B) +INSTR1(CLZ_S) +INSTR1(CLZ_B) +INSTR2(SHL_B) +INSTR2(SHR_B) +INSTR2(ASHR_B) +INSTR2(BARY_F) +INSTR2(MGEN_B) +INSTR2(GETBIT_B) +INSTR1(SETRM) +INSTR1(CBITS_B) +INSTR2(SHB) +INSTR2(MSAD) + +/* cat3 instructions: */ +INSTR3(MAD_U16) 
+INSTR3(MADSH_U16) +INSTR3(MAD_S16) +INSTR3(MADSH_M16) +INSTR3(MAD_U24) +INSTR3(MAD_S24) +INSTR3(MAD_F16) +INSTR3(MAD_F32) +INSTR3(SEL_B16) +INSTR3(SEL_B32) +INSTR3(SEL_S16) +INSTR3(SEL_S32) +INSTR3(SEL_F16) +INSTR3(SEL_F32) +INSTR3(SAD_S16) +INSTR3(SAD_S32) + +/* cat4 instructions: */ +INSTR1(RCP) +INSTR1(RSQ) +INSTR1(LOG2) +INSTR1(EXP2) +INSTR1(SIN) +INSTR1(COS) +INSTR1(SQRT) + +/* cat5 instructions: */ +INSTR1(DSX) +INSTR1(DSY) + +static inline struct ir3_instruction * +ir3_SAM(struct ir3_block *block, opc_t opc, type_t type, + unsigned wrmask, unsigned flags, unsigned samp, unsigned tex, + struct ir3_instruction *src0, struct ir3_instruction *src1) +{ + struct ir3_instruction *sam; + struct ir3_register *reg; + + sam = ir3_instr_create(block, opc); + sam->flags |= flags; + ir3_reg_create(sam, 0, 0)->wrmask = wrmask; + if (src0) { + reg = ir3_reg_create(sam, 0, IR3_REG_SSA); + reg->wrmask = (1 << (src0->regs_count - 1)) - 1; + reg->instr = src0; + } + if (src1) { + reg = ir3_reg_create(sam, 0, IR3_REG_SSA); + reg->instr = src1; + reg->wrmask = (1 << (src1->regs_count - 1)) - 1; + } + sam->cat5.samp = samp; + sam->cat5.tex = tex; + sam->cat5.type = type; + + return sam; +} + +/* cat6 instructions: */ +INSTR2(LDLV) +INSTR2(LDG) +INSTR2(LDL) +INSTR3(STG) +INSTR3(STL) +INSTR3(LDGB) +INSTR4(STGB) +INSTR4(STIB) +INSTR1(RESINFO) +INSTR1(RESFMT) +INSTR2(ATOMIC_ADD) +INSTR2(ATOMIC_SUB) +INSTR2(ATOMIC_XCHG) +INSTR2(ATOMIC_INC) +INSTR2(ATOMIC_DEC) +INSTR2(ATOMIC_CMPXCHG) +INSTR2(ATOMIC_MIN) +INSTR2(ATOMIC_MAX) +INSTR2(ATOMIC_AND) +INSTR2(ATOMIC_OR) +INSTR2(ATOMIC_XOR) +INSTR4F(G, ATOMIC_ADD) +INSTR4F(G, ATOMIC_SUB) +INSTR4F(G, ATOMIC_XCHG) +INSTR4F(G, ATOMIC_INC) +INSTR4F(G, ATOMIC_DEC) +INSTR4F(G, ATOMIC_CMPXCHG) +INSTR4F(G, ATOMIC_MIN) +INSTR4F(G, ATOMIC_MAX) +INSTR4F(G, ATOMIC_AND) +INSTR4F(G, ATOMIC_OR) +INSTR4F(G, ATOMIC_XOR) + +/* cat7 instructions: */ +INSTR0(BAR) +INSTR0(FENCE) + +/* ************************************************************************* */ +/* 
split this out or find some helper to use.. like main/bitset.h.. */ + +#include + +#define MAX_REG 256 + +typedef uint8_t regmask_t[2 * MAX_REG / 8]; + +static inline unsigned regmask_idx(struct ir3_register *reg) +{ + unsigned num = (reg->flags & IR3_REG_RELATIV) ? reg->array.offset : reg->num; + debug_assert(num < MAX_REG); + if (reg->flags & IR3_REG_HALF) + num += MAX_REG; + return num; +} + +static inline void regmask_init(regmask_t *regmask) +{ + memset(regmask, 0, sizeof(*regmask)); +} + +static inline void regmask_set(regmask_t *regmask, struct ir3_register *reg) +{ + unsigned idx = regmask_idx(reg); + if (reg->flags & IR3_REG_RELATIV) { + unsigned i; + for (i = 0; i < reg->size; i++, idx++) + (*regmask)[idx / 8] |= 1 << (idx % 8); + } else { + unsigned mask; + for (mask = reg->wrmask; mask; mask >>= 1, idx++) + if (mask & 1) + (*regmask)[idx / 8] |= 1 << (idx % 8); + } +} + +static inline void regmask_or(regmask_t *dst, regmask_t *a, regmask_t *b) +{ + unsigned i; + for (i = 0; i < ARRAY_SIZE(*dst); i++) + (*dst)[i] = (*a)[i] | (*b)[i]; +} + +/* set bits in a if not set in b, conceptually: + * a |= (reg & ~b) + */ +static inline void regmask_set_if_not(regmask_t *a, + struct ir3_register *reg, regmask_t *b) +{ + unsigned idx = regmask_idx(reg); + if (reg->flags & IR3_REG_RELATIV) { + unsigned i; + for (i = 0; i < reg->size; i++, idx++) + if (!((*b)[idx / 8] & (1 << (idx % 8)))) + (*a)[idx / 8] |= 1 << (idx % 8); + } else { + unsigned mask; + for (mask = reg->wrmask; mask; mask >>= 1, idx++) + if (mask & 1) + if (!((*b)[idx / 8] & (1 << (idx % 8)))) + (*a)[idx / 8] |= 1 << (idx % 8); + } +} + +static inline bool regmask_get(regmask_t *regmask, + struct ir3_register *reg) +{ + unsigned idx = regmask_idx(reg); + if (reg->flags & IR3_REG_RELATIV) { + unsigned i; + for (i = 0; i < reg->size; i++, idx++) + if ((*regmask)[idx / 8] & (1 << (idx % 8))) + return true; + } else { + unsigned mask; + for (mask = reg->wrmask; mask; mask >>= 1, idx++) + if (mask & 1) + if 
((*regmask)[idx / 8] & (1 << (idx % 8))) + return true; + } + return false; +} + +/* ************************************************************************* */ + +#endif /* IR3_H_ */ diff -Nru mesa-18.3.3/src/freedreno/ir3/ir3_legalize.c mesa-19.0.1/src/freedreno/ir3/ir3_legalize.c --- mesa-18.3.3/src/freedreno/ir3/ir3_legalize.c 1970-01-01 00:00:00.000000000 +0000 +++ mesa-19.0.1/src/freedreno/ir3/ir3_legalize.c 2019-03-31 23:16:37.000000000 +0000 @@ -0,0 +1,496 @@ +/* + * Copyright (C) 2014 Rob Clark + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + * Authors: + * Rob Clark + */ + +#include "util/ralloc.h" +#include "util/u_math.h" + +#include "ir3.h" + +/* + * Legalize: + * + * We currently require that scheduling ensures that we have enough nop's + * in all the right places. 
The legalize step mostly handles fixing up + * instruction flags ((ss)/(sy)/(ei)), and collapses sequences of nop's + * into fewer nop's w/ rpt flag. + */ + +struct ir3_legalize_ctx { + int num_samp; + bool has_ssbo; + int max_bary; +}; + +struct ir3_legalize_state { + regmask_t needs_ss; + regmask_t needs_ss_war; /* write after read */ + regmask_t needs_sy; +}; + +struct ir3_legalize_block_data { + bool valid; + struct ir3_legalize_state state; +}; + +/* We want to evaluate each block from the position of any other + * predecessor block, in order that the flags set are the union of + * all possible program paths. + * + * To do this, we need to know the output state (needs_ss/ss_war/sy) + * of all predecessor blocks. The tricky thing is loops, which mean + * that we can't simply recursively process each predecessor block + * before legalizing the current block. + * + * How we handle that is by looping over all the blocks until the + * results converge. If the output state of a given block changes + * in a given pass, this means that all successor blocks are not + * yet fully legalized. 
+ */ + +static bool +legalize_block(struct ir3_legalize_ctx *ctx, struct ir3_block *block) +{ + struct ir3_legalize_block_data *bd = block->data; + + if (bd->valid) + return false; + + struct ir3_instruction *last_input = NULL; + struct ir3_instruction *last_rel = NULL; + struct ir3_instruction *last_n = NULL; + struct list_head instr_list; + struct ir3_legalize_state prev_state = bd->state; + struct ir3_legalize_state *state = &bd->state; + + /* our input state is the OR of all predecessor blocks' state: */ + for (unsigned i = 0; i < block->predecessors_count; i++) { + struct ir3_legalize_block_data *pbd = block->predecessors[i]->data; + struct ir3_legalize_state *pstate = &pbd->state; + + /* Our input (ss)/(sy) state is based on OR'ing the output + * state of all our predecessor blocks + */ + regmask_or(&state->needs_ss, + &state->needs_ss, &pstate->needs_ss); + regmask_or(&state->needs_ss_war, + &state->needs_ss_war, &pstate->needs_ss_war); + regmask_or(&state->needs_sy, + &state->needs_sy, &pstate->needs_sy); + } + + /* remove all the instructions from the list, we'll be adding + * them back in as we go + */ + list_replace(&block->instr_list, &instr_list); + list_inithead(&block->instr_list); + + list_for_each_entry_safe (struct ir3_instruction, n, &instr_list, node) { + struct ir3_register *reg; + unsigned i; + + n->flags &= ~(IR3_INSTR_SS | IR3_INSTR_SY); + + if (is_meta(n)) + continue; + + if (is_input(n)) { + struct ir3_register *inloc = n->regs[1]; + assert(inloc->flags & IR3_REG_IMMED); + ctx->max_bary = MAX2(ctx->max_bary, inloc->iim_val); + } + + if (last_n && is_barrier(last_n)) + n->flags |= IR3_INSTR_SS | IR3_INSTR_SY; + + /* NOTE: consider dst register too.. it could happen that + * texture sample instruction (for example) writes some + * components which are unused. 
A subsequent instruction + * that writes the same register can race w/ the sam instr + * resulting in undefined results: + */ + for (i = 0; i < n->regs_count; i++) { + reg = n->regs[i]; + + if (reg_gpr(reg)) { + + /* TODO: we probably only need (ss) for alu + * instr consuming sfu result.. need to make + * some tests for both this and (sy).. + */ + if (regmask_get(&state->needs_ss, reg)) { + n->flags |= IR3_INSTR_SS; + regmask_init(&state->needs_ss_war); + regmask_init(&state->needs_ss); + } + + if (regmask_get(&state->needs_sy, reg)) { + n->flags |= IR3_INSTR_SY; + regmask_init(&state->needs_sy); + } + } + + /* TODO: is it valid to have address reg loaded from a + * relative src (ie. mova a0, c)? If so, the + * last_rel check below should be moved ahead of this: + */ + if (reg->flags & IR3_REG_RELATIV) + last_rel = n; + } + + if (n->regs_count > 0) { + reg = n->regs[0]; + if (regmask_get(&state->needs_ss_war, reg)) { + n->flags |= IR3_INSTR_SS; + regmask_init(&state->needs_ss_war); + regmask_init(&state->needs_ss); + } + + if (last_rel && (reg->num == regid(REG_A0, 0))) { + last_rel->flags |= IR3_INSTR_UL; + last_rel = NULL; + } + } + + /* cat5+ does not have an (ss) bit, if needed we need to + * insert a nop to carry the sync flag. 
Would be kinda + * clever if we were aware of this during scheduling, but + * this should be a pretty rare case: + */ + if ((n->flags & IR3_INSTR_SS) && (opc_cat(n->opc) >= 5)) { + struct ir3_instruction *nop; + nop = ir3_NOP(block); + nop->flags |= IR3_INSTR_SS; + n->flags &= ~IR3_INSTR_SS; + } + + /* need to be able to set (ss) on first instruction: */ + if (list_empty(&block->instr_list) && (opc_cat(n->opc) >= 5)) + ir3_NOP(block); + + if (is_nop(n) && !list_empty(&block->instr_list)) { + struct ir3_instruction *last = list_last_entry(&block->instr_list, + struct ir3_instruction, node); + if (is_nop(last) && (last->repeat < 5)) { + last->repeat++; + last->flags |= n->flags; + continue; + } + } + + list_addtail(&n->node, &block->instr_list); + + if (is_sfu(n)) + regmask_set(&state->needs_ss, n->regs[0]); + + if (is_tex(n)) { + /* this ends up being the # of samp instructions.. but that + * is ok, everything else only cares whether it is zero or + * not. We do this here, rather than when we encounter a + * SAMP decl, because (especially in binning pass shader) + * the samp instruction(s) could get eliminated if the + * result is not used. + */ + ctx->num_samp = MAX2(ctx->num_samp, n->cat5.samp + 1); + regmask_set(&state->needs_sy, n->regs[0]); + } else if (n->opc == OPC_RESINFO) { + regmask_set(&state->needs_ss, n->regs[0]); + ir3_NOP(block)->flags |= IR3_INSTR_SS; + } else if (is_load(n)) { + /* seems like ldlv needs (ss) bit instead?? which is odd but + * makes a bunch of flat-varying tests start working on a4xx. 
+ */ + if ((n->opc == OPC_LDLV) || (n->opc == OPC_LDL)) + regmask_set(&state->needs_ss, n->regs[0]); + else + regmask_set(&state->needs_sy, n->regs[0]); + } else if (is_atomic(n->opc)) { + if (n->flags & IR3_INSTR_G) + regmask_set(&state->needs_sy, n->regs[0]); + else + regmask_set(&state->needs_ss, n->regs[0]); + } + + if (is_ssbo(n->opc) || (is_atomic(n->opc) && (n->flags & IR3_INSTR_G))) + ctx->has_ssbo = true; + + /* both tex/sfu appear to not always immediately consume + * their src register(s): + */ + if (is_tex(n) || is_sfu(n) || is_mem(n)) { + foreach_src(reg, n) { + if (reg_gpr(reg)) + regmask_set(&state->needs_ss_war, reg); + } + } + + if (is_input(n)) + last_input = n; + + last_n = n; + } + + if (last_input) { + /* special hack.. if using ldlv to bypass interpolation, + * we need to insert a dummy bary.f on which we can set + * the (ei) flag: + */ + if (is_mem(last_input) && (last_input->opc == OPC_LDLV)) { + struct ir3_instruction *baryf; + + /* (ss)bary.f (ei)r63.x, 0, r0.x */ + baryf = ir3_instr_create(block, OPC_BARY_F); + baryf->flags |= IR3_INSTR_SS; + ir3_reg_create(baryf, regid(63, 0), 0); + ir3_reg_create(baryf, 0, IR3_REG_IMMED)->iim_val = 0; + ir3_reg_create(baryf, regid(0, 0), 0); + + /* insert the dummy bary.f after last_input: */ + list_delinit(&baryf->node); + list_add(&baryf->node, &last_input->node); + + last_input = baryf; + } + last_input->regs[0]->flags |= IR3_REG_EI; + } + + if (last_rel) + last_rel->flags |= IR3_INSTR_UL; + + bd->valid = true; + + if (memcmp(&prev_state, state, sizeof(*state))) { + /* our output state changed, this invalidates all of our + * successors: + */ + for (unsigned i = 0; i < ARRAY_SIZE(block->successors); i++) { + if (!block->successors[i]) + break; + struct ir3_legalize_block_data *pbd = block->successors[i]->data; + pbd->valid = false; + } + } + + return true; +} + +/* NOTE: branch instructions are always the last instruction(s) + * in the block. 
We take advantage of this as we resolve the + * branches, since "if (foo) break;" constructs turn into + * something like: + * + * block3 { + * ... + * 0029:021: mov.s32s32 r62.x, r1.y + * 0082:022: br !p0.x, target=block5 + * 0083:023: br p0.x, target=block4 + * // succs: if _[0029:021: mov.s32s32] block4; else block5; + * } + * block4 { + * 0084:024: jump, target=block6 + * // succs: block6; + * } + * block5 { + * 0085:025: jump, target=block7 + * // succs: block7; + * } + * + * ie. only instruction in block4/block5 is a jump, so when + * resolving branches we can easily detect this by checking + * that the first instruction in the target block is itself + * a jump, and setup the br directly to the jump's target + * (and strip back out the now unreached jump) + * + * TODO sometimes we end up with things like: + * + * br !p0.x, #2 + * br p0.x, #12 + * add.u r0.y, r0.y, 1 + * + * If we swapped the order of the branches, we could drop one. + */ +static struct ir3_block * +resolve_dest_block(struct ir3_block *block) +{ + /* special case for last block: */ + if (!block->successors[0]) + return block; + + /* NOTE that we may or may not have inserted the jump + * in the target block yet, so conditions to resolve + * the dest to the dest block's successor are: + * + * (1) successor[1] == NULL && + * (2) (block-is-empty || only-instr-is-jump) + */ + if (block->successors[1] == NULL) { + if (list_empty(&block->instr_list)) { + return block->successors[0]; + } else if (list_length(&block->instr_list) == 1) { + struct ir3_instruction *instr = list_first_entry( + &block->instr_list, struct ir3_instruction, node); + if (instr->opc == OPC_JUMP) + return block->successors[0]; + } + } + return block; +} + +static bool +resolve_jump(struct ir3_instruction *instr) +{ + struct ir3_block *tblock = + resolve_dest_block(instr->cat0.target); + struct ir3_instruction *target; + + if (tblock != instr->cat0.target) { + list_delinit(&instr->cat0.target->node); + instr->cat0.target = tblock; 
+ return true; + } + + target = list_first_entry(&tblock->instr_list, + struct ir3_instruction, node); + + /* TODO maybe a less fragile way to do this. But we are expecting + * a pattern from sched_block() that looks like: + * + * br !p0.x, #else-block + * br p0.x, #if-block + * + * if the first branch target is +2, or if 2nd branch target is +1 + * then we can just drop the jump. + */ + unsigned next_block; + if (instr->cat0.inv == true) + next_block = 2; + else + next_block = 1; + + if ((!target) || (target->ip == (instr->ip + next_block))) { + list_delinit(&instr->node); + return true; + } else { + instr->cat0.immed = + (int)target->ip - (int)instr->ip; + } + return false; +} + +/* resolve jumps, removing jumps/branches to immediately following + * instruction which we end up with from earlier stages. Since + * removing an instruction can invalidate earlier instruction's + * branch offsets, we need to do this iteratively until no more + * branches are removed. + */ +static bool +resolve_jumps(struct ir3 *ir) +{ + list_for_each_entry (struct ir3_block, block, &ir->block_list, node) + list_for_each_entry (struct ir3_instruction, instr, &block->instr_list, node) + if (is_flow(instr) && instr->cat0.target) + if (resolve_jump(instr)) + return true; + + return false; +} + +/* we want to mark points where divergent flow control re-converges + * with (jp) flags. For now, since we don't do any optimization for + * things that start out as a 'do {} while()', re-convergence points + * will always be a branch or jump target. Note that this is overly + * conservative, since unconditional jump targets are not convergence + * points, we are just assuming that the other path to reach the jump + * target was divergent. If we were clever enough to optimize the + * jump at end of a loop back to a conditional branch into a single + * conditional branch, ie. 
like: + * + * add.f r1.w, r0.x, (neg)(r)c2.x <= loop start + * mul.f r1.z, r1.z, r0.x + * mul.f r1.y, r1.y, r0.x + * mul.f r0.z, r1.x, r0.x + * mul.f r0.w, r0.y, r0.x + * cmps.f.ge r0.x, (r)c2.y, (r)r1.w + * add.s r0.x, (r)r0.x, (r)-1 + * sel.f32 r0.x, (r)c3.y, (r)r0.x, c3.x + * cmps.f.eq p0.x, r0.x, c3.y + * mov.f32f32 r0.x, r1.w + * mov.f32f32 r0.y, r0.w + * mov.f32f32 r1.x, r0.z + * (rpt2)nop + * br !p0.x, #-13 + * (jp)mul.f r0.x, c263.y, r1.y + * + * Then we'd have to be more clever, as the convergence point is no + * longer a branch or jump target. + */ +static void +mark_convergence_points(struct ir3 *ir) +{ + list_for_each_entry (struct ir3_block, block, &ir->block_list, node) { + list_for_each_entry (struct ir3_instruction, instr, &block->instr_list, node) { + if (is_flow(instr) && instr->cat0.target) { + struct ir3_instruction *target = + list_first_entry(&instr->cat0.target->instr_list, + struct ir3_instruction, node); + target->flags |= IR3_INSTR_JP; + } + } + } +} + +void +ir3_legalize(struct ir3 *ir, int *num_samp, bool *has_ssbo, int *max_bary) +{ + struct ir3_legalize_ctx *ctx = rzalloc(ir, struct ir3_legalize_ctx); + bool progress; + + ctx->max_bary = -1; + + /* allocate per-block data: */ + list_for_each_entry (struct ir3_block, block, &ir->block_list, node) { + block->data = rzalloc(ctx, struct ir3_legalize_block_data); + } + + /* process each block: */ + do { + progress = false; + list_for_each_entry (struct ir3_block, block, &ir->block_list, node) { + progress |= legalize_block(ctx, block); + } + } while (progress); + + *num_samp = ctx->num_samp; + *has_ssbo = ctx->has_ssbo; + *max_bary = ctx->max_bary; + + do { + ir3_count_instructions(ir); + } while(resolve_jumps(ir)); + + mark_convergence_points(ir); + + ralloc_free(ctx); +} diff -Nru mesa-18.3.3/src/freedreno/ir3/ir3_nir.c mesa-19.0.1/src/freedreno/ir3/ir3_nir.c --- mesa-18.3.3/src/freedreno/ir3/ir3_nir.c 1970-01-01 00:00:00.000000000 +0000 +++ mesa-19.0.1/src/freedreno/ir3/ir3_nir.c 
2019-03-31 23:16:37.000000000 +0000 @@ -0,0 +1,263 @@ +/* + * Copyright (C) 2015 Rob Clark + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ * + * Authors: + * Rob Clark + */ + + +#include "util/debug.h" + +#include "ir3_nir.h" +#include "ir3_compiler.h" +#include "ir3_shader.h" + +static const nir_shader_compiler_options options = { + .lower_fpow = true, + .lower_scmp = true, + .lower_flrp32 = true, + .lower_flrp64 = true, + .lower_ffract = true, + .lower_fmod32 = true, + .lower_fmod64 = true, + .lower_fdiv = true, + .lower_ldexp = true, + .fuse_ffma = true, + .native_integers = true, + .vertex_id_zero_based = true, + .lower_extract_byte = true, + .lower_extract_word = true, + .lower_all_io_to_temps = true, + .lower_helper_invocation = true, +}; + +const nir_shader_compiler_options * +ir3_get_compiler_options(struct ir3_compiler *compiler) +{ + return &options; +} + +/* for given shader key, are any steps handled in nir? */ +bool +ir3_key_lowers_nir(const struct ir3_shader_key *key) +{ + return key->fsaturate_s | key->fsaturate_t | key->fsaturate_r | + key->vsaturate_s | key->vsaturate_t | key->vsaturate_r | + key->ucp_enables | key->color_two_side | + key->fclamp_color | key->vclamp_color; +} + +#define OPT(nir, pass, ...) ({ \ + bool this_progress = false; \ + NIR_PASS(this_progress, nir, pass, ##__VA_ARGS__); \ + this_progress; \ +}) + +#define OPT_V(nir, pass, ...) 
NIR_PASS_V(nir, pass, ##__VA_ARGS__) + +static void +ir3_optimize_loop(nir_shader *s) +{ + bool progress; + do { + progress = false; + + OPT_V(s, nir_lower_vars_to_ssa); + progress |= OPT(s, nir_opt_copy_prop_vars); + progress |= OPT(s, nir_opt_dead_write_vars); + progress |= OPT(s, nir_lower_alu_to_scalar); + progress |= OPT(s, nir_lower_phis_to_scalar); + + progress |= OPT(s, nir_copy_prop); + progress |= OPT(s, nir_opt_dce); + progress |= OPT(s, nir_opt_cse); + static int gcm = -1; + if (gcm == -1) + gcm = env_var_as_unsigned("GCM", 0); + if (gcm == 1) + progress |= OPT(s, nir_opt_gcm, true); + else if (gcm == 2) + progress |= OPT(s, nir_opt_gcm, false); + progress |= OPT(s, nir_opt_peephole_select, 16, true); + progress |= OPT(s, nir_opt_intrinsics); + progress |= OPT(s, nir_opt_algebraic); + progress |= OPT(s, nir_opt_constant_folding); + progress |= OPT(s, nir_opt_dead_cf); + if (OPT(s, nir_opt_trivial_continues)) { + progress |= true; + /* If nir_opt_trivial_continues makes progress, then we need to clean + * things up if we want any hope of nir_opt_if or nir_opt_loop_unroll + * to make progress. 
+ */ + OPT(s, nir_copy_prop); + OPT(s, nir_opt_dce); + } + progress |= OPT(s, nir_opt_if); + progress |= OPT(s, nir_opt_remove_phis); + progress |= OPT(s, nir_opt_undef); + + } while (progress); +} + +struct nir_shader * +ir3_optimize_nir(struct ir3_shader *shader, nir_shader *s, + const struct ir3_shader_key *key) +{ + struct nir_lower_tex_options tex_options = { + .lower_rect = 0, + }; + + if (key) { + switch (shader->type) { + case MESA_SHADER_FRAGMENT: + tex_options.saturate_s = key->fsaturate_s; + tex_options.saturate_t = key->fsaturate_t; + tex_options.saturate_r = key->fsaturate_r; + break; + case MESA_SHADER_VERTEX: + tex_options.saturate_s = key->vsaturate_s; + tex_options.saturate_t = key->vsaturate_t; + tex_options.saturate_r = key->vsaturate_r; + break; + default: + /* TODO */ + break; + } + } + + if (shader->compiler->gpu_id >= 400) { + /* a4xx seems to have *no* sam.p */ + tex_options.lower_txp = ~0; /* lower all txp */ + } else { + /* a3xx just needs to avoid sam.p for 3d tex */ + tex_options.lower_txp = (1 << GLSL_SAMPLER_DIM_3D); + } + + if (ir3_shader_debug & IR3_DBG_DISASM) { + debug_printf("----------------------\n"); + nir_print_shader(s, stdout); + debug_printf("----------------------\n"); + } + + OPT_V(s, nir_opt_global_to_local); + OPT_V(s, nir_lower_regs_to_ssa); + + if (key) { + if (s->info.stage == MESA_SHADER_VERTEX) { + OPT_V(s, nir_lower_clip_vs, key->ucp_enables, false); + if (key->vclamp_color) + OPT_V(s, nir_lower_clamp_color_outputs); + } else if (s->info.stage == MESA_SHADER_FRAGMENT) { + OPT_V(s, nir_lower_clip_fs, key->ucp_enables); + if (key->fclamp_color) + OPT_V(s, nir_lower_clamp_color_outputs); + } + if (key->color_two_side) { + OPT_V(s, nir_lower_two_sided_color); + } + } else { + /* only want to do this the first time (when key is null) + * and not again on any potential 2nd variant lowering pass: + */ + OPT_V(s, ir3_nir_apply_trig_workarounds); + } + + OPT_V(s, nir_lower_tex, &tex_options); + OPT_V(s, 
nir_lower_load_const_to_scalar); + if (shader->compiler->gpu_id < 500) + OPT_V(s, ir3_nir_lower_tg4_to_tex); + + ir3_optimize_loop(s); + + /* do idiv lowering after first opt loop to give a chance for + * divide by immed power-of-two to be caught first: + */ + if (OPT(s, nir_lower_idiv)) + ir3_optimize_loop(s); + + OPT_V(s, nir_remove_dead_variables, nir_var_function_temp); + + OPT_V(s, nir_move_load_const); + + if (ir3_shader_debug & IR3_DBG_DISASM) { + debug_printf("----------------------\n"); + nir_print_shader(s, stdout); + debug_printf("----------------------\n"); + } + + nir_sweep(s); + + return s; +} + +void +ir3_nir_scan_driver_consts(nir_shader *shader, + struct ir3_driver_const_layout *layout) +{ + nir_foreach_function(function, shader) { + if (!function->impl) + continue; + + nir_foreach_block(block, function->impl) { + nir_foreach_instr(instr, block) { + if (instr->type != nir_instr_type_intrinsic) + continue; + + nir_intrinsic_instr *intr = + nir_instr_as_intrinsic(instr); + unsigned idx; + + switch (intr->intrinsic) { + case nir_intrinsic_get_buffer_size: + idx = nir_src_as_const_value(intr->src[0])->u32[0]; + if (layout->ssbo_size.mask & (1 << idx)) + break; + layout->ssbo_size.mask |= (1 << idx); + layout->ssbo_size.off[idx] = + layout->ssbo_size.count; + layout->ssbo_size.count += 1; /* one const per */ + break; + case nir_intrinsic_image_deref_atomic_add: + case nir_intrinsic_image_deref_atomic_min: + case nir_intrinsic_image_deref_atomic_max: + case nir_intrinsic_image_deref_atomic_and: + case nir_intrinsic_image_deref_atomic_or: + case nir_intrinsic_image_deref_atomic_xor: + case nir_intrinsic_image_deref_atomic_exchange: + case nir_intrinsic_image_deref_atomic_comp_swap: + case nir_intrinsic_image_deref_store: + case nir_intrinsic_image_deref_size: + idx = nir_intrinsic_get_var(intr, 0)->data.driver_location; + if (layout->image_dims.mask & (1 << idx)) + break; + layout->image_dims.mask |= (1 << idx); + layout->image_dims.off[idx] = + 
layout->image_dims.count; + layout->image_dims.count += 3; /* three const per */ + break; + default: + break; + } + } + } + } +} diff -Nru mesa-18.3.3/src/freedreno/ir3/ir3_nir.h mesa-19.0.1/src/freedreno/ir3/ir3_nir.h --- mesa-18.3.3/src/freedreno/ir3/ir3_nir.h 1970-01-01 00:00:00.000000000 +0000 +++ mesa-19.0.1/src/freedreno/ir3/ir3_nir.h 2019-03-31 23:16:37.000000000 +0000 @@ -0,0 +1,45 @@ +/* + * Copyright (C) 2015 Rob Clark + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ * + * Authors: + * Rob Clark + */ + +#ifndef IR3_NIR_H_ +#define IR3_NIR_H_ + +#include "compiler/nir/nir.h" +#include "compiler/shader_enums.h" + +#include "ir3_shader.h" + +void ir3_nir_scan_driver_consts(nir_shader *shader, struct ir3_driver_const_layout *layout); + +bool ir3_nir_apply_trig_workarounds(nir_shader *shader); +bool ir3_nir_lower_tg4_to_tex(nir_shader *shader); + +const nir_shader_compiler_options * ir3_get_compiler_options(struct ir3_compiler *compiler); +bool ir3_key_lowers_nir(const struct ir3_shader_key *key); +struct nir_shader * ir3_optimize_nir(struct ir3_shader *shader, nir_shader *s, + const struct ir3_shader_key *key); + +#endif /* IR3_NIR_H_ */ diff -Nru mesa-18.3.3/src/freedreno/ir3/ir3_nir_lower_tg4_to_tex.c mesa-19.0.1/src/freedreno/ir3/ir3_nir_lower_tg4_to_tex.c --- mesa-18.3.3/src/freedreno/ir3/ir3_nir_lower_tg4_to_tex.c 1970-01-01 00:00:00.000000000 +0000 +++ mesa-19.0.1/src/freedreno/ir3/ir3_nir_lower_tg4_to_tex.c 2019-03-31 23:16:37.000000000 +0000 @@ -0,0 +1,138 @@ +/* + * Copyright © 2017 Ilia Mirkin + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + */ + +#include "ir3_nir.h" +#include "compiler/nir/nir_builder.h" + +/* A4XX has a broken GATHER4 operation. It performs the texture swizzle on the + * gather results, rather than before. As a result, it must be emulated with + * direct texture calls. + */ + +static bool +lower_tg4(nir_block *block, nir_builder *b, void *mem_ctx) +{ + bool progress = false; + + static const int offsets[3][2] = { {0, 1}, {1, 1}, {1, 0} }; + + nir_foreach_instr_safe(instr, block) { + if (instr->type != nir_instr_type_tex) + continue; + + nir_tex_instr *tg4 = (nir_tex_instr *)instr; + + if (tg4->op != nir_texop_tg4) + continue; + + b->cursor = nir_before_instr(&tg4->instr); + + nir_ssa_def *results[4]; + int offset_index = nir_tex_instr_src_index(tg4, nir_tex_src_offset); + for (int i = 0; i < 4; i++) { + int num_srcs = tg4->num_srcs + 1 /* lod */; + if (offset_index < 0 && i < 3) + num_srcs++; + + nir_tex_instr *tex = nir_tex_instr_create(b->shader, num_srcs); + tex->op = nir_texop_txl; + tex->sampler_dim = tg4->sampler_dim; + tex->coord_components = tg4->coord_components; + tex->is_array = tg4->is_array; + tex->is_shadow = tg4->is_shadow; + tex->is_new_style_shadow = tg4->is_new_style_shadow; + tex->texture_index = tg4->texture_index; + tex->sampler_index = tg4->sampler_index; + tex->dest_type = tg4->dest_type; + + for (int j = 0; j < tg4->num_srcs; j++) { + nir_src_copy(&tex->src[j].src, &tg4->src[j].src, tex); + tex->src[j].src_type = tg4->src[j].src_type; + } + if (i != 3) { + nir_ssa_def *offset = + nir_vec2(b, nir_imm_int(b, offsets[i][0]), + nir_imm_int(b, offsets[i][1])); + if (offset_index < 0) { + tex->src[tg4->num_srcs].src = nir_src_for_ssa(offset); + tex->src[tg4->num_srcs].src_type = 
nir_tex_src_offset; + } else { + assert(nir_tex_instr_src_size(tex, offset_index) == 2); + nir_ssa_def *orig = nir_ssa_for_src( + b, tex->src[offset_index].src, 2); + tex->src[offset_index].src = + nir_src_for_ssa(nir_iadd(b, orig, offset)); + } + } + tex->src[num_srcs - 1].src = nir_src_for_ssa(nir_imm_float(b, 0)); + tex->src[num_srcs - 1].src_type = nir_tex_src_lod; + + nir_ssa_dest_init(&tex->instr, &tex->dest, + nir_tex_instr_dest_size(tex), 32, NULL); + nir_builder_instr_insert(b, &tex->instr); + + results[i] = nir_channel(b, &tex->dest.ssa, tg4->component); + } + + nir_ssa_def *result = nir_vec4(b, results[0], results[1], results[2], results[3]); + nir_ssa_def_rewrite_uses(&tg4->dest.ssa, nir_src_for_ssa(result)); + + nir_instr_remove(&tg4->instr); + + progress = true; + } + + return progress; +} + +static bool +lower_tg4_func(nir_function_impl *impl) +{ + void *mem_ctx = ralloc_parent(impl); + nir_builder b; + nir_builder_init(&b, impl); + + bool progress = false; + nir_foreach_block_safe(block, impl) { + progress |= lower_tg4(block, &b, mem_ctx); + } + + if (progress) + nir_metadata_preserve(impl, nir_metadata_block_index | + nir_metadata_dominance); + + return progress; +} + +bool +ir3_nir_lower_tg4_to_tex(nir_shader *shader) +{ + bool progress = false; + + nir_foreach_function(function, shader) { + if (function->impl) + progress |= lower_tg4_func(function->impl); + } + + return progress; +} diff -Nru mesa-18.3.3/src/freedreno/ir3/ir3_nir_trig.py mesa-19.0.1/src/freedreno/ir3/ir3_nir_trig.py --- mesa-18.3.3/src/freedreno/ir3/ir3_nir_trig.py 1970-01-01 00:00:00.000000000 +0000 +++ mesa-19.0.1/src/freedreno/ir3/ir3_nir_trig.py 2019-03-31 23:16:37.000000000 +0000 @@ -0,0 +1,51 @@ +# +# Copyright (C) 2016 Intel Corporation +# +# Permission is hereby granted, free of charge, to any person obtaining a +# copy of this software and associated documentation files (the "Software"), +# to deal in the Software without restriction, including without limitation +# the 
rights to use, copy, modify, merge, publish, distribute, sublicense, +# and/or sell copies of the Software, and to permit persons to whom the +# Software is furnished to do so, subject to the following conditions: +# +# The above copyright notice and this permission notice (including the next +# paragraph) shall be included in all copies or substantial portions of the +# Software. +# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL +# THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING +# FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS +# IN THE SOFTWARE. + +from __future__ import print_function + +import argparse +import sys + +trig_workarounds = [ + (('fsin', 'x'), ('fsin', ('fsub', ('fmul', 6.283185, ('ffract', ('fadd', ('fmul', 0.159155, 'x'), 0.5))), 3.141593))), + (('fcos', 'x'), ('fcos', ('fsub', ('fmul', 6.283185, ('ffract', ('fadd', ('fmul', 0.159155, 'x'), 0.5))), 3.141593))), +] + + +def main(): + parser = argparse.ArgumentParser() + parser.add_argument('-p', '--import-path', required=True) + args = parser.parse_args() + sys.path.insert(0, args.import_path) + run() + + +def run(): + import nir_algebraic # pylint: disable=import-error + + print('#include "ir3_nir.h"') + print(nir_algebraic.AlgebraicPass("ir3_nir_apply_trig_workarounds", + trig_workarounds).render()) + + +if __name__ == '__main__': + main() diff -Nru mesa-18.3.3/src/freedreno/ir3/ir3_print.c mesa-19.0.1/src/freedreno/ir3/ir3_print.c --- mesa-18.3.3/src/freedreno/ir3/ir3_print.c 1970-01-01 00:00:00.000000000 +0000 +++ mesa-19.0.1/src/freedreno/ir3/ir3_print.c 2019-03-31 23:16:37.000000000 +0000 @@ -0,0 +1,264 @@ +/* + * Copyright (C) 2014 Rob Clark + * + * Permission is hereby granted, 
free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + * Authors: + * Rob Clark + */ + +#include +#include + +#include "ir3.h" + +#define PTRID(x) ((unsigned long)(x)) + +static void print_instr_name(struct ir3_instruction *instr) +{ + if (!instr) + return; +#ifdef DEBUG + printf("%04u:", instr->serialno); +#endif + printf("%04u:", instr->name); + printf("%04u:", instr->ip); + printf("%03u: ", instr->depth); + + if (instr->flags & IR3_INSTR_SY) + printf("(sy)"); + if (instr->flags & IR3_INSTR_SS) + printf("(ss)"); + + if (is_meta(instr)) { + switch (instr->opc) { + case OPC_META_INPUT: printf("_meta:in"); break; + case OPC_META_FO: printf("_meta:fo"); break; + case OPC_META_FI: printf("_meta:fi"); break; + + /* shouldn't hit here.. 
just for debugging: */ + default: printf("_meta:%d", instr->opc); break; + } + } else if (instr->opc == OPC_MOV) { + static const char *type[] = { + [TYPE_F16] = "f16", + [TYPE_F32] = "f32", + [TYPE_U16] = "u16", + [TYPE_U32] = "u32", + [TYPE_S16] = "s16", + [TYPE_S32] = "s32", + [TYPE_U8] = "u8", + [TYPE_S8] = "s8", + }; + if (instr->cat1.src_type == instr->cat1.dst_type) + printf("mov"); + else + printf("cov"); + printf(".%s%s", type[instr->cat1.src_type], type[instr->cat1.dst_type]); + } else { + printf("%s", ir3_instr_name(instr)); + if (instr->flags & IR3_INSTR_3D) + printf(".3d"); + if (instr->flags & IR3_INSTR_A) + printf(".a"); + if (instr->flags & IR3_INSTR_O) + printf(".o"); + if (instr->flags & IR3_INSTR_P) + printf(".p"); + if (instr->flags & IR3_INSTR_S) + printf(".s"); + if (instr->flags & IR3_INSTR_S2EN) + printf(".s2en"); + } +} + +static void print_reg_name(struct ir3_register *reg) +{ + if ((reg->flags & (IR3_REG_FABS | IR3_REG_SABS)) && + (reg->flags & (IR3_REG_FNEG | IR3_REG_SNEG | IR3_REG_BNOT))) + printf("(absneg)"); + else if (reg->flags & (IR3_REG_FNEG | IR3_REG_SNEG | IR3_REG_BNOT)) + printf("(neg)"); + else if (reg->flags & (IR3_REG_FABS | IR3_REG_SABS)) + printf("(abs)"); + + if (reg->flags & IR3_REG_IMMED) { + printf("imm[%f,%d,0x%x]", reg->fim_val, reg->iim_val, reg->iim_val); + } else if (reg->flags & IR3_REG_ARRAY) { + printf("arr[id=%u, offset=%d, size=%u", reg->array.id, + reg->array.offset, reg->size); + /* for ARRAY we could have null src, for example first write + * instruction.. 
+ */ + if (reg->instr) { + printf(", _["); + print_instr_name(reg->instr); + printf("]"); + } + printf("]"); + } else if (reg->flags & IR3_REG_SSA) { + printf("_["); + print_instr_name(reg->instr); + printf("]"); + } else if (reg->flags & IR3_REG_RELATIV) { + if (reg->flags & IR3_REG_HALF) + printf("h"); + if (reg->flags & IR3_REG_CONST) + printf("c", reg->array.offset); + else + printf("\x1b[0;31mr\x1b[0m (%u)", reg->array.offset, reg->size); + } else { + if (reg->flags & IR3_REG_HALF) + printf("h"); + if (reg->flags & IR3_REG_CONST) + printf("c%u.%c", reg_num(reg), "xyzw"[reg_comp(reg)]); + else + printf("\x1b[0;31mr%u.%c\x1b[0m", reg_num(reg), "xyzw"[reg_comp(reg)]); + } +} + +static void +tab(int lvl) +{ + for (int i = 0; i < lvl; i++) + printf("\t"); +} + +static void +print_instr(struct ir3_instruction *instr, int lvl) +{ + unsigned i; + + tab(lvl); + + print_instr_name(instr); + for (i = 0; i < instr->regs_count; i++) { + struct ir3_register *reg = instr->regs[i]; + printf(i ? ", " : " "); + print_reg_name(reg); + } + + if (instr->address) { + printf(", address=_"); + printf("["); + print_instr_name(instr->address); + printf("]"); + } + + if (instr->cp.left) { + printf(", left=_"); + printf("["); + print_instr_name(instr->cp.left); + printf("]"); + } + + if (instr->cp.right) { + printf(", right=_"); + printf("["); + print_instr_name(instr->cp.right); + printf("]"); + } + + if (instr->opc == OPC_META_FO) { + printf(", off=%d", instr->fo.off); + } + + if (is_flow(instr) && instr->cat0.target) { + /* the predicate register src is implied: */ + if (instr->opc == OPC_BR) { + printf(" %sp0.x", instr->cat0.inv ? "!" 
: ""); + } + printf(", target=block%u", block_id(instr->cat0.target)); + } + + if (instr->deps_count) { + printf(", false-deps:"); + for (unsigned i = 0; i < instr->deps_count; i++) { + if (i > 0) + printf(", "); + printf("_["); + print_instr_name(instr->deps[i]); + printf("]"); + } + } + + printf("\n"); +} + +void ir3_print_instr(struct ir3_instruction *instr) +{ + print_instr(instr, 0); +} + +static void +print_block(struct ir3_block *block, int lvl) +{ + tab(lvl); printf("block%u {\n", block_id(block)); + + if (block->predecessors_count > 0) { + tab(lvl+1); + printf("pred: "); + for (unsigned i = 0; i < block->predecessors_count; i++) { + if (i) + printf(", "); + printf("block%u", block_id(block->predecessors[i])); + } + printf("\n"); + } + + list_for_each_entry (struct ir3_instruction, instr, &block->instr_list, node) { + print_instr(instr, lvl+1); + } + + tab(lvl+1); printf("/* keeps:\n"); + for (unsigned i = 0; i < block->keeps_count; i++) { + print_instr(block->keeps[i], lvl+2); + } + tab(lvl+1); printf(" */\n"); + + if (block->successors[1]) { + /* leading into if/else: */ + tab(lvl+1); + printf("/* succs: if _["); + print_instr_name(block->condition); + printf("] block%u; else block%u; */\n", + block_id(block->successors[0]), + block_id(block->successors[1])); + } else if (block->successors[0]) { + tab(lvl+1); + printf("/* succs: block%u; */\n", + block_id(block->successors[0])); + } + tab(lvl); printf("}\n"); +} + +void +ir3_print(struct ir3 *ir) +{ + list_for_each_entry (struct ir3_block, block, &ir->block_list, node) + print_block(block, 0); + + for (unsigned i = 0; i < ir->noutputs; i++) { + if (!ir->outputs[i]) + continue; + printf("out%d: ", i); + print_instr(ir->outputs[i], 0); + } +} diff -Nru mesa-18.3.3/src/freedreno/ir3/ir3_ra.c mesa-19.0.1/src/freedreno/ir3/ir3_ra.c --- mesa-18.3.3/src/freedreno/ir3/ir3_ra.c 1970-01-01 00:00:00.000000000 +0000 +++ mesa-19.0.1/src/freedreno/ir3/ir3_ra.c 2019-03-31 23:16:37.000000000 +0000 @@ -0,0 +1,1128 @@ +/* 
+ * Copyright (C) 2014 Rob Clark + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + * Authors: + * Rob Clark + */ + +#include "util/u_math.h" +#include "util/register_allocate.h" +#include "util/ralloc.h" +#include "util/bitset.h" + +#include "ir3.h" +#include "ir3_compiler.h" + +/* + * Register Assignment: + * + * Uses the register_allocate util, which implements graph coloring + * algo with interference classes. To handle the cases where we need + * consecutive registers (for example, texture sample instructions), + * we model these as larger (double/quad/etc) registers which conflict + * with the corresponding registers in other classes. + * + * Additionally we create additional classes for half-regs, which + * do not conflict with the full-reg classes. We do need at least + * sizes 1-4 (to deal w/ texture sample instructions output to half- + * reg). 
At the moment we don't create the higher order half-reg + * classes as half-reg frequently does not have enough precision + * for texture coords at higher resolutions. + * + * There are some additional cases that we need to handle specially, + * as the graph coloring algo doesn't understand "partial writes". + * For example, a sequence like: + * + * add r0.z, ... + * sam (f32)(xy)r0.x, ... + * ... + * sam (f32)(xyzw)r0.w, r0.x, ... ; 3d texture, so r0.xyz are coord + * + * In this scenario, we treat r0.xyz as class size 3, which is written + * (from a use/def perspective) at the 'add' instruction and ignore the + * subsequent partial writes to r0.xy. So the 'add r0.z, ...' is the + * defining instruction, as it is the first to partially write r0.xyz. + * + * Note i965 has a similar scenario, which they solve with a virtual + * LOAD_PAYLOAD instruction which gets turned into multiple MOV's after + * register assignment. But for us that is horrible from a scheduling + * standpoint. Instead what we do is use idea of 'definer' instruction. + * Ie. the first instruction (lowest ip) to write to the variable is the + * one we consider from use/def perspective when building interference + * graph. (Other instructions which write other variable components + * just define the variable some more.) + * + * Arrays of arbitrary size are handled via pre-coloring a consecutive + * sequence of registers. Additional scalar (single component) reg + * names are allocated starting at ctx->class_base[total_class_count] + * (see arr->base), which are pre-colored. In the use/def graph direct + * access is treated as a single element use/def, and indirect access + * is treated as use or def of all array elements. (Only the first + * def is tracked, in case of multiple indirect writes, etc.) + * + * TODO arrays that fit in one of the pre-defined class sizes should + * not need to be pre-colored, but instead could be given a normal + * vreg name. 
(Ignoring this for now since it is a good way to work + * out the kinks with arbitrary sized arrays.) + * + * TODO might be easier for debugging to split this into two passes, + * the first assigning vreg names in a way that we could ir3_print() + * the result. + */ + +static const unsigned class_sizes[] = { + 1, 2, 3, 4, + 4 + 4, /* txd + 1d/2d */ + 4 + 6, /* txd + 3d */ +}; +#define class_count ARRAY_SIZE(class_sizes) + +static const unsigned half_class_sizes[] = { + 1, 2, 3, 4, +}; +#define half_class_count ARRAY_SIZE(half_class_sizes) + +/* seems to just be used for compute shaders? Seems like vec1 and vec3 + * are sufficient (for now?) + */ +static const unsigned high_class_sizes[] = { + 1, 3, +}; +#define high_class_count ARRAY_SIZE(high_class_sizes) + +#define total_class_count (class_count + half_class_count + high_class_count) + +/* Below a0.x are normal regs. RA doesn't need to assign a0.x/p0.x. */ +#define NUM_REGS (4 * 48) /* r0 to r47 */ +#define NUM_HIGH_REGS (4 * 8) /* r48 to r55 */ +#define FIRST_HIGH_REG (4 * 48) +/* Number of virtual regs in a given class: */ +#define CLASS_REGS(i) (NUM_REGS - (class_sizes[i] - 1)) +#define HALF_CLASS_REGS(i) (NUM_REGS - (half_class_sizes[i] - 1)) +#define HIGH_CLASS_REGS(i) (NUM_HIGH_REGS - (high_class_sizes[i] - 1)) + +#define HALF_OFFSET (class_count) +#define HIGH_OFFSET (class_count + half_class_count) + +/* register-set, created one time, used for all shaders: */ +struct ir3_ra_reg_set { + struct ra_regs *regs; + unsigned int classes[class_count]; + unsigned int half_classes[half_class_count]; + unsigned int high_classes[high_class_count]; + /* maps flat virtual register space to base gpr: */ + uint16_t *ra_reg_to_gpr; + /* maps cls,gpr to flat virtual register space: */ + uint16_t **gpr_to_ra_reg; +}; + +static void +build_q_values(unsigned int **q_values, unsigned off, + const unsigned *sizes, unsigned count) +{ + for (unsigned i = 0; i < count; i++) { + q_values[i + off] = rzalloc_array(q_values, 
unsigned, total_class_count); + + /* From register_allocate.c: + * + * q(B,C) (indexed by C, B is this register class) in + * Runeson/Nyström paper. This is "how many registers of B could + * the worst choice register from C conflict with". + * + * If we just let the register allocation algorithm compute these + * values, is extremely expensive. However, since all of our + * registers are laid out, we can very easily compute them + * ourselves. View the register from C as fixed starting at GRF n + * somewhere in the middle, and the register from B as sliding back + * and forth. Then the first register to conflict from B is the + * one starting at n - class_size[B] + 1 and the last register to + * conflict will start at n + class_size[B] - 1. Therefore, the + * number of conflicts from B is class_size[B] + class_size[C] - 1. + * + * +-+-+-+-+-+-+ +-+-+-+-+-+-+ + * B | | | | | |n| --> | | | | | | | + * +-+-+-+-+-+-+ +-+-+-+-+-+-+ + * +-+-+-+-+-+ + * C |n| | | | | + * +-+-+-+-+-+ + * + * (Idea copied from brw_fs_reg_allocate.cpp) + */ + for (unsigned j = 0; j < count; j++) + q_values[i + off][j + off] = sizes[i] + sizes[j] - 1; + } +} + +/* One-time setup of RA register-set, which describes all the possible + * "virtual" registers and their interferences. Ie. double register + * occupies (and conflicts with) two single registers, and so forth. + * Since registers do not need to be aligned to their class size, they + * can conflict with other registers in the same class too. Ie: + * + * Single (base) | Double + * --------------+--------------- + * R0 | D0 + * R1 | D0 D1 + * R2 | D1 D2 + * R3 | D2 + * .. and so on.. + * + * (NOTE the disassembler uses notation like r0.x/y/z/w but those are + * really just four scalar registers. Don't let that confuse you.) 
+ */ +struct ir3_ra_reg_set * +ir3_ra_alloc_reg_set(struct ir3_compiler *compiler) +{ + struct ir3_ra_reg_set *set = rzalloc(compiler, struct ir3_ra_reg_set); + unsigned ra_reg_count, reg, first_half_reg, first_high_reg, base; + unsigned int **q_values; + + /* calculate # of regs across all classes: */ + ra_reg_count = 0; + for (unsigned i = 0; i < class_count; i++) + ra_reg_count += CLASS_REGS(i); + for (unsigned i = 0; i < half_class_count; i++) + ra_reg_count += HALF_CLASS_REGS(i); + for (unsigned i = 0; i < high_class_count; i++) + ra_reg_count += HIGH_CLASS_REGS(i); + + /* allocate and populate q_values: */ + q_values = ralloc_array(set, unsigned *, total_class_count); + + build_q_values(q_values, 0, class_sizes, class_count); + build_q_values(q_values, HALF_OFFSET, half_class_sizes, half_class_count); + build_q_values(q_values, HIGH_OFFSET, high_class_sizes, high_class_count); + + /* allocate the reg-set.. */ + set->regs = ra_alloc_reg_set(set, ra_reg_count, true); + set->ra_reg_to_gpr = ralloc_array(set, uint16_t, ra_reg_count); + set->gpr_to_ra_reg = ralloc_array(set, uint16_t *, total_class_count); + + /* .. 
and classes */ + reg = 0; + for (unsigned i = 0; i < class_count; i++) { + set->classes[i] = ra_alloc_reg_class(set->regs); + + set->gpr_to_ra_reg[i] = ralloc_array(set, uint16_t, CLASS_REGS(i)); + + for (unsigned j = 0; j < CLASS_REGS(i); j++) { + ra_class_add_reg(set->regs, set->classes[i], reg); + + set->ra_reg_to_gpr[reg] = j; + set->gpr_to_ra_reg[i][j] = reg; + + for (unsigned br = j; br < j + class_sizes[i]; br++) + ra_add_transitive_reg_conflict(set->regs, br, reg); + + reg++; + } + } + + first_half_reg = reg; + base = HALF_OFFSET; + + for (unsigned i = 0; i < half_class_count; i++) { + set->half_classes[i] = ra_alloc_reg_class(set->regs); + + set->gpr_to_ra_reg[base + i] = + ralloc_array(set, uint16_t, HALF_CLASS_REGS(i)); + + for (unsigned j = 0; j < HALF_CLASS_REGS(i); j++) { + ra_class_add_reg(set->regs, set->half_classes[i], reg); + + set->ra_reg_to_gpr[reg] = j; + set->gpr_to_ra_reg[base + i][j] = reg; + + for (unsigned br = j; br < j + half_class_sizes[i]; br++) + ra_add_transitive_reg_conflict(set->regs, br + first_half_reg, reg); + + reg++; + } + } + + first_high_reg = reg; + base = HIGH_OFFSET; + + for (unsigned i = 0; i < high_class_count; i++) { + set->high_classes[i] = ra_alloc_reg_class(set->regs); + + set->gpr_to_ra_reg[base + i] = + ralloc_array(set, uint16_t, HIGH_CLASS_REGS(i)); + + for (unsigned j = 0; j < HIGH_CLASS_REGS(i); j++) { + ra_class_add_reg(set->regs, set->high_classes[i], reg); + + set->ra_reg_to_gpr[reg] = j; + set->gpr_to_ra_reg[base + i][j] = reg; + + for (unsigned br = j; br < j + high_class_sizes[i]; br++) + ra_add_transitive_reg_conflict(set->regs, br + first_high_reg, reg); + + reg++; + } + } + + /* starting a6xx, half precision regs conflict w/ full precision regs: */ + if (compiler->gpu_id >= 600) { + /* because of transitivity, we can get away with just setting up + * conflicts between the first class of full and half regs: + */ + for (unsigned j = 0; j < CLASS_REGS(0) / 2; j++) { + unsigned freg = 
set->gpr_to_ra_reg[0][j]; + unsigned hreg0 = set->gpr_to_ra_reg[HALF_OFFSET][(j * 2) + 0]; + unsigned hreg1 = set->gpr_to_ra_reg[HALF_OFFSET][(j * 2) + 1]; + + ra_add_transitive_reg_conflict(set->regs, freg, hreg0); + ra_add_transitive_reg_conflict(set->regs, freg, hreg1); + } + + // TODO also need to update q_values, but for now: + ra_set_finalize(set->regs, NULL); + } else { + ra_set_finalize(set->regs, q_values); + } + + ralloc_free(q_values); + + return set; +} + +/* additional block-data (per-block) */ +struct ir3_ra_block_data { + BITSET_WORD *def; /* variables defined before used in block */ + BITSET_WORD *use; /* variables used before defined in block */ + BITSET_WORD *livein; /* which defs reach entry point of block */ + BITSET_WORD *liveout; /* which defs reach exit point of block */ +}; + +/* additional instruction-data (per-instruction) */ +struct ir3_ra_instr_data { + /* cached instruction 'definer' info: */ + struct ir3_instruction *defn; + int off, sz, cls; +}; + +/* register-assign context, per-shader */ +struct ir3_ra_ctx { + struct ir3 *ir; + gl_shader_stage type; + bool frag_face; + + struct ir3_ra_reg_set *set; + struct ra_graph *g; + unsigned alloc_count; + /* one per class, plus one slot for arrays: */ + unsigned class_alloc_count[total_class_count + 1]; + unsigned class_base[total_class_count + 1]; + unsigned instr_cnt; + unsigned *def, *use; /* def/use table */ + struct ir3_ra_instr_data *instrd; +}; + +/* does it conflict? 
*/ +static inline bool +intersects(unsigned a_start, unsigned a_end, unsigned b_start, unsigned b_end) +{ + return !((a_start >= b_end) || (b_start >= a_end)); +} + +static bool +is_half(struct ir3_instruction *instr) +{ + return !!(instr->regs[0]->flags & IR3_REG_HALF); +} + +static bool +is_high(struct ir3_instruction *instr) +{ + return !!(instr->regs[0]->flags & IR3_REG_HIGH); +} + +static int +size_to_class(unsigned sz, bool half, bool high) +{ + if (high) { + for (unsigned i = 0; i < high_class_count; i++) + if (high_class_sizes[i] >= sz) + return i + HIGH_OFFSET; + } else if (half) { + for (unsigned i = 0; i < half_class_count; i++) + if (half_class_sizes[i] >= sz) + return i + HALF_OFFSET; + } else { + for (unsigned i = 0; i < class_count; i++) + if (class_sizes[i] >= sz) + return i; + } + debug_assert(0); + return -1; +} + +static bool +writes_gpr(struct ir3_instruction *instr) +{ + if (is_store(instr)) + return false; + /* is dest a normal temp register: */ + struct ir3_register *reg = instr->regs[0]; + if (reg->flags & (IR3_REG_CONST | IR3_REG_IMMED)) + return false; + if ((reg->num == regid(REG_A0, 0)) || + (reg->num == regid(REG_P0, 0))) + return false; + return true; +} + +static bool +instr_before(struct ir3_instruction *a, struct ir3_instruction *b) +{ + if (a->flags & IR3_INSTR_UNUSED) + return false; + return (a->ip < b->ip); +} + +static struct ir3_instruction * +get_definer(struct ir3_ra_ctx *ctx, struct ir3_instruction *instr, + int *sz, int *off) +{ + struct ir3_ra_instr_data *id = &ctx->instrd[instr->ip]; + struct ir3_instruction *d = NULL; + + if (id->defn) { + *sz = id->sz; + *off = id->off; + return id->defn; + } + + if (instr->opc == OPC_META_FI) { + /* What about the case where collect is subset of array, we + * need to find the distance between where actual array starts + * and fanin.. that probably doesn't happen currently. 
+ */ + struct ir3_register *src; + int dsz, doff; + + /* note: don't use foreach_ssa_src as this gets called once + * while assigning regs (which clears SSA flag) + */ + foreach_src_n(src, n, instr) { + struct ir3_instruction *dd; + if (!src->instr) + continue; + + dd = get_definer(ctx, src->instr, &dsz, &doff); + + if ((!d) || instr_before(dd, d)) { + d = dd; + *sz = dsz; + *off = doff - n; + } + } + + } else if (instr->cp.right || instr->cp.left) { + /* covers also the meta:fo case, which ends up w/ single + * scalar instructions for each component: + */ + struct ir3_instruction *f = ir3_neighbor_first(instr); + + /* by definition, the entire sequence forms one linked list + * of single scalar register nodes (even if some of them may + * be fanouts from a texture sample (for example) instr. We + * just need to walk the list finding the first element of + * the group defined (lowest ip) + */ + int cnt = 0; + + /* need to skip over unused in the group: */ + while (f && (f->flags & IR3_INSTR_UNUSED)) { + f = f->cp.right; + cnt++; + } + + while (f) { + if ((!d) || instr_before(f, d)) + d = f; + if (f == instr) + *off = cnt; + f = f->cp.right; + cnt++; + } + + *sz = cnt; + + } else { + /* second case is looking directly at the instruction which + * produces multiple values (eg, texture sample), rather + * than the fanout nodes that point back to that instruction. + * This isn't quite right, because it may be part of a larger + * group, such as: + * + * sam (f32)(xyzw)r0.x, ... + * add r1.x, ... + * add r1.y, ... + * sam (f32)(xyzw)r2.x, r0.w <-- (r0.w, r1.x, r1.y) + * + * need to come up with a better way to handle that case. 
+ */ + if (instr->address) { + *sz = instr->regs[0]->size; + } else { + *sz = util_last_bit(instr->regs[0]->wrmask); + } + *off = 0; + d = instr; + } + + if (d->opc == OPC_META_FO) { + struct ir3_instruction *dd; + int dsz, doff; + + dd = get_definer(ctx, d->regs[1]->instr, &dsz, &doff); + + /* by definition, should come before: */ + debug_assert(instr_before(dd, d)); + + *sz = MAX2(*sz, dsz); + + debug_assert(instr->opc == OPC_META_FO); + *off = MAX2(*off, instr->fo.off); + + d = dd; + } + + debug_assert(d->opc != OPC_META_FO); + + id->defn = d; + id->sz = *sz; + id->off = *off; + + return d; +} + +static void +ra_block_find_definers(struct ir3_ra_ctx *ctx, struct ir3_block *block) +{ + list_for_each_entry (struct ir3_instruction, instr, &block->instr_list, node) { + struct ir3_ra_instr_data *id = &ctx->instrd[instr->ip]; + if (instr->regs_count == 0) + continue; + /* couple special cases: */ + if (writes_addr(instr) || writes_pred(instr)) { + id->cls = -1; + } else if (instr->regs[0]->flags & IR3_REG_ARRAY) { + id->cls = total_class_count; + } else { + id->defn = get_definer(ctx, instr, &id->sz, &id->off); + id->cls = size_to_class(id->sz, is_half(id->defn), is_high(id->defn)); + } + } +} + +/* give each instruction a name (and ip), and count up the # of names + * of each class + */ +static void +ra_block_name_instructions(struct ir3_ra_ctx *ctx, struct ir3_block *block) +{ + list_for_each_entry (struct ir3_instruction, instr, &block->instr_list, node) { + struct ir3_ra_instr_data *id = &ctx->instrd[instr->ip]; + +#ifdef DEBUG + instr->name = ~0; +#endif + + ctx->instr_cnt++; + + if (instr->regs_count == 0) + continue; + + if (!writes_gpr(instr)) + continue; + + if (id->defn != instr) + continue; + + /* arrays which don't fit in one of the pre-defined class + * sizes are pre-colored: + */ + if ((id->cls >= 0) && (id->cls < total_class_count)) { + instr->name = ctx->class_alloc_count[id->cls]++; + ctx->alloc_count++; + } + } +} + +static void +ra_init(struct 
ir3_ra_ctx *ctx) +{ + unsigned n, base; + + ir3_clear_mark(ctx->ir); + n = ir3_count_instructions(ctx->ir); + + ctx->instrd = rzalloc_array(NULL, struct ir3_ra_instr_data, n); + + list_for_each_entry (struct ir3_block, block, &ctx->ir->block_list, node) { + ra_block_find_definers(ctx, block); + } + + list_for_each_entry (struct ir3_block, block, &ctx->ir->block_list, node) { + ra_block_name_instructions(ctx, block); + } + + /* figure out the base register name for each class. The + * actual ra name is class_base[cls] + instr->name; + */ + ctx->class_base[0] = 0; + for (unsigned i = 1; i <= total_class_count; i++) { + ctx->class_base[i] = ctx->class_base[i-1] + + ctx->class_alloc_count[i-1]; + } + + /* and vreg names for array elements: */ + base = ctx->class_base[total_class_count]; + list_for_each_entry (struct ir3_array, arr, &ctx->ir->array_list, node) { + arr->base = base; + ctx->class_alloc_count[total_class_count] += arr->length; + base += arr->length; + } + ctx->alloc_count += ctx->class_alloc_count[total_class_count]; + + ctx->g = ra_alloc_interference_graph(ctx->set->regs, ctx->alloc_count); + ralloc_steal(ctx->g, ctx->instrd); + ctx->def = rzalloc_array(ctx->g, unsigned, ctx->alloc_count); + ctx->use = rzalloc_array(ctx->g, unsigned, ctx->alloc_count); +} + +static unsigned +__ra_name(struct ir3_ra_ctx *ctx, int cls, struct ir3_instruction *defn) +{ + unsigned name; + debug_assert(cls >= 0); + debug_assert(cls < total_class_count); /* we shouldn't get arrays here.. 
*/ + name = ctx->class_base[cls] + defn->name; + debug_assert(name < ctx->alloc_count); + return name; +} + +static int +ra_name(struct ir3_ra_ctx *ctx, struct ir3_ra_instr_data *id) +{ + /* TODO handle name mapping for arrays */ + return __ra_name(ctx, id->cls, id->defn); +} + +static void +ra_destroy(struct ir3_ra_ctx *ctx) +{ + ralloc_free(ctx->g); +} + +static void +ra_block_compute_live_ranges(struct ir3_ra_ctx *ctx, struct ir3_block *block) +{ + struct ir3_ra_block_data *bd; + unsigned bitset_words = BITSET_WORDS(ctx->alloc_count); + +#define def(name, instr) \ + do { \ + /* defined on first write: */ \ + if (!ctx->def[name]) \ + ctx->def[name] = instr->ip; \ + ctx->use[name] = instr->ip; \ + BITSET_SET(bd->def, name); \ + } while(0); + +#define use(name, instr) \ + do { \ + ctx->use[name] = MAX2(ctx->use[name], instr->ip); \ + if (!BITSET_TEST(bd->def, name)) \ + BITSET_SET(bd->use, name); \ + } while(0); + + bd = rzalloc(ctx->g, struct ir3_ra_block_data); + + bd->def = rzalloc_array(bd, BITSET_WORD, bitset_words); + bd->use = rzalloc_array(bd, BITSET_WORD, bitset_words); + bd->livein = rzalloc_array(bd, BITSET_WORD, bitset_words); + bd->liveout = rzalloc_array(bd, BITSET_WORD, bitset_words); + + block->data = bd; + + list_for_each_entry (struct ir3_instruction, instr, &block->instr_list, node) { + struct ir3_instruction *src; + struct ir3_register *reg; + + if (instr->regs_count == 0) + continue; + + /* There are a couple special cases to deal with here: + * + * fanout: used to split values from a higher class to a lower + * class, for example split the results of a texture fetch + * into individual scalar values; We skip over these from + * a 'def' perspective, and for a 'use' we walk the chain + * up to the defining instruction. + * + * fanin: used to collect values from lower class and assemble + * them together into a higher class, for example arguments + * to texture sample instructions; We consider these to be + * defined at the earliest fanin source. 
+ * + * Most of this is handled in the get_definer() helper. + * + * In either case, we trace the instruction back to the original + * definer and consider that as the def/use ip. + */ + + if (writes_gpr(instr)) { + struct ir3_ra_instr_data *id = &ctx->instrd[instr->ip]; + struct ir3_register *dst = instr->regs[0]; + + if (dst->flags & IR3_REG_ARRAY) { + struct ir3_array *arr = + ir3_lookup_array(ctx->ir, dst->array.id); + unsigned i; + + arr->start_ip = MIN2(arr->start_ip, instr->ip); + arr->end_ip = MAX2(arr->end_ip, instr->ip); + + /* set the node class now.. in case we don't encounter + * this array dst again. From register_alloc algo's + * perspective, these are all single/scalar regs: + */ + for (i = 0; i < arr->length; i++) { + unsigned name = arr->base + i; + ra_set_node_class(ctx->g, name, ctx->set->classes[0]); + } + + /* indirect write is treated like a write to all array + * elements, since we don't know which one is actually + * written: + */ + if (dst->flags & IR3_REG_RELATIV) { + for (i = 0; i < arr->length; i++) { + unsigned name = arr->base + i; + def(name, instr); + } + } else { + unsigned name = arr->base + dst->array.offset; + def(name, instr); + } + + } else if (id->defn == instr) { + unsigned name = ra_name(ctx, id); + + /* since we are in SSA at this point: */ + debug_assert(!BITSET_TEST(bd->use, name)); + + def(name, id->defn); + + if (is_high(id->defn)) { + ra_set_node_class(ctx->g, name, + ctx->set->high_classes[id->cls - HIGH_OFFSET]); + } else if (is_half(id->defn)) { + ra_set_node_class(ctx->g, name, + ctx->set->half_classes[id->cls - HALF_OFFSET]); + } else { + ra_set_node_class(ctx->g, name, + ctx->set->classes[id->cls]); + } + } + } + + foreach_src(reg, instr) { + if (reg->flags & IR3_REG_ARRAY) { + struct ir3_array *arr = + ir3_lookup_array(ctx->ir, reg->array.id); + arr->start_ip = MIN2(arr->start_ip, instr->ip); + arr->end_ip = MAX2(arr->end_ip, instr->ip); + + /* indirect read is treated like a read fromall array + * elements, 
since we don't know which one is actually + * read: + */ + if (reg->flags & IR3_REG_RELATIV) { + unsigned i; + for (i = 0; i < arr->length; i++) { + unsigned name = arr->base + i; + use(name, instr); + } + } else { + unsigned name = arr->base + reg->array.offset; + use(name, instr); + /* NOTE: arrays are not SSA so unconditionally + * set use bit: + */ + BITSET_SET(bd->use, name); + debug_assert(reg->array.offset < arr->length); + } + } else if ((src = ssa(reg)) && writes_gpr(src)) { + unsigned name = ra_name(ctx, &ctx->instrd[src->ip]); + use(name, instr); + } + } + } +} + +static bool +ra_compute_livein_liveout(struct ir3_ra_ctx *ctx) +{ + unsigned bitset_words = BITSET_WORDS(ctx->alloc_count); + bool progress = false; + + list_for_each_entry (struct ir3_block, block, &ctx->ir->block_list, node) { + struct ir3_ra_block_data *bd = block->data; + + /* update livein: */ + for (unsigned i = 0; i < bitset_words; i++) { + BITSET_WORD new_livein = + (bd->use[i] | (bd->liveout[i] & ~bd->def[i])); + + if (new_livein & ~bd->livein[i]) { + bd->livein[i] |= new_livein; + progress = true; + } + } + + /* update liveout: */ + for (unsigned j = 0; j < ARRAY_SIZE(block->successors); j++) { + struct ir3_block *succ = block->successors[j]; + struct ir3_ra_block_data *succ_bd; + + if (!succ) + continue; + + succ_bd = succ->data; + + for (unsigned i = 0; i < bitset_words; i++) { + BITSET_WORD new_liveout = + (succ_bd->livein[i] & ~bd->liveout[i]); + + if (new_liveout) { + bd->liveout[i] |= new_liveout; + progress = true; + } + } + } + } + + return progress; +} + +static void +print_bitset(const char *name, BITSET_WORD *bs, unsigned cnt) +{ + bool first = true; + debug_printf(" %s:", name); + for (unsigned i = 0; i < cnt; i++) { + if (BITSET_TEST(bs, i)) { + if (!first) + debug_printf(","); + debug_printf(" %04u", i); + first = false; + } + } + debug_printf("\n"); +} + +static void +ra_add_interference(struct ir3_ra_ctx *ctx) +{ + struct ir3 *ir = ctx->ir; + + /* initialize array live 
ranges: */ + list_for_each_entry (struct ir3_array, arr, &ir->array_list, node) { + arr->start_ip = ~0; + arr->end_ip = 0; + } + + /* compute live ranges (use/def) on a block level, also updating + * block's def/use bitmasks (used below to calculate per-block + * livein/liveout): + */ + list_for_each_entry (struct ir3_block, block, &ir->block_list, node) { + ra_block_compute_live_ranges(ctx, block); + } + + /* update per-block livein/liveout: */ + while (ra_compute_livein_liveout(ctx)) {} + + if (ir3_shader_debug & IR3_DBG_OPTMSGS) { + debug_printf("AFTER LIVEIN/OUT:\n"); + ir3_print(ir); + list_for_each_entry (struct ir3_block, block, &ir->block_list, node) { + struct ir3_ra_block_data *bd = block->data; + debug_printf("block%u:\n", block_id(block)); + print_bitset(" def", bd->def, ctx->alloc_count); + print_bitset(" use", bd->use, ctx->alloc_count); + print_bitset(" l/i", bd->livein, ctx->alloc_count); + print_bitset(" l/o", bd->liveout, ctx->alloc_count); + } + list_for_each_entry (struct ir3_array, arr, &ir->array_list, node) { + debug_printf("array%u:\n", arr->id); + debug_printf(" length: %u\n", arr->length); + debug_printf(" start_ip: %u\n", arr->start_ip); + debug_printf(" end_ip: %u\n", arr->end_ip); + } + } + + /* extend start/end ranges based on livein/liveout info from cfg: */ + list_for_each_entry (struct ir3_block, block, &ir->block_list, node) { + struct ir3_ra_block_data *bd = block->data; + + for (unsigned i = 0; i < ctx->alloc_count; i++) { + if (BITSET_TEST(bd->livein, i)) { + ctx->def[i] = MIN2(ctx->def[i], block->start_ip); + ctx->use[i] = MAX2(ctx->use[i], block->start_ip); + } + + if (BITSET_TEST(bd->liveout, i)) { + ctx->def[i] = MIN2(ctx->def[i], block->end_ip); + ctx->use[i] = MAX2(ctx->use[i], block->end_ip); + } + } + + list_for_each_entry (struct ir3_array, arr, &ctx->ir->array_list, node) { + for (unsigned i = 0; i < arr->length; i++) { + if (BITSET_TEST(bd->livein, i + arr->base)) { + arr->start_ip = MIN2(arr->start_ip, 
block->start_ip); + } + if (BITSET_TEST(bd->livein, i + arr->base)) { + arr->end_ip = MAX2(arr->end_ip, block->end_ip); + } + } + } + } + + /* need to fix things up to keep outputs live: */ + for (unsigned i = 0; i < ir->noutputs; i++) { + struct ir3_instruction *instr = ir->outputs[i]; + if (!instr) + continue; + unsigned name = ra_name(ctx, &ctx->instrd[instr->ip]); + ctx->use[name] = ctx->instr_cnt; + } + + for (unsigned i = 0; i < ctx->alloc_count; i++) { + for (unsigned j = 0; j < ctx->alloc_count; j++) { + if (intersects(ctx->def[i], ctx->use[i], + ctx->def[j], ctx->use[j])) { + ra_add_node_interference(ctx->g, i, j); + } + } + } +} + +/* some instructions need fix-up if dst register is half precision: */ +static void fixup_half_instr_dst(struct ir3_instruction *instr) +{ + switch (opc_cat(instr->opc)) { + case 1: /* move instructions */ + instr->cat1.dst_type = half_type(instr->cat1.dst_type); + break; + case 3: + switch (instr->opc) { + case OPC_MAD_F32: + instr->opc = OPC_MAD_F16; + break; + case OPC_SEL_B32: + instr->opc = OPC_SEL_B16; + break; + case OPC_SEL_S32: + instr->opc = OPC_SEL_S16; + break; + case OPC_SEL_F32: + instr->opc = OPC_SEL_F16; + break; + case OPC_SAD_S32: + instr->opc = OPC_SAD_S16; + break; + /* instructions may already be fixed up: */ + case OPC_MAD_F16: + case OPC_SEL_B16: + case OPC_SEL_S16: + case OPC_SEL_F16: + case OPC_SAD_S16: + break; + default: + assert(0); + break; + } + break; + case 5: + instr->cat5.type = half_type(instr->cat5.type); + break; + } +} +/* some instructions need fix-up if src register is half precision: */ +static void fixup_half_instr_src(struct ir3_instruction *instr) +{ + switch (instr->opc) { + case OPC_MOV: + instr->cat1.src_type = half_type(instr->cat1.src_type); + break; + default: + break; + } +} + +/* NOTE: instr could be NULL for IR3_REG_ARRAY case, for the first + * array access(es) which do not have any previous access to depend + * on from scheduling point of view + */ +static void 
+reg_assign(struct ir3_ra_ctx *ctx, struct ir3_register *reg, + struct ir3_instruction *instr) +{ + struct ir3_ra_instr_data *id; + + if (reg->flags & IR3_REG_ARRAY) { + struct ir3_array *arr = + ir3_lookup_array(ctx->ir, reg->array.id); + unsigned name = arr->base + reg->array.offset; + unsigned r = ra_get_node_reg(ctx->g, name); + unsigned num = ctx->set->ra_reg_to_gpr[r]; + + if (reg->flags & IR3_REG_RELATIV) { + reg->array.offset = num; + } else { + reg->num = num; + reg->flags &= ~IR3_REG_SSA; + } + + reg->flags &= ~IR3_REG_ARRAY; + } else if ((id = &ctx->instrd[instr->ip]) && id->defn) { + unsigned name = ra_name(ctx, id); + unsigned r = ra_get_node_reg(ctx->g, name); + unsigned num = ctx->set->ra_reg_to_gpr[r] + id->off; + + debug_assert(!(reg->flags & IR3_REG_RELATIV)); + + if (is_high(id->defn)) + num += FIRST_HIGH_REG; + + reg->num = num; + reg->flags &= ~IR3_REG_SSA; + + if (is_half(id->defn)) + reg->flags |= IR3_REG_HALF; + } +} + +static void +ra_block_alloc(struct ir3_ra_ctx *ctx, struct ir3_block *block) +{ + list_for_each_entry (struct ir3_instruction, instr, &block->instr_list, node) { + struct ir3_register *reg; + + if (instr->regs_count == 0) + continue; + + if (writes_gpr(instr)) { + reg_assign(ctx, instr->regs[0], instr); + if (instr->regs[0]->flags & IR3_REG_HALF) + fixup_half_instr_dst(instr); + } + + foreach_src_n(reg, n, instr) { + struct ir3_instruction *src = reg->instr; + /* Note: reg->instr could be null for IR3_REG_ARRAY */ + if (!(src || (reg->flags & IR3_REG_ARRAY))) + continue; + reg_assign(ctx, instr->regs[n+1], src); + if (instr->regs[n+1]->flags & IR3_REG_HALF) + fixup_half_instr_src(instr); + } + } +} + +static int +ra_alloc(struct ir3_ra_ctx *ctx) +{ + /* pre-assign array elements: + */ + list_for_each_entry (struct ir3_array, arr, &ctx->ir->array_list, node) { + unsigned base = 0; + + if (arr->end_ip == 0) + continue; + + /* figure out what else we conflict with which has already + * been assigned: + */ +retry: + 
list_for_each_entry (struct ir3_array, arr2, &ctx->ir->array_list, node) { + if (arr2 == arr) + break; + if (arr2->end_ip == 0) + continue; + /* if it intersects with liverange AND register range.. */ + if (intersects(arr->start_ip, arr->end_ip, + arr2->start_ip, arr2->end_ip) && + intersects(base, base + arr->length, + arr2->reg, arr2->reg + arr2->length)) { + base = MAX2(base, arr2->reg + arr2->length); + goto retry; + } + } + + arr->reg = base; + + for (unsigned i = 0; i < arr->length; i++) { + unsigned name, reg; + + name = arr->base + i; + reg = ctx->set->gpr_to_ra_reg[0][base++]; + + ra_set_node_reg(ctx->g, name, reg); + } + } + + if (!ra_allocate(ctx->g)) + return -1; + + list_for_each_entry (struct ir3_block, block, &ctx->ir->block_list, node) { + ra_block_alloc(ctx, block); + } + + return 0; +} + +int ir3_ra(struct ir3 *ir, gl_shader_stage type, + bool frag_coord, bool frag_face) +{ + struct ir3_ra_ctx ctx = { + .ir = ir, + .type = type, + .frag_face = frag_face, + .set = ir->compiler->set, + }; + int ret; + + ra_init(&ctx); + ra_add_interference(&ctx); + ret = ra_alloc(&ctx); + ra_destroy(&ctx); + + return ret; +} diff -Nru mesa-18.3.3/src/freedreno/ir3/ir3_sched.c mesa-19.0.1/src/freedreno/ir3/ir3_sched.c --- mesa-18.3.3/src/freedreno/ir3/ir3_sched.c 1970-01-01 00:00:00.000000000 +0000 +++ mesa-19.0.1/src/freedreno/ir3/ir3_sched.c 2019-03-31 23:16:37.000000000 +0000 @@ -0,0 +1,818 @@ +/* + * Copyright (C) 2014 Rob Clark + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + 
* paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + * Authors: + * Rob Clark + */ + + +#include "util/u_math.h" + +#include "ir3.h" + +/* + * Instruction Scheduling: + * + * A recursive depth based scheduling algo. Recursively find an eligible + * instruction to schedule from the deepest instruction (recursing through + * it's unscheduled src instructions). Normally this would result in a + * lot of re-traversal of the same instructions, so we cache results in + * instr->data (and clear cached results that would be no longer valid + * after scheduling an instruction). + * + * There are a few special cases that need to be handled, since sched + * is currently independent of register allocation. Usages of address + * register (a0.x) or predicate register (p0.x) must be serialized. Ie. + * if you have two pairs of instructions that write the same special + * register and then read it, then those pairs cannot be interleaved. + * To solve this, when we are in such a scheduling "critical section", + * and we encounter a conflicting write to a special register, we try + * to schedule any remaining instructions that use that value first. 
+ */ + +struct ir3_sched_ctx { + struct ir3_block *block; /* the current block */ + struct list_head depth_list; /* depth sorted unscheduled instrs */ + struct ir3_instruction *scheduled; /* last scheduled instr XXX remove*/ + struct ir3_instruction *addr; /* current a0.x user, if any */ + struct ir3_instruction *pred; /* current p0.x user, if any */ + bool error; +}; + +static bool is_sfu_or_mem(struct ir3_instruction *instr) +{ + return is_sfu(instr) || is_mem(instr); +} + +#define NULL_INSTR ((void *)~0) + +static void +clear_cache(struct ir3_sched_ctx *ctx, struct ir3_instruction *instr) +{ + list_for_each_entry (struct ir3_instruction, instr2, &ctx->depth_list, node) { + if ((instr2->data == instr) || (instr2->data == NULL_INSTR) || !instr) + instr2->data = NULL; + } +} + +static void +schedule(struct ir3_sched_ctx *ctx, struct ir3_instruction *instr) +{ + debug_assert(ctx->block == instr->block); + + /* maybe there is a better way to handle this than just stuffing + * a nop.. ideally we'd know about this constraint in the + * scheduling and depth calculation.. + */ + if (ctx->scheduled && is_sfu_or_mem(ctx->scheduled) && is_sfu_or_mem(instr)) + ir3_NOP(ctx->block); + + /* remove from depth list: + */ + list_delinit(&instr->node); + + if (writes_addr(instr)) { + debug_assert(ctx->addr == NULL); + ctx->addr = instr; + } + + if (writes_pred(instr)) { + debug_assert(ctx->pred == NULL); + ctx->pred = instr; + } + + instr->flags |= IR3_INSTR_MARK; + + list_addtail(&instr->node, &instr->block->instr_list); + ctx->scheduled = instr; + + if (writes_addr(instr) || writes_pred(instr) || is_input(instr)) { + clear_cache(ctx, NULL); + } else { + /* invalidate only the necessary entries.. 
*/ + clear_cache(ctx, instr); + } +} + +static struct ir3_instruction * +deepest(struct ir3_instruction **srcs, unsigned nsrcs) +{ + struct ir3_instruction *d = NULL; + unsigned i = 0, id = 0; + + while ((i < nsrcs) && !(d = srcs[id = i])) + i++; + + if (!d) + return NULL; + + for (; i < nsrcs; i++) + if (srcs[i] && (srcs[i]->depth > d->depth)) + d = srcs[id = i]; + + srcs[id] = NULL; + + return d; +} + +/** + * @block: the block to search in, starting from end; in first pass, + * this will be the block the instruction would be inserted into + * (but has not yet, ie. it only contains already scheduled + * instructions). For intra-block scheduling (second pass), this + * would be one of the predecessor blocks. + * @instr: the instruction to search for + * @maxd: max distance, bail after searching this # of instruction + * slots, since it means the instruction we are looking for is + * far enough away + * @pred: if true, recursively search into predecessor blocks to + * find the worst case (shortest) distance (only possible after + * individual blocks are all scheduled + */ +static unsigned +distance(struct ir3_block *block, struct ir3_instruction *instr, + unsigned maxd, bool pred) +{ + unsigned d = 0; + + list_for_each_entry_rev (struct ir3_instruction, n, &block->instr_list, node) { + if ((n == instr) || (d >= maxd)) + return d; + /* NOTE: don't count branch/jump since we don't know yet if they will + * be eliminated later in resolve_jumps().. really should do that + * earlier so we don't have this constraint. + */ + if (is_alu(n) || (is_flow(n) && (n->opc != OPC_JUMP) && (n->opc != OPC_BR))) + d++; + } + + /* if coming from a predecessor block, assume it is assigned far + * enough away.. we'll fix up later. 
+ */ + if (!pred) + return maxd; + + if (pred && (block->data != block)) { + /* Search into predecessor blocks, finding the one with the + * shortest distance, since that will be the worst case + */ + unsigned min = maxd - d; + + /* (ab)use block->data to prevent recursion: */ + block->data = block; + + for (unsigned i = 0; i < block->predecessors_count; i++) { + unsigned n; + + n = distance(block->predecessors[i], instr, min, pred); + + min = MIN2(min, n); + } + + block->data = NULL; + d += min; + } + + return d; +} + +/* calculate delay for specified src: */ +static unsigned +delay_calc_srcn(struct ir3_block *block, + struct ir3_instruction *assigner, + struct ir3_instruction *consumer, + unsigned srcn, bool soft, bool pred) +{ + unsigned delay = 0; + + if (is_meta(assigner)) { + struct ir3_instruction *src; + foreach_ssa_src(src, assigner) { + unsigned d; + d = delay_calc_srcn(block, src, consumer, srcn, soft, pred); + delay = MAX2(delay, d); + } + } else { + if (soft) { + if (is_sfu(assigner)) { + delay = 4; + } else { + delay = ir3_delayslots(assigner, consumer, srcn); + } + } else { + delay = ir3_delayslots(assigner, consumer, srcn); + } + delay -= distance(block, assigner, delay, pred); + } + + return delay; +} + +/* calculate delay for instruction (maximum of delay for all srcs): */ +static unsigned +delay_calc(struct ir3_block *block, struct ir3_instruction *instr, + bool soft, bool pred) +{ + unsigned delay = 0; + struct ir3_instruction *src; + + foreach_ssa_src_n(src, i, instr) { + unsigned d; + d = delay_calc_srcn(block, src, instr, i, soft, pred); + delay = MAX2(delay, d); + } + + return delay; +} + +struct ir3_sched_notes { + /* there is at least one kill which could be scheduled, except + * for unscheduled bary.f's: + */ + bool blocked_kill; + /* there is at least one instruction that could be scheduled, + * except for conflicting address/predicate register usage: + */ + bool addr_conflict, pred_conflict; +}; + +static bool is_scheduled(struct 
ir3_instruction *instr) +{ + return !!(instr->flags & IR3_INSTR_MARK); +} + +/* could an instruction be scheduled if specified ssa src was scheduled? */ +static bool +could_sched(struct ir3_instruction *instr, struct ir3_instruction *src) +{ + struct ir3_instruction *other_src; + foreach_ssa_src(other_src, instr) { + /* if dependency not scheduled, we aren't ready yet: */ + if ((src != other_src) && !is_scheduled(other_src)) { + return false; + } + } + return true; +} + +/* Check if instruction is ok to schedule. Make sure it is not blocked + * by use of addr/predicate register, etc. + */ +static bool +check_instr(struct ir3_sched_ctx *ctx, struct ir3_sched_notes *notes, + struct ir3_instruction *instr) +{ + /* For instructions that write address register we need to + * make sure there is at least one instruction that uses the + * addr value which is otherwise ready. + * + * TODO if any instructions use pred register and have other + * src args, we would need to do the same for writes_pred().. + */ + if (writes_addr(instr)) { + struct ir3 *ir = instr->block->shader; + bool ready = false; + for (unsigned i = 0; (i < ir->indirects_count) && !ready; i++) { + struct ir3_instruction *indirect = ir->indirects[i]; + if (!indirect) + continue; + if (indirect->address != instr) + continue; + ready = could_sched(indirect, instr); + } + + /* nothing could be scheduled, so keep looking: */ + if (!ready) + return false; + } + + /* if this is a write to address/predicate register, and that + * register is currently in use, we need to defer until it is + * free: + */ + if (writes_addr(instr) && ctx->addr) { + debug_assert(ctx->addr != instr); + notes->addr_conflict = true; + return false; + } + + if (writes_pred(instr) && ctx->pred) { + debug_assert(ctx->pred != instr); + notes->pred_conflict = true; + return false; + } + + /* if the instruction is a kill, we need to ensure *every* + * bary.f is scheduled. 
The hw seems unhappy if the thread + * gets killed before the end-input (ei) flag is hit. + * + * We could do this by adding each bary.f instruction as + * virtual ssa src for the kill instruction. But we have + * fixed length instr->regs[]. + * + * TODO this wouldn't be quite right if we had multiple + * basic blocks, if any block was conditional. We'd need + * to schedule the bary.f's outside of any block which + * was conditional that contained a kill.. I think.. + */ + if (is_kill(instr)) { + struct ir3 *ir = instr->block->shader; + + for (unsigned i = 0; i < ir->baryfs_count; i++) { + struct ir3_instruction *baryf = ir->baryfs[i]; + if (baryf->flags & IR3_INSTR_UNUSED) + continue; + if (!is_scheduled(baryf)) { + notes->blocked_kill = true; + return false; + } + } + } + + return true; +} + +/* Find the best instruction to schedule from specified instruction or + * recursively it's ssa sources. + */ +static struct ir3_instruction * +find_instr_recursive(struct ir3_sched_ctx *ctx, struct ir3_sched_notes *notes, + struct ir3_instruction *instr) +{ + struct ir3_instruction *srcs[__ssa_src_cnt(instr)]; + struct ir3_instruction *src; + unsigned nsrcs = 0; + + if (is_scheduled(instr)) + return NULL; + + /* use instr->data to cache the results of recursing up the + * instr src's. Otherwise the recursive algo can scale quite + * badly w/ shader size. But this takes some care to clear + * the cache appropriately when instructions are scheduled. 
+ */ + if (instr->data) { + if (instr->data == NULL_INSTR) + return NULL; + return instr->data; + } + + /* find unscheduled srcs: */ + foreach_ssa_src(src, instr) { + if (!is_scheduled(src)) { + debug_assert(nsrcs < ARRAY_SIZE(srcs)); + srcs[nsrcs++] = src; + } + } + + /* if all our src's are already scheduled: */ + if (nsrcs == 0) { + if (check_instr(ctx, notes, instr)) { + instr->data = instr; + return instr; + } + return NULL; + } + + while ((src = deepest(srcs, nsrcs))) { + struct ir3_instruction *candidate; + + candidate = find_instr_recursive(ctx, notes, src); + if (!candidate) + continue; + + if (check_instr(ctx, notes, candidate)) { + instr->data = candidate; + return candidate; + } + } + + instr->data = NULL_INSTR; + return NULL; +} + +/* find instruction to schedule: */ +static struct ir3_instruction * +find_eligible_instr(struct ir3_sched_ctx *ctx, struct ir3_sched_notes *notes, + bool soft) +{ + struct ir3_instruction *best_instr = NULL; + unsigned min_delay = ~0; + + /* TODO we'd really rather use the list/array of block outputs. But we + * don't have such a thing. Recursing *every* instruction in the list + * will result in a lot of repeated traversal, since instructions will + * get traversed both when they appear as ssa src to a later instruction + * as well as where they appear in the depth_list. + */ + list_for_each_entry_rev (struct ir3_instruction, instr, &ctx->depth_list, node) { + struct ir3_instruction *candidate; + unsigned delay; + + candidate = find_instr_recursive(ctx, notes, instr); + if (!candidate) + continue; + + delay = delay_calc(ctx->block, candidate, soft, false); + if (delay < min_delay) { + best_instr = candidate; + min_delay = delay; + } + + if (min_delay == 0) + break; + } + + return best_instr; +} + +/* "spill" the address register by remapping any unscheduled + * instructions which depend on the current address register + * to a clone of the instruction which wrote the address reg. 
+ */ +static struct ir3_instruction * +split_addr(struct ir3_sched_ctx *ctx) +{ + struct ir3 *ir; + struct ir3_instruction *new_addr = NULL; + unsigned i; + + debug_assert(ctx->addr); + + ir = ctx->addr->block->shader; + + for (i = 0; i < ir->indirects_count; i++) { + struct ir3_instruction *indirect = ir->indirects[i]; + + if (!indirect) + continue; + + /* skip instructions already scheduled: */ + if (is_scheduled(indirect)) + continue; + + /* remap remaining instructions using current addr + * to new addr: + */ + if (indirect->address == ctx->addr) { + if (!new_addr) { + new_addr = ir3_instr_clone(ctx->addr); + /* original addr is scheduled, but new one isn't: */ + new_addr->flags &= ~IR3_INSTR_MARK; + } + ir3_instr_set_address(indirect, new_addr); + } + } + + /* all remaining indirects remapped to new addr: */ + ctx->addr = NULL; + + return new_addr; +} + +/* "spill" the predicate register by remapping any unscheduled + * instructions which depend on the current predicate register + * to a clone of the instruction which wrote the address reg. + */ +static struct ir3_instruction * +split_pred(struct ir3_sched_ctx *ctx) +{ + struct ir3 *ir; + struct ir3_instruction *new_pred = NULL; + unsigned i; + + debug_assert(ctx->pred); + + ir = ctx->pred->block->shader; + + for (i = 0; i < ir->predicates_count; i++) { + struct ir3_instruction *predicated = ir->predicates[i]; + + /* skip instructions already scheduled: */ + if (is_scheduled(predicated)) + continue; + + /* remap remaining instructions using current pred + * to new pred: + * + * TODO is there ever a case when pred isn't first + * (and only) src? 
+ */ + if (ssa(predicated->regs[1]) == ctx->pred) { + if (!new_pred) { + new_pred = ir3_instr_clone(ctx->pred); + /* original pred is scheduled, but new one isn't: */ + new_pred->flags &= ~IR3_INSTR_MARK; + } + predicated->regs[1]->instr = new_pred; + } + } + + /* all remaining predicated remapped to new pred: */ + ctx->pred = NULL; + + return new_pred; +} + +static void +sched_block(struct ir3_sched_ctx *ctx, struct ir3_block *block) +{ + struct list_head unscheduled_list; + + ctx->block = block; + + /* addr/pred writes are per-block: */ + ctx->addr = NULL; + ctx->pred = NULL; + + /* move all instructions to the unscheduled list, and + * empty the block's instruction list (to which we will + * be inserting). + */ + list_replace(&block->instr_list, &unscheduled_list); + list_inithead(&block->instr_list); + list_inithead(&ctx->depth_list); + + /* first a pre-pass to schedule all meta:input instructions + * (which need to appear first so that RA knows the register is + * occupied), and move remaining to depth sorted list: + */ + list_for_each_entry_safe (struct ir3_instruction, instr, &unscheduled_list, node) { + if (instr->opc == OPC_META_INPUT) { + schedule(ctx, instr); + } else { + ir3_insert_by_depth(instr, &ctx->depth_list); + } + } + + while (!list_empty(&ctx->depth_list)) { + struct ir3_sched_notes notes = {0}; + struct ir3_instruction *instr; + + instr = find_eligible_instr(ctx, ¬es, true); + if (!instr) + instr = find_eligible_instr(ctx, ¬es, false); + + if (instr) { + unsigned delay = delay_calc(ctx->block, instr, false, false); + + /* and if we run out of instructions that can be scheduled, + * then it is time for nop's: + */ + debug_assert(delay <= 6); + while (delay > 0) { + ir3_NOP(block); + delay--; + } + + schedule(ctx, instr); + } else { + struct ir3_instruction *new_instr = NULL; + + /* nothing available to schedule.. 
if we are blocked on + * address/predicate register conflict, then break the + * deadlock by cloning the instruction that wrote that + * reg: + */ + if (notes.addr_conflict) { + new_instr = split_addr(ctx); + } else if (notes.pred_conflict) { + new_instr = split_pred(ctx); + } else { + debug_assert(0); + ctx->error = true; + return; + } + + if (new_instr) { + /* clearing current addr/pred can change what is + * available to schedule, so clear cache.. + */ + clear_cache(ctx, NULL); + + ir3_insert_by_depth(new_instr, &ctx->depth_list); + /* the original instr that wrote addr/pred may have + * originated from a different block: + */ + new_instr->block = block; + } + } + } + + /* And lastly, insert branch/jump instructions to take us to + * the next block. Later we'll strip back out the branches + * that simply jump to next instruction. + */ + if (block->successors[1]) { + /* if/else, conditional branches to "then" or "else": */ + struct ir3_instruction *br; + unsigned delay = 6; + + debug_assert(ctx->pred); + debug_assert(block->condition); + + delay -= distance(ctx->block, ctx->pred, delay, false); + + while (delay > 0) { + ir3_NOP(block); + delay--; + } + + /* create "else" branch first (since "then" block should + * frequently/always end up being a fall-thru): + */ + br = ir3_BR(block); + br->cat0.inv = true; + br->cat0.target = block->successors[1]; + + /* NOTE: we have to hard code delay of 6 above, since + * we want to insert the nop's before constructing the + * branch. Throw in an assert so we notice if this + * ever breaks on future generation: + */ + debug_assert(ir3_delayslots(ctx->pred, br, 0) == 6); + + br = ir3_BR(block); + br->cat0.target = block->successors[0]; + + } else if (block->successors[0]) { + /* otherwise unconditional jump to next block: */ + struct ir3_instruction *jmp; + + jmp = ir3_JUMP(block); + jmp->cat0.target = block->successors[0]; + } + + /* NOTE: if we kept track of the predecessors, we could do a better + * job w/ (jp) flags.. 
every node w/ > predecessor is a join point. + * Note that as we eliminate blocks which contain only an unconditional + * jump we probably need to propagate (jp) flag.. + */ +} + +/* After scheduling individual blocks, we still could have cases where + * one (or more) paths into a block, a value produced by a previous + * has too few delay slots to be legal. We can't deal with this in the + * first pass, because loops (ie. we can't ensure all predecessor blocks + * are already scheduled in the first pass). All we can really do at + * this point is stuff in extra nop's until things are legal. + */ +static void +sched_intra_block(struct ir3_sched_ctx *ctx, struct ir3_block *block) +{ + unsigned n = 0; + + ctx->block = block; + + list_for_each_entry_safe (struct ir3_instruction, instr, &block->instr_list, node) { + unsigned delay = 0; + + for (unsigned i = 0; i < block->predecessors_count; i++) { + unsigned d = delay_calc(block->predecessors[i], instr, false, true); + delay = MAX2(d, delay); + } + + while (delay > n) { + struct ir3_instruction *nop = ir3_NOP(block); + + /* move to before instr: */ + list_delinit(&nop->node); + list_addtail(&nop->node, &instr->node); + + n++; + } + + /* we can bail once we hit worst case delay: */ + if (++n > 6) + break; + } +} + +int ir3_sched(struct ir3 *ir) +{ + struct ir3_sched_ctx ctx = {0}; + + ir3_clear_mark(ir); + + list_for_each_entry (struct ir3_block, block, &ir->block_list, node) { + sched_block(&ctx, block); + } + + list_for_each_entry (struct ir3_block, block, &ir->block_list, node) { + sched_intra_block(&ctx, block); + } + + if (ctx.error) + return -1; + return 0; +} + +/* does instruction 'prior' need to be scheduled before 'instr'? 
*/ +static bool +depends_on(struct ir3_instruction *instr, struct ir3_instruction *prior) +{ + /* TODO for dependencies that are related to a specific object, ie + * a specific SSBO/image/array, we could relax this constraint to + * make accesses to unrelated objects not depend on each other (at + * least as long as not declared coherent) + */ + if (((instr->barrier_class & IR3_BARRIER_EVERYTHING) && prior->barrier_class) || + ((prior->barrier_class & IR3_BARRIER_EVERYTHING) && instr->barrier_class)) + return true; + return !!(instr->barrier_class & prior->barrier_conflict); +} + +static void +add_barrier_deps(struct ir3_block *block, struct ir3_instruction *instr) +{ + struct list_head *prev = instr->node.prev; + struct list_head *next = instr->node.next; + + /* add dependencies on previous instructions that must be scheduled + * prior to the current instruction + */ + while (prev != &block->instr_list) { + struct ir3_instruction *pi = + LIST_ENTRY(struct ir3_instruction, prev, node); + + prev = prev->prev; + + if (is_meta(pi)) + continue; + + if (instr->barrier_class == pi->barrier_class) { + ir3_instr_add_dep(instr, pi); + break; + } + + if (depends_on(instr, pi)) + ir3_instr_add_dep(instr, pi); + } + + /* add dependencies on this instruction to following instructions + * that must be scheduled after the current instruction: + */ + while (next != &block->instr_list) { + struct ir3_instruction *ni = + LIST_ENTRY(struct ir3_instruction, next, node); + + next = next->next; + + if (is_meta(ni)) + continue; + + if (instr->barrier_class == ni->barrier_class) { + ir3_instr_add_dep(ni, instr); + break; + } + + if (depends_on(ni, instr)) + ir3_instr_add_dep(ni, instr); + } +} + +/* before scheduling a block, we need to add any necessary false-dependencies + * to ensure that: + * + * (1) barriers are scheduled in the right order wrt instructions related + * to the barrier + * + * (2) reads that come before a write actually get scheduled before the + * write + */ +static 
void +calculate_deps(struct ir3_block *block) +{ + list_for_each_entry (struct ir3_instruction, instr, &block->instr_list, node) { + if (instr->barrier_class) { + add_barrier_deps(block, instr); + } + } +} + +void +ir3_sched_add_deps(struct ir3 *ir) +{ + list_for_each_entry (struct ir3_block, block, &ir->block_list, node) { + calculate_deps(block); + } +} diff -Nru mesa-18.3.3/src/freedreno/ir3/ir3_shader.c mesa-19.0.1/src/freedreno/ir3/ir3_shader.c --- mesa-18.3.3/src/freedreno/ir3/ir3_shader.c 1970-01-01 00:00:00.000000000 +0000 +++ mesa-19.0.1/src/freedreno/ir3/ir3_shader.c 2019-03-31 23:16:37.000000000 +0000 @@ -0,0 +1,438 @@ +/* + * Copyright (C) 2014 Rob Clark + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ * + * Authors: + * Rob Clark + */ + +#include "util/u_string.h" +#include "util/u_memory.h" +#include "util/u_format.h" + +#include "drm/freedreno_drmif.h" + +#include "ir3_shader.h" +#include "ir3_compiler.h" +#include "ir3_nir.h" + +int +ir3_glsl_type_size(const struct glsl_type *type) +{ + return glsl_count_attribute_slots(type, false); +} + +static void +delete_variant(struct ir3_shader_variant *v) +{ + if (v->ir) + ir3_destroy(v->ir); + if (v->bo) + fd_bo_del(v->bo); + if (v->immediates) + free(v->immediates); + free(v); +} + +/* for vertex shader, the inputs are loaded into registers before the shader + * is executed, so max_regs from the shader instructions might not properly + * reflect the # of registers actually used, especially in case passthrough + * varyings. + * + * Likewise, for fragment shader, we can have some regs which are passed + * input values but never touched by the resulting shader (ie. as result + * of dead code elimination or simply because we don't know how to turn + * the reg off. + */ +static void +fixup_regfootprint(struct ir3_shader_variant *v) +{ + unsigned i; + + for (i = 0; i < v->inputs_count; i++) { + /* skip frag inputs fetch via bary.f since their reg's are + * not written by gpu before shader starts (and in fact the + * regid's might not even be valid) + */ + if (v->inputs[i].bary) + continue; + + /* ignore high regs that are global to all threads in a warp + * (they exist by default) (a5xx+) + */ + if (v->inputs[i].regid >= regid(48,0)) + continue; + + if (v->inputs[i].compmask) { + unsigned n = util_last_bit(v->inputs[i].compmask) - 1; + int32_t regid = (v->inputs[i].regid + n) >> 2; + v->info.max_reg = MAX2(v->info.max_reg, regid); + } + } + + for (i = 0; i < v->outputs_count; i++) { + int32_t regid = (v->outputs[i].regid + 3) >> 2; + v->info.max_reg = MAX2(v->info.max_reg, regid); + } +} + +/* wrapper for ir3_assemble() which does some info fixup based on + * shader state. Non-static since used by ir3_cmdline too. 
+ */ +void * ir3_shader_assemble(struct ir3_shader_variant *v, uint32_t gpu_id) +{ + void *bin; + + bin = ir3_assemble(v->ir, &v->info, gpu_id); + if (!bin) + return NULL; + + if (gpu_id >= 400) { + v->instrlen = v->info.sizedwords / (2 * 16); + } else { + v->instrlen = v->info.sizedwords / (2 * 4); + } + + /* NOTE: if relative addressing is used, we set constlen in + * the compiler (to worst-case value) since we don't know in + * the assembler what the max addr reg value can be: + */ + v->constlen = MIN2(255, MAX2(v->constlen, v->info.max_const + 1)); + + fixup_regfootprint(v); + + return bin; +} + +static void +assemble_variant(struct ir3_shader_variant *v) +{ + struct ir3_compiler *compiler = v->shader->compiler; + struct shader_info *info = &v->shader->nir->info; + uint32_t gpu_id = compiler->gpu_id; + uint32_t sz, *bin; + + bin = ir3_shader_assemble(v, gpu_id); + sz = v->info.sizedwords * 4; + + v->bo = fd_bo_new(compiler->dev, sz, + DRM_FREEDRENO_GEM_CACHE_WCOMBINE | + DRM_FREEDRENO_GEM_TYPE_KMEM, + "%s:%s", ir3_shader_stage(v->shader), info->name); + + memcpy(fd_bo_map(v->bo), bin, sz); + + if (ir3_shader_debug & IR3_DBG_DISASM) { + struct ir3_shader_key key = v->key; + printf("disassemble: type=%d, k={bp=%u,cts=%u,hp=%u}", v->type, + v->binning_pass, key.color_two_side, key.half_precision); + ir3_shader_disasm(v, bin, stdout); + } + + if (shader_debug_enabled(v->shader->type)) { + fprintf(stderr, "Native code for unnamed %s shader %s:\n", + _mesa_shader_stage_to_string(v->shader->type), + v->shader->nir->info.name); + if (v->shader->type == MESA_SHADER_FRAGMENT) + fprintf(stderr, "SIMD0\n"); + ir3_shader_disasm(v, bin, stderr); + } + + free(bin); + + /* no need to keep the ir around beyond this point: */ + ir3_destroy(v->ir); + v->ir = NULL; +} + +static struct ir3_shader_variant * +create_variant(struct ir3_shader *shader, struct ir3_shader_key *key, + bool binning_pass) +{ + struct ir3_shader_variant *v = CALLOC_STRUCT(ir3_shader_variant); + int ret; + + 
if (!v) + return NULL; + + v->id = ++shader->variant_count; + v->shader = shader; + v->binning_pass = binning_pass; + v->key = *key; + v->type = shader->type; + + ret = ir3_compile_shader_nir(shader->compiler, v); + if (ret) { + debug_error("compile failed!"); + goto fail; + } + + assemble_variant(v); + if (!v->bo) { + debug_error("assemble failed!"); + goto fail; + } + + return v; + +fail: + delete_variant(v); + return NULL; +} + +static inline struct ir3_shader_variant * +shader_variant(struct ir3_shader *shader, struct ir3_shader_key *key, + bool *created) +{ + struct ir3_shader_variant *v; + + *created = false; + + for (v = shader->variants; v; v = v->next) + if (ir3_shader_key_equal(key, &v->key)) + return v; + + /* compile new variant if it doesn't exist already: */ + v = create_variant(shader, key, false); + if (v) { + v->next = shader->variants; + shader->variants = v; + *created = true; + } + + return v; +} + +struct ir3_shader_variant * +ir3_shader_get_variant(struct ir3_shader *shader, struct ir3_shader_key *key, + bool binning_pass, bool *created) +{ + struct ir3_shader_variant *v = + shader_variant(shader, key, created); + + if (binning_pass) { + if (!v->binning) + v->binning = create_variant(shader, key, true); + return v->binning; + } + + return v; +} + +void +ir3_shader_destroy(struct ir3_shader *shader) +{ + struct ir3_shader_variant *v, *t; + for (v = shader->variants; v; ) { + t = v; + v = v->next; + delete_variant(t); + } + ralloc_free(shader->nir); + free(shader); +} + +struct ir3_shader * +ir3_shader_from_nir(struct ir3_compiler *compiler, nir_shader *nir) +{ + struct ir3_shader *shader = CALLOC_STRUCT(ir3_shader); + + shader->compiler = compiler; + shader->id = ++shader->compiler->shader_count; + shader->type = nir->info.stage; + + NIR_PASS_V(nir, nir_lower_io, nir_var_all, ir3_glsl_type_size, + (nir_lower_io_options)0); + + /* do first pass optimization, ignoring the key: */ + shader->nir = ir3_optimize_nir(shader, nir, NULL); + if 
(ir3_shader_debug & IR3_DBG_DISASM) { + printf("dump nir%d: type=%d", shader->id, shader->type); + nir_print_shader(shader->nir, stdout); + } + + return shader; +} + +static void dump_reg(FILE *out, const char *name, uint32_t r) +{ + if (r != regid(63,0)) + fprintf(out, "; %s: r%d.%c\n", name, r >> 2, "xyzw"[r & 0x3]); +} + +static void dump_output(FILE *out, struct ir3_shader_variant *so, + unsigned slot, const char *name) +{ + uint32_t regid; + regid = ir3_find_output_regid(so, slot); + dump_reg(out, name, regid); +} + +void +ir3_shader_disasm(struct ir3_shader_variant *so, uint32_t *bin, FILE *out) +{ + struct ir3 *ir = so->ir; + struct ir3_register *reg; + const char *type = ir3_shader_stage(so->shader); + uint8_t regid; + unsigned i; + + for (i = 0; i < ir->ninputs; i++) { + if (!ir->inputs[i]) { + fprintf(out, "; in%d unused\n", i); + continue; + } + reg = ir->inputs[i]->regs[0]; + regid = reg->num; + fprintf(out, "@in(%sr%d.%c)\tin%d\n", + (reg->flags & IR3_REG_HALF) ? "h" : "", + (regid >> 2), "xyzw"[regid & 0x3], i); + } + + for (i = 0; i < ir->noutputs; i++) { + if (!ir->outputs[i]) { + fprintf(out, "; out%d unused\n", i); + continue; + } + /* kill shows up as a virtual output.. skip it! */ + if (is_kill(ir->outputs[i])) + continue; + reg = ir->outputs[i]->regs[0]; + regid = reg->num; + fprintf(out, "@out(%sr%d.%c)\tout%d\n", + (reg->flags & IR3_REG_HALF) ? 
"h" : "", + (regid >> 2), "xyzw"[regid & 0x3], i); + } + + for (i = 0; i < so->immediates_count; i++) { + fprintf(out, "@const(c%d.x)\t", so->constbase.immediate + i); + fprintf(out, "0x%08x, 0x%08x, 0x%08x, 0x%08x\n", + so->immediates[i].val[0], + so->immediates[i].val[1], + so->immediates[i].val[2], + so->immediates[i].val[3]); + } + + disasm_a3xx(bin, so->info.sizedwords, 0, out, ir->compiler->gpu_id); + + switch (so->type) { + case MESA_SHADER_VERTEX: + fprintf(out, "; %s: outputs:", type); + for (i = 0; i < so->outputs_count; i++) { + uint8_t regid = so->outputs[i].regid; + fprintf(out, " r%d.%c (%s)", + (regid >> 2), "xyzw"[regid & 0x3], + gl_varying_slot_name(so->outputs[i].slot)); + } + fprintf(out, "\n"); + fprintf(out, "; %s: inputs:", type); + for (i = 0; i < so->inputs_count; i++) { + uint8_t regid = so->inputs[i].regid; + fprintf(out, " r%d.%c (cm=%x,il=%u,b=%u)", + (regid >> 2), "xyzw"[regid & 0x3], + so->inputs[i].compmask, + so->inputs[i].inloc, + so->inputs[i].bary); + } + fprintf(out, "\n"); + break; + case MESA_SHADER_FRAGMENT: + fprintf(out, "; %s: outputs:", type); + for (i = 0; i < so->outputs_count; i++) { + uint8_t regid = so->outputs[i].regid; + fprintf(out, " r%d.%c (%s)", + (regid >> 2), "xyzw"[regid & 0x3], + gl_frag_result_name(so->outputs[i].slot)); + } + fprintf(out, "\n"); + fprintf(out, "; %s: inputs:", type); + for (i = 0; i < so->inputs_count; i++) { + uint8_t regid = so->inputs[i].regid; + fprintf(out, " r%d.%c (%s,cm=%x,il=%u,b=%u)", + (regid >> 2), "xyzw"[regid & 0x3], + gl_varying_slot_name(so->inputs[i].slot), + so->inputs[i].compmask, + so->inputs[i].inloc, + so->inputs[i].bary); + } + fprintf(out, "\n"); + break; + default: + /* TODO */ + break; + } + + /* print generic shader info: */ + fprintf(out, "; %s prog %d/%d: %u instructions, %d half, %d full\n", + type, so->shader->id, so->id, + so->info.instrs_count, + so->info.max_half_reg + 1, + so->info.max_reg + 1); + + fprintf(out, "; %d const, %u constlen\n", + 
so->info.max_const + 1, + so->constlen); + + fprintf(out, "; %u (ss), %u (sy)\n", so->info.ss, so->info.sy); + + /* print shader type specific info: */ + switch (so->type) { + case MESA_SHADER_VERTEX: + dump_output(out, so, VARYING_SLOT_POS, "pos"); + dump_output(out, so, VARYING_SLOT_PSIZ, "psize"); + break; + case MESA_SHADER_FRAGMENT: + dump_reg(out, "pos (bary)", + ir3_find_sysval_regid(so, SYSTEM_VALUE_VARYING_COORD)); + dump_output(out, so, FRAG_RESULT_DEPTH, "posz"); + if (so->color0_mrt) { + dump_output(out, so, FRAG_RESULT_COLOR, "color"); + } else { + dump_output(out, so, FRAG_RESULT_DATA0, "data0"); + dump_output(out, so, FRAG_RESULT_DATA1, "data1"); + dump_output(out, so, FRAG_RESULT_DATA2, "data2"); + dump_output(out, so, FRAG_RESULT_DATA3, "data3"); + dump_output(out, so, FRAG_RESULT_DATA4, "data4"); + dump_output(out, so, FRAG_RESULT_DATA5, "data5"); + dump_output(out, so, FRAG_RESULT_DATA6, "data6"); + dump_output(out, so, FRAG_RESULT_DATA7, "data7"); + } + /* these two are hard-coded since we don't know how to + * program them to anything but all 0's... 
+ */ + if (so->frag_coord) + fprintf(out, "; fragcoord: r0.x\n"); + if (so->frag_face) + fprintf(out, "; fragface: hr0.x\n"); + break; + default: + /* TODO */ + break; + } + + fprintf(out, "\n"); +} + +uint64_t +ir3_shader_outputs(const struct ir3_shader *so) +{ + return so->nir->info.outputs_written; +} diff -Nru mesa-18.3.3/src/freedreno/ir3/ir3_shader.h mesa-19.0.1/src/freedreno/ir3/ir3_shader.h --- mesa-18.3.3/src/freedreno/ir3/ir3_shader.h 1970-01-01 00:00:00.000000000 +0000 +++ mesa-19.0.1/src/freedreno/ir3/ir3_shader.h 2019-03-31 23:16:37.000000000 +0000 @@ -0,0 +1,591 @@ +/* + * Copyright (C) 2014 Rob Clark + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ * + * Authors: + * Rob Clark + */ + +#ifndef IR3_SHADER_H_ +#define IR3_SHADER_H_ + +#include + +#include "compiler/shader_enums.h" +#include "compiler/nir/nir.h" +#include "util/bitscan.h" + +#include "ir3.h" + +struct glsl_type; + +/* driver param indices: */ +enum ir3_driver_param { + /* compute shader driver params: */ + IR3_DP_NUM_WORK_GROUPS_X = 0, + IR3_DP_NUM_WORK_GROUPS_Y = 1, + IR3_DP_NUM_WORK_GROUPS_Z = 2, + IR3_DP_LOCAL_GROUP_SIZE_X = 4, + IR3_DP_LOCAL_GROUP_SIZE_Y = 5, + IR3_DP_LOCAL_GROUP_SIZE_Z = 6, + /* NOTE: gl_NumWorkGroups should be vec4 aligned because + * glDispatchComputeIndirect() needs to load these from + * the info->indirect buffer. Keep that in mind when/if + * adding any addition CS driver params. + */ + IR3_DP_CS_COUNT = 8, /* must be aligned to vec4 */ + + /* vertex shader driver params: */ + IR3_DP_VTXID_BASE = 0, + IR3_DP_VTXCNT_MAX = 1, + /* user-clip-plane components, up to 8x vec4's: */ + IR3_DP_UCP0_X = 4, + /* .... */ + IR3_DP_UCP7_W = 35, + IR3_DP_VS_COUNT = 36 /* must be aligned to vec4 */ +}; + +#define IR3_MAX_SHADER_BUFFERS 32 +#define IR3_MAX_SHADER_IMAGES 32 +#define IR3_MAX_SO_BUFFERS 4 +#define IR3_MAX_SO_OUTPUTS 64 + +/** + * For consts needed to pass internal values to shader which may or may not + * be required, rather than allocating worst-case const space, we scan the + * shader and allocate consts as-needed: + * + * + SSBO sizes: only needed if shader has a get_buffer_size intrinsic + * for a given SSBO + * + * + Image dimensions: needed to calculate pixel offset, but only for + * images that have a image_store intrinsic + */ +struct ir3_driver_const_layout { + struct { + uint32_t mask; /* bitmask of SSBOs that have get_buffer_size */ + uint32_t count; /* number of consts allocated */ + /* one const allocated per SSBO which has get_buffer_size, + * ssbo_sizes.off[ssbo_id] is offset from start of ssbo_sizes + * consts: + */ + uint32_t off[IR3_MAX_SHADER_BUFFERS]; + } ssbo_size; + + struct { + uint32_t mask; /* 
bitmask of images that have image_store */ + uint32_t count; /* number of consts allocated */ + /* three const allocated per image which has image_store: + * + cpp (bytes per pixel) + * + pitch (y pitch) + * + array_pitch (z pitch) + */ + uint32_t off[IR3_MAX_SHADER_IMAGES]; + } image_dims; +}; + +/** + * A single output for vertex transform feedback. + */ +struct ir3_stream_output { + unsigned register_index:6; /**< 0 to 63 (OUT index) */ + unsigned start_component:2; /** 0 to 3 */ + unsigned num_components:3; /** 1 to 4 */ + unsigned output_buffer:3; /**< 0 to PIPE_MAX_SO_BUFFERS */ + unsigned dst_offset:16; /**< offset into the buffer in dwords */ + unsigned stream:2; /**< 0 to 3 */ +}; + +/** + * Stream output for vertex transform feedback. + */ +struct ir3_stream_output_info { + unsigned num_outputs; + /** stride for an entire vertex for each buffer in dwords */ + uint16_t stride[IR3_MAX_SO_BUFFERS]; + + /** + * Array of stream outputs, in the order they are to be written in. + * Selected components are tightly packed into the output buffer. + */ + struct ir3_stream_output output[IR3_MAX_SO_OUTPUTS]; +}; + +/* Configuration key used to identify a shader variant.. different + * shader variants can be used to implement features not supported + * in hw (two sided color), binning-pass vertex shader, etc. + */ +struct ir3_shader_key { + union { + struct { + /* + * Combined Vertex/Fragment shader parameters: + */ + unsigned ucp_enables : 8; + + /* do we need to check {v,f}saturate_{s,t,r}? 
*/ + unsigned has_per_samp : 1; + + /* + * Vertex shader variant parameters: + */ + unsigned vclamp_color : 1; + + /* + * Fragment shader variant parameters: + */ + unsigned color_two_side : 1; + unsigned half_precision : 1; + /* used when shader needs to handle flat varyings (a4xx) + * for front/back color inputs to frag shader: + */ + unsigned rasterflat : 1; + unsigned fclamp_color : 1; + }; + uint32_t global; + }; + + /* bitmask of sampler which needs coords clamped for vertex + * shader: + */ + uint16_t vsaturate_s, vsaturate_t, vsaturate_r; + + /* bitmask of sampler which needs coords clamped for frag + * shader: + */ + uint16_t fsaturate_s, fsaturate_t, fsaturate_r; + + /* bitmask of ms shifts */ + uint32_t vsamples, fsamples; + + /* bitmask of samplers which need astc srgb workaround: */ + uint16_t vastc_srgb, fastc_srgb; +}; + +static inline bool +ir3_shader_key_equal(struct ir3_shader_key *a, struct ir3_shader_key *b) +{ + /* slow-path if we need to check {v,f}saturate_{s,t,r} */ + if (a->has_per_samp || b->has_per_samp) + return memcmp(a, b, sizeof(struct ir3_shader_key)) == 0; + return a->global == b->global; +} + +/* will the two keys produce different lowering for a fragment shader? 
*/ +static inline bool +ir3_shader_key_changes_fs(struct ir3_shader_key *key, struct ir3_shader_key *last_key) +{ + if (last_key->has_per_samp || key->has_per_samp) { + if ((last_key->fsaturate_s != key->fsaturate_s) || + (last_key->fsaturate_t != key->fsaturate_t) || + (last_key->fsaturate_r != key->fsaturate_r) || + (last_key->fsamples != key->fsamples) || + (last_key->fastc_srgb != key->fastc_srgb)) + return true; + } + + if (last_key->fclamp_color != key->fclamp_color) + return true; + + if (last_key->color_two_side != key->color_two_side) + return true; + + if (last_key->half_precision != key->half_precision) + return true; + + if (last_key->rasterflat != key->rasterflat) + return true; + + if (last_key->ucp_enables != key->ucp_enables) + return true; + + return false; +} + +/* will the two keys produce different lowering for a vertex shader? */ +static inline bool +ir3_shader_key_changes_vs(struct ir3_shader_key *key, struct ir3_shader_key *last_key) +{ + if (last_key->has_per_samp || key->has_per_samp) { + if ((last_key->vsaturate_s != key->vsaturate_s) || + (last_key->vsaturate_t != key->vsaturate_t) || + (last_key->vsaturate_r != key->vsaturate_r) || + (last_key->vsamples != key->vsamples) || + (last_key->vastc_srgb != key->vastc_srgb)) + return true; + } + + if (last_key->vclamp_color != key->vclamp_color) + return true; + + if (last_key->ucp_enables != key->ucp_enables) + return true; + + return false; +} + +/* clears shader-key flags which don't apply to the given shader + * stage + */ +static inline void +ir3_normalize_key(struct ir3_shader_key *key, gl_shader_stage type) +{ + switch (type) { + case MESA_SHADER_FRAGMENT: + if (key->has_per_samp) { + key->vsaturate_s = 0; + key->vsaturate_t = 0; + key->vsaturate_r = 0; + key->vastc_srgb = 0; + key->vsamples = 0; + } + break; + case MESA_SHADER_VERTEX: + key->color_two_side = false; + key->half_precision = false; + key->rasterflat = false; + if (key->has_per_samp) { + key->fsaturate_s = 0; + 
key->fsaturate_t = 0; + key->fsaturate_r = 0; + key->fastc_srgb = 0; + key->fsamples = 0; + } + break; + default: + /* TODO */ + break; + } + +} + +struct ir3_shader_variant { + struct fd_bo *bo; + + /* variant id (for debug) */ + uint32_t id; + + struct ir3_shader_key key; + + /* vertex shaders can have an extra version for hwbinning pass, + * which is pointed to by so->binning: + */ + bool binning_pass; + struct ir3_shader_variant *binning; + + struct ir3_driver_const_layout const_layout; + struct ir3_info info; + struct ir3 *ir; + + /* Levels of nesting of flow control: + */ + unsigned branchstack; + + /* the instructions length is in units of instruction groups + * (4 instructions for a3xx, 16 instructions for a4xx.. each + * instruction is 2 dwords): + */ + unsigned instrlen; + + /* the constants length is in units of vec4's, and is the sum of + * the uniforms and the built-in compiler constants + */ + unsigned constlen; + + /* number of uniforms (in vec4), not including built-in compiler + * constants, etc. + */ + unsigned num_uniforms; + + unsigned num_ubos; + + /* About Linkage: + * + Let the frag shader determine the position/compmask for the + * varyings, since it is the place where we know if the varying + * is actually used, and if so, which components are used. So + * what the hw calls "outloc" is taken from the "inloc" of the + * frag shader. + * + From the vert shader, we only need the output regid + */ + + bool frag_coord, frag_face, color0_mrt; + + /* NOTE: for input/outputs, slot is: + * gl_vert_attrib - for VS inputs + * gl_varying_slot - for VS output / FS input + * gl_frag_result - for FS output + */ + + /* varyings/outputs: */ + unsigned outputs_count; + struct { + uint8_t slot; + uint8_t regid; + } outputs[16 + 2]; /* +POSITION +PSIZE */ + bool writes_pos, writes_psize; + + /* attributes (VS) / varyings (FS): + * Note that sysval's should come *after* normal inputs. 
+ */ + unsigned inputs_count; + struct { + uint8_t slot; + uint8_t regid; + uint8_t compmask; + uint8_t ncomp; + /* location of input (ie. offset passed to bary.f, etc). This + * matches the SP_VS_VPC_DST_REG.OUTLOCn value (a3xx and a4xx + * have the OUTLOCn value offset by 8, presumably to account + * for gl_Position/gl_PointSize) + */ + uint8_t inloc; + /* vertex shader specific: */ + bool sysval : 1; /* slot is a gl_system_value */ + /* fragment shader specific: */ + bool bary : 1; /* fetched varying (vs one loaded into reg) */ + bool rasterflat : 1; /* special handling for emit->rasterflat */ + enum glsl_interp_mode interpolate; + } inputs[16 + 2]; /* +POSITION +FACE */ + + /* sum of input components (scalar). For frag shaders, it only counts + * the varying inputs: + */ + unsigned total_in; + + /* For frag shaders, the total number of inputs (not scalar, + * ie. SP_VS_PARAM_REG.TOTALVSOUTVAR) + */ + unsigned varying_in; + + /* number of samplers/textures (which are currently 1:1): */ + int num_samp; + + /* do we have one or more SSBO instructions: */ + bool has_ssbo; + + /* do we have kill instructions: */ + bool has_kill; + + /* Layout of constant registers, each section (in vec4). Pointer size + * is 32b (a3xx, a4xx), or 64b (a5xx+), which effects the size of the + * UBO and stream-out consts. 
+ */ + struct { + /* user const start at zero */ + unsigned ubo; + /* NOTE that a3xx might need a section for SSBO addresses too */ + unsigned ssbo_sizes; + unsigned image_dims; + unsigned driver_param; + unsigned tfbo; + unsigned immediate; + } constbase; + + unsigned immediates_count; + unsigned immediates_size; + struct { + uint32_t val[4]; + } *immediates; + + /* for astc srgb workaround, the number/base of additional + * alpha tex states we need, and index of original tex states + */ + struct { + unsigned base, count; + unsigned orig_idx[16]; + } astc_srgb; + + /* shader variants form a linked list: */ + struct ir3_shader_variant *next; + + /* replicated here to avoid passing extra ptrs everywhere: */ + gl_shader_stage type; + struct ir3_shader *shader; +}; + +struct ir3_shader { + gl_shader_stage type; + + /* shader id (for debug): */ + uint32_t id; + uint32_t variant_count; + + /* so we know when we can disable TGSI related hacks: */ + bool from_tgsi; + + struct ir3_compiler *compiler; + + struct nir_shader *nir; + struct ir3_stream_output_info stream_output; + + struct ir3_shader_variant *variants; +}; + +void * ir3_shader_assemble(struct ir3_shader_variant *v, uint32_t gpu_id); +struct ir3_shader_variant * ir3_shader_get_variant(struct ir3_shader *shader, + struct ir3_shader_key *key, bool binning_pass, bool *created); +struct ir3_shader * ir3_shader_from_nir(struct ir3_compiler *compiler, nir_shader *nir); +void ir3_shader_destroy(struct ir3_shader *shader); +void ir3_shader_disasm(struct ir3_shader_variant *so, uint32_t *bin, FILE *out); +uint64_t ir3_shader_outputs(const struct ir3_shader *so); + +int +ir3_glsl_type_size(const struct glsl_type *type); + +static inline const char * +ir3_shader_stage(struct ir3_shader *shader) +{ + switch (shader->type) { + case MESA_SHADER_VERTEX: return "VERT"; + case MESA_SHADER_FRAGMENT: return "FRAG"; + case MESA_SHADER_COMPUTE: return "CL"; + default: + unreachable("invalid type"); + return NULL; + } +} + +/* + * 
Helper/util: + */ + +static inline int +ir3_find_output(const struct ir3_shader_variant *so, gl_varying_slot slot) +{ + int j; + + for (j = 0; j < so->outputs_count; j++) + if (so->outputs[j].slot == slot) + return j; + + /* it seems optional to have a OUT.BCOLOR[n] for each OUT.COLOR[n] + * in the vertex shader.. but the fragment shader doesn't know this + * so it will always have both IN.COLOR[n] and IN.BCOLOR[n]. So + * at link time if there is no matching OUT.BCOLOR[n], we must map + * OUT.COLOR[n] to IN.BCOLOR[n]. And visa versa if there is only + * a OUT.BCOLOR[n] but no matching OUT.COLOR[n] + */ + if (slot == VARYING_SLOT_BFC0) { + slot = VARYING_SLOT_COL0; + } else if (slot == VARYING_SLOT_BFC1) { + slot = VARYING_SLOT_COL1; + } else if (slot == VARYING_SLOT_COL0) { + slot = VARYING_SLOT_BFC0; + } else if (slot == VARYING_SLOT_COL1) { + slot = VARYING_SLOT_BFC1; + } else { + return 0; + } + + for (j = 0; j < so->outputs_count; j++) + if (so->outputs[j].slot == slot) + return j; + + debug_assert(0); + + return 0; +} + +static inline int +ir3_next_varying(const struct ir3_shader_variant *so, int i) +{ + while (++i < so->inputs_count) + if (so->inputs[i].compmask && so->inputs[i].bary) + break; + return i; +} + +struct ir3_shader_linkage { + uint8_t max_loc; + uint8_t cnt; + struct { + uint8_t regid; + uint8_t compmask; + uint8_t loc; + } var[32]; +}; + +static inline void +ir3_link_add(struct ir3_shader_linkage *l, uint8_t regid, uint8_t compmask, uint8_t loc) +{ + int i = l->cnt++; + + debug_assert(i < ARRAY_SIZE(l->var)); + + l->var[i].regid = regid; + l->var[i].compmask = compmask; + l->var[i].loc = loc; + l->max_loc = MAX2(l->max_loc, loc + util_last_bit(compmask)); +} + +static inline void +ir3_link_shaders(struct ir3_shader_linkage *l, + const struct ir3_shader_variant *vs, + const struct ir3_shader_variant *fs) +{ + int j = -1, k; + + while (l->cnt < ARRAY_SIZE(l->var)) { + j = ir3_next_varying(fs, j); + + if (j >= fs->inputs_count) + break; + + if 
(fs->inputs[j].inloc >= fs->total_in) + continue; + + k = ir3_find_output(vs, fs->inputs[j].slot); + + ir3_link_add(l, vs->outputs[k].regid, + fs->inputs[j].compmask, fs->inputs[j].inloc); + } +} + +static inline uint32_t +ir3_find_output_regid(const struct ir3_shader_variant *so, unsigned slot) +{ + int j; + for (j = 0; j < so->outputs_count; j++) + if (so->outputs[j].slot == slot) + return so->outputs[j].regid; + return regid(63, 0); +} + +static inline uint32_t +ir3_find_sysval_regid(const struct ir3_shader_variant *so, unsigned slot) +{ + int j; + for (j = 0; j < so->inputs_count; j++) + if (so->inputs[j].sysval && (so->inputs[j].slot == slot)) + return so->inputs[j].regid; + return regid(63, 0); +} + +/* calculate register footprint in terms of half-regs (ie. one full + * reg counts as two half-regs). + */ +static inline uint32_t +ir3_shader_halfregs(const struct ir3_shader_variant *v) +{ + return (2 * (v->info.max_reg + 1)) + (v->info.max_half_reg + 1); +} + +#endif /* IR3_SHADER_H_ */ diff -Nru mesa-18.3.3/src/freedreno/ir3/meson.build mesa-19.0.1/src/freedreno/ir3/meson.build --- mesa-18.3.3/src/freedreno/ir3/meson.build 1970-01-01 00:00:00.000000000 +0000 +++ mesa-19.0.1/src/freedreno/ir3/meson.build 2019-03-31 23:16:37.000000000 +0000 @@ -0,0 +1,66 @@ +# Copyright © 2018 Rob Clark + +# Permission is hereby granted, free of charge, to any person obtaining a copy +# of this software and associated documentation files (the "Software"), to deal +# in the Software without restriction, including without limitation the rights +# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +# copies of the Software, and to permit persons to whom the Software is +# furnished to do so, subject to the following conditions: + +# The above copyright notice and this permission notice shall be included in +# all copies or substantial portions of the Software. 
+ +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +# SOFTWARE. + +ir3_nir_trig_c = custom_target( + 'ir3_nir_trig.c', + input : 'ir3_nir_trig.py', + output : 'ir3_nir_trig.c', + command : [ + prog_python, '@INPUT@', + '-p', join_paths(meson.source_root(), 'src/compiler/nir/'), + ], + capture : true, + depend_files : nir_algebraic_py, +) + +libfreedreno_ir3_files = files( + 'disasm-a3xx.c', + 'instr-a3xx.h', + 'ir3.c', + 'ir3_compiler_nir.c', + 'ir3_compiler.c', + 'ir3_compiler.h', + 'ir3_context.c', + 'ir3_context.h', + 'ir3_cp.c', + 'ir3_depth.c', + 'ir3_group.c', + 'ir3.h', + 'ir3_legalize.c', + 'ir3_nir.c', + 'ir3_nir.h', + 'ir3_nir_lower_tg4_to_tex.c', + 'ir3_print.c', + 'ir3_ra.c', + 'ir3_sched.c', + 'ir3_shader.c', + 'ir3_shader.h', +) + +libfreedreno_ir3 = static_library( + 'freedreno_ir3', + [libfreedreno_ir3_files, ir3_nir_trig_c], + include_directories : [inc_freedreno, inc_common], + c_args : [c_vis_args, no_override_init_args], + cpp_args : [cpp_vis_args], + dependencies : idep_nir_headers, + build_by_default : false, +) + diff -Nru mesa-18.3.3/src/freedreno/Makefile.am mesa-19.0.1/src/freedreno/Makefile.am --- mesa-18.3.3/src/freedreno/Makefile.am 1970-01-01 00:00:00.000000000 +0000 +++ mesa-19.0.1/src/freedreno/Makefile.am 2019-03-31 23:16:37.000000000 +0000 @@ -0,0 +1,74 @@ +# Copyright © 2016 Broadcom +# Copyright © 2016 Intel Corporation +# +# Permission is hereby granted, free of charge, to any person obtaining a +# copy of this software and associated documentation files (the "Software"), +# to deal in the Software without 
restriction, including without limitation +# the rights to use, copy, modify, merge, publish, distribute, sublicense, +# and/or sell copies of the Software, and to permit persons to whom the +# Software is furnished to do so, subject to the following conditions: +# +# The above copyright notice and this permission notice (including the next +# paragraph) shall be included in all copies or substantial portions of the +# Software. +# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL +# THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING +# FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS +# IN THE SOFTWARE. + +AM_CPPFLAGS = \ + -I$(top_srcdir)/include \ + -I$(top_builddir)/src \ + -I$(top_srcdir)/src \ + -I$(top_srcdir)/src/freedreno/ \ + -I$(top_srcdir)/src/gallium/auxiliary \ + -I$(top_srcdir)/src/gallium/include \ + $(DEFINES) + +AM_CFLAGS = \ + $(EXPAT_CFLAGS) + +include Makefile.sources + +lib_LTLIBRARIES = +check_LTLIBRARIES = +noinst_DATA = +noinst_HEADERS = $(registers_FILES) +noinst_LTLIBRARIES = +noinst_PROGRAMS = +check_PROGRAMS = +TESTS = +BUILT_SOURCES = +CLEANFILES = +EXTRA_DIST = \ + meson.build \ + drm/meson.build \ + ir3/ir3_nir_trig.py \ + ir3/meson.build + +MKDIR_GEN = $(AM_V_at)$(MKDIR_P) $(@D) +PYTHON_GEN = $(AM_V_GEN)$(PYTHON) $(PYTHON_FLAGS) + +CLEANFILES += $(BUILT_SOURCES) + +noinst_LTLIBRARIES += libfreedreno_drm.la + +libfreedreno_drm_la_SOURCES = $(drm_SOURCES) +libfreedreno_drm_la_CFLAGS = $(VALGRIND_CFLAGS) $(LIBDRM_CFLAGS) + +noinst_LTLIBRARIES += libfreedreno_ir3.la + +libfreedreno_ir3_la_SOURCES = $(ir3_SOURCES) $(ir3_GENERATED_FILES) +libfreedreno_ir3_la_CFLAGS = \ + -I$(top_srcdir)/src/freedreno/ir3 \ + 
-I$(top_builddir)/src/compiler/nir \ + -I$(top_srcdir)/src/compiler/nir + +ir3/ir3_nir_trig.c: ir3/ir3_nir_trig.py $(top_srcdir)/src/compiler/nir/nir_algebraic.py + $(MKDIR_GEN) + $(AM_V_GEN) $(PYTHON) $(PYTHON_FLAGS) $(srcdir)/ir3/ir3_nir_trig.py -p $(top_srcdir)/src/compiler/nir > $@ || ($(RM) $@; false) + diff -Nru mesa-18.3.3/src/freedreno/Makefile.sources mesa-19.0.1/src/freedreno/Makefile.sources --- mesa-18.3.3/src/freedreno/Makefile.sources 1970-01-01 00:00:00.000000000 +0000 +++ mesa-19.0.1/src/freedreno/Makefile.sources 2019-03-31 23:16:37.000000000 +0000 @@ -0,0 +1,51 @@ +drm_SOURCES := \ + drm/freedreno_bo.c \ + drm/freedreno_drmif.h \ + drm/freedreno_ringbuffer.c \ + drm/msm_bo.c \ + drm/msm_pipe.c \ + drm/msm_ringbuffer_sp.c \ + drm/freedreno_bo_cache.c \ + drm/freedreno_pipe.c \ + drm/freedreno_ringbuffer.h \ + drm/msm_device.c \ + drm/msm_priv.h \ + drm/freedreno_device.c \ + drm/freedreno_priv.h \ + drm/msm_drm.h \ + drm/msm_ringbuffer.c + +ir3_SOURCES := \ + ir3/disasm-a3xx.c \ + ir3/instr-a3xx.h \ + ir3/ir3.c \ + ir3/ir3_compiler.c \ + ir3/ir3_compiler.h \ + ir3/ir3_compiler_nir.c \ + ir3/ir3_context.c \ + ir3/ir3_context.h \ + ir3/ir3_cp.c \ + ir3/ir3_depth.c \ + ir3/ir3_group.c \ + ir3/ir3.h \ + ir3/ir3_legalize.c \ + ir3/ir3_nir.c \ + ir3/ir3_nir.h \ + ir3/ir3_nir_lower_tg4_to_tex.c \ + ir3/ir3_print.c \ + ir3/ir3_ra.c \ + ir3/ir3_sched.c \ + ir3/ir3_shader.c \ + ir3/ir3_shader.h + +ir3_GENERATED_FILES := \ + ir3/ir3_nir_trig.c + +registers_FILES := \ + registers/a2xx.xml.h \ + registers/a3xx.xml.h \ + registers/a4xx.xml.h \ + registers/a5xx.xml.h \ + registers/a6xx.xml.h \ + registers/adreno_common.xml.h \ + registers/adreno_pm4.xml.h diff -Nru mesa-18.3.3/src/freedreno/meson.build mesa-19.0.1/src/freedreno/meson.build --- mesa-18.3.3/src/freedreno/meson.build 1970-01-01 00:00:00.000000000 +0000 +++ mesa-19.0.1/src/freedreno/meson.build 2019-03-31 23:16:37.000000000 +0000 @@ -0,0 +1,24 @@ +# Copyright © 2018 Rob Clark + +# Permission is 
hereby granted, free of charge, to any person obtaining a copy +# of this software and associated documentation files (the "Software"), to deal +# in the Software without restriction, including without limitation the rights +# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +# copies of the Software, and to permit persons to whom the Software is +# furnished to do so, subject to the following conditions: + +# The above copyright notice and this permission notice shall be included in +# all copies or substantial portions of the Software. + +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +# SOFTWARE. + +inc_freedreno = include_directories(['.', './registers']) + +subdir('drm') +subdir('ir3') diff -Nru mesa-18.3.3/src/freedreno/registers/a2xx.xml.h mesa-19.0.1/src/freedreno/registers/a2xx.xml.h --- mesa-18.3.3/src/freedreno/registers/a2xx.xml.h 1970-01-01 00:00:00.000000000 +0000 +++ mesa-19.0.1/src/freedreno/registers/a2xx.xml.h 2019-03-31 23:16:37.000000000 +0000 @@ -0,0 +1,3010 @@ +#ifndef A2XX_XML +#define A2XX_XML + +/* Autogenerated file, DO NOT EDIT manually! 
+ +This file was generated by the rules-ng-ng headergen tool in this git repository: +http://github.com/freedreno/envytools/ +git clone https://github.com/freedreno/envytools.git + +The rules-ng-ng source files this header was generated from are: +- ./rnndb/adreno/a2xx.xml ( 79608 bytes, from 2018-12-21 03:07:09) +- ./rnndb/freedreno_copyright.xml ( 1572 bytes, from 2018-09-02 13:35:19) +- ./rnndb/adreno/adreno_common.xml ( 14201 bytes, from 2018-09-07 18:12:21) +- ./rnndb/adreno/adreno_pm4.xml ( 42626 bytes, from 2018-09-17 18:20:14) + +Copyright (C) 2013-2018 by the following authors: +- Rob Clark (robclark) +- Ilia Mirkin (imirkin) + +Permission is hereby granted, free of charge, to any person obtaining +a copy of this software and associated documentation files (the +"Software"), to deal in the Software without restriction, including +without limitation the rights to use, copy, modify, merge, publish, +distribute, sublicense, and/or sell copies of the Software, and to +permit persons to whom the Software is furnished to do so, subject to +the following conditions: + +The above copyright notice and this permission notice (including the +next paragraph) shall be included in all copies or substantial +portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, +EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF +MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. +IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE +LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION +OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION +WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
+*/ + + +enum a2xx_rb_dither_type { + DITHER_PIXEL = 0, + DITHER_SUBPIXEL = 1, +}; + +enum a2xx_colorformatx { + COLORX_4_4_4_4 = 0, + COLORX_1_5_5_5 = 1, + COLORX_5_6_5 = 2, + COLORX_8 = 3, + COLORX_8_8 = 4, + COLORX_8_8_8_8 = 5, + COLORX_S8_8_8_8 = 6, + COLORX_16_FLOAT = 7, + COLORX_16_16_FLOAT = 8, + COLORX_16_16_16_16_FLOAT = 9, + COLORX_32_FLOAT = 10, + COLORX_32_32_FLOAT = 11, + COLORX_32_32_32_32_FLOAT = 12, + COLORX_2_3_3 = 13, + COLORX_8_8_8 = 14, +}; + +enum a2xx_sq_surfaceformat { + FMT_1_REVERSE = 0, + FMT_1 = 1, + FMT_8 = 2, + FMT_1_5_5_5 = 3, + FMT_5_6_5 = 4, + FMT_6_5_5 = 5, + FMT_8_8_8_8 = 6, + FMT_2_10_10_10 = 7, + FMT_8_A = 8, + FMT_8_B = 9, + FMT_8_8 = 10, + FMT_Cr_Y1_Cb_Y0 = 11, + FMT_Y1_Cr_Y0_Cb = 12, + FMT_5_5_5_1 = 13, + FMT_8_8_8_8_A = 14, + FMT_4_4_4_4 = 15, + FMT_8_8_8 = 16, + FMT_DXT1 = 18, + FMT_DXT2_3 = 19, + FMT_DXT4_5 = 20, + FMT_10_10_10_2 = 21, + FMT_24_8 = 22, + FMT_16 = 24, + FMT_16_16 = 25, + FMT_16_16_16_16 = 26, + FMT_16_EXPAND = 27, + FMT_16_16_EXPAND = 28, + FMT_16_16_16_16_EXPAND = 29, + FMT_16_FLOAT = 30, + FMT_16_16_FLOAT = 31, + FMT_16_16_16_16_FLOAT = 32, + FMT_32 = 33, + FMT_32_32 = 34, + FMT_32_32_32_32 = 35, + FMT_32_FLOAT = 36, + FMT_32_32_FLOAT = 37, + FMT_32_32_32_32_FLOAT = 38, + FMT_ATI_TC_RGB = 39, + FMT_ATI_TC_RGBA = 40, + FMT_ATI_TC_555_565_RGB = 41, + FMT_ATI_TC_555_565_RGBA = 42, + FMT_ATI_TC_RGBA_INTERP = 43, + FMT_ATI_TC_555_565_RGBA_INTERP = 44, + FMT_ETC1_RGBA_INTERP = 46, + FMT_ETC1_RGB = 47, + FMT_ETC1_RGBA = 48, + FMT_DXN = 49, + FMT_2_3_3 = 51, + FMT_2_10_10_10_AS_16_16_16_16 = 54, + FMT_10_10_10_2_AS_16_16_16_16 = 55, + FMT_32_32_32_FLOAT = 57, + FMT_DXT3A = 58, + FMT_DXT5A = 59, + FMT_CTX1 = 60, +}; + +enum a2xx_sq_ps_vtx_mode { + POSITION_1_VECTOR = 0, + POSITION_2_VECTORS_UNUSED = 1, + POSITION_2_VECTORS_SPRITE = 2, + POSITION_2_VECTORS_EDGE = 3, + POSITION_2_VECTORS_KILL = 4, + POSITION_2_VECTORS_SPRITE_KILL = 5, + POSITION_2_VECTORS_EDGE_KILL = 6, + MULTIPASS = 7, +}; + +enum 
a2xx_sq_sample_cntl { + CENTROIDS_ONLY = 0, + CENTERS_ONLY = 1, + CENTROIDS_AND_CENTERS = 2, +}; + +enum a2xx_dx_clip_space { + DXCLIP_OPENGL = 0, + DXCLIP_DIRECTX = 1, +}; + +enum a2xx_pa_su_sc_polymode { + POLY_DISABLED = 0, + POLY_DUALMODE = 1, +}; + +enum a2xx_rb_edram_mode { + EDRAM_NOP = 0, + COLOR_DEPTH = 4, + DEPTH_ONLY = 5, + EDRAM_COPY = 6, +}; + +enum a2xx_pa_sc_pattern_bit_order { + LITTLE = 0, + BIG = 1, +}; + +enum a2xx_pa_sc_auto_reset_cntl { + NEVER = 0, + EACH_PRIMITIVE = 1, + EACH_PACKET = 2, +}; + +enum a2xx_pa_pixcenter { + PIXCENTER_D3D = 0, + PIXCENTER_OGL = 1, +}; + +enum a2xx_pa_roundmode { + TRUNCATE = 0, + ROUND = 1, + ROUNDTOEVEN = 2, + ROUNDTOODD = 3, +}; + +enum a2xx_pa_quantmode { + ONE_SIXTEENTH = 0, + ONE_EIGTH = 1, + ONE_QUARTER = 2, + ONE_HALF = 3, + ONE = 4, +}; + +enum a2xx_rb_copy_sample_select { + SAMPLE_0 = 0, + SAMPLE_1 = 1, + SAMPLE_2 = 2, + SAMPLE_3 = 3, + SAMPLE_01 = 4, + SAMPLE_23 = 5, + SAMPLE_0123 = 6, +}; + +enum a2xx_rb_blend_opcode { + BLEND2_DST_PLUS_SRC = 0, + BLEND2_SRC_MINUS_DST = 1, + BLEND2_MIN_DST_SRC = 2, + BLEND2_MAX_DST_SRC = 3, + BLEND2_DST_MINUS_SRC = 4, + BLEND2_DST_PLUS_SRC_BIAS = 5, +}; + +enum a2xx_su_perfcnt_select { + PERF_PAPC_PASX_REQ = 0, + PERF_PAPC_PASX_FIRST_VECTOR = 2, + PERF_PAPC_PASX_SECOND_VECTOR = 3, + PERF_PAPC_PASX_FIRST_DEAD = 4, + PERF_PAPC_PASX_SECOND_DEAD = 5, + PERF_PAPC_PASX_VTX_KILL_DISCARD = 6, + PERF_PAPC_PASX_VTX_NAN_DISCARD = 7, + PERF_PAPC_PA_INPUT_PRIM = 8, + PERF_PAPC_PA_INPUT_NULL_PRIM = 9, + PERF_PAPC_PA_INPUT_EVENT_FLAG = 10, + PERF_PAPC_PA_INPUT_FIRST_PRIM_SLOT = 11, + PERF_PAPC_PA_INPUT_END_OF_PACKET = 12, + PERF_PAPC_CLPR_CULL_PRIM = 13, + PERF_PAPC_CLPR_VV_CULL_PRIM = 15, + PERF_PAPC_CLPR_VTX_KILL_CULL_PRIM = 17, + PERF_PAPC_CLPR_VTX_NAN_CULL_PRIM = 18, + PERF_PAPC_CLPR_CULL_TO_NULL_PRIM = 19, + PERF_PAPC_CLPR_VV_CLIP_PRIM = 21, + PERF_PAPC_CLPR_POINT_CLIP_CANDIDATE = 23, + PERF_PAPC_CLPR_CLIP_PLANE_CNT_1 = 24, + PERF_PAPC_CLPR_CLIP_PLANE_CNT_2 = 25, + 
PERF_PAPC_CLPR_CLIP_PLANE_CNT_3 = 26, + PERF_PAPC_CLPR_CLIP_PLANE_CNT_4 = 27, + PERF_PAPC_CLPR_CLIP_PLANE_CNT_5 = 28, + PERF_PAPC_CLPR_CLIP_PLANE_CNT_6 = 29, + PERF_PAPC_CLPR_CLIP_PLANE_NEAR = 30, + PERF_PAPC_CLPR_CLIP_PLANE_FAR = 31, + PERF_PAPC_CLPR_CLIP_PLANE_LEFT = 32, + PERF_PAPC_CLPR_CLIP_PLANE_RIGHT = 33, + PERF_PAPC_CLPR_CLIP_PLANE_TOP = 34, + PERF_PAPC_CLPR_CLIP_PLANE_BOTTOM = 35, + PERF_PAPC_CLSM_NULL_PRIM = 36, + PERF_PAPC_CLSM_TOTALLY_VISIBLE_PRIM = 37, + PERF_PAPC_CLSM_CLIP_PRIM = 38, + PERF_PAPC_CLSM_CULL_TO_NULL_PRIM = 39, + PERF_PAPC_CLSM_OUT_PRIM_CNT_1 = 40, + PERF_PAPC_CLSM_OUT_PRIM_CNT_2 = 41, + PERF_PAPC_CLSM_OUT_PRIM_CNT_3 = 42, + PERF_PAPC_CLSM_OUT_PRIM_CNT_4 = 43, + PERF_PAPC_CLSM_OUT_PRIM_CNT_5 = 44, + PERF_PAPC_CLSM_OUT_PRIM_CNT_6_7 = 45, + PERF_PAPC_CLSM_NON_TRIVIAL_CULL = 46, + PERF_PAPC_SU_INPUT_PRIM = 47, + PERF_PAPC_SU_INPUT_CLIP_PRIM = 48, + PERF_PAPC_SU_INPUT_NULL_PRIM = 49, + PERF_PAPC_SU_ZERO_AREA_CULL_PRIM = 50, + PERF_PAPC_SU_BACK_FACE_CULL_PRIM = 51, + PERF_PAPC_SU_FRONT_FACE_CULL_PRIM = 52, + PERF_PAPC_SU_POLYMODE_FACE_CULL = 53, + PERF_PAPC_SU_POLYMODE_BACK_CULL = 54, + PERF_PAPC_SU_POLYMODE_FRONT_CULL = 55, + PERF_PAPC_SU_POLYMODE_INVALID_FILL = 56, + PERF_PAPC_SU_OUTPUT_PRIM = 57, + PERF_PAPC_SU_OUTPUT_CLIP_PRIM = 58, + PERF_PAPC_SU_OUTPUT_NULL_PRIM = 59, + PERF_PAPC_SU_OUTPUT_EVENT_FLAG = 60, + PERF_PAPC_SU_OUTPUT_FIRST_PRIM_SLOT = 61, + PERF_PAPC_SU_OUTPUT_END_OF_PACKET = 62, + PERF_PAPC_SU_OUTPUT_POLYMODE_FACE = 63, + PERF_PAPC_SU_OUTPUT_POLYMODE_BACK = 64, + PERF_PAPC_SU_OUTPUT_POLYMODE_FRONT = 65, + PERF_PAPC_SU_OUT_CLIP_POLYMODE_FACE = 66, + PERF_PAPC_SU_OUT_CLIP_POLYMODE_BACK = 67, + PERF_PAPC_SU_OUT_CLIP_POLYMODE_FRONT = 68, + PERF_PAPC_PASX_REQ_IDLE = 69, + PERF_PAPC_PASX_REQ_BUSY = 70, + PERF_PAPC_PASX_REQ_STALLED = 71, + PERF_PAPC_PASX_REC_IDLE = 72, + PERF_PAPC_PASX_REC_BUSY = 73, + PERF_PAPC_PASX_REC_STARVED_SX = 74, + PERF_PAPC_PASX_REC_STALLED = 75, + PERF_PAPC_PASX_REC_STALLED_POS_MEM = 76, + 
PERF_PAPC_PASX_REC_STALLED_CCGSM_IN = 77, + PERF_PAPC_CCGSM_IDLE = 78, + PERF_PAPC_CCGSM_BUSY = 79, + PERF_PAPC_CCGSM_STALLED = 80, + PERF_PAPC_CLPRIM_IDLE = 81, + PERF_PAPC_CLPRIM_BUSY = 82, + PERF_PAPC_CLPRIM_STALLED = 83, + PERF_PAPC_CLPRIM_STARVED_CCGSM = 84, + PERF_PAPC_CLIPSM_IDLE = 85, + PERF_PAPC_CLIPSM_BUSY = 86, + PERF_PAPC_CLIPSM_WAIT_CLIP_VERT_ENGH = 87, + PERF_PAPC_CLIPSM_WAIT_HIGH_PRI_SEQ = 88, + PERF_PAPC_CLIPSM_WAIT_CLIPGA = 89, + PERF_PAPC_CLIPSM_WAIT_AVAIL_VTE_CLIP = 90, + PERF_PAPC_CLIPSM_WAIT_CLIP_OUTSM = 91, + PERF_PAPC_CLIPGA_IDLE = 92, + PERF_PAPC_CLIPGA_BUSY = 93, + PERF_PAPC_CLIPGA_STARVED_VTE_CLIP = 94, + PERF_PAPC_CLIPGA_STALLED = 95, + PERF_PAPC_CLIP_IDLE = 96, + PERF_PAPC_CLIP_BUSY = 97, + PERF_PAPC_SU_IDLE = 98, + PERF_PAPC_SU_BUSY = 99, + PERF_PAPC_SU_STARVED_CLIP = 100, + PERF_PAPC_SU_STALLED_SC = 101, + PERF_PAPC_SU_FACENESS_CULL = 102, +}; + +enum a2xx_sc_perfcnt_select { + SC_SR_WINDOW_VALID = 0, + SC_CW_WINDOW_VALID = 1, + SC_QM_WINDOW_VALID = 2, + SC_FW_WINDOW_VALID = 3, + SC_EZ_WINDOW_VALID = 4, + SC_IT_WINDOW_VALID = 5, + SC_STARVED_BY_PA = 6, + SC_STALLED_BY_RB_TILE = 7, + SC_STALLED_BY_RB_SAMP = 8, + SC_STARVED_BY_RB_EZ = 9, + SC_STALLED_BY_SAMPLE_FF = 10, + SC_STALLED_BY_SQ = 11, + SC_STALLED_BY_SP = 12, + SC_TOTAL_NO_PRIMS = 13, + SC_NON_EMPTY_PRIMS = 14, + SC_NO_TILES_PASSING_QM = 15, + SC_NO_PIXELS_PRE_EZ = 16, + SC_NO_PIXELS_POST_EZ = 17, +}; + +enum a2xx_vgt_perfcount_select { + VGT_SQ_EVENT_WINDOW_ACTIVE = 0, + VGT_SQ_SEND = 1, + VGT_SQ_STALLED = 2, + VGT_SQ_STARVED_BUSY = 3, + VGT_SQ_STARVED_IDLE = 4, + VGT_SQ_STATIC = 5, + VGT_PA_EVENT_WINDOW_ACTIVE = 6, + VGT_PA_CLIP_V_SEND = 7, + VGT_PA_CLIP_V_STALLED = 8, + VGT_PA_CLIP_V_STARVED_BUSY = 9, + VGT_PA_CLIP_V_STARVED_IDLE = 10, + VGT_PA_CLIP_V_STATIC = 11, + VGT_PA_CLIP_P_SEND = 12, + VGT_PA_CLIP_P_STALLED = 13, + VGT_PA_CLIP_P_STARVED_BUSY = 14, + VGT_PA_CLIP_P_STARVED_IDLE = 15, + VGT_PA_CLIP_P_STATIC = 16, + VGT_PA_CLIP_S_SEND = 17, + VGT_PA_CLIP_S_STALLED = 18, + 
VGT_PA_CLIP_S_STARVED_BUSY = 19, + VGT_PA_CLIP_S_STARVED_IDLE = 20, + VGT_PA_CLIP_S_STATIC = 21, + RBIU_FIFOS_EVENT_WINDOW_ACTIVE = 22, + RBIU_IMMED_DATA_FIFO_STARVED = 23, + RBIU_IMMED_DATA_FIFO_STALLED = 24, + RBIU_DMA_REQUEST_FIFO_STARVED = 25, + RBIU_DMA_REQUEST_FIFO_STALLED = 26, + RBIU_DRAW_INITIATOR_FIFO_STARVED = 27, + RBIU_DRAW_INITIATOR_FIFO_STALLED = 28, + BIN_PRIM_NEAR_CULL = 29, + BIN_PRIM_ZERO_CULL = 30, + BIN_PRIM_FAR_CULL = 31, + BIN_PRIM_BIN_CULL = 32, + BIN_PRIM_FACE_CULL = 33, + SPARE34 = 34, + SPARE35 = 35, + SPARE36 = 36, + SPARE37 = 37, + SPARE38 = 38, + SPARE39 = 39, + TE_SU_IN_VALID = 40, + TE_SU_IN_READ = 41, + TE_SU_IN_PRIM = 42, + TE_SU_IN_EOP = 43, + TE_SU_IN_NULL_PRIM = 44, + TE_WK_IN_VALID = 45, + TE_WK_IN_READ = 46, + TE_OUT_PRIM_VALID = 47, + TE_OUT_PRIM_READ = 48, +}; + +enum a2xx_tcr_perfcount_select { + DGMMPD_IPMUX0_STALL = 0, + DGMMPD_IPMUX_ALL_STALL = 4, + OPMUX0_L2_WRITES = 5, +}; + +enum a2xx_tp_perfcount_select { + POINT_QUADS = 0, + BILIN_QUADS = 1, + ANISO_QUADS = 2, + MIP_QUADS = 3, + VOL_QUADS = 4, + MIP_VOL_QUADS = 5, + MIP_ANISO_QUADS = 6, + VOL_ANISO_QUADS = 7, + ANISO_2_1_QUADS = 8, + ANISO_4_1_QUADS = 9, + ANISO_6_1_QUADS = 10, + ANISO_8_1_QUADS = 11, + ANISO_10_1_QUADS = 12, + ANISO_12_1_QUADS = 13, + ANISO_14_1_QUADS = 14, + ANISO_16_1_QUADS = 15, + MIP_VOL_ANISO_QUADS = 16, + ALIGN_2_QUADS = 17, + ALIGN_4_QUADS = 18, + PIX_0_QUAD = 19, + PIX_1_QUAD = 20, + PIX_2_QUAD = 21, + PIX_3_QUAD = 22, + PIX_4_QUAD = 23, + TP_MIPMAP_LOD0 = 24, + TP_MIPMAP_LOD1 = 25, + TP_MIPMAP_LOD2 = 26, + TP_MIPMAP_LOD3 = 27, + TP_MIPMAP_LOD4 = 28, + TP_MIPMAP_LOD5 = 29, + TP_MIPMAP_LOD6 = 30, + TP_MIPMAP_LOD7 = 31, + TP_MIPMAP_LOD8 = 32, + TP_MIPMAP_LOD9 = 33, + TP_MIPMAP_LOD10 = 34, + TP_MIPMAP_LOD11 = 35, + TP_MIPMAP_LOD12 = 36, + TP_MIPMAP_LOD13 = 37, + TP_MIPMAP_LOD14 = 38, +}; + +enum a2xx_tcm_perfcount_select { + QUAD0_RD_LAT_FIFO_EMPTY = 0, + QUAD0_RD_LAT_FIFO_4TH_FULL = 3, + QUAD0_RD_LAT_FIFO_HALF_FULL = 4, + 
QUAD0_RD_LAT_FIFO_FULL = 5, + QUAD0_RD_LAT_FIFO_LT_4TH_FULL = 6, + READ_STARVED_QUAD0 = 28, + READ_STARVED = 32, + READ_STALLED_QUAD0 = 33, + READ_STALLED = 37, + VALID_READ_QUAD0 = 38, + TC_TP_STARVED_QUAD0 = 42, + TC_TP_STARVED = 46, +}; + +enum a2xx_tcf_perfcount_select { + VALID_CYCLES = 0, + SINGLE_PHASES = 1, + ANISO_PHASES = 2, + MIP_PHASES = 3, + VOL_PHASES = 4, + MIP_VOL_PHASES = 5, + MIP_ANISO_PHASES = 6, + VOL_ANISO_PHASES = 7, + ANISO_2_1_PHASES = 8, + ANISO_4_1_PHASES = 9, + ANISO_6_1_PHASES = 10, + ANISO_8_1_PHASES = 11, + ANISO_10_1_PHASES = 12, + ANISO_12_1_PHASES = 13, + ANISO_14_1_PHASES = 14, + ANISO_16_1_PHASES = 15, + MIP_VOL_ANISO_PHASES = 16, + ALIGN_2_PHASES = 17, + ALIGN_4_PHASES = 18, + TPC_BUSY = 19, + TPC_STALLED = 20, + TPC_STARVED = 21, + TPC_WORKING = 22, + TPC_WALKER_BUSY = 23, + TPC_WALKER_STALLED = 24, + TPC_WALKER_WORKING = 25, + TPC_ALIGNER_BUSY = 26, + TPC_ALIGNER_STALLED = 27, + TPC_ALIGNER_STALLED_BY_BLEND = 28, + TPC_ALIGNER_STALLED_BY_CACHE = 29, + TPC_ALIGNER_WORKING = 30, + TPC_BLEND_BUSY = 31, + TPC_BLEND_SYNC = 32, + TPC_BLEND_STARVED = 33, + TPC_BLEND_WORKING = 34, + OPCODE_0x00 = 35, + OPCODE_0x01 = 36, + OPCODE_0x04 = 37, + OPCODE_0x10 = 38, + OPCODE_0x11 = 39, + OPCODE_0x12 = 40, + OPCODE_0x13 = 41, + OPCODE_0x18 = 42, + OPCODE_0x19 = 43, + OPCODE_0x1A = 44, + OPCODE_OTHER = 45, + IN_FIFO_0_EMPTY = 56, + IN_FIFO_0_LT_HALF_FULL = 57, + IN_FIFO_0_HALF_FULL = 58, + IN_FIFO_0_FULL = 59, + IN_FIFO_TPC_EMPTY = 72, + IN_FIFO_TPC_LT_HALF_FULL = 73, + IN_FIFO_TPC_HALF_FULL = 74, + IN_FIFO_TPC_FULL = 75, + TPC_TC_XFC = 76, + TPC_TC_STATE = 77, + TC_STALL = 78, + QUAD0_TAPS = 79, + QUADS = 83, + TCA_SYNC_STALL = 84, + TAG_STALL = 85, + TCB_SYNC_STALL = 88, + TCA_VALID = 89, + PROBES_VALID = 90, + MISS_STALL = 91, + FETCH_FIFO_STALL = 92, + TCO_STALL = 93, + ANY_STALL = 94, + TAG_MISSES = 95, + TAG_HITS = 96, + SUB_TAG_MISSES = 97, + SET0_INVALIDATES = 98, + SET1_INVALIDATES = 99, + SET2_INVALIDATES = 100, + SET3_INVALIDATES = 
101, + SET0_TAG_MISSES = 102, + SET1_TAG_MISSES = 103, + SET2_TAG_MISSES = 104, + SET3_TAG_MISSES = 105, + SET0_TAG_HITS = 106, + SET1_TAG_HITS = 107, + SET2_TAG_HITS = 108, + SET3_TAG_HITS = 109, + SET0_SUB_TAG_MISSES = 110, + SET1_SUB_TAG_MISSES = 111, + SET2_SUB_TAG_MISSES = 112, + SET3_SUB_TAG_MISSES = 113, + SET0_EVICT1 = 114, + SET0_EVICT2 = 115, + SET0_EVICT3 = 116, + SET0_EVICT4 = 117, + SET0_EVICT5 = 118, + SET0_EVICT6 = 119, + SET0_EVICT7 = 120, + SET0_EVICT8 = 121, + SET1_EVICT1 = 130, + SET1_EVICT2 = 131, + SET1_EVICT3 = 132, + SET1_EVICT4 = 133, + SET1_EVICT5 = 134, + SET1_EVICT6 = 135, + SET1_EVICT7 = 136, + SET1_EVICT8 = 137, + SET2_EVICT1 = 146, + SET2_EVICT2 = 147, + SET2_EVICT3 = 148, + SET2_EVICT4 = 149, + SET2_EVICT5 = 150, + SET2_EVICT6 = 151, + SET2_EVICT7 = 152, + SET2_EVICT8 = 153, + SET3_EVICT1 = 162, + SET3_EVICT2 = 163, + SET3_EVICT3 = 164, + SET3_EVICT4 = 165, + SET3_EVICT5 = 166, + SET3_EVICT6 = 167, + SET3_EVICT7 = 168, + SET3_EVICT8 = 169, + FF_EMPTY = 178, + FF_LT_HALF_FULL = 179, + FF_HALF_FULL = 180, + FF_FULL = 181, + FF_XFC = 182, + FF_STALLED = 183, + FG_MASKS = 184, + FG_LEFT_MASKS = 185, + FG_LEFT_MASK_STALLED = 186, + FG_LEFT_NOT_DONE_STALL = 187, + FG_LEFT_FG_STALL = 188, + FG_LEFT_SECTORS = 189, + FG0_REQUESTS = 195, + FG0_STALLED = 196, + MEM_REQ512 = 199, + MEM_REQ_SENT = 200, + MEM_LOCAL_READ_REQ = 202, + TC0_MH_STALLED = 203, +}; + +enum a2xx_sq_perfcnt_select { + SQ_PIXEL_VECTORS_SUB = 0, + SQ_VERTEX_VECTORS_SUB = 1, + SQ_ALU0_ACTIVE_VTX_SIMD0 = 2, + SQ_ALU1_ACTIVE_VTX_SIMD0 = 3, + SQ_ALU0_ACTIVE_PIX_SIMD0 = 4, + SQ_ALU1_ACTIVE_PIX_SIMD0 = 5, + SQ_ALU0_ACTIVE_VTX_SIMD1 = 6, + SQ_ALU1_ACTIVE_VTX_SIMD1 = 7, + SQ_ALU0_ACTIVE_PIX_SIMD1 = 8, + SQ_ALU1_ACTIVE_PIX_SIMD1 = 9, + SQ_EXPORT_CYCLES = 10, + SQ_ALU_CST_WRITTEN = 11, + SQ_TEX_CST_WRITTEN = 12, + SQ_ALU_CST_STALL = 13, + SQ_ALU_TEX_STALL = 14, + SQ_INST_WRITTEN = 15, + SQ_BOOLEAN_WRITTEN = 16, + SQ_LOOPS_WRITTEN = 17, + SQ_PIXEL_SWAP_IN = 18, + SQ_PIXEL_SWAP_OUT = 19, 
+ SQ_VERTEX_SWAP_IN = 20, + SQ_VERTEX_SWAP_OUT = 21, + SQ_ALU_VTX_INST_ISSUED = 22, + SQ_TEX_VTX_INST_ISSUED = 23, + SQ_VC_VTX_INST_ISSUED = 24, + SQ_CF_VTX_INST_ISSUED = 25, + SQ_ALU_PIX_INST_ISSUED = 26, + SQ_TEX_PIX_INST_ISSUED = 27, + SQ_VC_PIX_INST_ISSUED = 28, + SQ_CF_PIX_INST_ISSUED = 29, + SQ_ALU0_FIFO_EMPTY_SIMD0 = 30, + SQ_ALU1_FIFO_EMPTY_SIMD0 = 31, + SQ_ALU0_FIFO_EMPTY_SIMD1 = 32, + SQ_ALU1_FIFO_EMPTY_SIMD1 = 33, + SQ_ALU_NOPS = 34, + SQ_PRED_SKIP = 35, + SQ_SYNC_ALU_STALL_SIMD0_VTX = 36, + SQ_SYNC_ALU_STALL_SIMD1_VTX = 37, + SQ_SYNC_TEX_STALL_VTX = 38, + SQ_SYNC_VC_STALL_VTX = 39, + SQ_CONSTANTS_USED_SIMD0 = 40, + SQ_CONSTANTS_SENT_SP_SIMD0 = 41, + SQ_GPR_STALL_VTX = 42, + SQ_GPR_STALL_PIX = 43, + SQ_VTX_RS_STALL = 44, + SQ_PIX_RS_STALL = 45, + SQ_SX_PC_FULL = 46, + SQ_SX_EXP_BUFF_FULL = 47, + SQ_SX_POS_BUFF_FULL = 48, + SQ_INTERP_QUADS = 49, + SQ_INTERP_ACTIVE = 50, + SQ_IN_PIXEL_STALL = 51, + SQ_IN_VTX_STALL = 52, + SQ_VTX_CNT = 53, + SQ_VTX_VECTOR2 = 54, + SQ_VTX_VECTOR3 = 55, + SQ_VTX_VECTOR4 = 56, + SQ_PIXEL_VECTOR1 = 57, + SQ_PIXEL_VECTOR23 = 58, + SQ_PIXEL_VECTOR4 = 59, + SQ_CONSTANTS_USED_SIMD1 = 60, + SQ_CONSTANTS_SENT_SP_SIMD1 = 61, + SQ_SX_MEM_EXP_FULL = 62, + SQ_ALU0_ACTIVE_VTX_SIMD2 = 63, + SQ_ALU1_ACTIVE_VTX_SIMD2 = 64, + SQ_ALU0_ACTIVE_PIX_SIMD2 = 65, + SQ_ALU1_ACTIVE_PIX_SIMD2 = 66, + SQ_ALU0_ACTIVE_VTX_SIMD3 = 67, + SQ_PERFCOUNT_VTX_QUAL_TP_DONE = 68, + SQ_ALU0_ACTIVE_PIX_SIMD3 = 69, + SQ_PERFCOUNT_PIX_QUAL_TP_DONE = 70, + SQ_ALU0_FIFO_EMPTY_SIMD2 = 71, + SQ_ALU1_FIFO_EMPTY_SIMD2 = 72, + SQ_ALU0_FIFO_EMPTY_SIMD3 = 73, + SQ_ALU1_FIFO_EMPTY_SIMD3 = 74, + SQ_SYNC_ALU_STALL_SIMD2_VTX = 75, + SQ_PERFCOUNT_VTX_POP_THREAD = 76, + SQ_SYNC_ALU_STALL_SIMD0_PIX = 77, + SQ_SYNC_ALU_STALL_SIMD1_PIX = 78, + SQ_SYNC_ALU_STALL_SIMD2_PIX = 79, + SQ_PERFCOUNT_PIX_POP_THREAD = 80, + SQ_SYNC_TEX_STALL_PIX = 81, + SQ_SYNC_VC_STALL_PIX = 82, + SQ_CONSTANTS_USED_SIMD2 = 83, + SQ_CONSTANTS_SENT_SP_SIMD2 = 84, + SQ_PERFCOUNT_VTX_DEALLOC_ACK = 85, + 
SQ_PERFCOUNT_PIX_DEALLOC_ACK = 86, + SQ_ALU0_FIFO_FULL_SIMD0 = 87, + SQ_ALU1_FIFO_FULL_SIMD0 = 88, + SQ_ALU0_FIFO_FULL_SIMD1 = 89, + SQ_ALU1_FIFO_FULL_SIMD1 = 90, + SQ_ALU0_FIFO_FULL_SIMD2 = 91, + SQ_ALU1_FIFO_FULL_SIMD2 = 92, + SQ_ALU0_FIFO_FULL_SIMD3 = 93, + SQ_ALU1_FIFO_FULL_SIMD3 = 94, + VC_PERF_STATIC = 95, + VC_PERF_STALLED = 96, + VC_PERF_STARVED = 97, + VC_PERF_SEND = 98, + VC_PERF_ACTUAL_STARVED = 99, + PIXEL_THREAD_0_ACTIVE = 100, + VERTEX_THREAD_0_ACTIVE = 101, + PIXEL_THREAD_0_NUMBER = 102, + VERTEX_THREAD_0_NUMBER = 103, + VERTEX_EVENT_NUMBER = 104, + PIXEL_EVENT_NUMBER = 105, + PTRBUFF_EF_PUSH = 106, + PTRBUFF_EF_POP_EVENT = 107, + PTRBUFF_EF_POP_NEW_VTX = 108, + PTRBUFF_EF_POP_DEALLOC = 109, + PTRBUFF_EF_POP_PVECTOR = 110, + PTRBUFF_EF_POP_PVECTOR_X = 111, + PTRBUFF_EF_POP_PVECTOR_VNZ = 112, + PTRBUFF_PB_DEALLOC = 113, + PTRBUFF_PI_STATE_PPB_POP = 114, + PTRBUFF_PI_RTR = 115, + PTRBUFF_PI_READ_EN = 116, + PTRBUFF_PI_BUFF_SWAP = 117, + PTRBUFF_SQ_FREE_BUFF = 118, + PTRBUFF_SQ_DEC = 119, + PTRBUFF_SC_VALID_CNTL_EVENT = 120, + PTRBUFF_SC_VALID_IJ_XFER = 121, + PTRBUFF_SC_NEW_VECTOR_1_Q = 122, + PTRBUFF_QUAL_NEW_VECTOR = 123, + PTRBUFF_QUAL_EVENT = 124, + PTRBUFF_END_BUFFER = 125, + PTRBUFF_FILL_QUAD = 126, + VERTS_WRITTEN_SPI = 127, + TP_FETCH_INSTR_EXEC = 128, + TP_FETCH_INSTR_REQ = 129, + TP_DATA_RETURN = 130, + SPI_WRITE_CYCLES_SP = 131, + SPI_WRITES_SP = 132, + SP_ALU_INSTR_EXEC = 133, + SP_CONST_ADDR_TO_SQ = 134, + SP_PRED_KILLS_TO_SQ = 135, + SP_EXPORT_CYCLES_TO_SX = 136, + SP_EXPORTS_TO_SX = 137, + SQ_CYCLES_ELAPSED = 138, + SQ_TCFS_OPT_ALLOC_EXEC = 139, + SQ_TCFS_NO_OPT_ALLOC = 140, + SQ_ALU0_NO_OPT_ALLOC = 141, + SQ_ALU1_NO_OPT_ALLOC = 142, + SQ_TCFS_ARB_XFC_CNT = 143, + SQ_ALU0_ARB_XFC_CNT = 144, + SQ_ALU1_ARB_XFC_CNT = 145, + SQ_TCFS_CFS_UPDATE_CNT = 146, + SQ_ALU0_CFS_UPDATE_CNT = 147, + SQ_ALU1_CFS_UPDATE_CNT = 148, + SQ_VTX_PUSH_THREAD_CNT = 149, + SQ_VTX_POP_THREAD_CNT = 150, + SQ_PIX_PUSH_THREAD_CNT = 151, + SQ_PIX_POP_THREAD_CNT = 152, 
+ SQ_PIX_TOTAL = 153, + SQ_PIX_KILLED = 154, +}; + +enum a2xx_sx_perfcnt_select { + SX_EXPORT_VECTORS = 0, + SX_DUMMY_QUADS = 1, + SX_ALPHA_FAIL = 2, + SX_RB_QUAD_BUSY = 3, + SX_RB_COLOR_BUSY = 4, + SX_RB_QUAD_STALL = 5, + SX_RB_COLOR_STALL = 6, +}; + +enum a2xx_rbbm_perfcount1_sel { + RBBM1_COUNT = 0, + RBBM1_NRT_BUSY = 1, + RBBM1_RB_BUSY = 2, + RBBM1_SQ_CNTX0_BUSY = 3, + RBBM1_SQ_CNTX17_BUSY = 4, + RBBM1_VGT_BUSY = 5, + RBBM1_VGT_NODMA_BUSY = 6, + RBBM1_PA_BUSY = 7, + RBBM1_SC_CNTX_BUSY = 8, + RBBM1_TPC_BUSY = 9, + RBBM1_TC_BUSY = 10, + RBBM1_SX_BUSY = 11, + RBBM1_CP_COHER_BUSY = 12, + RBBM1_CP_NRT_BUSY = 13, + RBBM1_GFX_IDLE_STALL = 14, + RBBM1_INTERRUPT = 15, +}; + +enum a2xx_cp_perfcount_sel { + ALWAYS_COUNT = 0, + TRANS_FIFO_FULL = 1, + TRANS_FIFO_AF = 2, + RCIU_PFPTRANS_WAIT = 3, + RCIU_NRTTRANS_WAIT = 6, + CSF_NRT_READ_WAIT = 8, + CSF_I1_FIFO_FULL = 9, + CSF_I2_FIFO_FULL = 10, + CSF_ST_FIFO_FULL = 11, + CSF_RING_ROQ_FULL = 13, + CSF_I1_ROQ_FULL = 14, + CSF_I2_ROQ_FULL = 15, + CSF_ST_ROQ_FULL = 16, + MIU_TAG_MEM_FULL = 18, + MIU_WRITECLEAN = 19, + MIU_NRT_WRITE_STALLED = 22, + MIU_NRT_READ_STALLED = 23, + ME_WRITE_CONFIRM_FIFO_FULL = 24, + ME_VS_DEALLOC_FIFO_FULL = 25, + ME_PS_DEALLOC_FIFO_FULL = 26, + ME_REGS_VS_EVENT_FIFO_FULL = 27, + ME_REGS_PS_EVENT_FIFO_FULL = 28, + ME_REGS_CF_EVENT_FIFO_FULL = 29, + ME_MICRO_RB_STARVED = 30, + ME_MICRO_I1_STARVED = 31, + ME_MICRO_I2_STARVED = 32, + ME_MICRO_ST_STARVED = 33, + RCIU_RBBM_DWORD_SENT = 40, + ME_BUSY_CLOCKS = 41, + ME_WAIT_CONTEXT_AVAIL = 42, + PFP_TYPE0_PACKET = 43, + PFP_TYPE3_PACKET = 44, + CSF_RB_WPTR_NEQ_RPTR = 45, + CSF_I1_SIZE_NEQ_ZERO = 46, + CSF_I2_SIZE_NEQ_ZERO = 47, + CSF_RBI1I2_FETCHING = 48, +}; + +enum a2xx_rb_perfcnt_select { + RBPERF_CNTX_BUSY = 0, + RBPERF_CNTX_BUSY_MAX = 1, + RBPERF_SX_QUAD_STARVED = 2, + RBPERF_SX_QUAD_STARVED_MAX = 3, + RBPERF_GA_GC_CH0_SYS_REQ = 4, + RBPERF_GA_GC_CH0_SYS_REQ_MAX = 5, + RBPERF_GA_GC_CH1_SYS_REQ = 6, + RBPERF_GA_GC_CH1_SYS_REQ_MAX = 7, + RBPERF_MH_STARVED 
= 8, + RBPERF_MH_STARVED_MAX = 9, + RBPERF_AZ_BC_COLOR_BUSY = 10, + RBPERF_AZ_BC_COLOR_BUSY_MAX = 11, + RBPERF_AZ_BC_Z_BUSY = 12, + RBPERF_AZ_BC_Z_BUSY_MAX = 13, + RBPERF_RB_SC_TILE_RTR_N = 14, + RBPERF_RB_SC_TILE_RTR_N_MAX = 15, + RBPERF_RB_SC_SAMP_RTR_N = 16, + RBPERF_RB_SC_SAMP_RTR_N_MAX = 17, + RBPERF_RB_SX_QUAD_RTR_N = 18, + RBPERF_RB_SX_QUAD_RTR_N_MAX = 19, + RBPERF_RB_SX_COLOR_RTR_N = 20, + RBPERF_RB_SX_COLOR_RTR_N_MAX = 21, + RBPERF_RB_SC_SAMP_LZ_BUSY = 22, + RBPERF_RB_SC_SAMP_LZ_BUSY_MAX = 23, + RBPERF_ZXP_STALL = 24, + RBPERF_ZXP_STALL_MAX = 25, + RBPERF_EVENT_PENDING = 26, + RBPERF_EVENT_PENDING_MAX = 27, + RBPERF_RB_MH_VALID = 28, + RBPERF_RB_MH_VALID_MAX = 29, + RBPERF_SX_RB_QUAD_SEND = 30, + RBPERF_SX_RB_COLOR_SEND = 31, + RBPERF_SC_RB_TILE_SEND = 32, + RBPERF_SC_RB_SAMPLE_SEND = 33, + RBPERF_SX_RB_MEM_EXPORT = 34, + RBPERF_SX_RB_QUAD_EVENT = 35, + RBPERF_SC_RB_TILE_EVENT_FILTERED = 36, + RBPERF_SC_RB_TILE_EVENT_ALL = 37, + RBPERF_RB_SC_EZ_SEND = 38, + RBPERF_RB_SX_INDEX_SEND = 39, + RBPERF_GMEM_INTFO_RD = 40, + RBPERF_GMEM_INTF1_RD = 41, + RBPERF_GMEM_INTFO_WR = 42, + RBPERF_GMEM_INTF1_WR = 43, + RBPERF_RB_CP_CONTEXT_DONE = 44, + RBPERF_RB_CP_CACHE_FLUSH = 45, + RBPERF_ZPASS_DONE = 46, + RBPERF_ZCMD_VALID = 47, + RBPERF_CCMD_VALID = 48, + RBPERF_ACCUM_GRANT = 49, + RBPERF_ACCUM_C0_GRANT = 50, + RBPERF_ACCUM_C1_GRANT = 51, + RBPERF_ACCUM_FULL_BE_WR = 52, + RBPERF_ACCUM_REQUEST_NO_GRANT = 53, + RBPERF_ACCUM_TIMEOUT_PULSE = 54, + RBPERF_ACCUM_LIN_TIMEOUT_PULSE = 55, + RBPERF_ACCUM_CAM_HIT_FLUSHING = 56, +}; + +enum adreno_mmu_clnt_beh { + BEH_NEVR = 0, + BEH_TRAN_RNG = 1, + BEH_TRAN_FLT = 2, +}; + +enum sq_tex_clamp { + SQ_TEX_WRAP = 0, + SQ_TEX_MIRROR = 1, + SQ_TEX_CLAMP_LAST_TEXEL = 2, + SQ_TEX_MIRROR_ONCE_LAST_TEXEL = 3, + SQ_TEX_CLAMP_HALF_BORDER = 4, + SQ_TEX_MIRROR_ONCE_HALF_BORDER = 5, + SQ_TEX_CLAMP_BORDER = 6, + SQ_TEX_MIRROR_ONCE_BORDER = 7, +}; + +enum sq_tex_swiz { + SQ_TEX_X = 0, + SQ_TEX_Y = 1, + SQ_TEX_Z = 2, + SQ_TEX_W = 3, + SQ_TEX_ZERO 
= 4, + SQ_TEX_ONE = 5, +}; + +enum sq_tex_filter { + SQ_TEX_FILTER_POINT = 0, + SQ_TEX_FILTER_BILINEAR = 1, + SQ_TEX_FILTER_BASEMAP = 2, + SQ_TEX_FILTER_USE_FETCH_CONST = 3, +}; + +enum sq_tex_aniso_filter { + SQ_TEX_ANISO_FILTER_DISABLED = 0, + SQ_TEX_ANISO_FILTER_MAX_1_1 = 1, + SQ_TEX_ANISO_FILTER_MAX_2_1 = 2, + SQ_TEX_ANISO_FILTER_MAX_4_1 = 3, + SQ_TEX_ANISO_FILTER_MAX_8_1 = 4, + SQ_TEX_ANISO_FILTER_MAX_16_1 = 5, + SQ_TEX_ANISO_FILTER_USE_FETCH_CONST = 7, +}; + +enum sq_tex_dimension { + SQ_TEX_DIMENSION_1D = 0, + SQ_TEX_DIMENSION_2D = 1, + SQ_TEX_DIMENSION_3D = 2, + SQ_TEX_DIMENSION_CUBE = 3, +}; + +enum sq_tex_border_color { + SQ_TEX_BORDER_COLOR_BLACK = 0, + SQ_TEX_BORDER_COLOR_WHITE = 1, + SQ_TEX_BORDER_COLOR_ACBYCR_BLACK = 2, + SQ_TEX_BORDER_COLOR_ACBCRY_BLACK = 3, +}; + +enum sq_tex_sign { + SQ_TEX_SIGN_UNISIGNED = 0, + SQ_TEX_SIGN_SIGNED = 1, + SQ_TEX_SIGN_UNISIGNED_BIASED = 2, + SQ_TEX_SIGN_GAMMA = 3, +}; + +enum sq_tex_endian { + SQ_TEX_ENDIAN_NONE = 0, + SQ_TEX_ENDIAN_8IN16 = 1, + SQ_TEX_ENDIAN_8IN32 = 2, + SQ_TEX_ENDIAN_16IN32 = 3, +}; + +enum sq_tex_clamp_policy { + SQ_TEX_CLAMP_POLICY_D3D = 0, + SQ_TEX_CLAMP_POLICY_OGL = 1, +}; + +enum sq_tex_num_format { + SQ_TEX_NUM_FORMAT_FRAC = 0, + SQ_TEX_NUM_FORMAT_INT = 1, +}; + +enum sq_tex_type { + SQ_TEX_TYPE_0 = 0, + SQ_TEX_TYPE_1 = 1, + SQ_TEX_TYPE_2 = 2, + SQ_TEX_TYPE_3 = 3, +}; + +#define REG_A2XX_RBBM_PATCH_RELEASE 0x00000001 + +#define REG_A2XX_RBBM_CNTL 0x0000003b + +#define REG_A2XX_RBBM_SOFT_RESET 0x0000003c + +#define REG_A2XX_CP_PFP_UCODE_ADDR 0x000000c0 + +#define REG_A2XX_CP_PFP_UCODE_DATA 0x000000c1 + +#define REG_A2XX_MH_MMU_CONFIG 0x00000040 +#define A2XX_MH_MMU_CONFIG_MMU_ENABLE 0x00000001 +#define A2XX_MH_MMU_CONFIG_SPLIT_MODE_ENABLE 0x00000002 +#define A2XX_MH_MMU_CONFIG_RB_W_CLNT_BEHAVIOR__MASK 0x00000030 +#define A2XX_MH_MMU_CONFIG_RB_W_CLNT_BEHAVIOR__SHIFT 4 +static inline uint32_t A2XX_MH_MMU_CONFIG_RB_W_CLNT_BEHAVIOR(enum adreno_mmu_clnt_beh val) +{ + return ((val) << 
A2XX_MH_MMU_CONFIG_RB_W_CLNT_BEHAVIOR__SHIFT) & A2XX_MH_MMU_CONFIG_RB_W_CLNT_BEHAVIOR__MASK; +} +#define A2XX_MH_MMU_CONFIG_CP_W_CLNT_BEHAVIOR__MASK 0x000000c0 +#define A2XX_MH_MMU_CONFIG_CP_W_CLNT_BEHAVIOR__SHIFT 6 +static inline uint32_t A2XX_MH_MMU_CONFIG_CP_W_CLNT_BEHAVIOR(enum adreno_mmu_clnt_beh val) +{ + return ((val) << A2XX_MH_MMU_CONFIG_CP_W_CLNT_BEHAVIOR__SHIFT) & A2XX_MH_MMU_CONFIG_CP_W_CLNT_BEHAVIOR__MASK; +} +#define A2XX_MH_MMU_CONFIG_CP_R0_CLNT_BEHAVIOR__MASK 0x00000300 +#define A2XX_MH_MMU_CONFIG_CP_R0_CLNT_BEHAVIOR__SHIFT 8 +static inline uint32_t A2XX_MH_MMU_CONFIG_CP_R0_CLNT_BEHAVIOR(enum adreno_mmu_clnt_beh val) +{ + return ((val) << A2XX_MH_MMU_CONFIG_CP_R0_CLNT_BEHAVIOR__SHIFT) & A2XX_MH_MMU_CONFIG_CP_R0_CLNT_BEHAVIOR__MASK; +} +#define A2XX_MH_MMU_CONFIG_CP_R1_CLNT_BEHAVIOR__MASK 0x00000c00 +#define A2XX_MH_MMU_CONFIG_CP_R1_CLNT_BEHAVIOR__SHIFT 10 +static inline uint32_t A2XX_MH_MMU_CONFIG_CP_R1_CLNT_BEHAVIOR(enum adreno_mmu_clnt_beh val) +{ + return ((val) << A2XX_MH_MMU_CONFIG_CP_R1_CLNT_BEHAVIOR__SHIFT) & A2XX_MH_MMU_CONFIG_CP_R1_CLNT_BEHAVIOR__MASK; +} +#define A2XX_MH_MMU_CONFIG_CP_R2_CLNT_BEHAVIOR__MASK 0x00003000 +#define A2XX_MH_MMU_CONFIG_CP_R2_CLNT_BEHAVIOR__SHIFT 12 +static inline uint32_t A2XX_MH_MMU_CONFIG_CP_R2_CLNT_BEHAVIOR(enum adreno_mmu_clnt_beh val) +{ + return ((val) << A2XX_MH_MMU_CONFIG_CP_R2_CLNT_BEHAVIOR__SHIFT) & A2XX_MH_MMU_CONFIG_CP_R2_CLNT_BEHAVIOR__MASK; +} +#define A2XX_MH_MMU_CONFIG_CP_R3_CLNT_BEHAVIOR__MASK 0x0000c000 +#define A2XX_MH_MMU_CONFIG_CP_R3_CLNT_BEHAVIOR__SHIFT 14 +static inline uint32_t A2XX_MH_MMU_CONFIG_CP_R3_CLNT_BEHAVIOR(enum adreno_mmu_clnt_beh val) +{ + return ((val) << A2XX_MH_MMU_CONFIG_CP_R3_CLNT_BEHAVIOR__SHIFT) & A2XX_MH_MMU_CONFIG_CP_R3_CLNT_BEHAVIOR__MASK; +} +#define A2XX_MH_MMU_CONFIG_CP_R4_CLNT_BEHAVIOR__MASK 0x00030000 +#define A2XX_MH_MMU_CONFIG_CP_R4_CLNT_BEHAVIOR__SHIFT 16 +static inline uint32_t A2XX_MH_MMU_CONFIG_CP_R4_CLNT_BEHAVIOR(enum adreno_mmu_clnt_beh val) +{ + return 
((val) << A2XX_MH_MMU_CONFIG_CP_R4_CLNT_BEHAVIOR__SHIFT) & A2XX_MH_MMU_CONFIG_CP_R4_CLNT_BEHAVIOR__MASK; +} +#define A2XX_MH_MMU_CONFIG_VGT_R0_CLNT_BEHAVIOR__MASK 0x000c0000 +#define A2XX_MH_MMU_CONFIG_VGT_R0_CLNT_BEHAVIOR__SHIFT 18 +static inline uint32_t A2XX_MH_MMU_CONFIG_VGT_R0_CLNT_BEHAVIOR(enum adreno_mmu_clnt_beh val) +{ + return ((val) << A2XX_MH_MMU_CONFIG_VGT_R0_CLNT_BEHAVIOR__SHIFT) & A2XX_MH_MMU_CONFIG_VGT_R0_CLNT_BEHAVIOR__MASK; +} +#define A2XX_MH_MMU_CONFIG_VGT_R1_CLNT_BEHAVIOR__MASK 0x00300000 +#define A2XX_MH_MMU_CONFIG_VGT_R1_CLNT_BEHAVIOR__SHIFT 20 +static inline uint32_t A2XX_MH_MMU_CONFIG_VGT_R1_CLNT_BEHAVIOR(enum adreno_mmu_clnt_beh val) +{ + return ((val) << A2XX_MH_MMU_CONFIG_VGT_R1_CLNT_BEHAVIOR__SHIFT) & A2XX_MH_MMU_CONFIG_VGT_R1_CLNT_BEHAVIOR__MASK; +} +#define A2XX_MH_MMU_CONFIG_TC_R_CLNT_BEHAVIOR__MASK 0x00c00000 +#define A2XX_MH_MMU_CONFIG_TC_R_CLNT_BEHAVIOR__SHIFT 22 +static inline uint32_t A2XX_MH_MMU_CONFIG_TC_R_CLNT_BEHAVIOR(enum adreno_mmu_clnt_beh val) +{ + return ((val) << A2XX_MH_MMU_CONFIG_TC_R_CLNT_BEHAVIOR__SHIFT) & A2XX_MH_MMU_CONFIG_TC_R_CLNT_BEHAVIOR__MASK; +} +#define A2XX_MH_MMU_CONFIG_PA_W_CLNT_BEHAVIOR__MASK 0x03000000 +#define A2XX_MH_MMU_CONFIG_PA_W_CLNT_BEHAVIOR__SHIFT 24 +static inline uint32_t A2XX_MH_MMU_CONFIG_PA_W_CLNT_BEHAVIOR(enum adreno_mmu_clnt_beh val) +{ + return ((val) << A2XX_MH_MMU_CONFIG_PA_W_CLNT_BEHAVIOR__SHIFT) & A2XX_MH_MMU_CONFIG_PA_W_CLNT_BEHAVIOR__MASK; +} + +#define REG_A2XX_MH_MMU_VA_RANGE 0x00000041 +#define A2XX_MH_MMU_VA_RANGE_NUM_64KB_REGIONS__MASK 0x00000fff +#define A2XX_MH_MMU_VA_RANGE_NUM_64KB_REGIONS__SHIFT 0 +static inline uint32_t A2XX_MH_MMU_VA_RANGE_NUM_64KB_REGIONS(uint32_t val) +{ + return ((val) << A2XX_MH_MMU_VA_RANGE_NUM_64KB_REGIONS__SHIFT) & A2XX_MH_MMU_VA_RANGE_NUM_64KB_REGIONS__MASK; +} +#define A2XX_MH_MMU_VA_RANGE_VA_BASE__MASK 0xfffff000 +#define A2XX_MH_MMU_VA_RANGE_VA_BASE__SHIFT 12 +static inline uint32_t A2XX_MH_MMU_VA_RANGE_VA_BASE(uint32_t val) +{ + return 
((val) << A2XX_MH_MMU_VA_RANGE_VA_BASE__SHIFT) & A2XX_MH_MMU_VA_RANGE_VA_BASE__MASK; +} + +#define REG_A2XX_MH_MMU_PT_BASE 0x00000042 + +#define REG_A2XX_MH_MMU_PAGE_FAULT 0x00000043 + +#define REG_A2XX_MH_MMU_TRAN_ERROR 0x00000044 + +#define REG_A2XX_MH_MMU_INVALIDATE 0x00000045 +#define A2XX_MH_MMU_INVALIDATE_INVALIDATE_ALL 0x00000001 +#define A2XX_MH_MMU_INVALIDATE_INVALIDATE_TC 0x00000002 + +#define REG_A2XX_MH_MMU_MPU_BASE 0x00000046 + +#define REG_A2XX_MH_MMU_MPU_END 0x00000047 + +#define REG_A2XX_NQWAIT_UNTIL 0x00000394 + +#define REG_A2XX_RBBM_PERFCOUNTER1_SELECT 0x00000395 + +#define REG_A2XX_RBBM_PERFCOUNTER1_LO 0x00000397 + +#define REG_A2XX_RBBM_PERFCOUNTER1_HI 0x00000398 + +#define REG_A2XX_RBBM_DEBUG 0x0000039b + +#define REG_A2XX_RBBM_PM_OVERRIDE1 0x0000039c +#define A2XX_RBBM_PM_OVERRIDE1_RBBM_AHBCLK_PM_OVERRIDE 0x00000001 +#define A2XX_RBBM_PM_OVERRIDE1_SC_REG_SCLK_PM_OVERRIDE 0x00000002 +#define A2XX_RBBM_PM_OVERRIDE1_SC_SCLK_PM_OVERRIDE 0x00000004 +#define A2XX_RBBM_PM_OVERRIDE1_SP_TOP_SCLK_PM_OVERRIDE 0x00000008 +#define A2XX_RBBM_PM_OVERRIDE1_SP_V0_SCLK_PM_OVERRIDE 0x00000010 +#define A2XX_RBBM_PM_OVERRIDE1_SQ_REG_SCLK_PM_OVERRIDE 0x00000020 +#define A2XX_RBBM_PM_OVERRIDE1_SQ_REG_FIFOS_SCLK_PM_OVERRIDE 0x00000040 +#define A2XX_RBBM_PM_OVERRIDE1_SQ_CONST_MEM_SCLK_PM_OVERRIDE 0x00000080 +#define A2XX_RBBM_PM_OVERRIDE1_SQ_SQ_SCLK_PM_OVERRIDE 0x00000100 +#define A2XX_RBBM_PM_OVERRIDE1_SX_SCLK_PM_OVERRIDE 0x00000200 +#define A2XX_RBBM_PM_OVERRIDE1_SX_REG_SCLK_PM_OVERRIDE 0x00000400 +#define A2XX_RBBM_PM_OVERRIDE1_TCM_TCO_SCLK_PM_OVERRIDE 0x00000800 +#define A2XX_RBBM_PM_OVERRIDE1_TCM_TCM_SCLK_PM_OVERRIDE 0x00001000 +#define A2XX_RBBM_PM_OVERRIDE1_TCM_TCD_SCLK_PM_OVERRIDE 0x00002000 +#define A2XX_RBBM_PM_OVERRIDE1_TCM_REG_SCLK_PM_OVERRIDE 0x00004000 +#define A2XX_RBBM_PM_OVERRIDE1_TPC_TPC_SCLK_PM_OVERRIDE 0x00008000 +#define A2XX_RBBM_PM_OVERRIDE1_TPC_REG_SCLK_PM_OVERRIDE 0x00010000 +#define A2XX_RBBM_PM_OVERRIDE1_TCF_TCA_SCLK_PM_OVERRIDE 0x00020000 
+#define A2XX_RBBM_PM_OVERRIDE1_TCF_TCB_SCLK_PM_OVERRIDE 0x00040000 +#define A2XX_RBBM_PM_OVERRIDE1_TCF_TCB_READ_SCLK_PM_OVERRIDE 0x00080000 +#define A2XX_RBBM_PM_OVERRIDE1_TP_TP_SCLK_PM_OVERRIDE 0x00100000 +#define A2XX_RBBM_PM_OVERRIDE1_TP_REG_SCLK_PM_OVERRIDE 0x00200000 +#define A2XX_RBBM_PM_OVERRIDE1_CP_G_SCLK_PM_OVERRIDE 0x00400000 +#define A2XX_RBBM_PM_OVERRIDE1_CP_REG_SCLK_PM_OVERRIDE 0x00800000 +#define A2XX_RBBM_PM_OVERRIDE1_CP_G_REG_SCLK_PM_OVERRIDE 0x01000000 +#define A2XX_RBBM_PM_OVERRIDE1_SPI_SCLK_PM_OVERRIDE 0x02000000 +#define A2XX_RBBM_PM_OVERRIDE1_RB_REG_SCLK_PM_OVERRIDE 0x04000000 +#define A2XX_RBBM_PM_OVERRIDE1_RB_SCLK_PM_OVERRIDE 0x08000000 +#define A2XX_RBBM_PM_OVERRIDE1_MH_MH_SCLK_PM_OVERRIDE 0x10000000 +#define A2XX_RBBM_PM_OVERRIDE1_MH_REG_SCLK_PM_OVERRIDE 0x20000000 +#define A2XX_RBBM_PM_OVERRIDE1_MH_MMU_SCLK_PM_OVERRIDE 0x40000000 +#define A2XX_RBBM_PM_OVERRIDE1_MH_TCROQ_SCLK_PM_OVERRIDE 0x80000000 + +#define REG_A2XX_RBBM_PM_OVERRIDE2 0x0000039d + +#define REG_A2XX_RBBM_DEBUG_OUT 0x000003a0 + +#define REG_A2XX_RBBM_DEBUG_CNTL 0x000003a1 + +#define REG_A2XX_RBBM_READ_ERROR 0x000003b3 + +#define REG_A2XX_RBBM_INT_CNTL 0x000003b4 +#define A2XX_RBBM_INT_CNTL_RDERR_INT_MASK 0x00000001 +#define A2XX_RBBM_INT_CNTL_DISPLAY_UPDATE_INT_MASK 0x00000002 +#define A2XX_RBBM_INT_CNTL_GUI_IDLE_INT_MASK 0x00080000 + +#define REG_A2XX_RBBM_INT_STATUS 0x000003b5 + +#define REG_A2XX_RBBM_INT_ACK 0x000003b6 + +#define REG_A2XX_MASTER_INT_SIGNAL 0x000003b7 +#define A2XX_MASTER_INT_SIGNAL_MH_INT_STAT 0x00000020 +#define A2XX_MASTER_INT_SIGNAL_SQ_INT_STAT 0x04000000 +#define A2XX_MASTER_INT_SIGNAL_CP_INT_STAT 0x40000000 +#define A2XX_MASTER_INT_SIGNAL_RBBM_INT_STAT 0x80000000 + +#define REG_A2XX_RBBM_PERIPHID1 0x000003f9 + +#define REG_A2XX_RBBM_PERIPHID2 0x000003fa + +#define REG_A2XX_CP_PERFMON_CNTL 0x00000444 + +#define REG_A2XX_CP_PERFCOUNTER_SELECT 0x00000445 + +#define REG_A2XX_CP_PERFCOUNTER_LO 0x00000446 + +#define REG_A2XX_CP_PERFCOUNTER_HI 0x00000447 + 
+#define REG_A2XX_RBBM_STATUS 0x000005d0 +#define A2XX_RBBM_STATUS_CMDFIFO_AVAIL__MASK 0x0000001f +#define A2XX_RBBM_STATUS_CMDFIFO_AVAIL__SHIFT 0 +static inline uint32_t A2XX_RBBM_STATUS_CMDFIFO_AVAIL(uint32_t val) +{ + return ((val) << A2XX_RBBM_STATUS_CMDFIFO_AVAIL__SHIFT) & A2XX_RBBM_STATUS_CMDFIFO_AVAIL__MASK; +} +#define A2XX_RBBM_STATUS_TC_BUSY 0x00000020 +#define A2XX_RBBM_STATUS_HIRQ_PENDING 0x00000100 +#define A2XX_RBBM_STATUS_CPRQ_PENDING 0x00000200 +#define A2XX_RBBM_STATUS_CFRQ_PENDING 0x00000400 +#define A2XX_RBBM_STATUS_PFRQ_PENDING 0x00000800 +#define A2XX_RBBM_STATUS_VGT_BUSY_NO_DMA 0x00001000 +#define A2XX_RBBM_STATUS_RBBM_WU_BUSY 0x00004000 +#define A2XX_RBBM_STATUS_CP_NRT_BUSY 0x00010000 +#define A2XX_RBBM_STATUS_MH_BUSY 0x00040000 +#define A2XX_RBBM_STATUS_MH_COHERENCY_BUSY 0x00080000 +#define A2XX_RBBM_STATUS_SX_BUSY 0x00200000 +#define A2XX_RBBM_STATUS_TPC_BUSY 0x00400000 +#define A2XX_RBBM_STATUS_SC_CNTX_BUSY 0x01000000 +#define A2XX_RBBM_STATUS_PA_BUSY 0x02000000 +#define A2XX_RBBM_STATUS_VGT_BUSY 0x04000000 +#define A2XX_RBBM_STATUS_SQ_CNTX17_BUSY 0x08000000 +#define A2XX_RBBM_STATUS_SQ_CNTX0_BUSY 0x10000000 +#define A2XX_RBBM_STATUS_RB_CNTX_BUSY 0x40000000 +#define A2XX_RBBM_STATUS_GUI_ACTIVE 0x80000000 + +#define REG_A2XX_MH_ARBITER_CONFIG 0x00000a40 +#define A2XX_MH_ARBITER_CONFIG_SAME_PAGE_LIMIT__MASK 0x0000003f +#define A2XX_MH_ARBITER_CONFIG_SAME_PAGE_LIMIT__SHIFT 0 +static inline uint32_t A2XX_MH_ARBITER_CONFIG_SAME_PAGE_LIMIT(uint32_t val) +{ + return ((val) << A2XX_MH_ARBITER_CONFIG_SAME_PAGE_LIMIT__SHIFT) & A2XX_MH_ARBITER_CONFIG_SAME_PAGE_LIMIT__MASK; +} +#define A2XX_MH_ARBITER_CONFIG_SAME_PAGE_GRANULARITY 0x00000040 +#define A2XX_MH_ARBITER_CONFIG_L1_ARB_ENABLE 0x00000080 +#define A2XX_MH_ARBITER_CONFIG_L1_ARB_HOLD_ENABLE 0x00000100 +#define A2XX_MH_ARBITER_CONFIG_L2_ARB_CONTROL 0x00000200 +#define A2XX_MH_ARBITER_CONFIG_PAGE_SIZE__MASK 0x00001c00 +#define A2XX_MH_ARBITER_CONFIG_PAGE_SIZE__SHIFT 10 +static inline uint32_t 
A2XX_MH_ARBITER_CONFIG_PAGE_SIZE(uint32_t val) +{ + return ((val) << A2XX_MH_ARBITER_CONFIG_PAGE_SIZE__SHIFT) & A2XX_MH_ARBITER_CONFIG_PAGE_SIZE__MASK; +} +#define A2XX_MH_ARBITER_CONFIG_TC_REORDER_ENABLE 0x00002000 +#define A2XX_MH_ARBITER_CONFIG_TC_ARB_HOLD_ENABLE 0x00004000 +#define A2XX_MH_ARBITER_CONFIG_IN_FLIGHT_LIMIT_ENABLE 0x00008000 +#define A2XX_MH_ARBITER_CONFIG_IN_FLIGHT_LIMIT__MASK 0x003f0000 +#define A2XX_MH_ARBITER_CONFIG_IN_FLIGHT_LIMIT__SHIFT 16 +static inline uint32_t A2XX_MH_ARBITER_CONFIG_IN_FLIGHT_LIMIT(uint32_t val) +{ + return ((val) << A2XX_MH_ARBITER_CONFIG_IN_FLIGHT_LIMIT__SHIFT) & A2XX_MH_ARBITER_CONFIG_IN_FLIGHT_LIMIT__MASK; +} +#define A2XX_MH_ARBITER_CONFIG_CP_CLNT_ENABLE 0x00400000 +#define A2XX_MH_ARBITER_CONFIG_VGT_CLNT_ENABLE 0x00800000 +#define A2XX_MH_ARBITER_CONFIG_TC_CLNT_ENABLE 0x01000000 +#define A2XX_MH_ARBITER_CONFIG_RB_CLNT_ENABLE 0x02000000 +#define A2XX_MH_ARBITER_CONFIG_PA_CLNT_ENABLE 0x04000000 + +#define REG_A2XX_MH_INTERRUPT_MASK 0x00000a42 +#define A2XX_MH_INTERRUPT_MASK_AXI_READ_ERROR 0x00000001 +#define A2XX_MH_INTERRUPT_MASK_AXI_WRITE_ERROR 0x00000002 +#define A2XX_MH_INTERRUPT_MASK_MMU_PAGE_FAULT 0x00000004 + +#define REG_A2XX_MH_INTERRUPT_STATUS 0x00000a43 + +#define REG_A2XX_MH_INTERRUPT_CLEAR 0x00000a44 + +#define REG_A2XX_MH_CLNT_INTF_CTRL_CONFIG1 0x00000a54 + +#define REG_A2XX_MH_CLNT_INTF_CTRL_CONFIG2 0x00000a55 + +#define REG_A2XX_A220_VSC_BIN_SIZE 0x00000c01 +#define A2XX_A220_VSC_BIN_SIZE_WIDTH__MASK 0x0000001f +#define A2XX_A220_VSC_BIN_SIZE_WIDTH__SHIFT 0 +static inline uint32_t A2XX_A220_VSC_BIN_SIZE_WIDTH(uint32_t val) +{ + assert(!(val & 0x1f)); + return ((val >> 5) << A2XX_A220_VSC_BIN_SIZE_WIDTH__SHIFT) & A2XX_A220_VSC_BIN_SIZE_WIDTH__MASK; +} +#define A2XX_A220_VSC_BIN_SIZE_HEIGHT__MASK 0x000003e0 +#define A2XX_A220_VSC_BIN_SIZE_HEIGHT__SHIFT 5 +static inline uint32_t A2XX_A220_VSC_BIN_SIZE_HEIGHT(uint32_t val) +{ + assert(!(val & 0x1f)); + return ((val >> 5) << 
A2XX_A220_VSC_BIN_SIZE_HEIGHT__SHIFT) & A2XX_A220_VSC_BIN_SIZE_HEIGHT__MASK; +} + +static inline uint32_t REG_A2XX_VSC_PIPE(uint32_t i0) { return 0x00000c06 + 0x3*i0; } + +static inline uint32_t REG_A2XX_VSC_PIPE_CONFIG(uint32_t i0) { return 0x00000c06 + 0x3*i0; } + +static inline uint32_t REG_A2XX_VSC_PIPE_DATA_ADDRESS(uint32_t i0) { return 0x00000c07 + 0x3*i0; } + +static inline uint32_t REG_A2XX_VSC_PIPE_DATA_LENGTH(uint32_t i0) { return 0x00000c08 + 0x3*i0; } + +#define REG_A2XX_PC_DEBUG_CNTL 0x00000c38 + +#define REG_A2XX_PC_DEBUG_DATA 0x00000c39 + +#define REG_A2XX_PA_SC_VIZ_QUERY_STATUS 0x00000c44 + +#define REG_A2XX_GRAS_DEBUG_CNTL 0x00000c80 + +#define REG_A2XX_PA_SU_DEBUG_CNTL 0x00000c80 + +#define REG_A2XX_GRAS_DEBUG_DATA 0x00000c81 + +#define REG_A2XX_PA_SU_DEBUG_DATA 0x00000c81 + +#define REG_A2XX_PA_SU_FACE_DATA 0x00000c86 +#define A2XX_PA_SU_FACE_DATA_BASE_ADDR__MASK 0xffffffe0 +#define A2XX_PA_SU_FACE_DATA_BASE_ADDR__SHIFT 5 +static inline uint32_t A2XX_PA_SU_FACE_DATA_BASE_ADDR(uint32_t val) +{ + return ((val) << A2XX_PA_SU_FACE_DATA_BASE_ADDR__SHIFT) & A2XX_PA_SU_FACE_DATA_BASE_ADDR__MASK; +} + +#define REG_A2XX_SQ_GPR_MANAGEMENT 0x00000d00 +#define A2XX_SQ_GPR_MANAGEMENT_REG_DYNAMIC 0x00000001 +#define A2XX_SQ_GPR_MANAGEMENT_REG_SIZE_PIX__MASK 0x00000ff0 +#define A2XX_SQ_GPR_MANAGEMENT_REG_SIZE_PIX__SHIFT 4 +static inline uint32_t A2XX_SQ_GPR_MANAGEMENT_REG_SIZE_PIX(uint32_t val) +{ + return ((val) << A2XX_SQ_GPR_MANAGEMENT_REG_SIZE_PIX__SHIFT) & A2XX_SQ_GPR_MANAGEMENT_REG_SIZE_PIX__MASK; +} +#define A2XX_SQ_GPR_MANAGEMENT_REG_SIZE_VTX__MASK 0x000ff000 +#define A2XX_SQ_GPR_MANAGEMENT_REG_SIZE_VTX__SHIFT 12 +static inline uint32_t A2XX_SQ_GPR_MANAGEMENT_REG_SIZE_VTX(uint32_t val) +{ + return ((val) << A2XX_SQ_GPR_MANAGEMENT_REG_SIZE_VTX__SHIFT) & A2XX_SQ_GPR_MANAGEMENT_REG_SIZE_VTX__MASK; +} + +#define REG_A2XX_SQ_FLOW_CONTROL 0x00000d01 + +#define REG_A2XX_SQ_INST_STORE_MANAGMENT 0x00000d02 +#define 
A2XX_SQ_INST_STORE_MANAGMENT_INST_BASE_PIX__MASK 0x00000fff +#define A2XX_SQ_INST_STORE_MANAGMENT_INST_BASE_PIX__SHIFT 0 +static inline uint32_t A2XX_SQ_INST_STORE_MANAGMENT_INST_BASE_PIX(uint32_t val) +{ + return ((val) << A2XX_SQ_INST_STORE_MANAGMENT_INST_BASE_PIX__SHIFT) & A2XX_SQ_INST_STORE_MANAGMENT_INST_BASE_PIX__MASK; +} +#define A2XX_SQ_INST_STORE_MANAGMENT_INST_BASE_VTX__MASK 0x0fff0000 +#define A2XX_SQ_INST_STORE_MANAGMENT_INST_BASE_VTX__SHIFT 16 +static inline uint32_t A2XX_SQ_INST_STORE_MANAGMENT_INST_BASE_VTX(uint32_t val) +{ + return ((val) << A2XX_SQ_INST_STORE_MANAGMENT_INST_BASE_VTX__SHIFT) & A2XX_SQ_INST_STORE_MANAGMENT_INST_BASE_VTX__MASK; +} + +#define REG_A2XX_SQ_DEBUG_MISC 0x00000d05 + +#define REG_A2XX_SQ_INT_CNTL 0x00000d34 + +#define REG_A2XX_SQ_INT_STATUS 0x00000d35 + +#define REG_A2XX_SQ_INT_ACK 0x00000d36 + +#define REG_A2XX_SQ_DEBUG_INPUT_FSM 0x00000dae + +#define REG_A2XX_SQ_DEBUG_CONST_MGR_FSM 0x00000daf + +#define REG_A2XX_SQ_DEBUG_TP_FSM 0x00000db0 + +#define REG_A2XX_SQ_DEBUG_FSM_ALU_0 0x00000db1 + +#define REG_A2XX_SQ_DEBUG_FSM_ALU_1 0x00000db2 + +#define REG_A2XX_SQ_DEBUG_EXP_ALLOC 0x00000db3 + +#define REG_A2XX_SQ_DEBUG_PTR_BUFF 0x00000db4 + +#define REG_A2XX_SQ_DEBUG_GPR_VTX 0x00000db5 + +#define REG_A2XX_SQ_DEBUG_GPR_PIX 0x00000db6 + +#define REG_A2XX_SQ_DEBUG_TB_STATUS_SEL 0x00000db7 + +#define REG_A2XX_SQ_DEBUG_VTX_TB_0 0x00000db8 + +#define REG_A2XX_SQ_DEBUG_VTX_TB_1 0x00000db9 + +#define REG_A2XX_SQ_DEBUG_VTX_TB_STATUS_REG 0x00000dba + +#define REG_A2XX_SQ_DEBUG_VTX_TB_STATE_MEM 0x00000dbb + +#define REG_A2XX_SQ_DEBUG_PIX_TB_0 0x00000dbc + +#define REG_A2XX_SQ_DEBUG_PIX_TB_STATUS_REG_0 0x00000dbd + +#define REG_A2XX_SQ_DEBUG_PIX_TB_STATUS_REG_1 0x00000dbe + +#define REG_A2XX_SQ_DEBUG_PIX_TB_STATUS_REG_2 0x00000dbf + +#define REG_A2XX_SQ_DEBUG_PIX_TB_STATUS_REG_3 0x00000dc0 + +#define REG_A2XX_SQ_DEBUG_PIX_TB_STATE_MEM 0x00000dc1 + +#define REG_A2XX_TC_CNTL_STATUS 0x00000e00 +#define A2XX_TC_CNTL_STATUS_L2_INVALIDATE 
0x00000001 + +#define REG_A2XX_TP0_CHICKEN 0x00000e1e + +#define REG_A2XX_RB_BC_CONTROL 0x00000f01 +#define A2XX_RB_BC_CONTROL_ACCUM_LINEAR_MODE_ENABLE 0x00000001 +#define A2XX_RB_BC_CONTROL_ACCUM_TIMEOUT_SELECT__MASK 0x00000006 +#define A2XX_RB_BC_CONTROL_ACCUM_TIMEOUT_SELECT__SHIFT 1 +static inline uint32_t A2XX_RB_BC_CONTROL_ACCUM_TIMEOUT_SELECT(uint32_t val) +{ + return ((val) << A2XX_RB_BC_CONTROL_ACCUM_TIMEOUT_SELECT__SHIFT) & A2XX_RB_BC_CONTROL_ACCUM_TIMEOUT_SELECT__MASK; +} +#define A2XX_RB_BC_CONTROL_DISABLE_EDRAM_CAM 0x00000008 +#define A2XX_RB_BC_CONTROL_DISABLE_EZ_FAST_CONTEXT_SWITCH 0x00000010 +#define A2XX_RB_BC_CONTROL_DISABLE_EZ_NULL_ZCMD_DROP 0x00000020 +#define A2XX_RB_BC_CONTROL_DISABLE_LZ_NULL_ZCMD_DROP 0x00000040 +#define A2XX_RB_BC_CONTROL_ENABLE_AZ_THROTTLE 0x00000080 +#define A2XX_RB_BC_CONTROL_AZ_THROTTLE_COUNT__MASK 0x00001f00 +#define A2XX_RB_BC_CONTROL_AZ_THROTTLE_COUNT__SHIFT 8 +static inline uint32_t A2XX_RB_BC_CONTROL_AZ_THROTTLE_COUNT(uint32_t val) +{ + return ((val) << A2XX_RB_BC_CONTROL_AZ_THROTTLE_COUNT__SHIFT) & A2XX_RB_BC_CONTROL_AZ_THROTTLE_COUNT__MASK; +} +#define A2XX_RB_BC_CONTROL_ENABLE_CRC_UPDATE 0x00004000 +#define A2XX_RB_BC_CONTROL_CRC_MODE 0x00008000 +#define A2XX_RB_BC_CONTROL_DISABLE_SAMPLE_COUNTERS 0x00010000 +#define A2XX_RB_BC_CONTROL_DISABLE_ACCUM 0x00020000 +#define A2XX_RB_BC_CONTROL_ACCUM_ALLOC_MASK__MASK 0x003c0000 +#define A2XX_RB_BC_CONTROL_ACCUM_ALLOC_MASK__SHIFT 18 +static inline uint32_t A2XX_RB_BC_CONTROL_ACCUM_ALLOC_MASK(uint32_t val) +{ + return ((val) << A2XX_RB_BC_CONTROL_ACCUM_ALLOC_MASK__SHIFT) & A2XX_RB_BC_CONTROL_ACCUM_ALLOC_MASK__MASK; +} +#define A2XX_RB_BC_CONTROL_LINEAR_PERFORMANCE_ENABLE 0x00400000 +#define A2XX_RB_BC_CONTROL_ACCUM_DATA_FIFO_LIMIT__MASK 0x07800000 +#define A2XX_RB_BC_CONTROL_ACCUM_DATA_FIFO_LIMIT__SHIFT 23 +static inline uint32_t A2XX_RB_BC_CONTROL_ACCUM_DATA_FIFO_LIMIT(uint32_t val) +{ + return ((val) << A2XX_RB_BC_CONTROL_ACCUM_DATA_FIFO_LIMIT__SHIFT) & 
A2XX_RB_BC_CONTROL_ACCUM_DATA_FIFO_LIMIT__MASK; +} +#define A2XX_RB_BC_CONTROL_MEM_EXPORT_TIMEOUT_SELECT__MASK 0x18000000 +#define A2XX_RB_BC_CONTROL_MEM_EXPORT_TIMEOUT_SELECT__SHIFT 27 +static inline uint32_t A2XX_RB_BC_CONTROL_MEM_EXPORT_TIMEOUT_SELECT(uint32_t val) +{ + return ((val) << A2XX_RB_BC_CONTROL_MEM_EXPORT_TIMEOUT_SELECT__SHIFT) & A2XX_RB_BC_CONTROL_MEM_EXPORT_TIMEOUT_SELECT__MASK; +} +#define A2XX_RB_BC_CONTROL_MEM_EXPORT_LINEAR_MODE_ENABLE 0x20000000 +#define A2XX_RB_BC_CONTROL_CRC_SYSTEM 0x40000000 +#define A2XX_RB_BC_CONTROL_RESERVED6 0x80000000 + +#define REG_A2XX_RB_EDRAM_INFO 0x00000f02 + +#define REG_A2XX_RB_DEBUG_CNTL 0x00000f26 + +#define REG_A2XX_RB_DEBUG_DATA 0x00000f27 + +#define REG_A2XX_RB_SURFACE_INFO 0x00002000 +#define A2XX_RB_SURFACE_INFO_SURFACE_PITCH__MASK 0x00003fff +#define A2XX_RB_SURFACE_INFO_SURFACE_PITCH__SHIFT 0 +static inline uint32_t A2XX_RB_SURFACE_INFO_SURFACE_PITCH(uint32_t val) +{ + return ((val) << A2XX_RB_SURFACE_INFO_SURFACE_PITCH__SHIFT) & A2XX_RB_SURFACE_INFO_SURFACE_PITCH__MASK; +} +#define A2XX_RB_SURFACE_INFO_MSAA_SAMPLES__MASK 0x0000c000 +#define A2XX_RB_SURFACE_INFO_MSAA_SAMPLES__SHIFT 14 +static inline uint32_t A2XX_RB_SURFACE_INFO_MSAA_SAMPLES(uint32_t val) +{ + return ((val) << A2XX_RB_SURFACE_INFO_MSAA_SAMPLES__SHIFT) & A2XX_RB_SURFACE_INFO_MSAA_SAMPLES__MASK; +} + +#define REG_A2XX_RB_COLOR_INFO 0x00002001 +#define A2XX_RB_COLOR_INFO_FORMAT__MASK 0x0000000f +#define A2XX_RB_COLOR_INFO_FORMAT__SHIFT 0 +static inline uint32_t A2XX_RB_COLOR_INFO_FORMAT(enum a2xx_colorformatx val) +{ + return ((val) << A2XX_RB_COLOR_INFO_FORMAT__SHIFT) & A2XX_RB_COLOR_INFO_FORMAT__MASK; +} +#define A2XX_RB_COLOR_INFO_ROUND_MODE__MASK 0x00000030 +#define A2XX_RB_COLOR_INFO_ROUND_MODE__SHIFT 4 +static inline uint32_t A2XX_RB_COLOR_INFO_ROUND_MODE(uint32_t val) +{ + return ((val) << A2XX_RB_COLOR_INFO_ROUND_MODE__SHIFT) & A2XX_RB_COLOR_INFO_ROUND_MODE__MASK; +} +#define A2XX_RB_COLOR_INFO_LINEAR 0x00000040 +#define 
A2XX_RB_COLOR_INFO_ENDIAN__MASK 0x00000180 +#define A2XX_RB_COLOR_INFO_ENDIAN__SHIFT 7 +static inline uint32_t A2XX_RB_COLOR_INFO_ENDIAN(uint32_t val) +{ + return ((val) << A2XX_RB_COLOR_INFO_ENDIAN__SHIFT) & A2XX_RB_COLOR_INFO_ENDIAN__MASK; +} +#define A2XX_RB_COLOR_INFO_SWAP__MASK 0x00000600 +#define A2XX_RB_COLOR_INFO_SWAP__SHIFT 9 +static inline uint32_t A2XX_RB_COLOR_INFO_SWAP(uint32_t val) +{ + return ((val) << A2XX_RB_COLOR_INFO_SWAP__SHIFT) & A2XX_RB_COLOR_INFO_SWAP__MASK; +} +#define A2XX_RB_COLOR_INFO_BASE__MASK 0xfffff000 +#define A2XX_RB_COLOR_INFO_BASE__SHIFT 12 +static inline uint32_t A2XX_RB_COLOR_INFO_BASE(uint32_t val) +{ + assert(!(val & 0xfff)); + return ((val >> 12) << A2XX_RB_COLOR_INFO_BASE__SHIFT) & A2XX_RB_COLOR_INFO_BASE__MASK; +} + +#define REG_A2XX_RB_DEPTH_INFO 0x00002002 +#define A2XX_RB_DEPTH_INFO_DEPTH_FORMAT__MASK 0x00000001 +#define A2XX_RB_DEPTH_INFO_DEPTH_FORMAT__SHIFT 0 +static inline uint32_t A2XX_RB_DEPTH_INFO_DEPTH_FORMAT(enum adreno_rb_depth_format val) +{ + return ((val) << A2XX_RB_DEPTH_INFO_DEPTH_FORMAT__SHIFT) & A2XX_RB_DEPTH_INFO_DEPTH_FORMAT__MASK; +} +#define A2XX_RB_DEPTH_INFO_DEPTH_BASE__MASK 0xfffff000 +#define A2XX_RB_DEPTH_INFO_DEPTH_BASE__SHIFT 12 +static inline uint32_t A2XX_RB_DEPTH_INFO_DEPTH_BASE(uint32_t val) +{ + assert(!(val & 0xfff)); + return ((val >> 12) << A2XX_RB_DEPTH_INFO_DEPTH_BASE__SHIFT) & A2XX_RB_DEPTH_INFO_DEPTH_BASE__MASK; +} + +#define REG_A2XX_A225_RB_COLOR_INFO3 0x00002005 + +#define REG_A2XX_COHER_DEST_BASE_0 0x00002006 + +#define REG_A2XX_PA_SC_SCREEN_SCISSOR_TL 0x0000200e +#define A2XX_PA_SC_SCREEN_SCISSOR_TL_WINDOW_OFFSET_DISABLE 0x80000000 +#define A2XX_PA_SC_SCREEN_SCISSOR_TL_X__MASK 0x00007fff +#define A2XX_PA_SC_SCREEN_SCISSOR_TL_X__SHIFT 0 +static inline uint32_t A2XX_PA_SC_SCREEN_SCISSOR_TL_X(uint32_t val) +{ + return ((val) << A2XX_PA_SC_SCREEN_SCISSOR_TL_X__SHIFT) & A2XX_PA_SC_SCREEN_SCISSOR_TL_X__MASK; +} +#define A2XX_PA_SC_SCREEN_SCISSOR_TL_Y__MASK 0x7fff0000 +#define 
A2XX_PA_SC_SCREEN_SCISSOR_TL_Y__SHIFT 16 +static inline uint32_t A2XX_PA_SC_SCREEN_SCISSOR_TL_Y(uint32_t val) +{ + return ((val) << A2XX_PA_SC_SCREEN_SCISSOR_TL_Y__SHIFT) & A2XX_PA_SC_SCREEN_SCISSOR_TL_Y__MASK; +} + +#define REG_A2XX_PA_SC_SCREEN_SCISSOR_BR 0x0000200f +#define A2XX_PA_SC_SCREEN_SCISSOR_BR_WINDOW_OFFSET_DISABLE 0x80000000 +#define A2XX_PA_SC_SCREEN_SCISSOR_BR_X__MASK 0x00007fff +#define A2XX_PA_SC_SCREEN_SCISSOR_BR_X__SHIFT 0 +static inline uint32_t A2XX_PA_SC_SCREEN_SCISSOR_BR_X(uint32_t val) +{ + return ((val) << A2XX_PA_SC_SCREEN_SCISSOR_BR_X__SHIFT) & A2XX_PA_SC_SCREEN_SCISSOR_BR_X__MASK; +} +#define A2XX_PA_SC_SCREEN_SCISSOR_BR_Y__MASK 0x7fff0000 +#define A2XX_PA_SC_SCREEN_SCISSOR_BR_Y__SHIFT 16 +static inline uint32_t A2XX_PA_SC_SCREEN_SCISSOR_BR_Y(uint32_t val) +{ + return ((val) << A2XX_PA_SC_SCREEN_SCISSOR_BR_Y__SHIFT) & A2XX_PA_SC_SCREEN_SCISSOR_BR_Y__MASK; +} + +#define REG_A2XX_PA_SC_WINDOW_OFFSET 0x00002080 +#define A2XX_PA_SC_WINDOW_OFFSET_X__MASK 0x00007fff +#define A2XX_PA_SC_WINDOW_OFFSET_X__SHIFT 0 +static inline uint32_t A2XX_PA_SC_WINDOW_OFFSET_X(int32_t val) +{ + return ((val) << A2XX_PA_SC_WINDOW_OFFSET_X__SHIFT) & A2XX_PA_SC_WINDOW_OFFSET_X__MASK; +} +#define A2XX_PA_SC_WINDOW_OFFSET_Y__MASK 0x7fff0000 +#define A2XX_PA_SC_WINDOW_OFFSET_Y__SHIFT 16 +static inline uint32_t A2XX_PA_SC_WINDOW_OFFSET_Y(int32_t val) +{ + return ((val) << A2XX_PA_SC_WINDOW_OFFSET_Y__SHIFT) & A2XX_PA_SC_WINDOW_OFFSET_Y__MASK; +} +#define A2XX_PA_SC_WINDOW_OFFSET_DISABLE 0x80000000 + +#define REG_A2XX_PA_SC_WINDOW_SCISSOR_TL 0x00002081 +#define A2XX_PA_SC_WINDOW_SCISSOR_TL_WINDOW_OFFSET_DISABLE 0x80000000 +#define A2XX_PA_SC_WINDOW_SCISSOR_TL_X__MASK 0x00007fff +#define A2XX_PA_SC_WINDOW_SCISSOR_TL_X__SHIFT 0 +static inline uint32_t A2XX_PA_SC_WINDOW_SCISSOR_TL_X(uint32_t val) +{ + return ((val) << A2XX_PA_SC_WINDOW_SCISSOR_TL_X__SHIFT) & A2XX_PA_SC_WINDOW_SCISSOR_TL_X__MASK; +} +#define A2XX_PA_SC_WINDOW_SCISSOR_TL_Y__MASK 0x7fff0000 +#define 
A2XX_PA_SC_WINDOW_SCISSOR_TL_Y__SHIFT 16 +static inline uint32_t A2XX_PA_SC_WINDOW_SCISSOR_TL_Y(uint32_t val) +{ + return ((val) << A2XX_PA_SC_WINDOW_SCISSOR_TL_Y__SHIFT) & A2XX_PA_SC_WINDOW_SCISSOR_TL_Y__MASK; +} + +#define REG_A2XX_PA_SC_WINDOW_SCISSOR_BR 0x00002082 +#define A2XX_PA_SC_WINDOW_SCISSOR_BR_WINDOW_OFFSET_DISABLE 0x80000000 +#define A2XX_PA_SC_WINDOW_SCISSOR_BR_X__MASK 0x00007fff +#define A2XX_PA_SC_WINDOW_SCISSOR_BR_X__SHIFT 0 +static inline uint32_t A2XX_PA_SC_WINDOW_SCISSOR_BR_X(uint32_t val) +{ + return ((val) << A2XX_PA_SC_WINDOW_SCISSOR_BR_X__SHIFT) & A2XX_PA_SC_WINDOW_SCISSOR_BR_X__MASK; +} +#define A2XX_PA_SC_WINDOW_SCISSOR_BR_Y__MASK 0x7fff0000 +#define A2XX_PA_SC_WINDOW_SCISSOR_BR_Y__SHIFT 16 +static inline uint32_t A2XX_PA_SC_WINDOW_SCISSOR_BR_Y(uint32_t val) +{ + return ((val) << A2XX_PA_SC_WINDOW_SCISSOR_BR_Y__SHIFT) & A2XX_PA_SC_WINDOW_SCISSOR_BR_Y__MASK; +} + +#define REG_A2XX_UNKNOWN_2010 0x00002010 + +#define REG_A2XX_VGT_MAX_VTX_INDX 0x00002100 + +#define REG_A2XX_VGT_MIN_VTX_INDX 0x00002101 + +#define REG_A2XX_VGT_INDX_OFFSET 0x00002102 + +#define REG_A2XX_A225_PC_MULTI_PRIM_IB_RESET_INDX 0x00002103 + +#define REG_A2XX_RB_COLOR_MASK 0x00002104 +#define A2XX_RB_COLOR_MASK_WRITE_RED 0x00000001 +#define A2XX_RB_COLOR_MASK_WRITE_GREEN 0x00000002 +#define A2XX_RB_COLOR_MASK_WRITE_BLUE 0x00000004 +#define A2XX_RB_COLOR_MASK_WRITE_ALPHA 0x00000008 + +#define REG_A2XX_RB_BLEND_RED 0x00002105 + +#define REG_A2XX_RB_BLEND_GREEN 0x00002106 + +#define REG_A2XX_RB_BLEND_BLUE 0x00002107 + +#define REG_A2XX_RB_BLEND_ALPHA 0x00002108 + +#define REG_A2XX_RB_FOG_COLOR 0x00002109 +#define A2XX_RB_FOG_COLOR_FOG_RED__MASK 0x000000ff +#define A2XX_RB_FOG_COLOR_FOG_RED__SHIFT 0 +static inline uint32_t A2XX_RB_FOG_COLOR_FOG_RED(uint32_t val) +{ + return ((val) << A2XX_RB_FOG_COLOR_FOG_RED__SHIFT) & A2XX_RB_FOG_COLOR_FOG_RED__MASK; +} +#define A2XX_RB_FOG_COLOR_FOG_GREEN__MASK 0x0000ff00 +#define A2XX_RB_FOG_COLOR_FOG_GREEN__SHIFT 8 +static inline uint32_t 
A2XX_RB_FOG_COLOR_FOG_GREEN(uint32_t val) +{ + return ((val) << A2XX_RB_FOG_COLOR_FOG_GREEN__SHIFT) & A2XX_RB_FOG_COLOR_FOG_GREEN__MASK; +} +#define A2XX_RB_FOG_COLOR_FOG_BLUE__MASK 0x00ff0000 +#define A2XX_RB_FOG_COLOR_FOG_BLUE__SHIFT 16 +static inline uint32_t A2XX_RB_FOG_COLOR_FOG_BLUE(uint32_t val) +{ + return ((val) << A2XX_RB_FOG_COLOR_FOG_BLUE__SHIFT) & A2XX_RB_FOG_COLOR_FOG_BLUE__MASK; +} + +#define REG_A2XX_RB_STENCILREFMASK_BF 0x0000210c +#define A2XX_RB_STENCILREFMASK_BF_STENCILREF__MASK 0x000000ff +#define A2XX_RB_STENCILREFMASK_BF_STENCILREF__SHIFT 0 +static inline uint32_t A2XX_RB_STENCILREFMASK_BF_STENCILREF(uint32_t val) +{ + return ((val) << A2XX_RB_STENCILREFMASK_BF_STENCILREF__SHIFT) & A2XX_RB_STENCILREFMASK_BF_STENCILREF__MASK; +} +#define A2XX_RB_STENCILREFMASK_BF_STENCILMASK__MASK 0x0000ff00 +#define A2XX_RB_STENCILREFMASK_BF_STENCILMASK__SHIFT 8 +static inline uint32_t A2XX_RB_STENCILREFMASK_BF_STENCILMASK(uint32_t val) +{ + return ((val) << A2XX_RB_STENCILREFMASK_BF_STENCILMASK__SHIFT) & A2XX_RB_STENCILREFMASK_BF_STENCILMASK__MASK; +} +#define A2XX_RB_STENCILREFMASK_BF_STENCILWRITEMASK__MASK 0x00ff0000 +#define A2XX_RB_STENCILREFMASK_BF_STENCILWRITEMASK__SHIFT 16 +static inline uint32_t A2XX_RB_STENCILREFMASK_BF_STENCILWRITEMASK(uint32_t val) +{ + return ((val) << A2XX_RB_STENCILREFMASK_BF_STENCILWRITEMASK__SHIFT) & A2XX_RB_STENCILREFMASK_BF_STENCILWRITEMASK__MASK; +} + +#define REG_A2XX_RB_STENCILREFMASK 0x0000210d +#define A2XX_RB_STENCILREFMASK_STENCILREF__MASK 0x000000ff +#define A2XX_RB_STENCILREFMASK_STENCILREF__SHIFT 0 +static inline uint32_t A2XX_RB_STENCILREFMASK_STENCILREF(uint32_t val) +{ + return ((val) << A2XX_RB_STENCILREFMASK_STENCILREF__SHIFT) & A2XX_RB_STENCILREFMASK_STENCILREF__MASK; +} +#define A2XX_RB_STENCILREFMASK_STENCILMASK__MASK 0x0000ff00 +#define A2XX_RB_STENCILREFMASK_STENCILMASK__SHIFT 8 +static inline uint32_t A2XX_RB_STENCILREFMASK_STENCILMASK(uint32_t val) +{ + return ((val) << 
A2XX_RB_STENCILREFMASK_STENCILMASK__SHIFT) & A2XX_RB_STENCILREFMASK_STENCILMASK__MASK; +} +#define A2XX_RB_STENCILREFMASK_STENCILWRITEMASK__MASK 0x00ff0000 +#define A2XX_RB_STENCILREFMASK_STENCILWRITEMASK__SHIFT 16 +static inline uint32_t A2XX_RB_STENCILREFMASK_STENCILWRITEMASK(uint32_t val) +{ + return ((val) << A2XX_RB_STENCILREFMASK_STENCILWRITEMASK__SHIFT) & A2XX_RB_STENCILREFMASK_STENCILWRITEMASK__MASK; +} + +#define REG_A2XX_RB_ALPHA_REF 0x0000210e + +#define REG_A2XX_PA_CL_VPORT_XSCALE 0x0000210f +#define A2XX_PA_CL_VPORT_XSCALE__MASK 0xffffffff +#define A2XX_PA_CL_VPORT_XSCALE__SHIFT 0 +static inline uint32_t A2XX_PA_CL_VPORT_XSCALE(float val) +{ + return ((fui(val)) << A2XX_PA_CL_VPORT_XSCALE__SHIFT) & A2XX_PA_CL_VPORT_XSCALE__MASK; +} + +#define REG_A2XX_PA_CL_VPORT_XOFFSET 0x00002110 +#define A2XX_PA_CL_VPORT_XOFFSET__MASK 0xffffffff +#define A2XX_PA_CL_VPORT_XOFFSET__SHIFT 0 +static inline uint32_t A2XX_PA_CL_VPORT_XOFFSET(float val) +{ + return ((fui(val)) << A2XX_PA_CL_VPORT_XOFFSET__SHIFT) & A2XX_PA_CL_VPORT_XOFFSET__MASK; +} + +#define REG_A2XX_PA_CL_VPORT_YSCALE 0x00002111 +#define A2XX_PA_CL_VPORT_YSCALE__MASK 0xffffffff +#define A2XX_PA_CL_VPORT_YSCALE__SHIFT 0 +static inline uint32_t A2XX_PA_CL_VPORT_YSCALE(float val) +{ + return ((fui(val)) << A2XX_PA_CL_VPORT_YSCALE__SHIFT) & A2XX_PA_CL_VPORT_YSCALE__MASK; +} + +#define REG_A2XX_PA_CL_VPORT_YOFFSET 0x00002112 +#define A2XX_PA_CL_VPORT_YOFFSET__MASK 0xffffffff +#define A2XX_PA_CL_VPORT_YOFFSET__SHIFT 0 +static inline uint32_t A2XX_PA_CL_VPORT_YOFFSET(float val) +{ + return ((fui(val)) << A2XX_PA_CL_VPORT_YOFFSET__SHIFT) & A2XX_PA_CL_VPORT_YOFFSET__MASK; +} + +#define REG_A2XX_PA_CL_VPORT_ZSCALE 0x00002113 +#define A2XX_PA_CL_VPORT_ZSCALE__MASK 0xffffffff +#define A2XX_PA_CL_VPORT_ZSCALE__SHIFT 0 +static inline uint32_t A2XX_PA_CL_VPORT_ZSCALE(float val) +{ + return ((fui(val)) << A2XX_PA_CL_VPORT_ZSCALE__SHIFT) & A2XX_PA_CL_VPORT_ZSCALE__MASK; +} + +#define REG_A2XX_PA_CL_VPORT_ZOFFSET 
0x00002114 +#define A2XX_PA_CL_VPORT_ZOFFSET__MASK 0xffffffff +#define A2XX_PA_CL_VPORT_ZOFFSET__SHIFT 0 +static inline uint32_t A2XX_PA_CL_VPORT_ZOFFSET(float val) +{ + return ((fui(val)) << A2XX_PA_CL_VPORT_ZOFFSET__SHIFT) & A2XX_PA_CL_VPORT_ZOFFSET__MASK; +} + +#define REG_A2XX_SQ_PROGRAM_CNTL 0x00002180 +#define A2XX_SQ_PROGRAM_CNTL_VS_REGS__MASK 0x000000ff +#define A2XX_SQ_PROGRAM_CNTL_VS_REGS__SHIFT 0 +static inline uint32_t A2XX_SQ_PROGRAM_CNTL_VS_REGS(uint32_t val) +{ + return ((val) << A2XX_SQ_PROGRAM_CNTL_VS_REGS__SHIFT) & A2XX_SQ_PROGRAM_CNTL_VS_REGS__MASK; +} +#define A2XX_SQ_PROGRAM_CNTL_PS_REGS__MASK 0x0000ff00 +#define A2XX_SQ_PROGRAM_CNTL_PS_REGS__SHIFT 8 +static inline uint32_t A2XX_SQ_PROGRAM_CNTL_PS_REGS(uint32_t val) +{ + return ((val) << A2XX_SQ_PROGRAM_CNTL_PS_REGS__SHIFT) & A2XX_SQ_PROGRAM_CNTL_PS_REGS__MASK; +} +#define A2XX_SQ_PROGRAM_CNTL_VS_RESOURCE 0x00010000 +#define A2XX_SQ_PROGRAM_CNTL_PS_RESOURCE 0x00020000 +#define A2XX_SQ_PROGRAM_CNTL_PARAM_GEN 0x00040000 +#define A2XX_SQ_PROGRAM_CNTL_GEN_INDEX_PIX 0x00080000 +#define A2XX_SQ_PROGRAM_CNTL_VS_EXPORT_COUNT__MASK 0x00f00000 +#define A2XX_SQ_PROGRAM_CNTL_VS_EXPORT_COUNT__SHIFT 20 +static inline uint32_t A2XX_SQ_PROGRAM_CNTL_VS_EXPORT_COUNT(uint32_t val) +{ + return ((val) << A2XX_SQ_PROGRAM_CNTL_VS_EXPORT_COUNT__SHIFT) & A2XX_SQ_PROGRAM_CNTL_VS_EXPORT_COUNT__MASK; +} +#define A2XX_SQ_PROGRAM_CNTL_VS_EXPORT_MODE__MASK 0x07000000 +#define A2XX_SQ_PROGRAM_CNTL_VS_EXPORT_MODE__SHIFT 24 +static inline uint32_t A2XX_SQ_PROGRAM_CNTL_VS_EXPORT_MODE(enum a2xx_sq_ps_vtx_mode val) +{ + return ((val) << A2XX_SQ_PROGRAM_CNTL_VS_EXPORT_MODE__SHIFT) & A2XX_SQ_PROGRAM_CNTL_VS_EXPORT_MODE__MASK; +} +#define A2XX_SQ_PROGRAM_CNTL_PS_EXPORT_MODE__MASK 0x78000000 +#define A2XX_SQ_PROGRAM_CNTL_PS_EXPORT_MODE__SHIFT 27 +static inline uint32_t A2XX_SQ_PROGRAM_CNTL_PS_EXPORT_MODE(uint32_t val) +{ + return ((val) << A2XX_SQ_PROGRAM_CNTL_PS_EXPORT_MODE__SHIFT) & A2XX_SQ_PROGRAM_CNTL_PS_EXPORT_MODE__MASK; +} 
+#define A2XX_SQ_PROGRAM_CNTL_GEN_INDEX_VTX 0x80000000 + +#define REG_A2XX_SQ_CONTEXT_MISC 0x00002181 +#define A2XX_SQ_CONTEXT_MISC_INST_PRED_OPTIMIZE 0x00000001 +#define A2XX_SQ_CONTEXT_MISC_SC_OUTPUT_SCREEN_XY 0x00000002 +#define A2XX_SQ_CONTEXT_MISC_SC_SAMPLE_CNTL__MASK 0x0000000c +#define A2XX_SQ_CONTEXT_MISC_SC_SAMPLE_CNTL__SHIFT 2 +static inline uint32_t A2XX_SQ_CONTEXT_MISC_SC_SAMPLE_CNTL(enum a2xx_sq_sample_cntl val) +{ + return ((val) << A2XX_SQ_CONTEXT_MISC_SC_SAMPLE_CNTL__SHIFT) & A2XX_SQ_CONTEXT_MISC_SC_SAMPLE_CNTL__MASK; +} +#define A2XX_SQ_CONTEXT_MISC_PARAM_GEN_POS__MASK 0x0000ff00 +#define A2XX_SQ_CONTEXT_MISC_PARAM_GEN_POS__SHIFT 8 +static inline uint32_t A2XX_SQ_CONTEXT_MISC_PARAM_GEN_POS(uint32_t val) +{ + return ((val) << A2XX_SQ_CONTEXT_MISC_PARAM_GEN_POS__SHIFT) & A2XX_SQ_CONTEXT_MISC_PARAM_GEN_POS__MASK; +} +#define A2XX_SQ_CONTEXT_MISC_PERFCOUNTER_REF 0x00010000 +#define A2XX_SQ_CONTEXT_MISC_YEILD_OPTIMIZE 0x00020000 +#define A2XX_SQ_CONTEXT_MISC_TX_CACHE_SEL 0x00040000 + +#define REG_A2XX_SQ_INTERPOLATOR_CNTL 0x00002182 +#define A2XX_SQ_INTERPOLATOR_CNTL_PARAM_SHADE__MASK 0x0000ffff +#define A2XX_SQ_INTERPOLATOR_CNTL_PARAM_SHADE__SHIFT 0 +static inline uint32_t A2XX_SQ_INTERPOLATOR_CNTL_PARAM_SHADE(uint32_t val) +{ + return ((val) << A2XX_SQ_INTERPOLATOR_CNTL_PARAM_SHADE__SHIFT) & A2XX_SQ_INTERPOLATOR_CNTL_PARAM_SHADE__MASK; +} +#define A2XX_SQ_INTERPOLATOR_CNTL_SAMPLING_PATTERN__MASK 0xffff0000 +#define A2XX_SQ_INTERPOLATOR_CNTL_SAMPLING_PATTERN__SHIFT 16 +static inline uint32_t A2XX_SQ_INTERPOLATOR_CNTL_SAMPLING_PATTERN(uint32_t val) +{ + return ((val) << A2XX_SQ_INTERPOLATOR_CNTL_SAMPLING_PATTERN__SHIFT) & A2XX_SQ_INTERPOLATOR_CNTL_SAMPLING_PATTERN__MASK; +} + +#define REG_A2XX_SQ_WRAPPING_0 0x00002183 +#define A2XX_SQ_WRAPPING_0_PARAM_WRAP_0__MASK 0x0000000f +#define A2XX_SQ_WRAPPING_0_PARAM_WRAP_0__SHIFT 0 +static inline uint32_t A2XX_SQ_WRAPPING_0_PARAM_WRAP_0(uint32_t val) +{ + return ((val) << A2XX_SQ_WRAPPING_0_PARAM_WRAP_0__SHIFT) 
& A2XX_SQ_WRAPPING_0_PARAM_WRAP_0__MASK; +} +#define A2XX_SQ_WRAPPING_0_PARAM_WRAP_1__MASK 0x000000f0 +#define A2XX_SQ_WRAPPING_0_PARAM_WRAP_1__SHIFT 4 +static inline uint32_t A2XX_SQ_WRAPPING_0_PARAM_WRAP_1(uint32_t val) +{ + return ((val) << A2XX_SQ_WRAPPING_0_PARAM_WRAP_1__SHIFT) & A2XX_SQ_WRAPPING_0_PARAM_WRAP_1__MASK; +} +#define A2XX_SQ_WRAPPING_0_PARAM_WRAP_2__MASK 0x00000f00 +#define A2XX_SQ_WRAPPING_0_PARAM_WRAP_2__SHIFT 8 +static inline uint32_t A2XX_SQ_WRAPPING_0_PARAM_WRAP_2(uint32_t val) +{ + return ((val) << A2XX_SQ_WRAPPING_0_PARAM_WRAP_2__SHIFT) & A2XX_SQ_WRAPPING_0_PARAM_WRAP_2__MASK; +} +#define A2XX_SQ_WRAPPING_0_PARAM_WRAP_3__MASK 0x0000f000 +#define A2XX_SQ_WRAPPING_0_PARAM_WRAP_3__SHIFT 12 +static inline uint32_t A2XX_SQ_WRAPPING_0_PARAM_WRAP_3(uint32_t val) +{ + return ((val) << A2XX_SQ_WRAPPING_0_PARAM_WRAP_3__SHIFT) & A2XX_SQ_WRAPPING_0_PARAM_WRAP_3__MASK; +} +#define A2XX_SQ_WRAPPING_0_PARAM_WRAP_4__MASK 0x000f0000 +#define A2XX_SQ_WRAPPING_0_PARAM_WRAP_4__SHIFT 16 +static inline uint32_t A2XX_SQ_WRAPPING_0_PARAM_WRAP_4(uint32_t val) +{ + return ((val) << A2XX_SQ_WRAPPING_0_PARAM_WRAP_4__SHIFT) & A2XX_SQ_WRAPPING_0_PARAM_WRAP_4__MASK; +} +#define A2XX_SQ_WRAPPING_0_PARAM_WRAP_5__MASK 0x00f00000 +#define A2XX_SQ_WRAPPING_0_PARAM_WRAP_5__SHIFT 20 +static inline uint32_t A2XX_SQ_WRAPPING_0_PARAM_WRAP_5(uint32_t val) +{ + return ((val) << A2XX_SQ_WRAPPING_0_PARAM_WRAP_5__SHIFT) & A2XX_SQ_WRAPPING_0_PARAM_WRAP_5__MASK; +} +#define A2XX_SQ_WRAPPING_0_PARAM_WRAP_6__MASK 0x0f000000 +#define A2XX_SQ_WRAPPING_0_PARAM_WRAP_6__SHIFT 24 +static inline uint32_t A2XX_SQ_WRAPPING_0_PARAM_WRAP_6(uint32_t val) +{ + return ((val) << A2XX_SQ_WRAPPING_0_PARAM_WRAP_6__SHIFT) & A2XX_SQ_WRAPPING_0_PARAM_WRAP_6__MASK; +} +#define A2XX_SQ_WRAPPING_0_PARAM_WRAP_7__MASK 0xf0000000 +#define A2XX_SQ_WRAPPING_0_PARAM_WRAP_7__SHIFT 28 +static inline uint32_t A2XX_SQ_WRAPPING_0_PARAM_WRAP_7(uint32_t val) +{ + return ((val) << A2XX_SQ_WRAPPING_0_PARAM_WRAP_7__SHIFT) & 
A2XX_SQ_WRAPPING_0_PARAM_WRAP_7__MASK; +} + +#define REG_A2XX_SQ_WRAPPING_1 0x00002184 +#define A2XX_SQ_WRAPPING_1_PARAM_WRAP_8__MASK 0x0000000f +#define A2XX_SQ_WRAPPING_1_PARAM_WRAP_8__SHIFT 0 +static inline uint32_t A2XX_SQ_WRAPPING_1_PARAM_WRAP_8(uint32_t val) +{ + return ((val) << A2XX_SQ_WRAPPING_1_PARAM_WRAP_8__SHIFT) & A2XX_SQ_WRAPPING_1_PARAM_WRAP_8__MASK; +} +#define A2XX_SQ_WRAPPING_1_PARAM_WRAP_9__MASK 0x000000f0 +#define A2XX_SQ_WRAPPING_1_PARAM_WRAP_9__SHIFT 4 +static inline uint32_t A2XX_SQ_WRAPPING_1_PARAM_WRAP_9(uint32_t val) +{ + return ((val) << A2XX_SQ_WRAPPING_1_PARAM_WRAP_9__SHIFT) & A2XX_SQ_WRAPPING_1_PARAM_WRAP_9__MASK; +} +#define A2XX_SQ_WRAPPING_1_PARAM_WRAP_10__MASK 0x00000f00 +#define A2XX_SQ_WRAPPING_1_PARAM_WRAP_10__SHIFT 8 +static inline uint32_t A2XX_SQ_WRAPPING_1_PARAM_WRAP_10(uint32_t val) +{ + return ((val) << A2XX_SQ_WRAPPING_1_PARAM_WRAP_10__SHIFT) & A2XX_SQ_WRAPPING_1_PARAM_WRAP_10__MASK; +} +#define A2XX_SQ_WRAPPING_1_PARAM_WRAP_11__MASK 0x0000f000 +#define A2XX_SQ_WRAPPING_1_PARAM_WRAP_11__SHIFT 12 +static inline uint32_t A2XX_SQ_WRAPPING_1_PARAM_WRAP_11(uint32_t val) +{ + return ((val) << A2XX_SQ_WRAPPING_1_PARAM_WRAP_11__SHIFT) & A2XX_SQ_WRAPPING_1_PARAM_WRAP_11__MASK; +} +#define A2XX_SQ_WRAPPING_1_PARAM_WRAP_12__MASK 0x000f0000 +#define A2XX_SQ_WRAPPING_1_PARAM_WRAP_12__SHIFT 16 +static inline uint32_t A2XX_SQ_WRAPPING_1_PARAM_WRAP_12(uint32_t val) +{ + return ((val) << A2XX_SQ_WRAPPING_1_PARAM_WRAP_12__SHIFT) & A2XX_SQ_WRAPPING_1_PARAM_WRAP_12__MASK; +} +#define A2XX_SQ_WRAPPING_1_PARAM_WRAP_13__MASK 0x00f00000 +#define A2XX_SQ_WRAPPING_1_PARAM_WRAP_13__SHIFT 20 +static inline uint32_t A2XX_SQ_WRAPPING_1_PARAM_WRAP_13(uint32_t val) +{ + return ((val) << A2XX_SQ_WRAPPING_1_PARAM_WRAP_13__SHIFT) & A2XX_SQ_WRAPPING_1_PARAM_WRAP_13__MASK; +} +#define A2XX_SQ_WRAPPING_1_PARAM_WRAP_14__MASK 0x0f000000 +#define A2XX_SQ_WRAPPING_1_PARAM_WRAP_14__SHIFT 24 +static inline uint32_t A2XX_SQ_WRAPPING_1_PARAM_WRAP_14(uint32_t val) +{ 
+ return ((val) << A2XX_SQ_WRAPPING_1_PARAM_WRAP_14__SHIFT) & A2XX_SQ_WRAPPING_1_PARAM_WRAP_14__MASK; +} +#define A2XX_SQ_WRAPPING_1_PARAM_WRAP_15__MASK 0xf0000000 +#define A2XX_SQ_WRAPPING_1_PARAM_WRAP_15__SHIFT 28 +static inline uint32_t A2XX_SQ_WRAPPING_1_PARAM_WRAP_15(uint32_t val) +{ + return ((val) << A2XX_SQ_WRAPPING_1_PARAM_WRAP_15__SHIFT) & A2XX_SQ_WRAPPING_1_PARAM_WRAP_15__MASK; +} + +#define REG_A2XX_SQ_PS_PROGRAM 0x000021f6 +#define A2XX_SQ_PS_PROGRAM_BASE__MASK 0x00000fff +#define A2XX_SQ_PS_PROGRAM_BASE__SHIFT 0 +static inline uint32_t A2XX_SQ_PS_PROGRAM_BASE(uint32_t val) +{ + return ((val) << A2XX_SQ_PS_PROGRAM_BASE__SHIFT) & A2XX_SQ_PS_PROGRAM_BASE__MASK; +} +#define A2XX_SQ_PS_PROGRAM_SIZE__MASK 0x00fff000 +#define A2XX_SQ_PS_PROGRAM_SIZE__SHIFT 12 +static inline uint32_t A2XX_SQ_PS_PROGRAM_SIZE(uint32_t val) +{ + return ((val) << A2XX_SQ_PS_PROGRAM_SIZE__SHIFT) & A2XX_SQ_PS_PROGRAM_SIZE__MASK; +} + +#define REG_A2XX_SQ_VS_PROGRAM 0x000021f7 +#define A2XX_SQ_VS_PROGRAM_BASE__MASK 0x00000fff +#define A2XX_SQ_VS_PROGRAM_BASE__SHIFT 0 +static inline uint32_t A2XX_SQ_VS_PROGRAM_BASE(uint32_t val) +{ + return ((val) << A2XX_SQ_VS_PROGRAM_BASE__SHIFT) & A2XX_SQ_VS_PROGRAM_BASE__MASK; +} +#define A2XX_SQ_VS_PROGRAM_SIZE__MASK 0x00fff000 +#define A2XX_SQ_VS_PROGRAM_SIZE__SHIFT 12 +static inline uint32_t A2XX_SQ_VS_PROGRAM_SIZE(uint32_t val) +{ + return ((val) << A2XX_SQ_VS_PROGRAM_SIZE__SHIFT) & A2XX_SQ_VS_PROGRAM_SIZE__MASK; +} + +#define REG_A2XX_VGT_EVENT_INITIATOR 0x000021f9 + +#define REG_A2XX_VGT_DRAW_INITIATOR 0x000021fc +#define A2XX_VGT_DRAW_INITIATOR_PRIM_TYPE__MASK 0x0000003f +#define A2XX_VGT_DRAW_INITIATOR_PRIM_TYPE__SHIFT 0 +static inline uint32_t A2XX_VGT_DRAW_INITIATOR_PRIM_TYPE(enum pc_di_primtype val) +{ + return ((val) << A2XX_VGT_DRAW_INITIATOR_PRIM_TYPE__SHIFT) & A2XX_VGT_DRAW_INITIATOR_PRIM_TYPE__MASK; +} +#define A2XX_VGT_DRAW_INITIATOR_SOURCE_SELECT__MASK 0x000000c0 +#define A2XX_VGT_DRAW_INITIATOR_SOURCE_SELECT__SHIFT 6 +static 
inline uint32_t A2XX_VGT_DRAW_INITIATOR_SOURCE_SELECT(enum pc_di_src_sel val) +{ + return ((val) << A2XX_VGT_DRAW_INITIATOR_SOURCE_SELECT__SHIFT) & A2XX_VGT_DRAW_INITIATOR_SOURCE_SELECT__MASK; +} +#define A2XX_VGT_DRAW_INITIATOR_VIS_CULL__MASK 0x00000600 +#define A2XX_VGT_DRAW_INITIATOR_VIS_CULL__SHIFT 9 +static inline uint32_t A2XX_VGT_DRAW_INITIATOR_VIS_CULL(enum pc_di_vis_cull_mode val) +{ + return ((val) << A2XX_VGT_DRAW_INITIATOR_VIS_CULL__SHIFT) & A2XX_VGT_DRAW_INITIATOR_VIS_CULL__MASK; +} +#define A2XX_VGT_DRAW_INITIATOR_INDEX_SIZE__MASK 0x00000800 +#define A2XX_VGT_DRAW_INITIATOR_INDEX_SIZE__SHIFT 11 +static inline uint32_t A2XX_VGT_DRAW_INITIATOR_INDEX_SIZE(enum pc_di_index_size val) +{ + return ((val) << A2XX_VGT_DRAW_INITIATOR_INDEX_SIZE__SHIFT) & A2XX_VGT_DRAW_INITIATOR_INDEX_SIZE__MASK; +} +#define A2XX_VGT_DRAW_INITIATOR_NOT_EOP 0x00001000 +#define A2XX_VGT_DRAW_INITIATOR_SMALL_INDEX 0x00002000 +#define A2XX_VGT_DRAW_INITIATOR_PRE_DRAW_INITIATOR_ENABLE 0x00004000 +#define A2XX_VGT_DRAW_INITIATOR_NUM_INSTANCES__MASK 0xff000000 +#define A2XX_VGT_DRAW_INITIATOR_NUM_INSTANCES__SHIFT 24 +static inline uint32_t A2XX_VGT_DRAW_INITIATOR_NUM_INSTANCES(uint32_t val) +{ + return ((val) << A2XX_VGT_DRAW_INITIATOR_NUM_INSTANCES__SHIFT) & A2XX_VGT_DRAW_INITIATOR_NUM_INSTANCES__MASK; +} + +#define REG_A2XX_VGT_IMMED_DATA 0x000021fd + +#define REG_A2XX_RB_DEPTHCONTROL 0x00002200 +#define A2XX_RB_DEPTHCONTROL_STENCIL_ENABLE 0x00000001 +#define A2XX_RB_DEPTHCONTROL_Z_ENABLE 0x00000002 +#define A2XX_RB_DEPTHCONTROL_Z_WRITE_ENABLE 0x00000004 +#define A2XX_RB_DEPTHCONTROL_EARLY_Z_ENABLE 0x00000008 +#define A2XX_RB_DEPTHCONTROL_ZFUNC__MASK 0x00000070 +#define A2XX_RB_DEPTHCONTROL_ZFUNC__SHIFT 4 +static inline uint32_t A2XX_RB_DEPTHCONTROL_ZFUNC(enum adreno_compare_func val) +{ + return ((val) << A2XX_RB_DEPTHCONTROL_ZFUNC__SHIFT) & A2XX_RB_DEPTHCONTROL_ZFUNC__MASK; +} +#define A2XX_RB_DEPTHCONTROL_BACKFACE_ENABLE 0x00000080 +#define A2XX_RB_DEPTHCONTROL_STENCILFUNC__MASK 
0x00000700 +#define A2XX_RB_DEPTHCONTROL_STENCILFUNC__SHIFT 8 +static inline uint32_t A2XX_RB_DEPTHCONTROL_STENCILFUNC(enum adreno_compare_func val) +{ + return ((val) << A2XX_RB_DEPTHCONTROL_STENCILFUNC__SHIFT) & A2XX_RB_DEPTHCONTROL_STENCILFUNC__MASK; +} +#define A2XX_RB_DEPTHCONTROL_STENCILFAIL__MASK 0x00003800 +#define A2XX_RB_DEPTHCONTROL_STENCILFAIL__SHIFT 11 +static inline uint32_t A2XX_RB_DEPTHCONTROL_STENCILFAIL(enum adreno_stencil_op val) +{ + return ((val) << A2XX_RB_DEPTHCONTROL_STENCILFAIL__SHIFT) & A2XX_RB_DEPTHCONTROL_STENCILFAIL__MASK; +} +#define A2XX_RB_DEPTHCONTROL_STENCILZPASS__MASK 0x0001c000 +#define A2XX_RB_DEPTHCONTROL_STENCILZPASS__SHIFT 14 +static inline uint32_t A2XX_RB_DEPTHCONTROL_STENCILZPASS(enum adreno_stencil_op val) +{ + return ((val) << A2XX_RB_DEPTHCONTROL_STENCILZPASS__SHIFT) & A2XX_RB_DEPTHCONTROL_STENCILZPASS__MASK; +} +#define A2XX_RB_DEPTHCONTROL_STENCILZFAIL__MASK 0x000e0000 +#define A2XX_RB_DEPTHCONTROL_STENCILZFAIL__SHIFT 17 +static inline uint32_t A2XX_RB_DEPTHCONTROL_STENCILZFAIL(enum adreno_stencil_op val) +{ + return ((val) << A2XX_RB_DEPTHCONTROL_STENCILZFAIL__SHIFT) & A2XX_RB_DEPTHCONTROL_STENCILZFAIL__MASK; +} +#define A2XX_RB_DEPTHCONTROL_STENCILFUNC_BF__MASK 0x00700000 +#define A2XX_RB_DEPTHCONTROL_STENCILFUNC_BF__SHIFT 20 +static inline uint32_t A2XX_RB_DEPTHCONTROL_STENCILFUNC_BF(enum adreno_compare_func val) +{ + return ((val) << A2XX_RB_DEPTHCONTROL_STENCILFUNC_BF__SHIFT) & A2XX_RB_DEPTHCONTROL_STENCILFUNC_BF__MASK; +} +#define A2XX_RB_DEPTHCONTROL_STENCILFAIL_BF__MASK 0x03800000 +#define A2XX_RB_DEPTHCONTROL_STENCILFAIL_BF__SHIFT 23 +static inline uint32_t A2XX_RB_DEPTHCONTROL_STENCILFAIL_BF(enum adreno_stencil_op val) +{ + return ((val) << A2XX_RB_DEPTHCONTROL_STENCILFAIL_BF__SHIFT) & A2XX_RB_DEPTHCONTROL_STENCILFAIL_BF__MASK; +} +#define A2XX_RB_DEPTHCONTROL_STENCILZPASS_BF__MASK 0x1c000000 +#define A2XX_RB_DEPTHCONTROL_STENCILZPASS_BF__SHIFT 26 +static inline uint32_t 
A2XX_RB_DEPTHCONTROL_STENCILZPASS_BF(enum adreno_stencil_op val) +{ + return ((val) << A2XX_RB_DEPTHCONTROL_STENCILZPASS_BF__SHIFT) & A2XX_RB_DEPTHCONTROL_STENCILZPASS_BF__MASK; +} +#define A2XX_RB_DEPTHCONTROL_STENCILZFAIL_BF__MASK 0xe0000000 +#define A2XX_RB_DEPTHCONTROL_STENCILZFAIL_BF__SHIFT 29 +static inline uint32_t A2XX_RB_DEPTHCONTROL_STENCILZFAIL_BF(enum adreno_stencil_op val) +{ + return ((val) << A2XX_RB_DEPTHCONTROL_STENCILZFAIL_BF__SHIFT) & A2XX_RB_DEPTHCONTROL_STENCILZFAIL_BF__MASK; +} + +#define REG_A2XX_RB_BLEND_CONTROL 0x00002201 +#define A2XX_RB_BLEND_CONTROL_COLOR_SRCBLEND__MASK 0x0000001f +#define A2XX_RB_BLEND_CONTROL_COLOR_SRCBLEND__SHIFT 0 +static inline uint32_t A2XX_RB_BLEND_CONTROL_COLOR_SRCBLEND(enum adreno_rb_blend_factor val) +{ + return ((val) << A2XX_RB_BLEND_CONTROL_COLOR_SRCBLEND__SHIFT) & A2XX_RB_BLEND_CONTROL_COLOR_SRCBLEND__MASK; +} +#define A2XX_RB_BLEND_CONTROL_COLOR_COMB_FCN__MASK 0x000000e0 +#define A2XX_RB_BLEND_CONTROL_COLOR_COMB_FCN__SHIFT 5 +static inline uint32_t A2XX_RB_BLEND_CONTROL_COLOR_COMB_FCN(enum a2xx_rb_blend_opcode val) +{ + return ((val) << A2XX_RB_BLEND_CONTROL_COLOR_COMB_FCN__SHIFT) & A2XX_RB_BLEND_CONTROL_COLOR_COMB_FCN__MASK; +} +#define A2XX_RB_BLEND_CONTROL_COLOR_DESTBLEND__MASK 0x00001f00 +#define A2XX_RB_BLEND_CONTROL_COLOR_DESTBLEND__SHIFT 8 +static inline uint32_t A2XX_RB_BLEND_CONTROL_COLOR_DESTBLEND(enum adreno_rb_blend_factor val) +{ + return ((val) << A2XX_RB_BLEND_CONTROL_COLOR_DESTBLEND__SHIFT) & A2XX_RB_BLEND_CONTROL_COLOR_DESTBLEND__MASK; +} +#define A2XX_RB_BLEND_CONTROL_ALPHA_SRCBLEND__MASK 0x001f0000 +#define A2XX_RB_BLEND_CONTROL_ALPHA_SRCBLEND__SHIFT 16 +static inline uint32_t A2XX_RB_BLEND_CONTROL_ALPHA_SRCBLEND(enum adreno_rb_blend_factor val) +{ + return ((val) << A2XX_RB_BLEND_CONTROL_ALPHA_SRCBLEND__SHIFT) & A2XX_RB_BLEND_CONTROL_ALPHA_SRCBLEND__MASK; +} +#define A2XX_RB_BLEND_CONTROL_ALPHA_COMB_FCN__MASK 0x00e00000 +#define A2XX_RB_BLEND_CONTROL_ALPHA_COMB_FCN__SHIFT 21 +static 
inline uint32_t A2XX_RB_BLEND_CONTROL_ALPHA_COMB_FCN(enum a2xx_rb_blend_opcode val) +{ + return ((val) << A2XX_RB_BLEND_CONTROL_ALPHA_COMB_FCN__SHIFT) & A2XX_RB_BLEND_CONTROL_ALPHA_COMB_FCN__MASK; +} +#define A2XX_RB_BLEND_CONTROL_ALPHA_DESTBLEND__MASK 0x1f000000 +#define A2XX_RB_BLEND_CONTROL_ALPHA_DESTBLEND__SHIFT 24 +static inline uint32_t A2XX_RB_BLEND_CONTROL_ALPHA_DESTBLEND(enum adreno_rb_blend_factor val) +{ + return ((val) << A2XX_RB_BLEND_CONTROL_ALPHA_DESTBLEND__SHIFT) & A2XX_RB_BLEND_CONTROL_ALPHA_DESTBLEND__MASK; +} +#define A2XX_RB_BLEND_CONTROL_BLEND_FORCE_ENABLE 0x20000000 +#define A2XX_RB_BLEND_CONTROL_BLEND_FORCE 0x40000000 + +#define REG_A2XX_RB_COLORCONTROL 0x00002202 +#define A2XX_RB_COLORCONTROL_ALPHA_FUNC__MASK 0x00000007 +#define A2XX_RB_COLORCONTROL_ALPHA_FUNC__SHIFT 0 +static inline uint32_t A2XX_RB_COLORCONTROL_ALPHA_FUNC(enum adreno_compare_func val) +{ + return ((val) << A2XX_RB_COLORCONTROL_ALPHA_FUNC__SHIFT) & A2XX_RB_COLORCONTROL_ALPHA_FUNC__MASK; +} +#define A2XX_RB_COLORCONTROL_ALPHA_TEST_ENABLE 0x00000008 +#define A2XX_RB_COLORCONTROL_ALPHA_TO_MASK_ENABLE 0x00000010 +#define A2XX_RB_COLORCONTROL_BLEND_DISABLE 0x00000020 +#define A2XX_RB_COLORCONTROL_VOB_ENABLE 0x00000040 +#define A2XX_RB_COLORCONTROL_VS_EXPORTS_FOG 0x00000080 +#define A2XX_RB_COLORCONTROL_ROP_CODE__MASK 0x00000f00 +#define A2XX_RB_COLORCONTROL_ROP_CODE__SHIFT 8 +static inline uint32_t A2XX_RB_COLORCONTROL_ROP_CODE(uint32_t val) +{ + return ((val) << A2XX_RB_COLORCONTROL_ROP_CODE__SHIFT) & A2XX_RB_COLORCONTROL_ROP_CODE__MASK; +} +#define A2XX_RB_COLORCONTROL_DITHER_MODE__MASK 0x00003000 +#define A2XX_RB_COLORCONTROL_DITHER_MODE__SHIFT 12 +static inline uint32_t A2XX_RB_COLORCONTROL_DITHER_MODE(enum adreno_rb_dither_mode val) +{ + return ((val) << A2XX_RB_COLORCONTROL_DITHER_MODE__SHIFT) & A2XX_RB_COLORCONTROL_DITHER_MODE__MASK; +} +#define A2XX_RB_COLORCONTROL_DITHER_TYPE__MASK 0x0000c000 +#define A2XX_RB_COLORCONTROL_DITHER_TYPE__SHIFT 14 +static inline uint32_t 
A2XX_RB_COLORCONTROL_DITHER_TYPE(enum a2xx_rb_dither_type val) +{ + return ((val) << A2XX_RB_COLORCONTROL_DITHER_TYPE__SHIFT) & A2XX_RB_COLORCONTROL_DITHER_TYPE__MASK; +} +#define A2XX_RB_COLORCONTROL_PIXEL_FOG 0x00010000 +#define A2XX_RB_COLORCONTROL_ALPHA_TO_MASK_OFFSET0__MASK 0x03000000 +#define A2XX_RB_COLORCONTROL_ALPHA_TO_MASK_OFFSET0__SHIFT 24 +static inline uint32_t A2XX_RB_COLORCONTROL_ALPHA_TO_MASK_OFFSET0(uint32_t val) +{ + return ((val) << A2XX_RB_COLORCONTROL_ALPHA_TO_MASK_OFFSET0__SHIFT) & A2XX_RB_COLORCONTROL_ALPHA_TO_MASK_OFFSET0__MASK; +} +#define A2XX_RB_COLORCONTROL_ALPHA_TO_MASK_OFFSET1__MASK 0x0c000000 +#define A2XX_RB_COLORCONTROL_ALPHA_TO_MASK_OFFSET1__SHIFT 26 +static inline uint32_t A2XX_RB_COLORCONTROL_ALPHA_TO_MASK_OFFSET1(uint32_t val) +{ + return ((val) << A2XX_RB_COLORCONTROL_ALPHA_TO_MASK_OFFSET1__SHIFT) & A2XX_RB_COLORCONTROL_ALPHA_TO_MASK_OFFSET1__MASK; +} +#define A2XX_RB_COLORCONTROL_ALPHA_TO_MASK_OFFSET2__MASK 0x30000000 +#define A2XX_RB_COLORCONTROL_ALPHA_TO_MASK_OFFSET2__SHIFT 28 +static inline uint32_t A2XX_RB_COLORCONTROL_ALPHA_TO_MASK_OFFSET2(uint32_t val) +{ + return ((val) << A2XX_RB_COLORCONTROL_ALPHA_TO_MASK_OFFSET2__SHIFT) & A2XX_RB_COLORCONTROL_ALPHA_TO_MASK_OFFSET2__MASK; +} +#define A2XX_RB_COLORCONTROL_ALPHA_TO_MASK_OFFSET3__MASK 0xc0000000 +#define A2XX_RB_COLORCONTROL_ALPHA_TO_MASK_OFFSET3__SHIFT 30 +static inline uint32_t A2XX_RB_COLORCONTROL_ALPHA_TO_MASK_OFFSET3(uint32_t val) +{ + return ((val) << A2XX_RB_COLORCONTROL_ALPHA_TO_MASK_OFFSET3__SHIFT) & A2XX_RB_COLORCONTROL_ALPHA_TO_MASK_OFFSET3__MASK; +} + +#define REG_A2XX_VGT_CURRENT_BIN_ID_MAX 0x00002203 +#define A2XX_VGT_CURRENT_BIN_ID_MAX_COLUMN__MASK 0x00000007 +#define A2XX_VGT_CURRENT_BIN_ID_MAX_COLUMN__SHIFT 0 +static inline uint32_t A2XX_VGT_CURRENT_BIN_ID_MAX_COLUMN(uint32_t val) +{ + return ((val) << A2XX_VGT_CURRENT_BIN_ID_MAX_COLUMN__SHIFT) & A2XX_VGT_CURRENT_BIN_ID_MAX_COLUMN__MASK; +} +#define A2XX_VGT_CURRENT_BIN_ID_MAX_ROW__MASK 0x00000038 
+#define A2XX_VGT_CURRENT_BIN_ID_MAX_ROW__SHIFT 3 +static inline uint32_t A2XX_VGT_CURRENT_BIN_ID_MAX_ROW(uint32_t val) +{ + return ((val) << A2XX_VGT_CURRENT_BIN_ID_MAX_ROW__SHIFT) & A2XX_VGT_CURRENT_BIN_ID_MAX_ROW__MASK; +} +#define A2XX_VGT_CURRENT_BIN_ID_MAX_GUARD_BAND_MASK__MASK 0x000001c0 +#define A2XX_VGT_CURRENT_BIN_ID_MAX_GUARD_BAND_MASK__SHIFT 6 +static inline uint32_t A2XX_VGT_CURRENT_BIN_ID_MAX_GUARD_BAND_MASK(uint32_t val) +{ + return ((val) << A2XX_VGT_CURRENT_BIN_ID_MAX_GUARD_BAND_MASK__SHIFT) & A2XX_VGT_CURRENT_BIN_ID_MAX_GUARD_BAND_MASK__MASK; +} + +#define REG_A2XX_PA_CL_CLIP_CNTL 0x00002204 +#define A2XX_PA_CL_CLIP_CNTL_CLIP_DISABLE 0x00010000 +#define A2XX_PA_CL_CLIP_CNTL_BOUNDARY_EDGE_FLAG_ENA 0x00040000 +#define A2XX_PA_CL_CLIP_CNTL_DX_CLIP_SPACE_DEF__MASK 0x00080000 +#define A2XX_PA_CL_CLIP_CNTL_DX_CLIP_SPACE_DEF__SHIFT 19 +static inline uint32_t A2XX_PA_CL_CLIP_CNTL_DX_CLIP_SPACE_DEF(enum a2xx_dx_clip_space val) +{ + return ((val) << A2XX_PA_CL_CLIP_CNTL_DX_CLIP_SPACE_DEF__SHIFT) & A2XX_PA_CL_CLIP_CNTL_DX_CLIP_SPACE_DEF__MASK; +} +#define A2XX_PA_CL_CLIP_CNTL_DIS_CLIP_ERR_DETECT 0x00100000 +#define A2XX_PA_CL_CLIP_CNTL_VTX_KILL_OR 0x00200000 +#define A2XX_PA_CL_CLIP_CNTL_XY_NAN_RETAIN 0x00400000 +#define A2XX_PA_CL_CLIP_CNTL_Z_NAN_RETAIN 0x00800000 +#define A2XX_PA_CL_CLIP_CNTL_W_NAN_RETAIN 0x01000000 + +#define REG_A2XX_PA_SU_SC_MODE_CNTL 0x00002205 +#define A2XX_PA_SU_SC_MODE_CNTL_CULL_FRONT 0x00000001 +#define A2XX_PA_SU_SC_MODE_CNTL_CULL_BACK 0x00000002 +#define A2XX_PA_SU_SC_MODE_CNTL_FACE 0x00000004 +#define A2XX_PA_SU_SC_MODE_CNTL_POLYMODE__MASK 0x00000018 +#define A2XX_PA_SU_SC_MODE_CNTL_POLYMODE__SHIFT 3 +static inline uint32_t A2XX_PA_SU_SC_MODE_CNTL_POLYMODE(enum a2xx_pa_su_sc_polymode val) +{ + return ((val) << A2XX_PA_SU_SC_MODE_CNTL_POLYMODE__SHIFT) & A2XX_PA_SU_SC_MODE_CNTL_POLYMODE__MASK; +} +#define A2XX_PA_SU_SC_MODE_CNTL_FRONT_PTYPE__MASK 0x000000e0 +#define A2XX_PA_SU_SC_MODE_CNTL_FRONT_PTYPE__SHIFT 5 +static inline 
uint32_t A2XX_PA_SU_SC_MODE_CNTL_FRONT_PTYPE(enum adreno_pa_su_sc_draw val) +{ + return ((val) << A2XX_PA_SU_SC_MODE_CNTL_FRONT_PTYPE__SHIFT) & A2XX_PA_SU_SC_MODE_CNTL_FRONT_PTYPE__MASK; +} +#define A2XX_PA_SU_SC_MODE_CNTL_BACK_PTYPE__MASK 0x00000700 +#define A2XX_PA_SU_SC_MODE_CNTL_BACK_PTYPE__SHIFT 8 +static inline uint32_t A2XX_PA_SU_SC_MODE_CNTL_BACK_PTYPE(enum adreno_pa_su_sc_draw val) +{ + return ((val) << A2XX_PA_SU_SC_MODE_CNTL_BACK_PTYPE__SHIFT) & A2XX_PA_SU_SC_MODE_CNTL_BACK_PTYPE__MASK; +} +#define A2XX_PA_SU_SC_MODE_CNTL_POLY_OFFSET_FRONT_ENABLE 0x00000800 +#define A2XX_PA_SU_SC_MODE_CNTL_POLY_OFFSET_BACK_ENABLE 0x00001000 +#define A2XX_PA_SU_SC_MODE_CNTL_POLY_OFFSET_PARA_ENABLE 0x00002000 +#define A2XX_PA_SU_SC_MODE_CNTL_MSAA_ENABLE 0x00008000 +#define A2XX_PA_SU_SC_MODE_CNTL_VTX_WINDOW_OFFSET_ENABLE 0x00010000 +#define A2XX_PA_SU_SC_MODE_CNTL_LINE_STIPPLE_ENABLE 0x00040000 +#define A2XX_PA_SU_SC_MODE_CNTL_PROVOKING_VTX_LAST 0x00080000 +#define A2XX_PA_SU_SC_MODE_CNTL_PERSP_CORR_DIS 0x00100000 +#define A2XX_PA_SU_SC_MODE_CNTL_MULTI_PRIM_IB_ENA 0x00200000 +#define A2XX_PA_SU_SC_MODE_CNTL_QUAD_ORDER_ENABLE 0x00800000 +#define A2XX_PA_SU_SC_MODE_CNTL_WAIT_RB_IDLE_ALL_TRI 0x02000000 +#define A2XX_PA_SU_SC_MODE_CNTL_WAIT_RB_IDLE_FIRST_TRI_NEW_STATE 0x04000000 +#define A2XX_PA_SU_SC_MODE_CNTL_CLAMPED_FACENESS 0x10000000 +#define A2XX_PA_SU_SC_MODE_CNTL_ZERO_AREA_FACENESS 0x20000000 +#define A2XX_PA_SU_SC_MODE_CNTL_FACE_KILL_ENABLE 0x40000000 +#define A2XX_PA_SU_SC_MODE_CNTL_FACE_WRITE_ENABLE 0x80000000 + +#define REG_A2XX_PA_CL_VTE_CNTL 0x00002206 +#define A2XX_PA_CL_VTE_CNTL_VPORT_X_SCALE_ENA 0x00000001 +#define A2XX_PA_CL_VTE_CNTL_VPORT_X_OFFSET_ENA 0x00000002 +#define A2XX_PA_CL_VTE_CNTL_VPORT_Y_SCALE_ENA 0x00000004 +#define A2XX_PA_CL_VTE_CNTL_VPORT_Y_OFFSET_ENA 0x00000008 +#define A2XX_PA_CL_VTE_CNTL_VPORT_Z_SCALE_ENA 0x00000010 +#define A2XX_PA_CL_VTE_CNTL_VPORT_Z_OFFSET_ENA 0x00000020 +#define A2XX_PA_CL_VTE_CNTL_VTX_XY_FMT 0x00000100 +#define 
A2XX_PA_CL_VTE_CNTL_VTX_Z_FMT 0x00000200 +#define A2XX_PA_CL_VTE_CNTL_VTX_W0_FMT 0x00000400 +#define A2XX_PA_CL_VTE_CNTL_PERFCOUNTER_REF 0x00000800 + +#define REG_A2XX_VGT_CURRENT_BIN_ID_MIN 0x00002207 +#define A2XX_VGT_CURRENT_BIN_ID_MIN_COLUMN__MASK 0x00000007 +#define A2XX_VGT_CURRENT_BIN_ID_MIN_COLUMN__SHIFT 0 +static inline uint32_t A2XX_VGT_CURRENT_BIN_ID_MIN_COLUMN(uint32_t val) +{ + return ((val) << A2XX_VGT_CURRENT_BIN_ID_MIN_COLUMN__SHIFT) & A2XX_VGT_CURRENT_BIN_ID_MIN_COLUMN__MASK; +} +#define A2XX_VGT_CURRENT_BIN_ID_MIN_ROW__MASK 0x00000038 +#define A2XX_VGT_CURRENT_BIN_ID_MIN_ROW__SHIFT 3 +static inline uint32_t A2XX_VGT_CURRENT_BIN_ID_MIN_ROW(uint32_t val) +{ + return ((val) << A2XX_VGT_CURRENT_BIN_ID_MIN_ROW__SHIFT) & A2XX_VGT_CURRENT_BIN_ID_MIN_ROW__MASK; +} +#define A2XX_VGT_CURRENT_BIN_ID_MIN_GUARD_BAND_MASK__MASK 0x000001c0 +#define A2XX_VGT_CURRENT_BIN_ID_MIN_GUARD_BAND_MASK__SHIFT 6 +static inline uint32_t A2XX_VGT_CURRENT_BIN_ID_MIN_GUARD_BAND_MASK(uint32_t val) +{ + return ((val) << A2XX_VGT_CURRENT_BIN_ID_MIN_GUARD_BAND_MASK__SHIFT) & A2XX_VGT_CURRENT_BIN_ID_MIN_GUARD_BAND_MASK__MASK; +} + +#define REG_A2XX_RB_MODECONTROL 0x00002208 +#define A2XX_RB_MODECONTROL_EDRAM_MODE__MASK 0x00000007 +#define A2XX_RB_MODECONTROL_EDRAM_MODE__SHIFT 0 +static inline uint32_t A2XX_RB_MODECONTROL_EDRAM_MODE(enum a2xx_rb_edram_mode val) +{ + return ((val) << A2XX_RB_MODECONTROL_EDRAM_MODE__SHIFT) & A2XX_RB_MODECONTROL_EDRAM_MODE__MASK; +} + +#define REG_A2XX_A220_RB_LRZ_VSC_CONTROL 0x00002209 + +#define REG_A2XX_RB_SAMPLE_POS 0x0000220a + +#define REG_A2XX_CLEAR_COLOR 0x0000220b +#define A2XX_CLEAR_COLOR_RED__MASK 0x000000ff +#define A2XX_CLEAR_COLOR_RED__SHIFT 0 +static inline uint32_t A2XX_CLEAR_COLOR_RED(uint32_t val) +{ + return ((val) << A2XX_CLEAR_COLOR_RED__SHIFT) & A2XX_CLEAR_COLOR_RED__MASK; +} +#define A2XX_CLEAR_COLOR_GREEN__MASK 0x0000ff00 +#define A2XX_CLEAR_COLOR_GREEN__SHIFT 8 +static inline uint32_t A2XX_CLEAR_COLOR_GREEN(uint32_t val) +{ + 
return ((val) << A2XX_CLEAR_COLOR_GREEN__SHIFT) & A2XX_CLEAR_COLOR_GREEN__MASK; +} +#define A2XX_CLEAR_COLOR_BLUE__MASK 0x00ff0000 +#define A2XX_CLEAR_COLOR_BLUE__SHIFT 16 +static inline uint32_t A2XX_CLEAR_COLOR_BLUE(uint32_t val) +{ + return ((val) << A2XX_CLEAR_COLOR_BLUE__SHIFT) & A2XX_CLEAR_COLOR_BLUE__MASK; +} +#define A2XX_CLEAR_COLOR_ALPHA__MASK 0xff000000 +#define A2XX_CLEAR_COLOR_ALPHA__SHIFT 24 +static inline uint32_t A2XX_CLEAR_COLOR_ALPHA(uint32_t val) +{ + return ((val) << A2XX_CLEAR_COLOR_ALPHA__SHIFT) & A2XX_CLEAR_COLOR_ALPHA__MASK; +} + +#define REG_A2XX_A220_GRAS_CONTROL 0x00002210 + +#define REG_A2XX_PA_SU_POINT_SIZE 0x00002280 +#define A2XX_PA_SU_POINT_SIZE_HEIGHT__MASK 0x0000ffff +#define A2XX_PA_SU_POINT_SIZE_HEIGHT__SHIFT 0 +static inline uint32_t A2XX_PA_SU_POINT_SIZE_HEIGHT(float val) +{ + return ((((uint32_t)(val * 16.0))) << A2XX_PA_SU_POINT_SIZE_HEIGHT__SHIFT) & A2XX_PA_SU_POINT_SIZE_HEIGHT__MASK; +} +#define A2XX_PA_SU_POINT_SIZE_WIDTH__MASK 0xffff0000 +#define A2XX_PA_SU_POINT_SIZE_WIDTH__SHIFT 16 +static inline uint32_t A2XX_PA_SU_POINT_SIZE_WIDTH(float val) +{ + return ((((uint32_t)(val * 16.0))) << A2XX_PA_SU_POINT_SIZE_WIDTH__SHIFT) & A2XX_PA_SU_POINT_SIZE_WIDTH__MASK; +} + +#define REG_A2XX_PA_SU_POINT_MINMAX 0x00002281 +#define A2XX_PA_SU_POINT_MINMAX_MIN__MASK 0x0000ffff +#define A2XX_PA_SU_POINT_MINMAX_MIN__SHIFT 0 +static inline uint32_t A2XX_PA_SU_POINT_MINMAX_MIN(float val) +{ + return ((((uint32_t)(val * 16.0))) << A2XX_PA_SU_POINT_MINMAX_MIN__SHIFT) & A2XX_PA_SU_POINT_MINMAX_MIN__MASK; +} +#define A2XX_PA_SU_POINT_MINMAX_MAX__MASK 0xffff0000 +#define A2XX_PA_SU_POINT_MINMAX_MAX__SHIFT 16 +static inline uint32_t A2XX_PA_SU_POINT_MINMAX_MAX(float val) +{ + return ((((uint32_t)(val * 16.0))) << A2XX_PA_SU_POINT_MINMAX_MAX__SHIFT) & A2XX_PA_SU_POINT_MINMAX_MAX__MASK; +} + +#define REG_A2XX_PA_SU_LINE_CNTL 0x00002282 +#define A2XX_PA_SU_LINE_CNTL_WIDTH__MASK 0x0000ffff +#define A2XX_PA_SU_LINE_CNTL_WIDTH__SHIFT 0 +static inline 
uint32_t A2XX_PA_SU_LINE_CNTL_WIDTH(float val) +{ + return ((((uint32_t)(val * 16.0))) << A2XX_PA_SU_LINE_CNTL_WIDTH__SHIFT) & A2XX_PA_SU_LINE_CNTL_WIDTH__MASK; +} + +#define REG_A2XX_PA_SC_LINE_STIPPLE 0x00002283 +#define A2XX_PA_SC_LINE_STIPPLE_LINE_PATTERN__MASK 0x0000ffff +#define A2XX_PA_SC_LINE_STIPPLE_LINE_PATTERN__SHIFT 0 +static inline uint32_t A2XX_PA_SC_LINE_STIPPLE_LINE_PATTERN(uint32_t val) +{ + return ((val) << A2XX_PA_SC_LINE_STIPPLE_LINE_PATTERN__SHIFT) & A2XX_PA_SC_LINE_STIPPLE_LINE_PATTERN__MASK; +} +#define A2XX_PA_SC_LINE_STIPPLE_REPEAT_COUNT__MASK 0x00ff0000 +#define A2XX_PA_SC_LINE_STIPPLE_REPEAT_COUNT__SHIFT 16 +static inline uint32_t A2XX_PA_SC_LINE_STIPPLE_REPEAT_COUNT(uint32_t val) +{ + return ((val) << A2XX_PA_SC_LINE_STIPPLE_REPEAT_COUNT__SHIFT) & A2XX_PA_SC_LINE_STIPPLE_REPEAT_COUNT__MASK; +} +#define A2XX_PA_SC_LINE_STIPPLE_PATTERN_BIT_ORDER__MASK 0x10000000 +#define A2XX_PA_SC_LINE_STIPPLE_PATTERN_BIT_ORDER__SHIFT 28 +static inline uint32_t A2XX_PA_SC_LINE_STIPPLE_PATTERN_BIT_ORDER(enum a2xx_pa_sc_pattern_bit_order val) +{ + return ((val) << A2XX_PA_SC_LINE_STIPPLE_PATTERN_BIT_ORDER__SHIFT) & A2XX_PA_SC_LINE_STIPPLE_PATTERN_BIT_ORDER__MASK; +} +#define A2XX_PA_SC_LINE_STIPPLE_AUTO_RESET_CNTL__MASK 0x60000000 +#define A2XX_PA_SC_LINE_STIPPLE_AUTO_RESET_CNTL__SHIFT 29 +static inline uint32_t A2XX_PA_SC_LINE_STIPPLE_AUTO_RESET_CNTL(enum a2xx_pa_sc_auto_reset_cntl val) +{ + return ((val) << A2XX_PA_SC_LINE_STIPPLE_AUTO_RESET_CNTL__SHIFT) & A2XX_PA_SC_LINE_STIPPLE_AUTO_RESET_CNTL__MASK; +} + +#define REG_A2XX_PA_SC_VIZ_QUERY 0x00002293 +#define A2XX_PA_SC_VIZ_QUERY_VIZ_QUERY_ENA 0x00000001 +#define A2XX_PA_SC_VIZ_QUERY_VIZ_QUERY_ID__MASK 0x0000007e +#define A2XX_PA_SC_VIZ_QUERY_VIZ_QUERY_ID__SHIFT 1 +static inline uint32_t A2XX_PA_SC_VIZ_QUERY_VIZ_QUERY_ID(uint32_t val) +{ + return ((val) << A2XX_PA_SC_VIZ_QUERY_VIZ_QUERY_ID__SHIFT) & A2XX_PA_SC_VIZ_QUERY_VIZ_QUERY_ID__MASK; +} +#define A2XX_PA_SC_VIZ_QUERY_KILL_PIX_POST_EARLY_Z 0x00000100 
+ +#define REG_A2XX_VGT_ENHANCE 0x00002294 + +#define REG_A2XX_PA_SC_LINE_CNTL 0x00002300 +#define A2XX_PA_SC_LINE_CNTL_BRES_CNTL__MASK 0x0000ffff +#define A2XX_PA_SC_LINE_CNTL_BRES_CNTL__SHIFT 0 +static inline uint32_t A2XX_PA_SC_LINE_CNTL_BRES_CNTL(uint32_t val) +{ + return ((val) << A2XX_PA_SC_LINE_CNTL_BRES_CNTL__SHIFT) & A2XX_PA_SC_LINE_CNTL_BRES_CNTL__MASK; +} +#define A2XX_PA_SC_LINE_CNTL_USE_BRES_CNTL 0x00000100 +#define A2XX_PA_SC_LINE_CNTL_EXPAND_LINE_WIDTH 0x00000200 +#define A2XX_PA_SC_LINE_CNTL_LAST_PIXEL 0x00000400 + +#define REG_A2XX_PA_SC_AA_CONFIG 0x00002301 +#define A2XX_PA_SC_AA_CONFIG_MSAA_NUM_SAMPLES__MASK 0x00000007 +#define A2XX_PA_SC_AA_CONFIG_MSAA_NUM_SAMPLES__SHIFT 0 +static inline uint32_t A2XX_PA_SC_AA_CONFIG_MSAA_NUM_SAMPLES(uint32_t val) +{ + return ((val) << A2XX_PA_SC_AA_CONFIG_MSAA_NUM_SAMPLES__SHIFT) & A2XX_PA_SC_AA_CONFIG_MSAA_NUM_SAMPLES__MASK; +} +#define A2XX_PA_SC_AA_CONFIG_MAX_SAMPLE_DIST__MASK 0x0001e000 +#define A2XX_PA_SC_AA_CONFIG_MAX_SAMPLE_DIST__SHIFT 13 +static inline uint32_t A2XX_PA_SC_AA_CONFIG_MAX_SAMPLE_DIST(uint32_t val) +{ + return ((val) << A2XX_PA_SC_AA_CONFIG_MAX_SAMPLE_DIST__SHIFT) & A2XX_PA_SC_AA_CONFIG_MAX_SAMPLE_DIST__MASK; +} + +#define REG_A2XX_PA_SU_VTX_CNTL 0x00002302 +#define A2XX_PA_SU_VTX_CNTL_PIX_CENTER__MASK 0x00000001 +#define A2XX_PA_SU_VTX_CNTL_PIX_CENTER__SHIFT 0 +static inline uint32_t A2XX_PA_SU_VTX_CNTL_PIX_CENTER(enum a2xx_pa_pixcenter val) +{ + return ((val) << A2XX_PA_SU_VTX_CNTL_PIX_CENTER__SHIFT) & A2XX_PA_SU_VTX_CNTL_PIX_CENTER__MASK; +} +#define A2XX_PA_SU_VTX_CNTL_ROUND_MODE__MASK 0x00000006 +#define A2XX_PA_SU_VTX_CNTL_ROUND_MODE__SHIFT 1 +static inline uint32_t A2XX_PA_SU_VTX_CNTL_ROUND_MODE(enum a2xx_pa_roundmode val) +{ + return ((val) << A2XX_PA_SU_VTX_CNTL_ROUND_MODE__SHIFT) & A2XX_PA_SU_VTX_CNTL_ROUND_MODE__MASK; +} +#define A2XX_PA_SU_VTX_CNTL_QUANT_MODE__MASK 0x00000380 +#define A2XX_PA_SU_VTX_CNTL_QUANT_MODE__SHIFT 7 +static inline uint32_t 
A2XX_PA_SU_VTX_CNTL_QUANT_MODE(enum a2xx_pa_quantmode val) +{ + return ((val) << A2XX_PA_SU_VTX_CNTL_QUANT_MODE__SHIFT) & A2XX_PA_SU_VTX_CNTL_QUANT_MODE__MASK; +} + +#define REG_A2XX_PA_CL_GB_VERT_CLIP_ADJ 0x00002303 +#define A2XX_PA_CL_GB_VERT_CLIP_ADJ__MASK 0xffffffff +#define A2XX_PA_CL_GB_VERT_CLIP_ADJ__SHIFT 0 +static inline uint32_t A2XX_PA_CL_GB_VERT_CLIP_ADJ(float val) +{ + return ((fui(val)) << A2XX_PA_CL_GB_VERT_CLIP_ADJ__SHIFT) & A2XX_PA_CL_GB_VERT_CLIP_ADJ__MASK; +} + +#define REG_A2XX_PA_CL_GB_VERT_DISC_ADJ 0x00002304 +#define A2XX_PA_CL_GB_VERT_DISC_ADJ__MASK 0xffffffff +#define A2XX_PA_CL_GB_VERT_DISC_ADJ__SHIFT 0 +static inline uint32_t A2XX_PA_CL_GB_VERT_DISC_ADJ(float val) +{ + return ((fui(val)) << A2XX_PA_CL_GB_VERT_DISC_ADJ__SHIFT) & A2XX_PA_CL_GB_VERT_DISC_ADJ__MASK; +} + +#define REG_A2XX_PA_CL_GB_HORZ_CLIP_ADJ 0x00002305 +#define A2XX_PA_CL_GB_HORZ_CLIP_ADJ__MASK 0xffffffff +#define A2XX_PA_CL_GB_HORZ_CLIP_ADJ__SHIFT 0 +static inline uint32_t A2XX_PA_CL_GB_HORZ_CLIP_ADJ(float val) +{ + return ((fui(val)) << A2XX_PA_CL_GB_HORZ_CLIP_ADJ__SHIFT) & A2XX_PA_CL_GB_HORZ_CLIP_ADJ__MASK; +} + +#define REG_A2XX_PA_CL_GB_HORZ_DISC_ADJ 0x00002306 +#define A2XX_PA_CL_GB_HORZ_DISC_ADJ__MASK 0xffffffff +#define A2XX_PA_CL_GB_HORZ_DISC_ADJ__SHIFT 0 +static inline uint32_t A2XX_PA_CL_GB_HORZ_DISC_ADJ(float val) +{ + return ((fui(val)) << A2XX_PA_CL_GB_HORZ_DISC_ADJ__SHIFT) & A2XX_PA_CL_GB_HORZ_DISC_ADJ__MASK; +} + +#define REG_A2XX_SQ_VS_CONST 0x00002307 +#define A2XX_SQ_VS_CONST_BASE__MASK 0x000001ff +#define A2XX_SQ_VS_CONST_BASE__SHIFT 0 +static inline uint32_t A2XX_SQ_VS_CONST_BASE(uint32_t val) +{ + return ((val) << A2XX_SQ_VS_CONST_BASE__SHIFT) & A2XX_SQ_VS_CONST_BASE__MASK; +} +#define A2XX_SQ_VS_CONST_SIZE__MASK 0x001ff000 +#define A2XX_SQ_VS_CONST_SIZE__SHIFT 12 +static inline uint32_t A2XX_SQ_VS_CONST_SIZE(uint32_t val) +{ + return ((val) << A2XX_SQ_VS_CONST_SIZE__SHIFT) & A2XX_SQ_VS_CONST_SIZE__MASK; +} + +#define REG_A2XX_SQ_PS_CONST 0x00002308 
+#define A2XX_SQ_PS_CONST_BASE__MASK 0x000001ff +#define A2XX_SQ_PS_CONST_BASE__SHIFT 0 +static inline uint32_t A2XX_SQ_PS_CONST_BASE(uint32_t val) +{ + return ((val) << A2XX_SQ_PS_CONST_BASE__SHIFT) & A2XX_SQ_PS_CONST_BASE__MASK; +} +#define A2XX_SQ_PS_CONST_SIZE__MASK 0x001ff000 +#define A2XX_SQ_PS_CONST_SIZE__SHIFT 12 +static inline uint32_t A2XX_SQ_PS_CONST_SIZE(uint32_t val) +{ + return ((val) << A2XX_SQ_PS_CONST_SIZE__SHIFT) & A2XX_SQ_PS_CONST_SIZE__MASK; +} + +#define REG_A2XX_SQ_DEBUG_MISC_0 0x00002309 + +#define REG_A2XX_SQ_DEBUG_MISC_1 0x0000230a + +#define REG_A2XX_PA_SC_AA_MASK 0x00002312 + +#define REG_A2XX_VGT_VERTEX_REUSE_BLOCK_CNTL 0x00002316 +#define A2XX_VGT_VERTEX_REUSE_BLOCK_CNTL_VTX_REUSE_DEPTH__MASK 0x00000007 +#define A2XX_VGT_VERTEX_REUSE_BLOCK_CNTL_VTX_REUSE_DEPTH__SHIFT 0 +static inline uint32_t A2XX_VGT_VERTEX_REUSE_BLOCK_CNTL_VTX_REUSE_DEPTH(uint32_t val) +{ + return ((val) << A2XX_VGT_VERTEX_REUSE_BLOCK_CNTL_VTX_REUSE_DEPTH__SHIFT) & A2XX_VGT_VERTEX_REUSE_BLOCK_CNTL_VTX_REUSE_DEPTH__MASK; +} + +#define REG_A2XX_VGT_OUT_DEALLOC_CNTL 0x00002317 +#define A2XX_VGT_OUT_DEALLOC_CNTL_DEALLOC_DIST__MASK 0x00000003 +#define A2XX_VGT_OUT_DEALLOC_CNTL_DEALLOC_DIST__SHIFT 0 +static inline uint32_t A2XX_VGT_OUT_DEALLOC_CNTL_DEALLOC_DIST(uint32_t val) +{ + return ((val) << A2XX_VGT_OUT_DEALLOC_CNTL_DEALLOC_DIST__SHIFT) & A2XX_VGT_OUT_DEALLOC_CNTL_DEALLOC_DIST__MASK; +} + +#define REG_A2XX_RB_COPY_CONTROL 0x00002318 +#define A2XX_RB_COPY_CONTROL_COPY_SAMPLE_SELECT__MASK 0x00000007 +#define A2XX_RB_COPY_CONTROL_COPY_SAMPLE_SELECT__SHIFT 0 +static inline uint32_t A2XX_RB_COPY_CONTROL_COPY_SAMPLE_SELECT(enum a2xx_rb_copy_sample_select val) +{ + return ((val) << A2XX_RB_COPY_CONTROL_COPY_SAMPLE_SELECT__SHIFT) & A2XX_RB_COPY_CONTROL_COPY_SAMPLE_SELECT__MASK; +} +#define A2XX_RB_COPY_CONTROL_DEPTH_CLEAR_ENABLE 0x00000008 +#define A2XX_RB_COPY_CONTROL_CLEAR_MASK__MASK 0x000000f0 +#define A2XX_RB_COPY_CONTROL_CLEAR_MASK__SHIFT 4 +static inline uint32_t 
A2XX_RB_COPY_CONTROL_CLEAR_MASK(uint32_t val) +{ + return ((val) << A2XX_RB_COPY_CONTROL_CLEAR_MASK__SHIFT) & A2XX_RB_COPY_CONTROL_CLEAR_MASK__MASK; +} + +#define REG_A2XX_RB_COPY_DEST_BASE 0x00002319 + +#define REG_A2XX_RB_COPY_DEST_PITCH 0x0000231a +#define A2XX_RB_COPY_DEST_PITCH__MASK 0xffffffff +#define A2XX_RB_COPY_DEST_PITCH__SHIFT 0 +static inline uint32_t A2XX_RB_COPY_DEST_PITCH(uint32_t val) +{ + assert(!(val & 0x1f)); + return ((val >> 5) << A2XX_RB_COPY_DEST_PITCH__SHIFT) & A2XX_RB_COPY_DEST_PITCH__MASK; +} + +#define REG_A2XX_RB_COPY_DEST_INFO 0x0000231b +#define A2XX_RB_COPY_DEST_INFO_DEST_ENDIAN__MASK 0x00000007 +#define A2XX_RB_COPY_DEST_INFO_DEST_ENDIAN__SHIFT 0 +static inline uint32_t A2XX_RB_COPY_DEST_INFO_DEST_ENDIAN(enum adreno_rb_surface_endian val) +{ + return ((val) << A2XX_RB_COPY_DEST_INFO_DEST_ENDIAN__SHIFT) & A2XX_RB_COPY_DEST_INFO_DEST_ENDIAN__MASK; +} +#define A2XX_RB_COPY_DEST_INFO_LINEAR 0x00000008 +#define A2XX_RB_COPY_DEST_INFO_FORMAT__MASK 0x000000f0 +#define A2XX_RB_COPY_DEST_INFO_FORMAT__SHIFT 4 +static inline uint32_t A2XX_RB_COPY_DEST_INFO_FORMAT(enum a2xx_colorformatx val) +{ + return ((val) << A2XX_RB_COPY_DEST_INFO_FORMAT__SHIFT) & A2XX_RB_COPY_DEST_INFO_FORMAT__MASK; +} +#define A2XX_RB_COPY_DEST_INFO_SWAP__MASK 0x00000300 +#define A2XX_RB_COPY_DEST_INFO_SWAP__SHIFT 8 +static inline uint32_t A2XX_RB_COPY_DEST_INFO_SWAP(uint32_t val) +{ + return ((val) << A2XX_RB_COPY_DEST_INFO_SWAP__SHIFT) & A2XX_RB_COPY_DEST_INFO_SWAP__MASK; +} +#define A2XX_RB_COPY_DEST_INFO_DITHER_MODE__MASK 0x00000c00 +#define A2XX_RB_COPY_DEST_INFO_DITHER_MODE__SHIFT 10 +static inline uint32_t A2XX_RB_COPY_DEST_INFO_DITHER_MODE(enum adreno_rb_dither_mode val) +{ + return ((val) << A2XX_RB_COPY_DEST_INFO_DITHER_MODE__SHIFT) & A2XX_RB_COPY_DEST_INFO_DITHER_MODE__MASK; +} +#define A2XX_RB_COPY_DEST_INFO_DITHER_TYPE__MASK 0x00003000 +#define A2XX_RB_COPY_DEST_INFO_DITHER_TYPE__SHIFT 12 +static inline uint32_t A2XX_RB_COPY_DEST_INFO_DITHER_TYPE(enum 
a2xx_rb_dither_type val) +{ + return ((val) << A2XX_RB_COPY_DEST_INFO_DITHER_TYPE__SHIFT) & A2XX_RB_COPY_DEST_INFO_DITHER_TYPE__MASK; +} +#define A2XX_RB_COPY_DEST_INFO_WRITE_RED 0x00004000 +#define A2XX_RB_COPY_DEST_INFO_WRITE_GREEN 0x00008000 +#define A2XX_RB_COPY_DEST_INFO_WRITE_BLUE 0x00010000 +#define A2XX_RB_COPY_DEST_INFO_WRITE_ALPHA 0x00020000 + +#define REG_A2XX_RB_COPY_DEST_OFFSET 0x0000231c +#define A2XX_RB_COPY_DEST_OFFSET_X__MASK 0x00001fff +#define A2XX_RB_COPY_DEST_OFFSET_X__SHIFT 0 +static inline uint32_t A2XX_RB_COPY_DEST_OFFSET_X(uint32_t val) +{ + return ((val) << A2XX_RB_COPY_DEST_OFFSET_X__SHIFT) & A2XX_RB_COPY_DEST_OFFSET_X__MASK; +} +#define A2XX_RB_COPY_DEST_OFFSET_Y__MASK 0x03ffe000 +#define A2XX_RB_COPY_DEST_OFFSET_Y__SHIFT 13 +static inline uint32_t A2XX_RB_COPY_DEST_OFFSET_Y(uint32_t val) +{ + return ((val) << A2XX_RB_COPY_DEST_OFFSET_Y__SHIFT) & A2XX_RB_COPY_DEST_OFFSET_Y__MASK; +} + +#define REG_A2XX_RB_DEPTH_CLEAR 0x0000231d + +#define REG_A2XX_RB_SAMPLE_COUNT_CTL 0x00002324 + +#define REG_A2XX_RB_COLOR_DEST_MASK 0x00002326 + +#define REG_A2XX_A225_GRAS_UCP0X 0x00002340 + +#define REG_A2XX_A225_GRAS_UCP5W 0x00002357 + +#define REG_A2XX_A225_GRAS_UCP_ENABLED 0x00002360 + +#define REG_A2XX_PA_SU_POLY_OFFSET_FRONT_SCALE 0x00002380 + +#define REG_A2XX_PA_SU_POLY_OFFSET_BACK_OFFSET 0x00002383 + +#define REG_A2XX_SQ_CONSTANT_0 0x00004000 + +#define REG_A2XX_SQ_FETCH_0 0x00004800 + +#define REG_A2XX_SQ_CF_BOOLEANS 0x00004900 + +#define REG_A2XX_SQ_CF_LOOP 0x00004908 + +#define REG_A2XX_COHER_SIZE_PM4 0x00000a29 + +#define REG_A2XX_COHER_BASE_PM4 0x00000a2a + +#define REG_A2XX_COHER_STATUS_PM4 0x00000a2b + +#define REG_A2XX_PA_SU_PERFCOUNTER0_SELECT 0x00000c88 + +#define REG_A2XX_PA_SU_PERFCOUNTER1_SELECT 0x00000c89 + +#define REG_A2XX_PA_SU_PERFCOUNTER2_SELECT 0x00000c8a + +#define REG_A2XX_PA_SU_PERFCOUNTER3_SELECT 0x00000c8b + +#define REG_A2XX_PA_SU_PERFCOUNTER0_LOW 0x00000c8c + +#define REG_A2XX_PA_SU_PERFCOUNTER0_HI 0x00000c8d + 
+#define REG_A2XX_PA_SU_PERFCOUNTER1_LOW 0x00000c8e + +#define REG_A2XX_PA_SU_PERFCOUNTER1_HI 0x00000c8f + +#define REG_A2XX_PA_SU_PERFCOUNTER2_LOW 0x00000c90 + +#define REG_A2XX_PA_SU_PERFCOUNTER2_HI 0x00000c91 + +#define REG_A2XX_PA_SU_PERFCOUNTER3_LOW 0x00000c92 + +#define REG_A2XX_PA_SU_PERFCOUNTER3_HI 0x00000c93 + +#define REG_A2XX_PA_SC_PERFCOUNTER0_SELECT 0x00000c98 + +#define REG_A2XX_PA_SC_PERFCOUNTER0_LOW 0x00000c99 + +#define REG_A2XX_PA_SC_PERFCOUNTER0_HI 0x00000c9a + +#define REG_A2XX_VGT_PERFCOUNTER0_SELECT 0x00000c48 + +#define REG_A2XX_VGT_PERFCOUNTER1_SELECT 0x00000c49 + +#define REG_A2XX_VGT_PERFCOUNTER2_SELECT 0x00000c4a + +#define REG_A2XX_VGT_PERFCOUNTER3_SELECT 0x00000c4b + +#define REG_A2XX_VGT_PERFCOUNTER0_LOW 0x00000c4c + +#define REG_A2XX_VGT_PERFCOUNTER1_LOW 0x00000c4e + +#define REG_A2XX_VGT_PERFCOUNTER2_LOW 0x00000c50 + +#define REG_A2XX_VGT_PERFCOUNTER3_LOW 0x00000c52 + +#define REG_A2XX_VGT_PERFCOUNTER0_HI 0x00000c4d + +#define REG_A2XX_VGT_PERFCOUNTER1_HI 0x00000c4f + +#define REG_A2XX_VGT_PERFCOUNTER2_HI 0x00000c51 + +#define REG_A2XX_VGT_PERFCOUNTER3_HI 0x00000c53 + +#define REG_A2XX_TCR_PERFCOUNTER0_SELECT 0x00000e05 + +#define REG_A2XX_TCR_PERFCOUNTER1_SELECT 0x00000e08 + +#define REG_A2XX_TCR_PERFCOUNTER0_HI 0x00000e06 + +#define REG_A2XX_TCR_PERFCOUNTER1_HI 0x00000e09 + +#define REG_A2XX_TCR_PERFCOUNTER0_LOW 0x00000e07 + +#define REG_A2XX_TCR_PERFCOUNTER1_LOW 0x00000e0a + +#define REG_A2XX_TP0_PERFCOUNTER0_SELECT 0x00000e1f + +#define REG_A2XX_TP0_PERFCOUNTER0_HI 0x00000e20 + +#define REG_A2XX_TP0_PERFCOUNTER0_LOW 0x00000e21 + +#define REG_A2XX_TP0_PERFCOUNTER1_SELECT 0x00000e22 + +#define REG_A2XX_TP0_PERFCOUNTER1_HI 0x00000e23 + +#define REG_A2XX_TP0_PERFCOUNTER1_LOW 0x00000e24 + +#define REG_A2XX_TCM_PERFCOUNTER0_SELECT 0x00000e54 + +#define REG_A2XX_TCM_PERFCOUNTER1_SELECT 0x00000e57 + +#define REG_A2XX_TCM_PERFCOUNTER0_HI 0x00000e55 + +#define REG_A2XX_TCM_PERFCOUNTER1_HI 0x00000e58 + +#define REG_A2XX_TCM_PERFCOUNTER0_LOW 
0x00000e56 + +#define REG_A2XX_TCM_PERFCOUNTER1_LOW 0x00000e59 + +#define REG_A2XX_TCF_PERFCOUNTER0_SELECT 0x00000e5a + +#define REG_A2XX_TCF_PERFCOUNTER1_SELECT 0x00000e5d + +#define REG_A2XX_TCF_PERFCOUNTER2_SELECT 0x00000e60 + +#define REG_A2XX_TCF_PERFCOUNTER3_SELECT 0x00000e63 + +#define REG_A2XX_TCF_PERFCOUNTER4_SELECT 0x00000e66 + +#define REG_A2XX_TCF_PERFCOUNTER5_SELECT 0x00000e69 + +#define REG_A2XX_TCF_PERFCOUNTER6_SELECT 0x00000e6c + +#define REG_A2XX_TCF_PERFCOUNTER7_SELECT 0x00000e6f + +#define REG_A2XX_TCF_PERFCOUNTER8_SELECT 0x00000e72 + +#define REG_A2XX_TCF_PERFCOUNTER9_SELECT 0x00000e75 + +#define REG_A2XX_TCF_PERFCOUNTER10_SELECT 0x00000e78 + +#define REG_A2XX_TCF_PERFCOUNTER11_SELECT 0x00000e7b + +#define REG_A2XX_TCF_PERFCOUNTER0_HI 0x00000e5b + +#define REG_A2XX_TCF_PERFCOUNTER1_HI 0x00000e5e + +#define REG_A2XX_TCF_PERFCOUNTER2_HI 0x00000e61 + +#define REG_A2XX_TCF_PERFCOUNTER3_HI 0x00000e64 + +#define REG_A2XX_TCF_PERFCOUNTER4_HI 0x00000e67 + +#define REG_A2XX_TCF_PERFCOUNTER5_HI 0x00000e6a + +#define REG_A2XX_TCF_PERFCOUNTER6_HI 0x00000e6d + +#define REG_A2XX_TCF_PERFCOUNTER7_HI 0x00000e70 + +#define REG_A2XX_TCF_PERFCOUNTER8_HI 0x00000e73 + +#define REG_A2XX_TCF_PERFCOUNTER9_HI 0x00000e76 + +#define REG_A2XX_TCF_PERFCOUNTER10_HI 0x00000e79 + +#define REG_A2XX_TCF_PERFCOUNTER11_HI 0x00000e7c + +#define REG_A2XX_TCF_PERFCOUNTER0_LOW 0x00000e5c + +#define REG_A2XX_TCF_PERFCOUNTER1_LOW 0x00000e5f + +#define REG_A2XX_TCF_PERFCOUNTER2_LOW 0x00000e62 + +#define REG_A2XX_TCF_PERFCOUNTER3_LOW 0x00000e65 + +#define REG_A2XX_TCF_PERFCOUNTER4_LOW 0x00000e68 + +#define REG_A2XX_TCF_PERFCOUNTER5_LOW 0x00000e6b + +#define REG_A2XX_TCF_PERFCOUNTER6_LOW 0x00000e6e + +#define REG_A2XX_TCF_PERFCOUNTER7_LOW 0x00000e71 + +#define REG_A2XX_TCF_PERFCOUNTER8_LOW 0x00000e74 + +#define REG_A2XX_TCF_PERFCOUNTER9_LOW 0x00000e77 + +#define REG_A2XX_TCF_PERFCOUNTER10_LOW 0x00000e7a + +#define REG_A2XX_TCF_PERFCOUNTER11_LOW 0x00000e7d + +#define 
REG_A2XX_SQ_PERFCOUNTER0_SELECT 0x00000dc8 + +#define REG_A2XX_SQ_PERFCOUNTER1_SELECT 0x00000dc9 + +#define REG_A2XX_SQ_PERFCOUNTER2_SELECT 0x00000dca + +#define REG_A2XX_SQ_PERFCOUNTER3_SELECT 0x00000dcb + +#define REG_A2XX_SQ_PERFCOUNTER0_LOW 0x00000dcc + +#define REG_A2XX_SQ_PERFCOUNTER0_HI 0x00000dcd + +#define REG_A2XX_SQ_PERFCOUNTER1_LOW 0x00000dce + +#define REG_A2XX_SQ_PERFCOUNTER1_HI 0x00000dcf + +#define REG_A2XX_SQ_PERFCOUNTER2_LOW 0x00000dd0 + +#define REG_A2XX_SQ_PERFCOUNTER2_HI 0x00000dd1 + +#define REG_A2XX_SQ_PERFCOUNTER3_LOW 0x00000dd2 + +#define REG_A2XX_SQ_PERFCOUNTER3_HI 0x00000dd3 + +#define REG_A2XX_SX_PERFCOUNTER0_SELECT 0x00000dd4 + +#define REG_A2XX_SX_PERFCOUNTER0_LOW 0x00000dd8 + +#define REG_A2XX_SX_PERFCOUNTER0_HI 0x00000dd9 + +#define REG_A2XX_MH_PERFCOUNTER0_SELECT 0x00000a46 + +#define REG_A2XX_MH_PERFCOUNTER1_SELECT 0x00000a4a + +#define REG_A2XX_MH_PERFCOUNTER0_CONFIG 0x00000a47 + +#define REG_A2XX_MH_PERFCOUNTER1_CONFIG 0x00000a4b + +#define REG_A2XX_MH_PERFCOUNTER0_LOW 0x00000a48 + +#define REG_A2XX_MH_PERFCOUNTER1_LOW 0x00000a4c + +#define REG_A2XX_MH_PERFCOUNTER0_HI 0x00000a49 + +#define REG_A2XX_MH_PERFCOUNTER1_HI 0x00000a4d + +#define REG_A2XX_RBBM_PERFCOUNTER1_SELECT 0x00000395 + +#define REG_A2XX_RBBM_PERFCOUNTER1_LO 0x00000397 + +#define REG_A2XX_RBBM_PERFCOUNTER1_HI 0x00000398 + +#define REG_A2XX_CP_PERFCOUNTER_SELECT 0x00000445 + +#define REG_A2XX_CP_PERFCOUNTER_LO 0x00000446 + +#define REG_A2XX_CP_PERFCOUNTER_HI 0x00000447 + +#define REG_A2XX_RB_PERFCOUNTER0_SELECT 0x00000f04 + +#define REG_A2XX_RB_PERFCOUNTER0_LOW 0x00000f08 + +#define REG_A2XX_RB_PERFCOUNTER0_HI 0x00000f09 + +#define REG_A2XX_SQ_TEX_0 0x00000000 +#define A2XX_SQ_TEX_0_TYPE__MASK 0x00000003 +#define A2XX_SQ_TEX_0_TYPE__SHIFT 0 +static inline uint32_t A2XX_SQ_TEX_0_TYPE(enum sq_tex_type val) +{ + return ((val) << A2XX_SQ_TEX_0_TYPE__SHIFT) & A2XX_SQ_TEX_0_TYPE__MASK; +} +#define A2XX_SQ_TEX_0_SIGN_X__MASK 0x0000000c +#define A2XX_SQ_TEX_0_SIGN_X__SHIFT 
2 +static inline uint32_t A2XX_SQ_TEX_0_SIGN_X(enum sq_tex_sign val) +{ + return ((val) << A2XX_SQ_TEX_0_SIGN_X__SHIFT) & A2XX_SQ_TEX_0_SIGN_X__MASK; +} +#define A2XX_SQ_TEX_0_SIGN_Y__MASK 0x00000030 +#define A2XX_SQ_TEX_0_SIGN_Y__SHIFT 4 +static inline uint32_t A2XX_SQ_TEX_0_SIGN_Y(enum sq_tex_sign val) +{ + return ((val) << A2XX_SQ_TEX_0_SIGN_Y__SHIFT) & A2XX_SQ_TEX_0_SIGN_Y__MASK; +} +#define A2XX_SQ_TEX_0_SIGN_Z__MASK 0x000000c0 +#define A2XX_SQ_TEX_0_SIGN_Z__SHIFT 6 +static inline uint32_t A2XX_SQ_TEX_0_SIGN_Z(enum sq_tex_sign val) +{ + return ((val) << A2XX_SQ_TEX_0_SIGN_Z__SHIFT) & A2XX_SQ_TEX_0_SIGN_Z__MASK; +} +#define A2XX_SQ_TEX_0_SIGN_W__MASK 0x00000300 +#define A2XX_SQ_TEX_0_SIGN_W__SHIFT 8 +static inline uint32_t A2XX_SQ_TEX_0_SIGN_W(enum sq_tex_sign val) +{ + return ((val) << A2XX_SQ_TEX_0_SIGN_W__SHIFT) & A2XX_SQ_TEX_0_SIGN_W__MASK; +} +#define A2XX_SQ_TEX_0_CLAMP_X__MASK 0x00001c00 +#define A2XX_SQ_TEX_0_CLAMP_X__SHIFT 10 +static inline uint32_t A2XX_SQ_TEX_0_CLAMP_X(enum sq_tex_clamp val) +{ + return ((val) << A2XX_SQ_TEX_0_CLAMP_X__SHIFT) & A2XX_SQ_TEX_0_CLAMP_X__MASK; +} +#define A2XX_SQ_TEX_0_CLAMP_Y__MASK 0x0000e000 +#define A2XX_SQ_TEX_0_CLAMP_Y__SHIFT 13 +static inline uint32_t A2XX_SQ_TEX_0_CLAMP_Y(enum sq_tex_clamp val) +{ + return ((val) << A2XX_SQ_TEX_0_CLAMP_Y__SHIFT) & A2XX_SQ_TEX_0_CLAMP_Y__MASK; +} +#define A2XX_SQ_TEX_0_CLAMP_Z__MASK 0x00070000 +#define A2XX_SQ_TEX_0_CLAMP_Z__SHIFT 16 +static inline uint32_t A2XX_SQ_TEX_0_CLAMP_Z(enum sq_tex_clamp val) +{ + return ((val) << A2XX_SQ_TEX_0_CLAMP_Z__SHIFT) & A2XX_SQ_TEX_0_CLAMP_Z__MASK; +} +#define A2XX_SQ_TEX_0_PITCH__MASK 0x7fc00000 +#define A2XX_SQ_TEX_0_PITCH__SHIFT 22 +static inline uint32_t A2XX_SQ_TEX_0_PITCH(uint32_t val) +{ + assert(!(val & 0x1f)); + return ((val >> 5) << A2XX_SQ_TEX_0_PITCH__SHIFT) & A2XX_SQ_TEX_0_PITCH__MASK; +} +#define A2XX_SQ_TEX_0_TILED 0x00000002 + +#define REG_A2XX_SQ_TEX_1 0x00000001 +#define A2XX_SQ_TEX_1_FORMAT__MASK 0x0000003f +#define 
A2XX_SQ_TEX_1_FORMAT__SHIFT 0 +static inline uint32_t A2XX_SQ_TEX_1_FORMAT(enum a2xx_sq_surfaceformat val) +{ + return ((val) << A2XX_SQ_TEX_1_FORMAT__SHIFT) & A2XX_SQ_TEX_1_FORMAT__MASK; +} +#define A2XX_SQ_TEX_1_ENDIANNESS__MASK 0x000000c0 +#define A2XX_SQ_TEX_1_ENDIANNESS__SHIFT 6 +static inline uint32_t A2XX_SQ_TEX_1_ENDIANNESS(enum sq_tex_endian val) +{ + return ((val) << A2XX_SQ_TEX_1_ENDIANNESS__SHIFT) & A2XX_SQ_TEX_1_ENDIANNESS__MASK; +} +#define A2XX_SQ_TEX_1_REQUEST_SIZE__MASK 0x00000300 +#define A2XX_SQ_TEX_1_REQUEST_SIZE__SHIFT 8 +static inline uint32_t A2XX_SQ_TEX_1_REQUEST_SIZE(uint32_t val) +{ + return ((val) << A2XX_SQ_TEX_1_REQUEST_SIZE__SHIFT) & A2XX_SQ_TEX_1_REQUEST_SIZE__MASK; +} +#define A2XX_SQ_TEX_1_STACKED 0x00000400 +#define A2XX_SQ_TEX_1_CLAMP_POLICY__MASK 0x00000800 +#define A2XX_SQ_TEX_1_CLAMP_POLICY__SHIFT 11 +static inline uint32_t A2XX_SQ_TEX_1_CLAMP_POLICY(enum sq_tex_clamp_policy val) +{ + return ((val) << A2XX_SQ_TEX_1_CLAMP_POLICY__SHIFT) & A2XX_SQ_TEX_1_CLAMP_POLICY__MASK; +} +#define A2XX_SQ_TEX_1_BASE_ADDRESS__MASK 0xfffff000 +#define A2XX_SQ_TEX_1_BASE_ADDRESS__SHIFT 12 +static inline uint32_t A2XX_SQ_TEX_1_BASE_ADDRESS(uint32_t val) +{ + assert(!(val & 0xfff)); + return ((val >> 12) << A2XX_SQ_TEX_1_BASE_ADDRESS__SHIFT) & A2XX_SQ_TEX_1_BASE_ADDRESS__MASK; +} + +#define REG_A2XX_SQ_TEX_2 0x00000002 +#define A2XX_SQ_TEX_2_WIDTH__MASK 0x00001fff +#define A2XX_SQ_TEX_2_WIDTH__SHIFT 0 +static inline uint32_t A2XX_SQ_TEX_2_WIDTH(uint32_t val) +{ + return ((val) << A2XX_SQ_TEX_2_WIDTH__SHIFT) & A2XX_SQ_TEX_2_WIDTH__MASK; +} +#define A2XX_SQ_TEX_2_HEIGHT__MASK 0x03ffe000 +#define A2XX_SQ_TEX_2_HEIGHT__SHIFT 13 +static inline uint32_t A2XX_SQ_TEX_2_HEIGHT(uint32_t val) +{ + return ((val) << A2XX_SQ_TEX_2_HEIGHT__SHIFT) & A2XX_SQ_TEX_2_HEIGHT__MASK; +} +#define A2XX_SQ_TEX_2_DEPTH__MASK 0xfc000000 +#define A2XX_SQ_TEX_2_DEPTH__SHIFT 26 +static inline uint32_t A2XX_SQ_TEX_2_DEPTH(uint32_t val) +{ + return ((val) << 
A2XX_SQ_TEX_2_DEPTH__SHIFT) & A2XX_SQ_TEX_2_DEPTH__MASK; +} + +#define REG_A2XX_SQ_TEX_3 0x00000003 +#define A2XX_SQ_TEX_3_NUM_FORMAT__MASK 0x00000001 +#define A2XX_SQ_TEX_3_NUM_FORMAT__SHIFT 0 +static inline uint32_t A2XX_SQ_TEX_3_NUM_FORMAT(enum sq_tex_num_format val) +{ + return ((val) << A2XX_SQ_TEX_3_NUM_FORMAT__SHIFT) & A2XX_SQ_TEX_3_NUM_FORMAT__MASK; +} +#define A2XX_SQ_TEX_3_SWIZ_X__MASK 0x0000000e +#define A2XX_SQ_TEX_3_SWIZ_X__SHIFT 1 +static inline uint32_t A2XX_SQ_TEX_3_SWIZ_X(enum sq_tex_swiz val) +{ + return ((val) << A2XX_SQ_TEX_3_SWIZ_X__SHIFT) & A2XX_SQ_TEX_3_SWIZ_X__MASK; +} +#define A2XX_SQ_TEX_3_SWIZ_Y__MASK 0x00000070 +#define A2XX_SQ_TEX_3_SWIZ_Y__SHIFT 4 +static inline uint32_t A2XX_SQ_TEX_3_SWIZ_Y(enum sq_tex_swiz val) +{ + return ((val) << A2XX_SQ_TEX_3_SWIZ_Y__SHIFT) & A2XX_SQ_TEX_3_SWIZ_Y__MASK; +} +#define A2XX_SQ_TEX_3_SWIZ_Z__MASK 0x00000380 +#define A2XX_SQ_TEX_3_SWIZ_Z__SHIFT 7 +static inline uint32_t A2XX_SQ_TEX_3_SWIZ_Z(enum sq_tex_swiz val) +{ + return ((val) << A2XX_SQ_TEX_3_SWIZ_Z__SHIFT) & A2XX_SQ_TEX_3_SWIZ_Z__MASK; +} +#define A2XX_SQ_TEX_3_SWIZ_W__MASK 0x00001c00 +#define A2XX_SQ_TEX_3_SWIZ_W__SHIFT 10 +static inline uint32_t A2XX_SQ_TEX_3_SWIZ_W(enum sq_tex_swiz val) +{ + return ((val) << A2XX_SQ_TEX_3_SWIZ_W__SHIFT) & A2XX_SQ_TEX_3_SWIZ_W__MASK; +} +#define A2XX_SQ_TEX_3_EXP_ADJUST__MASK 0x0007e000 +#define A2XX_SQ_TEX_3_EXP_ADJUST__SHIFT 13 +static inline uint32_t A2XX_SQ_TEX_3_EXP_ADJUST(uint32_t val) +{ + return ((val) << A2XX_SQ_TEX_3_EXP_ADJUST__SHIFT) & A2XX_SQ_TEX_3_EXP_ADJUST__MASK; +} +#define A2XX_SQ_TEX_3_XY_MAG_FILTER__MASK 0x00180000 +#define A2XX_SQ_TEX_3_XY_MAG_FILTER__SHIFT 19 +static inline uint32_t A2XX_SQ_TEX_3_XY_MAG_FILTER(enum sq_tex_filter val) +{ + return ((val) << A2XX_SQ_TEX_3_XY_MAG_FILTER__SHIFT) & A2XX_SQ_TEX_3_XY_MAG_FILTER__MASK; +} +#define A2XX_SQ_TEX_3_XY_MIN_FILTER__MASK 0x00600000 +#define A2XX_SQ_TEX_3_XY_MIN_FILTER__SHIFT 21 +static inline uint32_t A2XX_SQ_TEX_3_XY_MIN_FILTER(enum 
sq_tex_filter val) +{ + return ((val) << A2XX_SQ_TEX_3_XY_MIN_FILTER__SHIFT) & A2XX_SQ_TEX_3_XY_MIN_FILTER__MASK; +} +#define A2XX_SQ_TEX_3_MIP_FILTER__MASK 0x01800000 +#define A2XX_SQ_TEX_3_MIP_FILTER__SHIFT 23 +static inline uint32_t A2XX_SQ_TEX_3_MIP_FILTER(enum sq_tex_filter val) +{ + return ((val) << A2XX_SQ_TEX_3_MIP_FILTER__SHIFT) & A2XX_SQ_TEX_3_MIP_FILTER__MASK; +} +#define A2XX_SQ_TEX_3_ANISO_FILTER__MASK 0x0e000000 +#define A2XX_SQ_TEX_3_ANISO_FILTER__SHIFT 25 +static inline uint32_t A2XX_SQ_TEX_3_ANISO_FILTER(enum sq_tex_aniso_filter val) +{ + return ((val) << A2XX_SQ_TEX_3_ANISO_FILTER__SHIFT) & A2XX_SQ_TEX_3_ANISO_FILTER__MASK; +} +#define A2XX_SQ_TEX_3_BORDER_SIZE__MASK 0x80000000 +#define A2XX_SQ_TEX_3_BORDER_SIZE__SHIFT 31 +static inline uint32_t A2XX_SQ_TEX_3_BORDER_SIZE(uint32_t val) +{ + return ((val) << A2XX_SQ_TEX_3_BORDER_SIZE__SHIFT) & A2XX_SQ_TEX_3_BORDER_SIZE__MASK; +} + +#define REG_A2XX_SQ_TEX_4 0x00000004 +#define A2XX_SQ_TEX_4_VOL_MAG_FILTER__MASK 0x00000001 +#define A2XX_SQ_TEX_4_VOL_MAG_FILTER__SHIFT 0 +static inline uint32_t A2XX_SQ_TEX_4_VOL_MAG_FILTER(enum sq_tex_filter val) +{ + return ((val) << A2XX_SQ_TEX_4_VOL_MAG_FILTER__SHIFT) & A2XX_SQ_TEX_4_VOL_MAG_FILTER__MASK; +} +#define A2XX_SQ_TEX_4_VOL_MIN_FILTER__MASK 0x00000002 +#define A2XX_SQ_TEX_4_VOL_MIN_FILTER__SHIFT 1 +static inline uint32_t A2XX_SQ_TEX_4_VOL_MIN_FILTER(enum sq_tex_filter val) +{ + return ((val) << A2XX_SQ_TEX_4_VOL_MIN_FILTER__SHIFT) & A2XX_SQ_TEX_4_VOL_MIN_FILTER__MASK; +} +#define A2XX_SQ_TEX_4_MIP_MIN_LEVEL__MASK 0x0000003c +#define A2XX_SQ_TEX_4_MIP_MIN_LEVEL__SHIFT 2 +static inline uint32_t A2XX_SQ_TEX_4_MIP_MIN_LEVEL(uint32_t val) +{ + return ((val) << A2XX_SQ_TEX_4_MIP_MIN_LEVEL__SHIFT) & A2XX_SQ_TEX_4_MIP_MIN_LEVEL__MASK; +} +#define A2XX_SQ_TEX_4_MIP_MAX_LEVEL__MASK 0x000003c0 +#define A2XX_SQ_TEX_4_MIP_MAX_LEVEL__SHIFT 6 +static inline uint32_t A2XX_SQ_TEX_4_MIP_MAX_LEVEL(uint32_t val) +{ + return ((val) << A2XX_SQ_TEX_4_MIP_MAX_LEVEL__SHIFT) & 
A2XX_SQ_TEX_4_MIP_MAX_LEVEL__MASK; +} +#define A2XX_SQ_TEX_4_MAX_ANISO_WALK 0x00000400 +#define A2XX_SQ_TEX_4_MIN_ANISO_WALK 0x00000800 +#define A2XX_SQ_TEX_4_LOD_BIAS__MASK 0x003ff000 +#define A2XX_SQ_TEX_4_LOD_BIAS__SHIFT 12 +static inline uint32_t A2XX_SQ_TEX_4_LOD_BIAS(float val) +{ + return ((((int32_t)(val * 32.0))) << A2XX_SQ_TEX_4_LOD_BIAS__SHIFT) & A2XX_SQ_TEX_4_LOD_BIAS__MASK; +} +#define A2XX_SQ_TEX_4_GRAD_EXP_ADJUST_H__MASK 0x07c00000 +#define A2XX_SQ_TEX_4_GRAD_EXP_ADJUST_H__SHIFT 22 +static inline uint32_t A2XX_SQ_TEX_4_GRAD_EXP_ADJUST_H(uint32_t val) +{ + return ((val) << A2XX_SQ_TEX_4_GRAD_EXP_ADJUST_H__SHIFT) & A2XX_SQ_TEX_4_GRAD_EXP_ADJUST_H__MASK; +} +#define A2XX_SQ_TEX_4_GRAD_EXP_ADJUST_V__MASK 0xf8000000 +#define A2XX_SQ_TEX_4_GRAD_EXP_ADJUST_V__SHIFT 27 +static inline uint32_t A2XX_SQ_TEX_4_GRAD_EXP_ADJUST_V(uint32_t val) +{ + return ((val) << A2XX_SQ_TEX_4_GRAD_EXP_ADJUST_V__SHIFT) & A2XX_SQ_TEX_4_GRAD_EXP_ADJUST_V__MASK; +} + +#define REG_A2XX_SQ_TEX_5 0x00000005 +#define A2XX_SQ_TEX_5_BORDER_COLOR__MASK 0x00000003 +#define A2XX_SQ_TEX_5_BORDER_COLOR__SHIFT 0 +static inline uint32_t A2XX_SQ_TEX_5_BORDER_COLOR(enum sq_tex_border_color val) +{ + return ((val) << A2XX_SQ_TEX_5_BORDER_COLOR__SHIFT) & A2XX_SQ_TEX_5_BORDER_COLOR__MASK; +} +#define A2XX_SQ_TEX_5_FORCE_BCW_MAX 0x00000004 +#define A2XX_SQ_TEX_5_TRI_CLAMP__MASK 0x00000018 +#define A2XX_SQ_TEX_5_TRI_CLAMP__SHIFT 3 +static inline uint32_t A2XX_SQ_TEX_5_TRI_CLAMP(uint32_t val) +{ + return ((val) << A2XX_SQ_TEX_5_TRI_CLAMP__SHIFT) & A2XX_SQ_TEX_5_TRI_CLAMP__MASK; +} +#define A2XX_SQ_TEX_5_ANISO_BIAS__MASK 0x000001e0 +#define A2XX_SQ_TEX_5_ANISO_BIAS__SHIFT 5 +static inline uint32_t A2XX_SQ_TEX_5_ANISO_BIAS(float val) +{ + return ((((int32_t)(val * 1.0))) << A2XX_SQ_TEX_5_ANISO_BIAS__SHIFT) & A2XX_SQ_TEX_5_ANISO_BIAS__MASK; +} +#define A2XX_SQ_TEX_5_DIMENSION__MASK 0x00000600 +#define A2XX_SQ_TEX_5_DIMENSION__SHIFT 9 +static inline uint32_t A2XX_SQ_TEX_5_DIMENSION(enum sq_tex_dimension 
val) +{ + return ((val) << A2XX_SQ_TEX_5_DIMENSION__SHIFT) & A2XX_SQ_TEX_5_DIMENSION__MASK; +} +#define A2XX_SQ_TEX_5_PACKED_MIPS 0x00000800 +#define A2XX_SQ_TEX_5_MIP_ADDRESS__MASK 0xfffff000 +#define A2XX_SQ_TEX_5_MIP_ADDRESS__SHIFT 12 +static inline uint32_t A2XX_SQ_TEX_5_MIP_ADDRESS(uint32_t val) +{ + assert(!(val & 0xfff)); + return ((val >> 12) << A2XX_SQ_TEX_5_MIP_ADDRESS__SHIFT) & A2XX_SQ_TEX_5_MIP_ADDRESS__MASK; +} + + +#endif /* A2XX_XML */ diff -Nru mesa-18.3.3/src/freedreno/registers/a3xx.xml.h mesa-19.0.1/src/freedreno/registers/a3xx.xml.h --- mesa-18.3.3/src/freedreno/registers/a3xx.xml.h 1970-01-01 00:00:00.000000000 +0000 +++ mesa-19.0.1/src/freedreno/registers/a3xx.xml.h 2019-03-31 23:16:37.000000000 +0000 @@ -0,0 +1,3239 @@ +#ifndef A3XX_XML +#define A3XX_XML + +/* Autogenerated file, DO NOT EDIT manually! + +This file was generated by the rules-ng-ng headergen tool in this git repository: +http://github.com/freedreno/envytools/ +git clone https://github.com/freedreno/envytools.git + +The rules-ng-ng source files this header was generated from are: +- /home/robclark/src/envytools/rnndb/adreno.xml ( 501 bytes, from 2018-07-03 19:37:13) +- /home/robclark/src/envytools/rnndb/freedreno_copyright.xml ( 1572 bytes, from 2018-07-03 19:37:13) +- /home/robclark/src/envytools/rnndb/adreno/a2xx.xml ( 42463 bytes, from 2018-11-19 13:44:03) +- /home/robclark/src/envytools/rnndb/adreno/adreno_common.xml ( 14239 bytes, from 2018-12-05 15:25:53) +- /home/robclark/src/envytools/rnndb/adreno/adreno_pm4.xml ( 43052 bytes, from 2018-12-21 18:21:34) +- /home/robclark/src/envytools/rnndb/adreno/a3xx.xml ( 83840 bytes, from 2018-07-03 19:37:13) +- /home/robclark/src/envytools/rnndb/adreno/a4xx.xml ( 112086 bytes, from 2018-07-03 19:37:13) +- /home/robclark/src/envytools/rnndb/adreno/a5xx.xml ( 147240 bytes, from 2018-12-21 18:21:34) +- /home/robclark/src/envytools/rnndb/adreno/a6xx.xml ( 141895 bytes, from 2018-12-21 18:21:34) +- 
/home/robclark/src/envytools/rnndb/adreno/a6xx_gmu.xml ( 10431 bytes, from 2018-09-14 13:03:07) +- /home/robclark/src/envytools/rnndb/adreno/ocmem.xml ( 1773 bytes, from 2018-07-03 19:37:13) + +Copyright (C) 2013-2018 by the following authors: +- Rob Clark (robclark) +- Ilia Mirkin (imirkin) + +Permission is hereby granted, free of charge, to any person obtaining +a copy of this software and associated documentation files (the +"Software"), to deal in the Software without restriction, including +without limitation the rights to use, copy, modify, merge, publish, +distribute, sublicense, and/or sell copies of the Software, and to +permit persons to whom the Software is furnished to do so, subject to +the following conditions: + +The above copyright notice and this permission notice (including the +next paragraph) shall be included in all copies or substantial +portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, +EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF +MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. +IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE +LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION +OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION +WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
+*/ + + +enum a3xx_tile_mode { + LINEAR = 0, + TILE_32X32 = 2, +}; + +enum a3xx_state_block_id { + HLSQ_BLOCK_ID_TP_TEX = 2, + HLSQ_BLOCK_ID_TP_MIPMAP = 3, + HLSQ_BLOCK_ID_SP_VS = 4, + HLSQ_BLOCK_ID_SP_FS = 6, +}; + +enum a3xx_cache_opcode { + INVALIDATE = 1, +}; + +enum a3xx_vtx_fmt { + VFMT_32_FLOAT = 0, + VFMT_32_32_FLOAT = 1, + VFMT_32_32_32_FLOAT = 2, + VFMT_32_32_32_32_FLOAT = 3, + VFMT_16_FLOAT = 4, + VFMT_16_16_FLOAT = 5, + VFMT_16_16_16_FLOAT = 6, + VFMT_16_16_16_16_FLOAT = 7, + VFMT_32_FIXED = 8, + VFMT_32_32_FIXED = 9, + VFMT_32_32_32_FIXED = 10, + VFMT_32_32_32_32_FIXED = 11, + VFMT_16_SINT = 16, + VFMT_16_16_SINT = 17, + VFMT_16_16_16_SINT = 18, + VFMT_16_16_16_16_SINT = 19, + VFMT_16_UINT = 20, + VFMT_16_16_UINT = 21, + VFMT_16_16_16_UINT = 22, + VFMT_16_16_16_16_UINT = 23, + VFMT_16_SNORM = 24, + VFMT_16_16_SNORM = 25, + VFMT_16_16_16_SNORM = 26, + VFMT_16_16_16_16_SNORM = 27, + VFMT_16_UNORM = 28, + VFMT_16_16_UNORM = 29, + VFMT_16_16_16_UNORM = 30, + VFMT_16_16_16_16_UNORM = 31, + VFMT_32_UINT = 32, + VFMT_32_32_UINT = 33, + VFMT_32_32_32_UINT = 34, + VFMT_32_32_32_32_UINT = 35, + VFMT_32_SINT = 36, + VFMT_32_32_SINT = 37, + VFMT_32_32_32_SINT = 38, + VFMT_32_32_32_32_SINT = 39, + VFMT_8_UINT = 40, + VFMT_8_8_UINT = 41, + VFMT_8_8_8_UINT = 42, + VFMT_8_8_8_8_UINT = 43, + VFMT_8_UNORM = 44, + VFMT_8_8_UNORM = 45, + VFMT_8_8_8_UNORM = 46, + VFMT_8_8_8_8_UNORM = 47, + VFMT_8_SINT = 48, + VFMT_8_8_SINT = 49, + VFMT_8_8_8_SINT = 50, + VFMT_8_8_8_8_SINT = 51, + VFMT_8_SNORM = 52, + VFMT_8_8_SNORM = 53, + VFMT_8_8_8_SNORM = 54, + VFMT_8_8_8_8_SNORM = 55, + VFMT_10_10_10_2_UINT = 56, + VFMT_10_10_10_2_UNORM = 57, + VFMT_10_10_10_2_SINT = 58, + VFMT_10_10_10_2_SNORM = 59, + VFMT_2_10_10_10_UINT = 60, + VFMT_2_10_10_10_UNORM = 61, + VFMT_2_10_10_10_SINT = 62, + VFMT_2_10_10_10_SNORM = 63, +}; + +enum a3xx_tex_fmt { + TFMT_5_6_5_UNORM = 4, + TFMT_5_5_5_1_UNORM = 5, + TFMT_4_4_4_4_UNORM = 7, + TFMT_Z16_UNORM = 9, + TFMT_X8Z24_UNORM = 10, + TFMT_Z32_FLOAT = 11, 
+ TFMT_UV_64X32 = 16, + TFMT_VU_64X32 = 17, + TFMT_Y_64X32 = 18, + TFMT_NV12_64X32 = 19, + TFMT_UV_LINEAR = 20, + TFMT_VU_LINEAR = 21, + TFMT_Y_LINEAR = 22, + TFMT_NV12_LINEAR = 23, + TFMT_I420_Y = 24, + TFMT_I420_U = 26, + TFMT_I420_V = 27, + TFMT_ATC_RGB = 32, + TFMT_ATC_RGBA_EXPLICIT = 33, + TFMT_ETC1 = 34, + TFMT_ATC_RGBA_INTERPOLATED = 35, + TFMT_DXT1 = 36, + TFMT_DXT3 = 37, + TFMT_DXT5 = 38, + TFMT_2_10_10_10_UNORM = 40, + TFMT_10_10_10_2_UNORM = 41, + TFMT_9_9_9_E5_FLOAT = 42, + TFMT_11_11_10_FLOAT = 43, + TFMT_A8_UNORM = 44, + TFMT_L8_UNORM = 45, + TFMT_L8_A8_UNORM = 47, + TFMT_8_UNORM = 48, + TFMT_8_8_UNORM = 49, + TFMT_8_8_8_UNORM = 50, + TFMT_8_8_8_8_UNORM = 51, + TFMT_8_SNORM = 52, + TFMT_8_8_SNORM = 53, + TFMT_8_8_8_SNORM = 54, + TFMT_8_8_8_8_SNORM = 55, + TFMT_8_UINT = 56, + TFMT_8_8_UINT = 57, + TFMT_8_8_8_UINT = 58, + TFMT_8_8_8_8_UINT = 59, + TFMT_8_SINT = 60, + TFMT_8_8_SINT = 61, + TFMT_8_8_8_SINT = 62, + TFMT_8_8_8_8_SINT = 63, + TFMT_16_FLOAT = 64, + TFMT_16_16_FLOAT = 65, + TFMT_16_16_16_16_FLOAT = 67, + TFMT_16_UINT = 68, + TFMT_16_16_UINT = 69, + TFMT_16_16_16_16_UINT = 71, + TFMT_16_SINT = 72, + TFMT_16_16_SINT = 73, + TFMT_16_16_16_16_SINT = 75, + TFMT_16_UNORM = 76, + TFMT_16_16_UNORM = 77, + TFMT_16_16_16_16_UNORM = 79, + TFMT_16_SNORM = 80, + TFMT_16_16_SNORM = 81, + TFMT_16_16_16_16_SNORM = 83, + TFMT_32_FLOAT = 84, + TFMT_32_32_FLOAT = 85, + TFMT_32_32_32_32_FLOAT = 87, + TFMT_32_UINT = 88, + TFMT_32_32_UINT = 89, + TFMT_32_32_32_32_UINT = 91, + TFMT_32_SINT = 92, + TFMT_32_32_SINT = 93, + TFMT_32_32_32_32_SINT = 95, + TFMT_2_10_10_10_UINT = 96, + TFMT_10_10_10_2_UINT = 97, + TFMT_ETC2_RG11_SNORM = 112, + TFMT_ETC2_RG11_UNORM = 113, + TFMT_ETC2_R11_SNORM = 114, + TFMT_ETC2_R11_UNORM = 115, + TFMT_ETC2_RGBA8 = 116, + TFMT_ETC2_RGB8A1 = 117, + TFMT_ETC2_RGB8 = 118, +}; + +enum a3xx_tex_fetchsize { + TFETCH_DISABLE = 0, + TFETCH_1_BYTE = 1, + TFETCH_2_BYTE = 2, + TFETCH_4_BYTE = 3, + TFETCH_8_BYTE = 4, + TFETCH_16_BYTE = 5, +}; + +enum 
a3xx_color_fmt { + RB_R5G6B5_UNORM = 0, + RB_R5G5B5A1_UNORM = 1, + RB_R4G4B4A4_UNORM = 3, + RB_R8G8B8_UNORM = 4, + RB_R8G8B8A8_UNORM = 8, + RB_R8G8B8A8_SNORM = 9, + RB_R8G8B8A8_UINT = 10, + RB_R8G8B8A8_SINT = 11, + RB_R8G8_UNORM = 12, + RB_R8G8_SNORM = 13, + RB_R8_UINT = 14, + RB_R8_SINT = 15, + RB_R10G10B10A2_UNORM = 16, + RB_A2R10G10B10_UNORM = 17, + RB_R10G10B10A2_UINT = 18, + RB_A2R10G10B10_UINT = 19, + RB_A8_UNORM = 20, + RB_R8_UNORM = 21, + RB_R16_FLOAT = 24, + RB_R16G16_FLOAT = 25, + RB_R16G16B16A16_FLOAT = 27, + RB_R11G11B10_FLOAT = 28, + RB_R16_SNORM = 32, + RB_R16G16_SNORM = 33, + RB_R16G16B16A16_SNORM = 35, + RB_R16_UNORM = 36, + RB_R16G16_UNORM = 37, + RB_R16G16B16A16_UNORM = 39, + RB_R16_SINT = 40, + RB_R16G16_SINT = 41, + RB_R16G16B16A16_SINT = 43, + RB_R16_UINT = 44, + RB_R16G16_UINT = 45, + RB_R16G16B16A16_UINT = 47, + RB_R32_FLOAT = 48, + RB_R32G32_FLOAT = 49, + RB_R32G32B32A32_FLOAT = 51, + RB_R32_SINT = 52, + RB_R32G32_SINT = 53, + RB_R32G32B32A32_SINT = 55, + RB_R32_UINT = 56, + RB_R32G32_UINT = 57, + RB_R32G32B32A32_UINT = 59, +}; + +enum a3xx_cp_perfcounter_select { + CP_ALWAYS_COUNT = 0, + CP_AHB_PFPTRANS_WAIT = 3, + CP_AHB_NRTTRANS_WAIT = 6, + CP_CSF_NRT_READ_WAIT = 8, + CP_CSF_I1_FIFO_FULL = 9, + CP_CSF_I2_FIFO_FULL = 10, + CP_CSF_ST_FIFO_FULL = 11, + CP_RESERVED_12 = 12, + CP_CSF_RING_ROQ_FULL = 13, + CP_CSF_I1_ROQ_FULL = 14, + CP_CSF_I2_ROQ_FULL = 15, + CP_CSF_ST_ROQ_FULL = 16, + CP_RESERVED_17 = 17, + CP_MIU_TAG_MEM_FULL = 18, + CP_MIU_NRT_WRITE_STALLED = 22, + CP_MIU_NRT_READ_STALLED = 23, + CP_ME_REGS_RB_DONE_FIFO_FULL = 26, + CP_ME_REGS_VS_EVENT_FIFO_FULL = 27, + CP_ME_REGS_PS_EVENT_FIFO_FULL = 28, + CP_ME_REGS_CF_EVENT_FIFO_FULL = 29, + CP_ME_MICRO_RB_STARVED = 30, + CP_AHB_RBBM_DWORD_SENT = 40, + CP_ME_BUSY_CLOCKS = 41, + CP_ME_WAIT_CONTEXT_AVAIL = 42, + CP_PFP_TYPE0_PACKET = 43, + CP_PFP_TYPE3_PACKET = 44, + CP_CSF_RB_WPTR_NEQ_RPTR = 45, + CP_CSF_I1_SIZE_NEQ_ZERO = 46, + CP_CSF_I2_SIZE_NEQ_ZERO = 47, + CP_CSF_RBI1I2_FETCHING = 48, 
+}; + +enum a3xx_gras_tse_perfcounter_select { + GRAS_TSEPERF_INPUT_PRIM = 0, + GRAS_TSEPERF_INPUT_NULL_PRIM = 1, + GRAS_TSEPERF_TRIVAL_REJ_PRIM = 2, + GRAS_TSEPERF_CLIPPED_PRIM = 3, + GRAS_TSEPERF_NEW_PRIM = 4, + GRAS_TSEPERF_ZERO_AREA_PRIM = 5, + GRAS_TSEPERF_FACENESS_CULLED_PRIM = 6, + GRAS_TSEPERF_ZERO_PIXEL_PRIM = 7, + GRAS_TSEPERF_OUTPUT_NULL_PRIM = 8, + GRAS_TSEPERF_OUTPUT_VISIBLE_PRIM = 9, + GRAS_TSEPERF_PRE_CLIP_PRIM = 10, + GRAS_TSEPERF_POST_CLIP_PRIM = 11, + GRAS_TSEPERF_WORKING_CYCLES = 12, + GRAS_TSEPERF_PC_STARVE = 13, + GRAS_TSERASPERF_STALL = 14, +}; + +enum a3xx_gras_ras_perfcounter_select { + GRAS_RASPERF_16X16_TILES = 0, + GRAS_RASPERF_8X8_TILES = 1, + GRAS_RASPERF_4X4_TILES = 2, + GRAS_RASPERF_WORKING_CYCLES = 3, + GRAS_RASPERF_STALL_CYCLES_BY_RB = 4, + GRAS_RASPERF_STALL_CYCLES_BY_VSC = 5, + GRAS_RASPERF_STARVE_CYCLES_BY_TSE = 6, +}; + +enum a3xx_hlsq_perfcounter_select { + HLSQ_PERF_SP_VS_CONSTANT = 0, + HLSQ_PERF_SP_VS_INSTRUCTIONS = 1, + HLSQ_PERF_SP_FS_CONSTANT = 2, + HLSQ_PERF_SP_FS_INSTRUCTIONS = 3, + HLSQ_PERF_TP_STATE = 4, + HLSQ_PERF_QUADS = 5, + HLSQ_PERF_PIXELS = 6, + HLSQ_PERF_VERTICES = 7, + HLSQ_PERF_FS8_THREADS = 8, + HLSQ_PERF_FS16_THREADS = 9, + HLSQ_PERF_FS32_THREADS = 10, + HLSQ_PERF_VS8_THREADS = 11, + HLSQ_PERF_VS16_THREADS = 12, + HLSQ_PERF_SP_VS_DATA_BYTES = 13, + HLSQ_PERF_SP_FS_DATA_BYTES = 14, + HLSQ_PERF_ACTIVE_CYCLES = 15, + HLSQ_PERF_STALL_CYCLES_SP_STATE = 16, + HLSQ_PERF_STALL_CYCLES_SP_VS = 17, + HLSQ_PERF_STALL_CYCLES_SP_FS = 18, + HLSQ_PERF_STALL_CYCLES_UCHE = 19, + HLSQ_PERF_RBBM_LOAD_CYCLES = 20, + HLSQ_PERF_DI_TO_VS_START_SP0 = 21, + HLSQ_PERF_DI_TO_FS_START_SP0 = 22, + HLSQ_PERF_VS_START_TO_DONE_SP0 = 23, + HLSQ_PERF_FS_START_TO_DONE_SP0 = 24, + HLSQ_PERF_SP_STATE_COPY_CYCLES_VS = 25, + HLSQ_PERF_SP_STATE_COPY_CYCLES_FS = 26, + HLSQ_PERF_UCHE_LATENCY_CYCLES = 27, + HLSQ_PERF_UCHE_LATENCY_COUNT = 28, +}; + +enum a3xx_pc_perfcounter_select { + PC_PCPERF_VISIBILITY_STREAMS = 0, + PC_PCPERF_TOTAL_INSTANCES = 1, 
+ PC_PCPERF_PRIMITIVES_PC_VPC = 2, + PC_PCPERF_PRIMITIVES_KILLED_BY_VS = 3, + PC_PCPERF_PRIMITIVES_VISIBLE_BY_VS = 4, + PC_PCPERF_DRAWCALLS_KILLED_BY_VS = 5, + PC_PCPERF_DRAWCALLS_VISIBLE_BY_VS = 6, + PC_PCPERF_VERTICES_TO_VFD = 7, + PC_PCPERF_REUSED_VERTICES = 8, + PC_PCPERF_CYCLES_STALLED_BY_VFD = 9, + PC_PCPERF_CYCLES_STALLED_BY_TSE = 10, + PC_PCPERF_CYCLES_STALLED_BY_VBIF = 11, + PC_PCPERF_CYCLES_IS_WORKING = 12, +}; + +enum a3xx_rb_perfcounter_select { + RB_RBPERF_ACTIVE_CYCLES_ANY = 0, + RB_RBPERF_ACTIVE_CYCLES_ALL = 1, + RB_RBPERF_STARVE_CYCLES_BY_SP = 2, + RB_RBPERF_STARVE_CYCLES_BY_RAS = 3, + RB_RBPERF_STARVE_CYCLES_BY_MARB = 4, + RB_RBPERF_STALL_CYCLES_BY_MARB = 5, + RB_RBPERF_STALL_CYCLES_BY_HLSQ = 6, + RB_RBPERF_RB_MARB_DATA = 7, + RB_RBPERF_SP_RB_QUAD = 8, + RB_RBPERF_RAS_EARLY_Z_QUADS = 9, + RB_RBPERF_GMEM_CH0_READ = 10, + RB_RBPERF_GMEM_CH1_READ = 11, + RB_RBPERF_GMEM_CH0_WRITE = 12, + RB_RBPERF_GMEM_CH1_WRITE = 13, + RB_RBPERF_CP_CONTEXT_DONE = 14, + RB_RBPERF_CP_CACHE_FLUSH = 15, + RB_RBPERF_CP_ZPASS_DONE = 16, +}; + +enum a3xx_rbbm_perfcounter_select { + RBBM_ALAWYS_ON = 0, + RBBM_VBIF_BUSY = 1, + RBBM_TSE_BUSY = 2, + RBBM_RAS_BUSY = 3, + RBBM_PC_DCALL_BUSY = 4, + RBBM_PC_VSD_BUSY = 5, + RBBM_VFD_BUSY = 6, + RBBM_VPC_BUSY = 7, + RBBM_UCHE_BUSY = 8, + RBBM_VSC_BUSY = 9, + RBBM_HLSQ_BUSY = 10, + RBBM_ANY_RB_BUSY = 11, + RBBM_ANY_TEX_BUSY = 12, + RBBM_ANY_USP_BUSY = 13, + RBBM_ANY_MARB_BUSY = 14, + RBBM_ANY_ARB_BUSY = 15, + RBBM_AHB_STATUS_BUSY = 16, + RBBM_AHB_STATUS_STALLED = 17, + RBBM_AHB_STATUS_TXFR = 18, + RBBM_AHB_STATUS_TXFR_SPLIT = 19, + RBBM_AHB_STATUS_TXFR_ERROR = 20, + RBBM_AHB_STATUS_LONG_STALL = 21, + RBBM_RBBM_STATUS_MASKED = 22, +}; + +enum a3xx_sp_perfcounter_select { + SP_LM_LOAD_INSTRUCTIONS = 0, + SP_LM_STORE_INSTRUCTIONS = 1, + SP_LM_ATOMICS = 2, + SP_UCHE_LOAD_INSTRUCTIONS = 3, + SP_UCHE_STORE_INSTRUCTIONS = 4, + SP_UCHE_ATOMICS = 5, + SP_VS_TEX_INSTRUCTIONS = 6, + SP_VS_CFLOW_INSTRUCTIONS = 7, + SP_VS_EFU_INSTRUCTIONS = 8, + 
SP_VS_FULL_ALU_INSTRUCTIONS = 9, + SP_VS_HALF_ALU_INSTRUCTIONS = 10, + SP_FS_TEX_INSTRUCTIONS = 11, + SP_FS_CFLOW_INSTRUCTIONS = 12, + SP_FS_EFU_INSTRUCTIONS = 13, + SP_FS_FULL_ALU_INSTRUCTIONS = 14, + SP_FS_HALF_ALU_INSTRUCTIONS = 15, + SP_FS_BARY_INSTRUCTIONS = 16, + SP_VS_INSTRUCTIONS = 17, + SP_FS_INSTRUCTIONS = 18, + SP_ADDR_LOCK_COUNT = 19, + SP_UCHE_READ_TRANS = 20, + SP_UCHE_WRITE_TRANS = 21, + SP_EXPORT_VPC_TRANS = 22, + SP_EXPORT_RB_TRANS = 23, + SP_PIXELS_KILLED = 24, + SP_ICL1_REQUESTS = 25, + SP_ICL1_MISSES = 26, + SP_ICL0_REQUESTS = 27, + SP_ICL0_MISSES = 28, + SP_ALU_ACTIVE_CYCLES = 29, + SP_EFU_ACTIVE_CYCLES = 30, + SP_STALL_CYCLES_BY_VPC = 31, + SP_STALL_CYCLES_BY_TP = 32, + SP_STALL_CYCLES_BY_UCHE = 33, + SP_STALL_CYCLES_BY_RB = 34, + SP_ACTIVE_CYCLES_ANY = 35, + SP_ACTIVE_CYCLES_ALL = 36, +}; + +enum a3xx_tp_perfcounter_select { + TPL1_TPPERF_L1_REQUESTS = 0, + TPL1_TPPERF_TP0_L1_REQUESTS = 1, + TPL1_TPPERF_TP0_L1_MISSES = 2, + TPL1_TPPERF_TP1_L1_REQUESTS = 3, + TPL1_TPPERF_TP1_L1_MISSES = 4, + TPL1_TPPERF_TP2_L1_REQUESTS = 5, + TPL1_TPPERF_TP2_L1_MISSES = 6, + TPL1_TPPERF_TP3_L1_REQUESTS = 7, + TPL1_TPPERF_TP3_L1_MISSES = 8, + TPL1_TPPERF_OUTPUT_TEXELS_POINT = 9, + TPL1_TPPERF_OUTPUT_TEXELS_BILINEAR = 10, + TPL1_TPPERF_OUTPUT_TEXELS_MIP = 11, + TPL1_TPPERF_OUTPUT_TEXELS_ANISO = 12, + TPL1_TPPERF_BILINEAR_OPS = 13, + TPL1_TPPERF_QUADSQUADS_OFFSET = 14, + TPL1_TPPERF_QUADQUADS_SHADOW = 15, + TPL1_TPPERF_QUADS_ARRAY = 16, + TPL1_TPPERF_QUADS_PROJECTION = 17, + TPL1_TPPERF_QUADS_GRADIENT = 18, + TPL1_TPPERF_QUADS_1D2D = 19, + TPL1_TPPERF_QUADS_3DCUBE = 20, + TPL1_TPPERF_ZERO_LOD = 21, + TPL1_TPPERF_OUTPUT_TEXELS = 22, + TPL1_TPPERF_ACTIVE_CYCLES_ANY = 23, + TPL1_TPPERF_ACTIVE_CYCLES_ALL = 24, + TPL1_TPPERF_STALL_CYCLES_BY_ARB = 25, + TPL1_TPPERF_LATENCY = 26, + TPL1_TPPERF_LATENCY_TRANS = 27, +}; + +enum a3xx_vfd_perfcounter_select { + VFD_PERF_UCHE_BYTE_FETCHED = 0, + VFD_PERF_UCHE_TRANS = 1, + VFD_PERF_VPC_BYPASS_COMPONENTS = 2, + 
VFD_PERF_FETCH_INSTRUCTIONS = 3, + VFD_PERF_DECODE_INSTRUCTIONS = 4, + VFD_PERF_ACTIVE_CYCLES = 5, + VFD_PERF_STALL_CYCLES_UCHE = 6, + VFD_PERF_STALL_CYCLES_HLSQ = 7, + VFD_PERF_STALL_CYCLES_VPC_BYPASS = 8, + VFD_PERF_STALL_CYCLES_VPC_ALLOC = 9, +}; + +enum a3xx_vpc_perfcounter_select { + VPC_PERF_SP_LM_PRIMITIVES = 0, + VPC_PERF_COMPONENTS_FROM_SP = 1, + VPC_PERF_SP_LM_COMPONENTS = 2, + VPC_PERF_ACTIVE_CYCLES = 3, + VPC_PERF_STALL_CYCLES_LM = 4, + VPC_PERF_STALL_CYCLES_RAS = 5, +}; + +enum a3xx_uche_perfcounter_select { + UCHE_UCHEPERF_VBIF_READ_BEATS_TP = 0, + UCHE_UCHEPERF_VBIF_READ_BEATS_VFD = 1, + UCHE_UCHEPERF_VBIF_READ_BEATS_HLSQ = 2, + UCHE_UCHEPERF_VBIF_READ_BEATS_MARB = 3, + UCHE_UCHEPERF_VBIF_READ_BEATS_SP = 4, + UCHE_UCHEPERF_READ_REQUESTS_TP = 8, + UCHE_UCHEPERF_READ_REQUESTS_VFD = 9, + UCHE_UCHEPERF_READ_REQUESTS_HLSQ = 10, + UCHE_UCHEPERF_READ_REQUESTS_MARB = 11, + UCHE_UCHEPERF_READ_REQUESTS_SP = 12, + UCHE_UCHEPERF_WRITE_REQUESTS_MARB = 13, + UCHE_UCHEPERF_WRITE_REQUESTS_SP = 14, + UCHE_UCHEPERF_TAG_CHECK_FAILS = 15, + UCHE_UCHEPERF_EVICTS = 16, + UCHE_UCHEPERF_FLUSHES = 17, + UCHE_UCHEPERF_VBIF_LATENCY_CYCLES = 18, + UCHE_UCHEPERF_VBIF_LATENCY_SAMPLES = 19, + UCHE_UCHEPERF_ACTIVE_CYCLES = 20, +}; + +enum a3xx_intp_mode { + SMOOTH = 0, + FLAT = 1, + ZERO = 2, + ONE = 3, +}; + +enum a3xx_repl_mode { + S = 1, + T = 2, + ONE_T = 3, +}; + +enum a3xx_tex_filter { + A3XX_TEX_NEAREST = 0, + A3XX_TEX_LINEAR = 1, + A3XX_TEX_ANISO = 2, +}; + +enum a3xx_tex_clamp { + A3XX_TEX_REPEAT = 0, + A3XX_TEX_CLAMP_TO_EDGE = 1, + A3XX_TEX_MIRROR_REPEAT = 2, + A3XX_TEX_CLAMP_TO_BORDER = 3, + A3XX_TEX_MIRROR_CLAMP = 4, +}; + +enum a3xx_tex_aniso { + A3XX_TEX_ANISO_1 = 0, + A3XX_TEX_ANISO_2 = 1, + A3XX_TEX_ANISO_4 = 2, + A3XX_TEX_ANISO_8 = 3, + A3XX_TEX_ANISO_16 = 4, +}; + +enum a3xx_tex_swiz { + A3XX_TEX_X = 0, + A3XX_TEX_Y = 1, + A3XX_TEX_Z = 2, + A3XX_TEX_W = 3, + A3XX_TEX_ZERO = 4, + A3XX_TEX_ONE = 5, +}; + +enum a3xx_tex_type { + A3XX_TEX_1D = 0, + A3XX_TEX_2D = 1, + 
A3XX_TEX_CUBE = 2, + A3XX_TEX_3D = 3, +}; + +enum a3xx_tex_msaa { + A3XX_TPL1_MSAA1X = 0, + A3XX_TPL1_MSAA2X = 1, + A3XX_TPL1_MSAA4X = 2, + A3XX_TPL1_MSAA8X = 3, +}; + +#define A3XX_INT0_RBBM_GPU_IDLE 0x00000001 +#define A3XX_INT0_RBBM_AHB_ERROR 0x00000002 +#define A3XX_INT0_RBBM_REG_TIMEOUT 0x00000004 +#define A3XX_INT0_RBBM_ME_MS_TIMEOUT 0x00000008 +#define A3XX_INT0_RBBM_PFP_MS_TIMEOUT 0x00000010 +#define A3XX_INT0_RBBM_ATB_BUS_OVERFLOW 0x00000020 +#define A3XX_INT0_VFD_ERROR 0x00000040 +#define A3XX_INT0_CP_SW_INT 0x00000080 +#define A3XX_INT0_CP_T0_PACKET_IN_IB 0x00000100 +#define A3XX_INT0_CP_OPCODE_ERROR 0x00000200 +#define A3XX_INT0_CP_RESERVED_BIT_ERROR 0x00000400 +#define A3XX_INT0_CP_HW_FAULT 0x00000800 +#define A3XX_INT0_CP_DMA 0x00001000 +#define A3XX_INT0_CP_IB2_INT 0x00002000 +#define A3XX_INT0_CP_IB1_INT 0x00004000 +#define A3XX_INT0_CP_RB_INT 0x00008000 +#define A3XX_INT0_CP_REG_PROTECT_FAULT 0x00010000 +#define A3XX_INT0_CP_RB_DONE_TS 0x00020000 +#define A3XX_INT0_CP_VS_DONE_TS 0x00040000 +#define A3XX_INT0_CP_PS_DONE_TS 0x00080000 +#define A3XX_INT0_CACHE_FLUSH_TS 0x00100000 +#define A3XX_INT0_CP_AHB_ERROR_HALT 0x00200000 +#define A3XX_INT0_MISC_HANG_DETECT 0x01000000 +#define A3XX_INT0_UCHE_OOB_ACCESS 0x02000000 +#define REG_A3XX_RBBM_HW_VERSION 0x00000000 + +#define REG_A3XX_RBBM_HW_RELEASE 0x00000001 + +#define REG_A3XX_RBBM_HW_CONFIGURATION 0x00000002 + +#define REG_A3XX_RBBM_CLOCK_CTL 0x00000010 + +#define REG_A3XX_RBBM_SP_HYST_CNT 0x00000012 + +#define REG_A3XX_RBBM_SW_RESET_CMD 0x00000018 + +#define REG_A3XX_RBBM_AHB_CTL0 0x00000020 + +#define REG_A3XX_RBBM_AHB_CTL1 0x00000021 + +#define REG_A3XX_RBBM_AHB_CMD 0x00000022 + +#define REG_A3XX_RBBM_AHB_ERROR_STATUS 0x00000027 + +#define REG_A3XX_RBBM_GPR0_CTL 0x0000002e + +#define REG_A3XX_RBBM_STATUS 0x00000030 +#define A3XX_RBBM_STATUS_HI_BUSY 0x00000001 +#define A3XX_RBBM_STATUS_CP_ME_BUSY 0x00000002 +#define A3XX_RBBM_STATUS_CP_PFP_BUSY 0x00000004 +#define A3XX_RBBM_STATUS_CP_NRT_BUSY 
0x00004000 +#define A3XX_RBBM_STATUS_VBIF_BUSY 0x00008000 +#define A3XX_RBBM_STATUS_TSE_BUSY 0x00010000 +#define A3XX_RBBM_STATUS_RAS_BUSY 0x00020000 +#define A3XX_RBBM_STATUS_RB_BUSY 0x00040000 +#define A3XX_RBBM_STATUS_PC_DCALL_BUSY 0x00080000 +#define A3XX_RBBM_STATUS_PC_VSD_BUSY 0x00100000 +#define A3XX_RBBM_STATUS_VFD_BUSY 0x00200000 +#define A3XX_RBBM_STATUS_VPC_BUSY 0x00400000 +#define A3XX_RBBM_STATUS_UCHE_BUSY 0x00800000 +#define A3XX_RBBM_STATUS_SP_BUSY 0x01000000 +#define A3XX_RBBM_STATUS_TPL1_BUSY 0x02000000 +#define A3XX_RBBM_STATUS_MARB_BUSY 0x04000000 +#define A3XX_RBBM_STATUS_VSC_BUSY 0x08000000 +#define A3XX_RBBM_STATUS_ARB_BUSY 0x10000000 +#define A3XX_RBBM_STATUS_HLSQ_BUSY 0x20000000 +#define A3XX_RBBM_STATUS_GPU_BUSY_NOHC 0x40000000 +#define A3XX_RBBM_STATUS_GPU_BUSY 0x80000000 + +#define REG_A3XX_RBBM_NQWAIT_UNTIL 0x00000040 + +#define REG_A3XX_RBBM_WAIT_IDLE_CLOCKS_CTL 0x00000033 + +#define REG_A3XX_RBBM_INTERFACE_HANG_INT_CTL 0x00000050 + +#define REG_A3XX_RBBM_INTERFACE_HANG_MASK_CTL0 0x00000051 + +#define REG_A3XX_RBBM_INTERFACE_HANG_MASK_CTL1 0x00000054 + +#define REG_A3XX_RBBM_INTERFACE_HANG_MASK_CTL2 0x00000057 + +#define REG_A3XX_RBBM_INTERFACE_HANG_MASK_CTL3 0x0000005a + +#define REG_A3XX_RBBM_INT_SET_CMD 0x00000060 + +#define REG_A3XX_RBBM_INT_CLEAR_CMD 0x00000061 + +#define REG_A3XX_RBBM_INT_0_MASK 0x00000063 + +#define REG_A3XX_RBBM_INT_0_STATUS 0x00000064 + +#define REG_A3XX_RBBM_PERFCTR_CTL 0x00000080 +#define A3XX_RBBM_PERFCTR_CTL_ENABLE 0x00000001 + +#define REG_A3XX_RBBM_PERFCTR_LOAD_CMD0 0x00000081 + +#define REG_A3XX_RBBM_PERFCTR_LOAD_CMD1 0x00000082 + +#define REG_A3XX_RBBM_PERFCTR_LOAD_VALUE_LO 0x00000084 + +#define REG_A3XX_RBBM_PERFCTR_LOAD_VALUE_HI 0x00000085 + +#define REG_A3XX_RBBM_PERFCOUNTER0_SELECT 0x00000086 + +#define REG_A3XX_RBBM_PERFCOUNTER1_SELECT 0x00000087 + +#define REG_A3XX_RBBM_GPU_BUSY_MASKED 0x00000088 + +#define REG_A3XX_RBBM_PERFCTR_CP_0_LO 0x00000090 + +#define REG_A3XX_RBBM_PERFCTR_CP_0_HI 
0x00000091 + +#define REG_A3XX_RBBM_PERFCTR_RBBM_0_LO 0x00000092 + +#define REG_A3XX_RBBM_PERFCTR_RBBM_0_HI 0x00000093 + +#define REG_A3XX_RBBM_PERFCTR_RBBM_1_LO 0x00000094 + +#define REG_A3XX_RBBM_PERFCTR_RBBM_1_HI 0x00000095 + +#define REG_A3XX_RBBM_PERFCTR_PC_0_LO 0x00000096 + +#define REG_A3XX_RBBM_PERFCTR_PC_0_HI 0x00000097 + +#define REG_A3XX_RBBM_PERFCTR_PC_1_LO 0x00000098 + +#define REG_A3XX_RBBM_PERFCTR_PC_1_HI 0x00000099 + +#define REG_A3XX_RBBM_PERFCTR_PC_2_LO 0x0000009a + +#define REG_A3XX_RBBM_PERFCTR_PC_2_HI 0x0000009b + +#define REG_A3XX_RBBM_PERFCTR_PC_3_LO 0x0000009c + +#define REG_A3XX_RBBM_PERFCTR_PC_3_HI 0x0000009d + +#define REG_A3XX_RBBM_PERFCTR_VFD_0_LO 0x0000009e + +#define REG_A3XX_RBBM_PERFCTR_VFD_0_HI 0x0000009f + +#define REG_A3XX_RBBM_PERFCTR_VFD_1_LO 0x000000a0 + +#define REG_A3XX_RBBM_PERFCTR_VFD_1_HI 0x000000a1 + +#define REG_A3XX_RBBM_PERFCTR_HLSQ_0_LO 0x000000a2 + +#define REG_A3XX_RBBM_PERFCTR_HLSQ_0_HI 0x000000a3 + +#define REG_A3XX_RBBM_PERFCTR_HLSQ_1_LO 0x000000a4 + +#define REG_A3XX_RBBM_PERFCTR_HLSQ_1_HI 0x000000a5 + +#define REG_A3XX_RBBM_PERFCTR_HLSQ_2_LO 0x000000a6 + +#define REG_A3XX_RBBM_PERFCTR_HLSQ_2_HI 0x000000a7 + +#define REG_A3XX_RBBM_PERFCTR_HLSQ_3_LO 0x000000a8 + +#define REG_A3XX_RBBM_PERFCTR_HLSQ_3_HI 0x000000a9 + +#define REG_A3XX_RBBM_PERFCTR_HLSQ_4_LO 0x000000aa + +#define REG_A3XX_RBBM_PERFCTR_HLSQ_4_HI 0x000000ab + +#define REG_A3XX_RBBM_PERFCTR_HLSQ_5_LO 0x000000ac + +#define REG_A3XX_RBBM_PERFCTR_HLSQ_5_HI 0x000000ad + +#define REG_A3XX_RBBM_PERFCTR_VPC_0_LO 0x000000ae + +#define REG_A3XX_RBBM_PERFCTR_VPC_0_HI 0x000000af + +#define REG_A3XX_RBBM_PERFCTR_VPC_1_LO 0x000000b0 + +#define REG_A3XX_RBBM_PERFCTR_VPC_1_HI 0x000000b1 + +#define REG_A3XX_RBBM_PERFCTR_TSE_0_LO 0x000000b2 + +#define REG_A3XX_RBBM_PERFCTR_TSE_0_HI 0x000000b3 + +#define REG_A3XX_RBBM_PERFCTR_TSE_1_LO 0x000000b4 + +#define REG_A3XX_RBBM_PERFCTR_TSE_1_HI 0x000000b5 + +#define REG_A3XX_RBBM_PERFCTR_RAS_0_LO 0x000000b6 + +#define 
REG_A3XX_RBBM_PERFCTR_RAS_0_HI 0x000000b7 + +#define REG_A3XX_RBBM_PERFCTR_RAS_1_LO 0x000000b8 + +#define REG_A3XX_RBBM_PERFCTR_RAS_1_HI 0x000000b9 + +#define REG_A3XX_RBBM_PERFCTR_UCHE_0_LO 0x000000ba + +#define REG_A3XX_RBBM_PERFCTR_UCHE_0_HI 0x000000bb + +#define REG_A3XX_RBBM_PERFCTR_UCHE_1_LO 0x000000bc + +#define REG_A3XX_RBBM_PERFCTR_UCHE_1_HI 0x000000bd + +#define REG_A3XX_RBBM_PERFCTR_UCHE_2_LO 0x000000be + +#define REG_A3XX_RBBM_PERFCTR_UCHE_2_HI 0x000000bf + +#define REG_A3XX_RBBM_PERFCTR_UCHE_3_LO 0x000000c0 + +#define REG_A3XX_RBBM_PERFCTR_UCHE_3_HI 0x000000c1 + +#define REG_A3XX_RBBM_PERFCTR_UCHE_4_LO 0x000000c2 + +#define REG_A3XX_RBBM_PERFCTR_UCHE_4_HI 0x000000c3 + +#define REG_A3XX_RBBM_PERFCTR_UCHE_5_LO 0x000000c4 + +#define REG_A3XX_RBBM_PERFCTR_UCHE_5_HI 0x000000c5 + +#define REG_A3XX_RBBM_PERFCTR_TP_0_LO 0x000000c6 + +#define REG_A3XX_RBBM_PERFCTR_TP_0_HI 0x000000c7 + +#define REG_A3XX_RBBM_PERFCTR_TP_1_LO 0x000000c8 + +#define REG_A3XX_RBBM_PERFCTR_TP_1_HI 0x000000c9 + +#define REG_A3XX_RBBM_PERFCTR_TP_2_LO 0x000000ca + +#define REG_A3XX_RBBM_PERFCTR_TP_2_HI 0x000000cb + +#define REG_A3XX_RBBM_PERFCTR_TP_3_LO 0x000000cc + +#define REG_A3XX_RBBM_PERFCTR_TP_3_HI 0x000000cd + +#define REG_A3XX_RBBM_PERFCTR_TP_4_LO 0x000000ce + +#define REG_A3XX_RBBM_PERFCTR_TP_4_HI 0x000000cf + +#define REG_A3XX_RBBM_PERFCTR_TP_5_LO 0x000000d0 + +#define REG_A3XX_RBBM_PERFCTR_TP_5_HI 0x000000d1 + +#define REG_A3XX_RBBM_PERFCTR_SP_0_LO 0x000000d2 + +#define REG_A3XX_RBBM_PERFCTR_SP_0_HI 0x000000d3 + +#define REG_A3XX_RBBM_PERFCTR_SP_1_LO 0x000000d4 + +#define REG_A3XX_RBBM_PERFCTR_SP_1_HI 0x000000d5 + +#define REG_A3XX_RBBM_PERFCTR_SP_2_LO 0x000000d6 + +#define REG_A3XX_RBBM_PERFCTR_SP_2_HI 0x000000d7 + +#define REG_A3XX_RBBM_PERFCTR_SP_3_LO 0x000000d8 + +#define REG_A3XX_RBBM_PERFCTR_SP_3_HI 0x000000d9 + +#define REG_A3XX_RBBM_PERFCTR_SP_4_LO 0x000000da + +#define REG_A3XX_RBBM_PERFCTR_SP_4_HI 0x000000db + +#define REG_A3XX_RBBM_PERFCTR_SP_5_LO 0x000000dc + 
+#define REG_A3XX_RBBM_PERFCTR_SP_5_HI 0x000000dd + +#define REG_A3XX_RBBM_PERFCTR_SP_6_LO 0x000000de + +#define REG_A3XX_RBBM_PERFCTR_SP_6_HI 0x000000df + +#define REG_A3XX_RBBM_PERFCTR_SP_7_LO 0x000000e0 + +#define REG_A3XX_RBBM_PERFCTR_SP_7_HI 0x000000e1 + +#define REG_A3XX_RBBM_PERFCTR_RB_0_LO 0x000000e2 + +#define REG_A3XX_RBBM_PERFCTR_RB_0_HI 0x000000e3 + +#define REG_A3XX_RBBM_PERFCTR_RB_1_LO 0x000000e4 + +#define REG_A3XX_RBBM_PERFCTR_RB_1_HI 0x000000e5 + +#define REG_A3XX_RBBM_PERFCTR_PWR_0_LO 0x000000ea + +#define REG_A3XX_RBBM_PERFCTR_PWR_0_HI 0x000000eb + +#define REG_A3XX_RBBM_PERFCTR_PWR_1_LO 0x000000ec + +#define REG_A3XX_RBBM_PERFCTR_PWR_1_HI 0x000000ed + +#define REG_A3XX_RBBM_RBBM_CTL 0x00000100 + +#define REG_A3XX_RBBM_DEBUG_BUS_CTL 0x00000111 + +#define REG_A3XX_RBBM_DEBUG_BUS_DATA_STATUS 0x00000112 + +#define REG_A3XX_CP_PFP_UCODE_ADDR 0x000001c9 + +#define REG_A3XX_CP_PFP_UCODE_DATA 0x000001ca + +#define REG_A3XX_CP_ROQ_ADDR 0x000001cc + +#define REG_A3XX_CP_ROQ_DATA 0x000001cd + +#define REG_A3XX_CP_MERCIU_ADDR 0x000001d1 + +#define REG_A3XX_CP_MERCIU_DATA 0x000001d2 + +#define REG_A3XX_CP_MERCIU_DATA2 0x000001d3 + +#define REG_A3XX_CP_MEQ_ADDR 0x000001da + +#define REG_A3XX_CP_MEQ_DATA 0x000001db + +#define REG_A3XX_CP_WFI_PEND_CTR 0x000001f5 + +#define REG_A3XX_RBBM_PM_OVERRIDE2 0x0000039d + +#define REG_A3XX_CP_PERFCOUNTER_SELECT 0x00000445 + +#define REG_A3XX_CP_HW_FAULT 0x0000045c + +#define REG_A3XX_CP_PROTECT_CTRL 0x0000045e + +#define REG_A3XX_CP_PROTECT_STATUS 0x0000045f + +static inline uint32_t REG_A3XX_CP_PROTECT(uint32_t i0) { return 0x00000460 + 0x1*i0; } + +static inline uint32_t REG_A3XX_CP_PROTECT_REG(uint32_t i0) { return 0x00000460 + 0x1*i0; } + +#define REG_A3XX_CP_AHB_FAULT 0x0000054d + +#define REG_A3XX_SQ_GPR_MANAGEMENT 0x00000d00 + +#define REG_A3XX_SQ_INST_STORE_MANAGMENT 0x00000d02 + +#define REG_A3XX_TP0_CHICKEN 0x00000e1e + +#define REG_A3XX_SP_GLOBAL_MEM_SIZE 0x00000e22 + +#define REG_A3XX_SP_GLOBAL_MEM_ADDR 
0x00000e23 + +#define REG_A3XX_GRAS_CL_CLIP_CNTL 0x00002040 +#define A3XX_GRAS_CL_CLIP_CNTL_IJ_PERSP_CENTER 0x00001000 +#define A3XX_GRAS_CL_CLIP_CNTL_CLIP_DISABLE 0x00010000 +#define A3XX_GRAS_CL_CLIP_CNTL_ZFAR_CLIP_DISABLE 0x00020000 +#define A3XX_GRAS_CL_CLIP_CNTL_VP_CLIP_CODE_IGNORE 0x00080000 +#define A3XX_GRAS_CL_CLIP_CNTL_VP_XFORM_DISABLE 0x00100000 +#define A3XX_GRAS_CL_CLIP_CNTL_PERSP_DIVISION_DISABLE 0x00200000 +#define A3XX_GRAS_CL_CLIP_CNTL_ZERO_GB_SCALE_Z 0x00400000 +#define A3XX_GRAS_CL_CLIP_CNTL_ZCOORD 0x00800000 +#define A3XX_GRAS_CL_CLIP_CNTL_WCOORD 0x01000000 +#define A3XX_GRAS_CL_CLIP_CNTL_ZCLIP_DISABLE 0x02000000 +#define A3XX_GRAS_CL_CLIP_CNTL_NUM_USER_CLIP_PLANES__MASK 0x1c000000 +#define A3XX_GRAS_CL_CLIP_CNTL_NUM_USER_CLIP_PLANES__SHIFT 26 +static inline uint32_t A3XX_GRAS_CL_CLIP_CNTL_NUM_USER_CLIP_PLANES(uint32_t val) +{ + return ((val) << A3XX_GRAS_CL_CLIP_CNTL_NUM_USER_CLIP_PLANES__SHIFT) & A3XX_GRAS_CL_CLIP_CNTL_NUM_USER_CLIP_PLANES__MASK; +} + +#define REG_A3XX_GRAS_CL_GB_CLIP_ADJ 0x00002044 +#define A3XX_GRAS_CL_GB_CLIP_ADJ_HORZ__MASK 0x000003ff +#define A3XX_GRAS_CL_GB_CLIP_ADJ_HORZ__SHIFT 0 +static inline uint32_t A3XX_GRAS_CL_GB_CLIP_ADJ_HORZ(uint32_t val) +{ + return ((val) << A3XX_GRAS_CL_GB_CLIP_ADJ_HORZ__SHIFT) & A3XX_GRAS_CL_GB_CLIP_ADJ_HORZ__MASK; +} +#define A3XX_GRAS_CL_GB_CLIP_ADJ_VERT__MASK 0x000ffc00 +#define A3XX_GRAS_CL_GB_CLIP_ADJ_VERT__SHIFT 10 +static inline uint32_t A3XX_GRAS_CL_GB_CLIP_ADJ_VERT(uint32_t val) +{ + return ((val) << A3XX_GRAS_CL_GB_CLIP_ADJ_VERT__SHIFT) & A3XX_GRAS_CL_GB_CLIP_ADJ_VERT__MASK; +} + +#define REG_A3XX_GRAS_CL_VPORT_XOFFSET 0x00002048 +#define A3XX_GRAS_CL_VPORT_XOFFSET__MASK 0xffffffff +#define A3XX_GRAS_CL_VPORT_XOFFSET__SHIFT 0 +static inline uint32_t A3XX_GRAS_CL_VPORT_XOFFSET(float val) +{ + return ((fui(val)) << A3XX_GRAS_CL_VPORT_XOFFSET__SHIFT) & A3XX_GRAS_CL_VPORT_XOFFSET__MASK; +} + +#define REG_A3XX_GRAS_CL_VPORT_XSCALE 0x00002049 +#define A3XX_GRAS_CL_VPORT_XSCALE__MASK 
0xffffffff +#define A3XX_GRAS_CL_VPORT_XSCALE__SHIFT 0 +static inline uint32_t A3XX_GRAS_CL_VPORT_XSCALE(float val) +{ + return ((fui(val)) << A3XX_GRAS_CL_VPORT_XSCALE__SHIFT) & A3XX_GRAS_CL_VPORT_XSCALE__MASK; +} + +#define REG_A3XX_GRAS_CL_VPORT_YOFFSET 0x0000204a +#define A3XX_GRAS_CL_VPORT_YOFFSET__MASK 0xffffffff +#define A3XX_GRAS_CL_VPORT_YOFFSET__SHIFT 0 +static inline uint32_t A3XX_GRAS_CL_VPORT_YOFFSET(float val) +{ + return ((fui(val)) << A3XX_GRAS_CL_VPORT_YOFFSET__SHIFT) & A3XX_GRAS_CL_VPORT_YOFFSET__MASK; +} + +#define REG_A3XX_GRAS_CL_VPORT_YSCALE 0x0000204b +#define A3XX_GRAS_CL_VPORT_YSCALE__MASK 0xffffffff +#define A3XX_GRAS_CL_VPORT_YSCALE__SHIFT 0 +static inline uint32_t A3XX_GRAS_CL_VPORT_YSCALE(float val) +{ + return ((fui(val)) << A3XX_GRAS_CL_VPORT_YSCALE__SHIFT) & A3XX_GRAS_CL_VPORT_YSCALE__MASK; +} + +#define REG_A3XX_GRAS_CL_VPORT_ZOFFSET 0x0000204c +#define A3XX_GRAS_CL_VPORT_ZOFFSET__MASK 0xffffffff +#define A3XX_GRAS_CL_VPORT_ZOFFSET__SHIFT 0 +static inline uint32_t A3XX_GRAS_CL_VPORT_ZOFFSET(float val) +{ + return ((fui(val)) << A3XX_GRAS_CL_VPORT_ZOFFSET__SHIFT) & A3XX_GRAS_CL_VPORT_ZOFFSET__MASK; +} + +#define REG_A3XX_GRAS_CL_VPORT_ZSCALE 0x0000204d +#define A3XX_GRAS_CL_VPORT_ZSCALE__MASK 0xffffffff +#define A3XX_GRAS_CL_VPORT_ZSCALE__SHIFT 0 +static inline uint32_t A3XX_GRAS_CL_VPORT_ZSCALE(float val) +{ + return ((fui(val)) << A3XX_GRAS_CL_VPORT_ZSCALE__SHIFT) & A3XX_GRAS_CL_VPORT_ZSCALE__MASK; +} + +#define REG_A3XX_GRAS_SU_POINT_MINMAX 0x00002068 +#define A3XX_GRAS_SU_POINT_MINMAX_MIN__MASK 0x0000ffff +#define A3XX_GRAS_SU_POINT_MINMAX_MIN__SHIFT 0 +static inline uint32_t A3XX_GRAS_SU_POINT_MINMAX_MIN(float val) +{ + return ((((uint32_t)(val * 16.0))) << A3XX_GRAS_SU_POINT_MINMAX_MIN__SHIFT) & A3XX_GRAS_SU_POINT_MINMAX_MIN__MASK; +} +#define A3XX_GRAS_SU_POINT_MINMAX_MAX__MASK 0xffff0000 +#define A3XX_GRAS_SU_POINT_MINMAX_MAX__SHIFT 16 +static inline uint32_t A3XX_GRAS_SU_POINT_MINMAX_MAX(float val) +{ + return 
((((uint32_t)(val * 16.0))) << A3XX_GRAS_SU_POINT_MINMAX_MAX__SHIFT) & A3XX_GRAS_SU_POINT_MINMAX_MAX__MASK; +} + +#define REG_A3XX_GRAS_SU_POINT_SIZE 0x00002069 +#define A3XX_GRAS_SU_POINT_SIZE__MASK 0xffffffff +#define A3XX_GRAS_SU_POINT_SIZE__SHIFT 0 +static inline uint32_t A3XX_GRAS_SU_POINT_SIZE(float val) +{ + return ((((int32_t)(val * 16.0))) << A3XX_GRAS_SU_POINT_SIZE__SHIFT) & A3XX_GRAS_SU_POINT_SIZE__MASK; +} + +#define REG_A3XX_GRAS_SU_POLY_OFFSET_SCALE 0x0000206c +#define A3XX_GRAS_SU_POLY_OFFSET_SCALE_VAL__MASK 0x00ffffff +#define A3XX_GRAS_SU_POLY_OFFSET_SCALE_VAL__SHIFT 0 +static inline uint32_t A3XX_GRAS_SU_POLY_OFFSET_SCALE_VAL(float val) +{ + return ((((int32_t)(val * 1048576.0))) << A3XX_GRAS_SU_POLY_OFFSET_SCALE_VAL__SHIFT) & A3XX_GRAS_SU_POLY_OFFSET_SCALE_VAL__MASK; +} + +#define REG_A3XX_GRAS_SU_POLY_OFFSET_OFFSET 0x0000206d +#define A3XX_GRAS_SU_POLY_OFFSET_OFFSET__MASK 0xffffffff +#define A3XX_GRAS_SU_POLY_OFFSET_OFFSET__SHIFT 0 +static inline uint32_t A3XX_GRAS_SU_POLY_OFFSET_OFFSET(float val) +{ + return ((((int32_t)(val * 64.0))) << A3XX_GRAS_SU_POLY_OFFSET_OFFSET__SHIFT) & A3XX_GRAS_SU_POLY_OFFSET_OFFSET__MASK; +} + +#define REG_A3XX_GRAS_SU_MODE_CONTROL 0x00002070 +#define A3XX_GRAS_SU_MODE_CONTROL_CULL_FRONT 0x00000001 +#define A3XX_GRAS_SU_MODE_CONTROL_CULL_BACK 0x00000002 +#define A3XX_GRAS_SU_MODE_CONTROL_FRONT_CW 0x00000004 +#define A3XX_GRAS_SU_MODE_CONTROL_LINEHALFWIDTH__MASK 0x000007f8 +#define A3XX_GRAS_SU_MODE_CONTROL_LINEHALFWIDTH__SHIFT 3 +static inline uint32_t A3XX_GRAS_SU_MODE_CONTROL_LINEHALFWIDTH(float val) +{ + return ((((int32_t)(val * 4.0))) << A3XX_GRAS_SU_MODE_CONTROL_LINEHALFWIDTH__SHIFT) & A3XX_GRAS_SU_MODE_CONTROL_LINEHALFWIDTH__MASK; +} +#define A3XX_GRAS_SU_MODE_CONTROL_POLY_OFFSET 0x00000800 + +#define REG_A3XX_GRAS_SC_CONTROL 0x00002072 +#define A3XX_GRAS_SC_CONTROL_RENDER_MODE__MASK 0x000000f0 +#define A3XX_GRAS_SC_CONTROL_RENDER_MODE__SHIFT 4 +static inline uint32_t A3XX_GRAS_SC_CONTROL_RENDER_MODE(enum 
a3xx_render_mode val) +{ + return ((val) << A3XX_GRAS_SC_CONTROL_RENDER_MODE__SHIFT) & A3XX_GRAS_SC_CONTROL_RENDER_MODE__MASK; +} +#define A3XX_GRAS_SC_CONTROL_MSAA_SAMPLES__MASK 0x00000f00 +#define A3XX_GRAS_SC_CONTROL_MSAA_SAMPLES__SHIFT 8 +static inline uint32_t A3XX_GRAS_SC_CONTROL_MSAA_SAMPLES(enum a3xx_msaa_samples val) +{ + return ((val) << A3XX_GRAS_SC_CONTROL_MSAA_SAMPLES__SHIFT) & A3XX_GRAS_SC_CONTROL_MSAA_SAMPLES__MASK; +} +#define A3XX_GRAS_SC_CONTROL_RASTER_MODE__MASK 0x0000f000 +#define A3XX_GRAS_SC_CONTROL_RASTER_MODE__SHIFT 12 +static inline uint32_t A3XX_GRAS_SC_CONTROL_RASTER_MODE(uint32_t val) +{ + return ((val) << A3XX_GRAS_SC_CONTROL_RASTER_MODE__SHIFT) & A3XX_GRAS_SC_CONTROL_RASTER_MODE__MASK; +} + +#define REG_A3XX_GRAS_SC_SCREEN_SCISSOR_TL 0x00002074 +#define A3XX_GRAS_SC_SCREEN_SCISSOR_TL_WINDOW_OFFSET_DISABLE 0x80000000 +#define A3XX_GRAS_SC_SCREEN_SCISSOR_TL_X__MASK 0x00007fff +#define A3XX_GRAS_SC_SCREEN_SCISSOR_TL_X__SHIFT 0 +static inline uint32_t A3XX_GRAS_SC_SCREEN_SCISSOR_TL_X(uint32_t val) +{ + return ((val) << A3XX_GRAS_SC_SCREEN_SCISSOR_TL_X__SHIFT) & A3XX_GRAS_SC_SCREEN_SCISSOR_TL_X__MASK; +} +#define A3XX_GRAS_SC_SCREEN_SCISSOR_TL_Y__MASK 0x7fff0000 +#define A3XX_GRAS_SC_SCREEN_SCISSOR_TL_Y__SHIFT 16 +static inline uint32_t A3XX_GRAS_SC_SCREEN_SCISSOR_TL_Y(uint32_t val) +{ + return ((val) << A3XX_GRAS_SC_SCREEN_SCISSOR_TL_Y__SHIFT) & A3XX_GRAS_SC_SCREEN_SCISSOR_TL_Y__MASK; +} + +#define REG_A3XX_GRAS_SC_SCREEN_SCISSOR_BR 0x00002075 +#define A3XX_GRAS_SC_SCREEN_SCISSOR_BR_WINDOW_OFFSET_DISABLE 0x80000000 +#define A3XX_GRAS_SC_SCREEN_SCISSOR_BR_X__MASK 0x00007fff +#define A3XX_GRAS_SC_SCREEN_SCISSOR_BR_X__SHIFT 0 +static inline uint32_t A3XX_GRAS_SC_SCREEN_SCISSOR_BR_X(uint32_t val) +{ + return ((val) << A3XX_GRAS_SC_SCREEN_SCISSOR_BR_X__SHIFT) & A3XX_GRAS_SC_SCREEN_SCISSOR_BR_X__MASK; +} +#define A3XX_GRAS_SC_SCREEN_SCISSOR_BR_Y__MASK 0x7fff0000 +#define A3XX_GRAS_SC_SCREEN_SCISSOR_BR_Y__SHIFT 16 +static inline uint32_t 
A3XX_GRAS_SC_SCREEN_SCISSOR_BR_Y(uint32_t val) +{ + return ((val) << A3XX_GRAS_SC_SCREEN_SCISSOR_BR_Y__SHIFT) & A3XX_GRAS_SC_SCREEN_SCISSOR_BR_Y__MASK; +} + +#define REG_A3XX_GRAS_SC_WINDOW_SCISSOR_TL 0x00002079 +#define A3XX_GRAS_SC_WINDOW_SCISSOR_TL_WINDOW_OFFSET_DISABLE 0x80000000 +#define A3XX_GRAS_SC_WINDOW_SCISSOR_TL_X__MASK 0x00007fff +#define A3XX_GRAS_SC_WINDOW_SCISSOR_TL_X__SHIFT 0 +static inline uint32_t A3XX_GRAS_SC_WINDOW_SCISSOR_TL_X(uint32_t val) +{ + return ((val) << A3XX_GRAS_SC_WINDOW_SCISSOR_TL_X__SHIFT) & A3XX_GRAS_SC_WINDOW_SCISSOR_TL_X__MASK; +} +#define A3XX_GRAS_SC_WINDOW_SCISSOR_TL_Y__MASK 0x7fff0000 +#define A3XX_GRAS_SC_WINDOW_SCISSOR_TL_Y__SHIFT 16 +static inline uint32_t A3XX_GRAS_SC_WINDOW_SCISSOR_TL_Y(uint32_t val) +{ + return ((val) << A3XX_GRAS_SC_WINDOW_SCISSOR_TL_Y__SHIFT) & A3XX_GRAS_SC_WINDOW_SCISSOR_TL_Y__MASK; +} + +#define REG_A3XX_GRAS_SC_WINDOW_SCISSOR_BR 0x0000207a +#define A3XX_GRAS_SC_WINDOW_SCISSOR_BR_WINDOW_OFFSET_DISABLE 0x80000000 +#define A3XX_GRAS_SC_WINDOW_SCISSOR_BR_X__MASK 0x00007fff +#define A3XX_GRAS_SC_WINDOW_SCISSOR_BR_X__SHIFT 0 +static inline uint32_t A3XX_GRAS_SC_WINDOW_SCISSOR_BR_X(uint32_t val) +{ + return ((val) << A3XX_GRAS_SC_WINDOW_SCISSOR_BR_X__SHIFT) & A3XX_GRAS_SC_WINDOW_SCISSOR_BR_X__MASK; +} +#define A3XX_GRAS_SC_WINDOW_SCISSOR_BR_Y__MASK 0x7fff0000 +#define A3XX_GRAS_SC_WINDOW_SCISSOR_BR_Y__SHIFT 16 +static inline uint32_t A3XX_GRAS_SC_WINDOW_SCISSOR_BR_Y(uint32_t val) +{ + return ((val) << A3XX_GRAS_SC_WINDOW_SCISSOR_BR_Y__SHIFT) & A3XX_GRAS_SC_WINDOW_SCISSOR_BR_Y__MASK; +} + +#define REG_A3XX_RB_MODE_CONTROL 0x000020c0 +#define A3XX_RB_MODE_CONTROL_GMEM_BYPASS 0x00000080 +#define A3XX_RB_MODE_CONTROL_RENDER_MODE__MASK 0x00000700 +#define A3XX_RB_MODE_CONTROL_RENDER_MODE__SHIFT 8 +static inline uint32_t A3XX_RB_MODE_CONTROL_RENDER_MODE(enum a3xx_render_mode val) +{ + return ((val) << A3XX_RB_MODE_CONTROL_RENDER_MODE__SHIFT) & A3XX_RB_MODE_CONTROL_RENDER_MODE__MASK; +} +#define 
A3XX_RB_MODE_CONTROL_MRT__MASK 0x00003000 +#define A3XX_RB_MODE_CONTROL_MRT__SHIFT 12 +static inline uint32_t A3XX_RB_MODE_CONTROL_MRT(uint32_t val) +{ + return ((val) << A3XX_RB_MODE_CONTROL_MRT__SHIFT) & A3XX_RB_MODE_CONTROL_MRT__MASK; +} +#define A3XX_RB_MODE_CONTROL_MARB_CACHE_SPLIT_MODE 0x00008000 +#define A3XX_RB_MODE_CONTROL_PACKER_TIMER_ENABLE 0x00010000 + +#define REG_A3XX_RB_RENDER_CONTROL 0x000020c1 +#define A3XX_RB_RENDER_CONTROL_DUAL_COLOR_IN_ENABLE 0x00000001 +#define A3XX_RB_RENDER_CONTROL_YUV_IN_ENABLE 0x00000002 +#define A3XX_RB_RENDER_CONTROL_COV_VALUE_INPUT_ENABLE 0x00000004 +#define A3XX_RB_RENDER_CONTROL_FACENESS 0x00000008 +#define A3XX_RB_RENDER_CONTROL_BIN_WIDTH__MASK 0x00000ff0 +#define A3XX_RB_RENDER_CONTROL_BIN_WIDTH__SHIFT 4 +static inline uint32_t A3XX_RB_RENDER_CONTROL_BIN_WIDTH(uint32_t val) +{ + assert(!(val & 0x1f)); + return ((val >> 5) << A3XX_RB_RENDER_CONTROL_BIN_WIDTH__SHIFT) & A3XX_RB_RENDER_CONTROL_BIN_WIDTH__MASK; +} +#define A3XX_RB_RENDER_CONTROL_DISABLE_COLOR_PIPE 0x00001000 +#define A3XX_RB_RENDER_CONTROL_ENABLE_GMEM 0x00002000 +#define A3XX_RB_RENDER_CONTROL_XCOORD 0x00004000 +#define A3XX_RB_RENDER_CONTROL_YCOORD 0x00008000 +#define A3XX_RB_RENDER_CONTROL_ZCOORD 0x00010000 +#define A3XX_RB_RENDER_CONTROL_WCOORD 0x00020000 +#define A3XX_RB_RENDER_CONTROL_I_CLAMP_ENABLE 0x00080000 +#define A3XX_RB_RENDER_CONTROL_COV_VALUE_OUTPUT_ENABLE 0x00100000 +#define A3XX_RB_RENDER_CONTROL_ALPHA_TEST 0x00400000 +#define A3XX_RB_RENDER_CONTROL_ALPHA_TEST_FUNC__MASK 0x07000000 +#define A3XX_RB_RENDER_CONTROL_ALPHA_TEST_FUNC__SHIFT 24 +static inline uint32_t A3XX_RB_RENDER_CONTROL_ALPHA_TEST_FUNC(enum adreno_compare_func val) +{ + return ((val) << A3XX_RB_RENDER_CONTROL_ALPHA_TEST_FUNC__SHIFT) & A3XX_RB_RENDER_CONTROL_ALPHA_TEST_FUNC__MASK; +} +#define A3XX_RB_RENDER_CONTROL_ALPHA_TO_COVERAGE 0x40000000 +#define A3XX_RB_RENDER_CONTROL_ALPHA_TO_ONE 0x80000000 + +#define REG_A3XX_RB_MSAA_CONTROL 0x000020c2 +#define 
A3XX_RB_MSAA_CONTROL_DISABLE 0x00000400 +#define A3XX_RB_MSAA_CONTROL_SAMPLES__MASK 0x0000f000 +#define A3XX_RB_MSAA_CONTROL_SAMPLES__SHIFT 12 +static inline uint32_t A3XX_RB_MSAA_CONTROL_SAMPLES(enum a3xx_msaa_samples val) +{ + return ((val) << A3XX_RB_MSAA_CONTROL_SAMPLES__SHIFT) & A3XX_RB_MSAA_CONTROL_SAMPLES__MASK; +} +#define A3XX_RB_MSAA_CONTROL_SAMPLE_MASK__MASK 0xffff0000 +#define A3XX_RB_MSAA_CONTROL_SAMPLE_MASK__SHIFT 16 +static inline uint32_t A3XX_RB_MSAA_CONTROL_SAMPLE_MASK(uint32_t val) +{ + return ((val) << A3XX_RB_MSAA_CONTROL_SAMPLE_MASK__SHIFT) & A3XX_RB_MSAA_CONTROL_SAMPLE_MASK__MASK; +} + +#define REG_A3XX_RB_ALPHA_REF 0x000020c3 +#define A3XX_RB_ALPHA_REF_UINT__MASK 0x0000ff00 +#define A3XX_RB_ALPHA_REF_UINT__SHIFT 8 +static inline uint32_t A3XX_RB_ALPHA_REF_UINT(uint32_t val) +{ + return ((val) << A3XX_RB_ALPHA_REF_UINT__SHIFT) & A3XX_RB_ALPHA_REF_UINT__MASK; +} +#define A3XX_RB_ALPHA_REF_FLOAT__MASK 0xffff0000 +#define A3XX_RB_ALPHA_REF_FLOAT__SHIFT 16 +static inline uint32_t A3XX_RB_ALPHA_REF_FLOAT(float val) +{ + return ((util_float_to_half(val)) << A3XX_RB_ALPHA_REF_FLOAT__SHIFT) & A3XX_RB_ALPHA_REF_FLOAT__MASK; +} + +static inline uint32_t REG_A3XX_RB_MRT(uint32_t i0) { return 0x000020c4 + 0x4*i0; } + +static inline uint32_t REG_A3XX_RB_MRT_CONTROL(uint32_t i0) { return 0x000020c4 + 0x4*i0; } +#define A3XX_RB_MRT_CONTROL_READ_DEST_ENABLE 0x00000008 +#define A3XX_RB_MRT_CONTROL_BLEND 0x00000010 +#define A3XX_RB_MRT_CONTROL_BLEND2 0x00000020 +#define A3XX_RB_MRT_CONTROL_ROP_CODE__MASK 0x00000f00 +#define A3XX_RB_MRT_CONTROL_ROP_CODE__SHIFT 8 +static inline uint32_t A3XX_RB_MRT_CONTROL_ROP_CODE(enum a3xx_rop_code val) +{ + return ((val) << A3XX_RB_MRT_CONTROL_ROP_CODE__SHIFT) & A3XX_RB_MRT_CONTROL_ROP_CODE__MASK; +} +#define A3XX_RB_MRT_CONTROL_DITHER_MODE__MASK 0x00003000 +#define A3XX_RB_MRT_CONTROL_DITHER_MODE__SHIFT 12 +static inline uint32_t A3XX_RB_MRT_CONTROL_DITHER_MODE(enum adreno_rb_dither_mode val) +{ + return ((val) << 
A3XX_RB_MRT_CONTROL_DITHER_MODE__SHIFT) & A3XX_RB_MRT_CONTROL_DITHER_MODE__MASK; +} +#define A3XX_RB_MRT_CONTROL_COMPONENT_ENABLE__MASK 0x0f000000 +#define A3XX_RB_MRT_CONTROL_COMPONENT_ENABLE__SHIFT 24 +static inline uint32_t A3XX_RB_MRT_CONTROL_COMPONENT_ENABLE(uint32_t val) +{ + return ((val) << A3XX_RB_MRT_CONTROL_COMPONENT_ENABLE__SHIFT) & A3XX_RB_MRT_CONTROL_COMPONENT_ENABLE__MASK; +} + +static inline uint32_t REG_A3XX_RB_MRT_BUF_INFO(uint32_t i0) { return 0x000020c5 + 0x4*i0; } +#define A3XX_RB_MRT_BUF_INFO_COLOR_FORMAT__MASK 0x0000003f +#define A3XX_RB_MRT_BUF_INFO_COLOR_FORMAT__SHIFT 0 +static inline uint32_t A3XX_RB_MRT_BUF_INFO_COLOR_FORMAT(enum a3xx_color_fmt val) +{ + return ((val) << A3XX_RB_MRT_BUF_INFO_COLOR_FORMAT__SHIFT) & A3XX_RB_MRT_BUF_INFO_COLOR_FORMAT__MASK; +} +#define A3XX_RB_MRT_BUF_INFO_COLOR_TILE_MODE__MASK 0x000000c0 +#define A3XX_RB_MRT_BUF_INFO_COLOR_TILE_MODE__SHIFT 6 +static inline uint32_t A3XX_RB_MRT_BUF_INFO_COLOR_TILE_MODE(enum a3xx_tile_mode val) +{ + return ((val) << A3XX_RB_MRT_BUF_INFO_COLOR_TILE_MODE__SHIFT) & A3XX_RB_MRT_BUF_INFO_COLOR_TILE_MODE__MASK; +} +#define A3XX_RB_MRT_BUF_INFO_COLOR_SWAP__MASK 0x00000c00 +#define A3XX_RB_MRT_BUF_INFO_COLOR_SWAP__SHIFT 10 +static inline uint32_t A3XX_RB_MRT_BUF_INFO_COLOR_SWAP(enum a3xx_color_swap val) +{ + return ((val) << A3XX_RB_MRT_BUF_INFO_COLOR_SWAP__SHIFT) & A3XX_RB_MRT_BUF_INFO_COLOR_SWAP__MASK; +} +#define A3XX_RB_MRT_BUF_INFO_COLOR_SRGB 0x00004000 +#define A3XX_RB_MRT_BUF_INFO_COLOR_BUF_PITCH__MASK 0xfffe0000 +#define A3XX_RB_MRT_BUF_INFO_COLOR_BUF_PITCH__SHIFT 17 +static inline uint32_t A3XX_RB_MRT_BUF_INFO_COLOR_BUF_PITCH(uint32_t val) +{ + assert(!(val & 0x1f)); + return ((val >> 5) << A3XX_RB_MRT_BUF_INFO_COLOR_BUF_PITCH__SHIFT) & A3XX_RB_MRT_BUF_INFO_COLOR_BUF_PITCH__MASK; +} + +static inline uint32_t REG_A3XX_RB_MRT_BUF_BASE(uint32_t i0) { return 0x000020c6 + 0x4*i0; } +#define A3XX_RB_MRT_BUF_BASE_COLOR_BUF_BASE__MASK 0xfffffff0 +#define 
A3XX_RB_MRT_BUF_BASE_COLOR_BUF_BASE__SHIFT 4 +static inline uint32_t A3XX_RB_MRT_BUF_BASE_COLOR_BUF_BASE(uint32_t val) +{ + assert(!(val & 0x1f)); + return ((val >> 5) << A3XX_RB_MRT_BUF_BASE_COLOR_BUF_BASE__SHIFT) & A3XX_RB_MRT_BUF_BASE_COLOR_BUF_BASE__MASK; +} + +static inline uint32_t REG_A3XX_RB_MRT_BLEND_CONTROL(uint32_t i0) { return 0x000020c7 + 0x4*i0; } +#define A3XX_RB_MRT_BLEND_CONTROL_RGB_SRC_FACTOR__MASK 0x0000001f +#define A3XX_RB_MRT_BLEND_CONTROL_RGB_SRC_FACTOR__SHIFT 0 +static inline uint32_t A3XX_RB_MRT_BLEND_CONTROL_RGB_SRC_FACTOR(enum adreno_rb_blend_factor val) +{ + return ((val) << A3XX_RB_MRT_BLEND_CONTROL_RGB_SRC_FACTOR__SHIFT) & A3XX_RB_MRT_BLEND_CONTROL_RGB_SRC_FACTOR__MASK; +} +#define A3XX_RB_MRT_BLEND_CONTROL_RGB_BLEND_OPCODE__MASK 0x000000e0 +#define A3XX_RB_MRT_BLEND_CONTROL_RGB_BLEND_OPCODE__SHIFT 5 +static inline uint32_t A3XX_RB_MRT_BLEND_CONTROL_RGB_BLEND_OPCODE(enum a3xx_rb_blend_opcode val) +{ + return ((val) << A3XX_RB_MRT_BLEND_CONTROL_RGB_BLEND_OPCODE__SHIFT) & A3XX_RB_MRT_BLEND_CONTROL_RGB_BLEND_OPCODE__MASK; +} +#define A3XX_RB_MRT_BLEND_CONTROL_RGB_DEST_FACTOR__MASK 0x00001f00 +#define A3XX_RB_MRT_BLEND_CONTROL_RGB_DEST_FACTOR__SHIFT 8 +static inline uint32_t A3XX_RB_MRT_BLEND_CONTROL_RGB_DEST_FACTOR(enum adreno_rb_blend_factor val) +{ + return ((val) << A3XX_RB_MRT_BLEND_CONTROL_RGB_DEST_FACTOR__SHIFT) & A3XX_RB_MRT_BLEND_CONTROL_RGB_DEST_FACTOR__MASK; +} +#define A3XX_RB_MRT_BLEND_CONTROL_ALPHA_SRC_FACTOR__MASK 0x001f0000 +#define A3XX_RB_MRT_BLEND_CONTROL_ALPHA_SRC_FACTOR__SHIFT 16 +static inline uint32_t A3XX_RB_MRT_BLEND_CONTROL_ALPHA_SRC_FACTOR(enum adreno_rb_blend_factor val) +{ + return ((val) << A3XX_RB_MRT_BLEND_CONTROL_ALPHA_SRC_FACTOR__SHIFT) & A3XX_RB_MRT_BLEND_CONTROL_ALPHA_SRC_FACTOR__MASK; +} +#define A3XX_RB_MRT_BLEND_CONTROL_ALPHA_BLEND_OPCODE__MASK 0x00e00000 +#define A3XX_RB_MRT_BLEND_CONTROL_ALPHA_BLEND_OPCODE__SHIFT 21 +static inline uint32_t A3XX_RB_MRT_BLEND_CONTROL_ALPHA_BLEND_OPCODE(enum 
a3xx_rb_blend_opcode val) +{ + return ((val) << A3XX_RB_MRT_BLEND_CONTROL_ALPHA_BLEND_OPCODE__SHIFT) & A3XX_RB_MRT_BLEND_CONTROL_ALPHA_BLEND_OPCODE__MASK; +} +#define A3XX_RB_MRT_BLEND_CONTROL_ALPHA_DEST_FACTOR__MASK 0x1f000000 +#define A3XX_RB_MRT_BLEND_CONTROL_ALPHA_DEST_FACTOR__SHIFT 24 +static inline uint32_t A3XX_RB_MRT_BLEND_CONTROL_ALPHA_DEST_FACTOR(enum adreno_rb_blend_factor val) +{ + return ((val) << A3XX_RB_MRT_BLEND_CONTROL_ALPHA_DEST_FACTOR__SHIFT) & A3XX_RB_MRT_BLEND_CONTROL_ALPHA_DEST_FACTOR__MASK; +} +#define A3XX_RB_MRT_BLEND_CONTROL_CLAMP_ENABLE 0x20000000 + +#define REG_A3XX_RB_BLEND_RED 0x000020e4 +#define A3XX_RB_BLEND_RED_UINT__MASK 0x000000ff +#define A3XX_RB_BLEND_RED_UINT__SHIFT 0 +static inline uint32_t A3XX_RB_BLEND_RED_UINT(uint32_t val) +{ + return ((val) << A3XX_RB_BLEND_RED_UINT__SHIFT) & A3XX_RB_BLEND_RED_UINT__MASK; +} +#define A3XX_RB_BLEND_RED_FLOAT__MASK 0xffff0000 +#define A3XX_RB_BLEND_RED_FLOAT__SHIFT 16 +static inline uint32_t A3XX_RB_BLEND_RED_FLOAT(float val) +{ + return ((util_float_to_half(val)) << A3XX_RB_BLEND_RED_FLOAT__SHIFT) & A3XX_RB_BLEND_RED_FLOAT__MASK; +} + +#define REG_A3XX_RB_BLEND_GREEN 0x000020e5 +#define A3XX_RB_BLEND_GREEN_UINT__MASK 0x000000ff +#define A3XX_RB_BLEND_GREEN_UINT__SHIFT 0 +static inline uint32_t A3XX_RB_BLEND_GREEN_UINT(uint32_t val) +{ + return ((val) << A3XX_RB_BLEND_GREEN_UINT__SHIFT) & A3XX_RB_BLEND_GREEN_UINT__MASK; +} +#define A3XX_RB_BLEND_GREEN_FLOAT__MASK 0xffff0000 +#define A3XX_RB_BLEND_GREEN_FLOAT__SHIFT 16 +static inline uint32_t A3XX_RB_BLEND_GREEN_FLOAT(float val) +{ + return ((util_float_to_half(val)) << A3XX_RB_BLEND_GREEN_FLOAT__SHIFT) & A3XX_RB_BLEND_GREEN_FLOAT__MASK; +} + +#define REG_A3XX_RB_BLEND_BLUE 0x000020e6 +#define A3XX_RB_BLEND_BLUE_UINT__MASK 0x000000ff +#define A3XX_RB_BLEND_BLUE_UINT__SHIFT 0 +static inline uint32_t A3XX_RB_BLEND_BLUE_UINT(uint32_t val) +{ + return ((val) << A3XX_RB_BLEND_BLUE_UINT__SHIFT) & A3XX_RB_BLEND_BLUE_UINT__MASK; +} +#define 
A3XX_RB_BLEND_BLUE_FLOAT__MASK 0xffff0000 +#define A3XX_RB_BLEND_BLUE_FLOAT__SHIFT 16 +static inline uint32_t A3XX_RB_BLEND_BLUE_FLOAT(float val) +{ + return ((util_float_to_half(val)) << A3XX_RB_BLEND_BLUE_FLOAT__SHIFT) & A3XX_RB_BLEND_BLUE_FLOAT__MASK; +} + +#define REG_A3XX_RB_BLEND_ALPHA 0x000020e7 +#define A3XX_RB_BLEND_ALPHA_UINT__MASK 0x000000ff +#define A3XX_RB_BLEND_ALPHA_UINT__SHIFT 0 +static inline uint32_t A3XX_RB_BLEND_ALPHA_UINT(uint32_t val) +{ + return ((val) << A3XX_RB_BLEND_ALPHA_UINT__SHIFT) & A3XX_RB_BLEND_ALPHA_UINT__MASK; +} +#define A3XX_RB_BLEND_ALPHA_FLOAT__MASK 0xffff0000 +#define A3XX_RB_BLEND_ALPHA_FLOAT__SHIFT 16 +static inline uint32_t A3XX_RB_BLEND_ALPHA_FLOAT(float val) +{ + return ((util_float_to_half(val)) << A3XX_RB_BLEND_ALPHA_FLOAT__SHIFT) & A3XX_RB_BLEND_ALPHA_FLOAT__MASK; +} + +#define REG_A3XX_RB_CLEAR_COLOR_DW0 0x000020e8 + +#define REG_A3XX_RB_CLEAR_COLOR_DW1 0x000020e9 + +#define REG_A3XX_RB_CLEAR_COLOR_DW2 0x000020ea + +#define REG_A3XX_RB_CLEAR_COLOR_DW3 0x000020eb + +#define REG_A3XX_RB_COPY_CONTROL 0x000020ec +#define A3XX_RB_COPY_CONTROL_MSAA_RESOLVE__MASK 0x00000003 +#define A3XX_RB_COPY_CONTROL_MSAA_RESOLVE__SHIFT 0 +static inline uint32_t A3XX_RB_COPY_CONTROL_MSAA_RESOLVE(enum a3xx_msaa_samples val) +{ + return ((val) << A3XX_RB_COPY_CONTROL_MSAA_RESOLVE__SHIFT) & A3XX_RB_COPY_CONTROL_MSAA_RESOLVE__MASK; +} +#define A3XX_RB_COPY_CONTROL_DEPTHCLEAR 0x00000008 +#define A3XX_RB_COPY_CONTROL_MODE__MASK 0x00000070 +#define A3XX_RB_COPY_CONTROL_MODE__SHIFT 4 +static inline uint32_t A3XX_RB_COPY_CONTROL_MODE(enum adreno_rb_copy_control_mode val) +{ + return ((val) << A3XX_RB_COPY_CONTROL_MODE__SHIFT) & A3XX_RB_COPY_CONTROL_MODE__MASK; +} +#define A3XX_RB_COPY_CONTROL_MSAA_SRGB_DOWNSAMPLE 0x00000080 +#define A3XX_RB_COPY_CONTROL_FASTCLEAR__MASK 0x00000f00 +#define A3XX_RB_COPY_CONTROL_FASTCLEAR__SHIFT 8 +static inline uint32_t A3XX_RB_COPY_CONTROL_FASTCLEAR(uint32_t val) +{ + return ((val) << 
A3XX_RB_COPY_CONTROL_FASTCLEAR__SHIFT) & A3XX_RB_COPY_CONTROL_FASTCLEAR__MASK; +} +#define A3XX_RB_COPY_CONTROL_DEPTH32_RESOLVE 0x00001000 +#define A3XX_RB_COPY_CONTROL_GMEM_BASE__MASK 0xffffc000 +#define A3XX_RB_COPY_CONTROL_GMEM_BASE__SHIFT 14 +static inline uint32_t A3XX_RB_COPY_CONTROL_GMEM_BASE(uint32_t val) +{ + assert(!(val & 0x3fff)); + return ((val >> 14) << A3XX_RB_COPY_CONTROL_GMEM_BASE__SHIFT) & A3XX_RB_COPY_CONTROL_GMEM_BASE__MASK; +} + +#define REG_A3XX_RB_COPY_DEST_BASE 0x000020ed +#define A3XX_RB_COPY_DEST_BASE_BASE__MASK 0xfffffff0 +#define A3XX_RB_COPY_DEST_BASE_BASE__SHIFT 4 +static inline uint32_t A3XX_RB_COPY_DEST_BASE_BASE(uint32_t val) +{ + assert(!(val & 0x1f)); + return ((val >> 5) << A3XX_RB_COPY_DEST_BASE_BASE__SHIFT) & A3XX_RB_COPY_DEST_BASE_BASE__MASK; +} + +#define REG_A3XX_RB_COPY_DEST_PITCH 0x000020ee +#define A3XX_RB_COPY_DEST_PITCH_PITCH__MASK 0xffffffff +#define A3XX_RB_COPY_DEST_PITCH_PITCH__SHIFT 0 +static inline uint32_t A3XX_RB_COPY_DEST_PITCH_PITCH(uint32_t val) +{ + assert(!(val & 0x1f)); + return ((val >> 5) << A3XX_RB_COPY_DEST_PITCH_PITCH__SHIFT) & A3XX_RB_COPY_DEST_PITCH_PITCH__MASK; +} + +#define REG_A3XX_RB_COPY_DEST_INFO 0x000020ef +#define A3XX_RB_COPY_DEST_INFO_TILE__MASK 0x00000003 +#define A3XX_RB_COPY_DEST_INFO_TILE__SHIFT 0 +static inline uint32_t A3XX_RB_COPY_DEST_INFO_TILE(enum a3xx_tile_mode val) +{ + return ((val) << A3XX_RB_COPY_DEST_INFO_TILE__SHIFT) & A3XX_RB_COPY_DEST_INFO_TILE__MASK; +} +#define A3XX_RB_COPY_DEST_INFO_FORMAT__MASK 0x000000fc +#define A3XX_RB_COPY_DEST_INFO_FORMAT__SHIFT 2 +static inline uint32_t A3XX_RB_COPY_DEST_INFO_FORMAT(enum a3xx_color_fmt val) +{ + return ((val) << A3XX_RB_COPY_DEST_INFO_FORMAT__SHIFT) & A3XX_RB_COPY_DEST_INFO_FORMAT__MASK; +} +#define A3XX_RB_COPY_DEST_INFO_SWAP__MASK 0x00000300 +#define A3XX_RB_COPY_DEST_INFO_SWAP__SHIFT 8 +static inline uint32_t A3XX_RB_COPY_DEST_INFO_SWAP(enum a3xx_color_swap val) +{ + return ((val) << A3XX_RB_COPY_DEST_INFO_SWAP__SHIFT) & 
A3XX_RB_COPY_DEST_INFO_SWAP__MASK; +} +#define A3XX_RB_COPY_DEST_INFO_DITHER_MODE__MASK 0x00000c00 +#define A3XX_RB_COPY_DEST_INFO_DITHER_MODE__SHIFT 10 +static inline uint32_t A3XX_RB_COPY_DEST_INFO_DITHER_MODE(enum adreno_rb_dither_mode val) +{ + return ((val) << A3XX_RB_COPY_DEST_INFO_DITHER_MODE__SHIFT) & A3XX_RB_COPY_DEST_INFO_DITHER_MODE__MASK; +} +#define A3XX_RB_COPY_DEST_INFO_COMPONENT_ENABLE__MASK 0x0003c000 +#define A3XX_RB_COPY_DEST_INFO_COMPONENT_ENABLE__SHIFT 14 +static inline uint32_t A3XX_RB_COPY_DEST_INFO_COMPONENT_ENABLE(uint32_t val) +{ + return ((val) << A3XX_RB_COPY_DEST_INFO_COMPONENT_ENABLE__SHIFT) & A3XX_RB_COPY_DEST_INFO_COMPONENT_ENABLE__MASK; +} +#define A3XX_RB_COPY_DEST_INFO_ENDIAN__MASK 0x001c0000 +#define A3XX_RB_COPY_DEST_INFO_ENDIAN__SHIFT 18 +static inline uint32_t A3XX_RB_COPY_DEST_INFO_ENDIAN(enum adreno_rb_surface_endian val) +{ + return ((val) << A3XX_RB_COPY_DEST_INFO_ENDIAN__SHIFT) & A3XX_RB_COPY_DEST_INFO_ENDIAN__MASK; +} + +#define REG_A3XX_RB_DEPTH_CONTROL 0x00002100 +#define A3XX_RB_DEPTH_CONTROL_FRAG_WRITES_Z 0x00000001 +#define A3XX_RB_DEPTH_CONTROL_Z_ENABLE 0x00000002 +#define A3XX_RB_DEPTH_CONTROL_Z_WRITE_ENABLE 0x00000004 +#define A3XX_RB_DEPTH_CONTROL_EARLY_Z_DISABLE 0x00000008 +#define A3XX_RB_DEPTH_CONTROL_ZFUNC__MASK 0x00000070 +#define A3XX_RB_DEPTH_CONTROL_ZFUNC__SHIFT 4 +static inline uint32_t A3XX_RB_DEPTH_CONTROL_ZFUNC(enum adreno_compare_func val) +{ + return ((val) << A3XX_RB_DEPTH_CONTROL_ZFUNC__SHIFT) & A3XX_RB_DEPTH_CONTROL_ZFUNC__MASK; +} +#define A3XX_RB_DEPTH_CONTROL_Z_CLAMP_ENABLE 0x00000080 +#define A3XX_RB_DEPTH_CONTROL_Z_TEST_ENABLE 0x80000000 + +#define REG_A3XX_RB_DEPTH_CLEAR 0x00002101 + +#define REG_A3XX_RB_DEPTH_INFO 0x00002102 +#define A3XX_RB_DEPTH_INFO_DEPTH_FORMAT__MASK 0x00000003 +#define A3XX_RB_DEPTH_INFO_DEPTH_FORMAT__SHIFT 0 +static inline uint32_t A3XX_RB_DEPTH_INFO_DEPTH_FORMAT(enum adreno_rb_depth_format val) +{ + return ((val) << A3XX_RB_DEPTH_INFO_DEPTH_FORMAT__SHIFT) & 
A3XX_RB_DEPTH_INFO_DEPTH_FORMAT__MASK; +} +#define A3XX_RB_DEPTH_INFO_DEPTH_BASE__MASK 0xfffff800 +#define A3XX_RB_DEPTH_INFO_DEPTH_BASE__SHIFT 11 +static inline uint32_t A3XX_RB_DEPTH_INFO_DEPTH_BASE(uint32_t val) +{ + assert(!(val & 0xfff)); + return ((val >> 12) << A3XX_RB_DEPTH_INFO_DEPTH_BASE__SHIFT) & A3XX_RB_DEPTH_INFO_DEPTH_BASE__MASK; +} + +#define REG_A3XX_RB_DEPTH_PITCH 0x00002103 +#define A3XX_RB_DEPTH_PITCH__MASK 0xffffffff +#define A3XX_RB_DEPTH_PITCH__SHIFT 0 +static inline uint32_t A3XX_RB_DEPTH_PITCH(uint32_t val) +{ + assert(!(val & 0x7)); + return ((val >> 3) << A3XX_RB_DEPTH_PITCH__SHIFT) & A3XX_RB_DEPTH_PITCH__MASK; +} + +#define REG_A3XX_RB_STENCIL_CONTROL 0x00002104 +#define A3XX_RB_STENCIL_CONTROL_STENCIL_ENABLE 0x00000001 +#define A3XX_RB_STENCIL_CONTROL_STENCIL_ENABLE_BF 0x00000002 +#define A3XX_RB_STENCIL_CONTROL_STENCIL_READ 0x00000004 +#define A3XX_RB_STENCIL_CONTROL_FUNC__MASK 0x00000700 +#define A3XX_RB_STENCIL_CONTROL_FUNC__SHIFT 8 +static inline uint32_t A3XX_RB_STENCIL_CONTROL_FUNC(enum adreno_compare_func val) +{ + return ((val) << A3XX_RB_STENCIL_CONTROL_FUNC__SHIFT) & A3XX_RB_STENCIL_CONTROL_FUNC__MASK; +} +#define A3XX_RB_STENCIL_CONTROL_FAIL__MASK 0x00003800 +#define A3XX_RB_STENCIL_CONTROL_FAIL__SHIFT 11 +static inline uint32_t A3XX_RB_STENCIL_CONTROL_FAIL(enum adreno_stencil_op val) +{ + return ((val) << A3XX_RB_STENCIL_CONTROL_FAIL__SHIFT) & A3XX_RB_STENCIL_CONTROL_FAIL__MASK; +} +#define A3XX_RB_STENCIL_CONTROL_ZPASS__MASK 0x0001c000 +#define A3XX_RB_STENCIL_CONTROL_ZPASS__SHIFT 14 +static inline uint32_t A3XX_RB_STENCIL_CONTROL_ZPASS(enum adreno_stencil_op val) +{ + return ((val) << A3XX_RB_STENCIL_CONTROL_ZPASS__SHIFT) & A3XX_RB_STENCIL_CONTROL_ZPASS__MASK; +} +#define A3XX_RB_STENCIL_CONTROL_ZFAIL__MASK 0x000e0000 +#define A3XX_RB_STENCIL_CONTROL_ZFAIL__SHIFT 17 +static inline uint32_t A3XX_RB_STENCIL_CONTROL_ZFAIL(enum adreno_stencil_op val) +{ + return ((val) << A3XX_RB_STENCIL_CONTROL_ZFAIL__SHIFT) & 
A3XX_RB_STENCIL_CONTROL_ZFAIL__MASK; +} +#define A3XX_RB_STENCIL_CONTROL_FUNC_BF__MASK 0x00700000 +#define A3XX_RB_STENCIL_CONTROL_FUNC_BF__SHIFT 20 +static inline uint32_t A3XX_RB_STENCIL_CONTROL_FUNC_BF(enum adreno_compare_func val) +{ + return ((val) << A3XX_RB_STENCIL_CONTROL_FUNC_BF__SHIFT) & A3XX_RB_STENCIL_CONTROL_FUNC_BF__MASK; +} +#define A3XX_RB_STENCIL_CONTROL_FAIL_BF__MASK 0x03800000 +#define A3XX_RB_STENCIL_CONTROL_FAIL_BF__SHIFT 23 +static inline uint32_t A3XX_RB_STENCIL_CONTROL_FAIL_BF(enum adreno_stencil_op val) +{ + return ((val) << A3XX_RB_STENCIL_CONTROL_FAIL_BF__SHIFT) & A3XX_RB_STENCIL_CONTROL_FAIL_BF__MASK; +} +#define A3XX_RB_STENCIL_CONTROL_ZPASS_BF__MASK 0x1c000000 +#define A3XX_RB_STENCIL_CONTROL_ZPASS_BF__SHIFT 26 +static inline uint32_t A3XX_RB_STENCIL_CONTROL_ZPASS_BF(enum adreno_stencil_op val) +{ + return ((val) << A3XX_RB_STENCIL_CONTROL_ZPASS_BF__SHIFT) & A3XX_RB_STENCIL_CONTROL_ZPASS_BF__MASK; +} +#define A3XX_RB_STENCIL_CONTROL_ZFAIL_BF__MASK 0xe0000000 +#define A3XX_RB_STENCIL_CONTROL_ZFAIL_BF__SHIFT 29 +static inline uint32_t A3XX_RB_STENCIL_CONTROL_ZFAIL_BF(enum adreno_stencil_op val) +{ + return ((val) << A3XX_RB_STENCIL_CONTROL_ZFAIL_BF__SHIFT) & A3XX_RB_STENCIL_CONTROL_ZFAIL_BF__MASK; +} + +#define REG_A3XX_RB_STENCIL_CLEAR 0x00002105 + +#define REG_A3XX_RB_STENCIL_INFO 0x00002106 +#define A3XX_RB_STENCIL_INFO_STENCIL_BASE__MASK 0xfffff800 +#define A3XX_RB_STENCIL_INFO_STENCIL_BASE__SHIFT 11 +static inline uint32_t A3XX_RB_STENCIL_INFO_STENCIL_BASE(uint32_t val) +{ + assert(!(val & 0xfff)); + return ((val >> 12) << A3XX_RB_STENCIL_INFO_STENCIL_BASE__SHIFT) & A3XX_RB_STENCIL_INFO_STENCIL_BASE__MASK; +} + +#define REG_A3XX_RB_STENCIL_PITCH 0x00002107 +#define A3XX_RB_STENCIL_PITCH__MASK 0xffffffff +#define A3XX_RB_STENCIL_PITCH__SHIFT 0 +static inline uint32_t A3XX_RB_STENCIL_PITCH(uint32_t val) +{ + assert(!(val & 0x7)); + return ((val >> 3) << A3XX_RB_STENCIL_PITCH__SHIFT) & A3XX_RB_STENCIL_PITCH__MASK; +} + +#define 
REG_A3XX_RB_STENCILREFMASK 0x00002108 +#define A3XX_RB_STENCILREFMASK_STENCILREF__MASK 0x000000ff +#define A3XX_RB_STENCILREFMASK_STENCILREF__SHIFT 0 +static inline uint32_t A3XX_RB_STENCILREFMASK_STENCILREF(uint32_t val) +{ + return ((val) << A3XX_RB_STENCILREFMASK_STENCILREF__SHIFT) & A3XX_RB_STENCILREFMASK_STENCILREF__MASK; +} +#define A3XX_RB_STENCILREFMASK_STENCILMASK__MASK 0x0000ff00 +#define A3XX_RB_STENCILREFMASK_STENCILMASK__SHIFT 8 +static inline uint32_t A3XX_RB_STENCILREFMASK_STENCILMASK(uint32_t val) +{ + return ((val) << A3XX_RB_STENCILREFMASK_STENCILMASK__SHIFT) & A3XX_RB_STENCILREFMASK_STENCILMASK__MASK; +} +#define A3XX_RB_STENCILREFMASK_STENCILWRITEMASK__MASK 0x00ff0000 +#define A3XX_RB_STENCILREFMASK_STENCILWRITEMASK__SHIFT 16 +static inline uint32_t A3XX_RB_STENCILREFMASK_STENCILWRITEMASK(uint32_t val) +{ + return ((val) << A3XX_RB_STENCILREFMASK_STENCILWRITEMASK__SHIFT) & A3XX_RB_STENCILREFMASK_STENCILWRITEMASK__MASK; +} + +#define REG_A3XX_RB_STENCILREFMASK_BF 0x00002109 +#define A3XX_RB_STENCILREFMASK_BF_STENCILREF__MASK 0x000000ff +#define A3XX_RB_STENCILREFMASK_BF_STENCILREF__SHIFT 0 +static inline uint32_t A3XX_RB_STENCILREFMASK_BF_STENCILREF(uint32_t val) +{ + return ((val) << A3XX_RB_STENCILREFMASK_BF_STENCILREF__SHIFT) & A3XX_RB_STENCILREFMASK_BF_STENCILREF__MASK; +} +#define A3XX_RB_STENCILREFMASK_BF_STENCILMASK__MASK 0x0000ff00 +#define A3XX_RB_STENCILREFMASK_BF_STENCILMASK__SHIFT 8 +static inline uint32_t A3XX_RB_STENCILREFMASK_BF_STENCILMASK(uint32_t val) +{ + return ((val) << A3XX_RB_STENCILREFMASK_BF_STENCILMASK__SHIFT) & A3XX_RB_STENCILREFMASK_BF_STENCILMASK__MASK; +} +#define A3XX_RB_STENCILREFMASK_BF_STENCILWRITEMASK__MASK 0x00ff0000 +#define A3XX_RB_STENCILREFMASK_BF_STENCILWRITEMASK__SHIFT 16 +static inline uint32_t A3XX_RB_STENCILREFMASK_BF_STENCILWRITEMASK(uint32_t val) +{ + return ((val) << A3XX_RB_STENCILREFMASK_BF_STENCILWRITEMASK__SHIFT) & A3XX_RB_STENCILREFMASK_BF_STENCILWRITEMASK__MASK; +} + +#define 
REG_A3XX_RB_LRZ_VSC_CONTROL 0x0000210c +#define A3XX_RB_LRZ_VSC_CONTROL_BINNING_ENABLE 0x00000002 + +#define REG_A3XX_RB_WINDOW_OFFSET 0x0000210e +#define A3XX_RB_WINDOW_OFFSET_X__MASK 0x0000ffff +#define A3XX_RB_WINDOW_OFFSET_X__SHIFT 0 +static inline uint32_t A3XX_RB_WINDOW_OFFSET_X(uint32_t val) +{ + return ((val) << A3XX_RB_WINDOW_OFFSET_X__SHIFT) & A3XX_RB_WINDOW_OFFSET_X__MASK; +} +#define A3XX_RB_WINDOW_OFFSET_Y__MASK 0xffff0000 +#define A3XX_RB_WINDOW_OFFSET_Y__SHIFT 16 +static inline uint32_t A3XX_RB_WINDOW_OFFSET_Y(uint32_t val) +{ + return ((val) << A3XX_RB_WINDOW_OFFSET_Y__SHIFT) & A3XX_RB_WINDOW_OFFSET_Y__MASK; +} + +#define REG_A3XX_RB_SAMPLE_COUNT_CONTROL 0x00002110 +#define A3XX_RB_SAMPLE_COUNT_CONTROL_RESET 0x00000001 +#define A3XX_RB_SAMPLE_COUNT_CONTROL_COPY 0x00000002 + +#define REG_A3XX_RB_SAMPLE_COUNT_ADDR 0x00002111 + +#define REG_A3XX_RB_Z_CLAMP_MIN 0x00002114 + +#define REG_A3XX_RB_Z_CLAMP_MAX 0x00002115 + +#define REG_A3XX_VGT_BIN_BASE 0x000021e1 + +#define REG_A3XX_VGT_BIN_SIZE 0x000021e2 + +#define REG_A3XX_PC_VSTREAM_CONTROL 0x000021e4 +#define A3XX_PC_VSTREAM_CONTROL_SIZE__MASK 0x003f0000 +#define A3XX_PC_VSTREAM_CONTROL_SIZE__SHIFT 16 +static inline uint32_t A3XX_PC_VSTREAM_CONTROL_SIZE(uint32_t val) +{ + return ((val) << A3XX_PC_VSTREAM_CONTROL_SIZE__SHIFT) & A3XX_PC_VSTREAM_CONTROL_SIZE__MASK; +} +#define A3XX_PC_VSTREAM_CONTROL_N__MASK 0x07c00000 +#define A3XX_PC_VSTREAM_CONTROL_N__SHIFT 22 +static inline uint32_t A3XX_PC_VSTREAM_CONTROL_N(uint32_t val) +{ + return ((val) << A3XX_PC_VSTREAM_CONTROL_N__SHIFT) & A3XX_PC_VSTREAM_CONTROL_N__MASK; +} + +#define REG_A3XX_PC_VERTEX_REUSE_BLOCK_CNTL 0x000021ea + +#define REG_A3XX_PC_PRIM_VTX_CNTL 0x000021ec +#define A3XX_PC_PRIM_VTX_CNTL_STRIDE_IN_VPC__MASK 0x0000001f +#define A3XX_PC_PRIM_VTX_CNTL_STRIDE_IN_VPC__SHIFT 0 +static inline uint32_t A3XX_PC_PRIM_VTX_CNTL_STRIDE_IN_VPC(uint32_t val) +{ + return ((val) << A3XX_PC_PRIM_VTX_CNTL_STRIDE_IN_VPC__SHIFT) & 
A3XX_PC_PRIM_VTX_CNTL_STRIDE_IN_VPC__MASK; +} +#define A3XX_PC_PRIM_VTX_CNTL_POLYMODE_FRONT_PTYPE__MASK 0x000000e0 +#define A3XX_PC_PRIM_VTX_CNTL_POLYMODE_FRONT_PTYPE__SHIFT 5 +static inline uint32_t A3XX_PC_PRIM_VTX_CNTL_POLYMODE_FRONT_PTYPE(enum adreno_pa_su_sc_draw val) +{ + return ((val) << A3XX_PC_PRIM_VTX_CNTL_POLYMODE_FRONT_PTYPE__SHIFT) & A3XX_PC_PRIM_VTX_CNTL_POLYMODE_FRONT_PTYPE__MASK; +} +#define A3XX_PC_PRIM_VTX_CNTL_POLYMODE_BACK_PTYPE__MASK 0x00000700 +#define A3XX_PC_PRIM_VTX_CNTL_POLYMODE_BACK_PTYPE__SHIFT 8 +static inline uint32_t A3XX_PC_PRIM_VTX_CNTL_POLYMODE_BACK_PTYPE(enum adreno_pa_su_sc_draw val) +{ + return ((val) << A3XX_PC_PRIM_VTX_CNTL_POLYMODE_BACK_PTYPE__SHIFT) & A3XX_PC_PRIM_VTX_CNTL_POLYMODE_BACK_PTYPE__MASK; +} +#define A3XX_PC_PRIM_VTX_CNTL_POLYMODE_ENABLE 0x00001000 +#define A3XX_PC_PRIM_VTX_CNTL_PRIMITIVE_RESTART 0x00100000 +#define A3XX_PC_PRIM_VTX_CNTL_PROVOKING_VTX_LAST 0x02000000 +#define A3XX_PC_PRIM_VTX_CNTL_PSIZE 0x04000000 + +#define REG_A3XX_PC_RESTART_INDEX 0x000021ed + +#define REG_A3XX_HLSQ_CONTROL_0_REG 0x00002200 +#define A3XX_HLSQ_CONTROL_0_REG_FSTHREADSIZE__MASK 0x00000030 +#define A3XX_HLSQ_CONTROL_0_REG_FSTHREADSIZE__SHIFT 4 +static inline uint32_t A3XX_HLSQ_CONTROL_0_REG_FSTHREADSIZE(enum a3xx_threadsize val) +{ + return ((val) << A3XX_HLSQ_CONTROL_0_REG_FSTHREADSIZE__SHIFT) & A3XX_HLSQ_CONTROL_0_REG_FSTHREADSIZE__MASK; +} +#define A3XX_HLSQ_CONTROL_0_REG_FSSUPERTHREADENABLE 0x00000040 +#define A3XX_HLSQ_CONTROL_0_REG_COMPUTEMODE 0x00000100 +#define A3XX_HLSQ_CONTROL_0_REG_SPSHADERRESTART 0x00000200 +#define A3XX_HLSQ_CONTROL_0_REG_RESERVED2 0x00000400 +#define A3XX_HLSQ_CONTROL_0_REG_CYCLETIMEOUTLIMITVPC__MASK 0x00fff000 +#define A3XX_HLSQ_CONTROL_0_REG_CYCLETIMEOUTLIMITVPC__SHIFT 12 +static inline uint32_t A3XX_HLSQ_CONTROL_0_REG_CYCLETIMEOUTLIMITVPC(uint32_t val) +{ + return ((val) << A3XX_HLSQ_CONTROL_0_REG_CYCLETIMEOUTLIMITVPC__SHIFT) & A3XX_HLSQ_CONTROL_0_REG_CYCLETIMEOUTLIMITVPC__MASK; +} +#define 
A3XX_HLSQ_CONTROL_0_REG_FSONLYTEX 0x02000000 +#define A3XX_HLSQ_CONTROL_0_REG_CHUNKDISABLE 0x04000000 +#define A3XX_HLSQ_CONTROL_0_REG_CONSTMODE__MASK 0x08000000 +#define A3XX_HLSQ_CONTROL_0_REG_CONSTMODE__SHIFT 27 +static inline uint32_t A3XX_HLSQ_CONTROL_0_REG_CONSTMODE(uint32_t val) +{ + return ((val) << A3XX_HLSQ_CONTROL_0_REG_CONSTMODE__SHIFT) & A3XX_HLSQ_CONTROL_0_REG_CONSTMODE__MASK; +} +#define A3XX_HLSQ_CONTROL_0_REG_LAZYUPDATEDISABLE 0x10000000 +#define A3XX_HLSQ_CONTROL_0_REG_SPCONSTFULLUPDATE 0x20000000 +#define A3XX_HLSQ_CONTROL_0_REG_TPFULLUPDATE 0x40000000 +#define A3XX_HLSQ_CONTROL_0_REG_SINGLECONTEXT 0x80000000 + +#define REG_A3XX_HLSQ_CONTROL_1_REG 0x00002201 +#define A3XX_HLSQ_CONTROL_1_REG_VSTHREADSIZE__MASK 0x000000c0 +#define A3XX_HLSQ_CONTROL_1_REG_VSTHREADSIZE__SHIFT 6 +static inline uint32_t A3XX_HLSQ_CONTROL_1_REG_VSTHREADSIZE(enum a3xx_threadsize val) +{ + return ((val) << A3XX_HLSQ_CONTROL_1_REG_VSTHREADSIZE__SHIFT) & A3XX_HLSQ_CONTROL_1_REG_VSTHREADSIZE__MASK; +} +#define A3XX_HLSQ_CONTROL_1_REG_VSSUPERTHREADENABLE 0x00000100 +#define A3XX_HLSQ_CONTROL_1_REG_FRAGCOORDXYREGID__MASK 0x00ff0000 +#define A3XX_HLSQ_CONTROL_1_REG_FRAGCOORDXYREGID__SHIFT 16 +static inline uint32_t A3XX_HLSQ_CONTROL_1_REG_FRAGCOORDXYREGID(uint32_t val) +{ + return ((val) << A3XX_HLSQ_CONTROL_1_REG_FRAGCOORDXYREGID__SHIFT) & A3XX_HLSQ_CONTROL_1_REG_FRAGCOORDXYREGID__MASK; +} +#define A3XX_HLSQ_CONTROL_1_REG_FRAGCOORDZWREGID__MASK 0xff000000 +#define A3XX_HLSQ_CONTROL_1_REG_FRAGCOORDZWREGID__SHIFT 24 +static inline uint32_t A3XX_HLSQ_CONTROL_1_REG_FRAGCOORDZWREGID(uint32_t val) +{ + return ((val) << A3XX_HLSQ_CONTROL_1_REG_FRAGCOORDZWREGID__SHIFT) & A3XX_HLSQ_CONTROL_1_REG_FRAGCOORDZWREGID__MASK; +} + +#define REG_A3XX_HLSQ_CONTROL_2_REG 0x00002202 +#define A3XX_HLSQ_CONTROL_2_REG_FACENESSREGID__MASK 0x000003fc +#define A3XX_HLSQ_CONTROL_2_REG_FACENESSREGID__SHIFT 2 +static inline uint32_t A3XX_HLSQ_CONTROL_2_REG_FACENESSREGID(uint32_t val) +{ + return ((val) << 
A3XX_HLSQ_CONTROL_2_REG_FACENESSREGID__SHIFT) & A3XX_HLSQ_CONTROL_2_REG_FACENESSREGID__MASK; +} +#define A3XX_HLSQ_CONTROL_2_REG_COVVALUEREGID__MASK 0x03fc0000 +#define A3XX_HLSQ_CONTROL_2_REG_COVVALUEREGID__SHIFT 18 +static inline uint32_t A3XX_HLSQ_CONTROL_2_REG_COVVALUEREGID(uint32_t val) +{ + return ((val) << A3XX_HLSQ_CONTROL_2_REG_COVVALUEREGID__SHIFT) & A3XX_HLSQ_CONTROL_2_REG_COVVALUEREGID__MASK; +} +#define A3XX_HLSQ_CONTROL_2_REG_PRIMALLOCTHRESHOLD__MASK 0xfc000000 +#define A3XX_HLSQ_CONTROL_2_REG_PRIMALLOCTHRESHOLD__SHIFT 26 +static inline uint32_t A3XX_HLSQ_CONTROL_2_REG_PRIMALLOCTHRESHOLD(uint32_t val) +{ + return ((val) << A3XX_HLSQ_CONTROL_2_REG_PRIMALLOCTHRESHOLD__SHIFT) & A3XX_HLSQ_CONTROL_2_REG_PRIMALLOCTHRESHOLD__MASK; +} + +#define REG_A3XX_HLSQ_CONTROL_3_REG 0x00002203 +#define A3XX_HLSQ_CONTROL_3_REG_REGID__MASK 0x000000ff +#define A3XX_HLSQ_CONTROL_3_REG_REGID__SHIFT 0 +static inline uint32_t A3XX_HLSQ_CONTROL_3_REG_REGID(uint32_t val) +{ + return ((val) << A3XX_HLSQ_CONTROL_3_REG_REGID__SHIFT) & A3XX_HLSQ_CONTROL_3_REG_REGID__MASK; +} + +#define REG_A3XX_HLSQ_VS_CONTROL_REG 0x00002204 +#define A3XX_HLSQ_VS_CONTROL_REG_CONSTLENGTH__MASK 0x000003ff +#define A3XX_HLSQ_VS_CONTROL_REG_CONSTLENGTH__SHIFT 0 +static inline uint32_t A3XX_HLSQ_VS_CONTROL_REG_CONSTLENGTH(uint32_t val) +{ + return ((val) << A3XX_HLSQ_VS_CONTROL_REG_CONSTLENGTH__SHIFT) & A3XX_HLSQ_VS_CONTROL_REG_CONSTLENGTH__MASK; +} +#define A3XX_HLSQ_VS_CONTROL_REG_CONSTSTARTOFFSET__MASK 0x001ff000 +#define A3XX_HLSQ_VS_CONTROL_REG_CONSTSTARTOFFSET__SHIFT 12 +static inline uint32_t A3XX_HLSQ_VS_CONTROL_REG_CONSTSTARTOFFSET(uint32_t val) +{ + return ((val) << A3XX_HLSQ_VS_CONTROL_REG_CONSTSTARTOFFSET__SHIFT) & A3XX_HLSQ_VS_CONTROL_REG_CONSTSTARTOFFSET__MASK; +} +#define A3XX_HLSQ_VS_CONTROL_REG_INSTRLENGTH__MASK 0xff000000 +#define A3XX_HLSQ_VS_CONTROL_REG_INSTRLENGTH__SHIFT 24 +static inline uint32_t A3XX_HLSQ_VS_CONTROL_REG_INSTRLENGTH(uint32_t val) +{ + return ((val) << 
A3XX_HLSQ_VS_CONTROL_REG_INSTRLENGTH__SHIFT) & A3XX_HLSQ_VS_CONTROL_REG_INSTRLENGTH__MASK; +} + +#define REG_A3XX_HLSQ_FS_CONTROL_REG 0x00002205 +#define A3XX_HLSQ_FS_CONTROL_REG_CONSTLENGTH__MASK 0x000003ff +#define A3XX_HLSQ_FS_CONTROL_REG_CONSTLENGTH__SHIFT 0 +static inline uint32_t A3XX_HLSQ_FS_CONTROL_REG_CONSTLENGTH(uint32_t val) +{ + return ((val) << A3XX_HLSQ_FS_CONTROL_REG_CONSTLENGTH__SHIFT) & A3XX_HLSQ_FS_CONTROL_REG_CONSTLENGTH__MASK; +} +#define A3XX_HLSQ_FS_CONTROL_REG_CONSTSTARTOFFSET__MASK 0x001ff000 +#define A3XX_HLSQ_FS_CONTROL_REG_CONSTSTARTOFFSET__SHIFT 12 +static inline uint32_t A3XX_HLSQ_FS_CONTROL_REG_CONSTSTARTOFFSET(uint32_t val) +{ + return ((val) << A3XX_HLSQ_FS_CONTROL_REG_CONSTSTARTOFFSET__SHIFT) & A3XX_HLSQ_FS_CONTROL_REG_CONSTSTARTOFFSET__MASK; +} +#define A3XX_HLSQ_FS_CONTROL_REG_INSTRLENGTH__MASK 0xff000000 +#define A3XX_HLSQ_FS_CONTROL_REG_INSTRLENGTH__SHIFT 24 +static inline uint32_t A3XX_HLSQ_FS_CONTROL_REG_INSTRLENGTH(uint32_t val) +{ + return ((val) << A3XX_HLSQ_FS_CONTROL_REG_INSTRLENGTH__SHIFT) & A3XX_HLSQ_FS_CONTROL_REG_INSTRLENGTH__MASK; +} + +#define REG_A3XX_HLSQ_CONST_VSPRESV_RANGE_REG 0x00002206 +#define A3XX_HLSQ_CONST_VSPRESV_RANGE_REG_STARTENTRY__MASK 0x000001ff +#define A3XX_HLSQ_CONST_VSPRESV_RANGE_REG_STARTENTRY__SHIFT 0 +static inline uint32_t A3XX_HLSQ_CONST_VSPRESV_RANGE_REG_STARTENTRY(uint32_t val) +{ + return ((val) << A3XX_HLSQ_CONST_VSPRESV_RANGE_REG_STARTENTRY__SHIFT) & A3XX_HLSQ_CONST_VSPRESV_RANGE_REG_STARTENTRY__MASK; +} +#define A3XX_HLSQ_CONST_VSPRESV_RANGE_REG_ENDENTRY__MASK 0x01ff0000 +#define A3XX_HLSQ_CONST_VSPRESV_RANGE_REG_ENDENTRY__SHIFT 16 +static inline uint32_t A3XX_HLSQ_CONST_VSPRESV_RANGE_REG_ENDENTRY(uint32_t val) +{ + return ((val) << A3XX_HLSQ_CONST_VSPRESV_RANGE_REG_ENDENTRY__SHIFT) & A3XX_HLSQ_CONST_VSPRESV_RANGE_REG_ENDENTRY__MASK; +} + +#define REG_A3XX_HLSQ_CONST_FSPRESV_RANGE_REG 0x00002207 +#define A3XX_HLSQ_CONST_FSPRESV_RANGE_REG_STARTENTRY__MASK 0x000001ff +#define 
A3XX_HLSQ_CONST_FSPRESV_RANGE_REG_STARTENTRY__SHIFT 0 +static inline uint32_t A3XX_HLSQ_CONST_FSPRESV_RANGE_REG_STARTENTRY(uint32_t val) +{ + return ((val) << A3XX_HLSQ_CONST_FSPRESV_RANGE_REG_STARTENTRY__SHIFT) & A3XX_HLSQ_CONST_FSPRESV_RANGE_REG_STARTENTRY__MASK; +} +#define A3XX_HLSQ_CONST_FSPRESV_RANGE_REG_ENDENTRY__MASK 0x01ff0000 +#define A3XX_HLSQ_CONST_FSPRESV_RANGE_REG_ENDENTRY__SHIFT 16 +static inline uint32_t A3XX_HLSQ_CONST_FSPRESV_RANGE_REG_ENDENTRY(uint32_t val) +{ + return ((val) << A3XX_HLSQ_CONST_FSPRESV_RANGE_REG_ENDENTRY__SHIFT) & A3XX_HLSQ_CONST_FSPRESV_RANGE_REG_ENDENTRY__MASK; +} + +#define REG_A3XX_HLSQ_CL_NDRANGE_0_REG 0x0000220a +#define A3XX_HLSQ_CL_NDRANGE_0_REG_WORKDIM__MASK 0x00000003 +#define A3XX_HLSQ_CL_NDRANGE_0_REG_WORKDIM__SHIFT 0 +static inline uint32_t A3XX_HLSQ_CL_NDRANGE_0_REG_WORKDIM(uint32_t val) +{ + return ((val) << A3XX_HLSQ_CL_NDRANGE_0_REG_WORKDIM__SHIFT) & A3XX_HLSQ_CL_NDRANGE_0_REG_WORKDIM__MASK; +} +#define A3XX_HLSQ_CL_NDRANGE_0_REG_LOCALSIZE0__MASK 0x00000ffc +#define A3XX_HLSQ_CL_NDRANGE_0_REG_LOCALSIZE0__SHIFT 2 +static inline uint32_t A3XX_HLSQ_CL_NDRANGE_0_REG_LOCALSIZE0(uint32_t val) +{ + return ((val) << A3XX_HLSQ_CL_NDRANGE_0_REG_LOCALSIZE0__SHIFT) & A3XX_HLSQ_CL_NDRANGE_0_REG_LOCALSIZE0__MASK; +} +#define A3XX_HLSQ_CL_NDRANGE_0_REG_LOCALSIZE1__MASK 0x003ff000 +#define A3XX_HLSQ_CL_NDRANGE_0_REG_LOCALSIZE1__SHIFT 12 +static inline uint32_t A3XX_HLSQ_CL_NDRANGE_0_REG_LOCALSIZE1(uint32_t val) +{ + return ((val) << A3XX_HLSQ_CL_NDRANGE_0_REG_LOCALSIZE1__SHIFT) & A3XX_HLSQ_CL_NDRANGE_0_REG_LOCALSIZE1__MASK; +} +#define A3XX_HLSQ_CL_NDRANGE_0_REG_LOCALSIZE2__MASK 0xffc00000 +#define A3XX_HLSQ_CL_NDRANGE_0_REG_LOCALSIZE2__SHIFT 22 +static inline uint32_t A3XX_HLSQ_CL_NDRANGE_0_REG_LOCALSIZE2(uint32_t val) +{ + return ((val) << A3XX_HLSQ_CL_NDRANGE_0_REG_LOCALSIZE2__SHIFT) & A3XX_HLSQ_CL_NDRANGE_0_REG_LOCALSIZE2__MASK; +} + +static inline uint32_t REG_A3XX_HLSQ_CL_GLOBAL_WORK(uint32_t i0) { return 0x0000220b + 
0x2*i0; } + +static inline uint32_t REG_A3XX_HLSQ_CL_GLOBAL_WORK_SIZE(uint32_t i0) { return 0x0000220b + 0x2*i0; } + +static inline uint32_t REG_A3XX_HLSQ_CL_GLOBAL_WORK_OFFSET(uint32_t i0) { return 0x0000220c + 0x2*i0; } + +#define REG_A3XX_HLSQ_CL_CONTROL_0_REG 0x00002211 + +#define REG_A3XX_HLSQ_CL_CONTROL_1_REG 0x00002212 + +#define REG_A3XX_HLSQ_CL_KERNEL_CONST_REG 0x00002214 + +static inline uint32_t REG_A3XX_HLSQ_CL_KERNEL_GROUP(uint32_t i0) { return 0x00002215 + 0x1*i0; } + +static inline uint32_t REG_A3XX_HLSQ_CL_KERNEL_GROUP_RATIO(uint32_t i0) { return 0x00002215 + 0x1*i0; } + +#define REG_A3XX_HLSQ_CL_KERNEL_GROUP_Y_REG 0x00002216 + +#define REG_A3XX_HLSQ_CL_KERNEL_GROUP_Z_REG 0x00002217 + +#define REG_A3XX_HLSQ_CL_WG_OFFSET_REG 0x0000221a + +#define REG_A3XX_VFD_CONTROL_0 0x00002240 +#define A3XX_VFD_CONTROL_0_TOTALATTRTOVS__MASK 0x0003ffff +#define A3XX_VFD_CONTROL_0_TOTALATTRTOVS__SHIFT 0 +static inline uint32_t A3XX_VFD_CONTROL_0_TOTALATTRTOVS(uint32_t val) +{ + return ((val) << A3XX_VFD_CONTROL_0_TOTALATTRTOVS__SHIFT) & A3XX_VFD_CONTROL_0_TOTALATTRTOVS__MASK; +} +#define A3XX_VFD_CONTROL_0_PACKETSIZE__MASK 0x003c0000 +#define A3XX_VFD_CONTROL_0_PACKETSIZE__SHIFT 18 +static inline uint32_t A3XX_VFD_CONTROL_0_PACKETSIZE(uint32_t val) +{ + return ((val) << A3XX_VFD_CONTROL_0_PACKETSIZE__SHIFT) & A3XX_VFD_CONTROL_0_PACKETSIZE__MASK; +} +#define A3XX_VFD_CONTROL_0_STRMDECINSTRCNT__MASK 0x07c00000 +#define A3XX_VFD_CONTROL_0_STRMDECINSTRCNT__SHIFT 22 +static inline uint32_t A3XX_VFD_CONTROL_0_STRMDECINSTRCNT(uint32_t val) +{ + return ((val) << A3XX_VFD_CONTROL_0_STRMDECINSTRCNT__SHIFT) & A3XX_VFD_CONTROL_0_STRMDECINSTRCNT__MASK; +} +#define A3XX_VFD_CONTROL_0_STRMFETCHINSTRCNT__MASK 0xf8000000 +#define A3XX_VFD_CONTROL_0_STRMFETCHINSTRCNT__SHIFT 27 +static inline uint32_t A3XX_VFD_CONTROL_0_STRMFETCHINSTRCNT(uint32_t val) +{ + return ((val) << A3XX_VFD_CONTROL_0_STRMFETCHINSTRCNT__SHIFT) & A3XX_VFD_CONTROL_0_STRMFETCHINSTRCNT__MASK; +} + +#define 
REG_A3XX_VFD_CONTROL_1 0x00002241 +#define A3XX_VFD_CONTROL_1_MAXSTORAGE__MASK 0x0000000f +#define A3XX_VFD_CONTROL_1_MAXSTORAGE__SHIFT 0 +static inline uint32_t A3XX_VFD_CONTROL_1_MAXSTORAGE(uint32_t val) +{ + return ((val) << A3XX_VFD_CONTROL_1_MAXSTORAGE__SHIFT) & A3XX_VFD_CONTROL_1_MAXSTORAGE__MASK; +} +#define A3XX_VFD_CONTROL_1_MAXTHRESHOLD__MASK 0x000000f0 +#define A3XX_VFD_CONTROL_1_MAXTHRESHOLD__SHIFT 4 +static inline uint32_t A3XX_VFD_CONTROL_1_MAXTHRESHOLD(uint32_t val) +{ + return ((val) << A3XX_VFD_CONTROL_1_MAXTHRESHOLD__SHIFT) & A3XX_VFD_CONTROL_1_MAXTHRESHOLD__MASK; +} +#define A3XX_VFD_CONTROL_1_MINTHRESHOLD__MASK 0x00000f00 +#define A3XX_VFD_CONTROL_1_MINTHRESHOLD__SHIFT 8 +static inline uint32_t A3XX_VFD_CONTROL_1_MINTHRESHOLD(uint32_t val) +{ + return ((val) << A3XX_VFD_CONTROL_1_MINTHRESHOLD__SHIFT) & A3XX_VFD_CONTROL_1_MINTHRESHOLD__MASK; +} +#define A3XX_VFD_CONTROL_1_REGID4VTX__MASK 0x00ff0000 +#define A3XX_VFD_CONTROL_1_REGID4VTX__SHIFT 16 +static inline uint32_t A3XX_VFD_CONTROL_1_REGID4VTX(uint32_t val) +{ + return ((val) << A3XX_VFD_CONTROL_1_REGID4VTX__SHIFT) & A3XX_VFD_CONTROL_1_REGID4VTX__MASK; +} +#define A3XX_VFD_CONTROL_1_REGID4INST__MASK 0xff000000 +#define A3XX_VFD_CONTROL_1_REGID4INST__SHIFT 24 +static inline uint32_t A3XX_VFD_CONTROL_1_REGID4INST(uint32_t val) +{ + return ((val) << A3XX_VFD_CONTROL_1_REGID4INST__SHIFT) & A3XX_VFD_CONTROL_1_REGID4INST__MASK; +} + +#define REG_A3XX_VFD_INDEX_MIN 0x00002242 + +#define REG_A3XX_VFD_INDEX_MAX 0x00002243 + +#define REG_A3XX_VFD_INSTANCEID_OFFSET 0x00002244 + +#define REG_A3XX_VFD_INDEX_OFFSET 0x00002245 + +#define REG_A3XX_VFD_INDEX_OFFSET 0x00002245 + +static inline uint32_t REG_A3XX_VFD_FETCH(uint32_t i0) { return 0x00002246 + 0x2*i0; } + +static inline uint32_t REG_A3XX_VFD_FETCH_INSTR_0(uint32_t i0) { return 0x00002246 + 0x2*i0; } +#define A3XX_VFD_FETCH_INSTR_0_FETCHSIZE__MASK 0x0000007f +#define A3XX_VFD_FETCH_INSTR_0_FETCHSIZE__SHIFT 0 +static inline uint32_t 
A3XX_VFD_FETCH_INSTR_0_FETCHSIZE(uint32_t val) +{ + return ((val) << A3XX_VFD_FETCH_INSTR_0_FETCHSIZE__SHIFT) & A3XX_VFD_FETCH_INSTR_0_FETCHSIZE__MASK; +} +#define A3XX_VFD_FETCH_INSTR_0_BUFSTRIDE__MASK 0x0000ff80 +#define A3XX_VFD_FETCH_INSTR_0_BUFSTRIDE__SHIFT 7 +static inline uint32_t A3XX_VFD_FETCH_INSTR_0_BUFSTRIDE(uint32_t val) +{ + return ((val) << A3XX_VFD_FETCH_INSTR_0_BUFSTRIDE__SHIFT) & A3XX_VFD_FETCH_INSTR_0_BUFSTRIDE__MASK; +} +#define A3XX_VFD_FETCH_INSTR_0_INSTANCED 0x00010000 +#define A3XX_VFD_FETCH_INSTR_0_SWITCHNEXT 0x00020000 +#define A3XX_VFD_FETCH_INSTR_0_INDEXCODE__MASK 0x00fc0000 +#define A3XX_VFD_FETCH_INSTR_0_INDEXCODE__SHIFT 18 +static inline uint32_t A3XX_VFD_FETCH_INSTR_0_INDEXCODE(uint32_t val) +{ + return ((val) << A3XX_VFD_FETCH_INSTR_0_INDEXCODE__SHIFT) & A3XX_VFD_FETCH_INSTR_0_INDEXCODE__MASK; +} +#define A3XX_VFD_FETCH_INSTR_0_STEPRATE__MASK 0xff000000 +#define A3XX_VFD_FETCH_INSTR_0_STEPRATE__SHIFT 24 +static inline uint32_t A3XX_VFD_FETCH_INSTR_0_STEPRATE(uint32_t val) +{ + return ((val) << A3XX_VFD_FETCH_INSTR_0_STEPRATE__SHIFT) & A3XX_VFD_FETCH_INSTR_0_STEPRATE__MASK; +} + +static inline uint32_t REG_A3XX_VFD_FETCH_INSTR_1(uint32_t i0) { return 0x00002247 + 0x2*i0; } + +static inline uint32_t REG_A3XX_VFD_DECODE(uint32_t i0) { return 0x00002266 + 0x1*i0; } + +static inline uint32_t REG_A3XX_VFD_DECODE_INSTR(uint32_t i0) { return 0x00002266 + 0x1*i0; } +#define A3XX_VFD_DECODE_INSTR_WRITEMASK__MASK 0x0000000f +#define A3XX_VFD_DECODE_INSTR_WRITEMASK__SHIFT 0 +static inline uint32_t A3XX_VFD_DECODE_INSTR_WRITEMASK(uint32_t val) +{ + return ((val) << A3XX_VFD_DECODE_INSTR_WRITEMASK__SHIFT) & A3XX_VFD_DECODE_INSTR_WRITEMASK__MASK; +} +#define A3XX_VFD_DECODE_INSTR_CONSTFILL 0x00000010 +#define A3XX_VFD_DECODE_INSTR_FORMAT__MASK 0x00000fc0 +#define A3XX_VFD_DECODE_INSTR_FORMAT__SHIFT 6 +static inline uint32_t A3XX_VFD_DECODE_INSTR_FORMAT(enum a3xx_vtx_fmt val) +{ + return ((val) << A3XX_VFD_DECODE_INSTR_FORMAT__SHIFT) & 
A3XX_VFD_DECODE_INSTR_FORMAT__MASK; +} +#define A3XX_VFD_DECODE_INSTR_REGID__MASK 0x000ff000 +#define A3XX_VFD_DECODE_INSTR_REGID__SHIFT 12 +static inline uint32_t A3XX_VFD_DECODE_INSTR_REGID(uint32_t val) +{ + return ((val) << A3XX_VFD_DECODE_INSTR_REGID__SHIFT) & A3XX_VFD_DECODE_INSTR_REGID__MASK; +} +#define A3XX_VFD_DECODE_INSTR_INT 0x00100000 +#define A3XX_VFD_DECODE_INSTR_SWAP__MASK 0x00c00000 +#define A3XX_VFD_DECODE_INSTR_SWAP__SHIFT 22 +static inline uint32_t A3XX_VFD_DECODE_INSTR_SWAP(enum a3xx_color_swap val) +{ + return ((val) << A3XX_VFD_DECODE_INSTR_SWAP__SHIFT) & A3XX_VFD_DECODE_INSTR_SWAP__MASK; +} +#define A3XX_VFD_DECODE_INSTR_SHIFTCNT__MASK 0x1f000000 +#define A3XX_VFD_DECODE_INSTR_SHIFTCNT__SHIFT 24 +static inline uint32_t A3XX_VFD_DECODE_INSTR_SHIFTCNT(uint32_t val) +{ + return ((val) << A3XX_VFD_DECODE_INSTR_SHIFTCNT__SHIFT) & A3XX_VFD_DECODE_INSTR_SHIFTCNT__MASK; +} +#define A3XX_VFD_DECODE_INSTR_LASTCOMPVALID 0x20000000 +#define A3XX_VFD_DECODE_INSTR_SWITCHNEXT 0x40000000 + +#define REG_A3XX_VFD_VS_THREADING_THRESHOLD 0x0000227e +#define A3XX_VFD_VS_THREADING_THRESHOLD_REGID_THRESHOLD__MASK 0x0000000f +#define A3XX_VFD_VS_THREADING_THRESHOLD_REGID_THRESHOLD__SHIFT 0 +static inline uint32_t A3XX_VFD_VS_THREADING_THRESHOLD_REGID_THRESHOLD(uint32_t val) +{ + return ((val) << A3XX_VFD_VS_THREADING_THRESHOLD_REGID_THRESHOLD__SHIFT) & A3XX_VFD_VS_THREADING_THRESHOLD_REGID_THRESHOLD__MASK; +} +#define A3XX_VFD_VS_THREADING_THRESHOLD_REGID_VTXCNT__MASK 0x0000ff00 +#define A3XX_VFD_VS_THREADING_THRESHOLD_REGID_VTXCNT__SHIFT 8 +static inline uint32_t A3XX_VFD_VS_THREADING_THRESHOLD_REGID_VTXCNT(uint32_t val) +{ + return ((val) << A3XX_VFD_VS_THREADING_THRESHOLD_REGID_VTXCNT__SHIFT) & A3XX_VFD_VS_THREADING_THRESHOLD_REGID_VTXCNT__MASK; +} + +#define REG_A3XX_VPC_ATTR 0x00002280 +#define A3XX_VPC_ATTR_TOTALATTR__MASK 0x000001ff +#define A3XX_VPC_ATTR_TOTALATTR__SHIFT 0 +static inline uint32_t A3XX_VPC_ATTR_TOTALATTR(uint32_t val) +{ + return ((val) << 
A3XX_VPC_ATTR_TOTALATTR__SHIFT) & A3XX_VPC_ATTR_TOTALATTR__MASK; +} +#define A3XX_VPC_ATTR_PSIZE 0x00000200 +#define A3XX_VPC_ATTR_THRDASSIGN__MASK 0x0ffff000 +#define A3XX_VPC_ATTR_THRDASSIGN__SHIFT 12 +static inline uint32_t A3XX_VPC_ATTR_THRDASSIGN(uint32_t val) +{ + return ((val) << A3XX_VPC_ATTR_THRDASSIGN__SHIFT) & A3XX_VPC_ATTR_THRDASSIGN__MASK; +} +#define A3XX_VPC_ATTR_LMSIZE__MASK 0xf0000000 +#define A3XX_VPC_ATTR_LMSIZE__SHIFT 28 +static inline uint32_t A3XX_VPC_ATTR_LMSIZE(uint32_t val) +{ + return ((val) << A3XX_VPC_ATTR_LMSIZE__SHIFT) & A3XX_VPC_ATTR_LMSIZE__MASK; +} + +#define REG_A3XX_VPC_PACK 0x00002281 +#define A3XX_VPC_PACK_NUMFPNONPOSVAR__MASK 0x0000ff00 +#define A3XX_VPC_PACK_NUMFPNONPOSVAR__SHIFT 8 +static inline uint32_t A3XX_VPC_PACK_NUMFPNONPOSVAR(uint32_t val) +{ + return ((val) << A3XX_VPC_PACK_NUMFPNONPOSVAR__SHIFT) & A3XX_VPC_PACK_NUMFPNONPOSVAR__MASK; +} +#define A3XX_VPC_PACK_NUMNONPOSVSVAR__MASK 0x00ff0000 +#define A3XX_VPC_PACK_NUMNONPOSVSVAR__SHIFT 16 +static inline uint32_t A3XX_VPC_PACK_NUMNONPOSVSVAR(uint32_t val) +{ + return ((val) << A3XX_VPC_PACK_NUMNONPOSVSVAR__SHIFT) & A3XX_VPC_PACK_NUMNONPOSVSVAR__MASK; +} + +static inline uint32_t REG_A3XX_VPC_VARYING_INTERP(uint32_t i0) { return 0x00002282 + 0x1*i0; } + +static inline uint32_t REG_A3XX_VPC_VARYING_INTERP_MODE(uint32_t i0) { return 0x00002282 + 0x1*i0; } +#define A3XX_VPC_VARYING_INTERP_MODE_C0__MASK 0x00000003 +#define A3XX_VPC_VARYING_INTERP_MODE_C0__SHIFT 0 +static inline uint32_t A3XX_VPC_VARYING_INTERP_MODE_C0(enum a3xx_intp_mode val) +{ + return ((val) << A3XX_VPC_VARYING_INTERP_MODE_C0__SHIFT) & A3XX_VPC_VARYING_INTERP_MODE_C0__MASK; +} +#define A3XX_VPC_VARYING_INTERP_MODE_C1__MASK 0x0000000c +#define A3XX_VPC_VARYING_INTERP_MODE_C1__SHIFT 2 +static inline uint32_t A3XX_VPC_VARYING_INTERP_MODE_C1(enum a3xx_intp_mode val) +{ + return ((val) << A3XX_VPC_VARYING_INTERP_MODE_C1__SHIFT) & A3XX_VPC_VARYING_INTERP_MODE_C1__MASK; +} +#define 
A3XX_VPC_VARYING_INTERP_MODE_C2__MASK 0x00000030 +#define A3XX_VPC_VARYING_INTERP_MODE_C2__SHIFT 4 +static inline uint32_t A3XX_VPC_VARYING_INTERP_MODE_C2(enum a3xx_intp_mode val) +{ + return ((val) << A3XX_VPC_VARYING_INTERP_MODE_C2__SHIFT) & A3XX_VPC_VARYING_INTERP_MODE_C2__MASK; +} +#define A3XX_VPC_VARYING_INTERP_MODE_C3__MASK 0x000000c0 +#define A3XX_VPC_VARYING_INTERP_MODE_C3__SHIFT 6 +static inline uint32_t A3XX_VPC_VARYING_INTERP_MODE_C3(enum a3xx_intp_mode val) +{ + return ((val) << A3XX_VPC_VARYING_INTERP_MODE_C3__SHIFT) & A3XX_VPC_VARYING_INTERP_MODE_C3__MASK; +} +#define A3XX_VPC_VARYING_INTERP_MODE_C4__MASK 0x00000300 +#define A3XX_VPC_VARYING_INTERP_MODE_C4__SHIFT 8 +static inline uint32_t A3XX_VPC_VARYING_INTERP_MODE_C4(enum a3xx_intp_mode val) +{ + return ((val) << A3XX_VPC_VARYING_INTERP_MODE_C4__SHIFT) & A3XX_VPC_VARYING_INTERP_MODE_C4__MASK; +} +#define A3XX_VPC_VARYING_INTERP_MODE_C5__MASK 0x00000c00 +#define A3XX_VPC_VARYING_INTERP_MODE_C5__SHIFT 10 +static inline uint32_t A3XX_VPC_VARYING_INTERP_MODE_C5(enum a3xx_intp_mode val) +{ + return ((val) << A3XX_VPC_VARYING_INTERP_MODE_C5__SHIFT) & A3XX_VPC_VARYING_INTERP_MODE_C5__MASK; +} +#define A3XX_VPC_VARYING_INTERP_MODE_C6__MASK 0x00003000 +#define A3XX_VPC_VARYING_INTERP_MODE_C6__SHIFT 12 +static inline uint32_t A3XX_VPC_VARYING_INTERP_MODE_C6(enum a3xx_intp_mode val) +{ + return ((val) << A3XX_VPC_VARYING_INTERP_MODE_C6__SHIFT) & A3XX_VPC_VARYING_INTERP_MODE_C6__MASK; +} +#define A3XX_VPC_VARYING_INTERP_MODE_C7__MASK 0x0000c000 +#define A3XX_VPC_VARYING_INTERP_MODE_C7__SHIFT 14 +static inline uint32_t A3XX_VPC_VARYING_INTERP_MODE_C7(enum a3xx_intp_mode val) +{ + return ((val) << A3XX_VPC_VARYING_INTERP_MODE_C7__SHIFT) & A3XX_VPC_VARYING_INTERP_MODE_C7__MASK; +} +#define A3XX_VPC_VARYING_INTERP_MODE_C8__MASK 0x00030000 +#define A3XX_VPC_VARYING_INTERP_MODE_C8__SHIFT 16 +static inline uint32_t A3XX_VPC_VARYING_INTERP_MODE_C8(enum a3xx_intp_mode val) +{ + return ((val) << 
A3XX_VPC_VARYING_INTERP_MODE_C8__SHIFT) & A3XX_VPC_VARYING_INTERP_MODE_C8__MASK; +} +#define A3XX_VPC_VARYING_INTERP_MODE_C9__MASK 0x000c0000 +#define A3XX_VPC_VARYING_INTERP_MODE_C9__SHIFT 18 +static inline uint32_t A3XX_VPC_VARYING_INTERP_MODE_C9(enum a3xx_intp_mode val) +{ + return ((val) << A3XX_VPC_VARYING_INTERP_MODE_C9__SHIFT) & A3XX_VPC_VARYING_INTERP_MODE_C9__MASK; +} +#define A3XX_VPC_VARYING_INTERP_MODE_CA__MASK 0x00300000 +#define A3XX_VPC_VARYING_INTERP_MODE_CA__SHIFT 20 +static inline uint32_t A3XX_VPC_VARYING_INTERP_MODE_CA(enum a3xx_intp_mode val) +{ + return ((val) << A3XX_VPC_VARYING_INTERP_MODE_CA__SHIFT) & A3XX_VPC_VARYING_INTERP_MODE_CA__MASK; +} +#define A3XX_VPC_VARYING_INTERP_MODE_CB__MASK 0x00c00000 +#define A3XX_VPC_VARYING_INTERP_MODE_CB__SHIFT 22 +static inline uint32_t A3XX_VPC_VARYING_INTERP_MODE_CB(enum a3xx_intp_mode val) +{ + return ((val) << A3XX_VPC_VARYING_INTERP_MODE_CB__SHIFT) & A3XX_VPC_VARYING_INTERP_MODE_CB__MASK; +} +#define A3XX_VPC_VARYING_INTERP_MODE_CC__MASK 0x03000000 +#define A3XX_VPC_VARYING_INTERP_MODE_CC__SHIFT 24 +static inline uint32_t A3XX_VPC_VARYING_INTERP_MODE_CC(enum a3xx_intp_mode val) +{ + return ((val) << A3XX_VPC_VARYING_INTERP_MODE_CC__SHIFT) & A3XX_VPC_VARYING_INTERP_MODE_CC__MASK; +} +#define A3XX_VPC_VARYING_INTERP_MODE_CD__MASK 0x0c000000 +#define A3XX_VPC_VARYING_INTERP_MODE_CD__SHIFT 26 +static inline uint32_t A3XX_VPC_VARYING_INTERP_MODE_CD(enum a3xx_intp_mode val) +{ + return ((val) << A3XX_VPC_VARYING_INTERP_MODE_CD__SHIFT) & A3XX_VPC_VARYING_INTERP_MODE_CD__MASK; +} +#define A3XX_VPC_VARYING_INTERP_MODE_CE__MASK 0x30000000 +#define A3XX_VPC_VARYING_INTERP_MODE_CE__SHIFT 28 +static inline uint32_t A3XX_VPC_VARYING_INTERP_MODE_CE(enum a3xx_intp_mode val) +{ + return ((val) << A3XX_VPC_VARYING_INTERP_MODE_CE__SHIFT) & A3XX_VPC_VARYING_INTERP_MODE_CE__MASK; +} +#define A3XX_VPC_VARYING_INTERP_MODE_CF__MASK 0xc0000000 +#define A3XX_VPC_VARYING_INTERP_MODE_CF__SHIFT 30 +static inline uint32_t 
A3XX_VPC_VARYING_INTERP_MODE_CF(enum a3xx_intp_mode val) +{ + return ((val) << A3XX_VPC_VARYING_INTERP_MODE_CF__SHIFT) & A3XX_VPC_VARYING_INTERP_MODE_CF__MASK; +} + +static inline uint32_t REG_A3XX_VPC_VARYING_PS_REPL(uint32_t i0) { return 0x00002286 + 0x1*i0; } + +static inline uint32_t REG_A3XX_VPC_VARYING_PS_REPL_MODE(uint32_t i0) { return 0x00002286 + 0x1*i0; } +#define A3XX_VPC_VARYING_PS_REPL_MODE_C0__MASK 0x00000003 +#define A3XX_VPC_VARYING_PS_REPL_MODE_C0__SHIFT 0 +static inline uint32_t A3XX_VPC_VARYING_PS_REPL_MODE_C0(enum a3xx_repl_mode val) +{ + return ((val) << A3XX_VPC_VARYING_PS_REPL_MODE_C0__SHIFT) & A3XX_VPC_VARYING_PS_REPL_MODE_C0__MASK; +} +#define A3XX_VPC_VARYING_PS_REPL_MODE_C1__MASK 0x0000000c +#define A3XX_VPC_VARYING_PS_REPL_MODE_C1__SHIFT 2 +static inline uint32_t A3XX_VPC_VARYING_PS_REPL_MODE_C1(enum a3xx_repl_mode val) +{ + return ((val) << A3XX_VPC_VARYING_PS_REPL_MODE_C1__SHIFT) & A3XX_VPC_VARYING_PS_REPL_MODE_C1__MASK; +} +#define A3XX_VPC_VARYING_PS_REPL_MODE_C2__MASK 0x00000030 +#define A3XX_VPC_VARYING_PS_REPL_MODE_C2__SHIFT 4 +static inline uint32_t A3XX_VPC_VARYING_PS_REPL_MODE_C2(enum a3xx_repl_mode val) +{ + return ((val) << A3XX_VPC_VARYING_PS_REPL_MODE_C2__SHIFT) & A3XX_VPC_VARYING_PS_REPL_MODE_C2__MASK; +} +#define A3XX_VPC_VARYING_PS_REPL_MODE_C3__MASK 0x000000c0 +#define A3XX_VPC_VARYING_PS_REPL_MODE_C3__SHIFT 6 +static inline uint32_t A3XX_VPC_VARYING_PS_REPL_MODE_C3(enum a3xx_repl_mode val) +{ + return ((val) << A3XX_VPC_VARYING_PS_REPL_MODE_C3__SHIFT) & A3XX_VPC_VARYING_PS_REPL_MODE_C3__MASK; +} +#define A3XX_VPC_VARYING_PS_REPL_MODE_C4__MASK 0x00000300 +#define A3XX_VPC_VARYING_PS_REPL_MODE_C4__SHIFT 8 +static inline uint32_t A3XX_VPC_VARYING_PS_REPL_MODE_C4(enum a3xx_repl_mode val) +{ + return ((val) << A3XX_VPC_VARYING_PS_REPL_MODE_C4__SHIFT) & A3XX_VPC_VARYING_PS_REPL_MODE_C4__MASK; +} +#define A3XX_VPC_VARYING_PS_REPL_MODE_C5__MASK 0x00000c00 +#define A3XX_VPC_VARYING_PS_REPL_MODE_C5__SHIFT 10 +static inline 
uint32_t A3XX_VPC_VARYING_PS_REPL_MODE_C5(enum a3xx_repl_mode val) +{ + return ((val) << A3XX_VPC_VARYING_PS_REPL_MODE_C5__SHIFT) & A3XX_VPC_VARYING_PS_REPL_MODE_C5__MASK; +} +#define A3XX_VPC_VARYING_PS_REPL_MODE_C6__MASK 0x00003000 +#define A3XX_VPC_VARYING_PS_REPL_MODE_C6__SHIFT 12 +static inline uint32_t A3XX_VPC_VARYING_PS_REPL_MODE_C6(enum a3xx_repl_mode val) +{ + return ((val) << A3XX_VPC_VARYING_PS_REPL_MODE_C6__SHIFT) & A3XX_VPC_VARYING_PS_REPL_MODE_C6__MASK; +} +#define A3XX_VPC_VARYING_PS_REPL_MODE_C7__MASK 0x0000c000 +#define A3XX_VPC_VARYING_PS_REPL_MODE_C7__SHIFT 14 +static inline uint32_t A3XX_VPC_VARYING_PS_REPL_MODE_C7(enum a3xx_repl_mode val) +{ + return ((val) << A3XX_VPC_VARYING_PS_REPL_MODE_C7__SHIFT) & A3XX_VPC_VARYING_PS_REPL_MODE_C7__MASK; +} +#define A3XX_VPC_VARYING_PS_REPL_MODE_C8__MASK 0x00030000 +#define A3XX_VPC_VARYING_PS_REPL_MODE_C8__SHIFT 16 +static inline uint32_t A3XX_VPC_VARYING_PS_REPL_MODE_C8(enum a3xx_repl_mode val) +{ + return ((val) << A3XX_VPC_VARYING_PS_REPL_MODE_C8__SHIFT) & A3XX_VPC_VARYING_PS_REPL_MODE_C8__MASK; +} +#define A3XX_VPC_VARYING_PS_REPL_MODE_C9__MASK 0x000c0000 +#define A3XX_VPC_VARYING_PS_REPL_MODE_C9__SHIFT 18 +static inline uint32_t A3XX_VPC_VARYING_PS_REPL_MODE_C9(enum a3xx_repl_mode val) +{ + return ((val) << A3XX_VPC_VARYING_PS_REPL_MODE_C9__SHIFT) & A3XX_VPC_VARYING_PS_REPL_MODE_C9__MASK; +} +#define A3XX_VPC_VARYING_PS_REPL_MODE_CA__MASK 0x00300000 +#define A3XX_VPC_VARYING_PS_REPL_MODE_CA__SHIFT 20 +static inline uint32_t A3XX_VPC_VARYING_PS_REPL_MODE_CA(enum a3xx_repl_mode val) +{ + return ((val) << A3XX_VPC_VARYING_PS_REPL_MODE_CA__SHIFT) & A3XX_VPC_VARYING_PS_REPL_MODE_CA__MASK; +} +#define A3XX_VPC_VARYING_PS_REPL_MODE_CB__MASK 0x00c00000 +#define A3XX_VPC_VARYING_PS_REPL_MODE_CB__SHIFT 22 +static inline uint32_t A3XX_VPC_VARYING_PS_REPL_MODE_CB(enum a3xx_repl_mode val) +{ + return ((val) << A3XX_VPC_VARYING_PS_REPL_MODE_CB__SHIFT) & A3XX_VPC_VARYING_PS_REPL_MODE_CB__MASK; +} +#define 
A3XX_VPC_VARYING_PS_REPL_MODE_CC__MASK 0x03000000 +#define A3XX_VPC_VARYING_PS_REPL_MODE_CC__SHIFT 24 +static inline uint32_t A3XX_VPC_VARYING_PS_REPL_MODE_CC(enum a3xx_repl_mode val) +{ + return ((val) << A3XX_VPC_VARYING_PS_REPL_MODE_CC__SHIFT) & A3XX_VPC_VARYING_PS_REPL_MODE_CC__MASK; +} +#define A3XX_VPC_VARYING_PS_REPL_MODE_CD__MASK 0x0c000000 +#define A3XX_VPC_VARYING_PS_REPL_MODE_CD__SHIFT 26 +static inline uint32_t A3XX_VPC_VARYING_PS_REPL_MODE_CD(enum a3xx_repl_mode val) +{ + return ((val) << A3XX_VPC_VARYING_PS_REPL_MODE_CD__SHIFT) & A3XX_VPC_VARYING_PS_REPL_MODE_CD__MASK; +} +#define A3XX_VPC_VARYING_PS_REPL_MODE_CE__MASK 0x30000000 +#define A3XX_VPC_VARYING_PS_REPL_MODE_CE__SHIFT 28 +static inline uint32_t A3XX_VPC_VARYING_PS_REPL_MODE_CE(enum a3xx_repl_mode val) +{ + return ((val) << A3XX_VPC_VARYING_PS_REPL_MODE_CE__SHIFT) & A3XX_VPC_VARYING_PS_REPL_MODE_CE__MASK; +} +#define A3XX_VPC_VARYING_PS_REPL_MODE_CF__MASK 0xc0000000 +#define A3XX_VPC_VARYING_PS_REPL_MODE_CF__SHIFT 30 +static inline uint32_t A3XX_VPC_VARYING_PS_REPL_MODE_CF(enum a3xx_repl_mode val) +{ + return ((val) << A3XX_VPC_VARYING_PS_REPL_MODE_CF__SHIFT) & A3XX_VPC_VARYING_PS_REPL_MODE_CF__MASK; +} + +#define REG_A3XX_VPC_VARY_CYLWRAP_ENABLE_0 0x0000228a + +#define REG_A3XX_VPC_VARY_CYLWRAP_ENABLE_1 0x0000228b + +#define REG_A3XX_SP_SP_CTRL_REG 0x000022c0 +#define A3XX_SP_SP_CTRL_REG_RESOLVE 0x00010000 +#define A3XX_SP_SP_CTRL_REG_CONSTMODE__MASK 0x00040000 +#define A3XX_SP_SP_CTRL_REG_CONSTMODE__SHIFT 18 +static inline uint32_t A3XX_SP_SP_CTRL_REG_CONSTMODE(uint32_t val) +{ + return ((val) << A3XX_SP_SP_CTRL_REG_CONSTMODE__SHIFT) & A3XX_SP_SP_CTRL_REG_CONSTMODE__MASK; +} +#define A3XX_SP_SP_CTRL_REG_BINNING 0x00080000 +#define A3XX_SP_SP_CTRL_REG_SLEEPMODE__MASK 0x00300000 +#define A3XX_SP_SP_CTRL_REG_SLEEPMODE__SHIFT 20 +static inline uint32_t A3XX_SP_SP_CTRL_REG_SLEEPMODE(uint32_t val) +{ + return ((val) << A3XX_SP_SP_CTRL_REG_SLEEPMODE__SHIFT) & A3XX_SP_SP_CTRL_REG_SLEEPMODE__MASK; 
+} +#define A3XX_SP_SP_CTRL_REG_L0MODE__MASK 0x00c00000 +#define A3XX_SP_SP_CTRL_REG_L0MODE__SHIFT 22 +static inline uint32_t A3XX_SP_SP_CTRL_REG_L0MODE(uint32_t val) +{ + return ((val) << A3XX_SP_SP_CTRL_REG_L0MODE__SHIFT) & A3XX_SP_SP_CTRL_REG_L0MODE__MASK; +} + +#define REG_A3XX_SP_VS_CTRL_REG0 0x000022c4 +#define A3XX_SP_VS_CTRL_REG0_THREADMODE__MASK 0x00000001 +#define A3XX_SP_VS_CTRL_REG0_THREADMODE__SHIFT 0 +static inline uint32_t A3XX_SP_VS_CTRL_REG0_THREADMODE(enum a3xx_threadmode val) +{ + return ((val) << A3XX_SP_VS_CTRL_REG0_THREADMODE__SHIFT) & A3XX_SP_VS_CTRL_REG0_THREADMODE__MASK; +} +#define A3XX_SP_VS_CTRL_REG0_INSTRBUFFERMODE__MASK 0x00000002 +#define A3XX_SP_VS_CTRL_REG0_INSTRBUFFERMODE__SHIFT 1 +static inline uint32_t A3XX_SP_VS_CTRL_REG0_INSTRBUFFERMODE(enum a3xx_instrbuffermode val) +{ + return ((val) << A3XX_SP_VS_CTRL_REG0_INSTRBUFFERMODE__SHIFT) & A3XX_SP_VS_CTRL_REG0_INSTRBUFFERMODE__MASK; +} +#define A3XX_SP_VS_CTRL_REG0_CACHEINVALID 0x00000004 +#define A3XX_SP_VS_CTRL_REG0_ALUSCHMODE 0x00000008 +#define A3XX_SP_VS_CTRL_REG0_HALFREGFOOTPRINT__MASK 0x000003f0 +#define A3XX_SP_VS_CTRL_REG0_HALFREGFOOTPRINT__SHIFT 4 +static inline uint32_t A3XX_SP_VS_CTRL_REG0_HALFREGFOOTPRINT(uint32_t val) +{ + return ((val) << A3XX_SP_VS_CTRL_REG0_HALFREGFOOTPRINT__SHIFT) & A3XX_SP_VS_CTRL_REG0_HALFREGFOOTPRINT__MASK; +} +#define A3XX_SP_VS_CTRL_REG0_FULLREGFOOTPRINT__MASK 0x0000fc00 +#define A3XX_SP_VS_CTRL_REG0_FULLREGFOOTPRINT__SHIFT 10 +static inline uint32_t A3XX_SP_VS_CTRL_REG0_FULLREGFOOTPRINT(uint32_t val) +{ + return ((val) << A3XX_SP_VS_CTRL_REG0_FULLREGFOOTPRINT__SHIFT) & A3XX_SP_VS_CTRL_REG0_FULLREGFOOTPRINT__MASK; +} +#define A3XX_SP_VS_CTRL_REG0_THREADSIZE__MASK 0x00100000 +#define A3XX_SP_VS_CTRL_REG0_THREADSIZE__SHIFT 20 +static inline uint32_t A3XX_SP_VS_CTRL_REG0_THREADSIZE(enum a3xx_threadsize val) +{ + return ((val) << A3XX_SP_VS_CTRL_REG0_THREADSIZE__SHIFT) & A3XX_SP_VS_CTRL_REG0_THREADSIZE__MASK; +} +#define 
A3XX_SP_VS_CTRL_REG0_SUPERTHREADMODE 0x00200000 +#define A3XX_SP_VS_CTRL_REG0_LENGTH__MASK 0xff000000 +#define A3XX_SP_VS_CTRL_REG0_LENGTH__SHIFT 24 +static inline uint32_t A3XX_SP_VS_CTRL_REG0_LENGTH(uint32_t val) +{ + return ((val) << A3XX_SP_VS_CTRL_REG0_LENGTH__SHIFT) & A3XX_SP_VS_CTRL_REG0_LENGTH__MASK; +} + +#define REG_A3XX_SP_VS_CTRL_REG1 0x000022c5 +#define A3XX_SP_VS_CTRL_REG1_CONSTLENGTH__MASK 0x000003ff +#define A3XX_SP_VS_CTRL_REG1_CONSTLENGTH__SHIFT 0 +static inline uint32_t A3XX_SP_VS_CTRL_REG1_CONSTLENGTH(uint32_t val) +{ + return ((val) << A3XX_SP_VS_CTRL_REG1_CONSTLENGTH__SHIFT) & A3XX_SP_VS_CTRL_REG1_CONSTLENGTH__MASK; +} +#define A3XX_SP_VS_CTRL_REG1_CONSTFOOTPRINT__MASK 0x000ffc00 +#define A3XX_SP_VS_CTRL_REG1_CONSTFOOTPRINT__SHIFT 10 +static inline uint32_t A3XX_SP_VS_CTRL_REG1_CONSTFOOTPRINT(uint32_t val) +{ + return ((val) << A3XX_SP_VS_CTRL_REG1_CONSTFOOTPRINT__SHIFT) & A3XX_SP_VS_CTRL_REG1_CONSTFOOTPRINT__MASK; +} +#define A3XX_SP_VS_CTRL_REG1_INITIALOUTSTANDING__MASK 0x7f000000 +#define A3XX_SP_VS_CTRL_REG1_INITIALOUTSTANDING__SHIFT 24 +static inline uint32_t A3XX_SP_VS_CTRL_REG1_INITIALOUTSTANDING(uint32_t val) +{ + return ((val) << A3XX_SP_VS_CTRL_REG1_INITIALOUTSTANDING__SHIFT) & A3XX_SP_VS_CTRL_REG1_INITIALOUTSTANDING__MASK; +} + +#define REG_A3XX_SP_VS_PARAM_REG 0x000022c6 +#define A3XX_SP_VS_PARAM_REG_POSREGID__MASK 0x000000ff +#define A3XX_SP_VS_PARAM_REG_POSREGID__SHIFT 0 +static inline uint32_t A3XX_SP_VS_PARAM_REG_POSREGID(uint32_t val) +{ + return ((val) << A3XX_SP_VS_PARAM_REG_POSREGID__SHIFT) & A3XX_SP_VS_PARAM_REG_POSREGID__MASK; +} +#define A3XX_SP_VS_PARAM_REG_PSIZEREGID__MASK 0x0000ff00 +#define A3XX_SP_VS_PARAM_REG_PSIZEREGID__SHIFT 8 +static inline uint32_t A3XX_SP_VS_PARAM_REG_PSIZEREGID(uint32_t val) +{ + return ((val) << A3XX_SP_VS_PARAM_REG_PSIZEREGID__SHIFT) & A3XX_SP_VS_PARAM_REG_PSIZEREGID__MASK; +} +#define A3XX_SP_VS_PARAM_REG_POS2DMODE 0x00010000 +#define A3XX_SP_VS_PARAM_REG_TOTALVSOUTVAR__MASK 0x01f00000 
+#define A3XX_SP_VS_PARAM_REG_TOTALVSOUTVAR__SHIFT 20 +static inline uint32_t A3XX_SP_VS_PARAM_REG_TOTALVSOUTVAR(uint32_t val) +{ + return ((val) << A3XX_SP_VS_PARAM_REG_TOTALVSOUTVAR__SHIFT) & A3XX_SP_VS_PARAM_REG_TOTALVSOUTVAR__MASK; +} + +static inline uint32_t REG_A3XX_SP_VS_OUT(uint32_t i0) { return 0x000022c7 + 0x1*i0; } + +static inline uint32_t REG_A3XX_SP_VS_OUT_REG(uint32_t i0) { return 0x000022c7 + 0x1*i0; } +#define A3XX_SP_VS_OUT_REG_A_REGID__MASK 0x000000ff +#define A3XX_SP_VS_OUT_REG_A_REGID__SHIFT 0 +static inline uint32_t A3XX_SP_VS_OUT_REG_A_REGID(uint32_t val) +{ + return ((val) << A3XX_SP_VS_OUT_REG_A_REGID__SHIFT) & A3XX_SP_VS_OUT_REG_A_REGID__MASK; +} +#define A3XX_SP_VS_OUT_REG_A_HALF 0x00000100 +#define A3XX_SP_VS_OUT_REG_A_COMPMASK__MASK 0x00001e00 +#define A3XX_SP_VS_OUT_REG_A_COMPMASK__SHIFT 9 +static inline uint32_t A3XX_SP_VS_OUT_REG_A_COMPMASK(uint32_t val) +{ + return ((val) << A3XX_SP_VS_OUT_REG_A_COMPMASK__SHIFT) & A3XX_SP_VS_OUT_REG_A_COMPMASK__MASK; +} +#define A3XX_SP_VS_OUT_REG_B_REGID__MASK 0x00ff0000 +#define A3XX_SP_VS_OUT_REG_B_REGID__SHIFT 16 +static inline uint32_t A3XX_SP_VS_OUT_REG_B_REGID(uint32_t val) +{ + return ((val) << A3XX_SP_VS_OUT_REG_B_REGID__SHIFT) & A3XX_SP_VS_OUT_REG_B_REGID__MASK; +} +#define A3XX_SP_VS_OUT_REG_B_HALF 0x01000000 +#define A3XX_SP_VS_OUT_REG_B_COMPMASK__MASK 0x1e000000 +#define A3XX_SP_VS_OUT_REG_B_COMPMASK__SHIFT 25 +static inline uint32_t A3XX_SP_VS_OUT_REG_B_COMPMASK(uint32_t val) +{ + return ((val) << A3XX_SP_VS_OUT_REG_B_COMPMASK__SHIFT) & A3XX_SP_VS_OUT_REG_B_COMPMASK__MASK; +} + +static inline uint32_t REG_A3XX_SP_VS_VPC_DST(uint32_t i0) { return 0x000022d0 + 0x1*i0; } + +static inline uint32_t REG_A3XX_SP_VS_VPC_DST_REG(uint32_t i0) { return 0x000022d0 + 0x1*i0; } +#define A3XX_SP_VS_VPC_DST_REG_OUTLOC0__MASK 0x0000007f +#define A3XX_SP_VS_VPC_DST_REG_OUTLOC0__SHIFT 0 +static inline uint32_t A3XX_SP_VS_VPC_DST_REG_OUTLOC0(uint32_t val) +{ + return ((val) << 
A3XX_SP_VS_VPC_DST_REG_OUTLOC0__SHIFT) & A3XX_SP_VS_VPC_DST_REG_OUTLOC0__MASK; +} +#define A3XX_SP_VS_VPC_DST_REG_OUTLOC1__MASK 0x00007f00 +#define A3XX_SP_VS_VPC_DST_REG_OUTLOC1__SHIFT 8 +static inline uint32_t A3XX_SP_VS_VPC_DST_REG_OUTLOC1(uint32_t val) +{ + return ((val) << A3XX_SP_VS_VPC_DST_REG_OUTLOC1__SHIFT) & A3XX_SP_VS_VPC_DST_REG_OUTLOC1__MASK; +} +#define A3XX_SP_VS_VPC_DST_REG_OUTLOC2__MASK 0x007f0000 +#define A3XX_SP_VS_VPC_DST_REG_OUTLOC2__SHIFT 16 +static inline uint32_t A3XX_SP_VS_VPC_DST_REG_OUTLOC2(uint32_t val) +{ + return ((val) << A3XX_SP_VS_VPC_DST_REG_OUTLOC2__SHIFT) & A3XX_SP_VS_VPC_DST_REG_OUTLOC2__MASK; +} +#define A3XX_SP_VS_VPC_DST_REG_OUTLOC3__MASK 0x7f000000 +#define A3XX_SP_VS_VPC_DST_REG_OUTLOC3__SHIFT 24 +static inline uint32_t A3XX_SP_VS_VPC_DST_REG_OUTLOC3(uint32_t val) +{ + return ((val) << A3XX_SP_VS_VPC_DST_REG_OUTLOC3__SHIFT) & A3XX_SP_VS_VPC_DST_REG_OUTLOC3__MASK; +} + +#define REG_A3XX_SP_VS_OBJ_OFFSET_REG 0x000022d4 +#define A3XX_SP_VS_OBJ_OFFSET_REG_FIRSTEXECINSTROFFSET__MASK 0x0000ffff +#define A3XX_SP_VS_OBJ_OFFSET_REG_FIRSTEXECINSTROFFSET__SHIFT 0 +static inline uint32_t A3XX_SP_VS_OBJ_OFFSET_REG_FIRSTEXECINSTROFFSET(uint32_t val) +{ + return ((val) << A3XX_SP_VS_OBJ_OFFSET_REG_FIRSTEXECINSTROFFSET__SHIFT) & A3XX_SP_VS_OBJ_OFFSET_REG_FIRSTEXECINSTROFFSET__MASK; +} +#define A3XX_SP_VS_OBJ_OFFSET_REG_CONSTOBJECTOFFSET__MASK 0x01ff0000 +#define A3XX_SP_VS_OBJ_OFFSET_REG_CONSTOBJECTOFFSET__SHIFT 16 +static inline uint32_t A3XX_SP_VS_OBJ_OFFSET_REG_CONSTOBJECTOFFSET(uint32_t val) +{ + return ((val) << A3XX_SP_VS_OBJ_OFFSET_REG_CONSTOBJECTOFFSET__SHIFT) & A3XX_SP_VS_OBJ_OFFSET_REG_CONSTOBJECTOFFSET__MASK; +} +#define A3XX_SP_VS_OBJ_OFFSET_REG_SHADEROBJOFFSET__MASK 0xfe000000 +#define A3XX_SP_VS_OBJ_OFFSET_REG_SHADEROBJOFFSET__SHIFT 25 +static inline uint32_t A3XX_SP_VS_OBJ_OFFSET_REG_SHADEROBJOFFSET(uint32_t val) +{ + return ((val) << A3XX_SP_VS_OBJ_OFFSET_REG_SHADEROBJOFFSET__SHIFT) & 
A3XX_SP_VS_OBJ_OFFSET_REG_SHADEROBJOFFSET__MASK; +} + +#define REG_A3XX_SP_VS_OBJ_START_REG 0x000022d5 + +#define REG_A3XX_SP_VS_PVT_MEM_PARAM_REG 0x000022d6 +#define A3XX_SP_VS_PVT_MEM_PARAM_REG_MEMSIZEPERITEM__MASK 0x000000ff +#define A3XX_SP_VS_PVT_MEM_PARAM_REG_MEMSIZEPERITEM__SHIFT 0 +static inline uint32_t A3XX_SP_VS_PVT_MEM_PARAM_REG_MEMSIZEPERITEM(uint32_t val) +{ + return ((val) << A3XX_SP_VS_PVT_MEM_PARAM_REG_MEMSIZEPERITEM__SHIFT) & A3XX_SP_VS_PVT_MEM_PARAM_REG_MEMSIZEPERITEM__MASK; +} +#define A3XX_SP_VS_PVT_MEM_PARAM_REG_HWSTACKOFFSET__MASK 0x00ffff00 +#define A3XX_SP_VS_PVT_MEM_PARAM_REG_HWSTACKOFFSET__SHIFT 8 +static inline uint32_t A3XX_SP_VS_PVT_MEM_PARAM_REG_HWSTACKOFFSET(uint32_t val) +{ + return ((val) << A3XX_SP_VS_PVT_MEM_PARAM_REG_HWSTACKOFFSET__SHIFT) & A3XX_SP_VS_PVT_MEM_PARAM_REG_HWSTACKOFFSET__MASK; +} +#define A3XX_SP_VS_PVT_MEM_PARAM_REG_HWSTACKSIZEPERTHREAD__MASK 0xff000000 +#define A3XX_SP_VS_PVT_MEM_PARAM_REG_HWSTACKSIZEPERTHREAD__SHIFT 24 +static inline uint32_t A3XX_SP_VS_PVT_MEM_PARAM_REG_HWSTACKSIZEPERTHREAD(uint32_t val) +{ + return ((val) << A3XX_SP_VS_PVT_MEM_PARAM_REG_HWSTACKSIZEPERTHREAD__SHIFT) & A3XX_SP_VS_PVT_MEM_PARAM_REG_HWSTACKSIZEPERTHREAD__MASK; +} + +#define REG_A3XX_SP_VS_PVT_MEM_ADDR_REG 0x000022d7 +#define A3XX_SP_VS_PVT_MEM_ADDR_REG_BURSTLEN__MASK 0x0000001f +#define A3XX_SP_VS_PVT_MEM_ADDR_REG_BURSTLEN__SHIFT 0 +static inline uint32_t A3XX_SP_VS_PVT_MEM_ADDR_REG_BURSTLEN(uint32_t val) +{ + return ((val) << A3XX_SP_VS_PVT_MEM_ADDR_REG_BURSTLEN__SHIFT) & A3XX_SP_VS_PVT_MEM_ADDR_REG_BURSTLEN__MASK; +} +#define A3XX_SP_VS_PVT_MEM_ADDR_REG_SHADERSTARTADDRESS__MASK 0xffffffe0 +#define A3XX_SP_VS_PVT_MEM_ADDR_REG_SHADERSTARTADDRESS__SHIFT 5 +static inline uint32_t A3XX_SP_VS_PVT_MEM_ADDR_REG_SHADERSTARTADDRESS(uint32_t val) +{ + assert(!(val & 0x1f)); + return ((val >> 5) << A3XX_SP_VS_PVT_MEM_ADDR_REG_SHADERSTARTADDRESS__SHIFT) & A3XX_SP_VS_PVT_MEM_ADDR_REG_SHADERSTARTADDRESS__MASK; +} + +#define 
REG_A3XX_SP_VS_PVT_MEM_SIZE_REG 0x000022d8 + +#define REG_A3XX_SP_VS_LENGTH_REG 0x000022df +#define A3XX_SP_VS_LENGTH_REG_SHADERLENGTH__MASK 0xffffffff +#define A3XX_SP_VS_LENGTH_REG_SHADERLENGTH__SHIFT 0 +static inline uint32_t A3XX_SP_VS_LENGTH_REG_SHADERLENGTH(uint32_t val) +{ + return ((val) << A3XX_SP_VS_LENGTH_REG_SHADERLENGTH__SHIFT) & A3XX_SP_VS_LENGTH_REG_SHADERLENGTH__MASK; +} + +#define REG_A3XX_SP_FS_CTRL_REG0 0x000022e0 +#define A3XX_SP_FS_CTRL_REG0_THREADMODE__MASK 0x00000001 +#define A3XX_SP_FS_CTRL_REG0_THREADMODE__SHIFT 0 +static inline uint32_t A3XX_SP_FS_CTRL_REG0_THREADMODE(enum a3xx_threadmode val) +{ + return ((val) << A3XX_SP_FS_CTRL_REG0_THREADMODE__SHIFT) & A3XX_SP_FS_CTRL_REG0_THREADMODE__MASK; +} +#define A3XX_SP_FS_CTRL_REG0_INSTRBUFFERMODE__MASK 0x00000002 +#define A3XX_SP_FS_CTRL_REG0_INSTRBUFFERMODE__SHIFT 1 +static inline uint32_t A3XX_SP_FS_CTRL_REG0_INSTRBUFFERMODE(enum a3xx_instrbuffermode val) +{ + return ((val) << A3XX_SP_FS_CTRL_REG0_INSTRBUFFERMODE__SHIFT) & A3XX_SP_FS_CTRL_REG0_INSTRBUFFERMODE__MASK; +} +#define A3XX_SP_FS_CTRL_REG0_CACHEINVALID 0x00000004 +#define A3XX_SP_FS_CTRL_REG0_ALUSCHMODE 0x00000008 +#define A3XX_SP_FS_CTRL_REG0_HALFREGFOOTPRINT__MASK 0x000003f0 +#define A3XX_SP_FS_CTRL_REG0_HALFREGFOOTPRINT__SHIFT 4 +static inline uint32_t A3XX_SP_FS_CTRL_REG0_HALFREGFOOTPRINT(uint32_t val) +{ + return ((val) << A3XX_SP_FS_CTRL_REG0_HALFREGFOOTPRINT__SHIFT) & A3XX_SP_FS_CTRL_REG0_HALFREGFOOTPRINT__MASK; +} +#define A3XX_SP_FS_CTRL_REG0_FULLREGFOOTPRINT__MASK 0x0000fc00 +#define A3XX_SP_FS_CTRL_REG0_FULLREGFOOTPRINT__SHIFT 10 +static inline uint32_t A3XX_SP_FS_CTRL_REG0_FULLREGFOOTPRINT(uint32_t val) +{ + return ((val) << A3XX_SP_FS_CTRL_REG0_FULLREGFOOTPRINT__SHIFT) & A3XX_SP_FS_CTRL_REG0_FULLREGFOOTPRINT__MASK; +} +#define A3XX_SP_FS_CTRL_REG0_FSBYPASSENABLE 0x00020000 +#define A3XX_SP_FS_CTRL_REG0_INOUTREGOVERLAP 0x00040000 +#define A3XX_SP_FS_CTRL_REG0_OUTORDERED 0x00080000 +#define 
A3XX_SP_FS_CTRL_REG0_THREADSIZE__MASK 0x00100000 +#define A3XX_SP_FS_CTRL_REG0_THREADSIZE__SHIFT 20 +static inline uint32_t A3XX_SP_FS_CTRL_REG0_THREADSIZE(enum a3xx_threadsize val) +{ + return ((val) << A3XX_SP_FS_CTRL_REG0_THREADSIZE__SHIFT) & A3XX_SP_FS_CTRL_REG0_THREADSIZE__MASK; +} +#define A3XX_SP_FS_CTRL_REG0_SUPERTHREADMODE 0x00200000 +#define A3XX_SP_FS_CTRL_REG0_PIXLODENABLE 0x00400000 +#define A3XX_SP_FS_CTRL_REG0_COMPUTEMODE 0x00800000 +#define A3XX_SP_FS_CTRL_REG0_LENGTH__MASK 0xff000000 +#define A3XX_SP_FS_CTRL_REG0_LENGTH__SHIFT 24 +static inline uint32_t A3XX_SP_FS_CTRL_REG0_LENGTH(uint32_t val) +{ + return ((val) << A3XX_SP_FS_CTRL_REG0_LENGTH__SHIFT) & A3XX_SP_FS_CTRL_REG0_LENGTH__MASK; +} + +#define REG_A3XX_SP_FS_CTRL_REG1 0x000022e1 +#define A3XX_SP_FS_CTRL_REG1_CONSTLENGTH__MASK 0x000003ff +#define A3XX_SP_FS_CTRL_REG1_CONSTLENGTH__SHIFT 0 +static inline uint32_t A3XX_SP_FS_CTRL_REG1_CONSTLENGTH(uint32_t val) +{ + return ((val) << A3XX_SP_FS_CTRL_REG1_CONSTLENGTH__SHIFT) & A3XX_SP_FS_CTRL_REG1_CONSTLENGTH__MASK; +} +#define A3XX_SP_FS_CTRL_REG1_CONSTFOOTPRINT__MASK 0x000ffc00 +#define A3XX_SP_FS_CTRL_REG1_CONSTFOOTPRINT__SHIFT 10 +static inline uint32_t A3XX_SP_FS_CTRL_REG1_CONSTFOOTPRINT(uint32_t val) +{ + return ((val) << A3XX_SP_FS_CTRL_REG1_CONSTFOOTPRINT__SHIFT) & A3XX_SP_FS_CTRL_REG1_CONSTFOOTPRINT__MASK; +} +#define A3XX_SP_FS_CTRL_REG1_INITIALOUTSTANDING__MASK 0x00f00000 +#define A3XX_SP_FS_CTRL_REG1_INITIALOUTSTANDING__SHIFT 20 +static inline uint32_t A3XX_SP_FS_CTRL_REG1_INITIALOUTSTANDING(uint32_t val) +{ + return ((val) << A3XX_SP_FS_CTRL_REG1_INITIALOUTSTANDING__SHIFT) & A3XX_SP_FS_CTRL_REG1_INITIALOUTSTANDING__MASK; +} +#define A3XX_SP_FS_CTRL_REG1_HALFPRECVAROFFSET__MASK 0x7f000000 +#define A3XX_SP_FS_CTRL_REG1_HALFPRECVAROFFSET__SHIFT 24 +static inline uint32_t A3XX_SP_FS_CTRL_REG1_HALFPRECVAROFFSET(uint32_t val) +{ + return ((val) << A3XX_SP_FS_CTRL_REG1_HALFPRECVAROFFSET__SHIFT) & A3XX_SP_FS_CTRL_REG1_HALFPRECVAROFFSET__MASK; 
+} + +#define REG_A3XX_SP_FS_OBJ_OFFSET_REG 0x000022e2 +#define A3XX_SP_FS_OBJ_OFFSET_REG_FIRSTEXECINSTROFFSET__MASK 0x0000ffff +#define A3XX_SP_FS_OBJ_OFFSET_REG_FIRSTEXECINSTROFFSET__SHIFT 0 +static inline uint32_t A3XX_SP_FS_OBJ_OFFSET_REG_FIRSTEXECINSTROFFSET(uint32_t val) +{ + return ((val) << A3XX_SP_FS_OBJ_OFFSET_REG_FIRSTEXECINSTROFFSET__SHIFT) & A3XX_SP_FS_OBJ_OFFSET_REG_FIRSTEXECINSTROFFSET__MASK; +} +#define A3XX_SP_FS_OBJ_OFFSET_REG_CONSTOBJECTOFFSET__MASK 0x01ff0000 +#define A3XX_SP_FS_OBJ_OFFSET_REG_CONSTOBJECTOFFSET__SHIFT 16 +static inline uint32_t A3XX_SP_FS_OBJ_OFFSET_REG_CONSTOBJECTOFFSET(uint32_t val) +{ + return ((val) << A3XX_SP_FS_OBJ_OFFSET_REG_CONSTOBJECTOFFSET__SHIFT) & A3XX_SP_FS_OBJ_OFFSET_REG_CONSTOBJECTOFFSET__MASK; +} +#define A3XX_SP_FS_OBJ_OFFSET_REG_SHADEROBJOFFSET__MASK 0xfe000000 +#define A3XX_SP_FS_OBJ_OFFSET_REG_SHADEROBJOFFSET__SHIFT 25 +static inline uint32_t A3XX_SP_FS_OBJ_OFFSET_REG_SHADEROBJOFFSET(uint32_t val) +{ + return ((val) << A3XX_SP_FS_OBJ_OFFSET_REG_SHADEROBJOFFSET__SHIFT) & A3XX_SP_FS_OBJ_OFFSET_REG_SHADEROBJOFFSET__MASK; +} + +#define REG_A3XX_SP_FS_OBJ_START_REG 0x000022e3 + +#define REG_A3XX_SP_FS_PVT_MEM_PARAM_REG 0x000022e4 +#define A3XX_SP_FS_PVT_MEM_PARAM_REG_MEMSIZEPERITEM__MASK 0x000000ff +#define A3XX_SP_FS_PVT_MEM_PARAM_REG_MEMSIZEPERITEM__SHIFT 0 +static inline uint32_t A3XX_SP_FS_PVT_MEM_PARAM_REG_MEMSIZEPERITEM(uint32_t val) +{ + return ((val) << A3XX_SP_FS_PVT_MEM_PARAM_REG_MEMSIZEPERITEM__SHIFT) & A3XX_SP_FS_PVT_MEM_PARAM_REG_MEMSIZEPERITEM__MASK; +} +#define A3XX_SP_FS_PVT_MEM_PARAM_REG_HWSTACKOFFSET__MASK 0x00ffff00 +#define A3XX_SP_FS_PVT_MEM_PARAM_REG_HWSTACKOFFSET__SHIFT 8 +static inline uint32_t A3XX_SP_FS_PVT_MEM_PARAM_REG_HWSTACKOFFSET(uint32_t val) +{ + return ((val) << A3XX_SP_FS_PVT_MEM_PARAM_REG_HWSTACKOFFSET__SHIFT) & A3XX_SP_FS_PVT_MEM_PARAM_REG_HWSTACKOFFSET__MASK; +} +#define A3XX_SP_FS_PVT_MEM_PARAM_REG_HWSTACKSIZEPERTHREAD__MASK 0xff000000 +#define 
A3XX_SP_FS_PVT_MEM_PARAM_REG_HWSTACKSIZEPERTHREAD__SHIFT 24 +static inline uint32_t A3XX_SP_FS_PVT_MEM_PARAM_REG_HWSTACKSIZEPERTHREAD(uint32_t val) +{ + return ((val) << A3XX_SP_FS_PVT_MEM_PARAM_REG_HWSTACKSIZEPERTHREAD__SHIFT) & A3XX_SP_FS_PVT_MEM_PARAM_REG_HWSTACKSIZEPERTHREAD__MASK; +} + +#define REG_A3XX_SP_FS_PVT_MEM_ADDR_REG 0x000022e5 +#define A3XX_SP_FS_PVT_MEM_ADDR_REG_BURSTLEN__MASK 0x0000001f +#define A3XX_SP_FS_PVT_MEM_ADDR_REG_BURSTLEN__SHIFT 0 +static inline uint32_t A3XX_SP_FS_PVT_MEM_ADDR_REG_BURSTLEN(uint32_t val) +{ + return ((val) << A3XX_SP_FS_PVT_MEM_ADDR_REG_BURSTLEN__SHIFT) & A3XX_SP_FS_PVT_MEM_ADDR_REG_BURSTLEN__MASK; +} +#define A3XX_SP_FS_PVT_MEM_ADDR_REG_SHADERSTARTADDRESS__MASK 0xffffffe0 +#define A3XX_SP_FS_PVT_MEM_ADDR_REG_SHADERSTARTADDRESS__SHIFT 5 +static inline uint32_t A3XX_SP_FS_PVT_MEM_ADDR_REG_SHADERSTARTADDRESS(uint32_t val) +{ + assert(!(val & 0x1f)); + return ((val >> 5) << A3XX_SP_FS_PVT_MEM_ADDR_REG_SHADERSTARTADDRESS__SHIFT) & A3XX_SP_FS_PVT_MEM_ADDR_REG_SHADERSTARTADDRESS__MASK; +} + +#define REG_A3XX_SP_FS_PVT_MEM_SIZE_REG 0x000022e6 + +#define REG_A3XX_SP_FS_FLAT_SHAD_MODE_REG_0 0x000022e8 + +#define REG_A3XX_SP_FS_FLAT_SHAD_MODE_REG_1 0x000022e9 + +#define REG_A3XX_SP_FS_OUTPUT_REG 0x000022ec +#define A3XX_SP_FS_OUTPUT_REG_MRT__MASK 0x00000003 +#define A3XX_SP_FS_OUTPUT_REG_MRT__SHIFT 0 +static inline uint32_t A3XX_SP_FS_OUTPUT_REG_MRT(uint32_t val) +{ + return ((val) << A3XX_SP_FS_OUTPUT_REG_MRT__SHIFT) & A3XX_SP_FS_OUTPUT_REG_MRT__MASK; +} +#define A3XX_SP_FS_OUTPUT_REG_DEPTH_ENABLE 0x00000080 +#define A3XX_SP_FS_OUTPUT_REG_DEPTH_REGID__MASK 0x0000ff00 +#define A3XX_SP_FS_OUTPUT_REG_DEPTH_REGID__SHIFT 8 +static inline uint32_t A3XX_SP_FS_OUTPUT_REG_DEPTH_REGID(uint32_t val) +{ + return ((val) << A3XX_SP_FS_OUTPUT_REG_DEPTH_REGID__SHIFT) & A3XX_SP_FS_OUTPUT_REG_DEPTH_REGID__MASK; +} + +static inline uint32_t REG_A3XX_SP_FS_MRT(uint32_t i0) { return 0x000022f0 + 0x1*i0; } + +static inline uint32_t 
REG_A3XX_SP_FS_MRT_REG(uint32_t i0) { return 0x000022f0 + 0x1*i0; } +#define A3XX_SP_FS_MRT_REG_REGID__MASK 0x000000ff +#define A3XX_SP_FS_MRT_REG_REGID__SHIFT 0 +static inline uint32_t A3XX_SP_FS_MRT_REG_REGID(uint32_t val) +{ + return ((val) << A3XX_SP_FS_MRT_REG_REGID__SHIFT) & A3XX_SP_FS_MRT_REG_REGID__MASK; +} +#define A3XX_SP_FS_MRT_REG_HALF_PRECISION 0x00000100 +#define A3XX_SP_FS_MRT_REG_SINT 0x00000400 +#define A3XX_SP_FS_MRT_REG_UINT 0x00000800 + +static inline uint32_t REG_A3XX_SP_FS_IMAGE_OUTPUT(uint32_t i0) { return 0x000022f4 + 0x1*i0; } + +static inline uint32_t REG_A3XX_SP_FS_IMAGE_OUTPUT_REG(uint32_t i0) { return 0x000022f4 + 0x1*i0; } +#define A3XX_SP_FS_IMAGE_OUTPUT_REG_MRTFORMAT__MASK 0x0000003f +#define A3XX_SP_FS_IMAGE_OUTPUT_REG_MRTFORMAT__SHIFT 0 +static inline uint32_t A3XX_SP_FS_IMAGE_OUTPUT_REG_MRTFORMAT(enum a3xx_color_fmt val) +{ + return ((val) << A3XX_SP_FS_IMAGE_OUTPUT_REG_MRTFORMAT__SHIFT) & A3XX_SP_FS_IMAGE_OUTPUT_REG_MRTFORMAT__MASK; +} + +#define REG_A3XX_SP_FS_LENGTH_REG 0x000022ff +#define A3XX_SP_FS_LENGTH_REG_SHADERLENGTH__MASK 0xffffffff +#define A3XX_SP_FS_LENGTH_REG_SHADERLENGTH__SHIFT 0 +static inline uint32_t A3XX_SP_FS_LENGTH_REG_SHADERLENGTH(uint32_t val) +{ + return ((val) << A3XX_SP_FS_LENGTH_REG_SHADERLENGTH__SHIFT) & A3XX_SP_FS_LENGTH_REG_SHADERLENGTH__MASK; +} + +#define REG_A3XX_PA_SC_AA_CONFIG 0x00002301 + +#define REG_A3XX_TPL1_TP_VS_TEX_OFFSET 0x00002340 +#define A3XX_TPL1_TP_VS_TEX_OFFSET_SAMPLEROFFSET__MASK 0x000000ff +#define A3XX_TPL1_TP_VS_TEX_OFFSET_SAMPLEROFFSET__SHIFT 0 +static inline uint32_t A3XX_TPL1_TP_VS_TEX_OFFSET_SAMPLEROFFSET(uint32_t val) +{ + return ((val) << A3XX_TPL1_TP_VS_TEX_OFFSET_SAMPLEROFFSET__SHIFT) & A3XX_TPL1_TP_VS_TEX_OFFSET_SAMPLEROFFSET__MASK; +} +#define A3XX_TPL1_TP_VS_TEX_OFFSET_MEMOBJOFFSET__MASK 0x0000ff00 +#define A3XX_TPL1_TP_VS_TEX_OFFSET_MEMOBJOFFSET__SHIFT 8 +static inline uint32_t A3XX_TPL1_TP_VS_TEX_OFFSET_MEMOBJOFFSET(uint32_t val) +{ + return ((val) << 
A3XX_TPL1_TP_VS_TEX_OFFSET_MEMOBJOFFSET__SHIFT) & A3XX_TPL1_TP_VS_TEX_OFFSET_MEMOBJOFFSET__MASK; +} +#define A3XX_TPL1_TP_VS_TEX_OFFSET_BASETABLEPTR__MASK 0xffff0000 +#define A3XX_TPL1_TP_VS_TEX_OFFSET_BASETABLEPTR__SHIFT 16 +static inline uint32_t A3XX_TPL1_TP_VS_TEX_OFFSET_BASETABLEPTR(uint32_t val) +{ + return ((val) << A3XX_TPL1_TP_VS_TEX_OFFSET_BASETABLEPTR__SHIFT) & A3XX_TPL1_TP_VS_TEX_OFFSET_BASETABLEPTR__MASK; +} + +#define REG_A3XX_TPL1_TP_VS_BORDER_COLOR_BASE_ADDR 0x00002341 + +#define REG_A3XX_TPL1_TP_FS_TEX_OFFSET 0x00002342 +#define A3XX_TPL1_TP_FS_TEX_OFFSET_SAMPLEROFFSET__MASK 0x000000ff +#define A3XX_TPL1_TP_FS_TEX_OFFSET_SAMPLEROFFSET__SHIFT 0 +static inline uint32_t A3XX_TPL1_TP_FS_TEX_OFFSET_SAMPLEROFFSET(uint32_t val) +{ + return ((val) << A3XX_TPL1_TP_FS_TEX_OFFSET_SAMPLEROFFSET__SHIFT) & A3XX_TPL1_TP_FS_TEX_OFFSET_SAMPLEROFFSET__MASK; +} +#define A3XX_TPL1_TP_FS_TEX_OFFSET_MEMOBJOFFSET__MASK 0x0000ff00 +#define A3XX_TPL1_TP_FS_TEX_OFFSET_MEMOBJOFFSET__SHIFT 8 +static inline uint32_t A3XX_TPL1_TP_FS_TEX_OFFSET_MEMOBJOFFSET(uint32_t val) +{ + return ((val) << A3XX_TPL1_TP_FS_TEX_OFFSET_MEMOBJOFFSET__SHIFT) & A3XX_TPL1_TP_FS_TEX_OFFSET_MEMOBJOFFSET__MASK; +} +#define A3XX_TPL1_TP_FS_TEX_OFFSET_BASETABLEPTR__MASK 0xffff0000 +#define A3XX_TPL1_TP_FS_TEX_OFFSET_BASETABLEPTR__SHIFT 16 +static inline uint32_t A3XX_TPL1_TP_FS_TEX_OFFSET_BASETABLEPTR(uint32_t val) +{ + return ((val) << A3XX_TPL1_TP_FS_TEX_OFFSET_BASETABLEPTR__SHIFT) & A3XX_TPL1_TP_FS_TEX_OFFSET_BASETABLEPTR__MASK; +} + +#define REG_A3XX_TPL1_TP_FS_BORDER_COLOR_BASE_ADDR 0x00002343 + +#define REG_A3XX_VBIF_CLKON 0x00003001 + +#define REG_A3XX_VBIF_FIXED_SORT_EN 0x0000300c + +#define REG_A3XX_VBIF_FIXED_SORT_SEL0 0x0000300d + +#define REG_A3XX_VBIF_FIXED_SORT_SEL1 0x0000300e + +#define REG_A3XX_VBIF_ABIT_SORT 0x0000301c + +#define REG_A3XX_VBIF_ABIT_SORT_CONF 0x0000301d + +#define REG_A3XX_VBIF_GATE_OFF_WRREQ_EN 0x0000302a + +#define REG_A3XX_VBIF_IN_RD_LIM_CONF0 0x0000302c + +#define 
REG_A3XX_VBIF_IN_RD_LIM_CONF1 0x0000302d + +#define REG_A3XX_VBIF_IN_WR_LIM_CONF0 0x00003030 + +#define REG_A3XX_VBIF_IN_WR_LIM_CONF1 0x00003031 + +#define REG_A3XX_VBIF_OUT_RD_LIM_CONF0 0x00003034 + +#define REG_A3XX_VBIF_OUT_WR_LIM_CONF0 0x00003035 + +#define REG_A3XX_VBIF_DDR_OUT_MAX_BURST 0x00003036 + +#define REG_A3XX_VBIF_ARB_CTL 0x0000303c + +#define REG_A3XX_VBIF_ROUND_ROBIN_QOS_ARB 0x00003049 + +#define REG_A3XX_VBIF_OUT_AXI_AMEMTYPE_CONF0 0x00003058 + +#define REG_A3XX_VBIF_OUT_AXI_AOOO_EN 0x0000305e + +#define REG_A3XX_VBIF_OUT_AXI_AOOO 0x0000305f + +#define REG_A3XX_VBIF_PERF_CNT_EN 0x00003070 +#define A3XX_VBIF_PERF_CNT_EN_CNT0 0x00000001 +#define A3XX_VBIF_PERF_CNT_EN_CNT1 0x00000002 +#define A3XX_VBIF_PERF_CNT_EN_PWRCNT0 0x00000004 +#define A3XX_VBIF_PERF_CNT_EN_PWRCNT1 0x00000008 +#define A3XX_VBIF_PERF_CNT_EN_PWRCNT2 0x00000010 + +#define REG_A3XX_VBIF_PERF_CNT_CLR 0x00003071 +#define A3XX_VBIF_PERF_CNT_CLR_CNT0 0x00000001 +#define A3XX_VBIF_PERF_CNT_CLR_CNT1 0x00000002 +#define A3XX_VBIF_PERF_CNT_CLR_PWRCNT0 0x00000004 +#define A3XX_VBIF_PERF_CNT_CLR_PWRCNT1 0x00000008 +#define A3XX_VBIF_PERF_CNT_CLR_PWRCNT2 0x00000010 + +#define REG_A3XX_VBIF_PERF_CNT_SEL 0x00003072 + +#define REG_A3XX_VBIF_PERF_CNT0_LO 0x00003073 + +#define REG_A3XX_VBIF_PERF_CNT0_HI 0x00003074 + +#define REG_A3XX_VBIF_PERF_CNT1_LO 0x00003075 + +#define REG_A3XX_VBIF_PERF_CNT1_HI 0x00003076 + +#define REG_A3XX_VBIF_PERF_PWR_CNT0_LO 0x00003077 + +#define REG_A3XX_VBIF_PERF_PWR_CNT0_HI 0x00003078 + +#define REG_A3XX_VBIF_PERF_PWR_CNT1_LO 0x00003079 + +#define REG_A3XX_VBIF_PERF_PWR_CNT1_HI 0x0000307a + +#define REG_A3XX_VBIF_PERF_PWR_CNT2_LO 0x0000307b + +#define REG_A3XX_VBIF_PERF_PWR_CNT2_HI 0x0000307c + +#define REG_A3XX_VSC_BIN_SIZE 0x00000c01 +#define A3XX_VSC_BIN_SIZE_WIDTH__MASK 0x0000001f +#define A3XX_VSC_BIN_SIZE_WIDTH__SHIFT 0 +static inline uint32_t A3XX_VSC_BIN_SIZE_WIDTH(uint32_t val) +{ + assert(!(val & 0x1f)); + return ((val >> 5) << A3XX_VSC_BIN_SIZE_WIDTH__SHIFT) 
& A3XX_VSC_BIN_SIZE_WIDTH__MASK; +} +#define A3XX_VSC_BIN_SIZE_HEIGHT__MASK 0x000003e0 +#define A3XX_VSC_BIN_SIZE_HEIGHT__SHIFT 5 +static inline uint32_t A3XX_VSC_BIN_SIZE_HEIGHT(uint32_t val) +{ + assert(!(val & 0x1f)); + return ((val >> 5) << A3XX_VSC_BIN_SIZE_HEIGHT__SHIFT) & A3XX_VSC_BIN_SIZE_HEIGHT__MASK; +} + +#define REG_A3XX_VSC_SIZE_ADDRESS 0x00000c02 + +static inline uint32_t REG_A3XX_VSC_PIPE(uint32_t i0) { return 0x00000c06 + 0x3*i0; } + +static inline uint32_t REG_A3XX_VSC_PIPE_CONFIG(uint32_t i0) { return 0x00000c06 + 0x3*i0; } +#define A3XX_VSC_PIPE_CONFIG_X__MASK 0x000003ff +#define A3XX_VSC_PIPE_CONFIG_X__SHIFT 0 +static inline uint32_t A3XX_VSC_PIPE_CONFIG_X(uint32_t val) +{ + return ((val) << A3XX_VSC_PIPE_CONFIG_X__SHIFT) & A3XX_VSC_PIPE_CONFIG_X__MASK; +} +#define A3XX_VSC_PIPE_CONFIG_Y__MASK 0x000ffc00 +#define A3XX_VSC_PIPE_CONFIG_Y__SHIFT 10 +static inline uint32_t A3XX_VSC_PIPE_CONFIG_Y(uint32_t val) +{ + return ((val) << A3XX_VSC_PIPE_CONFIG_Y__SHIFT) & A3XX_VSC_PIPE_CONFIG_Y__MASK; +} +#define A3XX_VSC_PIPE_CONFIG_W__MASK 0x00f00000 +#define A3XX_VSC_PIPE_CONFIG_W__SHIFT 20 +static inline uint32_t A3XX_VSC_PIPE_CONFIG_W(uint32_t val) +{ + return ((val) << A3XX_VSC_PIPE_CONFIG_W__SHIFT) & A3XX_VSC_PIPE_CONFIG_W__MASK; +} +#define A3XX_VSC_PIPE_CONFIG_H__MASK 0x0f000000 +#define A3XX_VSC_PIPE_CONFIG_H__SHIFT 24 +static inline uint32_t A3XX_VSC_PIPE_CONFIG_H(uint32_t val) +{ + return ((val) << A3XX_VSC_PIPE_CONFIG_H__SHIFT) & A3XX_VSC_PIPE_CONFIG_H__MASK; +} + +static inline uint32_t REG_A3XX_VSC_PIPE_DATA_ADDRESS(uint32_t i0) { return 0x00000c07 + 0x3*i0; } + +static inline uint32_t REG_A3XX_VSC_PIPE_DATA_LENGTH(uint32_t i0) { return 0x00000c08 + 0x3*i0; } + +#define REG_A3XX_VSC_BIN_CONTROL 0x00000c3c +#define A3XX_VSC_BIN_CONTROL_BINNING_ENABLE 0x00000001 + +#define REG_A3XX_UNKNOWN_0C3D 0x00000c3d + +#define REG_A3XX_PC_PERFCOUNTER0_SELECT 0x00000c48 + +#define REG_A3XX_PC_PERFCOUNTER1_SELECT 0x00000c49 + +#define 
REG_A3XX_PC_PERFCOUNTER2_SELECT 0x00000c4a + +#define REG_A3XX_PC_PERFCOUNTER3_SELECT 0x00000c4b + +#define REG_A3XX_GRAS_TSE_DEBUG_ECO 0x00000c81 + +#define REG_A3XX_GRAS_PERFCOUNTER0_SELECT 0x00000c88 + +#define REG_A3XX_GRAS_PERFCOUNTER1_SELECT 0x00000c89 + +#define REG_A3XX_GRAS_PERFCOUNTER2_SELECT 0x00000c8a + +#define REG_A3XX_GRAS_PERFCOUNTER3_SELECT 0x00000c8b + +static inline uint32_t REG_A3XX_GRAS_CL_USER_PLANE(uint32_t i0) { return 0x00000ca0 + 0x4*i0; } + +static inline uint32_t REG_A3XX_GRAS_CL_USER_PLANE_X(uint32_t i0) { return 0x00000ca0 + 0x4*i0; } + +static inline uint32_t REG_A3XX_GRAS_CL_USER_PLANE_Y(uint32_t i0) { return 0x00000ca1 + 0x4*i0; } + +static inline uint32_t REG_A3XX_GRAS_CL_USER_PLANE_Z(uint32_t i0) { return 0x00000ca2 + 0x4*i0; } + +static inline uint32_t REG_A3XX_GRAS_CL_USER_PLANE_W(uint32_t i0) { return 0x00000ca3 + 0x4*i0; } + +#define REG_A3XX_RB_GMEM_BASE_ADDR 0x00000cc0 + +#define REG_A3XX_RB_DEBUG_ECO_CONTROLS_ADDR 0x00000cc1 + +#define REG_A3XX_RB_PERFCOUNTER0_SELECT 0x00000cc6 + +#define REG_A3XX_RB_PERFCOUNTER1_SELECT 0x00000cc7 + +#define REG_A3XX_RB_FRAME_BUFFER_DIMENSION 0x00000ce0 +#define A3XX_RB_FRAME_BUFFER_DIMENSION_WIDTH__MASK 0x00003fff +#define A3XX_RB_FRAME_BUFFER_DIMENSION_WIDTH__SHIFT 0 +static inline uint32_t A3XX_RB_FRAME_BUFFER_DIMENSION_WIDTH(uint32_t val) +{ + return ((val) << A3XX_RB_FRAME_BUFFER_DIMENSION_WIDTH__SHIFT) & A3XX_RB_FRAME_BUFFER_DIMENSION_WIDTH__MASK; +} +#define A3XX_RB_FRAME_BUFFER_DIMENSION_HEIGHT__MASK 0x0fffc000 +#define A3XX_RB_FRAME_BUFFER_DIMENSION_HEIGHT__SHIFT 14 +static inline uint32_t A3XX_RB_FRAME_BUFFER_DIMENSION_HEIGHT(uint32_t val) +{ + return ((val) << A3XX_RB_FRAME_BUFFER_DIMENSION_HEIGHT__SHIFT) & A3XX_RB_FRAME_BUFFER_DIMENSION_HEIGHT__MASK; +} + +#define REG_A3XX_HLSQ_PERFCOUNTER0_SELECT 0x00000e00 + +#define REG_A3XX_HLSQ_PERFCOUNTER1_SELECT 0x00000e01 + +#define REG_A3XX_HLSQ_PERFCOUNTER2_SELECT 0x00000e02 + +#define REG_A3XX_HLSQ_PERFCOUNTER3_SELECT 0x00000e03 + 
+#define REG_A3XX_HLSQ_PERFCOUNTER4_SELECT 0x00000e04 + +#define REG_A3XX_HLSQ_PERFCOUNTER5_SELECT 0x00000e05 + +#define REG_A3XX_UNKNOWN_0E43 0x00000e43 + +#define REG_A3XX_VFD_PERFCOUNTER0_SELECT 0x00000e44 + +#define REG_A3XX_VFD_PERFCOUNTER1_SELECT 0x00000e45 + +#define REG_A3XX_VPC_VPC_DEBUG_RAM_SEL 0x00000e61 + +#define REG_A3XX_VPC_VPC_DEBUG_RAM_READ 0x00000e62 + +#define REG_A3XX_VPC_PERFCOUNTER0_SELECT 0x00000e64 + +#define REG_A3XX_VPC_PERFCOUNTER1_SELECT 0x00000e65 + +#define REG_A3XX_UCHE_CACHE_MODE_CONTROL_REG 0x00000e82 + +#define REG_A3XX_UCHE_PERFCOUNTER0_SELECT 0x00000e84 + +#define REG_A3XX_UCHE_PERFCOUNTER1_SELECT 0x00000e85 + +#define REG_A3XX_UCHE_PERFCOUNTER2_SELECT 0x00000e86 + +#define REG_A3XX_UCHE_PERFCOUNTER3_SELECT 0x00000e87 + +#define REG_A3XX_UCHE_PERFCOUNTER4_SELECT 0x00000e88 + +#define REG_A3XX_UCHE_PERFCOUNTER5_SELECT 0x00000e89 + +#define REG_A3XX_UCHE_CACHE_INVALIDATE0_REG 0x00000ea0 +#define A3XX_UCHE_CACHE_INVALIDATE0_REG_ADDR__MASK 0x0fffffff +#define A3XX_UCHE_CACHE_INVALIDATE0_REG_ADDR__SHIFT 0 +static inline uint32_t A3XX_UCHE_CACHE_INVALIDATE0_REG_ADDR(uint32_t val) +{ + return ((val) << A3XX_UCHE_CACHE_INVALIDATE0_REG_ADDR__SHIFT) & A3XX_UCHE_CACHE_INVALIDATE0_REG_ADDR__MASK; +} + +#define REG_A3XX_UCHE_CACHE_INVALIDATE1_REG 0x00000ea1 +#define A3XX_UCHE_CACHE_INVALIDATE1_REG_ADDR__MASK 0x0fffffff +#define A3XX_UCHE_CACHE_INVALIDATE1_REG_ADDR__SHIFT 0 +static inline uint32_t A3XX_UCHE_CACHE_INVALIDATE1_REG_ADDR(uint32_t val) +{ + return ((val) << A3XX_UCHE_CACHE_INVALIDATE1_REG_ADDR__SHIFT) & A3XX_UCHE_CACHE_INVALIDATE1_REG_ADDR__MASK; +} +#define A3XX_UCHE_CACHE_INVALIDATE1_REG_OPCODE__MASK 0x30000000 +#define A3XX_UCHE_CACHE_INVALIDATE1_REG_OPCODE__SHIFT 28 +static inline uint32_t A3XX_UCHE_CACHE_INVALIDATE1_REG_OPCODE(enum a3xx_cache_opcode val) +{ + return ((val) << A3XX_UCHE_CACHE_INVALIDATE1_REG_OPCODE__SHIFT) & A3XX_UCHE_CACHE_INVALIDATE1_REG_OPCODE__MASK; +} +#define A3XX_UCHE_CACHE_INVALIDATE1_REG_ENTIRE_CACHE 
0x80000000 + +#define REG_A3XX_UNKNOWN_0EA6 0x00000ea6 + +#define REG_A3XX_SP_PERFCOUNTER0_SELECT 0x00000ec4 + +#define REG_A3XX_SP_PERFCOUNTER1_SELECT 0x00000ec5 + +#define REG_A3XX_SP_PERFCOUNTER2_SELECT 0x00000ec6 + +#define REG_A3XX_SP_PERFCOUNTER3_SELECT 0x00000ec7 + +#define REG_A3XX_SP_PERFCOUNTER4_SELECT 0x00000ec8 + +#define REG_A3XX_SP_PERFCOUNTER5_SELECT 0x00000ec9 + +#define REG_A3XX_SP_PERFCOUNTER6_SELECT 0x00000eca + +#define REG_A3XX_SP_PERFCOUNTER7_SELECT 0x00000ecb + +#define REG_A3XX_UNKNOWN_0EE0 0x00000ee0 + +#define REG_A3XX_UNKNOWN_0F03 0x00000f03 + +#define REG_A3XX_TP_PERFCOUNTER0_SELECT 0x00000f04 + +#define REG_A3XX_TP_PERFCOUNTER1_SELECT 0x00000f05 + +#define REG_A3XX_TP_PERFCOUNTER2_SELECT 0x00000f06 + +#define REG_A3XX_TP_PERFCOUNTER3_SELECT 0x00000f07 + +#define REG_A3XX_TP_PERFCOUNTER4_SELECT 0x00000f08 + +#define REG_A3XX_TP_PERFCOUNTER5_SELECT 0x00000f09 + +#define REG_A3XX_VGT_CL_INITIATOR 0x000021f0 + +#define REG_A3XX_VGT_EVENT_INITIATOR 0x000021f9 + +#define REG_A3XX_VGT_DRAW_INITIATOR 0x000021fc +#define A3XX_VGT_DRAW_INITIATOR_PRIM_TYPE__MASK 0x0000003f +#define A3XX_VGT_DRAW_INITIATOR_PRIM_TYPE__SHIFT 0 +static inline uint32_t A3XX_VGT_DRAW_INITIATOR_PRIM_TYPE(enum pc_di_primtype val) +{ + return ((val) << A3XX_VGT_DRAW_INITIATOR_PRIM_TYPE__SHIFT) & A3XX_VGT_DRAW_INITIATOR_PRIM_TYPE__MASK; +} +#define A3XX_VGT_DRAW_INITIATOR_SOURCE_SELECT__MASK 0x000000c0 +#define A3XX_VGT_DRAW_INITIATOR_SOURCE_SELECT__SHIFT 6 +static inline uint32_t A3XX_VGT_DRAW_INITIATOR_SOURCE_SELECT(enum pc_di_src_sel val) +{ + return ((val) << A3XX_VGT_DRAW_INITIATOR_SOURCE_SELECT__SHIFT) & A3XX_VGT_DRAW_INITIATOR_SOURCE_SELECT__MASK; +} +#define A3XX_VGT_DRAW_INITIATOR_VIS_CULL__MASK 0x00000600 +#define A3XX_VGT_DRAW_INITIATOR_VIS_CULL__SHIFT 9 +static inline uint32_t A3XX_VGT_DRAW_INITIATOR_VIS_CULL(enum pc_di_vis_cull_mode val) +{ + return ((val) << A3XX_VGT_DRAW_INITIATOR_VIS_CULL__SHIFT) & A3XX_VGT_DRAW_INITIATOR_VIS_CULL__MASK; +} +#define 
A3XX_VGT_DRAW_INITIATOR_INDEX_SIZE__MASK 0x00000800 +#define A3XX_VGT_DRAW_INITIATOR_INDEX_SIZE__SHIFT 11 +static inline uint32_t A3XX_VGT_DRAW_INITIATOR_INDEX_SIZE(enum pc_di_index_size val) +{ + return ((val) << A3XX_VGT_DRAW_INITIATOR_INDEX_SIZE__SHIFT) & A3XX_VGT_DRAW_INITIATOR_INDEX_SIZE__MASK; +} +#define A3XX_VGT_DRAW_INITIATOR_NOT_EOP 0x00001000 +#define A3XX_VGT_DRAW_INITIATOR_SMALL_INDEX 0x00002000 +#define A3XX_VGT_DRAW_INITIATOR_PRE_DRAW_INITIATOR_ENABLE 0x00004000 +#define A3XX_VGT_DRAW_INITIATOR_NUM_INSTANCES__MASK 0xff000000 +#define A3XX_VGT_DRAW_INITIATOR_NUM_INSTANCES__SHIFT 24 +static inline uint32_t A3XX_VGT_DRAW_INITIATOR_NUM_INSTANCES(uint32_t val) +{ + return ((val) << A3XX_VGT_DRAW_INITIATOR_NUM_INSTANCES__SHIFT) & A3XX_VGT_DRAW_INITIATOR_NUM_INSTANCES__MASK; +} + +#define REG_A3XX_VGT_IMMED_DATA 0x000021fd + +#define REG_A3XX_TEX_SAMP_0 0x00000000 +#define A3XX_TEX_SAMP_0_CLAMPENABLE 0x00000001 +#define A3XX_TEX_SAMP_0_MIPFILTER_LINEAR 0x00000002 +#define A3XX_TEX_SAMP_0_XY_MAG__MASK 0x0000000c +#define A3XX_TEX_SAMP_0_XY_MAG__SHIFT 2 +static inline uint32_t A3XX_TEX_SAMP_0_XY_MAG(enum a3xx_tex_filter val) +{ + return ((val) << A3XX_TEX_SAMP_0_XY_MAG__SHIFT) & A3XX_TEX_SAMP_0_XY_MAG__MASK; +} +#define A3XX_TEX_SAMP_0_XY_MIN__MASK 0x00000030 +#define A3XX_TEX_SAMP_0_XY_MIN__SHIFT 4 +static inline uint32_t A3XX_TEX_SAMP_0_XY_MIN(enum a3xx_tex_filter val) +{ + return ((val) << A3XX_TEX_SAMP_0_XY_MIN__SHIFT) & A3XX_TEX_SAMP_0_XY_MIN__MASK; +} +#define A3XX_TEX_SAMP_0_WRAP_S__MASK 0x000001c0 +#define A3XX_TEX_SAMP_0_WRAP_S__SHIFT 6 +static inline uint32_t A3XX_TEX_SAMP_0_WRAP_S(enum a3xx_tex_clamp val) +{ + return ((val) << A3XX_TEX_SAMP_0_WRAP_S__SHIFT) & A3XX_TEX_SAMP_0_WRAP_S__MASK; +} +#define A3XX_TEX_SAMP_0_WRAP_T__MASK 0x00000e00 +#define A3XX_TEX_SAMP_0_WRAP_T__SHIFT 9 +static inline uint32_t A3XX_TEX_SAMP_0_WRAP_T(enum a3xx_tex_clamp val) +{ + return ((val) << A3XX_TEX_SAMP_0_WRAP_T__SHIFT) & A3XX_TEX_SAMP_0_WRAP_T__MASK; +} +#define 
A3XX_TEX_SAMP_0_WRAP_R__MASK 0x00007000 +#define A3XX_TEX_SAMP_0_WRAP_R__SHIFT 12 +static inline uint32_t A3XX_TEX_SAMP_0_WRAP_R(enum a3xx_tex_clamp val) +{ + return ((val) << A3XX_TEX_SAMP_0_WRAP_R__SHIFT) & A3XX_TEX_SAMP_0_WRAP_R__MASK; +} +#define A3XX_TEX_SAMP_0_ANISO__MASK 0x00038000 +#define A3XX_TEX_SAMP_0_ANISO__SHIFT 15 +static inline uint32_t A3XX_TEX_SAMP_0_ANISO(enum a3xx_tex_aniso val) +{ + return ((val) << A3XX_TEX_SAMP_0_ANISO__SHIFT) & A3XX_TEX_SAMP_0_ANISO__MASK; +} +#define A3XX_TEX_SAMP_0_COMPARE_FUNC__MASK 0x00700000 +#define A3XX_TEX_SAMP_0_COMPARE_FUNC__SHIFT 20 +static inline uint32_t A3XX_TEX_SAMP_0_COMPARE_FUNC(enum adreno_compare_func val) +{ + return ((val) << A3XX_TEX_SAMP_0_COMPARE_FUNC__SHIFT) & A3XX_TEX_SAMP_0_COMPARE_FUNC__MASK; +} +#define A3XX_TEX_SAMP_0_CUBEMAPSEAMLESSFILTOFF 0x01000000 +#define A3XX_TEX_SAMP_0_UNNORM_COORDS 0x80000000 + +#define REG_A3XX_TEX_SAMP_1 0x00000001 +#define A3XX_TEX_SAMP_1_LOD_BIAS__MASK 0x000007ff +#define A3XX_TEX_SAMP_1_LOD_BIAS__SHIFT 0 +static inline uint32_t A3XX_TEX_SAMP_1_LOD_BIAS(float val) +{ + return ((((int32_t)(val * 64.0))) << A3XX_TEX_SAMP_1_LOD_BIAS__SHIFT) & A3XX_TEX_SAMP_1_LOD_BIAS__MASK; +} +#define A3XX_TEX_SAMP_1_MAX_LOD__MASK 0x003ff000 +#define A3XX_TEX_SAMP_1_MAX_LOD__SHIFT 12 +static inline uint32_t A3XX_TEX_SAMP_1_MAX_LOD(float val) +{ + return ((((uint32_t)(val * 64.0))) << A3XX_TEX_SAMP_1_MAX_LOD__SHIFT) & A3XX_TEX_SAMP_1_MAX_LOD__MASK; +} +#define A3XX_TEX_SAMP_1_MIN_LOD__MASK 0xffc00000 +#define A3XX_TEX_SAMP_1_MIN_LOD__SHIFT 22 +static inline uint32_t A3XX_TEX_SAMP_1_MIN_LOD(float val) +{ + return ((((uint32_t)(val * 64.0))) << A3XX_TEX_SAMP_1_MIN_LOD__SHIFT) & A3XX_TEX_SAMP_1_MIN_LOD__MASK; +} + +#define REG_A3XX_TEX_CONST_0 0x00000000 +#define A3XX_TEX_CONST_0_TILED 0x00000001 +#define A3XX_TEX_CONST_0_SRGB 0x00000004 +#define A3XX_TEX_CONST_0_SWIZ_X__MASK 0x00000070 +#define A3XX_TEX_CONST_0_SWIZ_X__SHIFT 4 +static inline uint32_t A3XX_TEX_CONST_0_SWIZ_X(enum 
a3xx_tex_swiz val) +{ + return ((val) << A3XX_TEX_CONST_0_SWIZ_X__SHIFT) & A3XX_TEX_CONST_0_SWIZ_X__MASK; +} +#define A3XX_TEX_CONST_0_SWIZ_Y__MASK 0x00000380 +#define A3XX_TEX_CONST_0_SWIZ_Y__SHIFT 7 +static inline uint32_t A3XX_TEX_CONST_0_SWIZ_Y(enum a3xx_tex_swiz val) +{ + return ((val) << A3XX_TEX_CONST_0_SWIZ_Y__SHIFT) & A3XX_TEX_CONST_0_SWIZ_Y__MASK; +} +#define A3XX_TEX_CONST_0_SWIZ_Z__MASK 0x00001c00 +#define A3XX_TEX_CONST_0_SWIZ_Z__SHIFT 10 +static inline uint32_t A3XX_TEX_CONST_0_SWIZ_Z(enum a3xx_tex_swiz val) +{ + return ((val) << A3XX_TEX_CONST_0_SWIZ_Z__SHIFT) & A3XX_TEX_CONST_0_SWIZ_Z__MASK; +} +#define A3XX_TEX_CONST_0_SWIZ_W__MASK 0x0000e000 +#define A3XX_TEX_CONST_0_SWIZ_W__SHIFT 13 +static inline uint32_t A3XX_TEX_CONST_0_SWIZ_W(enum a3xx_tex_swiz val) +{ + return ((val) << A3XX_TEX_CONST_0_SWIZ_W__SHIFT) & A3XX_TEX_CONST_0_SWIZ_W__MASK; +} +#define A3XX_TEX_CONST_0_MIPLVLS__MASK 0x000f0000 +#define A3XX_TEX_CONST_0_MIPLVLS__SHIFT 16 +static inline uint32_t A3XX_TEX_CONST_0_MIPLVLS(uint32_t val) +{ + return ((val) << A3XX_TEX_CONST_0_MIPLVLS__SHIFT) & A3XX_TEX_CONST_0_MIPLVLS__MASK; +} +#define A3XX_TEX_CONST_0_MSAATEX__MASK 0x00300000 +#define A3XX_TEX_CONST_0_MSAATEX__SHIFT 20 +static inline uint32_t A3XX_TEX_CONST_0_MSAATEX(enum a3xx_tex_msaa val) +{ + return ((val) << A3XX_TEX_CONST_0_MSAATEX__SHIFT) & A3XX_TEX_CONST_0_MSAATEX__MASK; +} +#define A3XX_TEX_CONST_0_FMT__MASK 0x1fc00000 +#define A3XX_TEX_CONST_0_FMT__SHIFT 22 +static inline uint32_t A3XX_TEX_CONST_0_FMT(enum a3xx_tex_fmt val) +{ + return ((val) << A3XX_TEX_CONST_0_FMT__SHIFT) & A3XX_TEX_CONST_0_FMT__MASK; +} +#define A3XX_TEX_CONST_0_NOCONVERT 0x20000000 +#define A3XX_TEX_CONST_0_TYPE__MASK 0xc0000000 +#define A3XX_TEX_CONST_0_TYPE__SHIFT 30 +static inline uint32_t A3XX_TEX_CONST_0_TYPE(enum a3xx_tex_type val) +{ + return ((val) << A3XX_TEX_CONST_0_TYPE__SHIFT) & A3XX_TEX_CONST_0_TYPE__MASK; +} + +#define REG_A3XX_TEX_CONST_1 0x00000001 +#define A3XX_TEX_CONST_1_HEIGHT__MASK 
0x00003fff +#define A3XX_TEX_CONST_1_HEIGHT__SHIFT 0 +static inline uint32_t A3XX_TEX_CONST_1_HEIGHT(uint32_t val) +{ + return ((val) << A3XX_TEX_CONST_1_HEIGHT__SHIFT) & A3XX_TEX_CONST_1_HEIGHT__MASK; +} +#define A3XX_TEX_CONST_1_WIDTH__MASK 0x0fffc000 +#define A3XX_TEX_CONST_1_WIDTH__SHIFT 14 +static inline uint32_t A3XX_TEX_CONST_1_WIDTH(uint32_t val) +{ + return ((val) << A3XX_TEX_CONST_1_WIDTH__SHIFT) & A3XX_TEX_CONST_1_WIDTH__MASK; +} +#define A3XX_TEX_CONST_1_FETCHSIZE__MASK 0xf0000000 +#define A3XX_TEX_CONST_1_FETCHSIZE__SHIFT 28 +static inline uint32_t A3XX_TEX_CONST_1_FETCHSIZE(enum a3xx_tex_fetchsize val) +{ + return ((val) << A3XX_TEX_CONST_1_FETCHSIZE__SHIFT) & A3XX_TEX_CONST_1_FETCHSIZE__MASK; +} + +#define REG_A3XX_TEX_CONST_2 0x00000002 +#define A3XX_TEX_CONST_2_INDX__MASK 0x000001ff +#define A3XX_TEX_CONST_2_INDX__SHIFT 0 +static inline uint32_t A3XX_TEX_CONST_2_INDX(uint32_t val) +{ + return ((val) << A3XX_TEX_CONST_2_INDX__SHIFT) & A3XX_TEX_CONST_2_INDX__MASK; +} +#define A3XX_TEX_CONST_2_PITCH__MASK 0x3ffff000 +#define A3XX_TEX_CONST_2_PITCH__SHIFT 12 +static inline uint32_t A3XX_TEX_CONST_2_PITCH(uint32_t val) +{ + return ((val) << A3XX_TEX_CONST_2_PITCH__SHIFT) & A3XX_TEX_CONST_2_PITCH__MASK; +} +#define A3XX_TEX_CONST_2_SWAP__MASK 0xc0000000 +#define A3XX_TEX_CONST_2_SWAP__SHIFT 30 +static inline uint32_t A3XX_TEX_CONST_2_SWAP(enum a3xx_color_swap val) +{ + return ((val) << A3XX_TEX_CONST_2_SWAP__SHIFT) & A3XX_TEX_CONST_2_SWAP__MASK; +} + +#define REG_A3XX_TEX_CONST_3 0x00000003 +#define A3XX_TEX_CONST_3_LAYERSZ1__MASK 0x0001ffff +#define A3XX_TEX_CONST_3_LAYERSZ1__SHIFT 0 +static inline uint32_t A3XX_TEX_CONST_3_LAYERSZ1(uint32_t val) +{ + assert(!(val & 0xfff)); + return ((val >> 12) << A3XX_TEX_CONST_3_LAYERSZ1__SHIFT) & A3XX_TEX_CONST_3_LAYERSZ1__MASK; +} +#define A3XX_TEX_CONST_3_DEPTH__MASK 0x0ffe0000 +#define A3XX_TEX_CONST_3_DEPTH__SHIFT 17 +static inline uint32_t A3XX_TEX_CONST_3_DEPTH(uint32_t val) +{ + return ((val) << 
A3XX_TEX_CONST_3_DEPTH__SHIFT) & A3XX_TEX_CONST_3_DEPTH__MASK; +} +#define A3XX_TEX_CONST_3_LAYERSZ2__MASK 0xf0000000 +#define A3XX_TEX_CONST_3_LAYERSZ2__SHIFT 28 +static inline uint32_t A3XX_TEX_CONST_3_LAYERSZ2(uint32_t val) +{ + assert(!(val & 0xfff)); + return ((val >> 12) << A3XX_TEX_CONST_3_LAYERSZ2__SHIFT) & A3XX_TEX_CONST_3_LAYERSZ2__MASK; +} + + +#endif /* A3XX_XML */ diff -Nru mesa-18.3.3/src/freedreno/registers/a4xx.xml.h mesa-19.0.1/src/freedreno/registers/a4xx.xml.h --- mesa-18.3.3/src/freedreno/registers/a4xx.xml.h 1970-01-01 00:00:00.000000000 +0000 +++ mesa-19.0.1/src/freedreno/registers/a4xx.xml.h 2019-03-31 23:16:37.000000000 +0000 @@ -0,0 +1,4257 @@ +#ifndef A4XX_XML +#define A4XX_XML + +/* Autogenerated file, DO NOT EDIT manually! + +This file was generated by the rules-ng-ng headergen tool in this git repository: +http://github.com/freedreno/envytools/ +git clone https://github.com/freedreno/envytools.git + +The rules-ng-ng source files this header was generated from are: +- /home/robclark/src/envytools/rnndb/adreno.xml ( 501 bytes, from 2018-07-03 19:37:13) +- /home/robclark/src/envytools/rnndb/freedreno_copyright.xml ( 1572 bytes, from 2018-07-03 19:37:13) +- /home/robclark/src/envytools/rnndb/adreno/a2xx.xml ( 42463 bytes, from 2018-11-19 13:44:03) +- /home/robclark/src/envytools/rnndb/adreno/adreno_common.xml ( 14239 bytes, from 2018-12-05 15:25:53) +- /home/robclark/src/envytools/rnndb/adreno/adreno_pm4.xml ( 43052 bytes, from 2018-12-21 18:21:34) +- /home/robclark/src/envytools/rnndb/adreno/a3xx.xml ( 83840 bytes, from 2018-07-03 19:37:13) +- /home/robclark/src/envytools/rnndb/adreno/a4xx.xml ( 112086 bytes, from 2018-07-03 19:37:13) +- /home/robclark/src/envytools/rnndb/adreno/a5xx.xml ( 147240 bytes, from 2018-12-21 18:21:34) +- /home/robclark/src/envytools/rnndb/adreno/a6xx.xml ( 141895 bytes, from 2018-12-21 18:21:34) +- /home/robclark/src/envytools/rnndb/adreno/a6xx_gmu.xml ( 10431 bytes, from 2018-09-14 13:03:07) +- 
/home/robclark/src/envytools/rnndb/adreno/ocmem.xml ( 1773 bytes, from 2018-07-03 19:37:13) + +Copyright (C) 2013-2018 by the following authors: +- Rob Clark (robclark) +- Ilia Mirkin (imirkin) + +Permission is hereby granted, free of charge, to any person obtaining +a copy of this software and associated documentation files (the +"Software"), to deal in the Software without restriction, including +without limitation the rights to use, copy, modify, merge, publish, +distribute, sublicense, and/or sell copies of the Software, and to +permit persons to whom the Software is furnished to do so, subject to +the following conditions: + +The above copyright notice and this permission notice (including the +next paragraph) shall be included in all copies or substantial +portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, +EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF +MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. +IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE +LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION +OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION +WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
+*/ + + +enum a4xx_color_fmt { + RB4_A8_UNORM = 1, + RB4_R8_UNORM = 2, + RB4_R8_SNORM = 3, + RB4_R8_UINT = 4, + RB4_R8_SINT = 5, + RB4_R4G4B4A4_UNORM = 8, + RB4_R5G5B5A1_UNORM = 10, + RB4_R5G6B5_UNORM = 14, + RB4_R8G8_UNORM = 15, + RB4_R8G8_SNORM = 16, + RB4_R8G8_UINT = 17, + RB4_R8G8_SINT = 18, + RB4_R16_UNORM = 19, + RB4_R16_SNORM = 20, + RB4_R16_FLOAT = 21, + RB4_R16_UINT = 22, + RB4_R16_SINT = 23, + RB4_R8G8B8_UNORM = 25, + RB4_R8G8B8A8_UNORM = 26, + RB4_R8G8B8A8_SNORM = 28, + RB4_R8G8B8A8_UINT = 29, + RB4_R8G8B8A8_SINT = 30, + RB4_R10G10B10A2_UNORM = 31, + RB4_R10G10B10A2_UINT = 34, + RB4_R11G11B10_FLOAT = 39, + RB4_R16G16_UNORM = 40, + RB4_R16G16_SNORM = 41, + RB4_R16G16_FLOAT = 42, + RB4_R16G16_UINT = 43, + RB4_R16G16_SINT = 44, + RB4_R32_FLOAT = 45, + RB4_R32_UINT = 46, + RB4_R32_SINT = 47, + RB4_R16G16B16A16_UNORM = 52, + RB4_R16G16B16A16_SNORM = 53, + RB4_R16G16B16A16_FLOAT = 54, + RB4_R16G16B16A16_UINT = 55, + RB4_R16G16B16A16_SINT = 56, + RB4_R32G32_FLOAT = 57, + RB4_R32G32_UINT = 58, + RB4_R32G32_SINT = 59, + RB4_R32G32B32A32_FLOAT = 60, + RB4_R32G32B32A32_UINT = 61, + RB4_R32G32B32A32_SINT = 62, +}; + +enum a4xx_tile_mode { + TILE4_LINEAR = 0, + TILE4_2 = 2, + TILE4_3 = 3, +}; + +enum a4xx_vtx_fmt { + VFMT4_32_FLOAT = 1, + VFMT4_32_32_FLOAT = 2, + VFMT4_32_32_32_FLOAT = 3, + VFMT4_32_32_32_32_FLOAT = 4, + VFMT4_16_FLOAT = 5, + VFMT4_16_16_FLOAT = 6, + VFMT4_16_16_16_FLOAT = 7, + VFMT4_16_16_16_16_FLOAT = 8, + VFMT4_32_FIXED = 9, + VFMT4_32_32_FIXED = 10, + VFMT4_32_32_32_FIXED = 11, + VFMT4_32_32_32_32_FIXED = 12, + VFMT4_11_11_10_FLOAT = 13, + VFMT4_16_SINT = 16, + VFMT4_16_16_SINT = 17, + VFMT4_16_16_16_SINT = 18, + VFMT4_16_16_16_16_SINT = 19, + VFMT4_16_UINT = 20, + VFMT4_16_16_UINT = 21, + VFMT4_16_16_16_UINT = 22, + VFMT4_16_16_16_16_UINT = 23, + VFMT4_16_SNORM = 24, + VFMT4_16_16_SNORM = 25, + VFMT4_16_16_16_SNORM = 26, + VFMT4_16_16_16_16_SNORM = 27, + VFMT4_16_UNORM = 28, + VFMT4_16_16_UNORM = 29, + VFMT4_16_16_16_UNORM = 30, + 
VFMT4_16_16_16_16_UNORM = 31, + VFMT4_32_UINT = 32, + VFMT4_32_32_UINT = 33, + VFMT4_32_32_32_UINT = 34, + VFMT4_32_32_32_32_UINT = 35, + VFMT4_32_SINT = 36, + VFMT4_32_32_SINT = 37, + VFMT4_32_32_32_SINT = 38, + VFMT4_32_32_32_32_SINT = 39, + VFMT4_8_UINT = 40, + VFMT4_8_8_UINT = 41, + VFMT4_8_8_8_UINT = 42, + VFMT4_8_8_8_8_UINT = 43, + VFMT4_8_UNORM = 44, + VFMT4_8_8_UNORM = 45, + VFMT4_8_8_8_UNORM = 46, + VFMT4_8_8_8_8_UNORM = 47, + VFMT4_8_SINT = 48, + VFMT4_8_8_SINT = 49, + VFMT4_8_8_8_SINT = 50, + VFMT4_8_8_8_8_SINT = 51, + VFMT4_8_SNORM = 52, + VFMT4_8_8_SNORM = 53, + VFMT4_8_8_8_SNORM = 54, + VFMT4_8_8_8_8_SNORM = 55, + VFMT4_10_10_10_2_UINT = 56, + VFMT4_10_10_10_2_UNORM = 57, + VFMT4_10_10_10_2_SINT = 58, + VFMT4_10_10_10_2_SNORM = 59, + VFMT4_2_10_10_10_UINT = 60, + VFMT4_2_10_10_10_UNORM = 61, + VFMT4_2_10_10_10_SINT = 62, + VFMT4_2_10_10_10_SNORM = 63, +}; + +enum a4xx_tex_fmt { + TFMT4_A8_UNORM = 3, + TFMT4_8_UNORM = 4, + TFMT4_8_SNORM = 5, + TFMT4_8_UINT = 6, + TFMT4_8_SINT = 7, + TFMT4_4_4_4_4_UNORM = 8, + TFMT4_5_5_5_1_UNORM = 9, + TFMT4_5_6_5_UNORM = 11, + TFMT4_L8_A8_UNORM = 13, + TFMT4_8_8_UNORM = 14, + TFMT4_8_8_SNORM = 15, + TFMT4_8_8_UINT = 16, + TFMT4_8_8_SINT = 17, + TFMT4_16_UNORM = 18, + TFMT4_16_SNORM = 19, + TFMT4_16_FLOAT = 20, + TFMT4_16_UINT = 21, + TFMT4_16_SINT = 22, + TFMT4_8_8_8_8_UNORM = 28, + TFMT4_8_8_8_8_SNORM = 29, + TFMT4_8_8_8_8_UINT = 30, + TFMT4_8_8_8_8_SINT = 31, + TFMT4_9_9_9_E5_FLOAT = 32, + TFMT4_10_10_10_2_UNORM = 33, + TFMT4_10_10_10_2_UINT = 34, + TFMT4_11_11_10_FLOAT = 37, + TFMT4_16_16_UNORM = 38, + TFMT4_16_16_SNORM = 39, + TFMT4_16_16_FLOAT = 40, + TFMT4_16_16_UINT = 41, + TFMT4_16_16_SINT = 42, + TFMT4_32_FLOAT = 43, + TFMT4_32_UINT = 44, + TFMT4_32_SINT = 45, + TFMT4_16_16_16_16_UNORM = 51, + TFMT4_16_16_16_16_SNORM = 52, + TFMT4_16_16_16_16_FLOAT = 53, + TFMT4_16_16_16_16_UINT = 54, + TFMT4_16_16_16_16_SINT = 55, + TFMT4_32_32_FLOAT = 56, + TFMT4_32_32_UINT = 57, + TFMT4_32_32_SINT = 58, + 
TFMT4_32_32_32_FLOAT = 59, + TFMT4_32_32_32_UINT = 60, + TFMT4_32_32_32_SINT = 61, + TFMT4_32_32_32_32_FLOAT = 63, + TFMT4_32_32_32_32_UINT = 64, + TFMT4_32_32_32_32_SINT = 65, + TFMT4_X8Z24_UNORM = 71, + TFMT4_DXT1 = 86, + TFMT4_DXT3 = 87, + TFMT4_DXT5 = 88, + TFMT4_RGTC1_UNORM = 90, + TFMT4_RGTC1_SNORM = 91, + TFMT4_RGTC2_UNORM = 94, + TFMT4_RGTC2_SNORM = 95, + TFMT4_BPTC_UFLOAT = 97, + TFMT4_BPTC_FLOAT = 98, + TFMT4_BPTC = 99, + TFMT4_ATC_RGB = 100, + TFMT4_ATC_RGBA_EXPLICIT = 101, + TFMT4_ATC_RGBA_INTERPOLATED = 102, + TFMT4_ETC2_RG11_UNORM = 103, + TFMT4_ETC2_RG11_SNORM = 104, + TFMT4_ETC2_R11_UNORM = 105, + TFMT4_ETC2_R11_SNORM = 106, + TFMT4_ETC1 = 107, + TFMT4_ETC2_RGB8 = 108, + TFMT4_ETC2_RGBA8 = 109, + TFMT4_ETC2_RGB8A1 = 110, + TFMT4_ASTC_4x4 = 111, + TFMT4_ASTC_5x4 = 112, + TFMT4_ASTC_5x5 = 113, + TFMT4_ASTC_6x5 = 114, + TFMT4_ASTC_6x6 = 115, + TFMT4_ASTC_8x5 = 116, + TFMT4_ASTC_8x6 = 117, + TFMT4_ASTC_8x8 = 118, + TFMT4_ASTC_10x5 = 119, + TFMT4_ASTC_10x6 = 120, + TFMT4_ASTC_10x8 = 121, + TFMT4_ASTC_10x10 = 122, + TFMT4_ASTC_12x10 = 123, + TFMT4_ASTC_12x12 = 124, +}; + +enum a4xx_tex_fetchsize { + TFETCH4_1_BYTE = 0, + TFETCH4_2_BYTE = 1, + TFETCH4_4_BYTE = 2, + TFETCH4_8_BYTE = 3, + TFETCH4_16_BYTE = 4, +}; + +enum a4xx_depth_format { + DEPTH4_NONE = 0, + DEPTH4_16 = 1, + DEPTH4_24_8 = 2, + DEPTH4_32 = 3, +}; + +enum a4xx_ccu_perfcounter_select { + CCU_BUSY_CYCLES = 0, + CCU_RB_DEPTH_RETURN_STALL = 2, + CCU_RB_COLOR_RETURN_STALL = 3, + CCU_DEPTH_BLOCKS = 6, + CCU_COLOR_BLOCKS = 7, + CCU_DEPTH_BLOCK_HIT = 8, + CCU_COLOR_BLOCK_HIT = 9, + CCU_DEPTH_FLAG1_COUNT = 10, + CCU_DEPTH_FLAG2_COUNT = 11, + CCU_DEPTH_FLAG3_COUNT = 12, + CCU_DEPTH_FLAG4_COUNT = 13, + CCU_COLOR_FLAG1_COUNT = 14, + CCU_COLOR_FLAG2_COUNT = 15, + CCU_COLOR_FLAG3_COUNT = 16, + CCU_COLOR_FLAG4_COUNT = 17, + CCU_PARTIAL_BLOCK_READ = 18, +}; + +enum a4xx_cp_perfcounter_select { + CP_ALWAYS_COUNT = 0, + CP_BUSY = 1, + CP_PFP_IDLE = 2, + CP_PFP_BUSY_WORKING = 3, + CP_PFP_STALL_CYCLES_ANY = 4, 
+ CP_PFP_STARVE_CYCLES_ANY = 5, + CP_PFP_STARVED_PER_LOAD_ADDR = 6, + CP_PFP_STALLED_PER_STORE_ADDR = 7, + CP_PFP_PC_PROFILE = 8, + CP_PFP_MATCH_PM4_PKT_PROFILE = 9, + CP_PFP_COND_INDIRECT_DISCARDED = 10, + CP_LONG_RESUMPTIONS = 11, + CP_RESUME_CYCLES = 12, + CP_RESUME_TO_BOUNDARY_CYCLES = 13, + CP_LONG_PREEMPTIONS = 14, + CP_PREEMPT_CYCLES = 15, + CP_PREEMPT_TO_BOUNDARY_CYCLES = 16, + CP_ME_FIFO_EMPTY_PFP_IDLE = 17, + CP_ME_FIFO_EMPTY_PFP_BUSY = 18, + CP_ME_FIFO_NOT_EMPTY_NOT_FULL = 19, + CP_ME_FIFO_FULL_ME_BUSY = 20, + CP_ME_FIFO_FULL_ME_NON_WORKING = 21, + CP_ME_WAITING_FOR_PACKETS = 22, + CP_ME_BUSY_WORKING = 23, + CP_ME_STARVE_CYCLES_ANY = 24, + CP_ME_STARVE_CYCLES_PER_PROFILE = 25, + CP_ME_STALL_CYCLES_PER_PROFILE = 26, + CP_ME_PC_PROFILE = 27, + CP_RCIU_FIFO_EMPTY = 28, + CP_RCIU_FIFO_NOT_EMPTY_NOT_FULL = 29, + CP_RCIU_FIFO_FULL = 30, + CP_RCIU_FIFO_FULL_NO_CONTEXT = 31, + CP_RCIU_FIFO_FULL_AHB_MASTER = 32, + CP_RCIU_FIFO_FULL_OTHER = 33, + CP_AHB_IDLE = 34, + CP_AHB_STALL_ON_GRANT_NO_SPLIT = 35, + CP_AHB_STALL_ON_GRANT_SPLIT = 36, + CP_AHB_STALL_ON_GRANT_SPLIT_PROFILE = 37, + CP_AHB_BUSY_WORKING = 38, + CP_AHB_BUSY_STALL_ON_HRDY = 39, + CP_AHB_BUSY_STALL_ON_HRDY_PROFILE = 40, +}; + +enum a4xx_gras_ras_perfcounter_select { + RAS_SUPER_TILES = 0, + RAS_8X8_TILES = 1, + RAS_4X4_TILES = 2, + RAS_BUSY_CYCLES = 3, + RAS_STALL_CYCLES_BY_RB = 4, + RAS_STALL_CYCLES_BY_VSC = 5, + RAS_STARVE_CYCLES_BY_TSE = 6, + RAS_SUPERTILE_CYCLES = 7, + RAS_TILE_CYCLES = 8, + RAS_FULLY_COVERED_SUPER_TILES = 9, + RAS_FULLY_COVERED_8X8_TILES = 10, + RAS_4X4_PRIM = 11, + RAS_8X4_4X8_PRIM = 12, + RAS_8X8_PRIM = 13, +}; + +enum a4xx_gras_tse_perfcounter_select { + TSE_INPUT_PRIM = 0, + TSE_INPUT_NULL_PRIM = 1, + TSE_TRIVAL_REJ_PRIM = 2, + TSE_CLIPPED_PRIM = 3, + TSE_NEW_PRIM = 4, + TSE_ZERO_AREA_PRIM = 5, + TSE_FACENESS_CULLED_PRIM = 6, + TSE_ZERO_PIXEL_PRIM = 7, + TSE_OUTPUT_NULL_PRIM = 8, + TSE_OUTPUT_VISIBLE_PRIM = 9, + TSE_PRE_CLIP_PRIM = 10, + TSE_POST_CLIP_PRIM = 11, + 
TSE_BUSY_CYCLES = 12, + TSE_PC_STARVE = 13, + TSE_RAS_STALL = 14, + TSE_STALL_BARYPLANE_FIFO_FULL = 15, + TSE_STALL_ZPLANE_FIFO_FULL = 16, +}; + +enum a4xx_hlsq_perfcounter_select { + HLSQ_SP_VS_STAGE_CONSTANT = 0, + HLSQ_SP_VS_STAGE_INSTRUCTIONS = 1, + HLSQ_SP_FS_STAGE_CONSTANT = 2, + HLSQ_SP_FS_STAGE_INSTRUCTIONS = 3, + HLSQ_TP_STATE = 4, + HLSQ_QUADS = 5, + HLSQ_PIXELS = 6, + HLSQ_VERTICES = 7, + HLSQ_SP_VS_STAGE_DATA_BYTES = 13, + HLSQ_SP_FS_STAGE_DATA_BYTES = 14, + HLSQ_BUSY_CYCLES = 15, + HLSQ_STALL_CYCLES_SP_STATE = 16, + HLSQ_STALL_CYCLES_SP_VS_STAGE = 17, + HLSQ_STALL_CYCLES_SP_FS_STAGE = 18, + HLSQ_STALL_CYCLES_UCHE = 19, + HLSQ_RBBM_LOAD_CYCLES = 20, + HLSQ_DI_TO_VS_START_SP = 21, + HLSQ_DI_TO_FS_START_SP = 22, + HLSQ_VS_STAGE_START_TO_DONE_SP = 23, + HLSQ_FS_STAGE_START_TO_DONE_SP = 24, + HLSQ_SP_STATE_COPY_CYCLES_VS_STAGE = 25, + HLSQ_SP_STATE_COPY_CYCLES_FS_STAGE = 26, + HLSQ_UCHE_LATENCY_CYCLES = 27, + HLSQ_UCHE_LATENCY_COUNT = 28, + HLSQ_STARVE_CYCLES_VFD = 29, +}; + +enum a4xx_pc_perfcounter_select { + PC_VIS_STREAMS_LOADED = 0, + PC_VPC_PRIMITIVES = 2, + PC_DEAD_PRIM = 3, + PC_LIVE_PRIM = 4, + PC_DEAD_DRAWCALLS = 5, + PC_LIVE_DRAWCALLS = 6, + PC_VERTEX_MISSES = 7, + PC_STALL_CYCLES_VFD = 9, + PC_STALL_CYCLES_TSE = 10, + PC_STALL_CYCLES_UCHE = 11, + PC_WORKING_CYCLES = 12, + PC_IA_VERTICES = 13, + PC_GS_PRIMITIVES = 14, + PC_HS_INVOCATIONS = 15, + PC_DS_INVOCATIONS = 16, + PC_DS_PRIMITIVES = 17, + PC_STARVE_CYCLES_FOR_INDEX = 20, + PC_STARVE_CYCLES_FOR_TESS_FACTOR = 21, + PC_STARVE_CYCLES_FOR_VIZ_STREAM = 22, + PC_STALL_CYCLES_TESS = 23, + PC_STARVE_CYCLES_FOR_POSITION = 24, + PC_MODE0_DRAWCALL = 25, + PC_MODE1_DRAWCALL = 26, + PC_MODE2_DRAWCALL = 27, + PC_MODE3_DRAWCALL = 28, + PC_MODE4_DRAWCALL = 29, + PC_PREDICATED_DEAD_DRAWCALL = 30, + PC_STALL_CYCLES_BY_TSE_ONLY = 31, + PC_STALL_CYCLES_BY_VPC_ONLY = 32, + PC_VPC_POS_DATA_TRANSACTION = 33, + PC_BUSY_CYCLES = 34, + PC_STARVE_CYCLES_DI = 35, + PC_STALL_CYCLES_VPC = 36, + TESS_WORKING_CYCLES = 37, 
+ TESS_NUM_CYCLES_SETUP_WORKING = 38, + TESS_NUM_CYCLES_PTGEN_WORKING = 39, + TESS_NUM_CYCLES_CONNGEN_WORKING = 40, + TESS_BUSY_CYCLES = 41, + TESS_STARVE_CYCLES_PC = 42, + TESS_STALL_CYCLES_PC = 43, +}; + +enum a4xx_pwr_perfcounter_select { + PWR_CORE_CLOCK_CYCLES = 0, + PWR_BUSY_CLOCK_CYCLES = 1, +}; + +enum a4xx_rb_perfcounter_select { + RB_BUSY_CYCLES = 0, + RB_BUSY_CYCLES_BINNING = 1, + RB_BUSY_CYCLES_RENDERING = 2, + RB_BUSY_CYCLES_RESOLVE = 3, + RB_STARVE_CYCLES_BY_SP = 4, + RB_STARVE_CYCLES_BY_RAS = 5, + RB_STARVE_CYCLES_BY_MARB = 6, + RB_STALL_CYCLES_BY_MARB = 7, + RB_STALL_CYCLES_BY_HLSQ = 8, + RB_RB_RB_MARB_DATA = 9, + RB_SP_RB_QUAD = 10, + RB_RAS_RB_Z_QUADS = 11, + RB_GMEM_CH0_READ = 12, + RB_GMEM_CH1_READ = 13, + RB_GMEM_CH0_WRITE = 14, + RB_GMEM_CH1_WRITE = 15, + RB_CP_CONTEXT_DONE = 16, + RB_CP_CACHE_FLUSH = 17, + RB_CP_ZPASS_DONE = 18, + RB_STALL_FIFO0_FULL = 19, + RB_STALL_FIFO1_FULL = 20, + RB_STALL_FIFO2_FULL = 21, + RB_STALL_FIFO3_FULL = 22, + RB_RB_HLSQ_TRANSACTIONS = 23, + RB_Z_READ = 24, + RB_Z_WRITE = 25, + RB_C_READ = 26, + RB_C_WRITE = 27, + RB_C_READ_LATENCY = 28, + RB_Z_READ_LATENCY = 29, + RB_STALL_BY_UCHE = 30, + RB_MARB_UCHE_TRANSACTIONS = 31, + RB_CACHE_STALL_MISS = 32, + RB_CACHE_STALL_FIFO_FULL = 33, + RB_8BIT_BLENDER_UNITS_ACTIVE = 34, + RB_16BIT_BLENDER_UNITS_ACTIVE = 35, + RB_SAMPLER_UNITS_ACTIVE = 36, + RB_TOTAL_PASS = 38, + RB_Z_PASS = 39, + RB_Z_FAIL = 40, + RB_S_FAIL = 41, + RB_POWER0 = 42, + RB_POWER1 = 43, + RB_POWER2 = 44, + RB_POWER3 = 45, + RB_POWER4 = 46, + RB_POWER5 = 47, + RB_POWER6 = 48, + RB_POWER7 = 49, +}; + +enum a4xx_rbbm_perfcounter_select { + RBBM_ALWAYS_ON = 0, + RBBM_VBIF_BUSY = 1, + RBBM_TSE_BUSY = 2, + RBBM_RAS_BUSY = 3, + RBBM_PC_DCALL_BUSY = 4, + RBBM_PC_VSD_BUSY = 5, + RBBM_VFD_BUSY = 6, + RBBM_VPC_BUSY = 7, + RBBM_UCHE_BUSY = 8, + RBBM_VSC_BUSY = 9, + RBBM_HLSQ_BUSY = 10, + RBBM_ANY_RB_BUSY = 11, + RBBM_ANY_TPL1_BUSY = 12, + RBBM_ANY_SP_BUSY = 13, + RBBM_ANY_MARB_BUSY = 14, + RBBM_ANY_ARB_BUSY = 15, + 
RBBM_AHB_STATUS_BUSY = 16, + RBBM_AHB_STATUS_STALLED = 17, + RBBM_AHB_STATUS_TXFR = 18, + RBBM_AHB_STATUS_TXFR_SPLIT = 19, + RBBM_AHB_STATUS_TXFR_ERROR = 20, + RBBM_AHB_STATUS_LONG_STALL = 21, + RBBM_STATUS_MASKED = 22, + RBBM_CP_BUSY_GFX_CORE_IDLE = 23, + RBBM_TESS_BUSY = 24, + RBBM_COM_BUSY = 25, + RBBM_DCOM_BUSY = 32, + RBBM_ANY_CCU_BUSY = 33, + RBBM_DPM_BUSY = 34, +}; + +enum a4xx_sp_perfcounter_select { + SP_LM_LOAD_INSTRUCTIONS = 0, + SP_LM_STORE_INSTRUCTIONS = 1, + SP_LM_ATOMICS = 2, + SP_GM_LOAD_INSTRUCTIONS = 3, + SP_GM_STORE_INSTRUCTIONS = 4, + SP_GM_ATOMICS = 5, + SP_VS_STAGE_TEX_INSTRUCTIONS = 6, + SP_VS_STAGE_CFLOW_INSTRUCTIONS = 7, + SP_VS_STAGE_EFU_INSTRUCTIONS = 8, + SP_VS_STAGE_FULL_ALU_INSTRUCTIONS = 9, + SP_VS_STAGE_HALF_ALU_INSTRUCTIONS = 10, + SP_FS_STAGE_TEX_INSTRUCTIONS = 11, + SP_FS_STAGE_CFLOW_INSTRUCTIONS = 12, + SP_FS_STAGE_EFU_INSTRUCTIONS = 13, + SP_FS_STAGE_FULL_ALU_INSTRUCTIONS = 14, + SP_FS_STAGE_HALF_ALU_INSTRUCTIONS = 15, + SP_VS_INSTRUCTIONS = 17, + SP_FS_INSTRUCTIONS = 18, + SP_ADDR_LOCK_COUNT = 19, + SP_UCHE_READ_TRANS = 20, + SP_UCHE_WRITE_TRANS = 21, + SP_EXPORT_VPC_TRANS = 22, + SP_EXPORT_RB_TRANS = 23, + SP_PIXELS_KILLED = 24, + SP_ICL1_REQUESTS = 25, + SP_ICL1_MISSES = 26, + SP_ICL0_REQUESTS = 27, + SP_ICL0_MISSES = 28, + SP_ALU_WORKING_CYCLES = 29, + SP_EFU_WORKING_CYCLES = 30, + SP_STALL_CYCLES_BY_VPC = 31, + SP_STALL_CYCLES_BY_TP = 32, + SP_STALL_CYCLES_BY_UCHE = 33, + SP_STALL_CYCLES_BY_RB = 34, + SP_BUSY_CYCLES = 35, + SP_HS_INSTRUCTIONS = 36, + SP_DS_INSTRUCTIONS = 37, + SP_GS_INSTRUCTIONS = 38, + SP_CS_INSTRUCTIONS = 39, + SP_SCHEDULER_NON_WORKING = 40, + SP_WAVE_CONTEXTS = 41, + SP_WAVE_CONTEXT_CYCLES = 42, + SP_POWER0 = 43, + SP_POWER1 = 44, + SP_POWER2 = 45, + SP_POWER3 = 46, + SP_POWER4 = 47, + SP_POWER5 = 48, + SP_POWER6 = 49, + SP_POWER7 = 50, + SP_POWER8 = 51, + SP_POWER9 = 52, + SP_POWER10 = 53, + SP_POWER11 = 54, + SP_POWER12 = 55, + SP_POWER13 = 56, + SP_POWER14 = 57, + SP_POWER15 = 58, +}; + +enum 
a4xx_tp_perfcounter_select { + TP_L1_REQUESTS = 0, + TP_L1_MISSES = 1, + TP_QUADS_OFFSET = 8, + TP_QUAD_SHADOW = 9, + TP_QUADS_ARRAY = 10, + TP_QUADS_GRADIENT = 11, + TP_QUADS_1D2D = 12, + TP_QUADS_3DCUBE = 13, + TP_BUSY_CYCLES = 16, + TP_STALL_CYCLES_BY_ARB = 17, + TP_STATE_CACHE_REQUESTS = 20, + TP_STATE_CACHE_MISSES = 21, + TP_POWER0 = 22, + TP_POWER1 = 23, + TP_POWER2 = 24, + TP_POWER3 = 25, + TP_POWER4 = 26, + TP_POWER5 = 27, + TP_POWER6 = 28, + TP_POWER7 = 29, +}; + +enum a4xx_uche_perfcounter_select { + UCHE_VBIF_READ_BEATS_TP = 0, + UCHE_VBIF_READ_BEATS_VFD = 1, + UCHE_VBIF_READ_BEATS_HLSQ = 2, + UCHE_VBIF_READ_BEATS_MARB = 3, + UCHE_VBIF_READ_BEATS_SP = 4, + UCHE_READ_REQUESTS_TP = 5, + UCHE_READ_REQUESTS_VFD = 6, + UCHE_READ_REQUESTS_HLSQ = 7, + UCHE_READ_REQUESTS_MARB = 8, + UCHE_READ_REQUESTS_SP = 9, + UCHE_WRITE_REQUESTS_MARB = 10, + UCHE_WRITE_REQUESTS_SP = 11, + UCHE_TAG_CHECK_FAILS = 12, + UCHE_EVICTS = 13, + UCHE_FLUSHES = 14, + UCHE_VBIF_LATENCY_CYCLES = 15, + UCHE_VBIF_LATENCY_SAMPLES = 16, + UCHE_BUSY_CYCLES = 17, + UCHE_VBIF_READ_BEATS_PC = 18, + UCHE_READ_REQUESTS_PC = 19, + UCHE_WRITE_REQUESTS_VPC = 20, + UCHE_STALL_BY_VBIF = 21, + UCHE_WRITE_REQUESTS_VSC = 22, + UCHE_POWER0 = 23, + UCHE_POWER1 = 24, + UCHE_POWER2 = 25, + UCHE_POWER3 = 26, + UCHE_POWER4 = 27, + UCHE_POWER5 = 28, + UCHE_POWER6 = 29, + UCHE_POWER7 = 30, +}; + +enum a4xx_vbif_perfcounter_select { + AXI_READ_REQUESTS_ID_0 = 0, + AXI_READ_REQUESTS_ID_1 = 1, + AXI_READ_REQUESTS_ID_2 = 2, + AXI_READ_REQUESTS_ID_3 = 3, + AXI_READ_REQUESTS_ID_4 = 4, + AXI_READ_REQUESTS_ID_5 = 5, + AXI_READ_REQUESTS_ID_6 = 6, + AXI_READ_REQUESTS_ID_7 = 7, + AXI_READ_REQUESTS_ID_8 = 8, + AXI_READ_REQUESTS_ID_9 = 9, + AXI_READ_REQUESTS_ID_10 = 10, + AXI_READ_REQUESTS_ID_11 = 11, + AXI_READ_REQUESTS_ID_12 = 12, + AXI_READ_REQUESTS_ID_13 = 13, + AXI_READ_REQUESTS_ID_14 = 14, + AXI_READ_REQUESTS_ID_15 = 15, + AXI0_READ_REQUESTS_TOTAL = 16, + AXI1_READ_REQUESTS_TOTAL = 17, + AXI2_READ_REQUESTS_TOTAL = 18, + 
AXI3_READ_REQUESTS_TOTAL = 19, + AXI_READ_REQUESTS_TOTAL = 20, + AXI_WRITE_REQUESTS_ID_0 = 21, + AXI_WRITE_REQUESTS_ID_1 = 22, + AXI_WRITE_REQUESTS_ID_2 = 23, + AXI_WRITE_REQUESTS_ID_3 = 24, + AXI_WRITE_REQUESTS_ID_4 = 25, + AXI_WRITE_REQUESTS_ID_5 = 26, + AXI_WRITE_REQUESTS_ID_6 = 27, + AXI_WRITE_REQUESTS_ID_7 = 28, + AXI_WRITE_REQUESTS_ID_8 = 29, + AXI_WRITE_REQUESTS_ID_9 = 30, + AXI_WRITE_REQUESTS_ID_10 = 31, + AXI_WRITE_REQUESTS_ID_11 = 32, + AXI_WRITE_REQUESTS_ID_12 = 33, + AXI_WRITE_REQUESTS_ID_13 = 34, + AXI_WRITE_REQUESTS_ID_14 = 35, + AXI_WRITE_REQUESTS_ID_15 = 36, + AXI0_WRITE_REQUESTS_TOTAL = 37, + AXI1_WRITE_REQUESTS_TOTAL = 38, + AXI2_WRITE_REQUESTS_TOTAL = 39, + AXI3_WRITE_REQUESTS_TOTAL = 40, + AXI_WRITE_REQUESTS_TOTAL = 41, + AXI_TOTAL_REQUESTS = 42, + AXI_READ_DATA_BEATS_ID_0 = 43, + AXI_READ_DATA_BEATS_ID_1 = 44, + AXI_READ_DATA_BEATS_ID_2 = 45, + AXI_READ_DATA_BEATS_ID_3 = 46, + AXI_READ_DATA_BEATS_ID_4 = 47, + AXI_READ_DATA_BEATS_ID_5 = 48, + AXI_READ_DATA_BEATS_ID_6 = 49, + AXI_READ_DATA_BEATS_ID_7 = 50, + AXI_READ_DATA_BEATS_ID_8 = 51, + AXI_READ_DATA_BEATS_ID_9 = 52, + AXI_READ_DATA_BEATS_ID_10 = 53, + AXI_READ_DATA_BEATS_ID_11 = 54, + AXI_READ_DATA_BEATS_ID_12 = 55, + AXI_READ_DATA_BEATS_ID_13 = 56, + AXI_READ_DATA_BEATS_ID_14 = 57, + AXI_READ_DATA_BEATS_ID_15 = 58, + AXI0_READ_DATA_BEATS_TOTAL = 59, + AXI1_READ_DATA_BEATS_TOTAL = 60, + AXI2_READ_DATA_BEATS_TOTAL = 61, + AXI3_READ_DATA_BEATS_TOTAL = 62, + AXI_READ_DATA_BEATS_TOTAL = 63, + AXI_WRITE_DATA_BEATS_ID_0 = 64, + AXI_WRITE_DATA_BEATS_ID_1 = 65, + AXI_WRITE_DATA_BEATS_ID_2 = 66, + AXI_WRITE_DATA_BEATS_ID_3 = 67, + AXI_WRITE_DATA_BEATS_ID_4 = 68, + AXI_WRITE_DATA_BEATS_ID_5 = 69, + AXI_WRITE_DATA_BEATS_ID_6 = 70, + AXI_WRITE_DATA_BEATS_ID_7 = 71, + AXI_WRITE_DATA_BEATS_ID_8 = 72, + AXI_WRITE_DATA_BEATS_ID_9 = 73, + AXI_WRITE_DATA_BEATS_ID_10 = 74, + AXI_WRITE_DATA_BEATS_ID_11 = 75, + AXI_WRITE_DATA_BEATS_ID_12 = 76, + AXI_WRITE_DATA_BEATS_ID_13 = 77, + AXI_WRITE_DATA_BEATS_ID_14 = 78, 
+ AXI_WRITE_DATA_BEATS_ID_15 = 79, + AXI0_WRITE_DATA_BEATS_TOTAL = 80, + AXI1_WRITE_DATA_BEATS_TOTAL = 81, + AXI2_WRITE_DATA_BEATS_TOTAL = 82, + AXI3_WRITE_DATA_BEATS_TOTAL = 83, + AXI_WRITE_DATA_BEATS_TOTAL = 84, + AXI_DATA_BEATS_TOTAL = 85, + CYCLES_HELD_OFF_ID_0 = 86, + CYCLES_HELD_OFF_ID_1 = 87, + CYCLES_HELD_OFF_ID_2 = 88, + CYCLES_HELD_OFF_ID_3 = 89, + CYCLES_HELD_OFF_ID_4 = 90, + CYCLES_HELD_OFF_ID_5 = 91, + CYCLES_HELD_OFF_ID_6 = 92, + CYCLES_HELD_OFF_ID_7 = 93, + CYCLES_HELD_OFF_ID_8 = 94, + CYCLES_HELD_OFF_ID_9 = 95, + CYCLES_HELD_OFF_ID_10 = 96, + CYCLES_HELD_OFF_ID_11 = 97, + CYCLES_HELD_OFF_ID_12 = 98, + CYCLES_HELD_OFF_ID_13 = 99, + CYCLES_HELD_OFF_ID_14 = 100, + CYCLES_HELD_OFF_ID_15 = 101, + AXI_READ_REQUEST_HELD_OFF = 102, + AXI_WRITE_REQUEST_HELD_OFF = 103, + AXI_REQUEST_HELD_OFF = 104, + AXI_WRITE_DATA_HELD_OFF = 105, + OCMEM_AXI_READ_REQUEST_HELD_OFF = 106, + OCMEM_AXI_WRITE_REQUEST_HELD_OFF = 107, + OCMEM_AXI_REQUEST_HELD_OFF = 108, + OCMEM_AXI_WRITE_DATA_HELD_OFF = 109, + ELAPSED_CYCLES_DDR = 110, + ELAPSED_CYCLES_OCMEM = 111, +}; + +enum a4xx_vfd_perfcounter_select { + VFD_UCHE_BYTE_FETCHED = 0, + VFD_UCHE_TRANS = 1, + VFD_FETCH_INSTRUCTIONS = 3, + VFD_BUSY_CYCLES = 5, + VFD_STALL_CYCLES_UCHE = 6, + VFD_STALL_CYCLES_HLSQ = 7, + VFD_STALL_CYCLES_VPC_BYPASS = 8, + VFD_STALL_CYCLES_VPC_ALLOC = 9, + VFD_MODE_0_FIBERS = 13, + VFD_MODE_1_FIBERS = 14, + VFD_MODE_2_FIBERS = 15, + VFD_MODE_3_FIBERS = 16, + VFD_MODE_4_FIBERS = 17, + VFD_BFIFO_STALL = 18, + VFD_NUM_VERTICES_TOTAL = 19, + VFD_PACKER_FULL = 20, + VFD_UCHE_REQUEST_FIFO_FULL = 21, + VFD_STARVE_CYCLES_PC = 22, + VFD_STARVE_CYCLES_UCHE = 23, +}; + +enum a4xx_vpc_perfcounter_select { + VPC_SP_LM_COMPONENTS = 2, + VPC_SP0_LM_BYTES = 3, + VPC_SP1_LM_BYTES = 4, + VPC_SP2_LM_BYTES = 5, + VPC_SP3_LM_BYTES = 6, + VPC_WORKING_CYCLES = 7, + VPC_STALL_CYCLES_LM = 8, + VPC_STARVE_CYCLES_RAS = 9, + VPC_STREAMOUT_CYCLES = 10, + VPC_UCHE_TRANSACTIONS = 12, + VPC_STALL_CYCLES_UCHE = 13, + VPC_BUSY_CYCLES = 
14, + VPC_STARVE_CYCLES_SP = 15, +}; + +enum a4xx_vsc_perfcounter_select { + VSC_BUSY_CYCLES = 0, + VSC_WORKING_CYCLES = 1, + VSC_STALL_CYCLES_UCHE = 2, + VSC_STARVE_CYCLES_RAS = 3, + VSC_EOT_NUM = 4, +}; + +enum a4xx_tex_filter { + A4XX_TEX_NEAREST = 0, + A4XX_TEX_LINEAR = 1, + A4XX_TEX_ANISO = 2, +}; + +enum a4xx_tex_clamp { + A4XX_TEX_REPEAT = 0, + A4XX_TEX_CLAMP_TO_EDGE = 1, + A4XX_TEX_MIRROR_REPEAT = 2, + A4XX_TEX_CLAMP_TO_BORDER = 3, + A4XX_TEX_MIRROR_CLAMP = 4, +}; + +enum a4xx_tex_aniso { + A4XX_TEX_ANISO_1 = 0, + A4XX_TEX_ANISO_2 = 1, + A4XX_TEX_ANISO_4 = 2, + A4XX_TEX_ANISO_8 = 3, + A4XX_TEX_ANISO_16 = 4, +}; + +enum a4xx_tex_swiz { + A4XX_TEX_X = 0, + A4XX_TEX_Y = 1, + A4XX_TEX_Z = 2, + A4XX_TEX_W = 3, + A4XX_TEX_ZERO = 4, + A4XX_TEX_ONE = 5, +}; + +enum a4xx_tex_type { + A4XX_TEX_1D = 0, + A4XX_TEX_2D = 1, + A4XX_TEX_CUBE = 2, + A4XX_TEX_3D = 3, +}; + +#define A4XX_CGC_HLSQ_EARLY_CYC__MASK 0x00700000 +#define A4XX_CGC_HLSQ_EARLY_CYC__SHIFT 20 +static inline uint32_t A4XX_CGC_HLSQ_EARLY_CYC(uint32_t val) +{ + return ((val) << A4XX_CGC_HLSQ_EARLY_CYC__SHIFT) & A4XX_CGC_HLSQ_EARLY_CYC__MASK; +} +#define A4XX_INT0_RBBM_GPU_IDLE 0x00000001 +#define A4XX_INT0_RBBM_AHB_ERROR 0x00000002 +#define A4XX_INT0_RBBM_REG_TIMEOUT 0x00000004 +#define A4XX_INT0_RBBM_ME_MS_TIMEOUT 0x00000008 +#define A4XX_INT0_RBBM_PFP_MS_TIMEOUT 0x00000010 +#define A4XX_INT0_RBBM_ATB_BUS_OVERFLOW 0x00000020 +#define A4XX_INT0_VFD_ERROR 0x00000040 +#define A4XX_INT0_CP_SW_INT 0x00000080 +#define A4XX_INT0_CP_T0_PACKET_IN_IB 0x00000100 +#define A4XX_INT0_CP_OPCODE_ERROR 0x00000200 +#define A4XX_INT0_CP_RESERVED_BIT_ERROR 0x00000400 +#define A4XX_INT0_CP_HW_FAULT 0x00000800 +#define A4XX_INT0_CP_DMA 0x00001000 +#define A4XX_INT0_CP_IB2_INT 0x00002000 +#define A4XX_INT0_CP_IB1_INT 0x00004000 +#define A4XX_INT0_CP_RB_INT 0x00008000 +#define A4XX_INT0_CP_REG_PROTECT_FAULT 0x00010000 +#define A4XX_INT0_CP_RB_DONE_TS 0x00020000 +#define A4XX_INT0_CP_VS_DONE_TS 0x00040000 +#define 
A4XX_INT0_CP_PS_DONE_TS 0x00080000 +#define A4XX_INT0_CACHE_FLUSH_TS 0x00100000 +#define A4XX_INT0_CP_AHB_ERROR_HALT 0x00200000 +#define A4XX_INT0_MISC_HANG_DETECT 0x01000000 +#define A4XX_INT0_UCHE_OOB_ACCESS 0x02000000 +#define REG_A4XX_RB_GMEM_BASE_ADDR 0x00000cc0 + +#define REG_A4XX_RB_PERFCTR_RB_SEL_0 0x00000cc7 + +#define REG_A4XX_RB_PERFCTR_RB_SEL_1 0x00000cc8 + +#define REG_A4XX_RB_PERFCTR_RB_SEL_2 0x00000cc9 + +#define REG_A4XX_RB_PERFCTR_RB_SEL_3 0x00000cca + +#define REG_A4XX_RB_PERFCTR_RB_SEL_4 0x00000ccb + +#define REG_A4XX_RB_PERFCTR_RB_SEL_5 0x00000ccc + +#define REG_A4XX_RB_PERFCTR_RB_SEL_6 0x00000ccd + +#define REG_A4XX_RB_PERFCTR_RB_SEL_7 0x00000cce + +#define REG_A4XX_RB_PERFCTR_CCU_SEL_0 0x00000ccf + +#define REG_A4XX_RB_PERFCTR_CCU_SEL_1 0x00000cd0 + +#define REG_A4XX_RB_PERFCTR_CCU_SEL_2 0x00000cd1 + +#define REG_A4XX_RB_PERFCTR_CCU_SEL_3 0x00000cd2 + +#define REG_A4XX_RB_FRAME_BUFFER_DIMENSION 0x00000ce0 +#define A4XX_RB_FRAME_BUFFER_DIMENSION_WIDTH__MASK 0x00003fff +#define A4XX_RB_FRAME_BUFFER_DIMENSION_WIDTH__SHIFT 0 +static inline uint32_t A4XX_RB_FRAME_BUFFER_DIMENSION_WIDTH(uint32_t val) +{ + return ((val) << A4XX_RB_FRAME_BUFFER_DIMENSION_WIDTH__SHIFT) & A4XX_RB_FRAME_BUFFER_DIMENSION_WIDTH__MASK; +} +#define A4XX_RB_FRAME_BUFFER_DIMENSION_HEIGHT__MASK 0x3fff0000 +#define A4XX_RB_FRAME_BUFFER_DIMENSION_HEIGHT__SHIFT 16 +static inline uint32_t A4XX_RB_FRAME_BUFFER_DIMENSION_HEIGHT(uint32_t val) +{ + return ((val) << A4XX_RB_FRAME_BUFFER_DIMENSION_HEIGHT__SHIFT) & A4XX_RB_FRAME_BUFFER_DIMENSION_HEIGHT__MASK; +} + +#define REG_A4XX_RB_CLEAR_COLOR_DW0 0x000020cc + +#define REG_A4XX_RB_CLEAR_COLOR_DW1 0x000020cd + +#define REG_A4XX_RB_CLEAR_COLOR_DW2 0x000020ce + +#define REG_A4XX_RB_CLEAR_COLOR_DW3 0x000020cf + +#define REG_A4XX_RB_MODE_CONTROL 0x000020a0 +#define A4XX_RB_MODE_CONTROL_WIDTH__MASK 0x0000003f +#define A4XX_RB_MODE_CONTROL_WIDTH__SHIFT 0 +static inline uint32_t A4XX_RB_MODE_CONTROL_WIDTH(uint32_t val) +{ + assert(!(val & 
0x1f)); + return ((val >> 5) << A4XX_RB_MODE_CONTROL_WIDTH__SHIFT) & A4XX_RB_MODE_CONTROL_WIDTH__MASK; +} +#define A4XX_RB_MODE_CONTROL_HEIGHT__MASK 0x00003f00 +#define A4XX_RB_MODE_CONTROL_HEIGHT__SHIFT 8 +static inline uint32_t A4XX_RB_MODE_CONTROL_HEIGHT(uint32_t val) +{ + assert(!(val & 0x1f)); + return ((val >> 5) << A4XX_RB_MODE_CONTROL_HEIGHT__SHIFT) & A4XX_RB_MODE_CONTROL_HEIGHT__MASK; +} +#define A4XX_RB_MODE_CONTROL_ENABLE_GMEM 0x00010000 + +#define REG_A4XX_RB_RENDER_CONTROL 0x000020a1 +#define A4XX_RB_RENDER_CONTROL_BINNING_PASS 0x00000001 +#define A4XX_RB_RENDER_CONTROL_DISABLE_COLOR_PIPE 0x00000020 + +#define REG_A4XX_RB_MSAA_CONTROL 0x000020a2 +#define A4XX_RB_MSAA_CONTROL_DISABLE 0x00001000 +#define A4XX_RB_MSAA_CONTROL_SAMPLES__MASK 0x0000e000 +#define A4XX_RB_MSAA_CONTROL_SAMPLES__SHIFT 13 +static inline uint32_t A4XX_RB_MSAA_CONTROL_SAMPLES(uint32_t val) +{ + return ((val) << A4XX_RB_MSAA_CONTROL_SAMPLES__SHIFT) & A4XX_RB_MSAA_CONTROL_SAMPLES__MASK; +} + +#define REG_A4XX_RB_RENDER_CONTROL2 0x000020a3 +#define A4XX_RB_RENDER_CONTROL2_XCOORD 0x00000001 +#define A4XX_RB_RENDER_CONTROL2_YCOORD 0x00000002 +#define A4XX_RB_RENDER_CONTROL2_ZCOORD 0x00000004 +#define A4XX_RB_RENDER_CONTROL2_WCOORD 0x00000008 +#define A4XX_RB_RENDER_CONTROL2_SAMPLEMASK 0x00000010 +#define A4XX_RB_RENDER_CONTROL2_FACENESS 0x00000020 +#define A4XX_RB_RENDER_CONTROL2_SAMPLEID 0x00000040 +#define A4XX_RB_RENDER_CONTROL2_MSAA_SAMPLES__MASK 0x00000380 +#define A4XX_RB_RENDER_CONTROL2_MSAA_SAMPLES__SHIFT 7 +static inline uint32_t A4XX_RB_RENDER_CONTROL2_MSAA_SAMPLES(uint32_t val) +{ + return ((val) << A4XX_RB_RENDER_CONTROL2_MSAA_SAMPLES__SHIFT) & A4XX_RB_RENDER_CONTROL2_MSAA_SAMPLES__MASK; +} +#define A4XX_RB_RENDER_CONTROL2_SAMPLEID_HR 0x00000800 +#define A4XX_RB_RENDER_CONTROL2_VARYING 0x00001000 + +static inline uint32_t REG_A4XX_RB_MRT(uint32_t i0) { return 0x000020a4 + 0x5*i0; } + +static inline uint32_t REG_A4XX_RB_MRT_CONTROL(uint32_t i0) { return 0x000020a4 + 0x5*i0; } 
+#define A4XX_RB_MRT_CONTROL_READ_DEST_ENABLE 0x00000008 +#define A4XX_RB_MRT_CONTROL_BLEND 0x00000010 +#define A4XX_RB_MRT_CONTROL_BLEND2 0x00000020 +#define A4XX_RB_MRT_CONTROL_ROP_ENABLE 0x00000040 +#define A4XX_RB_MRT_CONTROL_ROP_CODE__MASK 0x00000f00 +#define A4XX_RB_MRT_CONTROL_ROP_CODE__SHIFT 8 +static inline uint32_t A4XX_RB_MRT_CONTROL_ROP_CODE(enum a3xx_rop_code val) +{ + return ((val) << A4XX_RB_MRT_CONTROL_ROP_CODE__SHIFT) & A4XX_RB_MRT_CONTROL_ROP_CODE__MASK; +} +#define A4XX_RB_MRT_CONTROL_COMPONENT_ENABLE__MASK 0x0f000000 +#define A4XX_RB_MRT_CONTROL_COMPONENT_ENABLE__SHIFT 24 +static inline uint32_t A4XX_RB_MRT_CONTROL_COMPONENT_ENABLE(uint32_t val) +{ + return ((val) << A4XX_RB_MRT_CONTROL_COMPONENT_ENABLE__SHIFT) & A4XX_RB_MRT_CONTROL_COMPONENT_ENABLE__MASK; +} + +static inline uint32_t REG_A4XX_RB_MRT_BUF_INFO(uint32_t i0) { return 0x000020a5 + 0x5*i0; } +#define A4XX_RB_MRT_BUF_INFO_COLOR_FORMAT__MASK 0x0000003f +#define A4XX_RB_MRT_BUF_INFO_COLOR_FORMAT__SHIFT 0 +static inline uint32_t A4XX_RB_MRT_BUF_INFO_COLOR_FORMAT(enum a4xx_color_fmt val) +{ + return ((val) << A4XX_RB_MRT_BUF_INFO_COLOR_FORMAT__SHIFT) & A4XX_RB_MRT_BUF_INFO_COLOR_FORMAT__MASK; +} +#define A4XX_RB_MRT_BUF_INFO_COLOR_TILE_MODE__MASK 0x000000c0 +#define A4XX_RB_MRT_BUF_INFO_COLOR_TILE_MODE__SHIFT 6 +static inline uint32_t A4XX_RB_MRT_BUF_INFO_COLOR_TILE_MODE(enum a4xx_tile_mode val) +{ + return ((val) << A4XX_RB_MRT_BUF_INFO_COLOR_TILE_MODE__SHIFT) & A4XX_RB_MRT_BUF_INFO_COLOR_TILE_MODE__MASK; +} +#define A4XX_RB_MRT_BUF_INFO_DITHER_MODE__MASK 0x00000600 +#define A4XX_RB_MRT_BUF_INFO_DITHER_MODE__SHIFT 9 +static inline uint32_t A4XX_RB_MRT_BUF_INFO_DITHER_MODE(enum adreno_rb_dither_mode val) +{ + return ((val) << A4XX_RB_MRT_BUF_INFO_DITHER_MODE__SHIFT) & A4XX_RB_MRT_BUF_INFO_DITHER_MODE__MASK; +} +#define A4XX_RB_MRT_BUF_INFO_COLOR_SWAP__MASK 0x00001800 +#define A4XX_RB_MRT_BUF_INFO_COLOR_SWAP__SHIFT 11 +static inline uint32_t A4XX_RB_MRT_BUF_INFO_COLOR_SWAP(enum 
a3xx_color_swap val) +{ + return ((val) << A4XX_RB_MRT_BUF_INFO_COLOR_SWAP__SHIFT) & A4XX_RB_MRT_BUF_INFO_COLOR_SWAP__MASK; +} +#define A4XX_RB_MRT_BUF_INFO_COLOR_SRGB 0x00002000 +#define A4XX_RB_MRT_BUF_INFO_COLOR_BUF_PITCH__MASK 0xffffc000 +#define A4XX_RB_MRT_BUF_INFO_COLOR_BUF_PITCH__SHIFT 14 +static inline uint32_t A4XX_RB_MRT_BUF_INFO_COLOR_BUF_PITCH(uint32_t val) +{ + assert(!(val & 0xf)); + return ((val >> 4) << A4XX_RB_MRT_BUF_INFO_COLOR_BUF_PITCH__SHIFT) & A4XX_RB_MRT_BUF_INFO_COLOR_BUF_PITCH__MASK; +} + +static inline uint32_t REG_A4XX_RB_MRT_BASE(uint32_t i0) { return 0x000020a6 + 0x5*i0; } + +static inline uint32_t REG_A4XX_RB_MRT_CONTROL3(uint32_t i0) { return 0x000020a7 + 0x5*i0; } +#define A4XX_RB_MRT_CONTROL3_STRIDE__MASK 0x03fffff8 +#define A4XX_RB_MRT_CONTROL3_STRIDE__SHIFT 3 +static inline uint32_t A4XX_RB_MRT_CONTROL3_STRIDE(uint32_t val) +{ + return ((val) << A4XX_RB_MRT_CONTROL3_STRIDE__SHIFT) & A4XX_RB_MRT_CONTROL3_STRIDE__MASK; +} + +static inline uint32_t REG_A4XX_RB_MRT_BLEND_CONTROL(uint32_t i0) { return 0x000020a8 + 0x5*i0; } +#define A4XX_RB_MRT_BLEND_CONTROL_RGB_SRC_FACTOR__MASK 0x0000001f +#define A4XX_RB_MRT_BLEND_CONTROL_RGB_SRC_FACTOR__SHIFT 0 +static inline uint32_t A4XX_RB_MRT_BLEND_CONTROL_RGB_SRC_FACTOR(enum adreno_rb_blend_factor val) +{ + return ((val) << A4XX_RB_MRT_BLEND_CONTROL_RGB_SRC_FACTOR__SHIFT) & A4XX_RB_MRT_BLEND_CONTROL_RGB_SRC_FACTOR__MASK; +} +#define A4XX_RB_MRT_BLEND_CONTROL_RGB_BLEND_OPCODE__MASK 0x000000e0 +#define A4XX_RB_MRT_BLEND_CONTROL_RGB_BLEND_OPCODE__SHIFT 5 +static inline uint32_t A4XX_RB_MRT_BLEND_CONTROL_RGB_BLEND_OPCODE(enum a3xx_rb_blend_opcode val) +{ + return ((val) << A4XX_RB_MRT_BLEND_CONTROL_RGB_BLEND_OPCODE__SHIFT) & A4XX_RB_MRT_BLEND_CONTROL_RGB_BLEND_OPCODE__MASK; +} +#define A4XX_RB_MRT_BLEND_CONTROL_RGB_DEST_FACTOR__MASK 0x00001f00 +#define A4XX_RB_MRT_BLEND_CONTROL_RGB_DEST_FACTOR__SHIFT 8 +static inline uint32_t A4XX_RB_MRT_BLEND_CONTROL_RGB_DEST_FACTOR(enum adreno_rb_blend_factor 
val) +{ + return ((val) << A4XX_RB_MRT_BLEND_CONTROL_RGB_DEST_FACTOR__SHIFT) & A4XX_RB_MRT_BLEND_CONTROL_RGB_DEST_FACTOR__MASK; +} +#define A4XX_RB_MRT_BLEND_CONTROL_ALPHA_SRC_FACTOR__MASK 0x001f0000 +#define A4XX_RB_MRT_BLEND_CONTROL_ALPHA_SRC_FACTOR__SHIFT 16 +static inline uint32_t A4XX_RB_MRT_BLEND_CONTROL_ALPHA_SRC_FACTOR(enum adreno_rb_blend_factor val) +{ + return ((val) << A4XX_RB_MRT_BLEND_CONTROL_ALPHA_SRC_FACTOR__SHIFT) & A4XX_RB_MRT_BLEND_CONTROL_ALPHA_SRC_FACTOR__MASK; +} +#define A4XX_RB_MRT_BLEND_CONTROL_ALPHA_BLEND_OPCODE__MASK 0x00e00000 +#define A4XX_RB_MRT_BLEND_CONTROL_ALPHA_BLEND_OPCODE__SHIFT 21 +static inline uint32_t A4XX_RB_MRT_BLEND_CONTROL_ALPHA_BLEND_OPCODE(enum a3xx_rb_blend_opcode val) +{ + return ((val) << A4XX_RB_MRT_BLEND_CONTROL_ALPHA_BLEND_OPCODE__SHIFT) & A4XX_RB_MRT_BLEND_CONTROL_ALPHA_BLEND_OPCODE__MASK; +} +#define A4XX_RB_MRT_BLEND_CONTROL_ALPHA_DEST_FACTOR__MASK 0x1f000000 +#define A4XX_RB_MRT_BLEND_CONTROL_ALPHA_DEST_FACTOR__SHIFT 24 +static inline uint32_t A4XX_RB_MRT_BLEND_CONTROL_ALPHA_DEST_FACTOR(enum adreno_rb_blend_factor val) +{ + return ((val) << A4XX_RB_MRT_BLEND_CONTROL_ALPHA_DEST_FACTOR__SHIFT) & A4XX_RB_MRT_BLEND_CONTROL_ALPHA_DEST_FACTOR__MASK; +} + +#define REG_A4XX_RB_BLEND_RED 0x000020f0 +#define A4XX_RB_BLEND_RED_UINT__MASK 0x000000ff +#define A4XX_RB_BLEND_RED_UINT__SHIFT 0 +static inline uint32_t A4XX_RB_BLEND_RED_UINT(uint32_t val) +{ + return ((val) << A4XX_RB_BLEND_RED_UINT__SHIFT) & A4XX_RB_BLEND_RED_UINT__MASK; +} +#define A4XX_RB_BLEND_RED_SINT__MASK 0x0000ff00 +#define A4XX_RB_BLEND_RED_SINT__SHIFT 8 +static inline uint32_t A4XX_RB_BLEND_RED_SINT(uint32_t val) +{ + return ((val) << A4XX_RB_BLEND_RED_SINT__SHIFT) & A4XX_RB_BLEND_RED_SINT__MASK; +} +#define A4XX_RB_BLEND_RED_FLOAT__MASK 0xffff0000 +#define A4XX_RB_BLEND_RED_FLOAT__SHIFT 16 +static inline uint32_t A4XX_RB_BLEND_RED_FLOAT(float val) +{ + return ((util_float_to_half(val)) << A4XX_RB_BLEND_RED_FLOAT__SHIFT) & 
A4XX_RB_BLEND_RED_FLOAT__MASK; +} + +#define REG_A4XX_RB_BLEND_RED_F32 0x000020f1 +#define A4XX_RB_BLEND_RED_F32__MASK 0xffffffff +#define A4XX_RB_BLEND_RED_F32__SHIFT 0 +static inline uint32_t A4XX_RB_BLEND_RED_F32(float val) +{ + return ((fui(val)) << A4XX_RB_BLEND_RED_F32__SHIFT) & A4XX_RB_BLEND_RED_F32__MASK; +} + +#define REG_A4XX_RB_BLEND_GREEN 0x000020f2 +#define A4XX_RB_BLEND_GREEN_UINT__MASK 0x000000ff +#define A4XX_RB_BLEND_GREEN_UINT__SHIFT 0 +static inline uint32_t A4XX_RB_BLEND_GREEN_UINT(uint32_t val) +{ + return ((val) << A4XX_RB_BLEND_GREEN_UINT__SHIFT) & A4XX_RB_BLEND_GREEN_UINT__MASK; +} +#define A4XX_RB_BLEND_GREEN_SINT__MASK 0x0000ff00 +#define A4XX_RB_BLEND_GREEN_SINT__SHIFT 8 +static inline uint32_t A4XX_RB_BLEND_GREEN_SINT(uint32_t val) +{ + return ((val) << A4XX_RB_BLEND_GREEN_SINT__SHIFT) & A4XX_RB_BLEND_GREEN_SINT__MASK; +} +#define A4XX_RB_BLEND_GREEN_FLOAT__MASK 0xffff0000 +#define A4XX_RB_BLEND_GREEN_FLOAT__SHIFT 16 +static inline uint32_t A4XX_RB_BLEND_GREEN_FLOAT(float val) +{ + return ((util_float_to_half(val)) << A4XX_RB_BLEND_GREEN_FLOAT__SHIFT) & A4XX_RB_BLEND_GREEN_FLOAT__MASK; +} + +#define REG_A4XX_RB_BLEND_GREEN_F32 0x000020f3 +#define A4XX_RB_BLEND_GREEN_F32__MASK 0xffffffff +#define A4XX_RB_BLEND_GREEN_F32__SHIFT 0 +static inline uint32_t A4XX_RB_BLEND_GREEN_F32(float val) +{ + return ((fui(val)) << A4XX_RB_BLEND_GREEN_F32__SHIFT) & A4XX_RB_BLEND_GREEN_F32__MASK; +} + +#define REG_A4XX_RB_BLEND_BLUE 0x000020f4 +#define A4XX_RB_BLEND_BLUE_UINT__MASK 0x000000ff +#define A4XX_RB_BLEND_BLUE_UINT__SHIFT 0 +static inline uint32_t A4XX_RB_BLEND_BLUE_UINT(uint32_t val) +{ + return ((val) << A4XX_RB_BLEND_BLUE_UINT__SHIFT) & A4XX_RB_BLEND_BLUE_UINT__MASK; +} +#define A4XX_RB_BLEND_BLUE_SINT__MASK 0x0000ff00 +#define A4XX_RB_BLEND_BLUE_SINT__SHIFT 8 +static inline uint32_t A4XX_RB_BLEND_BLUE_SINT(uint32_t val) +{ + return ((val) << A4XX_RB_BLEND_BLUE_SINT__SHIFT) & A4XX_RB_BLEND_BLUE_SINT__MASK; +} +#define 
A4XX_RB_BLEND_BLUE_FLOAT__MASK 0xffff0000 +#define A4XX_RB_BLEND_BLUE_FLOAT__SHIFT 16 +static inline uint32_t A4XX_RB_BLEND_BLUE_FLOAT(float val) +{ + return ((util_float_to_half(val)) << A4XX_RB_BLEND_BLUE_FLOAT__SHIFT) & A4XX_RB_BLEND_BLUE_FLOAT__MASK; +} + +#define REG_A4XX_RB_BLEND_BLUE_F32 0x000020f5 +#define A4XX_RB_BLEND_BLUE_F32__MASK 0xffffffff +#define A4XX_RB_BLEND_BLUE_F32__SHIFT 0 +static inline uint32_t A4XX_RB_BLEND_BLUE_F32(float val) +{ + return ((fui(val)) << A4XX_RB_BLEND_BLUE_F32__SHIFT) & A4XX_RB_BLEND_BLUE_F32__MASK; +} + +#define REG_A4XX_RB_BLEND_ALPHA 0x000020f6 +#define A4XX_RB_BLEND_ALPHA_UINT__MASK 0x000000ff +#define A4XX_RB_BLEND_ALPHA_UINT__SHIFT 0 +static inline uint32_t A4XX_RB_BLEND_ALPHA_UINT(uint32_t val) +{ + return ((val) << A4XX_RB_BLEND_ALPHA_UINT__SHIFT) & A4XX_RB_BLEND_ALPHA_UINT__MASK; +} +#define A4XX_RB_BLEND_ALPHA_SINT__MASK 0x0000ff00 +#define A4XX_RB_BLEND_ALPHA_SINT__SHIFT 8 +static inline uint32_t A4XX_RB_BLEND_ALPHA_SINT(uint32_t val) +{ + return ((val) << A4XX_RB_BLEND_ALPHA_SINT__SHIFT) & A4XX_RB_BLEND_ALPHA_SINT__MASK; +} +#define A4XX_RB_BLEND_ALPHA_FLOAT__MASK 0xffff0000 +#define A4XX_RB_BLEND_ALPHA_FLOAT__SHIFT 16 +static inline uint32_t A4XX_RB_BLEND_ALPHA_FLOAT(float val) +{ + return ((util_float_to_half(val)) << A4XX_RB_BLEND_ALPHA_FLOAT__SHIFT) & A4XX_RB_BLEND_ALPHA_FLOAT__MASK; +} + +#define REG_A4XX_RB_BLEND_ALPHA_F32 0x000020f7 +#define A4XX_RB_BLEND_ALPHA_F32__MASK 0xffffffff +#define A4XX_RB_BLEND_ALPHA_F32__SHIFT 0 +static inline uint32_t A4XX_RB_BLEND_ALPHA_F32(float val) +{ + return ((fui(val)) << A4XX_RB_BLEND_ALPHA_F32__SHIFT) & A4XX_RB_BLEND_ALPHA_F32__MASK; +} + +#define REG_A4XX_RB_ALPHA_CONTROL 0x000020f8 +#define A4XX_RB_ALPHA_CONTROL_ALPHA_REF__MASK 0x000000ff +#define A4XX_RB_ALPHA_CONTROL_ALPHA_REF__SHIFT 0 +static inline uint32_t A4XX_RB_ALPHA_CONTROL_ALPHA_REF(uint32_t val) +{ + return ((val) << A4XX_RB_ALPHA_CONTROL_ALPHA_REF__SHIFT) & A4XX_RB_ALPHA_CONTROL_ALPHA_REF__MASK; +} 
+#define A4XX_RB_ALPHA_CONTROL_ALPHA_TEST 0x00000100 +#define A4XX_RB_ALPHA_CONTROL_ALPHA_TEST_FUNC__MASK 0x00000e00 +#define A4XX_RB_ALPHA_CONTROL_ALPHA_TEST_FUNC__SHIFT 9 +static inline uint32_t A4XX_RB_ALPHA_CONTROL_ALPHA_TEST_FUNC(enum adreno_compare_func val) +{ + return ((val) << A4XX_RB_ALPHA_CONTROL_ALPHA_TEST_FUNC__SHIFT) & A4XX_RB_ALPHA_CONTROL_ALPHA_TEST_FUNC__MASK; +} + +#define REG_A4XX_RB_FS_OUTPUT 0x000020f9 +#define A4XX_RB_FS_OUTPUT_ENABLE_BLEND__MASK 0x000000ff +#define A4XX_RB_FS_OUTPUT_ENABLE_BLEND__SHIFT 0 +static inline uint32_t A4XX_RB_FS_OUTPUT_ENABLE_BLEND(uint32_t val) +{ + return ((val) << A4XX_RB_FS_OUTPUT_ENABLE_BLEND__SHIFT) & A4XX_RB_FS_OUTPUT_ENABLE_BLEND__MASK; +} +#define A4XX_RB_FS_OUTPUT_INDEPENDENT_BLEND 0x00000100 +#define A4XX_RB_FS_OUTPUT_SAMPLE_MASK__MASK 0xffff0000 +#define A4XX_RB_FS_OUTPUT_SAMPLE_MASK__SHIFT 16 +static inline uint32_t A4XX_RB_FS_OUTPUT_SAMPLE_MASK(uint32_t val) +{ + return ((val) << A4XX_RB_FS_OUTPUT_SAMPLE_MASK__SHIFT) & A4XX_RB_FS_OUTPUT_SAMPLE_MASK__MASK; +} + +#define REG_A4XX_RB_SAMPLE_COUNT_CONTROL 0x000020fa +#define A4XX_RB_SAMPLE_COUNT_CONTROL_COPY 0x00000002 +#define A4XX_RB_SAMPLE_COUNT_CONTROL_ADDR__MASK 0xfffffffc +#define A4XX_RB_SAMPLE_COUNT_CONTROL_ADDR__SHIFT 2 +static inline uint32_t A4XX_RB_SAMPLE_COUNT_CONTROL_ADDR(uint32_t val) +{ + assert(!(val & 0x3)); + return ((val >> 2) << A4XX_RB_SAMPLE_COUNT_CONTROL_ADDR__SHIFT) & A4XX_RB_SAMPLE_COUNT_CONTROL_ADDR__MASK; +} + +#define REG_A4XX_RB_RENDER_COMPONENTS 0x000020fb +#define A4XX_RB_RENDER_COMPONENTS_RT0__MASK 0x0000000f +#define A4XX_RB_RENDER_COMPONENTS_RT0__SHIFT 0 +static inline uint32_t A4XX_RB_RENDER_COMPONENTS_RT0(uint32_t val) +{ + return ((val) << A4XX_RB_RENDER_COMPONENTS_RT0__SHIFT) & A4XX_RB_RENDER_COMPONENTS_RT0__MASK; +} +#define A4XX_RB_RENDER_COMPONENTS_RT1__MASK 0x000000f0 +#define A4XX_RB_RENDER_COMPONENTS_RT1__SHIFT 4 +static inline uint32_t A4XX_RB_RENDER_COMPONENTS_RT1(uint32_t val) +{ + return ((val) << 
A4XX_RB_RENDER_COMPONENTS_RT1__SHIFT) & A4XX_RB_RENDER_COMPONENTS_RT1__MASK; +} +#define A4XX_RB_RENDER_COMPONENTS_RT2__MASK 0x00000f00 +#define A4XX_RB_RENDER_COMPONENTS_RT2__SHIFT 8 +static inline uint32_t A4XX_RB_RENDER_COMPONENTS_RT2(uint32_t val) +{ + return ((val) << A4XX_RB_RENDER_COMPONENTS_RT2__SHIFT) & A4XX_RB_RENDER_COMPONENTS_RT2__MASK; +} +#define A4XX_RB_RENDER_COMPONENTS_RT3__MASK 0x0000f000 +#define A4XX_RB_RENDER_COMPONENTS_RT3__SHIFT 12 +static inline uint32_t A4XX_RB_RENDER_COMPONENTS_RT3(uint32_t val) +{ + return ((val) << A4XX_RB_RENDER_COMPONENTS_RT3__SHIFT) & A4XX_RB_RENDER_COMPONENTS_RT3__MASK; +} +#define A4XX_RB_RENDER_COMPONENTS_RT4__MASK 0x000f0000 +#define A4XX_RB_RENDER_COMPONENTS_RT4__SHIFT 16 +static inline uint32_t A4XX_RB_RENDER_COMPONENTS_RT4(uint32_t val) +{ + return ((val) << A4XX_RB_RENDER_COMPONENTS_RT4__SHIFT) & A4XX_RB_RENDER_COMPONENTS_RT4__MASK; +} +#define A4XX_RB_RENDER_COMPONENTS_RT5__MASK 0x00f00000 +#define A4XX_RB_RENDER_COMPONENTS_RT5__SHIFT 20 +static inline uint32_t A4XX_RB_RENDER_COMPONENTS_RT5(uint32_t val) +{ + return ((val) << A4XX_RB_RENDER_COMPONENTS_RT5__SHIFT) & A4XX_RB_RENDER_COMPONENTS_RT5__MASK; +} +#define A4XX_RB_RENDER_COMPONENTS_RT6__MASK 0x0f000000 +#define A4XX_RB_RENDER_COMPONENTS_RT6__SHIFT 24 +static inline uint32_t A4XX_RB_RENDER_COMPONENTS_RT6(uint32_t val) +{ + return ((val) << A4XX_RB_RENDER_COMPONENTS_RT6__SHIFT) & A4XX_RB_RENDER_COMPONENTS_RT6__MASK; +} +#define A4XX_RB_RENDER_COMPONENTS_RT7__MASK 0xf0000000 +#define A4XX_RB_RENDER_COMPONENTS_RT7__SHIFT 28 +static inline uint32_t A4XX_RB_RENDER_COMPONENTS_RT7(uint32_t val) +{ + return ((val) << A4XX_RB_RENDER_COMPONENTS_RT7__SHIFT) & A4XX_RB_RENDER_COMPONENTS_RT7__MASK; +} + +#define REG_A4XX_RB_COPY_CONTROL 0x000020fc +#define A4XX_RB_COPY_CONTROL_MSAA_RESOLVE__MASK 0x00000003 +#define A4XX_RB_COPY_CONTROL_MSAA_RESOLVE__SHIFT 0 +static inline uint32_t A4XX_RB_COPY_CONTROL_MSAA_RESOLVE(enum a3xx_msaa_samples val) +{ + return ((val) << 
A4XX_RB_COPY_CONTROL_MSAA_RESOLVE__SHIFT) & A4XX_RB_COPY_CONTROL_MSAA_RESOLVE__MASK; +} +#define A4XX_RB_COPY_CONTROL_MODE__MASK 0x00000070 +#define A4XX_RB_COPY_CONTROL_MODE__SHIFT 4 +static inline uint32_t A4XX_RB_COPY_CONTROL_MODE(enum adreno_rb_copy_control_mode val) +{ + return ((val) << A4XX_RB_COPY_CONTROL_MODE__SHIFT) & A4XX_RB_COPY_CONTROL_MODE__MASK; +} +#define A4XX_RB_COPY_CONTROL_FASTCLEAR__MASK 0x00000f00 +#define A4XX_RB_COPY_CONTROL_FASTCLEAR__SHIFT 8 +static inline uint32_t A4XX_RB_COPY_CONTROL_FASTCLEAR(uint32_t val) +{ + return ((val) << A4XX_RB_COPY_CONTROL_FASTCLEAR__SHIFT) & A4XX_RB_COPY_CONTROL_FASTCLEAR__MASK; +} +#define A4XX_RB_COPY_CONTROL_GMEM_BASE__MASK 0xffffc000 +#define A4XX_RB_COPY_CONTROL_GMEM_BASE__SHIFT 14 +static inline uint32_t A4XX_RB_COPY_CONTROL_GMEM_BASE(uint32_t val) +{ + assert(!(val & 0x3fff)); + return ((val >> 14) << A4XX_RB_COPY_CONTROL_GMEM_BASE__SHIFT) & A4XX_RB_COPY_CONTROL_GMEM_BASE__MASK; +} + +#define REG_A4XX_RB_COPY_DEST_BASE 0x000020fd +#define A4XX_RB_COPY_DEST_BASE_BASE__MASK 0xffffffe0 +#define A4XX_RB_COPY_DEST_BASE_BASE__SHIFT 5 +static inline uint32_t A4XX_RB_COPY_DEST_BASE_BASE(uint32_t val) +{ + assert(!(val & 0x1f)); + return ((val >> 5) << A4XX_RB_COPY_DEST_BASE_BASE__SHIFT) & A4XX_RB_COPY_DEST_BASE_BASE__MASK; +} + +#define REG_A4XX_RB_COPY_DEST_PITCH 0x000020fe +#define A4XX_RB_COPY_DEST_PITCH_PITCH__MASK 0xffffffff +#define A4XX_RB_COPY_DEST_PITCH_PITCH__SHIFT 0 +static inline uint32_t A4XX_RB_COPY_DEST_PITCH_PITCH(uint32_t val) +{ + assert(!(val & 0x1f)); + return ((val >> 5) << A4XX_RB_COPY_DEST_PITCH_PITCH__SHIFT) & A4XX_RB_COPY_DEST_PITCH_PITCH__MASK; +} + +#define REG_A4XX_RB_COPY_DEST_INFO 0x000020ff +#define A4XX_RB_COPY_DEST_INFO_FORMAT__MASK 0x000000fc +#define A4XX_RB_COPY_DEST_INFO_FORMAT__SHIFT 2 +static inline uint32_t A4XX_RB_COPY_DEST_INFO_FORMAT(enum a4xx_color_fmt val) +{ + return ((val) << A4XX_RB_COPY_DEST_INFO_FORMAT__SHIFT) & A4XX_RB_COPY_DEST_INFO_FORMAT__MASK; +} +#define 
A4XX_RB_COPY_DEST_INFO_SWAP__MASK 0x00000300 +#define A4XX_RB_COPY_DEST_INFO_SWAP__SHIFT 8 +static inline uint32_t A4XX_RB_COPY_DEST_INFO_SWAP(enum a3xx_color_swap val) +{ + return ((val) << A4XX_RB_COPY_DEST_INFO_SWAP__SHIFT) & A4XX_RB_COPY_DEST_INFO_SWAP__MASK; +} +#define A4XX_RB_COPY_DEST_INFO_DITHER_MODE__MASK 0x00000c00 +#define A4XX_RB_COPY_DEST_INFO_DITHER_MODE__SHIFT 10 +static inline uint32_t A4XX_RB_COPY_DEST_INFO_DITHER_MODE(enum adreno_rb_dither_mode val) +{ + return ((val) << A4XX_RB_COPY_DEST_INFO_DITHER_MODE__SHIFT) & A4XX_RB_COPY_DEST_INFO_DITHER_MODE__MASK; +} +#define A4XX_RB_COPY_DEST_INFO_COMPONENT_ENABLE__MASK 0x0003c000 +#define A4XX_RB_COPY_DEST_INFO_COMPONENT_ENABLE__SHIFT 14 +static inline uint32_t A4XX_RB_COPY_DEST_INFO_COMPONENT_ENABLE(uint32_t val) +{ + return ((val) << A4XX_RB_COPY_DEST_INFO_COMPONENT_ENABLE__SHIFT) & A4XX_RB_COPY_DEST_INFO_COMPONENT_ENABLE__MASK; +} +#define A4XX_RB_COPY_DEST_INFO_ENDIAN__MASK 0x001c0000 +#define A4XX_RB_COPY_DEST_INFO_ENDIAN__SHIFT 18 +static inline uint32_t A4XX_RB_COPY_DEST_INFO_ENDIAN(enum adreno_rb_surface_endian val) +{ + return ((val) << A4XX_RB_COPY_DEST_INFO_ENDIAN__SHIFT) & A4XX_RB_COPY_DEST_INFO_ENDIAN__MASK; +} +#define A4XX_RB_COPY_DEST_INFO_TILE__MASK 0x03000000 +#define A4XX_RB_COPY_DEST_INFO_TILE__SHIFT 24 +static inline uint32_t A4XX_RB_COPY_DEST_INFO_TILE(enum a4xx_tile_mode val) +{ + return ((val) << A4XX_RB_COPY_DEST_INFO_TILE__SHIFT) & A4XX_RB_COPY_DEST_INFO_TILE__MASK; +} + +#define REG_A4XX_RB_FS_OUTPUT_REG 0x00002100 +#define A4XX_RB_FS_OUTPUT_REG_MRT__MASK 0x0000000f +#define A4XX_RB_FS_OUTPUT_REG_MRT__SHIFT 0 +static inline uint32_t A4XX_RB_FS_OUTPUT_REG_MRT(uint32_t val) +{ + return ((val) << A4XX_RB_FS_OUTPUT_REG_MRT__SHIFT) & A4XX_RB_FS_OUTPUT_REG_MRT__MASK; +} +#define A4XX_RB_FS_OUTPUT_REG_FRAG_WRITES_Z 0x00000020 + +#define REG_A4XX_RB_DEPTH_CONTROL 0x00002101 +#define A4XX_RB_DEPTH_CONTROL_FRAG_WRITES_Z 0x00000001 +#define A4XX_RB_DEPTH_CONTROL_Z_ENABLE 0x00000002 
+#define A4XX_RB_DEPTH_CONTROL_Z_WRITE_ENABLE 0x00000004 +#define A4XX_RB_DEPTH_CONTROL_ZFUNC__MASK 0x00000070 +#define A4XX_RB_DEPTH_CONTROL_ZFUNC__SHIFT 4 +static inline uint32_t A4XX_RB_DEPTH_CONTROL_ZFUNC(enum adreno_compare_func val) +{ + return ((val) << A4XX_RB_DEPTH_CONTROL_ZFUNC__SHIFT) & A4XX_RB_DEPTH_CONTROL_ZFUNC__MASK; +} +#define A4XX_RB_DEPTH_CONTROL_Z_CLAMP_ENABLE 0x00000080 +#define A4XX_RB_DEPTH_CONTROL_EARLY_Z_DISABLE 0x00010000 +#define A4XX_RB_DEPTH_CONTROL_FORCE_FRAGZ_TO_FS 0x00020000 +#define A4XX_RB_DEPTH_CONTROL_Z_TEST_ENABLE 0x80000000 + +#define REG_A4XX_RB_DEPTH_CLEAR 0x00002102 + +#define REG_A4XX_RB_DEPTH_INFO 0x00002103 +#define A4XX_RB_DEPTH_INFO_DEPTH_FORMAT__MASK 0x00000003 +#define A4XX_RB_DEPTH_INFO_DEPTH_FORMAT__SHIFT 0 +static inline uint32_t A4XX_RB_DEPTH_INFO_DEPTH_FORMAT(enum a4xx_depth_format val) +{ + return ((val) << A4XX_RB_DEPTH_INFO_DEPTH_FORMAT__SHIFT) & A4XX_RB_DEPTH_INFO_DEPTH_FORMAT__MASK; +} +#define A4XX_RB_DEPTH_INFO_DEPTH_BASE__MASK 0xfffff000 +#define A4XX_RB_DEPTH_INFO_DEPTH_BASE__SHIFT 12 +static inline uint32_t A4XX_RB_DEPTH_INFO_DEPTH_BASE(uint32_t val) +{ + assert(!(val & 0xfff)); + return ((val >> 12) << A4XX_RB_DEPTH_INFO_DEPTH_BASE__SHIFT) & A4XX_RB_DEPTH_INFO_DEPTH_BASE__MASK; +} + +#define REG_A4XX_RB_DEPTH_PITCH 0x00002104 +#define A4XX_RB_DEPTH_PITCH__MASK 0xffffffff +#define A4XX_RB_DEPTH_PITCH__SHIFT 0 +static inline uint32_t A4XX_RB_DEPTH_PITCH(uint32_t val) +{ + assert(!(val & 0x1f)); + return ((val >> 5) << A4XX_RB_DEPTH_PITCH__SHIFT) & A4XX_RB_DEPTH_PITCH__MASK; +} + +#define REG_A4XX_RB_DEPTH_PITCH2 0x00002105 +#define A4XX_RB_DEPTH_PITCH2__MASK 0xffffffff +#define A4XX_RB_DEPTH_PITCH2__SHIFT 0 +static inline uint32_t A4XX_RB_DEPTH_PITCH2(uint32_t val) +{ + assert(!(val & 0x1f)); + return ((val >> 5) << A4XX_RB_DEPTH_PITCH2__SHIFT) & A4XX_RB_DEPTH_PITCH2__MASK; +} + +#define REG_A4XX_RB_STENCIL_CONTROL 0x00002106 +#define A4XX_RB_STENCIL_CONTROL_STENCIL_ENABLE 0x00000001 +#define 
A4XX_RB_STENCIL_CONTROL_STENCIL_ENABLE_BF 0x00000002 +#define A4XX_RB_STENCIL_CONTROL_STENCIL_READ 0x00000004 +#define A4XX_RB_STENCIL_CONTROL_FUNC__MASK 0x00000700 +#define A4XX_RB_STENCIL_CONTROL_FUNC__SHIFT 8 +static inline uint32_t A4XX_RB_STENCIL_CONTROL_FUNC(enum adreno_compare_func val) +{ + return ((val) << A4XX_RB_STENCIL_CONTROL_FUNC__SHIFT) & A4XX_RB_STENCIL_CONTROL_FUNC__MASK; +} +#define A4XX_RB_STENCIL_CONTROL_FAIL__MASK 0x00003800 +#define A4XX_RB_STENCIL_CONTROL_FAIL__SHIFT 11 +static inline uint32_t A4XX_RB_STENCIL_CONTROL_FAIL(enum adreno_stencil_op val) +{ + return ((val) << A4XX_RB_STENCIL_CONTROL_FAIL__SHIFT) & A4XX_RB_STENCIL_CONTROL_FAIL__MASK; +} +#define A4XX_RB_STENCIL_CONTROL_ZPASS__MASK 0x0001c000 +#define A4XX_RB_STENCIL_CONTROL_ZPASS__SHIFT 14 +static inline uint32_t A4XX_RB_STENCIL_CONTROL_ZPASS(enum adreno_stencil_op val) +{ + return ((val) << A4XX_RB_STENCIL_CONTROL_ZPASS__SHIFT) & A4XX_RB_STENCIL_CONTROL_ZPASS__MASK; +} +#define A4XX_RB_STENCIL_CONTROL_ZFAIL__MASK 0x000e0000 +#define A4XX_RB_STENCIL_CONTROL_ZFAIL__SHIFT 17 +static inline uint32_t A4XX_RB_STENCIL_CONTROL_ZFAIL(enum adreno_stencil_op val) +{ + return ((val) << A4XX_RB_STENCIL_CONTROL_ZFAIL__SHIFT) & A4XX_RB_STENCIL_CONTROL_ZFAIL__MASK; +} +#define A4XX_RB_STENCIL_CONTROL_FUNC_BF__MASK 0x00700000 +#define A4XX_RB_STENCIL_CONTROL_FUNC_BF__SHIFT 20 +static inline uint32_t A4XX_RB_STENCIL_CONTROL_FUNC_BF(enum adreno_compare_func val) +{ + return ((val) << A4XX_RB_STENCIL_CONTROL_FUNC_BF__SHIFT) & A4XX_RB_STENCIL_CONTROL_FUNC_BF__MASK; +} +#define A4XX_RB_STENCIL_CONTROL_FAIL_BF__MASK 0x03800000 +#define A4XX_RB_STENCIL_CONTROL_FAIL_BF__SHIFT 23 +static inline uint32_t A4XX_RB_STENCIL_CONTROL_FAIL_BF(enum adreno_stencil_op val) +{ + return ((val) << A4XX_RB_STENCIL_CONTROL_FAIL_BF__SHIFT) & A4XX_RB_STENCIL_CONTROL_FAIL_BF__MASK; +} +#define A4XX_RB_STENCIL_CONTROL_ZPASS_BF__MASK 0x1c000000 +#define A4XX_RB_STENCIL_CONTROL_ZPASS_BF__SHIFT 26 +static inline uint32_t 
A4XX_RB_STENCIL_CONTROL_ZPASS_BF(enum adreno_stencil_op val) +{ + return ((val) << A4XX_RB_STENCIL_CONTROL_ZPASS_BF__SHIFT) & A4XX_RB_STENCIL_CONTROL_ZPASS_BF__MASK; +} +#define A4XX_RB_STENCIL_CONTROL_ZFAIL_BF__MASK 0xe0000000 +#define A4XX_RB_STENCIL_CONTROL_ZFAIL_BF__SHIFT 29 +static inline uint32_t A4XX_RB_STENCIL_CONTROL_ZFAIL_BF(enum adreno_stencil_op val) +{ + return ((val) << A4XX_RB_STENCIL_CONTROL_ZFAIL_BF__SHIFT) & A4XX_RB_STENCIL_CONTROL_ZFAIL_BF__MASK; +} + +#define REG_A4XX_RB_STENCIL_CONTROL2 0x00002107 +#define A4XX_RB_STENCIL_CONTROL2_STENCIL_BUFFER 0x00000001 + +#define REG_A4XX_RB_STENCIL_INFO 0x00002108 +#define A4XX_RB_STENCIL_INFO_SEPARATE_STENCIL 0x00000001 +#define A4XX_RB_STENCIL_INFO_STENCIL_BASE__MASK 0xfffff000 +#define A4XX_RB_STENCIL_INFO_STENCIL_BASE__SHIFT 12 +static inline uint32_t A4XX_RB_STENCIL_INFO_STENCIL_BASE(uint32_t val) +{ + assert(!(val & 0xfff)); + return ((val >> 12) << A4XX_RB_STENCIL_INFO_STENCIL_BASE__SHIFT) & A4XX_RB_STENCIL_INFO_STENCIL_BASE__MASK; +} + +#define REG_A4XX_RB_STENCIL_PITCH 0x00002109 +#define A4XX_RB_STENCIL_PITCH__MASK 0xffffffff +#define A4XX_RB_STENCIL_PITCH__SHIFT 0 +static inline uint32_t A4XX_RB_STENCIL_PITCH(uint32_t val) +{ + assert(!(val & 0x1f)); + return ((val >> 5) << A4XX_RB_STENCIL_PITCH__SHIFT) & A4XX_RB_STENCIL_PITCH__MASK; +} + +#define REG_A4XX_RB_STENCILREFMASK 0x0000210b +#define A4XX_RB_STENCILREFMASK_STENCILREF__MASK 0x000000ff +#define A4XX_RB_STENCILREFMASK_STENCILREF__SHIFT 0 +static inline uint32_t A4XX_RB_STENCILREFMASK_STENCILREF(uint32_t val) +{ + return ((val) << A4XX_RB_STENCILREFMASK_STENCILREF__SHIFT) & A4XX_RB_STENCILREFMASK_STENCILREF__MASK; +} +#define A4XX_RB_STENCILREFMASK_STENCILMASK__MASK 0x0000ff00 +#define A4XX_RB_STENCILREFMASK_STENCILMASK__SHIFT 8 +static inline uint32_t A4XX_RB_STENCILREFMASK_STENCILMASK(uint32_t val) +{ + return ((val) << A4XX_RB_STENCILREFMASK_STENCILMASK__SHIFT) & A4XX_RB_STENCILREFMASK_STENCILMASK__MASK; +} +#define 
A4XX_RB_STENCILREFMASK_STENCILWRITEMASK__MASK 0x00ff0000 +#define A4XX_RB_STENCILREFMASK_STENCILWRITEMASK__SHIFT 16 +static inline uint32_t A4XX_RB_STENCILREFMASK_STENCILWRITEMASK(uint32_t val) +{ + return ((val) << A4XX_RB_STENCILREFMASK_STENCILWRITEMASK__SHIFT) & A4XX_RB_STENCILREFMASK_STENCILWRITEMASK__MASK; +} + +#define REG_A4XX_RB_STENCILREFMASK_BF 0x0000210c +#define A4XX_RB_STENCILREFMASK_BF_STENCILREF__MASK 0x000000ff +#define A4XX_RB_STENCILREFMASK_BF_STENCILREF__SHIFT 0 +static inline uint32_t A4XX_RB_STENCILREFMASK_BF_STENCILREF(uint32_t val) +{ + return ((val) << A4XX_RB_STENCILREFMASK_BF_STENCILREF__SHIFT) & A4XX_RB_STENCILREFMASK_BF_STENCILREF__MASK; +} +#define A4XX_RB_STENCILREFMASK_BF_STENCILMASK__MASK 0x0000ff00 +#define A4XX_RB_STENCILREFMASK_BF_STENCILMASK__SHIFT 8 +static inline uint32_t A4XX_RB_STENCILREFMASK_BF_STENCILMASK(uint32_t val) +{ + return ((val) << A4XX_RB_STENCILREFMASK_BF_STENCILMASK__SHIFT) & A4XX_RB_STENCILREFMASK_BF_STENCILMASK__MASK; +} +#define A4XX_RB_STENCILREFMASK_BF_STENCILWRITEMASK__MASK 0x00ff0000 +#define A4XX_RB_STENCILREFMASK_BF_STENCILWRITEMASK__SHIFT 16 +static inline uint32_t A4XX_RB_STENCILREFMASK_BF_STENCILWRITEMASK(uint32_t val) +{ + return ((val) << A4XX_RB_STENCILREFMASK_BF_STENCILWRITEMASK__SHIFT) & A4XX_RB_STENCILREFMASK_BF_STENCILWRITEMASK__MASK; +} + +#define REG_A4XX_RB_BIN_OFFSET 0x0000210d +#define A4XX_RB_BIN_OFFSET_WINDOW_OFFSET_DISABLE 0x80000000 +#define A4XX_RB_BIN_OFFSET_X__MASK 0x00007fff +#define A4XX_RB_BIN_OFFSET_X__SHIFT 0 +static inline uint32_t A4XX_RB_BIN_OFFSET_X(uint32_t val) +{ + return ((val) << A4XX_RB_BIN_OFFSET_X__SHIFT) & A4XX_RB_BIN_OFFSET_X__MASK; +} +#define A4XX_RB_BIN_OFFSET_Y__MASK 0x7fff0000 +#define A4XX_RB_BIN_OFFSET_Y__SHIFT 16 +static inline uint32_t A4XX_RB_BIN_OFFSET_Y(uint32_t val) +{ + return ((val) << A4XX_RB_BIN_OFFSET_Y__SHIFT) & A4XX_RB_BIN_OFFSET_Y__MASK; +} + +static inline uint32_t REG_A4XX_RB_VPORT_Z_CLAMP(uint32_t i0) { return 0x00002120 + 0x2*i0; } + 
+static inline uint32_t REG_A4XX_RB_VPORT_Z_CLAMP_MIN(uint32_t i0) { return 0x00002120 + 0x2*i0; } + +static inline uint32_t REG_A4XX_RB_VPORT_Z_CLAMP_MAX(uint32_t i0) { return 0x00002121 + 0x2*i0; } + +#define REG_A4XX_RBBM_HW_VERSION 0x00000000 + +#define REG_A4XX_RBBM_HW_CONFIGURATION 0x00000002 + +static inline uint32_t REG_A4XX_RBBM_CLOCK_CTL_TP(uint32_t i0) { return 0x00000004 + 0x1*i0; } + +static inline uint32_t REG_A4XX_RBBM_CLOCK_CTL_TP_REG(uint32_t i0) { return 0x00000004 + 0x1*i0; } + +static inline uint32_t REG_A4XX_RBBM_CLOCK_CTL2_TP(uint32_t i0) { return 0x00000008 + 0x1*i0; } + +static inline uint32_t REG_A4XX_RBBM_CLOCK_CTL2_TP_REG(uint32_t i0) { return 0x00000008 + 0x1*i0; } + +static inline uint32_t REG_A4XX_RBBM_CLOCK_HYST_TP(uint32_t i0) { return 0x0000000c + 0x1*i0; } + +static inline uint32_t REG_A4XX_RBBM_CLOCK_HYST_TP_REG(uint32_t i0) { return 0x0000000c + 0x1*i0; } + +static inline uint32_t REG_A4XX_RBBM_CLOCK_DELAY_TP(uint32_t i0) { return 0x00000010 + 0x1*i0; } + +static inline uint32_t REG_A4XX_RBBM_CLOCK_DELAY_TP_REG(uint32_t i0) { return 0x00000010 + 0x1*i0; } + +#define REG_A4XX_RBBM_CLOCK_CTL_UCHE 0x00000014 + +#define REG_A4XX_RBBM_CLOCK_CTL2_UCHE 0x00000015 + +#define REG_A4XX_RBBM_CLOCK_CTL3_UCHE 0x00000016 + +#define REG_A4XX_RBBM_CLOCK_CTL4_UCHE 0x00000017 + +#define REG_A4XX_RBBM_CLOCK_HYST_UCHE 0x00000018 + +#define REG_A4XX_RBBM_CLOCK_DELAY_UCHE 0x00000019 + +#define REG_A4XX_RBBM_CLOCK_MODE_GPC 0x0000001a + +#define REG_A4XX_RBBM_CLOCK_DELAY_GPC 0x0000001b + +#define REG_A4XX_RBBM_CLOCK_HYST_GPC 0x0000001c + +#define REG_A4XX_RBBM_CLOCK_CTL_TSE_RAS_RBBM 0x0000001d + +#define REG_A4XX_RBBM_CLOCK_HYST_TSE_RAS_RBBM 0x0000001e + +#define REG_A4XX_RBBM_CLOCK_DELAY_TSE_RAS_RBBM 0x0000001f + +#define REG_A4XX_RBBM_CLOCK_CTL 0x00000020 + +#define REG_A4XX_RBBM_SP_HYST_CNT 0x00000021 + +#define REG_A4XX_RBBM_SW_RESET_CMD 0x00000022 + +#define REG_A4XX_RBBM_AHB_CTL0 0x00000023 + +#define REG_A4XX_RBBM_AHB_CTL1 0x00000024 + +#define 
REG_A4XX_RBBM_AHB_CMD 0x00000025 + +#define REG_A4XX_RBBM_RB_SUB_BLOCK_SEL_CTL 0x00000026 + +#define REG_A4XX_RBBM_RAM_ACC_63_32 0x00000028 + +#define REG_A4XX_RBBM_WAIT_IDLE_CLOCKS_CTL 0x0000002b + +#define REG_A4XX_RBBM_INTERFACE_HANG_INT_CTL 0x0000002f + +#define REG_A4XX_RBBM_INTERFACE_HANG_MASK_CTL4 0x00000034 + +#define REG_A4XX_RBBM_INT_CLEAR_CMD 0x00000036 + +#define REG_A4XX_RBBM_INT_0_MASK 0x00000037 + +#define REG_A4XX_RBBM_RBBM_CTL 0x0000003e + +#define REG_A4XX_RBBM_AHB_DEBUG_CTL 0x0000003f + +#define REG_A4XX_RBBM_VBIF_DEBUG_CTL 0x00000041 + +#define REG_A4XX_RBBM_CLOCK_CTL2 0x00000042 + +#define REG_A4XX_RBBM_BLOCK_SW_RESET_CMD 0x00000045 + +#define REG_A4XX_RBBM_RESET_CYCLES 0x00000047 + +#define REG_A4XX_RBBM_EXT_TRACE_BUS_CTL 0x00000049 + +#define REG_A4XX_RBBM_CFG_DEBBUS_SEL_A 0x0000004a + +#define REG_A4XX_RBBM_CFG_DEBBUS_SEL_B 0x0000004b + +#define REG_A4XX_RBBM_CFG_DEBBUS_SEL_C 0x0000004c + +#define REG_A4XX_RBBM_CFG_DEBBUS_SEL_D 0x0000004d + +#define REG_A4XX_RBBM_POWER_CNTL_IP 0x00000098 +#define A4XX_RBBM_POWER_CNTL_IP_SW_COLLAPSE 0x00000001 +#define A4XX_RBBM_POWER_CNTL_IP_SP_TP_PWR_ON 0x00100000 + +#define REG_A4XX_RBBM_PERFCTR_CP_0_LO 0x0000009c + +#define REG_A4XX_RBBM_PERFCTR_CP_0_HI 0x0000009d + +#define REG_A4XX_RBBM_PERFCTR_CP_1_LO 0x0000009e + +#define REG_A4XX_RBBM_PERFCTR_CP_1_HI 0x0000009f + +#define REG_A4XX_RBBM_PERFCTR_CP_2_LO 0x000000a0 + +#define REG_A4XX_RBBM_PERFCTR_CP_2_HI 0x000000a1 + +#define REG_A4XX_RBBM_PERFCTR_CP_3_LO 0x000000a2 + +#define REG_A4XX_RBBM_PERFCTR_CP_3_HI 0x000000a3 + +#define REG_A4XX_RBBM_PERFCTR_CP_4_LO 0x000000a4 + +#define REG_A4XX_RBBM_PERFCTR_CP_4_HI 0x000000a5 + +#define REG_A4XX_RBBM_PERFCTR_CP_5_LO 0x000000a6 + +#define REG_A4XX_RBBM_PERFCTR_CP_5_HI 0x000000a7 + +#define REG_A4XX_RBBM_PERFCTR_CP_6_LO 0x000000a8 + +#define REG_A4XX_RBBM_PERFCTR_CP_6_HI 0x000000a9 + +#define REG_A4XX_RBBM_PERFCTR_CP_7_LO 0x000000aa + +#define REG_A4XX_RBBM_PERFCTR_CP_7_HI 0x000000ab + +#define 
REG_A4XX_RBBM_PERFCTR_RBBM_0_LO 0x000000ac + +#define REG_A4XX_RBBM_PERFCTR_RBBM_0_HI 0x000000ad + +#define REG_A4XX_RBBM_PERFCTR_RBBM_1_LO 0x000000ae + +#define REG_A4XX_RBBM_PERFCTR_RBBM_1_HI 0x000000af + +#define REG_A4XX_RBBM_PERFCTR_RBBM_2_LO 0x000000b0 + +#define REG_A4XX_RBBM_PERFCTR_RBBM_2_HI 0x000000b1 + +#define REG_A4XX_RBBM_PERFCTR_RBBM_3_LO 0x000000b2 + +#define REG_A4XX_RBBM_PERFCTR_RBBM_3_HI 0x000000b3 + +#define REG_A4XX_RBBM_PERFCTR_PC_0_LO 0x000000b4 + +#define REG_A4XX_RBBM_PERFCTR_PC_0_HI 0x000000b5 + +#define REG_A4XX_RBBM_PERFCTR_PC_1_LO 0x000000b6 + +#define REG_A4XX_RBBM_PERFCTR_PC_1_HI 0x000000b7 + +#define REG_A4XX_RBBM_PERFCTR_PC_2_LO 0x000000b8 + +#define REG_A4XX_RBBM_PERFCTR_PC_2_HI 0x000000b9 + +#define REG_A4XX_RBBM_PERFCTR_PC_3_LO 0x000000ba + +#define REG_A4XX_RBBM_PERFCTR_PC_3_HI 0x000000bb + +#define REG_A4XX_RBBM_PERFCTR_PC_4_LO 0x000000bc + +#define REG_A4XX_RBBM_PERFCTR_PC_4_HI 0x000000bd + +#define REG_A4XX_RBBM_PERFCTR_PC_5_LO 0x000000be + +#define REG_A4XX_RBBM_PERFCTR_PC_5_HI 0x000000bf + +#define REG_A4XX_RBBM_PERFCTR_PC_6_LO 0x000000c0 + +#define REG_A4XX_RBBM_PERFCTR_PC_6_HI 0x000000c1 + +#define REG_A4XX_RBBM_PERFCTR_PC_7_LO 0x000000c2 + +#define REG_A4XX_RBBM_PERFCTR_PC_7_HI 0x000000c3 + +#define REG_A4XX_RBBM_PERFCTR_VFD_0_LO 0x000000c4 + +#define REG_A4XX_RBBM_PERFCTR_VFD_0_HI 0x000000c5 + +#define REG_A4XX_RBBM_PERFCTR_VFD_1_LO 0x000000c6 + +#define REG_A4XX_RBBM_PERFCTR_VFD_1_HI 0x000000c7 + +#define REG_A4XX_RBBM_PERFCTR_VFD_2_LO 0x000000c8 + +#define REG_A4XX_RBBM_PERFCTR_VFD_2_HI 0x000000c9 + +#define REG_A4XX_RBBM_PERFCTR_VFD_3_LO 0x000000ca + +#define REG_A4XX_RBBM_PERFCTR_VFD_3_HI 0x000000cb + +#define REG_A4XX_RBBM_PERFCTR_VFD_4_LO 0x000000cc + +#define REG_A4XX_RBBM_PERFCTR_VFD_4_HI 0x000000cd + +#define REG_A4XX_RBBM_PERFCTR_VFD_5_LO 0x000000ce + +#define REG_A4XX_RBBM_PERFCTR_VFD_5_HI 0x000000cf + +#define REG_A4XX_RBBM_PERFCTR_VFD_6_LO 0x000000d0 + +#define REG_A4XX_RBBM_PERFCTR_VFD_6_HI 0x000000d1 + 
+#define REG_A4XX_RBBM_PERFCTR_VFD_7_LO 0x000000d2 + +#define REG_A4XX_RBBM_PERFCTR_VFD_7_HI 0x000000d3 + +#define REG_A4XX_RBBM_PERFCTR_HLSQ_0_LO 0x000000d4 + +#define REG_A4XX_RBBM_PERFCTR_HLSQ_0_HI 0x000000d5 + +#define REG_A4XX_RBBM_PERFCTR_HLSQ_1_LO 0x000000d6 + +#define REG_A4XX_RBBM_PERFCTR_HLSQ_1_HI 0x000000d7 + +#define REG_A4XX_RBBM_PERFCTR_HLSQ_2_LO 0x000000d8 + +#define REG_A4XX_RBBM_PERFCTR_HLSQ_2_HI 0x000000d9 + +#define REG_A4XX_RBBM_PERFCTR_HLSQ_3_LO 0x000000da + +#define REG_A4XX_RBBM_PERFCTR_HLSQ_3_HI 0x000000db + +#define REG_A4XX_RBBM_PERFCTR_HLSQ_4_LO 0x000000dc + +#define REG_A4XX_RBBM_PERFCTR_HLSQ_4_HI 0x000000dd + +#define REG_A4XX_RBBM_PERFCTR_HLSQ_5_LO 0x000000de + +#define REG_A4XX_RBBM_PERFCTR_HLSQ_5_HI 0x000000df + +#define REG_A4XX_RBBM_PERFCTR_HLSQ_6_LO 0x000000e0 + +#define REG_A4XX_RBBM_PERFCTR_HLSQ_6_HI 0x000000e1 + +#define REG_A4XX_RBBM_PERFCTR_HLSQ_7_LO 0x000000e2 + +#define REG_A4XX_RBBM_PERFCTR_HLSQ_7_HI 0x000000e3 + +#define REG_A4XX_RBBM_PERFCTR_VPC_0_LO 0x000000e4 + +#define REG_A4XX_RBBM_PERFCTR_VPC_0_HI 0x000000e5 + +#define REG_A4XX_RBBM_PERFCTR_VPC_1_LO 0x000000e6 + +#define REG_A4XX_RBBM_PERFCTR_VPC_1_HI 0x000000e7 + +#define REG_A4XX_RBBM_PERFCTR_VPC_2_LO 0x000000e8 + +#define REG_A4XX_RBBM_PERFCTR_VPC_2_HI 0x000000e9 + +#define REG_A4XX_RBBM_PERFCTR_VPC_3_LO 0x000000ea + +#define REG_A4XX_RBBM_PERFCTR_VPC_3_HI 0x000000eb + +#define REG_A4XX_RBBM_PERFCTR_CCU_0_LO 0x000000ec + +#define REG_A4XX_RBBM_PERFCTR_CCU_0_HI 0x000000ed + +#define REG_A4XX_RBBM_PERFCTR_CCU_1_LO 0x000000ee + +#define REG_A4XX_RBBM_PERFCTR_CCU_1_HI 0x000000ef + +#define REG_A4XX_RBBM_PERFCTR_CCU_2_LO 0x000000f0 + +#define REG_A4XX_RBBM_PERFCTR_CCU_2_HI 0x000000f1 + +#define REG_A4XX_RBBM_PERFCTR_CCU_3_LO 0x000000f2 + +#define REG_A4XX_RBBM_PERFCTR_CCU_3_HI 0x000000f3 + +#define REG_A4XX_RBBM_PERFCTR_TSE_0_LO 0x000000f4 + +#define REG_A4XX_RBBM_PERFCTR_TSE_0_HI 0x000000f5 + +#define REG_A4XX_RBBM_PERFCTR_TSE_1_LO 0x000000f6 + +#define 
REG_A4XX_RBBM_PERFCTR_TSE_1_HI 0x000000f7 + +#define REG_A4XX_RBBM_PERFCTR_TSE_2_LO 0x000000f8 + +#define REG_A4XX_RBBM_PERFCTR_TSE_2_HI 0x000000f9 + +#define REG_A4XX_RBBM_PERFCTR_TSE_3_LO 0x000000fa + +#define REG_A4XX_RBBM_PERFCTR_TSE_3_HI 0x000000fb + +#define REG_A4XX_RBBM_PERFCTR_RAS_0_LO 0x000000fc + +#define REG_A4XX_RBBM_PERFCTR_RAS_0_HI 0x000000fd + +#define REG_A4XX_RBBM_PERFCTR_RAS_1_LO 0x000000fe + +#define REG_A4XX_RBBM_PERFCTR_RAS_1_HI 0x000000ff + +#define REG_A4XX_RBBM_PERFCTR_RAS_2_LO 0x00000100 + +#define REG_A4XX_RBBM_PERFCTR_RAS_2_HI 0x00000101 + +#define REG_A4XX_RBBM_PERFCTR_RAS_3_LO 0x00000102 + +#define REG_A4XX_RBBM_PERFCTR_RAS_3_HI 0x00000103 + +#define REG_A4XX_RBBM_PERFCTR_UCHE_0_LO 0x00000104 + +#define REG_A4XX_RBBM_PERFCTR_UCHE_0_HI 0x00000105 + +#define REG_A4XX_RBBM_PERFCTR_UCHE_1_LO 0x00000106 + +#define REG_A4XX_RBBM_PERFCTR_UCHE_1_HI 0x00000107 + +#define REG_A4XX_RBBM_PERFCTR_UCHE_2_LO 0x00000108 + +#define REG_A4XX_RBBM_PERFCTR_UCHE_2_HI 0x00000109 + +#define REG_A4XX_RBBM_PERFCTR_UCHE_3_LO 0x0000010a + +#define REG_A4XX_RBBM_PERFCTR_UCHE_3_HI 0x0000010b + +#define REG_A4XX_RBBM_PERFCTR_UCHE_4_LO 0x0000010c + +#define REG_A4XX_RBBM_PERFCTR_UCHE_4_HI 0x0000010d + +#define REG_A4XX_RBBM_PERFCTR_UCHE_5_LO 0x0000010e + +#define REG_A4XX_RBBM_PERFCTR_UCHE_5_HI 0x0000010f + +#define REG_A4XX_RBBM_PERFCTR_UCHE_6_LO 0x00000110 + +#define REG_A4XX_RBBM_PERFCTR_UCHE_6_HI 0x00000111 + +#define REG_A4XX_RBBM_PERFCTR_UCHE_7_LO 0x00000112 + +#define REG_A4XX_RBBM_PERFCTR_UCHE_7_HI 0x00000113 + +#define REG_A4XX_RBBM_PERFCTR_TP_0_LO 0x00000114 + +#define REG_A4XX_RBBM_PERFCTR_TP_0_HI 0x00000115 + +#define REG_A4XX_RBBM_PERFCTR_TP_0_LO 0x00000114 + +#define REG_A4XX_RBBM_PERFCTR_TP_0_HI 0x00000115 + +#define REG_A4XX_RBBM_PERFCTR_TP_1_LO 0x00000116 + +#define REG_A4XX_RBBM_PERFCTR_TP_1_HI 0x00000117 + +#define REG_A4XX_RBBM_PERFCTR_TP_2_LO 0x00000118 + +#define REG_A4XX_RBBM_PERFCTR_TP_2_HI 0x00000119 + +#define REG_A4XX_RBBM_PERFCTR_TP_3_LO 
0x0000011a + +#define REG_A4XX_RBBM_PERFCTR_TP_3_HI 0x0000011b + +#define REG_A4XX_RBBM_PERFCTR_TP_4_LO 0x0000011c + +#define REG_A4XX_RBBM_PERFCTR_TP_4_HI 0x0000011d + +#define REG_A4XX_RBBM_PERFCTR_TP_5_LO 0x0000011e + +#define REG_A4XX_RBBM_PERFCTR_TP_5_HI 0x0000011f + +#define REG_A4XX_RBBM_PERFCTR_TP_6_LO 0x00000120 + +#define REG_A4XX_RBBM_PERFCTR_TP_6_HI 0x00000121 + +#define REG_A4XX_RBBM_PERFCTR_TP_7_LO 0x00000122 + +#define REG_A4XX_RBBM_PERFCTR_TP_7_HI 0x00000123 + +#define REG_A4XX_RBBM_PERFCTR_SP_0_LO 0x00000124 + +#define REG_A4XX_RBBM_PERFCTR_SP_0_HI 0x00000125 + +#define REG_A4XX_RBBM_PERFCTR_SP_1_LO 0x00000126 + +#define REG_A4XX_RBBM_PERFCTR_SP_1_HI 0x00000127 + +#define REG_A4XX_RBBM_PERFCTR_SP_2_LO 0x00000128 + +#define REG_A4XX_RBBM_PERFCTR_SP_2_HI 0x00000129 + +#define REG_A4XX_RBBM_PERFCTR_SP_3_LO 0x0000012a + +#define REG_A4XX_RBBM_PERFCTR_SP_3_HI 0x0000012b + +#define REG_A4XX_RBBM_PERFCTR_SP_4_LO 0x0000012c + +#define REG_A4XX_RBBM_PERFCTR_SP_4_HI 0x0000012d + +#define REG_A4XX_RBBM_PERFCTR_SP_5_LO 0x0000012e + +#define REG_A4XX_RBBM_PERFCTR_SP_5_HI 0x0000012f + +#define REG_A4XX_RBBM_PERFCTR_SP_6_LO 0x00000130 + +#define REG_A4XX_RBBM_PERFCTR_SP_6_HI 0x00000131 + +#define REG_A4XX_RBBM_PERFCTR_SP_7_LO 0x00000132 + +#define REG_A4XX_RBBM_PERFCTR_SP_7_HI 0x00000133 + +#define REG_A4XX_RBBM_PERFCTR_SP_8_LO 0x00000134 + +#define REG_A4XX_RBBM_PERFCTR_SP_8_HI 0x00000135 + +#define REG_A4XX_RBBM_PERFCTR_SP_9_LO 0x00000136 + +#define REG_A4XX_RBBM_PERFCTR_SP_9_HI 0x00000137 + +#define REG_A4XX_RBBM_PERFCTR_SP_10_LO 0x00000138 + +#define REG_A4XX_RBBM_PERFCTR_SP_10_HI 0x00000139 + +#define REG_A4XX_RBBM_PERFCTR_SP_11_LO 0x0000013a + +#define REG_A4XX_RBBM_PERFCTR_SP_11_HI 0x0000013b + +#define REG_A4XX_RBBM_PERFCTR_RB_0_LO 0x0000013c + +#define REG_A4XX_RBBM_PERFCTR_RB_0_HI 0x0000013d + +#define REG_A4XX_RBBM_PERFCTR_RB_1_LO 0x0000013e + +#define REG_A4XX_RBBM_PERFCTR_RB_1_HI 0x0000013f + +#define REG_A4XX_RBBM_PERFCTR_RB_2_LO 0x00000140 + 
+#define REG_A4XX_RBBM_PERFCTR_RB_2_HI 0x00000141 + +#define REG_A4XX_RBBM_PERFCTR_RB_3_LO 0x00000142 + +#define REG_A4XX_RBBM_PERFCTR_RB_3_HI 0x00000143 + +#define REG_A4XX_RBBM_PERFCTR_RB_4_LO 0x00000144 + +#define REG_A4XX_RBBM_PERFCTR_RB_4_HI 0x00000145 + +#define REG_A4XX_RBBM_PERFCTR_RB_5_LO 0x00000146 + +#define REG_A4XX_RBBM_PERFCTR_RB_5_HI 0x00000147 + +#define REG_A4XX_RBBM_PERFCTR_RB_6_LO 0x00000148 + +#define REG_A4XX_RBBM_PERFCTR_RB_6_HI 0x00000149 + +#define REG_A4XX_RBBM_PERFCTR_RB_7_LO 0x0000014a + +#define REG_A4XX_RBBM_PERFCTR_RB_7_HI 0x0000014b + +#define REG_A4XX_RBBM_PERFCTR_VSC_0_LO 0x0000014c + +#define REG_A4XX_RBBM_PERFCTR_VSC_0_HI 0x0000014d + +#define REG_A4XX_RBBM_PERFCTR_VSC_1_LO 0x0000014e + +#define REG_A4XX_RBBM_PERFCTR_VSC_1_HI 0x0000014f + +#define REG_A4XX_RBBM_PERFCTR_PWR_0_LO 0x00000166 + +#define REG_A4XX_RBBM_PERFCTR_PWR_0_HI 0x00000167 + +#define REG_A4XX_RBBM_PERFCTR_PWR_1_LO 0x00000168 + +#define REG_A4XX_RBBM_PERFCTR_PWR_1_HI 0x00000169 + +#define REG_A4XX_RBBM_ALWAYSON_COUNTER_LO 0x0000016e + +#define REG_A4XX_RBBM_ALWAYSON_COUNTER_HI 0x0000016f + +static inline uint32_t REG_A4XX_RBBM_CLOCK_CTL_SP(uint32_t i0) { return 0x00000068 + 0x1*i0; } + +static inline uint32_t REG_A4XX_RBBM_CLOCK_CTL_SP_REG(uint32_t i0) { return 0x00000068 + 0x1*i0; } + +static inline uint32_t REG_A4XX_RBBM_CLOCK_CTL2_SP(uint32_t i0) { return 0x0000006c + 0x1*i0; } + +static inline uint32_t REG_A4XX_RBBM_CLOCK_CTL2_SP_REG(uint32_t i0) { return 0x0000006c + 0x1*i0; } + +static inline uint32_t REG_A4XX_RBBM_CLOCK_HYST_SP(uint32_t i0) { return 0x00000070 + 0x1*i0; } + +static inline uint32_t REG_A4XX_RBBM_CLOCK_HYST_SP_REG(uint32_t i0) { return 0x00000070 + 0x1*i0; } + +static inline uint32_t REG_A4XX_RBBM_CLOCK_DELAY_SP(uint32_t i0) { return 0x00000074 + 0x1*i0; } + +static inline uint32_t REG_A4XX_RBBM_CLOCK_DELAY_SP_REG(uint32_t i0) { return 0x00000074 + 0x1*i0; } + +static inline uint32_t REG_A4XX_RBBM_CLOCK_CTL_RB(uint32_t i0) { return 0x00000078 
+ 0x1*i0; } + +static inline uint32_t REG_A4XX_RBBM_CLOCK_CTL_RB_REG(uint32_t i0) { return 0x00000078 + 0x1*i0; } + +static inline uint32_t REG_A4XX_RBBM_CLOCK_CTL2_RB(uint32_t i0) { return 0x0000007c + 0x1*i0; } + +static inline uint32_t REG_A4XX_RBBM_CLOCK_CTL2_RB_REG(uint32_t i0) { return 0x0000007c + 0x1*i0; } + +static inline uint32_t REG_A4XX_RBBM_CLOCK_CTL_MARB_CCU(uint32_t i0) { return 0x00000082 + 0x1*i0; } + +static inline uint32_t REG_A4XX_RBBM_CLOCK_CTL_MARB_CCU_REG(uint32_t i0) { return 0x00000082 + 0x1*i0; } + +static inline uint32_t REG_A4XX_RBBM_CLOCK_HYST_RB_MARB_CCU(uint32_t i0) { return 0x00000086 + 0x1*i0; } + +static inline uint32_t REG_A4XX_RBBM_CLOCK_HYST_RB_MARB_CCU_REG(uint32_t i0) { return 0x00000086 + 0x1*i0; } + +#define REG_A4XX_RBBM_CLOCK_HYST_COM_DCOM 0x00000080 + +#define REG_A4XX_RBBM_CLOCK_CTL_COM_DCOM 0x00000081 + +#define REG_A4XX_RBBM_CLOCK_CTL_HLSQ 0x0000008a + +#define REG_A4XX_RBBM_CLOCK_HYST_HLSQ 0x0000008b + +#define REG_A4XX_RBBM_CLOCK_DELAY_HLSQ 0x0000008c + +#define REG_A4XX_RBBM_CLOCK_DELAY_COM_DCOM 0x0000008d + +static inline uint32_t REG_A4XX_RBBM_CLOCK_DELAY_RB_MARB_CCU_L1(uint32_t i0) { return 0x0000008e + 0x1*i0; } + +static inline uint32_t REG_A4XX_RBBM_CLOCK_DELAY_RB_MARB_CCU_L1_REG(uint32_t i0) { return 0x0000008e + 0x1*i0; } + +#define REG_A4XX_RBBM_SP_REGFILE_SLEEP_CNTL_0 0x00000099 + +#define REG_A4XX_RBBM_SP_REGFILE_SLEEP_CNTL_1 0x0000009a + +#define REG_A4XX_RBBM_PERFCTR_PWR_1_LO 0x00000168 + +#define REG_A4XX_RBBM_PERFCTR_CTL 0x00000170 + +#define REG_A4XX_RBBM_PERFCTR_LOAD_CMD0 0x00000171 + +#define REG_A4XX_RBBM_PERFCTR_LOAD_CMD1 0x00000172 + +#define REG_A4XX_RBBM_PERFCTR_LOAD_CMD2 0x00000173 + +#define REG_A4XX_RBBM_PERFCTR_LOAD_VALUE_LO 0x00000174 + +#define REG_A4XX_RBBM_PERFCTR_LOAD_VALUE_HI 0x00000175 + +#define REG_A4XX_RBBM_PERFCTR_RBBM_SEL_0 0x00000176 + +#define REG_A4XX_RBBM_PERFCTR_RBBM_SEL_1 0x00000177 + +#define REG_A4XX_RBBM_PERFCTR_RBBM_SEL_2 0x00000178 + +#define 
REG_A4XX_RBBM_PERFCTR_RBBM_SEL_3 0x00000179 + +#define REG_A4XX_RBBM_GPU_BUSY_MASKED 0x0000017a + +#define REG_A4XX_RBBM_INT_0_STATUS 0x0000017d + +#define REG_A4XX_RBBM_CLOCK_STATUS 0x00000182 + +#define REG_A4XX_RBBM_AHB_STATUS 0x00000189 + +#define REG_A4XX_RBBM_AHB_ME_SPLIT_STATUS 0x0000018c + +#define REG_A4XX_RBBM_AHB_PFP_SPLIT_STATUS 0x0000018d + +#define REG_A4XX_RBBM_AHB_ERROR_STATUS 0x0000018f + +#define REG_A4XX_RBBM_STATUS 0x00000191 +#define A4XX_RBBM_STATUS_HI_BUSY 0x00000001 +#define A4XX_RBBM_STATUS_CP_ME_BUSY 0x00000002 +#define A4XX_RBBM_STATUS_CP_PFP_BUSY 0x00000004 +#define A4XX_RBBM_STATUS_CP_NRT_BUSY 0x00004000 +#define A4XX_RBBM_STATUS_VBIF_BUSY 0x00008000 +#define A4XX_RBBM_STATUS_TSE_BUSY 0x00010000 +#define A4XX_RBBM_STATUS_RAS_BUSY 0x00020000 +#define A4XX_RBBM_STATUS_RB_BUSY 0x00040000 +#define A4XX_RBBM_STATUS_PC_DCALL_BUSY 0x00080000 +#define A4XX_RBBM_STATUS_PC_VSD_BUSY 0x00100000 +#define A4XX_RBBM_STATUS_VFD_BUSY 0x00200000 +#define A4XX_RBBM_STATUS_VPC_BUSY 0x00400000 +#define A4XX_RBBM_STATUS_UCHE_BUSY 0x00800000 +#define A4XX_RBBM_STATUS_SP_BUSY 0x01000000 +#define A4XX_RBBM_STATUS_TPL1_BUSY 0x02000000 +#define A4XX_RBBM_STATUS_MARB_BUSY 0x04000000 +#define A4XX_RBBM_STATUS_VSC_BUSY 0x08000000 +#define A4XX_RBBM_STATUS_ARB_BUSY 0x10000000 +#define A4XX_RBBM_STATUS_HLSQ_BUSY 0x20000000 +#define A4XX_RBBM_STATUS_GPU_BUSY_NOHC 0x40000000 +#define A4XX_RBBM_STATUS_GPU_BUSY 0x80000000 + +#define REG_A4XX_RBBM_INTERFACE_RRDY_STATUS5 0x0000019f + +#define REG_A4XX_RBBM_POWER_STATUS 0x000001b0 +#define A4XX_RBBM_POWER_STATUS_SP_TP_PWR_ON 0x00100000 + +#define REG_A4XX_RBBM_WAIT_IDLE_CLOCKS_CTL2 0x000001b8 + +#define REG_A4XX_CP_SCRATCH_UMASK 0x00000228 + +#define REG_A4XX_CP_SCRATCH_ADDR 0x00000229 + +#define REG_A4XX_CP_RB_BASE 0x00000200 + +#define REG_A4XX_CP_RB_CNTL 0x00000201 + +#define REG_A4XX_CP_RB_WPTR 0x00000205 + +#define REG_A4XX_CP_RB_RPTR_ADDR 0x00000203 + +#define REG_A4XX_CP_RB_RPTR 0x00000204 + +#define 
REG_A4XX_CP_IB1_BASE 0x00000206 + +#define REG_A4XX_CP_IB1_BUFSZ 0x00000207 + +#define REG_A4XX_CP_IB2_BASE 0x00000208 + +#define REG_A4XX_CP_IB2_BUFSZ 0x00000209 + +#define REG_A4XX_CP_ME_NRT_ADDR 0x0000020c + +#define REG_A4XX_CP_ME_NRT_DATA 0x0000020d + +#define REG_A4XX_CP_ME_RB_DONE_DATA 0x00000217 + +#define REG_A4XX_CP_QUEUE_THRESH2 0x00000219 + +#define REG_A4XX_CP_MERCIU_SIZE 0x0000021b + +#define REG_A4XX_CP_ROQ_ADDR 0x0000021c + +#define REG_A4XX_CP_ROQ_DATA 0x0000021d + +#define REG_A4XX_CP_MEQ_ADDR 0x0000021e + +#define REG_A4XX_CP_MEQ_DATA 0x0000021f + +#define REG_A4XX_CP_MERCIU_ADDR 0x00000220 + +#define REG_A4XX_CP_MERCIU_DATA 0x00000221 + +#define REG_A4XX_CP_MERCIU_DATA2 0x00000222 + +#define REG_A4XX_CP_PFP_UCODE_ADDR 0x00000223 + +#define REG_A4XX_CP_PFP_UCODE_DATA 0x00000224 + +#define REG_A4XX_CP_ME_RAM_WADDR 0x00000225 + +#define REG_A4XX_CP_ME_RAM_RADDR 0x00000226 + +#define REG_A4XX_CP_ME_RAM_DATA 0x00000227 + +#define REG_A4XX_CP_PREEMPT 0x0000022a + +#define REG_A4XX_CP_CNTL 0x0000022c + +#define REG_A4XX_CP_ME_CNTL 0x0000022d + +#define REG_A4XX_CP_DEBUG 0x0000022e + +#define REG_A4XX_CP_DEBUG_ECO_CONTROL 0x00000231 + +#define REG_A4XX_CP_DRAW_STATE_ADDR 0x00000232 + +static inline uint32_t REG_A4XX_CP_PROTECT(uint32_t i0) { return 0x00000240 + 0x1*i0; } + +static inline uint32_t REG_A4XX_CP_PROTECT_REG(uint32_t i0) { return 0x00000240 + 0x1*i0; } +#define A4XX_CP_PROTECT_REG_BASE_ADDR__MASK 0x0001ffff +#define A4XX_CP_PROTECT_REG_BASE_ADDR__SHIFT 0 +static inline uint32_t A4XX_CP_PROTECT_REG_BASE_ADDR(uint32_t val) +{ + return ((val) << A4XX_CP_PROTECT_REG_BASE_ADDR__SHIFT) & A4XX_CP_PROTECT_REG_BASE_ADDR__MASK; +} +#define A4XX_CP_PROTECT_REG_MASK_LEN__MASK 0x1f000000 +#define A4XX_CP_PROTECT_REG_MASK_LEN__SHIFT 24 +static inline uint32_t A4XX_CP_PROTECT_REG_MASK_LEN(uint32_t val) +{ + return ((val) << A4XX_CP_PROTECT_REG_MASK_LEN__SHIFT) & A4XX_CP_PROTECT_REG_MASK_LEN__MASK; +} +#define A4XX_CP_PROTECT_REG_TRAP_WRITE 0x20000000 
+#define A4XX_CP_PROTECT_REG_TRAP_READ 0x40000000 + +#define REG_A4XX_CP_PROTECT_CTRL 0x00000250 + +#define REG_A4XX_CP_ST_BASE 0x000004c0 + +#define REG_A4XX_CP_STQ_AVAIL 0x000004ce + +#define REG_A4XX_CP_MERCIU_STAT 0x000004d0 + +#define REG_A4XX_CP_WFI_PEND_CTR 0x000004d2 + +#define REG_A4XX_CP_HW_FAULT 0x000004d8 + +#define REG_A4XX_CP_PROTECT_STATUS 0x000004da + +#define REG_A4XX_CP_EVENTS_IN_FLIGHT 0x000004dd + +#define REG_A4XX_CP_PERFCTR_CP_SEL_0 0x00000500 + +#define REG_A4XX_CP_PERFCTR_CP_SEL_1 0x00000501 + +#define REG_A4XX_CP_PERFCTR_CP_SEL_2 0x00000502 + +#define REG_A4XX_CP_PERFCTR_CP_SEL_3 0x00000503 + +#define REG_A4XX_CP_PERFCTR_CP_SEL_4 0x00000504 + +#define REG_A4XX_CP_PERFCTR_CP_SEL_5 0x00000505 + +#define REG_A4XX_CP_PERFCTR_CP_SEL_6 0x00000506 + +#define REG_A4XX_CP_PERFCTR_CP_SEL_7 0x00000507 + +#define REG_A4XX_CP_PERFCOMBINER_SELECT 0x0000050b + +static inline uint32_t REG_A4XX_CP_SCRATCH(uint32_t i0) { return 0x00000578 + 0x1*i0; } + +static inline uint32_t REG_A4XX_CP_SCRATCH_REG(uint32_t i0) { return 0x00000578 + 0x1*i0; } + +#define REG_A4XX_SP_VS_STATUS 0x00000ec0 + +#define REG_A4XX_SP_MODE_CONTROL 0x00000ec3 + +#define REG_A4XX_SP_PERFCTR_SP_SEL_0 0x00000ec4 + +#define REG_A4XX_SP_PERFCTR_SP_SEL_1 0x00000ec5 + +#define REG_A4XX_SP_PERFCTR_SP_SEL_2 0x00000ec6 + +#define REG_A4XX_SP_PERFCTR_SP_SEL_3 0x00000ec7 + +#define REG_A4XX_SP_PERFCTR_SP_SEL_4 0x00000ec8 + +#define REG_A4XX_SP_PERFCTR_SP_SEL_5 0x00000ec9 + +#define REG_A4XX_SP_PERFCTR_SP_SEL_6 0x00000eca + +#define REG_A4XX_SP_PERFCTR_SP_SEL_7 0x00000ecb + +#define REG_A4XX_SP_PERFCTR_SP_SEL_8 0x00000ecc + +#define REG_A4XX_SP_PERFCTR_SP_SEL_9 0x00000ecd + +#define REG_A4XX_SP_PERFCTR_SP_SEL_10 0x00000ece + +#define REG_A4XX_SP_PERFCTR_SP_SEL_11 0x00000ecf + +#define REG_A4XX_SP_SP_CTRL_REG 0x000022c0 +#define A4XX_SP_SP_CTRL_REG_BINNING_PASS 0x00080000 + +#define REG_A4XX_SP_INSTR_CACHE_CTRL 0x000022c1 +#define A4XX_SP_INSTR_CACHE_CTRL_VS_BUFFER 0x00000080 +#define 
A4XX_SP_INSTR_CACHE_CTRL_FS_BUFFER 0x00000100 +#define A4XX_SP_INSTR_CACHE_CTRL_INSTR_BUFFER 0x00000400 + +#define REG_A4XX_SP_VS_CTRL_REG0 0x000022c4 +#define A4XX_SP_VS_CTRL_REG0_THREADMODE__MASK 0x00000001 +#define A4XX_SP_VS_CTRL_REG0_THREADMODE__SHIFT 0 +static inline uint32_t A4XX_SP_VS_CTRL_REG0_THREADMODE(enum a3xx_threadmode val) +{ + return ((val) << A4XX_SP_VS_CTRL_REG0_THREADMODE__SHIFT) & A4XX_SP_VS_CTRL_REG0_THREADMODE__MASK; +} +#define A4XX_SP_VS_CTRL_REG0_VARYING 0x00000002 +#define A4XX_SP_VS_CTRL_REG0_CACHEINVALID 0x00000004 +#define A4XX_SP_VS_CTRL_REG0_HALFREGFOOTPRINT__MASK 0x000003f0 +#define A4XX_SP_VS_CTRL_REG0_HALFREGFOOTPRINT__SHIFT 4 +static inline uint32_t A4XX_SP_VS_CTRL_REG0_HALFREGFOOTPRINT(uint32_t val) +{ + return ((val) << A4XX_SP_VS_CTRL_REG0_HALFREGFOOTPRINT__SHIFT) & A4XX_SP_VS_CTRL_REG0_HALFREGFOOTPRINT__MASK; +} +#define A4XX_SP_VS_CTRL_REG0_FULLREGFOOTPRINT__MASK 0x0000fc00 +#define A4XX_SP_VS_CTRL_REG0_FULLREGFOOTPRINT__SHIFT 10 +static inline uint32_t A4XX_SP_VS_CTRL_REG0_FULLREGFOOTPRINT(uint32_t val) +{ + return ((val) << A4XX_SP_VS_CTRL_REG0_FULLREGFOOTPRINT__SHIFT) & A4XX_SP_VS_CTRL_REG0_FULLREGFOOTPRINT__MASK; +} +#define A4XX_SP_VS_CTRL_REG0_INOUTREGOVERLAP__MASK 0x000c0000 +#define A4XX_SP_VS_CTRL_REG0_INOUTREGOVERLAP__SHIFT 18 +static inline uint32_t A4XX_SP_VS_CTRL_REG0_INOUTREGOVERLAP(uint32_t val) +{ + return ((val) << A4XX_SP_VS_CTRL_REG0_INOUTREGOVERLAP__SHIFT) & A4XX_SP_VS_CTRL_REG0_INOUTREGOVERLAP__MASK; +} +#define A4XX_SP_VS_CTRL_REG0_THREADSIZE__MASK 0x00100000 +#define A4XX_SP_VS_CTRL_REG0_THREADSIZE__SHIFT 20 +static inline uint32_t A4XX_SP_VS_CTRL_REG0_THREADSIZE(enum a3xx_threadsize val) +{ + return ((val) << A4XX_SP_VS_CTRL_REG0_THREADSIZE__SHIFT) & A4XX_SP_VS_CTRL_REG0_THREADSIZE__MASK; +} +#define A4XX_SP_VS_CTRL_REG0_SUPERTHREADMODE 0x00200000 +#define A4XX_SP_VS_CTRL_REG0_PIXLODENABLE 0x00400000 + +#define REG_A4XX_SP_VS_CTRL_REG1 0x000022c5 +#define A4XX_SP_VS_CTRL_REG1_CONSTLENGTH__MASK 
0x000000ff +#define A4XX_SP_VS_CTRL_REG1_CONSTLENGTH__SHIFT 0 +static inline uint32_t A4XX_SP_VS_CTRL_REG1_CONSTLENGTH(uint32_t val) +{ + return ((val) << A4XX_SP_VS_CTRL_REG1_CONSTLENGTH__SHIFT) & A4XX_SP_VS_CTRL_REG1_CONSTLENGTH__MASK; +} +#define A4XX_SP_VS_CTRL_REG1_INITIALOUTSTANDING__MASK 0x7f000000 +#define A4XX_SP_VS_CTRL_REG1_INITIALOUTSTANDING__SHIFT 24 +static inline uint32_t A4XX_SP_VS_CTRL_REG1_INITIALOUTSTANDING(uint32_t val) +{ + return ((val) << A4XX_SP_VS_CTRL_REG1_INITIALOUTSTANDING__SHIFT) & A4XX_SP_VS_CTRL_REG1_INITIALOUTSTANDING__MASK; +} + +#define REG_A4XX_SP_VS_PARAM_REG 0x000022c6 +#define A4XX_SP_VS_PARAM_REG_POSREGID__MASK 0x000000ff +#define A4XX_SP_VS_PARAM_REG_POSREGID__SHIFT 0 +static inline uint32_t A4XX_SP_VS_PARAM_REG_POSREGID(uint32_t val) +{ + return ((val) << A4XX_SP_VS_PARAM_REG_POSREGID__SHIFT) & A4XX_SP_VS_PARAM_REG_POSREGID__MASK; +} +#define A4XX_SP_VS_PARAM_REG_PSIZEREGID__MASK 0x0000ff00 +#define A4XX_SP_VS_PARAM_REG_PSIZEREGID__SHIFT 8 +static inline uint32_t A4XX_SP_VS_PARAM_REG_PSIZEREGID(uint32_t val) +{ + return ((val) << A4XX_SP_VS_PARAM_REG_PSIZEREGID__SHIFT) & A4XX_SP_VS_PARAM_REG_PSIZEREGID__MASK; +} +#define A4XX_SP_VS_PARAM_REG_TOTALVSOUTVAR__MASK 0xfff00000 +#define A4XX_SP_VS_PARAM_REG_TOTALVSOUTVAR__SHIFT 20 +static inline uint32_t A4XX_SP_VS_PARAM_REG_TOTALVSOUTVAR(uint32_t val) +{ + return ((val) << A4XX_SP_VS_PARAM_REG_TOTALVSOUTVAR__SHIFT) & A4XX_SP_VS_PARAM_REG_TOTALVSOUTVAR__MASK; +} + +static inline uint32_t REG_A4XX_SP_VS_OUT(uint32_t i0) { return 0x000022c7 + 0x1*i0; } + +static inline uint32_t REG_A4XX_SP_VS_OUT_REG(uint32_t i0) { return 0x000022c7 + 0x1*i0; } +#define A4XX_SP_VS_OUT_REG_A_REGID__MASK 0x000001ff +#define A4XX_SP_VS_OUT_REG_A_REGID__SHIFT 0 +static inline uint32_t A4XX_SP_VS_OUT_REG_A_REGID(uint32_t val) +{ + return ((val) << A4XX_SP_VS_OUT_REG_A_REGID__SHIFT) & A4XX_SP_VS_OUT_REG_A_REGID__MASK; +} +#define A4XX_SP_VS_OUT_REG_A_COMPMASK__MASK 0x00001e00 +#define 
A4XX_SP_VS_OUT_REG_A_COMPMASK__SHIFT 9 +static inline uint32_t A4XX_SP_VS_OUT_REG_A_COMPMASK(uint32_t val) +{ + return ((val) << A4XX_SP_VS_OUT_REG_A_COMPMASK__SHIFT) & A4XX_SP_VS_OUT_REG_A_COMPMASK__MASK; +} +#define A4XX_SP_VS_OUT_REG_B_REGID__MASK 0x01ff0000 +#define A4XX_SP_VS_OUT_REG_B_REGID__SHIFT 16 +static inline uint32_t A4XX_SP_VS_OUT_REG_B_REGID(uint32_t val) +{ + return ((val) << A4XX_SP_VS_OUT_REG_B_REGID__SHIFT) & A4XX_SP_VS_OUT_REG_B_REGID__MASK; +} +#define A4XX_SP_VS_OUT_REG_B_COMPMASK__MASK 0x1e000000 +#define A4XX_SP_VS_OUT_REG_B_COMPMASK__SHIFT 25 +static inline uint32_t A4XX_SP_VS_OUT_REG_B_COMPMASK(uint32_t val) +{ + return ((val) << A4XX_SP_VS_OUT_REG_B_COMPMASK__SHIFT) & A4XX_SP_VS_OUT_REG_B_COMPMASK__MASK; +} + +static inline uint32_t REG_A4XX_SP_VS_VPC_DST(uint32_t i0) { return 0x000022d8 + 0x1*i0; } + +static inline uint32_t REG_A4XX_SP_VS_VPC_DST_REG(uint32_t i0) { return 0x000022d8 + 0x1*i0; } +#define A4XX_SP_VS_VPC_DST_REG_OUTLOC0__MASK 0x000000ff +#define A4XX_SP_VS_VPC_DST_REG_OUTLOC0__SHIFT 0 +static inline uint32_t A4XX_SP_VS_VPC_DST_REG_OUTLOC0(uint32_t val) +{ + return ((val) << A4XX_SP_VS_VPC_DST_REG_OUTLOC0__SHIFT) & A4XX_SP_VS_VPC_DST_REG_OUTLOC0__MASK; +} +#define A4XX_SP_VS_VPC_DST_REG_OUTLOC1__MASK 0x0000ff00 +#define A4XX_SP_VS_VPC_DST_REG_OUTLOC1__SHIFT 8 +static inline uint32_t A4XX_SP_VS_VPC_DST_REG_OUTLOC1(uint32_t val) +{ + return ((val) << A4XX_SP_VS_VPC_DST_REG_OUTLOC1__SHIFT) & A4XX_SP_VS_VPC_DST_REG_OUTLOC1__MASK; +} +#define A4XX_SP_VS_VPC_DST_REG_OUTLOC2__MASK 0x00ff0000 +#define A4XX_SP_VS_VPC_DST_REG_OUTLOC2__SHIFT 16 +static inline uint32_t A4XX_SP_VS_VPC_DST_REG_OUTLOC2(uint32_t val) +{ + return ((val) << A4XX_SP_VS_VPC_DST_REG_OUTLOC2__SHIFT) & A4XX_SP_VS_VPC_DST_REG_OUTLOC2__MASK; +} +#define A4XX_SP_VS_VPC_DST_REG_OUTLOC3__MASK 0xff000000 +#define A4XX_SP_VS_VPC_DST_REG_OUTLOC3__SHIFT 24 +static inline uint32_t A4XX_SP_VS_VPC_DST_REG_OUTLOC3(uint32_t val) +{ + return ((val) << 
A4XX_SP_VS_VPC_DST_REG_OUTLOC3__SHIFT) & A4XX_SP_VS_VPC_DST_REG_OUTLOC3__MASK; +} + +#define REG_A4XX_SP_VS_OBJ_OFFSET_REG 0x000022e0 +#define A4XX_SP_VS_OBJ_OFFSET_REG_CONSTOBJECTOFFSET__MASK 0x01ff0000 +#define A4XX_SP_VS_OBJ_OFFSET_REG_CONSTOBJECTOFFSET__SHIFT 16 +static inline uint32_t A4XX_SP_VS_OBJ_OFFSET_REG_CONSTOBJECTOFFSET(uint32_t val) +{ + return ((val) << A4XX_SP_VS_OBJ_OFFSET_REG_CONSTOBJECTOFFSET__SHIFT) & A4XX_SP_VS_OBJ_OFFSET_REG_CONSTOBJECTOFFSET__MASK; +} +#define A4XX_SP_VS_OBJ_OFFSET_REG_SHADEROBJOFFSET__MASK 0xfe000000 +#define A4XX_SP_VS_OBJ_OFFSET_REG_SHADEROBJOFFSET__SHIFT 25 +static inline uint32_t A4XX_SP_VS_OBJ_OFFSET_REG_SHADEROBJOFFSET(uint32_t val) +{ + return ((val) << A4XX_SP_VS_OBJ_OFFSET_REG_SHADEROBJOFFSET__SHIFT) & A4XX_SP_VS_OBJ_OFFSET_REG_SHADEROBJOFFSET__MASK; +} + +#define REG_A4XX_SP_VS_OBJ_START 0x000022e1 + +#define REG_A4XX_SP_VS_PVT_MEM_PARAM 0x000022e2 + +#define REG_A4XX_SP_VS_PVT_MEM_ADDR 0x000022e3 + +#define REG_A4XX_SP_VS_LENGTH_REG 0x000022e5 + +#define REG_A4XX_SP_FS_CTRL_REG0 0x000022e8 +#define A4XX_SP_FS_CTRL_REG0_THREADMODE__MASK 0x00000001 +#define A4XX_SP_FS_CTRL_REG0_THREADMODE__SHIFT 0 +static inline uint32_t A4XX_SP_FS_CTRL_REG0_THREADMODE(enum a3xx_threadmode val) +{ + return ((val) << A4XX_SP_FS_CTRL_REG0_THREADMODE__SHIFT) & A4XX_SP_FS_CTRL_REG0_THREADMODE__MASK; +} +#define A4XX_SP_FS_CTRL_REG0_VARYING 0x00000002 +#define A4XX_SP_FS_CTRL_REG0_CACHEINVALID 0x00000004 +#define A4XX_SP_FS_CTRL_REG0_HALFREGFOOTPRINT__MASK 0x000003f0 +#define A4XX_SP_FS_CTRL_REG0_HALFREGFOOTPRINT__SHIFT 4 +static inline uint32_t A4XX_SP_FS_CTRL_REG0_HALFREGFOOTPRINT(uint32_t val) +{ + return ((val) << A4XX_SP_FS_CTRL_REG0_HALFREGFOOTPRINT__SHIFT) & A4XX_SP_FS_CTRL_REG0_HALFREGFOOTPRINT__MASK; +} +#define A4XX_SP_FS_CTRL_REG0_FULLREGFOOTPRINT__MASK 0x0000fc00 +#define A4XX_SP_FS_CTRL_REG0_FULLREGFOOTPRINT__SHIFT 10 +static inline uint32_t A4XX_SP_FS_CTRL_REG0_FULLREGFOOTPRINT(uint32_t val) +{ + return ((val) << 
A4XX_SP_FS_CTRL_REG0_FULLREGFOOTPRINT__SHIFT) & A4XX_SP_FS_CTRL_REG0_FULLREGFOOTPRINT__MASK; +} +#define A4XX_SP_FS_CTRL_REG0_INOUTREGOVERLAP__MASK 0x000c0000 +#define A4XX_SP_FS_CTRL_REG0_INOUTREGOVERLAP__SHIFT 18 +static inline uint32_t A4XX_SP_FS_CTRL_REG0_INOUTREGOVERLAP(uint32_t val) +{ + return ((val) << A4XX_SP_FS_CTRL_REG0_INOUTREGOVERLAP__SHIFT) & A4XX_SP_FS_CTRL_REG0_INOUTREGOVERLAP__MASK; +} +#define A4XX_SP_FS_CTRL_REG0_THREADSIZE__MASK 0x00100000 +#define A4XX_SP_FS_CTRL_REG0_THREADSIZE__SHIFT 20 +static inline uint32_t A4XX_SP_FS_CTRL_REG0_THREADSIZE(enum a3xx_threadsize val) +{ + return ((val) << A4XX_SP_FS_CTRL_REG0_THREADSIZE__SHIFT) & A4XX_SP_FS_CTRL_REG0_THREADSIZE__MASK; +} +#define A4XX_SP_FS_CTRL_REG0_SUPERTHREADMODE 0x00200000 +#define A4XX_SP_FS_CTRL_REG0_PIXLODENABLE 0x00400000 + +#define REG_A4XX_SP_FS_CTRL_REG1 0x000022e9 +#define A4XX_SP_FS_CTRL_REG1_CONSTLENGTH__MASK 0x000000ff +#define A4XX_SP_FS_CTRL_REG1_CONSTLENGTH__SHIFT 0 +static inline uint32_t A4XX_SP_FS_CTRL_REG1_CONSTLENGTH(uint32_t val) +{ + return ((val) << A4XX_SP_FS_CTRL_REG1_CONSTLENGTH__SHIFT) & A4XX_SP_FS_CTRL_REG1_CONSTLENGTH__MASK; +} +#define A4XX_SP_FS_CTRL_REG1_FACENESS 0x00080000 +#define A4XX_SP_FS_CTRL_REG1_VARYING 0x00100000 +#define A4XX_SP_FS_CTRL_REG1_FRAGCOORD 0x00200000 + +#define REG_A4XX_SP_FS_OBJ_OFFSET_REG 0x000022ea +#define A4XX_SP_FS_OBJ_OFFSET_REG_CONSTOBJECTOFFSET__MASK 0x01ff0000 +#define A4XX_SP_FS_OBJ_OFFSET_REG_CONSTOBJECTOFFSET__SHIFT 16 +static inline uint32_t A4XX_SP_FS_OBJ_OFFSET_REG_CONSTOBJECTOFFSET(uint32_t val) +{ + return ((val) << A4XX_SP_FS_OBJ_OFFSET_REG_CONSTOBJECTOFFSET__SHIFT) & A4XX_SP_FS_OBJ_OFFSET_REG_CONSTOBJECTOFFSET__MASK; +} +#define A4XX_SP_FS_OBJ_OFFSET_REG_SHADEROBJOFFSET__MASK 0xfe000000 +#define A4XX_SP_FS_OBJ_OFFSET_REG_SHADEROBJOFFSET__SHIFT 25 +static inline uint32_t A4XX_SP_FS_OBJ_OFFSET_REG_SHADEROBJOFFSET(uint32_t val) +{ + return ((val) << A4XX_SP_FS_OBJ_OFFSET_REG_SHADEROBJOFFSET__SHIFT) & 
A4XX_SP_FS_OBJ_OFFSET_REG_SHADEROBJOFFSET__MASK; +} + +#define REG_A4XX_SP_FS_OBJ_START 0x000022eb + +#define REG_A4XX_SP_FS_PVT_MEM_PARAM 0x000022ec + +#define REG_A4XX_SP_FS_PVT_MEM_ADDR 0x000022ed + +#define REG_A4XX_SP_FS_LENGTH_REG 0x000022ef + +#define REG_A4XX_SP_FS_OUTPUT_REG 0x000022f0 +#define A4XX_SP_FS_OUTPUT_REG_MRT__MASK 0x0000000f +#define A4XX_SP_FS_OUTPUT_REG_MRT__SHIFT 0 +static inline uint32_t A4XX_SP_FS_OUTPUT_REG_MRT(uint32_t val) +{ + return ((val) << A4XX_SP_FS_OUTPUT_REG_MRT__SHIFT) & A4XX_SP_FS_OUTPUT_REG_MRT__MASK; +} +#define A4XX_SP_FS_OUTPUT_REG_DEPTH_ENABLE 0x00000080 +#define A4XX_SP_FS_OUTPUT_REG_DEPTH_REGID__MASK 0x0000ff00 +#define A4XX_SP_FS_OUTPUT_REG_DEPTH_REGID__SHIFT 8 +static inline uint32_t A4XX_SP_FS_OUTPUT_REG_DEPTH_REGID(uint32_t val) +{ + return ((val) << A4XX_SP_FS_OUTPUT_REG_DEPTH_REGID__SHIFT) & A4XX_SP_FS_OUTPUT_REG_DEPTH_REGID__MASK; +} +#define A4XX_SP_FS_OUTPUT_REG_SAMPLEMASK_REGID__MASK 0xff000000 +#define A4XX_SP_FS_OUTPUT_REG_SAMPLEMASK_REGID__SHIFT 24 +static inline uint32_t A4XX_SP_FS_OUTPUT_REG_SAMPLEMASK_REGID(uint32_t val) +{ + return ((val) << A4XX_SP_FS_OUTPUT_REG_SAMPLEMASK_REGID__SHIFT) & A4XX_SP_FS_OUTPUT_REG_SAMPLEMASK_REGID__MASK; +} + +static inline uint32_t REG_A4XX_SP_FS_MRT(uint32_t i0) { return 0x000022f1 + 0x1*i0; } + +static inline uint32_t REG_A4XX_SP_FS_MRT_REG(uint32_t i0) { return 0x000022f1 + 0x1*i0; } +#define A4XX_SP_FS_MRT_REG_REGID__MASK 0x000000ff +#define A4XX_SP_FS_MRT_REG_REGID__SHIFT 0 +static inline uint32_t A4XX_SP_FS_MRT_REG_REGID(uint32_t val) +{ + return ((val) << A4XX_SP_FS_MRT_REG_REGID__SHIFT) & A4XX_SP_FS_MRT_REG_REGID__MASK; +} +#define A4XX_SP_FS_MRT_REG_HALF_PRECISION 0x00000100 +#define A4XX_SP_FS_MRT_REG_MRTFORMAT__MASK 0x0003f000 +#define A4XX_SP_FS_MRT_REG_MRTFORMAT__SHIFT 12 +static inline uint32_t A4XX_SP_FS_MRT_REG_MRTFORMAT(enum a4xx_color_fmt val) +{ + return ((val) << A4XX_SP_FS_MRT_REG_MRTFORMAT__SHIFT) & A4XX_SP_FS_MRT_REG_MRTFORMAT__MASK; +} +#define 
A4XX_SP_FS_MRT_REG_COLOR_SRGB 0x00040000 + +#define REG_A4XX_SP_CS_CTRL_REG0 0x00002300 + +#define REG_A4XX_SP_CS_OBJ_OFFSET_REG 0x00002301 + +#define REG_A4XX_SP_CS_OBJ_START 0x00002302 + +#define REG_A4XX_SP_CS_PVT_MEM_PARAM 0x00002303 + +#define REG_A4XX_SP_CS_PVT_MEM_ADDR 0x00002304 + +#define REG_A4XX_SP_CS_PVT_MEM_SIZE 0x00002305 + +#define REG_A4XX_SP_CS_LENGTH_REG 0x00002306 + +#define REG_A4XX_SP_HS_OBJ_OFFSET_REG 0x0000230d +#define A4XX_SP_HS_OBJ_OFFSET_REG_CONSTOBJECTOFFSET__MASK 0x01ff0000 +#define A4XX_SP_HS_OBJ_OFFSET_REG_CONSTOBJECTOFFSET__SHIFT 16 +static inline uint32_t A4XX_SP_HS_OBJ_OFFSET_REG_CONSTOBJECTOFFSET(uint32_t val) +{ + return ((val) << A4XX_SP_HS_OBJ_OFFSET_REG_CONSTOBJECTOFFSET__SHIFT) & A4XX_SP_HS_OBJ_OFFSET_REG_CONSTOBJECTOFFSET__MASK; +} +#define A4XX_SP_HS_OBJ_OFFSET_REG_SHADEROBJOFFSET__MASK 0xfe000000 +#define A4XX_SP_HS_OBJ_OFFSET_REG_SHADEROBJOFFSET__SHIFT 25 +static inline uint32_t A4XX_SP_HS_OBJ_OFFSET_REG_SHADEROBJOFFSET(uint32_t val) +{ + return ((val) << A4XX_SP_HS_OBJ_OFFSET_REG_SHADEROBJOFFSET__SHIFT) & A4XX_SP_HS_OBJ_OFFSET_REG_SHADEROBJOFFSET__MASK; +} + +#define REG_A4XX_SP_HS_OBJ_START 0x0000230e + +#define REG_A4XX_SP_HS_PVT_MEM_PARAM 0x0000230f + +#define REG_A4XX_SP_HS_PVT_MEM_ADDR 0x00002310 + +#define REG_A4XX_SP_HS_LENGTH_REG 0x00002312 + +#define REG_A4XX_SP_DS_PARAM_REG 0x0000231a +#define A4XX_SP_DS_PARAM_REG_POSREGID__MASK 0x000000ff +#define A4XX_SP_DS_PARAM_REG_POSREGID__SHIFT 0 +static inline uint32_t A4XX_SP_DS_PARAM_REG_POSREGID(uint32_t val) +{ + return ((val) << A4XX_SP_DS_PARAM_REG_POSREGID__SHIFT) & A4XX_SP_DS_PARAM_REG_POSREGID__MASK; +} +#define A4XX_SP_DS_PARAM_REG_TOTALGSOUTVAR__MASK 0xfff00000 +#define A4XX_SP_DS_PARAM_REG_TOTALGSOUTVAR__SHIFT 20 +static inline uint32_t A4XX_SP_DS_PARAM_REG_TOTALGSOUTVAR(uint32_t val) +{ + return ((val) << A4XX_SP_DS_PARAM_REG_TOTALGSOUTVAR__SHIFT) & A4XX_SP_DS_PARAM_REG_TOTALGSOUTVAR__MASK; +} + +static inline uint32_t REG_A4XX_SP_DS_OUT(uint32_t i0) { 
return 0x0000231b + 0x1*i0; } + +static inline uint32_t REG_A4XX_SP_DS_OUT_REG(uint32_t i0) { return 0x0000231b + 0x1*i0; } +#define A4XX_SP_DS_OUT_REG_A_REGID__MASK 0x000001ff +#define A4XX_SP_DS_OUT_REG_A_REGID__SHIFT 0 +static inline uint32_t A4XX_SP_DS_OUT_REG_A_REGID(uint32_t val) +{ + return ((val) << A4XX_SP_DS_OUT_REG_A_REGID__SHIFT) & A4XX_SP_DS_OUT_REG_A_REGID__MASK; +} +#define A4XX_SP_DS_OUT_REG_A_COMPMASK__MASK 0x00001e00 +#define A4XX_SP_DS_OUT_REG_A_COMPMASK__SHIFT 9 +static inline uint32_t A4XX_SP_DS_OUT_REG_A_COMPMASK(uint32_t val) +{ + return ((val) << A4XX_SP_DS_OUT_REG_A_COMPMASK__SHIFT) & A4XX_SP_DS_OUT_REG_A_COMPMASK__MASK; +} +#define A4XX_SP_DS_OUT_REG_B_REGID__MASK 0x01ff0000 +#define A4XX_SP_DS_OUT_REG_B_REGID__SHIFT 16 +static inline uint32_t A4XX_SP_DS_OUT_REG_B_REGID(uint32_t val) +{ + return ((val) << A4XX_SP_DS_OUT_REG_B_REGID__SHIFT) & A4XX_SP_DS_OUT_REG_B_REGID__MASK; +} +#define A4XX_SP_DS_OUT_REG_B_COMPMASK__MASK 0x1e000000 +#define A4XX_SP_DS_OUT_REG_B_COMPMASK__SHIFT 25 +static inline uint32_t A4XX_SP_DS_OUT_REG_B_COMPMASK(uint32_t val) +{ + return ((val) << A4XX_SP_DS_OUT_REG_B_COMPMASK__SHIFT) & A4XX_SP_DS_OUT_REG_B_COMPMASK__MASK; +} + +static inline uint32_t REG_A4XX_SP_DS_VPC_DST(uint32_t i0) { return 0x0000232c + 0x1*i0; } + +static inline uint32_t REG_A4XX_SP_DS_VPC_DST_REG(uint32_t i0) { return 0x0000232c + 0x1*i0; } +#define A4XX_SP_DS_VPC_DST_REG_OUTLOC0__MASK 0x000000ff +#define A4XX_SP_DS_VPC_DST_REG_OUTLOC0__SHIFT 0 +static inline uint32_t A4XX_SP_DS_VPC_DST_REG_OUTLOC0(uint32_t val) +{ + return ((val) << A4XX_SP_DS_VPC_DST_REG_OUTLOC0__SHIFT) & A4XX_SP_DS_VPC_DST_REG_OUTLOC0__MASK; +} +#define A4XX_SP_DS_VPC_DST_REG_OUTLOC1__MASK 0x0000ff00 +#define A4XX_SP_DS_VPC_DST_REG_OUTLOC1__SHIFT 8 +static inline uint32_t A4XX_SP_DS_VPC_DST_REG_OUTLOC1(uint32_t val) +{ + return ((val) << A4XX_SP_DS_VPC_DST_REG_OUTLOC1__SHIFT) & A4XX_SP_DS_VPC_DST_REG_OUTLOC1__MASK; +} +#define A4XX_SP_DS_VPC_DST_REG_OUTLOC2__MASK 0x00ff0000 
+#define A4XX_SP_DS_VPC_DST_REG_OUTLOC2__SHIFT 16 +static inline uint32_t A4XX_SP_DS_VPC_DST_REG_OUTLOC2(uint32_t val) +{ + return ((val) << A4XX_SP_DS_VPC_DST_REG_OUTLOC2__SHIFT) & A4XX_SP_DS_VPC_DST_REG_OUTLOC2__MASK; +} +#define A4XX_SP_DS_VPC_DST_REG_OUTLOC3__MASK 0xff000000 +#define A4XX_SP_DS_VPC_DST_REG_OUTLOC3__SHIFT 24 +static inline uint32_t A4XX_SP_DS_VPC_DST_REG_OUTLOC3(uint32_t val) +{ + return ((val) << A4XX_SP_DS_VPC_DST_REG_OUTLOC3__SHIFT) & A4XX_SP_DS_VPC_DST_REG_OUTLOC3__MASK; +} + +#define REG_A4XX_SP_DS_OBJ_OFFSET_REG 0x00002334 +#define A4XX_SP_DS_OBJ_OFFSET_REG_CONSTOBJECTOFFSET__MASK 0x01ff0000 +#define A4XX_SP_DS_OBJ_OFFSET_REG_CONSTOBJECTOFFSET__SHIFT 16 +static inline uint32_t A4XX_SP_DS_OBJ_OFFSET_REG_CONSTOBJECTOFFSET(uint32_t val) +{ + return ((val) << A4XX_SP_DS_OBJ_OFFSET_REG_CONSTOBJECTOFFSET__SHIFT) & A4XX_SP_DS_OBJ_OFFSET_REG_CONSTOBJECTOFFSET__MASK; +} +#define A4XX_SP_DS_OBJ_OFFSET_REG_SHADEROBJOFFSET__MASK 0xfe000000 +#define A4XX_SP_DS_OBJ_OFFSET_REG_SHADEROBJOFFSET__SHIFT 25 +static inline uint32_t A4XX_SP_DS_OBJ_OFFSET_REG_SHADEROBJOFFSET(uint32_t val) +{ + return ((val) << A4XX_SP_DS_OBJ_OFFSET_REG_SHADEROBJOFFSET__SHIFT) & A4XX_SP_DS_OBJ_OFFSET_REG_SHADEROBJOFFSET__MASK; +} + +#define REG_A4XX_SP_DS_OBJ_START 0x00002335 + +#define REG_A4XX_SP_DS_PVT_MEM_PARAM 0x00002336 + +#define REG_A4XX_SP_DS_PVT_MEM_ADDR 0x00002337 + +#define REG_A4XX_SP_DS_LENGTH_REG 0x00002339 + +#define REG_A4XX_SP_GS_PARAM_REG 0x00002341 +#define A4XX_SP_GS_PARAM_REG_POSREGID__MASK 0x000000ff +#define A4XX_SP_GS_PARAM_REG_POSREGID__SHIFT 0 +static inline uint32_t A4XX_SP_GS_PARAM_REG_POSREGID(uint32_t val) +{ + return ((val) << A4XX_SP_GS_PARAM_REG_POSREGID__SHIFT) & A4XX_SP_GS_PARAM_REG_POSREGID__MASK; +} +#define A4XX_SP_GS_PARAM_REG_PRIMREGID__MASK 0x0000ff00 +#define A4XX_SP_GS_PARAM_REG_PRIMREGID__SHIFT 8 +static inline uint32_t A4XX_SP_GS_PARAM_REG_PRIMREGID(uint32_t val) +{ + return ((val) << A4XX_SP_GS_PARAM_REG_PRIMREGID__SHIFT) & 
A4XX_SP_GS_PARAM_REG_PRIMREGID__MASK; +} +#define A4XX_SP_GS_PARAM_REG_TOTALGSOUTVAR__MASK 0xfff00000 +#define A4XX_SP_GS_PARAM_REG_TOTALGSOUTVAR__SHIFT 20 +static inline uint32_t A4XX_SP_GS_PARAM_REG_TOTALGSOUTVAR(uint32_t val) +{ + return ((val) << A4XX_SP_GS_PARAM_REG_TOTALGSOUTVAR__SHIFT) & A4XX_SP_GS_PARAM_REG_TOTALGSOUTVAR__MASK; +} + +static inline uint32_t REG_A4XX_SP_GS_OUT(uint32_t i0) { return 0x00002342 + 0x1*i0; } + +static inline uint32_t REG_A4XX_SP_GS_OUT_REG(uint32_t i0) { return 0x00002342 + 0x1*i0; } +#define A4XX_SP_GS_OUT_REG_A_REGID__MASK 0x000001ff +#define A4XX_SP_GS_OUT_REG_A_REGID__SHIFT 0 +static inline uint32_t A4XX_SP_GS_OUT_REG_A_REGID(uint32_t val) +{ + return ((val) << A4XX_SP_GS_OUT_REG_A_REGID__SHIFT) & A4XX_SP_GS_OUT_REG_A_REGID__MASK; +} +#define A4XX_SP_GS_OUT_REG_A_COMPMASK__MASK 0x00001e00 +#define A4XX_SP_GS_OUT_REG_A_COMPMASK__SHIFT 9 +static inline uint32_t A4XX_SP_GS_OUT_REG_A_COMPMASK(uint32_t val) +{ + return ((val) << A4XX_SP_GS_OUT_REG_A_COMPMASK__SHIFT) & A4XX_SP_GS_OUT_REG_A_COMPMASK__MASK; +} +#define A4XX_SP_GS_OUT_REG_B_REGID__MASK 0x01ff0000 +#define A4XX_SP_GS_OUT_REG_B_REGID__SHIFT 16 +static inline uint32_t A4XX_SP_GS_OUT_REG_B_REGID(uint32_t val) +{ + return ((val) << A4XX_SP_GS_OUT_REG_B_REGID__SHIFT) & A4XX_SP_GS_OUT_REG_B_REGID__MASK; +} +#define A4XX_SP_GS_OUT_REG_B_COMPMASK__MASK 0x1e000000 +#define A4XX_SP_GS_OUT_REG_B_COMPMASK__SHIFT 25 +static inline uint32_t A4XX_SP_GS_OUT_REG_B_COMPMASK(uint32_t val) +{ + return ((val) << A4XX_SP_GS_OUT_REG_B_COMPMASK__SHIFT) & A4XX_SP_GS_OUT_REG_B_COMPMASK__MASK; +} + +static inline uint32_t REG_A4XX_SP_GS_VPC_DST(uint32_t i0) { return 0x00002353 + 0x1*i0; } + +static inline uint32_t REG_A4XX_SP_GS_VPC_DST_REG(uint32_t i0) { return 0x00002353 + 0x1*i0; } +#define A4XX_SP_GS_VPC_DST_REG_OUTLOC0__MASK 0x000000ff +#define A4XX_SP_GS_VPC_DST_REG_OUTLOC0__SHIFT 0 +static inline uint32_t A4XX_SP_GS_VPC_DST_REG_OUTLOC0(uint32_t val) +{ + return ((val) << 
A4XX_SP_GS_VPC_DST_REG_OUTLOC0__SHIFT) & A4XX_SP_GS_VPC_DST_REG_OUTLOC0__MASK; +} +#define A4XX_SP_GS_VPC_DST_REG_OUTLOC1__MASK 0x0000ff00 +#define A4XX_SP_GS_VPC_DST_REG_OUTLOC1__SHIFT 8 +static inline uint32_t A4XX_SP_GS_VPC_DST_REG_OUTLOC1(uint32_t val) +{ + return ((val) << A4XX_SP_GS_VPC_DST_REG_OUTLOC1__SHIFT) & A4XX_SP_GS_VPC_DST_REG_OUTLOC1__MASK; +} +#define A4XX_SP_GS_VPC_DST_REG_OUTLOC2__MASK 0x00ff0000 +#define A4XX_SP_GS_VPC_DST_REG_OUTLOC2__SHIFT 16 +static inline uint32_t A4XX_SP_GS_VPC_DST_REG_OUTLOC2(uint32_t val) +{ + return ((val) << A4XX_SP_GS_VPC_DST_REG_OUTLOC2__SHIFT) & A4XX_SP_GS_VPC_DST_REG_OUTLOC2__MASK; +} +#define A4XX_SP_GS_VPC_DST_REG_OUTLOC3__MASK 0xff000000 +#define A4XX_SP_GS_VPC_DST_REG_OUTLOC3__SHIFT 24 +static inline uint32_t A4XX_SP_GS_VPC_DST_REG_OUTLOC3(uint32_t val) +{ + return ((val) << A4XX_SP_GS_VPC_DST_REG_OUTLOC3__SHIFT) & A4XX_SP_GS_VPC_DST_REG_OUTLOC3__MASK; +} + +#define REG_A4XX_SP_GS_OBJ_OFFSET_REG 0x0000235b +#define A4XX_SP_GS_OBJ_OFFSET_REG_CONSTOBJECTOFFSET__MASK 0x01ff0000 +#define A4XX_SP_GS_OBJ_OFFSET_REG_CONSTOBJECTOFFSET__SHIFT 16 +static inline uint32_t A4XX_SP_GS_OBJ_OFFSET_REG_CONSTOBJECTOFFSET(uint32_t val) +{ + return ((val) << A4XX_SP_GS_OBJ_OFFSET_REG_CONSTOBJECTOFFSET__SHIFT) & A4XX_SP_GS_OBJ_OFFSET_REG_CONSTOBJECTOFFSET__MASK; +} +#define A4XX_SP_GS_OBJ_OFFSET_REG_SHADEROBJOFFSET__MASK 0xfe000000 +#define A4XX_SP_GS_OBJ_OFFSET_REG_SHADEROBJOFFSET__SHIFT 25 +static inline uint32_t A4XX_SP_GS_OBJ_OFFSET_REG_SHADEROBJOFFSET(uint32_t val) +{ + return ((val) << A4XX_SP_GS_OBJ_OFFSET_REG_SHADEROBJOFFSET__SHIFT) & A4XX_SP_GS_OBJ_OFFSET_REG_SHADEROBJOFFSET__MASK; +} + +#define REG_A4XX_SP_GS_OBJ_START 0x0000235c + +#define REG_A4XX_SP_GS_PVT_MEM_PARAM 0x0000235d + +#define REG_A4XX_SP_GS_PVT_MEM_ADDR 0x0000235e + +#define REG_A4XX_SP_GS_LENGTH_REG 0x00002360 + +#define REG_A4XX_VPC_DEBUG_RAM_SEL 0x00000e60 + +#define REG_A4XX_VPC_DEBUG_RAM_READ 0x00000e61 + +#define REG_A4XX_VPC_DEBUG_ECO_CONTROL 
0x00000e64 + +#define REG_A4XX_VPC_PERFCTR_VPC_SEL_0 0x00000e65 + +#define REG_A4XX_VPC_PERFCTR_VPC_SEL_1 0x00000e66 + +#define REG_A4XX_VPC_PERFCTR_VPC_SEL_2 0x00000e67 + +#define REG_A4XX_VPC_PERFCTR_VPC_SEL_3 0x00000e68 + +#define REG_A4XX_VPC_ATTR 0x00002140 +#define A4XX_VPC_ATTR_TOTALATTR__MASK 0x000001ff +#define A4XX_VPC_ATTR_TOTALATTR__SHIFT 0 +static inline uint32_t A4XX_VPC_ATTR_TOTALATTR(uint32_t val) +{ + return ((val) << A4XX_VPC_ATTR_TOTALATTR__SHIFT) & A4XX_VPC_ATTR_TOTALATTR__MASK; +} +#define A4XX_VPC_ATTR_PSIZE 0x00000200 +#define A4XX_VPC_ATTR_THRDASSIGN__MASK 0x00003000 +#define A4XX_VPC_ATTR_THRDASSIGN__SHIFT 12 +static inline uint32_t A4XX_VPC_ATTR_THRDASSIGN(uint32_t val) +{ + return ((val) << A4XX_VPC_ATTR_THRDASSIGN__SHIFT) & A4XX_VPC_ATTR_THRDASSIGN__MASK; +} +#define A4XX_VPC_ATTR_ENABLE 0x02000000 + +#define REG_A4XX_VPC_PACK 0x00002141 +#define A4XX_VPC_PACK_NUMBYPASSVAR__MASK 0x000000ff +#define A4XX_VPC_PACK_NUMBYPASSVAR__SHIFT 0 +static inline uint32_t A4XX_VPC_PACK_NUMBYPASSVAR(uint32_t val) +{ + return ((val) << A4XX_VPC_PACK_NUMBYPASSVAR__SHIFT) & A4XX_VPC_PACK_NUMBYPASSVAR__MASK; +} +#define A4XX_VPC_PACK_NUMFPNONPOSVAR__MASK 0x0000ff00 +#define A4XX_VPC_PACK_NUMFPNONPOSVAR__SHIFT 8 +static inline uint32_t A4XX_VPC_PACK_NUMFPNONPOSVAR(uint32_t val) +{ + return ((val) << A4XX_VPC_PACK_NUMFPNONPOSVAR__SHIFT) & A4XX_VPC_PACK_NUMFPNONPOSVAR__MASK; +} +#define A4XX_VPC_PACK_NUMNONPOSVSVAR__MASK 0x00ff0000 +#define A4XX_VPC_PACK_NUMNONPOSVSVAR__SHIFT 16 +static inline uint32_t A4XX_VPC_PACK_NUMNONPOSVSVAR(uint32_t val) +{ + return ((val) << A4XX_VPC_PACK_NUMNONPOSVSVAR__SHIFT) & A4XX_VPC_PACK_NUMNONPOSVSVAR__MASK; +} + +static inline uint32_t REG_A4XX_VPC_VARYING_INTERP(uint32_t i0) { return 0x00002142 + 0x1*i0; } + +static inline uint32_t REG_A4XX_VPC_VARYING_INTERP_MODE(uint32_t i0) { return 0x00002142 + 0x1*i0; } + +static inline uint32_t REG_A4XX_VPC_VARYING_PS_REPL(uint32_t i0) { return 0x0000214a + 0x1*i0; } + +static inline 
uint32_t REG_A4XX_VPC_VARYING_PS_REPL_MODE(uint32_t i0) { return 0x0000214a + 0x1*i0; } + +#define REG_A4XX_VPC_SO_FLUSH_WADDR_3 0x0000216e + +#define REG_A4XX_VSC_BIN_SIZE 0x00000c00 +#define A4XX_VSC_BIN_SIZE_WIDTH__MASK 0x0000001f +#define A4XX_VSC_BIN_SIZE_WIDTH__SHIFT 0 +static inline uint32_t A4XX_VSC_BIN_SIZE_WIDTH(uint32_t val) +{ + assert(!(val & 0x1f)); + return ((val >> 5) << A4XX_VSC_BIN_SIZE_WIDTH__SHIFT) & A4XX_VSC_BIN_SIZE_WIDTH__MASK; +} +#define A4XX_VSC_BIN_SIZE_HEIGHT__MASK 0x000003e0 +#define A4XX_VSC_BIN_SIZE_HEIGHT__SHIFT 5 +static inline uint32_t A4XX_VSC_BIN_SIZE_HEIGHT(uint32_t val) +{ + assert(!(val & 0x1f)); + return ((val >> 5) << A4XX_VSC_BIN_SIZE_HEIGHT__SHIFT) & A4XX_VSC_BIN_SIZE_HEIGHT__MASK; +} + +#define REG_A4XX_VSC_SIZE_ADDRESS 0x00000c01 + +#define REG_A4XX_VSC_SIZE_ADDRESS2 0x00000c02 + +#define REG_A4XX_VSC_DEBUG_ECO_CONTROL 0x00000c03 + +static inline uint32_t REG_A4XX_VSC_PIPE_CONFIG(uint32_t i0) { return 0x00000c08 + 0x1*i0; } + +static inline uint32_t REG_A4XX_VSC_PIPE_CONFIG_REG(uint32_t i0) { return 0x00000c08 + 0x1*i0; } +#define A4XX_VSC_PIPE_CONFIG_REG_X__MASK 0x000003ff +#define A4XX_VSC_PIPE_CONFIG_REG_X__SHIFT 0 +static inline uint32_t A4XX_VSC_PIPE_CONFIG_REG_X(uint32_t val) +{ + return ((val) << A4XX_VSC_PIPE_CONFIG_REG_X__SHIFT) & A4XX_VSC_PIPE_CONFIG_REG_X__MASK; +} +#define A4XX_VSC_PIPE_CONFIG_REG_Y__MASK 0x000ffc00 +#define A4XX_VSC_PIPE_CONFIG_REG_Y__SHIFT 10 +static inline uint32_t A4XX_VSC_PIPE_CONFIG_REG_Y(uint32_t val) +{ + return ((val) << A4XX_VSC_PIPE_CONFIG_REG_Y__SHIFT) & A4XX_VSC_PIPE_CONFIG_REG_Y__MASK; +} +#define A4XX_VSC_PIPE_CONFIG_REG_W__MASK 0x00f00000 +#define A4XX_VSC_PIPE_CONFIG_REG_W__SHIFT 20 +static inline uint32_t A4XX_VSC_PIPE_CONFIG_REG_W(uint32_t val) +{ + return ((val) << A4XX_VSC_PIPE_CONFIG_REG_W__SHIFT) & A4XX_VSC_PIPE_CONFIG_REG_W__MASK; +} +#define A4XX_VSC_PIPE_CONFIG_REG_H__MASK 0x0f000000 +#define A4XX_VSC_PIPE_CONFIG_REG_H__SHIFT 24 +static inline uint32_t 
A4XX_VSC_PIPE_CONFIG_REG_H(uint32_t val) +{ + return ((val) << A4XX_VSC_PIPE_CONFIG_REG_H__SHIFT) & A4XX_VSC_PIPE_CONFIG_REG_H__MASK; +} + +static inline uint32_t REG_A4XX_VSC_PIPE_DATA_ADDRESS(uint32_t i0) { return 0x00000c10 + 0x1*i0; } + +static inline uint32_t REG_A4XX_VSC_PIPE_DATA_ADDRESS_REG(uint32_t i0) { return 0x00000c10 + 0x1*i0; } + +static inline uint32_t REG_A4XX_VSC_PIPE_DATA_LENGTH(uint32_t i0) { return 0x00000c18 + 0x1*i0; } + +static inline uint32_t REG_A4XX_VSC_PIPE_DATA_LENGTH_REG(uint32_t i0) { return 0x00000c18 + 0x1*i0; } + +#define REG_A4XX_VSC_PIPE_PARTIAL_POSN_1 0x00000c41 + +#define REG_A4XX_VSC_PERFCTR_VSC_SEL_0 0x00000c50 + +#define REG_A4XX_VSC_PERFCTR_VSC_SEL_1 0x00000c51 + +#define REG_A4XX_VFD_DEBUG_CONTROL 0x00000e40 + +#define REG_A4XX_VFD_PERFCTR_VFD_SEL_0 0x00000e43 + +#define REG_A4XX_VFD_PERFCTR_VFD_SEL_1 0x00000e44 + +#define REG_A4XX_VFD_PERFCTR_VFD_SEL_2 0x00000e45 + +#define REG_A4XX_VFD_PERFCTR_VFD_SEL_3 0x00000e46 + +#define REG_A4XX_VFD_PERFCTR_VFD_SEL_4 0x00000e47 + +#define REG_A4XX_VFD_PERFCTR_VFD_SEL_5 0x00000e48 + +#define REG_A4XX_VFD_PERFCTR_VFD_SEL_6 0x00000e49 + +#define REG_A4XX_VFD_PERFCTR_VFD_SEL_7 0x00000e4a + +#define REG_A4XX_VGT_CL_INITIATOR 0x000021d0 + +#define REG_A4XX_VGT_EVENT_INITIATOR 0x000021d9 + +#define REG_A4XX_VFD_CONTROL_0 0x00002200 +#define A4XX_VFD_CONTROL_0_TOTALATTRTOVS__MASK 0x000000ff +#define A4XX_VFD_CONTROL_0_TOTALATTRTOVS__SHIFT 0 +static inline uint32_t A4XX_VFD_CONTROL_0_TOTALATTRTOVS(uint32_t val) +{ + return ((val) << A4XX_VFD_CONTROL_0_TOTALATTRTOVS__SHIFT) & A4XX_VFD_CONTROL_0_TOTALATTRTOVS__MASK; +} +#define A4XX_VFD_CONTROL_0_BYPASSATTROVS__MASK 0x0001fe00 +#define A4XX_VFD_CONTROL_0_BYPASSATTROVS__SHIFT 9 +static inline uint32_t A4XX_VFD_CONTROL_0_BYPASSATTROVS(uint32_t val) +{ + return ((val) << A4XX_VFD_CONTROL_0_BYPASSATTROVS__SHIFT) & A4XX_VFD_CONTROL_0_BYPASSATTROVS__MASK; +} +#define A4XX_VFD_CONTROL_0_STRMDECINSTRCNT__MASK 0x03f00000 +#define 
A4XX_VFD_CONTROL_0_STRMDECINSTRCNT__SHIFT 20 +static inline uint32_t A4XX_VFD_CONTROL_0_STRMDECINSTRCNT(uint32_t val) +{ + return ((val) << A4XX_VFD_CONTROL_0_STRMDECINSTRCNT__SHIFT) & A4XX_VFD_CONTROL_0_STRMDECINSTRCNT__MASK; +} +#define A4XX_VFD_CONTROL_0_STRMFETCHINSTRCNT__MASK 0xfc000000 +#define A4XX_VFD_CONTROL_0_STRMFETCHINSTRCNT__SHIFT 26 +static inline uint32_t A4XX_VFD_CONTROL_0_STRMFETCHINSTRCNT(uint32_t val) +{ + return ((val) << A4XX_VFD_CONTROL_0_STRMFETCHINSTRCNT__SHIFT) & A4XX_VFD_CONTROL_0_STRMFETCHINSTRCNT__MASK; +} + +#define REG_A4XX_VFD_CONTROL_1 0x00002201 +#define A4XX_VFD_CONTROL_1_MAXSTORAGE__MASK 0x0000ffff +#define A4XX_VFD_CONTROL_1_MAXSTORAGE__SHIFT 0 +static inline uint32_t A4XX_VFD_CONTROL_1_MAXSTORAGE(uint32_t val) +{ + return ((val) << A4XX_VFD_CONTROL_1_MAXSTORAGE__SHIFT) & A4XX_VFD_CONTROL_1_MAXSTORAGE__MASK; +} +#define A4XX_VFD_CONTROL_1_REGID4VTX__MASK 0x00ff0000 +#define A4XX_VFD_CONTROL_1_REGID4VTX__SHIFT 16 +static inline uint32_t A4XX_VFD_CONTROL_1_REGID4VTX(uint32_t val) +{ + return ((val) << A4XX_VFD_CONTROL_1_REGID4VTX__SHIFT) & A4XX_VFD_CONTROL_1_REGID4VTX__MASK; +} +#define A4XX_VFD_CONTROL_1_REGID4INST__MASK 0xff000000 +#define A4XX_VFD_CONTROL_1_REGID4INST__SHIFT 24 +static inline uint32_t A4XX_VFD_CONTROL_1_REGID4INST(uint32_t val) +{ + return ((val) << A4XX_VFD_CONTROL_1_REGID4INST__SHIFT) & A4XX_VFD_CONTROL_1_REGID4INST__MASK; +} + +#define REG_A4XX_VFD_CONTROL_2 0x00002202 + +#define REG_A4XX_VFD_CONTROL_3 0x00002203 +#define A4XX_VFD_CONTROL_3_REGID_VTXCNT__MASK 0x0000ff00 +#define A4XX_VFD_CONTROL_3_REGID_VTXCNT__SHIFT 8 +static inline uint32_t A4XX_VFD_CONTROL_3_REGID_VTXCNT(uint32_t val) +{ + return ((val) << A4XX_VFD_CONTROL_3_REGID_VTXCNT__SHIFT) & A4XX_VFD_CONTROL_3_REGID_VTXCNT__MASK; +} +#define A4XX_VFD_CONTROL_3_REGID_TESSX__MASK 0x00ff0000 +#define A4XX_VFD_CONTROL_3_REGID_TESSX__SHIFT 16 +static inline uint32_t A4XX_VFD_CONTROL_3_REGID_TESSX(uint32_t val) +{ + return ((val) << 
A4XX_VFD_CONTROL_3_REGID_TESSX__SHIFT) & A4XX_VFD_CONTROL_3_REGID_TESSX__MASK; +} +#define A4XX_VFD_CONTROL_3_REGID_TESSY__MASK 0xff000000 +#define A4XX_VFD_CONTROL_3_REGID_TESSY__SHIFT 24 +static inline uint32_t A4XX_VFD_CONTROL_3_REGID_TESSY(uint32_t val) +{ + return ((val) << A4XX_VFD_CONTROL_3_REGID_TESSY__SHIFT) & A4XX_VFD_CONTROL_3_REGID_TESSY__MASK; +} + +#define REG_A4XX_VFD_CONTROL_4 0x00002204 + +#define REG_A4XX_VFD_INDEX_OFFSET 0x00002208 + +static inline uint32_t REG_A4XX_VFD_FETCH(uint32_t i0) { return 0x0000220a + 0x4*i0; } + +static inline uint32_t REG_A4XX_VFD_FETCH_INSTR_0(uint32_t i0) { return 0x0000220a + 0x4*i0; } +#define A4XX_VFD_FETCH_INSTR_0_FETCHSIZE__MASK 0x0000007f +#define A4XX_VFD_FETCH_INSTR_0_FETCHSIZE__SHIFT 0 +static inline uint32_t A4XX_VFD_FETCH_INSTR_0_FETCHSIZE(uint32_t val) +{ + return ((val) << A4XX_VFD_FETCH_INSTR_0_FETCHSIZE__SHIFT) & A4XX_VFD_FETCH_INSTR_0_FETCHSIZE__MASK; +} +#define A4XX_VFD_FETCH_INSTR_0_BUFSTRIDE__MASK 0x0001ff80 +#define A4XX_VFD_FETCH_INSTR_0_BUFSTRIDE__SHIFT 7 +static inline uint32_t A4XX_VFD_FETCH_INSTR_0_BUFSTRIDE(uint32_t val) +{ + return ((val) << A4XX_VFD_FETCH_INSTR_0_BUFSTRIDE__SHIFT) & A4XX_VFD_FETCH_INSTR_0_BUFSTRIDE__MASK; +} +#define A4XX_VFD_FETCH_INSTR_0_SWITCHNEXT 0x00080000 +#define A4XX_VFD_FETCH_INSTR_0_INSTANCED 0x00100000 + +static inline uint32_t REG_A4XX_VFD_FETCH_INSTR_1(uint32_t i0) { return 0x0000220b + 0x4*i0; } + +static inline uint32_t REG_A4XX_VFD_FETCH_INSTR_2(uint32_t i0) { return 0x0000220c + 0x4*i0; } +#define A4XX_VFD_FETCH_INSTR_2_SIZE__MASK 0xffffffff +#define A4XX_VFD_FETCH_INSTR_2_SIZE__SHIFT 0 +static inline uint32_t A4XX_VFD_FETCH_INSTR_2_SIZE(uint32_t val) +{ + return ((val) << A4XX_VFD_FETCH_INSTR_2_SIZE__SHIFT) & A4XX_VFD_FETCH_INSTR_2_SIZE__MASK; +} + +static inline uint32_t REG_A4XX_VFD_FETCH_INSTR_3(uint32_t i0) { return 0x0000220d + 0x4*i0; } +#define A4XX_VFD_FETCH_INSTR_3_STEPRATE__MASK 0x000001ff +#define A4XX_VFD_FETCH_INSTR_3_STEPRATE__SHIFT 0 
+static inline uint32_t A4XX_VFD_FETCH_INSTR_3_STEPRATE(uint32_t val) +{ + return ((val) << A4XX_VFD_FETCH_INSTR_3_STEPRATE__SHIFT) & A4XX_VFD_FETCH_INSTR_3_STEPRATE__MASK; +} + +static inline uint32_t REG_A4XX_VFD_DECODE(uint32_t i0) { return 0x0000228a + 0x1*i0; } + +static inline uint32_t REG_A4XX_VFD_DECODE_INSTR(uint32_t i0) { return 0x0000228a + 0x1*i0; } +#define A4XX_VFD_DECODE_INSTR_WRITEMASK__MASK 0x0000000f +#define A4XX_VFD_DECODE_INSTR_WRITEMASK__SHIFT 0 +static inline uint32_t A4XX_VFD_DECODE_INSTR_WRITEMASK(uint32_t val) +{ + return ((val) << A4XX_VFD_DECODE_INSTR_WRITEMASK__SHIFT) & A4XX_VFD_DECODE_INSTR_WRITEMASK__MASK; +} +#define A4XX_VFD_DECODE_INSTR_CONSTFILL 0x00000010 +#define A4XX_VFD_DECODE_INSTR_FORMAT__MASK 0x00000fc0 +#define A4XX_VFD_DECODE_INSTR_FORMAT__SHIFT 6 +static inline uint32_t A4XX_VFD_DECODE_INSTR_FORMAT(enum a4xx_vtx_fmt val) +{ + return ((val) << A4XX_VFD_DECODE_INSTR_FORMAT__SHIFT) & A4XX_VFD_DECODE_INSTR_FORMAT__MASK; +} +#define A4XX_VFD_DECODE_INSTR_REGID__MASK 0x000ff000 +#define A4XX_VFD_DECODE_INSTR_REGID__SHIFT 12 +static inline uint32_t A4XX_VFD_DECODE_INSTR_REGID(uint32_t val) +{ + return ((val) << A4XX_VFD_DECODE_INSTR_REGID__SHIFT) & A4XX_VFD_DECODE_INSTR_REGID__MASK; +} +#define A4XX_VFD_DECODE_INSTR_INT 0x00100000 +#define A4XX_VFD_DECODE_INSTR_SWAP__MASK 0x00c00000 +#define A4XX_VFD_DECODE_INSTR_SWAP__SHIFT 22 +static inline uint32_t A4XX_VFD_DECODE_INSTR_SWAP(enum a3xx_color_swap val) +{ + return ((val) << A4XX_VFD_DECODE_INSTR_SWAP__SHIFT) & A4XX_VFD_DECODE_INSTR_SWAP__MASK; +} +#define A4XX_VFD_DECODE_INSTR_SHIFTCNT__MASK 0x1f000000 +#define A4XX_VFD_DECODE_INSTR_SHIFTCNT__SHIFT 24 +static inline uint32_t A4XX_VFD_DECODE_INSTR_SHIFTCNT(uint32_t val) +{ + return ((val) << A4XX_VFD_DECODE_INSTR_SHIFTCNT__SHIFT) & A4XX_VFD_DECODE_INSTR_SHIFTCNT__MASK; +} +#define A4XX_VFD_DECODE_INSTR_LASTCOMPVALID 0x20000000 +#define A4XX_VFD_DECODE_INSTR_SWITCHNEXT 0x40000000 + +#define REG_A4XX_TPL1_DEBUG_ECO_CONTROL 
0x00000f00 + +#define REG_A4XX_TPL1_TP_MODE_CONTROL 0x00000f03 + +#define REG_A4XX_TPL1_PERFCTR_TP_SEL_0 0x00000f04 + +#define REG_A4XX_TPL1_PERFCTR_TP_SEL_1 0x00000f05 + +#define REG_A4XX_TPL1_PERFCTR_TP_SEL_2 0x00000f06 + +#define REG_A4XX_TPL1_PERFCTR_TP_SEL_3 0x00000f07 + +#define REG_A4XX_TPL1_PERFCTR_TP_SEL_4 0x00000f08 + +#define REG_A4XX_TPL1_PERFCTR_TP_SEL_5 0x00000f09 + +#define REG_A4XX_TPL1_PERFCTR_TP_SEL_6 0x00000f0a + +#define REG_A4XX_TPL1_PERFCTR_TP_SEL_7 0x00000f0b + +#define REG_A4XX_TPL1_TP_TEX_OFFSET 0x00002380 + +#define REG_A4XX_TPL1_TP_TEX_COUNT 0x00002381 +#define A4XX_TPL1_TP_TEX_COUNT_VS__MASK 0x000000ff +#define A4XX_TPL1_TP_TEX_COUNT_VS__SHIFT 0 +static inline uint32_t A4XX_TPL1_TP_TEX_COUNT_VS(uint32_t val) +{ + return ((val) << A4XX_TPL1_TP_TEX_COUNT_VS__SHIFT) & A4XX_TPL1_TP_TEX_COUNT_VS__MASK; +} +#define A4XX_TPL1_TP_TEX_COUNT_HS__MASK 0x0000ff00 +#define A4XX_TPL1_TP_TEX_COUNT_HS__SHIFT 8 +static inline uint32_t A4XX_TPL1_TP_TEX_COUNT_HS(uint32_t val) +{ + return ((val) << A4XX_TPL1_TP_TEX_COUNT_HS__SHIFT) & A4XX_TPL1_TP_TEX_COUNT_HS__MASK; +} +#define A4XX_TPL1_TP_TEX_COUNT_DS__MASK 0x00ff0000 +#define A4XX_TPL1_TP_TEX_COUNT_DS__SHIFT 16 +static inline uint32_t A4XX_TPL1_TP_TEX_COUNT_DS(uint32_t val) +{ + return ((val) << A4XX_TPL1_TP_TEX_COUNT_DS__SHIFT) & A4XX_TPL1_TP_TEX_COUNT_DS__MASK; +} +#define A4XX_TPL1_TP_TEX_COUNT_GS__MASK 0xff000000 +#define A4XX_TPL1_TP_TEX_COUNT_GS__SHIFT 24 +static inline uint32_t A4XX_TPL1_TP_TEX_COUNT_GS(uint32_t val) +{ + return ((val) << A4XX_TPL1_TP_TEX_COUNT_GS__SHIFT) & A4XX_TPL1_TP_TEX_COUNT_GS__MASK; +} + +#define REG_A4XX_TPL1_TP_VS_BORDER_COLOR_BASE_ADDR 0x00002384 + +#define REG_A4XX_TPL1_TP_HS_BORDER_COLOR_BASE_ADDR 0x00002387 + +#define REG_A4XX_TPL1_TP_DS_BORDER_COLOR_BASE_ADDR 0x0000238a + +#define REG_A4XX_TPL1_TP_GS_BORDER_COLOR_BASE_ADDR 0x0000238d + +#define REG_A4XX_TPL1_TP_FS_TEX_COUNT 0x000023a0 + +#define REG_A4XX_TPL1_TP_FS_BORDER_COLOR_BASE_ADDR 0x000023a1 + +#define 
REG_A4XX_TPL1_TP_CS_BORDER_COLOR_BASE_ADDR 0x000023a4 + +#define REG_A4XX_TPL1_TP_CS_SAMPLER_BASE_ADDR 0x000023a5 + +#define REG_A4XX_TPL1_TP_CS_TEXMEMOBJ_BASE_ADDR 0x000023a6 + +#define REG_A4XX_GRAS_TSE_STATUS 0x00000c80 + +#define REG_A4XX_GRAS_DEBUG_ECO_CONTROL 0x00000c81 + +#define REG_A4XX_GRAS_PERFCTR_TSE_SEL_0 0x00000c88 + +#define REG_A4XX_GRAS_PERFCTR_TSE_SEL_1 0x00000c89 + +#define REG_A4XX_GRAS_PERFCTR_TSE_SEL_2 0x00000c8a + +#define REG_A4XX_GRAS_PERFCTR_TSE_SEL_3 0x00000c8b + +#define REG_A4XX_GRAS_PERFCTR_RAS_SEL_0 0x00000c8c + +#define REG_A4XX_GRAS_PERFCTR_RAS_SEL_1 0x00000c8d + +#define REG_A4XX_GRAS_PERFCTR_RAS_SEL_2 0x00000c8e + +#define REG_A4XX_GRAS_PERFCTR_RAS_SEL_3 0x00000c8f + +#define REG_A4XX_GRAS_CL_CLIP_CNTL 0x00002000 +#define A4XX_GRAS_CL_CLIP_CNTL_CLIP_DISABLE 0x00008000 +#define A4XX_GRAS_CL_CLIP_CNTL_ZNEAR_CLIP_DISABLE 0x00010000 +#define A4XX_GRAS_CL_CLIP_CNTL_ZFAR_CLIP_DISABLE 0x00020000 +#define A4XX_GRAS_CL_CLIP_CNTL_ZERO_GB_SCALE_Z 0x00400000 + +#define REG_A4XX_GRAS_CLEAR_CNTL 0x00002003 +#define A4XX_GRAS_CLEAR_CNTL_NOT_FASTCLEAR 0x00000001 + +#define REG_A4XX_GRAS_CL_GB_CLIP_ADJ 0x00002004 +#define A4XX_GRAS_CL_GB_CLIP_ADJ_HORZ__MASK 0x000003ff +#define A4XX_GRAS_CL_GB_CLIP_ADJ_HORZ__SHIFT 0 +static inline uint32_t A4XX_GRAS_CL_GB_CLIP_ADJ_HORZ(uint32_t val) +{ + return ((val) << A4XX_GRAS_CL_GB_CLIP_ADJ_HORZ__SHIFT) & A4XX_GRAS_CL_GB_CLIP_ADJ_HORZ__MASK; +} +#define A4XX_GRAS_CL_GB_CLIP_ADJ_VERT__MASK 0x000ffc00 +#define A4XX_GRAS_CL_GB_CLIP_ADJ_VERT__SHIFT 10 +static inline uint32_t A4XX_GRAS_CL_GB_CLIP_ADJ_VERT(uint32_t val) +{ + return ((val) << A4XX_GRAS_CL_GB_CLIP_ADJ_VERT__SHIFT) & A4XX_GRAS_CL_GB_CLIP_ADJ_VERT__MASK; +} + +#define REG_A4XX_GRAS_CL_VPORT_XOFFSET_0 0x00002008 +#define A4XX_GRAS_CL_VPORT_XOFFSET_0__MASK 0xffffffff +#define A4XX_GRAS_CL_VPORT_XOFFSET_0__SHIFT 0 +static inline uint32_t A4XX_GRAS_CL_VPORT_XOFFSET_0(float val) +{ + return ((fui(val)) << A4XX_GRAS_CL_VPORT_XOFFSET_0__SHIFT) & 
A4XX_GRAS_CL_VPORT_XOFFSET_0__MASK; +} + +#define REG_A4XX_GRAS_CL_VPORT_XSCALE_0 0x00002009 +#define A4XX_GRAS_CL_VPORT_XSCALE_0__MASK 0xffffffff +#define A4XX_GRAS_CL_VPORT_XSCALE_0__SHIFT 0 +static inline uint32_t A4XX_GRAS_CL_VPORT_XSCALE_0(float val) +{ + return ((fui(val)) << A4XX_GRAS_CL_VPORT_XSCALE_0__SHIFT) & A4XX_GRAS_CL_VPORT_XSCALE_0__MASK; +} + +#define REG_A4XX_GRAS_CL_VPORT_YOFFSET_0 0x0000200a +#define A4XX_GRAS_CL_VPORT_YOFFSET_0__MASK 0xffffffff +#define A4XX_GRAS_CL_VPORT_YOFFSET_0__SHIFT 0 +static inline uint32_t A4XX_GRAS_CL_VPORT_YOFFSET_0(float val) +{ + return ((fui(val)) << A4XX_GRAS_CL_VPORT_YOFFSET_0__SHIFT) & A4XX_GRAS_CL_VPORT_YOFFSET_0__MASK; +} + +#define REG_A4XX_GRAS_CL_VPORT_YSCALE_0 0x0000200b +#define A4XX_GRAS_CL_VPORT_YSCALE_0__MASK 0xffffffff +#define A4XX_GRAS_CL_VPORT_YSCALE_0__SHIFT 0 +static inline uint32_t A4XX_GRAS_CL_VPORT_YSCALE_0(float val) +{ + return ((fui(val)) << A4XX_GRAS_CL_VPORT_YSCALE_0__SHIFT) & A4XX_GRAS_CL_VPORT_YSCALE_0__MASK; +} + +#define REG_A4XX_GRAS_CL_VPORT_ZOFFSET_0 0x0000200c +#define A4XX_GRAS_CL_VPORT_ZOFFSET_0__MASK 0xffffffff +#define A4XX_GRAS_CL_VPORT_ZOFFSET_0__SHIFT 0 +static inline uint32_t A4XX_GRAS_CL_VPORT_ZOFFSET_0(float val) +{ + return ((fui(val)) << A4XX_GRAS_CL_VPORT_ZOFFSET_0__SHIFT) & A4XX_GRAS_CL_VPORT_ZOFFSET_0__MASK; +} + +#define REG_A4XX_GRAS_CL_VPORT_ZSCALE_0 0x0000200d +#define A4XX_GRAS_CL_VPORT_ZSCALE_0__MASK 0xffffffff +#define A4XX_GRAS_CL_VPORT_ZSCALE_0__SHIFT 0 +static inline uint32_t A4XX_GRAS_CL_VPORT_ZSCALE_0(float val) +{ + return ((fui(val)) << A4XX_GRAS_CL_VPORT_ZSCALE_0__SHIFT) & A4XX_GRAS_CL_VPORT_ZSCALE_0__MASK; +} + +#define REG_A4XX_GRAS_SU_POINT_MINMAX 0x00002070 +#define A4XX_GRAS_SU_POINT_MINMAX_MIN__MASK 0x0000ffff +#define A4XX_GRAS_SU_POINT_MINMAX_MIN__SHIFT 0 +static inline uint32_t A4XX_GRAS_SU_POINT_MINMAX_MIN(float val) +{ + return ((((uint32_t)(val * 16.0))) << A4XX_GRAS_SU_POINT_MINMAX_MIN__SHIFT) & A4XX_GRAS_SU_POINT_MINMAX_MIN__MASK; +} 
+#define A4XX_GRAS_SU_POINT_MINMAX_MAX__MASK 0xffff0000 +#define A4XX_GRAS_SU_POINT_MINMAX_MAX__SHIFT 16 +static inline uint32_t A4XX_GRAS_SU_POINT_MINMAX_MAX(float val) +{ + return ((((uint32_t)(val * 16.0))) << A4XX_GRAS_SU_POINT_MINMAX_MAX__SHIFT) & A4XX_GRAS_SU_POINT_MINMAX_MAX__MASK; +} + +#define REG_A4XX_GRAS_SU_POINT_SIZE 0x00002071 +#define A4XX_GRAS_SU_POINT_SIZE__MASK 0xffffffff +#define A4XX_GRAS_SU_POINT_SIZE__SHIFT 0 +static inline uint32_t A4XX_GRAS_SU_POINT_SIZE(float val) +{ + return ((((int32_t)(val * 16.0))) << A4XX_GRAS_SU_POINT_SIZE__SHIFT) & A4XX_GRAS_SU_POINT_SIZE__MASK; +} + +#define REG_A4XX_GRAS_ALPHA_CONTROL 0x00002073 +#define A4XX_GRAS_ALPHA_CONTROL_ALPHA_TEST_ENABLE 0x00000004 +#define A4XX_GRAS_ALPHA_CONTROL_FORCE_FRAGZ_TO_FS 0x00000008 + +#define REG_A4XX_GRAS_SU_POLY_OFFSET_SCALE 0x00002074 +#define A4XX_GRAS_SU_POLY_OFFSET_SCALE__MASK 0xffffffff +#define A4XX_GRAS_SU_POLY_OFFSET_SCALE__SHIFT 0 +static inline uint32_t A4XX_GRAS_SU_POLY_OFFSET_SCALE(float val) +{ + return ((fui(val)) << A4XX_GRAS_SU_POLY_OFFSET_SCALE__SHIFT) & A4XX_GRAS_SU_POLY_OFFSET_SCALE__MASK; +} + +#define REG_A4XX_GRAS_SU_POLY_OFFSET_OFFSET 0x00002075 +#define A4XX_GRAS_SU_POLY_OFFSET_OFFSET__MASK 0xffffffff +#define A4XX_GRAS_SU_POLY_OFFSET_OFFSET__SHIFT 0 +static inline uint32_t A4XX_GRAS_SU_POLY_OFFSET_OFFSET(float val) +{ + return ((fui(val)) << A4XX_GRAS_SU_POLY_OFFSET_OFFSET__SHIFT) & A4XX_GRAS_SU_POLY_OFFSET_OFFSET__MASK; +} + +#define REG_A4XX_GRAS_SU_POLY_OFFSET_CLAMP 0x00002076 +#define A4XX_GRAS_SU_POLY_OFFSET_CLAMP__MASK 0xffffffff +#define A4XX_GRAS_SU_POLY_OFFSET_CLAMP__SHIFT 0 +static inline uint32_t A4XX_GRAS_SU_POLY_OFFSET_CLAMP(float val) +{ + return ((fui(val)) << A4XX_GRAS_SU_POLY_OFFSET_CLAMP__SHIFT) & A4XX_GRAS_SU_POLY_OFFSET_CLAMP__MASK; +} + +#define REG_A4XX_GRAS_DEPTH_CONTROL 0x00002077 +#define A4XX_GRAS_DEPTH_CONTROL_FORMAT__MASK 0x00000003 +#define A4XX_GRAS_DEPTH_CONTROL_FORMAT__SHIFT 0 +static inline uint32_t 
A4XX_GRAS_DEPTH_CONTROL_FORMAT(enum a4xx_depth_format val) +{ + return ((val) << A4XX_GRAS_DEPTH_CONTROL_FORMAT__SHIFT) & A4XX_GRAS_DEPTH_CONTROL_FORMAT__MASK; +} + +#define REG_A4XX_GRAS_SU_MODE_CONTROL 0x00002078 +#define A4XX_GRAS_SU_MODE_CONTROL_CULL_FRONT 0x00000001 +#define A4XX_GRAS_SU_MODE_CONTROL_CULL_BACK 0x00000002 +#define A4XX_GRAS_SU_MODE_CONTROL_FRONT_CW 0x00000004 +#define A4XX_GRAS_SU_MODE_CONTROL_LINEHALFWIDTH__MASK 0x000007f8 +#define A4XX_GRAS_SU_MODE_CONTROL_LINEHALFWIDTH__SHIFT 3 +static inline uint32_t A4XX_GRAS_SU_MODE_CONTROL_LINEHALFWIDTH(float val) +{ + return ((((int32_t)(val * 4.0))) << A4XX_GRAS_SU_MODE_CONTROL_LINEHALFWIDTH__SHIFT) & A4XX_GRAS_SU_MODE_CONTROL_LINEHALFWIDTH__MASK; +} +#define A4XX_GRAS_SU_MODE_CONTROL_POLY_OFFSET 0x00000800 +#define A4XX_GRAS_SU_MODE_CONTROL_MSAA_ENABLE 0x00002000 +#define A4XX_GRAS_SU_MODE_CONTROL_RENDERING_PASS 0x00100000 + +#define REG_A4XX_GRAS_SC_CONTROL 0x0000207b +#define A4XX_GRAS_SC_CONTROL_RENDER_MODE__MASK 0x0000000c +#define A4XX_GRAS_SC_CONTROL_RENDER_MODE__SHIFT 2 +static inline uint32_t A4XX_GRAS_SC_CONTROL_RENDER_MODE(enum a3xx_render_mode val) +{ + return ((val) << A4XX_GRAS_SC_CONTROL_RENDER_MODE__SHIFT) & A4XX_GRAS_SC_CONTROL_RENDER_MODE__MASK; +} +#define A4XX_GRAS_SC_CONTROL_MSAA_SAMPLES__MASK 0x00000380 +#define A4XX_GRAS_SC_CONTROL_MSAA_SAMPLES__SHIFT 7 +static inline uint32_t A4XX_GRAS_SC_CONTROL_MSAA_SAMPLES(uint32_t val) +{ + return ((val) << A4XX_GRAS_SC_CONTROL_MSAA_SAMPLES__SHIFT) & A4XX_GRAS_SC_CONTROL_MSAA_SAMPLES__MASK; +} +#define A4XX_GRAS_SC_CONTROL_MSAA_DISABLE 0x00000800 +#define A4XX_GRAS_SC_CONTROL_RASTER_MODE__MASK 0x0000f000 +#define A4XX_GRAS_SC_CONTROL_RASTER_MODE__SHIFT 12 +static inline uint32_t A4XX_GRAS_SC_CONTROL_RASTER_MODE(uint32_t val) +{ + return ((val) << A4XX_GRAS_SC_CONTROL_RASTER_MODE__SHIFT) & A4XX_GRAS_SC_CONTROL_RASTER_MODE__MASK; +} + +#define REG_A4XX_GRAS_SC_SCREEN_SCISSOR_TL 0x0000207c +#define 
A4XX_GRAS_SC_SCREEN_SCISSOR_TL_WINDOW_OFFSET_DISABLE 0x80000000 +#define A4XX_GRAS_SC_SCREEN_SCISSOR_TL_X__MASK 0x00007fff +#define A4XX_GRAS_SC_SCREEN_SCISSOR_TL_X__SHIFT 0 +static inline uint32_t A4XX_GRAS_SC_SCREEN_SCISSOR_TL_X(uint32_t val) +{ + return ((val) << A4XX_GRAS_SC_SCREEN_SCISSOR_TL_X__SHIFT) & A4XX_GRAS_SC_SCREEN_SCISSOR_TL_X__MASK; +} +#define A4XX_GRAS_SC_SCREEN_SCISSOR_TL_Y__MASK 0x7fff0000 +#define A4XX_GRAS_SC_SCREEN_SCISSOR_TL_Y__SHIFT 16 +static inline uint32_t A4XX_GRAS_SC_SCREEN_SCISSOR_TL_Y(uint32_t val) +{ + return ((val) << A4XX_GRAS_SC_SCREEN_SCISSOR_TL_Y__SHIFT) & A4XX_GRAS_SC_SCREEN_SCISSOR_TL_Y__MASK; +} + +#define REG_A4XX_GRAS_SC_SCREEN_SCISSOR_BR 0x0000207d +#define A4XX_GRAS_SC_SCREEN_SCISSOR_BR_WINDOW_OFFSET_DISABLE 0x80000000 +#define A4XX_GRAS_SC_SCREEN_SCISSOR_BR_X__MASK 0x00007fff +#define A4XX_GRAS_SC_SCREEN_SCISSOR_BR_X__SHIFT 0 +static inline uint32_t A4XX_GRAS_SC_SCREEN_SCISSOR_BR_X(uint32_t val) +{ + return ((val) << A4XX_GRAS_SC_SCREEN_SCISSOR_BR_X__SHIFT) & A4XX_GRAS_SC_SCREEN_SCISSOR_BR_X__MASK; +} +#define A4XX_GRAS_SC_SCREEN_SCISSOR_BR_Y__MASK 0x7fff0000 +#define A4XX_GRAS_SC_SCREEN_SCISSOR_BR_Y__SHIFT 16 +static inline uint32_t A4XX_GRAS_SC_SCREEN_SCISSOR_BR_Y(uint32_t val) +{ + return ((val) << A4XX_GRAS_SC_SCREEN_SCISSOR_BR_Y__SHIFT) & A4XX_GRAS_SC_SCREEN_SCISSOR_BR_Y__MASK; +} + +#define REG_A4XX_GRAS_SC_WINDOW_SCISSOR_BR 0x0000209c +#define A4XX_GRAS_SC_WINDOW_SCISSOR_BR_WINDOW_OFFSET_DISABLE 0x80000000 +#define A4XX_GRAS_SC_WINDOW_SCISSOR_BR_X__MASK 0x00007fff +#define A4XX_GRAS_SC_WINDOW_SCISSOR_BR_X__SHIFT 0 +static inline uint32_t A4XX_GRAS_SC_WINDOW_SCISSOR_BR_X(uint32_t val) +{ + return ((val) << A4XX_GRAS_SC_WINDOW_SCISSOR_BR_X__SHIFT) & A4XX_GRAS_SC_WINDOW_SCISSOR_BR_X__MASK; +} +#define A4XX_GRAS_SC_WINDOW_SCISSOR_BR_Y__MASK 0x7fff0000 +#define A4XX_GRAS_SC_WINDOW_SCISSOR_BR_Y__SHIFT 16 +static inline uint32_t A4XX_GRAS_SC_WINDOW_SCISSOR_BR_Y(uint32_t val) +{ + return ((val) << 
A4XX_GRAS_SC_WINDOW_SCISSOR_BR_Y__SHIFT) & A4XX_GRAS_SC_WINDOW_SCISSOR_BR_Y__MASK; +} + +#define REG_A4XX_GRAS_SC_WINDOW_SCISSOR_TL 0x0000209d +#define A4XX_GRAS_SC_WINDOW_SCISSOR_TL_WINDOW_OFFSET_DISABLE 0x80000000 +#define A4XX_GRAS_SC_WINDOW_SCISSOR_TL_X__MASK 0x00007fff +#define A4XX_GRAS_SC_WINDOW_SCISSOR_TL_X__SHIFT 0 +static inline uint32_t A4XX_GRAS_SC_WINDOW_SCISSOR_TL_X(uint32_t val) +{ + return ((val) << A4XX_GRAS_SC_WINDOW_SCISSOR_TL_X__SHIFT) & A4XX_GRAS_SC_WINDOW_SCISSOR_TL_X__MASK; +} +#define A4XX_GRAS_SC_WINDOW_SCISSOR_TL_Y__MASK 0x7fff0000 +#define A4XX_GRAS_SC_WINDOW_SCISSOR_TL_Y__SHIFT 16 +static inline uint32_t A4XX_GRAS_SC_WINDOW_SCISSOR_TL_Y(uint32_t val) +{ + return ((val) << A4XX_GRAS_SC_WINDOW_SCISSOR_TL_Y__SHIFT) & A4XX_GRAS_SC_WINDOW_SCISSOR_TL_Y__MASK; +} + +#define REG_A4XX_GRAS_SC_EXTENT_WINDOW_BR 0x0000209e +#define A4XX_GRAS_SC_EXTENT_WINDOW_BR_WINDOW_OFFSET_DISABLE 0x80000000 +#define A4XX_GRAS_SC_EXTENT_WINDOW_BR_X__MASK 0x00007fff +#define A4XX_GRAS_SC_EXTENT_WINDOW_BR_X__SHIFT 0 +static inline uint32_t A4XX_GRAS_SC_EXTENT_WINDOW_BR_X(uint32_t val) +{ + return ((val) << A4XX_GRAS_SC_EXTENT_WINDOW_BR_X__SHIFT) & A4XX_GRAS_SC_EXTENT_WINDOW_BR_X__MASK; +} +#define A4XX_GRAS_SC_EXTENT_WINDOW_BR_Y__MASK 0x7fff0000 +#define A4XX_GRAS_SC_EXTENT_WINDOW_BR_Y__SHIFT 16 +static inline uint32_t A4XX_GRAS_SC_EXTENT_WINDOW_BR_Y(uint32_t val) +{ + return ((val) << A4XX_GRAS_SC_EXTENT_WINDOW_BR_Y__SHIFT) & A4XX_GRAS_SC_EXTENT_WINDOW_BR_Y__MASK; +} + +#define REG_A4XX_GRAS_SC_EXTENT_WINDOW_TL 0x0000209f +#define A4XX_GRAS_SC_EXTENT_WINDOW_TL_WINDOW_OFFSET_DISABLE 0x80000000 +#define A4XX_GRAS_SC_EXTENT_WINDOW_TL_X__MASK 0x00007fff +#define A4XX_GRAS_SC_EXTENT_WINDOW_TL_X__SHIFT 0 +static inline uint32_t A4XX_GRAS_SC_EXTENT_WINDOW_TL_X(uint32_t val) +{ + return ((val) << A4XX_GRAS_SC_EXTENT_WINDOW_TL_X__SHIFT) & A4XX_GRAS_SC_EXTENT_WINDOW_TL_X__MASK; +} +#define A4XX_GRAS_SC_EXTENT_WINDOW_TL_Y__MASK 0x7fff0000 +#define 
A4XX_GRAS_SC_EXTENT_WINDOW_TL_Y__SHIFT 16 +static inline uint32_t A4XX_GRAS_SC_EXTENT_WINDOW_TL_Y(uint32_t val) +{ + return ((val) << A4XX_GRAS_SC_EXTENT_WINDOW_TL_Y__SHIFT) & A4XX_GRAS_SC_EXTENT_WINDOW_TL_Y__MASK; +} + +#define REG_A4XX_UCHE_CACHE_MODE_CONTROL 0x00000e80 + +#define REG_A4XX_UCHE_TRAP_BASE_LO 0x00000e83 + +#define REG_A4XX_UCHE_TRAP_BASE_HI 0x00000e84 + +#define REG_A4XX_UCHE_CACHE_STATUS 0x00000e88 + +#define REG_A4XX_UCHE_INVALIDATE0 0x00000e8a + +#define REG_A4XX_UCHE_INVALIDATE1 0x00000e8b + +#define REG_A4XX_UCHE_CACHE_WAYS_VFD 0x00000e8c + +#define REG_A4XX_UCHE_PERFCTR_UCHE_SEL_0 0x00000e8e + +#define REG_A4XX_UCHE_PERFCTR_UCHE_SEL_1 0x00000e8f + +#define REG_A4XX_UCHE_PERFCTR_UCHE_SEL_2 0x00000e90 + +#define REG_A4XX_UCHE_PERFCTR_UCHE_SEL_3 0x00000e91 + +#define REG_A4XX_UCHE_PERFCTR_UCHE_SEL_4 0x00000e92 + +#define REG_A4XX_UCHE_PERFCTR_UCHE_SEL_5 0x00000e93 + +#define REG_A4XX_UCHE_PERFCTR_UCHE_SEL_6 0x00000e94 + +#define REG_A4XX_UCHE_PERFCTR_UCHE_SEL_7 0x00000e95 + +#define REG_A4XX_HLSQ_TIMEOUT_THRESHOLD 0x00000e00 + +#define REG_A4XX_HLSQ_DEBUG_ECO_CONTROL 0x00000e04 + +#define REG_A4XX_HLSQ_MODE_CONTROL 0x00000e05 + +#define REG_A4XX_HLSQ_PERF_PIPE_MASK 0x00000e0e + +#define REG_A4XX_HLSQ_PERFCTR_HLSQ_SEL_0 0x00000e06 + +#define REG_A4XX_HLSQ_PERFCTR_HLSQ_SEL_1 0x00000e07 + +#define REG_A4XX_HLSQ_PERFCTR_HLSQ_SEL_2 0x00000e08 + +#define REG_A4XX_HLSQ_PERFCTR_HLSQ_SEL_3 0x00000e09 + +#define REG_A4XX_HLSQ_PERFCTR_HLSQ_SEL_4 0x00000e0a + +#define REG_A4XX_HLSQ_PERFCTR_HLSQ_SEL_5 0x00000e0b + +#define REG_A4XX_HLSQ_PERFCTR_HLSQ_SEL_6 0x00000e0c + +#define REG_A4XX_HLSQ_PERFCTR_HLSQ_SEL_7 0x00000e0d + +#define REG_A4XX_HLSQ_CONTROL_0_REG 0x000023c0 +#define A4XX_HLSQ_CONTROL_0_REG_FSTHREADSIZE__MASK 0x00000010 +#define A4XX_HLSQ_CONTROL_0_REG_FSTHREADSIZE__SHIFT 4 +static inline uint32_t A4XX_HLSQ_CONTROL_0_REG_FSTHREADSIZE(enum a3xx_threadsize val) +{ + return ((val) << A4XX_HLSQ_CONTROL_0_REG_FSTHREADSIZE__SHIFT) & 
A4XX_HLSQ_CONTROL_0_REG_FSTHREADSIZE__MASK; +} +#define A4XX_HLSQ_CONTROL_0_REG_FSSUPERTHREADENABLE 0x00000040 +#define A4XX_HLSQ_CONTROL_0_REG_SPSHADERRESTART 0x00000200 +#define A4XX_HLSQ_CONTROL_0_REG_RESERVED2 0x00000400 +#define A4XX_HLSQ_CONTROL_0_REG_CHUNKDISABLE 0x04000000 +#define A4XX_HLSQ_CONTROL_0_REG_CONSTMODE__MASK 0x08000000 +#define A4XX_HLSQ_CONTROL_0_REG_CONSTMODE__SHIFT 27 +static inline uint32_t A4XX_HLSQ_CONTROL_0_REG_CONSTMODE(uint32_t val) +{ + return ((val) << A4XX_HLSQ_CONTROL_0_REG_CONSTMODE__SHIFT) & A4XX_HLSQ_CONTROL_0_REG_CONSTMODE__MASK; +} +#define A4XX_HLSQ_CONTROL_0_REG_LAZYUPDATEDISABLE 0x10000000 +#define A4XX_HLSQ_CONTROL_0_REG_SPCONSTFULLUPDATE 0x20000000 +#define A4XX_HLSQ_CONTROL_0_REG_TPFULLUPDATE 0x40000000 +#define A4XX_HLSQ_CONTROL_0_REG_SINGLECONTEXT 0x80000000 + +#define REG_A4XX_HLSQ_CONTROL_1_REG 0x000023c1 +#define A4XX_HLSQ_CONTROL_1_REG_VSTHREADSIZE__MASK 0x00000040 +#define A4XX_HLSQ_CONTROL_1_REG_VSTHREADSIZE__SHIFT 6 +static inline uint32_t A4XX_HLSQ_CONTROL_1_REG_VSTHREADSIZE(enum a3xx_threadsize val) +{ + return ((val) << A4XX_HLSQ_CONTROL_1_REG_VSTHREADSIZE__SHIFT) & A4XX_HLSQ_CONTROL_1_REG_VSTHREADSIZE__MASK; +} +#define A4XX_HLSQ_CONTROL_1_REG_VSSUPERTHREADENABLE 0x00000100 +#define A4XX_HLSQ_CONTROL_1_REG_RESERVED1 0x00000200 +#define A4XX_HLSQ_CONTROL_1_REG_COORDREGID__MASK 0x00ff0000 +#define A4XX_HLSQ_CONTROL_1_REG_COORDREGID__SHIFT 16 +static inline uint32_t A4XX_HLSQ_CONTROL_1_REG_COORDREGID(uint32_t val) +{ + return ((val) << A4XX_HLSQ_CONTROL_1_REG_COORDREGID__SHIFT) & A4XX_HLSQ_CONTROL_1_REG_COORDREGID__MASK; +} +#define A4XX_HLSQ_CONTROL_1_REG_ZWCOORDREGID__MASK 0xff000000 +#define A4XX_HLSQ_CONTROL_1_REG_ZWCOORDREGID__SHIFT 24 +static inline uint32_t A4XX_HLSQ_CONTROL_1_REG_ZWCOORDREGID(uint32_t val) +{ + return ((val) << A4XX_HLSQ_CONTROL_1_REG_ZWCOORDREGID__SHIFT) & A4XX_HLSQ_CONTROL_1_REG_ZWCOORDREGID__MASK; +} + +#define REG_A4XX_HLSQ_CONTROL_2_REG 0x000023c2 +#define 
A4XX_HLSQ_CONTROL_2_REG_PRIMALLOCTHRESHOLD__MASK 0xfc000000 +#define A4XX_HLSQ_CONTROL_2_REG_PRIMALLOCTHRESHOLD__SHIFT 26 +static inline uint32_t A4XX_HLSQ_CONTROL_2_REG_PRIMALLOCTHRESHOLD(uint32_t val) +{ + return ((val) << A4XX_HLSQ_CONTROL_2_REG_PRIMALLOCTHRESHOLD__SHIFT) & A4XX_HLSQ_CONTROL_2_REG_PRIMALLOCTHRESHOLD__MASK; +} +#define A4XX_HLSQ_CONTROL_2_REG_FACEREGID__MASK 0x000003fc +#define A4XX_HLSQ_CONTROL_2_REG_FACEREGID__SHIFT 2 +static inline uint32_t A4XX_HLSQ_CONTROL_2_REG_FACEREGID(uint32_t val) +{ + return ((val) << A4XX_HLSQ_CONTROL_2_REG_FACEREGID__SHIFT) & A4XX_HLSQ_CONTROL_2_REG_FACEREGID__MASK; +} +#define A4XX_HLSQ_CONTROL_2_REG_SAMPLEID_REGID__MASK 0x0003fc00 +#define A4XX_HLSQ_CONTROL_2_REG_SAMPLEID_REGID__SHIFT 10 +static inline uint32_t A4XX_HLSQ_CONTROL_2_REG_SAMPLEID_REGID(uint32_t val) +{ + return ((val) << A4XX_HLSQ_CONTROL_2_REG_SAMPLEID_REGID__SHIFT) & A4XX_HLSQ_CONTROL_2_REG_SAMPLEID_REGID__MASK; +} +#define A4XX_HLSQ_CONTROL_2_REG_SAMPLEMASK_REGID__MASK 0x03fc0000 +#define A4XX_HLSQ_CONTROL_2_REG_SAMPLEMASK_REGID__SHIFT 18 +static inline uint32_t A4XX_HLSQ_CONTROL_2_REG_SAMPLEMASK_REGID(uint32_t val) +{ + return ((val) << A4XX_HLSQ_CONTROL_2_REG_SAMPLEMASK_REGID__SHIFT) & A4XX_HLSQ_CONTROL_2_REG_SAMPLEMASK_REGID__MASK; +} + +#define REG_A4XX_HLSQ_CONTROL_3_REG 0x000023c3 +#define A4XX_HLSQ_CONTROL_3_REG_REGID__MASK 0x000000ff +#define A4XX_HLSQ_CONTROL_3_REG_REGID__SHIFT 0 +static inline uint32_t A4XX_HLSQ_CONTROL_3_REG_REGID(uint32_t val) +{ + return ((val) << A4XX_HLSQ_CONTROL_3_REG_REGID__SHIFT) & A4XX_HLSQ_CONTROL_3_REG_REGID__MASK; +} + +#define REG_A4XX_HLSQ_CONTROL_4_REG 0x000023c4 + +#define REG_A4XX_HLSQ_VS_CONTROL_REG 0x000023c5 +#define A4XX_HLSQ_VS_CONTROL_REG_CONSTLENGTH__MASK 0x000000ff +#define A4XX_HLSQ_VS_CONTROL_REG_CONSTLENGTH__SHIFT 0 +static inline uint32_t A4XX_HLSQ_VS_CONTROL_REG_CONSTLENGTH(uint32_t val) +{ + return ((val) << A4XX_HLSQ_VS_CONTROL_REG_CONSTLENGTH__SHIFT) & 
A4XX_HLSQ_VS_CONTROL_REG_CONSTLENGTH__MASK; +} +#define A4XX_HLSQ_VS_CONTROL_REG_CONSTOBJECTOFFSET__MASK 0x00007f00 +#define A4XX_HLSQ_VS_CONTROL_REG_CONSTOBJECTOFFSET__SHIFT 8 +static inline uint32_t A4XX_HLSQ_VS_CONTROL_REG_CONSTOBJECTOFFSET(uint32_t val) +{ + return ((val) << A4XX_HLSQ_VS_CONTROL_REG_CONSTOBJECTOFFSET__SHIFT) & A4XX_HLSQ_VS_CONTROL_REG_CONSTOBJECTOFFSET__MASK; +} +#define A4XX_HLSQ_VS_CONTROL_REG_SSBO_ENABLE 0x00008000 +#define A4XX_HLSQ_VS_CONTROL_REG_ENABLED 0x00010000 +#define A4XX_HLSQ_VS_CONTROL_REG_SHADEROBJOFFSET__MASK 0x00fe0000 +#define A4XX_HLSQ_VS_CONTROL_REG_SHADEROBJOFFSET__SHIFT 17 +static inline uint32_t A4XX_HLSQ_VS_CONTROL_REG_SHADEROBJOFFSET(uint32_t val) +{ + return ((val) << A4XX_HLSQ_VS_CONTROL_REG_SHADEROBJOFFSET__SHIFT) & A4XX_HLSQ_VS_CONTROL_REG_SHADEROBJOFFSET__MASK; +} +#define A4XX_HLSQ_VS_CONTROL_REG_INSTRLENGTH__MASK 0xff000000 +#define A4XX_HLSQ_VS_CONTROL_REG_INSTRLENGTH__SHIFT 24 +static inline uint32_t A4XX_HLSQ_VS_CONTROL_REG_INSTRLENGTH(uint32_t val) +{ + return ((val) << A4XX_HLSQ_VS_CONTROL_REG_INSTRLENGTH__SHIFT) & A4XX_HLSQ_VS_CONTROL_REG_INSTRLENGTH__MASK; +} + +#define REG_A4XX_HLSQ_FS_CONTROL_REG 0x000023c6 +#define A4XX_HLSQ_FS_CONTROL_REG_CONSTLENGTH__MASK 0x000000ff +#define A4XX_HLSQ_FS_CONTROL_REG_CONSTLENGTH__SHIFT 0 +static inline uint32_t A4XX_HLSQ_FS_CONTROL_REG_CONSTLENGTH(uint32_t val) +{ + return ((val) << A4XX_HLSQ_FS_CONTROL_REG_CONSTLENGTH__SHIFT) & A4XX_HLSQ_FS_CONTROL_REG_CONSTLENGTH__MASK; +} +#define A4XX_HLSQ_FS_CONTROL_REG_CONSTOBJECTOFFSET__MASK 0x00007f00 +#define A4XX_HLSQ_FS_CONTROL_REG_CONSTOBJECTOFFSET__SHIFT 8 +static inline uint32_t A4XX_HLSQ_FS_CONTROL_REG_CONSTOBJECTOFFSET(uint32_t val) +{ + return ((val) << A4XX_HLSQ_FS_CONTROL_REG_CONSTOBJECTOFFSET__SHIFT) & A4XX_HLSQ_FS_CONTROL_REG_CONSTOBJECTOFFSET__MASK; +} +#define A4XX_HLSQ_FS_CONTROL_REG_SSBO_ENABLE 0x00008000 +#define A4XX_HLSQ_FS_CONTROL_REG_ENABLED 0x00010000 +#define 
A4XX_HLSQ_FS_CONTROL_REG_SHADEROBJOFFSET__MASK 0x00fe0000 +#define A4XX_HLSQ_FS_CONTROL_REG_SHADEROBJOFFSET__SHIFT 17 +static inline uint32_t A4XX_HLSQ_FS_CONTROL_REG_SHADEROBJOFFSET(uint32_t val) +{ + return ((val) << A4XX_HLSQ_FS_CONTROL_REG_SHADEROBJOFFSET__SHIFT) & A4XX_HLSQ_FS_CONTROL_REG_SHADEROBJOFFSET__MASK; +} +#define A4XX_HLSQ_FS_CONTROL_REG_INSTRLENGTH__MASK 0xff000000 +#define A4XX_HLSQ_FS_CONTROL_REG_INSTRLENGTH__SHIFT 24 +static inline uint32_t A4XX_HLSQ_FS_CONTROL_REG_INSTRLENGTH(uint32_t val) +{ + return ((val) << A4XX_HLSQ_FS_CONTROL_REG_INSTRLENGTH__SHIFT) & A4XX_HLSQ_FS_CONTROL_REG_INSTRLENGTH__MASK; +} + +#define REG_A4XX_HLSQ_HS_CONTROL_REG 0x000023c7 +#define A4XX_HLSQ_HS_CONTROL_REG_CONSTLENGTH__MASK 0x000000ff +#define A4XX_HLSQ_HS_CONTROL_REG_CONSTLENGTH__SHIFT 0 +static inline uint32_t A4XX_HLSQ_HS_CONTROL_REG_CONSTLENGTH(uint32_t val) +{ + return ((val) << A4XX_HLSQ_HS_CONTROL_REG_CONSTLENGTH__SHIFT) & A4XX_HLSQ_HS_CONTROL_REG_CONSTLENGTH__MASK; +} +#define A4XX_HLSQ_HS_CONTROL_REG_CONSTOBJECTOFFSET__MASK 0x00007f00 +#define A4XX_HLSQ_HS_CONTROL_REG_CONSTOBJECTOFFSET__SHIFT 8 +static inline uint32_t A4XX_HLSQ_HS_CONTROL_REG_CONSTOBJECTOFFSET(uint32_t val) +{ + return ((val) << A4XX_HLSQ_HS_CONTROL_REG_CONSTOBJECTOFFSET__SHIFT) & A4XX_HLSQ_HS_CONTROL_REG_CONSTOBJECTOFFSET__MASK; +} +#define A4XX_HLSQ_HS_CONTROL_REG_SSBO_ENABLE 0x00008000 +#define A4XX_HLSQ_HS_CONTROL_REG_ENABLED 0x00010000 +#define A4XX_HLSQ_HS_CONTROL_REG_SHADEROBJOFFSET__MASK 0x00fe0000 +#define A4XX_HLSQ_HS_CONTROL_REG_SHADEROBJOFFSET__SHIFT 17 +static inline uint32_t A4XX_HLSQ_HS_CONTROL_REG_SHADEROBJOFFSET(uint32_t val) +{ + return ((val) << A4XX_HLSQ_HS_CONTROL_REG_SHADEROBJOFFSET__SHIFT) & A4XX_HLSQ_HS_CONTROL_REG_SHADEROBJOFFSET__MASK; +} +#define A4XX_HLSQ_HS_CONTROL_REG_INSTRLENGTH__MASK 0xff000000 +#define A4XX_HLSQ_HS_CONTROL_REG_INSTRLENGTH__SHIFT 24 +static inline uint32_t A4XX_HLSQ_HS_CONTROL_REG_INSTRLENGTH(uint32_t val) +{ + return ((val) << 
A4XX_HLSQ_HS_CONTROL_REG_INSTRLENGTH__SHIFT) & A4XX_HLSQ_HS_CONTROL_REG_INSTRLENGTH__MASK; +} + +#define REG_A4XX_HLSQ_DS_CONTROL_REG 0x000023c8 +#define A4XX_HLSQ_DS_CONTROL_REG_CONSTLENGTH__MASK 0x000000ff +#define A4XX_HLSQ_DS_CONTROL_REG_CONSTLENGTH__SHIFT 0 +static inline uint32_t A4XX_HLSQ_DS_CONTROL_REG_CONSTLENGTH(uint32_t val) +{ + return ((val) << A4XX_HLSQ_DS_CONTROL_REG_CONSTLENGTH__SHIFT) & A4XX_HLSQ_DS_CONTROL_REG_CONSTLENGTH__MASK; +} +#define A4XX_HLSQ_DS_CONTROL_REG_CONSTOBJECTOFFSET__MASK 0x00007f00 +#define A4XX_HLSQ_DS_CONTROL_REG_CONSTOBJECTOFFSET__SHIFT 8 +static inline uint32_t A4XX_HLSQ_DS_CONTROL_REG_CONSTOBJECTOFFSET(uint32_t val) +{ + return ((val) << A4XX_HLSQ_DS_CONTROL_REG_CONSTOBJECTOFFSET__SHIFT) & A4XX_HLSQ_DS_CONTROL_REG_CONSTOBJECTOFFSET__MASK; +} +#define A4XX_HLSQ_DS_CONTROL_REG_SSBO_ENABLE 0x00008000 +#define A4XX_HLSQ_DS_CONTROL_REG_ENABLED 0x00010000 +#define A4XX_HLSQ_DS_CONTROL_REG_SHADEROBJOFFSET__MASK 0x00fe0000 +#define A4XX_HLSQ_DS_CONTROL_REG_SHADEROBJOFFSET__SHIFT 17 +static inline uint32_t A4XX_HLSQ_DS_CONTROL_REG_SHADEROBJOFFSET(uint32_t val) +{ + return ((val) << A4XX_HLSQ_DS_CONTROL_REG_SHADEROBJOFFSET__SHIFT) & A4XX_HLSQ_DS_CONTROL_REG_SHADEROBJOFFSET__MASK; +} +#define A4XX_HLSQ_DS_CONTROL_REG_INSTRLENGTH__MASK 0xff000000 +#define A4XX_HLSQ_DS_CONTROL_REG_INSTRLENGTH__SHIFT 24 +static inline uint32_t A4XX_HLSQ_DS_CONTROL_REG_INSTRLENGTH(uint32_t val) +{ + return ((val) << A4XX_HLSQ_DS_CONTROL_REG_INSTRLENGTH__SHIFT) & A4XX_HLSQ_DS_CONTROL_REG_INSTRLENGTH__MASK; +} + +#define REG_A4XX_HLSQ_GS_CONTROL_REG 0x000023c9 +#define A4XX_HLSQ_GS_CONTROL_REG_CONSTLENGTH__MASK 0x000000ff +#define A4XX_HLSQ_GS_CONTROL_REG_CONSTLENGTH__SHIFT 0 +static inline uint32_t A4XX_HLSQ_GS_CONTROL_REG_CONSTLENGTH(uint32_t val) +{ + return ((val) << A4XX_HLSQ_GS_CONTROL_REG_CONSTLENGTH__SHIFT) & A4XX_HLSQ_GS_CONTROL_REG_CONSTLENGTH__MASK; +} +#define A4XX_HLSQ_GS_CONTROL_REG_CONSTOBJECTOFFSET__MASK 0x00007f00 +#define 
A4XX_HLSQ_GS_CONTROL_REG_CONSTOBJECTOFFSET__SHIFT 8 +static inline uint32_t A4XX_HLSQ_GS_CONTROL_REG_CONSTOBJECTOFFSET(uint32_t val) +{ + return ((val) << A4XX_HLSQ_GS_CONTROL_REG_CONSTOBJECTOFFSET__SHIFT) & A4XX_HLSQ_GS_CONTROL_REG_CONSTOBJECTOFFSET__MASK; +} +#define A4XX_HLSQ_GS_CONTROL_REG_SSBO_ENABLE 0x00008000 +#define A4XX_HLSQ_GS_CONTROL_REG_ENABLED 0x00010000 +#define A4XX_HLSQ_GS_CONTROL_REG_SHADEROBJOFFSET__MASK 0x00fe0000 +#define A4XX_HLSQ_GS_CONTROL_REG_SHADEROBJOFFSET__SHIFT 17 +static inline uint32_t A4XX_HLSQ_GS_CONTROL_REG_SHADEROBJOFFSET(uint32_t val) +{ + return ((val) << A4XX_HLSQ_GS_CONTROL_REG_SHADEROBJOFFSET__SHIFT) & A4XX_HLSQ_GS_CONTROL_REG_SHADEROBJOFFSET__MASK; +} +#define A4XX_HLSQ_GS_CONTROL_REG_INSTRLENGTH__MASK 0xff000000 +#define A4XX_HLSQ_GS_CONTROL_REG_INSTRLENGTH__SHIFT 24 +static inline uint32_t A4XX_HLSQ_GS_CONTROL_REG_INSTRLENGTH(uint32_t val) +{ + return ((val) << A4XX_HLSQ_GS_CONTROL_REG_INSTRLENGTH__SHIFT) & A4XX_HLSQ_GS_CONTROL_REG_INSTRLENGTH__MASK; +} + +#define REG_A4XX_HLSQ_CS_CONTROL_REG 0x000023ca +#define A4XX_HLSQ_CS_CONTROL_REG_CONSTLENGTH__MASK 0x000000ff +#define A4XX_HLSQ_CS_CONTROL_REG_CONSTLENGTH__SHIFT 0 +static inline uint32_t A4XX_HLSQ_CS_CONTROL_REG_CONSTLENGTH(uint32_t val) +{ + return ((val) << A4XX_HLSQ_CS_CONTROL_REG_CONSTLENGTH__SHIFT) & A4XX_HLSQ_CS_CONTROL_REG_CONSTLENGTH__MASK; +} +#define A4XX_HLSQ_CS_CONTROL_REG_CONSTOBJECTOFFSET__MASK 0x00007f00 +#define A4XX_HLSQ_CS_CONTROL_REG_CONSTOBJECTOFFSET__SHIFT 8 +static inline uint32_t A4XX_HLSQ_CS_CONTROL_REG_CONSTOBJECTOFFSET(uint32_t val) +{ + return ((val) << A4XX_HLSQ_CS_CONTROL_REG_CONSTOBJECTOFFSET__SHIFT) & A4XX_HLSQ_CS_CONTROL_REG_CONSTOBJECTOFFSET__MASK; +} +#define A4XX_HLSQ_CS_CONTROL_REG_SSBO_ENABLE 0x00008000 +#define A4XX_HLSQ_CS_CONTROL_REG_ENABLED 0x00010000 +#define A4XX_HLSQ_CS_CONTROL_REG_SHADEROBJOFFSET__MASK 0x00fe0000 +#define A4XX_HLSQ_CS_CONTROL_REG_SHADEROBJOFFSET__SHIFT 17 +static inline uint32_t 
A4XX_HLSQ_CS_CONTROL_REG_SHADEROBJOFFSET(uint32_t val) +{ + return ((val) << A4XX_HLSQ_CS_CONTROL_REG_SHADEROBJOFFSET__SHIFT) & A4XX_HLSQ_CS_CONTROL_REG_SHADEROBJOFFSET__MASK; +} +#define A4XX_HLSQ_CS_CONTROL_REG_INSTRLENGTH__MASK 0xff000000 +#define A4XX_HLSQ_CS_CONTROL_REG_INSTRLENGTH__SHIFT 24 +static inline uint32_t A4XX_HLSQ_CS_CONTROL_REG_INSTRLENGTH(uint32_t val) +{ + return ((val) << A4XX_HLSQ_CS_CONTROL_REG_INSTRLENGTH__SHIFT) & A4XX_HLSQ_CS_CONTROL_REG_INSTRLENGTH__MASK; +} + +#define REG_A4XX_HLSQ_CL_NDRANGE_0 0x000023cd +#define A4XX_HLSQ_CL_NDRANGE_0_KERNELDIM__MASK 0x00000003 +#define A4XX_HLSQ_CL_NDRANGE_0_KERNELDIM__SHIFT 0 +static inline uint32_t A4XX_HLSQ_CL_NDRANGE_0_KERNELDIM(uint32_t val) +{ + return ((val) << A4XX_HLSQ_CL_NDRANGE_0_KERNELDIM__SHIFT) & A4XX_HLSQ_CL_NDRANGE_0_KERNELDIM__MASK; +} +#define A4XX_HLSQ_CL_NDRANGE_0_LOCALSIZEX__MASK 0x00000ffc +#define A4XX_HLSQ_CL_NDRANGE_0_LOCALSIZEX__SHIFT 2 +static inline uint32_t A4XX_HLSQ_CL_NDRANGE_0_LOCALSIZEX(uint32_t val) +{ + return ((val) << A4XX_HLSQ_CL_NDRANGE_0_LOCALSIZEX__SHIFT) & A4XX_HLSQ_CL_NDRANGE_0_LOCALSIZEX__MASK; +} +#define A4XX_HLSQ_CL_NDRANGE_0_LOCALSIZEY__MASK 0x003ff000 +#define A4XX_HLSQ_CL_NDRANGE_0_LOCALSIZEY__SHIFT 12 +static inline uint32_t A4XX_HLSQ_CL_NDRANGE_0_LOCALSIZEY(uint32_t val) +{ + return ((val) << A4XX_HLSQ_CL_NDRANGE_0_LOCALSIZEY__SHIFT) & A4XX_HLSQ_CL_NDRANGE_0_LOCALSIZEY__MASK; +} +#define A4XX_HLSQ_CL_NDRANGE_0_LOCALSIZEZ__MASK 0xffc00000 +#define A4XX_HLSQ_CL_NDRANGE_0_LOCALSIZEZ__SHIFT 22 +static inline uint32_t A4XX_HLSQ_CL_NDRANGE_0_LOCALSIZEZ(uint32_t val) +{ + return ((val) << A4XX_HLSQ_CL_NDRANGE_0_LOCALSIZEZ__SHIFT) & A4XX_HLSQ_CL_NDRANGE_0_LOCALSIZEZ__MASK; +} + +#define REG_A4XX_HLSQ_CL_NDRANGE_1 0x000023ce +#define A4XX_HLSQ_CL_NDRANGE_1_SIZE_X__MASK 0xffffffff +#define A4XX_HLSQ_CL_NDRANGE_1_SIZE_X__SHIFT 0 +static inline uint32_t A4XX_HLSQ_CL_NDRANGE_1_SIZE_X(uint32_t val) +{ + return ((val) << A4XX_HLSQ_CL_NDRANGE_1_SIZE_X__SHIFT) & 
A4XX_HLSQ_CL_NDRANGE_1_SIZE_X__MASK; +} + +#define REG_A4XX_HLSQ_CL_NDRANGE_2 0x000023cf + +#define REG_A4XX_HLSQ_CL_NDRANGE_3 0x000023d0 +#define A4XX_HLSQ_CL_NDRANGE_3_SIZE_Y__MASK 0xffffffff +#define A4XX_HLSQ_CL_NDRANGE_3_SIZE_Y__SHIFT 0 +static inline uint32_t A4XX_HLSQ_CL_NDRANGE_3_SIZE_Y(uint32_t val) +{ + return ((val) << A4XX_HLSQ_CL_NDRANGE_3_SIZE_Y__SHIFT) & A4XX_HLSQ_CL_NDRANGE_3_SIZE_Y__MASK; +} + +#define REG_A4XX_HLSQ_CL_NDRANGE_4 0x000023d1 + +#define REG_A4XX_HLSQ_CL_NDRANGE_5 0x000023d2 +#define A4XX_HLSQ_CL_NDRANGE_5_SIZE_Z__MASK 0xffffffff +#define A4XX_HLSQ_CL_NDRANGE_5_SIZE_Z__SHIFT 0 +static inline uint32_t A4XX_HLSQ_CL_NDRANGE_5_SIZE_Z(uint32_t val) +{ + return ((val) << A4XX_HLSQ_CL_NDRANGE_5_SIZE_Z__SHIFT) & A4XX_HLSQ_CL_NDRANGE_5_SIZE_Z__MASK; +} + +#define REG_A4XX_HLSQ_CL_NDRANGE_6 0x000023d3 + +#define REG_A4XX_HLSQ_CL_CONTROL_0 0x000023d4 +#define A4XX_HLSQ_CL_CONTROL_0_WGIDCONSTID__MASK 0x000000ff +#define A4XX_HLSQ_CL_CONTROL_0_WGIDCONSTID__SHIFT 0 +static inline uint32_t A4XX_HLSQ_CL_CONTROL_0_WGIDCONSTID(uint32_t val) +{ + return ((val) << A4XX_HLSQ_CL_CONTROL_0_WGIDCONSTID__SHIFT) & A4XX_HLSQ_CL_CONTROL_0_WGIDCONSTID__MASK; +} +#define A4XX_HLSQ_CL_CONTROL_0_LOCALIDREGID__MASK 0xff000000 +#define A4XX_HLSQ_CL_CONTROL_0_LOCALIDREGID__SHIFT 24 +static inline uint32_t A4XX_HLSQ_CL_CONTROL_0_LOCALIDREGID(uint32_t val) +{ + return ((val) << A4XX_HLSQ_CL_CONTROL_0_LOCALIDREGID__SHIFT) & A4XX_HLSQ_CL_CONTROL_0_LOCALIDREGID__MASK; +} + +#define REG_A4XX_HLSQ_CL_CONTROL_1 0x000023d5 + +#define REG_A4XX_HLSQ_CL_KERNEL_CONST 0x000023d6 + +#define REG_A4XX_HLSQ_CL_KERNEL_GROUP_X 0x000023d7 + +#define REG_A4XX_HLSQ_CL_KERNEL_GROUP_Y 0x000023d8 + +#define REG_A4XX_HLSQ_CL_KERNEL_GROUP_Z 0x000023d9 + +#define REG_A4XX_HLSQ_CL_WG_OFFSET 0x000023da + +#define REG_A4XX_HLSQ_UPDATE_CONTROL 0x000023db + +#define REG_A4XX_PC_BINNING_COMMAND 0x00000d00 +#define A4XX_PC_BINNING_COMMAND_BINNING_ENABLE 0x00000001 + +#define REG_A4XX_PC_TESSFACTOR_ADDR 
0x00000d08 + +#define REG_A4XX_PC_DRAWCALL_SETUP_OVERRIDE 0x00000d0c + +#define REG_A4XX_PC_PERFCTR_PC_SEL_0 0x00000d10 + +#define REG_A4XX_PC_PERFCTR_PC_SEL_1 0x00000d11 + +#define REG_A4XX_PC_PERFCTR_PC_SEL_2 0x00000d12 + +#define REG_A4XX_PC_PERFCTR_PC_SEL_3 0x00000d13 + +#define REG_A4XX_PC_PERFCTR_PC_SEL_4 0x00000d14 + +#define REG_A4XX_PC_PERFCTR_PC_SEL_5 0x00000d15 + +#define REG_A4XX_PC_PERFCTR_PC_SEL_6 0x00000d16 + +#define REG_A4XX_PC_PERFCTR_PC_SEL_7 0x00000d17 + +#define REG_A4XX_PC_BIN_BASE 0x000021c0 + +#define REG_A4XX_PC_VSTREAM_CONTROL 0x000021c2 +#define A4XX_PC_VSTREAM_CONTROL_SIZE__MASK 0x003f0000 +#define A4XX_PC_VSTREAM_CONTROL_SIZE__SHIFT 16 +static inline uint32_t A4XX_PC_VSTREAM_CONTROL_SIZE(uint32_t val) +{ + return ((val) << A4XX_PC_VSTREAM_CONTROL_SIZE__SHIFT) & A4XX_PC_VSTREAM_CONTROL_SIZE__MASK; +} +#define A4XX_PC_VSTREAM_CONTROL_N__MASK 0x07c00000 +#define A4XX_PC_VSTREAM_CONTROL_N__SHIFT 22 +static inline uint32_t A4XX_PC_VSTREAM_CONTROL_N(uint32_t val) +{ + return ((val) << A4XX_PC_VSTREAM_CONTROL_N__SHIFT) & A4XX_PC_VSTREAM_CONTROL_N__MASK; +} + +#define REG_A4XX_PC_PRIM_VTX_CNTL 0x000021c4 +#define A4XX_PC_PRIM_VTX_CNTL_VAROUT__MASK 0x0000000f +#define A4XX_PC_PRIM_VTX_CNTL_VAROUT__SHIFT 0 +static inline uint32_t A4XX_PC_PRIM_VTX_CNTL_VAROUT(uint32_t val) +{ + return ((val) << A4XX_PC_PRIM_VTX_CNTL_VAROUT__SHIFT) & A4XX_PC_PRIM_VTX_CNTL_VAROUT__MASK; +} +#define A4XX_PC_PRIM_VTX_CNTL_PRIMITIVE_RESTART 0x00100000 +#define A4XX_PC_PRIM_VTX_CNTL_PROVOKING_VTX_LAST 0x02000000 +#define A4XX_PC_PRIM_VTX_CNTL_PSIZE 0x04000000 + +#define REG_A4XX_PC_PRIM_VTX_CNTL2 0x000021c5 +#define A4XX_PC_PRIM_VTX_CNTL2_POLYMODE_FRONT_PTYPE__MASK 0x00000007 +#define A4XX_PC_PRIM_VTX_CNTL2_POLYMODE_FRONT_PTYPE__SHIFT 0 +static inline uint32_t A4XX_PC_PRIM_VTX_CNTL2_POLYMODE_FRONT_PTYPE(enum adreno_pa_su_sc_draw val) +{ + return ((val) << A4XX_PC_PRIM_VTX_CNTL2_POLYMODE_FRONT_PTYPE__SHIFT) & A4XX_PC_PRIM_VTX_CNTL2_POLYMODE_FRONT_PTYPE__MASK; +} +#define 
A4XX_PC_PRIM_VTX_CNTL2_POLYMODE_BACK_PTYPE__MASK 0x00000038 +#define A4XX_PC_PRIM_VTX_CNTL2_POLYMODE_BACK_PTYPE__SHIFT 3 +static inline uint32_t A4XX_PC_PRIM_VTX_CNTL2_POLYMODE_BACK_PTYPE(enum adreno_pa_su_sc_draw val) +{ + return ((val) << A4XX_PC_PRIM_VTX_CNTL2_POLYMODE_BACK_PTYPE__SHIFT) & A4XX_PC_PRIM_VTX_CNTL2_POLYMODE_BACK_PTYPE__MASK; +} +#define A4XX_PC_PRIM_VTX_CNTL2_POLYMODE_ENABLE 0x00000040 + +#define REG_A4XX_PC_RESTART_INDEX 0x000021c6 + +#define REG_A4XX_PC_GS_PARAM 0x000021e5 +#define A4XX_PC_GS_PARAM_MAX_VERTICES__MASK 0x000003ff +#define A4XX_PC_GS_PARAM_MAX_VERTICES__SHIFT 0 +static inline uint32_t A4XX_PC_GS_PARAM_MAX_VERTICES(uint32_t val) +{ + return ((val) << A4XX_PC_GS_PARAM_MAX_VERTICES__SHIFT) & A4XX_PC_GS_PARAM_MAX_VERTICES__MASK; +} +#define A4XX_PC_GS_PARAM_INVOCATIONS__MASK 0x0000f800 +#define A4XX_PC_GS_PARAM_INVOCATIONS__SHIFT 11 +static inline uint32_t A4XX_PC_GS_PARAM_INVOCATIONS(uint32_t val) +{ + return ((val) << A4XX_PC_GS_PARAM_INVOCATIONS__SHIFT) & A4XX_PC_GS_PARAM_INVOCATIONS__MASK; +} +#define A4XX_PC_GS_PARAM_PRIMTYPE__MASK 0x01800000 +#define A4XX_PC_GS_PARAM_PRIMTYPE__SHIFT 23 +static inline uint32_t A4XX_PC_GS_PARAM_PRIMTYPE(enum adreno_pa_su_sc_draw val) +{ + return ((val) << A4XX_PC_GS_PARAM_PRIMTYPE__SHIFT) & A4XX_PC_GS_PARAM_PRIMTYPE__MASK; +} +#define A4XX_PC_GS_PARAM_LAYER 0x80000000 + +#define REG_A4XX_PC_HS_PARAM 0x000021e7 +#define A4XX_PC_HS_PARAM_VERTICES_OUT__MASK 0x0000003f +#define A4XX_PC_HS_PARAM_VERTICES_OUT__SHIFT 0 +static inline uint32_t A4XX_PC_HS_PARAM_VERTICES_OUT(uint32_t val) +{ + return ((val) << A4XX_PC_HS_PARAM_VERTICES_OUT__SHIFT) & A4XX_PC_HS_PARAM_VERTICES_OUT__MASK; +} +#define A4XX_PC_HS_PARAM_SPACING__MASK 0x00600000 +#define A4XX_PC_HS_PARAM_SPACING__SHIFT 21 +static inline uint32_t A4XX_PC_HS_PARAM_SPACING(enum a4xx_tess_spacing val) +{ + return ((val) << A4XX_PC_HS_PARAM_SPACING__SHIFT) & A4XX_PC_HS_PARAM_SPACING__MASK; +} +#define A4XX_PC_HS_PARAM_CW 0x00800000 +#define 
A4XX_PC_HS_PARAM_CONNECTED 0x01000000 + +#define REG_A4XX_VBIF_VERSION 0x00003000 + +#define REG_A4XX_VBIF_CLKON 0x00003001 +#define A4XX_VBIF_CLKON_FORCE_ON_TESTBUS 0x00000001 + +#define REG_A4XX_VBIF_ABIT_SORT 0x0000301c + +#define REG_A4XX_VBIF_ABIT_SORT_CONF 0x0000301d + +#define REG_A4XX_VBIF_GATE_OFF_WRREQ_EN 0x0000302a + +#define REG_A4XX_VBIF_IN_RD_LIM_CONF0 0x0000302c + +#define REG_A4XX_VBIF_IN_RD_LIM_CONF1 0x0000302d + +#define REG_A4XX_VBIF_IN_WR_LIM_CONF0 0x00003030 + +#define REG_A4XX_VBIF_IN_WR_LIM_CONF1 0x00003031 + +#define REG_A4XX_VBIF_ROUND_ROBIN_QOS_ARB 0x00003049 + +#define REG_A4XX_VBIF_PERF_CNT_EN0 0x000030c0 + +#define REG_A4XX_VBIF_PERF_CNT_EN1 0x000030c1 + +#define REG_A4XX_VBIF_PERF_CNT_EN2 0x000030c2 + +#define REG_A4XX_VBIF_PERF_CNT_EN3 0x000030c3 + +#define REG_A4XX_VBIF_PERF_CNT_SEL0 0x000030d0 + +#define REG_A4XX_VBIF_PERF_CNT_SEL1 0x000030d1 + +#define REG_A4XX_VBIF_PERF_CNT_SEL2 0x000030d2 + +#define REG_A4XX_VBIF_PERF_CNT_SEL3 0x000030d3 + +#define REG_A4XX_VBIF_PERF_CNT_LOW0 0x000030d8 + +#define REG_A4XX_VBIF_PERF_CNT_LOW1 0x000030d9 + +#define REG_A4XX_VBIF_PERF_CNT_LOW2 0x000030da + +#define REG_A4XX_VBIF_PERF_CNT_LOW3 0x000030db + +#define REG_A4XX_VBIF_PERF_CNT_HIGH0 0x000030e0 + +#define REG_A4XX_VBIF_PERF_CNT_HIGH1 0x000030e1 + +#define REG_A4XX_VBIF_PERF_CNT_HIGH2 0x000030e2 + +#define REG_A4XX_VBIF_PERF_CNT_HIGH3 0x000030e3 + +#define REG_A4XX_VBIF_PERF_PWR_CNT_EN0 0x00003100 + +#define REG_A4XX_VBIF_PERF_PWR_CNT_EN1 0x00003101 + +#define REG_A4XX_VBIF_PERF_PWR_CNT_EN2 0x00003102 + +#define REG_A4XX_UNKNOWN_0CC5 0x00000cc5 + +#define REG_A4XX_UNKNOWN_0CC6 0x00000cc6 + +#define REG_A4XX_UNKNOWN_0D01 0x00000d01 + +#define REG_A4XX_UNKNOWN_0E42 0x00000e42 + +#define REG_A4XX_UNKNOWN_0EC2 0x00000ec2 + +#define REG_A4XX_UNKNOWN_2001 0x00002001 + +#define REG_A4XX_UNKNOWN_209B 0x0000209b + +#define REG_A4XX_UNKNOWN_20EF 0x000020ef + +#define REG_A4XX_UNKNOWN_2152 0x00002152 + +#define REG_A4XX_UNKNOWN_2153 0x00002153 + 
+#define REG_A4XX_UNKNOWN_2154 0x00002154 + +#define REG_A4XX_UNKNOWN_2155 0x00002155 + +#define REG_A4XX_UNKNOWN_2156 0x00002156 + +#define REG_A4XX_UNKNOWN_2157 0x00002157 + +#define REG_A4XX_UNKNOWN_21C3 0x000021c3 + +#define REG_A4XX_UNKNOWN_21E6 0x000021e6 + +#define REG_A4XX_UNKNOWN_2209 0x00002209 + +#define REG_A4XX_UNKNOWN_22D7 0x000022d7 + +#define REG_A4XX_UNKNOWN_2352 0x00002352 + +#define REG_A4XX_TEX_SAMP_0 0x00000000 +#define A4XX_TEX_SAMP_0_MIPFILTER_LINEAR_NEAR 0x00000001 +#define A4XX_TEX_SAMP_0_XY_MAG__MASK 0x00000006 +#define A4XX_TEX_SAMP_0_XY_MAG__SHIFT 1 +static inline uint32_t A4XX_TEX_SAMP_0_XY_MAG(enum a4xx_tex_filter val) +{ + return ((val) << A4XX_TEX_SAMP_0_XY_MAG__SHIFT) & A4XX_TEX_SAMP_0_XY_MAG__MASK; +} +#define A4XX_TEX_SAMP_0_XY_MIN__MASK 0x00000018 +#define A4XX_TEX_SAMP_0_XY_MIN__SHIFT 3 +static inline uint32_t A4XX_TEX_SAMP_0_XY_MIN(enum a4xx_tex_filter val) +{ + return ((val) << A4XX_TEX_SAMP_0_XY_MIN__SHIFT) & A4XX_TEX_SAMP_0_XY_MIN__MASK; +} +#define A4XX_TEX_SAMP_0_WRAP_S__MASK 0x000000e0 +#define A4XX_TEX_SAMP_0_WRAP_S__SHIFT 5 +static inline uint32_t A4XX_TEX_SAMP_0_WRAP_S(enum a4xx_tex_clamp val) +{ + return ((val) << A4XX_TEX_SAMP_0_WRAP_S__SHIFT) & A4XX_TEX_SAMP_0_WRAP_S__MASK; +} +#define A4XX_TEX_SAMP_0_WRAP_T__MASK 0x00000700 +#define A4XX_TEX_SAMP_0_WRAP_T__SHIFT 8 +static inline uint32_t A4XX_TEX_SAMP_0_WRAP_T(enum a4xx_tex_clamp val) +{ + return ((val) << A4XX_TEX_SAMP_0_WRAP_T__SHIFT) & A4XX_TEX_SAMP_0_WRAP_T__MASK; +} +#define A4XX_TEX_SAMP_0_WRAP_R__MASK 0x00003800 +#define A4XX_TEX_SAMP_0_WRAP_R__SHIFT 11 +static inline uint32_t A4XX_TEX_SAMP_0_WRAP_R(enum a4xx_tex_clamp val) +{ + return ((val) << A4XX_TEX_SAMP_0_WRAP_R__SHIFT) & A4XX_TEX_SAMP_0_WRAP_R__MASK; +} +#define A4XX_TEX_SAMP_0_ANISO__MASK 0x0001c000 +#define A4XX_TEX_SAMP_0_ANISO__SHIFT 14 +static inline uint32_t A4XX_TEX_SAMP_0_ANISO(enum a4xx_tex_aniso val) +{ + return ((val) << A4XX_TEX_SAMP_0_ANISO__SHIFT) & A4XX_TEX_SAMP_0_ANISO__MASK; +} 
+#define A4XX_TEX_SAMP_0_LOD_BIAS__MASK 0xfff80000 +#define A4XX_TEX_SAMP_0_LOD_BIAS__SHIFT 19 +static inline uint32_t A4XX_TEX_SAMP_0_LOD_BIAS(float val) +{ + return ((((int32_t)(val * 256.0))) << A4XX_TEX_SAMP_0_LOD_BIAS__SHIFT) & A4XX_TEX_SAMP_0_LOD_BIAS__MASK; +} + +#define REG_A4XX_TEX_SAMP_1 0x00000001 +#define A4XX_TEX_SAMP_1_COMPARE_FUNC__MASK 0x0000000e +#define A4XX_TEX_SAMP_1_COMPARE_FUNC__SHIFT 1 +static inline uint32_t A4XX_TEX_SAMP_1_COMPARE_FUNC(enum adreno_compare_func val) +{ + return ((val) << A4XX_TEX_SAMP_1_COMPARE_FUNC__SHIFT) & A4XX_TEX_SAMP_1_COMPARE_FUNC__MASK; +} +#define A4XX_TEX_SAMP_1_CUBEMAPSEAMLESSFILTOFF 0x00000010 +#define A4XX_TEX_SAMP_1_UNNORM_COORDS 0x00000020 +#define A4XX_TEX_SAMP_1_MIPFILTER_LINEAR_FAR 0x00000040 +#define A4XX_TEX_SAMP_1_MAX_LOD__MASK 0x000fff00 +#define A4XX_TEX_SAMP_1_MAX_LOD__SHIFT 8 +static inline uint32_t A4XX_TEX_SAMP_1_MAX_LOD(float val) +{ + return ((((uint32_t)(val * 256.0))) << A4XX_TEX_SAMP_1_MAX_LOD__SHIFT) & A4XX_TEX_SAMP_1_MAX_LOD__MASK; +} +#define A4XX_TEX_SAMP_1_MIN_LOD__MASK 0xfff00000 +#define A4XX_TEX_SAMP_1_MIN_LOD__SHIFT 20 +static inline uint32_t A4XX_TEX_SAMP_1_MIN_LOD(float val) +{ + return ((((uint32_t)(val * 256.0))) << A4XX_TEX_SAMP_1_MIN_LOD__SHIFT) & A4XX_TEX_SAMP_1_MIN_LOD__MASK; +} + +#define REG_A4XX_TEX_CONST_0 0x00000000 +#define A4XX_TEX_CONST_0_TILED 0x00000001 +#define A4XX_TEX_CONST_0_SRGB 0x00000004 +#define A4XX_TEX_CONST_0_SWIZ_X__MASK 0x00000070 +#define A4XX_TEX_CONST_0_SWIZ_X__SHIFT 4 +static inline uint32_t A4XX_TEX_CONST_0_SWIZ_X(enum a4xx_tex_swiz val) +{ + return ((val) << A4XX_TEX_CONST_0_SWIZ_X__SHIFT) & A4XX_TEX_CONST_0_SWIZ_X__MASK; +} +#define A4XX_TEX_CONST_0_SWIZ_Y__MASK 0x00000380 +#define A4XX_TEX_CONST_0_SWIZ_Y__SHIFT 7 +static inline uint32_t A4XX_TEX_CONST_0_SWIZ_Y(enum a4xx_tex_swiz val) +{ + return ((val) << A4XX_TEX_CONST_0_SWIZ_Y__SHIFT) & A4XX_TEX_CONST_0_SWIZ_Y__MASK; +} +#define A4XX_TEX_CONST_0_SWIZ_Z__MASK 0x00001c00 +#define 
A4XX_TEX_CONST_0_SWIZ_Z__SHIFT 10 +static inline uint32_t A4XX_TEX_CONST_0_SWIZ_Z(enum a4xx_tex_swiz val) +{ + return ((val) << A4XX_TEX_CONST_0_SWIZ_Z__SHIFT) & A4XX_TEX_CONST_0_SWIZ_Z__MASK; +} +#define A4XX_TEX_CONST_0_SWIZ_W__MASK 0x0000e000 +#define A4XX_TEX_CONST_0_SWIZ_W__SHIFT 13 +static inline uint32_t A4XX_TEX_CONST_0_SWIZ_W(enum a4xx_tex_swiz val) +{ + return ((val) << A4XX_TEX_CONST_0_SWIZ_W__SHIFT) & A4XX_TEX_CONST_0_SWIZ_W__MASK; +} +#define A4XX_TEX_CONST_0_MIPLVLS__MASK 0x000f0000 +#define A4XX_TEX_CONST_0_MIPLVLS__SHIFT 16 +static inline uint32_t A4XX_TEX_CONST_0_MIPLVLS(uint32_t val) +{ + return ((val) << A4XX_TEX_CONST_0_MIPLVLS__SHIFT) & A4XX_TEX_CONST_0_MIPLVLS__MASK; +} +#define A4XX_TEX_CONST_0_FMT__MASK 0x1fc00000 +#define A4XX_TEX_CONST_0_FMT__SHIFT 22 +static inline uint32_t A4XX_TEX_CONST_0_FMT(enum a4xx_tex_fmt val) +{ + return ((val) << A4XX_TEX_CONST_0_FMT__SHIFT) & A4XX_TEX_CONST_0_FMT__MASK; +} +#define A4XX_TEX_CONST_0_TYPE__MASK 0x60000000 +#define A4XX_TEX_CONST_0_TYPE__SHIFT 29 +static inline uint32_t A4XX_TEX_CONST_0_TYPE(enum a4xx_tex_type val) +{ + return ((val) << A4XX_TEX_CONST_0_TYPE__SHIFT) & A4XX_TEX_CONST_0_TYPE__MASK; +} + +#define REG_A4XX_TEX_CONST_1 0x00000001 +#define A4XX_TEX_CONST_1_HEIGHT__MASK 0x00007fff +#define A4XX_TEX_CONST_1_HEIGHT__SHIFT 0 +static inline uint32_t A4XX_TEX_CONST_1_HEIGHT(uint32_t val) +{ + return ((val) << A4XX_TEX_CONST_1_HEIGHT__SHIFT) & A4XX_TEX_CONST_1_HEIGHT__MASK; +} +#define A4XX_TEX_CONST_1_WIDTH__MASK 0x3fff8000 +#define A4XX_TEX_CONST_1_WIDTH__SHIFT 15 +static inline uint32_t A4XX_TEX_CONST_1_WIDTH(uint32_t val) +{ + return ((val) << A4XX_TEX_CONST_1_WIDTH__SHIFT) & A4XX_TEX_CONST_1_WIDTH__MASK; +} + +#define REG_A4XX_TEX_CONST_2 0x00000002 +#define A4XX_TEX_CONST_2_FETCHSIZE__MASK 0x0000000f +#define A4XX_TEX_CONST_2_FETCHSIZE__SHIFT 0 +static inline uint32_t A4XX_TEX_CONST_2_FETCHSIZE(enum a4xx_tex_fetchsize val) +{ + return ((val) << A4XX_TEX_CONST_2_FETCHSIZE__SHIFT) & 
A4XX_TEX_CONST_2_FETCHSIZE__MASK; +} +#define A4XX_TEX_CONST_2_PITCH__MASK 0x3ffffe00 +#define A4XX_TEX_CONST_2_PITCH__SHIFT 9 +static inline uint32_t A4XX_TEX_CONST_2_PITCH(uint32_t val) +{ + return ((val) << A4XX_TEX_CONST_2_PITCH__SHIFT) & A4XX_TEX_CONST_2_PITCH__MASK; +} +#define A4XX_TEX_CONST_2_SWAP__MASK 0xc0000000 +#define A4XX_TEX_CONST_2_SWAP__SHIFT 30 +static inline uint32_t A4XX_TEX_CONST_2_SWAP(enum a3xx_color_swap val) +{ + return ((val) << A4XX_TEX_CONST_2_SWAP__SHIFT) & A4XX_TEX_CONST_2_SWAP__MASK; +} + +#define REG_A4XX_TEX_CONST_3 0x00000003 +#define A4XX_TEX_CONST_3_LAYERSZ__MASK 0x00003fff +#define A4XX_TEX_CONST_3_LAYERSZ__SHIFT 0 +static inline uint32_t A4XX_TEX_CONST_3_LAYERSZ(uint32_t val) +{ + assert(!(val & 0xfff)); + return ((val >> 12) << A4XX_TEX_CONST_3_LAYERSZ__SHIFT) & A4XX_TEX_CONST_3_LAYERSZ__MASK; +} +#define A4XX_TEX_CONST_3_DEPTH__MASK 0x7ffc0000 +#define A4XX_TEX_CONST_3_DEPTH__SHIFT 18 +static inline uint32_t A4XX_TEX_CONST_3_DEPTH(uint32_t val) +{ + return ((val) << A4XX_TEX_CONST_3_DEPTH__SHIFT) & A4XX_TEX_CONST_3_DEPTH__MASK; +} + +#define REG_A4XX_TEX_CONST_4 0x00000004 +#define A4XX_TEX_CONST_4_LAYERSZ__MASK 0x0000000f +#define A4XX_TEX_CONST_4_LAYERSZ__SHIFT 0 +static inline uint32_t A4XX_TEX_CONST_4_LAYERSZ(uint32_t val) +{ + assert(!(val & 0xfff)); + return ((val >> 12) << A4XX_TEX_CONST_4_LAYERSZ__SHIFT) & A4XX_TEX_CONST_4_LAYERSZ__MASK; +} +#define A4XX_TEX_CONST_4_BASE__MASK 0xffffffe0 +#define A4XX_TEX_CONST_4_BASE__SHIFT 5 +static inline uint32_t A4XX_TEX_CONST_4_BASE(uint32_t val) +{ + assert(!(val & 0x1f)); + return ((val >> 5) << A4XX_TEX_CONST_4_BASE__SHIFT) & A4XX_TEX_CONST_4_BASE__MASK; +} + +#define REG_A4XX_TEX_CONST_5 0x00000005 + +#define REG_A4XX_TEX_CONST_6 0x00000006 + +#define REG_A4XX_TEX_CONST_7 0x00000007 + +#define REG_A4XX_SSBO_0_0 0x00000000 +#define A4XX_SSBO_0_0_BASE__MASK 0xffffffe0 +#define A4XX_SSBO_0_0_BASE__SHIFT 5 +static inline uint32_t A4XX_SSBO_0_0_BASE(uint32_t val) +{ + 
assert(!(val & 0x1f)); + return ((val >> 5) << A4XX_SSBO_0_0_BASE__SHIFT) & A4XX_SSBO_0_0_BASE__MASK; +} + +#define REG_A4XX_SSBO_0_1 0x00000001 +#define A4XX_SSBO_0_1_PITCH__MASK 0x003fffff +#define A4XX_SSBO_0_1_PITCH__SHIFT 0 +static inline uint32_t A4XX_SSBO_0_1_PITCH(uint32_t val) +{ + return ((val) << A4XX_SSBO_0_1_PITCH__SHIFT) & A4XX_SSBO_0_1_PITCH__MASK; +} + +#define REG_A4XX_SSBO_0_2 0x00000002 +#define A4XX_SSBO_0_2_ARRAY_PITCH__MASK 0x03fff000 +#define A4XX_SSBO_0_2_ARRAY_PITCH__SHIFT 12 +static inline uint32_t A4XX_SSBO_0_2_ARRAY_PITCH(uint32_t val) +{ + assert(!(val & 0xfff)); + return ((val >> 12) << A4XX_SSBO_0_2_ARRAY_PITCH__SHIFT) & A4XX_SSBO_0_2_ARRAY_PITCH__MASK; +} + +#define REG_A4XX_SSBO_0_3 0x00000003 +#define A4XX_SSBO_0_3_CPP__MASK 0x0000003f +#define A4XX_SSBO_0_3_CPP__SHIFT 0 +static inline uint32_t A4XX_SSBO_0_3_CPP(uint32_t val) +{ + return ((val) << A4XX_SSBO_0_3_CPP__SHIFT) & A4XX_SSBO_0_3_CPP__MASK; +} + +#define REG_A4XX_SSBO_1_0 0x00000000 +#define A4XX_SSBO_1_0_CPP__MASK 0x0000001f +#define A4XX_SSBO_1_0_CPP__SHIFT 0 +static inline uint32_t A4XX_SSBO_1_0_CPP(uint32_t val) +{ + return ((val) << A4XX_SSBO_1_0_CPP__SHIFT) & A4XX_SSBO_1_0_CPP__MASK; +} +#define A4XX_SSBO_1_0_FMT__MASK 0x0000ff00 +#define A4XX_SSBO_1_0_FMT__SHIFT 8 +static inline uint32_t A4XX_SSBO_1_0_FMT(enum a4xx_color_fmt val) +{ + return ((val) << A4XX_SSBO_1_0_FMT__SHIFT) & A4XX_SSBO_1_0_FMT__MASK; +} +#define A4XX_SSBO_1_0_WIDTH__MASK 0xffff0000 +#define A4XX_SSBO_1_0_WIDTH__SHIFT 16 +static inline uint32_t A4XX_SSBO_1_0_WIDTH(uint32_t val) +{ + return ((val) << A4XX_SSBO_1_0_WIDTH__SHIFT) & A4XX_SSBO_1_0_WIDTH__MASK; +} + +#define REG_A4XX_SSBO_1_1 0x00000001 +#define A4XX_SSBO_1_1_HEIGHT__MASK 0x0000ffff +#define A4XX_SSBO_1_1_HEIGHT__SHIFT 0 +static inline uint32_t A4XX_SSBO_1_1_HEIGHT(uint32_t val) +{ + return ((val) << A4XX_SSBO_1_1_HEIGHT__SHIFT) & A4XX_SSBO_1_1_HEIGHT__MASK; +} +#define A4XX_SSBO_1_1_DEPTH__MASK 0xffff0000 +#define 
A4XX_SSBO_1_1_DEPTH__SHIFT 16 +static inline uint32_t A4XX_SSBO_1_1_DEPTH(uint32_t val) +{ + return ((val) << A4XX_SSBO_1_1_DEPTH__SHIFT) & A4XX_SSBO_1_1_DEPTH__MASK; +} + + +#endif /* A4XX_XML */ diff -Nru mesa-18.3.3/src/freedreno/registers/a5xx.xml.h mesa-19.0.1/src/freedreno/registers/a5xx.xml.h --- mesa-18.3.3/src/freedreno/registers/a5xx.xml.h 1970-01-01 00:00:00.000000000 +0000 +++ mesa-19.0.1/src/freedreno/registers/a5xx.xml.h 2019-03-31 23:16:37.000000000 +0000 @@ -0,0 +1,5226 @@ +#ifndef A5XX_XML +#define A5XX_XML + +/* Autogenerated file, DO NOT EDIT manually! + +This file was generated by the rules-ng-ng headergen tool in this git repository: +http://github.com/freedreno/envytools/ +git clone https://github.com/freedreno/envytools.git + +The rules-ng-ng source files this header was generated from are: +- /home/robclark/src/envytools/rnndb/adreno.xml ( 501 bytes, from 2018-07-03 19:37:13) +- /home/robclark/src/envytools/rnndb/freedreno_copyright.xml ( 1572 bytes, from 2018-07-03 19:37:13) +- /home/robclark/src/envytools/rnndb/adreno/a2xx.xml ( 42463 bytes, from 2018-11-19 13:44:03) +- /home/robclark/src/envytools/rnndb/adreno/adreno_common.xml ( 14239 bytes, from 2018-12-05 15:25:53) +- /home/robclark/src/envytools/rnndb/adreno/adreno_pm4.xml ( 43052 bytes, from 2018-12-21 18:21:34) +- /home/robclark/src/envytools/rnndb/adreno/a3xx.xml ( 83840 bytes, from 2018-07-03 19:37:13) +- /home/robclark/src/envytools/rnndb/adreno/a4xx.xml ( 112086 bytes, from 2018-07-03 19:37:13) +- /home/robclark/src/envytools/rnndb/adreno/a5xx.xml ( 147240 bytes, from 2018-12-21 18:21:34) +- /home/robclark/src/envytools/rnndb/adreno/a6xx.xml ( 141895 bytes, from 2018-12-21 18:21:34) +- /home/robclark/src/envytools/rnndb/adreno/a6xx_gmu.xml ( 10431 bytes, from 2018-09-14 13:03:07) +- /home/robclark/src/envytools/rnndb/adreno/ocmem.xml ( 1773 bytes, from 2018-07-03 19:37:13) + +Copyright (C) 2013-2018 by the following authors: +- Rob Clark (robclark) +- Ilia Mirkin (imirkin) + 
+Permission is hereby granted, free of charge, to any person obtaining +a copy of this software and associated documentation files (the +"Software"), to deal in the Software without restriction, including +without limitation the rights to use, copy, modify, merge, publish, +distribute, sublicense, and/or sell copies of the Software, and to +permit persons to whom the Software is furnished to do so, subject to +the following conditions: + +The above copyright notice and this permission notice (including the +next paragraph) shall be included in all copies or substantial +portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, +EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF +MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. +IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE +LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION +OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION +WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
+*/ + + +enum a5xx_color_fmt { + RB5_A8_UNORM = 2, + RB5_R8_UNORM = 3, + RB5_R8_SNORM = 4, + RB5_R8_UINT = 5, + RB5_R8_SINT = 6, + RB5_R4G4B4A4_UNORM = 8, + RB5_R5G5B5A1_UNORM = 10, + RB5_R5G6B5_UNORM = 14, + RB5_R8G8_UNORM = 15, + RB5_R8G8_SNORM = 16, + RB5_R8G8_UINT = 17, + RB5_R8G8_SINT = 18, + RB5_R16_UNORM = 21, + RB5_R16_SNORM = 22, + RB5_R16_FLOAT = 23, + RB5_R16_UINT = 24, + RB5_R16_SINT = 25, + RB5_R8G8B8A8_UNORM = 48, + RB5_R8G8B8_UNORM = 49, + RB5_R8G8B8A8_SNORM = 50, + RB5_R8G8B8A8_UINT = 51, + RB5_R8G8B8A8_SINT = 52, + RB5_R10G10B10A2_UNORM = 55, + RB5_R10G10B10A2_UINT = 58, + RB5_R11G11B10_FLOAT = 66, + RB5_R16G16_UNORM = 67, + RB5_R16G16_SNORM = 68, + RB5_R16G16_FLOAT = 69, + RB5_R16G16_UINT = 70, + RB5_R16G16_SINT = 71, + RB5_R32_FLOAT = 74, + RB5_R32_UINT = 75, + RB5_R32_SINT = 76, + RB5_R16G16B16A16_UNORM = 96, + RB5_R16G16B16A16_SNORM = 97, + RB5_R16G16B16A16_FLOAT = 98, + RB5_R16G16B16A16_UINT = 99, + RB5_R16G16B16A16_SINT = 100, + RB5_R32G32_FLOAT = 103, + RB5_R32G32_UINT = 104, + RB5_R32G32_SINT = 105, + RB5_R32G32B32A32_FLOAT = 130, + RB5_R32G32B32A32_UINT = 131, + RB5_R32G32B32A32_SINT = 132, +}; + +enum a5xx_tile_mode { + TILE5_LINEAR = 0, + TILE5_2 = 2, + TILE5_3 = 3, +}; + +enum a5xx_vtx_fmt { + VFMT5_8_UNORM = 3, + VFMT5_8_SNORM = 4, + VFMT5_8_UINT = 5, + VFMT5_8_SINT = 6, + VFMT5_8_8_UNORM = 15, + VFMT5_8_8_SNORM = 16, + VFMT5_8_8_UINT = 17, + VFMT5_8_8_SINT = 18, + VFMT5_16_UNORM = 21, + VFMT5_16_SNORM = 22, + VFMT5_16_FLOAT = 23, + VFMT5_16_UINT = 24, + VFMT5_16_SINT = 25, + VFMT5_8_8_8_UNORM = 33, + VFMT5_8_8_8_SNORM = 34, + VFMT5_8_8_8_UINT = 35, + VFMT5_8_8_8_SINT = 36, + VFMT5_8_8_8_8_UNORM = 48, + VFMT5_8_8_8_8_SNORM = 50, + VFMT5_8_8_8_8_UINT = 51, + VFMT5_8_8_8_8_SINT = 52, + VFMT5_10_10_10_2_UNORM = 54, + VFMT5_10_10_10_2_SNORM = 57, + VFMT5_10_10_10_2_UINT = 58, + VFMT5_10_10_10_2_SINT = 59, + VFMT5_11_11_10_FLOAT = 66, + VFMT5_16_16_UNORM = 67, + VFMT5_16_16_SNORM = 68, + VFMT5_16_16_FLOAT = 69, + VFMT5_16_16_UINT = 70, + 
VFMT5_16_16_SINT = 71, + VFMT5_32_UNORM = 72, + VFMT5_32_SNORM = 73, + VFMT5_32_FLOAT = 74, + VFMT5_32_UINT = 75, + VFMT5_32_SINT = 76, + VFMT5_32_FIXED = 77, + VFMT5_16_16_16_UNORM = 88, + VFMT5_16_16_16_SNORM = 89, + VFMT5_16_16_16_FLOAT = 90, + VFMT5_16_16_16_UINT = 91, + VFMT5_16_16_16_SINT = 92, + VFMT5_16_16_16_16_UNORM = 96, + VFMT5_16_16_16_16_SNORM = 97, + VFMT5_16_16_16_16_FLOAT = 98, + VFMT5_16_16_16_16_UINT = 99, + VFMT5_16_16_16_16_SINT = 100, + VFMT5_32_32_UNORM = 101, + VFMT5_32_32_SNORM = 102, + VFMT5_32_32_FLOAT = 103, + VFMT5_32_32_UINT = 104, + VFMT5_32_32_SINT = 105, + VFMT5_32_32_FIXED = 106, + VFMT5_32_32_32_UNORM = 112, + VFMT5_32_32_32_SNORM = 113, + VFMT5_32_32_32_UINT = 114, + VFMT5_32_32_32_SINT = 115, + VFMT5_32_32_32_FLOAT = 116, + VFMT5_32_32_32_FIXED = 117, + VFMT5_32_32_32_32_UNORM = 128, + VFMT5_32_32_32_32_SNORM = 129, + VFMT5_32_32_32_32_FLOAT = 130, + VFMT5_32_32_32_32_UINT = 131, + VFMT5_32_32_32_32_SINT = 132, + VFMT5_32_32_32_32_FIXED = 133, +}; + +enum a5xx_tex_fmt { + TFMT5_A8_UNORM = 2, + TFMT5_8_UNORM = 3, + TFMT5_8_SNORM = 4, + TFMT5_8_UINT = 5, + TFMT5_8_SINT = 6, + TFMT5_4_4_4_4_UNORM = 8, + TFMT5_5_5_5_1_UNORM = 10, + TFMT5_5_6_5_UNORM = 14, + TFMT5_8_8_UNORM = 15, + TFMT5_8_8_SNORM = 16, + TFMT5_8_8_UINT = 17, + TFMT5_8_8_SINT = 18, + TFMT5_L8_A8_UNORM = 19, + TFMT5_16_UNORM = 21, + TFMT5_16_SNORM = 22, + TFMT5_16_FLOAT = 23, + TFMT5_16_UINT = 24, + TFMT5_16_SINT = 25, + TFMT5_8_8_8_8_UNORM = 48, + TFMT5_8_8_8_UNORM = 49, + TFMT5_8_8_8_8_SNORM = 50, + TFMT5_8_8_8_8_UINT = 51, + TFMT5_8_8_8_8_SINT = 52, + TFMT5_9_9_9_E5_FLOAT = 53, + TFMT5_10_10_10_2_UNORM = 54, + TFMT5_10_10_10_2_UINT = 58, + TFMT5_11_11_10_FLOAT = 66, + TFMT5_16_16_UNORM = 67, + TFMT5_16_16_SNORM = 68, + TFMT5_16_16_FLOAT = 69, + TFMT5_16_16_UINT = 70, + TFMT5_16_16_SINT = 71, + TFMT5_32_FLOAT = 74, + TFMT5_32_UINT = 75, + TFMT5_32_SINT = 76, + TFMT5_16_16_16_16_UNORM = 96, + TFMT5_16_16_16_16_SNORM = 97, + TFMT5_16_16_16_16_FLOAT = 98, + 
TFMT5_16_16_16_16_UINT = 99, + TFMT5_16_16_16_16_SINT = 100, + TFMT5_32_32_FLOAT = 103, + TFMT5_32_32_UINT = 104, + TFMT5_32_32_SINT = 105, + TFMT5_32_32_32_UINT = 114, + TFMT5_32_32_32_SINT = 115, + TFMT5_32_32_32_FLOAT = 116, + TFMT5_32_32_32_32_FLOAT = 130, + TFMT5_32_32_32_32_UINT = 131, + TFMT5_32_32_32_32_SINT = 132, + TFMT5_X8Z24_UNORM = 160, + TFMT5_ETC2_RG11_UNORM = 171, + TFMT5_ETC2_RG11_SNORM = 172, + TFMT5_ETC2_R11_UNORM = 173, + TFMT5_ETC2_R11_SNORM = 174, + TFMT5_ETC1 = 175, + TFMT5_ETC2_RGB8 = 176, + TFMT5_ETC2_RGBA8 = 177, + TFMT5_ETC2_RGB8A1 = 178, + TFMT5_DXT1 = 179, + TFMT5_DXT3 = 180, + TFMT5_DXT5 = 181, + TFMT5_RGTC1_UNORM = 183, + TFMT5_RGTC1_SNORM = 184, + TFMT5_RGTC2_UNORM = 187, + TFMT5_RGTC2_SNORM = 188, + TFMT5_BPTC_UFLOAT = 190, + TFMT5_BPTC_FLOAT = 191, + TFMT5_BPTC = 192, + TFMT5_ASTC_4x4 = 193, + TFMT5_ASTC_5x4 = 194, + TFMT5_ASTC_5x5 = 195, + TFMT5_ASTC_6x5 = 196, + TFMT5_ASTC_6x6 = 197, + TFMT5_ASTC_8x5 = 198, + TFMT5_ASTC_8x6 = 199, + TFMT5_ASTC_8x8 = 200, + TFMT5_ASTC_10x5 = 201, + TFMT5_ASTC_10x6 = 202, + TFMT5_ASTC_10x8 = 203, + TFMT5_ASTC_10x10 = 204, + TFMT5_ASTC_12x10 = 205, + TFMT5_ASTC_12x12 = 206, +}; + +enum a5xx_tex_fetchsize { + TFETCH5_1_BYTE = 0, + TFETCH5_2_BYTE = 1, + TFETCH5_4_BYTE = 2, + TFETCH5_8_BYTE = 3, + TFETCH5_16_BYTE = 4, +}; + +enum a5xx_depth_format { + DEPTH5_NONE = 0, + DEPTH5_16 = 1, + DEPTH5_24_8 = 2, + DEPTH5_32 = 4, +}; + +enum a5xx_blit_buf { + BLIT_MRT0 = 0, + BLIT_MRT1 = 1, + BLIT_MRT2 = 2, + BLIT_MRT3 = 3, + BLIT_MRT4 = 4, + BLIT_MRT5 = 5, + BLIT_MRT6 = 6, + BLIT_MRT7 = 7, + BLIT_ZS = 8, + BLIT_S = 9, +}; + +enum a5xx_cp_perfcounter_select { + PERF_CP_ALWAYS_COUNT = 0, + PERF_CP_BUSY_GFX_CORE_IDLE = 1, + PERF_CP_BUSY_CYCLES = 2, + PERF_CP_PFP_IDLE = 3, + PERF_CP_PFP_BUSY_WORKING = 4, + PERF_CP_PFP_STALL_CYCLES_ANY = 5, + PERF_CP_PFP_STARVE_CYCLES_ANY = 6, + PERF_CP_PFP_ICACHE_MISS = 7, + PERF_CP_PFP_ICACHE_HIT = 8, + PERF_CP_PFP_MATCH_PM4_PKT_PROFILE = 9, + PERF_CP_ME_BUSY_WORKING = 10, + 
PERF_CP_ME_IDLE = 11, + PERF_CP_ME_STARVE_CYCLES_ANY = 12, + PERF_CP_ME_FIFO_EMPTY_PFP_IDLE = 13, + PERF_CP_ME_FIFO_EMPTY_PFP_BUSY = 14, + PERF_CP_ME_FIFO_FULL_ME_BUSY = 15, + PERF_CP_ME_FIFO_FULL_ME_NON_WORKING = 16, + PERF_CP_ME_STALL_CYCLES_ANY = 17, + PERF_CP_ME_ICACHE_MISS = 18, + PERF_CP_ME_ICACHE_HIT = 19, + PERF_CP_NUM_PREEMPTIONS = 20, + PERF_CP_PREEMPTION_REACTION_DELAY = 21, + PERF_CP_PREEMPTION_SWITCH_OUT_TIME = 22, + PERF_CP_PREEMPTION_SWITCH_IN_TIME = 23, + PERF_CP_DEAD_DRAWS_IN_BIN_RENDER = 24, + PERF_CP_PREDICATED_DRAWS_KILLED = 25, + PERF_CP_MODE_SWITCH = 26, + PERF_CP_ZPASS_DONE = 27, + PERF_CP_CONTEXT_DONE = 28, + PERF_CP_CACHE_FLUSH = 29, + PERF_CP_LONG_PREEMPTIONS = 30, +}; + +enum a5xx_rbbm_perfcounter_select { + PERF_RBBM_ALWAYS_COUNT = 0, + PERF_RBBM_ALWAYS_ON = 1, + PERF_RBBM_TSE_BUSY = 2, + PERF_RBBM_RAS_BUSY = 3, + PERF_RBBM_PC_DCALL_BUSY = 4, + PERF_RBBM_PC_VSD_BUSY = 5, + PERF_RBBM_STATUS_MASKED = 6, + PERF_RBBM_COM_BUSY = 7, + PERF_RBBM_DCOM_BUSY = 8, + PERF_RBBM_VBIF_BUSY = 9, + PERF_RBBM_VSC_BUSY = 10, + PERF_RBBM_TESS_BUSY = 11, + PERF_RBBM_UCHE_BUSY = 12, + PERF_RBBM_HLSQ_BUSY = 13, +}; + +enum a5xx_pc_perfcounter_select { + PERF_PC_BUSY_CYCLES = 0, + PERF_PC_WORKING_CYCLES = 1, + PERF_PC_STALL_CYCLES_VFD = 2, + PERF_PC_STALL_CYCLES_TSE = 3, + PERF_PC_STALL_CYCLES_VPC = 4, + PERF_PC_STALL_CYCLES_UCHE = 5, + PERF_PC_STALL_CYCLES_TESS = 6, + PERF_PC_STALL_CYCLES_TSE_ONLY = 7, + PERF_PC_STALL_CYCLES_VPC_ONLY = 8, + PERF_PC_PASS1_TF_STALL_CYCLES = 9, + PERF_PC_STARVE_CYCLES_FOR_INDEX = 10, + PERF_PC_STARVE_CYCLES_FOR_TESS_FACTOR = 11, + PERF_PC_STARVE_CYCLES_FOR_VIZ_STREAM = 12, + PERF_PC_STARVE_CYCLES_FOR_POSITION = 13, + PERF_PC_STARVE_CYCLES_DI = 14, + PERF_PC_VIS_STREAMS_LOADED = 15, + PERF_PC_INSTANCES = 16, + PERF_PC_VPC_PRIMITIVES = 17, + PERF_PC_DEAD_PRIM = 18, + PERF_PC_LIVE_PRIM = 19, + PERF_PC_VERTEX_HITS = 20, + PERF_PC_IA_VERTICES = 21, + PERF_PC_IA_PRIMITIVES = 22, + PERF_PC_GS_PRIMITIVES = 23, + PERF_PC_HS_INVOCATIONS = 
24, + PERF_PC_DS_INVOCATIONS = 25, + PERF_PC_VS_INVOCATIONS = 26, + PERF_PC_GS_INVOCATIONS = 27, + PERF_PC_DS_PRIMITIVES = 28, + PERF_PC_VPC_POS_DATA_TRANSACTION = 29, + PERF_PC_3D_DRAWCALLS = 30, + PERF_PC_2D_DRAWCALLS = 31, + PERF_PC_NON_DRAWCALL_GLOBAL_EVENTS = 32, + PERF_TESS_BUSY_CYCLES = 33, + PERF_TESS_WORKING_CYCLES = 34, + PERF_TESS_STALL_CYCLES_PC = 35, + PERF_TESS_STARVE_CYCLES_PC = 36, +}; + +enum a5xx_vfd_perfcounter_select { + PERF_VFD_BUSY_CYCLES = 0, + PERF_VFD_STALL_CYCLES_UCHE = 1, + PERF_VFD_STALL_CYCLES_VPC_ALLOC = 2, + PERF_VFD_STALL_CYCLES_MISS_VB = 3, + PERF_VFD_STALL_CYCLES_MISS_Q = 4, + PERF_VFD_STALL_CYCLES_SP_INFO = 5, + PERF_VFD_STALL_CYCLES_SP_ATTR = 6, + PERF_VFD_STALL_CYCLES_VFDP_VB = 7, + PERF_VFD_STALL_CYCLES_VFDP_Q = 8, + PERF_VFD_DECODER_PACKER_STALL = 9, + PERF_VFD_STARVE_CYCLES_UCHE = 10, + PERF_VFD_RBUFFER_FULL = 11, + PERF_VFD_ATTR_INFO_FIFO_FULL = 12, + PERF_VFD_DECODED_ATTRIBUTE_BYTES = 13, + PERF_VFD_NUM_ATTRIBUTES = 14, + PERF_VFD_INSTRUCTIONS = 15, + PERF_VFD_UPPER_SHADER_FIBERS = 16, + PERF_VFD_LOWER_SHADER_FIBERS = 17, + PERF_VFD_MODE_0_FIBERS = 18, + PERF_VFD_MODE_1_FIBERS = 19, + PERF_VFD_MODE_2_FIBERS = 20, + PERF_VFD_MODE_3_FIBERS = 21, + PERF_VFD_MODE_4_FIBERS = 22, + PERF_VFD_TOTAL_VERTICES = 23, + PERF_VFD_NUM_ATTR_MISS = 24, + PERF_VFD_1_BURST_REQ = 25, + PERF_VFDP_STALL_CYCLES_VFD = 26, + PERF_VFDP_STALL_CYCLES_VFD_INDEX = 27, + PERF_VFDP_STALL_CYCLES_VFD_PROG = 28, + PERF_VFDP_STARVE_CYCLES_PC = 29, + PERF_VFDP_VS_STAGE_32_WAVES = 30, +}; + +enum a5xx_hlsq_perfcounter_select { + PERF_HLSQ_BUSY_CYCLES = 0, + PERF_HLSQ_STALL_CYCLES_UCHE = 1, + PERF_HLSQ_STALL_CYCLES_SP_STATE = 2, + PERF_HLSQ_STALL_CYCLES_SP_FS_STAGE = 3, + PERF_HLSQ_UCHE_LATENCY_CYCLES = 4, + PERF_HLSQ_UCHE_LATENCY_COUNT = 5, + PERF_HLSQ_FS_STAGE_32_WAVES = 6, + PERF_HLSQ_FS_STAGE_64_WAVES = 7, + PERF_HLSQ_QUADS = 8, + PERF_HLSQ_SP_STATE_COPY_TRANS_FS_STAGE = 9, + PERF_HLSQ_SP_STATE_COPY_TRANS_VS_STAGE = 10, + 
PERF_HLSQ_TP_STATE_COPY_TRANS_FS_STAGE = 11, + PERF_HLSQ_TP_STATE_COPY_TRANS_VS_STAGE = 12, + PERF_HLSQ_CS_INVOCATIONS = 13, + PERF_HLSQ_COMPUTE_DRAWCALLS = 14, +}; + +enum a5xx_vpc_perfcounter_select { + PERF_VPC_BUSY_CYCLES = 0, + PERF_VPC_WORKING_CYCLES = 1, + PERF_VPC_STALL_CYCLES_UCHE = 2, + PERF_VPC_STALL_CYCLES_VFD_WACK = 3, + PERF_VPC_STALL_CYCLES_HLSQ_PRIM_ALLOC = 4, + PERF_VPC_STALL_CYCLES_PC = 5, + PERF_VPC_STALL_CYCLES_SP_LM = 6, + PERF_VPC_POS_EXPORT_STALL_CYCLES = 7, + PERF_VPC_STARVE_CYCLES_SP = 8, + PERF_VPC_STARVE_CYCLES_LRZ = 9, + PERF_VPC_PC_PRIMITIVES = 10, + PERF_VPC_SP_COMPONENTS = 11, + PERF_VPC_SP_LM_PRIMITIVES = 12, + PERF_VPC_SP_LM_COMPONENTS = 13, + PERF_VPC_SP_LM_DWORDS = 14, + PERF_VPC_STREAMOUT_COMPONENTS = 15, + PERF_VPC_GRANT_PHASES = 16, +}; + +enum a5xx_tse_perfcounter_select { + PERF_TSE_BUSY_CYCLES = 0, + PERF_TSE_CLIPPING_CYCLES = 1, + PERF_TSE_STALL_CYCLES_RAS = 2, + PERF_TSE_STALL_CYCLES_LRZ_BARYPLANE = 3, + PERF_TSE_STALL_CYCLES_LRZ_ZPLANE = 4, + PERF_TSE_STARVE_CYCLES_PC = 5, + PERF_TSE_INPUT_PRIM = 6, + PERF_TSE_INPUT_NULL_PRIM = 7, + PERF_TSE_TRIVAL_REJ_PRIM = 8, + PERF_TSE_CLIPPED_PRIM = 9, + PERF_TSE_ZERO_AREA_PRIM = 10, + PERF_TSE_FACENESS_CULLED_PRIM = 11, + PERF_TSE_ZERO_PIXEL_PRIM = 12, + PERF_TSE_OUTPUT_NULL_PRIM = 13, + PERF_TSE_OUTPUT_VISIBLE_PRIM = 14, + PERF_TSE_CINVOCATION = 15, + PERF_TSE_CPRIMITIVES = 16, + PERF_TSE_2D_INPUT_PRIM = 17, + PERF_TSE_2D_ALIVE_CLCLES = 18, +}; + +enum a5xx_ras_perfcounter_select { + PERF_RAS_BUSY_CYCLES = 0, + PERF_RAS_SUPERTILE_ACTIVE_CYCLES = 1, + PERF_RAS_STALL_CYCLES_LRZ = 2, + PERF_RAS_STARVE_CYCLES_TSE = 3, + PERF_RAS_SUPER_TILES = 4, + PERF_RAS_8X4_TILES = 5, + PERF_RAS_MASKGEN_ACTIVE = 6, + PERF_RAS_FULLY_COVERED_SUPER_TILES = 7, + PERF_RAS_FULLY_COVERED_8X4_TILES = 8, + PERF_RAS_PRIM_KILLED_INVISILBE = 9, +}; + +enum a5xx_lrz_perfcounter_select { + PERF_LRZ_BUSY_CYCLES = 0, + PERF_LRZ_STARVE_CYCLES_RAS = 1, + PERF_LRZ_STALL_CYCLES_RB = 2, + PERF_LRZ_STALL_CYCLES_VSC = 3, 
+ PERF_LRZ_STALL_CYCLES_VPC = 4, + PERF_LRZ_STALL_CYCLES_FLAG_PREFETCH = 5, + PERF_LRZ_STALL_CYCLES_UCHE = 6, + PERF_LRZ_LRZ_READ = 7, + PERF_LRZ_LRZ_WRITE = 8, + PERF_LRZ_READ_LATENCY = 9, + PERF_LRZ_MERGE_CACHE_UPDATING = 10, + PERF_LRZ_PRIM_KILLED_BY_MASKGEN = 11, + PERF_LRZ_PRIM_KILLED_BY_LRZ = 12, + PERF_LRZ_VISIBLE_PRIM_AFTER_LRZ = 13, + PERF_LRZ_FULL_8X8_TILES = 14, + PERF_LRZ_PARTIAL_8X8_TILES = 15, + PERF_LRZ_TILE_KILLED = 16, + PERF_LRZ_TOTAL_PIXEL = 17, + PERF_LRZ_VISIBLE_PIXEL_AFTER_LRZ = 18, +}; + +enum a5xx_uche_perfcounter_select { + PERF_UCHE_BUSY_CYCLES = 0, + PERF_UCHE_STALL_CYCLES_VBIF = 1, + PERF_UCHE_VBIF_LATENCY_CYCLES = 2, + PERF_UCHE_VBIF_LATENCY_SAMPLES = 3, + PERF_UCHE_VBIF_READ_BEATS_TP = 4, + PERF_UCHE_VBIF_READ_BEATS_VFD = 5, + PERF_UCHE_VBIF_READ_BEATS_HLSQ = 6, + PERF_UCHE_VBIF_READ_BEATS_LRZ = 7, + PERF_UCHE_VBIF_READ_BEATS_SP = 8, + PERF_UCHE_READ_REQUESTS_TP = 9, + PERF_UCHE_READ_REQUESTS_VFD = 10, + PERF_UCHE_READ_REQUESTS_HLSQ = 11, + PERF_UCHE_READ_REQUESTS_LRZ = 12, + PERF_UCHE_READ_REQUESTS_SP = 13, + PERF_UCHE_WRITE_REQUESTS_LRZ = 14, + PERF_UCHE_WRITE_REQUESTS_SP = 15, + PERF_UCHE_WRITE_REQUESTS_VPC = 16, + PERF_UCHE_WRITE_REQUESTS_VSC = 17, + PERF_UCHE_EVICTS = 18, + PERF_UCHE_BANK_REQ0 = 19, + PERF_UCHE_BANK_REQ1 = 20, + PERF_UCHE_BANK_REQ2 = 21, + PERF_UCHE_BANK_REQ3 = 22, + PERF_UCHE_BANK_REQ4 = 23, + PERF_UCHE_BANK_REQ5 = 24, + PERF_UCHE_BANK_REQ6 = 25, + PERF_UCHE_BANK_REQ7 = 26, + PERF_UCHE_VBIF_READ_BEATS_CH0 = 27, + PERF_UCHE_VBIF_READ_BEATS_CH1 = 28, + PERF_UCHE_GMEM_READ_BEATS = 29, + PERF_UCHE_FLAG_COUNT = 30, +}; + +enum a5xx_tp_perfcounter_select { + PERF_TP_BUSY_CYCLES = 0, + PERF_TP_STALL_CYCLES_UCHE = 1, + PERF_TP_LATENCY_CYCLES = 2, + PERF_TP_LATENCY_TRANS = 3, + PERF_TP_FLAG_CACHE_REQUEST_SAMPLES = 4, + PERF_TP_FLAG_CACHE_REQUEST_LATENCY = 5, + PERF_TP_L1_CACHELINE_REQUESTS = 6, + PERF_TP_L1_CACHELINE_MISSES = 7, + PERF_TP_SP_TP_TRANS = 8, + PERF_TP_TP_SP_TRANS = 9, + PERF_TP_OUTPUT_PIXELS = 10, + 
PERF_TP_FILTER_WORKLOAD_16BIT = 11, + PERF_TP_FILTER_WORKLOAD_32BIT = 12, + PERF_TP_QUADS_RECEIVED = 13, + PERF_TP_QUADS_OFFSET = 14, + PERF_TP_QUADS_SHADOW = 15, + PERF_TP_QUADS_ARRAY = 16, + PERF_TP_QUADS_GRADIENT = 17, + PERF_TP_QUADS_1D = 18, + PERF_TP_QUADS_2D = 19, + PERF_TP_QUADS_BUFFER = 20, + PERF_TP_QUADS_3D = 21, + PERF_TP_QUADS_CUBE = 22, + PERF_TP_STATE_CACHE_REQUESTS = 23, + PERF_TP_STATE_CACHE_MISSES = 24, + PERF_TP_DIVERGENT_QUADS_RECEIVED = 25, + PERF_TP_BINDLESS_STATE_CACHE_REQUESTS = 26, + PERF_TP_BINDLESS_STATE_CACHE_MISSES = 27, + PERF_TP_PRT_NON_RESIDENT_EVENTS = 28, + PERF_TP_OUTPUT_PIXELS_POINT = 29, + PERF_TP_OUTPUT_PIXELS_BILINEAR = 30, + PERF_TP_OUTPUT_PIXELS_MIP = 31, + PERF_TP_OUTPUT_PIXELS_ANISO = 32, + PERF_TP_OUTPUT_PIXELS_ZERO_LOD = 33, + PERF_TP_FLAG_CACHE_REQUESTS = 34, + PERF_TP_FLAG_CACHE_MISSES = 35, + PERF_TP_L1_5_L2_REQUESTS = 36, + PERF_TP_2D_OUTPUT_PIXELS = 37, + PERF_TP_2D_OUTPUT_PIXELS_POINT = 38, + PERF_TP_2D_OUTPUT_PIXELS_BILINEAR = 39, + PERF_TP_2D_FILTER_WORKLOAD_16BIT = 40, + PERF_TP_2D_FILTER_WORKLOAD_32BIT = 41, +}; + +enum a5xx_sp_perfcounter_select { + PERF_SP_BUSY_CYCLES = 0, + PERF_SP_ALU_WORKING_CYCLES = 1, + PERF_SP_EFU_WORKING_CYCLES = 2, + PERF_SP_STALL_CYCLES_VPC = 3, + PERF_SP_STALL_CYCLES_TP = 4, + PERF_SP_STALL_CYCLES_UCHE = 5, + PERF_SP_STALL_CYCLES_RB = 6, + PERF_SP_SCHEDULER_NON_WORKING = 7, + PERF_SP_WAVE_CONTEXTS = 8, + PERF_SP_WAVE_CONTEXT_CYCLES = 9, + PERF_SP_FS_STAGE_WAVE_CYCLES = 10, + PERF_SP_FS_STAGE_WAVE_SAMPLES = 11, + PERF_SP_VS_STAGE_WAVE_CYCLES = 12, + PERF_SP_VS_STAGE_WAVE_SAMPLES = 13, + PERF_SP_FS_STAGE_DURATION_CYCLES = 14, + PERF_SP_VS_STAGE_DURATION_CYCLES = 15, + PERF_SP_WAVE_CTRL_CYCLES = 16, + PERF_SP_WAVE_LOAD_CYCLES = 17, + PERF_SP_WAVE_EMIT_CYCLES = 18, + PERF_SP_WAVE_NOP_CYCLES = 19, + PERF_SP_WAVE_WAIT_CYCLES = 20, + PERF_SP_WAVE_FETCH_CYCLES = 21, + PERF_SP_WAVE_IDLE_CYCLES = 22, + PERF_SP_WAVE_END_CYCLES = 23, + PERF_SP_WAVE_LONG_SYNC_CYCLES = 24, + 
PERF_SP_WAVE_SHORT_SYNC_CYCLES = 25, + PERF_SP_WAVE_JOIN_CYCLES = 26, + PERF_SP_LM_LOAD_INSTRUCTIONS = 27, + PERF_SP_LM_STORE_INSTRUCTIONS = 28, + PERF_SP_LM_ATOMICS = 29, + PERF_SP_GM_LOAD_INSTRUCTIONS = 30, + PERF_SP_GM_STORE_INSTRUCTIONS = 31, + PERF_SP_GM_ATOMICS = 32, + PERF_SP_VS_STAGE_TEX_INSTRUCTIONS = 33, + PERF_SP_VS_STAGE_CFLOW_INSTRUCTIONS = 34, + PERF_SP_VS_STAGE_EFU_INSTRUCTIONS = 35, + PERF_SP_VS_STAGE_FULL_ALU_INSTRUCTIONS = 36, + PERF_SP_VS_STAGE_HALF_ALU_INSTRUCTIONS = 37, + PERF_SP_FS_STAGE_TEX_INSTRUCTIONS = 38, + PERF_SP_FS_STAGE_CFLOW_INSTRUCTIONS = 39, + PERF_SP_FS_STAGE_EFU_INSTRUCTIONS = 40, + PERF_SP_FS_STAGE_FULL_ALU_INSTRUCTIONS = 41, + PERF_SP_FS_STAGE_HALF_ALU_INSTRUCTIONS = 42, + PERF_SP_FS_STAGE_BARY_INSTRUCTIONS = 43, + PERF_SP_VS_INSTRUCTIONS = 44, + PERF_SP_FS_INSTRUCTIONS = 45, + PERF_SP_ADDR_LOCK_COUNT = 46, + PERF_SP_UCHE_READ_TRANS = 47, + PERF_SP_UCHE_WRITE_TRANS = 48, + PERF_SP_EXPORT_VPC_TRANS = 49, + PERF_SP_EXPORT_RB_TRANS = 50, + PERF_SP_PIXELS_KILLED = 51, + PERF_SP_ICL1_REQUESTS = 52, + PERF_SP_ICL1_MISSES = 53, + PERF_SP_ICL0_REQUESTS = 54, + PERF_SP_ICL0_MISSES = 55, + PERF_SP_HS_INSTRUCTIONS = 56, + PERF_SP_DS_INSTRUCTIONS = 57, + PERF_SP_GS_INSTRUCTIONS = 58, + PERF_SP_CS_INSTRUCTIONS = 59, + PERF_SP_GPR_READ = 60, + PERF_SP_GPR_WRITE = 61, + PERF_SP_LM_CH0_REQUESTS = 62, + PERF_SP_LM_CH1_REQUESTS = 63, + PERF_SP_LM_BANK_CONFLICTS = 64, +}; + +enum a5xx_rb_perfcounter_select { + PERF_RB_BUSY_CYCLES = 0, + PERF_RB_STALL_CYCLES_CCU = 1, + PERF_RB_STALL_CYCLES_HLSQ = 2, + PERF_RB_STALL_CYCLES_FIFO0_FULL = 3, + PERF_RB_STALL_CYCLES_FIFO1_FULL = 4, + PERF_RB_STALL_CYCLES_FIFO2_FULL = 5, + PERF_RB_STARVE_CYCLES_SP = 6, + PERF_RB_STARVE_CYCLES_LRZ_TILE = 7, + PERF_RB_STARVE_CYCLES_CCU = 8, + PERF_RB_STARVE_CYCLES_Z_PLANE = 9, + PERF_RB_STARVE_CYCLES_BARY_PLANE = 10, + PERF_RB_Z_WORKLOAD = 11, + PERF_RB_HLSQ_ACTIVE = 12, + PERF_RB_Z_READ = 13, + PERF_RB_Z_WRITE = 14, + PERF_RB_C_READ = 15, + PERF_RB_C_WRITE = 16, + 
PERF_RB_TOTAL_PASS = 17, + PERF_RB_Z_PASS = 18, + PERF_RB_Z_FAIL = 19, + PERF_RB_S_FAIL = 20, + PERF_RB_BLENDED_FXP_COMPONENTS = 21, + PERF_RB_BLENDED_FP16_COMPONENTS = 22, + RB_RESERVED = 23, + PERF_RB_2D_ALIVE_CYCLES = 24, + PERF_RB_2D_STALL_CYCLES_A2D = 25, + PERF_RB_2D_STARVE_CYCLES_SRC = 26, + PERF_RB_2D_STARVE_CYCLES_SP = 27, + PERF_RB_2D_STARVE_CYCLES_DST = 28, + PERF_RB_2D_VALID_PIXELS = 29, +}; + +enum a5xx_rb_samples_perfcounter_select { + TOTAL_SAMPLES = 0, + ZPASS_SAMPLES = 1, + ZFAIL_SAMPLES = 2, + SFAIL_SAMPLES = 3, +}; + +enum a5xx_vsc_perfcounter_select { + PERF_VSC_BUSY_CYCLES = 0, + PERF_VSC_WORKING_CYCLES = 1, + PERF_VSC_STALL_CYCLES_UCHE = 2, + PERF_VSC_EOT_NUM = 3, +}; + +enum a5xx_ccu_perfcounter_select { + PERF_CCU_BUSY_CYCLES = 0, + PERF_CCU_STALL_CYCLES_RB_DEPTH_RETURN = 1, + PERF_CCU_STALL_CYCLES_RB_COLOR_RETURN = 2, + PERF_CCU_STARVE_CYCLES_FLAG_RETURN = 3, + PERF_CCU_DEPTH_BLOCKS = 4, + PERF_CCU_COLOR_BLOCKS = 5, + PERF_CCU_DEPTH_BLOCK_HIT = 6, + PERF_CCU_COLOR_BLOCK_HIT = 7, + PERF_CCU_PARTIAL_BLOCK_READ = 8, + PERF_CCU_GMEM_READ = 9, + PERF_CCU_GMEM_WRITE = 10, + PERF_CCU_DEPTH_READ_FLAG0_COUNT = 11, + PERF_CCU_DEPTH_READ_FLAG1_COUNT = 12, + PERF_CCU_DEPTH_READ_FLAG2_COUNT = 13, + PERF_CCU_DEPTH_READ_FLAG3_COUNT = 14, + PERF_CCU_DEPTH_READ_FLAG4_COUNT = 15, + PERF_CCU_COLOR_READ_FLAG0_COUNT = 16, + PERF_CCU_COLOR_READ_FLAG1_COUNT = 17, + PERF_CCU_COLOR_READ_FLAG2_COUNT = 18, + PERF_CCU_COLOR_READ_FLAG3_COUNT = 19, + PERF_CCU_COLOR_READ_FLAG4_COUNT = 20, + PERF_CCU_2D_BUSY_CYCLES = 21, + PERF_CCU_2D_RD_REQ = 22, + PERF_CCU_2D_WR_REQ = 23, + PERF_CCU_2D_REORDER_STARVE_CYCLES = 24, + PERF_CCU_2D_PIXELS = 25, +}; + +enum a5xx_cmp_perfcounter_select { + PERF_CMPDECMP_STALL_CYCLES_VBIF = 0, + PERF_CMPDECMP_VBIF_LATENCY_CYCLES = 1, + PERF_CMPDECMP_VBIF_LATENCY_SAMPLES = 2, + PERF_CMPDECMP_VBIF_READ_DATA_CCU = 3, + PERF_CMPDECMP_VBIF_WRITE_DATA_CCU = 4, + PERF_CMPDECMP_VBIF_READ_REQUEST = 5, + PERF_CMPDECMP_VBIF_WRITE_REQUEST = 6, + 
PERF_CMPDECMP_VBIF_READ_DATA = 7, + PERF_CMPDECMP_VBIF_WRITE_DATA = 8, + PERF_CMPDECMP_FLAG_FETCH_CYCLES = 9, + PERF_CMPDECMP_FLAG_FETCH_SAMPLES = 10, + PERF_CMPDECMP_DEPTH_WRITE_FLAG1_COUNT = 11, + PERF_CMPDECMP_DEPTH_WRITE_FLAG2_COUNT = 12, + PERF_CMPDECMP_DEPTH_WRITE_FLAG3_COUNT = 13, + PERF_CMPDECMP_DEPTH_WRITE_FLAG4_COUNT = 14, + PERF_CMPDECMP_COLOR_WRITE_FLAG1_COUNT = 15, + PERF_CMPDECMP_COLOR_WRITE_FLAG2_COUNT = 16, + PERF_CMPDECMP_COLOR_WRITE_FLAG3_COUNT = 17, + PERF_CMPDECMP_COLOR_WRITE_FLAG4_COUNT = 18, + PERF_CMPDECMP_2D_STALL_CYCLES_VBIF_REQ = 19, + PERF_CMPDECMP_2D_STALL_CYCLES_VBIF_WR = 20, + PERF_CMPDECMP_2D_STALL_CYCLES_VBIF_RETURN = 21, + PERF_CMPDECMP_2D_RD_DATA = 22, + PERF_CMPDECMP_2D_WR_DATA = 23, +}; + +enum a5xx_vbif_perfcounter_select { + AXI_READ_REQUESTS_ID_0 = 0, + AXI_READ_REQUESTS_ID_1 = 1, + AXI_READ_REQUESTS_ID_2 = 2, + AXI_READ_REQUESTS_ID_3 = 3, + AXI_READ_REQUESTS_ID_4 = 4, + AXI_READ_REQUESTS_ID_5 = 5, + AXI_READ_REQUESTS_ID_6 = 6, + AXI_READ_REQUESTS_ID_7 = 7, + AXI_READ_REQUESTS_ID_8 = 8, + AXI_READ_REQUESTS_ID_9 = 9, + AXI_READ_REQUESTS_ID_10 = 10, + AXI_READ_REQUESTS_ID_11 = 11, + AXI_READ_REQUESTS_ID_12 = 12, + AXI_READ_REQUESTS_ID_13 = 13, + AXI_READ_REQUESTS_ID_14 = 14, + AXI_READ_REQUESTS_ID_15 = 15, + AXI0_READ_REQUESTS_TOTAL = 16, + AXI1_READ_REQUESTS_TOTAL = 17, + AXI2_READ_REQUESTS_TOTAL = 18, + AXI3_READ_REQUESTS_TOTAL = 19, + AXI_READ_REQUESTS_TOTAL = 20, + AXI_WRITE_REQUESTS_ID_0 = 21, + AXI_WRITE_REQUESTS_ID_1 = 22, + AXI_WRITE_REQUESTS_ID_2 = 23, + AXI_WRITE_REQUESTS_ID_3 = 24, + AXI_WRITE_REQUESTS_ID_4 = 25, + AXI_WRITE_REQUESTS_ID_5 = 26, + AXI_WRITE_REQUESTS_ID_6 = 27, + AXI_WRITE_REQUESTS_ID_7 = 28, + AXI_WRITE_REQUESTS_ID_8 = 29, + AXI_WRITE_REQUESTS_ID_9 = 30, + AXI_WRITE_REQUESTS_ID_10 = 31, + AXI_WRITE_REQUESTS_ID_11 = 32, + AXI_WRITE_REQUESTS_ID_12 = 33, + AXI_WRITE_REQUESTS_ID_13 = 34, + AXI_WRITE_REQUESTS_ID_14 = 35, + AXI_WRITE_REQUESTS_ID_15 = 36, + AXI0_WRITE_REQUESTS_TOTAL = 37, + 
AXI1_WRITE_REQUESTS_TOTAL = 38, + AXI2_WRITE_REQUESTS_TOTAL = 39, + AXI3_WRITE_REQUESTS_TOTAL = 40, + AXI_WRITE_REQUESTS_TOTAL = 41, + AXI_TOTAL_REQUESTS = 42, + AXI_READ_DATA_BEATS_ID_0 = 43, + AXI_READ_DATA_BEATS_ID_1 = 44, + AXI_READ_DATA_BEATS_ID_2 = 45, + AXI_READ_DATA_BEATS_ID_3 = 46, + AXI_READ_DATA_BEATS_ID_4 = 47, + AXI_READ_DATA_BEATS_ID_5 = 48, + AXI_READ_DATA_BEATS_ID_6 = 49, + AXI_READ_DATA_BEATS_ID_7 = 50, + AXI_READ_DATA_BEATS_ID_8 = 51, + AXI_READ_DATA_BEATS_ID_9 = 52, + AXI_READ_DATA_BEATS_ID_10 = 53, + AXI_READ_DATA_BEATS_ID_11 = 54, + AXI_READ_DATA_BEATS_ID_12 = 55, + AXI_READ_DATA_BEATS_ID_13 = 56, + AXI_READ_DATA_BEATS_ID_14 = 57, + AXI_READ_DATA_BEATS_ID_15 = 58, + AXI0_READ_DATA_BEATS_TOTAL = 59, + AXI1_READ_DATA_BEATS_TOTAL = 60, + AXI2_READ_DATA_BEATS_TOTAL = 61, + AXI3_READ_DATA_BEATS_TOTAL = 62, + AXI_READ_DATA_BEATS_TOTAL = 63, + AXI_WRITE_DATA_BEATS_ID_0 = 64, + AXI_WRITE_DATA_BEATS_ID_1 = 65, + AXI_WRITE_DATA_BEATS_ID_2 = 66, + AXI_WRITE_DATA_BEATS_ID_3 = 67, + AXI_WRITE_DATA_BEATS_ID_4 = 68, + AXI_WRITE_DATA_BEATS_ID_5 = 69, + AXI_WRITE_DATA_BEATS_ID_6 = 70, + AXI_WRITE_DATA_BEATS_ID_7 = 71, + AXI_WRITE_DATA_BEATS_ID_8 = 72, + AXI_WRITE_DATA_BEATS_ID_9 = 73, + AXI_WRITE_DATA_BEATS_ID_10 = 74, + AXI_WRITE_DATA_BEATS_ID_11 = 75, + AXI_WRITE_DATA_BEATS_ID_12 = 76, + AXI_WRITE_DATA_BEATS_ID_13 = 77, + AXI_WRITE_DATA_BEATS_ID_14 = 78, + AXI_WRITE_DATA_BEATS_ID_15 = 79, + AXI0_WRITE_DATA_BEATS_TOTAL = 80, + AXI1_WRITE_DATA_BEATS_TOTAL = 81, + AXI2_WRITE_DATA_BEATS_TOTAL = 82, + AXI3_WRITE_DATA_BEATS_TOTAL = 83, + AXI_WRITE_DATA_BEATS_TOTAL = 84, + AXI_DATA_BEATS_TOTAL = 85, +}; + +enum a5xx_tex_filter { + A5XX_TEX_NEAREST = 0, + A5XX_TEX_LINEAR = 1, + A5XX_TEX_ANISO = 2, +}; + +enum a5xx_tex_clamp { + A5XX_TEX_REPEAT = 0, + A5XX_TEX_CLAMP_TO_EDGE = 1, + A5XX_TEX_MIRROR_REPEAT = 2, + A5XX_TEX_CLAMP_TO_BORDER = 3, + A5XX_TEX_MIRROR_CLAMP = 4, +}; + +enum a5xx_tex_aniso { + A5XX_TEX_ANISO_1 = 0, + A5XX_TEX_ANISO_2 = 1, + A5XX_TEX_ANISO_4 = 2, 
+ A5XX_TEX_ANISO_8 = 3, + A5XX_TEX_ANISO_16 = 4, +}; + +enum a5xx_tex_swiz { + A5XX_TEX_X = 0, + A5XX_TEX_Y = 1, + A5XX_TEX_Z = 2, + A5XX_TEX_W = 3, + A5XX_TEX_ZERO = 4, + A5XX_TEX_ONE = 5, +}; + +enum a5xx_tex_type { + A5XX_TEX_1D = 0, + A5XX_TEX_2D = 1, + A5XX_TEX_CUBE = 2, + A5XX_TEX_3D = 3, +}; + +#define A5XX_INT0_RBBM_GPU_IDLE 0x00000001 +#define A5XX_INT0_RBBM_AHB_ERROR 0x00000002 +#define A5XX_INT0_RBBM_TRANSFER_TIMEOUT 0x00000004 +#define A5XX_INT0_RBBM_ME_MS_TIMEOUT 0x00000008 +#define A5XX_INT0_RBBM_PFP_MS_TIMEOUT 0x00000010 +#define A5XX_INT0_RBBM_ETS_MS_TIMEOUT 0x00000020 +#define A5XX_INT0_RBBM_ATB_ASYNC_OVERFLOW 0x00000040 +#define A5XX_INT0_RBBM_GPC_ERROR 0x00000080 +#define A5XX_INT0_CP_SW 0x00000100 +#define A5XX_INT0_CP_HW_ERROR 0x00000200 +#define A5XX_INT0_CP_CCU_FLUSH_DEPTH_TS 0x00000400 +#define A5XX_INT0_CP_CCU_FLUSH_COLOR_TS 0x00000800 +#define A5XX_INT0_CP_CCU_RESOLVE_TS 0x00001000 +#define A5XX_INT0_CP_IB2 0x00002000 +#define A5XX_INT0_CP_IB1 0x00004000 +#define A5XX_INT0_CP_RB 0x00008000 +#define A5XX_INT0_CP_UNUSED_1 0x00010000 +#define A5XX_INT0_CP_RB_DONE_TS 0x00020000 +#define A5XX_INT0_CP_WT_DONE_TS 0x00040000 +#define A5XX_INT0_UNKNOWN_1 0x00080000 +#define A5XX_INT0_CP_CACHE_FLUSH_TS 0x00100000 +#define A5XX_INT0_UNUSED_2 0x00200000 +#define A5XX_INT0_RBBM_ATB_BUS_OVERFLOW 0x00400000 +#define A5XX_INT0_MISC_HANG_DETECT 0x00800000 +#define A5XX_INT0_UCHE_OOB_ACCESS 0x01000000 +#define A5XX_INT0_UCHE_TRAP_INTR 0x02000000 +#define A5XX_INT0_DEBBUS_INTR_0 0x04000000 +#define A5XX_INT0_DEBBUS_INTR_1 0x08000000 +#define A5XX_INT0_GPMU_VOLTAGE_DROOP 0x10000000 +#define A5XX_INT0_GPMU_FIRMWARE 0x20000000 +#define A5XX_INT0_ISDB_CPU_IRQ 0x40000000 +#define A5XX_INT0_ISDB_UNDER_DEBUG 0x80000000 +#define A5XX_CP_INT_CP_OPCODE_ERROR 0x00000001 +#define A5XX_CP_INT_CP_RESERVED_BIT_ERROR 0x00000002 +#define A5XX_CP_INT_CP_HW_FAULT_ERROR 0x00000004 +#define A5XX_CP_INT_CP_DMA_ERROR 0x00000008 +#define A5XX_CP_INT_CP_REGISTER_PROTECTION_ERROR 
0x00000010 +#define A5XX_CP_INT_CP_AHB_ERROR 0x00000020 +#define REG_A5XX_CP_RB_BASE 0x00000800 + +#define REG_A5XX_CP_RB_BASE_HI 0x00000801 + +#define REG_A5XX_CP_RB_CNTL 0x00000802 + +#define REG_A5XX_CP_RB_RPTR_ADDR 0x00000804 + +#define REG_A5XX_CP_RB_RPTR_ADDR_HI 0x00000805 + +#define REG_A5XX_CP_RB_RPTR 0x00000806 + +#define REG_A5XX_CP_RB_WPTR 0x00000807 + +#define REG_A5XX_CP_PFP_STAT_ADDR 0x00000808 + +#define REG_A5XX_CP_PFP_STAT_DATA 0x00000809 + +#define REG_A5XX_CP_DRAW_STATE_ADDR 0x0000080b + +#define REG_A5XX_CP_DRAW_STATE_DATA 0x0000080c + +#define REG_A5XX_CP_ME_NRT_ADDR_LO 0x0000080d + +#define REG_A5XX_CP_ME_NRT_ADDR_HI 0x0000080e + +#define REG_A5XX_CP_ME_NRT_DATA 0x00000810 + +#define REG_A5XX_CP_CRASH_SCRIPT_BASE_LO 0x00000817 + +#define REG_A5XX_CP_CRASH_SCRIPT_BASE_HI 0x00000818 + +#define REG_A5XX_CP_CRASH_DUMP_CNTL 0x00000819 + +#define REG_A5XX_CP_ME_STAT_ADDR 0x0000081a + +#define REG_A5XX_CP_ROQ_THRESHOLDS_1 0x0000081f + +#define REG_A5XX_CP_ROQ_THRESHOLDS_2 0x00000820 + +#define REG_A5XX_CP_ROQ_DBG_ADDR 0x00000821 + +#define REG_A5XX_CP_ROQ_DBG_DATA 0x00000822 + +#define REG_A5XX_CP_MEQ_DBG_ADDR 0x00000823 + +#define REG_A5XX_CP_MEQ_DBG_DATA 0x00000824 + +#define REG_A5XX_CP_MEQ_THRESHOLDS 0x00000825 + +#define REG_A5XX_CP_MERCIU_SIZE 0x00000826 + +#define REG_A5XX_CP_MERCIU_DBG_ADDR 0x00000827 + +#define REG_A5XX_CP_MERCIU_DBG_DATA_1 0x00000828 + +#define REG_A5XX_CP_MERCIU_DBG_DATA_2 0x00000829 + +#define REG_A5XX_CP_PFP_UCODE_DBG_ADDR 0x0000082a + +#define REG_A5XX_CP_PFP_UCODE_DBG_DATA 0x0000082b + +#define REG_A5XX_CP_ME_UCODE_DBG_ADDR 0x0000082f + +#define REG_A5XX_CP_ME_UCODE_DBG_DATA 0x00000830 + +#define REG_A5XX_CP_CNTL 0x00000831 + +#define REG_A5XX_CP_PFP_ME_CNTL 0x00000832 + +#define REG_A5XX_CP_CHICKEN_DBG 0x00000833 + +#define REG_A5XX_CP_PFP_INSTR_BASE_LO 0x00000835 + +#define REG_A5XX_CP_PFP_INSTR_BASE_HI 0x00000836 + +#define REG_A5XX_CP_ME_INSTR_BASE_LO 0x00000838 + +#define REG_A5XX_CP_ME_INSTR_BASE_HI 0x00000839 + 
+#define REG_A5XX_CP_CONTEXT_SWITCH_CNTL 0x0000083b + +#define REG_A5XX_CP_CONTEXT_SWITCH_RESTORE_ADDR_LO 0x0000083c + +#define REG_A5XX_CP_CONTEXT_SWITCH_RESTORE_ADDR_HI 0x0000083d + +#define REG_A5XX_CP_CONTEXT_SWITCH_SAVE_ADDR_LO 0x0000083e + +#define REG_A5XX_CP_CONTEXT_SWITCH_SAVE_ADDR_HI 0x0000083f + +#define REG_A5XX_CP_CONTEXT_SWITCH_SMMU_INFO_LO 0x00000840 + +#define REG_A5XX_CP_CONTEXT_SWITCH_SMMU_INFO_HI 0x00000841 + +#define REG_A5XX_CP_ADDR_MODE_CNTL 0x00000860 + +#define REG_A5XX_CP_ME_STAT_DATA 0x00000b14 + +#define REG_A5XX_CP_WFI_PEND_CTR 0x00000b15 + +#define REG_A5XX_CP_INTERRUPT_STATUS 0x00000b18 + +#define REG_A5XX_CP_HW_FAULT 0x00000b1a + +#define REG_A5XX_CP_PROTECT_STATUS 0x00000b1c + +#define REG_A5XX_CP_IB1_BASE 0x00000b1f + +#define REG_A5XX_CP_IB1_BASE_HI 0x00000b20 + +#define REG_A5XX_CP_IB1_BUFSZ 0x00000b21 + +#define REG_A5XX_CP_IB2_BASE 0x00000b22 + +#define REG_A5XX_CP_IB2_BASE_HI 0x00000b23 + +#define REG_A5XX_CP_IB2_BUFSZ 0x00000b24 + +static inline uint32_t REG_A5XX_CP_SCRATCH(uint32_t i0) { return 0x00000b78 + 0x1*i0; } + +static inline uint32_t REG_A5XX_CP_SCRATCH_REG(uint32_t i0) { return 0x00000b78 + 0x1*i0; } + +static inline uint32_t REG_A5XX_CP_PROTECT(uint32_t i0) { return 0x00000880 + 0x1*i0; } + +static inline uint32_t REG_A5XX_CP_PROTECT_REG(uint32_t i0) { return 0x00000880 + 0x1*i0; } +#define A5XX_CP_PROTECT_REG_BASE_ADDR__MASK 0x0001ffff +#define A5XX_CP_PROTECT_REG_BASE_ADDR__SHIFT 0 +static inline uint32_t A5XX_CP_PROTECT_REG_BASE_ADDR(uint32_t val) +{ + return ((val) << A5XX_CP_PROTECT_REG_BASE_ADDR__SHIFT) & A5XX_CP_PROTECT_REG_BASE_ADDR__MASK; +} +#define A5XX_CP_PROTECT_REG_MASK_LEN__MASK 0x1f000000 +#define A5XX_CP_PROTECT_REG_MASK_LEN__SHIFT 24 +static inline uint32_t A5XX_CP_PROTECT_REG_MASK_LEN(uint32_t val) +{ + return ((val) << A5XX_CP_PROTECT_REG_MASK_LEN__SHIFT) & A5XX_CP_PROTECT_REG_MASK_LEN__MASK; +} +#define A5XX_CP_PROTECT_REG_TRAP_WRITE 0x20000000 +#define A5XX_CP_PROTECT_REG_TRAP_READ 0x40000000 
+ +#define REG_A5XX_CP_PROTECT_CNTL 0x000008a0 + +#define REG_A5XX_CP_AHB_FAULT 0x00000b1b + +#define REG_A5XX_CP_PERFCTR_CP_SEL_0 0x00000bb0 + +#define REG_A5XX_CP_PERFCTR_CP_SEL_1 0x00000bb1 + +#define REG_A5XX_CP_PERFCTR_CP_SEL_2 0x00000bb2 + +#define REG_A5XX_CP_PERFCTR_CP_SEL_3 0x00000bb3 + +#define REG_A5XX_CP_PERFCTR_CP_SEL_4 0x00000bb4 + +#define REG_A5XX_CP_PERFCTR_CP_SEL_5 0x00000bb5 + +#define REG_A5XX_CP_PERFCTR_CP_SEL_6 0x00000bb6 + +#define REG_A5XX_CP_PERFCTR_CP_SEL_7 0x00000bb7 + +#define REG_A5XX_VSC_ADDR_MODE_CNTL 0x00000bc1 + +#define REG_A5XX_CP_POWERCTR_CP_SEL_0 0x00000bba + +#define REG_A5XX_CP_POWERCTR_CP_SEL_1 0x00000bbb + +#define REG_A5XX_CP_POWERCTR_CP_SEL_2 0x00000bbc + +#define REG_A5XX_CP_POWERCTR_CP_SEL_3 0x00000bbd + +#define REG_A5XX_RBBM_CFG_DBGBUS_SEL_A 0x00000004 + +#define REG_A5XX_RBBM_CFG_DBGBUS_SEL_B 0x00000005 + +#define REG_A5XX_RBBM_CFG_DBGBUS_SEL_C 0x00000006 + +#define REG_A5XX_RBBM_CFG_DBGBUS_SEL_D 0x00000007 + +#define REG_A5XX_RBBM_CFG_DBGBUS_CNTLT 0x00000008 + +#define REG_A5XX_RBBM_CFG_DBGBUS_CNTLM 0x00000009 + +#define REG_A5XX_RBBM_CFG_DEBBUS_CTLTM_ENABLE_SHIFT 0x00000018 + +#define REG_A5XX_RBBM_CFG_DBGBUS_OPL 0x0000000a + +#define REG_A5XX_RBBM_CFG_DBGBUS_OPE 0x0000000b + +#define REG_A5XX_RBBM_CFG_DBGBUS_IVTL_0 0x0000000c + +#define REG_A5XX_RBBM_CFG_DBGBUS_IVTL_1 0x0000000d + +#define REG_A5XX_RBBM_CFG_DBGBUS_IVTL_2 0x0000000e + +#define REG_A5XX_RBBM_CFG_DBGBUS_IVTL_3 0x0000000f + +#define REG_A5XX_RBBM_CFG_DBGBUS_MASKL_0 0x00000010 + +#define REG_A5XX_RBBM_CFG_DBGBUS_MASKL_1 0x00000011 + +#define REG_A5XX_RBBM_CFG_DBGBUS_MASKL_2 0x00000012 + +#define REG_A5XX_RBBM_CFG_DBGBUS_MASKL_3 0x00000013 + +#define REG_A5XX_RBBM_CFG_DBGBUS_BYTEL_0 0x00000014 + +#define REG_A5XX_RBBM_CFG_DBGBUS_BYTEL_1 0x00000015 + +#define REG_A5XX_RBBM_CFG_DBGBUS_IVTE_0 0x00000016 + +#define REG_A5XX_RBBM_CFG_DBGBUS_IVTE_1 0x00000017 + +#define REG_A5XX_RBBM_CFG_DBGBUS_IVTE_2 0x00000018 + +#define REG_A5XX_RBBM_CFG_DBGBUS_IVTE_3 
0x00000019 + +#define REG_A5XX_RBBM_CFG_DBGBUS_MASKE_0 0x0000001a + +#define REG_A5XX_RBBM_CFG_DBGBUS_MASKE_1 0x0000001b + +#define REG_A5XX_RBBM_CFG_DBGBUS_MASKE_2 0x0000001c + +#define REG_A5XX_RBBM_CFG_DBGBUS_MASKE_3 0x0000001d + +#define REG_A5XX_RBBM_CFG_DBGBUS_NIBBLEE 0x0000001e + +#define REG_A5XX_RBBM_CFG_DBGBUS_PTRC0 0x0000001f + +#define REG_A5XX_RBBM_CFG_DBGBUS_PTRC1 0x00000020 + +#define REG_A5XX_RBBM_CFG_DBGBUS_LOADREG 0x00000021 + +#define REG_A5XX_RBBM_CFG_DBGBUS_IDX 0x00000022 + +#define REG_A5XX_RBBM_CFG_DBGBUS_CLRC 0x00000023 + +#define REG_A5XX_RBBM_CFG_DBGBUS_LOADIVT 0x00000024 + +#define REG_A5XX_RBBM_INTERFACE_HANG_INT_CNTL 0x0000002f + +#define REG_A5XX_RBBM_INT_CLEAR_CMD 0x00000037 + +#define REG_A5XX_RBBM_INT_0_MASK 0x00000038 +#define A5XX_RBBM_INT_0_MASK_RBBM_GPU_IDLE 0x00000001 +#define A5XX_RBBM_INT_0_MASK_RBBM_AHB_ERROR 0x00000002 +#define A5XX_RBBM_INT_0_MASK_RBBM_TRANSFER_TIMEOUT 0x00000004 +#define A5XX_RBBM_INT_0_MASK_RBBM_ME_MS_TIMEOUT 0x00000008 +#define A5XX_RBBM_INT_0_MASK_RBBM_PFP_MS_TIMEOUT 0x00000010 +#define A5XX_RBBM_INT_0_MASK_RBBM_ETS_MS_TIMEOUT 0x00000020 +#define A5XX_RBBM_INT_0_MASK_RBBM_ATB_ASYNC_OVERFLOW 0x00000040 +#define A5XX_RBBM_INT_0_MASK_RBBM_GPC_ERROR 0x00000080 +#define A5XX_RBBM_INT_0_MASK_CP_SW 0x00000100 +#define A5XX_RBBM_INT_0_MASK_CP_HW_ERROR 0x00000200 +#define A5XX_RBBM_INT_0_MASK_CP_CCU_FLUSH_DEPTH_TS 0x00000400 +#define A5XX_RBBM_INT_0_MASK_CP_CCU_FLUSH_COLOR_TS 0x00000800 +#define A5XX_RBBM_INT_0_MASK_CP_CCU_RESOLVE_TS 0x00001000 +#define A5XX_RBBM_INT_0_MASK_CP_IB2 0x00002000 +#define A5XX_RBBM_INT_0_MASK_CP_IB1 0x00004000 +#define A5XX_RBBM_INT_0_MASK_CP_RB 0x00008000 +#define A5XX_RBBM_INT_0_MASK_CP_RB_DONE_TS 0x00020000 +#define A5XX_RBBM_INT_0_MASK_CP_WT_DONE_TS 0x00040000 +#define A5XX_RBBM_INT_0_MASK_CP_CACHE_FLUSH_TS 0x00100000 +#define A5XX_RBBM_INT_0_MASK_RBBM_ATB_BUS_OVERFLOW 0x00400000 +#define A5XX_RBBM_INT_0_MASK_MISC_HANG_DETECT 0x00800000 +#define 
A5XX_RBBM_INT_0_MASK_UCHE_OOB_ACCESS 0x01000000 +#define A5XX_RBBM_INT_0_MASK_UCHE_TRAP_INTR 0x02000000 +#define A5XX_RBBM_INT_0_MASK_DEBBUS_INTR_0 0x04000000 +#define A5XX_RBBM_INT_0_MASK_DEBBUS_INTR_1 0x08000000 +#define A5XX_RBBM_INT_0_MASK_GPMU_VOLTAGE_DROOP 0x10000000 +#define A5XX_RBBM_INT_0_MASK_GPMU_FIRMWARE 0x20000000 +#define A5XX_RBBM_INT_0_MASK_ISDB_CPU_IRQ 0x40000000 +#define A5XX_RBBM_INT_0_MASK_ISDB_UNDER_DEBUG 0x80000000 + +#define REG_A5XX_RBBM_AHB_DBG_CNTL 0x0000003f + +#define REG_A5XX_RBBM_EXT_VBIF_DBG_CNTL 0x00000041 + +#define REG_A5XX_RBBM_SW_RESET_CMD 0x00000043 + +#define REG_A5XX_RBBM_BLOCK_SW_RESET_CMD 0x00000045 + +#define REG_A5XX_RBBM_BLOCK_SW_RESET_CMD2 0x00000046 + +#define REG_A5XX_RBBM_DBG_LO_HI_GPIO 0x00000048 + +#define REG_A5XX_RBBM_EXT_TRACE_BUS_CNTL 0x00000049 + +#define REG_A5XX_RBBM_CLOCK_CNTL_TP0 0x0000004a + +#define REG_A5XX_RBBM_CLOCK_CNTL_TP1 0x0000004b + +#define REG_A5XX_RBBM_CLOCK_CNTL_TP2 0x0000004c + +#define REG_A5XX_RBBM_CLOCK_CNTL_TP3 0x0000004d + +#define REG_A5XX_RBBM_CLOCK_CNTL2_TP0 0x0000004e + +#define REG_A5XX_RBBM_CLOCK_CNTL2_TP1 0x0000004f + +#define REG_A5XX_RBBM_CLOCK_CNTL2_TP2 0x00000050 + +#define REG_A5XX_RBBM_CLOCK_CNTL2_TP3 0x00000051 + +#define REG_A5XX_RBBM_CLOCK_CNTL3_TP0 0x00000052 + +#define REG_A5XX_RBBM_CLOCK_CNTL3_TP1 0x00000053 + +#define REG_A5XX_RBBM_CLOCK_CNTL3_TP2 0x00000054 + +#define REG_A5XX_RBBM_CLOCK_CNTL3_TP3 0x00000055 + +#define REG_A5XX_RBBM_READ_AHB_THROUGH_DBG 0x00000059 + +#define REG_A5XX_RBBM_CLOCK_CNTL_UCHE 0x0000005a + +#define REG_A5XX_RBBM_CLOCK_CNTL2_UCHE 0x0000005b + +#define REG_A5XX_RBBM_CLOCK_CNTL3_UCHE 0x0000005c + +#define REG_A5XX_RBBM_CLOCK_CNTL4_UCHE 0x0000005d + +#define REG_A5XX_RBBM_CLOCK_HYST_UCHE 0x0000005e + +#define REG_A5XX_RBBM_CLOCK_DELAY_UCHE 0x0000005f + +#define REG_A5XX_RBBM_CLOCK_MODE_GPC 0x00000060 + +#define REG_A5XX_RBBM_CLOCK_DELAY_GPC 0x00000061 + +#define REG_A5XX_RBBM_CLOCK_HYST_GPC 0x00000062 + +#define 
REG_A5XX_RBBM_CLOCK_CNTL_TSE_RAS_RBBM 0x00000063 + +#define REG_A5XX_RBBM_CLOCK_HYST_TSE_RAS_RBBM 0x00000064 + +#define REG_A5XX_RBBM_CLOCK_DELAY_TSE_RAS_RBBM 0x00000065 + +#define REG_A5XX_RBBM_CLOCK_DELAY_HLSQ 0x00000066 + +#define REG_A5XX_RBBM_CLOCK_CNTL 0x00000067 + +#define REG_A5XX_RBBM_CLOCK_CNTL_SP0 0x00000068 + +#define REG_A5XX_RBBM_CLOCK_CNTL_SP1 0x00000069 + +#define REG_A5XX_RBBM_CLOCK_CNTL_SP2 0x0000006a + +#define REG_A5XX_RBBM_CLOCK_CNTL_SP3 0x0000006b + +#define REG_A5XX_RBBM_CLOCK_CNTL2_SP0 0x0000006c + +#define REG_A5XX_RBBM_CLOCK_CNTL2_SP1 0x0000006d + +#define REG_A5XX_RBBM_CLOCK_CNTL2_SP2 0x0000006e + +#define REG_A5XX_RBBM_CLOCK_CNTL2_SP3 0x0000006f + +#define REG_A5XX_RBBM_CLOCK_HYST_SP0 0x00000070 + +#define REG_A5XX_RBBM_CLOCK_HYST_SP1 0x00000071 + +#define REG_A5XX_RBBM_CLOCK_HYST_SP2 0x00000072 + +#define REG_A5XX_RBBM_CLOCK_HYST_SP3 0x00000073 + +#define REG_A5XX_RBBM_CLOCK_DELAY_SP0 0x00000074 + +#define REG_A5XX_RBBM_CLOCK_DELAY_SP1 0x00000075 + +#define REG_A5XX_RBBM_CLOCK_DELAY_SP2 0x00000076 + +#define REG_A5XX_RBBM_CLOCK_DELAY_SP3 0x00000077 + +#define REG_A5XX_RBBM_CLOCK_CNTL_RB0 0x00000078 + +#define REG_A5XX_RBBM_CLOCK_CNTL_RB1 0x00000079 + +#define REG_A5XX_RBBM_CLOCK_CNTL_RB2 0x0000007a + +#define REG_A5XX_RBBM_CLOCK_CNTL_RB3 0x0000007b + +#define REG_A5XX_RBBM_CLOCK_CNTL2_RB0 0x0000007c + +#define REG_A5XX_RBBM_CLOCK_CNTL2_RB1 0x0000007d + +#define REG_A5XX_RBBM_CLOCK_CNTL2_RB2 0x0000007e + +#define REG_A5XX_RBBM_CLOCK_CNTL2_RB3 0x0000007f + +#define REG_A5XX_RBBM_CLOCK_HYST_RAC 0x00000080 + +#define REG_A5XX_RBBM_CLOCK_DELAY_RAC 0x00000081 + +#define REG_A5XX_RBBM_CLOCK_CNTL_CCU0 0x00000082 + +#define REG_A5XX_RBBM_CLOCK_CNTL_CCU1 0x00000083 + +#define REG_A5XX_RBBM_CLOCK_CNTL_CCU2 0x00000084 + +#define REG_A5XX_RBBM_CLOCK_CNTL_CCU3 0x00000085 + +#define REG_A5XX_RBBM_CLOCK_HYST_RB_CCU0 0x00000086 + +#define REG_A5XX_RBBM_CLOCK_HYST_RB_CCU1 0x00000087 + +#define REG_A5XX_RBBM_CLOCK_HYST_RB_CCU2 0x00000088 + +#define 
REG_A5XX_RBBM_CLOCK_HYST_RB_CCU3 0x00000089 + +#define REG_A5XX_RBBM_CLOCK_CNTL_RAC 0x0000008a + +#define REG_A5XX_RBBM_CLOCK_CNTL2_RAC 0x0000008b + +#define REG_A5XX_RBBM_CLOCK_DELAY_RB_CCU_L1_0 0x0000008c + +#define REG_A5XX_RBBM_CLOCK_DELAY_RB_CCU_L1_1 0x0000008d + +#define REG_A5XX_RBBM_CLOCK_DELAY_RB_CCU_L1_2 0x0000008e + +#define REG_A5XX_RBBM_CLOCK_DELAY_RB_CCU_L1_3 0x0000008f + +#define REG_A5XX_RBBM_CLOCK_HYST_VFD 0x00000090 + +#define REG_A5XX_RBBM_CLOCK_MODE_VFD 0x00000091 + +#define REG_A5XX_RBBM_CLOCK_DELAY_VFD 0x00000092 + +#define REG_A5XX_RBBM_AHB_CNTL0 0x00000093 + +#define REG_A5XX_RBBM_AHB_CNTL1 0x00000094 + +#define REG_A5XX_RBBM_AHB_CNTL2 0x00000095 + +#define REG_A5XX_RBBM_AHB_CMD 0x00000096 + +#define REG_A5XX_RBBM_INTERFACE_HANG_MASK_CNTL11 0x0000009c + +#define REG_A5XX_RBBM_INTERFACE_HANG_MASK_CNTL12 0x0000009d + +#define REG_A5XX_RBBM_INTERFACE_HANG_MASK_CNTL13 0x0000009e + +#define REG_A5XX_RBBM_INTERFACE_HANG_MASK_CNTL14 0x0000009f + +#define REG_A5XX_RBBM_INTERFACE_HANG_MASK_CNTL15 0x000000a0 + +#define REG_A5XX_RBBM_INTERFACE_HANG_MASK_CNTL16 0x000000a1 + +#define REG_A5XX_RBBM_INTERFACE_HANG_MASK_CNTL17 0x000000a2 + +#define REG_A5XX_RBBM_INTERFACE_HANG_MASK_CNTL18 0x000000a3 + +#define REG_A5XX_RBBM_CLOCK_DELAY_TP0 0x000000a4 + +#define REG_A5XX_RBBM_CLOCK_DELAY_TP1 0x000000a5 + +#define REG_A5XX_RBBM_CLOCK_DELAY_TP2 0x000000a6 + +#define REG_A5XX_RBBM_CLOCK_DELAY_TP3 0x000000a7 + +#define REG_A5XX_RBBM_CLOCK_DELAY2_TP0 0x000000a8 + +#define REG_A5XX_RBBM_CLOCK_DELAY2_TP1 0x000000a9 + +#define REG_A5XX_RBBM_CLOCK_DELAY2_TP2 0x000000aa + +#define REG_A5XX_RBBM_CLOCK_DELAY2_TP3 0x000000ab + +#define REG_A5XX_RBBM_CLOCK_DELAY3_TP0 0x000000ac + +#define REG_A5XX_RBBM_CLOCK_DELAY3_TP1 0x000000ad + +#define REG_A5XX_RBBM_CLOCK_DELAY3_TP2 0x000000ae + +#define REG_A5XX_RBBM_CLOCK_DELAY3_TP3 0x000000af + +#define REG_A5XX_RBBM_CLOCK_HYST_TP0 0x000000b0 + +#define REG_A5XX_RBBM_CLOCK_HYST_TP1 0x000000b1 + +#define 
REG_A5XX_RBBM_CLOCK_HYST_TP2 0x000000b2 + +#define REG_A5XX_RBBM_CLOCK_HYST_TP3 0x000000b3 + +#define REG_A5XX_RBBM_CLOCK_HYST2_TP0 0x000000b4 + +#define REG_A5XX_RBBM_CLOCK_HYST2_TP1 0x000000b5 + +#define REG_A5XX_RBBM_CLOCK_HYST2_TP2 0x000000b6 + +#define REG_A5XX_RBBM_CLOCK_HYST2_TP3 0x000000b7 + +#define REG_A5XX_RBBM_CLOCK_HYST3_TP0 0x000000b8 + +#define REG_A5XX_RBBM_CLOCK_HYST3_TP1 0x000000b9 + +#define REG_A5XX_RBBM_CLOCK_HYST3_TP2 0x000000ba + +#define REG_A5XX_RBBM_CLOCK_HYST3_TP3 0x000000bb + +#define REG_A5XX_RBBM_CLOCK_CNTL_GPMU 0x000000c8 + +#define REG_A5XX_RBBM_CLOCK_DELAY_GPMU 0x000000c9 + +#define REG_A5XX_RBBM_CLOCK_HYST_GPMU 0x000000ca + +#define REG_A5XX_RBBM_PERFCTR_CP_0_LO 0x000003a0 + +#define REG_A5XX_RBBM_PERFCTR_CP_0_HI 0x000003a1 + +#define REG_A5XX_RBBM_PERFCTR_CP_1_LO 0x000003a2 + +#define REG_A5XX_RBBM_PERFCTR_CP_1_HI 0x000003a3 + +#define REG_A5XX_RBBM_PERFCTR_CP_2_LO 0x000003a4 + +#define REG_A5XX_RBBM_PERFCTR_CP_2_HI 0x000003a5 + +#define REG_A5XX_RBBM_PERFCTR_CP_3_LO 0x000003a6 + +#define REG_A5XX_RBBM_PERFCTR_CP_3_HI 0x000003a7 + +#define REG_A5XX_RBBM_PERFCTR_CP_4_LO 0x000003a8 + +#define REG_A5XX_RBBM_PERFCTR_CP_4_HI 0x000003a9 + +#define REG_A5XX_RBBM_PERFCTR_CP_5_LO 0x000003aa + +#define REG_A5XX_RBBM_PERFCTR_CP_5_HI 0x000003ab + +#define REG_A5XX_RBBM_PERFCTR_CP_6_LO 0x000003ac + +#define REG_A5XX_RBBM_PERFCTR_CP_6_HI 0x000003ad + +#define REG_A5XX_RBBM_PERFCTR_CP_7_LO 0x000003ae + +#define REG_A5XX_RBBM_PERFCTR_CP_7_HI 0x000003af + +#define REG_A5XX_RBBM_PERFCTR_RBBM_0_LO 0x000003b0 + +#define REG_A5XX_RBBM_PERFCTR_RBBM_0_HI 0x000003b1 + +#define REG_A5XX_RBBM_PERFCTR_RBBM_1_LO 0x000003b2 + +#define REG_A5XX_RBBM_PERFCTR_RBBM_1_HI 0x000003b3 + +#define REG_A5XX_RBBM_PERFCTR_RBBM_2_LO 0x000003b4 + +#define REG_A5XX_RBBM_PERFCTR_RBBM_2_HI 0x000003b5 + +#define REG_A5XX_RBBM_PERFCTR_RBBM_3_LO 0x000003b6 + +#define REG_A5XX_RBBM_PERFCTR_RBBM_3_HI 0x000003b7 + +#define REG_A5XX_RBBM_PERFCTR_PC_0_LO 0x000003b8 + +#define 
REG_A5XX_RBBM_PERFCTR_PC_0_HI 0x000003b9 + +#define REG_A5XX_RBBM_PERFCTR_PC_1_LO 0x000003ba + +#define REG_A5XX_RBBM_PERFCTR_PC_1_HI 0x000003bb + +#define REG_A5XX_RBBM_PERFCTR_PC_2_LO 0x000003bc + +#define REG_A5XX_RBBM_PERFCTR_PC_2_HI 0x000003bd + +#define REG_A5XX_RBBM_PERFCTR_PC_3_LO 0x000003be + +#define REG_A5XX_RBBM_PERFCTR_PC_3_HI 0x000003bf + +#define REG_A5XX_RBBM_PERFCTR_PC_4_LO 0x000003c0 + +#define REG_A5XX_RBBM_PERFCTR_PC_4_HI 0x000003c1 + +#define REG_A5XX_RBBM_PERFCTR_PC_5_LO 0x000003c2 + +#define REG_A5XX_RBBM_PERFCTR_PC_5_HI 0x000003c3 + +#define REG_A5XX_RBBM_PERFCTR_PC_6_LO 0x000003c4 + +#define REG_A5XX_RBBM_PERFCTR_PC_6_HI 0x000003c5 + +#define REG_A5XX_RBBM_PERFCTR_PC_7_LO 0x000003c6 + +#define REG_A5XX_RBBM_PERFCTR_PC_7_HI 0x000003c7 + +#define REG_A5XX_RBBM_PERFCTR_VFD_0_LO 0x000003c8 + +#define REG_A5XX_RBBM_PERFCTR_VFD_0_HI 0x000003c9 + +#define REG_A5XX_RBBM_PERFCTR_VFD_1_LO 0x000003ca + +#define REG_A5XX_RBBM_PERFCTR_VFD_1_HI 0x000003cb + +#define REG_A5XX_RBBM_PERFCTR_VFD_2_LO 0x000003cc + +#define REG_A5XX_RBBM_PERFCTR_VFD_2_HI 0x000003cd + +#define REG_A5XX_RBBM_PERFCTR_VFD_3_LO 0x000003ce + +#define REG_A5XX_RBBM_PERFCTR_VFD_3_HI 0x000003cf + +#define REG_A5XX_RBBM_PERFCTR_VFD_4_LO 0x000003d0 + +#define REG_A5XX_RBBM_PERFCTR_VFD_4_HI 0x000003d1 + +#define REG_A5XX_RBBM_PERFCTR_VFD_5_LO 0x000003d2 + +#define REG_A5XX_RBBM_PERFCTR_VFD_5_HI 0x000003d3 + +#define REG_A5XX_RBBM_PERFCTR_VFD_6_LO 0x000003d4 + +#define REG_A5XX_RBBM_PERFCTR_VFD_6_HI 0x000003d5 + +#define REG_A5XX_RBBM_PERFCTR_VFD_7_LO 0x000003d6 + +#define REG_A5XX_RBBM_PERFCTR_VFD_7_HI 0x000003d7 + +#define REG_A5XX_RBBM_PERFCTR_HLSQ_0_LO 0x000003d8 + +#define REG_A5XX_RBBM_PERFCTR_HLSQ_0_HI 0x000003d9 + +#define REG_A5XX_RBBM_PERFCTR_HLSQ_1_LO 0x000003da + +#define REG_A5XX_RBBM_PERFCTR_HLSQ_1_HI 0x000003db + +#define REG_A5XX_RBBM_PERFCTR_HLSQ_2_LO 0x000003dc + +#define REG_A5XX_RBBM_PERFCTR_HLSQ_2_HI 0x000003dd + +#define REG_A5XX_RBBM_PERFCTR_HLSQ_3_LO 0x000003de + 
+#define REG_A5XX_RBBM_PERFCTR_HLSQ_3_HI 0x000003df + +#define REG_A5XX_RBBM_PERFCTR_HLSQ_4_LO 0x000003e0 + +#define REG_A5XX_RBBM_PERFCTR_HLSQ_4_HI 0x000003e1 + +#define REG_A5XX_RBBM_PERFCTR_HLSQ_5_LO 0x000003e2 + +#define REG_A5XX_RBBM_PERFCTR_HLSQ_5_HI 0x000003e3 + +#define REG_A5XX_RBBM_PERFCTR_HLSQ_6_LO 0x000003e4 + +#define REG_A5XX_RBBM_PERFCTR_HLSQ_6_HI 0x000003e5 + +#define REG_A5XX_RBBM_PERFCTR_HLSQ_7_LO 0x000003e6 + +#define REG_A5XX_RBBM_PERFCTR_HLSQ_7_HI 0x000003e7 + +#define REG_A5XX_RBBM_PERFCTR_VPC_0_LO 0x000003e8 + +#define REG_A5XX_RBBM_PERFCTR_VPC_0_HI 0x000003e9 + +#define REG_A5XX_RBBM_PERFCTR_VPC_1_LO 0x000003ea + +#define REG_A5XX_RBBM_PERFCTR_VPC_1_HI 0x000003eb + +#define REG_A5XX_RBBM_PERFCTR_VPC_2_LO 0x000003ec + +#define REG_A5XX_RBBM_PERFCTR_VPC_2_HI 0x000003ed + +#define REG_A5XX_RBBM_PERFCTR_VPC_3_LO 0x000003ee + +#define REG_A5XX_RBBM_PERFCTR_VPC_3_HI 0x000003ef + +#define REG_A5XX_RBBM_PERFCTR_CCU_0_LO 0x000003f0 + +#define REG_A5XX_RBBM_PERFCTR_CCU_0_HI 0x000003f1 + +#define REG_A5XX_RBBM_PERFCTR_CCU_1_LO 0x000003f2 + +#define REG_A5XX_RBBM_PERFCTR_CCU_1_HI 0x000003f3 + +#define REG_A5XX_RBBM_PERFCTR_CCU_2_LO 0x000003f4 + +#define REG_A5XX_RBBM_PERFCTR_CCU_2_HI 0x000003f5 + +#define REG_A5XX_RBBM_PERFCTR_CCU_3_LO 0x000003f6 + +#define REG_A5XX_RBBM_PERFCTR_CCU_3_HI 0x000003f7 + +#define REG_A5XX_RBBM_PERFCTR_TSE_0_LO 0x000003f8 + +#define REG_A5XX_RBBM_PERFCTR_TSE_0_HI 0x000003f9 + +#define REG_A5XX_RBBM_PERFCTR_TSE_1_LO 0x000003fa + +#define REG_A5XX_RBBM_PERFCTR_TSE_1_HI 0x000003fb + +#define REG_A5XX_RBBM_PERFCTR_TSE_2_LO 0x000003fc + +#define REG_A5XX_RBBM_PERFCTR_TSE_2_HI 0x000003fd + +#define REG_A5XX_RBBM_PERFCTR_TSE_3_LO 0x000003fe + +#define REG_A5XX_RBBM_PERFCTR_TSE_3_HI 0x000003ff + +#define REG_A5XX_RBBM_PERFCTR_RAS_0_LO 0x00000400 + +#define REG_A5XX_RBBM_PERFCTR_RAS_0_HI 0x00000401 + +#define REG_A5XX_RBBM_PERFCTR_RAS_1_LO 0x00000402 + +#define REG_A5XX_RBBM_PERFCTR_RAS_1_HI 0x00000403 + +#define 
REG_A5XX_RBBM_PERFCTR_RAS_2_LO 0x00000404 + +#define REG_A5XX_RBBM_PERFCTR_RAS_2_HI 0x00000405 + +#define REG_A5XX_RBBM_PERFCTR_RAS_3_LO 0x00000406 + +#define REG_A5XX_RBBM_PERFCTR_RAS_3_HI 0x00000407 + +#define REG_A5XX_RBBM_PERFCTR_UCHE_0_LO 0x00000408 + +#define REG_A5XX_RBBM_PERFCTR_UCHE_0_HI 0x00000409 + +#define REG_A5XX_RBBM_PERFCTR_UCHE_1_LO 0x0000040a + +#define REG_A5XX_RBBM_PERFCTR_UCHE_1_HI 0x0000040b + +#define REG_A5XX_RBBM_PERFCTR_UCHE_2_LO 0x0000040c + +#define REG_A5XX_RBBM_PERFCTR_UCHE_2_HI 0x0000040d + +#define REG_A5XX_RBBM_PERFCTR_UCHE_3_LO 0x0000040e + +#define REG_A5XX_RBBM_PERFCTR_UCHE_3_HI 0x0000040f + +#define REG_A5XX_RBBM_PERFCTR_UCHE_4_LO 0x00000410 + +#define REG_A5XX_RBBM_PERFCTR_UCHE_4_HI 0x00000411 + +#define REG_A5XX_RBBM_PERFCTR_UCHE_5_LO 0x00000412 + +#define REG_A5XX_RBBM_PERFCTR_UCHE_5_HI 0x00000413 + +#define REG_A5XX_RBBM_PERFCTR_UCHE_6_LO 0x00000414 + +#define REG_A5XX_RBBM_PERFCTR_UCHE_6_HI 0x00000415 + +#define REG_A5XX_RBBM_PERFCTR_UCHE_7_LO 0x00000416 + +#define REG_A5XX_RBBM_PERFCTR_UCHE_7_HI 0x00000417 + +#define REG_A5XX_RBBM_PERFCTR_TP_0_LO 0x00000418 + +#define REG_A5XX_RBBM_PERFCTR_TP_0_HI 0x00000419 + +#define REG_A5XX_RBBM_PERFCTR_TP_1_LO 0x0000041a + +#define REG_A5XX_RBBM_PERFCTR_TP_1_HI 0x0000041b + +#define REG_A5XX_RBBM_PERFCTR_TP_2_LO 0x0000041c + +#define REG_A5XX_RBBM_PERFCTR_TP_2_HI 0x0000041d + +#define REG_A5XX_RBBM_PERFCTR_TP_3_LO 0x0000041e + +#define REG_A5XX_RBBM_PERFCTR_TP_3_HI 0x0000041f + +#define REG_A5XX_RBBM_PERFCTR_TP_4_LO 0x00000420 + +#define REG_A5XX_RBBM_PERFCTR_TP_4_HI 0x00000421 + +#define REG_A5XX_RBBM_PERFCTR_TP_5_LO 0x00000422 + +#define REG_A5XX_RBBM_PERFCTR_TP_5_HI 0x00000423 + +#define REG_A5XX_RBBM_PERFCTR_TP_6_LO 0x00000424 + +#define REG_A5XX_RBBM_PERFCTR_TP_6_HI 0x00000425 + +#define REG_A5XX_RBBM_PERFCTR_TP_7_LO 0x00000426 + +#define REG_A5XX_RBBM_PERFCTR_TP_7_HI 0x00000427 + +#define REG_A5XX_RBBM_PERFCTR_SP_0_LO 0x00000428 + +#define REG_A5XX_RBBM_PERFCTR_SP_0_HI 
0x00000429 + +#define REG_A5XX_RBBM_PERFCTR_SP_1_LO 0x0000042a + +#define REG_A5XX_RBBM_PERFCTR_SP_1_HI 0x0000042b + +#define REG_A5XX_RBBM_PERFCTR_SP_2_LO 0x0000042c + +#define REG_A5XX_RBBM_PERFCTR_SP_2_HI 0x0000042d + +#define REG_A5XX_RBBM_PERFCTR_SP_3_LO 0x0000042e + +#define REG_A5XX_RBBM_PERFCTR_SP_3_HI 0x0000042f + +#define REG_A5XX_RBBM_PERFCTR_SP_4_LO 0x00000430 + +#define REG_A5XX_RBBM_PERFCTR_SP_4_HI 0x00000431 + +#define REG_A5XX_RBBM_PERFCTR_SP_5_LO 0x00000432 + +#define REG_A5XX_RBBM_PERFCTR_SP_5_HI 0x00000433 + +#define REG_A5XX_RBBM_PERFCTR_SP_6_LO 0x00000434 + +#define REG_A5XX_RBBM_PERFCTR_SP_6_HI 0x00000435 + +#define REG_A5XX_RBBM_PERFCTR_SP_7_LO 0x00000436 + +#define REG_A5XX_RBBM_PERFCTR_SP_7_HI 0x00000437 + +#define REG_A5XX_RBBM_PERFCTR_SP_8_LO 0x00000438 + +#define REG_A5XX_RBBM_PERFCTR_SP_8_HI 0x00000439 + +#define REG_A5XX_RBBM_PERFCTR_SP_9_LO 0x0000043a + +#define REG_A5XX_RBBM_PERFCTR_SP_9_HI 0x0000043b + +#define REG_A5XX_RBBM_PERFCTR_SP_10_LO 0x0000043c + +#define REG_A5XX_RBBM_PERFCTR_SP_10_HI 0x0000043d + +#define REG_A5XX_RBBM_PERFCTR_SP_11_LO 0x0000043e + +#define REG_A5XX_RBBM_PERFCTR_SP_11_HI 0x0000043f + +#define REG_A5XX_RBBM_PERFCTR_RB_0_LO 0x00000440 + +#define REG_A5XX_RBBM_PERFCTR_RB_0_HI 0x00000441 + +#define REG_A5XX_RBBM_PERFCTR_RB_1_LO 0x00000442 + +#define REG_A5XX_RBBM_PERFCTR_RB_1_HI 0x00000443 + +#define REG_A5XX_RBBM_PERFCTR_RB_2_LO 0x00000444 + +#define REG_A5XX_RBBM_PERFCTR_RB_2_HI 0x00000445 + +#define REG_A5XX_RBBM_PERFCTR_RB_3_LO 0x00000446 + +#define REG_A5XX_RBBM_PERFCTR_RB_3_HI 0x00000447 + +#define REG_A5XX_RBBM_PERFCTR_RB_4_LO 0x00000448 + +#define REG_A5XX_RBBM_PERFCTR_RB_4_HI 0x00000449 + +#define REG_A5XX_RBBM_PERFCTR_RB_5_LO 0x0000044a + +#define REG_A5XX_RBBM_PERFCTR_RB_5_HI 0x0000044b + +#define REG_A5XX_RBBM_PERFCTR_RB_6_LO 0x0000044c + +#define REG_A5XX_RBBM_PERFCTR_RB_6_HI 0x0000044d + +#define REG_A5XX_RBBM_PERFCTR_RB_7_LO 0x0000044e + +#define REG_A5XX_RBBM_PERFCTR_RB_7_HI 0x0000044f + 
+#define REG_A5XX_RBBM_PERFCTR_VSC_0_LO 0x00000450 + +#define REG_A5XX_RBBM_PERFCTR_VSC_0_HI 0x00000451 + +#define REG_A5XX_RBBM_PERFCTR_VSC_1_LO 0x00000452 + +#define REG_A5XX_RBBM_PERFCTR_VSC_1_HI 0x00000453 + +#define REG_A5XX_RBBM_PERFCTR_LRZ_0_LO 0x00000454 + +#define REG_A5XX_RBBM_PERFCTR_LRZ_0_HI 0x00000455 + +#define REG_A5XX_RBBM_PERFCTR_LRZ_1_LO 0x00000456 + +#define REG_A5XX_RBBM_PERFCTR_LRZ_1_HI 0x00000457 + +#define REG_A5XX_RBBM_PERFCTR_LRZ_2_LO 0x00000458 + +#define REG_A5XX_RBBM_PERFCTR_LRZ_2_HI 0x00000459 + +#define REG_A5XX_RBBM_PERFCTR_LRZ_3_LO 0x0000045a + +#define REG_A5XX_RBBM_PERFCTR_LRZ_3_HI 0x0000045b + +#define REG_A5XX_RBBM_PERFCTR_CMP_0_LO 0x0000045c + +#define REG_A5XX_RBBM_PERFCTR_CMP_0_HI 0x0000045d + +#define REG_A5XX_RBBM_PERFCTR_CMP_1_LO 0x0000045e + +#define REG_A5XX_RBBM_PERFCTR_CMP_1_HI 0x0000045f + +#define REG_A5XX_RBBM_PERFCTR_CMP_2_LO 0x00000460 + +#define REG_A5XX_RBBM_PERFCTR_CMP_2_HI 0x00000461 + +#define REG_A5XX_RBBM_PERFCTR_CMP_3_LO 0x00000462 + +#define REG_A5XX_RBBM_PERFCTR_CMP_3_HI 0x00000463 + +#define REG_A5XX_RBBM_PERFCTR_RBBM_SEL_0 0x0000046b + +#define REG_A5XX_RBBM_PERFCTR_RBBM_SEL_1 0x0000046c + +#define REG_A5XX_RBBM_PERFCTR_RBBM_SEL_2 0x0000046d + +#define REG_A5XX_RBBM_PERFCTR_RBBM_SEL_3 0x0000046e + +#define REG_A5XX_RBBM_ALWAYSON_COUNTER_LO 0x000004d2 + +#define REG_A5XX_RBBM_ALWAYSON_COUNTER_HI 0x000004d3 + +#define REG_A5XX_RBBM_STATUS 0x000004f5 +#define A5XX_RBBM_STATUS_GPU_BUSY_IGN_AHB 0x80000000 +#define A5XX_RBBM_STATUS_GPU_BUSY_IGN_AHB_CP 0x40000000 +#define A5XX_RBBM_STATUS_HLSQ_BUSY 0x20000000 +#define A5XX_RBBM_STATUS_VSC_BUSY 0x10000000 +#define A5XX_RBBM_STATUS_TPL1_BUSY 0x08000000 +#define A5XX_RBBM_STATUS_SP_BUSY 0x04000000 +#define A5XX_RBBM_STATUS_UCHE_BUSY 0x02000000 +#define A5XX_RBBM_STATUS_VPC_BUSY 0x01000000 +#define A5XX_RBBM_STATUS_VFDP_BUSY 0x00800000 +#define A5XX_RBBM_STATUS_VFD_BUSY 0x00400000 +#define A5XX_RBBM_STATUS_TESS_BUSY 0x00200000 +#define A5XX_RBBM_STATUS_PC_VSD_BUSY 
0x00100000 +#define A5XX_RBBM_STATUS_PC_DCALL_BUSY 0x00080000 +#define A5XX_RBBM_STATUS_GPMU_SLAVE_BUSY 0x00040000 +#define A5XX_RBBM_STATUS_DCOM_BUSY 0x00020000 +#define A5XX_RBBM_STATUS_COM_BUSY 0x00010000 +#define A5XX_RBBM_STATUS_LRZ_BUZY 0x00008000 +#define A5XX_RBBM_STATUS_A2D_DSP_BUSY 0x00004000 +#define A5XX_RBBM_STATUS_CCUFCHE_BUSY 0x00002000 +#define A5XX_RBBM_STATUS_RB_BUSY 0x00001000 +#define A5XX_RBBM_STATUS_RAS_BUSY 0x00000800 +#define A5XX_RBBM_STATUS_TSE_BUSY 0x00000400 +#define A5XX_RBBM_STATUS_VBIF_BUSY 0x00000200 +#define A5XX_RBBM_STATUS_GPU_BUSY_IGN_AHB_HYST 0x00000100 +#define A5XX_RBBM_STATUS_CP_BUSY_IGN_HYST 0x00000080 +#define A5XX_RBBM_STATUS_CP_BUSY 0x00000040 +#define A5XX_RBBM_STATUS_GPMU_MASTER_BUSY 0x00000020 +#define A5XX_RBBM_STATUS_CP_CRASH_BUSY 0x00000010 +#define A5XX_RBBM_STATUS_CP_ETS_BUSY 0x00000008 +#define A5XX_RBBM_STATUS_CP_PFP_BUSY 0x00000004 +#define A5XX_RBBM_STATUS_CP_ME_BUSY 0x00000002 +#define A5XX_RBBM_STATUS_HI_BUSY 0x00000001 + +#define REG_A5XX_RBBM_STATUS3 0x00000530 + +#define REG_A5XX_RBBM_INT_0_STATUS 0x000004e1 + +#define REG_A5XX_RBBM_AHB_ME_SPLIT_STATUS 0x000004f0 + +#define REG_A5XX_RBBM_AHB_PFP_SPLIT_STATUS 0x000004f1 + +#define REG_A5XX_RBBM_AHB_ETS_SPLIT_STATUS 0x000004f3 + +#define REG_A5XX_RBBM_AHB_ERROR_STATUS 0x000004f4 + +#define REG_A5XX_RBBM_PERFCTR_CNTL 0x00000464 + +#define REG_A5XX_RBBM_PERFCTR_LOAD_CMD0 0x00000465 + +#define REG_A5XX_RBBM_PERFCTR_LOAD_CMD1 0x00000466 + +#define REG_A5XX_RBBM_PERFCTR_LOAD_CMD2 0x00000467 + +#define REG_A5XX_RBBM_PERFCTR_LOAD_CMD3 0x00000468 + +#define REG_A5XX_RBBM_PERFCTR_LOAD_VALUE_LO 0x00000469 + +#define REG_A5XX_RBBM_PERFCTR_LOAD_VALUE_HI 0x0000046a + +#define REG_A5XX_RBBM_PERFCTR_RBBM_SEL_0 0x0000046b + +#define REG_A5XX_RBBM_PERFCTR_RBBM_SEL_1 0x0000046c + +#define REG_A5XX_RBBM_PERFCTR_RBBM_SEL_2 0x0000046d + +#define REG_A5XX_RBBM_PERFCTR_RBBM_SEL_3 0x0000046e + +#define REG_A5XX_RBBM_PERFCTR_GPU_BUSY_MASKED 0x0000046f + +#define 
REG_A5XX_RBBM_AHB_ERROR 0x000004ed + +#define REG_A5XX_RBBM_CFG_DBGBUS_EVENT_LOGIC 0x00000504 + +#define REG_A5XX_RBBM_CFG_DBGBUS_OVER 0x00000505 + +#define REG_A5XX_RBBM_CFG_DBGBUS_COUNT0 0x00000506 + +#define REG_A5XX_RBBM_CFG_DBGBUS_COUNT1 0x00000507 + +#define REG_A5XX_RBBM_CFG_DBGBUS_COUNT2 0x00000508 + +#define REG_A5XX_RBBM_CFG_DBGBUS_COUNT3 0x00000509 + +#define REG_A5XX_RBBM_CFG_DBGBUS_COUNT4 0x0000050a + +#define REG_A5XX_RBBM_CFG_DBGBUS_COUNT5 0x0000050b + +#define REG_A5XX_RBBM_CFG_DBGBUS_TRACE_ADDR 0x0000050c + +#define REG_A5XX_RBBM_CFG_DBGBUS_TRACE_BUF0 0x0000050d + +#define REG_A5XX_RBBM_CFG_DBGBUS_TRACE_BUF1 0x0000050e + +#define REG_A5XX_RBBM_CFG_DBGBUS_TRACE_BUF2 0x0000050f + +#define REG_A5XX_RBBM_CFG_DBGBUS_TRACE_BUF3 0x00000510 + +#define REG_A5XX_RBBM_CFG_DBGBUS_TRACE_BUF4 0x00000511 + +#define REG_A5XX_RBBM_CFG_DBGBUS_MISR0 0x00000512 + +#define REG_A5XX_RBBM_CFG_DBGBUS_MISR1 0x00000513 + +#define REG_A5XX_RBBM_ISDB_CNT 0x00000533 + +#define REG_A5XX_RBBM_SECVID_TRUST_CONFIG 0x0000f000 + +#define REG_A5XX_RBBM_SECVID_TRUST_CNTL 0x0000f400 + +#define REG_A5XX_RBBM_SECVID_TSB_TRUSTED_BASE_LO 0x0000f800 + +#define REG_A5XX_RBBM_SECVID_TSB_TRUSTED_BASE_HI 0x0000f801 + +#define REG_A5XX_RBBM_SECVID_TSB_TRUSTED_SIZE 0x0000f802 + +#define REG_A5XX_RBBM_SECVID_TSB_CNTL 0x0000f803 + +#define REG_A5XX_RBBM_SECVID_TSB_COMP_STATUS_LO 0x0000f804 + +#define REG_A5XX_RBBM_SECVID_TSB_COMP_STATUS_HI 0x0000f805 + +#define REG_A5XX_RBBM_SECVID_TSB_UCHE_STATUS_LO 0x0000f806 + +#define REG_A5XX_RBBM_SECVID_TSB_UCHE_STATUS_HI 0x0000f807 + +#define REG_A5XX_RBBM_SECVID_TSB_ADDR_MODE_CNTL 0x0000f810 + +#define REG_A5XX_VSC_BIN_SIZE 0x00000bc2 +#define A5XX_VSC_BIN_SIZE_WIDTH__MASK 0x000000ff +#define A5XX_VSC_BIN_SIZE_WIDTH__SHIFT 0 +static inline uint32_t A5XX_VSC_BIN_SIZE_WIDTH(uint32_t val) +{ + assert(!(val & 0x1f)); + return ((val >> 5) << A5XX_VSC_BIN_SIZE_WIDTH__SHIFT) & A5XX_VSC_BIN_SIZE_WIDTH__MASK; +} +#define A5XX_VSC_BIN_SIZE_HEIGHT__MASK 0x0001fe00 
+#define A5XX_VSC_BIN_SIZE_HEIGHT__SHIFT 9 +static inline uint32_t A5XX_VSC_BIN_SIZE_HEIGHT(uint32_t val) +{ + assert(!(val & 0x1f)); + return ((val >> 5) << A5XX_VSC_BIN_SIZE_HEIGHT__SHIFT) & A5XX_VSC_BIN_SIZE_HEIGHT__MASK; +} + +#define REG_A5XX_VSC_SIZE_ADDRESS_LO 0x00000bc3 + +#define REG_A5XX_VSC_SIZE_ADDRESS_HI 0x00000bc4 + +#define REG_A5XX_UNKNOWN_0BC5 0x00000bc5 + +#define REG_A5XX_UNKNOWN_0BC6 0x00000bc6 + +static inline uint32_t REG_A5XX_VSC_PIPE_CONFIG(uint32_t i0) { return 0x00000bd0 + 0x1*i0; } + +static inline uint32_t REG_A5XX_VSC_PIPE_CONFIG_REG(uint32_t i0) { return 0x00000bd0 + 0x1*i0; } +#define A5XX_VSC_PIPE_CONFIG_REG_X__MASK 0x000003ff +#define A5XX_VSC_PIPE_CONFIG_REG_X__SHIFT 0 +static inline uint32_t A5XX_VSC_PIPE_CONFIG_REG_X(uint32_t val) +{ + return ((val) << A5XX_VSC_PIPE_CONFIG_REG_X__SHIFT) & A5XX_VSC_PIPE_CONFIG_REG_X__MASK; +} +#define A5XX_VSC_PIPE_CONFIG_REG_Y__MASK 0x000ffc00 +#define A5XX_VSC_PIPE_CONFIG_REG_Y__SHIFT 10 +static inline uint32_t A5XX_VSC_PIPE_CONFIG_REG_Y(uint32_t val) +{ + return ((val) << A5XX_VSC_PIPE_CONFIG_REG_Y__SHIFT) & A5XX_VSC_PIPE_CONFIG_REG_Y__MASK; +} +#define A5XX_VSC_PIPE_CONFIG_REG_W__MASK 0x00f00000 +#define A5XX_VSC_PIPE_CONFIG_REG_W__SHIFT 20 +static inline uint32_t A5XX_VSC_PIPE_CONFIG_REG_W(uint32_t val) +{ + return ((val) << A5XX_VSC_PIPE_CONFIG_REG_W__SHIFT) & A5XX_VSC_PIPE_CONFIG_REG_W__MASK; +} +#define A5XX_VSC_PIPE_CONFIG_REG_H__MASK 0x0f000000 +#define A5XX_VSC_PIPE_CONFIG_REG_H__SHIFT 24 +static inline uint32_t A5XX_VSC_PIPE_CONFIG_REG_H(uint32_t val) +{ + return ((val) << A5XX_VSC_PIPE_CONFIG_REG_H__SHIFT) & A5XX_VSC_PIPE_CONFIG_REG_H__MASK; +} + +static inline uint32_t REG_A5XX_VSC_PIPE_DATA_ADDRESS(uint32_t i0) { return 0x00000be0 + 0x2*i0; } + +static inline uint32_t REG_A5XX_VSC_PIPE_DATA_ADDRESS_LO(uint32_t i0) { return 0x00000be0 + 0x2*i0; } + +static inline uint32_t REG_A5XX_VSC_PIPE_DATA_ADDRESS_HI(uint32_t i0) { return 0x00000be1 + 0x2*i0; } + +static inline uint32_t 
REG_A5XX_VSC_PIPE_DATA_LENGTH(uint32_t i0) { return 0x00000c00 + 0x1*i0; } + +static inline uint32_t REG_A5XX_VSC_PIPE_DATA_LENGTH_REG(uint32_t i0) { return 0x00000c00 + 0x1*i0; } + +#define REG_A5XX_VSC_PERFCTR_VSC_SEL_0 0x00000c60 + +#define REG_A5XX_VSC_PERFCTR_VSC_SEL_1 0x00000c61 + +#define REG_A5XX_VSC_RESOLVE_CNTL 0x00000cdd +#define A5XX_VSC_RESOLVE_CNTL_WINDOW_OFFSET_DISABLE 0x80000000 +#define A5XX_VSC_RESOLVE_CNTL_X__MASK 0x00007fff +#define A5XX_VSC_RESOLVE_CNTL_X__SHIFT 0 +static inline uint32_t A5XX_VSC_RESOLVE_CNTL_X(uint32_t val) +{ + return ((val) << A5XX_VSC_RESOLVE_CNTL_X__SHIFT) & A5XX_VSC_RESOLVE_CNTL_X__MASK; +} +#define A5XX_VSC_RESOLVE_CNTL_Y__MASK 0x7fff0000 +#define A5XX_VSC_RESOLVE_CNTL_Y__SHIFT 16 +static inline uint32_t A5XX_VSC_RESOLVE_CNTL_Y(uint32_t val) +{ + return ((val) << A5XX_VSC_RESOLVE_CNTL_Y__SHIFT) & A5XX_VSC_RESOLVE_CNTL_Y__MASK; +} + +#define REG_A5XX_GRAS_ADDR_MODE_CNTL 0x00000c81 + +#define REG_A5XX_GRAS_PERFCTR_TSE_SEL_0 0x00000c90 + +#define REG_A5XX_GRAS_PERFCTR_TSE_SEL_1 0x00000c91 + +#define REG_A5XX_GRAS_PERFCTR_TSE_SEL_2 0x00000c92 + +#define REG_A5XX_GRAS_PERFCTR_TSE_SEL_3 0x00000c93 + +#define REG_A5XX_GRAS_PERFCTR_RAS_SEL_0 0x00000c94 + +#define REG_A5XX_GRAS_PERFCTR_RAS_SEL_1 0x00000c95 + +#define REG_A5XX_GRAS_PERFCTR_RAS_SEL_2 0x00000c96 + +#define REG_A5XX_GRAS_PERFCTR_RAS_SEL_3 0x00000c97 + +#define REG_A5XX_GRAS_PERFCTR_LRZ_SEL_0 0x00000c98 + +#define REG_A5XX_GRAS_PERFCTR_LRZ_SEL_1 0x00000c99 + +#define REG_A5XX_GRAS_PERFCTR_LRZ_SEL_2 0x00000c9a + +#define REG_A5XX_GRAS_PERFCTR_LRZ_SEL_3 0x00000c9b + +#define REG_A5XX_RB_DBG_ECO_CNTL 0x00000cc4 + +#define REG_A5XX_RB_ADDR_MODE_CNTL 0x00000cc5 + +#define REG_A5XX_RB_MODE_CNTL 0x00000cc6 + +#define REG_A5XX_RB_CCU_CNTL 0x00000cc7 + +#define REG_A5XX_RB_PERFCTR_RB_SEL_0 0x00000cd0 + +#define REG_A5XX_RB_PERFCTR_RB_SEL_1 0x00000cd1 + +#define REG_A5XX_RB_PERFCTR_RB_SEL_2 0x00000cd2 + +#define REG_A5XX_RB_PERFCTR_RB_SEL_3 0x00000cd3 + +#define 
REG_A5XX_RB_PERFCTR_RB_SEL_4 0x00000cd4 + +#define REG_A5XX_RB_PERFCTR_RB_SEL_5 0x00000cd5 + +#define REG_A5XX_RB_PERFCTR_RB_SEL_6 0x00000cd6 + +#define REG_A5XX_RB_PERFCTR_RB_SEL_7 0x00000cd7 + +#define REG_A5XX_RB_PERFCTR_CCU_SEL_0 0x00000cd8 + +#define REG_A5XX_RB_PERFCTR_CCU_SEL_1 0x00000cd9 + +#define REG_A5XX_RB_PERFCTR_CCU_SEL_2 0x00000cda + +#define REG_A5XX_RB_PERFCTR_CCU_SEL_3 0x00000cdb + +#define REG_A5XX_RB_POWERCTR_RB_SEL_0 0x00000ce0 + +#define REG_A5XX_RB_POWERCTR_RB_SEL_1 0x00000ce1 + +#define REG_A5XX_RB_POWERCTR_RB_SEL_2 0x00000ce2 + +#define REG_A5XX_RB_POWERCTR_RB_SEL_3 0x00000ce3 + +#define REG_A5XX_RB_POWERCTR_CCU_SEL_0 0x00000ce4 + +#define REG_A5XX_RB_POWERCTR_CCU_SEL_1 0x00000ce5 + +#define REG_A5XX_RB_PERFCTR_CMP_SEL_0 0x00000cec + +#define REG_A5XX_RB_PERFCTR_CMP_SEL_1 0x00000ced + +#define REG_A5XX_RB_PERFCTR_CMP_SEL_2 0x00000cee + +#define REG_A5XX_RB_PERFCTR_CMP_SEL_3 0x00000cef + +#define REG_A5XX_PC_DBG_ECO_CNTL 0x00000d00 +#define A5XX_PC_DBG_ECO_CNTL_TWOPASSUSEWFI 0x00000100 + +#define REG_A5XX_PC_ADDR_MODE_CNTL 0x00000d01 + +#define REG_A5XX_PC_MODE_CNTL 0x00000d02 + +#define REG_A5XX_PC_INDEX_BUF_LO 0x00000d04 + +#define REG_A5XX_PC_INDEX_BUF_HI 0x00000d05 + +#define REG_A5XX_PC_START_INDEX 0x00000d06 + +#define REG_A5XX_PC_MAX_INDEX 0x00000d07 + +#define REG_A5XX_PC_TESSFACTOR_ADDR_LO 0x00000d08 + +#define REG_A5XX_PC_TESSFACTOR_ADDR_HI 0x00000d09 + +#define REG_A5XX_PC_PERFCTR_PC_SEL_0 0x00000d10 + +#define REG_A5XX_PC_PERFCTR_PC_SEL_1 0x00000d11 + +#define REG_A5XX_PC_PERFCTR_PC_SEL_2 0x00000d12 + +#define REG_A5XX_PC_PERFCTR_PC_SEL_3 0x00000d13 + +#define REG_A5XX_PC_PERFCTR_PC_SEL_4 0x00000d14 + +#define REG_A5XX_PC_PERFCTR_PC_SEL_5 0x00000d15 + +#define REG_A5XX_PC_PERFCTR_PC_SEL_6 0x00000d16 + +#define REG_A5XX_PC_PERFCTR_PC_SEL_7 0x00000d17 + +#define REG_A5XX_HLSQ_TIMEOUT_THRESHOLD_0 0x00000e00 + +#define REG_A5XX_HLSQ_TIMEOUT_THRESHOLD_1 0x00000e01 + +#define REG_A5XX_HLSQ_ADDR_MODE_CNTL 0x00000e05 + +#define 
REG_A5XX_HLSQ_MODE_CNTL 0x00000e06 + +#define REG_A5XX_HLSQ_PERFCTR_HLSQ_SEL_0 0x00000e10 + +#define REG_A5XX_HLSQ_PERFCTR_HLSQ_SEL_1 0x00000e11 + +#define REG_A5XX_HLSQ_PERFCTR_HLSQ_SEL_2 0x00000e12 + +#define REG_A5XX_HLSQ_PERFCTR_HLSQ_SEL_3 0x00000e13 + +#define REG_A5XX_HLSQ_PERFCTR_HLSQ_SEL_4 0x00000e14 + +#define REG_A5XX_HLSQ_PERFCTR_HLSQ_SEL_5 0x00000e15 + +#define REG_A5XX_HLSQ_PERFCTR_HLSQ_SEL_6 0x00000e16 + +#define REG_A5XX_HLSQ_PERFCTR_HLSQ_SEL_7 0x00000e17 + +#define REG_A5XX_HLSQ_SPTP_RDSEL 0x00000f08 + +#define REG_A5XX_HLSQ_DBG_READ_SEL 0x0000bc00 + +#define REG_A5XX_HLSQ_DBG_AHB_READ_APERTURE 0x0000a000 + +#define REG_A5XX_VFD_ADDR_MODE_CNTL 0x00000e41 + +#define REG_A5XX_VFD_MODE_CNTL 0x00000e42 + +#define REG_A5XX_VFD_PERFCTR_VFD_SEL_0 0x00000e50 + +#define REG_A5XX_VFD_PERFCTR_VFD_SEL_1 0x00000e51 + +#define REG_A5XX_VFD_PERFCTR_VFD_SEL_2 0x00000e52 + +#define REG_A5XX_VFD_PERFCTR_VFD_SEL_3 0x00000e53 + +#define REG_A5XX_VFD_PERFCTR_VFD_SEL_4 0x00000e54 + +#define REG_A5XX_VFD_PERFCTR_VFD_SEL_5 0x00000e55 + +#define REG_A5XX_VFD_PERFCTR_VFD_SEL_6 0x00000e56 + +#define REG_A5XX_VFD_PERFCTR_VFD_SEL_7 0x00000e57 + +#define REG_A5XX_VPC_DBG_ECO_CNTL 0x00000e60 + +#define REG_A5XX_VPC_ADDR_MODE_CNTL 0x00000e61 + +#define REG_A5XX_VPC_MODE_CNTL 0x00000e62 +#define A5XX_VPC_MODE_CNTL_BINNING_PASS 0x00000001 + +#define REG_A5XX_VPC_PERFCTR_VPC_SEL_0 0x00000e64 + +#define REG_A5XX_VPC_PERFCTR_VPC_SEL_1 0x00000e65 + +#define REG_A5XX_VPC_PERFCTR_VPC_SEL_2 0x00000e66 + +#define REG_A5XX_VPC_PERFCTR_VPC_SEL_3 0x00000e67 + +#define REG_A5XX_UCHE_ADDR_MODE_CNTL 0x00000e80 + +#define REG_A5XX_UCHE_SVM_CNTL 0x00000e82 + +#define REG_A5XX_UCHE_WRITE_THRU_BASE_LO 0x00000e87 + +#define REG_A5XX_UCHE_WRITE_THRU_BASE_HI 0x00000e88 + +#define REG_A5XX_UCHE_TRAP_BASE_LO 0x00000e89 + +#define REG_A5XX_UCHE_TRAP_BASE_HI 0x00000e8a + +#define REG_A5XX_UCHE_GMEM_RANGE_MIN_LO 0x00000e8b + +#define REG_A5XX_UCHE_GMEM_RANGE_MIN_HI 0x00000e8c + +#define 
REG_A5XX_UCHE_GMEM_RANGE_MAX_LO 0x00000e8d + +#define REG_A5XX_UCHE_GMEM_RANGE_MAX_HI 0x00000e8e + +#define REG_A5XX_UCHE_DBG_ECO_CNTL_2 0x00000e8f + +#define REG_A5XX_UCHE_DBG_ECO_CNTL 0x00000e90 + +#define REG_A5XX_UCHE_CACHE_INVALIDATE_MIN_LO 0x00000e91 + +#define REG_A5XX_UCHE_CACHE_INVALIDATE_MIN_HI 0x00000e92 + +#define REG_A5XX_UCHE_CACHE_INVALIDATE_MAX_LO 0x00000e93 + +#define REG_A5XX_UCHE_CACHE_INVALIDATE_MAX_HI 0x00000e94 + +#define REG_A5XX_UCHE_CACHE_INVALIDATE 0x00000e95 + +#define REG_A5XX_UCHE_CACHE_WAYS 0x00000e96 + +#define REG_A5XX_UCHE_PERFCTR_UCHE_SEL_0 0x00000ea0 + +#define REG_A5XX_UCHE_PERFCTR_UCHE_SEL_1 0x00000ea1 + +#define REG_A5XX_UCHE_PERFCTR_UCHE_SEL_2 0x00000ea2 + +#define REG_A5XX_UCHE_PERFCTR_UCHE_SEL_3 0x00000ea3 + +#define REG_A5XX_UCHE_PERFCTR_UCHE_SEL_4 0x00000ea4 + +#define REG_A5XX_UCHE_PERFCTR_UCHE_SEL_5 0x00000ea5 + +#define REG_A5XX_UCHE_PERFCTR_UCHE_SEL_6 0x00000ea6 + +#define REG_A5XX_UCHE_PERFCTR_UCHE_SEL_7 0x00000ea7 + +#define REG_A5XX_UCHE_POWERCTR_UCHE_SEL_0 0x00000ea8 + +#define REG_A5XX_UCHE_POWERCTR_UCHE_SEL_1 0x00000ea9 + +#define REG_A5XX_UCHE_POWERCTR_UCHE_SEL_2 0x00000eaa + +#define REG_A5XX_UCHE_POWERCTR_UCHE_SEL_3 0x00000eab + +#define REG_A5XX_UCHE_TRAP_LOG_LO 0x00000eb1 + +#define REG_A5XX_UCHE_TRAP_LOG_HI 0x00000eb2 + +#define REG_A5XX_SP_DBG_ECO_CNTL 0x00000ec0 + +#define REG_A5XX_SP_ADDR_MODE_CNTL 0x00000ec1 + +#define REG_A5XX_SP_MODE_CNTL 0x00000ec2 + +#define REG_A5XX_SP_PERFCTR_SP_SEL_0 0x00000ed0 + +#define REG_A5XX_SP_PERFCTR_SP_SEL_1 0x00000ed1 + +#define REG_A5XX_SP_PERFCTR_SP_SEL_2 0x00000ed2 + +#define REG_A5XX_SP_PERFCTR_SP_SEL_3 0x00000ed3 + +#define REG_A5XX_SP_PERFCTR_SP_SEL_4 0x00000ed4 + +#define REG_A5XX_SP_PERFCTR_SP_SEL_5 0x00000ed5 + +#define REG_A5XX_SP_PERFCTR_SP_SEL_6 0x00000ed6 + +#define REG_A5XX_SP_PERFCTR_SP_SEL_7 0x00000ed7 + +#define REG_A5XX_SP_PERFCTR_SP_SEL_8 0x00000ed8 + +#define REG_A5XX_SP_PERFCTR_SP_SEL_9 0x00000ed9 + +#define REG_A5XX_SP_PERFCTR_SP_SEL_10 0x00000eda 
+ +#define REG_A5XX_SP_PERFCTR_SP_SEL_11 0x00000edb + +#define REG_A5XX_SP_POWERCTR_SP_SEL_0 0x00000edc + +#define REG_A5XX_SP_POWERCTR_SP_SEL_1 0x00000edd + +#define REG_A5XX_SP_POWERCTR_SP_SEL_2 0x00000ede + +#define REG_A5XX_SP_POWERCTR_SP_SEL_3 0x00000edf + +#define REG_A5XX_TPL1_ADDR_MODE_CNTL 0x00000f01 + +#define REG_A5XX_TPL1_MODE_CNTL 0x00000f02 + +#define REG_A5XX_TPL1_PERFCTR_TP_SEL_0 0x00000f10 + +#define REG_A5XX_TPL1_PERFCTR_TP_SEL_1 0x00000f11 + +#define REG_A5XX_TPL1_PERFCTR_TP_SEL_2 0x00000f12 + +#define REG_A5XX_TPL1_PERFCTR_TP_SEL_3 0x00000f13 + +#define REG_A5XX_TPL1_PERFCTR_TP_SEL_4 0x00000f14 + +#define REG_A5XX_TPL1_PERFCTR_TP_SEL_5 0x00000f15 + +#define REG_A5XX_TPL1_PERFCTR_TP_SEL_6 0x00000f16 + +#define REG_A5XX_TPL1_PERFCTR_TP_SEL_7 0x00000f17 + +#define REG_A5XX_TPL1_POWERCTR_TP_SEL_0 0x00000f18 + +#define REG_A5XX_TPL1_POWERCTR_TP_SEL_1 0x00000f19 + +#define REG_A5XX_TPL1_POWERCTR_TP_SEL_2 0x00000f1a + +#define REG_A5XX_TPL1_POWERCTR_TP_SEL_3 0x00000f1b + +#define REG_A5XX_VBIF_VERSION 0x00003000 + +#define REG_A5XX_VBIF_CLKON 0x00003001 + +#define REG_A5XX_VBIF_ABIT_SORT 0x00003028 + +#define REG_A5XX_VBIF_ABIT_SORT_CONF 0x00003029 + +#define REG_A5XX_VBIF_ROUND_ROBIN_QOS_ARB 0x00003049 + +#define REG_A5XX_VBIF_GATE_OFF_WRREQ_EN 0x0000302a + +#define REG_A5XX_VBIF_IN_RD_LIM_CONF0 0x0000302c + +#define REG_A5XX_VBIF_IN_RD_LIM_CONF1 0x0000302d + +#define REG_A5XX_VBIF_XIN_HALT_CTRL0 0x00003080 + +#define REG_A5XX_VBIF_XIN_HALT_CTRL1 0x00003081 + +#define REG_A5XX_VBIF_TEST_BUS_OUT_CTRL 0x00003084 + +#define REG_A5XX_VBIF_TEST_BUS1_CTRL0 0x00003085 + +#define REG_A5XX_VBIF_TEST_BUS1_CTRL1 0x00003086 + +#define REG_A5XX_VBIF_TEST_BUS2_CTRL0 0x00003087 + +#define REG_A5XX_VBIF_TEST_BUS2_CTRL1 0x00003088 + +#define REG_A5XX_VBIF_TEST_BUS_OUT 0x0000308c + +#define REG_A5XX_VBIF_PERF_CNT_EN0 0x000030c0 + +#define REG_A5XX_VBIF_PERF_CNT_EN1 0x000030c1 + +#define REG_A5XX_VBIF_PERF_CNT_EN2 0x000030c2 + +#define REG_A5XX_VBIF_PERF_CNT_EN3 
0x000030c3 + +#define REG_A5XX_VBIF_PERF_CNT_CLR0 0x000030c8 + +#define REG_A5XX_VBIF_PERF_CNT_CLR1 0x000030c9 + +#define REG_A5XX_VBIF_PERF_CNT_CLR2 0x000030ca + +#define REG_A5XX_VBIF_PERF_CNT_CLR3 0x000030cb + +#define REG_A5XX_VBIF_PERF_CNT_SEL0 0x000030d0 + +#define REG_A5XX_VBIF_PERF_CNT_SEL1 0x000030d1 + +#define REG_A5XX_VBIF_PERF_CNT_SEL2 0x000030d2 + +#define REG_A5XX_VBIF_PERF_CNT_SEL3 0x000030d3 + +#define REG_A5XX_VBIF_PERF_CNT_LOW0 0x000030d8 + +#define REG_A5XX_VBIF_PERF_CNT_LOW1 0x000030d9 + +#define REG_A5XX_VBIF_PERF_CNT_LOW2 0x000030da + +#define REG_A5XX_VBIF_PERF_CNT_LOW3 0x000030db + +#define REG_A5XX_VBIF_PERF_CNT_HIGH0 0x000030e0 + +#define REG_A5XX_VBIF_PERF_CNT_HIGH1 0x000030e1 + +#define REG_A5XX_VBIF_PERF_CNT_HIGH2 0x000030e2 + +#define REG_A5XX_VBIF_PERF_CNT_HIGH3 0x000030e3 + +#define REG_A5XX_VBIF_PERF_PWR_CNT_EN0 0x00003100 + +#define REG_A5XX_VBIF_PERF_PWR_CNT_EN1 0x00003101 + +#define REG_A5XX_VBIF_PERF_PWR_CNT_EN2 0x00003102 + +#define REG_A5XX_VBIF_PERF_PWR_CNT_LOW0 0x00003110 + +#define REG_A5XX_VBIF_PERF_PWR_CNT_LOW1 0x00003111 + +#define REG_A5XX_VBIF_PERF_PWR_CNT_LOW2 0x00003112 + +#define REG_A5XX_VBIF_PERF_PWR_CNT_HIGH0 0x00003118 + +#define REG_A5XX_VBIF_PERF_PWR_CNT_HIGH1 0x00003119 + +#define REG_A5XX_VBIF_PERF_PWR_CNT_HIGH2 0x0000311a + +#define REG_A5XX_GPMU_INST_RAM_BASE 0x00008800 + +#define REG_A5XX_GPMU_DATA_RAM_BASE 0x00009800 + +#define REG_A5XX_GPMU_SP_POWER_CNTL 0x0000a881 + +#define REG_A5XX_GPMU_RBCCU_CLOCK_CNTL 0x0000a886 + +#define REG_A5XX_GPMU_RBCCU_POWER_CNTL 0x0000a887 + +#define REG_A5XX_GPMU_SP_PWR_CLK_STATUS 0x0000a88b +#define A5XX_GPMU_SP_PWR_CLK_STATUS_PWR_ON 0x00100000 + +#define REG_A5XX_GPMU_RBCCU_PWR_CLK_STATUS 0x0000a88d +#define A5XX_GPMU_RBCCU_PWR_CLK_STATUS_PWR_ON 0x00100000 + +#define REG_A5XX_GPMU_PWR_COL_STAGGER_DELAY 0x0000a891 + +#define REG_A5XX_GPMU_PWR_COL_INTER_FRAME_CTRL 0x0000a892 + +#define REG_A5XX_GPMU_PWR_COL_INTER_FRAME_HYST 0x0000a893 + +#define 
REG_A5XX_GPMU_PWR_COL_BINNING_CTRL 0x0000a894 + +#define REG_A5XX_GPMU_CLOCK_THROTTLE_CTRL 0x0000a8a3 + +#define REG_A5XX_GPMU_WFI_CONFIG 0x0000a8c1 + +#define REG_A5XX_GPMU_RBBM_INTR_INFO 0x0000a8d6 + +#define REG_A5XX_GPMU_CM3_SYSRESET 0x0000a8d8 + +#define REG_A5XX_GPMU_GENERAL_0 0x0000a8e0 + +#define REG_A5XX_GPMU_GENERAL_1 0x0000a8e1 + +#define REG_A5XX_SP_POWER_COUNTER_0_LO 0x0000a840 + +#define REG_A5XX_SP_POWER_COUNTER_0_HI 0x0000a841 + +#define REG_A5XX_SP_POWER_COUNTER_1_LO 0x0000a842 + +#define REG_A5XX_SP_POWER_COUNTER_1_HI 0x0000a843 + +#define REG_A5XX_SP_POWER_COUNTER_2_LO 0x0000a844 + +#define REG_A5XX_SP_POWER_COUNTER_2_HI 0x0000a845 + +#define REG_A5XX_SP_POWER_COUNTER_3_LO 0x0000a846 + +#define REG_A5XX_SP_POWER_COUNTER_3_HI 0x0000a847 + +#define REG_A5XX_TP_POWER_COUNTER_0_LO 0x0000a848 + +#define REG_A5XX_TP_POWER_COUNTER_0_HI 0x0000a849 + +#define REG_A5XX_TP_POWER_COUNTER_1_LO 0x0000a84a + +#define REG_A5XX_TP_POWER_COUNTER_1_HI 0x0000a84b + +#define REG_A5XX_TP_POWER_COUNTER_2_LO 0x0000a84c + +#define REG_A5XX_TP_POWER_COUNTER_2_HI 0x0000a84d + +#define REG_A5XX_TP_POWER_COUNTER_3_LO 0x0000a84e + +#define REG_A5XX_TP_POWER_COUNTER_3_HI 0x0000a84f + +#define REG_A5XX_RB_POWER_COUNTER_0_LO 0x0000a850 + +#define REG_A5XX_RB_POWER_COUNTER_0_HI 0x0000a851 + +#define REG_A5XX_RB_POWER_COUNTER_1_LO 0x0000a852 + +#define REG_A5XX_RB_POWER_COUNTER_1_HI 0x0000a853 + +#define REG_A5XX_RB_POWER_COUNTER_2_LO 0x0000a854 + +#define REG_A5XX_RB_POWER_COUNTER_2_HI 0x0000a855 + +#define REG_A5XX_RB_POWER_COUNTER_3_LO 0x0000a856 + +#define REG_A5XX_RB_POWER_COUNTER_3_HI 0x0000a857 + +#define REG_A5XX_CCU_POWER_COUNTER_0_LO 0x0000a858 + +#define REG_A5XX_CCU_POWER_COUNTER_0_HI 0x0000a859 + +#define REG_A5XX_CCU_POWER_COUNTER_1_LO 0x0000a85a + +#define REG_A5XX_CCU_POWER_COUNTER_1_HI 0x0000a85b + +#define REG_A5XX_UCHE_POWER_COUNTER_0_LO 0x0000a85c + +#define REG_A5XX_UCHE_POWER_COUNTER_0_HI 0x0000a85d + +#define REG_A5XX_UCHE_POWER_COUNTER_1_LO 0x0000a85e + 
+#define REG_A5XX_UCHE_POWER_COUNTER_1_HI 0x0000a85f + +#define REG_A5XX_UCHE_POWER_COUNTER_2_LO 0x0000a860 + +#define REG_A5XX_UCHE_POWER_COUNTER_2_HI 0x0000a861 + +#define REG_A5XX_UCHE_POWER_COUNTER_3_LO 0x0000a862 + +#define REG_A5XX_UCHE_POWER_COUNTER_3_HI 0x0000a863 + +#define REG_A5XX_CP_POWER_COUNTER_0_LO 0x0000a864 + +#define REG_A5XX_CP_POWER_COUNTER_0_HI 0x0000a865 + +#define REG_A5XX_CP_POWER_COUNTER_1_LO 0x0000a866 + +#define REG_A5XX_CP_POWER_COUNTER_1_HI 0x0000a867 + +#define REG_A5XX_CP_POWER_COUNTER_2_LO 0x0000a868 + +#define REG_A5XX_CP_POWER_COUNTER_2_HI 0x0000a869 + +#define REG_A5XX_CP_POWER_COUNTER_3_LO 0x0000a86a + +#define REG_A5XX_CP_POWER_COUNTER_3_HI 0x0000a86b + +#define REG_A5XX_GPMU_POWER_COUNTER_0_LO 0x0000a86c + +#define REG_A5XX_GPMU_POWER_COUNTER_0_HI 0x0000a86d + +#define REG_A5XX_GPMU_POWER_COUNTER_1_LO 0x0000a86e + +#define REG_A5XX_GPMU_POWER_COUNTER_1_HI 0x0000a86f + +#define REG_A5XX_GPMU_POWER_COUNTER_2_LO 0x0000a870 + +#define REG_A5XX_GPMU_POWER_COUNTER_2_HI 0x0000a871 + +#define REG_A5XX_GPMU_POWER_COUNTER_3_LO 0x0000a872 + +#define REG_A5XX_GPMU_POWER_COUNTER_3_HI 0x0000a873 + +#define REG_A5XX_GPMU_POWER_COUNTER_4_LO 0x0000a874 + +#define REG_A5XX_GPMU_POWER_COUNTER_4_HI 0x0000a875 + +#define REG_A5XX_GPMU_POWER_COUNTER_5_LO 0x0000a876 + +#define REG_A5XX_GPMU_POWER_COUNTER_5_HI 0x0000a877 + +#define REG_A5XX_GPMU_POWER_COUNTER_ENABLE 0x0000a878 + +#define REG_A5XX_GPMU_ALWAYS_ON_COUNTER_LO 0x0000a879 + +#define REG_A5XX_GPMU_ALWAYS_ON_COUNTER_HI 0x0000a87a + +#define REG_A5XX_GPMU_ALWAYS_ON_COUNTER_RESET 0x0000a87b + +#define REG_A5XX_GPMU_POWER_COUNTER_SELECT_0 0x0000a87c + +#define REG_A5XX_GPMU_POWER_COUNTER_SELECT_1 0x0000a87d + +#define REG_A5XX_GPMU_CLOCK_THROTTLE_CTRL 0x0000a8a3 + +#define REG_A5XX_GPMU_THROTTLE_UNMASK_FORCE_CTRL 0x0000a8a8 + +#define REG_A5XX_GPMU_TEMP_SENSOR_ID 0x0000ac00 + +#define REG_A5XX_GPMU_TEMP_SENSOR_CONFIG 0x0000ac01 + +#define REG_A5XX_GPMU_TEMP_VAL 0x0000ac02 + +#define 
REG_A5XX_GPMU_DELTA_TEMP_THRESHOLD 0x0000ac03 + +#define REG_A5XX_GPMU_TEMP_THRESHOLD_INTR_STATUS 0x0000ac05 + +#define REG_A5XX_GPMU_TEMP_THRESHOLD_INTR_EN_MASK 0x0000ac06 + +#define REG_A5XX_GPMU_LEAKAGE_TEMP_COEFF_0_1 0x0000ac40 + +#define REG_A5XX_GPMU_LEAKAGE_TEMP_COEFF_2_3 0x0000ac41 + +#define REG_A5XX_GPMU_LEAKAGE_VTG_COEFF_0_1 0x0000ac42 + +#define REG_A5XX_GPMU_LEAKAGE_VTG_COEFF_2_3 0x0000ac43 + +#define REG_A5XX_GPMU_BASE_LEAKAGE 0x0000ac46 + +#define REG_A5XX_GPMU_GPMU_VOLTAGE 0x0000ac60 + +#define REG_A5XX_GPMU_GPMU_VOLTAGE_INTR_STATUS 0x0000ac61 + +#define REG_A5XX_GPMU_GPMU_VOLTAGE_INTR_EN_MASK 0x0000ac62 + +#define REG_A5XX_GPMU_GPMU_PWR_THRESHOLD 0x0000ac80 + +#define REG_A5XX_GPMU_GPMU_LLM_GLM_SLEEP_CTRL 0x0000acc4 + +#define REG_A5XX_GPMU_GPMU_LLM_GLM_SLEEP_STATUS 0x0000acc5 + +#define REG_A5XX_GDPM_CONFIG1 0x0000b80c + +#define REG_A5XX_GDPM_CONFIG2 0x0000b80d + +#define REG_A5XX_GDPM_INT_EN 0x0000b80f + +#define REG_A5XX_GDPM_INT_MASK 0x0000b811 + +#define REG_A5XX_GPMU_BEC_ENABLE 0x0000b9a0 + +#define REG_A5XX_GPU_CS_SENSOR_GENERAL_STATUS 0x0000c41a + +#define REG_A5XX_GPU_CS_AMP_CALIBRATION_STATUS1_0 0x0000c41d + +#define REG_A5XX_GPU_CS_AMP_CALIBRATION_STATUS1_2 0x0000c41f + +#define REG_A5XX_GPU_CS_AMP_CALIBRATION_STATUS1_4 0x0000c421 + +#define REG_A5XX_GPU_CS_ENABLE_REG 0x0000c520 + +#define REG_A5XX_GPU_CS_AMP_CALIBRATION_CONTROL1 0x0000c557 + +#define REG_A5XX_GRAS_CL_CNTL 0x0000e000 +#define A5XX_GRAS_CL_CNTL_ZERO_GB_SCALE_Z 0x00000040 + +#define REG_A5XX_UNKNOWN_E001 0x0000e001 + +#define REG_A5XX_UNKNOWN_E004 0x0000e004 + +#define REG_A5XX_GRAS_CNTL 0x0000e005 +#define A5XX_GRAS_CNTL_VARYING 0x00000001 +#define A5XX_GRAS_CNTL_UNK3 0x00000008 +#define A5XX_GRAS_CNTL_XCOORD 0x00000040 +#define A5XX_GRAS_CNTL_YCOORD 0x00000080 +#define A5XX_GRAS_CNTL_ZCOORD 0x00000100 +#define A5XX_GRAS_CNTL_WCOORD 0x00000200 + +#define REG_A5XX_GRAS_CL_GUARDBAND_CLIP_ADJ 0x0000e006 +#define A5XX_GRAS_CL_GUARDBAND_CLIP_ADJ_HORZ__MASK 0x000003ff +#define 
A5XX_GRAS_CL_GUARDBAND_CLIP_ADJ_HORZ__SHIFT 0 +static inline uint32_t A5XX_GRAS_CL_GUARDBAND_CLIP_ADJ_HORZ(uint32_t val) +{ + return ((val) << A5XX_GRAS_CL_GUARDBAND_CLIP_ADJ_HORZ__SHIFT) & A5XX_GRAS_CL_GUARDBAND_CLIP_ADJ_HORZ__MASK; +} +#define A5XX_GRAS_CL_GUARDBAND_CLIP_ADJ_VERT__MASK 0x000ffc00 +#define A5XX_GRAS_CL_GUARDBAND_CLIP_ADJ_VERT__SHIFT 10 +static inline uint32_t A5XX_GRAS_CL_GUARDBAND_CLIP_ADJ_VERT(uint32_t val) +{ + return ((val) << A5XX_GRAS_CL_GUARDBAND_CLIP_ADJ_VERT__SHIFT) & A5XX_GRAS_CL_GUARDBAND_CLIP_ADJ_VERT__MASK; +} + +#define REG_A5XX_GRAS_CL_VPORT_XOFFSET_0 0x0000e010 +#define A5XX_GRAS_CL_VPORT_XOFFSET_0__MASK 0xffffffff +#define A5XX_GRAS_CL_VPORT_XOFFSET_0__SHIFT 0 +static inline uint32_t A5XX_GRAS_CL_VPORT_XOFFSET_0(float val) +{ + return ((fui(val)) << A5XX_GRAS_CL_VPORT_XOFFSET_0__SHIFT) & A5XX_GRAS_CL_VPORT_XOFFSET_0__MASK; +} + +#define REG_A5XX_GRAS_CL_VPORT_XSCALE_0 0x0000e011 +#define A5XX_GRAS_CL_VPORT_XSCALE_0__MASK 0xffffffff +#define A5XX_GRAS_CL_VPORT_XSCALE_0__SHIFT 0 +static inline uint32_t A5XX_GRAS_CL_VPORT_XSCALE_0(float val) +{ + return ((fui(val)) << A5XX_GRAS_CL_VPORT_XSCALE_0__SHIFT) & A5XX_GRAS_CL_VPORT_XSCALE_0__MASK; +} + +#define REG_A5XX_GRAS_CL_VPORT_YOFFSET_0 0x0000e012 +#define A5XX_GRAS_CL_VPORT_YOFFSET_0__MASK 0xffffffff +#define A5XX_GRAS_CL_VPORT_YOFFSET_0__SHIFT 0 +static inline uint32_t A5XX_GRAS_CL_VPORT_YOFFSET_0(float val) +{ + return ((fui(val)) << A5XX_GRAS_CL_VPORT_YOFFSET_0__SHIFT) & A5XX_GRAS_CL_VPORT_YOFFSET_0__MASK; +} + +#define REG_A5XX_GRAS_CL_VPORT_YSCALE_0 0x0000e013 +#define A5XX_GRAS_CL_VPORT_YSCALE_0__MASK 0xffffffff +#define A5XX_GRAS_CL_VPORT_YSCALE_0__SHIFT 0 +static inline uint32_t A5XX_GRAS_CL_VPORT_YSCALE_0(float val) +{ + return ((fui(val)) << A5XX_GRAS_CL_VPORT_YSCALE_0__SHIFT) & A5XX_GRAS_CL_VPORT_YSCALE_0__MASK; +} + +#define REG_A5XX_GRAS_CL_VPORT_ZOFFSET_0 0x0000e014 +#define A5XX_GRAS_CL_VPORT_ZOFFSET_0__MASK 0xffffffff +#define A5XX_GRAS_CL_VPORT_ZOFFSET_0__SHIFT 0 
+static inline uint32_t A5XX_GRAS_CL_VPORT_ZOFFSET_0(float val) +{ + return ((fui(val)) << A5XX_GRAS_CL_VPORT_ZOFFSET_0__SHIFT) & A5XX_GRAS_CL_VPORT_ZOFFSET_0__MASK; +} + +#define REG_A5XX_GRAS_CL_VPORT_ZSCALE_0 0x0000e015 +#define A5XX_GRAS_CL_VPORT_ZSCALE_0__MASK 0xffffffff +#define A5XX_GRAS_CL_VPORT_ZSCALE_0__SHIFT 0 +static inline uint32_t A5XX_GRAS_CL_VPORT_ZSCALE_0(float val) +{ + return ((fui(val)) << A5XX_GRAS_CL_VPORT_ZSCALE_0__SHIFT) & A5XX_GRAS_CL_VPORT_ZSCALE_0__MASK; +} + +#define REG_A5XX_GRAS_SU_CNTL 0x0000e090 +#define A5XX_GRAS_SU_CNTL_CULL_FRONT 0x00000001 +#define A5XX_GRAS_SU_CNTL_CULL_BACK 0x00000002 +#define A5XX_GRAS_SU_CNTL_FRONT_CW 0x00000004 +#define A5XX_GRAS_SU_CNTL_LINEHALFWIDTH__MASK 0x000007f8 +#define A5XX_GRAS_SU_CNTL_LINEHALFWIDTH__SHIFT 3 +static inline uint32_t A5XX_GRAS_SU_CNTL_LINEHALFWIDTH(float val) +{ + return ((((int32_t)(val * 4.0))) << A5XX_GRAS_SU_CNTL_LINEHALFWIDTH__SHIFT) & A5XX_GRAS_SU_CNTL_LINEHALFWIDTH__MASK; +} +#define A5XX_GRAS_SU_CNTL_POLY_OFFSET 0x00000800 +#define A5XX_GRAS_SU_CNTL_MSAA_ENABLE 0x00002000 + +#define REG_A5XX_GRAS_SU_POINT_MINMAX 0x0000e091 +#define A5XX_GRAS_SU_POINT_MINMAX_MIN__MASK 0x0000ffff +#define A5XX_GRAS_SU_POINT_MINMAX_MIN__SHIFT 0 +static inline uint32_t A5XX_GRAS_SU_POINT_MINMAX_MIN(float val) +{ + return ((((uint32_t)(val * 16.0))) << A5XX_GRAS_SU_POINT_MINMAX_MIN__SHIFT) & A5XX_GRAS_SU_POINT_MINMAX_MIN__MASK; +} +#define A5XX_GRAS_SU_POINT_MINMAX_MAX__MASK 0xffff0000 +#define A5XX_GRAS_SU_POINT_MINMAX_MAX__SHIFT 16 +static inline uint32_t A5XX_GRAS_SU_POINT_MINMAX_MAX(float val) +{ + return ((((uint32_t)(val * 16.0))) << A5XX_GRAS_SU_POINT_MINMAX_MAX__SHIFT) & A5XX_GRAS_SU_POINT_MINMAX_MAX__MASK; +} + +#define REG_A5XX_GRAS_SU_POINT_SIZE 0x0000e092 +#define A5XX_GRAS_SU_POINT_SIZE__MASK 0xffffffff +#define A5XX_GRAS_SU_POINT_SIZE__SHIFT 0 +static inline uint32_t A5XX_GRAS_SU_POINT_SIZE(float val) +{ + return ((((int32_t)(val * 16.0))) << A5XX_GRAS_SU_POINT_SIZE__SHIFT) & 
A5XX_GRAS_SU_POINT_SIZE__MASK; +} + +#define REG_A5XX_GRAS_SU_LAYERED 0x0000e093 + +#define REG_A5XX_GRAS_SU_DEPTH_PLANE_CNTL 0x0000e094 +#define A5XX_GRAS_SU_DEPTH_PLANE_CNTL_FRAG_WRITES_Z 0x00000001 +#define A5XX_GRAS_SU_DEPTH_PLANE_CNTL_UNK1 0x00000002 + +#define REG_A5XX_GRAS_SU_POLY_OFFSET_SCALE 0x0000e095 +#define A5XX_GRAS_SU_POLY_OFFSET_SCALE__MASK 0xffffffff +#define A5XX_GRAS_SU_POLY_OFFSET_SCALE__SHIFT 0 +static inline uint32_t A5XX_GRAS_SU_POLY_OFFSET_SCALE(float val) +{ + return ((fui(val)) << A5XX_GRAS_SU_POLY_OFFSET_SCALE__SHIFT) & A5XX_GRAS_SU_POLY_OFFSET_SCALE__MASK; +} + +#define REG_A5XX_GRAS_SU_POLY_OFFSET_OFFSET 0x0000e096 +#define A5XX_GRAS_SU_POLY_OFFSET_OFFSET__MASK 0xffffffff +#define A5XX_GRAS_SU_POLY_OFFSET_OFFSET__SHIFT 0 +static inline uint32_t A5XX_GRAS_SU_POLY_OFFSET_OFFSET(float val) +{ + return ((fui(val)) << A5XX_GRAS_SU_POLY_OFFSET_OFFSET__SHIFT) & A5XX_GRAS_SU_POLY_OFFSET_OFFSET__MASK; +} + +#define REG_A5XX_GRAS_SU_POLY_OFFSET_OFFSET_CLAMP 0x0000e097 +#define A5XX_GRAS_SU_POLY_OFFSET_OFFSET_CLAMP__MASK 0xffffffff +#define A5XX_GRAS_SU_POLY_OFFSET_OFFSET_CLAMP__SHIFT 0 +static inline uint32_t A5XX_GRAS_SU_POLY_OFFSET_OFFSET_CLAMP(float val) +{ + return ((fui(val)) << A5XX_GRAS_SU_POLY_OFFSET_OFFSET_CLAMP__SHIFT) & A5XX_GRAS_SU_POLY_OFFSET_OFFSET_CLAMP__MASK; +} + +#define REG_A5XX_GRAS_SU_DEPTH_BUFFER_INFO 0x0000e098 +#define A5XX_GRAS_SU_DEPTH_BUFFER_INFO_DEPTH_FORMAT__MASK 0x00000007 +#define A5XX_GRAS_SU_DEPTH_BUFFER_INFO_DEPTH_FORMAT__SHIFT 0 +static inline uint32_t A5XX_GRAS_SU_DEPTH_BUFFER_INFO_DEPTH_FORMAT(enum a5xx_depth_format val) +{ + return ((val) << A5XX_GRAS_SU_DEPTH_BUFFER_INFO_DEPTH_FORMAT__SHIFT) & A5XX_GRAS_SU_DEPTH_BUFFER_INFO_DEPTH_FORMAT__MASK; +} + +#define REG_A5XX_GRAS_SU_CONSERVATIVE_RAS_CNTL 0x0000e099 + +#define REG_A5XX_GRAS_SC_CNTL 0x0000e0a0 +#define A5XX_GRAS_SC_CNTL_BINNING_PASS 0x00000001 +#define A5XX_GRAS_SC_CNTL_SAMPLES_PASSED 0x00008000 + +#define REG_A5XX_GRAS_SC_BIN_CNTL 0x0000e0a1 + 
+#define REG_A5XX_GRAS_SC_RAS_MSAA_CNTL 0x0000e0a2 +#define A5XX_GRAS_SC_RAS_MSAA_CNTL_SAMPLES__MASK 0x00000003 +#define A5XX_GRAS_SC_RAS_MSAA_CNTL_SAMPLES__SHIFT 0 +static inline uint32_t A5XX_GRAS_SC_RAS_MSAA_CNTL_SAMPLES(enum a3xx_msaa_samples val) +{ + return ((val) << A5XX_GRAS_SC_RAS_MSAA_CNTL_SAMPLES__SHIFT) & A5XX_GRAS_SC_RAS_MSAA_CNTL_SAMPLES__MASK; +} + +#define REG_A5XX_GRAS_SC_DEST_MSAA_CNTL 0x0000e0a3 +#define A5XX_GRAS_SC_DEST_MSAA_CNTL_SAMPLES__MASK 0x00000003 +#define A5XX_GRAS_SC_DEST_MSAA_CNTL_SAMPLES__SHIFT 0 +static inline uint32_t A5XX_GRAS_SC_DEST_MSAA_CNTL_SAMPLES(enum a3xx_msaa_samples val) +{ + return ((val) << A5XX_GRAS_SC_DEST_MSAA_CNTL_SAMPLES__SHIFT) & A5XX_GRAS_SC_DEST_MSAA_CNTL_SAMPLES__MASK; +} +#define A5XX_GRAS_SC_DEST_MSAA_CNTL_MSAA_DISABLE 0x00000004 + +#define REG_A5XX_GRAS_SC_SCREEN_SCISSOR_CNTL 0x0000e0a4 + +#define REG_A5XX_GRAS_SC_SCREEN_SCISSOR_TL_0 0x0000e0aa +#define A5XX_GRAS_SC_SCREEN_SCISSOR_TL_0_WINDOW_OFFSET_DISABLE 0x80000000 +#define A5XX_GRAS_SC_SCREEN_SCISSOR_TL_0_X__MASK 0x00007fff +#define A5XX_GRAS_SC_SCREEN_SCISSOR_TL_0_X__SHIFT 0 +static inline uint32_t A5XX_GRAS_SC_SCREEN_SCISSOR_TL_0_X(uint32_t val) +{ + return ((val) << A5XX_GRAS_SC_SCREEN_SCISSOR_TL_0_X__SHIFT) & A5XX_GRAS_SC_SCREEN_SCISSOR_TL_0_X__MASK; +} +#define A5XX_GRAS_SC_SCREEN_SCISSOR_TL_0_Y__MASK 0x7fff0000 +#define A5XX_GRAS_SC_SCREEN_SCISSOR_TL_0_Y__SHIFT 16 +static inline uint32_t A5XX_GRAS_SC_SCREEN_SCISSOR_TL_0_Y(uint32_t val) +{ + return ((val) << A5XX_GRAS_SC_SCREEN_SCISSOR_TL_0_Y__SHIFT) & A5XX_GRAS_SC_SCREEN_SCISSOR_TL_0_Y__MASK; +} + +#define REG_A5XX_GRAS_SC_SCREEN_SCISSOR_BR_0 0x0000e0ab +#define A5XX_GRAS_SC_SCREEN_SCISSOR_BR_0_WINDOW_OFFSET_DISABLE 0x80000000 +#define A5XX_GRAS_SC_SCREEN_SCISSOR_BR_0_X__MASK 0x00007fff +#define A5XX_GRAS_SC_SCREEN_SCISSOR_BR_0_X__SHIFT 0 +static inline uint32_t A5XX_GRAS_SC_SCREEN_SCISSOR_BR_0_X(uint32_t val) +{ + return ((val) << A5XX_GRAS_SC_SCREEN_SCISSOR_BR_0_X__SHIFT) & 
A5XX_GRAS_SC_SCREEN_SCISSOR_BR_0_X__MASK; +} +#define A5XX_GRAS_SC_SCREEN_SCISSOR_BR_0_Y__MASK 0x7fff0000 +#define A5XX_GRAS_SC_SCREEN_SCISSOR_BR_0_Y__SHIFT 16 +static inline uint32_t A5XX_GRAS_SC_SCREEN_SCISSOR_BR_0_Y(uint32_t val) +{ + return ((val) << A5XX_GRAS_SC_SCREEN_SCISSOR_BR_0_Y__SHIFT) & A5XX_GRAS_SC_SCREEN_SCISSOR_BR_0_Y__MASK; +} + +#define REG_A5XX_GRAS_SC_VIEWPORT_SCISSOR_TL_0 0x0000e0ca +#define A5XX_GRAS_SC_VIEWPORT_SCISSOR_TL_0_WINDOW_OFFSET_DISABLE 0x80000000 +#define A5XX_GRAS_SC_VIEWPORT_SCISSOR_TL_0_X__MASK 0x00007fff +#define A5XX_GRAS_SC_VIEWPORT_SCISSOR_TL_0_X__SHIFT 0 +static inline uint32_t A5XX_GRAS_SC_VIEWPORT_SCISSOR_TL_0_X(uint32_t val) +{ + return ((val) << A5XX_GRAS_SC_VIEWPORT_SCISSOR_TL_0_X__SHIFT) & A5XX_GRAS_SC_VIEWPORT_SCISSOR_TL_0_X__MASK; +} +#define A5XX_GRAS_SC_VIEWPORT_SCISSOR_TL_0_Y__MASK 0x7fff0000 +#define A5XX_GRAS_SC_VIEWPORT_SCISSOR_TL_0_Y__SHIFT 16 +static inline uint32_t A5XX_GRAS_SC_VIEWPORT_SCISSOR_TL_0_Y(uint32_t val) +{ + return ((val) << A5XX_GRAS_SC_VIEWPORT_SCISSOR_TL_0_Y__SHIFT) & A5XX_GRAS_SC_VIEWPORT_SCISSOR_TL_0_Y__MASK; +} + +#define REG_A5XX_GRAS_SC_VIEWPORT_SCISSOR_BR_0 0x0000e0cb +#define A5XX_GRAS_SC_VIEWPORT_SCISSOR_BR_0_WINDOW_OFFSET_DISABLE 0x80000000 +#define A5XX_GRAS_SC_VIEWPORT_SCISSOR_BR_0_X__MASK 0x00007fff +#define A5XX_GRAS_SC_VIEWPORT_SCISSOR_BR_0_X__SHIFT 0 +static inline uint32_t A5XX_GRAS_SC_VIEWPORT_SCISSOR_BR_0_X(uint32_t val) +{ + return ((val) << A5XX_GRAS_SC_VIEWPORT_SCISSOR_BR_0_X__SHIFT) & A5XX_GRAS_SC_VIEWPORT_SCISSOR_BR_0_X__MASK; +} +#define A5XX_GRAS_SC_VIEWPORT_SCISSOR_BR_0_Y__MASK 0x7fff0000 +#define A5XX_GRAS_SC_VIEWPORT_SCISSOR_BR_0_Y__SHIFT 16 +static inline uint32_t A5XX_GRAS_SC_VIEWPORT_SCISSOR_BR_0_Y(uint32_t val) +{ + return ((val) << A5XX_GRAS_SC_VIEWPORT_SCISSOR_BR_0_Y__SHIFT) & A5XX_GRAS_SC_VIEWPORT_SCISSOR_BR_0_Y__MASK; +} + +#define REG_A5XX_GRAS_SC_WINDOW_SCISSOR_TL 0x0000e0ea +#define A5XX_GRAS_SC_WINDOW_SCISSOR_TL_WINDOW_OFFSET_DISABLE 0x80000000 +#define 
A5XX_GRAS_SC_WINDOW_SCISSOR_TL_X__MASK 0x00007fff +#define A5XX_GRAS_SC_WINDOW_SCISSOR_TL_X__SHIFT 0 +static inline uint32_t A5XX_GRAS_SC_WINDOW_SCISSOR_TL_X(uint32_t val) +{ + return ((val) << A5XX_GRAS_SC_WINDOW_SCISSOR_TL_X__SHIFT) & A5XX_GRAS_SC_WINDOW_SCISSOR_TL_X__MASK; +} +#define A5XX_GRAS_SC_WINDOW_SCISSOR_TL_Y__MASK 0x7fff0000 +#define A5XX_GRAS_SC_WINDOW_SCISSOR_TL_Y__SHIFT 16 +static inline uint32_t A5XX_GRAS_SC_WINDOW_SCISSOR_TL_Y(uint32_t val) +{ + return ((val) << A5XX_GRAS_SC_WINDOW_SCISSOR_TL_Y__SHIFT) & A5XX_GRAS_SC_WINDOW_SCISSOR_TL_Y__MASK; +} + +#define REG_A5XX_GRAS_SC_WINDOW_SCISSOR_BR 0x0000e0eb +#define A5XX_GRAS_SC_WINDOW_SCISSOR_BR_WINDOW_OFFSET_DISABLE 0x80000000 +#define A5XX_GRAS_SC_WINDOW_SCISSOR_BR_X__MASK 0x00007fff +#define A5XX_GRAS_SC_WINDOW_SCISSOR_BR_X__SHIFT 0 +static inline uint32_t A5XX_GRAS_SC_WINDOW_SCISSOR_BR_X(uint32_t val) +{ + return ((val) << A5XX_GRAS_SC_WINDOW_SCISSOR_BR_X__SHIFT) & A5XX_GRAS_SC_WINDOW_SCISSOR_BR_X__MASK; +} +#define A5XX_GRAS_SC_WINDOW_SCISSOR_BR_Y__MASK 0x7fff0000 +#define A5XX_GRAS_SC_WINDOW_SCISSOR_BR_Y__SHIFT 16 +static inline uint32_t A5XX_GRAS_SC_WINDOW_SCISSOR_BR_Y(uint32_t val) +{ + return ((val) << A5XX_GRAS_SC_WINDOW_SCISSOR_BR_Y__SHIFT) & A5XX_GRAS_SC_WINDOW_SCISSOR_BR_Y__MASK; +} + +#define REG_A5XX_GRAS_LRZ_CNTL 0x0000e100 +#define A5XX_GRAS_LRZ_CNTL_ENABLE 0x00000001 +#define A5XX_GRAS_LRZ_CNTL_LRZ_WRITE 0x00000002 +#define A5XX_GRAS_LRZ_CNTL_GREATER 0x00000004 + +#define REG_A5XX_GRAS_LRZ_BUFFER_BASE_LO 0x0000e101 + +#define REG_A5XX_GRAS_LRZ_BUFFER_BASE_HI 0x0000e102 + +#define REG_A5XX_GRAS_LRZ_BUFFER_PITCH 0x0000e103 +#define A5XX_GRAS_LRZ_BUFFER_PITCH__MASK 0xffffffff +#define A5XX_GRAS_LRZ_BUFFER_PITCH__SHIFT 0 +static inline uint32_t A5XX_GRAS_LRZ_BUFFER_PITCH(uint32_t val) +{ + assert(!(val & 0x1f)); + return ((val >> 5) << A5XX_GRAS_LRZ_BUFFER_PITCH__SHIFT) & A5XX_GRAS_LRZ_BUFFER_PITCH__MASK; +} + +#define REG_A5XX_GRAS_LRZ_FAST_CLEAR_BUFFER_BASE_LO 0x0000e104 + +#define 
REG_A5XX_GRAS_LRZ_FAST_CLEAR_BUFFER_BASE_HI 0x0000e105 + +#define REG_A5XX_RB_CNTL 0x0000e140 +#define A5XX_RB_CNTL_WIDTH__MASK 0x000000ff +#define A5XX_RB_CNTL_WIDTH__SHIFT 0 +static inline uint32_t A5XX_RB_CNTL_WIDTH(uint32_t val) +{ + assert(!(val & 0x1f)); + return ((val >> 5) << A5XX_RB_CNTL_WIDTH__SHIFT) & A5XX_RB_CNTL_WIDTH__MASK; +} +#define A5XX_RB_CNTL_HEIGHT__MASK 0x0001fe00 +#define A5XX_RB_CNTL_HEIGHT__SHIFT 9 +static inline uint32_t A5XX_RB_CNTL_HEIGHT(uint32_t val) +{ + assert(!(val & 0x1f)); + return ((val >> 5) << A5XX_RB_CNTL_HEIGHT__SHIFT) & A5XX_RB_CNTL_HEIGHT__MASK; +} +#define A5XX_RB_CNTL_BYPASS 0x00020000 + +#define REG_A5XX_RB_RENDER_CNTL 0x0000e141 +#define A5XX_RB_RENDER_CNTL_BINNING_PASS 0x00000001 +#define A5XX_RB_RENDER_CNTL_SAMPLES_PASSED 0x00000040 +#define A5XX_RB_RENDER_CNTL_DISABLE_COLOR_PIPE 0x00000080 +#define A5XX_RB_RENDER_CNTL_FLAG_DEPTH 0x00004000 +#define A5XX_RB_RENDER_CNTL_FLAG_DEPTH2 0x00008000 +#define A5XX_RB_RENDER_CNTL_FLAG_MRTS__MASK 0x00ff0000 +#define A5XX_RB_RENDER_CNTL_FLAG_MRTS__SHIFT 16 +static inline uint32_t A5XX_RB_RENDER_CNTL_FLAG_MRTS(uint32_t val) +{ + return ((val) << A5XX_RB_RENDER_CNTL_FLAG_MRTS__SHIFT) & A5XX_RB_RENDER_CNTL_FLAG_MRTS__MASK; +} +#define A5XX_RB_RENDER_CNTL_FLAG_MRTS2__MASK 0xff000000 +#define A5XX_RB_RENDER_CNTL_FLAG_MRTS2__SHIFT 24 +static inline uint32_t A5XX_RB_RENDER_CNTL_FLAG_MRTS2(uint32_t val) +{ + return ((val) << A5XX_RB_RENDER_CNTL_FLAG_MRTS2__SHIFT) & A5XX_RB_RENDER_CNTL_FLAG_MRTS2__MASK; +} + +#define REG_A5XX_RB_RAS_MSAA_CNTL 0x0000e142 +#define A5XX_RB_RAS_MSAA_CNTL_SAMPLES__MASK 0x00000003 +#define A5XX_RB_RAS_MSAA_CNTL_SAMPLES__SHIFT 0 +static inline uint32_t A5XX_RB_RAS_MSAA_CNTL_SAMPLES(enum a3xx_msaa_samples val) +{ + return ((val) << A5XX_RB_RAS_MSAA_CNTL_SAMPLES__SHIFT) & A5XX_RB_RAS_MSAA_CNTL_SAMPLES__MASK; +} + +#define REG_A5XX_RB_DEST_MSAA_CNTL 0x0000e143 +#define A5XX_RB_DEST_MSAA_CNTL_SAMPLES__MASK 0x00000003 +#define A5XX_RB_DEST_MSAA_CNTL_SAMPLES__SHIFT 0 
+static inline uint32_t A5XX_RB_DEST_MSAA_CNTL_SAMPLES(enum a3xx_msaa_samples val) +{ + return ((val) << A5XX_RB_DEST_MSAA_CNTL_SAMPLES__SHIFT) & A5XX_RB_DEST_MSAA_CNTL_SAMPLES__MASK; +} +#define A5XX_RB_DEST_MSAA_CNTL_MSAA_DISABLE 0x00000004 + +#define REG_A5XX_RB_RENDER_CONTROL0 0x0000e144 +#define A5XX_RB_RENDER_CONTROL0_VARYING 0x00000001 +#define A5XX_RB_RENDER_CONTROL0_UNK3 0x00000008 +#define A5XX_RB_RENDER_CONTROL0_XCOORD 0x00000040 +#define A5XX_RB_RENDER_CONTROL0_YCOORD 0x00000080 +#define A5XX_RB_RENDER_CONTROL0_ZCOORD 0x00000100 +#define A5XX_RB_RENDER_CONTROL0_WCOORD 0x00000200 + +#define REG_A5XX_RB_RENDER_CONTROL1 0x0000e145 +#define A5XX_RB_RENDER_CONTROL1_SAMPLEMASK 0x00000001 +#define A5XX_RB_RENDER_CONTROL1_FACENESS 0x00000002 +#define A5XX_RB_RENDER_CONTROL1_SAMPLEID 0x00000004 + +#define REG_A5XX_RB_FS_OUTPUT_CNTL 0x0000e146 +#define A5XX_RB_FS_OUTPUT_CNTL_MRT__MASK 0x0000000f +#define A5XX_RB_FS_OUTPUT_CNTL_MRT__SHIFT 0 +static inline uint32_t A5XX_RB_FS_OUTPUT_CNTL_MRT(uint32_t val) +{ + return ((val) << A5XX_RB_FS_OUTPUT_CNTL_MRT__SHIFT) & A5XX_RB_FS_OUTPUT_CNTL_MRT__MASK; +} +#define A5XX_RB_FS_OUTPUT_CNTL_FRAG_WRITES_Z 0x00000020 + +#define REG_A5XX_RB_RENDER_COMPONENTS 0x0000e147 +#define A5XX_RB_RENDER_COMPONENTS_RT0__MASK 0x0000000f +#define A5XX_RB_RENDER_COMPONENTS_RT0__SHIFT 0 +static inline uint32_t A5XX_RB_RENDER_COMPONENTS_RT0(uint32_t val) +{ + return ((val) << A5XX_RB_RENDER_COMPONENTS_RT0__SHIFT) & A5XX_RB_RENDER_COMPONENTS_RT0__MASK; +} +#define A5XX_RB_RENDER_COMPONENTS_RT1__MASK 0x000000f0 +#define A5XX_RB_RENDER_COMPONENTS_RT1__SHIFT 4 +static inline uint32_t A5XX_RB_RENDER_COMPONENTS_RT1(uint32_t val) +{ + return ((val) << A5XX_RB_RENDER_COMPONENTS_RT1__SHIFT) & A5XX_RB_RENDER_COMPONENTS_RT1__MASK; +} +#define A5XX_RB_RENDER_COMPONENTS_RT2__MASK 0x00000f00 +#define A5XX_RB_RENDER_COMPONENTS_RT2__SHIFT 8 +static inline uint32_t A5XX_RB_RENDER_COMPONENTS_RT2(uint32_t val) +{ + return ((val) << 
A5XX_RB_RENDER_COMPONENTS_RT2__SHIFT) & A5XX_RB_RENDER_COMPONENTS_RT2__MASK; +} +#define A5XX_RB_RENDER_COMPONENTS_RT3__MASK 0x0000f000 +#define A5XX_RB_RENDER_COMPONENTS_RT3__SHIFT 12 +static inline uint32_t A5XX_RB_RENDER_COMPONENTS_RT3(uint32_t val) +{ + return ((val) << A5XX_RB_RENDER_COMPONENTS_RT3__SHIFT) & A5XX_RB_RENDER_COMPONENTS_RT3__MASK; +} +#define A5XX_RB_RENDER_COMPONENTS_RT4__MASK 0x000f0000 +#define A5XX_RB_RENDER_COMPONENTS_RT4__SHIFT 16 +static inline uint32_t A5XX_RB_RENDER_COMPONENTS_RT4(uint32_t val) +{ + return ((val) << A5XX_RB_RENDER_COMPONENTS_RT4__SHIFT) & A5XX_RB_RENDER_COMPONENTS_RT4__MASK; +} +#define A5XX_RB_RENDER_COMPONENTS_RT5__MASK 0x00f00000 +#define A5XX_RB_RENDER_COMPONENTS_RT5__SHIFT 20 +static inline uint32_t A5XX_RB_RENDER_COMPONENTS_RT5(uint32_t val) +{ + return ((val) << A5XX_RB_RENDER_COMPONENTS_RT5__SHIFT) & A5XX_RB_RENDER_COMPONENTS_RT5__MASK; +} +#define A5XX_RB_RENDER_COMPONENTS_RT6__MASK 0x0f000000 +#define A5XX_RB_RENDER_COMPONENTS_RT6__SHIFT 24 +static inline uint32_t A5XX_RB_RENDER_COMPONENTS_RT6(uint32_t val) +{ + return ((val) << A5XX_RB_RENDER_COMPONENTS_RT6__SHIFT) & A5XX_RB_RENDER_COMPONENTS_RT6__MASK; +} +#define A5XX_RB_RENDER_COMPONENTS_RT7__MASK 0xf0000000 +#define A5XX_RB_RENDER_COMPONENTS_RT7__SHIFT 28 +static inline uint32_t A5XX_RB_RENDER_COMPONENTS_RT7(uint32_t val) +{ + return ((val) << A5XX_RB_RENDER_COMPONENTS_RT7__SHIFT) & A5XX_RB_RENDER_COMPONENTS_RT7__MASK; +} + +static inline uint32_t REG_A5XX_RB_MRT(uint32_t i0) { return 0x0000e150 + 0x7*i0; } + +static inline uint32_t REG_A5XX_RB_MRT_CONTROL(uint32_t i0) { return 0x0000e150 + 0x7*i0; } +#define A5XX_RB_MRT_CONTROL_BLEND 0x00000001 +#define A5XX_RB_MRT_CONTROL_BLEND2 0x00000002 +#define A5XX_RB_MRT_CONTROL_ROP_ENABLE 0x00000004 +#define A5XX_RB_MRT_CONTROL_ROP_CODE__MASK 0x00000078 +#define A5XX_RB_MRT_CONTROL_ROP_CODE__SHIFT 3 +static inline uint32_t A5XX_RB_MRT_CONTROL_ROP_CODE(enum a3xx_rop_code val) +{ + return ((val) << 
A5XX_RB_MRT_CONTROL_ROP_CODE__SHIFT) & A5XX_RB_MRT_CONTROL_ROP_CODE__MASK; +} +#define A5XX_RB_MRT_CONTROL_COMPONENT_ENABLE__MASK 0x00000780 +#define A5XX_RB_MRT_CONTROL_COMPONENT_ENABLE__SHIFT 7 +static inline uint32_t A5XX_RB_MRT_CONTROL_COMPONENT_ENABLE(uint32_t val) +{ + return ((val) << A5XX_RB_MRT_CONTROL_COMPONENT_ENABLE__SHIFT) & A5XX_RB_MRT_CONTROL_COMPONENT_ENABLE__MASK; +} + +static inline uint32_t REG_A5XX_RB_MRT_BLEND_CONTROL(uint32_t i0) { return 0x0000e151 + 0x7*i0; } +#define A5XX_RB_MRT_BLEND_CONTROL_RGB_SRC_FACTOR__MASK 0x0000001f +#define A5XX_RB_MRT_BLEND_CONTROL_RGB_SRC_FACTOR__SHIFT 0 +static inline uint32_t A5XX_RB_MRT_BLEND_CONTROL_RGB_SRC_FACTOR(enum adreno_rb_blend_factor val) +{ + return ((val) << A5XX_RB_MRT_BLEND_CONTROL_RGB_SRC_FACTOR__SHIFT) & A5XX_RB_MRT_BLEND_CONTROL_RGB_SRC_FACTOR__MASK; +} +#define A5XX_RB_MRT_BLEND_CONTROL_RGB_BLEND_OPCODE__MASK 0x000000e0 +#define A5XX_RB_MRT_BLEND_CONTROL_RGB_BLEND_OPCODE__SHIFT 5 +static inline uint32_t A5XX_RB_MRT_BLEND_CONTROL_RGB_BLEND_OPCODE(enum a3xx_rb_blend_opcode val) +{ + return ((val) << A5XX_RB_MRT_BLEND_CONTROL_RGB_BLEND_OPCODE__SHIFT) & A5XX_RB_MRT_BLEND_CONTROL_RGB_BLEND_OPCODE__MASK; +} +#define A5XX_RB_MRT_BLEND_CONTROL_RGB_DEST_FACTOR__MASK 0x00001f00 +#define A5XX_RB_MRT_BLEND_CONTROL_RGB_DEST_FACTOR__SHIFT 8 +static inline uint32_t A5XX_RB_MRT_BLEND_CONTROL_RGB_DEST_FACTOR(enum adreno_rb_blend_factor val) +{ + return ((val) << A5XX_RB_MRT_BLEND_CONTROL_RGB_DEST_FACTOR__SHIFT) & A5XX_RB_MRT_BLEND_CONTROL_RGB_DEST_FACTOR__MASK; +} +#define A5XX_RB_MRT_BLEND_CONTROL_ALPHA_SRC_FACTOR__MASK 0x001f0000 +#define A5XX_RB_MRT_BLEND_CONTROL_ALPHA_SRC_FACTOR__SHIFT 16 +static inline uint32_t A5XX_RB_MRT_BLEND_CONTROL_ALPHA_SRC_FACTOR(enum adreno_rb_blend_factor val) +{ + return ((val) << A5XX_RB_MRT_BLEND_CONTROL_ALPHA_SRC_FACTOR__SHIFT) & A5XX_RB_MRT_BLEND_CONTROL_ALPHA_SRC_FACTOR__MASK; +} +#define A5XX_RB_MRT_BLEND_CONTROL_ALPHA_BLEND_OPCODE__MASK 0x00e00000 +#define 
A5XX_RB_MRT_BLEND_CONTROL_ALPHA_BLEND_OPCODE__SHIFT 21 +static inline uint32_t A5XX_RB_MRT_BLEND_CONTROL_ALPHA_BLEND_OPCODE(enum a3xx_rb_blend_opcode val) +{ + return ((val) << A5XX_RB_MRT_BLEND_CONTROL_ALPHA_BLEND_OPCODE__SHIFT) & A5XX_RB_MRT_BLEND_CONTROL_ALPHA_BLEND_OPCODE__MASK; +} +#define A5XX_RB_MRT_BLEND_CONTROL_ALPHA_DEST_FACTOR__MASK 0x1f000000 +#define A5XX_RB_MRT_BLEND_CONTROL_ALPHA_DEST_FACTOR__SHIFT 24 +static inline uint32_t A5XX_RB_MRT_BLEND_CONTROL_ALPHA_DEST_FACTOR(enum adreno_rb_blend_factor val) +{ + return ((val) << A5XX_RB_MRT_BLEND_CONTROL_ALPHA_DEST_FACTOR__SHIFT) & A5XX_RB_MRT_BLEND_CONTROL_ALPHA_DEST_FACTOR__MASK; +} + +static inline uint32_t REG_A5XX_RB_MRT_BUF_INFO(uint32_t i0) { return 0x0000e152 + 0x7*i0; } +#define A5XX_RB_MRT_BUF_INFO_COLOR_FORMAT__MASK 0x000000ff +#define A5XX_RB_MRT_BUF_INFO_COLOR_FORMAT__SHIFT 0 +static inline uint32_t A5XX_RB_MRT_BUF_INFO_COLOR_FORMAT(enum a5xx_color_fmt val) +{ + return ((val) << A5XX_RB_MRT_BUF_INFO_COLOR_FORMAT__SHIFT) & A5XX_RB_MRT_BUF_INFO_COLOR_FORMAT__MASK; +} +#define A5XX_RB_MRT_BUF_INFO_COLOR_TILE_MODE__MASK 0x00000300 +#define A5XX_RB_MRT_BUF_INFO_COLOR_TILE_MODE__SHIFT 8 +static inline uint32_t A5XX_RB_MRT_BUF_INFO_COLOR_TILE_MODE(enum a5xx_tile_mode val) +{ + return ((val) << A5XX_RB_MRT_BUF_INFO_COLOR_TILE_MODE__SHIFT) & A5XX_RB_MRT_BUF_INFO_COLOR_TILE_MODE__MASK; +} +#define A5XX_RB_MRT_BUF_INFO_DITHER_MODE__MASK 0x00001800 +#define A5XX_RB_MRT_BUF_INFO_DITHER_MODE__SHIFT 11 +static inline uint32_t A5XX_RB_MRT_BUF_INFO_DITHER_MODE(enum adreno_rb_dither_mode val) +{ + return ((val) << A5XX_RB_MRT_BUF_INFO_DITHER_MODE__SHIFT) & A5XX_RB_MRT_BUF_INFO_DITHER_MODE__MASK; +} +#define A5XX_RB_MRT_BUF_INFO_COLOR_SWAP__MASK 0x00006000 +#define A5XX_RB_MRT_BUF_INFO_COLOR_SWAP__SHIFT 13 +static inline uint32_t A5XX_RB_MRT_BUF_INFO_COLOR_SWAP(enum a3xx_color_swap val) +{ + return ((val) << A5XX_RB_MRT_BUF_INFO_COLOR_SWAP__SHIFT) & A5XX_RB_MRT_BUF_INFO_COLOR_SWAP__MASK; +} +#define 
A5XX_RB_MRT_BUF_INFO_COLOR_SRGB 0x00008000 + +static inline uint32_t REG_A5XX_RB_MRT_PITCH(uint32_t i0) { return 0x0000e153 + 0x7*i0; } +#define A5XX_RB_MRT_PITCH__MASK 0xffffffff +#define A5XX_RB_MRT_PITCH__SHIFT 0 +static inline uint32_t A5XX_RB_MRT_PITCH(uint32_t val) +{ + assert(!(val & 0x3f)); + return ((val >> 6) << A5XX_RB_MRT_PITCH__SHIFT) & A5XX_RB_MRT_PITCH__MASK; +} + +static inline uint32_t REG_A5XX_RB_MRT_ARRAY_PITCH(uint32_t i0) { return 0x0000e154 + 0x7*i0; } +#define A5XX_RB_MRT_ARRAY_PITCH__MASK 0xffffffff +#define A5XX_RB_MRT_ARRAY_PITCH__SHIFT 0 +static inline uint32_t A5XX_RB_MRT_ARRAY_PITCH(uint32_t val) +{ + assert(!(val & 0x3f)); + return ((val >> 6) << A5XX_RB_MRT_ARRAY_PITCH__SHIFT) & A5XX_RB_MRT_ARRAY_PITCH__MASK; +} + +static inline uint32_t REG_A5XX_RB_MRT_BASE_LO(uint32_t i0) { return 0x0000e155 + 0x7*i0; } + +static inline uint32_t REG_A5XX_RB_MRT_BASE_HI(uint32_t i0) { return 0x0000e156 + 0x7*i0; } + +#define REG_A5XX_RB_BLEND_RED 0x0000e1a0 +#define A5XX_RB_BLEND_RED_UINT__MASK 0x000000ff +#define A5XX_RB_BLEND_RED_UINT__SHIFT 0 +static inline uint32_t A5XX_RB_BLEND_RED_UINT(uint32_t val) +{ + return ((val) << A5XX_RB_BLEND_RED_UINT__SHIFT) & A5XX_RB_BLEND_RED_UINT__MASK; +} +#define A5XX_RB_BLEND_RED_SINT__MASK 0x0000ff00 +#define A5XX_RB_BLEND_RED_SINT__SHIFT 8 +static inline uint32_t A5XX_RB_BLEND_RED_SINT(uint32_t val) +{ + return ((val) << A5XX_RB_BLEND_RED_SINT__SHIFT) & A5XX_RB_BLEND_RED_SINT__MASK; +} +#define A5XX_RB_BLEND_RED_FLOAT__MASK 0xffff0000 +#define A5XX_RB_BLEND_RED_FLOAT__SHIFT 16 +static inline uint32_t A5XX_RB_BLEND_RED_FLOAT(float val) +{ + return ((util_float_to_half(val)) << A5XX_RB_BLEND_RED_FLOAT__SHIFT) & A5XX_RB_BLEND_RED_FLOAT__MASK; +} + +#define REG_A5XX_RB_BLEND_RED_F32 0x0000e1a1 +#define A5XX_RB_BLEND_RED_F32__MASK 0xffffffff +#define A5XX_RB_BLEND_RED_F32__SHIFT 0 +static inline uint32_t A5XX_RB_BLEND_RED_F32(float val) +{ + return ((fui(val)) << A5XX_RB_BLEND_RED_F32__SHIFT) & 
A5XX_RB_BLEND_RED_F32__MASK; +} + +#define REG_A5XX_RB_BLEND_GREEN 0x0000e1a2 +#define A5XX_RB_BLEND_GREEN_UINT__MASK 0x000000ff +#define A5XX_RB_BLEND_GREEN_UINT__SHIFT 0 +static inline uint32_t A5XX_RB_BLEND_GREEN_UINT(uint32_t val) +{ + return ((val) << A5XX_RB_BLEND_GREEN_UINT__SHIFT) & A5XX_RB_BLEND_GREEN_UINT__MASK; +} +#define A5XX_RB_BLEND_GREEN_SINT__MASK 0x0000ff00 +#define A5XX_RB_BLEND_GREEN_SINT__SHIFT 8 +static inline uint32_t A5XX_RB_BLEND_GREEN_SINT(uint32_t val) +{ + return ((val) << A5XX_RB_BLEND_GREEN_SINT__SHIFT) & A5XX_RB_BLEND_GREEN_SINT__MASK; +} +#define A5XX_RB_BLEND_GREEN_FLOAT__MASK 0xffff0000 +#define A5XX_RB_BLEND_GREEN_FLOAT__SHIFT 16 +static inline uint32_t A5XX_RB_BLEND_GREEN_FLOAT(float val) +{ + return ((util_float_to_half(val)) << A5XX_RB_BLEND_GREEN_FLOAT__SHIFT) & A5XX_RB_BLEND_GREEN_FLOAT__MASK; +} + +#define REG_A5XX_RB_BLEND_GREEN_F32 0x0000e1a3 +#define A5XX_RB_BLEND_GREEN_F32__MASK 0xffffffff +#define A5XX_RB_BLEND_GREEN_F32__SHIFT 0 +static inline uint32_t A5XX_RB_BLEND_GREEN_F32(float val) +{ + return ((fui(val)) << A5XX_RB_BLEND_GREEN_F32__SHIFT) & A5XX_RB_BLEND_GREEN_F32__MASK; +} + +#define REG_A5XX_RB_BLEND_BLUE 0x0000e1a4 +#define A5XX_RB_BLEND_BLUE_UINT__MASK 0x000000ff +#define A5XX_RB_BLEND_BLUE_UINT__SHIFT 0 +static inline uint32_t A5XX_RB_BLEND_BLUE_UINT(uint32_t val) +{ + return ((val) << A5XX_RB_BLEND_BLUE_UINT__SHIFT) & A5XX_RB_BLEND_BLUE_UINT__MASK; +} +#define A5XX_RB_BLEND_BLUE_SINT__MASK 0x0000ff00 +#define A5XX_RB_BLEND_BLUE_SINT__SHIFT 8 +static inline uint32_t A5XX_RB_BLEND_BLUE_SINT(uint32_t val) +{ + return ((val) << A5XX_RB_BLEND_BLUE_SINT__SHIFT) & A5XX_RB_BLEND_BLUE_SINT__MASK; +} +#define A5XX_RB_BLEND_BLUE_FLOAT__MASK 0xffff0000 +#define A5XX_RB_BLEND_BLUE_FLOAT__SHIFT 16 +static inline uint32_t A5XX_RB_BLEND_BLUE_FLOAT(float val) +{ + return ((util_float_to_half(val)) << A5XX_RB_BLEND_BLUE_FLOAT__SHIFT) & A5XX_RB_BLEND_BLUE_FLOAT__MASK; +} + +#define REG_A5XX_RB_BLEND_BLUE_F32 0x0000e1a5 
+#define A5XX_RB_BLEND_BLUE_F32__MASK 0xffffffff +#define A5XX_RB_BLEND_BLUE_F32__SHIFT 0 +static inline uint32_t A5XX_RB_BLEND_BLUE_F32(float val) +{ + return ((fui(val)) << A5XX_RB_BLEND_BLUE_F32__SHIFT) & A5XX_RB_BLEND_BLUE_F32__MASK; +} + +#define REG_A5XX_RB_BLEND_ALPHA 0x0000e1a6 +#define A5XX_RB_BLEND_ALPHA_UINT__MASK 0x000000ff +#define A5XX_RB_BLEND_ALPHA_UINT__SHIFT 0 +static inline uint32_t A5XX_RB_BLEND_ALPHA_UINT(uint32_t val) +{ + return ((val) << A5XX_RB_BLEND_ALPHA_UINT__SHIFT) & A5XX_RB_BLEND_ALPHA_UINT__MASK; +} +#define A5XX_RB_BLEND_ALPHA_SINT__MASK 0x0000ff00 +#define A5XX_RB_BLEND_ALPHA_SINT__SHIFT 8 +static inline uint32_t A5XX_RB_BLEND_ALPHA_SINT(uint32_t val) +{ + return ((val) << A5XX_RB_BLEND_ALPHA_SINT__SHIFT) & A5XX_RB_BLEND_ALPHA_SINT__MASK; +} +#define A5XX_RB_BLEND_ALPHA_FLOAT__MASK 0xffff0000 +#define A5XX_RB_BLEND_ALPHA_FLOAT__SHIFT 16 +static inline uint32_t A5XX_RB_BLEND_ALPHA_FLOAT(float val) +{ + return ((util_float_to_half(val)) << A5XX_RB_BLEND_ALPHA_FLOAT__SHIFT) & A5XX_RB_BLEND_ALPHA_FLOAT__MASK; +} + +#define REG_A5XX_RB_BLEND_ALPHA_F32 0x0000e1a7 +#define A5XX_RB_BLEND_ALPHA_F32__MASK 0xffffffff +#define A5XX_RB_BLEND_ALPHA_F32__SHIFT 0 +static inline uint32_t A5XX_RB_BLEND_ALPHA_F32(float val) +{ + return ((fui(val)) << A5XX_RB_BLEND_ALPHA_F32__SHIFT) & A5XX_RB_BLEND_ALPHA_F32__MASK; +} + +#define REG_A5XX_RB_ALPHA_CONTROL 0x0000e1a8 +#define A5XX_RB_ALPHA_CONTROL_ALPHA_REF__MASK 0x000000ff +#define A5XX_RB_ALPHA_CONTROL_ALPHA_REF__SHIFT 0 +static inline uint32_t A5XX_RB_ALPHA_CONTROL_ALPHA_REF(uint32_t val) +{ + return ((val) << A5XX_RB_ALPHA_CONTROL_ALPHA_REF__SHIFT) & A5XX_RB_ALPHA_CONTROL_ALPHA_REF__MASK; +} +#define A5XX_RB_ALPHA_CONTROL_ALPHA_TEST 0x00000100 +#define A5XX_RB_ALPHA_CONTROL_ALPHA_TEST_FUNC__MASK 0x00000e00 +#define A5XX_RB_ALPHA_CONTROL_ALPHA_TEST_FUNC__SHIFT 9 +static inline uint32_t A5XX_RB_ALPHA_CONTROL_ALPHA_TEST_FUNC(enum adreno_compare_func val) +{ + return ((val) << 
A5XX_RB_ALPHA_CONTROL_ALPHA_TEST_FUNC__SHIFT) & A5XX_RB_ALPHA_CONTROL_ALPHA_TEST_FUNC__MASK; +} + +#define REG_A5XX_RB_BLEND_CNTL 0x0000e1a9 +#define A5XX_RB_BLEND_CNTL_ENABLE_BLEND__MASK 0x000000ff +#define A5XX_RB_BLEND_CNTL_ENABLE_BLEND__SHIFT 0 +static inline uint32_t A5XX_RB_BLEND_CNTL_ENABLE_BLEND(uint32_t val) +{ + return ((val) << A5XX_RB_BLEND_CNTL_ENABLE_BLEND__SHIFT) & A5XX_RB_BLEND_CNTL_ENABLE_BLEND__MASK; +} +#define A5XX_RB_BLEND_CNTL_INDEPENDENT_BLEND 0x00000100 +#define A5XX_RB_BLEND_CNTL_ALPHA_TO_COVERAGE 0x00000400 +#define A5XX_RB_BLEND_CNTL_SAMPLE_MASK__MASK 0xffff0000 +#define A5XX_RB_BLEND_CNTL_SAMPLE_MASK__SHIFT 16 +static inline uint32_t A5XX_RB_BLEND_CNTL_SAMPLE_MASK(uint32_t val) +{ + return ((val) << A5XX_RB_BLEND_CNTL_SAMPLE_MASK__SHIFT) & A5XX_RB_BLEND_CNTL_SAMPLE_MASK__MASK; +} + +#define REG_A5XX_RB_DEPTH_PLANE_CNTL 0x0000e1b0 +#define A5XX_RB_DEPTH_PLANE_CNTL_FRAG_WRITES_Z 0x00000001 +#define A5XX_RB_DEPTH_PLANE_CNTL_UNK1 0x00000002 + +#define REG_A5XX_RB_DEPTH_CNTL 0x0000e1b1 +#define A5XX_RB_DEPTH_CNTL_Z_ENABLE 0x00000001 +#define A5XX_RB_DEPTH_CNTL_Z_WRITE_ENABLE 0x00000002 +#define A5XX_RB_DEPTH_CNTL_ZFUNC__MASK 0x0000001c +#define A5XX_RB_DEPTH_CNTL_ZFUNC__SHIFT 2 +static inline uint32_t A5XX_RB_DEPTH_CNTL_ZFUNC(enum adreno_compare_func val) +{ + return ((val) << A5XX_RB_DEPTH_CNTL_ZFUNC__SHIFT) & A5XX_RB_DEPTH_CNTL_ZFUNC__MASK; +} +#define A5XX_RB_DEPTH_CNTL_Z_TEST_ENABLE 0x00000040 + +#define REG_A5XX_RB_DEPTH_BUFFER_INFO 0x0000e1b2 +#define A5XX_RB_DEPTH_BUFFER_INFO_DEPTH_FORMAT__MASK 0x00000007 +#define A5XX_RB_DEPTH_BUFFER_INFO_DEPTH_FORMAT__SHIFT 0 +static inline uint32_t A5XX_RB_DEPTH_BUFFER_INFO_DEPTH_FORMAT(enum a5xx_depth_format val) +{ + return ((val) << A5XX_RB_DEPTH_BUFFER_INFO_DEPTH_FORMAT__SHIFT) & A5XX_RB_DEPTH_BUFFER_INFO_DEPTH_FORMAT__MASK; +} + +#define REG_A5XX_RB_DEPTH_BUFFER_BASE_LO 0x0000e1b3 + +#define REG_A5XX_RB_DEPTH_BUFFER_BASE_HI 0x0000e1b4 + +#define REG_A5XX_RB_DEPTH_BUFFER_PITCH 0x0000e1b5 
+#define A5XX_RB_DEPTH_BUFFER_PITCH__MASK 0xffffffff +#define A5XX_RB_DEPTH_BUFFER_PITCH__SHIFT 0 +static inline uint32_t A5XX_RB_DEPTH_BUFFER_PITCH(uint32_t val) +{ + assert(!(val & 0x3f)); + return ((val >> 6) << A5XX_RB_DEPTH_BUFFER_PITCH__SHIFT) & A5XX_RB_DEPTH_BUFFER_PITCH__MASK; +} + +#define REG_A5XX_RB_DEPTH_BUFFER_ARRAY_PITCH 0x0000e1b6 +#define A5XX_RB_DEPTH_BUFFER_ARRAY_PITCH__MASK 0xffffffff +#define A5XX_RB_DEPTH_BUFFER_ARRAY_PITCH__SHIFT 0 +static inline uint32_t A5XX_RB_DEPTH_BUFFER_ARRAY_PITCH(uint32_t val) +{ + assert(!(val & 0x3f)); + return ((val >> 6) << A5XX_RB_DEPTH_BUFFER_ARRAY_PITCH__SHIFT) & A5XX_RB_DEPTH_BUFFER_ARRAY_PITCH__MASK; +} + +#define REG_A5XX_RB_STENCIL_CONTROL 0x0000e1c0 +#define A5XX_RB_STENCIL_CONTROL_STENCIL_ENABLE 0x00000001 +#define A5XX_RB_STENCIL_CONTROL_STENCIL_ENABLE_BF 0x00000002 +#define A5XX_RB_STENCIL_CONTROL_STENCIL_READ 0x00000004 +#define A5XX_RB_STENCIL_CONTROL_FUNC__MASK 0x00000700 +#define A5XX_RB_STENCIL_CONTROL_FUNC__SHIFT 8 +static inline uint32_t A5XX_RB_STENCIL_CONTROL_FUNC(enum adreno_compare_func val) +{ + return ((val) << A5XX_RB_STENCIL_CONTROL_FUNC__SHIFT) & A5XX_RB_STENCIL_CONTROL_FUNC__MASK; +} +#define A5XX_RB_STENCIL_CONTROL_FAIL__MASK 0x00003800 +#define A5XX_RB_STENCIL_CONTROL_FAIL__SHIFT 11 +static inline uint32_t A5XX_RB_STENCIL_CONTROL_FAIL(enum adreno_stencil_op val) +{ + return ((val) << A5XX_RB_STENCIL_CONTROL_FAIL__SHIFT) & A5XX_RB_STENCIL_CONTROL_FAIL__MASK; +} +#define A5XX_RB_STENCIL_CONTROL_ZPASS__MASK 0x0001c000 +#define A5XX_RB_STENCIL_CONTROL_ZPASS__SHIFT 14 +static inline uint32_t A5XX_RB_STENCIL_CONTROL_ZPASS(enum adreno_stencil_op val) +{ + return ((val) << A5XX_RB_STENCIL_CONTROL_ZPASS__SHIFT) & A5XX_RB_STENCIL_CONTROL_ZPASS__MASK; +} +#define A5XX_RB_STENCIL_CONTROL_ZFAIL__MASK 0x000e0000 +#define A5XX_RB_STENCIL_CONTROL_ZFAIL__SHIFT 17 +static inline uint32_t A5XX_RB_STENCIL_CONTROL_ZFAIL(enum adreno_stencil_op val) +{ + return ((val) << A5XX_RB_STENCIL_CONTROL_ZFAIL__SHIFT) 
& A5XX_RB_STENCIL_CONTROL_ZFAIL__MASK; +} +#define A5XX_RB_STENCIL_CONTROL_FUNC_BF__MASK 0x00700000 +#define A5XX_RB_STENCIL_CONTROL_FUNC_BF__SHIFT 20 +static inline uint32_t A5XX_RB_STENCIL_CONTROL_FUNC_BF(enum adreno_compare_func val) +{ + return ((val) << A5XX_RB_STENCIL_CONTROL_FUNC_BF__SHIFT) & A5XX_RB_STENCIL_CONTROL_FUNC_BF__MASK; +} +#define A5XX_RB_STENCIL_CONTROL_FAIL_BF__MASK 0x03800000 +#define A5XX_RB_STENCIL_CONTROL_FAIL_BF__SHIFT 23 +static inline uint32_t A5XX_RB_STENCIL_CONTROL_FAIL_BF(enum adreno_stencil_op val) +{ + return ((val) << A5XX_RB_STENCIL_CONTROL_FAIL_BF__SHIFT) & A5XX_RB_STENCIL_CONTROL_FAIL_BF__MASK; +} +#define A5XX_RB_STENCIL_CONTROL_ZPASS_BF__MASK 0x1c000000 +#define A5XX_RB_STENCIL_CONTROL_ZPASS_BF__SHIFT 26 +static inline uint32_t A5XX_RB_STENCIL_CONTROL_ZPASS_BF(enum adreno_stencil_op val) +{ + return ((val) << A5XX_RB_STENCIL_CONTROL_ZPASS_BF__SHIFT) & A5XX_RB_STENCIL_CONTROL_ZPASS_BF__MASK; +} +#define A5XX_RB_STENCIL_CONTROL_ZFAIL_BF__MASK 0xe0000000 +#define A5XX_RB_STENCIL_CONTROL_ZFAIL_BF__SHIFT 29 +static inline uint32_t A5XX_RB_STENCIL_CONTROL_ZFAIL_BF(enum adreno_stencil_op val) +{ + return ((val) << A5XX_RB_STENCIL_CONTROL_ZFAIL_BF__SHIFT) & A5XX_RB_STENCIL_CONTROL_ZFAIL_BF__MASK; +} + +#define REG_A5XX_RB_STENCIL_INFO 0x0000e1c1 +#define A5XX_RB_STENCIL_INFO_SEPARATE_STENCIL 0x00000001 + +#define REG_A5XX_RB_STENCIL_BASE_LO 0x0000e1c2 + +#define REG_A5XX_RB_STENCIL_BASE_HI 0x0000e1c3 + +#define REG_A5XX_RB_STENCIL_PITCH 0x0000e1c4 +#define A5XX_RB_STENCIL_PITCH__MASK 0xffffffff +#define A5XX_RB_STENCIL_PITCH__SHIFT 0 +static inline uint32_t A5XX_RB_STENCIL_PITCH(uint32_t val) +{ + assert(!(val & 0x3f)); + return ((val >> 6) << A5XX_RB_STENCIL_PITCH__SHIFT) & A5XX_RB_STENCIL_PITCH__MASK; +} + +#define REG_A5XX_RB_STENCIL_ARRAY_PITCH 0x0000e1c5 +#define A5XX_RB_STENCIL_ARRAY_PITCH__MASK 0xffffffff +#define A5XX_RB_STENCIL_ARRAY_PITCH__SHIFT 0 +static inline uint32_t A5XX_RB_STENCIL_ARRAY_PITCH(uint32_t val) +{ + 
assert(!(val & 0x3f)); + return ((val >> 6) << A5XX_RB_STENCIL_ARRAY_PITCH__SHIFT) & A5XX_RB_STENCIL_ARRAY_PITCH__MASK; +} + +#define REG_A5XX_RB_STENCILREFMASK 0x0000e1c6 +#define A5XX_RB_STENCILREFMASK_STENCILREF__MASK 0x000000ff +#define A5XX_RB_STENCILREFMASK_STENCILREF__SHIFT 0 +static inline uint32_t A5XX_RB_STENCILREFMASK_STENCILREF(uint32_t val) +{ + return ((val) << A5XX_RB_STENCILREFMASK_STENCILREF__SHIFT) & A5XX_RB_STENCILREFMASK_STENCILREF__MASK; +} +#define A5XX_RB_STENCILREFMASK_STENCILMASK__MASK 0x0000ff00 +#define A5XX_RB_STENCILREFMASK_STENCILMASK__SHIFT 8 +static inline uint32_t A5XX_RB_STENCILREFMASK_STENCILMASK(uint32_t val) +{ + return ((val) << A5XX_RB_STENCILREFMASK_STENCILMASK__SHIFT) & A5XX_RB_STENCILREFMASK_STENCILMASK__MASK; +} +#define A5XX_RB_STENCILREFMASK_STENCILWRITEMASK__MASK 0x00ff0000 +#define A5XX_RB_STENCILREFMASK_STENCILWRITEMASK__SHIFT 16 +static inline uint32_t A5XX_RB_STENCILREFMASK_STENCILWRITEMASK(uint32_t val) +{ + return ((val) << A5XX_RB_STENCILREFMASK_STENCILWRITEMASK__SHIFT) & A5XX_RB_STENCILREFMASK_STENCILWRITEMASK__MASK; +} + +#define REG_A5XX_RB_STENCILREFMASK_BF 0x0000e1c7 +#define A5XX_RB_STENCILREFMASK_BF_STENCILREF__MASK 0x000000ff +#define A5XX_RB_STENCILREFMASK_BF_STENCILREF__SHIFT 0 +static inline uint32_t A5XX_RB_STENCILREFMASK_BF_STENCILREF(uint32_t val) +{ + return ((val) << A5XX_RB_STENCILREFMASK_BF_STENCILREF__SHIFT) & A5XX_RB_STENCILREFMASK_BF_STENCILREF__MASK; +} +#define A5XX_RB_STENCILREFMASK_BF_STENCILMASK__MASK 0x0000ff00 +#define A5XX_RB_STENCILREFMASK_BF_STENCILMASK__SHIFT 8 +static inline uint32_t A5XX_RB_STENCILREFMASK_BF_STENCILMASK(uint32_t val) +{ + return ((val) << A5XX_RB_STENCILREFMASK_BF_STENCILMASK__SHIFT) & A5XX_RB_STENCILREFMASK_BF_STENCILMASK__MASK; +} +#define A5XX_RB_STENCILREFMASK_BF_STENCILWRITEMASK__MASK 0x00ff0000 +#define A5XX_RB_STENCILREFMASK_BF_STENCILWRITEMASK__SHIFT 16 +static inline uint32_t A5XX_RB_STENCILREFMASK_BF_STENCILWRITEMASK(uint32_t val) +{ + return ((val) << 
A5XX_RB_STENCILREFMASK_BF_STENCILWRITEMASK__SHIFT) & A5XX_RB_STENCILREFMASK_BF_STENCILWRITEMASK__MASK; +} + +#define REG_A5XX_RB_WINDOW_OFFSET 0x0000e1d0 +#define A5XX_RB_WINDOW_OFFSET_WINDOW_OFFSET_DISABLE 0x80000000 +#define A5XX_RB_WINDOW_OFFSET_X__MASK 0x00007fff +#define A5XX_RB_WINDOW_OFFSET_X__SHIFT 0 +static inline uint32_t A5XX_RB_WINDOW_OFFSET_X(uint32_t val) +{ + return ((val) << A5XX_RB_WINDOW_OFFSET_X__SHIFT) & A5XX_RB_WINDOW_OFFSET_X__MASK; +} +#define A5XX_RB_WINDOW_OFFSET_Y__MASK 0x7fff0000 +#define A5XX_RB_WINDOW_OFFSET_Y__SHIFT 16 +static inline uint32_t A5XX_RB_WINDOW_OFFSET_Y(uint32_t val) +{ + return ((val) << A5XX_RB_WINDOW_OFFSET_Y__SHIFT) & A5XX_RB_WINDOW_OFFSET_Y__MASK; +} + +#define REG_A5XX_RB_SAMPLE_COUNT_CONTROL 0x0000e1d1 +#define A5XX_RB_SAMPLE_COUNT_CONTROL_COPY 0x00000002 + +#define REG_A5XX_RB_BLIT_CNTL 0x0000e210 +#define A5XX_RB_BLIT_CNTL_BUF__MASK 0x0000000f +#define A5XX_RB_BLIT_CNTL_BUF__SHIFT 0 +static inline uint32_t A5XX_RB_BLIT_CNTL_BUF(enum a5xx_blit_buf val) +{ + return ((val) << A5XX_RB_BLIT_CNTL_BUF__SHIFT) & A5XX_RB_BLIT_CNTL_BUF__MASK; +} + +#define REG_A5XX_RB_RESOLVE_CNTL_1 0x0000e211 +#define A5XX_RB_RESOLVE_CNTL_1_WINDOW_OFFSET_DISABLE 0x80000000 +#define A5XX_RB_RESOLVE_CNTL_1_X__MASK 0x00007fff +#define A5XX_RB_RESOLVE_CNTL_1_X__SHIFT 0 +static inline uint32_t A5XX_RB_RESOLVE_CNTL_1_X(uint32_t val) +{ + return ((val) << A5XX_RB_RESOLVE_CNTL_1_X__SHIFT) & A5XX_RB_RESOLVE_CNTL_1_X__MASK; +} +#define A5XX_RB_RESOLVE_CNTL_1_Y__MASK 0x7fff0000 +#define A5XX_RB_RESOLVE_CNTL_1_Y__SHIFT 16 +static inline uint32_t A5XX_RB_RESOLVE_CNTL_1_Y(uint32_t val) +{ + return ((val) << A5XX_RB_RESOLVE_CNTL_1_Y__SHIFT) & A5XX_RB_RESOLVE_CNTL_1_Y__MASK; +} + +#define REG_A5XX_RB_RESOLVE_CNTL_2 0x0000e212 +#define A5XX_RB_RESOLVE_CNTL_2_WINDOW_OFFSET_DISABLE 0x80000000 +#define A5XX_RB_RESOLVE_CNTL_2_X__MASK 0x00007fff +#define A5XX_RB_RESOLVE_CNTL_2_X__SHIFT 0 +static inline uint32_t A5XX_RB_RESOLVE_CNTL_2_X(uint32_t val) +{ + return 
((val) << A5XX_RB_RESOLVE_CNTL_2_X__SHIFT) & A5XX_RB_RESOLVE_CNTL_2_X__MASK; +} +#define A5XX_RB_RESOLVE_CNTL_2_Y__MASK 0x7fff0000 +#define A5XX_RB_RESOLVE_CNTL_2_Y__SHIFT 16 +static inline uint32_t A5XX_RB_RESOLVE_CNTL_2_Y(uint32_t val) +{ + return ((val) << A5XX_RB_RESOLVE_CNTL_2_Y__SHIFT) & A5XX_RB_RESOLVE_CNTL_2_Y__MASK; +} + +#define REG_A5XX_RB_RESOLVE_CNTL_3 0x0000e213 +#define A5XX_RB_RESOLVE_CNTL_3_TILED 0x00000001 + +#define REG_A5XX_RB_BLIT_DST_LO 0x0000e214 + +#define REG_A5XX_RB_BLIT_DST_HI 0x0000e215 + +#define REG_A5XX_RB_BLIT_DST_PITCH 0x0000e216 +#define A5XX_RB_BLIT_DST_PITCH__MASK 0xffffffff +#define A5XX_RB_BLIT_DST_PITCH__SHIFT 0 +static inline uint32_t A5XX_RB_BLIT_DST_PITCH(uint32_t val) +{ + assert(!(val & 0x3f)); + return ((val >> 6) << A5XX_RB_BLIT_DST_PITCH__SHIFT) & A5XX_RB_BLIT_DST_PITCH__MASK; +} + +#define REG_A5XX_RB_BLIT_DST_ARRAY_PITCH 0x0000e217 +#define A5XX_RB_BLIT_DST_ARRAY_PITCH__MASK 0xffffffff +#define A5XX_RB_BLIT_DST_ARRAY_PITCH__SHIFT 0 +static inline uint32_t A5XX_RB_BLIT_DST_ARRAY_PITCH(uint32_t val) +{ + assert(!(val & 0x3f)); + return ((val >> 6) << A5XX_RB_BLIT_DST_ARRAY_PITCH__SHIFT) & A5XX_RB_BLIT_DST_ARRAY_PITCH__MASK; +} + +#define REG_A5XX_RB_CLEAR_COLOR_DW0 0x0000e218 + +#define REG_A5XX_RB_CLEAR_COLOR_DW1 0x0000e219 + +#define REG_A5XX_RB_CLEAR_COLOR_DW2 0x0000e21a + +#define REG_A5XX_RB_CLEAR_COLOR_DW3 0x0000e21b + +#define REG_A5XX_RB_CLEAR_CNTL 0x0000e21c +#define A5XX_RB_CLEAR_CNTL_FAST_CLEAR 0x00000002 +#define A5XX_RB_CLEAR_CNTL_MSAA_RESOLVE 0x00000004 +#define A5XX_RB_CLEAR_CNTL_MASK__MASK 0x000000f0 +#define A5XX_RB_CLEAR_CNTL_MASK__SHIFT 4 +static inline uint32_t A5XX_RB_CLEAR_CNTL_MASK(uint32_t val) +{ + return ((val) << A5XX_RB_CLEAR_CNTL_MASK__SHIFT) & A5XX_RB_CLEAR_CNTL_MASK__MASK; +} + +#define REG_A5XX_RB_DEPTH_FLAG_BUFFER_BASE_LO 0x0000e240 + +#define REG_A5XX_RB_DEPTH_FLAG_BUFFER_BASE_HI 0x0000e241 + +#define REG_A5XX_RB_DEPTH_FLAG_BUFFER_PITCH 0x0000e242 + +static inline uint32_t 
REG_A5XX_RB_MRT_FLAG_BUFFER(uint32_t i0) { return 0x0000e243 + 0x4*i0; } + +static inline uint32_t REG_A5XX_RB_MRT_FLAG_BUFFER_ADDR_LO(uint32_t i0) { return 0x0000e243 + 0x4*i0; } + +static inline uint32_t REG_A5XX_RB_MRT_FLAG_BUFFER_ADDR_HI(uint32_t i0) { return 0x0000e244 + 0x4*i0; } + +static inline uint32_t REG_A5XX_RB_MRT_FLAG_BUFFER_PITCH(uint32_t i0) { return 0x0000e245 + 0x4*i0; } +#define A5XX_RB_MRT_FLAG_BUFFER_PITCH__MASK 0xffffffff +#define A5XX_RB_MRT_FLAG_BUFFER_PITCH__SHIFT 0 +static inline uint32_t A5XX_RB_MRT_FLAG_BUFFER_PITCH(uint32_t val) +{ + assert(!(val & 0x3f)); + return ((val >> 6) << A5XX_RB_MRT_FLAG_BUFFER_PITCH__SHIFT) & A5XX_RB_MRT_FLAG_BUFFER_PITCH__MASK; +} + +static inline uint32_t REG_A5XX_RB_MRT_FLAG_BUFFER_ARRAY_PITCH(uint32_t i0) { return 0x0000e246 + 0x4*i0; } +#define A5XX_RB_MRT_FLAG_BUFFER_ARRAY_PITCH__MASK 0xffffffff +#define A5XX_RB_MRT_FLAG_BUFFER_ARRAY_PITCH__SHIFT 0 +static inline uint32_t A5XX_RB_MRT_FLAG_BUFFER_ARRAY_PITCH(uint32_t val) +{ + assert(!(val & 0x3f)); + return ((val >> 6) << A5XX_RB_MRT_FLAG_BUFFER_ARRAY_PITCH__SHIFT) & A5XX_RB_MRT_FLAG_BUFFER_ARRAY_PITCH__MASK; +} + +#define REG_A5XX_RB_BLIT_FLAG_DST_LO 0x0000e263 + +#define REG_A5XX_RB_BLIT_FLAG_DST_HI 0x0000e264 + +#define REG_A5XX_RB_BLIT_FLAG_DST_PITCH 0x0000e265 +#define A5XX_RB_BLIT_FLAG_DST_PITCH__MASK 0xffffffff +#define A5XX_RB_BLIT_FLAG_DST_PITCH__SHIFT 0 +static inline uint32_t A5XX_RB_BLIT_FLAG_DST_PITCH(uint32_t val) +{ + assert(!(val & 0x3f)); + return ((val >> 6) << A5XX_RB_BLIT_FLAG_DST_PITCH__SHIFT) & A5XX_RB_BLIT_FLAG_DST_PITCH__MASK; +} + +#define REG_A5XX_RB_BLIT_FLAG_DST_ARRAY_PITCH 0x0000e266 +#define A5XX_RB_BLIT_FLAG_DST_ARRAY_PITCH__MASK 0xffffffff +#define A5XX_RB_BLIT_FLAG_DST_ARRAY_PITCH__SHIFT 0 +static inline uint32_t A5XX_RB_BLIT_FLAG_DST_ARRAY_PITCH(uint32_t val) +{ + assert(!(val & 0x3f)); + return ((val >> 6) << A5XX_RB_BLIT_FLAG_DST_ARRAY_PITCH__SHIFT) & A5XX_RB_BLIT_FLAG_DST_ARRAY_PITCH__MASK; +} + +#define 
REG_A5XX_RB_SAMPLE_COUNT_ADDR_LO 0x0000e267 + +#define REG_A5XX_RB_SAMPLE_COUNT_ADDR_HI 0x0000e268 + +#define REG_A5XX_VPC_CNTL_0 0x0000e280 +#define A5XX_VPC_CNTL_0_STRIDE_IN_VPC__MASK 0x0000007f +#define A5XX_VPC_CNTL_0_STRIDE_IN_VPC__SHIFT 0 +static inline uint32_t A5XX_VPC_CNTL_0_STRIDE_IN_VPC(uint32_t val) +{ + return ((val) << A5XX_VPC_CNTL_0_STRIDE_IN_VPC__SHIFT) & A5XX_VPC_CNTL_0_STRIDE_IN_VPC__MASK; +} +#define A5XX_VPC_CNTL_0_VARYING 0x00000800 + +static inline uint32_t REG_A5XX_VPC_VARYING_INTERP(uint32_t i0) { return 0x0000e282 + 0x1*i0; } + +static inline uint32_t REG_A5XX_VPC_VARYING_INTERP_MODE(uint32_t i0) { return 0x0000e282 + 0x1*i0; } + +static inline uint32_t REG_A5XX_VPC_VARYING_PS_REPL(uint32_t i0) { return 0x0000e28a + 0x1*i0; } + +static inline uint32_t REG_A5XX_VPC_VARYING_PS_REPL_MODE(uint32_t i0) { return 0x0000e28a + 0x1*i0; } + +#define REG_A5XX_UNKNOWN_E292 0x0000e292 + +#define REG_A5XX_UNKNOWN_E293 0x0000e293 + +static inline uint32_t REG_A5XX_VPC_VAR(uint32_t i0) { return 0x0000e294 + 0x1*i0; } + +static inline uint32_t REG_A5XX_VPC_VAR_DISABLE(uint32_t i0) { return 0x0000e294 + 0x1*i0; } + +#define REG_A5XX_VPC_GS_SIV_CNTL 0x0000e298 + +#define REG_A5XX_UNKNOWN_E29A 0x0000e29a + +#define REG_A5XX_VPC_PACK 0x0000e29d +#define A5XX_VPC_PACK_NUMNONPOSVAR__MASK 0x000000ff +#define A5XX_VPC_PACK_NUMNONPOSVAR__SHIFT 0 +static inline uint32_t A5XX_VPC_PACK_NUMNONPOSVAR(uint32_t val) +{ + return ((val) << A5XX_VPC_PACK_NUMNONPOSVAR__SHIFT) & A5XX_VPC_PACK_NUMNONPOSVAR__MASK; +} +#define A5XX_VPC_PACK_PSIZELOC__MASK 0x0000ff00 +#define A5XX_VPC_PACK_PSIZELOC__SHIFT 8 +static inline uint32_t A5XX_VPC_PACK_PSIZELOC(uint32_t val) +{ + return ((val) << A5XX_VPC_PACK_PSIZELOC__SHIFT) & A5XX_VPC_PACK_PSIZELOC__MASK; +} + +#define REG_A5XX_VPC_FS_PRIMITIVEID_CNTL 0x0000e2a0 + +#define REG_A5XX_VPC_SO_BUF_CNTL 0x0000e2a1 +#define A5XX_VPC_SO_BUF_CNTL_BUF0 0x00000001 +#define A5XX_VPC_SO_BUF_CNTL_BUF1 0x00000008 +#define A5XX_VPC_SO_BUF_CNTL_BUF2 
0x00000040 +#define A5XX_VPC_SO_BUF_CNTL_BUF3 0x00000200 +#define A5XX_VPC_SO_BUF_CNTL_ENABLE 0x00008000 + +#define REG_A5XX_VPC_SO_OVERRIDE 0x0000e2a2 +#define A5XX_VPC_SO_OVERRIDE_SO_DISABLE 0x00000001 + +#define REG_A5XX_VPC_SO_CNTL 0x0000e2a3 +#define A5XX_VPC_SO_CNTL_ENABLE 0x00010000 + +#define REG_A5XX_VPC_SO_PROG 0x0000e2a4 +#define A5XX_VPC_SO_PROG_A_BUF__MASK 0x00000003 +#define A5XX_VPC_SO_PROG_A_BUF__SHIFT 0 +static inline uint32_t A5XX_VPC_SO_PROG_A_BUF(uint32_t val) +{ + return ((val) << A5XX_VPC_SO_PROG_A_BUF__SHIFT) & A5XX_VPC_SO_PROG_A_BUF__MASK; +} +#define A5XX_VPC_SO_PROG_A_OFF__MASK 0x000007fc +#define A5XX_VPC_SO_PROG_A_OFF__SHIFT 2 +static inline uint32_t A5XX_VPC_SO_PROG_A_OFF(uint32_t val) +{ + assert(!(val & 0x3)); + return ((val >> 2) << A5XX_VPC_SO_PROG_A_OFF__SHIFT) & A5XX_VPC_SO_PROG_A_OFF__MASK; +} +#define A5XX_VPC_SO_PROG_A_EN 0x00000800 +#define A5XX_VPC_SO_PROG_B_BUF__MASK 0x00003000 +#define A5XX_VPC_SO_PROG_B_BUF__SHIFT 12 +static inline uint32_t A5XX_VPC_SO_PROG_B_BUF(uint32_t val) +{ + return ((val) << A5XX_VPC_SO_PROG_B_BUF__SHIFT) & A5XX_VPC_SO_PROG_B_BUF__MASK; +} +#define A5XX_VPC_SO_PROG_B_OFF__MASK 0x007fc000 +#define A5XX_VPC_SO_PROG_B_OFF__SHIFT 14 +static inline uint32_t A5XX_VPC_SO_PROG_B_OFF(uint32_t val) +{ + assert(!(val & 0x3)); + return ((val >> 2) << A5XX_VPC_SO_PROG_B_OFF__SHIFT) & A5XX_VPC_SO_PROG_B_OFF__MASK; +} +#define A5XX_VPC_SO_PROG_B_EN 0x00800000 + +static inline uint32_t REG_A5XX_VPC_SO(uint32_t i0) { return 0x0000e2a7 + 0x7*i0; } + +static inline uint32_t REG_A5XX_VPC_SO_BUFFER_BASE_LO(uint32_t i0) { return 0x0000e2a7 + 0x7*i0; } + +static inline uint32_t REG_A5XX_VPC_SO_BUFFER_BASE_HI(uint32_t i0) { return 0x0000e2a8 + 0x7*i0; } + +static inline uint32_t REG_A5XX_VPC_SO_BUFFER_SIZE(uint32_t i0) { return 0x0000e2a9 + 0x7*i0; } + +static inline uint32_t REG_A5XX_VPC_SO_NCOMP(uint32_t i0) { return 0x0000e2aa + 0x7*i0; } + +static inline uint32_t REG_A5XX_VPC_SO_BUFFER_OFFSET(uint32_t i0) { return 
0x0000e2ab + 0x7*i0; } + +static inline uint32_t REG_A5XX_VPC_SO_FLUSH_BASE_LO(uint32_t i0) { return 0x0000e2ac + 0x7*i0; } + +static inline uint32_t REG_A5XX_VPC_SO_FLUSH_BASE_HI(uint32_t i0) { return 0x0000e2ad + 0x7*i0; } + +#define REG_A5XX_PC_PRIMITIVE_CNTL 0x0000e384 +#define A5XX_PC_PRIMITIVE_CNTL_STRIDE_IN_VPC__MASK 0x0000007f +#define A5XX_PC_PRIMITIVE_CNTL_STRIDE_IN_VPC__SHIFT 0 +static inline uint32_t A5XX_PC_PRIMITIVE_CNTL_STRIDE_IN_VPC(uint32_t val) +{ + return ((val) << A5XX_PC_PRIMITIVE_CNTL_STRIDE_IN_VPC__SHIFT) & A5XX_PC_PRIMITIVE_CNTL_STRIDE_IN_VPC__MASK; +} +#define A5XX_PC_PRIMITIVE_CNTL_PRIMITIVE_RESTART 0x00000100 +#define A5XX_PC_PRIMITIVE_CNTL_COUNT_PRIMITIVES 0x00000200 +#define A5XX_PC_PRIMITIVE_CNTL_PROVOKING_VTX_LAST 0x00000400 + +#define REG_A5XX_PC_PRIM_VTX_CNTL 0x0000e385 +#define A5XX_PC_PRIM_VTX_CNTL_PSIZE 0x00000800 + +#define REG_A5XX_PC_RASTER_CNTL 0x0000e388 +#define A5XX_PC_RASTER_CNTL_POLYMODE_FRONT_PTYPE__MASK 0x00000007 +#define A5XX_PC_RASTER_CNTL_POLYMODE_FRONT_PTYPE__SHIFT 0 +static inline uint32_t A5XX_PC_RASTER_CNTL_POLYMODE_FRONT_PTYPE(enum adreno_pa_su_sc_draw val) +{ + return ((val) << A5XX_PC_RASTER_CNTL_POLYMODE_FRONT_PTYPE__SHIFT) & A5XX_PC_RASTER_CNTL_POLYMODE_FRONT_PTYPE__MASK; +} +#define A5XX_PC_RASTER_CNTL_POLYMODE_BACK_PTYPE__MASK 0x00000038 +#define A5XX_PC_RASTER_CNTL_POLYMODE_BACK_PTYPE__SHIFT 3 +static inline uint32_t A5XX_PC_RASTER_CNTL_POLYMODE_BACK_PTYPE(enum adreno_pa_su_sc_draw val) +{ + return ((val) << A5XX_PC_RASTER_CNTL_POLYMODE_BACK_PTYPE__SHIFT) & A5XX_PC_RASTER_CNTL_POLYMODE_BACK_PTYPE__MASK; +} +#define A5XX_PC_RASTER_CNTL_POLYMODE_ENABLE 0x00000040 + +#define REG_A5XX_UNKNOWN_E389 0x0000e389 + +#define REG_A5XX_PC_RESTART_INDEX 0x0000e38c + +#define REG_A5XX_PC_GS_LAYERED 0x0000e38d + +#define REG_A5XX_PC_GS_PARAM 0x0000e38e +#define A5XX_PC_GS_PARAM_MAX_VERTICES__MASK 0x000003ff +#define A5XX_PC_GS_PARAM_MAX_VERTICES__SHIFT 0 +static inline uint32_t A5XX_PC_GS_PARAM_MAX_VERTICES(uint32_t 
val) +{ + return ((val) << A5XX_PC_GS_PARAM_MAX_VERTICES__SHIFT) & A5XX_PC_GS_PARAM_MAX_VERTICES__MASK; +} +#define A5XX_PC_GS_PARAM_INVOCATIONS__MASK 0x0000f800 +#define A5XX_PC_GS_PARAM_INVOCATIONS__SHIFT 11 +static inline uint32_t A5XX_PC_GS_PARAM_INVOCATIONS(uint32_t val) +{ + return ((val) << A5XX_PC_GS_PARAM_INVOCATIONS__SHIFT) & A5XX_PC_GS_PARAM_INVOCATIONS__MASK; +} +#define A5XX_PC_GS_PARAM_PRIMTYPE__MASK 0x01800000 +#define A5XX_PC_GS_PARAM_PRIMTYPE__SHIFT 23 +static inline uint32_t A5XX_PC_GS_PARAM_PRIMTYPE(enum adreno_pa_su_sc_draw val) +{ + return ((val) << A5XX_PC_GS_PARAM_PRIMTYPE__SHIFT) & A5XX_PC_GS_PARAM_PRIMTYPE__MASK; +} + +#define REG_A5XX_PC_HS_PARAM 0x0000e38f +#define A5XX_PC_HS_PARAM_VERTICES_OUT__MASK 0x0000003f +#define A5XX_PC_HS_PARAM_VERTICES_OUT__SHIFT 0 +static inline uint32_t A5XX_PC_HS_PARAM_VERTICES_OUT(uint32_t val) +{ + return ((val) << A5XX_PC_HS_PARAM_VERTICES_OUT__SHIFT) & A5XX_PC_HS_PARAM_VERTICES_OUT__MASK; +} +#define A5XX_PC_HS_PARAM_SPACING__MASK 0x00600000 +#define A5XX_PC_HS_PARAM_SPACING__SHIFT 21 +static inline uint32_t A5XX_PC_HS_PARAM_SPACING(enum a4xx_tess_spacing val) +{ + return ((val) << A5XX_PC_HS_PARAM_SPACING__SHIFT) & A5XX_PC_HS_PARAM_SPACING__MASK; +} +#define A5XX_PC_HS_PARAM_CW 0x00800000 +#define A5XX_PC_HS_PARAM_CONNECTED 0x01000000 + +#define REG_A5XX_PC_POWER_CNTL 0x0000e3b0 + +#define REG_A5XX_VFD_CONTROL_0 0x0000e400 +#define A5XX_VFD_CONTROL_0_VTXCNT__MASK 0x0000003f +#define A5XX_VFD_CONTROL_0_VTXCNT__SHIFT 0 +static inline uint32_t A5XX_VFD_CONTROL_0_VTXCNT(uint32_t val) +{ + return ((val) << A5XX_VFD_CONTROL_0_VTXCNT__SHIFT) & A5XX_VFD_CONTROL_0_VTXCNT__MASK; +} + +#define REG_A5XX_VFD_CONTROL_1 0x0000e401 +#define A5XX_VFD_CONTROL_1_REGID4VTX__MASK 0x000000ff +#define A5XX_VFD_CONTROL_1_REGID4VTX__SHIFT 0 +static inline uint32_t A5XX_VFD_CONTROL_1_REGID4VTX(uint32_t val) +{ + return ((val) << A5XX_VFD_CONTROL_1_REGID4VTX__SHIFT) & A5XX_VFD_CONTROL_1_REGID4VTX__MASK; +} +#define 
A5XX_VFD_CONTROL_1_REGID4INST__MASK 0x0000ff00 +#define A5XX_VFD_CONTROL_1_REGID4INST__SHIFT 8 +static inline uint32_t A5XX_VFD_CONTROL_1_REGID4INST(uint32_t val) +{ + return ((val) << A5XX_VFD_CONTROL_1_REGID4INST__SHIFT) & A5XX_VFD_CONTROL_1_REGID4INST__MASK; +} +#define A5XX_VFD_CONTROL_1_REGID4PRIMID__MASK 0x00ff0000 +#define A5XX_VFD_CONTROL_1_REGID4PRIMID__SHIFT 16 +static inline uint32_t A5XX_VFD_CONTROL_1_REGID4PRIMID(uint32_t val) +{ + return ((val) << A5XX_VFD_CONTROL_1_REGID4PRIMID__SHIFT) & A5XX_VFD_CONTROL_1_REGID4PRIMID__MASK; +} + +#define REG_A5XX_VFD_CONTROL_2 0x0000e402 +#define A5XX_VFD_CONTROL_2_REGID_PATCHID__MASK 0x000000ff +#define A5XX_VFD_CONTROL_2_REGID_PATCHID__SHIFT 0 +static inline uint32_t A5XX_VFD_CONTROL_2_REGID_PATCHID(uint32_t val) +{ + return ((val) << A5XX_VFD_CONTROL_2_REGID_PATCHID__SHIFT) & A5XX_VFD_CONTROL_2_REGID_PATCHID__MASK; +} + +#define REG_A5XX_VFD_CONTROL_3 0x0000e403 +#define A5XX_VFD_CONTROL_3_REGID_PATCHID__MASK 0x0000ff00 +#define A5XX_VFD_CONTROL_3_REGID_PATCHID__SHIFT 8 +static inline uint32_t A5XX_VFD_CONTROL_3_REGID_PATCHID(uint32_t val) +{ + return ((val) << A5XX_VFD_CONTROL_3_REGID_PATCHID__SHIFT) & A5XX_VFD_CONTROL_3_REGID_PATCHID__MASK; +} +#define A5XX_VFD_CONTROL_3_REGID_TESSX__MASK 0x00ff0000 +#define A5XX_VFD_CONTROL_3_REGID_TESSX__SHIFT 16 +static inline uint32_t A5XX_VFD_CONTROL_3_REGID_TESSX(uint32_t val) +{ + return ((val) << A5XX_VFD_CONTROL_3_REGID_TESSX__SHIFT) & A5XX_VFD_CONTROL_3_REGID_TESSX__MASK; +} +#define A5XX_VFD_CONTROL_3_REGID_TESSY__MASK 0xff000000 +#define A5XX_VFD_CONTROL_3_REGID_TESSY__SHIFT 24 +static inline uint32_t A5XX_VFD_CONTROL_3_REGID_TESSY(uint32_t val) +{ + return ((val) << A5XX_VFD_CONTROL_3_REGID_TESSY__SHIFT) & A5XX_VFD_CONTROL_3_REGID_TESSY__MASK; +} + +#define REG_A5XX_VFD_CONTROL_4 0x0000e404 + +#define REG_A5XX_VFD_CONTROL_5 0x0000e405 + +#define REG_A5XX_VFD_INDEX_OFFSET 0x0000e408 + +#define REG_A5XX_VFD_INSTANCE_START_OFFSET 0x0000e409 + +static inline uint32_t 
REG_A5XX_VFD_FETCH(uint32_t i0) { return 0x0000e40a + 0x4*i0; } + +static inline uint32_t REG_A5XX_VFD_FETCH_BASE_LO(uint32_t i0) { return 0x0000e40a + 0x4*i0; } + +static inline uint32_t REG_A5XX_VFD_FETCH_BASE_HI(uint32_t i0) { return 0x0000e40b + 0x4*i0; } + +static inline uint32_t REG_A5XX_VFD_FETCH_SIZE(uint32_t i0) { return 0x0000e40c + 0x4*i0; } + +static inline uint32_t REG_A5XX_VFD_FETCH_STRIDE(uint32_t i0) { return 0x0000e40d + 0x4*i0; } + +static inline uint32_t REG_A5XX_VFD_DECODE(uint32_t i0) { return 0x0000e48a + 0x2*i0; } + +static inline uint32_t REG_A5XX_VFD_DECODE_INSTR(uint32_t i0) { return 0x0000e48a + 0x2*i0; } +#define A5XX_VFD_DECODE_INSTR_IDX__MASK 0x0000001f +#define A5XX_VFD_DECODE_INSTR_IDX__SHIFT 0 +static inline uint32_t A5XX_VFD_DECODE_INSTR_IDX(uint32_t val) +{ + return ((val) << A5XX_VFD_DECODE_INSTR_IDX__SHIFT) & A5XX_VFD_DECODE_INSTR_IDX__MASK; +} +#define A5XX_VFD_DECODE_INSTR_INSTANCED 0x00020000 +#define A5XX_VFD_DECODE_INSTR_FORMAT__MASK 0x0ff00000 +#define A5XX_VFD_DECODE_INSTR_FORMAT__SHIFT 20 +static inline uint32_t A5XX_VFD_DECODE_INSTR_FORMAT(enum a5xx_vtx_fmt val) +{ + return ((val) << A5XX_VFD_DECODE_INSTR_FORMAT__SHIFT) & A5XX_VFD_DECODE_INSTR_FORMAT__MASK; +} +#define A5XX_VFD_DECODE_INSTR_SWAP__MASK 0x30000000 +#define A5XX_VFD_DECODE_INSTR_SWAP__SHIFT 28 +static inline uint32_t A5XX_VFD_DECODE_INSTR_SWAP(enum a3xx_color_swap val) +{ + return ((val) << A5XX_VFD_DECODE_INSTR_SWAP__SHIFT) & A5XX_VFD_DECODE_INSTR_SWAP__MASK; +} +#define A5XX_VFD_DECODE_INSTR_UNK30 0x40000000 +#define A5XX_VFD_DECODE_INSTR_FLOAT 0x80000000 + +static inline uint32_t REG_A5XX_VFD_DECODE_STEP_RATE(uint32_t i0) { return 0x0000e48b + 0x2*i0; } + +static inline uint32_t REG_A5XX_VFD_DEST_CNTL(uint32_t i0) { return 0x0000e4ca + 0x1*i0; } + +static inline uint32_t REG_A5XX_VFD_DEST_CNTL_INSTR(uint32_t i0) { return 0x0000e4ca + 0x1*i0; } +#define A5XX_VFD_DEST_CNTL_INSTR_WRITEMASK__MASK 0x0000000f +#define A5XX_VFD_DEST_CNTL_INSTR_WRITEMASK__SHIFT 
0 +static inline uint32_t A5XX_VFD_DEST_CNTL_INSTR_WRITEMASK(uint32_t val) +{ + return ((val) << A5XX_VFD_DEST_CNTL_INSTR_WRITEMASK__SHIFT) & A5XX_VFD_DEST_CNTL_INSTR_WRITEMASK__MASK; +} +#define A5XX_VFD_DEST_CNTL_INSTR_REGID__MASK 0x00000ff0 +#define A5XX_VFD_DEST_CNTL_INSTR_REGID__SHIFT 4 +static inline uint32_t A5XX_VFD_DEST_CNTL_INSTR_REGID(uint32_t val) +{ + return ((val) << A5XX_VFD_DEST_CNTL_INSTR_REGID__SHIFT) & A5XX_VFD_DEST_CNTL_INSTR_REGID__MASK; +} + +#define REG_A5XX_VFD_POWER_CNTL 0x0000e4f0 + +#define REG_A5XX_SP_SP_CNTL 0x0000e580 + +#define REG_A5XX_SP_VS_CONFIG 0x0000e584 +#define A5XX_SP_VS_CONFIG_ENABLED 0x00000001 +#define A5XX_SP_VS_CONFIG_CONSTOBJECTOFFSET__MASK 0x000000fe +#define A5XX_SP_VS_CONFIG_CONSTOBJECTOFFSET__SHIFT 1 +static inline uint32_t A5XX_SP_VS_CONFIG_CONSTOBJECTOFFSET(uint32_t val) +{ + return ((val) << A5XX_SP_VS_CONFIG_CONSTOBJECTOFFSET__SHIFT) & A5XX_SP_VS_CONFIG_CONSTOBJECTOFFSET__MASK; +} +#define A5XX_SP_VS_CONFIG_SHADEROBJOFFSET__MASK 0x00007f00 +#define A5XX_SP_VS_CONFIG_SHADEROBJOFFSET__SHIFT 8 +static inline uint32_t A5XX_SP_VS_CONFIG_SHADEROBJOFFSET(uint32_t val) +{ + return ((val) << A5XX_SP_VS_CONFIG_SHADEROBJOFFSET__SHIFT) & A5XX_SP_VS_CONFIG_SHADEROBJOFFSET__MASK; +} + +#define REG_A5XX_SP_FS_CONFIG 0x0000e585 +#define A5XX_SP_FS_CONFIG_ENABLED 0x00000001 +#define A5XX_SP_FS_CONFIG_CONSTOBJECTOFFSET__MASK 0x000000fe +#define A5XX_SP_FS_CONFIG_CONSTOBJECTOFFSET__SHIFT 1 +static inline uint32_t A5XX_SP_FS_CONFIG_CONSTOBJECTOFFSET(uint32_t val) +{ + return ((val) << A5XX_SP_FS_CONFIG_CONSTOBJECTOFFSET__SHIFT) & A5XX_SP_FS_CONFIG_CONSTOBJECTOFFSET__MASK; +} +#define A5XX_SP_FS_CONFIG_SHADEROBJOFFSET__MASK 0x00007f00 +#define A5XX_SP_FS_CONFIG_SHADEROBJOFFSET__SHIFT 8 +static inline uint32_t A5XX_SP_FS_CONFIG_SHADEROBJOFFSET(uint32_t val) +{ + return ((val) << A5XX_SP_FS_CONFIG_SHADEROBJOFFSET__SHIFT) & A5XX_SP_FS_CONFIG_SHADEROBJOFFSET__MASK; +} + +#define REG_A5XX_SP_HS_CONFIG 0x0000e586 +#define 
A5XX_SP_HS_CONFIG_ENABLED 0x00000001 +#define A5XX_SP_HS_CONFIG_CONSTOBJECTOFFSET__MASK 0x000000fe +#define A5XX_SP_HS_CONFIG_CONSTOBJECTOFFSET__SHIFT 1 +static inline uint32_t A5XX_SP_HS_CONFIG_CONSTOBJECTOFFSET(uint32_t val) +{ + return ((val) << A5XX_SP_HS_CONFIG_CONSTOBJECTOFFSET__SHIFT) & A5XX_SP_HS_CONFIG_CONSTOBJECTOFFSET__MASK; +} +#define A5XX_SP_HS_CONFIG_SHADEROBJOFFSET__MASK 0x00007f00 +#define A5XX_SP_HS_CONFIG_SHADEROBJOFFSET__SHIFT 8 +static inline uint32_t A5XX_SP_HS_CONFIG_SHADEROBJOFFSET(uint32_t val) +{ + return ((val) << A5XX_SP_HS_CONFIG_SHADEROBJOFFSET__SHIFT) & A5XX_SP_HS_CONFIG_SHADEROBJOFFSET__MASK; +} + +#define REG_A5XX_SP_DS_CONFIG 0x0000e587 +#define A5XX_SP_DS_CONFIG_ENABLED 0x00000001 +#define A5XX_SP_DS_CONFIG_CONSTOBJECTOFFSET__MASK 0x000000fe +#define A5XX_SP_DS_CONFIG_CONSTOBJECTOFFSET__SHIFT 1 +static inline uint32_t A5XX_SP_DS_CONFIG_CONSTOBJECTOFFSET(uint32_t val) +{ + return ((val) << A5XX_SP_DS_CONFIG_CONSTOBJECTOFFSET__SHIFT) & A5XX_SP_DS_CONFIG_CONSTOBJECTOFFSET__MASK; +} +#define A5XX_SP_DS_CONFIG_SHADEROBJOFFSET__MASK 0x00007f00 +#define A5XX_SP_DS_CONFIG_SHADEROBJOFFSET__SHIFT 8 +static inline uint32_t A5XX_SP_DS_CONFIG_SHADEROBJOFFSET(uint32_t val) +{ + return ((val) << A5XX_SP_DS_CONFIG_SHADEROBJOFFSET__SHIFT) & A5XX_SP_DS_CONFIG_SHADEROBJOFFSET__MASK; +} + +#define REG_A5XX_SP_GS_CONFIG 0x0000e588 +#define A5XX_SP_GS_CONFIG_ENABLED 0x00000001 +#define A5XX_SP_GS_CONFIG_CONSTOBJECTOFFSET__MASK 0x000000fe +#define A5XX_SP_GS_CONFIG_CONSTOBJECTOFFSET__SHIFT 1 +static inline uint32_t A5XX_SP_GS_CONFIG_CONSTOBJECTOFFSET(uint32_t val) +{ + return ((val) << A5XX_SP_GS_CONFIG_CONSTOBJECTOFFSET__SHIFT) & A5XX_SP_GS_CONFIG_CONSTOBJECTOFFSET__MASK; +} +#define A5XX_SP_GS_CONFIG_SHADEROBJOFFSET__MASK 0x00007f00 +#define A5XX_SP_GS_CONFIG_SHADEROBJOFFSET__SHIFT 8 +static inline uint32_t A5XX_SP_GS_CONFIG_SHADEROBJOFFSET(uint32_t val) +{ + return ((val) << A5XX_SP_GS_CONFIG_SHADEROBJOFFSET__SHIFT) & 
A5XX_SP_GS_CONFIG_SHADEROBJOFFSET__MASK; +} + +#define REG_A5XX_SP_CS_CONFIG 0x0000e589 +#define A5XX_SP_CS_CONFIG_ENABLED 0x00000001 +#define A5XX_SP_CS_CONFIG_CONSTOBJECTOFFSET__MASK 0x000000fe +#define A5XX_SP_CS_CONFIG_CONSTOBJECTOFFSET__SHIFT 1 +static inline uint32_t A5XX_SP_CS_CONFIG_CONSTOBJECTOFFSET(uint32_t val) +{ + return ((val) << A5XX_SP_CS_CONFIG_CONSTOBJECTOFFSET__SHIFT) & A5XX_SP_CS_CONFIG_CONSTOBJECTOFFSET__MASK; +} +#define A5XX_SP_CS_CONFIG_SHADEROBJOFFSET__MASK 0x00007f00 +#define A5XX_SP_CS_CONFIG_SHADEROBJOFFSET__SHIFT 8 +static inline uint32_t A5XX_SP_CS_CONFIG_SHADEROBJOFFSET(uint32_t val) +{ + return ((val) << A5XX_SP_CS_CONFIG_SHADEROBJOFFSET__SHIFT) & A5XX_SP_CS_CONFIG_SHADEROBJOFFSET__MASK; +} + +#define REG_A5XX_SP_VS_CONFIG_MAX_CONST 0x0000e58a + +#define REG_A5XX_SP_FS_CONFIG_MAX_CONST 0x0000e58b + +#define REG_A5XX_SP_VS_CTRL_REG0 0x0000e590 +#define A5XX_SP_VS_CTRL_REG0_THREADSIZE__MASK 0x00000008 +#define A5XX_SP_VS_CTRL_REG0_THREADSIZE__SHIFT 3 +static inline uint32_t A5XX_SP_VS_CTRL_REG0_THREADSIZE(enum a3xx_threadsize val) +{ + return ((val) << A5XX_SP_VS_CTRL_REG0_THREADSIZE__SHIFT) & A5XX_SP_VS_CTRL_REG0_THREADSIZE__MASK; +} +#define A5XX_SP_VS_CTRL_REG0_HALFREGFOOTPRINT__MASK 0x000003f0 +#define A5XX_SP_VS_CTRL_REG0_HALFREGFOOTPRINT__SHIFT 4 +static inline uint32_t A5XX_SP_VS_CTRL_REG0_HALFREGFOOTPRINT(uint32_t val) +{ + return ((val) << A5XX_SP_VS_CTRL_REG0_HALFREGFOOTPRINT__SHIFT) & A5XX_SP_VS_CTRL_REG0_HALFREGFOOTPRINT__MASK; +} +#define A5XX_SP_VS_CTRL_REG0_FULLREGFOOTPRINT__MASK 0x0000fc00 +#define A5XX_SP_VS_CTRL_REG0_FULLREGFOOTPRINT__SHIFT 10 +static inline uint32_t A5XX_SP_VS_CTRL_REG0_FULLREGFOOTPRINT(uint32_t val) +{ + return ((val) << A5XX_SP_VS_CTRL_REG0_FULLREGFOOTPRINT__SHIFT) & A5XX_SP_VS_CTRL_REG0_FULLREGFOOTPRINT__MASK; +} +#define A5XX_SP_VS_CTRL_REG0_VARYING 0x00010000 +#define A5XX_SP_VS_CTRL_REG0_PIXLODENABLE 0x00100000 +#define A5XX_SP_VS_CTRL_REG0_BRANCHSTACK__MASK 0xfe000000 +#define 
A5XX_SP_VS_CTRL_REG0_BRANCHSTACK__SHIFT 25 +static inline uint32_t A5XX_SP_VS_CTRL_REG0_BRANCHSTACK(uint32_t val) +{ + return ((val) << A5XX_SP_VS_CTRL_REG0_BRANCHSTACK__SHIFT) & A5XX_SP_VS_CTRL_REG0_BRANCHSTACK__MASK; +} + +#define REG_A5XX_SP_PRIMITIVE_CNTL 0x0000e592 +#define A5XX_SP_PRIMITIVE_CNTL_VSOUT__MASK 0x0000001f +#define A5XX_SP_PRIMITIVE_CNTL_VSOUT__SHIFT 0 +static inline uint32_t A5XX_SP_PRIMITIVE_CNTL_VSOUT(uint32_t val) +{ + return ((val) << A5XX_SP_PRIMITIVE_CNTL_VSOUT__SHIFT) & A5XX_SP_PRIMITIVE_CNTL_VSOUT__MASK; +} + +static inline uint32_t REG_A5XX_SP_VS_OUT(uint32_t i0) { return 0x0000e593 + 0x1*i0; } + +static inline uint32_t REG_A5XX_SP_VS_OUT_REG(uint32_t i0) { return 0x0000e593 + 0x1*i0; } +#define A5XX_SP_VS_OUT_REG_A_REGID__MASK 0x000000ff +#define A5XX_SP_VS_OUT_REG_A_REGID__SHIFT 0 +static inline uint32_t A5XX_SP_VS_OUT_REG_A_REGID(uint32_t val) +{ + return ((val) << A5XX_SP_VS_OUT_REG_A_REGID__SHIFT) & A5XX_SP_VS_OUT_REG_A_REGID__MASK; +} +#define A5XX_SP_VS_OUT_REG_A_COMPMASK__MASK 0x00000f00 +#define A5XX_SP_VS_OUT_REG_A_COMPMASK__SHIFT 8 +static inline uint32_t A5XX_SP_VS_OUT_REG_A_COMPMASK(uint32_t val) +{ + return ((val) << A5XX_SP_VS_OUT_REG_A_COMPMASK__SHIFT) & A5XX_SP_VS_OUT_REG_A_COMPMASK__MASK; +} +#define A5XX_SP_VS_OUT_REG_B_REGID__MASK 0x00ff0000 +#define A5XX_SP_VS_OUT_REG_B_REGID__SHIFT 16 +static inline uint32_t A5XX_SP_VS_OUT_REG_B_REGID(uint32_t val) +{ + return ((val) << A5XX_SP_VS_OUT_REG_B_REGID__SHIFT) & A5XX_SP_VS_OUT_REG_B_REGID__MASK; +} +#define A5XX_SP_VS_OUT_REG_B_COMPMASK__MASK 0x0f000000 +#define A5XX_SP_VS_OUT_REG_B_COMPMASK__SHIFT 24 +static inline uint32_t A5XX_SP_VS_OUT_REG_B_COMPMASK(uint32_t val) +{ + return ((val) << A5XX_SP_VS_OUT_REG_B_COMPMASK__SHIFT) & A5XX_SP_VS_OUT_REG_B_COMPMASK__MASK; +} + +static inline uint32_t REG_A5XX_SP_VS_VPC_DST(uint32_t i0) { return 0x0000e5a3 + 0x1*i0; } + +static inline uint32_t REG_A5XX_SP_VS_VPC_DST_REG(uint32_t i0) { return 0x0000e5a3 + 0x1*i0; } +#define 
A5XX_SP_VS_VPC_DST_REG_OUTLOC0__MASK 0x000000ff +#define A5XX_SP_VS_VPC_DST_REG_OUTLOC0__SHIFT 0 +static inline uint32_t A5XX_SP_VS_VPC_DST_REG_OUTLOC0(uint32_t val) +{ + return ((val) << A5XX_SP_VS_VPC_DST_REG_OUTLOC0__SHIFT) & A5XX_SP_VS_VPC_DST_REG_OUTLOC0__MASK; +} +#define A5XX_SP_VS_VPC_DST_REG_OUTLOC1__MASK 0x0000ff00 +#define A5XX_SP_VS_VPC_DST_REG_OUTLOC1__SHIFT 8 +static inline uint32_t A5XX_SP_VS_VPC_DST_REG_OUTLOC1(uint32_t val) +{ + return ((val) << A5XX_SP_VS_VPC_DST_REG_OUTLOC1__SHIFT) & A5XX_SP_VS_VPC_DST_REG_OUTLOC1__MASK; +} +#define A5XX_SP_VS_VPC_DST_REG_OUTLOC2__MASK 0x00ff0000 +#define A5XX_SP_VS_VPC_DST_REG_OUTLOC2__SHIFT 16 +static inline uint32_t A5XX_SP_VS_VPC_DST_REG_OUTLOC2(uint32_t val) +{ + return ((val) << A5XX_SP_VS_VPC_DST_REG_OUTLOC2__SHIFT) & A5XX_SP_VS_VPC_DST_REG_OUTLOC2__MASK; +} +#define A5XX_SP_VS_VPC_DST_REG_OUTLOC3__MASK 0xff000000 +#define A5XX_SP_VS_VPC_DST_REG_OUTLOC3__SHIFT 24 +static inline uint32_t A5XX_SP_VS_VPC_DST_REG_OUTLOC3(uint32_t val) +{ + return ((val) << A5XX_SP_VS_VPC_DST_REG_OUTLOC3__SHIFT) & A5XX_SP_VS_VPC_DST_REG_OUTLOC3__MASK; +} + +#define REG_A5XX_UNKNOWN_E5AB 0x0000e5ab + +#define REG_A5XX_SP_VS_OBJ_START_LO 0x0000e5ac + +#define REG_A5XX_SP_VS_OBJ_START_HI 0x0000e5ad + +#define REG_A5XX_SP_FS_CTRL_REG0 0x0000e5c0 +#define A5XX_SP_FS_CTRL_REG0_THREADSIZE__MASK 0x00000008 +#define A5XX_SP_FS_CTRL_REG0_THREADSIZE__SHIFT 3 +static inline uint32_t A5XX_SP_FS_CTRL_REG0_THREADSIZE(enum a3xx_threadsize val) +{ + return ((val) << A5XX_SP_FS_CTRL_REG0_THREADSIZE__SHIFT) & A5XX_SP_FS_CTRL_REG0_THREADSIZE__MASK; +} +#define A5XX_SP_FS_CTRL_REG0_HALFREGFOOTPRINT__MASK 0x000003f0 +#define A5XX_SP_FS_CTRL_REG0_HALFREGFOOTPRINT__SHIFT 4 +static inline uint32_t A5XX_SP_FS_CTRL_REG0_HALFREGFOOTPRINT(uint32_t val) +{ + return ((val) << A5XX_SP_FS_CTRL_REG0_HALFREGFOOTPRINT__SHIFT) & A5XX_SP_FS_CTRL_REG0_HALFREGFOOTPRINT__MASK; +} +#define A5XX_SP_FS_CTRL_REG0_FULLREGFOOTPRINT__MASK 0x0000fc00 +#define 
A5XX_SP_FS_CTRL_REG0_FULLREGFOOTPRINT__SHIFT 10 +static inline uint32_t A5XX_SP_FS_CTRL_REG0_FULLREGFOOTPRINT(uint32_t val) +{ + return ((val) << A5XX_SP_FS_CTRL_REG0_FULLREGFOOTPRINT__SHIFT) & A5XX_SP_FS_CTRL_REG0_FULLREGFOOTPRINT__MASK; +} +#define A5XX_SP_FS_CTRL_REG0_VARYING 0x00010000 +#define A5XX_SP_FS_CTRL_REG0_PIXLODENABLE 0x00100000 +#define A5XX_SP_FS_CTRL_REG0_BRANCHSTACK__MASK 0xfe000000 +#define A5XX_SP_FS_CTRL_REG0_BRANCHSTACK__SHIFT 25 +static inline uint32_t A5XX_SP_FS_CTRL_REG0_BRANCHSTACK(uint32_t val) +{ + return ((val) << A5XX_SP_FS_CTRL_REG0_BRANCHSTACK__SHIFT) & A5XX_SP_FS_CTRL_REG0_BRANCHSTACK__MASK; +} + +#define REG_A5XX_UNKNOWN_E5C2 0x0000e5c2 + +#define REG_A5XX_SP_FS_OBJ_START_LO 0x0000e5c3 + +#define REG_A5XX_SP_FS_OBJ_START_HI 0x0000e5c4 + +#define REG_A5XX_SP_BLEND_CNTL 0x0000e5c9 +#define A5XX_SP_BLEND_CNTL_ENABLED 0x00000001 +#define A5XX_SP_BLEND_CNTL_UNK8 0x00000100 +#define A5XX_SP_BLEND_CNTL_ALPHA_TO_COVERAGE 0x00000400 + +#define REG_A5XX_SP_FS_OUTPUT_CNTL 0x0000e5ca +#define A5XX_SP_FS_OUTPUT_CNTL_MRT__MASK 0x0000000f +#define A5XX_SP_FS_OUTPUT_CNTL_MRT__SHIFT 0 +static inline uint32_t A5XX_SP_FS_OUTPUT_CNTL_MRT(uint32_t val) +{ + return ((val) << A5XX_SP_FS_OUTPUT_CNTL_MRT__SHIFT) & A5XX_SP_FS_OUTPUT_CNTL_MRT__MASK; +} +#define A5XX_SP_FS_OUTPUT_CNTL_DEPTH_REGID__MASK 0x00001fe0 +#define A5XX_SP_FS_OUTPUT_CNTL_DEPTH_REGID__SHIFT 5 +static inline uint32_t A5XX_SP_FS_OUTPUT_CNTL_DEPTH_REGID(uint32_t val) +{ + return ((val) << A5XX_SP_FS_OUTPUT_CNTL_DEPTH_REGID__SHIFT) & A5XX_SP_FS_OUTPUT_CNTL_DEPTH_REGID__MASK; +} +#define A5XX_SP_FS_OUTPUT_CNTL_SAMPLEMASK_REGID__MASK 0x001fe000 +#define A5XX_SP_FS_OUTPUT_CNTL_SAMPLEMASK_REGID__SHIFT 13 +static inline uint32_t A5XX_SP_FS_OUTPUT_CNTL_SAMPLEMASK_REGID(uint32_t val) +{ + return ((val) << A5XX_SP_FS_OUTPUT_CNTL_SAMPLEMASK_REGID__SHIFT) & A5XX_SP_FS_OUTPUT_CNTL_SAMPLEMASK_REGID__MASK; +} + +static inline uint32_t REG_A5XX_SP_FS_OUTPUT(uint32_t i0) { return 0x0000e5cb + 0x1*i0; } + 
+static inline uint32_t REG_A5XX_SP_FS_OUTPUT_REG(uint32_t i0) { return 0x0000e5cb + 0x1*i0; } +#define A5XX_SP_FS_OUTPUT_REG_REGID__MASK 0x000000ff +#define A5XX_SP_FS_OUTPUT_REG_REGID__SHIFT 0 +static inline uint32_t A5XX_SP_FS_OUTPUT_REG_REGID(uint32_t val) +{ + return ((val) << A5XX_SP_FS_OUTPUT_REG_REGID__SHIFT) & A5XX_SP_FS_OUTPUT_REG_REGID__MASK; +} +#define A5XX_SP_FS_OUTPUT_REG_HALF_PRECISION 0x00000100 + +static inline uint32_t REG_A5XX_SP_FS_MRT(uint32_t i0) { return 0x0000e5d3 + 0x1*i0; } + +static inline uint32_t REG_A5XX_SP_FS_MRT_REG(uint32_t i0) { return 0x0000e5d3 + 0x1*i0; } +#define A5XX_SP_FS_MRT_REG_COLOR_FORMAT__MASK 0x000000ff +#define A5XX_SP_FS_MRT_REG_COLOR_FORMAT__SHIFT 0 +static inline uint32_t A5XX_SP_FS_MRT_REG_COLOR_FORMAT(enum a5xx_color_fmt val) +{ + return ((val) << A5XX_SP_FS_MRT_REG_COLOR_FORMAT__SHIFT) & A5XX_SP_FS_MRT_REG_COLOR_FORMAT__MASK; +} +#define A5XX_SP_FS_MRT_REG_COLOR_SINT 0x00000100 +#define A5XX_SP_FS_MRT_REG_COLOR_UINT 0x00000200 +#define A5XX_SP_FS_MRT_REG_COLOR_SRGB 0x00000400 + +#define REG_A5XX_UNKNOWN_E5DB 0x0000e5db + +#define REG_A5XX_SP_CS_CTRL_REG0 0x0000e5f0 +#define A5XX_SP_CS_CTRL_REG0_THREADSIZE__MASK 0x00000008 +#define A5XX_SP_CS_CTRL_REG0_THREADSIZE__SHIFT 3 +static inline uint32_t A5XX_SP_CS_CTRL_REG0_THREADSIZE(enum a3xx_threadsize val) +{ + return ((val) << A5XX_SP_CS_CTRL_REG0_THREADSIZE__SHIFT) & A5XX_SP_CS_CTRL_REG0_THREADSIZE__MASK; +} +#define A5XX_SP_CS_CTRL_REG0_HALFREGFOOTPRINT__MASK 0x000003f0 +#define A5XX_SP_CS_CTRL_REG0_HALFREGFOOTPRINT__SHIFT 4 +static inline uint32_t A5XX_SP_CS_CTRL_REG0_HALFREGFOOTPRINT(uint32_t val) +{ + return ((val) << A5XX_SP_CS_CTRL_REG0_HALFREGFOOTPRINT__SHIFT) & A5XX_SP_CS_CTRL_REG0_HALFREGFOOTPRINT__MASK; +} +#define A5XX_SP_CS_CTRL_REG0_FULLREGFOOTPRINT__MASK 0x0000fc00 +#define A5XX_SP_CS_CTRL_REG0_FULLREGFOOTPRINT__SHIFT 10 +static inline uint32_t A5XX_SP_CS_CTRL_REG0_FULLREGFOOTPRINT(uint32_t val) +{ + return ((val) << 
A5XX_SP_CS_CTRL_REG0_FULLREGFOOTPRINT__SHIFT) & A5XX_SP_CS_CTRL_REG0_FULLREGFOOTPRINT__MASK; +} +#define A5XX_SP_CS_CTRL_REG0_VARYING 0x00010000 +#define A5XX_SP_CS_CTRL_REG0_PIXLODENABLE 0x00100000 +#define A5XX_SP_CS_CTRL_REG0_BRANCHSTACK__MASK 0xfe000000 +#define A5XX_SP_CS_CTRL_REG0_BRANCHSTACK__SHIFT 25 +static inline uint32_t A5XX_SP_CS_CTRL_REG0_BRANCHSTACK(uint32_t val) +{ + return ((val) << A5XX_SP_CS_CTRL_REG0_BRANCHSTACK__SHIFT) & A5XX_SP_CS_CTRL_REG0_BRANCHSTACK__MASK; +} + +#define REG_A5XX_UNKNOWN_E5F2 0x0000e5f2 + +#define REG_A5XX_SP_CS_OBJ_START_LO 0x0000e5f3 + +#define REG_A5XX_SP_CS_OBJ_START_HI 0x0000e5f4 + +#define REG_A5XX_SP_HS_CTRL_REG0 0x0000e600 +#define A5XX_SP_HS_CTRL_REG0_THREADSIZE__MASK 0x00000008 +#define A5XX_SP_HS_CTRL_REG0_THREADSIZE__SHIFT 3 +static inline uint32_t A5XX_SP_HS_CTRL_REG0_THREADSIZE(enum a3xx_threadsize val) +{ + return ((val) << A5XX_SP_HS_CTRL_REG0_THREADSIZE__SHIFT) & A5XX_SP_HS_CTRL_REG0_THREADSIZE__MASK; +} +#define A5XX_SP_HS_CTRL_REG0_HALFREGFOOTPRINT__MASK 0x000003f0 +#define A5XX_SP_HS_CTRL_REG0_HALFREGFOOTPRINT__SHIFT 4 +static inline uint32_t A5XX_SP_HS_CTRL_REG0_HALFREGFOOTPRINT(uint32_t val) +{ + return ((val) << A5XX_SP_HS_CTRL_REG0_HALFREGFOOTPRINT__SHIFT) & A5XX_SP_HS_CTRL_REG0_HALFREGFOOTPRINT__MASK; +} +#define A5XX_SP_HS_CTRL_REG0_FULLREGFOOTPRINT__MASK 0x0000fc00 +#define A5XX_SP_HS_CTRL_REG0_FULLREGFOOTPRINT__SHIFT 10 +static inline uint32_t A5XX_SP_HS_CTRL_REG0_FULLREGFOOTPRINT(uint32_t val) +{ + return ((val) << A5XX_SP_HS_CTRL_REG0_FULLREGFOOTPRINT__SHIFT) & A5XX_SP_HS_CTRL_REG0_FULLREGFOOTPRINT__MASK; +} +#define A5XX_SP_HS_CTRL_REG0_VARYING 0x00010000 +#define A5XX_SP_HS_CTRL_REG0_PIXLODENABLE 0x00100000 +#define A5XX_SP_HS_CTRL_REG0_BRANCHSTACK__MASK 0xfe000000 +#define A5XX_SP_HS_CTRL_REG0_BRANCHSTACK__SHIFT 25 +static inline uint32_t A5XX_SP_HS_CTRL_REG0_BRANCHSTACK(uint32_t val) +{ + return ((val) << A5XX_SP_HS_CTRL_REG0_BRANCHSTACK__SHIFT) & A5XX_SP_HS_CTRL_REG0_BRANCHSTACK__MASK; +} + 
+#define REG_A5XX_UNKNOWN_E602 0x0000e602 + +#define REG_A5XX_SP_HS_OBJ_START_LO 0x0000e603 + +#define REG_A5XX_SP_HS_OBJ_START_HI 0x0000e604 + +#define REG_A5XX_SP_DS_CTRL_REG0 0x0000e610 +#define A5XX_SP_DS_CTRL_REG0_THREADSIZE__MASK 0x00000008 +#define A5XX_SP_DS_CTRL_REG0_THREADSIZE__SHIFT 3 +static inline uint32_t A5XX_SP_DS_CTRL_REG0_THREADSIZE(enum a3xx_threadsize val) +{ + return ((val) << A5XX_SP_DS_CTRL_REG0_THREADSIZE__SHIFT) & A5XX_SP_DS_CTRL_REG0_THREADSIZE__MASK; +} +#define A5XX_SP_DS_CTRL_REG0_HALFREGFOOTPRINT__MASK 0x000003f0 +#define A5XX_SP_DS_CTRL_REG0_HALFREGFOOTPRINT__SHIFT 4 +static inline uint32_t A5XX_SP_DS_CTRL_REG0_HALFREGFOOTPRINT(uint32_t val) +{ + return ((val) << A5XX_SP_DS_CTRL_REG0_HALFREGFOOTPRINT__SHIFT) & A5XX_SP_DS_CTRL_REG0_HALFREGFOOTPRINT__MASK; +} +#define A5XX_SP_DS_CTRL_REG0_FULLREGFOOTPRINT__MASK 0x0000fc00 +#define A5XX_SP_DS_CTRL_REG0_FULLREGFOOTPRINT__SHIFT 10 +static inline uint32_t A5XX_SP_DS_CTRL_REG0_FULLREGFOOTPRINT(uint32_t val) +{ + return ((val) << A5XX_SP_DS_CTRL_REG0_FULLREGFOOTPRINT__SHIFT) & A5XX_SP_DS_CTRL_REG0_FULLREGFOOTPRINT__MASK; +} +#define A5XX_SP_DS_CTRL_REG0_VARYING 0x00010000 +#define A5XX_SP_DS_CTRL_REG0_PIXLODENABLE 0x00100000 +#define A5XX_SP_DS_CTRL_REG0_BRANCHSTACK__MASK 0xfe000000 +#define A5XX_SP_DS_CTRL_REG0_BRANCHSTACK__SHIFT 25 +static inline uint32_t A5XX_SP_DS_CTRL_REG0_BRANCHSTACK(uint32_t val) +{ + return ((val) << A5XX_SP_DS_CTRL_REG0_BRANCHSTACK__SHIFT) & A5XX_SP_DS_CTRL_REG0_BRANCHSTACK__MASK; +} + +#define REG_A5XX_UNKNOWN_E62B 0x0000e62b + +#define REG_A5XX_SP_DS_OBJ_START_LO 0x0000e62c + +#define REG_A5XX_SP_DS_OBJ_START_HI 0x0000e62d + +#define REG_A5XX_SP_GS_CTRL_REG0 0x0000e640 +#define A5XX_SP_GS_CTRL_REG0_THREADSIZE__MASK 0x00000008 +#define A5XX_SP_GS_CTRL_REG0_THREADSIZE__SHIFT 3 +static inline uint32_t A5XX_SP_GS_CTRL_REG0_THREADSIZE(enum a3xx_threadsize val) +{ + return ((val) << A5XX_SP_GS_CTRL_REG0_THREADSIZE__SHIFT) & A5XX_SP_GS_CTRL_REG0_THREADSIZE__MASK; +} 
+#define A5XX_SP_GS_CTRL_REG0_HALFREGFOOTPRINT__MASK 0x000003f0 +#define A5XX_SP_GS_CTRL_REG0_HALFREGFOOTPRINT__SHIFT 4 +static inline uint32_t A5XX_SP_GS_CTRL_REG0_HALFREGFOOTPRINT(uint32_t val) +{ + return ((val) << A5XX_SP_GS_CTRL_REG0_HALFREGFOOTPRINT__SHIFT) & A5XX_SP_GS_CTRL_REG0_HALFREGFOOTPRINT__MASK; +} +#define A5XX_SP_GS_CTRL_REG0_FULLREGFOOTPRINT__MASK 0x0000fc00 +#define A5XX_SP_GS_CTRL_REG0_FULLREGFOOTPRINT__SHIFT 10 +static inline uint32_t A5XX_SP_GS_CTRL_REG0_FULLREGFOOTPRINT(uint32_t val) +{ + return ((val) << A5XX_SP_GS_CTRL_REG0_FULLREGFOOTPRINT__SHIFT) & A5XX_SP_GS_CTRL_REG0_FULLREGFOOTPRINT__MASK; +} +#define A5XX_SP_GS_CTRL_REG0_VARYING 0x00010000 +#define A5XX_SP_GS_CTRL_REG0_PIXLODENABLE 0x00100000 +#define A5XX_SP_GS_CTRL_REG0_BRANCHSTACK__MASK 0xfe000000 +#define A5XX_SP_GS_CTRL_REG0_BRANCHSTACK__SHIFT 25 +static inline uint32_t A5XX_SP_GS_CTRL_REG0_BRANCHSTACK(uint32_t val) +{ + return ((val) << A5XX_SP_GS_CTRL_REG0_BRANCHSTACK__SHIFT) & A5XX_SP_GS_CTRL_REG0_BRANCHSTACK__MASK; +} + +#define REG_A5XX_UNKNOWN_E65B 0x0000e65b + +#define REG_A5XX_SP_GS_OBJ_START_LO 0x0000e65c + +#define REG_A5XX_SP_GS_OBJ_START_HI 0x0000e65d + +#define REG_A5XX_TPL1_TP_RAS_MSAA_CNTL 0x0000e704 +#define A5XX_TPL1_TP_RAS_MSAA_CNTL_SAMPLES__MASK 0x00000003 +#define A5XX_TPL1_TP_RAS_MSAA_CNTL_SAMPLES__SHIFT 0 +static inline uint32_t A5XX_TPL1_TP_RAS_MSAA_CNTL_SAMPLES(enum a3xx_msaa_samples val) +{ + return ((val) << A5XX_TPL1_TP_RAS_MSAA_CNTL_SAMPLES__SHIFT) & A5XX_TPL1_TP_RAS_MSAA_CNTL_SAMPLES__MASK; +} + +#define REG_A5XX_TPL1_TP_DEST_MSAA_CNTL 0x0000e705 +#define A5XX_TPL1_TP_DEST_MSAA_CNTL_SAMPLES__MASK 0x00000003 +#define A5XX_TPL1_TP_DEST_MSAA_CNTL_SAMPLES__SHIFT 0 +static inline uint32_t A5XX_TPL1_TP_DEST_MSAA_CNTL_SAMPLES(enum a3xx_msaa_samples val) +{ + return ((val) << A5XX_TPL1_TP_DEST_MSAA_CNTL_SAMPLES__SHIFT) & A5XX_TPL1_TP_DEST_MSAA_CNTL_SAMPLES__MASK; +} +#define A5XX_TPL1_TP_DEST_MSAA_CNTL_MSAA_DISABLE 0x00000004 + +#define 
REG_A5XX_TPL1_TP_BORDER_COLOR_BASE_ADDR_LO 0x0000e706 + +#define REG_A5XX_TPL1_TP_BORDER_COLOR_BASE_ADDR_HI 0x0000e707 + +#define REG_A5XX_TPL1_VS_TEX_COUNT 0x0000e700 + +#define REG_A5XX_TPL1_HS_TEX_COUNT 0x0000e701 + +#define REG_A5XX_TPL1_DS_TEX_COUNT 0x0000e702 + +#define REG_A5XX_TPL1_GS_TEX_COUNT 0x0000e703 + +#define REG_A5XX_TPL1_VS_TEX_SAMP_LO 0x0000e722 + +#define REG_A5XX_TPL1_VS_TEX_SAMP_HI 0x0000e723 + +#define REG_A5XX_TPL1_HS_TEX_SAMP_LO 0x0000e724 + +#define REG_A5XX_TPL1_HS_TEX_SAMP_HI 0x0000e725 + +#define REG_A5XX_TPL1_DS_TEX_SAMP_LO 0x0000e726 + +#define REG_A5XX_TPL1_DS_TEX_SAMP_HI 0x0000e727 + +#define REG_A5XX_TPL1_GS_TEX_SAMP_LO 0x0000e728 + +#define REG_A5XX_TPL1_GS_TEX_SAMP_HI 0x0000e729 + +#define REG_A5XX_TPL1_VS_TEX_CONST_LO 0x0000e72a + +#define REG_A5XX_TPL1_VS_TEX_CONST_HI 0x0000e72b + +#define REG_A5XX_TPL1_HS_TEX_CONST_LO 0x0000e72c + +#define REG_A5XX_TPL1_HS_TEX_CONST_HI 0x0000e72d + +#define REG_A5XX_TPL1_DS_TEX_CONST_LO 0x0000e72e + +#define REG_A5XX_TPL1_DS_TEX_CONST_HI 0x0000e72f + +#define REG_A5XX_TPL1_GS_TEX_CONST_LO 0x0000e730 + +#define REG_A5XX_TPL1_GS_TEX_CONST_HI 0x0000e731 + +#define REG_A5XX_TPL1_FS_TEX_COUNT 0x0000e750 + +#define REG_A5XX_TPL1_CS_TEX_COUNT 0x0000e751 + +#define REG_A5XX_TPL1_FS_TEX_SAMP_LO 0x0000e75a + +#define REG_A5XX_TPL1_FS_TEX_SAMP_HI 0x0000e75b + +#define REG_A5XX_TPL1_CS_TEX_SAMP_LO 0x0000e75c + +#define REG_A5XX_TPL1_CS_TEX_SAMP_HI 0x0000e75d + +#define REG_A5XX_TPL1_FS_TEX_CONST_LO 0x0000e75e + +#define REG_A5XX_TPL1_FS_TEX_CONST_HI 0x0000e75f + +#define REG_A5XX_TPL1_CS_TEX_CONST_LO 0x0000e760 + +#define REG_A5XX_TPL1_CS_TEX_CONST_HI 0x0000e761 + +#define REG_A5XX_TPL1_TP_FS_ROTATION_CNTL 0x0000e764 + +#define REG_A5XX_HLSQ_CONTROL_0_REG 0x0000e784 +#define A5XX_HLSQ_CONTROL_0_REG_FSTHREADSIZE__MASK 0x00000001 +#define A5XX_HLSQ_CONTROL_0_REG_FSTHREADSIZE__SHIFT 0 +static inline uint32_t A5XX_HLSQ_CONTROL_0_REG_FSTHREADSIZE(enum a3xx_threadsize val) +{ + return ((val) << 
A5XX_HLSQ_CONTROL_0_REG_FSTHREADSIZE__SHIFT) & A5XX_HLSQ_CONTROL_0_REG_FSTHREADSIZE__MASK; +} +#define A5XX_HLSQ_CONTROL_0_REG_CSTHREADSIZE__MASK 0x00000004 +#define A5XX_HLSQ_CONTROL_0_REG_CSTHREADSIZE__SHIFT 2 +static inline uint32_t A5XX_HLSQ_CONTROL_0_REG_CSTHREADSIZE(enum a3xx_threadsize val) +{ + return ((val) << A5XX_HLSQ_CONTROL_0_REG_CSTHREADSIZE__SHIFT) & A5XX_HLSQ_CONTROL_0_REG_CSTHREADSIZE__MASK; +} + +#define REG_A5XX_HLSQ_CONTROL_1_REG 0x0000e785 +#define A5XX_HLSQ_CONTROL_1_REG_PRIMALLOCTHRESHOLD__MASK 0x0000003f +#define A5XX_HLSQ_CONTROL_1_REG_PRIMALLOCTHRESHOLD__SHIFT 0 +static inline uint32_t A5XX_HLSQ_CONTROL_1_REG_PRIMALLOCTHRESHOLD(uint32_t val) +{ + return ((val) << A5XX_HLSQ_CONTROL_1_REG_PRIMALLOCTHRESHOLD__SHIFT) & A5XX_HLSQ_CONTROL_1_REG_PRIMALLOCTHRESHOLD__MASK; +} + +#define REG_A5XX_HLSQ_CONTROL_2_REG 0x0000e786 +#define A5XX_HLSQ_CONTROL_2_REG_FACEREGID__MASK 0x000000ff +#define A5XX_HLSQ_CONTROL_2_REG_FACEREGID__SHIFT 0 +static inline uint32_t A5XX_HLSQ_CONTROL_2_REG_FACEREGID(uint32_t val) +{ + return ((val) << A5XX_HLSQ_CONTROL_2_REG_FACEREGID__SHIFT) & A5XX_HLSQ_CONTROL_2_REG_FACEREGID__MASK; +} +#define A5XX_HLSQ_CONTROL_2_REG_SAMPLEID__MASK 0x0000ff00 +#define A5XX_HLSQ_CONTROL_2_REG_SAMPLEID__SHIFT 8 +static inline uint32_t A5XX_HLSQ_CONTROL_2_REG_SAMPLEID(uint32_t val) +{ + return ((val) << A5XX_HLSQ_CONTROL_2_REG_SAMPLEID__SHIFT) & A5XX_HLSQ_CONTROL_2_REG_SAMPLEID__MASK; +} +#define A5XX_HLSQ_CONTROL_2_REG_SAMPLEMASK__MASK 0x00ff0000 +#define A5XX_HLSQ_CONTROL_2_REG_SAMPLEMASK__SHIFT 16 +static inline uint32_t A5XX_HLSQ_CONTROL_2_REG_SAMPLEMASK(uint32_t val) +{ + return ((val) << A5XX_HLSQ_CONTROL_2_REG_SAMPLEMASK__SHIFT) & A5XX_HLSQ_CONTROL_2_REG_SAMPLEMASK__MASK; +} + +#define REG_A5XX_HLSQ_CONTROL_3_REG 0x0000e787 +#define A5XX_HLSQ_CONTROL_3_REG_FRAGCOORDXYREGID__MASK 0x000000ff +#define A5XX_HLSQ_CONTROL_3_REG_FRAGCOORDXYREGID__SHIFT 0 +static inline uint32_t A5XX_HLSQ_CONTROL_3_REG_FRAGCOORDXYREGID(uint32_t val) +{ + 
return ((val) << A5XX_HLSQ_CONTROL_3_REG_FRAGCOORDXYREGID__SHIFT) & A5XX_HLSQ_CONTROL_3_REG_FRAGCOORDXYREGID__MASK; +} + +#define REG_A5XX_HLSQ_CONTROL_4_REG 0x0000e788 +#define A5XX_HLSQ_CONTROL_4_REG_XYCOORDREGID__MASK 0x00ff0000 +#define A5XX_HLSQ_CONTROL_4_REG_XYCOORDREGID__SHIFT 16 +static inline uint32_t A5XX_HLSQ_CONTROL_4_REG_XYCOORDREGID(uint32_t val) +{ + return ((val) << A5XX_HLSQ_CONTROL_4_REG_XYCOORDREGID__SHIFT) & A5XX_HLSQ_CONTROL_4_REG_XYCOORDREGID__MASK; +} +#define A5XX_HLSQ_CONTROL_4_REG_ZWCOORDREGID__MASK 0xff000000 +#define A5XX_HLSQ_CONTROL_4_REG_ZWCOORDREGID__SHIFT 24 +static inline uint32_t A5XX_HLSQ_CONTROL_4_REG_ZWCOORDREGID(uint32_t val) +{ + return ((val) << A5XX_HLSQ_CONTROL_4_REG_ZWCOORDREGID__SHIFT) & A5XX_HLSQ_CONTROL_4_REG_ZWCOORDREGID__MASK; +} + +#define REG_A5XX_HLSQ_UPDATE_CNTL 0x0000e78a + +#define REG_A5XX_HLSQ_VS_CONFIG 0x0000e78b +#define A5XX_HLSQ_VS_CONFIG_ENABLED 0x00000001 +#define A5XX_HLSQ_VS_CONFIG_CONSTOBJECTOFFSET__MASK 0x000000fe +#define A5XX_HLSQ_VS_CONFIG_CONSTOBJECTOFFSET__SHIFT 1 +static inline uint32_t A5XX_HLSQ_VS_CONFIG_CONSTOBJECTOFFSET(uint32_t val) +{ + return ((val) << A5XX_HLSQ_VS_CONFIG_CONSTOBJECTOFFSET__SHIFT) & A5XX_HLSQ_VS_CONFIG_CONSTOBJECTOFFSET__MASK; +} +#define A5XX_HLSQ_VS_CONFIG_SHADEROBJOFFSET__MASK 0x00007f00 +#define A5XX_HLSQ_VS_CONFIG_SHADEROBJOFFSET__SHIFT 8 +static inline uint32_t A5XX_HLSQ_VS_CONFIG_SHADEROBJOFFSET(uint32_t val) +{ + return ((val) << A5XX_HLSQ_VS_CONFIG_SHADEROBJOFFSET__SHIFT) & A5XX_HLSQ_VS_CONFIG_SHADEROBJOFFSET__MASK; +} + +#define REG_A5XX_HLSQ_FS_CONFIG 0x0000e78c +#define A5XX_HLSQ_FS_CONFIG_ENABLED 0x00000001 +#define A5XX_HLSQ_FS_CONFIG_CONSTOBJECTOFFSET__MASK 0x000000fe +#define A5XX_HLSQ_FS_CONFIG_CONSTOBJECTOFFSET__SHIFT 1 +static inline uint32_t A5XX_HLSQ_FS_CONFIG_CONSTOBJECTOFFSET(uint32_t val) +{ + return ((val) << A5XX_HLSQ_FS_CONFIG_CONSTOBJECTOFFSET__SHIFT) & A5XX_HLSQ_FS_CONFIG_CONSTOBJECTOFFSET__MASK; +} +#define 
A5XX_HLSQ_FS_CONFIG_SHADEROBJOFFSET__MASK 0x00007f00 +#define A5XX_HLSQ_FS_CONFIG_SHADEROBJOFFSET__SHIFT 8 +static inline uint32_t A5XX_HLSQ_FS_CONFIG_SHADEROBJOFFSET(uint32_t val) +{ + return ((val) << A5XX_HLSQ_FS_CONFIG_SHADEROBJOFFSET__SHIFT) & A5XX_HLSQ_FS_CONFIG_SHADEROBJOFFSET__MASK; +} + +#define REG_A5XX_HLSQ_HS_CONFIG 0x0000e78d +#define A5XX_HLSQ_HS_CONFIG_ENABLED 0x00000001 +#define A5XX_HLSQ_HS_CONFIG_CONSTOBJECTOFFSET__MASK 0x000000fe +#define A5XX_HLSQ_HS_CONFIG_CONSTOBJECTOFFSET__SHIFT 1 +static inline uint32_t A5XX_HLSQ_HS_CONFIG_CONSTOBJECTOFFSET(uint32_t val) +{ + return ((val) << A5XX_HLSQ_HS_CONFIG_CONSTOBJECTOFFSET__SHIFT) & A5XX_HLSQ_HS_CONFIG_CONSTOBJECTOFFSET__MASK; +} +#define A5XX_HLSQ_HS_CONFIG_SHADEROBJOFFSET__MASK 0x00007f00 +#define A5XX_HLSQ_HS_CONFIG_SHADEROBJOFFSET__SHIFT 8 +static inline uint32_t A5XX_HLSQ_HS_CONFIG_SHADEROBJOFFSET(uint32_t val) +{ + return ((val) << A5XX_HLSQ_HS_CONFIG_SHADEROBJOFFSET__SHIFT) & A5XX_HLSQ_HS_CONFIG_SHADEROBJOFFSET__MASK; +} + +#define REG_A5XX_HLSQ_DS_CONFIG 0x0000e78e +#define A5XX_HLSQ_DS_CONFIG_ENABLED 0x00000001 +#define A5XX_HLSQ_DS_CONFIG_CONSTOBJECTOFFSET__MASK 0x000000fe +#define A5XX_HLSQ_DS_CONFIG_CONSTOBJECTOFFSET__SHIFT 1 +static inline uint32_t A5XX_HLSQ_DS_CONFIG_CONSTOBJECTOFFSET(uint32_t val) +{ + return ((val) << A5XX_HLSQ_DS_CONFIG_CONSTOBJECTOFFSET__SHIFT) & A5XX_HLSQ_DS_CONFIG_CONSTOBJECTOFFSET__MASK; +} +#define A5XX_HLSQ_DS_CONFIG_SHADEROBJOFFSET__MASK 0x00007f00 +#define A5XX_HLSQ_DS_CONFIG_SHADEROBJOFFSET__SHIFT 8 +static inline uint32_t A5XX_HLSQ_DS_CONFIG_SHADEROBJOFFSET(uint32_t val) +{ + return ((val) << A5XX_HLSQ_DS_CONFIG_SHADEROBJOFFSET__SHIFT) & A5XX_HLSQ_DS_CONFIG_SHADEROBJOFFSET__MASK; +} + +#define REG_A5XX_HLSQ_GS_CONFIG 0x0000e78f +#define A5XX_HLSQ_GS_CONFIG_ENABLED 0x00000001 +#define A5XX_HLSQ_GS_CONFIG_CONSTOBJECTOFFSET__MASK 0x000000fe +#define A5XX_HLSQ_GS_CONFIG_CONSTOBJECTOFFSET__SHIFT 1 +static inline uint32_t 
A5XX_HLSQ_GS_CONFIG_CONSTOBJECTOFFSET(uint32_t val) +{ + return ((val) << A5XX_HLSQ_GS_CONFIG_CONSTOBJECTOFFSET__SHIFT) & A5XX_HLSQ_GS_CONFIG_CONSTOBJECTOFFSET__MASK; +} +#define A5XX_HLSQ_GS_CONFIG_SHADEROBJOFFSET__MASK 0x00007f00 +#define A5XX_HLSQ_GS_CONFIG_SHADEROBJOFFSET__SHIFT 8 +static inline uint32_t A5XX_HLSQ_GS_CONFIG_SHADEROBJOFFSET(uint32_t val) +{ + return ((val) << A5XX_HLSQ_GS_CONFIG_SHADEROBJOFFSET__SHIFT) & A5XX_HLSQ_GS_CONFIG_SHADEROBJOFFSET__MASK; +} + +#define REG_A5XX_HLSQ_CS_CONFIG 0x0000e790 +#define A5XX_HLSQ_CS_CONFIG_ENABLED 0x00000001 +#define A5XX_HLSQ_CS_CONFIG_CONSTOBJECTOFFSET__MASK 0x000000fe +#define A5XX_HLSQ_CS_CONFIG_CONSTOBJECTOFFSET__SHIFT 1 +static inline uint32_t A5XX_HLSQ_CS_CONFIG_CONSTOBJECTOFFSET(uint32_t val) +{ + return ((val) << A5XX_HLSQ_CS_CONFIG_CONSTOBJECTOFFSET__SHIFT) & A5XX_HLSQ_CS_CONFIG_CONSTOBJECTOFFSET__MASK; +} +#define A5XX_HLSQ_CS_CONFIG_SHADEROBJOFFSET__MASK 0x00007f00 +#define A5XX_HLSQ_CS_CONFIG_SHADEROBJOFFSET__SHIFT 8 +static inline uint32_t A5XX_HLSQ_CS_CONFIG_SHADEROBJOFFSET(uint32_t val) +{ + return ((val) << A5XX_HLSQ_CS_CONFIG_SHADEROBJOFFSET__SHIFT) & A5XX_HLSQ_CS_CONFIG_SHADEROBJOFFSET__MASK; +} + +#define REG_A5XX_HLSQ_VS_CNTL 0x0000e791 +#define A5XX_HLSQ_VS_CNTL_SSBO_ENABLE 0x00000001 +#define A5XX_HLSQ_VS_CNTL_INSTRLEN__MASK 0xfffffffe +#define A5XX_HLSQ_VS_CNTL_INSTRLEN__SHIFT 1 +static inline uint32_t A5XX_HLSQ_VS_CNTL_INSTRLEN(uint32_t val) +{ + return ((val) << A5XX_HLSQ_VS_CNTL_INSTRLEN__SHIFT) & A5XX_HLSQ_VS_CNTL_INSTRLEN__MASK; +} + +#define REG_A5XX_HLSQ_FS_CNTL 0x0000e792 +#define A5XX_HLSQ_FS_CNTL_SSBO_ENABLE 0x00000001 +#define A5XX_HLSQ_FS_CNTL_INSTRLEN__MASK 0xfffffffe +#define A5XX_HLSQ_FS_CNTL_INSTRLEN__SHIFT 1 +static inline uint32_t A5XX_HLSQ_FS_CNTL_INSTRLEN(uint32_t val) +{ + return ((val) << A5XX_HLSQ_FS_CNTL_INSTRLEN__SHIFT) & A5XX_HLSQ_FS_CNTL_INSTRLEN__MASK; +} + +#define REG_A5XX_HLSQ_HS_CNTL 0x0000e793 +#define A5XX_HLSQ_HS_CNTL_SSBO_ENABLE 0x00000001 +#define 
A5XX_HLSQ_HS_CNTL_INSTRLEN__MASK 0xfffffffe +#define A5XX_HLSQ_HS_CNTL_INSTRLEN__SHIFT 1 +static inline uint32_t A5XX_HLSQ_HS_CNTL_INSTRLEN(uint32_t val) +{ + return ((val) << A5XX_HLSQ_HS_CNTL_INSTRLEN__SHIFT) & A5XX_HLSQ_HS_CNTL_INSTRLEN__MASK; +} + +#define REG_A5XX_HLSQ_DS_CNTL 0x0000e794 +#define A5XX_HLSQ_DS_CNTL_SSBO_ENABLE 0x00000001 +#define A5XX_HLSQ_DS_CNTL_INSTRLEN__MASK 0xfffffffe +#define A5XX_HLSQ_DS_CNTL_INSTRLEN__SHIFT 1 +static inline uint32_t A5XX_HLSQ_DS_CNTL_INSTRLEN(uint32_t val) +{ + return ((val) << A5XX_HLSQ_DS_CNTL_INSTRLEN__SHIFT) & A5XX_HLSQ_DS_CNTL_INSTRLEN__MASK; +} + +#define REG_A5XX_HLSQ_GS_CNTL 0x0000e795 +#define A5XX_HLSQ_GS_CNTL_SSBO_ENABLE 0x00000001 +#define A5XX_HLSQ_GS_CNTL_INSTRLEN__MASK 0xfffffffe +#define A5XX_HLSQ_GS_CNTL_INSTRLEN__SHIFT 1 +static inline uint32_t A5XX_HLSQ_GS_CNTL_INSTRLEN(uint32_t val) +{ + return ((val) << A5XX_HLSQ_GS_CNTL_INSTRLEN__SHIFT) & A5XX_HLSQ_GS_CNTL_INSTRLEN__MASK; +} + +#define REG_A5XX_HLSQ_CS_CNTL 0x0000e796 +#define A5XX_HLSQ_CS_CNTL_SSBO_ENABLE 0x00000001 +#define A5XX_HLSQ_CS_CNTL_INSTRLEN__MASK 0xfffffffe +#define A5XX_HLSQ_CS_CNTL_INSTRLEN__SHIFT 1 +static inline uint32_t A5XX_HLSQ_CS_CNTL_INSTRLEN(uint32_t val) +{ + return ((val) << A5XX_HLSQ_CS_CNTL_INSTRLEN__SHIFT) & A5XX_HLSQ_CS_CNTL_INSTRLEN__MASK; +} + +#define REG_A5XX_HLSQ_CS_KERNEL_GROUP_X 0x0000e7b9 + +#define REG_A5XX_HLSQ_CS_KERNEL_GROUP_Y 0x0000e7ba + +#define REG_A5XX_HLSQ_CS_KERNEL_GROUP_Z 0x0000e7bb + +#define REG_A5XX_HLSQ_CS_NDRANGE_0 0x0000e7b0 +#define A5XX_HLSQ_CS_NDRANGE_0_KERNELDIM__MASK 0x00000003 +#define A5XX_HLSQ_CS_NDRANGE_0_KERNELDIM__SHIFT 0 +static inline uint32_t A5XX_HLSQ_CS_NDRANGE_0_KERNELDIM(uint32_t val) +{ + return ((val) << A5XX_HLSQ_CS_NDRANGE_0_KERNELDIM__SHIFT) & A5XX_HLSQ_CS_NDRANGE_0_KERNELDIM__MASK; +} +#define A5XX_HLSQ_CS_NDRANGE_0_LOCALSIZEX__MASK 0x00000ffc +#define A5XX_HLSQ_CS_NDRANGE_0_LOCALSIZEX__SHIFT 2 +static inline uint32_t A5XX_HLSQ_CS_NDRANGE_0_LOCALSIZEX(uint32_t val) +{ + 
return ((val) << A5XX_HLSQ_CS_NDRANGE_0_LOCALSIZEX__SHIFT) & A5XX_HLSQ_CS_NDRANGE_0_LOCALSIZEX__MASK; +} +#define A5XX_HLSQ_CS_NDRANGE_0_LOCALSIZEY__MASK 0x003ff000 +#define A5XX_HLSQ_CS_NDRANGE_0_LOCALSIZEY__SHIFT 12 +static inline uint32_t A5XX_HLSQ_CS_NDRANGE_0_LOCALSIZEY(uint32_t val) +{ + return ((val) << A5XX_HLSQ_CS_NDRANGE_0_LOCALSIZEY__SHIFT) & A5XX_HLSQ_CS_NDRANGE_0_LOCALSIZEY__MASK; +} +#define A5XX_HLSQ_CS_NDRANGE_0_LOCALSIZEZ__MASK 0xffc00000 +#define A5XX_HLSQ_CS_NDRANGE_0_LOCALSIZEZ__SHIFT 22 +static inline uint32_t A5XX_HLSQ_CS_NDRANGE_0_LOCALSIZEZ(uint32_t val) +{ + return ((val) << A5XX_HLSQ_CS_NDRANGE_0_LOCALSIZEZ__SHIFT) & A5XX_HLSQ_CS_NDRANGE_0_LOCALSIZEZ__MASK; +} + +#define REG_A5XX_HLSQ_CS_NDRANGE_1 0x0000e7b1 +#define A5XX_HLSQ_CS_NDRANGE_1_GLOBALSIZE_X__MASK 0xffffffff +#define A5XX_HLSQ_CS_NDRANGE_1_GLOBALSIZE_X__SHIFT 0 +static inline uint32_t A5XX_HLSQ_CS_NDRANGE_1_GLOBALSIZE_X(uint32_t val) +{ + return ((val) << A5XX_HLSQ_CS_NDRANGE_1_GLOBALSIZE_X__SHIFT) & A5XX_HLSQ_CS_NDRANGE_1_GLOBALSIZE_X__MASK; +} + +#define REG_A5XX_HLSQ_CS_NDRANGE_2 0x0000e7b2 +#define A5XX_HLSQ_CS_NDRANGE_2_GLOBALOFF_X__MASK 0xffffffff +#define A5XX_HLSQ_CS_NDRANGE_2_GLOBALOFF_X__SHIFT 0 +static inline uint32_t A5XX_HLSQ_CS_NDRANGE_2_GLOBALOFF_X(uint32_t val) +{ + return ((val) << A5XX_HLSQ_CS_NDRANGE_2_GLOBALOFF_X__SHIFT) & A5XX_HLSQ_CS_NDRANGE_2_GLOBALOFF_X__MASK; +} + +#define REG_A5XX_HLSQ_CS_NDRANGE_3 0x0000e7b3 +#define A5XX_HLSQ_CS_NDRANGE_3_GLOBALSIZE_Y__MASK 0xffffffff +#define A5XX_HLSQ_CS_NDRANGE_3_GLOBALSIZE_Y__SHIFT 0 +static inline uint32_t A5XX_HLSQ_CS_NDRANGE_3_GLOBALSIZE_Y(uint32_t val) +{ + return ((val) << A5XX_HLSQ_CS_NDRANGE_3_GLOBALSIZE_Y__SHIFT) & A5XX_HLSQ_CS_NDRANGE_3_GLOBALSIZE_Y__MASK; +} + +#define REG_A5XX_HLSQ_CS_NDRANGE_4 0x0000e7b4 +#define A5XX_HLSQ_CS_NDRANGE_4_GLOBALOFF_Y__MASK 0xffffffff +#define A5XX_HLSQ_CS_NDRANGE_4_GLOBALOFF_Y__SHIFT 0 +static inline uint32_t A5XX_HLSQ_CS_NDRANGE_4_GLOBALOFF_Y(uint32_t val) +{ + return 
((val) << A5XX_HLSQ_CS_NDRANGE_4_GLOBALOFF_Y__SHIFT) & A5XX_HLSQ_CS_NDRANGE_4_GLOBALOFF_Y__MASK; +} + +#define REG_A5XX_HLSQ_CS_NDRANGE_5 0x0000e7b5 +#define A5XX_HLSQ_CS_NDRANGE_5_GLOBALSIZE_Z__MASK 0xffffffff +#define A5XX_HLSQ_CS_NDRANGE_5_GLOBALSIZE_Z__SHIFT 0 +static inline uint32_t A5XX_HLSQ_CS_NDRANGE_5_GLOBALSIZE_Z(uint32_t val) +{ + return ((val) << A5XX_HLSQ_CS_NDRANGE_5_GLOBALSIZE_Z__SHIFT) & A5XX_HLSQ_CS_NDRANGE_5_GLOBALSIZE_Z__MASK; +} + +#define REG_A5XX_HLSQ_CS_NDRANGE_6 0x0000e7b6 +#define A5XX_HLSQ_CS_NDRANGE_6_GLOBALOFF_Z__MASK 0xffffffff +#define A5XX_HLSQ_CS_NDRANGE_6_GLOBALOFF_Z__SHIFT 0 +static inline uint32_t A5XX_HLSQ_CS_NDRANGE_6_GLOBALOFF_Z(uint32_t val) +{ + return ((val) << A5XX_HLSQ_CS_NDRANGE_6_GLOBALOFF_Z__SHIFT) & A5XX_HLSQ_CS_NDRANGE_6_GLOBALOFF_Z__MASK; +} + +#define REG_A5XX_HLSQ_CS_CNTL_0 0x0000e7b7 +#define A5XX_HLSQ_CS_CNTL_0_WGIDCONSTID__MASK 0x000000ff +#define A5XX_HLSQ_CS_CNTL_0_WGIDCONSTID__SHIFT 0 +static inline uint32_t A5XX_HLSQ_CS_CNTL_0_WGIDCONSTID(uint32_t val) +{ + return ((val) << A5XX_HLSQ_CS_CNTL_0_WGIDCONSTID__SHIFT) & A5XX_HLSQ_CS_CNTL_0_WGIDCONSTID__MASK; +} +#define A5XX_HLSQ_CS_CNTL_0_UNK0__MASK 0x0000ff00 +#define A5XX_HLSQ_CS_CNTL_0_UNK0__SHIFT 8 +static inline uint32_t A5XX_HLSQ_CS_CNTL_0_UNK0(uint32_t val) +{ + return ((val) << A5XX_HLSQ_CS_CNTL_0_UNK0__SHIFT) & A5XX_HLSQ_CS_CNTL_0_UNK0__MASK; +} +#define A5XX_HLSQ_CS_CNTL_0_UNK1__MASK 0x00ff0000 +#define A5XX_HLSQ_CS_CNTL_0_UNK1__SHIFT 16 +static inline uint32_t A5XX_HLSQ_CS_CNTL_0_UNK1(uint32_t val) +{ + return ((val) << A5XX_HLSQ_CS_CNTL_0_UNK1__SHIFT) & A5XX_HLSQ_CS_CNTL_0_UNK1__MASK; +} +#define A5XX_HLSQ_CS_CNTL_0_LOCALIDREGID__MASK 0xff000000 +#define A5XX_HLSQ_CS_CNTL_0_LOCALIDREGID__SHIFT 24 +static inline uint32_t A5XX_HLSQ_CS_CNTL_0_LOCALIDREGID(uint32_t val) +{ + return ((val) << A5XX_HLSQ_CS_CNTL_0_LOCALIDREGID__SHIFT) & A5XX_HLSQ_CS_CNTL_0_LOCALIDREGID__MASK; +} + +#define REG_A5XX_HLSQ_CS_CNTL_1 0x0000e7b8 + +#define REG_A5XX_UNKNOWN_E7C0 
0x0000e7c0 + +#define REG_A5XX_HLSQ_VS_CONSTLEN 0x0000e7c3 + +#define REG_A5XX_HLSQ_VS_INSTRLEN 0x0000e7c4 + +#define REG_A5XX_UNKNOWN_E7C5 0x0000e7c5 + +#define REG_A5XX_HLSQ_HS_CONSTLEN 0x0000e7c8 + +#define REG_A5XX_HLSQ_HS_INSTRLEN 0x0000e7c9 + +#define REG_A5XX_UNKNOWN_E7CA 0x0000e7ca + +#define REG_A5XX_HLSQ_DS_CONSTLEN 0x0000e7cd + +#define REG_A5XX_HLSQ_DS_INSTRLEN 0x0000e7ce + +#define REG_A5XX_UNKNOWN_E7CF 0x0000e7cf + +#define REG_A5XX_HLSQ_GS_CONSTLEN 0x0000e7d2 + +#define REG_A5XX_HLSQ_GS_INSTRLEN 0x0000e7d3 + +#define REG_A5XX_UNKNOWN_E7D4 0x0000e7d4 + +#define REG_A5XX_HLSQ_FS_CONSTLEN 0x0000e7d7 + +#define REG_A5XX_HLSQ_FS_INSTRLEN 0x0000e7d8 + +#define REG_A5XX_UNKNOWN_E7D9 0x0000e7d9 + +#define REG_A5XX_HLSQ_CS_CONSTLEN 0x0000e7dc + +#define REG_A5XX_HLSQ_CS_INSTRLEN 0x0000e7dd + +#define REG_A5XX_RB_2D_BLIT_CNTL 0x00002100 + +#define REG_A5XX_RB_2D_SRC_SOLID_DW0 0x00002101 + +#define REG_A5XX_RB_2D_SRC_SOLID_DW1 0x00002102 + +#define REG_A5XX_RB_2D_SRC_SOLID_DW2 0x00002103 + +#define REG_A5XX_RB_2D_SRC_SOLID_DW3 0x00002104 + +#define REG_A5XX_RB_2D_SRC_INFO 0x00002107 +#define A5XX_RB_2D_SRC_INFO_COLOR_FORMAT__MASK 0x000000ff +#define A5XX_RB_2D_SRC_INFO_COLOR_FORMAT__SHIFT 0 +static inline uint32_t A5XX_RB_2D_SRC_INFO_COLOR_FORMAT(enum a5xx_color_fmt val) +{ + return ((val) << A5XX_RB_2D_SRC_INFO_COLOR_FORMAT__SHIFT) & A5XX_RB_2D_SRC_INFO_COLOR_FORMAT__MASK; +} +#define A5XX_RB_2D_SRC_INFO_TILE_MODE__MASK 0x00000300 +#define A5XX_RB_2D_SRC_INFO_TILE_MODE__SHIFT 8 +static inline uint32_t A5XX_RB_2D_SRC_INFO_TILE_MODE(enum a5xx_tile_mode val) +{ + return ((val) << A5XX_RB_2D_SRC_INFO_TILE_MODE__SHIFT) & A5XX_RB_2D_SRC_INFO_TILE_MODE__MASK; +} +#define A5XX_RB_2D_SRC_INFO_COLOR_SWAP__MASK 0x00000c00 +#define A5XX_RB_2D_SRC_INFO_COLOR_SWAP__SHIFT 10 +static inline uint32_t A5XX_RB_2D_SRC_INFO_COLOR_SWAP(enum a3xx_color_swap val) +{ + return ((val) << A5XX_RB_2D_SRC_INFO_COLOR_SWAP__SHIFT) & A5XX_RB_2D_SRC_INFO_COLOR_SWAP__MASK; +} +#define 
A5XX_RB_2D_SRC_INFO_FLAGS 0x00001000 + +#define REG_A5XX_RB_2D_SRC_LO 0x00002108 + +#define REG_A5XX_RB_2D_SRC_HI 0x00002109 + +#define REG_A5XX_RB_2D_SRC_SIZE 0x0000210a +#define A5XX_RB_2D_SRC_SIZE_PITCH__MASK 0x0000ffff +#define A5XX_RB_2D_SRC_SIZE_PITCH__SHIFT 0 +static inline uint32_t A5XX_RB_2D_SRC_SIZE_PITCH(uint32_t val) +{ + assert(!(val & 0x3f)); + return ((val >> 6) << A5XX_RB_2D_SRC_SIZE_PITCH__SHIFT) & A5XX_RB_2D_SRC_SIZE_PITCH__MASK; +} +#define A5XX_RB_2D_SRC_SIZE_ARRAY_PITCH__MASK 0xffff0000 +#define A5XX_RB_2D_SRC_SIZE_ARRAY_PITCH__SHIFT 16 +static inline uint32_t A5XX_RB_2D_SRC_SIZE_ARRAY_PITCH(uint32_t val) +{ + assert(!(val & 0x3f)); + return ((val >> 6) << A5XX_RB_2D_SRC_SIZE_ARRAY_PITCH__SHIFT) & A5XX_RB_2D_SRC_SIZE_ARRAY_PITCH__MASK; +} + +#define REG_A5XX_RB_2D_DST_INFO 0x00002110 +#define A5XX_RB_2D_DST_INFO_COLOR_FORMAT__MASK 0x000000ff +#define A5XX_RB_2D_DST_INFO_COLOR_FORMAT__SHIFT 0 +static inline uint32_t A5XX_RB_2D_DST_INFO_COLOR_FORMAT(enum a5xx_color_fmt val) +{ + return ((val) << A5XX_RB_2D_DST_INFO_COLOR_FORMAT__SHIFT) & A5XX_RB_2D_DST_INFO_COLOR_FORMAT__MASK; +} +#define A5XX_RB_2D_DST_INFO_TILE_MODE__MASK 0x00000300 +#define A5XX_RB_2D_DST_INFO_TILE_MODE__SHIFT 8 +static inline uint32_t A5XX_RB_2D_DST_INFO_TILE_MODE(enum a5xx_tile_mode val) +{ + return ((val) << A5XX_RB_2D_DST_INFO_TILE_MODE__SHIFT) & A5XX_RB_2D_DST_INFO_TILE_MODE__MASK; +} +#define A5XX_RB_2D_DST_INFO_COLOR_SWAP__MASK 0x00000c00 +#define A5XX_RB_2D_DST_INFO_COLOR_SWAP__SHIFT 10 +static inline uint32_t A5XX_RB_2D_DST_INFO_COLOR_SWAP(enum a3xx_color_swap val) +{ + return ((val) << A5XX_RB_2D_DST_INFO_COLOR_SWAP__SHIFT) & A5XX_RB_2D_DST_INFO_COLOR_SWAP__MASK; +} +#define A5XX_RB_2D_DST_INFO_FLAGS 0x00001000 + +#define REG_A5XX_RB_2D_DST_LO 0x00002111 + +#define REG_A5XX_RB_2D_DST_HI 0x00002112 + +#define REG_A5XX_RB_2D_DST_SIZE 0x00002113 +#define A5XX_RB_2D_DST_SIZE_PITCH__MASK 0x0000ffff +#define A5XX_RB_2D_DST_SIZE_PITCH__SHIFT 0 +static inline uint32_t 
A5XX_RB_2D_DST_SIZE_PITCH(uint32_t val) +{ + assert(!(val & 0x3f)); + return ((val >> 6) << A5XX_RB_2D_DST_SIZE_PITCH__SHIFT) & A5XX_RB_2D_DST_SIZE_PITCH__MASK; +} +#define A5XX_RB_2D_DST_SIZE_ARRAY_PITCH__MASK 0xffff0000 +#define A5XX_RB_2D_DST_SIZE_ARRAY_PITCH__SHIFT 16 +static inline uint32_t A5XX_RB_2D_DST_SIZE_ARRAY_PITCH(uint32_t val) +{ + assert(!(val & 0x3f)); + return ((val >> 6) << A5XX_RB_2D_DST_SIZE_ARRAY_PITCH__SHIFT) & A5XX_RB_2D_DST_SIZE_ARRAY_PITCH__MASK; +} + +#define REG_A5XX_RB_2D_SRC_FLAGS_LO 0x00002140 + +#define REG_A5XX_RB_2D_SRC_FLAGS_HI 0x00002141 + +#define REG_A5XX_RB_2D_DST_FLAGS_LO 0x00002143 + +#define REG_A5XX_RB_2D_DST_FLAGS_HI 0x00002144 + +#define REG_A5XX_GRAS_2D_BLIT_CNTL 0x00002180 + +#define REG_A5XX_GRAS_2D_SRC_INFO 0x00002181 +#define A5XX_GRAS_2D_SRC_INFO_COLOR_FORMAT__MASK 0x000000ff +#define A5XX_GRAS_2D_SRC_INFO_COLOR_FORMAT__SHIFT 0 +static inline uint32_t A5XX_GRAS_2D_SRC_INFO_COLOR_FORMAT(enum a5xx_color_fmt val) +{ + return ((val) << A5XX_GRAS_2D_SRC_INFO_COLOR_FORMAT__SHIFT) & A5XX_GRAS_2D_SRC_INFO_COLOR_FORMAT__MASK; +} +#define A5XX_GRAS_2D_SRC_INFO_TILE_MODE__MASK 0x00000300 +#define A5XX_GRAS_2D_SRC_INFO_TILE_MODE__SHIFT 8 +static inline uint32_t A5XX_GRAS_2D_SRC_INFO_TILE_MODE(enum a5xx_tile_mode val) +{ + return ((val) << A5XX_GRAS_2D_SRC_INFO_TILE_MODE__SHIFT) & A5XX_GRAS_2D_SRC_INFO_TILE_MODE__MASK; +} +#define A5XX_GRAS_2D_SRC_INFO_COLOR_SWAP__MASK 0x00000c00 +#define A5XX_GRAS_2D_SRC_INFO_COLOR_SWAP__SHIFT 10 +static inline uint32_t A5XX_GRAS_2D_SRC_INFO_COLOR_SWAP(enum a3xx_color_swap val) +{ + return ((val) << A5XX_GRAS_2D_SRC_INFO_COLOR_SWAP__SHIFT) & A5XX_GRAS_2D_SRC_INFO_COLOR_SWAP__MASK; +} +#define A5XX_GRAS_2D_SRC_INFO_FLAGS 0x00001000 + +#define REG_A5XX_GRAS_2D_DST_INFO 0x00002182 +#define A5XX_GRAS_2D_DST_INFO_COLOR_FORMAT__MASK 0x000000ff +#define A5XX_GRAS_2D_DST_INFO_COLOR_FORMAT__SHIFT 0 +static inline uint32_t A5XX_GRAS_2D_DST_INFO_COLOR_FORMAT(enum a5xx_color_fmt val) +{ + return ((val) << 
A5XX_GRAS_2D_DST_INFO_COLOR_FORMAT__SHIFT) & A5XX_GRAS_2D_DST_INFO_COLOR_FORMAT__MASK; +} +#define A5XX_GRAS_2D_DST_INFO_TILE_MODE__MASK 0x00000300 +#define A5XX_GRAS_2D_DST_INFO_TILE_MODE__SHIFT 8 +static inline uint32_t A5XX_GRAS_2D_DST_INFO_TILE_MODE(enum a5xx_tile_mode val) +{ + return ((val) << A5XX_GRAS_2D_DST_INFO_TILE_MODE__SHIFT) & A5XX_GRAS_2D_DST_INFO_TILE_MODE__MASK; +} +#define A5XX_GRAS_2D_DST_INFO_COLOR_SWAP__MASK 0x00000c00 +#define A5XX_GRAS_2D_DST_INFO_COLOR_SWAP__SHIFT 10 +static inline uint32_t A5XX_GRAS_2D_DST_INFO_COLOR_SWAP(enum a3xx_color_swap val) +{ + return ((val) << A5XX_GRAS_2D_DST_INFO_COLOR_SWAP__SHIFT) & A5XX_GRAS_2D_DST_INFO_COLOR_SWAP__MASK; +} +#define A5XX_GRAS_2D_DST_INFO_FLAGS 0x00001000 + +#define REG_A5XX_UNKNOWN_2100 0x00002100 + +#define REG_A5XX_UNKNOWN_2180 0x00002180 + +#define REG_A5XX_UNKNOWN_2184 0x00002184 + +#define REG_A5XX_TEX_SAMP_0 0x00000000 +#define A5XX_TEX_SAMP_0_MIPFILTER_LINEAR_NEAR 0x00000001 +#define A5XX_TEX_SAMP_0_XY_MAG__MASK 0x00000006 +#define A5XX_TEX_SAMP_0_XY_MAG__SHIFT 1 +static inline uint32_t A5XX_TEX_SAMP_0_XY_MAG(enum a5xx_tex_filter val) +{ + return ((val) << A5XX_TEX_SAMP_0_XY_MAG__SHIFT) & A5XX_TEX_SAMP_0_XY_MAG__MASK; +} +#define A5XX_TEX_SAMP_0_XY_MIN__MASK 0x00000018 +#define A5XX_TEX_SAMP_0_XY_MIN__SHIFT 3 +static inline uint32_t A5XX_TEX_SAMP_0_XY_MIN(enum a5xx_tex_filter val) +{ + return ((val) << A5XX_TEX_SAMP_0_XY_MIN__SHIFT) & A5XX_TEX_SAMP_0_XY_MIN__MASK; +} +#define A5XX_TEX_SAMP_0_WRAP_S__MASK 0x000000e0 +#define A5XX_TEX_SAMP_0_WRAP_S__SHIFT 5 +static inline uint32_t A5XX_TEX_SAMP_0_WRAP_S(enum a5xx_tex_clamp val) +{ + return ((val) << A5XX_TEX_SAMP_0_WRAP_S__SHIFT) & A5XX_TEX_SAMP_0_WRAP_S__MASK; +} +#define A5XX_TEX_SAMP_0_WRAP_T__MASK 0x00000700 +#define A5XX_TEX_SAMP_0_WRAP_T__SHIFT 8 +static inline uint32_t A5XX_TEX_SAMP_0_WRAP_T(enum a5xx_tex_clamp val) +{ + return ((val) << A5XX_TEX_SAMP_0_WRAP_T__SHIFT) & A5XX_TEX_SAMP_0_WRAP_T__MASK; +} +#define 
A5XX_TEX_SAMP_0_WRAP_R__MASK 0x00003800 +#define A5XX_TEX_SAMP_0_WRAP_R__SHIFT 11 +static inline uint32_t A5XX_TEX_SAMP_0_WRAP_R(enum a5xx_tex_clamp val) +{ + return ((val) << A5XX_TEX_SAMP_0_WRAP_R__SHIFT) & A5XX_TEX_SAMP_0_WRAP_R__MASK; +} +#define A5XX_TEX_SAMP_0_ANISO__MASK 0x0001c000 +#define A5XX_TEX_SAMP_0_ANISO__SHIFT 14 +static inline uint32_t A5XX_TEX_SAMP_0_ANISO(enum a5xx_tex_aniso val) +{ + return ((val) << A5XX_TEX_SAMP_0_ANISO__SHIFT) & A5XX_TEX_SAMP_0_ANISO__MASK; +} +#define A5XX_TEX_SAMP_0_LOD_BIAS__MASK 0xfff80000 +#define A5XX_TEX_SAMP_0_LOD_BIAS__SHIFT 19 +static inline uint32_t A5XX_TEX_SAMP_0_LOD_BIAS(float val) +{ + return ((((int32_t)(val * 256.0))) << A5XX_TEX_SAMP_0_LOD_BIAS__SHIFT) & A5XX_TEX_SAMP_0_LOD_BIAS__MASK; +} + +#define REG_A5XX_TEX_SAMP_1 0x00000001 +#define A5XX_TEX_SAMP_1_COMPARE_FUNC__MASK 0x0000000e +#define A5XX_TEX_SAMP_1_COMPARE_FUNC__SHIFT 1 +static inline uint32_t A5XX_TEX_SAMP_1_COMPARE_FUNC(enum adreno_compare_func val) +{ + return ((val) << A5XX_TEX_SAMP_1_COMPARE_FUNC__SHIFT) & A5XX_TEX_SAMP_1_COMPARE_FUNC__MASK; +} +#define A5XX_TEX_SAMP_1_CUBEMAPSEAMLESSFILTOFF 0x00000010 +#define A5XX_TEX_SAMP_1_UNNORM_COORDS 0x00000020 +#define A5XX_TEX_SAMP_1_MIPFILTER_LINEAR_FAR 0x00000040 +#define A5XX_TEX_SAMP_1_MAX_LOD__MASK 0x000fff00 +#define A5XX_TEX_SAMP_1_MAX_LOD__SHIFT 8 +static inline uint32_t A5XX_TEX_SAMP_1_MAX_LOD(float val) +{ + return ((((uint32_t)(val * 256.0))) << A5XX_TEX_SAMP_1_MAX_LOD__SHIFT) & A5XX_TEX_SAMP_1_MAX_LOD__MASK; +} +#define A5XX_TEX_SAMP_1_MIN_LOD__MASK 0xfff00000 +#define A5XX_TEX_SAMP_1_MIN_LOD__SHIFT 20 +static inline uint32_t A5XX_TEX_SAMP_1_MIN_LOD(float val) +{ + return ((((uint32_t)(val * 256.0))) << A5XX_TEX_SAMP_1_MIN_LOD__SHIFT) & A5XX_TEX_SAMP_1_MIN_LOD__MASK; +} + +#define REG_A5XX_TEX_SAMP_2 0x00000002 +#define A5XX_TEX_SAMP_2_BCOLOR_OFFSET__MASK 0xfffffff0 +#define A5XX_TEX_SAMP_2_BCOLOR_OFFSET__SHIFT 4 +static inline uint32_t A5XX_TEX_SAMP_2_BCOLOR_OFFSET(uint32_t val) +{ + 
return ((val) << A5XX_TEX_SAMP_2_BCOLOR_OFFSET__SHIFT) & A5XX_TEX_SAMP_2_BCOLOR_OFFSET__MASK; +} + +#define REG_A5XX_TEX_SAMP_3 0x00000003 + +#define REG_A5XX_TEX_CONST_0 0x00000000 +#define A5XX_TEX_CONST_0_TILE_MODE__MASK 0x00000003 +#define A5XX_TEX_CONST_0_TILE_MODE__SHIFT 0 +static inline uint32_t A5XX_TEX_CONST_0_TILE_MODE(enum a5xx_tile_mode val) +{ + return ((val) << A5XX_TEX_CONST_0_TILE_MODE__SHIFT) & A5XX_TEX_CONST_0_TILE_MODE__MASK; +} +#define A5XX_TEX_CONST_0_SRGB 0x00000004 +#define A5XX_TEX_CONST_0_SWIZ_X__MASK 0x00000070 +#define A5XX_TEX_CONST_0_SWIZ_X__SHIFT 4 +static inline uint32_t A5XX_TEX_CONST_0_SWIZ_X(enum a5xx_tex_swiz val) +{ + return ((val) << A5XX_TEX_CONST_0_SWIZ_X__SHIFT) & A5XX_TEX_CONST_0_SWIZ_X__MASK; +} +#define A5XX_TEX_CONST_0_SWIZ_Y__MASK 0x00000380 +#define A5XX_TEX_CONST_0_SWIZ_Y__SHIFT 7 +static inline uint32_t A5XX_TEX_CONST_0_SWIZ_Y(enum a5xx_tex_swiz val) +{ + return ((val) << A5XX_TEX_CONST_0_SWIZ_Y__SHIFT) & A5XX_TEX_CONST_0_SWIZ_Y__MASK; +} +#define A5XX_TEX_CONST_0_SWIZ_Z__MASK 0x00001c00 +#define A5XX_TEX_CONST_0_SWIZ_Z__SHIFT 10 +static inline uint32_t A5XX_TEX_CONST_0_SWIZ_Z(enum a5xx_tex_swiz val) +{ + return ((val) << A5XX_TEX_CONST_0_SWIZ_Z__SHIFT) & A5XX_TEX_CONST_0_SWIZ_Z__MASK; +} +#define A5XX_TEX_CONST_0_SWIZ_W__MASK 0x0000e000 +#define A5XX_TEX_CONST_0_SWIZ_W__SHIFT 13 +static inline uint32_t A5XX_TEX_CONST_0_SWIZ_W(enum a5xx_tex_swiz val) +{ + return ((val) << A5XX_TEX_CONST_0_SWIZ_W__SHIFT) & A5XX_TEX_CONST_0_SWIZ_W__MASK; +} +#define A5XX_TEX_CONST_0_MIPLVLS__MASK 0x000f0000 +#define A5XX_TEX_CONST_0_MIPLVLS__SHIFT 16 +static inline uint32_t A5XX_TEX_CONST_0_MIPLVLS(uint32_t val) +{ + return ((val) << A5XX_TEX_CONST_0_MIPLVLS__SHIFT) & A5XX_TEX_CONST_0_MIPLVLS__MASK; +} +#define A5XX_TEX_CONST_0_SAMPLES__MASK 0x00300000 +#define A5XX_TEX_CONST_0_SAMPLES__SHIFT 20 +static inline uint32_t A5XX_TEX_CONST_0_SAMPLES(enum a3xx_msaa_samples val) +{ + return ((val) << A5XX_TEX_CONST_0_SAMPLES__SHIFT) & 
A5XX_TEX_CONST_0_SAMPLES__MASK; +} +#define A5XX_TEX_CONST_0_FMT__MASK 0x3fc00000 +#define A5XX_TEX_CONST_0_FMT__SHIFT 22 +static inline uint32_t A5XX_TEX_CONST_0_FMT(enum a5xx_tex_fmt val) +{ + return ((val) << A5XX_TEX_CONST_0_FMT__SHIFT) & A5XX_TEX_CONST_0_FMT__MASK; +} +#define A5XX_TEX_CONST_0_SWAP__MASK 0xc0000000 +#define A5XX_TEX_CONST_0_SWAP__SHIFT 30 +static inline uint32_t A5XX_TEX_CONST_0_SWAP(enum a3xx_color_swap val) +{ + return ((val) << A5XX_TEX_CONST_0_SWAP__SHIFT) & A5XX_TEX_CONST_0_SWAP__MASK; +} + +#define REG_A5XX_TEX_CONST_1 0x00000001 +#define A5XX_TEX_CONST_1_WIDTH__MASK 0x00007fff +#define A5XX_TEX_CONST_1_WIDTH__SHIFT 0 +static inline uint32_t A5XX_TEX_CONST_1_WIDTH(uint32_t val) +{ + return ((val) << A5XX_TEX_CONST_1_WIDTH__SHIFT) & A5XX_TEX_CONST_1_WIDTH__MASK; +} +#define A5XX_TEX_CONST_1_HEIGHT__MASK 0x3fff8000 +#define A5XX_TEX_CONST_1_HEIGHT__SHIFT 15 +static inline uint32_t A5XX_TEX_CONST_1_HEIGHT(uint32_t val) +{ + return ((val) << A5XX_TEX_CONST_1_HEIGHT__SHIFT) & A5XX_TEX_CONST_1_HEIGHT__MASK; +} + +#define REG_A5XX_TEX_CONST_2 0x00000002 +#define A5XX_TEX_CONST_2_FETCHSIZE__MASK 0x0000000f +#define A5XX_TEX_CONST_2_FETCHSIZE__SHIFT 0 +static inline uint32_t A5XX_TEX_CONST_2_FETCHSIZE(enum a5xx_tex_fetchsize val) +{ + return ((val) << A5XX_TEX_CONST_2_FETCHSIZE__SHIFT) & A5XX_TEX_CONST_2_FETCHSIZE__MASK; +} +#define A5XX_TEX_CONST_2_PITCH__MASK 0x1fffff80 +#define A5XX_TEX_CONST_2_PITCH__SHIFT 7 +static inline uint32_t A5XX_TEX_CONST_2_PITCH(uint32_t val) +{ + return ((val) << A5XX_TEX_CONST_2_PITCH__SHIFT) & A5XX_TEX_CONST_2_PITCH__MASK; +} +#define A5XX_TEX_CONST_2_TYPE__MASK 0x60000000 +#define A5XX_TEX_CONST_2_TYPE__SHIFT 29 +static inline uint32_t A5XX_TEX_CONST_2_TYPE(enum a5xx_tex_type val) +{ + return ((val) << A5XX_TEX_CONST_2_TYPE__SHIFT) & A5XX_TEX_CONST_2_TYPE__MASK; +} + +#define REG_A5XX_TEX_CONST_3 0x00000003 +#define A5XX_TEX_CONST_3_ARRAY_PITCH__MASK 0x00003fff +#define A5XX_TEX_CONST_3_ARRAY_PITCH__SHIFT 0 
+static inline uint32_t A5XX_TEX_CONST_3_ARRAY_PITCH(uint32_t val) +{ + assert(!(val & 0xfff)); + return ((val >> 12) << A5XX_TEX_CONST_3_ARRAY_PITCH__SHIFT) & A5XX_TEX_CONST_3_ARRAY_PITCH__MASK; +} +#define A5XX_TEX_CONST_3_FLAG 0x10000000 + +#define REG_A5XX_TEX_CONST_4 0x00000004 +#define A5XX_TEX_CONST_4_BASE_LO__MASK 0xffffffe0 +#define A5XX_TEX_CONST_4_BASE_LO__SHIFT 5 +static inline uint32_t A5XX_TEX_CONST_4_BASE_LO(uint32_t val) +{ + assert(!(val & 0x1f)); + return ((val >> 5) << A5XX_TEX_CONST_4_BASE_LO__SHIFT) & A5XX_TEX_CONST_4_BASE_LO__MASK; +} + +#define REG_A5XX_TEX_CONST_5 0x00000005 +#define A5XX_TEX_CONST_5_BASE_HI__MASK 0x0001ffff +#define A5XX_TEX_CONST_5_BASE_HI__SHIFT 0 +static inline uint32_t A5XX_TEX_CONST_5_BASE_HI(uint32_t val) +{ + return ((val) << A5XX_TEX_CONST_5_BASE_HI__SHIFT) & A5XX_TEX_CONST_5_BASE_HI__MASK; +} +#define A5XX_TEX_CONST_5_DEPTH__MASK 0x3ffe0000 +#define A5XX_TEX_CONST_5_DEPTH__SHIFT 17 +static inline uint32_t A5XX_TEX_CONST_5_DEPTH(uint32_t val) +{ + return ((val) << A5XX_TEX_CONST_5_DEPTH__SHIFT) & A5XX_TEX_CONST_5_DEPTH__MASK; +} + +#define REG_A5XX_TEX_CONST_6 0x00000006 + +#define REG_A5XX_TEX_CONST_7 0x00000007 + +#define REG_A5XX_TEX_CONST_8 0x00000008 + +#define REG_A5XX_TEX_CONST_9 0x00000009 + +#define REG_A5XX_TEX_CONST_10 0x0000000a + +#define REG_A5XX_TEX_CONST_11 0x0000000b + +#define REG_A5XX_SSBO_0_0 0x00000000 +#define A5XX_SSBO_0_0_BASE_LO__MASK 0xffffffe0 +#define A5XX_SSBO_0_0_BASE_LO__SHIFT 5 +static inline uint32_t A5XX_SSBO_0_0_BASE_LO(uint32_t val) +{ + assert(!(val & 0x1f)); + return ((val >> 5) << A5XX_SSBO_0_0_BASE_LO__SHIFT) & A5XX_SSBO_0_0_BASE_LO__MASK; +} + +#define REG_A5XX_SSBO_0_1 0x00000001 +#define A5XX_SSBO_0_1_PITCH__MASK 0x003fffff +#define A5XX_SSBO_0_1_PITCH__SHIFT 0 +static inline uint32_t A5XX_SSBO_0_1_PITCH(uint32_t val) +{ + return ((val) << A5XX_SSBO_0_1_PITCH__SHIFT) & A5XX_SSBO_0_1_PITCH__MASK; +} + +#define REG_A5XX_SSBO_0_2 0x00000002 +#define 
A5XX_SSBO_0_2_ARRAY_PITCH__MASK 0x03fff000 +#define A5XX_SSBO_0_2_ARRAY_PITCH__SHIFT 12 +static inline uint32_t A5XX_SSBO_0_2_ARRAY_PITCH(uint32_t val) +{ + assert(!(val & 0xfff)); + return ((val >> 12) << A5XX_SSBO_0_2_ARRAY_PITCH__SHIFT) & A5XX_SSBO_0_2_ARRAY_PITCH__MASK; +} + +#define REG_A5XX_SSBO_0_3 0x00000003 +#define A5XX_SSBO_0_3_CPP__MASK 0x0000003f +#define A5XX_SSBO_0_3_CPP__SHIFT 0 +static inline uint32_t A5XX_SSBO_0_3_CPP(uint32_t val) +{ + return ((val) << A5XX_SSBO_0_3_CPP__SHIFT) & A5XX_SSBO_0_3_CPP__MASK; +} + +#define REG_A5XX_SSBO_1_0 0x00000000 +#define A5XX_SSBO_1_0_FMT__MASK 0x0000ff00 +#define A5XX_SSBO_1_0_FMT__SHIFT 8 +static inline uint32_t A5XX_SSBO_1_0_FMT(enum a5xx_tex_fmt val) +{ + return ((val) << A5XX_SSBO_1_0_FMT__SHIFT) & A5XX_SSBO_1_0_FMT__MASK; +} +#define A5XX_SSBO_1_0_WIDTH__MASK 0xffff0000 +#define A5XX_SSBO_1_0_WIDTH__SHIFT 16 +static inline uint32_t A5XX_SSBO_1_0_WIDTH(uint32_t val) +{ + return ((val) << A5XX_SSBO_1_0_WIDTH__SHIFT) & A5XX_SSBO_1_0_WIDTH__MASK; +} + +#define REG_A5XX_SSBO_1_1 0x00000001 +#define A5XX_SSBO_1_1_HEIGHT__MASK 0x0000ffff +#define A5XX_SSBO_1_1_HEIGHT__SHIFT 0 +static inline uint32_t A5XX_SSBO_1_1_HEIGHT(uint32_t val) +{ + return ((val) << A5XX_SSBO_1_1_HEIGHT__SHIFT) & A5XX_SSBO_1_1_HEIGHT__MASK; +} +#define A5XX_SSBO_1_1_DEPTH__MASK 0xffff0000 +#define A5XX_SSBO_1_1_DEPTH__SHIFT 16 +static inline uint32_t A5XX_SSBO_1_1_DEPTH(uint32_t val) +{ + return ((val) << A5XX_SSBO_1_1_DEPTH__SHIFT) & A5XX_SSBO_1_1_DEPTH__MASK; +} + +#define REG_A5XX_SSBO_2_0 0x00000000 +#define A5XX_SSBO_2_0_BASE_LO__MASK 0xffffffff +#define A5XX_SSBO_2_0_BASE_LO__SHIFT 0 +static inline uint32_t A5XX_SSBO_2_0_BASE_LO(uint32_t val) +{ + return ((val) << A5XX_SSBO_2_0_BASE_LO__SHIFT) & A5XX_SSBO_2_0_BASE_LO__MASK; +} + +#define REG_A5XX_SSBO_2_1 0x00000001 +#define A5XX_SSBO_2_1_BASE_HI__MASK 0xffffffff +#define A5XX_SSBO_2_1_BASE_HI__SHIFT 0 +static inline uint32_t A5XX_SSBO_2_1_BASE_HI(uint32_t val) +{ + return ((val) << 
A5XX_SSBO_2_1_BASE_HI__SHIFT) & A5XX_SSBO_2_1_BASE_HI__MASK; +} + + +#endif /* A5XX_XML */ diff -Nru mesa-18.3.3/src/freedreno/registers/a6xx.xml.h mesa-19.0.1/src/freedreno/registers/a6xx.xml.h --- mesa-18.3.3/src/freedreno/registers/a6xx.xml.h 1970-01-01 00:00:00.000000000 +0000 +++ mesa-19.0.1/src/freedreno/registers/a6xx.xml.h 2019-03-31 23:16:37.000000000 +0000 @@ -0,0 +1,5506 @@ +#ifndef A6XX_XML +#define A6XX_XML + +/* Autogenerated file, DO NOT EDIT manually! + +This file was generated by the rules-ng-ng headergen tool in this git repository: +http://github.com/freedreno/envytools/ +git clone https://github.com/freedreno/envytools.git + +The rules-ng-ng source files this header was generated from are: +- /home/robclark/src/envytools/rnndb/adreno.xml ( 501 bytes, from 2018-07-03 19:37:13) +- /home/robclark/src/envytools/rnndb/freedreno_copyright.xml ( 1572 bytes, from 2018-07-03 19:37:13) +- /home/robclark/src/envytools/rnndb/adreno/a2xx.xml ( 42463 bytes, from 2018-11-19 13:44:03) +- /home/robclark/src/envytools/rnndb/adreno/adreno_common.xml ( 14239 bytes, from 2018-12-05 15:25:53) +- /home/robclark/src/envytools/rnndb/adreno/adreno_pm4.xml ( 43052 bytes, from 2018-12-21 18:21:34) +- /home/robclark/src/envytools/rnndb/adreno/a3xx.xml ( 83840 bytes, from 2018-07-03 19:37:13) +- /home/robclark/src/envytools/rnndb/adreno/a4xx.xml ( 112086 bytes, from 2018-07-03 19:37:13) +- /home/robclark/src/envytools/rnndb/adreno/a5xx.xml ( 147240 bytes, from 2018-12-21 18:21:34) +- /home/robclark/src/envytools/rnndb/adreno/a6xx.xml ( 141895 bytes, from 2018-12-21 18:21:34) +- /home/robclark/src/envytools/rnndb/adreno/a6xx_gmu.xml ( 10431 bytes, from 2018-09-14 13:03:07) +- /home/robclark/src/envytools/rnndb/adreno/ocmem.xml ( 1773 bytes, from 2018-07-03 19:37:13) + +Copyright (C) 2013-2018 by the following authors: +- Rob Clark (robclark) +- Ilia Mirkin (imirkin) + +Permission is hereby granted, free of charge, to any person obtaining +a copy of this software and 
associated documentation files (the +"Software"), to deal in the Software without restriction, including +without limitation the rights to use, copy, modify, merge, publish, +distribute, sublicense, and/or sell copies of the Software, and to +permit persons to whom the Software is furnished to do so, subject to +the following conditions: + +The above copyright notice and this permission notice (including the +next paragraph) shall be included in all copies or substantial +portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, +EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF +MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. +IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE +LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION +OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION +WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. +*/ + + +enum a6xx_color_fmt { + RB6_A8_UNORM = 2, + RB6_R8_UNORM = 3, + RB6_R8_SNORM = 4, + RB6_R8_UINT = 5, + RB6_R8_SINT = 6, + RB6_R4G4B4A4_UNORM = 8, + RB6_R5G5B5A1_UNORM = 10, + RB6_R5G6B5_UNORM = 14, + RB6_R8G8_UNORM = 15, + RB6_R8G8_SNORM = 16, + RB6_R8G8_UINT = 17, + RB6_R8G8_SINT = 18, + RB6_R16_UNORM = 21, + RB6_R16_SNORM = 22, + RB6_R16_FLOAT = 23, + RB6_R16_UINT = 24, + RB6_R16_SINT = 25, + RB6_R8G8B8A8_UNORM = 48, + RB6_R8G8B8_UNORM = 49, + RB6_R8G8B8A8_SNORM = 50, + RB6_R8G8B8A8_UINT = 51, + RB6_R8G8B8A8_SINT = 52, + RB6_R10G10B10A2_UNORM = 55, + RB6_R10G10B10A2_UINT = 58, + RB6_R11G11B10_FLOAT = 66, + RB6_R16G16_UNORM = 67, + RB6_R16G16_SNORM = 68, + RB6_R16G16_FLOAT = 69, + RB6_R16G16_UINT = 70, + RB6_R16G16_SINT = 71, + RB6_R32_FLOAT = 74, + RB6_R32_UINT = 75, + RB6_R32_SINT = 76, + RB6_R16G16B16A16_UNORM = 96, + RB6_R16G16B16A16_SNORM = 97, + RB6_R16G16B16A16_FLOAT = 98, + RB6_R16G16B16A16_UINT = 99, + RB6_R16G16B16A16_SINT = 100, + RB6_R32G32_FLOAT = 103, + RB6_R32G32_UINT = 104, 
+ RB6_R32G32_SINT = 105, + RB6_R32G32B32A32_FLOAT = 130, + RB6_R32G32B32A32_UINT = 131, + RB6_R32G32B32A32_SINT = 132, + RB6_X8Z24_UNORM = 160, +}; + +enum a6xx_tile_mode { + TILE6_LINEAR = 0, + TILE6_2 = 2, + TILE6_3 = 3, +}; + +enum a6xx_vtx_fmt { + VFMT6_8_UNORM = 3, + VFMT6_8_SNORM = 4, + VFMT6_8_UINT = 5, + VFMT6_8_SINT = 6, + VFMT6_8_8_UNORM = 15, + VFMT6_8_8_SNORM = 16, + VFMT6_8_8_UINT = 17, + VFMT6_8_8_SINT = 18, + VFMT6_16_UNORM = 21, + VFMT6_16_SNORM = 22, + VFMT6_16_FLOAT = 23, + VFMT6_16_UINT = 24, + VFMT6_16_SINT = 25, + VFMT6_8_8_8_UNORM = 33, + VFMT6_8_8_8_SNORM = 34, + VFMT6_8_8_8_UINT = 35, + VFMT6_8_8_8_SINT = 36, + VFMT6_8_8_8_8_UNORM = 48, + VFMT6_8_8_8_8_SNORM = 50, + VFMT6_8_8_8_8_UINT = 51, + VFMT6_8_8_8_8_SINT = 52, + VFMT6_10_10_10_2_UNORM = 54, + VFMT6_10_10_10_2_SNORM = 57, + VFMT6_10_10_10_2_UINT = 58, + VFMT6_10_10_10_2_SINT = 59, + VFMT6_11_11_10_FLOAT = 66, + VFMT6_16_16_UNORM = 67, + VFMT6_16_16_SNORM = 68, + VFMT6_16_16_FLOAT = 69, + VFMT6_16_16_UINT = 70, + VFMT6_16_16_SINT = 71, + VFMT6_32_UNORM = 72, + VFMT6_32_SNORM = 73, + VFMT6_32_FLOAT = 74, + VFMT6_32_UINT = 75, + VFMT6_32_SINT = 76, + VFMT6_32_FIXED = 77, + VFMT6_16_16_16_UNORM = 88, + VFMT6_16_16_16_SNORM = 89, + VFMT6_16_16_16_FLOAT = 90, + VFMT6_16_16_16_UINT = 91, + VFMT6_16_16_16_SINT = 92, + VFMT6_16_16_16_16_UNORM = 96, + VFMT6_16_16_16_16_SNORM = 97, + VFMT6_16_16_16_16_FLOAT = 98, + VFMT6_16_16_16_16_UINT = 99, + VFMT6_16_16_16_16_SINT = 100, + VFMT6_32_32_UNORM = 101, + VFMT6_32_32_SNORM = 102, + VFMT6_32_32_FLOAT = 103, + VFMT6_32_32_UINT = 104, + VFMT6_32_32_SINT = 105, + VFMT6_32_32_FIXED = 106, + VFMT6_32_32_32_UNORM = 112, + VFMT6_32_32_32_SNORM = 113, + VFMT6_32_32_32_UINT = 114, + VFMT6_32_32_32_SINT = 115, + VFMT6_32_32_32_FLOAT = 116, + VFMT6_32_32_32_FIXED = 117, + VFMT6_32_32_32_32_UNORM = 128, + VFMT6_32_32_32_32_SNORM = 129, + VFMT6_32_32_32_32_FLOAT = 130, + VFMT6_32_32_32_32_UINT = 131, + VFMT6_32_32_32_32_SINT = 132, + VFMT6_32_32_32_32_FIXED = 
133, +}; + +enum a6xx_tex_fmt { + TFMT6_A8_UNORM = 2, + TFMT6_8_UNORM = 3, + TFMT6_8_SNORM = 4, + TFMT6_8_UINT = 5, + TFMT6_8_SINT = 6, + TFMT6_4_4_4_4_UNORM = 8, + TFMT6_5_5_5_1_UNORM = 10, + TFMT6_5_6_5_UNORM = 14, + TFMT6_8_8_UNORM = 15, + TFMT6_8_8_SNORM = 16, + TFMT6_8_8_UINT = 17, + TFMT6_8_8_SINT = 18, + TFMT6_L8_A8_UNORM = 19, + TFMT6_16_UNORM = 21, + TFMT6_16_SNORM = 22, + TFMT6_16_FLOAT = 23, + TFMT6_16_UINT = 24, + TFMT6_16_SINT = 25, + TFMT6_8_8_8_8_UNORM = 48, + TFMT6_8_8_8_UNORM = 49, + TFMT6_8_8_8_8_SNORM = 50, + TFMT6_8_8_8_8_UINT = 51, + TFMT6_8_8_8_8_SINT = 52, + TFMT6_9_9_9_E5_FLOAT = 53, + TFMT6_10_10_10_2_UNORM = 54, + TFMT6_10_10_10_2_UINT = 58, + TFMT6_11_11_10_FLOAT = 66, + TFMT6_16_16_UNORM = 67, + TFMT6_16_16_SNORM = 68, + TFMT6_16_16_FLOAT = 69, + TFMT6_16_16_UINT = 70, + TFMT6_16_16_SINT = 71, + TFMT6_32_FLOAT = 74, + TFMT6_32_UINT = 75, + TFMT6_32_SINT = 76, + TFMT6_16_16_16_16_UNORM = 96, + TFMT6_16_16_16_16_SNORM = 97, + TFMT6_16_16_16_16_FLOAT = 98, + TFMT6_16_16_16_16_UINT = 99, + TFMT6_16_16_16_16_SINT = 100, + TFMT6_32_32_FLOAT = 103, + TFMT6_32_32_UINT = 104, + TFMT6_32_32_SINT = 105, + TFMT6_32_32_32_UINT = 114, + TFMT6_32_32_32_SINT = 115, + TFMT6_32_32_32_FLOAT = 116, + TFMT6_32_32_32_32_FLOAT = 130, + TFMT6_32_32_32_32_UINT = 131, + TFMT6_32_32_32_32_SINT = 132, + TFMT6_X8Z24_UNORM = 160, + TFMT6_ETC2_RG11_UNORM = 171, + TFMT6_ETC2_RG11_SNORM = 172, + TFMT6_ETC2_R11_UNORM = 173, + TFMT6_ETC2_R11_SNORM = 174, + TFMT6_ETC1 = 175, + TFMT6_ETC2_RGB8 = 176, + TFMT6_ETC2_RGBA8 = 177, + TFMT6_ETC2_RGB8A1 = 178, + TFMT6_DXT1 = 179, + TFMT6_DXT3 = 180, + TFMT6_DXT5 = 181, + TFMT6_RGTC1_UNORM = 183, + TFMT6_RGTC1_SNORM = 184, + TFMT6_RGTC2_UNORM = 187, + TFMT6_RGTC2_SNORM = 188, + TFMT6_BPTC_UFLOAT = 190, + TFMT6_BPTC_FLOAT = 191, + TFMT6_BPTC = 192, + TFMT6_ASTC_4x4 = 193, + TFMT6_ASTC_5x4 = 194, + TFMT6_ASTC_5x5 = 195, + TFMT6_ASTC_6x5 = 196, + TFMT6_ASTC_6x6 = 197, + TFMT6_ASTC_8x5 = 198, + TFMT6_ASTC_8x6 = 199, + TFMT6_ASTC_8x8 = 
200, + TFMT6_ASTC_10x5 = 201, + TFMT6_ASTC_10x6 = 202, + TFMT6_ASTC_10x8 = 203, + TFMT6_ASTC_10x10 = 204, + TFMT6_ASTC_12x10 = 205, + TFMT6_ASTC_12x12 = 206, +}; + +enum a6xx_tex_fetchsize { + TFETCH6_1_BYTE = 0, + TFETCH6_2_BYTE = 1, + TFETCH6_4_BYTE = 2, + TFETCH6_8_BYTE = 3, + TFETCH6_16_BYTE = 4, +}; + +enum a6xx_depth_format { + DEPTH6_NONE = 0, + DEPTH6_16 = 1, + DEPTH6_24_8 = 2, + DEPTH6_32 = 4, +}; + +enum a6xx_shader_id { + A6XX_TP0_TMO_DATA = 9, + A6XX_TP0_SMO_DATA = 10, + A6XX_TP0_MIPMAP_BASE_DATA = 11, + A6XX_TP1_TMO_DATA = 25, + A6XX_TP1_SMO_DATA = 26, + A6XX_TP1_MIPMAP_BASE_DATA = 27, + A6XX_SP_INST_DATA = 41, + A6XX_SP_LB_0_DATA = 42, + A6XX_SP_LB_1_DATA = 43, + A6XX_SP_LB_2_DATA = 44, + A6XX_SP_LB_3_DATA = 45, + A6XX_SP_LB_4_DATA = 46, + A6XX_SP_LB_5_DATA = 47, + A6XX_SP_CB_BINDLESS_DATA = 48, + A6XX_SP_CB_LEGACY_DATA = 49, + A6XX_SP_UAV_DATA = 50, + A6XX_SP_INST_TAG = 51, + A6XX_SP_CB_BINDLESS_TAG = 52, + A6XX_SP_TMO_UMO_TAG = 53, + A6XX_SP_SMO_TAG = 54, + A6XX_SP_STATE_DATA = 55, + A6XX_HLSQ_CHUNK_CVS_RAM = 73, + A6XX_HLSQ_CHUNK_CPS_RAM = 74, + A6XX_HLSQ_CHUNK_CVS_RAM_TAG = 75, + A6XX_HLSQ_CHUNK_CPS_RAM_TAG = 76, + A6XX_HLSQ_ICB_CVS_CB_BASE_TAG = 77, + A6XX_HLSQ_ICB_CPS_CB_BASE_TAG = 78, + A6XX_HLSQ_CVS_MISC_RAM = 80, + A6XX_HLSQ_CPS_MISC_RAM = 81, + A6XX_HLSQ_INST_RAM = 82, + A6XX_HLSQ_GFX_CVS_CONST_RAM = 83, + A6XX_HLSQ_GFX_CPS_CONST_RAM = 84, + A6XX_HLSQ_CVS_MISC_RAM_TAG = 85, + A6XX_HLSQ_CPS_MISC_RAM_TAG = 86, + A6XX_HLSQ_INST_RAM_TAG = 87, + A6XX_HLSQ_GFX_CVS_CONST_RAM_TAG = 88, + A6XX_HLSQ_GFX_CPS_CONST_RAM_TAG = 89, + A6XX_HLSQ_PWR_REST_RAM = 90, + A6XX_HLSQ_PWR_REST_TAG = 91, + A6XX_HLSQ_DATAPATH_META = 96, + A6XX_HLSQ_FRONTEND_META = 97, + A6XX_HLSQ_INDIRECT_META = 98, + A6XX_HLSQ_BACKEND_META = 99, +}; + +enum a6xx_debugbus_id { + A6XX_DBGBUS_CP = 1, + A6XX_DBGBUS_RBBM = 2, + A6XX_DBGBUS_VBIF = 3, + A6XX_DBGBUS_HLSQ = 4, + A6XX_DBGBUS_UCHE = 5, + A6XX_DBGBUS_DPM = 6, + A6XX_DBGBUS_TESS = 7, + A6XX_DBGBUS_PC = 8, + A6XX_DBGBUS_VFDP = 9, + 
A6XX_DBGBUS_VPC = 10, + A6XX_DBGBUS_TSE = 11, + A6XX_DBGBUS_RAS = 12, + A6XX_DBGBUS_VSC = 13, + A6XX_DBGBUS_COM = 14, + A6XX_DBGBUS_LRZ = 16, + A6XX_DBGBUS_A2D = 17, + A6XX_DBGBUS_CCUFCHE = 18, + A6XX_DBGBUS_GMU_CX = 19, + A6XX_DBGBUS_RBP = 20, + A6XX_DBGBUS_DCS = 21, + A6XX_DBGBUS_DBGC = 22, + A6XX_DBGBUS_CX = 23, + A6XX_DBGBUS_GMU_GX = 24, + A6XX_DBGBUS_TPFCHE = 25, + A6XX_DBGBUS_GBIF_GX = 26, + A6XX_DBGBUS_GPC = 29, + A6XX_DBGBUS_LARC = 30, + A6XX_DBGBUS_HLSQ_SPTP = 31, + A6XX_DBGBUS_RB_0 = 32, + A6XX_DBGBUS_RB_1 = 33, + A6XX_DBGBUS_UCHE_WRAPPER = 36, + A6XX_DBGBUS_CCU_0 = 40, + A6XX_DBGBUS_CCU_1 = 41, + A6XX_DBGBUS_VFD_0 = 56, + A6XX_DBGBUS_VFD_1 = 57, + A6XX_DBGBUS_VFD_2 = 58, + A6XX_DBGBUS_VFD_3 = 59, + A6XX_DBGBUS_SP_0 = 64, + A6XX_DBGBUS_SP_1 = 65, + A6XX_DBGBUS_TPL1_0 = 72, + A6XX_DBGBUS_TPL1_1 = 73, + A6XX_DBGBUS_TPL1_2 = 74, + A6XX_DBGBUS_TPL1_3 = 75, +}; + +enum a6xx_cp_perfcounter_select { + PERF_CP_ALWAYS_COUNT = 0, + PERF_CP_BUSY_GFX_CORE_IDLE = 1, + PERF_CP_BUSY_CYCLES = 2, + PERF_CP_NUM_PREEMPTIONS = 3, + PERF_CP_PREEMPTION_REACTION_DELAY = 4, + PERF_CP_PREEMPTION_SWITCH_OUT_TIME = 5, + PERF_CP_PREEMPTION_SWITCH_IN_TIME = 6, + PERF_CP_DEAD_DRAWS_IN_BIN_RENDER = 7, + PERF_CP_PREDICATED_DRAWS_KILLED = 8, + PERF_CP_MODE_SWITCH = 9, + PERF_CP_ZPASS_DONE = 10, + PERF_CP_CONTEXT_DONE = 11, + PERF_CP_CACHE_FLUSH = 12, + PERF_CP_LONG_PREEMPTIONS = 13, + PERF_CP_SQE_I_CACHE_STARVE = 14, + PERF_CP_SQE_IDLE = 15, + PERF_CP_SQE_PM4_STARVE_RB_IB = 16, + PERF_CP_SQE_PM4_STARVE_SDS = 17, + PERF_CP_SQE_MRB_STARVE = 18, + PERF_CP_SQE_RRB_STARVE = 19, + PERF_CP_SQE_VSD_STARVE = 20, + PERF_CP_VSD_DECODE_STARVE = 21, + PERF_CP_SQE_PIPE_OUT_STALL = 22, + PERF_CP_SQE_SYNC_STALL = 23, + PERF_CP_SQE_PM4_WFI_STALL = 24, + PERF_CP_SQE_SYS_WFI_STALL = 25, + PERF_CP_SQE_T4_EXEC = 26, + PERF_CP_SQE_LOAD_STATE_EXEC = 27, + PERF_CP_SQE_SAVE_SDS_STATE = 28, + PERF_CP_SQE_DRAW_EXEC = 29, + PERF_CP_SQE_CTXT_REG_BUNCH_EXEC = 30, + PERF_CP_SQE_EXEC_PROFILED = 31, + 
PERF_CP_MEMORY_POOL_EMPTY = 32, + PERF_CP_MEMORY_POOL_SYNC_STALL = 33, + PERF_CP_MEMORY_POOL_ABOVE_THRESH = 34, + PERF_CP_AHB_WR_STALL_PRE_DRAWS = 35, + PERF_CP_AHB_STALL_SQE_GMU = 36, + PERF_CP_AHB_STALL_SQE_WR_OTHER = 37, + PERF_CP_AHB_STALL_SQE_RD_OTHER = 38, + PERF_CP_CLUSTER0_EMPTY = 39, + PERF_CP_CLUSTER1_EMPTY = 40, + PERF_CP_CLUSTER2_EMPTY = 41, + PERF_CP_CLUSTER3_EMPTY = 42, + PERF_CP_CLUSTER4_EMPTY = 43, + PERF_CP_CLUSTER5_EMPTY = 44, + PERF_CP_PM4_DATA = 45, + PERF_CP_PM4_HEADERS = 46, + PERF_CP_VBIF_READ_BEATS = 47, + PERF_CP_VBIF_WRITE_BEATS = 48, + PERF_CP_SQE_INSTR_COUNTER = 49, +}; + +enum a6xx_rbbm_perfcounter_select { + PERF_RBBM_ALWAYS_COUNT = 0, + PERF_RBBM_ALWAYS_ON = 1, + PERF_RBBM_TSE_BUSY = 2, + PERF_RBBM_RAS_BUSY = 3, + PERF_RBBM_PC_DCALL_BUSY = 4, + PERF_RBBM_PC_VSD_BUSY = 5, + PERF_RBBM_STATUS_MASKED = 6, + PERF_RBBM_COM_BUSY = 7, + PERF_RBBM_DCOM_BUSY = 8, + PERF_RBBM_VBIF_BUSY = 9, + PERF_RBBM_VSC_BUSY = 10, + PERF_RBBM_TESS_BUSY = 11, + PERF_RBBM_UCHE_BUSY = 12, + PERF_RBBM_HLSQ_BUSY = 13, +}; + +enum a6xx_pc_perfcounter_select { + PERF_PC_BUSY_CYCLES = 0, + PERF_PC_WORKING_CYCLES = 1, + PERF_PC_STALL_CYCLES_VFD = 2, + PERF_PC_STALL_CYCLES_TSE = 3, + PERF_PC_STALL_CYCLES_VPC = 4, + PERF_PC_STALL_CYCLES_UCHE = 5, + PERF_PC_STALL_CYCLES_TESS = 6, + PERF_PC_STALL_CYCLES_TSE_ONLY = 7, + PERF_PC_STALL_CYCLES_VPC_ONLY = 8, + PERF_PC_PASS1_TF_STALL_CYCLES = 9, + PERF_PC_STARVE_CYCLES_FOR_INDEX = 10, + PERF_PC_STARVE_CYCLES_FOR_TESS_FACTOR = 11, + PERF_PC_STARVE_CYCLES_FOR_VIZ_STREAM = 12, + PERF_PC_STARVE_CYCLES_FOR_POSITION = 13, + PERF_PC_STARVE_CYCLES_DI = 14, + PERF_PC_VIS_STREAMS_LOADED = 15, + PERF_PC_INSTANCES = 16, + PERF_PC_VPC_PRIMITIVES = 17, + PERF_PC_DEAD_PRIM = 18, + PERF_PC_LIVE_PRIM = 19, + PERF_PC_VERTEX_HITS = 20, + PERF_PC_IA_VERTICES = 21, + PERF_PC_IA_PRIMITIVES = 22, + PERF_PC_GS_PRIMITIVES = 23, + PERF_PC_HS_INVOCATIONS = 24, + PERF_PC_DS_INVOCATIONS = 25, + PERF_PC_VS_INVOCATIONS = 26, + PERF_PC_GS_INVOCATIONS = 27, + 
PERF_PC_DS_PRIMITIVES = 28, + PERF_PC_VPC_POS_DATA_TRANSACTION = 29, + PERF_PC_3D_DRAWCALLS = 30, + PERF_PC_2D_DRAWCALLS = 31, + PERF_PC_NON_DRAWCALL_GLOBAL_EVENTS = 32, + PERF_TESS_BUSY_CYCLES = 33, + PERF_TESS_WORKING_CYCLES = 34, + PERF_TESS_STALL_CYCLES_PC = 35, + PERF_TESS_STARVE_CYCLES_PC = 36, + PERF_PC_TSE_TRANSACTION = 37, + PERF_PC_TSE_VERTEX = 38, + PERF_PC_TESS_PC_UV_TRANS = 39, + PERF_PC_TESS_PC_UV_PATCHES = 40, + PERF_PC_TESS_FACTOR_TRANS = 41, +}; + +enum a6xx_vfd_perfcounter_select { + PERF_VFD_BUSY_CYCLES = 0, + PERF_VFD_STALL_CYCLES_UCHE = 1, + PERF_VFD_STALL_CYCLES_VPC_ALLOC = 2, + PERF_VFD_STALL_CYCLES_SP_INFO = 3, + PERF_VFD_STALL_CYCLES_SP_ATTR = 4, + PERF_VFD_STARVE_CYCLES_UCHE = 5, + PERF_VFD_RBUFFER_FULL = 6, + PERF_VFD_ATTR_INFO_FIFO_FULL = 7, + PERF_VFD_DECODED_ATTRIBUTE_BYTES = 8, + PERF_VFD_NUM_ATTRIBUTES = 9, + PERF_VFD_UPPER_SHADER_FIBERS = 10, + PERF_VFD_LOWER_SHADER_FIBERS = 11, + PERF_VFD_MODE_0_FIBERS = 12, + PERF_VFD_MODE_1_FIBERS = 13, + PERF_VFD_MODE_2_FIBERS = 14, + PERF_VFD_MODE_3_FIBERS = 15, + PERF_VFD_MODE_4_FIBERS = 16, + PERF_VFD_TOTAL_VERTICES = 17, + PERF_VFDP_STALL_CYCLES_VFD = 18, + PERF_VFDP_STALL_CYCLES_VFD_INDEX = 19, + PERF_VFDP_STALL_CYCLES_VFD_PROG = 20, + PERF_VFDP_STARVE_CYCLES_PC = 21, + PERF_VFDP_VS_STAGE_WAVES = 22, +}; + +enum a6xx_hlsq_perfcounter_select { + PERF_HLSQ_BUSY_CYCLES = 0, + PERF_HLSQ_STALL_CYCLES_UCHE = 1, + PERF_HLSQ_STALL_CYCLES_SP_STATE = 2, + PERF_HLSQ_STALL_CYCLES_SP_FS_STAGE = 3, + PERF_HLSQ_UCHE_LATENCY_CYCLES = 4, + PERF_HLSQ_UCHE_LATENCY_COUNT = 5, + PERF_HLSQ_FS_STAGE_1X_WAVES = 6, + PERF_HLSQ_FS_STAGE_2X_WAVES = 7, + PERF_HLSQ_QUADS = 8, + PERF_HLSQ_CS_INVOCATIONS = 9, + PERF_HLSQ_COMPUTE_DRAWCALLS = 10, + PERF_HLSQ_FS_DATA_WAIT_PROGRAMMING = 11, + PERF_HLSQ_DUAL_FS_PROG_ACTIVE = 12, + PERF_HLSQ_DUAL_VS_PROG_ACTIVE = 13, + PERF_HLSQ_FS_BATCH_COUNT_ZERO = 14, + PERF_HLSQ_VS_BATCH_COUNT_ZERO = 15, + PERF_HLSQ_WAVE_PENDING_NO_QUAD = 16, + PERF_HLSQ_WAVE_PENDING_NO_PRIM_BASE = 17, + 
PERF_HLSQ_STALL_CYCLES_VPC = 18, + PERF_HLSQ_PIXELS = 19, + PERF_HLSQ_DRAW_MODE_SWITCH_VSFS_SYNC = 20, +}; + +enum a6xx_vpc_perfcounter_select { + PERF_VPC_BUSY_CYCLES = 0, + PERF_VPC_WORKING_CYCLES = 1, + PERF_VPC_STALL_CYCLES_UCHE = 2, + PERF_VPC_STALL_CYCLES_VFD_WACK = 3, + PERF_VPC_STALL_CYCLES_HLSQ_PRIM_ALLOC = 4, + PERF_VPC_STALL_CYCLES_PC = 5, + PERF_VPC_STALL_CYCLES_SP_LM = 6, + PERF_VPC_STARVE_CYCLES_SP = 7, + PERF_VPC_STARVE_CYCLES_LRZ = 8, + PERF_VPC_PC_PRIMITIVES = 9, + PERF_VPC_SP_COMPONENTS = 10, + PERF_VPC_STALL_CYCLES_VPCRAM_POS = 11, + PERF_VPC_LRZ_ASSIGN_PRIMITIVES = 12, + PERF_VPC_RB_VISIBLE_PRIMITIVES = 13, + PERF_VPC_LM_TRANSACTION = 14, + PERF_VPC_STREAMOUT_TRANSACTION = 15, + PERF_VPC_VS_BUSY_CYCLES = 16, + PERF_VPC_PS_BUSY_CYCLES = 17, + PERF_VPC_VS_WORKING_CYCLES = 18, + PERF_VPC_PS_WORKING_CYCLES = 19, + PERF_VPC_STARVE_CYCLES_RB = 20, + PERF_VPC_NUM_VPCRAM_READ_POS = 21, + PERF_VPC_WIT_FULL_CYCLES = 22, + PERF_VPC_VPCRAM_FULL_CYCLES = 23, + PERF_VPC_LM_FULL_WAIT_FOR_INTP_END = 24, + PERF_VPC_NUM_VPCRAM_WRITE = 25, + PERF_VPC_NUM_VPCRAM_READ_SO = 26, + PERF_VPC_NUM_ATTR_REQ_LM = 27, +}; + +enum a6xx_tse_perfcounter_select { + PERF_TSE_BUSY_CYCLES = 0, + PERF_TSE_CLIPPING_CYCLES = 1, + PERF_TSE_STALL_CYCLES_RAS = 2, + PERF_TSE_STALL_CYCLES_LRZ_BARYPLANE = 3, + PERF_TSE_STALL_CYCLES_LRZ_ZPLANE = 4, + PERF_TSE_STARVE_CYCLES_PC = 5, + PERF_TSE_INPUT_PRIM = 6, + PERF_TSE_INPUT_NULL_PRIM = 7, + PERF_TSE_TRIVAL_REJ_PRIM = 8, + PERF_TSE_CLIPPED_PRIM = 9, + PERF_TSE_ZERO_AREA_PRIM = 10, + PERF_TSE_FACENESS_CULLED_PRIM = 11, + PERF_TSE_ZERO_PIXEL_PRIM = 12, + PERF_TSE_OUTPUT_NULL_PRIM = 13, + PERF_TSE_OUTPUT_VISIBLE_PRIM = 14, + PERF_TSE_CINVOCATION = 15, + PERF_TSE_CPRIMITIVES = 16, + PERF_TSE_2D_INPUT_PRIM = 17, + PERF_TSE_2D_ALIVE_CYCLES = 18, + PERF_TSE_CLIP_PLANES = 19, +}; + +enum a6xx_ras_perfcounter_select { + PERF_RAS_BUSY_CYCLES = 0, + PERF_RAS_SUPERTILE_ACTIVE_CYCLES = 1, + PERF_RAS_STALL_CYCLES_LRZ = 2, + PERF_RAS_STARVE_CYCLES_TSE = 3, 
+ PERF_RAS_SUPER_TILES = 4, + PERF_RAS_8X4_TILES = 5, + PERF_RAS_MASKGEN_ACTIVE = 6, + PERF_RAS_FULLY_COVERED_SUPER_TILES = 7, + PERF_RAS_FULLY_COVERED_8X4_TILES = 8, + PERF_RAS_PRIM_KILLED_INVISILBE = 9, + PERF_RAS_SUPERTILE_GEN_ACTIVE_CYCLES = 10, + PERF_RAS_LRZ_INTF_WORKING_CYCLES = 11, + PERF_RAS_BLOCKS = 12, +}; + +enum a6xx_uche_perfcounter_select { + PERF_UCHE_BUSY_CYCLES = 0, + PERF_UCHE_STALL_CYCLES_ARBITER = 1, + PERF_UCHE_VBIF_LATENCY_CYCLES = 2, + PERF_UCHE_VBIF_LATENCY_SAMPLES = 3, + PERF_UCHE_VBIF_READ_BEATS_TP = 4, + PERF_UCHE_VBIF_READ_BEATS_VFD = 5, + PERF_UCHE_VBIF_READ_BEATS_HLSQ = 6, + PERF_UCHE_VBIF_READ_BEATS_LRZ = 7, + PERF_UCHE_VBIF_READ_BEATS_SP = 8, + PERF_UCHE_READ_REQUESTS_TP = 9, + PERF_UCHE_READ_REQUESTS_VFD = 10, + PERF_UCHE_READ_REQUESTS_HLSQ = 11, + PERF_UCHE_READ_REQUESTS_LRZ = 12, + PERF_UCHE_READ_REQUESTS_SP = 13, + PERF_UCHE_WRITE_REQUESTS_LRZ = 14, + PERF_UCHE_WRITE_REQUESTS_SP = 15, + PERF_UCHE_WRITE_REQUESTS_VPC = 16, + PERF_UCHE_WRITE_REQUESTS_VSC = 17, + PERF_UCHE_EVICTS = 18, + PERF_UCHE_BANK_REQ0 = 19, + PERF_UCHE_BANK_REQ1 = 20, + PERF_UCHE_BANK_REQ2 = 21, + PERF_UCHE_BANK_REQ3 = 22, + PERF_UCHE_BANK_REQ4 = 23, + PERF_UCHE_BANK_REQ5 = 24, + PERF_UCHE_BANK_REQ6 = 25, + PERF_UCHE_BANK_REQ7 = 26, + PERF_UCHE_VBIF_READ_BEATS_CH0 = 27, + PERF_UCHE_VBIF_READ_BEATS_CH1 = 28, + PERF_UCHE_GMEM_READ_BEATS = 29, + PERF_UCHE_TPH_REF_FULL = 30, + PERF_UCHE_TPH_VICTIM_FULL = 31, + PERF_UCHE_TPH_EXT_FULL = 32, + PERF_UCHE_VBIF_STALL_WRITE_DATA = 33, + PERF_UCHE_DCMP_LATENCY_SAMPLES = 34, + PERF_UCHE_DCMP_LATENCY_CYCLES = 35, + PERF_UCHE_VBIF_READ_BEATS_PC = 36, + PERF_UCHE_READ_REQUESTS_PC = 37, + PERF_UCHE_RAM_READ_REQ = 38, + PERF_UCHE_RAM_WRITE_REQ = 39, +}; + +enum a6xx_tp_perfcounter_select { + PERF_TP_BUSY_CYCLES = 0, + PERF_TP_STALL_CYCLES_UCHE = 1, + PERF_TP_LATENCY_CYCLES = 2, + PERF_TP_LATENCY_TRANS = 3, + PERF_TP_FLAG_CACHE_REQUEST_SAMPLES = 4, + PERF_TP_FLAG_CACHE_REQUEST_LATENCY = 5, + PERF_TP_L1_CACHELINE_REQUESTS = 6, + 
PERF_TP_L1_CACHELINE_MISSES = 7, + PERF_TP_SP_TP_TRANS = 8, + PERF_TP_TP_SP_TRANS = 9, + PERF_TP_OUTPUT_PIXELS = 10, + PERF_TP_FILTER_WORKLOAD_16BIT = 11, + PERF_TP_FILTER_WORKLOAD_32BIT = 12, + PERF_TP_QUADS_RECEIVED = 13, + PERF_TP_QUADS_OFFSET = 14, + PERF_TP_QUADS_SHADOW = 15, + PERF_TP_QUADS_ARRAY = 16, + PERF_TP_QUADS_GRADIENT = 17, + PERF_TP_QUADS_1D = 18, + PERF_TP_QUADS_2D = 19, + PERF_TP_QUADS_BUFFER = 20, + PERF_TP_QUADS_3D = 21, + PERF_TP_QUADS_CUBE = 22, + PERF_TP_DIVERGENT_QUADS_RECEIVED = 23, + PERF_TP_PRT_NON_RESIDENT_EVENTS = 24, + PERF_TP_OUTPUT_PIXELS_POINT = 25, + PERF_TP_OUTPUT_PIXELS_BILINEAR = 26, + PERF_TP_OUTPUT_PIXELS_MIP = 27, + PERF_TP_OUTPUT_PIXELS_ANISO = 28, + PERF_TP_OUTPUT_PIXELS_ZERO_LOD = 29, + PERF_TP_FLAG_CACHE_REQUESTS = 30, + PERF_TP_FLAG_CACHE_MISSES = 31, + PERF_TP_L1_5_L2_REQUESTS = 32, + PERF_TP_2D_OUTPUT_PIXELS = 33, + PERF_TP_2D_OUTPUT_PIXELS_POINT = 34, + PERF_TP_2D_OUTPUT_PIXELS_BILINEAR = 35, + PERF_TP_2D_FILTER_WORKLOAD_16BIT = 36, + PERF_TP_2D_FILTER_WORKLOAD_32BIT = 37, + PERF_TP_TPA2TPC_TRANS = 38, + PERF_TP_L1_MISSES_ASTC_1TILE = 39, + PERF_TP_L1_MISSES_ASTC_2TILE = 40, + PERF_TP_L1_MISSES_ASTC_4TILE = 41, + PERF_TP_L1_5_L2_COMPRESS_REQS = 42, + PERF_TP_L1_5_L2_COMPRESS_MISS = 43, + PERF_TP_L1_BANK_CONFLICT = 44, + PERF_TP_L1_5_MISS_LATENCY_CYCLES = 45, + PERF_TP_L1_5_MISS_LATENCY_TRANS = 46, + PERF_TP_QUADS_CONSTANT_MULTIPLIED = 47, + PERF_TP_FRONTEND_WORKING_CYCLES = 48, + PERF_TP_L1_TAG_WORKING_CYCLES = 49, + PERF_TP_L1_DATA_WRITE_WORKING_CYCLES = 50, + PERF_TP_PRE_L1_DECOM_WORKING_CYCLES = 51, + PERF_TP_BACKEND_WORKING_CYCLES = 52, + PERF_TP_FLAG_CACHE_WORKING_CYCLES = 53, + PERF_TP_L1_5_CACHE_WORKING_CYCLES = 54, + PERF_TP_STARVE_CYCLES_SP = 55, + PERF_TP_STARVE_CYCLES_UCHE = 56, +}; + +enum a6xx_sp_perfcounter_select { + PERF_SP_BUSY_CYCLES = 0, + PERF_SP_ALU_WORKING_CYCLES = 1, + PERF_SP_EFU_WORKING_CYCLES = 2, + PERF_SP_STALL_CYCLES_VPC = 3, + PERF_SP_STALL_CYCLES_TP = 4, + PERF_SP_STALL_CYCLES_UCHE = 5, 
+ PERF_SP_STALL_CYCLES_RB = 6, + PERF_SP_NON_EXECUTION_CYCLES = 7, + PERF_SP_WAVE_CONTEXTS = 8, + PERF_SP_WAVE_CONTEXT_CYCLES = 9, + PERF_SP_FS_STAGE_WAVE_CYCLES = 10, + PERF_SP_FS_STAGE_WAVE_SAMPLES = 11, + PERF_SP_VS_STAGE_WAVE_CYCLES = 12, + PERF_SP_VS_STAGE_WAVE_SAMPLES = 13, + PERF_SP_FS_STAGE_DURATION_CYCLES = 14, + PERF_SP_VS_STAGE_DURATION_CYCLES = 15, + PERF_SP_WAVE_CTRL_CYCLES = 16, + PERF_SP_WAVE_LOAD_CYCLES = 17, + PERF_SP_WAVE_EMIT_CYCLES = 18, + PERF_SP_WAVE_NOP_CYCLES = 19, + PERF_SP_WAVE_WAIT_CYCLES = 20, + PERF_SP_WAVE_FETCH_CYCLES = 21, + PERF_SP_WAVE_IDLE_CYCLES = 22, + PERF_SP_WAVE_END_CYCLES = 23, + PERF_SP_WAVE_LONG_SYNC_CYCLES = 24, + PERF_SP_WAVE_SHORT_SYNC_CYCLES = 25, + PERF_SP_WAVE_JOIN_CYCLES = 26, + PERF_SP_LM_LOAD_INSTRUCTIONS = 27, + PERF_SP_LM_STORE_INSTRUCTIONS = 28, + PERF_SP_LM_ATOMICS = 29, + PERF_SP_GM_LOAD_INSTRUCTIONS = 30, + PERF_SP_GM_STORE_INSTRUCTIONS = 31, + PERF_SP_GM_ATOMICS = 32, + PERF_SP_VS_STAGE_TEX_INSTRUCTIONS = 33, + PERF_SP_VS_STAGE_EFU_INSTRUCTIONS = 34, + PERF_SP_VS_STAGE_FULL_ALU_INSTRUCTIONS = 35, + PERF_SP_VS_STAGE_HALF_ALU_INSTRUCTIONS = 36, + PERF_SP_FS_STAGE_TEX_INSTRUCTIONS = 37, + PERF_SP_FS_STAGE_CFLOW_INSTRUCTIONS = 38, + PERF_SP_FS_STAGE_EFU_INSTRUCTIONS = 39, + PERF_SP_FS_STAGE_FULL_ALU_INSTRUCTIONS = 40, + PERF_SP_FS_STAGE_HALF_ALU_INSTRUCTIONS = 41, + PERF_SP_FS_STAGE_BARY_INSTRUCTIONS = 42, + PERF_SP_VS_INSTRUCTIONS = 43, + PERF_SP_FS_INSTRUCTIONS = 44, + PERF_SP_ADDR_LOCK_COUNT = 45, + PERF_SP_UCHE_READ_TRANS = 46, + PERF_SP_UCHE_WRITE_TRANS = 47, + PERF_SP_EXPORT_VPC_TRANS = 48, + PERF_SP_EXPORT_RB_TRANS = 49, + PERF_SP_PIXELS_KILLED = 50, + PERF_SP_ICL1_REQUESTS = 51, + PERF_SP_ICL1_MISSES = 52, + PERF_SP_HS_INSTRUCTIONS = 53, + PERF_SP_DS_INSTRUCTIONS = 54, + PERF_SP_GS_INSTRUCTIONS = 55, + PERF_SP_CS_INSTRUCTIONS = 56, + PERF_SP_GPR_READ = 57, + PERF_SP_GPR_WRITE = 58, + PERF_SP_FS_STAGE_HALF_EFU_INSTRUCTIONS = 59, + PERF_SP_VS_STAGE_HALF_EFU_INSTRUCTIONS = 60, + PERF_SP_LM_BANK_CONFLICTS = 
61, + PERF_SP_TEX_CONTROL_WORKING_CYCLES = 62, + PERF_SP_LOAD_CONTROL_WORKING_CYCLES = 63, + PERF_SP_FLOW_CONTROL_WORKING_CYCLES = 64, + PERF_SP_LM_WORKING_CYCLES = 65, + PERF_SP_DISPATCHER_WORKING_CYCLES = 66, + PERF_SP_SEQUENCER_WORKING_CYCLES = 67, + PERF_SP_LOW_EFFICIENCY_STARVED_BY_TP = 68, + PERF_SP_STARVE_CYCLES_HLSQ = 69, + PERF_SP_NON_EXECUTION_LS_CYCLES = 70, + PERF_SP_WORKING_EU = 71, + PERF_SP_ANY_EU_WORKING = 72, + PERF_SP_WORKING_EU_FS_STAGE = 73, + PERF_SP_ANY_EU_WORKING_FS_STAGE = 74, + PERF_SP_WORKING_EU_VS_STAGE = 75, + PERF_SP_ANY_EU_WORKING_VS_STAGE = 76, + PERF_SP_WORKING_EU_CS_STAGE = 77, + PERF_SP_ANY_EU_WORKING_CS_STAGE = 78, + PERF_SP_GPR_READ_PREFETCH = 79, + PERF_SP_GPR_READ_CONFLICT = 80, + PERF_SP_GPR_WRITE_CONFLICT = 81, + PERF_SP_GM_LOAD_LATENCY_CYCLES = 82, + PERF_SP_GM_LOAD_LATENCY_SAMPLES = 83, + PERF_SP_EXECUTABLE_WAVES = 84, +}; + +enum a6xx_rb_perfcounter_select { + PERF_RB_BUSY_CYCLES = 0, + PERF_RB_STALL_CYCLES_HLSQ = 1, + PERF_RB_STALL_CYCLES_FIFO0_FULL = 2, + PERF_RB_STALL_CYCLES_FIFO1_FULL = 3, + PERF_RB_STALL_CYCLES_FIFO2_FULL = 4, + PERF_RB_STARVE_CYCLES_SP = 5, + PERF_RB_STARVE_CYCLES_LRZ_TILE = 6, + PERF_RB_STARVE_CYCLES_CCU = 7, + PERF_RB_STARVE_CYCLES_Z_PLANE = 8, + PERF_RB_STARVE_CYCLES_BARY_PLANE = 9, + PERF_RB_Z_WORKLOAD = 10, + PERF_RB_HLSQ_ACTIVE = 11, + PERF_RB_Z_READ = 12, + PERF_RB_Z_WRITE = 13, + PERF_RB_C_READ = 14, + PERF_RB_C_WRITE = 15, + PERF_RB_TOTAL_PASS = 16, + PERF_RB_Z_PASS = 17, + PERF_RB_Z_FAIL = 18, + PERF_RB_S_FAIL = 19, + PERF_RB_BLENDED_FXP_COMPONENTS = 20, + PERF_RB_BLENDED_FP16_COMPONENTS = 21, + PERF_RB_PS_INVOCATIONS = 22, + PERF_RB_2D_ALIVE_CYCLES = 23, + PERF_RB_2D_STALL_CYCLES_A2D = 24, + PERF_RB_2D_STARVE_CYCLES_SRC = 25, + PERF_RB_2D_STARVE_CYCLES_SP = 26, + PERF_RB_2D_STARVE_CYCLES_DST = 27, + PERF_RB_2D_VALID_PIXELS = 28, + PERF_RB_3D_PIXELS = 29, + PERF_RB_BLENDER_WORKING_CYCLES = 30, + PERF_RB_ZPROC_WORKING_CYCLES = 31, + PERF_RB_CPROC_WORKING_CYCLES = 32, + 
PERF_RB_SAMPLER_WORKING_CYCLES = 33, + PERF_RB_STALL_CYCLES_CCU_COLOR_READ = 34, + PERF_RB_STALL_CYCLES_CCU_COLOR_WRITE = 35, + PERF_RB_STALL_CYCLES_CCU_DEPTH_READ = 36, + PERF_RB_STALL_CYCLES_CCU_DEPTH_WRITE = 37, + PERF_RB_STALL_CYCLES_VPC = 38, + PERF_RB_2D_INPUT_TRANS = 39, + PERF_RB_2D_OUTPUT_RB_DST_TRANS = 40, + PERF_RB_2D_OUTPUT_RB_SRC_TRANS = 41, + PERF_RB_BLENDED_FP32_COMPONENTS = 42, + PERF_RB_COLOR_PIX_TILES = 43, + PERF_RB_STALL_CYCLES_CCU = 44, + PERF_RB_EARLY_Z_ARB3_GRANT = 45, + PERF_RB_LATE_Z_ARB3_GRANT = 46, + PERF_RB_EARLY_Z_SKIP_GRANT = 47, +}; + +enum a6xx_vsc_perfcounter_select { + PERF_VSC_BUSY_CYCLES = 0, + PERF_VSC_WORKING_CYCLES = 1, + PERF_VSC_STALL_CYCLES_UCHE = 2, + PERF_VSC_EOT_NUM = 3, + PERF_VSC_INPUT_TILES = 4, +}; + +enum a6xx_ccu_perfcounter_select { + PERF_CCU_BUSY_CYCLES = 0, + PERF_CCU_STALL_CYCLES_RB_DEPTH_RETURN = 1, + PERF_CCU_STALL_CYCLES_RB_COLOR_RETURN = 2, + PERF_CCU_STARVE_CYCLES_FLAG_RETURN = 3, + PERF_CCU_DEPTH_BLOCKS = 4, + PERF_CCU_COLOR_BLOCKS = 5, + PERF_CCU_DEPTH_BLOCK_HIT = 6, + PERF_CCU_COLOR_BLOCK_HIT = 7, + PERF_CCU_PARTIAL_BLOCK_READ = 8, + PERF_CCU_GMEM_READ = 9, + PERF_CCU_GMEM_WRITE = 10, + PERF_CCU_DEPTH_READ_FLAG0_COUNT = 11, + PERF_CCU_DEPTH_READ_FLAG1_COUNT = 12, + PERF_CCU_DEPTH_READ_FLAG2_COUNT = 13, + PERF_CCU_DEPTH_READ_FLAG3_COUNT = 14, + PERF_CCU_DEPTH_READ_FLAG4_COUNT = 15, + PERF_CCU_DEPTH_READ_FLAG5_COUNT = 16, + PERF_CCU_DEPTH_READ_FLAG6_COUNT = 17, + PERF_CCU_DEPTH_READ_FLAG8_COUNT = 18, + PERF_CCU_COLOR_READ_FLAG0_COUNT = 19, + PERF_CCU_COLOR_READ_FLAG1_COUNT = 20, + PERF_CCU_COLOR_READ_FLAG2_COUNT = 21, + PERF_CCU_COLOR_READ_FLAG3_COUNT = 22, + PERF_CCU_COLOR_READ_FLAG4_COUNT = 23, + PERF_CCU_COLOR_READ_FLAG5_COUNT = 24, + PERF_CCU_COLOR_READ_FLAG6_COUNT = 25, + PERF_CCU_COLOR_READ_FLAG8_COUNT = 26, + PERF_CCU_2D_RD_REQ = 27, + PERF_CCU_2D_WR_REQ = 28, +}; + +enum a6xx_lrz_perfcounter_select { + PERF_LRZ_BUSY_CYCLES = 0, + PERF_LRZ_STARVE_CYCLES_RAS = 1, + PERF_LRZ_STALL_CYCLES_RB = 2, + 
PERF_LRZ_STALL_CYCLES_VSC = 3, + PERF_LRZ_STALL_CYCLES_VPC = 4, + PERF_LRZ_STALL_CYCLES_FLAG_PREFETCH = 5, + PERF_LRZ_STALL_CYCLES_UCHE = 6, + PERF_LRZ_LRZ_READ = 7, + PERF_LRZ_LRZ_WRITE = 8, + PERF_LRZ_READ_LATENCY = 9, + PERF_LRZ_MERGE_CACHE_UPDATING = 10, + PERF_LRZ_PRIM_KILLED_BY_MASKGEN = 11, + PERF_LRZ_PRIM_KILLED_BY_LRZ = 12, + PERF_LRZ_VISIBLE_PRIM_AFTER_LRZ = 13, + PERF_LRZ_FULL_8X8_TILES = 14, + PERF_LRZ_PARTIAL_8X8_TILES = 15, + PERF_LRZ_TILE_KILLED = 16, + PERF_LRZ_TOTAL_PIXEL = 17, + PERF_LRZ_VISIBLE_PIXEL_AFTER_LRZ = 18, + PERF_LRZ_FULLY_COVERED_TILES = 19, + PERF_LRZ_PARTIAL_COVERED_TILES = 20, + PERF_LRZ_FEEDBACK_ACCEPT = 21, + PERF_LRZ_FEEDBACK_DISCARD = 22, + PERF_LRZ_FEEDBACK_STALL = 23, + PERF_LRZ_STALL_CYCLES_RB_ZPLANE = 24, + PERF_LRZ_STALL_CYCLES_RB_BPLANE = 25, + PERF_LRZ_STALL_CYCLES_VC = 26, + PERF_LRZ_RAS_MASK_TRANS = 27, +}; + +enum a6xx_cmp_perfcounter_select { + PERF_CMPDECMP_STALL_CYCLES_ARB = 0, + PERF_CMPDECMP_VBIF_LATENCY_CYCLES = 1, + PERF_CMPDECMP_VBIF_LATENCY_SAMPLES = 2, + PERF_CMPDECMP_VBIF_READ_DATA_CCU = 3, + PERF_CMPDECMP_VBIF_WRITE_DATA_CCU = 4, + PERF_CMPDECMP_VBIF_READ_REQUEST = 5, + PERF_CMPDECMP_VBIF_WRITE_REQUEST = 6, + PERF_CMPDECMP_VBIF_READ_DATA = 7, + PERF_CMPDECMP_VBIF_WRITE_DATA = 8, + PERF_CMPDECMP_FLAG_FETCH_CYCLES = 9, + PERF_CMPDECMP_FLAG_FETCH_SAMPLES = 10, + PERF_CMPDECMP_DEPTH_WRITE_FLAG1_COUNT = 11, + PERF_CMPDECMP_DEPTH_WRITE_FLAG2_COUNT = 12, + PERF_CMPDECMP_DEPTH_WRITE_FLAG3_COUNT = 13, + PERF_CMPDECMP_DEPTH_WRITE_FLAG4_COUNT = 14, + PERF_CMPDECMP_DEPTH_WRITE_FLAG5_COUNT = 15, + PERF_CMPDECMP_DEPTH_WRITE_FLAG6_COUNT = 16, + PERF_CMPDECMP_DEPTH_WRITE_FLAG8_COUNT = 17, + PERF_CMPDECMP_COLOR_WRITE_FLAG1_COUNT = 18, + PERF_CMPDECMP_COLOR_WRITE_FLAG2_COUNT = 19, + PERF_CMPDECMP_COLOR_WRITE_FLAG3_COUNT = 20, + PERF_CMPDECMP_COLOR_WRITE_FLAG4_COUNT = 21, + PERF_CMPDECMP_COLOR_WRITE_FLAG5_COUNT = 22, + PERF_CMPDECMP_COLOR_WRITE_FLAG6_COUNT = 23, + PERF_CMPDECMP_COLOR_WRITE_FLAG8_COUNT = 24, + 
PERF_CMPDECMP_2D_STALL_CYCLES_VBIF_REQ = 25, + PERF_CMPDECMP_2D_STALL_CYCLES_VBIF_WR = 26, + PERF_CMPDECMP_2D_STALL_CYCLES_VBIF_RETURN = 27, + PERF_CMPDECMP_2D_RD_DATA = 28, + PERF_CMPDECMP_2D_WR_DATA = 29, + PERF_CMPDECMP_VBIF_READ_DATA_UCHE_CH0 = 30, + PERF_CMPDECMP_VBIF_READ_DATA_UCHE_CH1 = 31, + PERF_CMPDECMP_2D_OUTPUT_TRANS = 32, + PERF_CMPDECMP_VBIF_WRITE_DATA_UCHE = 33, + PERF_CMPDECMP_DEPTH_WRITE_FLAG0_COUNT = 34, + PERF_CMPDECMP_COLOR_WRITE_FLAG0_COUNT = 35, + PERF_CMPDECMP_COLOR_WRITE_FLAGALPHA_COUNT = 36, + PERF_CMPDECMP_2D_BUSY_CYCLES = 37, + PERF_CMPDECMP_2D_REORDER_STARVE_CYCLES = 38, + PERF_CMPDECMP_2D_PIXELS = 39, +}; + +enum a6xx_2d_ifmt { + R2D_UNORM8 = 16, + R2D_INT32 = 7, + R2D_INT16 = 6, + R2D_INT8 = 5, + R2D_FLOAT32 = 4, + R2D_FLOAT16 = 3, +}; + +enum a6xx_tex_filter { + A6XX_TEX_NEAREST = 0, + A6XX_TEX_LINEAR = 1, + A6XX_TEX_ANISO = 2, +}; + +enum a6xx_tex_clamp { + A6XX_TEX_REPEAT = 0, + A6XX_TEX_CLAMP_TO_EDGE = 1, + A6XX_TEX_MIRROR_REPEAT = 2, + A6XX_TEX_CLAMP_TO_BORDER = 3, + A6XX_TEX_MIRROR_CLAMP = 4, +}; + +enum a6xx_tex_aniso { + A6XX_TEX_ANISO_1 = 0, + A6XX_TEX_ANISO_2 = 1, + A6XX_TEX_ANISO_4 = 2, + A6XX_TEX_ANISO_8 = 3, + A6XX_TEX_ANISO_16 = 4, +}; + +enum a6xx_tex_swiz { + A6XX_TEX_X = 0, + A6XX_TEX_Y = 1, + A6XX_TEX_Z = 2, + A6XX_TEX_W = 3, + A6XX_TEX_ZERO = 4, + A6XX_TEX_ONE = 5, +}; + +enum a6xx_tex_type { + A6XX_TEX_1D = 0, + A6XX_TEX_2D = 1, + A6XX_TEX_CUBE = 2, + A6XX_TEX_3D = 3, +}; + +#define A6XX_RBBM_INT_0_MASK_RBBM_GPU_IDLE 0x00000001 +#define A6XX_RBBM_INT_0_MASK_CP_AHB_ERROR 0x00000002 +#define A6XX_RBBM_INT_0_MASK_RBBM_ATB_ASYNCFIFO_OVERFLOW 0x00000040 +#define A6XX_RBBM_INT_0_MASK_RBBM_GPC_ERROR 0x00000080 +#define A6XX_RBBM_INT_0_MASK_CP_SW 0x00000100 +#define A6XX_RBBM_INT_0_MASK_CP_HW_ERROR 0x00000200 +#define A6XX_RBBM_INT_0_MASK_CP_CCU_FLUSH_DEPTH_TS 0x00000400 +#define A6XX_RBBM_INT_0_MASK_CP_CCU_FLUSH_COLOR_TS 0x00000800 +#define A6XX_RBBM_INT_0_MASK_CP_CCU_RESOLVE_TS 0x00001000 +#define 
A6XX_RBBM_INT_0_MASK_CP_IB2 0x00002000 +#define A6XX_RBBM_INT_0_MASK_CP_IB1 0x00004000 +#define A6XX_RBBM_INT_0_MASK_CP_RB 0x00008000 +#define A6XX_RBBM_INT_0_MASK_CP_RB_DONE_TS 0x00020000 +#define A6XX_RBBM_INT_0_MASK_CP_WT_DONE_TS 0x00040000 +#define A6XX_RBBM_INT_0_MASK_CP_CACHE_FLUSH_TS 0x00100000 +#define A6XX_RBBM_INT_0_MASK_RBBM_ATB_BUS_OVERFLOW 0x00400000 +#define A6XX_RBBM_INT_0_MASK_RBBM_HANG_DETECT 0x00800000 +#define A6XX_RBBM_INT_0_MASK_UCHE_OOB_ACCESS 0x01000000 +#define A6XX_RBBM_INT_0_MASK_UCHE_TRAP_INTR 0x02000000 +#define A6XX_RBBM_INT_0_MASK_DEBBUS_INTR_0 0x04000000 +#define A6XX_RBBM_INT_0_MASK_DEBBUS_INTR_1 0x08000000 +#define A6XX_RBBM_INT_0_MASK_ISDB_CPU_IRQ 0x40000000 +#define A6XX_RBBM_INT_0_MASK_ISDB_UNDER_DEBUG 0x80000000 +#define A6XX_CP_INT_CP_OPCODE_ERROR 0x00000001 +#define A6XX_CP_INT_CP_UCODE_ERROR 0x00000002 +#define A6XX_CP_INT_CP_HW_FAULT_ERROR 0x00000004 +#define A6XX_CP_INT_CP_REGISTER_PROTECTION_ERROR 0x00000010 +#define A6XX_CP_INT_CP_AHB_ERROR 0x00000020 +#define A6XX_CP_INT_CP_VSD_PARITY_ERROR 0x00000040 +#define A6XX_CP_INT_CP_ILLEGAL_INSTR_ERROR 0x00000080 +#define REG_A6XX_CP_RB_BASE 0x00000800 + +#define REG_A6XX_CP_RB_BASE_HI 0x00000801 + +#define REG_A6XX_CP_RB_CNTL 0x00000802 + +#define REG_A6XX_CP_RB_RPTR_ADDR_LO 0x00000804 + +#define REG_A6XX_CP_RB_RPTR_ADDR_HI 0x00000805 + +#define REG_A6XX_CP_RB_RPTR 0x00000806 + +#define REG_A6XX_CP_RB_WPTR 0x00000807 + +#define REG_A6XX_CP_SQE_CNTL 0x00000808 + +#define REG_A6XX_CP_HW_FAULT 0x00000821 + +#define REG_A6XX_CP_INTERRUPT_STATUS 0x00000823 + +#define REG_A6XX_CP_PROTECT_STATUS 0x00000824 + +#define REG_A6XX_CP_SQE_INSTR_BASE_LO 0x00000830 + +#define REG_A6XX_CP_SQE_INSTR_BASE_HI 0x00000831 + +#define REG_A6XX_CP_MISC_CNTL 0x00000840 + +#define REG_A6XX_CP_ROQ_THRESHOLDS_1 0x000008c1 + +#define REG_A6XX_CP_ROQ_THRESHOLDS_2 0x000008c2 + +#define REG_A6XX_CP_MEM_POOL_SIZE 0x000008c3 + +#define REG_A6XX_CP_CHICKEN_DBG 0x00000841 + +#define REG_A6XX_CP_ADDR_MODE_CNTL 
0x00000842 + +#define REG_A6XX_CP_DBG_ECO_CNTL 0x00000843 + +#define REG_A6XX_CP_PROTECT_CNTL 0x0000084f + +static inline uint32_t REG_A6XX_CP_SCRATCH(uint32_t i0) { return 0x00000883 + 0x1*i0; } + +static inline uint32_t REG_A6XX_CP_SCRATCH_REG(uint32_t i0) { return 0x00000883 + 0x1*i0; } + +static inline uint32_t REG_A6XX_CP_PROTECT(uint32_t i0) { return 0x00000850 + 0x1*i0; } + +static inline uint32_t REG_A6XX_CP_PROTECT_REG(uint32_t i0) { return 0x00000850 + 0x1*i0; } +#define A6XX_CP_PROTECT_REG_BASE_ADDR__MASK 0x0003ffff +#define A6XX_CP_PROTECT_REG_BASE_ADDR__SHIFT 0 +static inline uint32_t A6XX_CP_PROTECT_REG_BASE_ADDR(uint32_t val) +{ + return ((val) << A6XX_CP_PROTECT_REG_BASE_ADDR__SHIFT) & A6XX_CP_PROTECT_REG_BASE_ADDR__MASK; +} +#define A6XX_CP_PROTECT_REG_MASK_LEN__MASK 0x7ffc0000 +#define A6XX_CP_PROTECT_REG_MASK_LEN__SHIFT 18 +static inline uint32_t A6XX_CP_PROTECT_REG_MASK_LEN(uint32_t val) +{ + return ((val) << A6XX_CP_PROTECT_REG_MASK_LEN__SHIFT) & A6XX_CP_PROTECT_REG_MASK_LEN__MASK; +} +#define A6XX_CP_PROTECT_REG_READ 0x80000000 + +#define REG_A6XX_CP_CONTEXT_SWITCH_CNTL 0x000008a0 + +#define REG_A6XX_CP_CONTEXT_SWITCH_SMMU_INFO_LO 0x000008a1 + +#define REG_A6XX_CP_CONTEXT_SWITCH_SMMU_INFO_HI 0x000008a2 + +#define REG_A6XX_CP_CONTEXT_SWITCH_PRIV_NON_SECURE_RESTORE_ADDR_LO 0x000008a3 + +#define REG_A6XX_CP_CONTEXT_SWITCH_PRIV_NON_SECURE_RESTORE_ADDR_HI 0x000008a4 + +#define REG_A6XX_CP_CONTEXT_SWITCH_PRIV_SECURE_RESTORE_ADDR_LO 0x000008a5 + +#define REG_A6XX_CP_CONTEXT_SWITCH_PRIV_SECURE_RESTORE_ADDR_HI 0x000008a6 + +#define REG_A6XX_CP_CONTEXT_SWITCH_NON_PRIV_RESTORE_ADDR_LO 0x000008a7 + +#define REG_A6XX_CP_CONTEXT_SWITCH_NON_PRIV_RESTORE_ADDR_HI 0x000008a8 + +#define REG_A6XX_CP_PERFCTR_CP_SEL_0 0x000008d0 + +#define REG_A6XX_CP_PERFCTR_CP_SEL_1 0x000008d1 + +#define REG_A6XX_CP_PERFCTR_CP_SEL_2 0x000008d2 + +#define REG_A6XX_CP_PERFCTR_CP_SEL_3 0x000008d3 + +#define REG_A6XX_CP_PERFCTR_CP_SEL_4 0x000008d4 + +#define 
REG_A6XX_CP_PERFCTR_CP_SEL_5 0x000008d5 + +#define REG_A6XX_CP_PERFCTR_CP_SEL_6 0x000008d6 + +#define REG_A6XX_CP_PERFCTR_CP_SEL_7 0x000008d7 + +#define REG_A6XX_CP_PERFCTR_CP_SEL_8 0x000008d8 + +#define REG_A6XX_CP_PERFCTR_CP_SEL_9 0x000008d9 + +#define REG_A6XX_CP_PERFCTR_CP_SEL_10 0x000008da + +#define REG_A6XX_CP_PERFCTR_CP_SEL_11 0x000008db + +#define REG_A6XX_CP_PERFCTR_CP_SEL_12 0x000008dc + +#define REG_A6XX_CP_PERFCTR_CP_SEL_13 0x000008dd + +#define REG_A6XX_CP_CRASH_SCRIPT_BASE_LO 0x00000900 + +#define REG_A6XX_CP_CRASH_SCRIPT_BASE_HI 0x00000901 + +#define REG_A6XX_CP_CRASH_DUMP_CNTL 0x00000902 + +#define REG_A6XX_CP_CRASH_DUMP_STATUS 0x00000903 + +#define REG_A6XX_CP_SQE_STAT_ADDR 0x00000908 + +#define REG_A6XX_CP_SQE_STAT_DATA 0x00000909 + +#define REG_A6XX_CP_DRAW_STATE_ADDR 0x0000090a + +#define REG_A6XX_CP_DRAW_STATE_DATA 0x0000090b + +#define REG_A6XX_CP_ROQ_DBG_ADDR 0x0000090c + +#define REG_A6XX_CP_ROQ_DBG_DATA 0x0000090d + +#define REG_A6XX_CP_MEM_POOL_DBG_ADDR 0x0000090e + +#define REG_A6XX_CP_MEM_POOL_DBG_DATA 0x0000090f + +#define REG_A6XX_CP_SQE_UCODE_DBG_ADDR 0x00000910 + +#define REG_A6XX_CP_SQE_UCODE_DBG_DATA 0x00000911 + +#define REG_A6XX_CP_IB1_BASE 0x00000928 + +#define REG_A6XX_CP_IB1_BASE_HI 0x00000929 + +#define REG_A6XX_CP_IB1_REM_SIZE 0x0000092a + +#define REG_A6XX_CP_IB2_BASE 0x0000092b + +#define REG_A6XX_CP_IB2_BASE_HI 0x0000092c + +#define REG_A6XX_CP_IB2_REM_SIZE 0x0000092d + +#define REG_A6XX_CP_ALWAYS_ON_COUNTER_LO 0x00000980 + +#define REG_A6XX_CP_ALWAYS_ON_COUNTER_HI 0x00000981 + +#define REG_A6XX_CP_AHB_CNTL 0x0000098d + +#define REG_A6XX_CP_APERTURE_CNTL_HOST 0x00000a00 + +#define REG_A6XX_CP_APERTURE_CNTL_CD 0x00000a03 + +#define REG_A6XX_VSC_ADDR_MODE_CNTL 0x00000c01 + +#define REG_A6XX_RBBM_INT_0_STATUS 0x00000201 + +#define REG_A6XX_RBBM_STATUS 0x00000210 +#define A6XX_RBBM_STATUS_GPU_BUSY_IGN_AHB 0x00800000 +#define A6XX_RBBM_STATUS_GPU_BUSY_IGN_AHB_CP 0x00400000 +#define A6XX_RBBM_STATUS_HLSQ_BUSY 0x00200000 
+#define A6XX_RBBM_STATUS_VSC_BUSY 0x00100000 +#define A6XX_RBBM_STATUS_TPL1_BUSY 0x00080000 +#define A6XX_RBBM_STATUS_SP_BUSY 0x00040000 +#define A6XX_RBBM_STATUS_UCHE_BUSY 0x00020000 +#define A6XX_RBBM_STATUS_VPC_BUSY 0x00010000 +#define A6XX_RBBM_STATUS_VFD_BUSY 0x00008000 +#define A6XX_RBBM_STATUS_TESS_BUSY 0x00004000 +#define A6XX_RBBM_STATUS_PC_VSD_BUSY 0x00002000 +#define A6XX_RBBM_STATUS_PC_DCALL_BUSY 0x00001000 +#define A6XX_RBBM_STATUS_COM_DCOM_BUSY 0x00000800 +#define A6XX_RBBM_STATUS_LRZ_BUSY 0x00000400 +#define A6XX_RBBM_STATUS_A2D_BUSY 0x00000200 +#define A6XX_RBBM_STATUS_CCU_BUSY 0x00000100 +#define A6XX_RBBM_STATUS_RB_BUSY 0x00000080 +#define A6XX_RBBM_STATUS_RAS_BUSY 0x00000040 +#define A6XX_RBBM_STATUS_TSE_BUSY 0x00000020 +#define A6XX_RBBM_STATUS_VBIF_BUSY 0x00000010 +#define A6XX_RBBM_STATUS_GFX_DBGC_BUSY 0x00000008 +#define A6XX_RBBM_STATUS_CP_BUSY 0x00000004 +#define A6XX_RBBM_STATUS_CP_AHB_BUSY_CP_MASTER 0x00000002 +#define A6XX_RBBM_STATUS_CP_AHB_BUSY_CX_MASTER 0x00000001 + +#define REG_A6XX_RBBM_STATUS3 0x00000213 + +#define REG_A6XX_RBBM_VBIF_GX_RESET_STATUS 0x00000215 + +#define REG_A6XX_RBBM_PERFCTR_CP_0_LO 0x00000400 + +#define REG_A6XX_RBBM_PERFCTR_CP_0_HI 0x00000401 + +#define REG_A6XX_RBBM_PERFCTR_CP_1_LO 0x00000402 + +#define REG_A6XX_RBBM_PERFCTR_CP_1_HI 0x00000403 + +#define REG_A6XX_RBBM_PERFCTR_CP_2_LO 0x00000404 + +#define REG_A6XX_RBBM_PERFCTR_CP_2_HI 0x00000405 + +#define REG_A6XX_RBBM_PERFCTR_CP_3_LO 0x00000406 + +#define REG_A6XX_RBBM_PERFCTR_CP_3_HI 0x00000407 + +#define REG_A6XX_RBBM_PERFCTR_CP_4_LO 0x00000408 + +#define REG_A6XX_RBBM_PERFCTR_CP_4_HI 0x00000409 + +#define REG_A6XX_RBBM_PERFCTR_CP_5_LO 0x0000040a + +#define REG_A6XX_RBBM_PERFCTR_CP_5_HI 0x0000040b + +#define REG_A6XX_RBBM_PERFCTR_CP_6_LO 0x0000040c + +#define REG_A6XX_RBBM_PERFCTR_CP_6_HI 0x0000040d + +#define REG_A6XX_RBBM_PERFCTR_CP_7_LO 0x0000040e + +#define REG_A6XX_RBBM_PERFCTR_CP_7_HI 0x0000040f + +#define REG_A6XX_RBBM_PERFCTR_CP_8_LO 0x00000410 + 
+#define REG_A6XX_RBBM_PERFCTR_CP_8_HI 0x00000411 + +#define REG_A6XX_RBBM_PERFCTR_CP_9_LO 0x00000412 + +#define REG_A6XX_RBBM_PERFCTR_CP_9_HI 0x00000413 + +#define REG_A6XX_RBBM_PERFCTR_CP_10_LO 0x00000414 + +#define REG_A6XX_RBBM_PERFCTR_CP_10_HI 0x00000415 + +#define REG_A6XX_RBBM_PERFCTR_CP_11_LO 0x00000416 + +#define REG_A6XX_RBBM_PERFCTR_CP_11_HI 0x00000417 + +#define REG_A6XX_RBBM_PERFCTR_CP_12_LO 0x00000418 + +#define REG_A6XX_RBBM_PERFCTR_CP_12_HI 0x00000419 + +#define REG_A6XX_RBBM_PERFCTR_CP_13_LO 0x0000041a + +#define REG_A6XX_RBBM_PERFCTR_CP_13_HI 0x0000041b + +#define REG_A6XX_RBBM_PERFCTR_RBBM_0_LO 0x0000041c + +#define REG_A6XX_RBBM_PERFCTR_RBBM_0_HI 0x0000041d + +#define REG_A6XX_RBBM_PERFCTR_RBBM_1_LO 0x0000041e + +#define REG_A6XX_RBBM_PERFCTR_RBBM_1_HI 0x0000041f + +#define REG_A6XX_RBBM_PERFCTR_RBBM_2_LO 0x00000420 + +#define REG_A6XX_RBBM_PERFCTR_RBBM_2_HI 0x00000421 + +#define REG_A6XX_RBBM_PERFCTR_RBBM_3_LO 0x00000422 + +#define REG_A6XX_RBBM_PERFCTR_RBBM_3_HI 0x00000423 + +#define REG_A6XX_RBBM_PERFCTR_PC_0_LO 0x00000424 + +#define REG_A6XX_RBBM_PERFCTR_PC_0_HI 0x00000425 + +#define REG_A6XX_RBBM_PERFCTR_PC_1_LO 0x00000426 + +#define REG_A6XX_RBBM_PERFCTR_PC_1_HI 0x00000427 + +#define REG_A6XX_RBBM_PERFCTR_PC_2_LO 0x00000428 + +#define REG_A6XX_RBBM_PERFCTR_PC_2_HI 0x00000429 + +#define REG_A6XX_RBBM_PERFCTR_PC_3_LO 0x0000042a + +#define REG_A6XX_RBBM_PERFCTR_PC_3_HI 0x0000042b + +#define REG_A6XX_RBBM_PERFCTR_PC_4_LO 0x0000042c + +#define REG_A6XX_RBBM_PERFCTR_PC_4_HI 0x0000042d + +#define REG_A6XX_RBBM_PERFCTR_PC_5_LO 0x0000042e + +#define REG_A6XX_RBBM_PERFCTR_PC_5_HI 0x0000042f + +#define REG_A6XX_RBBM_PERFCTR_PC_6_LO 0x00000430 + +#define REG_A6XX_RBBM_PERFCTR_PC_6_HI 0x00000431 + +#define REG_A6XX_RBBM_PERFCTR_PC_7_LO 0x00000432 + +#define REG_A6XX_RBBM_PERFCTR_PC_7_HI 0x00000433 + +#define REG_A6XX_RBBM_PERFCTR_VFD_0_LO 0x00000434 + +#define REG_A6XX_RBBM_PERFCTR_VFD_0_HI 0x00000435 + +#define REG_A6XX_RBBM_PERFCTR_VFD_1_LO 
0x00000436 + +#define REG_A6XX_RBBM_PERFCTR_VFD_1_HI 0x00000437 + +#define REG_A6XX_RBBM_PERFCTR_VFD_2_LO 0x00000438 + +#define REG_A6XX_RBBM_PERFCTR_VFD_2_HI 0x00000439 + +#define REG_A6XX_RBBM_PERFCTR_VFD_3_LO 0x0000043a + +#define REG_A6XX_RBBM_PERFCTR_VFD_3_HI 0x0000043b + +#define REG_A6XX_RBBM_PERFCTR_VFD_4_LO 0x0000043c + +#define REG_A6XX_RBBM_PERFCTR_VFD_4_HI 0x0000043d + +#define REG_A6XX_RBBM_PERFCTR_VFD_5_LO 0x0000043e + +#define REG_A6XX_RBBM_PERFCTR_VFD_5_HI 0x0000043f + +#define REG_A6XX_RBBM_PERFCTR_VFD_6_LO 0x00000440 + +#define REG_A6XX_RBBM_PERFCTR_VFD_6_HI 0x00000441 + +#define REG_A6XX_RBBM_PERFCTR_VFD_7_LO 0x00000442 + +#define REG_A6XX_RBBM_PERFCTR_VFD_7_HI 0x00000443 + +#define REG_A6XX_RBBM_PERFCTR_HLSQ_0_LO 0x00000444 + +#define REG_A6XX_RBBM_PERFCTR_HLSQ_0_HI 0x00000445 + +#define REG_A6XX_RBBM_PERFCTR_HLSQ_1_LO 0x00000446 + +#define REG_A6XX_RBBM_PERFCTR_HLSQ_1_HI 0x00000447 + +#define REG_A6XX_RBBM_PERFCTR_HLSQ_2_LO 0x00000448 + +#define REG_A6XX_RBBM_PERFCTR_HLSQ_2_HI 0x00000449 + +#define REG_A6XX_RBBM_PERFCTR_HLSQ_3_LO 0x0000044a + +#define REG_A6XX_RBBM_PERFCTR_HLSQ_3_HI 0x0000044b + +#define REG_A6XX_RBBM_PERFCTR_HLSQ_4_LO 0x0000044c + +#define REG_A6XX_RBBM_PERFCTR_HLSQ_4_HI 0x0000044d + +#define REG_A6XX_RBBM_PERFCTR_HLSQ_5_LO 0x0000044e + +#define REG_A6XX_RBBM_PERFCTR_HLSQ_5_HI 0x0000044f + +#define REG_A6XX_RBBM_PERFCTR_VPC_0_LO 0x00000450 + +#define REG_A6XX_RBBM_PERFCTR_VPC_0_HI 0x00000451 + +#define REG_A6XX_RBBM_PERFCTR_VPC_1_LO 0x00000452 + +#define REG_A6XX_RBBM_PERFCTR_VPC_1_HI 0x00000453 + +#define REG_A6XX_RBBM_PERFCTR_VPC_2_LO 0x00000454 + +#define REG_A6XX_RBBM_PERFCTR_VPC_2_HI 0x00000455 + +#define REG_A6XX_RBBM_PERFCTR_VPC_3_LO 0x00000456 + +#define REG_A6XX_RBBM_PERFCTR_VPC_3_HI 0x00000457 + +#define REG_A6XX_RBBM_PERFCTR_VPC_4_LO 0x00000458 + +#define REG_A6XX_RBBM_PERFCTR_VPC_4_HI 0x00000459 + +#define REG_A6XX_RBBM_PERFCTR_VPC_5_LO 0x0000045a + +#define REG_A6XX_RBBM_PERFCTR_VPC_5_HI 0x0000045b + +#define 
REG_A6XX_RBBM_PERFCTR_CCU_0_LO 0x0000045c + +#define REG_A6XX_RBBM_PERFCTR_CCU_0_HI 0x0000045d + +#define REG_A6XX_RBBM_PERFCTR_CCU_1_LO 0x0000045e + +#define REG_A6XX_RBBM_PERFCTR_CCU_1_HI 0x0000045f + +#define REG_A6XX_RBBM_PERFCTR_CCU_2_LO 0x00000460 + +#define REG_A6XX_RBBM_PERFCTR_CCU_2_HI 0x00000461 + +#define REG_A6XX_RBBM_PERFCTR_CCU_3_LO 0x00000462 + +#define REG_A6XX_RBBM_PERFCTR_CCU_3_HI 0x00000463 + +#define REG_A6XX_RBBM_PERFCTR_CCU_4_LO 0x00000464 + +#define REG_A6XX_RBBM_PERFCTR_CCU_4_HI 0x00000465 + +#define REG_A6XX_RBBM_PERFCTR_TSE_0_LO 0x00000466 + +#define REG_A6XX_RBBM_PERFCTR_TSE_0_HI 0x00000467 + +#define REG_A6XX_RBBM_PERFCTR_TSE_1_LO 0x00000468 + +#define REG_A6XX_RBBM_PERFCTR_TSE_1_HI 0x00000469 + +#define REG_A6XX_RBBM_PERFCTR_TSE_2_LO 0x0000046a + +#define REG_A6XX_RBBM_PERFCTR_TSE_2_HI 0x0000046b + +#define REG_A6XX_RBBM_PERFCTR_TSE_3_LO 0x0000046c + +#define REG_A6XX_RBBM_PERFCTR_TSE_3_HI 0x0000046d + +#define REG_A6XX_RBBM_PERFCTR_RAS_0_LO 0x0000046e + +#define REG_A6XX_RBBM_PERFCTR_RAS_0_HI 0x0000046f + +#define REG_A6XX_RBBM_PERFCTR_RAS_1_LO 0x00000470 + +#define REG_A6XX_RBBM_PERFCTR_RAS_1_HI 0x00000471 + +#define REG_A6XX_RBBM_PERFCTR_RAS_2_LO 0x00000472 + +#define REG_A6XX_RBBM_PERFCTR_RAS_2_HI 0x00000473 + +#define REG_A6XX_RBBM_PERFCTR_RAS_3_LO 0x00000474 + +#define REG_A6XX_RBBM_PERFCTR_RAS_3_HI 0x00000475 + +#define REG_A6XX_RBBM_PERFCTR_UCHE_0_LO 0x00000476 + +#define REG_A6XX_RBBM_PERFCTR_UCHE_0_HI 0x00000477 + +#define REG_A6XX_RBBM_PERFCTR_UCHE_1_LO 0x00000478 + +#define REG_A6XX_RBBM_PERFCTR_UCHE_1_HI 0x00000479 + +#define REG_A6XX_RBBM_PERFCTR_UCHE_2_LO 0x0000047a + +#define REG_A6XX_RBBM_PERFCTR_UCHE_2_HI 
0x0000047b + +#define REG_A6XX_RBBM_PERFCTR_UCHE_3_LO 0x0000047c + +#define REG_A6XX_RBBM_PERFCTR_UCHE_3_HI 0x0000047d + +#define REG_A6XX_RBBM_PERFCTR_UCHE_4_LO 0x0000047e + +#define REG_A6XX_RBBM_PERFCTR_UCHE_4_HI 0x0000047f + +#define REG_A6XX_RBBM_PERFCTR_UCHE_5_LO 0x00000480 + +#define REG_A6XX_RBBM_PERFCTR_UCHE_5_HI 0x00000481 + +#define REG_A6XX_RBBM_PERFCTR_UCHE_6_LO 0x00000482 + +#define REG_A6XX_RBBM_PERFCTR_UCHE_6_HI 0x00000483 + +#define REG_A6XX_RBBM_PERFCTR_UCHE_7_LO 0x00000484 + +#define REG_A6XX_RBBM_PERFCTR_UCHE_7_HI 0x00000485 + +#define REG_A6XX_RBBM_PERFCTR_UCHE_8_LO 0x00000486 + +#define REG_A6XX_RBBM_PERFCTR_UCHE_8_HI 0x00000487 + +#define REG_A6XX_RBBM_PERFCTR_UCHE_9_LO 0x00000488 + +#define REG_A6XX_RBBM_PERFCTR_UCHE_9_HI 0x00000489 + +#define REG_A6XX_RBBM_PERFCTR_UCHE_10_LO 0x0000048a + +#define REG_A6XX_RBBM_PERFCTR_UCHE_10_HI 0x0000048b + +#define REG_A6XX_RBBM_PERFCTR_UCHE_11_LO 0x0000048c + +#define REG_A6XX_RBBM_PERFCTR_UCHE_11_HI 0x0000048d + +#define REG_A6XX_RBBM_PERFCTR_TP_0_LO 0x0000048e + +#define REG_A6XX_RBBM_PERFCTR_TP_0_HI 0x0000048f + +#define REG_A6XX_RBBM_PERFCTR_TP_1_LO 0x00000490 + +#define REG_A6XX_RBBM_PERFCTR_TP_1_HI 0x00000491 + +#define REG_A6XX_RBBM_PERFCTR_TP_2_LO 0x00000492 + +#define REG_A6XX_RBBM_PERFCTR_TP_2_HI 0x00000493 + +#define REG_A6XX_RBBM_PERFCTR_TP_3_LO 0x00000494 + +#define REG_A6XX_RBBM_PERFCTR_TP_3_HI 0x00000495 + +#define REG_A6XX_RBBM_PERFCTR_TP_4_LO 0x00000496 + +#define REG_A6XX_RBBM_PERFCTR_TP_4_HI 0x00000497 + +#define REG_A6XX_RBBM_PERFCTR_TP_5_LO 0x00000498 + +#define REG_A6XX_RBBM_PERFCTR_TP_5_HI 0x00000499 + +#define REG_A6XX_RBBM_PERFCTR_TP_6_LO 0x0000049a + +#define REG_A6XX_RBBM_PERFCTR_TP_6_HI 0x0000049b + +#define REG_A6XX_RBBM_PERFCTR_TP_7_LO 0x0000049c + +#define REG_A6XX_RBBM_PERFCTR_TP_7_HI 0x0000049d + +#define REG_A6XX_RBBM_PERFCTR_TP_8_LO 0x0000049e + +#define REG_A6XX_RBBM_PERFCTR_TP_8_HI 0x0000049f + +#define REG_A6XX_RBBM_PERFCTR_TP_9_LO 0x000004a0 + +#define 
REG_A6XX_RBBM_PERFCTR_TP_9_HI 0x000004a1 + +#define REG_A6XX_RBBM_PERFCTR_TP_10_LO 0x000004a2 + +#define REG_A6XX_RBBM_PERFCTR_TP_10_HI 0x000004a3 + +#define REG_A6XX_RBBM_PERFCTR_TP_11_LO 0x000004a4 + +#define REG_A6XX_RBBM_PERFCTR_TP_11_HI 0x000004a5 + +#define REG_A6XX_RBBM_PERFCTR_SP_0_LO 0x000004a6 + +#define REG_A6XX_RBBM_PERFCTR_SP_0_HI 0x000004a7 + +#define REG_A6XX_RBBM_PERFCTR_SP_1_LO 0x000004a8 + +#define REG_A6XX_RBBM_PERFCTR_SP_1_HI 0x000004a9 + +#define REG_A6XX_RBBM_PERFCTR_SP_2_LO 0x000004aa + +#define REG_A6XX_RBBM_PERFCTR_SP_2_HI 0x000004ab + +#define REG_A6XX_RBBM_PERFCTR_SP_3_LO 0x000004ac + +#define REG_A6XX_RBBM_PERFCTR_SP_3_HI 0x000004ad + +#define REG_A6XX_RBBM_PERFCTR_SP_4_LO 0x000004ae + +#define REG_A6XX_RBBM_PERFCTR_SP_4_HI 0x000004af + +#define REG_A6XX_RBBM_PERFCTR_SP_5_LO 0x000004b0 + +#define REG_A6XX_RBBM_PERFCTR_SP_5_HI 0x000004b1 + +#define REG_A6XX_RBBM_PERFCTR_SP_6_LO 0x000004b2 + +#define REG_A6XX_RBBM_PERFCTR_SP_6_HI 0x000004b3 + +#define REG_A6XX_RBBM_PERFCTR_SP_7_LO 0x000004b4 + +#define REG_A6XX_RBBM_PERFCTR_SP_7_HI 0x000004b5 + +#define REG_A6XX_RBBM_PERFCTR_SP_8_LO 0x000004b6 + +#define REG_A6XX_RBBM_PERFCTR_SP_8_HI 0x000004b7 + +#define REG_A6XX_RBBM_PERFCTR_SP_9_LO 0x000004b8 + +#define REG_A6XX_RBBM_PERFCTR_SP_9_HI 0x000004b9 + +#define REG_A6XX_RBBM_PERFCTR_SP_10_LO 0x000004ba + +#define REG_A6XX_RBBM_PERFCTR_SP_10_HI 0x000004bb + +#define REG_A6XX_RBBM_PERFCTR_SP_11_LO 0x000004bc + +#define REG_A6XX_RBBM_PERFCTR_SP_11_HI 0x000004bd + +#define REG_A6XX_RBBM_PERFCTR_SP_12_LO 0x000004be + +#define REG_A6XX_RBBM_PERFCTR_SP_12_HI 0x000004bf + +#define REG_A6XX_RBBM_PERFCTR_SP_13_LO 0x000004c0 + +#define REG_A6XX_RBBM_PERFCTR_SP_13_HI 0x000004c1 + +#define REG_A6XX_RBBM_PERFCTR_SP_14_LO 0x000004c2 + +#define REG_A6XX_RBBM_PERFCTR_SP_14_HI 0x000004c3 + +#define REG_A6XX_RBBM_PERFCTR_SP_15_LO 0x000004c4 + +#define REG_A6XX_RBBM_PERFCTR_SP_15_HI 0x000004c5 + +#define REG_A6XX_RBBM_PERFCTR_SP_16_LO 0x000004c6 + +#define 
REG_A6XX_RBBM_PERFCTR_SP_16_HI 0x000004c7 + +#define REG_A6XX_RBBM_PERFCTR_SP_17_LO 0x000004c8 + +#define REG_A6XX_RBBM_PERFCTR_SP_17_HI 0x000004c9 + +#define REG_A6XX_RBBM_PERFCTR_SP_18_LO 0x000004ca + +#define REG_A6XX_RBBM_PERFCTR_SP_18_HI 0x000004cb + +#define REG_A6XX_RBBM_PERFCTR_SP_19_LO 0x000004cc + +#define REG_A6XX_RBBM_PERFCTR_SP_19_HI 0x000004cd + +#define REG_A6XX_RBBM_PERFCTR_SP_20_LO 0x000004ce + +#define REG_A6XX_RBBM_PERFCTR_SP_20_HI 0x000004cf + +#define REG_A6XX_RBBM_PERFCTR_SP_21_LO 0x000004d0 + +#define REG_A6XX_RBBM_PERFCTR_SP_21_HI 0x000004d1 + +#define REG_A6XX_RBBM_PERFCTR_SP_22_LO 0x000004d2 + +#define REG_A6XX_RBBM_PERFCTR_SP_22_HI 0x000004d3 + +#define REG_A6XX_RBBM_PERFCTR_SP_23_LO 0x000004d4 + +#define REG_A6XX_RBBM_PERFCTR_SP_23_HI 0x000004d5 + +#define REG_A6XX_RBBM_PERFCTR_RB_0_LO 0x000004d6 + +#define REG_A6XX_RBBM_PERFCTR_RB_0_HI 0x000004d7 + +#define REG_A6XX_RBBM_PERFCTR_RB_1_LO 0x000004d8 + +#define REG_A6XX_RBBM_PERFCTR_RB_1_HI 0x000004d9 + +#define REG_A6XX_RBBM_PERFCTR_RB_2_LO 0x000004da + +#define REG_A6XX_RBBM_PERFCTR_RB_2_HI 0x000004db + +#define REG_A6XX_RBBM_PERFCTR_RB_3_LO 0x000004dc + +#define REG_A6XX_RBBM_PERFCTR_RB_3_HI 0x000004dd + +#define REG_A6XX_RBBM_PERFCTR_RB_4_LO 0x000004de + +#define REG_A6XX_RBBM_PERFCTR_RB_4_HI 0x000004df + +#define REG_A6XX_RBBM_PERFCTR_RB_5_LO 0x000004e0 + +#define REG_A6XX_RBBM_PERFCTR_RB_5_HI 0x000004e1 + +#define REG_A6XX_RBBM_PERFCTR_RB_6_LO 0x000004e2 + +#define REG_A6XX_RBBM_PERFCTR_RB_6_HI 0x000004e3 + +#define REG_A6XX_RBBM_PERFCTR_RB_7_LO 0x000004e4 + +#define REG_A6XX_RBBM_PERFCTR_RB_7_HI 0x000004e5 + +#define REG_A6XX_RBBM_PERFCTR_VSC_0_LO 0x000004e6 + +#define REG_A6XX_RBBM_PERFCTR_VSC_0_HI 0x000004e7 + +#define REG_A6XX_RBBM_PERFCTR_VSC_1_LO 0x000004e8 + +#define REG_A6XX_RBBM_PERFCTR_VSC_1_HI 0x000004e9 + +#define REG_A6XX_RBBM_PERFCTR_LRZ_0_LO 0x000004ea + +#define REG_A6XX_RBBM_PERFCTR_LRZ_0_HI 0x000004eb + +#define REG_A6XX_RBBM_PERFCTR_LRZ_1_LO 0x000004ec + +#define 
REG_A6XX_RBBM_PERFCTR_LRZ_1_HI 0x000004ed + +#define REG_A6XX_RBBM_PERFCTR_LRZ_2_LO 0x000004ee + +#define REG_A6XX_RBBM_PERFCTR_LRZ_2_HI 0x000004ef + +#define REG_A6XX_RBBM_PERFCTR_LRZ_3_LO 0x000004f0 + +#define REG_A6XX_RBBM_PERFCTR_LRZ_3_HI 0x000004f1 + +#define REG_A6XX_RBBM_PERFCTR_CMP_0_LO 0x000004f2 + +#define REG_A6XX_RBBM_PERFCTR_CMP_0_HI 0x000004f3 + +#define REG_A6XX_RBBM_PERFCTR_CMP_1_LO 0x000004f4 + +#define REG_A6XX_RBBM_PERFCTR_CMP_1_HI 0x000004f5 + +#define REG_A6XX_RBBM_PERFCTR_CMP_2_LO 0x000004f6 + +#define REG_A6XX_RBBM_PERFCTR_CMP_2_HI 0x000004f7 + +#define REG_A6XX_RBBM_PERFCTR_CMP_3_LO 0x000004f8 + +#define REG_A6XX_RBBM_PERFCTR_CMP_3_HI 0x000004f9 + +#define REG_A6XX_RBBM_PERFCTR_CNTL 0x00000500 + +#define REG_A6XX_RBBM_PERFCTR_LOAD_CMD0 0x00000501 + +#define REG_A6XX_RBBM_PERFCTR_LOAD_CMD1 0x00000502 + +#define REG_A6XX_RBBM_PERFCTR_LOAD_CMD2 0x00000503 + +#define REG_A6XX_RBBM_PERFCTR_LOAD_CMD3 0x00000504 + +#define REG_A6XX_RBBM_PERFCTR_LOAD_VALUE_LO 0x00000505 + +#define REG_A6XX_RBBM_PERFCTR_LOAD_VALUE_HI 0x00000506 + +#define REG_A6XX_RBBM_PERFCTR_RBBM_SEL_0 0x00000507 + +#define REG_A6XX_RBBM_PERFCTR_RBBM_SEL_1 0x00000508 + +#define REG_A6XX_RBBM_PERFCTR_RBBM_SEL_2 0x00000509 + +#define REG_A6XX_RBBM_PERFCTR_RBBM_SEL_3 0x0000050a + +#define REG_A6XX_RBBM_PERFCTR_GPU_BUSY_MASKED 0x0000050b + +#define REG_A6XX_RBBM_ISDB_CNT 0x00000533 + +#define REG_A6XX_RBBM_SECVID_TRUST_CNTL 0x0000f400 + +#define REG_A6XX_RBBM_SECVID_TSB_TRUSTED_BASE_LO 0x0000f800 + +#define REG_A6XX_RBBM_SECVID_TSB_TRUSTED_BASE_HI 0x0000f801 + +#define REG_A6XX_RBBM_SECVID_TSB_TRUSTED_SIZE 0x0000f802 + +#define REG_A6XX_RBBM_SECVID_TSB_CNTL 0x0000f803 + +#define REG_A6XX_RBBM_SECVID_TSB_ADDR_MODE_CNTL 0x0000f810 + +#define REG_A6XX_RBBM_VBIF_CLIENT_QOS_CNTL 0x00000010 + +#define REG_A6XX_RBBM_INTERFACE_HANG_INT_CNTL 0x0000001f + +#define REG_A6XX_RBBM_INT_CLEAR_CMD 0x00000037 + +#define REG_A6XX_RBBM_INT_0_MASK 0x00000038 + +#define REG_A6XX_RBBM_SP_HYST_CNT 0x00000042 
+ +#define REG_A6XX_RBBM_SW_RESET_CMD 0x00000043 + +#define REG_A6XX_RBBM_RAC_THRESHOLD_CNT 0x00000044 + +#define REG_A6XX_RBBM_BLOCK_SW_RESET_CMD 0x00000045 + +#define REG_A6XX_RBBM_BLOCK_SW_RESET_CMD2 0x00000046 + +#define REG_A6XX_RBBM_CLOCK_CNTL 0x000000ae + +#define REG_A6XX_RBBM_CLOCK_CNTL_SP0 0x000000b0 + +#define REG_A6XX_RBBM_CLOCK_CNTL_SP1 0x000000b1 + +#define REG_A6XX_RBBM_CLOCK_CNTL_SP2 0x000000b2 + +#define REG_A6XX_RBBM_CLOCK_CNTL_SP3 0x000000b3 + +#define REG_A6XX_RBBM_CLOCK_CNTL2_SP0 0x000000b4 + +#define REG_A6XX_RBBM_CLOCK_CNTL2_SP1 0x000000b5 + +#define REG_A6XX_RBBM_CLOCK_CNTL2_SP2 0x000000b6 + +#define REG_A6XX_RBBM_CLOCK_CNTL2_SP3 0x000000b7 + +#define REG_A6XX_RBBM_CLOCK_DELAY_SP0 0x000000b8 + +#define REG_A6XX_RBBM_CLOCK_DELAY_SP1 0x000000b9 + +#define REG_A6XX_RBBM_CLOCK_DELAY_SP2 0x000000ba + +#define REG_A6XX_RBBM_CLOCK_DELAY_SP3 0x000000bb + +#define REG_A6XX_RBBM_CLOCK_HYST_SP0 0x000000bc + +#define REG_A6XX_RBBM_CLOCK_HYST_SP1 0x000000bd + +#define REG_A6XX_RBBM_CLOCK_HYST_SP2 0x000000be + +#define REG_A6XX_RBBM_CLOCK_HYST_SP3 0x000000bf + +#define REG_A6XX_RBBM_CLOCK_CNTL_TP0 0x000000c0 + +#define REG_A6XX_RBBM_CLOCK_CNTL_TP1 0x000000c1 + +#define REG_A6XX_RBBM_CLOCK_CNTL_TP2 0x000000c2 + +#define REG_A6XX_RBBM_CLOCK_CNTL_TP3 0x000000c3 + +#define REG_A6XX_RBBM_CLOCK_CNTL2_TP0 0x000000c4 + +#define REG_A6XX_RBBM_CLOCK_CNTL2_TP1 0x000000c5 + +#define REG_A6XX_RBBM_CLOCK_CNTL2_TP2 0x000000c6 + +#define REG_A6XX_RBBM_CLOCK_CNTL2_TP3 0x000000c7 + +#define REG_A6XX_RBBM_CLOCK_CNTL3_TP0 0x000000c8 + +#define REG_A6XX_RBBM_CLOCK_CNTL3_TP1 0x000000c9 + +#define REG_A6XX_RBBM_CLOCK_CNTL3_TP2 0x000000ca + +#define REG_A6XX_RBBM_CLOCK_CNTL3_TP3 0x000000cb + +#define REG_A6XX_RBBM_CLOCK_CNTL4_TP0 0x000000cc + +#define REG_A6XX_RBBM_CLOCK_CNTL4_TP1 0x000000cd + +#define REG_A6XX_RBBM_CLOCK_CNTL4_TP2 0x000000ce + +#define REG_A6XX_RBBM_CLOCK_CNTL4_TP3 0x000000cf + +#define REG_A6XX_RBBM_CLOCK_DELAY_TP0 0x000000d0 + +#define 
REG_A6XX_RBBM_CLOCK_DELAY_TP1 0x000000d1 + +#define REG_A6XX_RBBM_CLOCK_DELAY_TP2 0x000000d2 + +#define REG_A6XX_RBBM_CLOCK_DELAY_TP3 0x000000d3 + +#define REG_A6XX_RBBM_CLOCK_DELAY2_TP0 0x000000d4 + +#define REG_A6XX_RBBM_CLOCK_DELAY2_TP1 0x000000d5 + +#define REG_A6XX_RBBM_CLOCK_DELAY2_TP2 0x000000d6 + +#define REG_A6XX_RBBM_CLOCK_DELAY2_TP3 0x000000d7 + +#define REG_A6XX_RBBM_CLOCK_DELAY3_TP0 0x000000d8 + +#define REG_A6XX_RBBM_CLOCK_DELAY3_TP1 0x000000d9 + +#define REG_A6XX_RBBM_CLOCK_DELAY3_TP2 0x000000da + +#define REG_A6XX_RBBM_CLOCK_DELAY3_TP3 0x000000db + +#define REG_A6XX_RBBM_CLOCK_DELAY4_TP0 0x000000dc + +#define REG_A6XX_RBBM_CLOCK_DELAY4_TP1 0x000000dd + +#define REG_A6XX_RBBM_CLOCK_DELAY4_TP2 0x000000de + +#define REG_A6XX_RBBM_CLOCK_DELAY4_TP3 0x000000df + +#define REG_A6XX_RBBM_CLOCK_HYST_TP0 0x000000e0 + +#define REG_A6XX_RBBM_CLOCK_HYST_TP1 0x000000e1 + +#define REG_A6XX_RBBM_CLOCK_HYST_TP2 0x000000e2 + +#define REG_A6XX_RBBM_CLOCK_HYST_TP3 0x000000e3 + +#define REG_A6XX_RBBM_CLOCK_HYST2_TP0 0x000000e4 + +#define REG_A6XX_RBBM_CLOCK_HYST2_TP1 0x000000e5 + +#define REG_A6XX_RBBM_CLOCK_HYST2_TP2 0x000000e6 + +#define REG_A6XX_RBBM_CLOCK_HYST2_TP3 0x000000e7 + +#define REG_A6XX_RBBM_CLOCK_HYST3_TP0 0x000000e8 + +#define REG_A6XX_RBBM_CLOCK_HYST3_TP1 0x000000e9 + +#define REG_A6XX_RBBM_CLOCK_HYST3_TP2 0x000000ea + +#define REG_A6XX_RBBM_CLOCK_HYST3_TP3 0x000000eb + +#define REG_A6XX_RBBM_CLOCK_HYST4_TP0 0x000000ec + +#define REG_A6XX_RBBM_CLOCK_HYST4_TP1 0x000000ed + +#define REG_A6XX_RBBM_CLOCK_HYST4_TP2 0x000000ee + +#define REG_A6XX_RBBM_CLOCK_HYST4_TP3 0x000000ef + +#define REG_A6XX_RBBM_CLOCK_CNTL_RB0 0x000000f0 + +#define REG_A6XX_RBBM_CLOCK_CNTL_RB1 0x000000f1 + +#define REG_A6XX_RBBM_CLOCK_CNTL_RB2 0x000000f2 + +#define REG_A6XX_RBBM_CLOCK_CNTL_RB3 0x000000f3 + +#define REG_A6XX_RBBM_CLOCK_CNTL2_RB0 0x000000f4 + +#define REG_A6XX_RBBM_CLOCK_CNTL2_RB1 0x000000f5 + +#define REG_A6XX_RBBM_CLOCK_CNTL2_RB2 0x000000f6 + +#define 
REG_A6XX_RBBM_CLOCK_CNTL2_RB3 0x000000f7 + +#define REG_A6XX_RBBM_CLOCK_CNTL_CCU0 0x000000f8 + +#define REG_A6XX_RBBM_CLOCK_CNTL_CCU1 0x000000f9 + +#define REG_A6XX_RBBM_CLOCK_CNTL_CCU2 0x000000fa + +#define REG_A6XX_RBBM_CLOCK_CNTL_CCU3 0x000000fb + +#define REG_A6XX_RBBM_CLOCK_HYST_RB_CCU0 0x00000100 + +#define REG_A6XX_RBBM_CLOCK_HYST_RB_CCU1 0x00000101 + +#define REG_A6XX_RBBM_CLOCK_HYST_RB_CCU2 0x00000102 + +#define REG_A6XX_RBBM_CLOCK_HYST_RB_CCU3 0x00000103 + +#define REG_A6XX_RBBM_CLOCK_CNTL_RAC 0x00000104 + +#define REG_A6XX_RBBM_CLOCK_CNTL2_RAC 0x00000105 + +#define REG_A6XX_RBBM_CLOCK_DELAY_RAC 0x00000106 + +#define REG_A6XX_RBBM_CLOCK_HYST_RAC 0x00000107 + +#define REG_A6XX_RBBM_CLOCK_CNTL_TSE_RAS_RBBM 0x00000108 + +#define REG_A6XX_RBBM_CLOCK_DELAY_TSE_RAS_RBBM 0x00000109 + +#define REG_A6XX_RBBM_CLOCK_HYST_TSE_RAS_RBBM 0x0000010a + +#define REG_A6XX_RBBM_CLOCK_CNTL_UCHE 0x0000010b + +#define REG_A6XX_RBBM_CLOCK_CNTL2_UCHE 0x0000010c + +#define REG_A6XX_RBBM_CLOCK_CNTL3_UCHE 0x0000010d + +#define REG_A6XX_RBBM_CLOCK_CNTL4_UCHE 0x0000010e + +#define REG_A6XX_RBBM_CLOCK_DELAY_UCHE 0x0000010f + +#define REG_A6XX_RBBM_CLOCK_HYST_UCHE 0x00000110 + +#define REG_A6XX_RBBM_CLOCK_MODE_VFD 0x00000111 + +#define REG_A6XX_RBBM_CLOCK_DELAY_VFD 0x00000112 + +#define REG_A6XX_RBBM_CLOCK_HYST_VFD 0x00000113 + +#define REG_A6XX_RBBM_CLOCK_MODE_GPC 0x00000114 + +#define REG_A6XX_RBBM_CLOCK_DELAY_GPC 0x00000115 + +#define REG_A6XX_RBBM_CLOCK_HYST_GPC 0x00000116 + +#define REG_A6XX_RBBM_CLOCK_DELAY_HLSQ_2 0x00000117 + +#define REG_A6XX_RBBM_CLOCK_CNTL_GMU_GX 0x00000118 + +#define REG_A6XX_RBBM_CLOCK_DELAY_GMU_GX 0x00000119 + +#define REG_A6XX_RBBM_CLOCK_HYST_GMU_GX 0x0000011a + +#define REG_A6XX_RBBM_CLOCK_MODE_HLSQ 0x0000011b + +#define REG_A6XX_RBBM_CLOCK_DELAY_HLSQ 0x0000011c + +#define REG_A6XX_DBGC_CFG_DBGBUS_SEL_A 0x00000600 + +#define REG_A6XX_DBGC_CFG_DBGBUS_SEL_B 0x00000601 + +#define REG_A6XX_DBGC_CFG_DBGBUS_SEL_C 0x00000602 + +#define 
REG_A6XX_DBGC_CFG_DBGBUS_SEL_D 0x00000603 +#define A6XX_DBGC_CFG_DBGBUS_SEL_D_PING_INDEX__MASK 0x000000ff +#define A6XX_DBGC_CFG_DBGBUS_SEL_D_PING_INDEX__SHIFT 0 +static inline uint32_t A6XX_DBGC_CFG_DBGBUS_SEL_D_PING_INDEX(uint32_t val) +{ + return ((val) << A6XX_DBGC_CFG_DBGBUS_SEL_D_PING_INDEX__SHIFT) & A6XX_DBGC_CFG_DBGBUS_SEL_D_PING_INDEX__MASK; +} +#define A6XX_DBGC_CFG_DBGBUS_SEL_D_PING_BLK_SEL__MASK 0x0000ff00 +#define A6XX_DBGC_CFG_DBGBUS_SEL_D_PING_BLK_SEL__SHIFT 8 +static inline uint32_t A6XX_DBGC_CFG_DBGBUS_SEL_D_PING_BLK_SEL(uint32_t val) +{ + return ((val) << A6XX_DBGC_CFG_DBGBUS_SEL_D_PING_BLK_SEL__SHIFT) & A6XX_DBGC_CFG_DBGBUS_SEL_D_PING_BLK_SEL__MASK; +} + +#define REG_A6XX_DBGC_CFG_DBGBUS_CNTLT 0x00000604 +#define A6XX_DBGC_CFG_DBGBUS_CNTLT_TRACEEN__MASK 0x0000003f +#define A6XX_DBGC_CFG_DBGBUS_CNTLT_TRACEEN__SHIFT 0 +static inline uint32_t A6XX_DBGC_CFG_DBGBUS_CNTLT_TRACEEN(uint32_t val) +{ + return ((val) << A6XX_DBGC_CFG_DBGBUS_CNTLT_TRACEEN__SHIFT) & A6XX_DBGC_CFG_DBGBUS_CNTLT_TRACEEN__MASK; +} +#define A6XX_DBGC_CFG_DBGBUS_CNTLT_GRANU__MASK 0x00007000 +#define A6XX_DBGC_CFG_DBGBUS_CNTLT_GRANU__SHIFT 12 +static inline uint32_t A6XX_DBGC_CFG_DBGBUS_CNTLT_GRANU(uint32_t val) +{ + return ((val) << A6XX_DBGC_CFG_DBGBUS_CNTLT_GRANU__SHIFT) & A6XX_DBGC_CFG_DBGBUS_CNTLT_GRANU__MASK; +} +#define A6XX_DBGC_CFG_DBGBUS_CNTLT_SEGT__MASK 0xf0000000 +#define A6XX_DBGC_CFG_DBGBUS_CNTLT_SEGT__SHIFT 28 +static inline uint32_t A6XX_DBGC_CFG_DBGBUS_CNTLT_SEGT(uint32_t val) +{ + return ((val) << A6XX_DBGC_CFG_DBGBUS_CNTLT_SEGT__SHIFT) & A6XX_DBGC_CFG_DBGBUS_CNTLT_SEGT__MASK; +} + +#define REG_A6XX_DBGC_CFG_DBGBUS_CNTLM 0x00000605 +#define A6XX_DBGC_CFG_DBGBUS_CNTLM_ENABLE__MASK 0x0f000000 +#define A6XX_DBGC_CFG_DBGBUS_CNTLM_ENABLE__SHIFT 24 +static inline uint32_t A6XX_DBGC_CFG_DBGBUS_CNTLM_ENABLE(uint32_t val) +{ + return ((val) << A6XX_DBGC_CFG_DBGBUS_CNTLM_ENABLE__SHIFT) & A6XX_DBGC_CFG_DBGBUS_CNTLM_ENABLE__MASK; +} + +#define REG_A6XX_DBGC_CFG_DBGBUS_IVTL_0 
0x00000608 + +#define REG_A6XX_DBGC_CFG_DBGBUS_IVTL_1 0x00000609 + +#define REG_A6XX_DBGC_CFG_DBGBUS_IVTL_2 0x0000060a + +#define REG_A6XX_DBGC_CFG_DBGBUS_IVTL_3 0x0000060b + +#define REG_A6XX_DBGC_CFG_DBGBUS_MASKL_0 0x0000060c + +#define REG_A6XX_DBGC_CFG_DBGBUS_MASKL_1 0x0000060d + +#define REG_A6XX_DBGC_CFG_DBGBUS_MASKL_2 0x0000060e + +#define REG_A6XX_DBGC_CFG_DBGBUS_MASKL_3 0x0000060f + +#define REG_A6XX_DBGC_CFG_DBGBUS_BYTEL_0 0x00000610 +#define A6XX_DBGC_CFG_DBGBUS_BYTEL_0_BYTEL0__MASK 0x0000000f +#define A6XX_DBGC_CFG_DBGBUS_BYTEL_0_BYTEL0__SHIFT 0 +static inline uint32_t A6XX_DBGC_CFG_DBGBUS_BYTEL_0_BYTEL0(uint32_t val) +{ + return ((val) << A6XX_DBGC_CFG_DBGBUS_BYTEL_0_BYTEL0__SHIFT) & A6XX_DBGC_CFG_DBGBUS_BYTEL_0_BYTEL0__MASK; +} +#define A6XX_DBGC_CFG_DBGBUS_BYTEL_0_BYTEL1__MASK 0x000000f0 +#define A6XX_DBGC_CFG_DBGBUS_BYTEL_0_BYTEL1__SHIFT 4 +static inline uint32_t A6XX_DBGC_CFG_DBGBUS_BYTEL_0_BYTEL1(uint32_t val) +{ + return ((val) << A6XX_DBGC_CFG_DBGBUS_BYTEL_0_BYTEL1__SHIFT) & A6XX_DBGC_CFG_DBGBUS_BYTEL_0_BYTEL1__MASK; +} +#define A6XX_DBGC_CFG_DBGBUS_BYTEL_0_BYTEL2__MASK 0x00000f00 +#define A6XX_DBGC_CFG_DBGBUS_BYTEL_0_BYTEL2__SHIFT 8 +static inline uint32_t A6XX_DBGC_CFG_DBGBUS_BYTEL_0_BYTEL2(uint32_t val) +{ + return ((val) << A6XX_DBGC_CFG_DBGBUS_BYTEL_0_BYTEL2__SHIFT) & A6XX_DBGC_CFG_DBGBUS_BYTEL_0_BYTEL2__MASK; +} +#define A6XX_DBGC_CFG_DBGBUS_BYTEL_0_BYTEL3__MASK 0x0000f000 +#define A6XX_DBGC_CFG_DBGBUS_BYTEL_0_BYTEL3__SHIFT 12 +static inline uint32_t A6XX_DBGC_CFG_DBGBUS_BYTEL_0_BYTEL3(uint32_t val) +{ + return ((val) << A6XX_DBGC_CFG_DBGBUS_BYTEL_0_BYTEL3__SHIFT) & A6XX_DBGC_CFG_DBGBUS_BYTEL_0_BYTEL3__MASK; +} +#define A6XX_DBGC_CFG_DBGBUS_BYTEL_0_BYTEL4__MASK 0x000f0000 +#define A6XX_DBGC_CFG_DBGBUS_BYTEL_0_BYTEL4__SHIFT 16 +static inline uint32_t A6XX_DBGC_CFG_DBGBUS_BYTEL_0_BYTEL4(uint32_t val) +{ + return ((val) << A6XX_DBGC_CFG_DBGBUS_BYTEL_0_BYTEL4__SHIFT) & A6XX_DBGC_CFG_DBGBUS_BYTEL_0_BYTEL4__MASK; +} +#define 
A6XX_DBGC_CFG_DBGBUS_BYTEL_0_BYTEL5__MASK 0x00f00000 +#define A6XX_DBGC_CFG_DBGBUS_BYTEL_0_BYTEL5__SHIFT 20 +static inline uint32_t A6XX_DBGC_CFG_DBGBUS_BYTEL_0_BYTEL5(uint32_t val) +{ + return ((val) << A6XX_DBGC_CFG_DBGBUS_BYTEL_0_BYTEL5__SHIFT) & A6XX_DBGC_CFG_DBGBUS_BYTEL_0_BYTEL5__MASK; +} +#define A6XX_DBGC_CFG_DBGBUS_BYTEL_0_BYTEL6__MASK 0x0f000000 +#define A6XX_DBGC_CFG_DBGBUS_BYTEL_0_BYTEL6__SHIFT 24 +static inline uint32_t A6XX_DBGC_CFG_DBGBUS_BYTEL_0_BYTEL6(uint32_t val) +{ + return ((val) << A6XX_DBGC_CFG_DBGBUS_BYTEL_0_BYTEL6__SHIFT) & A6XX_DBGC_CFG_DBGBUS_BYTEL_0_BYTEL6__MASK; +} +#define A6XX_DBGC_CFG_DBGBUS_BYTEL_0_BYTEL7__MASK 0xf0000000 +#define A6XX_DBGC_CFG_DBGBUS_BYTEL_0_BYTEL7__SHIFT 28 +static inline uint32_t A6XX_DBGC_CFG_DBGBUS_BYTEL_0_BYTEL7(uint32_t val) +{ + return ((val) << A6XX_DBGC_CFG_DBGBUS_BYTEL_0_BYTEL7__SHIFT) & A6XX_DBGC_CFG_DBGBUS_BYTEL_0_BYTEL7__MASK; +} + +#define REG_A6XX_DBGC_CFG_DBGBUS_BYTEL_1 0x00000611 +#define A6XX_DBGC_CFG_DBGBUS_BYTEL_1_BYTEL8__MASK 0x0000000f +#define A6XX_DBGC_CFG_DBGBUS_BYTEL_1_BYTEL8__SHIFT 0 +static inline uint32_t A6XX_DBGC_CFG_DBGBUS_BYTEL_1_BYTEL8(uint32_t val) +{ + return ((val) << A6XX_DBGC_CFG_DBGBUS_BYTEL_1_BYTEL8__SHIFT) & A6XX_DBGC_CFG_DBGBUS_BYTEL_1_BYTEL8__MASK; +} +#define A6XX_DBGC_CFG_DBGBUS_BYTEL_1_BYTEL9__MASK 0x000000f0 +#define A6XX_DBGC_CFG_DBGBUS_BYTEL_1_BYTEL9__SHIFT 4 +static inline uint32_t A6XX_DBGC_CFG_DBGBUS_BYTEL_1_BYTEL9(uint32_t val) +{ + return ((val) << A6XX_DBGC_CFG_DBGBUS_BYTEL_1_BYTEL9__SHIFT) & A6XX_DBGC_CFG_DBGBUS_BYTEL_1_BYTEL9__MASK; +} +#define A6XX_DBGC_CFG_DBGBUS_BYTEL_1_BYTEL10__MASK 0x00000f00 +#define A6XX_DBGC_CFG_DBGBUS_BYTEL_1_BYTEL10__SHIFT 8 +static inline uint32_t A6XX_DBGC_CFG_DBGBUS_BYTEL_1_BYTEL10(uint32_t val) +{ + return ((val) << A6XX_DBGC_CFG_DBGBUS_BYTEL_1_BYTEL10__SHIFT) & A6XX_DBGC_CFG_DBGBUS_BYTEL_1_BYTEL10__MASK; +} +#define A6XX_DBGC_CFG_DBGBUS_BYTEL_1_BYTEL11__MASK 0x0000f000 +#define A6XX_DBGC_CFG_DBGBUS_BYTEL_1_BYTEL11__SHIFT 12 
+static inline uint32_t A6XX_DBGC_CFG_DBGBUS_BYTEL_1_BYTEL11(uint32_t val) +{ + return ((val) << A6XX_DBGC_CFG_DBGBUS_BYTEL_1_BYTEL11__SHIFT) & A6XX_DBGC_CFG_DBGBUS_BYTEL_1_BYTEL11__MASK; +} +#define A6XX_DBGC_CFG_DBGBUS_BYTEL_1_BYTEL12__MASK 0x000f0000 +#define A6XX_DBGC_CFG_DBGBUS_BYTEL_1_BYTEL12__SHIFT 16 +static inline uint32_t A6XX_DBGC_CFG_DBGBUS_BYTEL_1_BYTEL12(uint32_t val) +{ + return ((val) << A6XX_DBGC_CFG_DBGBUS_BYTEL_1_BYTEL12__SHIFT) & A6XX_DBGC_CFG_DBGBUS_BYTEL_1_BYTEL12__MASK; +} +#define A6XX_DBGC_CFG_DBGBUS_BYTEL_1_BYTEL13__MASK 0x00f00000 +#define A6XX_DBGC_CFG_DBGBUS_BYTEL_1_BYTEL13__SHIFT 20 +static inline uint32_t A6XX_DBGC_CFG_DBGBUS_BYTEL_1_BYTEL13(uint32_t val) +{ + return ((val) << A6XX_DBGC_CFG_DBGBUS_BYTEL_1_BYTEL13__SHIFT) & A6XX_DBGC_CFG_DBGBUS_BYTEL_1_BYTEL13__MASK; +} +#define A6XX_DBGC_CFG_DBGBUS_BYTEL_1_BYTEL14__MASK 0x0f000000 +#define A6XX_DBGC_CFG_DBGBUS_BYTEL_1_BYTEL14__SHIFT 24 +static inline uint32_t A6XX_DBGC_CFG_DBGBUS_BYTEL_1_BYTEL14(uint32_t val) +{ + return ((val) << A6XX_DBGC_CFG_DBGBUS_BYTEL_1_BYTEL14__SHIFT) & A6XX_DBGC_CFG_DBGBUS_BYTEL_1_BYTEL14__MASK; +} +#define A6XX_DBGC_CFG_DBGBUS_BYTEL_1_BYTEL15__MASK 0xf0000000 +#define A6XX_DBGC_CFG_DBGBUS_BYTEL_1_BYTEL15__SHIFT 28 +static inline uint32_t A6XX_DBGC_CFG_DBGBUS_BYTEL_1_BYTEL15(uint32_t val) +{ + return ((val) << A6XX_DBGC_CFG_DBGBUS_BYTEL_1_BYTEL15__SHIFT) & A6XX_DBGC_CFG_DBGBUS_BYTEL_1_BYTEL15__MASK; +} + +#define REG_A6XX_DBGC_CFG_DBGBUS_TRACE_BUF1 0x0000062f + +#define REG_A6XX_DBGC_CFG_DBGBUS_TRACE_BUF2 0x00000630 + +#define REG_A6XX_VSC_PERFCTR_VSC_SEL_0 0x00000cd8 + +#define REG_A6XX_VSC_PERFCTR_VSC_SEL_1 0x00000cd9 + +#define REG_A6XX_GRAS_ADDR_MODE_CNTL 0x00008601 + +#define REG_A6XX_GRAS_PERFCTR_TSE_SEL_0 0x00008610 + +#define REG_A6XX_GRAS_PERFCTR_TSE_SEL_1 0x00008611 + +#define REG_A6XX_GRAS_PERFCTR_TSE_SEL_2 0x00008612 + +#define REG_A6XX_GRAS_PERFCTR_TSE_SEL_3 0x00008613 + +#define REG_A6XX_GRAS_PERFCTR_RAS_SEL_0 0x00008614 + +#define 
REG_A6XX_GRAS_PERFCTR_RAS_SEL_1 0x00008615 + +#define REG_A6XX_GRAS_PERFCTR_RAS_SEL_2 0x00008616 + +#define REG_A6XX_GRAS_PERFCTR_RAS_SEL_3 0x00008617 + +#define REG_A6XX_GRAS_PERFCTR_LRZ_SEL_0 0x00008618 + +#define REG_A6XX_GRAS_PERFCTR_LRZ_SEL_1 0x00008619 + +#define REG_A6XX_GRAS_PERFCTR_LRZ_SEL_2 0x0000861a + +#define REG_A6XX_GRAS_PERFCTR_LRZ_SEL_3 0x0000861b + +#define REG_A6XX_RB_ADDR_MODE_CNTL 0x00008e05 + +#define REG_A6XX_RB_NC_MODE_CNTL 0x00008e08 + +#define REG_A6XX_RB_PERFCTR_RB_SEL_0 0x00008e10 + +#define REG_A6XX_RB_PERFCTR_RB_SEL_1 0x00008e11 + +#define REG_A6XX_RB_PERFCTR_RB_SEL_2 0x00008e12 + +#define REG_A6XX_RB_PERFCTR_RB_SEL_3 0x00008e13 + +#define REG_A6XX_RB_PERFCTR_RB_SEL_4 0x00008e14 + +#define REG_A6XX_RB_PERFCTR_RB_SEL_5 0x00008e15 + +#define REG_A6XX_RB_PERFCTR_RB_SEL_6 0x00008e16 + +#define REG_A6XX_RB_PERFCTR_RB_SEL_7 0x00008e17 + +#define REG_A6XX_RB_PERFCTR_CCU_SEL_0 0x00008e18 + +#define REG_A6XX_RB_PERFCTR_CCU_SEL_1 0x00008e19 + +#define REG_A6XX_RB_PERFCTR_CCU_SEL_2 0x00008e1a + +#define REG_A6XX_RB_PERFCTR_CCU_SEL_3 0x00008e1b + +#define REG_A6XX_RB_PERFCTR_CCU_SEL_4 0x00008e1c + +#define REG_A6XX_RB_PERFCTR_CMP_SEL_0 0x00008e2c + +#define REG_A6XX_RB_PERFCTR_CMP_SEL_1 0x00008e2d + +#define REG_A6XX_RB_PERFCTR_CMP_SEL_2 0x00008e2e + +#define REG_A6XX_RB_PERFCTR_CMP_SEL_3 0x00008e2f + +#define REG_A6XX_RB_RB_SUB_BLOCK_SEL_CNTL_CD 0x00008e3d + +#define REG_A6XX_RB_CONTEXT_SWITCH_GMEM_SAVE_RESTORE 0x00008e50 + +#define REG_A6XX_PC_DBG_ECO_CNTL 0x00009e00 + +#define REG_A6XX_PC_ADDR_MODE_CNTL 0x00009e01 + +#define REG_A6XX_PC_PERFCTR_PC_SEL_0 0x00009e34 + +#define REG_A6XX_PC_PERFCTR_PC_SEL_1 0x00009e35 + +#define REG_A6XX_PC_PERFCTR_PC_SEL_2 0x00009e36 + +#define REG_A6XX_PC_PERFCTR_PC_SEL_3 0x00009e37 + +#define REG_A6XX_PC_PERFCTR_PC_SEL_4 0x00009e38 + +#define REG_A6XX_PC_PERFCTR_PC_SEL_5 0x00009e39 + +#define REG_A6XX_PC_PERFCTR_PC_SEL_6 0x00009e3a + +#define REG_A6XX_PC_PERFCTR_PC_SEL_7 0x00009e3b + +#define 
REG_A6XX_HLSQ_ADDR_MODE_CNTL 0x0000be05 + +#define REG_A6XX_HLSQ_PERFCTR_HLSQ_SEL_0 0x0000be10 + +#define REG_A6XX_HLSQ_PERFCTR_HLSQ_SEL_1 0x0000be11 + +#define REG_A6XX_HLSQ_PERFCTR_HLSQ_SEL_2 0x0000be12 + +#define REG_A6XX_HLSQ_PERFCTR_HLSQ_SEL_3 0x0000be13 + +#define REG_A6XX_HLSQ_PERFCTR_HLSQ_SEL_4 0x0000be14 + +#define REG_A6XX_HLSQ_PERFCTR_HLSQ_SEL_5 0x0000be15 + +#define REG_A6XX_HLSQ_DBG_AHB_READ_APERTURE 0x0000c800 + +#define REG_A6XX_HLSQ_DBG_READ_SEL 0x0000d000 + +#define REG_A6XX_VFD_ADDR_MODE_CNTL 0x0000a601 + +#define REG_A6XX_VFD_PERFCTR_VFD_SEL_0 0x0000a610 + +#define REG_A6XX_VFD_PERFCTR_VFD_SEL_1 0x0000a611 + +#define REG_A6XX_VFD_PERFCTR_VFD_SEL_2 0x0000a612 + +#define REG_A6XX_VFD_PERFCTR_VFD_SEL_3 0x0000a613 + +#define REG_A6XX_VFD_PERFCTR_VFD_SEL_4 0x0000a614 + +#define REG_A6XX_VFD_PERFCTR_VFD_SEL_5 0x0000a615 + +#define REG_A6XX_VFD_PERFCTR_VFD_SEL_6 0x0000a616 + +#define REG_A6XX_VFD_PERFCTR_VFD_SEL_7 0x0000a617 + +#define REG_A6XX_VPC_ADDR_MODE_CNTL 0x00009601 + +#define REG_A6XX_VPC_PERFCTR_VPC_SEL_0 0x00009604 + +#define REG_A6XX_VPC_PERFCTR_VPC_SEL_1 0x00009605 + +#define REG_A6XX_VPC_PERFCTR_VPC_SEL_2 0x00009606 + +#define REG_A6XX_VPC_PERFCTR_VPC_SEL_3 0x00009607 + +#define REG_A6XX_VPC_PERFCTR_VPC_SEL_4 0x00009608 + +#define REG_A6XX_VPC_PERFCTR_VPC_SEL_5 0x00009609 + +#define REG_A6XX_UCHE_ADDR_MODE_CNTL 0x00000e00 + +#define REG_A6XX_UCHE_MODE_CNTL 0x00000e01 + +#define REG_A6XX_UCHE_WRITE_RANGE_MAX_LO 0x00000e05 + +#define REG_A6XX_UCHE_WRITE_RANGE_MAX_HI 0x00000e06 + +#define REG_A6XX_UCHE_WRITE_THRU_BASE_LO 0x00000e07 + +#define REG_A6XX_UCHE_WRITE_THRU_BASE_HI 0x00000e08 + +#define REG_A6XX_UCHE_TRAP_BASE_LO 0x00000e09 + +#define REG_A6XX_UCHE_TRAP_BASE_HI 0x00000e0a + +#define REG_A6XX_UCHE_GMEM_RANGE_MIN_LO 0x00000e0b + +#define REG_A6XX_UCHE_GMEM_RANGE_MIN_HI 0x00000e0c + +#define REG_A6XX_UCHE_GMEM_RANGE_MAX_LO 0x00000e0d + +#define REG_A6XX_UCHE_GMEM_RANGE_MAX_HI 0x00000e0e + +#define REG_A6XX_UCHE_CACHE_WAYS 0x00000e17 + 
+#define REG_A6XX_UCHE_FILTER_CNTL 0x00000e18 + +#define REG_A6XX_UCHE_CLIENT_PF 0x00000e19 +#define A6XX_UCHE_CLIENT_PF_PERFSEL__MASK 0x000000ff +#define A6XX_UCHE_CLIENT_PF_PERFSEL__SHIFT 0 +static inline uint32_t A6XX_UCHE_CLIENT_PF_PERFSEL(uint32_t val) +{ + return ((val) << A6XX_UCHE_CLIENT_PF_PERFSEL__SHIFT) & A6XX_UCHE_CLIENT_PF_PERFSEL__MASK; +} + +#define REG_A6XX_UCHE_PERFCTR_UCHE_SEL_0 0x00000e1c + +#define REG_A6XX_UCHE_PERFCTR_UCHE_SEL_1 0x00000e1d + +#define REG_A6XX_UCHE_PERFCTR_UCHE_SEL_2 0x00000e1e + +#define REG_A6XX_UCHE_PERFCTR_UCHE_SEL_3 0x00000e1f + +#define REG_A6XX_UCHE_PERFCTR_UCHE_SEL_4 0x00000e20 + +#define REG_A6XX_UCHE_PERFCTR_UCHE_SEL_5 0x00000e21 + +#define REG_A6XX_UCHE_PERFCTR_UCHE_SEL_6 0x00000e22 + +#define REG_A6XX_UCHE_PERFCTR_UCHE_SEL_7 0x00000e23 + +#define REG_A6XX_UCHE_PERFCTR_UCHE_SEL_8 0x00000e24 + +#define REG_A6XX_UCHE_PERFCTR_UCHE_SEL_9 0x00000e25 + +#define REG_A6XX_UCHE_PERFCTR_UCHE_SEL_10 0x00000e26 + +#define REG_A6XX_UCHE_PERFCTR_UCHE_SEL_11 0x00000e27 + +#define REG_A6XX_SP_ADDR_MODE_CNTL 0x0000ae01 + +#define REG_A6XX_SP_NC_MODE_CNTL 0x0000ae02 + +#define REG_A6XX_SP_PERFCTR_SP_SEL_0 0x0000ae10 + +#define REG_A6XX_SP_PERFCTR_SP_SEL_1 0x0000ae11 + +#define REG_A6XX_SP_PERFCTR_SP_SEL_2 0x0000ae12 + +#define REG_A6XX_SP_PERFCTR_SP_SEL_3 0x0000ae13 + +#define REG_A6XX_SP_PERFCTR_SP_SEL_4 0x0000ae14 + +#define REG_A6XX_SP_PERFCTR_SP_SEL_5 0x0000ae15 + +#define REG_A6XX_SP_PERFCTR_SP_SEL_6 0x0000ae16 + +#define REG_A6XX_SP_PERFCTR_SP_SEL_7 0x0000ae17 + +#define REG_A6XX_SP_PERFCTR_SP_SEL_8 0x0000ae18 + +#define REG_A6XX_SP_PERFCTR_SP_SEL_9 0x0000ae19 + +#define REG_A6XX_SP_PERFCTR_SP_SEL_10 0x0000ae1a + +#define REG_A6XX_SP_PERFCTR_SP_SEL_11 0x0000ae1b + +#define REG_A6XX_SP_PERFCTR_SP_SEL_12 0x0000ae1c + +#define REG_A6XX_SP_PERFCTR_SP_SEL_13 0x0000ae1d + +#define REG_A6XX_SP_PERFCTR_SP_SEL_14 0x0000ae1e + +#define REG_A6XX_SP_PERFCTR_SP_SEL_15 0x0000ae1f + +#define REG_A6XX_SP_PERFCTR_SP_SEL_16 0x0000ae20 + +#define 
REG_A6XX_SP_PERFCTR_SP_SEL_17 0x0000ae21 + +#define REG_A6XX_SP_PERFCTR_SP_SEL_18 0x0000ae22 + +#define REG_A6XX_SP_PERFCTR_SP_SEL_19 0x0000ae23 + +#define REG_A6XX_SP_PERFCTR_SP_SEL_20 0x0000ae24 + +#define REG_A6XX_SP_PERFCTR_SP_SEL_21 0x0000ae25 + +#define REG_A6XX_SP_PERFCTR_SP_SEL_22 0x0000ae26 + +#define REG_A6XX_SP_PERFCTR_SP_SEL_23 0x0000ae27 + +#define REG_A6XX_TPL1_ADDR_MODE_CNTL 0x0000b601 + +#define REG_A6XX_TPL1_NC_MODE_CNTL 0x0000b604 + +#define REG_A6XX_TPL1_PERFCTR_TP_SEL_0 0x0000b610 + +#define REG_A6XX_TPL1_PERFCTR_TP_SEL_1 0x0000b611 + +#define REG_A6XX_TPL1_PERFCTR_TP_SEL_2 0x0000b612 + +#define REG_A6XX_TPL1_PERFCTR_TP_SEL_3 0x0000b613 + +#define REG_A6XX_TPL1_PERFCTR_TP_SEL_4 0x0000b614 + +#define REG_A6XX_TPL1_PERFCTR_TP_SEL_5 0x0000b615 + +#define REG_A6XX_TPL1_PERFCTR_TP_SEL_6 0x0000b616 + +#define REG_A6XX_TPL1_PERFCTR_TP_SEL_7 0x0000b617 + +#define REG_A6XX_TPL1_PERFCTR_TP_SEL_8 0x0000b618 + +#define REG_A6XX_TPL1_PERFCTR_TP_SEL_9 0x0000b619 + +#define REG_A6XX_TPL1_PERFCTR_TP_SEL_10 0x0000b61a + +#define REG_A6XX_TPL1_PERFCTR_TP_SEL_11 0x0000b61b + +#define REG_A6XX_VBIF_VERSION 0x00003000 + +#define REG_A6XX_VBIF_CLKON 0x00003001 +#define A6XX_VBIF_CLKON_FORCE_ON_TESTBUS 0x00000002 + +#define REG_A6XX_VBIF_GATE_OFF_WRREQ_EN 0x0000302a + +#define REG_A6XX_VBIF_XIN_HALT_CTRL0 0x00003080 + +#define REG_A6XX_VBIF_XIN_HALT_CTRL1 0x00003081 + +#define REG_A6XX_VBIF_TEST_BUS_OUT_CTRL 0x00003084 + +#define REG_A6XX_VBIF_TEST_BUS1_CTRL0 0x00003085 + +#define REG_A6XX_VBIF_TEST_BUS1_CTRL1 0x00003086 +#define A6XX_VBIF_TEST_BUS1_CTRL1_DATA_SEL__MASK 0x0000000f +#define A6XX_VBIF_TEST_BUS1_CTRL1_DATA_SEL__SHIFT 0 +static inline uint32_t A6XX_VBIF_TEST_BUS1_CTRL1_DATA_SEL(uint32_t val) +{ + return ((val) << A6XX_VBIF_TEST_BUS1_CTRL1_DATA_SEL__SHIFT) & A6XX_VBIF_TEST_BUS1_CTRL1_DATA_SEL__MASK; +} + +#define REG_A6XX_VBIF_TEST_BUS2_CTRL0 0x00003087 + +#define REG_A6XX_VBIF_TEST_BUS2_CTRL1 0x00003088 +#define A6XX_VBIF_TEST_BUS2_CTRL1_DATA_SEL__MASK 
0x000001ff +#define A6XX_VBIF_TEST_BUS2_CTRL1_DATA_SEL__SHIFT 0 +static inline uint32_t A6XX_VBIF_TEST_BUS2_CTRL1_DATA_SEL(uint32_t val) +{ + return ((val) << A6XX_VBIF_TEST_BUS2_CTRL1_DATA_SEL__SHIFT) & A6XX_VBIF_TEST_BUS2_CTRL1_DATA_SEL__MASK; +} + +#define REG_A6XX_VBIF_TEST_BUS_OUT 0x0000308c + +#define REG_A6XX_VBIF_PERF_CNT_SEL0 0x000030d0 + +#define REG_A6XX_VBIF_PERF_CNT_SEL1 0x000030d1 + +#define REG_A6XX_VBIF_PERF_CNT_SEL2 0x000030d2 + +#define REG_A6XX_VBIF_PERF_CNT_SEL3 0x000030d3 + +#define REG_A6XX_VBIF_PERF_CNT_LOW0 0x000030d8 + +#define REG_A6XX_VBIF_PERF_CNT_LOW1 0x000030d9 + +#define REG_A6XX_VBIF_PERF_CNT_LOW2 0x000030da + +#define REG_A6XX_VBIF_PERF_CNT_LOW3 0x000030db + +#define REG_A6XX_VBIF_PERF_CNT_HIGH0 0x000030e0 + +#define REG_A6XX_VBIF_PERF_CNT_HIGH1 0x000030e1 + +#define REG_A6XX_VBIF_PERF_CNT_HIGH2 0x000030e2 + +#define REG_A6XX_VBIF_PERF_CNT_HIGH3 0x000030e3 + +#define REG_A6XX_VBIF_PERF_PWR_CNT_EN0 0x00003100 + +#define REG_A6XX_VBIF_PERF_PWR_CNT_EN1 0x00003101 + +#define REG_A6XX_VBIF_PERF_PWR_CNT_EN2 0x00003102 + +#define REG_A6XX_VBIF_PERF_PWR_CNT_LOW0 0x00003110 + +#define REG_A6XX_VBIF_PERF_PWR_CNT_LOW1 0x00003111 + +#define REG_A6XX_VBIF_PERF_PWR_CNT_LOW2 0x00003112 + +#define REG_A6XX_VBIF_PERF_PWR_CNT_HIGH0 0x00003118 + +#define REG_A6XX_VBIF_PERF_PWR_CNT_HIGH1 0x00003119 + +#define REG_A6XX_VBIF_PERF_PWR_CNT_HIGH2 0x0000311a + +#define REG_A6XX_RB_WINDOW_OFFSET2 0x000088d4 +#define A6XX_RB_WINDOW_OFFSET2_WINDOW_OFFSET_DISABLE 0x80000000 +#define A6XX_RB_WINDOW_OFFSET2_X__MASK 0x00007fff +#define A6XX_RB_WINDOW_OFFSET2_X__SHIFT 0 +static inline uint32_t A6XX_RB_WINDOW_OFFSET2_X(uint32_t val) +{ + return ((val) << A6XX_RB_WINDOW_OFFSET2_X__SHIFT) & A6XX_RB_WINDOW_OFFSET2_X__MASK; +} +#define A6XX_RB_WINDOW_OFFSET2_Y__MASK 0x7fff0000 +#define A6XX_RB_WINDOW_OFFSET2_Y__SHIFT 16 +static inline uint32_t A6XX_RB_WINDOW_OFFSET2_Y(uint32_t val) +{ + return ((val) << A6XX_RB_WINDOW_OFFSET2_Y__SHIFT) & A6XX_RB_WINDOW_OFFSET2_Y__MASK; 
+} + +#define REG_A6XX_SP_WINDOW_OFFSET 0x0000b4d1 +#define A6XX_SP_WINDOW_OFFSET_WINDOW_OFFSET_DISABLE 0x80000000 +#define A6XX_SP_WINDOW_OFFSET_X__MASK 0x00007fff +#define A6XX_SP_WINDOW_OFFSET_X__SHIFT 0 +static inline uint32_t A6XX_SP_WINDOW_OFFSET_X(uint32_t val) +{ + return ((val) << A6XX_SP_WINDOW_OFFSET_X__SHIFT) & A6XX_SP_WINDOW_OFFSET_X__MASK; +} +#define A6XX_SP_WINDOW_OFFSET_Y__MASK 0x7fff0000 +#define A6XX_SP_WINDOW_OFFSET_Y__SHIFT 16 +static inline uint32_t A6XX_SP_WINDOW_OFFSET_Y(uint32_t val) +{ + return ((val) << A6XX_SP_WINDOW_OFFSET_Y__SHIFT) & A6XX_SP_WINDOW_OFFSET_Y__MASK; +} + +#define REG_A6XX_SP_TP_WINDOW_OFFSET 0x0000b307 +#define A6XX_SP_TP_WINDOW_OFFSET_WINDOW_OFFSET_DISABLE 0x80000000 +#define A6XX_SP_TP_WINDOW_OFFSET_X__MASK 0x00007fff +#define A6XX_SP_TP_WINDOW_OFFSET_X__SHIFT 0 +static inline uint32_t A6XX_SP_TP_WINDOW_OFFSET_X(uint32_t val) +{ + return ((val) << A6XX_SP_TP_WINDOW_OFFSET_X__SHIFT) & A6XX_SP_TP_WINDOW_OFFSET_X__MASK; +} +#define A6XX_SP_TP_WINDOW_OFFSET_Y__MASK 0x7fff0000 +#define A6XX_SP_TP_WINDOW_OFFSET_Y__SHIFT 16 +static inline uint32_t A6XX_SP_TP_WINDOW_OFFSET_Y(uint32_t val) +{ + return ((val) << A6XX_SP_TP_WINDOW_OFFSET_Y__SHIFT) & A6XX_SP_TP_WINDOW_OFFSET_Y__MASK; +} + +#define REG_A6XX_GRAS_BIN_CONTROL 0x000080a1 +#define A6XX_GRAS_BIN_CONTROL_BINW__MASK 0x000000ff +#define A6XX_GRAS_BIN_CONTROL_BINW__SHIFT 0 +static inline uint32_t A6XX_GRAS_BIN_CONTROL_BINW(uint32_t val) +{ + assert(!(val & 0x1f)); + return ((val >> 5) << A6XX_GRAS_BIN_CONTROL_BINW__SHIFT) & A6XX_GRAS_BIN_CONTROL_BINW__MASK; +} +#define A6XX_GRAS_BIN_CONTROL_BINH__MASK 0x0001ff00 +#define A6XX_GRAS_BIN_CONTROL_BINH__SHIFT 8 +static inline uint32_t A6XX_GRAS_BIN_CONTROL_BINH(uint32_t val) +{ + assert(!(val & 0xf)); + return ((val >> 4) << A6XX_GRAS_BIN_CONTROL_BINH__SHIFT) & A6XX_GRAS_BIN_CONTROL_BINH__MASK; +} +#define A6XX_GRAS_BIN_CONTROL_BINNING_PASS 0x00040000 +#define A6XX_GRAS_BIN_CONTROL_USE_VIZ 0x00200000 + +#define 
REG_A6XX_RB_BIN_CONTROL2 0x000088d3 +#define A6XX_RB_BIN_CONTROL2_BINW__MASK 0x000000ff +#define A6XX_RB_BIN_CONTROL2_BINW__SHIFT 0 +static inline uint32_t A6XX_RB_BIN_CONTROL2_BINW(uint32_t val) +{ + assert(!(val & 0x1f)); + return ((val >> 5) << A6XX_RB_BIN_CONTROL2_BINW__SHIFT) & A6XX_RB_BIN_CONTROL2_BINW__MASK; +} +#define A6XX_RB_BIN_CONTROL2_BINH__MASK 0x0001ff00 +#define A6XX_RB_BIN_CONTROL2_BINH__SHIFT 8 +static inline uint32_t A6XX_RB_BIN_CONTROL2_BINH(uint32_t val) +{ + assert(!(val & 0xf)); + return ((val >> 4) << A6XX_RB_BIN_CONTROL2_BINH__SHIFT) & A6XX_RB_BIN_CONTROL2_BINH__MASK; +} + +#define REG_A6XX_VSC_BIN_SIZE 0x00000c02 +#define A6XX_VSC_BIN_SIZE_WIDTH__MASK 0x000000ff +#define A6XX_VSC_BIN_SIZE_WIDTH__SHIFT 0 +static inline uint32_t A6XX_VSC_BIN_SIZE_WIDTH(uint32_t val) +{ + assert(!(val & 0x1f)); + return ((val >> 5) << A6XX_VSC_BIN_SIZE_WIDTH__SHIFT) & A6XX_VSC_BIN_SIZE_WIDTH__MASK; +} +#define A6XX_VSC_BIN_SIZE_HEIGHT__MASK 0x0001ff00 +#define A6XX_VSC_BIN_SIZE_HEIGHT__SHIFT 8 +static inline uint32_t A6XX_VSC_BIN_SIZE_HEIGHT(uint32_t val) +{ + assert(!(val & 0xf)); + return ((val >> 4) << A6XX_VSC_BIN_SIZE_HEIGHT__SHIFT) & A6XX_VSC_BIN_SIZE_HEIGHT__MASK; +} + +#define REG_A6XX_VSC_SIZE_ADDRESS_LO 0x00000c03 + +#define REG_A6XX_VSC_SIZE_ADDRESS_HI 0x00000c04 + +#define REG_A6XX_VSC_BIN_COUNT 0x00000c06 +#define A6XX_VSC_BIN_COUNT_NX__MASK 0x000007fe +#define A6XX_VSC_BIN_COUNT_NX__SHIFT 1 +static inline uint32_t A6XX_VSC_BIN_COUNT_NX(uint32_t val) +{ + return ((val) << A6XX_VSC_BIN_COUNT_NX__SHIFT) & A6XX_VSC_BIN_COUNT_NX__MASK; +} +#define A6XX_VSC_BIN_COUNT_NY__MASK 0x001ff800 +#define A6XX_VSC_BIN_COUNT_NY__SHIFT 11 +static inline uint32_t A6XX_VSC_BIN_COUNT_NY(uint32_t val) +{ + return ((val) << A6XX_VSC_BIN_COUNT_NY__SHIFT) & A6XX_VSC_BIN_COUNT_NY__MASK; +} + +static inline uint32_t REG_A6XX_VSC_PIPE_CONFIG(uint32_t i0) { return 0x00000c10 + 0x1*i0; } + +static inline uint32_t REG_A6XX_VSC_PIPE_CONFIG_REG(uint32_t i0) { return 0x00000c10 
+ 0x1*i0; } +#define A6XX_VSC_PIPE_CONFIG_REG_X__MASK 0x000003ff +#define A6XX_VSC_PIPE_CONFIG_REG_X__SHIFT 0 +static inline uint32_t A6XX_VSC_PIPE_CONFIG_REG_X(uint32_t val) +{ + return ((val) << A6XX_VSC_PIPE_CONFIG_REG_X__SHIFT) & A6XX_VSC_PIPE_CONFIG_REG_X__MASK; +} +#define A6XX_VSC_PIPE_CONFIG_REG_Y__MASK 0x000ffc00 +#define A6XX_VSC_PIPE_CONFIG_REG_Y__SHIFT 10 +static inline uint32_t A6XX_VSC_PIPE_CONFIG_REG_Y(uint32_t val) +{ + return ((val) << A6XX_VSC_PIPE_CONFIG_REG_Y__SHIFT) & A6XX_VSC_PIPE_CONFIG_REG_Y__MASK; +} +#define A6XX_VSC_PIPE_CONFIG_REG_W__MASK 0x03f00000 +#define A6XX_VSC_PIPE_CONFIG_REG_W__SHIFT 20 +static inline uint32_t A6XX_VSC_PIPE_CONFIG_REG_W(uint32_t val) +{ + return ((val) << A6XX_VSC_PIPE_CONFIG_REG_W__SHIFT) & A6XX_VSC_PIPE_CONFIG_REG_W__MASK; +} +#define A6XX_VSC_PIPE_CONFIG_REG_H__MASK 0xfc000000 +#define A6XX_VSC_PIPE_CONFIG_REG_H__SHIFT 26 +static inline uint32_t A6XX_VSC_PIPE_CONFIG_REG_H(uint32_t val) +{ + return ((val) << A6XX_VSC_PIPE_CONFIG_REG_H__SHIFT) & A6XX_VSC_PIPE_CONFIG_REG_H__MASK; +} + +#define REG_A6XX_VSC_PIPE_DATA2_ADDRESS_LO 0x00000c30 + +#define REG_A6XX_VSC_PIPE_DATA2_ADDRESS_HI 0x00000c31 + +#define REG_A6XX_VSC_PIPE_DATA2_PITCH 0x00000c32 + +#define REG_A6XX_VSC_PIPE_DATA2_ARRAY_PITCH 0x00000c33 +#define A6XX_VSC_PIPE_DATA2_ARRAY_PITCH__MASK 0xffffffff +#define A6XX_VSC_PIPE_DATA2_ARRAY_PITCH__SHIFT 0 +static inline uint32_t A6XX_VSC_PIPE_DATA2_ARRAY_PITCH(uint32_t val) +{ + assert(!(val & 0xf)); + return ((val >> 4) << A6XX_VSC_PIPE_DATA2_ARRAY_PITCH__SHIFT) & A6XX_VSC_PIPE_DATA2_ARRAY_PITCH__MASK; +} + +#define REG_A6XX_VSC_PIPE_DATA_ADDRESS_LO 0x00000c34 + +#define REG_A6XX_VSC_PIPE_DATA_ADDRESS_HI 0x00000c35 + +#define REG_A6XX_VSC_PIPE_DATA_PITCH 0x00000c36 + +#define REG_A6XX_VSC_PIPE_DATA_ARRAY_PITCH 0x00000c37 +#define A6XX_VSC_PIPE_DATA_ARRAY_PITCH__MASK 0xffffffff +#define A6XX_VSC_PIPE_DATA_ARRAY_PITCH__SHIFT 0 +static inline uint32_t A6XX_VSC_PIPE_DATA_ARRAY_PITCH(uint32_t val) +{ + 
assert(!(val & 0xf)); + return ((val >> 4) << A6XX_VSC_PIPE_DATA_ARRAY_PITCH__SHIFT) & A6XX_VSC_PIPE_DATA_ARRAY_PITCH__MASK; +} + +static inline uint32_t REG_A6XX_VSC_SIZE(uint32_t i0) { return 0x00000c78 + 0x1*i0; } + +static inline uint32_t REG_A6XX_VSC_SIZE_REG(uint32_t i0) { return 0x00000c78 + 0x1*i0; } + +#define REG_A6XX_UCHE_UNKNOWN_0E12 0x00000e12 + +#define REG_A6XX_GRAS_UNKNOWN_8000 0x00008000 + +#define REG_A6XX_GRAS_UNKNOWN_8001 0x00008001 + +#define REG_A6XX_GRAS_UNKNOWN_8004 0x00008004 + +#define REG_A6XX_GRAS_CNTL 0x00008005 +#define A6XX_GRAS_CNTL_VARYING 0x00000001 +#define A6XX_GRAS_CNTL_UNK3 0x00000008 +#define A6XX_GRAS_CNTL_XCOORD 0x00000040 +#define A6XX_GRAS_CNTL_YCOORD 0x00000080 +#define A6XX_GRAS_CNTL_ZCOORD 0x00000100 +#define A6XX_GRAS_CNTL_WCOORD 0x00000200 + +#define REG_A6XX_GRAS_CL_GUARDBAND_CLIP_ADJ 0x00008006 +#define A6XX_GRAS_CL_GUARDBAND_CLIP_ADJ_HORZ__MASK 0x000003ff +#define A6XX_GRAS_CL_GUARDBAND_CLIP_ADJ_HORZ__SHIFT 0 +static inline uint32_t A6XX_GRAS_CL_GUARDBAND_CLIP_ADJ_HORZ(uint32_t val) +{ + return ((val) << A6XX_GRAS_CL_GUARDBAND_CLIP_ADJ_HORZ__SHIFT) & A6XX_GRAS_CL_GUARDBAND_CLIP_ADJ_HORZ__MASK; +} +#define A6XX_GRAS_CL_GUARDBAND_CLIP_ADJ_VERT__MASK 0x000ffc00 +#define A6XX_GRAS_CL_GUARDBAND_CLIP_ADJ_VERT__SHIFT 10 +static inline uint32_t A6XX_GRAS_CL_GUARDBAND_CLIP_ADJ_VERT(uint32_t val) +{ + return ((val) << A6XX_GRAS_CL_GUARDBAND_CLIP_ADJ_VERT__SHIFT) & A6XX_GRAS_CL_GUARDBAND_CLIP_ADJ_VERT__MASK; +} + +#define REG_A6XX_GRAS_CL_VPORT_XOFFSET_0 0x00008010 +#define A6XX_GRAS_CL_VPORT_XOFFSET_0__MASK 0xffffffff +#define A6XX_GRAS_CL_VPORT_XOFFSET_0__SHIFT 0 +static inline uint32_t A6XX_GRAS_CL_VPORT_XOFFSET_0(float val) +{ + return ((fui(val)) << A6XX_GRAS_CL_VPORT_XOFFSET_0__SHIFT) & A6XX_GRAS_CL_VPORT_XOFFSET_0__MASK; +} + +#define REG_A6XX_GRAS_CL_VPORT_XSCALE_0 0x00008011 +#define A6XX_GRAS_CL_VPORT_XSCALE_0__MASK 0xffffffff +#define A6XX_GRAS_CL_VPORT_XSCALE_0__SHIFT 0 +static inline uint32_t 
A6XX_GRAS_CL_VPORT_XSCALE_0(float val) +{ + return ((fui(val)) << A6XX_GRAS_CL_VPORT_XSCALE_0__SHIFT) & A6XX_GRAS_CL_VPORT_XSCALE_0__MASK; +} + +#define REG_A6XX_GRAS_CL_VPORT_YOFFSET_0 0x00008012 +#define A6XX_GRAS_CL_VPORT_YOFFSET_0__MASK 0xffffffff +#define A6XX_GRAS_CL_VPORT_YOFFSET_0__SHIFT 0 +static inline uint32_t A6XX_GRAS_CL_VPORT_YOFFSET_0(float val) +{ + return ((fui(val)) << A6XX_GRAS_CL_VPORT_YOFFSET_0__SHIFT) & A6XX_GRAS_CL_VPORT_YOFFSET_0__MASK; +} + +#define REG_A6XX_GRAS_CL_VPORT_YSCALE_0 0x00008013 +#define A6XX_GRAS_CL_VPORT_YSCALE_0__MASK 0xffffffff +#define A6XX_GRAS_CL_VPORT_YSCALE_0__SHIFT 0 +static inline uint32_t A6XX_GRAS_CL_VPORT_YSCALE_0(float val) +{ + return ((fui(val)) << A6XX_GRAS_CL_VPORT_YSCALE_0__SHIFT) & A6XX_GRAS_CL_VPORT_YSCALE_0__MASK; +} + +#define REG_A6XX_GRAS_CL_VPORT_ZOFFSET_0 0x00008014 +#define A6XX_GRAS_CL_VPORT_ZOFFSET_0__MASK 0xffffffff +#define A6XX_GRAS_CL_VPORT_ZOFFSET_0__SHIFT 0 +static inline uint32_t A6XX_GRAS_CL_VPORT_ZOFFSET_0(float val) +{ + return ((fui(val)) << A6XX_GRAS_CL_VPORT_ZOFFSET_0__SHIFT) & A6XX_GRAS_CL_VPORT_ZOFFSET_0__MASK; +} + +#define REG_A6XX_GRAS_CL_VPORT_ZSCALE_0 0x00008015 +#define A6XX_GRAS_CL_VPORT_ZSCALE_0__MASK 0xffffffff +#define A6XX_GRAS_CL_VPORT_ZSCALE_0__SHIFT 0 +static inline uint32_t A6XX_GRAS_CL_VPORT_ZSCALE_0(float val) +{ + return ((fui(val)) << A6XX_GRAS_CL_VPORT_ZSCALE_0__SHIFT) & A6XX_GRAS_CL_VPORT_ZSCALE_0__MASK; +} + +#define REG_A6XX_GRAS_SU_CNTL 0x00008090 +#define A6XX_GRAS_SU_CNTL_CULL_FRONT 0x00000001 +#define A6XX_GRAS_SU_CNTL_CULL_BACK 0x00000002 +#define A6XX_GRAS_SU_CNTL_FRONT_CW 0x00000004 +#define A6XX_GRAS_SU_CNTL_LINEHALFWIDTH__MASK 0x000007f8 +#define A6XX_GRAS_SU_CNTL_LINEHALFWIDTH__SHIFT 3 +static inline uint32_t A6XX_GRAS_SU_CNTL_LINEHALFWIDTH(float val) +{ + return ((((int32_t)(val * 4.0))) << A6XX_GRAS_SU_CNTL_LINEHALFWIDTH__SHIFT) & A6XX_GRAS_SU_CNTL_LINEHALFWIDTH__MASK; +} +#define A6XX_GRAS_SU_CNTL_POLY_OFFSET 0x00000800 +#define 
A6XX_GRAS_SU_CNTL_MSAA_ENABLE 0x00002000 + +#define REG_A6XX_GRAS_SU_POINT_MINMAX 0x00008091 +#define A6XX_GRAS_SU_POINT_MINMAX_MIN__MASK 0x0000ffff +#define A6XX_GRAS_SU_POINT_MINMAX_MIN__SHIFT 0 +static inline uint32_t A6XX_GRAS_SU_POINT_MINMAX_MIN(float val) +{ + return ((((uint32_t)(val * 16.0))) << A6XX_GRAS_SU_POINT_MINMAX_MIN__SHIFT) & A6XX_GRAS_SU_POINT_MINMAX_MIN__MASK; +} +#define A6XX_GRAS_SU_POINT_MINMAX_MAX__MASK 0xffff0000 +#define A6XX_GRAS_SU_POINT_MINMAX_MAX__SHIFT 16 +static inline uint32_t A6XX_GRAS_SU_POINT_MINMAX_MAX(float val) +{ + return ((((uint32_t)(val * 16.0))) << A6XX_GRAS_SU_POINT_MINMAX_MAX__SHIFT) & A6XX_GRAS_SU_POINT_MINMAX_MAX__MASK; +} + +#define REG_A6XX_GRAS_SU_POINT_SIZE 0x00008092 +#define A6XX_GRAS_SU_POINT_SIZE__MASK 0xffffffff +#define A6XX_GRAS_SU_POINT_SIZE__SHIFT 0 +static inline uint32_t A6XX_GRAS_SU_POINT_SIZE(float val) +{ + return ((((int32_t)(val * 16.0))) << A6XX_GRAS_SU_POINT_SIZE__SHIFT) & A6XX_GRAS_SU_POINT_SIZE__MASK; +} + +#define REG_A6XX_GRAS_SU_DEPTH_PLANE_CNTL 0x00008094 +#define A6XX_GRAS_SU_DEPTH_PLANE_CNTL_FRAG_WRITES_Z 0x00000001 + +#define REG_A6XX_GRAS_SU_POLY_OFFSET_SCALE 0x00008095 +#define A6XX_GRAS_SU_POLY_OFFSET_SCALE__MASK 0xffffffff +#define A6XX_GRAS_SU_POLY_OFFSET_SCALE__SHIFT 0 +static inline uint32_t A6XX_GRAS_SU_POLY_OFFSET_SCALE(float val) +{ + return ((fui(val)) << A6XX_GRAS_SU_POLY_OFFSET_SCALE__SHIFT) & A6XX_GRAS_SU_POLY_OFFSET_SCALE__MASK; +} + +#define REG_A6XX_GRAS_SU_POLY_OFFSET_OFFSET 0x00008096 +#define A6XX_GRAS_SU_POLY_OFFSET_OFFSET__MASK 0xffffffff +#define A6XX_GRAS_SU_POLY_OFFSET_OFFSET__SHIFT 0 +static inline uint32_t A6XX_GRAS_SU_POLY_OFFSET_OFFSET(float val) +{ + return ((fui(val)) << A6XX_GRAS_SU_POLY_OFFSET_OFFSET__SHIFT) & A6XX_GRAS_SU_POLY_OFFSET_OFFSET__MASK; +} + +#define REG_A6XX_GRAS_SU_POLY_OFFSET_OFFSET_CLAMP 0x00008097 +#define A6XX_GRAS_SU_POLY_OFFSET_OFFSET_CLAMP__MASK 0xffffffff +#define A6XX_GRAS_SU_POLY_OFFSET_OFFSET_CLAMP__SHIFT 0 +static inline uint32_t 
A6XX_GRAS_SU_POLY_OFFSET_OFFSET_CLAMP(float val) +{ + return ((fui(val)) << A6XX_GRAS_SU_POLY_OFFSET_OFFSET_CLAMP__SHIFT) & A6XX_GRAS_SU_POLY_OFFSET_OFFSET_CLAMP__MASK; +} + +#define REG_A6XX_GRAS_SU_DEPTH_BUFFER_INFO 0x00008098 +#define A6XX_GRAS_SU_DEPTH_BUFFER_INFO_DEPTH_FORMAT__MASK 0x00000007 +#define A6XX_GRAS_SU_DEPTH_BUFFER_INFO_DEPTH_FORMAT__SHIFT 0 +static inline uint32_t A6XX_GRAS_SU_DEPTH_BUFFER_INFO_DEPTH_FORMAT(enum a6xx_depth_format val) +{ + return ((val) << A6XX_GRAS_SU_DEPTH_BUFFER_INFO_DEPTH_FORMAT__SHIFT) & A6XX_GRAS_SU_DEPTH_BUFFER_INFO_DEPTH_FORMAT__MASK; +} + +#define REG_A6XX_GRAS_UNKNOWN_8099 0x00008099 + +#define REG_A6XX_GRAS_UNKNOWN_809B 0x0000809b + +#define REG_A6XX_GRAS_UNKNOWN_80A0 0x000080a0 + +#define REG_A6XX_GRAS_RAS_MSAA_CNTL 0x000080a2 +#define A6XX_GRAS_RAS_MSAA_CNTL_SAMPLES__MASK 0x00000003 +#define A6XX_GRAS_RAS_MSAA_CNTL_SAMPLES__SHIFT 0 +static inline uint32_t A6XX_GRAS_RAS_MSAA_CNTL_SAMPLES(enum a3xx_msaa_samples val) +{ + return ((val) << A6XX_GRAS_RAS_MSAA_CNTL_SAMPLES__SHIFT) & A6XX_GRAS_RAS_MSAA_CNTL_SAMPLES__MASK; +} + +#define REG_A6XX_GRAS_DEST_MSAA_CNTL 0x000080a3 +#define A6XX_GRAS_DEST_MSAA_CNTL_SAMPLES__MASK 0x00000003 +#define A6XX_GRAS_DEST_MSAA_CNTL_SAMPLES__SHIFT 0 +static inline uint32_t A6XX_GRAS_DEST_MSAA_CNTL_SAMPLES(enum a3xx_msaa_samples val) +{ + return ((val) << A6XX_GRAS_DEST_MSAA_CNTL_SAMPLES__SHIFT) & A6XX_GRAS_DEST_MSAA_CNTL_SAMPLES__MASK; +} +#define A6XX_GRAS_DEST_MSAA_CNTL_MSAA_DISABLE 0x00000004 + +#define REG_A6XX_GRAS_UNKNOWN_80A4 0x000080a4 + +#define REG_A6XX_GRAS_UNKNOWN_80A5 0x000080a5 + +#define REG_A6XX_GRAS_UNKNOWN_80A6 0x000080a6 + +#define REG_A6XX_GRAS_UNKNOWN_80AF 0x000080af + +#define REG_A6XX_GRAS_SC_SCREEN_SCISSOR_TL_0 0x000080b0 +#define A6XX_GRAS_SC_SCREEN_SCISSOR_TL_0_WINDOW_OFFSET_DISABLE 0x80000000 +#define A6XX_GRAS_SC_SCREEN_SCISSOR_TL_0_X__MASK 0x00007fff +#define A6XX_GRAS_SC_SCREEN_SCISSOR_TL_0_X__SHIFT 0 +static inline uint32_t 
A6XX_GRAS_SC_SCREEN_SCISSOR_TL_0_X(uint32_t val) +{ + return ((val) << A6XX_GRAS_SC_SCREEN_SCISSOR_TL_0_X__SHIFT) & A6XX_GRAS_SC_SCREEN_SCISSOR_TL_0_X__MASK; +} +#define A6XX_GRAS_SC_SCREEN_SCISSOR_TL_0_Y__MASK 0x7fff0000 +#define A6XX_GRAS_SC_SCREEN_SCISSOR_TL_0_Y__SHIFT 16 +static inline uint32_t A6XX_GRAS_SC_SCREEN_SCISSOR_TL_0_Y(uint32_t val) +{ + return ((val) << A6XX_GRAS_SC_SCREEN_SCISSOR_TL_0_Y__SHIFT) & A6XX_GRAS_SC_SCREEN_SCISSOR_TL_0_Y__MASK; +} + +#define REG_A6XX_GRAS_SC_SCREEN_SCISSOR_BR_0 0x000080b1 +#define A6XX_GRAS_SC_SCREEN_SCISSOR_BR_0_WINDOW_OFFSET_DISABLE 0x80000000 +#define A6XX_GRAS_SC_SCREEN_SCISSOR_BR_0_X__MASK 0x00007fff +#define A6XX_GRAS_SC_SCREEN_SCISSOR_BR_0_X__SHIFT 0 +static inline uint32_t A6XX_GRAS_SC_SCREEN_SCISSOR_BR_0_X(uint32_t val) +{ + return ((val) << A6XX_GRAS_SC_SCREEN_SCISSOR_BR_0_X__SHIFT) & A6XX_GRAS_SC_SCREEN_SCISSOR_BR_0_X__MASK; +} +#define A6XX_GRAS_SC_SCREEN_SCISSOR_BR_0_Y__MASK 0x7fff0000 +#define A6XX_GRAS_SC_SCREEN_SCISSOR_BR_0_Y__SHIFT 16 +static inline uint32_t A6XX_GRAS_SC_SCREEN_SCISSOR_BR_0_Y(uint32_t val) +{ + return ((val) << A6XX_GRAS_SC_SCREEN_SCISSOR_BR_0_Y__SHIFT) & A6XX_GRAS_SC_SCREEN_SCISSOR_BR_0_Y__MASK; +} + +#define REG_A6XX_GRAS_SC_VIEWPORT_SCISSOR_TL_0 0x000080d0 +#define A6XX_GRAS_SC_VIEWPORT_SCISSOR_TL_0_WINDOW_OFFSET_DISABLE 0x80000000 +#define A6XX_GRAS_SC_VIEWPORT_SCISSOR_TL_0_X__MASK 0x00007fff +#define A6XX_GRAS_SC_VIEWPORT_SCISSOR_TL_0_X__SHIFT 0 +static inline uint32_t A6XX_GRAS_SC_VIEWPORT_SCISSOR_TL_0_X(uint32_t val) +{ + return ((val) << A6XX_GRAS_SC_VIEWPORT_SCISSOR_TL_0_X__SHIFT) & A6XX_GRAS_SC_VIEWPORT_SCISSOR_TL_0_X__MASK; +} +#define A6XX_GRAS_SC_VIEWPORT_SCISSOR_TL_0_Y__MASK 0x7fff0000 +#define A6XX_GRAS_SC_VIEWPORT_SCISSOR_TL_0_Y__SHIFT 16 +static inline uint32_t A6XX_GRAS_SC_VIEWPORT_SCISSOR_TL_0_Y(uint32_t val) +{ + return ((val) << A6XX_GRAS_SC_VIEWPORT_SCISSOR_TL_0_Y__SHIFT) & A6XX_GRAS_SC_VIEWPORT_SCISSOR_TL_0_Y__MASK; +} + +#define 
REG_A6XX_GRAS_SC_VIEWPORT_SCISSOR_BR_0 0x000080d1 +#define A6XX_GRAS_SC_VIEWPORT_SCISSOR_BR_0_WINDOW_OFFSET_DISABLE 0x80000000 +#define A6XX_GRAS_SC_VIEWPORT_SCISSOR_BR_0_X__MASK 0x00007fff +#define A6XX_GRAS_SC_VIEWPORT_SCISSOR_BR_0_X__SHIFT 0 +static inline uint32_t A6XX_GRAS_SC_VIEWPORT_SCISSOR_BR_0_X(uint32_t val) +{ + return ((val) << A6XX_GRAS_SC_VIEWPORT_SCISSOR_BR_0_X__SHIFT) & A6XX_GRAS_SC_VIEWPORT_SCISSOR_BR_0_X__MASK; +} +#define A6XX_GRAS_SC_VIEWPORT_SCISSOR_BR_0_Y__MASK 0x7fff0000 +#define A6XX_GRAS_SC_VIEWPORT_SCISSOR_BR_0_Y__SHIFT 16 +static inline uint32_t A6XX_GRAS_SC_VIEWPORT_SCISSOR_BR_0_Y(uint32_t val) +{ + return ((val) << A6XX_GRAS_SC_VIEWPORT_SCISSOR_BR_0_Y__SHIFT) & A6XX_GRAS_SC_VIEWPORT_SCISSOR_BR_0_Y__MASK; +} + +#define REG_A6XX_GRAS_SC_WINDOW_SCISSOR_TL 0x000080f0 +#define A6XX_GRAS_SC_WINDOW_SCISSOR_TL_WINDOW_OFFSET_DISABLE 0x80000000 +#define A6XX_GRAS_SC_WINDOW_SCISSOR_TL_X__MASK 0x00007fff +#define A6XX_GRAS_SC_WINDOW_SCISSOR_TL_X__SHIFT 0 +static inline uint32_t A6XX_GRAS_SC_WINDOW_SCISSOR_TL_X(uint32_t val) +{ + return ((val) << A6XX_GRAS_SC_WINDOW_SCISSOR_TL_X__SHIFT) & A6XX_GRAS_SC_WINDOW_SCISSOR_TL_X__MASK; +} +#define A6XX_GRAS_SC_WINDOW_SCISSOR_TL_Y__MASK 0x7fff0000 +#define A6XX_GRAS_SC_WINDOW_SCISSOR_TL_Y__SHIFT 16 +static inline uint32_t A6XX_GRAS_SC_WINDOW_SCISSOR_TL_Y(uint32_t val) +{ + return ((val) << A6XX_GRAS_SC_WINDOW_SCISSOR_TL_Y__SHIFT) & A6XX_GRAS_SC_WINDOW_SCISSOR_TL_Y__MASK; +} + +#define REG_A6XX_GRAS_SC_WINDOW_SCISSOR_BR 0x000080f1 +#define A6XX_GRAS_SC_WINDOW_SCISSOR_BR_WINDOW_OFFSET_DISABLE 0x80000000 +#define A6XX_GRAS_SC_WINDOW_SCISSOR_BR_X__MASK 0x00007fff +#define A6XX_GRAS_SC_WINDOW_SCISSOR_BR_X__SHIFT 0 +static inline uint32_t A6XX_GRAS_SC_WINDOW_SCISSOR_BR_X(uint32_t val) +{ + return ((val) << A6XX_GRAS_SC_WINDOW_SCISSOR_BR_X__SHIFT) & A6XX_GRAS_SC_WINDOW_SCISSOR_BR_X__MASK; +} +#define A6XX_GRAS_SC_WINDOW_SCISSOR_BR_Y__MASK 0x7fff0000 +#define A6XX_GRAS_SC_WINDOW_SCISSOR_BR_Y__SHIFT 16 +static inline 
uint32_t A6XX_GRAS_SC_WINDOW_SCISSOR_BR_Y(uint32_t val) +{ + return ((val) << A6XX_GRAS_SC_WINDOW_SCISSOR_BR_Y__SHIFT) & A6XX_GRAS_SC_WINDOW_SCISSOR_BR_Y__MASK; +} + +#define REG_A6XX_GRAS_LRZ_CNTL 0x00008100 +#define A6XX_GRAS_LRZ_CNTL_ENABLE 0x00000001 +#define A6XX_GRAS_LRZ_CNTL_LRZ_WRITE 0x00000002 +#define A6XX_GRAS_LRZ_CNTL_GREATER 0x00000004 +#define A6XX_GRAS_LRZ_CNTL_UNK3 0x00000008 +#define A6XX_GRAS_LRZ_CNTL_UNK4 0x00000010 + +#define REG_A6XX_GRAS_UNKNOWN_8101 0x00008101 + +#define REG_A6XX_GRAS_2D_BLIT_INFO 0x00008102 +#define A6XX_GRAS_2D_BLIT_INFO_COLOR_FORMAT__MASK 0x000000ff +#define A6XX_GRAS_2D_BLIT_INFO_COLOR_FORMAT__SHIFT 0 +static inline uint32_t A6XX_GRAS_2D_BLIT_INFO_COLOR_FORMAT(enum a6xx_color_fmt val) +{ + return ((val) << A6XX_GRAS_2D_BLIT_INFO_COLOR_FORMAT__SHIFT) & A6XX_GRAS_2D_BLIT_INFO_COLOR_FORMAT__MASK; +} + +#define REG_A6XX_GRAS_LRZ_BUFFER_BASE_LO 0x00008103 + +#define REG_A6XX_GRAS_LRZ_BUFFER_BASE_HI 0x00008104 + +#define REG_A6XX_GRAS_LRZ_BUFFER_PITCH 0x00008105 +#define A6XX_GRAS_LRZ_BUFFER_PITCH_PITCH__MASK 0x000007ff +#define A6XX_GRAS_LRZ_BUFFER_PITCH_PITCH__SHIFT 0 +static inline uint32_t A6XX_GRAS_LRZ_BUFFER_PITCH_PITCH(uint32_t val) +{ + assert(!(val & 0x1f)); + return ((val >> 5) << A6XX_GRAS_LRZ_BUFFER_PITCH_PITCH__SHIFT) & A6XX_GRAS_LRZ_BUFFER_PITCH_PITCH__MASK; +} +#define A6XX_GRAS_LRZ_BUFFER_PITCH_ARRAY_PITCH__MASK 0x003ff800 +#define A6XX_GRAS_LRZ_BUFFER_PITCH_ARRAY_PITCH__SHIFT 11 +static inline uint32_t A6XX_GRAS_LRZ_BUFFER_PITCH_ARRAY_PITCH(uint32_t val) +{ + assert(!(val & 0x1f)); + return ((val >> 5) << A6XX_GRAS_LRZ_BUFFER_PITCH_ARRAY_PITCH__SHIFT) & A6XX_GRAS_LRZ_BUFFER_PITCH_ARRAY_PITCH__MASK; +} + +#define REG_A6XX_GRAS_LRZ_FAST_CLEAR_BUFFER_BASE_LO 0x00008106 + +#define REG_A6XX_GRAS_LRZ_FAST_CLEAR_BUFFER_BASE_HI 0x00008107 + +#define REG_A6XX_GRAS_UNKNOWN_8109 0x00008109 + +#define REG_A6XX_GRAS_UNKNOWN_8110 0x00008110 + +#define REG_A6XX_GRAS_2D_BLIT_CNTL 0x00008400 +#define 
A6XX_GRAS_2D_BLIT_CNTL_COLOR_FORMAT__MASK 0x0000ff00 +#define A6XX_GRAS_2D_BLIT_CNTL_COLOR_FORMAT__SHIFT 8 +static inline uint32_t A6XX_GRAS_2D_BLIT_CNTL_COLOR_FORMAT(enum a6xx_color_fmt val) +{ + return ((val) << A6XX_GRAS_2D_BLIT_CNTL_COLOR_FORMAT__SHIFT) & A6XX_GRAS_2D_BLIT_CNTL_COLOR_FORMAT__MASK; +} +#define A6XX_GRAS_2D_BLIT_CNTL_SCISSOR 0x00010000 +#define A6XX_GRAS_2D_BLIT_CNTL_IFMT__MASK 0x1f000000 +#define A6XX_GRAS_2D_BLIT_CNTL_IFMT__SHIFT 24 +static inline uint32_t A6XX_GRAS_2D_BLIT_CNTL_IFMT(enum a6xx_2d_ifmt val) +{ + return ((val) << A6XX_GRAS_2D_BLIT_CNTL_IFMT__SHIFT) & A6XX_GRAS_2D_BLIT_CNTL_IFMT__MASK; +} + +#define REG_A6XX_GRAS_2D_SRC_TL_X 0x00008401 +#define A6XX_GRAS_2D_SRC_TL_X_X__MASK 0x00ffff00 +#define A6XX_GRAS_2D_SRC_TL_X_X__SHIFT 8 +static inline uint32_t A6XX_GRAS_2D_SRC_TL_X_X(uint32_t val) +{ + return ((val) << A6XX_GRAS_2D_SRC_TL_X_X__SHIFT) & A6XX_GRAS_2D_SRC_TL_X_X__MASK; +} + +#define REG_A6XX_GRAS_2D_SRC_BR_X 0x00008402 +#define A6XX_GRAS_2D_SRC_BR_X_X__MASK 0x00ffff00 +#define A6XX_GRAS_2D_SRC_BR_X_X__SHIFT 8 +static inline uint32_t A6XX_GRAS_2D_SRC_BR_X_X(uint32_t val) +{ + return ((val) << A6XX_GRAS_2D_SRC_BR_X_X__SHIFT) & A6XX_GRAS_2D_SRC_BR_X_X__MASK; +} + +#define REG_A6XX_GRAS_2D_SRC_TL_Y 0x00008403 +#define A6XX_GRAS_2D_SRC_TL_Y_Y__MASK 0x00ffff00 +#define A6XX_GRAS_2D_SRC_TL_Y_Y__SHIFT 8 +static inline uint32_t A6XX_GRAS_2D_SRC_TL_Y_Y(uint32_t val) +{ + return ((val) << A6XX_GRAS_2D_SRC_TL_Y_Y__SHIFT) & A6XX_GRAS_2D_SRC_TL_Y_Y__MASK; +} + +#define REG_A6XX_GRAS_2D_SRC_BR_Y 0x00008404 +#define A6XX_GRAS_2D_SRC_BR_Y_Y__MASK 0x00ffff00 +#define A6XX_GRAS_2D_SRC_BR_Y_Y__SHIFT 8 +static inline uint32_t A6XX_GRAS_2D_SRC_BR_Y_Y(uint32_t val) +{ + return ((val) << A6XX_GRAS_2D_SRC_BR_Y_Y__SHIFT) & A6XX_GRAS_2D_SRC_BR_Y_Y__MASK; +} + +#define REG_A6XX_GRAS_2D_DST_TL 0x00008405 +#define A6XX_GRAS_2D_DST_TL_WINDOW_OFFSET_DISABLE 0x80000000 +#define A6XX_GRAS_2D_DST_TL_X__MASK 0x00007fff +#define A6XX_GRAS_2D_DST_TL_X__SHIFT 0 
+static inline uint32_t A6XX_GRAS_2D_DST_TL_X(uint32_t val) +{ + return ((val) << A6XX_GRAS_2D_DST_TL_X__SHIFT) & A6XX_GRAS_2D_DST_TL_X__MASK; +} +#define A6XX_GRAS_2D_DST_TL_Y__MASK 0x7fff0000 +#define A6XX_GRAS_2D_DST_TL_Y__SHIFT 16 +static inline uint32_t A6XX_GRAS_2D_DST_TL_Y(uint32_t val) +{ + return ((val) << A6XX_GRAS_2D_DST_TL_Y__SHIFT) & A6XX_GRAS_2D_DST_TL_Y__MASK; +} + +#define REG_A6XX_GRAS_2D_DST_BR 0x00008406 +#define A6XX_GRAS_2D_DST_BR_WINDOW_OFFSET_DISABLE 0x80000000 +#define A6XX_GRAS_2D_DST_BR_X__MASK 0x00007fff +#define A6XX_GRAS_2D_DST_BR_X__SHIFT 0 +static inline uint32_t A6XX_GRAS_2D_DST_BR_X(uint32_t val) +{ + return ((val) << A6XX_GRAS_2D_DST_BR_X__SHIFT) & A6XX_GRAS_2D_DST_BR_X__MASK; +} +#define A6XX_GRAS_2D_DST_BR_Y__MASK 0x7fff0000 +#define A6XX_GRAS_2D_DST_BR_Y__SHIFT 16 +static inline uint32_t A6XX_GRAS_2D_DST_BR_Y(uint32_t val) +{ + return ((val) << A6XX_GRAS_2D_DST_BR_Y__SHIFT) & A6XX_GRAS_2D_DST_BR_Y__MASK; +} + +#define REG_A6XX_GRAS_RESOLVE_CNTL_1 0x0000840a +#define A6XX_GRAS_RESOLVE_CNTL_1_WINDOW_OFFSET_DISABLE 0x80000000 +#define A6XX_GRAS_RESOLVE_CNTL_1_X__MASK 0x00007fff +#define A6XX_GRAS_RESOLVE_CNTL_1_X__SHIFT 0 +static inline uint32_t A6XX_GRAS_RESOLVE_CNTL_1_X(uint32_t val) +{ + return ((val) << A6XX_GRAS_RESOLVE_CNTL_1_X__SHIFT) & A6XX_GRAS_RESOLVE_CNTL_1_X__MASK; +} +#define A6XX_GRAS_RESOLVE_CNTL_1_Y__MASK 0x7fff0000 +#define A6XX_GRAS_RESOLVE_CNTL_1_Y__SHIFT 16 +static inline uint32_t A6XX_GRAS_RESOLVE_CNTL_1_Y(uint32_t val) +{ + return ((val) << A6XX_GRAS_RESOLVE_CNTL_1_Y__SHIFT) & A6XX_GRAS_RESOLVE_CNTL_1_Y__MASK; +} + +#define REG_A6XX_GRAS_RESOLVE_CNTL_2 0x0000840b +#define A6XX_GRAS_RESOLVE_CNTL_2_WINDOW_OFFSET_DISABLE 0x80000000 +#define A6XX_GRAS_RESOLVE_CNTL_2_X__MASK 0x00007fff +#define A6XX_GRAS_RESOLVE_CNTL_2_X__SHIFT 0 +static inline uint32_t A6XX_GRAS_RESOLVE_CNTL_2_X(uint32_t val) +{ + return ((val) << A6XX_GRAS_RESOLVE_CNTL_2_X__SHIFT) & A6XX_GRAS_RESOLVE_CNTL_2_X__MASK; +} +#define 
A6XX_GRAS_RESOLVE_CNTL_2_Y__MASK 0x7fff0000 +#define A6XX_GRAS_RESOLVE_CNTL_2_Y__SHIFT 16 +static inline uint32_t A6XX_GRAS_RESOLVE_CNTL_2_Y(uint32_t val) +{ + return ((val) << A6XX_GRAS_RESOLVE_CNTL_2_Y__SHIFT) & A6XX_GRAS_RESOLVE_CNTL_2_Y__MASK; +} + +#define REG_A6XX_GRAS_UNKNOWN_8600 0x00008600 + +#define REG_A6XX_RB_BIN_CONTROL 0x00008800 +#define A6XX_RB_BIN_CONTROL_BINW__MASK 0x000000ff +#define A6XX_RB_BIN_CONTROL_BINW__SHIFT 0 +static inline uint32_t A6XX_RB_BIN_CONTROL_BINW(uint32_t val) +{ + assert(!(val & 0x1f)); + return ((val >> 5) << A6XX_RB_BIN_CONTROL_BINW__SHIFT) & A6XX_RB_BIN_CONTROL_BINW__MASK; +} +#define A6XX_RB_BIN_CONTROL_BINH__MASK 0x0001ff00 +#define A6XX_RB_BIN_CONTROL_BINH__SHIFT 8 +static inline uint32_t A6XX_RB_BIN_CONTROL_BINH(uint32_t val) +{ + assert(!(val & 0xf)); + return ((val >> 4) << A6XX_RB_BIN_CONTROL_BINH__SHIFT) & A6XX_RB_BIN_CONTROL_BINH__MASK; +} +#define A6XX_RB_BIN_CONTROL_BINNING_PASS 0x00040000 +#define A6XX_RB_BIN_CONTROL_USE_VIZ 0x00200000 + +#define REG_A6XX_RB_RENDER_CNTL 0x00008801 +#define A6XX_RB_RENDER_CNTL_UNK4 0x00000010 +#define A6XX_RB_RENDER_CNTL_BINNING 0x00000080 +#define A6XX_RB_RENDER_CNTL_FLAG_DEPTH 0x00004000 +#define A6XX_RB_RENDER_CNTL_FLAG_MRTS__MASK 0x00ff0000 +#define A6XX_RB_RENDER_CNTL_FLAG_MRTS__SHIFT 16 +static inline uint32_t A6XX_RB_RENDER_CNTL_FLAG_MRTS(uint32_t val) +{ + return ((val) << A6XX_RB_RENDER_CNTL_FLAG_MRTS__SHIFT) & A6XX_RB_RENDER_CNTL_FLAG_MRTS__MASK; +} + +#define REG_A6XX_RB_RAS_MSAA_CNTL 0x00008802 +#define A6XX_RB_RAS_MSAA_CNTL_SAMPLES__MASK 0x00000003 +#define A6XX_RB_RAS_MSAA_CNTL_SAMPLES__SHIFT 0 +static inline uint32_t A6XX_RB_RAS_MSAA_CNTL_SAMPLES(enum a3xx_msaa_samples val) +{ + return ((val) << A6XX_RB_RAS_MSAA_CNTL_SAMPLES__SHIFT) & A6XX_RB_RAS_MSAA_CNTL_SAMPLES__MASK; +} + +#define REG_A6XX_RB_DEST_MSAA_CNTL 0x00008803 +#define A6XX_RB_DEST_MSAA_CNTL_SAMPLES__MASK 0x00000003 +#define A6XX_RB_DEST_MSAA_CNTL_SAMPLES__SHIFT 0 +static inline uint32_t 
A6XX_RB_DEST_MSAA_CNTL_SAMPLES(enum a3xx_msaa_samples val) +{ + return ((val) << A6XX_RB_DEST_MSAA_CNTL_SAMPLES__SHIFT) & A6XX_RB_DEST_MSAA_CNTL_SAMPLES__MASK; +} +#define A6XX_RB_DEST_MSAA_CNTL_MSAA_DISABLE 0x00000004 + +#define REG_A6XX_RB_UNKNOWN_8804 0x00008804 + +#define REG_A6XX_RB_UNKNOWN_8805 0x00008805 + +#define REG_A6XX_RB_UNKNOWN_8806 0x00008806 + +#define REG_A6XX_RB_RENDER_CONTROL0 0x00008809 +#define A6XX_RB_RENDER_CONTROL0_VARYING 0x00000001 +#define A6XX_RB_RENDER_CONTROL0_UNK3 0x00000008 +#define A6XX_RB_RENDER_CONTROL0_XCOORD 0x00000040 +#define A6XX_RB_RENDER_CONTROL0_YCOORD 0x00000080 +#define A6XX_RB_RENDER_CONTROL0_ZCOORD 0x00000100 +#define A6XX_RB_RENDER_CONTROL0_WCOORD 0x00000200 +#define A6XX_RB_RENDER_CONTROL0_UNK10 0x00000400 + +#define REG_A6XX_RB_RENDER_CONTROL1 0x0000880a +#define A6XX_RB_RENDER_CONTROL1_SAMPLEMASK 0x00000001 +#define A6XX_RB_RENDER_CONTROL1_FACENESS 0x00000002 +#define A6XX_RB_RENDER_CONTROL1_SAMPLEID 0x00000008 + +#define REG_A6XX_RB_FS_OUTPUT_CNTL0 0x0000880b +#define A6XX_RB_FS_OUTPUT_CNTL0_FRAG_WRITES_Z 0x00000002 + +#define REG_A6XX_RB_FS_OUTPUT_CNTL1 0x0000880c +#define A6XX_RB_FS_OUTPUT_CNTL1_MRT__MASK 0x0000000f +#define A6XX_RB_FS_OUTPUT_CNTL1_MRT__SHIFT 0 +static inline uint32_t A6XX_RB_FS_OUTPUT_CNTL1_MRT(uint32_t val) +{ + return ((val) << A6XX_RB_FS_OUTPUT_CNTL1_MRT__SHIFT) & A6XX_RB_FS_OUTPUT_CNTL1_MRT__MASK; +} + +#define REG_A6XX_RB_RENDER_COMPONENTS 0x0000880d +#define A6XX_RB_RENDER_COMPONENTS_RT0__MASK 0x0000000f +#define A6XX_RB_RENDER_COMPONENTS_RT0__SHIFT 0 +static inline uint32_t A6XX_RB_RENDER_COMPONENTS_RT0(uint32_t val) +{ + return ((val) << A6XX_RB_RENDER_COMPONENTS_RT0__SHIFT) & A6XX_RB_RENDER_COMPONENTS_RT0__MASK; +} +#define A6XX_RB_RENDER_COMPONENTS_RT1__MASK 0x000000f0 +#define A6XX_RB_RENDER_COMPONENTS_RT1__SHIFT 4 +static inline uint32_t A6XX_RB_RENDER_COMPONENTS_RT1(uint32_t val) +{ + return ((val) << A6XX_RB_RENDER_COMPONENTS_RT1__SHIFT) & A6XX_RB_RENDER_COMPONENTS_RT1__MASK; +} 
+#define A6XX_RB_RENDER_COMPONENTS_RT2__MASK 0x00000f00 +#define A6XX_RB_RENDER_COMPONENTS_RT2__SHIFT 8 +static inline uint32_t A6XX_RB_RENDER_COMPONENTS_RT2(uint32_t val) +{ + return ((val) << A6XX_RB_RENDER_COMPONENTS_RT2__SHIFT) & A6XX_RB_RENDER_COMPONENTS_RT2__MASK; +} +#define A6XX_RB_RENDER_COMPONENTS_RT3__MASK 0x0000f000 +#define A6XX_RB_RENDER_COMPONENTS_RT3__SHIFT 12 +static inline uint32_t A6XX_RB_RENDER_COMPONENTS_RT3(uint32_t val) +{ + return ((val) << A6XX_RB_RENDER_COMPONENTS_RT3__SHIFT) & A6XX_RB_RENDER_COMPONENTS_RT3__MASK; +} +#define A6XX_RB_RENDER_COMPONENTS_RT4__MASK 0x000f0000 +#define A6XX_RB_RENDER_COMPONENTS_RT4__SHIFT 16 +static inline uint32_t A6XX_RB_RENDER_COMPONENTS_RT4(uint32_t val) +{ + return ((val) << A6XX_RB_RENDER_COMPONENTS_RT4__SHIFT) & A6XX_RB_RENDER_COMPONENTS_RT4__MASK; +} +#define A6XX_RB_RENDER_COMPONENTS_RT5__MASK 0x00f00000 +#define A6XX_RB_RENDER_COMPONENTS_RT5__SHIFT 20 +static inline uint32_t A6XX_RB_RENDER_COMPONENTS_RT5(uint32_t val) +{ + return ((val) << A6XX_RB_RENDER_COMPONENTS_RT5__SHIFT) & A6XX_RB_RENDER_COMPONENTS_RT5__MASK; +} +#define A6XX_RB_RENDER_COMPONENTS_RT6__MASK 0x0f000000 +#define A6XX_RB_RENDER_COMPONENTS_RT6__SHIFT 24 +static inline uint32_t A6XX_RB_RENDER_COMPONENTS_RT6(uint32_t val) +{ + return ((val) << A6XX_RB_RENDER_COMPONENTS_RT6__SHIFT) & A6XX_RB_RENDER_COMPONENTS_RT6__MASK; +} +#define A6XX_RB_RENDER_COMPONENTS_RT7__MASK 0xf0000000 +#define A6XX_RB_RENDER_COMPONENTS_RT7__SHIFT 28 +static inline uint32_t A6XX_RB_RENDER_COMPONENTS_RT7(uint32_t val) +{ + return ((val) << A6XX_RB_RENDER_COMPONENTS_RT7__SHIFT) & A6XX_RB_RENDER_COMPONENTS_RT7__MASK; +} + +#define REG_A6XX_RB_DITHER_CNTL 0x0000880e +#define A6XX_RB_DITHER_CNTL_DITHER_MODE_MRT0__MASK 0x00000003 +#define A6XX_RB_DITHER_CNTL_DITHER_MODE_MRT0__SHIFT 0 +static inline uint32_t A6XX_RB_DITHER_CNTL_DITHER_MODE_MRT0(enum adreno_rb_dither_mode val) +{ + return ((val) << A6XX_RB_DITHER_CNTL_DITHER_MODE_MRT0__SHIFT) & 
A6XX_RB_DITHER_CNTL_DITHER_MODE_MRT0__MASK; +} +#define A6XX_RB_DITHER_CNTL_DITHER_MODE_MRT1__MASK 0x0000000c +#define A6XX_RB_DITHER_CNTL_DITHER_MODE_MRT1__SHIFT 2 +static inline uint32_t A6XX_RB_DITHER_CNTL_DITHER_MODE_MRT1(enum adreno_rb_dither_mode val) +{ + return ((val) << A6XX_RB_DITHER_CNTL_DITHER_MODE_MRT1__SHIFT) & A6XX_RB_DITHER_CNTL_DITHER_MODE_MRT1__MASK; +} +#define A6XX_RB_DITHER_CNTL_DITHER_MODE_MRT2__MASK 0x00000030 +#define A6XX_RB_DITHER_CNTL_DITHER_MODE_MRT2__SHIFT 4 +static inline uint32_t A6XX_RB_DITHER_CNTL_DITHER_MODE_MRT2(enum adreno_rb_dither_mode val) +{ + return ((val) << A6XX_RB_DITHER_CNTL_DITHER_MODE_MRT2__SHIFT) & A6XX_RB_DITHER_CNTL_DITHER_MODE_MRT2__MASK; +} +#define A6XX_RB_DITHER_CNTL_DITHER_MODE_MRT3__MASK 0x000000c0 +#define A6XX_RB_DITHER_CNTL_DITHER_MODE_MRT3__SHIFT 6 +static inline uint32_t A6XX_RB_DITHER_CNTL_DITHER_MODE_MRT3(enum adreno_rb_dither_mode val) +{ + return ((val) << A6XX_RB_DITHER_CNTL_DITHER_MODE_MRT3__SHIFT) & A6XX_RB_DITHER_CNTL_DITHER_MODE_MRT3__MASK; +} +#define A6XX_RB_DITHER_CNTL_DITHER_MODE_MRT4__MASK 0x00000300 +#define A6XX_RB_DITHER_CNTL_DITHER_MODE_MRT4__SHIFT 8 +static inline uint32_t A6XX_RB_DITHER_CNTL_DITHER_MODE_MRT4(enum adreno_rb_dither_mode val) +{ + return ((val) << A6XX_RB_DITHER_CNTL_DITHER_MODE_MRT4__SHIFT) & A6XX_RB_DITHER_CNTL_DITHER_MODE_MRT4__MASK; +} +#define A6XX_RB_DITHER_CNTL_DITHER_MODE_MRT5__MASK 0x00000c00 +#define A6XX_RB_DITHER_CNTL_DITHER_MODE_MRT5__SHIFT 10 +static inline uint32_t A6XX_RB_DITHER_CNTL_DITHER_MODE_MRT5(enum adreno_rb_dither_mode val) +{ + return ((val) << A6XX_RB_DITHER_CNTL_DITHER_MODE_MRT5__SHIFT) & A6XX_RB_DITHER_CNTL_DITHER_MODE_MRT5__MASK; +} +#define A6XX_RB_DITHER_CNTL_DITHER_MODE_MRT6__MASK 0x00001000 +#define A6XX_RB_DITHER_CNTL_DITHER_MODE_MRT6__SHIFT 12 +static inline uint32_t A6XX_RB_DITHER_CNTL_DITHER_MODE_MRT6(enum adreno_rb_dither_mode val) +{ + return ((val) << A6XX_RB_DITHER_CNTL_DITHER_MODE_MRT6__SHIFT) & 
A6XX_RB_DITHER_CNTL_DITHER_MODE_MRT6__MASK; +} +#define A6XX_RB_DITHER_CNTL_DITHER_MODE_MRT7__MASK 0x0000c000 +#define A6XX_RB_DITHER_CNTL_DITHER_MODE_MRT7__SHIFT 14 +static inline uint32_t A6XX_RB_DITHER_CNTL_DITHER_MODE_MRT7(enum adreno_rb_dither_mode val) +{ + return ((val) << A6XX_RB_DITHER_CNTL_DITHER_MODE_MRT7__SHIFT) & A6XX_RB_DITHER_CNTL_DITHER_MODE_MRT7__MASK; +} + +#define REG_A6XX_RB_SRGB_CNTL 0x0000880f +#define A6XX_RB_SRGB_CNTL_SRGB_MRT0 0x00000001 +#define A6XX_RB_SRGB_CNTL_SRGB_MRT1 0x00000002 +#define A6XX_RB_SRGB_CNTL_SRGB_MRT2 0x00000004 +#define A6XX_RB_SRGB_CNTL_SRGB_MRT3 0x00000008 +#define A6XX_RB_SRGB_CNTL_SRGB_MRT4 0x00000010 +#define A6XX_RB_SRGB_CNTL_SRGB_MRT5 0x00000020 +#define A6XX_RB_SRGB_CNTL_SRGB_MRT6 0x00000040 +#define A6XX_RB_SRGB_CNTL_SRGB_MRT7 0x00000080 + +#define REG_A6XX_RB_UNKNOWN_8810 0x00008810 + +#define REG_A6XX_RB_UNKNOWN_8811 0x00008811 + +#define REG_A6XX_RB_UNKNOWN_8818 0x00008818 + +#define REG_A6XX_RB_UNKNOWN_8819 0x00008819 + +#define REG_A6XX_RB_UNKNOWN_881A 0x0000881a + +#define REG_A6XX_RB_UNKNOWN_881B 0x0000881b + +#define REG_A6XX_RB_UNKNOWN_881C 0x0000881c + +#define REG_A6XX_RB_UNKNOWN_881D 0x0000881d + +#define REG_A6XX_RB_UNKNOWN_881E 0x0000881e + +static inline uint32_t REG_A6XX_RB_MRT(uint32_t i0) { return 0x00008820 + 0x8*i0; } + +static inline uint32_t REG_A6XX_RB_MRT_CONTROL(uint32_t i0) { return 0x00008820 + 0x8*i0; } +#define A6XX_RB_MRT_CONTROL_BLEND 0x00000001 +#define A6XX_RB_MRT_CONTROL_BLEND2 0x00000002 +#define A6XX_RB_MRT_CONTROL_ROP_ENABLE 0x00000004 +#define A6XX_RB_MRT_CONTROL_ROP_CODE__MASK 0x00000078 +#define A6XX_RB_MRT_CONTROL_ROP_CODE__SHIFT 3 +static inline uint32_t A6XX_RB_MRT_CONTROL_ROP_CODE(enum a3xx_rop_code val) +{ + return ((val) << A6XX_RB_MRT_CONTROL_ROP_CODE__SHIFT) & A6XX_RB_MRT_CONTROL_ROP_CODE__MASK; +} +#define A6XX_RB_MRT_CONTROL_COMPONENT_ENABLE__MASK 0x00000780 +#define A6XX_RB_MRT_CONTROL_COMPONENT_ENABLE__SHIFT 7 +static inline uint32_t 
A6XX_RB_MRT_CONTROL_COMPONENT_ENABLE(uint32_t val) +{ + return ((val) << A6XX_RB_MRT_CONTROL_COMPONENT_ENABLE__SHIFT) & A6XX_RB_MRT_CONTROL_COMPONENT_ENABLE__MASK; +} + +static inline uint32_t REG_A6XX_RB_MRT_BLEND_CONTROL(uint32_t i0) { return 0x00008821 + 0x8*i0; } +#define A6XX_RB_MRT_BLEND_CONTROL_RGB_SRC_FACTOR__MASK 0x0000001f +#define A6XX_RB_MRT_BLEND_CONTROL_RGB_SRC_FACTOR__SHIFT 0 +static inline uint32_t A6XX_RB_MRT_BLEND_CONTROL_RGB_SRC_FACTOR(enum adreno_rb_blend_factor val) +{ + return ((val) << A6XX_RB_MRT_BLEND_CONTROL_RGB_SRC_FACTOR__SHIFT) & A6XX_RB_MRT_BLEND_CONTROL_RGB_SRC_FACTOR__MASK; +} +#define A6XX_RB_MRT_BLEND_CONTROL_RGB_BLEND_OPCODE__MASK 0x000000e0 +#define A6XX_RB_MRT_BLEND_CONTROL_RGB_BLEND_OPCODE__SHIFT 5 +static inline uint32_t A6XX_RB_MRT_BLEND_CONTROL_RGB_BLEND_OPCODE(enum a3xx_rb_blend_opcode val) +{ + return ((val) << A6XX_RB_MRT_BLEND_CONTROL_RGB_BLEND_OPCODE__SHIFT) & A6XX_RB_MRT_BLEND_CONTROL_RGB_BLEND_OPCODE__MASK; +} +#define A6XX_RB_MRT_BLEND_CONTROL_RGB_DEST_FACTOR__MASK 0x00001f00 +#define A6XX_RB_MRT_BLEND_CONTROL_RGB_DEST_FACTOR__SHIFT 8 +static inline uint32_t A6XX_RB_MRT_BLEND_CONTROL_RGB_DEST_FACTOR(enum adreno_rb_blend_factor val) +{ + return ((val) << A6XX_RB_MRT_BLEND_CONTROL_RGB_DEST_FACTOR__SHIFT) & A6XX_RB_MRT_BLEND_CONTROL_RGB_DEST_FACTOR__MASK; +} +#define A6XX_RB_MRT_BLEND_CONTROL_ALPHA_SRC_FACTOR__MASK 0x001f0000 +#define A6XX_RB_MRT_BLEND_CONTROL_ALPHA_SRC_FACTOR__SHIFT 16 +static inline uint32_t A6XX_RB_MRT_BLEND_CONTROL_ALPHA_SRC_FACTOR(enum adreno_rb_blend_factor val) +{ + return ((val) << A6XX_RB_MRT_BLEND_CONTROL_ALPHA_SRC_FACTOR__SHIFT) & A6XX_RB_MRT_BLEND_CONTROL_ALPHA_SRC_FACTOR__MASK; +} +#define A6XX_RB_MRT_BLEND_CONTROL_ALPHA_BLEND_OPCODE__MASK 0x00e00000 +#define A6XX_RB_MRT_BLEND_CONTROL_ALPHA_BLEND_OPCODE__SHIFT 21 +static inline uint32_t A6XX_RB_MRT_BLEND_CONTROL_ALPHA_BLEND_OPCODE(enum a3xx_rb_blend_opcode val) +{ + return ((val) << A6XX_RB_MRT_BLEND_CONTROL_ALPHA_BLEND_OPCODE__SHIFT) & 
A6XX_RB_MRT_BLEND_CONTROL_ALPHA_BLEND_OPCODE__MASK; +} +#define A6XX_RB_MRT_BLEND_CONTROL_ALPHA_DEST_FACTOR__MASK 0x1f000000 +#define A6XX_RB_MRT_BLEND_CONTROL_ALPHA_DEST_FACTOR__SHIFT 24 +static inline uint32_t A6XX_RB_MRT_BLEND_CONTROL_ALPHA_DEST_FACTOR(enum adreno_rb_blend_factor val) +{ + return ((val) << A6XX_RB_MRT_BLEND_CONTROL_ALPHA_DEST_FACTOR__SHIFT) & A6XX_RB_MRT_BLEND_CONTROL_ALPHA_DEST_FACTOR__MASK; +} + +static inline uint32_t REG_A6XX_RB_MRT_BUF_INFO(uint32_t i0) { return 0x00008822 + 0x8*i0; } +#define A6XX_RB_MRT_BUF_INFO_COLOR_FORMAT__MASK 0x000000ff +#define A6XX_RB_MRT_BUF_INFO_COLOR_FORMAT__SHIFT 0 +static inline uint32_t A6XX_RB_MRT_BUF_INFO_COLOR_FORMAT(enum a6xx_color_fmt val) +{ + return ((val) << A6XX_RB_MRT_BUF_INFO_COLOR_FORMAT__SHIFT) & A6XX_RB_MRT_BUF_INFO_COLOR_FORMAT__MASK; +} +#define A6XX_RB_MRT_BUF_INFO_COLOR_TILE_MODE__MASK 0x00000300 +#define A6XX_RB_MRT_BUF_INFO_COLOR_TILE_MODE__SHIFT 8 +static inline uint32_t A6XX_RB_MRT_BUF_INFO_COLOR_TILE_MODE(enum a6xx_tile_mode val) +{ + return ((val) << A6XX_RB_MRT_BUF_INFO_COLOR_TILE_MODE__SHIFT) & A6XX_RB_MRT_BUF_INFO_COLOR_TILE_MODE__MASK; +} +#define A6XX_RB_MRT_BUF_INFO_COLOR_SWAP__MASK 0x00006000 +#define A6XX_RB_MRT_BUF_INFO_COLOR_SWAP__SHIFT 13 +static inline uint32_t A6XX_RB_MRT_BUF_INFO_COLOR_SWAP(enum a3xx_color_swap val) +{ + return ((val) << A6XX_RB_MRT_BUF_INFO_COLOR_SWAP__SHIFT) & A6XX_RB_MRT_BUF_INFO_COLOR_SWAP__MASK; +} + +static inline uint32_t REG_A6XX_RB_MRT_PITCH(uint32_t i0) { return 0x00008823 + 0x8*i0; } +#define A6XX_RB_MRT_PITCH__MASK 0xffffffff +#define A6XX_RB_MRT_PITCH__SHIFT 0 +static inline uint32_t A6XX_RB_MRT_PITCH(uint32_t val) +{ + assert(!(val & 0x3f)); + return ((val >> 6) << A6XX_RB_MRT_PITCH__SHIFT) & A6XX_RB_MRT_PITCH__MASK; +} + +static inline uint32_t REG_A6XX_RB_MRT_ARRAY_PITCH(uint32_t i0) { return 0x00008824 + 0x8*i0; } +#define A6XX_RB_MRT_ARRAY_PITCH__MASK 0xffffffff +#define A6XX_RB_MRT_ARRAY_PITCH__SHIFT 0 +static inline uint32_t 
A6XX_RB_MRT_ARRAY_PITCH(uint32_t val) +{ + assert(!(val & 0x3f)); + return ((val >> 6) << A6XX_RB_MRT_ARRAY_PITCH__SHIFT) & A6XX_RB_MRT_ARRAY_PITCH__MASK; +} + +static inline uint32_t REG_A6XX_RB_MRT_BASE_LO(uint32_t i0) { return 0x00008825 + 0x8*i0; } + +static inline uint32_t REG_A6XX_RB_MRT_BASE_HI(uint32_t i0) { return 0x00008826 + 0x8*i0; } + +static inline uint32_t REG_A6XX_RB_MRT_BASE_GMEM(uint32_t i0) { return 0x00008827 + 0x8*i0; } + +#define REG_A6XX_RB_BLEND_RED_F32 0x00008860 +#define A6XX_RB_BLEND_RED_F32__MASK 0xffffffff +#define A6XX_RB_BLEND_RED_F32__SHIFT 0 +static inline uint32_t A6XX_RB_BLEND_RED_F32(float val) +{ + return ((fui(val)) << A6XX_RB_BLEND_RED_F32__SHIFT) & A6XX_RB_BLEND_RED_F32__MASK; +} + +#define REG_A6XX_RB_BLEND_GREEN_F32 0x00008861 +#define A6XX_RB_BLEND_GREEN_F32__MASK 0xffffffff +#define A6XX_RB_BLEND_GREEN_F32__SHIFT 0 +static inline uint32_t A6XX_RB_BLEND_GREEN_F32(float val) +{ + return ((fui(val)) << A6XX_RB_BLEND_GREEN_F32__SHIFT) & A6XX_RB_BLEND_GREEN_F32__MASK; +} + +#define REG_A6XX_RB_BLEND_BLUE_F32 0x00008862 +#define A6XX_RB_BLEND_BLUE_F32__MASK 0xffffffff +#define A6XX_RB_BLEND_BLUE_F32__SHIFT 0 +static inline uint32_t A6XX_RB_BLEND_BLUE_F32(float val) +{ + return ((fui(val)) << A6XX_RB_BLEND_BLUE_F32__SHIFT) & A6XX_RB_BLEND_BLUE_F32__MASK; +} + +#define REG_A6XX_RB_BLEND_ALPHA_F32 0x00008863 +#define A6XX_RB_BLEND_ALPHA_F32__MASK 0xffffffff +#define A6XX_RB_BLEND_ALPHA_F32__SHIFT 0 +static inline uint32_t A6XX_RB_BLEND_ALPHA_F32(float val) +{ + return ((fui(val)) << A6XX_RB_BLEND_ALPHA_F32__SHIFT) & A6XX_RB_BLEND_ALPHA_F32__MASK; +} + +#define REG_A6XX_RB_ALPHA_CONTROL 0x00008864 +#define A6XX_RB_ALPHA_CONTROL_ALPHA_REF__MASK 0x000000ff +#define A6XX_RB_ALPHA_CONTROL_ALPHA_REF__SHIFT 0 +static inline uint32_t A6XX_RB_ALPHA_CONTROL_ALPHA_REF(uint32_t val) +{ + return ((val) << A6XX_RB_ALPHA_CONTROL_ALPHA_REF__SHIFT) & A6XX_RB_ALPHA_CONTROL_ALPHA_REF__MASK; +} +#define A6XX_RB_ALPHA_CONTROL_ALPHA_TEST 0x00000100 
+#define A6XX_RB_ALPHA_CONTROL_ALPHA_TEST_FUNC__MASK 0x00000e00 +#define A6XX_RB_ALPHA_CONTROL_ALPHA_TEST_FUNC__SHIFT 9 +static inline uint32_t A6XX_RB_ALPHA_CONTROL_ALPHA_TEST_FUNC(enum adreno_compare_func val) +{ + return ((val) << A6XX_RB_ALPHA_CONTROL_ALPHA_TEST_FUNC__SHIFT) & A6XX_RB_ALPHA_CONTROL_ALPHA_TEST_FUNC__MASK; +} + +#define REG_A6XX_RB_BLEND_CNTL 0x00008865 +#define A6XX_RB_BLEND_CNTL_ENABLE_BLEND__MASK 0x000000ff +#define A6XX_RB_BLEND_CNTL_ENABLE_BLEND__SHIFT 0 +static inline uint32_t A6XX_RB_BLEND_CNTL_ENABLE_BLEND(uint32_t val) +{ + return ((val) << A6XX_RB_BLEND_CNTL_ENABLE_BLEND__SHIFT) & A6XX_RB_BLEND_CNTL_ENABLE_BLEND__MASK; +} +#define A6XX_RB_BLEND_CNTL_INDEPENDENT_BLEND 0x00000100 +#define A6XX_RB_BLEND_CNTL_ALPHA_TO_COVERAGE 0x00000400 +#define A6XX_RB_BLEND_CNTL_SAMPLE_MASK__MASK 0xffff0000 +#define A6XX_RB_BLEND_CNTL_SAMPLE_MASK__SHIFT 16 +static inline uint32_t A6XX_RB_BLEND_CNTL_SAMPLE_MASK(uint32_t val) +{ + return ((val) << A6XX_RB_BLEND_CNTL_SAMPLE_MASK__SHIFT) & A6XX_RB_BLEND_CNTL_SAMPLE_MASK__MASK; +} + +#define REG_A6XX_RB_DEPTH_PLANE_CNTL 0x00008870 +#define A6XX_RB_DEPTH_PLANE_CNTL_FRAG_WRITES_Z 0x00000001 + +#define REG_A6XX_RB_DEPTH_CNTL 0x00008871 +#define A6XX_RB_DEPTH_CNTL_Z_ENABLE 0x00000001 +#define A6XX_RB_DEPTH_CNTL_Z_WRITE_ENABLE 0x00000002 +#define A6XX_RB_DEPTH_CNTL_ZFUNC__MASK 0x0000001c +#define A6XX_RB_DEPTH_CNTL_ZFUNC__SHIFT 2 +static inline uint32_t A6XX_RB_DEPTH_CNTL_ZFUNC(enum adreno_compare_func val) +{ + return ((val) << A6XX_RB_DEPTH_CNTL_ZFUNC__SHIFT) & A6XX_RB_DEPTH_CNTL_ZFUNC__MASK; +} +#define A6XX_RB_DEPTH_CNTL_Z_TEST_ENABLE 0x00000040 + +#define REG_A6XX_RB_DEPTH_BUFFER_INFO 0x00008872 +#define A6XX_RB_DEPTH_BUFFER_INFO_DEPTH_FORMAT__MASK 0x00000007 +#define A6XX_RB_DEPTH_BUFFER_INFO_DEPTH_FORMAT__SHIFT 0 +static inline uint32_t A6XX_RB_DEPTH_BUFFER_INFO_DEPTH_FORMAT(enum a6xx_depth_format val) +{ + return ((val) << A6XX_RB_DEPTH_BUFFER_INFO_DEPTH_FORMAT__SHIFT) & 
A6XX_RB_DEPTH_BUFFER_INFO_DEPTH_FORMAT__MASK; +} + +#define REG_A6XX_RB_DEPTH_BUFFER_PITCH 0x00008873 +#define A6XX_RB_DEPTH_BUFFER_PITCH__MASK 0xffffffff +#define A6XX_RB_DEPTH_BUFFER_PITCH__SHIFT 0 +static inline uint32_t A6XX_RB_DEPTH_BUFFER_PITCH(uint32_t val) +{ + assert(!(val & 0x3f)); + return ((val >> 6) << A6XX_RB_DEPTH_BUFFER_PITCH__SHIFT) & A6XX_RB_DEPTH_BUFFER_PITCH__MASK; +} + +#define REG_A6XX_RB_DEPTH_BUFFER_ARRAY_PITCH 0x00008874 +#define A6XX_RB_DEPTH_BUFFER_ARRAY_PITCH__MASK 0xffffffff +#define A6XX_RB_DEPTH_BUFFER_ARRAY_PITCH__SHIFT 0 +static inline uint32_t A6XX_RB_DEPTH_BUFFER_ARRAY_PITCH(uint32_t val) +{ + assert(!(val & 0x3f)); + return ((val >> 6) << A6XX_RB_DEPTH_BUFFER_ARRAY_PITCH__SHIFT) & A6XX_RB_DEPTH_BUFFER_ARRAY_PITCH__MASK; +} + +#define REG_A6XX_RB_DEPTH_BUFFER_BASE_LO 0x00008875 + +#define REG_A6XX_RB_DEPTH_BUFFER_BASE_HI 0x00008876 + +#define REG_A6XX_RB_DEPTH_BUFFER_BASE_GMEM 0x00008877 + +#define REG_A6XX_RB_UNKNOWN_8878 0x00008878 + +#define REG_A6XX_RB_UNKNOWN_8879 0x00008879 + +#define REG_A6XX_RB_STENCIL_CONTROL 0x00008880 +#define A6XX_RB_STENCIL_CONTROL_STENCIL_ENABLE 0x00000001 +#define A6XX_RB_STENCIL_CONTROL_STENCIL_ENABLE_BF 0x00000002 +#define A6XX_RB_STENCIL_CONTROL_STENCIL_READ 0x00000004 +#define A6XX_RB_STENCIL_CONTROL_FUNC__MASK 0x00000700 +#define A6XX_RB_STENCIL_CONTROL_FUNC__SHIFT 8 +static inline uint32_t A6XX_RB_STENCIL_CONTROL_FUNC(enum adreno_compare_func val) +{ + return ((val) << A6XX_RB_STENCIL_CONTROL_FUNC__SHIFT) & A6XX_RB_STENCIL_CONTROL_FUNC__MASK; +} +#define A6XX_RB_STENCIL_CONTROL_FAIL__MASK 0x00003800 +#define A6XX_RB_STENCIL_CONTROL_FAIL__SHIFT 11 +static inline uint32_t A6XX_RB_STENCIL_CONTROL_FAIL(enum adreno_stencil_op val) +{ + return ((val) << A6XX_RB_STENCIL_CONTROL_FAIL__SHIFT) & A6XX_RB_STENCIL_CONTROL_FAIL__MASK; +} +#define A6XX_RB_STENCIL_CONTROL_ZPASS__MASK 0x0001c000 +#define A6XX_RB_STENCIL_CONTROL_ZPASS__SHIFT 14 +static inline uint32_t A6XX_RB_STENCIL_CONTROL_ZPASS(enum 
adreno_stencil_op val) +{ + return ((val) << A6XX_RB_STENCIL_CONTROL_ZPASS__SHIFT) & A6XX_RB_STENCIL_CONTROL_ZPASS__MASK; +} +#define A6XX_RB_STENCIL_CONTROL_ZFAIL__MASK 0x000e0000 +#define A6XX_RB_STENCIL_CONTROL_ZFAIL__SHIFT 17 +static inline uint32_t A6XX_RB_STENCIL_CONTROL_ZFAIL(enum adreno_stencil_op val) +{ + return ((val) << A6XX_RB_STENCIL_CONTROL_ZFAIL__SHIFT) & A6XX_RB_STENCIL_CONTROL_ZFAIL__MASK; +} +#define A6XX_RB_STENCIL_CONTROL_FUNC_BF__MASK 0x00700000 +#define A6XX_RB_STENCIL_CONTROL_FUNC_BF__SHIFT 20 +static inline uint32_t A6XX_RB_STENCIL_CONTROL_FUNC_BF(enum adreno_compare_func val) +{ + return ((val) << A6XX_RB_STENCIL_CONTROL_FUNC_BF__SHIFT) & A6XX_RB_STENCIL_CONTROL_FUNC_BF__MASK; +} +#define A6XX_RB_STENCIL_CONTROL_FAIL_BF__MASK 0x03800000 +#define A6XX_RB_STENCIL_CONTROL_FAIL_BF__SHIFT 23 +static inline uint32_t A6XX_RB_STENCIL_CONTROL_FAIL_BF(enum adreno_stencil_op val) +{ + return ((val) << A6XX_RB_STENCIL_CONTROL_FAIL_BF__SHIFT) & A6XX_RB_STENCIL_CONTROL_FAIL_BF__MASK; +} +#define A6XX_RB_STENCIL_CONTROL_ZPASS_BF__MASK 0x1c000000 +#define A6XX_RB_STENCIL_CONTROL_ZPASS_BF__SHIFT 26 +static inline uint32_t A6XX_RB_STENCIL_CONTROL_ZPASS_BF(enum adreno_stencil_op val) +{ + return ((val) << A6XX_RB_STENCIL_CONTROL_ZPASS_BF__SHIFT) & A6XX_RB_STENCIL_CONTROL_ZPASS_BF__MASK; +} +#define A6XX_RB_STENCIL_CONTROL_ZFAIL_BF__MASK 0xe0000000 +#define A6XX_RB_STENCIL_CONTROL_ZFAIL_BF__SHIFT 29 +static inline uint32_t A6XX_RB_STENCIL_CONTROL_ZFAIL_BF(enum adreno_stencil_op val) +{ + return ((val) << A6XX_RB_STENCIL_CONTROL_ZFAIL_BF__SHIFT) & A6XX_RB_STENCIL_CONTROL_ZFAIL_BF__MASK; +} + +#define REG_A6XX_RB_STENCIL_INFO 0x00008881 +#define A6XX_RB_STENCIL_INFO_SEPARATE_STENCIL 0x00000001 + +#define REG_A6XX_RB_STENCIL_BUFFER_PITCH 0x00008882 +#define A6XX_RB_STENCIL_BUFFER_PITCH__MASK 0xffffffff +#define A6XX_RB_STENCIL_BUFFER_PITCH__SHIFT 0 +static inline uint32_t A6XX_RB_STENCIL_BUFFER_PITCH(uint32_t val) +{ + assert(!(val & 0x3f)); + return ((val >> 6) 
<< A6XX_RB_STENCIL_BUFFER_PITCH__SHIFT) & A6XX_RB_STENCIL_BUFFER_PITCH__MASK; +} + +#define REG_A6XX_RB_STENCIL_BUFFER_ARRAY_PITCH 0x00008883 +#define A6XX_RB_STENCIL_BUFFER_ARRAY_PITCH__MASK 0xffffffff +#define A6XX_RB_STENCIL_BUFFER_ARRAY_PITCH__SHIFT 0 +static inline uint32_t A6XX_RB_STENCIL_BUFFER_ARRAY_PITCH(uint32_t val) +{ + assert(!(val & 0x3f)); + return ((val >> 6) << A6XX_RB_STENCIL_BUFFER_ARRAY_PITCH__SHIFT) & A6XX_RB_STENCIL_BUFFER_ARRAY_PITCH__MASK; +} + +#define REG_A6XX_RB_STENCIL_BUFFER_BASE_LO 0x00008884 + +#define REG_A6XX_RB_STENCIL_BUFFER_BASE_HI 0x00008885 + +#define REG_A6XX_RB_STENCIL_BUFFER_BASE_GMEM 0x00008886 + +#define REG_A6XX_RB_STENCILREF 0x00008887 +#define A6XX_RB_STENCILREF_REF__MASK 0x000000ff +#define A6XX_RB_STENCILREF_REF__SHIFT 0 +static inline uint32_t A6XX_RB_STENCILREF_REF(uint32_t val) +{ + return ((val) << A6XX_RB_STENCILREF_REF__SHIFT) & A6XX_RB_STENCILREF_REF__MASK; +} +#define A6XX_RB_STENCILREF_BFREF__MASK 0x0000ff00 +#define A6XX_RB_STENCILREF_BFREF__SHIFT 8 +static inline uint32_t A6XX_RB_STENCILREF_BFREF(uint32_t val) +{ + return ((val) << A6XX_RB_STENCILREF_BFREF__SHIFT) & A6XX_RB_STENCILREF_BFREF__MASK; +} + +#define REG_A6XX_RB_STENCILMASK 0x00008888 +#define A6XX_RB_STENCILMASK_MASK__MASK 0x000000ff +#define A6XX_RB_STENCILMASK_MASK__SHIFT 0 +static inline uint32_t A6XX_RB_STENCILMASK_MASK(uint32_t val) +{ + return ((val) << A6XX_RB_STENCILMASK_MASK__SHIFT) & A6XX_RB_STENCILMASK_MASK__MASK; +} +#define A6XX_RB_STENCILMASK_BFMASK__MASK 0x0000ff00 +#define A6XX_RB_STENCILMASK_BFMASK__SHIFT 8 +static inline uint32_t A6XX_RB_STENCILMASK_BFMASK(uint32_t val) +{ + return ((val) << A6XX_RB_STENCILMASK_BFMASK__SHIFT) & A6XX_RB_STENCILMASK_BFMASK__MASK; +} + +#define REG_A6XX_RB_STENCILWRMASK 0x00008889 +#define A6XX_RB_STENCILWRMASK_WRMASK__MASK 0x000000ff +#define A6XX_RB_STENCILWRMASK_WRMASK__SHIFT 0 +static inline uint32_t A6XX_RB_STENCILWRMASK_WRMASK(uint32_t val) +{ + return ((val) << 
A6XX_RB_STENCILWRMASK_WRMASK__SHIFT) & A6XX_RB_STENCILWRMASK_WRMASK__MASK; +} +#define A6XX_RB_STENCILWRMASK_BFWRMASK__MASK 0x0000ff00 +#define A6XX_RB_STENCILWRMASK_BFWRMASK__SHIFT 8 +static inline uint32_t A6XX_RB_STENCILWRMASK_BFWRMASK(uint32_t val) +{ + return ((val) << A6XX_RB_STENCILWRMASK_BFWRMASK__SHIFT) & A6XX_RB_STENCILWRMASK_BFWRMASK__MASK; +} + +#define REG_A6XX_RB_WINDOW_OFFSET 0x00008890 +#define A6XX_RB_WINDOW_OFFSET_WINDOW_OFFSET_DISABLE 0x80000000 +#define A6XX_RB_WINDOW_OFFSET_X__MASK 0x00007fff +#define A6XX_RB_WINDOW_OFFSET_X__SHIFT 0 +static inline uint32_t A6XX_RB_WINDOW_OFFSET_X(uint32_t val) +{ + return ((val) << A6XX_RB_WINDOW_OFFSET_X__SHIFT) & A6XX_RB_WINDOW_OFFSET_X__MASK; +} +#define A6XX_RB_WINDOW_OFFSET_Y__MASK 0x7fff0000 +#define A6XX_RB_WINDOW_OFFSET_Y__SHIFT 16 +static inline uint32_t A6XX_RB_WINDOW_OFFSET_Y(uint32_t val) +{ + return ((val) << A6XX_RB_WINDOW_OFFSET_Y__SHIFT) & A6XX_RB_WINDOW_OFFSET_Y__MASK; +} + +#define REG_A6XX_RB_SAMPLE_COUNT_CONTROL 0x00008891 +#define A6XX_RB_SAMPLE_COUNT_CONTROL_COPY 0x00000002 + +#define REG_A6XX_RB_LRZ_CNTL 0x00008898 +#define A6XX_RB_LRZ_CNTL_ENABLE 0x00000001 + +#define REG_A6XX_RB_UNKNOWN_88D0 0x000088d0 + +#define REG_A6XX_RB_BLIT_SCISSOR_TL 0x000088d1 +#define A6XX_RB_BLIT_SCISSOR_TL_WINDOW_OFFSET_DISABLE 0x80000000 +#define A6XX_RB_BLIT_SCISSOR_TL_X__MASK 0x00007fff +#define A6XX_RB_BLIT_SCISSOR_TL_X__SHIFT 0 +static inline uint32_t A6XX_RB_BLIT_SCISSOR_TL_X(uint32_t val) +{ + return ((val) << A6XX_RB_BLIT_SCISSOR_TL_X__SHIFT) & A6XX_RB_BLIT_SCISSOR_TL_X__MASK; +} +#define A6XX_RB_BLIT_SCISSOR_TL_Y__MASK 0x7fff0000 +#define A6XX_RB_BLIT_SCISSOR_TL_Y__SHIFT 16 +static inline uint32_t A6XX_RB_BLIT_SCISSOR_TL_Y(uint32_t val) +{ + return ((val) << A6XX_RB_BLIT_SCISSOR_TL_Y__SHIFT) & A6XX_RB_BLIT_SCISSOR_TL_Y__MASK; +} + +#define REG_A6XX_RB_BLIT_SCISSOR_BR 0x000088d2 +#define A6XX_RB_BLIT_SCISSOR_BR_WINDOW_OFFSET_DISABLE 0x80000000 +#define A6XX_RB_BLIT_SCISSOR_BR_X__MASK 0x00007fff 
+#define A6XX_RB_BLIT_SCISSOR_BR_X__SHIFT 0 +static inline uint32_t A6XX_RB_BLIT_SCISSOR_BR_X(uint32_t val) +{ + return ((val) << A6XX_RB_BLIT_SCISSOR_BR_X__SHIFT) & A6XX_RB_BLIT_SCISSOR_BR_X__MASK; +} +#define A6XX_RB_BLIT_SCISSOR_BR_Y__MASK 0x7fff0000 +#define A6XX_RB_BLIT_SCISSOR_BR_Y__SHIFT 16 +static inline uint32_t A6XX_RB_BLIT_SCISSOR_BR_Y(uint32_t val) +{ + return ((val) << A6XX_RB_BLIT_SCISSOR_BR_Y__SHIFT) & A6XX_RB_BLIT_SCISSOR_BR_Y__MASK; +} + +#define REG_A6XX_RB_MSAA_CNTL 0x000088d5 +#define A6XX_RB_MSAA_CNTL_SAMPLES__MASK 0x00000018 +#define A6XX_RB_MSAA_CNTL_SAMPLES__SHIFT 3 +static inline uint32_t A6XX_RB_MSAA_CNTL_SAMPLES(enum a3xx_msaa_samples val) +{ + return ((val) << A6XX_RB_MSAA_CNTL_SAMPLES__SHIFT) & A6XX_RB_MSAA_CNTL_SAMPLES__MASK; +} + +#define REG_A6XX_RB_BLIT_BASE_GMEM 0x000088d6 + +#define REG_A6XX_RB_BLIT_DST_INFO 0x000088d7 +#define A6XX_RB_BLIT_DST_INFO_TILE_MODE__MASK 0x00000003 +#define A6XX_RB_BLIT_DST_INFO_TILE_MODE__SHIFT 0 +static inline uint32_t A6XX_RB_BLIT_DST_INFO_TILE_MODE(enum a6xx_tile_mode val) +{ + return ((val) << A6XX_RB_BLIT_DST_INFO_TILE_MODE__SHIFT) & A6XX_RB_BLIT_DST_INFO_TILE_MODE__MASK; +} +#define A6XX_RB_BLIT_DST_INFO_FLAGS 0x00000004 +#define A6XX_RB_BLIT_DST_INFO_SAMPLES__MASK 0x00000018 +#define A6XX_RB_BLIT_DST_INFO_SAMPLES__SHIFT 3 +static inline uint32_t A6XX_RB_BLIT_DST_INFO_SAMPLES(enum a3xx_msaa_samples val) +{ + return ((val) << A6XX_RB_BLIT_DST_INFO_SAMPLES__SHIFT) & A6XX_RB_BLIT_DST_INFO_SAMPLES__MASK; +} +#define A6XX_RB_BLIT_DST_INFO_COLOR_FORMAT__MASK 0x00007f80 +#define A6XX_RB_BLIT_DST_INFO_COLOR_FORMAT__SHIFT 7 +static inline uint32_t A6XX_RB_BLIT_DST_INFO_COLOR_FORMAT(enum a6xx_color_fmt val) +{ + return ((val) << A6XX_RB_BLIT_DST_INFO_COLOR_FORMAT__SHIFT) & A6XX_RB_BLIT_DST_INFO_COLOR_FORMAT__MASK; +} +#define A6XX_RB_BLIT_DST_INFO_COLOR_SWAP__MASK 0x00000060 +#define A6XX_RB_BLIT_DST_INFO_COLOR_SWAP__SHIFT 5 +static inline uint32_t A6XX_RB_BLIT_DST_INFO_COLOR_SWAP(enum a3xx_color_swap val) 
+{ + return ((val) << A6XX_RB_BLIT_DST_INFO_COLOR_SWAP__SHIFT) & A6XX_RB_BLIT_DST_INFO_COLOR_SWAP__MASK; +} + +#define REG_A6XX_RB_BLIT_DST_LO 0x000088d8 + +#define REG_A6XX_RB_BLIT_DST_HI 0x000088d9 + +#define REG_A6XX_RB_BLIT_DST_PITCH 0x000088da +#define A6XX_RB_BLIT_DST_PITCH__MASK 0xffffffff +#define A6XX_RB_BLIT_DST_PITCH__SHIFT 0 +static inline uint32_t A6XX_RB_BLIT_DST_PITCH(uint32_t val) +{ + assert(!(val & 0x3f)); + return ((val >> 6) << A6XX_RB_BLIT_DST_PITCH__SHIFT) & A6XX_RB_BLIT_DST_PITCH__MASK; +} + +#define REG_A6XX_RB_BLIT_DST_ARRAY_PITCH 0x000088db +#define A6XX_RB_BLIT_DST_ARRAY_PITCH__MASK 0xffffffff +#define A6XX_RB_BLIT_DST_ARRAY_PITCH__SHIFT 0 +static inline uint32_t A6XX_RB_BLIT_DST_ARRAY_PITCH(uint32_t val) +{ + assert(!(val & 0x3f)); + return ((val >> 6) << A6XX_RB_BLIT_DST_ARRAY_PITCH__SHIFT) & A6XX_RB_BLIT_DST_ARRAY_PITCH__MASK; +} + +#define REG_A6XX_RB_BLIT_FLAG_DST_LO 0x000088dc + +#define REG_A6XX_RB_BLIT_FLAG_DST_HI 0x000088dd + +#define REG_A6XX_RB_BLIT_CLEAR_COLOR_DW0 0x000088df + +#define REG_A6XX_RB_BLIT_CLEAR_COLOR_DW1 0x000088e0 + +#define REG_A6XX_RB_BLIT_CLEAR_COLOR_DW2 0x000088e1 + +#define REG_A6XX_RB_BLIT_CLEAR_COLOR_DW3 0x000088e2 + +#define REG_A6XX_RB_BLIT_INFO 0x000088e3 +#define A6XX_RB_BLIT_INFO_UNK0 0x00000001 +#define A6XX_RB_BLIT_INFO_GMEM 0x00000002 +#define A6XX_RB_BLIT_INFO_INTEGER 0x00000004 +#define A6XX_RB_BLIT_INFO_DEPTH 0x00000008 +#define A6XX_RB_BLIT_INFO_CLEAR_MASK__MASK 0x000000f0 +#define A6XX_RB_BLIT_INFO_CLEAR_MASK__SHIFT 4 +static inline uint32_t A6XX_RB_BLIT_INFO_CLEAR_MASK(uint32_t val) +{ + return ((val) << A6XX_RB_BLIT_INFO_CLEAR_MASK__SHIFT) & A6XX_RB_BLIT_INFO_CLEAR_MASK__MASK; +} + +#define REG_A6XX_RB_UNKNOWN_88F0 0x000088f0 + +#define REG_A6XX_RB_DEPTH_FLAG_BUFFER_BASE_LO 0x00008900 + +#define REG_A6XX_RB_DEPTH_FLAG_BUFFER_BASE_HI 0x00008901 + +#define REG_A6XX_RB_DEPTH_FLAG_BUFFER_PITCH 0x00008902 + +static inline uint32_t REG_A6XX_RB_MRT_FLAG_BUFFER(uint32_t i0) { return 0x00008903 + 
0x3*i0; } + +static inline uint32_t REG_A6XX_RB_MRT_FLAG_BUFFER_ADDR_LO(uint32_t i0) { return 0x00008903 + 0x3*i0; } + +static inline uint32_t REG_A6XX_RB_MRT_FLAG_BUFFER_ADDR_HI(uint32_t i0) { return 0x00008904 + 0x3*i0; } + +static inline uint32_t REG_A6XX_RB_MRT_FLAG_BUFFER_PITCH(uint32_t i0) { return 0x00008905 + 0x3*i0; } +#define A6XX_RB_MRT_FLAG_BUFFER_PITCH_PITCH__MASK 0x000007ff +#define A6XX_RB_MRT_FLAG_BUFFER_PITCH_PITCH__SHIFT 0 +static inline uint32_t A6XX_RB_MRT_FLAG_BUFFER_PITCH_PITCH(uint32_t val) +{ + assert(!(val & 0x1f)); + return ((val >> 5) << A6XX_RB_MRT_FLAG_BUFFER_PITCH_PITCH__SHIFT) & A6XX_RB_MRT_FLAG_BUFFER_PITCH_PITCH__MASK; +} +#define A6XX_RB_MRT_FLAG_BUFFER_PITCH_ARRAY_PITCH__MASK 0x003ff800 +#define A6XX_RB_MRT_FLAG_BUFFER_PITCH_ARRAY_PITCH__SHIFT 11 +static inline uint32_t A6XX_RB_MRT_FLAG_BUFFER_PITCH_ARRAY_PITCH(uint32_t val) +{ + assert(!(val & 0x1f)); + return ((val >> 5) << A6XX_RB_MRT_FLAG_BUFFER_PITCH_ARRAY_PITCH__SHIFT) & A6XX_RB_MRT_FLAG_BUFFER_PITCH_ARRAY_PITCH__MASK; +} + +#define REG_A6XX_RB_SAMPLE_COUNT_ADDR_LO 0x00008927 + +#define REG_A6XX_RB_SAMPLE_COUNT_ADDR_HI 0x00008928 + +#define REG_A6XX_RB_2D_BLIT_CNTL 0x00008c00 +#define A6XX_RB_2D_BLIT_CNTL_COLOR_FORMAT__MASK 0x0000ff00 +#define A6XX_RB_2D_BLIT_CNTL_COLOR_FORMAT__SHIFT 8 +static inline uint32_t A6XX_RB_2D_BLIT_CNTL_COLOR_FORMAT(enum a6xx_color_fmt val) +{ + return ((val) << A6XX_RB_2D_BLIT_CNTL_COLOR_FORMAT__SHIFT) & A6XX_RB_2D_BLIT_CNTL_COLOR_FORMAT__MASK; +} +#define A6XX_RB_2D_BLIT_CNTL_SCISSOR 0x00010000 +#define A6XX_RB_2D_BLIT_CNTL_IFMT__MASK 0x1f000000 +#define A6XX_RB_2D_BLIT_CNTL_IFMT__SHIFT 24 +static inline uint32_t A6XX_RB_2D_BLIT_CNTL_IFMT(enum a6xx_2d_ifmt val) +{ + return ((val) << A6XX_RB_2D_BLIT_CNTL_IFMT__SHIFT) & A6XX_RB_2D_BLIT_CNTL_IFMT__MASK; +} + +#define REG_A6XX_RB_UNKNOWN_8C01 0x00008c01 + +#define REG_A6XX_RB_2D_DST_INFO 0x00008c17 +#define A6XX_RB_2D_DST_INFO_COLOR_FORMAT__MASK 0x000000ff +#define 
A6XX_RB_2D_DST_INFO_COLOR_FORMAT__SHIFT 0 +static inline uint32_t A6XX_RB_2D_DST_INFO_COLOR_FORMAT(enum a6xx_color_fmt val) +{ + return ((val) << A6XX_RB_2D_DST_INFO_COLOR_FORMAT__SHIFT) & A6XX_RB_2D_DST_INFO_COLOR_FORMAT__MASK; +} +#define A6XX_RB_2D_DST_INFO_TILE_MODE__MASK 0x00000300 +#define A6XX_RB_2D_DST_INFO_TILE_MODE__SHIFT 8 +static inline uint32_t A6XX_RB_2D_DST_INFO_TILE_MODE(enum a6xx_tile_mode val) +{ + return ((val) << A6XX_RB_2D_DST_INFO_TILE_MODE__SHIFT) & A6XX_RB_2D_DST_INFO_TILE_MODE__MASK; +} +#define A6XX_RB_2D_DST_INFO_COLOR_SWAP__MASK 0x00000c00 +#define A6XX_RB_2D_DST_INFO_COLOR_SWAP__SHIFT 10 +static inline uint32_t A6XX_RB_2D_DST_INFO_COLOR_SWAP(enum a3xx_color_swap val) +{ + return ((val) << A6XX_RB_2D_DST_INFO_COLOR_SWAP__SHIFT) & A6XX_RB_2D_DST_INFO_COLOR_SWAP__MASK; +} +#define A6XX_RB_2D_DST_INFO_FLAGS 0x00001000 + +#define REG_A6XX_RB_2D_DST_LO 0x00008c18 + +#define REG_A6XX_RB_2D_DST_HI 0x00008c19 + +#define REG_A6XX_RB_2D_DST_SIZE 0x00008c1a +#define A6XX_RB_2D_DST_SIZE_PITCH__MASK 0x0000ffff +#define A6XX_RB_2D_DST_SIZE_PITCH__SHIFT 0 +static inline uint32_t A6XX_RB_2D_DST_SIZE_PITCH(uint32_t val) +{ + assert(!(val & 0x3f)); + return ((val >> 6) << A6XX_RB_2D_DST_SIZE_PITCH__SHIFT) & A6XX_RB_2D_DST_SIZE_PITCH__MASK; +} + +#define REG_A6XX_RB_2D_DST_FLAGS_LO 0x00008c20 + +#define REG_A6XX_RB_2D_DST_FLAGS_HI 0x00008c21 + +#define REG_A6XX_RB_2D_SRC_SOLID_C0 0x00008c2c + +#define REG_A6XX_RB_2D_SRC_SOLID_C1 0x00008c2d + +#define REG_A6XX_RB_2D_SRC_SOLID_C2 0x00008c2e + +#define REG_A6XX_RB_2D_SRC_SOLID_C3 0x00008c2f + +#define REG_A6XX_RB_UNKNOWN_8E01 0x00008e01 + +#define REG_A6XX_RB_UNKNOWN_8E04 0x00008e04 + +#define REG_A6XX_RB_CCU_CNTL 0x00008e07 + +#define REG_A6XX_VPC_UNKNOWN_9101 0x00009101 + +#define REG_A6XX_VPC_GS_SIV_CNTL 0x00009104 + +#define REG_A6XX_VPC_UNKNOWN_9107 0x00009107 + +#define REG_A6XX_VPC_UNKNOWN_9108 0x00009108 + +static inline uint32_t REG_A6XX_VPC_VARYING_INTERP(uint32_t i0) { return 0x00009200 + 0x1*i0; } 
+ +static inline uint32_t REG_A6XX_VPC_VARYING_INTERP_MODE(uint32_t i0) { return 0x00009200 + 0x1*i0; } + +static inline uint32_t REG_A6XX_VPC_VARYING_PS_REPL(uint32_t i0) { return 0x00009208 + 0x1*i0; } + +static inline uint32_t REG_A6XX_VPC_VARYING_PS_REPL_MODE(uint32_t i0) { return 0x00009208 + 0x1*i0; } + +#define REG_A6XX_VPC_UNKNOWN_9210 0x00009210 + +#define REG_A6XX_VPC_UNKNOWN_9211 0x00009211 + +static inline uint32_t REG_A6XX_VPC_VAR(uint32_t i0) { return 0x00009212 + 0x1*i0; } + +static inline uint32_t REG_A6XX_VPC_VAR_DISABLE(uint32_t i0) { return 0x00009212 + 0x1*i0; } + +#define REG_A6XX_VPC_SO_CNTL 0x00009216 +#define A6XX_VPC_SO_CNTL_ENABLE 0x00010000 + +#define REG_A6XX_VPC_SO_PROG 0x00009217 +#define A6XX_VPC_SO_PROG_A_BUF__MASK 0x00000003 +#define A6XX_VPC_SO_PROG_A_BUF__SHIFT 0 +static inline uint32_t A6XX_VPC_SO_PROG_A_BUF(uint32_t val) +{ + return ((val) << A6XX_VPC_SO_PROG_A_BUF__SHIFT) & A6XX_VPC_SO_PROG_A_BUF__MASK; +} +#define A6XX_VPC_SO_PROG_A_OFF__MASK 0x000007fc +#define A6XX_VPC_SO_PROG_A_OFF__SHIFT 2 +static inline uint32_t A6XX_VPC_SO_PROG_A_OFF(uint32_t val) +{ + assert(!(val & 0x3)); + return ((val >> 2) << A6XX_VPC_SO_PROG_A_OFF__SHIFT) & A6XX_VPC_SO_PROG_A_OFF__MASK; +} +#define A6XX_VPC_SO_PROG_A_EN 0x00000800 +#define A6XX_VPC_SO_PROG_B_BUF__MASK 0x00003000 +#define A6XX_VPC_SO_PROG_B_BUF__SHIFT 12 +static inline uint32_t A6XX_VPC_SO_PROG_B_BUF(uint32_t val) +{ + return ((val) << A6XX_VPC_SO_PROG_B_BUF__SHIFT) & A6XX_VPC_SO_PROG_B_BUF__MASK; +} +#define A6XX_VPC_SO_PROG_B_OFF__MASK 0x007fc000 +#define A6XX_VPC_SO_PROG_B_OFF__SHIFT 14 +static inline uint32_t A6XX_VPC_SO_PROG_B_OFF(uint32_t val) +{ + assert(!(val & 0x3)); + return ((val >> 2) << A6XX_VPC_SO_PROG_B_OFF__SHIFT) & A6XX_VPC_SO_PROG_B_OFF__MASK; +} +#define A6XX_VPC_SO_PROG_B_EN 0x00800000 + +static inline uint32_t REG_A6XX_VPC_SO(uint32_t i0) { return 0x0000921a + 0x7*i0; } + +static inline uint32_t REG_A6XX_VPC_SO_BUFFER_BASE_LO(uint32_t i0) { return 0x0000921a + 
0x7*i0; } + +static inline uint32_t REG_A6XX_VPC_SO_BUFFER_BASE_HI(uint32_t i0) { return 0x0000921b + 0x7*i0; } + +static inline uint32_t REG_A6XX_VPC_SO_BUFFER_SIZE(uint32_t i0) { return 0x0000921c + 0x7*i0; } + +static inline uint32_t REG_A6XX_VPC_SO_NCOMP(uint32_t i0) { return 0x0000921d + 0x7*i0; } + +static inline uint32_t REG_A6XX_VPC_SO_BUFFER_OFFSET(uint32_t i0) { return 0x0000921e + 0x7*i0; } + +static inline uint32_t REG_A6XX_VPC_SO_FLUSH_BASE_LO(uint32_t i0) { return 0x0000921f + 0x7*i0; } + +static inline uint32_t REG_A6XX_VPC_SO_FLUSH_BASE_HI(uint32_t i0) { return 0x00009220 + 0x7*i0; } + +#define REG_A6XX_VPC_UNKNOWN_9236 0x00009236 + +#define REG_A6XX_VPC_UNKNOWN_9300 0x00009300 + +#define REG_A6XX_VPC_PACK 0x00009301 +#define A6XX_VPC_PACK_STRIDE_IN_VPC__MASK 0x000000ff +#define A6XX_VPC_PACK_STRIDE_IN_VPC__SHIFT 0 +static inline uint32_t A6XX_VPC_PACK_STRIDE_IN_VPC(uint32_t val) +{ + return ((val) << A6XX_VPC_PACK_STRIDE_IN_VPC__SHIFT) & A6XX_VPC_PACK_STRIDE_IN_VPC__MASK; +} +#define A6XX_VPC_PACK_NUMNONPOSVAR__MASK 0x0000ff00 +#define A6XX_VPC_PACK_NUMNONPOSVAR__SHIFT 8 +static inline uint32_t A6XX_VPC_PACK_NUMNONPOSVAR(uint32_t val) +{ + return ((val) << A6XX_VPC_PACK_NUMNONPOSVAR__SHIFT) & A6XX_VPC_PACK_NUMNONPOSVAR__MASK; +} +#define A6XX_VPC_PACK_PSIZELOC__MASK 0x00ff0000 +#define A6XX_VPC_PACK_PSIZELOC__SHIFT 16 +static inline uint32_t A6XX_VPC_PACK_PSIZELOC(uint32_t val) +{ + return ((val) << A6XX_VPC_PACK_PSIZELOC__SHIFT) & A6XX_VPC_PACK_PSIZELOC__MASK; +} + +#define REG_A6XX_VPC_CNTL_0 0x00009304 +#define A6XX_VPC_CNTL_0_NUMNONPOSVAR__MASK 0x000000ff +#define A6XX_VPC_CNTL_0_NUMNONPOSVAR__SHIFT 0 +static inline uint32_t A6XX_VPC_CNTL_0_NUMNONPOSVAR(uint32_t val) +{ + return ((val) << A6XX_VPC_CNTL_0_NUMNONPOSVAR__SHIFT) & A6XX_VPC_CNTL_0_NUMNONPOSVAR__MASK; +} +#define A6XX_VPC_CNTL_0_VARYING 0x00010000 + +#define REG_A6XX_VPC_SO_BUF_CNTL 0x00009305 +#define A6XX_VPC_SO_BUF_CNTL_BUF0 0x00000001 +#define A6XX_VPC_SO_BUF_CNTL_BUF1 0x00000008 
+#define A6XX_VPC_SO_BUF_CNTL_BUF2 0x00000040 +#define A6XX_VPC_SO_BUF_CNTL_BUF3 0x00000200 +#define A6XX_VPC_SO_BUF_CNTL_ENABLE 0x00008000 + +#define REG_A6XX_VPC_SO_OVERRIDE 0x00009306 +#define A6XX_VPC_SO_OVERRIDE_SO_DISABLE 0x00000001 + +#define REG_A6XX_VPC_UNKNOWN_9600 0x00009600 + +#define REG_A6XX_VPC_UNKNOWN_9602 0x00009602 + +#define REG_A6XX_PC_UNKNOWN_9801 0x00009801 + +#define REG_A6XX_PC_RESTART_INDEX 0x00009803 + +#define REG_A6XX_PC_MODE_CNTL 0x00009804 + +#define REG_A6XX_PC_UNKNOWN_9805 0x00009805 + +#define REG_A6XX_PC_UNKNOWN_9806 0x00009806 + +#define REG_A6XX_PC_UNKNOWN_9980 0x00009980 + +#define REG_A6XX_PC_UNKNOWN_9981 0x00009981 + +#define REG_A6XX_PC_UNKNOWN_9990 0x00009990 + +#define REG_A6XX_PC_PRIMITIVE_CNTL_0 0x00009b00 +#define A6XX_PC_PRIMITIVE_CNTL_0_PRIMITIVE_RESTART 0x00000001 +#define A6XX_PC_PRIMITIVE_CNTL_0_PROVOKING_VTX_LAST 0x00000002 + +#define REG_A6XX_PC_PRIMITIVE_CNTL_1 0x00009b01 +#define A6XX_PC_PRIMITIVE_CNTL_1_STRIDE_IN_VPC__MASK 0x0000007f +#define A6XX_PC_PRIMITIVE_CNTL_1_STRIDE_IN_VPC__SHIFT 0 +static inline uint32_t A6XX_PC_PRIMITIVE_CNTL_1_STRIDE_IN_VPC(uint32_t val) +{ + return ((val) << A6XX_PC_PRIMITIVE_CNTL_1_STRIDE_IN_VPC__SHIFT) & A6XX_PC_PRIMITIVE_CNTL_1_STRIDE_IN_VPC__MASK; +} +#define A6XX_PC_PRIMITIVE_CNTL_1_PSIZE 0x00000100 + +#define REG_A6XX_PC_UNKNOWN_9B06 0x00009b06 + +#define REG_A6XX_PC_UNKNOWN_9B07 0x00009b07 + +#define REG_A6XX_PC_TESSFACTOR_ADDR_LO 0x00009e08 + +#define REG_A6XX_PC_TESSFACTOR_ADDR_HI 0x00009e09 + +#define REG_A6XX_PC_UNKNOWN_9E72 0x00009e72 + +#define REG_A6XX_VFD_CONTROL_0 0x0000a000 +#define A6XX_VFD_CONTROL_0_VTXCNT__MASK 0x0000003f +#define A6XX_VFD_CONTROL_0_VTXCNT__SHIFT 0 +static inline uint32_t A6XX_VFD_CONTROL_0_VTXCNT(uint32_t val) +{ + return ((val) << A6XX_VFD_CONTROL_0_VTXCNT__SHIFT) & A6XX_VFD_CONTROL_0_VTXCNT__MASK; +} + +#define REG_A6XX_VFD_CONTROL_1 0x0000a001 +#define A6XX_VFD_CONTROL_1_REGID4VTX__MASK 0x000000ff +#define A6XX_VFD_CONTROL_1_REGID4VTX__SHIFT 
0 +static inline uint32_t A6XX_VFD_CONTROL_1_REGID4VTX(uint32_t val) +{ + return ((val) << A6XX_VFD_CONTROL_1_REGID4VTX__SHIFT) & A6XX_VFD_CONTROL_1_REGID4VTX__MASK; +} +#define A6XX_VFD_CONTROL_1_REGID4INST__MASK 0x0000ff00 +#define A6XX_VFD_CONTROL_1_REGID4INST__SHIFT 8 +static inline uint32_t A6XX_VFD_CONTROL_1_REGID4INST(uint32_t val) +{ + return ((val) << A6XX_VFD_CONTROL_1_REGID4INST__SHIFT) & A6XX_VFD_CONTROL_1_REGID4INST__MASK; +} +#define A6XX_VFD_CONTROL_1_REGID4PRIMID__MASK 0x00ff0000 +#define A6XX_VFD_CONTROL_1_REGID4PRIMID__SHIFT 16 +static inline uint32_t A6XX_VFD_CONTROL_1_REGID4PRIMID(uint32_t val) +{ + return ((val) << A6XX_VFD_CONTROL_1_REGID4PRIMID__SHIFT) & A6XX_VFD_CONTROL_1_REGID4PRIMID__MASK; +} + +#define REG_A6XX_VFD_CONTROL_2 0x0000a002 +#define A6XX_VFD_CONTROL_2_REGID_PATCHID__MASK 0x000000ff +#define A6XX_VFD_CONTROL_2_REGID_PATCHID__SHIFT 0 +static inline uint32_t A6XX_VFD_CONTROL_2_REGID_PATCHID(uint32_t val) +{ + return ((val) << A6XX_VFD_CONTROL_2_REGID_PATCHID__SHIFT) & A6XX_VFD_CONTROL_2_REGID_PATCHID__MASK; +} + +#define REG_A6XX_VFD_CONTROL_3 0x0000a003 +#define A6XX_VFD_CONTROL_3_REGID_PATCHID__MASK 0x0000ff00 +#define A6XX_VFD_CONTROL_3_REGID_PATCHID__SHIFT 8 +static inline uint32_t A6XX_VFD_CONTROL_3_REGID_PATCHID(uint32_t val) +{ + return ((val) << A6XX_VFD_CONTROL_3_REGID_PATCHID__SHIFT) & A6XX_VFD_CONTROL_3_REGID_PATCHID__MASK; +} +#define A6XX_VFD_CONTROL_3_REGID_TESSX__MASK 0x00ff0000 +#define A6XX_VFD_CONTROL_3_REGID_TESSX__SHIFT 16 +static inline uint32_t A6XX_VFD_CONTROL_3_REGID_TESSX(uint32_t val) +{ + return ((val) << A6XX_VFD_CONTROL_3_REGID_TESSX__SHIFT) & A6XX_VFD_CONTROL_3_REGID_TESSX__MASK; +} +#define A6XX_VFD_CONTROL_3_REGID_TESSY__MASK 0xff000000 +#define A6XX_VFD_CONTROL_3_REGID_TESSY__SHIFT 24 +static inline uint32_t A6XX_VFD_CONTROL_3_REGID_TESSY(uint32_t val) +{ + return ((val) << A6XX_VFD_CONTROL_3_REGID_TESSY__SHIFT) & A6XX_VFD_CONTROL_3_REGID_TESSY__MASK; +} + +#define REG_A6XX_VFD_CONTROL_4 
0x0000a004 + +#define REG_A6XX_VFD_CONTROL_5 0x0000a005 + +#define REG_A6XX_VFD_CONTROL_6 0x0000a006 + +#define REG_A6XX_VFD_MODE_CNTL 0x0000a007 +#define A6XX_VFD_MODE_CNTL_BINNING_PASS 0x00000001 + +#define REG_A6XX_VFD_UNKNOWN_A008 0x0000a008 + +#define REG_A6XX_VFD_UNKNOWN_A009 0x0000a009 + +#define REG_A6XX_VFD_INDEX_OFFSET 0x0000a00e + +#define REG_A6XX_VFD_INSTANCE_START_OFFSET 0x0000a00f + +static inline uint32_t REG_A6XX_VFD_FETCH(uint32_t i0) { return 0x0000a010 + 0x4*i0; } + +static inline uint32_t REG_A6XX_VFD_FETCH_BASE_LO(uint32_t i0) { return 0x0000a010 + 0x4*i0; } + +static inline uint32_t REG_A6XX_VFD_FETCH_BASE_HI(uint32_t i0) { return 0x0000a011 + 0x4*i0; } + +static inline uint32_t REG_A6XX_VFD_FETCH_SIZE(uint32_t i0) { return 0x0000a012 + 0x4*i0; } + +static inline uint32_t REG_A6XX_VFD_FETCH_STRIDE(uint32_t i0) { return 0x0000a013 + 0x4*i0; } + +static inline uint32_t REG_A6XX_VFD_DECODE(uint32_t i0) { return 0x0000a090 + 0x2*i0; } + +static inline uint32_t REG_A6XX_VFD_DECODE_INSTR(uint32_t i0) { return 0x0000a090 + 0x2*i0; } +#define A6XX_VFD_DECODE_INSTR_IDX__MASK 0x0000001f +#define A6XX_VFD_DECODE_INSTR_IDX__SHIFT 0 +static inline uint32_t A6XX_VFD_DECODE_INSTR_IDX(uint32_t val) +{ + return ((val) << A6XX_VFD_DECODE_INSTR_IDX__SHIFT) & A6XX_VFD_DECODE_INSTR_IDX__MASK; +} +#define A6XX_VFD_DECODE_INSTR_INSTANCED 0x00020000 +#define A6XX_VFD_DECODE_INSTR_FORMAT__MASK 0x0ff00000 +#define A6XX_VFD_DECODE_INSTR_FORMAT__SHIFT 20 +static inline uint32_t A6XX_VFD_DECODE_INSTR_FORMAT(enum a6xx_vtx_fmt val) +{ + return ((val) << A6XX_VFD_DECODE_INSTR_FORMAT__SHIFT) & A6XX_VFD_DECODE_INSTR_FORMAT__MASK; +} +#define A6XX_VFD_DECODE_INSTR_SWAP__MASK 0x30000000 +#define A6XX_VFD_DECODE_INSTR_SWAP__SHIFT 28 +static inline uint32_t A6XX_VFD_DECODE_INSTR_SWAP(enum a3xx_color_swap val) +{ + return ((val) << A6XX_VFD_DECODE_INSTR_SWAP__SHIFT) & A6XX_VFD_DECODE_INSTR_SWAP__MASK; +} +#define A6XX_VFD_DECODE_INSTR_UNK30 0x40000000 +#define 
A6XX_VFD_DECODE_INSTR_FLOAT 0x80000000 + +static inline uint32_t REG_A6XX_VFD_DECODE_STEP_RATE(uint32_t i0) { return 0x0000a091 + 0x2*i0; } + +static inline uint32_t REG_A6XX_VFD_DEST_CNTL(uint32_t i0) { return 0x0000a0d0 + 0x1*i0; } + +static inline uint32_t REG_A6XX_VFD_DEST_CNTL_INSTR(uint32_t i0) { return 0x0000a0d0 + 0x1*i0; } +#define A6XX_VFD_DEST_CNTL_INSTR_WRITEMASK__MASK 0x0000000f +#define A6XX_VFD_DEST_CNTL_INSTR_WRITEMASK__SHIFT 0 +static inline uint32_t A6XX_VFD_DEST_CNTL_INSTR_WRITEMASK(uint32_t val) +{ + return ((val) << A6XX_VFD_DEST_CNTL_INSTR_WRITEMASK__SHIFT) & A6XX_VFD_DEST_CNTL_INSTR_WRITEMASK__MASK; +} +#define A6XX_VFD_DEST_CNTL_INSTR_REGID__MASK 0x00000ff0 +#define A6XX_VFD_DEST_CNTL_INSTR_REGID__SHIFT 4 +static inline uint32_t A6XX_VFD_DEST_CNTL_INSTR_REGID(uint32_t val) +{ + return ((val) << A6XX_VFD_DEST_CNTL_INSTR_REGID__SHIFT) & A6XX_VFD_DEST_CNTL_INSTR_REGID__MASK; +} + +#define REG_A6XX_SP_UNKNOWN_A0F8 0x0000a0f8 + +#define REG_A6XX_SP_PRIMITIVE_CNTL 0x0000a802 +#define A6XX_SP_PRIMITIVE_CNTL_VSOUT__MASK 0x0000001f +#define A6XX_SP_PRIMITIVE_CNTL_VSOUT__SHIFT 0 +static inline uint32_t A6XX_SP_PRIMITIVE_CNTL_VSOUT(uint32_t val) +{ + return ((val) << A6XX_SP_PRIMITIVE_CNTL_VSOUT__SHIFT) & A6XX_SP_PRIMITIVE_CNTL_VSOUT__MASK; +} + +static inline uint32_t REG_A6XX_SP_VS_OUT(uint32_t i0) { return 0x0000a803 + 0x1*i0; } + +static inline uint32_t REG_A6XX_SP_VS_OUT_REG(uint32_t i0) { return 0x0000a803 + 0x1*i0; } +#define A6XX_SP_VS_OUT_REG_A_REGID__MASK 0x000000ff +#define A6XX_SP_VS_OUT_REG_A_REGID__SHIFT 0 +static inline uint32_t A6XX_SP_VS_OUT_REG_A_REGID(uint32_t val) +{ + return ((val) << A6XX_SP_VS_OUT_REG_A_REGID__SHIFT) & A6XX_SP_VS_OUT_REG_A_REGID__MASK; +} +#define A6XX_SP_VS_OUT_REG_A_COMPMASK__MASK 0x00000f00 +#define A6XX_SP_VS_OUT_REG_A_COMPMASK__SHIFT 8 +static inline uint32_t A6XX_SP_VS_OUT_REG_A_COMPMASK(uint32_t val) +{ + return ((val) << A6XX_SP_VS_OUT_REG_A_COMPMASK__SHIFT) & A6XX_SP_VS_OUT_REG_A_COMPMASK__MASK; +} 
+#define A6XX_SP_VS_OUT_REG_B_REGID__MASK 0x00ff0000 +#define A6XX_SP_VS_OUT_REG_B_REGID__SHIFT 16 +static inline uint32_t A6XX_SP_VS_OUT_REG_B_REGID(uint32_t val) +{ + return ((val) << A6XX_SP_VS_OUT_REG_B_REGID__SHIFT) & A6XX_SP_VS_OUT_REG_B_REGID__MASK; +} +#define A6XX_SP_VS_OUT_REG_B_COMPMASK__MASK 0x0f000000 +#define A6XX_SP_VS_OUT_REG_B_COMPMASK__SHIFT 24 +static inline uint32_t A6XX_SP_VS_OUT_REG_B_COMPMASK(uint32_t val) +{ + return ((val) << A6XX_SP_VS_OUT_REG_B_COMPMASK__SHIFT) & A6XX_SP_VS_OUT_REG_B_COMPMASK__MASK; +} + +static inline uint32_t REG_A6XX_SP_VS_VPC_DST(uint32_t i0) { return 0x0000a813 + 0x1*i0; } + +static inline uint32_t REG_A6XX_SP_VS_VPC_DST_REG(uint32_t i0) { return 0x0000a813 + 0x1*i0; } +#define A6XX_SP_VS_VPC_DST_REG_OUTLOC0__MASK 0x000000ff +#define A6XX_SP_VS_VPC_DST_REG_OUTLOC0__SHIFT 0 +static inline uint32_t A6XX_SP_VS_VPC_DST_REG_OUTLOC0(uint32_t val) +{ + return ((val) << A6XX_SP_VS_VPC_DST_REG_OUTLOC0__SHIFT) & A6XX_SP_VS_VPC_DST_REG_OUTLOC0__MASK; +} +#define A6XX_SP_VS_VPC_DST_REG_OUTLOC1__MASK 0x0000ff00 +#define A6XX_SP_VS_VPC_DST_REG_OUTLOC1__SHIFT 8 +static inline uint32_t A6XX_SP_VS_VPC_DST_REG_OUTLOC1(uint32_t val) +{ + return ((val) << A6XX_SP_VS_VPC_DST_REG_OUTLOC1__SHIFT) & A6XX_SP_VS_VPC_DST_REG_OUTLOC1__MASK; +} +#define A6XX_SP_VS_VPC_DST_REG_OUTLOC2__MASK 0x00ff0000 +#define A6XX_SP_VS_VPC_DST_REG_OUTLOC2__SHIFT 16 +static inline uint32_t A6XX_SP_VS_VPC_DST_REG_OUTLOC2(uint32_t val) +{ + return ((val) << A6XX_SP_VS_VPC_DST_REG_OUTLOC2__SHIFT) & A6XX_SP_VS_VPC_DST_REG_OUTLOC2__MASK; +} +#define A6XX_SP_VS_VPC_DST_REG_OUTLOC3__MASK 0xff000000 +#define A6XX_SP_VS_VPC_DST_REG_OUTLOC3__SHIFT 24 +static inline uint32_t A6XX_SP_VS_VPC_DST_REG_OUTLOC3(uint32_t val) +{ + return ((val) << A6XX_SP_VS_VPC_DST_REG_OUTLOC3__SHIFT) & A6XX_SP_VS_VPC_DST_REG_OUTLOC3__MASK; +} + +#define REG_A6XX_SP_VS_CTRL_REG0 0x0000a800 +#define A6XX_SP_VS_CTRL_REG0_HALFREGFOOTPRINT__MASK 0x0000007e +#define 
A6XX_SP_VS_CTRL_REG0_HALFREGFOOTPRINT__SHIFT 1 +static inline uint32_t A6XX_SP_VS_CTRL_REG0_HALFREGFOOTPRINT(uint32_t val) +{ + return ((val) << A6XX_SP_VS_CTRL_REG0_HALFREGFOOTPRINT__SHIFT) & A6XX_SP_VS_CTRL_REG0_HALFREGFOOTPRINT__MASK; +} +#define A6XX_SP_VS_CTRL_REG0_FULLREGFOOTPRINT__MASK 0x00001f80 +#define A6XX_SP_VS_CTRL_REG0_FULLREGFOOTPRINT__SHIFT 7 +static inline uint32_t A6XX_SP_VS_CTRL_REG0_FULLREGFOOTPRINT(uint32_t val) +{ + return ((val) << A6XX_SP_VS_CTRL_REG0_FULLREGFOOTPRINT__SHIFT) & A6XX_SP_VS_CTRL_REG0_FULLREGFOOTPRINT__MASK; +} +#define A6XX_SP_VS_CTRL_REG0_BRANCHSTACK__MASK 0x000fc000 +#define A6XX_SP_VS_CTRL_REG0_BRANCHSTACK__SHIFT 14 +static inline uint32_t A6XX_SP_VS_CTRL_REG0_BRANCHSTACK(uint32_t val) +{ + return ((val) << A6XX_SP_VS_CTRL_REG0_BRANCHSTACK__SHIFT) & A6XX_SP_VS_CTRL_REG0_BRANCHSTACK__MASK; +} +#define A6XX_SP_VS_CTRL_REG0_THREADSIZE__MASK 0x00100000 +#define A6XX_SP_VS_CTRL_REG0_THREADSIZE__SHIFT 20 +static inline uint32_t A6XX_SP_VS_CTRL_REG0_THREADSIZE(enum a3xx_threadsize val) +{ + return ((val) << A6XX_SP_VS_CTRL_REG0_THREADSIZE__SHIFT) & A6XX_SP_VS_CTRL_REG0_THREADSIZE__MASK; +} +#define A6XX_SP_VS_CTRL_REG0_VARYING 0x00400000 +#define A6XX_SP_VS_CTRL_REG0_PIXLODENABLE 0x04000000 +#define A6XX_SP_VS_CTRL_REG0_MERGEDREGS 0x80000000 + +#define REG_A6XX_SP_UNKNOWN_A81B 0x0000a81b + +#define REG_A6XX_SP_VS_OBJ_START_LO 0x0000a81c + +#define REG_A6XX_SP_VS_OBJ_START_HI 0x0000a81d + +#define REG_A6XX_SP_VS_TEX_COUNT 0x0000a822 + +#define REG_A6XX_SP_VS_CONFIG 0x0000a823 +#define A6XX_SP_VS_CONFIG_ENABLED 0x00000100 +#define A6XX_SP_VS_CONFIG_NTEX__MASK 0x0001fe00 +#define A6XX_SP_VS_CONFIG_NTEX__SHIFT 9 +static inline uint32_t A6XX_SP_VS_CONFIG_NTEX(uint32_t val) +{ + return ((val) << A6XX_SP_VS_CONFIG_NTEX__SHIFT) & A6XX_SP_VS_CONFIG_NTEX__MASK; +} +#define A6XX_SP_VS_CONFIG_NSAMP__MASK 0x01fe0000 +#define A6XX_SP_VS_CONFIG_NSAMP__SHIFT 17 +static inline uint32_t A6XX_SP_VS_CONFIG_NSAMP(uint32_t val) +{ + return ((val) << 
A6XX_SP_VS_CONFIG_NSAMP__SHIFT) & A6XX_SP_VS_CONFIG_NSAMP__MASK; +} + +#define REG_A6XX_SP_VS_INSTRLEN 0x0000a824 + +#define REG_A6XX_SP_HS_CTRL_REG0 0x0000a830 +#define A6XX_SP_HS_CTRL_REG0_HALFREGFOOTPRINT__MASK 0x0000007e +#define A6XX_SP_HS_CTRL_REG0_HALFREGFOOTPRINT__SHIFT 1 +static inline uint32_t A6XX_SP_HS_CTRL_REG0_HALFREGFOOTPRINT(uint32_t val) +{ + return ((val) << A6XX_SP_HS_CTRL_REG0_HALFREGFOOTPRINT__SHIFT) & A6XX_SP_HS_CTRL_REG0_HALFREGFOOTPRINT__MASK; +} +#define A6XX_SP_HS_CTRL_REG0_FULLREGFOOTPRINT__MASK 0x00001f80 +#define A6XX_SP_HS_CTRL_REG0_FULLREGFOOTPRINT__SHIFT 7 +static inline uint32_t A6XX_SP_HS_CTRL_REG0_FULLREGFOOTPRINT(uint32_t val) +{ + return ((val) << A6XX_SP_HS_CTRL_REG0_FULLREGFOOTPRINT__SHIFT) & A6XX_SP_HS_CTRL_REG0_FULLREGFOOTPRINT__MASK; +} +#define A6XX_SP_HS_CTRL_REG0_BRANCHSTACK__MASK 0x000fc000 +#define A6XX_SP_HS_CTRL_REG0_BRANCHSTACK__SHIFT 14 +static inline uint32_t A6XX_SP_HS_CTRL_REG0_BRANCHSTACK(uint32_t val) +{ + return ((val) << A6XX_SP_HS_CTRL_REG0_BRANCHSTACK__SHIFT) & A6XX_SP_HS_CTRL_REG0_BRANCHSTACK__MASK; +} +#define A6XX_SP_HS_CTRL_REG0_THREADSIZE__MASK 0x00100000 +#define A6XX_SP_HS_CTRL_REG0_THREADSIZE__SHIFT 20 +static inline uint32_t A6XX_SP_HS_CTRL_REG0_THREADSIZE(enum a3xx_threadsize val) +{ + return ((val) << A6XX_SP_HS_CTRL_REG0_THREADSIZE__SHIFT) & A6XX_SP_HS_CTRL_REG0_THREADSIZE__MASK; +} +#define A6XX_SP_HS_CTRL_REG0_VARYING 0x00400000 +#define A6XX_SP_HS_CTRL_REG0_PIXLODENABLE 0x04000000 +#define A6XX_SP_HS_CTRL_REG0_MERGEDREGS 0x80000000 + +#define REG_A6XX_SP_HS_UNKNOWN_A831 0x0000a831 + +#define REG_A6XX_SP_HS_OBJ_START_LO 0x0000a834 + +#define REG_A6XX_SP_HS_OBJ_START_HI 0x0000a835 + +#define REG_A6XX_SP_HS_TEX_COUNT 0x0000a83a + +#define REG_A6XX_SP_HS_CONFIG 0x0000a83b +#define A6XX_SP_HS_CONFIG_ENABLED 0x00000100 +#define A6XX_SP_HS_CONFIG_NTEX__MASK 0x0001fe00 +#define A6XX_SP_HS_CONFIG_NTEX__SHIFT 9 +static inline uint32_t A6XX_SP_HS_CONFIG_NTEX(uint32_t val) +{ + return ((val) << 
A6XX_SP_HS_CONFIG_NTEX__SHIFT) & A6XX_SP_HS_CONFIG_NTEX__MASK; +} +#define A6XX_SP_HS_CONFIG_NSAMP__MASK 0x01fe0000 +#define A6XX_SP_HS_CONFIG_NSAMP__SHIFT 17 +static inline uint32_t A6XX_SP_HS_CONFIG_NSAMP(uint32_t val) +{ + return ((val) << A6XX_SP_HS_CONFIG_NSAMP__SHIFT) & A6XX_SP_HS_CONFIG_NSAMP__MASK; +} + +#define REG_A6XX_SP_HS_INSTRLEN 0x0000a83c + +#define REG_A6XX_SP_DS_CTRL_REG0 0x0000a840 +#define A6XX_SP_DS_CTRL_REG0_HALFREGFOOTPRINT__MASK 0x0000007e +#define A6XX_SP_DS_CTRL_REG0_HALFREGFOOTPRINT__SHIFT 1 +static inline uint32_t A6XX_SP_DS_CTRL_REG0_HALFREGFOOTPRINT(uint32_t val) +{ + return ((val) << A6XX_SP_DS_CTRL_REG0_HALFREGFOOTPRINT__SHIFT) & A6XX_SP_DS_CTRL_REG0_HALFREGFOOTPRINT__MASK; +} +#define A6XX_SP_DS_CTRL_REG0_FULLREGFOOTPRINT__MASK 0x00001f80 +#define A6XX_SP_DS_CTRL_REG0_FULLREGFOOTPRINT__SHIFT 7 +static inline uint32_t A6XX_SP_DS_CTRL_REG0_FULLREGFOOTPRINT(uint32_t val) +{ + return ((val) << A6XX_SP_DS_CTRL_REG0_FULLREGFOOTPRINT__SHIFT) & A6XX_SP_DS_CTRL_REG0_FULLREGFOOTPRINT__MASK; +} +#define A6XX_SP_DS_CTRL_REG0_BRANCHSTACK__MASK 0x000fc000 +#define A6XX_SP_DS_CTRL_REG0_BRANCHSTACK__SHIFT 14 +static inline uint32_t A6XX_SP_DS_CTRL_REG0_BRANCHSTACK(uint32_t val) +{ + return ((val) << A6XX_SP_DS_CTRL_REG0_BRANCHSTACK__SHIFT) & A6XX_SP_DS_CTRL_REG0_BRANCHSTACK__MASK; +} +#define A6XX_SP_DS_CTRL_REG0_THREADSIZE__MASK 0x00100000 +#define A6XX_SP_DS_CTRL_REG0_THREADSIZE__SHIFT 20 +static inline uint32_t A6XX_SP_DS_CTRL_REG0_THREADSIZE(enum a3xx_threadsize val) +{ + return ((val) << A6XX_SP_DS_CTRL_REG0_THREADSIZE__SHIFT) & A6XX_SP_DS_CTRL_REG0_THREADSIZE__MASK; +} +#define A6XX_SP_DS_CTRL_REG0_VARYING 0x00400000 +#define A6XX_SP_DS_CTRL_REG0_PIXLODENABLE 0x04000000 +#define A6XX_SP_DS_CTRL_REG0_MERGEDREGS 0x80000000 + +#define REG_A6XX_SP_DS_OBJ_START_LO 0x0000a85c + +#define REG_A6XX_SP_DS_OBJ_START_HI 0x0000a85d + +#define REG_A6XX_SP_DS_TEX_COUNT 0x0000a862 + +#define REG_A6XX_SP_DS_CONFIG 0x0000a863 +#define A6XX_SP_DS_CONFIG_ENABLED 
0x00000100 +#define A6XX_SP_DS_CONFIG_NTEX__MASK 0x0001fe00 +#define A6XX_SP_DS_CONFIG_NTEX__SHIFT 9 +static inline uint32_t A6XX_SP_DS_CONFIG_NTEX(uint32_t val) +{ + return ((val) << A6XX_SP_DS_CONFIG_NTEX__SHIFT) & A6XX_SP_DS_CONFIG_NTEX__MASK; +} +#define A6XX_SP_DS_CONFIG_NSAMP__MASK 0x01fe0000 +#define A6XX_SP_DS_CONFIG_NSAMP__SHIFT 17 +static inline uint32_t A6XX_SP_DS_CONFIG_NSAMP(uint32_t val) +{ + return ((val) << A6XX_SP_DS_CONFIG_NSAMP__SHIFT) & A6XX_SP_DS_CONFIG_NSAMP__MASK; +} + +#define REG_A6XX_SP_DS_INSTRLEN 0x0000a864 + +#define REG_A6XX_SP_GS_CTRL_REG0 0x0000a870 +#define A6XX_SP_GS_CTRL_REG0_HALFREGFOOTPRINT__MASK 0x0000007e +#define A6XX_SP_GS_CTRL_REG0_HALFREGFOOTPRINT__SHIFT 1 +static inline uint32_t A6XX_SP_GS_CTRL_REG0_HALFREGFOOTPRINT(uint32_t val) +{ + return ((val) << A6XX_SP_GS_CTRL_REG0_HALFREGFOOTPRINT__SHIFT) & A6XX_SP_GS_CTRL_REG0_HALFREGFOOTPRINT__MASK; +} +#define A6XX_SP_GS_CTRL_REG0_FULLREGFOOTPRINT__MASK 0x00001f80 +#define A6XX_SP_GS_CTRL_REG0_FULLREGFOOTPRINT__SHIFT 7 +static inline uint32_t A6XX_SP_GS_CTRL_REG0_FULLREGFOOTPRINT(uint32_t val) +{ + return ((val) << A6XX_SP_GS_CTRL_REG0_FULLREGFOOTPRINT__SHIFT) & A6XX_SP_GS_CTRL_REG0_FULLREGFOOTPRINT__MASK; +} +#define A6XX_SP_GS_CTRL_REG0_BRANCHSTACK__MASK 0x000fc000 +#define A6XX_SP_GS_CTRL_REG0_BRANCHSTACK__SHIFT 14 +static inline uint32_t A6XX_SP_GS_CTRL_REG0_BRANCHSTACK(uint32_t val) +{ + return ((val) << A6XX_SP_GS_CTRL_REG0_BRANCHSTACK__SHIFT) & A6XX_SP_GS_CTRL_REG0_BRANCHSTACK__MASK; +} +#define A6XX_SP_GS_CTRL_REG0_THREADSIZE__MASK 0x00100000 +#define A6XX_SP_GS_CTRL_REG0_THREADSIZE__SHIFT 20 +static inline uint32_t A6XX_SP_GS_CTRL_REG0_THREADSIZE(enum a3xx_threadsize val) +{ + return ((val) << A6XX_SP_GS_CTRL_REG0_THREADSIZE__SHIFT) & A6XX_SP_GS_CTRL_REG0_THREADSIZE__MASK; +} +#define A6XX_SP_GS_CTRL_REG0_VARYING 0x00400000 +#define A6XX_SP_GS_CTRL_REG0_PIXLODENABLE 0x04000000 +#define A6XX_SP_GS_CTRL_REG0_MERGEDREGS 0x80000000 + +#define REG_A6XX_SP_GS_UNKNOWN_A871 
0x0000a871 + +#define REG_A6XX_SP_GS_OBJ_START_LO 0x0000a88d + +#define REG_A6XX_SP_GS_OBJ_START_HI 0x0000a88e + +#define REG_A6XX_SP_GS_TEX_COUNT 0x0000a893 + +#define REG_A6XX_SP_GS_CONFIG 0x0000a894 +#define A6XX_SP_GS_CONFIG_ENABLED 0x00000100 +#define A6XX_SP_GS_CONFIG_NTEX__MASK 0x0001fe00 +#define A6XX_SP_GS_CONFIG_NTEX__SHIFT 9 +static inline uint32_t A6XX_SP_GS_CONFIG_NTEX(uint32_t val) +{ + return ((val) << A6XX_SP_GS_CONFIG_NTEX__SHIFT) & A6XX_SP_GS_CONFIG_NTEX__MASK; +} +#define A6XX_SP_GS_CONFIG_NSAMP__MASK 0x01fe0000 +#define A6XX_SP_GS_CONFIG_NSAMP__SHIFT 17 +static inline uint32_t A6XX_SP_GS_CONFIG_NSAMP(uint32_t val) +{ + return ((val) << A6XX_SP_GS_CONFIG_NSAMP__SHIFT) & A6XX_SP_GS_CONFIG_NSAMP__MASK; +} + +#define REG_A6XX_SP_GS_INSTRLEN 0x0000a895 + +#define REG_A6XX_SP_VS_TEX_SAMP_LO 0x0000a8a0 + +#define REG_A6XX_SP_VS_TEX_SAMP_HI 0x0000a8a1 + +#define REG_A6XX_SP_HS_TEX_SAMP_LO 0x0000a8a2 + +#define REG_A6XX_SP_HS_TEX_SAMP_HI 0x0000a8a3 + +#define REG_A6XX_SP_DS_TEX_SAMP_LO 0x0000a8a4 + +#define REG_A6XX_SP_DS_TEX_SAMP_HI 0x0000a8a5 + +#define REG_A6XX_SP_GS_TEX_SAMP_LO 0x0000a8a6 + +#define REG_A6XX_SP_GS_TEX_SAMP_HI 0x0000a8a7 + +#define REG_A6XX_SP_VS_TEX_CONST_LO 0x0000a8a8 + +#define REG_A6XX_SP_VS_TEX_CONST_HI 0x0000a8a9 + +#define REG_A6XX_SP_HS_TEX_CONST_LO 0x0000a8aa + +#define REG_A6XX_SP_HS_TEX_CONST_HI 0x0000a8ab + +#define REG_A6XX_SP_DS_TEX_CONST_LO 0x0000a8ac + +#define REG_A6XX_SP_DS_TEX_CONST_HI 0x0000a8ad + +#define REG_A6XX_SP_GS_TEX_CONST_LO 0x0000a8ae + +#define REG_A6XX_SP_GS_TEX_CONST_HI 0x0000a8af + +#define REG_A6XX_SP_FS_CTRL_REG0 0x0000a980 +#define A6XX_SP_FS_CTRL_REG0_HALFREGFOOTPRINT__MASK 0x0000007e +#define A6XX_SP_FS_CTRL_REG0_HALFREGFOOTPRINT__SHIFT 1 +static inline uint32_t A6XX_SP_FS_CTRL_REG0_HALFREGFOOTPRINT(uint32_t val) +{ + return ((val) << A6XX_SP_FS_CTRL_REG0_HALFREGFOOTPRINT__SHIFT) & A6XX_SP_FS_CTRL_REG0_HALFREGFOOTPRINT__MASK; +} +#define A6XX_SP_FS_CTRL_REG0_FULLREGFOOTPRINT__MASK 0x00001f80 
+#define A6XX_SP_FS_CTRL_REG0_FULLREGFOOTPRINT__SHIFT 7 +static inline uint32_t A6XX_SP_FS_CTRL_REG0_FULLREGFOOTPRINT(uint32_t val) +{ + return ((val) << A6XX_SP_FS_CTRL_REG0_FULLREGFOOTPRINT__SHIFT) & A6XX_SP_FS_CTRL_REG0_FULLREGFOOTPRINT__MASK; +} +#define A6XX_SP_FS_CTRL_REG0_BRANCHSTACK__MASK 0x000fc000 +#define A6XX_SP_FS_CTRL_REG0_BRANCHSTACK__SHIFT 14 +static inline uint32_t A6XX_SP_FS_CTRL_REG0_BRANCHSTACK(uint32_t val) +{ + return ((val) << A6XX_SP_FS_CTRL_REG0_BRANCHSTACK__SHIFT) & A6XX_SP_FS_CTRL_REG0_BRANCHSTACK__MASK; +} +#define A6XX_SP_FS_CTRL_REG0_THREADSIZE__MASK 0x00100000 +#define A6XX_SP_FS_CTRL_REG0_THREADSIZE__SHIFT 20 +static inline uint32_t A6XX_SP_FS_CTRL_REG0_THREADSIZE(enum a3xx_threadsize val) +{ + return ((val) << A6XX_SP_FS_CTRL_REG0_THREADSIZE__SHIFT) & A6XX_SP_FS_CTRL_REG0_THREADSIZE__MASK; +} +#define A6XX_SP_FS_CTRL_REG0_VARYING 0x00400000 +#define A6XX_SP_FS_CTRL_REG0_PIXLODENABLE 0x04000000 +#define A6XX_SP_FS_CTRL_REG0_MERGEDREGS 0x80000000 + +#define REG_A6XX_SP_UNKNOWN_A982 0x0000a982 + +#define REG_A6XX_SP_FS_OBJ_START_LO 0x0000a983 + +#define REG_A6XX_SP_FS_OBJ_START_HI 0x0000a984 + +#define REG_A6XX_SP_BLEND_CNTL 0x0000a989 +#define A6XX_SP_BLEND_CNTL_ENABLED 0x00000001 +#define A6XX_SP_BLEND_CNTL_UNK8 0x00000100 +#define A6XX_SP_BLEND_CNTL_ALPHA_TO_COVERAGE 0x00000400 + +#define REG_A6XX_SP_SRGB_CNTL 0x0000a98a +#define A6XX_SP_SRGB_CNTL_SRGB_MRT0 0x00000001 +#define A6XX_SP_SRGB_CNTL_SRGB_MRT1 0x00000002 +#define A6XX_SP_SRGB_CNTL_SRGB_MRT2 0x00000004 +#define A6XX_SP_SRGB_CNTL_SRGB_MRT3 0x00000008 +#define A6XX_SP_SRGB_CNTL_SRGB_MRT4 0x00000010 +#define A6XX_SP_SRGB_CNTL_SRGB_MRT5 0x00000020 +#define A6XX_SP_SRGB_CNTL_SRGB_MRT6 0x00000040 +#define A6XX_SP_SRGB_CNTL_SRGB_MRT7 0x00000080 + +#define REG_A6XX_SP_FS_RENDER_COMPONENTS 0x0000a98b +#define A6XX_SP_FS_RENDER_COMPONENTS_RT0__MASK 0x0000000f +#define A6XX_SP_FS_RENDER_COMPONENTS_RT0__SHIFT 0 +static inline uint32_t A6XX_SP_FS_RENDER_COMPONENTS_RT0(uint32_t val) +{ 
+ return ((val) << A6XX_SP_FS_RENDER_COMPONENTS_RT0__SHIFT) & A6XX_SP_FS_RENDER_COMPONENTS_RT0__MASK; +} +#define A6XX_SP_FS_RENDER_COMPONENTS_RT1__MASK 0x000000f0 +#define A6XX_SP_FS_RENDER_COMPONENTS_RT1__SHIFT 4 +static inline uint32_t A6XX_SP_FS_RENDER_COMPONENTS_RT1(uint32_t val) +{ + return ((val) << A6XX_SP_FS_RENDER_COMPONENTS_RT1__SHIFT) & A6XX_SP_FS_RENDER_COMPONENTS_RT1__MASK; +} +#define A6XX_SP_FS_RENDER_COMPONENTS_RT2__MASK 0x00000f00 +#define A6XX_SP_FS_RENDER_COMPONENTS_RT2__SHIFT 8 +static inline uint32_t A6XX_SP_FS_RENDER_COMPONENTS_RT2(uint32_t val) +{ + return ((val) << A6XX_SP_FS_RENDER_COMPONENTS_RT2__SHIFT) & A6XX_SP_FS_RENDER_COMPONENTS_RT2__MASK; +} +#define A6XX_SP_FS_RENDER_COMPONENTS_RT3__MASK 0x0000f000 +#define A6XX_SP_FS_RENDER_COMPONENTS_RT3__SHIFT 12 +static inline uint32_t A6XX_SP_FS_RENDER_COMPONENTS_RT3(uint32_t val) +{ + return ((val) << A6XX_SP_FS_RENDER_COMPONENTS_RT3__SHIFT) & A6XX_SP_FS_RENDER_COMPONENTS_RT3__MASK; +} +#define A6XX_SP_FS_RENDER_COMPONENTS_RT4__MASK 0x000f0000 +#define A6XX_SP_FS_RENDER_COMPONENTS_RT4__SHIFT 16 +static inline uint32_t A6XX_SP_FS_RENDER_COMPONENTS_RT4(uint32_t val) +{ + return ((val) << A6XX_SP_FS_RENDER_COMPONENTS_RT4__SHIFT) & A6XX_SP_FS_RENDER_COMPONENTS_RT4__MASK; +} +#define A6XX_SP_FS_RENDER_COMPONENTS_RT5__MASK 0x00f00000 +#define A6XX_SP_FS_RENDER_COMPONENTS_RT5__SHIFT 20 +static inline uint32_t A6XX_SP_FS_RENDER_COMPONENTS_RT5(uint32_t val) +{ + return ((val) << A6XX_SP_FS_RENDER_COMPONENTS_RT5__SHIFT) & A6XX_SP_FS_RENDER_COMPONENTS_RT5__MASK; +} +#define A6XX_SP_FS_RENDER_COMPONENTS_RT6__MASK 0x0f000000 +#define A6XX_SP_FS_RENDER_COMPONENTS_RT6__SHIFT 24 +static inline uint32_t A6XX_SP_FS_RENDER_COMPONENTS_RT6(uint32_t val) +{ + return ((val) << A6XX_SP_FS_RENDER_COMPONENTS_RT6__SHIFT) & A6XX_SP_FS_RENDER_COMPONENTS_RT6__MASK; +} +#define A6XX_SP_FS_RENDER_COMPONENTS_RT7__MASK 0xf0000000 +#define A6XX_SP_FS_RENDER_COMPONENTS_RT7__SHIFT 28 +static inline uint32_t 
A6XX_SP_FS_RENDER_COMPONENTS_RT7(uint32_t val) +{ + return ((val) << A6XX_SP_FS_RENDER_COMPONENTS_RT7__SHIFT) & A6XX_SP_FS_RENDER_COMPONENTS_RT7__MASK; +} + +#define REG_A6XX_SP_FS_OUTPUT_CNTL0 0x0000a98c +#define A6XX_SP_FS_OUTPUT_CNTL0_DEPTH_REGID__MASK 0x0000ff00 +#define A6XX_SP_FS_OUTPUT_CNTL0_DEPTH_REGID__SHIFT 8 +static inline uint32_t A6XX_SP_FS_OUTPUT_CNTL0_DEPTH_REGID(uint32_t val) +{ + return ((val) << A6XX_SP_FS_OUTPUT_CNTL0_DEPTH_REGID__SHIFT) & A6XX_SP_FS_OUTPUT_CNTL0_DEPTH_REGID__MASK; +} + +#define REG_A6XX_SP_FS_OUTPUT_CNTL1 0x0000a98d +#define A6XX_SP_FS_OUTPUT_CNTL1_MRT__MASK 0x0000000f +#define A6XX_SP_FS_OUTPUT_CNTL1_MRT__SHIFT 0 +static inline uint32_t A6XX_SP_FS_OUTPUT_CNTL1_MRT(uint32_t val) +{ + return ((val) << A6XX_SP_FS_OUTPUT_CNTL1_MRT__SHIFT) & A6XX_SP_FS_OUTPUT_CNTL1_MRT__MASK; +} + +static inline uint32_t REG_A6XX_SP_FS_MRT(uint32_t i0) { return 0x0000a996 + 0x1*i0; } + +static inline uint32_t REG_A6XX_SP_FS_MRT_REG(uint32_t i0) { return 0x0000a996 + 0x1*i0; } +#define A6XX_SP_FS_MRT_REG_COLOR_FORMAT__MASK 0x000000ff +#define A6XX_SP_FS_MRT_REG_COLOR_FORMAT__SHIFT 0 +static inline uint32_t A6XX_SP_FS_MRT_REG_COLOR_FORMAT(enum a6xx_color_fmt val) +{ + return ((val) << A6XX_SP_FS_MRT_REG_COLOR_FORMAT__SHIFT) & A6XX_SP_FS_MRT_REG_COLOR_FORMAT__MASK; +} +#define A6XX_SP_FS_MRT_REG_COLOR_SINT 0x00000100 +#define A6XX_SP_FS_MRT_REG_COLOR_UINT 0x00000200 + +#define REG_A6XX_SP_UNKNOWN_A99E 0x0000a99e + +#define REG_A6XX_SP_FS_TEX_COUNT 0x0000a9a7 + +#define REG_A6XX_SP_UNKNOWN_A9A8 0x0000a9a8 + +#define REG_A6XX_SP_FS_TEX_SAMP_LO 0x0000a9e0 + +#define REG_A6XX_SP_FS_TEX_SAMP_HI 0x0000a9e1 + +#define REG_A6XX_SP_CS_TEX_SAMP_LO 0x0000a9e2 + +#define REG_A6XX_SP_CS_TEX_SAMP_HI 0x0000a9e3 + +#define REG_A6XX_SP_FS_TEX_CONST_LO 0x0000a9e4 + +#define REG_A6XX_SP_FS_TEX_CONST_HI 0x0000a9e5 + +#define REG_A6XX_SP_CS_TEX_CONST_LO 0x0000a9e6 + +#define REG_A6XX_SP_CS_TEX_CONST_HI 0x0000a9e7 + +static inline uint32_t REG_A6XX_SP_FS_OUTPUT(uint32_t i0) 
{ return 0x0000a98e + 0x1*i0; } + +static inline uint32_t REG_A6XX_SP_FS_OUTPUT_REG(uint32_t i0) { return 0x0000a98e + 0x1*i0; } +#define A6XX_SP_FS_OUTPUT_REG_REGID__MASK 0x000000ff +#define A6XX_SP_FS_OUTPUT_REG_REGID__SHIFT 0 +static inline uint32_t A6XX_SP_FS_OUTPUT_REG_REGID(uint32_t val) +{ + return ((val) << A6XX_SP_FS_OUTPUT_REG_REGID__SHIFT) & A6XX_SP_FS_OUTPUT_REG_REGID__MASK; +} +#define A6XX_SP_FS_OUTPUT_REG_HALF_PRECISION 0x00000100 + +#define REG_A6XX_SP_CS_CTRL_REG0 0x0000a9b0 +#define A6XX_SP_CS_CTRL_REG0_HALFREGFOOTPRINT__MASK 0x0000007e +#define A6XX_SP_CS_CTRL_REG0_HALFREGFOOTPRINT__SHIFT 1 +static inline uint32_t A6XX_SP_CS_CTRL_REG0_HALFREGFOOTPRINT(uint32_t val) +{ + return ((val) << A6XX_SP_CS_CTRL_REG0_HALFREGFOOTPRINT__SHIFT) & A6XX_SP_CS_CTRL_REG0_HALFREGFOOTPRINT__MASK; +} +#define A6XX_SP_CS_CTRL_REG0_FULLREGFOOTPRINT__MASK 0x00001f80 +#define A6XX_SP_CS_CTRL_REG0_FULLREGFOOTPRINT__SHIFT 7 +static inline uint32_t A6XX_SP_CS_CTRL_REG0_FULLREGFOOTPRINT(uint32_t val) +{ + return ((val) << A6XX_SP_CS_CTRL_REG0_FULLREGFOOTPRINT__SHIFT) & A6XX_SP_CS_CTRL_REG0_FULLREGFOOTPRINT__MASK; +} +#define A6XX_SP_CS_CTRL_REG0_BRANCHSTACK__MASK 0x000fc000 +#define A6XX_SP_CS_CTRL_REG0_BRANCHSTACK__SHIFT 14 +static inline uint32_t A6XX_SP_CS_CTRL_REG0_BRANCHSTACK(uint32_t val) +{ + return ((val) << A6XX_SP_CS_CTRL_REG0_BRANCHSTACK__SHIFT) & A6XX_SP_CS_CTRL_REG0_BRANCHSTACK__MASK; +} +#define A6XX_SP_CS_CTRL_REG0_THREADSIZE__MASK 0x00100000 +#define A6XX_SP_CS_CTRL_REG0_THREADSIZE__SHIFT 20 +static inline uint32_t A6XX_SP_CS_CTRL_REG0_THREADSIZE(enum a3xx_threadsize val) +{ + return ((val) << A6XX_SP_CS_CTRL_REG0_THREADSIZE__SHIFT) & A6XX_SP_CS_CTRL_REG0_THREADSIZE__MASK; +} +#define A6XX_SP_CS_CTRL_REG0_VARYING 0x00400000 +#define A6XX_SP_CS_CTRL_REG0_PIXLODENABLE 0x04000000 +#define A6XX_SP_CS_CTRL_REG0_MERGEDREGS 0x80000000 + +#define REG_A6XX_SP_CS_OBJ_START_LO 0x0000a9b4 + +#define REG_A6XX_SP_CS_OBJ_START_HI 0x0000a9b5 + +#define 
REG_A6XX_SP_CS_INSTRLEN 0x0000a9bc + +#define REG_A6XX_SP_UNKNOWN_AB00 0x0000ab00 + +#define REG_A6XX_SP_FS_CONFIG 0x0000ab04 +#define A6XX_SP_FS_CONFIG_ENABLED 0x00000100 +#define A6XX_SP_FS_CONFIG_NTEX__MASK 0x0001fe00 +#define A6XX_SP_FS_CONFIG_NTEX__SHIFT 9 +static inline uint32_t A6XX_SP_FS_CONFIG_NTEX(uint32_t val) +{ + return ((val) << A6XX_SP_FS_CONFIG_NTEX__SHIFT) & A6XX_SP_FS_CONFIG_NTEX__MASK; +} +#define A6XX_SP_FS_CONFIG_NSAMP__MASK 0x01fe0000 +#define A6XX_SP_FS_CONFIG_NSAMP__SHIFT 17 +static inline uint32_t A6XX_SP_FS_CONFIG_NSAMP(uint32_t val) +{ + return ((val) << A6XX_SP_FS_CONFIG_NSAMP__SHIFT) & A6XX_SP_FS_CONFIG_NSAMP__MASK; +} + +#define REG_A6XX_SP_FS_INSTRLEN 0x0000ab05 + +#define REG_A6XX_SP_UNKNOWN_AB20 0x0000ab20 + +#define REG_A6XX_SP_2D_SRC_FORMAT 0x0000acc0 +#define A6XX_SP_2D_SRC_FORMAT_NORM 0x00000001 +#define A6XX_SP_2D_SRC_FORMAT_SINT 0x00000002 +#define A6XX_SP_2D_SRC_FORMAT_UINT 0x00000004 +#define A6XX_SP_2D_SRC_FORMAT_COLOR_FORMAT__MASK 0x000007f8 +#define A6XX_SP_2D_SRC_FORMAT_COLOR_FORMAT__SHIFT 3 +static inline uint32_t A6XX_SP_2D_SRC_FORMAT_COLOR_FORMAT(enum a6xx_color_fmt val) +{ + return ((val) << A6XX_SP_2D_SRC_FORMAT_COLOR_FORMAT__SHIFT) & A6XX_SP_2D_SRC_FORMAT_COLOR_FORMAT__MASK; +} + +#define REG_A6XX_SP_UNKNOWN_AE00 0x0000ae00 + +#define REG_A6XX_SP_UNKNOWN_AE03 0x0000ae03 + +#define REG_A6XX_SP_UNKNOWN_AE04 0x0000ae04 + +#define REG_A6XX_SP_UNKNOWN_AE0F 0x0000ae0f + +#define REG_A6XX_SP_UNKNOWN_B182 0x0000b182 + +#define REG_A6XX_SP_UNKNOWN_B183 0x0000b183 + +#define REG_A6XX_SP_TP_RAS_MSAA_CNTL 0x0000b300 +#define A6XX_SP_TP_RAS_MSAA_CNTL_SAMPLES__MASK 0x00000003 +#define A6XX_SP_TP_RAS_MSAA_CNTL_SAMPLES__SHIFT 0 +static inline uint32_t A6XX_SP_TP_RAS_MSAA_CNTL_SAMPLES(enum a3xx_msaa_samples val) +{ + return ((val) << A6XX_SP_TP_RAS_MSAA_CNTL_SAMPLES__SHIFT) & A6XX_SP_TP_RAS_MSAA_CNTL_SAMPLES__MASK; +} + +#define REG_A6XX_SP_TP_DEST_MSAA_CNTL 0x0000b301 +#define A6XX_SP_TP_DEST_MSAA_CNTL_SAMPLES__MASK 0x00000003 
+#define A6XX_SP_TP_DEST_MSAA_CNTL_SAMPLES__SHIFT 0 +static inline uint32_t A6XX_SP_TP_DEST_MSAA_CNTL_SAMPLES(enum a3xx_msaa_samples val) +{ + return ((val) << A6XX_SP_TP_DEST_MSAA_CNTL_SAMPLES__SHIFT) & A6XX_SP_TP_DEST_MSAA_CNTL_SAMPLES__MASK; +} +#define A6XX_SP_TP_DEST_MSAA_CNTL_MSAA_DISABLE 0x00000004 + +#define REG_A6XX_SP_TP_BORDER_COLOR_BASE_ADDR_LO 0x0000b302 + +#define REG_A6XX_SP_TP_BORDER_COLOR_BASE_ADDR_HI 0x0000b303 + +#define REG_A6XX_SP_TP_UNKNOWN_B304 0x0000b304 + +#define REG_A6XX_SP_TP_UNKNOWN_B309 0x0000b309 + +#define REG_A6XX_SP_PS_2D_SRC_INFO 0x0000b4c0 +#define A6XX_SP_PS_2D_SRC_INFO_COLOR_FORMAT__MASK 0x000000ff +#define A6XX_SP_PS_2D_SRC_INFO_COLOR_FORMAT__SHIFT 0 +static inline uint32_t A6XX_SP_PS_2D_SRC_INFO_COLOR_FORMAT(enum a6xx_color_fmt val) +{ + return ((val) << A6XX_SP_PS_2D_SRC_INFO_COLOR_FORMAT__SHIFT) & A6XX_SP_PS_2D_SRC_INFO_COLOR_FORMAT__MASK; +} +#define A6XX_SP_PS_2D_SRC_INFO_TILE_MODE__MASK 0x00000300 +#define A6XX_SP_PS_2D_SRC_INFO_TILE_MODE__SHIFT 8 +static inline uint32_t A6XX_SP_PS_2D_SRC_INFO_TILE_MODE(enum a6xx_tile_mode val) +{ + return ((val) << A6XX_SP_PS_2D_SRC_INFO_TILE_MODE__SHIFT) & A6XX_SP_PS_2D_SRC_INFO_TILE_MODE__MASK; +} +#define A6XX_SP_PS_2D_SRC_INFO_COLOR_SWAP__MASK 0x00000c00 +#define A6XX_SP_PS_2D_SRC_INFO_COLOR_SWAP__SHIFT 10 +static inline uint32_t A6XX_SP_PS_2D_SRC_INFO_COLOR_SWAP(enum a3xx_color_swap val) +{ + return ((val) << A6XX_SP_PS_2D_SRC_INFO_COLOR_SWAP__SHIFT) & A6XX_SP_PS_2D_SRC_INFO_COLOR_SWAP__MASK; +} +#define A6XX_SP_PS_2D_SRC_INFO_FLAGS 0x00001000 +#define A6XX_SP_PS_2D_SRC_INFO_FILTER 0x00010000 + +#define REG_A6XX_SP_PS_2D_SRC_SIZE 0x0000b4c1 +#define A6XX_SP_PS_2D_SRC_SIZE_WIDTH__MASK 0x00007fff +#define A6XX_SP_PS_2D_SRC_SIZE_WIDTH__SHIFT 0 +static inline uint32_t A6XX_SP_PS_2D_SRC_SIZE_WIDTH(uint32_t val) +{ + return ((val) << A6XX_SP_PS_2D_SRC_SIZE_WIDTH__SHIFT) & A6XX_SP_PS_2D_SRC_SIZE_WIDTH__MASK; +} +#define A6XX_SP_PS_2D_SRC_SIZE_HEIGHT__MASK 0x3fff8000 +#define 
A6XX_SP_PS_2D_SRC_SIZE_HEIGHT__SHIFT 15 +static inline uint32_t A6XX_SP_PS_2D_SRC_SIZE_HEIGHT(uint32_t val) +{ + return ((val) << A6XX_SP_PS_2D_SRC_SIZE_HEIGHT__SHIFT) & A6XX_SP_PS_2D_SRC_SIZE_HEIGHT__MASK; +} + +#define REG_A6XX_SP_PS_2D_SRC_LO 0x0000b4c2 + +#define REG_A6XX_SP_PS_2D_SRC_HI 0x0000b4c3 + +#define REG_A6XX_SP_PS_2D_SRC_PITCH 0x0000b4c4 +#define A6XX_SP_PS_2D_SRC_PITCH_PITCH__MASK 0x01fffe00 +#define A6XX_SP_PS_2D_SRC_PITCH_PITCH__SHIFT 9 +static inline uint32_t A6XX_SP_PS_2D_SRC_PITCH_PITCH(uint32_t val) +{ + assert(!(val & 0x3f)); + return ((val >> 6) << A6XX_SP_PS_2D_SRC_PITCH_PITCH__SHIFT) & A6XX_SP_PS_2D_SRC_PITCH_PITCH__MASK; +} + +#define REG_A6XX_SP_PS_2D_SRC_FLAGS_LO 0x0000b4ca + +#define REG_A6XX_SP_PS_2D_SRC_FLAGS_HI 0x0000b4cb + +#define REG_A6XX_SP_UNKNOWN_B600 0x0000b600 + +#define REG_A6XX_SP_UNKNOWN_B605 0x0000b605 + +#define REG_A6XX_HLSQ_VS_CNTL 0x0000b800 +#define A6XX_HLSQ_VS_CNTL_CONSTLEN__MASK 0x000000ff +#define A6XX_HLSQ_VS_CNTL_CONSTLEN__SHIFT 0 +static inline uint32_t A6XX_HLSQ_VS_CNTL_CONSTLEN(uint32_t val) +{ + assert(!(val & 0x3)); + return ((val >> 2) << A6XX_HLSQ_VS_CNTL_CONSTLEN__SHIFT) & A6XX_HLSQ_VS_CNTL_CONSTLEN__MASK; +} + +#define REG_A6XX_HLSQ_HS_CNTL 0x0000b801 +#define A6XX_HLSQ_HS_CNTL_CONSTLEN__MASK 0x000000ff +#define A6XX_HLSQ_HS_CNTL_CONSTLEN__SHIFT 0 +static inline uint32_t A6XX_HLSQ_HS_CNTL_CONSTLEN(uint32_t val) +{ + assert(!(val & 0x3)); + return ((val >> 2) << A6XX_HLSQ_HS_CNTL_CONSTLEN__SHIFT) & A6XX_HLSQ_HS_CNTL_CONSTLEN__MASK; +} + +#define REG_A6XX_HLSQ_DS_CNTL 0x0000b802 +#define A6XX_HLSQ_DS_CNTL_CONSTLEN__MASK 0x000000ff +#define A6XX_HLSQ_DS_CNTL_CONSTLEN__SHIFT 0 +static inline uint32_t A6XX_HLSQ_DS_CNTL_CONSTLEN(uint32_t val) +{ + assert(!(val & 0x3)); + return ((val >> 2) << A6XX_HLSQ_DS_CNTL_CONSTLEN__SHIFT) & A6XX_HLSQ_DS_CNTL_CONSTLEN__MASK; +} + +#define REG_A6XX_HLSQ_GS_CNTL 0x0000b803 +#define A6XX_HLSQ_GS_CNTL_CONSTLEN__MASK 0x000000ff +#define A6XX_HLSQ_GS_CNTL_CONSTLEN__SHIFT 0 
+static inline uint32_t A6XX_HLSQ_GS_CNTL_CONSTLEN(uint32_t val) +{ + assert(!(val & 0x3)); + return ((val >> 2) << A6XX_HLSQ_GS_CNTL_CONSTLEN__SHIFT) & A6XX_HLSQ_GS_CNTL_CONSTLEN__MASK; +} + +#define REG_A6XX_HLSQ_UNKNOWN_B980 0x0000b980 + +#define REG_A6XX_HLSQ_CONTROL_1_REG 0x0000b982 + +#define REG_A6XX_HLSQ_CONTROL_2_REG 0x0000b983 +#define A6XX_HLSQ_CONTROL_2_REG_FACEREGID__MASK 0x000000ff +#define A6XX_HLSQ_CONTROL_2_REG_FACEREGID__SHIFT 0 +static inline uint32_t A6XX_HLSQ_CONTROL_2_REG_FACEREGID(uint32_t val) +{ + return ((val) << A6XX_HLSQ_CONTROL_2_REG_FACEREGID__SHIFT) & A6XX_HLSQ_CONTROL_2_REG_FACEREGID__MASK; +} +#define A6XX_HLSQ_CONTROL_2_REG_SAMPLEID__MASK 0x0000ff00 +#define A6XX_HLSQ_CONTROL_2_REG_SAMPLEID__SHIFT 8 +static inline uint32_t A6XX_HLSQ_CONTROL_2_REG_SAMPLEID(uint32_t val) +{ + return ((val) << A6XX_HLSQ_CONTROL_2_REG_SAMPLEID__SHIFT) & A6XX_HLSQ_CONTROL_2_REG_SAMPLEID__MASK; +} +#define A6XX_HLSQ_CONTROL_2_REG_SAMPLEMASK__MASK 0x00ff0000 +#define A6XX_HLSQ_CONTROL_2_REG_SAMPLEMASK__SHIFT 16 +static inline uint32_t A6XX_HLSQ_CONTROL_2_REG_SAMPLEMASK(uint32_t val) +{ + return ((val) << A6XX_HLSQ_CONTROL_2_REG_SAMPLEMASK__SHIFT) & A6XX_HLSQ_CONTROL_2_REG_SAMPLEMASK__MASK; +} + +#define REG_A6XX_HLSQ_CONTROL_3_REG 0x0000b984 +#define A6XX_HLSQ_CONTROL_3_REG_FRAGCOORDXYREGID__MASK 0x000000ff +#define A6XX_HLSQ_CONTROL_3_REG_FRAGCOORDXYREGID__SHIFT 0 +static inline uint32_t A6XX_HLSQ_CONTROL_3_REG_FRAGCOORDXYREGID(uint32_t val) +{ + return ((val) << A6XX_HLSQ_CONTROL_3_REG_FRAGCOORDXYREGID__SHIFT) & A6XX_HLSQ_CONTROL_3_REG_FRAGCOORDXYREGID__MASK; +} + +#define REG_A6XX_HLSQ_CONTROL_4_REG 0x0000b985 +#define A6XX_HLSQ_CONTROL_4_REG_XYCOORDREGID__MASK 0x00ff0000 +#define A6XX_HLSQ_CONTROL_4_REG_XYCOORDREGID__SHIFT 16 +static inline uint32_t A6XX_HLSQ_CONTROL_4_REG_XYCOORDREGID(uint32_t val) +{ + return ((val) << A6XX_HLSQ_CONTROL_4_REG_XYCOORDREGID__SHIFT) & A6XX_HLSQ_CONTROL_4_REG_XYCOORDREGID__MASK; +} +#define 
A6XX_HLSQ_CONTROL_4_REG_ZWCOORDREGID__MASK 0xff000000 +#define A6XX_HLSQ_CONTROL_4_REG_ZWCOORDREGID__SHIFT 24 +static inline uint32_t A6XX_HLSQ_CONTROL_4_REG_ZWCOORDREGID(uint32_t val) +{ + return ((val) << A6XX_HLSQ_CONTROL_4_REG_ZWCOORDREGID__SHIFT) & A6XX_HLSQ_CONTROL_4_REG_ZWCOORDREGID__MASK; +} + +#define REG_A6XX_HLSQ_CONTROL_5_REG 0x0000b986 + +#define REG_A6XX_HLSQ_CS_NDRANGE_0 0x0000b990 +#define A6XX_HLSQ_CS_NDRANGE_0_KERNELDIM__MASK 0x00000003 +#define A6XX_HLSQ_CS_NDRANGE_0_KERNELDIM__SHIFT 0 +static inline uint32_t A6XX_HLSQ_CS_NDRANGE_0_KERNELDIM(uint32_t val) +{ + return ((val) << A6XX_HLSQ_CS_NDRANGE_0_KERNELDIM__SHIFT) & A6XX_HLSQ_CS_NDRANGE_0_KERNELDIM__MASK; +} +#define A6XX_HLSQ_CS_NDRANGE_0_LOCALSIZEX__MASK 0x00000ffc +#define A6XX_HLSQ_CS_NDRANGE_0_LOCALSIZEX__SHIFT 2 +static inline uint32_t A6XX_HLSQ_CS_NDRANGE_0_LOCALSIZEX(uint32_t val) +{ + return ((val) << A6XX_HLSQ_CS_NDRANGE_0_LOCALSIZEX__SHIFT) & A6XX_HLSQ_CS_NDRANGE_0_LOCALSIZEX__MASK; +} +#define A6XX_HLSQ_CS_NDRANGE_0_LOCALSIZEY__MASK 0x003ff000 +#define A6XX_HLSQ_CS_NDRANGE_0_LOCALSIZEY__SHIFT 12 +static inline uint32_t A6XX_HLSQ_CS_NDRANGE_0_LOCALSIZEY(uint32_t val) +{ + return ((val) << A6XX_HLSQ_CS_NDRANGE_0_LOCALSIZEY__SHIFT) & A6XX_HLSQ_CS_NDRANGE_0_LOCALSIZEY__MASK; +} +#define A6XX_HLSQ_CS_NDRANGE_0_LOCALSIZEZ__MASK 0xffc00000 +#define A6XX_HLSQ_CS_NDRANGE_0_LOCALSIZEZ__SHIFT 22 +static inline uint32_t A6XX_HLSQ_CS_NDRANGE_0_LOCALSIZEZ(uint32_t val) +{ + return ((val) << A6XX_HLSQ_CS_NDRANGE_0_LOCALSIZEZ__SHIFT) & A6XX_HLSQ_CS_NDRANGE_0_LOCALSIZEZ__MASK; +} + +#define REG_A6XX_HLSQ_CS_NDRANGE_1 0x0000b991 +#define A6XX_HLSQ_CS_NDRANGE_1_GLOBALSIZE_X__MASK 0xffffffff +#define A6XX_HLSQ_CS_NDRANGE_1_GLOBALSIZE_X__SHIFT 0 +static inline uint32_t A6XX_HLSQ_CS_NDRANGE_1_GLOBALSIZE_X(uint32_t val) +{ + return ((val) << A6XX_HLSQ_CS_NDRANGE_1_GLOBALSIZE_X__SHIFT) & A6XX_HLSQ_CS_NDRANGE_1_GLOBALSIZE_X__MASK; +} + +#define REG_A6XX_HLSQ_CS_NDRANGE_2 0x0000b992 +#define 
A6XX_HLSQ_CS_NDRANGE_2_GLOBALOFF_X__MASK 0xffffffff +#define A6XX_HLSQ_CS_NDRANGE_2_GLOBALOFF_X__SHIFT 0 +static inline uint32_t A6XX_HLSQ_CS_NDRANGE_2_GLOBALOFF_X(uint32_t val) +{ + return ((val) << A6XX_HLSQ_CS_NDRANGE_2_GLOBALOFF_X__SHIFT) & A6XX_HLSQ_CS_NDRANGE_2_GLOBALOFF_X__MASK; +} + +#define REG_A6XX_HLSQ_CS_NDRANGE_3 0x0000b993 +#define A6XX_HLSQ_CS_NDRANGE_3_GLOBALSIZE_Y__MASK 0xffffffff +#define A6XX_HLSQ_CS_NDRANGE_3_GLOBALSIZE_Y__SHIFT 0 +static inline uint32_t A6XX_HLSQ_CS_NDRANGE_3_GLOBALSIZE_Y(uint32_t val) +{ + return ((val) << A6XX_HLSQ_CS_NDRANGE_3_GLOBALSIZE_Y__SHIFT) & A6XX_HLSQ_CS_NDRANGE_3_GLOBALSIZE_Y__MASK; +} + +#define REG_A6XX_HLSQ_CS_NDRANGE_4 0x0000b994 +#define A6XX_HLSQ_CS_NDRANGE_4_GLOBALOFF_Y__MASK 0xffffffff +#define A6XX_HLSQ_CS_NDRANGE_4_GLOBALOFF_Y__SHIFT 0 +static inline uint32_t A6XX_HLSQ_CS_NDRANGE_4_GLOBALOFF_Y(uint32_t val) +{ + return ((val) << A6XX_HLSQ_CS_NDRANGE_4_GLOBALOFF_Y__SHIFT) & A6XX_HLSQ_CS_NDRANGE_4_GLOBALOFF_Y__MASK; +} + +#define REG_A6XX_HLSQ_CS_NDRANGE_5 0x0000b995 +#define A6XX_HLSQ_CS_NDRANGE_5_GLOBALSIZE_Z__MASK 0xffffffff +#define A6XX_HLSQ_CS_NDRANGE_5_GLOBALSIZE_Z__SHIFT 0 +static inline uint32_t A6XX_HLSQ_CS_NDRANGE_5_GLOBALSIZE_Z(uint32_t val) +{ + return ((val) << A6XX_HLSQ_CS_NDRANGE_5_GLOBALSIZE_Z__SHIFT) & A6XX_HLSQ_CS_NDRANGE_5_GLOBALSIZE_Z__MASK; +} + +#define REG_A6XX_HLSQ_CS_NDRANGE_6 0x0000b996 +#define A6XX_HLSQ_CS_NDRANGE_6_GLOBALOFF_Z__MASK 0xffffffff +#define A6XX_HLSQ_CS_NDRANGE_6_GLOBALOFF_Z__SHIFT 0 +static inline uint32_t A6XX_HLSQ_CS_NDRANGE_6_GLOBALOFF_Z(uint32_t val) +{ + return ((val) << A6XX_HLSQ_CS_NDRANGE_6_GLOBALOFF_Z__SHIFT) & A6XX_HLSQ_CS_NDRANGE_6_GLOBALOFF_Z__MASK; +} + +#define REG_A6XX_HLSQ_CS_CNTL_0 0x0000b997 +#define A6XX_HLSQ_CS_CNTL_0_WGIDCONSTID__MASK 0x000000ff +#define A6XX_HLSQ_CS_CNTL_0_WGIDCONSTID__SHIFT 0 +static inline uint32_t A6XX_HLSQ_CS_CNTL_0_WGIDCONSTID(uint32_t val) +{ + return ((val) << A6XX_HLSQ_CS_CNTL_0_WGIDCONSTID__SHIFT) & 
A6XX_HLSQ_CS_CNTL_0_WGIDCONSTID__MASK; +} +#define A6XX_HLSQ_CS_CNTL_0_UNK0__MASK 0x0000ff00 +#define A6XX_HLSQ_CS_CNTL_0_UNK0__SHIFT 8 +static inline uint32_t A6XX_HLSQ_CS_CNTL_0_UNK0(uint32_t val) +{ + return ((val) << A6XX_HLSQ_CS_CNTL_0_UNK0__SHIFT) & A6XX_HLSQ_CS_CNTL_0_UNK0__MASK; +} +#define A6XX_HLSQ_CS_CNTL_0_UNK1__MASK 0x00ff0000 +#define A6XX_HLSQ_CS_CNTL_0_UNK1__SHIFT 16 +static inline uint32_t A6XX_HLSQ_CS_CNTL_0_UNK1(uint32_t val) +{ + return ((val) << A6XX_HLSQ_CS_CNTL_0_UNK1__SHIFT) & A6XX_HLSQ_CS_CNTL_0_UNK1__MASK; +} +#define A6XX_HLSQ_CS_CNTL_0_LOCALIDREGID__MASK 0xff000000 +#define A6XX_HLSQ_CS_CNTL_0_LOCALIDREGID__SHIFT 24 +static inline uint32_t A6XX_HLSQ_CS_CNTL_0_LOCALIDREGID(uint32_t val) +{ + return ((val) << A6XX_HLSQ_CS_CNTL_0_LOCALIDREGID__SHIFT) & A6XX_HLSQ_CS_CNTL_0_LOCALIDREGID__MASK; +} + +#define REG_A6XX_HLSQ_CS_KERNEL_GROUP_X 0x0000b999 + +#define REG_A6XX_HLSQ_CS_KERNEL_GROUP_Y 0x0000b99a + +#define REG_A6XX_HLSQ_CS_KERNEL_GROUP_Z 0x0000b99b + +#define REG_A6XX_HLSQ_UPDATE_CNTL 0x0000bb08 + +#define REG_A6XX_HLSQ_FS_CNTL 0x0000bb10 +#define A6XX_HLSQ_FS_CNTL_CONSTLEN__MASK 0x000000ff +#define A6XX_HLSQ_FS_CNTL_CONSTLEN__SHIFT 0 +static inline uint32_t A6XX_HLSQ_FS_CNTL_CONSTLEN(uint32_t val) +{ + assert(!(val & 0x3)); + return ((val >> 2) << A6XX_HLSQ_FS_CNTL_CONSTLEN__SHIFT) & A6XX_HLSQ_FS_CNTL_CONSTLEN__MASK; +} + +#define REG_A6XX_HLSQ_UNKNOWN_BB11 0x0000bb11 + +#define REG_A6XX_HLSQ_UNKNOWN_BE00 0x0000be00 + +#define REG_A6XX_HLSQ_UNKNOWN_BE01 0x0000be01 + +#define REG_A6XX_HLSQ_UNKNOWN_BE04 0x0000be04 + +#define REG_A6XX_TEX_SAMP_0 0x00000000 +#define A6XX_TEX_SAMP_0_MIPFILTER_LINEAR_NEAR 0x00000001 +#define A6XX_TEX_SAMP_0_XY_MAG__MASK 0x00000006 +#define A6XX_TEX_SAMP_0_XY_MAG__SHIFT 1 +static inline uint32_t A6XX_TEX_SAMP_0_XY_MAG(enum a6xx_tex_filter val) +{ + return ((val) << A6XX_TEX_SAMP_0_XY_MAG__SHIFT) & A6XX_TEX_SAMP_0_XY_MAG__MASK; +} +#define A6XX_TEX_SAMP_0_XY_MIN__MASK 0x00000018 +#define 
A6XX_TEX_SAMP_0_XY_MIN__SHIFT 3 +static inline uint32_t A6XX_TEX_SAMP_0_XY_MIN(enum a6xx_tex_filter val) +{ + return ((val) << A6XX_TEX_SAMP_0_XY_MIN__SHIFT) & A6XX_TEX_SAMP_0_XY_MIN__MASK; +} +#define A6XX_TEX_SAMP_0_WRAP_S__MASK 0x000000e0 +#define A6XX_TEX_SAMP_0_WRAP_S__SHIFT 5 +static inline uint32_t A6XX_TEX_SAMP_0_WRAP_S(enum a6xx_tex_clamp val) +{ + return ((val) << A6XX_TEX_SAMP_0_WRAP_S__SHIFT) & A6XX_TEX_SAMP_0_WRAP_S__MASK; +} +#define A6XX_TEX_SAMP_0_WRAP_T__MASK 0x00000700 +#define A6XX_TEX_SAMP_0_WRAP_T__SHIFT 8 +static inline uint32_t A6XX_TEX_SAMP_0_WRAP_T(enum a6xx_tex_clamp val) +{ + return ((val) << A6XX_TEX_SAMP_0_WRAP_T__SHIFT) & A6XX_TEX_SAMP_0_WRAP_T__MASK; +} +#define A6XX_TEX_SAMP_0_WRAP_R__MASK 0x00003800 +#define A6XX_TEX_SAMP_0_WRAP_R__SHIFT 11 +static inline uint32_t A6XX_TEX_SAMP_0_WRAP_R(enum a6xx_tex_clamp val) +{ + return ((val) << A6XX_TEX_SAMP_0_WRAP_R__SHIFT) & A6XX_TEX_SAMP_0_WRAP_R__MASK; +} +#define A6XX_TEX_SAMP_0_ANISO__MASK 0x0001c000 +#define A6XX_TEX_SAMP_0_ANISO__SHIFT 14 +static inline uint32_t A6XX_TEX_SAMP_0_ANISO(enum a6xx_tex_aniso val) +{ + return ((val) << A6XX_TEX_SAMP_0_ANISO__SHIFT) & A6XX_TEX_SAMP_0_ANISO__MASK; +} +#define A6XX_TEX_SAMP_0_LOD_BIAS__MASK 0xfff80000 +#define A6XX_TEX_SAMP_0_LOD_BIAS__SHIFT 19 +static inline uint32_t A6XX_TEX_SAMP_0_LOD_BIAS(float val) +{ + return ((((int32_t)(val * 256.0))) << A6XX_TEX_SAMP_0_LOD_BIAS__SHIFT) & A6XX_TEX_SAMP_0_LOD_BIAS__MASK; +} + +#define REG_A6XX_TEX_SAMP_1 0x00000001 +#define A6XX_TEX_SAMP_1_COMPARE_FUNC__MASK 0x0000000e +#define A6XX_TEX_SAMP_1_COMPARE_FUNC__SHIFT 1 +static inline uint32_t A6XX_TEX_SAMP_1_COMPARE_FUNC(enum adreno_compare_func val) +{ + return ((val) << A6XX_TEX_SAMP_1_COMPARE_FUNC__SHIFT) & A6XX_TEX_SAMP_1_COMPARE_FUNC__MASK; +} +#define A6XX_TEX_SAMP_1_CUBEMAPSEAMLESSFILTOFF 0x00000010 +#define A6XX_TEX_SAMP_1_UNNORM_COORDS 0x00000020 +#define A6XX_TEX_SAMP_1_MIPFILTER_LINEAR_FAR 0x00000040 +#define A6XX_TEX_SAMP_1_MAX_LOD__MASK 
0x000fff00 +#define A6XX_TEX_SAMP_1_MAX_LOD__SHIFT 8 +static inline uint32_t A6XX_TEX_SAMP_1_MAX_LOD(float val) +{ + return ((((uint32_t)(val * 256.0))) << A6XX_TEX_SAMP_1_MAX_LOD__SHIFT) & A6XX_TEX_SAMP_1_MAX_LOD__MASK; +} +#define A6XX_TEX_SAMP_1_MIN_LOD__MASK 0xfff00000 +#define A6XX_TEX_SAMP_1_MIN_LOD__SHIFT 20 +static inline uint32_t A6XX_TEX_SAMP_1_MIN_LOD(float val) +{ + return ((((uint32_t)(val * 256.0))) << A6XX_TEX_SAMP_1_MIN_LOD__SHIFT) & A6XX_TEX_SAMP_1_MIN_LOD__MASK; +} + +#define REG_A6XX_TEX_SAMP_2 0x00000002 +#define A6XX_TEX_SAMP_2_BCOLOR_OFFSET__MASK 0xfffffff0 +#define A6XX_TEX_SAMP_2_BCOLOR_OFFSET__SHIFT 4 +static inline uint32_t A6XX_TEX_SAMP_2_BCOLOR_OFFSET(uint32_t val) +{ + return ((val) << A6XX_TEX_SAMP_2_BCOLOR_OFFSET__SHIFT) & A6XX_TEX_SAMP_2_BCOLOR_OFFSET__MASK; +} + +#define REG_A6XX_TEX_SAMP_3 0x00000003 + +#define REG_A6XX_TEX_CONST_0 0x00000000 +#define A6XX_TEX_CONST_0_TILE_MODE__MASK 0x00000003 +#define A6XX_TEX_CONST_0_TILE_MODE__SHIFT 0 +static inline uint32_t A6XX_TEX_CONST_0_TILE_MODE(enum a6xx_tile_mode val) +{ + return ((val) << A6XX_TEX_CONST_0_TILE_MODE__SHIFT) & A6XX_TEX_CONST_0_TILE_MODE__MASK; +} +#define A6XX_TEX_CONST_0_SRGB 0x00000004 +#define A6XX_TEX_CONST_0_SWIZ_X__MASK 0x00000070 +#define A6XX_TEX_CONST_0_SWIZ_X__SHIFT 4 +static inline uint32_t A6XX_TEX_CONST_0_SWIZ_X(enum a6xx_tex_swiz val) +{ + return ((val) << A6XX_TEX_CONST_0_SWIZ_X__SHIFT) & A6XX_TEX_CONST_0_SWIZ_X__MASK; +} +#define A6XX_TEX_CONST_0_SWIZ_Y__MASK 0x00000380 +#define A6XX_TEX_CONST_0_SWIZ_Y__SHIFT 7 +static inline uint32_t A6XX_TEX_CONST_0_SWIZ_Y(enum a6xx_tex_swiz val) +{ + return ((val) << A6XX_TEX_CONST_0_SWIZ_Y__SHIFT) & A6XX_TEX_CONST_0_SWIZ_Y__MASK; +} +#define A6XX_TEX_CONST_0_SWIZ_Z__MASK 0x00001c00 +#define A6XX_TEX_CONST_0_SWIZ_Z__SHIFT 10 +static inline uint32_t A6XX_TEX_CONST_0_SWIZ_Z(enum a6xx_tex_swiz val) +{ + return ((val) << A6XX_TEX_CONST_0_SWIZ_Z__SHIFT) & A6XX_TEX_CONST_0_SWIZ_Z__MASK; +} +#define 
A6XX_TEX_CONST_0_SWIZ_W__MASK 0x0000e000 +#define A6XX_TEX_CONST_0_SWIZ_W__SHIFT 13 +static inline uint32_t A6XX_TEX_CONST_0_SWIZ_W(enum a6xx_tex_swiz val) +{ + return ((val) << A6XX_TEX_CONST_0_SWIZ_W__SHIFT) & A6XX_TEX_CONST_0_SWIZ_W__MASK; +} +#define A6XX_TEX_CONST_0_MIPLVLS__MASK 0x000f0000 +#define A6XX_TEX_CONST_0_MIPLVLS__SHIFT 16 +static inline uint32_t A6XX_TEX_CONST_0_MIPLVLS(uint32_t val) +{ + return ((val) << A6XX_TEX_CONST_0_MIPLVLS__SHIFT) & A6XX_TEX_CONST_0_MIPLVLS__MASK; +} +#define A6XX_TEX_CONST_0_SAMPLES__MASK 0x00300000 +#define A6XX_TEX_CONST_0_SAMPLES__SHIFT 20 +static inline uint32_t A6XX_TEX_CONST_0_SAMPLES(enum a3xx_msaa_samples val) +{ + return ((val) << A6XX_TEX_CONST_0_SAMPLES__SHIFT) & A6XX_TEX_CONST_0_SAMPLES__MASK; +} +#define A6XX_TEX_CONST_0_FMT__MASK 0x3fc00000 +#define A6XX_TEX_CONST_0_FMT__SHIFT 22 +static inline uint32_t A6XX_TEX_CONST_0_FMT(enum a6xx_tex_fmt val) +{ + return ((val) << A6XX_TEX_CONST_0_FMT__SHIFT) & A6XX_TEX_CONST_0_FMT__MASK; +} +#define A6XX_TEX_CONST_0_SWAP__MASK 0xc0000000 +#define A6XX_TEX_CONST_0_SWAP__SHIFT 30 +static inline uint32_t A6XX_TEX_CONST_0_SWAP(enum a3xx_color_swap val) +{ + return ((val) << A6XX_TEX_CONST_0_SWAP__SHIFT) & A6XX_TEX_CONST_0_SWAP__MASK; +} + +#define REG_A6XX_TEX_CONST_1 0x00000001 +#define A6XX_TEX_CONST_1_WIDTH__MASK 0x00007fff +#define A6XX_TEX_CONST_1_WIDTH__SHIFT 0 +static inline uint32_t A6XX_TEX_CONST_1_WIDTH(uint32_t val) +{ + return ((val) << A6XX_TEX_CONST_1_WIDTH__SHIFT) & A6XX_TEX_CONST_1_WIDTH__MASK; +} +#define A6XX_TEX_CONST_1_HEIGHT__MASK 0x3fff8000 +#define A6XX_TEX_CONST_1_HEIGHT__SHIFT 15 +static inline uint32_t A6XX_TEX_CONST_1_HEIGHT(uint32_t val) +{ + return ((val) << A6XX_TEX_CONST_1_HEIGHT__SHIFT) & A6XX_TEX_CONST_1_HEIGHT__MASK; +} + +#define REG_A6XX_TEX_CONST_2 0x00000002 +#define A6XX_TEX_CONST_2_FETCHSIZE__MASK 0x0000000f +#define A6XX_TEX_CONST_2_FETCHSIZE__SHIFT 0 +static inline uint32_t A6XX_TEX_CONST_2_FETCHSIZE(enum a6xx_tex_fetchsize val) +{ + 
return ((val) << A6XX_TEX_CONST_2_FETCHSIZE__SHIFT) & A6XX_TEX_CONST_2_FETCHSIZE__MASK; +} +#define A6XX_TEX_CONST_2_PITCH__MASK 0x1fffff80 +#define A6XX_TEX_CONST_2_PITCH__SHIFT 7 +static inline uint32_t A6XX_TEX_CONST_2_PITCH(uint32_t val) +{ + return ((val) << A6XX_TEX_CONST_2_PITCH__SHIFT) & A6XX_TEX_CONST_2_PITCH__MASK; +} +#define A6XX_TEX_CONST_2_TYPE__MASK 0x60000000 +#define A6XX_TEX_CONST_2_TYPE__SHIFT 29 +static inline uint32_t A6XX_TEX_CONST_2_TYPE(enum a6xx_tex_type val) +{ + return ((val) << A6XX_TEX_CONST_2_TYPE__SHIFT) & A6XX_TEX_CONST_2_TYPE__MASK; +} + +#define REG_A6XX_TEX_CONST_3 0x00000003 +#define A6XX_TEX_CONST_3_ARRAY_PITCH__MASK 0x00003fff +#define A6XX_TEX_CONST_3_ARRAY_PITCH__SHIFT 0 +static inline uint32_t A6XX_TEX_CONST_3_ARRAY_PITCH(uint32_t val) +{ + assert(!(val & 0xfff)); + return ((val >> 12) << A6XX_TEX_CONST_3_ARRAY_PITCH__SHIFT) & A6XX_TEX_CONST_3_ARRAY_PITCH__MASK; +} +#define A6XX_TEX_CONST_3_MIN_LAYERSZ__MASK 0x07800000 +#define A6XX_TEX_CONST_3_MIN_LAYERSZ__SHIFT 23 +static inline uint32_t A6XX_TEX_CONST_3_MIN_LAYERSZ(uint32_t val) +{ + assert(!(val & 0xfff)); + return ((val >> 12) << A6XX_TEX_CONST_3_MIN_LAYERSZ__SHIFT) & A6XX_TEX_CONST_3_MIN_LAYERSZ__MASK; +} +#define A6XX_TEX_CONST_3_FLAG 0x10000000 + +#define REG_A6XX_TEX_CONST_4 0x00000004 +#define A6XX_TEX_CONST_4_BASE_LO__MASK 0xffffffe0 +#define A6XX_TEX_CONST_4_BASE_LO__SHIFT 5 +static inline uint32_t A6XX_TEX_CONST_4_BASE_LO(uint32_t val) +{ + assert(!(val & 0x1f)); + return ((val >> 5) << A6XX_TEX_CONST_4_BASE_LO__SHIFT) & A6XX_TEX_CONST_4_BASE_LO__MASK; +} + +#define REG_A6XX_TEX_CONST_5 0x00000005 +#define A6XX_TEX_CONST_5_BASE_HI__MASK 0x0001ffff +#define A6XX_TEX_CONST_5_BASE_HI__SHIFT 0 +static inline uint32_t A6XX_TEX_CONST_5_BASE_HI(uint32_t val) +{ + return ((val) << A6XX_TEX_CONST_5_BASE_HI__SHIFT) & A6XX_TEX_CONST_5_BASE_HI__MASK; +} +#define A6XX_TEX_CONST_5_DEPTH__MASK 0x3ffe0000 +#define A6XX_TEX_CONST_5_DEPTH__SHIFT 17 +static inline uint32_t 
A6XX_TEX_CONST_5_DEPTH(uint32_t val) +{ + return ((val) << A6XX_TEX_CONST_5_DEPTH__SHIFT) & A6XX_TEX_CONST_5_DEPTH__MASK; +} + +#define REG_A6XX_TEX_CONST_6 0x00000006 + +#define REG_A6XX_TEX_CONST_7 0x00000007 +#define A6XX_TEX_CONST_7_FLAG_LO__MASK 0xffffffe0 +#define A6XX_TEX_CONST_7_FLAG_LO__SHIFT 5 +static inline uint32_t A6XX_TEX_CONST_7_FLAG_LO(uint32_t val) +{ + assert(!(val & 0x1f)); + return ((val >> 5) << A6XX_TEX_CONST_7_FLAG_LO__SHIFT) & A6XX_TEX_CONST_7_FLAG_LO__MASK; +} + +#define REG_A6XX_TEX_CONST_8 0x00000008 +#define A6XX_TEX_CONST_8_FLAG_HI__MASK 0x0001ffff +#define A6XX_TEX_CONST_8_FLAG_HI__SHIFT 0 +static inline uint32_t A6XX_TEX_CONST_8_FLAG_HI(uint32_t val) +{ + return ((val) << A6XX_TEX_CONST_8_FLAG_HI__SHIFT) & A6XX_TEX_CONST_8_FLAG_HI__MASK; +} + +#define REG_A6XX_TEX_CONST_9 0x00000009 + +#define REG_A6XX_TEX_CONST_10 0x0000000a + +#define REG_A6XX_TEX_CONST_11 0x0000000b + +#define REG_A6XX_TEX_CONST_12 0x0000000c + +#define REG_A6XX_TEX_CONST_13 0x0000000d + +#define REG_A6XX_TEX_CONST_14 0x0000000e + +#define REG_A6XX_TEX_CONST_15 0x0000000f + +#define REG_A6XX_PDC_GPU_ENABLE_PDC 0x00001140 + +#define REG_A6XX_PDC_GPU_SEQ_START_ADDR 0x00001148 + +#define REG_A6XX_PDC_GPU_TCS0_CONTROL 0x00001540 + +#define REG_A6XX_PDC_GPU_TCS0_CMD_ENABLE_BANK 0x00001541 + +#define REG_A6XX_PDC_GPU_TCS0_CMD_WAIT_FOR_CMPL_BANK 0x00001542 + +#define REG_A6XX_PDC_GPU_TCS0_CMD0_MSGID 0x00001543 + +#define REG_A6XX_PDC_GPU_TCS0_CMD0_ADDR 0x00001544 + +#define REG_A6XX_PDC_GPU_TCS0_CMD0_DATA 0x00001545 + +#define REG_A6XX_PDC_GPU_TCS1_CONTROL 0x00001572 + +#define REG_A6XX_PDC_GPU_TCS1_CMD_ENABLE_BANK 0x00001573 + +#define REG_A6XX_PDC_GPU_TCS1_CMD_WAIT_FOR_CMPL_BANK 0x00001574 + +#define REG_A6XX_PDC_GPU_TCS1_CMD0_MSGID 0x00001575 + +#define REG_A6XX_PDC_GPU_TCS1_CMD0_ADDR 0x00001576 + +#define REG_A6XX_PDC_GPU_TCS1_CMD0_DATA 0x00001577 + +#define REG_A6XX_PDC_GPU_TCS2_CONTROL 0x000015a4 + +#define REG_A6XX_PDC_GPU_TCS2_CMD_ENABLE_BANK 0x000015a5 + +#define 
REG_A6XX_PDC_GPU_TCS2_CMD_WAIT_FOR_CMPL_BANK 0x000015a6 + +#define REG_A6XX_PDC_GPU_TCS2_CMD0_MSGID 0x000015a7 + +#define REG_A6XX_PDC_GPU_TCS2_CMD0_ADDR 0x000015a8 + +#define REG_A6XX_PDC_GPU_TCS2_CMD0_DATA 0x000015a9 + +#define REG_A6XX_PDC_GPU_TCS3_CONTROL 0x000015d6 + +#define REG_A6XX_PDC_GPU_TCS3_CMD_ENABLE_BANK 0x000015d7 + +#define REG_A6XX_PDC_GPU_TCS3_CMD_WAIT_FOR_CMPL_BANK 0x000015d8 + +#define REG_A6XX_PDC_GPU_TCS3_CMD0_MSGID 0x000015d9 + +#define REG_A6XX_PDC_GPU_TCS3_CMD0_ADDR 0x000015da + +#define REG_A6XX_PDC_GPU_TCS3_CMD0_DATA 0x000015db + +#define REG_A6XX_PDC_GPU_SEQ_MEM_0 0x00000000 + +#define REG_A6XX_CX_DBGC_CFG_DBGBUS_SEL_A 0x00000000 +#define A6XX_CX_DBGC_CFG_DBGBUS_SEL_A_PING_INDEX__MASK 0x000000ff +#define A6XX_CX_DBGC_CFG_DBGBUS_SEL_A_PING_INDEX__SHIFT 0 +static inline uint32_t A6XX_CX_DBGC_CFG_DBGBUS_SEL_A_PING_INDEX(uint32_t val) +{ + return ((val) << A6XX_CX_DBGC_CFG_DBGBUS_SEL_A_PING_INDEX__SHIFT) & A6XX_CX_DBGC_CFG_DBGBUS_SEL_A_PING_INDEX__MASK; +} +#define A6XX_CX_DBGC_CFG_DBGBUS_SEL_A_PING_BLK_SEL__MASK 0x0000ff00 +#define A6XX_CX_DBGC_CFG_DBGBUS_SEL_A_PING_BLK_SEL__SHIFT 8 +static inline uint32_t A6XX_CX_DBGC_CFG_DBGBUS_SEL_A_PING_BLK_SEL(uint32_t val) +{ + return ((val) << A6XX_CX_DBGC_CFG_DBGBUS_SEL_A_PING_BLK_SEL__SHIFT) & A6XX_CX_DBGC_CFG_DBGBUS_SEL_A_PING_BLK_SEL__MASK; +} + +#define REG_A6XX_CX_DBGC_CFG_DBGBUS_SEL_B 0x00000001 + +#define REG_A6XX_CX_DBGC_CFG_DBGBUS_SEL_C 0x00000002 + +#define REG_A6XX_CX_DBGC_CFG_DBGBUS_SEL_D 0x00000003 + +#define REG_A6XX_CX_DBGC_CFG_DBGBUS_CNTLT 0x00000004 +#define A6XX_CX_DBGC_CFG_DBGBUS_CNTLT_TRACEEN__MASK 0x0000003f +#define A6XX_CX_DBGC_CFG_DBGBUS_CNTLT_TRACEEN__SHIFT 0 +static inline uint32_t A6XX_CX_DBGC_CFG_DBGBUS_CNTLT_TRACEEN(uint32_t val) +{ + return ((val) << A6XX_CX_DBGC_CFG_DBGBUS_CNTLT_TRACEEN__SHIFT) & A6XX_CX_DBGC_CFG_DBGBUS_CNTLT_TRACEEN__MASK; +} +#define A6XX_CX_DBGC_CFG_DBGBUS_CNTLT_GRANU__MASK 0x00007000 +#define A6XX_CX_DBGC_CFG_DBGBUS_CNTLT_GRANU__SHIFT 12 +static 
inline uint32_t A6XX_CX_DBGC_CFG_DBGBUS_CNTLT_GRANU(uint32_t val) +{ + return ((val) << A6XX_CX_DBGC_CFG_DBGBUS_CNTLT_GRANU__SHIFT) & A6XX_CX_DBGC_CFG_DBGBUS_CNTLT_GRANU__MASK; +} +#define A6XX_CX_DBGC_CFG_DBGBUS_CNTLT_SEGT__MASK 0xf0000000 +#define A6XX_CX_DBGC_CFG_DBGBUS_CNTLT_SEGT__SHIFT 28 +static inline uint32_t A6XX_CX_DBGC_CFG_DBGBUS_CNTLT_SEGT(uint32_t val) +{ + return ((val) << A6XX_CX_DBGC_CFG_DBGBUS_CNTLT_SEGT__SHIFT) & A6XX_CX_DBGC_CFG_DBGBUS_CNTLT_SEGT__MASK; +} + +#define REG_A6XX_CX_DBGC_CFG_DBGBUS_CNTLM 0x00000005 +#define A6XX_CX_DBGC_CFG_DBGBUS_CNTLM_ENABLE__MASK 0x0f000000 +#define A6XX_CX_DBGC_CFG_DBGBUS_CNTLM_ENABLE__SHIFT 24 +static inline uint32_t A6XX_CX_DBGC_CFG_DBGBUS_CNTLM_ENABLE(uint32_t val) +{ + return ((val) << A6XX_CX_DBGC_CFG_DBGBUS_CNTLM_ENABLE__SHIFT) & A6XX_CX_DBGC_CFG_DBGBUS_CNTLM_ENABLE__MASK; +} + +#define REG_A6XX_CX_DBGC_CFG_DBGBUS_IVTL_0 0x00000008 + +#define REG_A6XX_CX_DBGC_CFG_DBGBUS_IVTL_1 0x00000009 + +#define REG_A6XX_CX_DBGC_CFG_DBGBUS_IVTL_2 0x0000000a + +#define REG_A6XX_CX_DBGC_CFG_DBGBUS_IVTL_3 0x0000000b + +#define REG_A6XX_CX_DBGC_CFG_DBGBUS_MASKL_0 0x0000000c + +#define REG_A6XX_CX_DBGC_CFG_DBGBUS_MASKL_1 0x0000000d + +#define REG_A6XX_CX_DBGC_CFG_DBGBUS_MASKL_2 0x0000000e + +#define REG_A6XX_CX_DBGC_CFG_DBGBUS_MASKL_3 0x0000000f + +#define REG_A6XX_CX_DBGC_CFG_DBGBUS_BYTEL_0 0x00000010 +#define A6XX_CX_DBGC_CFG_DBGBUS_BYTEL_0_BYTEL0__MASK 0x0000000f +#define A6XX_CX_DBGC_CFG_DBGBUS_BYTEL_0_BYTEL0__SHIFT 0 +static inline uint32_t A6XX_CX_DBGC_CFG_DBGBUS_BYTEL_0_BYTEL0(uint32_t val) +{ + return ((val) << A6XX_CX_DBGC_CFG_DBGBUS_BYTEL_0_BYTEL0__SHIFT) & A6XX_CX_DBGC_CFG_DBGBUS_BYTEL_0_BYTEL0__MASK; +} +#define A6XX_CX_DBGC_CFG_DBGBUS_BYTEL_0_BYTEL1__MASK 0x000000f0 +#define A6XX_CX_DBGC_CFG_DBGBUS_BYTEL_0_BYTEL1__SHIFT 4 +static inline uint32_t A6XX_CX_DBGC_CFG_DBGBUS_BYTEL_0_BYTEL1(uint32_t val) +{ + return ((val) << A6XX_CX_DBGC_CFG_DBGBUS_BYTEL_0_BYTEL1__SHIFT) & A6XX_CX_DBGC_CFG_DBGBUS_BYTEL_0_BYTEL1__MASK; 
+} +#define A6XX_CX_DBGC_CFG_DBGBUS_BYTEL_0_BYTEL2__MASK 0x00000f00 +#define A6XX_CX_DBGC_CFG_DBGBUS_BYTEL_0_BYTEL2__SHIFT 8 +static inline uint32_t A6XX_CX_DBGC_CFG_DBGBUS_BYTEL_0_BYTEL2(uint32_t val) +{ + return ((val) << A6XX_CX_DBGC_CFG_DBGBUS_BYTEL_0_BYTEL2__SHIFT) & A6XX_CX_DBGC_CFG_DBGBUS_BYTEL_0_BYTEL2__MASK; +} +#define A6XX_CX_DBGC_CFG_DBGBUS_BYTEL_0_BYTEL3__MASK 0x0000f000 +#define A6XX_CX_DBGC_CFG_DBGBUS_BYTEL_0_BYTEL3__SHIFT 12 +static inline uint32_t A6XX_CX_DBGC_CFG_DBGBUS_BYTEL_0_BYTEL3(uint32_t val) +{ + return ((val) << A6XX_CX_DBGC_CFG_DBGBUS_BYTEL_0_BYTEL3__SHIFT) & A6XX_CX_DBGC_CFG_DBGBUS_BYTEL_0_BYTEL3__MASK; +} +#define A6XX_CX_DBGC_CFG_DBGBUS_BYTEL_0_BYTEL4__MASK 0x000f0000 +#define A6XX_CX_DBGC_CFG_DBGBUS_BYTEL_0_BYTEL4__SHIFT 16 +static inline uint32_t A6XX_CX_DBGC_CFG_DBGBUS_BYTEL_0_BYTEL4(uint32_t val) +{ + return ((val) << A6XX_CX_DBGC_CFG_DBGBUS_BYTEL_0_BYTEL4__SHIFT) & A6XX_CX_DBGC_CFG_DBGBUS_BYTEL_0_BYTEL4__MASK; +} +#define A6XX_CX_DBGC_CFG_DBGBUS_BYTEL_0_BYTEL5__MASK 0x00f00000 +#define A6XX_CX_DBGC_CFG_DBGBUS_BYTEL_0_BYTEL5__SHIFT 20 +static inline uint32_t A6XX_CX_DBGC_CFG_DBGBUS_BYTEL_0_BYTEL5(uint32_t val) +{ + return ((val) << A6XX_CX_DBGC_CFG_DBGBUS_BYTEL_0_BYTEL5__SHIFT) & A6XX_CX_DBGC_CFG_DBGBUS_BYTEL_0_BYTEL5__MASK; +} +#define A6XX_CX_DBGC_CFG_DBGBUS_BYTEL_0_BYTEL6__MASK 0x0f000000 +#define A6XX_CX_DBGC_CFG_DBGBUS_BYTEL_0_BYTEL6__SHIFT 24 +static inline uint32_t A6XX_CX_DBGC_CFG_DBGBUS_BYTEL_0_BYTEL6(uint32_t val) +{ + return ((val) << A6XX_CX_DBGC_CFG_DBGBUS_BYTEL_0_BYTEL6__SHIFT) & A6XX_CX_DBGC_CFG_DBGBUS_BYTEL_0_BYTEL6__MASK; +} +#define A6XX_CX_DBGC_CFG_DBGBUS_BYTEL_0_BYTEL7__MASK 0xf0000000 +#define A6XX_CX_DBGC_CFG_DBGBUS_BYTEL_0_BYTEL7__SHIFT 28 +static inline uint32_t A6XX_CX_DBGC_CFG_DBGBUS_BYTEL_0_BYTEL7(uint32_t val) +{ + return ((val) << A6XX_CX_DBGC_CFG_DBGBUS_BYTEL_0_BYTEL7__SHIFT) & A6XX_CX_DBGC_CFG_DBGBUS_BYTEL_0_BYTEL7__MASK; +} + +#define REG_A6XX_CX_DBGC_CFG_DBGBUS_BYTEL_1 0x00000011 +#define 
A6XX_CX_DBGC_CFG_DBGBUS_BYTEL_1_BYTEL8__MASK 0x0000000f +#define A6XX_CX_DBGC_CFG_DBGBUS_BYTEL_1_BYTEL8__SHIFT 0 +static inline uint32_t A6XX_CX_DBGC_CFG_DBGBUS_BYTEL_1_BYTEL8(uint32_t val) +{ + return ((val) << A6XX_CX_DBGC_CFG_DBGBUS_BYTEL_1_BYTEL8__SHIFT) & A6XX_CX_DBGC_CFG_DBGBUS_BYTEL_1_BYTEL8__MASK; +} +#define A6XX_CX_DBGC_CFG_DBGBUS_BYTEL_1_BYTEL9__MASK 0x000000f0 +#define A6XX_CX_DBGC_CFG_DBGBUS_BYTEL_1_BYTEL9__SHIFT 4 +static inline uint32_t A6XX_CX_DBGC_CFG_DBGBUS_BYTEL_1_BYTEL9(uint32_t val) +{ + return ((val) << A6XX_CX_DBGC_CFG_DBGBUS_BYTEL_1_BYTEL9__SHIFT) & A6XX_CX_DBGC_CFG_DBGBUS_BYTEL_1_BYTEL9__MASK; +} +#define A6XX_CX_DBGC_CFG_DBGBUS_BYTEL_1_BYTEL10__MASK 0x00000f00 +#define A6XX_CX_DBGC_CFG_DBGBUS_BYTEL_1_BYTEL10__SHIFT 8 +static inline uint32_t A6XX_CX_DBGC_CFG_DBGBUS_BYTEL_1_BYTEL10(uint32_t val) +{ + return ((val) << A6XX_CX_DBGC_CFG_DBGBUS_BYTEL_1_BYTEL10__SHIFT) & A6XX_CX_DBGC_CFG_DBGBUS_BYTEL_1_BYTEL10__MASK; +} +#define A6XX_CX_DBGC_CFG_DBGBUS_BYTEL_1_BYTEL11__MASK 0x0000f000 +#define A6XX_CX_DBGC_CFG_DBGBUS_BYTEL_1_BYTEL11__SHIFT 12 +static inline uint32_t A6XX_CX_DBGC_CFG_DBGBUS_BYTEL_1_BYTEL11(uint32_t val) +{ + return ((val) << A6XX_CX_DBGC_CFG_DBGBUS_BYTEL_1_BYTEL11__SHIFT) & A6XX_CX_DBGC_CFG_DBGBUS_BYTEL_1_BYTEL11__MASK; +} +#define A6XX_CX_DBGC_CFG_DBGBUS_BYTEL_1_BYTEL12__MASK 0x000f0000 +#define A6XX_CX_DBGC_CFG_DBGBUS_BYTEL_1_BYTEL12__SHIFT 16 +static inline uint32_t A6XX_CX_DBGC_CFG_DBGBUS_BYTEL_1_BYTEL12(uint32_t val) +{ + return ((val) << A6XX_CX_DBGC_CFG_DBGBUS_BYTEL_1_BYTEL12__SHIFT) & A6XX_CX_DBGC_CFG_DBGBUS_BYTEL_1_BYTEL12__MASK; +} +#define A6XX_CX_DBGC_CFG_DBGBUS_BYTEL_1_BYTEL13__MASK 0x00f00000 +#define A6XX_CX_DBGC_CFG_DBGBUS_BYTEL_1_BYTEL13__SHIFT 20 +static inline uint32_t A6XX_CX_DBGC_CFG_DBGBUS_BYTEL_1_BYTEL13(uint32_t val) +{ + return ((val) << A6XX_CX_DBGC_CFG_DBGBUS_BYTEL_1_BYTEL13__SHIFT) & A6XX_CX_DBGC_CFG_DBGBUS_BYTEL_1_BYTEL13__MASK; +} +#define A6XX_CX_DBGC_CFG_DBGBUS_BYTEL_1_BYTEL14__MASK 0x0f000000 
+#define A6XX_CX_DBGC_CFG_DBGBUS_BYTEL_1_BYTEL14__SHIFT 24 +static inline uint32_t A6XX_CX_DBGC_CFG_DBGBUS_BYTEL_1_BYTEL14(uint32_t val) +{ + return ((val) << A6XX_CX_DBGC_CFG_DBGBUS_BYTEL_1_BYTEL14__SHIFT) & A6XX_CX_DBGC_CFG_DBGBUS_BYTEL_1_BYTEL14__MASK; +} +#define A6XX_CX_DBGC_CFG_DBGBUS_BYTEL_1_BYTEL15__MASK 0xf0000000 +#define A6XX_CX_DBGC_CFG_DBGBUS_BYTEL_1_BYTEL15__SHIFT 28 +static inline uint32_t A6XX_CX_DBGC_CFG_DBGBUS_BYTEL_1_BYTEL15(uint32_t val) +{ + return ((val) << A6XX_CX_DBGC_CFG_DBGBUS_BYTEL_1_BYTEL15__SHIFT) & A6XX_CX_DBGC_CFG_DBGBUS_BYTEL_1_BYTEL15__MASK; +} + +#define REG_A6XX_CX_DBGC_CFG_DBGBUS_TRACE_BUF1 0x0000002f + +#define REG_A6XX_CX_DBGC_CFG_DBGBUS_TRACE_BUF2 0x00000030 + +#define REG_A6XX_CX_MISC_SYSTEM_CACHE_CNTL_0 0x00000001 + +#define REG_A6XX_CX_MISC_SYSTEM_CACHE_CNTL_1 0x00000002 + + +#endif /* A6XX_XML */ diff -Nru mesa-18.3.3/src/freedreno/registers/adreno_common.xml.h mesa-19.0.1/src/freedreno/registers/adreno_common.xml.h --- mesa-18.3.3/src/freedreno/registers/adreno_common.xml.h 1970-01-01 00:00:00.000000000 +0000 +++ mesa-19.0.1/src/freedreno/registers/adreno_common.xml.h 2019-03-31 23:16:37.000000000 +0000 @@ -0,0 +1,536 @@ +#ifndef ADRENO_COMMON_XML +#define ADRENO_COMMON_XML + +/* Autogenerated file, DO NOT EDIT manually! 
+ +This file was generated by the rules-ng-ng headergen tool in this git repository: +http://github.com/freedreno/envytools/ +git clone https://github.com/freedreno/envytools.git + +The rules-ng-ng source files this header was generated from are: +- /home/robclark/src/envytools/rnndb/adreno.xml ( 501 bytes, from 2018-07-03 19:37:13) +- /home/robclark/src/envytools/rnndb/freedreno_copyright.xml ( 1572 bytes, from 2018-07-03 19:37:13) +- /home/robclark/src/envytools/rnndb/adreno/a2xx.xml ( 42463 bytes, from 2018-11-19 13:44:03) +- /home/robclark/src/envytools/rnndb/adreno/adreno_common.xml ( 14239 bytes, from 2018-12-05 15:25:53) +- /home/robclark/src/envytools/rnndb/adreno/adreno_pm4.xml ( 43052 bytes, from 2018-12-21 18:21:34) +- /home/robclark/src/envytools/rnndb/adreno/a3xx.xml ( 83840 bytes, from 2018-07-03 19:37:13) +- /home/robclark/src/envytools/rnndb/adreno/a4xx.xml ( 112086 bytes, from 2018-07-03 19:37:13) +- /home/robclark/src/envytools/rnndb/adreno/a5xx.xml ( 147240 bytes, from 2018-12-21 18:21:34) +- /home/robclark/src/envytools/rnndb/adreno/a6xx.xml ( 141895 bytes, from 2018-12-21 18:21:34) +- /home/robclark/src/envytools/rnndb/adreno/a6xx_gmu.xml ( 10431 bytes, from 2018-09-14 13:03:07) +- /home/robclark/src/envytools/rnndb/adreno/ocmem.xml ( 1773 bytes, from 2018-07-03 19:37:13) + +Copyright (C) 2013-2018 by the following authors: +- Rob Clark (robclark) +- Ilia Mirkin (imirkin) + +Permission is hereby granted, free of charge, to any person obtaining +a copy of this software and associated documentation files (the +"Software"), to deal in the Software without restriction, including +without limitation the rights to use, copy, modify, merge, publish, +distribute, sublicense, and/or sell copies of the Software, and to +permit persons to whom the Software is furnished to do so, subject to +the following conditions: + +The above copyright notice and this permission notice (including the +next paragraph) shall be included in all copies or substantial 
+portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, +EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF +MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. +IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE +LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION +OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION +WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. +*/ + + +enum chip { + A2XX = 0, + A3XX = 0, + A4XX = 0, + A5XX = 0, + A6XX = 0, +}; + +enum adreno_pa_su_sc_draw { + PC_DRAW_POINTS = 0, + PC_DRAW_LINES = 1, + PC_DRAW_TRIANGLES = 2, +}; + +enum adreno_compare_func { + FUNC_NEVER = 0, + FUNC_LESS = 1, + FUNC_EQUAL = 2, + FUNC_LEQUAL = 3, + FUNC_GREATER = 4, + FUNC_NOTEQUAL = 5, + FUNC_GEQUAL = 6, + FUNC_ALWAYS = 7, +}; + +enum adreno_stencil_op { + STENCIL_KEEP = 0, + STENCIL_ZERO = 1, + STENCIL_REPLACE = 2, + STENCIL_INCR_CLAMP = 3, + STENCIL_DECR_CLAMP = 4, + STENCIL_INVERT = 5, + STENCIL_INCR_WRAP = 6, + STENCIL_DECR_WRAP = 7, +}; + +enum adreno_rb_blend_factor { + FACTOR_ZERO = 0, + FACTOR_ONE = 1, + FACTOR_SRC_COLOR = 4, + FACTOR_ONE_MINUS_SRC_COLOR = 5, + FACTOR_SRC_ALPHA = 6, + FACTOR_ONE_MINUS_SRC_ALPHA = 7, + FACTOR_DST_COLOR = 8, + FACTOR_ONE_MINUS_DST_COLOR = 9, + FACTOR_DST_ALPHA = 10, + FACTOR_ONE_MINUS_DST_ALPHA = 11, + FACTOR_CONSTANT_COLOR = 12, + FACTOR_ONE_MINUS_CONSTANT_COLOR = 13, + FACTOR_CONSTANT_ALPHA = 14, + FACTOR_ONE_MINUS_CONSTANT_ALPHA = 15, + FACTOR_SRC_ALPHA_SATURATE = 16, + FACTOR_SRC1_COLOR = 20, + FACTOR_ONE_MINUS_SRC1_COLOR = 21, + FACTOR_SRC1_ALPHA = 22, + FACTOR_ONE_MINUS_SRC1_ALPHA = 23, +}; + +enum adreno_rb_surface_endian { + ENDIAN_NONE = 0, + ENDIAN_8IN16 = 1, + ENDIAN_8IN32 = 2, + ENDIAN_16IN32 = 3, + ENDIAN_8IN64 = 4, + ENDIAN_8IN128 = 5, +}; + +enum adreno_rb_dither_mode { + DITHER_DISABLE = 0, + DITHER_ALWAYS = 1, + DITHER_IF_ALPHA_OFF = 2, +}; + +enum 
adreno_rb_depth_format { + DEPTHX_16 = 0, + DEPTHX_24_8 = 1, + DEPTHX_32 = 2, +}; + +enum adreno_rb_copy_control_mode { + RB_COPY_RESOLVE = 1, + RB_COPY_CLEAR = 2, + RB_COPY_DEPTH_STENCIL = 5, +}; + +enum a3xx_rop_code { + ROP_CLEAR = 0, + ROP_NOR = 1, + ROP_AND_INVERTED = 2, + ROP_COPY_INVERTED = 3, + ROP_AND_REVERSE = 4, + ROP_INVERT = 5, + ROP_XOR = 6, + ROP_NAND = 7, + ROP_AND = 8, + ROP_EQUIV = 9, + ROP_NOOP = 10, + ROP_OR_INVERTED = 11, + ROP_COPY = 12, + ROP_OR_REVERSE = 13, + ROP_OR = 14, + ROP_SET = 15, +}; + +enum a3xx_render_mode { + RB_RENDERING_PASS = 0, + RB_TILING_PASS = 1, + RB_RESOLVE_PASS = 2, + RB_COMPUTE_PASS = 3, +}; + +enum a3xx_msaa_samples { + MSAA_ONE = 0, + MSAA_TWO = 1, + MSAA_FOUR = 2, + MSAA_EIGHT = 3, +}; + +enum a3xx_threadmode { + MULTI = 0, + SINGLE = 1, +}; + +enum a3xx_instrbuffermode { + CACHE = 0, + BUFFER = 1, +}; + +enum a3xx_threadsize { + TWO_QUADS = 0, + FOUR_QUADS = 1, +}; + +enum a3xx_color_swap { + WZYX = 0, + WXYZ = 1, + ZYXW = 2, + XYZW = 3, +}; + +enum a3xx_rb_blend_opcode { + BLEND_DST_PLUS_SRC = 0, + BLEND_SRC_MINUS_DST = 1, + BLEND_DST_MINUS_SRC = 2, + BLEND_MIN_DST_SRC = 3, + BLEND_MAX_DST_SRC = 4, +}; + +enum a4xx_tess_spacing { + EQUAL_SPACING = 0, + ODD_SPACING = 2, + EVEN_SPACING = 3, +}; + +#define REG_AXXX_CP_RB_BASE 0x000001c0 + +#define REG_AXXX_CP_RB_CNTL 0x000001c1 +#define AXXX_CP_RB_CNTL_BUFSZ__MASK 0x0000003f +#define AXXX_CP_RB_CNTL_BUFSZ__SHIFT 0 +static inline uint32_t AXXX_CP_RB_CNTL_BUFSZ(uint32_t val) +{ + return ((val) << AXXX_CP_RB_CNTL_BUFSZ__SHIFT) & AXXX_CP_RB_CNTL_BUFSZ__MASK; +} +#define AXXX_CP_RB_CNTL_BLKSZ__MASK 0x00003f00 +#define AXXX_CP_RB_CNTL_BLKSZ__SHIFT 8 +static inline uint32_t AXXX_CP_RB_CNTL_BLKSZ(uint32_t val) +{ + return ((val) << AXXX_CP_RB_CNTL_BLKSZ__SHIFT) & AXXX_CP_RB_CNTL_BLKSZ__MASK; +} +#define AXXX_CP_RB_CNTL_BUF_SWAP__MASK 0x00030000 +#define AXXX_CP_RB_CNTL_BUF_SWAP__SHIFT 16 +static inline uint32_t AXXX_CP_RB_CNTL_BUF_SWAP(uint32_t val) +{ + return ((val) << 
AXXX_CP_RB_CNTL_BUF_SWAP__SHIFT) & AXXX_CP_RB_CNTL_BUF_SWAP__MASK; +} +#define AXXX_CP_RB_CNTL_POLL_EN 0x00100000 +#define AXXX_CP_RB_CNTL_NO_UPDATE 0x08000000 +#define AXXX_CP_RB_CNTL_RPTR_WR_EN 0x80000000 + +#define REG_AXXX_CP_RB_RPTR_ADDR 0x000001c3 +#define AXXX_CP_RB_RPTR_ADDR_SWAP__MASK 0x00000003 +#define AXXX_CP_RB_RPTR_ADDR_SWAP__SHIFT 0 +static inline uint32_t AXXX_CP_RB_RPTR_ADDR_SWAP(uint32_t val) +{ + return ((val) << AXXX_CP_RB_RPTR_ADDR_SWAP__SHIFT) & AXXX_CP_RB_RPTR_ADDR_SWAP__MASK; +} +#define AXXX_CP_RB_RPTR_ADDR_ADDR__MASK 0xfffffffc +#define AXXX_CP_RB_RPTR_ADDR_ADDR__SHIFT 2 +static inline uint32_t AXXX_CP_RB_RPTR_ADDR_ADDR(uint32_t val) +{ + assert(!(val & 0x3)); + return ((val >> 2) << AXXX_CP_RB_RPTR_ADDR_ADDR__SHIFT) & AXXX_CP_RB_RPTR_ADDR_ADDR__MASK; +} + +#define REG_AXXX_CP_RB_RPTR 0x000001c4 + +#define REG_AXXX_CP_RB_WPTR 0x000001c5 + +#define REG_AXXX_CP_RB_WPTR_DELAY 0x000001c6 + +#define REG_AXXX_CP_RB_RPTR_WR 0x000001c7 + +#define REG_AXXX_CP_RB_WPTR_BASE 0x000001c8 + +#define REG_AXXX_CP_QUEUE_THRESHOLDS 0x000001d5 +#define AXXX_CP_QUEUE_THRESHOLDS_CSQ_IB1_START__MASK 0x0000000f +#define AXXX_CP_QUEUE_THRESHOLDS_CSQ_IB1_START__SHIFT 0 +static inline uint32_t AXXX_CP_QUEUE_THRESHOLDS_CSQ_IB1_START(uint32_t val) +{ + return ((val) << AXXX_CP_QUEUE_THRESHOLDS_CSQ_IB1_START__SHIFT) & AXXX_CP_QUEUE_THRESHOLDS_CSQ_IB1_START__MASK; +} +#define AXXX_CP_QUEUE_THRESHOLDS_CSQ_IB2_START__MASK 0x00000f00 +#define AXXX_CP_QUEUE_THRESHOLDS_CSQ_IB2_START__SHIFT 8 +static inline uint32_t AXXX_CP_QUEUE_THRESHOLDS_CSQ_IB2_START(uint32_t val) +{ + return ((val) << AXXX_CP_QUEUE_THRESHOLDS_CSQ_IB2_START__SHIFT) & AXXX_CP_QUEUE_THRESHOLDS_CSQ_IB2_START__MASK; +} +#define AXXX_CP_QUEUE_THRESHOLDS_CSQ_ST_START__MASK 0x000f0000 +#define AXXX_CP_QUEUE_THRESHOLDS_CSQ_ST_START__SHIFT 16 +static inline uint32_t AXXX_CP_QUEUE_THRESHOLDS_CSQ_ST_START(uint32_t val) +{ + return ((val) << AXXX_CP_QUEUE_THRESHOLDS_CSQ_ST_START__SHIFT) & 
AXXX_CP_QUEUE_THRESHOLDS_CSQ_ST_START__MASK; +} + +#define REG_AXXX_CP_MEQ_THRESHOLDS 0x000001d6 +#define AXXX_CP_MEQ_THRESHOLDS_MEQ_END__MASK 0x001f0000 +#define AXXX_CP_MEQ_THRESHOLDS_MEQ_END__SHIFT 16 +static inline uint32_t AXXX_CP_MEQ_THRESHOLDS_MEQ_END(uint32_t val) +{ + return ((val) << AXXX_CP_MEQ_THRESHOLDS_MEQ_END__SHIFT) & AXXX_CP_MEQ_THRESHOLDS_MEQ_END__MASK; +} +#define AXXX_CP_MEQ_THRESHOLDS_ROQ_END__MASK 0x1f000000 +#define AXXX_CP_MEQ_THRESHOLDS_ROQ_END__SHIFT 24 +static inline uint32_t AXXX_CP_MEQ_THRESHOLDS_ROQ_END(uint32_t val) +{ + return ((val) << AXXX_CP_MEQ_THRESHOLDS_ROQ_END__SHIFT) & AXXX_CP_MEQ_THRESHOLDS_ROQ_END__MASK; +} + +#define REG_AXXX_CP_CSQ_AVAIL 0x000001d7 +#define AXXX_CP_CSQ_AVAIL_RING__MASK 0x0000007f +#define AXXX_CP_CSQ_AVAIL_RING__SHIFT 0 +static inline uint32_t AXXX_CP_CSQ_AVAIL_RING(uint32_t val) +{ + return ((val) << AXXX_CP_CSQ_AVAIL_RING__SHIFT) & AXXX_CP_CSQ_AVAIL_RING__MASK; +} +#define AXXX_CP_CSQ_AVAIL_IB1__MASK 0x00007f00 +#define AXXX_CP_CSQ_AVAIL_IB1__SHIFT 8 +static inline uint32_t AXXX_CP_CSQ_AVAIL_IB1(uint32_t val) +{ + return ((val) << AXXX_CP_CSQ_AVAIL_IB1__SHIFT) & AXXX_CP_CSQ_AVAIL_IB1__MASK; +} +#define AXXX_CP_CSQ_AVAIL_IB2__MASK 0x007f0000 +#define AXXX_CP_CSQ_AVAIL_IB2__SHIFT 16 +static inline uint32_t AXXX_CP_CSQ_AVAIL_IB2(uint32_t val) +{ + return ((val) << AXXX_CP_CSQ_AVAIL_IB2__SHIFT) & AXXX_CP_CSQ_AVAIL_IB2__MASK; +} + +#define REG_AXXX_CP_STQ_AVAIL 0x000001d8 +#define AXXX_CP_STQ_AVAIL_ST__MASK 0x0000007f +#define AXXX_CP_STQ_AVAIL_ST__SHIFT 0 +static inline uint32_t AXXX_CP_STQ_AVAIL_ST(uint32_t val) +{ + return ((val) << AXXX_CP_STQ_AVAIL_ST__SHIFT) & AXXX_CP_STQ_AVAIL_ST__MASK; +} + +#define REG_AXXX_CP_MEQ_AVAIL 0x000001d9 +#define AXXX_CP_MEQ_AVAIL_MEQ__MASK 0x0000001f +#define AXXX_CP_MEQ_AVAIL_MEQ__SHIFT 0 +static inline uint32_t AXXX_CP_MEQ_AVAIL_MEQ(uint32_t val) +{ + return ((val) << AXXX_CP_MEQ_AVAIL_MEQ__SHIFT) & AXXX_CP_MEQ_AVAIL_MEQ__MASK; +} + +#define REG_AXXX_SCRATCH_UMSK 
0x000001dc +#define AXXX_SCRATCH_UMSK_UMSK__MASK 0x000000ff +#define AXXX_SCRATCH_UMSK_UMSK__SHIFT 0 +static inline uint32_t AXXX_SCRATCH_UMSK_UMSK(uint32_t val) +{ + return ((val) << AXXX_SCRATCH_UMSK_UMSK__SHIFT) & AXXX_SCRATCH_UMSK_UMSK__MASK; +} +#define AXXX_SCRATCH_UMSK_SWAP__MASK 0x00030000 +#define AXXX_SCRATCH_UMSK_SWAP__SHIFT 16 +static inline uint32_t AXXX_SCRATCH_UMSK_SWAP(uint32_t val) +{ + return ((val) << AXXX_SCRATCH_UMSK_SWAP__SHIFT) & AXXX_SCRATCH_UMSK_SWAP__MASK; +} + +#define REG_AXXX_SCRATCH_ADDR 0x000001dd + +#define REG_AXXX_CP_ME_RDADDR 0x000001ea + +#define REG_AXXX_CP_STATE_DEBUG_INDEX 0x000001ec + +#define REG_AXXX_CP_STATE_DEBUG_DATA 0x000001ed + +#define REG_AXXX_CP_INT_CNTL 0x000001f2 +#define AXXX_CP_INT_CNTL_SW_INT_MASK 0x00080000 +#define AXXX_CP_INT_CNTL_T0_PACKET_IN_IB_MASK 0x00800000 +#define AXXX_CP_INT_CNTL_OPCODE_ERROR_MASK 0x01000000 +#define AXXX_CP_INT_CNTL_PROTECTED_MODE_ERROR_MASK 0x02000000 +#define AXXX_CP_INT_CNTL_RESERVED_BIT_ERROR_MASK 0x04000000 +#define AXXX_CP_INT_CNTL_IB_ERROR_MASK 0x08000000 +#define AXXX_CP_INT_CNTL_IB2_INT_MASK 0x20000000 +#define AXXX_CP_INT_CNTL_IB1_INT_MASK 0x40000000 +#define AXXX_CP_INT_CNTL_RB_INT_MASK 0x80000000 + +#define REG_AXXX_CP_INT_STATUS 0x000001f3 + +#define REG_AXXX_CP_INT_ACK 0x000001f4 + +#define REG_AXXX_CP_ME_CNTL 0x000001f6 +#define AXXX_CP_ME_CNTL_BUSY 0x20000000 +#define AXXX_CP_ME_CNTL_HALT 0x10000000 + +#define REG_AXXX_CP_ME_STATUS 0x000001f7 + +#define REG_AXXX_CP_ME_RAM_WADDR 0x000001f8 + +#define REG_AXXX_CP_ME_RAM_RADDR 0x000001f9 + +#define REG_AXXX_CP_ME_RAM_DATA 0x000001fa + +#define REG_AXXX_CP_DEBUG 0x000001fc +#define AXXX_CP_DEBUG_PREDICATE_DISABLE 0x00800000 +#define AXXX_CP_DEBUG_PROG_END_PTR_ENABLE 0x01000000 +#define AXXX_CP_DEBUG_MIU_128BIT_WRITE_ENABLE 0x02000000 +#define AXXX_CP_DEBUG_PREFETCH_PASS_NOPS 0x04000000 +#define AXXX_CP_DEBUG_DYNAMIC_CLK_DISABLE 0x08000000 +#define AXXX_CP_DEBUG_PREFETCH_MATCH_DISABLE 0x10000000 +#define 
AXXX_CP_DEBUG_SIMPLE_ME_FLOW_CONTROL 0x40000000 +#define AXXX_CP_DEBUG_MIU_WRITE_PACK_DISABLE 0x80000000 + +#define REG_AXXX_CP_CSQ_RB_STAT 0x000001fd +#define AXXX_CP_CSQ_RB_STAT_RPTR__MASK 0x0000007f +#define AXXX_CP_CSQ_RB_STAT_RPTR__SHIFT 0 +static inline uint32_t AXXX_CP_CSQ_RB_STAT_RPTR(uint32_t val) +{ + return ((val) << AXXX_CP_CSQ_RB_STAT_RPTR__SHIFT) & AXXX_CP_CSQ_RB_STAT_RPTR__MASK; +} +#define AXXX_CP_CSQ_RB_STAT_WPTR__MASK 0x007f0000 +#define AXXX_CP_CSQ_RB_STAT_WPTR__SHIFT 16 +static inline uint32_t AXXX_CP_CSQ_RB_STAT_WPTR(uint32_t val) +{ + return ((val) << AXXX_CP_CSQ_RB_STAT_WPTR__SHIFT) & AXXX_CP_CSQ_RB_STAT_WPTR__MASK; +} + +#define REG_AXXX_CP_CSQ_IB1_STAT 0x000001fe +#define AXXX_CP_CSQ_IB1_STAT_RPTR__MASK 0x0000007f +#define AXXX_CP_CSQ_IB1_STAT_RPTR__SHIFT 0 +static inline uint32_t AXXX_CP_CSQ_IB1_STAT_RPTR(uint32_t val) +{ + return ((val) << AXXX_CP_CSQ_IB1_STAT_RPTR__SHIFT) & AXXX_CP_CSQ_IB1_STAT_RPTR__MASK; +} +#define AXXX_CP_CSQ_IB1_STAT_WPTR__MASK 0x007f0000 +#define AXXX_CP_CSQ_IB1_STAT_WPTR__SHIFT 16 +static inline uint32_t AXXX_CP_CSQ_IB1_STAT_WPTR(uint32_t val) +{ + return ((val) << AXXX_CP_CSQ_IB1_STAT_WPTR__SHIFT) & AXXX_CP_CSQ_IB1_STAT_WPTR__MASK; +} + +#define REG_AXXX_CP_CSQ_IB2_STAT 0x000001ff +#define AXXX_CP_CSQ_IB2_STAT_RPTR__MASK 0x0000007f +#define AXXX_CP_CSQ_IB2_STAT_RPTR__SHIFT 0 +static inline uint32_t AXXX_CP_CSQ_IB2_STAT_RPTR(uint32_t val) +{ + return ((val) << AXXX_CP_CSQ_IB2_STAT_RPTR__SHIFT) & AXXX_CP_CSQ_IB2_STAT_RPTR__MASK; +} +#define AXXX_CP_CSQ_IB2_STAT_WPTR__MASK 0x007f0000 +#define AXXX_CP_CSQ_IB2_STAT_WPTR__SHIFT 16 +static inline uint32_t AXXX_CP_CSQ_IB2_STAT_WPTR(uint32_t val) +{ + return ((val) << AXXX_CP_CSQ_IB2_STAT_WPTR__SHIFT) & AXXX_CP_CSQ_IB2_STAT_WPTR__MASK; +} + +#define REG_AXXX_CP_NON_PREFETCH_CNTRS 0x00000440 + +#define REG_AXXX_CP_STQ_ST_STAT 0x00000443 + +#define REG_AXXX_CP_ST_BASE 0x0000044d + +#define REG_AXXX_CP_ST_BUFSZ 0x0000044e + +#define REG_AXXX_CP_MEQ_STAT 0x0000044f + +#define 
REG_AXXX_CP_MIU_TAG_STAT 0x00000452 + +#define REG_AXXX_CP_BIN_MASK_LO 0x00000454 + +#define REG_AXXX_CP_BIN_MASK_HI 0x00000455 + +#define REG_AXXX_CP_BIN_SELECT_LO 0x00000456 + +#define REG_AXXX_CP_BIN_SELECT_HI 0x00000457 + +#define REG_AXXX_CP_IB1_BASE 0x00000458 + +#define REG_AXXX_CP_IB1_BUFSZ 0x00000459 + +#define REG_AXXX_CP_IB2_BASE 0x0000045a + +#define REG_AXXX_CP_IB2_BUFSZ 0x0000045b + +#define REG_AXXX_CP_STAT 0x0000047f +#define AXXX_CP_STAT_CP_BUSY 0x80000000 +#define AXXX_CP_STAT_VS_EVENT_FIFO_BUSY 0x40000000 +#define AXXX_CP_STAT_PS_EVENT_FIFO_BUSY 0x20000000 +#define AXXX_CP_STAT_CF_EVENT_FIFO_BUSY 0x10000000 +#define AXXX_CP_STAT_RB_EVENT_FIFO_BUSY 0x08000000 +#define AXXX_CP_STAT_ME_BUSY 0x04000000 +#define AXXX_CP_STAT_MIU_WR_C_BUSY 0x02000000 +#define AXXX_CP_STAT_CP_3D_BUSY 0x00800000 +#define AXXX_CP_STAT_CP_NRT_BUSY 0x00400000 +#define AXXX_CP_STAT_RBIU_SCRATCH_BUSY 0x00200000 +#define AXXX_CP_STAT_RCIU_ME_BUSY 0x00100000 +#define AXXX_CP_STAT_RCIU_PFP_BUSY 0x00080000 +#define AXXX_CP_STAT_MEQ_RING_BUSY 0x00040000 +#define AXXX_CP_STAT_PFP_BUSY 0x00020000 +#define AXXX_CP_STAT_ST_QUEUE_BUSY 0x00010000 +#define AXXX_CP_STAT_INDIRECT2_QUEUE_BUSY 0x00002000 +#define AXXX_CP_STAT_INDIRECTS_QUEUE_BUSY 0x00001000 +#define AXXX_CP_STAT_RING_QUEUE_BUSY 0x00000800 +#define AXXX_CP_STAT_CSF_BUSY 0x00000400 +#define AXXX_CP_STAT_CSF_ST_BUSY 0x00000200 +#define AXXX_CP_STAT_EVENT_BUSY 0x00000100 +#define AXXX_CP_STAT_CSF_INDIRECT2_BUSY 0x00000080 +#define AXXX_CP_STAT_CSF_INDIRECTS_BUSY 0x00000040 +#define AXXX_CP_STAT_CSF_RING_BUSY 0x00000020 +#define AXXX_CP_STAT_RCIU_BUSY 0x00000010 +#define AXXX_CP_STAT_RBIU_BUSY 0x00000008 +#define AXXX_CP_STAT_MIU_RD_RETURN_BUSY 0x00000004 +#define AXXX_CP_STAT_MIU_RD_REQ_BUSY 0x00000002 +#define AXXX_CP_STAT_MIU_WR_BUSY 0x00000001 + +#define REG_AXXX_CP_SCRATCH_REG0 0x00000578 + +#define REG_AXXX_CP_SCRATCH_REG1 0x00000579 + +#define REG_AXXX_CP_SCRATCH_REG2 0x0000057a + +#define REG_AXXX_CP_SCRATCH_REG3 
0x0000057b + +#define REG_AXXX_CP_SCRATCH_REG4 0x0000057c + +#define REG_AXXX_CP_SCRATCH_REG5 0x0000057d + +#define REG_AXXX_CP_SCRATCH_REG6 0x0000057e + +#define REG_AXXX_CP_SCRATCH_REG7 0x0000057f + +#define REG_AXXX_CP_ME_VS_EVENT_SRC 0x00000600 + +#define REG_AXXX_CP_ME_VS_EVENT_ADDR 0x00000601 + +#define REG_AXXX_CP_ME_VS_EVENT_DATA 0x00000602 + +#define REG_AXXX_CP_ME_VS_EVENT_ADDR_SWM 0x00000603 + +#define REG_AXXX_CP_ME_VS_EVENT_DATA_SWM 0x00000604 + +#define REG_AXXX_CP_ME_PS_EVENT_SRC 0x00000605 + +#define REG_AXXX_CP_ME_PS_EVENT_ADDR 0x00000606 + +#define REG_AXXX_CP_ME_PS_EVENT_DATA 0x00000607 + +#define REG_AXXX_CP_ME_PS_EVENT_ADDR_SWM 0x00000608 + +#define REG_AXXX_CP_ME_PS_EVENT_DATA_SWM 0x00000609 + +#define REG_AXXX_CP_ME_CF_EVENT_SRC 0x0000060a + +#define REG_AXXX_CP_ME_CF_EVENT_ADDR 0x0000060b + +#define REG_AXXX_CP_ME_CF_EVENT_DATA 0x0000060c + +#define REG_AXXX_CP_ME_NRT_ADDR 0x0000060d + +#define REG_AXXX_CP_ME_NRT_DATA 0x0000060e + +#define REG_AXXX_CP_ME_VS_FETCH_DONE_SRC 0x00000612 + +#define REG_AXXX_CP_ME_VS_FETCH_DONE_ADDR 0x00000613 + +#define REG_AXXX_CP_ME_VS_FETCH_DONE_DATA 0x00000614 + + +#endif /* ADRENO_COMMON_XML */ diff -Nru mesa-18.3.3/src/freedreno/registers/adreno_pm4.xml.h mesa-19.0.1/src/freedreno/registers/adreno_pm4.xml.h --- mesa-18.3.3/src/freedreno/registers/adreno_pm4.xml.h 1970-01-01 00:00:00.000000000 +0000 +++ mesa-19.0.1/src/freedreno/registers/adreno_pm4.xml.h 2019-03-31 23:16:37.000000000 +0000 @@ -0,0 +1,1569 @@ +#ifndef ADRENO_PM4_XML +#define ADRENO_PM4_XML + +/* Autogenerated file, DO NOT EDIT manually! 
+ +This file was generated by the rules-ng-ng headergen tool in this git repository: +http://github.com/freedreno/envytools/ +git clone https://github.com/freedreno/envytools.git + +The rules-ng-ng source files this header was generated from are: +- /home/robclark/src/envytools/rnndb/adreno.xml ( 501 bytes, from 2018-07-03 19:37:13) +- /home/robclark/src/envytools/rnndb/freedreno_copyright.xml ( 1572 bytes, from 2018-07-03 19:37:13) +- /home/robclark/src/envytools/rnndb/adreno/a2xx.xml ( 42463 bytes, from 2018-11-19 13:44:03) +- /home/robclark/src/envytools/rnndb/adreno/adreno_common.xml ( 14239 bytes, from 2018-12-05 15:25:53) +- /home/robclark/src/envytools/rnndb/adreno/adreno_pm4.xml ( 43052 bytes, from 2018-12-21 18:21:34) +- /home/robclark/src/envytools/rnndb/adreno/a3xx.xml ( 83840 bytes, from 2018-07-03 19:37:13) +- /home/robclark/src/envytools/rnndb/adreno/a4xx.xml ( 112086 bytes, from 2018-07-03 19:37:13) +- /home/robclark/src/envytools/rnndb/adreno/a5xx.xml ( 147240 bytes, from 2018-12-21 18:21:34) +- /home/robclark/src/envytools/rnndb/adreno/a6xx.xml ( 141895 bytes, from 2018-12-21 18:21:34) +- /home/robclark/src/envytools/rnndb/adreno/a6xx_gmu.xml ( 10431 bytes, from 2018-09-14 13:03:07) +- /home/robclark/src/envytools/rnndb/adreno/ocmem.xml ( 1773 bytes, from 2018-07-03 19:37:13) + +Copyright (C) 2013-2018 by the following authors: +- Rob Clark (robclark) +- Ilia Mirkin (imirkin) + +Permission is hereby granted, free of charge, to any person obtaining +a copy of this software and associated documentation files (the +"Software"), to deal in the Software without restriction, including +without limitation the rights to use, copy, modify, merge, publish, +distribute, sublicense, and/or sell copies of the Software, and to +permit persons to whom the Software is furnished to do so, subject to +the following conditions: + +The above copyright notice and this permission notice (including the +next paragraph) shall be included in all copies or substantial 
+portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, +EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF +MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. +IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE +LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION +OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION +WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. +*/ + + +enum vgt_event_type { + VS_DEALLOC = 0, + PS_DEALLOC = 1, + VS_DONE_TS = 2, + PS_DONE_TS = 3, + CACHE_FLUSH_TS = 4, + CONTEXT_DONE = 5, + CACHE_FLUSH = 6, + HLSQ_FLUSH = 7, + VIZQUERY_START = 7, + VIZQUERY_END = 8, + SC_WAIT_WC = 9, + RST_PIX_CNT = 13, + RST_VTX_CNT = 14, + TILE_FLUSH = 15, + STAT_EVENT = 16, + CACHE_FLUSH_AND_INV_TS_EVENT = 20, + ZPASS_DONE = 21, + CACHE_FLUSH_AND_INV_EVENT = 22, + PERFCOUNTER_START = 23, + PERFCOUNTER_STOP = 24, + VS_FETCH_DONE = 27, + FACENESS_FLUSH = 28, + FLUSH_SO_0 = 17, + FLUSH_SO_1 = 18, + FLUSH_SO_2 = 19, + FLUSH_SO_3 = 20, + PC_CCU_INVALIDATE_DEPTH = 24, + PC_CCU_INVALIDATE_COLOR = 25, + UNK_1C = 28, + UNK_1D = 29, + BLIT = 30, + UNK_25 = 37, + LRZ_FLUSH = 38, + UNK_2C = 44, + UNK_2D = 45, +}; + +enum pc_di_primtype { + DI_PT_NONE = 0, + DI_PT_POINTLIST_PSIZE = 1, + DI_PT_LINELIST = 2, + DI_PT_LINESTRIP = 3, + DI_PT_TRILIST = 4, + DI_PT_TRIFAN = 5, + DI_PT_TRISTRIP = 6, + DI_PT_LINELOOP = 7, + DI_PT_RECTLIST = 8, + DI_PT_POINTLIST = 9, + DI_PT_LINE_ADJ = 10, + DI_PT_LINESTRIP_ADJ = 11, + DI_PT_TRI_ADJ = 12, + DI_PT_TRISTRIP_ADJ = 13, +}; + +enum pc_di_src_sel { + DI_SRC_SEL_DMA = 0, + DI_SRC_SEL_IMMEDIATE = 1, + DI_SRC_SEL_AUTO_INDEX = 2, + DI_SRC_SEL_RESERVED = 3, +}; + +enum pc_di_face_cull_sel { + DI_FACE_CULL_NONE = 0, + DI_FACE_CULL_FETCH = 1, + DI_FACE_BACKFACE_CULL = 2, + DI_FACE_FRONTFACE_CULL = 3, +}; + +enum pc_di_index_size { + INDEX_SIZE_IGN = 0, + INDEX_SIZE_16_BIT = 0, + INDEX_SIZE_32_BIT = 1, + 
INDEX_SIZE_8_BIT = 2, + INDEX_SIZE_INVALID = 0, +}; + +enum pc_di_vis_cull_mode { + IGNORE_VISIBILITY = 0, + USE_VISIBILITY = 1, +}; + +enum adreno_pm4_packet_type { + CP_TYPE0_PKT = 0, + CP_TYPE1_PKT = 0x40000000, + CP_TYPE2_PKT = 0x80000000, + CP_TYPE3_PKT = 0xc0000000, + CP_TYPE4_PKT = 0x40000000, + CP_TYPE7_PKT = 0x70000000, +}; + +enum adreno_pm4_type3_packets { + CP_ME_INIT = 72, + CP_NOP = 16, + CP_PREEMPT_ENABLE = 28, + CP_PREEMPT_TOKEN = 30, + CP_INDIRECT_BUFFER = 63, + CP_INDIRECT_BUFFER_PFD = 55, + CP_WAIT_FOR_IDLE = 38, + CP_WAIT_REG_MEM = 60, + CP_WAIT_REG_EQ = 82, + CP_WAIT_REG_GTE = 83, + CP_WAIT_UNTIL_READ = 92, + CP_WAIT_IB_PFD_COMPLETE = 93, + CP_REG_RMW = 33, + CP_SET_BIN_DATA = 47, + CP_SET_BIN_DATA5 = 47, + CP_REG_TO_MEM = 62, + CP_MEM_WRITE = 61, + CP_MEM_WRITE_CNTR = 79, + CP_COND_EXEC = 68, + CP_COND_WRITE = 69, + CP_COND_WRITE5 = 69, + CP_EVENT_WRITE = 70, + CP_EVENT_WRITE_SHD = 88, + CP_EVENT_WRITE_CFL = 89, + CP_EVENT_WRITE_ZPD = 91, + CP_RUN_OPENCL = 49, + CP_DRAW_INDX = 34, + CP_DRAW_INDX_2 = 54, + CP_DRAW_INDX_BIN = 52, + CP_DRAW_INDX_2_BIN = 53, + CP_VIZ_QUERY = 35, + CP_SET_STATE = 37, + CP_SET_CONSTANT = 45, + CP_IM_LOAD = 39, + CP_IM_LOAD_IMMEDIATE = 43, + CP_LOAD_CONSTANT_CONTEXT = 46, + CP_INVALIDATE_STATE = 59, + CP_SET_SHADER_BASES = 74, + CP_SET_BIN_MASK = 80, + CP_SET_BIN_SELECT = 81, + CP_CONTEXT_UPDATE = 94, + CP_INTERRUPT = 64, + CP_IM_STORE = 44, + CP_SET_DRAW_INIT_FLAGS = 75, + CP_SET_PROTECTED_MODE = 95, + CP_BOOTSTRAP_UCODE = 111, + CP_LOAD_STATE = 48, + CP_LOAD_STATE4 = 48, + CP_COND_INDIRECT_BUFFER_PFE = 58, + CP_COND_INDIRECT_BUFFER_PFD = 50, + CP_INDIRECT_BUFFER_PFE = 63, + CP_SET_BIN = 76, + CP_TEST_TWO_MEMS = 113, + CP_REG_WR_NO_CTXT = 120, + CP_RECORD_PFP_TIMESTAMP = 17, + CP_SET_SECURE_MODE = 102, + CP_WAIT_FOR_ME = 19, + CP_SET_DRAW_STATE = 67, + CP_DRAW_INDX_OFFSET = 56, + CP_DRAW_INDIRECT = 40, + CP_DRAW_INDX_INDIRECT = 41, + CP_DRAW_AUTO = 36, + CP_UNKNOWN_19 = 25, + CP_UNKNOWN_1A = 26, + CP_UNKNOWN_4E = 
78, + CP_WIDE_REG_WRITE = 116, + CP_SCRATCH_TO_REG = 77, + CP_REG_TO_SCRATCH = 74, + CP_WAIT_MEM_WRITES = 18, + CP_COND_REG_EXEC = 71, + CP_MEM_TO_REG = 66, + CP_EXEC_CS_INDIRECT = 65, + CP_EXEC_CS = 51, + CP_PERFCOUNTER_ACTION = 80, + CP_SMMU_TABLE_UPDATE = 83, + CP_SET_MARKER = 101, + CP_SET_PSEUDO_REG = 86, + CP_CONTEXT_REG_BUNCH = 92, + CP_YIELD_ENABLE = 28, + CP_SKIP_IB2_ENABLE_GLOBAL = 29, + CP_SKIP_IB2_ENABLE_LOCAL = 35, + CP_SET_SUBDRAW_SIZE = 53, + CP_SET_VISIBILITY_OVERRIDE = 100, + CP_PREEMPT_ENABLE_GLOBAL = 105, + CP_PREEMPT_ENABLE_LOCAL = 106, + CP_CONTEXT_SWITCH_YIELD = 107, + CP_SET_RENDER_MODE = 108, + CP_COMPUTE_CHECKPOINT = 110, + CP_MEM_TO_MEM = 115, + CP_BLIT = 44, + CP_REG_TEST = 57, + CP_SET_MODE = 99, + CP_LOAD_STATE6_GEOM = 50, + CP_LOAD_STATE6_FRAG = 52, + IN_IB_PREFETCH_END = 23, + IN_SUBBLK_PREFETCH = 31, + IN_INSTR_PREFETCH = 32, + IN_INSTR_MATCH = 71, + IN_CONST_PREFETCH = 73, + IN_INCR_UPDT_STATE = 85, + IN_INCR_UPDT_CONST = 86, + IN_INCR_UPDT_INSTR = 87, + PKT4 = 4, + CP_UNK_A6XX_14 = 20, + CP_UNK_A6XX_36 = 54, + CP_UNK_A6XX_55 = 85, + CP_REG_WRITE = 109, +}; + +enum adreno_state_block { + SB_VERT_TEX = 0, + SB_VERT_MIPADDR = 1, + SB_FRAG_TEX = 2, + SB_FRAG_MIPADDR = 3, + SB_VERT_SHADER = 4, + SB_GEOM_SHADER = 5, + SB_FRAG_SHADER = 6, + SB_COMPUTE_SHADER = 7, +}; + +enum adreno_state_type { + ST_SHADER = 0, + ST_CONSTANTS = 1, +}; + +enum adreno_state_src { + SS_DIRECT = 0, + SS_INVALID_ALL_IC = 2, + SS_INVALID_PART_IC = 3, + SS_INDIRECT = 4, + SS_INDIRECT_TCM = 5, + SS_INDIRECT_STM = 6, +}; + +enum a4xx_state_block { + SB4_VS_TEX = 0, + SB4_HS_TEX = 1, + SB4_DS_TEX = 2, + SB4_GS_TEX = 3, + SB4_FS_TEX = 4, + SB4_CS_TEX = 5, + SB4_VS_SHADER = 8, + SB4_HS_SHADER = 9, + SB4_DS_SHADER = 10, + SB4_GS_SHADER = 11, + SB4_FS_SHADER = 12, + SB4_CS_SHADER = 13, + SB4_SSBO = 14, + SB4_CS_SSBO = 15, +}; + +enum a4xx_state_type { + ST4_SHADER = 0, + ST4_CONSTANTS = 1, +}; + +enum a4xx_state_src { + SS4_DIRECT = 0, + SS4_INDIRECT = 2, +}; + +enum 
a6xx_state_block { + SB6_VS_TEX = 0, + SB6_HS_TEX = 1, + SB6_DS_TEX = 2, + SB6_GS_TEX = 3, + SB6_FS_TEX = 4, + SB6_CS_TEX = 5, + SB6_VS_SHADER = 8, + SB6_HS_SHADER = 9, + SB6_DS_SHADER = 10, + SB6_GS_SHADER = 11, + SB6_FS_SHADER = 12, + SB6_CS_SHADER = 13, + SB6_SSBO = 14, + SB6_CS_SSBO = 15, +}; + +enum a6xx_state_type { + ST6_SHADER = 0, + ST6_CONSTANTS = 1, +}; + +enum a6xx_state_src { + SS6_DIRECT = 0, + SS6_INDIRECT = 2, +}; + +enum a4xx_index_size { + INDEX4_SIZE_8_BIT = 0, + INDEX4_SIZE_16_BIT = 1, + INDEX4_SIZE_32_BIT = 2, +}; + +enum cp_cond_function { + WRITE_ALWAYS = 0, + WRITE_LT = 1, + WRITE_LE = 2, + WRITE_EQ = 3, + WRITE_NE = 4, + WRITE_GE = 5, + WRITE_GT = 6, +}; + +enum render_mode_cmd { + BYPASS = 1, + BINNING = 2, + GMEM = 3, + BLIT2D = 5, + BLIT2DSCALE = 7, + END2D = 8, +}; + +enum cp_blit_cmd { + BLIT_OP_FILL = 0, + BLIT_OP_COPY = 1, + BLIT_OP_SCALE = 3, +}; + +enum a6xx_render_mode { + RM6_BYPASS = 1, + RM6_BINNING = 2, + RM6_GMEM = 4, + RM6_BLIT2D = 5, + RM6_RESOLVE = 6, + RM6_BLIT2DSCALE = 12, +}; + +enum pseudo_reg { + SMMU_INFO = 0, + NON_SECURE_SAVE_ADDR = 1, + SECURE_SAVE_ADDR = 2, + NON_PRIV_SAVE_ADDR = 3, + COUNTER = 4, +}; + +#define REG_CP_LOAD_STATE_0 0x00000000 +#define CP_LOAD_STATE_0_DST_OFF__MASK 0x0000ffff +#define CP_LOAD_STATE_0_DST_OFF__SHIFT 0 +static inline uint32_t CP_LOAD_STATE_0_DST_OFF(uint32_t val) +{ + return ((val) << CP_LOAD_STATE_0_DST_OFF__SHIFT) & CP_LOAD_STATE_0_DST_OFF__MASK; +} +#define CP_LOAD_STATE_0_STATE_SRC__MASK 0x00070000 +#define CP_LOAD_STATE_0_STATE_SRC__SHIFT 16 +static inline uint32_t CP_LOAD_STATE_0_STATE_SRC(enum adreno_state_src val) +{ + return ((val) << CP_LOAD_STATE_0_STATE_SRC__SHIFT) & CP_LOAD_STATE_0_STATE_SRC__MASK; +} +#define CP_LOAD_STATE_0_STATE_BLOCK__MASK 0x00380000 +#define CP_LOAD_STATE_0_STATE_BLOCK__SHIFT 19 +static inline uint32_t CP_LOAD_STATE_0_STATE_BLOCK(enum adreno_state_block val) +{ + return ((val) << CP_LOAD_STATE_0_STATE_BLOCK__SHIFT) & 
CP_LOAD_STATE_0_STATE_BLOCK__MASK; +} +#define CP_LOAD_STATE_0_NUM_UNIT__MASK 0xffc00000 +#define CP_LOAD_STATE_0_NUM_UNIT__SHIFT 22 +static inline uint32_t CP_LOAD_STATE_0_NUM_UNIT(uint32_t val) +{ + return ((val) << CP_LOAD_STATE_0_NUM_UNIT__SHIFT) & CP_LOAD_STATE_0_NUM_UNIT__MASK; +} + +#define REG_CP_LOAD_STATE_1 0x00000001 +#define CP_LOAD_STATE_1_STATE_TYPE__MASK 0x00000003 +#define CP_LOAD_STATE_1_STATE_TYPE__SHIFT 0 +static inline uint32_t CP_LOAD_STATE_1_STATE_TYPE(enum adreno_state_type val) +{ + return ((val) << CP_LOAD_STATE_1_STATE_TYPE__SHIFT) & CP_LOAD_STATE_1_STATE_TYPE__MASK; +} +#define CP_LOAD_STATE_1_EXT_SRC_ADDR__MASK 0xfffffffc +#define CP_LOAD_STATE_1_EXT_SRC_ADDR__SHIFT 2 +static inline uint32_t CP_LOAD_STATE_1_EXT_SRC_ADDR(uint32_t val) +{ + assert(!(val & 0x3)); + return ((val >> 2) << CP_LOAD_STATE_1_EXT_SRC_ADDR__SHIFT) & CP_LOAD_STATE_1_EXT_SRC_ADDR__MASK; +} + +#define REG_CP_LOAD_STATE4_0 0x00000000 +#define CP_LOAD_STATE4_0_DST_OFF__MASK 0x00003fff +#define CP_LOAD_STATE4_0_DST_OFF__SHIFT 0 +static inline uint32_t CP_LOAD_STATE4_0_DST_OFF(uint32_t val) +{ + return ((val) << CP_LOAD_STATE4_0_DST_OFF__SHIFT) & CP_LOAD_STATE4_0_DST_OFF__MASK; +} +#define CP_LOAD_STATE4_0_STATE_SRC__MASK 0x00030000 +#define CP_LOAD_STATE4_0_STATE_SRC__SHIFT 16 +static inline uint32_t CP_LOAD_STATE4_0_STATE_SRC(enum a4xx_state_src val) +{ + return ((val) << CP_LOAD_STATE4_0_STATE_SRC__SHIFT) & CP_LOAD_STATE4_0_STATE_SRC__MASK; +} +#define CP_LOAD_STATE4_0_STATE_BLOCK__MASK 0x003c0000 +#define CP_LOAD_STATE4_0_STATE_BLOCK__SHIFT 18 +static inline uint32_t CP_LOAD_STATE4_0_STATE_BLOCK(enum a4xx_state_block val) +{ + return ((val) << CP_LOAD_STATE4_0_STATE_BLOCK__SHIFT) & CP_LOAD_STATE4_0_STATE_BLOCK__MASK; +} +#define CP_LOAD_STATE4_0_NUM_UNIT__MASK 0xffc00000 +#define CP_LOAD_STATE4_0_NUM_UNIT__SHIFT 22 +static inline uint32_t CP_LOAD_STATE4_0_NUM_UNIT(uint32_t val) +{ + return ((val) << CP_LOAD_STATE4_0_NUM_UNIT__SHIFT) & CP_LOAD_STATE4_0_NUM_UNIT__MASK; 
+} + +#define REG_CP_LOAD_STATE4_1 0x00000001 +#define CP_LOAD_STATE4_1_STATE_TYPE__MASK 0x00000003 +#define CP_LOAD_STATE4_1_STATE_TYPE__SHIFT 0 +static inline uint32_t CP_LOAD_STATE4_1_STATE_TYPE(enum a4xx_state_type val) +{ + return ((val) << CP_LOAD_STATE4_1_STATE_TYPE__SHIFT) & CP_LOAD_STATE4_1_STATE_TYPE__MASK; +} +#define CP_LOAD_STATE4_1_EXT_SRC_ADDR__MASK 0xfffffffc +#define CP_LOAD_STATE4_1_EXT_SRC_ADDR__SHIFT 2 +static inline uint32_t CP_LOAD_STATE4_1_EXT_SRC_ADDR(uint32_t val) +{ + assert(!(val & 0x3)); + return ((val >> 2) << CP_LOAD_STATE4_1_EXT_SRC_ADDR__SHIFT) & CP_LOAD_STATE4_1_EXT_SRC_ADDR__MASK; +} + +#define REG_CP_LOAD_STATE4_2 0x00000002 +#define CP_LOAD_STATE4_2_EXT_SRC_ADDR_HI__MASK 0xffffffff +#define CP_LOAD_STATE4_2_EXT_SRC_ADDR_HI__SHIFT 0 +static inline uint32_t CP_LOAD_STATE4_2_EXT_SRC_ADDR_HI(uint32_t val) +{ + return ((val) << CP_LOAD_STATE4_2_EXT_SRC_ADDR_HI__SHIFT) & CP_LOAD_STATE4_2_EXT_SRC_ADDR_HI__MASK; +} + +#define REG_CP_LOAD_STATE6_0 0x00000000 +#define CP_LOAD_STATE6_0_DST_OFF__MASK 0x00003fff +#define CP_LOAD_STATE6_0_DST_OFF__SHIFT 0 +static inline uint32_t CP_LOAD_STATE6_0_DST_OFF(uint32_t val) +{ + return ((val) << CP_LOAD_STATE6_0_DST_OFF__SHIFT) & CP_LOAD_STATE6_0_DST_OFF__MASK; +} +#define CP_LOAD_STATE6_0_STATE_TYPE__MASK 0x00004000 +#define CP_LOAD_STATE6_0_STATE_TYPE__SHIFT 14 +static inline uint32_t CP_LOAD_STATE6_0_STATE_TYPE(enum a6xx_state_type val) +{ + return ((val) << CP_LOAD_STATE6_0_STATE_TYPE__SHIFT) & CP_LOAD_STATE6_0_STATE_TYPE__MASK; +} +#define CP_LOAD_STATE6_0_STATE_SRC__MASK 0x00030000 +#define CP_LOAD_STATE6_0_STATE_SRC__SHIFT 16 +static inline uint32_t CP_LOAD_STATE6_0_STATE_SRC(enum a6xx_state_src val) +{ + return ((val) << CP_LOAD_STATE6_0_STATE_SRC__SHIFT) & CP_LOAD_STATE6_0_STATE_SRC__MASK; +} +#define CP_LOAD_STATE6_0_STATE_BLOCK__MASK 0x003c0000 +#define CP_LOAD_STATE6_0_STATE_BLOCK__SHIFT 18 +static inline uint32_t CP_LOAD_STATE6_0_STATE_BLOCK(enum a6xx_state_block val) +{ + return ((val) 
<< CP_LOAD_STATE6_0_STATE_BLOCK__SHIFT) & CP_LOAD_STATE6_0_STATE_BLOCK__MASK; +} +#define CP_LOAD_STATE6_0_NUM_UNIT__MASK 0xffc00000 +#define CP_LOAD_STATE6_0_NUM_UNIT__SHIFT 22 +static inline uint32_t CP_LOAD_STATE6_0_NUM_UNIT(uint32_t val) +{ + return ((val) << CP_LOAD_STATE6_0_NUM_UNIT__SHIFT) & CP_LOAD_STATE6_0_NUM_UNIT__MASK; +} + +#define REG_CP_LOAD_STATE6_1 0x00000001 +#define CP_LOAD_STATE6_1_EXT_SRC_ADDR__MASK 0xfffffffc +#define CP_LOAD_STATE6_1_EXT_SRC_ADDR__SHIFT 2 +static inline uint32_t CP_LOAD_STATE6_1_EXT_SRC_ADDR(uint32_t val) +{ + assert(!(val & 0x3)); + return ((val >> 2) << CP_LOAD_STATE6_1_EXT_SRC_ADDR__SHIFT) & CP_LOAD_STATE6_1_EXT_SRC_ADDR__MASK; +} + +#define REG_CP_LOAD_STATE6_2 0x00000002 +#define CP_LOAD_STATE6_2_EXT_SRC_ADDR_HI__MASK 0xffffffff +#define CP_LOAD_STATE6_2_EXT_SRC_ADDR_HI__SHIFT 0 +static inline uint32_t CP_LOAD_STATE6_2_EXT_SRC_ADDR_HI(uint32_t val) +{ + return ((val) << CP_LOAD_STATE6_2_EXT_SRC_ADDR_HI__SHIFT) & CP_LOAD_STATE6_2_EXT_SRC_ADDR_HI__MASK; +} + +#define REG_CP_DRAW_INDX_0 0x00000000 +#define CP_DRAW_INDX_0_VIZ_QUERY__MASK 0xffffffff +#define CP_DRAW_INDX_0_VIZ_QUERY__SHIFT 0 +static inline uint32_t CP_DRAW_INDX_0_VIZ_QUERY(uint32_t val) +{ + return ((val) << CP_DRAW_INDX_0_VIZ_QUERY__SHIFT) & CP_DRAW_INDX_0_VIZ_QUERY__MASK; +} + +#define REG_CP_DRAW_INDX_1 0x00000001 +#define CP_DRAW_INDX_1_PRIM_TYPE__MASK 0x0000003f +#define CP_DRAW_INDX_1_PRIM_TYPE__SHIFT 0 +static inline uint32_t CP_DRAW_INDX_1_PRIM_TYPE(enum pc_di_primtype val) +{ + return ((val) << CP_DRAW_INDX_1_PRIM_TYPE__SHIFT) & CP_DRAW_INDX_1_PRIM_TYPE__MASK; +} +#define CP_DRAW_INDX_1_SOURCE_SELECT__MASK 0x000000c0 +#define CP_DRAW_INDX_1_SOURCE_SELECT__SHIFT 6 +static inline uint32_t CP_DRAW_INDX_1_SOURCE_SELECT(enum pc_di_src_sel val) +{ + return ((val) << CP_DRAW_INDX_1_SOURCE_SELECT__SHIFT) & CP_DRAW_INDX_1_SOURCE_SELECT__MASK; +} +#define CP_DRAW_INDX_1_VIS_CULL__MASK 0x00000600 +#define CP_DRAW_INDX_1_VIS_CULL__SHIFT 9 +static inline uint32_t 
CP_DRAW_INDX_1_VIS_CULL(enum pc_di_vis_cull_mode val) +{ + return ((val) << CP_DRAW_INDX_1_VIS_CULL__SHIFT) & CP_DRAW_INDX_1_VIS_CULL__MASK; +} +#define CP_DRAW_INDX_1_INDEX_SIZE__MASK 0x00000800 +#define CP_DRAW_INDX_1_INDEX_SIZE__SHIFT 11 +static inline uint32_t CP_DRAW_INDX_1_INDEX_SIZE(enum pc_di_index_size val) +{ + return ((val) << CP_DRAW_INDX_1_INDEX_SIZE__SHIFT) & CP_DRAW_INDX_1_INDEX_SIZE__MASK; +} +#define CP_DRAW_INDX_1_NOT_EOP 0x00001000 +#define CP_DRAW_INDX_1_SMALL_INDEX 0x00002000 +#define CP_DRAW_INDX_1_PRE_DRAW_INITIATOR_ENABLE 0x00004000 +#define CP_DRAW_INDX_1_NUM_INSTANCES__MASK 0xff000000 +#define CP_DRAW_INDX_1_NUM_INSTANCES__SHIFT 24 +static inline uint32_t CP_DRAW_INDX_1_NUM_INSTANCES(uint32_t val) +{ + return ((val) << CP_DRAW_INDX_1_NUM_INSTANCES__SHIFT) & CP_DRAW_INDX_1_NUM_INSTANCES__MASK; +} + +#define REG_CP_DRAW_INDX_2 0x00000002 +#define CP_DRAW_INDX_2_NUM_INDICES__MASK 0xffffffff +#define CP_DRAW_INDX_2_NUM_INDICES__SHIFT 0 +static inline uint32_t CP_DRAW_INDX_2_NUM_INDICES(uint32_t val) +{ + return ((val) << CP_DRAW_INDX_2_NUM_INDICES__SHIFT) & CP_DRAW_INDX_2_NUM_INDICES__MASK; +} + +#define REG_CP_DRAW_INDX_3 0x00000003 +#define CP_DRAW_INDX_3_INDX_BASE__MASK 0xffffffff +#define CP_DRAW_INDX_3_INDX_BASE__SHIFT 0 +static inline uint32_t CP_DRAW_INDX_3_INDX_BASE(uint32_t val) +{ + return ((val) << CP_DRAW_INDX_3_INDX_BASE__SHIFT) & CP_DRAW_INDX_3_INDX_BASE__MASK; +} + +#define REG_CP_DRAW_INDX_4 0x00000004 +#define CP_DRAW_INDX_4_INDX_SIZE__MASK 0xffffffff +#define CP_DRAW_INDX_4_INDX_SIZE__SHIFT 0 +static inline uint32_t CP_DRAW_INDX_4_INDX_SIZE(uint32_t val) +{ + return ((val) << CP_DRAW_INDX_4_INDX_SIZE__SHIFT) & CP_DRAW_INDX_4_INDX_SIZE__MASK; +} + +#define REG_CP_DRAW_INDX_2_0 0x00000000 +#define CP_DRAW_INDX_2_0_VIZ_QUERY__MASK 0xffffffff +#define CP_DRAW_INDX_2_0_VIZ_QUERY__SHIFT 0 +static inline uint32_t CP_DRAW_INDX_2_0_VIZ_QUERY(uint32_t val) +{ + return ((val) << CP_DRAW_INDX_2_0_VIZ_QUERY__SHIFT) & 
CP_DRAW_INDX_2_0_VIZ_QUERY__MASK; +} + +#define REG_CP_DRAW_INDX_2_1 0x00000001 +#define CP_DRAW_INDX_2_1_PRIM_TYPE__MASK 0x0000003f +#define CP_DRAW_INDX_2_1_PRIM_TYPE__SHIFT 0 +static inline uint32_t CP_DRAW_INDX_2_1_PRIM_TYPE(enum pc_di_primtype val) +{ + return ((val) << CP_DRAW_INDX_2_1_PRIM_TYPE__SHIFT) & CP_DRAW_INDX_2_1_PRIM_TYPE__MASK; +} +#define CP_DRAW_INDX_2_1_SOURCE_SELECT__MASK 0x000000c0 +#define CP_DRAW_INDX_2_1_SOURCE_SELECT__SHIFT 6 +static inline uint32_t CP_DRAW_INDX_2_1_SOURCE_SELECT(enum pc_di_src_sel val) +{ + return ((val) << CP_DRAW_INDX_2_1_SOURCE_SELECT__SHIFT) & CP_DRAW_INDX_2_1_SOURCE_SELECT__MASK; +} +#define CP_DRAW_INDX_2_1_VIS_CULL__MASK 0x00000600 +#define CP_DRAW_INDX_2_1_VIS_CULL__SHIFT 9 +static inline uint32_t CP_DRAW_INDX_2_1_VIS_CULL(enum pc_di_vis_cull_mode val) +{ + return ((val) << CP_DRAW_INDX_2_1_VIS_CULL__SHIFT) & CP_DRAW_INDX_2_1_VIS_CULL__MASK; +} +#define CP_DRAW_INDX_2_1_INDEX_SIZE__MASK 0x00000800 +#define CP_DRAW_INDX_2_1_INDEX_SIZE__SHIFT 11 +static inline uint32_t CP_DRAW_INDX_2_1_INDEX_SIZE(enum pc_di_index_size val) +{ + return ((val) << CP_DRAW_INDX_2_1_INDEX_SIZE__SHIFT) & CP_DRAW_INDX_2_1_INDEX_SIZE__MASK; +} +#define CP_DRAW_INDX_2_1_NOT_EOP 0x00001000 +#define CP_DRAW_INDX_2_1_SMALL_INDEX 0x00002000 +#define CP_DRAW_INDX_2_1_PRE_DRAW_INITIATOR_ENABLE 0x00004000 +#define CP_DRAW_INDX_2_1_NUM_INSTANCES__MASK 0xff000000 +#define CP_DRAW_INDX_2_1_NUM_INSTANCES__SHIFT 24 +static inline uint32_t CP_DRAW_INDX_2_1_NUM_INSTANCES(uint32_t val) +{ + return ((val) << CP_DRAW_INDX_2_1_NUM_INSTANCES__SHIFT) & CP_DRAW_INDX_2_1_NUM_INSTANCES__MASK; +} + +#define REG_CP_DRAW_INDX_2_2 0x00000002 +#define CP_DRAW_INDX_2_2_NUM_INDICES__MASK 0xffffffff +#define CP_DRAW_INDX_2_2_NUM_INDICES__SHIFT 0 +static inline uint32_t CP_DRAW_INDX_2_2_NUM_INDICES(uint32_t val) +{ + return ((val) << CP_DRAW_INDX_2_2_NUM_INDICES__SHIFT) & CP_DRAW_INDX_2_2_NUM_INDICES__MASK; +} + +#define REG_CP_DRAW_INDX_OFFSET_0 0x00000000 +#define 
CP_DRAW_INDX_OFFSET_0_PRIM_TYPE__MASK 0x0000003f +#define CP_DRAW_INDX_OFFSET_0_PRIM_TYPE__SHIFT 0 +static inline uint32_t CP_DRAW_INDX_OFFSET_0_PRIM_TYPE(enum pc_di_primtype val) +{ + return ((val) << CP_DRAW_INDX_OFFSET_0_PRIM_TYPE__SHIFT) & CP_DRAW_INDX_OFFSET_0_PRIM_TYPE__MASK; +} +#define CP_DRAW_INDX_OFFSET_0_SOURCE_SELECT__MASK 0x000000c0 +#define CP_DRAW_INDX_OFFSET_0_SOURCE_SELECT__SHIFT 6 +static inline uint32_t CP_DRAW_INDX_OFFSET_0_SOURCE_SELECT(enum pc_di_src_sel val) +{ + return ((val) << CP_DRAW_INDX_OFFSET_0_SOURCE_SELECT__SHIFT) & CP_DRAW_INDX_OFFSET_0_SOURCE_SELECT__MASK; +} +#define CP_DRAW_INDX_OFFSET_0_VIS_CULL__MASK 0x00000300 +#define CP_DRAW_INDX_OFFSET_0_VIS_CULL__SHIFT 8 +static inline uint32_t CP_DRAW_INDX_OFFSET_0_VIS_CULL(enum pc_di_vis_cull_mode val) +{ + return ((val) << CP_DRAW_INDX_OFFSET_0_VIS_CULL__SHIFT) & CP_DRAW_INDX_OFFSET_0_VIS_CULL__MASK; +} +#define CP_DRAW_INDX_OFFSET_0_INDEX_SIZE__MASK 0x00000c00 +#define CP_DRAW_INDX_OFFSET_0_INDEX_SIZE__SHIFT 10 +static inline uint32_t CP_DRAW_INDX_OFFSET_0_INDEX_SIZE(enum a4xx_index_size val) +{ + return ((val) << CP_DRAW_INDX_OFFSET_0_INDEX_SIZE__SHIFT) & CP_DRAW_INDX_OFFSET_0_INDEX_SIZE__MASK; +} +#define CP_DRAW_INDX_OFFSET_0_TESS_MODE__MASK 0x01f00000 +#define CP_DRAW_INDX_OFFSET_0_TESS_MODE__SHIFT 20 +static inline uint32_t CP_DRAW_INDX_OFFSET_0_TESS_MODE(uint32_t val) +{ + return ((val) << CP_DRAW_INDX_OFFSET_0_TESS_MODE__SHIFT) & CP_DRAW_INDX_OFFSET_0_TESS_MODE__MASK; +} + +#define REG_CP_DRAW_INDX_OFFSET_1 0x00000001 +#define CP_DRAW_INDX_OFFSET_1_NUM_INSTANCES__MASK 0xffffffff +#define CP_DRAW_INDX_OFFSET_1_NUM_INSTANCES__SHIFT 0 +static inline uint32_t CP_DRAW_INDX_OFFSET_1_NUM_INSTANCES(uint32_t val) +{ + return ((val) << CP_DRAW_INDX_OFFSET_1_NUM_INSTANCES__SHIFT) & CP_DRAW_INDX_OFFSET_1_NUM_INSTANCES__MASK; +} + +#define REG_CP_DRAW_INDX_OFFSET_2 0x00000002 +#define CP_DRAW_INDX_OFFSET_2_NUM_INDICES__MASK 0xffffffff +#define CP_DRAW_INDX_OFFSET_2_NUM_INDICES__SHIFT 0 
+static inline uint32_t CP_DRAW_INDX_OFFSET_2_NUM_INDICES(uint32_t val) +{ + return ((val) << CP_DRAW_INDX_OFFSET_2_NUM_INDICES__SHIFT) & CP_DRAW_INDX_OFFSET_2_NUM_INDICES__MASK; +} + +#define REG_CP_DRAW_INDX_OFFSET_3 0x00000003 + +#define REG_CP_DRAW_INDX_OFFSET_4 0x00000004 +#define CP_DRAW_INDX_OFFSET_4_INDX_BASE__MASK 0xffffffff +#define CP_DRAW_INDX_OFFSET_4_INDX_BASE__SHIFT 0 +static inline uint32_t CP_DRAW_INDX_OFFSET_4_INDX_BASE(uint32_t val) +{ + return ((val) << CP_DRAW_INDX_OFFSET_4_INDX_BASE__SHIFT) & CP_DRAW_INDX_OFFSET_4_INDX_BASE__MASK; +} + +#define REG_CP_DRAW_INDX_OFFSET_5 0x00000005 +#define CP_DRAW_INDX_OFFSET_5_INDX_SIZE__MASK 0xffffffff +#define CP_DRAW_INDX_OFFSET_5_INDX_SIZE__SHIFT 0 +static inline uint32_t CP_DRAW_INDX_OFFSET_5_INDX_SIZE(uint32_t val) +{ + return ((val) << CP_DRAW_INDX_OFFSET_5_INDX_SIZE__SHIFT) & CP_DRAW_INDX_OFFSET_5_INDX_SIZE__MASK; +} + +#define REG_A4XX_CP_DRAW_INDIRECT_0 0x00000000 +#define A4XX_CP_DRAW_INDIRECT_0_PRIM_TYPE__MASK 0x0000003f +#define A4XX_CP_DRAW_INDIRECT_0_PRIM_TYPE__SHIFT 0 +static inline uint32_t A4XX_CP_DRAW_INDIRECT_0_PRIM_TYPE(enum pc_di_primtype val) +{ + return ((val) << A4XX_CP_DRAW_INDIRECT_0_PRIM_TYPE__SHIFT) & A4XX_CP_DRAW_INDIRECT_0_PRIM_TYPE__MASK; +} +#define A4XX_CP_DRAW_INDIRECT_0_SOURCE_SELECT__MASK 0x000000c0 +#define A4XX_CP_DRAW_INDIRECT_0_SOURCE_SELECT__SHIFT 6 +static inline uint32_t A4XX_CP_DRAW_INDIRECT_0_SOURCE_SELECT(enum pc_di_src_sel val) +{ + return ((val) << A4XX_CP_DRAW_INDIRECT_0_SOURCE_SELECT__SHIFT) & A4XX_CP_DRAW_INDIRECT_0_SOURCE_SELECT__MASK; +} +#define A4XX_CP_DRAW_INDIRECT_0_VIS_CULL__MASK 0x00000300 +#define A4XX_CP_DRAW_INDIRECT_0_VIS_CULL__SHIFT 8 +static inline uint32_t A4XX_CP_DRAW_INDIRECT_0_VIS_CULL(enum pc_di_vis_cull_mode val) +{ + return ((val) << A4XX_CP_DRAW_INDIRECT_0_VIS_CULL__SHIFT) & A4XX_CP_DRAW_INDIRECT_0_VIS_CULL__MASK; +} +#define A4XX_CP_DRAW_INDIRECT_0_INDEX_SIZE__MASK 0x00000c00 +#define A4XX_CP_DRAW_INDIRECT_0_INDEX_SIZE__SHIFT 10 
+static inline uint32_t A4XX_CP_DRAW_INDIRECT_0_INDEX_SIZE(enum a4xx_index_size val) +{ + return ((val) << A4XX_CP_DRAW_INDIRECT_0_INDEX_SIZE__SHIFT) & A4XX_CP_DRAW_INDIRECT_0_INDEX_SIZE__MASK; +} +#define A4XX_CP_DRAW_INDIRECT_0_TESS_MODE__MASK 0x01f00000 +#define A4XX_CP_DRAW_INDIRECT_0_TESS_MODE__SHIFT 20 +static inline uint32_t A4XX_CP_DRAW_INDIRECT_0_TESS_MODE(uint32_t val) +{ + return ((val) << A4XX_CP_DRAW_INDIRECT_0_TESS_MODE__SHIFT) & A4XX_CP_DRAW_INDIRECT_0_TESS_MODE__MASK; +} + +#define REG_A4XX_CP_DRAW_INDIRECT_1 0x00000001 +#define A4XX_CP_DRAW_INDIRECT_1_INDIRECT__MASK 0xffffffff +#define A4XX_CP_DRAW_INDIRECT_1_INDIRECT__SHIFT 0 +static inline uint32_t A4XX_CP_DRAW_INDIRECT_1_INDIRECT(uint32_t val) +{ + return ((val) << A4XX_CP_DRAW_INDIRECT_1_INDIRECT__SHIFT) & A4XX_CP_DRAW_INDIRECT_1_INDIRECT__MASK; +} + + +#define REG_A5XX_CP_DRAW_INDIRECT_2 0x00000002 +#define A5XX_CP_DRAW_INDIRECT_2_INDIRECT_HI__MASK 0xffffffff +#define A5XX_CP_DRAW_INDIRECT_2_INDIRECT_HI__SHIFT 0 +static inline uint32_t A5XX_CP_DRAW_INDIRECT_2_INDIRECT_HI(uint32_t val) +{ + return ((val) << A5XX_CP_DRAW_INDIRECT_2_INDIRECT_HI__SHIFT) & A5XX_CP_DRAW_INDIRECT_2_INDIRECT_HI__MASK; +} + +#define REG_A4XX_CP_DRAW_INDX_INDIRECT_0 0x00000000 +#define A4XX_CP_DRAW_INDX_INDIRECT_0_PRIM_TYPE__MASK 0x0000003f +#define A4XX_CP_DRAW_INDX_INDIRECT_0_PRIM_TYPE__SHIFT 0 +static inline uint32_t A4XX_CP_DRAW_INDX_INDIRECT_0_PRIM_TYPE(enum pc_di_primtype val) +{ + return ((val) << A4XX_CP_DRAW_INDX_INDIRECT_0_PRIM_TYPE__SHIFT) & A4XX_CP_DRAW_INDX_INDIRECT_0_PRIM_TYPE__MASK; +} +#define A4XX_CP_DRAW_INDX_INDIRECT_0_SOURCE_SELECT__MASK 0x000000c0 +#define A4XX_CP_DRAW_INDX_INDIRECT_0_SOURCE_SELECT__SHIFT 6 +static inline uint32_t A4XX_CP_DRAW_INDX_INDIRECT_0_SOURCE_SELECT(enum pc_di_src_sel val) +{ + return ((val) << A4XX_CP_DRAW_INDX_INDIRECT_0_SOURCE_SELECT__SHIFT) & A4XX_CP_DRAW_INDX_INDIRECT_0_SOURCE_SELECT__MASK; +} +#define A4XX_CP_DRAW_INDX_INDIRECT_0_VIS_CULL__MASK 0x00000300 +#define 
A4XX_CP_DRAW_INDX_INDIRECT_0_VIS_CULL__SHIFT 8 +static inline uint32_t A4XX_CP_DRAW_INDX_INDIRECT_0_VIS_CULL(enum pc_di_vis_cull_mode val) +{ + return ((val) << A4XX_CP_DRAW_INDX_INDIRECT_0_VIS_CULL__SHIFT) & A4XX_CP_DRAW_INDX_INDIRECT_0_VIS_CULL__MASK; +} +#define A4XX_CP_DRAW_INDX_INDIRECT_0_INDEX_SIZE__MASK 0x00000c00 +#define A4XX_CP_DRAW_INDX_INDIRECT_0_INDEX_SIZE__SHIFT 10 +static inline uint32_t A4XX_CP_DRAW_INDX_INDIRECT_0_INDEX_SIZE(enum a4xx_index_size val) +{ + return ((val) << A4XX_CP_DRAW_INDX_INDIRECT_0_INDEX_SIZE__SHIFT) & A4XX_CP_DRAW_INDX_INDIRECT_0_INDEX_SIZE__MASK; +} +#define A4XX_CP_DRAW_INDX_INDIRECT_0_TESS_MODE__MASK 0x01f00000 +#define A4XX_CP_DRAW_INDX_INDIRECT_0_TESS_MODE__SHIFT 20 +static inline uint32_t A4XX_CP_DRAW_INDX_INDIRECT_0_TESS_MODE(uint32_t val) +{ + return ((val) << A4XX_CP_DRAW_INDX_INDIRECT_0_TESS_MODE__SHIFT) & A4XX_CP_DRAW_INDX_INDIRECT_0_TESS_MODE__MASK; +} + + +#define REG_A4XX_CP_DRAW_INDX_INDIRECT_1 0x00000001 +#define A4XX_CP_DRAW_INDX_INDIRECT_1_INDX_BASE__MASK 0xffffffff +#define A4XX_CP_DRAW_INDX_INDIRECT_1_INDX_BASE__SHIFT 0 +static inline uint32_t A4XX_CP_DRAW_INDX_INDIRECT_1_INDX_BASE(uint32_t val) +{ + return ((val) << A4XX_CP_DRAW_INDX_INDIRECT_1_INDX_BASE__SHIFT) & A4XX_CP_DRAW_INDX_INDIRECT_1_INDX_BASE__MASK; +} + +#define REG_A4XX_CP_DRAW_INDX_INDIRECT_2 0x00000002 +#define A4XX_CP_DRAW_INDX_INDIRECT_2_INDX_SIZE__MASK 0xffffffff +#define A4XX_CP_DRAW_INDX_INDIRECT_2_INDX_SIZE__SHIFT 0 +static inline uint32_t A4XX_CP_DRAW_INDX_INDIRECT_2_INDX_SIZE(uint32_t val) +{ + return ((val) << A4XX_CP_DRAW_INDX_INDIRECT_2_INDX_SIZE__SHIFT) & A4XX_CP_DRAW_INDX_INDIRECT_2_INDX_SIZE__MASK; +} + +#define REG_A4XX_CP_DRAW_INDX_INDIRECT_3 0x00000003 +#define A4XX_CP_DRAW_INDX_INDIRECT_3_INDIRECT__MASK 0xffffffff +#define A4XX_CP_DRAW_INDX_INDIRECT_3_INDIRECT__SHIFT 0 +static inline uint32_t A4XX_CP_DRAW_INDX_INDIRECT_3_INDIRECT(uint32_t val) +{ + return ((val) << A4XX_CP_DRAW_INDX_INDIRECT_3_INDIRECT__SHIFT) & 
A4XX_CP_DRAW_INDX_INDIRECT_3_INDIRECT__MASK; +} + + +#define REG_A5XX_CP_DRAW_INDX_INDIRECT_1 0x00000001 +#define A5XX_CP_DRAW_INDX_INDIRECT_1_INDX_BASE_LO__MASK 0xffffffff +#define A5XX_CP_DRAW_INDX_INDIRECT_1_INDX_BASE_LO__SHIFT 0 +static inline uint32_t A5XX_CP_DRAW_INDX_INDIRECT_1_INDX_BASE_LO(uint32_t val) +{ + return ((val) << A5XX_CP_DRAW_INDX_INDIRECT_1_INDX_BASE_LO__SHIFT) & A5XX_CP_DRAW_INDX_INDIRECT_1_INDX_BASE_LO__MASK; +} + +#define REG_A5XX_CP_DRAW_INDX_INDIRECT_2 0x00000002 +#define A5XX_CP_DRAW_INDX_INDIRECT_2_INDX_BASE_HI__MASK 0xffffffff +#define A5XX_CP_DRAW_INDX_INDIRECT_2_INDX_BASE_HI__SHIFT 0 +static inline uint32_t A5XX_CP_DRAW_INDX_INDIRECT_2_INDX_BASE_HI(uint32_t val) +{ + return ((val) << A5XX_CP_DRAW_INDX_INDIRECT_2_INDX_BASE_HI__SHIFT) & A5XX_CP_DRAW_INDX_INDIRECT_2_INDX_BASE_HI__MASK; +} + +#define REG_A5XX_CP_DRAW_INDX_INDIRECT_3 0x00000003 +#define A5XX_CP_DRAW_INDX_INDIRECT_3_MAX_INDICES__MASK 0xffffffff +#define A5XX_CP_DRAW_INDX_INDIRECT_3_MAX_INDICES__SHIFT 0 +static inline uint32_t A5XX_CP_DRAW_INDX_INDIRECT_3_MAX_INDICES(uint32_t val) +{ + return ((val) << A5XX_CP_DRAW_INDX_INDIRECT_3_MAX_INDICES__SHIFT) & A5XX_CP_DRAW_INDX_INDIRECT_3_MAX_INDICES__MASK; +} + +#define REG_A5XX_CP_DRAW_INDX_INDIRECT_4 0x00000004 +#define A5XX_CP_DRAW_INDX_INDIRECT_4_INDIRECT_LO__MASK 0xffffffff +#define A5XX_CP_DRAW_INDX_INDIRECT_4_INDIRECT_LO__SHIFT 0 +static inline uint32_t A5XX_CP_DRAW_INDX_INDIRECT_4_INDIRECT_LO(uint32_t val) +{ + return ((val) << A5XX_CP_DRAW_INDX_INDIRECT_4_INDIRECT_LO__SHIFT) & A5XX_CP_DRAW_INDX_INDIRECT_4_INDIRECT_LO__MASK; +} + +#define REG_A5XX_CP_DRAW_INDX_INDIRECT_5 0x00000005 +#define A5XX_CP_DRAW_INDX_INDIRECT_5_INDIRECT_HI__MASK 0xffffffff +#define A5XX_CP_DRAW_INDX_INDIRECT_5_INDIRECT_HI__SHIFT 0 +static inline uint32_t A5XX_CP_DRAW_INDX_INDIRECT_5_INDIRECT_HI(uint32_t val) +{ + return ((val) << A5XX_CP_DRAW_INDX_INDIRECT_5_INDIRECT_HI__SHIFT) & A5XX_CP_DRAW_INDX_INDIRECT_5_INDIRECT_HI__MASK; +} + +static inline 
uint32_t REG_CP_SET_DRAW_STATE_(uint32_t i0) { return 0x00000000 + 0x3*i0; } + +static inline uint32_t REG_CP_SET_DRAW_STATE__0(uint32_t i0) { return 0x00000000 + 0x3*i0; } +#define CP_SET_DRAW_STATE__0_COUNT__MASK 0x0000ffff +#define CP_SET_DRAW_STATE__0_COUNT__SHIFT 0 +static inline uint32_t CP_SET_DRAW_STATE__0_COUNT(uint32_t val) +{ + return ((val) << CP_SET_DRAW_STATE__0_COUNT__SHIFT) & CP_SET_DRAW_STATE__0_COUNT__MASK; +} +#define CP_SET_DRAW_STATE__0_DIRTY 0x00010000 +#define CP_SET_DRAW_STATE__0_DISABLE 0x00020000 +#define CP_SET_DRAW_STATE__0_DISABLE_ALL_GROUPS 0x00040000 +#define CP_SET_DRAW_STATE__0_LOAD_IMMED 0x00080000 +#define CP_SET_DRAW_STATE__0_ENABLE_MASK__MASK 0x00f00000 +#define CP_SET_DRAW_STATE__0_ENABLE_MASK__SHIFT 20 +static inline uint32_t CP_SET_DRAW_STATE__0_ENABLE_MASK(uint32_t val) +{ + return ((val) << CP_SET_DRAW_STATE__0_ENABLE_MASK__SHIFT) & CP_SET_DRAW_STATE__0_ENABLE_MASK__MASK; +} +#define CP_SET_DRAW_STATE__0_GROUP_ID__MASK 0x1f000000 +#define CP_SET_DRAW_STATE__0_GROUP_ID__SHIFT 24 +static inline uint32_t CP_SET_DRAW_STATE__0_GROUP_ID(uint32_t val) +{ + return ((val) << CP_SET_DRAW_STATE__0_GROUP_ID__SHIFT) & CP_SET_DRAW_STATE__0_GROUP_ID__MASK; +} + +static inline uint32_t REG_CP_SET_DRAW_STATE__1(uint32_t i0) { return 0x00000001 + 0x3*i0; } +#define CP_SET_DRAW_STATE__1_ADDR_LO__MASK 0xffffffff +#define CP_SET_DRAW_STATE__1_ADDR_LO__SHIFT 0 +static inline uint32_t CP_SET_DRAW_STATE__1_ADDR_LO(uint32_t val) +{ + return ((val) << CP_SET_DRAW_STATE__1_ADDR_LO__SHIFT) & CP_SET_DRAW_STATE__1_ADDR_LO__MASK; +} + +static inline uint32_t REG_CP_SET_DRAW_STATE__2(uint32_t i0) { return 0x00000002 + 0x3*i0; } +#define CP_SET_DRAW_STATE__2_ADDR_HI__MASK 0xffffffff +#define CP_SET_DRAW_STATE__2_ADDR_HI__SHIFT 0 +static inline uint32_t CP_SET_DRAW_STATE__2_ADDR_HI(uint32_t val) +{ + return ((val) << CP_SET_DRAW_STATE__2_ADDR_HI__SHIFT) & CP_SET_DRAW_STATE__2_ADDR_HI__MASK; +} + +#define REG_CP_SET_BIN_0 0x00000000 + +#define 
REG_CP_SET_BIN_1 0x00000001 +#define CP_SET_BIN_1_X1__MASK 0x0000ffff +#define CP_SET_BIN_1_X1__SHIFT 0 +static inline uint32_t CP_SET_BIN_1_X1(uint32_t val) +{ + return ((val) << CP_SET_BIN_1_X1__SHIFT) & CP_SET_BIN_1_X1__MASK; +} +#define CP_SET_BIN_1_Y1__MASK 0xffff0000 +#define CP_SET_BIN_1_Y1__SHIFT 16 +static inline uint32_t CP_SET_BIN_1_Y1(uint32_t val) +{ + return ((val) << CP_SET_BIN_1_Y1__SHIFT) & CP_SET_BIN_1_Y1__MASK; +} + +#define REG_CP_SET_BIN_2 0x00000002 +#define CP_SET_BIN_2_X2__MASK 0x0000ffff +#define CP_SET_BIN_2_X2__SHIFT 0 +static inline uint32_t CP_SET_BIN_2_X2(uint32_t val) +{ + return ((val) << CP_SET_BIN_2_X2__SHIFT) & CP_SET_BIN_2_X2__MASK; +} +#define CP_SET_BIN_2_Y2__MASK 0xffff0000 +#define CP_SET_BIN_2_Y2__SHIFT 16 +static inline uint32_t CP_SET_BIN_2_Y2(uint32_t val) +{ + return ((val) << CP_SET_BIN_2_Y2__SHIFT) & CP_SET_BIN_2_Y2__MASK; +} + +#define REG_CP_SET_BIN_DATA_0 0x00000000 +#define CP_SET_BIN_DATA_0_BIN_DATA_ADDR__MASK 0xffffffff +#define CP_SET_BIN_DATA_0_BIN_DATA_ADDR__SHIFT 0 +static inline uint32_t CP_SET_BIN_DATA_0_BIN_DATA_ADDR(uint32_t val) +{ + return ((val) << CP_SET_BIN_DATA_0_BIN_DATA_ADDR__SHIFT) & CP_SET_BIN_DATA_0_BIN_DATA_ADDR__MASK; +} + +#define REG_CP_SET_BIN_DATA_1 0x00000001 +#define CP_SET_BIN_DATA_1_BIN_SIZE_ADDRESS__MASK 0xffffffff +#define CP_SET_BIN_DATA_1_BIN_SIZE_ADDRESS__SHIFT 0 +static inline uint32_t CP_SET_BIN_DATA_1_BIN_SIZE_ADDRESS(uint32_t val) +{ + return ((val) << CP_SET_BIN_DATA_1_BIN_SIZE_ADDRESS__SHIFT) & CP_SET_BIN_DATA_1_BIN_SIZE_ADDRESS__MASK; +} + +#define REG_CP_SET_BIN_DATA5_0 0x00000000 +#define CP_SET_BIN_DATA5_0_VSC_SIZE__MASK 0x003f0000 +#define CP_SET_BIN_DATA5_0_VSC_SIZE__SHIFT 16 +static inline uint32_t CP_SET_BIN_DATA5_0_VSC_SIZE(uint32_t val) +{ + return ((val) << CP_SET_BIN_DATA5_0_VSC_SIZE__SHIFT) & CP_SET_BIN_DATA5_0_VSC_SIZE__MASK; +} +#define CP_SET_BIN_DATA5_0_VSC_N__MASK 0x07c00000 +#define CP_SET_BIN_DATA5_0_VSC_N__SHIFT 22 +static inline uint32_t 
CP_SET_BIN_DATA5_0_VSC_N(uint32_t val) +{ + return ((val) << CP_SET_BIN_DATA5_0_VSC_N__SHIFT) & CP_SET_BIN_DATA5_0_VSC_N__MASK; +} + +#define REG_CP_SET_BIN_DATA5_1 0x00000001 +#define CP_SET_BIN_DATA5_1_BIN_DATA_ADDR_LO__MASK 0xffffffff +#define CP_SET_BIN_DATA5_1_BIN_DATA_ADDR_LO__SHIFT 0 +static inline uint32_t CP_SET_BIN_DATA5_1_BIN_DATA_ADDR_LO(uint32_t val) +{ + return ((val) << CP_SET_BIN_DATA5_1_BIN_DATA_ADDR_LO__SHIFT) & CP_SET_BIN_DATA5_1_BIN_DATA_ADDR_LO__MASK; +} + +#define REG_CP_SET_BIN_DATA5_2 0x00000002 +#define CP_SET_BIN_DATA5_2_BIN_DATA_ADDR_HI__MASK 0xffffffff +#define CP_SET_BIN_DATA5_2_BIN_DATA_ADDR_HI__SHIFT 0 +static inline uint32_t CP_SET_BIN_DATA5_2_BIN_DATA_ADDR_HI(uint32_t val) +{ + return ((val) << CP_SET_BIN_DATA5_2_BIN_DATA_ADDR_HI__SHIFT) & CP_SET_BIN_DATA5_2_BIN_DATA_ADDR_HI__MASK; +} + +#define REG_CP_SET_BIN_DATA5_3 0x00000003 +#define CP_SET_BIN_DATA5_3_BIN_SIZE_ADDRESS_LO__MASK 0xffffffff +#define CP_SET_BIN_DATA5_3_BIN_SIZE_ADDRESS_LO__SHIFT 0 +static inline uint32_t CP_SET_BIN_DATA5_3_BIN_SIZE_ADDRESS_LO(uint32_t val) +{ + return ((val) << CP_SET_BIN_DATA5_3_BIN_SIZE_ADDRESS_LO__SHIFT) & CP_SET_BIN_DATA5_3_BIN_SIZE_ADDRESS_LO__MASK; +} + +#define REG_CP_SET_BIN_DATA5_4 0x00000004 +#define CP_SET_BIN_DATA5_4_BIN_SIZE_ADDRESS_HI__MASK 0xffffffff +#define CP_SET_BIN_DATA5_4_BIN_SIZE_ADDRESS_HI__SHIFT 0 +static inline uint32_t CP_SET_BIN_DATA5_4_BIN_SIZE_ADDRESS_HI(uint32_t val) +{ + return ((val) << CP_SET_BIN_DATA5_4_BIN_SIZE_ADDRESS_HI__SHIFT) & CP_SET_BIN_DATA5_4_BIN_SIZE_ADDRESS_HI__MASK; +} + +#define REG_CP_SET_BIN_DATA5_5 0x00000005 +#define CP_SET_BIN_DATA5_5_BIN_DATA_ADDR2_LO__MASK 0xffffffff +#define CP_SET_BIN_DATA5_5_BIN_DATA_ADDR2_LO__SHIFT 0 +static inline uint32_t CP_SET_BIN_DATA5_5_BIN_DATA_ADDR2_LO(uint32_t val) +{ + return ((val) << CP_SET_BIN_DATA5_5_BIN_DATA_ADDR2_LO__SHIFT) & CP_SET_BIN_DATA5_5_BIN_DATA_ADDR2_LO__MASK; +} + +#define REG_CP_SET_BIN_DATA5_6 0x00000006 +#define 
CP_SET_BIN_DATA5_6_BIN_DATA_ADDR2_LO__MASK 0xffffffff +#define CP_SET_BIN_DATA5_6_BIN_DATA_ADDR2_LO__SHIFT 0 +static inline uint32_t CP_SET_BIN_DATA5_6_BIN_DATA_ADDR2_LO(uint32_t val) +{ + return ((val) << CP_SET_BIN_DATA5_6_BIN_DATA_ADDR2_LO__SHIFT) & CP_SET_BIN_DATA5_6_BIN_DATA_ADDR2_LO__MASK; +} + +#define REG_CP_REG_TO_MEM_0 0x00000000 +#define CP_REG_TO_MEM_0_REG__MASK 0x0000ffff +#define CP_REG_TO_MEM_0_REG__SHIFT 0 +static inline uint32_t CP_REG_TO_MEM_0_REG(uint32_t val) +{ + return ((val) << CP_REG_TO_MEM_0_REG__SHIFT) & CP_REG_TO_MEM_0_REG__MASK; +} +#define CP_REG_TO_MEM_0_CNT__MASK 0x3ff80000 +#define CP_REG_TO_MEM_0_CNT__SHIFT 19 +static inline uint32_t CP_REG_TO_MEM_0_CNT(uint32_t val) +{ + return ((val) << CP_REG_TO_MEM_0_CNT__SHIFT) & CP_REG_TO_MEM_0_CNT__MASK; +} +#define CP_REG_TO_MEM_0_64B 0x40000000 +#define CP_REG_TO_MEM_0_ACCUMULATE 0x80000000 + +#define REG_CP_REG_TO_MEM_1 0x00000001 +#define CP_REG_TO_MEM_1_DEST__MASK 0xffffffff +#define CP_REG_TO_MEM_1_DEST__SHIFT 0 +static inline uint32_t CP_REG_TO_MEM_1_DEST(uint32_t val) +{ + return ((val) << CP_REG_TO_MEM_1_DEST__SHIFT) & CP_REG_TO_MEM_1_DEST__MASK; +} + +#define REG_CP_REG_TO_MEM_2 0x00000002 +#define CP_REG_TO_MEM_2_DEST_HI__MASK 0xffffffff +#define CP_REG_TO_MEM_2_DEST_HI__SHIFT 0 +static inline uint32_t CP_REG_TO_MEM_2_DEST_HI(uint32_t val) +{ + return ((val) << CP_REG_TO_MEM_2_DEST_HI__SHIFT) & CP_REG_TO_MEM_2_DEST_HI__MASK; +} + +#define REG_CP_MEM_TO_REG_0 0x00000000 +#define CP_MEM_TO_REG_0_REG__MASK 0x0000ffff +#define CP_MEM_TO_REG_0_REG__SHIFT 0 +static inline uint32_t CP_MEM_TO_REG_0_REG(uint32_t val) +{ + return ((val) << CP_MEM_TO_REG_0_REG__SHIFT) & CP_MEM_TO_REG_0_REG__MASK; +} +#define CP_MEM_TO_REG_0_CNT__MASK 0x3ff80000 +#define CP_MEM_TO_REG_0_CNT__SHIFT 19 +static inline uint32_t CP_MEM_TO_REG_0_CNT(uint32_t val) +{ + return ((val) << CP_MEM_TO_REG_0_CNT__SHIFT) & CP_MEM_TO_REG_0_CNT__MASK; +} +#define CP_MEM_TO_REG_0_64B 0x40000000 +#define 
CP_MEM_TO_REG_0_ACCUMULATE 0x80000000 + +#define REG_CP_MEM_TO_REG_1 0x00000001 +#define CP_MEM_TO_REG_1_SRC__MASK 0xffffffff +#define CP_MEM_TO_REG_1_SRC__SHIFT 0 +static inline uint32_t CP_MEM_TO_REG_1_SRC(uint32_t val) +{ + return ((val) << CP_MEM_TO_REG_1_SRC__SHIFT) & CP_MEM_TO_REG_1_SRC__MASK; +} + +#define REG_CP_MEM_TO_REG_2 0x00000002 +#define CP_MEM_TO_REG_2_SRC_HI__MASK 0xffffffff +#define CP_MEM_TO_REG_2_SRC_HI__SHIFT 0 +static inline uint32_t CP_MEM_TO_REG_2_SRC_HI(uint32_t val) +{ + return ((val) << CP_MEM_TO_REG_2_SRC_HI__SHIFT) & CP_MEM_TO_REG_2_SRC_HI__MASK; +} + +#define REG_CP_MEM_TO_MEM_0 0x00000000 +#define CP_MEM_TO_MEM_0_NEG_A 0x00000001 +#define CP_MEM_TO_MEM_0_NEG_B 0x00000002 +#define CP_MEM_TO_MEM_0_NEG_C 0x00000004 +#define CP_MEM_TO_MEM_0_DOUBLE 0x20000000 + +#define REG_CP_COND_WRITE_0 0x00000000 +#define CP_COND_WRITE_0_FUNCTION__MASK 0x00000007 +#define CP_COND_WRITE_0_FUNCTION__SHIFT 0 +static inline uint32_t CP_COND_WRITE_0_FUNCTION(enum cp_cond_function val) +{ + return ((val) << CP_COND_WRITE_0_FUNCTION__SHIFT) & CP_COND_WRITE_0_FUNCTION__MASK; +} +#define CP_COND_WRITE_0_POLL_MEMORY 0x00000010 +#define CP_COND_WRITE_0_WRITE_MEMORY 0x00000100 + +#define REG_CP_COND_WRITE_1 0x00000001 +#define CP_COND_WRITE_1_POLL_ADDR__MASK 0xffffffff +#define CP_COND_WRITE_1_POLL_ADDR__SHIFT 0 +static inline uint32_t CP_COND_WRITE_1_POLL_ADDR(uint32_t val) +{ + return ((val) << CP_COND_WRITE_1_POLL_ADDR__SHIFT) & CP_COND_WRITE_1_POLL_ADDR__MASK; +} + +#define REG_CP_COND_WRITE_2 0x00000002 +#define CP_COND_WRITE_2_REF__MASK 0xffffffff +#define CP_COND_WRITE_2_REF__SHIFT 0 +static inline uint32_t CP_COND_WRITE_2_REF(uint32_t val) +{ + return ((val) << CP_COND_WRITE_2_REF__SHIFT) & CP_COND_WRITE_2_REF__MASK; +} + +#define REG_CP_COND_WRITE_3 0x00000003 +#define CP_COND_WRITE_3_MASK__MASK 0xffffffff +#define CP_COND_WRITE_3_MASK__SHIFT 0 +static inline uint32_t CP_COND_WRITE_3_MASK(uint32_t val) +{ + return ((val) << CP_COND_WRITE_3_MASK__SHIFT) & 
CP_COND_WRITE_3_MASK__MASK; +} + +#define REG_CP_COND_WRITE_4 0x00000004 +#define CP_COND_WRITE_4_WRITE_ADDR__MASK 0xffffffff +#define CP_COND_WRITE_4_WRITE_ADDR__SHIFT 0 +static inline uint32_t CP_COND_WRITE_4_WRITE_ADDR(uint32_t val) +{ + return ((val) << CP_COND_WRITE_4_WRITE_ADDR__SHIFT) & CP_COND_WRITE_4_WRITE_ADDR__MASK; +} + +#define REG_CP_COND_WRITE_5 0x00000005 +#define CP_COND_WRITE_5_WRITE_DATA__MASK 0xffffffff +#define CP_COND_WRITE_5_WRITE_DATA__SHIFT 0 +static inline uint32_t CP_COND_WRITE_5_WRITE_DATA(uint32_t val) +{ + return ((val) << CP_COND_WRITE_5_WRITE_DATA__SHIFT) & CP_COND_WRITE_5_WRITE_DATA__MASK; +} + +#define REG_CP_COND_WRITE5_0 0x00000000 +#define CP_COND_WRITE5_0_FUNCTION__MASK 0x00000007 +#define CP_COND_WRITE5_0_FUNCTION__SHIFT 0 +static inline uint32_t CP_COND_WRITE5_0_FUNCTION(enum cp_cond_function val) +{ + return ((val) << CP_COND_WRITE5_0_FUNCTION__SHIFT) & CP_COND_WRITE5_0_FUNCTION__MASK; +} +#define CP_COND_WRITE5_0_POLL_MEMORY 0x00000010 +#define CP_COND_WRITE5_0_WRITE_MEMORY 0x00000100 + +#define REG_CP_COND_WRITE5_1 0x00000001 +#define CP_COND_WRITE5_1_POLL_ADDR_LO__MASK 0xffffffff +#define CP_COND_WRITE5_1_POLL_ADDR_LO__SHIFT 0 +static inline uint32_t CP_COND_WRITE5_1_POLL_ADDR_LO(uint32_t val) +{ + return ((val) << CP_COND_WRITE5_1_POLL_ADDR_LO__SHIFT) & CP_COND_WRITE5_1_POLL_ADDR_LO__MASK; +} + +#define REG_CP_COND_WRITE5_2 0x00000002 +#define CP_COND_WRITE5_2_POLL_ADDR_HI__MASK 0xffffffff +#define CP_COND_WRITE5_2_POLL_ADDR_HI__SHIFT 0 +static inline uint32_t CP_COND_WRITE5_2_POLL_ADDR_HI(uint32_t val) +{ + return ((val) << CP_COND_WRITE5_2_POLL_ADDR_HI__SHIFT) & CP_COND_WRITE5_2_POLL_ADDR_HI__MASK; +} + +#define REG_CP_COND_WRITE5_3 0x00000003 +#define CP_COND_WRITE5_3_REF__MASK 0xffffffff +#define CP_COND_WRITE5_3_REF__SHIFT 0 +static inline uint32_t CP_COND_WRITE5_3_REF(uint32_t val) +{ + return ((val) << CP_COND_WRITE5_3_REF__SHIFT) & CP_COND_WRITE5_3_REF__MASK; +} + +#define REG_CP_COND_WRITE5_4 0x00000004 +#define 
CP_COND_WRITE5_4_MASK__MASK 0xffffffff +#define CP_COND_WRITE5_4_MASK__SHIFT 0 +static inline uint32_t CP_COND_WRITE5_4_MASK(uint32_t val) +{ + return ((val) << CP_COND_WRITE5_4_MASK__SHIFT) & CP_COND_WRITE5_4_MASK__MASK; +} + +#define REG_CP_COND_WRITE5_5 0x00000005 +#define CP_COND_WRITE5_5_WRITE_ADDR_LO__MASK 0xffffffff +#define CP_COND_WRITE5_5_WRITE_ADDR_LO__SHIFT 0 +static inline uint32_t CP_COND_WRITE5_5_WRITE_ADDR_LO(uint32_t val) +{ + return ((val) << CP_COND_WRITE5_5_WRITE_ADDR_LO__SHIFT) & CP_COND_WRITE5_5_WRITE_ADDR_LO__MASK; +} + +#define REG_CP_COND_WRITE5_6 0x00000006 +#define CP_COND_WRITE5_6_WRITE_ADDR_HI__MASK 0xffffffff +#define CP_COND_WRITE5_6_WRITE_ADDR_HI__SHIFT 0 +static inline uint32_t CP_COND_WRITE5_6_WRITE_ADDR_HI(uint32_t val) +{ + return ((val) << CP_COND_WRITE5_6_WRITE_ADDR_HI__SHIFT) & CP_COND_WRITE5_6_WRITE_ADDR_HI__MASK; +} + +#define REG_CP_COND_WRITE5_7 0x00000007 +#define CP_COND_WRITE5_7_WRITE_DATA__MASK 0xffffffff +#define CP_COND_WRITE5_7_WRITE_DATA__SHIFT 0 +static inline uint32_t CP_COND_WRITE5_7_WRITE_DATA(uint32_t val) +{ + return ((val) << CP_COND_WRITE5_7_WRITE_DATA__SHIFT) & CP_COND_WRITE5_7_WRITE_DATA__MASK; +} + +#define REG_CP_DISPATCH_COMPUTE_0 0x00000000 + +#define REG_CP_DISPATCH_COMPUTE_1 0x00000001 +#define CP_DISPATCH_COMPUTE_1_X__MASK 0xffffffff +#define CP_DISPATCH_COMPUTE_1_X__SHIFT 0 +static inline uint32_t CP_DISPATCH_COMPUTE_1_X(uint32_t val) +{ + return ((val) << CP_DISPATCH_COMPUTE_1_X__SHIFT) & CP_DISPATCH_COMPUTE_1_X__MASK; +} + +#define REG_CP_DISPATCH_COMPUTE_2 0x00000002 +#define CP_DISPATCH_COMPUTE_2_Y__MASK 0xffffffff +#define CP_DISPATCH_COMPUTE_2_Y__SHIFT 0 +static inline uint32_t CP_DISPATCH_COMPUTE_2_Y(uint32_t val) +{ + return ((val) << CP_DISPATCH_COMPUTE_2_Y__SHIFT) & CP_DISPATCH_COMPUTE_2_Y__MASK; +} + +#define REG_CP_DISPATCH_COMPUTE_3 0x00000003 +#define CP_DISPATCH_COMPUTE_3_Z__MASK 0xffffffff +#define CP_DISPATCH_COMPUTE_3_Z__SHIFT 0 +static inline uint32_t 
CP_DISPATCH_COMPUTE_3_Z(uint32_t val) +{ + return ((val) << CP_DISPATCH_COMPUTE_3_Z__SHIFT) & CP_DISPATCH_COMPUTE_3_Z__MASK; +} + +#define REG_CP_SET_RENDER_MODE_0 0x00000000 +#define CP_SET_RENDER_MODE_0_MODE__MASK 0x000001ff +#define CP_SET_RENDER_MODE_0_MODE__SHIFT 0 +static inline uint32_t CP_SET_RENDER_MODE_0_MODE(enum render_mode_cmd val) +{ + return ((val) << CP_SET_RENDER_MODE_0_MODE__SHIFT) & CP_SET_RENDER_MODE_0_MODE__MASK; +} + +#define REG_CP_SET_RENDER_MODE_1 0x00000001 +#define CP_SET_RENDER_MODE_1_ADDR_0_LO__MASK 0xffffffff +#define CP_SET_RENDER_MODE_1_ADDR_0_LO__SHIFT 0 +static inline uint32_t CP_SET_RENDER_MODE_1_ADDR_0_LO(uint32_t val) +{ + return ((val) << CP_SET_RENDER_MODE_1_ADDR_0_LO__SHIFT) & CP_SET_RENDER_MODE_1_ADDR_0_LO__MASK; +} + +#define REG_CP_SET_RENDER_MODE_2 0x00000002 +#define CP_SET_RENDER_MODE_2_ADDR_0_HI__MASK 0xffffffff +#define CP_SET_RENDER_MODE_2_ADDR_0_HI__SHIFT 0 +static inline uint32_t CP_SET_RENDER_MODE_2_ADDR_0_HI(uint32_t val) +{ + return ((val) << CP_SET_RENDER_MODE_2_ADDR_0_HI__SHIFT) & CP_SET_RENDER_MODE_2_ADDR_0_HI__MASK; +} + +#define REG_CP_SET_RENDER_MODE_3 0x00000003 +#define CP_SET_RENDER_MODE_3_VSC_ENABLE 0x00000008 +#define CP_SET_RENDER_MODE_3_GMEM_ENABLE 0x00000010 + +#define REG_CP_SET_RENDER_MODE_4 0x00000004 + +#define REG_CP_SET_RENDER_MODE_5 0x00000005 +#define CP_SET_RENDER_MODE_5_ADDR_1_LEN__MASK 0xffffffff +#define CP_SET_RENDER_MODE_5_ADDR_1_LEN__SHIFT 0 +static inline uint32_t CP_SET_RENDER_MODE_5_ADDR_1_LEN(uint32_t val) +{ + return ((val) << CP_SET_RENDER_MODE_5_ADDR_1_LEN__SHIFT) & CP_SET_RENDER_MODE_5_ADDR_1_LEN__MASK; +} + +#define REG_CP_SET_RENDER_MODE_6 0x00000006 +#define CP_SET_RENDER_MODE_6_ADDR_1_LO__MASK 0xffffffff +#define CP_SET_RENDER_MODE_6_ADDR_1_LO__SHIFT 0 +static inline uint32_t CP_SET_RENDER_MODE_6_ADDR_1_LO(uint32_t val) +{ + return ((val) << CP_SET_RENDER_MODE_6_ADDR_1_LO__SHIFT) & CP_SET_RENDER_MODE_6_ADDR_1_LO__MASK; +} + +#define REG_CP_SET_RENDER_MODE_7 0x00000007 
+#define CP_SET_RENDER_MODE_7_ADDR_1_HI__MASK 0xffffffff +#define CP_SET_RENDER_MODE_7_ADDR_1_HI__SHIFT 0 +static inline uint32_t CP_SET_RENDER_MODE_7_ADDR_1_HI(uint32_t val) +{ + return ((val) << CP_SET_RENDER_MODE_7_ADDR_1_HI__SHIFT) & CP_SET_RENDER_MODE_7_ADDR_1_HI__MASK; +} + +#define REG_CP_COMPUTE_CHECKPOINT_0 0x00000000 +#define CP_COMPUTE_CHECKPOINT_0_ADDR_0_LO__MASK 0xffffffff +#define CP_COMPUTE_CHECKPOINT_0_ADDR_0_LO__SHIFT 0 +static inline uint32_t CP_COMPUTE_CHECKPOINT_0_ADDR_0_LO(uint32_t val) +{ + return ((val) << CP_COMPUTE_CHECKPOINT_0_ADDR_0_LO__SHIFT) & CP_COMPUTE_CHECKPOINT_0_ADDR_0_LO__MASK; +} + +#define REG_CP_COMPUTE_CHECKPOINT_1 0x00000001 +#define CP_COMPUTE_CHECKPOINT_1_ADDR_0_HI__MASK 0xffffffff +#define CP_COMPUTE_CHECKPOINT_1_ADDR_0_HI__SHIFT 0 +static inline uint32_t CP_COMPUTE_CHECKPOINT_1_ADDR_0_HI(uint32_t val) +{ + return ((val) << CP_COMPUTE_CHECKPOINT_1_ADDR_0_HI__SHIFT) & CP_COMPUTE_CHECKPOINT_1_ADDR_0_HI__MASK; +} + +#define REG_CP_COMPUTE_CHECKPOINT_2 0x00000002 + +#define REG_CP_COMPUTE_CHECKPOINT_3 0x00000003 +#define CP_COMPUTE_CHECKPOINT_3_ADDR_1_LEN__MASK 0xffffffff +#define CP_COMPUTE_CHECKPOINT_3_ADDR_1_LEN__SHIFT 0 +static inline uint32_t CP_COMPUTE_CHECKPOINT_3_ADDR_1_LEN(uint32_t val) +{ + return ((val) << CP_COMPUTE_CHECKPOINT_3_ADDR_1_LEN__SHIFT) & CP_COMPUTE_CHECKPOINT_3_ADDR_1_LEN__MASK; +} + +#define REG_CP_COMPUTE_CHECKPOINT_4 0x00000004 + +#define REG_CP_COMPUTE_CHECKPOINT_5 0x00000005 +#define CP_COMPUTE_CHECKPOINT_5_ADDR_1_LO__MASK 0xffffffff +#define CP_COMPUTE_CHECKPOINT_5_ADDR_1_LO__SHIFT 0 +static inline uint32_t CP_COMPUTE_CHECKPOINT_5_ADDR_1_LO(uint32_t val) +{ + return ((val) << CP_COMPUTE_CHECKPOINT_5_ADDR_1_LO__SHIFT) & CP_COMPUTE_CHECKPOINT_5_ADDR_1_LO__MASK; +} + +#define REG_CP_COMPUTE_CHECKPOINT_6 0x00000006 +#define CP_COMPUTE_CHECKPOINT_6_ADDR_1_HI__MASK 0xffffffff +#define CP_COMPUTE_CHECKPOINT_6_ADDR_1_HI__SHIFT 0 +static inline uint32_t CP_COMPUTE_CHECKPOINT_6_ADDR_1_HI(uint32_t val) +{ + 
return ((val) << CP_COMPUTE_CHECKPOINT_6_ADDR_1_HI__SHIFT) & CP_COMPUTE_CHECKPOINT_6_ADDR_1_HI__MASK; +} + +#define REG_CP_COMPUTE_CHECKPOINT_7 0x00000007 + +#define REG_CP_PERFCOUNTER_ACTION_0 0x00000000 + +#define REG_CP_PERFCOUNTER_ACTION_1 0x00000001 +#define CP_PERFCOUNTER_ACTION_1_ADDR_0_LO__MASK 0xffffffff +#define CP_PERFCOUNTER_ACTION_1_ADDR_0_LO__SHIFT 0 +static inline uint32_t CP_PERFCOUNTER_ACTION_1_ADDR_0_LO(uint32_t val) +{ + return ((val) << CP_PERFCOUNTER_ACTION_1_ADDR_0_LO__SHIFT) & CP_PERFCOUNTER_ACTION_1_ADDR_0_LO__MASK; +} + +#define REG_CP_PERFCOUNTER_ACTION_2 0x00000002 +#define CP_PERFCOUNTER_ACTION_2_ADDR_0_HI__MASK 0xffffffff +#define CP_PERFCOUNTER_ACTION_2_ADDR_0_HI__SHIFT 0 +static inline uint32_t CP_PERFCOUNTER_ACTION_2_ADDR_0_HI(uint32_t val) +{ + return ((val) << CP_PERFCOUNTER_ACTION_2_ADDR_0_HI__SHIFT) & CP_PERFCOUNTER_ACTION_2_ADDR_0_HI__MASK; +} + +#define REG_CP_EVENT_WRITE_0 0x00000000 +#define CP_EVENT_WRITE_0_EVENT__MASK 0x000000ff +#define CP_EVENT_WRITE_0_EVENT__SHIFT 0 +static inline uint32_t CP_EVENT_WRITE_0_EVENT(enum vgt_event_type val) +{ + return ((val) << CP_EVENT_WRITE_0_EVENT__SHIFT) & CP_EVENT_WRITE_0_EVENT__MASK; +} +#define CP_EVENT_WRITE_0_TIMESTAMP 0x40000000 + +#define REG_CP_EVENT_WRITE_1 0x00000001 +#define CP_EVENT_WRITE_1_ADDR_0_LO__MASK 0xffffffff +#define CP_EVENT_WRITE_1_ADDR_0_LO__SHIFT 0 +static inline uint32_t CP_EVENT_WRITE_1_ADDR_0_LO(uint32_t val) +{ + return ((val) << CP_EVENT_WRITE_1_ADDR_0_LO__SHIFT) & CP_EVENT_WRITE_1_ADDR_0_LO__MASK; +} + +#define REG_CP_EVENT_WRITE_2 0x00000002 +#define CP_EVENT_WRITE_2_ADDR_0_HI__MASK 0xffffffff +#define CP_EVENT_WRITE_2_ADDR_0_HI__SHIFT 0 +static inline uint32_t CP_EVENT_WRITE_2_ADDR_0_HI(uint32_t val) +{ + return ((val) << CP_EVENT_WRITE_2_ADDR_0_HI__SHIFT) & CP_EVENT_WRITE_2_ADDR_0_HI__MASK; +} + +#define REG_CP_EVENT_WRITE_3 0x00000003 + +#define REG_CP_BLIT_0 0x00000000 +#define CP_BLIT_0_OP__MASK 0x0000000f +#define CP_BLIT_0_OP__SHIFT 0 +static 
inline uint32_t CP_BLIT_0_OP(enum cp_blit_cmd val) +{ + return ((val) << CP_BLIT_0_OP__SHIFT) & CP_BLIT_0_OP__MASK; +} + +#define REG_CP_BLIT_1 0x00000001 +#define CP_BLIT_1_SRC_X1__MASK 0x00003fff +#define CP_BLIT_1_SRC_X1__SHIFT 0 +static inline uint32_t CP_BLIT_1_SRC_X1(uint32_t val) +{ + return ((val) << CP_BLIT_1_SRC_X1__SHIFT) & CP_BLIT_1_SRC_X1__MASK; +} +#define CP_BLIT_1_SRC_Y1__MASK 0x3fff0000 +#define CP_BLIT_1_SRC_Y1__SHIFT 16 +static inline uint32_t CP_BLIT_1_SRC_Y1(uint32_t val) +{ + return ((val) << CP_BLIT_1_SRC_Y1__SHIFT) & CP_BLIT_1_SRC_Y1__MASK; +} + +#define REG_CP_BLIT_2 0x00000002 +#define CP_BLIT_2_SRC_X2__MASK 0x00003fff +#define CP_BLIT_2_SRC_X2__SHIFT 0 +static inline uint32_t CP_BLIT_2_SRC_X2(uint32_t val) +{ + return ((val) << CP_BLIT_2_SRC_X2__SHIFT) & CP_BLIT_2_SRC_X2__MASK; +} +#define CP_BLIT_2_SRC_Y2__MASK 0x3fff0000 +#define CP_BLIT_2_SRC_Y2__SHIFT 16 +static inline uint32_t CP_BLIT_2_SRC_Y2(uint32_t val) +{ + return ((val) << CP_BLIT_2_SRC_Y2__SHIFT) & CP_BLIT_2_SRC_Y2__MASK; +} + +#define REG_CP_BLIT_3 0x00000003 +#define CP_BLIT_3_DST_X1__MASK 0x00003fff +#define CP_BLIT_3_DST_X1__SHIFT 0 +static inline uint32_t CP_BLIT_3_DST_X1(uint32_t val) +{ + return ((val) << CP_BLIT_3_DST_X1__SHIFT) & CP_BLIT_3_DST_X1__MASK; +} +#define CP_BLIT_3_DST_Y1__MASK 0x3fff0000 +#define CP_BLIT_3_DST_Y1__SHIFT 16 +static inline uint32_t CP_BLIT_3_DST_Y1(uint32_t val) +{ + return ((val) << CP_BLIT_3_DST_Y1__SHIFT) & CP_BLIT_3_DST_Y1__MASK; +} + +#define REG_CP_BLIT_4 0x00000004 +#define CP_BLIT_4_DST_X2__MASK 0x00003fff +#define CP_BLIT_4_DST_X2__SHIFT 0 +static inline uint32_t CP_BLIT_4_DST_X2(uint32_t val) +{ + return ((val) << CP_BLIT_4_DST_X2__SHIFT) & CP_BLIT_4_DST_X2__MASK; +} +#define CP_BLIT_4_DST_Y2__MASK 0x3fff0000 +#define CP_BLIT_4_DST_Y2__SHIFT 16 +static inline uint32_t CP_BLIT_4_DST_Y2(uint32_t val) +{ + return ((val) << CP_BLIT_4_DST_Y2__SHIFT) & CP_BLIT_4_DST_Y2__MASK; +} + +#define REG_CP_EXEC_CS_0 0x00000000 + +#define 
REG_CP_EXEC_CS_1 0x00000001 +#define CP_EXEC_CS_1_NGROUPS_X__MASK 0xffffffff +#define CP_EXEC_CS_1_NGROUPS_X__SHIFT 0 +static inline uint32_t CP_EXEC_CS_1_NGROUPS_X(uint32_t val) +{ + return ((val) << CP_EXEC_CS_1_NGROUPS_X__SHIFT) & CP_EXEC_CS_1_NGROUPS_X__MASK; +} + +#define REG_CP_EXEC_CS_2 0x00000002 +#define CP_EXEC_CS_2_NGROUPS_Y__MASK 0xffffffff +#define CP_EXEC_CS_2_NGROUPS_Y__SHIFT 0 +static inline uint32_t CP_EXEC_CS_2_NGROUPS_Y(uint32_t val) +{ + return ((val) << CP_EXEC_CS_2_NGROUPS_Y__SHIFT) & CP_EXEC_CS_2_NGROUPS_Y__MASK; +} + +#define REG_CP_EXEC_CS_3 0x00000003 +#define CP_EXEC_CS_3_NGROUPS_Z__MASK 0xffffffff +#define CP_EXEC_CS_3_NGROUPS_Z__SHIFT 0 +static inline uint32_t CP_EXEC_CS_3_NGROUPS_Z(uint32_t val) +{ + return ((val) << CP_EXEC_CS_3_NGROUPS_Z__SHIFT) & CP_EXEC_CS_3_NGROUPS_Z__MASK; +} + +#define REG_A4XX_CP_EXEC_CS_INDIRECT_0 0x00000000 + + +#define REG_A4XX_CP_EXEC_CS_INDIRECT_1 0x00000001 +#define A4XX_CP_EXEC_CS_INDIRECT_1_ADDR__MASK 0xffffffff +#define A4XX_CP_EXEC_CS_INDIRECT_1_ADDR__SHIFT 0 +static inline uint32_t A4XX_CP_EXEC_CS_INDIRECT_1_ADDR(uint32_t val) +{ + return ((val) << A4XX_CP_EXEC_CS_INDIRECT_1_ADDR__SHIFT) & A4XX_CP_EXEC_CS_INDIRECT_1_ADDR__MASK; +} + +#define REG_A4XX_CP_EXEC_CS_INDIRECT_2 0x00000002 +#define A4XX_CP_EXEC_CS_INDIRECT_2_LOCALSIZEX__MASK 0x00000ffc +#define A4XX_CP_EXEC_CS_INDIRECT_2_LOCALSIZEX__SHIFT 2 +static inline uint32_t A4XX_CP_EXEC_CS_INDIRECT_2_LOCALSIZEX(uint32_t val) +{ + return ((val) << A4XX_CP_EXEC_CS_INDIRECT_2_LOCALSIZEX__SHIFT) & A4XX_CP_EXEC_CS_INDIRECT_2_LOCALSIZEX__MASK; +} +#define A4XX_CP_EXEC_CS_INDIRECT_2_LOCALSIZEY__MASK 0x003ff000 +#define A4XX_CP_EXEC_CS_INDIRECT_2_LOCALSIZEY__SHIFT 12 +static inline uint32_t A4XX_CP_EXEC_CS_INDIRECT_2_LOCALSIZEY(uint32_t val) +{ + return ((val) << A4XX_CP_EXEC_CS_INDIRECT_2_LOCALSIZEY__SHIFT) & A4XX_CP_EXEC_CS_INDIRECT_2_LOCALSIZEY__MASK; +} +#define A4XX_CP_EXEC_CS_INDIRECT_2_LOCALSIZEZ__MASK 0xffc00000 +#define 
A4XX_CP_EXEC_CS_INDIRECT_2_LOCALSIZEZ__SHIFT 22 +static inline uint32_t A4XX_CP_EXEC_CS_INDIRECT_2_LOCALSIZEZ(uint32_t val) +{ + return ((val) << A4XX_CP_EXEC_CS_INDIRECT_2_LOCALSIZEZ__SHIFT) & A4XX_CP_EXEC_CS_INDIRECT_2_LOCALSIZEZ__MASK; +} + + +#define REG_A5XX_CP_EXEC_CS_INDIRECT_1 0x00000001 +#define A5XX_CP_EXEC_CS_INDIRECT_1_ADDR_LO__MASK 0xffffffff +#define A5XX_CP_EXEC_CS_INDIRECT_1_ADDR_LO__SHIFT 0 +static inline uint32_t A5XX_CP_EXEC_CS_INDIRECT_1_ADDR_LO(uint32_t val) +{ + return ((val) << A5XX_CP_EXEC_CS_INDIRECT_1_ADDR_LO__SHIFT) & A5XX_CP_EXEC_CS_INDIRECT_1_ADDR_LO__MASK; +} + +#define REG_A5XX_CP_EXEC_CS_INDIRECT_2 0x00000002 +#define A5XX_CP_EXEC_CS_INDIRECT_2_ADDR_HI__MASK 0xffffffff +#define A5XX_CP_EXEC_CS_INDIRECT_2_ADDR_HI__SHIFT 0 +static inline uint32_t A5XX_CP_EXEC_CS_INDIRECT_2_ADDR_HI(uint32_t val) +{ + return ((val) << A5XX_CP_EXEC_CS_INDIRECT_2_ADDR_HI__SHIFT) & A5XX_CP_EXEC_CS_INDIRECT_2_ADDR_HI__MASK; +} + +#define REG_A5XX_CP_EXEC_CS_INDIRECT_3 0x00000003 +#define A5XX_CP_EXEC_CS_INDIRECT_3_LOCALSIZEX__MASK 0x00000ffc +#define A5XX_CP_EXEC_CS_INDIRECT_3_LOCALSIZEX__SHIFT 2 +static inline uint32_t A5XX_CP_EXEC_CS_INDIRECT_3_LOCALSIZEX(uint32_t val) +{ + return ((val) << A5XX_CP_EXEC_CS_INDIRECT_3_LOCALSIZEX__SHIFT) & A5XX_CP_EXEC_CS_INDIRECT_3_LOCALSIZEX__MASK; +} +#define A5XX_CP_EXEC_CS_INDIRECT_3_LOCALSIZEY__MASK 0x003ff000 +#define A5XX_CP_EXEC_CS_INDIRECT_3_LOCALSIZEY__SHIFT 12 +static inline uint32_t A5XX_CP_EXEC_CS_INDIRECT_3_LOCALSIZEY(uint32_t val) +{ + return ((val) << A5XX_CP_EXEC_CS_INDIRECT_3_LOCALSIZEY__SHIFT) & A5XX_CP_EXEC_CS_INDIRECT_3_LOCALSIZEY__MASK; +} +#define A5XX_CP_EXEC_CS_INDIRECT_3_LOCALSIZEZ__MASK 0xffc00000 +#define A5XX_CP_EXEC_CS_INDIRECT_3_LOCALSIZEZ__SHIFT 22 +static inline uint32_t A5XX_CP_EXEC_CS_INDIRECT_3_LOCALSIZEZ(uint32_t val) +{ + return ((val) << A5XX_CP_EXEC_CS_INDIRECT_3_LOCALSIZEZ__SHIFT) & A5XX_CP_EXEC_CS_INDIRECT_3_LOCALSIZEZ__MASK; +} + +#define REG_A2XX_CP_SET_MARKER_0 0x00000000 
+#define A2XX_CP_SET_MARKER_0_MARKER__MASK 0x0000000f +#define A2XX_CP_SET_MARKER_0_MARKER__SHIFT 0 +static inline uint32_t A2XX_CP_SET_MARKER_0_MARKER(uint32_t val) +{ + return ((val) << A2XX_CP_SET_MARKER_0_MARKER__SHIFT) & A2XX_CP_SET_MARKER_0_MARKER__MASK; +} +#define A2XX_CP_SET_MARKER_0_MODE__MASK 0x0000000f +#define A2XX_CP_SET_MARKER_0_MODE__SHIFT 0 +static inline uint32_t A2XX_CP_SET_MARKER_0_MODE(enum a6xx_render_mode val) +{ + return ((val) << A2XX_CP_SET_MARKER_0_MODE__SHIFT) & A2XX_CP_SET_MARKER_0_MODE__MASK; +} +#define A2XX_CP_SET_MARKER_0_IFPC 0x00000100 + +static inline uint32_t REG_A2XX_CP_SET_PSEUDO_REG_(uint32_t i0) { return 0x00000000 + 0x3*i0; } + +static inline uint32_t REG_A2XX_CP_SET_PSEUDO_REG__0(uint32_t i0) { return 0x00000000 + 0x3*i0; } +#define A2XX_CP_SET_PSEUDO_REG__0_PSEUDO_REG__MASK 0x00000007 +#define A2XX_CP_SET_PSEUDO_REG__0_PSEUDO_REG__SHIFT 0 +static inline uint32_t A2XX_CP_SET_PSEUDO_REG__0_PSEUDO_REG(enum pseudo_reg val) +{ + return ((val) << A2XX_CP_SET_PSEUDO_REG__0_PSEUDO_REG__SHIFT) & A2XX_CP_SET_PSEUDO_REG__0_PSEUDO_REG__MASK; +} + +static inline uint32_t REG_A2XX_CP_SET_PSEUDO_REG__1(uint32_t i0) { return 0x00000001 + 0x3*i0; } +#define A2XX_CP_SET_PSEUDO_REG__1_LO__MASK 0xffffffff +#define A2XX_CP_SET_PSEUDO_REG__1_LO__SHIFT 0 +static inline uint32_t A2XX_CP_SET_PSEUDO_REG__1_LO(uint32_t val) +{ + return ((val) << A2XX_CP_SET_PSEUDO_REG__1_LO__SHIFT) & A2XX_CP_SET_PSEUDO_REG__1_LO__MASK; +} + +static inline uint32_t REG_A2XX_CP_SET_PSEUDO_REG__2(uint32_t i0) { return 0x00000002 + 0x3*i0; } +#define A2XX_CP_SET_PSEUDO_REG__2_HI__MASK 0xffffffff +#define A2XX_CP_SET_PSEUDO_REG__2_HI__SHIFT 0 +static inline uint32_t A2XX_CP_SET_PSEUDO_REG__2_HI(uint32_t val) +{ + return ((val) << A2XX_CP_SET_PSEUDO_REG__2_HI__SHIFT) & A2XX_CP_SET_PSEUDO_REG__2_HI__MASK; +} + +#define REG_A2XX_CP_REG_TEST_0 0x00000000 +#define A2XX_CP_REG_TEST_0_REG__MASK 0x00000fff +#define A2XX_CP_REG_TEST_0_REG__SHIFT 0 +static inline uint32_t 
A2XX_CP_REG_TEST_0_REG(uint32_t val) +{ + return ((val) << A2XX_CP_REG_TEST_0_REG__SHIFT) & A2XX_CP_REG_TEST_0_REG__MASK; +} +#define A2XX_CP_REG_TEST_0_BIT__MASK 0x01f00000 +#define A2XX_CP_REG_TEST_0_BIT__SHIFT 20 +static inline uint32_t A2XX_CP_REG_TEST_0_BIT(uint32_t val) +{ + return ((val) << A2XX_CP_REG_TEST_0_BIT__SHIFT) & A2XX_CP_REG_TEST_0_BIT__MASK; +} +#define A2XX_CP_REG_TEST_0_UNK25 0x02000000 + + +#endif /* ADRENO_PM4_XML */ diff -Nru mesa-18.3.3/src/gallium/Android.mk mesa-19.0.1/src/gallium/Android.mk --- mesa-18.3.3/src/gallium/Android.mk 2018-12-07 18:58:04.000000000 +0000 +++ mesa-19.0.1/src/gallium/Android.mk 2019-03-31 23:16:37.000000000 +0000 @@ -38,7 +38,7 @@ SUBDIRS += winsys/freedreno/drm drivers/freedreno SUBDIRS += winsys/i915/drm drivers/i915 SUBDIRS += winsys/nouveau/drm drivers/nouveau -SUBDIRS += winsys/pl111/drm drivers/pl111 +SUBDIRS += winsys/kmsro/drm drivers/kmsro SUBDIRS += winsys/radeon/drm drivers/r300 SUBDIRS += winsys/radeon/drm drivers/r600 SUBDIRS += winsys/radeon/drm winsys/amdgpu/drm drivers/radeonsi @@ -46,7 +46,6 @@ SUBDIRS += winsys/virgl/drm winsys/virgl/vtest drivers/virgl SUBDIRS += winsys/svga/drm drivers/svga SUBDIRS += winsys/etnaviv/drm drivers/etnaviv drivers/renderonly -SUBDIRS += winsys/imx/drm SUBDIRS += state_trackers/dri # sort to eliminate any duplicates diff -Nru mesa-18.3.3/src/gallium/auxiliary/cso_cache/cso_context.c mesa-19.0.1/src/gallium/auxiliary/cso_cache/cso_context.c --- mesa-18.3.3/src/gallium/auxiliary/cso_cache/cso_context.c 2018-09-27 19:13:54.000000000 +0000 +++ mesa-19.0.1/src/gallium/auxiliary/cso_cache/cso_context.c 2019-03-31 23:16:37.000000000 +0000 @@ -329,7 +329,8 @@ int supported_irs = pipe->screen->get_shader_param(pipe->screen, PIPE_SHADER_COMPUTE, PIPE_SHADER_CAP_SUPPORTED_IRS); - if (supported_irs & (1 << PIPE_SHADER_IR_TGSI)) { + if (supported_irs & ((1 << PIPE_SHADER_IR_TGSI) | + (1 << PIPE_SHADER_IR_NIR))) { ctx->has_compute_shader = TRUE; } } diff -Nru 
mesa-18.3.3/src/gallium/auxiliary/draw/draw_pipe_stipple.c mesa-19.0.1/src/gallium/auxiliary/draw/draw_pipe_stipple.c --- mesa-18.3.3/src/gallium/auxiliary/draw/draw_pipe_stipple.c 2018-03-13 20:41:43.000000000 +0000 +++ mesa-19.0.1/src/gallium/auxiliary/draw/draw_pipe_stipple.c 2019-03-31 23:16:37.000000000 +0000 @@ -48,8 +48,8 @@ struct stipple_stage { struct draw_stage stage; float counter; - uint pattern; - uint factor; + ushort pattern; + ushort factor; bool smooth; }; @@ -110,7 +110,7 @@ static inline bool -stipple_test(int counter, ushort pattern, int factor) +stipple_test(int counter, ushort pattern, ushort factor) { int b = (counter / factor) & 0xf; return !!((1 << b) & pattern); @@ -136,6 +136,10 @@ float length; int i; + int intlength; + + if (header->flags & DRAW_PIPE_RESET_STIPPLE) + stipple->counter = 0; if (stipple->smooth) { float dx = x1 - x0; @@ -147,21 +151,21 @@ length = MAX2(dx, dy); } - if (header->flags & DRAW_PIPE_RESET_STIPPLE) - stipple->counter = 0; + if (util_is_inf_or_nan(length)) + intlength = 0; + else + intlength = ceilf(length); /* XXX ToDo: instead of iterating pixel-by-pixel, use a look-up table. 
*/ - for (i = 0; i < length; i++) { + for (i = 0; i < intlength; i++) { bool result = stipple_test((int)stipple->counter + i, - (ushort)stipple->pattern, stipple->factor); + stipple->pattern, stipple->factor); if (result != state) { /* changing from "off" to "on" or vice versa */ if (state) { - if (start != i) { - /* finishing an "on" segment */ - emit_segment(stage, header, start / length, i / length); - } + /* finishing an "on" segment */ + emit_segment(stage, header, start / length, i / length); } else { /* starting an "on" segment */ diff -Nru mesa-18.3.3/src/gallium/auxiliary/driver_ddebug/dd_context.c mesa-19.0.1/src/gallium/auxiliary/driver_ddebug/dd_context.c --- mesa-18.3.3/src/gallium/auxiliary/driver_ddebug/dd_context.c 2018-12-07 18:58:04.000000000 +0000 +++ mesa-19.0.1/src/gallium/auxiliary/driver_ddebug/dd_context.c 2019-03-31 23:16:37.000000000 +0000 @@ -596,7 +596,6 @@ cnd_destroy(&dctx->cond); assert(list_empty(&dctx->records)); - assert(!dctx->record_pending); if (pipe->set_log_context) { pipe->set_log_context(pipe, NULL); diff -Nru mesa-18.3.3/src/gallium/auxiliary/driver_ddebug/dd_draw.c mesa-19.0.1/src/gallium/auxiliary/driver_ddebug/dd_draw.c --- mesa-18.3.3/src/gallium/auxiliary/driver_ddebug/dd_draw.c 2018-04-16 21:31:06.000000000 +0000 +++ mesa-19.0.1/src/gallium/auxiliary/driver_ddebug/dd_draw.c 2019-03-31 23:16:37.000000000 +0000 @@ -988,10 +988,8 @@ encountered_hang = true; } - if (num_later || dctx->record_pending) { - fprintf(stderr, "... and %u%s additional draws.\n", num_later, - dctx->record_pending ? "+1 (pending)" : ""); - } + if (num_later) + fprintf(stderr, "... 
and %u additional draws.\n", num_later); fprintf(stderr, "\nDone.\n"); dd_kill_process(); @@ -1008,9 +1006,6 @@ for (;;) { struct list_head records; - struct pipe_fence_handle *fence; - struct pipe_fence_handle *fence2 = NULL; - list_replace(&dctx->records, &records); list_inithead(&dctx->records); dctx->num_records = 0; @@ -1018,36 +1013,36 @@ if (dctx->api_stalled) cnd_signal(&dctx->cond); - if (!list_empty(&records)) { - /* Wait for the youngest draw. This means hangs can take a bit longer - * to detect, but it's more efficient this way. */ - struct dd_draw_record *youngest = - LIST_ENTRY(struct dd_draw_record, records.prev, list); - fence = youngest->bottom_of_pipe; - } else if (dctx->record_pending) { - /* Wait for pending fences, in case the driver ends up hanging internally. */ - fence = dctx->record_pending->prev_bottom_of_pipe; - fence2 = dctx->record_pending->top_of_pipe; - } else if (dctx->kill_thread) { - break; - } else { + if (list_empty(&records)) { + if (dctx->kill_thread) + break; + cnd_wait(&dctx->cond, &dctx->mutex); continue; } + mtx_unlock(&dctx->mutex); - /* Fences can be NULL legitimately when timeout detection is disabled. */ - if ((fence && - !screen->fence_finish(screen, NULL, fence, - (uint64_t)dscreen->timeout_ms * 1000*1000)) || - (fence2 && - !screen->fence_finish(screen, NULL, fence2, - (uint64_t)dscreen->timeout_ms * 1000*1000))) { - mtx_lock(&dctx->mutex); - list_splice(&records, &dctx->records); - dd_report_hang(dctx); - /* we won't actually get here */ - mtx_unlock(&dctx->mutex); + /* Wait for the youngest draw. This means hangs can take a bit longer + * to detect, but it's more efficient this way. 
*/ + struct dd_draw_record *youngest = + list_last_entry(&records, struct dd_draw_record, list); + + if (dscreen->timeout_ms > 0) { + uint64_t abs_timeout = os_time_get_absolute_timeout( + (uint64_t)dscreen->timeout_ms * 1000*1000); + + if (!util_queue_fence_wait_timeout(&youngest->driver_finished, abs_timeout) || + !screen->fence_finish(screen, NULL, youngest->bottom_of_pipe, + (uint64_t)dscreen->timeout_ms * 1000*1000)) { + mtx_lock(&dctx->mutex); + list_splice(&records, &dctx->records); + dd_report_hang(dctx); + /* we won't actually get here */ + mtx_unlock(&dctx->mutex); + } + } else { + util_queue_fence_wait(&youngest->driver_finished); } list_for_each_entry_safe(struct dd_draw_record, record, &records, list) { @@ -1079,6 +1074,7 @@ record->bottom_of_pipe = NULL; record->log_page = NULL; util_queue_fence_init(&record->driver_finished); + util_queue_fence_reset(&record->driver_finished); dd_init_copy_of_draw_state(&record->draw_state); dd_copy_draw_state(&record->draw_state.base, &dctx->draw_state); @@ -1115,13 +1111,25 @@ pipe->flush(pipe, &record->top_of_pipe, PIPE_FLUSH_DEFERRED | PIPE_FLUSH_TOP_OF_PIPE); } + } else if (dscreen->flush_always && dctx->num_draw_calls >= dscreen->skip_count) { + pipe->flush(pipe, NULL, 0); + } - mtx_lock(&dctx->mutex); - dctx->record_pending = record; - if (list_empty(&dctx->records)) - cnd_signal(&dctx->cond); - mtx_unlock(&dctx->mutex); + mtx_lock(&dctx->mutex); + if (unlikely(dctx->num_records > 10000)) { + dctx->api_stalled = true; + /* Since this is only a heuristic to prevent the API thread from getting + * too far ahead, we don't need a loop here. 
*/ + cnd_wait(&dctx->cond, &dctx->mutex); + dctx->api_stalled = false; } + + if (list_empty(&dctx->records)) + cnd_signal(&dctx->cond); + + list_addtail(&record->list, &dctx->records); + dctx->num_records++; + mtx_unlock(&dctx->mutex); } static void @@ -1134,8 +1142,7 @@ record->log_page = u_log_new_page(&dctx->log); record->time_after = os_time_get_nano(); - if (!util_queue_fence_is_signalled(&record->driver_finished)) - util_queue_fence_signal(&record->driver_finished); + util_queue_fence_signal(&record->driver_finished); if (dscreen->dump_mode == DD_DUMP_APITRACE_CALL && dscreen->apitrace_dump_call > dctx->draw_state.apitrace_call_number) { @@ -1158,34 +1165,14 @@ else flush_flags = PIPE_FLUSH_DEFERRED | PIPE_FLUSH_BOTTOM_OF_PIPE; pipe->flush(pipe, &record->bottom_of_pipe, flush_flags); - - assert(record == dctx->record_pending); } if (pipe->callback) { - util_queue_fence_reset(&record->driver_finished); pipe->callback(pipe, dd_after_draw_async, record, true); } else { dd_after_draw_async(record); } - mtx_lock(&dctx->mutex); - if (unlikely(dctx->num_records > 10000)) { - dctx->api_stalled = true; - /* Since this is only a heuristic to prevent the API thread from getting - * too far ahead, we don't need a loop here. 
*/ - cnd_wait(&dctx->cond, &dctx->mutex); - dctx->api_stalled = false; - } - - if (list_empty(&dctx->records)) - cnd_signal(&dctx->cond); - - list_addtail(&record->list, &dctx->records); - dctx->record_pending = NULL; - dctx->num_records++; - mtx_unlock(&dctx->mutex); - ++dctx->num_draw_calls; if (dscreen->skip_count && dctx->num_draw_calls % 10000 == 0) fprintf(stderr, "Gallium debugger reached %u draw calls.\n", diff -Nru mesa-18.3.3/src/gallium/auxiliary/driver_ddebug/dd_pipe.h mesa-19.0.1/src/gallium/auxiliary/driver_ddebug/dd_pipe.h --- mesa-18.3.3/src/gallium/auxiliary/driver_ddebug/dd_pipe.h 2018-04-16 21:31:06.000000000 +0000 +++ mesa-19.0.1/src/gallium/auxiliary/driver_ddebug/dd_pipe.h 2019-03-31 23:16:37.000000000 +0000 @@ -274,6 +274,7 @@ int64_t time_after; unsigned draw_call; + /* The fence pointers are guaranteed to be valid once driver_finished is signalled */ struct pipe_fence_handle *prev_bottom_of_pipe; struct pipe_fence_handle *top_of_pipe; struct pipe_fence_handle *bottom_of_pipe; @@ -297,24 +298,18 @@ /* Pipelined hang detection. * - * This is without unnecessary flushes and waits. There is a memory-based - * fence that is incremented by clear_buffer every draw call. Driver fences - * are not used. + * Before each draw call, a new dd_draw_record is created that contains + * a copy of all states. After each draw call, the driver's log is added + * to this record. Additionally, deferred fences are associated to each + * record both before and after the draw. * - * After each draw call, a new dd_draw_record is created that contains - * a copy of all states, the output of pipe_context::dump_debug_state, - * and it has a fence number assigned. That's done without knowing whether - * that draw call is problematic or not. The record is added into the list - * of all records. - * - * An independent, separate thread loops over the list of records and checks - * their fences. Records with signalled fences are freed. 
On fence timeout, - * the thread dumps the records of in-flight draws. + * The records are handed off to a separate thread which waits on the + * records' fences. Records with signalled fences are freed. When a timeout + * is detected, the thread dumps the records of in-flight draws. */ thrd_t thread; mtx_t mutex; cnd_t cond; - struct dd_draw_record *record_pending; /* currently inside the driver */ struct list_head records; /* oldest record first */ unsigned num_records; bool kill_thread; diff -Nru mesa-18.3.3/src/gallium/auxiliary/gallivm/lp_bld_arit.c mesa-19.0.1/src/gallium/auxiliary/gallivm/lp_bld_arit.c --- mesa-18.3.3/src/gallium/auxiliary/gallivm/lp_bld_arit.c 2018-12-07 18:58:04.000000000 +0000 +++ mesa-19.0.1/src/gallium/auxiliary/gallivm/lp_bld_arit.c 2019-03-31 23:16:37.000000000 +0000 @@ -1992,6 +1992,8 @@ else if ((util_cpu_caps.has_altivec && (type.width == 32 && type.length == 4))) return TRUE; + else if (util_cpu_caps.has_neon) + return TRUE; return FALSE; } @@ -2099,7 +2101,7 @@ LLVMValueRef a, enum lp_build_round_mode mode) { - if (util_cpu_caps.has_sse4_1) { + if (util_cpu_caps.has_sse4_1 || util_cpu_caps.has_neon) { LLVMBuilderRef builder = bld->gallivm->builder; const struct lp_type type = bld->type; const char *intrinsic_root; @@ -2477,7 +2479,7 @@ else { LLVMValueRef half; - half = lp_build_const_vec(bld->gallivm, type, 0.5); + half = lp_build_const_vec(bld->gallivm, type, nextafterf(0.5, 0.0)); if (type.sign) { LLVMTypeRef vec_type = bld->vec_type; diff -Nru mesa-18.3.3/src/gallium/auxiliary/gallivm/lp_bld_format_aos.c mesa-19.0.1/src/gallium/auxiliary/gallivm/lp_bld_format_aos.c --- mesa-18.3.3/src/gallium/auxiliary/gallivm/lp_bld_format_aos.c 2018-04-03 17:32:26.000000000 +0000 +++ mesa-19.0.1/src/gallium/auxiliary/gallivm/lp_bld_format_aos.c 2019-03-31 23:16:37.000000000 +0000 @@ -464,6 +464,7 @@ * \param ptr address of the pixel block (or the texel if uncompressed) * \param i, j the sub-block pixel coordinates. 
For non-compressed formats * these will always be (0, 0). + * \param cache optional value pointing to a lp_build_format_cache structure * \return a 4 element vector with the pixel's RGBA values. */ LLVMValueRef @@ -728,7 +729,7 @@ * s3tc rgb formats */ - if (format_desc->layout == UTIL_FORMAT_LAYOUT_S3TC && cache) { + if (format_desc->layout == UTIL_FORMAT_LAYOUT_S3TC) { struct lp_type tmp_type; LLVMValueRef tmp; @@ -737,7 +738,7 @@ tmp_type.length = num_pixels * 4; tmp_type.norm = TRUE; - tmp = lp_build_fetch_cached_texels(gallivm, + tmp = lp_build_fetch_s3tc_rgba_aos(gallivm, format_desc, num_pixels, base_ptr, diff -Nru mesa-18.3.3/src/gallium/auxiliary/gallivm/lp_bld_format_cached.c mesa-19.0.1/src/gallium/auxiliary/gallivm/lp_bld_format_cached.c --- mesa-18.3.3/src/gallium/auxiliary/gallivm/lp_bld_format_cached.c 2017-11-05 00:14:08.000000000 +0000 +++ mesa-19.0.1/src/gallium/auxiliary/gallivm/lp_bld_format_cached.c 1970-01-01 00:00:00.000000000 +0000 @@ -1,374 +0,0 @@ -/************************************************************************** - * - * Copyright 2015 VMware, Inc. - * All Rights Reserved. - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the - * "Software"), to deal in the Software without restriction, including - * without limitation the rights to use, copy, modify, merge, publish, - * distribute, sub license, and/or sell copies of the Software, and to - * permit persons to whom the Software is furnished to do so, subject to - * the following conditions: - * - * The above copyright notice and this permission notice (including the - * next paragraph) shall be included in all copies or substantial portions - * of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS - * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. 
- * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR - * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, - * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE - * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. - * - **************************************************************************/ - -#include "lp_bld_format.h" -#include "lp_bld_type.h" -#include "lp_bld_struct.h" -#include "lp_bld_const.h" -#include "lp_bld_flow.h" -#include "lp_bld_swizzle.h" - -#include "util/u_math.h" - - -/** - * @file - * Complex block-compression based formats are handled here by using a cache, - * so re-decoding of every pixel is not required. - * Especially for bilinear filtering, texel reuse is very high hence even - * a small cache helps. - * The elements in the cache are the decoded blocks - currently things - * are restricted to formats which are 4x4 block based, and the decoded - * texels must fit into 4x8 bits. - * The cache is direct mapped so hitrates aren't all that great and cache - * thrashing could happen. 
- * - * @author Roland Scheidegger - */ - - -#if LP_BUILD_FORMAT_CACHE_DEBUG -static void -update_cache_access(struct gallivm_state *gallivm, - LLVMValueRef ptr, - unsigned count, - unsigned index) -{ - LLVMBuilderRef builder = gallivm->builder; - LLVMValueRef member_ptr, cache_access; - - assert(index == LP_BUILD_FORMAT_CACHE_MEMBER_ACCESS_TOTAL || - index == LP_BUILD_FORMAT_CACHE_MEMBER_ACCESS_MISS); - - member_ptr = lp_build_struct_get_ptr(gallivm, ptr, index, ""); - cache_access = LLVMBuildLoad(builder, member_ptr, "cache_access"); - cache_access = LLVMBuildAdd(builder, cache_access, - LLVMConstInt(LLVMInt64TypeInContext(gallivm->context), - count, 0), ""); - LLVMBuildStore(builder, cache_access, member_ptr); -} -#endif - - -static void -store_cached_block(struct gallivm_state *gallivm, - LLVMValueRef *col, - LLVMValueRef tag_value, - LLVMValueRef hash_index, - LLVMValueRef cache) -{ - LLVMBuilderRef builder = gallivm->builder; - LLVMValueRef ptr, indices[3]; - LLVMTypeRef type_ptr4x32; - unsigned count; - - type_ptr4x32 = LLVMPointerType(LLVMVectorType(LLVMInt32TypeInContext(gallivm->context), 4), 0); - indices[0] = lp_build_const_int32(gallivm, 0); - indices[1] = lp_build_const_int32(gallivm, LP_BUILD_FORMAT_CACHE_MEMBER_TAGS); - indices[2] = hash_index; - ptr = LLVMBuildGEP(builder, cache, indices, ARRAY_SIZE(indices), ""); - LLVMBuildStore(builder, tag_value, ptr); - - indices[1] = lp_build_const_int32(gallivm, LP_BUILD_FORMAT_CACHE_MEMBER_DATA); - hash_index = LLVMBuildMul(builder, hash_index, - lp_build_const_int32(gallivm, 16), ""); - for (count = 0; count < 4; count++) { - indices[2] = hash_index; - ptr = LLVMBuildGEP(builder, cache, indices, ARRAY_SIZE(indices), ""); - ptr = LLVMBuildBitCast(builder, ptr, type_ptr4x32, ""); - LLVMBuildStore(builder, col[count], ptr); - hash_index = LLVMBuildAdd(builder, hash_index, - lp_build_const_int32(gallivm, 4), ""); - } -} - - -static LLVMValueRef -lookup_cached_pixel(struct gallivm_state *gallivm, - LLVMValueRef 
ptr, - LLVMValueRef index) -{ - LLVMBuilderRef builder = gallivm->builder; - LLVMValueRef member_ptr, indices[3]; - - indices[0] = lp_build_const_int32(gallivm, 0); - indices[1] = lp_build_const_int32(gallivm, LP_BUILD_FORMAT_CACHE_MEMBER_DATA); - indices[2] = index; - member_ptr = LLVMBuildGEP(builder, ptr, indices, ARRAY_SIZE(indices), ""); - return LLVMBuildLoad(builder, member_ptr, "cache_data"); -} - - -static LLVMValueRef -lookup_tag_data(struct gallivm_state *gallivm, - LLVMValueRef ptr, - LLVMValueRef index) -{ - LLVMBuilderRef builder = gallivm->builder; - LLVMValueRef member_ptr, indices[3]; - - indices[0] = lp_build_const_int32(gallivm, 0); - indices[1] = lp_build_const_int32(gallivm, LP_BUILD_FORMAT_CACHE_MEMBER_TAGS); - indices[2] = index; - member_ptr = LLVMBuildGEP(builder, ptr, indices, ARRAY_SIZE(indices), ""); - return LLVMBuildLoad(builder, member_ptr, "tag_data"); -} - - -static void -update_cached_block(struct gallivm_state *gallivm, - const struct util_format_description *format_desc, - LLVMValueRef ptr_addr, - LLVMValueRef hash_index, - LLVMValueRef cache) - -{ - LLVMBuilderRef builder = gallivm->builder; - LLVMTypeRef i8t = LLVMInt8TypeInContext(gallivm->context); - LLVMTypeRef pi8t = LLVMPointerType(i8t, 0); - LLVMTypeRef i32t = LLVMInt32TypeInContext(gallivm->context); - LLVMTypeRef i32x4 = LLVMVectorType(LLVMInt32TypeInContext(gallivm->context), 4); - LLVMValueRef function; - LLVMValueRef tag_value, tmp_ptr; - LLVMValueRef col[4]; - unsigned i, j; - - /* - * Use format_desc->fetch_rgba_8unorm() for each pixel in the block. - * This doesn't actually make any sense whatsoever, someone would need - * to write a function doing this for all pixels in a block (either as - * an external c function or with generated code). Don't ask. 
- */ - - { - /* - * Function to call looks like: - * fetch(uint8_t *dst, const uint8_t *src, unsigned i, unsigned j) - */ - LLVMTypeRef ret_type; - LLVMTypeRef arg_types[4]; - LLVMTypeRef function_type; - - assert(format_desc->fetch_rgba_8unorm); - - ret_type = LLVMVoidTypeInContext(gallivm->context); - arg_types[0] = pi8t; - arg_types[1] = pi8t; - arg_types[2] = i32t; - arg_types[3] = i32t; - function_type = LLVMFunctionType(ret_type, arg_types, - ARRAY_SIZE(arg_types), 0); - - /* make const pointer for the C fetch_rgba_8unorm function */ - function = lp_build_const_int_pointer(gallivm, - func_to_pointer((func_pointer) format_desc->fetch_rgba_8unorm)); - - /* cast the callee pointer to the function's type */ - function = LLVMBuildBitCast(builder, function, - LLVMPointerType(function_type, 0), - "cast callee"); - } - - tmp_ptr = lp_build_array_alloca(gallivm, i32x4, - lp_build_const_int32(gallivm, 16), - "tmp_decode_store"); - tmp_ptr = LLVMBuildBitCast(builder, tmp_ptr, pi8t, ""); - - /* - * Invoke format_desc->fetch_rgba_8unorm() for each pixel. - * This is going to be really really slow. - * Note: the block store format is actually - * x0y0x0y1x0y2x0y3 x1y0x1y1x1y2x1y3 ... - */ - for (i = 0; i < 4; ++i) { - for (j = 0; j < 4; ++j) { - LLVMValueRef args[4]; - LLVMValueRef dst_offset = lp_build_const_int32(gallivm, (i * 4 + j) * 4); - - /* - * Note we actually supply a pointer to the start of the block, - * not the start of the texture. - */ - args[0] = LLVMBuildGEP(gallivm->builder, tmp_ptr, &dst_offset, 1, ""); - args[1] = ptr_addr; - args[2] = LLVMConstInt(i32t, i, 0); - args[3] = LLVMConstInt(i32t, j, 0); - LLVMBuildCall(builder, function, args, ARRAY_SIZE(args), ""); - } - } - - /* Finally store the block - pointless mem copy + update tag. 
*/ - tmp_ptr = LLVMBuildBitCast(builder, tmp_ptr, LLVMPointerType(i32x4, 0), ""); - for (i = 0; i < 4; ++i) { - LLVMValueRef tmp_offset = lp_build_const_int32(gallivm, i); - LLVMValueRef ptr = LLVMBuildGEP(gallivm->builder, tmp_ptr, &tmp_offset, 1, ""); - col[i] = LLVMBuildLoad(builder, ptr, ""); - } - - tag_value = LLVMBuildPtrToInt(gallivm->builder, ptr_addr, - LLVMInt64TypeInContext(gallivm->context), ""); - store_cached_block(gallivm, col, tag_value, hash_index, cache); -} - - -/* - * Do a cached lookup. - * - * Returns (vectors of) 4x8 rgba aos value - */ -LLVMValueRef -lp_build_fetch_cached_texels(struct gallivm_state *gallivm, - const struct util_format_description *format_desc, - unsigned n, - LLVMValueRef base_ptr, - LLVMValueRef offset, - LLVMValueRef i, - LLVMValueRef j, - LLVMValueRef cache) - -{ - LLVMBuilderRef builder = gallivm->builder; - unsigned count, low_bit, log2size; - LLVMValueRef color, offset_stored, addr, ptr_addrtrunc, tmp; - LLVMValueRef ij_index, hash_index, hash_mask, block_index; - LLVMTypeRef i8t = LLVMInt8TypeInContext(gallivm->context); - LLVMTypeRef i32t = LLVMInt32TypeInContext(gallivm->context); - LLVMTypeRef i64t = LLVMInt64TypeInContext(gallivm->context); - struct lp_type type; - struct lp_build_context bld32; - memset(&type, 0, sizeof type); - type.width = 32; - type.length = n; - - assert(format_desc->block.width == 4); - assert(format_desc->block.height == 4); - - lp_build_context_init(&bld32, gallivm, type); - - /* - * compute hash - we use direct mapped cache, the hash function could - * be better but it needs to be simple - * per-element: - * compare offset with offset stored at tag (hash) - * if not equal decode/store block, update tag - * extract color from cache - * assemble result vector - */ - - /* TODO: not ideal with 32bit pointers... 
*/ - - low_bit = util_logbase2(format_desc->block.bits / 8); - log2size = util_logbase2(LP_BUILD_FORMAT_CACHE_SIZE); - addr = LLVMBuildPtrToInt(builder, base_ptr, i64t, ""); - ptr_addrtrunc = LLVMBuildPtrToInt(builder, base_ptr, i32t, ""); - ptr_addrtrunc = lp_build_broadcast_scalar(&bld32, ptr_addrtrunc); - /* For the hash function, first mask off the unused lowest bits. Then just - do some xor with address bits - only use lower 32bits */ - ptr_addrtrunc = LLVMBuildAdd(builder, offset, ptr_addrtrunc, ""); - ptr_addrtrunc = LLVMBuildLShr(builder, ptr_addrtrunc, - lp_build_const_int_vec(gallivm, type, low_bit), ""); - /* This only really makes sense for size 64,128,256 */ - hash_index = ptr_addrtrunc; - ptr_addrtrunc = LLVMBuildLShr(builder, ptr_addrtrunc, - lp_build_const_int_vec(gallivm, type, 2*log2size), ""); - hash_index = LLVMBuildXor(builder, ptr_addrtrunc, hash_index, ""); - tmp = LLVMBuildLShr(builder, hash_index, - lp_build_const_int_vec(gallivm, type, log2size), ""); - hash_index = LLVMBuildXor(builder, hash_index, tmp, ""); - - hash_mask = lp_build_const_int_vec(gallivm, type, LP_BUILD_FORMAT_CACHE_SIZE - 1); - hash_index = LLVMBuildAnd(builder, hash_index, hash_mask, ""); - ij_index = LLVMBuildShl(builder, i, lp_build_const_int_vec(gallivm, type, 2), ""); - ij_index = LLVMBuildAdd(builder, ij_index, j, ""); - block_index = LLVMBuildShl(builder, hash_index, - lp_build_const_int_vec(gallivm, type, 4), ""); - block_index = LLVMBuildAdd(builder, ij_index, block_index, ""); - - if (n > 1) { - color = LLVMGetUndef(LLVMVectorType(i32t, n)); - for (count = 0; count < n; count++) { - LLVMValueRef index, cond, colorx; - LLVMValueRef block_indexx, hash_indexx, addrx, offsetx, ptr_addrx; - struct lp_build_if_state if_ctx; - - index = lp_build_const_int32(gallivm, count); - offsetx = LLVMBuildExtractElement(builder, offset, index, ""); - addrx = LLVMBuildZExt(builder, offsetx, i64t, ""); - addrx = LLVMBuildAdd(builder, addrx, addr, ""); - block_indexx = 
LLVMBuildExtractElement(builder, block_index, index, ""); - hash_indexx = LLVMBuildLShr(builder, block_indexx, - lp_build_const_int32(gallivm, 4), ""); - offset_stored = lookup_tag_data(gallivm, cache, hash_indexx); - cond = LLVMBuildICmp(builder, LLVMIntNE, offset_stored, addrx, ""); - - lp_build_if(&if_ctx, gallivm, cond); - { - ptr_addrx = LLVMBuildIntToPtr(builder, addrx, - LLVMPointerType(i8t, 0), ""); - update_cached_block(gallivm, format_desc, ptr_addrx, hash_indexx, cache); -#if LP_BUILD_FORMAT_CACHE_DEBUG - update_cache_access(gallivm, cache, 1, - LP_BUILD_FORMAT_CACHE_MEMBER_ACCESS_MISS); -#endif - } - lp_build_endif(&if_ctx); - - colorx = lookup_cached_pixel(gallivm, cache, block_indexx); - - color = LLVMBuildInsertElement(builder, color, colorx, - lp_build_const_int32(gallivm, count), ""); - } - } - else { - LLVMValueRef cond; - struct lp_build_if_state if_ctx; - - tmp = LLVMBuildZExt(builder, offset, i64t, ""); - addr = LLVMBuildAdd(builder, tmp, addr, ""); - offset_stored = lookup_tag_data(gallivm, cache, hash_index); - cond = LLVMBuildICmp(builder, LLVMIntNE, offset_stored, addr, ""); - - lp_build_if(&if_ctx, gallivm, cond); - { - tmp = LLVMBuildIntToPtr(builder, addr, LLVMPointerType(i8t, 0), ""); - update_cached_block(gallivm, format_desc, tmp, hash_index, cache); -#if LP_BUILD_FORMAT_CACHE_DEBUG - update_cache_access(gallivm, cache, 1, - LP_BUILD_FORMAT_CACHE_MEMBER_ACCESS_MISS); -#endif - } - lp_build_endif(&if_ctx); - - color = lookup_cached_pixel(gallivm, cache, block_index); - } -#if LP_BUILD_FORMAT_CACHE_DEBUG - update_cache_access(gallivm, cache, n, - LP_BUILD_FORMAT_CACHE_MEMBER_ACCESS_TOTAL); -#endif - return LLVMBuildBitCast(builder, color, LLVMVectorType(i8t, n * 4), ""); -} - diff -Nru mesa-18.3.3/src/gallium/auxiliary/gallivm/lp_bld_format.h mesa-19.0.1/src/gallium/auxiliary/gallivm/lp_bld_format.h --- mesa-18.3.3/src/gallium/auxiliary/gallivm/lp_bld_format.h 2017-11-05 00:14:08.000000000 +0000 +++ 
mesa-19.0.1/src/gallium/auxiliary/gallivm/lp_bld_format.h 2019-03-31 23:16:37.000000000 +0000 @@ -165,8 +165,12 @@ LLVMValueRef j); +/* + * S3TC + */ + LLVMValueRef -lp_build_fetch_cached_texels(struct gallivm_state *gallivm, +lp_build_fetch_s3tc_rgba_aos(struct gallivm_state *gallivm, const struct util_format_description *format_desc, unsigned n, LLVMValueRef base_ptr, diff -Nru mesa-18.3.3/src/gallium/auxiliary/gallivm/lp_bld_format_s3tc.c mesa-19.0.1/src/gallium/auxiliary/gallivm/lp_bld_format_s3tc.c --- mesa-18.3.3/src/gallium/auxiliary/gallivm/lp_bld_format_s3tc.c 1970-01-01 00:00:00.000000000 +0000 +++ mesa-19.0.1/src/gallium/auxiliary/gallivm/lp_bld_format_s3tc.c 2019-03-31 23:16:37.000000000 +0000 @@ -0,0 +1,2266 @@ +/************************************************************************** + * + * Copyright 2010-2018 VMware, Inc. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDERS, AUTHORS AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM, + * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR + * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE + * USE OR OTHER DEALINGS IN THE SOFTWARE. 
+ * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + **************************************************************************/ + + +/** + * @file + * s3tc pixel format manipulation. + * + * @author Roland Scheidegger + */ + + +#include "util/u_format.h" +#include "util/u_math.h" +#include "util/u_string.h" +#include "util/u_cpu_detect.h" +#include "util/u_debug.h" + +#include "lp_bld_arit.h" +#include "lp_bld_type.h" +#include "lp_bld_const.h" +#include "lp_bld_conv.h" +#include "lp_bld_gather.h" +#include "lp_bld_format.h" +#include "lp_bld_logic.h" +#include "lp_bld_pack.h" +#include "lp_bld_flow.h" +#include "lp_bld_printf.h" +#include "lp_bld_struct.h" +#include "lp_bld_swizzle.h" +#include "lp_bld_init.h" +#include "lp_bld_debug.h" +#include "lp_bld_intr.h" + + +/** + * Reverse an interleave2_half + * (ie. pick every second element, independent lower/upper halfs) + * sse2 can only do that with 32bit (shufps) or larger elements + * natively. (Otherwise, and/pack (even) or shift/pack (odd) + * could be used, ideally llvm would do that for us.) + * XXX: Unfortunately, this does NOT translate to a shufps if those + * are int vectors (and casting will not help, llvm needs to recognize it + * as "real" float). Instead, llvm will use a pshufd/pshufd/punpcklqdq + * sequence which I'm pretty sure is a lot worse despite domain transition + * penalties with shufps (except maybe on Nehalem). 
+ */ +static LLVMValueRef +lp_build_uninterleave2_half(struct gallivm_state *gallivm, + struct lp_type type, + LLVMValueRef a, + LLVMValueRef b, + unsigned lo_hi) +{ + LLVMValueRef shuffle, elems[LP_MAX_VECTOR_LENGTH]; + unsigned i, j; + + assert(type.length <= LP_MAX_VECTOR_LENGTH); + assert(lo_hi < 2); + + if (type.length * type.width == 256) { + assert(type.length >= 4); + for (i = 0, j = 0; i < type.length; ++i) { + if (i == type.length / 4) { + j = type.length; + } else if (i == type.length / 2) { + j = type.length / 2; + } else if (i == 3 * type.length / 4) { + j = 3 * type.length / 4; + } else { + j += 2; + } + elems[i] = lp_build_const_int32(gallivm, j + lo_hi); + } + } else { + for (i = 0; i < type.length; ++i) { + elems[i] = lp_build_const_int32(gallivm, 2*i + lo_hi); + } + } + + shuffle = LLVMConstVector(elems, type.length); + + return LLVMBuildShuffleVector(gallivm->builder, a, b, shuffle, ""); + +} + + +/** + * Build shuffle for extending vectors. + */ +static LLVMValueRef +lp_build_const_extend_shuffle(struct gallivm_state *gallivm, + unsigned n, unsigned length) +{ + LLVMValueRef elems[LP_MAX_VECTOR_LENGTH]; + unsigned i; + + assert(n <= length); + assert(length <= LP_MAX_VECTOR_LENGTH); + + /* TODO: cache results in a static table */ + + for(i = 0; i < n; i++) { + elems[i] = lp_build_const_int32(gallivm, i); + } + for (i = n; i < length; i++) { + elems[i] = LLVMGetUndef(LLVMInt32TypeInContext(gallivm->context)); + } + + return LLVMConstVector(elems, length); +} + +static LLVMValueRef +lp_build_const_unpackx2_shuffle(struct gallivm_state *gallivm, unsigned n) +{ + LLVMValueRef elems[LP_MAX_VECTOR_LENGTH]; + unsigned i, j; + + assert(n <= LP_MAX_VECTOR_LENGTH); + + /* TODO: cache results in a static table */ + + for(i = 0, j = 0; i < n; i += 2, ++j) { + elems[i + 0] = lp_build_const_int32(gallivm, 0 + j); + elems[i + 1] = lp_build_const_int32(gallivm, n + j); + elems[n + i + 0] = lp_build_const_int32(gallivm, 0 + n/2 + j); + elems[n + i + 1] = 
lp_build_const_int32(gallivm, n + n/2 + j); + } + + return LLVMConstVector(elems, n * 2); +} + +/* + * broadcast 1 element to all elements + */ +static LLVMValueRef +lp_build_const_shuffle1(struct gallivm_state *gallivm, + unsigned index, unsigned n) +{ + LLVMValueRef elems[LP_MAX_VECTOR_LENGTH]; + unsigned i; + + assert(n <= LP_MAX_VECTOR_LENGTH); + + /* TODO: cache results in a static table */ + + for (i = 0; i < n; i++) { + elems[i] = lp_build_const_int32(gallivm, index); + } + + return LLVMConstVector(elems, n); +} + +/* + * move 1 element to pos 0, rest undef + */ +static LLVMValueRef +lp_build_shuffle1undef(struct gallivm_state *gallivm, + LLVMValueRef a, unsigned index, unsigned n) +{ + LLVMValueRef elems[LP_MAX_VECTOR_LENGTH], shuf; + unsigned i; + + assert(n <= LP_MAX_VECTOR_LENGTH); + + elems[0] = lp_build_const_int32(gallivm, index); + + for (i = 1; i < n; i++) { + elems[i] = LLVMGetUndef(LLVMInt32TypeInContext(gallivm->context)); + } + shuf = LLVMConstVector(elems, n); + + return LLVMBuildShuffleVector(gallivm->builder, a, a, shuf, ""); +} + +static boolean +format_dxt1_variant(enum pipe_format format) +{ + return format == PIPE_FORMAT_DXT1_RGB || + format == PIPE_FORMAT_DXT1_RGBA || + format == PIPE_FORMAT_DXT1_SRGB || + format == PIPE_FORMAT_DXT1_SRGBA; + +} + +/** + * Gather elements from scatter positions in memory into vectors. + * This is customised for fetching texels from s3tc textures. + * For SSE, typical value is length=4. + * + * @param length length of the offsets + * @param colors the stored colors of the blocks will be extracted into this. + * @param codewords the codewords of the blocks will be extracted into this. + * @param alpha_lo used for storing lower 32bit of alpha components for dxt3/5 + * @param alpha_hi used for storing higher 32bit of alpha components for dxt3/5 + * @param base_ptr base pointer, should be a i8 pointer type. 
+ * @param offsets vector with offsets + */ +static void +lp_build_gather_s3tc(struct gallivm_state *gallivm, + unsigned length, + const struct util_format_description *format_desc, + LLVMValueRef *colors, + LLVMValueRef *codewords, + LLVMValueRef *alpha_lo, + LLVMValueRef *alpha_hi, + LLVMValueRef base_ptr, + LLVMValueRef offsets) +{ + LLVMBuilderRef builder = gallivm->builder; + unsigned block_bits = format_desc->block.bits; + unsigned i; + LLVMValueRef elems[8]; + LLVMTypeRef type32 = LLVMInt32TypeInContext(gallivm->context); + LLVMTypeRef type64 = LLVMInt64TypeInContext(gallivm->context); + LLVMTypeRef type32dxt; + struct lp_type lp_type32dxt; + + memset(&lp_type32dxt, 0, sizeof lp_type32dxt); + lp_type32dxt.width = 32; + lp_type32dxt.length = block_bits / 32; + type32dxt = lp_build_vec_type(gallivm, lp_type32dxt); + + assert(block_bits == 64 || block_bits == 128); + assert(length == 1 || length == 4 || length == 8); + + for (i = 0; i < length; ++i) { + elems[i] = lp_build_gather_elem(gallivm, length, + block_bits, block_bits, TRUE, + base_ptr, offsets, i, FALSE); + elems[i] = LLVMBuildBitCast(builder, elems[i], type32dxt, ""); + } + if (length == 1) { + LLVMValueRef elem = elems[0]; + if (block_bits == 128) { + *alpha_lo = LLVMBuildExtractElement(builder, elem, + lp_build_const_int32(gallivm, 0), ""); + *alpha_hi = LLVMBuildExtractElement(builder, elem, + lp_build_const_int32(gallivm, 1), ""); + *colors = LLVMBuildExtractElement(builder, elem, + lp_build_const_int32(gallivm, 2), ""); + *codewords = LLVMBuildExtractElement(builder, elem, + lp_build_const_int32(gallivm, 3), ""); + } + else { + *alpha_lo = LLVMGetUndef(type32); + *alpha_hi = LLVMGetUndef(type32); + *colors = LLVMBuildExtractElement(builder, elem, + lp_build_const_int32(gallivm, 0), ""); + *codewords = LLVMBuildExtractElement(builder, elem, + lp_build_const_int32(gallivm, 1), ""); + } + } + else { + LLVMValueRef tmp[4], cc01, cc23; + struct lp_type lp_type32, lp_type64, lp_type32dxt; + 
memset(&lp_type32, 0, sizeof lp_type32); + lp_type32.width = 32; + lp_type32.length = length; + memset(&lp_type64, 0, sizeof lp_type64); + lp_type64.width = 64; + lp_type64.length = length/2; + + if (block_bits == 128) { + if (length == 8) { + for (i = 0; i < 4; ++i) { + tmp[0] = elems[i]; + tmp[1] = elems[i+4]; + elems[i] = lp_build_concat(gallivm, tmp, lp_type32dxt, 2); + } + } + lp_build_transpose_aos(gallivm, lp_type32, elems, tmp); + *colors = tmp[2]; + *codewords = tmp[3]; + *alpha_lo = tmp[0]; + *alpha_hi = tmp[1]; + } else { + LLVMTypeRef type64_vec = LLVMVectorType(type64, length/2); + LLVMTypeRef type32_vec = LLVMVectorType(type32, length); + + for (i = 0; i < length; ++i) { + /* no-op shuffle */ + elems[i] = LLVMBuildShuffleVector(builder, elems[i], + LLVMGetUndef(type32dxt), + lp_build_const_extend_shuffle(gallivm, 2, 4), ""); + } + if (length == 8) { + for (i = 0; i < 4; ++i) { + tmp[0] = elems[i]; + tmp[1] = elems[i+4]; + elems[i] = lp_build_concat(gallivm, tmp, lp_type32, 2); + } + } + cc01 = lp_build_interleave2_half(gallivm, lp_type32, elems[0], elems[1], 0); + cc23 = lp_build_interleave2_half(gallivm, lp_type32, elems[2], elems[3], 0); + cc01 = LLVMBuildBitCast(builder, cc01, type64_vec, ""); + cc23 = LLVMBuildBitCast(builder, cc23, type64_vec, ""); + *colors = lp_build_interleave2_half(gallivm, lp_type64, cc01, cc23, 0); + *codewords = lp_build_interleave2_half(gallivm, lp_type64, cc01, cc23, 1); + *colors = LLVMBuildBitCast(builder, *colors, type32_vec, ""); + *codewords = LLVMBuildBitCast(builder, *codewords, type32_vec, ""); + } + } +} + +/** Convert from containing 2 x n rgb565 colors + * to 2 rgba8888 colors + * This is the most optimized version I can think of + * should be nearly as fast as decoding only one color + * NOTE: alpha channel will be set to 0 + * @param colors is a vector containing the rgb565 colors + */ +static void +color_expand2_565_to_8888(struct gallivm_state *gallivm, + unsigned n, + LLVMValueRef colors, + LLVMValueRef 
*color0, + LLVMValueRef *color1) +{ + LLVMBuilderRef builder = gallivm->builder; + LLVMValueRef r, g, b, rblo, glo; + LLVMValueRef rgblomask, rb, rgb0, rgb1; + struct lp_type type, type16, type8; + + assert(n > 1); + + memset(&type, 0, sizeof type); + type.width = 32; + type.length = n; + + memset(&type16, 0, sizeof type16); + type16.width = 16; + type16.length = 2 * n; + + memset(&type8, 0, sizeof type8); + type8.width = 8; + type8.length = 4 * n; + + rgblomask = lp_build_const_int_vec(gallivm, type16, 0x0707); + colors = LLVMBuildBitCast(builder, colors, + lp_build_vec_type(gallivm, type16), ""); + /* move r into low 8 bits, b into high 8 bits, g into another reg (low bits) + * make sure low bits of r are zero - could use AND but requires constant */ + r = LLVMBuildLShr(builder, colors, lp_build_const_int_vec(gallivm, type16, 11), ""); + r = LLVMBuildShl(builder, r, lp_build_const_int_vec(gallivm, type16, 3), ""); + b = LLVMBuildShl(builder, colors, lp_build_const_int_vec(gallivm, type16, 11), ""); + rb = LLVMBuildOr(builder, r, b, ""); + rblo = LLVMBuildLShr(builder, rb, lp_build_const_int_vec(gallivm, type16, 5), ""); + /* don't have byte shift hence need mask */ + rblo = LLVMBuildAnd(builder, rblo, rgblomask, ""); + rb = LLVMBuildOr(builder, rb, rblo, ""); + + /* make sure low bits of g are zero */ + g = LLVMBuildAnd(builder, colors, lp_build_const_int_vec(gallivm, type16, 0x07e0), ""); + g = LLVMBuildLShr(builder, g, lp_build_const_int_vec(gallivm, type16, 3), ""); + glo = LLVMBuildLShr(builder, g, lp_build_const_int_vec(gallivm, type16, 6), ""); + g = LLVMBuildOr(builder, g, glo, ""); + + rb = LLVMBuildBitCast(builder, rb, lp_build_vec_type(gallivm, type8), ""); + g = LLVMBuildBitCast(builder, g, lp_build_vec_type(gallivm, type8), ""); + rgb0 = lp_build_interleave2_half(gallivm, type8, rb, g, 0); + rgb1 = lp_build_interleave2_half(gallivm, type8, rb, g, 1); + + rgb0 = LLVMBuildBitCast(builder, rgb0, lp_build_vec_type(gallivm, type), ""); + rgb1 = 
LLVMBuildBitCast(builder, rgb1, lp_build_vec_type(gallivm, type), ""); + + /* rgb0 is rgb00, rgb01, rgb10, rgb11 + * instead of rgb00, rgb10, rgb20, rgb30 hence need reshuffle + * on x86 this _should_ just generate one shufps... + */ + *color0 = lp_build_uninterleave2_half(gallivm, type, rgb0, rgb1, 0); + *color1 = lp_build_uninterleave2_half(gallivm, type, rgb0, rgb1, 1); +} + + +/** Convert from containing rgb565 colors + * (in first 16 bits) to rgba8888 colors + * bits 16-31 MBZ + * NOTE: alpha channel will be set to 0 + * @param colors is a vector containing the rgb565 colors + */ +static LLVMValueRef +color_expand_565_to_8888(struct gallivm_state *gallivm, + unsigned n, + LLVMValueRef colors) +{ + LLVMBuilderRef builder = gallivm->builder; + LLVMValueRef rgba, r, g, b, rgblo, glo; + LLVMValueRef rbhimask, g6mask, rgblomask; + struct lp_type type; + memset(&type, 0, sizeof type); + type.width = 32; + type.length = n; + + /* color expansion: + * first extract and shift colors into their final locations + * (high bits - low bits zero at this point) + * then replicate highest bits to the lowest bits + * note rb replication can be done in parallel but not g + * (different shift) + * r5mask = 0xf800, g6mask = 0x07e0, b5mask = 0x001f + * rhigh = 8, ghigh = 5, bhigh = 19 + * rblow = 5, glow = 6 + * rgblowmask = 0x00070307 + * r = colors >> rhigh + * b = colors << bhigh + * g = (colors & g6mask) << ghigh + * rb = (r | b) rbhimask + * rbtmp = rb >> rblow + * gtmp = rb >> glow + * rbtmp = rbtmp | gtmp + * rbtmp = rbtmp & rgblowmask + * rgb = rb | g | rbtmp + */ + g6mask = lp_build_const_int_vec(gallivm, type, 0x07e0); + rbhimask = lp_build_const_int_vec(gallivm, type, 0x00f800f8); + rgblomask = lp_build_const_int_vec(gallivm, type, 0x00070307); + + r = LLVMBuildLShr(builder, colors, lp_build_const_int_vec(gallivm, type, 8), ""); + b = LLVMBuildShl(builder, colors, lp_build_const_int_vec(gallivm, type, 19), ""); + g = LLVMBuildAnd(builder, colors, g6mask, ""); + g = 
LLVMBuildShl(builder, g, lp_build_const_int_vec(gallivm, type, 5), ""); + rgba = LLVMBuildOr(builder, r, b, ""); + rgba = LLVMBuildAnd(builder, rgba, rbhimask, ""); + rgblo = LLVMBuildLShr(builder, rgba, lp_build_const_int_vec(gallivm, type, 5), ""); + glo = LLVMBuildLShr(builder, g, lp_build_const_int_vec(gallivm, type, 6), ""); + rgblo = LLVMBuildOr(builder, rgblo, glo, ""); + rgblo = LLVMBuildAnd(builder, rgblo, rgblomask, ""); + rgba = LLVMBuildOr(builder, rgba, g, ""); + rgba = LLVMBuildOr(builder, rgba, rgblo, ""); + + return rgba; +} + + +/* + * Average two byte vectors. (Will always round up.) + */ +static LLVMValueRef +lp_build_pavgb(struct lp_build_context *bld8, + LLVMValueRef v0, + LLVMValueRef v1) +{ + struct gallivm_state *gallivm = bld8->gallivm; + LLVMBuilderRef builder = gallivm->builder; + assert(bld8->type.width == 8); + assert(bld8->type.length == 16 || bld8->type.length == 32); + if (HAVE_LLVM < 0x0600) { + LLVMValueRef intrargs[2]; + char *intr_name = bld8->type.length == 32 ? "llvm.x86.avx2.pavg.b" : + "llvm.x86.sse2.pavg.b"; + intrargs[0] = v0; + intrargs[1] = v1; + return lp_build_intrinsic(builder, intr_name, + bld8->vec_type, intrargs, 2, 0); + } else { + /* + * Must match llvm's autoupgrade of pavg.b intrinsic to be useful. + * You better hope the backend code manages to detect the pattern, and + * the pattern doesn't change there... 
+ */ + struct lp_type type_ext = bld8->type; + LLVMTypeRef vec_type_ext; + LLVMValueRef res; + LLVMValueRef ext_one; + type_ext.width = 16; + vec_type_ext = lp_build_vec_type(gallivm, type_ext); + ext_one = lp_build_const_vec(gallivm, type_ext, 1); + + v0 = LLVMBuildZExt(builder, v0, vec_type_ext, ""); + v1 = LLVMBuildZExt(builder, v1, vec_type_ext, ""); + res = LLVMBuildAdd(builder, v0, v1, ""); + res = LLVMBuildAdd(builder, res, ext_one, ""); + res = LLVMBuildLShr(builder, res, ext_one, ""); + res = LLVMBuildTrunc(builder, res, bld8->vec_type, ""); + return res; + } +} + +/** + * Calculate 1/3(v1-v0) + v0 + * and 2*1/3(v1-v0) + v0 + */ +static void +lp_build_lerp23(struct lp_build_context *bld, + LLVMValueRef v0, + LLVMValueRef v1, + LLVMValueRef *res0, + LLVMValueRef *res1) +{ + struct gallivm_state *gallivm = bld->gallivm; + LLVMValueRef x, x_lo, x_hi, delta_lo, delta_hi; + LLVMValueRef mul_lo, mul_hi, v0_lo, v0_hi, v1_lo, v1_hi, tmp; + const struct lp_type type = bld->type; + LLVMBuilderRef builder = bld->gallivm->builder; + struct lp_type i16_type = lp_wider_type(type); + struct lp_build_context bld2; + + assert(lp_check_value(type, v0)); + assert(lp_check_value(type, v1)); + assert(!type.floating && !type.fixed && !type.norm && type.width == 8); + + lp_build_context_init(&bld2, gallivm, i16_type); + bld2.type.sign = TRUE; + x = lp_build_const_int_vec(gallivm, bld->type, 255*1/3); + + /* FIXME: use native avx256 unpack/pack */ + lp_build_unpack2(gallivm, type, i16_type, x, &x_lo, &x_hi); + lp_build_unpack2(gallivm, type, i16_type, v0, &v0_lo, &v0_hi); + lp_build_unpack2(gallivm, type, i16_type, v1, &v1_lo, &v1_hi); + delta_lo = lp_build_sub(&bld2, v1_lo, v0_lo); + delta_hi = lp_build_sub(&bld2, v1_hi, v0_hi); + + mul_lo = LLVMBuildMul(builder, x_lo, delta_lo, ""); + mul_hi = LLVMBuildMul(builder, x_hi, delta_hi, ""); + + x_lo = LLVMBuildLShr(builder, mul_lo, lp_build_const_int_vec(gallivm, i16_type, 8), ""); + x_hi = LLVMBuildLShr(builder, mul_hi, 
lp_build_const_int_vec(gallivm, i16_type, 8), ""); + /* lerp optimization: pack now, do add afterwards */ + tmp = lp_build_pack2(gallivm, i16_type, type, x_lo, x_hi); + *res0 = lp_build_add(bld, tmp, v0); + + x_lo = LLVMBuildLShr(builder, mul_lo, lp_build_const_int_vec(gallivm, i16_type, 7), ""); + x_hi = LLVMBuildLShr(builder, mul_hi, lp_build_const_int_vec(gallivm, i16_type, 7), ""); + /* unlike above still need mask (but add still afterwards). */ + x_lo = LLVMBuildAnd(builder, x_lo, lp_build_const_int_vec(gallivm, i16_type, 0xff), ""); + x_hi = LLVMBuildAnd(builder, x_hi, lp_build_const_int_vec(gallivm, i16_type, 0xff), ""); + tmp = lp_build_pack2(gallivm, i16_type, type, x_lo, x_hi); + *res1 = lp_build_add(bld, tmp, v0); +} + +/** + * Convert from s3tc dxt1 to <4n x i8> RGBA AoS + * @param colors is a vector with n x 2x16bit colors + * @param codewords is a vector containing the codewords + * @param i is a vector with the x pixel coordinate (0 to 3) + * @param j is a vector with the y pixel coordinate (0 to 3) + */ +static LLVMValueRef +s3tc_dxt1_full_to_rgba_aos(struct gallivm_state *gallivm, + unsigned n, + enum pipe_format format, + LLVMValueRef colors, + LLVMValueRef codewords, + LLVMValueRef i, + LLVMValueRef j) +{ + LLVMBuilderRef builder = gallivm->builder; + LLVMValueRef color0, color1, color2, color3, color2_2, color3_2; + LLVMValueRef rgba, a, colors0, colors1, col0, col1, const2; + LLVMValueRef bit_pos, sel_mask, sel_lo, sel_hi, indices; + struct lp_type type, type8; + struct lp_build_context bld8, bld32; + boolean is_dxt1_variant = format_dxt1_variant(format); + + memset(&type, 0, sizeof type); + type.width = 32; + type.length = n; + + memset(&type8, 0, sizeof type8); + type8.width = 8; + type8.length = 4*n; + + assert(lp_check_value(type, i)); + assert(lp_check_value(type, j)); + + a = lp_build_const_int_vec(gallivm, type, 0xff000000); + + lp_build_context_init(&bld32, gallivm, type); + lp_build_context_init(&bld8, gallivm, type8); + + /* + * works 
as follows: + * - expand color0/color1 to rgba8888 + * - calculate color2/3 (interpolation) according to color0 < color1 rules + * - calculate color2/3 according to color0 >= color1 rules + * - do selection of color2/3 according to comparison of color0/1 + * - extract indices (vector shift). + * - use compare/select to select the correct color. Since we have 2bit + * indices (and 4 colors), needs at least three compare/selects. + */ + /* + * expand the two colors + */ + col0 = LLVMBuildAnd(builder, colors, lp_build_const_int_vec(gallivm, type, 0x0000ffff), ""); + col1 = LLVMBuildLShr(builder, colors, lp_build_const_int_vec(gallivm, type, 16), ""); + if (n > 1) { + color_expand2_565_to_8888(gallivm, n, colors, &color0, &color1); + } + else { + color0 = color_expand_565_to_8888(gallivm, n, col0); + color1 = color_expand_565_to_8888(gallivm, n, col1); + } + + /* + * interpolate colors + * color2_1 is 2/3 color0 + 1/3 color1 + * color3_1 is 1/3 color0 + 2/3 color1 + * color2_2 is 1/2 color0 + 1/2 color1 + * color3_2 is 0 + */ + + colors0 = LLVMBuildBitCast(builder, color0, bld8.vec_type, ""); + colors1 = LLVMBuildBitCast(builder, color1, bld8.vec_type, ""); + /* can combine 2 lerps into one mostly - still looks expensive enough. */ + lp_build_lerp23(&bld8, colors0, colors1, &color2, &color3); + color2 = LLVMBuildBitCast(builder, color2, bld32.vec_type, ""); + color3 = LLVMBuildBitCast(builder, color3, bld32.vec_type, ""); + + /* dxt3/5 always use 4-color encoding */ + if (is_dxt1_variant) { + /* fix up alpha */ + if (format == PIPE_FORMAT_DXT1_RGBA || + format == PIPE_FORMAT_DXT1_SRGBA) { + color0 = LLVMBuildOr(builder, color0, a, ""); + color1 = LLVMBuildOr(builder, color1, a, ""); + color3 = LLVMBuildOr(builder, color3, a, ""); + } + /* + * XXX with sse2 and 16x8 vectors, should use pavgb even when n == 1. + * Much cheaper (but we don't care that much if n == 1). 
+ */ + if ((util_cpu_caps.has_sse2 && n == 4) || + (util_cpu_caps.has_avx2 && n == 8)) { + color2_2 = lp_build_pavgb(&bld8, colors0, colors1); + color2_2 = LLVMBuildBitCast(builder, color2_2, bld32.vec_type, ""); + } + else { + struct lp_type i16_type = lp_wider_type(type8); + struct lp_build_context bld2; + LLVMValueRef v0_lo, v0_hi, v1_lo, v1_hi, addlo, addhi; + + lp_build_context_init(&bld2, gallivm, i16_type); + bld2.type.sign = TRUE; + + /* + * This isn't as expensive as it looks (the unpack is the same as + * for lerp23), with correct rounding. + * (Note that while rounding is correct, this will always round down, + * whereas pavgb will always round up.) + */ + /* FIXME: use native avx256 unpack/pack */ + lp_build_unpack2(gallivm, type8, i16_type, colors0, &v0_lo, &v0_hi); + lp_build_unpack2(gallivm, type8, i16_type, colors1, &v1_lo, &v1_hi); + + addlo = lp_build_add(&bld2, v0_lo, v1_lo); + addhi = lp_build_add(&bld2, v0_hi, v1_hi); + addlo = LLVMBuildLShr(builder, addlo, + lp_build_const_int_vec(gallivm, i16_type, 1), ""); + addhi = LLVMBuildLShr(builder, addhi, + lp_build_const_int_vec(gallivm, i16_type, 1), ""); + color2_2 = lp_build_pack2(gallivm, i16_type, type8, addlo, addhi); + color2_2 = LLVMBuildBitCast(builder, color2_2, bld32.vec_type, ""); + } + color3_2 = lp_build_const_int_vec(gallivm, type, 0); + + /* select between colors2/3 */ + /* signed compare is faster saves some xors */ + type.sign = TRUE; + sel_mask = lp_build_compare(gallivm, type, PIPE_FUNC_GREATER, col0, col1); + color2 = lp_build_select(&bld32, sel_mask, color2, color2_2); + color3 = lp_build_select(&bld32, sel_mask, color3, color3_2); + type.sign = FALSE; + + if (format == PIPE_FORMAT_DXT1_RGBA || + format == PIPE_FORMAT_DXT1_SRGBA) { + color2 = LLVMBuildOr(builder, color2, a, ""); + } + } + + const2 = lp_build_const_int_vec(gallivm, type, 2); + /* extract 2-bit index values */ + bit_pos = LLVMBuildShl(builder, j, const2, ""); + bit_pos = LLVMBuildAdd(builder, bit_pos, i, ""); + 
bit_pos = LLVMBuildAdd(builder, bit_pos, bit_pos, ""); + /* + * NOTE: This innocent looking shift is very expensive with x86/ssex. + * Shifts with per-elemnent shift count get roughly translated to + * extract (count), extract (value), shift, move (back to xmm), unpack + * per element! + * So about 20 instructions here for 4xi32. + * Newer llvm versions (3.7+) will not do extract/insert but use a + * a couple constant count vector shifts plus shuffles. About same + * amount of instructions unfortunately... + * Would get much worse with 8xi16 even... + * We could actually do better here: + * - subtract bit_pos from 128+30, shl 23, convert float to int... + * - now do mul with codewords followed by shr 30... + * But requires 32bit->32bit mul, sse41 only (well that's emulatable + * with 2 32bit->64bit muls...) and not exactly cheap + * AVX2, of course, fixes this nonsense. + */ + indices = LLVMBuildLShr(builder, codewords, bit_pos, ""); + + /* finally select the colors */ + sel_lo = LLVMBuildAnd(builder, indices, bld32.one, ""); + sel_lo = lp_build_compare(gallivm, type, PIPE_FUNC_EQUAL, sel_lo, bld32.one); + color0 = lp_build_select(&bld32, sel_lo, color1, color0); + color2 = lp_build_select(&bld32, sel_lo, color3, color2); + sel_hi = LLVMBuildAnd(builder, indices, const2, ""); + sel_hi = lp_build_compare(gallivm, type, PIPE_FUNC_EQUAL, sel_hi, const2); + rgba = lp_build_select(&bld32, sel_hi, color2, color0); + + /* fix up alpha */ + if (format == PIPE_FORMAT_DXT1_RGB || + format == PIPE_FORMAT_DXT1_SRGB) { + rgba = LLVMBuildOr(builder, rgba, a, ""); + } + return LLVMBuildBitCast(builder, rgba, bld8.vec_type, ""); +} + + +static LLVMValueRef +s3tc_dxt1_to_rgba_aos(struct gallivm_state *gallivm, + unsigned n, + enum pipe_format format, + LLVMValueRef colors, + LLVMValueRef codewords, + LLVMValueRef i, + LLVMValueRef j) +{ + return s3tc_dxt1_full_to_rgba_aos(gallivm, n, format, + colors, codewords, i, j); +} + + +/** + * Convert from s3tc dxt3 to <4n x i8> RGBA AoS + 
* @param colors is a vector with n x 2x16bit colors + * @param codewords is a vector containing the codewords + * @param alphas is a vector containing the alpha values + * @param i is a vector with the x pixel coordinate (0 to 3) + * @param j is a vector with the y pixel coordinate (0 to 3) + */ +static LLVMValueRef +s3tc_dxt3_to_rgba_aos(struct gallivm_state *gallivm, + unsigned n, + enum pipe_format format, + LLVMValueRef colors, + LLVMValueRef codewords, + LLVMValueRef alpha_low, + LLVMValueRef alpha_hi, + LLVMValueRef i, + LLVMValueRef j) +{ + LLVMBuilderRef builder = gallivm->builder; + LLVMValueRef rgba, tmp, tmp2; + LLVMValueRef bit_pos, sel_mask; + struct lp_type type, type8; + struct lp_build_context bld; + + memset(&type, 0, sizeof type); + type.width = 32; + type.length = n; + + memset(&type8, 0, sizeof type8); + type8.width = 8; + type8.length = n*4; + + assert(lp_check_value(type, i)); + assert(lp_check_value(type, j)); + + lp_build_context_init(&bld, gallivm, type); + + rgba = s3tc_dxt1_to_rgba_aos(gallivm, n, format, + colors, codewords, i, j); + + rgba = LLVMBuildBitCast(builder, rgba, bld.vec_type, ""); + + /* + * Extract alpha values. Since we now need to select from + * which 32bit vector values are fetched, construct selection + * mask from highest bit of bit_pos, and use select, then shift + * according to the bit_pos (without the highest bit). + * Note this is pointless for n == 1 case. Could just + * directly use 64bit arithmetic if we'd extract 64bit + * alpha value instead of 2x32... 
+ */ + /* pos = 4*(4j+i) */ + bit_pos = LLVMBuildShl(builder, j, lp_build_const_int_vec(gallivm, type, 2), ""); + bit_pos = LLVMBuildAdd(builder, bit_pos, i, ""); + bit_pos = LLVMBuildShl(builder, bit_pos, + lp_build_const_int_vec(gallivm, type, 2), ""); + sel_mask = LLVMBuildLShr(builder, bit_pos, + lp_build_const_int_vec(gallivm, type, 5), ""); + sel_mask = LLVMBuildSub(builder, sel_mask, bld.one, ""); + tmp = lp_build_select(&bld, sel_mask, alpha_low, alpha_hi); + bit_pos = LLVMBuildAnd(builder, bit_pos, + lp_build_const_int_vec(gallivm, type, 0xffffffdf), ""); + /* Warning: slow shift with per element count */ + /* + * Could do pshufb here as well - just use appropriate 2 bits in bit_pos + * to select the right byte with pshufb. Then for the remaining one bit + * just do shift/select. + */ + tmp = LLVMBuildLShr(builder, tmp, bit_pos, ""); + + /* combined expand from a4 to a8 and shift into position */ + tmp = LLVMBuildShl(builder, tmp, lp_build_const_int_vec(gallivm, type, 28), ""); + tmp2 = LLVMBuildLShr(builder, tmp, lp_build_const_int_vec(gallivm, type, 4), ""); + tmp = LLVMBuildOr(builder, tmp, tmp2, ""); + + rgba = LLVMBuildOr(builder, tmp, rgba, ""); + + return LLVMBuildBitCast(builder, rgba, lp_build_vec_type(gallivm, type8), ""); +} + +static LLVMValueRef +lp_build_lerpdxta(struct gallivm_state *gallivm, + LLVMValueRef alpha0, + LLVMValueRef alpha1, + LLVMValueRef code, + LLVMValueRef sel_mask, + unsigned n) +{ + /* + * note we're doing lerp in 16bit since 32bit pmulld is only available in sse41 + * (plus pmullw is actually faster...) + * we just pretend our 32bit values (which are really only 8bit) are 16bits. + * Note that this is obviously a disaster for the scalar case. 
+ */ + LLVMBuilderRef builder = gallivm->builder; + LLVMValueRef delta, ainterp; + LLVMValueRef weight5, weight7, weight; + struct lp_type type32, type16, type8; + struct lp_build_context bld16; + + memset(&type32, 0, sizeof type32); + type32.width = 32; + type32.length = n; + memset(&type16, 0, sizeof type16); + type16.width = 16; + type16.length = 2*n; + type16.sign = TRUE; + memset(&type8, 0, sizeof type8); + type8.width = 8; + type8.length = 4*n; + + lp_build_context_init(&bld16, gallivm, type16); + /* 255/7 is a bit off - increase accuracy at the expense of shift later */ + sel_mask = LLVMBuildBitCast(builder, sel_mask, bld16.vec_type, ""); + weight5 = lp_build_const_int_vec(gallivm, type16, 255*64/5); + weight7 = lp_build_const_int_vec(gallivm, type16, 255*64/7); + weight = lp_build_select(&bld16, sel_mask, weight7, weight5); + + alpha0 = LLVMBuildBitCast(builder, alpha0, bld16.vec_type, ""); + alpha1 = LLVMBuildBitCast(builder, alpha1, bld16.vec_type, ""); + code = LLVMBuildBitCast(builder, code, bld16.vec_type, ""); + /* we'll get garbage in the elements which had code 0 (or larger than 5 or 7) + but we don't care */ + code = LLVMBuildSub(builder, code, bld16.one, ""); + + weight = LLVMBuildMul(builder, weight, code, ""); + weight = LLVMBuildLShr(builder, weight, + lp_build_const_int_vec(gallivm, type16, 6), ""); + + delta = LLVMBuildSub(builder, alpha1, alpha0, ""); + + ainterp = LLVMBuildMul(builder, delta, weight, ""); + ainterp = LLVMBuildLShr(builder, ainterp, + lp_build_const_int_vec(gallivm, type16, 8), ""); + + ainterp = LLVMBuildBitCast(builder, ainterp, lp_build_vec_type(gallivm, type8), ""); + alpha0 = LLVMBuildBitCast(builder, alpha0, lp_build_vec_type(gallivm, type8), ""); + ainterp = LLVMBuildAdd(builder, alpha0, ainterp, ""); + ainterp = LLVMBuildBitCast(builder, ainterp, lp_build_vec_type(gallivm, type32), ""); + + return ainterp; +} + +/** + * Convert from s3tc dxt5 to <4n x i8> RGBA AoS + * @param colors is a vector with n x 2x16bit colors 
+ * @param codewords is a vector containing the codewords + * @param alphas is a vector containing the alpha values + * @param i is a vector with the x pixel coordinate (0 to 3) + * @param j is a vector with the y pixel coordinate (0 to 3) + */ +static LLVMValueRef +s3tc_dxt5_full_to_rgba_aos(struct gallivm_state *gallivm, + unsigned n, + enum pipe_format format, + LLVMValueRef colors, + LLVMValueRef codewords, + LLVMValueRef alpha_lo, + LLVMValueRef alpha_hi, + LLVMValueRef i, + LLVMValueRef j) +{ + LLVMBuilderRef builder = gallivm->builder; + LLVMValueRef rgba, tmp, alpha0, alpha1, alphac, alphac0, bit_pos, shift; + LLVMValueRef sel_mask, tmp_mask, alpha, alpha64, code_s; + LLVMValueRef mask6, mask7, ainterp; + LLVMTypeRef i64t = LLVMInt64TypeInContext(gallivm->context); + LLVMTypeRef i32t = LLVMInt32TypeInContext(gallivm->context); + struct lp_type type, type8; + struct lp_build_context bld32; + + memset(&type, 0, sizeof type); + type.width = 32; + type.length = n; + + memset(&type8, 0, sizeof type8); + type8.width = 8; + type8.length = n*4; + + assert(lp_check_value(type, i)); + assert(lp_check_value(type, j)); + + lp_build_context_init(&bld32, gallivm, type); + + assert(lp_check_value(type, i)); + assert(lp_check_value(type, j)); + + rgba = s3tc_dxt1_to_rgba_aos(gallivm, n, format, + colors, codewords, i, j); + + rgba = LLVMBuildBitCast(builder, rgba, bld32.vec_type, ""); + + /* this looks pretty complex for vectorization: + * extract a0/a1 values + * extract code + * select weights for interpolation depending on a0 > a1 + * mul weights by code - 1 + * lerp a0/a1/weights + * use selects for getting either a0, a1, interp a, interp a/0.0, interp a/1.0 + */ + + alpha0 = LLVMBuildAnd(builder, alpha_lo, + lp_build_const_int_vec(gallivm, type, 0xff), ""); + alpha1 = LLVMBuildLShr(builder, alpha_lo, + lp_build_const_int_vec(gallivm, type, 8), ""); + alpha1 = LLVMBuildAnd(builder, alpha1, + lp_build_const_int_vec(gallivm, type, 0xff), ""); + + /* pos = 3*(4j+i) */ + 
bit_pos = LLVMBuildShl(builder, j, lp_build_const_int_vec(gallivm, type, 2), ""); + bit_pos = LLVMBuildAdd(builder, bit_pos, i, ""); + tmp = LLVMBuildAdd(builder, bit_pos, bit_pos, ""); + bit_pos = LLVMBuildAdd(builder, bit_pos, tmp, ""); + /* get rid of first 2 bytes - saves shifts of alpha_lo/hi */ + bit_pos = LLVMBuildAdd(builder, bit_pos, + lp_build_const_int_vec(gallivm, type, 16), ""); + + if (n == 1) { + struct lp_type type64; + memset(&type64, 0, sizeof type64); + type64.width = 64; + type64.length = 1; + /* This is pretty pointless could avoid by just directly extracting + 64bit in the first place but makes it more complicated elsewhere */ + alpha_lo = LLVMBuildZExt(builder, alpha_lo, i64t, ""); + alpha_hi = LLVMBuildZExt(builder, alpha_hi, i64t, ""); + alphac0 = LLVMBuildShl(builder, alpha_hi, + lp_build_const_int_vec(gallivm, type64, 32), ""); + alphac0 = LLVMBuildOr(builder, alpha_lo, alphac0, ""); + + shift = LLVMBuildZExt(builder, bit_pos, i64t, ""); + alphac0 = LLVMBuildLShr(builder, alphac0, shift, ""); + alphac0 = LLVMBuildTrunc(builder, alphac0, i32t, ""); + alphac = LLVMBuildAnd(builder, alphac0, + lp_build_const_int_vec(gallivm, type, 0x7), ""); + } + else { + /* + * Using non-native vector length here (actually, with avx2 and + * n == 4 llvm will indeed expand to ymm regs...) + * At least newer llvm versions handle that ok. + * llvm 3.7+ will even handle the emulated 64bit shift with variable + * shift count without extraction (and it's actually easier to + * emulate than the 32bit one). 
+ */ + alpha64 = LLVMBuildShuffleVector(builder, alpha_lo, alpha_hi, + lp_build_const_unpackx2_shuffle(gallivm, n), ""); + + alpha64 = LLVMBuildBitCast(builder, alpha64, LLVMVectorType(i64t, n), ""); + shift = LLVMBuildZExt(builder, bit_pos, LLVMVectorType(i64t, n), ""); + alphac = LLVMBuildLShr(builder, alpha64, shift, ""); + alphac = LLVMBuildTrunc(builder, alphac, bld32.vec_type, ""); + + alphac = LLVMBuildAnd(builder, alphac, + lp_build_const_int_vec(gallivm, type, 0x7), ""); + } + + /* signed compare is faster saves some xors */ + type.sign = TRUE; + /* alpha0 > alpha1 selection */ + sel_mask = lp_build_compare(gallivm, type, PIPE_FUNC_GREATER, + alpha0, alpha1); + ainterp = lp_build_lerpdxta(gallivm, alpha0, alpha1, alphac, sel_mask, n); + + /* + * if a0 > a1 then we select a0 for case 0, a1 for case 1, interp otherwise. + * else we select a0 for case 0, a1 for case 1, + * interp for case 2-5, 00 for 6 and 0xff(ffffff) for 7 + * a = (c == 0) ? a0 : a1 + * a = (c > 1) ? ainterp : a + * Finally handle case 6/7 for !(a0 > a1) + * a = (!(a0 > a1) && c == 6) ? 0 : a (andnot with mask) + * a = (!(a0 > a1) && c == 7) ? 
0xffffffff : a (or with mask) + */ + tmp_mask = lp_build_compare(gallivm, type, PIPE_FUNC_EQUAL, + alphac, bld32.zero); + alpha = lp_build_select(&bld32, tmp_mask, alpha0, alpha1); + tmp_mask = lp_build_compare(gallivm, type, PIPE_FUNC_GREATER, + alphac, bld32.one); + alpha = lp_build_select(&bld32, tmp_mask, ainterp, alpha); + + code_s = LLVMBuildAnd(builder, alphac, + LLVMBuildNot(builder, sel_mask, ""), ""); + mask6 = lp_build_compare(gallivm, type, PIPE_FUNC_EQUAL, + code_s, lp_build_const_int_vec(gallivm, type, 6)); + mask7 = lp_build_compare(gallivm, type, PIPE_FUNC_EQUAL, + code_s, lp_build_const_int_vec(gallivm, type, 7)); + alpha = LLVMBuildAnd(builder, alpha, LLVMBuildNot(builder, mask6, ""), ""); + alpha = LLVMBuildOr(builder, alpha, mask7, ""); + + alpha = LLVMBuildShl(builder, alpha, lp_build_const_int_vec(gallivm, type, 24), ""); + rgba = LLVMBuildOr(builder, alpha, rgba, ""); + + return LLVMBuildBitCast(builder, rgba, lp_build_vec_type(gallivm, type8), ""); +} + + +static void +lp_build_gather_s3tc_simple_scalar(struct gallivm_state *gallivm, + const struct util_format_description *format_desc, + LLVMValueRef *dxt_block, + LLVMValueRef ptr) +{ + LLVMBuilderRef builder = gallivm->builder; + unsigned block_bits = format_desc->block.bits; + LLVMValueRef elem, shuf; + LLVMTypeRef type32 = LLVMIntTypeInContext(gallivm->context, 32); + LLVMTypeRef src_type = LLVMIntTypeInContext(gallivm->context, block_bits); + LLVMTypeRef src_ptr_type = LLVMPointerType(src_type, 0); + LLVMTypeRef type32_4 = LLVMVectorType(type32, 4); + + assert(block_bits == 64 || block_bits == 128); + + ptr = LLVMBuildBitCast(builder, ptr, src_ptr_type, ""); + elem = LLVMBuildLoad(builder, ptr, ""); + + if (block_bits == 128) { + /* just return block as is */ + *dxt_block = LLVMBuildBitCast(builder, elem, type32_4, ""); + } + else { + LLVMTypeRef type32_2 = LLVMVectorType(type32, 2); + shuf = lp_build_const_extend_shuffle(gallivm, 2, 4); + elem = LLVMBuildBitCast(builder, elem, type32_2, 
""); + *dxt_block = LLVMBuildShuffleVector(builder, elem, + LLVMGetUndef(type32_2), shuf, ""); + } +} + + +static void +s3tc_store_cached_block(struct gallivm_state *gallivm, + LLVMValueRef *col, + LLVMValueRef tag_value, + LLVMValueRef hash_index, + LLVMValueRef cache) +{ + LLVMBuilderRef builder = gallivm->builder; + LLVMValueRef ptr, indices[3]; + LLVMTypeRef type_ptr4x32; + unsigned count; + + type_ptr4x32 = LLVMPointerType(LLVMVectorType(LLVMInt32TypeInContext(gallivm->context), 4), 0); + indices[0] = lp_build_const_int32(gallivm, 0); + indices[1] = lp_build_const_int32(gallivm, LP_BUILD_FORMAT_CACHE_MEMBER_TAGS); + indices[2] = hash_index; + ptr = LLVMBuildGEP(builder, cache, indices, ARRAY_SIZE(indices), ""); + LLVMBuildStore(builder, tag_value, ptr); + + indices[1] = lp_build_const_int32(gallivm, LP_BUILD_FORMAT_CACHE_MEMBER_DATA); + hash_index = LLVMBuildMul(builder, hash_index, + lp_build_const_int32(gallivm, 16), ""); + for (count = 0; count < 4; count++) { + indices[2] = hash_index; + ptr = LLVMBuildGEP(builder, cache, indices, ARRAY_SIZE(indices), ""); + ptr = LLVMBuildBitCast(builder, ptr, type_ptr4x32, ""); + LLVMBuildStore(builder, col[count], ptr); + hash_index = LLVMBuildAdd(builder, hash_index, + lp_build_const_int32(gallivm, 4), ""); + } +} + +static LLVMValueRef +s3tc_lookup_cached_pixel(struct gallivm_state *gallivm, + LLVMValueRef ptr, + LLVMValueRef index) +{ + LLVMBuilderRef builder = gallivm->builder; + LLVMValueRef member_ptr, indices[3]; + + indices[0] = lp_build_const_int32(gallivm, 0); + indices[1] = lp_build_const_int32(gallivm, LP_BUILD_FORMAT_CACHE_MEMBER_DATA); + indices[2] = index; + member_ptr = LLVMBuildGEP(builder, ptr, indices, ARRAY_SIZE(indices), ""); + return LLVMBuildLoad(builder, member_ptr, "cache_data"); +} + +static LLVMValueRef +s3tc_lookup_tag_data(struct gallivm_state *gallivm, + LLVMValueRef ptr, + LLVMValueRef index) +{ + LLVMBuilderRef builder = gallivm->builder; + LLVMValueRef member_ptr, indices[3]; + + 
indices[0] = lp_build_const_int32(gallivm, 0); + indices[1] = lp_build_const_int32(gallivm, LP_BUILD_FORMAT_CACHE_MEMBER_TAGS); + indices[2] = index; + member_ptr = LLVMBuildGEP(builder, ptr, indices, ARRAY_SIZE(indices), ""); + return LLVMBuildLoad(builder, member_ptr, "tag_data"); +} + +#if LP_BUILD_FORMAT_CACHE_DEBUG +static void +s3tc_update_cache_access(struct gallivm_state *gallivm, + LLVMValueRef ptr, + unsigned count, + unsigned index) +{ + LLVMBuilderRef builder = gallivm->builder; + LLVMValueRef member_ptr, cache_access; + + assert(index == LP_BUILD_FORMAT_CACHE_MEMBER_ACCESS_TOTAL || + index == LP_BUILD_FORMAT_CACHE_MEMBER_ACCESS_MISS); + + member_ptr = lp_build_struct_get_ptr(gallivm, ptr, index, ""); + cache_access = LLVMBuildLoad(builder, member_ptr, "cache_access"); + cache_access = LLVMBuildAdd(builder, cache_access, + LLVMConstInt(LLVMInt64TypeInContext(gallivm->context), + count, 0), ""); + LLVMBuildStore(builder, cache_access, member_ptr); +} +#endif + +/** + * Calculate 1/3(v1-v0) + v0 and 2*1/3(v1-v0) + v0. + * The lerp is performed between the first 2 32bit colors + * in the source vector, both results are returned packed in result vector. 
+ */ +static LLVMValueRef +lp_build_lerp23_single(struct lp_build_context *bld, + LLVMValueRef v01) +{ + struct gallivm_state *gallivm = bld->gallivm; + LLVMValueRef x, mul, delta, res, v0, v1, elems[8]; + const struct lp_type type = bld->type; + LLVMBuilderRef builder = bld->gallivm->builder; + struct lp_type i16_type = lp_wider_type(type); + struct lp_type i32_type = lp_wider_type(i16_type); + struct lp_build_context bld2; + + assert(!type.floating && !type.fixed && !type.norm && type.width == 8); + + lp_build_context_init(&bld2, gallivm, i16_type); + bld2.type.sign = TRUE; + + /* weights 256/3, 256*2/3, with correct rounding */ + elems[0] = elems[1] = elems[2] = elems[3] = + lp_build_const_elem(gallivm, i16_type, 255*1/3); + elems[4] = elems[5] = elems[6] = elems[7] = + lp_build_const_elem(gallivm, i16_type, 171); + x = LLVMConstVector(elems, 8); + + /* + * v01 has col0 in 32bit elem 0, col1 in elem 1. + * Interleave/unpack will give us separate v0/v1 vectors. + */ + v01 = lp_build_interleave2(gallivm, i32_type, v01, v01, 0); + v01 = LLVMBuildBitCast(builder, v01, bld->vec_type, ""); + + lp_build_unpack2(gallivm, type, i16_type, v01, &v0, &v1); + delta = lp_build_sub(&bld2, v1, v0); + + mul = LLVMBuildMul(builder, x, delta, ""); + + mul = LLVMBuildLShr(builder, mul, lp_build_const_int_vec(gallivm, i16_type, 8), ""); + /* lerp optimization: pack now, do add afterwards */ + res = lp_build_pack2(gallivm, i16_type, type, mul, bld2.undef); + /* only lower 2 elems are valid - for these v0 is really v0 */ + return lp_build_add(bld, res, v01); +} + +/* + * decode one dxt1 block. 
+ */ +static void +s3tc_decode_block_dxt1(struct gallivm_state *gallivm, + enum pipe_format format, + LLVMValueRef dxt_block, + LLVMValueRef *col) +{ + LLVMBuilderRef builder = gallivm->builder; + LLVMValueRef color01, color23, color01_16, color0123; + LLVMValueRef rgba, tmp, a, sel_mask, indices, code, const2; + struct lp_type type8, type32, type16, type64; + struct lp_build_context bld8, bld32, bld16, bld64; + unsigned i; + boolean is_dxt1_variant = format_dxt1_variant(format); + + memset(&type32, 0, sizeof type32); + type32.width = 32; + type32.length = 4; + type32.sign = TRUE; + + memset(&type8, 0, sizeof type8); + type8.width = 8; + type8.length = 16; + + memset(&type16, 0, sizeof type16); + type16.width = 16; + type16.length = 8; + + memset(&type64, 0, sizeof type64); + type64.width = 64; + type64.length = 2; + + a = lp_build_const_int_vec(gallivm, type32, 0xff000000); + const2 = lp_build_const_int_vec(gallivm, type32, 2); + + lp_build_context_init(&bld32, gallivm, type32); + lp_build_context_init(&bld16, gallivm, type16); + lp_build_context_init(&bld8, gallivm, type8); + lp_build_context_init(&bld64, gallivm, type64); + + if (is_dxt1_variant) { + color01 = lp_build_shuffle1undef(gallivm, dxt_block, 0, 4); + code = lp_build_shuffle1undef(gallivm, dxt_block, 1, 4); + } else { + color01 = lp_build_shuffle1undef(gallivm, dxt_block, 2, 4); + code = lp_build_shuffle1undef(gallivm, dxt_block, 3, 4); + } + code = LLVMBuildBitCast(builder, code, bld8.vec_type, ""); + /* expand bytes to dwords */ + code = lp_build_interleave2(gallivm, type8, code, code, 0); + code = lp_build_interleave2(gallivm, type8, code, code, 0); + + + /* + * works as follows: + * - expand color0/color1 to rgba8888 + * - calculate color2/3 (interpolation) according to color0 < color1 rules + * - calculate color2/3 according to color0 >= color1 rules + * - do selection of color2/3 according to comparison of color0/1 + * - extract indices. + * - use compare/select to select the correct color. 
Since we have 2bit + * indices (and 4 colors), needs at least three compare/selects. + */ + + /* + * expand the two colors + */ + color01 = LLVMBuildBitCast(builder, color01, bld16.vec_type, ""); + color01 = lp_build_interleave2(gallivm, type16, color01, + bld16.zero, 0); + color01_16 = LLVMBuildBitCast(builder, color01, bld32.vec_type, ""); + color01 = color_expand_565_to_8888(gallivm, 4, color01_16); + + /* + * interpolate colors + * color2_1 is 2/3 color0 + 1/3 color1 + * color3_1 is 1/3 color0 + 2/3 color1 + * color2_2 is 1/2 color0 + 1/2 color1 + * color3_2 is 0 + */ + + /* TODO: since this is now always scalar, should + * probably just use control flow here instead of calculating + * both cases and then selection + */ + if (format == PIPE_FORMAT_DXT1_RGBA || + format == PIPE_FORMAT_DXT1_SRGBA) { + color01 = LLVMBuildOr(builder, color01, a, ""); + } + /* can combine 2 lerps into one mostly */ + color23 = lp_build_lerp23_single(&bld8, color01); + color23 = LLVMBuildBitCast(builder, color23, bld32.vec_type, ""); + + /* dxt3/5 always use 4-color encoding */ + if (is_dxt1_variant) { + LLVMValueRef color23_2, color2_2; + + if (util_cpu_caps.has_sse2) { + LLVMValueRef intrargs[2]; + intrargs[0] = LLVMBuildBitCast(builder, color01, bld8.vec_type, ""); + /* same interleave as for lerp23 - correct result in 2nd element */ + intrargs[1] = lp_build_interleave2(gallivm, type32, color01, color01, 0); + intrargs[1] = LLVMBuildBitCast(builder, intrargs[1], bld8.vec_type, ""); + color2_2 = lp_build_pavgb(&bld8, intrargs[0], intrargs[1]); + } + else { + LLVMValueRef v01, v0, v1, vhalf; + /* + * This isn't as expensive as it looks (the unpack is the same as + * for lerp23, which is the reason why we do the pointless + * interleave2 too), with correct rounding (the two lower elements + * will be the same). 
+ */ + v01 = lp_build_interleave2(gallivm, type32, color01, color01, 0); + v01 = LLVMBuildBitCast(builder, v01, bld8.vec_type, ""); + lp_build_unpack2(gallivm, type8, type16, v01, &v0, &v1); + vhalf = lp_build_add(&bld16, v0, v1); + vhalf = LLVMBuildLShr(builder, vhalf, bld16.one, ""); + color2_2 = lp_build_pack2(gallivm, type16, type8, vhalf, bld16.undef); + } + /* shuffle in color 3 as elem 2 zero, color 2 elem 1 */ + color23_2 = LLVMBuildBitCast(builder, color2_2, bld64.vec_type, ""); + color23_2 = LLVMBuildLShr(builder, color23_2, + lp_build_const_int_vec(gallivm, type64, 32), ""); + color23_2 = LLVMBuildBitCast(builder, color23_2, bld32.vec_type, ""); + + tmp = LLVMBuildBitCast(builder, color01_16, bld64.vec_type, ""); + tmp = LLVMBuildLShr(builder, tmp, + lp_build_const_int_vec(gallivm, type64, 32), ""); + tmp = LLVMBuildBitCast(builder, tmp, bld32.vec_type, ""); + sel_mask = lp_build_compare(gallivm, type32, PIPE_FUNC_GREATER, + color01_16, tmp); + sel_mask = lp_build_interleave2(gallivm, type32, sel_mask, sel_mask, 0); + color23 = lp_build_select(&bld32, sel_mask, color23, color23_2); + } + + if (util_cpu_caps.has_ssse3) { + /* + * Use pshufb as mini-lut. (Only doable with intrinsics as the + * final shuffles are non-constant. pshufb is awesome!) + */ + LLVMValueRef shuf[16], low2mask; + LLVMValueRef intrargs[2], lut_ind, lut_adj; + + color01 = LLVMBuildBitCast(builder, color01, bld64.vec_type, ""); + color23 = LLVMBuildBitCast(builder, color23, bld64.vec_type, ""); + color0123 = lp_build_interleave2(gallivm, type64, color01, color23, 0); + color0123 = LLVMBuildBitCast(builder, color0123, bld32.vec_type, ""); + + if (format == PIPE_FORMAT_DXT1_RGB || + format == PIPE_FORMAT_DXT1_SRGB) { + color0123 = LLVMBuildOr(builder, color0123, a, ""); + } + + /* shuffle as r0r1r2r3g0g1... 
*/ + for (i = 0; i < 4; i++) { + shuf[4*i] = lp_build_const_int32(gallivm, 0 + i); + shuf[4*i+1] = lp_build_const_int32(gallivm, 4 + i); + shuf[4*i+2] = lp_build_const_int32(gallivm, 8 + i); + shuf[4*i+3] = lp_build_const_int32(gallivm, 12 + i); + } + color0123 = LLVMBuildBitCast(builder, color0123, bld8.vec_type, ""); + color0123 = LLVMBuildShuffleVector(builder, color0123, bld8.undef, + LLVMConstVector(shuf, 16), ""); + + /* lowest 2 bits of each 8 bit value contain index into "LUT" */ + low2mask = lp_build_const_int_vec(gallivm, type8, 3); + /* add 0/4/8/12 for r/g/b/a */ + lut_adj = lp_build_const_int_vec(gallivm, type32, 0x0c080400); + lut_adj = LLVMBuildBitCast(builder, lut_adj, bld8.vec_type, ""); + intrargs[0] = color0123; + for (i = 0; i < 4; i++) { + lut_ind = LLVMBuildAnd(builder, code, low2mask, ""); + lut_ind = LLVMBuildOr(builder, lut_ind, lut_adj, ""); + intrargs[1] = lut_ind; + col[i] = lp_build_intrinsic(builder, "llvm.x86.ssse3.pshuf.b.128", + bld8.vec_type, intrargs, 2, 0); + col[i] = LLVMBuildBitCast(builder, col[i], bld32.vec_type, ""); + code = LLVMBuildBitCast(builder, code, bld32.vec_type, ""); + code = LLVMBuildLShr(builder, code, const2, ""); + code = LLVMBuildBitCast(builder, code, bld8.vec_type, ""); + } + } + else { + /* Thanks to vectorization can do 4 texels in parallel */ + LLVMValueRef color0, color1, color2, color3; + if (format == PIPE_FORMAT_DXT1_RGB || + format == PIPE_FORMAT_DXT1_SRGB) { + color01 = LLVMBuildOr(builder, color01, a, ""); + color23 = LLVMBuildOr(builder, color23, a, ""); + } + color0 = LLVMBuildShuffleVector(builder, color01, bld32.undef, + lp_build_const_shuffle1(gallivm, 0, 4), ""); + color1 = LLVMBuildShuffleVector(builder, color01, bld32.undef, + lp_build_const_shuffle1(gallivm, 1, 4), ""); + color2 = LLVMBuildShuffleVector(builder, color23, bld32.undef, + lp_build_const_shuffle1(gallivm, 0, 4), ""); + color3 = LLVMBuildShuffleVector(builder, color23, bld32.undef, + lp_build_const_shuffle1(gallivm, 1, 4), 
""); + code = LLVMBuildBitCast(builder, code, bld32.vec_type, ""); + + for (i = 0; i < 4; i++) { + /* select the colors */ + LLVMValueRef selmasklo, rgba01, rgba23, bitlo; + bitlo = bld32.one; + indices = LLVMBuildAnd(builder, code, bitlo, ""); + selmasklo = lp_build_compare(gallivm, type32, PIPE_FUNC_EQUAL, + indices, bitlo); + rgba01 = lp_build_select(&bld32, selmasklo, color1, color0); + + LLVMValueRef selmaskhi; + indices = LLVMBuildAnd(builder, code, const2, ""); + selmaskhi = lp_build_compare(gallivm, type32, PIPE_FUNC_EQUAL, + indices, const2); + rgba23 = lp_build_select(&bld32, selmasklo, color3, color2); + rgba = lp_build_select(&bld32, selmaskhi, rgba23, rgba01); + + /* + * Note that this will give "wrong" order. + * col0 will be rgba0, rgba4, rgba8, rgba12, col1 rgba1, rgba5, ... + * This would be easily fixable by using different shuffle, bitlo/hi + * vectors above (and different shift), but seems slightly easier to + * deal with for dxt3/dxt5 alpha too. So instead change lookup. + */ + col[i] = rgba; + code = LLVMBuildLShr(builder, code, const2, ""); + } + } +} + +/* + * decode one dxt3 block. 
+ */ +static void +s3tc_decode_block_dxt3(struct gallivm_state *gallivm, + enum pipe_format format, + LLVMValueRef dxt_block, + LLVMValueRef *col) +{ + LLVMBuilderRef builder = gallivm->builder; + LLVMValueRef alpha, alphas0, alphas1, shift4_16, a[4], mask8hi; + struct lp_type type32, type8, type16; + unsigned i; + + memset(&type32, 0, sizeof type32); + type32.width = 32; + type32.length = 4; + + memset(&type8, 0, sizeof type8); + type8.width = 8; + type8.length = 16; + + memset(&type16, 0, sizeof type16); + type16.width = 16; + type16.length = 8; + + s3tc_decode_block_dxt1(gallivm, format, dxt_block, col); + + shift4_16 = lp_build_const_int_vec(gallivm, type16, 4); + mask8hi = lp_build_const_int_vec(gallivm, type32, 0xff000000); + + alpha = LLVMBuildBitCast(builder, dxt_block, + lp_build_vec_type(gallivm, type8), ""); + alpha = lp_build_interleave2(gallivm, type8, alpha, alpha, 0); + alpha = LLVMBuildBitCast(builder, alpha, + lp_build_vec_type(gallivm, type16), ""); + alpha = LLVMBuildAnd(builder, alpha, + lp_build_const_int_vec(gallivm, type16, 0xf00f), ""); + alphas0 = LLVMBuildLShr(builder, alpha, shift4_16, ""); + alphas1 = LLVMBuildShl(builder, alpha, shift4_16, ""); + alpha = LLVMBuildOr(builder, alphas0, alpha, ""); + alpha = LLVMBuildOr(builder, alphas1, alpha, ""); + alpha = LLVMBuildBitCast(builder, alpha, + lp_build_vec_type(gallivm, type32), ""); + /* + * alpha now contains elems 0,1,2,3,... (ubytes) + * we need 0,4,8,12, 1,5,9,13 etc. in dwords to match color (which + * is just as easy as "natural" order - 3 shift/and instead of 6 unpack). 
+ */ + a[0] = LLVMBuildShl(builder, alpha, + lp_build_const_int_vec(gallivm, type32, 24), ""); + a[1] = LLVMBuildShl(builder, alpha, + lp_build_const_int_vec(gallivm, type32, 16), ""); + a[1] = LLVMBuildAnd(builder, a[1], mask8hi, ""); + a[2] = LLVMBuildShl(builder, alpha, + lp_build_const_int_vec(gallivm, type32, 8), ""); + a[2] = LLVMBuildAnd(builder, a[2], mask8hi, ""); + a[3] = LLVMBuildAnd(builder, alpha, mask8hi, ""); + + for (i = 0; i < 4; i++) { + col[i] = LLVMBuildOr(builder, col[i], a[i], ""); + } +} + + +static LLVMValueRef +lp_build_lerpdxta_block(struct gallivm_state *gallivm, + LLVMValueRef alpha0, + LLVMValueRef alpha1, + LLVMValueRef code, + LLVMValueRef sel_mask) +{ + LLVMBuilderRef builder = gallivm->builder; + LLVMValueRef delta, ainterp; + LLVMValueRef weight5, weight7, weight; + struct lp_type type16; + struct lp_build_context bld; + + memset(&type16, 0, sizeof type16); + type16.width = 16; + type16.length = 8; + type16.sign = TRUE; + + lp_build_context_init(&bld, gallivm, type16); + /* + * 256/7 is only 36.57 so we'd lose quite some precision. Since it would + * actually be desirable to do this here with even higher accuracy than + * even 8 bit (more or less required for rgtc, albeit that's not handled + * here right now), shift the weights after multiplication by code. + */ + weight5 = lp_build_const_int_vec(gallivm, type16, 256*64/5); + weight7 = lp_build_const_int_vec(gallivm, type16, 256*64/7); + weight = lp_build_select(&bld, sel_mask, weight7, weight5); + + /* + * we'll get garbage in the elements which had code 0 (or larger than + * 5 or 7) but we don't care (or rather, need to fix up anyway). 
+ */ + code = LLVMBuildSub(builder, code, bld.one, ""); + + weight = LLVMBuildMul(builder, weight, code, ""); + weight = LLVMBuildLShr(builder, weight, + lp_build_const_int_vec(gallivm, type16, 6), ""); + + delta = LLVMBuildSub(builder, alpha1, alpha0, ""); + + ainterp = LLVMBuildMul(builder, delta, weight, ""); + ainterp = LLVMBuildLShr(builder, ainterp, + lp_build_const_int_vec(gallivm, type16, 8), ""); + + /* lerp is done later (with packed values) */ + + return ainterp; +} + + +/* + * decode one dxt5 block. + */ +static void +s3tc_decode_block_dxt5(struct gallivm_state *gallivm, + enum pipe_format format, + LLVMValueRef dxt_block, + LLVMValueRef *col) +{ + LLVMBuilderRef builder = gallivm->builder; + LLVMValueRef alpha, alpha0, alpha1, ares; + LLVMValueRef ainterp, ainterp0, ainterp1, shuffle1, sel_mask, sel_mask2; + LLVMValueRef a[4], acode, tmp0, tmp1; + LLVMTypeRef i64t, i32t; + struct lp_type type32, type64, type8, type16; + struct lp_build_context bld16, bld8; + unsigned i; + + memset(&type32, 0, sizeof type32); + type32.width = 32; + type32.length = 4; + + memset(&type64, 0, sizeof type64); + type64.width = 64; + type64.length = 2; + + memset(&type8, 0, sizeof type8); + type8.width = 8; + type8.length = 16; + + memset(&type16, 0, sizeof type16); + type16.width = 16; + type16.length = 8; + + lp_build_context_init(&bld16, gallivm, type16); + lp_build_context_init(&bld8, gallivm, type8); + + i64t = lp_build_vec_type(gallivm, type64); + i32t = lp_build_vec_type(gallivm, type32); + + s3tc_decode_block_dxt1(gallivm, format, dxt_block, col); + + /* + * three possible strategies for vectorizing alpha: + * 1) compute all 8 values then use scalar extraction + * (i.e. have all 8 alpha values packed in one 64bit scalar + * and do something like ax = vals >> (codex * 8) followed + * by inserting these values back into color) + * 2) same as 8 but just use pshufb as a mini-LUT for selection. 
+ * (without pshufb would need boatloads of cmp/selects trying to + * keep things vectorized for essentially scalar selection). + * 3) do something similar to the uncached case + * needs more calculations (need to calc 16 values instead of 8 though + * that's only an issue for the lerp which we need to do twice otherwise + * everything still fits into 128bit) but keeps things vectorized mostly. + * Trying 3) here though not sure it's really faster... + * With pshufb, we try 2) (cheaper and more accurate) + */ + + /* + * Ideally, we'd use 2 variable 16bit shifts here (byte shifts wouldn't + * help since code crosses 8bit boundaries). But variable shifts are + * AVX2 only, and even then only dword/quadword (intel _really_ hates + * shifts!). Instead, emulate by 16bit muls. + * Also, the required byte shuffles are essentially non-emulatable, so + * require ssse3 (albeit other archs might do them fine). + * This is not directly tied to ssse3 - just need sane byte shuffles. + * But ordering is going to be different below so use same condition. 
+ */ + + + /* vectorize alpha */ + alpha = LLVMBuildBitCast(builder, dxt_block, i64t, ""); + alpha0 = LLVMBuildAnd(builder, alpha, + lp_build_const_int_vec(gallivm, type64, 0xff), ""); + alpha0 = LLVMBuildBitCast(builder, alpha0, bld16.vec_type, ""); + alpha = LLVMBuildBitCast(builder, alpha, bld16.vec_type, ""); + alpha1 = LLVMBuildLShr(builder, alpha, + lp_build_const_int_vec(gallivm, type16, 8), ""); + alpha = LLVMBuildBitCast(builder, alpha, i64t, ""); + shuffle1 = lp_build_const_shuffle1(gallivm, 0, 8); + /* XXX this shuffle broken with LLVM 2.8 */ + alpha0 = LLVMBuildShuffleVector(builder, alpha0, alpha0, shuffle1, ""); + alpha1 = LLVMBuildShuffleVector(builder, alpha1, alpha1, shuffle1, ""); + + type16.sign = TRUE; + sel_mask = lp_build_compare(gallivm, type16, PIPE_FUNC_GREATER, + alpha0, alpha1); + type16.sign = FALSE; + sel_mask = LLVMBuildBitCast(builder, sel_mask, bld8.vec_type, ""); + + if (!util_cpu_caps.has_ssse3) { + LLVMValueRef acodeg, mask1, acode0, acode1; + + /* extraction of the 3 bit values into something more useful is HARD */ + /* first steps are actually scalar */ + acode = LLVMBuildLShr(builder, alpha, + lp_build_const_int_vec(gallivm, type64, 16), ""); + tmp0 = LLVMBuildAnd(builder, acode, + lp_build_const_int_vec(gallivm, type64, 0xffffff), ""); + tmp1 = LLVMBuildLShr(builder, acode, + lp_build_const_int_vec(gallivm, type64, 24), ""); + tmp0 = LLVMBuildBitCast(builder, tmp0, i32t, ""); + tmp1 = LLVMBuildBitCast(builder, tmp1, i32t, ""); + acode = lp_build_interleave2(gallivm, type32, tmp0, tmp1, 0); + /* now have 2x24bit in 4x32bit, order 01234567, 89..., undef, undef */ + tmp0 = LLVMBuildAnd(builder, acode, + lp_build_const_int_vec(gallivm, type32, 0xfff), ""); + tmp1 = LLVMBuildLShr(builder, acode, + lp_build_const_int_vec(gallivm, type32, 12), ""); + acode = lp_build_interleave2(gallivm, type32, tmp0, tmp1, 0); + /* now have 4x12bit in 4x32bit, order 0123, 4567, ,,, */ + tmp0 = LLVMBuildAnd(builder, acode, + 
lp_build_const_int_vec(gallivm, type32, 0x3f), ""); + tmp1 = LLVMBuildLShr(builder, acode, + lp_build_const_int_vec(gallivm, type32, 6), ""); + /* use signed pack doesn't matter and otherwise need sse41 */ + type32.sign = type16.sign = TRUE; + acode = lp_build_pack2(gallivm, type32, type16, tmp0, tmp1); + type32.sign = type16.sign = FALSE; + /* now have 8x6bit in 8x16bit, 01, 45, 89, ..., 23, 67, ... */ + acode0 = LLVMBuildAnd(builder, acode, + lp_build_const_int_vec(gallivm, type16, 0x7), ""); + acode1 = LLVMBuildLShr(builder, acode, + lp_build_const_int_vec(gallivm, type16, 3), ""); + acode = lp_build_pack2(gallivm, type16, type8, acode0, acode1); + /* acode0 contains elems 0,4,8,12,2,6,10,14, acode1 1,5,9,... */ + + acodeg = LLVMBuildAnd(builder, acode, + LLVMBuildNot(builder, sel_mask, ""), ""); + mask1 = lp_build_compare(gallivm, type8, PIPE_FUNC_EQUAL, + acode, bld8.one); + + sel_mask = LLVMBuildBitCast(builder, sel_mask, bld16.vec_type, ""); + ainterp0 = lp_build_lerpdxta_block(gallivm, alpha0, alpha1, acode0, sel_mask); + ainterp1 = lp_build_lerpdxta_block(gallivm, alpha0, alpha1, acode1, sel_mask); + sel_mask = LLVMBuildBitCast(builder, sel_mask, bld8.vec_type, ""); + ainterp = lp_build_pack2(gallivm, type16, type8, ainterp0, ainterp1); + alpha0 = lp_build_pack2(gallivm, type16, type8, alpha0, alpha0); + alpha1 = lp_build_pack2(gallivm, type16, type8, alpha1, alpha1); + ainterp = LLVMBuildAdd(builder, ainterp, alpha0, ""); + /* Fix up val01 */ + sel_mask2 = lp_build_compare(gallivm, type8, PIPE_FUNC_EQUAL, + acode, bld8.zero); + ainterp = lp_build_select(&bld8, sel_mask2, alpha0, ainterp); + ainterp = lp_build_select(&bld8, mask1, alpha1, ainterp); + + /* fix up val67 if a0 <= a1 */ + sel_mask2 = lp_build_compare(gallivm, type8, PIPE_FUNC_EQUAL, + acodeg, lp_build_const_int_vec(gallivm, type8, 6)); + ares = LLVMBuildAnd(builder, ainterp, LLVMBuildNot(builder, sel_mask2, ""), ""); + sel_mask2 = lp_build_compare(gallivm, type8, PIPE_FUNC_EQUAL, + acodeg, 
lp_build_const_int_vec(gallivm, type8, 7)); + ares = LLVMBuildOr(builder, ares, sel_mask2, ""); + + /* unpack in right order (0,4,8,12,1,5,..) */ + /* this gives us zero, a0, zero, a4, zero, a8, ... for tmp0 */ + tmp0 = lp_build_interleave2(gallivm, type8, bld8.zero, ares, 0); + tmp1 = lp_build_interleave2(gallivm, type8, bld8.zero, ares, 1); + tmp0 = LLVMBuildBitCast(builder, tmp0, bld16.vec_type, ""); + tmp1 = LLVMBuildBitCast(builder, tmp1, bld16.vec_type, ""); + + a[0] = lp_build_interleave2(gallivm, type16, bld16.zero, tmp0, 0); + a[1] = lp_build_interleave2(gallivm, type16, bld16.zero, tmp1, 0); + a[2] = lp_build_interleave2(gallivm, type16, bld16.zero, tmp0, 1); + a[3] = lp_build_interleave2(gallivm, type16, bld16.zero, tmp1, 1); + } + else { + LLVMValueRef elems[16], intrargs[2], shufa, mulclo, mulchi, mask8hi; + LLVMTypeRef type16s = LLVMInt16TypeInContext(gallivm->context); + LLVMTypeRef type8s = LLVMInt8TypeInContext(gallivm->context); + unsigned i, j; + /* + * Ideally, we'd use 2 variable 16bit shifts here (byte shifts wouldn't + * help since code crosses 8bit boundaries). But variable shifts are + * AVX2 only, and even then only dword/quadword (intel _really_ hates + * shifts!). Instead, emulate by 16bit muls. + * Also, the required byte shuffles are essentially non-emulatable, so + * require ssse3 (albeit other archs might do them fine, but the + * complete path is ssse3 only for now). 
+ */ + for (i = 0, j = 0; i < 16; i += 8, j += 3) { + elems[i+0] = elems[i+1] = elems[i+2] = lp_build_const_int32(gallivm, j+2); + elems[i+3] = elems[i+4] = lp_build_const_int32(gallivm, j+3); + elems[i+5] = elems[i+6] = elems[i+7] = lp_build_const_int32(gallivm, j+4); + } + shufa = LLVMConstVector(elems, 16); + alpha = LLVMBuildBitCast(builder, alpha, bld8.vec_type, ""); + acode = LLVMBuildShuffleVector(builder, alpha, bld8.undef, shufa, ""); + acode = LLVMBuildBitCast(builder, acode, bld16.vec_type, ""); + /* + * Put 0/2/4/6 into high 3 bits of 16 bits (save AND mask) + * Do the same for 1/3/5/7 (albeit still need mask there - ideally + * we'd place them into bits 4-7 so could save shift but impossible.) + */ + for (i = 0; i < 8; i += 4) { + elems[i+0] = LLVMConstInt(type16s, 1 << (13-0), 0); + elems[i+1] = LLVMConstInt(type16s, 1 << (13-6), 0); + elems[i+2] = LLVMConstInt(type16s, 1 << (13-4), 0); + elems[i+3] = LLVMConstInt(type16s, 1 << (13-2), 0); + } + mulclo = LLVMConstVector(elems, 8); + for (i = 0; i < 8; i += 4) { + elems[i+0] = LLVMConstInt(type16s, 1 << (13-3), 0); + elems[i+1] = LLVMConstInt(type16s, 1 << (13-9), 0); + elems[i+2] = LLVMConstInt(type16s, 1 << (13-7), 0); + elems[i+3] = LLVMConstInt(type16s, 1 << (13-5), 0); + } + mulchi = LLVMConstVector(elems, 8); + + tmp0 = LLVMBuildMul(builder, acode, mulclo, ""); + tmp1 = LLVMBuildMul(builder, acode, mulchi, ""); + tmp0 = LLVMBuildLShr(builder, tmp0, + lp_build_const_int_vec(gallivm, type16, 13), ""); + tmp1 = LLVMBuildLShr(builder, tmp1, + lp_build_const_int_vec(gallivm, type16, 5), ""); + tmp1 = LLVMBuildAnd(builder, tmp1, + lp_build_const_int_vec(gallivm, type16, 0x700), ""); + acode = LLVMBuildOr(builder, tmp0, tmp1, ""); + acode = LLVMBuildBitCast(builder, acode, bld8.vec_type, ""); + + /* + * Note that ordering is different here to non-ssse3 path: + * 0/1/2/3/4/5... 
+ */ + + LLVMValueRef weight0, weight1, weight, delta; + LLVMValueRef constff_elem7, const0_elem6; + /* weights, correctly rounded (round(256*x/7)) */ + elems[0] = LLVMConstInt(type16s, 256, 0); + elems[1] = LLVMConstInt(type16s, 0, 0); + elems[2] = LLVMConstInt(type16s, 219, 0); + elems[3] = LLVMConstInt(type16s, 183, 0); + elems[4] = LLVMConstInt(type16s, 146, 0); + elems[5] = LLVMConstInt(type16s, 110, 0); + elems[6] = LLVMConstInt(type16s, 73, 0); + elems[7] = LLVMConstInt(type16s, 37, 0); + weight0 = LLVMConstVector(elems, 8); + + elems[0] = LLVMConstInt(type16s, 256, 0); + elems[1] = LLVMConstInt(type16s, 0, 0); + elems[2] = LLVMConstInt(type16s, 205, 0); + elems[3] = LLVMConstInt(type16s, 154, 0); + elems[4] = LLVMConstInt(type16s, 102, 0); + elems[5] = LLVMConstInt(type16s, 51, 0); + elems[6] = LLVMConstInt(type16s, 0, 0); + elems[7] = LLVMConstInt(type16s, 0, 0); + weight1 = LLVMConstVector(elems, 8); + + weight0 = LLVMBuildBitCast(builder, weight0, bld8.vec_type, ""); + weight1 = LLVMBuildBitCast(builder, weight1, bld8.vec_type, ""); + weight = lp_build_select(&bld8, sel_mask, weight0, weight1); + weight = LLVMBuildBitCast(builder, weight, bld16.vec_type, ""); + + for (i = 0; i < 16; i++) { + elems[i] = LLVMConstNull(type8s); + } + elems[7] = LLVMConstInt(type8s, 255, 0); + constff_elem7 = LLVMConstVector(elems, 16); + + for (i = 0; i < 16; i++) { + elems[i] = LLVMConstInt(type8s, 255, 0); + } + elems[6] = LLVMConstInt(type8s, 0, 0); + const0_elem6 = LLVMConstVector(elems, 16); + + /* standard simple lerp - but the version we need isn't available */ + delta = LLVMBuildSub(builder, alpha0, alpha1, ""); + ainterp = LLVMBuildMul(builder, delta, weight, ""); + ainterp = LLVMBuildLShr(builder, ainterp, + lp_build_const_int_vec(gallivm, type16, 8), ""); + ainterp = LLVMBuildBitCast(builder, ainterp, bld8.vec_type, ""); + alpha1 = LLVMBuildBitCast(builder, alpha1, bld8.vec_type, ""); + ainterp = LLVMBuildAdd(builder, ainterp, alpha1, ""); + ainterp = 
LLVMBuildBitCast(builder, ainterp, bld16.vec_type, ""); + ainterp = lp_build_pack2(gallivm, type16, type8, ainterp, bld16.undef); + + /* fixing 0/0xff case is slightly more complex */ + constff_elem7 = LLVMBuildAnd(builder, constff_elem7, + LLVMBuildNot(builder, sel_mask, ""), ""); + const0_elem6 = LLVMBuildOr(builder, const0_elem6, sel_mask, ""); + ainterp = LLVMBuildOr(builder, ainterp, constff_elem7, ""); + ainterp = LLVMBuildAnd(builder, ainterp, const0_elem6, ""); + + /* now pick all 16 elements at once! */ + intrargs[0] = ainterp; + intrargs[1] = acode; + ares = lp_build_intrinsic(builder, "llvm.x86.ssse3.pshuf.b.128", + bld8.vec_type, intrargs, 2, 0); + + ares = LLVMBuildBitCast(builder, ares, i32t, ""); + mask8hi = lp_build_const_int_vec(gallivm, type32, 0xff000000); + a[0] = LLVMBuildShl(builder, ares, + lp_build_const_int_vec(gallivm, type32, 24), ""); + a[1] = LLVMBuildShl(builder, ares, + lp_build_const_int_vec(gallivm, type32, 16), ""); + a[1] = LLVMBuildAnd(builder, a[1], mask8hi, ""); + a[2] = LLVMBuildShl(builder, ares, + lp_build_const_int_vec(gallivm, type32, 8), ""); + a[2] = LLVMBuildAnd(builder, a[2], mask8hi, ""); + a[3] = LLVMBuildAnd(builder, ares, mask8hi, ""); + } + + for (i = 0; i < 4; i++) { + a[i] = LLVMBuildBitCast(builder, a[i], i32t, ""); + col[i] = LLVMBuildOr(builder, col[i], a[i], ""); + } +} + + +static void +generate_update_cache_one_block(struct gallivm_state *gallivm, + LLVMValueRef function, + const struct util_format_description *format_desc) +{ + LLVMBasicBlockRef block; + LLVMBuilderRef old_builder; + LLVMValueRef ptr_addr; + LLVMValueRef hash_index; + LLVMValueRef cache; + LLVMValueRef dxt_block, tag_value; + LLVMValueRef col[LP_MAX_VECTOR_LENGTH]; + + ptr_addr = LLVMGetParam(function, 0); + hash_index = LLVMGetParam(function, 1); + cache = LLVMGetParam(function, 2); + + lp_build_name(ptr_addr, "ptr_addr" ); + lp_build_name(hash_index, "hash_index"); + lp_build_name(cache, "cache_addr"); + + /* + * Function body + */ + + 
old_builder = gallivm->builder; + block = LLVMAppendBasicBlockInContext(gallivm->context, function, "entry"); + gallivm->builder = LLVMCreateBuilderInContext(gallivm->context); + LLVMPositionBuilderAtEnd(gallivm->builder, block); + + lp_build_gather_s3tc_simple_scalar(gallivm, format_desc, &dxt_block, + ptr_addr); + + switch (format_desc->format) { + case PIPE_FORMAT_DXT1_RGB: + case PIPE_FORMAT_DXT1_RGBA: + case PIPE_FORMAT_DXT1_SRGB: + case PIPE_FORMAT_DXT1_SRGBA: + s3tc_decode_block_dxt1(gallivm, format_desc->format, dxt_block, col); + break; + case PIPE_FORMAT_DXT3_RGBA: + case PIPE_FORMAT_DXT3_SRGBA: + s3tc_decode_block_dxt3(gallivm, format_desc->format, dxt_block, col); + break; + case PIPE_FORMAT_DXT5_RGBA: + case PIPE_FORMAT_DXT5_SRGBA: + s3tc_decode_block_dxt5(gallivm, format_desc->format, dxt_block, col); + break; + default: + assert(0); + s3tc_decode_block_dxt1(gallivm, format_desc->format, dxt_block, col); + break; + } + + tag_value = LLVMBuildPtrToInt(gallivm->builder, ptr_addr, + LLVMInt64TypeInContext(gallivm->context), ""); + s3tc_store_cached_block(gallivm, col, tag_value, hash_index, cache); + + LLVMBuildRetVoid(gallivm->builder); + + LLVMDisposeBuilder(gallivm->builder); + gallivm->builder = old_builder; + + gallivm_verify_function(gallivm, function); +} + + +static void +update_cached_block(struct gallivm_state *gallivm, + const struct util_format_description *format_desc, + LLVMValueRef ptr_addr, + LLVMValueRef hash_index, + LLVMValueRef cache) + +{ + LLVMBuilderRef builder = gallivm->builder; + LLVMModuleRef module = gallivm->module; + char name[256]; + LLVMTypeRef i8t = LLVMInt8TypeInContext(gallivm->context); + LLVMTypeRef pi8t = LLVMPointerType(i8t, 0); + LLVMValueRef function, inst; + LLVMBasicBlockRef bb; + LLVMValueRef args[3]; + + util_snprintf(name, sizeof name, "%s_update_cache_one_block", + format_desc->short_name); + function = LLVMGetNamedFunction(module, name); + + if (!function) { + LLVMTypeRef ret_type; + LLVMTypeRef 
arg_types[3]; + LLVMTypeRef function_type; + unsigned arg; + + /* + * Generate the function prototype. + */ + + ret_type = LLVMVoidTypeInContext(gallivm->context); + arg_types[0] = pi8t; + arg_types[1] = LLVMInt32TypeInContext(gallivm->context); + arg_types[2] = LLVMTypeOf(cache); // XXX: put right type here + function_type = LLVMFunctionType(ret_type, arg_types, ARRAY_SIZE(arg_types), 0); + function = LLVMAddFunction(module, name, function_type); + + for (arg = 0; arg < ARRAY_SIZE(arg_types); ++arg) + if (LLVMGetTypeKind(arg_types[arg]) == LLVMPointerTypeKind) + lp_add_function_attr(function, arg + 1, LP_FUNC_ATTR_NOALIAS); + + LLVMSetFunctionCallConv(function, LLVMFastCallConv); + LLVMSetVisibility(function, LLVMHiddenVisibility); + generate_update_cache_one_block(gallivm, function, format_desc); + } + + args[0] = ptr_addr; + args[1] = hash_index; + args[2] = cache; + + LLVMBuildCall(builder, function, args, ARRAY_SIZE(args), ""); + bb = LLVMGetInsertBlock(builder); + inst = LLVMGetLastInstruction(bb); + LLVMSetInstructionCallConv(inst, LLVMFastCallConv); +} + +/* + * cached lookup + */ +static LLVMValueRef +compressed_fetch_cached(struct gallivm_state *gallivm, + const struct util_format_description *format_desc, + unsigned n, + LLVMValueRef base_ptr, + LLVMValueRef offset, + LLVMValueRef i, + LLVMValueRef j, + LLVMValueRef cache) + +{ + LLVMBuilderRef builder = gallivm->builder; + unsigned count, low_bit, log2size; + LLVMValueRef color, offset_stored, addr, ptr_addrtrunc, tmp; + LLVMValueRef ij_index, hash_index, hash_mask, block_index; + LLVMTypeRef i8t = LLVMInt8TypeInContext(gallivm->context); + LLVMTypeRef i32t = LLVMInt32TypeInContext(gallivm->context); + LLVMTypeRef i64t = LLVMInt64TypeInContext(gallivm->context); + struct lp_type type; + struct lp_build_context bld32; + memset(&type, 0, sizeof type); + type.width = 32; + type.length = n; + + lp_build_context_init(&bld32, gallivm, type); + + /* + * compute hash - we use direct mapped cache, the hash 
function could + * be better but it needs to be simple + * per-element: + * compare offset with offset stored at tag (hash) + * if not equal extract block, store block, update tag + * extract color from cache + * assemble colors + */ + + low_bit = util_logbase2(format_desc->block.bits / 8); + log2size = util_logbase2(LP_BUILD_FORMAT_CACHE_SIZE); + addr = LLVMBuildPtrToInt(builder, base_ptr, i64t, ""); + ptr_addrtrunc = LLVMBuildPtrToInt(builder, base_ptr, i32t, ""); + ptr_addrtrunc = lp_build_broadcast_scalar(&bld32, ptr_addrtrunc); + /* For the hash function, first mask off the unused lowest bits. Then just + do some xor with address bits - only use lower 32bits */ + ptr_addrtrunc = LLVMBuildAdd(builder, offset, ptr_addrtrunc, ""); + ptr_addrtrunc = LLVMBuildLShr(builder, ptr_addrtrunc, + lp_build_const_int_vec(gallivm, type, low_bit), ""); + /* This only really makes sense for size 64,128,256 */ + hash_index = ptr_addrtrunc; + ptr_addrtrunc = LLVMBuildLShr(builder, ptr_addrtrunc, + lp_build_const_int_vec(gallivm, type, 2*log2size), ""); + hash_index = LLVMBuildXor(builder, ptr_addrtrunc, hash_index, ""); + tmp = LLVMBuildLShr(builder, hash_index, + lp_build_const_int_vec(gallivm, type, log2size), ""); + hash_index = LLVMBuildXor(builder, hash_index, tmp, ""); + + hash_mask = lp_build_const_int_vec(gallivm, type, LP_BUILD_FORMAT_CACHE_SIZE - 1); + hash_index = LLVMBuildAnd(builder, hash_index, hash_mask, ""); + ij_index = LLVMBuildShl(builder, i, lp_build_const_int_vec(gallivm, type, 2), ""); + ij_index = LLVMBuildAdd(builder, ij_index, j, ""); + block_index = LLVMBuildShl(builder, hash_index, + lp_build_const_int_vec(gallivm, type, 4), ""); + block_index = LLVMBuildAdd(builder, ij_index, block_index, ""); + + if (n > 1) { + color = bld32.undef; + for (count = 0; count < n; count++) { + LLVMValueRef index, cond, colorx; + LLVMValueRef block_indexx, hash_indexx, addrx, offsetx, ptr_addrx; + struct lp_build_if_state if_ctx; + + index = lp_build_const_int32(gallivm, 
count); + offsetx = LLVMBuildExtractElement(builder, offset, index, ""); + addrx = LLVMBuildZExt(builder, offsetx, i64t, ""); + addrx = LLVMBuildAdd(builder, addrx, addr, ""); + block_indexx = LLVMBuildExtractElement(builder, block_index, index, ""); + hash_indexx = LLVMBuildLShr(builder, block_indexx, + lp_build_const_int32(gallivm, 4), ""); + offset_stored = s3tc_lookup_tag_data(gallivm, cache, hash_indexx); + cond = LLVMBuildICmp(builder, LLVMIntNE, offset_stored, addrx, ""); + + lp_build_if(&if_ctx, gallivm, cond); + { + ptr_addrx = LLVMBuildIntToPtr(builder, addrx, + LLVMPointerType(i8t, 0), ""); + update_cached_block(gallivm, format_desc, ptr_addrx, hash_indexx, cache); +#if LP_BUILD_FORMAT_CACHE_DEBUG + s3tc_update_cache_access(gallivm, cache, 1, + LP_BUILD_FORMAT_CACHE_MEMBER_ACCESS_MISS); +#endif + } + lp_build_endif(&if_ctx); + + colorx = s3tc_lookup_cached_pixel(gallivm, cache, block_indexx); + + color = LLVMBuildInsertElement(builder, color, colorx, + lp_build_const_int32(gallivm, count), ""); + } + } + else { + LLVMValueRef cond; + struct lp_build_if_state if_ctx; + + tmp = LLVMBuildZExt(builder, offset, i64t, ""); + addr = LLVMBuildAdd(builder, tmp, addr, ""); + offset_stored = s3tc_lookup_tag_data(gallivm, cache, hash_index); + cond = LLVMBuildICmp(builder, LLVMIntNE, offset_stored, addr, ""); + + lp_build_if(&if_ctx, gallivm, cond); + { + tmp = LLVMBuildIntToPtr(builder, addr, LLVMPointerType(i8t, 0), ""); + update_cached_block(gallivm, format_desc, tmp, hash_index, cache); +#if LP_BUILD_FORMAT_CACHE_DEBUG + s3tc_update_cache_access(gallivm, cache, 1, + LP_BUILD_FORMAT_CACHE_MEMBER_ACCESS_MISS); +#endif + } + lp_build_endif(&if_ctx); + + color = s3tc_lookup_cached_pixel(gallivm, cache, block_index); + } +#if LP_BUILD_FORMAT_CACHE_DEBUG + s3tc_update_cache_access(gallivm, cache, n, + LP_BUILD_FORMAT_CACHE_MEMBER_ACCESS_TOTAL); +#endif + return LLVMBuildBitCast(builder, color, LLVMVectorType(i8t, n * 4), ""); +} + + +static LLVMValueRef 
+s3tc_dxt5_to_rgba_aos(struct gallivm_state *gallivm, + unsigned n, + enum pipe_format format, + LLVMValueRef colors, + LLVMValueRef codewords, + LLVMValueRef alpha_lo, + LLVMValueRef alpha_hi, + LLVMValueRef i, + LLVMValueRef j) +{ + return s3tc_dxt5_full_to_rgba_aos(gallivm, n, format, colors, + codewords, alpha_lo, alpha_hi, i, j); +} + + +/** + * @param n number of pixels processed (usually n=4, but it should also work with n=1 + * and multiples of 4) + * @param base_ptr base pointer (32bit or 64bit pointer depending on the architecture) + * @param offset vector with the relative offsets of the S3TC blocks + * @param i is a vector with the x subpixel coordinate (0..3) + * @param j is a vector with the y subpixel coordinate (0..3) + * @return a <4*n x i8> vector with the pixel RGBA values in AoS + */ +LLVMValueRef +lp_build_fetch_s3tc_rgba_aos(struct gallivm_state *gallivm, + const struct util_format_description *format_desc, + unsigned n, + LLVMValueRef base_ptr, + LLVMValueRef offset, + LLVMValueRef i, + LLVMValueRef j, + LLVMValueRef cache) +{ + LLVMValueRef rgba; + LLVMTypeRef i8t = LLVMInt8TypeInContext(gallivm->context); + LLVMBuilderRef builder = gallivm->builder; + + assert(format_desc->layout == UTIL_FORMAT_LAYOUT_S3TC); + assert(format_desc->block.width == 4); + assert(format_desc->block.height == 4); + + assert((n == 1) || (n % 4 == 0)); + +/* debug_printf("format = %d\n", format_desc->format);*/ + if (cache) { + rgba = compressed_fetch_cached(gallivm, format_desc, n, + base_ptr, offset, i, j, cache); + return rgba; + } + + if (n > 4) { + unsigned count; + LLVMTypeRef i8_vectype = LLVMVectorType(i8t, 4 * n); + LLVMTypeRef i128_type = LLVMIntTypeInContext(gallivm->context, 128); + LLVMTypeRef i128_vectype = LLVMVectorType(i128_type, n / 4); + LLVMTypeRef i324_vectype = LLVMVectorType(LLVMInt32TypeInContext( + gallivm->context), 4); + LLVMValueRef offset4, i4, j4, rgba4[LP_MAX_VECTOR_LENGTH/16]; + struct lp_type lp_324_vectype = lp_type_uint_vec(32, 
128); + + assert(n / 4 <= ARRAY_SIZE(rgba4)); + + rgba = LLVMGetUndef(i128_vectype); + + for (count = 0; count < n / 4; count++) { + LLVMValueRef colors, codewords, alpha_lo, alpha_hi; + + i4 = lp_build_extract_range(gallivm, i, count * 4, 4); + j4 = lp_build_extract_range(gallivm, j, count * 4, 4); + offset4 = lp_build_extract_range(gallivm, offset, count * 4, 4); + + lp_build_gather_s3tc(gallivm, 4, format_desc, &colors, &codewords, + &alpha_lo, &alpha_hi, base_ptr, offset4); + + switch (format_desc->format) { + case PIPE_FORMAT_DXT1_RGB: + case PIPE_FORMAT_DXT1_RGBA: + case PIPE_FORMAT_DXT1_SRGB: + case PIPE_FORMAT_DXT1_SRGBA: + rgba4[count] = s3tc_dxt1_to_rgba_aos(gallivm, 4, format_desc->format, + colors, codewords, i4, j4); + break; + case PIPE_FORMAT_DXT3_RGBA: + case PIPE_FORMAT_DXT3_SRGBA: + rgba4[count] = s3tc_dxt3_to_rgba_aos(gallivm, 4, format_desc->format, colors, + codewords, alpha_lo, alpha_hi, i4, j4); + break; + case PIPE_FORMAT_DXT5_RGBA: + case PIPE_FORMAT_DXT5_SRGBA: + rgba4[count] = s3tc_dxt5_to_rgba_aos(gallivm, 4, format_desc->format, colors, + codewords, alpha_lo, alpha_hi, i4, j4); + break; + default: + assert(0); + rgba4[count] = LLVMGetUndef(LLVMVectorType(i8t, 4)); + break; + } + /* shuffles typically give best results with dword elements...*/ + rgba4[count] = LLVMBuildBitCast(builder, rgba4[count], i324_vectype, ""); + } + rgba = lp_build_concat(gallivm, rgba4, lp_324_vectype, n / 4); + rgba = LLVMBuildBitCast(builder, rgba, i8_vectype, ""); + } + else { + LLVMValueRef colors, codewords, alpha_lo, alpha_hi; + + lp_build_gather_s3tc(gallivm, n, format_desc, &colors, &codewords, + &alpha_lo, &alpha_hi, base_ptr, offset); + + switch (format_desc->format) { + case PIPE_FORMAT_DXT1_RGB: + case PIPE_FORMAT_DXT1_RGBA: + case PIPE_FORMAT_DXT1_SRGB: + case PIPE_FORMAT_DXT1_SRGBA: + rgba = s3tc_dxt1_to_rgba_aos(gallivm, n, format_desc->format, + colors, codewords, i, j); + break; + case PIPE_FORMAT_DXT3_RGBA: + case PIPE_FORMAT_DXT3_SRGBA: + rgba 
= s3tc_dxt3_to_rgba_aos(gallivm, n, format_desc->format, colors, + codewords, alpha_lo, alpha_hi, i, j); + break; + case PIPE_FORMAT_DXT5_RGBA: + case PIPE_FORMAT_DXT5_SRGBA: + rgba = s3tc_dxt5_to_rgba_aos(gallivm, n, format_desc->format, colors, + codewords, alpha_lo, alpha_hi, i, j); + break; + default: + assert(0); + rgba = LLVMGetUndef(LLVMVectorType(i8t, 4*n)); + break; + } + } + + /* always return just decompressed values - srgb conversion is done later */ + + return rgba; +} diff -Nru mesa-18.3.3/src/gallium/auxiliary/gallivm/lp_bld_intr.c mesa-19.0.1/src/gallium/auxiliary/gallivm/lp_bld_intr.c --- mesa-18.3.3/src/gallium/auxiliary/gallivm/lp_bld_intr.c 2018-01-29 17:10:31.000000000 +0000 +++ mesa-19.0.1/src/gallium/auxiliary/gallivm/lp_bld_intr.c 2019-03-31 23:16:37.000000000 +0000 @@ -241,6 +241,16 @@ function = lp_declare_intrinsic(module, name, ret_type, arg_types, num_args); + /* + * If llvm removes an intrinsic we use, we'll hit this abort (rather + * than a call to address zero in the jited code). + */ + if (LLVMGetIntrinsicID(function) == 0) { + _debug_printf("llvm (version 0x%x) found no intrinsic for %s, going to crash...\n", + HAVE_LLVM, name); + abort(); + } + if (!set_callsite_attrs) lp_add_func_attributes(function, attr_mask); diff -Nru mesa-18.3.3/src/gallium/auxiliary/gallivm/lp_bld_sample_aos.c mesa-19.0.1/src/gallium/auxiliary/gallivm/lp_bld_sample_aos.c --- mesa-18.3.3/src/gallium/auxiliary/gallivm/lp_bld_sample_aos.c 2017-11-05 00:14:08.000000000 +0000 +++ mesa-19.0.1/src/gallium/auxiliary/gallivm/lp_bld_sample_aos.c 2019-03-31 23:16:37.000000000 +0000 @@ -132,68 +132,6 @@ /** - * Build LLVM code for texture coord wrapping, for nearest filtering, - * for float texcoords. 
- * \param coord the incoming texcoord (s,t or r) - * \param length the texture size along one dimension - * \param offset the texel offset along the coord axis - * \param is_pot if TRUE, length is a power of two - * \param wrap_mode one of PIPE_TEX_WRAP_x - * \param icoord the texcoord after wrapping, as int - */ -static void -lp_build_sample_wrap_nearest_float(struct lp_build_sample_context *bld, - LLVMValueRef coord, - LLVMValueRef length, - LLVMValueRef offset, - boolean is_pot, - unsigned wrap_mode, - LLVMValueRef *icoord) -{ - struct lp_build_context *coord_bld = &bld->coord_bld; - LLVMValueRef length_minus_one; - - switch(wrap_mode) { - case PIPE_TEX_WRAP_REPEAT: - if (offset) { - /* this is definitely not ideal for POT case */ - offset = lp_build_int_to_float(coord_bld, offset); - offset = lp_build_div(coord_bld, offset, length); - coord = lp_build_add(coord_bld, coord, offset); - } - /* take fraction, unnormalize */ - coord = lp_build_fract_safe(coord_bld, coord); - coord = lp_build_mul(coord_bld, coord, length); - *icoord = lp_build_itrunc(coord_bld, coord); - break; - case PIPE_TEX_WRAP_CLAMP_TO_EDGE: - length_minus_one = lp_build_sub(coord_bld, length, coord_bld->one); - if (bld->static_sampler_state->normalized_coords) { - /* scale coord to length */ - coord = lp_build_mul(coord_bld, coord, length); - } - if (offset) { - offset = lp_build_int_to_float(coord_bld, offset); - coord = lp_build_add(coord_bld, coord, offset); - } - coord = lp_build_clamp(coord_bld, coord, coord_bld->zero, - length_minus_one); - *icoord = lp_build_itrunc(coord_bld, coord); - break; - - case PIPE_TEX_WRAP_CLAMP: - case PIPE_TEX_WRAP_CLAMP_TO_BORDER: - case PIPE_TEX_WRAP_MIRROR_REPEAT: - case PIPE_TEX_WRAP_MIRROR_CLAMP: - case PIPE_TEX_WRAP_MIRROR_CLAMP_TO_EDGE: - case PIPE_TEX_WRAP_MIRROR_CLAMP_TO_BORDER: - default: - assert(0); - } -} - - -/** * Helper to compute the first coord and the weight for * linear wrap repeat npot textures */ @@ -425,129 +363,6 @@ /** - * Build LLVM 
code for texture coord wrapping, for linear filtering, - * for float texcoords. - * \param block_length is the length of the pixel block along the - * coordinate axis - * \param coord the incoming texcoord (s,t or r) - * \param length the texture size along one dimension - * \param offset the texel offset along the coord axis - * \param is_pot if TRUE, length is a power of two - * \param wrap_mode one of PIPE_TEX_WRAP_x - * \param coord0 the first texcoord after wrapping, as int - * \param coord1 the second texcoord after wrapping, as int - * \param weight the filter weight as int (0-255) - * \param force_nearest if this coord actually uses nearest filtering - */ -static void -lp_build_sample_wrap_linear_float(struct lp_build_sample_context *bld, - unsigned block_length, - LLVMValueRef coord, - LLVMValueRef length, - LLVMValueRef offset, - boolean is_pot, - unsigned wrap_mode, - LLVMValueRef *coord0, - LLVMValueRef *coord1, - LLVMValueRef *weight, - unsigned force_nearest) -{ - struct lp_build_context *int_coord_bld = &bld->int_coord_bld; - struct lp_build_context *coord_bld = &bld->coord_bld; - LLVMBuilderRef builder = bld->gallivm->builder; - LLVMValueRef half = lp_build_const_vec(bld->gallivm, coord_bld->type, 0.5); - LLVMValueRef length_minus_one = lp_build_sub(coord_bld, length, coord_bld->one); - - switch(wrap_mode) { - case PIPE_TEX_WRAP_REPEAT: - if (is_pot) { - /* mul by size and subtract 0.5 */ - coord = lp_build_mul(coord_bld, coord, length); - if (offset) { - offset = lp_build_int_to_float(coord_bld, offset); - coord = lp_build_add(coord_bld, coord, offset); - } - if (!force_nearest) - coord = lp_build_sub(coord_bld, coord, half); - *coord1 = lp_build_add(coord_bld, coord, coord_bld->one); - /* convert to int, compute lerp weight */ - lp_build_ifloor_fract(coord_bld, coord, coord0, weight); - *coord1 = lp_build_ifloor(coord_bld, *coord1); - /* repeat wrap */ - length_minus_one = lp_build_itrunc(coord_bld, length_minus_one); - *coord0 = 
LLVMBuildAnd(builder, *coord0, length_minus_one, ""); - *coord1 = LLVMBuildAnd(builder, *coord1, length_minus_one, ""); - } - else { - LLVMValueRef mask; - if (offset) { - offset = lp_build_int_to_float(coord_bld, offset); - offset = lp_build_div(coord_bld, offset, length); - coord = lp_build_add(coord_bld, coord, offset); - } - /* wrap with normalized floats is just fract */ - coord = lp_build_fract(coord_bld, coord); - /* unnormalize */ - coord = lp_build_mul(coord_bld, coord, length); - /* - * we avoided the 0.5/length division, have to fix up wrong - * edge cases with selects - */ - *coord1 = lp_build_add(coord_bld, coord, half); - coord = lp_build_sub(coord_bld, coord, half); - *weight = lp_build_fract(coord_bld, coord); - /* - * It is important for this comparison to be unordered - * (or need fract_safe above). - */ - mask = lp_build_compare(coord_bld->gallivm, coord_bld->type, - PIPE_FUNC_LESS, coord, coord_bld->zero); - *coord0 = lp_build_select(coord_bld, mask, length_minus_one, coord); - *coord0 = lp_build_itrunc(coord_bld, *coord0); - mask = lp_build_compare(coord_bld->gallivm, coord_bld->type, - PIPE_FUNC_LESS, *coord1, length); - *coord1 = lp_build_select(coord_bld, mask, *coord1, coord_bld->zero); - *coord1 = lp_build_itrunc(coord_bld, *coord1); - } - break; - case PIPE_TEX_WRAP_CLAMP_TO_EDGE: - if (bld->static_sampler_state->normalized_coords) { - /* mul by tex size */ - coord = lp_build_mul(coord_bld, coord, length); - } - if (offset) { - offset = lp_build_int_to_float(coord_bld, offset); - coord = lp_build_add(coord_bld, coord, offset); - } - /* subtract 0.5 */ - if (!force_nearest) { - coord = lp_build_sub(coord_bld, coord, half); - } - /* clamp to [0, length - 1] */ - coord = lp_build_min_ext(coord_bld, coord, length_minus_one, - GALLIVM_NAN_RETURN_OTHER_SECOND_NONNAN); - coord = lp_build_max(coord_bld, coord, coord_bld->zero); - *coord1 = lp_build_add(coord_bld, coord, coord_bld->one); - /* convert to int, compute lerp weight */ - 
lp_build_ifloor_fract(coord_bld, coord, coord0, weight); - /* coord1 = min(coord1, length-1) */ - *coord1 = lp_build_min(coord_bld, *coord1, length_minus_one); - *coord1 = lp_build_itrunc(coord_bld, *coord1); - break; - default: - assert(0); - *coord0 = int_coord_bld->zero; - *coord1 = int_coord_bld->zero; - *weight = coord_bld->zero; - break; - } - *weight = lp_build_mul_imm(coord_bld, *weight, 256); - *weight = lp_build_itrunc(coord_bld, *weight); - return; -} - - -/** * Fetch texels for image with nearest sampling. * Return filtered color as two vectors of 16-bit fixed point values. */ @@ -737,96 +552,6 @@ /** - * Sample a single texture image with nearest sampling. - * If sampling a cube texture, r = cube face in [0,5]. - * Return filtered color as two vectors of 16-bit fixed point values. - * Does address calcs (except offsets) with floats. - * Useful for AVX which has support for 8x32 floats but not 8x32 ints. - */ -static void -lp_build_sample_image_nearest_afloat(struct lp_build_sample_context *bld, - LLVMValueRef int_size, - LLVMValueRef row_stride_vec, - LLVMValueRef img_stride_vec, - LLVMValueRef data_ptr, - LLVMValueRef mipoffsets, - LLVMValueRef s, - LLVMValueRef t, - LLVMValueRef r, - const LLVMValueRef *offsets, - LLVMValueRef *colors) - { - const unsigned dims = bld->dims; - LLVMValueRef width_vec, height_vec, depth_vec; - LLVMValueRef offset; - LLVMValueRef x_subcoord, y_subcoord; - LLVMValueRef x_icoord = NULL, y_icoord = NULL, z_icoord = NULL; - LLVMValueRef flt_size; - - flt_size = lp_build_int_to_float(&bld->float_size_bld, int_size); - - lp_build_extract_image_sizes(bld, - &bld->float_size_bld, - bld->coord_type, - flt_size, - &width_vec, - &height_vec, - &depth_vec); - - /* Do texcoord wrapping */ - lp_build_sample_wrap_nearest_float(bld, - s, width_vec, offsets[0], - bld->static_texture_state->pot_width, - bld->static_sampler_state->wrap_s, - &x_icoord); - - if (dims >= 2) { - lp_build_sample_wrap_nearest_float(bld, - t, height_vec, 
offsets[1], - bld->static_texture_state->pot_height, - bld->static_sampler_state->wrap_t, - &y_icoord); - - if (dims >= 3) { - lp_build_sample_wrap_nearest_float(bld, - r, depth_vec, offsets[2], - bld->static_texture_state->pot_depth, - bld->static_sampler_state->wrap_r, - &z_icoord); - } - } - if (has_layer_coord(bld->static_texture_state->target)) { - z_icoord = r; - } - - /* - * From here on we deal with ints, and we should split up the 256bit - * vectors manually for better generated code. - */ - - /* - * compute texel offsets - - * cannot do offset calc with floats, difficult for block-based formats, - * and not enough precision anyway. - */ - lp_build_sample_offset(&bld->int_coord_bld, - bld->format_desc, - x_icoord, y_icoord, - z_icoord, - row_stride_vec, img_stride_vec, - &offset, - &x_subcoord, &y_subcoord); - if (mipoffsets) { - offset = lp_build_add(&bld->int_coord_bld, offset, mipoffsets); - } - - lp_build_sample_fetch_image_nearest(bld, data_ptr, offset, - x_subcoord, y_subcoord, - colors); -} - - -/** * Fetch texels for image with linear sampling. * Return filtered color as two vectors of 16-bit fixed point values. */ @@ -1213,175 +938,6 @@ /** - * Sample a single texture image with (bi-)(tri-)linear sampling. - * Return filtered color as two vectors of 16-bit fixed point values. - * Does address calcs (except offsets) with floats. - * Useful for AVX which has support for 8x32 floats but not 8x32 ints. 
- */ -static void -lp_build_sample_image_linear_afloat(struct lp_build_sample_context *bld, - LLVMValueRef int_size, - LLVMValueRef row_stride_vec, - LLVMValueRef img_stride_vec, - LLVMValueRef data_ptr, - LLVMValueRef mipoffsets, - LLVMValueRef s, - LLVMValueRef t, - LLVMValueRef r, - const LLVMValueRef *offsets, - LLVMValueRef *colors) -{ - const unsigned dims = bld->dims; - LLVMValueRef width_vec, height_vec, depth_vec; - LLVMValueRef s_fpart; - LLVMValueRef t_fpart = NULL; - LLVMValueRef r_fpart = NULL; - LLVMValueRef x_stride, y_stride, z_stride; - LLVMValueRef x_offset0, x_offset1; - LLVMValueRef y_offset0, y_offset1; - LLVMValueRef z_offset0, z_offset1; - LLVMValueRef offset[2][2][2]; /* [z][y][x] */ - LLVMValueRef x_subcoord[2], y_subcoord[2]; - LLVMValueRef flt_size; - LLVMValueRef x_icoord0, x_icoord1; - LLVMValueRef y_icoord0, y_icoord1; - LLVMValueRef z_icoord0, z_icoord1; - unsigned x, y, z; - - flt_size = lp_build_int_to_float(&bld->float_size_bld, int_size); - - lp_build_extract_image_sizes(bld, - &bld->float_size_bld, - bld->coord_type, - flt_size, - &width_vec, - &height_vec, - &depth_vec); - - /* do texcoord wrapping and compute texel offsets */ - lp_build_sample_wrap_linear_float(bld, - bld->format_desc->block.width, - s, width_vec, offsets[0], - bld->static_texture_state->pot_width, - bld->static_sampler_state->wrap_s, - &x_icoord0, &x_icoord1, - &s_fpart, - bld->static_sampler_state->force_nearest_s); - - if (dims >= 2) { - lp_build_sample_wrap_linear_float(bld, - bld->format_desc->block.height, - t, height_vec, offsets[1], - bld->static_texture_state->pot_height, - bld->static_sampler_state->wrap_t, - &y_icoord0, &y_icoord1, - &t_fpart, - bld->static_sampler_state->force_nearest_t); - - if (dims >= 3) { - lp_build_sample_wrap_linear_float(bld, - 1, /* block length (depth) */ - r, depth_vec, offsets[2], - bld->static_texture_state->pot_depth, - bld->static_sampler_state->wrap_r, - &z_icoord0, &z_icoord1, - &r_fpart, 0); - } - } - - /* - * From 
here on we deal with ints, and we should split up the 256bit - * vectors manually for better generated code. - */ - - /* get pixel, row and image strides */ - x_stride = lp_build_const_vec(bld->gallivm, - bld->int_coord_bld.type, - bld->format_desc->block.bits/8); - y_stride = row_stride_vec; - z_stride = img_stride_vec; - - /* - * compute texel offset - - * cannot do offset calc with floats, difficult for block-based formats, - * and not enough precision anyway. - */ - lp_build_sample_partial_offset(&bld->int_coord_bld, - bld->format_desc->block.width, - x_icoord0, x_stride, - &x_offset0, &x_subcoord[0]); - lp_build_sample_partial_offset(&bld->int_coord_bld, - bld->format_desc->block.width, - x_icoord1, x_stride, - &x_offset1, &x_subcoord[1]); - - /* add potential cube/array/mip offsets now as they are constant per pixel */ - if (has_layer_coord(bld->static_texture_state->target)) { - LLVMValueRef z_offset; - z_offset = lp_build_mul(&bld->int_coord_bld, r, img_stride_vec); - /* The r coord is the cube face in [0,5] or array layer */ - x_offset0 = lp_build_add(&bld->int_coord_bld, x_offset0, z_offset); - x_offset1 = lp_build_add(&bld->int_coord_bld, x_offset1, z_offset); - } - if (mipoffsets) { - x_offset0 = lp_build_add(&bld->int_coord_bld, x_offset0, mipoffsets); - x_offset1 = lp_build_add(&bld->int_coord_bld, x_offset1, mipoffsets); - } - - for (z = 0; z < 2; z++) { - for (y = 0; y < 2; y++) { - offset[z][y][0] = x_offset0; - offset[z][y][1] = x_offset1; - } - } - - if (dims >= 2) { - lp_build_sample_partial_offset(&bld->int_coord_bld, - bld->format_desc->block.height, - y_icoord0, y_stride, - &y_offset0, &y_subcoord[0]); - lp_build_sample_partial_offset(&bld->int_coord_bld, - bld->format_desc->block.height, - y_icoord1, y_stride, - &y_offset1, &y_subcoord[1]); - for (z = 0; z < 2; z++) { - for (x = 0; x < 2; x++) { - offset[z][0][x] = lp_build_add(&bld->int_coord_bld, - offset[z][0][x], y_offset0); - offset[z][1][x] = lp_build_add(&bld->int_coord_bld, - 
offset[z][1][x], y_offset1); - } - } - } - - if (dims >= 3) { - LLVMValueRef z_subcoord[2]; - lp_build_sample_partial_offset(&bld->int_coord_bld, - 1, - z_icoord0, z_stride, - &z_offset0, &z_subcoord[0]); - lp_build_sample_partial_offset(&bld->int_coord_bld, - 1, - z_icoord1, z_stride, - &z_offset1, &z_subcoord[1]); - for (y = 0; y < 2; y++) { - for (x = 0; x < 2; x++) { - offset[0][y][x] = lp_build_add(&bld->int_coord_bld, - offset[0][y][x], z_offset0); - offset[1][y][x] = lp_build_add(&bld->int_coord_bld, - offset[1][y][x], z_offset1); - } - } - } - - lp_build_sample_fetch_image_linear(bld, data_ptr, offset, - x_subcoord, y_subcoord, - s_fpart, t_fpart, r_fpart, - colors); -} - - -/** * Sample the texture/mipmap using given image filter and mip filter. * data0_ptr and data1_ptr point to the two mipmap levels to sample * from. width0/1_vec, height0/1_vec, depth0/1_vec indicate their sizes. @@ -1413,9 +969,6 @@ LLVMValueRef mipoff1 = NULL; LLVMValueRef colors0; LLVMValueRef colors1; - boolean use_floats = util_cpu_caps.has_avx && - !util_cpu_caps.has_avx2 && - bld->coord_type.length > 4; /* sample the first mipmap level */ lp_build_mipmap_level_sizes(bld, ilevel0, @@ -1430,39 +983,20 @@ mipoff0 = lp_build_get_mip_offsets(bld, ilevel0); } - if (use_floats) { - if (img_filter == PIPE_TEX_FILTER_NEAREST) { - lp_build_sample_image_nearest_afloat(bld, - size0, - row_stride0_vec, img_stride0_vec, - data_ptr0, mipoff0, s, t, r, offsets, - &colors0); - } - else { - assert(img_filter == PIPE_TEX_FILTER_LINEAR); - lp_build_sample_image_linear_afloat(bld, - size0, - row_stride0_vec, img_stride0_vec, - data_ptr0, mipoff0, s, t, r, offsets, - &colors0); - } + if (img_filter == PIPE_TEX_FILTER_NEAREST) { + lp_build_sample_image_nearest(bld, + size0, + row_stride0_vec, img_stride0_vec, + data_ptr0, mipoff0, s, t, r, offsets, + &colors0); } else { - if (img_filter == PIPE_TEX_FILTER_NEAREST) { - lp_build_sample_image_nearest(bld, - size0, - row_stride0_vec, img_stride0_vec, - 
data_ptr0, mipoff0, s, t, r, offsets, - &colors0); - } - else { - assert(img_filter == PIPE_TEX_FILTER_LINEAR); - lp_build_sample_image_linear(bld, - size0, - row_stride0_vec, img_stride0_vec, - data_ptr0, mipoff0, s, t, r, offsets, - &colors0); - } + assert(img_filter == PIPE_TEX_FILTER_LINEAR); + lp_build_sample_image_linear(bld, + size0, + row_stride0_vec, img_stride0_vec, + data_ptr0, mipoff0, s, t, r, offsets, + &colors0); } /* Store the first level's colors in the output variables */ @@ -1521,37 +1055,19 @@ mipoff1 = lp_build_get_mip_offsets(bld, ilevel1); } - if (use_floats) { - if (img_filter == PIPE_TEX_FILTER_NEAREST) { - lp_build_sample_image_nearest_afloat(bld, - size1, - row_stride1_vec, img_stride1_vec, - data_ptr1, mipoff1, s, t, r, offsets, - &colors1); - } - else { - lp_build_sample_image_linear_afloat(bld, - size1, - row_stride1_vec, img_stride1_vec, - data_ptr1, mipoff1, s, t, r, offsets, - &colors1); - } + if (img_filter == PIPE_TEX_FILTER_NEAREST) { + lp_build_sample_image_nearest(bld, + size1, + row_stride1_vec, img_stride1_vec, + data_ptr1, mipoff1, s, t, r, offsets, + &colors1); } else { - if (img_filter == PIPE_TEX_FILTER_NEAREST) { - lp_build_sample_image_nearest(bld, - size1, - row_stride1_vec, img_stride1_vec, - data_ptr1, mipoff1, s, t, r, offsets, - &colors1); - } - else { - lp_build_sample_image_linear(bld, - size1, - row_stride1_vec, img_stride1_vec, - data_ptr1, mipoff1, s, t, r, offsets, - &colors1); - } + lp_build_sample_image_linear(bld, + size1, + row_stride1_vec, img_stride1_vec, + data_ptr1, mipoff1, s, t, r, offsets, + &colors1); } /* interpolate samples from the two mipmap levels */ diff -Nru mesa-18.3.3/src/gallium/auxiliary/gallivm/lp_bld_sample_soa.c mesa-19.0.1/src/gallium/auxiliary/gallivm/lp_bld_sample_soa.c --- mesa-18.3.3/src/gallium/auxiliary/gallivm/lp_bld_sample_soa.c 2018-12-07 18:58:04.000000000 +0000 +++ mesa-19.0.1/src/gallium/auxiliary/gallivm/lp_bld_sample_soa.c 2019-03-31 23:16:37.000000000 +0000 @@ 
-3549,10 +3549,6 @@ const struct util_format_description *format_desc; format_desc = util_format_description(static_texture_state->format); if (format_desc && format_desc->layout == UTIL_FORMAT_LAYOUT_S3TC) { - /* - * This is not 100% correct, if we have cache but the - * util_format_s3tc_prefer is true the cache won't get used - * regardless (could hook up the block decode there...) */ need_cache = TRUE; } } diff -Nru mesa-18.3.3/src/gallium/auxiliary/gallivm/lp_bld_tgsi_soa.c mesa-19.0.1/src/gallium/auxiliary/gallivm/lp_bld_tgsi_soa.c --- mesa-18.3.3/src/gallium/auxiliary/gallivm/lp_bld_tgsi_soa.c 2018-12-07 18:58:04.000000000 +0000 +++ mesa-19.0.1/src/gallium/auxiliary/gallivm/lp_bld_tgsi_soa.c 2019-03-31 23:16:37.000000000 +0000 @@ -41,6 +41,7 @@ #include "util/u_debug.h" #include "util/u_math.h" #include "util/u_memory.h" +#include "util/u_prim.h" #include "tgsi/tgsi_dump.h" #include "tgsi/tgsi_exec.h" #include "tgsi/tgsi_info.h" @@ -1059,7 +1060,8 @@ static LLVMValueRef get_indirect_index(struct lp_build_tgsi_soa_context *bld, unsigned reg_file, unsigned reg_index, - const struct tgsi_ind_register *indirect_reg) + const struct tgsi_ind_register *indirect_reg, + int index_limit) { LLVMBuilderRef builder = bld->bld_base.base.gallivm->builder; struct lp_build_context *uint_bld = &bld->bld_base.uint_bld; @@ -1106,9 +1108,9 @@ * larger than the declared size but smaller than the buffer size. 
*/ if (reg_file != TGSI_FILE_CONSTANT) { + assert(index_limit > 0); max_index = lp_build_const_int_vec(bld->bld_base.base.gallivm, - uint_bld->type, - bld->bld_base.info->file_max[reg_file]); + uint_bld->type, index_limit); assert(!uint_bld->type.sign); index = lp_build_min(uint_bld, index, max_index); @@ -1225,7 +1227,8 @@ indirect_index = get_indirect_index(bld, reg->Register.File, reg->Register.Index, - ®->Indirect); + ®->Indirect, + bld->bld_base.info->file_max[reg->Register.File]); /* All fetches are from the same constant buffer, so * we need to propagate the size to a vector to do a @@ -1364,7 +1367,8 @@ indirect_index = get_indirect_index(bld, reg->Register.File, reg->Register.Index, - ®->Indirect); + ®->Indirect, + bld->bld_base.info->file_max[reg->Register.File]); /* * Unlike for other reg classes, adding pixel offsets is unnecessary - * immediates are stored as full vectors (FIXME??? - might be better @@ -1438,7 +1442,8 @@ indirect_index = get_indirect_index(bld, reg->Register.File, reg->Register.Index, - ®->Indirect); + ®->Indirect, + bld->bld_base.info->file_max[reg->Register.File]); index_vec = get_soa_array_offsets(&bld_base->uint_bld, indirect_index, @@ -1524,19 +1529,33 @@ } if (reg->Register.Indirect) { + /* + * XXX: this is possibly not quite the right value, since file_max may be + * larger than the max attrib index, due to it being the max of declared + * inputs AND the max vertices per prim (which is 6 for tri adj). + * It should however be safe to use (since we always allocate + * PIPE_MAX_SHADER_INPUTS (80) for it, which is overallocated quite a bit). + */ + int index_limit = info->file_max[reg->Register.File]; attrib_index = get_indirect_index(bld, reg->Register.File, reg->Register.Index, - ®->Indirect); + ®->Indirect, + index_limit); } else { attrib_index = lp_build_const_int32(gallivm, reg->Register.Index); } if (reg->Dimension.Indirect) { + /* + * A fixed 6 should do as well (which is what we allocate). 
+ */ + int index_limit = u_vertices_per_prim(info->properties[TGSI_PROPERTY_GS_INPUT_PRIM]); vertex_index = get_indirect_index(bld, reg->Register.File, reg->Dimension.Index, - ®->DimIndirect); + ®->DimIndirect, + index_limit); } else { vertex_index = lp_build_const_int32(gallivm, reg->Dimension.Index); } @@ -1591,7 +1610,8 @@ indirect_index = get_indirect_index(bld, reg->Register.File, reg->Register.Index, - ®->Indirect); + ®->Indirect, + bld->bld_base.info->file_max[reg->Register.File]); index_vec = get_soa_array_offsets(&bld_base->uint_bld, indirect_index, @@ -1811,7 +1831,8 @@ indirect_index = get_indirect_index(bld, reg->Register.File, reg->Register.Index, - ®->Indirect); + ®->Indirect, + bld->bld_base.info->file_max[reg->Register.File]); } else { assert(reg->Register.Index <= bld_base->info->file_max[reg->Register.File]); diff -Nru mesa-18.3.3/src/gallium/auxiliary/Makefile.sources mesa-19.0.1/src/gallium/auxiliary/Makefile.sources --- mesa-18.3.3/src/gallium/auxiliary/Makefile.sources 2018-12-07 18:58:04.000000000 +0000 +++ mesa-19.0.1/src/gallium/auxiliary/Makefile.sources 2019-03-31 23:16:37.000000000 +0000 @@ -102,10 +102,6 @@ indices/u_indices_priv.h \ indices/u_primconvert.c \ indices/u_primconvert.h \ - os/os_memory_aligned.h \ - os/os_memory_debug.h \ - os/os_memory_stdc.h \ - os/os_memory.h \ os/os_mman.h \ os/os_process.c \ os/os_process.h \ @@ -290,7 +286,6 @@ util/u_linear.h \ util/u_log.c \ util/u_log.h \ - util/u_memory.h \ util/u_mm.c \ util/u_mm.h \ util/u_network.c \ @@ -423,11 +418,11 @@ gallivm/lp_bld_flow.h \ gallivm/lp_bld_format_aos_array.c \ gallivm/lp_bld_format_aos.c \ - gallivm/lp_bld_format_cached.c \ gallivm/lp_bld_format_float.c \ gallivm/lp_bld_format.c \ gallivm/lp_bld_format.h \ gallivm/lp_bld_format_soa.c \ + gallivm/lp_bld_format_s3tc.c \ gallivm/lp_bld_format_srgb.c \ gallivm/lp_bld_format_yuv.c \ gallivm/lp_bld_gather.c \ diff -Nru mesa-18.3.3/src/gallium/auxiliary/meson.build mesa-19.0.1/src/gallium/auxiliary/meson.build 
--- mesa-18.3.3/src/gallium/auxiliary/meson.build 2018-12-07 18:58:04.000000000 +0000 +++ mesa-19.0.1/src/gallium/auxiliary/meson.build 2019-03-31 23:16:37.000000000 +0000 @@ -122,10 +122,6 @@ 'indices/u_indices_priv.h', 'indices/u_primconvert.c', 'indices/u_primconvert.h', - 'os/os_memory_aligned.h', - 'os/os_memory_debug.h', - 'os/os_memory_stdc.h', - 'os/os_memory.h', 'os/os_mman.h', 'os/os_process.c', 'os/os_process.h', @@ -310,7 +306,6 @@ 'util/u_linear.h', 'util/u_log.c', 'util/u_log.h', - 'util/u_memory.h', 'util/u_mm.c', 'util/u_mm.h', 'util/u_network.c', @@ -394,8 +389,8 @@ 'gallivm/lp_bld_flow.h', 'gallivm/lp_bld_format_aos_array.c', 'gallivm/lp_bld_format_aos.c', - 'gallivm/lp_bld_format_cached.c', 'gallivm/lp_bld_format_float.c', + 'gallivm/lp_bld_format_s3tc.c', 'gallivm/lp_bld_format.c', 'gallivm/lp_bld_format.h', 'gallivm/lp_bld_format_soa.c', diff -Nru mesa-18.3.3/src/gallium/auxiliary/nir/tgsi_to_nir.c mesa-19.0.1/src/gallium/auxiliary/nir/tgsi_to_nir.c --- mesa-18.3.3/src/gallium/auxiliary/nir/tgsi_to_nir.c 2019-02-01 12:03:20.000000000 +0000 +++ mesa-19.0.1/src/gallium/auxiliary/nir/tgsi_to_nir.c 2019-03-31 23:16:37.000000000 +0000 @@ -181,8 +181,8 @@ /* for arrays, we create variables instead of registers: */ nir_variable *var = rzalloc(b->shader, nir_variable); - var->type = glsl_array_type(glsl_vec4_type(), array_size); - var->data.mode = nir_var_global; + var->type = glsl_array_type(glsl_vec4_type(), array_size, 0); + var->data.mode = nir_var_shader_temp; var->name = ralloc_asprintf(var, "arr_%d", decl->Array.ArrayID); exec_list_push_tail(&b->shader->globals, &var->node); @@ -265,7 +265,7 @@ var->type = glsl_vec4_type(); if (is_array) - var->type = glsl_array_type(var->type, array_size); + var->type = glsl_array_type(var->type, array_size, 0); switch (file) { case TGSI_FILE_INPUT: @@ -516,8 +516,7 @@ c->scan->input_semantic_name[index] == TGSI_SEMANTIC_FACE) { nir_ssa_def *tgsi_frontface[4] = { nir_bcsel(&c->build, - 
nir_load_system_value(&c->build, - nir_intrinsic_load_front_face, 0), + nir_load_front_face(&c->build, 1), nir_imm_float(&c->build, 1.0), nir_imm_float(&c->build, -1.0)), nir_imm_float(&c->build, 0.0), @@ -938,9 +937,7 @@ static void ttn_kill_if(nir_builder *b, nir_op op, nir_alu_dest dest, nir_ssa_def **src) { - nir_ssa_def *cmp = nir_bany_inequal4(b, nir_flt(b, src[0], - nir_imm_float(b, 0.0)), - nir_imm_int(b, 0)); + nir_ssa_def *cmp = nir_bany(b, nir_flt(b, src[0], nir_imm_float(b, 0.0))); nir_intrinsic_instr *discard = nir_intrinsic_instr_create(b->shader, nir_intrinsic_discard_if); discard->src[0] = nir_src_for_ssa(cmp); @@ -1471,10 +1468,10 @@ [TGSI_OPCODE_ENDSUB] = 0, /* XXX: no function calls */ [TGSI_OPCODE_NOP] = 0, - [TGSI_OPCODE_FSEQ] = nir_op_feq, - [TGSI_OPCODE_FSGE] = nir_op_fge, - [TGSI_OPCODE_FSLT] = nir_op_flt, - [TGSI_OPCODE_FSNE] = nir_op_fne, + [TGSI_OPCODE_FSEQ] = nir_op_feq32, + [TGSI_OPCODE_FSGE] = nir_op_fge32, + [TGSI_OPCODE_FSLT] = nir_op_flt32, + [TGSI_OPCODE_FSNE] = nir_op_fne32, [TGSI_OPCODE_KILL_IF] = 0, @@ -1485,9 +1482,9 @@ [TGSI_OPCODE_IMAX] = nir_op_imax, [TGSI_OPCODE_IMIN] = nir_op_imin, [TGSI_OPCODE_INEG] = nir_op_ineg, - [TGSI_OPCODE_ISGE] = nir_op_ige, + [TGSI_OPCODE_ISGE] = nir_op_ige32, [TGSI_OPCODE_ISHR] = nir_op_ishr, - [TGSI_OPCODE_ISLT] = nir_op_ilt, + [TGSI_OPCODE_ISLT] = nir_op_ilt32, [TGSI_OPCODE_F2U] = nir_op_f2u32, [TGSI_OPCODE_U2F] = nir_op_u2f32, [TGSI_OPCODE_UADD] = nir_op_iadd, @@ -1497,11 +1494,11 @@ [TGSI_OPCODE_UMIN] = nir_op_umin, [TGSI_OPCODE_UMOD] = nir_op_umod, [TGSI_OPCODE_UMUL] = nir_op_imul, - [TGSI_OPCODE_USEQ] = nir_op_ieq, - [TGSI_OPCODE_USGE] = nir_op_uge, + [TGSI_OPCODE_USEQ] = nir_op_ieq32, + [TGSI_OPCODE_USGE] = nir_op_uge32, [TGSI_OPCODE_USHR] = nir_op_ushr, - [TGSI_OPCODE_USLT] = nir_op_ult, - [TGSI_OPCODE_USNE] = nir_op_ine, + [TGSI_OPCODE_USLT] = nir_op_ult32, + [TGSI_OPCODE_USNE] = nir_op_ine32, [TGSI_OPCODE_SWITCH] = 0, /* not emitted by glsl_to_tgsi.cpp */ [TGSI_OPCODE_CASE] = 0, /* not 
emitted by glsl_to_tgsi.cpp */ diff -Nru mesa-18.3.3/src/gallium/auxiliary/os/os_memory_aligned.h mesa-19.0.1/src/gallium/auxiliary/os/os_memory_aligned.h --- mesa-18.3.3/src/gallium/auxiliary/os/os_memory_aligned.h 2017-11-14 18:46:21.000000000 +0000 +++ mesa-19.0.1/src/gallium/auxiliary/os/os_memory_aligned.h 1970-01-01 00:00:00.000000000 +0000 @@ -1,98 +0,0 @@ -/************************************************************************** - * - * Copyright 2008-2010 VMware, Inc. - * All Rights Reserved. - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the - * "Software"), to deal in the Software without restriction, including - * without limitation the rights to use, copy, modify, merge, publish, - * distribute, sub license, and/or sell copies of the Software, and to - * permit persons to whom the Software is furnished to do so, subject to - * the following conditions: - * - * The above copyright notice and this permission notice (including the - * next paragraph) shall be included in all copies or substantial portions - * of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS - * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. - * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR - * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, - * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE - * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. - * - **************************************************************************/ - - -/* - * Memory alignment wrappers. - */ - - -#ifndef _OS_MEMORY_H_ -#error "Must not be included directly. Include os_memory.h instead" -#endif - - -#include "pipe/p_compiler.h" - - - -/** - * Add two size_t values with integer overflow check. 
- * TODO: leverage __builtin_add_overflow where available - */ -static inline bool -add_overflow_size_t(size_t a, size_t b, size_t *res) -{ - *res = a + b; - return *res < a || *res < b; -} - - -/** - * Return memory on given byte alignment - */ -static inline void * -os_malloc_aligned(size_t size, size_t alignment) -{ - char *ptr, *buf; - size_t alloc_size; - - /* - * Calculate - * - * alloc_size = size + alignment + sizeof(void *) - * - * while checking for overflow. - */ - if (add_overflow_size_t(size, alignment, &alloc_size) || - add_overflow_size_t(alloc_size, sizeof(void *), &alloc_size)) { - return NULL; - } - - ptr = (char *) os_malloc(alloc_size); - if (!ptr) - return NULL; - - buf = (char *)(((uintptr_t)ptr + sizeof(void *) + alignment - 1) & ~((uintptr_t)(alignment - 1))); - *(char **)(buf - sizeof(void *)) = ptr; - - return buf; -} - - -/** - * Free memory returned by os_malloc_aligned(). - */ -static inline void -os_free_aligned(void *ptr) -{ - if (ptr) { - void **cubbyHole = (void **) ((char *) ptr - sizeof(void *)); - void *realAddr = *cubbyHole; - os_free(realAddr); - } -} diff -Nru mesa-18.3.3/src/gallium/auxiliary/os/os_memory_debug.h mesa-19.0.1/src/gallium/auxiliary/os/os_memory_debug.h --- mesa-18.3.3/src/gallium/auxiliary/os/os_memory_debug.h 2017-11-05 00:14:08.000000000 +0000 +++ mesa-19.0.1/src/gallium/auxiliary/os/os_memory_debug.h 1970-01-01 00:00:00.000000000 +0000 @@ -1,92 +0,0 @@ -/************************************************************************** - * - * Copyright 2008-2010 VMware, Inc. - * All Rights Reserved. 
- * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the - * "Software"), to deal in the Software without restriction, including - * without limitation the rights to use, copy, modify, merge, publish, - * distribute, sub license, and/or sell copies of the Software, and to - * permit persons to whom the Software is furnished to do so, subject to - * the following conditions: - * - * The above copyright notice and this permission notice (including the - * next paragraph) shall be included in all copies or substantial portions - * of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS - * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. - * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR - * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, - * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE - * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. - * - **************************************************************************/ - - -/* - * Debugging wrappers for OS memory management abstractions. - */ - - -#ifndef _OS_MEMORY_H_ -#error "Must not be included directly. 
Include os_memory.h instead" -#endif - - -#include "pipe/p_compiler.h" - - -#ifdef __cplusplus -extern "C" { -#endif - - -void * -debug_malloc(const char *file, unsigned line, const char *function, - size_t size); - -void * -debug_calloc(const char *file, unsigned line, const char *function, - size_t count, size_t size ); - -void -debug_free(const char *file, unsigned line, const char *function, - void *ptr); - -void * -debug_realloc(const char *file, unsigned line, const char *function, - void *old_ptr, size_t old_size, size_t new_size ); - -void -debug_memory_tag(void *ptr, unsigned tag); - -void -debug_memory_check_block(void *ptr); - -void -debug_memory_check(void); - - -#ifdef __cplusplus -} -#endif - - -#ifndef DEBUG_MEMORY_IMPLEMENTATION - -#define os_malloc( _size ) \ - debug_malloc( __FILE__, __LINE__, __FUNCTION__, _size ) -#define os_calloc( _count, _size ) \ - debug_calloc(__FILE__, __LINE__, __FUNCTION__, _count, _size ) -#define os_free( _ptr ) \ - debug_free( __FILE__, __LINE__, __FUNCTION__, _ptr ) -#define os_realloc( _ptr, _old_size, _new_size ) \ - debug_realloc( __FILE__, __LINE__, __FUNCTION__, _ptr, _old_size, _new_size ) - -/* TODO: wrap os_malloc_aligned() and os_free_aligned() too */ -#include "os_memory_aligned.h" - -#endif /* !DEBUG_MEMORY_IMPLEMENTATION */ diff -Nru mesa-18.3.3/src/gallium/auxiliary/os/os_memory.h mesa-19.0.1/src/gallium/auxiliary/os/os_memory.h --- mesa-18.3.3/src/gallium/auxiliary/os/os_memory.h 2017-11-05 00:14:08.000000000 +0000 +++ mesa-19.0.1/src/gallium/auxiliary/os/os_memory.h 1970-01-01 00:00:00.000000000 +0000 @@ -1,80 +0,0 @@ -/************************************************************************** - * - * Copyright 2010 Vmware, Inc. - * All Rights Reserved. 
- * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the - * "Software"), to deal in the Software without restriction, including - * without limitation the rights to use, copy, modify, merge, publish, - * distribute, sub license, and/or sell copies of the Software, and to - * permit persons to whom the Software is furnished to do so, subject to - * the following conditions: - * - * The above copyright notice and this permission notice (including the - * next paragraph) shall be included in all copies or substantial portions - * of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS - * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. - * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR - * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, - * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE - * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
- * - **************************************************************************/ - - -/* - * OS memory management abstractions - */ - - -#ifndef _OS_MEMORY_H_ -#define _OS_MEMORY_H_ - - -#include "pipe/p_config.h" -#include "pipe/p_compiler.h" - - -#if defined(PIPE_SUBSYSTEM_EMBEDDED) - -#ifdef __cplusplus -extern "C" { -#endif - -void * -os_malloc(size_t size); - -void * -os_calloc(size_t count, size_t size); - -void -os_free(void *ptr); - -void * -os_realloc(void *ptr, size_t old_size, size_t new_size); - -void * -os_malloc_aligned(size_t size, size_t alignment); - -void -os_free_aligned(void *ptr); - -#ifdef __cplusplus -} -#endif - -#elif defined(PIPE_OS_WINDOWS) && defined(DEBUG) && !defined(DEBUG_MEMORY_IMPLEMENTATION) - -# include "os_memory_debug.h" - -#else - -# include "os_memory_stdc.h" - -#endif - -#endif /* _OS_MEMORY_H_ */ diff -Nru mesa-18.3.3/src/gallium/auxiliary/os/os_memory_stdc.h mesa-19.0.1/src/gallium/auxiliary/os/os_memory_stdc.h --- mesa-18.3.3/src/gallium/auxiliary/os/os_memory_stdc.h 2017-11-05 00:14:08.000000000 +0000 +++ mesa-19.0.1/src/gallium/auxiliary/os/os_memory_stdc.h 1970-01-01 00:00:00.000000000 +0000 @@ -1,76 +0,0 @@ -/************************************************************************** - * - * Copyright 2008-2010 VMware, Inc. - * All Rights Reserved. - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the - * "Software"), to deal in the Software without restriction, including - * without limitation the rights to use, copy, modify, merge, publish, - * distribute, sub license, and/or sell copies of the Software, and to - * permit persons to whom the Software is furnished to do so, subject to - * the following conditions: - * - * The above copyright notice and this permission notice (including the - * next paragraph) shall be included in all copies or substantial portions - * of the Software. 
- * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS - * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. - * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR - * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, - * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE - * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. - * - **************************************************************************/ - - -/* - * OS memory management abstractions for the standard C library. - */ - - -#ifndef _OS_MEMORY_H_ -#error "Must not be included directly. Include os_memory.h instead" -#endif - -#include - -#include "pipe/p_compiler.h" - - -#define os_malloc(_size) malloc(_size) -#define os_calloc(_count, _size ) calloc(_count, _size ) -#define os_free(_ptr) free(_ptr) - -#define os_realloc( _old_ptr, _old_size, _new_size) \ - realloc(_old_ptr, _new_size + 0*(_old_size)) - - -#if defined(HAVE_POSIX_MEMALIGN) - -static inline void * -os_malloc_aligned(size_t size, size_t alignment) -{ - void *ptr; - alignment = (alignment + sizeof(void*) - 1) & ~(sizeof(void*) - 1); - if(posix_memalign(&ptr, alignment, size) != 0) - return NULL; - return ptr; -} - -#define os_free_aligned(_ptr) free(_ptr) - -#elif defined(PIPE_OS_WINDOWS) - -#include - -#define os_malloc_aligned(_size, _align) _aligned_malloc(_size, _align) -#define os_free_aligned(_ptr) _aligned_free(_ptr) - -#else - -#include "os_memory_aligned.h" - -#endif diff -Nru mesa-18.3.3/src/gallium/auxiliary/pipe-loader/pipe_loader_drm.c mesa-19.0.1/src/gallium/auxiliary/pipe-loader/pipe_loader_drm.c --- mesa-18.3.3/src/gallium/auxiliary/pipe-loader/pipe_loader_drm.c 2018-12-07 18:58:04.000000000 +0000 +++ mesa-19.0.1/src/gallium/auxiliary/pipe-loader/pipe_loader_drm.c 2019-03-31 23:16:37.000000000 +0000 @@ -107,11 +107,6 @@ .configuration = 
pipe_default_configuration_query, }, { - .driver_name = "pl111", - .create_screen = pipe_pl111_create_screen, - .configuration = pipe_default_configuration_query, - }, - { .driver_name = "virtio_gpu", .create_screen = pipe_virgl_create_screen, .configuration = pipe_default_configuration_query, @@ -132,16 +127,18 @@ .configuration = pipe_default_configuration_query, }, { - .driver_name = "imx-drm", - .create_screen = pipe_imx_drm_create_screen, - .configuration = pipe_default_configuration_query, - }, - { .driver_name = "tegra", .create_screen = pipe_tegra_create_screen, .configuration = pipe_default_configuration_query, }, }; + +static const struct drm_driver_descriptor default_driver_descriptor = { + .driver_name = "kmsro", + .create_screen = pipe_kmsro_create_screen, + .configuration = pipe_default_configuration_query, +}; + #endif static const struct drm_driver_descriptor * @@ -152,6 +149,7 @@ if (strcmp(driver_descriptors[i].driver_name, driver_name) == 0) return &driver_descriptors[i]; } + return &default_driver_descriptor; #else *plib = pipe_loader_find_module(driver_name, PIPE_SEARCH_DIR); if (!*plib) diff -Nru mesa-18.3.3/src/gallium/auxiliary/target-helpers/drm_helper.h mesa-19.0.1/src/gallium/auxiliary/target-helpers/drm_helper.h --- mesa-18.3.3/src/gallium/auxiliary/target-helpers/drm_helper.h 2018-09-27 19:13:54.000000000 +0000 +++ mesa-19.0.1/src/gallium/auxiliary/target-helpers/drm_helper.h 2019-03-31 23:16:37.000000000 +0000 @@ -83,24 +83,24 @@ #endif -#ifdef GALLIUM_PL111 -#include "pl111/drm/pl111_drm_public.h" +#ifdef GALLIUM_KMSRO +#include "kmsro/drm/kmsro_drm_public.h" struct pipe_screen * -pipe_pl111_create_screen(int fd, const struct pipe_screen_config *config) +pipe_kmsro_create_screen(int fd, const struct pipe_screen_config *config) { struct pipe_screen *screen; - screen = pl111_drm_screen_create(fd); + screen = kmsro_drm_screen_create(fd); return screen ? 
debug_screen_wrap(screen) : NULL; } #else struct pipe_screen * -pipe_pl111_create_screen(int fd, const struct pipe_screen_config *config) +pipe_kmsro_create_screen(int fd, const struct pipe_screen_config *config) { - fprintf(stderr, "pl111: driver missing\n"); + fprintf(stderr, "kmsro: driver missing\n"); return NULL; } @@ -248,7 +248,7 @@ { struct pipe_screen *screen; - screen = fd_drm_screen_create(fd); + screen = fd_drm_screen_create(fd, NULL); return screen ? debug_screen_wrap(screen) : NULL; } @@ -354,29 +354,6 @@ return NULL; } -#endif - -#ifdef GALLIUM_IMX -#include "imx/drm/imx_drm_public.h" - -struct pipe_screen * -pipe_imx_drm_create_screen(int fd, const struct pipe_screen_config *config) -{ - struct pipe_screen *screen; - - screen = imx_drm_screen_create(fd); - return screen ? debug_screen_wrap(screen) : NULL; -} - -#else - -struct pipe_screen * -pipe_imx_drm_create_screen(int fd, const struct pipe_screen_config *config) -{ - fprintf(stderr, "imx-drm: driver missing\n"); - return NULL; -} - #endif #ifdef GALLIUM_TEGRA diff -Nru mesa-18.3.3/src/gallium/auxiliary/target-helpers/drm_helper_public.h mesa-19.0.1/src/gallium/auxiliary/target-helpers/drm_helper_public.h --- mesa-18.3.3/src/gallium/auxiliary/target-helpers/drm_helper_public.h 2018-09-27 19:13:54.000000000 +0000 +++ mesa-19.0.1/src/gallium/auxiliary/target-helpers/drm_helper_public.h 2019-03-31 23:16:37.000000000 +0000 @@ -43,7 +43,7 @@ pipe_vc4_create_screen(int fd, const struct pipe_screen_config *config); struct pipe_screen * -pipe_pl111_create_screen(int fd, const struct pipe_screen_config *config); +pipe_kmsro_create_screen(int fd, const struct pipe_screen_config *config); struct pipe_screen * pipe_etna_create_screen(int fd, const struct pipe_screen_config *config); diff -Nru mesa-18.3.3/src/gallium/auxiliary/tgsi/tgsi_exec.c mesa-19.0.1/src/gallium/auxiliary/tgsi/tgsi_exec.c --- mesa-18.3.3/src/gallium/auxiliary/tgsi/tgsi_exec.c 2018-12-07 18:58:04.000000000 +0000 +++ 
mesa-19.0.1/src/gallium/auxiliary/tgsi/tgsi_exec.c 2019-03-31 23:16:37.000000000 +0000 @@ -4253,6 +4253,9 @@ if (val == value[0].u[0]) val = value2[0].u[0]; break; + case TGSI_OPCODE_ATOMFADD: + val = fui(r[0].f[0] + value[0].f[0]); + break; default: break; } @@ -5933,6 +5936,7 @@ case TGSI_OPCODE_ATOMUMAX: case TGSI_OPCODE_ATOMIMIN: case TGSI_OPCODE_ATOMIMAX: + case TGSI_OPCODE_ATOMFADD: exec_atomop(mach, inst); break; diff -Nru mesa-18.3.3/src/gallium/auxiliary/tgsi/tgsi_info_opcodes.h mesa-19.0.1/src/gallium/auxiliary/tgsi/tgsi_info_opcodes.h --- mesa-18.3.3/src/gallium/auxiliary/tgsi/tgsi_info_opcodes.h 2018-09-27 19:13:54.000000000 +0000 +++ mesa-19.0.1/src/gallium/auxiliary/tgsi/tgsi_info_opcodes.h 2019-03-31 23:16:37.000000000 +0000 @@ -101,7 +101,7 @@ OPCODE(0, 0, NONE, BGNSUB, .post_indent = 1) OPCODE(0, 0, NONE, ENDLOOP, .is_branch = 1, .pre_dedent = 1) OPCODE(0, 0, NONE, ENDSUB, .pre_dedent = 1) -OPCODE_GAP(103) /* removed */ +OPCODE(1, 3, OTHR, ATOMFADD, .is_store = 1) OPCODE(1, 1, OTHR, TXQS, .is_tex = 1) OPCODE(1, 1, OTHR, RESQ) OPCODE(1, 1, COMP, READ_FIRST) diff -Nru mesa-18.3.3/src/gallium/auxiliary/tgsi/tgsi_scan.c mesa-19.0.1/src/gallium/auxiliary/tgsi/tgsi_scan.c --- mesa-18.3.3/src/gallium/auxiliary/tgsi/tgsi_scan.c 2019-02-01 12:03:20.000000000 +0000 +++ mesa-19.0.1/src/gallium/auxiliary/tgsi/tgsi_scan.c 2019-03-31 23:16:37.000000000 +0000 @@ -368,7 +368,19 @@ info->uses_bindless_samplers = true; break; case TGSI_OPCODE_RESQ: + if (tgsi_is_bindless_image_file(fullinst->Src[0].Register.File)) + info->uses_bindless_images = true; + break; case TGSI_OPCODE_LOAD: + if (tgsi_is_bindless_image_file(fullinst->Src[0].Register.File)) { + info->uses_bindless_images = true; + + if (fullinst->Memory.Texture == TGSI_TEXTURE_BUFFER) + info->uses_bindless_buffer_load = true; + else + info->uses_bindless_image_load = true; + } + break; case TGSI_OPCODE_ATOMUADD: case TGSI_OPCODE_ATOMXCHG: case TGSI_OPCODE_ATOMCAS: @@ -379,12 +391,25 @@ case 
TGSI_OPCODE_ATOMUMAX: case TGSI_OPCODE_ATOMIMIN: case TGSI_OPCODE_ATOMIMAX: - if (tgsi_is_bindless_image_file(fullinst->Src[0].Register.File)) + case TGSI_OPCODE_ATOMFADD: + if (tgsi_is_bindless_image_file(fullinst->Src[0].Register.File)) { info->uses_bindless_images = true; + + if (fullinst->Memory.Texture == TGSI_TEXTURE_BUFFER) + info->uses_bindless_buffer_atomic = true; + else + info->uses_bindless_image_atomic = true; + } break; case TGSI_OPCODE_STORE: - if (tgsi_is_bindless_image_file(fullinst->Dst[0].Register.File)) + if (tgsi_is_bindless_image_file(fullinst->Dst[0].Register.File)) { info->uses_bindless_images = true; + + if (fullinst->Memory.Texture == TGSI_TEXTURE_BUFFER) + info->uses_bindless_buffer_store = true; + else + info->uses_bindless_image_store = true; + } break; default: break; diff -Nru mesa-18.3.3/src/gallium/auxiliary/tgsi/tgsi_scan.h mesa-19.0.1/src/gallium/auxiliary/tgsi/tgsi_scan.h --- mesa-18.3.3/src/gallium/auxiliary/tgsi/tgsi_scan.h 2018-02-27 16:44:19.000000000 +0000 +++ mesa-19.0.1/src/gallium/auxiliary/tgsi/tgsi_scan.h 2019-03-31 23:16:37.000000000 +0000 @@ -153,6 +153,13 @@ unsigned shader_buffers_load; /**< bitmask of shader buffers using loads */ unsigned shader_buffers_store; /**< bitmask of shader buffers using stores */ unsigned shader_buffers_atomic; /**< bitmask of shader buffers using atomics */ + bool uses_bindless_buffer_load; + bool uses_bindless_buffer_store; + bool uses_bindless_buffer_atomic; + bool uses_bindless_image_load; + bool uses_bindless_image_store; + bool uses_bindless_image_atomic; + /** * Bitmask indicating which register files are accessed with * indirect addressing. The bits are (1 << TGSI_FILE_x), etc. 
@@ -213,7 +220,9 @@ { return file != TGSI_FILE_IMAGE && file != TGSI_FILE_MEMORY && - file != TGSI_FILE_BUFFER; + file != TGSI_FILE_BUFFER && + file != TGSI_FILE_CONSTBUF && + file != TGSI_FILE_HW_ATOMIC; } #ifdef __cplusplus diff -Nru mesa-18.3.3/src/gallium/auxiliary/tgsi/tgsi_util.c mesa-19.0.1/src/gallium/auxiliary/tgsi/tgsi_util.c --- mesa-18.3.3/src/gallium/auxiliary/tgsi/tgsi_util.c 2018-09-27 19:13:54.000000000 +0000 +++ mesa-19.0.1/src/gallium/auxiliary/tgsi/tgsi_util.c 2019-03-31 23:16:37.000000000 +0000 @@ -385,6 +385,7 @@ case TGSI_OPCODE_ATOMUMAX: case TGSI_OPCODE_ATOMIMIN: case TGSI_OPCODE_ATOMIMAX: + case TGSI_OPCODE_ATOMFADD: if (src_idx == 0) { read_mask = TGSI_WRITEMASK_XY; /* bindless handle possible */ } else if (src_idx == 1) { diff -Nru mesa-18.3.3/src/gallium/auxiliary/util/u_debug_memory.c mesa-19.0.1/src/gallium/auxiliary/util/u_debug_memory.c --- mesa-18.3.3/src/gallium/auxiliary/util/u_debug_memory.c 2018-12-07 18:58:04.000000000 +0000 +++ mesa-19.0.1/src/gallium/auxiliary/util/u_debug_memory.c 2019-03-31 23:16:37.000000000 +0000 @@ -36,14 +36,14 @@ #define DEBUG_MEMORY_IMPLEMENTATION -#include "os/os_memory.h" -#include "os/os_memory_debug.h" #include "os/os_thread.h" #include "util/u_debug.h" #include "util/u_debug_gallium.h" #include "util/u_debug_stack.h" #include "util/list.h" +#include "util/os_memory.h" +#include "util/os_memory_debug.h" #define DEBUG_MEMORY_MAGIC 0x6e34090aU diff -Nru mesa-18.3.3/src/gallium/auxiliary/util/u_debug_stack_android.cpp mesa-19.0.1/src/gallium/auxiliary/util/u_debug_stack_android.cpp --- mesa-18.3.3/src/gallium/auxiliary/util/u_debug_stack_android.cpp 2018-09-27 19:13:54.000000000 +0000 +++ mesa-19.0.1/src/gallium/auxiliary/util/u_debug_stack_android.cpp 2019-03-31 23:16:37.000000000 +0000 @@ -23,7 +23,7 @@ #include -#include "u_debug.h" +#include "util/u_debug.h" #include "u_debug_stack.h" #include "util/hash_table.h" #include "os/os_thread.h" diff -Nru 
mesa-18.3.3/src/gallium/auxiliary/util/u_format.c mesa-19.0.1/src/gallium/auxiliary/util/u_format.c --- mesa-18.3.3/src/gallium/auxiliary/util/u_format.c 2018-12-07 18:58:04.000000000 +0000 +++ mesa-19.0.1/src/gallium/auxiliary/util/u_format.c 2019-03-31 23:16:37.000000000 +0000 @@ -32,7 +32,7 @@ * @author Jose Fonseca */ -#include "u_memory.h" +#include "util/u_memory.h" #include "u_format.h" #include "u_format_s3tc.h" #include "u_surface.h" @@ -149,24 +149,25 @@ } /** - * Returns true if all non-void channels are normalized signed. + * Returns true if the format contains normalized signed channels. */ boolean util_format_is_snorm(enum pipe_format format) { const struct util_format_description *desc = util_format_description(format); - int i; - if (desc->is_mixed) - return FALSE; + return desc->is_snorm; +} - i = util_format_get_first_non_void_channel(format); - if (i == -1) - return FALSE; +/** + * Returns true if the format contains normalized unsigned channels. + */ +boolean +util_format_is_unorm(enum pipe_format format) +{ + const struct util_format_description *desc = util_format_description(format); - return desc->channel[i].type == UTIL_FORMAT_TYPE_SIGNED && - !desc->channel[i].pure_integer && - desc->channel[i].normalized; + return desc->is_unorm; } boolean @@ -865,3 +866,43 @@ } } } + +enum pipe_format +util_format_snorm8_to_sint8(enum pipe_format format) +{ + switch (format) { + case PIPE_FORMAT_R8_SNORM: + return PIPE_FORMAT_R8_SINT; + case PIPE_FORMAT_R8G8_SNORM: + return PIPE_FORMAT_R8G8_SINT; + case PIPE_FORMAT_R8G8B8_SNORM: + return PIPE_FORMAT_R8G8B8_SINT; + case PIPE_FORMAT_R8G8B8A8_SNORM: + return PIPE_FORMAT_R8G8B8A8_SINT; + + case PIPE_FORMAT_A8_SNORM: + return PIPE_FORMAT_A8_SINT; + case PIPE_FORMAT_L8_SNORM: + return PIPE_FORMAT_L8_SINT; + case PIPE_FORMAT_L8A8_SNORM: + return PIPE_FORMAT_L8A8_SINT; + case PIPE_FORMAT_I8_SNORM: + return PIPE_FORMAT_I8_SINT; + + case PIPE_FORMAT_R8G8B8X8_SNORM: + return PIPE_FORMAT_R8G8B8X8_SINT; + case 
PIPE_FORMAT_R8A8_SNORM: + return PIPE_FORMAT_R8A8_SINT; + case PIPE_FORMAT_A8L8_SNORM: + return PIPE_FORMAT_A8L8_SINT; + case PIPE_FORMAT_G8R8_SNORM: + return PIPE_FORMAT_G8R8_SINT; + case PIPE_FORMAT_A8B8G8R8_SNORM: + return PIPE_FORMAT_A8B8G8R8_SINT; + case PIPE_FORMAT_X8B8G8R8_SNORM: + return PIPE_FORMAT_X8B8G8R8_SINT; + + default: + return format; + } +} diff -Nru mesa-18.3.3/src/gallium/auxiliary/util/u_format.csv mesa-19.0.1/src/gallium/auxiliary/util/u_format.csv --- mesa-18.3.3/src/gallium/auxiliary/util/u_format.csv 2018-02-08 14:40:56.000000000 +0000 +++ mesa-19.0.1/src/gallium/auxiliary/util/u_format.csv 2019-03-31 23:16:37.000000000 +0000 @@ -45,7 +45,7 @@ # - optionally followed by 'p' if it is pure # - number of bits # - channel swizzle -# - color space: rgb, yub, sz +# - color space: rgb, srgb, yuv, zs # - (optional) channel encoding for big-endian targets # - (optional) channel swizzle for big-endian targets # @@ -114,6 +114,7 @@ # SRGB formats PIPE_FORMAT_L8_SRGB , plain, 1, 1, un8 , , , , xxx1, srgb +PIPE_FORMAT_R8_SRGB , plain, 1, 1, un8 , , , , x001, srgb PIPE_FORMAT_L8A8_SRGB , plain, 1, 1, un8 , un8 , , , xxxy, srgb PIPE_FORMAT_R8G8B8_SRGB , plain, 1, 1, un8 , un8 , un8 , , xyz1, srgb PIPE_FORMAT_R8G8B8A8_SRGB , plain, 1, 1, un8 , un8 , un8 , un8 , xyzw, srgb @@ -433,13 +434,17 @@ PIPE_FORMAT_A8L8_UNORM , plain, 1, 1, un8 , un8 , , , yyyx, rgb PIPE_FORMAT_A8L8_SNORM , plain, 1, 1, sn8 , sn8 , , , yyyx, rgb +PIPE_FORMAT_A8L8_SINT , plain, 1, 1, sp8 , sp8 , , , yyyx, rgb PIPE_FORMAT_A8L8_SRGB , plain, 1, 1, un8 , un8 , , , yyyx, srgb PIPE_FORMAT_A16L16_UNORM , plain, 1, 1, un16, un16, , , yyyx, rgb PIPE_FORMAT_G8R8_UNORM , plain, 1, 1, un8 , un8 , , , yx01, rgb PIPE_FORMAT_G8R8_SNORM , plain, 1, 1, sn8 , sn8 , , , yx01, rgb +PIPE_FORMAT_G8R8_SINT , plain, 1, 1, sp8 , sp8 , , , yx01, rgb PIPE_FORMAT_G16R16_UNORM , plain, 1, 1, un16, un16, , , yx01, rgb PIPE_FORMAT_G16R16_SNORM , plain, 1, 1, sn16, sn16, , , yx01, rgb PIPE_FORMAT_A8B8G8R8_SNORM , 
plain, 1, 1, sn8 , sn8 , sn8 , sn8 , wzyx, rgb +PIPE_FORMAT_A8B8G8R8_SINT , plain, 1, 1, sp8 , sp8 , sp8 , sp8 , wzyx, rgb PIPE_FORMAT_X8B8G8R8_SNORM , plain, 1, 1, x8, sn8, sn8, sn8, wzy1, rgb +PIPE_FORMAT_X8B8G8R8_SINT , plain, 1, 1, x8, sp8, sp8, sp8, wzy1, rgb diff -Nru mesa-18.3.3/src/gallium/auxiliary/util/u_format.h mesa-19.0.1/src/gallium/auxiliary/util/u_format.h --- mesa-18.3.3/src/gallium/auxiliary/util/u_format.h 2018-12-07 18:58:04.000000000 +0000 +++ mesa-19.0.1/src/gallium/auxiliary/util/u_format.h 2019-03-31 23:16:37.000000000 +0000 @@ -178,6 +178,16 @@ unsigned is_mixed:1; /** + * Whether the format contains UNORM channels + */ + unsigned is_unorm:1; + + /** + * Whether the format contains SNORM channels + */ + unsigned is_snorm:1; + + /** * Input channel description, in the order XYZW. * * Only valid for UTIL_FORMAT_LAYOUT_PLAIN formats. @@ -727,6 +737,9 @@ util_format_is_snorm(enum pipe_format format); boolean +util_format_is_unorm(enum pipe_format format); + +boolean util_format_is_snorm8(enum pipe_format format); /** @@ -925,6 +938,8 @@ switch (format) { case PIPE_FORMAT_L8_UNORM: return PIPE_FORMAT_L8_SRGB; + case PIPE_FORMAT_R8_UNORM: + return PIPE_FORMAT_R8_SRGB; case PIPE_FORMAT_L8A8_UNORM: return PIPE_FORMAT_L8A8_SRGB; case PIPE_FORMAT_R8G8B8_UNORM: @@ -1001,6 +1016,8 @@ switch (format) { case PIPE_FORMAT_L8_SRGB: return PIPE_FORMAT_L8_UNORM; + case PIPE_FORMAT_R8_SRGB: + return PIPE_FORMAT_R8_UNORM; case PIPE_FORMAT_L8A8_SRGB: return PIPE_FORMAT_L8A8_UNORM; case PIPE_FORMAT_R8G8B8_SRGB: @@ -1351,6 +1368,9 @@ void util_format_unswizzle_4f(float *dst, const float *src, const unsigned char swz[4]); +enum pipe_format +util_format_snorm8_to_sint8(enum pipe_format format); + #ifdef __cplusplus } // extern "C" { #endif diff -Nru mesa-18.3.3/src/gallium/auxiliary/util/u_format_parse.py mesa-19.0.1/src/gallium/auxiliary/util/u_format_parse.py --- mesa-18.3.3/src/gallium/auxiliary/util/u_format_parse.py 2018-12-07 18:58:04.000000000 +0000 +++ 
mesa-19.0.1/src/gallium/auxiliary/util/u_format_parse.py 2019-03-31 23:16:37.000000000 +0000 @@ -187,6 +187,26 @@ return True return False + def is_compressed(self): + for channel in self.le_channels: + if channel.type != VOID: + return False + return True + + def is_unorm(self): + # Non-compressed formats all have unorm or srgb in their name. + for keyword in ['_UNORM', '_SRGB']: + if keyword in self.name: + return True + + # All the compressed formats in GLES3.2 and GL4.6 ("Table 8.14: Generic + # and specific compressed internal formats.") that aren't snorm for + # border colors are unorm, other than BPTC_*_FLOAT. + return self.is_compressed() and not ('FLOAT' in self.name or self.is_snorm()) + + def is_snorm(self): + return '_SNORM' in self.name + def is_pot(self): return is_pot(self.block_size()) diff -Nru mesa-18.3.3/src/gallium/auxiliary/util/u_format_table.py mesa-19.0.1/src/gallium/auxiliary/util/u_format_table.py --- mesa-18.3.3/src/gallium/auxiliary/util/u_format_table.py 2018-09-27 19:13:54.000000000 +0000 +++ mesa-19.0.1/src/gallium/auxiliary/util/u_format_table.py 2019-03-31 23:16:37.000000000 +0000 @@ -136,6 +136,8 @@ print(" %s,\t/* is_array */" % (bool_map(format.is_array()),)) print(" %s,\t/* is_bitmask */" % (bool_map(format.is_bitmask()),)) print(" %s,\t/* is_mixed */" % (bool_map(format.is_mixed()),)) + print(" %s,\t/* is_unorm */" % (bool_map(format.is_unorm()),)) + print(" %s,\t/* is_snorm */" % (bool_map(format.is_snorm()),)) u_format_pack.print_channels(format, do_channel_array) u_format_pack.print_channels(format, do_swizzle_array) print(" %s," % (colorspace_map(format.colorspace),)) diff -Nru mesa-18.3.3/src/gallium/auxiliary/util/u_format_tests.c mesa-19.0.1/src/gallium/auxiliary/util/u_format_tests.c --- mesa-18.3.3/src/gallium/auxiliary/util/u_format_tests.c 2017-11-05 00:14:08.000000000 +0000 +++ mesa-19.0.1/src/gallium/auxiliary/util/u_format_tests.c 2019-03-31 23:16:37.000000000 +0000 @@ -30,7 +30,7 @@ #include #include 
"pipe/p_config.h" -#include "u_memory.h" +#include "util/u_memory.h" #include "u_format_tests.h" @@ -236,6 +236,10 @@ {PIPE_FORMAT_L8_SRGB, PACKED_1x8(0xff), PACKED_1x8(0xbc), UNPACKED_1x1(0.502886458033, 0.502886458033, 0.502886458033, 1.0)}, {PIPE_FORMAT_L8_SRGB, PACKED_1x8(0xff), PACKED_1x8(0xff), UNPACKED_1x1(1.0, 1.0, 1.0, 1.0)}, + {PIPE_FORMAT_R8_SRGB, PACKED_1x8(0xff), PACKED_1x8(0x00), UNPACKED_1x1(0.0, 0.0, 0.0, 1.0)}, + {PIPE_FORMAT_R8_SRGB, PACKED_1x8(0xff), PACKED_1x8(0xbc), UNPACKED_1x1(0.502886458033, 0.0, 0.0, 1.0)}, + {PIPE_FORMAT_R8_SRGB, PACKED_1x8(0xff), PACKED_1x8(0xff), UNPACKED_1x1(1.0, 0.0, 0.0, 1.0)}, + {PIPE_FORMAT_L8A8_SRGB, PACKED_1x16(0xffff), PACKED_1x16(0x0000), UNPACKED_1x1(0.0, 0.0, 0.0, 0.0)}, {PIPE_FORMAT_L8A8_SRGB, PACKED_1x16(0xffff), PACKED_1x16(0x00bc), UNPACKED_1x1(0.502886458033, 0.502886458033, 0.502886458033, 0.0)}, {PIPE_FORMAT_L8A8_SRGB, PACKED_1x16(0xffff), PACKED_1x16(0x00ff), UNPACKED_1x1(1.0, 1.0, 1.0, 0.0)}, diff -Nru mesa-18.3.3/src/gallium/auxiliary/util/u_framebuffer.c mesa-19.0.1/src/gallium/auxiliary/util/u_framebuffer.c --- mesa-18.3.3/src/gallium/auxiliary/util/u_framebuffer.c 2018-09-27 19:13:54.000000000 +0000 +++ mesa-19.0.1/src/gallium/auxiliary/util/u_framebuffer.c 2019-03-31 23:16:37.000000000 +0000 @@ -229,13 +229,19 @@ if (!(fb->nr_cbufs || fb->zsbuf)) return MAX2(fb->samples, 1); + /** + * If a driver doesn't advertise PIPE_CAP_SURFACE_SAMPLE_COUNT, + * pipe_surface::nr_samples will always be 0. 
+ */ for (i = 0; i < fb->nr_cbufs; i++) { if (fb->cbufs[i]) { - return MAX2(1, fb->cbufs[i]->texture->nr_samples); + return MAX3(1, fb->cbufs[i]->texture->nr_samples, + fb->cbufs[i]->nr_samples); } } if (fb->zsbuf) { - return MAX2(1, fb->zsbuf->texture->nr_samples); + return MAX3(1, fb->zsbuf->texture->nr_samples, + fb->zsbuf->nr_samples); } return 1; diff -Nru mesa-18.3.3/src/gallium/auxiliary/util/u_helpers.c mesa-19.0.1/src/gallium/auxiliary/util/u_helpers.c --- mesa-18.3.3/src/gallium/auxiliary/util/u_helpers.c 2018-12-07 18:58:04.000000000 +0000 +++ mesa-19.0.1/src/gallium/auxiliary/util/u_helpers.c 2019-03-31 23:16:37.000000000 +0000 @@ -121,43 +121,6 @@ return *out_buffer != NULL; } -#ifdef HAVE_PTHREAD_SETAFFINITY - -static unsigned L3_cache_number; -static once_flag thread_pinning_once_flag = ONCE_FLAG_INIT; - -static void -util_set_full_cpu_affinity(void) -{ - cpu_set_t cpuset; - - CPU_ZERO(&cpuset); - for (unsigned i = 0; i < CPU_SETSIZE; i++) - CPU_SET(i, &cpuset); - - pthread_setaffinity_np(pthread_self(), sizeof(cpuset), &cpuset); -} - -static void -util_init_thread_pinning(void) -{ - /* Get a semi-random number. */ - int64_t t = os_time_get_nano(); - L3_cache_number = (t ^ (t >> 8) ^ (t >> 16)); - - /* Reset thread affinity for all child processes to prevent them from - * inheriting the current thread's affinity. - * - * XXX: If the driver is unloaded after this, and the app later calls - * fork(), the child process will likely crash before fork() returns, - * because the address where util_set_full_cpu_affinity was located - * will either be unmapped or point to random other contents. - */ - pthread_atfork(NULL, NULL, util_set_full_cpu_affinity); -} - -#endif - /** * Called by MakeCurrent. Used to notify the driver that the application * thread may have been changed. @@ -170,30 +133,21 @@ * pinned. 
*/ void -util_context_thread_changed(struct pipe_context *ctx, thrd_t *upper_thread) +util_pin_driver_threads_to_random_L3(struct pipe_context *ctx, + thrd_t *upper_thread) { -#ifdef HAVE_PTHREAD_SETAFFINITY /* If pinning has no effect, don't do anything. */ if (util_cpu_caps.nr_cpus == util_cpu_caps.cores_per_L3) return; - thrd_t current = thrd_current(); - int cache = util_get_L3_for_pinned_thread(current, - util_cpu_caps.cores_per_L3); - - call_once(&thread_pinning_once_flag, util_init_thread_pinning); - - /* If the main thread is not pinned, choose the L3 cache. */ - if (cache == -1) { - unsigned num_L3_caches = util_cpu_caps.nr_cpus / - util_cpu_caps.cores_per_L3; - - /* Choose a different L3 cache for each subsequent MakeCurrent. */ - cache = p_atomic_inc_return(&L3_cache_number) % num_L3_caches; - util_pin_thread_to_L3(current, cache, util_cpu_caps.cores_per_L3); - } + unsigned num_L3_caches = util_cpu_caps.nr_cpus / + util_cpu_caps.cores_per_L3; + + /* Get a semi-random number. */ + int64_t t = os_time_get_nano(); + unsigned cache = (t ^ (t >> 8) ^ (t >> 16)) % num_L3_caches; - /* Tell the driver to pin its threads to the same L3 cache. */ + /* Tell the driver to pin its threads to the selected L3 cache. */ if (ctx->set_context_param) { ctx->set_context_param(ctx, PIPE_CONTEXT_PARAM_PIN_THREADS_TO_L3_CACHE, cache); @@ -202,7 +156,6 @@ /* Do the same for the upper level thread if there is any (e.g. glthread) */ if (upper_thread) util_pin_thread_to_L3(*upper_thread, cache, util_cpu_caps.cores_per_L3); -#endif } /* This is a helper for hardware bring-up. Don't remove. 
*/ diff -Nru mesa-18.3.3/src/gallium/auxiliary/util/u_helpers.h mesa-19.0.1/src/gallium/auxiliary/util/u_helpers.h --- mesa-18.3.3/src/gallium/auxiliary/util/u_helpers.h 2018-12-07 18:58:04.000000000 +0000 +++ mesa-19.0.1/src/gallium/auxiliary/util/u_helpers.h 2019-03-31 23:16:37.000000000 +0000 @@ -52,7 +52,8 @@ unsigned *out_offset); void -util_context_thread_changed(struct pipe_context *ctx, thrd_t *upper_thread); +util_pin_driver_threads_to_random_L3(struct pipe_context *ctx, + thrd_t *upper_thread); struct pipe_query * util_begin_pipestat_query(struct pipe_context *ctx); diff -Nru mesa-18.3.3/src/gallium/auxiliary/util/u_log.c mesa-19.0.1/src/gallium/auxiliary/util/u_log.c --- mesa-18.3.3/src/gallium/auxiliary/util/u_log.c 2017-11-05 00:14:08.000000000 +0000 +++ mesa-19.0.1/src/gallium/auxiliary/util/u_log.c 2019-03-31 23:16:37.000000000 +0000 @@ -23,7 +23,7 @@ #include "u_log.h" -#include "u_memory.h" +#include "util/u_memory.h" #include "util/u_string.h" struct page_entry { diff -Nru mesa-18.3.3/src/gallium/auxiliary/util/u_memory.h mesa-19.0.1/src/gallium/auxiliary/util/u_memory.h --- mesa-18.3.3/src/gallium/auxiliary/util/u_memory.h 2017-11-05 00:14:08.000000000 +0000 +++ mesa-19.0.1/src/gallium/auxiliary/util/u_memory.h 1970-01-01 00:00:00.000000000 +0000 @@ -1,100 +0,0 @@ -/************************************************************************** - * - * Copyright 2008 VMware, Inc. - * All Rights Reserved. 
- * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the - * "Software"), to deal in the Software without restriction, including - * without limitation the rights to use, copy, modify, merge, publish, - * distribute, sub license, and/or sell copies of the Software, and to - * permit persons to whom the Software is furnished to do so, subject to - * the following conditions: - * - * The above copyright notice and this permission notice (including the - * next paragraph) shall be included in all copies or substantial portions - * of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS - * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. - * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR - * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, - * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE - * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
- * - **************************************************************************/ - - -/* - * Memory functions - */ - - -#ifndef U_MEMORY_H -#define U_MEMORY_H - - -#include "util/u_pointer.h" -#include "util/u_debug.h" -#include "os/os_memory.h" - - -#ifdef __cplusplus -extern "C" { -#endif - - -#define MALLOC(_size) os_malloc(_size) - -#define CALLOC(_count, _size) os_calloc(_count, _size) - -#define FREE(_ptr ) os_free(_ptr) - -#define REALLOC(_ptr, _old_size, _size) os_realloc(_ptr, _old_size, _size) - -#define MALLOC_STRUCT(T) (struct T *) MALLOC(sizeof(struct T)) - -#define CALLOC_STRUCT(T) (struct T *) CALLOC(1, sizeof(struct T)) - -#define CALLOC_VARIANT_LENGTH_STRUCT(T,more_size) ((struct T *) CALLOC(1, sizeof(struct T) + more_size)) - - -#define align_malloc(_size, _alignment) os_malloc_aligned(_size, _alignment) -#define align_free(_ptr) os_free_aligned(_ptr) - -static inline void * -align_calloc(size_t size, unsigned long alignment) -{ - void *ptr = align_malloc(size, alignment); - if (ptr) - memset(ptr, 0, size); - return ptr; -} - -/** - * Duplicate a block of memory. - */ -static inline void * -mem_dup(const void *src, uint size) -{ - void *dup = MALLOC(size); - if (dup) - memcpy(dup, src, size); - return dup; -} - - -/** - * Offset of a field in a struct, in bytes. 
- */ -#define Offset(TYPE, MEMBER) ((uintptr_t)&(((TYPE *)NULL)->MEMBER)) - - - -#ifdef __cplusplus -} -#endif - - -#endif /* U_MEMORY_H */ diff -Nru mesa-18.3.3/src/gallium/auxiliary/util/u_prim_restart.c mesa-19.0.1/src/gallium/auxiliary/util/u_prim_restart.c --- mesa-18.3.3/src/gallium/auxiliary/util/u_prim_restart.c 2017-11-05 00:14:08.000000000 +0000 +++ mesa-19.0.1/src/gallium/auxiliary/util/u_prim_restart.c 2019-03-31 23:16:37.000000000 +0000 @@ -26,7 +26,7 @@ #include "u_inlines.h" -#include "u_memory.h" +#include "util/u_memory.h" #include "u_prim_restart.h" diff -Nru mesa-18.3.3/src/gallium/auxiliary/util/u_screen.c mesa-19.0.1/src/gallium/auxiliary/util/u_screen.c --- mesa-18.3.3/src/gallium/auxiliary/util/u_screen.c 2018-12-07 18:58:04.000000000 +0000 +++ mesa-19.0.1/src/gallium/auxiliary/util/u_screen.c 2019-03-31 23:16:37.000000000 +0000 @@ -77,6 +77,7 @@ case PIPE_CAP_MIXED_COLORBUFFER_FORMATS: case PIPE_CAP_SEAMLESS_CUBE_MAP: case PIPE_CAP_SEAMLESS_CUBE_MAP_PER_TEXTURE: + case PIPE_CAP_RGB_OVERRIDE_DST_ALPHA_BLEND: return 0; case PIPE_CAP_MIN_TEXEL_OFFSET: @@ -145,6 +146,7 @@ return 1; case PIPE_CAP_QUERY_PIPELINE_STATISTICS: + case PIPE_CAP_QUERY_PIPELINE_STATISTICS_SINGLE: case PIPE_CAP_TEXTURE_BORDER_COLOR_QUIRK: return 0; @@ -261,6 +263,9 @@ case PIPE_CAP_GLSL_OPTIMIZE_CONSERVATIVELY: return 1; + case PIPE_CAP_GLSL_TESS_LEVELS_AS_INPUTS: + return 0; + case PIPE_CAP_TGSI_FS_FBFETCH: case PIPE_CAP_TGSI_MUL_ZERO_WINS: case PIPE_CAP_DOUBLES: @@ -311,6 +316,7 @@ case PIPE_CAP_MAX_COMBINED_SHADER_BUFFERS: case PIPE_CAP_MAX_COMBINED_HW_ATOMIC_COUNTERS: case PIPE_CAP_MAX_COMBINED_HW_ATOMIC_COUNTER_BUFFERS: + case PIPE_CAP_TGSI_ATOMFADD: return 0; case PIPE_CAP_MAX_GS_INVOCATIONS: @@ -326,6 +332,11 @@ case PIPE_CAP_MAX_VERTEX_ELEMENT_SRC_OFFSET: return 2047; + case PIPE_CAP_SURFACE_SAMPLE_COUNT: + return 0; + case PIPE_CAP_DEST_SURFACE_SRGB_CONTROL: + return 1; + default: unreachable("bad PIPE_CAP_*"); } diff -Nru 
mesa-18.3.3/src/gallium/auxiliary/util/u_tests.c mesa-19.0.1/src/gallium/auxiliary/util/u_tests.c --- mesa-18.3.3/src/gallium/auxiliary/util/u_tests.c 2018-12-07 18:58:04.000000000 +0000 +++ mesa-19.0.1/src/gallium/auxiliary/util/u_tests.c 2019-03-31 23:16:37.000000000 +0000 @@ -787,6 +787,80 @@ util_report_result_helper(pass, name); } +static void +test_compute_clear_image(struct pipe_context *ctx) +{ + struct cso_context *cso; + struct pipe_resource *cb; + const char *text; + + cso = cso_create_context(ctx, 0); + cb = util_create_texture2d(ctx->screen, 256, 256, + PIPE_FORMAT_R8G8B8A8_UNORM, 1); + + /* Compute shader. */ + text = "COMP\n" + "PROPERTY CS_FIXED_BLOCK_WIDTH 8\n" + "PROPERTY CS_FIXED_BLOCK_HEIGHT 8\n" + "PROPERTY CS_FIXED_BLOCK_DEPTH 1\n" + "DCL SV[0], THREAD_ID\n" + "DCL SV[1], BLOCK_ID\n" + "DCL IMAGE[0], 2D, PIPE_FORMAT_R8G8B8A8_UNORM, WR\n" + "DCL TEMP[0]\n" + "IMM[0] UINT32 { 8, 8, 0, 0}\n" + "IMM[1] FLT32 { 1, 0, 0, 0}\n" + + /* TEMP[0].xy = SV[1] * IMM[0] + SV[0]; */ + "UMAD TEMP[0].xy, SV[1], IMM[0], SV[0]\n" + "STORE IMAGE[0], TEMP[0], IMM[1], 2D, PIPE_FORMAT_R8G8B8A8_UNORM\n" + "END\n"; + + struct tgsi_token tokens[1000]; + if (!tgsi_text_translate(text, tokens, ARRAY_SIZE(tokens))) { + assert(0); + util_report_result(FAIL); + return; + } + + struct pipe_compute_state state = {0}; + state.ir_type = PIPE_SHADER_IR_TGSI; + state.prog = tokens; + + void *compute_shader = ctx->create_compute_state(ctx, &state); + cso_set_compute_shader_handle(cso, compute_shader); + + /* Bind the image. */ + struct pipe_image_view image = {0}; + image.resource = cb; + image.shader_access = image.access = PIPE_IMAGE_ACCESS_READ_WRITE; + image.format = cb->format; + + ctx->set_shader_images(ctx, PIPE_SHADER_COMPUTE, 0, 1, &image); + + /* Dispatch compute. 
*/ + struct pipe_grid_info info = {0}; + info.block[0] = 8; + info.block[1] = 8; + info.block[2] = 1; + info.grid[0] = cb->width0 / 8; + info.grid[1] = cb->height0 / 8; + info.grid[2] = 1; + + ctx->launch_grid(ctx, &info); + + /* Check pixels. */ + static const float expected[] = {1.0, 0.0, 0.0, 0.0}; + bool pass = util_probe_rect_rgba(ctx, cb, 0, 0, + cb->width0, cb->height0, expected); + + /* Cleanup. */ + cso_destroy_context(cso); + ctx->delete_compute_state(ctx, compute_shader); + pipe_resource_reference(&cb, NULL); + + util_report_result(pass); +} + /** * Run all tests. This should be run with a clean context after * context_create. @@ -808,6 +882,8 @@ for (int i = 1; i <= 8; i = i * 2) test_texture_barrier(ctx, true, i); + test_compute_clear_image(ctx); + ctx->destroy(ctx); puts("Done. Exiting.."); diff -Nru mesa-18.3.3/src/gallium/auxiliary/util/u_threaded_context.c mesa-19.0.1/src/gallium/auxiliary/util/u_threaded_context.c --- mesa-18.3.3/src/gallium/auxiliary/util/u_threaded_context.c 2018-12-07 18:58:04.000000000 +0000 +++ mesa-19.0.1/src/gallium/auxiliary/util/u_threaded_context.c 2019-03-31 23:16:37.000000000 +0000 @@ -1524,7 +1524,8 @@ if (ttrans->staging) { struct pipe_box src_box; - u_box_1d(ttrans->offset + box->x % tc->map_buffer_alignment, + u_box_1d(ttrans->offset + ttrans->b.box.x % tc->map_buffer_alignment + + (box->x - ttrans->b.box.x), box->width, &src_box); /* Copy the staging buffer into the original one. */ diff -Nru mesa-18.3.3/src/gallium/docs/source/context.rst mesa-19.0.1/src/gallium/docs/source/context.rst --- mesa-18.3.3/src/gallium/docs/source/context.rst 2018-09-27 19:13:54.000000000 +0000 +++ mesa-19.0.1/src/gallium/docs/source/context.rst 2019-03-31 23:16:37.000000000 +0000 @@ -491,6 +491,11 @@ If a shader type is not supported by the device/driver, the corresponding values should be set to 0. +``PIPE_QUERY_PIPELINE_STATISTICS_SINGLE`` returns a single counter from +the ``PIPE_QUERY_PIPELINE_STATISTICS`` group. 
The specific counter must +be selected when calling ``create_query`` by passing one of the +``PIPE_STAT_QUERY`` enums as the query's ``index``. + Gallium does not guarantee the availability of any query types; one must always check the capabilities of the :ref:`Screen` first. diff -Nru mesa-18.3.3/src/gallium/docs/source/screen.rst mesa-19.0.1/src/gallium/docs/source/screen.rst --- mesa-18.3.3/src/gallium/docs/source/screen.rst 2018-12-07 18:58:04.000000000 +0000 +++ mesa-19.0.1/src/gallium/docs/source/screen.rst 2019-03-31 23:16:37.000000000 +0000 @@ -409,7 +409,7 @@ for a driver that does not support multiple output streams (i.e., ``PIPE_CAP_MAX_VERTEX_STREAMS`` is 1), both query types are identical. * ``PIPE_CAP_MEMOBJ``: Whether operations on memory objects are supported. -* ``PIPE_CAP_LOAD_CONSTBUF``: True if the driver supports TGSI_OPCODE_LOAD use +* ``PIPE_CAP_LOAD_CONSTBUF``: True if the driver supports ``TGSI_OPCODE_LOAD`` use with constant buffers. * ``PIPE_CAP_TGSI_ANY_REG_AS_ADDRESS``: Any TGSI register can be used as an address for indirect register indexing. @@ -434,7 +434,7 @@ Whether pipe_vertex_buffer::buffer_offset is treated as signed. The u_vbuf module needs this for optimal performance in workstation applications. * ``PIPE_CAP_CONTEXT_PRIORITY_MASK``: For drivers that support per-context - priorities, this returns a bitmask of PIPE_CONTEXT_PRIORITY_x for the + priorities, this returns a bitmask of ``PIPE_CONTEXT_PRIORITY_x`` for the supported priority levels. A driver that does not support prioritized contexts can return 0. * ``PIPE_CAP_FENCE_SIGNAL``: True if the driver supports signaling semaphores @@ -446,17 +446,17 @@ * ``PIPE_CAP_PACKED_UNIFORMS``: True if the driver supports packed uniforms as opposed to padding to vec4s. * ``PIPE_CAP_CONSERVATIVE_RASTER_POST_SNAP_TRIANGLES``: Whether the - PIPE_CONSERVATIVE_RASTER_POST_SNAP mode is supported for triangles. + ``PIPE_CONSERVATIVE_RASTER_POST_SNAP`` mode is supported for triangles. 
* ``PIPE_CAP_CONSERVATIVE_RASTER_POST_SNAP_POINTS_LINES``: Whether the -PIPE_CONSERVATIVE_RASTER_POST_SNAP mode is supported for points and lines. + ``PIPE_CONSERVATIVE_RASTER_POST_SNAP`` mode is supported for points and lines. * ``PIPE_CAP_CONSERVATIVE_RASTER_PRE_SNAP_TRIANGLES``: Whether the -PIPE_CONSERVATIVE_RASTER_PRE_SNAP mode is supported for triangles. + ``PIPE_CONSERVATIVE_RASTER_PRE_SNAP`` mode is supported for triangles. * ``PIPE_CAP_CONSERVATIVE_RASTER_PRE_SNAP_POINTS_LINES``: Whether the -PIPE_CONSERVATIVE_RASTER_PRE_SNAP mode is supported for points and lines. -* ``PIPE_CAP_CONSERVATIVE_RASTER_POST_DEPTH_COVERAGE``: Whether PIPE_CAP_POST_DEPTH_COVERAGE -works with conservative rasterization. + ``PIPE_CONSERVATIVE_RASTER_PRE_SNAP`` mode is supported for points and lines. +* ``PIPE_CAP_CONSERVATIVE_RASTER_POST_DEPTH_COVERAGE``: Whether + ``PIPE_CAP_POST_DEPTH_COVERAGE`` works with conservative rasterization. * ``PIPE_CAP_MAX_CONSERVATIVE_RASTER_SUBPIXEL_PRECISION_BIAS``: The maximum -subpixel precision bias in bits during conservative rasterization. + subpixel precision bias in bits during conservative rasterization. * ``PIPE_CAP_PROGRAMMABLE_SAMPLE_LOCATIONS``: True is the driver supports programmable sample location through ```get_sample_pixel_grid``` and ```set_sample_locations```. @@ -472,11 +472,25 @@ * ``PIPE_CAP_MAX_COMBINED_HW_ATOMIC_COUNTER_BUFFERS``: Maximum total number of atomic counter buffers. A value of 0 means the sum of all per-shader stage maximums (see ``PIPE_SHADER_CAP_MAX_HW_ATOMIC_COUNTER_BUFFERS``). -* ``PIPE_CAP_MAX_TEXTURE_UPLOAD_MEMORY_BUDGET: Maximum recommend memory size +* ``PIPE_CAP_MAX_TEXTURE_UPLOAD_MEMORY_BUDGET``: Maximum recommend memory size for all active texture uploads combined. This is a performance hint. 0 means no limit. * ``PIPE_CAP_MAX_VERTEX_ELEMENT_SRC_OFFSET``: The maximum supported value for of pipe_vertex_element::src_offset. 
+* ``PIPE_CAP_SURFACE_SAMPLE_COUNT``: Whether the driver + supports pipe_surface overrides of resource nr_samples. If set, will + enable EXT_multisampled_render_to_texture. +* ``PIPE_CAP_TGSI_ATOMFADD``: Atomic floating point adds are supported on + images, buffers, and shared memory. +* ``PIPE_CAP_RGB_OVERRIDE_DST_ALPHA_BLEND``: True if the driver needs blend state to use zero/one instead of destination alpha for RGB/XRGB formats. +* ``PIPE_CAP_GLSL_TESS_LEVELS_AS_INPUTS``: True if the driver wants TESSINNER and TESSOUTER to be inputs (rather than system values) for tessellation evaluation shaders. +* ``PIPE_CAP_DEST_SURFACE_SRGB_CONTROL``: Indicates whether the drivers + supports switching the format between sRGB and linear for a surface that is + used as destination in draw and blit calls. +* ``PIPE_CAP_MAX_VARYINGS``: The maximum number of fragment shader + varyings. This will generally correspond to + ``PIPE_SHADER_CAP_MAX_INPUTS`` for the fragment shader, but in some + cases may be a smaller number. .. _pipe_capf: diff -Nru mesa-18.3.3/src/gallium/docs/source/tgsi.rst mesa-19.0.1/src/gallium/docs/source/tgsi.rst --- mesa-18.3.3/src/gallium/docs/source/tgsi.rst 2018-09-27 19:13:54.000000000 +0000 +++ mesa-19.0.1/src/gallium/docs/source/tgsi.rst 2019-03-31 23:16:37.000000000 +0000 @@ -2684,6 +2684,21 @@ resource[offset] = dst_x + src_x +.. opcode:: ATOMFADD - Atomic floating point addition + + Syntax: ``ATOMFADD dst, resource, offset, src`` + + Example: ``ATOMFADD TEMP[0], BUFFER[0], TEMP[1], TEMP[2]`` + + The following operation is performed atomically: + +.. math:: + + dst_x = resource[offset] + + resource[offset] = dst_x + src_x + + .. opcode:: ATOMXCHG - Atomic exchange Syntax: ``ATOMXCHG dst, resource, offset, src`` @@ -3190,24 +3205,6 @@ last vertex processing stage is used. -TGSI_SEMANTIC_CULLDIST -"""""""""""""""""""""" - -Used as distance to plane for performing application-defined culling -of individual primitives against a plane. 
When components of vertex -elements are given this label, these values are assumed to be a -float32 signed distance to a plane. Primitives will be completely -discarded if the plane distance for all of the vertices in the -primitive are < 0. If a vertex has a cull distance of NaN, that -vertex counts as "out" (as if its < 0); -The limits on both clip and cull distances are bound -by the PIPE_MAX_CLIP_OR_CULL_DISTANCE_COUNT define which defines -the maximum number of components that can be used to hold the -distances and by the PIPE_MAX_CLIP_OR_CULL_DISTANCE_ELEMENT_COUNT -which specifies the maximum number of registers which can be -annotated with those semantics. - - TGSI_SEMANTIC_CLIPDIST """""""""""""""""""""" diff -Nru mesa-18.3.3/src/gallium/drivers/etnaviv/etnaviv_blend.c mesa-19.0.1/src/gallium/drivers/etnaviv/etnaviv_blend.c --- mesa-18.3.3/src/gallium/drivers/etnaviv/etnaviv_blend.c 2017-11-23 00:32:52.000000000 +0000 +++ mesa-19.0.1/src/gallium/drivers/etnaviv/etnaviv_blend.c 2019-03-31 23:16:37.000000000 +0000 @@ -114,10 +114,11 @@ struct pipe_blend_state *pblend = ctx->blend; struct etna_blend_state *blend = etna_blend_state(pblend); const struct pipe_rt_blend_state *rt0 = &pblend->rt[0]; + const struct util_format_description *desc; uint32_t colormask; if (pfb->cbufs[0] && - translate_rs_format_rb_swap(pfb->cbufs[0]->texture->format)) { + translate_rs_format_rb_swap(pfb->cbufs[0]->format)) { colormask = rt0->colormask & (PIPE_MASK_A | PIPE_MASK_G); if (rt0->colormask & PIPE_MASK_R) colormask |= PIPE_MASK_B; @@ -128,11 +129,13 @@ } /* If the complete render target is written, set full_overwrite: - * - The color mask is 1111 - * - No blending is used + * - The color mask covers all channels of the render target + * - No blending or logicop is used */ - bool full_overwrite = ((rt0->colormask == 0xf) && blend->fo_allowed) || - !pfb->cbufs[0]; + if (pfb->cbufs[0]) + desc = util_format_description(pfb->cbufs[0]->format); + bool full_overwrite = 
!pfb->cbufs[0] || ((blend->fo_allowed && + util_format_colormask_full(desc, colormask))); blend->PE_COLOR_FORMAT = VIVS_PE_COLOR_FORMAT_COMPONENTS(colormask) | COND(full_overwrite, VIVS_PE_COLOR_FORMAT_OVERWRITE); @@ -158,7 +161,7 @@ struct compiled_blend_color *cs = &ctx->blend_color; if (pfb->cbufs[0] && - translate_rs_format_rb_swap(pfb->cbufs[0]->texture->format)) { + translate_rs_format_rb_swap(pfb->cbufs[0]->format)) { cs->PE_ALPHA_BLEND_COLOR = VIVS_PE_ALPHA_BLEND_COLOR_R(etna_cfloat_to_uint8(cs->color[2])) | VIVS_PE_ALPHA_BLEND_COLOR_G(etna_cfloat_to_uint8(cs->color[1])) | diff -Nru mesa-18.3.3/src/gallium/drivers/etnaviv/etnaviv_compiler.c mesa-19.0.1/src/gallium/drivers/etnaviv/etnaviv_compiler.c --- mesa-18.3.3/src/gallium/drivers/etnaviv/etnaviv_compiler.c 2017-12-02 01:35:56.000000000 +0000 +++ mesa-19.0.1/src/gallium/drivers/etnaviv/etnaviv_compiler.c 2019-03-31 23:16:37.000000000 +0000 @@ -477,8 +477,7 @@ etna_compile_parse_declarations(struct etna_compile *c) { struct tgsi_parse_context ctx = { }; - unsigned status = TGSI_PARSE_OK; - status = tgsi_parse_init(&ctx, c->tokens); + MAYBE_UNUSED unsigned status = tgsi_parse_init(&ctx, c->tokens); assert(status == TGSI_PARSE_OK); while (!tgsi_parse_end_of_tokens(&ctx)) { @@ -530,8 +529,7 @@ etna_compile_pass_check_usage(struct etna_compile *c) { struct tgsi_parse_context ctx = { }; - unsigned status = TGSI_PARSE_OK; - status = tgsi_parse_init(&ctx, c->tokens); + MAYBE_UNUSED unsigned status = tgsi_parse_init(&ctx, c->tokens); assert(status == TGSI_PARSE_OK); for (int idx = 0; idx < c->total_decls; ++idx) { @@ -662,8 +660,7 @@ { struct tgsi_parse_context ctx = { }; int inst_idx = 0; - unsigned status = TGSI_PARSE_OK; - status = tgsi_parse_init(&ctx, c->tokens); + MAYBE_UNUSED unsigned status = tgsi_parse_init(&ctx, c->tokens); assert(status == TGSI_PARSE_OK); while (!tgsi_parse_end_of_tokens(&ctx)) { @@ -1812,7 +1809,7 @@ etna_compile_pass_generate_code(struct etna_compile *c) { struct tgsi_parse_context 
ctx = { }; - unsigned status = tgsi_parse_init(&ctx, c->tokens); + MAYBE_UNUSED unsigned status = tgsi_parse_init(&ctx, c->tokens); assert(status == TGSI_PARSE_OK); int inst_idx = 0; diff -Nru mesa-18.3.3/src/gallium/drivers/etnaviv/etnaviv_context.c mesa-19.0.1/src/gallium/drivers/etnaviv/etnaviv_context.c --- mesa-18.3.3/src/gallium/drivers/etnaviv/etnaviv_context.c 2019-02-01 12:03:20.000000000 +0000 +++ mesa-19.0.1/src/gallium/drivers/etnaviv/etnaviv_context.c 2019-03-31 23:16:37.000000000 +0000 @@ -60,6 +60,9 @@ { struct etna_context *ctx = etna_context(pctx); + if (ctx->dummy_rt) + etna_bo_del(ctx->dummy_rt); + util_copy_framebuffer_state(&ctx->framebuffer_s, NULL); if (ctx->primconvert) @@ -211,13 +214,8 @@ ctx->dirty |= ETNA_DIRTY_INDEX_BUFFER; struct etna_shader_key key = {}; - struct etna_surface *cbuf = etna_surface(pfb->cbufs[0]); - - if (cbuf) { - struct etna_resource *res = etna_resource(cbuf->base.texture); - - key.frag_rb_swap = !!translate_rs_format_rb_swap(res->base.format); - } + if (pfb->cbufs[0]) + key.frag_rb_swap = !!translate_rs_format_rb_swap(pfb->cbufs[0]->format); if (!etna_get_vs(ctx, key) || !etna_get_fs(ctx, key)) { BUG("compiled shaders are not okay"); @@ -488,6 +486,16 @@ slab_create_child(&ctx->transfer_pool, &screen->transfer_pool); list_inithead(&ctx->active_hw_queries); + /* create dummy RT buffer, used when rendering with no color buffer */ + ctx->dummy_rt = etna_bo_new(ctx->screen->dev, 64 * 64 * 4, + DRM_ETNA_GEM_CACHE_WC); + if (!ctx->dummy_rt) + goto fail; + + ctx->dummy_rt_reloc.bo = ctx->dummy_rt; + ctx->dummy_rt_reloc.offset = 0; + ctx->dummy_rt_reloc.flags = ETNA_RELOC_READ | ETNA_RELOC_WRITE; + return pctx; fail: diff -Nru mesa-18.3.3/src/gallium/drivers/etnaviv/etnaviv_context.h mesa-19.0.1/src/gallium/drivers/etnaviv/etnaviv_context.h --- mesa-18.3.3/src/gallium/drivers/etnaviv/etnaviv_context.h 2017-12-02 01:35:56.000000000 +0000 +++ mesa-19.0.1/src/gallium/drivers/etnaviv/etnaviv_context.h 2019-03-31 
23:16:37.000000000 +0000 @@ -190,6 +190,9 @@ /* list of active hardware queries */ struct list_head active_hw_queries; + + struct etna_bo *dummy_rt; + struct etna_reloc dummy_rt_reloc; }; static inline struct etna_context * diff -Nru mesa-18.3.3/src/gallium/drivers/etnaviv/etnaviv_resource.c mesa-19.0.1/src/gallium/drivers/etnaviv/etnaviv_resource.c --- mesa-18.3.3/src/gallium/drivers/etnaviv/etnaviv_resource.c 2018-12-07 18:58:04.000000000 +0000 +++ mesa-19.0.1/src/gallium/drivers/etnaviv/etnaviv_resource.c 2019-03-31 23:16:37.000000000 +0000 @@ -176,10 +176,20 @@ return size; } +/* Is rs alignment needed? */ +static bool is_rs_align(struct etna_screen *screen, + const struct pipe_resource *tmpl) +{ + return screen->specs.use_blt ? false : ( + VIV_FEATURE(screen, chipMinorFeatures1, TEXTURE_HALIGN) || + !etna_resource_sampler_only(tmpl)); +} + /* Create a new resource object, using the given template info */ struct pipe_resource * etna_resource_alloc(struct pipe_screen *pscreen, unsigned layout, - uint64_t modifier, const struct pipe_resource *templat) + enum etna_resource_addressing_mode mode, uint64_t modifier, + const struct pipe_resource *templat) { struct etna_screen *screen = etna_screen(pscreen); struct etna_resource *rsc; @@ -217,11 +227,9 @@ * resolve engine's width. If not, we must not align resources used * only for textures. If this GPU uses the BLT engine, never do RS align. */ - bool rs_align = screen->specs.use_blt ? 
false : ( - VIV_FEATURE(screen, chipMinorFeatures1, TEXTURE_HALIGN) || - !etna_resource_sampler_only(templat)); - etna_layout_multiple(layout, screen->specs.pixel_pipes, rs_align, &paddingX, - &paddingY, &halign); + etna_layout_multiple(layout, screen->specs.pixel_pipes, + is_rs_align (screen, templat), + &paddingX, &paddingY, &halign); assert(paddingX && paddingY); } else { /* Compressed textures are padded to their block size, but we don't have @@ -273,6 +281,7 @@ rsc->base.nr_samples = nr_samples; rsc->layout = layout; rsc->halign = halign; + rsc->addressing_mode = mode; pipe_reference_init(&rsc->base.reference, 1); list_inithead(&rsc->list); @@ -309,12 +318,14 @@ { struct etna_screen *screen = etna_screen(pscreen); - /* Figure out what tiling to use -- for now, assume that texture cannot be linear. - * there is a capability LINEAR_TEXTURE_SUPPORT (supported on gc880 and - * gc2000 at least), but not sure how it works. + /* Figure out what tiling and address mode to use -- for now, assume that + * texture cannot be linear. there is a capability LINEAR_TEXTURE_SUPPORT + * (supported on gc880 and gc2000 at least), but not sure how it works. * Buffers always have LINEAR layout. */ unsigned layout = ETNA_LAYOUT_LINEAR; + enum etna_resource_addressing_mode mode = ETNA_ADDRESSING_MODE_TILED; + if (etna_resource_sampler_only(templat)) { /* The buffer is only used for texturing, so create something * directly compatible with the sampler. 
Such a buffer can @@ -357,7 +368,7 @@ layout = ETNA_LAYOUT_LINEAR; /* modifier is only used for scanout surfaces, so safe to use LINEAR here */ - return etna_resource_alloc(pscreen, layout, DRM_FORMAT_MOD_LINEAR, templat); + return etna_resource_alloc(pscreen, layout, mode, DRM_FORMAT_MOD_LINEAR, templat); } enum modifier_priority { @@ -438,7 +449,7 @@ tmpl.bind |= PIPE_BIND_SCANOUT; return etna_resource_alloc(pscreen, modifier_to_layout(modifier), - modifier, &tmpl); + ETNA_ADDRESSING_MODE_TILED, modifier, &tmpl); } static void @@ -511,6 +522,7 @@ rsc->seqno = 1; rsc->layout = modifier_to_layout(handle->modifier); rsc->halign = TEXTURE_HALIGN_FOUR; + rsc->addressing_mode = ETNA_ADDRESSING_MODE_TILED; level->width = tmpl->width0; @@ -519,7 +531,7 @@ /* Determine padding of the imported resource. */ unsigned paddingX = 0, paddingY = 0; etna_layout_multiple(rsc->layout, screen->specs.pixel_pipes, - VIV_FEATURE(screen, chipMinorFeatures1, TEXTURE_HALIGN), + is_rs_align(screen, tmpl), &paddingX, &paddingY, &rsc->halign); if (!screen->specs.use_blt) diff -Nru mesa-18.3.3/src/gallium/drivers/etnaviv/etnaviv_resource.h mesa-19.0.1/src/gallium/drivers/etnaviv/etnaviv_resource.h --- mesa-18.3.3/src/gallium/drivers/etnaviv/etnaviv_resource.h 2017-11-16 18:44:33.000000000 +0000 +++ mesa-19.0.1/src/gallium/drivers/etnaviv/etnaviv_resource.h 2019-03-31 23:16:37.000000000 +0000 @@ -49,6 +49,11 @@ bool ts_valid; }; +enum etna_resource_addressing_mode { + ETNA_ADDRESSING_MODE_TILED = 0, + ETNA_ADDRESSING_MODE_LINEAR, +}; + /* status of queued up but not flushed reads and write operations. * In _transfer_map() we need to know if queued up rendering needs * to be flushed to preserve the order of cpu and gpu access. */ @@ -66,6 +71,7 @@ /* only lod 0 used for non-texture buffers */ /* Layout for surface (tiled, multitiled, split tiled, ...) 
*/ enum etna_surface_layout layout; + enum etna_resource_addressing_mode addressing_mode; /* Horizontal alignment for texture unit (TEXTURE_HALIGN_*) */ unsigned halign; struct etna_bo *bo; /* Surface video memory */ @@ -155,7 +161,8 @@ struct pipe_resource * etna_resource_alloc(struct pipe_screen *pscreen, unsigned layout, - uint64_t modifier, const struct pipe_resource *templat); + enum etna_resource_addressing_mode mode, uint64_t modifier, + const struct pipe_resource *templat); void etna_resource_screen_init(struct pipe_screen *pscreen); diff -Nru mesa-18.3.3/src/gallium/drivers/etnaviv/etnaviv_screen.c mesa-19.0.1/src/gallium/drivers/etnaviv/etnaviv_screen.c --- mesa-18.3.3/src/gallium/drivers/etnaviv/etnaviv_screen.c 2018-12-07 18:58:04.000000000 +0000 +++ mesa-19.0.1/src/gallium/drivers/etnaviv/etnaviv_screen.c 2019-03-31 23:16:37.000000000 +0000 @@ -63,7 +63,7 @@ {"no_autodisable", ETNA_DBG_NO_AUTODISABLE, "Disable autodisable"}, {"no_supertile", ETNA_DBG_NO_SUPERTILE, "Disable supertiles"}, {"no_early_z", ETNA_DBG_NO_EARLY_Z, "Disable early z"}, - {"cflush_all", ETNA_DBG_CFLUSH_ALL, "Flush every cash before state update"}, + {"cflush_all", ETNA_DBG_CFLUSH_ALL, "Flush every cache before state update"}, {"msaa2x", ETNA_DBG_MSAA_2X, "Force 2x msaa"}, {"msaa4x", ETNA_DBG_MSAA_4X, "Force 4x msaa"}, {"flush_all", ETNA_DBG_FLUSH_ALL, "Flush after every rendered primitive"}, @@ -360,6 +360,9 @@ case PIPE_CAP_PREFER_BLIT_BASED_TEXTURE_TRANSFER: return 0; + case PIPE_CAP_MAX_VARYINGS: + return screen->specs.max_varyings; + case PIPE_CAP_PCI_GROUP: case PIPE_CAP_PCI_BUS: case PIPE_CAP_PCI_DEVICE: diff -Nru mesa-18.3.3/src/gallium/drivers/etnaviv/etnaviv_shader.c mesa-19.0.1/src/gallium/drivers/etnaviv/etnaviv_shader.c --- mesa-18.3.3/src/gallium/drivers/etnaviv/etnaviv_shader.c 2018-12-07 18:58:04.000000000 +0000 +++ mesa-19.0.1/src/gallium/drivers/etnaviv/etnaviv_shader.c 2019-03-31 23:16:37.000000000 +0000 @@ -230,8 +230,7 @@ } static bool 
-etna_shader_update_vs_inputs(struct etna_context *ctx, - struct compiled_shader_state *cs, +etna_shader_update_vs_inputs(struct compiled_shader_state *cs, const struct etna_shader_variant *vs, const struct compiled_vertex_elements_state *ves) { @@ -246,7 +245,7 @@ num_vs_inputs = MAX2(ves->num_elements, vs->infile.num_reg); if (num_vs_inputs != ves->num_elements) { BUG("Number of elements %u does not match the number of VS inputs %zu", - ctx->vertex_elements->num_elements, ctx->shader.vs->infile.num_reg); + ves->num_elements, vs->infile.num_reg); return false; } @@ -312,7 +311,7 @@ bool etna_shader_update_vertex(struct etna_context *ctx) { - return etna_shader_update_vs_inputs(ctx, &ctx->shader_state, ctx->shader.vs, + return etna_shader_update_vs_inputs(&ctx->shader_state, ctx->shader.vs, ctx->vertex_elements); } diff -Nru mesa-18.3.3/src/gallium/drivers/etnaviv/etnaviv_state.c mesa-19.0.1/src/gallium/drivers/etnaviv/etnaviv_state.c --- mesa-18.3.3/src/gallium/drivers/etnaviv/etnaviv_state.c 2019-02-01 12:03:20.000000000 +0000 +++ mesa-19.0.1/src/gallium/drivers/etnaviv/etnaviv_state.c 2019-03-31 23:16:37.000000000 +0000 @@ -190,8 +190,9 @@ cs->TS_COLOR_STATUS_BASE.bo = NULL; cs->TS_COLOR_SURFACE_BASE.bo = NULL; - for (int i = 0; i < ETNA_MAX_PIXELPIPES; i++) - cs->PE_PIPE_COLOR_ADDR[i].bo = NULL; + cs->PE_COLOR_ADDR = ctx->dummy_rt_reloc; + for (int i = 0; i < ctx->specs.pixel_pipes; i++) + cs->PE_PIPE_COLOR_ADDR[i] = ctx->dummy_rt_reloc; } if (sv->zsbuf != NULL) { diff -Nru mesa-18.3.3/src/gallium/drivers/etnaviv/etnaviv_texture.c mesa-19.0.1/src/gallium/drivers/etnaviv/etnaviv_texture.c --- mesa-18.3.3/src/gallium/drivers/etnaviv/etnaviv_texture.c 2017-12-02 01:35:56.000000000 +0000 +++ mesa-19.0.1/src/gallium/drivers/etnaviv/etnaviv_texture.c 2019-03-31 23:16:37.000000000 +0000 @@ -172,7 +172,9 @@ if (res->layout == ETNA_LAYOUT_SUPER_TILED && VIV_FEATURE(screen, chipMinorFeatures2, SUPERTILED_TEXTURE)) return true; - /* TODO: LINEAR_TEXTURE_SUPPORT */ + /* 
This GPU supports texturing from linear textures? */ + if (res->layout == ETNA_LAYOUT_LINEAR && VIV_FEATURE(screen, chipMinorFeatures1, LINEAR_TEXTURE_SUPPORT)) + return true; /* Otherwise, only support tiled layouts */ if (res->layout != ETNA_LAYOUT_TILED) @@ -203,6 +205,7 @@ PIPE_BIND_BLENDABLE); res->texture = etna_resource_alloc(pctx->screen, ETNA_LAYOUT_TILED, + ETNA_ADDRESSING_MODE_TILED, DRM_FORMAT_MOD_LINEAR, &templat); } diff -Nru mesa-18.3.3/src/gallium/drivers/etnaviv/etnaviv_texture_state.c mesa-19.0.1/src/gallium/drivers/etnaviv/etnaviv_texture_state.c --- mesa-18.3.3/src/gallium/drivers/etnaviv/etnaviv_texture_state.c 2018-04-03 17:32:26.000000000 +0000 +++ mesa-19.0.1/src/gallium/drivers/etnaviv/etnaviv_texture_state.c 2019-03-31 23:16:37.000000000 +0000 @@ -131,6 +131,17 @@ return NULL; } + if (res->addressing_mode == ETNA_ADDRESSING_MODE_LINEAR) { + sv->TE_SAMPLER_CONFIG0 |= VIVS_TE_SAMPLER_CONFIG0_ADDRESSING_MODE(TEXTURE_ADDRESSING_MODE_LINEAR); + + for (int lod = 0; lod <= res->base.last_level; ++lod) + sv->TE_SAMPLER_LINEAR_STRIDE[lod] = res->levels[lod].stride; + + } else { + sv->TE_SAMPLER_CONFIG0 |= VIVS_TE_SAMPLER_CONFIG0_ADDRESSING_MODE(TEXTURE_ADDRESSING_MODE_TILED); + memset(&sv->TE_SAMPLER_LINEAR_STRIDE, 0, sizeof(sv->TE_SAMPLER_LINEAR_STRIDE)); + } + sv->TE_SAMPLER_CONFIG1 = COND(ext, VIVS_TE_SAMPLER_CONFIG1_FORMAT_EXT(format)) | COND(astc, VIVS_TE_SAMPLER_CONFIG1_FORMAT_EXT(TEXTURE_FORMAT_EXT_ASTC)) | VIVS_TE_SAMPLER_CONFIG1_HALIGN(res->halign) | swiz; @@ -293,6 +304,16 @@ } } } + } + if (unlikely(dirty & (ETNA_DIRTY_SAMPLER_VIEWS))) { + for (int y = 0; y < VIVS_TE_SAMPLER_LINEAR_STRIDE__LEN; ++y) { + for (int x = 0; x < VIVS_TE_SAMPLER__LEN; ++x) { + if ((1 << x) & active_samplers) { + struct etna_sampler_view *sv = etna_sampler_view(ctx->sampler_view[x]); + /*02C00*/ EMIT_STATE(TE_SAMPLER_LINEAR_STRIDE(x, y), sv->TE_SAMPLER_LINEAR_STRIDE[y]); + } + } + } } if (unlikely(ctx->specs.tex_astc && (dirty & (ETNA_DIRTY_SAMPLER_VIEWS)))) { 
for (int x = 0; x < VIVS_TE_SAMPLER__LEN; ++x) { diff -Nru mesa-18.3.3/src/gallium/drivers/etnaviv/etnaviv_texture_state.h mesa-19.0.1/src/gallium/drivers/etnaviv/etnaviv_texture_state.h --- mesa-18.3.3/src/gallium/drivers/etnaviv/etnaviv_texture_state.h 2017-12-02 01:35:56.000000000 +0000 +++ mesa-19.0.1/src/gallium/drivers/etnaviv/etnaviv_texture_state.h 2019-03-31 23:16:37.000000000 +0000 @@ -62,6 +62,7 @@ uint32_t TE_SAMPLER_SIZE; uint32_t TE_SAMPLER_LOG_SIZE; uint32_t TE_SAMPLER_ASTC0; + uint32_t TE_SAMPLER_LINEAR_STRIDE[VIVS_TE_SAMPLER_LINEAR_STRIDE__LEN]; struct etna_reloc TE_SAMPLER_LOD_ADDR[VIVS_TE_SAMPLER_LOD_ADDR__LEN]; unsigned min_lod, max_lod; /* 5.5 fixp */ diff -Nru mesa-18.3.3/src/gallium/drivers/etnaviv/etnaviv_transfer.c mesa-19.0.1/src/gallium/drivers/etnaviv/etnaviv_transfer.c --- mesa-18.3.3/src/gallium/drivers/etnaviv/etnaviv_transfer.c 2017-12-02 01:35:56.000000000 +0000 +++ mesa-19.0.1/src/gallium/drivers/etnaviv/etnaviv_transfer.c 2019-03-31 23:16:37.000000000 +0000 @@ -208,7 +208,8 @@ templ.bind = PIPE_BIND_RENDER_TARGET; trans->rsc = etna_resource_alloc(pctx->screen, ETNA_LAYOUT_LINEAR, - DRM_FORMAT_MOD_LINEAR, &templ); + ETNA_ADDRESSING_MODE_TILED, DRM_FORMAT_MOD_LINEAR, + &templ); if (!trans->rsc) { slab_free(&ctx->transfer_pool, trans); return NULL; diff -Nru mesa-18.3.3/src/gallium/drivers/etnaviv/hw/cmdstream.xml.h mesa-19.0.1/src/gallium/drivers/etnaviv/hw/cmdstream.xml.h --- mesa-18.3.3/src/gallium/drivers/etnaviv/hw/cmdstream.xml.h 2017-11-14 18:46:21.000000000 +0000 +++ mesa-19.0.1/src/gallium/drivers/etnaviv/hw/cmdstream.xml.h 2019-03-31 23:16:37.000000000 +0000 @@ -8,11 +8,11 @@ git clone git://0x04.net/rules-ng-ng The rules-ng-ng source files this header was generated from are: -- cmdstream.xml ( 16929 bytes, from 2017-10-13 12:22:46) -- copyright.xml ( 1597 bytes, from 2016-10-29 07:29:22) -- common.xml ( 26187 bytes, from 2017-10-31 19:05:01) +- cmdstream.xml ( 16930 bytes, from 2019-01-04 11:37:39) +- copyright.xml ( 1597 
bytes, from 2018-02-10 13:09:26) +- common.xml ( 35468 bytes, from 2018-02-10 13:09:26) -Copyright (C) 2012-2017 by the following authors: +Copyright (C) 2012-2019 by the following authors: - Wladimir J. van der Laan - Christian Gmeiner - Lucas Stach diff -Nru mesa-18.3.3/src/gallium/drivers/etnaviv/hw/common_3d.xml.h mesa-19.0.1/src/gallium/drivers/etnaviv/hw/common_3d.xml.h --- mesa-18.3.3/src/gallium/drivers/etnaviv/hw/common_3d.xml.h 2017-11-16 18:44:33.000000000 +0000 +++ mesa-19.0.1/src/gallium/drivers/etnaviv/hw/common_3d.xml.h 2019-03-31 23:16:37.000000000 +0000 @@ -8,12 +8,12 @@ git clone git://0x04.net/rules-ng-ng The rules-ng-ng source files this header was generated from are: -- texdesc_3d.xml ( 3183 bytes, from 2017-10-31 19:05:01) -- copyright.xml ( 1597 bytes, from 2016-10-29 07:29:22) -- common.xml ( 26187 bytes, from 2017-10-31 19:05:01) -- common_3d.xml ( 14615 bytes, from 2017-11-04 14:03:35) +- texdesc_3d.xml ( 3183 bytes, from 2018-02-10 13:09:26) +- copyright.xml ( 1597 bytes, from 2018-02-10 13:09:26) +- common.xml ( 35468 bytes, from 2018-02-10 13:09:26) +- common_3d.xml ( 14843 bytes, from 2019-01-18 10:13:41) -Copyright (C) 2012-2017 by the following authors: +Copyright (C) 2012-2019 by the following authors: - Wladimir J. 
van der Laan - Christian Gmeiner - Lucas Stach @@ -127,6 +127,8 @@ #define TEXTURE_HALIGN_SPLIT_SUPER_TILED 0x00000004 #define TS_CACHE_MODE_128 0x00000000 #define TS_CACHE_MODE_256 0x00000001 +#define TEXTURE_ADDRESSING_MODE_TILED 0x00000000 +#define TEXTURE_ADDRESSING_MODE_LINEAR 0x00000003 #define COLOR_COMPRESSION_FORMAT_A4R4G4B4 0x00000000 #define COLOR_COMPRESSION_FORMAT_A1R5G5B5 0x00000001 #define COLOR_COMPRESSION_FORMAT_R5G6B5 0x00000002 diff -Nru mesa-18.3.3/src/gallium/drivers/etnaviv/hw/common.xml.h mesa-19.0.1/src/gallium/drivers/etnaviv/hw/common.xml.h --- mesa-18.3.3/src/gallium/drivers/etnaviv/hw/common.xml.h 2017-11-16 18:44:33.000000000 +0000 +++ mesa-19.0.1/src/gallium/drivers/etnaviv/hw/common.xml.h 2019-03-31 23:16:37.000000000 +0000 @@ -8,12 +8,12 @@ git clone git://0x04.net/rules-ng-ng The rules-ng-ng source files this header was generated from are: -- texdesc_3d.xml ( 3183 bytes, from 2017-10-31 19:05:01) -- copyright.xml ( 1597 bytes, from 2016-10-29 07:29:22) -- common.xml ( 26187 bytes, from 2017-10-31 19:05:01) -- common_3d.xml ( 14615 bytes, from 2017-11-04 14:03:35) +- texdesc_3d.xml ( 3183 bytes, from 2018-02-10 13:09:26) +- copyright.xml ( 1597 bytes, from 2018-02-10 13:09:26) +- common.xml ( 35468 bytes, from 2018-02-10 13:09:26) +- common_3d.xml ( 14843 bytes, from 2019-01-18 10:13:41) -Copyright (C) 2012-2017 by the following authors: +Copyright (C) 2012-2018 by the following authors: - Wladimir J. 
van der Laan - Christian Gmeiner - Lucas Stach @@ -320,5 +320,166 @@ #define chipMinorFeatures6_DEC 0x00000004 #define chipMinorFeatures6_VS_TILE_NV12 0x00000008 #define chipMinorFeatures6_VS_TILE_NV12_10BIT 0x00000010 +#define chipMinorFeatures6_RENDER_TARGET_8 0x00000020 +#define chipMinorFeatures6_TEX_LOD_FLOW_CORR 0x00000040 +#define chipMinorFeatures6_FACE_LOD 0x00000080 +#define chipMinorFeatures6_MULTI_CORE_SEMAPHORE_STALL_V2 0x00000100 +#define chipMinorFeatures6_VMSAA 0x00000200 +#define chipMinorFeatures6_CHIP_ENABLE_LINK 0x00000400 +#define chipMinorFeatures6_MULTI_SRC_BLT_1_5_ENHANCEMENT 0x00000800 +#define chipMinorFeatures6_MULTI_SRC_BLT_BILINEAR_FILTER 0x00001000 +#define chipMinorFeatures6_RA_HZEZ_CLOCK_CONTROL 0x00002000 +#define chipMinorFeatures6_CACHE128B256BPERLINE 0x00004000 +#define chipMinorFeatures6_V4_COMPRESSION 0x00008000 +#define chipMinorFeatures6_PE2D_MAJOR_SUPER_TILE 0x00010000 +#define chipMinorFeatures6_PE_32BPC_COLORMASK_FIX 0x00020000 +#define chipMinorFeatures6_ALPHA_BLENDING_OPT 0x00040000 +#define chipMinorFeatures6_NEW_GPIPE 0x00080000 +#define chipMinorFeatures6_PIPELINE_32_ATTRIBUTES 0x00100000 +#define chipMinorFeatures6_MSAA_SHADING 0x00200000 +#define chipMinorFeatures6_NO_ANISTRO_FILTER 0x00400000 +#define chipMinorFeatures6_NO_ASTC 0x00800000 +#define chipMinorFeatures6_NO_DXT 0x01000000 +#define chipMinorFeatures6_HWTFB 0x02000000 +#define chipMinorFeatures6_RA_DEPTH_WRITE_MSAA1X_FIX 0x04000000 +#define chipMinorFeatures6_EZHZ_CLOCKGATE_FIX 0x08000000 +#define chipMinorFeatures6_SH_SNAP2PAGE_FIX 0x10000000 +#define chipMinorFeatures6_SH_HALFDEPENDENCY_FIX 0x20000000 +#define chipMinorFeatures6_USC_MCFILL_FIX 0x40000000 +#define chipMinorFeatures6_TPG_TCPERF_FIX 0x80000000 +#define chipMinorFeatures7_USC_MDFIFO_OVERFLOW_FIX 0x00000001 +#define chipMinorFeatures7_SH_TEXLD_BARRIER_IN_CS_FIX 0x00000002 +#define chipMinorFeatures7_RS_NEW_BASEADDR 0x00000004 +#define chipMinorFeatures7_PE_8BPP_DUALPIPE_FIX 0x00000008 
+#define chipMinorFeatures7_SH_ADVANCED_INSTR 0x00000010 +#define chipMinorFeatures7_SH_FLAT_INTERPOLATION_DUAL16_FIX 0x00000020 +#define chipMinorFeatures7_USC_CONTINUOUS_FLUS_FIX 0x00000040 +#define chipMinorFeatures7_SH_SUPPORT_V4 0x00000080 +#define chipMinorFeatures7_SH_SUPPORT_ALPHA_KILL 0x00000100 +#define chipMinorFeatures7_PE_NO_ALPHA_TEST 0x00000200 +#define chipMinorFeatures7_TX_LOD_NEAREST_SELECT 0x00000400 +#define chipMinorFeatures7_SH_FIX_LDEXP 0x00000800 +#define chipMinorFeatures7_SUPPORT_MOVAI 0x00001000 +#define chipMinorFeatures7_SH_SNAP2PAGE_MAXPAGES_FIX 0x00002000 +#define chipMinorFeatures7_PE_RGBA16I_FIX 0x00004000 +#define chipMinorFeatures7_BLT_8bpp_256TILE_FC_FIX 0x00008000 +#define chipMinorFeatures7_PE_64BIT_FENCE_FIX 0x00010000 +#define chipMinorFeatures7_USC_FULL_CACHE_FIX 0x00020000 +#define chipMinorFeatures7_TX_YUV_ASSEMBLER_10BIT 0x00040000 +#define chipMinorFeatures7_FE_32BIT_INDEX_FIX 0x00080000 +#define chipMinorFeatures7_BLT_64BPP_MASKED_CLEAR_FIX 0x00100000 +#define chipMinorFeatures7_BIT_SECURITY 0x00200000 +#define chipMinorFeatures7_BIT_ROBUSTNESS 0x00400000 +#define chipMinorFeatures7_USC_ATOMIC_FIX 0x00800000 +#define chipMinorFeatures7_SH_PSO_MSAA1x_FIX 0x01000000 +#define chipMinorFeatures7_BIT_USC_VX_PERF_FIX 0x02000000 +#define chipMinorFeatures7_EVIS_NO_ABSDIFF 0x04000000 +#define chipMinorFeatures7_EVIS_NO_BITREPLACE 0x08000000 +#define chipMinorFeatures7_EVIS_NO_BOXFILTER 0x10000000 +#define chipMinorFeatures7_EVIS_NO_CORDIAC 0x20000000 +#define chipMinorFeatures7_EVIS_NO_DP32 0x40000000 +#define chipMinorFeatures7_EVIS_NO_FILTER 0x80000000 +#define chipMinorFeatures8_EVIS_NO_IADD 0x00000001 +#define chipMinorFeatures8_EVIS_NO_SELECTADD 0x00000002 +#define chipMinorFeatures8_EVIS_LERP_7OUTPUT 0x00000004 +#define chipMinorFeatures8_EVIS_ACCSQ_8OUTPUT 0x00000008 +#define chipMinorFeatures8_USC_GOS_ADDR_FIX 0x00000010 +#define chipMinorFeatures8_TX_8BIT_UVFRAC 0x00000020 +#define 
chipMinorFeatures8_TX_DESC_CACHE_CLOCKGATE_FIX 0x00000040 +#define chipMinorFeatures8_RSBLT_MSAA_DECOMPRESSION 0x00000080 +#define chipMinorFeatures8_TX_INTEGER_COORDINATE 0x00000100 +#define chipMinorFeatures8_DRAWID 0x00000200 +#define chipMinorFeatures8_PSIO_SAMPLEMASK_IN_R0ZW_FIX 0x00000400 +#define chipMinorFeatures8_TX_INTEGER_COORDINATE_V2 0x00000800 +#define chipMinorFeatures8_MULTI_CORE_BLOCK_SET_CONFIG 0x00001000 +#define chipMinorFeatures8_VG_RESOLVE_ENGINE 0x00002000 +#define chipMinorFeatures8_VG_PE_COLOR_KEY 0x00004000 +#define chipMinorFeatures8_VG_IM_INDEX_FORMAT 0x00008000 +#define chipMinorFeatures8_SNAPPAGE_CMD 0x00010000 +#define chipMinorFeatures8_SH_NO_INDEX_CONST_ON_A0 0x00020000 +#define chipMinorFeatures8_SH_NO_ONECONST_LIMIT 0x00040000 +#define chipMinorFeatures8_SH_IMG_LDST_ON_TEMP 0x00080000 +#define chipMinorFeatures8_COMPUTE_ONLY 0x00100000 +#define chipMinorFeatures8_SH_IMG_LDST_CLAMP 0x00200000 +#define chipMinorFeatures8_SH_ICACHE_ALLOC_COUNT_FIX 0x00400000 +#define chipMinorFeatures8_SH_ICACHE_PREFETCH 0x00800000 +#define chipMinorFeatures8_PE2D_SEPARATE_CACHE 0x01000000 +#define chipMinorFeatures8_VG_AYUV_INPUT_OUTPUT 0x02000000 +#define chipMinorFeatures8_VG_DOUBLE_IMAGE 0x04000000 +#define chipMinorFeatures8_VG_RECTANGLE_STRIPE_MODE 0x08000000 +#define chipMinorFeatures8_VG_MMU 0x10000000 +#define chipMinorFeatures8_VG_IM_FILTER 0x20000000 +#define chipMinorFeatures8_VG_IM_YUV_PACKET 0x40000000 +#define chipMinorFeatures8_VG_IM_YUV_PLANAR 0x80000000 +#define chipMinorFeatures9_VG_PE_YUV_PACKET 0x00000001 +#define chipMinorFeatures9_VG_COLOR_PRECISION_8_BIT 0x00000002 +#define chipMinorFeatures9_PE_MSAA_OQ_FIX 0x00000004 +#define chipMinorFeatures9_PSIO_MSAA_CL_FIX 0x00000008 +#define chipMinorFeatures9_USC_DEFER_FILL_FIX 0x00000010 +#define chipMinorFeatures9_SH_CLOCK_GATE_FIX 0x00000020 +#define chipMinorFeatures9_FE_NEED_DUMMYDRAW 0x00000040 +#define chipMinorFeatures9_PE2D_LINEAR_YUV420_OUTPUT 0x00000080 +#define 
chipMinorFeatures9_PE2D_LINEAR_YUV420_10BIT 0x00000100 +#define chipMinorFeatures9_MULTI_CLUSTER 0x00000200 +#define chipMinorFeatures9_VG_TS_CULLING 0x00000400 +#define chipMinorFeatures9_VG_FP25 0x00000800 +#define chipMinorFeatures9_SH_MULTI_WG_PACK 0x00001000 +#define chipMinorFeatures9_SH_DUAL16_SAMPLEMASK_ZW 0x00002000 +#define chipMinorFeatures9_TPG_TRIVIAL_MODE_FIX 0x00004000 +#define chipMinorFeatures9_TX_ASTC_MULTISLICE_FIX 0x00008000 +#define chipMinorFeatures9_FE_ROBUST_FIX 0x00010000 +#define chipMinorFeatures9_SH_GPIPE_ACCESS_FULLTEMPS 0x00020000 +#define chipMinorFeatures9_PSIO_INTERLOCK 0x00040000 +#define chipMinorFeatures9_PA_WIDELINE_FIX 0x00080000 +#define chipMinorFeatures9_WIDELINE_HELPER_FIX 0x00100000 +#define chipMinorFeatures9_G2D_3RD_PARTY_COMPRESSION_1_1 0x00200000 +#define chipMinorFeatures9_TX_FLUSH_L1CACHE 0x00400000 +#define chipMinorFeatures9_PE_DITHER_FIX2 0x00800000 +#define chipMinorFeatures9_G2D_DEC400 0x01000000 +#define chipMinorFeatures9_SH_TEXLD_U_FIX 0x02000000 +#define chipMinorFeatures9_MC_FCCACHE_BYTEMASK 0x04000000 +#define chipMinorFeatures9_SH_MULTI_WG_PACK_FIX 0x08000000 +#define chipMinorFeatures9_DC_OVERLAY_SCALING 0x10000000 +#define chipMinorFeatures9_DC_SOURCE_ROTATION 0x20000000 +#define chipMinorFeatures9_DC_TILED 0x40000000 +#define chipMinorFeatures9_DC_YUV_L1 0x80000000 +#define chipMinorFeatures10_DC_D30_OUTPUT 0x00000001 +#define chipMinorFeatures10_DC_MMU 0x00000002 +#define chipMinorFeatures10_DC_COMPRESSION 0x00000004 +#define chipMinorFeatures10_DC_QOS 0x00000008 +#define chipMinorFeatures10_PE_ADVANCE_BLEND_PART0 0x00000010 +#define chipMinorFeatures10_FE_PATCHLIST_FETCH_FIX 0x00000020 +#define chipMinorFeatures10_RA_CG_FIX 0x00000040 +#define chipMinorFeatures10_EVIS_VX2 0x00000080 +#define chipMinorFeatures10_NN_FLOAT 0x00000100 +#define chipMinorFeatures10_DEC400 0x00000200 +#define chipMinorFeatures10_LS_SUPPORT_PERCOMP_DEPENDENCY 0x00000400 +#define chipMinorFeatures10_TP_ENGINE 0x00000800 
+#define chipMinorFeatures10_MULTI_CORE_BLOCK_SET_CONFIG2 0x00001000 +#define chipMinorFeatures10_PE_VMSAA_COVERAGE_CACHE_FIX 0x00002000 +#define chipMinorFeatures10_SECURITY_AHB 0x00004000 +#define chipMinorFeatures10_MULTICORE_SEMAPHORESTALL_V3 0x00008000 +#define chipMinorFeatures10_SMALLBATCH 0x00010000 +#define chipMinorFeatures10_SH_CMPLX 0x00020000 +#define chipMinorFeatures10_SH_IDIV0_SWZL_EHS 0x00040000 +#define chipMinorFeatures10_TX_LERP_LESS_BIT 0x00080000 +#define chipMinorFeatures10_SH_GM_ENDIAN 0x00100000 +#define chipMinorFeatures10_SH_GM_USC_UNALLOC 0x00200000 +#define chipMinorFeatures10_SH_END_OF_BB 0x00400000 +#define chipMinorFeatures10_VIP_V7 0x00800000 +#define chipMinorFeatures10_TX_BORDER_CLAMP_FIX 0x01000000 +#define chipMinorFeatures10_SH_IMG_LD_LASTPIXEL_FIX 0x02000000 +#define chipMinorFeatures10_ASYNC_BLT 0x04000000 +#define chipMinorFeatures10_ASYNC_FE_FENCE_FIX 0x08000000 +#define chipMinorFeatures10_PSCS_THROTTLE 0x10000000 +#define chipMinorFeatures10_SEPARATE_LS 0x20000000 +#define chipMinorFeatures10_MCFE 0x40000000 +#define chipMinorFeatures10_WIDELINE_TRIANGLE_EMU 0x80000000 +#define chipMinorFeatures11_VG_RESOLUTION_8K 0x00000001 +#define chipMinorFeatures11_FENCE_32BIT 0x00000002 +#define chipMinorFeatures11_FENCE_64BIT 0x00000004 +#define chipMinorFeatures11_NN_INTERLEVE8 0x00000008 +#define chipMinorFeatures11_TP_REORDER 0x00000010 +#define chipMinorFeatures11_PE_DEPTH_ONLY_OQFIX 0x00000020 #endif /* COMMON_XML */ diff -Nru mesa-18.3.3/src/gallium/drivers/etnaviv/hw/isa.xml.h mesa-19.0.1/src/gallium/drivers/etnaviv/hw/isa.xml.h --- mesa-18.3.3/src/gallium/drivers/etnaviv/hw/isa.xml.h 2017-11-14 18:46:21.000000000 +0000 +++ mesa-19.0.1/src/gallium/drivers/etnaviv/hw/isa.xml.h 2019-03-31 23:16:37.000000000 +0000 @@ -8,10 +8,10 @@ git clone git://0x04.net/rules-ng-ng The rules-ng-ng source files this header was generated from are: -- isa.xml ( 37079 bytes, from 2017-10-19 09:48:25) -- copyright.xml ( 1597 bytes, from 
2016-10-29 07:29:22) +- isa.xml ( 37079 bytes, from 2018-02-10 13:09:26) +- copyright.xml ( 1597 bytes, from 2018-02-10 13:09:26) -Copyright (C) 2012-2017 by the following authors: +Copyright (C) 2012-2018 by the following authors: - Wladimir J. van der Laan - Christian Gmeiner - Lucas Stach diff -Nru mesa-18.3.3/src/gallium/drivers/etnaviv/hw/state_3d.xml.h mesa-19.0.1/src/gallium/drivers/etnaviv/hw/state_3d.xml.h --- mesa-18.3.3/src/gallium/drivers/etnaviv/hw/state_3d.xml.h 2017-11-16 18:44:33.000000000 +0000 +++ mesa-19.0.1/src/gallium/drivers/etnaviv/hw/state_3d.xml.h 2019-03-31 23:16:37.000000000 +0000 @@ -8,17 +8,17 @@ git clone git://0x04.net/rules-ng-ng The rules-ng-ng source files this header was generated from are: -- state.xml ( 26087 bytes, from 2017-10-30 13:44:54) -- common.xml ( 26187 bytes, from 2017-10-31 19:05:01) -- common_3d.xml ( 14615 bytes, from 2017-11-04 14:03:35) -- state_hi.xml ( 27733 bytes, from 2017-10-02 19:00:30) -- copyright.xml ( 1597 bytes, from 2016-10-29 07:29:22) -- state_2d.xml ( 51552 bytes, from 2016-10-29 07:29:22) -- state_3d.xml ( 79992 bytes, from 2017-11-07 10:44:35) -- state_blt.xml ( 13405 bytes, from 2017-10-16 17:42:46) -- state_vg.xml ( 5975 bytes, from 2016-10-29 07:29:22) +- state.xml ( 26087 bytes, from 2018-02-10 13:09:26) +- common.xml ( 35468 bytes, from 2018-02-10 13:09:26) +- common_3d.xml ( 14843 bytes, from 2019-01-18 10:13:41) +- state_hi.xml ( 30232 bytes, from 2018-03-30 07:48:22) +- copyright.xml ( 1597 bytes, from 2018-02-10 13:09:26) +- state_2d.xml ( 51552 bytes, from 2018-02-10 13:09:26) +- state_3d.xml ( 79992 bytes, from 2019-01-18 10:10:57) +- state_blt.xml ( 13405 bytes, from 2018-02-10 13:09:26) +- state_vg.xml ( 5975 bytes, from 2018-02-10 13:09:26) -Copyright (C) 2012-2017 by the following authors: +Copyright (C) 2012-2019 by the following authors: - Wladimir J. 
van der Laan - Christian Gmeiner - Lucas Stach @@ -1400,6 +1400,9 @@ #define VIVS_TE_SAMPLER_CONFIG0_FORMAT__SHIFT 13 #define VIVS_TE_SAMPLER_CONFIG0_FORMAT(x) (((x) << VIVS_TE_SAMPLER_CONFIG0_FORMAT__SHIFT) & VIVS_TE_SAMPLER_CONFIG0_FORMAT__MASK) #define VIVS_TE_SAMPLER_CONFIG0_ROUND_UV 0x00080000 +#define VIVS_TE_SAMPLER_CONFIG0_ADDRESSING_MODE__MASK 0x00300000 +#define VIVS_TE_SAMPLER_CONFIG0_ADDRESSING_MODE__SHIFT 20 +#define VIVS_TE_SAMPLER_CONFIG0_ADDRESSING_MODE(x) (((x) << VIVS_TE_SAMPLER_CONFIG0_ADDRESSING_MODE__SHIFT) & VIVS_TE_SAMPLER_CONFIG0_ADDRESSING_MODE__MASK) #define VIVS_TE_SAMPLER_CONFIG0_ENDIAN__MASK 0x00c00000 #define VIVS_TE_SAMPLER_CONFIG0_ENDIAN__SHIFT 22 #define VIVS_TE_SAMPLER_CONFIG0_ENDIAN(x) (((x) << VIVS_TE_SAMPLER_CONFIG0_ENDIAN__SHIFT) & VIVS_TE_SAMPLER_CONFIG0_ENDIAN__MASK) @@ -1520,6 +1523,9 @@ #define VIVS_NTE_SAMPLER_CONFIG0_FORMAT__SHIFT 13 #define VIVS_NTE_SAMPLER_CONFIG0_FORMAT(x) (((x) << VIVS_NTE_SAMPLER_CONFIG0_FORMAT__SHIFT) & VIVS_NTE_SAMPLER_CONFIG0_FORMAT__MASK) #define VIVS_NTE_SAMPLER_CONFIG0_ROUND_UV 0x00080000 +#define VIVS_NTE_SAMPLER_CONFIG0_ADDRESSING_MODE__MASK 0x00300000 +#define VIVS_NTE_SAMPLER_CONFIG0_ADDRESSING_MODE__SHIFT 20 +#define VIVS_NTE_SAMPLER_CONFIG0_ADDRESSING_MODE(x) (((x) << VIVS_NTE_SAMPLER_CONFIG0_ADDRESSING_MODE__SHIFT) & VIVS_NTE_SAMPLER_CONFIG0_ADDRESSING_MODE__MASK) #define VIVS_NTE_SAMPLER_CONFIG0_ENDIAN__MASK 0x00c00000 #define VIVS_NTE_SAMPLER_CONFIG0_ENDIAN__SHIFT 22 #define VIVS_NTE_SAMPLER_CONFIG0_ENDIAN(x) (((x) << VIVS_NTE_SAMPLER_CONFIG0_ENDIAN__SHIFT) & VIVS_NTE_SAMPLER_CONFIG0_ENDIAN__MASK) diff -Nru mesa-18.3.3/src/gallium/drivers/etnaviv/hw/state_blt.xml.h mesa-19.0.1/src/gallium/drivers/etnaviv/hw/state_blt.xml.h --- mesa-18.3.3/src/gallium/drivers/etnaviv/hw/state_blt.xml.h 2017-11-16 18:44:33.000000000 +0000 +++ mesa-19.0.1/src/gallium/drivers/etnaviv/hw/state_blt.xml.h 2019-03-31 23:16:37.000000000 +0000 @@ -8,17 +8,17 @@ git clone git://0x04.net/rules-ng-ng The 
rules-ng-ng source files this header was generated from are: -- state.xml ( 26087 bytes, from 2017-10-30 13:44:54) -- common.xml ( 26187 bytes, from 2017-10-31 19:05:01) -- common_3d.xml ( 14615 bytes, from 2017-11-04 14:03:35) -- state_hi.xml ( 27733 bytes, from 2017-10-02 19:00:30) -- copyright.xml ( 1597 bytes, from 2016-10-29 07:29:22) -- state_2d.xml ( 51552 bytes, from 2016-10-29 07:29:22) -- state_3d.xml ( 79992 bytes, from 2017-11-07 10:44:35) -- state_blt.xml ( 13405 bytes, from 2017-10-16 17:42:46) -- state_vg.xml ( 5975 bytes, from 2016-10-29 07:29:22) +- state.xml ( 26087 bytes, from 2018-02-10 13:09:26) +- common.xml ( 35468 bytes, from 2018-02-10 13:09:26) +- common_3d.xml ( 14843 bytes, from 2019-01-18 10:13:41) +- state_hi.xml ( 30232 bytes, from 2018-03-30 07:48:22) +- copyright.xml ( 1597 bytes, from 2018-02-10 13:09:26) +- state_2d.xml ( 51552 bytes, from 2018-02-10 13:09:26) +- state_3d.xml ( 79992 bytes, from 2019-01-18 10:10:57) +- state_blt.xml ( 13405 bytes, from 2018-02-10 13:09:26) +- state_vg.xml ( 5975 bytes, from 2018-02-10 13:09:26) -Copyright (C) 2012-2017 by the following authors: +Copyright (C) 2012-2018 by the following authors: - Wladimir J. 
van der Laan - Christian Gmeiner - Lucas Stach diff -Nru mesa-18.3.3/src/gallium/drivers/etnaviv/hw/state.xml.h mesa-19.0.1/src/gallium/drivers/etnaviv/hw/state.xml.h --- mesa-18.3.3/src/gallium/drivers/etnaviv/hw/state.xml.h 2017-11-16 18:44:33.000000000 +0000 +++ mesa-19.0.1/src/gallium/drivers/etnaviv/hw/state.xml.h 2019-03-31 23:16:37.000000000 +0000 @@ -8,17 +8,17 @@ git clone git://0x04.net/rules-ng-ng The rules-ng-ng source files this header was generated from are: -- state.xml ( 26087 bytes, from 2017-10-30 13:44:54) -- common.xml ( 26187 bytes, from 2017-10-31 19:05:01) -- common_3d.xml ( 14615 bytes, from 2017-11-04 14:03:35) -- state_hi.xml ( 27733 bytes, from 2017-10-02 19:00:30) -- copyright.xml ( 1597 bytes, from 2016-10-29 07:29:22) -- state_2d.xml ( 51552 bytes, from 2016-10-29 07:29:22) -- state_3d.xml ( 79992 bytes, from 2017-11-07 10:44:35) -- state_blt.xml ( 13405 bytes, from 2017-10-16 17:42:46) -- state_vg.xml ( 5975 bytes, from 2016-10-29 07:29:22) +- state.xml ( 26087 bytes, from 2018-02-10 13:09:26) +- common.xml ( 35468 bytes, from 2018-02-10 13:09:26) +- common_3d.xml ( 14843 bytes, from 2019-01-18 10:13:41) +- state_hi.xml ( 30232 bytes, from 2018-03-30 07:48:22) +- copyright.xml ( 1597 bytes, from 2018-02-10 13:09:26) +- state_2d.xml ( 51552 bytes, from 2018-02-10 13:09:26) +- state_3d.xml ( 79992 bytes, from 2019-01-18 10:10:57) +- state_blt.xml ( 13405 bytes, from 2018-02-10 13:09:26) +- state_vg.xml ( 5975 bytes, from 2018-02-10 13:09:26) -Copyright (C) 2012-2017 by the following authors: +Copyright (C) 2012-2018 by the following authors: - Wladimir J. 
van der Laan - Christian Gmeiner - Lucas Stach diff -Nru mesa-18.3.3/src/gallium/drivers/etnaviv/hw/texdesc_3d.xml.h mesa-19.0.1/src/gallium/drivers/etnaviv/hw/texdesc_3d.xml.h --- mesa-18.3.3/src/gallium/drivers/etnaviv/hw/texdesc_3d.xml.h 2017-11-16 18:44:33.000000000 +0000 +++ mesa-19.0.1/src/gallium/drivers/etnaviv/hw/texdesc_3d.xml.h 2019-03-31 23:16:37.000000000 +0000 @@ -8,12 +8,12 @@ git clone git://0x04.net/rules-ng-ng The rules-ng-ng source files this header was generated from are: -- texdesc_3d.xml ( 3183 bytes, from 2017-10-31 19:05:01) -- copyright.xml ( 1597 bytes, from 2016-10-29 07:29:22) -- common.xml ( 26187 bytes, from 2017-10-31 19:05:01) -- common_3d.xml ( 14615 bytes, from 2017-11-04 14:03:35) +- texdesc_3d.xml ( 3183 bytes, from 2018-02-10 13:09:26) +- copyright.xml ( 1597 bytes, from 2018-02-10 13:09:26) +- common.xml ( 35468 bytes, from 2018-02-10 13:09:26) +- common_3d.xml ( 14843 bytes, from 2019-01-18 10:13:41) -Copyright (C) 2012-2017 by the following authors: +Copyright (C) 2012-2018 by the following authors: - Wladimir J. 
van der Laan - Christian Gmeiner - Lucas Stach @@ -67,6 +67,9 @@ #define TEXDESC_CONFIG0_FORMAT__SHIFT 13 #define TEXDESC_CONFIG0_FORMAT(x) (((x) << TEXDESC_CONFIG0_FORMAT__SHIFT) & TEXDESC_CONFIG0_FORMAT__MASK) #define TEXDESC_CONFIG0_ROUND_UV 0x00080000 +#define TEXDESC_CONFIG0_ADDRESSING_MODE__MASK 0x00300000 +#define TEXDESC_CONFIG0_ADDRESSING_MODE__SHIFT 20 +#define TEXDESC_CONFIG0_ADDRESSING_MODE(x) (((x) << TEXDESC_CONFIG0_ADDRESSING_MODE__SHIFT) & TEXDESC_CONFIG0_ADDRESSING_MODE__MASK) #define TEXDESC_CONFIG0_ENDIAN__MASK 0x00c00000 #define TEXDESC_CONFIG0_ENDIAN__SHIFT 22 #define TEXDESC_CONFIG0_ENDIAN(x) (((x) << TEXDESC_CONFIG0_ENDIAN__SHIFT) & TEXDESC_CONFIG0_ENDIAN__MASK) diff -Nru mesa-18.3.3/src/gallium/drivers/etnaviv/meson.build mesa-19.0.1/src/gallium/drivers/etnaviv/meson.build --- mesa-18.3.3/src/gallium/drivers/etnaviv/meson.build 2018-06-01 16:49:01.000000000 +0000 +++ mesa-19.0.1/src/gallium/drivers/etnaviv/meson.build 2019-03-31 23:16:37.000000000 +0000 @@ -101,7 +101,8 @@ include_directories : [inc_include, inc_src, inc_gallium, inc_gallium_aux], link_with : [libmesa_util, libgallium, libetnaviv], dependencies : [dep_libdrm_etnaviv], - build_by_default : false, + build_by_default : with_tools.contains('etnaviv'), + install : with_tools.contains('etnaviv'), ) driver_etnaviv = declare_dependency( diff -Nru mesa-18.3.3/src/gallium/drivers/freedreno/a2xx/a2xx.xml.h mesa-19.0.1/src/gallium/drivers/freedreno/a2xx/a2xx.xml.h --- mesa-18.3.3/src/gallium/drivers/freedreno/a2xx/a2xx.xml.h 2018-12-07 18:58:04.000000000 +0000 +++ mesa-19.0.1/src/gallium/drivers/freedreno/a2xx/a2xx.xml.h 1970-01-01 00:00:00.000000000 +0000 @@ -1,1880 +0,0 @@ -#ifndef A2XX_XML -#define A2XX_XML - -/* Autogenerated file, DO NOT EDIT manually! 
- -This file was generated by the rules-ng-ng headergen tool in this git repository: -http://github.com/freedreno/envytools/ -git clone https://github.com/freedreno/envytools.git - -The rules-ng-ng source files this header was generated from are: -- /home/robclark/src/envytools/rnndb/adreno.xml ( 501 bytes, from 2018-07-03 19:37:13) -- /home/robclark/src/envytools/rnndb/freedreno_copyright.xml ( 1572 bytes, from 2018-07-03 19:37:13) -- /home/robclark/src/envytools/rnndb/adreno/a2xx.xml ( 37936 bytes, from 2018-10-08 11:43:51) -- /home/robclark/src/envytools/rnndb/adreno/adreno_common.xml ( 14201 bytes, from 2018-10-08 11:43:51) -- /home/robclark/src/envytools/rnndb/adreno/adreno_pm4.xml ( 42864 bytes, from 2018-10-08 21:57:22) -- /home/robclark/src/envytools/rnndb/adreno/a3xx.xml ( 83840 bytes, from 2018-07-03 19:37:13) -- /home/robclark/src/envytools/rnndb/adreno/a4xx.xml ( 112086 bytes, from 2018-07-03 19:37:13) -- /home/robclark/src/envytools/rnndb/adreno/a5xx.xml ( 147240 bytes, from 2018-10-08 21:57:22) -- /home/robclark/src/envytools/rnndb/adreno/a6xx.xml ( 140514 bytes, from 2018-10-08 21:57:35) -- /home/robclark/src/envytools/rnndb/adreno/a6xx_gmu.xml ( 10431 bytes, from 2018-09-14 13:03:07) -- /home/robclark/src/envytools/rnndb/adreno/ocmem.xml ( 1773 bytes, from 2018-07-03 19:37:13) - -Copyright (C) 2013-2018 by the following authors: -- Rob Clark (robclark) -- Ilia Mirkin (imirkin) - -Permission is hereby granted, free of charge, to any person obtaining -a copy of this software and associated documentation files (the -"Software"), to deal in the Software without restriction, including -without limitation the rights to use, copy, modify, merge, publish, -distribute, sublicense, and/or sell copies of the Software, and to -permit persons to whom the Software is furnished to do so, subject to -the following conditions: - -The above copyright notice and this permission notice (including the -next paragraph) shall be included in all copies or substantial 
-portions of the Software. - -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, -EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF -MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. -IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE -LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION -OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION -WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. -*/ - - -enum a2xx_rb_dither_type { - DITHER_PIXEL = 0, - DITHER_SUBPIXEL = 1, -}; - -enum a2xx_colorformatx { - COLORX_4_4_4_4 = 0, - COLORX_1_5_5_5 = 1, - COLORX_5_6_5 = 2, - COLORX_8 = 3, - COLORX_8_8 = 4, - COLORX_8_8_8_8 = 5, - COLORX_S8_8_8_8 = 6, - COLORX_16_FLOAT = 7, - COLORX_16_16_FLOAT = 8, - COLORX_16_16_16_16_FLOAT = 9, - COLORX_32_FLOAT = 10, - COLORX_32_32_FLOAT = 11, - COLORX_32_32_32_32_FLOAT = 12, - COLORX_2_3_3 = 13, - COLORX_8_8_8 = 14, -}; - -enum a2xx_sq_surfaceformat { - FMT_1_REVERSE = 0, - FMT_1 = 1, - FMT_8 = 2, - FMT_1_5_5_5 = 3, - FMT_5_6_5 = 4, - FMT_6_5_5 = 5, - FMT_8_8_8_8 = 6, - FMT_2_10_10_10 = 7, - FMT_8_A = 8, - FMT_8_B = 9, - FMT_8_8 = 10, - FMT_Cr_Y1_Cb_Y0 = 11, - FMT_Y1_Cr_Y0_Cb = 12, - FMT_5_5_5_1 = 13, - FMT_8_8_8_8_A = 14, - FMT_4_4_4_4 = 15, - FMT_8_8_8 = 16, - FMT_DXT1 = 18, - FMT_DXT2_3 = 19, - FMT_DXT4_5 = 20, - FMT_10_10_10_2 = 21, - FMT_24_8 = 22, - FMT_16 = 24, - FMT_16_16 = 25, - FMT_16_16_16_16 = 26, - FMT_16_EXPAND = 27, - FMT_16_16_EXPAND = 28, - FMT_16_16_16_16_EXPAND = 29, - FMT_16_FLOAT = 30, - FMT_16_16_FLOAT = 31, - FMT_16_16_16_16_FLOAT = 32, - FMT_32 = 33, - FMT_32_32 = 34, - FMT_32_32_32_32 = 35, - FMT_32_FLOAT = 36, - FMT_32_32_FLOAT = 37, - FMT_32_32_32_32_FLOAT = 38, - FMT_ATI_TC_RGB = 39, - FMT_ATI_TC_RGBA = 40, - FMT_ATI_TC_555_565_RGB = 41, - FMT_ATI_TC_555_565_RGBA = 42, - FMT_ATI_TC_RGBA_INTERP = 43, - FMT_ATI_TC_555_565_RGBA_INTERP = 44, - FMT_ETC1_RGBA_INTERP = 46, - FMT_ETC1_RGB = 47, - 
FMT_ETC1_RGBA = 48, - FMT_DXN = 49, - FMT_2_3_3 = 51, - FMT_2_10_10_10_AS_16_16_16_16 = 54, - FMT_10_10_10_2_AS_16_16_16_16 = 55, - FMT_32_32_32_FLOAT = 57, - FMT_DXT3A = 58, - FMT_DXT5A = 59, - FMT_CTX1 = 60, -}; - -enum a2xx_sq_ps_vtx_mode { - POSITION_1_VECTOR = 0, - POSITION_2_VECTORS_UNUSED = 1, - POSITION_2_VECTORS_SPRITE = 2, - POSITION_2_VECTORS_EDGE = 3, - POSITION_2_VECTORS_KILL = 4, - POSITION_2_VECTORS_SPRITE_KILL = 5, - POSITION_2_VECTORS_EDGE_KILL = 6, - MULTIPASS = 7, -}; - -enum a2xx_sq_sample_cntl { - CENTROIDS_ONLY = 0, - CENTERS_ONLY = 1, - CENTROIDS_AND_CENTERS = 2, -}; - -enum a2xx_dx_clip_space { - DXCLIP_OPENGL = 0, - DXCLIP_DIRECTX = 1, -}; - -enum a2xx_pa_su_sc_polymode { - POLY_DISABLED = 0, - POLY_DUALMODE = 1, -}; - -enum a2xx_rb_edram_mode { - EDRAM_NOP = 0, - COLOR_DEPTH = 4, - DEPTH_ONLY = 5, - EDRAM_COPY = 6, -}; - -enum a2xx_pa_sc_pattern_bit_order { - LITTLE = 0, - BIG = 1, -}; - -enum a2xx_pa_sc_auto_reset_cntl { - NEVER = 0, - EACH_PRIMITIVE = 1, - EACH_PACKET = 2, -}; - -enum a2xx_pa_pixcenter { - PIXCENTER_D3D = 0, - PIXCENTER_OGL = 1, -}; - -enum a2xx_pa_roundmode { - TRUNCATE = 0, - ROUND = 1, - ROUNDTOEVEN = 2, - ROUNDTOODD = 3, -}; - -enum a2xx_pa_quantmode { - ONE_SIXTEENTH = 0, - ONE_EIGTH = 1, - ONE_QUARTER = 2, - ONE_HALF = 3, - ONE = 4, -}; - -enum a2xx_rb_copy_sample_select { - SAMPLE_0 = 0, - SAMPLE_1 = 1, - SAMPLE_2 = 2, - SAMPLE_3 = 3, - SAMPLE_01 = 4, - SAMPLE_23 = 5, - SAMPLE_0123 = 6, -}; - -enum a2xx_rb_blend_opcode { - BLEND2_DST_PLUS_SRC = 0, - BLEND2_SRC_MINUS_DST = 1, - BLEND2_MIN_DST_SRC = 2, - BLEND2_MAX_DST_SRC = 3, - BLEND2_DST_MINUS_SRC = 4, - BLEND2_DST_PLUS_SRC_BIAS = 5, -}; - -enum adreno_mmu_clnt_beh { - BEH_NEVR = 0, - BEH_TRAN_RNG = 1, - BEH_TRAN_FLT = 2, -}; - -enum sq_tex_clamp { - SQ_TEX_WRAP = 0, - SQ_TEX_MIRROR = 1, - SQ_TEX_CLAMP_LAST_TEXEL = 2, - SQ_TEX_MIRROR_ONCE_LAST_TEXEL = 3, - SQ_TEX_CLAMP_HALF_BORDER = 4, - SQ_TEX_MIRROR_ONCE_HALF_BORDER = 5, - SQ_TEX_CLAMP_BORDER = 6, - 
SQ_TEX_MIRROR_ONCE_BORDER = 7, -}; - -enum sq_tex_swiz { - SQ_TEX_X = 0, - SQ_TEX_Y = 1, - SQ_TEX_Z = 2, - SQ_TEX_W = 3, - SQ_TEX_ZERO = 4, - SQ_TEX_ONE = 5, -}; - -enum sq_tex_filter { - SQ_TEX_FILTER_POINT = 0, - SQ_TEX_FILTER_BILINEAR = 1, - SQ_TEX_FILTER_BICUBIC = 2, -}; - -#define REG_A2XX_RBBM_PATCH_RELEASE 0x00000001 - -#define REG_A2XX_RBBM_CNTL 0x0000003b - -#define REG_A2XX_RBBM_SOFT_RESET 0x0000003c - -#define REG_A2XX_CP_PFP_UCODE_ADDR 0x000000c0 - -#define REG_A2XX_CP_PFP_UCODE_DATA 0x000000c1 - -#define REG_A2XX_MH_MMU_CONFIG 0x00000040 -#define A2XX_MH_MMU_CONFIG_MMU_ENABLE 0x00000001 -#define A2XX_MH_MMU_CONFIG_SPLIT_MODE_ENABLE 0x00000002 -#define A2XX_MH_MMU_CONFIG_RB_W_CLNT_BEHAVIOR__MASK 0x00000030 -#define A2XX_MH_MMU_CONFIG_RB_W_CLNT_BEHAVIOR__SHIFT 4 -static inline uint32_t A2XX_MH_MMU_CONFIG_RB_W_CLNT_BEHAVIOR(enum adreno_mmu_clnt_beh val) -{ - return ((val) << A2XX_MH_MMU_CONFIG_RB_W_CLNT_BEHAVIOR__SHIFT) & A2XX_MH_MMU_CONFIG_RB_W_CLNT_BEHAVIOR__MASK; -} -#define A2XX_MH_MMU_CONFIG_CP_W_CLNT_BEHAVIOR__MASK 0x000000c0 -#define A2XX_MH_MMU_CONFIG_CP_W_CLNT_BEHAVIOR__SHIFT 6 -static inline uint32_t A2XX_MH_MMU_CONFIG_CP_W_CLNT_BEHAVIOR(enum adreno_mmu_clnt_beh val) -{ - return ((val) << A2XX_MH_MMU_CONFIG_CP_W_CLNT_BEHAVIOR__SHIFT) & A2XX_MH_MMU_CONFIG_CP_W_CLNT_BEHAVIOR__MASK; -} -#define A2XX_MH_MMU_CONFIG_CP_R0_CLNT_BEHAVIOR__MASK 0x00000300 -#define A2XX_MH_MMU_CONFIG_CP_R0_CLNT_BEHAVIOR__SHIFT 8 -static inline uint32_t A2XX_MH_MMU_CONFIG_CP_R0_CLNT_BEHAVIOR(enum adreno_mmu_clnt_beh val) -{ - return ((val) << A2XX_MH_MMU_CONFIG_CP_R0_CLNT_BEHAVIOR__SHIFT) & A2XX_MH_MMU_CONFIG_CP_R0_CLNT_BEHAVIOR__MASK; -} -#define A2XX_MH_MMU_CONFIG_CP_R1_CLNT_BEHAVIOR__MASK 0x00000c00 -#define A2XX_MH_MMU_CONFIG_CP_R1_CLNT_BEHAVIOR__SHIFT 10 -static inline uint32_t A2XX_MH_MMU_CONFIG_CP_R1_CLNT_BEHAVIOR(enum adreno_mmu_clnt_beh val) -{ - return ((val) << A2XX_MH_MMU_CONFIG_CP_R1_CLNT_BEHAVIOR__SHIFT) & A2XX_MH_MMU_CONFIG_CP_R1_CLNT_BEHAVIOR__MASK; -} 
-#define A2XX_MH_MMU_CONFIG_CP_R2_CLNT_BEHAVIOR__MASK 0x00003000 -#define A2XX_MH_MMU_CONFIG_CP_R2_CLNT_BEHAVIOR__SHIFT 12 -static inline uint32_t A2XX_MH_MMU_CONFIG_CP_R2_CLNT_BEHAVIOR(enum adreno_mmu_clnt_beh val) -{ - return ((val) << A2XX_MH_MMU_CONFIG_CP_R2_CLNT_BEHAVIOR__SHIFT) & A2XX_MH_MMU_CONFIG_CP_R2_CLNT_BEHAVIOR__MASK; -} -#define A2XX_MH_MMU_CONFIG_CP_R3_CLNT_BEHAVIOR__MASK 0x0000c000 -#define A2XX_MH_MMU_CONFIG_CP_R3_CLNT_BEHAVIOR__SHIFT 14 -static inline uint32_t A2XX_MH_MMU_CONFIG_CP_R3_CLNT_BEHAVIOR(enum adreno_mmu_clnt_beh val) -{ - return ((val) << A2XX_MH_MMU_CONFIG_CP_R3_CLNT_BEHAVIOR__SHIFT) & A2XX_MH_MMU_CONFIG_CP_R3_CLNT_BEHAVIOR__MASK; -} -#define A2XX_MH_MMU_CONFIG_CP_R4_CLNT_BEHAVIOR__MASK 0x00030000 -#define A2XX_MH_MMU_CONFIG_CP_R4_CLNT_BEHAVIOR__SHIFT 16 -static inline uint32_t A2XX_MH_MMU_CONFIG_CP_R4_CLNT_BEHAVIOR(enum adreno_mmu_clnt_beh val) -{ - return ((val) << A2XX_MH_MMU_CONFIG_CP_R4_CLNT_BEHAVIOR__SHIFT) & A2XX_MH_MMU_CONFIG_CP_R4_CLNT_BEHAVIOR__MASK; -} -#define A2XX_MH_MMU_CONFIG_VGT_R0_CLNT_BEHAVIOR__MASK 0x000c0000 -#define A2XX_MH_MMU_CONFIG_VGT_R0_CLNT_BEHAVIOR__SHIFT 18 -static inline uint32_t A2XX_MH_MMU_CONFIG_VGT_R0_CLNT_BEHAVIOR(enum adreno_mmu_clnt_beh val) -{ - return ((val) << A2XX_MH_MMU_CONFIG_VGT_R0_CLNT_BEHAVIOR__SHIFT) & A2XX_MH_MMU_CONFIG_VGT_R0_CLNT_BEHAVIOR__MASK; -} -#define A2XX_MH_MMU_CONFIG_VGT_R1_CLNT_BEHAVIOR__MASK 0x00300000 -#define A2XX_MH_MMU_CONFIG_VGT_R1_CLNT_BEHAVIOR__SHIFT 20 -static inline uint32_t A2XX_MH_MMU_CONFIG_VGT_R1_CLNT_BEHAVIOR(enum adreno_mmu_clnt_beh val) -{ - return ((val) << A2XX_MH_MMU_CONFIG_VGT_R1_CLNT_BEHAVIOR__SHIFT) & A2XX_MH_MMU_CONFIG_VGT_R1_CLNT_BEHAVIOR__MASK; -} -#define A2XX_MH_MMU_CONFIG_TC_R_CLNT_BEHAVIOR__MASK 0x00c00000 -#define A2XX_MH_MMU_CONFIG_TC_R_CLNT_BEHAVIOR__SHIFT 22 -static inline uint32_t A2XX_MH_MMU_CONFIG_TC_R_CLNT_BEHAVIOR(enum adreno_mmu_clnt_beh val) -{ - return ((val) << A2XX_MH_MMU_CONFIG_TC_R_CLNT_BEHAVIOR__SHIFT) & 
A2XX_MH_MMU_CONFIG_TC_R_CLNT_BEHAVIOR__MASK; -} -#define A2XX_MH_MMU_CONFIG_PA_W_CLNT_BEHAVIOR__MASK 0x03000000 -#define A2XX_MH_MMU_CONFIG_PA_W_CLNT_BEHAVIOR__SHIFT 24 -static inline uint32_t A2XX_MH_MMU_CONFIG_PA_W_CLNT_BEHAVIOR(enum adreno_mmu_clnt_beh val) -{ - return ((val) << A2XX_MH_MMU_CONFIG_PA_W_CLNT_BEHAVIOR__SHIFT) & A2XX_MH_MMU_CONFIG_PA_W_CLNT_BEHAVIOR__MASK; -} - -#define REG_A2XX_MH_MMU_VA_RANGE 0x00000041 -#define A2XX_MH_MMU_VA_RANGE_NUM_64KB_REGIONS__MASK 0x00000fff -#define A2XX_MH_MMU_VA_RANGE_NUM_64KB_REGIONS__SHIFT 0 -static inline uint32_t A2XX_MH_MMU_VA_RANGE_NUM_64KB_REGIONS(uint32_t val) -{ - return ((val) << A2XX_MH_MMU_VA_RANGE_NUM_64KB_REGIONS__SHIFT) & A2XX_MH_MMU_VA_RANGE_NUM_64KB_REGIONS__MASK; -} -#define A2XX_MH_MMU_VA_RANGE_VA_BASE__MASK 0xfffff000 -#define A2XX_MH_MMU_VA_RANGE_VA_BASE__SHIFT 12 -static inline uint32_t A2XX_MH_MMU_VA_RANGE_VA_BASE(uint32_t val) -{ - return ((val) << A2XX_MH_MMU_VA_RANGE_VA_BASE__SHIFT) & A2XX_MH_MMU_VA_RANGE_VA_BASE__MASK; -} - -#define REG_A2XX_MH_MMU_PT_BASE 0x00000042 - -#define REG_A2XX_MH_MMU_PAGE_FAULT 0x00000043 - -#define REG_A2XX_MH_MMU_TRAN_ERROR 0x00000044 - -#define REG_A2XX_MH_MMU_INVALIDATE 0x00000045 -#define A2XX_MH_MMU_INVALIDATE_INVALIDATE_ALL 0x00000001 -#define A2XX_MH_MMU_INVALIDATE_INVALIDATE_TC 0x00000002 - -#define REG_A2XX_MH_MMU_MPU_BASE 0x00000046 - -#define REG_A2XX_MH_MMU_MPU_END 0x00000047 - -#define REG_A2XX_NQWAIT_UNTIL 0x00000394 - -#define REG_A2XX_RBBM_PERFCOUNTER1_SELECT 0x00000395 - -#define REG_A2XX_RBBM_PERFCOUNTER1_LO 0x00000397 - -#define REG_A2XX_RBBM_PERFCOUNTER1_HI 0x00000398 - -#define REG_A2XX_RBBM_DEBUG 0x0000039b - -#define REG_A2XX_RBBM_PM_OVERRIDE1 0x0000039c -#define A2XX_RBBM_PM_OVERRIDE1_RBBM_AHBCLK_PM_OVERRIDE 0x00000001 -#define A2XX_RBBM_PM_OVERRIDE1_SC_REG_SCLK_PM_OVERRIDE 0x00000002 -#define A2XX_RBBM_PM_OVERRIDE1_SC_SCLK_PM_OVERRIDE 0x00000004 -#define A2XX_RBBM_PM_OVERRIDE1_SP_TOP_SCLK_PM_OVERRIDE 0x00000008 -#define 
A2XX_RBBM_PM_OVERRIDE1_SP_V0_SCLK_PM_OVERRIDE 0x00000010 -#define A2XX_RBBM_PM_OVERRIDE1_SQ_REG_SCLK_PM_OVERRIDE 0x00000020 -#define A2XX_RBBM_PM_OVERRIDE1_SQ_REG_FIFOS_SCLK_PM_OVERRIDE 0x00000040 -#define A2XX_RBBM_PM_OVERRIDE1_SQ_CONST_MEM_SCLK_PM_OVERRIDE 0x00000080 -#define A2XX_RBBM_PM_OVERRIDE1_SQ_SQ_SCLK_PM_OVERRIDE 0x00000100 -#define A2XX_RBBM_PM_OVERRIDE1_SX_SCLK_PM_OVERRIDE 0x00000200 -#define A2XX_RBBM_PM_OVERRIDE1_SX_REG_SCLK_PM_OVERRIDE 0x00000400 -#define A2XX_RBBM_PM_OVERRIDE1_TCM_TCO_SCLK_PM_OVERRIDE 0x00000800 -#define A2XX_RBBM_PM_OVERRIDE1_TCM_TCM_SCLK_PM_OVERRIDE 0x00001000 -#define A2XX_RBBM_PM_OVERRIDE1_TCM_TCD_SCLK_PM_OVERRIDE 0x00002000 -#define A2XX_RBBM_PM_OVERRIDE1_TCM_REG_SCLK_PM_OVERRIDE 0x00004000 -#define A2XX_RBBM_PM_OVERRIDE1_TPC_TPC_SCLK_PM_OVERRIDE 0x00008000 -#define A2XX_RBBM_PM_OVERRIDE1_TPC_REG_SCLK_PM_OVERRIDE 0x00010000 -#define A2XX_RBBM_PM_OVERRIDE1_TCF_TCA_SCLK_PM_OVERRIDE 0x00020000 -#define A2XX_RBBM_PM_OVERRIDE1_TCF_TCB_SCLK_PM_OVERRIDE 0x00040000 -#define A2XX_RBBM_PM_OVERRIDE1_TCF_TCB_READ_SCLK_PM_OVERRIDE 0x00080000 -#define A2XX_RBBM_PM_OVERRIDE1_TP_TP_SCLK_PM_OVERRIDE 0x00100000 -#define A2XX_RBBM_PM_OVERRIDE1_TP_REG_SCLK_PM_OVERRIDE 0x00200000 -#define A2XX_RBBM_PM_OVERRIDE1_CP_G_SCLK_PM_OVERRIDE 0x00400000 -#define A2XX_RBBM_PM_OVERRIDE1_CP_REG_SCLK_PM_OVERRIDE 0x00800000 -#define A2XX_RBBM_PM_OVERRIDE1_CP_G_REG_SCLK_PM_OVERRIDE 0x01000000 -#define A2XX_RBBM_PM_OVERRIDE1_SPI_SCLK_PM_OVERRIDE 0x02000000 -#define A2XX_RBBM_PM_OVERRIDE1_RB_REG_SCLK_PM_OVERRIDE 0x04000000 -#define A2XX_RBBM_PM_OVERRIDE1_RB_SCLK_PM_OVERRIDE 0x08000000 -#define A2XX_RBBM_PM_OVERRIDE1_MH_MH_SCLK_PM_OVERRIDE 0x10000000 -#define A2XX_RBBM_PM_OVERRIDE1_MH_REG_SCLK_PM_OVERRIDE 0x20000000 -#define A2XX_RBBM_PM_OVERRIDE1_MH_MMU_SCLK_PM_OVERRIDE 0x40000000 -#define A2XX_RBBM_PM_OVERRIDE1_MH_TCROQ_SCLK_PM_OVERRIDE 0x80000000 - -#define REG_A2XX_RBBM_PM_OVERRIDE2 0x0000039d - -#define REG_A2XX_RBBM_DEBUG_OUT 0x000003a0 - -#define 
REG_A2XX_RBBM_DEBUG_CNTL 0x000003a1 - -#define REG_A2XX_RBBM_READ_ERROR 0x000003b3 - -#define REG_A2XX_RBBM_INT_CNTL 0x000003b4 -#define A2XX_RBBM_INT_CNTL_RDERR_INT_MASK 0x00000001 -#define A2XX_RBBM_INT_CNTL_DISPLAY_UPDATE_INT_MASK 0x00000002 -#define A2XX_RBBM_INT_CNTL_GUI_IDLE_INT_MASK 0x00080000 - -#define REG_A2XX_RBBM_INT_STATUS 0x000003b5 - -#define REG_A2XX_RBBM_INT_ACK 0x000003b6 - -#define REG_A2XX_MASTER_INT_SIGNAL 0x000003b7 -#define A2XX_MASTER_INT_SIGNAL_MH_INT_STAT 0x00000020 -#define A2XX_MASTER_INT_SIGNAL_SQ_INT_STAT 0x04000000 -#define A2XX_MASTER_INT_SIGNAL_CP_INT_STAT 0x40000000 -#define A2XX_MASTER_INT_SIGNAL_RBBM_INT_STAT 0x80000000 - -#define REG_A2XX_RBBM_PERIPHID1 0x000003f9 - -#define REG_A2XX_RBBM_PERIPHID2 0x000003fa - -#define REG_A2XX_CP_PERFMON_CNTL 0x00000444 - -#define REG_A2XX_CP_PERFCOUNTER_SELECT 0x00000445 - -#define REG_A2XX_CP_PERFCOUNTER_LO 0x00000446 - -#define REG_A2XX_CP_PERFCOUNTER_HI 0x00000447 - -#define REG_A2XX_RBBM_STATUS 0x000005d0 -#define A2XX_RBBM_STATUS_CMDFIFO_AVAIL__MASK 0x0000001f -#define A2XX_RBBM_STATUS_CMDFIFO_AVAIL__SHIFT 0 -static inline uint32_t A2XX_RBBM_STATUS_CMDFIFO_AVAIL(uint32_t val) -{ - return ((val) << A2XX_RBBM_STATUS_CMDFIFO_AVAIL__SHIFT) & A2XX_RBBM_STATUS_CMDFIFO_AVAIL__MASK; -} -#define A2XX_RBBM_STATUS_TC_BUSY 0x00000020 -#define A2XX_RBBM_STATUS_HIRQ_PENDING 0x00000100 -#define A2XX_RBBM_STATUS_CPRQ_PENDING 0x00000200 -#define A2XX_RBBM_STATUS_CFRQ_PENDING 0x00000400 -#define A2XX_RBBM_STATUS_PFRQ_PENDING 0x00000800 -#define A2XX_RBBM_STATUS_VGT_BUSY_NO_DMA 0x00001000 -#define A2XX_RBBM_STATUS_RBBM_WU_BUSY 0x00004000 -#define A2XX_RBBM_STATUS_CP_NRT_BUSY 0x00010000 -#define A2XX_RBBM_STATUS_MH_BUSY 0x00040000 -#define A2XX_RBBM_STATUS_MH_COHERENCY_BUSY 0x00080000 -#define A2XX_RBBM_STATUS_SX_BUSY 0x00200000 -#define A2XX_RBBM_STATUS_TPC_BUSY 0x00400000 -#define A2XX_RBBM_STATUS_SC_CNTX_BUSY 0x01000000 -#define A2XX_RBBM_STATUS_PA_BUSY 0x02000000 -#define A2XX_RBBM_STATUS_VGT_BUSY 
0x04000000 -#define A2XX_RBBM_STATUS_SQ_CNTX17_BUSY 0x08000000 -#define A2XX_RBBM_STATUS_SQ_CNTX0_BUSY 0x10000000 -#define A2XX_RBBM_STATUS_RB_CNTX_BUSY 0x40000000 -#define A2XX_RBBM_STATUS_GUI_ACTIVE 0x80000000 - -#define REG_A2XX_MH_ARBITER_CONFIG 0x00000a40 -#define A2XX_MH_ARBITER_CONFIG_SAME_PAGE_LIMIT__MASK 0x0000003f -#define A2XX_MH_ARBITER_CONFIG_SAME_PAGE_LIMIT__SHIFT 0 -static inline uint32_t A2XX_MH_ARBITER_CONFIG_SAME_PAGE_LIMIT(uint32_t val) -{ - return ((val) << A2XX_MH_ARBITER_CONFIG_SAME_PAGE_LIMIT__SHIFT) & A2XX_MH_ARBITER_CONFIG_SAME_PAGE_LIMIT__MASK; -} -#define A2XX_MH_ARBITER_CONFIG_SAME_PAGE_GRANULARITY 0x00000040 -#define A2XX_MH_ARBITER_CONFIG_L1_ARB_ENABLE 0x00000080 -#define A2XX_MH_ARBITER_CONFIG_L1_ARB_HOLD_ENABLE 0x00000100 -#define A2XX_MH_ARBITER_CONFIG_L2_ARB_CONTROL 0x00000200 -#define A2XX_MH_ARBITER_CONFIG_PAGE_SIZE__MASK 0x00001c00 -#define A2XX_MH_ARBITER_CONFIG_PAGE_SIZE__SHIFT 10 -static inline uint32_t A2XX_MH_ARBITER_CONFIG_PAGE_SIZE(uint32_t val) -{ - return ((val) << A2XX_MH_ARBITER_CONFIG_PAGE_SIZE__SHIFT) & A2XX_MH_ARBITER_CONFIG_PAGE_SIZE__MASK; -} -#define A2XX_MH_ARBITER_CONFIG_TC_REORDER_ENABLE 0x00002000 -#define A2XX_MH_ARBITER_CONFIG_TC_ARB_HOLD_ENABLE 0x00004000 -#define A2XX_MH_ARBITER_CONFIG_IN_FLIGHT_LIMIT_ENABLE 0x00008000 -#define A2XX_MH_ARBITER_CONFIG_IN_FLIGHT_LIMIT__MASK 0x003f0000 -#define A2XX_MH_ARBITER_CONFIG_IN_FLIGHT_LIMIT__SHIFT 16 -static inline uint32_t A2XX_MH_ARBITER_CONFIG_IN_FLIGHT_LIMIT(uint32_t val) -{ - return ((val) << A2XX_MH_ARBITER_CONFIG_IN_FLIGHT_LIMIT__SHIFT) & A2XX_MH_ARBITER_CONFIG_IN_FLIGHT_LIMIT__MASK; -} -#define A2XX_MH_ARBITER_CONFIG_CP_CLNT_ENABLE 0x00400000 -#define A2XX_MH_ARBITER_CONFIG_VGT_CLNT_ENABLE 0x00800000 -#define A2XX_MH_ARBITER_CONFIG_TC_CLNT_ENABLE 0x01000000 -#define A2XX_MH_ARBITER_CONFIG_RB_CLNT_ENABLE 0x02000000 -#define A2XX_MH_ARBITER_CONFIG_PA_CLNT_ENABLE 0x04000000 - -#define REG_A2XX_MH_INTERRUPT_MASK 0x00000a42 -#define 
A2XX_MH_INTERRUPT_MASK_AXI_READ_ERROR 0x00000001 -#define A2XX_MH_INTERRUPT_MASK_AXI_WRITE_ERROR 0x00000002 -#define A2XX_MH_INTERRUPT_MASK_MMU_PAGE_FAULT 0x00000004 - -#define REG_A2XX_MH_INTERRUPT_STATUS 0x00000a43 - -#define REG_A2XX_MH_INTERRUPT_CLEAR 0x00000a44 - -#define REG_A2XX_MH_CLNT_INTF_CTRL_CONFIG1 0x00000a54 - -#define REG_A2XX_MH_CLNT_INTF_CTRL_CONFIG2 0x00000a55 - -#define REG_A2XX_A220_VSC_BIN_SIZE 0x00000c01 -#define A2XX_A220_VSC_BIN_SIZE_WIDTH__MASK 0x0000001f -#define A2XX_A220_VSC_BIN_SIZE_WIDTH__SHIFT 0 -static inline uint32_t A2XX_A220_VSC_BIN_SIZE_WIDTH(uint32_t val) -{ - assert(!(val & 0x1f)); - return ((val >> 5) << A2XX_A220_VSC_BIN_SIZE_WIDTH__SHIFT) & A2XX_A220_VSC_BIN_SIZE_WIDTH__MASK; -} -#define A2XX_A220_VSC_BIN_SIZE_HEIGHT__MASK 0x000003e0 -#define A2XX_A220_VSC_BIN_SIZE_HEIGHT__SHIFT 5 -static inline uint32_t A2XX_A220_VSC_BIN_SIZE_HEIGHT(uint32_t val) -{ - assert(!(val & 0x1f)); - return ((val >> 5) << A2XX_A220_VSC_BIN_SIZE_HEIGHT__SHIFT) & A2XX_A220_VSC_BIN_SIZE_HEIGHT__MASK; -} - -static inline uint32_t REG_A2XX_VSC_PIPE(uint32_t i0) { return 0x00000c06 + 0x3*i0; } - -static inline uint32_t REG_A2XX_VSC_PIPE_CONFIG(uint32_t i0) { return 0x00000c06 + 0x3*i0; } - -static inline uint32_t REG_A2XX_VSC_PIPE_DATA_ADDRESS(uint32_t i0) { return 0x00000c07 + 0x3*i0; } - -static inline uint32_t REG_A2XX_VSC_PIPE_DATA_LENGTH(uint32_t i0) { return 0x00000c08 + 0x3*i0; } - -#define REG_A2XX_PC_DEBUG_CNTL 0x00000c38 - -#define REG_A2XX_PC_DEBUG_DATA 0x00000c39 - -#define REG_A2XX_PA_SC_VIZ_QUERY_STATUS 0x00000c44 - -#define REG_A2XX_GRAS_DEBUG_CNTL 0x00000c80 - -#define REG_A2XX_PA_SU_DEBUG_CNTL 0x00000c80 - -#define REG_A2XX_GRAS_DEBUG_DATA 0x00000c81 - -#define REG_A2XX_PA_SU_DEBUG_DATA 0x00000c81 - -#define REG_A2XX_PA_SU_FACE_DATA 0x00000c86 -#define A2XX_PA_SU_FACE_DATA_BASE_ADDR__MASK 0xffffffe0 -#define A2XX_PA_SU_FACE_DATA_BASE_ADDR__SHIFT 5 -static inline uint32_t A2XX_PA_SU_FACE_DATA_BASE_ADDR(uint32_t val) -{ - return ((val) << 
A2XX_PA_SU_FACE_DATA_BASE_ADDR__SHIFT) & A2XX_PA_SU_FACE_DATA_BASE_ADDR__MASK; -} - -#define REG_A2XX_SQ_GPR_MANAGEMENT 0x00000d00 -#define A2XX_SQ_GPR_MANAGEMENT_REG_DYNAMIC 0x00000001 -#define A2XX_SQ_GPR_MANAGEMENT_REG_SIZE_PIX__MASK 0x00000ff0 -#define A2XX_SQ_GPR_MANAGEMENT_REG_SIZE_PIX__SHIFT 4 -static inline uint32_t A2XX_SQ_GPR_MANAGEMENT_REG_SIZE_PIX(uint32_t val) -{ - return ((val) << A2XX_SQ_GPR_MANAGEMENT_REG_SIZE_PIX__SHIFT) & A2XX_SQ_GPR_MANAGEMENT_REG_SIZE_PIX__MASK; -} -#define A2XX_SQ_GPR_MANAGEMENT_REG_SIZE_VTX__MASK 0x000ff000 -#define A2XX_SQ_GPR_MANAGEMENT_REG_SIZE_VTX__SHIFT 12 -static inline uint32_t A2XX_SQ_GPR_MANAGEMENT_REG_SIZE_VTX(uint32_t val) -{ - return ((val) << A2XX_SQ_GPR_MANAGEMENT_REG_SIZE_VTX__SHIFT) & A2XX_SQ_GPR_MANAGEMENT_REG_SIZE_VTX__MASK; -} - -#define REG_A2XX_SQ_FLOW_CONTROL 0x00000d01 - -#define REG_A2XX_SQ_INST_STORE_MANAGMENT 0x00000d02 -#define A2XX_SQ_INST_STORE_MANAGMENT_INST_BASE_PIX__MASK 0x00000fff -#define A2XX_SQ_INST_STORE_MANAGMENT_INST_BASE_PIX__SHIFT 0 -static inline uint32_t A2XX_SQ_INST_STORE_MANAGMENT_INST_BASE_PIX(uint32_t val) -{ - return ((val) << A2XX_SQ_INST_STORE_MANAGMENT_INST_BASE_PIX__SHIFT) & A2XX_SQ_INST_STORE_MANAGMENT_INST_BASE_PIX__MASK; -} -#define A2XX_SQ_INST_STORE_MANAGMENT_INST_BASE_VTX__MASK 0x0fff0000 -#define A2XX_SQ_INST_STORE_MANAGMENT_INST_BASE_VTX__SHIFT 16 -static inline uint32_t A2XX_SQ_INST_STORE_MANAGMENT_INST_BASE_VTX(uint32_t val) -{ - return ((val) << A2XX_SQ_INST_STORE_MANAGMENT_INST_BASE_VTX__SHIFT) & A2XX_SQ_INST_STORE_MANAGMENT_INST_BASE_VTX__MASK; -} - -#define REG_A2XX_SQ_DEBUG_MISC 0x00000d05 - -#define REG_A2XX_SQ_INT_CNTL 0x00000d34 - -#define REG_A2XX_SQ_INT_STATUS 0x00000d35 - -#define REG_A2XX_SQ_INT_ACK 0x00000d36 - -#define REG_A2XX_SQ_DEBUG_INPUT_FSM 0x00000dae - -#define REG_A2XX_SQ_DEBUG_CONST_MGR_FSM 0x00000daf - -#define REG_A2XX_SQ_DEBUG_TP_FSM 0x00000db0 - -#define REG_A2XX_SQ_DEBUG_FSM_ALU_0 0x00000db1 - -#define REG_A2XX_SQ_DEBUG_FSM_ALU_1 
0x00000db2 - -#define REG_A2XX_SQ_DEBUG_EXP_ALLOC 0x00000db3 - -#define REG_A2XX_SQ_DEBUG_PTR_BUFF 0x00000db4 - -#define REG_A2XX_SQ_DEBUG_GPR_VTX 0x00000db5 - -#define REG_A2XX_SQ_DEBUG_GPR_PIX 0x00000db6 - -#define REG_A2XX_SQ_DEBUG_TB_STATUS_SEL 0x00000db7 - -#define REG_A2XX_SQ_DEBUG_VTX_TB_0 0x00000db8 - -#define REG_A2XX_SQ_DEBUG_VTX_TB_1 0x00000db9 - -#define REG_A2XX_SQ_DEBUG_VTX_TB_STATUS_REG 0x00000dba - -#define REG_A2XX_SQ_DEBUG_VTX_TB_STATE_MEM 0x00000dbb - -#define REG_A2XX_SQ_DEBUG_PIX_TB_0 0x00000dbc - -#define REG_A2XX_SQ_DEBUG_PIX_TB_STATUS_REG_0 0x00000dbd - -#define REG_A2XX_SQ_DEBUG_PIX_TB_STATUS_REG_1 0x00000dbe - -#define REG_A2XX_SQ_DEBUG_PIX_TB_STATUS_REG_2 0x00000dbf - -#define REG_A2XX_SQ_DEBUG_PIX_TB_STATUS_REG_3 0x00000dc0 - -#define REG_A2XX_SQ_DEBUG_PIX_TB_STATE_MEM 0x00000dc1 - -#define REG_A2XX_TC_CNTL_STATUS 0x00000e00 -#define A2XX_TC_CNTL_STATUS_L2_INVALIDATE 0x00000001 - -#define REG_A2XX_TP0_CHICKEN 0x00000e1e - -#define REG_A2XX_RB_BC_CONTROL 0x00000f01 -#define A2XX_RB_BC_CONTROL_ACCUM_LINEAR_MODE_ENABLE 0x00000001 -#define A2XX_RB_BC_CONTROL_ACCUM_TIMEOUT_SELECT__MASK 0x00000006 -#define A2XX_RB_BC_CONTROL_ACCUM_TIMEOUT_SELECT__SHIFT 1 -static inline uint32_t A2XX_RB_BC_CONTROL_ACCUM_TIMEOUT_SELECT(uint32_t val) -{ - return ((val) << A2XX_RB_BC_CONTROL_ACCUM_TIMEOUT_SELECT__SHIFT) & A2XX_RB_BC_CONTROL_ACCUM_TIMEOUT_SELECT__MASK; -} -#define A2XX_RB_BC_CONTROL_DISABLE_EDRAM_CAM 0x00000008 -#define A2XX_RB_BC_CONTROL_DISABLE_EZ_FAST_CONTEXT_SWITCH 0x00000010 -#define A2XX_RB_BC_CONTROL_DISABLE_EZ_NULL_ZCMD_DROP 0x00000020 -#define A2XX_RB_BC_CONTROL_DISABLE_LZ_NULL_ZCMD_DROP 0x00000040 -#define A2XX_RB_BC_CONTROL_ENABLE_AZ_THROTTLE 0x00000080 -#define A2XX_RB_BC_CONTROL_AZ_THROTTLE_COUNT__MASK 0x00001f00 -#define A2XX_RB_BC_CONTROL_AZ_THROTTLE_COUNT__SHIFT 8 -static inline uint32_t A2XX_RB_BC_CONTROL_AZ_THROTTLE_COUNT(uint32_t val) -{ - return ((val) << A2XX_RB_BC_CONTROL_AZ_THROTTLE_COUNT__SHIFT) & 
A2XX_RB_BC_CONTROL_AZ_THROTTLE_COUNT__MASK; -} -#define A2XX_RB_BC_CONTROL_ENABLE_CRC_UPDATE 0x00004000 -#define A2XX_RB_BC_CONTROL_CRC_MODE 0x00008000 -#define A2XX_RB_BC_CONTROL_DISABLE_SAMPLE_COUNTERS 0x00010000 -#define A2XX_RB_BC_CONTROL_DISABLE_ACCUM 0x00020000 -#define A2XX_RB_BC_CONTROL_ACCUM_ALLOC_MASK__MASK 0x003c0000 -#define A2XX_RB_BC_CONTROL_ACCUM_ALLOC_MASK__SHIFT 18 -static inline uint32_t A2XX_RB_BC_CONTROL_ACCUM_ALLOC_MASK(uint32_t val) -{ - return ((val) << A2XX_RB_BC_CONTROL_ACCUM_ALLOC_MASK__SHIFT) & A2XX_RB_BC_CONTROL_ACCUM_ALLOC_MASK__MASK; -} -#define A2XX_RB_BC_CONTROL_LINEAR_PERFORMANCE_ENABLE 0x00400000 -#define A2XX_RB_BC_CONTROL_ACCUM_DATA_FIFO_LIMIT__MASK 0x07800000 -#define A2XX_RB_BC_CONTROL_ACCUM_DATA_FIFO_LIMIT__SHIFT 23 -static inline uint32_t A2XX_RB_BC_CONTROL_ACCUM_DATA_FIFO_LIMIT(uint32_t val) -{ - return ((val) << A2XX_RB_BC_CONTROL_ACCUM_DATA_FIFO_LIMIT__SHIFT) & A2XX_RB_BC_CONTROL_ACCUM_DATA_FIFO_LIMIT__MASK; -} -#define A2XX_RB_BC_CONTROL_MEM_EXPORT_TIMEOUT_SELECT__MASK 0x18000000 -#define A2XX_RB_BC_CONTROL_MEM_EXPORT_TIMEOUT_SELECT__SHIFT 27 -static inline uint32_t A2XX_RB_BC_CONTROL_MEM_EXPORT_TIMEOUT_SELECT(uint32_t val) -{ - return ((val) << A2XX_RB_BC_CONTROL_MEM_EXPORT_TIMEOUT_SELECT__SHIFT) & A2XX_RB_BC_CONTROL_MEM_EXPORT_TIMEOUT_SELECT__MASK; -} -#define A2XX_RB_BC_CONTROL_MEM_EXPORT_LINEAR_MODE_ENABLE 0x20000000 -#define A2XX_RB_BC_CONTROL_CRC_SYSTEM 0x40000000 -#define A2XX_RB_BC_CONTROL_RESERVED6 0x80000000 - -#define REG_A2XX_RB_EDRAM_INFO 0x00000f02 - -#define REG_A2XX_RB_DEBUG_CNTL 0x00000f26 - -#define REG_A2XX_RB_DEBUG_DATA 0x00000f27 - -#define REG_A2XX_RB_SURFACE_INFO 0x00002000 - -#define REG_A2XX_RB_COLOR_INFO 0x00002001 -#define A2XX_RB_COLOR_INFO_FORMAT__MASK 0x0000000f -#define A2XX_RB_COLOR_INFO_FORMAT__SHIFT 0 -static inline uint32_t A2XX_RB_COLOR_INFO_FORMAT(enum a2xx_colorformatx val) -{ - return ((val) << A2XX_RB_COLOR_INFO_FORMAT__SHIFT) & A2XX_RB_COLOR_INFO_FORMAT__MASK; -} -#define 
A2XX_RB_COLOR_INFO_ROUND_MODE__MASK 0x00000030 -#define A2XX_RB_COLOR_INFO_ROUND_MODE__SHIFT 4 -static inline uint32_t A2XX_RB_COLOR_INFO_ROUND_MODE(uint32_t val) -{ - return ((val) << A2XX_RB_COLOR_INFO_ROUND_MODE__SHIFT) & A2XX_RB_COLOR_INFO_ROUND_MODE__MASK; -} -#define A2XX_RB_COLOR_INFO_LINEAR 0x00000040 -#define A2XX_RB_COLOR_INFO_ENDIAN__MASK 0x00000180 -#define A2XX_RB_COLOR_INFO_ENDIAN__SHIFT 7 -static inline uint32_t A2XX_RB_COLOR_INFO_ENDIAN(uint32_t val) -{ - return ((val) << A2XX_RB_COLOR_INFO_ENDIAN__SHIFT) & A2XX_RB_COLOR_INFO_ENDIAN__MASK; -} -#define A2XX_RB_COLOR_INFO_SWAP__MASK 0x00000600 -#define A2XX_RB_COLOR_INFO_SWAP__SHIFT 9 -static inline uint32_t A2XX_RB_COLOR_INFO_SWAP(uint32_t val) -{ - return ((val) << A2XX_RB_COLOR_INFO_SWAP__SHIFT) & A2XX_RB_COLOR_INFO_SWAP__MASK; -} -#define A2XX_RB_COLOR_INFO_BASE__MASK 0xfffff000 -#define A2XX_RB_COLOR_INFO_BASE__SHIFT 12 -static inline uint32_t A2XX_RB_COLOR_INFO_BASE(uint32_t val) -{ - assert(!(val & 0x3ff)); - return ((val >> 10) << A2XX_RB_COLOR_INFO_BASE__SHIFT) & A2XX_RB_COLOR_INFO_BASE__MASK; -} - -#define REG_A2XX_RB_DEPTH_INFO 0x00002002 -#define A2XX_RB_DEPTH_INFO_DEPTH_FORMAT__MASK 0x00000001 -#define A2XX_RB_DEPTH_INFO_DEPTH_FORMAT__SHIFT 0 -static inline uint32_t A2XX_RB_DEPTH_INFO_DEPTH_FORMAT(enum adreno_rb_depth_format val) -{ - return ((val) << A2XX_RB_DEPTH_INFO_DEPTH_FORMAT__SHIFT) & A2XX_RB_DEPTH_INFO_DEPTH_FORMAT__MASK; -} -#define A2XX_RB_DEPTH_INFO_DEPTH_BASE__MASK 0xfffff000 -#define A2XX_RB_DEPTH_INFO_DEPTH_BASE__SHIFT 12 -static inline uint32_t A2XX_RB_DEPTH_INFO_DEPTH_BASE(uint32_t val) -{ - assert(!(val & 0xfff)); - return ((val >> 12) << A2XX_RB_DEPTH_INFO_DEPTH_BASE__SHIFT) & A2XX_RB_DEPTH_INFO_DEPTH_BASE__MASK; -} - -#define REG_A2XX_A225_RB_COLOR_INFO3 0x00002005 - -#define REG_A2XX_COHER_DEST_BASE_0 0x00002006 - -#define REG_A2XX_PA_SC_SCREEN_SCISSOR_TL 0x0000200e -#define A2XX_PA_SC_SCREEN_SCISSOR_TL_WINDOW_OFFSET_DISABLE 0x80000000 -#define 
A2XX_PA_SC_SCREEN_SCISSOR_TL_X__MASK 0x00007fff -#define A2XX_PA_SC_SCREEN_SCISSOR_TL_X__SHIFT 0 -static inline uint32_t A2XX_PA_SC_SCREEN_SCISSOR_TL_X(uint32_t val) -{ - return ((val) << A2XX_PA_SC_SCREEN_SCISSOR_TL_X__SHIFT) & A2XX_PA_SC_SCREEN_SCISSOR_TL_X__MASK; -} -#define A2XX_PA_SC_SCREEN_SCISSOR_TL_Y__MASK 0x7fff0000 -#define A2XX_PA_SC_SCREEN_SCISSOR_TL_Y__SHIFT 16 -static inline uint32_t A2XX_PA_SC_SCREEN_SCISSOR_TL_Y(uint32_t val) -{ - return ((val) << A2XX_PA_SC_SCREEN_SCISSOR_TL_Y__SHIFT) & A2XX_PA_SC_SCREEN_SCISSOR_TL_Y__MASK; -} - -#define REG_A2XX_PA_SC_SCREEN_SCISSOR_BR 0x0000200f -#define A2XX_PA_SC_SCREEN_SCISSOR_BR_WINDOW_OFFSET_DISABLE 0x80000000 -#define A2XX_PA_SC_SCREEN_SCISSOR_BR_X__MASK 0x00007fff -#define A2XX_PA_SC_SCREEN_SCISSOR_BR_X__SHIFT 0 -static inline uint32_t A2XX_PA_SC_SCREEN_SCISSOR_BR_X(uint32_t val) -{ - return ((val) << A2XX_PA_SC_SCREEN_SCISSOR_BR_X__SHIFT) & A2XX_PA_SC_SCREEN_SCISSOR_BR_X__MASK; -} -#define A2XX_PA_SC_SCREEN_SCISSOR_BR_Y__MASK 0x7fff0000 -#define A2XX_PA_SC_SCREEN_SCISSOR_BR_Y__SHIFT 16 -static inline uint32_t A2XX_PA_SC_SCREEN_SCISSOR_BR_Y(uint32_t val) -{ - return ((val) << A2XX_PA_SC_SCREEN_SCISSOR_BR_Y__SHIFT) & A2XX_PA_SC_SCREEN_SCISSOR_BR_Y__MASK; -} - -#define REG_A2XX_PA_SC_WINDOW_OFFSET 0x00002080 -#define A2XX_PA_SC_WINDOW_OFFSET_X__MASK 0x00007fff -#define A2XX_PA_SC_WINDOW_OFFSET_X__SHIFT 0 -static inline uint32_t A2XX_PA_SC_WINDOW_OFFSET_X(int32_t val) -{ - return ((val) << A2XX_PA_SC_WINDOW_OFFSET_X__SHIFT) & A2XX_PA_SC_WINDOW_OFFSET_X__MASK; -} -#define A2XX_PA_SC_WINDOW_OFFSET_Y__MASK 0x7fff0000 -#define A2XX_PA_SC_WINDOW_OFFSET_Y__SHIFT 16 -static inline uint32_t A2XX_PA_SC_WINDOW_OFFSET_Y(int32_t val) -{ - return ((val) << A2XX_PA_SC_WINDOW_OFFSET_Y__SHIFT) & A2XX_PA_SC_WINDOW_OFFSET_Y__MASK; -} -#define A2XX_PA_SC_WINDOW_OFFSET_DISABLE 0x80000000 - -#define REG_A2XX_PA_SC_WINDOW_SCISSOR_TL 0x00002081 -#define A2XX_PA_SC_WINDOW_SCISSOR_TL_WINDOW_OFFSET_DISABLE 0x80000000 -#define 
A2XX_PA_SC_WINDOW_SCISSOR_TL_X__MASK 0x00007fff -#define A2XX_PA_SC_WINDOW_SCISSOR_TL_X__SHIFT 0 -static inline uint32_t A2XX_PA_SC_WINDOW_SCISSOR_TL_X(uint32_t val) -{ - return ((val) << A2XX_PA_SC_WINDOW_SCISSOR_TL_X__SHIFT) & A2XX_PA_SC_WINDOW_SCISSOR_TL_X__MASK; -} -#define A2XX_PA_SC_WINDOW_SCISSOR_TL_Y__MASK 0x7fff0000 -#define A2XX_PA_SC_WINDOW_SCISSOR_TL_Y__SHIFT 16 -static inline uint32_t A2XX_PA_SC_WINDOW_SCISSOR_TL_Y(uint32_t val) -{ - return ((val) << A2XX_PA_SC_WINDOW_SCISSOR_TL_Y__SHIFT) & A2XX_PA_SC_WINDOW_SCISSOR_TL_Y__MASK; -} - -#define REG_A2XX_PA_SC_WINDOW_SCISSOR_BR 0x00002082 -#define A2XX_PA_SC_WINDOW_SCISSOR_BR_WINDOW_OFFSET_DISABLE 0x80000000 -#define A2XX_PA_SC_WINDOW_SCISSOR_BR_X__MASK 0x00007fff -#define A2XX_PA_SC_WINDOW_SCISSOR_BR_X__SHIFT 0 -static inline uint32_t A2XX_PA_SC_WINDOW_SCISSOR_BR_X(uint32_t val) -{ - return ((val) << A2XX_PA_SC_WINDOW_SCISSOR_BR_X__SHIFT) & A2XX_PA_SC_WINDOW_SCISSOR_BR_X__MASK; -} -#define A2XX_PA_SC_WINDOW_SCISSOR_BR_Y__MASK 0x7fff0000 -#define A2XX_PA_SC_WINDOW_SCISSOR_BR_Y__SHIFT 16 -static inline uint32_t A2XX_PA_SC_WINDOW_SCISSOR_BR_Y(uint32_t val) -{ - return ((val) << A2XX_PA_SC_WINDOW_SCISSOR_BR_Y__SHIFT) & A2XX_PA_SC_WINDOW_SCISSOR_BR_Y__MASK; -} - -#define REG_A2XX_UNKNOWN_2010 0x00002010 - -#define REG_A2XX_VGT_MAX_VTX_INDX 0x00002100 - -#define REG_A2XX_VGT_MIN_VTX_INDX 0x00002101 - -#define REG_A2XX_VGT_INDX_OFFSET 0x00002102 - -#define REG_A2XX_A225_PC_MULTI_PRIM_IB_RESET_INDX 0x00002103 - -#define REG_A2XX_RB_COLOR_MASK 0x00002104 -#define A2XX_RB_COLOR_MASK_WRITE_RED 0x00000001 -#define A2XX_RB_COLOR_MASK_WRITE_GREEN 0x00000002 -#define A2XX_RB_COLOR_MASK_WRITE_BLUE 0x00000004 -#define A2XX_RB_COLOR_MASK_WRITE_ALPHA 0x00000008 - -#define REG_A2XX_RB_BLEND_RED 0x00002105 - -#define REG_A2XX_RB_BLEND_GREEN 0x00002106 - -#define REG_A2XX_RB_BLEND_BLUE 0x00002107 - -#define REG_A2XX_RB_BLEND_ALPHA 0x00002108 - -#define REG_A2XX_RB_FOG_COLOR 0x00002109 -#define A2XX_RB_FOG_COLOR_FOG_RED__MASK 
0x000000ff -#define A2XX_RB_FOG_COLOR_FOG_RED__SHIFT 0 -static inline uint32_t A2XX_RB_FOG_COLOR_FOG_RED(uint32_t val) -{ - return ((val) << A2XX_RB_FOG_COLOR_FOG_RED__SHIFT) & A2XX_RB_FOG_COLOR_FOG_RED__MASK; -} -#define A2XX_RB_FOG_COLOR_FOG_GREEN__MASK 0x0000ff00 -#define A2XX_RB_FOG_COLOR_FOG_GREEN__SHIFT 8 -static inline uint32_t A2XX_RB_FOG_COLOR_FOG_GREEN(uint32_t val) -{ - return ((val) << A2XX_RB_FOG_COLOR_FOG_GREEN__SHIFT) & A2XX_RB_FOG_COLOR_FOG_GREEN__MASK; -} -#define A2XX_RB_FOG_COLOR_FOG_BLUE__MASK 0x00ff0000 -#define A2XX_RB_FOG_COLOR_FOG_BLUE__SHIFT 16 -static inline uint32_t A2XX_RB_FOG_COLOR_FOG_BLUE(uint32_t val) -{ - return ((val) << A2XX_RB_FOG_COLOR_FOG_BLUE__SHIFT) & A2XX_RB_FOG_COLOR_FOG_BLUE__MASK; -} - -#define REG_A2XX_RB_STENCILREFMASK_BF 0x0000210c -#define A2XX_RB_STENCILREFMASK_BF_STENCILREF__MASK 0x000000ff -#define A2XX_RB_STENCILREFMASK_BF_STENCILREF__SHIFT 0 -static inline uint32_t A2XX_RB_STENCILREFMASK_BF_STENCILREF(uint32_t val) -{ - return ((val) << A2XX_RB_STENCILREFMASK_BF_STENCILREF__SHIFT) & A2XX_RB_STENCILREFMASK_BF_STENCILREF__MASK; -} -#define A2XX_RB_STENCILREFMASK_BF_STENCILMASK__MASK 0x0000ff00 -#define A2XX_RB_STENCILREFMASK_BF_STENCILMASK__SHIFT 8 -static inline uint32_t A2XX_RB_STENCILREFMASK_BF_STENCILMASK(uint32_t val) -{ - return ((val) << A2XX_RB_STENCILREFMASK_BF_STENCILMASK__SHIFT) & A2XX_RB_STENCILREFMASK_BF_STENCILMASK__MASK; -} -#define A2XX_RB_STENCILREFMASK_BF_STENCILWRITEMASK__MASK 0x00ff0000 -#define A2XX_RB_STENCILREFMASK_BF_STENCILWRITEMASK__SHIFT 16 -static inline uint32_t A2XX_RB_STENCILREFMASK_BF_STENCILWRITEMASK(uint32_t val) -{ - return ((val) << A2XX_RB_STENCILREFMASK_BF_STENCILWRITEMASK__SHIFT) & A2XX_RB_STENCILREFMASK_BF_STENCILWRITEMASK__MASK; -} - -#define REG_A2XX_RB_STENCILREFMASK 0x0000210d -#define A2XX_RB_STENCILREFMASK_STENCILREF__MASK 0x000000ff -#define A2XX_RB_STENCILREFMASK_STENCILREF__SHIFT 0 -static inline uint32_t A2XX_RB_STENCILREFMASK_STENCILREF(uint32_t val) -{ - return 
((val) << A2XX_RB_STENCILREFMASK_STENCILREF__SHIFT) & A2XX_RB_STENCILREFMASK_STENCILREF__MASK; -} -#define A2XX_RB_STENCILREFMASK_STENCILMASK__MASK 0x0000ff00 -#define A2XX_RB_STENCILREFMASK_STENCILMASK__SHIFT 8 -static inline uint32_t A2XX_RB_STENCILREFMASK_STENCILMASK(uint32_t val) -{ - return ((val) << A2XX_RB_STENCILREFMASK_STENCILMASK__SHIFT) & A2XX_RB_STENCILREFMASK_STENCILMASK__MASK; -} -#define A2XX_RB_STENCILREFMASK_STENCILWRITEMASK__MASK 0x00ff0000 -#define A2XX_RB_STENCILREFMASK_STENCILWRITEMASK__SHIFT 16 -static inline uint32_t A2XX_RB_STENCILREFMASK_STENCILWRITEMASK(uint32_t val) -{ - return ((val) << A2XX_RB_STENCILREFMASK_STENCILWRITEMASK__SHIFT) & A2XX_RB_STENCILREFMASK_STENCILWRITEMASK__MASK; -} - -#define REG_A2XX_RB_ALPHA_REF 0x0000210e - -#define REG_A2XX_PA_CL_VPORT_XSCALE 0x0000210f -#define A2XX_PA_CL_VPORT_XSCALE__MASK 0xffffffff -#define A2XX_PA_CL_VPORT_XSCALE__SHIFT 0 -static inline uint32_t A2XX_PA_CL_VPORT_XSCALE(float val) -{ - return ((fui(val)) << A2XX_PA_CL_VPORT_XSCALE__SHIFT) & A2XX_PA_CL_VPORT_XSCALE__MASK; -} - -#define REG_A2XX_PA_CL_VPORT_XOFFSET 0x00002110 -#define A2XX_PA_CL_VPORT_XOFFSET__MASK 0xffffffff -#define A2XX_PA_CL_VPORT_XOFFSET__SHIFT 0 -static inline uint32_t A2XX_PA_CL_VPORT_XOFFSET(float val) -{ - return ((fui(val)) << A2XX_PA_CL_VPORT_XOFFSET__SHIFT) & A2XX_PA_CL_VPORT_XOFFSET__MASK; -} - -#define REG_A2XX_PA_CL_VPORT_YSCALE 0x00002111 -#define A2XX_PA_CL_VPORT_YSCALE__MASK 0xffffffff -#define A2XX_PA_CL_VPORT_YSCALE__SHIFT 0 -static inline uint32_t A2XX_PA_CL_VPORT_YSCALE(float val) -{ - return ((fui(val)) << A2XX_PA_CL_VPORT_YSCALE__SHIFT) & A2XX_PA_CL_VPORT_YSCALE__MASK; -} - -#define REG_A2XX_PA_CL_VPORT_YOFFSET 0x00002112 -#define A2XX_PA_CL_VPORT_YOFFSET__MASK 0xffffffff -#define A2XX_PA_CL_VPORT_YOFFSET__SHIFT 0 -static inline uint32_t A2XX_PA_CL_VPORT_YOFFSET(float val) -{ - return ((fui(val)) << A2XX_PA_CL_VPORT_YOFFSET__SHIFT) & A2XX_PA_CL_VPORT_YOFFSET__MASK; -} - -#define 
REG_A2XX_PA_CL_VPORT_ZSCALE 0x00002113 -#define A2XX_PA_CL_VPORT_ZSCALE__MASK 0xffffffff -#define A2XX_PA_CL_VPORT_ZSCALE__SHIFT 0 -static inline uint32_t A2XX_PA_CL_VPORT_ZSCALE(float val) -{ - return ((fui(val)) << A2XX_PA_CL_VPORT_ZSCALE__SHIFT) & A2XX_PA_CL_VPORT_ZSCALE__MASK; -} - -#define REG_A2XX_PA_CL_VPORT_ZOFFSET 0x00002114 -#define A2XX_PA_CL_VPORT_ZOFFSET__MASK 0xffffffff -#define A2XX_PA_CL_VPORT_ZOFFSET__SHIFT 0 -static inline uint32_t A2XX_PA_CL_VPORT_ZOFFSET(float val) -{ - return ((fui(val)) << A2XX_PA_CL_VPORT_ZOFFSET__SHIFT) & A2XX_PA_CL_VPORT_ZOFFSET__MASK; -} - -#define REG_A2XX_SQ_PROGRAM_CNTL 0x00002180 -#define A2XX_SQ_PROGRAM_CNTL_VS_REGS__MASK 0x000000ff -#define A2XX_SQ_PROGRAM_CNTL_VS_REGS__SHIFT 0 -static inline uint32_t A2XX_SQ_PROGRAM_CNTL_VS_REGS(uint32_t val) -{ - return ((val) << A2XX_SQ_PROGRAM_CNTL_VS_REGS__SHIFT) & A2XX_SQ_PROGRAM_CNTL_VS_REGS__MASK; -} -#define A2XX_SQ_PROGRAM_CNTL_PS_REGS__MASK 0x0000ff00 -#define A2XX_SQ_PROGRAM_CNTL_PS_REGS__SHIFT 8 -static inline uint32_t A2XX_SQ_PROGRAM_CNTL_PS_REGS(uint32_t val) -{ - return ((val) << A2XX_SQ_PROGRAM_CNTL_PS_REGS__SHIFT) & A2XX_SQ_PROGRAM_CNTL_PS_REGS__MASK; -} -#define A2XX_SQ_PROGRAM_CNTL_VS_RESOURCE 0x00010000 -#define A2XX_SQ_PROGRAM_CNTL_PS_RESOURCE 0x00020000 -#define A2XX_SQ_PROGRAM_CNTL_PARAM_GEN 0x00040000 -#define A2XX_SQ_PROGRAM_CNTL_GEN_INDEX_PIX 0x00080000 -#define A2XX_SQ_PROGRAM_CNTL_VS_EXPORT_COUNT__MASK 0x00f00000 -#define A2XX_SQ_PROGRAM_CNTL_VS_EXPORT_COUNT__SHIFT 20 -static inline uint32_t A2XX_SQ_PROGRAM_CNTL_VS_EXPORT_COUNT(uint32_t val) -{ - return ((val) << A2XX_SQ_PROGRAM_CNTL_VS_EXPORT_COUNT__SHIFT) & A2XX_SQ_PROGRAM_CNTL_VS_EXPORT_COUNT__MASK; -} -#define A2XX_SQ_PROGRAM_CNTL_VS_EXPORT_MODE__MASK 0x07000000 -#define A2XX_SQ_PROGRAM_CNTL_VS_EXPORT_MODE__SHIFT 24 -static inline uint32_t A2XX_SQ_PROGRAM_CNTL_VS_EXPORT_MODE(enum a2xx_sq_ps_vtx_mode val) -{ - return ((val) << A2XX_SQ_PROGRAM_CNTL_VS_EXPORT_MODE__SHIFT) & 
A2XX_SQ_PROGRAM_CNTL_VS_EXPORT_MODE__MASK; -} -#define A2XX_SQ_PROGRAM_CNTL_PS_EXPORT_MODE__MASK 0x78000000 -#define A2XX_SQ_PROGRAM_CNTL_PS_EXPORT_MODE__SHIFT 27 -static inline uint32_t A2XX_SQ_PROGRAM_CNTL_PS_EXPORT_MODE(uint32_t val) -{ - return ((val) << A2XX_SQ_PROGRAM_CNTL_PS_EXPORT_MODE__SHIFT) & A2XX_SQ_PROGRAM_CNTL_PS_EXPORT_MODE__MASK; -} -#define A2XX_SQ_PROGRAM_CNTL_GEN_INDEX_VTX 0x80000000 - -#define REG_A2XX_SQ_CONTEXT_MISC 0x00002181 -#define A2XX_SQ_CONTEXT_MISC_INST_PRED_OPTIMIZE 0x00000001 -#define A2XX_SQ_CONTEXT_MISC_SC_OUTPUT_SCREEN_XY 0x00000002 -#define A2XX_SQ_CONTEXT_MISC_SC_SAMPLE_CNTL__MASK 0x0000000c -#define A2XX_SQ_CONTEXT_MISC_SC_SAMPLE_CNTL__SHIFT 2 -static inline uint32_t A2XX_SQ_CONTEXT_MISC_SC_SAMPLE_CNTL(enum a2xx_sq_sample_cntl val) -{ - return ((val) << A2XX_SQ_CONTEXT_MISC_SC_SAMPLE_CNTL__SHIFT) & A2XX_SQ_CONTEXT_MISC_SC_SAMPLE_CNTL__MASK; -} -#define A2XX_SQ_CONTEXT_MISC_PARAM_GEN_POS__MASK 0x0000ff00 -#define A2XX_SQ_CONTEXT_MISC_PARAM_GEN_POS__SHIFT 8 -static inline uint32_t A2XX_SQ_CONTEXT_MISC_PARAM_GEN_POS(uint32_t val) -{ - return ((val) << A2XX_SQ_CONTEXT_MISC_PARAM_GEN_POS__SHIFT) & A2XX_SQ_CONTEXT_MISC_PARAM_GEN_POS__MASK; -} -#define A2XX_SQ_CONTEXT_MISC_PERFCOUNTER_REF 0x00010000 -#define A2XX_SQ_CONTEXT_MISC_YEILD_OPTIMIZE 0x00020000 -#define A2XX_SQ_CONTEXT_MISC_TX_CACHE_SEL 0x00040000 - -#define REG_A2XX_SQ_INTERPOLATOR_CNTL 0x00002182 -#define A2XX_SQ_INTERPOLATOR_CNTL_PARAM_SHADE__MASK 0x0000ffff -#define A2XX_SQ_INTERPOLATOR_CNTL_PARAM_SHADE__SHIFT 0 -static inline uint32_t A2XX_SQ_INTERPOLATOR_CNTL_PARAM_SHADE(uint32_t val) -{ - return ((val) << A2XX_SQ_INTERPOLATOR_CNTL_PARAM_SHADE__SHIFT) & A2XX_SQ_INTERPOLATOR_CNTL_PARAM_SHADE__MASK; -} -#define A2XX_SQ_INTERPOLATOR_CNTL_SAMPLING_PATTERN__MASK 0xffff0000 -#define A2XX_SQ_INTERPOLATOR_CNTL_SAMPLING_PATTERN__SHIFT 16 -static inline uint32_t A2XX_SQ_INTERPOLATOR_CNTL_SAMPLING_PATTERN(uint32_t val) -{ - return ((val) << 
A2XX_SQ_INTERPOLATOR_CNTL_SAMPLING_PATTERN__SHIFT) & A2XX_SQ_INTERPOLATOR_CNTL_SAMPLING_PATTERN__MASK; -} - -#define REG_A2XX_SQ_WRAPPING_0 0x00002183 -#define A2XX_SQ_WRAPPING_0_PARAM_WRAP_0__MASK 0x0000000f -#define A2XX_SQ_WRAPPING_0_PARAM_WRAP_0__SHIFT 0 -static inline uint32_t A2XX_SQ_WRAPPING_0_PARAM_WRAP_0(uint32_t val) -{ - return ((val) << A2XX_SQ_WRAPPING_0_PARAM_WRAP_0__SHIFT) & A2XX_SQ_WRAPPING_0_PARAM_WRAP_0__MASK; -} -#define A2XX_SQ_WRAPPING_0_PARAM_WRAP_1__MASK 0x000000f0 -#define A2XX_SQ_WRAPPING_0_PARAM_WRAP_1__SHIFT 4 -static inline uint32_t A2XX_SQ_WRAPPING_0_PARAM_WRAP_1(uint32_t val) -{ - return ((val) << A2XX_SQ_WRAPPING_0_PARAM_WRAP_1__SHIFT) & A2XX_SQ_WRAPPING_0_PARAM_WRAP_1__MASK; -} -#define A2XX_SQ_WRAPPING_0_PARAM_WRAP_2__MASK 0x00000f00 -#define A2XX_SQ_WRAPPING_0_PARAM_WRAP_2__SHIFT 8 -static inline uint32_t A2XX_SQ_WRAPPING_0_PARAM_WRAP_2(uint32_t val) -{ - return ((val) << A2XX_SQ_WRAPPING_0_PARAM_WRAP_2__SHIFT) & A2XX_SQ_WRAPPING_0_PARAM_WRAP_2__MASK; -} -#define A2XX_SQ_WRAPPING_0_PARAM_WRAP_3__MASK 0x0000f000 -#define A2XX_SQ_WRAPPING_0_PARAM_WRAP_3__SHIFT 12 -static inline uint32_t A2XX_SQ_WRAPPING_0_PARAM_WRAP_3(uint32_t val) -{ - return ((val) << A2XX_SQ_WRAPPING_0_PARAM_WRAP_3__SHIFT) & A2XX_SQ_WRAPPING_0_PARAM_WRAP_3__MASK; -} -#define A2XX_SQ_WRAPPING_0_PARAM_WRAP_4__MASK 0x000f0000 -#define A2XX_SQ_WRAPPING_0_PARAM_WRAP_4__SHIFT 16 -static inline uint32_t A2XX_SQ_WRAPPING_0_PARAM_WRAP_4(uint32_t val) -{ - return ((val) << A2XX_SQ_WRAPPING_0_PARAM_WRAP_4__SHIFT) & A2XX_SQ_WRAPPING_0_PARAM_WRAP_4__MASK; -} -#define A2XX_SQ_WRAPPING_0_PARAM_WRAP_5__MASK 0x00f00000 -#define A2XX_SQ_WRAPPING_0_PARAM_WRAP_5__SHIFT 20 -static inline uint32_t A2XX_SQ_WRAPPING_0_PARAM_WRAP_5(uint32_t val) -{ - return ((val) << A2XX_SQ_WRAPPING_0_PARAM_WRAP_5__SHIFT) & A2XX_SQ_WRAPPING_0_PARAM_WRAP_5__MASK; -} -#define A2XX_SQ_WRAPPING_0_PARAM_WRAP_6__MASK 0x0f000000 -#define A2XX_SQ_WRAPPING_0_PARAM_WRAP_6__SHIFT 24 -static inline uint32_t 
A2XX_SQ_WRAPPING_0_PARAM_WRAP_6(uint32_t val) -{ - return ((val) << A2XX_SQ_WRAPPING_0_PARAM_WRAP_6__SHIFT) & A2XX_SQ_WRAPPING_0_PARAM_WRAP_6__MASK; -} -#define A2XX_SQ_WRAPPING_0_PARAM_WRAP_7__MASK 0xf0000000 -#define A2XX_SQ_WRAPPING_0_PARAM_WRAP_7__SHIFT 28 -static inline uint32_t A2XX_SQ_WRAPPING_0_PARAM_WRAP_7(uint32_t val) -{ - return ((val) << A2XX_SQ_WRAPPING_0_PARAM_WRAP_7__SHIFT) & A2XX_SQ_WRAPPING_0_PARAM_WRAP_7__MASK; -} - -#define REG_A2XX_SQ_WRAPPING_1 0x00002184 -#define A2XX_SQ_WRAPPING_1_PARAM_WRAP_8__MASK 0x0000000f -#define A2XX_SQ_WRAPPING_1_PARAM_WRAP_8__SHIFT 0 -static inline uint32_t A2XX_SQ_WRAPPING_1_PARAM_WRAP_8(uint32_t val) -{ - return ((val) << A2XX_SQ_WRAPPING_1_PARAM_WRAP_8__SHIFT) & A2XX_SQ_WRAPPING_1_PARAM_WRAP_8__MASK; -} -#define A2XX_SQ_WRAPPING_1_PARAM_WRAP_9__MASK 0x000000f0 -#define A2XX_SQ_WRAPPING_1_PARAM_WRAP_9__SHIFT 4 -static inline uint32_t A2XX_SQ_WRAPPING_1_PARAM_WRAP_9(uint32_t val) -{ - return ((val) << A2XX_SQ_WRAPPING_1_PARAM_WRAP_9__SHIFT) & A2XX_SQ_WRAPPING_1_PARAM_WRAP_9__MASK; -} -#define A2XX_SQ_WRAPPING_1_PARAM_WRAP_10__MASK 0x00000f00 -#define A2XX_SQ_WRAPPING_1_PARAM_WRAP_10__SHIFT 8 -static inline uint32_t A2XX_SQ_WRAPPING_1_PARAM_WRAP_10(uint32_t val) -{ - return ((val) << A2XX_SQ_WRAPPING_1_PARAM_WRAP_10__SHIFT) & A2XX_SQ_WRAPPING_1_PARAM_WRAP_10__MASK; -} -#define A2XX_SQ_WRAPPING_1_PARAM_WRAP_11__MASK 0x0000f000 -#define A2XX_SQ_WRAPPING_1_PARAM_WRAP_11__SHIFT 12 -static inline uint32_t A2XX_SQ_WRAPPING_1_PARAM_WRAP_11(uint32_t val) -{ - return ((val) << A2XX_SQ_WRAPPING_1_PARAM_WRAP_11__SHIFT) & A2XX_SQ_WRAPPING_1_PARAM_WRAP_11__MASK; -} -#define A2XX_SQ_WRAPPING_1_PARAM_WRAP_12__MASK 0x000f0000 -#define A2XX_SQ_WRAPPING_1_PARAM_WRAP_12__SHIFT 16 -static inline uint32_t A2XX_SQ_WRAPPING_1_PARAM_WRAP_12(uint32_t val) -{ - return ((val) << A2XX_SQ_WRAPPING_1_PARAM_WRAP_12__SHIFT) & A2XX_SQ_WRAPPING_1_PARAM_WRAP_12__MASK; -} -#define A2XX_SQ_WRAPPING_1_PARAM_WRAP_13__MASK 0x00f00000 -#define 
A2XX_SQ_WRAPPING_1_PARAM_WRAP_13__SHIFT 20 -static inline uint32_t A2XX_SQ_WRAPPING_1_PARAM_WRAP_13(uint32_t val) -{ - return ((val) << A2XX_SQ_WRAPPING_1_PARAM_WRAP_13__SHIFT) & A2XX_SQ_WRAPPING_1_PARAM_WRAP_13__MASK; -} -#define A2XX_SQ_WRAPPING_1_PARAM_WRAP_14__MASK 0x0f000000 -#define A2XX_SQ_WRAPPING_1_PARAM_WRAP_14__SHIFT 24 -static inline uint32_t A2XX_SQ_WRAPPING_1_PARAM_WRAP_14(uint32_t val) -{ - return ((val) << A2XX_SQ_WRAPPING_1_PARAM_WRAP_14__SHIFT) & A2XX_SQ_WRAPPING_1_PARAM_WRAP_14__MASK; -} -#define A2XX_SQ_WRAPPING_1_PARAM_WRAP_15__MASK 0xf0000000 -#define A2XX_SQ_WRAPPING_1_PARAM_WRAP_15__SHIFT 28 -static inline uint32_t A2XX_SQ_WRAPPING_1_PARAM_WRAP_15(uint32_t val) -{ - return ((val) << A2XX_SQ_WRAPPING_1_PARAM_WRAP_15__SHIFT) & A2XX_SQ_WRAPPING_1_PARAM_WRAP_15__MASK; -} - -#define REG_A2XX_SQ_PS_PROGRAM 0x000021f6 -#define A2XX_SQ_PS_PROGRAM_BASE__MASK 0x00000fff -#define A2XX_SQ_PS_PROGRAM_BASE__SHIFT 0 -static inline uint32_t A2XX_SQ_PS_PROGRAM_BASE(uint32_t val) -{ - return ((val) << A2XX_SQ_PS_PROGRAM_BASE__SHIFT) & A2XX_SQ_PS_PROGRAM_BASE__MASK; -} -#define A2XX_SQ_PS_PROGRAM_SIZE__MASK 0x00fff000 -#define A2XX_SQ_PS_PROGRAM_SIZE__SHIFT 12 -static inline uint32_t A2XX_SQ_PS_PROGRAM_SIZE(uint32_t val) -{ - return ((val) << A2XX_SQ_PS_PROGRAM_SIZE__SHIFT) & A2XX_SQ_PS_PROGRAM_SIZE__MASK; -} - -#define REG_A2XX_SQ_VS_PROGRAM 0x000021f7 -#define A2XX_SQ_VS_PROGRAM_BASE__MASK 0x00000fff -#define A2XX_SQ_VS_PROGRAM_BASE__SHIFT 0 -static inline uint32_t A2XX_SQ_VS_PROGRAM_BASE(uint32_t val) -{ - return ((val) << A2XX_SQ_VS_PROGRAM_BASE__SHIFT) & A2XX_SQ_VS_PROGRAM_BASE__MASK; -} -#define A2XX_SQ_VS_PROGRAM_SIZE__MASK 0x00fff000 -#define A2XX_SQ_VS_PROGRAM_SIZE__SHIFT 12 -static inline uint32_t A2XX_SQ_VS_PROGRAM_SIZE(uint32_t val) -{ - return ((val) << A2XX_SQ_VS_PROGRAM_SIZE__SHIFT) & A2XX_SQ_VS_PROGRAM_SIZE__MASK; -} - -#define REG_A2XX_VGT_EVENT_INITIATOR 0x000021f9 - -#define REG_A2XX_VGT_DRAW_INITIATOR 0x000021fc -#define 
A2XX_VGT_DRAW_INITIATOR_PRIM_TYPE__MASK 0x0000003f -#define A2XX_VGT_DRAW_INITIATOR_PRIM_TYPE__SHIFT 0 -static inline uint32_t A2XX_VGT_DRAW_INITIATOR_PRIM_TYPE(enum pc_di_primtype val) -{ - return ((val) << A2XX_VGT_DRAW_INITIATOR_PRIM_TYPE__SHIFT) & A2XX_VGT_DRAW_INITIATOR_PRIM_TYPE__MASK; -} -#define A2XX_VGT_DRAW_INITIATOR_SOURCE_SELECT__MASK 0x000000c0 -#define A2XX_VGT_DRAW_INITIATOR_SOURCE_SELECT__SHIFT 6 -static inline uint32_t A2XX_VGT_DRAW_INITIATOR_SOURCE_SELECT(enum pc_di_src_sel val) -{ - return ((val) << A2XX_VGT_DRAW_INITIATOR_SOURCE_SELECT__SHIFT) & A2XX_VGT_DRAW_INITIATOR_SOURCE_SELECT__MASK; -} -#define A2XX_VGT_DRAW_INITIATOR_VIS_CULL__MASK 0x00000600 -#define A2XX_VGT_DRAW_INITIATOR_VIS_CULL__SHIFT 9 -static inline uint32_t A2XX_VGT_DRAW_INITIATOR_VIS_CULL(enum pc_di_vis_cull_mode val) -{ - return ((val) << A2XX_VGT_DRAW_INITIATOR_VIS_CULL__SHIFT) & A2XX_VGT_DRAW_INITIATOR_VIS_CULL__MASK; -} -#define A2XX_VGT_DRAW_INITIATOR_INDEX_SIZE__MASK 0x00000800 -#define A2XX_VGT_DRAW_INITIATOR_INDEX_SIZE__SHIFT 11 -static inline uint32_t A2XX_VGT_DRAW_INITIATOR_INDEX_SIZE(enum pc_di_index_size val) -{ - return ((val) << A2XX_VGT_DRAW_INITIATOR_INDEX_SIZE__SHIFT) & A2XX_VGT_DRAW_INITIATOR_INDEX_SIZE__MASK; -} -#define A2XX_VGT_DRAW_INITIATOR_NOT_EOP 0x00001000 -#define A2XX_VGT_DRAW_INITIATOR_SMALL_INDEX 0x00002000 -#define A2XX_VGT_DRAW_INITIATOR_PRE_DRAW_INITIATOR_ENABLE 0x00004000 -#define A2XX_VGT_DRAW_INITIATOR_NUM_INSTANCES__MASK 0xff000000 -#define A2XX_VGT_DRAW_INITIATOR_NUM_INSTANCES__SHIFT 24 -static inline uint32_t A2XX_VGT_DRAW_INITIATOR_NUM_INSTANCES(uint32_t val) -{ - return ((val) << A2XX_VGT_DRAW_INITIATOR_NUM_INSTANCES__SHIFT) & A2XX_VGT_DRAW_INITIATOR_NUM_INSTANCES__MASK; -} - -#define REG_A2XX_VGT_IMMED_DATA 0x000021fd - -#define REG_A2XX_RB_DEPTHCONTROL 0x00002200 -#define A2XX_RB_DEPTHCONTROL_STENCIL_ENABLE 0x00000001 -#define A2XX_RB_DEPTHCONTROL_Z_ENABLE 0x00000002 -#define A2XX_RB_DEPTHCONTROL_Z_WRITE_ENABLE 0x00000004 -#define 
A2XX_RB_DEPTHCONTROL_EARLY_Z_ENABLE 0x00000008 -#define A2XX_RB_DEPTHCONTROL_ZFUNC__MASK 0x00000070 -#define A2XX_RB_DEPTHCONTROL_ZFUNC__SHIFT 4 -static inline uint32_t A2XX_RB_DEPTHCONTROL_ZFUNC(enum adreno_compare_func val) -{ - return ((val) << A2XX_RB_DEPTHCONTROL_ZFUNC__SHIFT) & A2XX_RB_DEPTHCONTROL_ZFUNC__MASK; -} -#define A2XX_RB_DEPTHCONTROL_BACKFACE_ENABLE 0x00000080 -#define A2XX_RB_DEPTHCONTROL_STENCILFUNC__MASK 0x00000700 -#define A2XX_RB_DEPTHCONTROL_STENCILFUNC__SHIFT 8 -static inline uint32_t A2XX_RB_DEPTHCONTROL_STENCILFUNC(enum adreno_compare_func val) -{ - return ((val) << A2XX_RB_DEPTHCONTROL_STENCILFUNC__SHIFT) & A2XX_RB_DEPTHCONTROL_STENCILFUNC__MASK; -} -#define A2XX_RB_DEPTHCONTROL_STENCILFAIL__MASK 0x00003800 -#define A2XX_RB_DEPTHCONTROL_STENCILFAIL__SHIFT 11 -static inline uint32_t A2XX_RB_DEPTHCONTROL_STENCILFAIL(enum adreno_stencil_op val) -{ - return ((val) << A2XX_RB_DEPTHCONTROL_STENCILFAIL__SHIFT) & A2XX_RB_DEPTHCONTROL_STENCILFAIL__MASK; -} -#define A2XX_RB_DEPTHCONTROL_STENCILZPASS__MASK 0x0001c000 -#define A2XX_RB_DEPTHCONTROL_STENCILZPASS__SHIFT 14 -static inline uint32_t A2XX_RB_DEPTHCONTROL_STENCILZPASS(enum adreno_stencil_op val) -{ - return ((val) << A2XX_RB_DEPTHCONTROL_STENCILZPASS__SHIFT) & A2XX_RB_DEPTHCONTROL_STENCILZPASS__MASK; -} -#define A2XX_RB_DEPTHCONTROL_STENCILZFAIL__MASK 0x000e0000 -#define A2XX_RB_DEPTHCONTROL_STENCILZFAIL__SHIFT 17 -static inline uint32_t A2XX_RB_DEPTHCONTROL_STENCILZFAIL(enum adreno_stencil_op val) -{ - return ((val) << A2XX_RB_DEPTHCONTROL_STENCILZFAIL__SHIFT) & A2XX_RB_DEPTHCONTROL_STENCILZFAIL__MASK; -} -#define A2XX_RB_DEPTHCONTROL_STENCILFUNC_BF__MASK 0x00700000 -#define A2XX_RB_DEPTHCONTROL_STENCILFUNC_BF__SHIFT 20 -static inline uint32_t A2XX_RB_DEPTHCONTROL_STENCILFUNC_BF(enum adreno_compare_func val) -{ - return ((val) << A2XX_RB_DEPTHCONTROL_STENCILFUNC_BF__SHIFT) & A2XX_RB_DEPTHCONTROL_STENCILFUNC_BF__MASK; -} -#define A2XX_RB_DEPTHCONTROL_STENCILFAIL_BF__MASK 0x03800000 -#define 
A2XX_RB_DEPTHCONTROL_STENCILFAIL_BF__SHIFT 23 -static inline uint32_t A2XX_RB_DEPTHCONTROL_STENCILFAIL_BF(enum adreno_stencil_op val) -{ - return ((val) << A2XX_RB_DEPTHCONTROL_STENCILFAIL_BF__SHIFT) & A2XX_RB_DEPTHCONTROL_STENCILFAIL_BF__MASK; -} -#define A2XX_RB_DEPTHCONTROL_STENCILZPASS_BF__MASK 0x1c000000 -#define A2XX_RB_DEPTHCONTROL_STENCILZPASS_BF__SHIFT 26 -static inline uint32_t A2XX_RB_DEPTHCONTROL_STENCILZPASS_BF(enum adreno_stencil_op val) -{ - return ((val) << A2XX_RB_DEPTHCONTROL_STENCILZPASS_BF__SHIFT) & A2XX_RB_DEPTHCONTROL_STENCILZPASS_BF__MASK; -} -#define A2XX_RB_DEPTHCONTROL_STENCILZFAIL_BF__MASK 0xe0000000 -#define A2XX_RB_DEPTHCONTROL_STENCILZFAIL_BF__SHIFT 29 -static inline uint32_t A2XX_RB_DEPTHCONTROL_STENCILZFAIL_BF(enum adreno_stencil_op val) -{ - return ((val) << A2XX_RB_DEPTHCONTROL_STENCILZFAIL_BF__SHIFT) & A2XX_RB_DEPTHCONTROL_STENCILZFAIL_BF__MASK; -} - -#define REG_A2XX_RB_BLEND_CONTROL 0x00002201 -#define A2XX_RB_BLEND_CONTROL_COLOR_SRCBLEND__MASK 0x0000001f -#define A2XX_RB_BLEND_CONTROL_COLOR_SRCBLEND__SHIFT 0 -static inline uint32_t A2XX_RB_BLEND_CONTROL_COLOR_SRCBLEND(enum adreno_rb_blend_factor val) -{ - return ((val) << A2XX_RB_BLEND_CONTROL_COLOR_SRCBLEND__SHIFT) & A2XX_RB_BLEND_CONTROL_COLOR_SRCBLEND__MASK; -} -#define A2XX_RB_BLEND_CONTROL_COLOR_COMB_FCN__MASK 0x000000e0 -#define A2XX_RB_BLEND_CONTROL_COLOR_COMB_FCN__SHIFT 5 -static inline uint32_t A2XX_RB_BLEND_CONTROL_COLOR_COMB_FCN(enum a2xx_rb_blend_opcode val) -{ - return ((val) << A2XX_RB_BLEND_CONTROL_COLOR_COMB_FCN__SHIFT) & A2XX_RB_BLEND_CONTROL_COLOR_COMB_FCN__MASK; -} -#define A2XX_RB_BLEND_CONTROL_COLOR_DESTBLEND__MASK 0x00001f00 -#define A2XX_RB_BLEND_CONTROL_COLOR_DESTBLEND__SHIFT 8 -static inline uint32_t A2XX_RB_BLEND_CONTROL_COLOR_DESTBLEND(enum adreno_rb_blend_factor val) -{ - return ((val) << A2XX_RB_BLEND_CONTROL_COLOR_DESTBLEND__SHIFT) & A2XX_RB_BLEND_CONTROL_COLOR_DESTBLEND__MASK; -} -#define A2XX_RB_BLEND_CONTROL_ALPHA_SRCBLEND__MASK 0x001f0000 
-#define A2XX_RB_BLEND_CONTROL_ALPHA_SRCBLEND__SHIFT 16 -static inline uint32_t A2XX_RB_BLEND_CONTROL_ALPHA_SRCBLEND(enum adreno_rb_blend_factor val) -{ - return ((val) << A2XX_RB_BLEND_CONTROL_ALPHA_SRCBLEND__SHIFT) & A2XX_RB_BLEND_CONTROL_ALPHA_SRCBLEND__MASK; -} -#define A2XX_RB_BLEND_CONTROL_ALPHA_COMB_FCN__MASK 0x00e00000 -#define A2XX_RB_BLEND_CONTROL_ALPHA_COMB_FCN__SHIFT 21 -static inline uint32_t A2XX_RB_BLEND_CONTROL_ALPHA_COMB_FCN(enum a2xx_rb_blend_opcode val) -{ - return ((val) << A2XX_RB_BLEND_CONTROL_ALPHA_COMB_FCN__SHIFT) & A2XX_RB_BLEND_CONTROL_ALPHA_COMB_FCN__MASK; -} -#define A2XX_RB_BLEND_CONTROL_ALPHA_DESTBLEND__MASK 0x1f000000 -#define A2XX_RB_BLEND_CONTROL_ALPHA_DESTBLEND__SHIFT 24 -static inline uint32_t A2XX_RB_BLEND_CONTROL_ALPHA_DESTBLEND(enum adreno_rb_blend_factor val) -{ - return ((val) << A2XX_RB_BLEND_CONTROL_ALPHA_DESTBLEND__SHIFT) & A2XX_RB_BLEND_CONTROL_ALPHA_DESTBLEND__MASK; -} -#define A2XX_RB_BLEND_CONTROL_BLEND_FORCE_ENABLE 0x20000000 -#define A2XX_RB_BLEND_CONTROL_BLEND_FORCE 0x40000000 - -#define REG_A2XX_RB_COLORCONTROL 0x00002202 -#define A2XX_RB_COLORCONTROL_ALPHA_FUNC__MASK 0x00000007 -#define A2XX_RB_COLORCONTROL_ALPHA_FUNC__SHIFT 0 -static inline uint32_t A2XX_RB_COLORCONTROL_ALPHA_FUNC(enum adreno_compare_func val) -{ - return ((val) << A2XX_RB_COLORCONTROL_ALPHA_FUNC__SHIFT) & A2XX_RB_COLORCONTROL_ALPHA_FUNC__MASK; -} -#define A2XX_RB_COLORCONTROL_ALPHA_TEST_ENABLE 0x00000008 -#define A2XX_RB_COLORCONTROL_ALPHA_TO_MASK_ENABLE 0x00000010 -#define A2XX_RB_COLORCONTROL_BLEND_DISABLE 0x00000020 -#define A2XX_RB_COLORCONTROL_VOB_ENABLE 0x00000040 -#define A2XX_RB_COLORCONTROL_VS_EXPORTS_FOG 0x00000080 -#define A2XX_RB_COLORCONTROL_ROP_CODE__MASK 0x00000f00 -#define A2XX_RB_COLORCONTROL_ROP_CODE__SHIFT 8 -static inline uint32_t A2XX_RB_COLORCONTROL_ROP_CODE(uint32_t val) -{ - return ((val) << A2XX_RB_COLORCONTROL_ROP_CODE__SHIFT) & A2XX_RB_COLORCONTROL_ROP_CODE__MASK; -} -#define A2XX_RB_COLORCONTROL_DITHER_MODE__MASK 
0x00003000 -#define A2XX_RB_COLORCONTROL_DITHER_MODE__SHIFT 12 -static inline uint32_t A2XX_RB_COLORCONTROL_DITHER_MODE(enum adreno_rb_dither_mode val) -{ - return ((val) << A2XX_RB_COLORCONTROL_DITHER_MODE__SHIFT) & A2XX_RB_COLORCONTROL_DITHER_MODE__MASK; -} -#define A2XX_RB_COLORCONTROL_DITHER_TYPE__MASK 0x0000c000 -#define A2XX_RB_COLORCONTROL_DITHER_TYPE__SHIFT 14 -static inline uint32_t A2XX_RB_COLORCONTROL_DITHER_TYPE(enum a2xx_rb_dither_type val) -{ - return ((val) << A2XX_RB_COLORCONTROL_DITHER_TYPE__SHIFT) & A2XX_RB_COLORCONTROL_DITHER_TYPE__MASK; -} -#define A2XX_RB_COLORCONTROL_PIXEL_FOG 0x00010000 -#define A2XX_RB_COLORCONTROL_ALPHA_TO_MASK_OFFSET0__MASK 0x03000000 -#define A2XX_RB_COLORCONTROL_ALPHA_TO_MASK_OFFSET0__SHIFT 24 -static inline uint32_t A2XX_RB_COLORCONTROL_ALPHA_TO_MASK_OFFSET0(uint32_t val) -{ - return ((val) << A2XX_RB_COLORCONTROL_ALPHA_TO_MASK_OFFSET0__SHIFT) & A2XX_RB_COLORCONTROL_ALPHA_TO_MASK_OFFSET0__MASK; -} -#define A2XX_RB_COLORCONTROL_ALPHA_TO_MASK_OFFSET1__MASK 0x0c000000 -#define A2XX_RB_COLORCONTROL_ALPHA_TO_MASK_OFFSET1__SHIFT 26 -static inline uint32_t A2XX_RB_COLORCONTROL_ALPHA_TO_MASK_OFFSET1(uint32_t val) -{ - return ((val) << A2XX_RB_COLORCONTROL_ALPHA_TO_MASK_OFFSET1__SHIFT) & A2XX_RB_COLORCONTROL_ALPHA_TO_MASK_OFFSET1__MASK; -} -#define A2XX_RB_COLORCONTROL_ALPHA_TO_MASK_OFFSET2__MASK 0x30000000 -#define A2XX_RB_COLORCONTROL_ALPHA_TO_MASK_OFFSET2__SHIFT 28 -static inline uint32_t A2XX_RB_COLORCONTROL_ALPHA_TO_MASK_OFFSET2(uint32_t val) -{ - return ((val) << A2XX_RB_COLORCONTROL_ALPHA_TO_MASK_OFFSET2__SHIFT) & A2XX_RB_COLORCONTROL_ALPHA_TO_MASK_OFFSET2__MASK; -} -#define A2XX_RB_COLORCONTROL_ALPHA_TO_MASK_OFFSET3__MASK 0xc0000000 -#define A2XX_RB_COLORCONTROL_ALPHA_TO_MASK_OFFSET3__SHIFT 30 -static inline uint32_t A2XX_RB_COLORCONTROL_ALPHA_TO_MASK_OFFSET3(uint32_t val) -{ - return ((val) << A2XX_RB_COLORCONTROL_ALPHA_TO_MASK_OFFSET3__SHIFT) & A2XX_RB_COLORCONTROL_ALPHA_TO_MASK_OFFSET3__MASK; -} - -#define 
REG_A2XX_VGT_CURRENT_BIN_ID_MAX 0x00002203 -#define A2XX_VGT_CURRENT_BIN_ID_MAX_COLUMN__MASK 0x00000007 -#define A2XX_VGT_CURRENT_BIN_ID_MAX_COLUMN__SHIFT 0 -static inline uint32_t A2XX_VGT_CURRENT_BIN_ID_MAX_COLUMN(uint32_t val) -{ - return ((val) << A2XX_VGT_CURRENT_BIN_ID_MAX_COLUMN__SHIFT) & A2XX_VGT_CURRENT_BIN_ID_MAX_COLUMN__MASK; -} -#define A2XX_VGT_CURRENT_BIN_ID_MAX_ROW__MASK 0x00000038 -#define A2XX_VGT_CURRENT_BIN_ID_MAX_ROW__SHIFT 3 -static inline uint32_t A2XX_VGT_CURRENT_BIN_ID_MAX_ROW(uint32_t val) -{ - return ((val) << A2XX_VGT_CURRENT_BIN_ID_MAX_ROW__SHIFT) & A2XX_VGT_CURRENT_BIN_ID_MAX_ROW__MASK; -} -#define A2XX_VGT_CURRENT_BIN_ID_MAX_GUARD_BAND_MASK__MASK 0x000001c0 -#define A2XX_VGT_CURRENT_BIN_ID_MAX_GUARD_BAND_MASK__SHIFT 6 -static inline uint32_t A2XX_VGT_CURRENT_BIN_ID_MAX_GUARD_BAND_MASK(uint32_t val) -{ - return ((val) << A2XX_VGT_CURRENT_BIN_ID_MAX_GUARD_BAND_MASK__SHIFT) & A2XX_VGT_CURRENT_BIN_ID_MAX_GUARD_BAND_MASK__MASK; -} - -#define REG_A2XX_PA_CL_CLIP_CNTL 0x00002204 -#define A2XX_PA_CL_CLIP_CNTL_CLIP_DISABLE 0x00010000 -#define A2XX_PA_CL_CLIP_CNTL_BOUNDARY_EDGE_FLAG_ENA 0x00040000 -#define A2XX_PA_CL_CLIP_CNTL_DX_CLIP_SPACE_DEF__MASK 0x00080000 -#define A2XX_PA_CL_CLIP_CNTL_DX_CLIP_SPACE_DEF__SHIFT 19 -static inline uint32_t A2XX_PA_CL_CLIP_CNTL_DX_CLIP_SPACE_DEF(enum a2xx_dx_clip_space val) -{ - return ((val) << A2XX_PA_CL_CLIP_CNTL_DX_CLIP_SPACE_DEF__SHIFT) & A2XX_PA_CL_CLIP_CNTL_DX_CLIP_SPACE_DEF__MASK; -} -#define A2XX_PA_CL_CLIP_CNTL_DIS_CLIP_ERR_DETECT 0x00100000 -#define A2XX_PA_CL_CLIP_CNTL_VTX_KILL_OR 0x00200000 -#define A2XX_PA_CL_CLIP_CNTL_XY_NAN_RETAIN 0x00400000 -#define A2XX_PA_CL_CLIP_CNTL_Z_NAN_RETAIN 0x00800000 -#define A2XX_PA_CL_CLIP_CNTL_W_NAN_RETAIN 0x01000000 - -#define REG_A2XX_PA_SU_SC_MODE_CNTL 0x00002205 -#define A2XX_PA_SU_SC_MODE_CNTL_CULL_FRONT 0x00000001 -#define A2XX_PA_SU_SC_MODE_CNTL_CULL_BACK 0x00000002 -#define A2XX_PA_SU_SC_MODE_CNTL_FACE 0x00000004 -#define 
A2XX_PA_SU_SC_MODE_CNTL_POLYMODE__MASK 0x00000018 -#define A2XX_PA_SU_SC_MODE_CNTL_POLYMODE__SHIFT 3 -static inline uint32_t A2XX_PA_SU_SC_MODE_CNTL_POLYMODE(enum a2xx_pa_su_sc_polymode val) -{ - return ((val) << A2XX_PA_SU_SC_MODE_CNTL_POLYMODE__SHIFT) & A2XX_PA_SU_SC_MODE_CNTL_POLYMODE__MASK; -} -#define A2XX_PA_SU_SC_MODE_CNTL_FRONT_PTYPE__MASK 0x000000e0 -#define A2XX_PA_SU_SC_MODE_CNTL_FRONT_PTYPE__SHIFT 5 -static inline uint32_t A2XX_PA_SU_SC_MODE_CNTL_FRONT_PTYPE(enum adreno_pa_su_sc_draw val) -{ - return ((val) << A2XX_PA_SU_SC_MODE_CNTL_FRONT_PTYPE__SHIFT) & A2XX_PA_SU_SC_MODE_CNTL_FRONT_PTYPE__MASK; -} -#define A2XX_PA_SU_SC_MODE_CNTL_BACK_PTYPE__MASK 0x00000700 -#define A2XX_PA_SU_SC_MODE_CNTL_BACK_PTYPE__SHIFT 8 -static inline uint32_t A2XX_PA_SU_SC_MODE_CNTL_BACK_PTYPE(enum adreno_pa_su_sc_draw val) -{ - return ((val) << A2XX_PA_SU_SC_MODE_CNTL_BACK_PTYPE__SHIFT) & A2XX_PA_SU_SC_MODE_CNTL_BACK_PTYPE__MASK; -} -#define A2XX_PA_SU_SC_MODE_CNTL_POLY_OFFSET_FRONT_ENABLE 0x00000800 -#define A2XX_PA_SU_SC_MODE_CNTL_POLY_OFFSET_BACK_ENABLE 0x00001000 -#define A2XX_PA_SU_SC_MODE_CNTL_POLY_OFFSET_PARA_ENABLE 0x00002000 -#define A2XX_PA_SU_SC_MODE_CNTL_MSAA_ENABLE 0x00008000 -#define A2XX_PA_SU_SC_MODE_CNTL_VTX_WINDOW_OFFSET_ENABLE 0x00010000 -#define A2XX_PA_SU_SC_MODE_CNTL_LINE_STIPPLE_ENABLE 0x00040000 -#define A2XX_PA_SU_SC_MODE_CNTL_PROVOKING_VTX_LAST 0x00080000 -#define A2XX_PA_SU_SC_MODE_CNTL_PERSP_CORR_DIS 0x00100000 -#define A2XX_PA_SU_SC_MODE_CNTL_MULTI_PRIM_IB_ENA 0x00200000 -#define A2XX_PA_SU_SC_MODE_CNTL_QUAD_ORDER_ENABLE 0x00800000 -#define A2XX_PA_SU_SC_MODE_CNTL_WAIT_RB_IDLE_ALL_TRI 0x02000000 -#define A2XX_PA_SU_SC_MODE_CNTL_WAIT_RB_IDLE_FIRST_TRI_NEW_STATE 0x04000000 -#define A2XX_PA_SU_SC_MODE_CNTL_CLAMPED_FACENESS 0x10000000 -#define A2XX_PA_SU_SC_MODE_CNTL_ZERO_AREA_FACENESS 0x20000000 -#define A2XX_PA_SU_SC_MODE_CNTL_FACE_KILL_ENABLE 0x40000000 -#define A2XX_PA_SU_SC_MODE_CNTL_FACE_WRITE_ENABLE 0x80000000 - -#define REG_A2XX_PA_CL_VTE_CNTL 
0x00002206 -#define A2XX_PA_CL_VTE_CNTL_VPORT_X_SCALE_ENA 0x00000001 -#define A2XX_PA_CL_VTE_CNTL_VPORT_X_OFFSET_ENA 0x00000002 -#define A2XX_PA_CL_VTE_CNTL_VPORT_Y_SCALE_ENA 0x00000004 -#define A2XX_PA_CL_VTE_CNTL_VPORT_Y_OFFSET_ENA 0x00000008 -#define A2XX_PA_CL_VTE_CNTL_VPORT_Z_SCALE_ENA 0x00000010 -#define A2XX_PA_CL_VTE_CNTL_VPORT_Z_OFFSET_ENA 0x00000020 -#define A2XX_PA_CL_VTE_CNTL_VTX_XY_FMT 0x00000100 -#define A2XX_PA_CL_VTE_CNTL_VTX_Z_FMT 0x00000200 -#define A2XX_PA_CL_VTE_CNTL_VTX_W0_FMT 0x00000400 -#define A2XX_PA_CL_VTE_CNTL_PERFCOUNTER_REF 0x00000800 - -#define REG_A2XX_VGT_CURRENT_BIN_ID_MIN 0x00002207 -#define A2XX_VGT_CURRENT_BIN_ID_MIN_COLUMN__MASK 0x00000007 -#define A2XX_VGT_CURRENT_BIN_ID_MIN_COLUMN__SHIFT 0 -static inline uint32_t A2XX_VGT_CURRENT_BIN_ID_MIN_COLUMN(uint32_t val) -{ - return ((val) << A2XX_VGT_CURRENT_BIN_ID_MIN_COLUMN__SHIFT) & A2XX_VGT_CURRENT_BIN_ID_MIN_COLUMN__MASK; -} -#define A2XX_VGT_CURRENT_BIN_ID_MIN_ROW__MASK 0x00000038 -#define A2XX_VGT_CURRENT_BIN_ID_MIN_ROW__SHIFT 3 -static inline uint32_t A2XX_VGT_CURRENT_BIN_ID_MIN_ROW(uint32_t val) -{ - return ((val) << A2XX_VGT_CURRENT_BIN_ID_MIN_ROW__SHIFT) & A2XX_VGT_CURRENT_BIN_ID_MIN_ROW__MASK; -} -#define A2XX_VGT_CURRENT_BIN_ID_MIN_GUARD_BAND_MASK__MASK 0x000001c0 -#define A2XX_VGT_CURRENT_BIN_ID_MIN_GUARD_BAND_MASK__SHIFT 6 -static inline uint32_t A2XX_VGT_CURRENT_BIN_ID_MIN_GUARD_BAND_MASK(uint32_t val) -{ - return ((val) << A2XX_VGT_CURRENT_BIN_ID_MIN_GUARD_BAND_MASK__SHIFT) & A2XX_VGT_CURRENT_BIN_ID_MIN_GUARD_BAND_MASK__MASK; -} - -#define REG_A2XX_RB_MODECONTROL 0x00002208 -#define A2XX_RB_MODECONTROL_EDRAM_MODE__MASK 0x00000007 -#define A2XX_RB_MODECONTROL_EDRAM_MODE__SHIFT 0 -static inline uint32_t A2XX_RB_MODECONTROL_EDRAM_MODE(enum a2xx_rb_edram_mode val) -{ - return ((val) << A2XX_RB_MODECONTROL_EDRAM_MODE__SHIFT) & A2XX_RB_MODECONTROL_EDRAM_MODE__MASK; -} - -#define REG_A2XX_A220_RB_LRZ_VSC_CONTROL 0x00002209 - -#define REG_A2XX_RB_SAMPLE_POS 0x0000220a - 
-#define REG_A2XX_CLEAR_COLOR 0x0000220b -#define A2XX_CLEAR_COLOR_RED__MASK 0x000000ff -#define A2XX_CLEAR_COLOR_RED__SHIFT 0 -static inline uint32_t A2XX_CLEAR_COLOR_RED(uint32_t val) -{ - return ((val) << A2XX_CLEAR_COLOR_RED__SHIFT) & A2XX_CLEAR_COLOR_RED__MASK; -} -#define A2XX_CLEAR_COLOR_GREEN__MASK 0x0000ff00 -#define A2XX_CLEAR_COLOR_GREEN__SHIFT 8 -static inline uint32_t A2XX_CLEAR_COLOR_GREEN(uint32_t val) -{ - return ((val) << A2XX_CLEAR_COLOR_GREEN__SHIFT) & A2XX_CLEAR_COLOR_GREEN__MASK; -} -#define A2XX_CLEAR_COLOR_BLUE__MASK 0x00ff0000 -#define A2XX_CLEAR_COLOR_BLUE__SHIFT 16 -static inline uint32_t A2XX_CLEAR_COLOR_BLUE(uint32_t val) -{ - return ((val) << A2XX_CLEAR_COLOR_BLUE__SHIFT) & A2XX_CLEAR_COLOR_BLUE__MASK; -} -#define A2XX_CLEAR_COLOR_ALPHA__MASK 0xff000000 -#define A2XX_CLEAR_COLOR_ALPHA__SHIFT 24 -static inline uint32_t A2XX_CLEAR_COLOR_ALPHA(uint32_t val) -{ - return ((val) << A2XX_CLEAR_COLOR_ALPHA__SHIFT) & A2XX_CLEAR_COLOR_ALPHA__MASK; -} - -#define REG_A2XX_A220_GRAS_CONTROL 0x00002210 - -#define REG_A2XX_PA_SU_POINT_SIZE 0x00002280 -#define A2XX_PA_SU_POINT_SIZE_HEIGHT__MASK 0x0000ffff -#define A2XX_PA_SU_POINT_SIZE_HEIGHT__SHIFT 0 -static inline uint32_t A2XX_PA_SU_POINT_SIZE_HEIGHT(float val) -{ - return ((((uint32_t)(val * 16.0))) << A2XX_PA_SU_POINT_SIZE_HEIGHT__SHIFT) & A2XX_PA_SU_POINT_SIZE_HEIGHT__MASK; -} -#define A2XX_PA_SU_POINT_SIZE_WIDTH__MASK 0xffff0000 -#define A2XX_PA_SU_POINT_SIZE_WIDTH__SHIFT 16 -static inline uint32_t A2XX_PA_SU_POINT_SIZE_WIDTH(float val) -{ - return ((((uint32_t)(val * 16.0))) << A2XX_PA_SU_POINT_SIZE_WIDTH__SHIFT) & A2XX_PA_SU_POINT_SIZE_WIDTH__MASK; -} - -#define REG_A2XX_PA_SU_POINT_MINMAX 0x00002281 -#define A2XX_PA_SU_POINT_MINMAX_MIN__MASK 0x0000ffff -#define A2XX_PA_SU_POINT_MINMAX_MIN__SHIFT 0 -static inline uint32_t A2XX_PA_SU_POINT_MINMAX_MIN(float val) -{ - return ((((uint32_t)(val * 16.0))) << A2XX_PA_SU_POINT_MINMAX_MIN__SHIFT) & A2XX_PA_SU_POINT_MINMAX_MIN__MASK; -} -#define 
A2XX_PA_SU_POINT_MINMAX_MAX__MASK 0xffff0000 -#define A2XX_PA_SU_POINT_MINMAX_MAX__SHIFT 16 -static inline uint32_t A2XX_PA_SU_POINT_MINMAX_MAX(float val) -{ - return ((((uint32_t)(val * 16.0))) << A2XX_PA_SU_POINT_MINMAX_MAX__SHIFT) & A2XX_PA_SU_POINT_MINMAX_MAX__MASK; -} - -#define REG_A2XX_PA_SU_LINE_CNTL 0x00002282 -#define A2XX_PA_SU_LINE_CNTL_WIDTH__MASK 0x0000ffff -#define A2XX_PA_SU_LINE_CNTL_WIDTH__SHIFT 0 -static inline uint32_t A2XX_PA_SU_LINE_CNTL_WIDTH(float val) -{ - return ((((uint32_t)(val * 16.0))) << A2XX_PA_SU_LINE_CNTL_WIDTH__SHIFT) & A2XX_PA_SU_LINE_CNTL_WIDTH__MASK; -} - -#define REG_A2XX_PA_SC_LINE_STIPPLE 0x00002283 -#define A2XX_PA_SC_LINE_STIPPLE_LINE_PATTERN__MASK 0x0000ffff -#define A2XX_PA_SC_LINE_STIPPLE_LINE_PATTERN__SHIFT 0 -static inline uint32_t A2XX_PA_SC_LINE_STIPPLE_LINE_PATTERN(uint32_t val) -{ - return ((val) << A2XX_PA_SC_LINE_STIPPLE_LINE_PATTERN__SHIFT) & A2XX_PA_SC_LINE_STIPPLE_LINE_PATTERN__MASK; -} -#define A2XX_PA_SC_LINE_STIPPLE_REPEAT_COUNT__MASK 0x00ff0000 -#define A2XX_PA_SC_LINE_STIPPLE_REPEAT_COUNT__SHIFT 16 -static inline uint32_t A2XX_PA_SC_LINE_STIPPLE_REPEAT_COUNT(uint32_t val) -{ - return ((val) << A2XX_PA_SC_LINE_STIPPLE_REPEAT_COUNT__SHIFT) & A2XX_PA_SC_LINE_STIPPLE_REPEAT_COUNT__MASK; -} -#define A2XX_PA_SC_LINE_STIPPLE_PATTERN_BIT_ORDER__MASK 0x10000000 -#define A2XX_PA_SC_LINE_STIPPLE_PATTERN_BIT_ORDER__SHIFT 28 -static inline uint32_t A2XX_PA_SC_LINE_STIPPLE_PATTERN_BIT_ORDER(enum a2xx_pa_sc_pattern_bit_order val) -{ - return ((val) << A2XX_PA_SC_LINE_STIPPLE_PATTERN_BIT_ORDER__SHIFT) & A2XX_PA_SC_LINE_STIPPLE_PATTERN_BIT_ORDER__MASK; -} -#define A2XX_PA_SC_LINE_STIPPLE_AUTO_RESET_CNTL__MASK 0x60000000 -#define A2XX_PA_SC_LINE_STIPPLE_AUTO_RESET_CNTL__SHIFT 29 -static inline uint32_t A2XX_PA_SC_LINE_STIPPLE_AUTO_RESET_CNTL(enum a2xx_pa_sc_auto_reset_cntl val) -{ - return ((val) << A2XX_PA_SC_LINE_STIPPLE_AUTO_RESET_CNTL__SHIFT) & A2XX_PA_SC_LINE_STIPPLE_AUTO_RESET_CNTL__MASK; -} - -#define 
REG_A2XX_PA_SC_VIZ_QUERY 0x00002293 -#define A2XX_PA_SC_VIZ_QUERY_VIZ_QUERY_ENA 0x00000001 -#define A2XX_PA_SC_VIZ_QUERY_VIZ_QUERY_ID__MASK 0x0000007e -#define A2XX_PA_SC_VIZ_QUERY_VIZ_QUERY_ID__SHIFT 1 -static inline uint32_t A2XX_PA_SC_VIZ_QUERY_VIZ_QUERY_ID(uint32_t val) -{ - return ((val) << A2XX_PA_SC_VIZ_QUERY_VIZ_QUERY_ID__SHIFT) & A2XX_PA_SC_VIZ_QUERY_VIZ_QUERY_ID__MASK; -} -#define A2XX_PA_SC_VIZ_QUERY_KILL_PIX_POST_EARLY_Z 0x00000100 - -#define REG_A2XX_VGT_ENHANCE 0x00002294 - -#define REG_A2XX_PA_SC_LINE_CNTL 0x00002300 -#define A2XX_PA_SC_LINE_CNTL_BRES_CNTL__MASK 0x0000ffff -#define A2XX_PA_SC_LINE_CNTL_BRES_CNTL__SHIFT 0 -static inline uint32_t A2XX_PA_SC_LINE_CNTL_BRES_CNTL(uint32_t val) -{ - return ((val) << A2XX_PA_SC_LINE_CNTL_BRES_CNTL__SHIFT) & A2XX_PA_SC_LINE_CNTL_BRES_CNTL__MASK; -} -#define A2XX_PA_SC_LINE_CNTL_USE_BRES_CNTL 0x00000100 -#define A2XX_PA_SC_LINE_CNTL_EXPAND_LINE_WIDTH 0x00000200 -#define A2XX_PA_SC_LINE_CNTL_LAST_PIXEL 0x00000400 - -#define REG_A2XX_PA_SC_AA_CONFIG 0x00002301 -#define A2XX_PA_SC_AA_CONFIG_MSAA_NUM_SAMPLES__MASK 0x00000007 -#define A2XX_PA_SC_AA_CONFIG_MSAA_NUM_SAMPLES__SHIFT 0 -static inline uint32_t A2XX_PA_SC_AA_CONFIG_MSAA_NUM_SAMPLES(uint32_t val) -{ - return ((val) << A2XX_PA_SC_AA_CONFIG_MSAA_NUM_SAMPLES__SHIFT) & A2XX_PA_SC_AA_CONFIG_MSAA_NUM_SAMPLES__MASK; -} -#define A2XX_PA_SC_AA_CONFIG_MAX_SAMPLE_DIST__MASK 0x0001e000 -#define A2XX_PA_SC_AA_CONFIG_MAX_SAMPLE_DIST__SHIFT 13 -static inline uint32_t A2XX_PA_SC_AA_CONFIG_MAX_SAMPLE_DIST(uint32_t val) -{ - return ((val) << A2XX_PA_SC_AA_CONFIG_MAX_SAMPLE_DIST__SHIFT) & A2XX_PA_SC_AA_CONFIG_MAX_SAMPLE_DIST__MASK; -} - -#define REG_A2XX_PA_SU_VTX_CNTL 0x00002302 -#define A2XX_PA_SU_VTX_CNTL_PIX_CENTER__MASK 0x00000001 -#define A2XX_PA_SU_VTX_CNTL_PIX_CENTER__SHIFT 0 -static inline uint32_t A2XX_PA_SU_VTX_CNTL_PIX_CENTER(enum a2xx_pa_pixcenter val) -{ - return ((val) << A2XX_PA_SU_VTX_CNTL_PIX_CENTER__SHIFT) & A2XX_PA_SU_VTX_CNTL_PIX_CENTER__MASK; -} 
-#define A2XX_PA_SU_VTX_CNTL_ROUND_MODE__MASK 0x00000006 -#define A2XX_PA_SU_VTX_CNTL_ROUND_MODE__SHIFT 1 -static inline uint32_t A2XX_PA_SU_VTX_CNTL_ROUND_MODE(enum a2xx_pa_roundmode val) -{ - return ((val) << A2XX_PA_SU_VTX_CNTL_ROUND_MODE__SHIFT) & A2XX_PA_SU_VTX_CNTL_ROUND_MODE__MASK; -} -#define A2XX_PA_SU_VTX_CNTL_QUANT_MODE__MASK 0x00000380 -#define A2XX_PA_SU_VTX_CNTL_QUANT_MODE__SHIFT 7 -static inline uint32_t A2XX_PA_SU_VTX_CNTL_QUANT_MODE(enum a2xx_pa_quantmode val) -{ - return ((val) << A2XX_PA_SU_VTX_CNTL_QUANT_MODE__SHIFT) & A2XX_PA_SU_VTX_CNTL_QUANT_MODE__MASK; -} - -#define REG_A2XX_PA_CL_GB_VERT_CLIP_ADJ 0x00002303 -#define A2XX_PA_CL_GB_VERT_CLIP_ADJ__MASK 0xffffffff -#define A2XX_PA_CL_GB_VERT_CLIP_ADJ__SHIFT 0 -static inline uint32_t A2XX_PA_CL_GB_VERT_CLIP_ADJ(float val) -{ - return ((fui(val)) << A2XX_PA_CL_GB_VERT_CLIP_ADJ__SHIFT) & A2XX_PA_CL_GB_VERT_CLIP_ADJ__MASK; -} - -#define REG_A2XX_PA_CL_GB_VERT_DISC_ADJ 0x00002304 -#define A2XX_PA_CL_GB_VERT_DISC_ADJ__MASK 0xffffffff -#define A2XX_PA_CL_GB_VERT_DISC_ADJ__SHIFT 0 -static inline uint32_t A2XX_PA_CL_GB_VERT_DISC_ADJ(float val) -{ - return ((fui(val)) << A2XX_PA_CL_GB_VERT_DISC_ADJ__SHIFT) & A2XX_PA_CL_GB_VERT_DISC_ADJ__MASK; -} - -#define REG_A2XX_PA_CL_GB_HORZ_CLIP_ADJ 0x00002305 -#define A2XX_PA_CL_GB_HORZ_CLIP_ADJ__MASK 0xffffffff -#define A2XX_PA_CL_GB_HORZ_CLIP_ADJ__SHIFT 0 -static inline uint32_t A2XX_PA_CL_GB_HORZ_CLIP_ADJ(float val) -{ - return ((fui(val)) << A2XX_PA_CL_GB_HORZ_CLIP_ADJ__SHIFT) & A2XX_PA_CL_GB_HORZ_CLIP_ADJ__MASK; -} - -#define REG_A2XX_PA_CL_GB_HORZ_DISC_ADJ 0x00002306 -#define A2XX_PA_CL_GB_HORZ_DISC_ADJ__MASK 0xffffffff -#define A2XX_PA_CL_GB_HORZ_DISC_ADJ__SHIFT 0 -static inline uint32_t A2XX_PA_CL_GB_HORZ_DISC_ADJ(float val) -{ - return ((fui(val)) << A2XX_PA_CL_GB_HORZ_DISC_ADJ__SHIFT) & A2XX_PA_CL_GB_HORZ_DISC_ADJ__MASK; -} - -#define REG_A2XX_SQ_VS_CONST 0x00002307 -#define A2XX_SQ_VS_CONST_BASE__MASK 0x000001ff -#define A2XX_SQ_VS_CONST_BASE__SHIFT 0 
-static inline uint32_t A2XX_SQ_VS_CONST_BASE(uint32_t val) -{ - return ((val) << A2XX_SQ_VS_CONST_BASE__SHIFT) & A2XX_SQ_VS_CONST_BASE__MASK; -} -#define A2XX_SQ_VS_CONST_SIZE__MASK 0x001ff000 -#define A2XX_SQ_VS_CONST_SIZE__SHIFT 12 -static inline uint32_t A2XX_SQ_VS_CONST_SIZE(uint32_t val) -{ - return ((val) << A2XX_SQ_VS_CONST_SIZE__SHIFT) & A2XX_SQ_VS_CONST_SIZE__MASK; -} - -#define REG_A2XX_SQ_PS_CONST 0x00002308 -#define A2XX_SQ_PS_CONST_BASE__MASK 0x000001ff -#define A2XX_SQ_PS_CONST_BASE__SHIFT 0 -static inline uint32_t A2XX_SQ_PS_CONST_BASE(uint32_t val) -{ - return ((val) << A2XX_SQ_PS_CONST_BASE__SHIFT) & A2XX_SQ_PS_CONST_BASE__MASK; -} -#define A2XX_SQ_PS_CONST_SIZE__MASK 0x001ff000 -#define A2XX_SQ_PS_CONST_SIZE__SHIFT 12 -static inline uint32_t A2XX_SQ_PS_CONST_SIZE(uint32_t val) -{ - return ((val) << A2XX_SQ_PS_CONST_SIZE__SHIFT) & A2XX_SQ_PS_CONST_SIZE__MASK; -} - -#define REG_A2XX_SQ_DEBUG_MISC_0 0x00002309 - -#define REG_A2XX_SQ_DEBUG_MISC_1 0x0000230a - -#define REG_A2XX_PA_SC_AA_MASK 0x00002312 - -#define REG_A2XX_VGT_VERTEX_REUSE_BLOCK_CNTL 0x00002316 -#define A2XX_VGT_VERTEX_REUSE_BLOCK_CNTL_VTX_REUSE_DEPTH__MASK 0x00000007 -#define A2XX_VGT_VERTEX_REUSE_BLOCK_CNTL_VTX_REUSE_DEPTH__SHIFT 0 -static inline uint32_t A2XX_VGT_VERTEX_REUSE_BLOCK_CNTL_VTX_REUSE_DEPTH(uint32_t val) -{ - return ((val) << A2XX_VGT_VERTEX_REUSE_BLOCK_CNTL_VTX_REUSE_DEPTH__SHIFT) & A2XX_VGT_VERTEX_REUSE_BLOCK_CNTL_VTX_REUSE_DEPTH__MASK; -} - -#define REG_A2XX_VGT_OUT_DEALLOC_CNTL 0x00002317 -#define A2XX_VGT_OUT_DEALLOC_CNTL_DEALLOC_DIST__MASK 0x00000003 -#define A2XX_VGT_OUT_DEALLOC_CNTL_DEALLOC_DIST__SHIFT 0 -static inline uint32_t A2XX_VGT_OUT_DEALLOC_CNTL_DEALLOC_DIST(uint32_t val) -{ - return ((val) << A2XX_VGT_OUT_DEALLOC_CNTL_DEALLOC_DIST__SHIFT) & A2XX_VGT_OUT_DEALLOC_CNTL_DEALLOC_DIST__MASK; -} - -#define REG_A2XX_RB_COPY_CONTROL 0x00002318 -#define A2XX_RB_COPY_CONTROL_COPY_SAMPLE_SELECT__MASK 0x00000007 -#define A2XX_RB_COPY_CONTROL_COPY_SAMPLE_SELECT__SHIFT 
0 -static inline uint32_t A2XX_RB_COPY_CONTROL_COPY_SAMPLE_SELECT(enum a2xx_rb_copy_sample_select val) -{ - return ((val) << A2XX_RB_COPY_CONTROL_COPY_SAMPLE_SELECT__SHIFT) & A2XX_RB_COPY_CONTROL_COPY_SAMPLE_SELECT__MASK; -} -#define A2XX_RB_COPY_CONTROL_DEPTH_CLEAR_ENABLE 0x00000008 -#define A2XX_RB_COPY_CONTROL_CLEAR_MASK__MASK 0x000000f0 -#define A2XX_RB_COPY_CONTROL_CLEAR_MASK__SHIFT 4 -static inline uint32_t A2XX_RB_COPY_CONTROL_CLEAR_MASK(uint32_t val) -{ - return ((val) << A2XX_RB_COPY_CONTROL_CLEAR_MASK__SHIFT) & A2XX_RB_COPY_CONTROL_CLEAR_MASK__MASK; -} - -#define REG_A2XX_RB_COPY_DEST_BASE 0x00002319 - -#define REG_A2XX_RB_COPY_DEST_PITCH 0x0000231a -#define A2XX_RB_COPY_DEST_PITCH__MASK 0xffffffff -#define A2XX_RB_COPY_DEST_PITCH__SHIFT 0 -static inline uint32_t A2XX_RB_COPY_DEST_PITCH(uint32_t val) -{ - assert(!(val & 0x1f)); - return ((val >> 5) << A2XX_RB_COPY_DEST_PITCH__SHIFT) & A2XX_RB_COPY_DEST_PITCH__MASK; -} - -#define REG_A2XX_RB_COPY_DEST_INFO 0x0000231b -#define A2XX_RB_COPY_DEST_INFO_DEST_ENDIAN__MASK 0x00000007 -#define A2XX_RB_COPY_DEST_INFO_DEST_ENDIAN__SHIFT 0 -static inline uint32_t A2XX_RB_COPY_DEST_INFO_DEST_ENDIAN(enum adreno_rb_surface_endian val) -{ - return ((val) << A2XX_RB_COPY_DEST_INFO_DEST_ENDIAN__SHIFT) & A2XX_RB_COPY_DEST_INFO_DEST_ENDIAN__MASK; -} -#define A2XX_RB_COPY_DEST_INFO_LINEAR 0x00000008 -#define A2XX_RB_COPY_DEST_INFO_FORMAT__MASK 0x000000f0 -#define A2XX_RB_COPY_DEST_INFO_FORMAT__SHIFT 4 -static inline uint32_t A2XX_RB_COPY_DEST_INFO_FORMAT(enum a2xx_colorformatx val) -{ - return ((val) << A2XX_RB_COPY_DEST_INFO_FORMAT__SHIFT) & A2XX_RB_COPY_DEST_INFO_FORMAT__MASK; -} -#define A2XX_RB_COPY_DEST_INFO_SWAP__MASK 0x00000300 -#define A2XX_RB_COPY_DEST_INFO_SWAP__SHIFT 8 -static inline uint32_t A2XX_RB_COPY_DEST_INFO_SWAP(uint32_t val) -{ - return ((val) << A2XX_RB_COPY_DEST_INFO_SWAP__SHIFT) & A2XX_RB_COPY_DEST_INFO_SWAP__MASK; -} -#define A2XX_RB_COPY_DEST_INFO_DITHER_MODE__MASK 0x00000c00 -#define 
A2XX_RB_COPY_DEST_INFO_DITHER_MODE__SHIFT 10 -static inline uint32_t A2XX_RB_COPY_DEST_INFO_DITHER_MODE(enum adreno_rb_dither_mode val) -{ - return ((val) << A2XX_RB_COPY_DEST_INFO_DITHER_MODE__SHIFT) & A2XX_RB_COPY_DEST_INFO_DITHER_MODE__MASK; -} -#define A2XX_RB_COPY_DEST_INFO_DITHER_TYPE__MASK 0x00003000 -#define A2XX_RB_COPY_DEST_INFO_DITHER_TYPE__SHIFT 12 -static inline uint32_t A2XX_RB_COPY_DEST_INFO_DITHER_TYPE(enum a2xx_rb_dither_type val) -{ - return ((val) << A2XX_RB_COPY_DEST_INFO_DITHER_TYPE__SHIFT) & A2XX_RB_COPY_DEST_INFO_DITHER_TYPE__MASK; -} -#define A2XX_RB_COPY_DEST_INFO_WRITE_RED 0x00004000 -#define A2XX_RB_COPY_DEST_INFO_WRITE_GREEN 0x00008000 -#define A2XX_RB_COPY_DEST_INFO_WRITE_BLUE 0x00010000 -#define A2XX_RB_COPY_DEST_INFO_WRITE_ALPHA 0x00020000 - -#define REG_A2XX_RB_COPY_DEST_OFFSET 0x0000231c -#define A2XX_RB_COPY_DEST_OFFSET_X__MASK 0x00001fff -#define A2XX_RB_COPY_DEST_OFFSET_X__SHIFT 0 -static inline uint32_t A2XX_RB_COPY_DEST_OFFSET_X(uint32_t val) -{ - return ((val) << A2XX_RB_COPY_DEST_OFFSET_X__SHIFT) & A2XX_RB_COPY_DEST_OFFSET_X__MASK; -} -#define A2XX_RB_COPY_DEST_OFFSET_Y__MASK 0x03ffe000 -#define A2XX_RB_COPY_DEST_OFFSET_Y__SHIFT 13 -static inline uint32_t A2XX_RB_COPY_DEST_OFFSET_Y(uint32_t val) -{ - return ((val) << A2XX_RB_COPY_DEST_OFFSET_Y__SHIFT) & A2XX_RB_COPY_DEST_OFFSET_Y__MASK; -} - -#define REG_A2XX_RB_DEPTH_CLEAR 0x0000231d - -#define REG_A2XX_RB_SAMPLE_COUNT_CTL 0x00002324 - -#define REG_A2XX_RB_COLOR_DEST_MASK 0x00002326 - -#define REG_A2XX_A225_GRAS_UCP0X 0x00002340 - -#define REG_A2XX_A225_GRAS_UCP5W 0x00002357 - -#define REG_A2XX_A225_GRAS_UCP_ENABLED 0x00002360 - -#define REG_A2XX_PA_SU_POLY_OFFSET_FRONT_SCALE 0x00002380 - -#define REG_A2XX_PA_SU_POLY_OFFSET_BACK_OFFSET 0x00002383 - -#define REG_A2XX_SQ_CONSTANT_0 0x00004000 - -#define REG_A2XX_SQ_FETCH_0 0x00004800 - -#define REG_A2XX_SQ_CF_BOOLEANS 0x00004900 - -#define REG_A2XX_SQ_CF_LOOP 0x00004908 - -#define REG_A2XX_COHER_SIZE_PM4 0x00000a29 - -#define 
REG_A2XX_COHER_BASE_PM4 0x00000a2a - -#define REG_A2XX_COHER_STATUS_PM4 0x00000a2b - -#define REG_A2XX_SQ_TEX_0 0x00000000 -#define A2XX_SQ_TEX_0_CLAMP_X__MASK 0x00001c00 -#define A2XX_SQ_TEX_0_CLAMP_X__SHIFT 10 -static inline uint32_t A2XX_SQ_TEX_0_CLAMP_X(enum sq_tex_clamp val) -{ - return ((val) << A2XX_SQ_TEX_0_CLAMP_X__SHIFT) & A2XX_SQ_TEX_0_CLAMP_X__MASK; -} -#define A2XX_SQ_TEX_0_CLAMP_Y__MASK 0x0000e000 -#define A2XX_SQ_TEX_0_CLAMP_Y__SHIFT 13 -static inline uint32_t A2XX_SQ_TEX_0_CLAMP_Y(enum sq_tex_clamp val) -{ - return ((val) << A2XX_SQ_TEX_0_CLAMP_Y__SHIFT) & A2XX_SQ_TEX_0_CLAMP_Y__MASK; -} -#define A2XX_SQ_TEX_0_CLAMP_Z__MASK 0x00070000 -#define A2XX_SQ_TEX_0_CLAMP_Z__SHIFT 16 -static inline uint32_t A2XX_SQ_TEX_0_CLAMP_Z(enum sq_tex_clamp val) -{ - return ((val) << A2XX_SQ_TEX_0_CLAMP_Z__SHIFT) & A2XX_SQ_TEX_0_CLAMP_Z__MASK; -} -#define A2XX_SQ_TEX_0_PITCH__MASK 0xffc00000 -#define A2XX_SQ_TEX_0_PITCH__SHIFT 22 -static inline uint32_t A2XX_SQ_TEX_0_PITCH(uint32_t val) -{ - assert(!(val & 0x1f)); - return ((val >> 5) << A2XX_SQ_TEX_0_PITCH__SHIFT) & A2XX_SQ_TEX_0_PITCH__MASK; -} - -#define REG_A2XX_SQ_TEX_1 0x00000001 - -#define REG_A2XX_SQ_TEX_2 0x00000002 -#define A2XX_SQ_TEX_2_WIDTH__MASK 0x00001fff -#define A2XX_SQ_TEX_2_WIDTH__SHIFT 0 -static inline uint32_t A2XX_SQ_TEX_2_WIDTH(uint32_t val) -{ - return ((val) << A2XX_SQ_TEX_2_WIDTH__SHIFT) & A2XX_SQ_TEX_2_WIDTH__MASK; -} -#define A2XX_SQ_TEX_2_HEIGHT__MASK 0x03ffe000 -#define A2XX_SQ_TEX_2_HEIGHT__SHIFT 13 -static inline uint32_t A2XX_SQ_TEX_2_HEIGHT(uint32_t val) -{ - return ((val) << A2XX_SQ_TEX_2_HEIGHT__SHIFT) & A2XX_SQ_TEX_2_HEIGHT__MASK; -} - -#define REG_A2XX_SQ_TEX_3 0x00000003 -#define A2XX_SQ_TEX_3_SWIZ_X__MASK 0x0000000e -#define A2XX_SQ_TEX_3_SWIZ_X__SHIFT 1 -static inline uint32_t A2XX_SQ_TEX_3_SWIZ_X(enum sq_tex_swiz val) -{ - return ((val) << A2XX_SQ_TEX_3_SWIZ_X__SHIFT) & A2XX_SQ_TEX_3_SWIZ_X__MASK; -} -#define A2XX_SQ_TEX_3_SWIZ_Y__MASK 0x00000070 -#define 
A2XX_SQ_TEX_3_SWIZ_Y__SHIFT 4 -static inline uint32_t A2XX_SQ_TEX_3_SWIZ_Y(enum sq_tex_swiz val) -{ - return ((val) << A2XX_SQ_TEX_3_SWIZ_Y__SHIFT) & A2XX_SQ_TEX_3_SWIZ_Y__MASK; -} -#define A2XX_SQ_TEX_3_SWIZ_Z__MASK 0x00000380 -#define A2XX_SQ_TEX_3_SWIZ_Z__SHIFT 7 -static inline uint32_t A2XX_SQ_TEX_3_SWIZ_Z(enum sq_tex_swiz val) -{ - return ((val) << A2XX_SQ_TEX_3_SWIZ_Z__SHIFT) & A2XX_SQ_TEX_3_SWIZ_Z__MASK; -} -#define A2XX_SQ_TEX_3_SWIZ_W__MASK 0x00001c00 -#define A2XX_SQ_TEX_3_SWIZ_W__SHIFT 10 -static inline uint32_t A2XX_SQ_TEX_3_SWIZ_W(enum sq_tex_swiz val) -{ - return ((val) << A2XX_SQ_TEX_3_SWIZ_W__SHIFT) & A2XX_SQ_TEX_3_SWIZ_W__MASK; -} -#define A2XX_SQ_TEX_3_XY_MAG_FILTER__MASK 0x00180000 -#define A2XX_SQ_TEX_3_XY_MAG_FILTER__SHIFT 19 -static inline uint32_t A2XX_SQ_TEX_3_XY_MAG_FILTER(enum sq_tex_filter val) -{ - return ((val) << A2XX_SQ_TEX_3_XY_MAG_FILTER__SHIFT) & A2XX_SQ_TEX_3_XY_MAG_FILTER__MASK; -} -#define A2XX_SQ_TEX_3_XY_MIN_FILTER__MASK 0x00600000 -#define A2XX_SQ_TEX_3_XY_MIN_FILTER__SHIFT 21 -static inline uint32_t A2XX_SQ_TEX_3_XY_MIN_FILTER(enum sq_tex_filter val) -{ - return ((val) << A2XX_SQ_TEX_3_XY_MIN_FILTER__SHIFT) & A2XX_SQ_TEX_3_XY_MIN_FILTER__MASK; -} - - -#endif /* A2XX_XML */ diff -Nru mesa-18.3.3/src/gallium/drivers/freedreno/a2xx/disasm-a2xx.c mesa-19.0.1/src/gallium/drivers/freedreno/a2xx/disasm-a2xx.c --- mesa-18.3.3/src/gallium/drivers/freedreno/a2xx/disasm-a2xx.c 2017-11-05 00:14:08.000000000 +0000 +++ mesa-19.0.1/src/gallium/drivers/freedreno/a2xx/disasm-a2xx.c 2019-03-31 23:16:37.000000000 +0000 @@ -96,17 +96,17 @@ } } -static void print_export_comment(uint32_t num, enum shader_t type) +static void print_export_comment(uint32_t num, gl_shader_stage type) { const char *name = NULL; switch (type) { - case SHADER_VERTEX: + case MESA_SHADER_VERTEX: switch (num) { case 62: name = "gl_Position"; break; case 63: name = "gl_PointSize"; break; } break; - case SHADER_FRAGMENT: + case MESA_SHADER_FRAGMENT: switch (num) { case 0: 
name = "gl_FragColor"; break; } @@ -212,7 +212,7 @@ }; static int disasm_alu(uint32_t *dwords, uint32_t alu_off, - int level, int sync, enum shader_t type) + int level, int sync, gl_shader_stage type) { instr_alu_t *alu = (instr_alu_t *)dwords; @@ -592,7 +592,7 @@ * 2) ALU and FETCH instructions */ -int disasm_a2xx(uint32_t *dwords, int sizedwords, int level, enum shader_t type) +int disasm_a2xx(uint32_t *dwords, int sizedwords, int level, gl_shader_stage type) { instr_cf_t *cfs = (instr_cf_t *)dwords; int idx, max_idx; diff -Nru mesa-18.3.3/src/gallium/drivers/freedreno/a2xx/fd2_compiler.c mesa-19.0.1/src/gallium/drivers/freedreno/a2xx/fd2_compiler.c --- mesa-18.3.3/src/gallium/drivers/freedreno/a2xx/fd2_compiler.c 2018-12-07 18:58:04.000000000 +0000 +++ mesa-19.0.1/src/gallium/drivers/freedreno/a2xx/fd2_compiler.c 1970-01-01 00:00:00.000000000 +0000 @@ -1,1119 +0,0 @@ -/* - * Copyright (C) 2012 Rob Clark - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * the rights to use, copy, modify, merge, publish, distribute, sublicense, - * and/or sell copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice (including the next - * paragraph) shall be included in all copies or substantial portions of the - * Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL - * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - * - * Authors: - * Rob Clark - */ - -#include "pipe/p_state.h" -#include "util/u_string.h" -#include "util/u_memory.h" -#include "util/u_inlines.h" -#include "tgsi/tgsi_parse.h" -#include "tgsi/tgsi_ureg.h" -#include "tgsi/tgsi_info.h" -#include "tgsi/tgsi_strings.h" -#include "tgsi/tgsi_dump.h" - -#include "fd2_compiler.h" -#include "fd2_program.h" -#include "fd2_util.h" - -#include "instr-a2xx.h" -#include "ir-a2xx.h" - -struct fd2_compile_context { - struct fd_program_stateobj *prog; - struct fd2_shader_stateobj *so; - - struct tgsi_parse_context parser; - unsigned type; - - /* predicate stack: */ - int pred_depth; - enum ir2_pred pred_stack[8]; - - /* Internal-Temporary and Predicate register assignment: - * - * Some TGSI instructions which translate into multiple actual - * instructions need one or more temporary registers, which are not - * assigned from TGSI perspective (ie. not TGSI_FILE_TEMPORARY). - * And some instructions (texture fetch) cannot write directly to - * output registers. We could be more clever and re-use dst or a - * src register in some cases. But for now don't try to be clever. - * Eventually we should implement an optimization pass that re- - * juggles the register usage and gets rid of unneeded temporaries. - * - * The predicate register must be valid across multiple TGSI - * instructions, but internal temporary's do not. For this reason, - * once the predicate register is requested, until it is no longer - * needed, it gets the first register slot after after the TGSI - * assigned temporaries (ie. num_regs[TGSI_FILE_TEMPORARY]), and the - * internal temporaries get the register slots above this. 
- */ - - int pred_reg; - int num_internal_temps; - - uint8_t num_regs[TGSI_FILE_COUNT]; - - /* maps input register idx to prog->export_linkage idx: */ - uint8_t input_export_idx[64]; - - /* maps output register idx to prog->export_linkage idx: */ - uint8_t output_export_idx[64]; - - /* idx/slot for last compiler generated immediate */ - unsigned immediate_idx; - - // TODO we can skip emit exports in the VS that the FS doesn't need.. - // and get rid perhaps of num_param.. - unsigned num_position, num_param; - unsigned position, psize; - - uint64_t need_sync; -}; - -static int -semantic_idx(struct tgsi_declaration_semantic *semantic) -{ - int idx = semantic->Name; - if (idx == TGSI_SEMANTIC_GENERIC) - idx = TGSI_SEMANTIC_COUNT + semantic->Index; - return idx; -} - -/* assign/get the input/export register # for given semantic idx as - * returned by semantic_idx(): - */ -static int -export_linkage(struct fd2_compile_context *ctx, int idx) -{ - struct fd_program_stateobj *prog = ctx->prog; - - /* if first time we've seen this export, assign the next available slot: */ - if (prog->export_linkage[idx] == 0xff) - prog->export_linkage[idx] = prog->num_exports++; - - return prog->export_linkage[idx]; -} - -static unsigned -compile_init(struct fd2_compile_context *ctx, struct fd_program_stateobj *prog, - struct fd2_shader_stateobj *so) -{ - unsigned ret; - - ctx->prog = prog; - ctx->so = so; - ctx->pred_depth = 0; - - ret = tgsi_parse_init(&ctx->parser, so->tokens); - if (ret != TGSI_PARSE_OK) - return ret; - - ctx->type = ctx->parser.FullHeader.Processor.Processor; - ctx->position = ~0; - ctx->psize = ~0; - ctx->num_position = 0; - ctx->num_param = 0; - ctx->need_sync = 0; - ctx->immediate_idx = 0; - ctx->pred_reg = -1; - ctx->num_internal_temps = 0; - - memset(ctx->num_regs, 0, sizeof(ctx->num_regs)); - memset(ctx->input_export_idx, 0, sizeof(ctx->input_export_idx)); - memset(ctx->output_export_idx, 0, sizeof(ctx->output_export_idx)); - - /* do first pass to extract 
declarations: */ - while (!tgsi_parse_end_of_tokens(&ctx->parser)) { - tgsi_parse_token(&ctx->parser); - - switch (ctx->parser.FullToken.Token.Type) { - case TGSI_TOKEN_TYPE_DECLARATION: { - struct tgsi_full_declaration *decl = - &ctx->parser.FullToken.FullDeclaration; - if (decl->Declaration.File == TGSI_FILE_OUTPUT) { - unsigned name = decl->Semantic.Name; - - assert(decl->Declaration.Semantic); // TODO is this ever not true? - - ctx->output_export_idx[decl->Range.First] = - semantic_idx(&decl->Semantic); - - if (ctx->type == PIPE_SHADER_VERTEX) { - switch (name) { - case TGSI_SEMANTIC_POSITION: - ctx->position = ctx->num_regs[TGSI_FILE_OUTPUT]; - ctx->num_position++; - break; - case TGSI_SEMANTIC_PSIZE: - ctx->psize = ctx->num_regs[TGSI_FILE_OUTPUT]; - ctx->num_position++; - break; - case TGSI_SEMANTIC_COLOR: - case TGSI_SEMANTIC_GENERIC: - ctx->num_param++; - break; - default: - DBG("unknown VS semantic name: %s", - tgsi_semantic_names[name]); - assert(0); - } - } else { - switch (name) { - case TGSI_SEMANTIC_COLOR: - case TGSI_SEMANTIC_GENERIC: - ctx->num_param++; - break; - default: - DBG("unknown PS semantic name: %s", - tgsi_semantic_names[name]); - assert(0); - } - } - } else if (decl->Declaration.File == TGSI_FILE_INPUT) { - ctx->input_export_idx[decl->Range.First] = - semantic_idx(&decl->Semantic); - } - ctx->num_regs[decl->Declaration.File] = - MAX2(ctx->num_regs[decl->Declaration.File], decl->Range.Last + 1); - break; - } - case TGSI_TOKEN_TYPE_IMMEDIATE: { - struct tgsi_full_immediate *imm = - &ctx->parser.FullToken.FullImmediate; - unsigned n = ctx->so->num_immediates++; - memcpy(ctx->so->immediates[n].val, imm->u, 16); - break; - } - default: - break; - } - } - - /* TGSI generated immediates are always entire vec4's, ones we - * generate internally are not: - */ - ctx->immediate_idx = ctx->so->num_immediates * 4; - - ctx->so->first_immediate = ctx->num_regs[TGSI_FILE_CONSTANT]; - - tgsi_parse_free(&ctx->parser); - - return 
tgsi_parse_init(&ctx->parser, so->tokens); -} - -static void -compile_free(struct fd2_compile_context *ctx) -{ - tgsi_parse_free(&ctx->parser); -} - -static void -compile_vtx_fetch(struct fd2_compile_context *ctx) -{ - struct ir2_instruction **vfetch_instrs = ctx->so->vfetch_instrs; - int i; - for (i = 0; i < ctx->num_regs[TGSI_FILE_INPUT]; i++) { - struct ir2_instruction *instr = ir2_instr_create( - ctx->so->ir, IR2_FETCH); - instr->fetch.opc = VTX_FETCH; - - ctx->need_sync |= 1 << (i+1); - - ir2_dst_create(instr, i+1, "xyzw", 0); - ir2_reg_create(instr, 0, "x", IR2_REG_INPUT); - - if (i == 0) - instr->sync = true; - - vfetch_instrs[i] = instr; - } - ctx->so->num_vfetch_instrs = i; -} - -/* - * For vertex shaders (VS): - * --- ------ ------------- - * - * Inputs: R1-R(num_input) - * Constants: C0-C(num_const-1) - * Immediates: C(num_const)-C(num_const+num_imm-1) - * Outputs: export0-export(n) and export62, export63 - * n is # of outputs minus gl_Position (export62) and gl_PointSize (export63) - * Temps: R(num_input+1)-R(num_input+num_temps) - * - * R0 could be clobbered after the vertex fetch instructions.. so we - * could use it for one of the temporaries. - * - * TODO: maybe the vertex fetch part could fetch first input into R0 as - * the last vtx fetch instruction, which would let us use the same - * register layout in either case.. although this is not what the blob - * compiler does. - * - * - * For frag shaders (PS): - * --- ---- ------------- - * - * Inputs: R0-R(num_input-1) - * Constants: same as VS - * Immediates: same as VS - * Outputs: export0-export(num_outputs) - * Temps: R(num_input)-R(num_input+num_temps-1) - * - * In either case, immediates are are postpended to the constants - * (uniforms). 
- * - */ - -static unsigned -get_temp_gpr(struct fd2_compile_context *ctx, int idx) -{ - unsigned num = idx + ctx->num_regs[TGSI_FILE_INPUT]; - if (ctx->type == PIPE_SHADER_VERTEX) - num++; - return num; -} - -static struct ir2_dst_register * -add_dst_reg(struct fd2_compile_context *ctx, struct ir2_instruction *alu, - const struct tgsi_dst_register *dst) -{ - unsigned flags = 0, num = 0; - char swiz[5]; - - switch (dst->File) { - case TGSI_FILE_OUTPUT: - flags |= IR2_REG_EXPORT; - if (ctx->type == PIPE_SHADER_VERTEX) { - if (dst->Index == ctx->position) { - num = 62; - } else if (dst->Index == ctx->psize) { - num = 63; - } else { - num = export_linkage(ctx, - ctx->output_export_idx[dst->Index]); - } - } else { - num = dst->Index; - } - break; - case TGSI_FILE_TEMPORARY: - num = get_temp_gpr(ctx, dst->Index); - break; - default: - DBG("unsupported dst register file: %s", - tgsi_file_name(dst->File)); - assert(0); - break; - } - - swiz[0] = (dst->WriteMask & TGSI_WRITEMASK_X) ? 'x' : '_'; - swiz[1] = (dst->WriteMask & TGSI_WRITEMASK_Y) ? 'y' : '_'; - swiz[2] = (dst->WriteMask & TGSI_WRITEMASK_Z) ? 'z' : '_'; - swiz[3] = (dst->WriteMask & TGSI_WRITEMASK_W) ? 
'w' : '_'; - swiz[4] = '\0'; - - return ir2_dst_create(alu, num, swiz, flags); -} - -static struct ir2_src_register * -add_src_reg(struct fd2_compile_context *ctx, struct ir2_instruction *alu, - const struct tgsi_src_register *src) -{ - static const char swiz_vals[] = { - 'x', 'y', 'z', 'w', - }; - char swiz[5]; - unsigned flags = 0, num = 0; - - switch (src->File) { - case TGSI_FILE_CONSTANT: - num = src->Index; - flags |= IR2_REG_CONST; - break; - case TGSI_FILE_INPUT: - if (ctx->type == PIPE_SHADER_VERTEX) { - num = src->Index + 1; - } else { - flags |= IR2_REG_INPUT; - num = export_linkage(ctx, - ctx->input_export_idx[src->Index]); - } - break; - case TGSI_FILE_TEMPORARY: - num = get_temp_gpr(ctx, src->Index); - break; - case TGSI_FILE_IMMEDIATE: - num = src->Index + ctx->num_regs[TGSI_FILE_CONSTANT]; - flags |= IR2_REG_CONST; - break; - default: - DBG("unsupported src register file: %s", - tgsi_file_name(src->File)); - assert(0); - break; - } - - if (src->Absolute) - flags |= IR2_REG_ABS; - if (src->Negate) - flags |= IR2_REG_NEGATE; - - swiz[0] = swiz_vals[src->SwizzleX]; - swiz[1] = swiz_vals[src->SwizzleY]; - swiz[2] = swiz_vals[src->SwizzleZ]; - swiz[3] = swiz_vals[src->SwizzleW]; - swiz[4] = '\0'; - - if ((ctx->need_sync & ((uint64_t)1 << num)) && - !(flags & IR2_REG_CONST)) { - alu->sync = true; - ctx->need_sync &= ~((uint64_t)1 << num); - } - - return ir2_reg_create(alu, num, swiz, flags); -} - -static void -add_vector_clamp(struct tgsi_full_instruction *inst, struct ir2_instruction *alu) -{ - if (inst->Instruction.Saturate) { - alu->alu_vector.clamp = true; - } -} - -static void -add_scalar_clamp(struct tgsi_full_instruction *inst, struct ir2_instruction *alu) -{ - if (inst->Instruction.Saturate) { - alu->alu_scalar.clamp = true; - } -} - -static void -add_regs_vector_1(struct fd2_compile_context *ctx, - struct tgsi_full_instruction *inst, struct ir2_instruction *alu) -{ - assert(inst->Instruction.NumSrcRegs == 1); - assert(inst->Instruction.NumDstRegs 
== 1); - - add_dst_reg(ctx, alu, &inst->Dst[0].Register); - add_src_reg(ctx, alu, &inst->Src[0].Register); - add_src_reg(ctx, alu, &inst->Src[0].Register); - add_vector_clamp(inst, alu); -} - -static void -add_regs_vector_2(struct fd2_compile_context *ctx, - struct tgsi_full_instruction *inst, struct ir2_instruction *alu) -{ - assert(inst->Instruction.NumSrcRegs == 2); - assert(inst->Instruction.NumDstRegs == 1); - - add_dst_reg(ctx, alu, &inst->Dst[0].Register); - add_src_reg(ctx, alu, &inst->Src[0].Register); - add_src_reg(ctx, alu, &inst->Src[1].Register); - add_vector_clamp(inst, alu); -} - -static void -add_regs_vector_3(struct fd2_compile_context *ctx, - struct tgsi_full_instruction *inst, struct ir2_instruction *alu) -{ - assert(inst->Instruction.NumSrcRegs == 3); - assert(inst->Instruction.NumDstRegs == 1); - - add_dst_reg(ctx, alu, &inst->Dst[0].Register); - add_src_reg(ctx, alu, &inst->Src[0].Register); - add_src_reg(ctx, alu, &inst->Src[1].Register); - add_src_reg(ctx, alu, &inst->Src[2].Register); - add_vector_clamp(inst, alu); -} - -static void -add_regs_scalar_1(struct fd2_compile_context *ctx, - struct tgsi_full_instruction *inst, struct ir2_instruction *alu) -{ - assert(inst->Instruction.NumSrcRegs == 1); - assert(inst->Instruction.NumDstRegs == 1); - - add_dst_reg(ctx, alu, &inst->Dst[0].Register); - add_src_reg(ctx, alu, &inst->Src[0].Register); - add_scalar_clamp(inst, alu); -} - -/* - * Helpers for TGSI instructions that don't map to a single shader instr: - */ - -static void -src_from_dst(struct tgsi_src_register *src, struct tgsi_dst_register *dst) -{ - src->File = dst->File; - src->Indirect = dst->Indirect; - src->Dimension = dst->Dimension; - src->Index = dst->Index; - src->Absolute = 0; - src->Negate = 0; - src->SwizzleX = TGSI_SWIZZLE_X; - src->SwizzleY = TGSI_SWIZZLE_Y; - src->SwizzleZ = TGSI_SWIZZLE_Z; - src->SwizzleW = TGSI_SWIZZLE_W; -} - -/* Get internal-temp src/dst to use for a sequence of instructions - * generated by a single TGSI 
op. - */ -static void -get_internal_temp(struct fd2_compile_context *ctx, - struct tgsi_dst_register *tmp_dst, - struct tgsi_src_register *tmp_src) -{ - int n; - - tmp_dst->File = TGSI_FILE_TEMPORARY; - tmp_dst->WriteMask = TGSI_WRITEMASK_XYZW; - tmp_dst->Indirect = 0; - tmp_dst->Dimension = 0; - - /* assign next temporary: */ - n = ctx->num_internal_temps++; - if (ctx->pred_reg != -1) - n++; - - tmp_dst->Index = ctx->num_regs[TGSI_FILE_TEMPORARY] + n; - - src_from_dst(tmp_src, tmp_dst); -} - -static void -get_predicate(struct fd2_compile_context *ctx, struct tgsi_dst_register *dst, - struct tgsi_src_register *src) -{ - assert(ctx->pred_reg != -1); - - dst->File = TGSI_FILE_TEMPORARY; - dst->WriteMask = TGSI_WRITEMASK_W; - dst->Indirect = 0; - dst->Dimension = 0; - dst->Index = get_temp_gpr(ctx, ctx->pred_reg); - - if (src) { - src_from_dst(src, dst); - src->SwizzleX = TGSI_SWIZZLE_W; - src->SwizzleY = TGSI_SWIZZLE_W; - src->SwizzleZ = TGSI_SWIZZLE_W; - src->SwizzleW = TGSI_SWIZZLE_W; - } -} - -static void -push_predicate(struct fd2_compile_context *ctx, struct tgsi_src_register *src) -{ - struct ir2_instruction *alu; - struct tgsi_dst_register pred_dst; - - if (ctx->pred_depth == 0) { - /* assign predicate register: */ - ctx->pred_reg = ctx->num_regs[TGSI_FILE_TEMPORARY]; - - get_predicate(ctx, &pred_dst, NULL); - - alu = ir2_instr_create_alu_s(ctx->so->ir, PRED_SETNEs); - add_dst_reg(ctx, alu, &pred_dst); - add_src_reg(ctx, alu, src); - } else { - struct tgsi_src_register pred_src; - - get_predicate(ctx, &pred_dst, &pred_src); - - alu = ir2_instr_create_alu_v(ctx->so->ir, MULv); - add_dst_reg(ctx, alu, &pred_dst); - add_src_reg(ctx, alu, &pred_src); - add_src_reg(ctx, alu, src); - - // XXX need to make PRED_SETE_PUSHv IR2_PRED_NONE.. but need to make - // sure src reg is valid if it was calculated with a predicate - // condition.. 
- alu->pred = IR2_PRED_NONE; - } - - /* save previous pred state to restore in pop_predicate(): */ - ctx->pred_stack[ctx->pred_depth++] = ctx->so->ir->pred; -} - -static void -pop_predicate(struct fd2_compile_context *ctx) -{ - /* restore previous predicate state: */ - ctx->so->ir->pred = ctx->pred_stack[--ctx->pred_depth]; - - if (ctx->pred_depth != 0) { - struct ir2_instruction *alu; - struct tgsi_dst_register pred_dst; - struct tgsi_src_register pred_src; - - get_predicate(ctx, &pred_dst, &pred_src); - - alu = ir2_instr_create_alu_s(ctx->so->ir, PRED_SET_POPs); - add_dst_reg(ctx, alu, &pred_dst); - add_src_reg(ctx, alu, &pred_src); - alu->pred = IR2_PRED_NONE; - } else { - /* predicate register no longer needed: */ - ctx->pred_reg = -1; - } -} - -static void -get_immediate(struct fd2_compile_context *ctx, - struct tgsi_src_register *reg, uint32_t val) -{ - unsigned neg, swiz, idx, i; - /* actually maps 1:1 currently.. not sure if that is safe to rely on: */ - static const unsigned swiz2tgsi[] = { - TGSI_SWIZZLE_X, TGSI_SWIZZLE_Y, TGSI_SWIZZLE_Z, TGSI_SWIZZLE_W, - }; - - for (i = 0; i < ctx->immediate_idx; i++) { - swiz = i % 4; - idx = i / 4; - - if (ctx->so->immediates[idx].val[swiz] == val) { - neg = 0; - break; - } - - if (ctx->so->immediates[idx].val[swiz] == -val) { - neg = 1; - break; - } - } - - if (i == ctx->immediate_idx) { - /* need to generate a new immediate: */ - swiz = i % 4; - idx = i / 4; - neg = 0; - ctx->so->immediates[idx].val[swiz] = val; - ctx->so->num_immediates = idx + 1; - ctx->immediate_idx++; - } - - reg->File = TGSI_FILE_IMMEDIATE; - reg->Indirect = 0; - reg->Dimension = 0; - reg->Index = idx; - reg->Absolute = 0; - reg->Negate = neg; - reg->SwizzleX = swiz2tgsi[swiz]; - reg->SwizzleY = swiz2tgsi[swiz]; - reg->SwizzleZ = swiz2tgsi[swiz]; - reg->SwizzleW = swiz2tgsi[swiz]; -} - -/* POW(a,b) = EXP2(b * LOG2(a)) */ -static void -translate_pow(struct fd2_compile_context *ctx, - struct tgsi_full_instruction *inst) -{ - struct 
tgsi_dst_register tmp_dst; - struct tgsi_src_register tmp_src; - struct ir2_instruction *alu; - - get_internal_temp(ctx, &tmp_dst, &tmp_src); - - alu = ir2_instr_create_alu_s(ctx->so->ir, LOG_CLAMP); - add_dst_reg(ctx, alu, &tmp_dst); - add_src_reg(ctx, alu, &inst->Src[0].Register); - - alu = ir2_instr_create_alu_v(ctx->so->ir, MULv); - add_dst_reg(ctx, alu, &tmp_dst); - add_src_reg(ctx, alu, &tmp_src); - add_src_reg(ctx, alu, &inst->Src[1].Register); - - /* NOTE: some of the instructions, like EXP_IEEE, seem hard- - * coded to take their input from the w component. - */ - switch(inst->Dst[0].Register.WriteMask) { - case TGSI_WRITEMASK_X: - tmp_src.SwizzleW = TGSI_SWIZZLE_X; - break; - case TGSI_WRITEMASK_Y: - tmp_src.SwizzleW = TGSI_SWIZZLE_Y; - break; - case TGSI_WRITEMASK_Z: - tmp_src.SwizzleW = TGSI_SWIZZLE_Z; - break; - case TGSI_WRITEMASK_W: - tmp_src.SwizzleW = TGSI_SWIZZLE_W; - break; - default: - DBG("invalid writemask!"); - assert(0); - break; - } - - alu = ir2_instr_create_alu_s(ctx->so->ir, EXP_IEEE); - add_dst_reg(ctx, alu, &inst->Dst[0].Register); - add_src_reg(ctx, alu, &tmp_src); - add_scalar_clamp(inst, alu); -} - -static void -translate_tex(struct fd2_compile_context *ctx, - struct tgsi_full_instruction *inst, unsigned opc) -{ - struct ir2_instruction *instr; - struct ir2_src_register *reg; - struct tgsi_dst_register tmp_dst; - struct tgsi_src_register tmp_src; - const struct tgsi_src_register *coord; - bool using_temp = (inst->Dst[0].Register.File == TGSI_FILE_OUTPUT) || - inst->Instruction.Saturate; - int idx; - - if (using_temp || (opc == TGSI_OPCODE_TXP)) - get_internal_temp(ctx, &tmp_dst, &tmp_src); - - if (opc == TGSI_OPCODE_TXP) { - static const char *swiz[] = { - [TGSI_SWIZZLE_X] = "xxxx", - [TGSI_SWIZZLE_Y] = "yyyy", - [TGSI_SWIZZLE_Z] = "zzzz", - [TGSI_SWIZZLE_W] = "wwww", - }; - - /* TXP - Projective Texture Lookup: - * - * coord.x = src0.x / src.w - * coord.y = src0.y / src.w - * coord.z = src0.z / src.w - * coord.w = src0.w - * bias = 
0.0 - * - * dst = texture_sample(unit, coord, bias) - */ - - instr = ir2_instr_create_alu_v(ctx->so->ir, MAXv); - add_dst_reg(ctx, instr, &tmp_dst)->swizzle = "___w"; - add_src_reg(ctx, instr, &inst->Src[0].Register); - add_src_reg(ctx, instr, &inst->Src[0].Register); - - instr = ir2_instr_create_alu_s(ctx->so->ir, RECIP_IEEE); - add_dst_reg(ctx, instr, &tmp_dst)->swizzle = "x___"; - memcpy(add_src_reg(ctx, instr, &inst->Src[0].Register)->swizzle, - swiz[inst->Src[0].Register.SwizzleW], 4); - - instr = ir2_instr_create_alu_v(ctx->so->ir, MULv); - add_dst_reg(ctx, instr, &tmp_dst)->swizzle = "xyz_"; - add_src_reg(ctx, instr, &tmp_src)->swizzle = "xxxx"; - add_src_reg(ctx, instr, &inst->Src[0].Register); - - coord = &tmp_src; - } else { - coord = &inst->Src[0].Register; - } - - instr = ir2_instr_create(ctx->so->ir, IR2_FETCH); - instr->fetch.opc = TEX_FETCH; - instr->fetch.is_cube = (inst->Texture.Texture == TGSI_TEXTURE_3D); - instr->fetch.is_rect = (inst->Texture.Texture == TGSI_TEXTURE_RECT); - assert(inst->Texture.NumOffsets <= 1); // TODO what to do in other cases? - - /* save off the tex fetch to be patched later with correct const_idx: */ - idx = ctx->so->num_tfetch_instrs++; - ctx->so->tfetch_instrs[idx].samp_id = inst->Src[1].Register.Index; - ctx->so->tfetch_instrs[idx].instr = instr; - - add_dst_reg(ctx, instr, using_temp ? &tmp_dst : &inst->Dst[0].Register); - reg = add_src_reg(ctx, instr, coord); - - /* blob compiler always sets 3rd component to same as 1st for 2d: */ - if (inst->Texture.Texture == TGSI_TEXTURE_2D || inst->Texture.Texture == TGSI_TEXTURE_RECT) - reg->swizzle[2] = reg->swizzle[0]; - - /* dst register needs to be marked for sync: */ - ctx->need_sync |= 1 << instr->dst_reg.num; - - /* TODO we need some way to know if the tex fetch needs to sync on alu pipe.. 
*/ - instr->sync = true; - - if (using_temp) { - /* texture fetch can't write directly to export, so if tgsi - * is telling us the dst register is in output file, we load - * the texture to a temp and the use ALU instruction to move - * to output - */ - instr = ir2_instr_create_alu_v(ctx->so->ir, MAXv); - - add_dst_reg(ctx, instr, &inst->Dst[0].Register); - add_src_reg(ctx, instr, &tmp_src); - add_src_reg(ctx, instr, &tmp_src); - add_vector_clamp(inst, instr); - } -} - -/* SGE(a,b) = GTE((b - a), 1.0, 0.0) */ -/* SLT(a,b) = GTE((b - a), 0.0, 1.0) */ -/* SEQ(a,b) = EQU((b - a), 1.0, 0.0) */ -/* SNE(a,b) = EQU((b - a), 0.0, 1.0) */ -static void -translate_sge_slt_seq_sne(struct fd2_compile_context *ctx, - struct tgsi_full_instruction *inst, unsigned opc) -{ - struct ir2_instruction *instr; - struct tgsi_dst_register tmp_dst; - struct tgsi_src_register tmp_src; - struct tgsi_src_register tmp_const; - float c0, c1; - instr_vector_opc_t vopc; - - switch (opc) { - default: - assert(0); - case TGSI_OPCODE_SGE: - c0 = 1.0; - c1 = 0.0; - vopc = CNDGTEv; - break; - case TGSI_OPCODE_SLT: - c0 = 0.0; - c1 = 1.0; - vopc = CNDGTEv; - break; - case TGSI_OPCODE_SEQ: - c0 = 0.0; - c1 = 1.0; - vopc = CNDEv; - break; - case TGSI_OPCODE_SNE: - c0 = 1.0; - c1 = 0.0; - vopc = CNDEv; - break; - } - - get_internal_temp(ctx, &tmp_dst, &tmp_src); - - instr = ir2_instr_create_alu_v(ctx->so->ir, ADDv); - add_dst_reg(ctx, instr, &tmp_dst); - add_src_reg(ctx, instr, &inst->Src[0].Register)->flags |= IR2_REG_NEGATE; - add_src_reg(ctx, instr, &inst->Src[1].Register); - - instr = ir2_instr_create_alu_v(ctx->so->ir, vopc); - add_dst_reg(ctx, instr, &inst->Dst[0].Register); - add_src_reg(ctx, instr, &tmp_src); - get_immediate(ctx, &tmp_const, fui(c1)); - add_src_reg(ctx, instr, &tmp_const); - get_immediate(ctx, &tmp_const, fui(c0)); - add_src_reg(ctx, instr, &tmp_const); -} - -/* LRP(a,b,c) = (a * b) + ((1 - a) * c) */ -static void -translate_lrp(struct fd2_compile_context *ctx, - struct 
tgsi_full_instruction *inst, - unsigned opc) -{ - struct ir2_instruction *instr; - struct tgsi_dst_register tmp_dst1, tmp_dst2; - struct tgsi_src_register tmp_src1, tmp_src2; - struct tgsi_src_register tmp_const; - - get_internal_temp(ctx, &tmp_dst1, &tmp_src1); - get_internal_temp(ctx, &tmp_dst2, &tmp_src2); - - get_immediate(ctx, &tmp_const, fui(1.0)); - - /* tmp1 = (a * b) */ - instr = ir2_instr_create_alu_v(ctx->so->ir, MULv); - add_dst_reg(ctx, instr, &tmp_dst1); - add_src_reg(ctx, instr, &inst->Src[0].Register); - add_src_reg(ctx, instr, &inst->Src[1].Register); - - /* tmp2 = (1 - a) */ - instr = ir2_instr_create_alu_v(ctx->so->ir, ADDv); - add_dst_reg(ctx, instr, &tmp_dst2); - add_src_reg(ctx, instr, &tmp_const); - add_src_reg(ctx, instr, &inst->Src[0].Register)->flags |= IR2_REG_NEGATE; - - /* tmp2 = tmp2 * c */ - instr = ir2_instr_create_alu_v(ctx->so->ir, MULv); - add_dst_reg(ctx, instr, &tmp_dst2); - add_src_reg(ctx, instr, &tmp_src2); - add_src_reg(ctx, instr, &inst->Src[2].Register); - - /* dst = tmp1 + tmp2 */ - instr = ir2_instr_create_alu_v(ctx->so->ir, ADDv); - add_dst_reg(ctx, instr, &inst->Dst[0].Register); - add_src_reg(ctx, instr, &tmp_src1); - add_src_reg(ctx, instr, &tmp_src2); -} - -static void -translate_trig(struct fd2_compile_context *ctx, - struct tgsi_full_instruction *inst, - unsigned opc) -{ - struct ir2_instruction *instr; - struct tgsi_dst_register tmp_dst; - struct tgsi_src_register tmp_src; - struct tgsi_src_register tmp_const; - instr_scalar_opc_t op; - - switch (opc) { - default: - assert(0); - case TGSI_OPCODE_SIN: - op = SIN; - break; - case TGSI_OPCODE_COS: - op = COS; - break; - } - - get_internal_temp(ctx, &tmp_dst, &tmp_src); - - tmp_dst.WriteMask = TGSI_WRITEMASK_X; - tmp_src.SwizzleX = tmp_src.SwizzleY = - tmp_src.SwizzleZ = tmp_src.SwizzleW = TGSI_SWIZZLE_X; - - instr = ir2_instr_create_alu_v(ctx->so->ir, MULADDv); - add_dst_reg(ctx, instr, &tmp_dst); - add_src_reg(ctx, instr, &inst->Src[0].Register); - 
get_immediate(ctx, &tmp_const, fui(0.159155)); - add_src_reg(ctx, instr, &tmp_const); - get_immediate(ctx, &tmp_const, fui(0.5)); - add_src_reg(ctx, instr, &tmp_const); - - instr = ir2_instr_create_alu_v(ctx->so->ir, FRACv); - add_dst_reg(ctx, instr, &tmp_dst); - add_src_reg(ctx, instr, &tmp_src); - add_src_reg(ctx, instr, &tmp_src); - - instr = ir2_instr_create_alu_v(ctx->so->ir, MULADDv); - add_dst_reg(ctx, instr, &tmp_dst); - add_src_reg(ctx, instr, &tmp_src); - get_immediate(ctx, &tmp_const, fui(6.283185)); - add_src_reg(ctx, instr, &tmp_const); - get_immediate(ctx, &tmp_const, fui(-3.141593)); - add_src_reg(ctx, instr, &tmp_const); - - instr = ir2_instr_create_alu_s(ctx->so->ir, op); - add_dst_reg(ctx, instr, &inst->Dst[0].Register); - add_src_reg(ctx, instr, &tmp_src); -} - -static void -translate_dp2(struct fd2_compile_context *ctx, - struct tgsi_full_instruction *inst, - unsigned opc) -{ - struct tgsi_src_register tmp_const; - struct ir2_instruction *instr; - /* DP2ADD c,a,b -> dot2(a,b) + c */ - /* for c we use the constant 0.0 */ - instr = ir2_instr_create_alu_v(ctx->so->ir, DOT2ADDv); - add_dst_reg(ctx, instr, &inst->Dst[0].Register); - add_src_reg(ctx, instr, &inst->Src[0].Register); - add_src_reg(ctx, instr, &inst->Src[1].Register); - get_immediate(ctx, &tmp_const, fui(0.0f)); - add_src_reg(ctx, instr, &tmp_const); - add_vector_clamp(inst, instr); -} - -/* - * Main part of compiler/translator: - */ - -static void -translate_instruction(struct fd2_compile_context *ctx, - struct tgsi_full_instruction *inst) -{ - unsigned opc = inst->Instruction.Opcode; - struct ir2_instruction *instr; - - if (opc == TGSI_OPCODE_END) - return; - - /* TODO turn this into a table: */ - switch (opc) { - case TGSI_OPCODE_MOV: - instr = ir2_instr_create_alu_v(ctx->so->ir, MAXv); - add_regs_vector_1(ctx, inst, instr); - break; - case TGSI_OPCODE_RCP: - instr = ir2_instr_create_alu_s(ctx->so->ir, RECIP_IEEE); - add_regs_scalar_1(ctx, inst, instr); - break; - case 
TGSI_OPCODE_RSQ: - instr = ir2_instr_create_alu_s(ctx->so->ir, RECIPSQ_IEEE); - add_regs_scalar_1(ctx, inst, instr); - break; - case TGSI_OPCODE_SQRT: - instr = ir2_instr_create_alu_s(ctx->so->ir, SQRT_IEEE); - add_regs_scalar_1(ctx, inst, instr); - break; - case TGSI_OPCODE_MUL: - instr = ir2_instr_create_alu_v(ctx->so->ir, MULv); - add_regs_vector_2(ctx, inst, instr); - break; - case TGSI_OPCODE_ADD: - instr = ir2_instr_create_alu_v(ctx->so->ir, ADDv); - add_regs_vector_2(ctx, inst, instr); - break; - case TGSI_OPCODE_DP2: - translate_dp2(ctx, inst, opc); - break; - case TGSI_OPCODE_DP3: - instr = ir2_instr_create_alu_v(ctx->so->ir, DOT3v); - add_regs_vector_2(ctx, inst, instr); - break; - case TGSI_OPCODE_DP4: - instr = ir2_instr_create_alu_v(ctx->so->ir, DOT4v); - add_regs_vector_2(ctx, inst, instr); - break; - case TGSI_OPCODE_MIN: - instr = ir2_instr_create_alu_v(ctx->so->ir, MINv); - add_regs_vector_2(ctx, inst, instr); - break; - case TGSI_OPCODE_MAX: - instr = ir2_instr_create_alu_v(ctx->so->ir, MAXv); - add_regs_vector_2(ctx, inst, instr); - break; - case TGSI_OPCODE_SLT: - case TGSI_OPCODE_SGE: - case TGSI_OPCODE_SEQ: - case TGSI_OPCODE_SNE: - translate_sge_slt_seq_sne(ctx, inst, opc); - break; - case TGSI_OPCODE_MAD: - instr = ir2_instr_create_alu_v(ctx->so->ir, MULADDv); - add_regs_vector_3(ctx, inst, instr); - break; - case TGSI_OPCODE_LRP: - translate_lrp(ctx, inst, opc); - break; - case TGSI_OPCODE_FRC: - instr = ir2_instr_create_alu_v(ctx->so->ir, FRACv); - add_regs_vector_1(ctx, inst, instr); - break; - case TGSI_OPCODE_FLR: - instr = ir2_instr_create_alu_v(ctx->so->ir, FLOORv); - add_regs_vector_1(ctx, inst, instr); - break; - case TGSI_OPCODE_EX2: - instr = ir2_instr_create_alu_s(ctx->so->ir, EXP_IEEE); - add_regs_scalar_1(ctx, inst, instr); - break; - case TGSI_OPCODE_POW: - translate_pow(ctx, inst); - break; - case TGSI_OPCODE_COS: - case TGSI_OPCODE_SIN: - translate_trig(ctx, inst, opc); - break; - case TGSI_OPCODE_TEX: - case 
TGSI_OPCODE_TXP: - translate_tex(ctx, inst, opc); - break; - case TGSI_OPCODE_CMP: - instr = ir2_instr_create_alu_v(ctx->so->ir, CNDGTEv); - add_regs_vector_3(ctx, inst, instr); - instr->src_reg[0].flags ^= IR2_REG_NEGATE; /* src1 */ - break; - case TGSI_OPCODE_IF: - push_predicate(ctx, &inst->Src[0].Register); - ctx->so->ir->pred = IR2_PRED_EQ; - break; - case TGSI_OPCODE_ELSE: - ctx->so->ir->pred = IR2_PRED_NE; - break; - case TGSI_OPCODE_ENDIF: - pop_predicate(ctx); - break; - case TGSI_OPCODE_F2I: - instr = ir2_instr_create_alu_v(ctx->so->ir, TRUNCv); - add_regs_vector_1(ctx, inst, instr); - break; - default: - DBG("unknown TGSI opc: %s", tgsi_get_opcode_name(opc)); - tgsi_dump(ctx->so->tokens, 0); - assert(0); - break; - } - - /* internal temporaries are only valid for the duration of a single - * TGSI instruction: - */ - ctx->num_internal_temps = 0; -} - -static void -compile_instructions(struct fd2_compile_context *ctx) -{ - while (!tgsi_parse_end_of_tokens(&ctx->parser)) { - tgsi_parse_token(&ctx->parser); - - switch (ctx->parser.FullToken.Token.Type) { - case TGSI_TOKEN_TYPE_INSTRUCTION: - translate_instruction(ctx, - &ctx->parser.FullToken.FullInstruction); - break; - default: - break; - } - } -} - -int -fd2_compile_shader(struct fd_program_stateobj *prog, - struct fd2_shader_stateobj *so) -{ - struct fd2_compile_context ctx; - - ir2_shader_destroy(so->ir); - so->ir = ir2_shader_create(); - so->num_vfetch_instrs = so->num_tfetch_instrs = so->num_immediates = 0; - - if (compile_init(&ctx, prog, so) != TGSI_PARSE_OK) - return -1; - - if (ctx.type == PIPE_SHADER_VERTEX) { - compile_vtx_fetch(&ctx); - } else if (ctx.type == PIPE_SHADER_FRAGMENT) { - prog->num_exports = 0; - memset(prog->export_linkage, 0xff, - sizeof(prog->export_linkage)); - } - - compile_instructions(&ctx); - - compile_free(&ctx); - - return 0; -} - diff -Nru mesa-18.3.3/src/gallium/drivers/freedreno/a2xx/fd2_compiler.h mesa-19.0.1/src/gallium/drivers/freedreno/a2xx/fd2_compiler.h --- 
mesa-18.3.3/src/gallium/drivers/freedreno/a2xx/fd2_compiler.h 2018-12-07 18:58:04.000000000 +0000 +++ mesa-19.0.1/src/gallium/drivers/freedreno/a2xx/fd2_compiler.h 1970-01-01 00:00:00.000000000 +0000 @@ -1,36 +0,0 @@ -/* - * Copyright (C) 2012 Rob Clark - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * the rights to use, copy, modify, merge, publish, distribute, sublicense, - * and/or sell copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice (including the next - * paragraph) shall be included in all copies or substantial portions of the - * Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL - * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. 
- * - * Authors: - * Rob Clark - */ - -#ifndef FD2_COMPILER_H_ -#define FD2_COMPILER_H_ - -#include "fd2_program.h" -#include "fd2_util.h" - -int fd2_compile_shader(struct fd_program_stateobj *prog, - struct fd2_shader_stateobj *so); - -#endif /* FD2_COMPILER_H_ */ diff -Nru mesa-18.3.3/src/gallium/drivers/freedreno/a2xx/fd2_context.c mesa-19.0.1/src/gallium/drivers/freedreno/a2xx/fd2_context.c --- mesa-18.3.3/src/gallium/drivers/freedreno/a2xx/fd2_context.c 2018-12-07 18:58:04.000000000 +0000 +++ mesa-19.0.1/src/gallium/drivers/freedreno/a2xx/fd2_context.c 2019-03-31 23:16:37.000000000 +0000 @@ -31,6 +31,7 @@ #include "fd2_emit.h" #include "fd2_gmem.h" #include "fd2_program.h" +#include "fd2_query.h" #include "fd2_rasterizer.h" #include "fd2_texture.h" #include "fd2_zsa.h" @@ -46,17 +47,18 @@ create_solid_vertexbuf(struct pipe_context *pctx) { static const float init_shader_const[] = { - /* for clear/gmem2mem: */ - -1.000000, +1.000000, +1.000000, +1.100000, - +1.000000, +1.000000, -1.000000, -1.100000, - +1.000000, +1.100000, -1.100000, +1.000000, - /* for mem2gmem: (vertices) */ - -1.000000, +1.000000, +1.000000, +1.000000, - +1.000000, +1.000000, -1.000000, -1.000000, - +1.000000, +1.000000, -1.000000, +1.000000, + /* for clear/gmem2mem/mem2gmem (vertices): */ + -1.000000, +1.000000, +1.000000, + +1.000000, +1.000000, +1.000000, + -1.000000, -1.000000, +1.000000, /* for mem2gmem: (tex coords) */ - +0.000000, +0.000000, +1.000000, +0.000000, - +0.000000, +1.000000, +1.000000, +1.000000, + +0.000000, +0.000000, + +1.000000, +0.000000, + +0.000000, +1.000000, + /* SCREEN_SCISSOR_BR value (must be at 60 byte offset in page) */ + 0.0, + /* zero indices dummy draw workaround (3 16-bit zeros) */ + 0.0, 0.0, }; struct pipe_resource *prsc = pipe_buffer_create(pctx->screen, PIPE_BIND_CUSTOM, PIPE_USAGE_IMMUTABLE, sizeof(init_shader_const)); @@ -119,5 +121,7 @@ /* construct vertex state used for solid ops (clear, and gmem<->mem) */ fd2_ctx->solid_vertexbuf = 
create_solid_vertexbuf(pctx); + fd2_query_context_init(pctx); + return pctx; } diff -Nru mesa-18.3.3/src/gallium/drivers/freedreno/a2xx/fd2_draw.c mesa-19.0.1/src/gallium/drivers/freedreno/a2xx/fd2_draw.c --- mesa-18.3.3/src/gallium/drivers/freedreno/a2xx/fd2_draw.c 2018-12-07 18:58:04.000000000 +0000 +++ mesa-19.0.1/src/gallium/drivers/freedreno/a2xx/fd2_draw.c 2019-03-31 23:16:37.000000000 +0000 @@ -75,31 +75,43 @@ // CONST(20,0) (or CONST(26,0) in soliv_vp) fd2_emit_vertex_bufs(ctx->batch->draw, 0x78, bufs, vtx->num_elements); + fd2_emit_vertex_bufs(ctx->batch->binning, 0x78, bufs, vtx->num_elements); } -static bool -fd2_draw_vbo(struct fd_context *ctx, const struct pipe_draw_info *info, - unsigned index_offset) +static void +draw_impl(struct fd_context *ctx, const struct pipe_draw_info *info, + struct fd_ringbuffer *ring, unsigned index_offset, bool binning) { - struct fd_ringbuffer *ring = ctx->batch->draw; - - if (ctx->dirty & FD_DIRTY_VTXBUF) - emit_vertexbufs(ctx); - - fd2_emit_state(ctx, ctx->dirty); - OUT_PKT3(ring, CP_SET_CONSTANT, 2); OUT_RING(ring, CP_REG(REG_A2XX_VGT_INDX_OFFSET)); - OUT_RING(ring, info->start); - - OUT_PKT3(ring, CP_SET_CONSTANT, 2); - OUT_RING(ring, CP_REG(REG_A2XX_VGT_VERTEX_REUSE_BLOCK_CNTL)); - OUT_RING(ring, 0x0000003b); + OUT_RING(ring, info->index_size ? 0 : info->start); OUT_PKT0(ring, REG_A2XX_TC_CNTL_STATUS, 1); OUT_RING(ring, A2XX_TC_CNTL_STATUS_L2_INVALIDATE); - if (!is_a20x(ctx->screen)) { + if (is_a20x(ctx->screen)) { + /* wait for DMA to finish and + * dummy draw one triangle with indexes 0,0,0. + * with PRE_FETCH_CULL_ENABLE | GRP_CULL_ENABLE. 
+ * + * this workaround is for a HW bug related to DMA alignment: + * it is necessary for indexed draws and possibly also + * draws that read binning data + */ + OUT_PKT3(ring, CP_WAIT_REG_EQ, 4); + OUT_RING(ring, 0x000005d0); /* RBBM_STATUS */ + OUT_RING(ring, 0x00000000); + OUT_RING(ring, 0x00001000); /* bit: 12: VGT_BUSY_NO_DMA */ + OUT_RING(ring, 0x00000001); + + OUT_PKT3(ring, CP_DRAW_INDX_BIN, 6); + OUT_RING(ring, 0x00000000); + OUT_RING(ring, 0x0003c004); + OUT_RING(ring, 0x00000000); + OUT_RING(ring, 0x00000003); + OUT_RELOC(ring, fd_resource(fd2_context(ctx)->solid_vertexbuf)->bo, 64, 0, 0); + OUT_RING(ring, 0x00000006); + } else { OUT_WFI (ring); OUT_PKT3(ring, CP_SET_CONSTANT, 3); @@ -108,134 +120,132 @@ OUT_RING(ring, info->min_index); /* VGT_MIN_VTX_INDX */ } + /* binning shader will take offset from C64 */ + if (binning && is_a20x(ctx->screen)) { + OUT_PKT3(ring, CP_SET_CONSTANT, 5); + OUT_RING(ring, 0x00000180); + OUT_RING(ring, fui(ctx->batch->num_vertices)); + OUT_RING(ring, fui(0.0f)); + OUT_RING(ring, fui(0.0f)); + OUT_RING(ring, fui(0.0f)); + } + + enum pc_di_vis_cull_mode vismode = USE_VISIBILITY; + if (binning || info->mode == PIPE_PRIM_POINTS) + vismode = IGNORE_VISIBILITY; + fd_draw_emit(ctx->batch, ring, ctx->primtypes[info->mode], - IGNORE_VISIBILITY, info, index_offset); + vismode, info, index_offset); - OUT_PKT3(ring, CP_SET_CONSTANT, 2); - OUT_RING(ring, CP_REG(REG_A2XX_UNKNOWN_2010)); - OUT_RING(ring, 0x00000000); + if (is_a20x(ctx->screen)) { + /* not sure why this is required, but it fixes some hangs */ + OUT_WFI(ring); + } else { + OUT_PKT3(ring, CP_SET_CONSTANT, 2); + OUT_RING(ring, CP_REG(REG_A2XX_UNKNOWN_2010)); + OUT_RING(ring, 0x00000000); + } emit_cacheflush(ring); +} + + +static bool +fd2_draw_vbo(struct fd_context *ctx, const struct pipe_draw_info *pinfo, + unsigned index_offset) +{ + if (!ctx->prog.fp || !ctx->prog.vp) + return false; + + if (ctx->dirty & FD_DIRTY_VTXBUF) + emit_vertexbufs(ctx); + + if (fd_binning_enabled) 
+ fd2_emit_state_binning(ctx, ctx->dirty); + + fd2_emit_state(ctx, ctx->dirty); + + /* a2xx can draw only 65535 vertices at once + * on a22x the field in the draw command is 32bits but seems limited too + * using a limit of 32k because it fixes an unexplained hang + * 32766 works for all primitives (multiple of 2 and 3) + */ + if (pinfo->count > 32766) { + static const uint16_t step_tbl[PIPE_PRIM_MAX] = { + [0 ... PIPE_PRIM_MAX - 1] = 32766, + [PIPE_PRIM_LINE_STRIP] = 32765, + [PIPE_PRIM_TRIANGLE_STRIP] = 32764, + + /* needs more work */ + [PIPE_PRIM_TRIANGLE_FAN] = 0, + [PIPE_PRIM_LINE_LOOP] = 0, + }; + + struct pipe_draw_info info = *pinfo; + unsigned count = info.count; + unsigned step = step_tbl[info.mode]; + unsigned num_vertices = ctx->batch->num_vertices; + + if (!step) + return false; + + for (; count + step > 32766; count -= step) { + info.count = MIN2(count, 32766); + draw_impl(ctx, &info, ctx->batch->draw, index_offset, false); + draw_impl(ctx, &info, ctx->batch->binning, index_offset, true); + info.start += step; + ctx->batch->num_vertices += step; + } + /* changing this value is a hack, restore it */ + ctx->batch->num_vertices = num_vertices; + } else { + draw_impl(ctx, pinfo, ctx->batch->draw, index_offset, false); + draw_impl(ctx, pinfo, ctx->batch->binning, index_offset, true); + } fd_context_all_clean(ctx); return true; } - -static bool -fd2_clear(struct fd_context *ctx, unsigned buffers, - const union pipe_color_union *color, double depth, unsigned stencil) +static void +clear_state(struct fd_batch *batch, struct fd_ringbuffer *ring, + unsigned buffers, bool fast_clear) { + struct fd_context *ctx = batch->ctx; struct fd2_context *fd2_ctx = fd2_context(ctx); - struct fd_ringbuffer *ring = ctx->batch->draw; - struct pipe_framebuffer_state *fb = &ctx->batch->framebuffer; - uint32_t reg, colr = 0; - - if ((buffers & PIPE_CLEAR_COLOR) && fb->nr_cbufs) - colr = pack_rgba(PIPE_FORMAT_R8G8B8A8_UNORM, color->f); - - /* emit generic state now: */ - 
fd2_emit_state(ctx, ctx->dirty & - (FD_DIRTY_BLEND | FD_DIRTY_VIEWPORT | - FD_DIRTY_FRAMEBUFFER | FD_DIRTY_SCISSOR)); + uint32_t reg; fd2_emit_vertex_bufs(ring, 0x9c, (struct fd2_vertex_buf[]) { - { .prsc = fd2_ctx->solid_vertexbuf, .size = 48 }, + { .prsc = fd2_ctx->solid_vertexbuf, .size = 36 }, }, 1); OUT_PKT3(ring, CP_SET_CONSTANT, 2); OUT_RING(ring, CP_REG(REG_A2XX_VGT_INDX_OFFSET)); OUT_RING(ring, 0); - OUT_PKT3(ring, CP_SET_CONSTANT, 2); - OUT_RING(ring, CP_REG(REG_A2XX_VGT_VERTEX_REUSE_BLOCK_CNTL)); - OUT_RING(ring, 0x0000028f); - - fd2_program_emit(ring, &ctx->solid_prog); + fd2_program_emit(ctx, ring, &ctx->solid_prog); OUT_PKT0(ring, REG_A2XX_TC_CNTL_STATUS, 1); OUT_RING(ring, A2XX_TC_CNTL_STATUS_L2_INVALIDATE); - if (is_a20x(ctx->screen)) { - OUT_PKT3(ring, CP_SET_CONSTANT, 5); - OUT_RING(ring, 0x00000480); - OUT_RING(ring, color->ui[0]); - OUT_RING(ring, color->ui[1]); - OUT_RING(ring, color->ui[2]); - OUT_RING(ring, color->ui[3]); - } else { - OUT_PKT3(ring, CP_SET_CONSTANT, 2); - OUT_RING(ring, CP_REG(REG_A2XX_CLEAR_COLOR)); - OUT_RING(ring, colr); - } - - OUT_PKT3(ring, CP_SET_CONSTANT, 2); - OUT_RING(ring, CP_REG(REG_A2XX_A220_RB_LRZ_VSC_CONTROL)); - OUT_RING(ring, 0x00000084); - - OUT_PKT3(ring, CP_SET_CONSTANT, 2); - OUT_RING(ring, CP_REG(REG_A2XX_RB_COPY_CONTROL)); - reg = 0; - if (buffers & (PIPE_CLEAR_DEPTH | PIPE_CLEAR_STENCIL)) { - reg |= A2XX_RB_COPY_CONTROL_DEPTH_CLEAR_ENABLE; - switch (fd_pipe2depth(fb->zsbuf->format)) { - case DEPTHX_24_8: - if (buffers & PIPE_CLEAR_DEPTH) - reg |= A2XX_RB_COPY_CONTROL_CLEAR_MASK(0xe); - if (buffers & PIPE_CLEAR_STENCIL) - reg |= A2XX_RB_COPY_CONTROL_CLEAR_MASK(0x1); - break; - case DEPTHX_16: - if (buffers & PIPE_CLEAR_DEPTH) - reg |= A2XX_RB_COPY_CONTROL_CLEAR_MASK(0xf); - break; - default: - debug_assert(0); - break; - } - } - OUT_RING(ring, reg); - - OUT_PKT3(ring, CP_SET_CONSTANT, 2); - OUT_RING(ring, CP_REG(REG_A2XX_RB_DEPTH_CLEAR)); - reg = 0; if (buffers & (PIPE_CLEAR_DEPTH | PIPE_CLEAR_STENCIL)) 
{ - switch (fd_pipe2depth(fb->zsbuf->format)) { - case DEPTHX_24_8: - reg = (((uint32_t)(0xffffff * depth)) << 8) | - (stencil & 0xff); - break; - case DEPTHX_16: - reg = (uint32_t)(0xffffffff * depth); - break; - default: - debug_assert(0); - break; - } - } - OUT_RING(ring, reg); - - OUT_PKT3(ring, CP_SET_CONSTANT, 2); - OUT_RING(ring, CP_REG(REG_A2XX_RB_DEPTHCONTROL)); - reg = 0; - if (buffers & PIPE_CLEAR_DEPTH) { - reg |= A2XX_RB_DEPTHCONTROL_ZFUNC(FUNC_ALWAYS) | + OUT_PKT3(ring, CP_SET_CONSTANT, 2); + OUT_RING(ring, CP_REG(REG_A2XX_RB_DEPTHCONTROL)); + reg = 0; + if (buffers & PIPE_CLEAR_DEPTH) { + reg |= A2XX_RB_DEPTHCONTROL_ZFUNC(FUNC_ALWAYS) | A2XX_RB_DEPTHCONTROL_Z_ENABLE | A2XX_RB_DEPTHCONTROL_Z_WRITE_ENABLE | A2XX_RB_DEPTHCONTROL_EARLY_Z_ENABLE; + } + if (buffers & PIPE_CLEAR_STENCIL) { + reg |= A2XX_RB_DEPTHCONTROL_STENCILFUNC(FUNC_ALWAYS) | + A2XX_RB_DEPTHCONTROL_STENCIL_ENABLE | + A2XX_RB_DEPTHCONTROL_STENCILZPASS(STENCIL_REPLACE); + } + OUT_RING(ring, reg); } - if (buffers & PIPE_CLEAR_STENCIL) { - reg |= A2XX_RB_DEPTHCONTROL_STENCILFUNC(FUNC_ALWAYS) | - A2XX_RB_DEPTHCONTROL_STENCIL_ENABLE | - A2XX_RB_DEPTHCONTROL_STENCILZPASS(STENCIL_REPLACE); - } - OUT_RING(ring, reg); - - OUT_PKT3(ring, CP_SET_CONSTANT, 3); - OUT_RING(ring, CP_REG(REG_A2XX_RB_STENCILREFMASK_BF)); - OUT_RING(ring, 0xff000000 | A2XX_RB_STENCILREFMASK_BF_STENCILWRITEMASK(0xff)); - OUT_RING(ring, 0xff000000 | A2XX_RB_STENCILREFMASK_STENCILWRITEMASK(0xff)); OUT_PKT3(ring, CP_SET_CONSTANT, 2); OUT_RING(ring, CP_REG(REG_A2XX_RB_COLORCONTROL)); @@ -250,18 +260,19 @@ OUT_RING(ring, 0x00000000); /* PA_CL_CLIP_CNTL */ OUT_RING(ring, A2XX_PA_SU_SC_MODE_CNTL_PROVOKING_VTX_LAST | /* PA_SU_SC_MODE_CNTL */ A2XX_PA_SU_SC_MODE_CNTL_FRONT_PTYPE(PC_DRAW_TRIANGLES) | - A2XX_PA_SU_SC_MODE_CNTL_BACK_PTYPE(PC_DRAW_TRIANGLES)); + A2XX_PA_SU_SC_MODE_CNTL_BACK_PTYPE(PC_DRAW_TRIANGLES) | + (fast_clear ? 
A2XX_PA_SU_SC_MODE_CNTL_MSAA_ENABLE : 0)); + + if (fast_clear) { + OUT_PKT3(ring, CP_SET_CONSTANT, 2); + OUT_RING(ring, CP_REG(REG_A2XX_PA_SC_AA_CONFIG)); + OUT_RING(ring, A2XX_PA_SC_AA_CONFIG_MSAA_NUM_SAMPLES(3)); + } OUT_PKT3(ring, CP_SET_CONSTANT, 2); OUT_RING(ring, CP_REG(REG_A2XX_PA_SC_AA_MASK)); OUT_RING(ring, 0x0000ffff); - OUT_PKT3(ring, CP_SET_CONSTANT, 3); - OUT_RING(ring, CP_REG(REG_A2XX_PA_SC_WINDOW_SCISSOR_TL)); - OUT_RING(ring, xy2d(0,0)); /* PA_SC_WINDOW_SCISSOR_TL */ - OUT_RING(ring, xy2d(fb->width, /* PA_SC_WINDOW_SCISSOR_BR */ - fb->height)); - OUT_PKT3(ring, CP_SET_CONSTANT, 2); OUT_RING(ring, CP_REG(REG_A2XX_RB_COLOR_MASK)); if (buffers & PIPE_CLEAR_COLOR) { @@ -273,24 +284,325 @@ OUT_RING(ring, 0x0); } - if (!is_a20x(ctx->screen)) { - OUT_PKT3(ring, CP_SET_CONSTANT, 3); - OUT_RING(ring, CP_REG(REG_A2XX_VGT_MAX_VTX_INDX)); - OUT_RING(ring, 3); /* VGT_MAX_VTX_INDX */ - OUT_RING(ring, 0); /* VGT_MIN_VTX_INDX */ - } + OUT_PKT3(ring, CP_SET_CONSTANT, 2); + OUT_RING(ring, CP_REG(REG_A2XX_RB_BLEND_CONTROL)); + OUT_RING(ring, 0); - fd_draw(ctx->batch, ring, DI_PT_RECTLIST, IGNORE_VISIBILITY, - DI_SRC_SEL_AUTO_INDEX, 3, 0, INDEX_SIZE_IGN, 0, 0, NULL); + if (is_a20x(batch->ctx->screen)) + return; + + OUT_PKT3(ring, CP_SET_CONSTANT, 3); + OUT_RING(ring, CP_REG(REG_A2XX_VGT_MAX_VTX_INDX)); + OUT_RING(ring, 3); /* VGT_MAX_VTX_INDX */ + OUT_RING(ring, 0); /* VGT_MIN_VTX_INDX */ + + OUT_PKT3(ring, CP_SET_CONSTANT, 3); + OUT_RING(ring, CP_REG(REG_A2XX_RB_STENCILREFMASK_BF)); + OUT_RING(ring, 0xff000000 | A2XX_RB_STENCILREFMASK_BF_STENCILWRITEMASK(0xff)); + OUT_RING(ring, 0xff000000 | A2XX_RB_STENCILREFMASK_STENCILWRITEMASK(0xff)); OUT_PKT3(ring, CP_SET_CONSTANT, 2); OUT_RING(ring, CP_REG(REG_A2XX_A220_RB_LRZ_VSC_CONTROL)); - OUT_RING(ring, 0x00000000); + OUT_RING(ring, 0x00000084); + + OUT_PKT3(ring, CP_SET_CONSTANT, 2); + OUT_RING(ring, CP_REG(REG_A2XX_VGT_VERTEX_REUSE_BLOCK_CNTL)); + OUT_RING(ring, 0x0000028f); +} + +static void +clear_state_restore(struct 
fd_context *ctx, struct fd_ringbuffer *ring) +{ + if (is_a20x(ctx->screen)) + return; OUT_PKT3(ring, CP_SET_CONSTANT, 2); OUT_RING(ring, CP_REG(REG_A2XX_RB_COPY_CONTROL)); OUT_RING(ring, 0x00000000); + OUT_PKT3(ring, CP_SET_CONSTANT, 2); + OUT_RING(ring, CP_REG(REG_A2XX_A220_RB_LRZ_VSC_CONTROL)); + OUT_RING(ring, 0x00000000); + + OUT_PKT3(ring, CP_SET_CONSTANT, 2); + OUT_RING(ring, CP_REG(REG_A2XX_VGT_VERTEX_REUSE_BLOCK_CNTL)); + OUT_RING(ring, 0x0000003b); +} + +static void +clear_fast(struct fd_batch *batch, struct fd_ringbuffer *ring, + uint32_t color_clear, uint32_t depth_clear, unsigned patch_type) +{ + BEGIN_RING(ring, 8); /* preallocate next 2 packets (for patching) */ + + /* zero values are patched in */ + OUT_PKT3(ring, CP_SET_CONSTANT, 2); + OUT_RING(ring, CP_REG(REG_A2XX_PA_SC_SCREEN_SCISSOR_BR)); + OUT_RINGP(ring, patch_type, &batch->gmem_patches); + + OUT_PKT3(ring, CP_SET_CONSTANT, 4); + OUT_RING(ring, CP_REG(REG_A2XX_RB_SURFACE_INFO)); + OUT_RING(ring, 0x8000 | 32); + OUT_RING(ring, 0); + OUT_RING(ring, 0); + + /* set fill values */ + if (!is_a20x(batch->ctx->screen)) { + OUT_PKT3(ring, CP_SET_CONSTANT, 2); + OUT_RING(ring, CP_REG(REG_A2XX_CLEAR_COLOR)); + OUT_RING(ring, color_clear); + + OUT_PKT3(ring, CP_SET_CONSTANT, 2); + OUT_RING(ring, CP_REG(REG_A2XX_RB_COPY_CONTROL)); + OUT_RING(ring, A2XX_RB_COPY_CONTROL_DEPTH_CLEAR_ENABLE | + A2XX_RB_COPY_CONTROL_CLEAR_MASK(0xf)); + + OUT_PKT3(ring, CP_SET_CONSTANT, 2); + OUT_RING(ring, CP_REG(REG_A2XX_RB_DEPTH_CLEAR)); + OUT_RING(ring, depth_clear); + } else { + const float sc = 1.0f / 255.0f; + + OUT_PKT3(ring, CP_SET_CONSTANT, 5); + OUT_RING(ring, 0x00000480); + OUT_RING(ring, fui((float) (color_clear >> 0 & 0xff) * sc)); + OUT_RING(ring, fui((float) (color_clear >> 8 & 0xff) * sc)); + OUT_RING(ring, fui((float) (color_clear >> 16 & 0xff) * sc)); + OUT_RING(ring, fui((float) (color_clear >> 24 & 0xff) * sc)); + + // XXX if using float the rounding error breaks it.. 
+ float depth = ((double) (depth_clear >> 8)) * (1.0/(double) 0xffffff); + assert((unsigned) (((double) depth * (double) 0xffffff)) == + (depth_clear >> 8)); + + OUT_PKT3(ring, CP_SET_CONSTANT, 3); + OUT_RING(ring, CP_REG(REG_A2XX_PA_CL_VPORT_ZSCALE)); + OUT_RING(ring, fui(0.0f)); + OUT_RING(ring, fui(depth)); + + OUT_PKT3(ring, CP_SET_CONSTANT, 3); + OUT_RING(ring, CP_REG(REG_A2XX_RB_STENCILREFMASK_BF)); + OUT_RING(ring, 0xff000000 | + A2XX_RB_STENCILREFMASK_BF_STENCILREF(depth_clear & 0xff) | + A2XX_RB_STENCILREFMASK_BF_STENCILWRITEMASK(0xff)); + OUT_RING(ring, 0xff000000 | + A2XX_RB_STENCILREFMASK_STENCILREF(depth_clear & 0xff) | + A2XX_RB_STENCILREFMASK_STENCILWRITEMASK(0xff)); + } + + fd_draw(batch, ring, DI_PT_RECTLIST, IGNORE_VISIBILITY, + DI_SRC_SEL_AUTO_INDEX, 3, 0, INDEX_SIZE_IGN, 0, 0, NULL); +} + +static bool +fd2_clear_fast(struct fd_context *ctx, unsigned buffers, + const union pipe_color_union *color, double depth, unsigned stencil) +{ + /* using 4x MSAA allows clearing ~2x faster + * then we can use higher bpp clearing to clear lower bpp + * 1 "pixel" can clear 64 bits (rgba8+depth24+stencil8) + * note: its possible to clear with 32_32_32_32 format but its not faster + * note: fast clear doesn't work with sysmem rendering + * (sysmem rendering is disabled when clear is used) + * + * we only have 16-bit / 32-bit color formats + * and 16-bit / 32-bit depth formats + * so there are only a few possible combinations + * + * if the bpp of the color/depth doesn't match + * we clear with depth/color individually + */ + struct fd2_context *fd2_ctx = fd2_context(ctx); + struct fd_batch *batch = ctx->batch; + struct fd_ringbuffer *ring = batch->draw; + struct pipe_framebuffer_state *pfb = &batch->framebuffer; + uint32_t color_clear = 0, depth_clear = 0; + enum pipe_format format = pipe_surface_format(pfb->cbufs[0]); + int depth_size = -1; /* -1: no clear, 0: clear 16-bit, 1: clear 32-bit */ + int color_size = -1; + + /* TODO: need to test performance on a22x 
*/ + if (!is_a20x(ctx->screen)) + return false; + + if (buffers & PIPE_CLEAR_COLOR) + color_size = util_format_get_blocksizebits(format) == 32; + + if (buffers & (PIPE_CLEAR_DEPTH | PIPE_CLEAR_STENCIL)) + depth_size = fd_pipe2depth(pfb->zsbuf->format) == DEPTHX_24_8; + + assert(color_size >= 0 || depth_size >= 0); + + /* when clearing 24_8, depth/stencil must be both cleared + * TODO: if buffer isn't attached we can clear it anyway + */ + if (depth_size == 1 && !(buffers & PIPE_CLEAR_STENCIL) != !(buffers & PIPE_CLEAR_DEPTH)) + return false; + + if (color_size == 0) { + color_clear = pack_rgba(format, color->f); + color_clear = (color_clear << 16) | (color_clear & 0xffff); + } else if (color_size == 1) { + color_clear = pack_rgba(format, color->f); + } + + if (depth_size == 0) { + depth_clear = (uint32_t)(0xffff * depth); + depth_clear |= depth_clear << 16; + } else if (depth_size == 1) { + depth_clear = (((uint32_t)(0xffffff * depth)) << 8); + depth_clear |= (stencil & 0xff); + } + + /* disable "window" scissor.. 
*/ + OUT_PKT3(ring, CP_SET_CONSTANT, 3); + OUT_RING(ring, CP_REG(REG_A2XX_PA_SC_WINDOW_SCISSOR_TL)); + OUT_RING(ring, xy2d(0, 0)); + OUT_RING(ring, xy2d(0x7fff, 0x7fff)); + + /* make sure we fill all "pixels" (in SCREEN_SCISSOR) */ + OUT_PKT3(ring, CP_SET_CONSTANT, 5); + OUT_RING(ring, CP_REG(REG_A2XX_PA_CL_VPORT_XSCALE)); + OUT_RING(ring, fui(4096.0)); + OUT_RING(ring, fui(4096.0)); + OUT_RING(ring, fui(4096.0)); + OUT_RING(ring, fui(4096.0)); + + clear_state(batch, ring, ~0u, true); + + if (color_size >= 0 && depth_size != color_size) + clear_fast(batch, ring, color_clear, color_clear, GMEM_PATCH_FASTCLEAR_COLOR); + + if (depth_size >= 0 && depth_size != color_size) + clear_fast(batch, ring, depth_clear, depth_clear, GMEM_PATCH_FASTCLEAR_DEPTH); + + if (depth_size == color_size) + clear_fast(batch, ring, color_clear, depth_clear, GMEM_PATCH_FASTCLEAR_COLOR_DEPTH); + + clear_state_restore(ctx, ring); + + OUT_PKT3(ring, CP_SET_CONSTANT, 2); + OUT_RING(ring, CP_REG(REG_A2XX_PA_SC_AA_CONFIG)); + OUT_RING(ring, 0); + + /* can't patch in SCREEN_SCISSOR_BR as it can be different for each tile. 
+ * MEM_WRITE the value in tile_renderprep, and use CP_LOAD_CONSTANT_CONTEXT + * the value is read from byte offset 60 in the given bo + */ + OUT_PKT3(ring, CP_LOAD_CONSTANT_CONTEXT, 3); + OUT_RELOC(ring, fd_resource(fd2_ctx->solid_vertexbuf)->bo, 0, 0, 0); + OUT_RING(ring, CP_REG(REG_A2XX_PA_SC_SCREEN_SCISSOR_BR)); + OUT_RING(ring, 1); + + OUT_PKT3(ring, CP_SET_CONSTANT, 4); + OUT_RING(ring, CP_REG(REG_A2XX_RB_SURFACE_INFO)); + OUT_RINGP(ring, GMEM_PATCH_RESTORE_INFO, &batch->gmem_patches); + OUT_RING(ring, 0); + OUT_RING(ring, 0); + return true; +} + +static bool +fd2_clear(struct fd_context *ctx, unsigned buffers, + const union pipe_color_union *color, double depth, unsigned stencil) +{ + struct fd_ringbuffer *ring = ctx->batch->draw; + struct pipe_framebuffer_state *fb = &ctx->batch->framebuffer; + + if (fd2_clear_fast(ctx, buffers, color, depth, stencil)) + goto dirty; + + /* set clear value */ + if (is_a20x(ctx->screen)) { + if (buffers & PIPE_CLEAR_COLOR) { + /* C0 used by fragment shader */ + OUT_PKT3(ring, CP_SET_CONSTANT, 5); + OUT_RING(ring, 0x00000480); + OUT_RING(ring, color->ui[0]); + OUT_RING(ring, color->ui[1]); + OUT_RING(ring, color->ui[2]); + OUT_RING(ring, color->ui[3]); + } + + if (buffers & PIPE_CLEAR_DEPTH) { + /* use viewport to set depth value */ + OUT_PKT3(ring, CP_SET_CONSTANT, 3); + OUT_RING(ring, CP_REG(REG_A2XX_PA_CL_VPORT_ZSCALE)); + OUT_RING(ring, fui(0.0f)); + OUT_RING(ring, fui(depth)); + } + + if (buffers & PIPE_CLEAR_STENCIL) { + OUT_PKT3(ring, CP_SET_CONSTANT, 3); + OUT_RING(ring, CP_REG(REG_A2XX_RB_STENCILREFMASK_BF)); + OUT_RING(ring, 0xff000000 | + A2XX_RB_STENCILREFMASK_BF_STENCILREF(stencil) | + A2XX_RB_STENCILREFMASK_BF_STENCILWRITEMASK(0xff)); + OUT_RING(ring, 0xff000000 | + A2XX_RB_STENCILREFMASK_STENCILREF(stencil) | + A2XX_RB_STENCILREFMASK_STENCILWRITEMASK(0xff)); + } + } else { + if (buffers & PIPE_CLEAR_COLOR) { + OUT_PKT3(ring, CP_SET_CONSTANT, 2); + OUT_RING(ring, CP_REG(REG_A2XX_CLEAR_COLOR)); + OUT_RING(ring, 
pack_rgba(PIPE_FORMAT_R8G8B8A8_UNORM, color->f)); + } + + if (buffers & (PIPE_CLEAR_DEPTH | PIPE_CLEAR_STENCIL)) { + uint32_t clear_mask, depth_clear; + if (buffers & (PIPE_CLEAR_DEPTH | PIPE_CLEAR_STENCIL)) { + switch (fd_pipe2depth(fb->zsbuf->format)) { + case DEPTHX_24_8: + clear_mask = ((buffers & PIPE_CLEAR_DEPTH) ? 0xe : 0) | + ((buffers & PIPE_CLEAR_STENCIL) ? 0x1 : 0); + depth_clear = (((uint32_t)(0xffffff * depth)) << 8) | + (stencil & 0xff); + break; + case DEPTHX_16: + clear_mask = 0xf; + depth_clear = (uint32_t)(0xffffffff * depth); + break; + default: + debug_assert(0); + break; + } + } + + OUT_PKT3(ring, CP_SET_CONSTANT, 2); + OUT_RING(ring, CP_REG(REG_A2XX_RB_COPY_CONTROL)); + OUT_RING(ring, A2XX_RB_COPY_CONTROL_DEPTH_CLEAR_ENABLE | + A2XX_RB_COPY_CONTROL_CLEAR_MASK(clear_mask)); + + OUT_PKT3(ring, CP_SET_CONSTANT, 2); + OUT_RING(ring, CP_REG(REG_A2XX_RB_DEPTH_CLEAR)); + OUT_RING(ring, depth_clear); + } + } + + /* scissor state */ + OUT_PKT3(ring, CP_SET_CONSTANT, 3); + OUT_RING(ring, CP_REG(REG_A2XX_PA_SC_WINDOW_SCISSOR_TL)); + OUT_RING(ring, xy2d(0, 0)); + OUT_RING(ring, xy2d(fb->width, fb->height)); + + /* viewport state */ + OUT_PKT3(ring, CP_SET_CONSTANT, 5); + OUT_RING(ring, CP_REG(REG_A2XX_PA_CL_VPORT_XSCALE)); + OUT_RING(ring, fui((float) fb->width / 2.0)); + OUT_RING(ring, fui((float) fb->width / 2.0)); + OUT_RING(ring, fui((float) fb->height / 2.0)); + OUT_RING(ring, fui((float) fb->height / 2.0)); + + /* common state */ + clear_state(ctx->batch, ring, buffers, false); + + fd_draw(ctx->batch, ring, DI_PT_RECTLIST, IGNORE_VISIBILITY, + DI_SRC_SEL_AUTO_INDEX, 3, 0, INDEX_SIZE_IGN, 0, 0, NULL); + + clear_state_restore(ctx, ring); + +dirty: ctx->dirty |= FD_DIRTY_ZSA | FD_DIRTY_VIEWPORT | FD_DIRTY_RASTERIZER | @@ -298,7 +610,8 @@ FD_DIRTY_PROG | FD_DIRTY_CONST | FD_DIRTY_BLEND | - FD_DIRTY_FRAMEBUFFER; + FD_DIRTY_FRAMEBUFFER | + FD_DIRTY_SCISSOR; ctx->dirty_shader[PIPE_SHADER_VERTEX] |= FD_DIRTY_SHADER_PROG; 
ctx->dirty_shader[PIPE_SHADER_FRAGMENT] |= FD_DIRTY_SHADER_PROG | FD_DIRTY_SHADER_CONST; diff -Nru mesa-18.3.3/src/gallium/drivers/freedreno/a2xx/fd2_draw.h mesa-19.0.1/src/gallium/drivers/freedreno/a2xx/fd2_draw.h --- mesa-18.3.3/src/gallium/drivers/freedreno/a2xx/fd2_draw.h 2018-12-07 18:58:04.000000000 +0000 +++ mesa-19.0.1/src/gallium/drivers/freedreno/a2xx/fd2_draw.h 2019-03-31 23:16:37.000000000 +0000 @@ -33,4 +33,11 @@ void fd2_draw_init(struct pipe_context *pctx); +enum { + GMEM_PATCH_FASTCLEAR_COLOR, + GMEM_PATCH_FASTCLEAR_DEPTH, + GMEM_PATCH_FASTCLEAR_COLOR_DEPTH, + GMEM_PATCH_RESTORE_INFO, +}; + #endif /* FD2_DRAW_H_ */ diff -Nru mesa-18.3.3/src/gallium/drivers/freedreno/a2xx/fd2_emit.c mesa-19.0.1/src/gallium/drivers/freedreno/a2xx/fd2_emit.c --- mesa-18.3.3/src/gallium/drivers/freedreno/a2xx/fd2_emit.c 2018-12-07 18:58:04.000000000 +0000 +++ mesa-19.0.1/src/gallium/drivers/freedreno/a2xx/fd2_emit.c 2019-03-31 23:16:37.000000000 +0000 @@ -118,6 +118,7 @@ static const struct fd2_pipe_sampler_view dummy_view = {}; const struct fd2_sampler_stateobj *sampler; const struct fd2_pipe_sampler_view *view; + struct fd_resource *rsc; if (emitted & (1 << const_idx)) return 0; @@ -129,19 +130,25 @@ fd2_pipe_sampler_view(tex->textures[samp_id]) : &dummy_view; + rsc = view->base.texture ? 
fd_resource(view->base.texture) : NULL; + OUT_PKT3(ring, CP_SET_CONSTANT, 7); OUT_RING(ring, 0x00010000 + (0x6 * const_idx)); OUT_RING(ring, sampler->tex0 | view->tex0); - if (view->base.texture) - OUT_RELOC(ring, fd_resource(view->base.texture)->bo, 0, view->fmt, 0); + if (rsc) + OUT_RELOC(ring, rsc->bo, fd_resource_offset(rsc, 0, 0), view->tex1, 0); else OUT_RING(ring, 0); OUT_RING(ring, view->tex2); OUT_RING(ring, sampler->tex3 | view->tex3); - OUT_RING(ring, sampler->tex4); - OUT_RING(ring, sampler->tex5); + OUT_RING(ring, sampler->tex4 | view->tex4); + + if (rsc && rsc->base.last_level) + OUT_RELOC(ring, rsc->bo, fd_resource_offset(rsc, 1, 0), view->tex5, 0); + else + OUT_RING(ring, view->tex5); return (1 << const_idx); } @@ -179,10 +186,63 @@ } void +fd2_emit_state_binning(struct fd_context *ctx, const enum fd_dirty_3d_state dirty) +{ + struct fd2_blend_stateobj *blend = fd2_blend_stateobj(ctx->blend); + struct fd_ringbuffer *ring = ctx->batch->binning; + + /* subset of fd2_emit_state needed for hw binning on a20x */ + + if (dirty & (FD_DIRTY_PROG | FD_DIRTY_VTXSTATE)) + fd2_program_emit(ctx, ring, &ctx->prog); + + if (dirty & (FD_DIRTY_PROG | FD_DIRTY_CONST)) { + emit_constants(ring, VS_CONST_BASE * 4, + &ctx->constbuf[PIPE_SHADER_VERTEX], + (dirty & FD_DIRTY_PROG) ? 
ctx->prog.vp : NULL); + } + + if (dirty & FD_DIRTY_VIEWPORT) { + OUT_PKT3(ring, CP_SET_CONSTANT, 9); + OUT_RING(ring, 0x00000184); + OUT_RING(ring, fui(ctx->viewport.translate[0])); + OUT_RING(ring, fui(ctx->viewport.translate[1])); + OUT_RING(ring, fui(ctx->viewport.translate[2])); + OUT_RING(ring, fui(0.0f)); + OUT_RING(ring, fui(ctx->viewport.scale[0])); + OUT_RING(ring, fui(ctx->viewport.scale[1])); + OUT_RING(ring, fui(ctx->viewport.scale[2])); + OUT_RING(ring, fui(0.0f)); + } + + /* not sure why this is needed */ + if (dirty & (FD_DIRTY_BLEND | FD_DIRTY_FRAMEBUFFER)) { + enum pipe_format format = + pipe_surface_format(ctx->batch->framebuffer.cbufs[0]); + bool has_alpha = util_format_has_alpha(format); + + OUT_PKT3(ring, CP_SET_CONSTANT, 2); + OUT_RING(ring, CP_REG(REG_A2XX_RB_BLEND_CONTROL)); + OUT_RING(ring, blend->rb_blendcontrol_alpha | + COND(has_alpha, blend->rb_blendcontrol_rgb) | + COND(!has_alpha, blend->rb_blendcontrol_no_alpha_rgb)); + + OUT_PKT3(ring, CP_SET_CONSTANT, 2); + OUT_RING(ring, CP_REG(REG_A2XX_RB_COLOR_MASK)); + OUT_RING(ring, blend->rb_colormask); + } + + OUT_PKT3(ring, CP_SET_CONSTANT, 2); + OUT_RING(ring, CP_REG(REG_A2XX_PA_SU_SC_MODE_CNTL)); + OUT_RING(ring, A2XX_PA_SU_SC_MODE_CNTL_FACE_KILL_ENABLE); +} + +void fd2_emit_state(struct fd_context *ctx, const enum fd_dirty_3d_state dirty) { struct fd2_blend_stateobj *blend = fd2_blend_stateobj(ctx->blend); struct fd2_zsa_stateobj *zsa = fd2_zsa_stateobj(ctx->zsa); + struct fd2_shader_stateobj *fp = ctx->prog.fp; struct fd_ringbuffer *ring = ctx->batch->draw; /* NOTE: we probably want to eventually refactor this so each state @@ -198,12 +258,16 @@ OUT_RING(ring, ctx->sample_mask); } - if (dirty & (FD_DIRTY_ZSA | FD_DIRTY_STENCIL_REF)) { + if (dirty & (FD_DIRTY_ZSA | FD_DIRTY_STENCIL_REF | FD_DIRTY_PROG)) { struct pipe_stencil_ref *sr = &ctx->stencil_ref; + uint32_t val = zsa->rb_depthcontrol; + + if (fp->has_kill) + val &= ~A2XX_RB_DEPTHCONTROL_EARLY_Z_ENABLE; OUT_PKT3(ring, 
CP_SET_CONSTANT, 2); OUT_RING(ring, CP_REG(REG_A2XX_RB_DEPTHCONTROL)); - OUT_RING(ring, zsa->rb_depthcontrol); + OUT_RING(ring, val); OUT_PKT3(ring, CP_SET_CONSTANT, 4); OUT_RING(ring, CP_REG(REG_A2XX_RB_STENCILREFMASK_BF)); @@ -266,21 +330,23 @@ OUT_RING(ring, fui(ctx->viewport.scale[2])); /* PA_CL_VPORT_ZSCALE */ OUT_RING(ring, fui(ctx->viewport.translate[2])); /* PA_CL_VPORT_ZOFFSET */ - OUT_PKT3(ring, CP_SET_CONSTANT, 2); - OUT_RING(ring, CP_REG(REG_A2XX_PA_CL_VTE_CNTL)); - OUT_RING(ring, A2XX_PA_CL_VTE_CNTL_VTX_W0_FMT | - A2XX_PA_CL_VTE_CNTL_VPORT_X_SCALE_ENA | - A2XX_PA_CL_VTE_CNTL_VPORT_X_OFFSET_ENA | - A2XX_PA_CL_VTE_CNTL_VPORT_Y_SCALE_ENA | - A2XX_PA_CL_VTE_CNTL_VPORT_Y_OFFSET_ENA | - A2XX_PA_CL_VTE_CNTL_VPORT_Z_SCALE_ENA | - A2XX_PA_CL_VTE_CNTL_VPORT_Z_OFFSET_ENA); + /* set viewport in C65/C66, for a20x hw binning and fragcoord.z */ + OUT_PKT3(ring, CP_SET_CONSTANT, 9); + OUT_RING(ring, 0x00000184); + + OUT_RING(ring, fui(ctx->viewport.translate[0])); + OUT_RING(ring, fui(ctx->viewport.translate[1])); + OUT_RING(ring, fui(ctx->viewport.translate[2])); + OUT_RING(ring, fui(0.0f)); + + OUT_RING(ring, fui(ctx->viewport.scale[0])); + OUT_RING(ring, fui(ctx->viewport.scale[1])); + OUT_RING(ring, fui(ctx->viewport.scale[2])); + OUT_RING(ring, fui(0.0f)); } - if (dirty & (FD_DIRTY_PROG | FD_DIRTY_VTXSTATE | FD_DIRTY_TEXSTATE)) { - fd2_program_validate(ctx); - fd2_program_emit(ring, &ctx->prog); - } + if (dirty & (FD_DIRTY_PROG | FD_DIRTY_VTXSTATE | FD_DIRTY_TEXSTATE)) + fd2_program_emit(ctx, ring, &ctx->prog); if (dirty & (FD_DIRTY_PROG | FD_DIRTY_CONST)) { emit_constants(ring, VS_CONST_BASE * 4, @@ -294,7 +360,7 @@ if (dirty & (FD_DIRTY_BLEND | FD_DIRTY_ZSA)) { OUT_PKT3(ring, CP_SET_CONSTANT, 2); OUT_RING(ring, CP_REG(REG_A2XX_RB_COLORCONTROL)); - OUT_RING(ring, blend ? 
zsa->rb_colorcontrol | blend->rb_colorcontrol : 0); + OUT_RING(ring, zsa->rb_colorcontrol | blend->rb_colorcontrol); } if (dirty & (FD_DIRTY_BLEND | FD_DIRTY_FRAMEBUFFER)) { @@ -304,13 +370,13 @@ OUT_PKT3(ring, CP_SET_CONSTANT, 2); OUT_RING(ring, CP_REG(REG_A2XX_RB_BLEND_CONTROL)); - OUT_RING(ring, blend ? blend->rb_blendcontrol_alpha | + OUT_RING(ring, blend->rb_blendcontrol_alpha | COND(has_alpha, blend->rb_blendcontrol_rgb) | - COND(!has_alpha, blend->rb_blendcontrol_no_alpha_rgb) : 0); + COND(!has_alpha, blend->rb_blendcontrol_no_alpha_rgb)); OUT_PKT3(ring, CP_SET_CONSTANT, 2); OUT_RING(ring, CP_REG(REG_A2XX_RB_COLOR_MASK)); - OUT_RING(ring, blend ? blend->rb_colormask : 0xf); + OUT_RING(ring, blend->rb_colormask); } if (dirty & FD_DIRTY_BLEND_COLOR) { @@ -339,8 +405,34 @@ A2XX_RB_BC_CONTROL_ENABLE_CRC_UPDATE | A2XX_RB_BC_CONTROL_ACCUM_DATA_FIFO_LIMIT(8) | A2XX_RB_BC_CONTROL_MEM_EXPORT_TIMEOUT_SELECT(3)); + + /* not sure why this is required */ + OUT_PKT3(ring, CP_SET_CONSTANT, 2); + OUT_RING(ring, CP_REG(REG_A2XX_PA_SC_VIZ_QUERY)); + OUT_RING(ring, A2XX_PA_SC_VIZ_QUERY_VIZ_QUERY_ID(16)); + + OUT_PKT3(ring, CP_SET_CONSTANT, 2); + OUT_RING(ring, CP_REG(REG_A2XX_VGT_VERTEX_REUSE_BLOCK_CNTL)); + OUT_RING(ring, 0x00000002); + + OUT_PKT3(ring, CP_SET_CONSTANT, 2); + OUT_RING(ring, CP_REG(REG_A2XX_VGT_OUT_DEALLOC_CNTL)); + OUT_RING(ring, 0x00000002); + } else { + OUT_PKT3(ring, CP_SET_CONSTANT, 2); + OUT_RING(ring, CP_REG(REG_A2XX_VGT_VERTEX_REUSE_BLOCK_CNTL)); + OUT_RING(ring, 0x0000003b); } + /* enable perfcntrs */ + OUT_PKT0(ring, REG_A2XX_CP_PERFMON_CNTL, 1); + OUT_RING(ring, COND(fd_mesa_debug & FD_DBG_PERFC, 1)); + + /* note: perfcntrs don't work without the PM_OVERRIDE bit */ + OUT_PKT0(ring, REG_A2XX_RBBM_PM_OVERRIDE1, 2); + OUT_RING(ring, 0xffffffff); + OUT_RING(ring, 0x00000fff); + OUT_PKT0(ring, REG_A2XX_TP0_CHICKEN, 1); OUT_RING(ring, 0x00000002); @@ -367,10 +459,6 @@ OUT_RING(ring, 0x00000000); OUT_PKT3(ring, CP_SET_CONSTANT, 2); - OUT_RING(ring, 
CP_REG(REG_A2XX_VGT_VERTEX_REUSE_BLOCK_CNTL)); - OUT_RING(ring, 0x0000003b); - - OUT_PKT3(ring, CP_SET_CONSTANT, 2); OUT_RING(ring, CP_REG(REG_A2XX_SQ_CONTEXT_MISC)); OUT_RING(ring, A2XX_SQ_CONTEXT_MISC_SC_SAMPLE_CNTL(CENTERS_ONLY)); @@ -463,6 +551,16 @@ OUT_RING(ring, 0x00000000); /* RB_BLEND_GREEN */ OUT_RING(ring, 0x00000000); /* RB_BLEND_BLUE */ OUT_RING(ring, 0x000000ff); /* RB_BLEND_ALPHA */ + + OUT_PKT3(ring, CP_SET_CONSTANT, 2); + OUT_RING(ring, CP_REG(REG_A2XX_PA_CL_VTE_CNTL)); + OUT_RING(ring, A2XX_PA_CL_VTE_CNTL_VTX_W0_FMT | + A2XX_PA_CL_VTE_CNTL_VPORT_X_SCALE_ENA | + A2XX_PA_CL_VTE_CNTL_VPORT_X_OFFSET_ENA | + A2XX_PA_CL_VTE_CNTL_VPORT_Y_SCALE_ENA | + A2XX_PA_CL_VTE_CNTL_VPORT_Y_OFFSET_ENA | + A2XX_PA_CL_VTE_CNTL_VPORT_Z_SCALE_ENA | + A2XX_PA_CL_VTE_CNTL_VPORT_Z_OFFSET_ENA); } static void diff -Nru mesa-18.3.3/src/gallium/drivers/freedreno/a2xx/fd2_emit.h mesa-19.0.1/src/gallium/drivers/freedreno/a2xx/fd2_emit.h --- mesa-18.3.3/src/gallium/drivers/freedreno/a2xx/fd2_emit.h 2018-12-07 18:58:04.000000000 +0000 +++ mesa-19.0.1/src/gallium/drivers/freedreno/a2xx/fd2_emit.h 2019-03-31 23:16:37.000000000 +0000 @@ -40,7 +40,8 @@ void fd2_emit_vertex_bufs(struct fd_ringbuffer *ring, uint32_t val, struct fd2_vertex_buf *vbufs, uint32_t n); -void fd2_emit_state(struct fd_context *ctx, enum fd_dirty_3d_state dirty); +void fd2_emit_state_binning(struct fd_context *ctx, const enum fd_dirty_3d_state dirty); +void fd2_emit_state(struct fd_context *ctx, const enum fd_dirty_3d_state dirty); void fd2_emit_restore(struct fd_context *ctx, struct fd_ringbuffer *ring); void fd2_emit_init(struct pipe_context *pctx); diff -Nru mesa-18.3.3/src/gallium/drivers/freedreno/a2xx/fd2_gmem.c mesa-19.0.1/src/gallium/drivers/freedreno/a2xx/fd2_gmem.c --- mesa-18.3.3/src/gallium/drivers/freedreno/a2xx/fd2_gmem.c 2018-12-07 18:58:04.000000000 +0000 +++ mesa-19.0.1/src/gallium/drivers/freedreno/a2xx/fd2_gmem.c 2019-03-31 23:16:37.000000000 +0000 @@ -39,6 +39,8 @@ #include "fd2_program.h" 
#include "fd2_util.h" #include "fd2_zsa.h" +#include "fd2_draw.h" +#include "instr-a2xx.h" static uint32_t fmt2swap(enum pipe_format format) { @@ -57,6 +59,28 @@ } } +static bool +use_hw_binning(struct fd_batch *batch) +{ + struct fd_gmem_stateobj *gmem = &batch->ctx->gmem; + + /* we hardcoded a limit of 8 "pipes", we can increase this limit + * at the cost of a slightly larger command stream + * however very few cases will need more than 8 + * gmem->num_vsc_pipes == 0 means empty batch (TODO: does it still happen?) + */ + if (gmem->num_vsc_pipes > 8 || !gmem->num_vsc_pipes) + return false; + + /* only a20x hw binning is implement + * a22x is more like a3xx, but perhaps the a20x works? (TODO) + */ + if (!is_a20x(batch->ctx->screen)) + return false; + + return fd_binning_enabled && ((gmem->nbins_x * gmem->nbins_y) > 2); +} + /* transfer from gmem to system memory (ie. normal RAM) */ static void @@ -66,6 +90,13 @@ struct fd_ringbuffer *ring = batch->gmem; struct fd_resource *rsc = fd_resource(psurf->texture); uint32_t swap = fmt2swap(psurf->format); + struct fd_resource_slice *slice = + fd_resource_slice(rsc, psurf->u.tex.level); + uint32_t offset = + fd_resource_offset(rsc, psurf->u.tex.level, psurf->u.tex.first_layer); + + assert((slice->pitch & 31) == 0); + assert((offset & 0xfff) == 0); if (!rsc->valid) return; @@ -79,8 +110,8 @@ OUT_PKT3(ring, CP_SET_CONSTANT, 5); OUT_RING(ring, CP_REG(REG_A2XX_RB_COPY_CONTROL)); OUT_RING(ring, 0x00000000); /* RB_COPY_CONTROL */ - OUT_RELOCW(ring, rsc->bo, 0, 0, 0); /* RB_COPY_DEST_BASE */ - OUT_RING(ring, rsc->slices[0].pitch >> 5); /* RB_COPY_DEST_PITCH */ + OUT_RELOCW(ring, rsc->bo, offset, 0, 0); /* RB_COPY_DEST_BASE */ + OUT_RING(ring, slice->pitch >> 5); /* RB_COPY_DEST_PITCH */ OUT_RING(ring, /* RB_COPY_DEST_INFO */ A2XX_RB_COPY_DEST_INFO_FORMAT(fd2_pipe2color(psurf->format)) | A2XX_RB_COPY_DEST_INFO_LINEAR | @@ -108,11 +139,12 @@ { struct fd_context *ctx = batch->ctx; struct fd2_context *fd2_ctx = fd2_context(ctx); + 
struct fd_gmem_stateobj *gmem = &ctx->gmem; struct fd_ringbuffer *ring = batch->gmem; struct pipe_framebuffer_state *pfb = &batch->framebuffer; fd2_emit_vertex_bufs(ring, 0x9c, (struct fd2_vertex_buf[]) { - { .prsc = fd2_ctx->solid_vertexbuf, .size = 48 }, + { .prsc = fd2_ctx->solid_vertexbuf, .size = 36 }, }, 1); OUT_PKT3(ring, CP_SET_CONSTANT, 2); @@ -123,11 +155,13 @@ OUT_RING(ring, CP_REG(REG_A2XX_VGT_INDX_OFFSET)); OUT_RING(ring, 0); - OUT_PKT3(ring, CP_SET_CONSTANT, 2); - OUT_RING(ring, CP_REG(REG_A2XX_VGT_VERTEX_REUSE_BLOCK_CNTL)); - OUT_RING(ring, 0x0000028f); + if (!is_a20x(ctx->screen)) { + OUT_PKT3(ring, CP_SET_CONSTANT, 2); + OUT_RING(ring, CP_REG(REG_A2XX_VGT_VERTEX_REUSE_BLOCK_CNTL)); + OUT_RING(ring, 0x0000028f); + } - fd2_program_emit(ring, &ctx->solid_prog); + fd2_program_emit(ctx, ring, &ctx->solid_prog); OUT_PKT3(ring, CP_SET_CONSTANT, 2); OUT_RING(ring, CP_REG(REG_A2XX_PA_SC_AA_MASK)); @@ -149,17 +183,16 @@ OUT_RING(ring, xy2d(pfb->width, pfb->height)); /* PA_SC_WINDOW_SCISSOR_BR */ OUT_PKT3(ring, CP_SET_CONSTANT, 2); - OUT_RING(ring, CP_REG(REG_A2XX_PA_CL_VTE_CNTL)); - OUT_RING(ring, A2XX_PA_CL_VTE_CNTL_VTX_W0_FMT | - A2XX_PA_CL_VTE_CNTL_VPORT_X_SCALE_ENA | - A2XX_PA_CL_VTE_CNTL_VPORT_X_OFFSET_ENA | - A2XX_PA_CL_VTE_CNTL_VPORT_Y_SCALE_ENA | - A2XX_PA_CL_VTE_CNTL_VPORT_Y_OFFSET_ENA); - - OUT_PKT3(ring, CP_SET_CONSTANT, 2); OUT_RING(ring, CP_REG(REG_A2XX_PA_CL_CLIP_CNTL)); OUT_RING(ring, 0x00000000); + OUT_PKT3(ring, CP_SET_CONSTANT, 5); + OUT_RING(ring, CP_REG(REG_A2XX_PA_CL_VPORT_XSCALE)); + OUT_RING(ring, fui((float) tile->bin_w / 2.0)); /* XSCALE */ + OUT_RING(ring, fui((float) tile->bin_w / 2.0)); /* XOFFSET */ + OUT_RING(ring, fui((float) tile->bin_h / 2.0)); /* YSCALE */ + OUT_RING(ring, fui((float) tile->bin_h / 2.0)); /* YOFFSET */ + OUT_PKT3(ring, CP_SET_CONSTANT, 2); OUT_RING(ring, CP_REG(REG_A2XX_RB_MODECONTROL)); OUT_RING(ring, A2XX_RB_MODECONTROL_EDRAM_MODE(EDRAM_COPY)); @@ -170,14 +203,20 @@ A2XX_RB_COPY_DEST_OFFSET_Y(tile->yoff)); 
if (batch->resolve & (FD_BUFFER_DEPTH | FD_BUFFER_STENCIL)) - emit_gmem2mem_surf(batch, tile->bin_w * tile->bin_h, pfb->zsbuf); + emit_gmem2mem_surf(batch, gmem->zsbuf_base[0], pfb->zsbuf); if (batch->resolve & FD_BUFFER_COLOR) - emit_gmem2mem_surf(batch, 0, pfb->cbufs[0]); + emit_gmem2mem_surf(batch, gmem->cbuf_base[0], pfb->cbufs[0]); OUT_PKT3(ring, CP_SET_CONSTANT, 2); OUT_RING(ring, CP_REG(REG_A2XX_RB_MODECONTROL)); OUT_RING(ring, A2XX_RB_MODECONTROL_EDRAM_MODE(COLOR_DEPTH)); + + if (!is_a20x(ctx->screen)) { + OUT_PKT3(ring, CP_SET_CONSTANT, 2); + OUT_RING(ring, CP_REG(REG_A2XX_VGT_VERTEX_REUSE_BLOCK_CNTL)); + OUT_RING(ring, 0x0000003b); + } } /* transfer from system memory to gmem */ @@ -188,6 +227,10 @@ { struct fd_ringbuffer *ring = batch->gmem; struct fd_resource *rsc = fd_resource(psurf->texture); + struct fd_resource_slice *slice = + fd_resource_slice(rsc, psurf->u.tex.level); + uint32_t offset = + fd_resource_offset(rsc, psurf->u.tex.level, psurf->u.tex.first_layer); uint32_t swiz; OUT_PKT3(ring, CP_SET_CONSTANT, 2); @@ -205,17 +248,18 @@ OUT_RING(ring, A2XX_SQ_TEX_0_CLAMP_X(SQ_TEX_WRAP) | A2XX_SQ_TEX_0_CLAMP_Y(SQ_TEX_WRAP) | A2XX_SQ_TEX_0_CLAMP_Z(SQ_TEX_WRAP) | - A2XX_SQ_TEX_0_PITCH(rsc->slices[0].pitch)); - OUT_RELOC(ring, rsc->bo, 0, - fd2_pipe2surface(psurf->format) | 0x800, 0); + A2XX_SQ_TEX_0_PITCH(slice->pitch)); + OUT_RELOC(ring, rsc->bo, offset, + fd2_pipe2surface(psurf->format) | + A2XX_SQ_TEX_1_CLAMP_POLICY(SQ_TEX_CLAMP_POLICY_OGL), 0); OUT_RING(ring, A2XX_SQ_TEX_2_WIDTH(psurf->width - 1) | A2XX_SQ_TEX_2_HEIGHT(psurf->height - 1)); - OUT_RING(ring, 0x01000000 | // XXX + OUT_RING(ring, A2XX_SQ_TEX_3_MIP_FILTER(SQ_TEX_FILTER_BASEMAP) | swiz | A2XX_SQ_TEX_3_XY_MAG_FILTER(SQ_TEX_FILTER_POINT) | A2XX_SQ_TEX_3_XY_MIN_FILTER(SQ_TEX_FILTER_POINT)); OUT_RING(ring, 0x00000000); - OUT_RING(ring, 0x00000200); + OUT_RING(ring, A2XX_SQ_TEX_5_DIMENSION(SQ_TEX_DIMENSION_2D)); if (!is_a20x(batch->ctx->screen)) { OUT_PKT3(ring, CP_SET_CONSTANT, 3); @@ -233,6 
+277,7 @@ { struct fd_context *ctx = batch->ctx; struct fd2_context *fd2_ctx = fd2_context(ctx); + struct fd_gmem_stateobj *gmem = &ctx->gmem; struct fd_ringbuffer *ring = batch->gmem; struct pipe_framebuffer_state *pfb = &batch->framebuffer; unsigned bin_w = tile->bin_w; @@ -240,8 +285,8 @@ float x0, y0, x1, y1; fd2_emit_vertex_bufs(ring, 0x9c, (struct fd2_vertex_buf[]) { - { .prsc = fd2_ctx->solid_vertexbuf, .size = 48, .offset = 0x30 }, - { .prsc = fd2_ctx->solid_vertexbuf, .size = 32, .offset = 0x60 }, + { .prsc = fd2_ctx->solid_vertexbuf, .size = 36 }, + { .prsc = fd2_ctx->solid_vertexbuf, .size = 24, .offset = 36 }, }, 2); /* write texture coordinates to vertexbuf: */ @@ -249,26 +294,20 @@ x1 = ((float)tile->xoff + bin_w) / ((float)pfb->width); y0 = ((float)tile->yoff) / ((float)pfb->height); y1 = ((float)tile->yoff + bin_h) / ((float)pfb->height); - OUT_PKT3(ring, CP_MEM_WRITE, 9); - OUT_RELOC(ring, fd_resource(fd2_ctx->solid_vertexbuf)->bo, 0x60, 0, 0); + OUT_PKT3(ring, CP_MEM_WRITE, 7); + OUT_RELOC(ring, fd_resource(fd2_ctx->solid_vertexbuf)->bo, 36, 0, 0); OUT_RING(ring, fui(x0)); OUT_RING(ring, fui(y0)); OUT_RING(ring, fui(x1)); OUT_RING(ring, fui(y0)); OUT_RING(ring, fui(x0)); OUT_RING(ring, fui(y1)); - OUT_RING(ring, fui(x1)); - OUT_RING(ring, fui(y1)); OUT_PKT3(ring, CP_SET_CONSTANT, 2); OUT_RING(ring, CP_REG(REG_A2XX_VGT_INDX_OFFSET)); OUT_RING(ring, 0); - OUT_PKT3(ring, CP_SET_CONSTANT, 2); - OUT_RING(ring, CP_REG(REG_A2XX_VGT_VERTEX_REUSE_BLOCK_CNTL)); - OUT_RING(ring, 0x0000003b); - - fd2_program_emit(ring, &ctx->blit_prog[0]); + fd2_program_emit(ctx, ring, &ctx->blit_prog[0]); OUT_PKT0(ring, REG_A2XX_TC_CNTL_STATUS, 1); OUT_RING(ring, A2XX_TC_CNTL_STATUS_L2_INVALIDATE); @@ -331,14 +370,107 @@ OUT_RING(ring, 0x00000000); if (fd_gmem_needs_restore(batch, tile, FD_BUFFER_DEPTH | FD_BUFFER_STENCIL)) - emit_mem2gmem_surf(batch, bin_w * bin_h, pfb->zsbuf); + emit_mem2gmem_surf(batch, gmem->zsbuf_base[0], pfb->zsbuf); if (fd_gmem_needs_restore(batch, 
tile, FD_BUFFER_COLOR)) - emit_mem2gmem_surf(batch, 0, pfb->cbufs[0]); + emit_mem2gmem_surf(batch, gmem->cbuf_base[0], pfb->cbufs[0]); + + OUT_PKT3(ring, CP_SET_CONSTANT, 2); + OUT_RING(ring, CP_REG(REG_A2XX_PA_CL_VTE_CNTL)); + OUT_RING(ring, A2XX_PA_CL_VTE_CNTL_VTX_W0_FMT | + A2XX_PA_CL_VTE_CNTL_VPORT_X_SCALE_ENA | + A2XX_PA_CL_VTE_CNTL_VPORT_X_OFFSET_ENA | + A2XX_PA_CL_VTE_CNTL_VPORT_Y_SCALE_ENA | + A2XX_PA_CL_VTE_CNTL_VPORT_Y_OFFSET_ENA | + A2XX_PA_CL_VTE_CNTL_VPORT_Z_SCALE_ENA | + A2XX_PA_CL_VTE_CNTL_VPORT_Z_OFFSET_ENA); /* TODO blob driver seems to toss in a CACHE_FLUSH after each DRAW_INDX.. */ } +static void +patch_draws(struct fd_batch *batch, enum pc_di_vis_cull_mode vismode) +{ + unsigned i; + + if (!is_a20x(batch->ctx->screen)) { + /* identical to a3xx */ + for (i = 0; i < fd_patch_num_elements(&batch->draw_patches); i++) { + struct fd_cs_patch *patch = fd_patch_element(&batch->draw_patches, i); + *patch->cs = patch->val | DRAW(0, 0, 0, vismode, 0); + } + util_dynarray_resize(&batch->draw_patches, 0); + return; + } + + if (vismode == USE_VISIBILITY) + return; + + for (i = 0; i < batch->draw_patches.size / sizeof(uint32_t*); i++) { + uint32_t *ptr = *util_dynarray_element(&batch->draw_patches, uint32_t*, i); + unsigned cnt = ptr[0] >> 16 & 0xfff; /* 5 with idx buffer, 3 without */ + + /* convert CP_DRAW_INDX_BIN to a CP_DRAW_INDX + * replace first two DWORDS with NOP and move the rest down + * (we don't want to have to move the idx buffer reloc) + */ + ptr[0] = CP_TYPE3_PKT | (CP_NOP << 8); + ptr[1] = 0x00000000; + + ptr[4] = ptr[2] & ~(1 << 14 | 1 << 15); /* remove cull_enable bits */ + ptr[2] = CP_TYPE3_PKT | ((cnt-2) << 16) | (CP_DRAW_INDX << 8); + ptr[3] = 0x00000000; + } +} + +static void +fd2_emit_sysmem_prep(struct fd_batch *batch) +{ + struct fd_context *ctx = batch->ctx; + struct fd_ringbuffer *ring = batch->gmem; + struct pipe_framebuffer_state *pfb = &batch->framebuffer; + struct pipe_surface *psurf = pfb->cbufs[0]; + + if (!psurf) + return; + 
+ struct fd_resource *rsc = fd_resource(psurf->texture); + struct fd_resource_slice *slice = + fd_resource_slice(rsc, psurf->u.tex.level); + uint32_t offset = + fd_resource_offset(rsc, psurf->u.tex.level, psurf->u.tex.first_layer); + + assert((slice->pitch & 31) == 0); + assert((offset & 0xfff) == 0); + + fd2_emit_restore(ctx, ring); + + OUT_PKT3(ring, CP_SET_CONSTANT, 2); + OUT_RING(ring, CP_REG(REG_A2XX_RB_SURFACE_INFO)); + OUT_RING(ring, A2XX_RB_SURFACE_INFO_SURFACE_PITCH(slice->pitch)); + + OUT_PKT3(ring, CP_SET_CONSTANT, 2); + OUT_RING(ring, CP_REG(REG_A2XX_RB_COLOR_INFO)); + OUT_RELOCW(ring, rsc->bo, offset, A2XX_RB_COLOR_INFO_LINEAR | + A2XX_RB_COLOR_INFO_SWAP(fmt2swap(psurf->format)) | + A2XX_RB_COLOR_INFO_FORMAT(fd2_pipe2color(psurf->format)), 0); + + OUT_PKT3(ring, CP_SET_CONSTANT, 3); + OUT_RING(ring, CP_REG(REG_A2XX_PA_SC_SCREEN_SCISSOR_TL)); + OUT_RING(ring, A2XX_PA_SC_SCREEN_SCISSOR_TL_WINDOW_OFFSET_DISABLE); + OUT_RING(ring, A2XX_PA_SC_SCREEN_SCISSOR_BR_X(pfb->width) | + A2XX_PA_SC_SCREEN_SCISSOR_BR_Y(pfb->height)); + + OUT_PKT3(ring, CP_SET_CONSTANT, 2); + OUT_RING(ring, CP_REG(REG_A2XX_PA_SC_WINDOW_OFFSET)); + OUT_RING(ring, A2XX_PA_SC_WINDOW_OFFSET_X(0) | + A2XX_PA_SC_WINDOW_OFFSET_Y(0)); + + patch_draws(batch, IGNORE_VISIBILITY); + util_dynarray_resize(&batch->draw_patches, 0); + util_dynarray_resize(&batch->shader_patches, 0); +} + /* before first tile */ static void fd2_emit_tile_init(struct fd_batch *batch) @@ -357,10 +489,168 @@ OUT_RING(ring, gmem->bin_w); /* RB_SURFACE_INFO */ OUT_RING(ring, A2XX_RB_COLOR_INFO_SWAP(fmt2swap(format)) | A2XX_RB_COLOR_INFO_FORMAT(fd2_pipe2color(format))); - reg = A2XX_RB_DEPTH_INFO_DEPTH_BASE(align(gmem->bin_w * gmem->bin_h, 4)); + reg = A2XX_RB_DEPTH_INFO_DEPTH_BASE(gmem->zsbuf_base[0]); if (pfb->zsbuf) reg |= A2XX_RB_DEPTH_INFO_DEPTH_FORMAT(fd_pipe2depth(pfb->zsbuf->format)); OUT_RING(ring, reg); /* RB_DEPTH_INFO */ + + /* fast clear patches */ + int depth_size = -1; + int color_size = -1; + + if 
(pfb->cbufs[0]) + color_size = util_format_get_blocksizebits(format) == 32 ? 4 : 2; + + if (pfb->zsbuf) + depth_size = fd_pipe2depth(pfb->zsbuf->format) == 1 ? 4 : 2; + + for (int i = 0; i < fd_patch_num_elements(&batch->gmem_patches); i++) { + struct fd_cs_patch *patch = fd_patch_element(&batch->gmem_patches, i); + uint32_t color_base = 0, depth_base = gmem->zsbuf_base[0]; + uint32_t size, lines; + + /* note: 1 "line" is 512 bytes in both color/depth areas (1K total) */ + switch (patch->val) { + case GMEM_PATCH_FASTCLEAR_COLOR: + size = align(gmem->bin_w * gmem->bin_h * color_size, 0x8000); + lines = size / 1024; + depth_base = size / 2; + break; + case GMEM_PATCH_FASTCLEAR_DEPTH: + size = align(gmem->bin_w * gmem->bin_h * depth_size, 0x8000); + lines = size / 1024; + color_base = depth_base; + depth_base = depth_base + size / 2; + break; + case GMEM_PATCH_FASTCLEAR_COLOR_DEPTH: + lines = align(gmem->bin_w * gmem->bin_h * color_size * 2, 0x8000) / 1024; + break; + case GMEM_PATCH_RESTORE_INFO: + patch->cs[0] = gmem->bin_w; + patch->cs[1] = A2XX_RB_COLOR_INFO_SWAP(fmt2swap(format)) | + A2XX_RB_COLOR_INFO_FORMAT(fd2_pipe2color(format)); + patch->cs[2] = A2XX_RB_DEPTH_INFO_DEPTH_BASE(gmem->zsbuf_base[0]); + if (pfb->zsbuf) + patch->cs[2] |= A2XX_RB_DEPTH_INFO_DEPTH_FORMAT(fd_pipe2depth(pfb->zsbuf->format)); + continue; + default: + continue; + } + + patch->cs[0] = A2XX_PA_SC_SCREEN_SCISSOR_BR_X(32) | + A2XX_PA_SC_SCREEN_SCISSOR_BR_Y(lines); + patch->cs[4] = A2XX_RB_COLOR_INFO_BASE(color_base) | + A2XX_RB_COLOR_INFO_FORMAT(COLORX_8_8_8_8); + patch->cs[5] = A2XX_RB_DEPTH_INFO_DEPTH_BASE(depth_base) | + A2XX_RB_DEPTH_INFO_DEPTH_FORMAT(1); + } + util_dynarray_resize(&batch->gmem_patches, 0); + + /* set to zero, for some reason hardware doesn't like certain values */ + OUT_PKT3(ring, CP_SET_CONSTANT, 2); + OUT_RING(ring, CP_REG(REG_A2XX_VGT_CURRENT_BIN_ID_MIN)); + OUT_RING(ring, 0); + + OUT_PKT3(ring, CP_SET_CONSTANT, 2); + OUT_RING(ring, 
CP_REG(REG_A2XX_VGT_CURRENT_BIN_ID_MAX)); + OUT_RING(ring, 0); + + if (use_hw_binning(batch)) { + /* patch out unneeded memory exports by changing EXEC CF to EXEC_END + * + * in the shader compiler, we guarantee that the shader ends with + * a specific pattern of ALLOC/EXEC CF pairs for the hw binning exports + * + * the since patches point only to dwords and CFs are 1.5 dwords + * the patch is aligned and might point to a ALLOC CF + */ + for (int i = 0; i < batch->shader_patches.size / sizeof(void*); i++) { + instr_cf_t *cf = + *util_dynarray_element(&batch->shader_patches, instr_cf_t*, i); + if (cf->opc == ALLOC) + cf++; + assert(cf->opc == EXEC); + assert(cf[ctx->screen->num_vsc_pipes*2-2].opc == EXEC_END); + cf[2*(gmem->num_vsc_pipes-1)].opc = EXEC_END; + } + + patch_draws(batch, USE_VISIBILITY); + + /* initialize shader constants for the binning memexport */ + OUT_PKT3(ring, CP_SET_CONSTANT, 1 + gmem->num_vsc_pipes * 4); + OUT_RING(ring, 0x0000000C); + + for (int i = 0; i < gmem->num_vsc_pipes; i++) { + struct fd_vsc_pipe *pipe = &ctx->vsc_pipe[i]; + + /* XXX we know how large this needs to be.. + * should do some sort of realloc + * it should be ctx->batch->num_vertices bytes large + * with this size it will break with more than 256k vertices.. + */ + if (!pipe->bo) { + pipe->bo = fd_bo_new(ctx->dev, 0x40000, + DRM_FREEDRENO_GEM_TYPE_KMEM, "vsc_pipe[%u]", i); + } + + /* memory export address (export32): + * .x: (base_address >> 2) | 0x40000000 (?) + * .y: index (float) - set by shader + * .z: 0x4B00D000 (?) + * .w: 0x4B000000 (?) | max_index (?) 
+ */ + OUT_RELOCW(ring, pipe->bo, 0, 0x40000000, -2); + OUT_RING(ring, 0x00000000); + OUT_RING(ring, 0x4B00D000); + OUT_RING(ring, 0x4B000000 | 0x40000); + } + + OUT_PKT3(ring, CP_SET_CONSTANT, 1 + gmem->num_vsc_pipes * 8); + OUT_RING(ring, 0x0000018C); + + for (int i = 0; i < gmem->num_vsc_pipes; i++) { + struct fd_vsc_pipe *pipe = &ctx->vsc_pipe[i]; + float off_x, off_y, mul_x, mul_y; + + /* const to tranform from [-1,1] to bin coordinates for this pipe + * for x/y, [0,256/2040] = 0, [256/2040,512/2040] = 1, etc + * 8 possible values on x/y axis, + * to clip at binning stage: only use center 6x6 + * TODO: set the z parameters too so that hw binning + * can clip primitives in Z too + */ + + mul_x = 1.0f / (float) (gmem->bin_w * 8); + mul_y = 1.0f / (float) (gmem->bin_h * 8); + off_x = -pipe->x * (1.0/8.0f) + 0.125f - mul_x * gmem->minx; + off_y = -pipe->y * (1.0/8.0f) + 0.125f - mul_y * gmem->miny; + + OUT_RING(ring, fui(off_x * (256.0f/255.0f))); + OUT_RING(ring, fui(off_y * (256.0f/255.0f))); + OUT_RING(ring, 0x3f000000); + OUT_RING(ring, fui(0.0f)); + + OUT_RING(ring, fui(mul_x * (256.0f/255.0f))); + OUT_RING(ring, fui(mul_y * (256.0f/255.0f))); + OUT_RING(ring, fui(0.0f)); + OUT_RING(ring, fui(0.0f)); + } + + OUT_PKT3(ring, CP_SET_CONSTANT, 2); + OUT_RING(ring, CP_REG(REG_A2XX_VGT_VERTEX_REUSE_BLOCK_CNTL)); + OUT_RING(ring, 0); + + ctx->emit_ib(ring, batch->binning); + + OUT_PKT3(ring, CP_SET_CONSTANT, 2); + OUT_RING(ring, CP_REG(REG_A2XX_VGT_VERTEX_REUSE_BLOCK_CNTL)); + OUT_RING(ring, 0x00000002); + } else { + patch_draws(batch, IGNORE_VISIBILITY); + } + + util_dynarray_resize(&batch->draw_patches, 0); + util_dynarray_resize(&batch->shader_patches, 0); } /* before mem2gmem */ @@ -389,6 +679,8 @@ static void fd2_emit_tile_renderprep(struct fd_batch *batch, struct fd_tile *tile) { + struct fd_context *ctx = batch->ctx; + struct fd2_context *fd2_ctx = fd2_context(ctx); struct fd_ringbuffer *ring = batch->gmem; struct pipe_framebuffer_state *pfb = 
&batch->framebuffer; enum pipe_format format = pipe_surface_format(pfb->cbufs[0]); @@ -405,6 +697,38 @@ OUT_RING(ring, CP_REG(REG_A2XX_PA_SC_WINDOW_OFFSET)); OUT_RING(ring, A2XX_PA_SC_WINDOW_OFFSET_X(-tile->xoff) | A2XX_PA_SC_WINDOW_OFFSET_Y(-tile->yoff)); + + /* write SCISSOR_BR to memory so fast clear path can restore from it */ + OUT_PKT3(ring, CP_MEM_WRITE, 2); + OUT_RELOC(ring, fd_resource(fd2_ctx->solid_vertexbuf)->bo, 60, 0, 0); + OUT_RING(ring, A2XX_PA_SC_SCREEN_SCISSOR_BR_X(tile->bin_w) | + A2XX_PA_SC_SCREEN_SCISSOR_BR_Y(tile->bin_h)); + + /* tile offset for gl_FragCoord on a20x (C64 in fragment shader) */ + if (is_a20x(ctx->screen)) { + OUT_PKT3(ring, CP_SET_CONSTANT, 5); + OUT_RING(ring, 0x00000580); + OUT_RING(ring, fui(tile->xoff)); + OUT_RING(ring, fui(tile->yoff)); + OUT_RING(ring, fui(0.0f)); + OUT_RING(ring, fui(0.0f)); + } + + if (use_hw_binning(batch)) { + struct fd_vsc_pipe *pipe = &ctx->vsc_pipe[tile->p]; + + OUT_PKT3(ring, CP_SET_CONSTANT, 2); + OUT_RING(ring, CP_REG(REG_A2XX_VGT_CURRENT_BIN_ID_MIN)); + OUT_RING(ring, tile->n); + + OUT_PKT3(ring, CP_SET_CONSTANT, 2); + OUT_RING(ring, CP_REG(REG_A2XX_VGT_CURRENT_BIN_ID_MAX)); + OUT_RING(ring, tile->n); + + /* TODO only emit this when tile->p changes */ + OUT_PKT3(ring, CP_SET_DRAW_INIT_FLAGS, 1); + OUT_RELOC(ring, pipe->bo, 0, 0, 0); + } } void @@ -412,6 +736,7 @@ { struct fd_context *ctx = fd_context(pctx); + ctx->emit_sysmem_prep = fd2_emit_sysmem_prep; ctx->emit_tile_init = fd2_emit_tile_init; ctx->emit_tile_prep = fd2_emit_tile_prep; ctx->emit_tile_mem2gmem = fd2_emit_tile_mem2gmem; diff -Nru mesa-18.3.3/src/gallium/drivers/freedreno/a2xx/fd2_perfcntr.c mesa-19.0.1/src/gallium/drivers/freedreno/a2xx/fd2_perfcntr.c --- mesa-18.3.3/src/gallium/drivers/freedreno/a2xx/fd2_perfcntr.c 1970-01-01 00:00:00.000000000 +0000 +++ mesa-19.0.1/src/gallium/drivers/freedreno/a2xx/fd2_perfcntr.c 2019-03-31 23:16:37.000000000 +0000 @@ -0,0 +1,813 @@ +/* + * Copyright (C) 2018 Jonathan Marek + * + * 
Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ * + * Authors: + * Jonathan Marek + * Rob Clark + */ + +#include "freedreno_perfcntr.h" +#include "freedreno_util.h" +#include "a2xx.xml.h" + +#define REG(_x) REG_A2XX_ ## _x + +#define COUNTER(_sel, _lo, _hi) { \ + .select_reg = REG(_sel), \ + .counter_reg_lo = REG(_lo), \ + .counter_reg_hi = REG(_hi), \ +} + +#define COUNTABLE(_selector, _query_type, _result_type) { \ + .name = #_selector, \ + .selector = _selector, \ + .query_type = PIPE_DRIVER_QUERY_TYPE_ ## _query_type, \ + .result_type = PIPE_DRIVER_QUERY_RESULT_TYPE_ ## _result_type, \ +} + +#define GROUP(_name, _counters, _countables) { \ + .name = _name, \ + .num_counters = ARRAY_SIZE(_counters), \ + .counters = _counters, \ + .num_countables = ARRAY_SIZE(_countables), \ + .countables = _countables, \ +} + +static const struct fd_perfcntr_countable pa_su_countables[] = { + COUNTABLE(PERF_PAPC_PASX_REQ, UINT64, AVERAGE), + COUNTABLE(PERF_PAPC_PASX_FIRST_VECTOR, UINT64, AVERAGE), + COUNTABLE(PERF_PAPC_PASX_SECOND_VECTOR, UINT64, AVERAGE), + COUNTABLE(PERF_PAPC_PASX_FIRST_DEAD, UINT64, AVERAGE), + COUNTABLE(PERF_PAPC_PASX_SECOND_DEAD, UINT64, AVERAGE), + COUNTABLE(PERF_PAPC_PASX_VTX_KILL_DISCARD, UINT64, AVERAGE), + COUNTABLE(PERF_PAPC_PASX_VTX_NAN_DISCARD, UINT64, AVERAGE), + COUNTABLE(PERF_PAPC_PA_INPUT_PRIM, UINT64, AVERAGE), + COUNTABLE(PERF_PAPC_PA_INPUT_NULL_PRIM, UINT64, AVERAGE), + COUNTABLE(PERF_PAPC_PA_INPUT_EVENT_FLAG, UINT64, AVERAGE), + COUNTABLE(PERF_PAPC_PA_INPUT_FIRST_PRIM_SLOT, UINT64, AVERAGE), + COUNTABLE(PERF_PAPC_PA_INPUT_END_OF_PACKET, UINT64, AVERAGE), + COUNTABLE(PERF_PAPC_CLPR_CULL_PRIM, UINT64, AVERAGE), + COUNTABLE(PERF_PAPC_CLPR_VV_CULL_PRIM, UINT64, AVERAGE), + COUNTABLE(PERF_PAPC_CLPR_VTX_KILL_CULL_PRIM, UINT64, AVERAGE), + COUNTABLE(PERF_PAPC_CLPR_VTX_NAN_CULL_PRIM, UINT64, AVERAGE), + COUNTABLE(PERF_PAPC_CLPR_CULL_TO_NULL_PRIM, UINT64, AVERAGE), + COUNTABLE(PERF_PAPC_CLPR_VV_CLIP_PRIM, UINT64, AVERAGE), + COUNTABLE(PERF_PAPC_CLPR_POINT_CLIP_CANDIDATE, UINT64, AVERAGE), + 
COUNTABLE(PERF_PAPC_CLPR_CLIP_PLANE_CNT_1, UINT64, AVERAGE), + COUNTABLE(PERF_PAPC_CLPR_CLIP_PLANE_CNT_2, UINT64, AVERAGE), + COUNTABLE(PERF_PAPC_CLPR_CLIP_PLANE_CNT_3, UINT64, AVERAGE), + COUNTABLE(PERF_PAPC_CLPR_CLIP_PLANE_CNT_4, UINT64, AVERAGE), + COUNTABLE(PERF_PAPC_CLPR_CLIP_PLANE_CNT_5, UINT64, AVERAGE), + COUNTABLE(PERF_PAPC_CLPR_CLIP_PLANE_CNT_6, UINT64, AVERAGE), + COUNTABLE(PERF_PAPC_CLPR_CLIP_PLANE_NEAR, UINT64, AVERAGE), + COUNTABLE(PERF_PAPC_CLPR_CLIP_PLANE_FAR, UINT64, AVERAGE), + COUNTABLE(PERF_PAPC_CLPR_CLIP_PLANE_LEFT, UINT64, AVERAGE), + COUNTABLE(PERF_PAPC_CLPR_CLIP_PLANE_RIGHT, UINT64, AVERAGE), + COUNTABLE(PERF_PAPC_CLPR_CLIP_PLANE_TOP, UINT64, AVERAGE), + COUNTABLE(PERF_PAPC_CLPR_CLIP_PLANE_BOTTOM, UINT64, AVERAGE), + COUNTABLE(PERF_PAPC_CLSM_NULL_PRIM, UINT64, AVERAGE), + COUNTABLE(PERF_PAPC_CLSM_TOTALLY_VISIBLE_PRIM, UINT64, AVERAGE), + COUNTABLE(PERF_PAPC_CLSM_CLIP_PRIM, UINT64, AVERAGE), + COUNTABLE(PERF_PAPC_CLSM_CULL_TO_NULL_PRIM, UINT64, AVERAGE), + COUNTABLE(PERF_PAPC_CLSM_OUT_PRIM_CNT_1, UINT64, AVERAGE), + COUNTABLE(PERF_PAPC_CLSM_OUT_PRIM_CNT_2, UINT64, AVERAGE), + COUNTABLE(PERF_PAPC_CLSM_OUT_PRIM_CNT_3, UINT64, AVERAGE), + COUNTABLE(PERF_PAPC_CLSM_OUT_PRIM_CNT_4, UINT64, AVERAGE), + COUNTABLE(PERF_PAPC_CLSM_OUT_PRIM_CNT_5, UINT64, AVERAGE), + COUNTABLE(PERF_PAPC_CLSM_OUT_PRIM_CNT_6_7, UINT64, AVERAGE), + COUNTABLE(PERF_PAPC_CLSM_NON_TRIVIAL_CULL, UINT64, AVERAGE), + COUNTABLE(PERF_PAPC_SU_INPUT_PRIM, UINT64, AVERAGE), + COUNTABLE(PERF_PAPC_SU_INPUT_CLIP_PRIM, UINT64, AVERAGE), + COUNTABLE(PERF_PAPC_SU_INPUT_NULL_PRIM, UINT64, AVERAGE), + COUNTABLE(PERF_PAPC_SU_ZERO_AREA_CULL_PRIM, UINT64, AVERAGE), + COUNTABLE(PERF_PAPC_SU_BACK_FACE_CULL_PRIM, UINT64, AVERAGE), + COUNTABLE(PERF_PAPC_SU_FRONT_FACE_CULL_PRIM, UINT64, AVERAGE), + COUNTABLE(PERF_PAPC_SU_POLYMODE_FACE_CULL, UINT64, AVERAGE), + COUNTABLE(PERF_PAPC_SU_POLYMODE_BACK_CULL, UINT64, AVERAGE), + COUNTABLE(PERF_PAPC_SU_POLYMODE_FRONT_CULL, UINT64, AVERAGE), + 
COUNTABLE(PERF_PAPC_SU_POLYMODE_INVALID_FILL, UINT64, AVERAGE), + COUNTABLE(PERF_PAPC_SU_OUTPUT_PRIM, UINT64, AVERAGE), + COUNTABLE(PERF_PAPC_SU_OUTPUT_CLIP_PRIM, UINT64, AVERAGE), + COUNTABLE(PERF_PAPC_SU_OUTPUT_NULL_PRIM, UINT64, AVERAGE), + COUNTABLE(PERF_PAPC_SU_OUTPUT_EVENT_FLAG, UINT64, AVERAGE), + COUNTABLE(PERF_PAPC_SU_OUTPUT_FIRST_PRIM_SLOT, UINT64, AVERAGE), + COUNTABLE(PERF_PAPC_SU_OUTPUT_END_OF_PACKET, UINT64, AVERAGE), + COUNTABLE(PERF_PAPC_SU_OUTPUT_POLYMODE_FACE, UINT64, AVERAGE), + COUNTABLE(PERF_PAPC_SU_OUTPUT_POLYMODE_BACK, UINT64, AVERAGE), + COUNTABLE(PERF_PAPC_SU_OUTPUT_POLYMODE_FRONT, UINT64, AVERAGE), + COUNTABLE(PERF_PAPC_SU_OUT_CLIP_POLYMODE_FACE, UINT64, AVERAGE), + COUNTABLE(PERF_PAPC_SU_OUT_CLIP_POLYMODE_BACK, UINT64, AVERAGE), + COUNTABLE(PERF_PAPC_SU_OUT_CLIP_POLYMODE_FRONT, UINT64, AVERAGE), + COUNTABLE(PERF_PAPC_PASX_REQ_IDLE, UINT64, AVERAGE), + COUNTABLE(PERF_PAPC_PASX_REQ_BUSY, UINT64, AVERAGE), + COUNTABLE(PERF_PAPC_PASX_REQ_STALLED, UINT64, AVERAGE), + COUNTABLE(PERF_PAPC_PASX_REC_IDLE, UINT64, AVERAGE), + COUNTABLE(PERF_PAPC_PASX_REC_BUSY, UINT64, AVERAGE), + COUNTABLE(PERF_PAPC_PASX_REC_STARVED_SX, UINT64, AVERAGE), + COUNTABLE(PERF_PAPC_PASX_REC_STALLED, UINT64, AVERAGE), + COUNTABLE(PERF_PAPC_PASX_REC_STALLED_POS_MEM, UINT64, AVERAGE), + COUNTABLE(PERF_PAPC_PASX_REC_STALLED_CCGSM_IN, UINT64, AVERAGE), + COUNTABLE(PERF_PAPC_CCGSM_IDLE, UINT64, AVERAGE), + COUNTABLE(PERF_PAPC_CCGSM_BUSY, UINT64, AVERAGE), + COUNTABLE(PERF_PAPC_CCGSM_STALLED, UINT64, AVERAGE), + COUNTABLE(PERF_PAPC_CLPRIM_IDLE, UINT64, AVERAGE), + COUNTABLE(PERF_PAPC_CLPRIM_BUSY, UINT64, AVERAGE), + COUNTABLE(PERF_PAPC_CLPRIM_STALLED, UINT64, AVERAGE), + COUNTABLE(PERF_PAPC_CLPRIM_STARVED_CCGSM, UINT64, AVERAGE), + COUNTABLE(PERF_PAPC_CLIPSM_IDLE, UINT64, AVERAGE), + COUNTABLE(PERF_PAPC_CLIPSM_BUSY, UINT64, AVERAGE), + COUNTABLE(PERF_PAPC_CLIPSM_WAIT_CLIP_VERT_ENGH, UINT64, AVERAGE), + COUNTABLE(PERF_PAPC_CLIPSM_WAIT_HIGH_PRI_SEQ, UINT64, AVERAGE), + 
COUNTABLE(PERF_PAPC_CLIPSM_WAIT_CLIPGA, UINT64, AVERAGE), + COUNTABLE(PERF_PAPC_CLIPSM_WAIT_AVAIL_VTE_CLIP, UINT64, AVERAGE), + COUNTABLE(PERF_PAPC_CLIPSM_WAIT_CLIP_OUTSM, UINT64, AVERAGE), + COUNTABLE(PERF_PAPC_CLIPGA_IDLE, UINT64, AVERAGE), + COUNTABLE(PERF_PAPC_CLIPGA_BUSY, UINT64, AVERAGE), + COUNTABLE(PERF_PAPC_CLIPGA_STARVED_VTE_CLIP, UINT64, AVERAGE), + COUNTABLE(PERF_PAPC_CLIPGA_STALLED, UINT64, AVERAGE), + COUNTABLE(PERF_PAPC_CLIP_IDLE, UINT64, AVERAGE), + COUNTABLE(PERF_PAPC_CLIP_BUSY, UINT64, AVERAGE), + COUNTABLE(PERF_PAPC_SU_IDLE, UINT64, AVERAGE), + COUNTABLE(PERF_PAPC_SU_BUSY, UINT64, AVERAGE), + COUNTABLE(PERF_PAPC_SU_STARVED_CLIP, UINT64, AVERAGE), + COUNTABLE(PERF_PAPC_SU_STALLED_SC, UINT64, AVERAGE), + COUNTABLE(PERF_PAPC_SU_FACENESS_CULL, UINT64, AVERAGE), +}; + +static const struct fd_perfcntr_countable pa_sc_countables[] = { + COUNTABLE(SC_SR_WINDOW_VALID, UINT64, AVERAGE), + COUNTABLE(SC_CW_WINDOW_VALID, UINT64, AVERAGE), + COUNTABLE(SC_QM_WINDOW_VALID, UINT64, AVERAGE), + COUNTABLE(SC_FW_WINDOW_VALID, UINT64, AVERAGE), + COUNTABLE(SC_EZ_WINDOW_VALID, UINT64, AVERAGE), + COUNTABLE(SC_IT_WINDOW_VALID, UINT64, AVERAGE), + COUNTABLE(SC_STARVED_BY_PA, UINT64, AVERAGE), + COUNTABLE(SC_STALLED_BY_RB_TILE, UINT64, AVERAGE), + COUNTABLE(SC_STALLED_BY_RB_SAMP, UINT64, AVERAGE), + COUNTABLE(SC_STARVED_BY_RB_EZ, UINT64, AVERAGE), + COUNTABLE(SC_STALLED_BY_SAMPLE_FF, UINT64, AVERAGE), + COUNTABLE(SC_STALLED_BY_SQ, UINT64, AVERAGE), + COUNTABLE(SC_STALLED_BY_SP, UINT64, AVERAGE), + COUNTABLE(SC_TOTAL_NO_PRIMS, UINT64, AVERAGE), + COUNTABLE(SC_NON_EMPTY_PRIMS, UINT64, AVERAGE), + COUNTABLE(SC_NO_TILES_PASSING_QM, UINT64, AVERAGE), + COUNTABLE(SC_NO_PIXELS_PRE_EZ, UINT64, AVERAGE), + COUNTABLE(SC_NO_PIXELS_POST_EZ, UINT64, AVERAGE), +}; + +static const struct fd_perfcntr_countable vgt_countables[] = { + COUNTABLE(VGT_SQ_EVENT_WINDOW_ACTIVE, UINT64, AVERAGE), + COUNTABLE(VGT_SQ_SEND, UINT64, AVERAGE), + COUNTABLE(VGT_SQ_STALLED, UINT64, AVERAGE), + 
COUNTABLE(VGT_SQ_STARVED_BUSY, UINT64, AVERAGE), + COUNTABLE(VGT_SQ_STARVED_IDLE, UINT64, AVERAGE), + COUNTABLE(VGT_SQ_STATIC, UINT64, AVERAGE), + COUNTABLE(VGT_PA_EVENT_WINDOW_ACTIVE, UINT64, AVERAGE), + COUNTABLE(VGT_PA_CLIP_V_SEND, UINT64, AVERAGE), + COUNTABLE(VGT_PA_CLIP_V_STALLED, UINT64, AVERAGE), + COUNTABLE(VGT_PA_CLIP_V_STARVED_BUSY, UINT64, AVERAGE), + COUNTABLE(VGT_PA_CLIP_V_STARVED_IDLE, UINT64, AVERAGE), + COUNTABLE(VGT_PA_CLIP_V_STATIC, UINT64, AVERAGE), + COUNTABLE(VGT_PA_CLIP_P_SEND, UINT64, AVERAGE), + COUNTABLE(VGT_PA_CLIP_P_STALLED, UINT64, AVERAGE), + COUNTABLE(VGT_PA_CLIP_P_STARVED_BUSY, UINT64, AVERAGE), + COUNTABLE(VGT_PA_CLIP_P_STARVED_IDLE, UINT64, AVERAGE), + COUNTABLE(VGT_PA_CLIP_P_STATIC, UINT64, AVERAGE), + COUNTABLE(VGT_PA_CLIP_S_SEND, UINT64, AVERAGE), + COUNTABLE(VGT_PA_CLIP_S_STALLED, UINT64, AVERAGE), + COUNTABLE(VGT_PA_CLIP_S_STARVED_BUSY, UINT64, AVERAGE), + COUNTABLE(VGT_PA_CLIP_S_STARVED_IDLE, UINT64, AVERAGE), + COUNTABLE(VGT_PA_CLIP_S_STATIC, UINT64, AVERAGE), + COUNTABLE(RBIU_FIFOS_EVENT_WINDOW_ACTIVE, UINT64, AVERAGE), + COUNTABLE(RBIU_IMMED_DATA_FIFO_STARVED, UINT64, AVERAGE), + COUNTABLE(RBIU_IMMED_DATA_FIFO_STALLED, UINT64, AVERAGE), + COUNTABLE(RBIU_DMA_REQUEST_FIFO_STARVED, UINT64, AVERAGE), + COUNTABLE(RBIU_DMA_REQUEST_FIFO_STALLED, UINT64, AVERAGE), + COUNTABLE(RBIU_DRAW_INITIATOR_FIFO_STARVED, UINT64, AVERAGE), + COUNTABLE(RBIU_DRAW_INITIATOR_FIFO_STALLED, UINT64, AVERAGE), + COUNTABLE(BIN_PRIM_NEAR_CULL, UINT64, AVERAGE), + COUNTABLE(BIN_PRIM_ZERO_CULL, UINT64, AVERAGE), + COUNTABLE(BIN_PRIM_FAR_CULL, UINT64, AVERAGE), + COUNTABLE(BIN_PRIM_BIN_CULL, UINT64, AVERAGE), + COUNTABLE(BIN_PRIM_FACE_CULL, UINT64, AVERAGE), + COUNTABLE(SPARE34, UINT64, AVERAGE), + COUNTABLE(SPARE35, UINT64, AVERAGE), + COUNTABLE(SPARE36, UINT64, AVERAGE), + COUNTABLE(SPARE37, UINT64, AVERAGE), + COUNTABLE(SPARE38, UINT64, AVERAGE), + COUNTABLE(SPARE39, UINT64, AVERAGE), + COUNTABLE(TE_SU_IN_VALID, UINT64, AVERAGE), + 
COUNTABLE(TE_SU_IN_READ, UINT64, AVERAGE), + COUNTABLE(TE_SU_IN_PRIM, UINT64, AVERAGE), + COUNTABLE(TE_SU_IN_EOP, UINT64, AVERAGE), + COUNTABLE(TE_SU_IN_NULL_PRIM, UINT64, AVERAGE), + COUNTABLE(TE_WK_IN_VALID, UINT64, AVERAGE), + COUNTABLE(TE_WK_IN_READ, UINT64, AVERAGE), + COUNTABLE(TE_OUT_PRIM_VALID, UINT64, AVERAGE), + COUNTABLE(TE_OUT_PRIM_READ, UINT64, AVERAGE), +}; + +static const struct fd_perfcntr_countable tcr_countables[] = { + COUNTABLE(DGMMPD_IPMUX0_STALL, UINT64, AVERAGE), + COUNTABLE(DGMMPD_IPMUX_ALL_STALL, UINT64, AVERAGE), + COUNTABLE(OPMUX0_L2_WRITES, UINT64, AVERAGE), +}; + +static const struct fd_perfcntr_countable tp0_countables[] = { + COUNTABLE(POINT_QUADS, UINT64, AVERAGE), + COUNTABLE(BILIN_QUADS, UINT64, AVERAGE), + COUNTABLE(ANISO_QUADS, UINT64, AVERAGE), + COUNTABLE(MIP_QUADS, UINT64, AVERAGE), + COUNTABLE(VOL_QUADS, UINT64, AVERAGE), + COUNTABLE(MIP_VOL_QUADS, UINT64, AVERAGE), + COUNTABLE(MIP_ANISO_QUADS, UINT64, AVERAGE), + COUNTABLE(VOL_ANISO_QUADS, UINT64, AVERAGE), + COUNTABLE(ANISO_2_1_QUADS, UINT64, AVERAGE), + COUNTABLE(ANISO_4_1_QUADS, UINT64, AVERAGE), + COUNTABLE(ANISO_6_1_QUADS, UINT64, AVERAGE), + COUNTABLE(ANISO_8_1_QUADS, UINT64, AVERAGE), + COUNTABLE(ANISO_10_1_QUADS, UINT64, AVERAGE), + COUNTABLE(ANISO_12_1_QUADS, UINT64, AVERAGE), + COUNTABLE(ANISO_14_1_QUADS, UINT64, AVERAGE), + COUNTABLE(ANISO_16_1_QUADS, UINT64, AVERAGE), + COUNTABLE(MIP_VOL_ANISO_QUADS, UINT64, AVERAGE), + COUNTABLE(ALIGN_2_QUADS, UINT64, AVERAGE), + COUNTABLE(ALIGN_4_QUADS, UINT64, AVERAGE), + COUNTABLE(PIX_0_QUAD, UINT64, AVERAGE), + COUNTABLE(PIX_1_QUAD, UINT64, AVERAGE), + COUNTABLE(PIX_2_QUAD, UINT64, AVERAGE), + COUNTABLE(PIX_3_QUAD, UINT64, AVERAGE), + COUNTABLE(PIX_4_QUAD, UINT64, AVERAGE), + COUNTABLE(TP_MIPMAP_LOD0, UINT64, AVERAGE), + COUNTABLE(TP_MIPMAP_LOD1, UINT64, AVERAGE), + COUNTABLE(TP_MIPMAP_LOD2, UINT64, AVERAGE), + COUNTABLE(TP_MIPMAP_LOD3, UINT64, AVERAGE), + COUNTABLE(TP_MIPMAP_LOD4, UINT64, AVERAGE), + 
COUNTABLE(TP_MIPMAP_LOD5, UINT64, AVERAGE), + COUNTABLE(TP_MIPMAP_LOD6, UINT64, AVERAGE), + COUNTABLE(TP_MIPMAP_LOD7, UINT64, AVERAGE), + COUNTABLE(TP_MIPMAP_LOD8, UINT64, AVERAGE), + COUNTABLE(TP_MIPMAP_LOD9, UINT64, AVERAGE), + COUNTABLE(TP_MIPMAP_LOD10, UINT64, AVERAGE), + COUNTABLE(TP_MIPMAP_LOD11, UINT64, AVERAGE), + COUNTABLE(TP_MIPMAP_LOD12, UINT64, AVERAGE), + COUNTABLE(TP_MIPMAP_LOD13, UINT64, AVERAGE), + COUNTABLE(TP_MIPMAP_LOD14, UINT64, AVERAGE), +}; + +static const struct fd_perfcntr_countable tcm_countables[] = { + COUNTABLE(QUAD0_RD_LAT_FIFO_EMPTY, UINT64, AVERAGE), + COUNTABLE(QUAD0_RD_LAT_FIFO_4TH_FULL, UINT64, AVERAGE), + COUNTABLE(QUAD0_RD_LAT_FIFO_HALF_FULL, UINT64, AVERAGE), + COUNTABLE(QUAD0_RD_LAT_FIFO_FULL, UINT64, AVERAGE), + COUNTABLE(QUAD0_RD_LAT_FIFO_LT_4TH_FULL, UINT64, AVERAGE), + COUNTABLE(READ_STARVED_QUAD0, UINT64, AVERAGE), + COUNTABLE(READ_STARVED, UINT64, AVERAGE), + COUNTABLE(READ_STALLED_QUAD0, UINT64, AVERAGE), + COUNTABLE(READ_STALLED, UINT64, AVERAGE), + COUNTABLE(VALID_READ_QUAD0, UINT64, AVERAGE), + COUNTABLE(TC_TP_STARVED_QUAD0, UINT64, AVERAGE), + COUNTABLE(TC_TP_STARVED, UINT64, AVERAGE), +}; + +static const struct fd_perfcntr_countable tcf_countables[] = { + COUNTABLE(VALID_CYCLES, UINT64, AVERAGE), + COUNTABLE(SINGLE_PHASES, UINT64, AVERAGE), + COUNTABLE(ANISO_PHASES, UINT64, AVERAGE), + COUNTABLE(MIP_PHASES, UINT64, AVERAGE), + COUNTABLE(VOL_PHASES, UINT64, AVERAGE), + COUNTABLE(MIP_VOL_PHASES, UINT64, AVERAGE), + COUNTABLE(MIP_ANISO_PHASES, UINT64, AVERAGE), + COUNTABLE(VOL_ANISO_PHASES, UINT64, AVERAGE), + COUNTABLE(ANISO_2_1_PHASES, UINT64, AVERAGE), + COUNTABLE(ANISO_4_1_PHASES, UINT64, AVERAGE), + COUNTABLE(ANISO_6_1_PHASES, UINT64, AVERAGE), + COUNTABLE(ANISO_8_1_PHASES, UINT64, AVERAGE), + COUNTABLE(ANISO_10_1_PHASES, UINT64, AVERAGE), + COUNTABLE(ANISO_12_1_PHASES, UINT64, AVERAGE), + COUNTABLE(ANISO_14_1_PHASES, UINT64, AVERAGE), + COUNTABLE(ANISO_16_1_PHASES, UINT64, AVERAGE), + 
COUNTABLE(MIP_VOL_ANISO_PHASES, UINT64, AVERAGE), + COUNTABLE(ALIGN_2_PHASES, UINT64, AVERAGE), + COUNTABLE(ALIGN_4_PHASES, UINT64, AVERAGE), + COUNTABLE(TPC_BUSY, UINT64, AVERAGE), + COUNTABLE(TPC_STALLED, UINT64, AVERAGE), + COUNTABLE(TPC_STARVED, UINT64, AVERAGE), + COUNTABLE(TPC_WORKING, UINT64, AVERAGE), + COUNTABLE(TPC_WALKER_BUSY, UINT64, AVERAGE), + COUNTABLE(TPC_WALKER_STALLED, UINT64, AVERAGE), + COUNTABLE(TPC_WALKER_WORKING, UINT64, AVERAGE), + COUNTABLE(TPC_ALIGNER_BUSY, UINT64, AVERAGE), + COUNTABLE(TPC_ALIGNER_STALLED, UINT64, AVERAGE), + COUNTABLE(TPC_ALIGNER_STALLED_BY_BLEND, UINT64, AVERAGE), + COUNTABLE(TPC_ALIGNER_STALLED_BY_CACHE, UINT64, AVERAGE), + COUNTABLE(TPC_ALIGNER_WORKING, UINT64, AVERAGE), + COUNTABLE(TPC_BLEND_BUSY, UINT64, AVERAGE), + COUNTABLE(TPC_BLEND_SYNC, UINT64, AVERAGE), + COUNTABLE(TPC_BLEND_STARVED, UINT64, AVERAGE), + COUNTABLE(TPC_BLEND_WORKING, UINT64, AVERAGE), + COUNTABLE(OPCODE_0x00, UINT64, AVERAGE), + COUNTABLE(OPCODE_0x01, UINT64, AVERAGE), + COUNTABLE(OPCODE_0x04, UINT64, AVERAGE), + COUNTABLE(OPCODE_0x10, UINT64, AVERAGE), + COUNTABLE(OPCODE_0x11, UINT64, AVERAGE), + COUNTABLE(OPCODE_0x12, UINT64, AVERAGE), + COUNTABLE(OPCODE_0x13, UINT64, AVERAGE), + COUNTABLE(OPCODE_0x18, UINT64, AVERAGE), + COUNTABLE(OPCODE_0x19, UINT64, AVERAGE), + COUNTABLE(OPCODE_0x1A, UINT64, AVERAGE), + COUNTABLE(OPCODE_OTHER, UINT64, AVERAGE), + COUNTABLE(IN_FIFO_0_EMPTY, UINT64, AVERAGE), + COUNTABLE(IN_FIFO_0_LT_HALF_FULL, UINT64, AVERAGE), + COUNTABLE(IN_FIFO_0_HALF_FULL, UINT64, AVERAGE), + COUNTABLE(IN_FIFO_0_FULL, UINT64, AVERAGE), + COUNTABLE(IN_FIFO_TPC_EMPTY, UINT64, AVERAGE), + COUNTABLE(IN_FIFO_TPC_LT_HALF_FULL, UINT64, AVERAGE), + COUNTABLE(IN_FIFO_TPC_HALF_FULL, UINT64, AVERAGE), + COUNTABLE(IN_FIFO_TPC_FULL, UINT64, AVERAGE), + COUNTABLE(TPC_TC_XFC, UINT64, AVERAGE), + COUNTABLE(TPC_TC_STATE, UINT64, AVERAGE), + COUNTABLE(TC_STALL, UINT64, AVERAGE), + COUNTABLE(QUAD0_TAPS, UINT64, AVERAGE), + COUNTABLE(QUADS, UINT64, 
AVERAGE), + COUNTABLE(TCA_SYNC_STALL, UINT64, AVERAGE), + COUNTABLE(TAG_STALL, UINT64, AVERAGE), + COUNTABLE(TCB_SYNC_STALL, UINT64, AVERAGE), + COUNTABLE(TCA_VALID, UINT64, AVERAGE), + COUNTABLE(PROBES_VALID, UINT64, AVERAGE), + COUNTABLE(MISS_STALL, UINT64, AVERAGE), + COUNTABLE(FETCH_FIFO_STALL, UINT64, AVERAGE), + COUNTABLE(TCO_STALL, UINT64, AVERAGE), + COUNTABLE(ANY_STALL, UINT64, AVERAGE), + COUNTABLE(TAG_MISSES, UINT64, AVERAGE), + COUNTABLE(TAG_HITS, UINT64, AVERAGE), + COUNTABLE(SUB_TAG_MISSES, UINT64, AVERAGE), + COUNTABLE(SET0_INVALIDATES, UINT64, AVERAGE), + COUNTABLE(SET1_INVALIDATES, UINT64, AVERAGE), + COUNTABLE(SET2_INVALIDATES, UINT64, AVERAGE), + COUNTABLE(SET3_INVALIDATES, UINT64, AVERAGE), + COUNTABLE(SET0_TAG_MISSES, UINT64, AVERAGE), + COUNTABLE(SET1_TAG_MISSES, UINT64, AVERAGE), + COUNTABLE(SET2_TAG_MISSES, UINT64, AVERAGE), + COUNTABLE(SET3_TAG_MISSES, UINT64, AVERAGE), + COUNTABLE(SET0_TAG_HITS, UINT64, AVERAGE), + COUNTABLE(SET1_TAG_HITS, UINT64, AVERAGE), + COUNTABLE(SET2_TAG_HITS, UINT64, AVERAGE), + COUNTABLE(SET3_TAG_HITS, UINT64, AVERAGE), + COUNTABLE(SET0_SUB_TAG_MISSES, UINT64, AVERAGE), + COUNTABLE(SET1_SUB_TAG_MISSES, UINT64, AVERAGE), + COUNTABLE(SET2_SUB_TAG_MISSES, UINT64, AVERAGE), + COUNTABLE(SET3_SUB_TAG_MISSES, UINT64, AVERAGE), + COUNTABLE(SET0_EVICT1, UINT64, AVERAGE), + COUNTABLE(SET0_EVICT2, UINT64, AVERAGE), + COUNTABLE(SET0_EVICT3, UINT64, AVERAGE), + COUNTABLE(SET0_EVICT4, UINT64, AVERAGE), + COUNTABLE(SET0_EVICT5, UINT64, AVERAGE), + COUNTABLE(SET0_EVICT6, UINT64, AVERAGE), + COUNTABLE(SET0_EVICT7, UINT64, AVERAGE), + COUNTABLE(SET0_EVICT8, UINT64, AVERAGE), + COUNTABLE(SET1_EVICT1, UINT64, AVERAGE), + COUNTABLE(SET1_EVICT2, UINT64, AVERAGE), + COUNTABLE(SET1_EVICT3, UINT64, AVERAGE), + COUNTABLE(SET1_EVICT4, UINT64, AVERAGE), + COUNTABLE(SET1_EVICT5, UINT64, AVERAGE), + COUNTABLE(SET1_EVICT6, UINT64, AVERAGE), + COUNTABLE(SET1_EVICT7, UINT64, AVERAGE), + COUNTABLE(SET1_EVICT8, UINT64, AVERAGE), + 
COUNTABLE(SET2_EVICT1, UINT64, AVERAGE), + COUNTABLE(SET2_EVICT2, UINT64, AVERAGE), + COUNTABLE(SET2_EVICT3, UINT64, AVERAGE), + COUNTABLE(SET2_EVICT4, UINT64, AVERAGE), + COUNTABLE(SET2_EVICT5, UINT64, AVERAGE), + COUNTABLE(SET2_EVICT6, UINT64, AVERAGE), + COUNTABLE(SET2_EVICT7, UINT64, AVERAGE), + COUNTABLE(SET2_EVICT8, UINT64, AVERAGE), + COUNTABLE(SET3_EVICT1, UINT64, AVERAGE), + COUNTABLE(SET3_EVICT2, UINT64, AVERAGE), + COUNTABLE(SET3_EVICT3, UINT64, AVERAGE), + COUNTABLE(SET3_EVICT4, UINT64, AVERAGE), + COUNTABLE(SET3_EVICT5, UINT64, AVERAGE), + COUNTABLE(SET3_EVICT6, UINT64, AVERAGE), + COUNTABLE(SET3_EVICT7, UINT64, AVERAGE), + COUNTABLE(SET3_EVICT8, UINT64, AVERAGE), + COUNTABLE(FF_EMPTY, UINT64, AVERAGE), + COUNTABLE(FF_LT_HALF_FULL, UINT64, AVERAGE), + COUNTABLE(FF_HALF_FULL, UINT64, AVERAGE), + COUNTABLE(FF_FULL, UINT64, AVERAGE), + COUNTABLE(FF_XFC, UINT64, AVERAGE), + COUNTABLE(FF_STALLED, UINT64, AVERAGE), + COUNTABLE(FG_MASKS, UINT64, AVERAGE), + COUNTABLE(FG_LEFT_MASKS, UINT64, AVERAGE), + COUNTABLE(FG_LEFT_MASK_STALLED, UINT64, AVERAGE), + COUNTABLE(FG_LEFT_NOT_DONE_STALL, UINT64, AVERAGE), + COUNTABLE(FG_LEFT_FG_STALL, UINT64, AVERAGE), + COUNTABLE(FG_LEFT_SECTORS, UINT64, AVERAGE), + COUNTABLE(FG0_REQUESTS, UINT64, AVERAGE), + COUNTABLE(FG0_STALLED, UINT64, AVERAGE), + COUNTABLE(MEM_REQ512, UINT64, AVERAGE), + COUNTABLE(MEM_REQ_SENT, UINT64, AVERAGE), + COUNTABLE(MEM_LOCAL_READ_REQ, UINT64, AVERAGE), + COUNTABLE(TC0_MH_STALLED, UINT64, AVERAGE), +}; + +static const struct fd_perfcntr_countable sq_countables[] = { + COUNTABLE(SQ_PIXEL_VECTORS_SUB, UINT64, AVERAGE), + COUNTABLE(SQ_VERTEX_VECTORS_SUB, UINT64, AVERAGE), + COUNTABLE(SQ_ALU0_ACTIVE_VTX_SIMD0, UINT64, AVERAGE), + COUNTABLE(SQ_ALU1_ACTIVE_VTX_SIMD0, UINT64, AVERAGE), + COUNTABLE(SQ_ALU0_ACTIVE_PIX_SIMD0, UINT64, AVERAGE), + COUNTABLE(SQ_ALU1_ACTIVE_PIX_SIMD0, UINT64, AVERAGE), + COUNTABLE(SQ_ALU0_ACTIVE_VTX_SIMD1, UINT64, AVERAGE), + COUNTABLE(SQ_ALU1_ACTIVE_VTX_SIMD1, UINT64, 
AVERAGE), + COUNTABLE(SQ_ALU0_ACTIVE_PIX_SIMD1, UINT64, AVERAGE), + COUNTABLE(SQ_ALU1_ACTIVE_PIX_SIMD1, UINT64, AVERAGE), + COUNTABLE(SQ_EXPORT_CYCLES, UINT64, AVERAGE), + COUNTABLE(SQ_ALU_CST_WRITTEN, UINT64, AVERAGE), + COUNTABLE(SQ_TEX_CST_WRITTEN, UINT64, AVERAGE), + COUNTABLE(SQ_ALU_CST_STALL, UINT64, AVERAGE), + COUNTABLE(SQ_ALU_TEX_STALL, UINT64, AVERAGE), + COUNTABLE(SQ_INST_WRITTEN, UINT64, AVERAGE), + COUNTABLE(SQ_BOOLEAN_WRITTEN, UINT64, AVERAGE), + COUNTABLE(SQ_LOOPS_WRITTEN, UINT64, AVERAGE), + COUNTABLE(SQ_PIXEL_SWAP_IN, UINT64, AVERAGE), + COUNTABLE(SQ_PIXEL_SWAP_OUT, UINT64, AVERAGE), + COUNTABLE(SQ_VERTEX_SWAP_IN, UINT64, AVERAGE), + COUNTABLE(SQ_VERTEX_SWAP_OUT, UINT64, AVERAGE), + COUNTABLE(SQ_ALU_VTX_INST_ISSUED, UINT64, AVERAGE), + COUNTABLE(SQ_TEX_VTX_INST_ISSUED, UINT64, AVERAGE), + COUNTABLE(SQ_VC_VTX_INST_ISSUED, UINT64, AVERAGE), + COUNTABLE(SQ_CF_VTX_INST_ISSUED, UINT64, AVERAGE), + COUNTABLE(SQ_ALU_PIX_INST_ISSUED, UINT64, AVERAGE), + COUNTABLE(SQ_TEX_PIX_INST_ISSUED, UINT64, AVERAGE), + COUNTABLE(SQ_VC_PIX_INST_ISSUED, UINT64, AVERAGE), + COUNTABLE(SQ_CF_PIX_INST_ISSUED, UINT64, AVERAGE), + COUNTABLE(SQ_ALU0_FIFO_EMPTY_SIMD0, UINT64, AVERAGE), + COUNTABLE(SQ_ALU1_FIFO_EMPTY_SIMD0, UINT64, AVERAGE), + COUNTABLE(SQ_ALU0_FIFO_EMPTY_SIMD1, UINT64, AVERAGE), + COUNTABLE(SQ_ALU1_FIFO_EMPTY_SIMD1, UINT64, AVERAGE), + COUNTABLE(SQ_ALU_NOPS, UINT64, AVERAGE), + COUNTABLE(SQ_PRED_SKIP, UINT64, AVERAGE), + COUNTABLE(SQ_SYNC_ALU_STALL_SIMD0_VTX, UINT64, AVERAGE), + COUNTABLE(SQ_SYNC_ALU_STALL_SIMD1_VTX, UINT64, AVERAGE), + COUNTABLE(SQ_SYNC_TEX_STALL_VTX, UINT64, AVERAGE), + COUNTABLE(SQ_SYNC_VC_STALL_VTX, UINT64, AVERAGE), + COUNTABLE(SQ_CONSTANTS_USED_SIMD0, UINT64, AVERAGE), + COUNTABLE(SQ_CONSTANTS_SENT_SP_SIMD0, UINT64, AVERAGE), + COUNTABLE(SQ_GPR_STALL_VTX, UINT64, AVERAGE), + COUNTABLE(SQ_GPR_STALL_PIX, UINT64, AVERAGE), + COUNTABLE(SQ_VTX_RS_STALL, UINT64, AVERAGE), + COUNTABLE(SQ_PIX_RS_STALL, UINT64, AVERAGE), + COUNTABLE(SQ_SX_PC_FULL, 
UINT64, AVERAGE), + COUNTABLE(SQ_SX_EXP_BUFF_FULL, UINT64, AVERAGE), + COUNTABLE(SQ_SX_POS_BUFF_FULL, UINT64, AVERAGE), + COUNTABLE(SQ_INTERP_QUADS, UINT64, AVERAGE), + COUNTABLE(SQ_INTERP_ACTIVE, UINT64, AVERAGE), + COUNTABLE(SQ_IN_PIXEL_STALL, UINT64, AVERAGE), + COUNTABLE(SQ_IN_VTX_STALL, UINT64, AVERAGE), + COUNTABLE(SQ_VTX_CNT, UINT64, AVERAGE), + COUNTABLE(SQ_VTX_VECTOR2, UINT64, AVERAGE), + COUNTABLE(SQ_VTX_VECTOR3, UINT64, AVERAGE), + COUNTABLE(SQ_VTX_VECTOR4, UINT64, AVERAGE), + COUNTABLE(SQ_PIXEL_VECTOR1, UINT64, AVERAGE), + COUNTABLE(SQ_PIXEL_VECTOR23, UINT64, AVERAGE), + COUNTABLE(SQ_PIXEL_VECTOR4, UINT64, AVERAGE), + COUNTABLE(SQ_CONSTANTS_USED_SIMD1, UINT64, AVERAGE), + COUNTABLE(SQ_CONSTANTS_SENT_SP_SIMD1, UINT64, AVERAGE), + COUNTABLE(SQ_SX_MEM_EXP_FULL, UINT64, AVERAGE), + COUNTABLE(SQ_ALU0_ACTIVE_VTX_SIMD2, UINT64, AVERAGE), + COUNTABLE(SQ_ALU1_ACTIVE_VTX_SIMD2, UINT64, AVERAGE), + COUNTABLE(SQ_ALU0_ACTIVE_PIX_SIMD2, UINT64, AVERAGE), + COUNTABLE(SQ_ALU1_ACTIVE_PIX_SIMD2, UINT64, AVERAGE), + COUNTABLE(SQ_ALU0_ACTIVE_VTX_SIMD3, UINT64, AVERAGE), + COUNTABLE(SQ_PERFCOUNT_VTX_QUAL_TP_DONE, UINT64, AVERAGE), + COUNTABLE(SQ_ALU0_ACTIVE_PIX_SIMD3, UINT64, AVERAGE), + COUNTABLE(SQ_PERFCOUNT_PIX_QUAL_TP_DONE, UINT64, AVERAGE), + COUNTABLE(SQ_ALU0_FIFO_EMPTY_SIMD2, UINT64, AVERAGE), + COUNTABLE(SQ_ALU1_FIFO_EMPTY_SIMD2, UINT64, AVERAGE), + COUNTABLE(SQ_ALU0_FIFO_EMPTY_SIMD3, UINT64, AVERAGE), + COUNTABLE(SQ_ALU1_FIFO_EMPTY_SIMD3, UINT64, AVERAGE), + COUNTABLE(SQ_SYNC_ALU_STALL_SIMD2_VTX, UINT64, AVERAGE), + COUNTABLE(SQ_PERFCOUNT_VTX_POP_THREAD, UINT64, AVERAGE), + COUNTABLE(SQ_SYNC_ALU_STALL_SIMD0_PIX, UINT64, AVERAGE), + COUNTABLE(SQ_SYNC_ALU_STALL_SIMD1_PIX, UINT64, AVERAGE), + COUNTABLE(SQ_SYNC_ALU_STALL_SIMD2_PIX, UINT64, AVERAGE), + COUNTABLE(SQ_PERFCOUNT_PIX_POP_THREAD, UINT64, AVERAGE), + COUNTABLE(SQ_SYNC_TEX_STALL_PIX, UINT64, AVERAGE), + COUNTABLE(SQ_SYNC_VC_STALL_PIX, UINT64, AVERAGE), + COUNTABLE(SQ_CONSTANTS_USED_SIMD2, UINT64, AVERAGE), + 
COUNTABLE(SQ_CONSTANTS_SENT_SP_SIMD2, UINT64, AVERAGE), + COUNTABLE(SQ_PERFCOUNT_VTX_DEALLOC_ACK, UINT64, AVERAGE), + COUNTABLE(SQ_PERFCOUNT_PIX_DEALLOC_ACK, UINT64, AVERAGE), + COUNTABLE(SQ_ALU0_FIFO_FULL_SIMD0, UINT64, AVERAGE), + COUNTABLE(SQ_ALU1_FIFO_FULL_SIMD0, UINT64, AVERAGE), + COUNTABLE(SQ_ALU0_FIFO_FULL_SIMD1, UINT64, AVERAGE), + COUNTABLE(SQ_ALU1_FIFO_FULL_SIMD1, UINT64, AVERAGE), + COUNTABLE(SQ_ALU0_FIFO_FULL_SIMD2, UINT64, AVERAGE), + COUNTABLE(SQ_ALU1_FIFO_FULL_SIMD2, UINT64, AVERAGE), + COUNTABLE(SQ_ALU0_FIFO_FULL_SIMD3, UINT64, AVERAGE), + COUNTABLE(SQ_ALU1_FIFO_FULL_SIMD3, UINT64, AVERAGE), + COUNTABLE(VC_PERF_STATIC, UINT64, AVERAGE), + COUNTABLE(VC_PERF_STALLED, UINT64, AVERAGE), + COUNTABLE(VC_PERF_STARVED, UINT64, AVERAGE), + COUNTABLE(VC_PERF_SEND, UINT64, AVERAGE), + COUNTABLE(VC_PERF_ACTUAL_STARVED, UINT64, AVERAGE), + COUNTABLE(PIXEL_THREAD_0_ACTIVE, UINT64, AVERAGE), + COUNTABLE(VERTEX_THREAD_0_ACTIVE, UINT64, AVERAGE), + COUNTABLE(PIXEL_THREAD_0_NUMBER, UINT64, AVERAGE), + COUNTABLE(VERTEX_THREAD_0_NUMBER, UINT64, AVERAGE), + COUNTABLE(VERTEX_EVENT_NUMBER, UINT64, AVERAGE), + COUNTABLE(PIXEL_EVENT_NUMBER, UINT64, AVERAGE), + COUNTABLE(PTRBUFF_EF_PUSH, UINT64, AVERAGE), + COUNTABLE(PTRBUFF_EF_POP_EVENT, UINT64, AVERAGE), + COUNTABLE(PTRBUFF_EF_POP_NEW_VTX, UINT64, AVERAGE), + COUNTABLE(PTRBUFF_EF_POP_DEALLOC, UINT64, AVERAGE), + COUNTABLE(PTRBUFF_EF_POP_PVECTOR, UINT64, AVERAGE), + COUNTABLE(PTRBUFF_EF_POP_PVECTOR_X, UINT64, AVERAGE), + COUNTABLE(PTRBUFF_EF_POP_PVECTOR_VNZ, UINT64, AVERAGE), + COUNTABLE(PTRBUFF_PB_DEALLOC, UINT64, AVERAGE), + COUNTABLE(PTRBUFF_PI_STATE_PPB_POP, UINT64, AVERAGE), + COUNTABLE(PTRBUFF_PI_RTR, UINT64, AVERAGE), + COUNTABLE(PTRBUFF_PI_READ_EN, UINT64, AVERAGE), + COUNTABLE(PTRBUFF_PI_BUFF_SWAP, UINT64, AVERAGE), + COUNTABLE(PTRBUFF_SQ_FREE_BUFF, UINT64, AVERAGE), + COUNTABLE(PTRBUFF_SQ_DEC, UINT64, AVERAGE), + COUNTABLE(PTRBUFF_SC_VALID_CNTL_EVENT, UINT64, AVERAGE), + COUNTABLE(PTRBUFF_SC_VALID_IJ_XFER, 
UINT64, AVERAGE), + COUNTABLE(PTRBUFF_SC_NEW_VECTOR_1_Q, UINT64, AVERAGE), + COUNTABLE(PTRBUFF_QUAL_NEW_VECTOR, UINT64, AVERAGE), + COUNTABLE(PTRBUFF_QUAL_EVENT, UINT64, AVERAGE), + COUNTABLE(PTRBUFF_END_BUFFER, UINT64, AVERAGE), + COUNTABLE(PTRBUFF_FILL_QUAD, UINT64, AVERAGE), + COUNTABLE(VERTS_WRITTEN_SPI, UINT64, AVERAGE), + COUNTABLE(TP_FETCH_INSTR_EXEC, UINT64, AVERAGE), + COUNTABLE(TP_FETCH_INSTR_REQ, UINT64, AVERAGE), + COUNTABLE(TP_DATA_RETURN, UINT64, AVERAGE), + COUNTABLE(SPI_WRITE_CYCLES_SP, UINT64, AVERAGE), + COUNTABLE(SPI_WRITES_SP, UINT64, AVERAGE), + COUNTABLE(SP_ALU_INSTR_EXEC, UINT64, AVERAGE), + COUNTABLE(SP_CONST_ADDR_TO_SQ, UINT64, AVERAGE), + COUNTABLE(SP_PRED_KILLS_TO_SQ, UINT64, AVERAGE), + COUNTABLE(SP_EXPORT_CYCLES_TO_SX, UINT64, AVERAGE), + COUNTABLE(SP_EXPORTS_TO_SX, UINT64, AVERAGE), + COUNTABLE(SQ_CYCLES_ELAPSED, UINT64, AVERAGE), + COUNTABLE(SQ_TCFS_OPT_ALLOC_EXEC, UINT64, AVERAGE), + COUNTABLE(SQ_TCFS_NO_OPT_ALLOC, UINT64, AVERAGE), + COUNTABLE(SQ_ALU0_NO_OPT_ALLOC, UINT64, AVERAGE), + COUNTABLE(SQ_ALU1_NO_OPT_ALLOC, UINT64, AVERAGE), + COUNTABLE(SQ_TCFS_ARB_XFC_CNT, UINT64, AVERAGE), + COUNTABLE(SQ_ALU0_ARB_XFC_CNT, UINT64, AVERAGE), + COUNTABLE(SQ_ALU1_ARB_XFC_CNT, UINT64, AVERAGE), + COUNTABLE(SQ_TCFS_CFS_UPDATE_CNT, UINT64, AVERAGE), + COUNTABLE(SQ_ALU0_CFS_UPDATE_CNT, UINT64, AVERAGE), + COUNTABLE(SQ_ALU1_CFS_UPDATE_CNT, UINT64, AVERAGE), + COUNTABLE(SQ_VTX_PUSH_THREAD_CNT, UINT64, AVERAGE), + COUNTABLE(SQ_VTX_POP_THREAD_CNT, UINT64, AVERAGE), + COUNTABLE(SQ_PIX_PUSH_THREAD_CNT, UINT64, AVERAGE), + COUNTABLE(SQ_PIX_POP_THREAD_CNT, UINT64, AVERAGE), + COUNTABLE(SQ_PIX_TOTAL, UINT64, AVERAGE), + COUNTABLE(SQ_PIX_KILLED, UINT64, AVERAGE), +}; + +static const struct fd_perfcntr_countable sx_countables[] = { + COUNTABLE(SX_EXPORT_VECTORS, UINT64, AVERAGE), + COUNTABLE(SX_DUMMY_QUADS, UINT64, AVERAGE), + COUNTABLE(SX_ALPHA_FAIL, UINT64, AVERAGE), + COUNTABLE(SX_RB_QUAD_BUSY, UINT64, AVERAGE), + COUNTABLE(SX_RB_COLOR_BUSY, UINT64, 
AVERAGE), + COUNTABLE(SX_RB_QUAD_STALL, UINT64, AVERAGE), + COUNTABLE(SX_RB_COLOR_STALL, UINT64, AVERAGE), +}; + +static const struct fd_perfcntr_countable rb_countables[] = { + COUNTABLE(RBPERF_CNTX_BUSY, UINT64, AVERAGE), + COUNTABLE(RBPERF_CNTX_BUSY_MAX, UINT64, AVERAGE), + COUNTABLE(RBPERF_SX_QUAD_STARVED, UINT64, AVERAGE), + COUNTABLE(RBPERF_SX_QUAD_STARVED_MAX, UINT64, AVERAGE), + COUNTABLE(RBPERF_GA_GC_CH0_SYS_REQ, UINT64, AVERAGE), + COUNTABLE(RBPERF_GA_GC_CH0_SYS_REQ_MAX, UINT64, AVERAGE), + COUNTABLE(RBPERF_GA_GC_CH1_SYS_REQ, UINT64, AVERAGE), + COUNTABLE(RBPERF_GA_GC_CH1_SYS_REQ_MAX, UINT64, AVERAGE), + COUNTABLE(RBPERF_MH_STARVED, UINT64, AVERAGE), + COUNTABLE(RBPERF_MH_STARVED_MAX, UINT64, AVERAGE), + COUNTABLE(RBPERF_AZ_BC_COLOR_BUSY, UINT64, AVERAGE), + COUNTABLE(RBPERF_AZ_BC_COLOR_BUSY_MAX, UINT64, AVERAGE), + COUNTABLE(RBPERF_AZ_BC_Z_BUSY, UINT64, AVERAGE), + COUNTABLE(RBPERF_AZ_BC_Z_BUSY_MAX, UINT64, AVERAGE), + COUNTABLE(RBPERF_RB_SC_TILE_RTR_N, UINT64, AVERAGE), + COUNTABLE(RBPERF_RB_SC_TILE_RTR_N_MAX, UINT64, AVERAGE), + COUNTABLE(RBPERF_RB_SC_SAMP_RTR_N, UINT64, AVERAGE), + COUNTABLE(RBPERF_RB_SC_SAMP_RTR_N_MAX, UINT64, AVERAGE), + COUNTABLE(RBPERF_RB_SX_QUAD_RTR_N, UINT64, AVERAGE), + COUNTABLE(RBPERF_RB_SX_QUAD_RTR_N_MAX, UINT64, AVERAGE), + COUNTABLE(RBPERF_RB_SX_COLOR_RTR_N, UINT64, AVERAGE), + COUNTABLE(RBPERF_RB_SX_COLOR_RTR_N_MAX, UINT64, AVERAGE), + COUNTABLE(RBPERF_RB_SC_SAMP_LZ_BUSY, UINT64, AVERAGE), + COUNTABLE(RBPERF_RB_SC_SAMP_LZ_BUSY_MAX, UINT64, AVERAGE), + COUNTABLE(RBPERF_ZXP_STALL, UINT64, AVERAGE), + COUNTABLE(RBPERF_ZXP_STALL_MAX, UINT64, AVERAGE), + COUNTABLE(RBPERF_EVENT_PENDING, UINT64, AVERAGE), + COUNTABLE(RBPERF_EVENT_PENDING_MAX, UINT64, AVERAGE), + COUNTABLE(RBPERF_RB_MH_VALID, UINT64, AVERAGE), + COUNTABLE(RBPERF_RB_MH_VALID_MAX, UINT64, AVERAGE), + COUNTABLE(RBPERF_SX_RB_QUAD_SEND, UINT64, AVERAGE), + COUNTABLE(RBPERF_SX_RB_COLOR_SEND, UINT64, AVERAGE), + COUNTABLE(RBPERF_SC_RB_TILE_SEND, UINT64, AVERAGE), + 
COUNTABLE(RBPERF_SC_RB_SAMPLE_SEND, UINT64, AVERAGE), + COUNTABLE(RBPERF_SX_RB_MEM_EXPORT, UINT64, AVERAGE), + COUNTABLE(RBPERF_SX_RB_QUAD_EVENT, UINT64, AVERAGE), + COUNTABLE(RBPERF_SC_RB_TILE_EVENT_FILTERED, UINT64, AVERAGE), + COUNTABLE(RBPERF_SC_RB_TILE_EVENT_ALL, UINT64, AVERAGE), + COUNTABLE(RBPERF_RB_SC_EZ_SEND, UINT64, AVERAGE), + COUNTABLE(RBPERF_RB_SX_INDEX_SEND, UINT64, AVERAGE), + COUNTABLE(RBPERF_GMEM_INTFO_RD, UINT64, AVERAGE), + COUNTABLE(RBPERF_GMEM_INTF1_RD, UINT64, AVERAGE), + COUNTABLE(RBPERF_GMEM_INTFO_WR, UINT64, AVERAGE), + COUNTABLE(RBPERF_GMEM_INTF1_WR, UINT64, AVERAGE), + COUNTABLE(RBPERF_RB_CP_CONTEXT_DONE, UINT64, AVERAGE), + COUNTABLE(RBPERF_RB_CP_CACHE_FLUSH, UINT64, AVERAGE), + COUNTABLE(RBPERF_ZPASS_DONE, UINT64, AVERAGE), + COUNTABLE(RBPERF_ZCMD_VALID, UINT64, AVERAGE), + COUNTABLE(RBPERF_CCMD_VALID, UINT64, AVERAGE), + COUNTABLE(RBPERF_ACCUM_GRANT, UINT64, AVERAGE), + COUNTABLE(RBPERF_ACCUM_C0_GRANT, UINT64, AVERAGE), + COUNTABLE(RBPERF_ACCUM_C1_GRANT, UINT64, AVERAGE), + COUNTABLE(RBPERF_ACCUM_FULL_BE_WR, UINT64, AVERAGE), + COUNTABLE(RBPERF_ACCUM_REQUEST_NO_GRANT, UINT64, AVERAGE), + COUNTABLE(RBPERF_ACCUM_TIMEOUT_PULSE, UINT64, AVERAGE), + COUNTABLE(RBPERF_ACCUM_LIN_TIMEOUT_PULSE, UINT64, AVERAGE), + COUNTABLE(RBPERF_ACCUM_CAM_HIT_FLUSHING, UINT64, AVERAGE), +}; + +static const struct fd_perfcntr_counter pa_su_counters[] = { + COUNTER(PA_SU_PERFCOUNTER0_SELECT, PA_SU_PERFCOUNTER0_LOW, PA_SU_PERFCOUNTER0_HI), + COUNTER(PA_SU_PERFCOUNTER1_SELECT, PA_SU_PERFCOUNTER1_LOW, PA_SU_PERFCOUNTER1_HI), + COUNTER(PA_SU_PERFCOUNTER2_SELECT, PA_SU_PERFCOUNTER2_LOW, PA_SU_PERFCOUNTER2_HI), + COUNTER(PA_SU_PERFCOUNTER3_SELECT, PA_SU_PERFCOUNTER3_LOW, PA_SU_PERFCOUNTER3_HI), +}; + +static const struct fd_perfcntr_counter pa_sc_counters[] = { + COUNTER(PA_SC_PERFCOUNTER0_SELECT, PA_SC_PERFCOUNTER0_LOW, PA_SC_PERFCOUNTER0_HI), +}; + +static const struct fd_perfcntr_counter vgt_counters[] = { + COUNTER(VGT_PERFCOUNTER0_SELECT, VGT_PERFCOUNTER0_LOW, 
VGT_PERFCOUNTER0_HI), + COUNTER(VGT_PERFCOUNTER1_SELECT, VGT_PERFCOUNTER1_LOW, VGT_PERFCOUNTER1_HI), + COUNTER(VGT_PERFCOUNTER2_SELECT, VGT_PERFCOUNTER2_LOW, VGT_PERFCOUNTER2_HI), + COUNTER(VGT_PERFCOUNTER3_SELECT, VGT_PERFCOUNTER3_LOW, VGT_PERFCOUNTER3_HI), +}; + +static const struct fd_perfcntr_counter tcr_counters[] = { + COUNTER(TCR_PERFCOUNTER0_SELECT, TCR_PERFCOUNTER0_LOW, TCR_PERFCOUNTER0_HI), + COUNTER(TCR_PERFCOUNTER1_SELECT, TCR_PERFCOUNTER1_LOW, TCR_PERFCOUNTER1_HI), +}; + +static const struct fd_perfcntr_counter tp0_counters[] = { + COUNTER(TP0_PERFCOUNTER0_SELECT, TP0_PERFCOUNTER0_LOW, TP0_PERFCOUNTER0_HI), + COUNTER(TP0_PERFCOUNTER1_SELECT, TP0_PERFCOUNTER1_LOW, TP0_PERFCOUNTER1_HI), +}; + +static const struct fd_perfcntr_counter tcm_counters[] = { + COUNTER(TCM_PERFCOUNTER0_SELECT, TCM_PERFCOUNTER0_LOW, TCM_PERFCOUNTER0_HI), + COUNTER(TCM_PERFCOUNTER1_SELECT, TCM_PERFCOUNTER1_LOW, TCM_PERFCOUNTER1_HI), +}; + +static const struct fd_perfcntr_counter tcf_counters[] = { + COUNTER(TCF_PERFCOUNTER0_SELECT, TCF_PERFCOUNTER0_LOW, TCF_PERFCOUNTER0_HI), + COUNTER(TCF_PERFCOUNTER1_SELECT, TCF_PERFCOUNTER1_LOW, TCF_PERFCOUNTER1_HI), + COUNTER(TCF_PERFCOUNTER2_SELECT, TCF_PERFCOUNTER2_LOW, TCF_PERFCOUNTER2_HI), + COUNTER(TCF_PERFCOUNTER3_SELECT, TCF_PERFCOUNTER3_LOW, TCF_PERFCOUNTER3_HI), + COUNTER(TCF_PERFCOUNTER4_SELECT, TCF_PERFCOUNTER4_LOW, TCF_PERFCOUNTER4_HI), + COUNTER(TCF_PERFCOUNTER5_SELECT, TCF_PERFCOUNTER5_LOW, TCF_PERFCOUNTER5_HI), + COUNTER(TCF_PERFCOUNTER6_SELECT, TCF_PERFCOUNTER6_LOW, TCF_PERFCOUNTER6_HI), + COUNTER(TCF_PERFCOUNTER7_SELECT, TCF_PERFCOUNTER7_LOW, TCF_PERFCOUNTER7_HI), + COUNTER(TCF_PERFCOUNTER8_SELECT, TCF_PERFCOUNTER8_LOW, TCF_PERFCOUNTER8_HI), + COUNTER(TCF_PERFCOUNTER9_SELECT, TCF_PERFCOUNTER9_LOW, TCF_PERFCOUNTER9_HI), + COUNTER(TCF_PERFCOUNTER10_SELECT, TCF_PERFCOUNTER10_LOW, TCF_PERFCOUNTER10_HI), + COUNTER(TCF_PERFCOUNTER11_SELECT, TCF_PERFCOUNTER11_LOW, TCF_PERFCOUNTER11_HI), +}; + +static const struct fd_perfcntr_counter 
sq_counters[] = { + COUNTER(SQ_PERFCOUNTER0_SELECT, SQ_PERFCOUNTER0_LOW, SQ_PERFCOUNTER0_HI), + COUNTER(SQ_PERFCOUNTER1_SELECT, SQ_PERFCOUNTER1_LOW, SQ_PERFCOUNTER1_HI), + COUNTER(SQ_PERFCOUNTER2_SELECT, SQ_PERFCOUNTER2_LOW, SQ_PERFCOUNTER2_HI), + COUNTER(SQ_PERFCOUNTER3_SELECT, SQ_PERFCOUNTER3_LOW, SQ_PERFCOUNTER3_HI), +}; + +static const struct fd_perfcntr_countable rbbm_countables[] = { + COUNTABLE(RBBM1_COUNT, UINT64, AVERAGE), + COUNTABLE(RBBM1_NRT_BUSY, UINT64, AVERAGE), + COUNTABLE(RBBM1_RB_BUSY, UINT64, AVERAGE), + COUNTABLE(RBBM1_SQ_CNTX0_BUSY, UINT64, AVERAGE), + COUNTABLE(RBBM1_SQ_CNTX17_BUSY, UINT64, AVERAGE), + COUNTABLE(RBBM1_VGT_BUSY, UINT64, AVERAGE), + COUNTABLE(RBBM1_VGT_NODMA_BUSY, UINT64, AVERAGE), + COUNTABLE(RBBM1_PA_BUSY, UINT64, AVERAGE), + COUNTABLE(RBBM1_SC_CNTX_BUSY, UINT64, AVERAGE), + COUNTABLE(RBBM1_TPC_BUSY, UINT64, AVERAGE), + COUNTABLE(RBBM1_TC_BUSY, UINT64, AVERAGE), + COUNTABLE(RBBM1_SX_BUSY, UINT64, AVERAGE), + COUNTABLE(RBBM1_CP_COHER_BUSY, UINT64, AVERAGE), + COUNTABLE(RBBM1_CP_NRT_BUSY, UINT64, AVERAGE), + COUNTABLE(RBBM1_GFX_IDLE_STALL, UINT64, AVERAGE), + COUNTABLE(RBBM1_INTERRUPT, UINT64, AVERAGE), +}; + +static const struct fd_perfcntr_countable cp_countables[] = { + COUNTABLE(ALWAYS_COUNT, UINT64, AVERAGE), + COUNTABLE(TRANS_FIFO_FULL, UINT64, AVERAGE), + COUNTABLE(TRANS_FIFO_AF, UINT64, AVERAGE), + COUNTABLE(RCIU_PFPTRANS_WAIT, UINT64, AVERAGE), + COUNTABLE(RCIU_NRTTRANS_WAIT, UINT64, AVERAGE), + COUNTABLE(CSF_NRT_READ_WAIT, UINT64, AVERAGE), + COUNTABLE(CSF_I1_FIFO_FULL, UINT64, AVERAGE), + COUNTABLE(CSF_I2_FIFO_FULL, UINT64, AVERAGE), + COUNTABLE(CSF_ST_FIFO_FULL, UINT64, AVERAGE), + COUNTABLE(CSF_RING_ROQ_FULL, UINT64, AVERAGE), + COUNTABLE(CSF_I1_ROQ_FULL, UINT64, AVERAGE), + COUNTABLE(CSF_I2_ROQ_FULL, UINT64, AVERAGE), + COUNTABLE(CSF_ST_ROQ_FULL, UINT64, AVERAGE), + COUNTABLE(MIU_TAG_MEM_FULL, UINT64, AVERAGE), + COUNTABLE(MIU_WRITECLEAN, UINT64, AVERAGE), + COUNTABLE(MIU_NRT_WRITE_STALLED, UINT64, AVERAGE), + 
COUNTABLE(MIU_NRT_READ_STALLED, UINT64, AVERAGE), + COUNTABLE(ME_WRITE_CONFIRM_FIFO_FULL, UINT64, AVERAGE), + COUNTABLE(ME_VS_DEALLOC_FIFO_FULL, UINT64, AVERAGE), + COUNTABLE(ME_PS_DEALLOC_FIFO_FULL, UINT64, AVERAGE), + COUNTABLE(ME_REGS_VS_EVENT_FIFO_FULL, UINT64, AVERAGE), + COUNTABLE(ME_REGS_PS_EVENT_FIFO_FULL, UINT64, AVERAGE), + COUNTABLE(ME_REGS_CF_EVENT_FIFO_FULL, UINT64, AVERAGE), + COUNTABLE(ME_MICRO_RB_STARVED, UINT64, AVERAGE), + COUNTABLE(ME_MICRO_I1_STARVED, UINT64, AVERAGE), + COUNTABLE(ME_MICRO_I2_STARVED, UINT64, AVERAGE), + COUNTABLE(ME_MICRO_ST_STARVED, UINT64, AVERAGE), + COUNTABLE(RCIU_RBBM_DWORD_SENT, UINT64, AVERAGE), + COUNTABLE(ME_BUSY_CLOCKS, UINT64, AVERAGE), + COUNTABLE(ME_WAIT_CONTEXT_AVAIL, UINT64, AVERAGE), + COUNTABLE(PFP_TYPE0_PACKET, UINT64, AVERAGE), + COUNTABLE(PFP_TYPE3_PACKET, UINT64, AVERAGE), + COUNTABLE(CSF_RB_WPTR_NEQ_RPTR, UINT64, AVERAGE), + COUNTABLE(CSF_I1_SIZE_NEQ_ZERO, UINT64, AVERAGE), + COUNTABLE(CSF_I2_SIZE_NEQ_ZERO, UINT64, AVERAGE), + COUNTABLE(CSF_RBI1I2_FETCHING, UINT64, AVERAGE), +}; + +static const struct fd_perfcntr_counter sx_counters[] = { + COUNTER(SX_PERFCOUNTER0_SELECT, SX_PERFCOUNTER0_LOW, SX_PERFCOUNTER0_HI), +}; + +// We don't have the enums for MH perfcntrs +#if 0 +static const struct fd_perfcntr_counter mh_counters[] = { + COUNTER(MH_PERFCOUNTER0_SELECT, MH_PERFCOUNTER0_LOW, MH_PERFCOUNTER0_HI), + COUNTER(MH_PERFCOUNTER1_SELECT, MH_PERFCOUNTER1_LOW, MH_PERFCOUNTER1_HI), +}; +#endif + +static const struct fd_perfcntr_counter rbbm_counters[] = { + COUNTER(RBBM_PERFCOUNTER1_SELECT, RBBM_PERFCOUNTER1_LO, RBBM_PERFCOUNTER1_HI), +}; + +static const struct fd_perfcntr_counter cp_counters[] = { + COUNTER(CP_PERFCOUNTER_SELECT, CP_PERFCOUNTER_LO, CP_PERFCOUNTER_HI), +}; + +static const struct fd_perfcntr_counter rb_counters[] = { + COUNTER(RB_PERFCOUNTER0_SELECT, RB_PERFCOUNTER0_LOW, RB_PERFCOUNTER0_HI), +}; + +const struct fd_perfcntr_group a2xx_perfcntr_groups[] = { + GROUP("PA_SU", pa_su_counters, 
pa_su_countables), + GROUP("PA_SC", pa_sc_counters, pa_sc_countables), + GROUP("VGT", vgt_counters, vgt_countables), + GROUP("TCR", tcr_counters, tcr_countables), + GROUP("TP0", tp0_counters, tp0_countables), + GROUP("TCM", tcm_counters, tcm_countables), + GROUP("TCF", tcf_counters, tcf_countables), + GROUP("SQ", sq_counters, sq_countables), + GROUP("SX", sx_counters, sx_countables), +// GROUP("MH", mh_counters, mh_countables), + GROUP("RBBM", rbbm_counters, rbbm_countables), + GROUP("CP", cp_counters, cp_countables), + GROUP("RB", rb_counters, rb_countables), +}; + +const unsigned a2xx_num_perfcntr_groups = ARRAY_SIZE(a2xx_perfcntr_groups); diff -Nru mesa-18.3.3/src/gallium/drivers/freedreno/a2xx/fd2_program.c mesa-19.0.1/src/gallium/drivers/freedreno/a2xx/fd2_program.c --- mesa-18.3.3/src/gallium/drivers/freedreno/a2xx/fd2_program.c 2018-12-07 18:58:04.000000000 +0000 +++ mesa-19.0.1/src/gallium/drivers/freedreno/a2xx/fd2_program.c 2019-03-31 23:16:37.000000000 +0000 @@ -22,6 +22,7 @@ * * Authors: * Rob Clark + * Jonathan Marek */ #include "pipe/p_state.h" @@ -34,18 +35,20 @@ #include "freedreno_program.h" +#include "ir2.h" #include "fd2_program.h" -#include "fd2_compiler.h" #include "fd2_texture.h" #include "fd2_util.h" +#include "instr-a2xx.h" static struct fd2_shader_stateobj * -create_shader(enum shader_t type) +create_shader(struct pipe_context *pctx, gl_shader_stage type) { struct fd2_shader_stateobj *so = CALLOC_STRUCT(fd2_shader_stateobj); if (!so) return NULL; so->type = type; + so->is_a20x = is_a20x(fd_context(pctx)->screen); return so; } @@ -54,89 +57,71 @@ { if (!so) return; - ir2_shader_destroy(so->ir); - free(so->tokens); - free(so->bin); + ralloc_free(so->nir); + for (int i = 0; i < ARRAY_SIZE(so->variant); i++) + free(so->variant[i].info.dwords); free(so); } -static struct fd2_shader_stateobj * -assemble(struct fd2_shader_stateobj *so) +static void +emit(struct fd_ringbuffer *ring, gl_shader_stage type, + struct ir2_shader_info *info, struct 
util_dynarray *patches) { - free(so->bin); - so->bin = ir2_shader_assemble(so->ir, &so->info); - if (!so->bin) - goto fail; + unsigned i; - if (fd_mesa_debug & FD_DBG_DISASM) { - DBG("disassemble: type=%d", so->type); - disasm_a2xx(so->bin, so->info.sizedwords, 0, so->type); - } + assert(info->sizedwords); - return so; + OUT_PKT3(ring, CP_IM_LOAD_IMMEDIATE, 2 + info->sizedwords); + OUT_RING(ring, type == MESA_SHADER_FRAGMENT); + OUT_RING(ring, info->sizedwords); -fail: - debug_error("assemble failed!"); - delete_shader(so); - return NULL; + if (patches) + util_dynarray_append(patches, uint32_t*, &ring->cur[info->mem_export_ptr]); + + for (i = 0; i < info->sizedwords; i++) + OUT_RING(ring, info->dwords[i]); } -static struct fd2_shader_stateobj * -compile(struct fd_program_stateobj *prog, struct fd2_shader_stateobj *so) +static int +ir2_glsl_type_size(const struct glsl_type *type) { - int ret; + return glsl_count_attribute_slots(type, false); +} - if (fd_mesa_debug & FD_DBG_DISASM) { - DBG("dump tgsi: type=%d", so->type); - tgsi_dump(so->tokens, 0); +static void * +fd2_fp_state_create(struct pipe_context *pctx, + const struct pipe_shader_state *cso) +{ + struct fd2_shader_stateobj *so = create_shader(pctx, MESA_SHADER_FRAGMENT); + if (!so) + return NULL; + + if (cso->type == PIPE_SHADER_IR_NIR) { + so->nir = cso->ir.nir; + NIR_PASS_V(so->nir, nir_lower_io, nir_var_all, ir2_glsl_type_size, + (nir_lower_io_options)0); + } else { + assert(cso->type == PIPE_SHADER_IR_TGSI); + so->nir = ir2_tgsi_to_nir(cso->tokens); } - ret = fd2_compile_shader(prog, so); - if (ret) + if (ir2_optimize_nir(so->nir, true)) goto fail; - /* NOTE: we don't assemble yet because for VS we don't know the - * type information for vertex fetch yet.. so those need to be - * patched up later before assembling. 
- */ + so->first_immediate = so->nir->num_uniforms; - so->info.sizedwords = 0; + ir2_compile(so, 0, NULL); + ralloc_free(so->nir); + so->nir = NULL; return so; fail: - debug_error("compile failed!"); delete_shader(so); return NULL; } static void -emit(struct fd_ringbuffer *ring, struct fd2_shader_stateobj *so) -{ - unsigned i; - - if (so->info.sizedwords == 0) - assemble(so); - - OUT_PKT3(ring, CP_IM_LOAD_IMMEDIATE, 2 + so->info.sizedwords); - OUT_RING(ring, (so->type == SHADER_VERTEX) ? 0 : 1); - OUT_RING(ring, so->info.sizedwords); - for (i = 0; i < so->info.sizedwords; i++) - OUT_RING(ring, so->bin[i]); -} - -static void * -fd2_fp_state_create(struct pipe_context *pctx, - const struct pipe_shader_state *cso) -{ - struct fd2_shader_stateobj *so = create_shader(SHADER_FRAGMENT); - if (!so) - return NULL; - so->tokens = tgsi_dup_tokens(cso->tokens); - return so; -} - -static void fd2_fp_state_delete(struct pipe_context *pctx, void *hwcso) { struct fd2_shader_stateobj *so = hwcso; @@ -147,11 +132,32 @@ fd2_vp_state_create(struct pipe_context *pctx, const struct pipe_shader_state *cso) { - struct fd2_shader_stateobj *so = create_shader(SHADER_VERTEX); + struct fd2_shader_stateobj *so = create_shader(pctx, MESA_SHADER_VERTEX); if (!so) return NULL; - so->tokens = tgsi_dup_tokens(cso->tokens); + + if (cso->type == PIPE_SHADER_IR_NIR) { + so->nir = cso->ir.nir; + NIR_PASS_V(so->nir, nir_lower_io, nir_var_all, ir2_glsl_type_size, + (nir_lower_io_options)0); + } else { + assert(cso->type == PIPE_SHADER_IR_TGSI); + so->nir = ir2_tgsi_to_nir(cso->tokens); + } + + if (ir2_optimize_nir(so->nir, true)) + goto fail; + + so->first_immediate = so->nir->num_uniforms; + + /* compile binning variant now */ + ir2_compile(so, 0, NULL); + return so; + +fail: + delete_shader(so); + return NULL; } static void @@ -162,277 +168,146 @@ } static void -patch_vtx_fetches(struct fd_context *ctx, struct fd2_shader_stateobj *so, - struct fd_vertex_stateobj *vtx) +patch_vtx_fetch(struct fd_context 
*ctx, struct pipe_vertex_element *elem, + instr_fetch_vtx_t *instr, uint16_t dst_swiz) { - unsigned i; - - assert(so->num_vfetch_instrs == vtx->num_elements); - - /* update vtx fetch instructions: */ - for (i = 0; i < so->num_vfetch_instrs; i++) { - struct ir2_instruction *instr = so->vfetch_instrs[i]; - struct pipe_vertex_element *elem = &vtx->pipe[i]; - struct pipe_vertex_buffer *vb = + struct pipe_vertex_buffer *vb = &ctx->vtx.vertexbuf.vb[elem->vertex_buffer_index]; - enum pipe_format format = elem->src_format; - const struct util_format_description *desc = - util_format_description(format); - unsigned j; - - /* Find the first non-VOID channel. */ - for (j = 0; j < 4; j++) - if (desc->channel[j].type != UTIL_FORMAT_TYPE_VOID) - break; - - /* CI/CIS can probably be set in compiler instead: */ - instr->fetch.const_idx = 20 + (i / 3); - instr->fetch.const_idx_sel = i % 3; - - instr->fetch.fmt = fd2_pipe2surface(format); - instr->fetch.is_normalized = desc->channel[j].normalized; - instr->fetch.is_signed = - desc->channel[j].type == UTIL_FORMAT_TYPE_SIGNED; - instr->fetch.stride = vb->stride ? : 1; - instr->fetch.offset = elem->src_offset; - - for (j = 0; j < 4; j++) - instr->dst_reg.swizzle[j] = "xyzw01__"[desc->swizzle[j]]; - - assert(instr->fetch.fmt != ~0); - - DBG("vtx[%d]: %s (%d), ci=%d, cis=%d, id=%d, swizzle=%s, " - "stride=%d, offset=%d", - i, util_format_name(format), - instr->fetch.fmt, - instr->fetch.const_idx, - instr->fetch.const_idx_sel, - elem->instance_divisor, - instr->dst_reg.swizzle, - instr->fetch.stride, - instr->fetch.offset); + enum pipe_format format = elem->src_format; + const struct util_format_description *desc = + util_format_description(format); + unsigned j; + + /* Find the first non-VOID channel. 
*/ + for (j = 0; j < 4; j++) + if (desc->channel[j].type != UTIL_FORMAT_TYPE_VOID) + break; + + instr->format = fd2_pipe2surface(format); + instr->num_format_all = !desc->channel[j].normalized; + instr->format_comp_all = desc->channel[j].type == UTIL_FORMAT_TYPE_SIGNED; + instr->stride = vb->stride; + instr->offset = elem->src_offset; + + unsigned swiz = 0; + for (int i = 0; i < 4; i++) { + unsigned s = dst_swiz >> i*3 & 7; + swiz |= (s >= 4 ? s : desc->swizzle[s]) << i*3; } - - /* trigger re-assemble: */ - so->info.sizedwords = 0; + instr->dst_swiz = swiz; } static void -patch_tex_fetches(struct fd_context *ctx, struct fd2_shader_stateobj *so, - struct fd_texture_stateobj *tex) +patch_fetches(struct fd_context *ctx, struct ir2_shader_info *info, + struct fd_vertex_stateobj *vtx, struct fd_texture_stateobj *tex) { - unsigned i; + for (int i = 0; i < info->num_fetch_instrs; i++) { + struct ir2_fetch_info *fi = &info->fetch_info[i]; - /* update tex fetch instructions: */ - for (i = 0; i < so->num_tfetch_instrs; i++) { - struct ir2_instruction *instr = so->tfetch_instrs[i].instr; - unsigned samp_id = so->tfetch_instrs[i].samp_id; - unsigned const_idx = fd2_get_const_idx(ctx, tex, samp_id); - - if (const_idx != instr->fetch.const_idx) { - instr->fetch.const_idx = const_idx; - /* trigger re-assemble: */ - so->info.sizedwords = 0; + instr_fetch_t *instr = (instr_fetch_t*) &info->dwords[fi->offset]; + if (instr->opc == VTX_FETCH) { + unsigned idx = (instr->vtx.const_index - 20) * 3 + + instr->vtx.const_index_sel; + patch_vtx_fetch(ctx, &vtx->pipe[idx], &instr->vtx, fi->vtx.dst_swiz); + continue; } - } -} -void -fd2_program_validate(struct fd_context *ctx) -{ - struct fd_program_stateobj *prog = &ctx->prog; - bool dirty_fp = !!(ctx->dirty_shader[PIPE_SHADER_FRAGMENT] & FD_DIRTY_SHADER_PROG); - bool dirty_vp = !!(ctx->dirty_shader[PIPE_SHADER_VERTEX] & FD_DIRTY_SHADER_PROG); - - /* if vertex or frag shader is dirty, we may need to recompile. 
Compile - * frag shader first, as that assigns the register slots for exports - * from the vertex shader. And therefore if frag shader has changed we - * need to recompile both vert and frag shader. - */ - if (dirty_fp) - compile(prog, prog->fp); - - if (dirty_fp || dirty_vp) - compile(prog, prog->vp); - - /* if necessary, fix up vertex fetch instructions: */ - if (ctx->dirty & (FD_DIRTY_VTXSTATE | FD_DIRTY_PROG)) - patch_vtx_fetches(ctx, prog->vp, ctx->vtx.vtx); - - /* if necessary, fix up texture fetch instructions: */ - if (ctx->dirty & (FD_DIRTY_TEXSTATE | FD_DIRTY_PROG)) { - patch_tex_fetches(ctx, prog->vp, &ctx->tex[PIPE_SHADER_VERTEX]); - patch_tex_fetches(ctx, prog->fp, &ctx->tex[PIPE_SHADER_FRAGMENT]); + assert(instr->opc == TEX_FETCH); + instr->tex.const_idx = fd2_get_const_idx(ctx, tex, fi->tex.samp_id); + instr->tex.src_swiz = fi->tex.src_swiz; + if (fd2_texture_swap_xy(tex, fi->tex.samp_id)) { + unsigned x = instr->tex.src_swiz; + instr->tex.src_swiz = (x & 0x30) | (x & 3) << 2 | (x >> 2 & 3); + } } } void -fd2_program_emit(struct fd_ringbuffer *ring, +fd2_program_emit(struct fd_context *ctx, struct fd_ringbuffer *ring, struct fd_program_stateobj *prog) { - struct ir2_shader_info *vsi = - &((struct fd2_shader_stateobj *)prog->vp)->info; - struct ir2_shader_info *fsi = - &((struct fd2_shader_stateobj *)prog->fp)->info; - uint8_t vs_gprs, fs_gprs, vs_export; - - emit(ring, prog->vp); - emit(ring, prog->fp); - - vs_gprs = (vsi->max_reg < 0) ? 0x80 : vsi->max_reg; - fs_gprs = (fsi->max_reg < 0) ? 
0x80 : fsi->max_reg; - vs_export = MAX2(1, prog->num_exports) - 1; - - OUT_PKT3(ring, CP_SET_CONSTANT, 2); - OUT_RING(ring, CP_REG(REG_A2XX_SQ_PROGRAM_CNTL)); - OUT_RING(ring, A2XX_SQ_PROGRAM_CNTL_PS_EXPORT_MODE(POSITION_2_VECTORS_SPRITE) | - A2XX_SQ_PROGRAM_CNTL_VS_RESOURCE | - A2XX_SQ_PROGRAM_CNTL_PS_RESOURCE | - A2XX_SQ_PROGRAM_CNTL_VS_EXPORT_COUNT(vs_export) | - A2XX_SQ_PROGRAM_CNTL_PS_REGS(fs_gprs) | - A2XX_SQ_PROGRAM_CNTL_VS_REGS(vs_gprs)); -} - -/* Creates shader: - * EXEC ADDR(0x2) CNT(0x1) - * (S)FETCH: SAMPLE R0.xyzw = R0.xyx CONST(0) LOCATION(CENTER) - * ALLOC PARAM/PIXEL SIZE(0x0) - * EXEC_END ADDR(0x3) CNT(0x1) - * ALU: MAXv export0 = R0, R0 ; gl_FragColor - * NOP - */ -static struct fd2_shader_stateobj * -create_blit_fp(void) -{ - struct fd2_shader_stateobj *so = create_shader(SHADER_FRAGMENT); - struct ir2_instruction *instr; - - if (!so) - return NULL; - - so->ir = ir2_shader_create(); - - instr = ir2_instr_create_tex_fetch(so->ir, 0); - ir2_dst_create(instr, 0, "xyzw", 0); - ir2_reg_create(instr, 0, "xyx", IR2_REG_INPUT); - instr->sync = true; - - instr = ir2_instr_create_alu_v(so->ir, MAXv); - ir2_dst_create(instr, 0, NULL, IR2_REG_EXPORT); - ir2_reg_create(instr, 0, NULL, 0); - ir2_reg_create(instr, 0, NULL, 0); - - return assemble(so); -} - -/* Creates shader: -* EXEC ADDR(0x3) CNT(0x2) -* FETCH: VERTEX R1.xy01 = R0.x FMT_32_32_FLOAT UNSIGNED STRIDE(8) CONST(26, 1) -* FETCH: VERTEX R2.xyz1 = R0.x FMT_32_32_32_FLOAT UNSIGNED STRIDE(12) CONST(26, 0) -* ALLOC POSITION SIZE(0x0) -* EXEC ADDR(0x5) CNT(0x1) -* ALU: MAXv export62 = R2, R2 ; gl_Position -* ALLOC PARAM/PIXEL SIZE(0x0) -* EXEC_END ADDR(0x6) CNT(0x1) -* ALU: MAXv export0 = R1, R1 -* NOP - */ -static struct fd2_shader_stateobj * -create_blit_vp(void) -{ - struct fd2_shader_stateobj *so = create_shader(SHADER_VERTEX); - struct ir2_instruction *instr; - - if (!so) - return NULL; - - so->ir = ir2_shader_create(); - - instr = ir2_instr_create_vtx_fetch(so->ir, 26, 1, FMT_32_32_FLOAT, false, 8); 
- instr->fetch.is_normalized = true; - ir2_dst_create(instr, 1, "xy01", 0); - ir2_reg_create(instr, 0, "x", IR2_REG_INPUT); - - instr = ir2_instr_create_vtx_fetch(so->ir, 26, 0, FMT_32_32_32_FLOAT, false, 12); - instr->fetch.is_normalized = true; - ir2_dst_create(instr, 2, "xyz1", 0); - ir2_reg_create(instr, 0, "x", IR2_REG_INPUT); - - instr = ir2_instr_create_alu_v(so->ir, MAXv); - ir2_dst_create(instr, 62, NULL, IR2_REG_EXPORT); - ir2_reg_create(instr, 2, NULL, 0); - ir2_reg_create(instr, 2, NULL, 0); - - instr = ir2_instr_create_alu_v(so->ir, MAXv); - ir2_dst_create(instr, 0, NULL, IR2_REG_EXPORT); - ir2_reg_create(instr, 1, NULL, 0); - ir2_reg_create(instr, 1, NULL, 0); - - return assemble(so); -} - -/* Creates shader: - * ALLOC PARAM/PIXEL SIZE(0x0) - * EXEC_END ADDR(0x1) CNT(0x1) - * ALU: MAXv export0 = C0, C0 ; gl_FragColor - */ -static struct fd2_shader_stateobj * -create_solid_fp(void) -{ - struct fd2_shader_stateobj *so = create_shader(SHADER_FRAGMENT); - struct ir2_instruction *instr; - - if (!so) - return NULL; + struct fd2_shader_stateobj *fp = NULL, *vp; + struct ir2_shader_info *fpi, *vpi; + struct ir2_frag_linkage *f; + uint8_t vs_gprs, fs_gprs = 0, vs_export = 0; + enum a2xx_sq_ps_vtx_mode mode = POSITION_1_VECTOR; + bool binning = (ctx->batch && ring == ctx->batch->binning); + unsigned variant = 0; + + vp = prog->vp; + + /* find variant matching the linked fragment shader */ + if (!binning) { + fp = prog->fp; + for (variant = 1; variant < ARRAY_SIZE(vp->variant); variant++) { + /* if checked all variants, compile a new variant */ + if (!vp->variant[variant].info.sizedwords) { + ir2_compile(vp, variant, fp); + break; + } - so->ir = ir2_shader_create(); + /* check if fragment shader linkage matches */ + if (!memcmp(&vp->variant[variant].f, &fp->variant[0].f, + sizeof(struct ir2_frag_linkage))) + break; + } + assert(variant < ARRAY_SIZE(vp->variant)); + } - instr = ir2_instr_create_alu_v(so->ir, MAXv); - ir2_dst_create(instr, 0, NULL, 
IR2_REG_EXPORT); - ir2_reg_create(instr, 0, NULL, IR2_REG_CONST); - ir2_reg_create(instr, 0, NULL, IR2_REG_CONST); - - return assemble(so); -} - -/* Creates shader: - * EXEC ADDR(0x3) CNT(0x1) - * (S)FETCH: VERTEX R1.xyz1 = R0.x FMT_32_32_32_FLOAT - * UNSIGNED STRIDE(12) CONST(26, 0) - * ALLOC POSITION SIZE(0x0) - * EXEC ADDR(0x4) CNT(0x1) - * ALU: MAXv export62 = R1, R1 ; gl_Position - * ALLOC PARAM/PIXEL SIZE(0x0) - * EXEC_END ADDR(0x5) CNT(0x0) - */ -static struct fd2_shader_stateobj * -create_solid_vp(void) -{ - struct fd2_shader_stateobj *so = create_shader(SHADER_VERTEX); - struct ir2_instruction *instr; + vpi = &vp->variant[variant].info; + fpi = &fp->variant[0].info; + f = &fp->variant[0].f; + + /* clear/gmem2mem/mem2gmem need to be changed to remove this condition */ + if (prog != &ctx->solid_prog && prog != &ctx->blit_prog[0]) { + patch_fetches(ctx, vpi, ctx->vtx.vtx, &ctx->tex[PIPE_SHADER_VERTEX]); + if (fp) + patch_fetches(ctx, fpi, NULL, &ctx->tex[PIPE_SHADER_FRAGMENT]); + } - if (!so) - return NULL; + emit(ring, MESA_SHADER_VERTEX, vpi, + binning ? &ctx->batch->shader_patches : NULL); - so->ir = ir2_shader_create(); + if (fp) { + emit(ring, MESA_SHADER_FRAGMENT, fpi, NULL); + fs_gprs = (fpi->max_reg < 0) ? 0x80 : fpi->max_reg; + vs_export = MAX2(1, f->inputs_count) - 1; + } - instr = ir2_instr_create_vtx_fetch(so->ir, 26, 0, FMT_32_32_32_FLOAT, false, 12); - ir2_dst_create(instr, 1, "xyz1", 0); - ir2_reg_create(instr, 0, "x", IR2_REG_INPUT); + vs_gprs = (vpi->max_reg < 0) ? 
0x80 : vpi->max_reg; - instr = ir2_instr_create_alu_v(so->ir, MAXv); - ir2_dst_create(instr, 62, NULL, IR2_REG_EXPORT); - ir2_reg_create(instr, 1, NULL, 0); - ir2_reg_create(instr, 1, NULL, 0); + if (vp->writes_psize && !binning) + mode = POSITION_2_VECTORS_SPRITE; + /* set register to use for param (fragcoord/pointcoord/frontfacing) */ + OUT_PKT3(ring, CP_SET_CONSTANT, 2); + OUT_RING(ring, CP_REG(REG_A2XX_SQ_CONTEXT_MISC)); + OUT_RING(ring, A2XX_SQ_CONTEXT_MISC_SC_SAMPLE_CNTL(CENTERS_ONLY) | + COND(fp, A2XX_SQ_CONTEXT_MISC_PARAM_GEN_POS(f->inputs_count)) | + /* we need SCREEN_XY for both fragcoord and frontfacing */ + A2XX_SQ_CONTEXT_MISC_SC_OUTPUT_SCREEN_XY); - return assemble(so); + OUT_PKT3(ring, CP_SET_CONSTANT, 2); + OUT_RING(ring, CP_REG(REG_A2XX_SQ_PROGRAM_CNTL)); + OUT_RING(ring, A2XX_SQ_PROGRAM_CNTL_PS_EXPORT_MODE(2) | + A2XX_SQ_PROGRAM_CNTL_VS_EXPORT_MODE(mode) | + A2XX_SQ_PROGRAM_CNTL_VS_RESOURCE | + A2XX_SQ_PROGRAM_CNTL_PS_RESOURCE | + A2XX_SQ_PROGRAM_CNTL_VS_EXPORT_COUNT(vs_export) | + A2XX_SQ_PROGRAM_CNTL_PS_REGS(fs_gprs) | + A2XX_SQ_PROGRAM_CNTL_VS_REGS(vs_gprs) | + COND(fp && fp->need_param, A2XX_SQ_PROGRAM_CNTL_PARAM_GEN) | + COND(!fp, A2XX_SQ_PROGRAM_CNTL_GEN_INDEX_VTX)); } void fd2_prog_init(struct pipe_context *pctx) { struct fd_context *ctx = fd_context(pctx); + struct fd_program_stateobj *prog; + struct fd2_shader_stateobj *so; + struct ir2_shader_info *info; + instr_fetch_vtx_t *instr; pctx->create_fs_state = fd2_fp_state_create; pctx->delete_fs_state = fd2_fp_state_delete; @@ -442,8 +317,47 @@ fd_prog_init(pctx); - ctx->solid_prog.fp = create_solid_fp(); - ctx->solid_prog.vp = create_solid_vp(); - ctx->blit_prog[0].fp = create_blit_fp(); - ctx->blit_prog[0].vp = create_blit_vp(); + /* XXX maybe its possible to reuse patch_vtx_fetch somehow? 
*/ + + prog = &ctx->solid_prog; + so = prog->vp; + ir2_compile(prog->vp, 1, prog->fp); + +#define IR2_FETCH_SWIZ_XY01 0xb08 +#define IR2_FETCH_SWIZ_XYZ1 0xa88 + + info = &so->variant[1].info; + + instr = (instr_fetch_vtx_t*) &info->dwords[info->fetch_info[0].offset]; + instr->const_index = 26; + instr->const_index_sel = 0; + instr->format = FMT_32_32_32_FLOAT; + instr->format_comp_all = false; + instr->stride = 12; + instr->num_format_all = true; + instr->dst_swiz = IR2_FETCH_SWIZ_XYZ1; + + prog = &ctx->blit_prog[0]; + so = prog->vp; + ir2_compile(prog->vp, 1, prog->fp); + + info = &so->variant[1].info; + + instr = (instr_fetch_vtx_t*) &info->dwords[info->fetch_info[0].offset]; + instr->const_index = 26; + instr->const_index_sel = 1; + instr->format = FMT_32_32_FLOAT; + instr->format_comp_all = false; + instr->stride = 8; + instr->num_format_all = false; + instr->dst_swiz = IR2_FETCH_SWIZ_XY01; + + instr = (instr_fetch_vtx_t*) &info->dwords[info->fetch_info[1].offset]; + instr->const_index = 26; + instr->const_index_sel = 0; + instr->format = FMT_32_32_32_FLOAT; + instr->format_comp_all = false; + instr->stride = 12; + instr->num_format_all = false; + instr->dst_swiz = IR2_FETCH_SWIZ_XYZ1; } diff -Nru mesa-18.3.3/src/gallium/drivers/freedreno/a2xx/fd2_program.h mesa-19.0.1/src/gallium/drivers/freedreno/a2xx/fd2_program.h --- mesa-18.3.3/src/gallium/drivers/freedreno/a2xx/fd2_program.h 2018-12-07 18:58:04.000000000 +0000 +++ mesa-19.0.1/src/gallium/drivers/freedreno/a2xx/fd2_program.h 2019-03-31 23:16:37.000000000 +0000 @@ -31,48 +31,39 @@ #include "freedreno_context.h" -#include "ir-a2xx.h" +#include "ir2.h" #include "disasm.h" struct fd2_shader_stateobj { - enum shader_t type; + nir_shader *nir; + gl_shader_stage type; + bool is_a20x; - uint32_t *bin; - - struct tgsi_token *tokens; - - /* note that we defer compiling shader until we know both vs and ps.. 
- * and if one changes, we potentially need to recompile in order to - * get varying linkages correct: - */ - struct ir2_shader_info info; - struct ir2_shader *ir; - - /* for vertex shaders, the fetch instructions which need to be - * patched up before assembly: - */ - unsigned num_vfetch_instrs; - struct ir2_instruction *vfetch_instrs[64]; - - /* for all shaders, any tex fetch instructions which need to be - * patched before assembly: + /* note: using same set of immediates for all variants + * it doesn't matter, other than the slightly larger command stream */ - unsigned num_tfetch_instrs; - struct { - unsigned samp_id; - struct ir2_instruction *instr; - } tfetch_instrs[64]; - unsigned first_immediate; /* const reg # of first immediate */ unsigned num_immediates; struct { uint32_t val[4]; + unsigned ncomp; } immediates[64]; + + bool writes_psize; + bool need_param; + bool has_kill; + + /* note: + * fragment shader only has one variant + * first vertex shader variant is always binning shader + * we should use a dynamic array but in normal case there is + * only 2 variants (and 3 sometimes with GALLIUM_HUD) + */ + struct ir2_shader_variant variant[8]; }; -void fd2_program_emit(struct fd_ringbuffer *ring, +void fd2_program_emit(struct fd_context *ctx, struct fd_ringbuffer *ring, struct fd_program_stateobj *prog); -void fd2_program_validate(struct fd_context *ctx); void fd2_prog_init(struct pipe_context *pctx); diff -Nru mesa-18.3.3/src/gallium/drivers/freedreno/a2xx/fd2_query.c mesa-19.0.1/src/gallium/drivers/freedreno/a2xx/fd2_query.c --- mesa-18.3.3/src/gallium/drivers/freedreno/a2xx/fd2_query.c 1970-01-01 00:00:00.000000000 +0000 +++ mesa-19.0.1/src/gallium/drivers/freedreno/a2xx/fd2_query.c 2019-03-31 23:16:37.000000000 +0000 @@ -0,0 +1,244 @@ +/* + * Copyright (C) 2018 Jonathan Marek + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the 
Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + * Authors: + * Jonathan Marek + * Rob Clark + */ + +/* NOTE: perfcntrs are 48-bits but we only have 32-bit accumulate (?) + * so we work with 32-bits only. we accumulate start/stop separately, + * which differs from a5xx but works with only accumulate (no add/neg) + */ + +#include "freedreno_query_acc.h" +#include "freedreno_resource.h" + +#include "fd2_context.h" +#include "fd2_query.h" + +struct PACKED fd2_query_sample { + uint32_t start; + uint32_t stop; +}; + +/* offset of a single field of an array of fd2_query_sample: */ +#define query_sample_idx(aq, idx, field) \ + fd_resource((aq)->prsc)->bo, \ + (idx * sizeof(struct fd2_query_sample)) + \ + offsetof(struct fd2_query_sample, field), \ + 0, 0 + +/* offset of a single field of fd2_query_sample: */ +#define query_sample(aq, field) \ + query_sample_idx(aq, 0, field) + +/* + * Performance Counter (batch) queries: + * + * Only one of these is active at a time, per design of the gallium + * batch_query API design. 
On perfcntr query tracks N query_types, + * each of which has a 'fd_batch_query_entry' that maps it back to + * the associated group and counter. + */ + +struct fd_batch_query_entry { + uint8_t gid; /* group-id */ + uint8_t cid; /* countable-id within the group */ +}; + +struct fd_batch_query_data { + struct fd_screen *screen; + unsigned num_query_entries; + struct fd_batch_query_entry query_entries[]; +}; + +static void +perfcntr_resume(struct fd_acc_query *aq, struct fd_batch *batch) +{ + struct fd_batch_query_data *data = aq->query_data; + struct fd_screen *screen = data->screen; + struct fd_ringbuffer *ring = batch->draw; + + unsigned counters_per_group[screen->num_perfcntr_groups]; + memset(counters_per_group, 0, sizeof(counters_per_group)); + + fd_wfi(batch, ring); + + /* configure performance counters for the requested queries: */ + for (unsigned i = 0; i < data->num_query_entries; i++) { + struct fd_batch_query_entry *entry = &data->query_entries[i]; + const struct fd_perfcntr_group *g = &screen->perfcntr_groups[entry->gid]; + unsigned counter_idx = counters_per_group[entry->gid]++; + + debug_assert(counter_idx < g->num_counters); + + OUT_PKT0(ring, g->counters[counter_idx].select_reg, 1); + OUT_RING(ring, g->countables[entry->cid].selector); + } + + memset(counters_per_group, 0, sizeof(counters_per_group)); + + /* and snapshot the start values */ + for (unsigned i = 0; i < data->num_query_entries; i++) { + struct fd_batch_query_entry *entry = &data->query_entries[i]; + const struct fd_perfcntr_group *g = &screen->perfcntr_groups[entry->gid]; + unsigned counter_idx = counters_per_group[entry->gid]++; + const struct fd_perfcntr_counter *counter = &g->counters[counter_idx]; + + OUT_PKT3(ring, CP_REG_TO_MEM, 2); + OUT_RING(ring, counter->counter_reg_lo | CP_MEM_TO_REG_0_ACCUMULATE); + OUT_RELOCW(ring, query_sample_idx(aq, i, start)); + } +} + +static void +perfcntr_pause(struct fd_acc_query *aq, struct fd_batch *batch) +{ + struct fd_batch_query_data *data = 
aq->query_data; + struct fd_screen *screen = data->screen; + struct fd_ringbuffer *ring = batch->draw; + + unsigned counters_per_group[screen->num_perfcntr_groups]; + memset(counters_per_group, 0, sizeof(counters_per_group)); + + fd_wfi(batch, ring); + + /* TODO do we need to bother to turn anything off? */ + + /* snapshot the end values: */ + for (unsigned i = 0; i < data->num_query_entries; i++) { + struct fd_batch_query_entry *entry = &data->query_entries[i]; + const struct fd_perfcntr_group *g = &screen->perfcntr_groups[entry->gid]; + unsigned counter_idx = counters_per_group[entry->gid]++; + const struct fd_perfcntr_counter *counter = &g->counters[counter_idx]; + + OUT_PKT3(ring, CP_REG_TO_MEM, 2); + OUT_RING(ring, counter->counter_reg_lo | CP_MEM_TO_REG_0_ACCUMULATE); + OUT_RELOCW(ring, query_sample_idx(aq, i, stop)); + } +} + +static void +perfcntr_accumulate_result(struct fd_acc_query *aq, void *buf, + union pipe_query_result *result) +{ + struct fd_batch_query_data *data = aq->query_data; + struct fd2_query_sample *sp = buf; + + for (unsigned i = 0; i < data->num_query_entries; i++) + result->batch[i].u64 = sp[i].stop - sp[i].start; +} + +static const struct fd_acc_sample_provider perfcntr = { + .query_type = FD_QUERY_FIRST_PERFCNTR, + .active = FD_STAGE_DRAW | FD_STAGE_CLEAR, + .resume = perfcntr_resume, + .pause = perfcntr_pause, + .result = perfcntr_accumulate_result, +}; + +static struct pipe_query * +fd2_create_batch_query(struct pipe_context *pctx, + unsigned num_queries, unsigned *query_types) +{ + struct fd_context *ctx = fd_context(pctx); + struct fd_screen *screen = ctx->screen; + struct fd_query *q; + struct fd_acc_query *aq; + struct fd_batch_query_data *data; + + data = CALLOC_VARIANT_LENGTH_STRUCT(fd_batch_query_data, + num_queries * sizeof(data->query_entries[0])); + + data->screen = screen; + data->num_query_entries = num_queries; + + /* validate the requested query_types and ensure we don't try + * to request more query_types of a given 
group than we have + * counters: + */ + unsigned counters_per_group[screen->num_perfcntr_groups]; + memset(counters_per_group, 0, sizeof(counters_per_group)); + + for (unsigned i = 0; i < num_queries; i++) { + unsigned idx = query_types[i] - FD_QUERY_FIRST_PERFCNTR; + + /* verify valid query_type, ie. is it actually a perfcntr? */ + if ((query_types[i] < FD_QUERY_FIRST_PERFCNTR) || + (idx >= screen->num_perfcntr_queries)) { + debug_printf("invalid batch query query_type: %u\n", query_types[i]); + goto error; + } + + struct fd_batch_query_entry *entry = &data->query_entries[i]; + struct pipe_driver_query_info *pq = &screen->perfcntr_queries[idx]; + + entry->gid = pq->group_id; + + /* the perfcntr_queries[] table flattens all the countables + * for each group in series, ie: + * + * (G0,C0), .., (G0,Cn), (G1,C0), .., (G1,Cm), ... + * + * So to find the countable index just step back through the + * table to find the first entry with the same group-id. + */ + while (pq > screen->perfcntr_queries) { + pq--; + if (pq->group_id == entry->gid) + entry->cid++; + } + + if (counters_per_group[entry->gid] >= + screen->perfcntr_groups[entry->gid].num_counters) { + debug_printf("too many counters for group %u\n", entry->gid); + goto error; + } + + counters_per_group[entry->gid]++; + } + + q = fd_acc_create_query2(ctx, 0, &perfcntr); + aq = fd_acc_query(q); + + /* sample buffer size is based on # of queries: */ + aq->size = num_queries * sizeof(struct fd2_query_sample); + aq->query_data = data; + + return (struct pipe_query *)q; + +error: + free(data); + return NULL; +} + +void +fd2_query_context_init(struct pipe_context *pctx) +{ + struct fd_context *ctx = fd_context(pctx); + + ctx->create_query = fd_acc_create_query; + ctx->query_set_stage = fd_acc_query_set_stage; + + pctx->create_batch_query = fd2_create_batch_query; +} diff -Nru mesa-18.3.3/src/gallium/drivers/freedreno/a2xx/fd2_query.h mesa-19.0.1/src/gallium/drivers/freedreno/a2xx/fd2_query.h --- 
mesa-18.3.3/src/gallium/drivers/freedreno/a2xx/fd2_query.h 1970-01-01 00:00:00.000000000 +0000 +++ mesa-19.0.1/src/gallium/drivers/freedreno/a2xx/fd2_query.h 2019-03-31 23:16:37.000000000 +0000 @@ -0,0 +1,35 @@ +/* + * Copyright (C) 2019 Jonathan Marek + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ * + * Authors: + * Jonathan Marek + * Rob Clark + */ + +#ifndef FD2_QUERY_H_ +#define FD2_QUERY_H_ + +#include "pipe/p_context.h" + +void fd2_query_context_init(struct pipe_context *pctx); + +#endif /* FD2_QUERY_H_ */ diff -Nru mesa-18.3.3/src/gallium/drivers/freedreno/a2xx/fd2_rasterizer.c mesa-19.0.1/src/gallium/drivers/freedreno/a2xx/fd2_rasterizer.c --- mesa-18.3.3/src/gallium/drivers/freedreno/a2xx/fd2_rasterizer.c 2018-12-07 18:58:04.000000000 +0000 +++ mesa-19.0.1/src/gallium/drivers/freedreno/a2xx/fd2_rasterizer.c 2019-03-31 23:16:37.000000000 +0000 @@ -47,7 +47,7 @@ if (cso->point_size_per_vertex) { psize_min = util_get_min_point_size(cso); - psize_max = 8192; + psize_max = 8192.0 - 0.0625; } else { /* Force the point size to be as if the vertex output was disabled. */ psize_min = cso->point_size; diff -Nru mesa-18.3.3/src/gallium/drivers/freedreno/a2xx/fd2_resource.c mesa-19.0.1/src/gallium/drivers/freedreno/a2xx/fd2_resource.c --- mesa-18.3.3/src/gallium/drivers/freedreno/a2xx/fd2_resource.c 1970-01-01 00:00:00.000000000 +0000 +++ mesa-19.0.1/src/gallium/drivers/freedreno/a2xx/fd2_resource.c 2019-03-31 23:16:37.000000000 +0000 @@ -0,0 +1,79 @@ +/* + * Copyright (C) 2018 Jonathan Marek + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. 
+ * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + * Authors: + * Jonathan Marek + */ + +#include "fd2_resource.h" + +uint32_t +fd2_setup_slices(struct fd_resource *rsc) +{ + struct pipe_resource *prsc = &rsc->base; + enum pipe_format format = rsc->base.format; + uint32_t level, size = 0; + uint32_t width = prsc->width0; + uint32_t height = prsc->height0; + uint32_t depth = prsc->depth0; + + for (level = 0; level <= prsc->last_level; level++) { + struct fd_resource_slice *slice = fd_resource_slice(rsc, level); + uint32_t blocks; + + /* 32 * 32 block alignment */ + switch (prsc->target) { + default: assert(0); + case PIPE_TEXTURE_2D: + case PIPE_TEXTURE_2D_ARRAY: + case PIPE_TEXTURE_RECT: + case PIPE_TEXTURE_CUBE: + height = align(height, 32 * util_format_get_blockheight(format)); + case PIPE_TEXTURE_1D: + case PIPE_TEXTURE_1D_ARRAY: + width = align(width, 32 * util_format_get_blockwidth(format)); + case PIPE_BUFFER: + break; + } + + /* mipmaps have power of two sizes in memory */ + if (level) { + width = util_next_power_of_two(width); + height = util_next_power_of_two(height); + } + + slice->pitch = width; + slice->offset = size; + + blocks = util_format_get_nblocks(format, width, height); + + /* 4k aligned size */ + slice->size0 = align(blocks * rsc->cpp, 4096); + + size += slice->size0 * depth * prsc->array_size; + + width = u_minify(width, 1); + height = u_minify(height, 1); + depth = u_minify(depth, 1); + } + return size; +} diff -Nru mesa-18.3.3/src/gallium/drivers/freedreno/a2xx/fd2_resource.h 
mesa-19.0.1/src/gallium/drivers/freedreno/a2xx/fd2_resource.h --- mesa-18.3.3/src/gallium/drivers/freedreno/a2xx/fd2_resource.h 1970-01-01 00:00:00.000000000 +0000 +++ mesa-19.0.1/src/gallium/drivers/freedreno/a2xx/fd2_resource.h 2019-03-31 23:16:37.000000000 +0000 @@ -0,0 +1,34 @@ +/* + * Copyright (C) 2018 Jonathan Marek + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ * + * Authors: + * Jonathan Marek + */ + +#ifndef FD2_RESOURCE_H_ +#define FD2_RESOURCE_H_ + +#include "freedreno_resource.h" + +uint32_t fd2_setup_slices(struct fd_resource *rsc); + +#endif /* FD2_RESOURCE_H_ */ diff -Nru mesa-18.3.3/src/gallium/drivers/freedreno/a2xx/fd2_screen.c mesa-19.0.1/src/gallium/drivers/freedreno/a2xx/fd2_screen.c --- mesa-18.3.3/src/gallium/drivers/freedreno/a2xx/fd2_screen.c 2018-12-07 18:58:04.000000000 +0000 +++ mesa-19.0.1/src/gallium/drivers/freedreno/a2xx/fd2_screen.c 2019-03-31 23:16:37.000000000 +0000 @@ -30,6 +30,7 @@ #include "fd2_screen.h" #include "fd2_context.h" #include "fd2_util.h" +#include "fd2_resource.h" static boolean fd2_screen_is_format_supported(struct pipe_screen *pscreen, @@ -104,10 +105,21 @@ return retval == usage; } +extern const struct fd_perfcntr_group a2xx_perfcntr_groups[]; +extern const unsigned a2xx_num_perfcntr_groups; + void fd2_screen_init(struct pipe_screen *pscreen) { - fd_screen(pscreen)->max_rts = 1; + struct fd_screen *screen = fd_screen(pscreen); + + screen->max_rts = 1; pscreen->context_create = fd2_context_create; pscreen->is_format_supported = fd2_screen_is_format_supported; + screen->setup_slices = fd2_setup_slices; + + if (fd_mesa_debug & FD_DBG_PERFC) { + screen->perfcntr_groups = a2xx_perfcntr_groups; + screen->num_perfcntr_groups = a2xx_num_perfcntr_groups; + } } diff -Nru mesa-18.3.3/src/gallium/drivers/freedreno/a2xx/fd2_texture.c mesa-19.0.1/src/gallium/drivers/freedreno/a2xx/fd2_texture.c --- mesa-18.3.3/src/gallium/drivers/freedreno/a2xx/fd2_texture.c 2018-12-07 18:58:04.000000000 +0000 +++ mesa-19.0.1/src/gallium/drivers/freedreno/a2xx/fd2_texture.c 2019-03-31 23:16:37.000000000 +0000 @@ -72,6 +72,22 @@ } } +static enum sq_tex_filter +mip_filter(unsigned filter) +{ + switch (filter) { + case PIPE_TEX_MIPFILTER_NONE: + return SQ_TEX_FILTER_BASEMAP; + case PIPE_TEX_MIPFILTER_NEAREST: + return SQ_TEX_FILTER_POINT; + case PIPE_TEX_MIPFILTER_LINEAR: + return SQ_TEX_FILTER_BILINEAR; + 
default: + DBG("invalid filter: %u", filter); + return 0; + } +} + static void * fd2_sampler_state_create(struct pipe_context *pctx, const struct pipe_sampler_state *cso) @@ -83,6 +99,11 @@ so->base = *cso; + /* TODO + * cso->max_anisotropy + * cso->normalized_coords (dealt with by shader for rect textures?) + */ + /* SQ_TEX0_PITCH() must be OR'd in later when we know the bound texture: */ so->tex0 = A2XX_SQ_TEX_0_CLAMP_X(tex_clamp(cso->wrap_s)) | @@ -91,10 +112,12 @@ so->tex3 = A2XX_SQ_TEX_3_XY_MAG_FILTER(tex_filter(cso->mag_img_filter)) | - A2XX_SQ_TEX_3_XY_MIN_FILTER(tex_filter(cso->min_img_filter)); + A2XX_SQ_TEX_3_XY_MIN_FILTER(tex_filter(cso->min_img_filter)) | + A2XX_SQ_TEX_3_MIP_FILTER(mip_filter(cso->min_mip_filter)); - so->tex4 = 0x00000000; /* ??? */ - so->tex5 = 0x00000200; /* ??? */ + so->tex4 = 0; + if (cso->min_mip_filter != PIPE_TEX_MIPFILTER_NONE) + so->tex4 = A2XX_SQ_TEX_4_LOD_BIAS(cso->lod_bias); return so; } @@ -121,6 +144,26 @@ fd_sampler_states_bind(pctx, shader, start, nr, hwcso); } +static enum sq_tex_dimension +tex_dimension(unsigned target) +{ + switch (target) { + default: + assert(0); + case PIPE_TEXTURE_1D: + assert(0); /* TODO */ + return SQ_TEX_DIMENSION_1D; + case PIPE_TEXTURE_RECT: + case PIPE_TEXTURE_2D: + return SQ_TEX_DIMENSION_2D; + case PIPE_TEXTURE_3D: + assert(0); /* TODO */ + return SQ_TEX_DIMENSION_3D; + case PIPE_TEXTURE_CUBE: + return SQ_TEX_DIMENSION_CUBE; + } +} + static struct pipe_sampler_view * fd2_sampler_view_create(struct pipe_context *pctx, struct pipe_resource *prsc, const struct pipe_sampler_view *cso) @@ -137,15 +180,22 @@ so->base.reference.count = 1; so->base.context = pctx; - so->fmt = fd2_pipe2surface(cso->format); - so->tex0 = A2XX_SQ_TEX_0_PITCH(rsc->slices[0].pitch); + so->tex1 = + A2XX_SQ_TEX_1_FORMAT(fd2_pipe2surface(cso->format)) | + A2XX_SQ_TEX_1_CLAMP_POLICY(SQ_TEX_CLAMP_POLICY_OGL); so->tex2 = A2XX_SQ_TEX_2_HEIGHT(prsc->height0 - 1) | A2XX_SQ_TEX_2_WIDTH(prsc->width0 - 1); so->tex3 = 
fd2_tex_swiz(cso->format, cso->swizzle_r, cso->swizzle_g, cso->swizzle_b, cso->swizzle_a); + so->tex4 = + A2XX_SQ_TEX_4_MIP_MIN_LEVEL(fd_sampler_first_level(cso)) | + A2XX_SQ_TEX_4_MIP_MAX_LEVEL(fd_sampler_last_level(cso)); + + so->tex5 = A2XX_SQ_TEX_5_DIMENSION(tex_dimension(prsc->target)); + return &so->base; } @@ -188,6 +238,13 @@ return samp_id + ctx->tex[PIPE_SHADER_FRAGMENT].num_samplers; } +/* for reasons unknown, it appears ETC1 cubemap needs swapped xy coordinates */ +bool fd2_texture_swap_xy(struct fd_texture_stateobj *tex, unsigned samp_id) +{ + return tex->textures[samp_id]->format == PIPE_FORMAT_ETC1_RGB8 && + tex->textures[samp_id]->texture->target == PIPE_TEXTURE_CUBE; +} + void fd2_texture_init(struct pipe_context *pctx) { diff -Nru mesa-18.3.3/src/gallium/drivers/freedreno/a2xx/fd2_texture.h mesa-19.0.1/src/gallium/drivers/freedreno/a2xx/fd2_texture.h --- mesa-18.3.3/src/gallium/drivers/freedreno/a2xx/fd2_texture.h 2018-12-07 18:58:04.000000000 +0000 +++ mesa-19.0.1/src/gallium/drivers/freedreno/a2xx/fd2_texture.h 2019-03-31 23:16:37.000000000 +0000 @@ -37,7 +37,7 @@ struct fd2_sampler_stateobj { struct pipe_sampler_state base; - uint32_t tex0, tex3, tex4, tex5; + uint32_t tex0, tex3, tex4; }; static inline struct fd2_sampler_stateobj * @@ -48,8 +48,7 @@ struct fd2_pipe_sampler_view { struct pipe_sampler_view base; - enum a2xx_sq_surfaceformat fmt; - uint32_t tex0, tex2, tex3; + uint32_t tex0, tex1, tex2, tex3, tex4, tex5; }; static inline struct fd2_pipe_sampler_view * @@ -61,6 +60,8 @@ unsigned fd2_get_const_idx(struct fd_context *ctx, struct fd_texture_stateobj *tex, unsigned samp_id); +bool fd2_texture_swap_xy(struct fd_texture_stateobj *tex, unsigned samp_id); + void fd2_texture_init(struct pipe_context *pctx); #endif /* FD2_TEXTURE_H_ */ diff -Nru mesa-18.3.3/src/gallium/drivers/freedreno/a2xx/fd2_zsa.c mesa-19.0.1/src/gallium/drivers/freedreno/a2xx/fd2_zsa.c --- mesa-18.3.3/src/gallium/drivers/freedreno/a2xx/fd2_zsa.c 2018-12-07 
18:58:04.000000000 +0000 +++ mesa-19.0.1/src/gallium/drivers/freedreno/a2xx/fd2_zsa.c 2019-03-31 23:16:37.000000000 +0000 @@ -49,7 +49,8 @@ A2XX_RB_DEPTHCONTROL_ZFUNC(cso->depth.func); /* maps 1:1 */ if (cso->depth.enabled) - so->rb_depthcontrol |= A2XX_RB_DEPTHCONTROL_Z_ENABLE; + so->rb_depthcontrol |= A2XX_RB_DEPTHCONTROL_Z_ENABLE | + COND(!cso->alpha.enabled, A2XX_RB_DEPTHCONTROL_EARLY_Z_ENABLE); if (cso->depth.writemask) so->rb_depthcontrol |= A2XX_RB_DEPTHCONTROL_Z_WRITE_ENABLE; diff -Nru mesa-18.3.3/src/gallium/drivers/freedreno/a2xx/instr-a2xx.h mesa-19.0.1/src/gallium/drivers/freedreno/a2xx/instr-a2xx.h --- mesa-18.3.3/src/gallium/drivers/freedreno/a2xx/instr-a2xx.h 2018-12-07 18:58:04.000000000 +0000 +++ mesa-19.0.1/src/gallium/drivers/freedreno/a2xx/instr-a2xx.h 2019-03-31 23:16:37.000000000 +0000 @@ -87,6 +87,7 @@ SIN = 48, COS = 49, RETAIN_PREV = 50, + SCALAR_NONE = 63, } instr_scalar_opc_t; typedef enum { @@ -120,6 +121,7 @@ KILLNEv = 27, DSTv = 28, MOVAv = 29, + VECTOR_NONE = 31, } instr_vector_opc_t; typedef struct PACKED { @@ -161,9 +163,9 @@ }; /* constants have full 8-bit index */ struct { - uint8_t src3_reg_const : 8; - uint8_t src2_reg_const : 8; - uint8_t src1_reg_const : 8; + uint8_t src3_reg_byte : 8; + uint8_t src2_reg_byte : 8; + uint8_t src1_reg_byte : 8; }; }; instr_vector_opc_t vector_opc : 5; @@ -389,10 +391,17 @@ instr_fetch_opc_t opc : 5; uint32_t dummy0 : 27; /* dword1: */ - uint32_t dummy1 : 32; + uint32_t dummy1 : 31; + uint8_t pred_select : 1; /* dword2: */ - uint32_t dummy2 : 32; + uint32_t dummy2 : 31; + uint8_t pred_condition : 1; }; } instr_fetch_t; +typedef union PACKED { + instr_alu_t alu; + instr_fetch_t fetch; +} instr_t; + #endif /* INSTR_H_ */ diff -Nru mesa-18.3.3/src/gallium/drivers/freedreno/a2xx/ir2_assemble.c mesa-19.0.1/src/gallium/drivers/freedreno/a2xx/ir2_assemble.c --- mesa-18.3.3/src/gallium/drivers/freedreno/a2xx/ir2_assemble.c 1970-01-01 00:00:00.000000000 +0000 +++ 
mesa-19.0.1/src/gallium/drivers/freedreno/a2xx/ir2_assemble.c 2019-03-31 23:16:37.000000000 +0000 @@ -0,0 +1,548 @@ +/* + * Copyright (C) 2018 Jonathan Marek + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ * + * Authors: + * Jonathan Marek + */ + +#include "ir2_private.h" + +static unsigned +src_swizzle(struct ir2_context *ctx, struct ir2_src *src, unsigned ncomp) +{ + struct ir2_reg_component *comps; + unsigned swiz = 0; + + switch (src->type) { + case IR2_SRC_SSA: + case IR2_SRC_REG: + break; + default: + return src->swizzle; + } + /* we need to take into account where the components were allocated */ + comps = get_reg_src(ctx, src)->comp; + for (int i = 0; i < ncomp; i++) { + swiz |= swiz_set(comps[swiz_get(src->swizzle, i)].c, i); + } + return swiz; +} + +/* alu instr need to take into how the output components are allocated */ + +/* scalar doesn't need to take into account dest swizzle */ + +static unsigned +alu_swizzle_scalar(struct ir2_context *ctx, struct ir2_src *reg) +{ + /* hardware seems to take from W, but swizzle everywhere just in case */ + return swiz_merge(src_swizzle(ctx, reg, 1), IR2_SWIZZLE_XXXX); +} + +static unsigned +alu_swizzle(struct ir2_context *ctx, struct ir2_instr *instr, struct ir2_src *src) +{ + struct ir2_reg_component *comp = get_reg(instr)->comp; + unsigned swiz0 = src_swizzle(ctx, src, src_ncomp(instr)); + unsigned swiz = 0; + + /* non per component special cases */ + switch (instr->alu.vector_opc) { + case PRED_SETE_PUSHv ... 
PRED_SETGTE_PUSHv: + return alu_swizzle_scalar(ctx, src); + case DOT2ADDv: + case DOT3v: + case DOT4v: + case CUBEv: + return swiz0; + default: + break; + } + + for (int i = 0, j = 0; i < dst_ncomp(instr); j++) { + if (instr->alu.write_mask & 1 << j) { + if (comp[j].c != 7) + swiz |= swiz_set(i, comp[j].c); + i++; + } + } + return swiz_merge(swiz0, swiz); +} + +static unsigned +alu_swizzle_scalar2(struct ir2_context *ctx, struct ir2_src *src, unsigned s1) +{ + /* hardware seems to take from ZW, but swizzle everywhere (ABAB) */ + unsigned s0 = swiz_get(src_swizzle(ctx, src, 1), 0); + return swiz_merge(swiz_set(s0, 0) | swiz_set(s1, 1), IR2_SWIZZLE_XYXY); +} + +/* write_mask needs to be transformed by allocation information */ + +static unsigned +alu_write_mask(struct ir2_context *ctx, struct ir2_instr *instr) +{ + struct ir2_reg_component *comp = get_reg(instr)->comp; + unsigned write_mask = 0; + + for (int i = 0; i < 4; i++) { + if (instr->alu.write_mask & 1 << i) + write_mask |= 1 << comp[i].c; + } + + return write_mask; +} + +/* fetch instructions can swizzle dest, but src swizzle needs conversion */ + +static unsigned +fetch_swizzle(struct ir2_context *ctx, struct ir2_src *src, unsigned ncomp) +{ + unsigned alu_swiz = src_swizzle(ctx, src, ncomp); + unsigned swiz = 0; + for (int i = 0; i < ncomp; i++) + swiz |= swiz_get(alu_swiz, i) << i * 2; + return swiz; +} + +static unsigned +fetch_dst_swiz(struct ir2_context *ctx, struct ir2_instr *instr) +{ + struct ir2_reg_component *comp = get_reg(instr)->comp; + unsigned dst_swiz = 0xfff; + for (int i = 0; i < dst_ncomp(instr); i++) { + dst_swiz &= ~(7 << comp[i].c * 3); + dst_swiz |= i << comp[i].c * 3; + } + return dst_swiz; +} + +/* register / export # for instr */ +static unsigned +dst_to_reg(struct ir2_context *ctx, struct ir2_instr *instr) +{ + if (is_export(instr)) + return instr->alu.export; + + return get_reg(instr)->idx; +} + +/* register # for src */ +static unsigned src_to_reg(struct ir2_context *ctx, struct 
ir2_src *src) +{ + return get_reg_src(ctx, src)->idx; +} + +static unsigned src_reg_byte(struct ir2_context *ctx, struct ir2_src *src) +{ + if (src->type == IR2_SRC_CONST) { + assert(!src->abs); /* no abs bit for const */ + return src->num; + } + return src_to_reg(ctx, src) | (src->abs ? 0x80 : 0); +} + +/* produce the 12 byte binary instruction for a given sched_instr */ +static void +fill_instr(struct ir2_context *ctx, struct ir2_sched_instr *sched, + instr_t *bc, bool * is_fetch) +{ + struct ir2_instr *instr = sched->instr, *instr_s, *instr_v; + + *bc = (instr_t) {}; + + if (instr && instr->type == IR2_FETCH) { + *is_fetch = true; + + bc->fetch.opc = instr->fetch.opc; + bc->fetch.pred_select = !!instr->pred; + bc->fetch.pred_condition = instr->pred & 1; + + struct ir2_src *src = instr->src; + + if (instr->fetch.opc == VTX_FETCH) { + instr_fetch_vtx_t *vtx = &bc->fetch.vtx; + + assert(instr->fetch.vtx.const_idx <= 0x1f); + assert(instr->fetch.vtx.const_idx_sel <= 0x3); + + vtx->src_reg = src_to_reg(ctx, src); + vtx->src_swiz = fetch_swizzle(ctx, src, 1); + vtx->dst_reg = dst_to_reg(ctx, instr); + vtx->dst_swiz = fetch_dst_swiz(ctx, instr); + + vtx->must_be_one = 1; + vtx->const_index = instr->fetch.vtx.const_idx; + vtx->const_index_sel = instr->fetch.vtx.const_idx_sel; + + /* other fields will be patched */ + + /* XXX seems like every FETCH but the first has + * this bit set: + */ + vtx->reserved3 = instr->idx ? 0x1 : 0x0; + vtx->reserved0 = instr->idx ? 
0x2 : 0x3; + } else if (instr->fetch.opc == TEX_FETCH) { + instr_fetch_tex_t *tex = &bc->fetch.tex; + + tex->src_reg = src_to_reg(ctx, src); + tex->src_swiz = fetch_swizzle(ctx, src, 3); + tex->dst_reg = dst_to_reg(ctx, instr); + tex->dst_swiz = fetch_dst_swiz(ctx, instr); + /* tex->const_idx = patch_fetches */ + tex->mag_filter = TEX_FILTER_USE_FETCH_CONST; + tex->min_filter = TEX_FILTER_USE_FETCH_CONST; + tex->mip_filter = TEX_FILTER_USE_FETCH_CONST; + tex->aniso_filter = ANISO_FILTER_USE_FETCH_CONST; + tex->arbitrary_filter = ARBITRARY_FILTER_USE_FETCH_CONST; + tex->vol_mag_filter = TEX_FILTER_USE_FETCH_CONST; + tex->vol_min_filter = TEX_FILTER_USE_FETCH_CONST; + tex->use_comp_lod = ctx->so->type == MESA_SHADER_FRAGMENT; + tex->use_reg_lod = instr->src_count == 2; + tex->sample_location = SAMPLE_CENTER; + tex->tx_coord_denorm = instr->fetch.tex.is_rect; + } else if (instr->fetch.opc == TEX_SET_TEX_LOD) { + instr_fetch_tex_t *tex = &bc->fetch.tex; + + tex->src_reg = src_to_reg(ctx, src); + tex->src_swiz = fetch_swizzle(ctx, src, 1); + tex->dst_reg = 0; + tex->dst_swiz = 0xfff; + + tex->mag_filter = TEX_FILTER_USE_FETCH_CONST; + tex->min_filter = TEX_FILTER_USE_FETCH_CONST; + tex->mip_filter = TEX_FILTER_USE_FETCH_CONST; + tex->aniso_filter = ANISO_FILTER_USE_FETCH_CONST; + tex->arbitrary_filter = ARBITRARY_FILTER_USE_FETCH_CONST; + tex->vol_mag_filter = TEX_FILTER_USE_FETCH_CONST; + tex->vol_min_filter = TEX_FILTER_USE_FETCH_CONST; + tex->use_comp_lod = 1; + tex->use_reg_lod = 0; + tex->sample_location = SAMPLE_CENTER; + } else { + assert(0); + } + return; + } + + instr_v = sched->instr; + instr_s = sched->instr_s; + + if (instr_v) { + struct ir2_src src1, src2, *src3; + + src1 = instr_v->src[0]; + src2 = instr_v->src[instr_v->src_count > 1]; + src3 = instr_v->src_count == 3 ? 
&instr_v->src[2] : NULL; + + bc->alu.vector_opc = instr_v->alu.vector_opc; + bc->alu.vector_write_mask = alu_write_mask(ctx, instr_v); + bc->alu.vector_dest = dst_to_reg(ctx, instr_v); + bc->alu.vector_clamp = instr_v->alu.saturate; + bc->alu.export_data = instr_v->alu.export >= 0; + + /* single operand SETEv, use 0.0f as src2 */ + if (instr_v->src_count == 1 && + (bc->alu.vector_opc == SETEv || + bc->alu.vector_opc == SETNEv || + bc->alu.vector_opc == SETGTv || + bc->alu.vector_opc == SETGTEv)) + src2 = ir2_zero(ctx); + + /* export32 instr for a20x hw binning has this bit set.. + * it seems to do more than change the base address of constants + * XXX this is a hack + */ + bc->alu.relative_addr = + (bc->alu.export_data && bc->alu.vector_dest == 32); + + bc->alu.src1_reg_byte = src_reg_byte(ctx, &src1); + bc->alu.src1_swiz = alu_swizzle(ctx, instr_v, &src1); + bc->alu.src1_reg_negate = src1.negate; + bc->alu.src1_sel = src1.type != IR2_SRC_CONST; + + bc->alu.src2_reg_byte = src_reg_byte(ctx, &src2); + bc->alu.src2_swiz = alu_swizzle(ctx, instr_v, &src2); + bc->alu.src2_reg_negate = src2.negate; + bc->alu.src2_sel = src2.type != IR2_SRC_CONST; + + if (src3) { + bc->alu.src3_reg_byte = src_reg_byte(ctx, src3); + bc->alu.src3_swiz = alu_swizzle(ctx, instr_v, src3); + bc->alu.src3_reg_negate = src3->negate; + bc->alu.src3_sel = src3->type != IR2_SRC_CONST; + } + + bc->alu.pred_select = instr_v->pred; + } + + if (instr_s) { + struct ir2_src *src = instr_s->src; + + bc->alu.scalar_opc = instr_s->alu.scalar_opc; + bc->alu.scalar_write_mask = alu_write_mask(ctx, instr_s); + bc->alu.scalar_dest = dst_to_reg(ctx, instr_s); + bc->alu.scalar_clamp = instr_s->alu.saturate; + bc->alu.export_data = instr_s->alu.export >= 0; + + if (instr_s->src_count == 1) { + bc->alu.src3_reg_byte = src_reg_byte(ctx, src); + bc->alu.src3_swiz = alu_swizzle_scalar(ctx, src); + bc->alu.src3_reg_negate = src->negate; + bc->alu.src3_sel = src->type != IR2_SRC_CONST; + } else { + 
assert(instr_s->src_count == 2); + + bc->alu.src3_reg_byte = src_reg_byte(ctx, src); + bc->alu.src3_swiz = alu_swizzle_scalar2(ctx, src, instr_s->alu.src1_swizzle); + bc->alu.src3_reg_negate = src->negate; + bc->alu.src3_sel = src->type != IR2_SRC_CONST;; + } + + if (instr_v) + assert(instr_s->pred == instr_v->pred); + bc->alu.pred_select = instr_s->pred; + } + + *is_fetch = false; + return; +} + +static unsigned +write_cfs(struct ir2_context *ctx, instr_cf_t * cfs, unsigned cf_idx, + instr_cf_alloc_t *alloc, instr_cf_exec_t *exec) +{ + assert(exec->count); + + if (alloc) + cfs[cf_idx++].alloc = *alloc; + + /* for memory alloc offset for patching */ + if (alloc && alloc->buffer_select == SQ_MEMORY && + ctx->info->mem_export_ptr == -1) + ctx->info->mem_export_ptr = cf_idx / 2 * 3; + + cfs[cf_idx++].exec = *exec; + exec->address += exec->count; + exec->serialize = 0; + exec->count = 0; + + return cf_idx; +} + +/* assemble the final shader */ +void assemble(struct ir2_context *ctx, bool binning) +{ + /* hw seems to have a limit of 384 (num_cf/2+num_instr <= 384) + * address is 9 bits so could it be 512 ? 
+ */ + instr_cf_t cfs[384]; + instr_t bytecode[384], bc; + unsigned block_addr[128]; + unsigned num_cf = 0; + + /* CF instr state */ + instr_cf_exec_t exec = {.opc = EXEC}; + instr_cf_alloc_t alloc = {.opc = ALLOC}; + + int sync_id, sync_id_prev = -1; + bool is_fetch = false; + bool need_sync = true; + bool need_alloc = false; + unsigned block_idx = 0; + + ctx->info->mem_export_ptr = -1; + ctx->info->num_fetch_instrs = 0; + + /* vertex shader always needs to allocate at least one parameter + * if it will never happen, + */ + if (ctx->so->type == MESA_SHADER_VERTEX && ctx->f->inputs_count == 0) { + alloc.buffer_select = SQ_PARAMETER_PIXEL; + cfs[num_cf++].alloc = alloc; + } + + block_addr[0] = 0; + + for (int i = 0, j = 0; j < ctx->instr_sched_count; j++) { + struct ir2_instr *instr = ctx->instr_sched[j].instr; + + /* catch IR2_CF since it isn't a regular instruction */ + if (instr && instr->type == IR2_CF) { + assert(!need_alloc); /* XXX */ + + /* flush any exec cf before inserting jmp */ + if (exec.count) + num_cf = write_cfs(ctx, cfs, num_cf, NULL, &exec); + + cfs[num_cf++].jmp_call = (instr_cf_jmp_call_t) { + .opc = COND_JMP, + .address = instr->cf.block_idx, /* will be fixed later */ + .force_call = !instr->pred, + .predicated_jmp = 1, + .direction = instr->cf.block_idx > instr->block_idx, + .condition = instr->pred & 1, + }; + continue; + } + + /* fill the 3 dwords for the instruction */ + fill_instr(ctx, &ctx->instr_sched[j], &bc, &is_fetch); + + /* we need to sync between ALU/VTX_FETCH/TEX_FETCH types */ + sync_id = 0; + if (is_fetch) + sync_id = bc.fetch.opc == VTX_FETCH ? 
1 : 2; + + need_sync = sync_id != sync_id_prev; + sync_id_prev = sync_id; + + unsigned block; + { + + if (ctx->instr_sched[j].instr) + block = ctx->instr_sched[j].instr->block_idx; + else + block = ctx->instr_sched[j].instr_s->block_idx; + + assert(block_idx <= block); + } + + /* info for patching */ + if (is_fetch) { + struct ir2_fetch_info *info = + &ctx->info->fetch_info[ctx->info->num_fetch_instrs++]; + info->offset = i * 3; /* add cf offset later */ + + if (bc.fetch.opc == VTX_FETCH) { + info->vtx.dst_swiz = bc.fetch.vtx.dst_swiz; + } else if (bc.fetch.opc == TEX_FETCH) { + info->tex.samp_id = instr->fetch.tex.samp_id; + info->tex.src_swiz = bc.fetch.tex.src_swiz; + } else { + ctx->info->num_fetch_instrs--; + } + } + + /* exec cf after 6 instr or when switching between fetch / alu */ + if (exec.count == 6 || (exec.count && (need_sync || block != block_idx))) { + num_cf = write_cfs(ctx, cfs, num_cf, need_alloc ? &alloc : NULL, &exec); + need_alloc = false; + } + + /* update block_addrs for jmp patching */ + while (block_idx < block) + block_addr[++block_idx] = num_cf; + + /* export - fill alloc cf */ + if (!is_fetch && bc.alu.export_data) { + /* get the export buffer from either vector/scalar dest */ + instr_alloc_type_t buffer = + export_buf(bc.alu.vector_dest); + if (bc.alu.scalar_write_mask) { + if (bc.alu.vector_write_mask) + assert(buffer == export_buf(bc.alu.scalar_dest)); + buffer = export_buf(bc.alu.scalar_dest); + } + + /* flush previous alloc if the buffer changes */ + bool need_new_alloc = buffer != alloc.buffer_select; + + /* memory export always in 32/33 pair, new alloc on 32 */ + if (bc.alu.vector_dest == 32) + need_new_alloc = true; + + if (need_new_alloc && exec.count) { + num_cf = write_cfs(ctx, cfs, num_cf, need_alloc ? 
&alloc : NULL, &exec); + need_alloc = false; + } + + need_alloc |= need_new_alloc; + + alloc.size = 0; + alloc.buffer_select = buffer; + + if (buffer == SQ_PARAMETER_PIXEL && ctx->so->type == MESA_SHADER_VERTEX) + alloc.size = ctx->f->inputs_count - 1; + + if (buffer == SQ_POSITION) + alloc.size = ctx->so->writes_psize; + } + + if (is_fetch) + exec.serialize |= 0x1 << exec.count * 2; + if (need_sync) + exec.serialize |= 0x2 << exec.count * 2; + + need_sync = false; + exec.count += 1; + bytecode[i++] = bc; + } + + /* final exec cf */ + exec.opc = EXEC_END; + num_cf = + write_cfs(ctx, cfs, num_cf, need_alloc ? &alloc : NULL, &exec); + + /* insert nop to get an even # of CFs */ + if (num_cf % 2) + cfs[num_cf++] = (instr_cf_t) { + .opc = NOP}; + + /* patch cf addrs */ + for (int idx = 0; idx < num_cf; idx++) { + switch (cfs[idx].opc) { + case NOP: + case ALLOC: + break; + case EXEC: + case EXEC_END: + cfs[idx].exec.address += num_cf / 2; + break; + case COND_JMP: + cfs[idx].jmp_call.address = block_addr[cfs[idx].jmp_call.address]; + break; + default: + assert(0); + } + } + + /* concatenate cfs and alu/fetch */ + uint32_t cfdwords = num_cf / 2 * 3; + uint32_t alufetchdwords = exec.address * 3; + uint32_t sizedwords = cfdwords + alufetchdwords; + uint32_t *dwords = malloc(sizedwords * 4); + assert(dwords); + memcpy(dwords, cfs, cfdwords * 4); + memcpy(&dwords[cfdwords], bytecode, alufetchdwords * 4); + + /* finalize ir2_shader_info */ + ctx->info->dwords = dwords; + ctx->info->sizedwords = sizedwords; + for (int i = 0; i < ctx->info->num_fetch_instrs; i++) + ctx->info->fetch_info[i].offset += cfdwords; + + if (fd_mesa_debug & FD_DBG_DISASM) { + DBG("disassemble: type=%d", ctx->so->type); + disasm_a2xx(dwords, sizedwords, 0, ctx->so->type); + } +} diff -Nru mesa-18.3.3/src/gallium/drivers/freedreno/a2xx/ir2.c mesa-19.0.1/src/gallium/drivers/freedreno/a2xx/ir2.c --- mesa-18.3.3/src/gallium/drivers/freedreno/a2xx/ir2.c 1970-01-01 00:00:00.000000000 +0000 +++ 
mesa-19.0.1/src/gallium/drivers/freedreno/a2xx/ir2.c 2019-03-31 23:16:37.000000000 +0000 @@ -0,0 +1,442 @@ +/* + * Copyright (C) 2018 Jonathan Marek + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + * Authors: + * Jonathan Marek + */ + +#include "ir2_private.h" + +static bool scalar_possible(struct ir2_instr *instr) +{ + if (instr->alu.scalar_opc == SCALAR_NONE) + return false; + + return src_ncomp(instr) == 1; +} + +static bool is_alu_compatible(struct ir2_instr *a, struct ir2_instr *b) +{ + if (!a) + return true; + + /* dont use same instruction twice */ + if (a == b) + return false; + + /* PRED_SET must be alone */ + if (b->alu.scalar_opc >= PRED_SETEs && + b->alu.scalar_opc <= PRED_SET_RESTOREs) + return false; + + /* must write to same export (issues otherwise?) 
*/ + return a->alu.export == b->alu.export; +} + +/* priority of vector instruction for scheduling (lower=higher prio) */ +static unsigned alu_vector_prio(struct ir2_instr *instr) +{ + if (instr->alu.vector_opc == VECTOR_NONE) + return ~0u; + + if (is_export(instr)) + return 4; + + /* TODO check src type and ncomps */ + if (instr->src_count == 3) + return 0; + + if (!scalar_possible(instr)) + return 1; + + return instr->src_count == 2 ? 2 : 3; +} + +/* priority of scalar instruction for scheduling (lower=higher prio) */ +static unsigned alu_scalar_prio(struct ir2_instr *instr) +{ + if (!scalar_possible(instr)) + return ~0u; + + /* this case is dealt with later */ + if (instr->src_count > 1) + return ~0u; + + if (is_export(instr)) + return 4; + + /* PRED to end of block */ + if (instr->alu.scalar_opc >= PRED_SETEs && + instr->alu.scalar_opc <= PRED_SET_RESTOREs) + return 5; + + /* scalar only have highest priority */ + return instr->alu.vector_opc == VECTOR_NONE ? 0 : 3; +} + +/* this is a bit messy: + * we want to find a slot where we can insert a scalar MOV with + * a vector instruction that was already scheduled + */ +static struct ir2_sched_instr* +insert(struct ir2_context *ctx, unsigned block_idx, unsigned reg_idx, + struct ir2_src src1, unsigned *comp) +{ + struct ir2_sched_instr *sched = NULL, *s; + unsigned i, mask = 0xf; + + /* go first earliest point where the mov can be inserted */ + for (i = ctx->instr_sched_count-1; i > 0; i--) { + s = &ctx->instr_sched[i - 1]; + + if (s->instr && s->instr->block_idx != block_idx) + break; + if (s->instr_s && s->instr_s->block_idx != block_idx) + break; + + if (src1.type == IR2_SRC_SSA) { + if ((s->instr && s->instr->idx == src1.num) || + (s->instr_s && s->instr_s->idx == src1.num)) + break; + } + + unsigned mr = ~(s->reg_state[reg_idx/8] >> reg_idx%8*4 & 0xf); + if ((mask & mr) == 0) + break; + + mask &= mr; + if (s->instr_s || s->instr->src_count == 3) + continue; + + if (s->instr->type != IR2_ALU || 
s->instr->alu.export >= 0) + continue; + + sched = s; + } + *comp = ffs(mask) - 1; + return sched; +} + +/* case1: + * in this case, insert a mov to place the 2nd src into to same reg + * (scalar sources come from the same register) + * + * this is a common case which works when one of the srcs is input/const + * but for instrs which have 2 ssa/reg srcs, then its not ideal + */ +static bool +scalarize_case1(struct ir2_context *ctx, struct ir2_instr *instr, bool order) +{ + struct ir2_src src0 = instr->src[ order]; + struct ir2_src src1 = instr->src[!order]; + struct ir2_sched_instr *sched; + struct ir2_instr *ins; + struct ir2_reg *reg; + unsigned idx, comp; + + switch (src0.type) { + case IR2_SRC_CONST: + case IR2_SRC_INPUT: + return false; + default: + break; + } + + /* TODO, insert needs logic for this */ + if (src1.type == IR2_SRC_REG) + return false; + + /* we could do something if they match src1.. */ + if (src0.negate || src0.abs) + return false; + + reg = get_reg_src(ctx, &src0); + + /* result not used more since we will overwrite */ + for (int i = 0; i < 4; i++) + if (reg->comp[i].ref_count != !!(instr->alu.write_mask & 1 << i)) + return false; + + /* find a place to insert the mov */ + sched = insert(ctx, instr->block_idx, reg->idx, src1, &comp); + if (!sched) + return false; + + ins = &ctx->instr[idx = ctx->instr_count++]; + ins->idx = idx; + ins->type = IR2_ALU; + ins->src[0] = src1; + ins->src_count = 1; + ins->is_ssa = true; + ins->ssa.idx = reg->idx; + ins->ssa.ncomp = 1; + ins->ssa.comp[0].c = comp; + ins->alu.scalar_opc = MAXs; + ins->alu.export = -1; + ins->alu.write_mask = 1; + ins->pred = instr->pred; + ins->block_idx = instr->block_idx; + + instr->src[0] = src0; + instr->alu.src1_swizzle = comp; + + sched->instr_s = ins; + return true; +} + +/* fill sched with next fetch or (vector and/or scalar) alu instruction */ +static int sched_next(struct ir2_context *ctx, struct ir2_sched_instr *sched) +{ + struct ir2_instr *avail[0x100], *instr_v = 
NULL, *instr_s = NULL; + unsigned avail_count = 0; + + instr_alloc_type_t export = ~0u; + int block_idx = -1; + + /* XXX merge this loop with the other one somehow? */ + ir2_foreach_instr(instr, ctx) { + if (!instr->need_emit) + continue; + if (is_export(instr)) + export = MIN2(export, export_buf(instr->alu.export)); + } + + ir2_foreach_instr(instr, ctx) { + if (!instr->need_emit) + continue; + + /* dont mix exports */ + if (is_export(instr) && export_buf(instr->alu.export) != export) + continue; + + if (block_idx < 0) + block_idx = instr->block_idx; + else if (block_idx != instr->block_idx || /* must be same block */ + instr->type == IR2_CF || /* CF/MEM must be alone */ + (is_export(instr) && export == SQ_MEMORY)) + break; + /* it works because IR2_CF is always at end of block + * and somewhat same idea with MEM exports, which might not be alone + * but will end up in-order at least + */ + + /* check if dependencies are satisfied */ + bool is_ok = true; + ir2_foreach_src(src, instr) { + if (src->type == IR2_SRC_REG) { + /* need to check if all previous instructions in the block + * which write the reg have been emitted + * slow.. 
+ * XXX: check components instead of whole register + */ + struct ir2_reg *reg = get_reg_src(ctx, src); + ir2_foreach_instr(p, ctx) { + if (!p->is_ssa && p->reg == reg && p->idx < instr->idx) + is_ok &= !p->need_emit; + } + } else if (src->type == IR2_SRC_SSA) { + /* in this case its easy, just check need_emit */ + is_ok &= !ctx->instr[src->num].need_emit; + } + } + if (!is_ok) + continue; + + avail[avail_count++] = instr; + } + + if (!avail_count) { + assert(block_idx == -1); + return -1; + } + + /* priority to FETCH instructions */ + ir2_foreach_avail(instr) { + if (instr->type == IR2_ALU) + continue; + + ra_src_free(ctx, instr); + ra_reg(ctx, get_reg(instr), -1, false, 0); + + instr->need_emit = false; + sched->instr = instr; + sched->instr_s = NULL; + return block_idx; + } + + /* TODO precompute priorities */ + + unsigned prio_v = ~0u, prio_s = ~0u, prio; + ir2_foreach_avail(instr) { + prio = alu_vector_prio(instr); + if (prio < prio_v) { + instr_v = instr; + prio_v = prio; + } + } + + /* TODO can still insert scalar if src_count=3, if smart about it */ + if (!instr_v || instr_v->src_count < 3) { + ir2_foreach_avail(instr) { + bool compat = is_alu_compatible(instr_v, instr); + + prio = alu_scalar_prio(instr); + if (prio >= prio_v && !compat) + continue; + + if (prio < prio_s) { + instr_s = instr; + prio_s = prio; + if (!compat) + instr_v = NULL; + } + } + } + + assert(instr_v || instr_s); + + /* now, we try more complex insertion of vector instruction as scalar + * TODO: if we are smart we can still insert if instr_v->src_count==3 + */ + if (!instr_s && instr_v->src_count < 3) { + ir2_foreach_avail(instr) { + if (!is_alu_compatible(instr_v, instr) || !scalar_possible(instr)) + continue; + + /* at this point, src_count should always be 2 */ + assert(instr->src_count == 2); + + if (scalarize_case1(ctx, instr, 0)) { + instr_s = instr; + break; + } + if (scalarize_case1(ctx, instr, 1)) { + instr_s = instr; + break; + } + } + } + + /* free src registers */ + if 
(instr_v) { + instr_v->need_emit = false; + ra_src_free(ctx, instr_v); + } + + if (instr_s) { + instr_s->need_emit = false; + ra_src_free(ctx, instr_s); + } + + /* allocate dst registers */ + if (instr_v) + ra_reg(ctx, get_reg(instr_v), -1, is_export(instr_v), instr_v->alu.write_mask); + + if (instr_s) + ra_reg(ctx, get_reg(instr_s), -1, is_export(instr_s), instr_s->alu.write_mask); + + sched->instr = instr_v; + sched->instr_s = instr_s; + return block_idx; +} + +/* scheduling: determine order of instructions */ +static void schedule_instrs(struct ir2_context *ctx) +{ + struct ir2_sched_instr *sched; + int block_idx; + + /* allocate input registers */ + for (unsigned idx = 0; idx < ARRAY_SIZE(ctx->input); idx++) + if (ctx->input[idx].initialized) + ra_reg(ctx, &ctx->input[idx], idx, false, 0); + + for (;;) { + sched = &ctx->instr_sched[ctx->instr_sched_count++]; + block_idx = sched_next(ctx, sched); + if (block_idx < 0) + break; + memcpy(sched->reg_state, ctx->reg_state, sizeof(ctx->reg_state)); + + /* catch texture fetch after scheduling and insert the + * SET_TEX_LOD right before it if necessary + * TODO clean this up + */ + struct ir2_instr *instr = sched->instr, *tex_lod; + if (instr && instr->type == IR2_FETCH && + instr->fetch.opc == TEX_FETCH && instr->src_count == 2) { + /* generate the SET_LOD instruction */ + tex_lod = &ctx->instr[ctx->instr_count++]; + tex_lod->type = IR2_FETCH; + tex_lod->block_idx = instr->block_idx; + tex_lod->pred = instr->pred; + tex_lod->fetch.opc = TEX_SET_TEX_LOD; + tex_lod->src[0] = instr->src[1]; + tex_lod->src_count = 1; + + sched[1] = sched[0]; + sched->instr = tex_lod; + ctx->instr_sched_count++; + } + + bool free_block = true; + ir2_foreach_instr(instr, ctx) + free_block &= instr->block_idx != block_idx; + if (free_block) + ra_block_free(ctx, block_idx); + }; + ctx->instr_sched_count--; +} + +void +ir2_compile(struct fd2_shader_stateobj *so, unsigned variant, + struct fd2_shader_stateobj *fp) +{ + struct ir2_context ctx = { 
}; + bool binning = !fp && so->type == MESA_SHADER_VERTEX; + + if (fp) + so->variant[variant].f = fp->variant[0].f; + + ctx.so = so; + ctx.info = &so->variant[variant].info; + ctx.f = &so->variant[variant].f; + ctx.info->max_reg = -1; + + /* convert nir to internal representation */ + ir2_nir_compile(&ctx, binning); + + /* copy propagate srcs */ + cp_src(&ctx); + + /* get ref_counts and kill non-needed instructions */ + ra_count_refs(&ctx); + + /* remove movs used to write outputs */ + cp_export(&ctx); + + /* instruction order.. and vector->scalar conversions */ + schedule_instrs(&ctx); + + /* finally, assemble to bitcode */ + assemble(&ctx, binning); +} diff -Nru mesa-18.3.3/src/gallium/drivers/freedreno/a2xx/ir2_cp.c mesa-19.0.1/src/gallium/drivers/freedreno/a2xx/ir2_cp.c --- mesa-18.3.3/src/gallium/drivers/freedreno/a2xx/ir2_cp.c 1970-01-01 00:00:00.000000000 +0000 +++ mesa-19.0.1/src/gallium/drivers/freedreno/a2xx/ir2_cp.c 2019-03-31 23:16:37.000000000 +0000 @@ -0,0 +1,225 @@ +/* + * Copyright (C) 2018 Jonathan Marek + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + * Authors: + * Jonathan Marek + */ + +#include "ir2_private.h" + +static bool is_mov(struct ir2_instr *instr) +{ + return instr->type == IR2_ALU && instr->alu.vector_opc == MAXv && + instr->src_count == 1; +} + +static void src_combine(struct ir2_src *src, struct ir2_src b) +{ + src->num = b.num; + src->type = b.type; + src->swizzle = swiz_merge(b.swizzle, src->swizzle); + if (!src->abs) /* if we have abs we don't care about previous negate */ + src->negate ^= b.negate; + src->abs |= b.abs; +} + +/* cp_src: replace src regs when they refer to a mov instruction + * example: + * ALU: MAXv R7 = C7, C7 + * ALU: MULADDv R7 = R7, R10, R0.xxxx + * becomes: + * ALU: MULADDv R7 = C7, R10, R0.xxxx + */ +void cp_src(struct ir2_context *ctx) +{ + struct ir2_instr *p; + + ir2_foreach_instr(instr, ctx) { + ir2_foreach_src(src, instr) { + /* loop to replace recursively */ + do { + if (src->type != IR2_SRC_SSA) + break; + + p = &ctx->instr[src->num]; + /* don't work across blocks to avoid possible issues */ + if (p->block_idx != instr->block_idx) + break; + + if (!is_mov(p)) + break; + + /* cant apply abs to const src, const src only for alu */ + if (p->src[0].type == IR2_SRC_CONST && + (src->abs || instr->type != IR2_ALU)) + break; + + src_combine(src, p->src[0]); + } while (1); + } + } +} + +/* cp_export: replace mov to export when possible + * in the cp_src pass we bypass any mov instructions related + * to the src registers, but for exports for need something different + * example: + * ALU: MAXv R3.x___ = C9.x???, C9.x??? + * ALU: MAXv R3._y__ = R0.?x??, C8.?x?? 
+ * ALU: MAXv export0 = R3.yyyx, R3.yyyx + * becomes: + * ALU: MAXv export0.___w = C9.???x, C9.???x + * ALU: MAXv export0.xyz_ = R0.xxx?, C8.xxx? + * + */ +void cp_export(struct ir2_context *ctx) +{ + struct ir2_instr *c[4], *ins[4]; + struct ir2_src *src; + struct ir2_reg *reg; + unsigned ncomp; + + ir2_foreach_instr(instr, ctx) { + if (!is_export(instr)) /* TODO */ + continue; + + if (!is_mov(instr)) + continue; + + src = &instr->src[0]; + + if (src->negate || src->abs) /* TODO handle these cases */ + continue; + + if (src->type == IR2_SRC_INPUT || src->type == IR2_SRC_CONST) + continue; + + reg = get_reg_src(ctx, src); + ncomp = dst_ncomp(instr); + + unsigned reswiz[4] = {}; + unsigned num_instr = 0; + + /* fill array c with pointers to instrs that write each component */ + if (src->type == IR2_SRC_SSA) { + struct ir2_instr *instr = &ctx->instr[src->num]; + + if (instr->type != IR2_ALU) + continue; + + for (int i = 0; i < ncomp; i++) + c[i] = instr; + + ins[num_instr++] = instr; + reswiz[0] = src->swizzle; + } else { + bool ok = true; + unsigned write_mask = 0; + + ir2_foreach_instr(instr, ctx) { + if (instr->is_ssa || instr->reg != reg) + continue; + + /* set by non-ALU */ + if (instr->type != IR2_ALU) { + ok = false; + break; + } + + /* component written more than once */ + if (write_mask & instr->alu.write_mask) { + ok = false; + break; + } + + write_mask |= instr->alu.write_mask; + + /* src pointers for components */ + for (int i = 0, j = 0; i < 4; i++) { + unsigned k = swiz_get(src->swizzle, i); + if (instr->alu.write_mask & 1 << k) { + c[i] = instr; + + /* reswiz = compressed src->swizzle */ + unsigned x = 0; + for (int i = 0; i < k; i++) + x += !!(instr->alu.write_mask & 1 << i); + + assert(src->swizzle || x == j); + reswiz[num_instr] |= swiz_set(x, j++); + } + } + ins[num_instr++] = instr; + } + if (!ok) + continue; + } + + bool redirect = true; + + /* must all be in same block */ + for (int i = 0; i < ncomp; i++) + redirect &= (c[i]->block_idx == 
instr->block_idx); + + /* no other instr using the value */ + ir2_foreach_instr(p, ctx) { + if (p == instr) + continue; + ir2_foreach_src(src, p) + redirect &= reg != get_reg_src(ctx, src); + } + + if (!redirect) + continue; + + /* redirect the instructions writing to the register */ + for (int i = 0; i < num_instr; i++) { + struct ir2_instr *p = ins[i]; + + p->alu.export = instr->alu.export; + p->alu.write_mask = 0; + p->is_ssa = true; + p->ssa.ncomp = 0; + memset(p->ssa.comp, 0, sizeof(p->ssa.comp)); + + switch (instr->alu.vector_opc) { + case PRED_SETE_PUSHv ... PRED_SETGTE_PUSHv: + case DOT2ADDv: + case DOT3v: + case DOT4v: + case CUBEv: + continue; + default: + break; + } + ir2_foreach_src(s, p) + swiz_merge_p(&s->swizzle, reswiz[i]); + } + + for (int i = 0; i < ncomp; i++) { + c[i]->alu.write_mask |= (1 << i); + c[i]->ssa.ncomp++; + } + instr->type = IR2_NONE; + instr->need_emit = false; + } +} diff -Nru mesa-18.3.3/src/gallium/drivers/freedreno/a2xx/ir2.h mesa-19.0.1/src/gallium/drivers/freedreno/a2xx/ir2.h --- mesa-18.3.3/src/gallium/drivers/freedreno/a2xx/ir2.h 1970-01-01 00:00:00.000000000 +0000 +++ mesa-19.0.1/src/gallium/drivers/freedreno/a2xx/ir2.h 2019-03-31 23:16:37.000000000 +0000 @@ -0,0 +1,94 @@ +/* + * Copyright (C) 2018 Jonathan Marek + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. 
+ * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + * Authors: + * Jonathan Marek + */ + +#ifndef IR2_H_ +#define IR2_H_ + +#include "compiler/nir/nir.h" + +struct ir2_fetch_info { + /* dword offset of the fetch instruction */ + uint16_t offset; + union { + /* swizzle to merge with tgsi swizzle */ + struct { + uint16_t dst_swiz; + } vtx; + /* sampler id to patch const_idx */ + struct { + uint16_t samp_id; + uint8_t src_swiz; + } tex; + }; +}; + +struct ir2_shader_info { + /* compiler shader */ + uint32_t *dwords; + + /* size of the compiled shader in dwords */ + uint16_t sizedwords; + + /* highest GPR # used by shader */ + int8_t max_reg; + + /* offset in dwords of first MEMORY export CF (for a20x hw binning) */ + int16_t mem_export_ptr; + + /* fetch instruction info for patching */ + uint16_t num_fetch_instrs; + struct ir2_fetch_info fetch_info[64]; +}; + +struct ir2_frag_linkage { + unsigned inputs_count; + struct { + uint8_t slot; + uint8_t ncomp; + } inputs[16]; + + /* driver_location of fragcoord.zw, -1 if not used */ + int fragcoord; +}; + +struct ir2_shader_variant { + struct ir2_shader_info info; + struct ir2_frag_linkage f; +}; + +struct fd2_shader_stateobj; +struct tgsi_token; + +void ir2_compile(struct fd2_shader_stateobj *so, unsigned variant, + struct fd2_shader_stateobj *fp); + +struct nir_shader *ir2_tgsi_to_nir(const struct tgsi_token *tokens); + +const nir_shader_compiler_options *ir2_get_compiler_options(void); + +int ir2_optimize_nir(nir_shader *s, bool lower); + +#endif /* IR2_H_ */ diff -Nru 
mesa-18.3.3/src/gallium/drivers/freedreno/a2xx/ir2_nir.c mesa-19.0.1/src/gallium/drivers/freedreno/a2xx/ir2_nir.c --- mesa-18.3.3/src/gallium/drivers/freedreno/a2xx/ir2_nir.c 1970-01-01 00:00:00.000000000 +0000 +++ mesa-19.0.1/src/gallium/drivers/freedreno/a2xx/ir2_nir.c 2019-03-31 23:16:37.000000000 +0000 @@ -0,0 +1,1174 @@ +/* + * Copyright (C) 2018 Jonathan Marek + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ * + * Authors: + * Jonathan Marek + */ + +#include "ir2_private.h" +#include "nir/tgsi_to_nir.h" + +#include "freedreno_util.h" +#include "fd2_program.h" + +static const nir_shader_compiler_options options = { + .lower_fpow = true, + .lower_flrp32 = true, + .lower_fmod32 = true, + .lower_fdiv = true, + .lower_fceil = true, + .fuse_ffma = true, + /* .fdot_replicates = true, it is replicated, but it makes things worse */ + .lower_all_io_to_temps = true, + .vertex_id_zero_based = true, /* its not implemented anyway */ +}; + +struct nir_shader * +ir2_tgsi_to_nir(const struct tgsi_token *tokens) +{ + return tgsi_to_nir(tokens, &options); +} + +const nir_shader_compiler_options * +ir2_get_compiler_options(void) +{ + return &options; +} + +#define OPT(nir, pass, ...) ({ \ + bool this_progress = false; \ + NIR_PASS(this_progress, nir, pass, ##__VA_ARGS__); \ + this_progress; \ +}) +#define OPT_V(nir, pass, ...) NIR_PASS_V(nir, pass, ##__VA_ARGS__) + +static void +ir2_optimize_loop(nir_shader *s) +{ + bool progress; + do { + progress = false; + + OPT_V(s, nir_lower_vars_to_ssa); + progress |= OPT(s, nir_opt_copy_prop_vars); + progress |= OPT(s, nir_copy_prop); + progress |= OPT(s, nir_opt_dce); + progress |= OPT(s, nir_opt_cse); + /* progress |= OPT(s, nir_opt_gcm, true); */ + progress |= OPT(s, nir_opt_peephole_select, UINT_MAX, true); + progress |= OPT(s, nir_opt_intrinsics); + progress |= OPT(s, nir_opt_algebraic); + progress |= OPT(s, nir_opt_constant_folding); + progress |= OPT(s, nir_opt_dead_cf); + if (OPT(s, nir_opt_trivial_continues)) { + progress |= true; + /* If nir_opt_trivial_continues makes progress, then we need to clean + * things up if we want any hope of nir_opt_if or nir_opt_loop_unroll + * to make progress. 
+ */ + OPT(s, nir_copy_prop); + OPT(s, nir_opt_dce); + } + progress |= OPT(s, nir_opt_loop_unroll, nir_var_all); + progress |= OPT(s, nir_opt_if); + progress |= OPT(s, nir_opt_remove_phis); + progress |= OPT(s, nir_opt_undef); + + } + while (progress); +} + +/* trig workarounds is the same as ir3.. but we don't want to include ir3 */ +bool ir3_nir_apply_trig_workarounds(nir_shader * shader); + +int +ir2_optimize_nir(nir_shader *s, bool lower) +{ + struct nir_lower_tex_options tex_options = { + .lower_txp = ~0u, + .lower_rect = 0, + }; + + if (fd_mesa_debug & FD_DBG_DISASM) { + debug_printf("----------------------\n"); + nir_print_shader(s, stdout); + debug_printf("----------------------\n"); + } + + OPT_V(s, nir_opt_global_to_local); + OPT_V(s, nir_lower_regs_to_ssa); + OPT_V(s, nir_lower_vars_to_ssa); + OPT_V(s, nir_lower_indirect_derefs, nir_var_shader_in | nir_var_shader_out); + + if (lower) { + OPT_V(s, ir3_nir_apply_trig_workarounds); + OPT_V(s, nir_lower_tex, &tex_options); + } + + ir2_optimize_loop(s); + + OPT_V(s, nir_remove_dead_variables, nir_var_function_temp); + OPT_V(s, nir_move_load_const); + + /* TODO we dont want to get shaders writing to depth for depth textures */ + if (s->info.stage == MESA_SHADER_FRAGMENT) { + nir_foreach_variable(var, &s->outputs) { + if (var->data.location == FRAG_RESULT_DEPTH) + return -1; + } + } + + return 0; +} + +static struct ir2_src +load_const(struct ir2_context *ctx, float *value_f, unsigned ncomp) +{ + struct fd2_shader_stateobj *so = ctx->so; + unsigned imm_ncomp, swiz, idx, i, j; + uint32_t *value = (uint32_t*) value_f; + + /* try to merge with existing immediate (TODO: try with neg) */ + for (idx = 0; idx < so->num_immediates; idx++) { + swiz = 0; + imm_ncomp = so->immediates[idx].ncomp; + for (i = 0; i < ncomp; i++) { + for (j = 0; j < imm_ncomp; j++) { + if (value[i] == so->immediates[idx].val[j]) + break; + } + if (j == imm_ncomp) { + if (j == 4) + break; + so->immediates[idx].val[imm_ncomp++] = value[i]; + } + 
swiz |= swiz_set(j, i); + } + /* matched all components */ + if (i == ncomp) + break; + } + + /* need to allocate new immediate */ + if (idx == so->num_immediates) { + swiz = 0; + imm_ncomp = 0; + for (i = 0; i < ncomp; i++) { + for (j = 0; j < imm_ncomp; j++) { + if (value[i] == ctx->so->immediates[idx].val[j]) + break; + } + if (j == imm_ncomp) { + so->immediates[idx].val[imm_ncomp++] = value[i]; + } + swiz |= swiz_set(j, i); + } + so->num_immediates++; + } + so->immediates[idx].ncomp = imm_ncomp; + + if (ncomp == 1) + swiz = swiz_merge(swiz, IR2_SWIZZLE_XXXX); + + return ir2_src(so->first_immediate + idx, swiz, IR2_SRC_CONST); +} + +struct ir2_src +ir2_zero(struct ir2_context *ctx) +{ + return load_const(ctx, (float[]) {0.0f}, 1); +} + +static void +update_range(struct ir2_context *ctx, struct ir2_reg *reg) +{ + if (!reg->initialized) { + reg->initialized = true; + reg->loop_depth = ctx->loop_depth; + } + + if (ctx->loop_depth > reg->loop_depth) { + reg->block_idx_free = ctx->loop_last_block[reg->loop_depth + 1]; + } else { + reg->loop_depth = ctx->loop_depth; + reg->block_idx_free = -1; + } + + /* for regs we want to free at the end of the loop in any case + * XXX dont do this for ssa + */ + if (reg->loop_depth) + reg->block_idx_free = ctx->loop_last_block[reg->loop_depth]; +} + +static struct ir2_src +make_src(struct ir2_context *ctx, nir_src src) +{ + struct ir2_src res = {}; + struct ir2_reg *reg; + + nir_const_value *const_value = nir_src_as_const_value(src); + + if (const_value) { + assert(src.is_ssa); + return load_const(ctx, &const_value->f32[0], src.ssa->num_components); + } + + if (!src.is_ssa) { + res.num = src.reg.reg->index; + res.type = IR2_SRC_REG; + reg = &ctx->reg[res.num]; + } else { + assert(ctx->ssa_map[src.ssa->index] >= 0); + res.num = ctx->ssa_map[src.ssa->index]; + res.type = IR2_SRC_SSA; + reg = &ctx->instr[res.num].ssa; + } + + update_range(ctx, reg); + return res; +} + +static void +set_index(struct ir2_context *ctx, nir_dest * dst, + 
struct ir2_instr *instr) +{ + struct ir2_reg *reg = &instr->ssa; + + if (dst->is_ssa) { + ctx->ssa_map[dst->ssa.index] = instr->idx; + } else { + assert(instr->is_ssa); + reg = &ctx->reg[dst->reg.reg->index]; + + instr->is_ssa = false; + instr->reg = reg; + } + update_range(ctx, reg); +} + +static struct ir2_instr * +ir2_instr_create(struct ir2_context *ctx, int type) +{ + struct ir2_instr *instr; + + instr = &ctx->instr[ctx->instr_count++]; + instr->idx = ctx->instr_count - 1; + instr->type = type; + instr->block_idx = ctx->block_idx; + instr->pred = ctx->pred; + instr->is_ssa = true; + return instr; +} + +static struct ir2_instr * +instr_create_alu(struct ir2_context *ctx, nir_op opcode, unsigned ncomp) +{ + /* emit_alu will fixup instrs that don't map directly */ + static const struct ir2_opc { + int8_t scalar, vector; + } nir_ir2_opc[nir_num_opcodes+1] = { + [0 ... nir_num_opcodes - 1] = {-1, -1}, + + [nir_op_fmov] = {MAXs, MAXv}, + [nir_op_fsign] = {-1, CNDGTEv}, + [nir_op_fnot] = {SETEs, SETEv}, + [nir_op_for] = {MAXs, MAXv}, + [nir_op_fand] = {MINs, MINv}, + [nir_op_fxor] = {-1, SETNEv}, + [nir_op_fadd] = {ADDs, ADDv}, + [nir_op_fsub] = {ADDs, ADDv}, + [nir_op_fmul] = {MULs, MULv}, + [nir_op_ffma] = {-1, MULADDv}, + [nir_op_fmax] = {MAXs, MAXv}, + [nir_op_fmin] = {MINs, MINv}, + [nir_op_ffloor] = {FLOORs, FLOORv}, + [nir_op_ffract] = {FRACs, FRACv}, + [nir_op_ftrunc] = {TRUNCs, TRUNCv}, + [nir_op_fdot2] = {-1, DOT2ADDv}, + [nir_op_fdot3] = {-1, DOT3v}, + [nir_op_fdot4] = {-1, DOT4v}, + [nir_op_sge] = {-1, SETGTEv}, + [nir_op_slt] = {-1, SETGTv}, + [nir_op_sne] = {-1, SETNEv}, + [nir_op_seq] = {-1, SETEv}, + [nir_op_fcsel] = {-1, CNDEv}, + [nir_op_frsq] = {RECIPSQ_IEEE, -1}, + [nir_op_frcp] = {RECIP_IEEE, -1}, + [nir_op_flog2] = {LOG_IEEE, -1}, + [nir_op_fexp2] = {EXP_IEEE, -1}, + [nir_op_fsqrt] = {SQRT_IEEE, -1}, + [nir_op_fcos] = {COS, -1}, + [nir_op_fsin] = {SIN, -1}, + /* no fsat, fneg, fabs since source mods deal with those */ + + /* some nir passes 
still generate nir_op_imov */ + [nir_op_imov] = {MAXs, MAXv}, + + /* so we can use this function with non-nir op */ +#define ir2_op_cube nir_num_opcodes + [ir2_op_cube] = {-1, CUBEv}, + }; + + struct ir2_opc op = nir_ir2_opc[opcode]; + assert(op.vector >= 0 || op.scalar >= 0); + + struct ir2_instr *instr = ir2_instr_create(ctx, IR2_ALU); + instr->alu.vector_opc = op.vector; + instr->alu.scalar_opc = op.scalar; + instr->alu.export = -1; + instr->alu.write_mask = (1 << ncomp) - 1; + instr->src_count = opcode == ir2_op_cube ? 2 : + nir_op_infos[opcode].num_inputs; + instr->ssa.ncomp = ncomp; + return instr; +} + +static struct ir2_instr * +instr_create_alu_reg(struct ir2_context *ctx, nir_op opcode, + uint8_t write_mask, struct ir2_instr *share_reg) +{ + struct ir2_instr *instr; + struct ir2_reg *reg; + + reg = share_reg ? share_reg->reg : &ctx->reg[ctx->reg_count++]; + reg->ncomp = MAX2(reg->ncomp, util_logbase2(write_mask) + 1); + + instr = instr_create_alu(ctx, opcode, util_bitcount(write_mask)); + instr->alu.write_mask = write_mask; + instr->reg = reg; + instr->is_ssa = false; + return instr; +} + + +static struct ir2_instr * +instr_create_alu_dest(struct ir2_context *ctx, nir_op opcode, nir_dest *dst) +{ + struct ir2_instr *instr; + instr = instr_create_alu(ctx, opcode, nir_dest_num_components(*dst)); + set_index(ctx, dst, instr); + return instr; +} + +static struct ir2_instr * +ir2_instr_create_fetch(struct ir2_context *ctx, nir_dest *dst, + instr_fetch_opc_t opc) +{ + struct ir2_instr *instr = ir2_instr_create(ctx, IR2_FETCH); + instr->fetch.opc = opc; + instr->src_count = 1; + instr->ssa.ncomp = nir_dest_num_components(*dst); + set_index(ctx, dst, instr); + return instr; +} + +static struct ir2_src +make_src_noconst(struct ir2_context *ctx, nir_src src) +{ + struct ir2_instr *instr; + + if (nir_src_as_const_value(src)) { + assert(src.is_ssa); + instr = instr_create_alu(ctx, nir_op_fmov, src.ssa->num_components); + instr->src[0] = make_src(ctx, src); + return 
ir2_src(instr->idx, 0, IR2_SRC_SSA); + } + + return make_src(ctx, src); +} + +static void +emit_alu(struct ir2_context *ctx, nir_alu_instr * alu) +{ + const nir_op_info *info = &nir_op_infos[alu->op]; + nir_dest *dst = &alu->dest.dest; + struct ir2_instr *instr; + struct ir2_src tmp; + unsigned ncomp; + + /* get the number of dst components */ + if (dst->is_ssa) { + ncomp = dst->ssa.num_components; + } else { + ncomp = 0; + for (int i = 0; i < 4; i++) + ncomp += !!(alu->dest.write_mask & 1 << i); + } + + instr = instr_create_alu(ctx, alu->op, ncomp); + set_index(ctx, dst, instr); + instr->alu.saturate = alu->dest.saturate; + instr->alu.write_mask = alu->dest.write_mask; + + for (int i = 0; i < info->num_inputs; i++) { + nir_alu_src *src = &alu->src[i]; + + /* compress swizzle with writemask when applicable */ + unsigned swiz = 0, j = 0; + for (int i = 0; i < 4; i++) { + if (!(alu->dest.write_mask & 1 << i) && !info->output_size) + continue; + swiz |= swiz_set(src->swizzle[i], j++); + } + + instr->src[i] = make_src(ctx, src->src); + instr->src[i].swizzle = swiz_merge(instr->src[i].swizzle, swiz); + instr->src[i].negate = src->negate; + instr->src[i].abs = src->abs; + } + + /* workarounds for NIR ops that don't map directly to a2xx ops */ + switch (alu->op) { + case nir_op_slt: + tmp = instr->src[0]; + instr->src[0] = instr->src[1]; + instr->src[1] = tmp; + break; + case nir_op_fcsel: + tmp = instr->src[1]; + instr->src[1] = instr->src[2]; + instr->src[2] = tmp; + break; + case nir_op_fsub: + instr->src[1].negate = !instr->src[1].negate; + break; + case nir_op_fdot2: + instr->src_count = 3; + instr->src[2] = ir2_zero(ctx); + break; + case nir_op_fsign: { + /* we need an extra instruction to deal with the zero case */ + struct ir2_instr *tmp; + + /* tmp = x == 0 ? 0 : 1 */ + tmp = instr_create_alu(ctx, nir_op_fcsel, ncomp); + tmp->src[0] = instr->src[0]; + tmp->src[1] = ir2_zero(ctx); + tmp->src[2] = load_const(ctx, (float[]) {1.0f}, 1); + + /* result = x >= 0 ? 
tmp : -tmp */ + instr->src[1] = ir2_src(tmp->idx, 0, IR2_SRC_SSA); + instr->src[2] = instr->src[1]; + instr->src[2].negate = true; + instr->src_count = 3; + } break; + default: + break; + } +} + +static void +load_input(struct ir2_context *ctx, nir_dest *dst, unsigned idx) +{ + struct ir2_instr *instr; + int slot = -1; + + if (ctx->so->type == MESA_SHADER_VERTEX) { + instr = ir2_instr_create_fetch(ctx, dst, 0); + instr->src[0] = ir2_src(0, 0, IR2_SRC_INPUT); + instr->fetch.vtx.const_idx = 20 + (idx / 3); + instr->fetch.vtx.const_idx_sel = idx % 3; + return; + } + + /* get slot from idx */ + nir_foreach_variable(var, &ctx->nir->inputs) { + if (var->data.driver_location == idx) { + slot = var->data.location; + break; + } + } + assert(slot >= 0); + + switch (slot) { + case VARYING_SLOT_PNTC: + /* need to extract with abs and invert y */ + instr = instr_create_alu_dest(ctx, nir_op_ffma, dst); + instr->src[0] = ir2_src(ctx->f->inputs_count, IR2_SWIZZLE_ZW, IR2_SRC_INPUT); + instr->src[0].abs = true; + instr->src[1] = load_const(ctx, (float[]) {1.0f, -1.0f}, 2); + instr->src[2] = load_const(ctx, (float[]) {0.0f, 1.0f}, 2); + break; + case VARYING_SLOT_POS: + /* need to extract xy with abs and add tile offset on a20x + * zw from fragcoord input (w inverted in fragment shader) + * TODO: only components that are required by fragment shader + */ + instr = instr_create_alu_reg(ctx, + ctx->so->is_a20x ? 
nir_op_fadd : nir_op_fmov, 3, NULL); + instr->src[0] = ir2_src(ctx->f->inputs_count, 0, IR2_SRC_INPUT); + instr->src[0].abs = true; + /* on a20x, C64 contains the tile offset */ + instr->src[1] = ir2_src(64, 0, IR2_SRC_CONST); + + instr = instr_create_alu_reg(ctx, nir_op_fmov, 4, instr); + instr->src[0] = ir2_src(ctx->f->fragcoord, 0, IR2_SRC_INPUT); + + instr = instr_create_alu_reg(ctx, nir_op_frcp, 8, instr); + instr->src[0] = ir2_src(ctx->f->fragcoord, IR2_SWIZZLE_Y, IR2_SRC_INPUT); + + unsigned reg_idx = instr->reg - ctx->reg; /* XXX */ + instr = instr_create_alu_dest(ctx, nir_op_fmov, dst); + instr->src[0] = ir2_src(reg_idx, 0, IR2_SRC_REG); + break; + default: + instr = instr_create_alu_dest(ctx, nir_op_fmov, dst); + instr->src[0] = ir2_src(idx, 0, IR2_SRC_INPUT); + break; + } +} + +static unsigned +output_slot(struct ir2_context *ctx, nir_intrinsic_instr *intr) +{ + int slot = -1; + unsigned idx = nir_intrinsic_base(intr); + nir_foreach_variable(var, &ctx->nir->outputs) { + if (var->data.driver_location == idx) { + slot = var->data.location; + break; + } + } + assert(slot != -1); + return slot; +} + +static void +store_output(struct ir2_context *ctx, nir_src src, unsigned slot, unsigned ncomp) +{ + struct ir2_instr *instr; + unsigned idx = 0; + + if (ctx->so->type == MESA_SHADER_VERTEX) { + switch (slot) { + case VARYING_SLOT_POS: + ctx->position = make_src(ctx, src); + idx = 62; + break; + case VARYING_SLOT_PSIZ: + ctx->so->writes_psize = true; + idx = 63; + break; + default: + /* find matching slot from fragment shader input */ + for (idx = 0; idx < ctx->f->inputs_count; idx++) + if (ctx->f->inputs[idx].slot == slot) + break; + if (idx == ctx->f->inputs_count) + return; + } + } else if (slot != FRAG_RESULT_COLOR && slot != FRAG_RESULT_DATA0) { + /* only color output is implemented */ + return; + } + + instr = instr_create_alu(ctx, nir_op_fmov, ncomp); + instr->src[0] = make_src(ctx, src); + instr->alu.export = idx; +} + +static void +emit_intrinsic(struct 
ir2_context *ctx, nir_intrinsic_instr *intr) +{ + struct ir2_instr *instr; + nir_const_value *const_offset; + nir_deref_instr *deref; + unsigned idx; + + switch (intr->intrinsic) { + case nir_intrinsic_load_input: + load_input(ctx, &intr->dest, nir_intrinsic_base(intr)); + break; + case nir_intrinsic_store_output: + store_output(ctx, intr->src[0], output_slot(ctx, intr), intr->num_components); + break; + case nir_intrinsic_load_deref: + deref = nir_src_as_deref(intr->src[0]); + assert(deref->deref_type == nir_deref_type_var); + load_input(ctx, &intr->dest, deref->var->data.driver_location); + break; + case nir_intrinsic_store_deref: + deref = nir_src_as_deref(intr->src[0]); + assert(deref->deref_type == nir_deref_type_var); + store_output(ctx, intr->src[1], deref->var->data.location, intr->num_components); + break; + case nir_intrinsic_load_uniform: + const_offset = nir_src_as_const_value(intr->src[0]); + assert(const_offset); /* TODO can be false in ES2? */ + idx = nir_intrinsic_base(intr); + idx += (uint32_t) nir_src_as_const_value(intr->src[0])->f32[0]; + instr = instr_create_alu_dest(ctx, nir_op_fmov, &intr->dest); + instr->src[0] = ir2_src(idx, 0, IR2_SRC_CONST); + break; + case nir_intrinsic_discard: + case nir_intrinsic_discard_if: + instr = ir2_instr_create(ctx, IR2_ALU); + instr->alu.vector_opc = VECTOR_NONE; + if (intr->intrinsic == nir_intrinsic_discard_if) { + instr->alu.scalar_opc = KILLNEs; + instr->src[0] = make_src(ctx, intr->src[0]); + } else { + instr->alu.scalar_opc = KILLEs; + instr->src[0] = ir2_zero(ctx); + } + instr->alu.export = -1; + instr->src_count = 1; + ctx->so->has_kill = true; + break; + case nir_intrinsic_load_front_face: + /* gl_FrontFacing is in the sign of param.x + * rcp required because otherwise we can't differentiate -0.0 and +0.0 + */ + ctx->so->need_param = true; + + struct ir2_instr *tmp = instr_create_alu(ctx, nir_op_frcp, 1); + tmp->src[0] = ir2_src(ctx->f->inputs_count, 0, IR2_SRC_INPUT); + + instr = 
instr_create_alu_dest(ctx, nir_op_sge, &intr->dest); + instr->src[0] = ir2_src(tmp->idx, 0, IR2_SRC_SSA); + instr->src[1] = ir2_zero(ctx); + break; + default: + compile_error(ctx, "unimplemented intr %d\n", intr->intrinsic); + break; + } +} + +static void +emit_tex(struct ir2_context *ctx, nir_tex_instr * tex) +{ + bool is_rect = false, is_cube = false; + struct ir2_instr *instr; + nir_src *coord, *lod_bias; + + coord = lod_bias = NULL; + + for (unsigned i = 0; i < tex->num_srcs; i++) { + switch (tex->src[i].src_type) { + case nir_tex_src_coord: + coord = &tex->src[i].src; + break; + case nir_tex_src_bias: + case nir_tex_src_lod: + assert(!lod_bias); + lod_bias = &tex->src[i].src; + break; + default: + compile_error(ctx, "Unhandled NIR tex src type: %d\n", + tex->src[i].src_type); + return; + } + } + + switch (tex->op) { + case nir_texop_tex: + case nir_texop_txb: + case nir_texop_txl: + break; + default: + compile_error(ctx, "unimplemented texop %d\n", tex->op); + return; + } + + switch (tex->sampler_dim) { + case GLSL_SAMPLER_DIM_2D: + break; + case GLSL_SAMPLER_DIM_RECT: + is_rect = true; + break; + case GLSL_SAMPLER_DIM_CUBE: + is_cube = true; + break; + default: + compile_error(ctx, "unimplemented sampler %d\n", tex->sampler_dim); + return; + } + + struct ir2_src src_coord = make_src_noconst(ctx, *coord); + + /* for cube maps + * tmp = cube(coord) + * tmp.xy = tmp.xy / |tmp.z| + 1.5 + * coord = tmp.xyw + */ + if (is_cube) { + struct ir2_instr *rcp, *coord_xy; + unsigned reg_idx; + + instr = instr_create_alu_reg(ctx, ir2_op_cube, 15, NULL); + instr->src[0] = src_coord; + instr->src[0].swizzle = IR2_SWIZZLE_ZZXY; + instr->src[1] = src_coord; + instr->src[1].swizzle = IR2_SWIZZLE_YXZZ; + + reg_idx = instr->reg - ctx->reg; /* hacky */ + + rcp = instr_create_alu(ctx, nir_op_frcp, 1); + rcp->src[0] = ir2_src(reg_idx, IR2_SWIZZLE_Z, IR2_SRC_REG); + rcp->src[0].abs = true; + + coord_xy = instr_create_alu_reg(ctx, nir_op_ffma, 3, instr); + coord_xy->src[0] = 
ir2_src(reg_idx, 0, IR2_SRC_REG); + coord_xy->src[1] = ir2_src(rcp->idx, IR2_SWIZZLE_XXXX, IR2_SRC_SSA); + coord_xy->src[2] = load_const(ctx, (float[]) {1.5f}, 1); + + src_coord = ir2_src(reg_idx, 0, IR2_SRC_REG); + /* TODO: lod/bias transformed by src_coord.z ? */ + } + + instr = ir2_instr_create_fetch(ctx, &tex->dest, TEX_FETCH); + instr->src[0] = src_coord; + instr->src[0].swizzle = is_cube ? IR2_SWIZZLE_XYW : 0; + instr->fetch.tex.is_cube = is_cube; + instr->fetch.tex.is_rect = is_rect; + instr->fetch.tex.samp_id = tex->sampler_index; + + /* for lod/bias, we insert an extra src for the backend to deal with */ + if (lod_bias) { + instr->src[1] = make_src_noconst(ctx, *lod_bias); + /* backend will use 2-3 components so apply swizzle */ + swiz_merge_p(&instr->src[1].swizzle, IR2_SWIZZLE_XXXX); + instr->src_count = 2; + } +} + +static void +setup_input(struct ir2_context *ctx, nir_variable * in) +{ + struct fd2_shader_stateobj *so = ctx->so; + unsigned array_len = MAX2(glsl_get_length(in->type), 1); + unsigned n = in->data.driver_location; + unsigned slot = in->data.location; + + assert(array_len == 1); + + /* handle later */ + if (ctx->so->type == MESA_SHADER_VERTEX) + return; + + if (ctx->so->type != MESA_SHADER_FRAGMENT) + compile_error(ctx, "unknown shader type: %d\n", ctx->so->type); + + if (slot == VARYING_SLOT_PNTC) { + so->need_param = true; + return; + } + + n = ctx->f->inputs_count++; + + /* half of fragcoord from param reg, half from a varying */ + if (slot == VARYING_SLOT_POS) { + ctx->f->fragcoord = n; + so->need_param = true; + } + + ctx->f->inputs[n].slot = slot; + ctx->f->inputs[n].ncomp = glsl_get_components(in->type); + + /* in->data.interpolation? 
+ * opengl ES 2.0 can't do flat mode, but we still get it from GALLIUM_HUD + */ +} + +static void +emit_undef(struct ir2_context *ctx, nir_ssa_undef_instr * undef) +{ + /* TODO we don't want to emit anything for undefs */ + + struct ir2_instr *instr; + + instr = instr_create_alu_dest(ctx, nir_op_fmov, + &(nir_dest) {.ssa = undef->def,.is_ssa = true}); + instr->src[0] = ir2_src(0, 0, IR2_SRC_CONST); +} + +static void +emit_instr(struct ir2_context *ctx, nir_instr * instr) +{ + switch (instr->type) { + case nir_instr_type_alu: + emit_alu(ctx, nir_instr_as_alu(instr)); + break; + case nir_instr_type_deref: + /* ignored, handled as part of the intrinsic they are src to */ + break; + case nir_instr_type_intrinsic: + emit_intrinsic(ctx, nir_instr_as_intrinsic(instr)); + break; + case nir_instr_type_load_const: + /* dealt with when using nir_src */ + break; + case nir_instr_type_tex: + emit_tex(ctx, nir_instr_as_tex(instr)); + break; + case nir_instr_type_jump: + ctx->block_has_jump[ctx->block_idx] = true; + break; + case nir_instr_type_ssa_undef: + emit_undef(ctx, nir_instr_as_ssa_undef(instr)); + break; + default: + break; + } +} + +/* fragcoord.zw and a20x hw binning outputs */ +static void +extra_position_exports(struct ir2_context *ctx, bool binning) +{ + struct ir2_instr *instr, *rcp, *sc, *wincoord, *off; + + if (ctx->f->fragcoord < 0 && !binning) + return; + + instr = instr_create_alu(ctx, nir_op_fmax, 1); + instr->src[0] = ctx->position; + instr->src[0].swizzle = IR2_SWIZZLE_W; + instr->src[1] = ir2_zero(ctx); + + rcp = instr_create_alu(ctx, nir_op_frcp, 1); + rcp->src[0] = ir2_src(instr->idx, 0, IR2_SRC_SSA); + + sc = instr_create_alu(ctx, nir_op_fmul, 4); + sc->src[0] = ctx->position; + sc->src[1] = ir2_src(rcp->idx, IR2_SWIZZLE_XXXX, IR2_SRC_SSA); + + wincoord = instr_create_alu(ctx, nir_op_ffma, 4); + wincoord->src[0] = ir2_src(66, 0, IR2_SRC_CONST); + wincoord->src[1] = ir2_src(sc->idx, 0, IR2_SRC_SSA); + wincoord->src[2] = ir2_src(65, 0, IR2_SRC_CONST); + + 
/* fragcoord z/w */ + if (ctx->f->fragcoord >= 0 && !binning) { + instr = instr_create_alu(ctx, nir_op_fmov, 1); + instr->src[0] = ir2_src(wincoord->idx, IR2_SWIZZLE_Z, IR2_SRC_SSA); + instr->alu.export = ctx->f->fragcoord; + + instr = instr_create_alu(ctx, nir_op_fmov, 1); + instr->src[0] = ctx->position; + instr->src[0].swizzle = IR2_SWIZZLE_W; + instr->alu.export = ctx->f->fragcoord; + instr->alu.write_mask = 2; + } + + if (!binning) + return; + + off = instr_create_alu(ctx, nir_op_fadd, 1); + off->src[0] = ir2_src(64, 0, IR2_SRC_CONST); + off->src[1] = ir2_src(2, 0, IR2_SRC_INPUT); + + /* 8 max set in freedreno_screen.. unneeded instrs patched out */ + for (int i = 0; i < 8; i++) { + instr = instr_create_alu(ctx, nir_op_ffma, 4); + instr->src[0] = ir2_src(1, IR2_SWIZZLE_WYWW, IR2_SRC_CONST); + instr->src[1] = ir2_src(off->idx, IR2_SWIZZLE_XXXX, IR2_SRC_SSA); + instr->src[2] = ir2_src(3 + i, 0, IR2_SRC_CONST); + instr->alu.export = 32; + + instr = instr_create_alu(ctx, nir_op_ffma, 4); + instr->src[0] = ir2_src(68 + i * 2, 0, IR2_SRC_CONST); + instr->src[1] = ir2_src(wincoord->idx, 0, IR2_SRC_SSA); + instr->src[2] = ir2_src(67 + i * 2, 0, IR2_SRC_CONST); + instr->alu.export = 33; + } +} + +static bool emit_cf_list(struct ir2_context *ctx, struct exec_list *list); + +static bool +emit_block(struct ir2_context *ctx, nir_block * block) +{ + struct ir2_instr *instr; + nir_block *succs = block->successors[0]; + + ctx->block_idx = block->index; + + nir_foreach_instr(instr, block) + emit_instr(ctx, instr); + + if (!succs || !succs->index) + return false; + + /* we want to be smart and always jump and have the backend cleanup + * but we are not, so there are two cases where jump is needed: + * loops (succs index lower) + * jumps (jump instruction seen in block) + */ + if (succs->index > block->index && !ctx->block_has_jump[block->index]) + return false; + + assert(block->successors[1] == NULL); + + instr = ir2_instr_create(ctx, IR2_CF); + instr->cf.block_idx = 
succs->index; + /* XXX can't jump to a block with different predicate */ + return true; +} + +static void +emit_if(struct ir2_context *ctx, nir_if * nif) +{ + unsigned pred = ctx->pred, pred_idx = ctx->pred_idx; + struct ir2_instr *instr; + + /* XXX: blob seems to always use same register for condition */ + + instr = ir2_instr_create(ctx, IR2_ALU); + instr->src[0] = make_src(ctx, nif->condition); + instr->src_count = 1; + instr->ssa.ncomp = 1; + instr->alu.vector_opc = VECTOR_NONE; + instr->alu.scalar_opc = SCALAR_NONE; + instr->alu.export = -1; + instr->alu.write_mask = 1; + instr->pred = 0; + + /* if nested, use PRED_SETNE_PUSHv */ + if (pred) { + instr->alu.vector_opc = PRED_SETNE_PUSHv; + instr->src[1] = instr->src[0]; + instr->src[0] = ir2_src(pred_idx, 0, IR2_SRC_SSA); + instr->src[0].swizzle = IR2_SWIZZLE_XXXX; + instr->src[1].swizzle = IR2_SWIZZLE_XXXX; + instr->src_count = 2; + } else { + instr->alu.scalar_opc = PRED_SETNEs; + } + + ctx->pred_idx = instr->idx; + ctx->pred = 3; + + emit_cf_list(ctx, &nif->then_list); + + /* TODO: if these is no else branch we don't need this + * and if the else branch is simple, can just flip ctx->pred instead + */ + instr = ir2_instr_create(ctx, IR2_ALU); + instr->src[0] = ir2_src(ctx->pred_idx, 0, IR2_SRC_SSA); + instr->src_count = 1; + instr->ssa.ncomp = 1; + instr->alu.vector_opc = VECTOR_NONE; + instr->alu.scalar_opc = PRED_SET_INVs; + instr->alu.export = -1; + instr->alu.write_mask = 1; + instr->pred = 0; + ctx->pred_idx = instr->idx; + + emit_cf_list(ctx, &nif->else_list); + + /* restore predicate for nested predicates */ + if (pred) { + instr = ir2_instr_create(ctx, IR2_ALU); + instr->src[0] = ir2_src(ctx->pred_idx, 0, IR2_SRC_SSA); + instr->src_count = 1; + instr->ssa.ncomp = 1; + instr->alu.vector_opc = VECTOR_NONE; + instr->alu.scalar_opc = PRED_SET_POPs; + instr->alu.export = -1; + instr->alu.write_mask = 1; + instr->pred = 0; + ctx->pred_idx = instr->idx; + } + + /* restore ctx->pred */ + ctx->pred = pred; +} + 
+/* get the highest block idx in the loop, so we know when + * we can free registers that are allocated outside the loop + */ +static unsigned +loop_last_block(struct exec_list *list) +{ + nir_cf_node *node = + exec_node_data(nir_cf_node, exec_list_get_tail(list), node); + switch (node->type) { + case nir_cf_node_block: + return nir_cf_node_as_block(node)->index; + case nir_cf_node_if: + assert(0); /* XXX could this ever happen? */ + return 0; + case nir_cf_node_loop: + return loop_last_block(&nir_cf_node_as_loop(node)->body); + default: + compile_error(ctx, "Not supported\n"); + return 0; + } +} + +static void +emit_loop(struct ir2_context *ctx, nir_loop *nloop) +{ + ctx->loop_last_block[++ctx->loop_depth] = loop_last_block(&nloop->body); + emit_cf_list(ctx, &nloop->body); + ctx->loop_depth--; +} + +static bool +emit_cf_list(struct ir2_context *ctx, struct exec_list *list) +{ + bool ret = false; + foreach_list_typed(nir_cf_node, node, node, list) { + ret = false; + switch (node->type) { + case nir_cf_node_block: + ret = emit_block(ctx, nir_cf_node_as_block(node)); + break; + case nir_cf_node_if: + emit_if(ctx, nir_cf_node_as_if(node)); + break; + case nir_cf_node_loop: + emit_loop(ctx, nir_cf_node_as_loop(node)); + break; + case nir_cf_node_function: + compile_error(ctx, "Not supported\n"); + break; + } + } + return ret; +} + +static void cleanup_binning(struct ir2_context *ctx) +{ + assert(ctx->so->type == MESA_SHADER_VERTEX); + + /* kill non-position outputs for binning variant */ + nir_foreach_block(block, nir_shader_get_entrypoint(ctx->nir)) { + nir_foreach_instr_safe(instr, block) { + if (instr->type != nir_instr_type_intrinsic) + continue; + + nir_intrinsic_instr *intr = nir_instr_as_intrinsic(instr); + unsigned slot; + switch (intr->intrinsic) { + case nir_intrinsic_store_deref: { + nir_deref_instr *deref = nir_src_as_deref(intr->src[0]); + assert(deref->deref_type == nir_deref_type_var); + slot = deref->var->data.location; + } break; + case 
nir_intrinsic_store_output: + slot = output_slot(ctx, intr); + break; + default: + continue; + } + + if (slot != VARYING_SLOT_POS) + nir_instr_remove(instr); + } + } + + ir2_optimize_nir(ctx->nir, false); +} + +void +ir2_nir_compile(struct ir2_context *ctx, bool binning) +{ + struct fd2_shader_stateobj *so = ctx->so; + + memset(ctx->ssa_map, 0xff, sizeof(ctx->ssa_map)); + + ctx->nir = nir_shader_clone(NULL, so->nir); + + if (binning) + cleanup_binning(ctx); + + /* postprocess */ + OPT_V(ctx->nir, nir_opt_algebraic_late); + + OPT_V(ctx->nir, nir_lower_to_source_mods, nir_lower_all_source_mods); + OPT_V(ctx->nir, nir_copy_prop); + OPT_V(ctx->nir, nir_opt_dce); + OPT_V(ctx->nir, nir_opt_move_comparisons); + + OPT_V(ctx->nir, nir_lower_bool_to_float); + + /* lower to scalar instructions that can only be scalar on a2xx */ + OPT_V(ctx->nir, ir2_nir_lower_scalar); + + OPT_V(ctx->nir, nir_lower_locals_to_regs); + + OPT_V(ctx->nir, nir_convert_from_ssa, true); + + OPT_V(ctx->nir, nir_move_vec_src_uses_to_dest); + OPT_V(ctx->nir, nir_lower_vec_to_movs); + + OPT_V(ctx->nir, nir_opt_dce); + + nir_sweep(ctx->nir); + + if (fd_mesa_debug & FD_DBG_DISASM) { + debug_printf("----------------------\n"); + nir_print_shader(ctx->nir, stdout); + debug_printf("----------------------\n"); + } + + /* fd2_shader_stateobj init */ + if (so->type == MESA_SHADER_FRAGMENT) { + ctx->f->fragcoord = -1; + ctx->f->inputs_count = 0; + memset(ctx->f->inputs, 0, sizeof(ctx->f->inputs)); + } + + /* Setup inputs: */ + nir_foreach_variable(in, &ctx->nir->inputs) + setup_input(ctx, in); + + if (so->type == MESA_SHADER_FRAGMENT) { + unsigned idx; + for (idx = 0; idx < ctx->f->inputs_count; idx++) { + ctx->input[idx].ncomp = ctx->f->inputs[idx].ncomp; + update_range(ctx, &ctx->input[idx]); + } + /* assume we have param input and kill it later if not */ + ctx->input[idx].ncomp = 4; + update_range(ctx, &ctx->input[idx]); + } else { + ctx->input[0].ncomp = 1; + ctx->input[2].ncomp = 1; + update_range(ctx, 
&ctx->input[0]); + update_range(ctx, &ctx->input[2]); + } + + /* And emit the body: */ + nir_function_impl *fxn = nir_shader_get_entrypoint(ctx->nir); + + nir_foreach_register(reg, &fxn->registers) { + ctx->reg[reg->index].ncomp = reg->num_components; + ctx->reg_count = MAX2(ctx->reg_count, reg->index + 1); + } + + nir_metadata_require(fxn, nir_metadata_block_index); + emit_cf_list(ctx, &fxn->body); + /* TODO emit_block(ctx, fxn->end_block); */ + + if (so->type == MESA_SHADER_VERTEX) + extra_position_exports(ctx, binning); + + ralloc_free(ctx->nir); + + /* kill unused param input */ + if (so->type == MESA_SHADER_FRAGMENT && !so->need_param) + ctx->input[ctx->f->inputs_count].initialized = false; +} diff -Nru mesa-18.3.3/src/gallium/drivers/freedreno/a2xx/ir2_nir_lower_scalar.c mesa-19.0.1/src/gallium/drivers/freedreno/a2xx/ir2_nir_lower_scalar.c --- mesa-18.3.3/src/gallium/drivers/freedreno/a2xx/ir2_nir_lower_scalar.c 1970-01-01 00:00:00.000000000 +0000 +++ mesa-19.0.1/src/gallium/drivers/freedreno/a2xx/ir2_nir_lower_scalar.c 2019-03-31 23:16:37.000000000 +0000 @@ -0,0 +1,174 @@ +/* + * Copyright (C) 2018 Jonathan Marek + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + * Authors: + * Jonathan Marek + */ + +/* some operations can only be scalar on a2xx: + * rsq, rcp, log2, exp2, cos, sin, sqrt + * mostly copy-pasted from nir_lower_alu_to_scalar.c + */ + +#include "ir2_private.h" +#include "compiler/nir/nir_builder.h" + +static void +nir_alu_ssa_dest_init(nir_alu_instr * instr, unsigned num_components, + unsigned bit_size) +{ + nir_ssa_dest_init(&instr->instr, &instr->dest.dest, num_components, + bit_size, NULL); + instr->dest.write_mask = (1 << num_components) - 1; +} + +static void +lower_reduction(nir_alu_instr * instr, nir_op chan_op, nir_op merge_op, + nir_builder * builder) +{ + unsigned num_components = nir_op_infos[instr->op].input_sizes[0]; + + nir_ssa_def *last = NULL; + for (unsigned i = 0; i < num_components; i++) { + nir_alu_instr *chan = + nir_alu_instr_create(builder->shader, chan_op); + nir_alu_ssa_dest_init(chan, 1, instr->dest.dest.ssa.bit_size); + nir_alu_src_copy(&chan->src[0], &instr->src[0], chan); + chan->src[0].swizzle[0] = chan->src[0].swizzle[i]; + if (nir_op_infos[chan_op].num_inputs > 1) { + assert(nir_op_infos[chan_op].num_inputs == 2); + nir_alu_src_copy(&chan->src[1], &instr->src[1], chan); + chan->src[1].swizzle[0] = chan->src[1].swizzle[i]; + } + chan->exact = instr->exact; + + nir_builder_instr_insert(builder, &chan->instr); + + if (i == 0) { + last = &chan->dest.dest.ssa; + } else { + last = nir_build_alu(builder, merge_op, + last, &chan->dest.dest.ssa, NULL, NULL); + } + } + + assert(instr->dest.write_mask == 1); + nir_ssa_def_rewrite_uses(&instr->dest.dest.ssa, nir_src_for_ssa(last)); + nir_instr_remove(&instr->instr); +} + +static bool lower_scalar(nir_alu_instr * instr, nir_builder * b) +{ + 
assert(instr->dest.dest.is_ssa); + assert(instr->dest.write_mask != 0); + + b->cursor = nir_before_instr(&instr->instr); + b->exact = instr->exact; + +#define LOWER_REDUCTION(name, chan, merge) \ + case name##2: \ + case name##3: \ + case name##4: \ + lower_reduction(instr, chan, merge, b); \ + return true; + + switch (instr->op) { + /* TODO: handle these instead of lowering */ + LOWER_REDUCTION(nir_op_fall_equal, nir_op_seq, nir_op_fand); + LOWER_REDUCTION(nir_op_fany_nequal, nir_op_sne, nir_op_for); + + default: + return false; + case nir_op_frsq: + case nir_op_frcp: + case nir_op_flog2: + case nir_op_fexp2: + case nir_op_fcos: + case nir_op_fsin: + case nir_op_fsqrt: + break; + } + + assert(nir_op_infos[instr->op].num_inputs == 1); + + unsigned num_components = instr->dest.dest.ssa.num_components; + nir_ssa_def *comps[NIR_MAX_VEC_COMPONENTS] = { NULL }; + unsigned chan; + + if (num_components == 1) + return false; + + for (chan = 0; chan < num_components; chan++) { + assert(instr->dest.write_mask & (1 << chan)); + + nir_alu_instr *lower = nir_alu_instr_create(b->shader, instr->op); + + nir_alu_src_copy(&lower->src[0], &instr->src[0], lower); + lower->src[0].swizzle[0] = instr->src[0].swizzle[chan]; + + nir_alu_ssa_dest_init(lower, 1, instr->dest.dest.ssa.bit_size); + lower->dest.saturate = instr->dest.saturate; + comps[chan] = &lower->dest.dest.ssa; + lower->exact = instr->exact; + + nir_builder_instr_insert(b, &lower->instr); + } + + nir_ssa_def *vec = nir_vec(b, comps, num_components); + + nir_ssa_def_rewrite_uses(&instr->dest.dest.ssa, nir_src_for_ssa(vec)); + + nir_instr_remove(&instr->instr); + return true; +} + +static bool lower_scalar_impl(nir_function_impl * impl) +{ + nir_builder builder; + nir_builder_init(&builder, impl); + bool progress = false; + + nir_foreach_block(block, impl) { + nir_foreach_instr_safe(instr, block) { + if (instr->type == nir_instr_type_alu) + progress = lower_scalar(nir_instr_as_alu(instr), &builder) + || progress; + } + } + + 
nir_metadata_preserve(impl, nir_metadata_block_index | + nir_metadata_dominance); + + return progress; +} + +bool ir2_nir_lower_scalar(nir_shader * shader) +{ + bool progress = false; + + nir_foreach_function(function, shader) { + if (function->impl) + progress = lower_scalar_impl(function->impl) || progress; + } + + return progress; +} diff -Nru mesa-18.3.3/src/gallium/drivers/freedreno/a2xx/ir2_private.h mesa-19.0.1/src/gallium/drivers/freedreno/a2xx/ir2_private.h --- mesa-18.3.3/src/gallium/drivers/freedreno/a2xx/ir2_private.h 1970-01-01 00:00:00.000000000 +0000 +++ mesa-19.0.1/src/gallium/drivers/freedreno/a2xx/ir2_private.h 2019-03-31 23:16:37.000000000 +0000 @@ -0,0 +1,396 @@ +/* + * Copyright (C) 2018 Jonathan Marek + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ * + * Authors: + * Jonathan Marek + */ + +#include +#include +#include +#include +#include + +#include "ir2.h" +#include "fd2_program.h" +#include "instr-a2xx.h" + +enum ir2_src_type { + IR2_SRC_SSA, + IR2_SRC_REG, + IR2_SRC_INPUT, + IR2_SRC_CONST, +}; + +struct ir2_src { + /* num can mean different things + * ssa: index of instruction + * reg: index in ctx->reg array + * input: index in ctx->input array + * const: constant index (C0, C1, etc) + */ + uint16_t num; + uint8_t swizzle; + enum ir2_src_type type : 2; + uint8_t abs : 1; + uint8_t negate : 1; + uint8_t : 4; +}; + +struct ir2_reg_component { + uint8_t c : 3; /* assigned x/y/z/w (7=dont write, for fetch instr) */ + bool alloc : 1; /* is it currently allocated */ + uint8_t ref_count; /* for ra */ +}; + +struct ir2_reg { + uint8_t idx; /* assigned hardware register */ + uint8_t ncomp; + + uint8_t loop_depth; + bool initialized; + /* block_idx to free on (-1 = free on ref_count==0) */ + int block_idx_free; + struct ir2_reg_component comp[4]; +}; + +struct ir2_instr { + unsigned idx; + + unsigned block_idx; + + enum { + IR2_NONE, + IR2_FETCH, + IR2_ALU, + IR2_CF, + } type : 2; + + /* instruction needs to be emitted (for scheduling) */ + bool need_emit : 1; + + /* predicate value - (usually) same for entire block */ + uint8_t pred : 2; + + /* src */ + uint8_t src_count; + struct ir2_src src[4]; + + /* dst */ + bool is_ssa; + union { + struct ir2_reg ssa; + struct ir2_reg *reg; + }; + + /* type-specific */ + union { + struct { + instr_fetch_opc_t opc : 5; + union { + struct { + uint8_t const_idx; + uint8_t const_idx_sel; + } vtx; + struct { + bool is_cube : 1; + bool is_rect : 1; + uint8_t samp_id; + } tex; + }; + } fetch; + struct { + /* store possible opcs, then we can choose vector/scalar instr */ + instr_scalar_opc_t scalar_opc : 6; + instr_vector_opc_t vector_opc : 5; + /* same as nir */ + uint8_t write_mask : 4; + bool saturate : 1; + + /* export idx (-1 no export) */ + int8_t export; + + /* for 
scalarized 2 src instruction */ + uint8_t src1_swizzle; + } alu; + struct { + /* jmp dst block_idx */ + uint8_t block_idx; + } cf; + }; +}; + +struct ir2_sched_instr { + uint32_t reg_state[8]; + struct ir2_instr *instr, *instr_s; +}; + +struct ir2_context { + struct fd2_shader_stateobj *so; + + unsigned block_idx, pred_idx; + uint8_t pred; + bool block_has_jump[64]; + + unsigned loop_last_block[64]; + unsigned loop_depth; + + nir_shader *nir; + + /* ssa index of position output */ + struct ir2_src position; + + /* to translate SSA ids to instruction ids */ + int16_t ssa_map[1024]; + + struct ir2_shader_info *info; + struct ir2_frag_linkage *f; + + int prev_export; + + /* RA state */ + struct ir2_reg* live_regs[64]; + uint32_t reg_state[256/32]; /* 64*4 bits */ + + /* inputs */ + struct ir2_reg input[16 + 1]; /* 16 + param */ + + /* non-ssa regs */ + struct ir2_reg reg[64]; + unsigned reg_count; + + struct ir2_instr instr[0x300]; + unsigned instr_count; + + struct ir2_sched_instr instr_sched[0x180]; + unsigned instr_sched_count; +}; + +void assemble(struct ir2_context *ctx, bool binning); + +void ir2_nir_compile(struct ir2_context *ctx, bool binning); +bool ir2_nir_lower_scalar(nir_shader * shader); + +void ra_count_refs(struct ir2_context *ctx); +void ra_reg(struct ir2_context *ctx, struct ir2_reg *reg, int force_idx, + bool export, uint8_t export_writemask); +void ra_src_free(struct ir2_context *ctx, struct ir2_instr *instr); +void ra_block_free(struct ir2_context *ctx, unsigned block); + +void cp_src(struct ir2_context *ctx); +void cp_export(struct ir2_context *ctx); + +/* utils */ +enum { + IR2_SWIZZLE_Y = 1 << 0, + IR2_SWIZZLE_Z = 2 << 0, + IR2_SWIZZLE_W = 3 << 0, + + IR2_SWIZZLE_ZW = 2 << 0 | 2 << 2, + + IR2_SWIZZLE_XYW = 0 << 0 | 0 << 2 | 1 << 4, + + IR2_SWIZZLE_XXXX = 0 << 0 | 3 << 2 | 2 << 4 | 1 << 6, + IR2_SWIZZLE_YYYY = 1 << 0 | 0 << 2 | 3 << 4 | 2 << 6, + IR2_SWIZZLE_ZZZZ = 2 << 0 | 1 << 2 | 0 << 4 | 3 << 6, + IR2_SWIZZLE_WWWW = 3 << 0 | 2 << 2 | 1 << 4 
| 0 << 6, + IR2_SWIZZLE_WYWW = 3 << 0 | 0 << 2 | 1 << 4 | 0 << 6, + IR2_SWIZZLE_XYXY = 0 << 0 | 0 << 2 | 2 << 4 | 2 << 6, + IR2_SWIZZLE_ZZXY = 2 << 0 | 1 << 2 | 2 << 4 | 2 << 6, + IR2_SWIZZLE_YXZZ = 1 << 0 | 3 << 2 | 0 << 4 | 3 << 6, +}; + +#define compile_error(ctx, args...) ({ \ + printf(args); \ + assert(0); \ +}) + +static inline struct ir2_src +ir2_src(uint16_t num, uint8_t swizzle, enum ir2_src_type type) +{ + return (struct ir2_src) { + .num = num, + .swizzle = swizzle, + .type = type + }; +} + +/* ir2_assemble uses it .. */ +struct ir2_src ir2_zero(struct ir2_context *ctx); + +#define ir2_foreach_instr(it, ctx) \ + for (struct ir2_instr *it = (ctx)->instr; ({ \ + while (it != &(ctx)->instr[(ctx)->instr_count] && it->type == IR2_NONE) it++; \ + it != &(ctx)->instr[(ctx)->instr_count]; }); it++) + +#define ir2_foreach_live_reg(it, ctx) \ + for (struct ir2_reg **__ptr = (ctx)->live_regs, *it; ({ \ + while (__ptr != &(ctx)->live_regs[64] && *__ptr == NULL) __ptr++; \ + __ptr != &(ctx)->live_regs[64] ? (it=*__ptr) : NULL; }); it++) + +#define ir2_foreach_avail(it) \ + for (struct ir2_instr **__instrp = avail, *it; \ + it = *__instrp, __instrp != &avail[avail_count]; __instrp++) + +#define ir2_foreach_src(it, instr) \ + for (struct ir2_src *it = instr->src; \ + it != &instr->src[instr->src_count]; it++) + +/* mask for register allocation + * 64 registers with 4 components each = 256 bits + */ +/* typedef struct { + uint64_t data[4]; +} regmask_t; */ + +static inline bool mask_isset(uint32_t * mask, unsigned num) +{ + return ! 
!(mask[num / 32] & 1 << num % 32); +} + +static inline void mask_set(uint32_t * mask, unsigned num) +{ + mask[num / 32] |= 1 << num % 32; +} + +static inline void mask_unset(uint32_t * mask, unsigned num) +{ + mask[num / 32] &= ~(1 << num % 32); +} + +static inline unsigned mask_reg(uint32_t * mask, unsigned num) +{ + return mask[num / 8] >> num % 8 * 4 & 0xf; +} + +static inline bool is_export(struct ir2_instr *instr) +{ + return instr->type == IR2_ALU && instr->alu.export >= 0; +} + +static inline instr_alloc_type_t export_buf(unsigned num) +{ + return num < 32 ? SQ_PARAMETER_PIXEL : + num >= 62 ? SQ_POSITION : SQ_MEMORY; +} + +/* component c for channel i */ +static inline unsigned swiz_set(unsigned c, unsigned i) +{ + return ((c - i) & 3) << i * 2; +} + +/* get swizzle in channel i */ +static inline unsigned swiz_get(unsigned swiz, unsigned i) +{ + return ((swiz >> i * 2) + i) & 3; +} + +static inline unsigned swiz_merge(unsigned swiz0, unsigned swiz1) +{ + unsigned swiz = 0; + for (int i = 0; i < 4; i++) + swiz |= swiz_set(swiz_get(swiz0, swiz_get(swiz1, i)), i); + return swiz; +} + +static inline void swiz_merge_p(uint8_t *swiz0, unsigned swiz1) +{ + unsigned swiz = 0; + for (int i = 0; i < 4; i++) + swiz |= swiz_set(swiz_get(*swiz0, swiz_get(swiz1, i)), i); + *swiz0 = swiz; +} + +static inline struct ir2_reg * get_reg(struct ir2_instr *instr) +{ + return instr->is_ssa ? 
&instr->ssa : instr->reg; +} + +static inline struct ir2_reg * +get_reg_src(struct ir2_context *ctx, struct ir2_src *src) +{ + switch (src->type) { + case IR2_SRC_INPUT: + return &ctx->input[src->num]; + case IR2_SRC_SSA: + return &ctx->instr[src->num].ssa; + case IR2_SRC_REG: + return &ctx->reg[src->num]; + default: + return NULL; + } +} + +/* gets a ncomp value for the dst */ +static inline unsigned dst_ncomp(struct ir2_instr *instr) +{ + if (instr->is_ssa) + return instr->ssa.ncomp; + + if (instr->type == IR2_FETCH) + return instr->reg->ncomp; + + assert(instr->type == IR2_ALU); + + unsigned ncomp = 0; + for (int i = 0; i < instr->reg->ncomp; i++) + ncomp += !!(instr->alu.write_mask & 1 << i); + return ncomp; +} + +/* gets a ncomp value for the src registers */ +static inline unsigned src_ncomp(struct ir2_instr *instr) +{ + if (instr->type == IR2_FETCH) { + switch (instr->fetch.opc) { + case VTX_FETCH: + return 1; + case TEX_FETCH: + return instr->fetch.tex.is_cube ? 3 : 2; + case TEX_SET_TEX_LOD: + return 1; + default: + assert(0); + } + } + + switch (instr->alu.scalar_opc) { + case PRED_SETEs ... 
KILLONEs: + return 1; + default: + break; + } + + switch (instr->alu.vector_opc) { + case DOT2ADDv: + return 2; + case DOT3v: + return 3; + case DOT4v: + case CUBEv: + case PRED_SETE_PUSHv: + return 4; + default: + return dst_ncomp(instr); + } +} diff -Nru mesa-18.3.3/src/gallium/drivers/freedreno/a2xx/ir2_ra.c mesa-19.0.1/src/gallium/drivers/freedreno/a2xx/ir2_ra.c --- mesa-18.3.3/src/gallium/drivers/freedreno/a2xx/ir2_ra.c 1970-01-01 00:00:00.000000000 +0000 +++ mesa-19.0.1/src/gallium/drivers/freedreno/a2xx/ir2_ra.c 2019-03-31 23:16:37.000000000 +0000 @@ -0,0 +1,226 @@ +/* + * Copyright (C) 2018 Jonathan Marek + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ * + * Authors: + * Jonathan Marek + */ + +#include "ir2_private.h" + +/* if an instruction has side effects, we should never kill it */ +static bool has_side_effects(struct ir2_instr *instr) +{ + if (instr->type == IR2_CF) + return true; + else if (instr->type == IR2_FETCH) + return false; + + switch (instr->alu.scalar_opc) { + case PRED_SETEs ... KILLONEs: + return true; + default: + break; + } + + switch (instr->alu.vector_opc) { + case PRED_SETE_PUSHv ... KILLNEv: + return true; + default: + break; + } + + return instr->alu.export >= 0; +} + +/* mark an instruction as required, and all its sources recursively */ +static void set_need_emit(struct ir2_context *ctx, struct ir2_instr *instr) +{ + struct ir2_reg *reg; + + /* don't repeat work already done */ + if (instr->need_emit) + return; + + instr->need_emit = true; + + ir2_foreach_src(src, instr) { + switch (src->type) { + case IR2_SRC_SSA: + set_need_emit(ctx, &ctx->instr[src->num]); + break; + case IR2_SRC_REG: + /* slow .. */ + reg = get_reg_src(ctx, src); + ir2_foreach_instr(instr, ctx) { + if (!instr->is_ssa && instr->reg == reg) + set_need_emit(ctx, instr); + } + default: + break; + } + } +} + +/* get current bit mask of allocated components for a register */ +static unsigned reg_mask(struct ir2_context *ctx, unsigned idx) +{ + return ctx->reg_state[idx/8] >> idx%8*4 & 0xf; +} + +static void reg_setmask(struct ir2_context *ctx, unsigned idx, unsigned c) +{ + idx = idx * 4 + c; + ctx->reg_state[idx/32] |= 1 << idx%32; +} + +static void reg_freemask(struct ir2_context *ctx, unsigned idx, unsigned c) +{ + idx = idx * 4 + c; + ctx->reg_state[idx/32] &= ~(1 << idx%32); +} + +void ra_count_refs(struct ir2_context *ctx) +{ + struct ir2_reg *reg; + + /* mark instructions as needed + * need to do this because "substitutions" pass makes many movs not needed + */ + ir2_foreach_instr(instr, ctx) { + if (has_side_effects(instr)) + set_need_emit(ctx, instr); + } + + /* compute ref_counts */ + ir2_foreach_instr(instr, 
ctx) { + /* kill non-needed so they can be skipped */ + if (!instr->need_emit) { + instr->type = IR2_NONE; + continue; + } + + ir2_foreach_src(src, instr) { + if (src->type == IR2_SRC_CONST) + continue; + + reg = get_reg_src(ctx, src); + for (int i = 0; i < src_ncomp(instr); i++) + reg->comp[swiz_get(src->swizzle, i)].ref_count++; + } + } +} + +void ra_reg(struct ir2_context *ctx, struct ir2_reg *reg, int force_idx, + bool export, uint8_t export_writemask) +{ + /* for export, don't allocate anything but set component layout */ + if (export) { + for (int i = 0; i < 4; i++) + reg->comp[i].c = i; + return; + } + + unsigned idx = force_idx; + + /* TODO: allocate into the same register if theres room + * note: the blob doesn't do it, so verify that it is indeed better + * also, doing it would conflict with scalar mov insertion + */ + + /* check if already allocated */ + for (int i = 0; i < reg->ncomp; i++) { + if (reg->comp[i].alloc) + return; + } + + if (force_idx < 0) { + for (idx = 0; idx < 64; idx++) { + if (reg_mask(ctx, idx) == 0) + break; + } + } + assert(idx != 64); /* TODO ran out of register space.. */ + + /* update max_reg value */ + ctx->info->max_reg = MAX2(ctx->info->max_reg, (int) idx); + + unsigned mask = reg_mask(ctx, idx); + + for (int i = 0; i < reg->ncomp; i++) { + /* don't allocate never used values */ + if (reg->comp[i].ref_count == 0) { + reg->comp[i].c = 7; + continue; + } + + /* TODO */ + unsigned c = 1 ? 
i : (ffs(~mask) - 1); + mask |= 1 << c; + reg->comp[i].c = c; + reg_setmask(ctx, idx, c); + reg->comp[i].alloc = true; + } + + reg->idx = idx; + ctx->live_regs[reg->idx] = reg; +} + +/* reduce srcs ref_count and free if needed */ +void ra_src_free(struct ir2_context *ctx, struct ir2_instr *instr) +{ + struct ir2_reg *reg; + struct ir2_reg_component *comp; + + ir2_foreach_src(src, instr) { + if (src->type == IR2_SRC_CONST) + continue; + + reg = get_reg_src(ctx, src); + /* XXX use before write case */ + + for (int i = 0; i < src_ncomp(instr); i++) { + comp = ®->comp[swiz_get(src->swizzle, i)]; + if (!--comp->ref_count && reg->block_idx_free < 0) { + reg_freemask(ctx, reg->idx, comp->c); + comp->alloc = false; + } + } + } +} + +/* free any regs left for a block */ +void ra_block_free(struct ir2_context *ctx, unsigned block) +{ + ir2_foreach_live_reg(reg, ctx) { + if (reg->block_idx_free != block) + continue; + + for (int i = 0; i < reg->ncomp; i++) { + if (!reg->comp[i].alloc) /* XXX should never be true? 
*/ + continue; + + reg_freemask(ctx, reg->idx, reg->comp[i].c); + reg->comp[i].alloc = false; + } + ctx->live_regs[reg->idx] = NULL; + } +} diff -Nru mesa-18.3.3/src/gallium/drivers/freedreno/a2xx/ir-a2xx.c mesa-19.0.1/src/gallium/drivers/freedreno/a2xx/ir-a2xx.c --- mesa-18.3.3/src/gallium/drivers/freedreno/a2xx/ir-a2xx.c 2018-12-07 18:58:04.000000000 +0000 +++ mesa-19.0.1/src/gallium/drivers/freedreno/a2xx/ir-a2xx.c 1970-01-01 00:00:00.000000000 +0000 @@ -1,809 +0,0 @@ -/* - * Copyright (c) 2012 Rob Clark - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * the rights to use, copy, modify, merge, publish, distribute, sublicense, - * and/or sell copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice (including the next - * paragraph) shall be included in all copies or substantial portions of the - * Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL - * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ - -#include "ir-a2xx.h" - -#include -#include -#include -#include - -#include "freedreno_util.h" -#include "instr-a2xx.h" - -#define DEBUG_MSG(f, ...) do { if (0) DBG(f, ##__VA_ARGS__); } while (0) -#define WARN_MSG(f, ...) DBG("WARN: "f, ##__VA_ARGS__) -#define ERROR_MSG(f, ...) 
DBG("ERROR: "f, ##__VA_ARGS__) - -static int instr_emit(struct ir2_instruction *instr, uint32_t *dwords, - uint32_t idx, struct ir2_shader_info *info); - -static uint32_t reg_fetch_src_swiz(struct ir2_src_register *reg, uint32_t n); -static uint32_t reg_fetch_dst_swiz(struct ir2_dst_register *reg); -static uint32_t reg_alu_dst_swiz(struct ir2_dst_register *reg); -static uint32_t reg_alu_src_swiz(struct ir2_src_register *reg); - -/* simple allocator to carve allocations out of an up-front allocated heap, - * so that we can free everything easily in one shot. - */ -static void * ir2_alloc(struct ir2_shader *shader, int sz) -{ - void *ptr = &shader->heap[shader->heap_idx]; - shader->heap_idx += align(sz, 4) / 4; - return ptr; -} - -static char * ir2_strdup(struct ir2_shader *shader, const char *str) -{ - char *ptr = NULL; - if (str) { - int len = strlen(str); - ptr = ir2_alloc(shader, len+1); - memcpy(ptr, str, len); - ptr[len] = '\0'; - } - return ptr; -} - -struct ir2_shader * ir2_shader_create(void) -{ - DEBUG_MSG(""); - struct ir2_shader *shader = calloc(1, sizeof(struct ir2_shader)); - shader->max_reg = -1; - return shader; -} - -void ir2_shader_destroy(struct ir2_shader *shader) -{ - DEBUG_MSG(""); - free(shader); -} - -/* check if an instruction is a simple MOV - */ -static struct ir2_instruction * simple_mov(struct ir2_instruction *instr, - bool output) -{ - struct ir2_src_register *src_reg = instr->src_reg; - struct ir2_dst_register *dst_reg = &instr->dst_reg; - struct ir2_register *reg; - unsigned i; - - /* MAXv used for MOV */ - if (instr->instr_type != IR2_ALU_VECTOR || - instr->alu_vector.opc != MAXv) - return NULL; - - /* non identical srcs */ - if (src_reg[0].num != src_reg[1].num) - return NULL; - - /* flags */ - int flags = IR2_REG_NEGATE | IR2_REG_ABS; - if (output) - flags |= IR2_REG_INPUT | IR2_REG_CONST; - if ((src_reg[0].flags & flags) || (src_reg[1].flags & flags)) - return NULL; - - /* clamping */ - if (instr->alu_vector.clamp) - return NULL; - 
- /* swizzling */ - for (i = 0; i < 4; i++) { - char swiz = (dst_reg->swizzle ? dst_reg->swizzle : "xyzw")[i]; - if (swiz == '_') - continue; - - if (swiz != (src_reg[0].swizzle ? src_reg[0].swizzle : "xyzw")[i] || - swiz != (src_reg[1].swizzle ? src_reg[1].swizzle : "xyzw")[i]) - return NULL; - } - - if (output) - reg = &instr->shader->reg[src_reg[0].num]; - else - reg = &instr->shader->reg[dst_reg->num]; - - assert(reg->write_idx >= 0); - if (reg->write_idx != reg->write_idx2) - return NULL; - - if (!output) - return instr; - - instr = instr->shader->instr[reg->write_idx]; - return instr->instr_type != IR2_ALU_VECTOR ? NULL : instr; -} - -static int src_to_reg(struct ir2_instruction *instr, - struct ir2_src_register *reg) -{ - if (reg->flags & IR2_REG_CONST) - return reg->num; - - return instr->shader->reg[reg->num].reg; -} - -static int dst_to_reg(struct ir2_instruction *instr, - struct ir2_dst_register *reg) -{ - if (reg->flags & IR2_REG_EXPORT) - return reg->num; - - return instr->shader->reg[reg->num].reg; -} - -static bool mask_get(uint32_t *mask, unsigned index) -{ - return !!(mask[index / 32] & 1 << index % 32); -} - -static void mask_set(uint32_t *mask, struct ir2_register *reg, int index) -{ - if (reg) { - unsigned i; - for (i = 0; i < ARRAY_SIZE(reg->regmask); i++) - mask[i] |= reg->regmask[i]; - } - if (index >= 0) - mask[index / 32] |= 1 << index % 32; -} - -static bool sets_pred(struct ir2_instruction *instr) -{ - return instr->instr_type == IR2_ALU_SCALAR && - instr->alu_scalar.opc >= PRED_SETEs && - instr->alu_scalar.opc <= PRED_SET_RESTOREs; -} - - - -void* ir2_shader_assemble(struct ir2_shader *shader, - struct ir2_shader_info *info) -{ - /* NOTES - * blob compiler seems to always puts PRED_* instrs in a CF by - * themselves, and wont combine EQ/NE in the same CF - * (not doing this - doesn't seem to make a difference) - * - * TODO: implement scheduling for combining vector+scalar instructions - * -some vector instructions can be replaced by 
scalar - */ - - /* first step: - * 1. remove "NOP" MOV instructions generated by TGSI for input/output: - * 2. track information for register allocation, and to remove - * the dead code when some exports are not needed - * 3. add additional instructions for a20x hw binning if needed - * NOTE: modifies the shader instrs - * this step could be done as instructions are added by compiler instead - */ - - /* mask of exports that must be generated - * used to avoid calculating ps exports with hw binning - */ - uint64_t export = ~0ull; - /* bitmask of variables required for exports defined by "export" */ - uint32_t export_mask[REG_MASK/32+1] = {}; - - unsigned idx, reg_idx; - unsigned max_input = 0; - int export_size = -1; - - for (idx = 0; idx < shader->instr_count; idx++) { - struct ir2_instruction *instr = shader->instr[idx], *prev; - struct ir2_dst_register dst_reg = instr->dst_reg; - - if (dst_reg.flags & IR2_REG_EXPORT) { - if (dst_reg.num < 32) - export_size++; - - if ((prev = simple_mov(instr, true))) { - /* copy instruction but keep dst */ - *instr = *prev; - instr->dst_reg = dst_reg; - } - } - - for (reg_idx = 0; reg_idx < instr->src_reg_count; reg_idx++) { - struct ir2_src_register *src_reg = &instr->src_reg[reg_idx]; - struct ir2_register *reg; - int num; - - if (src_reg->flags & IR2_REG_CONST) - continue; - - num = src_reg->num; - reg = &shader->reg[num]; - reg->read_idx = idx; - - if (src_reg->flags & IR2_REG_INPUT) { - max_input = MAX2(max_input, num); - } else { - /* bypass simple mov used to set src_reg */ - assert(reg->write_idx >= 0); - prev = shader->instr[reg->write_idx]; - if (simple_mov(prev, false)) { - *src_reg = prev->src_reg[0]; - /* process same src_reg again */ - reg_idx -= 1; - continue; - } - } - - /* update dependencies */ - uint32_t *mask = (dst_reg.flags & IR2_REG_EXPORT) ? 
- export_mask : shader->reg[dst_reg.num].regmask; - mask_set(mask, reg, num); - if (sets_pred(instr)) - mask_set(export_mask, reg, num); - } - } - - /* second step: - * emit instructions (with CFs) + RA - */ - instr_cf_t cfs[128], *cf = cfs; - uint32_t alufetch[3*256], *af = alufetch; - - /* RA is done on write, so inputs must be allocated here */ - for (reg_idx = 0; reg_idx <= max_input; reg_idx++) - shader->reg[reg_idx].reg = reg_idx; - info->max_reg = max_input; - - /* CF instr state */ - instr_cf_exec_t exec = { .opc = EXEC }; - instr_cf_alloc_t alloc = { .opc = ALLOC }; - bool need_alloc = 0; - bool pos_export = 0; - - export_size = MAX2(export_size, 0); - - for (idx = 0; idx < shader->instr_count; idx++) { - struct ir2_instruction *instr = shader->instr[idx]; - struct ir2_dst_register *dst_reg = &instr->dst_reg; - unsigned num = dst_reg->num; - struct ir2_register *reg; - - /* a2xx only has 64 registers, so we can use a single 64-bit mask */ - uint64_t regmask = 0ull; - - /* compute the current regmask */ - for (reg_idx = 0; (int) reg_idx <= shader->max_reg; reg_idx++) { - reg = &shader->reg[reg_idx]; - if ((int) idx > reg->write_idx && idx < reg->read_idx) - regmask |= (1ull << reg->reg); - } - - if (dst_reg->flags & IR2_REG_EXPORT) { - /* skip if export is not needed */ - if (!(export & (1ull << num))) - continue; - - /* ALLOC CF: - * want to alloc all < 32 at once - * 32/33 and 62/63 come in pairs - * XXX assuming all 3 types are never interleaved - */ - if (num < 32) { - alloc.size = export_size; - alloc.buffer_select = SQ_PARAMETER_PIXEL; - need_alloc = export_size >= 0; - export_size = -1; - } else if (num == 32 || num == 33) { - alloc.size = 0; - alloc.buffer_select = SQ_MEMORY; - need_alloc = num != 33; - } else { - alloc.size = 0; - alloc.buffer_select = SQ_POSITION; - need_alloc = !pos_export; - pos_export = true; - } - - } else { - /* skip if dst register not needed to compute exports */ - if (!mask_get(export_mask, num)) - continue; - - /* RA on 
first write */ - reg = &shader->reg[num]; - if (reg->write_idx == idx) { - reg->reg = ffsll(~regmask) - 1; - info->max_reg = MAX2(info->max_reg, reg->reg); - } - } - - if (exec.count == 6 || (exec.count && need_alloc)) { - *cf++ = *(instr_cf_t*) &exec; - exec.address += exec.count; - exec.serialize = 0; - exec.count = 0; - } - - if (need_alloc) { - *cf++ = *(instr_cf_t*) &alloc; - need_alloc = false; - } - - int ret = instr_emit(instr, af, idx, info); af += 3; - assert(!ret); - - if (instr->instr_type == IR2_FETCH) - exec.serialize |= 0x1 << exec.count * 2; - if (instr->sync) - exec.serialize |= 0x2 << exec.count * 2; - exec.count += 1; - } - - - exec.opc = !export_size ? EXEC : EXEC_END; - *cf++ = *(instr_cf_t*) &exec; - exec.address += exec.count; - exec.serialize = 0; - exec.count = 0; - - /* GPU will hang without at least one pixel alloc */ - if (!export_size) { - alloc.size = 0; - alloc.buffer_select = SQ_PARAMETER_PIXEL; - *cf++ = *(instr_cf_t*) &alloc; - - exec.opc = EXEC_END; - *cf++ = *(instr_cf_t*) &exec; - } - - unsigned num_cfs = cf - cfs; - - /* insert nop to get an even # of CFs */ - if (num_cfs % 2) { - *cf++ = (instr_cf_t) { .opc = NOP }; - num_cfs++; - } - - /* offset cf addrs */ - for (idx = 0; idx < num_cfs; idx++) { - switch (cfs[idx].opc) { - case EXEC: - case EXEC_END: - cfs[idx].exec.address += num_cfs / 2; - break; - default: - break; - /* XXX and any other address using cf that gets implemented */ - } - } - - /* concatenate cfs+alufetchs */ - uint32_t cfdwords = num_cfs / 2 * 3; - uint32_t alufetchdwords = exec.address * 3; - info->sizedwords = cfdwords + alufetchdwords; - uint32_t *dwords = malloc(info->sizedwords * 4); - assert(dwords); - memcpy(dwords, cfs, cfdwords * 4); - memcpy(&dwords[cfdwords], alufetch, alufetchdwords * 4); - return dwords; -} - -struct ir2_instruction * ir2_instr_create(struct ir2_shader *shader, - int instr_type) -{ - struct ir2_instruction *instr = - ir2_alloc(shader, sizeof(struct ir2_instruction)); - 
DEBUG_MSG("%d", instr_type); - instr->shader = shader; - instr->idx = shader->instr_count; - instr->pred = shader->pred; - instr->instr_type = instr_type; - shader->instr[shader->instr_count++] = instr; - return instr; -} - - -/* - * FETCH instructions: - */ - -static int instr_emit_fetch(struct ir2_instruction *instr, - uint32_t *dwords, uint32_t idx, - struct ir2_shader_info *info) -{ - instr_fetch_t *fetch = (instr_fetch_t *)dwords; - struct ir2_dst_register *dst_reg = &instr->dst_reg; - struct ir2_src_register *src_reg = &instr->src_reg[0]; - - memset(fetch, 0, sizeof(*fetch)); - - fetch->opc = instr->fetch.opc; - - if (instr->fetch.opc == VTX_FETCH) { - instr_fetch_vtx_t *vtx = &fetch->vtx; - - assert(instr->fetch.stride <= 0xff); - assert(instr->fetch.fmt <= 0x3f); - assert(instr->fetch.const_idx <= 0x1f); - assert(instr->fetch.const_idx_sel <= 0x3); - - vtx->src_reg = src_to_reg(instr, src_reg); - vtx->src_swiz = reg_fetch_src_swiz(src_reg, 1); - vtx->dst_reg = dst_to_reg(instr, dst_reg); - vtx->dst_swiz = reg_fetch_dst_swiz(dst_reg); - vtx->must_be_one = 1; - vtx->const_index = instr->fetch.const_idx; - vtx->const_index_sel = instr->fetch.const_idx_sel; - vtx->format_comp_all = !!instr->fetch.is_signed; - vtx->num_format_all = !instr->fetch.is_normalized; - vtx->format = instr->fetch.fmt; - vtx->stride = instr->fetch.stride; - vtx->offset = instr->fetch.offset; - - if (instr->pred != IR2_PRED_NONE) { - vtx->pred_select = 1; - vtx->pred_condition = (instr->pred == IR2_PRED_EQ) ? 1 : 0; - } - - /* XXX seems like every FETCH but the first has - * this bit set: - */ - vtx->reserved3 = (idx > 0) ? 0x1 : 0x0; - vtx->reserved0 = (idx > 0) ? 
0x2 : 0x3; - } else if (instr->fetch.opc == TEX_FETCH) { - instr_fetch_tex_t *tex = &fetch->tex; - - assert(instr->fetch.const_idx <= 0x1f); - - tex->src_reg = src_to_reg(instr, src_reg); - tex->src_swiz = reg_fetch_src_swiz(src_reg, 3); - tex->dst_reg = dst_to_reg(instr, dst_reg); - tex->dst_swiz = reg_fetch_dst_swiz(dst_reg); - tex->const_idx = instr->fetch.const_idx; - tex->mag_filter = TEX_FILTER_USE_FETCH_CONST; - tex->min_filter = TEX_FILTER_USE_FETCH_CONST; - tex->mip_filter = TEX_FILTER_USE_FETCH_CONST; - tex->aniso_filter = ANISO_FILTER_USE_FETCH_CONST; - tex->arbitrary_filter = ARBITRARY_FILTER_USE_FETCH_CONST; - tex->vol_mag_filter = TEX_FILTER_USE_FETCH_CONST; - tex->vol_min_filter = TEX_FILTER_USE_FETCH_CONST; - tex->use_comp_lod = 1; - tex->use_reg_lod = !instr->fetch.is_cube; - tex->sample_location = SAMPLE_CENTER; - tex->tx_coord_denorm = instr->fetch.is_rect; - - if (instr->pred != IR2_PRED_NONE) { - tex->pred_select = 1; - tex->pred_condition = (instr->pred == IR2_PRED_EQ) ? 1 : 0; - } - - } else { - ERROR_MSG("invalid fetch opc: %d\n", instr->fetch.opc); - return -1; - } - - return 0; -} - -/* - * ALU instructions: - */ - -static int instr_emit_alu(struct ir2_instruction *instr_v, - struct ir2_instruction *instr_s, uint32_t *dwords, - struct ir2_shader_info *info) -{ - instr_alu_t *alu = (instr_alu_t *)dwords; - struct ir2_dst_register *vdst_reg, *sdst_reg; - struct ir2_src_register *src1_reg, *src2_reg, *src3_reg; - struct ir2_shader *shader = instr_v ? 
instr_v->shader : instr_s->shader; - enum ir2_pred pred = IR2_PRED_NONE; - - memset(alu, 0, sizeof(*alu)); - - vdst_reg = NULL; - sdst_reg = NULL; - src1_reg = NULL; - src2_reg = NULL; - src3_reg = NULL; - - if (instr_v) { - vdst_reg = &instr_v->dst_reg; - assert(instr_v->src_reg_count >= 2); - src1_reg = &instr_v->src_reg[0]; - src2_reg = &instr_v->src_reg[1]; - if (instr_v->src_reg_count > 2) - src3_reg = &instr_v->src_reg[2]; - pred = instr_v->pred; - } - - if (instr_s) { - sdst_reg = &instr_s->dst_reg; - assert(instr_s->src_reg_count == 1); - assert(!instr_v || vdst_reg->flags == sdst_reg->flags); - assert(!instr_v || pred == instr_s->pred); - if (src3_reg) { - assert(src3_reg->flags == instr_s->src_reg[0].flags); - assert(src3_reg->num == instr_s->src_reg[0].num); - assert(!strcmp(src3_reg->swizzle, instr_s->src_reg[0].swizzle)); - } - src3_reg = &instr_s->src_reg[0]; - pred = instr_s->pred; - } - - if (vdst_reg) { - assert((vdst_reg->flags & ~IR2_REG_EXPORT) == 0); - assert(!vdst_reg->swizzle || (strlen(vdst_reg->swizzle) == 4)); - alu->vector_opc = instr_v->alu_vector.opc; - alu->vector_write_mask = reg_alu_dst_swiz(vdst_reg); - alu->vector_dest = dst_to_reg(instr_v, vdst_reg); - } else { - alu->vector_opc = MAXv; - } - - if (sdst_reg) { - alu->scalar_opc = instr_s->alu_scalar.opc; - alu->scalar_write_mask = reg_alu_dst_swiz(sdst_reg); - alu->scalar_dest = dst_to_reg(instr_s, sdst_reg); - } else { - /* not sure if this is required, but adreno compiler seems - * to always set scalar opc to MAXs if it is not used: - */ - alu->scalar_opc = MAXs; - } - - alu->export_data = - !!((instr_v ? vdst_reg : sdst_reg)->flags & IR2_REG_EXPORT); - - /* export32 has this bit set.. 
it seems to do more than just set - * the base address of the constants used to zero - * TODO make this less of a hack - */ - if (alu->export_data && alu->vector_dest == 32) { - assert(!instr_s); - alu->relative_addr = 1; - } - - if (src1_reg) { - if (src1_reg->flags & IR2_REG_CONST) { - assert(!(src1_reg->flags & IR2_REG_ABS)); - alu->src1_reg_const = src1_reg->num; - } else { - alu->src1_reg = shader->reg[src1_reg->num].reg; - alu->src1_reg_abs = !!(src1_reg->flags & IR2_REG_ABS); - } - alu->src1_swiz = reg_alu_src_swiz(src1_reg); - alu->src1_reg_negate = !!(src1_reg->flags & IR2_REG_NEGATE); - alu->src1_sel = !(src1_reg->flags & IR2_REG_CONST); - } else { - alu->src1_sel = 1; - } - - if (src2_reg) { - if (src2_reg->flags & IR2_REG_CONST) { - assert(!(src2_reg->flags & IR2_REG_ABS)); - alu->src2_reg_const = src2_reg->num; - } else { - alu->src2_reg = shader->reg[src2_reg->num].reg; - alu->src2_reg_abs = !!(src2_reg->flags & IR2_REG_ABS); - } - alu->src2_swiz = reg_alu_src_swiz(src2_reg); - alu->src2_reg_negate = !!(src2_reg->flags & IR2_REG_NEGATE); - alu->src2_sel = !(src2_reg->flags & IR2_REG_CONST); - } else { - alu->src2_sel = 1; - } - - if (src3_reg) { - if (src3_reg->flags & IR2_REG_CONST) { - assert(!(src3_reg->flags & IR2_REG_ABS)); - alu->src3_reg_const = src3_reg->num; - } else { - alu->src3_reg = shader->reg[src3_reg->num].reg; - alu->src3_reg_abs = !!(src3_reg->flags & IR2_REG_ABS); - } - alu->src3_swiz = reg_alu_src_swiz(src3_reg); - alu->src3_reg_negate = !!(src3_reg->flags & IR2_REG_NEGATE); - alu->src3_sel = !(src3_reg->flags & IR2_REG_CONST); - } else { - /* not sure if this is required, but adreno compiler seems - * to always set register bank for 3rd src if unused: - */ - alu->src3_sel = 1; - } - - alu->vector_clamp = instr_v ? instr_v->alu_vector.clamp : 0; - alu->scalar_clamp = instr_s ? instr_s->alu_scalar.clamp : 0; - - if (pred != IR2_PRED_NONE) - alu->pred_select = (pred == IR2_PRED_EQ) ? 
3 : 2; - - return 0; -} - -static int instr_emit(struct ir2_instruction *instr, uint32_t *dwords, - uint32_t idx, struct ir2_shader_info *info) -{ - switch (instr->instr_type) { - case IR2_FETCH: return instr_emit_fetch(instr, dwords, idx, info); - case IR2_ALU_VECTOR: return instr_emit_alu(instr, NULL, dwords, info); - case IR2_ALU_SCALAR: return instr_emit_alu(NULL, instr, dwords, info); - } - return -1; -} - -struct ir2_dst_register * ir2_dst_create(struct ir2_instruction *instr, - int num, const char *swizzle, int flags) -{ - if (!(flags & IR2_REG_EXPORT)) { - struct ir2_register *reg = &instr->shader->reg[num]; - - unsigned i; - for (i = instr->shader->max_reg + 1; i <= num; i++) - instr->shader->reg[i].write_idx = -1; - instr->shader->max_reg = i - 1; - - if (reg->write_idx < 0) - reg->write_idx = instr->idx; - reg->write_idx2 = instr->idx; - } - - struct ir2_dst_register *reg = &instr->dst_reg; - reg->flags = flags; - reg->num = num; - reg->swizzle = ir2_strdup(instr->shader, swizzle); - return reg; -} - -struct ir2_src_register * ir2_reg_create(struct ir2_instruction *instr, - int num, const char *swizzle, int flags) -{ - assert(instr->src_reg_count + 1 <= ARRAY_SIZE(instr->src_reg)); - if (!(flags & IR2_REG_CONST)) { - struct ir2_register *reg = &instr->shader->reg[num]; - - reg->read_idx = instr->idx; - - unsigned i; - for (i = instr->shader->max_reg + 1; i <= num; i++) - instr->shader->reg[i].write_idx = -1; - instr->shader->max_reg = i - 1; - } - - struct ir2_src_register *reg = &instr->src_reg[instr->src_reg_count++]; - reg->flags = flags; - reg->num = num; - reg->swizzle = ir2_strdup(instr->shader, swizzle); - return reg; -} - -static uint32_t reg_fetch_src_swiz(struct ir2_src_register *reg, uint32_t n) -{ - uint32_t swiz = 0; - int i; - - assert((reg->flags & ~IR2_REG_INPUT) == 0); - assert(reg->swizzle); - - DEBUG_MSG("fetch src R%d.%s", reg->num, reg->swizzle); - - for (i = n-1; i >= 0; i--) { - swiz <<= 2; - switch (reg->swizzle[i]) { - default: - 
ERROR_MSG("invalid fetch src swizzle: %s", reg->swizzle); - case 'x': swiz |= 0x0; break; - case 'y': swiz |= 0x1; break; - case 'z': swiz |= 0x2; break; - case 'w': swiz |= 0x3; break; - } - } - - return swiz; -} - -static uint32_t reg_fetch_dst_swiz(struct ir2_dst_register *reg) -{ - uint32_t swiz = 0; - int i; - - assert(reg->flags == 0); - assert(!reg->swizzle || (strlen(reg->swizzle) == 4)); - - DEBUG_MSG("fetch dst R%d.%s", reg->num, reg->swizzle); - - if (reg->swizzle) { - for (i = 3; i >= 0; i--) { - swiz <<= 3; - switch (reg->swizzle[i]) { - default: - ERROR_MSG("invalid dst swizzle: %s", reg->swizzle); - case 'x': swiz |= 0x0; break; - case 'y': swiz |= 0x1; break; - case 'z': swiz |= 0x2; break; - case 'w': swiz |= 0x3; break; - case '0': swiz |= 0x4; break; - case '1': swiz |= 0x5; break; - case '_': swiz |= 0x7; break; - } - } - } else { - swiz = 0x688; - } - - return swiz; -} - -/* actually, a write-mask */ -static uint32_t reg_alu_dst_swiz(struct ir2_dst_register *reg) -{ - uint32_t swiz = 0; - int i; - - assert((reg->flags & ~IR2_REG_EXPORT) == 0); - assert(!reg->swizzle || (strlen(reg->swizzle) == 4)); - - DEBUG_MSG("alu dst R%d.%s", reg->num, reg->swizzle); - - if (reg->swizzle) { - for (i = 3; i >= 0; i--) { - swiz <<= 1; - if (reg->swizzle[i] == "xyzw"[i]) { - swiz |= 0x1; - } else if (reg->swizzle[i] != '_') { - ERROR_MSG("invalid dst swizzle: %s", reg->swizzle); - break; - } - } - } else { - swiz = 0xf; - } - - return swiz; -} - -static uint32_t reg_alu_src_swiz(struct ir2_src_register *reg) -{ - uint32_t swiz = 0; - int i; - - assert(!reg->swizzle || (strlen(reg->swizzle) == 4)); - - DEBUG_MSG("vector src R%d.%s", reg->num, reg->swizzle); - - if (reg->swizzle) { - for (i = 3; i >= 0; i--) { - swiz <<= 2; - switch (reg->swizzle[i]) { - default: - ERROR_MSG("invalid vector src swizzle: %s", reg->swizzle); - case 'x': swiz |= (0x0 - i) & 0x3; break; - case 'y': swiz |= (0x1 - i) & 0x3; break; - case 'z': swiz |= (0x2 - i) & 0x3; break; - case 
'w': swiz |= (0x3 - i) & 0x3; break; - } - } - } else { - swiz = 0x0; - } - - return swiz; -} diff -Nru mesa-18.3.3/src/gallium/drivers/freedreno/a2xx/ir-a2xx.h mesa-19.0.1/src/gallium/drivers/freedreno/a2xx/ir-a2xx.h --- mesa-18.3.3/src/gallium/drivers/freedreno/a2xx/ir-a2xx.h 2018-12-07 18:58:04.000000000 +0000 +++ mesa-19.0.1/src/gallium/drivers/freedreno/a2xx/ir-a2xx.h 1970-01-01 00:00:00.000000000 +0000 @@ -1,188 +0,0 @@ -/* - * Copyright (c) 2012 Rob Clark - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * the rights to use, copy, modify, merge, publish, distribute, sublicense, - * and/or sell copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice (including the next - * paragraph) shall be included in all copies or substantial portions of the - * Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL - * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. 
- */ - -#ifndef IR2_H_ -#define IR2_H_ - -#include -#include - -#include "instr-a2xx.h" - -/* low level intermediate representation of an adreno a2xx shader program */ - -struct ir2_shader; - -#define REG_MASK 0xff - -struct ir2_shader_info { - uint16_t sizedwords; - int8_t max_reg; /* highest GPR # used by shader */ -}; - -struct ir2_register { - int16_t write_idx, write_idx2, read_idx, reg; - /* bitmask of variables on which this one depends - * XXX: use bitmask util? - */ - uint32_t regmask[REG_MASK/32+1]; -}; - -struct ir2_src_register { - enum { - IR2_REG_INPUT = 0x1, - IR2_REG_CONST = 0x2, - IR2_REG_NEGATE = 0x4, - IR2_REG_ABS = 0x8, - } flags; - int num; - char *swizzle; -}; - -struct ir2_dst_register { - enum { - IR2_REG_EXPORT = 0x1, - } flags; - int num; - char *swizzle; -}; - -enum ir2_pred { - IR2_PRED_NONE = 0, - IR2_PRED_EQ = 1, - IR2_PRED_NE = 2, -}; - -struct ir2_instruction { - struct ir2_shader *shader; - unsigned idx; - enum { - IR2_FETCH, - IR2_ALU_VECTOR, - IR2_ALU_SCALAR, - } instr_type; - enum ir2_pred pred; - int sync; - unsigned src_reg_count; - struct ir2_dst_register dst_reg; - struct ir2_src_register src_reg[3]; - union { - /* FETCH specific: */ - struct { - instr_fetch_opc_t opc; - unsigned const_idx; - /* texture fetch specific: */ - bool is_cube : 1; - bool is_rect : 1; - /* vertex fetch specific: */ - unsigned const_idx_sel; - enum a2xx_sq_surfaceformat fmt; - bool is_signed : 1; - bool is_normalized : 1; - uint32_t stride; - uint32_t offset; - } fetch; - /* ALU-Vector specific: */ - struct { - instr_vector_opc_t opc; - bool clamp; - } alu_vector; - /* ALU-Scalar specific: */ - struct { - instr_scalar_opc_t opc; - bool clamp; - } alu_scalar; - }; -}; - -struct ir2_shader { - unsigned instr_count; - int max_reg; - struct ir2_register reg[REG_MASK+1]; - - struct ir2_instruction *instr[0x200]; - uint32_t heap[100 * 4096]; - unsigned heap_idx; - - enum ir2_pred pred; /* pred inherited by newly created instrs */ -}; - -struct ir2_shader * 
ir2_shader_create(void); -void ir2_shader_destroy(struct ir2_shader *shader); -void * ir2_shader_assemble(struct ir2_shader *shader, - struct ir2_shader_info *info); - -struct ir2_instruction * ir2_instr_create(struct ir2_shader *shader, - int instr_type); - -struct ir2_dst_register * ir2_dst_create(struct ir2_instruction *instr, - int num, const char *swizzle, int flags); -struct ir2_src_register * ir2_reg_create(struct ir2_instruction *instr, - int num, const char *swizzle, int flags); - -/* some helper fxns: */ - -static inline struct ir2_instruction * -ir2_instr_create_alu_v(struct ir2_shader *shader, instr_vector_opc_t vop) -{ - struct ir2_instruction *instr = ir2_instr_create(shader, IR2_ALU_VECTOR); - if (!instr) - return instr; - instr->alu_vector.opc = vop; - return instr; -} - -static inline struct ir2_instruction * -ir2_instr_create_alu_s(struct ir2_shader *shader, instr_scalar_opc_t sop) -{ - struct ir2_instruction *instr = ir2_instr_create(shader, IR2_ALU_SCALAR); - if (!instr) - return instr; - instr->alu_scalar.opc = sop; - return instr; -} - -static inline struct ir2_instruction * -ir2_instr_create_vtx_fetch(struct ir2_shader *shader, int ci, int cis, - enum a2xx_sq_surfaceformat fmt, bool is_signed, int stride) -{ - struct ir2_instruction *instr = ir2_instr_create(shader, IR2_FETCH); - instr->fetch.opc = VTX_FETCH; - instr->fetch.const_idx = ci; - instr->fetch.const_idx_sel = cis; - instr->fetch.fmt = fmt; - instr->fetch.is_signed = is_signed; - instr->fetch.stride = stride; - return instr; -} -static inline struct ir2_instruction * -ir2_instr_create_tex_fetch(struct ir2_shader *shader, int ci) -{ - struct ir2_instruction *instr = ir2_instr_create(shader, IR2_FETCH); - instr->fetch.opc = TEX_FETCH; - instr->fetch.const_idx = ci; - return instr; -} - - -#endif /* IR2_H_ */ diff -Nru mesa-18.3.3/src/gallium/drivers/freedreno/a3xx/a3xx.xml.h mesa-19.0.1/src/gallium/drivers/freedreno/a3xx/a3xx.xml.h --- 
mesa-18.3.3/src/gallium/drivers/freedreno/a3xx/a3xx.xml.h 2018-12-07 18:58:04.000000000 +0000 +++ mesa-19.0.1/src/gallium/drivers/freedreno/a3xx/a3xx.xml.h 1970-01-01 00:00:00.000000000 +0000 @@ -1,3239 +0,0 @@ -#ifndef A3XX_XML -#define A3XX_XML - -/* Autogenerated file, DO NOT EDIT manually! - -This file was generated by the rules-ng-ng headergen tool in this git repository: -http://github.com/freedreno/envytools/ -git clone https://github.com/freedreno/envytools.git - -The rules-ng-ng source files this header was generated from are: -- /home/robclark/src/envytools/rnndb/adreno.xml ( 501 bytes, from 2018-07-03 19:37:13) -- /home/robclark/src/envytools/rnndb/freedreno_copyright.xml ( 1572 bytes, from 2018-07-03 19:37:13) -- /home/robclark/src/envytools/rnndb/adreno/a2xx.xml ( 37936 bytes, from 2018-10-08 11:43:51) -- /home/robclark/src/envytools/rnndb/adreno/adreno_common.xml ( 14201 bytes, from 2018-10-08 11:43:51) -- /home/robclark/src/envytools/rnndb/adreno/adreno_pm4.xml ( 42864 bytes, from 2018-10-08 21:57:22) -- /home/robclark/src/envytools/rnndb/adreno/a3xx.xml ( 83840 bytes, from 2018-07-03 19:37:13) -- /home/robclark/src/envytools/rnndb/adreno/a4xx.xml ( 112086 bytes, from 2018-07-03 19:37:13) -- /home/robclark/src/envytools/rnndb/adreno/a5xx.xml ( 147240 bytes, from 2018-10-08 21:57:22) -- /home/robclark/src/envytools/rnndb/adreno/a6xx.xml ( 140514 bytes, from 2018-10-08 21:57:35) -- /home/robclark/src/envytools/rnndb/adreno/a6xx_gmu.xml ( 10431 bytes, from 2018-09-14 13:03:07) -- /home/robclark/src/envytools/rnndb/adreno/ocmem.xml ( 1773 bytes, from 2018-07-03 19:37:13) - -Copyright (C) 2013-2018 by the following authors: -- Rob Clark (robclark) -- Ilia Mirkin (imirkin) - -Permission is hereby granted, free of charge, to any person obtaining -a copy of this software and associated documentation files (the -"Software"), to deal in the Software without restriction, including -without limitation the rights to use, copy, modify, merge, publish, -distribute, 
sublicense, and/or sell copies of the Software, and to -permit persons to whom the Software is furnished to do so, subject to -the following conditions: - -The above copyright notice and this permission notice (including the -next paragraph) shall be included in all copies or substantial -portions of the Software. - -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, -EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF -MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. -IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE -LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION -OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION -WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. -*/ - - -enum a3xx_tile_mode { - LINEAR = 0, - TILE_32X32 = 2, -}; - -enum a3xx_state_block_id { - HLSQ_BLOCK_ID_TP_TEX = 2, - HLSQ_BLOCK_ID_TP_MIPMAP = 3, - HLSQ_BLOCK_ID_SP_VS = 4, - HLSQ_BLOCK_ID_SP_FS = 6, -}; - -enum a3xx_cache_opcode { - INVALIDATE = 1, -}; - -enum a3xx_vtx_fmt { - VFMT_32_FLOAT = 0, - VFMT_32_32_FLOAT = 1, - VFMT_32_32_32_FLOAT = 2, - VFMT_32_32_32_32_FLOAT = 3, - VFMT_16_FLOAT = 4, - VFMT_16_16_FLOAT = 5, - VFMT_16_16_16_FLOAT = 6, - VFMT_16_16_16_16_FLOAT = 7, - VFMT_32_FIXED = 8, - VFMT_32_32_FIXED = 9, - VFMT_32_32_32_FIXED = 10, - VFMT_32_32_32_32_FIXED = 11, - VFMT_16_SINT = 16, - VFMT_16_16_SINT = 17, - VFMT_16_16_16_SINT = 18, - VFMT_16_16_16_16_SINT = 19, - VFMT_16_UINT = 20, - VFMT_16_16_UINT = 21, - VFMT_16_16_16_UINT = 22, - VFMT_16_16_16_16_UINT = 23, - VFMT_16_SNORM = 24, - VFMT_16_16_SNORM = 25, - VFMT_16_16_16_SNORM = 26, - VFMT_16_16_16_16_SNORM = 27, - VFMT_16_UNORM = 28, - VFMT_16_16_UNORM = 29, - VFMT_16_16_16_UNORM = 30, - VFMT_16_16_16_16_UNORM = 31, - VFMT_32_UINT = 32, - VFMT_32_32_UINT = 33, - VFMT_32_32_32_UINT = 34, - VFMT_32_32_32_32_UINT = 35, - VFMT_32_SINT = 36, - VFMT_32_32_SINT = 37, - VFMT_32_32_32_SINT = 38, - 
VFMT_32_32_32_32_SINT = 39, - VFMT_8_UINT = 40, - VFMT_8_8_UINT = 41, - VFMT_8_8_8_UINT = 42, - VFMT_8_8_8_8_UINT = 43, - VFMT_8_UNORM = 44, - VFMT_8_8_UNORM = 45, - VFMT_8_8_8_UNORM = 46, - VFMT_8_8_8_8_UNORM = 47, - VFMT_8_SINT = 48, - VFMT_8_8_SINT = 49, - VFMT_8_8_8_SINT = 50, - VFMT_8_8_8_8_SINT = 51, - VFMT_8_SNORM = 52, - VFMT_8_8_SNORM = 53, - VFMT_8_8_8_SNORM = 54, - VFMT_8_8_8_8_SNORM = 55, - VFMT_10_10_10_2_UINT = 56, - VFMT_10_10_10_2_UNORM = 57, - VFMT_10_10_10_2_SINT = 58, - VFMT_10_10_10_2_SNORM = 59, - VFMT_2_10_10_10_UINT = 60, - VFMT_2_10_10_10_UNORM = 61, - VFMT_2_10_10_10_SINT = 62, - VFMT_2_10_10_10_SNORM = 63, -}; - -enum a3xx_tex_fmt { - TFMT_5_6_5_UNORM = 4, - TFMT_5_5_5_1_UNORM = 5, - TFMT_4_4_4_4_UNORM = 7, - TFMT_Z16_UNORM = 9, - TFMT_X8Z24_UNORM = 10, - TFMT_Z32_FLOAT = 11, - TFMT_UV_64X32 = 16, - TFMT_VU_64X32 = 17, - TFMT_Y_64X32 = 18, - TFMT_NV12_64X32 = 19, - TFMT_UV_LINEAR = 20, - TFMT_VU_LINEAR = 21, - TFMT_Y_LINEAR = 22, - TFMT_NV12_LINEAR = 23, - TFMT_I420_Y = 24, - TFMT_I420_U = 26, - TFMT_I420_V = 27, - TFMT_ATC_RGB = 32, - TFMT_ATC_RGBA_EXPLICIT = 33, - TFMT_ETC1 = 34, - TFMT_ATC_RGBA_INTERPOLATED = 35, - TFMT_DXT1 = 36, - TFMT_DXT3 = 37, - TFMT_DXT5 = 38, - TFMT_2_10_10_10_UNORM = 40, - TFMT_10_10_10_2_UNORM = 41, - TFMT_9_9_9_E5_FLOAT = 42, - TFMT_11_11_10_FLOAT = 43, - TFMT_A8_UNORM = 44, - TFMT_L8_UNORM = 45, - TFMT_L8_A8_UNORM = 47, - TFMT_8_UNORM = 48, - TFMT_8_8_UNORM = 49, - TFMT_8_8_8_UNORM = 50, - TFMT_8_8_8_8_UNORM = 51, - TFMT_8_SNORM = 52, - TFMT_8_8_SNORM = 53, - TFMT_8_8_8_SNORM = 54, - TFMT_8_8_8_8_SNORM = 55, - TFMT_8_UINT = 56, - TFMT_8_8_UINT = 57, - TFMT_8_8_8_UINT = 58, - TFMT_8_8_8_8_UINT = 59, - TFMT_8_SINT = 60, - TFMT_8_8_SINT = 61, - TFMT_8_8_8_SINT = 62, - TFMT_8_8_8_8_SINT = 63, - TFMT_16_FLOAT = 64, - TFMT_16_16_FLOAT = 65, - TFMT_16_16_16_16_FLOAT = 67, - TFMT_16_UINT = 68, - TFMT_16_16_UINT = 69, - TFMT_16_16_16_16_UINT = 71, - TFMT_16_SINT = 72, - TFMT_16_16_SINT = 73, - TFMT_16_16_16_16_SINT = 
75, - TFMT_16_UNORM = 76, - TFMT_16_16_UNORM = 77, - TFMT_16_16_16_16_UNORM = 79, - TFMT_16_SNORM = 80, - TFMT_16_16_SNORM = 81, - TFMT_16_16_16_16_SNORM = 83, - TFMT_32_FLOAT = 84, - TFMT_32_32_FLOAT = 85, - TFMT_32_32_32_32_FLOAT = 87, - TFMT_32_UINT = 88, - TFMT_32_32_UINT = 89, - TFMT_32_32_32_32_UINT = 91, - TFMT_32_SINT = 92, - TFMT_32_32_SINT = 93, - TFMT_32_32_32_32_SINT = 95, - TFMT_2_10_10_10_UINT = 96, - TFMT_10_10_10_2_UINT = 97, - TFMT_ETC2_RG11_SNORM = 112, - TFMT_ETC2_RG11_UNORM = 113, - TFMT_ETC2_R11_SNORM = 114, - TFMT_ETC2_R11_UNORM = 115, - TFMT_ETC2_RGBA8 = 116, - TFMT_ETC2_RGB8A1 = 117, - TFMT_ETC2_RGB8 = 118, -}; - -enum a3xx_tex_fetchsize { - TFETCH_DISABLE = 0, - TFETCH_1_BYTE = 1, - TFETCH_2_BYTE = 2, - TFETCH_4_BYTE = 3, - TFETCH_8_BYTE = 4, - TFETCH_16_BYTE = 5, -}; - -enum a3xx_color_fmt { - RB_R5G6B5_UNORM = 0, - RB_R5G5B5A1_UNORM = 1, - RB_R4G4B4A4_UNORM = 3, - RB_R8G8B8_UNORM = 4, - RB_R8G8B8A8_UNORM = 8, - RB_R8G8B8A8_SNORM = 9, - RB_R8G8B8A8_UINT = 10, - RB_R8G8B8A8_SINT = 11, - RB_R8G8_UNORM = 12, - RB_R8G8_SNORM = 13, - RB_R8_UINT = 14, - RB_R8_SINT = 15, - RB_R10G10B10A2_UNORM = 16, - RB_A2R10G10B10_UNORM = 17, - RB_R10G10B10A2_UINT = 18, - RB_A2R10G10B10_UINT = 19, - RB_A8_UNORM = 20, - RB_R8_UNORM = 21, - RB_R16_FLOAT = 24, - RB_R16G16_FLOAT = 25, - RB_R16G16B16A16_FLOAT = 27, - RB_R11G11B10_FLOAT = 28, - RB_R16_SNORM = 32, - RB_R16G16_SNORM = 33, - RB_R16G16B16A16_SNORM = 35, - RB_R16_UNORM = 36, - RB_R16G16_UNORM = 37, - RB_R16G16B16A16_UNORM = 39, - RB_R16_SINT = 40, - RB_R16G16_SINT = 41, - RB_R16G16B16A16_SINT = 43, - RB_R16_UINT = 44, - RB_R16G16_UINT = 45, - RB_R16G16B16A16_UINT = 47, - RB_R32_FLOAT = 48, - RB_R32G32_FLOAT = 49, - RB_R32G32B32A32_FLOAT = 51, - RB_R32_SINT = 52, - RB_R32G32_SINT = 53, - RB_R32G32B32A32_SINT = 55, - RB_R32_UINT = 56, - RB_R32G32_UINT = 57, - RB_R32G32B32A32_UINT = 59, -}; - -enum a3xx_cp_perfcounter_select { - CP_ALWAYS_COUNT = 0, - CP_AHB_PFPTRANS_WAIT = 3, - CP_AHB_NRTTRANS_WAIT = 6, - 
CP_CSF_NRT_READ_WAIT = 8, - CP_CSF_I1_FIFO_FULL = 9, - CP_CSF_I2_FIFO_FULL = 10, - CP_CSF_ST_FIFO_FULL = 11, - CP_RESERVED_12 = 12, - CP_CSF_RING_ROQ_FULL = 13, - CP_CSF_I1_ROQ_FULL = 14, - CP_CSF_I2_ROQ_FULL = 15, - CP_CSF_ST_ROQ_FULL = 16, - CP_RESERVED_17 = 17, - CP_MIU_TAG_MEM_FULL = 18, - CP_MIU_NRT_WRITE_STALLED = 22, - CP_MIU_NRT_READ_STALLED = 23, - CP_ME_REGS_RB_DONE_FIFO_FULL = 26, - CP_ME_REGS_VS_EVENT_FIFO_FULL = 27, - CP_ME_REGS_PS_EVENT_FIFO_FULL = 28, - CP_ME_REGS_CF_EVENT_FIFO_FULL = 29, - CP_ME_MICRO_RB_STARVED = 30, - CP_AHB_RBBM_DWORD_SENT = 40, - CP_ME_BUSY_CLOCKS = 41, - CP_ME_WAIT_CONTEXT_AVAIL = 42, - CP_PFP_TYPE0_PACKET = 43, - CP_PFP_TYPE3_PACKET = 44, - CP_CSF_RB_WPTR_NEQ_RPTR = 45, - CP_CSF_I1_SIZE_NEQ_ZERO = 46, - CP_CSF_I2_SIZE_NEQ_ZERO = 47, - CP_CSF_RBI1I2_FETCHING = 48, -}; - -enum a3xx_gras_tse_perfcounter_select { - GRAS_TSEPERF_INPUT_PRIM = 0, - GRAS_TSEPERF_INPUT_NULL_PRIM = 1, - GRAS_TSEPERF_TRIVAL_REJ_PRIM = 2, - GRAS_TSEPERF_CLIPPED_PRIM = 3, - GRAS_TSEPERF_NEW_PRIM = 4, - GRAS_TSEPERF_ZERO_AREA_PRIM = 5, - GRAS_TSEPERF_FACENESS_CULLED_PRIM = 6, - GRAS_TSEPERF_ZERO_PIXEL_PRIM = 7, - GRAS_TSEPERF_OUTPUT_NULL_PRIM = 8, - GRAS_TSEPERF_OUTPUT_VISIBLE_PRIM = 9, - GRAS_TSEPERF_PRE_CLIP_PRIM = 10, - GRAS_TSEPERF_POST_CLIP_PRIM = 11, - GRAS_TSEPERF_WORKING_CYCLES = 12, - GRAS_TSEPERF_PC_STARVE = 13, - GRAS_TSERASPERF_STALL = 14, -}; - -enum a3xx_gras_ras_perfcounter_select { - GRAS_RASPERF_16X16_TILES = 0, - GRAS_RASPERF_8X8_TILES = 1, - GRAS_RASPERF_4X4_TILES = 2, - GRAS_RASPERF_WORKING_CYCLES = 3, - GRAS_RASPERF_STALL_CYCLES_BY_RB = 4, - GRAS_RASPERF_STALL_CYCLES_BY_VSC = 5, - GRAS_RASPERF_STARVE_CYCLES_BY_TSE = 6, -}; - -enum a3xx_hlsq_perfcounter_select { - HLSQ_PERF_SP_VS_CONSTANT = 0, - HLSQ_PERF_SP_VS_INSTRUCTIONS = 1, - HLSQ_PERF_SP_FS_CONSTANT = 2, - HLSQ_PERF_SP_FS_INSTRUCTIONS = 3, - HLSQ_PERF_TP_STATE = 4, - HLSQ_PERF_QUADS = 5, - HLSQ_PERF_PIXELS = 6, - HLSQ_PERF_VERTICES = 7, - HLSQ_PERF_FS8_THREADS = 8, - 
HLSQ_PERF_FS16_THREADS = 9, - HLSQ_PERF_FS32_THREADS = 10, - HLSQ_PERF_VS8_THREADS = 11, - HLSQ_PERF_VS16_THREADS = 12, - HLSQ_PERF_SP_VS_DATA_BYTES = 13, - HLSQ_PERF_SP_FS_DATA_BYTES = 14, - HLSQ_PERF_ACTIVE_CYCLES = 15, - HLSQ_PERF_STALL_CYCLES_SP_STATE = 16, - HLSQ_PERF_STALL_CYCLES_SP_VS = 17, - HLSQ_PERF_STALL_CYCLES_SP_FS = 18, - HLSQ_PERF_STALL_CYCLES_UCHE = 19, - HLSQ_PERF_RBBM_LOAD_CYCLES = 20, - HLSQ_PERF_DI_TO_VS_START_SP0 = 21, - HLSQ_PERF_DI_TO_FS_START_SP0 = 22, - HLSQ_PERF_VS_START_TO_DONE_SP0 = 23, - HLSQ_PERF_FS_START_TO_DONE_SP0 = 24, - HLSQ_PERF_SP_STATE_COPY_CYCLES_VS = 25, - HLSQ_PERF_SP_STATE_COPY_CYCLES_FS = 26, - HLSQ_PERF_UCHE_LATENCY_CYCLES = 27, - HLSQ_PERF_UCHE_LATENCY_COUNT = 28, -}; - -enum a3xx_pc_perfcounter_select { - PC_PCPERF_VISIBILITY_STREAMS = 0, - PC_PCPERF_TOTAL_INSTANCES = 1, - PC_PCPERF_PRIMITIVES_PC_VPC = 2, - PC_PCPERF_PRIMITIVES_KILLED_BY_VS = 3, - PC_PCPERF_PRIMITIVES_VISIBLE_BY_VS = 4, - PC_PCPERF_DRAWCALLS_KILLED_BY_VS = 5, - PC_PCPERF_DRAWCALLS_VISIBLE_BY_VS = 6, - PC_PCPERF_VERTICES_TO_VFD = 7, - PC_PCPERF_REUSED_VERTICES = 8, - PC_PCPERF_CYCLES_STALLED_BY_VFD = 9, - PC_PCPERF_CYCLES_STALLED_BY_TSE = 10, - PC_PCPERF_CYCLES_STALLED_BY_VBIF = 11, - PC_PCPERF_CYCLES_IS_WORKING = 12, -}; - -enum a3xx_rb_perfcounter_select { - RB_RBPERF_ACTIVE_CYCLES_ANY = 0, - RB_RBPERF_ACTIVE_CYCLES_ALL = 1, - RB_RBPERF_STARVE_CYCLES_BY_SP = 2, - RB_RBPERF_STARVE_CYCLES_BY_RAS = 3, - RB_RBPERF_STARVE_CYCLES_BY_MARB = 4, - RB_RBPERF_STALL_CYCLES_BY_MARB = 5, - RB_RBPERF_STALL_CYCLES_BY_HLSQ = 6, - RB_RBPERF_RB_MARB_DATA = 7, - RB_RBPERF_SP_RB_QUAD = 8, - RB_RBPERF_RAS_EARLY_Z_QUADS = 9, - RB_RBPERF_GMEM_CH0_READ = 10, - RB_RBPERF_GMEM_CH1_READ = 11, - RB_RBPERF_GMEM_CH0_WRITE = 12, - RB_RBPERF_GMEM_CH1_WRITE = 13, - RB_RBPERF_CP_CONTEXT_DONE = 14, - RB_RBPERF_CP_CACHE_FLUSH = 15, - RB_RBPERF_CP_ZPASS_DONE = 16, -}; - -enum a3xx_rbbm_perfcounter_select { - RBBM_ALAWYS_ON = 0, - RBBM_VBIF_BUSY = 1, - RBBM_TSE_BUSY = 2, - RBBM_RAS_BUSY = 
3, - RBBM_PC_DCALL_BUSY = 4, - RBBM_PC_VSD_BUSY = 5, - RBBM_VFD_BUSY = 6, - RBBM_VPC_BUSY = 7, - RBBM_UCHE_BUSY = 8, - RBBM_VSC_BUSY = 9, - RBBM_HLSQ_BUSY = 10, - RBBM_ANY_RB_BUSY = 11, - RBBM_ANY_TEX_BUSY = 12, - RBBM_ANY_USP_BUSY = 13, - RBBM_ANY_MARB_BUSY = 14, - RBBM_ANY_ARB_BUSY = 15, - RBBM_AHB_STATUS_BUSY = 16, - RBBM_AHB_STATUS_STALLED = 17, - RBBM_AHB_STATUS_TXFR = 18, - RBBM_AHB_STATUS_TXFR_SPLIT = 19, - RBBM_AHB_STATUS_TXFR_ERROR = 20, - RBBM_AHB_STATUS_LONG_STALL = 21, - RBBM_RBBM_STATUS_MASKED = 22, -}; - -enum a3xx_sp_perfcounter_select { - SP_LM_LOAD_INSTRUCTIONS = 0, - SP_LM_STORE_INSTRUCTIONS = 1, - SP_LM_ATOMICS = 2, - SP_UCHE_LOAD_INSTRUCTIONS = 3, - SP_UCHE_STORE_INSTRUCTIONS = 4, - SP_UCHE_ATOMICS = 5, - SP_VS_TEX_INSTRUCTIONS = 6, - SP_VS_CFLOW_INSTRUCTIONS = 7, - SP_VS_EFU_INSTRUCTIONS = 8, - SP_VS_FULL_ALU_INSTRUCTIONS = 9, - SP_VS_HALF_ALU_INSTRUCTIONS = 10, - SP_FS_TEX_INSTRUCTIONS = 11, - SP_FS_CFLOW_INSTRUCTIONS = 12, - SP_FS_EFU_INSTRUCTIONS = 13, - SP_FS_FULL_ALU_INSTRUCTIONS = 14, - SP_FS_HALF_ALU_INSTRUCTIONS = 15, - SP_FS_BARY_INSTRUCTIONS = 16, - SP_VS_INSTRUCTIONS = 17, - SP_FS_INSTRUCTIONS = 18, - SP_ADDR_LOCK_COUNT = 19, - SP_UCHE_READ_TRANS = 20, - SP_UCHE_WRITE_TRANS = 21, - SP_EXPORT_VPC_TRANS = 22, - SP_EXPORT_RB_TRANS = 23, - SP_PIXELS_KILLED = 24, - SP_ICL1_REQUESTS = 25, - SP_ICL1_MISSES = 26, - SP_ICL0_REQUESTS = 27, - SP_ICL0_MISSES = 28, - SP_ALU_ACTIVE_CYCLES = 29, - SP_EFU_ACTIVE_CYCLES = 30, - SP_STALL_CYCLES_BY_VPC = 31, - SP_STALL_CYCLES_BY_TP = 32, - SP_STALL_CYCLES_BY_UCHE = 33, - SP_STALL_CYCLES_BY_RB = 34, - SP_ACTIVE_CYCLES_ANY = 35, - SP_ACTIVE_CYCLES_ALL = 36, -}; - -enum a3xx_tp_perfcounter_select { - TPL1_TPPERF_L1_REQUESTS = 0, - TPL1_TPPERF_TP0_L1_REQUESTS = 1, - TPL1_TPPERF_TP0_L1_MISSES = 2, - TPL1_TPPERF_TP1_L1_REQUESTS = 3, - TPL1_TPPERF_TP1_L1_MISSES = 4, - TPL1_TPPERF_TP2_L1_REQUESTS = 5, - TPL1_TPPERF_TP2_L1_MISSES = 6, - TPL1_TPPERF_TP3_L1_REQUESTS = 7, - TPL1_TPPERF_TP3_L1_MISSES = 8, - 
TPL1_TPPERF_OUTPUT_TEXELS_POINT = 9, - TPL1_TPPERF_OUTPUT_TEXELS_BILINEAR = 10, - TPL1_TPPERF_OUTPUT_TEXELS_MIP = 11, - TPL1_TPPERF_OUTPUT_TEXELS_ANISO = 12, - TPL1_TPPERF_BILINEAR_OPS = 13, - TPL1_TPPERF_QUADSQUADS_OFFSET = 14, - TPL1_TPPERF_QUADQUADS_SHADOW = 15, - TPL1_TPPERF_QUADS_ARRAY = 16, - TPL1_TPPERF_QUADS_PROJECTION = 17, - TPL1_TPPERF_QUADS_GRADIENT = 18, - TPL1_TPPERF_QUADS_1D2D = 19, - TPL1_TPPERF_QUADS_3DCUBE = 20, - TPL1_TPPERF_ZERO_LOD = 21, - TPL1_TPPERF_OUTPUT_TEXELS = 22, - TPL1_TPPERF_ACTIVE_CYCLES_ANY = 23, - TPL1_TPPERF_ACTIVE_CYCLES_ALL = 24, - TPL1_TPPERF_STALL_CYCLES_BY_ARB = 25, - TPL1_TPPERF_LATENCY = 26, - TPL1_TPPERF_LATENCY_TRANS = 27, -}; - -enum a3xx_vfd_perfcounter_select { - VFD_PERF_UCHE_BYTE_FETCHED = 0, - VFD_PERF_UCHE_TRANS = 1, - VFD_PERF_VPC_BYPASS_COMPONENTS = 2, - VFD_PERF_FETCH_INSTRUCTIONS = 3, - VFD_PERF_DECODE_INSTRUCTIONS = 4, - VFD_PERF_ACTIVE_CYCLES = 5, - VFD_PERF_STALL_CYCLES_UCHE = 6, - VFD_PERF_STALL_CYCLES_HLSQ = 7, - VFD_PERF_STALL_CYCLES_VPC_BYPASS = 8, - VFD_PERF_STALL_CYCLES_VPC_ALLOC = 9, -}; - -enum a3xx_vpc_perfcounter_select { - VPC_PERF_SP_LM_PRIMITIVES = 0, - VPC_PERF_COMPONENTS_FROM_SP = 1, - VPC_PERF_SP_LM_COMPONENTS = 2, - VPC_PERF_ACTIVE_CYCLES = 3, - VPC_PERF_STALL_CYCLES_LM = 4, - VPC_PERF_STALL_CYCLES_RAS = 5, -}; - -enum a3xx_uche_perfcounter_select { - UCHE_UCHEPERF_VBIF_READ_BEATS_TP = 0, - UCHE_UCHEPERF_VBIF_READ_BEATS_VFD = 1, - UCHE_UCHEPERF_VBIF_READ_BEATS_HLSQ = 2, - UCHE_UCHEPERF_VBIF_READ_BEATS_MARB = 3, - UCHE_UCHEPERF_VBIF_READ_BEATS_SP = 4, - UCHE_UCHEPERF_READ_REQUESTS_TP = 8, - UCHE_UCHEPERF_READ_REQUESTS_VFD = 9, - UCHE_UCHEPERF_READ_REQUESTS_HLSQ = 10, - UCHE_UCHEPERF_READ_REQUESTS_MARB = 11, - UCHE_UCHEPERF_READ_REQUESTS_SP = 12, - UCHE_UCHEPERF_WRITE_REQUESTS_MARB = 13, - UCHE_UCHEPERF_WRITE_REQUESTS_SP = 14, - UCHE_UCHEPERF_TAG_CHECK_FAILS = 15, - UCHE_UCHEPERF_EVICTS = 16, - UCHE_UCHEPERF_FLUSHES = 17, - UCHE_UCHEPERF_VBIF_LATENCY_CYCLES = 18, - 
UCHE_UCHEPERF_VBIF_LATENCY_SAMPLES = 19, - UCHE_UCHEPERF_ACTIVE_CYCLES = 20, -}; - -enum a3xx_intp_mode { - SMOOTH = 0, - FLAT = 1, - ZERO = 2, - ONE = 3, -}; - -enum a3xx_repl_mode { - S = 1, - T = 2, - ONE_T = 3, -}; - -enum a3xx_tex_filter { - A3XX_TEX_NEAREST = 0, - A3XX_TEX_LINEAR = 1, - A3XX_TEX_ANISO = 2, -}; - -enum a3xx_tex_clamp { - A3XX_TEX_REPEAT = 0, - A3XX_TEX_CLAMP_TO_EDGE = 1, - A3XX_TEX_MIRROR_REPEAT = 2, - A3XX_TEX_CLAMP_TO_BORDER = 3, - A3XX_TEX_MIRROR_CLAMP = 4, -}; - -enum a3xx_tex_aniso { - A3XX_TEX_ANISO_1 = 0, - A3XX_TEX_ANISO_2 = 1, - A3XX_TEX_ANISO_4 = 2, - A3XX_TEX_ANISO_8 = 3, - A3XX_TEX_ANISO_16 = 4, -}; - -enum a3xx_tex_swiz { - A3XX_TEX_X = 0, - A3XX_TEX_Y = 1, - A3XX_TEX_Z = 2, - A3XX_TEX_W = 3, - A3XX_TEX_ZERO = 4, - A3XX_TEX_ONE = 5, -}; - -enum a3xx_tex_type { - A3XX_TEX_1D = 0, - A3XX_TEX_2D = 1, - A3XX_TEX_CUBE = 2, - A3XX_TEX_3D = 3, -}; - -enum a3xx_tex_msaa { - A3XX_TPL1_MSAA1X = 0, - A3XX_TPL1_MSAA2X = 1, - A3XX_TPL1_MSAA4X = 2, - A3XX_TPL1_MSAA8X = 3, -}; - -#define A3XX_INT0_RBBM_GPU_IDLE 0x00000001 -#define A3XX_INT0_RBBM_AHB_ERROR 0x00000002 -#define A3XX_INT0_RBBM_REG_TIMEOUT 0x00000004 -#define A3XX_INT0_RBBM_ME_MS_TIMEOUT 0x00000008 -#define A3XX_INT0_RBBM_PFP_MS_TIMEOUT 0x00000010 -#define A3XX_INT0_RBBM_ATB_BUS_OVERFLOW 0x00000020 -#define A3XX_INT0_VFD_ERROR 0x00000040 -#define A3XX_INT0_CP_SW_INT 0x00000080 -#define A3XX_INT0_CP_T0_PACKET_IN_IB 0x00000100 -#define A3XX_INT0_CP_OPCODE_ERROR 0x00000200 -#define A3XX_INT0_CP_RESERVED_BIT_ERROR 0x00000400 -#define A3XX_INT0_CP_HW_FAULT 0x00000800 -#define A3XX_INT0_CP_DMA 0x00001000 -#define A3XX_INT0_CP_IB2_INT 0x00002000 -#define A3XX_INT0_CP_IB1_INT 0x00004000 -#define A3XX_INT0_CP_RB_INT 0x00008000 -#define A3XX_INT0_CP_REG_PROTECT_FAULT 0x00010000 -#define A3XX_INT0_CP_RB_DONE_TS 0x00020000 -#define A3XX_INT0_CP_VS_DONE_TS 0x00040000 -#define A3XX_INT0_CP_PS_DONE_TS 0x00080000 -#define A3XX_INT0_CACHE_FLUSH_TS 0x00100000 -#define A3XX_INT0_CP_AHB_ERROR_HALT 
0x00200000 -#define A3XX_INT0_MISC_HANG_DETECT 0x01000000 -#define A3XX_INT0_UCHE_OOB_ACCESS 0x02000000 -#define REG_A3XX_RBBM_HW_VERSION 0x00000000 - -#define REG_A3XX_RBBM_HW_RELEASE 0x00000001 - -#define REG_A3XX_RBBM_HW_CONFIGURATION 0x00000002 - -#define REG_A3XX_RBBM_CLOCK_CTL 0x00000010 - -#define REG_A3XX_RBBM_SP_HYST_CNT 0x00000012 - -#define REG_A3XX_RBBM_SW_RESET_CMD 0x00000018 - -#define REG_A3XX_RBBM_AHB_CTL0 0x00000020 - -#define REG_A3XX_RBBM_AHB_CTL1 0x00000021 - -#define REG_A3XX_RBBM_AHB_CMD 0x00000022 - -#define REG_A3XX_RBBM_AHB_ERROR_STATUS 0x00000027 - -#define REG_A3XX_RBBM_GPR0_CTL 0x0000002e - -#define REG_A3XX_RBBM_STATUS 0x00000030 -#define A3XX_RBBM_STATUS_HI_BUSY 0x00000001 -#define A3XX_RBBM_STATUS_CP_ME_BUSY 0x00000002 -#define A3XX_RBBM_STATUS_CP_PFP_BUSY 0x00000004 -#define A3XX_RBBM_STATUS_CP_NRT_BUSY 0x00004000 -#define A3XX_RBBM_STATUS_VBIF_BUSY 0x00008000 -#define A3XX_RBBM_STATUS_TSE_BUSY 0x00010000 -#define A3XX_RBBM_STATUS_RAS_BUSY 0x00020000 -#define A3XX_RBBM_STATUS_RB_BUSY 0x00040000 -#define A3XX_RBBM_STATUS_PC_DCALL_BUSY 0x00080000 -#define A3XX_RBBM_STATUS_PC_VSD_BUSY 0x00100000 -#define A3XX_RBBM_STATUS_VFD_BUSY 0x00200000 -#define A3XX_RBBM_STATUS_VPC_BUSY 0x00400000 -#define A3XX_RBBM_STATUS_UCHE_BUSY 0x00800000 -#define A3XX_RBBM_STATUS_SP_BUSY 0x01000000 -#define A3XX_RBBM_STATUS_TPL1_BUSY 0x02000000 -#define A3XX_RBBM_STATUS_MARB_BUSY 0x04000000 -#define A3XX_RBBM_STATUS_VSC_BUSY 0x08000000 -#define A3XX_RBBM_STATUS_ARB_BUSY 0x10000000 -#define A3XX_RBBM_STATUS_HLSQ_BUSY 0x20000000 -#define A3XX_RBBM_STATUS_GPU_BUSY_NOHC 0x40000000 -#define A3XX_RBBM_STATUS_GPU_BUSY 0x80000000 - -#define REG_A3XX_RBBM_NQWAIT_UNTIL 0x00000040 - -#define REG_A3XX_RBBM_WAIT_IDLE_CLOCKS_CTL 0x00000033 - -#define REG_A3XX_RBBM_INTERFACE_HANG_INT_CTL 0x00000050 - -#define REG_A3XX_RBBM_INTERFACE_HANG_MASK_CTL0 0x00000051 - -#define REG_A3XX_RBBM_INTERFACE_HANG_MASK_CTL1 0x00000054 - -#define REG_A3XX_RBBM_INTERFACE_HANG_MASK_CTL2 
0x00000057 - -#define REG_A3XX_RBBM_INTERFACE_HANG_MASK_CTL3 0x0000005a - -#define REG_A3XX_RBBM_INT_SET_CMD 0x00000060 - -#define REG_A3XX_RBBM_INT_CLEAR_CMD 0x00000061 - -#define REG_A3XX_RBBM_INT_0_MASK 0x00000063 - -#define REG_A3XX_RBBM_INT_0_STATUS 0x00000064 - -#define REG_A3XX_RBBM_PERFCTR_CTL 0x00000080 -#define A3XX_RBBM_PERFCTR_CTL_ENABLE 0x00000001 - -#define REG_A3XX_RBBM_PERFCTR_LOAD_CMD0 0x00000081 - -#define REG_A3XX_RBBM_PERFCTR_LOAD_CMD1 0x00000082 - -#define REG_A3XX_RBBM_PERFCTR_LOAD_VALUE_LO 0x00000084 - -#define REG_A3XX_RBBM_PERFCTR_LOAD_VALUE_HI 0x00000085 - -#define REG_A3XX_RBBM_PERFCOUNTER0_SELECT 0x00000086 - -#define REG_A3XX_RBBM_PERFCOUNTER1_SELECT 0x00000087 - -#define REG_A3XX_RBBM_GPU_BUSY_MASKED 0x00000088 - -#define REG_A3XX_RBBM_PERFCTR_CP_0_LO 0x00000090 - -#define REG_A3XX_RBBM_PERFCTR_CP_0_HI 0x00000091 - -#define REG_A3XX_RBBM_PERFCTR_RBBM_0_LO 0x00000092 - -#define REG_A3XX_RBBM_PERFCTR_RBBM_0_HI 0x00000093 - -#define REG_A3XX_RBBM_PERFCTR_RBBM_1_LO 0x00000094 - -#define REG_A3XX_RBBM_PERFCTR_RBBM_1_HI 0x00000095 - -#define REG_A3XX_RBBM_PERFCTR_PC_0_LO 0x00000096 - -#define REG_A3XX_RBBM_PERFCTR_PC_0_HI 0x00000097 - -#define REG_A3XX_RBBM_PERFCTR_PC_1_LO 0x00000098 - -#define REG_A3XX_RBBM_PERFCTR_PC_1_HI 0x00000099 - -#define REG_A3XX_RBBM_PERFCTR_PC_2_LO 0x0000009a - -#define REG_A3XX_RBBM_PERFCTR_PC_2_HI 0x0000009b - -#define REG_A3XX_RBBM_PERFCTR_PC_3_LO 0x0000009c - -#define REG_A3XX_RBBM_PERFCTR_PC_3_HI 0x0000009d - -#define REG_A3XX_RBBM_PERFCTR_VFD_0_LO 0x0000009e - -#define REG_A3XX_RBBM_PERFCTR_VFD_0_HI 0x0000009f - -#define REG_A3XX_RBBM_PERFCTR_VFD_1_LO 0x000000a0 - -#define REG_A3XX_RBBM_PERFCTR_VFD_1_HI 0x000000a1 - -#define REG_A3XX_RBBM_PERFCTR_HLSQ_0_LO 0x000000a2 - -#define REG_A3XX_RBBM_PERFCTR_HLSQ_0_HI 0x000000a3 - -#define REG_A3XX_RBBM_PERFCTR_HLSQ_1_LO 0x000000a4 - -#define REG_A3XX_RBBM_PERFCTR_HLSQ_1_HI 0x000000a5 - -#define REG_A3XX_RBBM_PERFCTR_HLSQ_2_LO 0x000000a6 - -#define 
REG_A3XX_RBBM_PERFCTR_HLSQ_2_HI 0x000000a7 - -#define REG_A3XX_RBBM_PERFCTR_HLSQ_3_LO 0x000000a8 - -#define REG_A3XX_RBBM_PERFCTR_HLSQ_3_HI 0x000000a9 - -#define REG_A3XX_RBBM_PERFCTR_HLSQ_4_LO 0x000000aa - -#define REG_A3XX_RBBM_PERFCTR_HLSQ_4_HI 0x000000ab - -#define REG_A3XX_RBBM_PERFCTR_HLSQ_5_LO 0x000000ac - -#define REG_A3XX_RBBM_PERFCTR_HLSQ_5_HI 0x000000ad - -#define REG_A3XX_RBBM_PERFCTR_VPC_0_LO 0x000000ae - -#define REG_A3XX_RBBM_PERFCTR_VPC_0_HI 0x000000af - -#define REG_A3XX_RBBM_PERFCTR_VPC_1_LO 0x000000b0 - -#define REG_A3XX_RBBM_PERFCTR_VPC_1_HI 0x000000b1 - -#define REG_A3XX_RBBM_PERFCTR_TSE_0_LO 0x000000b2 - -#define REG_A3XX_RBBM_PERFCTR_TSE_0_HI 0x000000b3 - -#define REG_A3XX_RBBM_PERFCTR_TSE_1_LO 0x000000b4 - -#define REG_A3XX_RBBM_PERFCTR_TSE_1_HI 0x000000b5 - -#define REG_A3XX_RBBM_PERFCTR_RAS_0_LO 0x000000b6 - -#define REG_A3XX_RBBM_PERFCTR_RAS_0_HI 0x000000b7 - -#define REG_A3XX_RBBM_PERFCTR_RAS_1_LO 0x000000b8 - -#define REG_A3XX_RBBM_PERFCTR_RAS_1_HI 0x000000b9 - -#define REG_A3XX_RBBM_PERFCTR_UCHE_0_LO 0x000000ba - -#define REG_A3XX_RBBM_PERFCTR_UCHE_0_HI 0x000000bb - -#define REG_A3XX_RBBM_PERFCTR_UCHE_1_LO 0x000000bc - -#define REG_A3XX_RBBM_PERFCTR_UCHE_1_HI 0x000000bd - -#define REG_A3XX_RBBM_PERFCTR_UCHE_2_LO 0x000000be - -#define REG_A3XX_RBBM_PERFCTR_UCHE_2_HI 0x000000bf - -#define REG_A3XX_RBBM_PERFCTR_UCHE_3_LO 0x000000c0 - -#define REG_A3XX_RBBM_PERFCTR_UCHE_3_HI 0x000000c1 - -#define REG_A3XX_RBBM_PERFCTR_UCHE_4_LO 0x000000c2 - -#define REG_A3XX_RBBM_PERFCTR_UCHE_4_HI 0x000000c3 - -#define REG_A3XX_RBBM_PERFCTR_UCHE_5_LO 0x000000c4 - -#define REG_A3XX_RBBM_PERFCTR_UCHE_5_HI 0x000000c5 - -#define REG_A3XX_RBBM_PERFCTR_TP_0_LO 0x000000c6 - -#define REG_A3XX_RBBM_PERFCTR_TP_0_HI 0x000000c7 - -#define REG_A3XX_RBBM_PERFCTR_TP_1_LO 0x000000c8 - -#define REG_A3XX_RBBM_PERFCTR_TP_1_HI 0x000000c9 - -#define REG_A3XX_RBBM_PERFCTR_TP_2_LO 0x000000ca - -#define REG_A3XX_RBBM_PERFCTR_TP_2_HI 0x000000cb - -#define 
REG_A3XX_RBBM_PERFCTR_TP_3_LO 0x000000cc - -#define REG_A3XX_RBBM_PERFCTR_TP_3_HI 0x000000cd - -#define REG_A3XX_RBBM_PERFCTR_TP_4_LO 0x000000ce - -#define REG_A3XX_RBBM_PERFCTR_TP_4_HI 0x000000cf - -#define REG_A3XX_RBBM_PERFCTR_TP_5_LO 0x000000d0 - -#define REG_A3XX_RBBM_PERFCTR_TP_5_HI 0x000000d1 - -#define REG_A3XX_RBBM_PERFCTR_SP_0_LO 0x000000d2 - -#define REG_A3XX_RBBM_PERFCTR_SP_0_HI 0x000000d3 - -#define REG_A3XX_RBBM_PERFCTR_SP_1_LO 0x000000d4 - -#define REG_A3XX_RBBM_PERFCTR_SP_1_HI 0x000000d5 - -#define REG_A3XX_RBBM_PERFCTR_SP_2_LO 0x000000d6 - -#define REG_A3XX_RBBM_PERFCTR_SP_2_HI 0x000000d7 - -#define REG_A3XX_RBBM_PERFCTR_SP_3_LO 0x000000d8 - -#define REG_A3XX_RBBM_PERFCTR_SP_3_HI 0x000000d9 - -#define REG_A3XX_RBBM_PERFCTR_SP_4_LO 0x000000da - -#define REG_A3XX_RBBM_PERFCTR_SP_4_HI 0x000000db - -#define REG_A3XX_RBBM_PERFCTR_SP_5_LO 0x000000dc - -#define REG_A3XX_RBBM_PERFCTR_SP_5_HI 0x000000dd - -#define REG_A3XX_RBBM_PERFCTR_SP_6_LO 0x000000de - -#define REG_A3XX_RBBM_PERFCTR_SP_6_HI 0x000000df - -#define REG_A3XX_RBBM_PERFCTR_SP_7_LO 0x000000e0 - -#define REG_A3XX_RBBM_PERFCTR_SP_7_HI 0x000000e1 - -#define REG_A3XX_RBBM_PERFCTR_RB_0_LO 0x000000e2 - -#define REG_A3XX_RBBM_PERFCTR_RB_0_HI 0x000000e3 - -#define REG_A3XX_RBBM_PERFCTR_RB_1_LO 0x000000e4 - -#define REG_A3XX_RBBM_PERFCTR_RB_1_HI 0x000000e5 - -#define REG_A3XX_RBBM_PERFCTR_PWR_0_LO 0x000000ea - -#define REG_A3XX_RBBM_PERFCTR_PWR_0_HI 0x000000eb - -#define REG_A3XX_RBBM_PERFCTR_PWR_1_LO 0x000000ec - -#define REG_A3XX_RBBM_PERFCTR_PWR_1_HI 0x000000ed - -#define REG_A3XX_RBBM_RBBM_CTL 0x00000100 - -#define REG_A3XX_RBBM_DEBUG_BUS_CTL 0x00000111 - -#define REG_A3XX_RBBM_DEBUG_BUS_DATA_STATUS 0x00000112 - -#define REG_A3XX_CP_PFP_UCODE_ADDR 0x000001c9 - -#define REG_A3XX_CP_PFP_UCODE_DATA 0x000001ca - -#define REG_A3XX_CP_ROQ_ADDR 0x000001cc - -#define REG_A3XX_CP_ROQ_DATA 0x000001cd - -#define REG_A3XX_CP_MERCIU_ADDR 0x000001d1 - -#define REG_A3XX_CP_MERCIU_DATA 0x000001d2 - -#define 
REG_A3XX_CP_MERCIU_DATA2 0x000001d3 - -#define REG_A3XX_CP_MEQ_ADDR 0x000001da - -#define REG_A3XX_CP_MEQ_DATA 0x000001db - -#define REG_A3XX_CP_WFI_PEND_CTR 0x000001f5 - -#define REG_A3XX_RBBM_PM_OVERRIDE2 0x0000039d - -#define REG_A3XX_CP_PERFCOUNTER_SELECT 0x00000445 - -#define REG_A3XX_CP_HW_FAULT 0x0000045c - -#define REG_A3XX_CP_PROTECT_CTRL 0x0000045e - -#define REG_A3XX_CP_PROTECT_STATUS 0x0000045f - -static inline uint32_t REG_A3XX_CP_PROTECT(uint32_t i0) { return 0x00000460 + 0x1*i0; } - -static inline uint32_t REG_A3XX_CP_PROTECT_REG(uint32_t i0) { return 0x00000460 + 0x1*i0; } - -#define REG_A3XX_CP_AHB_FAULT 0x0000054d - -#define REG_A3XX_SQ_GPR_MANAGEMENT 0x00000d00 - -#define REG_A3XX_SQ_INST_STORE_MANAGMENT 0x00000d02 - -#define REG_A3XX_TP0_CHICKEN 0x00000e1e - -#define REG_A3XX_SP_GLOBAL_MEM_SIZE 0x00000e22 - -#define REG_A3XX_SP_GLOBAL_MEM_ADDR 0x00000e23 - -#define REG_A3XX_GRAS_CL_CLIP_CNTL 0x00002040 -#define A3XX_GRAS_CL_CLIP_CNTL_IJ_PERSP_CENTER 0x00001000 -#define A3XX_GRAS_CL_CLIP_CNTL_CLIP_DISABLE 0x00010000 -#define A3XX_GRAS_CL_CLIP_CNTL_ZFAR_CLIP_DISABLE 0x00020000 -#define A3XX_GRAS_CL_CLIP_CNTL_VP_CLIP_CODE_IGNORE 0x00080000 -#define A3XX_GRAS_CL_CLIP_CNTL_VP_XFORM_DISABLE 0x00100000 -#define A3XX_GRAS_CL_CLIP_CNTL_PERSP_DIVISION_DISABLE 0x00200000 -#define A3XX_GRAS_CL_CLIP_CNTL_ZERO_GB_SCALE_Z 0x00400000 -#define A3XX_GRAS_CL_CLIP_CNTL_ZCOORD 0x00800000 -#define A3XX_GRAS_CL_CLIP_CNTL_WCOORD 0x01000000 -#define A3XX_GRAS_CL_CLIP_CNTL_ZCLIP_DISABLE 0x02000000 -#define A3XX_GRAS_CL_CLIP_CNTL_NUM_USER_CLIP_PLANES__MASK 0x1c000000 -#define A3XX_GRAS_CL_CLIP_CNTL_NUM_USER_CLIP_PLANES__SHIFT 26 -static inline uint32_t A3XX_GRAS_CL_CLIP_CNTL_NUM_USER_CLIP_PLANES(uint32_t val) -{ - return ((val) << A3XX_GRAS_CL_CLIP_CNTL_NUM_USER_CLIP_PLANES__SHIFT) & A3XX_GRAS_CL_CLIP_CNTL_NUM_USER_CLIP_PLANES__MASK; -} - -#define REG_A3XX_GRAS_CL_GB_CLIP_ADJ 0x00002044 -#define A3XX_GRAS_CL_GB_CLIP_ADJ_HORZ__MASK 0x000003ff -#define 
A3XX_GRAS_CL_GB_CLIP_ADJ_HORZ__SHIFT 0 -static inline uint32_t A3XX_GRAS_CL_GB_CLIP_ADJ_HORZ(uint32_t val) -{ - return ((val) << A3XX_GRAS_CL_GB_CLIP_ADJ_HORZ__SHIFT) & A3XX_GRAS_CL_GB_CLIP_ADJ_HORZ__MASK; -} -#define A3XX_GRAS_CL_GB_CLIP_ADJ_VERT__MASK 0x000ffc00 -#define A3XX_GRAS_CL_GB_CLIP_ADJ_VERT__SHIFT 10 -static inline uint32_t A3XX_GRAS_CL_GB_CLIP_ADJ_VERT(uint32_t val) -{ - return ((val) << A3XX_GRAS_CL_GB_CLIP_ADJ_VERT__SHIFT) & A3XX_GRAS_CL_GB_CLIP_ADJ_VERT__MASK; -} - -#define REG_A3XX_GRAS_CL_VPORT_XOFFSET 0x00002048 -#define A3XX_GRAS_CL_VPORT_XOFFSET__MASK 0xffffffff -#define A3XX_GRAS_CL_VPORT_XOFFSET__SHIFT 0 -static inline uint32_t A3XX_GRAS_CL_VPORT_XOFFSET(float val) -{ - return ((fui(val)) << A3XX_GRAS_CL_VPORT_XOFFSET__SHIFT) & A3XX_GRAS_CL_VPORT_XOFFSET__MASK; -} - -#define REG_A3XX_GRAS_CL_VPORT_XSCALE 0x00002049 -#define A3XX_GRAS_CL_VPORT_XSCALE__MASK 0xffffffff -#define A3XX_GRAS_CL_VPORT_XSCALE__SHIFT 0 -static inline uint32_t A3XX_GRAS_CL_VPORT_XSCALE(float val) -{ - return ((fui(val)) << A3XX_GRAS_CL_VPORT_XSCALE__SHIFT) & A3XX_GRAS_CL_VPORT_XSCALE__MASK; -} - -#define REG_A3XX_GRAS_CL_VPORT_YOFFSET 0x0000204a -#define A3XX_GRAS_CL_VPORT_YOFFSET__MASK 0xffffffff -#define A3XX_GRAS_CL_VPORT_YOFFSET__SHIFT 0 -static inline uint32_t A3XX_GRAS_CL_VPORT_YOFFSET(float val) -{ - return ((fui(val)) << A3XX_GRAS_CL_VPORT_YOFFSET__SHIFT) & A3XX_GRAS_CL_VPORT_YOFFSET__MASK; -} - -#define REG_A3XX_GRAS_CL_VPORT_YSCALE 0x0000204b -#define A3XX_GRAS_CL_VPORT_YSCALE__MASK 0xffffffff -#define A3XX_GRAS_CL_VPORT_YSCALE__SHIFT 0 -static inline uint32_t A3XX_GRAS_CL_VPORT_YSCALE(float val) -{ - return ((fui(val)) << A3XX_GRAS_CL_VPORT_YSCALE__SHIFT) & A3XX_GRAS_CL_VPORT_YSCALE__MASK; -} - -#define REG_A3XX_GRAS_CL_VPORT_ZOFFSET 0x0000204c -#define A3XX_GRAS_CL_VPORT_ZOFFSET__MASK 0xffffffff -#define A3XX_GRAS_CL_VPORT_ZOFFSET__SHIFT 0 -static inline uint32_t A3XX_GRAS_CL_VPORT_ZOFFSET(float val) -{ - return ((fui(val)) << 
A3XX_GRAS_CL_VPORT_ZOFFSET__SHIFT) & A3XX_GRAS_CL_VPORT_ZOFFSET__MASK; -} - -#define REG_A3XX_GRAS_CL_VPORT_ZSCALE 0x0000204d -#define A3XX_GRAS_CL_VPORT_ZSCALE__MASK 0xffffffff -#define A3XX_GRAS_CL_VPORT_ZSCALE__SHIFT 0 -static inline uint32_t A3XX_GRAS_CL_VPORT_ZSCALE(float val) -{ - return ((fui(val)) << A3XX_GRAS_CL_VPORT_ZSCALE__SHIFT) & A3XX_GRAS_CL_VPORT_ZSCALE__MASK; -} - -#define REG_A3XX_GRAS_SU_POINT_MINMAX 0x00002068 -#define A3XX_GRAS_SU_POINT_MINMAX_MIN__MASK 0x0000ffff -#define A3XX_GRAS_SU_POINT_MINMAX_MIN__SHIFT 0 -static inline uint32_t A3XX_GRAS_SU_POINT_MINMAX_MIN(float val) -{ - return ((((uint32_t)(val * 16.0))) << A3XX_GRAS_SU_POINT_MINMAX_MIN__SHIFT) & A3XX_GRAS_SU_POINT_MINMAX_MIN__MASK; -} -#define A3XX_GRAS_SU_POINT_MINMAX_MAX__MASK 0xffff0000 -#define A3XX_GRAS_SU_POINT_MINMAX_MAX__SHIFT 16 -static inline uint32_t A3XX_GRAS_SU_POINT_MINMAX_MAX(float val) -{ - return ((((uint32_t)(val * 16.0))) << A3XX_GRAS_SU_POINT_MINMAX_MAX__SHIFT) & A3XX_GRAS_SU_POINT_MINMAX_MAX__MASK; -} - -#define REG_A3XX_GRAS_SU_POINT_SIZE 0x00002069 -#define A3XX_GRAS_SU_POINT_SIZE__MASK 0xffffffff -#define A3XX_GRAS_SU_POINT_SIZE__SHIFT 0 -static inline uint32_t A3XX_GRAS_SU_POINT_SIZE(float val) -{ - return ((((int32_t)(val * 16.0))) << A3XX_GRAS_SU_POINT_SIZE__SHIFT) & A3XX_GRAS_SU_POINT_SIZE__MASK; -} - -#define REG_A3XX_GRAS_SU_POLY_OFFSET_SCALE 0x0000206c -#define A3XX_GRAS_SU_POLY_OFFSET_SCALE_VAL__MASK 0x00ffffff -#define A3XX_GRAS_SU_POLY_OFFSET_SCALE_VAL__SHIFT 0 -static inline uint32_t A3XX_GRAS_SU_POLY_OFFSET_SCALE_VAL(float val) -{ - return ((((int32_t)(val * 1048576.0))) << A3XX_GRAS_SU_POLY_OFFSET_SCALE_VAL__SHIFT) & A3XX_GRAS_SU_POLY_OFFSET_SCALE_VAL__MASK; -} - -#define REG_A3XX_GRAS_SU_POLY_OFFSET_OFFSET 0x0000206d -#define A3XX_GRAS_SU_POLY_OFFSET_OFFSET__MASK 0xffffffff -#define A3XX_GRAS_SU_POLY_OFFSET_OFFSET__SHIFT 0 -static inline uint32_t A3XX_GRAS_SU_POLY_OFFSET_OFFSET(float val) -{ - return ((((int32_t)(val * 64.0))) << 
A3XX_GRAS_SU_POLY_OFFSET_OFFSET__SHIFT) & A3XX_GRAS_SU_POLY_OFFSET_OFFSET__MASK; -} - -#define REG_A3XX_GRAS_SU_MODE_CONTROL 0x00002070 -#define A3XX_GRAS_SU_MODE_CONTROL_CULL_FRONT 0x00000001 -#define A3XX_GRAS_SU_MODE_CONTROL_CULL_BACK 0x00000002 -#define A3XX_GRAS_SU_MODE_CONTROL_FRONT_CW 0x00000004 -#define A3XX_GRAS_SU_MODE_CONTROL_LINEHALFWIDTH__MASK 0x000007f8 -#define A3XX_GRAS_SU_MODE_CONTROL_LINEHALFWIDTH__SHIFT 3 -static inline uint32_t A3XX_GRAS_SU_MODE_CONTROL_LINEHALFWIDTH(float val) -{ - return ((((int32_t)(val * 4.0))) << A3XX_GRAS_SU_MODE_CONTROL_LINEHALFWIDTH__SHIFT) & A3XX_GRAS_SU_MODE_CONTROL_LINEHALFWIDTH__MASK; -} -#define A3XX_GRAS_SU_MODE_CONTROL_POLY_OFFSET 0x00000800 - -#define REG_A3XX_GRAS_SC_CONTROL 0x00002072 -#define A3XX_GRAS_SC_CONTROL_RENDER_MODE__MASK 0x000000f0 -#define A3XX_GRAS_SC_CONTROL_RENDER_MODE__SHIFT 4 -static inline uint32_t A3XX_GRAS_SC_CONTROL_RENDER_MODE(enum a3xx_render_mode val) -{ - return ((val) << A3XX_GRAS_SC_CONTROL_RENDER_MODE__SHIFT) & A3XX_GRAS_SC_CONTROL_RENDER_MODE__MASK; -} -#define A3XX_GRAS_SC_CONTROL_MSAA_SAMPLES__MASK 0x00000f00 -#define A3XX_GRAS_SC_CONTROL_MSAA_SAMPLES__SHIFT 8 -static inline uint32_t A3XX_GRAS_SC_CONTROL_MSAA_SAMPLES(enum a3xx_msaa_samples val) -{ - return ((val) << A3XX_GRAS_SC_CONTROL_MSAA_SAMPLES__SHIFT) & A3XX_GRAS_SC_CONTROL_MSAA_SAMPLES__MASK; -} -#define A3XX_GRAS_SC_CONTROL_RASTER_MODE__MASK 0x0000f000 -#define A3XX_GRAS_SC_CONTROL_RASTER_MODE__SHIFT 12 -static inline uint32_t A3XX_GRAS_SC_CONTROL_RASTER_MODE(uint32_t val) -{ - return ((val) << A3XX_GRAS_SC_CONTROL_RASTER_MODE__SHIFT) & A3XX_GRAS_SC_CONTROL_RASTER_MODE__MASK; -} - -#define REG_A3XX_GRAS_SC_SCREEN_SCISSOR_TL 0x00002074 -#define A3XX_GRAS_SC_SCREEN_SCISSOR_TL_WINDOW_OFFSET_DISABLE 0x80000000 -#define A3XX_GRAS_SC_SCREEN_SCISSOR_TL_X__MASK 0x00007fff -#define A3XX_GRAS_SC_SCREEN_SCISSOR_TL_X__SHIFT 0 -static inline uint32_t A3XX_GRAS_SC_SCREEN_SCISSOR_TL_X(uint32_t val) -{ - return ((val) << 
A3XX_GRAS_SC_SCREEN_SCISSOR_TL_X__SHIFT) & A3XX_GRAS_SC_SCREEN_SCISSOR_TL_X__MASK; -} -#define A3XX_GRAS_SC_SCREEN_SCISSOR_TL_Y__MASK 0x7fff0000 -#define A3XX_GRAS_SC_SCREEN_SCISSOR_TL_Y__SHIFT 16 -static inline uint32_t A3XX_GRAS_SC_SCREEN_SCISSOR_TL_Y(uint32_t val) -{ - return ((val) << A3XX_GRAS_SC_SCREEN_SCISSOR_TL_Y__SHIFT) & A3XX_GRAS_SC_SCREEN_SCISSOR_TL_Y__MASK; -} - -#define REG_A3XX_GRAS_SC_SCREEN_SCISSOR_BR 0x00002075 -#define A3XX_GRAS_SC_SCREEN_SCISSOR_BR_WINDOW_OFFSET_DISABLE 0x80000000 -#define A3XX_GRAS_SC_SCREEN_SCISSOR_BR_X__MASK 0x00007fff -#define A3XX_GRAS_SC_SCREEN_SCISSOR_BR_X__SHIFT 0 -static inline uint32_t A3XX_GRAS_SC_SCREEN_SCISSOR_BR_X(uint32_t val) -{ - return ((val) << A3XX_GRAS_SC_SCREEN_SCISSOR_BR_X__SHIFT) & A3XX_GRAS_SC_SCREEN_SCISSOR_BR_X__MASK; -} -#define A3XX_GRAS_SC_SCREEN_SCISSOR_BR_Y__MASK 0x7fff0000 -#define A3XX_GRAS_SC_SCREEN_SCISSOR_BR_Y__SHIFT 16 -static inline uint32_t A3XX_GRAS_SC_SCREEN_SCISSOR_BR_Y(uint32_t val) -{ - return ((val) << A3XX_GRAS_SC_SCREEN_SCISSOR_BR_Y__SHIFT) & A3XX_GRAS_SC_SCREEN_SCISSOR_BR_Y__MASK; -} - -#define REG_A3XX_GRAS_SC_WINDOW_SCISSOR_TL 0x00002079 -#define A3XX_GRAS_SC_WINDOW_SCISSOR_TL_WINDOW_OFFSET_DISABLE 0x80000000 -#define A3XX_GRAS_SC_WINDOW_SCISSOR_TL_X__MASK 0x00007fff -#define A3XX_GRAS_SC_WINDOW_SCISSOR_TL_X__SHIFT 0 -static inline uint32_t A3XX_GRAS_SC_WINDOW_SCISSOR_TL_X(uint32_t val) -{ - return ((val) << A3XX_GRAS_SC_WINDOW_SCISSOR_TL_X__SHIFT) & A3XX_GRAS_SC_WINDOW_SCISSOR_TL_X__MASK; -} -#define A3XX_GRAS_SC_WINDOW_SCISSOR_TL_Y__MASK 0x7fff0000 -#define A3XX_GRAS_SC_WINDOW_SCISSOR_TL_Y__SHIFT 16 -static inline uint32_t A3XX_GRAS_SC_WINDOW_SCISSOR_TL_Y(uint32_t val) -{ - return ((val) << A3XX_GRAS_SC_WINDOW_SCISSOR_TL_Y__SHIFT) & A3XX_GRAS_SC_WINDOW_SCISSOR_TL_Y__MASK; -} - -#define REG_A3XX_GRAS_SC_WINDOW_SCISSOR_BR 0x0000207a -#define A3XX_GRAS_SC_WINDOW_SCISSOR_BR_WINDOW_OFFSET_DISABLE 0x80000000 -#define A3XX_GRAS_SC_WINDOW_SCISSOR_BR_X__MASK 0x00007fff -#define 
A3XX_GRAS_SC_WINDOW_SCISSOR_BR_X__SHIFT 0 -static inline uint32_t A3XX_GRAS_SC_WINDOW_SCISSOR_BR_X(uint32_t val) -{ - return ((val) << A3XX_GRAS_SC_WINDOW_SCISSOR_BR_X__SHIFT) & A3XX_GRAS_SC_WINDOW_SCISSOR_BR_X__MASK; -} -#define A3XX_GRAS_SC_WINDOW_SCISSOR_BR_Y__MASK 0x7fff0000 -#define A3XX_GRAS_SC_WINDOW_SCISSOR_BR_Y__SHIFT 16 -static inline uint32_t A3XX_GRAS_SC_WINDOW_SCISSOR_BR_Y(uint32_t val) -{ - return ((val) << A3XX_GRAS_SC_WINDOW_SCISSOR_BR_Y__SHIFT) & A3XX_GRAS_SC_WINDOW_SCISSOR_BR_Y__MASK; -} - -#define REG_A3XX_RB_MODE_CONTROL 0x000020c0 -#define A3XX_RB_MODE_CONTROL_GMEM_BYPASS 0x00000080 -#define A3XX_RB_MODE_CONTROL_RENDER_MODE__MASK 0x00000700 -#define A3XX_RB_MODE_CONTROL_RENDER_MODE__SHIFT 8 -static inline uint32_t A3XX_RB_MODE_CONTROL_RENDER_MODE(enum a3xx_render_mode val) -{ - return ((val) << A3XX_RB_MODE_CONTROL_RENDER_MODE__SHIFT) & A3XX_RB_MODE_CONTROL_RENDER_MODE__MASK; -} -#define A3XX_RB_MODE_CONTROL_MRT__MASK 0x00003000 -#define A3XX_RB_MODE_CONTROL_MRT__SHIFT 12 -static inline uint32_t A3XX_RB_MODE_CONTROL_MRT(uint32_t val) -{ - return ((val) << A3XX_RB_MODE_CONTROL_MRT__SHIFT) & A3XX_RB_MODE_CONTROL_MRT__MASK; -} -#define A3XX_RB_MODE_CONTROL_MARB_CACHE_SPLIT_MODE 0x00008000 -#define A3XX_RB_MODE_CONTROL_PACKER_TIMER_ENABLE 0x00010000 - -#define REG_A3XX_RB_RENDER_CONTROL 0x000020c1 -#define A3XX_RB_RENDER_CONTROL_DUAL_COLOR_IN_ENABLE 0x00000001 -#define A3XX_RB_RENDER_CONTROL_YUV_IN_ENABLE 0x00000002 -#define A3XX_RB_RENDER_CONTROL_COV_VALUE_INPUT_ENABLE 0x00000004 -#define A3XX_RB_RENDER_CONTROL_FACENESS 0x00000008 -#define A3XX_RB_RENDER_CONTROL_BIN_WIDTH__MASK 0x00000ff0 -#define A3XX_RB_RENDER_CONTROL_BIN_WIDTH__SHIFT 4 -static inline uint32_t A3XX_RB_RENDER_CONTROL_BIN_WIDTH(uint32_t val) -{ - assert(!(val & 0x1f)); - return ((val >> 5) << A3XX_RB_RENDER_CONTROL_BIN_WIDTH__SHIFT) & A3XX_RB_RENDER_CONTROL_BIN_WIDTH__MASK; -} -#define A3XX_RB_RENDER_CONTROL_DISABLE_COLOR_PIPE 0x00001000 -#define A3XX_RB_RENDER_CONTROL_ENABLE_GMEM 
0x00002000 -#define A3XX_RB_RENDER_CONTROL_XCOORD 0x00004000 -#define A3XX_RB_RENDER_CONTROL_YCOORD 0x00008000 -#define A3XX_RB_RENDER_CONTROL_ZCOORD 0x00010000 -#define A3XX_RB_RENDER_CONTROL_WCOORD 0x00020000 -#define A3XX_RB_RENDER_CONTROL_I_CLAMP_ENABLE 0x00080000 -#define A3XX_RB_RENDER_CONTROL_COV_VALUE_OUTPUT_ENABLE 0x00100000 -#define A3XX_RB_RENDER_CONTROL_ALPHA_TEST 0x00400000 -#define A3XX_RB_RENDER_CONTROL_ALPHA_TEST_FUNC__MASK 0x07000000 -#define A3XX_RB_RENDER_CONTROL_ALPHA_TEST_FUNC__SHIFT 24 -static inline uint32_t A3XX_RB_RENDER_CONTROL_ALPHA_TEST_FUNC(enum adreno_compare_func val) -{ - return ((val) << A3XX_RB_RENDER_CONTROL_ALPHA_TEST_FUNC__SHIFT) & A3XX_RB_RENDER_CONTROL_ALPHA_TEST_FUNC__MASK; -} -#define A3XX_RB_RENDER_CONTROL_ALPHA_TO_COVERAGE 0x40000000 -#define A3XX_RB_RENDER_CONTROL_ALPHA_TO_ONE 0x80000000 - -#define REG_A3XX_RB_MSAA_CONTROL 0x000020c2 -#define A3XX_RB_MSAA_CONTROL_DISABLE 0x00000400 -#define A3XX_RB_MSAA_CONTROL_SAMPLES__MASK 0x0000f000 -#define A3XX_RB_MSAA_CONTROL_SAMPLES__SHIFT 12 -static inline uint32_t A3XX_RB_MSAA_CONTROL_SAMPLES(enum a3xx_msaa_samples val) -{ - return ((val) << A3XX_RB_MSAA_CONTROL_SAMPLES__SHIFT) & A3XX_RB_MSAA_CONTROL_SAMPLES__MASK; -} -#define A3XX_RB_MSAA_CONTROL_SAMPLE_MASK__MASK 0xffff0000 -#define A3XX_RB_MSAA_CONTROL_SAMPLE_MASK__SHIFT 16 -static inline uint32_t A3XX_RB_MSAA_CONTROL_SAMPLE_MASK(uint32_t val) -{ - return ((val) << A3XX_RB_MSAA_CONTROL_SAMPLE_MASK__SHIFT) & A3XX_RB_MSAA_CONTROL_SAMPLE_MASK__MASK; -} - -#define REG_A3XX_RB_ALPHA_REF 0x000020c3 -#define A3XX_RB_ALPHA_REF_UINT__MASK 0x0000ff00 -#define A3XX_RB_ALPHA_REF_UINT__SHIFT 8 -static inline uint32_t A3XX_RB_ALPHA_REF_UINT(uint32_t val) -{ - return ((val) << A3XX_RB_ALPHA_REF_UINT__SHIFT) & A3XX_RB_ALPHA_REF_UINT__MASK; -} -#define A3XX_RB_ALPHA_REF_FLOAT__MASK 0xffff0000 -#define A3XX_RB_ALPHA_REF_FLOAT__SHIFT 16 -static inline uint32_t A3XX_RB_ALPHA_REF_FLOAT(float val) -{ - return ((util_float_to_half(val)) << 
A3XX_RB_ALPHA_REF_FLOAT__SHIFT) & A3XX_RB_ALPHA_REF_FLOAT__MASK; -} - -static inline uint32_t REG_A3XX_RB_MRT(uint32_t i0) { return 0x000020c4 + 0x4*i0; } - -static inline uint32_t REG_A3XX_RB_MRT_CONTROL(uint32_t i0) { return 0x000020c4 + 0x4*i0; } -#define A3XX_RB_MRT_CONTROL_READ_DEST_ENABLE 0x00000008 -#define A3XX_RB_MRT_CONTROL_BLEND 0x00000010 -#define A3XX_RB_MRT_CONTROL_BLEND2 0x00000020 -#define A3XX_RB_MRT_CONTROL_ROP_CODE__MASK 0x00000f00 -#define A3XX_RB_MRT_CONTROL_ROP_CODE__SHIFT 8 -static inline uint32_t A3XX_RB_MRT_CONTROL_ROP_CODE(enum a3xx_rop_code val) -{ - return ((val) << A3XX_RB_MRT_CONTROL_ROP_CODE__SHIFT) & A3XX_RB_MRT_CONTROL_ROP_CODE__MASK; -} -#define A3XX_RB_MRT_CONTROL_DITHER_MODE__MASK 0x00003000 -#define A3XX_RB_MRT_CONTROL_DITHER_MODE__SHIFT 12 -static inline uint32_t A3XX_RB_MRT_CONTROL_DITHER_MODE(enum adreno_rb_dither_mode val) -{ - return ((val) << A3XX_RB_MRT_CONTROL_DITHER_MODE__SHIFT) & A3XX_RB_MRT_CONTROL_DITHER_MODE__MASK; -} -#define A3XX_RB_MRT_CONTROL_COMPONENT_ENABLE__MASK 0x0f000000 -#define A3XX_RB_MRT_CONTROL_COMPONENT_ENABLE__SHIFT 24 -static inline uint32_t A3XX_RB_MRT_CONTROL_COMPONENT_ENABLE(uint32_t val) -{ - return ((val) << A3XX_RB_MRT_CONTROL_COMPONENT_ENABLE__SHIFT) & A3XX_RB_MRT_CONTROL_COMPONENT_ENABLE__MASK; -} - -static inline uint32_t REG_A3XX_RB_MRT_BUF_INFO(uint32_t i0) { return 0x000020c5 + 0x4*i0; } -#define A3XX_RB_MRT_BUF_INFO_COLOR_FORMAT__MASK 0x0000003f -#define A3XX_RB_MRT_BUF_INFO_COLOR_FORMAT__SHIFT 0 -static inline uint32_t A3XX_RB_MRT_BUF_INFO_COLOR_FORMAT(enum a3xx_color_fmt val) -{ - return ((val) << A3XX_RB_MRT_BUF_INFO_COLOR_FORMAT__SHIFT) & A3XX_RB_MRT_BUF_INFO_COLOR_FORMAT__MASK; -} -#define A3XX_RB_MRT_BUF_INFO_COLOR_TILE_MODE__MASK 0x000000c0 -#define A3XX_RB_MRT_BUF_INFO_COLOR_TILE_MODE__SHIFT 6 -static inline uint32_t A3XX_RB_MRT_BUF_INFO_COLOR_TILE_MODE(enum a3xx_tile_mode val) -{ - return ((val) << A3XX_RB_MRT_BUF_INFO_COLOR_TILE_MODE__SHIFT) & 
A3XX_RB_MRT_BUF_INFO_COLOR_TILE_MODE__MASK; -} -#define A3XX_RB_MRT_BUF_INFO_COLOR_SWAP__MASK 0x00000c00 -#define A3XX_RB_MRT_BUF_INFO_COLOR_SWAP__SHIFT 10 -static inline uint32_t A3XX_RB_MRT_BUF_INFO_COLOR_SWAP(enum a3xx_color_swap val) -{ - return ((val) << A3XX_RB_MRT_BUF_INFO_COLOR_SWAP__SHIFT) & A3XX_RB_MRT_BUF_INFO_COLOR_SWAP__MASK; -} -#define A3XX_RB_MRT_BUF_INFO_COLOR_SRGB 0x00004000 -#define A3XX_RB_MRT_BUF_INFO_COLOR_BUF_PITCH__MASK 0xfffe0000 -#define A3XX_RB_MRT_BUF_INFO_COLOR_BUF_PITCH__SHIFT 17 -static inline uint32_t A3XX_RB_MRT_BUF_INFO_COLOR_BUF_PITCH(uint32_t val) -{ - assert(!(val & 0x1f)); - return ((val >> 5) << A3XX_RB_MRT_BUF_INFO_COLOR_BUF_PITCH__SHIFT) & A3XX_RB_MRT_BUF_INFO_COLOR_BUF_PITCH__MASK; -} - -static inline uint32_t REG_A3XX_RB_MRT_BUF_BASE(uint32_t i0) { return 0x000020c6 + 0x4*i0; } -#define A3XX_RB_MRT_BUF_BASE_COLOR_BUF_BASE__MASK 0xfffffff0 -#define A3XX_RB_MRT_BUF_BASE_COLOR_BUF_BASE__SHIFT 4 -static inline uint32_t A3XX_RB_MRT_BUF_BASE_COLOR_BUF_BASE(uint32_t val) -{ - assert(!(val & 0x1f)); - return ((val >> 5) << A3XX_RB_MRT_BUF_BASE_COLOR_BUF_BASE__SHIFT) & A3XX_RB_MRT_BUF_BASE_COLOR_BUF_BASE__MASK; -} - -static inline uint32_t REG_A3XX_RB_MRT_BLEND_CONTROL(uint32_t i0) { return 0x000020c7 + 0x4*i0; } -#define A3XX_RB_MRT_BLEND_CONTROL_RGB_SRC_FACTOR__MASK 0x0000001f -#define A3XX_RB_MRT_BLEND_CONTROL_RGB_SRC_FACTOR__SHIFT 0 -static inline uint32_t A3XX_RB_MRT_BLEND_CONTROL_RGB_SRC_FACTOR(enum adreno_rb_blend_factor val) -{ - return ((val) << A3XX_RB_MRT_BLEND_CONTROL_RGB_SRC_FACTOR__SHIFT) & A3XX_RB_MRT_BLEND_CONTROL_RGB_SRC_FACTOR__MASK; -} -#define A3XX_RB_MRT_BLEND_CONTROL_RGB_BLEND_OPCODE__MASK 0x000000e0 -#define A3XX_RB_MRT_BLEND_CONTROL_RGB_BLEND_OPCODE__SHIFT 5 -static inline uint32_t A3XX_RB_MRT_BLEND_CONTROL_RGB_BLEND_OPCODE(enum a3xx_rb_blend_opcode val) -{ - return ((val) << A3XX_RB_MRT_BLEND_CONTROL_RGB_BLEND_OPCODE__SHIFT) & A3XX_RB_MRT_BLEND_CONTROL_RGB_BLEND_OPCODE__MASK; -} -#define 
A3XX_RB_MRT_BLEND_CONTROL_RGB_DEST_FACTOR__MASK 0x00001f00 -#define A3XX_RB_MRT_BLEND_CONTROL_RGB_DEST_FACTOR__SHIFT 8 -static inline uint32_t A3XX_RB_MRT_BLEND_CONTROL_RGB_DEST_FACTOR(enum adreno_rb_blend_factor val) -{ - return ((val) << A3XX_RB_MRT_BLEND_CONTROL_RGB_DEST_FACTOR__SHIFT) & A3XX_RB_MRT_BLEND_CONTROL_RGB_DEST_FACTOR__MASK; -} -#define A3XX_RB_MRT_BLEND_CONTROL_ALPHA_SRC_FACTOR__MASK 0x001f0000 -#define A3XX_RB_MRT_BLEND_CONTROL_ALPHA_SRC_FACTOR__SHIFT 16 -static inline uint32_t A3XX_RB_MRT_BLEND_CONTROL_ALPHA_SRC_FACTOR(enum adreno_rb_blend_factor val) -{ - return ((val) << A3XX_RB_MRT_BLEND_CONTROL_ALPHA_SRC_FACTOR__SHIFT) & A3XX_RB_MRT_BLEND_CONTROL_ALPHA_SRC_FACTOR__MASK; -} -#define A3XX_RB_MRT_BLEND_CONTROL_ALPHA_BLEND_OPCODE__MASK 0x00e00000 -#define A3XX_RB_MRT_BLEND_CONTROL_ALPHA_BLEND_OPCODE__SHIFT 21 -static inline uint32_t A3XX_RB_MRT_BLEND_CONTROL_ALPHA_BLEND_OPCODE(enum a3xx_rb_blend_opcode val) -{ - return ((val) << A3XX_RB_MRT_BLEND_CONTROL_ALPHA_BLEND_OPCODE__SHIFT) & A3XX_RB_MRT_BLEND_CONTROL_ALPHA_BLEND_OPCODE__MASK; -} -#define A3XX_RB_MRT_BLEND_CONTROL_ALPHA_DEST_FACTOR__MASK 0x1f000000 -#define A3XX_RB_MRT_BLEND_CONTROL_ALPHA_DEST_FACTOR__SHIFT 24 -static inline uint32_t A3XX_RB_MRT_BLEND_CONTROL_ALPHA_DEST_FACTOR(enum adreno_rb_blend_factor val) -{ - return ((val) << A3XX_RB_MRT_BLEND_CONTROL_ALPHA_DEST_FACTOR__SHIFT) & A3XX_RB_MRT_BLEND_CONTROL_ALPHA_DEST_FACTOR__MASK; -} -#define A3XX_RB_MRT_BLEND_CONTROL_CLAMP_ENABLE 0x20000000 - -#define REG_A3XX_RB_BLEND_RED 0x000020e4 -#define A3XX_RB_BLEND_RED_UINT__MASK 0x000000ff -#define A3XX_RB_BLEND_RED_UINT__SHIFT 0 -static inline uint32_t A3XX_RB_BLEND_RED_UINT(uint32_t val) -{ - return ((val) << A3XX_RB_BLEND_RED_UINT__SHIFT) & A3XX_RB_BLEND_RED_UINT__MASK; -} -#define A3XX_RB_BLEND_RED_FLOAT__MASK 0xffff0000 -#define A3XX_RB_BLEND_RED_FLOAT__SHIFT 16 -static inline uint32_t A3XX_RB_BLEND_RED_FLOAT(float val) -{ - return ((util_float_to_half(val)) << 
A3XX_RB_BLEND_RED_FLOAT__SHIFT) & A3XX_RB_BLEND_RED_FLOAT__MASK; -} - -#define REG_A3XX_RB_BLEND_GREEN 0x000020e5 -#define A3XX_RB_BLEND_GREEN_UINT__MASK 0x000000ff -#define A3XX_RB_BLEND_GREEN_UINT__SHIFT 0 -static inline uint32_t A3XX_RB_BLEND_GREEN_UINT(uint32_t val) -{ - return ((val) << A3XX_RB_BLEND_GREEN_UINT__SHIFT) & A3XX_RB_BLEND_GREEN_UINT__MASK; -} -#define A3XX_RB_BLEND_GREEN_FLOAT__MASK 0xffff0000 -#define A3XX_RB_BLEND_GREEN_FLOAT__SHIFT 16 -static inline uint32_t A3XX_RB_BLEND_GREEN_FLOAT(float val) -{ - return ((util_float_to_half(val)) << A3XX_RB_BLEND_GREEN_FLOAT__SHIFT) & A3XX_RB_BLEND_GREEN_FLOAT__MASK; -} - -#define REG_A3XX_RB_BLEND_BLUE 0x000020e6 -#define A3XX_RB_BLEND_BLUE_UINT__MASK 0x000000ff -#define A3XX_RB_BLEND_BLUE_UINT__SHIFT 0 -static inline uint32_t A3XX_RB_BLEND_BLUE_UINT(uint32_t val) -{ - return ((val) << A3XX_RB_BLEND_BLUE_UINT__SHIFT) & A3XX_RB_BLEND_BLUE_UINT__MASK; -} -#define A3XX_RB_BLEND_BLUE_FLOAT__MASK 0xffff0000 -#define A3XX_RB_BLEND_BLUE_FLOAT__SHIFT 16 -static inline uint32_t A3XX_RB_BLEND_BLUE_FLOAT(float val) -{ - return ((util_float_to_half(val)) << A3XX_RB_BLEND_BLUE_FLOAT__SHIFT) & A3XX_RB_BLEND_BLUE_FLOAT__MASK; -} - -#define REG_A3XX_RB_BLEND_ALPHA 0x000020e7 -#define A3XX_RB_BLEND_ALPHA_UINT__MASK 0x000000ff -#define A3XX_RB_BLEND_ALPHA_UINT__SHIFT 0 -static inline uint32_t A3XX_RB_BLEND_ALPHA_UINT(uint32_t val) -{ - return ((val) << A3XX_RB_BLEND_ALPHA_UINT__SHIFT) & A3XX_RB_BLEND_ALPHA_UINT__MASK; -} -#define A3XX_RB_BLEND_ALPHA_FLOAT__MASK 0xffff0000 -#define A3XX_RB_BLEND_ALPHA_FLOAT__SHIFT 16 -static inline uint32_t A3XX_RB_BLEND_ALPHA_FLOAT(float val) -{ - return ((util_float_to_half(val)) << A3XX_RB_BLEND_ALPHA_FLOAT__SHIFT) & A3XX_RB_BLEND_ALPHA_FLOAT__MASK; -} - -#define REG_A3XX_RB_CLEAR_COLOR_DW0 0x000020e8 - -#define REG_A3XX_RB_CLEAR_COLOR_DW1 0x000020e9 - -#define REG_A3XX_RB_CLEAR_COLOR_DW2 0x000020ea - -#define REG_A3XX_RB_CLEAR_COLOR_DW3 0x000020eb - -#define REG_A3XX_RB_COPY_CONTROL 
0x000020ec -#define A3XX_RB_COPY_CONTROL_MSAA_RESOLVE__MASK 0x00000003 -#define A3XX_RB_COPY_CONTROL_MSAA_RESOLVE__SHIFT 0 -static inline uint32_t A3XX_RB_COPY_CONTROL_MSAA_RESOLVE(enum a3xx_msaa_samples val) -{ - return ((val) << A3XX_RB_COPY_CONTROL_MSAA_RESOLVE__SHIFT) & A3XX_RB_COPY_CONTROL_MSAA_RESOLVE__MASK; -} -#define A3XX_RB_COPY_CONTROL_DEPTHCLEAR 0x00000008 -#define A3XX_RB_COPY_CONTROL_MODE__MASK 0x00000070 -#define A3XX_RB_COPY_CONTROL_MODE__SHIFT 4 -static inline uint32_t A3XX_RB_COPY_CONTROL_MODE(enum adreno_rb_copy_control_mode val) -{ - return ((val) << A3XX_RB_COPY_CONTROL_MODE__SHIFT) & A3XX_RB_COPY_CONTROL_MODE__MASK; -} -#define A3XX_RB_COPY_CONTROL_MSAA_SRGB_DOWNSAMPLE 0x00000080 -#define A3XX_RB_COPY_CONTROL_FASTCLEAR__MASK 0x00000f00 -#define A3XX_RB_COPY_CONTROL_FASTCLEAR__SHIFT 8 -static inline uint32_t A3XX_RB_COPY_CONTROL_FASTCLEAR(uint32_t val) -{ - return ((val) << A3XX_RB_COPY_CONTROL_FASTCLEAR__SHIFT) & A3XX_RB_COPY_CONTROL_FASTCLEAR__MASK; -} -#define A3XX_RB_COPY_CONTROL_DEPTH32_RESOLVE 0x00001000 -#define A3XX_RB_COPY_CONTROL_GMEM_BASE__MASK 0xffffc000 -#define A3XX_RB_COPY_CONTROL_GMEM_BASE__SHIFT 14 -static inline uint32_t A3XX_RB_COPY_CONTROL_GMEM_BASE(uint32_t val) -{ - assert(!(val & 0x3fff)); - return ((val >> 14) << A3XX_RB_COPY_CONTROL_GMEM_BASE__SHIFT) & A3XX_RB_COPY_CONTROL_GMEM_BASE__MASK; -} - -#define REG_A3XX_RB_COPY_DEST_BASE 0x000020ed -#define A3XX_RB_COPY_DEST_BASE_BASE__MASK 0xfffffff0 -#define A3XX_RB_COPY_DEST_BASE_BASE__SHIFT 4 -static inline uint32_t A3XX_RB_COPY_DEST_BASE_BASE(uint32_t val) -{ - assert(!(val & 0x1f)); - return ((val >> 5) << A3XX_RB_COPY_DEST_BASE_BASE__SHIFT) & A3XX_RB_COPY_DEST_BASE_BASE__MASK; -} - -#define REG_A3XX_RB_COPY_DEST_PITCH 0x000020ee -#define A3XX_RB_COPY_DEST_PITCH_PITCH__MASK 0xffffffff -#define A3XX_RB_COPY_DEST_PITCH_PITCH__SHIFT 0 -static inline uint32_t A3XX_RB_COPY_DEST_PITCH_PITCH(uint32_t val) -{ - assert(!(val & 0x1f)); - return ((val >> 5) << 
A3XX_RB_COPY_DEST_PITCH_PITCH__SHIFT) & A3XX_RB_COPY_DEST_PITCH_PITCH__MASK; -} - -#define REG_A3XX_RB_COPY_DEST_INFO 0x000020ef -#define A3XX_RB_COPY_DEST_INFO_TILE__MASK 0x00000003 -#define A3XX_RB_COPY_DEST_INFO_TILE__SHIFT 0 -static inline uint32_t A3XX_RB_COPY_DEST_INFO_TILE(enum a3xx_tile_mode val) -{ - return ((val) << A3XX_RB_COPY_DEST_INFO_TILE__SHIFT) & A3XX_RB_COPY_DEST_INFO_TILE__MASK; -} -#define A3XX_RB_COPY_DEST_INFO_FORMAT__MASK 0x000000fc -#define A3XX_RB_COPY_DEST_INFO_FORMAT__SHIFT 2 -static inline uint32_t A3XX_RB_COPY_DEST_INFO_FORMAT(enum a3xx_color_fmt val) -{ - return ((val) << A3XX_RB_COPY_DEST_INFO_FORMAT__SHIFT) & A3XX_RB_COPY_DEST_INFO_FORMAT__MASK; -} -#define A3XX_RB_COPY_DEST_INFO_SWAP__MASK 0x00000300 -#define A3XX_RB_COPY_DEST_INFO_SWAP__SHIFT 8 -static inline uint32_t A3XX_RB_COPY_DEST_INFO_SWAP(enum a3xx_color_swap val) -{ - return ((val) << A3XX_RB_COPY_DEST_INFO_SWAP__SHIFT) & A3XX_RB_COPY_DEST_INFO_SWAP__MASK; -} -#define A3XX_RB_COPY_DEST_INFO_DITHER_MODE__MASK 0x00000c00 -#define A3XX_RB_COPY_DEST_INFO_DITHER_MODE__SHIFT 10 -static inline uint32_t A3XX_RB_COPY_DEST_INFO_DITHER_MODE(enum adreno_rb_dither_mode val) -{ - return ((val) << A3XX_RB_COPY_DEST_INFO_DITHER_MODE__SHIFT) & A3XX_RB_COPY_DEST_INFO_DITHER_MODE__MASK; -} -#define A3XX_RB_COPY_DEST_INFO_COMPONENT_ENABLE__MASK 0x0003c000 -#define A3XX_RB_COPY_DEST_INFO_COMPONENT_ENABLE__SHIFT 14 -static inline uint32_t A3XX_RB_COPY_DEST_INFO_COMPONENT_ENABLE(uint32_t val) -{ - return ((val) << A3XX_RB_COPY_DEST_INFO_COMPONENT_ENABLE__SHIFT) & A3XX_RB_COPY_DEST_INFO_COMPONENT_ENABLE__MASK; -} -#define A3XX_RB_COPY_DEST_INFO_ENDIAN__MASK 0x001c0000 -#define A3XX_RB_COPY_DEST_INFO_ENDIAN__SHIFT 18 -static inline uint32_t A3XX_RB_COPY_DEST_INFO_ENDIAN(enum adreno_rb_surface_endian val) -{ - return ((val) << A3XX_RB_COPY_DEST_INFO_ENDIAN__SHIFT) & A3XX_RB_COPY_DEST_INFO_ENDIAN__MASK; -} - -#define REG_A3XX_RB_DEPTH_CONTROL 0x00002100 -#define A3XX_RB_DEPTH_CONTROL_FRAG_WRITES_Z 
0x00000001 -#define A3XX_RB_DEPTH_CONTROL_Z_ENABLE 0x00000002 -#define A3XX_RB_DEPTH_CONTROL_Z_WRITE_ENABLE 0x00000004 -#define A3XX_RB_DEPTH_CONTROL_EARLY_Z_DISABLE 0x00000008 -#define A3XX_RB_DEPTH_CONTROL_ZFUNC__MASK 0x00000070 -#define A3XX_RB_DEPTH_CONTROL_ZFUNC__SHIFT 4 -static inline uint32_t A3XX_RB_DEPTH_CONTROL_ZFUNC(enum adreno_compare_func val) -{ - return ((val) << A3XX_RB_DEPTH_CONTROL_ZFUNC__SHIFT) & A3XX_RB_DEPTH_CONTROL_ZFUNC__MASK; -} -#define A3XX_RB_DEPTH_CONTROL_Z_CLAMP_ENABLE 0x00000080 -#define A3XX_RB_DEPTH_CONTROL_Z_TEST_ENABLE 0x80000000 - -#define REG_A3XX_RB_DEPTH_CLEAR 0x00002101 - -#define REG_A3XX_RB_DEPTH_INFO 0x00002102 -#define A3XX_RB_DEPTH_INFO_DEPTH_FORMAT__MASK 0x00000003 -#define A3XX_RB_DEPTH_INFO_DEPTH_FORMAT__SHIFT 0 -static inline uint32_t A3XX_RB_DEPTH_INFO_DEPTH_FORMAT(enum adreno_rb_depth_format val) -{ - return ((val) << A3XX_RB_DEPTH_INFO_DEPTH_FORMAT__SHIFT) & A3XX_RB_DEPTH_INFO_DEPTH_FORMAT__MASK; -} -#define A3XX_RB_DEPTH_INFO_DEPTH_BASE__MASK 0xfffff800 -#define A3XX_RB_DEPTH_INFO_DEPTH_BASE__SHIFT 11 -static inline uint32_t A3XX_RB_DEPTH_INFO_DEPTH_BASE(uint32_t val) -{ - assert(!(val & 0xfff)); - return ((val >> 12) << A3XX_RB_DEPTH_INFO_DEPTH_BASE__SHIFT) & A3XX_RB_DEPTH_INFO_DEPTH_BASE__MASK; -} - -#define REG_A3XX_RB_DEPTH_PITCH 0x00002103 -#define A3XX_RB_DEPTH_PITCH__MASK 0xffffffff -#define A3XX_RB_DEPTH_PITCH__SHIFT 0 -static inline uint32_t A3XX_RB_DEPTH_PITCH(uint32_t val) -{ - assert(!(val & 0x7)); - return ((val >> 3) << A3XX_RB_DEPTH_PITCH__SHIFT) & A3XX_RB_DEPTH_PITCH__MASK; -} - -#define REG_A3XX_RB_STENCIL_CONTROL 0x00002104 -#define A3XX_RB_STENCIL_CONTROL_STENCIL_ENABLE 0x00000001 -#define A3XX_RB_STENCIL_CONTROL_STENCIL_ENABLE_BF 0x00000002 -#define A3XX_RB_STENCIL_CONTROL_STENCIL_READ 0x00000004 -#define A3XX_RB_STENCIL_CONTROL_FUNC__MASK 0x00000700 -#define A3XX_RB_STENCIL_CONTROL_FUNC__SHIFT 8 -static inline uint32_t A3XX_RB_STENCIL_CONTROL_FUNC(enum adreno_compare_func val) -{ - return 
((val) << A3XX_RB_STENCIL_CONTROL_FUNC__SHIFT) & A3XX_RB_STENCIL_CONTROL_FUNC__MASK; -} -#define A3XX_RB_STENCIL_CONTROL_FAIL__MASK 0x00003800 -#define A3XX_RB_STENCIL_CONTROL_FAIL__SHIFT 11 -static inline uint32_t A3XX_RB_STENCIL_CONTROL_FAIL(enum adreno_stencil_op val) -{ - return ((val) << A3XX_RB_STENCIL_CONTROL_FAIL__SHIFT) & A3XX_RB_STENCIL_CONTROL_FAIL__MASK; -} -#define A3XX_RB_STENCIL_CONTROL_ZPASS__MASK 0x0001c000 -#define A3XX_RB_STENCIL_CONTROL_ZPASS__SHIFT 14 -static inline uint32_t A3XX_RB_STENCIL_CONTROL_ZPASS(enum adreno_stencil_op val) -{ - return ((val) << A3XX_RB_STENCIL_CONTROL_ZPASS__SHIFT) & A3XX_RB_STENCIL_CONTROL_ZPASS__MASK; -} -#define A3XX_RB_STENCIL_CONTROL_ZFAIL__MASK 0x000e0000 -#define A3XX_RB_STENCIL_CONTROL_ZFAIL__SHIFT 17 -static inline uint32_t A3XX_RB_STENCIL_CONTROL_ZFAIL(enum adreno_stencil_op val) -{ - return ((val) << A3XX_RB_STENCIL_CONTROL_ZFAIL__SHIFT) & A3XX_RB_STENCIL_CONTROL_ZFAIL__MASK; -} -#define A3XX_RB_STENCIL_CONTROL_FUNC_BF__MASK 0x00700000 -#define A3XX_RB_STENCIL_CONTROL_FUNC_BF__SHIFT 20 -static inline uint32_t A3XX_RB_STENCIL_CONTROL_FUNC_BF(enum adreno_compare_func val) -{ - return ((val) << A3XX_RB_STENCIL_CONTROL_FUNC_BF__SHIFT) & A3XX_RB_STENCIL_CONTROL_FUNC_BF__MASK; -} -#define A3XX_RB_STENCIL_CONTROL_FAIL_BF__MASK 0x03800000 -#define A3XX_RB_STENCIL_CONTROL_FAIL_BF__SHIFT 23 -static inline uint32_t A3XX_RB_STENCIL_CONTROL_FAIL_BF(enum adreno_stencil_op val) -{ - return ((val) << A3XX_RB_STENCIL_CONTROL_FAIL_BF__SHIFT) & A3XX_RB_STENCIL_CONTROL_FAIL_BF__MASK; -} -#define A3XX_RB_STENCIL_CONTROL_ZPASS_BF__MASK 0x1c000000 -#define A3XX_RB_STENCIL_CONTROL_ZPASS_BF__SHIFT 26 -static inline uint32_t A3XX_RB_STENCIL_CONTROL_ZPASS_BF(enum adreno_stencil_op val) -{ - return ((val) << A3XX_RB_STENCIL_CONTROL_ZPASS_BF__SHIFT) & A3XX_RB_STENCIL_CONTROL_ZPASS_BF__MASK; -} -#define A3XX_RB_STENCIL_CONTROL_ZFAIL_BF__MASK 0xe0000000 -#define A3XX_RB_STENCIL_CONTROL_ZFAIL_BF__SHIFT 29 -static inline uint32_t 
A3XX_RB_STENCIL_CONTROL_ZFAIL_BF(enum adreno_stencil_op val) -{ - return ((val) << A3XX_RB_STENCIL_CONTROL_ZFAIL_BF__SHIFT) & A3XX_RB_STENCIL_CONTROL_ZFAIL_BF__MASK; -} - -#define REG_A3XX_RB_STENCIL_CLEAR 0x00002105 - -#define REG_A3XX_RB_STENCIL_INFO 0x00002106 -#define A3XX_RB_STENCIL_INFO_STENCIL_BASE__MASK 0xfffff800 -#define A3XX_RB_STENCIL_INFO_STENCIL_BASE__SHIFT 11 -static inline uint32_t A3XX_RB_STENCIL_INFO_STENCIL_BASE(uint32_t val) -{ - assert(!(val & 0xfff)); - return ((val >> 12) << A3XX_RB_STENCIL_INFO_STENCIL_BASE__SHIFT) & A3XX_RB_STENCIL_INFO_STENCIL_BASE__MASK; -} - -#define REG_A3XX_RB_STENCIL_PITCH 0x00002107 -#define A3XX_RB_STENCIL_PITCH__MASK 0xffffffff -#define A3XX_RB_STENCIL_PITCH__SHIFT 0 -static inline uint32_t A3XX_RB_STENCIL_PITCH(uint32_t val) -{ - assert(!(val & 0x7)); - return ((val >> 3) << A3XX_RB_STENCIL_PITCH__SHIFT) & A3XX_RB_STENCIL_PITCH__MASK; -} - -#define REG_A3XX_RB_STENCILREFMASK 0x00002108 -#define A3XX_RB_STENCILREFMASK_STENCILREF__MASK 0x000000ff -#define A3XX_RB_STENCILREFMASK_STENCILREF__SHIFT 0 -static inline uint32_t A3XX_RB_STENCILREFMASK_STENCILREF(uint32_t val) -{ - return ((val) << A3XX_RB_STENCILREFMASK_STENCILREF__SHIFT) & A3XX_RB_STENCILREFMASK_STENCILREF__MASK; -} -#define A3XX_RB_STENCILREFMASK_STENCILMASK__MASK 0x0000ff00 -#define A3XX_RB_STENCILREFMASK_STENCILMASK__SHIFT 8 -static inline uint32_t A3XX_RB_STENCILREFMASK_STENCILMASK(uint32_t val) -{ - return ((val) << A3XX_RB_STENCILREFMASK_STENCILMASK__SHIFT) & A3XX_RB_STENCILREFMASK_STENCILMASK__MASK; -} -#define A3XX_RB_STENCILREFMASK_STENCILWRITEMASK__MASK 0x00ff0000 -#define A3XX_RB_STENCILREFMASK_STENCILWRITEMASK__SHIFT 16 -static inline uint32_t A3XX_RB_STENCILREFMASK_STENCILWRITEMASK(uint32_t val) -{ - return ((val) << A3XX_RB_STENCILREFMASK_STENCILWRITEMASK__SHIFT) & A3XX_RB_STENCILREFMASK_STENCILWRITEMASK__MASK; -} - -#define REG_A3XX_RB_STENCILREFMASK_BF 0x00002109 -#define A3XX_RB_STENCILREFMASK_BF_STENCILREF__MASK 0x000000ff -#define 
A3XX_RB_STENCILREFMASK_BF_STENCILREF__SHIFT 0 -static inline uint32_t A3XX_RB_STENCILREFMASK_BF_STENCILREF(uint32_t val) -{ - return ((val) << A3XX_RB_STENCILREFMASK_BF_STENCILREF__SHIFT) & A3XX_RB_STENCILREFMASK_BF_STENCILREF__MASK; -} -#define A3XX_RB_STENCILREFMASK_BF_STENCILMASK__MASK 0x0000ff00 -#define A3XX_RB_STENCILREFMASK_BF_STENCILMASK__SHIFT 8 -static inline uint32_t A3XX_RB_STENCILREFMASK_BF_STENCILMASK(uint32_t val) -{ - return ((val) << A3XX_RB_STENCILREFMASK_BF_STENCILMASK__SHIFT) & A3XX_RB_STENCILREFMASK_BF_STENCILMASK__MASK; -} -#define A3XX_RB_STENCILREFMASK_BF_STENCILWRITEMASK__MASK 0x00ff0000 -#define A3XX_RB_STENCILREFMASK_BF_STENCILWRITEMASK__SHIFT 16 -static inline uint32_t A3XX_RB_STENCILREFMASK_BF_STENCILWRITEMASK(uint32_t val) -{ - return ((val) << A3XX_RB_STENCILREFMASK_BF_STENCILWRITEMASK__SHIFT) & A3XX_RB_STENCILREFMASK_BF_STENCILWRITEMASK__MASK; -} - -#define REG_A3XX_RB_LRZ_VSC_CONTROL 0x0000210c -#define A3XX_RB_LRZ_VSC_CONTROL_BINNING_ENABLE 0x00000002 - -#define REG_A3XX_RB_WINDOW_OFFSET 0x0000210e -#define A3XX_RB_WINDOW_OFFSET_X__MASK 0x0000ffff -#define A3XX_RB_WINDOW_OFFSET_X__SHIFT 0 -static inline uint32_t A3XX_RB_WINDOW_OFFSET_X(uint32_t val) -{ - return ((val) << A3XX_RB_WINDOW_OFFSET_X__SHIFT) & A3XX_RB_WINDOW_OFFSET_X__MASK; -} -#define A3XX_RB_WINDOW_OFFSET_Y__MASK 0xffff0000 -#define A3XX_RB_WINDOW_OFFSET_Y__SHIFT 16 -static inline uint32_t A3XX_RB_WINDOW_OFFSET_Y(uint32_t val) -{ - return ((val) << A3XX_RB_WINDOW_OFFSET_Y__SHIFT) & A3XX_RB_WINDOW_OFFSET_Y__MASK; -} - -#define REG_A3XX_RB_SAMPLE_COUNT_CONTROL 0x00002110 -#define A3XX_RB_SAMPLE_COUNT_CONTROL_RESET 0x00000001 -#define A3XX_RB_SAMPLE_COUNT_CONTROL_COPY 0x00000002 - -#define REG_A3XX_RB_SAMPLE_COUNT_ADDR 0x00002111 - -#define REG_A3XX_RB_Z_CLAMP_MIN 0x00002114 - -#define REG_A3XX_RB_Z_CLAMP_MAX 0x00002115 - -#define REG_A3XX_VGT_BIN_BASE 0x000021e1 - -#define REG_A3XX_VGT_BIN_SIZE 0x000021e2 - -#define REG_A3XX_PC_VSTREAM_CONTROL 0x000021e4 -#define 
A3XX_PC_VSTREAM_CONTROL_SIZE__MASK 0x003f0000 -#define A3XX_PC_VSTREAM_CONTROL_SIZE__SHIFT 16 -static inline uint32_t A3XX_PC_VSTREAM_CONTROL_SIZE(uint32_t val) -{ - return ((val) << A3XX_PC_VSTREAM_CONTROL_SIZE__SHIFT) & A3XX_PC_VSTREAM_CONTROL_SIZE__MASK; -} -#define A3XX_PC_VSTREAM_CONTROL_N__MASK 0x07c00000 -#define A3XX_PC_VSTREAM_CONTROL_N__SHIFT 22 -static inline uint32_t A3XX_PC_VSTREAM_CONTROL_N(uint32_t val) -{ - return ((val) << A3XX_PC_VSTREAM_CONTROL_N__SHIFT) & A3XX_PC_VSTREAM_CONTROL_N__MASK; -} - -#define REG_A3XX_PC_VERTEX_REUSE_BLOCK_CNTL 0x000021ea - -#define REG_A3XX_PC_PRIM_VTX_CNTL 0x000021ec -#define A3XX_PC_PRIM_VTX_CNTL_STRIDE_IN_VPC__MASK 0x0000001f -#define A3XX_PC_PRIM_VTX_CNTL_STRIDE_IN_VPC__SHIFT 0 -static inline uint32_t A3XX_PC_PRIM_VTX_CNTL_STRIDE_IN_VPC(uint32_t val) -{ - return ((val) << A3XX_PC_PRIM_VTX_CNTL_STRIDE_IN_VPC__SHIFT) & A3XX_PC_PRIM_VTX_CNTL_STRIDE_IN_VPC__MASK; -} -#define A3XX_PC_PRIM_VTX_CNTL_POLYMODE_FRONT_PTYPE__MASK 0x000000e0 -#define A3XX_PC_PRIM_VTX_CNTL_POLYMODE_FRONT_PTYPE__SHIFT 5 -static inline uint32_t A3XX_PC_PRIM_VTX_CNTL_POLYMODE_FRONT_PTYPE(enum adreno_pa_su_sc_draw val) -{ - return ((val) << A3XX_PC_PRIM_VTX_CNTL_POLYMODE_FRONT_PTYPE__SHIFT) & A3XX_PC_PRIM_VTX_CNTL_POLYMODE_FRONT_PTYPE__MASK; -} -#define A3XX_PC_PRIM_VTX_CNTL_POLYMODE_BACK_PTYPE__MASK 0x00000700 -#define A3XX_PC_PRIM_VTX_CNTL_POLYMODE_BACK_PTYPE__SHIFT 8 -static inline uint32_t A3XX_PC_PRIM_VTX_CNTL_POLYMODE_BACK_PTYPE(enum adreno_pa_su_sc_draw val) -{ - return ((val) << A3XX_PC_PRIM_VTX_CNTL_POLYMODE_BACK_PTYPE__SHIFT) & A3XX_PC_PRIM_VTX_CNTL_POLYMODE_BACK_PTYPE__MASK; -} -#define A3XX_PC_PRIM_VTX_CNTL_POLYMODE_ENABLE 0x00001000 -#define A3XX_PC_PRIM_VTX_CNTL_PRIMITIVE_RESTART 0x00100000 -#define A3XX_PC_PRIM_VTX_CNTL_PROVOKING_VTX_LAST 0x02000000 -#define A3XX_PC_PRIM_VTX_CNTL_PSIZE 0x04000000 - -#define REG_A3XX_PC_RESTART_INDEX 0x000021ed - -#define REG_A3XX_HLSQ_CONTROL_0_REG 0x00002200 -#define 
A3XX_HLSQ_CONTROL_0_REG_FSTHREADSIZE__MASK 0x00000030 -#define A3XX_HLSQ_CONTROL_0_REG_FSTHREADSIZE__SHIFT 4 -static inline uint32_t A3XX_HLSQ_CONTROL_0_REG_FSTHREADSIZE(enum a3xx_threadsize val) -{ - return ((val) << A3XX_HLSQ_CONTROL_0_REG_FSTHREADSIZE__SHIFT) & A3XX_HLSQ_CONTROL_0_REG_FSTHREADSIZE__MASK; -} -#define A3XX_HLSQ_CONTROL_0_REG_FSSUPERTHREADENABLE 0x00000040 -#define A3XX_HLSQ_CONTROL_0_REG_COMPUTEMODE 0x00000100 -#define A3XX_HLSQ_CONTROL_0_REG_SPSHADERRESTART 0x00000200 -#define A3XX_HLSQ_CONTROL_0_REG_RESERVED2 0x00000400 -#define A3XX_HLSQ_CONTROL_0_REG_CYCLETIMEOUTLIMITVPC__MASK 0x00fff000 -#define A3XX_HLSQ_CONTROL_0_REG_CYCLETIMEOUTLIMITVPC__SHIFT 12 -static inline uint32_t A3XX_HLSQ_CONTROL_0_REG_CYCLETIMEOUTLIMITVPC(uint32_t val) -{ - return ((val) << A3XX_HLSQ_CONTROL_0_REG_CYCLETIMEOUTLIMITVPC__SHIFT) & A3XX_HLSQ_CONTROL_0_REG_CYCLETIMEOUTLIMITVPC__MASK; -} -#define A3XX_HLSQ_CONTROL_0_REG_FSONLYTEX 0x02000000 -#define A3XX_HLSQ_CONTROL_0_REG_CHUNKDISABLE 0x04000000 -#define A3XX_HLSQ_CONTROL_0_REG_CONSTMODE__MASK 0x08000000 -#define A3XX_HLSQ_CONTROL_0_REG_CONSTMODE__SHIFT 27 -static inline uint32_t A3XX_HLSQ_CONTROL_0_REG_CONSTMODE(uint32_t val) -{ - return ((val) << A3XX_HLSQ_CONTROL_0_REG_CONSTMODE__SHIFT) & A3XX_HLSQ_CONTROL_0_REG_CONSTMODE__MASK; -} -#define A3XX_HLSQ_CONTROL_0_REG_LAZYUPDATEDISABLE 0x10000000 -#define A3XX_HLSQ_CONTROL_0_REG_SPCONSTFULLUPDATE 0x20000000 -#define A3XX_HLSQ_CONTROL_0_REG_TPFULLUPDATE 0x40000000 -#define A3XX_HLSQ_CONTROL_0_REG_SINGLECONTEXT 0x80000000 - -#define REG_A3XX_HLSQ_CONTROL_1_REG 0x00002201 -#define A3XX_HLSQ_CONTROL_1_REG_VSTHREADSIZE__MASK 0x000000c0 -#define A3XX_HLSQ_CONTROL_1_REG_VSTHREADSIZE__SHIFT 6 -static inline uint32_t A3XX_HLSQ_CONTROL_1_REG_VSTHREADSIZE(enum a3xx_threadsize val) -{ - return ((val) << A3XX_HLSQ_CONTROL_1_REG_VSTHREADSIZE__SHIFT) & A3XX_HLSQ_CONTROL_1_REG_VSTHREADSIZE__MASK; -} -#define A3XX_HLSQ_CONTROL_1_REG_VSSUPERTHREADENABLE 0x00000100 -#define 
A3XX_HLSQ_CONTROL_1_REG_FRAGCOORDXYREGID__MASK 0x00ff0000 -#define A3XX_HLSQ_CONTROL_1_REG_FRAGCOORDXYREGID__SHIFT 16 -static inline uint32_t A3XX_HLSQ_CONTROL_1_REG_FRAGCOORDXYREGID(uint32_t val) -{ - return ((val) << A3XX_HLSQ_CONTROL_1_REG_FRAGCOORDXYREGID__SHIFT) & A3XX_HLSQ_CONTROL_1_REG_FRAGCOORDXYREGID__MASK; -} -#define A3XX_HLSQ_CONTROL_1_REG_FRAGCOORDZWREGID__MASK 0xff000000 -#define A3XX_HLSQ_CONTROL_1_REG_FRAGCOORDZWREGID__SHIFT 24 -static inline uint32_t A3XX_HLSQ_CONTROL_1_REG_FRAGCOORDZWREGID(uint32_t val) -{ - return ((val) << A3XX_HLSQ_CONTROL_1_REG_FRAGCOORDZWREGID__SHIFT) & A3XX_HLSQ_CONTROL_1_REG_FRAGCOORDZWREGID__MASK; -} - -#define REG_A3XX_HLSQ_CONTROL_2_REG 0x00002202 -#define A3XX_HLSQ_CONTROL_2_REG_FACENESSREGID__MASK 0x000003fc -#define A3XX_HLSQ_CONTROL_2_REG_FACENESSREGID__SHIFT 2 -static inline uint32_t A3XX_HLSQ_CONTROL_2_REG_FACENESSREGID(uint32_t val) -{ - return ((val) << A3XX_HLSQ_CONTROL_2_REG_FACENESSREGID__SHIFT) & A3XX_HLSQ_CONTROL_2_REG_FACENESSREGID__MASK; -} -#define A3XX_HLSQ_CONTROL_2_REG_COVVALUEREGID__MASK 0x03fc0000 -#define A3XX_HLSQ_CONTROL_2_REG_COVVALUEREGID__SHIFT 18 -static inline uint32_t A3XX_HLSQ_CONTROL_2_REG_COVVALUEREGID(uint32_t val) -{ - return ((val) << A3XX_HLSQ_CONTROL_2_REG_COVVALUEREGID__SHIFT) & A3XX_HLSQ_CONTROL_2_REG_COVVALUEREGID__MASK; -} -#define A3XX_HLSQ_CONTROL_2_REG_PRIMALLOCTHRESHOLD__MASK 0xfc000000 -#define A3XX_HLSQ_CONTROL_2_REG_PRIMALLOCTHRESHOLD__SHIFT 26 -static inline uint32_t A3XX_HLSQ_CONTROL_2_REG_PRIMALLOCTHRESHOLD(uint32_t val) -{ - return ((val) << A3XX_HLSQ_CONTROL_2_REG_PRIMALLOCTHRESHOLD__SHIFT) & A3XX_HLSQ_CONTROL_2_REG_PRIMALLOCTHRESHOLD__MASK; -} - -#define REG_A3XX_HLSQ_CONTROL_3_REG 0x00002203 -#define A3XX_HLSQ_CONTROL_3_REG_REGID__MASK 0x000000ff -#define A3XX_HLSQ_CONTROL_3_REG_REGID__SHIFT 0 -static inline uint32_t A3XX_HLSQ_CONTROL_3_REG_REGID(uint32_t val) -{ - return ((val) << A3XX_HLSQ_CONTROL_3_REG_REGID__SHIFT) & A3XX_HLSQ_CONTROL_3_REG_REGID__MASK; -} - 
-#define REG_A3XX_HLSQ_VS_CONTROL_REG 0x00002204 -#define A3XX_HLSQ_VS_CONTROL_REG_CONSTLENGTH__MASK 0x000003ff -#define A3XX_HLSQ_VS_CONTROL_REG_CONSTLENGTH__SHIFT 0 -static inline uint32_t A3XX_HLSQ_VS_CONTROL_REG_CONSTLENGTH(uint32_t val) -{ - return ((val) << A3XX_HLSQ_VS_CONTROL_REG_CONSTLENGTH__SHIFT) & A3XX_HLSQ_VS_CONTROL_REG_CONSTLENGTH__MASK; -} -#define A3XX_HLSQ_VS_CONTROL_REG_CONSTSTARTOFFSET__MASK 0x001ff000 -#define A3XX_HLSQ_VS_CONTROL_REG_CONSTSTARTOFFSET__SHIFT 12 -static inline uint32_t A3XX_HLSQ_VS_CONTROL_REG_CONSTSTARTOFFSET(uint32_t val) -{ - return ((val) << A3XX_HLSQ_VS_CONTROL_REG_CONSTSTARTOFFSET__SHIFT) & A3XX_HLSQ_VS_CONTROL_REG_CONSTSTARTOFFSET__MASK; -} -#define A3XX_HLSQ_VS_CONTROL_REG_INSTRLENGTH__MASK 0xff000000 -#define A3XX_HLSQ_VS_CONTROL_REG_INSTRLENGTH__SHIFT 24 -static inline uint32_t A3XX_HLSQ_VS_CONTROL_REG_INSTRLENGTH(uint32_t val) -{ - return ((val) << A3XX_HLSQ_VS_CONTROL_REG_INSTRLENGTH__SHIFT) & A3XX_HLSQ_VS_CONTROL_REG_INSTRLENGTH__MASK; -} - -#define REG_A3XX_HLSQ_FS_CONTROL_REG 0x00002205 -#define A3XX_HLSQ_FS_CONTROL_REG_CONSTLENGTH__MASK 0x000003ff -#define A3XX_HLSQ_FS_CONTROL_REG_CONSTLENGTH__SHIFT 0 -static inline uint32_t A3XX_HLSQ_FS_CONTROL_REG_CONSTLENGTH(uint32_t val) -{ - return ((val) << A3XX_HLSQ_FS_CONTROL_REG_CONSTLENGTH__SHIFT) & A3XX_HLSQ_FS_CONTROL_REG_CONSTLENGTH__MASK; -} -#define A3XX_HLSQ_FS_CONTROL_REG_CONSTSTARTOFFSET__MASK 0x001ff000 -#define A3XX_HLSQ_FS_CONTROL_REG_CONSTSTARTOFFSET__SHIFT 12 -static inline uint32_t A3XX_HLSQ_FS_CONTROL_REG_CONSTSTARTOFFSET(uint32_t val) -{ - return ((val) << A3XX_HLSQ_FS_CONTROL_REG_CONSTSTARTOFFSET__SHIFT) & A3XX_HLSQ_FS_CONTROL_REG_CONSTSTARTOFFSET__MASK; -} -#define A3XX_HLSQ_FS_CONTROL_REG_INSTRLENGTH__MASK 0xff000000 -#define A3XX_HLSQ_FS_CONTROL_REG_INSTRLENGTH__SHIFT 24 -static inline uint32_t A3XX_HLSQ_FS_CONTROL_REG_INSTRLENGTH(uint32_t val) -{ - return ((val) << A3XX_HLSQ_FS_CONTROL_REG_INSTRLENGTH__SHIFT) & 
A3XX_HLSQ_FS_CONTROL_REG_INSTRLENGTH__MASK; -} - -#define REG_A3XX_HLSQ_CONST_VSPRESV_RANGE_REG 0x00002206 -#define A3XX_HLSQ_CONST_VSPRESV_RANGE_REG_STARTENTRY__MASK 0x000001ff -#define A3XX_HLSQ_CONST_VSPRESV_RANGE_REG_STARTENTRY__SHIFT 0 -static inline uint32_t A3XX_HLSQ_CONST_VSPRESV_RANGE_REG_STARTENTRY(uint32_t val) -{ - return ((val) << A3XX_HLSQ_CONST_VSPRESV_RANGE_REG_STARTENTRY__SHIFT) & A3XX_HLSQ_CONST_VSPRESV_RANGE_REG_STARTENTRY__MASK; -} -#define A3XX_HLSQ_CONST_VSPRESV_RANGE_REG_ENDENTRY__MASK 0x01ff0000 -#define A3XX_HLSQ_CONST_VSPRESV_RANGE_REG_ENDENTRY__SHIFT 16 -static inline uint32_t A3XX_HLSQ_CONST_VSPRESV_RANGE_REG_ENDENTRY(uint32_t val) -{ - return ((val) << A3XX_HLSQ_CONST_VSPRESV_RANGE_REG_ENDENTRY__SHIFT) & A3XX_HLSQ_CONST_VSPRESV_RANGE_REG_ENDENTRY__MASK; -} - -#define REG_A3XX_HLSQ_CONST_FSPRESV_RANGE_REG 0x00002207 -#define A3XX_HLSQ_CONST_FSPRESV_RANGE_REG_STARTENTRY__MASK 0x000001ff -#define A3XX_HLSQ_CONST_FSPRESV_RANGE_REG_STARTENTRY__SHIFT 0 -static inline uint32_t A3XX_HLSQ_CONST_FSPRESV_RANGE_REG_STARTENTRY(uint32_t val) -{ - return ((val) << A3XX_HLSQ_CONST_FSPRESV_RANGE_REG_STARTENTRY__SHIFT) & A3XX_HLSQ_CONST_FSPRESV_RANGE_REG_STARTENTRY__MASK; -} -#define A3XX_HLSQ_CONST_FSPRESV_RANGE_REG_ENDENTRY__MASK 0x01ff0000 -#define A3XX_HLSQ_CONST_FSPRESV_RANGE_REG_ENDENTRY__SHIFT 16 -static inline uint32_t A3XX_HLSQ_CONST_FSPRESV_RANGE_REG_ENDENTRY(uint32_t val) -{ - return ((val) << A3XX_HLSQ_CONST_FSPRESV_RANGE_REG_ENDENTRY__SHIFT) & A3XX_HLSQ_CONST_FSPRESV_RANGE_REG_ENDENTRY__MASK; -} - -#define REG_A3XX_HLSQ_CL_NDRANGE_0_REG 0x0000220a -#define A3XX_HLSQ_CL_NDRANGE_0_REG_WORKDIM__MASK 0x00000003 -#define A3XX_HLSQ_CL_NDRANGE_0_REG_WORKDIM__SHIFT 0 -static inline uint32_t A3XX_HLSQ_CL_NDRANGE_0_REG_WORKDIM(uint32_t val) -{ - return ((val) << A3XX_HLSQ_CL_NDRANGE_0_REG_WORKDIM__SHIFT) & A3XX_HLSQ_CL_NDRANGE_0_REG_WORKDIM__MASK; -} -#define A3XX_HLSQ_CL_NDRANGE_0_REG_LOCALSIZE0__MASK 0x00000ffc -#define 
A3XX_HLSQ_CL_NDRANGE_0_REG_LOCALSIZE0__SHIFT 2 -static inline uint32_t A3XX_HLSQ_CL_NDRANGE_0_REG_LOCALSIZE0(uint32_t val) -{ - return ((val) << A3XX_HLSQ_CL_NDRANGE_0_REG_LOCALSIZE0__SHIFT) & A3XX_HLSQ_CL_NDRANGE_0_REG_LOCALSIZE0__MASK; -} -#define A3XX_HLSQ_CL_NDRANGE_0_REG_LOCALSIZE1__MASK 0x003ff000 -#define A3XX_HLSQ_CL_NDRANGE_0_REG_LOCALSIZE1__SHIFT 12 -static inline uint32_t A3XX_HLSQ_CL_NDRANGE_0_REG_LOCALSIZE1(uint32_t val) -{ - return ((val) << A3XX_HLSQ_CL_NDRANGE_0_REG_LOCALSIZE1__SHIFT) & A3XX_HLSQ_CL_NDRANGE_0_REG_LOCALSIZE1__MASK; -} -#define A3XX_HLSQ_CL_NDRANGE_0_REG_LOCALSIZE2__MASK 0xffc00000 -#define A3XX_HLSQ_CL_NDRANGE_0_REG_LOCALSIZE2__SHIFT 22 -static inline uint32_t A3XX_HLSQ_CL_NDRANGE_0_REG_LOCALSIZE2(uint32_t val) -{ - return ((val) << A3XX_HLSQ_CL_NDRANGE_0_REG_LOCALSIZE2__SHIFT) & A3XX_HLSQ_CL_NDRANGE_0_REG_LOCALSIZE2__MASK; -} - -static inline uint32_t REG_A3XX_HLSQ_CL_GLOBAL_WORK(uint32_t i0) { return 0x0000220b + 0x2*i0; } - -static inline uint32_t REG_A3XX_HLSQ_CL_GLOBAL_WORK_SIZE(uint32_t i0) { return 0x0000220b + 0x2*i0; } - -static inline uint32_t REG_A3XX_HLSQ_CL_GLOBAL_WORK_OFFSET(uint32_t i0) { return 0x0000220c + 0x2*i0; } - -#define REG_A3XX_HLSQ_CL_CONTROL_0_REG 0x00002211 - -#define REG_A3XX_HLSQ_CL_CONTROL_1_REG 0x00002212 - -#define REG_A3XX_HLSQ_CL_KERNEL_CONST_REG 0x00002214 - -static inline uint32_t REG_A3XX_HLSQ_CL_KERNEL_GROUP(uint32_t i0) { return 0x00002215 + 0x1*i0; } - -static inline uint32_t REG_A3XX_HLSQ_CL_KERNEL_GROUP_RATIO(uint32_t i0) { return 0x00002215 + 0x1*i0; } - -#define REG_A3XX_HLSQ_CL_KERNEL_GROUP_Y_REG 0x00002216 - -#define REG_A3XX_HLSQ_CL_KERNEL_GROUP_Z_REG 0x00002217 - -#define REG_A3XX_HLSQ_CL_WG_OFFSET_REG 0x0000221a - -#define REG_A3XX_VFD_CONTROL_0 0x00002240 -#define A3XX_VFD_CONTROL_0_TOTALATTRTOVS__MASK 0x0003ffff -#define A3XX_VFD_CONTROL_0_TOTALATTRTOVS__SHIFT 0 -static inline uint32_t A3XX_VFD_CONTROL_0_TOTALATTRTOVS(uint32_t val) -{ - return ((val) << 
A3XX_VFD_CONTROL_0_TOTALATTRTOVS__SHIFT) & A3XX_VFD_CONTROL_0_TOTALATTRTOVS__MASK; -} -#define A3XX_VFD_CONTROL_0_PACKETSIZE__MASK 0x003c0000 -#define A3XX_VFD_CONTROL_0_PACKETSIZE__SHIFT 18 -static inline uint32_t A3XX_VFD_CONTROL_0_PACKETSIZE(uint32_t val) -{ - return ((val) << A3XX_VFD_CONTROL_0_PACKETSIZE__SHIFT) & A3XX_VFD_CONTROL_0_PACKETSIZE__MASK; -} -#define A3XX_VFD_CONTROL_0_STRMDECINSTRCNT__MASK 0x07c00000 -#define A3XX_VFD_CONTROL_0_STRMDECINSTRCNT__SHIFT 22 -static inline uint32_t A3XX_VFD_CONTROL_0_STRMDECINSTRCNT(uint32_t val) -{ - return ((val) << A3XX_VFD_CONTROL_0_STRMDECINSTRCNT__SHIFT) & A3XX_VFD_CONTROL_0_STRMDECINSTRCNT__MASK; -} -#define A3XX_VFD_CONTROL_0_STRMFETCHINSTRCNT__MASK 0xf8000000 -#define A3XX_VFD_CONTROL_0_STRMFETCHINSTRCNT__SHIFT 27 -static inline uint32_t A3XX_VFD_CONTROL_0_STRMFETCHINSTRCNT(uint32_t val) -{ - return ((val) << A3XX_VFD_CONTROL_0_STRMFETCHINSTRCNT__SHIFT) & A3XX_VFD_CONTROL_0_STRMFETCHINSTRCNT__MASK; -} - -#define REG_A3XX_VFD_CONTROL_1 0x00002241 -#define A3XX_VFD_CONTROL_1_MAXSTORAGE__MASK 0x0000000f -#define A3XX_VFD_CONTROL_1_MAXSTORAGE__SHIFT 0 -static inline uint32_t A3XX_VFD_CONTROL_1_MAXSTORAGE(uint32_t val) -{ - return ((val) << A3XX_VFD_CONTROL_1_MAXSTORAGE__SHIFT) & A3XX_VFD_CONTROL_1_MAXSTORAGE__MASK; -} -#define A3XX_VFD_CONTROL_1_MAXTHRESHOLD__MASK 0x000000f0 -#define A3XX_VFD_CONTROL_1_MAXTHRESHOLD__SHIFT 4 -static inline uint32_t A3XX_VFD_CONTROL_1_MAXTHRESHOLD(uint32_t val) -{ - return ((val) << A3XX_VFD_CONTROL_1_MAXTHRESHOLD__SHIFT) & A3XX_VFD_CONTROL_1_MAXTHRESHOLD__MASK; -} -#define A3XX_VFD_CONTROL_1_MINTHRESHOLD__MASK 0x00000f00 -#define A3XX_VFD_CONTROL_1_MINTHRESHOLD__SHIFT 8 -static inline uint32_t A3XX_VFD_CONTROL_1_MINTHRESHOLD(uint32_t val) -{ - return ((val) << A3XX_VFD_CONTROL_1_MINTHRESHOLD__SHIFT) & A3XX_VFD_CONTROL_1_MINTHRESHOLD__MASK; -} -#define A3XX_VFD_CONTROL_1_REGID4VTX__MASK 0x00ff0000 -#define A3XX_VFD_CONTROL_1_REGID4VTX__SHIFT 16 -static inline uint32_t 
A3XX_VFD_CONTROL_1_REGID4VTX(uint32_t val) -{ - return ((val) << A3XX_VFD_CONTROL_1_REGID4VTX__SHIFT) & A3XX_VFD_CONTROL_1_REGID4VTX__MASK; -} -#define A3XX_VFD_CONTROL_1_REGID4INST__MASK 0xff000000 -#define A3XX_VFD_CONTROL_1_REGID4INST__SHIFT 24 -static inline uint32_t A3XX_VFD_CONTROL_1_REGID4INST(uint32_t val) -{ - return ((val) << A3XX_VFD_CONTROL_1_REGID4INST__SHIFT) & A3XX_VFD_CONTROL_1_REGID4INST__MASK; -} - -#define REG_A3XX_VFD_INDEX_MIN 0x00002242 - -#define REG_A3XX_VFD_INDEX_MAX 0x00002243 - -#define REG_A3XX_VFD_INSTANCEID_OFFSET 0x00002244 - -#define REG_A3XX_VFD_INDEX_OFFSET 0x00002245 - -#define REG_A3XX_VFD_INDEX_OFFSET 0x00002245 - -static inline uint32_t REG_A3XX_VFD_FETCH(uint32_t i0) { return 0x00002246 + 0x2*i0; } - -static inline uint32_t REG_A3XX_VFD_FETCH_INSTR_0(uint32_t i0) { return 0x00002246 + 0x2*i0; } -#define A3XX_VFD_FETCH_INSTR_0_FETCHSIZE__MASK 0x0000007f -#define A3XX_VFD_FETCH_INSTR_0_FETCHSIZE__SHIFT 0 -static inline uint32_t A3XX_VFD_FETCH_INSTR_0_FETCHSIZE(uint32_t val) -{ - return ((val) << A3XX_VFD_FETCH_INSTR_0_FETCHSIZE__SHIFT) & A3XX_VFD_FETCH_INSTR_0_FETCHSIZE__MASK; -} -#define A3XX_VFD_FETCH_INSTR_0_BUFSTRIDE__MASK 0x0000ff80 -#define A3XX_VFD_FETCH_INSTR_0_BUFSTRIDE__SHIFT 7 -static inline uint32_t A3XX_VFD_FETCH_INSTR_0_BUFSTRIDE(uint32_t val) -{ - return ((val) << A3XX_VFD_FETCH_INSTR_0_BUFSTRIDE__SHIFT) & A3XX_VFD_FETCH_INSTR_0_BUFSTRIDE__MASK; -} -#define A3XX_VFD_FETCH_INSTR_0_INSTANCED 0x00010000 -#define A3XX_VFD_FETCH_INSTR_0_SWITCHNEXT 0x00020000 -#define A3XX_VFD_FETCH_INSTR_0_INDEXCODE__MASK 0x00fc0000 -#define A3XX_VFD_FETCH_INSTR_0_INDEXCODE__SHIFT 18 -static inline uint32_t A3XX_VFD_FETCH_INSTR_0_INDEXCODE(uint32_t val) -{ - return ((val) << A3XX_VFD_FETCH_INSTR_0_INDEXCODE__SHIFT) & A3XX_VFD_FETCH_INSTR_0_INDEXCODE__MASK; -} -#define A3XX_VFD_FETCH_INSTR_0_STEPRATE__MASK 0xff000000 -#define A3XX_VFD_FETCH_INSTR_0_STEPRATE__SHIFT 24 -static inline uint32_t A3XX_VFD_FETCH_INSTR_0_STEPRATE(uint32_t val) 
-{ - return ((val) << A3XX_VFD_FETCH_INSTR_0_STEPRATE__SHIFT) & A3XX_VFD_FETCH_INSTR_0_STEPRATE__MASK; -} - -static inline uint32_t REG_A3XX_VFD_FETCH_INSTR_1(uint32_t i0) { return 0x00002247 + 0x2*i0; } - -static inline uint32_t REG_A3XX_VFD_DECODE(uint32_t i0) { return 0x00002266 + 0x1*i0; } - -static inline uint32_t REG_A3XX_VFD_DECODE_INSTR(uint32_t i0) { return 0x00002266 + 0x1*i0; } -#define A3XX_VFD_DECODE_INSTR_WRITEMASK__MASK 0x0000000f -#define A3XX_VFD_DECODE_INSTR_WRITEMASK__SHIFT 0 -static inline uint32_t A3XX_VFD_DECODE_INSTR_WRITEMASK(uint32_t val) -{ - return ((val) << A3XX_VFD_DECODE_INSTR_WRITEMASK__SHIFT) & A3XX_VFD_DECODE_INSTR_WRITEMASK__MASK; -} -#define A3XX_VFD_DECODE_INSTR_CONSTFILL 0x00000010 -#define A3XX_VFD_DECODE_INSTR_FORMAT__MASK 0x00000fc0 -#define A3XX_VFD_DECODE_INSTR_FORMAT__SHIFT 6 -static inline uint32_t A3XX_VFD_DECODE_INSTR_FORMAT(enum a3xx_vtx_fmt val) -{ - return ((val) << A3XX_VFD_DECODE_INSTR_FORMAT__SHIFT) & A3XX_VFD_DECODE_INSTR_FORMAT__MASK; -} -#define A3XX_VFD_DECODE_INSTR_REGID__MASK 0x000ff000 -#define A3XX_VFD_DECODE_INSTR_REGID__SHIFT 12 -static inline uint32_t A3XX_VFD_DECODE_INSTR_REGID(uint32_t val) -{ - return ((val) << A3XX_VFD_DECODE_INSTR_REGID__SHIFT) & A3XX_VFD_DECODE_INSTR_REGID__MASK; -} -#define A3XX_VFD_DECODE_INSTR_INT 0x00100000 -#define A3XX_VFD_DECODE_INSTR_SWAP__MASK 0x00c00000 -#define A3XX_VFD_DECODE_INSTR_SWAP__SHIFT 22 -static inline uint32_t A3XX_VFD_DECODE_INSTR_SWAP(enum a3xx_color_swap val) -{ - return ((val) << A3XX_VFD_DECODE_INSTR_SWAP__SHIFT) & A3XX_VFD_DECODE_INSTR_SWAP__MASK; -} -#define A3XX_VFD_DECODE_INSTR_SHIFTCNT__MASK 0x1f000000 -#define A3XX_VFD_DECODE_INSTR_SHIFTCNT__SHIFT 24 -static inline uint32_t A3XX_VFD_DECODE_INSTR_SHIFTCNT(uint32_t val) -{ - return ((val) << A3XX_VFD_DECODE_INSTR_SHIFTCNT__SHIFT) & A3XX_VFD_DECODE_INSTR_SHIFTCNT__MASK; -} -#define A3XX_VFD_DECODE_INSTR_LASTCOMPVALID 0x20000000 -#define A3XX_VFD_DECODE_INSTR_SWITCHNEXT 0x40000000 - -#define 
REG_A3XX_VFD_VS_THREADING_THRESHOLD 0x0000227e -#define A3XX_VFD_VS_THREADING_THRESHOLD_REGID_THRESHOLD__MASK 0x0000000f -#define A3XX_VFD_VS_THREADING_THRESHOLD_REGID_THRESHOLD__SHIFT 0 -static inline uint32_t A3XX_VFD_VS_THREADING_THRESHOLD_REGID_THRESHOLD(uint32_t val) -{ - return ((val) << A3XX_VFD_VS_THREADING_THRESHOLD_REGID_THRESHOLD__SHIFT) & A3XX_VFD_VS_THREADING_THRESHOLD_REGID_THRESHOLD__MASK; -} -#define A3XX_VFD_VS_THREADING_THRESHOLD_REGID_VTXCNT__MASK 0x0000ff00 -#define A3XX_VFD_VS_THREADING_THRESHOLD_REGID_VTXCNT__SHIFT 8 -static inline uint32_t A3XX_VFD_VS_THREADING_THRESHOLD_REGID_VTXCNT(uint32_t val) -{ - return ((val) << A3XX_VFD_VS_THREADING_THRESHOLD_REGID_VTXCNT__SHIFT) & A3XX_VFD_VS_THREADING_THRESHOLD_REGID_VTXCNT__MASK; -} - -#define REG_A3XX_VPC_ATTR 0x00002280 -#define A3XX_VPC_ATTR_TOTALATTR__MASK 0x000001ff -#define A3XX_VPC_ATTR_TOTALATTR__SHIFT 0 -static inline uint32_t A3XX_VPC_ATTR_TOTALATTR(uint32_t val) -{ - return ((val) << A3XX_VPC_ATTR_TOTALATTR__SHIFT) & A3XX_VPC_ATTR_TOTALATTR__MASK; -} -#define A3XX_VPC_ATTR_PSIZE 0x00000200 -#define A3XX_VPC_ATTR_THRDASSIGN__MASK 0x0ffff000 -#define A3XX_VPC_ATTR_THRDASSIGN__SHIFT 12 -static inline uint32_t A3XX_VPC_ATTR_THRDASSIGN(uint32_t val) -{ - return ((val) << A3XX_VPC_ATTR_THRDASSIGN__SHIFT) & A3XX_VPC_ATTR_THRDASSIGN__MASK; -} -#define A3XX_VPC_ATTR_LMSIZE__MASK 0xf0000000 -#define A3XX_VPC_ATTR_LMSIZE__SHIFT 28 -static inline uint32_t A3XX_VPC_ATTR_LMSIZE(uint32_t val) -{ - return ((val) << A3XX_VPC_ATTR_LMSIZE__SHIFT) & A3XX_VPC_ATTR_LMSIZE__MASK; -} - -#define REG_A3XX_VPC_PACK 0x00002281 -#define A3XX_VPC_PACK_NUMFPNONPOSVAR__MASK 0x0000ff00 -#define A3XX_VPC_PACK_NUMFPNONPOSVAR__SHIFT 8 -static inline uint32_t A3XX_VPC_PACK_NUMFPNONPOSVAR(uint32_t val) -{ - return ((val) << A3XX_VPC_PACK_NUMFPNONPOSVAR__SHIFT) & A3XX_VPC_PACK_NUMFPNONPOSVAR__MASK; -} -#define A3XX_VPC_PACK_NUMNONPOSVSVAR__MASK 0x00ff0000 -#define A3XX_VPC_PACK_NUMNONPOSVSVAR__SHIFT 16 -static inline uint32_t 
A3XX_VPC_PACK_NUMNONPOSVSVAR(uint32_t val) -{ - return ((val) << A3XX_VPC_PACK_NUMNONPOSVSVAR__SHIFT) & A3XX_VPC_PACK_NUMNONPOSVSVAR__MASK; -} - -static inline uint32_t REG_A3XX_VPC_VARYING_INTERP(uint32_t i0) { return 0x00002282 + 0x1*i0; } - -static inline uint32_t REG_A3XX_VPC_VARYING_INTERP_MODE(uint32_t i0) { return 0x00002282 + 0x1*i0; } -#define A3XX_VPC_VARYING_INTERP_MODE_C0__MASK 0x00000003 -#define A3XX_VPC_VARYING_INTERP_MODE_C0__SHIFT 0 -static inline uint32_t A3XX_VPC_VARYING_INTERP_MODE_C0(enum a3xx_intp_mode val) -{ - return ((val) << A3XX_VPC_VARYING_INTERP_MODE_C0__SHIFT) & A3XX_VPC_VARYING_INTERP_MODE_C0__MASK; -} -#define A3XX_VPC_VARYING_INTERP_MODE_C1__MASK 0x0000000c -#define A3XX_VPC_VARYING_INTERP_MODE_C1__SHIFT 2 -static inline uint32_t A3XX_VPC_VARYING_INTERP_MODE_C1(enum a3xx_intp_mode val) -{ - return ((val) << A3XX_VPC_VARYING_INTERP_MODE_C1__SHIFT) & A3XX_VPC_VARYING_INTERP_MODE_C1__MASK; -} -#define A3XX_VPC_VARYING_INTERP_MODE_C2__MASK 0x00000030 -#define A3XX_VPC_VARYING_INTERP_MODE_C2__SHIFT 4 -static inline uint32_t A3XX_VPC_VARYING_INTERP_MODE_C2(enum a3xx_intp_mode val) -{ - return ((val) << A3XX_VPC_VARYING_INTERP_MODE_C2__SHIFT) & A3XX_VPC_VARYING_INTERP_MODE_C2__MASK; -} -#define A3XX_VPC_VARYING_INTERP_MODE_C3__MASK 0x000000c0 -#define A3XX_VPC_VARYING_INTERP_MODE_C3__SHIFT 6 -static inline uint32_t A3XX_VPC_VARYING_INTERP_MODE_C3(enum a3xx_intp_mode val) -{ - return ((val) << A3XX_VPC_VARYING_INTERP_MODE_C3__SHIFT) & A3XX_VPC_VARYING_INTERP_MODE_C3__MASK; -} -#define A3XX_VPC_VARYING_INTERP_MODE_C4__MASK 0x00000300 -#define A3XX_VPC_VARYING_INTERP_MODE_C4__SHIFT 8 -static inline uint32_t A3XX_VPC_VARYING_INTERP_MODE_C4(enum a3xx_intp_mode val) -{ - return ((val) << A3XX_VPC_VARYING_INTERP_MODE_C4__SHIFT) & A3XX_VPC_VARYING_INTERP_MODE_C4__MASK; -} -#define A3XX_VPC_VARYING_INTERP_MODE_C5__MASK 0x00000c00 -#define A3XX_VPC_VARYING_INTERP_MODE_C5__SHIFT 10 -static inline uint32_t A3XX_VPC_VARYING_INTERP_MODE_C5(enum 
a3xx_intp_mode val) -{ - return ((val) << A3XX_VPC_VARYING_INTERP_MODE_C5__SHIFT) & A3XX_VPC_VARYING_INTERP_MODE_C5__MASK; -} -#define A3XX_VPC_VARYING_INTERP_MODE_C6__MASK 0x00003000 -#define A3XX_VPC_VARYING_INTERP_MODE_C6__SHIFT 12 -static inline uint32_t A3XX_VPC_VARYING_INTERP_MODE_C6(enum a3xx_intp_mode val) -{ - return ((val) << A3XX_VPC_VARYING_INTERP_MODE_C6__SHIFT) & A3XX_VPC_VARYING_INTERP_MODE_C6__MASK; -} -#define A3XX_VPC_VARYING_INTERP_MODE_C7__MASK 0x0000c000 -#define A3XX_VPC_VARYING_INTERP_MODE_C7__SHIFT 14 -static inline uint32_t A3XX_VPC_VARYING_INTERP_MODE_C7(enum a3xx_intp_mode val) -{ - return ((val) << A3XX_VPC_VARYING_INTERP_MODE_C7__SHIFT) & A3XX_VPC_VARYING_INTERP_MODE_C7__MASK; -} -#define A3XX_VPC_VARYING_INTERP_MODE_C8__MASK 0x00030000 -#define A3XX_VPC_VARYING_INTERP_MODE_C8__SHIFT 16 -static inline uint32_t A3XX_VPC_VARYING_INTERP_MODE_C8(enum a3xx_intp_mode val) -{ - return ((val) << A3XX_VPC_VARYING_INTERP_MODE_C8__SHIFT) & A3XX_VPC_VARYING_INTERP_MODE_C8__MASK; -} -#define A3XX_VPC_VARYING_INTERP_MODE_C9__MASK 0x000c0000 -#define A3XX_VPC_VARYING_INTERP_MODE_C9__SHIFT 18 -static inline uint32_t A3XX_VPC_VARYING_INTERP_MODE_C9(enum a3xx_intp_mode val) -{ - return ((val) << A3XX_VPC_VARYING_INTERP_MODE_C9__SHIFT) & A3XX_VPC_VARYING_INTERP_MODE_C9__MASK; -} -#define A3XX_VPC_VARYING_INTERP_MODE_CA__MASK 0x00300000 -#define A3XX_VPC_VARYING_INTERP_MODE_CA__SHIFT 20 -static inline uint32_t A3XX_VPC_VARYING_INTERP_MODE_CA(enum a3xx_intp_mode val) -{ - return ((val) << A3XX_VPC_VARYING_INTERP_MODE_CA__SHIFT) & A3XX_VPC_VARYING_INTERP_MODE_CA__MASK; -} -#define A3XX_VPC_VARYING_INTERP_MODE_CB__MASK 0x00c00000 -#define A3XX_VPC_VARYING_INTERP_MODE_CB__SHIFT 22 -static inline uint32_t A3XX_VPC_VARYING_INTERP_MODE_CB(enum a3xx_intp_mode val) -{ - return ((val) << A3XX_VPC_VARYING_INTERP_MODE_CB__SHIFT) & A3XX_VPC_VARYING_INTERP_MODE_CB__MASK; -} -#define A3XX_VPC_VARYING_INTERP_MODE_CC__MASK 0x03000000 -#define 
A3XX_VPC_VARYING_INTERP_MODE_CC__SHIFT 24 -static inline uint32_t A3XX_VPC_VARYING_INTERP_MODE_CC(enum a3xx_intp_mode val) -{ - return ((val) << A3XX_VPC_VARYING_INTERP_MODE_CC__SHIFT) & A3XX_VPC_VARYING_INTERP_MODE_CC__MASK; -} -#define A3XX_VPC_VARYING_INTERP_MODE_CD__MASK 0x0c000000 -#define A3XX_VPC_VARYING_INTERP_MODE_CD__SHIFT 26 -static inline uint32_t A3XX_VPC_VARYING_INTERP_MODE_CD(enum a3xx_intp_mode val) -{ - return ((val) << A3XX_VPC_VARYING_INTERP_MODE_CD__SHIFT) & A3XX_VPC_VARYING_INTERP_MODE_CD__MASK; -} -#define A3XX_VPC_VARYING_INTERP_MODE_CE__MASK 0x30000000 -#define A3XX_VPC_VARYING_INTERP_MODE_CE__SHIFT 28 -static inline uint32_t A3XX_VPC_VARYING_INTERP_MODE_CE(enum a3xx_intp_mode val) -{ - return ((val) << A3XX_VPC_VARYING_INTERP_MODE_CE__SHIFT) & A3XX_VPC_VARYING_INTERP_MODE_CE__MASK; -} -#define A3XX_VPC_VARYING_INTERP_MODE_CF__MASK 0xc0000000 -#define A3XX_VPC_VARYING_INTERP_MODE_CF__SHIFT 30 -static inline uint32_t A3XX_VPC_VARYING_INTERP_MODE_CF(enum a3xx_intp_mode val) -{ - return ((val) << A3XX_VPC_VARYING_INTERP_MODE_CF__SHIFT) & A3XX_VPC_VARYING_INTERP_MODE_CF__MASK; -} - -static inline uint32_t REG_A3XX_VPC_VARYING_PS_REPL(uint32_t i0) { return 0x00002286 + 0x1*i0; } - -static inline uint32_t REG_A3XX_VPC_VARYING_PS_REPL_MODE(uint32_t i0) { return 0x00002286 + 0x1*i0; } -#define A3XX_VPC_VARYING_PS_REPL_MODE_C0__MASK 0x00000003 -#define A3XX_VPC_VARYING_PS_REPL_MODE_C0__SHIFT 0 -static inline uint32_t A3XX_VPC_VARYING_PS_REPL_MODE_C0(enum a3xx_repl_mode val) -{ - return ((val) << A3XX_VPC_VARYING_PS_REPL_MODE_C0__SHIFT) & A3XX_VPC_VARYING_PS_REPL_MODE_C0__MASK; -} -#define A3XX_VPC_VARYING_PS_REPL_MODE_C1__MASK 0x0000000c -#define A3XX_VPC_VARYING_PS_REPL_MODE_C1__SHIFT 2 -static inline uint32_t A3XX_VPC_VARYING_PS_REPL_MODE_C1(enum a3xx_repl_mode val) -{ - return ((val) << A3XX_VPC_VARYING_PS_REPL_MODE_C1__SHIFT) & A3XX_VPC_VARYING_PS_REPL_MODE_C1__MASK; -} -#define A3XX_VPC_VARYING_PS_REPL_MODE_C2__MASK 0x00000030 -#define 
A3XX_VPC_VARYING_PS_REPL_MODE_C2__SHIFT 4 -static inline uint32_t A3XX_VPC_VARYING_PS_REPL_MODE_C2(enum a3xx_repl_mode val) -{ - return ((val) << A3XX_VPC_VARYING_PS_REPL_MODE_C2__SHIFT) & A3XX_VPC_VARYING_PS_REPL_MODE_C2__MASK; -} -#define A3XX_VPC_VARYING_PS_REPL_MODE_C3__MASK 0x000000c0 -#define A3XX_VPC_VARYING_PS_REPL_MODE_C3__SHIFT 6 -static inline uint32_t A3XX_VPC_VARYING_PS_REPL_MODE_C3(enum a3xx_repl_mode val) -{ - return ((val) << A3XX_VPC_VARYING_PS_REPL_MODE_C3__SHIFT) & A3XX_VPC_VARYING_PS_REPL_MODE_C3__MASK; -} -#define A3XX_VPC_VARYING_PS_REPL_MODE_C4__MASK 0x00000300 -#define A3XX_VPC_VARYING_PS_REPL_MODE_C4__SHIFT 8 -static inline uint32_t A3XX_VPC_VARYING_PS_REPL_MODE_C4(enum a3xx_repl_mode val) -{ - return ((val) << A3XX_VPC_VARYING_PS_REPL_MODE_C4__SHIFT) & A3XX_VPC_VARYING_PS_REPL_MODE_C4__MASK; -} -#define A3XX_VPC_VARYING_PS_REPL_MODE_C5__MASK 0x00000c00 -#define A3XX_VPC_VARYING_PS_REPL_MODE_C5__SHIFT 10 -static inline uint32_t A3XX_VPC_VARYING_PS_REPL_MODE_C5(enum a3xx_repl_mode val) -{ - return ((val) << A3XX_VPC_VARYING_PS_REPL_MODE_C5__SHIFT) & A3XX_VPC_VARYING_PS_REPL_MODE_C5__MASK; -} -#define A3XX_VPC_VARYING_PS_REPL_MODE_C6__MASK 0x00003000 -#define A3XX_VPC_VARYING_PS_REPL_MODE_C6__SHIFT 12 -static inline uint32_t A3XX_VPC_VARYING_PS_REPL_MODE_C6(enum a3xx_repl_mode val) -{ - return ((val) << A3XX_VPC_VARYING_PS_REPL_MODE_C6__SHIFT) & A3XX_VPC_VARYING_PS_REPL_MODE_C6__MASK; -} -#define A3XX_VPC_VARYING_PS_REPL_MODE_C7__MASK 0x0000c000 -#define A3XX_VPC_VARYING_PS_REPL_MODE_C7__SHIFT 14 -static inline uint32_t A3XX_VPC_VARYING_PS_REPL_MODE_C7(enum a3xx_repl_mode val) -{ - return ((val) << A3XX_VPC_VARYING_PS_REPL_MODE_C7__SHIFT) & A3XX_VPC_VARYING_PS_REPL_MODE_C7__MASK; -} -#define A3XX_VPC_VARYING_PS_REPL_MODE_C8__MASK 0x00030000 -#define A3XX_VPC_VARYING_PS_REPL_MODE_C8__SHIFT 16 -static inline uint32_t A3XX_VPC_VARYING_PS_REPL_MODE_C8(enum a3xx_repl_mode val) -{ - return ((val) << A3XX_VPC_VARYING_PS_REPL_MODE_C8__SHIFT) & 
A3XX_VPC_VARYING_PS_REPL_MODE_C8__MASK; -} -#define A3XX_VPC_VARYING_PS_REPL_MODE_C9__MASK 0x000c0000 -#define A3XX_VPC_VARYING_PS_REPL_MODE_C9__SHIFT 18 -static inline uint32_t A3XX_VPC_VARYING_PS_REPL_MODE_C9(enum a3xx_repl_mode val) -{ - return ((val) << A3XX_VPC_VARYING_PS_REPL_MODE_C9__SHIFT) & A3XX_VPC_VARYING_PS_REPL_MODE_C9__MASK; -} -#define A3XX_VPC_VARYING_PS_REPL_MODE_CA__MASK 0x00300000 -#define A3XX_VPC_VARYING_PS_REPL_MODE_CA__SHIFT 20 -static inline uint32_t A3XX_VPC_VARYING_PS_REPL_MODE_CA(enum a3xx_repl_mode val) -{ - return ((val) << A3XX_VPC_VARYING_PS_REPL_MODE_CA__SHIFT) & A3XX_VPC_VARYING_PS_REPL_MODE_CA__MASK; -} -#define A3XX_VPC_VARYING_PS_REPL_MODE_CB__MASK 0x00c00000 -#define A3XX_VPC_VARYING_PS_REPL_MODE_CB__SHIFT 22 -static inline uint32_t A3XX_VPC_VARYING_PS_REPL_MODE_CB(enum a3xx_repl_mode val) -{ - return ((val) << A3XX_VPC_VARYING_PS_REPL_MODE_CB__SHIFT) & A3XX_VPC_VARYING_PS_REPL_MODE_CB__MASK; -} -#define A3XX_VPC_VARYING_PS_REPL_MODE_CC__MASK 0x03000000 -#define A3XX_VPC_VARYING_PS_REPL_MODE_CC__SHIFT 24 -static inline uint32_t A3XX_VPC_VARYING_PS_REPL_MODE_CC(enum a3xx_repl_mode val) -{ - return ((val) << A3XX_VPC_VARYING_PS_REPL_MODE_CC__SHIFT) & A3XX_VPC_VARYING_PS_REPL_MODE_CC__MASK; -} -#define A3XX_VPC_VARYING_PS_REPL_MODE_CD__MASK 0x0c000000 -#define A3XX_VPC_VARYING_PS_REPL_MODE_CD__SHIFT 26 -static inline uint32_t A3XX_VPC_VARYING_PS_REPL_MODE_CD(enum a3xx_repl_mode val) -{ - return ((val) << A3XX_VPC_VARYING_PS_REPL_MODE_CD__SHIFT) & A3XX_VPC_VARYING_PS_REPL_MODE_CD__MASK; -} -#define A3XX_VPC_VARYING_PS_REPL_MODE_CE__MASK 0x30000000 -#define A3XX_VPC_VARYING_PS_REPL_MODE_CE__SHIFT 28 -static inline uint32_t A3XX_VPC_VARYING_PS_REPL_MODE_CE(enum a3xx_repl_mode val) -{ - return ((val) << A3XX_VPC_VARYING_PS_REPL_MODE_CE__SHIFT) & A3XX_VPC_VARYING_PS_REPL_MODE_CE__MASK; -} -#define A3XX_VPC_VARYING_PS_REPL_MODE_CF__MASK 0xc0000000 -#define A3XX_VPC_VARYING_PS_REPL_MODE_CF__SHIFT 30 -static inline uint32_t 
A3XX_VPC_VARYING_PS_REPL_MODE_CF(enum a3xx_repl_mode val) -{ - return ((val) << A3XX_VPC_VARYING_PS_REPL_MODE_CF__SHIFT) & A3XX_VPC_VARYING_PS_REPL_MODE_CF__MASK; -} - -#define REG_A3XX_VPC_VARY_CYLWRAP_ENABLE_0 0x0000228a - -#define REG_A3XX_VPC_VARY_CYLWRAP_ENABLE_1 0x0000228b - -#define REG_A3XX_SP_SP_CTRL_REG 0x000022c0 -#define A3XX_SP_SP_CTRL_REG_RESOLVE 0x00010000 -#define A3XX_SP_SP_CTRL_REG_CONSTMODE__MASK 0x00040000 -#define A3XX_SP_SP_CTRL_REG_CONSTMODE__SHIFT 18 -static inline uint32_t A3XX_SP_SP_CTRL_REG_CONSTMODE(uint32_t val) -{ - return ((val) << A3XX_SP_SP_CTRL_REG_CONSTMODE__SHIFT) & A3XX_SP_SP_CTRL_REG_CONSTMODE__MASK; -} -#define A3XX_SP_SP_CTRL_REG_BINNING 0x00080000 -#define A3XX_SP_SP_CTRL_REG_SLEEPMODE__MASK 0x00300000 -#define A3XX_SP_SP_CTRL_REG_SLEEPMODE__SHIFT 20 -static inline uint32_t A3XX_SP_SP_CTRL_REG_SLEEPMODE(uint32_t val) -{ - return ((val) << A3XX_SP_SP_CTRL_REG_SLEEPMODE__SHIFT) & A3XX_SP_SP_CTRL_REG_SLEEPMODE__MASK; -} -#define A3XX_SP_SP_CTRL_REG_L0MODE__MASK 0x00c00000 -#define A3XX_SP_SP_CTRL_REG_L0MODE__SHIFT 22 -static inline uint32_t A3XX_SP_SP_CTRL_REG_L0MODE(uint32_t val) -{ - return ((val) << A3XX_SP_SP_CTRL_REG_L0MODE__SHIFT) & A3XX_SP_SP_CTRL_REG_L0MODE__MASK; -} - -#define REG_A3XX_SP_VS_CTRL_REG0 0x000022c4 -#define A3XX_SP_VS_CTRL_REG0_THREADMODE__MASK 0x00000001 -#define A3XX_SP_VS_CTRL_REG0_THREADMODE__SHIFT 0 -static inline uint32_t A3XX_SP_VS_CTRL_REG0_THREADMODE(enum a3xx_threadmode val) -{ - return ((val) << A3XX_SP_VS_CTRL_REG0_THREADMODE__SHIFT) & A3XX_SP_VS_CTRL_REG0_THREADMODE__MASK; -} -#define A3XX_SP_VS_CTRL_REG0_INSTRBUFFERMODE__MASK 0x00000002 -#define A3XX_SP_VS_CTRL_REG0_INSTRBUFFERMODE__SHIFT 1 -static inline uint32_t A3XX_SP_VS_CTRL_REG0_INSTRBUFFERMODE(enum a3xx_instrbuffermode val) -{ - return ((val) << A3XX_SP_VS_CTRL_REG0_INSTRBUFFERMODE__SHIFT) & A3XX_SP_VS_CTRL_REG0_INSTRBUFFERMODE__MASK; -} -#define A3XX_SP_VS_CTRL_REG0_CACHEINVALID 0x00000004 -#define A3XX_SP_VS_CTRL_REG0_ALUSCHMODE 
0x00000008 -#define A3XX_SP_VS_CTRL_REG0_HALFREGFOOTPRINT__MASK 0x000003f0 -#define A3XX_SP_VS_CTRL_REG0_HALFREGFOOTPRINT__SHIFT 4 -static inline uint32_t A3XX_SP_VS_CTRL_REG0_HALFREGFOOTPRINT(uint32_t val) -{ - return ((val) << A3XX_SP_VS_CTRL_REG0_HALFREGFOOTPRINT__SHIFT) & A3XX_SP_VS_CTRL_REG0_HALFREGFOOTPRINT__MASK; -} -#define A3XX_SP_VS_CTRL_REG0_FULLREGFOOTPRINT__MASK 0x0000fc00 -#define A3XX_SP_VS_CTRL_REG0_FULLREGFOOTPRINT__SHIFT 10 -static inline uint32_t A3XX_SP_VS_CTRL_REG0_FULLREGFOOTPRINT(uint32_t val) -{ - return ((val) << A3XX_SP_VS_CTRL_REG0_FULLREGFOOTPRINT__SHIFT) & A3XX_SP_VS_CTRL_REG0_FULLREGFOOTPRINT__MASK; -} -#define A3XX_SP_VS_CTRL_REG0_THREADSIZE__MASK 0x00100000 -#define A3XX_SP_VS_CTRL_REG0_THREADSIZE__SHIFT 20 -static inline uint32_t A3XX_SP_VS_CTRL_REG0_THREADSIZE(enum a3xx_threadsize val) -{ - return ((val) << A3XX_SP_VS_CTRL_REG0_THREADSIZE__SHIFT) & A3XX_SP_VS_CTRL_REG0_THREADSIZE__MASK; -} -#define A3XX_SP_VS_CTRL_REG0_SUPERTHREADMODE 0x00200000 -#define A3XX_SP_VS_CTRL_REG0_LENGTH__MASK 0xff000000 -#define A3XX_SP_VS_CTRL_REG0_LENGTH__SHIFT 24 -static inline uint32_t A3XX_SP_VS_CTRL_REG0_LENGTH(uint32_t val) -{ - return ((val) << A3XX_SP_VS_CTRL_REG0_LENGTH__SHIFT) & A3XX_SP_VS_CTRL_REG0_LENGTH__MASK; -} - -#define REG_A3XX_SP_VS_CTRL_REG1 0x000022c5 -#define A3XX_SP_VS_CTRL_REG1_CONSTLENGTH__MASK 0x000003ff -#define A3XX_SP_VS_CTRL_REG1_CONSTLENGTH__SHIFT 0 -static inline uint32_t A3XX_SP_VS_CTRL_REG1_CONSTLENGTH(uint32_t val) -{ - return ((val) << A3XX_SP_VS_CTRL_REG1_CONSTLENGTH__SHIFT) & A3XX_SP_VS_CTRL_REG1_CONSTLENGTH__MASK; -} -#define A3XX_SP_VS_CTRL_REG1_CONSTFOOTPRINT__MASK 0x000ffc00 -#define A3XX_SP_VS_CTRL_REG1_CONSTFOOTPRINT__SHIFT 10 -static inline uint32_t A3XX_SP_VS_CTRL_REG1_CONSTFOOTPRINT(uint32_t val) -{ - return ((val) << A3XX_SP_VS_CTRL_REG1_CONSTFOOTPRINT__SHIFT) & A3XX_SP_VS_CTRL_REG1_CONSTFOOTPRINT__MASK; -} -#define A3XX_SP_VS_CTRL_REG1_INITIALOUTSTANDING__MASK 0x7f000000 -#define 
A3XX_SP_VS_CTRL_REG1_INITIALOUTSTANDING__SHIFT 24 -static inline uint32_t A3XX_SP_VS_CTRL_REG1_INITIALOUTSTANDING(uint32_t val) -{ - return ((val) << A3XX_SP_VS_CTRL_REG1_INITIALOUTSTANDING__SHIFT) & A3XX_SP_VS_CTRL_REG1_INITIALOUTSTANDING__MASK; -} - -#define REG_A3XX_SP_VS_PARAM_REG 0x000022c6 -#define A3XX_SP_VS_PARAM_REG_POSREGID__MASK 0x000000ff -#define A3XX_SP_VS_PARAM_REG_POSREGID__SHIFT 0 -static inline uint32_t A3XX_SP_VS_PARAM_REG_POSREGID(uint32_t val) -{ - return ((val) << A3XX_SP_VS_PARAM_REG_POSREGID__SHIFT) & A3XX_SP_VS_PARAM_REG_POSREGID__MASK; -} -#define A3XX_SP_VS_PARAM_REG_PSIZEREGID__MASK 0x0000ff00 -#define A3XX_SP_VS_PARAM_REG_PSIZEREGID__SHIFT 8 -static inline uint32_t A3XX_SP_VS_PARAM_REG_PSIZEREGID(uint32_t val) -{ - return ((val) << A3XX_SP_VS_PARAM_REG_PSIZEREGID__SHIFT) & A3XX_SP_VS_PARAM_REG_PSIZEREGID__MASK; -} -#define A3XX_SP_VS_PARAM_REG_POS2DMODE 0x00010000 -#define A3XX_SP_VS_PARAM_REG_TOTALVSOUTVAR__MASK 0x01f00000 -#define A3XX_SP_VS_PARAM_REG_TOTALVSOUTVAR__SHIFT 20 -static inline uint32_t A3XX_SP_VS_PARAM_REG_TOTALVSOUTVAR(uint32_t val) -{ - return ((val) << A3XX_SP_VS_PARAM_REG_TOTALVSOUTVAR__SHIFT) & A3XX_SP_VS_PARAM_REG_TOTALVSOUTVAR__MASK; -} - -static inline uint32_t REG_A3XX_SP_VS_OUT(uint32_t i0) { return 0x000022c7 + 0x1*i0; } - -static inline uint32_t REG_A3XX_SP_VS_OUT_REG(uint32_t i0) { return 0x000022c7 + 0x1*i0; } -#define A3XX_SP_VS_OUT_REG_A_REGID__MASK 0x000000ff -#define A3XX_SP_VS_OUT_REG_A_REGID__SHIFT 0 -static inline uint32_t A3XX_SP_VS_OUT_REG_A_REGID(uint32_t val) -{ - return ((val) << A3XX_SP_VS_OUT_REG_A_REGID__SHIFT) & A3XX_SP_VS_OUT_REG_A_REGID__MASK; -} -#define A3XX_SP_VS_OUT_REG_A_HALF 0x00000100 -#define A3XX_SP_VS_OUT_REG_A_COMPMASK__MASK 0x00001e00 -#define A3XX_SP_VS_OUT_REG_A_COMPMASK__SHIFT 9 -static inline uint32_t A3XX_SP_VS_OUT_REG_A_COMPMASK(uint32_t val) -{ - return ((val) << A3XX_SP_VS_OUT_REG_A_COMPMASK__SHIFT) & A3XX_SP_VS_OUT_REG_A_COMPMASK__MASK; -} -#define 
A3XX_SP_VS_OUT_REG_B_REGID__MASK 0x00ff0000 -#define A3XX_SP_VS_OUT_REG_B_REGID__SHIFT 16 -static inline uint32_t A3XX_SP_VS_OUT_REG_B_REGID(uint32_t val) -{ - return ((val) << A3XX_SP_VS_OUT_REG_B_REGID__SHIFT) & A3XX_SP_VS_OUT_REG_B_REGID__MASK; -} -#define A3XX_SP_VS_OUT_REG_B_HALF 0x01000000 -#define A3XX_SP_VS_OUT_REG_B_COMPMASK__MASK 0x1e000000 -#define A3XX_SP_VS_OUT_REG_B_COMPMASK__SHIFT 25 -static inline uint32_t A3XX_SP_VS_OUT_REG_B_COMPMASK(uint32_t val) -{ - return ((val) << A3XX_SP_VS_OUT_REG_B_COMPMASK__SHIFT) & A3XX_SP_VS_OUT_REG_B_COMPMASK__MASK; -} - -static inline uint32_t REG_A3XX_SP_VS_VPC_DST(uint32_t i0) { return 0x000022d0 + 0x1*i0; } - -static inline uint32_t REG_A3XX_SP_VS_VPC_DST_REG(uint32_t i0) { return 0x000022d0 + 0x1*i0; } -#define A3XX_SP_VS_VPC_DST_REG_OUTLOC0__MASK 0x0000007f -#define A3XX_SP_VS_VPC_DST_REG_OUTLOC0__SHIFT 0 -static inline uint32_t A3XX_SP_VS_VPC_DST_REG_OUTLOC0(uint32_t val) -{ - return ((val) << A3XX_SP_VS_VPC_DST_REG_OUTLOC0__SHIFT) & A3XX_SP_VS_VPC_DST_REG_OUTLOC0__MASK; -} -#define A3XX_SP_VS_VPC_DST_REG_OUTLOC1__MASK 0x00007f00 -#define A3XX_SP_VS_VPC_DST_REG_OUTLOC1__SHIFT 8 -static inline uint32_t A3XX_SP_VS_VPC_DST_REG_OUTLOC1(uint32_t val) -{ - return ((val) << A3XX_SP_VS_VPC_DST_REG_OUTLOC1__SHIFT) & A3XX_SP_VS_VPC_DST_REG_OUTLOC1__MASK; -} -#define A3XX_SP_VS_VPC_DST_REG_OUTLOC2__MASK 0x007f0000 -#define A3XX_SP_VS_VPC_DST_REG_OUTLOC2__SHIFT 16 -static inline uint32_t A3XX_SP_VS_VPC_DST_REG_OUTLOC2(uint32_t val) -{ - return ((val) << A3XX_SP_VS_VPC_DST_REG_OUTLOC2__SHIFT) & A3XX_SP_VS_VPC_DST_REG_OUTLOC2__MASK; -} -#define A3XX_SP_VS_VPC_DST_REG_OUTLOC3__MASK 0x7f000000 -#define A3XX_SP_VS_VPC_DST_REG_OUTLOC3__SHIFT 24 -static inline uint32_t A3XX_SP_VS_VPC_DST_REG_OUTLOC3(uint32_t val) -{ - return ((val) << A3XX_SP_VS_VPC_DST_REG_OUTLOC3__SHIFT) & A3XX_SP_VS_VPC_DST_REG_OUTLOC3__MASK; -} - -#define REG_A3XX_SP_VS_OBJ_OFFSET_REG 0x000022d4 -#define A3XX_SP_VS_OBJ_OFFSET_REG_FIRSTEXECINSTROFFSET__MASK 
0x0000ffff -#define A3XX_SP_VS_OBJ_OFFSET_REG_FIRSTEXECINSTROFFSET__SHIFT 0 -static inline uint32_t A3XX_SP_VS_OBJ_OFFSET_REG_FIRSTEXECINSTROFFSET(uint32_t val) -{ - return ((val) << A3XX_SP_VS_OBJ_OFFSET_REG_FIRSTEXECINSTROFFSET__SHIFT) & A3XX_SP_VS_OBJ_OFFSET_REG_FIRSTEXECINSTROFFSET__MASK; -} -#define A3XX_SP_VS_OBJ_OFFSET_REG_CONSTOBJECTOFFSET__MASK 0x01ff0000 -#define A3XX_SP_VS_OBJ_OFFSET_REG_CONSTOBJECTOFFSET__SHIFT 16 -static inline uint32_t A3XX_SP_VS_OBJ_OFFSET_REG_CONSTOBJECTOFFSET(uint32_t val) -{ - return ((val) << A3XX_SP_VS_OBJ_OFFSET_REG_CONSTOBJECTOFFSET__SHIFT) & A3XX_SP_VS_OBJ_OFFSET_REG_CONSTOBJECTOFFSET__MASK; -} -#define A3XX_SP_VS_OBJ_OFFSET_REG_SHADEROBJOFFSET__MASK 0xfe000000 -#define A3XX_SP_VS_OBJ_OFFSET_REG_SHADEROBJOFFSET__SHIFT 25 -static inline uint32_t A3XX_SP_VS_OBJ_OFFSET_REG_SHADEROBJOFFSET(uint32_t val) -{ - return ((val) << A3XX_SP_VS_OBJ_OFFSET_REG_SHADEROBJOFFSET__SHIFT) & A3XX_SP_VS_OBJ_OFFSET_REG_SHADEROBJOFFSET__MASK; -} - -#define REG_A3XX_SP_VS_OBJ_START_REG 0x000022d5 - -#define REG_A3XX_SP_VS_PVT_MEM_PARAM_REG 0x000022d6 -#define A3XX_SP_VS_PVT_MEM_PARAM_REG_MEMSIZEPERITEM__MASK 0x000000ff -#define A3XX_SP_VS_PVT_MEM_PARAM_REG_MEMSIZEPERITEM__SHIFT 0 -static inline uint32_t A3XX_SP_VS_PVT_MEM_PARAM_REG_MEMSIZEPERITEM(uint32_t val) -{ - return ((val) << A3XX_SP_VS_PVT_MEM_PARAM_REG_MEMSIZEPERITEM__SHIFT) & A3XX_SP_VS_PVT_MEM_PARAM_REG_MEMSIZEPERITEM__MASK; -} -#define A3XX_SP_VS_PVT_MEM_PARAM_REG_HWSTACKOFFSET__MASK 0x00ffff00 -#define A3XX_SP_VS_PVT_MEM_PARAM_REG_HWSTACKOFFSET__SHIFT 8 -static inline uint32_t A3XX_SP_VS_PVT_MEM_PARAM_REG_HWSTACKOFFSET(uint32_t val) -{ - return ((val) << A3XX_SP_VS_PVT_MEM_PARAM_REG_HWSTACKOFFSET__SHIFT) & A3XX_SP_VS_PVT_MEM_PARAM_REG_HWSTACKOFFSET__MASK; -} -#define A3XX_SP_VS_PVT_MEM_PARAM_REG_HWSTACKSIZEPERTHREAD__MASK 0xff000000 -#define A3XX_SP_VS_PVT_MEM_PARAM_REG_HWSTACKSIZEPERTHREAD__SHIFT 24 -static inline uint32_t A3XX_SP_VS_PVT_MEM_PARAM_REG_HWSTACKSIZEPERTHREAD(uint32_t val) 
-{ - return ((val) << A3XX_SP_VS_PVT_MEM_PARAM_REG_HWSTACKSIZEPERTHREAD__SHIFT) & A3XX_SP_VS_PVT_MEM_PARAM_REG_HWSTACKSIZEPERTHREAD__MASK; -} - -#define REG_A3XX_SP_VS_PVT_MEM_ADDR_REG 0x000022d7 -#define A3XX_SP_VS_PVT_MEM_ADDR_REG_BURSTLEN__MASK 0x0000001f -#define A3XX_SP_VS_PVT_MEM_ADDR_REG_BURSTLEN__SHIFT 0 -static inline uint32_t A3XX_SP_VS_PVT_MEM_ADDR_REG_BURSTLEN(uint32_t val) -{ - return ((val) << A3XX_SP_VS_PVT_MEM_ADDR_REG_BURSTLEN__SHIFT) & A3XX_SP_VS_PVT_MEM_ADDR_REG_BURSTLEN__MASK; -} -#define A3XX_SP_VS_PVT_MEM_ADDR_REG_SHADERSTARTADDRESS__MASK 0xffffffe0 -#define A3XX_SP_VS_PVT_MEM_ADDR_REG_SHADERSTARTADDRESS__SHIFT 5 -static inline uint32_t A3XX_SP_VS_PVT_MEM_ADDR_REG_SHADERSTARTADDRESS(uint32_t val) -{ - assert(!(val & 0x1f)); - return ((val >> 5) << A3XX_SP_VS_PVT_MEM_ADDR_REG_SHADERSTARTADDRESS__SHIFT) & A3XX_SP_VS_PVT_MEM_ADDR_REG_SHADERSTARTADDRESS__MASK; -} - -#define REG_A3XX_SP_VS_PVT_MEM_SIZE_REG 0x000022d8 - -#define REG_A3XX_SP_VS_LENGTH_REG 0x000022df -#define A3XX_SP_VS_LENGTH_REG_SHADERLENGTH__MASK 0xffffffff -#define A3XX_SP_VS_LENGTH_REG_SHADERLENGTH__SHIFT 0 -static inline uint32_t A3XX_SP_VS_LENGTH_REG_SHADERLENGTH(uint32_t val) -{ - return ((val) << A3XX_SP_VS_LENGTH_REG_SHADERLENGTH__SHIFT) & A3XX_SP_VS_LENGTH_REG_SHADERLENGTH__MASK; -} - -#define REG_A3XX_SP_FS_CTRL_REG0 0x000022e0 -#define A3XX_SP_FS_CTRL_REG0_THREADMODE__MASK 0x00000001 -#define A3XX_SP_FS_CTRL_REG0_THREADMODE__SHIFT 0 -static inline uint32_t A3XX_SP_FS_CTRL_REG0_THREADMODE(enum a3xx_threadmode val) -{ - return ((val) << A3XX_SP_FS_CTRL_REG0_THREADMODE__SHIFT) & A3XX_SP_FS_CTRL_REG0_THREADMODE__MASK; -} -#define A3XX_SP_FS_CTRL_REG0_INSTRBUFFERMODE__MASK 0x00000002 -#define A3XX_SP_FS_CTRL_REG0_INSTRBUFFERMODE__SHIFT 1 -static inline uint32_t A3XX_SP_FS_CTRL_REG0_INSTRBUFFERMODE(enum a3xx_instrbuffermode val) -{ - return ((val) << A3XX_SP_FS_CTRL_REG0_INSTRBUFFERMODE__SHIFT) & A3XX_SP_FS_CTRL_REG0_INSTRBUFFERMODE__MASK; -} -#define 
A3XX_SP_FS_CTRL_REG0_CACHEINVALID 0x00000004 -#define A3XX_SP_FS_CTRL_REG0_ALUSCHMODE 0x00000008 -#define A3XX_SP_FS_CTRL_REG0_HALFREGFOOTPRINT__MASK 0x000003f0 -#define A3XX_SP_FS_CTRL_REG0_HALFREGFOOTPRINT__SHIFT 4 -static inline uint32_t A3XX_SP_FS_CTRL_REG0_HALFREGFOOTPRINT(uint32_t val) -{ - return ((val) << A3XX_SP_FS_CTRL_REG0_HALFREGFOOTPRINT__SHIFT) & A3XX_SP_FS_CTRL_REG0_HALFREGFOOTPRINT__MASK; -} -#define A3XX_SP_FS_CTRL_REG0_FULLREGFOOTPRINT__MASK 0x0000fc00 -#define A3XX_SP_FS_CTRL_REG0_FULLREGFOOTPRINT__SHIFT 10 -static inline uint32_t A3XX_SP_FS_CTRL_REG0_FULLREGFOOTPRINT(uint32_t val) -{ - return ((val) << A3XX_SP_FS_CTRL_REG0_FULLREGFOOTPRINT__SHIFT) & A3XX_SP_FS_CTRL_REG0_FULLREGFOOTPRINT__MASK; -} -#define A3XX_SP_FS_CTRL_REG0_FSBYPASSENABLE 0x00020000 -#define A3XX_SP_FS_CTRL_REG0_INOUTREGOVERLAP 0x00040000 -#define A3XX_SP_FS_CTRL_REG0_OUTORDERED 0x00080000 -#define A3XX_SP_FS_CTRL_REG0_THREADSIZE__MASK 0x00100000 -#define A3XX_SP_FS_CTRL_REG0_THREADSIZE__SHIFT 20 -static inline uint32_t A3XX_SP_FS_CTRL_REG0_THREADSIZE(enum a3xx_threadsize val) -{ - return ((val) << A3XX_SP_FS_CTRL_REG0_THREADSIZE__SHIFT) & A3XX_SP_FS_CTRL_REG0_THREADSIZE__MASK; -} -#define A3XX_SP_FS_CTRL_REG0_SUPERTHREADMODE 0x00200000 -#define A3XX_SP_FS_CTRL_REG0_PIXLODENABLE 0x00400000 -#define A3XX_SP_FS_CTRL_REG0_COMPUTEMODE 0x00800000 -#define A3XX_SP_FS_CTRL_REG0_LENGTH__MASK 0xff000000 -#define A3XX_SP_FS_CTRL_REG0_LENGTH__SHIFT 24 -static inline uint32_t A3XX_SP_FS_CTRL_REG0_LENGTH(uint32_t val) -{ - return ((val) << A3XX_SP_FS_CTRL_REG0_LENGTH__SHIFT) & A3XX_SP_FS_CTRL_REG0_LENGTH__MASK; -} - -#define REG_A3XX_SP_FS_CTRL_REG1 0x000022e1 -#define A3XX_SP_FS_CTRL_REG1_CONSTLENGTH__MASK 0x000003ff -#define A3XX_SP_FS_CTRL_REG1_CONSTLENGTH__SHIFT 0 -static inline uint32_t A3XX_SP_FS_CTRL_REG1_CONSTLENGTH(uint32_t val) -{ - return ((val) << A3XX_SP_FS_CTRL_REG1_CONSTLENGTH__SHIFT) & A3XX_SP_FS_CTRL_REG1_CONSTLENGTH__MASK; -} -#define 
A3XX_SP_FS_CTRL_REG1_CONSTFOOTPRINT__MASK 0x000ffc00 -#define A3XX_SP_FS_CTRL_REG1_CONSTFOOTPRINT__SHIFT 10 -static inline uint32_t A3XX_SP_FS_CTRL_REG1_CONSTFOOTPRINT(uint32_t val) -{ - return ((val) << A3XX_SP_FS_CTRL_REG1_CONSTFOOTPRINT__SHIFT) & A3XX_SP_FS_CTRL_REG1_CONSTFOOTPRINT__MASK; -} -#define A3XX_SP_FS_CTRL_REG1_INITIALOUTSTANDING__MASK 0x00f00000 -#define A3XX_SP_FS_CTRL_REG1_INITIALOUTSTANDING__SHIFT 20 -static inline uint32_t A3XX_SP_FS_CTRL_REG1_INITIALOUTSTANDING(uint32_t val) -{ - return ((val) << A3XX_SP_FS_CTRL_REG1_INITIALOUTSTANDING__SHIFT) & A3XX_SP_FS_CTRL_REG1_INITIALOUTSTANDING__MASK; -} -#define A3XX_SP_FS_CTRL_REG1_HALFPRECVAROFFSET__MASK 0x7f000000 -#define A3XX_SP_FS_CTRL_REG1_HALFPRECVAROFFSET__SHIFT 24 -static inline uint32_t A3XX_SP_FS_CTRL_REG1_HALFPRECVAROFFSET(uint32_t val) -{ - return ((val) << A3XX_SP_FS_CTRL_REG1_HALFPRECVAROFFSET__SHIFT) & A3XX_SP_FS_CTRL_REG1_HALFPRECVAROFFSET__MASK; -} - -#define REG_A3XX_SP_FS_OBJ_OFFSET_REG 0x000022e2 -#define A3XX_SP_FS_OBJ_OFFSET_REG_FIRSTEXECINSTROFFSET__MASK 0x0000ffff -#define A3XX_SP_FS_OBJ_OFFSET_REG_FIRSTEXECINSTROFFSET__SHIFT 0 -static inline uint32_t A3XX_SP_FS_OBJ_OFFSET_REG_FIRSTEXECINSTROFFSET(uint32_t val) -{ - return ((val) << A3XX_SP_FS_OBJ_OFFSET_REG_FIRSTEXECINSTROFFSET__SHIFT) & A3XX_SP_FS_OBJ_OFFSET_REG_FIRSTEXECINSTROFFSET__MASK; -} -#define A3XX_SP_FS_OBJ_OFFSET_REG_CONSTOBJECTOFFSET__MASK 0x01ff0000 -#define A3XX_SP_FS_OBJ_OFFSET_REG_CONSTOBJECTOFFSET__SHIFT 16 -static inline uint32_t A3XX_SP_FS_OBJ_OFFSET_REG_CONSTOBJECTOFFSET(uint32_t val) -{ - return ((val) << A3XX_SP_FS_OBJ_OFFSET_REG_CONSTOBJECTOFFSET__SHIFT) & A3XX_SP_FS_OBJ_OFFSET_REG_CONSTOBJECTOFFSET__MASK; -} -#define A3XX_SP_FS_OBJ_OFFSET_REG_SHADEROBJOFFSET__MASK 0xfe000000 -#define A3XX_SP_FS_OBJ_OFFSET_REG_SHADEROBJOFFSET__SHIFT 25 -static inline uint32_t A3XX_SP_FS_OBJ_OFFSET_REG_SHADEROBJOFFSET(uint32_t val) -{ - return ((val) << A3XX_SP_FS_OBJ_OFFSET_REG_SHADEROBJOFFSET__SHIFT) & 
A3XX_SP_FS_OBJ_OFFSET_REG_SHADEROBJOFFSET__MASK; -} - -#define REG_A3XX_SP_FS_OBJ_START_REG 0x000022e3 - -#define REG_A3XX_SP_FS_PVT_MEM_PARAM_REG 0x000022e4 -#define A3XX_SP_FS_PVT_MEM_PARAM_REG_MEMSIZEPERITEM__MASK 0x000000ff -#define A3XX_SP_FS_PVT_MEM_PARAM_REG_MEMSIZEPERITEM__SHIFT 0 -static inline uint32_t A3XX_SP_FS_PVT_MEM_PARAM_REG_MEMSIZEPERITEM(uint32_t val) -{ - return ((val) << A3XX_SP_FS_PVT_MEM_PARAM_REG_MEMSIZEPERITEM__SHIFT) & A3XX_SP_FS_PVT_MEM_PARAM_REG_MEMSIZEPERITEM__MASK; -} -#define A3XX_SP_FS_PVT_MEM_PARAM_REG_HWSTACKOFFSET__MASK 0x00ffff00 -#define A3XX_SP_FS_PVT_MEM_PARAM_REG_HWSTACKOFFSET__SHIFT 8 -static inline uint32_t A3XX_SP_FS_PVT_MEM_PARAM_REG_HWSTACKOFFSET(uint32_t val) -{ - return ((val) << A3XX_SP_FS_PVT_MEM_PARAM_REG_HWSTACKOFFSET__SHIFT) & A3XX_SP_FS_PVT_MEM_PARAM_REG_HWSTACKOFFSET__MASK; -} -#define A3XX_SP_FS_PVT_MEM_PARAM_REG_HWSTACKSIZEPERTHREAD__MASK 0xff000000 -#define A3XX_SP_FS_PVT_MEM_PARAM_REG_HWSTACKSIZEPERTHREAD__SHIFT 24 -static inline uint32_t A3XX_SP_FS_PVT_MEM_PARAM_REG_HWSTACKSIZEPERTHREAD(uint32_t val) -{ - return ((val) << A3XX_SP_FS_PVT_MEM_PARAM_REG_HWSTACKSIZEPERTHREAD__SHIFT) & A3XX_SP_FS_PVT_MEM_PARAM_REG_HWSTACKSIZEPERTHREAD__MASK; -} - -#define REG_A3XX_SP_FS_PVT_MEM_ADDR_REG 0x000022e5 -#define A3XX_SP_FS_PVT_MEM_ADDR_REG_BURSTLEN__MASK 0x0000001f -#define A3XX_SP_FS_PVT_MEM_ADDR_REG_BURSTLEN__SHIFT 0 -static inline uint32_t A3XX_SP_FS_PVT_MEM_ADDR_REG_BURSTLEN(uint32_t val) -{ - return ((val) << A3XX_SP_FS_PVT_MEM_ADDR_REG_BURSTLEN__SHIFT) & A3XX_SP_FS_PVT_MEM_ADDR_REG_BURSTLEN__MASK; -} -#define A3XX_SP_FS_PVT_MEM_ADDR_REG_SHADERSTARTADDRESS__MASK 0xffffffe0 -#define A3XX_SP_FS_PVT_MEM_ADDR_REG_SHADERSTARTADDRESS__SHIFT 5 -static inline uint32_t A3XX_SP_FS_PVT_MEM_ADDR_REG_SHADERSTARTADDRESS(uint32_t val) -{ - assert(!(val & 0x1f)); - return ((val >> 5) << A3XX_SP_FS_PVT_MEM_ADDR_REG_SHADERSTARTADDRESS__SHIFT) & A3XX_SP_FS_PVT_MEM_ADDR_REG_SHADERSTARTADDRESS__MASK; -} - -#define 
REG_A3XX_SP_FS_PVT_MEM_SIZE_REG 0x000022e6 - -#define REG_A3XX_SP_FS_FLAT_SHAD_MODE_REG_0 0x000022e8 - -#define REG_A3XX_SP_FS_FLAT_SHAD_MODE_REG_1 0x000022e9 - -#define REG_A3XX_SP_FS_OUTPUT_REG 0x000022ec -#define A3XX_SP_FS_OUTPUT_REG_MRT__MASK 0x00000003 -#define A3XX_SP_FS_OUTPUT_REG_MRT__SHIFT 0 -static inline uint32_t A3XX_SP_FS_OUTPUT_REG_MRT(uint32_t val) -{ - return ((val) << A3XX_SP_FS_OUTPUT_REG_MRT__SHIFT) & A3XX_SP_FS_OUTPUT_REG_MRT__MASK; -} -#define A3XX_SP_FS_OUTPUT_REG_DEPTH_ENABLE 0x00000080 -#define A3XX_SP_FS_OUTPUT_REG_DEPTH_REGID__MASK 0x0000ff00 -#define A3XX_SP_FS_OUTPUT_REG_DEPTH_REGID__SHIFT 8 -static inline uint32_t A3XX_SP_FS_OUTPUT_REG_DEPTH_REGID(uint32_t val) -{ - return ((val) << A3XX_SP_FS_OUTPUT_REG_DEPTH_REGID__SHIFT) & A3XX_SP_FS_OUTPUT_REG_DEPTH_REGID__MASK; -} - -static inline uint32_t REG_A3XX_SP_FS_MRT(uint32_t i0) { return 0x000022f0 + 0x1*i0; } - -static inline uint32_t REG_A3XX_SP_FS_MRT_REG(uint32_t i0) { return 0x000022f0 + 0x1*i0; } -#define A3XX_SP_FS_MRT_REG_REGID__MASK 0x000000ff -#define A3XX_SP_FS_MRT_REG_REGID__SHIFT 0 -static inline uint32_t A3XX_SP_FS_MRT_REG_REGID(uint32_t val) -{ - return ((val) << A3XX_SP_FS_MRT_REG_REGID__SHIFT) & A3XX_SP_FS_MRT_REG_REGID__MASK; -} -#define A3XX_SP_FS_MRT_REG_HALF_PRECISION 0x00000100 -#define A3XX_SP_FS_MRT_REG_SINT 0x00000400 -#define A3XX_SP_FS_MRT_REG_UINT 0x00000800 - -static inline uint32_t REG_A3XX_SP_FS_IMAGE_OUTPUT(uint32_t i0) { return 0x000022f4 + 0x1*i0; } - -static inline uint32_t REG_A3XX_SP_FS_IMAGE_OUTPUT_REG(uint32_t i0) { return 0x000022f4 + 0x1*i0; } -#define A3XX_SP_FS_IMAGE_OUTPUT_REG_MRTFORMAT__MASK 0x0000003f -#define A3XX_SP_FS_IMAGE_OUTPUT_REG_MRTFORMAT__SHIFT 0 -static inline uint32_t A3XX_SP_FS_IMAGE_OUTPUT_REG_MRTFORMAT(enum a3xx_color_fmt val) -{ - return ((val) << A3XX_SP_FS_IMAGE_OUTPUT_REG_MRTFORMAT__SHIFT) & A3XX_SP_FS_IMAGE_OUTPUT_REG_MRTFORMAT__MASK; -} - -#define REG_A3XX_SP_FS_LENGTH_REG 0x000022ff -#define 
A3XX_SP_FS_LENGTH_REG_SHADERLENGTH__MASK 0xffffffff -#define A3XX_SP_FS_LENGTH_REG_SHADERLENGTH__SHIFT 0 -static inline uint32_t A3XX_SP_FS_LENGTH_REG_SHADERLENGTH(uint32_t val) -{ - return ((val) << A3XX_SP_FS_LENGTH_REG_SHADERLENGTH__SHIFT) & A3XX_SP_FS_LENGTH_REG_SHADERLENGTH__MASK; -} - -#define REG_A3XX_PA_SC_AA_CONFIG 0x00002301 - -#define REG_A3XX_TPL1_TP_VS_TEX_OFFSET 0x00002340 -#define A3XX_TPL1_TP_VS_TEX_OFFSET_SAMPLEROFFSET__MASK 0x000000ff -#define A3XX_TPL1_TP_VS_TEX_OFFSET_SAMPLEROFFSET__SHIFT 0 -static inline uint32_t A3XX_TPL1_TP_VS_TEX_OFFSET_SAMPLEROFFSET(uint32_t val) -{ - return ((val) << A3XX_TPL1_TP_VS_TEX_OFFSET_SAMPLEROFFSET__SHIFT) & A3XX_TPL1_TP_VS_TEX_OFFSET_SAMPLEROFFSET__MASK; -} -#define A3XX_TPL1_TP_VS_TEX_OFFSET_MEMOBJOFFSET__MASK 0x0000ff00 -#define A3XX_TPL1_TP_VS_TEX_OFFSET_MEMOBJOFFSET__SHIFT 8 -static inline uint32_t A3XX_TPL1_TP_VS_TEX_OFFSET_MEMOBJOFFSET(uint32_t val) -{ - return ((val) << A3XX_TPL1_TP_VS_TEX_OFFSET_MEMOBJOFFSET__SHIFT) & A3XX_TPL1_TP_VS_TEX_OFFSET_MEMOBJOFFSET__MASK; -} -#define A3XX_TPL1_TP_VS_TEX_OFFSET_BASETABLEPTR__MASK 0xffff0000 -#define A3XX_TPL1_TP_VS_TEX_OFFSET_BASETABLEPTR__SHIFT 16 -static inline uint32_t A3XX_TPL1_TP_VS_TEX_OFFSET_BASETABLEPTR(uint32_t val) -{ - return ((val) << A3XX_TPL1_TP_VS_TEX_OFFSET_BASETABLEPTR__SHIFT) & A3XX_TPL1_TP_VS_TEX_OFFSET_BASETABLEPTR__MASK; -} - -#define REG_A3XX_TPL1_TP_VS_BORDER_COLOR_BASE_ADDR 0x00002341 - -#define REG_A3XX_TPL1_TP_FS_TEX_OFFSET 0x00002342 -#define A3XX_TPL1_TP_FS_TEX_OFFSET_SAMPLEROFFSET__MASK 0x000000ff -#define A3XX_TPL1_TP_FS_TEX_OFFSET_SAMPLEROFFSET__SHIFT 0 -static inline uint32_t A3XX_TPL1_TP_FS_TEX_OFFSET_SAMPLEROFFSET(uint32_t val) -{ - return ((val) << A3XX_TPL1_TP_FS_TEX_OFFSET_SAMPLEROFFSET__SHIFT) & A3XX_TPL1_TP_FS_TEX_OFFSET_SAMPLEROFFSET__MASK; -} -#define A3XX_TPL1_TP_FS_TEX_OFFSET_MEMOBJOFFSET__MASK 0x0000ff00 -#define A3XX_TPL1_TP_FS_TEX_OFFSET_MEMOBJOFFSET__SHIFT 8 -static inline uint32_t 
A3XX_TPL1_TP_FS_TEX_OFFSET_MEMOBJOFFSET(uint32_t val) -{ - return ((val) << A3XX_TPL1_TP_FS_TEX_OFFSET_MEMOBJOFFSET__SHIFT) & A3XX_TPL1_TP_FS_TEX_OFFSET_MEMOBJOFFSET__MASK; -} -#define A3XX_TPL1_TP_FS_TEX_OFFSET_BASETABLEPTR__MASK 0xffff0000 -#define A3XX_TPL1_TP_FS_TEX_OFFSET_BASETABLEPTR__SHIFT 16 -static inline uint32_t A3XX_TPL1_TP_FS_TEX_OFFSET_BASETABLEPTR(uint32_t val) -{ - return ((val) << A3XX_TPL1_TP_FS_TEX_OFFSET_BASETABLEPTR__SHIFT) & A3XX_TPL1_TP_FS_TEX_OFFSET_BASETABLEPTR__MASK; -} - -#define REG_A3XX_TPL1_TP_FS_BORDER_COLOR_BASE_ADDR 0x00002343 - -#define REG_A3XX_VBIF_CLKON 0x00003001 - -#define REG_A3XX_VBIF_FIXED_SORT_EN 0x0000300c - -#define REG_A3XX_VBIF_FIXED_SORT_SEL0 0x0000300d - -#define REG_A3XX_VBIF_FIXED_SORT_SEL1 0x0000300e - -#define REG_A3XX_VBIF_ABIT_SORT 0x0000301c - -#define REG_A3XX_VBIF_ABIT_SORT_CONF 0x0000301d - -#define REG_A3XX_VBIF_GATE_OFF_WRREQ_EN 0x0000302a - -#define REG_A3XX_VBIF_IN_RD_LIM_CONF0 0x0000302c - -#define REG_A3XX_VBIF_IN_RD_LIM_CONF1 0x0000302d - -#define REG_A3XX_VBIF_IN_WR_LIM_CONF0 0x00003030 - -#define REG_A3XX_VBIF_IN_WR_LIM_CONF1 0x00003031 - -#define REG_A3XX_VBIF_OUT_RD_LIM_CONF0 0x00003034 - -#define REG_A3XX_VBIF_OUT_WR_LIM_CONF0 0x00003035 - -#define REG_A3XX_VBIF_DDR_OUT_MAX_BURST 0x00003036 - -#define REG_A3XX_VBIF_ARB_CTL 0x0000303c - -#define REG_A3XX_VBIF_ROUND_ROBIN_QOS_ARB 0x00003049 - -#define REG_A3XX_VBIF_OUT_AXI_AMEMTYPE_CONF0 0x00003058 - -#define REG_A3XX_VBIF_OUT_AXI_AOOO_EN 0x0000305e - -#define REG_A3XX_VBIF_OUT_AXI_AOOO 0x0000305f - -#define REG_A3XX_VBIF_PERF_CNT_EN 0x00003070 -#define A3XX_VBIF_PERF_CNT_EN_CNT0 0x00000001 -#define A3XX_VBIF_PERF_CNT_EN_CNT1 0x00000002 -#define A3XX_VBIF_PERF_CNT_EN_PWRCNT0 0x00000004 -#define A3XX_VBIF_PERF_CNT_EN_PWRCNT1 0x00000008 -#define A3XX_VBIF_PERF_CNT_EN_PWRCNT2 0x00000010 - -#define REG_A3XX_VBIF_PERF_CNT_CLR 0x00003071 -#define A3XX_VBIF_PERF_CNT_CLR_CNT0 0x00000001 -#define A3XX_VBIF_PERF_CNT_CLR_CNT1 0x00000002 -#define 
A3XX_VBIF_PERF_CNT_CLR_PWRCNT0 0x00000004 -#define A3XX_VBIF_PERF_CNT_CLR_PWRCNT1 0x00000008 -#define A3XX_VBIF_PERF_CNT_CLR_PWRCNT2 0x00000010 - -#define REG_A3XX_VBIF_PERF_CNT_SEL 0x00003072 - -#define REG_A3XX_VBIF_PERF_CNT0_LO 0x00003073 - -#define REG_A3XX_VBIF_PERF_CNT0_HI 0x00003074 - -#define REG_A3XX_VBIF_PERF_CNT1_LO 0x00003075 - -#define REG_A3XX_VBIF_PERF_CNT1_HI 0x00003076 - -#define REG_A3XX_VBIF_PERF_PWR_CNT0_LO 0x00003077 - -#define REG_A3XX_VBIF_PERF_PWR_CNT0_HI 0x00003078 - -#define REG_A3XX_VBIF_PERF_PWR_CNT1_LO 0x00003079 - -#define REG_A3XX_VBIF_PERF_PWR_CNT1_HI 0x0000307a - -#define REG_A3XX_VBIF_PERF_PWR_CNT2_LO 0x0000307b - -#define REG_A3XX_VBIF_PERF_PWR_CNT2_HI 0x0000307c - -#define REG_A3XX_VSC_BIN_SIZE 0x00000c01 -#define A3XX_VSC_BIN_SIZE_WIDTH__MASK 0x0000001f -#define A3XX_VSC_BIN_SIZE_WIDTH__SHIFT 0 -static inline uint32_t A3XX_VSC_BIN_SIZE_WIDTH(uint32_t val) -{ - assert(!(val & 0x1f)); - return ((val >> 5) << A3XX_VSC_BIN_SIZE_WIDTH__SHIFT) & A3XX_VSC_BIN_SIZE_WIDTH__MASK; -} -#define A3XX_VSC_BIN_SIZE_HEIGHT__MASK 0x000003e0 -#define A3XX_VSC_BIN_SIZE_HEIGHT__SHIFT 5 -static inline uint32_t A3XX_VSC_BIN_SIZE_HEIGHT(uint32_t val) -{ - assert(!(val & 0x1f)); - return ((val >> 5) << A3XX_VSC_BIN_SIZE_HEIGHT__SHIFT) & A3XX_VSC_BIN_SIZE_HEIGHT__MASK; -} - -#define REG_A3XX_VSC_SIZE_ADDRESS 0x00000c02 - -static inline uint32_t REG_A3XX_VSC_PIPE(uint32_t i0) { return 0x00000c06 + 0x3*i0; } - -static inline uint32_t REG_A3XX_VSC_PIPE_CONFIG(uint32_t i0) { return 0x00000c06 + 0x3*i0; } -#define A3XX_VSC_PIPE_CONFIG_X__MASK 0x000003ff -#define A3XX_VSC_PIPE_CONFIG_X__SHIFT 0 -static inline uint32_t A3XX_VSC_PIPE_CONFIG_X(uint32_t val) -{ - return ((val) << A3XX_VSC_PIPE_CONFIG_X__SHIFT) & A3XX_VSC_PIPE_CONFIG_X__MASK; -} -#define A3XX_VSC_PIPE_CONFIG_Y__MASK 0x000ffc00 -#define A3XX_VSC_PIPE_CONFIG_Y__SHIFT 10 -static inline uint32_t A3XX_VSC_PIPE_CONFIG_Y(uint32_t val) -{ - return ((val) << A3XX_VSC_PIPE_CONFIG_Y__SHIFT) & 
A3XX_VSC_PIPE_CONFIG_Y__MASK; -} -#define A3XX_VSC_PIPE_CONFIG_W__MASK 0x00f00000 -#define A3XX_VSC_PIPE_CONFIG_W__SHIFT 20 -static inline uint32_t A3XX_VSC_PIPE_CONFIG_W(uint32_t val) -{ - return ((val) << A3XX_VSC_PIPE_CONFIG_W__SHIFT) & A3XX_VSC_PIPE_CONFIG_W__MASK; -} -#define A3XX_VSC_PIPE_CONFIG_H__MASK 0x0f000000 -#define A3XX_VSC_PIPE_CONFIG_H__SHIFT 24 -static inline uint32_t A3XX_VSC_PIPE_CONFIG_H(uint32_t val) -{ - return ((val) << A3XX_VSC_PIPE_CONFIG_H__SHIFT) & A3XX_VSC_PIPE_CONFIG_H__MASK; -} - -static inline uint32_t REG_A3XX_VSC_PIPE_DATA_ADDRESS(uint32_t i0) { return 0x00000c07 + 0x3*i0; } - -static inline uint32_t REG_A3XX_VSC_PIPE_DATA_LENGTH(uint32_t i0) { return 0x00000c08 + 0x3*i0; } - -#define REG_A3XX_VSC_BIN_CONTROL 0x00000c3c -#define A3XX_VSC_BIN_CONTROL_BINNING_ENABLE 0x00000001 - -#define REG_A3XX_UNKNOWN_0C3D 0x00000c3d - -#define REG_A3XX_PC_PERFCOUNTER0_SELECT 0x00000c48 - -#define REG_A3XX_PC_PERFCOUNTER1_SELECT 0x00000c49 - -#define REG_A3XX_PC_PERFCOUNTER2_SELECT 0x00000c4a - -#define REG_A3XX_PC_PERFCOUNTER3_SELECT 0x00000c4b - -#define REG_A3XX_GRAS_TSE_DEBUG_ECO 0x00000c81 - -#define REG_A3XX_GRAS_PERFCOUNTER0_SELECT 0x00000c88 - -#define REG_A3XX_GRAS_PERFCOUNTER1_SELECT 0x00000c89 - -#define REG_A3XX_GRAS_PERFCOUNTER2_SELECT 0x00000c8a - -#define REG_A3XX_GRAS_PERFCOUNTER3_SELECT 0x00000c8b - -static inline uint32_t REG_A3XX_GRAS_CL_USER_PLANE(uint32_t i0) { return 0x00000ca0 + 0x4*i0; } - -static inline uint32_t REG_A3XX_GRAS_CL_USER_PLANE_X(uint32_t i0) { return 0x00000ca0 + 0x4*i0; } - -static inline uint32_t REG_A3XX_GRAS_CL_USER_PLANE_Y(uint32_t i0) { return 0x00000ca1 + 0x4*i0; } - -static inline uint32_t REG_A3XX_GRAS_CL_USER_PLANE_Z(uint32_t i0) { return 0x00000ca2 + 0x4*i0; } - -static inline uint32_t REG_A3XX_GRAS_CL_USER_PLANE_W(uint32_t i0) { return 0x00000ca3 + 0x4*i0; } - -#define REG_A3XX_RB_GMEM_BASE_ADDR 0x00000cc0 - -#define REG_A3XX_RB_DEBUG_ECO_CONTROLS_ADDR 0x00000cc1 - -#define 
REG_A3XX_RB_PERFCOUNTER0_SELECT 0x00000cc6 - -#define REG_A3XX_RB_PERFCOUNTER1_SELECT 0x00000cc7 - -#define REG_A3XX_RB_FRAME_BUFFER_DIMENSION 0x00000ce0 -#define A3XX_RB_FRAME_BUFFER_DIMENSION_WIDTH__MASK 0x00003fff -#define A3XX_RB_FRAME_BUFFER_DIMENSION_WIDTH__SHIFT 0 -static inline uint32_t A3XX_RB_FRAME_BUFFER_DIMENSION_WIDTH(uint32_t val) -{ - return ((val) << A3XX_RB_FRAME_BUFFER_DIMENSION_WIDTH__SHIFT) & A3XX_RB_FRAME_BUFFER_DIMENSION_WIDTH__MASK; -} -#define A3XX_RB_FRAME_BUFFER_DIMENSION_HEIGHT__MASK 0x0fffc000 -#define A3XX_RB_FRAME_BUFFER_DIMENSION_HEIGHT__SHIFT 14 -static inline uint32_t A3XX_RB_FRAME_BUFFER_DIMENSION_HEIGHT(uint32_t val) -{ - return ((val) << A3XX_RB_FRAME_BUFFER_DIMENSION_HEIGHT__SHIFT) & A3XX_RB_FRAME_BUFFER_DIMENSION_HEIGHT__MASK; -} - -#define REG_A3XX_HLSQ_PERFCOUNTER0_SELECT 0x00000e00 - -#define REG_A3XX_HLSQ_PERFCOUNTER1_SELECT 0x00000e01 - -#define REG_A3XX_HLSQ_PERFCOUNTER2_SELECT 0x00000e02 - -#define REG_A3XX_HLSQ_PERFCOUNTER3_SELECT 0x00000e03 - -#define REG_A3XX_HLSQ_PERFCOUNTER4_SELECT 0x00000e04 - -#define REG_A3XX_HLSQ_PERFCOUNTER5_SELECT 0x00000e05 - -#define REG_A3XX_UNKNOWN_0E43 0x00000e43 - -#define REG_A3XX_VFD_PERFCOUNTER0_SELECT 0x00000e44 - -#define REG_A3XX_VFD_PERFCOUNTER1_SELECT 0x00000e45 - -#define REG_A3XX_VPC_VPC_DEBUG_RAM_SEL 0x00000e61 - -#define REG_A3XX_VPC_VPC_DEBUG_RAM_READ 0x00000e62 - -#define REG_A3XX_VPC_PERFCOUNTER0_SELECT 0x00000e64 - -#define REG_A3XX_VPC_PERFCOUNTER1_SELECT 0x00000e65 - -#define REG_A3XX_UCHE_CACHE_MODE_CONTROL_REG 0x00000e82 - -#define REG_A3XX_UCHE_PERFCOUNTER0_SELECT 0x00000e84 - -#define REG_A3XX_UCHE_PERFCOUNTER1_SELECT 0x00000e85 - -#define REG_A3XX_UCHE_PERFCOUNTER2_SELECT 0x00000e86 - -#define REG_A3XX_UCHE_PERFCOUNTER3_SELECT 0x00000e87 - -#define REG_A3XX_UCHE_PERFCOUNTER4_SELECT 0x00000e88 - -#define REG_A3XX_UCHE_PERFCOUNTER5_SELECT 0x00000e89 - -#define REG_A3XX_UCHE_CACHE_INVALIDATE0_REG 0x00000ea0 -#define A3XX_UCHE_CACHE_INVALIDATE0_REG_ADDR__MASK 
0x0fffffff -#define A3XX_UCHE_CACHE_INVALIDATE0_REG_ADDR__SHIFT 0 -static inline uint32_t A3XX_UCHE_CACHE_INVALIDATE0_REG_ADDR(uint32_t val) -{ - return ((val) << A3XX_UCHE_CACHE_INVALIDATE0_REG_ADDR__SHIFT) & A3XX_UCHE_CACHE_INVALIDATE0_REG_ADDR__MASK; -} - -#define REG_A3XX_UCHE_CACHE_INVALIDATE1_REG 0x00000ea1 -#define A3XX_UCHE_CACHE_INVALIDATE1_REG_ADDR__MASK 0x0fffffff -#define A3XX_UCHE_CACHE_INVALIDATE1_REG_ADDR__SHIFT 0 -static inline uint32_t A3XX_UCHE_CACHE_INVALIDATE1_REG_ADDR(uint32_t val) -{ - return ((val) << A3XX_UCHE_CACHE_INVALIDATE1_REG_ADDR__SHIFT) & A3XX_UCHE_CACHE_INVALIDATE1_REG_ADDR__MASK; -} -#define A3XX_UCHE_CACHE_INVALIDATE1_REG_OPCODE__MASK 0x30000000 -#define A3XX_UCHE_CACHE_INVALIDATE1_REG_OPCODE__SHIFT 28 -static inline uint32_t A3XX_UCHE_CACHE_INVALIDATE1_REG_OPCODE(enum a3xx_cache_opcode val) -{ - return ((val) << A3XX_UCHE_CACHE_INVALIDATE1_REG_OPCODE__SHIFT) & A3XX_UCHE_CACHE_INVALIDATE1_REG_OPCODE__MASK; -} -#define A3XX_UCHE_CACHE_INVALIDATE1_REG_ENTIRE_CACHE 0x80000000 - -#define REG_A3XX_UNKNOWN_0EA6 0x00000ea6 - -#define REG_A3XX_SP_PERFCOUNTER0_SELECT 0x00000ec4 - -#define REG_A3XX_SP_PERFCOUNTER1_SELECT 0x00000ec5 - -#define REG_A3XX_SP_PERFCOUNTER2_SELECT 0x00000ec6 - -#define REG_A3XX_SP_PERFCOUNTER3_SELECT 0x00000ec7 - -#define REG_A3XX_SP_PERFCOUNTER4_SELECT 0x00000ec8 - -#define REG_A3XX_SP_PERFCOUNTER5_SELECT 0x00000ec9 - -#define REG_A3XX_SP_PERFCOUNTER6_SELECT 0x00000eca - -#define REG_A3XX_SP_PERFCOUNTER7_SELECT 0x00000ecb - -#define REG_A3XX_UNKNOWN_0EE0 0x00000ee0 - -#define REG_A3XX_UNKNOWN_0F03 0x00000f03 - -#define REG_A3XX_TP_PERFCOUNTER0_SELECT 0x00000f04 - -#define REG_A3XX_TP_PERFCOUNTER1_SELECT 0x00000f05 - -#define REG_A3XX_TP_PERFCOUNTER2_SELECT 0x00000f06 - -#define REG_A3XX_TP_PERFCOUNTER3_SELECT 0x00000f07 - -#define REG_A3XX_TP_PERFCOUNTER4_SELECT 0x00000f08 - -#define REG_A3XX_TP_PERFCOUNTER5_SELECT 0x00000f09 - -#define REG_A3XX_VGT_CL_INITIATOR 0x000021f0 - -#define REG_A3XX_VGT_EVENT_INITIATOR 
0x000021f9 - -#define REG_A3XX_VGT_DRAW_INITIATOR 0x000021fc -#define A3XX_VGT_DRAW_INITIATOR_PRIM_TYPE__MASK 0x0000003f -#define A3XX_VGT_DRAW_INITIATOR_PRIM_TYPE__SHIFT 0 -static inline uint32_t A3XX_VGT_DRAW_INITIATOR_PRIM_TYPE(enum pc_di_primtype val) -{ - return ((val) << A3XX_VGT_DRAW_INITIATOR_PRIM_TYPE__SHIFT) & A3XX_VGT_DRAW_INITIATOR_PRIM_TYPE__MASK; -} -#define A3XX_VGT_DRAW_INITIATOR_SOURCE_SELECT__MASK 0x000000c0 -#define A3XX_VGT_DRAW_INITIATOR_SOURCE_SELECT__SHIFT 6 -static inline uint32_t A3XX_VGT_DRAW_INITIATOR_SOURCE_SELECT(enum pc_di_src_sel val) -{ - return ((val) << A3XX_VGT_DRAW_INITIATOR_SOURCE_SELECT__SHIFT) & A3XX_VGT_DRAW_INITIATOR_SOURCE_SELECT__MASK; -} -#define A3XX_VGT_DRAW_INITIATOR_VIS_CULL__MASK 0x00000600 -#define A3XX_VGT_DRAW_INITIATOR_VIS_CULL__SHIFT 9 -static inline uint32_t A3XX_VGT_DRAW_INITIATOR_VIS_CULL(enum pc_di_vis_cull_mode val) -{ - return ((val) << A3XX_VGT_DRAW_INITIATOR_VIS_CULL__SHIFT) & A3XX_VGT_DRAW_INITIATOR_VIS_CULL__MASK; -} -#define A3XX_VGT_DRAW_INITIATOR_INDEX_SIZE__MASK 0x00000800 -#define A3XX_VGT_DRAW_INITIATOR_INDEX_SIZE__SHIFT 11 -static inline uint32_t A3XX_VGT_DRAW_INITIATOR_INDEX_SIZE(enum pc_di_index_size val) -{ - return ((val) << A3XX_VGT_DRAW_INITIATOR_INDEX_SIZE__SHIFT) & A3XX_VGT_DRAW_INITIATOR_INDEX_SIZE__MASK; -} -#define A3XX_VGT_DRAW_INITIATOR_NOT_EOP 0x00001000 -#define A3XX_VGT_DRAW_INITIATOR_SMALL_INDEX 0x00002000 -#define A3XX_VGT_DRAW_INITIATOR_PRE_DRAW_INITIATOR_ENABLE 0x00004000 -#define A3XX_VGT_DRAW_INITIATOR_NUM_INSTANCES__MASK 0xff000000 -#define A3XX_VGT_DRAW_INITIATOR_NUM_INSTANCES__SHIFT 24 -static inline uint32_t A3XX_VGT_DRAW_INITIATOR_NUM_INSTANCES(uint32_t val) -{ - return ((val) << A3XX_VGT_DRAW_INITIATOR_NUM_INSTANCES__SHIFT) & A3XX_VGT_DRAW_INITIATOR_NUM_INSTANCES__MASK; -} - -#define REG_A3XX_VGT_IMMED_DATA 0x000021fd - -#define REG_A3XX_TEX_SAMP_0 0x00000000 -#define A3XX_TEX_SAMP_0_CLAMPENABLE 0x00000001 -#define A3XX_TEX_SAMP_0_MIPFILTER_LINEAR 0x00000002 -#define 
A3XX_TEX_SAMP_0_XY_MAG__MASK 0x0000000c -#define A3XX_TEX_SAMP_0_XY_MAG__SHIFT 2 -static inline uint32_t A3XX_TEX_SAMP_0_XY_MAG(enum a3xx_tex_filter val) -{ - return ((val) << A3XX_TEX_SAMP_0_XY_MAG__SHIFT) & A3XX_TEX_SAMP_0_XY_MAG__MASK; -} -#define A3XX_TEX_SAMP_0_XY_MIN__MASK 0x00000030 -#define A3XX_TEX_SAMP_0_XY_MIN__SHIFT 4 -static inline uint32_t A3XX_TEX_SAMP_0_XY_MIN(enum a3xx_tex_filter val) -{ - return ((val) << A3XX_TEX_SAMP_0_XY_MIN__SHIFT) & A3XX_TEX_SAMP_0_XY_MIN__MASK; -} -#define A3XX_TEX_SAMP_0_WRAP_S__MASK 0x000001c0 -#define A3XX_TEX_SAMP_0_WRAP_S__SHIFT 6 -static inline uint32_t A3XX_TEX_SAMP_0_WRAP_S(enum a3xx_tex_clamp val) -{ - return ((val) << A3XX_TEX_SAMP_0_WRAP_S__SHIFT) & A3XX_TEX_SAMP_0_WRAP_S__MASK; -} -#define A3XX_TEX_SAMP_0_WRAP_T__MASK 0x00000e00 -#define A3XX_TEX_SAMP_0_WRAP_T__SHIFT 9 -static inline uint32_t A3XX_TEX_SAMP_0_WRAP_T(enum a3xx_tex_clamp val) -{ - return ((val) << A3XX_TEX_SAMP_0_WRAP_T__SHIFT) & A3XX_TEX_SAMP_0_WRAP_T__MASK; -} -#define A3XX_TEX_SAMP_0_WRAP_R__MASK 0x00007000 -#define A3XX_TEX_SAMP_0_WRAP_R__SHIFT 12 -static inline uint32_t A3XX_TEX_SAMP_0_WRAP_R(enum a3xx_tex_clamp val) -{ - return ((val) << A3XX_TEX_SAMP_0_WRAP_R__SHIFT) & A3XX_TEX_SAMP_0_WRAP_R__MASK; -} -#define A3XX_TEX_SAMP_0_ANISO__MASK 0x00038000 -#define A3XX_TEX_SAMP_0_ANISO__SHIFT 15 -static inline uint32_t A3XX_TEX_SAMP_0_ANISO(enum a3xx_tex_aniso val) -{ - return ((val) << A3XX_TEX_SAMP_0_ANISO__SHIFT) & A3XX_TEX_SAMP_0_ANISO__MASK; -} -#define A3XX_TEX_SAMP_0_COMPARE_FUNC__MASK 0x00700000 -#define A3XX_TEX_SAMP_0_COMPARE_FUNC__SHIFT 20 -static inline uint32_t A3XX_TEX_SAMP_0_COMPARE_FUNC(enum adreno_compare_func val) -{ - return ((val) << A3XX_TEX_SAMP_0_COMPARE_FUNC__SHIFT) & A3XX_TEX_SAMP_0_COMPARE_FUNC__MASK; -} -#define A3XX_TEX_SAMP_0_CUBEMAPSEAMLESSFILTOFF 0x01000000 -#define A3XX_TEX_SAMP_0_UNNORM_COORDS 0x80000000 - -#define REG_A3XX_TEX_SAMP_1 0x00000001 -#define A3XX_TEX_SAMP_1_LOD_BIAS__MASK 0x000007ff -#define 
A3XX_TEX_SAMP_1_LOD_BIAS__SHIFT 0 -static inline uint32_t A3XX_TEX_SAMP_1_LOD_BIAS(float val) -{ - return ((((int32_t)(val * 64.0))) << A3XX_TEX_SAMP_1_LOD_BIAS__SHIFT) & A3XX_TEX_SAMP_1_LOD_BIAS__MASK; -} -#define A3XX_TEX_SAMP_1_MAX_LOD__MASK 0x003ff000 -#define A3XX_TEX_SAMP_1_MAX_LOD__SHIFT 12 -static inline uint32_t A3XX_TEX_SAMP_1_MAX_LOD(float val) -{ - return ((((uint32_t)(val * 64.0))) << A3XX_TEX_SAMP_1_MAX_LOD__SHIFT) & A3XX_TEX_SAMP_1_MAX_LOD__MASK; -} -#define A3XX_TEX_SAMP_1_MIN_LOD__MASK 0xffc00000 -#define A3XX_TEX_SAMP_1_MIN_LOD__SHIFT 22 -static inline uint32_t A3XX_TEX_SAMP_1_MIN_LOD(float val) -{ - return ((((uint32_t)(val * 64.0))) << A3XX_TEX_SAMP_1_MIN_LOD__SHIFT) & A3XX_TEX_SAMP_1_MIN_LOD__MASK; -} - -#define REG_A3XX_TEX_CONST_0 0x00000000 -#define A3XX_TEX_CONST_0_TILED 0x00000001 -#define A3XX_TEX_CONST_0_SRGB 0x00000004 -#define A3XX_TEX_CONST_0_SWIZ_X__MASK 0x00000070 -#define A3XX_TEX_CONST_0_SWIZ_X__SHIFT 4 -static inline uint32_t A3XX_TEX_CONST_0_SWIZ_X(enum a3xx_tex_swiz val) -{ - return ((val) << A3XX_TEX_CONST_0_SWIZ_X__SHIFT) & A3XX_TEX_CONST_0_SWIZ_X__MASK; -} -#define A3XX_TEX_CONST_0_SWIZ_Y__MASK 0x00000380 -#define A3XX_TEX_CONST_0_SWIZ_Y__SHIFT 7 -static inline uint32_t A3XX_TEX_CONST_0_SWIZ_Y(enum a3xx_tex_swiz val) -{ - return ((val) << A3XX_TEX_CONST_0_SWIZ_Y__SHIFT) & A3XX_TEX_CONST_0_SWIZ_Y__MASK; -} -#define A3XX_TEX_CONST_0_SWIZ_Z__MASK 0x00001c00 -#define A3XX_TEX_CONST_0_SWIZ_Z__SHIFT 10 -static inline uint32_t A3XX_TEX_CONST_0_SWIZ_Z(enum a3xx_tex_swiz val) -{ - return ((val) << A3XX_TEX_CONST_0_SWIZ_Z__SHIFT) & A3XX_TEX_CONST_0_SWIZ_Z__MASK; -} -#define A3XX_TEX_CONST_0_SWIZ_W__MASK 0x0000e000 -#define A3XX_TEX_CONST_0_SWIZ_W__SHIFT 13 -static inline uint32_t A3XX_TEX_CONST_0_SWIZ_W(enum a3xx_tex_swiz val) -{ - return ((val) << A3XX_TEX_CONST_0_SWIZ_W__SHIFT) & A3XX_TEX_CONST_0_SWIZ_W__MASK; -} -#define A3XX_TEX_CONST_0_MIPLVLS__MASK 0x000f0000 -#define A3XX_TEX_CONST_0_MIPLVLS__SHIFT 16 -static inline uint32_t 
A3XX_TEX_CONST_0_MIPLVLS(uint32_t val) -{ - return ((val) << A3XX_TEX_CONST_0_MIPLVLS__SHIFT) & A3XX_TEX_CONST_0_MIPLVLS__MASK; -} -#define A3XX_TEX_CONST_0_MSAATEX__MASK 0x00300000 -#define A3XX_TEX_CONST_0_MSAATEX__SHIFT 20 -static inline uint32_t A3XX_TEX_CONST_0_MSAATEX(enum a3xx_tex_msaa val) -{ - return ((val) << A3XX_TEX_CONST_0_MSAATEX__SHIFT) & A3XX_TEX_CONST_0_MSAATEX__MASK; -} -#define A3XX_TEX_CONST_0_FMT__MASK 0x1fc00000 -#define A3XX_TEX_CONST_0_FMT__SHIFT 22 -static inline uint32_t A3XX_TEX_CONST_0_FMT(enum a3xx_tex_fmt val) -{ - return ((val) << A3XX_TEX_CONST_0_FMT__SHIFT) & A3XX_TEX_CONST_0_FMT__MASK; -} -#define A3XX_TEX_CONST_0_NOCONVERT 0x20000000 -#define A3XX_TEX_CONST_0_TYPE__MASK 0xc0000000 -#define A3XX_TEX_CONST_0_TYPE__SHIFT 30 -static inline uint32_t A3XX_TEX_CONST_0_TYPE(enum a3xx_tex_type val) -{ - return ((val) << A3XX_TEX_CONST_0_TYPE__SHIFT) & A3XX_TEX_CONST_0_TYPE__MASK; -} - -#define REG_A3XX_TEX_CONST_1 0x00000001 -#define A3XX_TEX_CONST_1_HEIGHT__MASK 0x00003fff -#define A3XX_TEX_CONST_1_HEIGHT__SHIFT 0 -static inline uint32_t A3XX_TEX_CONST_1_HEIGHT(uint32_t val) -{ - return ((val) << A3XX_TEX_CONST_1_HEIGHT__SHIFT) & A3XX_TEX_CONST_1_HEIGHT__MASK; -} -#define A3XX_TEX_CONST_1_WIDTH__MASK 0x0fffc000 -#define A3XX_TEX_CONST_1_WIDTH__SHIFT 14 -static inline uint32_t A3XX_TEX_CONST_1_WIDTH(uint32_t val) -{ - return ((val) << A3XX_TEX_CONST_1_WIDTH__SHIFT) & A3XX_TEX_CONST_1_WIDTH__MASK; -} -#define A3XX_TEX_CONST_1_FETCHSIZE__MASK 0xf0000000 -#define A3XX_TEX_CONST_1_FETCHSIZE__SHIFT 28 -static inline uint32_t A3XX_TEX_CONST_1_FETCHSIZE(enum a3xx_tex_fetchsize val) -{ - return ((val) << A3XX_TEX_CONST_1_FETCHSIZE__SHIFT) & A3XX_TEX_CONST_1_FETCHSIZE__MASK; -} - -#define REG_A3XX_TEX_CONST_2 0x00000002 -#define A3XX_TEX_CONST_2_INDX__MASK 0x000001ff -#define A3XX_TEX_CONST_2_INDX__SHIFT 0 -static inline uint32_t A3XX_TEX_CONST_2_INDX(uint32_t val) -{ - return ((val) << A3XX_TEX_CONST_2_INDX__SHIFT) & A3XX_TEX_CONST_2_INDX__MASK; 
-} -#define A3XX_TEX_CONST_2_PITCH__MASK 0x3ffff000 -#define A3XX_TEX_CONST_2_PITCH__SHIFT 12 -static inline uint32_t A3XX_TEX_CONST_2_PITCH(uint32_t val) -{ - return ((val) << A3XX_TEX_CONST_2_PITCH__SHIFT) & A3XX_TEX_CONST_2_PITCH__MASK; -} -#define A3XX_TEX_CONST_2_SWAP__MASK 0xc0000000 -#define A3XX_TEX_CONST_2_SWAP__SHIFT 30 -static inline uint32_t A3XX_TEX_CONST_2_SWAP(enum a3xx_color_swap val) -{ - return ((val) << A3XX_TEX_CONST_2_SWAP__SHIFT) & A3XX_TEX_CONST_2_SWAP__MASK; -} - -#define REG_A3XX_TEX_CONST_3 0x00000003 -#define A3XX_TEX_CONST_3_LAYERSZ1__MASK 0x0001ffff -#define A3XX_TEX_CONST_3_LAYERSZ1__SHIFT 0 -static inline uint32_t A3XX_TEX_CONST_3_LAYERSZ1(uint32_t val) -{ - assert(!(val & 0xfff)); - return ((val >> 12) << A3XX_TEX_CONST_3_LAYERSZ1__SHIFT) & A3XX_TEX_CONST_3_LAYERSZ1__MASK; -} -#define A3XX_TEX_CONST_3_DEPTH__MASK 0x0ffe0000 -#define A3XX_TEX_CONST_3_DEPTH__SHIFT 17 -static inline uint32_t A3XX_TEX_CONST_3_DEPTH(uint32_t val) -{ - return ((val) << A3XX_TEX_CONST_3_DEPTH__SHIFT) & A3XX_TEX_CONST_3_DEPTH__MASK; -} -#define A3XX_TEX_CONST_3_LAYERSZ2__MASK 0xf0000000 -#define A3XX_TEX_CONST_3_LAYERSZ2__SHIFT 28 -static inline uint32_t A3XX_TEX_CONST_3_LAYERSZ2(uint32_t val) -{ - assert(!(val & 0xfff)); - return ((val >> 12) << A3XX_TEX_CONST_3_LAYERSZ2__SHIFT) & A3XX_TEX_CONST_3_LAYERSZ2__MASK; -} - - -#endif /* A3XX_XML */ diff -Nru mesa-18.3.3/src/gallium/drivers/freedreno/a3xx/fd3_context.c mesa-19.0.1/src/gallium/drivers/freedreno/a3xx/fd3_context.c --- mesa-18.3.3/src/gallium/drivers/freedreno/a3xx/fd3_context.c 2018-12-07 18:58:04.000000000 +0000 +++ mesa-19.0.1/src/gallium/drivers/freedreno/a3xx/fd3_context.c 2019-03-31 23:16:37.000000000 +0000 @@ -101,13 +101,13 @@ fd_hw_query_init(pctx); fd3_ctx->vs_pvt_mem = fd_bo_new(screen->dev, 0x2000, - DRM_FREEDRENO_GEM_TYPE_KMEM); + DRM_FREEDRENO_GEM_TYPE_KMEM, "vs_pvt"); fd3_ctx->fs_pvt_mem = fd_bo_new(screen->dev, 0x2000, - DRM_FREEDRENO_GEM_TYPE_KMEM); + DRM_FREEDRENO_GEM_TYPE_KMEM, 
"fs_pvt"); fd3_ctx->vsc_size_mem = fd_bo_new(screen->dev, 0x1000, - DRM_FREEDRENO_GEM_TYPE_KMEM); + DRM_FREEDRENO_GEM_TYPE_KMEM, "vsc_size"); fd_context_setup_common_vbos(&fd3_ctx->base); diff -Nru mesa-18.3.3/src/gallium/drivers/freedreno/a3xx/fd3_context.h mesa-19.0.1/src/gallium/drivers/freedreno/a3xx/fd3_context.h --- mesa-18.3.3/src/gallium/drivers/freedreno/a3xx/fd3_context.h 2018-12-07 18:58:04.000000000 +0000 +++ mesa-19.0.1/src/gallium/drivers/freedreno/a3xx/fd3_context.h 2019-03-31 23:16:37.000000000 +0000 @@ -31,7 +31,7 @@ #include "freedreno_context.h" -#include "ir3_shader.h" +#include "ir3/ir3_shader.h" struct fd3_context { diff -Nru mesa-18.3.3/src/gallium/drivers/freedreno/a3xx/fd3_emit.c mesa-19.0.1/src/gallium/drivers/freedreno/a3xx/fd3_emit.c --- mesa-18.3.3/src/gallium/drivers/freedreno/a3xx/fd3_emit.c 2018-12-07 18:58:04.000000000 +0000 +++ mesa-19.0.1/src/gallium/drivers/freedreno/a3xx/fd3_emit.c 2019-03-31 23:16:37.000000000 +0000 @@ -44,8 +44,8 @@ #include "fd3_zsa.h" static const enum adreno_state_block sb[] = { - [SHADER_VERTEX] = SB_VERT_SHADER, - [SHADER_FRAGMENT] = SB_FRAG_SHADER, + [MESA_SHADER_VERTEX] = SB_VERT_SHADER, + [MESA_SHADER_FRAGMENT] = SB_FRAG_SHADER, }; /* regid: base const register @@ -53,7 +53,7 @@ * sizedwords: size of const value buffer */ static void -fd3_emit_const(struct fd_ringbuffer *ring, enum shader_t type, +fd3_emit_const(struct fd_ringbuffer *ring, gl_shader_stage type, uint32_t regid, uint32_t offset, uint32_t sizedwords, const uint32_t *dwords, struct pipe_resource *prsc) { @@ -91,7 +91,7 @@ } static void -fd3_emit_const_bo(struct fd_ringbuffer *ring, enum shader_t type, boolean write, +fd3_emit_const_bo(struct fd_ringbuffer *ring, gl_shader_stage type, boolean write, uint32_t regid, uint32_t num, struct pipe_resource **prscs, uint32_t *offsets) { uint32_t anum = align(num, 4); diff -Nru mesa-18.3.3/src/gallium/drivers/freedreno/a3xx/fd3_emit.h mesa-19.0.1/src/gallium/drivers/freedreno/a3xx/fd3_emit.h --- 
mesa-18.3.3/src/gallium/drivers/freedreno/a3xx/fd3_emit.h 2018-12-07 18:58:04.000000000 +0000 +++ mesa-19.0.1/src/gallium/drivers/freedreno/a3xx/fd3_emit.h 2019-03-31 23:16:37.000000000 +0000 @@ -32,7 +32,7 @@ #include "freedreno_context.h" #include "fd3_format.h" #include "fd3_program.h" -#include "ir3_shader.h" +#include "ir3_gallium.h" struct fd_ringbuffer; diff -Nru mesa-18.3.3/src/gallium/drivers/freedreno/a3xx/fd3_gmem.c mesa-19.0.1/src/gallium/drivers/freedreno/a3xx/fd3_gmem.c --- mesa-18.3.3/src/gallium/drivers/freedreno/a3xx/fd3_gmem.c 2018-12-07 18:58:04.000000000 +0000 +++ mesa-19.0.1/src/gallium/drivers/freedreno/a3xx/fd3_gmem.c 2019-03-31 23:16:37.000000000 +0000 @@ -785,7 +785,7 @@ if (!pipe->bo) { pipe->bo = fd_bo_new(ctx->dev, 0x40000, - DRM_FREEDRENO_GEM_TYPE_KMEM); + DRM_FREEDRENO_GEM_TYPE_KMEM, "vsc_pipe[%u]", i); } OUT_PKT0(ring, REG_A3XX_VSC_PIPE(i), 3); diff -Nru mesa-18.3.3/src/gallium/drivers/freedreno/a3xx/fd3_program.c mesa-19.0.1/src/gallium/drivers/freedreno/a3xx/fd3_program.c --- mesa-18.3.3/src/gallium/drivers/freedreno/a3xx/fd3_program.c 2018-12-07 18:58:04.000000000 +0000 +++ mesa-19.0.1/src/gallium/drivers/freedreno/a3xx/fd3_program.c 2019-03-31 23:16:37.000000000 +0000 @@ -40,7 +40,7 @@ static struct ir3_shader * create_shader_stateobj(struct pipe_context *pctx, const struct pipe_shader_state *cso, - enum shader_t type) + gl_shader_stage type) { struct fd_context *ctx = fd_context(pctx); struct ir3_compiler *compiler = ctx->screen->compiler; @@ -51,7 +51,7 @@ fd3_fp_state_create(struct pipe_context *pctx, const struct pipe_shader_state *cso) { - return create_shader_stateobj(pctx, cso, SHADER_FRAGMENT); + return create_shader_stateobj(pctx, cso, MESA_SHADER_FRAGMENT); } static void @@ -65,7 +65,7 @@ fd3_vp_state_create(struct pipe_context *pctx, const struct pipe_shader_state *cso) { - return create_shader_stateobj(pctx, cso, SHADER_VERTEX); + return create_shader_stateobj(pctx, cso, MESA_SHADER_VERTEX); } static void @@ -97,7 
+97,7 @@ enum adreno_state_src src; uint32_t i, sz, *bin; - if (so->type == SHADER_VERTEX) { + if (so->type == MESA_SHADER_VERTEX) { sb = SB_VERT_SHADER; } else { sb = SB_FRAG_SHADER; @@ -122,7 +122,7 @@ OUT_RING(ring, CP_LOAD_STATE_1_EXT_SRC_ADDR(0) | CP_LOAD_STATE_1_STATE_TYPE(ST_SHADER)); } else { - OUT_RELOC(ring, so->bo, 0, + OUT_RELOCD(ring, so->bo, 0, CP_LOAD_STATE_1_STATE_TYPE(ST_SHADER), 0); } for (i = 0; i < sz; i++) { @@ -226,6 +226,7 @@ OUT_PKT0(ring, REG_A3XX_HLSQ_CONTROL_0_REG, 6); OUT_RING(ring, A3XX_HLSQ_CONTROL_0_REG_FSTHREADSIZE(FOUR_QUADS) | + A3XX_HLSQ_CONTROL_0_REG_FSSUPERTHREADENABLE | A3XX_HLSQ_CONTROL_0_REG_CONSTMODE(constmode) | /* NOTE: I guess SHADERRESTART and CONSTFULLUPDATE maybe * flush some caches? I think we only need to set those diff -Nru mesa-18.3.3/src/gallium/drivers/freedreno/a3xx/fd3_program.h mesa-19.0.1/src/gallium/drivers/freedreno/a3xx/fd3_program.h --- mesa-18.3.3/src/gallium/drivers/freedreno/a3xx/fd3_program.h 2018-12-07 18:58:04.000000000 +0000 +++ mesa-19.0.1/src/gallium/drivers/freedreno/a3xx/fd3_program.h 2019-03-31 23:16:37.000000000 +0000 @@ -29,7 +29,8 @@ #include "pipe/p_context.h" #include "freedreno_context.h" -#include "ir3_shader.h" + +#include "ir3/ir3_shader.h" struct fd3_emit; diff -Nru mesa-18.3.3/src/gallium/drivers/freedreno/a3xx/fd3_screen.c mesa-19.0.1/src/gallium/drivers/freedreno/a3xx/fd3_screen.c --- mesa-18.3.3/src/gallium/drivers/freedreno/a3xx/fd3_screen.c 2018-12-07 18:58:04.000000000 +0000 +++ mesa-19.0.1/src/gallium/drivers/freedreno/a3xx/fd3_screen.c 2019-03-31 23:16:37.000000000 +0000 @@ -30,7 +30,8 @@ #include "fd3_screen.h" #include "fd3_context.h" #include "fd3_format.h" -#include "ir3_compiler.h" + +#include "ir3/ir3_compiler.h" static boolean fd3_screen_is_format_supported(struct pipe_screen *pscreen, diff -Nru mesa-18.3.3/src/gallium/drivers/freedreno/a4xx/a4xx.xml.h mesa-19.0.1/src/gallium/drivers/freedreno/a4xx/a4xx.xml.h --- 
mesa-18.3.3/src/gallium/drivers/freedreno/a4xx/a4xx.xml.h 2018-12-07 18:58:04.000000000 +0000 +++ mesa-19.0.1/src/gallium/drivers/freedreno/a4xx/a4xx.xml.h 1970-01-01 00:00:00.000000000 +0000 @@ -1,4257 +0,0 @@ -#ifndef A4XX_XML -#define A4XX_XML - -/* Autogenerated file, DO NOT EDIT manually! - -This file was generated by the rules-ng-ng headergen tool in this git repository: -http://github.com/freedreno/envytools/ -git clone https://github.com/freedreno/envytools.git - -The rules-ng-ng source files this header was generated from are: -- /home/robclark/src/envytools/rnndb/adreno.xml ( 501 bytes, from 2018-07-03 19:37:13) -- /home/robclark/src/envytools/rnndb/freedreno_copyright.xml ( 1572 bytes, from 2018-07-03 19:37:13) -- /home/robclark/src/envytools/rnndb/adreno/a2xx.xml ( 37936 bytes, from 2018-10-08 11:43:51) -- /home/robclark/src/envytools/rnndb/adreno/adreno_common.xml ( 14201 bytes, from 2018-10-08 11:43:51) -- /home/robclark/src/envytools/rnndb/adreno/adreno_pm4.xml ( 42864 bytes, from 2018-10-08 21:57:22) -- /home/robclark/src/envytools/rnndb/adreno/a3xx.xml ( 83840 bytes, from 2018-07-03 19:37:13) -- /home/robclark/src/envytools/rnndb/adreno/a4xx.xml ( 112086 bytes, from 2018-07-03 19:37:13) -- /home/robclark/src/envytools/rnndb/adreno/a5xx.xml ( 147240 bytes, from 2018-10-08 21:57:22) -- /home/robclark/src/envytools/rnndb/adreno/a6xx.xml ( 140514 bytes, from 2018-10-08 21:57:35) -- /home/robclark/src/envytools/rnndb/adreno/a6xx_gmu.xml ( 10431 bytes, from 2018-09-14 13:03:07) -- /home/robclark/src/envytools/rnndb/adreno/ocmem.xml ( 1773 bytes, from 2018-07-03 19:37:13) - -Copyright (C) 2013-2018 by the following authors: -- Rob Clark (robclark) -- Ilia Mirkin (imirkin) - -Permission is hereby granted, free of charge, to any person obtaining -a copy of this software and associated documentation files (the -"Software"), to deal in the Software without restriction, including -without limitation the rights to use, copy, modify, merge, publish, -distribute, 
sublicense, and/or sell copies of the Software, and to -permit persons to whom the Software is furnished to do so, subject to -the following conditions: - -The above copyright notice and this permission notice (including the -next paragraph) shall be included in all copies or substantial -portions of the Software. - -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, -EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF -MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. -IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE -LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION -OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION -WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. -*/ - - -enum a4xx_color_fmt { - RB4_A8_UNORM = 1, - RB4_R8_UNORM = 2, - RB4_R8_SNORM = 3, - RB4_R8_UINT = 4, - RB4_R8_SINT = 5, - RB4_R4G4B4A4_UNORM = 8, - RB4_R5G5B5A1_UNORM = 10, - RB4_R5G6B5_UNORM = 14, - RB4_R8G8_UNORM = 15, - RB4_R8G8_SNORM = 16, - RB4_R8G8_UINT = 17, - RB4_R8G8_SINT = 18, - RB4_R16_UNORM = 19, - RB4_R16_SNORM = 20, - RB4_R16_FLOAT = 21, - RB4_R16_UINT = 22, - RB4_R16_SINT = 23, - RB4_R8G8B8_UNORM = 25, - RB4_R8G8B8A8_UNORM = 26, - RB4_R8G8B8A8_SNORM = 28, - RB4_R8G8B8A8_UINT = 29, - RB4_R8G8B8A8_SINT = 30, - RB4_R10G10B10A2_UNORM = 31, - RB4_R10G10B10A2_UINT = 34, - RB4_R11G11B10_FLOAT = 39, - RB4_R16G16_UNORM = 40, - RB4_R16G16_SNORM = 41, - RB4_R16G16_FLOAT = 42, - RB4_R16G16_UINT = 43, - RB4_R16G16_SINT = 44, - RB4_R32_FLOAT = 45, - RB4_R32_UINT = 46, - RB4_R32_SINT = 47, - RB4_R16G16B16A16_UNORM = 52, - RB4_R16G16B16A16_SNORM = 53, - RB4_R16G16B16A16_FLOAT = 54, - RB4_R16G16B16A16_UINT = 55, - RB4_R16G16B16A16_SINT = 56, - RB4_R32G32_FLOAT = 57, - RB4_R32G32_UINT = 58, - RB4_R32G32_SINT = 59, - RB4_R32G32B32A32_FLOAT = 60, - RB4_R32G32B32A32_UINT = 61, - RB4_R32G32B32A32_SINT = 62, -}; - -enum a4xx_tile_mode { - TILE4_LINEAR = 0, - TILE4_2 = 2, - TILE4_3 = 
3, -}; - -enum a4xx_vtx_fmt { - VFMT4_32_FLOAT = 1, - VFMT4_32_32_FLOAT = 2, - VFMT4_32_32_32_FLOAT = 3, - VFMT4_32_32_32_32_FLOAT = 4, - VFMT4_16_FLOAT = 5, - VFMT4_16_16_FLOAT = 6, - VFMT4_16_16_16_FLOAT = 7, - VFMT4_16_16_16_16_FLOAT = 8, - VFMT4_32_FIXED = 9, - VFMT4_32_32_FIXED = 10, - VFMT4_32_32_32_FIXED = 11, - VFMT4_32_32_32_32_FIXED = 12, - VFMT4_11_11_10_FLOAT = 13, - VFMT4_16_SINT = 16, - VFMT4_16_16_SINT = 17, - VFMT4_16_16_16_SINT = 18, - VFMT4_16_16_16_16_SINT = 19, - VFMT4_16_UINT = 20, - VFMT4_16_16_UINT = 21, - VFMT4_16_16_16_UINT = 22, - VFMT4_16_16_16_16_UINT = 23, - VFMT4_16_SNORM = 24, - VFMT4_16_16_SNORM = 25, - VFMT4_16_16_16_SNORM = 26, - VFMT4_16_16_16_16_SNORM = 27, - VFMT4_16_UNORM = 28, - VFMT4_16_16_UNORM = 29, - VFMT4_16_16_16_UNORM = 30, - VFMT4_16_16_16_16_UNORM = 31, - VFMT4_32_UINT = 32, - VFMT4_32_32_UINT = 33, - VFMT4_32_32_32_UINT = 34, - VFMT4_32_32_32_32_UINT = 35, - VFMT4_32_SINT = 36, - VFMT4_32_32_SINT = 37, - VFMT4_32_32_32_SINT = 38, - VFMT4_32_32_32_32_SINT = 39, - VFMT4_8_UINT = 40, - VFMT4_8_8_UINT = 41, - VFMT4_8_8_8_UINT = 42, - VFMT4_8_8_8_8_UINT = 43, - VFMT4_8_UNORM = 44, - VFMT4_8_8_UNORM = 45, - VFMT4_8_8_8_UNORM = 46, - VFMT4_8_8_8_8_UNORM = 47, - VFMT4_8_SINT = 48, - VFMT4_8_8_SINT = 49, - VFMT4_8_8_8_SINT = 50, - VFMT4_8_8_8_8_SINT = 51, - VFMT4_8_SNORM = 52, - VFMT4_8_8_SNORM = 53, - VFMT4_8_8_8_SNORM = 54, - VFMT4_8_8_8_8_SNORM = 55, - VFMT4_10_10_10_2_UINT = 56, - VFMT4_10_10_10_2_UNORM = 57, - VFMT4_10_10_10_2_SINT = 58, - VFMT4_10_10_10_2_SNORM = 59, - VFMT4_2_10_10_10_UINT = 60, - VFMT4_2_10_10_10_UNORM = 61, - VFMT4_2_10_10_10_SINT = 62, - VFMT4_2_10_10_10_SNORM = 63, -}; - -enum a4xx_tex_fmt { - TFMT4_A8_UNORM = 3, - TFMT4_8_UNORM = 4, - TFMT4_8_SNORM = 5, - TFMT4_8_UINT = 6, - TFMT4_8_SINT = 7, - TFMT4_4_4_4_4_UNORM = 8, - TFMT4_5_5_5_1_UNORM = 9, - TFMT4_5_6_5_UNORM = 11, - TFMT4_L8_A8_UNORM = 13, - TFMT4_8_8_UNORM = 14, - TFMT4_8_8_SNORM = 15, - TFMT4_8_8_UINT = 16, - TFMT4_8_8_SINT = 17, - 
TFMT4_16_UNORM = 18, - TFMT4_16_SNORM = 19, - TFMT4_16_FLOAT = 20, - TFMT4_16_UINT = 21, - TFMT4_16_SINT = 22, - TFMT4_8_8_8_8_UNORM = 28, - TFMT4_8_8_8_8_SNORM = 29, - TFMT4_8_8_8_8_UINT = 30, - TFMT4_8_8_8_8_SINT = 31, - TFMT4_9_9_9_E5_FLOAT = 32, - TFMT4_10_10_10_2_UNORM = 33, - TFMT4_10_10_10_2_UINT = 34, - TFMT4_11_11_10_FLOAT = 37, - TFMT4_16_16_UNORM = 38, - TFMT4_16_16_SNORM = 39, - TFMT4_16_16_FLOAT = 40, - TFMT4_16_16_UINT = 41, - TFMT4_16_16_SINT = 42, - TFMT4_32_FLOAT = 43, - TFMT4_32_UINT = 44, - TFMT4_32_SINT = 45, - TFMT4_16_16_16_16_UNORM = 51, - TFMT4_16_16_16_16_SNORM = 52, - TFMT4_16_16_16_16_FLOAT = 53, - TFMT4_16_16_16_16_UINT = 54, - TFMT4_16_16_16_16_SINT = 55, - TFMT4_32_32_FLOAT = 56, - TFMT4_32_32_UINT = 57, - TFMT4_32_32_SINT = 58, - TFMT4_32_32_32_FLOAT = 59, - TFMT4_32_32_32_UINT = 60, - TFMT4_32_32_32_SINT = 61, - TFMT4_32_32_32_32_FLOAT = 63, - TFMT4_32_32_32_32_UINT = 64, - TFMT4_32_32_32_32_SINT = 65, - TFMT4_X8Z24_UNORM = 71, - TFMT4_DXT1 = 86, - TFMT4_DXT3 = 87, - TFMT4_DXT5 = 88, - TFMT4_RGTC1_UNORM = 90, - TFMT4_RGTC1_SNORM = 91, - TFMT4_RGTC2_UNORM = 94, - TFMT4_RGTC2_SNORM = 95, - TFMT4_BPTC_UFLOAT = 97, - TFMT4_BPTC_FLOAT = 98, - TFMT4_BPTC = 99, - TFMT4_ATC_RGB = 100, - TFMT4_ATC_RGBA_EXPLICIT = 101, - TFMT4_ATC_RGBA_INTERPOLATED = 102, - TFMT4_ETC2_RG11_UNORM = 103, - TFMT4_ETC2_RG11_SNORM = 104, - TFMT4_ETC2_R11_UNORM = 105, - TFMT4_ETC2_R11_SNORM = 106, - TFMT4_ETC1 = 107, - TFMT4_ETC2_RGB8 = 108, - TFMT4_ETC2_RGBA8 = 109, - TFMT4_ETC2_RGB8A1 = 110, - TFMT4_ASTC_4x4 = 111, - TFMT4_ASTC_5x4 = 112, - TFMT4_ASTC_5x5 = 113, - TFMT4_ASTC_6x5 = 114, - TFMT4_ASTC_6x6 = 115, - TFMT4_ASTC_8x5 = 116, - TFMT4_ASTC_8x6 = 117, - TFMT4_ASTC_8x8 = 118, - TFMT4_ASTC_10x5 = 119, - TFMT4_ASTC_10x6 = 120, - TFMT4_ASTC_10x8 = 121, - TFMT4_ASTC_10x10 = 122, - TFMT4_ASTC_12x10 = 123, - TFMT4_ASTC_12x12 = 124, -}; - -enum a4xx_tex_fetchsize { - TFETCH4_1_BYTE = 0, - TFETCH4_2_BYTE = 1, - TFETCH4_4_BYTE = 2, - TFETCH4_8_BYTE = 3, - 
TFETCH4_16_BYTE = 4, -}; - -enum a4xx_depth_format { - DEPTH4_NONE = 0, - DEPTH4_16 = 1, - DEPTH4_24_8 = 2, - DEPTH4_32 = 3, -}; - -enum a4xx_ccu_perfcounter_select { - CCU_BUSY_CYCLES = 0, - CCU_RB_DEPTH_RETURN_STALL = 2, - CCU_RB_COLOR_RETURN_STALL = 3, - CCU_DEPTH_BLOCKS = 6, - CCU_COLOR_BLOCKS = 7, - CCU_DEPTH_BLOCK_HIT = 8, - CCU_COLOR_BLOCK_HIT = 9, - CCU_DEPTH_FLAG1_COUNT = 10, - CCU_DEPTH_FLAG2_COUNT = 11, - CCU_DEPTH_FLAG3_COUNT = 12, - CCU_DEPTH_FLAG4_COUNT = 13, - CCU_COLOR_FLAG1_COUNT = 14, - CCU_COLOR_FLAG2_COUNT = 15, - CCU_COLOR_FLAG3_COUNT = 16, - CCU_COLOR_FLAG4_COUNT = 17, - CCU_PARTIAL_BLOCK_READ = 18, -}; - -enum a4xx_cp_perfcounter_select { - CP_ALWAYS_COUNT = 0, - CP_BUSY = 1, - CP_PFP_IDLE = 2, - CP_PFP_BUSY_WORKING = 3, - CP_PFP_STALL_CYCLES_ANY = 4, - CP_PFP_STARVE_CYCLES_ANY = 5, - CP_PFP_STARVED_PER_LOAD_ADDR = 6, - CP_PFP_STALLED_PER_STORE_ADDR = 7, - CP_PFP_PC_PROFILE = 8, - CP_PFP_MATCH_PM4_PKT_PROFILE = 9, - CP_PFP_COND_INDIRECT_DISCARDED = 10, - CP_LONG_RESUMPTIONS = 11, - CP_RESUME_CYCLES = 12, - CP_RESUME_TO_BOUNDARY_CYCLES = 13, - CP_LONG_PREEMPTIONS = 14, - CP_PREEMPT_CYCLES = 15, - CP_PREEMPT_TO_BOUNDARY_CYCLES = 16, - CP_ME_FIFO_EMPTY_PFP_IDLE = 17, - CP_ME_FIFO_EMPTY_PFP_BUSY = 18, - CP_ME_FIFO_NOT_EMPTY_NOT_FULL = 19, - CP_ME_FIFO_FULL_ME_BUSY = 20, - CP_ME_FIFO_FULL_ME_NON_WORKING = 21, - CP_ME_WAITING_FOR_PACKETS = 22, - CP_ME_BUSY_WORKING = 23, - CP_ME_STARVE_CYCLES_ANY = 24, - CP_ME_STARVE_CYCLES_PER_PROFILE = 25, - CP_ME_STALL_CYCLES_PER_PROFILE = 26, - CP_ME_PC_PROFILE = 27, - CP_RCIU_FIFO_EMPTY = 28, - CP_RCIU_FIFO_NOT_EMPTY_NOT_FULL = 29, - CP_RCIU_FIFO_FULL = 30, - CP_RCIU_FIFO_FULL_NO_CONTEXT = 31, - CP_RCIU_FIFO_FULL_AHB_MASTER = 32, - CP_RCIU_FIFO_FULL_OTHER = 33, - CP_AHB_IDLE = 34, - CP_AHB_STALL_ON_GRANT_NO_SPLIT = 35, - CP_AHB_STALL_ON_GRANT_SPLIT = 36, - CP_AHB_STALL_ON_GRANT_SPLIT_PROFILE = 37, - CP_AHB_BUSY_WORKING = 38, - CP_AHB_BUSY_STALL_ON_HRDY = 39, - CP_AHB_BUSY_STALL_ON_HRDY_PROFILE = 40, -}; - -enum 
a4xx_gras_ras_perfcounter_select { - RAS_SUPER_TILES = 0, - RAS_8X8_TILES = 1, - RAS_4X4_TILES = 2, - RAS_BUSY_CYCLES = 3, - RAS_STALL_CYCLES_BY_RB = 4, - RAS_STALL_CYCLES_BY_VSC = 5, - RAS_STARVE_CYCLES_BY_TSE = 6, - RAS_SUPERTILE_CYCLES = 7, - RAS_TILE_CYCLES = 8, - RAS_FULLY_COVERED_SUPER_TILES = 9, - RAS_FULLY_COVERED_8X8_TILES = 10, - RAS_4X4_PRIM = 11, - RAS_8X4_4X8_PRIM = 12, - RAS_8X8_PRIM = 13, -}; - -enum a4xx_gras_tse_perfcounter_select { - TSE_INPUT_PRIM = 0, - TSE_INPUT_NULL_PRIM = 1, - TSE_TRIVAL_REJ_PRIM = 2, - TSE_CLIPPED_PRIM = 3, - TSE_NEW_PRIM = 4, - TSE_ZERO_AREA_PRIM = 5, - TSE_FACENESS_CULLED_PRIM = 6, - TSE_ZERO_PIXEL_PRIM = 7, - TSE_OUTPUT_NULL_PRIM = 8, - TSE_OUTPUT_VISIBLE_PRIM = 9, - TSE_PRE_CLIP_PRIM = 10, - TSE_POST_CLIP_PRIM = 11, - TSE_BUSY_CYCLES = 12, - TSE_PC_STARVE = 13, - TSE_RAS_STALL = 14, - TSE_STALL_BARYPLANE_FIFO_FULL = 15, - TSE_STALL_ZPLANE_FIFO_FULL = 16, -}; - -enum a4xx_hlsq_perfcounter_select { - HLSQ_SP_VS_STAGE_CONSTANT = 0, - HLSQ_SP_VS_STAGE_INSTRUCTIONS = 1, - HLSQ_SP_FS_STAGE_CONSTANT = 2, - HLSQ_SP_FS_STAGE_INSTRUCTIONS = 3, - HLSQ_TP_STATE = 4, - HLSQ_QUADS = 5, - HLSQ_PIXELS = 6, - HLSQ_VERTICES = 7, - HLSQ_SP_VS_STAGE_DATA_BYTES = 13, - HLSQ_SP_FS_STAGE_DATA_BYTES = 14, - HLSQ_BUSY_CYCLES = 15, - HLSQ_STALL_CYCLES_SP_STATE = 16, - HLSQ_STALL_CYCLES_SP_VS_STAGE = 17, - HLSQ_STALL_CYCLES_SP_FS_STAGE = 18, - HLSQ_STALL_CYCLES_UCHE = 19, - HLSQ_RBBM_LOAD_CYCLES = 20, - HLSQ_DI_TO_VS_START_SP = 21, - HLSQ_DI_TO_FS_START_SP = 22, - HLSQ_VS_STAGE_START_TO_DONE_SP = 23, - HLSQ_FS_STAGE_START_TO_DONE_SP = 24, - HLSQ_SP_STATE_COPY_CYCLES_VS_STAGE = 25, - HLSQ_SP_STATE_COPY_CYCLES_FS_STAGE = 26, - HLSQ_UCHE_LATENCY_CYCLES = 27, - HLSQ_UCHE_LATENCY_COUNT = 28, - HLSQ_STARVE_CYCLES_VFD = 29, -}; - -enum a4xx_pc_perfcounter_select { - PC_VIS_STREAMS_LOADED = 0, - PC_VPC_PRIMITIVES = 2, - PC_DEAD_PRIM = 3, - PC_LIVE_PRIM = 4, - PC_DEAD_DRAWCALLS = 5, - PC_LIVE_DRAWCALLS = 6, - PC_VERTEX_MISSES = 7, - PC_STALL_CYCLES_VFD = 
9, - PC_STALL_CYCLES_TSE = 10, - PC_STALL_CYCLES_UCHE = 11, - PC_WORKING_CYCLES = 12, - PC_IA_VERTICES = 13, - PC_GS_PRIMITIVES = 14, - PC_HS_INVOCATIONS = 15, - PC_DS_INVOCATIONS = 16, - PC_DS_PRIMITIVES = 17, - PC_STARVE_CYCLES_FOR_INDEX = 20, - PC_STARVE_CYCLES_FOR_TESS_FACTOR = 21, - PC_STARVE_CYCLES_FOR_VIZ_STREAM = 22, - PC_STALL_CYCLES_TESS = 23, - PC_STARVE_CYCLES_FOR_POSITION = 24, - PC_MODE0_DRAWCALL = 25, - PC_MODE1_DRAWCALL = 26, - PC_MODE2_DRAWCALL = 27, - PC_MODE3_DRAWCALL = 28, - PC_MODE4_DRAWCALL = 29, - PC_PREDICATED_DEAD_DRAWCALL = 30, - PC_STALL_CYCLES_BY_TSE_ONLY = 31, - PC_STALL_CYCLES_BY_VPC_ONLY = 32, - PC_VPC_POS_DATA_TRANSACTION = 33, - PC_BUSY_CYCLES = 34, - PC_STARVE_CYCLES_DI = 35, - PC_STALL_CYCLES_VPC = 36, - TESS_WORKING_CYCLES = 37, - TESS_NUM_CYCLES_SETUP_WORKING = 38, - TESS_NUM_CYCLES_PTGEN_WORKING = 39, - TESS_NUM_CYCLES_CONNGEN_WORKING = 40, - TESS_BUSY_CYCLES = 41, - TESS_STARVE_CYCLES_PC = 42, - TESS_STALL_CYCLES_PC = 43, -}; - -enum a4xx_pwr_perfcounter_select { - PWR_CORE_CLOCK_CYCLES = 0, - PWR_BUSY_CLOCK_CYCLES = 1, -}; - -enum a4xx_rb_perfcounter_select { - RB_BUSY_CYCLES = 0, - RB_BUSY_CYCLES_BINNING = 1, - RB_BUSY_CYCLES_RENDERING = 2, - RB_BUSY_CYCLES_RESOLVE = 3, - RB_STARVE_CYCLES_BY_SP = 4, - RB_STARVE_CYCLES_BY_RAS = 5, - RB_STARVE_CYCLES_BY_MARB = 6, - RB_STALL_CYCLES_BY_MARB = 7, - RB_STALL_CYCLES_BY_HLSQ = 8, - RB_RB_RB_MARB_DATA = 9, - RB_SP_RB_QUAD = 10, - RB_RAS_RB_Z_QUADS = 11, - RB_GMEM_CH0_READ = 12, - RB_GMEM_CH1_READ = 13, - RB_GMEM_CH0_WRITE = 14, - RB_GMEM_CH1_WRITE = 15, - RB_CP_CONTEXT_DONE = 16, - RB_CP_CACHE_FLUSH = 17, - RB_CP_ZPASS_DONE = 18, - RB_STALL_FIFO0_FULL = 19, - RB_STALL_FIFO1_FULL = 20, - RB_STALL_FIFO2_FULL = 21, - RB_STALL_FIFO3_FULL = 22, - RB_RB_HLSQ_TRANSACTIONS = 23, - RB_Z_READ = 24, - RB_Z_WRITE = 25, - RB_C_READ = 26, - RB_C_WRITE = 27, - RB_C_READ_LATENCY = 28, - RB_Z_READ_LATENCY = 29, - RB_STALL_BY_UCHE = 30, - RB_MARB_UCHE_TRANSACTIONS = 31, - RB_CACHE_STALL_MISS = 32, - 
RB_CACHE_STALL_FIFO_FULL = 33, - RB_8BIT_BLENDER_UNITS_ACTIVE = 34, - RB_16BIT_BLENDER_UNITS_ACTIVE = 35, - RB_SAMPLER_UNITS_ACTIVE = 36, - RB_TOTAL_PASS = 38, - RB_Z_PASS = 39, - RB_Z_FAIL = 40, - RB_S_FAIL = 41, - RB_POWER0 = 42, - RB_POWER1 = 43, - RB_POWER2 = 44, - RB_POWER3 = 45, - RB_POWER4 = 46, - RB_POWER5 = 47, - RB_POWER6 = 48, - RB_POWER7 = 49, -}; - -enum a4xx_rbbm_perfcounter_select { - RBBM_ALWAYS_ON = 0, - RBBM_VBIF_BUSY = 1, - RBBM_TSE_BUSY = 2, - RBBM_RAS_BUSY = 3, - RBBM_PC_DCALL_BUSY = 4, - RBBM_PC_VSD_BUSY = 5, - RBBM_VFD_BUSY = 6, - RBBM_VPC_BUSY = 7, - RBBM_UCHE_BUSY = 8, - RBBM_VSC_BUSY = 9, - RBBM_HLSQ_BUSY = 10, - RBBM_ANY_RB_BUSY = 11, - RBBM_ANY_TPL1_BUSY = 12, - RBBM_ANY_SP_BUSY = 13, - RBBM_ANY_MARB_BUSY = 14, - RBBM_ANY_ARB_BUSY = 15, - RBBM_AHB_STATUS_BUSY = 16, - RBBM_AHB_STATUS_STALLED = 17, - RBBM_AHB_STATUS_TXFR = 18, - RBBM_AHB_STATUS_TXFR_SPLIT = 19, - RBBM_AHB_STATUS_TXFR_ERROR = 20, - RBBM_AHB_STATUS_LONG_STALL = 21, - RBBM_STATUS_MASKED = 22, - RBBM_CP_BUSY_GFX_CORE_IDLE = 23, - RBBM_TESS_BUSY = 24, - RBBM_COM_BUSY = 25, - RBBM_DCOM_BUSY = 32, - RBBM_ANY_CCU_BUSY = 33, - RBBM_DPM_BUSY = 34, -}; - -enum a4xx_sp_perfcounter_select { - SP_LM_LOAD_INSTRUCTIONS = 0, - SP_LM_STORE_INSTRUCTIONS = 1, - SP_LM_ATOMICS = 2, - SP_GM_LOAD_INSTRUCTIONS = 3, - SP_GM_STORE_INSTRUCTIONS = 4, - SP_GM_ATOMICS = 5, - SP_VS_STAGE_TEX_INSTRUCTIONS = 6, - SP_VS_STAGE_CFLOW_INSTRUCTIONS = 7, - SP_VS_STAGE_EFU_INSTRUCTIONS = 8, - SP_VS_STAGE_FULL_ALU_INSTRUCTIONS = 9, - SP_VS_STAGE_HALF_ALU_INSTRUCTIONS = 10, - SP_FS_STAGE_TEX_INSTRUCTIONS = 11, - SP_FS_STAGE_CFLOW_INSTRUCTIONS = 12, - SP_FS_STAGE_EFU_INSTRUCTIONS = 13, - SP_FS_STAGE_FULL_ALU_INSTRUCTIONS = 14, - SP_FS_STAGE_HALF_ALU_INSTRUCTIONS = 15, - SP_VS_INSTRUCTIONS = 17, - SP_FS_INSTRUCTIONS = 18, - SP_ADDR_LOCK_COUNT = 19, - SP_UCHE_READ_TRANS = 20, - SP_UCHE_WRITE_TRANS = 21, - SP_EXPORT_VPC_TRANS = 22, - SP_EXPORT_RB_TRANS = 23, - SP_PIXELS_KILLED = 24, - SP_ICL1_REQUESTS = 25, - 
SP_ICL1_MISSES = 26, - SP_ICL0_REQUESTS = 27, - SP_ICL0_MISSES = 28, - SP_ALU_WORKING_CYCLES = 29, - SP_EFU_WORKING_CYCLES = 30, - SP_STALL_CYCLES_BY_VPC = 31, - SP_STALL_CYCLES_BY_TP = 32, - SP_STALL_CYCLES_BY_UCHE = 33, - SP_STALL_CYCLES_BY_RB = 34, - SP_BUSY_CYCLES = 35, - SP_HS_INSTRUCTIONS = 36, - SP_DS_INSTRUCTIONS = 37, - SP_GS_INSTRUCTIONS = 38, - SP_CS_INSTRUCTIONS = 39, - SP_SCHEDULER_NON_WORKING = 40, - SP_WAVE_CONTEXTS = 41, - SP_WAVE_CONTEXT_CYCLES = 42, - SP_POWER0 = 43, - SP_POWER1 = 44, - SP_POWER2 = 45, - SP_POWER3 = 46, - SP_POWER4 = 47, - SP_POWER5 = 48, - SP_POWER6 = 49, - SP_POWER7 = 50, - SP_POWER8 = 51, - SP_POWER9 = 52, - SP_POWER10 = 53, - SP_POWER11 = 54, - SP_POWER12 = 55, - SP_POWER13 = 56, - SP_POWER14 = 57, - SP_POWER15 = 58, -}; - -enum a4xx_tp_perfcounter_select { - TP_L1_REQUESTS = 0, - TP_L1_MISSES = 1, - TP_QUADS_OFFSET = 8, - TP_QUAD_SHADOW = 9, - TP_QUADS_ARRAY = 10, - TP_QUADS_GRADIENT = 11, - TP_QUADS_1D2D = 12, - TP_QUADS_3DCUBE = 13, - TP_BUSY_CYCLES = 16, - TP_STALL_CYCLES_BY_ARB = 17, - TP_STATE_CACHE_REQUESTS = 20, - TP_STATE_CACHE_MISSES = 21, - TP_POWER0 = 22, - TP_POWER1 = 23, - TP_POWER2 = 24, - TP_POWER3 = 25, - TP_POWER4 = 26, - TP_POWER5 = 27, - TP_POWER6 = 28, - TP_POWER7 = 29, -}; - -enum a4xx_uche_perfcounter_select { - UCHE_VBIF_READ_BEATS_TP = 0, - UCHE_VBIF_READ_BEATS_VFD = 1, - UCHE_VBIF_READ_BEATS_HLSQ = 2, - UCHE_VBIF_READ_BEATS_MARB = 3, - UCHE_VBIF_READ_BEATS_SP = 4, - UCHE_READ_REQUESTS_TP = 5, - UCHE_READ_REQUESTS_VFD = 6, - UCHE_READ_REQUESTS_HLSQ = 7, - UCHE_READ_REQUESTS_MARB = 8, - UCHE_READ_REQUESTS_SP = 9, - UCHE_WRITE_REQUESTS_MARB = 10, - UCHE_WRITE_REQUESTS_SP = 11, - UCHE_TAG_CHECK_FAILS = 12, - UCHE_EVICTS = 13, - UCHE_FLUSHES = 14, - UCHE_VBIF_LATENCY_CYCLES = 15, - UCHE_VBIF_LATENCY_SAMPLES = 16, - UCHE_BUSY_CYCLES = 17, - UCHE_VBIF_READ_BEATS_PC = 18, - UCHE_READ_REQUESTS_PC = 19, - UCHE_WRITE_REQUESTS_VPC = 20, - UCHE_STALL_BY_VBIF = 21, - UCHE_WRITE_REQUESTS_VSC = 22, - UCHE_POWER0 = 
23, - UCHE_POWER1 = 24, - UCHE_POWER2 = 25, - UCHE_POWER3 = 26, - UCHE_POWER4 = 27, - UCHE_POWER5 = 28, - UCHE_POWER6 = 29, - UCHE_POWER7 = 30, -}; - -enum a4xx_vbif_perfcounter_select { - AXI_READ_REQUESTS_ID_0 = 0, - AXI_READ_REQUESTS_ID_1 = 1, - AXI_READ_REQUESTS_ID_2 = 2, - AXI_READ_REQUESTS_ID_3 = 3, - AXI_READ_REQUESTS_ID_4 = 4, - AXI_READ_REQUESTS_ID_5 = 5, - AXI_READ_REQUESTS_ID_6 = 6, - AXI_READ_REQUESTS_ID_7 = 7, - AXI_READ_REQUESTS_ID_8 = 8, - AXI_READ_REQUESTS_ID_9 = 9, - AXI_READ_REQUESTS_ID_10 = 10, - AXI_READ_REQUESTS_ID_11 = 11, - AXI_READ_REQUESTS_ID_12 = 12, - AXI_READ_REQUESTS_ID_13 = 13, - AXI_READ_REQUESTS_ID_14 = 14, - AXI_READ_REQUESTS_ID_15 = 15, - AXI0_READ_REQUESTS_TOTAL = 16, - AXI1_READ_REQUESTS_TOTAL = 17, - AXI2_READ_REQUESTS_TOTAL = 18, - AXI3_READ_REQUESTS_TOTAL = 19, - AXI_READ_REQUESTS_TOTAL = 20, - AXI_WRITE_REQUESTS_ID_0 = 21, - AXI_WRITE_REQUESTS_ID_1 = 22, - AXI_WRITE_REQUESTS_ID_2 = 23, - AXI_WRITE_REQUESTS_ID_3 = 24, - AXI_WRITE_REQUESTS_ID_4 = 25, - AXI_WRITE_REQUESTS_ID_5 = 26, - AXI_WRITE_REQUESTS_ID_6 = 27, - AXI_WRITE_REQUESTS_ID_7 = 28, - AXI_WRITE_REQUESTS_ID_8 = 29, - AXI_WRITE_REQUESTS_ID_9 = 30, - AXI_WRITE_REQUESTS_ID_10 = 31, - AXI_WRITE_REQUESTS_ID_11 = 32, - AXI_WRITE_REQUESTS_ID_12 = 33, - AXI_WRITE_REQUESTS_ID_13 = 34, - AXI_WRITE_REQUESTS_ID_14 = 35, - AXI_WRITE_REQUESTS_ID_15 = 36, - AXI0_WRITE_REQUESTS_TOTAL = 37, - AXI1_WRITE_REQUESTS_TOTAL = 38, - AXI2_WRITE_REQUESTS_TOTAL = 39, - AXI3_WRITE_REQUESTS_TOTAL = 40, - AXI_WRITE_REQUESTS_TOTAL = 41, - AXI_TOTAL_REQUESTS = 42, - AXI_READ_DATA_BEATS_ID_0 = 43, - AXI_READ_DATA_BEATS_ID_1 = 44, - AXI_READ_DATA_BEATS_ID_2 = 45, - AXI_READ_DATA_BEATS_ID_3 = 46, - AXI_READ_DATA_BEATS_ID_4 = 47, - AXI_READ_DATA_BEATS_ID_5 = 48, - AXI_READ_DATA_BEATS_ID_6 = 49, - AXI_READ_DATA_BEATS_ID_7 = 50, - AXI_READ_DATA_BEATS_ID_8 = 51, - AXI_READ_DATA_BEATS_ID_9 = 52, - AXI_READ_DATA_BEATS_ID_10 = 53, - AXI_READ_DATA_BEATS_ID_11 = 54, - AXI_READ_DATA_BEATS_ID_12 = 55, - 
AXI_READ_DATA_BEATS_ID_13 = 56, - AXI_READ_DATA_BEATS_ID_14 = 57, - AXI_READ_DATA_BEATS_ID_15 = 58, - AXI0_READ_DATA_BEATS_TOTAL = 59, - AXI1_READ_DATA_BEATS_TOTAL = 60, - AXI2_READ_DATA_BEATS_TOTAL = 61, - AXI3_READ_DATA_BEATS_TOTAL = 62, - AXI_READ_DATA_BEATS_TOTAL = 63, - AXI_WRITE_DATA_BEATS_ID_0 = 64, - AXI_WRITE_DATA_BEATS_ID_1 = 65, - AXI_WRITE_DATA_BEATS_ID_2 = 66, - AXI_WRITE_DATA_BEATS_ID_3 = 67, - AXI_WRITE_DATA_BEATS_ID_4 = 68, - AXI_WRITE_DATA_BEATS_ID_5 = 69, - AXI_WRITE_DATA_BEATS_ID_6 = 70, - AXI_WRITE_DATA_BEATS_ID_7 = 71, - AXI_WRITE_DATA_BEATS_ID_8 = 72, - AXI_WRITE_DATA_BEATS_ID_9 = 73, - AXI_WRITE_DATA_BEATS_ID_10 = 74, - AXI_WRITE_DATA_BEATS_ID_11 = 75, - AXI_WRITE_DATA_BEATS_ID_12 = 76, - AXI_WRITE_DATA_BEATS_ID_13 = 77, - AXI_WRITE_DATA_BEATS_ID_14 = 78, - AXI_WRITE_DATA_BEATS_ID_15 = 79, - AXI0_WRITE_DATA_BEATS_TOTAL = 80, - AXI1_WRITE_DATA_BEATS_TOTAL = 81, - AXI2_WRITE_DATA_BEATS_TOTAL = 82, - AXI3_WRITE_DATA_BEATS_TOTAL = 83, - AXI_WRITE_DATA_BEATS_TOTAL = 84, - AXI_DATA_BEATS_TOTAL = 85, - CYCLES_HELD_OFF_ID_0 = 86, - CYCLES_HELD_OFF_ID_1 = 87, - CYCLES_HELD_OFF_ID_2 = 88, - CYCLES_HELD_OFF_ID_3 = 89, - CYCLES_HELD_OFF_ID_4 = 90, - CYCLES_HELD_OFF_ID_5 = 91, - CYCLES_HELD_OFF_ID_6 = 92, - CYCLES_HELD_OFF_ID_7 = 93, - CYCLES_HELD_OFF_ID_8 = 94, - CYCLES_HELD_OFF_ID_9 = 95, - CYCLES_HELD_OFF_ID_10 = 96, - CYCLES_HELD_OFF_ID_11 = 97, - CYCLES_HELD_OFF_ID_12 = 98, - CYCLES_HELD_OFF_ID_13 = 99, - CYCLES_HELD_OFF_ID_14 = 100, - CYCLES_HELD_OFF_ID_15 = 101, - AXI_READ_REQUEST_HELD_OFF = 102, - AXI_WRITE_REQUEST_HELD_OFF = 103, - AXI_REQUEST_HELD_OFF = 104, - AXI_WRITE_DATA_HELD_OFF = 105, - OCMEM_AXI_READ_REQUEST_HELD_OFF = 106, - OCMEM_AXI_WRITE_REQUEST_HELD_OFF = 107, - OCMEM_AXI_REQUEST_HELD_OFF = 108, - OCMEM_AXI_WRITE_DATA_HELD_OFF = 109, - ELAPSED_CYCLES_DDR = 110, - ELAPSED_CYCLES_OCMEM = 111, -}; - -enum a4xx_vfd_perfcounter_select { - VFD_UCHE_BYTE_FETCHED = 0, - VFD_UCHE_TRANS = 1, - VFD_FETCH_INSTRUCTIONS = 3, - VFD_BUSY_CYCLES = 5, 
- VFD_STALL_CYCLES_UCHE = 6, - VFD_STALL_CYCLES_HLSQ = 7, - VFD_STALL_CYCLES_VPC_BYPASS = 8, - VFD_STALL_CYCLES_VPC_ALLOC = 9, - VFD_MODE_0_FIBERS = 13, - VFD_MODE_1_FIBERS = 14, - VFD_MODE_2_FIBERS = 15, - VFD_MODE_3_FIBERS = 16, - VFD_MODE_4_FIBERS = 17, - VFD_BFIFO_STALL = 18, - VFD_NUM_VERTICES_TOTAL = 19, - VFD_PACKER_FULL = 20, - VFD_UCHE_REQUEST_FIFO_FULL = 21, - VFD_STARVE_CYCLES_PC = 22, - VFD_STARVE_CYCLES_UCHE = 23, -}; - -enum a4xx_vpc_perfcounter_select { - VPC_SP_LM_COMPONENTS = 2, - VPC_SP0_LM_BYTES = 3, - VPC_SP1_LM_BYTES = 4, - VPC_SP2_LM_BYTES = 5, - VPC_SP3_LM_BYTES = 6, - VPC_WORKING_CYCLES = 7, - VPC_STALL_CYCLES_LM = 8, - VPC_STARVE_CYCLES_RAS = 9, - VPC_STREAMOUT_CYCLES = 10, - VPC_UCHE_TRANSACTIONS = 12, - VPC_STALL_CYCLES_UCHE = 13, - VPC_BUSY_CYCLES = 14, - VPC_STARVE_CYCLES_SP = 15, -}; - -enum a4xx_vsc_perfcounter_select { - VSC_BUSY_CYCLES = 0, - VSC_WORKING_CYCLES = 1, - VSC_STALL_CYCLES_UCHE = 2, - VSC_STARVE_CYCLES_RAS = 3, - VSC_EOT_NUM = 4, -}; - -enum a4xx_tex_filter { - A4XX_TEX_NEAREST = 0, - A4XX_TEX_LINEAR = 1, - A4XX_TEX_ANISO = 2, -}; - -enum a4xx_tex_clamp { - A4XX_TEX_REPEAT = 0, - A4XX_TEX_CLAMP_TO_EDGE = 1, - A4XX_TEX_MIRROR_REPEAT = 2, - A4XX_TEX_CLAMP_TO_BORDER = 3, - A4XX_TEX_MIRROR_CLAMP = 4, -}; - -enum a4xx_tex_aniso { - A4XX_TEX_ANISO_1 = 0, - A4XX_TEX_ANISO_2 = 1, - A4XX_TEX_ANISO_4 = 2, - A4XX_TEX_ANISO_8 = 3, - A4XX_TEX_ANISO_16 = 4, -}; - -enum a4xx_tex_swiz { - A4XX_TEX_X = 0, - A4XX_TEX_Y = 1, - A4XX_TEX_Z = 2, - A4XX_TEX_W = 3, - A4XX_TEX_ZERO = 4, - A4XX_TEX_ONE = 5, -}; - -enum a4xx_tex_type { - A4XX_TEX_1D = 0, - A4XX_TEX_2D = 1, - A4XX_TEX_CUBE = 2, - A4XX_TEX_3D = 3, -}; - -#define A4XX_CGC_HLSQ_EARLY_CYC__MASK 0x00700000 -#define A4XX_CGC_HLSQ_EARLY_CYC__SHIFT 20 -static inline uint32_t A4XX_CGC_HLSQ_EARLY_CYC(uint32_t val) -{ - return ((val) << A4XX_CGC_HLSQ_EARLY_CYC__SHIFT) & A4XX_CGC_HLSQ_EARLY_CYC__MASK; -} -#define A4XX_INT0_RBBM_GPU_IDLE 0x00000001 -#define A4XX_INT0_RBBM_AHB_ERROR 0x00000002 
-#define A4XX_INT0_RBBM_REG_TIMEOUT 0x00000004 -#define A4XX_INT0_RBBM_ME_MS_TIMEOUT 0x00000008 -#define A4XX_INT0_RBBM_PFP_MS_TIMEOUT 0x00000010 -#define A4XX_INT0_RBBM_ATB_BUS_OVERFLOW 0x00000020 -#define A4XX_INT0_VFD_ERROR 0x00000040 -#define A4XX_INT0_CP_SW_INT 0x00000080 -#define A4XX_INT0_CP_T0_PACKET_IN_IB 0x00000100 -#define A4XX_INT0_CP_OPCODE_ERROR 0x00000200 -#define A4XX_INT0_CP_RESERVED_BIT_ERROR 0x00000400 -#define A4XX_INT0_CP_HW_FAULT 0x00000800 -#define A4XX_INT0_CP_DMA 0x00001000 -#define A4XX_INT0_CP_IB2_INT 0x00002000 -#define A4XX_INT0_CP_IB1_INT 0x00004000 -#define A4XX_INT0_CP_RB_INT 0x00008000 -#define A4XX_INT0_CP_REG_PROTECT_FAULT 0x00010000 -#define A4XX_INT0_CP_RB_DONE_TS 0x00020000 -#define A4XX_INT0_CP_VS_DONE_TS 0x00040000 -#define A4XX_INT0_CP_PS_DONE_TS 0x00080000 -#define A4XX_INT0_CACHE_FLUSH_TS 0x00100000 -#define A4XX_INT0_CP_AHB_ERROR_HALT 0x00200000 -#define A4XX_INT0_MISC_HANG_DETECT 0x01000000 -#define A4XX_INT0_UCHE_OOB_ACCESS 0x02000000 -#define REG_A4XX_RB_GMEM_BASE_ADDR 0x00000cc0 - -#define REG_A4XX_RB_PERFCTR_RB_SEL_0 0x00000cc7 - -#define REG_A4XX_RB_PERFCTR_RB_SEL_1 0x00000cc8 - -#define REG_A4XX_RB_PERFCTR_RB_SEL_2 0x00000cc9 - -#define REG_A4XX_RB_PERFCTR_RB_SEL_3 0x00000cca - -#define REG_A4XX_RB_PERFCTR_RB_SEL_4 0x00000ccb - -#define REG_A4XX_RB_PERFCTR_RB_SEL_5 0x00000ccc - -#define REG_A4XX_RB_PERFCTR_RB_SEL_6 0x00000ccd - -#define REG_A4XX_RB_PERFCTR_RB_SEL_7 0x00000cce - -#define REG_A4XX_RB_PERFCTR_CCU_SEL_0 0x00000ccf - -#define REG_A4XX_RB_PERFCTR_CCU_SEL_1 0x00000cd0 - -#define REG_A4XX_RB_PERFCTR_CCU_SEL_2 0x00000cd1 - -#define REG_A4XX_RB_PERFCTR_CCU_SEL_3 0x00000cd2 - -#define REG_A4XX_RB_FRAME_BUFFER_DIMENSION 0x00000ce0 -#define A4XX_RB_FRAME_BUFFER_DIMENSION_WIDTH__MASK 0x00003fff -#define A4XX_RB_FRAME_BUFFER_DIMENSION_WIDTH__SHIFT 0 -static inline uint32_t A4XX_RB_FRAME_BUFFER_DIMENSION_WIDTH(uint32_t val) -{ - return ((val) << A4XX_RB_FRAME_BUFFER_DIMENSION_WIDTH__SHIFT) & 
A4XX_RB_FRAME_BUFFER_DIMENSION_WIDTH__MASK; -} -#define A4XX_RB_FRAME_BUFFER_DIMENSION_HEIGHT__MASK 0x3fff0000 -#define A4XX_RB_FRAME_BUFFER_DIMENSION_HEIGHT__SHIFT 16 -static inline uint32_t A4XX_RB_FRAME_BUFFER_DIMENSION_HEIGHT(uint32_t val) -{ - return ((val) << A4XX_RB_FRAME_BUFFER_DIMENSION_HEIGHT__SHIFT) & A4XX_RB_FRAME_BUFFER_DIMENSION_HEIGHT__MASK; -} - -#define REG_A4XX_RB_CLEAR_COLOR_DW0 0x000020cc - -#define REG_A4XX_RB_CLEAR_COLOR_DW1 0x000020cd - -#define REG_A4XX_RB_CLEAR_COLOR_DW2 0x000020ce - -#define REG_A4XX_RB_CLEAR_COLOR_DW3 0x000020cf - -#define REG_A4XX_RB_MODE_CONTROL 0x000020a0 -#define A4XX_RB_MODE_CONTROL_WIDTH__MASK 0x0000003f -#define A4XX_RB_MODE_CONTROL_WIDTH__SHIFT 0 -static inline uint32_t A4XX_RB_MODE_CONTROL_WIDTH(uint32_t val) -{ - assert(!(val & 0x1f)); - return ((val >> 5) << A4XX_RB_MODE_CONTROL_WIDTH__SHIFT) & A4XX_RB_MODE_CONTROL_WIDTH__MASK; -} -#define A4XX_RB_MODE_CONTROL_HEIGHT__MASK 0x00003f00 -#define A4XX_RB_MODE_CONTROL_HEIGHT__SHIFT 8 -static inline uint32_t A4XX_RB_MODE_CONTROL_HEIGHT(uint32_t val) -{ - assert(!(val & 0x1f)); - return ((val >> 5) << A4XX_RB_MODE_CONTROL_HEIGHT__SHIFT) & A4XX_RB_MODE_CONTROL_HEIGHT__MASK; -} -#define A4XX_RB_MODE_CONTROL_ENABLE_GMEM 0x00010000 - -#define REG_A4XX_RB_RENDER_CONTROL 0x000020a1 -#define A4XX_RB_RENDER_CONTROL_BINNING_PASS 0x00000001 -#define A4XX_RB_RENDER_CONTROL_DISABLE_COLOR_PIPE 0x00000020 - -#define REG_A4XX_RB_MSAA_CONTROL 0x000020a2 -#define A4XX_RB_MSAA_CONTROL_DISABLE 0x00001000 -#define A4XX_RB_MSAA_CONTROL_SAMPLES__MASK 0x0000e000 -#define A4XX_RB_MSAA_CONTROL_SAMPLES__SHIFT 13 -static inline uint32_t A4XX_RB_MSAA_CONTROL_SAMPLES(uint32_t val) -{ - return ((val) << A4XX_RB_MSAA_CONTROL_SAMPLES__SHIFT) & A4XX_RB_MSAA_CONTROL_SAMPLES__MASK; -} - -#define REG_A4XX_RB_RENDER_CONTROL2 0x000020a3 -#define A4XX_RB_RENDER_CONTROL2_XCOORD 0x00000001 -#define A4XX_RB_RENDER_CONTROL2_YCOORD 0x00000002 -#define A4XX_RB_RENDER_CONTROL2_ZCOORD 0x00000004 -#define 
A4XX_RB_RENDER_CONTROL2_WCOORD 0x00000008 -#define A4XX_RB_RENDER_CONTROL2_SAMPLEMASK 0x00000010 -#define A4XX_RB_RENDER_CONTROL2_FACENESS 0x00000020 -#define A4XX_RB_RENDER_CONTROL2_SAMPLEID 0x00000040 -#define A4XX_RB_RENDER_CONTROL2_MSAA_SAMPLES__MASK 0x00000380 -#define A4XX_RB_RENDER_CONTROL2_MSAA_SAMPLES__SHIFT 7 -static inline uint32_t A4XX_RB_RENDER_CONTROL2_MSAA_SAMPLES(uint32_t val) -{ - return ((val) << A4XX_RB_RENDER_CONTROL2_MSAA_SAMPLES__SHIFT) & A4XX_RB_RENDER_CONTROL2_MSAA_SAMPLES__MASK; -} -#define A4XX_RB_RENDER_CONTROL2_SAMPLEID_HR 0x00000800 -#define A4XX_RB_RENDER_CONTROL2_VARYING 0x00001000 - -static inline uint32_t REG_A4XX_RB_MRT(uint32_t i0) { return 0x000020a4 + 0x5*i0; } - -static inline uint32_t REG_A4XX_RB_MRT_CONTROL(uint32_t i0) { return 0x000020a4 + 0x5*i0; } -#define A4XX_RB_MRT_CONTROL_READ_DEST_ENABLE 0x00000008 -#define A4XX_RB_MRT_CONTROL_BLEND 0x00000010 -#define A4XX_RB_MRT_CONTROL_BLEND2 0x00000020 -#define A4XX_RB_MRT_CONTROL_ROP_ENABLE 0x00000040 -#define A4XX_RB_MRT_CONTROL_ROP_CODE__MASK 0x00000f00 -#define A4XX_RB_MRT_CONTROL_ROP_CODE__SHIFT 8 -static inline uint32_t A4XX_RB_MRT_CONTROL_ROP_CODE(enum a3xx_rop_code val) -{ - return ((val) << A4XX_RB_MRT_CONTROL_ROP_CODE__SHIFT) & A4XX_RB_MRT_CONTROL_ROP_CODE__MASK; -} -#define A4XX_RB_MRT_CONTROL_COMPONENT_ENABLE__MASK 0x0f000000 -#define A4XX_RB_MRT_CONTROL_COMPONENT_ENABLE__SHIFT 24 -static inline uint32_t A4XX_RB_MRT_CONTROL_COMPONENT_ENABLE(uint32_t val) -{ - return ((val) << A4XX_RB_MRT_CONTROL_COMPONENT_ENABLE__SHIFT) & A4XX_RB_MRT_CONTROL_COMPONENT_ENABLE__MASK; -} - -static inline uint32_t REG_A4XX_RB_MRT_BUF_INFO(uint32_t i0) { return 0x000020a5 + 0x5*i0; } -#define A4XX_RB_MRT_BUF_INFO_COLOR_FORMAT__MASK 0x0000003f -#define A4XX_RB_MRT_BUF_INFO_COLOR_FORMAT__SHIFT 0 -static inline uint32_t A4XX_RB_MRT_BUF_INFO_COLOR_FORMAT(enum a4xx_color_fmt val) -{ - return ((val) << A4XX_RB_MRT_BUF_INFO_COLOR_FORMAT__SHIFT) & A4XX_RB_MRT_BUF_INFO_COLOR_FORMAT__MASK; -} 
-#define A4XX_RB_MRT_BUF_INFO_COLOR_TILE_MODE__MASK 0x000000c0 -#define A4XX_RB_MRT_BUF_INFO_COLOR_TILE_MODE__SHIFT 6 -static inline uint32_t A4XX_RB_MRT_BUF_INFO_COLOR_TILE_MODE(enum a4xx_tile_mode val) -{ - return ((val) << A4XX_RB_MRT_BUF_INFO_COLOR_TILE_MODE__SHIFT) & A4XX_RB_MRT_BUF_INFO_COLOR_TILE_MODE__MASK; -} -#define A4XX_RB_MRT_BUF_INFO_DITHER_MODE__MASK 0x00000600 -#define A4XX_RB_MRT_BUF_INFO_DITHER_MODE__SHIFT 9 -static inline uint32_t A4XX_RB_MRT_BUF_INFO_DITHER_MODE(enum adreno_rb_dither_mode val) -{ - return ((val) << A4XX_RB_MRT_BUF_INFO_DITHER_MODE__SHIFT) & A4XX_RB_MRT_BUF_INFO_DITHER_MODE__MASK; -} -#define A4XX_RB_MRT_BUF_INFO_COLOR_SWAP__MASK 0x00001800 -#define A4XX_RB_MRT_BUF_INFO_COLOR_SWAP__SHIFT 11 -static inline uint32_t A4XX_RB_MRT_BUF_INFO_COLOR_SWAP(enum a3xx_color_swap val) -{ - return ((val) << A4XX_RB_MRT_BUF_INFO_COLOR_SWAP__SHIFT) & A4XX_RB_MRT_BUF_INFO_COLOR_SWAP__MASK; -} -#define A4XX_RB_MRT_BUF_INFO_COLOR_SRGB 0x00002000 -#define A4XX_RB_MRT_BUF_INFO_COLOR_BUF_PITCH__MASK 0xffffc000 -#define A4XX_RB_MRT_BUF_INFO_COLOR_BUF_PITCH__SHIFT 14 -static inline uint32_t A4XX_RB_MRT_BUF_INFO_COLOR_BUF_PITCH(uint32_t val) -{ - assert(!(val & 0xf)); - return ((val >> 4) << A4XX_RB_MRT_BUF_INFO_COLOR_BUF_PITCH__SHIFT) & A4XX_RB_MRT_BUF_INFO_COLOR_BUF_PITCH__MASK; -} - -static inline uint32_t REG_A4XX_RB_MRT_BASE(uint32_t i0) { return 0x000020a6 + 0x5*i0; } - -static inline uint32_t REG_A4XX_RB_MRT_CONTROL3(uint32_t i0) { return 0x000020a7 + 0x5*i0; } -#define A4XX_RB_MRT_CONTROL3_STRIDE__MASK 0x03fffff8 -#define A4XX_RB_MRT_CONTROL3_STRIDE__SHIFT 3 -static inline uint32_t A4XX_RB_MRT_CONTROL3_STRIDE(uint32_t val) -{ - return ((val) << A4XX_RB_MRT_CONTROL3_STRIDE__SHIFT) & A4XX_RB_MRT_CONTROL3_STRIDE__MASK; -} - -static inline uint32_t REG_A4XX_RB_MRT_BLEND_CONTROL(uint32_t i0) { return 0x000020a8 + 0x5*i0; } -#define A4XX_RB_MRT_BLEND_CONTROL_RGB_SRC_FACTOR__MASK 0x0000001f -#define A4XX_RB_MRT_BLEND_CONTROL_RGB_SRC_FACTOR__SHIFT 0 
-static inline uint32_t A4XX_RB_MRT_BLEND_CONTROL_RGB_SRC_FACTOR(enum adreno_rb_blend_factor val) -{ - return ((val) << A4XX_RB_MRT_BLEND_CONTROL_RGB_SRC_FACTOR__SHIFT) & A4XX_RB_MRT_BLEND_CONTROL_RGB_SRC_FACTOR__MASK; -} -#define A4XX_RB_MRT_BLEND_CONTROL_RGB_BLEND_OPCODE__MASK 0x000000e0 -#define A4XX_RB_MRT_BLEND_CONTROL_RGB_BLEND_OPCODE__SHIFT 5 -static inline uint32_t A4XX_RB_MRT_BLEND_CONTROL_RGB_BLEND_OPCODE(enum a3xx_rb_blend_opcode val) -{ - return ((val) << A4XX_RB_MRT_BLEND_CONTROL_RGB_BLEND_OPCODE__SHIFT) & A4XX_RB_MRT_BLEND_CONTROL_RGB_BLEND_OPCODE__MASK; -} -#define A4XX_RB_MRT_BLEND_CONTROL_RGB_DEST_FACTOR__MASK 0x00001f00 -#define A4XX_RB_MRT_BLEND_CONTROL_RGB_DEST_FACTOR__SHIFT 8 -static inline uint32_t A4XX_RB_MRT_BLEND_CONTROL_RGB_DEST_FACTOR(enum adreno_rb_blend_factor val) -{ - return ((val) << A4XX_RB_MRT_BLEND_CONTROL_RGB_DEST_FACTOR__SHIFT) & A4XX_RB_MRT_BLEND_CONTROL_RGB_DEST_FACTOR__MASK; -} -#define A4XX_RB_MRT_BLEND_CONTROL_ALPHA_SRC_FACTOR__MASK 0x001f0000 -#define A4XX_RB_MRT_BLEND_CONTROL_ALPHA_SRC_FACTOR__SHIFT 16 -static inline uint32_t A4XX_RB_MRT_BLEND_CONTROL_ALPHA_SRC_FACTOR(enum adreno_rb_blend_factor val) -{ - return ((val) << A4XX_RB_MRT_BLEND_CONTROL_ALPHA_SRC_FACTOR__SHIFT) & A4XX_RB_MRT_BLEND_CONTROL_ALPHA_SRC_FACTOR__MASK; -} -#define A4XX_RB_MRT_BLEND_CONTROL_ALPHA_BLEND_OPCODE__MASK 0x00e00000 -#define A4XX_RB_MRT_BLEND_CONTROL_ALPHA_BLEND_OPCODE__SHIFT 21 -static inline uint32_t A4XX_RB_MRT_BLEND_CONTROL_ALPHA_BLEND_OPCODE(enum a3xx_rb_blend_opcode val) -{ - return ((val) << A4XX_RB_MRT_BLEND_CONTROL_ALPHA_BLEND_OPCODE__SHIFT) & A4XX_RB_MRT_BLEND_CONTROL_ALPHA_BLEND_OPCODE__MASK; -} -#define A4XX_RB_MRT_BLEND_CONTROL_ALPHA_DEST_FACTOR__MASK 0x1f000000 -#define A4XX_RB_MRT_BLEND_CONTROL_ALPHA_DEST_FACTOR__SHIFT 24 -static inline uint32_t A4XX_RB_MRT_BLEND_CONTROL_ALPHA_DEST_FACTOR(enum adreno_rb_blend_factor val) -{ - return ((val) << A4XX_RB_MRT_BLEND_CONTROL_ALPHA_DEST_FACTOR__SHIFT) & 
A4XX_RB_MRT_BLEND_CONTROL_ALPHA_DEST_FACTOR__MASK; -} - -#define REG_A4XX_RB_BLEND_RED 0x000020f0 -#define A4XX_RB_BLEND_RED_UINT__MASK 0x000000ff -#define A4XX_RB_BLEND_RED_UINT__SHIFT 0 -static inline uint32_t A4XX_RB_BLEND_RED_UINT(uint32_t val) -{ - return ((val) << A4XX_RB_BLEND_RED_UINT__SHIFT) & A4XX_RB_BLEND_RED_UINT__MASK; -} -#define A4XX_RB_BLEND_RED_SINT__MASK 0x0000ff00 -#define A4XX_RB_BLEND_RED_SINT__SHIFT 8 -static inline uint32_t A4XX_RB_BLEND_RED_SINT(uint32_t val) -{ - return ((val) << A4XX_RB_BLEND_RED_SINT__SHIFT) & A4XX_RB_BLEND_RED_SINT__MASK; -} -#define A4XX_RB_BLEND_RED_FLOAT__MASK 0xffff0000 -#define A4XX_RB_BLEND_RED_FLOAT__SHIFT 16 -static inline uint32_t A4XX_RB_BLEND_RED_FLOAT(float val) -{ - return ((util_float_to_half(val)) << A4XX_RB_BLEND_RED_FLOAT__SHIFT) & A4XX_RB_BLEND_RED_FLOAT__MASK; -} - -#define REG_A4XX_RB_BLEND_RED_F32 0x000020f1 -#define A4XX_RB_BLEND_RED_F32__MASK 0xffffffff -#define A4XX_RB_BLEND_RED_F32__SHIFT 0 -static inline uint32_t A4XX_RB_BLEND_RED_F32(float val) -{ - return ((fui(val)) << A4XX_RB_BLEND_RED_F32__SHIFT) & A4XX_RB_BLEND_RED_F32__MASK; -} - -#define REG_A4XX_RB_BLEND_GREEN 0x000020f2 -#define A4XX_RB_BLEND_GREEN_UINT__MASK 0x000000ff -#define A4XX_RB_BLEND_GREEN_UINT__SHIFT 0 -static inline uint32_t A4XX_RB_BLEND_GREEN_UINT(uint32_t val) -{ - return ((val) << A4XX_RB_BLEND_GREEN_UINT__SHIFT) & A4XX_RB_BLEND_GREEN_UINT__MASK; -} -#define A4XX_RB_BLEND_GREEN_SINT__MASK 0x0000ff00 -#define A4XX_RB_BLEND_GREEN_SINT__SHIFT 8 -static inline uint32_t A4XX_RB_BLEND_GREEN_SINT(uint32_t val) -{ - return ((val) << A4XX_RB_BLEND_GREEN_SINT__SHIFT) & A4XX_RB_BLEND_GREEN_SINT__MASK; -} -#define A4XX_RB_BLEND_GREEN_FLOAT__MASK 0xffff0000 -#define A4XX_RB_BLEND_GREEN_FLOAT__SHIFT 16 -static inline uint32_t A4XX_RB_BLEND_GREEN_FLOAT(float val) -{ - return ((util_float_to_half(val)) << A4XX_RB_BLEND_GREEN_FLOAT__SHIFT) & A4XX_RB_BLEND_GREEN_FLOAT__MASK; -} - -#define REG_A4XX_RB_BLEND_GREEN_F32 0x000020f3 -#define 
A4XX_RB_BLEND_GREEN_F32__MASK 0xffffffff -#define A4XX_RB_BLEND_GREEN_F32__SHIFT 0 -static inline uint32_t A4XX_RB_BLEND_GREEN_F32(float val) -{ - return ((fui(val)) << A4XX_RB_BLEND_GREEN_F32__SHIFT) & A4XX_RB_BLEND_GREEN_F32__MASK; -} - -#define REG_A4XX_RB_BLEND_BLUE 0x000020f4 -#define A4XX_RB_BLEND_BLUE_UINT__MASK 0x000000ff -#define A4XX_RB_BLEND_BLUE_UINT__SHIFT 0 -static inline uint32_t A4XX_RB_BLEND_BLUE_UINT(uint32_t val) -{ - return ((val) << A4XX_RB_BLEND_BLUE_UINT__SHIFT) & A4XX_RB_BLEND_BLUE_UINT__MASK; -} -#define A4XX_RB_BLEND_BLUE_SINT__MASK 0x0000ff00 -#define A4XX_RB_BLEND_BLUE_SINT__SHIFT 8 -static inline uint32_t A4XX_RB_BLEND_BLUE_SINT(uint32_t val) -{ - return ((val) << A4XX_RB_BLEND_BLUE_SINT__SHIFT) & A4XX_RB_BLEND_BLUE_SINT__MASK; -} -#define A4XX_RB_BLEND_BLUE_FLOAT__MASK 0xffff0000 -#define A4XX_RB_BLEND_BLUE_FLOAT__SHIFT 16 -static inline uint32_t A4XX_RB_BLEND_BLUE_FLOAT(float val) -{ - return ((util_float_to_half(val)) << A4XX_RB_BLEND_BLUE_FLOAT__SHIFT) & A4XX_RB_BLEND_BLUE_FLOAT__MASK; -} - -#define REG_A4XX_RB_BLEND_BLUE_F32 0x000020f5 -#define A4XX_RB_BLEND_BLUE_F32__MASK 0xffffffff -#define A4XX_RB_BLEND_BLUE_F32__SHIFT 0 -static inline uint32_t A4XX_RB_BLEND_BLUE_F32(float val) -{ - return ((fui(val)) << A4XX_RB_BLEND_BLUE_F32__SHIFT) & A4XX_RB_BLEND_BLUE_F32__MASK; -} - -#define REG_A4XX_RB_BLEND_ALPHA 0x000020f6 -#define A4XX_RB_BLEND_ALPHA_UINT__MASK 0x000000ff -#define A4XX_RB_BLEND_ALPHA_UINT__SHIFT 0 -static inline uint32_t A4XX_RB_BLEND_ALPHA_UINT(uint32_t val) -{ - return ((val) << A4XX_RB_BLEND_ALPHA_UINT__SHIFT) & A4XX_RB_BLEND_ALPHA_UINT__MASK; -} -#define A4XX_RB_BLEND_ALPHA_SINT__MASK 0x0000ff00 -#define A4XX_RB_BLEND_ALPHA_SINT__SHIFT 8 -static inline uint32_t A4XX_RB_BLEND_ALPHA_SINT(uint32_t val) -{ - return ((val) << A4XX_RB_BLEND_ALPHA_SINT__SHIFT) & A4XX_RB_BLEND_ALPHA_SINT__MASK; -} -#define A4XX_RB_BLEND_ALPHA_FLOAT__MASK 0xffff0000 -#define A4XX_RB_BLEND_ALPHA_FLOAT__SHIFT 16 -static inline uint32_t 
A4XX_RB_BLEND_ALPHA_FLOAT(float val) -{ - return ((util_float_to_half(val)) << A4XX_RB_BLEND_ALPHA_FLOAT__SHIFT) & A4XX_RB_BLEND_ALPHA_FLOAT__MASK; -} - -#define REG_A4XX_RB_BLEND_ALPHA_F32 0x000020f7 -#define A4XX_RB_BLEND_ALPHA_F32__MASK 0xffffffff -#define A4XX_RB_BLEND_ALPHA_F32__SHIFT 0 -static inline uint32_t A4XX_RB_BLEND_ALPHA_F32(float val) -{ - return ((fui(val)) << A4XX_RB_BLEND_ALPHA_F32__SHIFT) & A4XX_RB_BLEND_ALPHA_F32__MASK; -} - -#define REG_A4XX_RB_ALPHA_CONTROL 0x000020f8 -#define A4XX_RB_ALPHA_CONTROL_ALPHA_REF__MASK 0x000000ff -#define A4XX_RB_ALPHA_CONTROL_ALPHA_REF__SHIFT 0 -static inline uint32_t A4XX_RB_ALPHA_CONTROL_ALPHA_REF(uint32_t val) -{ - return ((val) << A4XX_RB_ALPHA_CONTROL_ALPHA_REF__SHIFT) & A4XX_RB_ALPHA_CONTROL_ALPHA_REF__MASK; -} -#define A4XX_RB_ALPHA_CONTROL_ALPHA_TEST 0x00000100 -#define A4XX_RB_ALPHA_CONTROL_ALPHA_TEST_FUNC__MASK 0x00000e00 -#define A4XX_RB_ALPHA_CONTROL_ALPHA_TEST_FUNC__SHIFT 9 -static inline uint32_t A4XX_RB_ALPHA_CONTROL_ALPHA_TEST_FUNC(enum adreno_compare_func val) -{ - return ((val) << A4XX_RB_ALPHA_CONTROL_ALPHA_TEST_FUNC__SHIFT) & A4XX_RB_ALPHA_CONTROL_ALPHA_TEST_FUNC__MASK; -} - -#define REG_A4XX_RB_FS_OUTPUT 0x000020f9 -#define A4XX_RB_FS_OUTPUT_ENABLE_BLEND__MASK 0x000000ff -#define A4XX_RB_FS_OUTPUT_ENABLE_BLEND__SHIFT 0 -static inline uint32_t A4XX_RB_FS_OUTPUT_ENABLE_BLEND(uint32_t val) -{ - return ((val) << A4XX_RB_FS_OUTPUT_ENABLE_BLEND__SHIFT) & A4XX_RB_FS_OUTPUT_ENABLE_BLEND__MASK; -} -#define A4XX_RB_FS_OUTPUT_INDEPENDENT_BLEND 0x00000100 -#define A4XX_RB_FS_OUTPUT_SAMPLE_MASK__MASK 0xffff0000 -#define A4XX_RB_FS_OUTPUT_SAMPLE_MASK__SHIFT 16 -static inline uint32_t A4XX_RB_FS_OUTPUT_SAMPLE_MASK(uint32_t val) -{ - return ((val) << A4XX_RB_FS_OUTPUT_SAMPLE_MASK__SHIFT) & A4XX_RB_FS_OUTPUT_SAMPLE_MASK__MASK; -} - -#define REG_A4XX_RB_SAMPLE_COUNT_CONTROL 0x000020fa -#define A4XX_RB_SAMPLE_COUNT_CONTROL_COPY 0x00000002 -#define A4XX_RB_SAMPLE_COUNT_CONTROL_ADDR__MASK 0xfffffffc -#define 
A4XX_RB_SAMPLE_COUNT_CONTROL_ADDR__SHIFT 2 -static inline uint32_t A4XX_RB_SAMPLE_COUNT_CONTROL_ADDR(uint32_t val) -{ - assert(!(val & 0x3)); - return ((val >> 2) << A4XX_RB_SAMPLE_COUNT_CONTROL_ADDR__SHIFT) & A4XX_RB_SAMPLE_COUNT_CONTROL_ADDR__MASK; -} - -#define REG_A4XX_RB_RENDER_COMPONENTS 0x000020fb -#define A4XX_RB_RENDER_COMPONENTS_RT0__MASK 0x0000000f -#define A4XX_RB_RENDER_COMPONENTS_RT0__SHIFT 0 -static inline uint32_t A4XX_RB_RENDER_COMPONENTS_RT0(uint32_t val) -{ - return ((val) << A4XX_RB_RENDER_COMPONENTS_RT0__SHIFT) & A4XX_RB_RENDER_COMPONENTS_RT0__MASK; -} -#define A4XX_RB_RENDER_COMPONENTS_RT1__MASK 0x000000f0 -#define A4XX_RB_RENDER_COMPONENTS_RT1__SHIFT 4 -static inline uint32_t A4XX_RB_RENDER_COMPONENTS_RT1(uint32_t val) -{ - return ((val) << A4XX_RB_RENDER_COMPONENTS_RT1__SHIFT) & A4XX_RB_RENDER_COMPONENTS_RT1__MASK; -} -#define A4XX_RB_RENDER_COMPONENTS_RT2__MASK 0x00000f00 -#define A4XX_RB_RENDER_COMPONENTS_RT2__SHIFT 8 -static inline uint32_t A4XX_RB_RENDER_COMPONENTS_RT2(uint32_t val) -{ - return ((val) << A4XX_RB_RENDER_COMPONENTS_RT2__SHIFT) & A4XX_RB_RENDER_COMPONENTS_RT2__MASK; -} -#define A4XX_RB_RENDER_COMPONENTS_RT3__MASK 0x0000f000 -#define A4XX_RB_RENDER_COMPONENTS_RT3__SHIFT 12 -static inline uint32_t A4XX_RB_RENDER_COMPONENTS_RT3(uint32_t val) -{ - return ((val) << A4XX_RB_RENDER_COMPONENTS_RT3__SHIFT) & A4XX_RB_RENDER_COMPONENTS_RT3__MASK; -} -#define A4XX_RB_RENDER_COMPONENTS_RT4__MASK 0x000f0000 -#define A4XX_RB_RENDER_COMPONENTS_RT4__SHIFT 16 -static inline uint32_t A4XX_RB_RENDER_COMPONENTS_RT4(uint32_t val) -{ - return ((val) << A4XX_RB_RENDER_COMPONENTS_RT4__SHIFT) & A4XX_RB_RENDER_COMPONENTS_RT4__MASK; -} -#define A4XX_RB_RENDER_COMPONENTS_RT5__MASK 0x00f00000 -#define A4XX_RB_RENDER_COMPONENTS_RT5__SHIFT 20 -static inline uint32_t A4XX_RB_RENDER_COMPONENTS_RT5(uint32_t val) -{ - return ((val) << A4XX_RB_RENDER_COMPONENTS_RT5__SHIFT) & A4XX_RB_RENDER_COMPONENTS_RT5__MASK; -} -#define A4XX_RB_RENDER_COMPONENTS_RT6__MASK 
0x0f000000 -#define A4XX_RB_RENDER_COMPONENTS_RT6__SHIFT 24 -static inline uint32_t A4XX_RB_RENDER_COMPONENTS_RT6(uint32_t val) -{ - return ((val) << A4XX_RB_RENDER_COMPONENTS_RT6__SHIFT) & A4XX_RB_RENDER_COMPONENTS_RT6__MASK; -} -#define A4XX_RB_RENDER_COMPONENTS_RT7__MASK 0xf0000000 -#define A4XX_RB_RENDER_COMPONENTS_RT7__SHIFT 28 -static inline uint32_t A4XX_RB_RENDER_COMPONENTS_RT7(uint32_t val) -{ - return ((val) << A4XX_RB_RENDER_COMPONENTS_RT7__SHIFT) & A4XX_RB_RENDER_COMPONENTS_RT7__MASK; -} - -#define REG_A4XX_RB_COPY_CONTROL 0x000020fc -#define A4XX_RB_COPY_CONTROL_MSAA_RESOLVE__MASK 0x00000003 -#define A4XX_RB_COPY_CONTROL_MSAA_RESOLVE__SHIFT 0 -static inline uint32_t A4XX_RB_COPY_CONTROL_MSAA_RESOLVE(enum a3xx_msaa_samples val) -{ - return ((val) << A4XX_RB_COPY_CONTROL_MSAA_RESOLVE__SHIFT) & A4XX_RB_COPY_CONTROL_MSAA_RESOLVE__MASK; -} -#define A4XX_RB_COPY_CONTROL_MODE__MASK 0x00000070 -#define A4XX_RB_COPY_CONTROL_MODE__SHIFT 4 -static inline uint32_t A4XX_RB_COPY_CONTROL_MODE(enum adreno_rb_copy_control_mode val) -{ - return ((val) << A4XX_RB_COPY_CONTROL_MODE__SHIFT) & A4XX_RB_COPY_CONTROL_MODE__MASK; -} -#define A4XX_RB_COPY_CONTROL_FASTCLEAR__MASK 0x00000f00 -#define A4XX_RB_COPY_CONTROL_FASTCLEAR__SHIFT 8 -static inline uint32_t A4XX_RB_COPY_CONTROL_FASTCLEAR(uint32_t val) -{ - return ((val) << A4XX_RB_COPY_CONTROL_FASTCLEAR__SHIFT) & A4XX_RB_COPY_CONTROL_FASTCLEAR__MASK; -} -#define A4XX_RB_COPY_CONTROL_GMEM_BASE__MASK 0xffffc000 -#define A4XX_RB_COPY_CONTROL_GMEM_BASE__SHIFT 14 -static inline uint32_t A4XX_RB_COPY_CONTROL_GMEM_BASE(uint32_t val) -{ - assert(!(val & 0x3fff)); - return ((val >> 14) << A4XX_RB_COPY_CONTROL_GMEM_BASE__SHIFT) & A4XX_RB_COPY_CONTROL_GMEM_BASE__MASK; -} - -#define REG_A4XX_RB_COPY_DEST_BASE 0x000020fd -#define A4XX_RB_COPY_DEST_BASE_BASE__MASK 0xffffffe0 -#define A4XX_RB_COPY_DEST_BASE_BASE__SHIFT 5 -static inline uint32_t A4XX_RB_COPY_DEST_BASE_BASE(uint32_t val) -{ - assert(!(val & 0x1f)); - return ((val >> 5) << 
A4XX_RB_COPY_DEST_BASE_BASE__SHIFT) & A4XX_RB_COPY_DEST_BASE_BASE__MASK; -} - -#define REG_A4XX_RB_COPY_DEST_PITCH 0x000020fe -#define A4XX_RB_COPY_DEST_PITCH_PITCH__MASK 0xffffffff -#define A4XX_RB_COPY_DEST_PITCH_PITCH__SHIFT 0 -static inline uint32_t A4XX_RB_COPY_DEST_PITCH_PITCH(uint32_t val) -{ - assert(!(val & 0x1f)); - return ((val >> 5) << A4XX_RB_COPY_DEST_PITCH_PITCH__SHIFT) & A4XX_RB_COPY_DEST_PITCH_PITCH__MASK; -} - -#define REG_A4XX_RB_COPY_DEST_INFO 0x000020ff -#define A4XX_RB_COPY_DEST_INFO_FORMAT__MASK 0x000000fc -#define A4XX_RB_COPY_DEST_INFO_FORMAT__SHIFT 2 -static inline uint32_t A4XX_RB_COPY_DEST_INFO_FORMAT(enum a4xx_color_fmt val) -{ - return ((val) << A4XX_RB_COPY_DEST_INFO_FORMAT__SHIFT) & A4XX_RB_COPY_DEST_INFO_FORMAT__MASK; -} -#define A4XX_RB_COPY_DEST_INFO_SWAP__MASK 0x00000300 -#define A4XX_RB_COPY_DEST_INFO_SWAP__SHIFT 8 -static inline uint32_t A4XX_RB_COPY_DEST_INFO_SWAP(enum a3xx_color_swap val) -{ - return ((val) << A4XX_RB_COPY_DEST_INFO_SWAP__SHIFT) & A4XX_RB_COPY_DEST_INFO_SWAP__MASK; -} -#define A4XX_RB_COPY_DEST_INFO_DITHER_MODE__MASK 0x00000c00 -#define A4XX_RB_COPY_DEST_INFO_DITHER_MODE__SHIFT 10 -static inline uint32_t A4XX_RB_COPY_DEST_INFO_DITHER_MODE(enum adreno_rb_dither_mode val) -{ - return ((val) << A4XX_RB_COPY_DEST_INFO_DITHER_MODE__SHIFT) & A4XX_RB_COPY_DEST_INFO_DITHER_MODE__MASK; -} -#define A4XX_RB_COPY_DEST_INFO_COMPONENT_ENABLE__MASK 0x0003c000 -#define A4XX_RB_COPY_DEST_INFO_COMPONENT_ENABLE__SHIFT 14 -static inline uint32_t A4XX_RB_COPY_DEST_INFO_COMPONENT_ENABLE(uint32_t val) -{ - return ((val) << A4XX_RB_COPY_DEST_INFO_COMPONENT_ENABLE__SHIFT) & A4XX_RB_COPY_DEST_INFO_COMPONENT_ENABLE__MASK; -} -#define A4XX_RB_COPY_DEST_INFO_ENDIAN__MASK 0x001c0000 -#define A4XX_RB_COPY_DEST_INFO_ENDIAN__SHIFT 18 -static inline uint32_t A4XX_RB_COPY_DEST_INFO_ENDIAN(enum adreno_rb_surface_endian val) -{ - return ((val) << A4XX_RB_COPY_DEST_INFO_ENDIAN__SHIFT) & A4XX_RB_COPY_DEST_INFO_ENDIAN__MASK; -} -#define 
A4XX_RB_COPY_DEST_INFO_TILE__MASK 0x03000000 -#define A4XX_RB_COPY_DEST_INFO_TILE__SHIFT 24 -static inline uint32_t A4XX_RB_COPY_DEST_INFO_TILE(enum a4xx_tile_mode val) -{ - return ((val) << A4XX_RB_COPY_DEST_INFO_TILE__SHIFT) & A4XX_RB_COPY_DEST_INFO_TILE__MASK; -} - -#define REG_A4XX_RB_FS_OUTPUT_REG 0x00002100 -#define A4XX_RB_FS_OUTPUT_REG_MRT__MASK 0x0000000f -#define A4XX_RB_FS_OUTPUT_REG_MRT__SHIFT 0 -static inline uint32_t A4XX_RB_FS_OUTPUT_REG_MRT(uint32_t val) -{ - return ((val) << A4XX_RB_FS_OUTPUT_REG_MRT__SHIFT) & A4XX_RB_FS_OUTPUT_REG_MRT__MASK; -} -#define A4XX_RB_FS_OUTPUT_REG_FRAG_WRITES_Z 0x00000020 - -#define REG_A4XX_RB_DEPTH_CONTROL 0x00002101 -#define A4XX_RB_DEPTH_CONTROL_FRAG_WRITES_Z 0x00000001 -#define A4XX_RB_DEPTH_CONTROL_Z_ENABLE 0x00000002 -#define A4XX_RB_DEPTH_CONTROL_Z_WRITE_ENABLE 0x00000004 -#define A4XX_RB_DEPTH_CONTROL_ZFUNC__MASK 0x00000070 -#define A4XX_RB_DEPTH_CONTROL_ZFUNC__SHIFT 4 -static inline uint32_t A4XX_RB_DEPTH_CONTROL_ZFUNC(enum adreno_compare_func val) -{ - return ((val) << A4XX_RB_DEPTH_CONTROL_ZFUNC__SHIFT) & A4XX_RB_DEPTH_CONTROL_ZFUNC__MASK; -} -#define A4XX_RB_DEPTH_CONTROL_Z_CLAMP_ENABLE 0x00000080 -#define A4XX_RB_DEPTH_CONTROL_EARLY_Z_DISABLE 0x00010000 -#define A4XX_RB_DEPTH_CONTROL_FORCE_FRAGZ_TO_FS 0x00020000 -#define A4XX_RB_DEPTH_CONTROL_Z_TEST_ENABLE 0x80000000 - -#define REG_A4XX_RB_DEPTH_CLEAR 0x00002102 - -#define REG_A4XX_RB_DEPTH_INFO 0x00002103 -#define A4XX_RB_DEPTH_INFO_DEPTH_FORMAT__MASK 0x00000003 -#define A4XX_RB_DEPTH_INFO_DEPTH_FORMAT__SHIFT 0 -static inline uint32_t A4XX_RB_DEPTH_INFO_DEPTH_FORMAT(enum a4xx_depth_format val) -{ - return ((val) << A4XX_RB_DEPTH_INFO_DEPTH_FORMAT__SHIFT) & A4XX_RB_DEPTH_INFO_DEPTH_FORMAT__MASK; -} -#define A4XX_RB_DEPTH_INFO_DEPTH_BASE__MASK 0xfffff000 -#define A4XX_RB_DEPTH_INFO_DEPTH_BASE__SHIFT 12 -static inline uint32_t A4XX_RB_DEPTH_INFO_DEPTH_BASE(uint32_t val) -{ - assert(!(val & 0xfff)); - return ((val >> 12) << 
A4XX_RB_DEPTH_INFO_DEPTH_BASE__SHIFT) & A4XX_RB_DEPTH_INFO_DEPTH_BASE__MASK; -} - -#define REG_A4XX_RB_DEPTH_PITCH 0x00002104 -#define A4XX_RB_DEPTH_PITCH__MASK 0xffffffff -#define A4XX_RB_DEPTH_PITCH__SHIFT 0 -static inline uint32_t A4XX_RB_DEPTH_PITCH(uint32_t val) -{ - assert(!(val & 0x1f)); - return ((val >> 5) << A4XX_RB_DEPTH_PITCH__SHIFT) & A4XX_RB_DEPTH_PITCH__MASK; -} - -#define REG_A4XX_RB_DEPTH_PITCH2 0x00002105 -#define A4XX_RB_DEPTH_PITCH2__MASK 0xffffffff -#define A4XX_RB_DEPTH_PITCH2__SHIFT 0 -static inline uint32_t A4XX_RB_DEPTH_PITCH2(uint32_t val) -{ - assert(!(val & 0x1f)); - return ((val >> 5) << A4XX_RB_DEPTH_PITCH2__SHIFT) & A4XX_RB_DEPTH_PITCH2__MASK; -} - -#define REG_A4XX_RB_STENCIL_CONTROL 0x00002106 -#define A4XX_RB_STENCIL_CONTROL_STENCIL_ENABLE 0x00000001 -#define A4XX_RB_STENCIL_CONTROL_STENCIL_ENABLE_BF 0x00000002 -#define A4XX_RB_STENCIL_CONTROL_STENCIL_READ 0x00000004 -#define A4XX_RB_STENCIL_CONTROL_FUNC__MASK 0x00000700 -#define A4XX_RB_STENCIL_CONTROL_FUNC__SHIFT 8 -static inline uint32_t A4XX_RB_STENCIL_CONTROL_FUNC(enum adreno_compare_func val) -{ - return ((val) << A4XX_RB_STENCIL_CONTROL_FUNC__SHIFT) & A4XX_RB_STENCIL_CONTROL_FUNC__MASK; -} -#define A4XX_RB_STENCIL_CONTROL_FAIL__MASK 0x00003800 -#define A4XX_RB_STENCIL_CONTROL_FAIL__SHIFT 11 -static inline uint32_t A4XX_RB_STENCIL_CONTROL_FAIL(enum adreno_stencil_op val) -{ - return ((val) << A4XX_RB_STENCIL_CONTROL_FAIL__SHIFT) & A4XX_RB_STENCIL_CONTROL_FAIL__MASK; -} -#define A4XX_RB_STENCIL_CONTROL_ZPASS__MASK 0x0001c000 -#define A4XX_RB_STENCIL_CONTROL_ZPASS__SHIFT 14 -static inline uint32_t A4XX_RB_STENCIL_CONTROL_ZPASS(enum adreno_stencil_op val) -{ - return ((val) << A4XX_RB_STENCIL_CONTROL_ZPASS__SHIFT) & A4XX_RB_STENCIL_CONTROL_ZPASS__MASK; -} -#define A4XX_RB_STENCIL_CONTROL_ZFAIL__MASK 0x000e0000 -#define A4XX_RB_STENCIL_CONTROL_ZFAIL__SHIFT 17 -static inline uint32_t A4XX_RB_STENCIL_CONTROL_ZFAIL(enum adreno_stencil_op val) -{ - return ((val) << 
A4XX_RB_STENCIL_CONTROL_ZFAIL__SHIFT) & A4XX_RB_STENCIL_CONTROL_ZFAIL__MASK; -} -#define A4XX_RB_STENCIL_CONTROL_FUNC_BF__MASK 0x00700000 -#define A4XX_RB_STENCIL_CONTROL_FUNC_BF__SHIFT 20 -static inline uint32_t A4XX_RB_STENCIL_CONTROL_FUNC_BF(enum adreno_compare_func val) -{ - return ((val) << A4XX_RB_STENCIL_CONTROL_FUNC_BF__SHIFT) & A4XX_RB_STENCIL_CONTROL_FUNC_BF__MASK; -} -#define A4XX_RB_STENCIL_CONTROL_FAIL_BF__MASK 0x03800000 -#define A4XX_RB_STENCIL_CONTROL_FAIL_BF__SHIFT 23 -static inline uint32_t A4XX_RB_STENCIL_CONTROL_FAIL_BF(enum adreno_stencil_op val) -{ - return ((val) << A4XX_RB_STENCIL_CONTROL_FAIL_BF__SHIFT) & A4XX_RB_STENCIL_CONTROL_FAIL_BF__MASK; -} -#define A4XX_RB_STENCIL_CONTROL_ZPASS_BF__MASK 0x1c000000 -#define A4XX_RB_STENCIL_CONTROL_ZPASS_BF__SHIFT 26 -static inline uint32_t A4XX_RB_STENCIL_CONTROL_ZPASS_BF(enum adreno_stencil_op val) -{ - return ((val) << A4XX_RB_STENCIL_CONTROL_ZPASS_BF__SHIFT) & A4XX_RB_STENCIL_CONTROL_ZPASS_BF__MASK; -} -#define A4XX_RB_STENCIL_CONTROL_ZFAIL_BF__MASK 0xe0000000 -#define A4XX_RB_STENCIL_CONTROL_ZFAIL_BF__SHIFT 29 -static inline uint32_t A4XX_RB_STENCIL_CONTROL_ZFAIL_BF(enum adreno_stencil_op val) -{ - return ((val) << A4XX_RB_STENCIL_CONTROL_ZFAIL_BF__SHIFT) & A4XX_RB_STENCIL_CONTROL_ZFAIL_BF__MASK; -} - -#define REG_A4XX_RB_STENCIL_CONTROL2 0x00002107 -#define A4XX_RB_STENCIL_CONTROL2_STENCIL_BUFFER 0x00000001 - -#define REG_A4XX_RB_STENCIL_INFO 0x00002108 -#define A4XX_RB_STENCIL_INFO_SEPARATE_STENCIL 0x00000001 -#define A4XX_RB_STENCIL_INFO_STENCIL_BASE__MASK 0xfffff000 -#define A4XX_RB_STENCIL_INFO_STENCIL_BASE__SHIFT 12 -static inline uint32_t A4XX_RB_STENCIL_INFO_STENCIL_BASE(uint32_t val) -{ - assert(!(val & 0xfff)); - return ((val >> 12) << A4XX_RB_STENCIL_INFO_STENCIL_BASE__SHIFT) & A4XX_RB_STENCIL_INFO_STENCIL_BASE__MASK; -} - -#define REG_A4XX_RB_STENCIL_PITCH 0x00002109 -#define A4XX_RB_STENCIL_PITCH__MASK 0xffffffff -#define A4XX_RB_STENCIL_PITCH__SHIFT 0 -static inline uint32_t 
A4XX_RB_STENCIL_PITCH(uint32_t val) -{ - assert(!(val & 0x1f)); - return ((val >> 5) << A4XX_RB_STENCIL_PITCH__SHIFT) & A4XX_RB_STENCIL_PITCH__MASK; -} - -#define REG_A4XX_RB_STENCILREFMASK 0x0000210b -#define A4XX_RB_STENCILREFMASK_STENCILREF__MASK 0x000000ff -#define A4XX_RB_STENCILREFMASK_STENCILREF__SHIFT 0 -static inline uint32_t A4XX_RB_STENCILREFMASK_STENCILREF(uint32_t val) -{ - return ((val) << A4XX_RB_STENCILREFMASK_STENCILREF__SHIFT) & A4XX_RB_STENCILREFMASK_STENCILREF__MASK; -} -#define A4XX_RB_STENCILREFMASK_STENCILMASK__MASK 0x0000ff00 -#define A4XX_RB_STENCILREFMASK_STENCILMASK__SHIFT 8 -static inline uint32_t A4XX_RB_STENCILREFMASK_STENCILMASK(uint32_t val) -{ - return ((val) << A4XX_RB_STENCILREFMASK_STENCILMASK__SHIFT) & A4XX_RB_STENCILREFMASK_STENCILMASK__MASK; -} -#define A4XX_RB_STENCILREFMASK_STENCILWRITEMASK__MASK 0x00ff0000 -#define A4XX_RB_STENCILREFMASK_STENCILWRITEMASK__SHIFT 16 -static inline uint32_t A4XX_RB_STENCILREFMASK_STENCILWRITEMASK(uint32_t val) -{ - return ((val) << A4XX_RB_STENCILREFMASK_STENCILWRITEMASK__SHIFT) & A4XX_RB_STENCILREFMASK_STENCILWRITEMASK__MASK; -} - -#define REG_A4XX_RB_STENCILREFMASK_BF 0x0000210c -#define A4XX_RB_STENCILREFMASK_BF_STENCILREF__MASK 0x000000ff -#define A4XX_RB_STENCILREFMASK_BF_STENCILREF__SHIFT 0 -static inline uint32_t A4XX_RB_STENCILREFMASK_BF_STENCILREF(uint32_t val) -{ - return ((val) << A4XX_RB_STENCILREFMASK_BF_STENCILREF__SHIFT) & A4XX_RB_STENCILREFMASK_BF_STENCILREF__MASK; -} -#define A4XX_RB_STENCILREFMASK_BF_STENCILMASK__MASK 0x0000ff00 -#define A4XX_RB_STENCILREFMASK_BF_STENCILMASK__SHIFT 8 -static inline uint32_t A4XX_RB_STENCILREFMASK_BF_STENCILMASK(uint32_t val) -{ - return ((val) << A4XX_RB_STENCILREFMASK_BF_STENCILMASK__SHIFT) & A4XX_RB_STENCILREFMASK_BF_STENCILMASK__MASK; -} -#define A4XX_RB_STENCILREFMASK_BF_STENCILWRITEMASK__MASK 0x00ff0000 -#define A4XX_RB_STENCILREFMASK_BF_STENCILWRITEMASK__SHIFT 16 -static inline uint32_t 
A4XX_RB_STENCILREFMASK_BF_STENCILWRITEMASK(uint32_t val) -{ - return ((val) << A4XX_RB_STENCILREFMASK_BF_STENCILWRITEMASK__SHIFT) & A4XX_RB_STENCILREFMASK_BF_STENCILWRITEMASK__MASK; -} - -#define REG_A4XX_RB_BIN_OFFSET 0x0000210d -#define A4XX_RB_BIN_OFFSET_WINDOW_OFFSET_DISABLE 0x80000000 -#define A4XX_RB_BIN_OFFSET_X__MASK 0x00007fff -#define A4XX_RB_BIN_OFFSET_X__SHIFT 0 -static inline uint32_t A4XX_RB_BIN_OFFSET_X(uint32_t val) -{ - return ((val) << A4XX_RB_BIN_OFFSET_X__SHIFT) & A4XX_RB_BIN_OFFSET_X__MASK; -} -#define A4XX_RB_BIN_OFFSET_Y__MASK 0x7fff0000 -#define A4XX_RB_BIN_OFFSET_Y__SHIFT 16 -static inline uint32_t A4XX_RB_BIN_OFFSET_Y(uint32_t val) -{ - return ((val) << A4XX_RB_BIN_OFFSET_Y__SHIFT) & A4XX_RB_BIN_OFFSET_Y__MASK; -} - -static inline uint32_t REG_A4XX_RB_VPORT_Z_CLAMP(uint32_t i0) { return 0x00002120 + 0x2*i0; } - -static inline uint32_t REG_A4XX_RB_VPORT_Z_CLAMP_MIN(uint32_t i0) { return 0x00002120 + 0x2*i0; } - -static inline uint32_t REG_A4XX_RB_VPORT_Z_CLAMP_MAX(uint32_t i0) { return 0x00002121 + 0x2*i0; } - -#define REG_A4XX_RBBM_HW_VERSION 0x00000000 - -#define REG_A4XX_RBBM_HW_CONFIGURATION 0x00000002 - -static inline uint32_t REG_A4XX_RBBM_CLOCK_CTL_TP(uint32_t i0) { return 0x00000004 + 0x1*i0; } - -static inline uint32_t REG_A4XX_RBBM_CLOCK_CTL_TP_REG(uint32_t i0) { return 0x00000004 + 0x1*i0; } - -static inline uint32_t REG_A4XX_RBBM_CLOCK_CTL2_TP(uint32_t i0) { return 0x00000008 + 0x1*i0; } - -static inline uint32_t REG_A4XX_RBBM_CLOCK_CTL2_TP_REG(uint32_t i0) { return 0x00000008 + 0x1*i0; } - -static inline uint32_t REG_A4XX_RBBM_CLOCK_HYST_TP(uint32_t i0) { return 0x0000000c + 0x1*i0; } - -static inline uint32_t REG_A4XX_RBBM_CLOCK_HYST_TP_REG(uint32_t i0) { return 0x0000000c + 0x1*i0; } - -static inline uint32_t REG_A4XX_RBBM_CLOCK_DELAY_TP(uint32_t i0) { return 0x00000010 + 0x1*i0; } - -static inline uint32_t REG_A4XX_RBBM_CLOCK_DELAY_TP_REG(uint32_t i0) { return 0x00000010 + 0x1*i0; } - -#define REG_A4XX_RBBM_CLOCK_CTL_UCHE 
0x00000014 - -#define REG_A4XX_RBBM_CLOCK_CTL2_UCHE 0x00000015 - -#define REG_A4XX_RBBM_CLOCK_CTL3_UCHE 0x00000016 - -#define REG_A4XX_RBBM_CLOCK_CTL4_UCHE 0x00000017 - -#define REG_A4XX_RBBM_CLOCK_HYST_UCHE 0x00000018 - -#define REG_A4XX_RBBM_CLOCK_DELAY_UCHE 0x00000019 - -#define REG_A4XX_RBBM_CLOCK_MODE_GPC 0x0000001a - -#define REG_A4XX_RBBM_CLOCK_DELAY_GPC 0x0000001b - -#define REG_A4XX_RBBM_CLOCK_HYST_GPC 0x0000001c - -#define REG_A4XX_RBBM_CLOCK_CTL_TSE_RAS_RBBM 0x0000001d - -#define REG_A4XX_RBBM_CLOCK_HYST_TSE_RAS_RBBM 0x0000001e - -#define REG_A4XX_RBBM_CLOCK_DELAY_TSE_RAS_RBBM 0x0000001f - -#define REG_A4XX_RBBM_CLOCK_CTL 0x00000020 - -#define REG_A4XX_RBBM_SP_HYST_CNT 0x00000021 - -#define REG_A4XX_RBBM_SW_RESET_CMD 0x00000022 - -#define REG_A4XX_RBBM_AHB_CTL0 0x00000023 - -#define REG_A4XX_RBBM_AHB_CTL1 0x00000024 - -#define REG_A4XX_RBBM_AHB_CMD 0x00000025 - -#define REG_A4XX_RBBM_RB_SUB_BLOCK_SEL_CTL 0x00000026 - -#define REG_A4XX_RBBM_RAM_ACC_63_32 0x00000028 - -#define REG_A4XX_RBBM_WAIT_IDLE_CLOCKS_CTL 0x0000002b - -#define REG_A4XX_RBBM_INTERFACE_HANG_INT_CTL 0x0000002f - -#define REG_A4XX_RBBM_INTERFACE_HANG_MASK_CTL4 0x00000034 - -#define REG_A4XX_RBBM_INT_CLEAR_CMD 0x00000036 - -#define REG_A4XX_RBBM_INT_0_MASK 0x00000037 - -#define REG_A4XX_RBBM_RBBM_CTL 0x0000003e - -#define REG_A4XX_RBBM_AHB_DEBUG_CTL 0x0000003f - -#define REG_A4XX_RBBM_VBIF_DEBUG_CTL 0x00000041 - -#define REG_A4XX_RBBM_CLOCK_CTL2 0x00000042 - -#define REG_A4XX_RBBM_BLOCK_SW_RESET_CMD 0x00000045 - -#define REG_A4XX_RBBM_RESET_CYCLES 0x00000047 - -#define REG_A4XX_RBBM_EXT_TRACE_BUS_CTL 0x00000049 - -#define REG_A4XX_RBBM_CFG_DEBBUS_SEL_A 0x0000004a - -#define REG_A4XX_RBBM_CFG_DEBBUS_SEL_B 0x0000004b - -#define REG_A4XX_RBBM_CFG_DEBBUS_SEL_C 0x0000004c - -#define REG_A4XX_RBBM_CFG_DEBBUS_SEL_D 0x0000004d - -#define REG_A4XX_RBBM_POWER_CNTL_IP 0x00000098 -#define A4XX_RBBM_POWER_CNTL_IP_SW_COLLAPSE 0x00000001 -#define A4XX_RBBM_POWER_CNTL_IP_SP_TP_PWR_ON 0x00100000 - 
-#define REG_A4XX_RBBM_PERFCTR_CP_0_LO 0x0000009c - -#define REG_A4XX_RBBM_PERFCTR_CP_0_HI 0x0000009d - -#define REG_A4XX_RBBM_PERFCTR_CP_1_LO 0x0000009e - -#define REG_A4XX_RBBM_PERFCTR_CP_1_HI 0x0000009f - -#define REG_A4XX_RBBM_PERFCTR_CP_2_LO 0x000000a0 - -#define REG_A4XX_RBBM_PERFCTR_CP_2_HI 0x000000a1 - -#define REG_A4XX_RBBM_PERFCTR_CP_3_LO 0x000000a2 - -#define REG_A4XX_RBBM_PERFCTR_CP_3_HI 0x000000a3 - -#define REG_A4XX_RBBM_PERFCTR_CP_4_LO 0x000000a4 - -#define REG_A4XX_RBBM_PERFCTR_CP_4_HI 0x000000a5 - -#define REG_A4XX_RBBM_PERFCTR_CP_5_LO 0x000000a6 - -#define REG_A4XX_RBBM_PERFCTR_CP_5_HI 0x000000a7 - -#define REG_A4XX_RBBM_PERFCTR_CP_6_LO 0x000000a8 - -#define REG_A4XX_RBBM_PERFCTR_CP_6_HI 0x000000a9 - -#define REG_A4XX_RBBM_PERFCTR_CP_7_LO 0x000000aa - -#define REG_A4XX_RBBM_PERFCTR_CP_7_HI 0x000000ab - -#define REG_A4XX_RBBM_PERFCTR_RBBM_0_LO 0x000000ac - -#define REG_A4XX_RBBM_PERFCTR_RBBM_0_HI 0x000000ad - -#define REG_A4XX_RBBM_PERFCTR_RBBM_1_LO 0x000000ae - -#define REG_A4XX_RBBM_PERFCTR_RBBM_1_HI 0x000000af - -#define REG_A4XX_RBBM_PERFCTR_RBBM_2_LO 0x000000b0 - -#define REG_A4XX_RBBM_PERFCTR_RBBM_2_HI 0x000000b1 - -#define REG_A4XX_RBBM_PERFCTR_RBBM_3_LO 0x000000b2 - -#define REG_A4XX_RBBM_PERFCTR_RBBM_3_HI 0x000000b3 - -#define REG_A4XX_RBBM_PERFCTR_PC_0_LO 0x000000b4 - -#define REG_A4XX_RBBM_PERFCTR_PC_0_HI 0x000000b5 - -#define REG_A4XX_RBBM_PERFCTR_PC_1_LO 0x000000b6 - -#define REG_A4XX_RBBM_PERFCTR_PC_1_HI 0x000000b7 - -#define REG_A4XX_RBBM_PERFCTR_PC_2_LO 0x000000b8 - -#define REG_A4XX_RBBM_PERFCTR_PC_2_HI 0x000000b9 - -#define REG_A4XX_RBBM_PERFCTR_PC_3_LO 0x000000ba - -#define REG_A4XX_RBBM_PERFCTR_PC_3_HI 0x000000bb - -#define REG_A4XX_RBBM_PERFCTR_PC_4_LO 0x000000bc - -#define REG_A4XX_RBBM_PERFCTR_PC_4_HI 0x000000bd - -#define REG_A4XX_RBBM_PERFCTR_PC_5_LO 0x000000be - -#define REG_A4XX_RBBM_PERFCTR_PC_5_HI 0x000000bf - -#define REG_A4XX_RBBM_PERFCTR_PC_6_LO 0x000000c0 - -#define REG_A4XX_RBBM_PERFCTR_PC_6_HI 0x000000c1 - 
-#define REG_A4XX_RBBM_PERFCTR_PC_7_LO 0x000000c2 - -#define REG_A4XX_RBBM_PERFCTR_PC_7_HI 0x000000c3 - -#define REG_A4XX_RBBM_PERFCTR_VFD_0_LO 0x000000c4 - -#define REG_A4XX_RBBM_PERFCTR_VFD_0_HI 0x000000c5 - -#define REG_A4XX_RBBM_PERFCTR_VFD_1_LO 0x000000c6 - -#define REG_A4XX_RBBM_PERFCTR_VFD_1_HI 0x000000c7 - -#define REG_A4XX_RBBM_PERFCTR_VFD_2_LO 0x000000c8 - -#define REG_A4XX_RBBM_PERFCTR_VFD_2_HI 0x000000c9 - -#define REG_A4XX_RBBM_PERFCTR_VFD_3_LO 0x000000ca - -#define REG_A4XX_RBBM_PERFCTR_VFD_3_HI 0x000000cb - -#define REG_A4XX_RBBM_PERFCTR_VFD_4_LO 0x000000cc - -#define REG_A4XX_RBBM_PERFCTR_VFD_4_HI 0x000000cd - -#define REG_A4XX_RBBM_PERFCTR_VFD_5_LO 0x000000ce - -#define REG_A4XX_RBBM_PERFCTR_VFD_5_HI 0x000000cf - -#define REG_A4XX_RBBM_PERFCTR_VFD_6_LO 0x000000d0 - -#define REG_A4XX_RBBM_PERFCTR_VFD_6_HI 0x000000d1 - -#define REG_A4XX_RBBM_PERFCTR_VFD_7_LO 0x000000d2 - -#define REG_A4XX_RBBM_PERFCTR_VFD_7_HI 0x000000d3 - -#define REG_A4XX_RBBM_PERFCTR_HLSQ_0_LO 0x000000d4 - -#define REG_A4XX_RBBM_PERFCTR_HLSQ_0_HI 0x000000d5 - -#define REG_A4XX_RBBM_PERFCTR_HLSQ_1_LO 0x000000d6 - -#define REG_A4XX_RBBM_PERFCTR_HLSQ_1_HI 0x000000d7 - -#define REG_A4XX_RBBM_PERFCTR_HLSQ_2_LO 0x000000d8 - -#define REG_A4XX_RBBM_PERFCTR_HLSQ_2_HI 0x000000d9 - -#define REG_A4XX_RBBM_PERFCTR_HLSQ_3_LO 0x000000da - -#define REG_A4XX_RBBM_PERFCTR_HLSQ_3_HI 0x000000db - -#define REG_A4XX_RBBM_PERFCTR_HLSQ_4_LO 0x000000dc - -#define REG_A4XX_RBBM_PERFCTR_HLSQ_4_HI 0x000000dd - -#define REG_A4XX_RBBM_PERFCTR_HLSQ_5_LO 0x000000de - -#define REG_A4XX_RBBM_PERFCTR_HLSQ_5_HI 0x000000df - -#define REG_A4XX_RBBM_PERFCTR_HLSQ_6_LO 0x000000e0 - -#define REG_A4XX_RBBM_PERFCTR_HLSQ_6_HI 0x000000e1 - -#define REG_A4XX_RBBM_PERFCTR_HLSQ_7_LO 0x000000e2 - -#define REG_A4XX_RBBM_PERFCTR_HLSQ_7_HI 0x000000e3 - -#define REG_A4XX_RBBM_PERFCTR_VPC_0_LO 0x000000e4 - -#define REG_A4XX_RBBM_PERFCTR_VPC_0_HI 0x000000e5 - -#define REG_A4XX_RBBM_PERFCTR_VPC_1_LO 0x000000e6 - -#define 
REG_A4XX_RBBM_PERFCTR_VPC_1_HI 0x000000e7 - -#define REG_A4XX_RBBM_PERFCTR_VPC_2_LO 0x000000e8 - -#define REG_A4XX_RBBM_PERFCTR_VPC_2_HI 0x000000e9 - -#define REG_A4XX_RBBM_PERFCTR_VPC_3_LO 0x000000ea - -#define REG_A4XX_RBBM_PERFCTR_VPC_3_HI 0x000000eb - -#define REG_A4XX_RBBM_PERFCTR_CCU_0_LO 0x000000ec - -#define REG_A4XX_RBBM_PERFCTR_CCU_0_HI 0x000000ed - -#define REG_A4XX_RBBM_PERFCTR_CCU_1_LO 0x000000ee - -#define REG_A4XX_RBBM_PERFCTR_CCU_1_HI 0x000000ef - -#define REG_A4XX_RBBM_PERFCTR_CCU_2_LO 0x000000f0 - -#define REG_A4XX_RBBM_PERFCTR_CCU_2_HI 0x000000f1 - -#define REG_A4XX_RBBM_PERFCTR_CCU_3_LO 0x000000f2 - -#define REG_A4XX_RBBM_PERFCTR_CCU_3_HI 0x000000f3 - -#define REG_A4XX_RBBM_PERFCTR_TSE_0_LO 0x000000f4 - -#define REG_A4XX_RBBM_PERFCTR_TSE_0_HI 0x000000f5 - -#define REG_A4XX_RBBM_PERFCTR_TSE_1_LO 0x000000f6 - -#define REG_A4XX_RBBM_PERFCTR_TSE_1_HI 0x000000f7 - -#define REG_A4XX_RBBM_PERFCTR_TSE_2_LO 0x000000f8 - -#define REG_A4XX_RBBM_PERFCTR_TSE_2_HI 0x000000f9 - -#define REG_A4XX_RBBM_PERFCTR_TSE_3_LO 0x000000fa - -#define REG_A4XX_RBBM_PERFCTR_TSE_3_HI 0x000000fb - -#define REG_A4XX_RBBM_PERFCTR_RAS_0_LO 0x000000fc - -#define REG_A4XX_RBBM_PERFCTR_RAS_0_HI 0x000000fd - -#define REG_A4XX_RBBM_PERFCTR_RAS_1_LO 0x000000fe - -#define REG_A4XX_RBBM_PERFCTR_RAS_1_HI 0x000000ff - -#define REG_A4XX_RBBM_PERFCTR_RAS_2_LO 0x00000100 - -#define REG_A4XX_RBBM_PERFCTR_RAS_2_HI 0x00000101 - -#define REG_A4XX_RBBM_PERFCTR_RAS_3_LO 0x00000102 - -#define REG_A4XX_RBBM_PERFCTR_RAS_3_HI 0x00000103 - -#define REG_A4XX_RBBM_PERFCTR_UCHE_0_LO 0x00000104 - -#define REG_A4XX_RBBM_PERFCTR_UCHE_0_HI 0x00000105 - -#define REG_A4XX_RBBM_PERFCTR_UCHE_1_LO 0x00000106 - -#define REG_A4XX_RBBM_PERFCTR_UCHE_1_HI 0x00000107 - -#define REG_A4XX_RBBM_PERFCTR_UCHE_2_LO 0x00000108 - -#define REG_A4XX_RBBM_PERFCTR_UCHE_2_HI 0x00000109 - -#define REG_A4XX_RBBM_PERFCTR_UCHE_3_LO 0x0000010a - -#define REG_A4XX_RBBM_PERFCTR_UCHE_3_HI 0x0000010b - -#define 
REG_A4XX_RBBM_PERFCTR_UCHE_4_LO 0x0000010c - -#define REG_A4XX_RBBM_PERFCTR_UCHE_4_HI 0x0000010d - -#define REG_A4XX_RBBM_PERFCTR_UCHE_5_LO 0x0000010e - -#define REG_A4XX_RBBM_PERFCTR_UCHE_5_HI 0x0000010f - -#define REG_A4XX_RBBM_PERFCTR_UCHE_6_LO 0x00000110 - -#define REG_A4XX_RBBM_PERFCTR_UCHE_6_HI 0x00000111 - -#define REG_A4XX_RBBM_PERFCTR_UCHE_7_LO 0x00000112 - -#define REG_A4XX_RBBM_PERFCTR_UCHE_7_HI 0x00000113 - -#define REG_A4XX_RBBM_PERFCTR_TP_0_LO 0x00000114 - -#define REG_A4XX_RBBM_PERFCTR_TP_0_HI 0x00000115 - -#define REG_A4XX_RBBM_PERFCTR_TP_0_LO 0x00000114 - -#define REG_A4XX_RBBM_PERFCTR_TP_0_HI 0x00000115 - -#define REG_A4XX_RBBM_PERFCTR_TP_1_LO 0x00000116 - -#define REG_A4XX_RBBM_PERFCTR_TP_1_HI 0x00000117 - -#define REG_A4XX_RBBM_PERFCTR_TP_2_LO 0x00000118 - -#define REG_A4XX_RBBM_PERFCTR_TP_2_HI 0x00000119 - -#define REG_A4XX_RBBM_PERFCTR_TP_3_LO 0x0000011a - -#define REG_A4XX_RBBM_PERFCTR_TP_3_HI 0x0000011b - -#define REG_A4XX_RBBM_PERFCTR_TP_4_LO 0x0000011c - -#define REG_A4XX_RBBM_PERFCTR_TP_4_HI 0x0000011d - -#define REG_A4XX_RBBM_PERFCTR_TP_5_LO 0x0000011e - -#define REG_A4XX_RBBM_PERFCTR_TP_5_HI 0x0000011f - -#define REG_A4XX_RBBM_PERFCTR_TP_6_LO 0x00000120 - -#define REG_A4XX_RBBM_PERFCTR_TP_6_HI 0x00000121 - -#define REG_A4XX_RBBM_PERFCTR_TP_7_LO 0x00000122 - -#define REG_A4XX_RBBM_PERFCTR_TP_7_HI 0x00000123 - -#define REG_A4XX_RBBM_PERFCTR_SP_0_LO 0x00000124 - -#define REG_A4XX_RBBM_PERFCTR_SP_0_HI 0x00000125 - -#define REG_A4XX_RBBM_PERFCTR_SP_1_LO 0x00000126 - -#define REG_A4XX_RBBM_PERFCTR_SP_1_HI 0x00000127 - -#define REG_A4XX_RBBM_PERFCTR_SP_2_LO 0x00000128 - -#define REG_A4XX_RBBM_PERFCTR_SP_2_HI 0x00000129 - -#define REG_A4XX_RBBM_PERFCTR_SP_3_LO 0x0000012a - -#define REG_A4XX_RBBM_PERFCTR_SP_3_HI 0x0000012b - -#define REG_A4XX_RBBM_PERFCTR_SP_4_LO 0x0000012c - -#define REG_A4XX_RBBM_PERFCTR_SP_4_HI 0x0000012d - -#define REG_A4XX_RBBM_PERFCTR_SP_5_LO 0x0000012e - -#define REG_A4XX_RBBM_PERFCTR_SP_5_HI 0x0000012f - -#define 
REG_A4XX_RBBM_PERFCTR_SP_6_LO 0x00000130 - -#define REG_A4XX_RBBM_PERFCTR_SP_6_HI 0x00000131 - -#define REG_A4XX_RBBM_PERFCTR_SP_7_LO 0x00000132 - -#define REG_A4XX_RBBM_PERFCTR_SP_7_HI 0x00000133 - -#define REG_A4XX_RBBM_PERFCTR_SP_8_LO 0x00000134 - -#define REG_A4XX_RBBM_PERFCTR_SP_8_HI 0x00000135 - -#define REG_A4XX_RBBM_PERFCTR_SP_9_LO 0x00000136 - -#define REG_A4XX_RBBM_PERFCTR_SP_9_HI 0x00000137 - -#define REG_A4XX_RBBM_PERFCTR_SP_10_LO 0x00000138 - -#define REG_A4XX_RBBM_PERFCTR_SP_10_HI 0x00000139 - -#define REG_A4XX_RBBM_PERFCTR_SP_11_LO 0x0000013a - -#define REG_A4XX_RBBM_PERFCTR_SP_11_HI 0x0000013b - -#define REG_A4XX_RBBM_PERFCTR_RB_0_LO 0x0000013c - -#define REG_A4XX_RBBM_PERFCTR_RB_0_HI 0x0000013d - -#define REG_A4XX_RBBM_PERFCTR_RB_1_LO 0x0000013e - -#define REG_A4XX_RBBM_PERFCTR_RB_1_HI 0x0000013f - -#define REG_A4XX_RBBM_PERFCTR_RB_2_LO 0x00000140 - -#define REG_A4XX_RBBM_PERFCTR_RB_2_HI 0x00000141 - -#define REG_A4XX_RBBM_PERFCTR_RB_3_LO 0x00000142 - -#define REG_A4XX_RBBM_PERFCTR_RB_3_HI 0x00000143 - -#define REG_A4XX_RBBM_PERFCTR_RB_4_LO 0x00000144 - -#define REG_A4XX_RBBM_PERFCTR_RB_4_HI 0x00000145 - -#define REG_A4XX_RBBM_PERFCTR_RB_5_LO 0x00000146 - -#define REG_A4XX_RBBM_PERFCTR_RB_5_HI 0x00000147 - -#define REG_A4XX_RBBM_PERFCTR_RB_6_LO 0x00000148 - -#define REG_A4XX_RBBM_PERFCTR_RB_6_HI 0x00000149 - -#define REG_A4XX_RBBM_PERFCTR_RB_7_LO 0x0000014a - -#define REG_A4XX_RBBM_PERFCTR_RB_7_HI 0x0000014b - -#define REG_A4XX_RBBM_PERFCTR_VSC_0_LO 0x0000014c - -#define REG_A4XX_RBBM_PERFCTR_VSC_0_HI 0x0000014d - -#define REG_A4XX_RBBM_PERFCTR_VSC_1_LO 0x0000014e - -#define REG_A4XX_RBBM_PERFCTR_VSC_1_HI 0x0000014f - -#define REG_A4XX_RBBM_PERFCTR_PWR_0_LO 0x00000166 - -#define REG_A4XX_RBBM_PERFCTR_PWR_0_HI 0x00000167 - -#define REG_A4XX_RBBM_PERFCTR_PWR_1_LO 0x00000168 - -#define REG_A4XX_RBBM_PERFCTR_PWR_1_HI 0x00000169 - -#define REG_A4XX_RBBM_ALWAYSON_COUNTER_LO 0x0000016e - -#define REG_A4XX_RBBM_ALWAYSON_COUNTER_HI 0x0000016f - -static 
inline uint32_t REG_A4XX_RBBM_CLOCK_CTL_SP(uint32_t i0) { return 0x00000068 + 0x1*i0; } - -static inline uint32_t REG_A4XX_RBBM_CLOCK_CTL_SP_REG(uint32_t i0) { return 0x00000068 + 0x1*i0; } - -static inline uint32_t REG_A4XX_RBBM_CLOCK_CTL2_SP(uint32_t i0) { return 0x0000006c + 0x1*i0; } - -static inline uint32_t REG_A4XX_RBBM_CLOCK_CTL2_SP_REG(uint32_t i0) { return 0x0000006c + 0x1*i0; } - -static inline uint32_t REG_A4XX_RBBM_CLOCK_HYST_SP(uint32_t i0) { return 0x00000070 + 0x1*i0; } - -static inline uint32_t REG_A4XX_RBBM_CLOCK_HYST_SP_REG(uint32_t i0) { return 0x00000070 + 0x1*i0; } - -static inline uint32_t REG_A4XX_RBBM_CLOCK_DELAY_SP(uint32_t i0) { return 0x00000074 + 0x1*i0; } - -static inline uint32_t REG_A4XX_RBBM_CLOCK_DELAY_SP_REG(uint32_t i0) { return 0x00000074 + 0x1*i0; } - -static inline uint32_t REG_A4XX_RBBM_CLOCK_CTL_RB(uint32_t i0) { return 0x00000078 + 0x1*i0; } - -static inline uint32_t REG_A4XX_RBBM_CLOCK_CTL_RB_REG(uint32_t i0) { return 0x00000078 + 0x1*i0; } - -static inline uint32_t REG_A4XX_RBBM_CLOCK_CTL2_RB(uint32_t i0) { return 0x0000007c + 0x1*i0; } - -static inline uint32_t REG_A4XX_RBBM_CLOCK_CTL2_RB_REG(uint32_t i0) { return 0x0000007c + 0x1*i0; } - -static inline uint32_t REG_A4XX_RBBM_CLOCK_CTL_MARB_CCU(uint32_t i0) { return 0x00000082 + 0x1*i0; } - -static inline uint32_t REG_A4XX_RBBM_CLOCK_CTL_MARB_CCU_REG(uint32_t i0) { return 0x00000082 + 0x1*i0; } - -static inline uint32_t REG_A4XX_RBBM_CLOCK_HYST_RB_MARB_CCU(uint32_t i0) { return 0x00000086 + 0x1*i0; } - -static inline uint32_t REG_A4XX_RBBM_CLOCK_HYST_RB_MARB_CCU_REG(uint32_t i0) { return 0x00000086 + 0x1*i0; } - -#define REG_A4XX_RBBM_CLOCK_HYST_COM_DCOM 0x00000080 - -#define REG_A4XX_RBBM_CLOCK_CTL_COM_DCOM 0x00000081 - -#define REG_A4XX_RBBM_CLOCK_CTL_HLSQ 0x0000008a - -#define REG_A4XX_RBBM_CLOCK_HYST_HLSQ 0x0000008b - -#define REG_A4XX_RBBM_CLOCK_DELAY_HLSQ 0x0000008c - -#define REG_A4XX_RBBM_CLOCK_DELAY_COM_DCOM 0x0000008d - -static inline uint32_t 
REG_A4XX_RBBM_CLOCK_DELAY_RB_MARB_CCU_L1(uint32_t i0) { return 0x0000008e + 0x1*i0; } - -static inline uint32_t REG_A4XX_RBBM_CLOCK_DELAY_RB_MARB_CCU_L1_REG(uint32_t i0) { return 0x0000008e + 0x1*i0; } - -#define REG_A4XX_RBBM_SP_REGFILE_SLEEP_CNTL_0 0x00000099 - -#define REG_A4XX_RBBM_SP_REGFILE_SLEEP_CNTL_1 0x0000009a - -#define REG_A4XX_RBBM_PERFCTR_PWR_1_LO 0x00000168 - -#define REG_A4XX_RBBM_PERFCTR_CTL 0x00000170 - -#define REG_A4XX_RBBM_PERFCTR_LOAD_CMD0 0x00000171 - -#define REG_A4XX_RBBM_PERFCTR_LOAD_CMD1 0x00000172 - -#define REG_A4XX_RBBM_PERFCTR_LOAD_CMD2 0x00000173 - -#define REG_A4XX_RBBM_PERFCTR_LOAD_VALUE_LO 0x00000174 - -#define REG_A4XX_RBBM_PERFCTR_LOAD_VALUE_HI 0x00000175 - -#define REG_A4XX_RBBM_PERFCTR_RBBM_SEL_0 0x00000176 - -#define REG_A4XX_RBBM_PERFCTR_RBBM_SEL_1 0x00000177 - -#define REG_A4XX_RBBM_PERFCTR_RBBM_SEL_2 0x00000178 - -#define REG_A4XX_RBBM_PERFCTR_RBBM_SEL_3 0x00000179 - -#define REG_A4XX_RBBM_GPU_BUSY_MASKED 0x0000017a - -#define REG_A4XX_RBBM_INT_0_STATUS 0x0000017d - -#define REG_A4XX_RBBM_CLOCK_STATUS 0x00000182 - -#define REG_A4XX_RBBM_AHB_STATUS 0x00000189 - -#define REG_A4XX_RBBM_AHB_ME_SPLIT_STATUS 0x0000018c - -#define REG_A4XX_RBBM_AHB_PFP_SPLIT_STATUS 0x0000018d - -#define REG_A4XX_RBBM_AHB_ERROR_STATUS 0x0000018f - -#define REG_A4XX_RBBM_STATUS 0x00000191 -#define A4XX_RBBM_STATUS_HI_BUSY 0x00000001 -#define A4XX_RBBM_STATUS_CP_ME_BUSY 0x00000002 -#define A4XX_RBBM_STATUS_CP_PFP_BUSY 0x00000004 -#define A4XX_RBBM_STATUS_CP_NRT_BUSY 0x00004000 -#define A4XX_RBBM_STATUS_VBIF_BUSY 0x00008000 -#define A4XX_RBBM_STATUS_TSE_BUSY 0x00010000 -#define A4XX_RBBM_STATUS_RAS_BUSY 0x00020000 -#define A4XX_RBBM_STATUS_RB_BUSY 0x00040000 -#define A4XX_RBBM_STATUS_PC_DCALL_BUSY 0x00080000 -#define A4XX_RBBM_STATUS_PC_VSD_BUSY 0x00100000 -#define A4XX_RBBM_STATUS_VFD_BUSY 0x00200000 -#define A4XX_RBBM_STATUS_VPC_BUSY 0x00400000 -#define A4XX_RBBM_STATUS_UCHE_BUSY 0x00800000 -#define A4XX_RBBM_STATUS_SP_BUSY 0x01000000 -#define 
A4XX_RBBM_STATUS_TPL1_BUSY 0x02000000 -#define A4XX_RBBM_STATUS_MARB_BUSY 0x04000000 -#define A4XX_RBBM_STATUS_VSC_BUSY 0x08000000 -#define A4XX_RBBM_STATUS_ARB_BUSY 0x10000000 -#define A4XX_RBBM_STATUS_HLSQ_BUSY 0x20000000 -#define A4XX_RBBM_STATUS_GPU_BUSY_NOHC 0x40000000 -#define A4XX_RBBM_STATUS_GPU_BUSY 0x80000000 - -#define REG_A4XX_RBBM_INTERFACE_RRDY_STATUS5 0x0000019f - -#define REG_A4XX_RBBM_POWER_STATUS 0x000001b0 -#define A4XX_RBBM_POWER_STATUS_SP_TP_PWR_ON 0x00100000 - -#define REG_A4XX_RBBM_WAIT_IDLE_CLOCKS_CTL2 0x000001b8 - -#define REG_A4XX_CP_SCRATCH_UMASK 0x00000228 - -#define REG_A4XX_CP_SCRATCH_ADDR 0x00000229 - -#define REG_A4XX_CP_RB_BASE 0x00000200 - -#define REG_A4XX_CP_RB_CNTL 0x00000201 - -#define REG_A4XX_CP_RB_WPTR 0x00000205 - -#define REG_A4XX_CP_RB_RPTR_ADDR 0x00000203 - -#define REG_A4XX_CP_RB_RPTR 0x00000204 - -#define REG_A4XX_CP_IB1_BASE 0x00000206 - -#define REG_A4XX_CP_IB1_BUFSZ 0x00000207 - -#define REG_A4XX_CP_IB2_BASE 0x00000208 - -#define REG_A4XX_CP_IB2_BUFSZ 0x00000209 - -#define REG_A4XX_CP_ME_NRT_ADDR 0x0000020c - -#define REG_A4XX_CP_ME_NRT_DATA 0x0000020d - -#define REG_A4XX_CP_ME_RB_DONE_DATA 0x00000217 - -#define REG_A4XX_CP_QUEUE_THRESH2 0x00000219 - -#define REG_A4XX_CP_MERCIU_SIZE 0x0000021b - -#define REG_A4XX_CP_ROQ_ADDR 0x0000021c - -#define REG_A4XX_CP_ROQ_DATA 0x0000021d - -#define REG_A4XX_CP_MEQ_ADDR 0x0000021e - -#define REG_A4XX_CP_MEQ_DATA 0x0000021f - -#define REG_A4XX_CP_MERCIU_ADDR 0x00000220 - -#define REG_A4XX_CP_MERCIU_DATA 0x00000221 - -#define REG_A4XX_CP_MERCIU_DATA2 0x00000222 - -#define REG_A4XX_CP_PFP_UCODE_ADDR 0x00000223 - -#define REG_A4XX_CP_PFP_UCODE_DATA 0x00000224 - -#define REG_A4XX_CP_ME_RAM_WADDR 0x00000225 - -#define REG_A4XX_CP_ME_RAM_RADDR 0x00000226 - -#define REG_A4XX_CP_ME_RAM_DATA 0x00000227 - -#define REG_A4XX_CP_PREEMPT 0x0000022a - -#define REG_A4XX_CP_CNTL 0x0000022c - -#define REG_A4XX_CP_ME_CNTL 0x0000022d - -#define REG_A4XX_CP_DEBUG 0x0000022e - -#define 
REG_A4XX_CP_DEBUG_ECO_CONTROL 0x00000231 - -#define REG_A4XX_CP_DRAW_STATE_ADDR 0x00000232 - -static inline uint32_t REG_A4XX_CP_PROTECT(uint32_t i0) { return 0x00000240 + 0x1*i0; } - -static inline uint32_t REG_A4XX_CP_PROTECT_REG(uint32_t i0) { return 0x00000240 + 0x1*i0; } -#define A4XX_CP_PROTECT_REG_BASE_ADDR__MASK 0x0001ffff -#define A4XX_CP_PROTECT_REG_BASE_ADDR__SHIFT 0 -static inline uint32_t A4XX_CP_PROTECT_REG_BASE_ADDR(uint32_t val) -{ - return ((val) << A4XX_CP_PROTECT_REG_BASE_ADDR__SHIFT) & A4XX_CP_PROTECT_REG_BASE_ADDR__MASK; -} -#define A4XX_CP_PROTECT_REG_MASK_LEN__MASK 0x1f000000 -#define A4XX_CP_PROTECT_REG_MASK_LEN__SHIFT 24 -static inline uint32_t A4XX_CP_PROTECT_REG_MASK_LEN(uint32_t val) -{ - return ((val) << A4XX_CP_PROTECT_REG_MASK_LEN__SHIFT) & A4XX_CP_PROTECT_REG_MASK_LEN__MASK; -} -#define A4XX_CP_PROTECT_REG_TRAP_WRITE 0x20000000 -#define A4XX_CP_PROTECT_REG_TRAP_READ 0x40000000 - -#define REG_A4XX_CP_PROTECT_CTRL 0x00000250 - -#define REG_A4XX_CP_ST_BASE 0x000004c0 - -#define REG_A4XX_CP_STQ_AVAIL 0x000004ce - -#define REG_A4XX_CP_MERCIU_STAT 0x000004d0 - -#define REG_A4XX_CP_WFI_PEND_CTR 0x000004d2 - -#define REG_A4XX_CP_HW_FAULT 0x000004d8 - -#define REG_A4XX_CP_PROTECT_STATUS 0x000004da - -#define REG_A4XX_CP_EVENTS_IN_FLIGHT 0x000004dd - -#define REG_A4XX_CP_PERFCTR_CP_SEL_0 0x00000500 - -#define REG_A4XX_CP_PERFCTR_CP_SEL_1 0x00000501 - -#define REG_A4XX_CP_PERFCTR_CP_SEL_2 0x00000502 - -#define REG_A4XX_CP_PERFCTR_CP_SEL_3 0x00000503 - -#define REG_A4XX_CP_PERFCTR_CP_SEL_4 0x00000504 - -#define REG_A4XX_CP_PERFCTR_CP_SEL_5 0x00000505 - -#define REG_A4XX_CP_PERFCTR_CP_SEL_6 0x00000506 - -#define REG_A4XX_CP_PERFCTR_CP_SEL_7 0x00000507 - -#define REG_A4XX_CP_PERFCOMBINER_SELECT 0x0000050b - -static inline uint32_t REG_A4XX_CP_SCRATCH(uint32_t i0) { return 0x00000578 + 0x1*i0; } - -static inline uint32_t REG_A4XX_CP_SCRATCH_REG(uint32_t i0) { return 0x00000578 + 0x1*i0; } - -#define REG_A4XX_SP_VS_STATUS 0x00000ec0 - -#define 
REG_A4XX_SP_MODE_CONTROL 0x00000ec3 - -#define REG_A4XX_SP_PERFCTR_SP_SEL_0 0x00000ec4 - -#define REG_A4XX_SP_PERFCTR_SP_SEL_1 0x00000ec5 - -#define REG_A4XX_SP_PERFCTR_SP_SEL_2 0x00000ec6 - -#define REG_A4XX_SP_PERFCTR_SP_SEL_3 0x00000ec7 - -#define REG_A4XX_SP_PERFCTR_SP_SEL_4 0x00000ec8 - -#define REG_A4XX_SP_PERFCTR_SP_SEL_5 0x00000ec9 - -#define REG_A4XX_SP_PERFCTR_SP_SEL_6 0x00000eca - -#define REG_A4XX_SP_PERFCTR_SP_SEL_7 0x00000ecb - -#define REG_A4XX_SP_PERFCTR_SP_SEL_8 0x00000ecc - -#define REG_A4XX_SP_PERFCTR_SP_SEL_9 0x00000ecd - -#define REG_A4XX_SP_PERFCTR_SP_SEL_10 0x00000ece - -#define REG_A4XX_SP_PERFCTR_SP_SEL_11 0x00000ecf - -#define REG_A4XX_SP_SP_CTRL_REG 0x000022c0 -#define A4XX_SP_SP_CTRL_REG_BINNING_PASS 0x00080000 - -#define REG_A4XX_SP_INSTR_CACHE_CTRL 0x000022c1 -#define A4XX_SP_INSTR_CACHE_CTRL_VS_BUFFER 0x00000080 -#define A4XX_SP_INSTR_CACHE_CTRL_FS_BUFFER 0x00000100 -#define A4XX_SP_INSTR_CACHE_CTRL_INSTR_BUFFER 0x00000400 - -#define REG_A4XX_SP_VS_CTRL_REG0 0x000022c4 -#define A4XX_SP_VS_CTRL_REG0_THREADMODE__MASK 0x00000001 -#define A4XX_SP_VS_CTRL_REG0_THREADMODE__SHIFT 0 -static inline uint32_t A4XX_SP_VS_CTRL_REG0_THREADMODE(enum a3xx_threadmode val) -{ - return ((val) << A4XX_SP_VS_CTRL_REG0_THREADMODE__SHIFT) & A4XX_SP_VS_CTRL_REG0_THREADMODE__MASK; -} -#define A4XX_SP_VS_CTRL_REG0_VARYING 0x00000002 -#define A4XX_SP_VS_CTRL_REG0_CACHEINVALID 0x00000004 -#define A4XX_SP_VS_CTRL_REG0_HALFREGFOOTPRINT__MASK 0x000003f0 -#define A4XX_SP_VS_CTRL_REG0_HALFREGFOOTPRINT__SHIFT 4 -static inline uint32_t A4XX_SP_VS_CTRL_REG0_HALFREGFOOTPRINT(uint32_t val) -{ - return ((val) << A4XX_SP_VS_CTRL_REG0_HALFREGFOOTPRINT__SHIFT) & A4XX_SP_VS_CTRL_REG0_HALFREGFOOTPRINT__MASK; -} -#define A4XX_SP_VS_CTRL_REG0_FULLREGFOOTPRINT__MASK 0x0000fc00 -#define A4XX_SP_VS_CTRL_REG0_FULLREGFOOTPRINT__SHIFT 10 -static inline uint32_t A4XX_SP_VS_CTRL_REG0_FULLREGFOOTPRINT(uint32_t val) -{ - return ((val) << A4XX_SP_VS_CTRL_REG0_FULLREGFOOTPRINT__SHIFT) & 
A4XX_SP_VS_CTRL_REG0_FULLREGFOOTPRINT__MASK; -} -#define A4XX_SP_VS_CTRL_REG0_INOUTREGOVERLAP__MASK 0x000c0000 -#define A4XX_SP_VS_CTRL_REG0_INOUTREGOVERLAP__SHIFT 18 -static inline uint32_t A4XX_SP_VS_CTRL_REG0_INOUTREGOVERLAP(uint32_t val) -{ - return ((val) << A4XX_SP_VS_CTRL_REG0_INOUTREGOVERLAP__SHIFT) & A4XX_SP_VS_CTRL_REG0_INOUTREGOVERLAP__MASK; -} -#define A4XX_SP_VS_CTRL_REG0_THREADSIZE__MASK 0x00100000 -#define A4XX_SP_VS_CTRL_REG0_THREADSIZE__SHIFT 20 -static inline uint32_t A4XX_SP_VS_CTRL_REG0_THREADSIZE(enum a3xx_threadsize val) -{ - return ((val) << A4XX_SP_VS_CTRL_REG0_THREADSIZE__SHIFT) & A4XX_SP_VS_CTRL_REG0_THREADSIZE__MASK; -} -#define A4XX_SP_VS_CTRL_REG0_SUPERTHREADMODE 0x00200000 -#define A4XX_SP_VS_CTRL_REG0_PIXLODENABLE 0x00400000 - -#define REG_A4XX_SP_VS_CTRL_REG1 0x000022c5 -#define A4XX_SP_VS_CTRL_REG1_CONSTLENGTH__MASK 0x000000ff -#define A4XX_SP_VS_CTRL_REG1_CONSTLENGTH__SHIFT 0 -static inline uint32_t A4XX_SP_VS_CTRL_REG1_CONSTLENGTH(uint32_t val) -{ - return ((val) << A4XX_SP_VS_CTRL_REG1_CONSTLENGTH__SHIFT) & A4XX_SP_VS_CTRL_REG1_CONSTLENGTH__MASK; -} -#define A4XX_SP_VS_CTRL_REG1_INITIALOUTSTANDING__MASK 0x7f000000 -#define A4XX_SP_VS_CTRL_REG1_INITIALOUTSTANDING__SHIFT 24 -static inline uint32_t A4XX_SP_VS_CTRL_REG1_INITIALOUTSTANDING(uint32_t val) -{ - return ((val) << A4XX_SP_VS_CTRL_REG1_INITIALOUTSTANDING__SHIFT) & A4XX_SP_VS_CTRL_REG1_INITIALOUTSTANDING__MASK; -} - -#define REG_A4XX_SP_VS_PARAM_REG 0x000022c6 -#define A4XX_SP_VS_PARAM_REG_POSREGID__MASK 0x000000ff -#define A4XX_SP_VS_PARAM_REG_POSREGID__SHIFT 0 -static inline uint32_t A4XX_SP_VS_PARAM_REG_POSREGID(uint32_t val) -{ - return ((val) << A4XX_SP_VS_PARAM_REG_POSREGID__SHIFT) & A4XX_SP_VS_PARAM_REG_POSREGID__MASK; -} -#define A4XX_SP_VS_PARAM_REG_PSIZEREGID__MASK 0x0000ff00 -#define A4XX_SP_VS_PARAM_REG_PSIZEREGID__SHIFT 8 -static inline uint32_t A4XX_SP_VS_PARAM_REG_PSIZEREGID(uint32_t val) -{ - return ((val) << A4XX_SP_VS_PARAM_REG_PSIZEREGID__SHIFT) & 
A4XX_SP_VS_PARAM_REG_PSIZEREGID__MASK; -} -#define A4XX_SP_VS_PARAM_REG_TOTALVSOUTVAR__MASK 0xfff00000 -#define A4XX_SP_VS_PARAM_REG_TOTALVSOUTVAR__SHIFT 20 -static inline uint32_t A4XX_SP_VS_PARAM_REG_TOTALVSOUTVAR(uint32_t val) -{ - return ((val) << A4XX_SP_VS_PARAM_REG_TOTALVSOUTVAR__SHIFT) & A4XX_SP_VS_PARAM_REG_TOTALVSOUTVAR__MASK; -} - -static inline uint32_t REG_A4XX_SP_VS_OUT(uint32_t i0) { return 0x000022c7 + 0x1*i0; } - -static inline uint32_t REG_A4XX_SP_VS_OUT_REG(uint32_t i0) { return 0x000022c7 + 0x1*i0; } -#define A4XX_SP_VS_OUT_REG_A_REGID__MASK 0x000001ff -#define A4XX_SP_VS_OUT_REG_A_REGID__SHIFT 0 -static inline uint32_t A4XX_SP_VS_OUT_REG_A_REGID(uint32_t val) -{ - return ((val) << A4XX_SP_VS_OUT_REG_A_REGID__SHIFT) & A4XX_SP_VS_OUT_REG_A_REGID__MASK; -} -#define A4XX_SP_VS_OUT_REG_A_COMPMASK__MASK 0x00001e00 -#define A4XX_SP_VS_OUT_REG_A_COMPMASK__SHIFT 9 -static inline uint32_t A4XX_SP_VS_OUT_REG_A_COMPMASK(uint32_t val) -{ - return ((val) << A4XX_SP_VS_OUT_REG_A_COMPMASK__SHIFT) & A4XX_SP_VS_OUT_REG_A_COMPMASK__MASK; -} -#define A4XX_SP_VS_OUT_REG_B_REGID__MASK 0x01ff0000 -#define A4XX_SP_VS_OUT_REG_B_REGID__SHIFT 16 -static inline uint32_t A4XX_SP_VS_OUT_REG_B_REGID(uint32_t val) -{ - return ((val) << A4XX_SP_VS_OUT_REG_B_REGID__SHIFT) & A4XX_SP_VS_OUT_REG_B_REGID__MASK; -} -#define A4XX_SP_VS_OUT_REG_B_COMPMASK__MASK 0x1e000000 -#define A4XX_SP_VS_OUT_REG_B_COMPMASK__SHIFT 25 -static inline uint32_t A4XX_SP_VS_OUT_REG_B_COMPMASK(uint32_t val) -{ - return ((val) << A4XX_SP_VS_OUT_REG_B_COMPMASK__SHIFT) & A4XX_SP_VS_OUT_REG_B_COMPMASK__MASK; -} - -static inline uint32_t REG_A4XX_SP_VS_VPC_DST(uint32_t i0) { return 0x000022d8 + 0x1*i0; } - -static inline uint32_t REG_A4XX_SP_VS_VPC_DST_REG(uint32_t i0) { return 0x000022d8 + 0x1*i0; } -#define A4XX_SP_VS_VPC_DST_REG_OUTLOC0__MASK 0x000000ff -#define A4XX_SP_VS_VPC_DST_REG_OUTLOC0__SHIFT 0 -static inline uint32_t A4XX_SP_VS_VPC_DST_REG_OUTLOC0(uint32_t val) -{ - return ((val) << 
A4XX_SP_VS_VPC_DST_REG_OUTLOC0__SHIFT) & A4XX_SP_VS_VPC_DST_REG_OUTLOC0__MASK; -} -#define A4XX_SP_VS_VPC_DST_REG_OUTLOC1__MASK 0x0000ff00 -#define A4XX_SP_VS_VPC_DST_REG_OUTLOC1__SHIFT 8 -static inline uint32_t A4XX_SP_VS_VPC_DST_REG_OUTLOC1(uint32_t val) -{ - return ((val) << A4XX_SP_VS_VPC_DST_REG_OUTLOC1__SHIFT) & A4XX_SP_VS_VPC_DST_REG_OUTLOC1__MASK; -} -#define A4XX_SP_VS_VPC_DST_REG_OUTLOC2__MASK 0x00ff0000 -#define A4XX_SP_VS_VPC_DST_REG_OUTLOC2__SHIFT 16 -static inline uint32_t A4XX_SP_VS_VPC_DST_REG_OUTLOC2(uint32_t val) -{ - return ((val) << A4XX_SP_VS_VPC_DST_REG_OUTLOC2__SHIFT) & A4XX_SP_VS_VPC_DST_REG_OUTLOC2__MASK; -} -#define A4XX_SP_VS_VPC_DST_REG_OUTLOC3__MASK 0xff000000 -#define A4XX_SP_VS_VPC_DST_REG_OUTLOC3__SHIFT 24 -static inline uint32_t A4XX_SP_VS_VPC_DST_REG_OUTLOC3(uint32_t val) -{ - return ((val) << A4XX_SP_VS_VPC_DST_REG_OUTLOC3__SHIFT) & A4XX_SP_VS_VPC_DST_REG_OUTLOC3__MASK; -} - -#define REG_A4XX_SP_VS_OBJ_OFFSET_REG 0x000022e0 -#define A4XX_SP_VS_OBJ_OFFSET_REG_CONSTOBJECTOFFSET__MASK 0x01ff0000 -#define A4XX_SP_VS_OBJ_OFFSET_REG_CONSTOBJECTOFFSET__SHIFT 16 -static inline uint32_t A4XX_SP_VS_OBJ_OFFSET_REG_CONSTOBJECTOFFSET(uint32_t val) -{ - return ((val) << A4XX_SP_VS_OBJ_OFFSET_REG_CONSTOBJECTOFFSET__SHIFT) & A4XX_SP_VS_OBJ_OFFSET_REG_CONSTOBJECTOFFSET__MASK; -} -#define A4XX_SP_VS_OBJ_OFFSET_REG_SHADEROBJOFFSET__MASK 0xfe000000 -#define A4XX_SP_VS_OBJ_OFFSET_REG_SHADEROBJOFFSET__SHIFT 25 -static inline uint32_t A4XX_SP_VS_OBJ_OFFSET_REG_SHADEROBJOFFSET(uint32_t val) -{ - return ((val) << A4XX_SP_VS_OBJ_OFFSET_REG_SHADEROBJOFFSET__SHIFT) & A4XX_SP_VS_OBJ_OFFSET_REG_SHADEROBJOFFSET__MASK; -} - -#define REG_A4XX_SP_VS_OBJ_START 0x000022e1 - -#define REG_A4XX_SP_VS_PVT_MEM_PARAM 0x000022e2 - -#define REG_A4XX_SP_VS_PVT_MEM_ADDR 0x000022e3 - -#define REG_A4XX_SP_VS_LENGTH_REG 0x000022e5 - -#define REG_A4XX_SP_FS_CTRL_REG0 0x000022e8 -#define A4XX_SP_FS_CTRL_REG0_THREADMODE__MASK 0x00000001 -#define 
A4XX_SP_FS_CTRL_REG0_THREADMODE__SHIFT 0 -static inline uint32_t A4XX_SP_FS_CTRL_REG0_THREADMODE(enum a3xx_threadmode val) -{ - return ((val) << A4XX_SP_FS_CTRL_REG0_THREADMODE__SHIFT) & A4XX_SP_FS_CTRL_REG0_THREADMODE__MASK; -} -#define A4XX_SP_FS_CTRL_REG0_VARYING 0x00000002 -#define A4XX_SP_FS_CTRL_REG0_CACHEINVALID 0x00000004 -#define A4XX_SP_FS_CTRL_REG0_HALFREGFOOTPRINT__MASK 0x000003f0 -#define A4XX_SP_FS_CTRL_REG0_HALFREGFOOTPRINT__SHIFT 4 -static inline uint32_t A4XX_SP_FS_CTRL_REG0_HALFREGFOOTPRINT(uint32_t val) -{ - return ((val) << A4XX_SP_FS_CTRL_REG0_HALFREGFOOTPRINT__SHIFT) & A4XX_SP_FS_CTRL_REG0_HALFREGFOOTPRINT__MASK; -} -#define A4XX_SP_FS_CTRL_REG0_FULLREGFOOTPRINT__MASK 0x0000fc00 -#define A4XX_SP_FS_CTRL_REG0_FULLREGFOOTPRINT__SHIFT 10 -static inline uint32_t A4XX_SP_FS_CTRL_REG0_FULLREGFOOTPRINT(uint32_t val) -{ - return ((val) << A4XX_SP_FS_CTRL_REG0_FULLREGFOOTPRINT__SHIFT) & A4XX_SP_FS_CTRL_REG0_FULLREGFOOTPRINT__MASK; -} -#define A4XX_SP_FS_CTRL_REG0_INOUTREGOVERLAP__MASK 0x000c0000 -#define A4XX_SP_FS_CTRL_REG0_INOUTREGOVERLAP__SHIFT 18 -static inline uint32_t A4XX_SP_FS_CTRL_REG0_INOUTREGOVERLAP(uint32_t val) -{ - return ((val) << A4XX_SP_FS_CTRL_REG0_INOUTREGOVERLAP__SHIFT) & A4XX_SP_FS_CTRL_REG0_INOUTREGOVERLAP__MASK; -} -#define A4XX_SP_FS_CTRL_REG0_THREADSIZE__MASK 0x00100000 -#define A4XX_SP_FS_CTRL_REG0_THREADSIZE__SHIFT 20 -static inline uint32_t A4XX_SP_FS_CTRL_REG0_THREADSIZE(enum a3xx_threadsize val) -{ - return ((val) << A4XX_SP_FS_CTRL_REG0_THREADSIZE__SHIFT) & A4XX_SP_FS_CTRL_REG0_THREADSIZE__MASK; -} -#define A4XX_SP_FS_CTRL_REG0_SUPERTHREADMODE 0x00200000 -#define A4XX_SP_FS_CTRL_REG0_PIXLODENABLE 0x00400000 - -#define REG_A4XX_SP_FS_CTRL_REG1 0x000022e9 -#define A4XX_SP_FS_CTRL_REG1_CONSTLENGTH__MASK 0x000000ff -#define A4XX_SP_FS_CTRL_REG1_CONSTLENGTH__SHIFT 0 -static inline uint32_t A4XX_SP_FS_CTRL_REG1_CONSTLENGTH(uint32_t val) -{ - return ((val) << A4XX_SP_FS_CTRL_REG1_CONSTLENGTH__SHIFT) & 
A4XX_SP_FS_CTRL_REG1_CONSTLENGTH__MASK; -} -#define A4XX_SP_FS_CTRL_REG1_FACENESS 0x00080000 -#define A4XX_SP_FS_CTRL_REG1_VARYING 0x00100000 -#define A4XX_SP_FS_CTRL_REG1_FRAGCOORD 0x00200000 - -#define REG_A4XX_SP_FS_OBJ_OFFSET_REG 0x000022ea -#define A4XX_SP_FS_OBJ_OFFSET_REG_CONSTOBJECTOFFSET__MASK 0x01ff0000 -#define A4XX_SP_FS_OBJ_OFFSET_REG_CONSTOBJECTOFFSET__SHIFT 16 -static inline uint32_t A4XX_SP_FS_OBJ_OFFSET_REG_CONSTOBJECTOFFSET(uint32_t val) -{ - return ((val) << A4XX_SP_FS_OBJ_OFFSET_REG_CONSTOBJECTOFFSET__SHIFT) & A4XX_SP_FS_OBJ_OFFSET_REG_CONSTOBJECTOFFSET__MASK; -} -#define A4XX_SP_FS_OBJ_OFFSET_REG_SHADEROBJOFFSET__MASK 0xfe000000 -#define A4XX_SP_FS_OBJ_OFFSET_REG_SHADEROBJOFFSET__SHIFT 25 -static inline uint32_t A4XX_SP_FS_OBJ_OFFSET_REG_SHADEROBJOFFSET(uint32_t val) -{ - return ((val) << A4XX_SP_FS_OBJ_OFFSET_REG_SHADEROBJOFFSET__SHIFT) & A4XX_SP_FS_OBJ_OFFSET_REG_SHADEROBJOFFSET__MASK; -} - -#define REG_A4XX_SP_FS_OBJ_START 0x000022eb - -#define REG_A4XX_SP_FS_PVT_MEM_PARAM 0x000022ec - -#define REG_A4XX_SP_FS_PVT_MEM_ADDR 0x000022ed - -#define REG_A4XX_SP_FS_LENGTH_REG 0x000022ef - -#define REG_A4XX_SP_FS_OUTPUT_REG 0x000022f0 -#define A4XX_SP_FS_OUTPUT_REG_MRT__MASK 0x0000000f -#define A4XX_SP_FS_OUTPUT_REG_MRT__SHIFT 0 -static inline uint32_t A4XX_SP_FS_OUTPUT_REG_MRT(uint32_t val) -{ - return ((val) << A4XX_SP_FS_OUTPUT_REG_MRT__SHIFT) & A4XX_SP_FS_OUTPUT_REG_MRT__MASK; -} -#define A4XX_SP_FS_OUTPUT_REG_DEPTH_ENABLE 0x00000080 -#define A4XX_SP_FS_OUTPUT_REG_DEPTH_REGID__MASK 0x0000ff00 -#define A4XX_SP_FS_OUTPUT_REG_DEPTH_REGID__SHIFT 8 -static inline uint32_t A4XX_SP_FS_OUTPUT_REG_DEPTH_REGID(uint32_t val) -{ - return ((val) << A4XX_SP_FS_OUTPUT_REG_DEPTH_REGID__SHIFT) & A4XX_SP_FS_OUTPUT_REG_DEPTH_REGID__MASK; -} -#define A4XX_SP_FS_OUTPUT_REG_SAMPLEMASK_REGID__MASK 0xff000000 -#define A4XX_SP_FS_OUTPUT_REG_SAMPLEMASK_REGID__SHIFT 24 -static inline uint32_t A4XX_SP_FS_OUTPUT_REG_SAMPLEMASK_REGID(uint32_t val) -{ - return ((val) << 
A4XX_SP_FS_OUTPUT_REG_SAMPLEMASK_REGID__SHIFT) & A4XX_SP_FS_OUTPUT_REG_SAMPLEMASK_REGID__MASK; -} - -static inline uint32_t REG_A4XX_SP_FS_MRT(uint32_t i0) { return 0x000022f1 + 0x1*i0; } - -static inline uint32_t REG_A4XX_SP_FS_MRT_REG(uint32_t i0) { return 0x000022f1 + 0x1*i0; } -#define A4XX_SP_FS_MRT_REG_REGID__MASK 0x000000ff -#define A4XX_SP_FS_MRT_REG_REGID__SHIFT 0 -static inline uint32_t A4XX_SP_FS_MRT_REG_REGID(uint32_t val) -{ - return ((val) << A4XX_SP_FS_MRT_REG_REGID__SHIFT) & A4XX_SP_FS_MRT_REG_REGID__MASK; -} -#define A4XX_SP_FS_MRT_REG_HALF_PRECISION 0x00000100 -#define A4XX_SP_FS_MRT_REG_MRTFORMAT__MASK 0x0003f000 -#define A4XX_SP_FS_MRT_REG_MRTFORMAT__SHIFT 12 -static inline uint32_t A4XX_SP_FS_MRT_REG_MRTFORMAT(enum a4xx_color_fmt val) -{ - return ((val) << A4XX_SP_FS_MRT_REG_MRTFORMAT__SHIFT) & A4XX_SP_FS_MRT_REG_MRTFORMAT__MASK; -} -#define A4XX_SP_FS_MRT_REG_COLOR_SRGB 0x00040000 - -#define REG_A4XX_SP_CS_CTRL_REG0 0x00002300 - -#define REG_A4XX_SP_CS_OBJ_OFFSET_REG 0x00002301 - -#define REG_A4XX_SP_CS_OBJ_START 0x00002302 - -#define REG_A4XX_SP_CS_PVT_MEM_PARAM 0x00002303 - -#define REG_A4XX_SP_CS_PVT_MEM_ADDR 0x00002304 - -#define REG_A4XX_SP_CS_PVT_MEM_SIZE 0x00002305 - -#define REG_A4XX_SP_CS_LENGTH_REG 0x00002306 - -#define REG_A4XX_SP_HS_OBJ_OFFSET_REG 0x0000230d -#define A4XX_SP_HS_OBJ_OFFSET_REG_CONSTOBJECTOFFSET__MASK 0x01ff0000 -#define A4XX_SP_HS_OBJ_OFFSET_REG_CONSTOBJECTOFFSET__SHIFT 16 -static inline uint32_t A4XX_SP_HS_OBJ_OFFSET_REG_CONSTOBJECTOFFSET(uint32_t val) -{ - return ((val) << A4XX_SP_HS_OBJ_OFFSET_REG_CONSTOBJECTOFFSET__SHIFT) & A4XX_SP_HS_OBJ_OFFSET_REG_CONSTOBJECTOFFSET__MASK; -} -#define A4XX_SP_HS_OBJ_OFFSET_REG_SHADEROBJOFFSET__MASK 0xfe000000 -#define A4XX_SP_HS_OBJ_OFFSET_REG_SHADEROBJOFFSET__SHIFT 25 -static inline uint32_t A4XX_SP_HS_OBJ_OFFSET_REG_SHADEROBJOFFSET(uint32_t val) -{ - return ((val) << A4XX_SP_HS_OBJ_OFFSET_REG_SHADEROBJOFFSET__SHIFT) & A4XX_SP_HS_OBJ_OFFSET_REG_SHADEROBJOFFSET__MASK; -} - 
-#define REG_A4XX_SP_HS_OBJ_START 0x0000230e - -#define REG_A4XX_SP_HS_PVT_MEM_PARAM 0x0000230f - -#define REG_A4XX_SP_HS_PVT_MEM_ADDR 0x00002310 - -#define REG_A4XX_SP_HS_LENGTH_REG 0x00002312 - -#define REG_A4XX_SP_DS_PARAM_REG 0x0000231a -#define A4XX_SP_DS_PARAM_REG_POSREGID__MASK 0x000000ff -#define A4XX_SP_DS_PARAM_REG_POSREGID__SHIFT 0 -static inline uint32_t A4XX_SP_DS_PARAM_REG_POSREGID(uint32_t val) -{ - return ((val) << A4XX_SP_DS_PARAM_REG_POSREGID__SHIFT) & A4XX_SP_DS_PARAM_REG_POSREGID__MASK; -} -#define A4XX_SP_DS_PARAM_REG_TOTALGSOUTVAR__MASK 0xfff00000 -#define A4XX_SP_DS_PARAM_REG_TOTALGSOUTVAR__SHIFT 20 -static inline uint32_t A4XX_SP_DS_PARAM_REG_TOTALGSOUTVAR(uint32_t val) -{ - return ((val) << A4XX_SP_DS_PARAM_REG_TOTALGSOUTVAR__SHIFT) & A4XX_SP_DS_PARAM_REG_TOTALGSOUTVAR__MASK; -} - -static inline uint32_t REG_A4XX_SP_DS_OUT(uint32_t i0) { return 0x0000231b + 0x1*i0; } - -static inline uint32_t REG_A4XX_SP_DS_OUT_REG(uint32_t i0) { return 0x0000231b + 0x1*i0; } -#define A4XX_SP_DS_OUT_REG_A_REGID__MASK 0x000001ff -#define A4XX_SP_DS_OUT_REG_A_REGID__SHIFT 0 -static inline uint32_t A4XX_SP_DS_OUT_REG_A_REGID(uint32_t val) -{ - return ((val) << A4XX_SP_DS_OUT_REG_A_REGID__SHIFT) & A4XX_SP_DS_OUT_REG_A_REGID__MASK; -} -#define A4XX_SP_DS_OUT_REG_A_COMPMASK__MASK 0x00001e00 -#define A4XX_SP_DS_OUT_REG_A_COMPMASK__SHIFT 9 -static inline uint32_t A4XX_SP_DS_OUT_REG_A_COMPMASK(uint32_t val) -{ - return ((val) << A4XX_SP_DS_OUT_REG_A_COMPMASK__SHIFT) & A4XX_SP_DS_OUT_REG_A_COMPMASK__MASK; -} -#define A4XX_SP_DS_OUT_REG_B_REGID__MASK 0x01ff0000 -#define A4XX_SP_DS_OUT_REG_B_REGID__SHIFT 16 -static inline uint32_t A4XX_SP_DS_OUT_REG_B_REGID(uint32_t val) -{ - return ((val) << A4XX_SP_DS_OUT_REG_B_REGID__SHIFT) & A4XX_SP_DS_OUT_REG_B_REGID__MASK; -} -#define A4XX_SP_DS_OUT_REG_B_COMPMASK__MASK 0x1e000000 -#define A4XX_SP_DS_OUT_REG_B_COMPMASK__SHIFT 25 -static inline uint32_t A4XX_SP_DS_OUT_REG_B_COMPMASK(uint32_t val) -{ - return ((val) << 
A4XX_SP_DS_OUT_REG_B_COMPMASK__SHIFT) & A4XX_SP_DS_OUT_REG_B_COMPMASK__MASK; -} - -static inline uint32_t REG_A4XX_SP_DS_VPC_DST(uint32_t i0) { return 0x0000232c + 0x1*i0; } - -static inline uint32_t REG_A4XX_SP_DS_VPC_DST_REG(uint32_t i0) { return 0x0000232c + 0x1*i0; } -#define A4XX_SP_DS_VPC_DST_REG_OUTLOC0__MASK 0x000000ff -#define A4XX_SP_DS_VPC_DST_REG_OUTLOC0__SHIFT 0 -static inline uint32_t A4XX_SP_DS_VPC_DST_REG_OUTLOC0(uint32_t val) -{ - return ((val) << A4XX_SP_DS_VPC_DST_REG_OUTLOC0__SHIFT) & A4XX_SP_DS_VPC_DST_REG_OUTLOC0__MASK; -} -#define A4XX_SP_DS_VPC_DST_REG_OUTLOC1__MASK 0x0000ff00 -#define A4XX_SP_DS_VPC_DST_REG_OUTLOC1__SHIFT 8 -static inline uint32_t A4XX_SP_DS_VPC_DST_REG_OUTLOC1(uint32_t val) -{ - return ((val) << A4XX_SP_DS_VPC_DST_REG_OUTLOC1__SHIFT) & A4XX_SP_DS_VPC_DST_REG_OUTLOC1__MASK; -} -#define A4XX_SP_DS_VPC_DST_REG_OUTLOC2__MASK 0x00ff0000 -#define A4XX_SP_DS_VPC_DST_REG_OUTLOC2__SHIFT 16 -static inline uint32_t A4XX_SP_DS_VPC_DST_REG_OUTLOC2(uint32_t val) -{ - return ((val) << A4XX_SP_DS_VPC_DST_REG_OUTLOC2__SHIFT) & A4XX_SP_DS_VPC_DST_REG_OUTLOC2__MASK; -} -#define A4XX_SP_DS_VPC_DST_REG_OUTLOC3__MASK 0xff000000 -#define A4XX_SP_DS_VPC_DST_REG_OUTLOC3__SHIFT 24 -static inline uint32_t A4XX_SP_DS_VPC_DST_REG_OUTLOC3(uint32_t val) -{ - return ((val) << A4XX_SP_DS_VPC_DST_REG_OUTLOC3__SHIFT) & A4XX_SP_DS_VPC_DST_REG_OUTLOC3__MASK; -} - -#define REG_A4XX_SP_DS_OBJ_OFFSET_REG 0x00002334 -#define A4XX_SP_DS_OBJ_OFFSET_REG_CONSTOBJECTOFFSET__MASK 0x01ff0000 -#define A4XX_SP_DS_OBJ_OFFSET_REG_CONSTOBJECTOFFSET__SHIFT 16 -static inline uint32_t A4XX_SP_DS_OBJ_OFFSET_REG_CONSTOBJECTOFFSET(uint32_t val) -{ - return ((val) << A4XX_SP_DS_OBJ_OFFSET_REG_CONSTOBJECTOFFSET__SHIFT) & A4XX_SP_DS_OBJ_OFFSET_REG_CONSTOBJECTOFFSET__MASK; -} -#define A4XX_SP_DS_OBJ_OFFSET_REG_SHADEROBJOFFSET__MASK 0xfe000000 -#define A4XX_SP_DS_OBJ_OFFSET_REG_SHADEROBJOFFSET__SHIFT 25 -static inline uint32_t A4XX_SP_DS_OBJ_OFFSET_REG_SHADEROBJOFFSET(uint32_t val) -{ 
- return ((val) << A4XX_SP_DS_OBJ_OFFSET_REG_SHADEROBJOFFSET__SHIFT) & A4XX_SP_DS_OBJ_OFFSET_REG_SHADEROBJOFFSET__MASK; -} - -#define REG_A4XX_SP_DS_OBJ_START 0x00002335 - -#define REG_A4XX_SP_DS_PVT_MEM_PARAM 0x00002336 - -#define REG_A4XX_SP_DS_PVT_MEM_ADDR 0x00002337 - -#define REG_A4XX_SP_DS_LENGTH_REG 0x00002339 - -#define REG_A4XX_SP_GS_PARAM_REG 0x00002341 -#define A4XX_SP_GS_PARAM_REG_POSREGID__MASK 0x000000ff -#define A4XX_SP_GS_PARAM_REG_POSREGID__SHIFT 0 -static inline uint32_t A4XX_SP_GS_PARAM_REG_POSREGID(uint32_t val) -{ - return ((val) << A4XX_SP_GS_PARAM_REG_POSREGID__SHIFT) & A4XX_SP_GS_PARAM_REG_POSREGID__MASK; -} -#define A4XX_SP_GS_PARAM_REG_PRIMREGID__MASK 0x0000ff00 -#define A4XX_SP_GS_PARAM_REG_PRIMREGID__SHIFT 8 -static inline uint32_t A4XX_SP_GS_PARAM_REG_PRIMREGID(uint32_t val) -{ - return ((val) << A4XX_SP_GS_PARAM_REG_PRIMREGID__SHIFT) & A4XX_SP_GS_PARAM_REG_PRIMREGID__MASK; -} -#define A4XX_SP_GS_PARAM_REG_TOTALGSOUTVAR__MASK 0xfff00000 -#define A4XX_SP_GS_PARAM_REG_TOTALGSOUTVAR__SHIFT 20 -static inline uint32_t A4XX_SP_GS_PARAM_REG_TOTALGSOUTVAR(uint32_t val) -{ - return ((val) << A4XX_SP_GS_PARAM_REG_TOTALGSOUTVAR__SHIFT) & A4XX_SP_GS_PARAM_REG_TOTALGSOUTVAR__MASK; -} - -static inline uint32_t REG_A4XX_SP_GS_OUT(uint32_t i0) { return 0x00002342 + 0x1*i0; } - -static inline uint32_t REG_A4XX_SP_GS_OUT_REG(uint32_t i0) { return 0x00002342 + 0x1*i0; } -#define A4XX_SP_GS_OUT_REG_A_REGID__MASK 0x000001ff -#define A4XX_SP_GS_OUT_REG_A_REGID__SHIFT 0 -static inline uint32_t A4XX_SP_GS_OUT_REG_A_REGID(uint32_t val) -{ - return ((val) << A4XX_SP_GS_OUT_REG_A_REGID__SHIFT) & A4XX_SP_GS_OUT_REG_A_REGID__MASK; -} -#define A4XX_SP_GS_OUT_REG_A_COMPMASK__MASK 0x00001e00 -#define A4XX_SP_GS_OUT_REG_A_COMPMASK__SHIFT 9 -static inline uint32_t A4XX_SP_GS_OUT_REG_A_COMPMASK(uint32_t val) -{ - return ((val) << A4XX_SP_GS_OUT_REG_A_COMPMASK__SHIFT) & A4XX_SP_GS_OUT_REG_A_COMPMASK__MASK; -} -#define A4XX_SP_GS_OUT_REG_B_REGID__MASK 0x01ff0000 -#define 
A4XX_SP_GS_OUT_REG_B_REGID__SHIFT 16 -static inline uint32_t A4XX_SP_GS_OUT_REG_B_REGID(uint32_t val) -{ - return ((val) << A4XX_SP_GS_OUT_REG_B_REGID__SHIFT) & A4XX_SP_GS_OUT_REG_B_REGID__MASK; -} -#define A4XX_SP_GS_OUT_REG_B_COMPMASK__MASK 0x1e000000 -#define A4XX_SP_GS_OUT_REG_B_COMPMASK__SHIFT 25 -static inline uint32_t A4XX_SP_GS_OUT_REG_B_COMPMASK(uint32_t val) -{ - return ((val) << A4XX_SP_GS_OUT_REG_B_COMPMASK__SHIFT) & A4XX_SP_GS_OUT_REG_B_COMPMASK__MASK; -} - -static inline uint32_t REG_A4XX_SP_GS_VPC_DST(uint32_t i0) { return 0x00002353 + 0x1*i0; } - -static inline uint32_t REG_A4XX_SP_GS_VPC_DST_REG(uint32_t i0) { return 0x00002353 + 0x1*i0; } -#define A4XX_SP_GS_VPC_DST_REG_OUTLOC0__MASK 0x000000ff -#define A4XX_SP_GS_VPC_DST_REG_OUTLOC0__SHIFT 0 -static inline uint32_t A4XX_SP_GS_VPC_DST_REG_OUTLOC0(uint32_t val) -{ - return ((val) << A4XX_SP_GS_VPC_DST_REG_OUTLOC0__SHIFT) & A4XX_SP_GS_VPC_DST_REG_OUTLOC0__MASK; -} -#define A4XX_SP_GS_VPC_DST_REG_OUTLOC1__MASK 0x0000ff00 -#define A4XX_SP_GS_VPC_DST_REG_OUTLOC1__SHIFT 8 -static inline uint32_t A4XX_SP_GS_VPC_DST_REG_OUTLOC1(uint32_t val) -{ - return ((val) << A4XX_SP_GS_VPC_DST_REG_OUTLOC1__SHIFT) & A4XX_SP_GS_VPC_DST_REG_OUTLOC1__MASK; -} -#define A4XX_SP_GS_VPC_DST_REG_OUTLOC2__MASK 0x00ff0000 -#define A4XX_SP_GS_VPC_DST_REG_OUTLOC2__SHIFT 16 -static inline uint32_t A4XX_SP_GS_VPC_DST_REG_OUTLOC2(uint32_t val) -{ - return ((val) << A4XX_SP_GS_VPC_DST_REG_OUTLOC2__SHIFT) & A4XX_SP_GS_VPC_DST_REG_OUTLOC2__MASK; -} -#define A4XX_SP_GS_VPC_DST_REG_OUTLOC3__MASK 0xff000000 -#define A4XX_SP_GS_VPC_DST_REG_OUTLOC3__SHIFT 24 -static inline uint32_t A4XX_SP_GS_VPC_DST_REG_OUTLOC3(uint32_t val) -{ - return ((val) << A4XX_SP_GS_VPC_DST_REG_OUTLOC3__SHIFT) & A4XX_SP_GS_VPC_DST_REG_OUTLOC3__MASK; -} - -#define REG_A4XX_SP_GS_OBJ_OFFSET_REG 0x0000235b -#define A4XX_SP_GS_OBJ_OFFSET_REG_CONSTOBJECTOFFSET__MASK 0x01ff0000 -#define A4XX_SP_GS_OBJ_OFFSET_REG_CONSTOBJECTOFFSET__SHIFT 16 -static inline uint32_t 
A4XX_SP_GS_OBJ_OFFSET_REG_CONSTOBJECTOFFSET(uint32_t val) -{ - return ((val) << A4XX_SP_GS_OBJ_OFFSET_REG_CONSTOBJECTOFFSET__SHIFT) & A4XX_SP_GS_OBJ_OFFSET_REG_CONSTOBJECTOFFSET__MASK; -} -#define A4XX_SP_GS_OBJ_OFFSET_REG_SHADEROBJOFFSET__MASK 0xfe000000 -#define A4XX_SP_GS_OBJ_OFFSET_REG_SHADEROBJOFFSET__SHIFT 25 -static inline uint32_t A4XX_SP_GS_OBJ_OFFSET_REG_SHADEROBJOFFSET(uint32_t val) -{ - return ((val) << A4XX_SP_GS_OBJ_OFFSET_REG_SHADEROBJOFFSET__SHIFT) & A4XX_SP_GS_OBJ_OFFSET_REG_SHADEROBJOFFSET__MASK; -} - -#define REG_A4XX_SP_GS_OBJ_START 0x0000235c - -#define REG_A4XX_SP_GS_PVT_MEM_PARAM 0x0000235d - -#define REG_A4XX_SP_GS_PVT_MEM_ADDR 0x0000235e - -#define REG_A4XX_SP_GS_LENGTH_REG 0x00002360 - -#define REG_A4XX_VPC_DEBUG_RAM_SEL 0x00000e60 - -#define REG_A4XX_VPC_DEBUG_RAM_READ 0x00000e61 - -#define REG_A4XX_VPC_DEBUG_ECO_CONTROL 0x00000e64 - -#define REG_A4XX_VPC_PERFCTR_VPC_SEL_0 0x00000e65 - -#define REG_A4XX_VPC_PERFCTR_VPC_SEL_1 0x00000e66 - -#define REG_A4XX_VPC_PERFCTR_VPC_SEL_2 0x00000e67 - -#define REG_A4XX_VPC_PERFCTR_VPC_SEL_3 0x00000e68 - -#define REG_A4XX_VPC_ATTR 0x00002140 -#define A4XX_VPC_ATTR_TOTALATTR__MASK 0x000001ff -#define A4XX_VPC_ATTR_TOTALATTR__SHIFT 0 -static inline uint32_t A4XX_VPC_ATTR_TOTALATTR(uint32_t val) -{ - return ((val) << A4XX_VPC_ATTR_TOTALATTR__SHIFT) & A4XX_VPC_ATTR_TOTALATTR__MASK; -} -#define A4XX_VPC_ATTR_PSIZE 0x00000200 -#define A4XX_VPC_ATTR_THRDASSIGN__MASK 0x00003000 -#define A4XX_VPC_ATTR_THRDASSIGN__SHIFT 12 -static inline uint32_t A4XX_VPC_ATTR_THRDASSIGN(uint32_t val) -{ - return ((val) << A4XX_VPC_ATTR_THRDASSIGN__SHIFT) & A4XX_VPC_ATTR_THRDASSIGN__MASK; -} -#define A4XX_VPC_ATTR_ENABLE 0x02000000 - -#define REG_A4XX_VPC_PACK 0x00002141 -#define A4XX_VPC_PACK_NUMBYPASSVAR__MASK 0x000000ff -#define A4XX_VPC_PACK_NUMBYPASSVAR__SHIFT 0 -static inline uint32_t A4XX_VPC_PACK_NUMBYPASSVAR(uint32_t val) -{ - return ((val) << A4XX_VPC_PACK_NUMBYPASSVAR__SHIFT) & A4XX_VPC_PACK_NUMBYPASSVAR__MASK; -} 
-#define A4XX_VPC_PACK_NUMFPNONPOSVAR__MASK 0x0000ff00 -#define A4XX_VPC_PACK_NUMFPNONPOSVAR__SHIFT 8 -static inline uint32_t A4XX_VPC_PACK_NUMFPNONPOSVAR(uint32_t val) -{ - return ((val) << A4XX_VPC_PACK_NUMFPNONPOSVAR__SHIFT) & A4XX_VPC_PACK_NUMFPNONPOSVAR__MASK; -} -#define A4XX_VPC_PACK_NUMNONPOSVSVAR__MASK 0x00ff0000 -#define A4XX_VPC_PACK_NUMNONPOSVSVAR__SHIFT 16 -static inline uint32_t A4XX_VPC_PACK_NUMNONPOSVSVAR(uint32_t val) -{ - return ((val) << A4XX_VPC_PACK_NUMNONPOSVSVAR__SHIFT) & A4XX_VPC_PACK_NUMNONPOSVSVAR__MASK; -} - -static inline uint32_t REG_A4XX_VPC_VARYING_INTERP(uint32_t i0) { return 0x00002142 + 0x1*i0; } - -static inline uint32_t REG_A4XX_VPC_VARYING_INTERP_MODE(uint32_t i0) { return 0x00002142 + 0x1*i0; } - -static inline uint32_t REG_A4XX_VPC_VARYING_PS_REPL(uint32_t i0) { return 0x0000214a + 0x1*i0; } - -static inline uint32_t REG_A4XX_VPC_VARYING_PS_REPL_MODE(uint32_t i0) { return 0x0000214a + 0x1*i0; } - -#define REG_A4XX_VPC_SO_FLUSH_WADDR_3 0x0000216e - -#define REG_A4XX_VSC_BIN_SIZE 0x00000c00 -#define A4XX_VSC_BIN_SIZE_WIDTH__MASK 0x0000001f -#define A4XX_VSC_BIN_SIZE_WIDTH__SHIFT 0 -static inline uint32_t A4XX_VSC_BIN_SIZE_WIDTH(uint32_t val) -{ - assert(!(val & 0x1f)); - return ((val >> 5) << A4XX_VSC_BIN_SIZE_WIDTH__SHIFT) & A4XX_VSC_BIN_SIZE_WIDTH__MASK; -} -#define A4XX_VSC_BIN_SIZE_HEIGHT__MASK 0x000003e0 -#define A4XX_VSC_BIN_SIZE_HEIGHT__SHIFT 5 -static inline uint32_t A4XX_VSC_BIN_SIZE_HEIGHT(uint32_t val) -{ - assert(!(val & 0x1f)); - return ((val >> 5) << A4XX_VSC_BIN_SIZE_HEIGHT__SHIFT) & A4XX_VSC_BIN_SIZE_HEIGHT__MASK; -} - -#define REG_A4XX_VSC_SIZE_ADDRESS 0x00000c01 - -#define REG_A4XX_VSC_SIZE_ADDRESS2 0x00000c02 - -#define REG_A4XX_VSC_DEBUG_ECO_CONTROL 0x00000c03 - -static inline uint32_t REG_A4XX_VSC_PIPE_CONFIG(uint32_t i0) { return 0x00000c08 + 0x1*i0; } - -static inline uint32_t REG_A4XX_VSC_PIPE_CONFIG_REG(uint32_t i0) { return 0x00000c08 + 0x1*i0; } -#define A4XX_VSC_PIPE_CONFIG_REG_X__MASK 0x000003ff 
-#define A4XX_VSC_PIPE_CONFIG_REG_X__SHIFT 0 -static inline uint32_t A4XX_VSC_PIPE_CONFIG_REG_X(uint32_t val) -{ - return ((val) << A4XX_VSC_PIPE_CONFIG_REG_X__SHIFT) & A4XX_VSC_PIPE_CONFIG_REG_X__MASK; -} -#define A4XX_VSC_PIPE_CONFIG_REG_Y__MASK 0x000ffc00 -#define A4XX_VSC_PIPE_CONFIG_REG_Y__SHIFT 10 -static inline uint32_t A4XX_VSC_PIPE_CONFIG_REG_Y(uint32_t val) -{ - return ((val) << A4XX_VSC_PIPE_CONFIG_REG_Y__SHIFT) & A4XX_VSC_PIPE_CONFIG_REG_Y__MASK; -} -#define A4XX_VSC_PIPE_CONFIG_REG_W__MASK 0x00f00000 -#define A4XX_VSC_PIPE_CONFIG_REG_W__SHIFT 20 -static inline uint32_t A4XX_VSC_PIPE_CONFIG_REG_W(uint32_t val) -{ - return ((val) << A4XX_VSC_PIPE_CONFIG_REG_W__SHIFT) & A4XX_VSC_PIPE_CONFIG_REG_W__MASK; -} -#define A4XX_VSC_PIPE_CONFIG_REG_H__MASK 0x0f000000 -#define A4XX_VSC_PIPE_CONFIG_REG_H__SHIFT 24 -static inline uint32_t A4XX_VSC_PIPE_CONFIG_REG_H(uint32_t val) -{ - return ((val) << A4XX_VSC_PIPE_CONFIG_REG_H__SHIFT) & A4XX_VSC_PIPE_CONFIG_REG_H__MASK; -} - -static inline uint32_t REG_A4XX_VSC_PIPE_DATA_ADDRESS(uint32_t i0) { return 0x00000c10 + 0x1*i0; } - -static inline uint32_t REG_A4XX_VSC_PIPE_DATA_ADDRESS_REG(uint32_t i0) { return 0x00000c10 + 0x1*i0; } - -static inline uint32_t REG_A4XX_VSC_PIPE_DATA_LENGTH(uint32_t i0) { return 0x00000c18 + 0x1*i0; } - -static inline uint32_t REG_A4XX_VSC_PIPE_DATA_LENGTH_REG(uint32_t i0) { return 0x00000c18 + 0x1*i0; } - -#define REG_A4XX_VSC_PIPE_PARTIAL_POSN_1 0x00000c41 - -#define REG_A4XX_VSC_PERFCTR_VSC_SEL_0 0x00000c50 - -#define REG_A4XX_VSC_PERFCTR_VSC_SEL_1 0x00000c51 - -#define REG_A4XX_VFD_DEBUG_CONTROL 0x00000e40 - -#define REG_A4XX_VFD_PERFCTR_VFD_SEL_0 0x00000e43 - -#define REG_A4XX_VFD_PERFCTR_VFD_SEL_1 0x00000e44 - -#define REG_A4XX_VFD_PERFCTR_VFD_SEL_2 0x00000e45 - -#define REG_A4XX_VFD_PERFCTR_VFD_SEL_3 0x00000e46 - -#define REG_A4XX_VFD_PERFCTR_VFD_SEL_4 0x00000e47 - -#define REG_A4XX_VFD_PERFCTR_VFD_SEL_5 0x00000e48 - -#define REG_A4XX_VFD_PERFCTR_VFD_SEL_6 0x00000e49 - -#define 
REG_A4XX_VFD_PERFCTR_VFD_SEL_7 0x00000e4a - -#define REG_A4XX_VGT_CL_INITIATOR 0x000021d0 - -#define REG_A4XX_VGT_EVENT_INITIATOR 0x000021d9 - -#define REG_A4XX_VFD_CONTROL_0 0x00002200 -#define A4XX_VFD_CONTROL_0_TOTALATTRTOVS__MASK 0x000000ff -#define A4XX_VFD_CONTROL_0_TOTALATTRTOVS__SHIFT 0 -static inline uint32_t A4XX_VFD_CONTROL_0_TOTALATTRTOVS(uint32_t val) -{ - return ((val) << A4XX_VFD_CONTROL_0_TOTALATTRTOVS__SHIFT) & A4XX_VFD_CONTROL_0_TOTALATTRTOVS__MASK; -} -#define A4XX_VFD_CONTROL_0_BYPASSATTROVS__MASK 0x0001fe00 -#define A4XX_VFD_CONTROL_0_BYPASSATTROVS__SHIFT 9 -static inline uint32_t A4XX_VFD_CONTROL_0_BYPASSATTROVS(uint32_t val) -{ - return ((val) << A4XX_VFD_CONTROL_0_BYPASSATTROVS__SHIFT) & A4XX_VFD_CONTROL_0_BYPASSATTROVS__MASK; -} -#define A4XX_VFD_CONTROL_0_STRMDECINSTRCNT__MASK 0x03f00000 -#define A4XX_VFD_CONTROL_0_STRMDECINSTRCNT__SHIFT 20 -static inline uint32_t A4XX_VFD_CONTROL_0_STRMDECINSTRCNT(uint32_t val) -{ - return ((val) << A4XX_VFD_CONTROL_0_STRMDECINSTRCNT__SHIFT) & A4XX_VFD_CONTROL_0_STRMDECINSTRCNT__MASK; -} -#define A4XX_VFD_CONTROL_0_STRMFETCHINSTRCNT__MASK 0xfc000000 -#define A4XX_VFD_CONTROL_0_STRMFETCHINSTRCNT__SHIFT 26 -static inline uint32_t A4XX_VFD_CONTROL_0_STRMFETCHINSTRCNT(uint32_t val) -{ - return ((val) << A4XX_VFD_CONTROL_0_STRMFETCHINSTRCNT__SHIFT) & A4XX_VFD_CONTROL_0_STRMFETCHINSTRCNT__MASK; -} - -#define REG_A4XX_VFD_CONTROL_1 0x00002201 -#define A4XX_VFD_CONTROL_1_MAXSTORAGE__MASK 0x0000ffff -#define A4XX_VFD_CONTROL_1_MAXSTORAGE__SHIFT 0 -static inline uint32_t A4XX_VFD_CONTROL_1_MAXSTORAGE(uint32_t val) -{ - return ((val) << A4XX_VFD_CONTROL_1_MAXSTORAGE__SHIFT) & A4XX_VFD_CONTROL_1_MAXSTORAGE__MASK; -} -#define A4XX_VFD_CONTROL_1_REGID4VTX__MASK 0x00ff0000 -#define A4XX_VFD_CONTROL_1_REGID4VTX__SHIFT 16 -static inline uint32_t A4XX_VFD_CONTROL_1_REGID4VTX(uint32_t val) -{ - return ((val) << A4XX_VFD_CONTROL_1_REGID4VTX__SHIFT) & A4XX_VFD_CONTROL_1_REGID4VTX__MASK; -} -#define 
A4XX_VFD_CONTROL_1_REGID4INST__MASK 0xff000000 -#define A4XX_VFD_CONTROL_1_REGID4INST__SHIFT 24 -static inline uint32_t A4XX_VFD_CONTROL_1_REGID4INST(uint32_t val) -{ - return ((val) << A4XX_VFD_CONTROL_1_REGID4INST__SHIFT) & A4XX_VFD_CONTROL_1_REGID4INST__MASK; -} - -#define REG_A4XX_VFD_CONTROL_2 0x00002202 - -#define REG_A4XX_VFD_CONTROL_3 0x00002203 -#define A4XX_VFD_CONTROL_3_REGID_VTXCNT__MASK 0x0000ff00 -#define A4XX_VFD_CONTROL_3_REGID_VTXCNT__SHIFT 8 -static inline uint32_t A4XX_VFD_CONTROL_3_REGID_VTXCNT(uint32_t val) -{ - return ((val) << A4XX_VFD_CONTROL_3_REGID_VTXCNT__SHIFT) & A4XX_VFD_CONTROL_3_REGID_VTXCNT__MASK; -} -#define A4XX_VFD_CONTROL_3_REGID_TESSX__MASK 0x00ff0000 -#define A4XX_VFD_CONTROL_3_REGID_TESSX__SHIFT 16 -static inline uint32_t A4XX_VFD_CONTROL_3_REGID_TESSX(uint32_t val) -{ - return ((val) << A4XX_VFD_CONTROL_3_REGID_TESSX__SHIFT) & A4XX_VFD_CONTROL_3_REGID_TESSX__MASK; -} -#define A4XX_VFD_CONTROL_3_REGID_TESSY__MASK 0xff000000 -#define A4XX_VFD_CONTROL_3_REGID_TESSY__SHIFT 24 -static inline uint32_t A4XX_VFD_CONTROL_3_REGID_TESSY(uint32_t val) -{ - return ((val) << A4XX_VFD_CONTROL_3_REGID_TESSY__SHIFT) & A4XX_VFD_CONTROL_3_REGID_TESSY__MASK; -} - -#define REG_A4XX_VFD_CONTROL_4 0x00002204 - -#define REG_A4XX_VFD_INDEX_OFFSET 0x00002208 - -static inline uint32_t REG_A4XX_VFD_FETCH(uint32_t i0) { return 0x0000220a + 0x4*i0; } - -static inline uint32_t REG_A4XX_VFD_FETCH_INSTR_0(uint32_t i0) { return 0x0000220a + 0x4*i0; } -#define A4XX_VFD_FETCH_INSTR_0_FETCHSIZE__MASK 0x0000007f -#define A4XX_VFD_FETCH_INSTR_0_FETCHSIZE__SHIFT 0 -static inline uint32_t A4XX_VFD_FETCH_INSTR_0_FETCHSIZE(uint32_t val) -{ - return ((val) << A4XX_VFD_FETCH_INSTR_0_FETCHSIZE__SHIFT) & A4XX_VFD_FETCH_INSTR_0_FETCHSIZE__MASK; -} -#define A4XX_VFD_FETCH_INSTR_0_BUFSTRIDE__MASK 0x0001ff80 -#define A4XX_VFD_FETCH_INSTR_0_BUFSTRIDE__SHIFT 7 -static inline uint32_t A4XX_VFD_FETCH_INSTR_0_BUFSTRIDE(uint32_t val) -{ - return ((val) << 
A4XX_VFD_FETCH_INSTR_0_BUFSTRIDE__SHIFT) & A4XX_VFD_FETCH_INSTR_0_BUFSTRIDE__MASK; -} -#define A4XX_VFD_FETCH_INSTR_0_SWITCHNEXT 0x00080000 -#define A4XX_VFD_FETCH_INSTR_0_INSTANCED 0x00100000 - -static inline uint32_t REG_A4XX_VFD_FETCH_INSTR_1(uint32_t i0) { return 0x0000220b + 0x4*i0; } - -static inline uint32_t REG_A4XX_VFD_FETCH_INSTR_2(uint32_t i0) { return 0x0000220c + 0x4*i0; } -#define A4XX_VFD_FETCH_INSTR_2_SIZE__MASK 0xffffffff -#define A4XX_VFD_FETCH_INSTR_2_SIZE__SHIFT 0 -static inline uint32_t A4XX_VFD_FETCH_INSTR_2_SIZE(uint32_t val) -{ - return ((val) << A4XX_VFD_FETCH_INSTR_2_SIZE__SHIFT) & A4XX_VFD_FETCH_INSTR_2_SIZE__MASK; -} - -static inline uint32_t REG_A4XX_VFD_FETCH_INSTR_3(uint32_t i0) { return 0x0000220d + 0x4*i0; } -#define A4XX_VFD_FETCH_INSTR_3_STEPRATE__MASK 0x000001ff -#define A4XX_VFD_FETCH_INSTR_3_STEPRATE__SHIFT 0 -static inline uint32_t A4XX_VFD_FETCH_INSTR_3_STEPRATE(uint32_t val) -{ - return ((val) << A4XX_VFD_FETCH_INSTR_3_STEPRATE__SHIFT) & A4XX_VFD_FETCH_INSTR_3_STEPRATE__MASK; -} - -static inline uint32_t REG_A4XX_VFD_DECODE(uint32_t i0) { return 0x0000228a + 0x1*i0; } - -static inline uint32_t REG_A4XX_VFD_DECODE_INSTR(uint32_t i0) { return 0x0000228a + 0x1*i0; } -#define A4XX_VFD_DECODE_INSTR_WRITEMASK__MASK 0x0000000f -#define A4XX_VFD_DECODE_INSTR_WRITEMASK__SHIFT 0 -static inline uint32_t A4XX_VFD_DECODE_INSTR_WRITEMASK(uint32_t val) -{ - return ((val) << A4XX_VFD_DECODE_INSTR_WRITEMASK__SHIFT) & A4XX_VFD_DECODE_INSTR_WRITEMASK__MASK; -} -#define A4XX_VFD_DECODE_INSTR_CONSTFILL 0x00000010 -#define A4XX_VFD_DECODE_INSTR_FORMAT__MASK 0x00000fc0 -#define A4XX_VFD_DECODE_INSTR_FORMAT__SHIFT 6 -static inline uint32_t A4XX_VFD_DECODE_INSTR_FORMAT(enum a4xx_vtx_fmt val) -{ - return ((val) << A4XX_VFD_DECODE_INSTR_FORMAT__SHIFT) & A4XX_VFD_DECODE_INSTR_FORMAT__MASK; -} -#define A4XX_VFD_DECODE_INSTR_REGID__MASK 0x000ff000 -#define A4XX_VFD_DECODE_INSTR_REGID__SHIFT 12 -static inline uint32_t A4XX_VFD_DECODE_INSTR_REGID(uint32_t 
val) -{ - return ((val) << A4XX_VFD_DECODE_INSTR_REGID__SHIFT) & A4XX_VFD_DECODE_INSTR_REGID__MASK; -} -#define A4XX_VFD_DECODE_INSTR_INT 0x00100000 -#define A4XX_VFD_DECODE_INSTR_SWAP__MASK 0x00c00000 -#define A4XX_VFD_DECODE_INSTR_SWAP__SHIFT 22 -static inline uint32_t A4XX_VFD_DECODE_INSTR_SWAP(enum a3xx_color_swap val) -{ - return ((val) << A4XX_VFD_DECODE_INSTR_SWAP__SHIFT) & A4XX_VFD_DECODE_INSTR_SWAP__MASK; -} -#define A4XX_VFD_DECODE_INSTR_SHIFTCNT__MASK 0x1f000000 -#define A4XX_VFD_DECODE_INSTR_SHIFTCNT__SHIFT 24 -static inline uint32_t A4XX_VFD_DECODE_INSTR_SHIFTCNT(uint32_t val) -{ - return ((val) << A4XX_VFD_DECODE_INSTR_SHIFTCNT__SHIFT) & A4XX_VFD_DECODE_INSTR_SHIFTCNT__MASK; -} -#define A4XX_VFD_DECODE_INSTR_LASTCOMPVALID 0x20000000 -#define A4XX_VFD_DECODE_INSTR_SWITCHNEXT 0x40000000 - -#define REG_A4XX_TPL1_DEBUG_ECO_CONTROL 0x00000f00 - -#define REG_A4XX_TPL1_TP_MODE_CONTROL 0x00000f03 - -#define REG_A4XX_TPL1_PERFCTR_TP_SEL_0 0x00000f04 - -#define REG_A4XX_TPL1_PERFCTR_TP_SEL_1 0x00000f05 - -#define REG_A4XX_TPL1_PERFCTR_TP_SEL_2 0x00000f06 - -#define REG_A4XX_TPL1_PERFCTR_TP_SEL_3 0x00000f07 - -#define REG_A4XX_TPL1_PERFCTR_TP_SEL_4 0x00000f08 - -#define REG_A4XX_TPL1_PERFCTR_TP_SEL_5 0x00000f09 - -#define REG_A4XX_TPL1_PERFCTR_TP_SEL_6 0x00000f0a - -#define REG_A4XX_TPL1_PERFCTR_TP_SEL_7 0x00000f0b - -#define REG_A4XX_TPL1_TP_TEX_OFFSET 0x00002380 - -#define REG_A4XX_TPL1_TP_TEX_COUNT 0x00002381 -#define A4XX_TPL1_TP_TEX_COUNT_VS__MASK 0x000000ff -#define A4XX_TPL1_TP_TEX_COUNT_VS__SHIFT 0 -static inline uint32_t A4XX_TPL1_TP_TEX_COUNT_VS(uint32_t val) -{ - return ((val) << A4XX_TPL1_TP_TEX_COUNT_VS__SHIFT) & A4XX_TPL1_TP_TEX_COUNT_VS__MASK; -} -#define A4XX_TPL1_TP_TEX_COUNT_HS__MASK 0x0000ff00 -#define A4XX_TPL1_TP_TEX_COUNT_HS__SHIFT 8 -static inline uint32_t A4XX_TPL1_TP_TEX_COUNT_HS(uint32_t val) -{ - return ((val) << A4XX_TPL1_TP_TEX_COUNT_HS__SHIFT) & A4XX_TPL1_TP_TEX_COUNT_HS__MASK; -} -#define A4XX_TPL1_TP_TEX_COUNT_DS__MASK 0x00ff0000 
-#define A4XX_TPL1_TP_TEX_COUNT_DS__SHIFT 16 -static inline uint32_t A4XX_TPL1_TP_TEX_COUNT_DS(uint32_t val) -{ - return ((val) << A4XX_TPL1_TP_TEX_COUNT_DS__SHIFT) & A4XX_TPL1_TP_TEX_COUNT_DS__MASK; -} -#define A4XX_TPL1_TP_TEX_COUNT_GS__MASK 0xff000000 -#define A4XX_TPL1_TP_TEX_COUNT_GS__SHIFT 24 -static inline uint32_t A4XX_TPL1_TP_TEX_COUNT_GS(uint32_t val) -{ - return ((val) << A4XX_TPL1_TP_TEX_COUNT_GS__SHIFT) & A4XX_TPL1_TP_TEX_COUNT_GS__MASK; -} - -#define REG_A4XX_TPL1_TP_VS_BORDER_COLOR_BASE_ADDR 0x00002384 - -#define REG_A4XX_TPL1_TP_HS_BORDER_COLOR_BASE_ADDR 0x00002387 - -#define REG_A4XX_TPL1_TP_DS_BORDER_COLOR_BASE_ADDR 0x0000238a - -#define REG_A4XX_TPL1_TP_GS_BORDER_COLOR_BASE_ADDR 0x0000238d - -#define REG_A4XX_TPL1_TP_FS_TEX_COUNT 0x000023a0 - -#define REG_A4XX_TPL1_TP_FS_BORDER_COLOR_BASE_ADDR 0x000023a1 - -#define REG_A4XX_TPL1_TP_CS_BORDER_COLOR_BASE_ADDR 0x000023a4 - -#define REG_A4XX_TPL1_TP_CS_SAMPLER_BASE_ADDR 0x000023a5 - -#define REG_A4XX_TPL1_TP_CS_TEXMEMOBJ_BASE_ADDR 0x000023a6 - -#define REG_A4XX_GRAS_TSE_STATUS 0x00000c80 - -#define REG_A4XX_GRAS_DEBUG_ECO_CONTROL 0x00000c81 - -#define REG_A4XX_GRAS_PERFCTR_TSE_SEL_0 0x00000c88 - -#define REG_A4XX_GRAS_PERFCTR_TSE_SEL_1 0x00000c89 - -#define REG_A4XX_GRAS_PERFCTR_TSE_SEL_2 0x00000c8a - -#define REG_A4XX_GRAS_PERFCTR_TSE_SEL_3 0x00000c8b - -#define REG_A4XX_GRAS_PERFCTR_RAS_SEL_0 0x00000c8c - -#define REG_A4XX_GRAS_PERFCTR_RAS_SEL_1 0x00000c8d - -#define REG_A4XX_GRAS_PERFCTR_RAS_SEL_2 0x00000c8e - -#define REG_A4XX_GRAS_PERFCTR_RAS_SEL_3 0x00000c8f - -#define REG_A4XX_GRAS_CL_CLIP_CNTL 0x00002000 -#define A4XX_GRAS_CL_CLIP_CNTL_CLIP_DISABLE 0x00008000 -#define A4XX_GRAS_CL_CLIP_CNTL_ZNEAR_CLIP_DISABLE 0x00010000 -#define A4XX_GRAS_CL_CLIP_CNTL_ZFAR_CLIP_DISABLE 0x00020000 -#define A4XX_GRAS_CL_CLIP_CNTL_ZERO_GB_SCALE_Z 0x00400000 - -#define REG_A4XX_GRAS_CLEAR_CNTL 0x00002003 -#define A4XX_GRAS_CLEAR_CNTL_NOT_FASTCLEAR 0x00000001 - -#define REG_A4XX_GRAS_CL_GB_CLIP_ADJ 0x00002004 
-#define A4XX_GRAS_CL_GB_CLIP_ADJ_HORZ__MASK 0x000003ff -#define A4XX_GRAS_CL_GB_CLIP_ADJ_HORZ__SHIFT 0 -static inline uint32_t A4XX_GRAS_CL_GB_CLIP_ADJ_HORZ(uint32_t val) -{ - return ((val) << A4XX_GRAS_CL_GB_CLIP_ADJ_HORZ__SHIFT) & A4XX_GRAS_CL_GB_CLIP_ADJ_HORZ__MASK; -} -#define A4XX_GRAS_CL_GB_CLIP_ADJ_VERT__MASK 0x000ffc00 -#define A4XX_GRAS_CL_GB_CLIP_ADJ_VERT__SHIFT 10 -static inline uint32_t A4XX_GRAS_CL_GB_CLIP_ADJ_VERT(uint32_t val) -{ - return ((val) << A4XX_GRAS_CL_GB_CLIP_ADJ_VERT__SHIFT) & A4XX_GRAS_CL_GB_CLIP_ADJ_VERT__MASK; -} - -#define REG_A4XX_GRAS_CL_VPORT_XOFFSET_0 0x00002008 -#define A4XX_GRAS_CL_VPORT_XOFFSET_0__MASK 0xffffffff -#define A4XX_GRAS_CL_VPORT_XOFFSET_0__SHIFT 0 -static inline uint32_t A4XX_GRAS_CL_VPORT_XOFFSET_0(float val) -{ - return ((fui(val)) << A4XX_GRAS_CL_VPORT_XOFFSET_0__SHIFT) & A4XX_GRAS_CL_VPORT_XOFFSET_0__MASK; -} - -#define REG_A4XX_GRAS_CL_VPORT_XSCALE_0 0x00002009 -#define A4XX_GRAS_CL_VPORT_XSCALE_0__MASK 0xffffffff -#define A4XX_GRAS_CL_VPORT_XSCALE_0__SHIFT 0 -static inline uint32_t A4XX_GRAS_CL_VPORT_XSCALE_0(float val) -{ - return ((fui(val)) << A4XX_GRAS_CL_VPORT_XSCALE_0__SHIFT) & A4XX_GRAS_CL_VPORT_XSCALE_0__MASK; -} - -#define REG_A4XX_GRAS_CL_VPORT_YOFFSET_0 0x0000200a -#define A4XX_GRAS_CL_VPORT_YOFFSET_0__MASK 0xffffffff -#define A4XX_GRAS_CL_VPORT_YOFFSET_0__SHIFT 0 -static inline uint32_t A4XX_GRAS_CL_VPORT_YOFFSET_0(float val) -{ - return ((fui(val)) << A4XX_GRAS_CL_VPORT_YOFFSET_0__SHIFT) & A4XX_GRAS_CL_VPORT_YOFFSET_0__MASK; -} - -#define REG_A4XX_GRAS_CL_VPORT_YSCALE_0 0x0000200b -#define A4XX_GRAS_CL_VPORT_YSCALE_0__MASK 0xffffffff -#define A4XX_GRAS_CL_VPORT_YSCALE_0__SHIFT 0 -static inline uint32_t A4XX_GRAS_CL_VPORT_YSCALE_0(float val) -{ - return ((fui(val)) << A4XX_GRAS_CL_VPORT_YSCALE_0__SHIFT) & A4XX_GRAS_CL_VPORT_YSCALE_0__MASK; -} - -#define REG_A4XX_GRAS_CL_VPORT_ZOFFSET_0 0x0000200c -#define A4XX_GRAS_CL_VPORT_ZOFFSET_0__MASK 0xffffffff -#define A4XX_GRAS_CL_VPORT_ZOFFSET_0__SHIFT 0 
-static inline uint32_t A4XX_GRAS_CL_VPORT_ZOFFSET_0(float val) -{ - return ((fui(val)) << A4XX_GRAS_CL_VPORT_ZOFFSET_0__SHIFT) & A4XX_GRAS_CL_VPORT_ZOFFSET_0__MASK; -} - -#define REG_A4XX_GRAS_CL_VPORT_ZSCALE_0 0x0000200d -#define A4XX_GRAS_CL_VPORT_ZSCALE_0__MASK 0xffffffff -#define A4XX_GRAS_CL_VPORT_ZSCALE_0__SHIFT 0 -static inline uint32_t A4XX_GRAS_CL_VPORT_ZSCALE_0(float val) -{ - return ((fui(val)) << A4XX_GRAS_CL_VPORT_ZSCALE_0__SHIFT) & A4XX_GRAS_CL_VPORT_ZSCALE_0__MASK; -} - -#define REG_A4XX_GRAS_SU_POINT_MINMAX 0x00002070 -#define A4XX_GRAS_SU_POINT_MINMAX_MIN__MASK 0x0000ffff -#define A4XX_GRAS_SU_POINT_MINMAX_MIN__SHIFT 0 -static inline uint32_t A4XX_GRAS_SU_POINT_MINMAX_MIN(float val) -{ - return ((((uint32_t)(val * 16.0))) << A4XX_GRAS_SU_POINT_MINMAX_MIN__SHIFT) & A4XX_GRAS_SU_POINT_MINMAX_MIN__MASK; -} -#define A4XX_GRAS_SU_POINT_MINMAX_MAX__MASK 0xffff0000 -#define A4XX_GRAS_SU_POINT_MINMAX_MAX__SHIFT 16 -static inline uint32_t A4XX_GRAS_SU_POINT_MINMAX_MAX(float val) -{ - return ((((uint32_t)(val * 16.0))) << A4XX_GRAS_SU_POINT_MINMAX_MAX__SHIFT) & A4XX_GRAS_SU_POINT_MINMAX_MAX__MASK; -} - -#define REG_A4XX_GRAS_SU_POINT_SIZE 0x00002071 -#define A4XX_GRAS_SU_POINT_SIZE__MASK 0xffffffff -#define A4XX_GRAS_SU_POINT_SIZE__SHIFT 0 -static inline uint32_t A4XX_GRAS_SU_POINT_SIZE(float val) -{ - return ((((int32_t)(val * 16.0))) << A4XX_GRAS_SU_POINT_SIZE__SHIFT) & A4XX_GRAS_SU_POINT_SIZE__MASK; -} - -#define REG_A4XX_GRAS_ALPHA_CONTROL 0x00002073 -#define A4XX_GRAS_ALPHA_CONTROL_ALPHA_TEST_ENABLE 0x00000004 -#define A4XX_GRAS_ALPHA_CONTROL_FORCE_FRAGZ_TO_FS 0x00000008 - -#define REG_A4XX_GRAS_SU_POLY_OFFSET_SCALE 0x00002074 -#define A4XX_GRAS_SU_POLY_OFFSET_SCALE__MASK 0xffffffff -#define A4XX_GRAS_SU_POLY_OFFSET_SCALE__SHIFT 0 -static inline uint32_t A4XX_GRAS_SU_POLY_OFFSET_SCALE(float val) -{ - return ((fui(val)) << A4XX_GRAS_SU_POLY_OFFSET_SCALE__SHIFT) & A4XX_GRAS_SU_POLY_OFFSET_SCALE__MASK; -} - -#define REG_A4XX_GRAS_SU_POLY_OFFSET_OFFSET 
0x00002075 -#define A4XX_GRAS_SU_POLY_OFFSET_OFFSET__MASK 0xffffffff -#define A4XX_GRAS_SU_POLY_OFFSET_OFFSET__SHIFT 0 -static inline uint32_t A4XX_GRAS_SU_POLY_OFFSET_OFFSET(float val) -{ - return ((fui(val)) << A4XX_GRAS_SU_POLY_OFFSET_OFFSET__SHIFT) & A4XX_GRAS_SU_POLY_OFFSET_OFFSET__MASK; -} - -#define REG_A4XX_GRAS_SU_POLY_OFFSET_CLAMP 0x00002076 -#define A4XX_GRAS_SU_POLY_OFFSET_CLAMP__MASK 0xffffffff -#define A4XX_GRAS_SU_POLY_OFFSET_CLAMP__SHIFT 0 -static inline uint32_t A4XX_GRAS_SU_POLY_OFFSET_CLAMP(float val) -{ - return ((fui(val)) << A4XX_GRAS_SU_POLY_OFFSET_CLAMP__SHIFT) & A4XX_GRAS_SU_POLY_OFFSET_CLAMP__MASK; -} - -#define REG_A4XX_GRAS_DEPTH_CONTROL 0x00002077 -#define A4XX_GRAS_DEPTH_CONTROL_FORMAT__MASK 0x00000003 -#define A4XX_GRAS_DEPTH_CONTROL_FORMAT__SHIFT 0 -static inline uint32_t A4XX_GRAS_DEPTH_CONTROL_FORMAT(enum a4xx_depth_format val) -{ - return ((val) << A4XX_GRAS_DEPTH_CONTROL_FORMAT__SHIFT) & A4XX_GRAS_DEPTH_CONTROL_FORMAT__MASK; -} - -#define REG_A4XX_GRAS_SU_MODE_CONTROL 0x00002078 -#define A4XX_GRAS_SU_MODE_CONTROL_CULL_FRONT 0x00000001 -#define A4XX_GRAS_SU_MODE_CONTROL_CULL_BACK 0x00000002 -#define A4XX_GRAS_SU_MODE_CONTROL_FRONT_CW 0x00000004 -#define A4XX_GRAS_SU_MODE_CONTROL_LINEHALFWIDTH__MASK 0x000007f8 -#define A4XX_GRAS_SU_MODE_CONTROL_LINEHALFWIDTH__SHIFT 3 -static inline uint32_t A4XX_GRAS_SU_MODE_CONTROL_LINEHALFWIDTH(float val) -{ - return ((((int32_t)(val * 4.0))) << A4XX_GRAS_SU_MODE_CONTROL_LINEHALFWIDTH__SHIFT) & A4XX_GRAS_SU_MODE_CONTROL_LINEHALFWIDTH__MASK; -} -#define A4XX_GRAS_SU_MODE_CONTROL_POLY_OFFSET 0x00000800 -#define A4XX_GRAS_SU_MODE_CONTROL_MSAA_ENABLE 0x00002000 -#define A4XX_GRAS_SU_MODE_CONTROL_RENDERING_PASS 0x00100000 - -#define REG_A4XX_GRAS_SC_CONTROL 0x0000207b -#define A4XX_GRAS_SC_CONTROL_RENDER_MODE__MASK 0x0000000c -#define A4XX_GRAS_SC_CONTROL_RENDER_MODE__SHIFT 2 -static inline uint32_t A4XX_GRAS_SC_CONTROL_RENDER_MODE(enum a3xx_render_mode val) -{ - return ((val) << 
A4XX_GRAS_SC_CONTROL_RENDER_MODE__SHIFT) & A4XX_GRAS_SC_CONTROL_RENDER_MODE__MASK; -} -#define A4XX_GRAS_SC_CONTROL_MSAA_SAMPLES__MASK 0x00000380 -#define A4XX_GRAS_SC_CONTROL_MSAA_SAMPLES__SHIFT 7 -static inline uint32_t A4XX_GRAS_SC_CONTROL_MSAA_SAMPLES(uint32_t val) -{ - return ((val) << A4XX_GRAS_SC_CONTROL_MSAA_SAMPLES__SHIFT) & A4XX_GRAS_SC_CONTROL_MSAA_SAMPLES__MASK; -} -#define A4XX_GRAS_SC_CONTROL_MSAA_DISABLE 0x00000800 -#define A4XX_GRAS_SC_CONTROL_RASTER_MODE__MASK 0x0000f000 -#define A4XX_GRAS_SC_CONTROL_RASTER_MODE__SHIFT 12 -static inline uint32_t A4XX_GRAS_SC_CONTROL_RASTER_MODE(uint32_t val) -{ - return ((val) << A4XX_GRAS_SC_CONTROL_RASTER_MODE__SHIFT) & A4XX_GRAS_SC_CONTROL_RASTER_MODE__MASK; -} - -#define REG_A4XX_GRAS_SC_SCREEN_SCISSOR_TL 0x0000207c -#define A4XX_GRAS_SC_SCREEN_SCISSOR_TL_WINDOW_OFFSET_DISABLE 0x80000000 -#define A4XX_GRAS_SC_SCREEN_SCISSOR_TL_X__MASK 0x00007fff -#define A4XX_GRAS_SC_SCREEN_SCISSOR_TL_X__SHIFT 0 -static inline uint32_t A4XX_GRAS_SC_SCREEN_SCISSOR_TL_X(uint32_t val) -{ - return ((val) << A4XX_GRAS_SC_SCREEN_SCISSOR_TL_X__SHIFT) & A4XX_GRAS_SC_SCREEN_SCISSOR_TL_X__MASK; -} -#define A4XX_GRAS_SC_SCREEN_SCISSOR_TL_Y__MASK 0x7fff0000 -#define A4XX_GRAS_SC_SCREEN_SCISSOR_TL_Y__SHIFT 16 -static inline uint32_t A4XX_GRAS_SC_SCREEN_SCISSOR_TL_Y(uint32_t val) -{ - return ((val) << A4XX_GRAS_SC_SCREEN_SCISSOR_TL_Y__SHIFT) & A4XX_GRAS_SC_SCREEN_SCISSOR_TL_Y__MASK; -} - -#define REG_A4XX_GRAS_SC_SCREEN_SCISSOR_BR 0x0000207d -#define A4XX_GRAS_SC_SCREEN_SCISSOR_BR_WINDOW_OFFSET_DISABLE 0x80000000 -#define A4XX_GRAS_SC_SCREEN_SCISSOR_BR_X__MASK 0x00007fff -#define A4XX_GRAS_SC_SCREEN_SCISSOR_BR_X__SHIFT 0 -static inline uint32_t A4XX_GRAS_SC_SCREEN_SCISSOR_BR_X(uint32_t val) -{ - return ((val) << A4XX_GRAS_SC_SCREEN_SCISSOR_BR_X__SHIFT) & A4XX_GRAS_SC_SCREEN_SCISSOR_BR_X__MASK; -} -#define A4XX_GRAS_SC_SCREEN_SCISSOR_BR_Y__MASK 0x7fff0000 -#define A4XX_GRAS_SC_SCREEN_SCISSOR_BR_Y__SHIFT 16 -static inline uint32_t 
A4XX_GRAS_SC_SCREEN_SCISSOR_BR_Y(uint32_t val) -{ - return ((val) << A4XX_GRAS_SC_SCREEN_SCISSOR_BR_Y__SHIFT) & A4XX_GRAS_SC_SCREEN_SCISSOR_BR_Y__MASK; -} - -#define REG_A4XX_GRAS_SC_WINDOW_SCISSOR_BR 0x0000209c -#define A4XX_GRAS_SC_WINDOW_SCISSOR_BR_WINDOW_OFFSET_DISABLE 0x80000000 -#define A4XX_GRAS_SC_WINDOW_SCISSOR_BR_X__MASK 0x00007fff -#define A4XX_GRAS_SC_WINDOW_SCISSOR_BR_X__SHIFT 0 -static inline uint32_t A4XX_GRAS_SC_WINDOW_SCISSOR_BR_X(uint32_t val) -{ - return ((val) << A4XX_GRAS_SC_WINDOW_SCISSOR_BR_X__SHIFT) & A4XX_GRAS_SC_WINDOW_SCISSOR_BR_X__MASK; -} -#define A4XX_GRAS_SC_WINDOW_SCISSOR_BR_Y__MASK 0x7fff0000 -#define A4XX_GRAS_SC_WINDOW_SCISSOR_BR_Y__SHIFT 16 -static inline uint32_t A4XX_GRAS_SC_WINDOW_SCISSOR_BR_Y(uint32_t val) -{ - return ((val) << A4XX_GRAS_SC_WINDOW_SCISSOR_BR_Y__SHIFT) & A4XX_GRAS_SC_WINDOW_SCISSOR_BR_Y__MASK; -} - -#define REG_A4XX_GRAS_SC_WINDOW_SCISSOR_TL 0x0000209d -#define A4XX_GRAS_SC_WINDOW_SCISSOR_TL_WINDOW_OFFSET_DISABLE 0x80000000 -#define A4XX_GRAS_SC_WINDOW_SCISSOR_TL_X__MASK 0x00007fff -#define A4XX_GRAS_SC_WINDOW_SCISSOR_TL_X__SHIFT 0 -static inline uint32_t A4XX_GRAS_SC_WINDOW_SCISSOR_TL_X(uint32_t val) -{ - return ((val) << A4XX_GRAS_SC_WINDOW_SCISSOR_TL_X__SHIFT) & A4XX_GRAS_SC_WINDOW_SCISSOR_TL_X__MASK; -} -#define A4XX_GRAS_SC_WINDOW_SCISSOR_TL_Y__MASK 0x7fff0000 -#define A4XX_GRAS_SC_WINDOW_SCISSOR_TL_Y__SHIFT 16 -static inline uint32_t A4XX_GRAS_SC_WINDOW_SCISSOR_TL_Y(uint32_t val) -{ - return ((val) << A4XX_GRAS_SC_WINDOW_SCISSOR_TL_Y__SHIFT) & A4XX_GRAS_SC_WINDOW_SCISSOR_TL_Y__MASK; -} - -#define REG_A4XX_GRAS_SC_EXTENT_WINDOW_BR 0x0000209e -#define A4XX_GRAS_SC_EXTENT_WINDOW_BR_WINDOW_OFFSET_DISABLE 0x80000000 -#define A4XX_GRAS_SC_EXTENT_WINDOW_BR_X__MASK 0x00007fff -#define A4XX_GRAS_SC_EXTENT_WINDOW_BR_X__SHIFT 0 -static inline uint32_t A4XX_GRAS_SC_EXTENT_WINDOW_BR_X(uint32_t val) -{ - return ((val) << A4XX_GRAS_SC_EXTENT_WINDOW_BR_X__SHIFT) & A4XX_GRAS_SC_EXTENT_WINDOW_BR_X__MASK; -} -#define 
A4XX_GRAS_SC_EXTENT_WINDOW_BR_Y__MASK 0x7fff0000 -#define A4XX_GRAS_SC_EXTENT_WINDOW_BR_Y__SHIFT 16 -static inline uint32_t A4XX_GRAS_SC_EXTENT_WINDOW_BR_Y(uint32_t val) -{ - return ((val) << A4XX_GRAS_SC_EXTENT_WINDOW_BR_Y__SHIFT) & A4XX_GRAS_SC_EXTENT_WINDOW_BR_Y__MASK; -} - -#define REG_A4XX_GRAS_SC_EXTENT_WINDOW_TL 0x0000209f -#define A4XX_GRAS_SC_EXTENT_WINDOW_TL_WINDOW_OFFSET_DISABLE 0x80000000 -#define A4XX_GRAS_SC_EXTENT_WINDOW_TL_X__MASK 0x00007fff -#define A4XX_GRAS_SC_EXTENT_WINDOW_TL_X__SHIFT 0 -static inline uint32_t A4XX_GRAS_SC_EXTENT_WINDOW_TL_X(uint32_t val) -{ - return ((val) << A4XX_GRAS_SC_EXTENT_WINDOW_TL_X__SHIFT) & A4XX_GRAS_SC_EXTENT_WINDOW_TL_X__MASK; -} -#define A4XX_GRAS_SC_EXTENT_WINDOW_TL_Y__MASK 0x7fff0000 -#define A4XX_GRAS_SC_EXTENT_WINDOW_TL_Y__SHIFT 16 -static inline uint32_t A4XX_GRAS_SC_EXTENT_WINDOW_TL_Y(uint32_t val) -{ - return ((val) << A4XX_GRAS_SC_EXTENT_WINDOW_TL_Y__SHIFT) & A4XX_GRAS_SC_EXTENT_WINDOW_TL_Y__MASK; -} - -#define REG_A4XX_UCHE_CACHE_MODE_CONTROL 0x00000e80 - -#define REG_A4XX_UCHE_TRAP_BASE_LO 0x00000e83 - -#define REG_A4XX_UCHE_TRAP_BASE_HI 0x00000e84 - -#define REG_A4XX_UCHE_CACHE_STATUS 0x00000e88 - -#define REG_A4XX_UCHE_INVALIDATE0 0x00000e8a - -#define REG_A4XX_UCHE_INVALIDATE1 0x00000e8b - -#define REG_A4XX_UCHE_CACHE_WAYS_VFD 0x00000e8c - -#define REG_A4XX_UCHE_PERFCTR_UCHE_SEL_0 0x00000e8e - -#define REG_A4XX_UCHE_PERFCTR_UCHE_SEL_1 0x00000e8f - -#define REG_A4XX_UCHE_PERFCTR_UCHE_SEL_2 0x00000e90 - -#define REG_A4XX_UCHE_PERFCTR_UCHE_SEL_3 0x00000e91 - -#define REG_A4XX_UCHE_PERFCTR_UCHE_SEL_4 0x00000e92 - -#define REG_A4XX_UCHE_PERFCTR_UCHE_SEL_5 0x00000e93 - -#define REG_A4XX_UCHE_PERFCTR_UCHE_SEL_6 0x00000e94 - -#define REG_A4XX_UCHE_PERFCTR_UCHE_SEL_7 0x00000e95 - -#define REG_A4XX_HLSQ_TIMEOUT_THRESHOLD 0x00000e00 - -#define REG_A4XX_HLSQ_DEBUG_ECO_CONTROL 0x00000e04 - -#define REG_A4XX_HLSQ_MODE_CONTROL 0x00000e05 - -#define REG_A4XX_HLSQ_PERF_PIPE_MASK 0x00000e0e - -#define 
REG_A4XX_HLSQ_PERFCTR_HLSQ_SEL_0 0x00000e06 - -#define REG_A4XX_HLSQ_PERFCTR_HLSQ_SEL_1 0x00000e07 - -#define REG_A4XX_HLSQ_PERFCTR_HLSQ_SEL_2 0x00000e08 - -#define REG_A4XX_HLSQ_PERFCTR_HLSQ_SEL_3 0x00000e09 - -#define REG_A4XX_HLSQ_PERFCTR_HLSQ_SEL_4 0x00000e0a - -#define REG_A4XX_HLSQ_PERFCTR_HLSQ_SEL_5 0x00000e0b - -#define REG_A4XX_HLSQ_PERFCTR_HLSQ_SEL_6 0x00000e0c - -#define REG_A4XX_HLSQ_PERFCTR_HLSQ_SEL_7 0x00000e0d - -#define REG_A4XX_HLSQ_CONTROL_0_REG 0x000023c0 -#define A4XX_HLSQ_CONTROL_0_REG_FSTHREADSIZE__MASK 0x00000010 -#define A4XX_HLSQ_CONTROL_0_REG_FSTHREADSIZE__SHIFT 4 -static inline uint32_t A4XX_HLSQ_CONTROL_0_REG_FSTHREADSIZE(enum a3xx_threadsize val) -{ - return ((val) << A4XX_HLSQ_CONTROL_0_REG_FSTHREADSIZE__SHIFT) & A4XX_HLSQ_CONTROL_0_REG_FSTHREADSIZE__MASK; -} -#define A4XX_HLSQ_CONTROL_0_REG_FSSUPERTHREADENABLE 0x00000040 -#define A4XX_HLSQ_CONTROL_0_REG_SPSHADERRESTART 0x00000200 -#define A4XX_HLSQ_CONTROL_0_REG_RESERVED2 0x00000400 -#define A4XX_HLSQ_CONTROL_0_REG_CHUNKDISABLE 0x04000000 -#define A4XX_HLSQ_CONTROL_0_REG_CONSTMODE__MASK 0x08000000 -#define A4XX_HLSQ_CONTROL_0_REG_CONSTMODE__SHIFT 27 -static inline uint32_t A4XX_HLSQ_CONTROL_0_REG_CONSTMODE(uint32_t val) -{ - return ((val) << A4XX_HLSQ_CONTROL_0_REG_CONSTMODE__SHIFT) & A4XX_HLSQ_CONTROL_0_REG_CONSTMODE__MASK; -} -#define A4XX_HLSQ_CONTROL_0_REG_LAZYUPDATEDISABLE 0x10000000 -#define A4XX_HLSQ_CONTROL_0_REG_SPCONSTFULLUPDATE 0x20000000 -#define A4XX_HLSQ_CONTROL_0_REG_TPFULLUPDATE 0x40000000 -#define A4XX_HLSQ_CONTROL_0_REG_SINGLECONTEXT 0x80000000 - -#define REG_A4XX_HLSQ_CONTROL_1_REG 0x000023c1 -#define A4XX_HLSQ_CONTROL_1_REG_VSTHREADSIZE__MASK 0x00000040 -#define A4XX_HLSQ_CONTROL_1_REG_VSTHREADSIZE__SHIFT 6 -static inline uint32_t A4XX_HLSQ_CONTROL_1_REG_VSTHREADSIZE(enum a3xx_threadsize val) -{ - return ((val) << A4XX_HLSQ_CONTROL_1_REG_VSTHREADSIZE__SHIFT) & A4XX_HLSQ_CONTROL_1_REG_VSTHREADSIZE__MASK; -} -#define A4XX_HLSQ_CONTROL_1_REG_VSSUPERTHREADENABLE 
0x00000100 -#define A4XX_HLSQ_CONTROL_1_REG_RESERVED1 0x00000200 -#define A4XX_HLSQ_CONTROL_1_REG_COORDREGID__MASK 0x00ff0000 -#define A4XX_HLSQ_CONTROL_1_REG_COORDREGID__SHIFT 16 -static inline uint32_t A4XX_HLSQ_CONTROL_1_REG_COORDREGID(uint32_t val) -{ - return ((val) << A4XX_HLSQ_CONTROL_1_REG_COORDREGID__SHIFT) & A4XX_HLSQ_CONTROL_1_REG_COORDREGID__MASK; -} -#define A4XX_HLSQ_CONTROL_1_REG_ZWCOORDREGID__MASK 0xff000000 -#define A4XX_HLSQ_CONTROL_1_REG_ZWCOORDREGID__SHIFT 24 -static inline uint32_t A4XX_HLSQ_CONTROL_1_REG_ZWCOORDREGID(uint32_t val) -{ - return ((val) << A4XX_HLSQ_CONTROL_1_REG_ZWCOORDREGID__SHIFT) & A4XX_HLSQ_CONTROL_1_REG_ZWCOORDREGID__MASK; -} - -#define REG_A4XX_HLSQ_CONTROL_2_REG 0x000023c2 -#define A4XX_HLSQ_CONTROL_2_REG_PRIMALLOCTHRESHOLD__MASK 0xfc000000 -#define A4XX_HLSQ_CONTROL_2_REG_PRIMALLOCTHRESHOLD__SHIFT 26 -static inline uint32_t A4XX_HLSQ_CONTROL_2_REG_PRIMALLOCTHRESHOLD(uint32_t val) -{ - return ((val) << A4XX_HLSQ_CONTROL_2_REG_PRIMALLOCTHRESHOLD__SHIFT) & A4XX_HLSQ_CONTROL_2_REG_PRIMALLOCTHRESHOLD__MASK; -} -#define A4XX_HLSQ_CONTROL_2_REG_FACEREGID__MASK 0x000003fc -#define A4XX_HLSQ_CONTROL_2_REG_FACEREGID__SHIFT 2 -static inline uint32_t A4XX_HLSQ_CONTROL_2_REG_FACEREGID(uint32_t val) -{ - return ((val) << A4XX_HLSQ_CONTROL_2_REG_FACEREGID__SHIFT) & A4XX_HLSQ_CONTROL_2_REG_FACEREGID__MASK; -} -#define A4XX_HLSQ_CONTROL_2_REG_SAMPLEID_REGID__MASK 0x0003fc00 -#define A4XX_HLSQ_CONTROL_2_REG_SAMPLEID_REGID__SHIFT 10 -static inline uint32_t A4XX_HLSQ_CONTROL_2_REG_SAMPLEID_REGID(uint32_t val) -{ - return ((val) << A4XX_HLSQ_CONTROL_2_REG_SAMPLEID_REGID__SHIFT) & A4XX_HLSQ_CONTROL_2_REG_SAMPLEID_REGID__MASK; -} -#define A4XX_HLSQ_CONTROL_2_REG_SAMPLEMASK_REGID__MASK 0x03fc0000 -#define A4XX_HLSQ_CONTROL_2_REG_SAMPLEMASK_REGID__SHIFT 18 -static inline uint32_t A4XX_HLSQ_CONTROL_2_REG_SAMPLEMASK_REGID(uint32_t val) -{ - return ((val) << A4XX_HLSQ_CONTROL_2_REG_SAMPLEMASK_REGID__SHIFT) & 
A4XX_HLSQ_CONTROL_2_REG_SAMPLEMASK_REGID__MASK; -} - -#define REG_A4XX_HLSQ_CONTROL_3_REG 0x000023c3 -#define A4XX_HLSQ_CONTROL_3_REG_REGID__MASK 0x000000ff -#define A4XX_HLSQ_CONTROL_3_REG_REGID__SHIFT 0 -static inline uint32_t A4XX_HLSQ_CONTROL_3_REG_REGID(uint32_t val) -{ - return ((val) << A4XX_HLSQ_CONTROL_3_REG_REGID__SHIFT) & A4XX_HLSQ_CONTROL_3_REG_REGID__MASK; -} - -#define REG_A4XX_HLSQ_CONTROL_4_REG 0x000023c4 - -#define REG_A4XX_HLSQ_VS_CONTROL_REG 0x000023c5 -#define A4XX_HLSQ_VS_CONTROL_REG_CONSTLENGTH__MASK 0x000000ff -#define A4XX_HLSQ_VS_CONTROL_REG_CONSTLENGTH__SHIFT 0 -static inline uint32_t A4XX_HLSQ_VS_CONTROL_REG_CONSTLENGTH(uint32_t val) -{ - return ((val) << A4XX_HLSQ_VS_CONTROL_REG_CONSTLENGTH__SHIFT) & A4XX_HLSQ_VS_CONTROL_REG_CONSTLENGTH__MASK; -} -#define A4XX_HLSQ_VS_CONTROL_REG_CONSTOBJECTOFFSET__MASK 0x00007f00 -#define A4XX_HLSQ_VS_CONTROL_REG_CONSTOBJECTOFFSET__SHIFT 8 -static inline uint32_t A4XX_HLSQ_VS_CONTROL_REG_CONSTOBJECTOFFSET(uint32_t val) -{ - return ((val) << A4XX_HLSQ_VS_CONTROL_REG_CONSTOBJECTOFFSET__SHIFT) & A4XX_HLSQ_VS_CONTROL_REG_CONSTOBJECTOFFSET__MASK; -} -#define A4XX_HLSQ_VS_CONTROL_REG_SSBO_ENABLE 0x00008000 -#define A4XX_HLSQ_VS_CONTROL_REG_ENABLED 0x00010000 -#define A4XX_HLSQ_VS_CONTROL_REG_SHADEROBJOFFSET__MASK 0x00fe0000 -#define A4XX_HLSQ_VS_CONTROL_REG_SHADEROBJOFFSET__SHIFT 17 -static inline uint32_t A4XX_HLSQ_VS_CONTROL_REG_SHADEROBJOFFSET(uint32_t val) -{ - return ((val) << A4XX_HLSQ_VS_CONTROL_REG_SHADEROBJOFFSET__SHIFT) & A4XX_HLSQ_VS_CONTROL_REG_SHADEROBJOFFSET__MASK; -} -#define A4XX_HLSQ_VS_CONTROL_REG_INSTRLENGTH__MASK 0xff000000 -#define A4XX_HLSQ_VS_CONTROL_REG_INSTRLENGTH__SHIFT 24 -static inline uint32_t A4XX_HLSQ_VS_CONTROL_REG_INSTRLENGTH(uint32_t val) -{ - return ((val) << A4XX_HLSQ_VS_CONTROL_REG_INSTRLENGTH__SHIFT) & A4XX_HLSQ_VS_CONTROL_REG_INSTRLENGTH__MASK; -} - -#define REG_A4XX_HLSQ_FS_CONTROL_REG 0x000023c6 -#define A4XX_HLSQ_FS_CONTROL_REG_CONSTLENGTH__MASK 0x000000ff -#define 
A4XX_HLSQ_FS_CONTROL_REG_CONSTLENGTH__SHIFT 0 -static inline uint32_t A4XX_HLSQ_FS_CONTROL_REG_CONSTLENGTH(uint32_t val) -{ - return ((val) << A4XX_HLSQ_FS_CONTROL_REG_CONSTLENGTH__SHIFT) & A4XX_HLSQ_FS_CONTROL_REG_CONSTLENGTH__MASK; -} -#define A4XX_HLSQ_FS_CONTROL_REG_CONSTOBJECTOFFSET__MASK 0x00007f00 -#define A4XX_HLSQ_FS_CONTROL_REG_CONSTOBJECTOFFSET__SHIFT 8 -static inline uint32_t A4XX_HLSQ_FS_CONTROL_REG_CONSTOBJECTOFFSET(uint32_t val) -{ - return ((val) << A4XX_HLSQ_FS_CONTROL_REG_CONSTOBJECTOFFSET__SHIFT) & A4XX_HLSQ_FS_CONTROL_REG_CONSTOBJECTOFFSET__MASK; -} -#define A4XX_HLSQ_FS_CONTROL_REG_SSBO_ENABLE 0x00008000 -#define A4XX_HLSQ_FS_CONTROL_REG_ENABLED 0x00010000 -#define A4XX_HLSQ_FS_CONTROL_REG_SHADEROBJOFFSET__MASK 0x00fe0000 -#define A4XX_HLSQ_FS_CONTROL_REG_SHADEROBJOFFSET__SHIFT 17 -static inline uint32_t A4XX_HLSQ_FS_CONTROL_REG_SHADEROBJOFFSET(uint32_t val) -{ - return ((val) << A4XX_HLSQ_FS_CONTROL_REG_SHADEROBJOFFSET__SHIFT) & A4XX_HLSQ_FS_CONTROL_REG_SHADEROBJOFFSET__MASK; -} -#define A4XX_HLSQ_FS_CONTROL_REG_INSTRLENGTH__MASK 0xff000000 -#define A4XX_HLSQ_FS_CONTROL_REG_INSTRLENGTH__SHIFT 24 -static inline uint32_t A4XX_HLSQ_FS_CONTROL_REG_INSTRLENGTH(uint32_t val) -{ - return ((val) << A4XX_HLSQ_FS_CONTROL_REG_INSTRLENGTH__SHIFT) & A4XX_HLSQ_FS_CONTROL_REG_INSTRLENGTH__MASK; -} - -#define REG_A4XX_HLSQ_HS_CONTROL_REG 0x000023c7 -#define A4XX_HLSQ_HS_CONTROL_REG_CONSTLENGTH__MASK 0x000000ff -#define A4XX_HLSQ_HS_CONTROL_REG_CONSTLENGTH__SHIFT 0 -static inline uint32_t A4XX_HLSQ_HS_CONTROL_REG_CONSTLENGTH(uint32_t val) -{ - return ((val) << A4XX_HLSQ_HS_CONTROL_REG_CONSTLENGTH__SHIFT) & A4XX_HLSQ_HS_CONTROL_REG_CONSTLENGTH__MASK; -} -#define A4XX_HLSQ_HS_CONTROL_REG_CONSTOBJECTOFFSET__MASK 0x00007f00 -#define A4XX_HLSQ_HS_CONTROL_REG_CONSTOBJECTOFFSET__SHIFT 8 -static inline uint32_t A4XX_HLSQ_HS_CONTROL_REG_CONSTOBJECTOFFSET(uint32_t val) -{ - return ((val) << A4XX_HLSQ_HS_CONTROL_REG_CONSTOBJECTOFFSET__SHIFT) & 
A4XX_HLSQ_HS_CONTROL_REG_CONSTOBJECTOFFSET__MASK; -} -#define A4XX_HLSQ_HS_CONTROL_REG_SSBO_ENABLE 0x00008000 -#define A4XX_HLSQ_HS_CONTROL_REG_ENABLED 0x00010000 -#define A4XX_HLSQ_HS_CONTROL_REG_SHADEROBJOFFSET__MASK 0x00fe0000 -#define A4XX_HLSQ_HS_CONTROL_REG_SHADEROBJOFFSET__SHIFT 17 -static inline uint32_t A4XX_HLSQ_HS_CONTROL_REG_SHADEROBJOFFSET(uint32_t val) -{ - return ((val) << A4XX_HLSQ_HS_CONTROL_REG_SHADEROBJOFFSET__SHIFT) & A4XX_HLSQ_HS_CONTROL_REG_SHADEROBJOFFSET__MASK; -} -#define A4XX_HLSQ_HS_CONTROL_REG_INSTRLENGTH__MASK 0xff000000 -#define A4XX_HLSQ_HS_CONTROL_REG_INSTRLENGTH__SHIFT 24 -static inline uint32_t A4XX_HLSQ_HS_CONTROL_REG_INSTRLENGTH(uint32_t val) -{ - return ((val) << A4XX_HLSQ_HS_CONTROL_REG_INSTRLENGTH__SHIFT) & A4XX_HLSQ_HS_CONTROL_REG_INSTRLENGTH__MASK; -} - -#define REG_A4XX_HLSQ_DS_CONTROL_REG 0x000023c8 -#define A4XX_HLSQ_DS_CONTROL_REG_CONSTLENGTH__MASK 0x000000ff -#define A4XX_HLSQ_DS_CONTROL_REG_CONSTLENGTH__SHIFT 0 -static inline uint32_t A4XX_HLSQ_DS_CONTROL_REG_CONSTLENGTH(uint32_t val) -{ - return ((val) << A4XX_HLSQ_DS_CONTROL_REG_CONSTLENGTH__SHIFT) & A4XX_HLSQ_DS_CONTROL_REG_CONSTLENGTH__MASK; -} -#define A4XX_HLSQ_DS_CONTROL_REG_CONSTOBJECTOFFSET__MASK 0x00007f00 -#define A4XX_HLSQ_DS_CONTROL_REG_CONSTOBJECTOFFSET__SHIFT 8 -static inline uint32_t A4XX_HLSQ_DS_CONTROL_REG_CONSTOBJECTOFFSET(uint32_t val) -{ - return ((val) << A4XX_HLSQ_DS_CONTROL_REG_CONSTOBJECTOFFSET__SHIFT) & A4XX_HLSQ_DS_CONTROL_REG_CONSTOBJECTOFFSET__MASK; -} -#define A4XX_HLSQ_DS_CONTROL_REG_SSBO_ENABLE 0x00008000 -#define A4XX_HLSQ_DS_CONTROL_REG_ENABLED 0x00010000 -#define A4XX_HLSQ_DS_CONTROL_REG_SHADEROBJOFFSET__MASK 0x00fe0000 -#define A4XX_HLSQ_DS_CONTROL_REG_SHADEROBJOFFSET__SHIFT 17 -static inline uint32_t A4XX_HLSQ_DS_CONTROL_REG_SHADEROBJOFFSET(uint32_t val) -{ - return ((val) << A4XX_HLSQ_DS_CONTROL_REG_SHADEROBJOFFSET__SHIFT) & A4XX_HLSQ_DS_CONTROL_REG_SHADEROBJOFFSET__MASK; -} -#define A4XX_HLSQ_DS_CONTROL_REG_INSTRLENGTH__MASK 
0xff000000 -#define A4XX_HLSQ_DS_CONTROL_REG_INSTRLENGTH__SHIFT 24 -static inline uint32_t A4XX_HLSQ_DS_CONTROL_REG_INSTRLENGTH(uint32_t val) -{ - return ((val) << A4XX_HLSQ_DS_CONTROL_REG_INSTRLENGTH__SHIFT) & A4XX_HLSQ_DS_CONTROL_REG_INSTRLENGTH__MASK; -} - -#define REG_A4XX_HLSQ_GS_CONTROL_REG 0x000023c9 -#define A4XX_HLSQ_GS_CONTROL_REG_CONSTLENGTH__MASK 0x000000ff -#define A4XX_HLSQ_GS_CONTROL_REG_CONSTLENGTH__SHIFT 0 -static inline uint32_t A4XX_HLSQ_GS_CONTROL_REG_CONSTLENGTH(uint32_t val) -{ - return ((val) << A4XX_HLSQ_GS_CONTROL_REG_CONSTLENGTH__SHIFT) & A4XX_HLSQ_GS_CONTROL_REG_CONSTLENGTH__MASK; -} -#define A4XX_HLSQ_GS_CONTROL_REG_CONSTOBJECTOFFSET__MASK 0x00007f00 -#define A4XX_HLSQ_GS_CONTROL_REG_CONSTOBJECTOFFSET__SHIFT 8 -static inline uint32_t A4XX_HLSQ_GS_CONTROL_REG_CONSTOBJECTOFFSET(uint32_t val) -{ - return ((val) << A4XX_HLSQ_GS_CONTROL_REG_CONSTOBJECTOFFSET__SHIFT) & A4XX_HLSQ_GS_CONTROL_REG_CONSTOBJECTOFFSET__MASK; -} -#define A4XX_HLSQ_GS_CONTROL_REG_SSBO_ENABLE 0x00008000 -#define A4XX_HLSQ_GS_CONTROL_REG_ENABLED 0x00010000 -#define A4XX_HLSQ_GS_CONTROL_REG_SHADEROBJOFFSET__MASK 0x00fe0000 -#define A4XX_HLSQ_GS_CONTROL_REG_SHADEROBJOFFSET__SHIFT 17 -static inline uint32_t A4XX_HLSQ_GS_CONTROL_REG_SHADEROBJOFFSET(uint32_t val) -{ - return ((val) << A4XX_HLSQ_GS_CONTROL_REG_SHADEROBJOFFSET__SHIFT) & A4XX_HLSQ_GS_CONTROL_REG_SHADEROBJOFFSET__MASK; -} -#define A4XX_HLSQ_GS_CONTROL_REG_INSTRLENGTH__MASK 0xff000000 -#define A4XX_HLSQ_GS_CONTROL_REG_INSTRLENGTH__SHIFT 24 -static inline uint32_t A4XX_HLSQ_GS_CONTROL_REG_INSTRLENGTH(uint32_t val) -{ - return ((val) << A4XX_HLSQ_GS_CONTROL_REG_INSTRLENGTH__SHIFT) & A4XX_HLSQ_GS_CONTROL_REG_INSTRLENGTH__MASK; -} - -#define REG_A4XX_HLSQ_CS_CONTROL_REG 0x000023ca -#define A4XX_HLSQ_CS_CONTROL_REG_CONSTLENGTH__MASK 0x000000ff -#define A4XX_HLSQ_CS_CONTROL_REG_CONSTLENGTH__SHIFT 0 -static inline uint32_t A4XX_HLSQ_CS_CONTROL_REG_CONSTLENGTH(uint32_t val) -{ - return ((val) << 
A4XX_HLSQ_CS_CONTROL_REG_CONSTLENGTH__SHIFT) & A4XX_HLSQ_CS_CONTROL_REG_CONSTLENGTH__MASK; -} -#define A4XX_HLSQ_CS_CONTROL_REG_CONSTOBJECTOFFSET__MASK 0x00007f00 -#define A4XX_HLSQ_CS_CONTROL_REG_CONSTOBJECTOFFSET__SHIFT 8 -static inline uint32_t A4XX_HLSQ_CS_CONTROL_REG_CONSTOBJECTOFFSET(uint32_t val) -{ - return ((val) << A4XX_HLSQ_CS_CONTROL_REG_CONSTOBJECTOFFSET__SHIFT) & A4XX_HLSQ_CS_CONTROL_REG_CONSTOBJECTOFFSET__MASK; -} -#define A4XX_HLSQ_CS_CONTROL_REG_SSBO_ENABLE 0x00008000 -#define A4XX_HLSQ_CS_CONTROL_REG_ENABLED 0x00010000 -#define A4XX_HLSQ_CS_CONTROL_REG_SHADEROBJOFFSET__MASK 0x00fe0000 -#define A4XX_HLSQ_CS_CONTROL_REG_SHADEROBJOFFSET__SHIFT 17 -static inline uint32_t A4XX_HLSQ_CS_CONTROL_REG_SHADEROBJOFFSET(uint32_t val) -{ - return ((val) << A4XX_HLSQ_CS_CONTROL_REG_SHADEROBJOFFSET__SHIFT) & A4XX_HLSQ_CS_CONTROL_REG_SHADEROBJOFFSET__MASK; -} -#define A4XX_HLSQ_CS_CONTROL_REG_INSTRLENGTH__MASK 0xff000000 -#define A4XX_HLSQ_CS_CONTROL_REG_INSTRLENGTH__SHIFT 24 -static inline uint32_t A4XX_HLSQ_CS_CONTROL_REG_INSTRLENGTH(uint32_t val) -{ - return ((val) << A4XX_HLSQ_CS_CONTROL_REG_INSTRLENGTH__SHIFT) & A4XX_HLSQ_CS_CONTROL_REG_INSTRLENGTH__MASK; -} - -#define REG_A4XX_HLSQ_CL_NDRANGE_0 0x000023cd -#define A4XX_HLSQ_CL_NDRANGE_0_KERNELDIM__MASK 0x00000003 -#define A4XX_HLSQ_CL_NDRANGE_0_KERNELDIM__SHIFT 0 -static inline uint32_t A4XX_HLSQ_CL_NDRANGE_0_KERNELDIM(uint32_t val) -{ - return ((val) << A4XX_HLSQ_CL_NDRANGE_0_KERNELDIM__SHIFT) & A4XX_HLSQ_CL_NDRANGE_0_KERNELDIM__MASK; -} -#define A4XX_HLSQ_CL_NDRANGE_0_LOCALSIZEX__MASK 0x00000ffc -#define A4XX_HLSQ_CL_NDRANGE_0_LOCALSIZEX__SHIFT 2 -static inline uint32_t A4XX_HLSQ_CL_NDRANGE_0_LOCALSIZEX(uint32_t val) -{ - return ((val) << A4XX_HLSQ_CL_NDRANGE_0_LOCALSIZEX__SHIFT) & A4XX_HLSQ_CL_NDRANGE_0_LOCALSIZEX__MASK; -} -#define A4XX_HLSQ_CL_NDRANGE_0_LOCALSIZEY__MASK 0x003ff000 -#define A4XX_HLSQ_CL_NDRANGE_0_LOCALSIZEY__SHIFT 12 -static inline uint32_t A4XX_HLSQ_CL_NDRANGE_0_LOCALSIZEY(uint32_t val) 
-{ - return ((val) << A4XX_HLSQ_CL_NDRANGE_0_LOCALSIZEY__SHIFT) & A4XX_HLSQ_CL_NDRANGE_0_LOCALSIZEY__MASK; -} -#define A4XX_HLSQ_CL_NDRANGE_0_LOCALSIZEZ__MASK 0xffc00000 -#define A4XX_HLSQ_CL_NDRANGE_0_LOCALSIZEZ__SHIFT 22 -static inline uint32_t A4XX_HLSQ_CL_NDRANGE_0_LOCALSIZEZ(uint32_t val) -{ - return ((val) << A4XX_HLSQ_CL_NDRANGE_0_LOCALSIZEZ__SHIFT) & A4XX_HLSQ_CL_NDRANGE_0_LOCALSIZEZ__MASK; -} - -#define REG_A4XX_HLSQ_CL_NDRANGE_1 0x000023ce -#define A4XX_HLSQ_CL_NDRANGE_1_SIZE_X__MASK 0xffffffff -#define A4XX_HLSQ_CL_NDRANGE_1_SIZE_X__SHIFT 0 -static inline uint32_t A4XX_HLSQ_CL_NDRANGE_1_SIZE_X(uint32_t val) -{ - return ((val) << A4XX_HLSQ_CL_NDRANGE_1_SIZE_X__SHIFT) & A4XX_HLSQ_CL_NDRANGE_1_SIZE_X__MASK; -} - -#define REG_A4XX_HLSQ_CL_NDRANGE_2 0x000023cf - -#define REG_A4XX_HLSQ_CL_NDRANGE_3 0x000023d0 -#define A4XX_HLSQ_CL_NDRANGE_3_SIZE_Y__MASK 0xffffffff -#define A4XX_HLSQ_CL_NDRANGE_3_SIZE_Y__SHIFT 0 -static inline uint32_t A4XX_HLSQ_CL_NDRANGE_3_SIZE_Y(uint32_t val) -{ - return ((val) << A4XX_HLSQ_CL_NDRANGE_3_SIZE_Y__SHIFT) & A4XX_HLSQ_CL_NDRANGE_3_SIZE_Y__MASK; -} - -#define REG_A4XX_HLSQ_CL_NDRANGE_4 0x000023d1 - -#define REG_A4XX_HLSQ_CL_NDRANGE_5 0x000023d2 -#define A4XX_HLSQ_CL_NDRANGE_5_SIZE_Z__MASK 0xffffffff -#define A4XX_HLSQ_CL_NDRANGE_5_SIZE_Z__SHIFT 0 -static inline uint32_t A4XX_HLSQ_CL_NDRANGE_5_SIZE_Z(uint32_t val) -{ - return ((val) << A4XX_HLSQ_CL_NDRANGE_5_SIZE_Z__SHIFT) & A4XX_HLSQ_CL_NDRANGE_5_SIZE_Z__MASK; -} - -#define REG_A4XX_HLSQ_CL_NDRANGE_6 0x000023d3 - -#define REG_A4XX_HLSQ_CL_CONTROL_0 0x000023d4 -#define A4XX_HLSQ_CL_CONTROL_0_WGIDCONSTID__MASK 0x000000ff -#define A4XX_HLSQ_CL_CONTROL_0_WGIDCONSTID__SHIFT 0 -static inline uint32_t A4XX_HLSQ_CL_CONTROL_0_WGIDCONSTID(uint32_t val) -{ - return ((val) << A4XX_HLSQ_CL_CONTROL_0_WGIDCONSTID__SHIFT) & A4XX_HLSQ_CL_CONTROL_0_WGIDCONSTID__MASK; -} -#define A4XX_HLSQ_CL_CONTROL_0_LOCALIDREGID__MASK 0xff000000 -#define A4XX_HLSQ_CL_CONTROL_0_LOCALIDREGID__SHIFT 24 -static 
inline uint32_t A4XX_HLSQ_CL_CONTROL_0_LOCALIDREGID(uint32_t val) -{ - return ((val) << A4XX_HLSQ_CL_CONTROL_0_LOCALIDREGID__SHIFT) & A4XX_HLSQ_CL_CONTROL_0_LOCALIDREGID__MASK; -} - -#define REG_A4XX_HLSQ_CL_CONTROL_1 0x000023d5 - -#define REG_A4XX_HLSQ_CL_KERNEL_CONST 0x000023d6 - -#define REG_A4XX_HLSQ_CL_KERNEL_GROUP_X 0x000023d7 - -#define REG_A4XX_HLSQ_CL_KERNEL_GROUP_Y 0x000023d8 - -#define REG_A4XX_HLSQ_CL_KERNEL_GROUP_Z 0x000023d9 - -#define REG_A4XX_HLSQ_CL_WG_OFFSET 0x000023da - -#define REG_A4XX_HLSQ_UPDATE_CONTROL 0x000023db - -#define REG_A4XX_PC_BINNING_COMMAND 0x00000d00 -#define A4XX_PC_BINNING_COMMAND_BINNING_ENABLE 0x00000001 - -#define REG_A4XX_PC_TESSFACTOR_ADDR 0x00000d08 - -#define REG_A4XX_PC_DRAWCALL_SETUP_OVERRIDE 0x00000d0c - -#define REG_A4XX_PC_PERFCTR_PC_SEL_0 0x00000d10 - -#define REG_A4XX_PC_PERFCTR_PC_SEL_1 0x00000d11 - -#define REG_A4XX_PC_PERFCTR_PC_SEL_2 0x00000d12 - -#define REG_A4XX_PC_PERFCTR_PC_SEL_3 0x00000d13 - -#define REG_A4XX_PC_PERFCTR_PC_SEL_4 0x00000d14 - -#define REG_A4XX_PC_PERFCTR_PC_SEL_5 0x00000d15 - -#define REG_A4XX_PC_PERFCTR_PC_SEL_6 0x00000d16 - -#define REG_A4XX_PC_PERFCTR_PC_SEL_7 0x00000d17 - -#define REG_A4XX_PC_BIN_BASE 0x000021c0 - -#define REG_A4XX_PC_VSTREAM_CONTROL 0x000021c2 -#define A4XX_PC_VSTREAM_CONTROL_SIZE__MASK 0x003f0000 -#define A4XX_PC_VSTREAM_CONTROL_SIZE__SHIFT 16 -static inline uint32_t A4XX_PC_VSTREAM_CONTROL_SIZE(uint32_t val) -{ - return ((val) << A4XX_PC_VSTREAM_CONTROL_SIZE__SHIFT) & A4XX_PC_VSTREAM_CONTROL_SIZE__MASK; -} -#define A4XX_PC_VSTREAM_CONTROL_N__MASK 0x07c00000 -#define A4XX_PC_VSTREAM_CONTROL_N__SHIFT 22 -static inline uint32_t A4XX_PC_VSTREAM_CONTROL_N(uint32_t val) -{ - return ((val) << A4XX_PC_VSTREAM_CONTROL_N__SHIFT) & A4XX_PC_VSTREAM_CONTROL_N__MASK; -} - -#define REG_A4XX_PC_PRIM_VTX_CNTL 0x000021c4 -#define A4XX_PC_PRIM_VTX_CNTL_VAROUT__MASK 0x0000000f -#define A4XX_PC_PRIM_VTX_CNTL_VAROUT__SHIFT 0 -static inline uint32_t A4XX_PC_PRIM_VTX_CNTL_VAROUT(uint32_t 
val) -{ - return ((val) << A4XX_PC_PRIM_VTX_CNTL_VAROUT__SHIFT) & A4XX_PC_PRIM_VTX_CNTL_VAROUT__MASK; -} -#define A4XX_PC_PRIM_VTX_CNTL_PRIMITIVE_RESTART 0x00100000 -#define A4XX_PC_PRIM_VTX_CNTL_PROVOKING_VTX_LAST 0x02000000 -#define A4XX_PC_PRIM_VTX_CNTL_PSIZE 0x04000000 - -#define REG_A4XX_PC_PRIM_VTX_CNTL2 0x000021c5 -#define A4XX_PC_PRIM_VTX_CNTL2_POLYMODE_FRONT_PTYPE__MASK 0x00000007 -#define A4XX_PC_PRIM_VTX_CNTL2_POLYMODE_FRONT_PTYPE__SHIFT 0 -static inline uint32_t A4XX_PC_PRIM_VTX_CNTL2_POLYMODE_FRONT_PTYPE(enum adreno_pa_su_sc_draw val) -{ - return ((val) << A4XX_PC_PRIM_VTX_CNTL2_POLYMODE_FRONT_PTYPE__SHIFT) & A4XX_PC_PRIM_VTX_CNTL2_POLYMODE_FRONT_PTYPE__MASK; -} -#define A4XX_PC_PRIM_VTX_CNTL2_POLYMODE_BACK_PTYPE__MASK 0x00000038 -#define A4XX_PC_PRIM_VTX_CNTL2_POLYMODE_BACK_PTYPE__SHIFT 3 -static inline uint32_t A4XX_PC_PRIM_VTX_CNTL2_POLYMODE_BACK_PTYPE(enum adreno_pa_su_sc_draw val) -{ - return ((val) << A4XX_PC_PRIM_VTX_CNTL2_POLYMODE_BACK_PTYPE__SHIFT) & A4XX_PC_PRIM_VTX_CNTL2_POLYMODE_BACK_PTYPE__MASK; -} -#define A4XX_PC_PRIM_VTX_CNTL2_POLYMODE_ENABLE 0x00000040 - -#define REG_A4XX_PC_RESTART_INDEX 0x000021c6 - -#define REG_A4XX_PC_GS_PARAM 0x000021e5 -#define A4XX_PC_GS_PARAM_MAX_VERTICES__MASK 0x000003ff -#define A4XX_PC_GS_PARAM_MAX_VERTICES__SHIFT 0 -static inline uint32_t A4XX_PC_GS_PARAM_MAX_VERTICES(uint32_t val) -{ - return ((val) << A4XX_PC_GS_PARAM_MAX_VERTICES__SHIFT) & A4XX_PC_GS_PARAM_MAX_VERTICES__MASK; -} -#define A4XX_PC_GS_PARAM_INVOCATIONS__MASK 0x0000f800 -#define A4XX_PC_GS_PARAM_INVOCATIONS__SHIFT 11 -static inline uint32_t A4XX_PC_GS_PARAM_INVOCATIONS(uint32_t val) -{ - return ((val) << A4XX_PC_GS_PARAM_INVOCATIONS__SHIFT) & A4XX_PC_GS_PARAM_INVOCATIONS__MASK; -} -#define A4XX_PC_GS_PARAM_PRIMTYPE__MASK 0x01800000 -#define A4XX_PC_GS_PARAM_PRIMTYPE__SHIFT 23 -static inline uint32_t A4XX_PC_GS_PARAM_PRIMTYPE(enum adreno_pa_su_sc_draw val) -{ - return ((val) << A4XX_PC_GS_PARAM_PRIMTYPE__SHIFT) & 
A4XX_PC_GS_PARAM_PRIMTYPE__MASK; -} -#define A4XX_PC_GS_PARAM_LAYER 0x80000000 - -#define REG_A4XX_PC_HS_PARAM 0x000021e7 -#define A4XX_PC_HS_PARAM_VERTICES_OUT__MASK 0x0000003f -#define A4XX_PC_HS_PARAM_VERTICES_OUT__SHIFT 0 -static inline uint32_t A4XX_PC_HS_PARAM_VERTICES_OUT(uint32_t val) -{ - return ((val) << A4XX_PC_HS_PARAM_VERTICES_OUT__SHIFT) & A4XX_PC_HS_PARAM_VERTICES_OUT__MASK; -} -#define A4XX_PC_HS_PARAM_SPACING__MASK 0x00600000 -#define A4XX_PC_HS_PARAM_SPACING__SHIFT 21 -static inline uint32_t A4XX_PC_HS_PARAM_SPACING(enum a4xx_tess_spacing val) -{ - return ((val) << A4XX_PC_HS_PARAM_SPACING__SHIFT) & A4XX_PC_HS_PARAM_SPACING__MASK; -} -#define A4XX_PC_HS_PARAM_CW 0x00800000 -#define A4XX_PC_HS_PARAM_CONNECTED 0x01000000 - -#define REG_A4XX_VBIF_VERSION 0x00003000 - -#define REG_A4XX_VBIF_CLKON 0x00003001 -#define A4XX_VBIF_CLKON_FORCE_ON_TESTBUS 0x00000001 - -#define REG_A4XX_VBIF_ABIT_SORT 0x0000301c - -#define REG_A4XX_VBIF_ABIT_SORT_CONF 0x0000301d - -#define REG_A4XX_VBIF_GATE_OFF_WRREQ_EN 0x0000302a - -#define REG_A4XX_VBIF_IN_RD_LIM_CONF0 0x0000302c - -#define REG_A4XX_VBIF_IN_RD_LIM_CONF1 0x0000302d - -#define REG_A4XX_VBIF_IN_WR_LIM_CONF0 0x00003030 - -#define REG_A4XX_VBIF_IN_WR_LIM_CONF1 0x00003031 - -#define REG_A4XX_VBIF_ROUND_ROBIN_QOS_ARB 0x00003049 - -#define REG_A4XX_VBIF_PERF_CNT_EN0 0x000030c0 - -#define REG_A4XX_VBIF_PERF_CNT_EN1 0x000030c1 - -#define REG_A4XX_VBIF_PERF_CNT_EN2 0x000030c2 - -#define REG_A4XX_VBIF_PERF_CNT_EN3 0x000030c3 - -#define REG_A4XX_VBIF_PERF_CNT_SEL0 0x000030d0 - -#define REG_A4XX_VBIF_PERF_CNT_SEL1 0x000030d1 - -#define REG_A4XX_VBIF_PERF_CNT_SEL2 0x000030d2 - -#define REG_A4XX_VBIF_PERF_CNT_SEL3 0x000030d3 - -#define REG_A4XX_VBIF_PERF_CNT_LOW0 0x000030d8 - -#define REG_A4XX_VBIF_PERF_CNT_LOW1 0x000030d9 - -#define REG_A4XX_VBIF_PERF_CNT_LOW2 0x000030da - -#define REG_A4XX_VBIF_PERF_CNT_LOW3 0x000030db - -#define REG_A4XX_VBIF_PERF_CNT_HIGH0 0x000030e0 - -#define REG_A4XX_VBIF_PERF_CNT_HIGH1 0x000030e1 
- -#define REG_A4XX_VBIF_PERF_CNT_HIGH2 0x000030e2 - -#define REG_A4XX_VBIF_PERF_CNT_HIGH3 0x000030e3 - -#define REG_A4XX_VBIF_PERF_PWR_CNT_EN0 0x00003100 - -#define REG_A4XX_VBIF_PERF_PWR_CNT_EN1 0x00003101 - -#define REG_A4XX_VBIF_PERF_PWR_CNT_EN2 0x00003102 - -#define REG_A4XX_UNKNOWN_0CC5 0x00000cc5 - -#define REG_A4XX_UNKNOWN_0CC6 0x00000cc6 - -#define REG_A4XX_UNKNOWN_0D01 0x00000d01 - -#define REG_A4XX_UNKNOWN_0E42 0x00000e42 - -#define REG_A4XX_UNKNOWN_0EC2 0x00000ec2 - -#define REG_A4XX_UNKNOWN_2001 0x00002001 - -#define REG_A4XX_UNKNOWN_209B 0x0000209b - -#define REG_A4XX_UNKNOWN_20EF 0x000020ef - -#define REG_A4XX_UNKNOWN_2152 0x00002152 - -#define REG_A4XX_UNKNOWN_2153 0x00002153 - -#define REG_A4XX_UNKNOWN_2154 0x00002154 - -#define REG_A4XX_UNKNOWN_2155 0x00002155 - -#define REG_A4XX_UNKNOWN_2156 0x00002156 - -#define REG_A4XX_UNKNOWN_2157 0x00002157 - -#define REG_A4XX_UNKNOWN_21C3 0x000021c3 - -#define REG_A4XX_UNKNOWN_21E6 0x000021e6 - -#define REG_A4XX_UNKNOWN_2209 0x00002209 - -#define REG_A4XX_UNKNOWN_22D7 0x000022d7 - -#define REG_A4XX_UNKNOWN_2352 0x00002352 - -#define REG_A4XX_TEX_SAMP_0 0x00000000 -#define A4XX_TEX_SAMP_0_MIPFILTER_LINEAR_NEAR 0x00000001 -#define A4XX_TEX_SAMP_0_XY_MAG__MASK 0x00000006 -#define A4XX_TEX_SAMP_0_XY_MAG__SHIFT 1 -static inline uint32_t A4XX_TEX_SAMP_0_XY_MAG(enum a4xx_tex_filter val) -{ - return ((val) << A4XX_TEX_SAMP_0_XY_MAG__SHIFT) & A4XX_TEX_SAMP_0_XY_MAG__MASK; -} -#define A4XX_TEX_SAMP_0_XY_MIN__MASK 0x00000018 -#define A4XX_TEX_SAMP_0_XY_MIN__SHIFT 3 -static inline uint32_t A4XX_TEX_SAMP_0_XY_MIN(enum a4xx_tex_filter val) -{ - return ((val) << A4XX_TEX_SAMP_0_XY_MIN__SHIFT) & A4XX_TEX_SAMP_0_XY_MIN__MASK; -} -#define A4XX_TEX_SAMP_0_WRAP_S__MASK 0x000000e0 -#define A4XX_TEX_SAMP_0_WRAP_S__SHIFT 5 -static inline uint32_t A4XX_TEX_SAMP_0_WRAP_S(enum a4xx_tex_clamp val) -{ - return ((val) << A4XX_TEX_SAMP_0_WRAP_S__SHIFT) & A4XX_TEX_SAMP_0_WRAP_S__MASK; -} -#define A4XX_TEX_SAMP_0_WRAP_T__MASK 0x00000700 
-#define A4XX_TEX_SAMP_0_WRAP_T__SHIFT 8 -static inline uint32_t A4XX_TEX_SAMP_0_WRAP_T(enum a4xx_tex_clamp val) -{ - return ((val) << A4XX_TEX_SAMP_0_WRAP_T__SHIFT) & A4XX_TEX_SAMP_0_WRAP_T__MASK; -} -#define A4XX_TEX_SAMP_0_WRAP_R__MASK 0x00003800 -#define A4XX_TEX_SAMP_0_WRAP_R__SHIFT 11 -static inline uint32_t A4XX_TEX_SAMP_0_WRAP_R(enum a4xx_tex_clamp val) -{ - return ((val) << A4XX_TEX_SAMP_0_WRAP_R__SHIFT) & A4XX_TEX_SAMP_0_WRAP_R__MASK; -} -#define A4XX_TEX_SAMP_0_ANISO__MASK 0x0001c000 -#define A4XX_TEX_SAMP_0_ANISO__SHIFT 14 -static inline uint32_t A4XX_TEX_SAMP_0_ANISO(enum a4xx_tex_aniso val) -{ - return ((val) << A4XX_TEX_SAMP_0_ANISO__SHIFT) & A4XX_TEX_SAMP_0_ANISO__MASK; -} -#define A4XX_TEX_SAMP_0_LOD_BIAS__MASK 0xfff80000 -#define A4XX_TEX_SAMP_0_LOD_BIAS__SHIFT 19 -static inline uint32_t A4XX_TEX_SAMP_0_LOD_BIAS(float val) -{ - return ((((int32_t)(val * 256.0))) << A4XX_TEX_SAMP_0_LOD_BIAS__SHIFT) & A4XX_TEX_SAMP_0_LOD_BIAS__MASK; -} - -#define REG_A4XX_TEX_SAMP_1 0x00000001 -#define A4XX_TEX_SAMP_1_COMPARE_FUNC__MASK 0x0000000e -#define A4XX_TEX_SAMP_1_COMPARE_FUNC__SHIFT 1 -static inline uint32_t A4XX_TEX_SAMP_1_COMPARE_FUNC(enum adreno_compare_func val) -{ - return ((val) << A4XX_TEX_SAMP_1_COMPARE_FUNC__SHIFT) & A4XX_TEX_SAMP_1_COMPARE_FUNC__MASK; -} -#define A4XX_TEX_SAMP_1_CUBEMAPSEAMLESSFILTOFF 0x00000010 -#define A4XX_TEX_SAMP_1_UNNORM_COORDS 0x00000020 -#define A4XX_TEX_SAMP_1_MIPFILTER_LINEAR_FAR 0x00000040 -#define A4XX_TEX_SAMP_1_MAX_LOD__MASK 0x000fff00 -#define A4XX_TEX_SAMP_1_MAX_LOD__SHIFT 8 -static inline uint32_t A4XX_TEX_SAMP_1_MAX_LOD(float val) -{ - return ((((uint32_t)(val * 256.0))) << A4XX_TEX_SAMP_1_MAX_LOD__SHIFT) & A4XX_TEX_SAMP_1_MAX_LOD__MASK; -} -#define A4XX_TEX_SAMP_1_MIN_LOD__MASK 0xfff00000 -#define A4XX_TEX_SAMP_1_MIN_LOD__SHIFT 20 -static inline uint32_t A4XX_TEX_SAMP_1_MIN_LOD(float val) -{ - return ((((uint32_t)(val * 256.0))) << A4XX_TEX_SAMP_1_MIN_LOD__SHIFT) & A4XX_TEX_SAMP_1_MIN_LOD__MASK; -} - -#define 
REG_A4XX_TEX_CONST_0 0x00000000 -#define A4XX_TEX_CONST_0_TILED 0x00000001 -#define A4XX_TEX_CONST_0_SRGB 0x00000004 -#define A4XX_TEX_CONST_0_SWIZ_X__MASK 0x00000070 -#define A4XX_TEX_CONST_0_SWIZ_X__SHIFT 4 -static inline uint32_t A4XX_TEX_CONST_0_SWIZ_X(enum a4xx_tex_swiz val) -{ - return ((val) << A4XX_TEX_CONST_0_SWIZ_X__SHIFT) & A4XX_TEX_CONST_0_SWIZ_X__MASK; -} -#define A4XX_TEX_CONST_0_SWIZ_Y__MASK 0x00000380 -#define A4XX_TEX_CONST_0_SWIZ_Y__SHIFT 7 -static inline uint32_t A4XX_TEX_CONST_0_SWIZ_Y(enum a4xx_tex_swiz val) -{ - return ((val) << A4XX_TEX_CONST_0_SWIZ_Y__SHIFT) & A4XX_TEX_CONST_0_SWIZ_Y__MASK; -} -#define A4XX_TEX_CONST_0_SWIZ_Z__MASK 0x00001c00 -#define A4XX_TEX_CONST_0_SWIZ_Z__SHIFT 10 -static inline uint32_t A4XX_TEX_CONST_0_SWIZ_Z(enum a4xx_tex_swiz val) -{ - return ((val) << A4XX_TEX_CONST_0_SWIZ_Z__SHIFT) & A4XX_TEX_CONST_0_SWIZ_Z__MASK; -} -#define A4XX_TEX_CONST_0_SWIZ_W__MASK 0x0000e000 -#define A4XX_TEX_CONST_0_SWIZ_W__SHIFT 13 -static inline uint32_t A4XX_TEX_CONST_0_SWIZ_W(enum a4xx_tex_swiz val) -{ - return ((val) << A4XX_TEX_CONST_0_SWIZ_W__SHIFT) & A4XX_TEX_CONST_0_SWIZ_W__MASK; -} -#define A4XX_TEX_CONST_0_MIPLVLS__MASK 0x000f0000 -#define A4XX_TEX_CONST_0_MIPLVLS__SHIFT 16 -static inline uint32_t A4XX_TEX_CONST_0_MIPLVLS(uint32_t val) -{ - return ((val) << A4XX_TEX_CONST_0_MIPLVLS__SHIFT) & A4XX_TEX_CONST_0_MIPLVLS__MASK; -} -#define A4XX_TEX_CONST_0_FMT__MASK 0x1fc00000 -#define A4XX_TEX_CONST_0_FMT__SHIFT 22 -static inline uint32_t A4XX_TEX_CONST_0_FMT(enum a4xx_tex_fmt val) -{ - return ((val) << A4XX_TEX_CONST_0_FMT__SHIFT) & A4XX_TEX_CONST_0_FMT__MASK; -} -#define A4XX_TEX_CONST_0_TYPE__MASK 0x60000000 -#define A4XX_TEX_CONST_0_TYPE__SHIFT 29 -static inline uint32_t A4XX_TEX_CONST_0_TYPE(enum a4xx_tex_type val) -{ - return ((val) << A4XX_TEX_CONST_0_TYPE__SHIFT) & A4XX_TEX_CONST_0_TYPE__MASK; -} - -#define REG_A4XX_TEX_CONST_1 0x00000001 -#define A4XX_TEX_CONST_1_HEIGHT__MASK 0x00007fff -#define 
A4XX_TEX_CONST_1_HEIGHT__SHIFT 0 -static inline uint32_t A4XX_TEX_CONST_1_HEIGHT(uint32_t val) -{ - return ((val) << A4XX_TEX_CONST_1_HEIGHT__SHIFT) & A4XX_TEX_CONST_1_HEIGHT__MASK; -} -#define A4XX_TEX_CONST_1_WIDTH__MASK 0x3fff8000 -#define A4XX_TEX_CONST_1_WIDTH__SHIFT 15 -static inline uint32_t A4XX_TEX_CONST_1_WIDTH(uint32_t val) -{ - return ((val) << A4XX_TEX_CONST_1_WIDTH__SHIFT) & A4XX_TEX_CONST_1_WIDTH__MASK; -} - -#define REG_A4XX_TEX_CONST_2 0x00000002 -#define A4XX_TEX_CONST_2_FETCHSIZE__MASK 0x0000000f -#define A4XX_TEX_CONST_2_FETCHSIZE__SHIFT 0 -static inline uint32_t A4XX_TEX_CONST_2_FETCHSIZE(enum a4xx_tex_fetchsize val) -{ - return ((val) << A4XX_TEX_CONST_2_FETCHSIZE__SHIFT) & A4XX_TEX_CONST_2_FETCHSIZE__MASK; -} -#define A4XX_TEX_CONST_2_PITCH__MASK 0x3ffffe00 -#define A4XX_TEX_CONST_2_PITCH__SHIFT 9 -static inline uint32_t A4XX_TEX_CONST_2_PITCH(uint32_t val) -{ - return ((val) << A4XX_TEX_CONST_2_PITCH__SHIFT) & A4XX_TEX_CONST_2_PITCH__MASK; -} -#define A4XX_TEX_CONST_2_SWAP__MASK 0xc0000000 -#define A4XX_TEX_CONST_2_SWAP__SHIFT 30 -static inline uint32_t A4XX_TEX_CONST_2_SWAP(enum a3xx_color_swap val) -{ - return ((val) << A4XX_TEX_CONST_2_SWAP__SHIFT) & A4XX_TEX_CONST_2_SWAP__MASK; -} - -#define REG_A4XX_TEX_CONST_3 0x00000003 -#define A4XX_TEX_CONST_3_LAYERSZ__MASK 0x00003fff -#define A4XX_TEX_CONST_3_LAYERSZ__SHIFT 0 -static inline uint32_t A4XX_TEX_CONST_3_LAYERSZ(uint32_t val) -{ - assert(!(val & 0xfff)); - return ((val >> 12) << A4XX_TEX_CONST_3_LAYERSZ__SHIFT) & A4XX_TEX_CONST_3_LAYERSZ__MASK; -} -#define A4XX_TEX_CONST_3_DEPTH__MASK 0x7ffc0000 -#define A4XX_TEX_CONST_3_DEPTH__SHIFT 18 -static inline uint32_t A4XX_TEX_CONST_3_DEPTH(uint32_t val) -{ - return ((val) << A4XX_TEX_CONST_3_DEPTH__SHIFT) & A4XX_TEX_CONST_3_DEPTH__MASK; -} - -#define REG_A4XX_TEX_CONST_4 0x00000004 -#define A4XX_TEX_CONST_4_LAYERSZ__MASK 0x0000000f -#define A4XX_TEX_CONST_4_LAYERSZ__SHIFT 0 -static inline uint32_t A4XX_TEX_CONST_4_LAYERSZ(uint32_t val) -{ - 
assert(!(val & 0xfff)); - return ((val >> 12) << A4XX_TEX_CONST_4_LAYERSZ__SHIFT) & A4XX_TEX_CONST_4_LAYERSZ__MASK; -} -#define A4XX_TEX_CONST_4_BASE__MASK 0xffffffe0 -#define A4XX_TEX_CONST_4_BASE__SHIFT 5 -static inline uint32_t A4XX_TEX_CONST_4_BASE(uint32_t val) -{ - assert(!(val & 0x1f)); - return ((val >> 5) << A4XX_TEX_CONST_4_BASE__SHIFT) & A4XX_TEX_CONST_4_BASE__MASK; -} - -#define REG_A4XX_TEX_CONST_5 0x00000005 - -#define REG_A4XX_TEX_CONST_6 0x00000006 - -#define REG_A4XX_TEX_CONST_7 0x00000007 - -#define REG_A4XX_SSBO_0_0 0x00000000 -#define A4XX_SSBO_0_0_BASE__MASK 0xffffffe0 -#define A4XX_SSBO_0_0_BASE__SHIFT 5 -static inline uint32_t A4XX_SSBO_0_0_BASE(uint32_t val) -{ - assert(!(val & 0x1f)); - return ((val >> 5) << A4XX_SSBO_0_0_BASE__SHIFT) & A4XX_SSBO_0_0_BASE__MASK; -} - -#define REG_A4XX_SSBO_0_1 0x00000001 -#define A4XX_SSBO_0_1_PITCH__MASK 0x003fffff -#define A4XX_SSBO_0_1_PITCH__SHIFT 0 -static inline uint32_t A4XX_SSBO_0_1_PITCH(uint32_t val) -{ - return ((val) << A4XX_SSBO_0_1_PITCH__SHIFT) & A4XX_SSBO_0_1_PITCH__MASK; -} - -#define REG_A4XX_SSBO_0_2 0x00000002 -#define A4XX_SSBO_0_2_ARRAY_PITCH__MASK 0x03fff000 -#define A4XX_SSBO_0_2_ARRAY_PITCH__SHIFT 12 -static inline uint32_t A4XX_SSBO_0_2_ARRAY_PITCH(uint32_t val) -{ - assert(!(val & 0xfff)); - return ((val >> 12) << A4XX_SSBO_0_2_ARRAY_PITCH__SHIFT) & A4XX_SSBO_0_2_ARRAY_PITCH__MASK; -} - -#define REG_A4XX_SSBO_0_3 0x00000003 -#define A4XX_SSBO_0_3_CPP__MASK 0x0000003f -#define A4XX_SSBO_0_3_CPP__SHIFT 0 -static inline uint32_t A4XX_SSBO_0_3_CPP(uint32_t val) -{ - return ((val) << A4XX_SSBO_0_3_CPP__SHIFT) & A4XX_SSBO_0_3_CPP__MASK; -} - -#define REG_A4XX_SSBO_1_0 0x00000000 -#define A4XX_SSBO_1_0_CPP__MASK 0x0000001f -#define A4XX_SSBO_1_0_CPP__SHIFT 0 -static inline uint32_t A4XX_SSBO_1_0_CPP(uint32_t val) -{ - return ((val) << A4XX_SSBO_1_0_CPP__SHIFT) & A4XX_SSBO_1_0_CPP__MASK; -} -#define A4XX_SSBO_1_0_FMT__MASK 0x0000ff00 -#define A4XX_SSBO_1_0_FMT__SHIFT 8 -static inline 
uint32_t A4XX_SSBO_1_0_FMT(enum a4xx_color_fmt val) -{ - return ((val) << A4XX_SSBO_1_0_FMT__SHIFT) & A4XX_SSBO_1_0_FMT__MASK; -} -#define A4XX_SSBO_1_0_WIDTH__MASK 0xffff0000 -#define A4XX_SSBO_1_0_WIDTH__SHIFT 16 -static inline uint32_t A4XX_SSBO_1_0_WIDTH(uint32_t val) -{ - return ((val) << A4XX_SSBO_1_0_WIDTH__SHIFT) & A4XX_SSBO_1_0_WIDTH__MASK; -} - -#define REG_A4XX_SSBO_1_1 0x00000001 -#define A4XX_SSBO_1_1_HEIGHT__MASK 0x0000ffff -#define A4XX_SSBO_1_1_HEIGHT__SHIFT 0 -static inline uint32_t A4XX_SSBO_1_1_HEIGHT(uint32_t val) -{ - return ((val) << A4XX_SSBO_1_1_HEIGHT__SHIFT) & A4XX_SSBO_1_1_HEIGHT__MASK; -} -#define A4XX_SSBO_1_1_DEPTH__MASK 0xffff0000 -#define A4XX_SSBO_1_1_DEPTH__SHIFT 16 -static inline uint32_t A4XX_SSBO_1_1_DEPTH(uint32_t val) -{ - return ((val) << A4XX_SSBO_1_1_DEPTH__SHIFT) & A4XX_SSBO_1_1_DEPTH__MASK; -} - - -#endif /* A4XX_XML */ diff -Nru mesa-18.3.3/src/gallium/drivers/freedreno/a4xx/fd4_context.c mesa-19.0.1/src/gallium/drivers/freedreno/a4xx/fd4_context.c --- mesa-18.3.3/src/gallium/drivers/freedreno/a4xx/fd4_context.c 2018-12-07 18:58:04.000000000 +0000 +++ mesa-19.0.1/src/gallium/drivers/freedreno/a4xx/fd4_context.c 2019-03-31 23:16:37.000000000 +0000 @@ -101,13 +101,13 @@ fd_hw_query_init(pctx); fd4_ctx->vs_pvt_mem = fd_bo_new(screen->dev, 0x2000, - DRM_FREEDRENO_GEM_TYPE_KMEM); + DRM_FREEDRENO_GEM_TYPE_KMEM, "vs_pvt"); fd4_ctx->fs_pvt_mem = fd_bo_new(screen->dev, 0x2000, - DRM_FREEDRENO_GEM_TYPE_KMEM); + DRM_FREEDRENO_GEM_TYPE_KMEM, "fs_pvt"); fd4_ctx->vsc_size_mem = fd_bo_new(screen->dev, 0x1000, - DRM_FREEDRENO_GEM_TYPE_KMEM); + DRM_FREEDRENO_GEM_TYPE_KMEM, "vsc_size"); fd_context_setup_common_vbos(&fd4_ctx->base); diff -Nru mesa-18.3.3/src/gallium/drivers/freedreno/a4xx/fd4_context.h mesa-19.0.1/src/gallium/drivers/freedreno/a4xx/fd4_context.h --- mesa-18.3.3/src/gallium/drivers/freedreno/a4xx/fd4_context.h 2018-12-07 18:58:04.000000000 +0000 +++ mesa-19.0.1/src/gallium/drivers/freedreno/a4xx/fd4_context.h 2019-03-31 
23:16:37.000000000 +0000 @@ -31,7 +31,7 @@ #include "freedreno_context.h" -#include "ir3_shader.h" +#include "ir3/ir3_shader.h" struct fd4_context { struct fd_context base; diff -Nru mesa-18.3.3/src/gallium/drivers/freedreno/a4xx/fd4_emit.c mesa-19.0.1/src/gallium/drivers/freedreno/a4xx/fd4_emit.c --- mesa-18.3.3/src/gallium/drivers/freedreno/a4xx/fd4_emit.c 2018-12-07 18:58:04.000000000 +0000 +++ mesa-19.0.1/src/gallium/drivers/freedreno/a4xx/fd4_emit.c 2019-03-31 23:16:37.000000000 +0000 @@ -48,7 +48,7 @@ * sizedwords: size of const value buffer */ static void -fd4_emit_const(struct fd_ringbuffer *ring, enum shader_t type, +fd4_emit_const(struct fd_ringbuffer *ring, gl_shader_stage type, uint32_t regid, uint32_t offset, uint32_t sizedwords, const uint32_t *dwords, struct pipe_resource *prsc) { @@ -86,7 +86,7 @@ } static void -fd4_emit_const_bo(struct fd_ringbuffer *ring, enum shader_t type, boolean write, +fd4_emit_const_bo(struct fd_ringbuffer *ring, gl_shader_stage type, boolean write, uint32_t regid, uint32_t num, struct pipe_resource **prscs, uint32_t *offsets) { uint32_t anum = align(num, 4); diff -Nru mesa-18.3.3/src/gallium/drivers/freedreno/a4xx/fd4_emit.h mesa-19.0.1/src/gallium/drivers/freedreno/a4xx/fd4_emit.h --- mesa-18.3.3/src/gallium/drivers/freedreno/a4xx/fd4_emit.h 2018-12-07 18:58:04.000000000 +0000 +++ mesa-19.0.1/src/gallium/drivers/freedreno/a4xx/fd4_emit.h 2019-03-31 23:16:37.000000000 +0000 @@ -32,7 +32,7 @@ #include "freedreno_context.h" #include "fd4_format.h" #include "fd4_program.h" -#include "ir3_shader.h" +#include "ir3_gallium.h" struct fd_ringbuffer; diff -Nru mesa-18.3.3/src/gallium/drivers/freedreno/a4xx/fd4_gmem.c mesa-19.0.1/src/gallium/drivers/freedreno/a4xx/fd4_gmem.c --- mesa-18.3.3/src/gallium/drivers/freedreno/a4xx/fd4_gmem.c 2018-12-07 18:58:04.000000000 +0000 +++ mesa-19.0.1/src/gallium/drivers/freedreno/a4xx/fd4_gmem.c 2019-03-31 23:16:37.000000000 +0000 @@ -582,7 +582,7 @@ struct fd_vsc_pipe *pipe = &ctx->vsc_pipe[i]; 
if (!pipe->bo) { pipe->bo = fd_bo_new(ctx->dev, 0x40000, - DRM_FREEDRENO_GEM_TYPE_KMEM); + DRM_FREEDRENO_GEM_TYPE_KMEM, "vsc_pipe[%u]", i); } OUT_RELOCW(ring, pipe->bo, 0, 0, 0); /* VSC_PIPE_DATA_ADDRESS[i] */ } diff -Nru mesa-18.3.3/src/gallium/drivers/freedreno/a4xx/fd4_program.c mesa-19.0.1/src/gallium/drivers/freedreno/a4xx/fd4_program.c --- mesa-18.3.3/src/gallium/drivers/freedreno/a4xx/fd4_program.c 2018-12-07 18:58:04.000000000 +0000 +++ mesa-19.0.1/src/gallium/drivers/freedreno/a4xx/fd4_program.c 2019-03-31 23:16:37.000000000 +0000 @@ -39,7 +39,7 @@ static struct ir3_shader * create_shader_stateobj(struct pipe_context *pctx, const struct pipe_shader_state *cso, - enum shader_t type) + gl_shader_stage type) { struct fd_context *ctx = fd_context(pctx); struct ir3_compiler *compiler = ctx->screen->compiler; @@ -50,7 +50,7 @@ fd4_fp_state_create(struct pipe_context *pctx, const struct pipe_shader_state *cso) { - return create_shader_stateobj(pctx, cso, SHADER_FRAGMENT); + return create_shader_stateobj(pctx, cso, MESA_SHADER_FRAGMENT); } static void @@ -64,7 +64,7 @@ fd4_vp_state_create(struct pipe_context *pctx, const struct pipe_shader_state *cso) { - return create_shader_stateobj(pctx, cso, SHADER_VERTEX); + return create_shader_stateobj(pctx, cso, MESA_SHADER_VERTEX); } static void @@ -101,7 +101,7 @@ OUT_RING(ring, CP_LOAD_STATE4_1_EXT_SRC_ADDR(0) | CP_LOAD_STATE4_1_STATE_TYPE(ST4_SHADER)); } else { - OUT_RELOC(ring, so->bo, 0, + OUT_RELOCD(ring, so->bo, 0, CP_LOAD_STATE4_1_STATE_TYPE(ST4_SHADER), 0); } diff -Nru mesa-18.3.3/src/gallium/drivers/freedreno/a4xx/fd4_program.h mesa-19.0.1/src/gallium/drivers/freedreno/a4xx/fd4_program.h --- mesa-18.3.3/src/gallium/drivers/freedreno/a4xx/fd4_program.h 2018-12-07 18:58:04.000000000 +0000 +++ mesa-19.0.1/src/gallium/drivers/freedreno/a4xx/fd4_program.h 2019-03-31 23:16:37.000000000 +0000 @@ -29,7 +29,8 @@ #include "pipe/p_context.h" #include "freedreno_context.h" -#include "ir3_shader.h" + +#include 
"ir3/ir3_shader.h" struct fd4_emit; diff -Nru mesa-18.3.3/src/gallium/drivers/freedreno/a4xx/fd4_screen.c mesa-19.0.1/src/gallium/drivers/freedreno/a4xx/fd4_screen.c --- mesa-18.3.3/src/gallium/drivers/freedreno/a4xx/fd4_screen.c 2018-12-07 18:58:04.000000000 +0000 +++ mesa-19.0.1/src/gallium/drivers/freedreno/a4xx/fd4_screen.c 2019-03-31 23:16:37.000000000 +0000 @@ -30,7 +30,8 @@ #include "fd4_screen.h" #include "fd4_context.h" #include "fd4_format.h" -#include "ir3_compiler.h" + +#include "ir3/ir3_compiler.h" static boolean fd4_screen_is_format_supported(struct pipe_screen *pscreen, diff -Nru mesa-18.3.3/src/gallium/drivers/freedreno/a5xx/a5xx.xml.h mesa-19.0.1/src/gallium/drivers/freedreno/a5xx/a5xx.xml.h --- mesa-18.3.3/src/gallium/drivers/freedreno/a5xx/a5xx.xml.h 2018-12-07 18:58:04.000000000 +0000 +++ mesa-19.0.1/src/gallium/drivers/freedreno/a5xx/a5xx.xml.h 1970-01-01 00:00:00.000000000 +0000 @@ -1,5226 +0,0 @@ -#ifndef A5XX_XML -#define A5XX_XML - -/* Autogenerated file, DO NOT EDIT manually! 
- -This file was generated by the rules-ng-ng headergen tool in this git repository: -http://github.com/freedreno/envytools/ -git clone https://github.com/freedreno/envytools.git - -The rules-ng-ng source files this header was generated from are: -- /home/robclark/src/envytools/rnndb/adreno.xml ( 501 bytes, from 2018-07-03 19:37:13) -- /home/robclark/src/envytools/rnndb/freedreno_copyright.xml ( 1572 bytes, from 2018-07-03 19:37:13) -- /home/robclark/src/envytools/rnndb/adreno/a2xx.xml ( 37936 bytes, from 2018-10-08 11:43:51) -- /home/robclark/src/envytools/rnndb/adreno/adreno_common.xml ( 14201 bytes, from 2018-10-08 11:43:51) -- /home/robclark/src/envytools/rnndb/adreno/adreno_pm4.xml ( 42864 bytes, from 2018-10-08 21:57:22) -- /home/robclark/src/envytools/rnndb/adreno/a3xx.xml ( 83840 bytes, from 2018-07-03 19:37:13) -- /home/robclark/src/envytools/rnndb/adreno/a4xx.xml ( 112086 bytes, from 2018-07-03 19:37:13) -- /home/robclark/src/envytools/rnndb/adreno/a5xx.xml ( 147240 bytes, from 2018-10-08 21:57:22) -- /home/robclark/src/envytools/rnndb/adreno/a6xx.xml ( 140514 bytes, from 2018-10-08 21:57:35) -- /home/robclark/src/envytools/rnndb/adreno/a6xx_gmu.xml ( 10431 bytes, from 2018-09-14 13:03:07) -- /home/robclark/src/envytools/rnndb/adreno/ocmem.xml ( 1773 bytes, from 2018-07-03 19:37:13) - -Copyright (C) 2013-2018 by the following authors: -- Rob Clark (robclark) -- Ilia Mirkin (imirkin) - -Permission is hereby granted, free of charge, to any person obtaining -a copy of this software and associated documentation files (the -"Software"), to deal in the Software without restriction, including -without limitation the rights to use, copy, modify, merge, publish, -distribute, sublicense, and/or sell copies of the Software, and to -permit persons to whom the Software is furnished to do so, subject to -the following conditions: - -The above copyright notice and this permission notice (including the -next paragraph) shall be included in all copies or substantial 
-portions of the Software. - -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, -EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF -MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. -IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE -LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION -OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION -WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. -*/ - - -enum a5xx_color_fmt { - RB5_A8_UNORM = 2, - RB5_R8_UNORM = 3, - RB5_R8_SNORM = 4, - RB5_R8_UINT = 5, - RB5_R8_SINT = 6, - RB5_R4G4B4A4_UNORM = 8, - RB5_R5G5B5A1_UNORM = 10, - RB5_R5G6B5_UNORM = 14, - RB5_R8G8_UNORM = 15, - RB5_R8G8_SNORM = 16, - RB5_R8G8_UINT = 17, - RB5_R8G8_SINT = 18, - RB5_R16_UNORM = 21, - RB5_R16_SNORM = 22, - RB5_R16_FLOAT = 23, - RB5_R16_UINT = 24, - RB5_R16_SINT = 25, - RB5_R8G8B8A8_UNORM = 48, - RB5_R8G8B8_UNORM = 49, - RB5_R8G8B8A8_SNORM = 50, - RB5_R8G8B8A8_UINT = 51, - RB5_R8G8B8A8_SINT = 52, - RB5_R10G10B10A2_UNORM = 55, - RB5_R10G10B10A2_UINT = 58, - RB5_R11G11B10_FLOAT = 66, - RB5_R16G16_UNORM = 67, - RB5_R16G16_SNORM = 68, - RB5_R16G16_FLOAT = 69, - RB5_R16G16_UINT = 70, - RB5_R16G16_SINT = 71, - RB5_R32_FLOAT = 74, - RB5_R32_UINT = 75, - RB5_R32_SINT = 76, - RB5_R16G16B16A16_UNORM = 96, - RB5_R16G16B16A16_SNORM = 97, - RB5_R16G16B16A16_FLOAT = 98, - RB5_R16G16B16A16_UINT = 99, - RB5_R16G16B16A16_SINT = 100, - RB5_R32G32_FLOAT = 103, - RB5_R32G32_UINT = 104, - RB5_R32G32_SINT = 105, - RB5_R32G32B32A32_FLOAT = 130, - RB5_R32G32B32A32_UINT = 131, - RB5_R32G32B32A32_SINT = 132, -}; - -enum a5xx_tile_mode { - TILE5_LINEAR = 0, - TILE5_2 = 2, - TILE5_3 = 3, -}; - -enum a5xx_vtx_fmt { - VFMT5_8_UNORM = 3, - VFMT5_8_SNORM = 4, - VFMT5_8_UINT = 5, - VFMT5_8_SINT = 6, - VFMT5_8_8_UNORM = 15, - VFMT5_8_8_SNORM = 16, - VFMT5_8_8_UINT = 17, - VFMT5_8_8_SINT = 18, - VFMT5_16_UNORM = 21, - VFMT5_16_SNORM = 22, - VFMT5_16_FLOAT = 23, - 
VFMT5_16_UINT = 24, - VFMT5_16_SINT = 25, - VFMT5_8_8_8_UNORM = 33, - VFMT5_8_8_8_SNORM = 34, - VFMT5_8_8_8_UINT = 35, - VFMT5_8_8_8_SINT = 36, - VFMT5_8_8_8_8_UNORM = 48, - VFMT5_8_8_8_8_SNORM = 50, - VFMT5_8_8_8_8_UINT = 51, - VFMT5_8_8_8_8_SINT = 52, - VFMT5_10_10_10_2_UNORM = 54, - VFMT5_10_10_10_2_SNORM = 57, - VFMT5_10_10_10_2_UINT = 58, - VFMT5_10_10_10_2_SINT = 59, - VFMT5_11_11_10_FLOAT = 66, - VFMT5_16_16_UNORM = 67, - VFMT5_16_16_SNORM = 68, - VFMT5_16_16_FLOAT = 69, - VFMT5_16_16_UINT = 70, - VFMT5_16_16_SINT = 71, - VFMT5_32_UNORM = 72, - VFMT5_32_SNORM = 73, - VFMT5_32_FLOAT = 74, - VFMT5_32_UINT = 75, - VFMT5_32_SINT = 76, - VFMT5_32_FIXED = 77, - VFMT5_16_16_16_UNORM = 88, - VFMT5_16_16_16_SNORM = 89, - VFMT5_16_16_16_FLOAT = 90, - VFMT5_16_16_16_UINT = 91, - VFMT5_16_16_16_SINT = 92, - VFMT5_16_16_16_16_UNORM = 96, - VFMT5_16_16_16_16_SNORM = 97, - VFMT5_16_16_16_16_FLOAT = 98, - VFMT5_16_16_16_16_UINT = 99, - VFMT5_16_16_16_16_SINT = 100, - VFMT5_32_32_UNORM = 101, - VFMT5_32_32_SNORM = 102, - VFMT5_32_32_FLOAT = 103, - VFMT5_32_32_UINT = 104, - VFMT5_32_32_SINT = 105, - VFMT5_32_32_FIXED = 106, - VFMT5_32_32_32_UNORM = 112, - VFMT5_32_32_32_SNORM = 113, - VFMT5_32_32_32_UINT = 114, - VFMT5_32_32_32_SINT = 115, - VFMT5_32_32_32_FLOAT = 116, - VFMT5_32_32_32_FIXED = 117, - VFMT5_32_32_32_32_UNORM = 128, - VFMT5_32_32_32_32_SNORM = 129, - VFMT5_32_32_32_32_FLOAT = 130, - VFMT5_32_32_32_32_UINT = 131, - VFMT5_32_32_32_32_SINT = 132, - VFMT5_32_32_32_32_FIXED = 133, -}; - -enum a5xx_tex_fmt { - TFMT5_A8_UNORM = 2, - TFMT5_8_UNORM = 3, - TFMT5_8_SNORM = 4, - TFMT5_8_UINT = 5, - TFMT5_8_SINT = 6, - TFMT5_4_4_4_4_UNORM = 8, - TFMT5_5_5_5_1_UNORM = 10, - TFMT5_5_6_5_UNORM = 14, - TFMT5_8_8_UNORM = 15, - TFMT5_8_8_SNORM = 16, - TFMT5_8_8_UINT = 17, - TFMT5_8_8_SINT = 18, - TFMT5_L8_A8_UNORM = 19, - TFMT5_16_UNORM = 21, - TFMT5_16_SNORM = 22, - TFMT5_16_FLOAT = 23, - TFMT5_16_UINT = 24, - TFMT5_16_SINT = 25, - TFMT5_8_8_8_8_UNORM = 48, - TFMT5_8_8_8_UNORM = 
49, - TFMT5_8_8_8_8_SNORM = 50, - TFMT5_8_8_8_8_UINT = 51, - TFMT5_8_8_8_8_SINT = 52, - TFMT5_9_9_9_E5_FLOAT = 53, - TFMT5_10_10_10_2_UNORM = 54, - TFMT5_10_10_10_2_UINT = 58, - TFMT5_11_11_10_FLOAT = 66, - TFMT5_16_16_UNORM = 67, - TFMT5_16_16_SNORM = 68, - TFMT5_16_16_FLOAT = 69, - TFMT5_16_16_UINT = 70, - TFMT5_16_16_SINT = 71, - TFMT5_32_FLOAT = 74, - TFMT5_32_UINT = 75, - TFMT5_32_SINT = 76, - TFMT5_16_16_16_16_UNORM = 96, - TFMT5_16_16_16_16_SNORM = 97, - TFMT5_16_16_16_16_FLOAT = 98, - TFMT5_16_16_16_16_UINT = 99, - TFMT5_16_16_16_16_SINT = 100, - TFMT5_32_32_FLOAT = 103, - TFMT5_32_32_UINT = 104, - TFMT5_32_32_SINT = 105, - TFMT5_32_32_32_UINT = 114, - TFMT5_32_32_32_SINT = 115, - TFMT5_32_32_32_FLOAT = 116, - TFMT5_32_32_32_32_FLOAT = 130, - TFMT5_32_32_32_32_UINT = 131, - TFMT5_32_32_32_32_SINT = 132, - TFMT5_X8Z24_UNORM = 160, - TFMT5_ETC2_RG11_UNORM = 171, - TFMT5_ETC2_RG11_SNORM = 172, - TFMT5_ETC2_R11_UNORM = 173, - TFMT5_ETC2_R11_SNORM = 174, - TFMT5_ETC1 = 175, - TFMT5_ETC2_RGB8 = 176, - TFMT5_ETC2_RGBA8 = 177, - TFMT5_ETC2_RGB8A1 = 178, - TFMT5_DXT1 = 179, - TFMT5_DXT3 = 180, - TFMT5_DXT5 = 181, - TFMT5_RGTC1_UNORM = 183, - TFMT5_RGTC1_SNORM = 184, - TFMT5_RGTC2_UNORM = 187, - TFMT5_RGTC2_SNORM = 188, - TFMT5_BPTC_UFLOAT = 190, - TFMT5_BPTC_FLOAT = 191, - TFMT5_BPTC = 192, - TFMT5_ASTC_4x4 = 193, - TFMT5_ASTC_5x4 = 194, - TFMT5_ASTC_5x5 = 195, - TFMT5_ASTC_6x5 = 196, - TFMT5_ASTC_6x6 = 197, - TFMT5_ASTC_8x5 = 198, - TFMT5_ASTC_8x6 = 199, - TFMT5_ASTC_8x8 = 200, - TFMT5_ASTC_10x5 = 201, - TFMT5_ASTC_10x6 = 202, - TFMT5_ASTC_10x8 = 203, - TFMT5_ASTC_10x10 = 204, - TFMT5_ASTC_12x10 = 205, - TFMT5_ASTC_12x12 = 206, -}; - -enum a5xx_tex_fetchsize { - TFETCH5_1_BYTE = 0, - TFETCH5_2_BYTE = 1, - TFETCH5_4_BYTE = 2, - TFETCH5_8_BYTE = 3, - TFETCH5_16_BYTE = 4, -}; - -enum a5xx_depth_format { - DEPTH5_NONE = 0, - DEPTH5_16 = 1, - DEPTH5_24_8 = 2, - DEPTH5_32 = 4, -}; - -enum a5xx_blit_buf { - BLIT_MRT0 = 0, - BLIT_MRT1 = 1, - BLIT_MRT2 = 2, - BLIT_MRT3 = 3, 
- BLIT_MRT4 = 4, - BLIT_MRT5 = 5, - BLIT_MRT6 = 6, - BLIT_MRT7 = 7, - BLIT_ZS = 8, - BLIT_S = 9, -}; - -enum a5xx_cp_perfcounter_select { - PERF_CP_ALWAYS_COUNT = 0, - PERF_CP_BUSY_GFX_CORE_IDLE = 1, - PERF_CP_BUSY_CYCLES = 2, - PERF_CP_PFP_IDLE = 3, - PERF_CP_PFP_BUSY_WORKING = 4, - PERF_CP_PFP_STALL_CYCLES_ANY = 5, - PERF_CP_PFP_STARVE_CYCLES_ANY = 6, - PERF_CP_PFP_ICACHE_MISS = 7, - PERF_CP_PFP_ICACHE_HIT = 8, - PERF_CP_PFP_MATCH_PM4_PKT_PROFILE = 9, - PERF_CP_ME_BUSY_WORKING = 10, - PERF_CP_ME_IDLE = 11, - PERF_CP_ME_STARVE_CYCLES_ANY = 12, - PERF_CP_ME_FIFO_EMPTY_PFP_IDLE = 13, - PERF_CP_ME_FIFO_EMPTY_PFP_BUSY = 14, - PERF_CP_ME_FIFO_FULL_ME_BUSY = 15, - PERF_CP_ME_FIFO_FULL_ME_NON_WORKING = 16, - PERF_CP_ME_STALL_CYCLES_ANY = 17, - PERF_CP_ME_ICACHE_MISS = 18, - PERF_CP_ME_ICACHE_HIT = 19, - PERF_CP_NUM_PREEMPTIONS = 20, - PERF_CP_PREEMPTION_REACTION_DELAY = 21, - PERF_CP_PREEMPTION_SWITCH_OUT_TIME = 22, - PERF_CP_PREEMPTION_SWITCH_IN_TIME = 23, - PERF_CP_DEAD_DRAWS_IN_BIN_RENDER = 24, - PERF_CP_PREDICATED_DRAWS_KILLED = 25, - PERF_CP_MODE_SWITCH = 26, - PERF_CP_ZPASS_DONE = 27, - PERF_CP_CONTEXT_DONE = 28, - PERF_CP_CACHE_FLUSH = 29, - PERF_CP_LONG_PREEMPTIONS = 30, -}; - -enum a5xx_rbbm_perfcounter_select { - PERF_RBBM_ALWAYS_COUNT = 0, - PERF_RBBM_ALWAYS_ON = 1, - PERF_RBBM_TSE_BUSY = 2, - PERF_RBBM_RAS_BUSY = 3, - PERF_RBBM_PC_DCALL_BUSY = 4, - PERF_RBBM_PC_VSD_BUSY = 5, - PERF_RBBM_STATUS_MASKED = 6, - PERF_RBBM_COM_BUSY = 7, - PERF_RBBM_DCOM_BUSY = 8, - PERF_RBBM_VBIF_BUSY = 9, - PERF_RBBM_VSC_BUSY = 10, - PERF_RBBM_TESS_BUSY = 11, - PERF_RBBM_UCHE_BUSY = 12, - PERF_RBBM_HLSQ_BUSY = 13, -}; - -enum a5xx_pc_perfcounter_select { - PERF_PC_BUSY_CYCLES = 0, - PERF_PC_WORKING_CYCLES = 1, - PERF_PC_STALL_CYCLES_VFD = 2, - PERF_PC_STALL_CYCLES_TSE = 3, - PERF_PC_STALL_CYCLES_VPC = 4, - PERF_PC_STALL_CYCLES_UCHE = 5, - PERF_PC_STALL_CYCLES_TESS = 6, - PERF_PC_STALL_CYCLES_TSE_ONLY = 7, - PERF_PC_STALL_CYCLES_VPC_ONLY = 8, - PERF_PC_PASS1_TF_STALL_CYCLES = 9, - 
PERF_PC_STARVE_CYCLES_FOR_INDEX = 10, - PERF_PC_STARVE_CYCLES_FOR_TESS_FACTOR = 11, - PERF_PC_STARVE_CYCLES_FOR_VIZ_STREAM = 12, - PERF_PC_STARVE_CYCLES_FOR_POSITION = 13, - PERF_PC_STARVE_CYCLES_DI = 14, - PERF_PC_VIS_STREAMS_LOADED = 15, - PERF_PC_INSTANCES = 16, - PERF_PC_VPC_PRIMITIVES = 17, - PERF_PC_DEAD_PRIM = 18, - PERF_PC_LIVE_PRIM = 19, - PERF_PC_VERTEX_HITS = 20, - PERF_PC_IA_VERTICES = 21, - PERF_PC_IA_PRIMITIVES = 22, - PERF_PC_GS_PRIMITIVES = 23, - PERF_PC_HS_INVOCATIONS = 24, - PERF_PC_DS_INVOCATIONS = 25, - PERF_PC_VS_INVOCATIONS = 26, - PERF_PC_GS_INVOCATIONS = 27, - PERF_PC_DS_PRIMITIVES = 28, - PERF_PC_VPC_POS_DATA_TRANSACTION = 29, - PERF_PC_3D_DRAWCALLS = 30, - PERF_PC_2D_DRAWCALLS = 31, - PERF_PC_NON_DRAWCALL_GLOBAL_EVENTS = 32, - PERF_TESS_BUSY_CYCLES = 33, - PERF_TESS_WORKING_CYCLES = 34, - PERF_TESS_STALL_CYCLES_PC = 35, - PERF_TESS_STARVE_CYCLES_PC = 36, -}; - -enum a5xx_vfd_perfcounter_select { - PERF_VFD_BUSY_CYCLES = 0, - PERF_VFD_STALL_CYCLES_UCHE = 1, - PERF_VFD_STALL_CYCLES_VPC_ALLOC = 2, - PERF_VFD_STALL_CYCLES_MISS_VB = 3, - PERF_VFD_STALL_CYCLES_MISS_Q = 4, - PERF_VFD_STALL_CYCLES_SP_INFO = 5, - PERF_VFD_STALL_CYCLES_SP_ATTR = 6, - PERF_VFD_STALL_CYCLES_VFDP_VB = 7, - PERF_VFD_STALL_CYCLES_VFDP_Q = 8, - PERF_VFD_DECODER_PACKER_STALL = 9, - PERF_VFD_STARVE_CYCLES_UCHE = 10, - PERF_VFD_RBUFFER_FULL = 11, - PERF_VFD_ATTR_INFO_FIFO_FULL = 12, - PERF_VFD_DECODED_ATTRIBUTE_BYTES = 13, - PERF_VFD_NUM_ATTRIBUTES = 14, - PERF_VFD_INSTRUCTIONS = 15, - PERF_VFD_UPPER_SHADER_FIBERS = 16, - PERF_VFD_LOWER_SHADER_FIBERS = 17, - PERF_VFD_MODE_0_FIBERS = 18, - PERF_VFD_MODE_1_FIBERS = 19, - PERF_VFD_MODE_2_FIBERS = 20, - PERF_VFD_MODE_3_FIBERS = 21, - PERF_VFD_MODE_4_FIBERS = 22, - PERF_VFD_TOTAL_VERTICES = 23, - PERF_VFD_NUM_ATTR_MISS = 24, - PERF_VFD_1_BURST_REQ = 25, - PERF_VFDP_STALL_CYCLES_VFD = 26, - PERF_VFDP_STALL_CYCLES_VFD_INDEX = 27, - PERF_VFDP_STALL_CYCLES_VFD_PROG = 28, - PERF_VFDP_STARVE_CYCLES_PC = 29, - 
PERF_VFDP_VS_STAGE_32_WAVES = 30, -}; - -enum a5xx_hlsq_perfcounter_select { - PERF_HLSQ_BUSY_CYCLES = 0, - PERF_HLSQ_STALL_CYCLES_UCHE = 1, - PERF_HLSQ_STALL_CYCLES_SP_STATE = 2, - PERF_HLSQ_STALL_CYCLES_SP_FS_STAGE = 3, - PERF_HLSQ_UCHE_LATENCY_CYCLES = 4, - PERF_HLSQ_UCHE_LATENCY_COUNT = 5, - PERF_HLSQ_FS_STAGE_32_WAVES = 6, - PERF_HLSQ_FS_STAGE_64_WAVES = 7, - PERF_HLSQ_QUADS = 8, - PERF_HLSQ_SP_STATE_COPY_TRANS_FS_STAGE = 9, - PERF_HLSQ_SP_STATE_COPY_TRANS_VS_STAGE = 10, - PERF_HLSQ_TP_STATE_COPY_TRANS_FS_STAGE = 11, - PERF_HLSQ_TP_STATE_COPY_TRANS_VS_STAGE = 12, - PERF_HLSQ_CS_INVOCATIONS = 13, - PERF_HLSQ_COMPUTE_DRAWCALLS = 14, -}; - -enum a5xx_vpc_perfcounter_select { - PERF_VPC_BUSY_CYCLES = 0, - PERF_VPC_WORKING_CYCLES = 1, - PERF_VPC_STALL_CYCLES_UCHE = 2, - PERF_VPC_STALL_CYCLES_VFD_WACK = 3, - PERF_VPC_STALL_CYCLES_HLSQ_PRIM_ALLOC = 4, - PERF_VPC_STALL_CYCLES_PC = 5, - PERF_VPC_STALL_CYCLES_SP_LM = 6, - PERF_VPC_POS_EXPORT_STALL_CYCLES = 7, - PERF_VPC_STARVE_CYCLES_SP = 8, - PERF_VPC_STARVE_CYCLES_LRZ = 9, - PERF_VPC_PC_PRIMITIVES = 10, - PERF_VPC_SP_COMPONENTS = 11, - PERF_VPC_SP_LM_PRIMITIVES = 12, - PERF_VPC_SP_LM_COMPONENTS = 13, - PERF_VPC_SP_LM_DWORDS = 14, - PERF_VPC_STREAMOUT_COMPONENTS = 15, - PERF_VPC_GRANT_PHASES = 16, -}; - -enum a5xx_tse_perfcounter_select { - PERF_TSE_BUSY_CYCLES = 0, - PERF_TSE_CLIPPING_CYCLES = 1, - PERF_TSE_STALL_CYCLES_RAS = 2, - PERF_TSE_STALL_CYCLES_LRZ_BARYPLANE = 3, - PERF_TSE_STALL_CYCLES_LRZ_ZPLANE = 4, - PERF_TSE_STARVE_CYCLES_PC = 5, - PERF_TSE_INPUT_PRIM = 6, - PERF_TSE_INPUT_NULL_PRIM = 7, - PERF_TSE_TRIVAL_REJ_PRIM = 8, - PERF_TSE_CLIPPED_PRIM = 9, - PERF_TSE_ZERO_AREA_PRIM = 10, - PERF_TSE_FACENESS_CULLED_PRIM = 11, - PERF_TSE_ZERO_PIXEL_PRIM = 12, - PERF_TSE_OUTPUT_NULL_PRIM = 13, - PERF_TSE_OUTPUT_VISIBLE_PRIM = 14, - PERF_TSE_CINVOCATION = 15, - PERF_TSE_CPRIMITIVES = 16, - PERF_TSE_2D_INPUT_PRIM = 17, - PERF_TSE_2D_ALIVE_CLCLES = 18, -}; - -enum a5xx_ras_perfcounter_select { - PERF_RAS_BUSY_CYCLES = 
0, - PERF_RAS_SUPERTILE_ACTIVE_CYCLES = 1, - PERF_RAS_STALL_CYCLES_LRZ = 2, - PERF_RAS_STARVE_CYCLES_TSE = 3, - PERF_RAS_SUPER_TILES = 4, - PERF_RAS_8X4_TILES = 5, - PERF_RAS_MASKGEN_ACTIVE = 6, - PERF_RAS_FULLY_COVERED_SUPER_TILES = 7, - PERF_RAS_FULLY_COVERED_8X4_TILES = 8, - PERF_RAS_PRIM_KILLED_INVISILBE = 9, -}; - -enum a5xx_lrz_perfcounter_select { - PERF_LRZ_BUSY_CYCLES = 0, - PERF_LRZ_STARVE_CYCLES_RAS = 1, - PERF_LRZ_STALL_CYCLES_RB = 2, - PERF_LRZ_STALL_CYCLES_VSC = 3, - PERF_LRZ_STALL_CYCLES_VPC = 4, - PERF_LRZ_STALL_CYCLES_FLAG_PREFETCH = 5, - PERF_LRZ_STALL_CYCLES_UCHE = 6, - PERF_LRZ_LRZ_READ = 7, - PERF_LRZ_LRZ_WRITE = 8, - PERF_LRZ_READ_LATENCY = 9, - PERF_LRZ_MERGE_CACHE_UPDATING = 10, - PERF_LRZ_PRIM_KILLED_BY_MASKGEN = 11, - PERF_LRZ_PRIM_KILLED_BY_LRZ = 12, - PERF_LRZ_VISIBLE_PRIM_AFTER_LRZ = 13, - PERF_LRZ_FULL_8X8_TILES = 14, - PERF_LRZ_PARTIAL_8X8_TILES = 15, - PERF_LRZ_TILE_KILLED = 16, - PERF_LRZ_TOTAL_PIXEL = 17, - PERF_LRZ_VISIBLE_PIXEL_AFTER_LRZ = 18, -}; - -enum a5xx_uche_perfcounter_select { - PERF_UCHE_BUSY_CYCLES = 0, - PERF_UCHE_STALL_CYCLES_VBIF = 1, - PERF_UCHE_VBIF_LATENCY_CYCLES = 2, - PERF_UCHE_VBIF_LATENCY_SAMPLES = 3, - PERF_UCHE_VBIF_READ_BEATS_TP = 4, - PERF_UCHE_VBIF_READ_BEATS_VFD = 5, - PERF_UCHE_VBIF_READ_BEATS_HLSQ = 6, - PERF_UCHE_VBIF_READ_BEATS_LRZ = 7, - PERF_UCHE_VBIF_READ_BEATS_SP = 8, - PERF_UCHE_READ_REQUESTS_TP = 9, - PERF_UCHE_READ_REQUESTS_VFD = 10, - PERF_UCHE_READ_REQUESTS_HLSQ = 11, - PERF_UCHE_READ_REQUESTS_LRZ = 12, - PERF_UCHE_READ_REQUESTS_SP = 13, - PERF_UCHE_WRITE_REQUESTS_LRZ = 14, - PERF_UCHE_WRITE_REQUESTS_SP = 15, - PERF_UCHE_WRITE_REQUESTS_VPC = 16, - PERF_UCHE_WRITE_REQUESTS_VSC = 17, - PERF_UCHE_EVICTS = 18, - PERF_UCHE_BANK_REQ0 = 19, - PERF_UCHE_BANK_REQ1 = 20, - PERF_UCHE_BANK_REQ2 = 21, - PERF_UCHE_BANK_REQ3 = 22, - PERF_UCHE_BANK_REQ4 = 23, - PERF_UCHE_BANK_REQ5 = 24, - PERF_UCHE_BANK_REQ6 = 25, - PERF_UCHE_BANK_REQ7 = 26, - PERF_UCHE_VBIF_READ_BEATS_CH0 = 27, - 
PERF_UCHE_VBIF_READ_BEATS_CH1 = 28, - PERF_UCHE_GMEM_READ_BEATS = 29, - PERF_UCHE_FLAG_COUNT = 30, -}; - -enum a5xx_tp_perfcounter_select { - PERF_TP_BUSY_CYCLES = 0, - PERF_TP_STALL_CYCLES_UCHE = 1, - PERF_TP_LATENCY_CYCLES = 2, - PERF_TP_LATENCY_TRANS = 3, - PERF_TP_FLAG_CACHE_REQUEST_SAMPLES = 4, - PERF_TP_FLAG_CACHE_REQUEST_LATENCY = 5, - PERF_TP_L1_CACHELINE_REQUESTS = 6, - PERF_TP_L1_CACHELINE_MISSES = 7, - PERF_TP_SP_TP_TRANS = 8, - PERF_TP_TP_SP_TRANS = 9, - PERF_TP_OUTPUT_PIXELS = 10, - PERF_TP_FILTER_WORKLOAD_16BIT = 11, - PERF_TP_FILTER_WORKLOAD_32BIT = 12, - PERF_TP_QUADS_RECEIVED = 13, - PERF_TP_QUADS_OFFSET = 14, - PERF_TP_QUADS_SHADOW = 15, - PERF_TP_QUADS_ARRAY = 16, - PERF_TP_QUADS_GRADIENT = 17, - PERF_TP_QUADS_1D = 18, - PERF_TP_QUADS_2D = 19, - PERF_TP_QUADS_BUFFER = 20, - PERF_TP_QUADS_3D = 21, - PERF_TP_QUADS_CUBE = 22, - PERF_TP_STATE_CACHE_REQUESTS = 23, - PERF_TP_STATE_CACHE_MISSES = 24, - PERF_TP_DIVERGENT_QUADS_RECEIVED = 25, - PERF_TP_BINDLESS_STATE_CACHE_REQUESTS = 26, - PERF_TP_BINDLESS_STATE_CACHE_MISSES = 27, - PERF_TP_PRT_NON_RESIDENT_EVENTS = 28, - PERF_TP_OUTPUT_PIXELS_POINT = 29, - PERF_TP_OUTPUT_PIXELS_BILINEAR = 30, - PERF_TP_OUTPUT_PIXELS_MIP = 31, - PERF_TP_OUTPUT_PIXELS_ANISO = 32, - PERF_TP_OUTPUT_PIXELS_ZERO_LOD = 33, - PERF_TP_FLAG_CACHE_REQUESTS = 34, - PERF_TP_FLAG_CACHE_MISSES = 35, - PERF_TP_L1_5_L2_REQUESTS = 36, - PERF_TP_2D_OUTPUT_PIXELS = 37, - PERF_TP_2D_OUTPUT_PIXELS_POINT = 38, - PERF_TP_2D_OUTPUT_PIXELS_BILINEAR = 39, - PERF_TP_2D_FILTER_WORKLOAD_16BIT = 40, - PERF_TP_2D_FILTER_WORKLOAD_32BIT = 41, -}; - -enum a5xx_sp_perfcounter_select { - PERF_SP_BUSY_CYCLES = 0, - PERF_SP_ALU_WORKING_CYCLES = 1, - PERF_SP_EFU_WORKING_CYCLES = 2, - PERF_SP_STALL_CYCLES_VPC = 3, - PERF_SP_STALL_CYCLES_TP = 4, - PERF_SP_STALL_CYCLES_UCHE = 5, - PERF_SP_STALL_CYCLES_RB = 6, - PERF_SP_SCHEDULER_NON_WORKING = 7, - PERF_SP_WAVE_CONTEXTS = 8, - PERF_SP_WAVE_CONTEXT_CYCLES = 9, - PERF_SP_FS_STAGE_WAVE_CYCLES = 10, - 
PERF_SP_FS_STAGE_WAVE_SAMPLES = 11, - PERF_SP_VS_STAGE_WAVE_CYCLES = 12, - PERF_SP_VS_STAGE_WAVE_SAMPLES = 13, - PERF_SP_FS_STAGE_DURATION_CYCLES = 14, - PERF_SP_VS_STAGE_DURATION_CYCLES = 15, - PERF_SP_WAVE_CTRL_CYCLES = 16, - PERF_SP_WAVE_LOAD_CYCLES = 17, - PERF_SP_WAVE_EMIT_CYCLES = 18, - PERF_SP_WAVE_NOP_CYCLES = 19, - PERF_SP_WAVE_WAIT_CYCLES = 20, - PERF_SP_WAVE_FETCH_CYCLES = 21, - PERF_SP_WAVE_IDLE_CYCLES = 22, - PERF_SP_WAVE_END_CYCLES = 23, - PERF_SP_WAVE_LONG_SYNC_CYCLES = 24, - PERF_SP_WAVE_SHORT_SYNC_CYCLES = 25, - PERF_SP_WAVE_JOIN_CYCLES = 26, - PERF_SP_LM_LOAD_INSTRUCTIONS = 27, - PERF_SP_LM_STORE_INSTRUCTIONS = 28, - PERF_SP_LM_ATOMICS = 29, - PERF_SP_GM_LOAD_INSTRUCTIONS = 30, - PERF_SP_GM_STORE_INSTRUCTIONS = 31, - PERF_SP_GM_ATOMICS = 32, - PERF_SP_VS_STAGE_TEX_INSTRUCTIONS = 33, - PERF_SP_VS_STAGE_CFLOW_INSTRUCTIONS = 34, - PERF_SP_VS_STAGE_EFU_INSTRUCTIONS = 35, - PERF_SP_VS_STAGE_FULL_ALU_INSTRUCTIONS = 36, - PERF_SP_VS_STAGE_HALF_ALU_INSTRUCTIONS = 37, - PERF_SP_FS_STAGE_TEX_INSTRUCTIONS = 38, - PERF_SP_FS_STAGE_CFLOW_INSTRUCTIONS = 39, - PERF_SP_FS_STAGE_EFU_INSTRUCTIONS = 40, - PERF_SP_FS_STAGE_FULL_ALU_INSTRUCTIONS = 41, - PERF_SP_FS_STAGE_HALF_ALU_INSTRUCTIONS = 42, - PERF_SP_FS_STAGE_BARY_INSTRUCTIONS = 43, - PERF_SP_VS_INSTRUCTIONS = 44, - PERF_SP_FS_INSTRUCTIONS = 45, - PERF_SP_ADDR_LOCK_COUNT = 46, - PERF_SP_UCHE_READ_TRANS = 47, - PERF_SP_UCHE_WRITE_TRANS = 48, - PERF_SP_EXPORT_VPC_TRANS = 49, - PERF_SP_EXPORT_RB_TRANS = 50, - PERF_SP_PIXELS_KILLED = 51, - PERF_SP_ICL1_REQUESTS = 52, - PERF_SP_ICL1_MISSES = 53, - PERF_SP_ICL0_REQUESTS = 54, - PERF_SP_ICL0_MISSES = 55, - PERF_SP_HS_INSTRUCTIONS = 56, - PERF_SP_DS_INSTRUCTIONS = 57, - PERF_SP_GS_INSTRUCTIONS = 58, - PERF_SP_CS_INSTRUCTIONS = 59, - PERF_SP_GPR_READ = 60, - PERF_SP_GPR_WRITE = 61, - PERF_SP_LM_CH0_REQUESTS = 62, - PERF_SP_LM_CH1_REQUESTS = 63, - PERF_SP_LM_BANK_CONFLICTS = 64, -}; - -enum a5xx_rb_perfcounter_select { - PERF_RB_BUSY_CYCLES = 0, - 
PERF_RB_STALL_CYCLES_CCU = 1, - PERF_RB_STALL_CYCLES_HLSQ = 2, - PERF_RB_STALL_CYCLES_FIFO0_FULL = 3, - PERF_RB_STALL_CYCLES_FIFO1_FULL = 4, - PERF_RB_STALL_CYCLES_FIFO2_FULL = 5, - PERF_RB_STARVE_CYCLES_SP = 6, - PERF_RB_STARVE_CYCLES_LRZ_TILE = 7, - PERF_RB_STARVE_CYCLES_CCU = 8, - PERF_RB_STARVE_CYCLES_Z_PLANE = 9, - PERF_RB_STARVE_CYCLES_BARY_PLANE = 10, - PERF_RB_Z_WORKLOAD = 11, - PERF_RB_HLSQ_ACTIVE = 12, - PERF_RB_Z_READ = 13, - PERF_RB_Z_WRITE = 14, - PERF_RB_C_READ = 15, - PERF_RB_C_WRITE = 16, - PERF_RB_TOTAL_PASS = 17, - PERF_RB_Z_PASS = 18, - PERF_RB_Z_FAIL = 19, - PERF_RB_S_FAIL = 20, - PERF_RB_BLENDED_FXP_COMPONENTS = 21, - PERF_RB_BLENDED_FP16_COMPONENTS = 22, - RB_RESERVED = 23, - PERF_RB_2D_ALIVE_CYCLES = 24, - PERF_RB_2D_STALL_CYCLES_A2D = 25, - PERF_RB_2D_STARVE_CYCLES_SRC = 26, - PERF_RB_2D_STARVE_CYCLES_SP = 27, - PERF_RB_2D_STARVE_CYCLES_DST = 28, - PERF_RB_2D_VALID_PIXELS = 29, -}; - -enum a5xx_rb_samples_perfcounter_select { - TOTAL_SAMPLES = 0, - ZPASS_SAMPLES = 1, - ZFAIL_SAMPLES = 2, - SFAIL_SAMPLES = 3, -}; - -enum a5xx_vsc_perfcounter_select { - PERF_VSC_BUSY_CYCLES = 0, - PERF_VSC_WORKING_CYCLES = 1, - PERF_VSC_STALL_CYCLES_UCHE = 2, - PERF_VSC_EOT_NUM = 3, -}; - -enum a5xx_ccu_perfcounter_select { - PERF_CCU_BUSY_CYCLES = 0, - PERF_CCU_STALL_CYCLES_RB_DEPTH_RETURN = 1, - PERF_CCU_STALL_CYCLES_RB_COLOR_RETURN = 2, - PERF_CCU_STARVE_CYCLES_FLAG_RETURN = 3, - PERF_CCU_DEPTH_BLOCKS = 4, - PERF_CCU_COLOR_BLOCKS = 5, - PERF_CCU_DEPTH_BLOCK_HIT = 6, - PERF_CCU_COLOR_BLOCK_HIT = 7, - PERF_CCU_PARTIAL_BLOCK_READ = 8, - PERF_CCU_GMEM_READ = 9, - PERF_CCU_GMEM_WRITE = 10, - PERF_CCU_DEPTH_READ_FLAG0_COUNT = 11, - PERF_CCU_DEPTH_READ_FLAG1_COUNT = 12, - PERF_CCU_DEPTH_READ_FLAG2_COUNT = 13, - PERF_CCU_DEPTH_READ_FLAG3_COUNT = 14, - PERF_CCU_DEPTH_READ_FLAG4_COUNT = 15, - PERF_CCU_COLOR_READ_FLAG0_COUNT = 16, - PERF_CCU_COLOR_READ_FLAG1_COUNT = 17, - PERF_CCU_COLOR_READ_FLAG2_COUNT = 18, - PERF_CCU_COLOR_READ_FLAG3_COUNT = 19, - 
PERF_CCU_COLOR_READ_FLAG4_COUNT = 20, - PERF_CCU_2D_BUSY_CYCLES = 21, - PERF_CCU_2D_RD_REQ = 22, - PERF_CCU_2D_WR_REQ = 23, - PERF_CCU_2D_REORDER_STARVE_CYCLES = 24, - PERF_CCU_2D_PIXELS = 25, -}; - -enum a5xx_cmp_perfcounter_select { - PERF_CMPDECMP_STALL_CYCLES_VBIF = 0, - PERF_CMPDECMP_VBIF_LATENCY_CYCLES = 1, - PERF_CMPDECMP_VBIF_LATENCY_SAMPLES = 2, - PERF_CMPDECMP_VBIF_READ_DATA_CCU = 3, - PERF_CMPDECMP_VBIF_WRITE_DATA_CCU = 4, - PERF_CMPDECMP_VBIF_READ_REQUEST = 5, - PERF_CMPDECMP_VBIF_WRITE_REQUEST = 6, - PERF_CMPDECMP_VBIF_READ_DATA = 7, - PERF_CMPDECMP_VBIF_WRITE_DATA = 8, - PERF_CMPDECMP_FLAG_FETCH_CYCLES = 9, - PERF_CMPDECMP_FLAG_FETCH_SAMPLES = 10, - PERF_CMPDECMP_DEPTH_WRITE_FLAG1_COUNT = 11, - PERF_CMPDECMP_DEPTH_WRITE_FLAG2_COUNT = 12, - PERF_CMPDECMP_DEPTH_WRITE_FLAG3_COUNT = 13, - PERF_CMPDECMP_DEPTH_WRITE_FLAG4_COUNT = 14, - PERF_CMPDECMP_COLOR_WRITE_FLAG1_COUNT = 15, - PERF_CMPDECMP_COLOR_WRITE_FLAG2_COUNT = 16, - PERF_CMPDECMP_COLOR_WRITE_FLAG3_COUNT = 17, - PERF_CMPDECMP_COLOR_WRITE_FLAG4_COUNT = 18, - PERF_CMPDECMP_2D_STALL_CYCLES_VBIF_REQ = 19, - PERF_CMPDECMP_2D_STALL_CYCLES_VBIF_WR = 20, - PERF_CMPDECMP_2D_STALL_CYCLES_VBIF_RETURN = 21, - PERF_CMPDECMP_2D_RD_DATA = 22, - PERF_CMPDECMP_2D_WR_DATA = 23, -}; - -enum a5xx_vbif_perfcounter_select { - AXI_READ_REQUESTS_ID_0 = 0, - AXI_READ_REQUESTS_ID_1 = 1, - AXI_READ_REQUESTS_ID_2 = 2, - AXI_READ_REQUESTS_ID_3 = 3, - AXI_READ_REQUESTS_ID_4 = 4, - AXI_READ_REQUESTS_ID_5 = 5, - AXI_READ_REQUESTS_ID_6 = 6, - AXI_READ_REQUESTS_ID_7 = 7, - AXI_READ_REQUESTS_ID_8 = 8, - AXI_READ_REQUESTS_ID_9 = 9, - AXI_READ_REQUESTS_ID_10 = 10, - AXI_READ_REQUESTS_ID_11 = 11, - AXI_READ_REQUESTS_ID_12 = 12, - AXI_READ_REQUESTS_ID_13 = 13, - AXI_READ_REQUESTS_ID_14 = 14, - AXI_READ_REQUESTS_ID_15 = 15, - AXI0_READ_REQUESTS_TOTAL = 16, - AXI1_READ_REQUESTS_TOTAL = 17, - AXI2_READ_REQUESTS_TOTAL = 18, - AXI3_READ_REQUESTS_TOTAL = 19, - AXI_READ_REQUESTS_TOTAL = 20, - AXI_WRITE_REQUESTS_ID_0 = 21, - 
AXI_WRITE_REQUESTS_ID_1 = 22, - AXI_WRITE_REQUESTS_ID_2 = 23, - AXI_WRITE_REQUESTS_ID_3 = 24, - AXI_WRITE_REQUESTS_ID_4 = 25, - AXI_WRITE_REQUESTS_ID_5 = 26, - AXI_WRITE_REQUESTS_ID_6 = 27, - AXI_WRITE_REQUESTS_ID_7 = 28, - AXI_WRITE_REQUESTS_ID_8 = 29, - AXI_WRITE_REQUESTS_ID_9 = 30, - AXI_WRITE_REQUESTS_ID_10 = 31, - AXI_WRITE_REQUESTS_ID_11 = 32, - AXI_WRITE_REQUESTS_ID_12 = 33, - AXI_WRITE_REQUESTS_ID_13 = 34, - AXI_WRITE_REQUESTS_ID_14 = 35, - AXI_WRITE_REQUESTS_ID_15 = 36, - AXI0_WRITE_REQUESTS_TOTAL = 37, - AXI1_WRITE_REQUESTS_TOTAL = 38, - AXI2_WRITE_REQUESTS_TOTAL = 39, - AXI3_WRITE_REQUESTS_TOTAL = 40, - AXI_WRITE_REQUESTS_TOTAL = 41, - AXI_TOTAL_REQUESTS = 42, - AXI_READ_DATA_BEATS_ID_0 = 43, - AXI_READ_DATA_BEATS_ID_1 = 44, - AXI_READ_DATA_BEATS_ID_2 = 45, - AXI_READ_DATA_BEATS_ID_3 = 46, - AXI_READ_DATA_BEATS_ID_4 = 47, - AXI_READ_DATA_BEATS_ID_5 = 48, - AXI_READ_DATA_BEATS_ID_6 = 49, - AXI_READ_DATA_BEATS_ID_7 = 50, - AXI_READ_DATA_BEATS_ID_8 = 51, - AXI_READ_DATA_BEATS_ID_9 = 52, - AXI_READ_DATA_BEATS_ID_10 = 53, - AXI_READ_DATA_BEATS_ID_11 = 54, - AXI_READ_DATA_BEATS_ID_12 = 55, - AXI_READ_DATA_BEATS_ID_13 = 56, - AXI_READ_DATA_BEATS_ID_14 = 57, - AXI_READ_DATA_BEATS_ID_15 = 58, - AXI0_READ_DATA_BEATS_TOTAL = 59, - AXI1_READ_DATA_BEATS_TOTAL = 60, - AXI2_READ_DATA_BEATS_TOTAL = 61, - AXI3_READ_DATA_BEATS_TOTAL = 62, - AXI_READ_DATA_BEATS_TOTAL = 63, - AXI_WRITE_DATA_BEATS_ID_0 = 64, - AXI_WRITE_DATA_BEATS_ID_1 = 65, - AXI_WRITE_DATA_BEATS_ID_2 = 66, - AXI_WRITE_DATA_BEATS_ID_3 = 67, - AXI_WRITE_DATA_BEATS_ID_4 = 68, - AXI_WRITE_DATA_BEATS_ID_5 = 69, - AXI_WRITE_DATA_BEATS_ID_6 = 70, - AXI_WRITE_DATA_BEATS_ID_7 = 71, - AXI_WRITE_DATA_BEATS_ID_8 = 72, - AXI_WRITE_DATA_BEATS_ID_9 = 73, - AXI_WRITE_DATA_BEATS_ID_10 = 74, - AXI_WRITE_DATA_BEATS_ID_11 = 75, - AXI_WRITE_DATA_BEATS_ID_12 = 76, - AXI_WRITE_DATA_BEATS_ID_13 = 77, - AXI_WRITE_DATA_BEATS_ID_14 = 78, - AXI_WRITE_DATA_BEATS_ID_15 = 79, - AXI0_WRITE_DATA_BEATS_TOTAL = 80, - 
AXI1_WRITE_DATA_BEATS_TOTAL = 81, - AXI2_WRITE_DATA_BEATS_TOTAL = 82, - AXI3_WRITE_DATA_BEATS_TOTAL = 83, - AXI_WRITE_DATA_BEATS_TOTAL = 84, - AXI_DATA_BEATS_TOTAL = 85, -}; - -enum a5xx_tex_filter { - A5XX_TEX_NEAREST = 0, - A5XX_TEX_LINEAR = 1, - A5XX_TEX_ANISO = 2, -}; - -enum a5xx_tex_clamp { - A5XX_TEX_REPEAT = 0, - A5XX_TEX_CLAMP_TO_EDGE = 1, - A5XX_TEX_MIRROR_REPEAT = 2, - A5XX_TEX_CLAMP_TO_BORDER = 3, - A5XX_TEX_MIRROR_CLAMP = 4, -}; - -enum a5xx_tex_aniso { - A5XX_TEX_ANISO_1 = 0, - A5XX_TEX_ANISO_2 = 1, - A5XX_TEX_ANISO_4 = 2, - A5XX_TEX_ANISO_8 = 3, - A5XX_TEX_ANISO_16 = 4, -}; - -enum a5xx_tex_swiz { - A5XX_TEX_X = 0, - A5XX_TEX_Y = 1, - A5XX_TEX_Z = 2, - A5XX_TEX_W = 3, - A5XX_TEX_ZERO = 4, - A5XX_TEX_ONE = 5, -}; - -enum a5xx_tex_type { - A5XX_TEX_1D = 0, - A5XX_TEX_2D = 1, - A5XX_TEX_CUBE = 2, - A5XX_TEX_3D = 3, -}; - -#define A5XX_INT0_RBBM_GPU_IDLE 0x00000001 -#define A5XX_INT0_RBBM_AHB_ERROR 0x00000002 -#define A5XX_INT0_RBBM_TRANSFER_TIMEOUT 0x00000004 -#define A5XX_INT0_RBBM_ME_MS_TIMEOUT 0x00000008 -#define A5XX_INT0_RBBM_PFP_MS_TIMEOUT 0x00000010 -#define A5XX_INT0_RBBM_ETS_MS_TIMEOUT 0x00000020 -#define A5XX_INT0_RBBM_ATB_ASYNC_OVERFLOW 0x00000040 -#define A5XX_INT0_RBBM_GPC_ERROR 0x00000080 -#define A5XX_INT0_CP_SW 0x00000100 -#define A5XX_INT0_CP_HW_ERROR 0x00000200 -#define A5XX_INT0_CP_CCU_FLUSH_DEPTH_TS 0x00000400 -#define A5XX_INT0_CP_CCU_FLUSH_COLOR_TS 0x00000800 -#define A5XX_INT0_CP_CCU_RESOLVE_TS 0x00001000 -#define A5XX_INT0_CP_IB2 0x00002000 -#define A5XX_INT0_CP_IB1 0x00004000 -#define A5XX_INT0_CP_RB 0x00008000 -#define A5XX_INT0_CP_UNUSED_1 0x00010000 -#define A5XX_INT0_CP_RB_DONE_TS 0x00020000 -#define A5XX_INT0_CP_WT_DONE_TS 0x00040000 -#define A5XX_INT0_UNKNOWN_1 0x00080000 -#define A5XX_INT0_CP_CACHE_FLUSH_TS 0x00100000 -#define A5XX_INT0_UNUSED_2 0x00200000 -#define A5XX_INT0_RBBM_ATB_BUS_OVERFLOW 0x00400000 -#define A5XX_INT0_MISC_HANG_DETECT 0x00800000 -#define A5XX_INT0_UCHE_OOB_ACCESS 0x01000000 -#define 
A5XX_INT0_UCHE_TRAP_INTR 0x02000000 -#define A5XX_INT0_DEBBUS_INTR_0 0x04000000 -#define A5XX_INT0_DEBBUS_INTR_1 0x08000000 -#define A5XX_INT0_GPMU_VOLTAGE_DROOP 0x10000000 -#define A5XX_INT0_GPMU_FIRMWARE 0x20000000 -#define A5XX_INT0_ISDB_CPU_IRQ 0x40000000 -#define A5XX_INT0_ISDB_UNDER_DEBUG 0x80000000 -#define A5XX_CP_INT_CP_OPCODE_ERROR 0x00000001 -#define A5XX_CP_INT_CP_RESERVED_BIT_ERROR 0x00000002 -#define A5XX_CP_INT_CP_HW_FAULT_ERROR 0x00000004 -#define A5XX_CP_INT_CP_DMA_ERROR 0x00000008 -#define A5XX_CP_INT_CP_REGISTER_PROTECTION_ERROR 0x00000010 -#define A5XX_CP_INT_CP_AHB_ERROR 0x00000020 -#define REG_A5XX_CP_RB_BASE 0x00000800 - -#define REG_A5XX_CP_RB_BASE_HI 0x00000801 - -#define REG_A5XX_CP_RB_CNTL 0x00000802 - -#define REG_A5XX_CP_RB_RPTR_ADDR 0x00000804 - -#define REG_A5XX_CP_RB_RPTR_ADDR_HI 0x00000805 - -#define REG_A5XX_CP_RB_RPTR 0x00000806 - -#define REG_A5XX_CP_RB_WPTR 0x00000807 - -#define REG_A5XX_CP_PFP_STAT_ADDR 0x00000808 - -#define REG_A5XX_CP_PFP_STAT_DATA 0x00000809 - -#define REG_A5XX_CP_DRAW_STATE_ADDR 0x0000080b - -#define REG_A5XX_CP_DRAW_STATE_DATA 0x0000080c - -#define REG_A5XX_CP_ME_NRT_ADDR_LO 0x0000080d - -#define REG_A5XX_CP_ME_NRT_ADDR_HI 0x0000080e - -#define REG_A5XX_CP_ME_NRT_DATA 0x00000810 - -#define REG_A5XX_CP_CRASH_SCRIPT_BASE_LO 0x00000817 - -#define REG_A5XX_CP_CRASH_SCRIPT_BASE_HI 0x00000818 - -#define REG_A5XX_CP_CRASH_DUMP_CNTL 0x00000819 - -#define REG_A5XX_CP_ME_STAT_ADDR 0x0000081a - -#define REG_A5XX_CP_ROQ_THRESHOLDS_1 0x0000081f - -#define REG_A5XX_CP_ROQ_THRESHOLDS_2 0x00000820 - -#define REG_A5XX_CP_ROQ_DBG_ADDR 0x00000821 - -#define REG_A5XX_CP_ROQ_DBG_DATA 0x00000822 - -#define REG_A5XX_CP_MEQ_DBG_ADDR 0x00000823 - -#define REG_A5XX_CP_MEQ_DBG_DATA 0x00000824 - -#define REG_A5XX_CP_MEQ_THRESHOLDS 0x00000825 - -#define REG_A5XX_CP_MERCIU_SIZE 0x00000826 - -#define REG_A5XX_CP_MERCIU_DBG_ADDR 0x00000827 - -#define REG_A5XX_CP_MERCIU_DBG_DATA_1 0x00000828 - -#define REG_A5XX_CP_MERCIU_DBG_DATA_2 
0x00000829 - -#define REG_A5XX_CP_PFP_UCODE_DBG_ADDR 0x0000082a - -#define REG_A5XX_CP_PFP_UCODE_DBG_DATA 0x0000082b - -#define REG_A5XX_CP_ME_UCODE_DBG_ADDR 0x0000082f - -#define REG_A5XX_CP_ME_UCODE_DBG_DATA 0x00000830 - -#define REG_A5XX_CP_CNTL 0x00000831 - -#define REG_A5XX_CP_PFP_ME_CNTL 0x00000832 - -#define REG_A5XX_CP_CHICKEN_DBG 0x00000833 - -#define REG_A5XX_CP_PFP_INSTR_BASE_LO 0x00000835 - -#define REG_A5XX_CP_PFP_INSTR_BASE_HI 0x00000836 - -#define REG_A5XX_CP_ME_INSTR_BASE_LO 0x00000838 - -#define REG_A5XX_CP_ME_INSTR_BASE_HI 0x00000839 - -#define REG_A5XX_CP_CONTEXT_SWITCH_CNTL 0x0000083b - -#define REG_A5XX_CP_CONTEXT_SWITCH_RESTORE_ADDR_LO 0x0000083c - -#define REG_A5XX_CP_CONTEXT_SWITCH_RESTORE_ADDR_HI 0x0000083d - -#define REG_A5XX_CP_CONTEXT_SWITCH_SAVE_ADDR_LO 0x0000083e - -#define REG_A5XX_CP_CONTEXT_SWITCH_SAVE_ADDR_HI 0x0000083f - -#define REG_A5XX_CP_CONTEXT_SWITCH_SMMU_INFO_LO 0x00000840 - -#define REG_A5XX_CP_CONTEXT_SWITCH_SMMU_INFO_HI 0x00000841 - -#define REG_A5XX_CP_ADDR_MODE_CNTL 0x00000860 - -#define REG_A5XX_CP_ME_STAT_DATA 0x00000b14 - -#define REG_A5XX_CP_WFI_PEND_CTR 0x00000b15 - -#define REG_A5XX_CP_INTERRUPT_STATUS 0x00000b18 - -#define REG_A5XX_CP_HW_FAULT 0x00000b1a - -#define REG_A5XX_CP_PROTECT_STATUS 0x00000b1c - -#define REG_A5XX_CP_IB1_BASE 0x00000b1f - -#define REG_A5XX_CP_IB1_BASE_HI 0x00000b20 - -#define REG_A5XX_CP_IB1_BUFSZ 0x00000b21 - -#define REG_A5XX_CP_IB2_BASE 0x00000b22 - -#define REG_A5XX_CP_IB2_BASE_HI 0x00000b23 - -#define REG_A5XX_CP_IB2_BUFSZ 0x00000b24 - -static inline uint32_t REG_A5XX_CP_SCRATCH(uint32_t i0) { return 0x00000b78 + 0x1*i0; } - -static inline uint32_t REG_A5XX_CP_SCRATCH_REG(uint32_t i0) { return 0x00000b78 + 0x1*i0; } - -static inline uint32_t REG_A5XX_CP_PROTECT(uint32_t i0) { return 0x00000880 + 0x1*i0; } - -static inline uint32_t REG_A5XX_CP_PROTECT_REG(uint32_t i0) { return 0x00000880 + 0x1*i0; } -#define A5XX_CP_PROTECT_REG_BASE_ADDR__MASK 0x0001ffff -#define 
A5XX_CP_PROTECT_REG_BASE_ADDR__SHIFT 0 -static inline uint32_t A5XX_CP_PROTECT_REG_BASE_ADDR(uint32_t val) -{ - return ((val) << A5XX_CP_PROTECT_REG_BASE_ADDR__SHIFT) & A5XX_CP_PROTECT_REG_BASE_ADDR__MASK; -} -#define A5XX_CP_PROTECT_REG_MASK_LEN__MASK 0x1f000000 -#define A5XX_CP_PROTECT_REG_MASK_LEN__SHIFT 24 -static inline uint32_t A5XX_CP_PROTECT_REG_MASK_LEN(uint32_t val) -{ - return ((val) << A5XX_CP_PROTECT_REG_MASK_LEN__SHIFT) & A5XX_CP_PROTECT_REG_MASK_LEN__MASK; -} -#define A5XX_CP_PROTECT_REG_TRAP_WRITE 0x20000000 -#define A5XX_CP_PROTECT_REG_TRAP_READ 0x40000000 - -#define REG_A5XX_CP_PROTECT_CNTL 0x000008a0 - -#define REG_A5XX_CP_AHB_FAULT 0x00000b1b - -#define REG_A5XX_CP_PERFCTR_CP_SEL_0 0x00000bb0 - -#define REG_A5XX_CP_PERFCTR_CP_SEL_1 0x00000bb1 - -#define REG_A5XX_CP_PERFCTR_CP_SEL_2 0x00000bb2 - -#define REG_A5XX_CP_PERFCTR_CP_SEL_3 0x00000bb3 - -#define REG_A5XX_CP_PERFCTR_CP_SEL_4 0x00000bb4 - -#define REG_A5XX_CP_PERFCTR_CP_SEL_5 0x00000bb5 - -#define REG_A5XX_CP_PERFCTR_CP_SEL_6 0x00000bb6 - -#define REG_A5XX_CP_PERFCTR_CP_SEL_7 0x00000bb7 - -#define REG_A5XX_VSC_ADDR_MODE_CNTL 0x00000bc1 - -#define REG_A5XX_CP_POWERCTR_CP_SEL_0 0x00000bba - -#define REG_A5XX_CP_POWERCTR_CP_SEL_1 0x00000bbb - -#define REG_A5XX_CP_POWERCTR_CP_SEL_2 0x00000bbc - -#define REG_A5XX_CP_POWERCTR_CP_SEL_3 0x00000bbd - -#define REG_A5XX_RBBM_CFG_DBGBUS_SEL_A 0x00000004 - -#define REG_A5XX_RBBM_CFG_DBGBUS_SEL_B 0x00000005 - -#define REG_A5XX_RBBM_CFG_DBGBUS_SEL_C 0x00000006 - -#define REG_A5XX_RBBM_CFG_DBGBUS_SEL_D 0x00000007 - -#define REG_A5XX_RBBM_CFG_DBGBUS_CNTLT 0x00000008 - -#define REG_A5XX_RBBM_CFG_DBGBUS_CNTLM 0x00000009 - -#define REG_A5XX_RBBM_CFG_DEBBUS_CTLTM_ENABLE_SHIFT 0x00000018 - -#define REG_A5XX_RBBM_CFG_DBGBUS_OPL 0x0000000a - -#define REG_A5XX_RBBM_CFG_DBGBUS_OPE 0x0000000b - -#define REG_A5XX_RBBM_CFG_DBGBUS_IVTL_0 0x0000000c - -#define REG_A5XX_RBBM_CFG_DBGBUS_IVTL_1 0x0000000d - -#define REG_A5XX_RBBM_CFG_DBGBUS_IVTL_2 0x0000000e - -#define 
REG_A5XX_RBBM_CFG_DBGBUS_IVTL_3 0x0000000f - -#define REG_A5XX_RBBM_CFG_DBGBUS_MASKL_0 0x00000010 - -#define REG_A5XX_RBBM_CFG_DBGBUS_MASKL_1 0x00000011 - -#define REG_A5XX_RBBM_CFG_DBGBUS_MASKL_2 0x00000012 - -#define REG_A5XX_RBBM_CFG_DBGBUS_MASKL_3 0x00000013 - -#define REG_A5XX_RBBM_CFG_DBGBUS_BYTEL_0 0x00000014 - -#define REG_A5XX_RBBM_CFG_DBGBUS_BYTEL_1 0x00000015 - -#define REG_A5XX_RBBM_CFG_DBGBUS_IVTE_0 0x00000016 - -#define REG_A5XX_RBBM_CFG_DBGBUS_IVTE_1 0x00000017 - -#define REG_A5XX_RBBM_CFG_DBGBUS_IVTE_2 0x00000018 - -#define REG_A5XX_RBBM_CFG_DBGBUS_IVTE_3 0x00000019 - -#define REG_A5XX_RBBM_CFG_DBGBUS_MASKE_0 0x0000001a - -#define REG_A5XX_RBBM_CFG_DBGBUS_MASKE_1 0x0000001b - -#define REG_A5XX_RBBM_CFG_DBGBUS_MASKE_2 0x0000001c - -#define REG_A5XX_RBBM_CFG_DBGBUS_MASKE_3 0x0000001d - -#define REG_A5XX_RBBM_CFG_DBGBUS_NIBBLEE 0x0000001e - -#define REG_A5XX_RBBM_CFG_DBGBUS_PTRC0 0x0000001f - -#define REG_A5XX_RBBM_CFG_DBGBUS_PTRC1 0x00000020 - -#define REG_A5XX_RBBM_CFG_DBGBUS_LOADREG 0x00000021 - -#define REG_A5XX_RBBM_CFG_DBGBUS_IDX 0x00000022 - -#define REG_A5XX_RBBM_CFG_DBGBUS_CLRC 0x00000023 - -#define REG_A5XX_RBBM_CFG_DBGBUS_LOADIVT 0x00000024 - -#define REG_A5XX_RBBM_INTERFACE_HANG_INT_CNTL 0x0000002f - -#define REG_A5XX_RBBM_INT_CLEAR_CMD 0x00000037 - -#define REG_A5XX_RBBM_INT_0_MASK 0x00000038 -#define A5XX_RBBM_INT_0_MASK_RBBM_GPU_IDLE 0x00000001 -#define A5XX_RBBM_INT_0_MASK_RBBM_AHB_ERROR 0x00000002 -#define A5XX_RBBM_INT_0_MASK_RBBM_TRANSFER_TIMEOUT 0x00000004 -#define A5XX_RBBM_INT_0_MASK_RBBM_ME_MS_TIMEOUT 0x00000008 -#define A5XX_RBBM_INT_0_MASK_RBBM_PFP_MS_TIMEOUT 0x00000010 -#define A5XX_RBBM_INT_0_MASK_RBBM_ETS_MS_TIMEOUT 0x00000020 -#define A5XX_RBBM_INT_0_MASK_RBBM_ATB_ASYNC_OVERFLOW 0x00000040 -#define A5XX_RBBM_INT_0_MASK_RBBM_GPC_ERROR 0x00000080 -#define A5XX_RBBM_INT_0_MASK_CP_SW 0x00000100 -#define A5XX_RBBM_INT_0_MASK_CP_HW_ERROR 0x00000200 -#define A5XX_RBBM_INT_0_MASK_CP_CCU_FLUSH_DEPTH_TS 0x00000400 -#define 
A5XX_RBBM_INT_0_MASK_CP_CCU_FLUSH_COLOR_TS 0x00000800 -#define A5XX_RBBM_INT_0_MASK_CP_CCU_RESOLVE_TS 0x00001000 -#define A5XX_RBBM_INT_0_MASK_CP_IB2 0x00002000 -#define A5XX_RBBM_INT_0_MASK_CP_IB1 0x00004000 -#define A5XX_RBBM_INT_0_MASK_CP_RB 0x00008000 -#define A5XX_RBBM_INT_0_MASK_CP_RB_DONE_TS 0x00020000 -#define A5XX_RBBM_INT_0_MASK_CP_WT_DONE_TS 0x00040000 -#define A5XX_RBBM_INT_0_MASK_CP_CACHE_FLUSH_TS 0x00100000 -#define A5XX_RBBM_INT_0_MASK_RBBM_ATB_BUS_OVERFLOW 0x00400000 -#define A5XX_RBBM_INT_0_MASK_MISC_HANG_DETECT 0x00800000 -#define A5XX_RBBM_INT_0_MASK_UCHE_OOB_ACCESS 0x01000000 -#define A5XX_RBBM_INT_0_MASK_UCHE_TRAP_INTR 0x02000000 -#define A5XX_RBBM_INT_0_MASK_DEBBUS_INTR_0 0x04000000 -#define A5XX_RBBM_INT_0_MASK_DEBBUS_INTR_1 0x08000000 -#define A5XX_RBBM_INT_0_MASK_GPMU_VOLTAGE_DROOP 0x10000000 -#define A5XX_RBBM_INT_0_MASK_GPMU_FIRMWARE 0x20000000 -#define A5XX_RBBM_INT_0_MASK_ISDB_CPU_IRQ 0x40000000 -#define A5XX_RBBM_INT_0_MASK_ISDB_UNDER_DEBUG 0x80000000 - -#define REG_A5XX_RBBM_AHB_DBG_CNTL 0x0000003f - -#define REG_A5XX_RBBM_EXT_VBIF_DBG_CNTL 0x00000041 - -#define REG_A5XX_RBBM_SW_RESET_CMD 0x00000043 - -#define REG_A5XX_RBBM_BLOCK_SW_RESET_CMD 0x00000045 - -#define REG_A5XX_RBBM_BLOCK_SW_RESET_CMD2 0x00000046 - -#define REG_A5XX_RBBM_DBG_LO_HI_GPIO 0x00000048 - -#define REG_A5XX_RBBM_EXT_TRACE_BUS_CNTL 0x00000049 - -#define REG_A5XX_RBBM_CLOCK_CNTL_TP0 0x0000004a - -#define REG_A5XX_RBBM_CLOCK_CNTL_TP1 0x0000004b - -#define REG_A5XX_RBBM_CLOCK_CNTL_TP2 0x0000004c - -#define REG_A5XX_RBBM_CLOCK_CNTL_TP3 0x0000004d - -#define REG_A5XX_RBBM_CLOCK_CNTL2_TP0 0x0000004e - -#define REG_A5XX_RBBM_CLOCK_CNTL2_TP1 0x0000004f - -#define REG_A5XX_RBBM_CLOCK_CNTL2_TP2 0x00000050 - -#define REG_A5XX_RBBM_CLOCK_CNTL2_TP3 0x00000051 - -#define REG_A5XX_RBBM_CLOCK_CNTL3_TP0 0x00000052 - -#define REG_A5XX_RBBM_CLOCK_CNTL3_TP1 0x00000053 - -#define REG_A5XX_RBBM_CLOCK_CNTL3_TP2 0x00000054 - -#define REG_A5XX_RBBM_CLOCK_CNTL3_TP3 0x00000055 - -#define 
REG_A5XX_RBBM_READ_AHB_THROUGH_DBG 0x00000059 - -#define REG_A5XX_RBBM_CLOCK_CNTL_UCHE 0x0000005a - -#define REG_A5XX_RBBM_CLOCK_CNTL2_UCHE 0x0000005b - -#define REG_A5XX_RBBM_CLOCK_CNTL3_UCHE 0x0000005c - -#define REG_A5XX_RBBM_CLOCK_CNTL4_UCHE 0x0000005d - -#define REG_A5XX_RBBM_CLOCK_HYST_UCHE 0x0000005e - -#define REG_A5XX_RBBM_CLOCK_DELAY_UCHE 0x0000005f - -#define REG_A5XX_RBBM_CLOCK_MODE_GPC 0x00000060 - -#define REG_A5XX_RBBM_CLOCK_DELAY_GPC 0x00000061 - -#define REG_A5XX_RBBM_CLOCK_HYST_GPC 0x00000062 - -#define REG_A5XX_RBBM_CLOCK_CNTL_TSE_RAS_RBBM 0x00000063 - -#define REG_A5XX_RBBM_CLOCK_HYST_TSE_RAS_RBBM 0x00000064 - -#define REG_A5XX_RBBM_CLOCK_DELAY_TSE_RAS_RBBM 0x00000065 - -#define REG_A5XX_RBBM_CLOCK_DELAY_HLSQ 0x00000066 - -#define REG_A5XX_RBBM_CLOCK_CNTL 0x00000067 - -#define REG_A5XX_RBBM_CLOCK_CNTL_SP0 0x00000068 - -#define REG_A5XX_RBBM_CLOCK_CNTL_SP1 0x00000069 - -#define REG_A5XX_RBBM_CLOCK_CNTL_SP2 0x0000006a - -#define REG_A5XX_RBBM_CLOCK_CNTL_SP3 0x0000006b - -#define REG_A5XX_RBBM_CLOCK_CNTL2_SP0 0x0000006c - -#define REG_A5XX_RBBM_CLOCK_CNTL2_SP1 0x0000006d - -#define REG_A5XX_RBBM_CLOCK_CNTL2_SP2 0x0000006e - -#define REG_A5XX_RBBM_CLOCK_CNTL2_SP3 0x0000006f - -#define REG_A5XX_RBBM_CLOCK_HYST_SP0 0x00000070 - -#define REG_A5XX_RBBM_CLOCK_HYST_SP1 0x00000071 - -#define REG_A5XX_RBBM_CLOCK_HYST_SP2 0x00000072 - -#define REG_A5XX_RBBM_CLOCK_HYST_SP3 0x00000073 - -#define REG_A5XX_RBBM_CLOCK_DELAY_SP0 0x00000074 - -#define REG_A5XX_RBBM_CLOCK_DELAY_SP1 0x00000075 - -#define REG_A5XX_RBBM_CLOCK_DELAY_SP2 0x00000076 - -#define REG_A5XX_RBBM_CLOCK_DELAY_SP3 0x00000077 - -#define REG_A5XX_RBBM_CLOCK_CNTL_RB0 0x00000078 - -#define REG_A5XX_RBBM_CLOCK_CNTL_RB1 0x00000079 - -#define REG_A5XX_RBBM_CLOCK_CNTL_RB2 0x0000007a - -#define REG_A5XX_RBBM_CLOCK_CNTL_RB3 0x0000007b - -#define REG_A5XX_RBBM_CLOCK_CNTL2_RB0 0x0000007c - -#define REG_A5XX_RBBM_CLOCK_CNTL2_RB1 0x0000007d - -#define REG_A5XX_RBBM_CLOCK_CNTL2_RB2 0x0000007e - -#define 
REG_A5XX_RBBM_CLOCK_CNTL2_RB3 0x0000007f - -#define REG_A5XX_RBBM_CLOCK_HYST_RAC 0x00000080 - -#define REG_A5XX_RBBM_CLOCK_DELAY_RAC 0x00000081 - -#define REG_A5XX_RBBM_CLOCK_CNTL_CCU0 0x00000082 - -#define REG_A5XX_RBBM_CLOCK_CNTL_CCU1 0x00000083 - -#define REG_A5XX_RBBM_CLOCK_CNTL_CCU2 0x00000084 - -#define REG_A5XX_RBBM_CLOCK_CNTL_CCU3 0x00000085 - -#define REG_A5XX_RBBM_CLOCK_HYST_RB_CCU0 0x00000086 - -#define REG_A5XX_RBBM_CLOCK_HYST_RB_CCU1 0x00000087 - -#define REG_A5XX_RBBM_CLOCK_HYST_RB_CCU2 0x00000088 - -#define REG_A5XX_RBBM_CLOCK_HYST_RB_CCU3 0x00000089 - -#define REG_A5XX_RBBM_CLOCK_CNTL_RAC 0x0000008a - -#define REG_A5XX_RBBM_CLOCK_CNTL2_RAC 0x0000008b - -#define REG_A5XX_RBBM_CLOCK_DELAY_RB_CCU_L1_0 0x0000008c - -#define REG_A5XX_RBBM_CLOCK_DELAY_RB_CCU_L1_1 0x0000008d - -#define REG_A5XX_RBBM_CLOCK_DELAY_RB_CCU_L1_2 0x0000008e - -#define REG_A5XX_RBBM_CLOCK_DELAY_RB_CCU_L1_3 0x0000008f - -#define REG_A5XX_RBBM_CLOCK_HYST_VFD 0x00000090 - -#define REG_A5XX_RBBM_CLOCK_MODE_VFD 0x00000091 - -#define REG_A5XX_RBBM_CLOCK_DELAY_VFD 0x00000092 - -#define REG_A5XX_RBBM_AHB_CNTL0 0x00000093 - -#define REG_A5XX_RBBM_AHB_CNTL1 0x00000094 - -#define REG_A5XX_RBBM_AHB_CNTL2 0x00000095 - -#define REG_A5XX_RBBM_AHB_CMD 0x00000096 - -#define REG_A5XX_RBBM_INTERFACE_HANG_MASK_CNTL11 0x0000009c - -#define REG_A5XX_RBBM_INTERFACE_HANG_MASK_CNTL12 0x0000009d - -#define REG_A5XX_RBBM_INTERFACE_HANG_MASK_CNTL13 0x0000009e - -#define REG_A5XX_RBBM_INTERFACE_HANG_MASK_CNTL14 0x0000009f - -#define REG_A5XX_RBBM_INTERFACE_HANG_MASK_CNTL15 0x000000a0 - -#define REG_A5XX_RBBM_INTERFACE_HANG_MASK_CNTL16 0x000000a1 - -#define REG_A5XX_RBBM_INTERFACE_HANG_MASK_CNTL17 0x000000a2 - -#define REG_A5XX_RBBM_INTERFACE_HANG_MASK_CNTL18 0x000000a3 - -#define REG_A5XX_RBBM_CLOCK_DELAY_TP0 0x000000a4 - -#define REG_A5XX_RBBM_CLOCK_DELAY_TP1 0x000000a5 - -#define REG_A5XX_RBBM_CLOCK_DELAY_TP2 0x000000a6 - -#define REG_A5XX_RBBM_CLOCK_DELAY_TP3 0x000000a7 - -#define 
REG_A5XX_RBBM_CLOCK_DELAY2_TP0 0x000000a8 - -#define REG_A5XX_RBBM_CLOCK_DELAY2_TP1 0x000000a9 - -#define REG_A5XX_RBBM_CLOCK_DELAY2_TP2 0x000000aa - -#define REG_A5XX_RBBM_CLOCK_DELAY2_TP3 0x000000ab - -#define REG_A5XX_RBBM_CLOCK_DELAY3_TP0 0x000000ac - -#define REG_A5XX_RBBM_CLOCK_DELAY3_TP1 0x000000ad - -#define REG_A5XX_RBBM_CLOCK_DELAY3_TP2 0x000000ae - -#define REG_A5XX_RBBM_CLOCK_DELAY3_TP3 0x000000af - -#define REG_A5XX_RBBM_CLOCK_HYST_TP0 0x000000b0 - -#define REG_A5XX_RBBM_CLOCK_HYST_TP1 0x000000b1 - -#define REG_A5XX_RBBM_CLOCK_HYST_TP2 0x000000b2 - -#define REG_A5XX_RBBM_CLOCK_HYST_TP3 0x000000b3 - -#define REG_A5XX_RBBM_CLOCK_HYST2_TP0 0x000000b4 - -#define REG_A5XX_RBBM_CLOCK_HYST2_TP1 0x000000b5 - -#define REG_A5XX_RBBM_CLOCK_HYST2_TP2 0x000000b6 - -#define REG_A5XX_RBBM_CLOCK_HYST2_TP3 0x000000b7 - -#define REG_A5XX_RBBM_CLOCK_HYST3_TP0 0x000000b8 - -#define REG_A5XX_RBBM_CLOCK_HYST3_TP1 0x000000b9 - -#define REG_A5XX_RBBM_CLOCK_HYST3_TP2 0x000000ba - -#define REG_A5XX_RBBM_CLOCK_HYST3_TP3 0x000000bb - -#define REG_A5XX_RBBM_CLOCK_CNTL_GPMU 0x000000c8 - -#define REG_A5XX_RBBM_CLOCK_DELAY_GPMU 0x000000c9 - -#define REG_A5XX_RBBM_CLOCK_HYST_GPMU 0x000000ca - -#define REG_A5XX_RBBM_PERFCTR_CP_0_LO 0x000003a0 - -#define REG_A5XX_RBBM_PERFCTR_CP_0_HI 0x000003a1 - -#define REG_A5XX_RBBM_PERFCTR_CP_1_LO 0x000003a2 - -#define REG_A5XX_RBBM_PERFCTR_CP_1_HI 0x000003a3 - -#define REG_A5XX_RBBM_PERFCTR_CP_2_LO 0x000003a4 - -#define REG_A5XX_RBBM_PERFCTR_CP_2_HI 0x000003a5 - -#define REG_A5XX_RBBM_PERFCTR_CP_3_LO 0x000003a6 - -#define REG_A5XX_RBBM_PERFCTR_CP_3_HI 0x000003a7 - -#define REG_A5XX_RBBM_PERFCTR_CP_4_LO 0x000003a8 - -#define REG_A5XX_RBBM_PERFCTR_CP_4_HI 0x000003a9 - -#define REG_A5XX_RBBM_PERFCTR_CP_5_LO 0x000003aa - -#define REG_A5XX_RBBM_PERFCTR_CP_5_HI 0x000003ab - -#define REG_A5XX_RBBM_PERFCTR_CP_6_LO 0x000003ac - -#define REG_A5XX_RBBM_PERFCTR_CP_6_HI 0x000003ad - -#define REG_A5XX_RBBM_PERFCTR_CP_7_LO 0x000003ae - -#define 
REG_A5XX_RBBM_PERFCTR_CP_7_HI 0x000003af - -#define REG_A5XX_RBBM_PERFCTR_RBBM_0_LO 0x000003b0 - -#define REG_A5XX_RBBM_PERFCTR_RBBM_0_HI 0x000003b1 - -#define REG_A5XX_RBBM_PERFCTR_RBBM_1_LO 0x000003b2 - -#define REG_A5XX_RBBM_PERFCTR_RBBM_1_HI 0x000003b3 - -#define REG_A5XX_RBBM_PERFCTR_RBBM_2_LO 0x000003b4 - -#define REG_A5XX_RBBM_PERFCTR_RBBM_2_HI 0x000003b5 - -#define REG_A5XX_RBBM_PERFCTR_RBBM_3_LO 0x000003b6 - -#define REG_A5XX_RBBM_PERFCTR_RBBM_3_HI 0x000003b7 - -#define REG_A5XX_RBBM_PERFCTR_PC_0_LO 0x000003b8 - -#define REG_A5XX_RBBM_PERFCTR_PC_0_HI 0x000003b9 - -#define REG_A5XX_RBBM_PERFCTR_PC_1_LO 0x000003ba - -#define REG_A5XX_RBBM_PERFCTR_PC_1_HI 0x000003bb - -#define REG_A5XX_RBBM_PERFCTR_PC_2_LO 0x000003bc - -#define REG_A5XX_RBBM_PERFCTR_PC_2_HI 0x000003bd - -#define REG_A5XX_RBBM_PERFCTR_PC_3_LO 0x000003be - -#define REG_A5XX_RBBM_PERFCTR_PC_3_HI 0x000003bf - -#define REG_A5XX_RBBM_PERFCTR_PC_4_LO 0x000003c0 - -#define REG_A5XX_RBBM_PERFCTR_PC_4_HI 0x000003c1 - -#define REG_A5XX_RBBM_PERFCTR_PC_5_LO 0x000003c2 - -#define REG_A5XX_RBBM_PERFCTR_PC_5_HI 0x000003c3 - -#define REG_A5XX_RBBM_PERFCTR_PC_6_LO 0x000003c4 - -#define REG_A5XX_RBBM_PERFCTR_PC_6_HI 0x000003c5 - -#define REG_A5XX_RBBM_PERFCTR_PC_7_LO 0x000003c6 - -#define REG_A5XX_RBBM_PERFCTR_PC_7_HI 0x000003c7 - -#define REG_A5XX_RBBM_PERFCTR_VFD_0_LO 0x000003c8 - -#define REG_A5XX_RBBM_PERFCTR_VFD_0_HI 0x000003c9 - -#define REG_A5XX_RBBM_PERFCTR_VFD_1_LO 0x000003ca - -#define REG_A5XX_RBBM_PERFCTR_VFD_1_HI 0x000003cb - -#define REG_A5XX_RBBM_PERFCTR_VFD_2_LO 0x000003cc - -#define REG_A5XX_RBBM_PERFCTR_VFD_2_HI 0x000003cd - -#define REG_A5XX_RBBM_PERFCTR_VFD_3_LO 0x000003ce - -#define REG_A5XX_RBBM_PERFCTR_VFD_3_HI 0x000003cf - -#define REG_A5XX_RBBM_PERFCTR_VFD_4_LO 0x000003d0 - -#define REG_A5XX_RBBM_PERFCTR_VFD_4_HI 0x000003d1 - -#define REG_A5XX_RBBM_PERFCTR_VFD_5_LO 0x000003d2 - -#define REG_A5XX_RBBM_PERFCTR_VFD_5_HI 0x000003d3 - -#define REG_A5XX_RBBM_PERFCTR_VFD_6_LO 0x000003d4 - 
-#define REG_A5XX_RBBM_PERFCTR_VFD_6_HI 0x000003d5 - -#define REG_A5XX_RBBM_PERFCTR_VFD_7_LO 0x000003d6 - -#define REG_A5XX_RBBM_PERFCTR_VFD_7_HI 0x000003d7 - -#define REG_A5XX_RBBM_PERFCTR_HLSQ_0_LO 0x000003d8 - -#define REG_A5XX_RBBM_PERFCTR_HLSQ_0_HI 0x000003d9 - -#define REG_A5XX_RBBM_PERFCTR_HLSQ_1_LO 0x000003da - -#define REG_A5XX_RBBM_PERFCTR_HLSQ_1_HI 0x000003db - -#define REG_A5XX_RBBM_PERFCTR_HLSQ_2_LO 0x000003dc - -#define REG_A5XX_RBBM_PERFCTR_HLSQ_2_HI 0x000003dd - -#define REG_A5XX_RBBM_PERFCTR_HLSQ_3_LO 0x000003de - -#define REG_A5XX_RBBM_PERFCTR_HLSQ_3_HI 0x000003df - -#define REG_A5XX_RBBM_PERFCTR_HLSQ_4_LO 0x000003e0 - -#define REG_A5XX_RBBM_PERFCTR_HLSQ_4_HI 0x000003e1 - -#define REG_A5XX_RBBM_PERFCTR_HLSQ_5_LO 0x000003e2 - -#define REG_A5XX_RBBM_PERFCTR_HLSQ_5_HI 0x000003e3 - -#define REG_A5XX_RBBM_PERFCTR_HLSQ_6_LO 0x000003e4 - -#define REG_A5XX_RBBM_PERFCTR_HLSQ_6_HI 0x000003e5 - -#define REG_A5XX_RBBM_PERFCTR_HLSQ_7_LO 0x000003e6 - -#define REG_A5XX_RBBM_PERFCTR_HLSQ_7_HI 0x000003e7 - -#define REG_A5XX_RBBM_PERFCTR_VPC_0_LO 0x000003e8 - -#define REG_A5XX_RBBM_PERFCTR_VPC_0_HI 0x000003e9 - -#define REG_A5XX_RBBM_PERFCTR_VPC_1_LO 0x000003ea - -#define REG_A5XX_RBBM_PERFCTR_VPC_1_HI 0x000003eb - -#define REG_A5XX_RBBM_PERFCTR_VPC_2_LO 0x000003ec - -#define REG_A5XX_RBBM_PERFCTR_VPC_2_HI 0x000003ed - -#define REG_A5XX_RBBM_PERFCTR_VPC_3_LO 0x000003ee - -#define REG_A5XX_RBBM_PERFCTR_VPC_3_HI 0x000003ef - -#define REG_A5XX_RBBM_PERFCTR_CCU_0_LO 0x000003f0 - -#define REG_A5XX_RBBM_PERFCTR_CCU_0_HI 0x000003f1 - -#define REG_A5XX_RBBM_PERFCTR_CCU_1_LO 0x000003f2 - -#define REG_A5XX_RBBM_PERFCTR_CCU_1_HI 0x000003f3 - -#define REG_A5XX_RBBM_PERFCTR_CCU_2_LO 0x000003f4 - -#define REG_A5XX_RBBM_PERFCTR_CCU_2_HI 0x000003f5 - -#define REG_A5XX_RBBM_PERFCTR_CCU_3_LO 0x000003f6 - -#define REG_A5XX_RBBM_PERFCTR_CCU_3_HI 0x000003f7 - -#define REG_A5XX_RBBM_PERFCTR_TSE_0_LO 0x000003f8 - -#define REG_A5XX_RBBM_PERFCTR_TSE_0_HI 0x000003f9 - -#define 
REG_A5XX_RBBM_PERFCTR_TSE_1_LO 0x000003fa - -#define REG_A5XX_RBBM_PERFCTR_TSE_1_HI 0x000003fb - -#define REG_A5XX_RBBM_PERFCTR_TSE_2_LO 0x000003fc - -#define REG_A5XX_RBBM_PERFCTR_TSE_2_HI 0x000003fd - -#define REG_A5XX_RBBM_PERFCTR_TSE_3_LO 0x000003fe - -#define REG_A5XX_RBBM_PERFCTR_TSE_3_HI 0x000003ff - -#define REG_A5XX_RBBM_PERFCTR_RAS_0_LO 0x00000400 - -#define REG_A5XX_RBBM_PERFCTR_RAS_0_HI 0x00000401 - -#define REG_A5XX_RBBM_PERFCTR_RAS_1_LO 0x00000402 - -#define REG_A5XX_RBBM_PERFCTR_RAS_1_HI 0x00000403 - -#define REG_A5XX_RBBM_PERFCTR_RAS_2_LO 0x00000404 - -#define REG_A5XX_RBBM_PERFCTR_RAS_2_HI 0x00000405 - -#define REG_A5XX_RBBM_PERFCTR_RAS_3_LO 0x00000406 - -#define REG_A5XX_RBBM_PERFCTR_RAS_3_HI 0x00000407 - -#define REG_A5XX_RBBM_PERFCTR_UCHE_0_LO 0x00000408 - -#define REG_A5XX_RBBM_PERFCTR_UCHE_0_HI 0x00000409 - -#define REG_A5XX_RBBM_PERFCTR_UCHE_1_LO 0x0000040a - -#define REG_A5XX_RBBM_PERFCTR_UCHE_1_HI 0x0000040b - -#define REG_A5XX_RBBM_PERFCTR_UCHE_2_LO 0x0000040c - -#define REG_A5XX_RBBM_PERFCTR_UCHE_2_HI 0x0000040d - -#define REG_A5XX_RBBM_PERFCTR_UCHE_3_LO 0x0000040e - -#define REG_A5XX_RBBM_PERFCTR_UCHE_3_HI 0x0000040f - -#define REG_A5XX_RBBM_PERFCTR_UCHE_4_LO 0x00000410 - -#define REG_A5XX_RBBM_PERFCTR_UCHE_4_HI 0x00000411 - -#define REG_A5XX_RBBM_PERFCTR_UCHE_5_LO 0x00000412 - -#define REG_A5XX_RBBM_PERFCTR_UCHE_5_HI 0x00000413 - -#define REG_A5XX_RBBM_PERFCTR_UCHE_6_LO 0x00000414 - -#define REG_A5XX_RBBM_PERFCTR_UCHE_6_HI 0x00000415 - -#define REG_A5XX_RBBM_PERFCTR_UCHE_7_LO 0x00000416 - -#define REG_A5XX_RBBM_PERFCTR_UCHE_7_HI 0x00000417 - -#define REG_A5XX_RBBM_PERFCTR_TP_0_LO 0x00000418 - -#define REG_A5XX_RBBM_PERFCTR_TP_0_HI 0x00000419 - -#define REG_A5XX_RBBM_PERFCTR_TP_1_LO 0x0000041a - -#define REG_A5XX_RBBM_PERFCTR_TP_1_HI 0x0000041b - -#define REG_A5XX_RBBM_PERFCTR_TP_2_LO 0x0000041c - -#define REG_A5XX_RBBM_PERFCTR_TP_2_HI 0x0000041d - -#define REG_A5XX_RBBM_PERFCTR_TP_3_LO 0x0000041e - -#define REG_A5XX_RBBM_PERFCTR_TP_3_HI 
0x0000041f - -#define REG_A5XX_RBBM_PERFCTR_TP_4_LO 0x00000420 - -#define REG_A5XX_RBBM_PERFCTR_TP_4_HI 0x00000421 - -#define REG_A5XX_RBBM_PERFCTR_TP_5_LO 0x00000422 - -#define REG_A5XX_RBBM_PERFCTR_TP_5_HI 0x00000423 - -#define REG_A5XX_RBBM_PERFCTR_TP_6_LO 0x00000424 - -#define REG_A5XX_RBBM_PERFCTR_TP_6_HI 0x00000425 - -#define REG_A5XX_RBBM_PERFCTR_TP_7_LO 0x00000426 - -#define REG_A5XX_RBBM_PERFCTR_TP_7_HI 0x00000427 - -#define REG_A5XX_RBBM_PERFCTR_SP_0_LO 0x00000428 - -#define REG_A5XX_RBBM_PERFCTR_SP_0_HI 0x00000429 - -#define REG_A5XX_RBBM_PERFCTR_SP_1_LO 0x0000042a - -#define REG_A5XX_RBBM_PERFCTR_SP_1_HI 0x0000042b - -#define REG_A5XX_RBBM_PERFCTR_SP_2_LO 0x0000042c - -#define REG_A5XX_RBBM_PERFCTR_SP_2_HI 0x0000042d - -#define REG_A5XX_RBBM_PERFCTR_SP_3_LO 0x0000042e - -#define REG_A5XX_RBBM_PERFCTR_SP_3_HI 0x0000042f - -#define REG_A5XX_RBBM_PERFCTR_SP_4_LO 0x00000430 - -#define REG_A5XX_RBBM_PERFCTR_SP_4_HI 0x00000431 - -#define REG_A5XX_RBBM_PERFCTR_SP_5_LO 0x00000432 - -#define REG_A5XX_RBBM_PERFCTR_SP_5_HI 0x00000433 - -#define REG_A5XX_RBBM_PERFCTR_SP_6_LO 0x00000434 - -#define REG_A5XX_RBBM_PERFCTR_SP_6_HI 0x00000435 - -#define REG_A5XX_RBBM_PERFCTR_SP_7_LO 0x00000436 - -#define REG_A5XX_RBBM_PERFCTR_SP_7_HI 0x00000437 - -#define REG_A5XX_RBBM_PERFCTR_SP_8_LO 0x00000438 - -#define REG_A5XX_RBBM_PERFCTR_SP_8_HI 0x00000439 - -#define REG_A5XX_RBBM_PERFCTR_SP_9_LO 0x0000043a - -#define REG_A5XX_RBBM_PERFCTR_SP_9_HI 0x0000043b - -#define REG_A5XX_RBBM_PERFCTR_SP_10_LO 0x0000043c - -#define REG_A5XX_RBBM_PERFCTR_SP_10_HI 0x0000043d - -#define REG_A5XX_RBBM_PERFCTR_SP_11_LO 0x0000043e - -#define REG_A5XX_RBBM_PERFCTR_SP_11_HI 0x0000043f - -#define REG_A5XX_RBBM_PERFCTR_RB_0_LO 0x00000440 - -#define REG_A5XX_RBBM_PERFCTR_RB_0_HI 0x00000441 - -#define REG_A5XX_RBBM_PERFCTR_RB_1_LO 0x00000442 - -#define REG_A5XX_RBBM_PERFCTR_RB_1_HI 0x00000443 - -#define REG_A5XX_RBBM_PERFCTR_RB_2_LO 0x00000444 - -#define REG_A5XX_RBBM_PERFCTR_RB_2_HI 0x00000445 - 
-#define REG_A5XX_RBBM_PERFCTR_RB_3_LO 0x00000446 - -#define REG_A5XX_RBBM_PERFCTR_RB_3_HI 0x00000447 - -#define REG_A5XX_RBBM_PERFCTR_RB_4_LO 0x00000448 - -#define REG_A5XX_RBBM_PERFCTR_RB_4_HI 0x00000449 - -#define REG_A5XX_RBBM_PERFCTR_RB_5_LO 0x0000044a - -#define REG_A5XX_RBBM_PERFCTR_RB_5_HI 0x0000044b - -#define REG_A5XX_RBBM_PERFCTR_RB_6_LO 0x0000044c - -#define REG_A5XX_RBBM_PERFCTR_RB_6_HI 0x0000044d - -#define REG_A5XX_RBBM_PERFCTR_RB_7_LO 0x0000044e - -#define REG_A5XX_RBBM_PERFCTR_RB_7_HI 0x0000044f - -#define REG_A5XX_RBBM_PERFCTR_VSC_0_LO 0x00000450 - -#define REG_A5XX_RBBM_PERFCTR_VSC_0_HI 0x00000451 - -#define REG_A5XX_RBBM_PERFCTR_VSC_1_LO 0x00000452 - -#define REG_A5XX_RBBM_PERFCTR_VSC_1_HI 0x00000453 - -#define REG_A5XX_RBBM_PERFCTR_LRZ_0_LO 0x00000454 - -#define REG_A5XX_RBBM_PERFCTR_LRZ_0_HI 0x00000455 - -#define REG_A5XX_RBBM_PERFCTR_LRZ_1_LO 0x00000456 - -#define REG_A5XX_RBBM_PERFCTR_LRZ_1_HI 0x00000457 - -#define REG_A5XX_RBBM_PERFCTR_LRZ_2_LO 0x00000458 - -#define REG_A5XX_RBBM_PERFCTR_LRZ_2_HI 0x00000459 - -#define REG_A5XX_RBBM_PERFCTR_LRZ_3_LO 0x0000045a - -#define REG_A5XX_RBBM_PERFCTR_LRZ_3_HI 0x0000045b - -#define REG_A5XX_RBBM_PERFCTR_CMP_0_LO 0x0000045c - -#define REG_A5XX_RBBM_PERFCTR_CMP_0_HI 0x0000045d - -#define REG_A5XX_RBBM_PERFCTR_CMP_1_LO 0x0000045e - -#define REG_A5XX_RBBM_PERFCTR_CMP_1_HI 0x0000045f - -#define REG_A5XX_RBBM_PERFCTR_CMP_2_LO 0x00000460 - -#define REG_A5XX_RBBM_PERFCTR_CMP_2_HI 0x00000461 - -#define REG_A5XX_RBBM_PERFCTR_CMP_3_LO 0x00000462 - -#define REG_A5XX_RBBM_PERFCTR_CMP_3_HI 0x00000463 - -#define REG_A5XX_RBBM_PERFCTR_RBBM_SEL_0 0x0000046b - -#define REG_A5XX_RBBM_PERFCTR_RBBM_SEL_1 0x0000046c - -#define REG_A5XX_RBBM_PERFCTR_RBBM_SEL_2 0x0000046d - -#define REG_A5XX_RBBM_PERFCTR_RBBM_SEL_3 0x0000046e - -#define REG_A5XX_RBBM_ALWAYSON_COUNTER_LO 0x000004d2 - -#define REG_A5XX_RBBM_ALWAYSON_COUNTER_HI 0x000004d3 - -#define REG_A5XX_RBBM_STATUS 0x000004f5 -#define A5XX_RBBM_STATUS_GPU_BUSY_IGN_AHB 
0x80000000 -#define A5XX_RBBM_STATUS_GPU_BUSY_IGN_AHB_CP 0x40000000 -#define A5XX_RBBM_STATUS_HLSQ_BUSY 0x20000000 -#define A5XX_RBBM_STATUS_VSC_BUSY 0x10000000 -#define A5XX_RBBM_STATUS_TPL1_BUSY 0x08000000 -#define A5XX_RBBM_STATUS_SP_BUSY 0x04000000 -#define A5XX_RBBM_STATUS_UCHE_BUSY 0x02000000 -#define A5XX_RBBM_STATUS_VPC_BUSY 0x01000000 -#define A5XX_RBBM_STATUS_VFDP_BUSY 0x00800000 -#define A5XX_RBBM_STATUS_VFD_BUSY 0x00400000 -#define A5XX_RBBM_STATUS_TESS_BUSY 0x00200000 -#define A5XX_RBBM_STATUS_PC_VSD_BUSY 0x00100000 -#define A5XX_RBBM_STATUS_PC_DCALL_BUSY 0x00080000 -#define A5XX_RBBM_STATUS_GPMU_SLAVE_BUSY 0x00040000 -#define A5XX_RBBM_STATUS_DCOM_BUSY 0x00020000 -#define A5XX_RBBM_STATUS_COM_BUSY 0x00010000 -#define A5XX_RBBM_STATUS_LRZ_BUZY 0x00008000 -#define A5XX_RBBM_STATUS_A2D_DSP_BUSY 0x00004000 -#define A5XX_RBBM_STATUS_CCUFCHE_BUSY 0x00002000 -#define A5XX_RBBM_STATUS_RB_BUSY 0x00001000 -#define A5XX_RBBM_STATUS_RAS_BUSY 0x00000800 -#define A5XX_RBBM_STATUS_TSE_BUSY 0x00000400 -#define A5XX_RBBM_STATUS_VBIF_BUSY 0x00000200 -#define A5XX_RBBM_STATUS_GPU_BUSY_IGN_AHB_HYST 0x00000100 -#define A5XX_RBBM_STATUS_CP_BUSY_IGN_HYST 0x00000080 -#define A5XX_RBBM_STATUS_CP_BUSY 0x00000040 -#define A5XX_RBBM_STATUS_GPMU_MASTER_BUSY 0x00000020 -#define A5XX_RBBM_STATUS_CP_CRASH_BUSY 0x00000010 -#define A5XX_RBBM_STATUS_CP_ETS_BUSY 0x00000008 -#define A5XX_RBBM_STATUS_CP_PFP_BUSY 0x00000004 -#define A5XX_RBBM_STATUS_CP_ME_BUSY 0x00000002 -#define A5XX_RBBM_STATUS_HI_BUSY 0x00000001 - -#define REG_A5XX_RBBM_STATUS3 0x00000530 - -#define REG_A5XX_RBBM_INT_0_STATUS 0x000004e1 - -#define REG_A5XX_RBBM_AHB_ME_SPLIT_STATUS 0x000004f0 - -#define REG_A5XX_RBBM_AHB_PFP_SPLIT_STATUS 0x000004f1 - -#define REG_A5XX_RBBM_AHB_ETS_SPLIT_STATUS 0x000004f3 - -#define REG_A5XX_RBBM_AHB_ERROR_STATUS 0x000004f4 - -#define REG_A5XX_RBBM_PERFCTR_CNTL 0x00000464 - -#define REG_A5XX_RBBM_PERFCTR_LOAD_CMD0 0x00000465 - -#define REG_A5XX_RBBM_PERFCTR_LOAD_CMD1 0x00000466 - -#define 
REG_A5XX_RBBM_PERFCTR_LOAD_CMD2 0x00000467 - -#define REG_A5XX_RBBM_PERFCTR_LOAD_CMD3 0x00000468 - -#define REG_A5XX_RBBM_PERFCTR_LOAD_VALUE_LO 0x00000469 - -#define REG_A5XX_RBBM_PERFCTR_LOAD_VALUE_HI 0x0000046a - -#define REG_A5XX_RBBM_PERFCTR_RBBM_SEL_0 0x0000046b - -#define REG_A5XX_RBBM_PERFCTR_RBBM_SEL_1 0x0000046c - -#define REG_A5XX_RBBM_PERFCTR_RBBM_SEL_2 0x0000046d - -#define REG_A5XX_RBBM_PERFCTR_RBBM_SEL_3 0x0000046e - -#define REG_A5XX_RBBM_PERFCTR_GPU_BUSY_MASKED 0x0000046f - -#define REG_A5XX_RBBM_AHB_ERROR 0x000004ed - -#define REG_A5XX_RBBM_CFG_DBGBUS_EVENT_LOGIC 0x00000504 - -#define REG_A5XX_RBBM_CFG_DBGBUS_OVER 0x00000505 - -#define REG_A5XX_RBBM_CFG_DBGBUS_COUNT0 0x00000506 - -#define REG_A5XX_RBBM_CFG_DBGBUS_COUNT1 0x00000507 - -#define REG_A5XX_RBBM_CFG_DBGBUS_COUNT2 0x00000508 - -#define REG_A5XX_RBBM_CFG_DBGBUS_COUNT3 0x00000509 - -#define REG_A5XX_RBBM_CFG_DBGBUS_COUNT4 0x0000050a - -#define REG_A5XX_RBBM_CFG_DBGBUS_COUNT5 0x0000050b - -#define REG_A5XX_RBBM_CFG_DBGBUS_TRACE_ADDR 0x0000050c - -#define REG_A5XX_RBBM_CFG_DBGBUS_TRACE_BUF0 0x0000050d - -#define REG_A5XX_RBBM_CFG_DBGBUS_TRACE_BUF1 0x0000050e - -#define REG_A5XX_RBBM_CFG_DBGBUS_TRACE_BUF2 0x0000050f - -#define REG_A5XX_RBBM_CFG_DBGBUS_TRACE_BUF3 0x00000510 - -#define REG_A5XX_RBBM_CFG_DBGBUS_TRACE_BUF4 0x00000511 - -#define REG_A5XX_RBBM_CFG_DBGBUS_MISR0 0x00000512 - -#define REG_A5XX_RBBM_CFG_DBGBUS_MISR1 0x00000513 - -#define REG_A5XX_RBBM_ISDB_CNT 0x00000533 - -#define REG_A5XX_RBBM_SECVID_TRUST_CONFIG 0x0000f000 - -#define REG_A5XX_RBBM_SECVID_TRUST_CNTL 0x0000f400 - -#define REG_A5XX_RBBM_SECVID_TSB_TRUSTED_BASE_LO 0x0000f800 - -#define REG_A5XX_RBBM_SECVID_TSB_TRUSTED_BASE_HI 0x0000f801 - -#define REG_A5XX_RBBM_SECVID_TSB_TRUSTED_SIZE 0x0000f802 - -#define REG_A5XX_RBBM_SECVID_TSB_CNTL 0x0000f803 - -#define REG_A5XX_RBBM_SECVID_TSB_COMP_STATUS_LO 0x0000f804 - -#define REG_A5XX_RBBM_SECVID_TSB_COMP_STATUS_HI 0x0000f805 - -#define REG_A5XX_RBBM_SECVID_TSB_UCHE_STATUS_LO 
0x0000f806 - -#define REG_A5XX_RBBM_SECVID_TSB_UCHE_STATUS_HI 0x0000f807 - -#define REG_A5XX_RBBM_SECVID_TSB_ADDR_MODE_CNTL 0x0000f810 - -#define REG_A5XX_VSC_BIN_SIZE 0x00000bc2 -#define A5XX_VSC_BIN_SIZE_WIDTH__MASK 0x000000ff -#define A5XX_VSC_BIN_SIZE_WIDTH__SHIFT 0 -static inline uint32_t A5XX_VSC_BIN_SIZE_WIDTH(uint32_t val) -{ - assert(!(val & 0x1f)); - return ((val >> 5) << A5XX_VSC_BIN_SIZE_WIDTH__SHIFT) & A5XX_VSC_BIN_SIZE_WIDTH__MASK; -} -#define A5XX_VSC_BIN_SIZE_HEIGHT__MASK 0x0001fe00 -#define A5XX_VSC_BIN_SIZE_HEIGHT__SHIFT 9 -static inline uint32_t A5XX_VSC_BIN_SIZE_HEIGHT(uint32_t val) -{ - assert(!(val & 0x1f)); - return ((val >> 5) << A5XX_VSC_BIN_SIZE_HEIGHT__SHIFT) & A5XX_VSC_BIN_SIZE_HEIGHT__MASK; -} - -#define REG_A5XX_VSC_SIZE_ADDRESS_LO 0x00000bc3 - -#define REG_A5XX_VSC_SIZE_ADDRESS_HI 0x00000bc4 - -#define REG_A5XX_UNKNOWN_0BC5 0x00000bc5 - -#define REG_A5XX_UNKNOWN_0BC6 0x00000bc6 - -static inline uint32_t REG_A5XX_VSC_PIPE_CONFIG(uint32_t i0) { return 0x00000bd0 + 0x1*i0; } - -static inline uint32_t REG_A5XX_VSC_PIPE_CONFIG_REG(uint32_t i0) { return 0x00000bd0 + 0x1*i0; } -#define A5XX_VSC_PIPE_CONFIG_REG_X__MASK 0x000003ff -#define A5XX_VSC_PIPE_CONFIG_REG_X__SHIFT 0 -static inline uint32_t A5XX_VSC_PIPE_CONFIG_REG_X(uint32_t val) -{ - return ((val) << A5XX_VSC_PIPE_CONFIG_REG_X__SHIFT) & A5XX_VSC_PIPE_CONFIG_REG_X__MASK; -} -#define A5XX_VSC_PIPE_CONFIG_REG_Y__MASK 0x000ffc00 -#define A5XX_VSC_PIPE_CONFIG_REG_Y__SHIFT 10 -static inline uint32_t A5XX_VSC_PIPE_CONFIG_REG_Y(uint32_t val) -{ - return ((val) << A5XX_VSC_PIPE_CONFIG_REG_Y__SHIFT) & A5XX_VSC_PIPE_CONFIG_REG_Y__MASK; -} -#define A5XX_VSC_PIPE_CONFIG_REG_W__MASK 0x00f00000 -#define A5XX_VSC_PIPE_CONFIG_REG_W__SHIFT 20 -static inline uint32_t A5XX_VSC_PIPE_CONFIG_REG_W(uint32_t val) -{ - return ((val) << A5XX_VSC_PIPE_CONFIG_REG_W__SHIFT) & A5XX_VSC_PIPE_CONFIG_REG_W__MASK; -} -#define A5XX_VSC_PIPE_CONFIG_REG_H__MASK 0x0f000000 -#define A5XX_VSC_PIPE_CONFIG_REG_H__SHIFT 24 
-static inline uint32_t A5XX_VSC_PIPE_CONFIG_REG_H(uint32_t val) -{ - return ((val) << A5XX_VSC_PIPE_CONFIG_REG_H__SHIFT) & A5XX_VSC_PIPE_CONFIG_REG_H__MASK; -} - -static inline uint32_t REG_A5XX_VSC_PIPE_DATA_ADDRESS(uint32_t i0) { return 0x00000be0 + 0x2*i0; } - -static inline uint32_t REG_A5XX_VSC_PIPE_DATA_ADDRESS_LO(uint32_t i0) { return 0x00000be0 + 0x2*i0; } - -static inline uint32_t REG_A5XX_VSC_PIPE_DATA_ADDRESS_HI(uint32_t i0) { return 0x00000be1 + 0x2*i0; } - -static inline uint32_t REG_A5XX_VSC_PIPE_DATA_LENGTH(uint32_t i0) { return 0x00000c00 + 0x1*i0; } - -static inline uint32_t REG_A5XX_VSC_PIPE_DATA_LENGTH_REG(uint32_t i0) { return 0x00000c00 + 0x1*i0; } - -#define REG_A5XX_VSC_PERFCTR_VSC_SEL_0 0x00000c60 - -#define REG_A5XX_VSC_PERFCTR_VSC_SEL_1 0x00000c61 - -#define REG_A5XX_VSC_RESOLVE_CNTL 0x00000cdd -#define A5XX_VSC_RESOLVE_CNTL_WINDOW_OFFSET_DISABLE 0x80000000 -#define A5XX_VSC_RESOLVE_CNTL_X__MASK 0x00007fff -#define A5XX_VSC_RESOLVE_CNTL_X__SHIFT 0 -static inline uint32_t A5XX_VSC_RESOLVE_CNTL_X(uint32_t val) -{ - return ((val) << A5XX_VSC_RESOLVE_CNTL_X__SHIFT) & A5XX_VSC_RESOLVE_CNTL_X__MASK; -} -#define A5XX_VSC_RESOLVE_CNTL_Y__MASK 0x7fff0000 -#define A5XX_VSC_RESOLVE_CNTL_Y__SHIFT 16 -static inline uint32_t A5XX_VSC_RESOLVE_CNTL_Y(uint32_t val) -{ - return ((val) << A5XX_VSC_RESOLVE_CNTL_Y__SHIFT) & A5XX_VSC_RESOLVE_CNTL_Y__MASK; -} - -#define REG_A5XX_GRAS_ADDR_MODE_CNTL 0x00000c81 - -#define REG_A5XX_GRAS_PERFCTR_TSE_SEL_0 0x00000c90 - -#define REG_A5XX_GRAS_PERFCTR_TSE_SEL_1 0x00000c91 - -#define REG_A5XX_GRAS_PERFCTR_TSE_SEL_2 0x00000c92 - -#define REG_A5XX_GRAS_PERFCTR_TSE_SEL_3 0x00000c93 - -#define REG_A5XX_GRAS_PERFCTR_RAS_SEL_0 0x00000c94 - -#define REG_A5XX_GRAS_PERFCTR_RAS_SEL_1 0x00000c95 - -#define REG_A5XX_GRAS_PERFCTR_RAS_SEL_2 0x00000c96 - -#define REG_A5XX_GRAS_PERFCTR_RAS_SEL_3 0x00000c97 - -#define REG_A5XX_GRAS_PERFCTR_LRZ_SEL_0 0x00000c98 - -#define REG_A5XX_GRAS_PERFCTR_LRZ_SEL_1 0x00000c99 - -#define 
REG_A5XX_GRAS_PERFCTR_LRZ_SEL_2 0x00000c9a - -#define REG_A5XX_GRAS_PERFCTR_LRZ_SEL_3 0x00000c9b - -#define REG_A5XX_RB_DBG_ECO_CNTL 0x00000cc4 - -#define REG_A5XX_RB_ADDR_MODE_CNTL 0x00000cc5 - -#define REG_A5XX_RB_MODE_CNTL 0x00000cc6 - -#define REG_A5XX_RB_CCU_CNTL 0x00000cc7 - -#define REG_A5XX_RB_PERFCTR_RB_SEL_0 0x00000cd0 - -#define REG_A5XX_RB_PERFCTR_RB_SEL_1 0x00000cd1 - -#define REG_A5XX_RB_PERFCTR_RB_SEL_2 0x00000cd2 - -#define REG_A5XX_RB_PERFCTR_RB_SEL_3 0x00000cd3 - -#define REG_A5XX_RB_PERFCTR_RB_SEL_4 0x00000cd4 - -#define REG_A5XX_RB_PERFCTR_RB_SEL_5 0x00000cd5 - -#define REG_A5XX_RB_PERFCTR_RB_SEL_6 0x00000cd6 - -#define REG_A5XX_RB_PERFCTR_RB_SEL_7 0x00000cd7 - -#define REG_A5XX_RB_PERFCTR_CCU_SEL_0 0x00000cd8 - -#define REG_A5XX_RB_PERFCTR_CCU_SEL_1 0x00000cd9 - -#define REG_A5XX_RB_PERFCTR_CCU_SEL_2 0x00000cda - -#define REG_A5XX_RB_PERFCTR_CCU_SEL_3 0x00000cdb - -#define REG_A5XX_RB_POWERCTR_RB_SEL_0 0x00000ce0 - -#define REG_A5XX_RB_POWERCTR_RB_SEL_1 0x00000ce1 - -#define REG_A5XX_RB_POWERCTR_RB_SEL_2 0x00000ce2 - -#define REG_A5XX_RB_POWERCTR_RB_SEL_3 0x00000ce3 - -#define REG_A5XX_RB_POWERCTR_CCU_SEL_0 0x00000ce4 - -#define REG_A5XX_RB_POWERCTR_CCU_SEL_1 0x00000ce5 - -#define REG_A5XX_RB_PERFCTR_CMP_SEL_0 0x00000cec - -#define REG_A5XX_RB_PERFCTR_CMP_SEL_1 0x00000ced - -#define REG_A5XX_RB_PERFCTR_CMP_SEL_2 0x00000cee - -#define REG_A5XX_RB_PERFCTR_CMP_SEL_3 0x00000cef - -#define REG_A5XX_PC_DBG_ECO_CNTL 0x00000d00 -#define A5XX_PC_DBG_ECO_CNTL_TWOPASSUSEWFI 0x00000100 - -#define REG_A5XX_PC_ADDR_MODE_CNTL 0x00000d01 - -#define REG_A5XX_PC_MODE_CNTL 0x00000d02 - -#define REG_A5XX_PC_INDEX_BUF_LO 0x00000d04 - -#define REG_A5XX_PC_INDEX_BUF_HI 0x00000d05 - -#define REG_A5XX_PC_START_INDEX 0x00000d06 - -#define REG_A5XX_PC_MAX_INDEX 0x00000d07 - -#define REG_A5XX_PC_TESSFACTOR_ADDR_LO 0x00000d08 - -#define REG_A5XX_PC_TESSFACTOR_ADDR_HI 0x00000d09 - -#define REG_A5XX_PC_PERFCTR_PC_SEL_0 0x00000d10 - -#define REG_A5XX_PC_PERFCTR_PC_SEL_1 
0x00000d11 - -#define REG_A5XX_PC_PERFCTR_PC_SEL_2 0x00000d12 - -#define REG_A5XX_PC_PERFCTR_PC_SEL_3 0x00000d13 - -#define REG_A5XX_PC_PERFCTR_PC_SEL_4 0x00000d14 - -#define REG_A5XX_PC_PERFCTR_PC_SEL_5 0x00000d15 - -#define REG_A5XX_PC_PERFCTR_PC_SEL_6 0x00000d16 - -#define REG_A5XX_PC_PERFCTR_PC_SEL_7 0x00000d17 - -#define REG_A5XX_HLSQ_TIMEOUT_THRESHOLD_0 0x00000e00 - -#define REG_A5XX_HLSQ_TIMEOUT_THRESHOLD_1 0x00000e01 - -#define REG_A5XX_HLSQ_ADDR_MODE_CNTL 0x00000e05 - -#define REG_A5XX_HLSQ_MODE_CNTL 0x00000e06 - -#define REG_A5XX_HLSQ_PERFCTR_HLSQ_SEL_0 0x00000e10 - -#define REG_A5XX_HLSQ_PERFCTR_HLSQ_SEL_1 0x00000e11 - -#define REG_A5XX_HLSQ_PERFCTR_HLSQ_SEL_2 0x00000e12 - -#define REG_A5XX_HLSQ_PERFCTR_HLSQ_SEL_3 0x00000e13 - -#define REG_A5XX_HLSQ_PERFCTR_HLSQ_SEL_4 0x00000e14 - -#define REG_A5XX_HLSQ_PERFCTR_HLSQ_SEL_5 0x00000e15 - -#define REG_A5XX_HLSQ_PERFCTR_HLSQ_SEL_6 0x00000e16 - -#define REG_A5XX_HLSQ_PERFCTR_HLSQ_SEL_7 0x00000e17 - -#define REG_A5XX_HLSQ_SPTP_RDSEL 0x00000f08 - -#define REG_A5XX_HLSQ_DBG_READ_SEL 0x0000bc00 - -#define REG_A5XX_HLSQ_DBG_AHB_READ_APERTURE 0x0000a000 - -#define REG_A5XX_VFD_ADDR_MODE_CNTL 0x00000e41 - -#define REG_A5XX_VFD_MODE_CNTL 0x00000e42 - -#define REG_A5XX_VFD_PERFCTR_VFD_SEL_0 0x00000e50 - -#define REG_A5XX_VFD_PERFCTR_VFD_SEL_1 0x00000e51 - -#define REG_A5XX_VFD_PERFCTR_VFD_SEL_2 0x00000e52 - -#define REG_A5XX_VFD_PERFCTR_VFD_SEL_3 0x00000e53 - -#define REG_A5XX_VFD_PERFCTR_VFD_SEL_4 0x00000e54 - -#define REG_A5XX_VFD_PERFCTR_VFD_SEL_5 0x00000e55 - -#define REG_A5XX_VFD_PERFCTR_VFD_SEL_6 0x00000e56 - -#define REG_A5XX_VFD_PERFCTR_VFD_SEL_7 0x00000e57 - -#define REG_A5XX_VPC_DBG_ECO_CNTL 0x00000e60 - -#define REG_A5XX_VPC_ADDR_MODE_CNTL 0x00000e61 - -#define REG_A5XX_VPC_MODE_CNTL 0x00000e62 -#define A5XX_VPC_MODE_CNTL_BINNING_PASS 0x00000001 - -#define REG_A5XX_VPC_PERFCTR_VPC_SEL_0 0x00000e64 - -#define REG_A5XX_VPC_PERFCTR_VPC_SEL_1 0x00000e65 - -#define REG_A5XX_VPC_PERFCTR_VPC_SEL_2 0x00000e66 - 
-#define REG_A5XX_VPC_PERFCTR_VPC_SEL_3 0x00000e67 - -#define REG_A5XX_UCHE_ADDR_MODE_CNTL 0x00000e80 - -#define REG_A5XX_UCHE_SVM_CNTL 0x00000e82 - -#define REG_A5XX_UCHE_WRITE_THRU_BASE_LO 0x00000e87 - -#define REG_A5XX_UCHE_WRITE_THRU_BASE_HI 0x00000e88 - -#define REG_A5XX_UCHE_TRAP_BASE_LO 0x00000e89 - -#define REG_A5XX_UCHE_TRAP_BASE_HI 0x00000e8a - -#define REG_A5XX_UCHE_GMEM_RANGE_MIN_LO 0x00000e8b - -#define REG_A5XX_UCHE_GMEM_RANGE_MIN_HI 0x00000e8c - -#define REG_A5XX_UCHE_GMEM_RANGE_MAX_LO 0x00000e8d - -#define REG_A5XX_UCHE_GMEM_RANGE_MAX_HI 0x00000e8e - -#define REG_A5XX_UCHE_DBG_ECO_CNTL_2 0x00000e8f - -#define REG_A5XX_UCHE_DBG_ECO_CNTL 0x00000e90 - -#define REG_A5XX_UCHE_CACHE_INVALIDATE_MIN_LO 0x00000e91 - -#define REG_A5XX_UCHE_CACHE_INVALIDATE_MIN_HI 0x00000e92 - -#define REG_A5XX_UCHE_CACHE_INVALIDATE_MAX_LO 0x00000e93 - -#define REG_A5XX_UCHE_CACHE_INVALIDATE_MAX_HI 0x00000e94 - -#define REG_A5XX_UCHE_CACHE_INVALIDATE 0x00000e95 - -#define REG_A5XX_UCHE_CACHE_WAYS 0x00000e96 - -#define REG_A5XX_UCHE_PERFCTR_UCHE_SEL_0 0x00000ea0 - -#define REG_A5XX_UCHE_PERFCTR_UCHE_SEL_1 0x00000ea1 - -#define REG_A5XX_UCHE_PERFCTR_UCHE_SEL_2 0x00000ea2 - -#define REG_A5XX_UCHE_PERFCTR_UCHE_SEL_3 0x00000ea3 - -#define REG_A5XX_UCHE_PERFCTR_UCHE_SEL_4 0x00000ea4 - -#define REG_A5XX_UCHE_PERFCTR_UCHE_SEL_5 0x00000ea5 - -#define REG_A5XX_UCHE_PERFCTR_UCHE_SEL_6 0x00000ea6 - -#define REG_A5XX_UCHE_PERFCTR_UCHE_SEL_7 0x00000ea7 - -#define REG_A5XX_UCHE_POWERCTR_UCHE_SEL_0 0x00000ea8 - -#define REG_A5XX_UCHE_POWERCTR_UCHE_SEL_1 0x00000ea9 - -#define REG_A5XX_UCHE_POWERCTR_UCHE_SEL_2 0x00000eaa - -#define REG_A5XX_UCHE_POWERCTR_UCHE_SEL_3 0x00000eab - -#define REG_A5XX_UCHE_TRAP_LOG_LO 0x00000eb1 - -#define REG_A5XX_UCHE_TRAP_LOG_HI 0x00000eb2 - -#define REG_A5XX_SP_DBG_ECO_CNTL 0x00000ec0 - -#define REG_A5XX_SP_ADDR_MODE_CNTL 0x00000ec1 - -#define REG_A5XX_SP_MODE_CNTL 0x00000ec2 - -#define REG_A5XX_SP_PERFCTR_SP_SEL_0 0x00000ed0 - -#define 
REG_A5XX_SP_PERFCTR_SP_SEL_1 0x00000ed1 - -#define REG_A5XX_SP_PERFCTR_SP_SEL_2 0x00000ed2 - -#define REG_A5XX_SP_PERFCTR_SP_SEL_3 0x00000ed3 - -#define REG_A5XX_SP_PERFCTR_SP_SEL_4 0x00000ed4 - -#define REG_A5XX_SP_PERFCTR_SP_SEL_5 0x00000ed5 - -#define REG_A5XX_SP_PERFCTR_SP_SEL_6 0x00000ed6 - -#define REG_A5XX_SP_PERFCTR_SP_SEL_7 0x00000ed7 - -#define REG_A5XX_SP_PERFCTR_SP_SEL_8 0x00000ed8 - -#define REG_A5XX_SP_PERFCTR_SP_SEL_9 0x00000ed9 - -#define REG_A5XX_SP_PERFCTR_SP_SEL_10 0x00000eda - -#define REG_A5XX_SP_PERFCTR_SP_SEL_11 0x00000edb - -#define REG_A5XX_SP_POWERCTR_SP_SEL_0 0x00000edc - -#define REG_A5XX_SP_POWERCTR_SP_SEL_1 0x00000edd - -#define REG_A5XX_SP_POWERCTR_SP_SEL_2 0x00000ede - -#define REG_A5XX_SP_POWERCTR_SP_SEL_3 0x00000edf - -#define REG_A5XX_TPL1_ADDR_MODE_CNTL 0x00000f01 - -#define REG_A5XX_TPL1_MODE_CNTL 0x00000f02 - -#define REG_A5XX_TPL1_PERFCTR_TP_SEL_0 0x00000f10 - -#define REG_A5XX_TPL1_PERFCTR_TP_SEL_1 0x00000f11 - -#define REG_A5XX_TPL1_PERFCTR_TP_SEL_2 0x00000f12 - -#define REG_A5XX_TPL1_PERFCTR_TP_SEL_3 0x00000f13 - -#define REG_A5XX_TPL1_PERFCTR_TP_SEL_4 0x00000f14 - -#define REG_A5XX_TPL1_PERFCTR_TP_SEL_5 0x00000f15 - -#define REG_A5XX_TPL1_PERFCTR_TP_SEL_6 0x00000f16 - -#define REG_A5XX_TPL1_PERFCTR_TP_SEL_7 0x00000f17 - -#define REG_A5XX_TPL1_POWERCTR_TP_SEL_0 0x00000f18 - -#define REG_A5XX_TPL1_POWERCTR_TP_SEL_1 0x00000f19 - -#define REG_A5XX_TPL1_POWERCTR_TP_SEL_2 0x00000f1a - -#define REG_A5XX_TPL1_POWERCTR_TP_SEL_3 0x00000f1b - -#define REG_A5XX_VBIF_VERSION 0x00003000 - -#define REG_A5XX_VBIF_CLKON 0x00003001 - -#define REG_A5XX_VBIF_ABIT_SORT 0x00003028 - -#define REG_A5XX_VBIF_ABIT_SORT_CONF 0x00003029 - -#define REG_A5XX_VBIF_ROUND_ROBIN_QOS_ARB 0x00003049 - -#define REG_A5XX_VBIF_GATE_OFF_WRREQ_EN 0x0000302a - -#define REG_A5XX_VBIF_IN_RD_LIM_CONF0 0x0000302c - -#define REG_A5XX_VBIF_IN_RD_LIM_CONF1 0x0000302d - -#define REG_A5XX_VBIF_XIN_HALT_CTRL0 0x00003080 - -#define REG_A5XX_VBIF_XIN_HALT_CTRL1 0x00003081 - 
-#define REG_A5XX_VBIF_TEST_BUS_OUT_CTRL 0x00003084 - -#define REG_A5XX_VBIF_TEST_BUS1_CTRL0 0x00003085 - -#define REG_A5XX_VBIF_TEST_BUS1_CTRL1 0x00003086 - -#define REG_A5XX_VBIF_TEST_BUS2_CTRL0 0x00003087 - -#define REG_A5XX_VBIF_TEST_BUS2_CTRL1 0x00003088 - -#define REG_A5XX_VBIF_TEST_BUS_OUT 0x0000308c - -#define REG_A5XX_VBIF_PERF_CNT_EN0 0x000030c0 - -#define REG_A5XX_VBIF_PERF_CNT_EN1 0x000030c1 - -#define REG_A5XX_VBIF_PERF_CNT_EN2 0x000030c2 - -#define REG_A5XX_VBIF_PERF_CNT_EN3 0x000030c3 - -#define REG_A5XX_VBIF_PERF_CNT_CLR0 0x000030c8 - -#define REG_A5XX_VBIF_PERF_CNT_CLR1 0x000030c9 - -#define REG_A5XX_VBIF_PERF_CNT_CLR2 0x000030ca - -#define REG_A5XX_VBIF_PERF_CNT_CLR3 0x000030cb - -#define REG_A5XX_VBIF_PERF_CNT_SEL0 0x000030d0 - -#define REG_A5XX_VBIF_PERF_CNT_SEL1 0x000030d1 - -#define REG_A5XX_VBIF_PERF_CNT_SEL2 0x000030d2 - -#define REG_A5XX_VBIF_PERF_CNT_SEL3 0x000030d3 - -#define REG_A5XX_VBIF_PERF_CNT_LOW0 0x000030d8 - -#define REG_A5XX_VBIF_PERF_CNT_LOW1 0x000030d9 - -#define REG_A5XX_VBIF_PERF_CNT_LOW2 0x000030da - -#define REG_A5XX_VBIF_PERF_CNT_LOW3 0x000030db - -#define REG_A5XX_VBIF_PERF_CNT_HIGH0 0x000030e0 - -#define REG_A5XX_VBIF_PERF_CNT_HIGH1 0x000030e1 - -#define REG_A5XX_VBIF_PERF_CNT_HIGH2 0x000030e2 - -#define REG_A5XX_VBIF_PERF_CNT_HIGH3 0x000030e3 - -#define REG_A5XX_VBIF_PERF_PWR_CNT_EN0 0x00003100 - -#define REG_A5XX_VBIF_PERF_PWR_CNT_EN1 0x00003101 - -#define REG_A5XX_VBIF_PERF_PWR_CNT_EN2 0x00003102 - -#define REG_A5XX_VBIF_PERF_PWR_CNT_LOW0 0x00003110 - -#define REG_A5XX_VBIF_PERF_PWR_CNT_LOW1 0x00003111 - -#define REG_A5XX_VBIF_PERF_PWR_CNT_LOW2 0x00003112 - -#define REG_A5XX_VBIF_PERF_PWR_CNT_HIGH0 0x00003118 - -#define REG_A5XX_VBIF_PERF_PWR_CNT_HIGH1 0x00003119 - -#define REG_A5XX_VBIF_PERF_PWR_CNT_HIGH2 0x0000311a - -#define REG_A5XX_GPMU_INST_RAM_BASE 0x00008800 - -#define REG_A5XX_GPMU_DATA_RAM_BASE 0x00009800 - -#define REG_A5XX_GPMU_SP_POWER_CNTL 0x0000a881 - -#define REG_A5XX_GPMU_RBCCU_CLOCK_CNTL 0x0000a886 - 
-#define REG_A5XX_GPMU_RBCCU_POWER_CNTL 0x0000a887 - -#define REG_A5XX_GPMU_SP_PWR_CLK_STATUS 0x0000a88b -#define A5XX_GPMU_SP_PWR_CLK_STATUS_PWR_ON 0x00100000 - -#define REG_A5XX_GPMU_RBCCU_PWR_CLK_STATUS 0x0000a88d -#define A5XX_GPMU_RBCCU_PWR_CLK_STATUS_PWR_ON 0x00100000 - -#define REG_A5XX_GPMU_PWR_COL_STAGGER_DELAY 0x0000a891 - -#define REG_A5XX_GPMU_PWR_COL_INTER_FRAME_CTRL 0x0000a892 - -#define REG_A5XX_GPMU_PWR_COL_INTER_FRAME_HYST 0x0000a893 - -#define REG_A5XX_GPMU_PWR_COL_BINNING_CTRL 0x0000a894 - -#define REG_A5XX_GPMU_CLOCK_THROTTLE_CTRL 0x0000a8a3 - -#define REG_A5XX_GPMU_WFI_CONFIG 0x0000a8c1 - -#define REG_A5XX_GPMU_RBBM_INTR_INFO 0x0000a8d6 - -#define REG_A5XX_GPMU_CM3_SYSRESET 0x0000a8d8 - -#define REG_A5XX_GPMU_GENERAL_0 0x0000a8e0 - -#define REG_A5XX_GPMU_GENERAL_1 0x0000a8e1 - -#define REG_A5XX_SP_POWER_COUNTER_0_LO 0x0000a840 - -#define REG_A5XX_SP_POWER_COUNTER_0_HI 0x0000a841 - -#define REG_A5XX_SP_POWER_COUNTER_1_LO 0x0000a842 - -#define REG_A5XX_SP_POWER_COUNTER_1_HI 0x0000a843 - -#define REG_A5XX_SP_POWER_COUNTER_2_LO 0x0000a844 - -#define REG_A5XX_SP_POWER_COUNTER_2_HI 0x0000a845 - -#define REG_A5XX_SP_POWER_COUNTER_3_LO 0x0000a846 - -#define REG_A5XX_SP_POWER_COUNTER_3_HI 0x0000a847 - -#define REG_A5XX_TP_POWER_COUNTER_0_LO 0x0000a848 - -#define REG_A5XX_TP_POWER_COUNTER_0_HI 0x0000a849 - -#define REG_A5XX_TP_POWER_COUNTER_1_LO 0x0000a84a - -#define REG_A5XX_TP_POWER_COUNTER_1_HI 0x0000a84b - -#define REG_A5XX_TP_POWER_COUNTER_2_LO 0x0000a84c - -#define REG_A5XX_TP_POWER_COUNTER_2_HI 0x0000a84d - -#define REG_A5XX_TP_POWER_COUNTER_3_LO 0x0000a84e - -#define REG_A5XX_TP_POWER_COUNTER_3_HI 0x0000a84f - -#define REG_A5XX_RB_POWER_COUNTER_0_LO 0x0000a850 - -#define REG_A5XX_RB_POWER_COUNTER_0_HI 0x0000a851 - -#define REG_A5XX_RB_POWER_COUNTER_1_LO 0x0000a852 - -#define REG_A5XX_RB_POWER_COUNTER_1_HI 0x0000a853 - -#define REG_A5XX_RB_POWER_COUNTER_2_LO 0x0000a854 - -#define REG_A5XX_RB_POWER_COUNTER_2_HI 0x0000a855 - -#define 
REG_A5XX_RB_POWER_COUNTER_3_LO 0x0000a856 - -#define REG_A5XX_RB_POWER_COUNTER_3_HI 0x0000a857 - -#define REG_A5XX_CCU_POWER_COUNTER_0_LO 0x0000a858 - -#define REG_A5XX_CCU_POWER_COUNTER_0_HI 0x0000a859 - -#define REG_A5XX_CCU_POWER_COUNTER_1_LO 0x0000a85a - -#define REG_A5XX_CCU_POWER_COUNTER_1_HI 0x0000a85b - -#define REG_A5XX_UCHE_POWER_COUNTER_0_LO 0x0000a85c - -#define REG_A5XX_UCHE_POWER_COUNTER_0_HI 0x0000a85d - -#define REG_A5XX_UCHE_POWER_COUNTER_1_LO 0x0000a85e - -#define REG_A5XX_UCHE_POWER_COUNTER_1_HI 0x0000a85f - -#define REG_A5XX_UCHE_POWER_COUNTER_2_LO 0x0000a860 - -#define REG_A5XX_UCHE_POWER_COUNTER_2_HI 0x0000a861 - -#define REG_A5XX_UCHE_POWER_COUNTER_3_LO 0x0000a862 - -#define REG_A5XX_UCHE_POWER_COUNTER_3_HI 0x0000a863 - -#define REG_A5XX_CP_POWER_COUNTER_0_LO 0x0000a864 - -#define REG_A5XX_CP_POWER_COUNTER_0_HI 0x0000a865 - -#define REG_A5XX_CP_POWER_COUNTER_1_LO 0x0000a866 - -#define REG_A5XX_CP_POWER_COUNTER_1_HI 0x0000a867 - -#define REG_A5XX_CP_POWER_COUNTER_2_LO 0x0000a868 - -#define REG_A5XX_CP_POWER_COUNTER_2_HI 0x0000a869 - -#define REG_A5XX_CP_POWER_COUNTER_3_LO 0x0000a86a - -#define REG_A5XX_CP_POWER_COUNTER_3_HI 0x0000a86b - -#define REG_A5XX_GPMU_POWER_COUNTER_0_LO 0x0000a86c - -#define REG_A5XX_GPMU_POWER_COUNTER_0_HI 0x0000a86d - -#define REG_A5XX_GPMU_POWER_COUNTER_1_LO 0x0000a86e - -#define REG_A5XX_GPMU_POWER_COUNTER_1_HI 0x0000a86f - -#define REG_A5XX_GPMU_POWER_COUNTER_2_LO 0x0000a870 - -#define REG_A5XX_GPMU_POWER_COUNTER_2_HI 0x0000a871 - -#define REG_A5XX_GPMU_POWER_COUNTER_3_LO 0x0000a872 - -#define REG_A5XX_GPMU_POWER_COUNTER_3_HI 0x0000a873 - -#define REG_A5XX_GPMU_POWER_COUNTER_4_LO 0x0000a874 - -#define REG_A5XX_GPMU_POWER_COUNTER_4_HI 0x0000a875 - -#define REG_A5XX_GPMU_POWER_COUNTER_5_LO 0x0000a876 - -#define REG_A5XX_GPMU_POWER_COUNTER_5_HI 0x0000a877 - -#define REG_A5XX_GPMU_POWER_COUNTER_ENABLE 0x0000a878 - -#define REG_A5XX_GPMU_ALWAYS_ON_COUNTER_LO 0x0000a879 - -#define REG_A5XX_GPMU_ALWAYS_ON_COUNTER_HI 
0x0000a87a - -#define REG_A5XX_GPMU_ALWAYS_ON_COUNTER_RESET 0x0000a87b - -#define REG_A5XX_GPMU_POWER_COUNTER_SELECT_0 0x0000a87c - -#define REG_A5XX_GPMU_POWER_COUNTER_SELECT_1 0x0000a87d - -#define REG_A5XX_GPMU_CLOCK_THROTTLE_CTRL 0x0000a8a3 - -#define REG_A5XX_GPMU_THROTTLE_UNMASK_FORCE_CTRL 0x0000a8a8 - -#define REG_A5XX_GPMU_TEMP_SENSOR_ID 0x0000ac00 - -#define REG_A5XX_GPMU_TEMP_SENSOR_CONFIG 0x0000ac01 - -#define REG_A5XX_GPMU_TEMP_VAL 0x0000ac02 - -#define REG_A5XX_GPMU_DELTA_TEMP_THRESHOLD 0x0000ac03 - -#define REG_A5XX_GPMU_TEMP_THRESHOLD_INTR_STATUS 0x0000ac05 - -#define REG_A5XX_GPMU_TEMP_THRESHOLD_INTR_EN_MASK 0x0000ac06 - -#define REG_A5XX_GPMU_LEAKAGE_TEMP_COEFF_0_1 0x0000ac40 - -#define REG_A5XX_GPMU_LEAKAGE_TEMP_COEFF_2_3 0x0000ac41 - -#define REG_A5XX_GPMU_LEAKAGE_VTG_COEFF_0_1 0x0000ac42 - -#define REG_A5XX_GPMU_LEAKAGE_VTG_COEFF_2_3 0x0000ac43 - -#define REG_A5XX_GPMU_BASE_LEAKAGE 0x0000ac46 - -#define REG_A5XX_GPMU_GPMU_VOLTAGE 0x0000ac60 - -#define REG_A5XX_GPMU_GPMU_VOLTAGE_INTR_STATUS 0x0000ac61 - -#define REG_A5XX_GPMU_GPMU_VOLTAGE_INTR_EN_MASK 0x0000ac62 - -#define REG_A5XX_GPMU_GPMU_PWR_THRESHOLD 0x0000ac80 - -#define REG_A5XX_GPMU_GPMU_LLM_GLM_SLEEP_CTRL 0x0000acc4 - -#define REG_A5XX_GPMU_GPMU_LLM_GLM_SLEEP_STATUS 0x0000acc5 - -#define REG_A5XX_GDPM_CONFIG1 0x0000b80c - -#define REG_A5XX_GDPM_CONFIG2 0x0000b80d - -#define REG_A5XX_GDPM_INT_EN 0x0000b80f - -#define REG_A5XX_GDPM_INT_MASK 0x0000b811 - -#define REG_A5XX_GPMU_BEC_ENABLE 0x0000b9a0 - -#define REG_A5XX_GPU_CS_SENSOR_GENERAL_STATUS 0x0000c41a - -#define REG_A5XX_GPU_CS_AMP_CALIBRATION_STATUS1_0 0x0000c41d - -#define REG_A5XX_GPU_CS_AMP_CALIBRATION_STATUS1_2 0x0000c41f - -#define REG_A5XX_GPU_CS_AMP_CALIBRATION_STATUS1_4 0x0000c421 - -#define REG_A5XX_GPU_CS_ENABLE_REG 0x0000c520 - -#define REG_A5XX_GPU_CS_AMP_CALIBRATION_CONTROL1 0x0000c557 - -#define REG_A5XX_GRAS_CL_CNTL 0x0000e000 -#define A5XX_GRAS_CL_CNTL_ZERO_GB_SCALE_Z 0x00000040 - -#define REG_A5XX_UNKNOWN_E001 
0x0000e001 - -#define REG_A5XX_UNKNOWN_E004 0x0000e004 - -#define REG_A5XX_GRAS_CNTL 0x0000e005 -#define A5XX_GRAS_CNTL_VARYING 0x00000001 -#define A5XX_GRAS_CNTL_UNK3 0x00000008 -#define A5XX_GRAS_CNTL_XCOORD 0x00000040 -#define A5XX_GRAS_CNTL_YCOORD 0x00000080 -#define A5XX_GRAS_CNTL_ZCOORD 0x00000100 -#define A5XX_GRAS_CNTL_WCOORD 0x00000200 - -#define REG_A5XX_GRAS_CL_GUARDBAND_CLIP_ADJ 0x0000e006 -#define A5XX_GRAS_CL_GUARDBAND_CLIP_ADJ_HORZ__MASK 0x000003ff -#define A5XX_GRAS_CL_GUARDBAND_CLIP_ADJ_HORZ__SHIFT 0 -static inline uint32_t A5XX_GRAS_CL_GUARDBAND_CLIP_ADJ_HORZ(uint32_t val) -{ - return ((val) << A5XX_GRAS_CL_GUARDBAND_CLIP_ADJ_HORZ__SHIFT) & A5XX_GRAS_CL_GUARDBAND_CLIP_ADJ_HORZ__MASK; -} -#define A5XX_GRAS_CL_GUARDBAND_CLIP_ADJ_VERT__MASK 0x000ffc00 -#define A5XX_GRAS_CL_GUARDBAND_CLIP_ADJ_VERT__SHIFT 10 -static inline uint32_t A5XX_GRAS_CL_GUARDBAND_CLIP_ADJ_VERT(uint32_t val) -{ - return ((val) << A5XX_GRAS_CL_GUARDBAND_CLIP_ADJ_VERT__SHIFT) & A5XX_GRAS_CL_GUARDBAND_CLIP_ADJ_VERT__MASK; -} - -#define REG_A5XX_GRAS_CL_VPORT_XOFFSET_0 0x0000e010 -#define A5XX_GRAS_CL_VPORT_XOFFSET_0__MASK 0xffffffff -#define A5XX_GRAS_CL_VPORT_XOFFSET_0__SHIFT 0 -static inline uint32_t A5XX_GRAS_CL_VPORT_XOFFSET_0(float val) -{ - return ((fui(val)) << A5XX_GRAS_CL_VPORT_XOFFSET_0__SHIFT) & A5XX_GRAS_CL_VPORT_XOFFSET_0__MASK; -} - -#define REG_A5XX_GRAS_CL_VPORT_XSCALE_0 0x0000e011 -#define A5XX_GRAS_CL_VPORT_XSCALE_0__MASK 0xffffffff -#define A5XX_GRAS_CL_VPORT_XSCALE_0__SHIFT 0 -static inline uint32_t A5XX_GRAS_CL_VPORT_XSCALE_0(float val) -{ - return ((fui(val)) << A5XX_GRAS_CL_VPORT_XSCALE_0__SHIFT) & A5XX_GRAS_CL_VPORT_XSCALE_0__MASK; -} - -#define REG_A5XX_GRAS_CL_VPORT_YOFFSET_0 0x0000e012 -#define A5XX_GRAS_CL_VPORT_YOFFSET_0__MASK 0xffffffff -#define A5XX_GRAS_CL_VPORT_YOFFSET_0__SHIFT 0 -static inline uint32_t A5XX_GRAS_CL_VPORT_YOFFSET_0(float val) -{ - return ((fui(val)) << A5XX_GRAS_CL_VPORT_YOFFSET_0__SHIFT) & A5XX_GRAS_CL_VPORT_YOFFSET_0__MASK; -} - 
-#define REG_A5XX_GRAS_CL_VPORT_YSCALE_0 0x0000e013 -#define A5XX_GRAS_CL_VPORT_YSCALE_0__MASK 0xffffffff -#define A5XX_GRAS_CL_VPORT_YSCALE_0__SHIFT 0 -static inline uint32_t A5XX_GRAS_CL_VPORT_YSCALE_0(float val) -{ - return ((fui(val)) << A5XX_GRAS_CL_VPORT_YSCALE_0__SHIFT) & A5XX_GRAS_CL_VPORT_YSCALE_0__MASK; -} - -#define REG_A5XX_GRAS_CL_VPORT_ZOFFSET_0 0x0000e014 -#define A5XX_GRAS_CL_VPORT_ZOFFSET_0__MASK 0xffffffff -#define A5XX_GRAS_CL_VPORT_ZOFFSET_0__SHIFT 0 -static inline uint32_t A5XX_GRAS_CL_VPORT_ZOFFSET_0(float val) -{ - return ((fui(val)) << A5XX_GRAS_CL_VPORT_ZOFFSET_0__SHIFT) & A5XX_GRAS_CL_VPORT_ZOFFSET_0__MASK; -} - -#define REG_A5XX_GRAS_CL_VPORT_ZSCALE_0 0x0000e015 -#define A5XX_GRAS_CL_VPORT_ZSCALE_0__MASK 0xffffffff -#define A5XX_GRAS_CL_VPORT_ZSCALE_0__SHIFT 0 -static inline uint32_t A5XX_GRAS_CL_VPORT_ZSCALE_0(float val) -{ - return ((fui(val)) << A5XX_GRAS_CL_VPORT_ZSCALE_0__SHIFT) & A5XX_GRAS_CL_VPORT_ZSCALE_0__MASK; -} - -#define REG_A5XX_GRAS_SU_CNTL 0x0000e090 -#define A5XX_GRAS_SU_CNTL_CULL_FRONT 0x00000001 -#define A5XX_GRAS_SU_CNTL_CULL_BACK 0x00000002 -#define A5XX_GRAS_SU_CNTL_FRONT_CW 0x00000004 -#define A5XX_GRAS_SU_CNTL_LINEHALFWIDTH__MASK 0x000007f8 -#define A5XX_GRAS_SU_CNTL_LINEHALFWIDTH__SHIFT 3 -static inline uint32_t A5XX_GRAS_SU_CNTL_LINEHALFWIDTH(float val) -{ - return ((((int32_t)(val * 4.0))) << A5XX_GRAS_SU_CNTL_LINEHALFWIDTH__SHIFT) & A5XX_GRAS_SU_CNTL_LINEHALFWIDTH__MASK; -} -#define A5XX_GRAS_SU_CNTL_POLY_OFFSET 0x00000800 -#define A5XX_GRAS_SU_CNTL_MSAA_ENABLE 0x00002000 - -#define REG_A5XX_GRAS_SU_POINT_MINMAX 0x0000e091 -#define A5XX_GRAS_SU_POINT_MINMAX_MIN__MASK 0x0000ffff -#define A5XX_GRAS_SU_POINT_MINMAX_MIN__SHIFT 0 -static inline uint32_t A5XX_GRAS_SU_POINT_MINMAX_MIN(float val) -{ - return ((((uint32_t)(val * 16.0))) << A5XX_GRAS_SU_POINT_MINMAX_MIN__SHIFT) & A5XX_GRAS_SU_POINT_MINMAX_MIN__MASK; -} -#define A5XX_GRAS_SU_POINT_MINMAX_MAX__MASK 0xffff0000 -#define A5XX_GRAS_SU_POINT_MINMAX_MAX__SHIFT 
16 -static inline uint32_t A5XX_GRAS_SU_POINT_MINMAX_MAX(float val) -{ - return ((((uint32_t)(val * 16.0))) << A5XX_GRAS_SU_POINT_MINMAX_MAX__SHIFT) & A5XX_GRAS_SU_POINT_MINMAX_MAX__MASK; -} - -#define REG_A5XX_GRAS_SU_POINT_SIZE 0x0000e092 -#define A5XX_GRAS_SU_POINT_SIZE__MASK 0xffffffff -#define A5XX_GRAS_SU_POINT_SIZE__SHIFT 0 -static inline uint32_t A5XX_GRAS_SU_POINT_SIZE(float val) -{ - return ((((int32_t)(val * 16.0))) << A5XX_GRAS_SU_POINT_SIZE__SHIFT) & A5XX_GRAS_SU_POINT_SIZE__MASK; -} - -#define REG_A5XX_GRAS_SU_LAYERED 0x0000e093 - -#define REG_A5XX_GRAS_SU_DEPTH_PLANE_CNTL 0x0000e094 -#define A5XX_GRAS_SU_DEPTH_PLANE_CNTL_FRAG_WRITES_Z 0x00000001 -#define A5XX_GRAS_SU_DEPTH_PLANE_CNTL_UNK1 0x00000002 - -#define REG_A5XX_GRAS_SU_POLY_OFFSET_SCALE 0x0000e095 -#define A5XX_GRAS_SU_POLY_OFFSET_SCALE__MASK 0xffffffff -#define A5XX_GRAS_SU_POLY_OFFSET_SCALE__SHIFT 0 -static inline uint32_t A5XX_GRAS_SU_POLY_OFFSET_SCALE(float val) -{ - return ((fui(val)) << A5XX_GRAS_SU_POLY_OFFSET_SCALE__SHIFT) & A5XX_GRAS_SU_POLY_OFFSET_SCALE__MASK; -} - -#define REG_A5XX_GRAS_SU_POLY_OFFSET_OFFSET 0x0000e096 -#define A5XX_GRAS_SU_POLY_OFFSET_OFFSET__MASK 0xffffffff -#define A5XX_GRAS_SU_POLY_OFFSET_OFFSET__SHIFT 0 -static inline uint32_t A5XX_GRAS_SU_POLY_OFFSET_OFFSET(float val) -{ - return ((fui(val)) << A5XX_GRAS_SU_POLY_OFFSET_OFFSET__SHIFT) & A5XX_GRAS_SU_POLY_OFFSET_OFFSET__MASK; -} - -#define REG_A5XX_GRAS_SU_POLY_OFFSET_OFFSET_CLAMP 0x0000e097 -#define A5XX_GRAS_SU_POLY_OFFSET_OFFSET_CLAMP__MASK 0xffffffff -#define A5XX_GRAS_SU_POLY_OFFSET_OFFSET_CLAMP__SHIFT 0 -static inline uint32_t A5XX_GRAS_SU_POLY_OFFSET_OFFSET_CLAMP(float val) -{ - return ((fui(val)) << A5XX_GRAS_SU_POLY_OFFSET_OFFSET_CLAMP__SHIFT) & A5XX_GRAS_SU_POLY_OFFSET_OFFSET_CLAMP__MASK; -} - -#define REG_A5XX_GRAS_SU_DEPTH_BUFFER_INFO 0x0000e098 -#define A5XX_GRAS_SU_DEPTH_BUFFER_INFO_DEPTH_FORMAT__MASK 0x00000007 -#define A5XX_GRAS_SU_DEPTH_BUFFER_INFO_DEPTH_FORMAT__SHIFT 0 -static inline uint32_t 
A5XX_GRAS_SU_DEPTH_BUFFER_INFO_DEPTH_FORMAT(enum a5xx_depth_format val) -{ - return ((val) << A5XX_GRAS_SU_DEPTH_BUFFER_INFO_DEPTH_FORMAT__SHIFT) & A5XX_GRAS_SU_DEPTH_BUFFER_INFO_DEPTH_FORMAT__MASK; -} - -#define REG_A5XX_GRAS_SU_CONSERVATIVE_RAS_CNTL 0x0000e099 - -#define REG_A5XX_GRAS_SC_CNTL 0x0000e0a0 -#define A5XX_GRAS_SC_CNTL_BINNING_PASS 0x00000001 -#define A5XX_GRAS_SC_CNTL_SAMPLES_PASSED 0x00008000 - -#define REG_A5XX_GRAS_SC_BIN_CNTL 0x0000e0a1 - -#define REG_A5XX_GRAS_SC_RAS_MSAA_CNTL 0x0000e0a2 -#define A5XX_GRAS_SC_RAS_MSAA_CNTL_SAMPLES__MASK 0x00000003 -#define A5XX_GRAS_SC_RAS_MSAA_CNTL_SAMPLES__SHIFT 0 -static inline uint32_t A5XX_GRAS_SC_RAS_MSAA_CNTL_SAMPLES(enum a3xx_msaa_samples val) -{ - return ((val) << A5XX_GRAS_SC_RAS_MSAA_CNTL_SAMPLES__SHIFT) & A5XX_GRAS_SC_RAS_MSAA_CNTL_SAMPLES__MASK; -} - -#define REG_A5XX_GRAS_SC_DEST_MSAA_CNTL 0x0000e0a3 -#define A5XX_GRAS_SC_DEST_MSAA_CNTL_SAMPLES__MASK 0x00000003 -#define A5XX_GRAS_SC_DEST_MSAA_CNTL_SAMPLES__SHIFT 0 -static inline uint32_t A5XX_GRAS_SC_DEST_MSAA_CNTL_SAMPLES(enum a3xx_msaa_samples val) -{ - return ((val) << A5XX_GRAS_SC_DEST_MSAA_CNTL_SAMPLES__SHIFT) & A5XX_GRAS_SC_DEST_MSAA_CNTL_SAMPLES__MASK; -} -#define A5XX_GRAS_SC_DEST_MSAA_CNTL_MSAA_DISABLE 0x00000004 - -#define REG_A5XX_GRAS_SC_SCREEN_SCISSOR_CNTL 0x0000e0a4 - -#define REG_A5XX_GRAS_SC_SCREEN_SCISSOR_TL_0 0x0000e0aa -#define A5XX_GRAS_SC_SCREEN_SCISSOR_TL_0_WINDOW_OFFSET_DISABLE 0x80000000 -#define A5XX_GRAS_SC_SCREEN_SCISSOR_TL_0_X__MASK 0x00007fff -#define A5XX_GRAS_SC_SCREEN_SCISSOR_TL_0_X__SHIFT 0 -static inline uint32_t A5XX_GRAS_SC_SCREEN_SCISSOR_TL_0_X(uint32_t val) -{ - return ((val) << A5XX_GRAS_SC_SCREEN_SCISSOR_TL_0_X__SHIFT) & A5XX_GRAS_SC_SCREEN_SCISSOR_TL_0_X__MASK; -} -#define A5XX_GRAS_SC_SCREEN_SCISSOR_TL_0_Y__MASK 0x7fff0000 -#define A5XX_GRAS_SC_SCREEN_SCISSOR_TL_0_Y__SHIFT 16 -static inline uint32_t A5XX_GRAS_SC_SCREEN_SCISSOR_TL_0_Y(uint32_t val) -{ - return ((val) << 
A5XX_GRAS_SC_SCREEN_SCISSOR_TL_0_Y__SHIFT) & A5XX_GRAS_SC_SCREEN_SCISSOR_TL_0_Y__MASK; -} - -#define REG_A5XX_GRAS_SC_SCREEN_SCISSOR_BR_0 0x0000e0ab -#define A5XX_GRAS_SC_SCREEN_SCISSOR_BR_0_WINDOW_OFFSET_DISABLE 0x80000000 -#define A5XX_GRAS_SC_SCREEN_SCISSOR_BR_0_X__MASK 0x00007fff -#define A5XX_GRAS_SC_SCREEN_SCISSOR_BR_0_X__SHIFT 0 -static inline uint32_t A5XX_GRAS_SC_SCREEN_SCISSOR_BR_0_X(uint32_t val) -{ - return ((val) << A5XX_GRAS_SC_SCREEN_SCISSOR_BR_0_X__SHIFT) & A5XX_GRAS_SC_SCREEN_SCISSOR_BR_0_X__MASK; -} -#define A5XX_GRAS_SC_SCREEN_SCISSOR_BR_0_Y__MASK 0x7fff0000 -#define A5XX_GRAS_SC_SCREEN_SCISSOR_BR_0_Y__SHIFT 16 -static inline uint32_t A5XX_GRAS_SC_SCREEN_SCISSOR_BR_0_Y(uint32_t val) -{ - return ((val) << A5XX_GRAS_SC_SCREEN_SCISSOR_BR_0_Y__SHIFT) & A5XX_GRAS_SC_SCREEN_SCISSOR_BR_0_Y__MASK; -} - -#define REG_A5XX_GRAS_SC_VIEWPORT_SCISSOR_TL_0 0x0000e0ca -#define A5XX_GRAS_SC_VIEWPORT_SCISSOR_TL_0_WINDOW_OFFSET_DISABLE 0x80000000 -#define A5XX_GRAS_SC_VIEWPORT_SCISSOR_TL_0_X__MASK 0x00007fff -#define A5XX_GRAS_SC_VIEWPORT_SCISSOR_TL_0_X__SHIFT 0 -static inline uint32_t A5XX_GRAS_SC_VIEWPORT_SCISSOR_TL_0_X(uint32_t val) -{ - return ((val) << A5XX_GRAS_SC_VIEWPORT_SCISSOR_TL_0_X__SHIFT) & A5XX_GRAS_SC_VIEWPORT_SCISSOR_TL_0_X__MASK; -} -#define A5XX_GRAS_SC_VIEWPORT_SCISSOR_TL_0_Y__MASK 0x7fff0000 -#define A5XX_GRAS_SC_VIEWPORT_SCISSOR_TL_0_Y__SHIFT 16 -static inline uint32_t A5XX_GRAS_SC_VIEWPORT_SCISSOR_TL_0_Y(uint32_t val) -{ - return ((val) << A5XX_GRAS_SC_VIEWPORT_SCISSOR_TL_0_Y__SHIFT) & A5XX_GRAS_SC_VIEWPORT_SCISSOR_TL_0_Y__MASK; -} - -#define REG_A5XX_GRAS_SC_VIEWPORT_SCISSOR_BR_0 0x0000e0cb -#define A5XX_GRAS_SC_VIEWPORT_SCISSOR_BR_0_WINDOW_OFFSET_DISABLE 0x80000000 -#define A5XX_GRAS_SC_VIEWPORT_SCISSOR_BR_0_X__MASK 0x00007fff -#define A5XX_GRAS_SC_VIEWPORT_SCISSOR_BR_0_X__SHIFT 0 -static inline uint32_t A5XX_GRAS_SC_VIEWPORT_SCISSOR_BR_0_X(uint32_t val) -{ - return ((val) << A5XX_GRAS_SC_VIEWPORT_SCISSOR_BR_0_X__SHIFT) & 
A5XX_GRAS_SC_VIEWPORT_SCISSOR_BR_0_X__MASK; -} -#define A5XX_GRAS_SC_VIEWPORT_SCISSOR_BR_0_Y__MASK 0x7fff0000 -#define A5XX_GRAS_SC_VIEWPORT_SCISSOR_BR_0_Y__SHIFT 16 -static inline uint32_t A5XX_GRAS_SC_VIEWPORT_SCISSOR_BR_0_Y(uint32_t val) -{ - return ((val) << A5XX_GRAS_SC_VIEWPORT_SCISSOR_BR_0_Y__SHIFT) & A5XX_GRAS_SC_VIEWPORT_SCISSOR_BR_0_Y__MASK; -} - -#define REG_A5XX_GRAS_SC_WINDOW_SCISSOR_TL 0x0000e0ea -#define A5XX_GRAS_SC_WINDOW_SCISSOR_TL_WINDOW_OFFSET_DISABLE 0x80000000 -#define A5XX_GRAS_SC_WINDOW_SCISSOR_TL_X__MASK 0x00007fff -#define A5XX_GRAS_SC_WINDOW_SCISSOR_TL_X__SHIFT 0 -static inline uint32_t A5XX_GRAS_SC_WINDOW_SCISSOR_TL_X(uint32_t val) -{ - return ((val) << A5XX_GRAS_SC_WINDOW_SCISSOR_TL_X__SHIFT) & A5XX_GRAS_SC_WINDOW_SCISSOR_TL_X__MASK; -} -#define A5XX_GRAS_SC_WINDOW_SCISSOR_TL_Y__MASK 0x7fff0000 -#define A5XX_GRAS_SC_WINDOW_SCISSOR_TL_Y__SHIFT 16 -static inline uint32_t A5XX_GRAS_SC_WINDOW_SCISSOR_TL_Y(uint32_t val) -{ - return ((val) << A5XX_GRAS_SC_WINDOW_SCISSOR_TL_Y__SHIFT) & A5XX_GRAS_SC_WINDOW_SCISSOR_TL_Y__MASK; -} - -#define REG_A5XX_GRAS_SC_WINDOW_SCISSOR_BR 0x0000e0eb -#define A5XX_GRAS_SC_WINDOW_SCISSOR_BR_WINDOW_OFFSET_DISABLE 0x80000000 -#define A5XX_GRAS_SC_WINDOW_SCISSOR_BR_X__MASK 0x00007fff -#define A5XX_GRAS_SC_WINDOW_SCISSOR_BR_X__SHIFT 0 -static inline uint32_t A5XX_GRAS_SC_WINDOW_SCISSOR_BR_X(uint32_t val) -{ - return ((val) << A5XX_GRAS_SC_WINDOW_SCISSOR_BR_X__SHIFT) & A5XX_GRAS_SC_WINDOW_SCISSOR_BR_X__MASK; -} -#define A5XX_GRAS_SC_WINDOW_SCISSOR_BR_Y__MASK 0x7fff0000 -#define A5XX_GRAS_SC_WINDOW_SCISSOR_BR_Y__SHIFT 16 -static inline uint32_t A5XX_GRAS_SC_WINDOW_SCISSOR_BR_Y(uint32_t val) -{ - return ((val) << A5XX_GRAS_SC_WINDOW_SCISSOR_BR_Y__SHIFT) & A5XX_GRAS_SC_WINDOW_SCISSOR_BR_Y__MASK; -} - -#define REG_A5XX_GRAS_LRZ_CNTL 0x0000e100 -#define A5XX_GRAS_LRZ_CNTL_ENABLE 0x00000001 -#define A5XX_GRAS_LRZ_CNTL_LRZ_WRITE 0x00000002 -#define A5XX_GRAS_LRZ_CNTL_GREATER 0x00000004 - -#define 
REG_A5XX_GRAS_LRZ_BUFFER_BASE_LO 0x0000e101 - -#define REG_A5XX_GRAS_LRZ_BUFFER_BASE_HI 0x0000e102 - -#define REG_A5XX_GRAS_LRZ_BUFFER_PITCH 0x0000e103 -#define A5XX_GRAS_LRZ_BUFFER_PITCH__MASK 0xffffffff -#define A5XX_GRAS_LRZ_BUFFER_PITCH__SHIFT 0 -static inline uint32_t A5XX_GRAS_LRZ_BUFFER_PITCH(uint32_t val) -{ - assert(!(val & 0x1f)); - return ((val >> 5) << A5XX_GRAS_LRZ_BUFFER_PITCH__SHIFT) & A5XX_GRAS_LRZ_BUFFER_PITCH__MASK; -} - -#define REG_A5XX_GRAS_LRZ_FAST_CLEAR_BUFFER_BASE_LO 0x0000e104 - -#define REG_A5XX_GRAS_LRZ_FAST_CLEAR_BUFFER_BASE_HI 0x0000e105 - -#define REG_A5XX_RB_CNTL 0x0000e140 -#define A5XX_RB_CNTL_WIDTH__MASK 0x000000ff -#define A5XX_RB_CNTL_WIDTH__SHIFT 0 -static inline uint32_t A5XX_RB_CNTL_WIDTH(uint32_t val) -{ - assert(!(val & 0x1f)); - return ((val >> 5) << A5XX_RB_CNTL_WIDTH__SHIFT) & A5XX_RB_CNTL_WIDTH__MASK; -} -#define A5XX_RB_CNTL_HEIGHT__MASK 0x0001fe00 -#define A5XX_RB_CNTL_HEIGHT__SHIFT 9 -static inline uint32_t A5XX_RB_CNTL_HEIGHT(uint32_t val) -{ - assert(!(val & 0x1f)); - return ((val >> 5) << A5XX_RB_CNTL_HEIGHT__SHIFT) & A5XX_RB_CNTL_HEIGHT__MASK; -} -#define A5XX_RB_CNTL_BYPASS 0x00020000 - -#define REG_A5XX_RB_RENDER_CNTL 0x0000e141 -#define A5XX_RB_RENDER_CNTL_BINNING_PASS 0x00000001 -#define A5XX_RB_RENDER_CNTL_SAMPLES_PASSED 0x00000040 -#define A5XX_RB_RENDER_CNTL_DISABLE_COLOR_PIPE 0x00000080 -#define A5XX_RB_RENDER_CNTL_FLAG_DEPTH 0x00004000 -#define A5XX_RB_RENDER_CNTL_FLAG_DEPTH2 0x00008000 -#define A5XX_RB_RENDER_CNTL_FLAG_MRTS__MASK 0x00ff0000 -#define A5XX_RB_RENDER_CNTL_FLAG_MRTS__SHIFT 16 -static inline uint32_t A5XX_RB_RENDER_CNTL_FLAG_MRTS(uint32_t val) -{ - return ((val) << A5XX_RB_RENDER_CNTL_FLAG_MRTS__SHIFT) & A5XX_RB_RENDER_CNTL_FLAG_MRTS__MASK; -} -#define A5XX_RB_RENDER_CNTL_FLAG_MRTS2__MASK 0xff000000 -#define A5XX_RB_RENDER_CNTL_FLAG_MRTS2__SHIFT 24 -static inline uint32_t A5XX_RB_RENDER_CNTL_FLAG_MRTS2(uint32_t val) -{ - return ((val) << A5XX_RB_RENDER_CNTL_FLAG_MRTS2__SHIFT) & 
A5XX_RB_RENDER_CNTL_FLAG_MRTS2__MASK; -} - -#define REG_A5XX_RB_RAS_MSAA_CNTL 0x0000e142 -#define A5XX_RB_RAS_MSAA_CNTL_SAMPLES__MASK 0x00000003 -#define A5XX_RB_RAS_MSAA_CNTL_SAMPLES__SHIFT 0 -static inline uint32_t A5XX_RB_RAS_MSAA_CNTL_SAMPLES(enum a3xx_msaa_samples val) -{ - return ((val) << A5XX_RB_RAS_MSAA_CNTL_SAMPLES__SHIFT) & A5XX_RB_RAS_MSAA_CNTL_SAMPLES__MASK; -} - -#define REG_A5XX_RB_DEST_MSAA_CNTL 0x0000e143 -#define A5XX_RB_DEST_MSAA_CNTL_SAMPLES__MASK 0x00000003 -#define A5XX_RB_DEST_MSAA_CNTL_SAMPLES__SHIFT 0 -static inline uint32_t A5XX_RB_DEST_MSAA_CNTL_SAMPLES(enum a3xx_msaa_samples val) -{ - return ((val) << A5XX_RB_DEST_MSAA_CNTL_SAMPLES__SHIFT) & A5XX_RB_DEST_MSAA_CNTL_SAMPLES__MASK; -} -#define A5XX_RB_DEST_MSAA_CNTL_MSAA_DISABLE 0x00000004 - -#define REG_A5XX_RB_RENDER_CONTROL0 0x0000e144 -#define A5XX_RB_RENDER_CONTROL0_VARYING 0x00000001 -#define A5XX_RB_RENDER_CONTROL0_UNK3 0x00000008 -#define A5XX_RB_RENDER_CONTROL0_XCOORD 0x00000040 -#define A5XX_RB_RENDER_CONTROL0_YCOORD 0x00000080 -#define A5XX_RB_RENDER_CONTROL0_ZCOORD 0x00000100 -#define A5XX_RB_RENDER_CONTROL0_WCOORD 0x00000200 - -#define REG_A5XX_RB_RENDER_CONTROL1 0x0000e145 -#define A5XX_RB_RENDER_CONTROL1_SAMPLEMASK 0x00000001 -#define A5XX_RB_RENDER_CONTROL1_FACENESS 0x00000002 -#define A5XX_RB_RENDER_CONTROL1_SAMPLEID 0x00000004 - -#define REG_A5XX_RB_FS_OUTPUT_CNTL 0x0000e146 -#define A5XX_RB_FS_OUTPUT_CNTL_MRT__MASK 0x0000000f -#define A5XX_RB_FS_OUTPUT_CNTL_MRT__SHIFT 0 -static inline uint32_t A5XX_RB_FS_OUTPUT_CNTL_MRT(uint32_t val) -{ - return ((val) << A5XX_RB_FS_OUTPUT_CNTL_MRT__SHIFT) & A5XX_RB_FS_OUTPUT_CNTL_MRT__MASK; -} -#define A5XX_RB_FS_OUTPUT_CNTL_FRAG_WRITES_Z 0x00000020 - -#define REG_A5XX_RB_RENDER_COMPONENTS 0x0000e147 -#define A5XX_RB_RENDER_COMPONENTS_RT0__MASK 0x0000000f -#define A5XX_RB_RENDER_COMPONENTS_RT0__SHIFT 0 -static inline uint32_t A5XX_RB_RENDER_COMPONENTS_RT0(uint32_t val) -{ - return ((val) << A5XX_RB_RENDER_COMPONENTS_RT0__SHIFT) & 
A5XX_RB_RENDER_COMPONENTS_RT0__MASK; -} -#define A5XX_RB_RENDER_COMPONENTS_RT1__MASK 0x000000f0 -#define A5XX_RB_RENDER_COMPONENTS_RT1__SHIFT 4 -static inline uint32_t A5XX_RB_RENDER_COMPONENTS_RT1(uint32_t val) -{ - return ((val) << A5XX_RB_RENDER_COMPONENTS_RT1__SHIFT) & A5XX_RB_RENDER_COMPONENTS_RT1__MASK; -} -#define A5XX_RB_RENDER_COMPONENTS_RT2__MASK 0x00000f00 -#define A5XX_RB_RENDER_COMPONENTS_RT2__SHIFT 8 -static inline uint32_t A5XX_RB_RENDER_COMPONENTS_RT2(uint32_t val) -{ - return ((val) << A5XX_RB_RENDER_COMPONENTS_RT2__SHIFT) & A5XX_RB_RENDER_COMPONENTS_RT2__MASK; -} -#define A5XX_RB_RENDER_COMPONENTS_RT3__MASK 0x0000f000 -#define A5XX_RB_RENDER_COMPONENTS_RT3__SHIFT 12 -static inline uint32_t A5XX_RB_RENDER_COMPONENTS_RT3(uint32_t val) -{ - return ((val) << A5XX_RB_RENDER_COMPONENTS_RT3__SHIFT) & A5XX_RB_RENDER_COMPONENTS_RT3__MASK; -} -#define A5XX_RB_RENDER_COMPONENTS_RT4__MASK 0x000f0000 -#define A5XX_RB_RENDER_COMPONENTS_RT4__SHIFT 16 -static inline uint32_t A5XX_RB_RENDER_COMPONENTS_RT4(uint32_t val) -{ - return ((val) << A5XX_RB_RENDER_COMPONENTS_RT4__SHIFT) & A5XX_RB_RENDER_COMPONENTS_RT4__MASK; -} -#define A5XX_RB_RENDER_COMPONENTS_RT5__MASK 0x00f00000 -#define A5XX_RB_RENDER_COMPONENTS_RT5__SHIFT 20 -static inline uint32_t A5XX_RB_RENDER_COMPONENTS_RT5(uint32_t val) -{ - return ((val) << A5XX_RB_RENDER_COMPONENTS_RT5__SHIFT) & A5XX_RB_RENDER_COMPONENTS_RT5__MASK; -} -#define A5XX_RB_RENDER_COMPONENTS_RT6__MASK 0x0f000000 -#define A5XX_RB_RENDER_COMPONENTS_RT6__SHIFT 24 -static inline uint32_t A5XX_RB_RENDER_COMPONENTS_RT6(uint32_t val) -{ - return ((val) << A5XX_RB_RENDER_COMPONENTS_RT6__SHIFT) & A5XX_RB_RENDER_COMPONENTS_RT6__MASK; -} -#define A5XX_RB_RENDER_COMPONENTS_RT7__MASK 0xf0000000 -#define A5XX_RB_RENDER_COMPONENTS_RT7__SHIFT 28 -static inline uint32_t A5XX_RB_RENDER_COMPONENTS_RT7(uint32_t val) -{ - return ((val) << A5XX_RB_RENDER_COMPONENTS_RT7__SHIFT) & A5XX_RB_RENDER_COMPONENTS_RT7__MASK; -} - -static inline uint32_t 
REG_A5XX_RB_MRT(uint32_t i0) { return 0x0000e150 + 0x7*i0; } - -static inline uint32_t REG_A5XX_RB_MRT_CONTROL(uint32_t i0) { return 0x0000e150 + 0x7*i0; } -#define A5XX_RB_MRT_CONTROL_BLEND 0x00000001 -#define A5XX_RB_MRT_CONTROL_BLEND2 0x00000002 -#define A5XX_RB_MRT_CONTROL_ROP_ENABLE 0x00000004 -#define A5XX_RB_MRT_CONTROL_ROP_CODE__MASK 0x00000078 -#define A5XX_RB_MRT_CONTROL_ROP_CODE__SHIFT 3 -static inline uint32_t A5XX_RB_MRT_CONTROL_ROP_CODE(enum a3xx_rop_code val) -{ - return ((val) << A5XX_RB_MRT_CONTROL_ROP_CODE__SHIFT) & A5XX_RB_MRT_CONTROL_ROP_CODE__MASK; -} -#define A5XX_RB_MRT_CONTROL_COMPONENT_ENABLE__MASK 0x00000780 -#define A5XX_RB_MRT_CONTROL_COMPONENT_ENABLE__SHIFT 7 -static inline uint32_t A5XX_RB_MRT_CONTROL_COMPONENT_ENABLE(uint32_t val) -{ - return ((val) << A5XX_RB_MRT_CONTROL_COMPONENT_ENABLE__SHIFT) & A5XX_RB_MRT_CONTROL_COMPONENT_ENABLE__MASK; -} - -static inline uint32_t REG_A5XX_RB_MRT_BLEND_CONTROL(uint32_t i0) { return 0x0000e151 + 0x7*i0; } -#define A5XX_RB_MRT_BLEND_CONTROL_RGB_SRC_FACTOR__MASK 0x0000001f -#define A5XX_RB_MRT_BLEND_CONTROL_RGB_SRC_FACTOR__SHIFT 0 -static inline uint32_t A5XX_RB_MRT_BLEND_CONTROL_RGB_SRC_FACTOR(enum adreno_rb_blend_factor val) -{ - return ((val) << A5XX_RB_MRT_BLEND_CONTROL_RGB_SRC_FACTOR__SHIFT) & A5XX_RB_MRT_BLEND_CONTROL_RGB_SRC_FACTOR__MASK; -} -#define A5XX_RB_MRT_BLEND_CONTROL_RGB_BLEND_OPCODE__MASK 0x000000e0 -#define A5XX_RB_MRT_BLEND_CONTROL_RGB_BLEND_OPCODE__SHIFT 5 -static inline uint32_t A5XX_RB_MRT_BLEND_CONTROL_RGB_BLEND_OPCODE(enum a3xx_rb_blend_opcode val) -{ - return ((val) << A5XX_RB_MRT_BLEND_CONTROL_RGB_BLEND_OPCODE__SHIFT) & A5XX_RB_MRT_BLEND_CONTROL_RGB_BLEND_OPCODE__MASK; -} -#define A5XX_RB_MRT_BLEND_CONTROL_RGB_DEST_FACTOR__MASK 0x00001f00 -#define A5XX_RB_MRT_BLEND_CONTROL_RGB_DEST_FACTOR__SHIFT 8 -static inline uint32_t A5XX_RB_MRT_BLEND_CONTROL_RGB_DEST_FACTOR(enum adreno_rb_blend_factor val) -{ - return ((val) << A5XX_RB_MRT_BLEND_CONTROL_RGB_DEST_FACTOR__SHIFT) & 
A5XX_RB_MRT_BLEND_CONTROL_RGB_DEST_FACTOR__MASK; -} -#define A5XX_RB_MRT_BLEND_CONTROL_ALPHA_SRC_FACTOR__MASK 0x001f0000 -#define A5XX_RB_MRT_BLEND_CONTROL_ALPHA_SRC_FACTOR__SHIFT 16 -static inline uint32_t A5XX_RB_MRT_BLEND_CONTROL_ALPHA_SRC_FACTOR(enum adreno_rb_blend_factor val) -{ - return ((val) << A5XX_RB_MRT_BLEND_CONTROL_ALPHA_SRC_FACTOR__SHIFT) & A5XX_RB_MRT_BLEND_CONTROL_ALPHA_SRC_FACTOR__MASK; -} -#define A5XX_RB_MRT_BLEND_CONTROL_ALPHA_BLEND_OPCODE__MASK 0x00e00000 -#define A5XX_RB_MRT_BLEND_CONTROL_ALPHA_BLEND_OPCODE__SHIFT 21 -static inline uint32_t A5XX_RB_MRT_BLEND_CONTROL_ALPHA_BLEND_OPCODE(enum a3xx_rb_blend_opcode val) -{ - return ((val) << A5XX_RB_MRT_BLEND_CONTROL_ALPHA_BLEND_OPCODE__SHIFT) & A5XX_RB_MRT_BLEND_CONTROL_ALPHA_BLEND_OPCODE__MASK; -} -#define A5XX_RB_MRT_BLEND_CONTROL_ALPHA_DEST_FACTOR__MASK 0x1f000000 -#define A5XX_RB_MRT_BLEND_CONTROL_ALPHA_DEST_FACTOR__SHIFT 24 -static inline uint32_t A5XX_RB_MRT_BLEND_CONTROL_ALPHA_DEST_FACTOR(enum adreno_rb_blend_factor val) -{ - return ((val) << A5XX_RB_MRT_BLEND_CONTROL_ALPHA_DEST_FACTOR__SHIFT) & A5XX_RB_MRT_BLEND_CONTROL_ALPHA_DEST_FACTOR__MASK; -} - -static inline uint32_t REG_A5XX_RB_MRT_BUF_INFO(uint32_t i0) { return 0x0000e152 + 0x7*i0; } -#define A5XX_RB_MRT_BUF_INFO_COLOR_FORMAT__MASK 0x000000ff -#define A5XX_RB_MRT_BUF_INFO_COLOR_FORMAT__SHIFT 0 -static inline uint32_t A5XX_RB_MRT_BUF_INFO_COLOR_FORMAT(enum a5xx_color_fmt val) -{ - return ((val) << A5XX_RB_MRT_BUF_INFO_COLOR_FORMAT__SHIFT) & A5XX_RB_MRT_BUF_INFO_COLOR_FORMAT__MASK; -} -#define A5XX_RB_MRT_BUF_INFO_COLOR_TILE_MODE__MASK 0x00000300 -#define A5XX_RB_MRT_BUF_INFO_COLOR_TILE_MODE__SHIFT 8 -static inline uint32_t A5XX_RB_MRT_BUF_INFO_COLOR_TILE_MODE(enum a5xx_tile_mode val) -{ - return ((val) << A5XX_RB_MRT_BUF_INFO_COLOR_TILE_MODE__SHIFT) & A5XX_RB_MRT_BUF_INFO_COLOR_TILE_MODE__MASK; -} -#define A5XX_RB_MRT_BUF_INFO_DITHER_MODE__MASK 0x00001800 -#define A5XX_RB_MRT_BUF_INFO_DITHER_MODE__SHIFT 11 -static inline uint32_t 
A5XX_RB_MRT_BUF_INFO_DITHER_MODE(enum adreno_rb_dither_mode val) -{ - return ((val) << A5XX_RB_MRT_BUF_INFO_DITHER_MODE__SHIFT) & A5XX_RB_MRT_BUF_INFO_DITHER_MODE__MASK; -} -#define A5XX_RB_MRT_BUF_INFO_COLOR_SWAP__MASK 0x00006000 -#define A5XX_RB_MRT_BUF_INFO_COLOR_SWAP__SHIFT 13 -static inline uint32_t A5XX_RB_MRT_BUF_INFO_COLOR_SWAP(enum a3xx_color_swap val) -{ - return ((val) << A5XX_RB_MRT_BUF_INFO_COLOR_SWAP__SHIFT) & A5XX_RB_MRT_BUF_INFO_COLOR_SWAP__MASK; -} -#define A5XX_RB_MRT_BUF_INFO_COLOR_SRGB 0x00008000 - -static inline uint32_t REG_A5XX_RB_MRT_PITCH(uint32_t i0) { return 0x0000e153 + 0x7*i0; } -#define A5XX_RB_MRT_PITCH__MASK 0xffffffff -#define A5XX_RB_MRT_PITCH__SHIFT 0 -static inline uint32_t A5XX_RB_MRT_PITCH(uint32_t val) -{ - assert(!(val & 0x3f)); - return ((val >> 6) << A5XX_RB_MRT_PITCH__SHIFT) & A5XX_RB_MRT_PITCH__MASK; -} - -static inline uint32_t REG_A5XX_RB_MRT_ARRAY_PITCH(uint32_t i0) { return 0x0000e154 + 0x7*i0; } -#define A5XX_RB_MRT_ARRAY_PITCH__MASK 0xffffffff -#define A5XX_RB_MRT_ARRAY_PITCH__SHIFT 0 -static inline uint32_t A5XX_RB_MRT_ARRAY_PITCH(uint32_t val) -{ - assert(!(val & 0x3f)); - return ((val >> 6) << A5XX_RB_MRT_ARRAY_PITCH__SHIFT) & A5XX_RB_MRT_ARRAY_PITCH__MASK; -} - -static inline uint32_t REG_A5XX_RB_MRT_BASE_LO(uint32_t i0) { return 0x0000e155 + 0x7*i0; } - -static inline uint32_t REG_A5XX_RB_MRT_BASE_HI(uint32_t i0) { return 0x0000e156 + 0x7*i0; } - -#define REG_A5XX_RB_BLEND_RED 0x0000e1a0 -#define A5XX_RB_BLEND_RED_UINT__MASK 0x000000ff -#define A5XX_RB_BLEND_RED_UINT__SHIFT 0 -static inline uint32_t A5XX_RB_BLEND_RED_UINT(uint32_t val) -{ - return ((val) << A5XX_RB_BLEND_RED_UINT__SHIFT) & A5XX_RB_BLEND_RED_UINT__MASK; -} -#define A5XX_RB_BLEND_RED_SINT__MASK 0x0000ff00 -#define A5XX_RB_BLEND_RED_SINT__SHIFT 8 -static inline uint32_t A5XX_RB_BLEND_RED_SINT(uint32_t val) -{ - return ((val) << A5XX_RB_BLEND_RED_SINT__SHIFT) & A5XX_RB_BLEND_RED_SINT__MASK; -} -#define A5XX_RB_BLEND_RED_FLOAT__MASK 0xffff0000 
-#define A5XX_RB_BLEND_RED_FLOAT__SHIFT 16 -static inline uint32_t A5XX_RB_BLEND_RED_FLOAT(float val) -{ - return ((util_float_to_half(val)) << A5XX_RB_BLEND_RED_FLOAT__SHIFT) & A5XX_RB_BLEND_RED_FLOAT__MASK; -} - -#define REG_A5XX_RB_BLEND_RED_F32 0x0000e1a1 -#define A5XX_RB_BLEND_RED_F32__MASK 0xffffffff -#define A5XX_RB_BLEND_RED_F32__SHIFT 0 -static inline uint32_t A5XX_RB_BLEND_RED_F32(float val) -{ - return ((fui(val)) << A5XX_RB_BLEND_RED_F32__SHIFT) & A5XX_RB_BLEND_RED_F32__MASK; -} - -#define REG_A5XX_RB_BLEND_GREEN 0x0000e1a2 -#define A5XX_RB_BLEND_GREEN_UINT__MASK 0x000000ff -#define A5XX_RB_BLEND_GREEN_UINT__SHIFT 0 -static inline uint32_t A5XX_RB_BLEND_GREEN_UINT(uint32_t val) -{ - return ((val) << A5XX_RB_BLEND_GREEN_UINT__SHIFT) & A5XX_RB_BLEND_GREEN_UINT__MASK; -} -#define A5XX_RB_BLEND_GREEN_SINT__MASK 0x0000ff00 -#define A5XX_RB_BLEND_GREEN_SINT__SHIFT 8 -static inline uint32_t A5XX_RB_BLEND_GREEN_SINT(uint32_t val) -{ - return ((val) << A5XX_RB_BLEND_GREEN_SINT__SHIFT) & A5XX_RB_BLEND_GREEN_SINT__MASK; -} -#define A5XX_RB_BLEND_GREEN_FLOAT__MASK 0xffff0000 -#define A5XX_RB_BLEND_GREEN_FLOAT__SHIFT 16 -static inline uint32_t A5XX_RB_BLEND_GREEN_FLOAT(float val) -{ - return ((util_float_to_half(val)) << A5XX_RB_BLEND_GREEN_FLOAT__SHIFT) & A5XX_RB_BLEND_GREEN_FLOAT__MASK; -} - -#define REG_A5XX_RB_BLEND_GREEN_F32 0x0000e1a3 -#define A5XX_RB_BLEND_GREEN_F32__MASK 0xffffffff -#define A5XX_RB_BLEND_GREEN_F32__SHIFT 0 -static inline uint32_t A5XX_RB_BLEND_GREEN_F32(float val) -{ - return ((fui(val)) << A5XX_RB_BLEND_GREEN_F32__SHIFT) & A5XX_RB_BLEND_GREEN_F32__MASK; -} - -#define REG_A5XX_RB_BLEND_BLUE 0x0000e1a4 -#define A5XX_RB_BLEND_BLUE_UINT__MASK 0x000000ff -#define A5XX_RB_BLEND_BLUE_UINT__SHIFT 0 -static inline uint32_t A5XX_RB_BLEND_BLUE_UINT(uint32_t val) -{ - return ((val) << A5XX_RB_BLEND_BLUE_UINT__SHIFT) & A5XX_RB_BLEND_BLUE_UINT__MASK; -} -#define A5XX_RB_BLEND_BLUE_SINT__MASK 0x0000ff00 -#define A5XX_RB_BLEND_BLUE_SINT__SHIFT 8 -static 
inline uint32_t A5XX_RB_BLEND_BLUE_SINT(uint32_t val) -{ - return ((val) << A5XX_RB_BLEND_BLUE_SINT__SHIFT) & A5XX_RB_BLEND_BLUE_SINT__MASK; -} -#define A5XX_RB_BLEND_BLUE_FLOAT__MASK 0xffff0000 -#define A5XX_RB_BLEND_BLUE_FLOAT__SHIFT 16 -static inline uint32_t A5XX_RB_BLEND_BLUE_FLOAT(float val) -{ - return ((util_float_to_half(val)) << A5XX_RB_BLEND_BLUE_FLOAT__SHIFT) & A5XX_RB_BLEND_BLUE_FLOAT__MASK; -} - -#define REG_A5XX_RB_BLEND_BLUE_F32 0x0000e1a5 -#define A5XX_RB_BLEND_BLUE_F32__MASK 0xffffffff -#define A5XX_RB_BLEND_BLUE_F32__SHIFT 0 -static inline uint32_t A5XX_RB_BLEND_BLUE_F32(float val) -{ - return ((fui(val)) << A5XX_RB_BLEND_BLUE_F32__SHIFT) & A5XX_RB_BLEND_BLUE_F32__MASK; -} - -#define REG_A5XX_RB_BLEND_ALPHA 0x0000e1a6 -#define A5XX_RB_BLEND_ALPHA_UINT__MASK 0x000000ff -#define A5XX_RB_BLEND_ALPHA_UINT__SHIFT 0 -static inline uint32_t A5XX_RB_BLEND_ALPHA_UINT(uint32_t val) -{ - return ((val) << A5XX_RB_BLEND_ALPHA_UINT__SHIFT) & A5XX_RB_BLEND_ALPHA_UINT__MASK; -} -#define A5XX_RB_BLEND_ALPHA_SINT__MASK 0x0000ff00 -#define A5XX_RB_BLEND_ALPHA_SINT__SHIFT 8 -static inline uint32_t A5XX_RB_BLEND_ALPHA_SINT(uint32_t val) -{ - return ((val) << A5XX_RB_BLEND_ALPHA_SINT__SHIFT) & A5XX_RB_BLEND_ALPHA_SINT__MASK; -} -#define A5XX_RB_BLEND_ALPHA_FLOAT__MASK 0xffff0000 -#define A5XX_RB_BLEND_ALPHA_FLOAT__SHIFT 16 -static inline uint32_t A5XX_RB_BLEND_ALPHA_FLOAT(float val) -{ - return ((util_float_to_half(val)) << A5XX_RB_BLEND_ALPHA_FLOAT__SHIFT) & A5XX_RB_BLEND_ALPHA_FLOAT__MASK; -} - -#define REG_A5XX_RB_BLEND_ALPHA_F32 0x0000e1a7 -#define A5XX_RB_BLEND_ALPHA_F32__MASK 0xffffffff -#define A5XX_RB_BLEND_ALPHA_F32__SHIFT 0 -static inline uint32_t A5XX_RB_BLEND_ALPHA_F32(float val) -{ - return ((fui(val)) << A5XX_RB_BLEND_ALPHA_F32__SHIFT) & A5XX_RB_BLEND_ALPHA_F32__MASK; -} - -#define REG_A5XX_RB_ALPHA_CONTROL 0x0000e1a8 -#define A5XX_RB_ALPHA_CONTROL_ALPHA_REF__MASK 0x000000ff -#define A5XX_RB_ALPHA_CONTROL_ALPHA_REF__SHIFT 0 -static inline uint32_t 
A5XX_RB_ALPHA_CONTROL_ALPHA_REF(uint32_t val) -{ - return ((val) << A5XX_RB_ALPHA_CONTROL_ALPHA_REF__SHIFT) & A5XX_RB_ALPHA_CONTROL_ALPHA_REF__MASK; -} -#define A5XX_RB_ALPHA_CONTROL_ALPHA_TEST 0x00000100 -#define A5XX_RB_ALPHA_CONTROL_ALPHA_TEST_FUNC__MASK 0x00000e00 -#define A5XX_RB_ALPHA_CONTROL_ALPHA_TEST_FUNC__SHIFT 9 -static inline uint32_t A5XX_RB_ALPHA_CONTROL_ALPHA_TEST_FUNC(enum adreno_compare_func val) -{ - return ((val) << A5XX_RB_ALPHA_CONTROL_ALPHA_TEST_FUNC__SHIFT) & A5XX_RB_ALPHA_CONTROL_ALPHA_TEST_FUNC__MASK; -} - -#define REG_A5XX_RB_BLEND_CNTL 0x0000e1a9 -#define A5XX_RB_BLEND_CNTL_ENABLE_BLEND__MASK 0x000000ff -#define A5XX_RB_BLEND_CNTL_ENABLE_BLEND__SHIFT 0 -static inline uint32_t A5XX_RB_BLEND_CNTL_ENABLE_BLEND(uint32_t val) -{ - return ((val) << A5XX_RB_BLEND_CNTL_ENABLE_BLEND__SHIFT) & A5XX_RB_BLEND_CNTL_ENABLE_BLEND__MASK; -} -#define A5XX_RB_BLEND_CNTL_INDEPENDENT_BLEND 0x00000100 -#define A5XX_RB_BLEND_CNTL_ALPHA_TO_COVERAGE 0x00000400 -#define A5XX_RB_BLEND_CNTL_SAMPLE_MASK__MASK 0xffff0000 -#define A5XX_RB_BLEND_CNTL_SAMPLE_MASK__SHIFT 16 -static inline uint32_t A5XX_RB_BLEND_CNTL_SAMPLE_MASK(uint32_t val) -{ - return ((val) << A5XX_RB_BLEND_CNTL_SAMPLE_MASK__SHIFT) & A5XX_RB_BLEND_CNTL_SAMPLE_MASK__MASK; -} - -#define REG_A5XX_RB_DEPTH_PLANE_CNTL 0x0000e1b0 -#define A5XX_RB_DEPTH_PLANE_CNTL_FRAG_WRITES_Z 0x00000001 -#define A5XX_RB_DEPTH_PLANE_CNTL_UNK1 0x00000002 - -#define REG_A5XX_RB_DEPTH_CNTL 0x0000e1b1 -#define A5XX_RB_DEPTH_CNTL_Z_ENABLE 0x00000001 -#define A5XX_RB_DEPTH_CNTL_Z_WRITE_ENABLE 0x00000002 -#define A5XX_RB_DEPTH_CNTL_ZFUNC__MASK 0x0000001c -#define A5XX_RB_DEPTH_CNTL_ZFUNC__SHIFT 2 -static inline uint32_t A5XX_RB_DEPTH_CNTL_ZFUNC(enum adreno_compare_func val) -{ - return ((val) << A5XX_RB_DEPTH_CNTL_ZFUNC__SHIFT) & A5XX_RB_DEPTH_CNTL_ZFUNC__MASK; -} -#define A5XX_RB_DEPTH_CNTL_Z_TEST_ENABLE 0x00000040 - -#define REG_A5XX_RB_DEPTH_BUFFER_INFO 0x0000e1b2 -#define A5XX_RB_DEPTH_BUFFER_INFO_DEPTH_FORMAT__MASK 0x00000007 
-#define A5XX_RB_DEPTH_BUFFER_INFO_DEPTH_FORMAT__SHIFT 0 -static inline uint32_t A5XX_RB_DEPTH_BUFFER_INFO_DEPTH_FORMAT(enum a5xx_depth_format val) -{ - return ((val) << A5XX_RB_DEPTH_BUFFER_INFO_DEPTH_FORMAT__SHIFT) & A5XX_RB_DEPTH_BUFFER_INFO_DEPTH_FORMAT__MASK; -} - -#define REG_A5XX_RB_DEPTH_BUFFER_BASE_LO 0x0000e1b3 - -#define REG_A5XX_RB_DEPTH_BUFFER_BASE_HI 0x0000e1b4 - -#define REG_A5XX_RB_DEPTH_BUFFER_PITCH 0x0000e1b5 -#define A5XX_RB_DEPTH_BUFFER_PITCH__MASK 0xffffffff -#define A5XX_RB_DEPTH_BUFFER_PITCH__SHIFT 0 -static inline uint32_t A5XX_RB_DEPTH_BUFFER_PITCH(uint32_t val) -{ - assert(!(val & 0x3f)); - return ((val >> 6) << A5XX_RB_DEPTH_BUFFER_PITCH__SHIFT) & A5XX_RB_DEPTH_BUFFER_PITCH__MASK; -} - -#define REG_A5XX_RB_DEPTH_BUFFER_ARRAY_PITCH 0x0000e1b6 -#define A5XX_RB_DEPTH_BUFFER_ARRAY_PITCH__MASK 0xffffffff -#define A5XX_RB_DEPTH_BUFFER_ARRAY_PITCH__SHIFT 0 -static inline uint32_t A5XX_RB_DEPTH_BUFFER_ARRAY_PITCH(uint32_t val) -{ - assert(!(val & 0x3f)); - return ((val >> 6) << A5XX_RB_DEPTH_BUFFER_ARRAY_PITCH__SHIFT) & A5XX_RB_DEPTH_BUFFER_ARRAY_PITCH__MASK; -} - -#define REG_A5XX_RB_STENCIL_CONTROL 0x0000e1c0 -#define A5XX_RB_STENCIL_CONTROL_STENCIL_ENABLE 0x00000001 -#define A5XX_RB_STENCIL_CONTROL_STENCIL_ENABLE_BF 0x00000002 -#define A5XX_RB_STENCIL_CONTROL_STENCIL_READ 0x00000004 -#define A5XX_RB_STENCIL_CONTROL_FUNC__MASK 0x00000700 -#define A5XX_RB_STENCIL_CONTROL_FUNC__SHIFT 8 -static inline uint32_t A5XX_RB_STENCIL_CONTROL_FUNC(enum adreno_compare_func val) -{ - return ((val) << A5XX_RB_STENCIL_CONTROL_FUNC__SHIFT) & A5XX_RB_STENCIL_CONTROL_FUNC__MASK; -} -#define A5XX_RB_STENCIL_CONTROL_FAIL__MASK 0x00003800 -#define A5XX_RB_STENCIL_CONTROL_FAIL__SHIFT 11 -static inline uint32_t A5XX_RB_STENCIL_CONTROL_FAIL(enum adreno_stencil_op val) -{ - return ((val) << A5XX_RB_STENCIL_CONTROL_FAIL__SHIFT) & A5XX_RB_STENCIL_CONTROL_FAIL__MASK; -} -#define A5XX_RB_STENCIL_CONTROL_ZPASS__MASK 0x0001c000 -#define A5XX_RB_STENCIL_CONTROL_ZPASS__SHIFT 14 
-static inline uint32_t A5XX_RB_STENCIL_CONTROL_ZPASS(enum adreno_stencil_op val) -{ - return ((val) << A5XX_RB_STENCIL_CONTROL_ZPASS__SHIFT) & A5XX_RB_STENCIL_CONTROL_ZPASS__MASK; -} -#define A5XX_RB_STENCIL_CONTROL_ZFAIL__MASK 0x000e0000 -#define A5XX_RB_STENCIL_CONTROL_ZFAIL__SHIFT 17 -static inline uint32_t A5XX_RB_STENCIL_CONTROL_ZFAIL(enum adreno_stencil_op val) -{ - return ((val) << A5XX_RB_STENCIL_CONTROL_ZFAIL__SHIFT) & A5XX_RB_STENCIL_CONTROL_ZFAIL__MASK; -} -#define A5XX_RB_STENCIL_CONTROL_FUNC_BF__MASK 0x00700000 -#define A5XX_RB_STENCIL_CONTROL_FUNC_BF__SHIFT 20 -static inline uint32_t A5XX_RB_STENCIL_CONTROL_FUNC_BF(enum adreno_compare_func val) -{ - return ((val) << A5XX_RB_STENCIL_CONTROL_FUNC_BF__SHIFT) & A5XX_RB_STENCIL_CONTROL_FUNC_BF__MASK; -} -#define A5XX_RB_STENCIL_CONTROL_FAIL_BF__MASK 0x03800000 -#define A5XX_RB_STENCIL_CONTROL_FAIL_BF__SHIFT 23 -static inline uint32_t A5XX_RB_STENCIL_CONTROL_FAIL_BF(enum adreno_stencil_op val) -{ - return ((val) << A5XX_RB_STENCIL_CONTROL_FAIL_BF__SHIFT) & A5XX_RB_STENCIL_CONTROL_FAIL_BF__MASK; -} -#define A5XX_RB_STENCIL_CONTROL_ZPASS_BF__MASK 0x1c000000 -#define A5XX_RB_STENCIL_CONTROL_ZPASS_BF__SHIFT 26 -static inline uint32_t A5XX_RB_STENCIL_CONTROL_ZPASS_BF(enum adreno_stencil_op val) -{ - return ((val) << A5XX_RB_STENCIL_CONTROL_ZPASS_BF__SHIFT) & A5XX_RB_STENCIL_CONTROL_ZPASS_BF__MASK; -} -#define A5XX_RB_STENCIL_CONTROL_ZFAIL_BF__MASK 0xe0000000 -#define A5XX_RB_STENCIL_CONTROL_ZFAIL_BF__SHIFT 29 -static inline uint32_t A5XX_RB_STENCIL_CONTROL_ZFAIL_BF(enum adreno_stencil_op val) -{ - return ((val) << A5XX_RB_STENCIL_CONTROL_ZFAIL_BF__SHIFT) & A5XX_RB_STENCIL_CONTROL_ZFAIL_BF__MASK; -} - -#define REG_A5XX_RB_STENCIL_INFO 0x0000e1c1 -#define A5XX_RB_STENCIL_INFO_SEPARATE_STENCIL 0x00000001 - -#define REG_A5XX_RB_STENCIL_BASE_LO 0x0000e1c2 - -#define REG_A5XX_RB_STENCIL_BASE_HI 0x0000e1c3 - -#define REG_A5XX_RB_STENCIL_PITCH 0x0000e1c4 -#define A5XX_RB_STENCIL_PITCH__MASK 0xffffffff -#define 
A5XX_RB_STENCIL_PITCH__SHIFT 0 -static inline uint32_t A5XX_RB_STENCIL_PITCH(uint32_t val) -{ - assert(!(val & 0x3f)); - return ((val >> 6) << A5XX_RB_STENCIL_PITCH__SHIFT) & A5XX_RB_STENCIL_PITCH__MASK; -} - -#define REG_A5XX_RB_STENCIL_ARRAY_PITCH 0x0000e1c5 -#define A5XX_RB_STENCIL_ARRAY_PITCH__MASK 0xffffffff -#define A5XX_RB_STENCIL_ARRAY_PITCH__SHIFT 0 -static inline uint32_t A5XX_RB_STENCIL_ARRAY_PITCH(uint32_t val) -{ - assert(!(val & 0x3f)); - return ((val >> 6) << A5XX_RB_STENCIL_ARRAY_PITCH__SHIFT) & A5XX_RB_STENCIL_ARRAY_PITCH__MASK; -} - -#define REG_A5XX_RB_STENCILREFMASK 0x0000e1c6 -#define A5XX_RB_STENCILREFMASK_STENCILREF__MASK 0x000000ff -#define A5XX_RB_STENCILREFMASK_STENCILREF__SHIFT 0 -static inline uint32_t A5XX_RB_STENCILREFMASK_STENCILREF(uint32_t val) -{ - return ((val) << A5XX_RB_STENCILREFMASK_STENCILREF__SHIFT) & A5XX_RB_STENCILREFMASK_STENCILREF__MASK; -} -#define A5XX_RB_STENCILREFMASK_STENCILMASK__MASK 0x0000ff00 -#define A5XX_RB_STENCILREFMASK_STENCILMASK__SHIFT 8 -static inline uint32_t A5XX_RB_STENCILREFMASK_STENCILMASK(uint32_t val) -{ - return ((val) << A5XX_RB_STENCILREFMASK_STENCILMASK__SHIFT) & A5XX_RB_STENCILREFMASK_STENCILMASK__MASK; -} -#define A5XX_RB_STENCILREFMASK_STENCILWRITEMASK__MASK 0x00ff0000 -#define A5XX_RB_STENCILREFMASK_STENCILWRITEMASK__SHIFT 16 -static inline uint32_t A5XX_RB_STENCILREFMASK_STENCILWRITEMASK(uint32_t val) -{ - return ((val) << A5XX_RB_STENCILREFMASK_STENCILWRITEMASK__SHIFT) & A5XX_RB_STENCILREFMASK_STENCILWRITEMASK__MASK; -} - -#define REG_A5XX_RB_STENCILREFMASK_BF 0x0000e1c7 -#define A5XX_RB_STENCILREFMASK_BF_STENCILREF__MASK 0x000000ff -#define A5XX_RB_STENCILREFMASK_BF_STENCILREF__SHIFT 0 -static inline uint32_t A5XX_RB_STENCILREFMASK_BF_STENCILREF(uint32_t val) -{ - return ((val) << A5XX_RB_STENCILREFMASK_BF_STENCILREF__SHIFT) & A5XX_RB_STENCILREFMASK_BF_STENCILREF__MASK; -} -#define A5XX_RB_STENCILREFMASK_BF_STENCILMASK__MASK 0x0000ff00 -#define 
A5XX_RB_STENCILREFMASK_BF_STENCILMASK__SHIFT 8 -static inline uint32_t A5XX_RB_STENCILREFMASK_BF_STENCILMASK(uint32_t val) -{ - return ((val) << A5XX_RB_STENCILREFMASK_BF_STENCILMASK__SHIFT) & A5XX_RB_STENCILREFMASK_BF_STENCILMASK__MASK; -} -#define A5XX_RB_STENCILREFMASK_BF_STENCILWRITEMASK__MASK 0x00ff0000 -#define A5XX_RB_STENCILREFMASK_BF_STENCILWRITEMASK__SHIFT 16 -static inline uint32_t A5XX_RB_STENCILREFMASK_BF_STENCILWRITEMASK(uint32_t val) -{ - return ((val) << A5XX_RB_STENCILREFMASK_BF_STENCILWRITEMASK__SHIFT) & A5XX_RB_STENCILREFMASK_BF_STENCILWRITEMASK__MASK; -} - -#define REG_A5XX_RB_WINDOW_OFFSET 0x0000e1d0 -#define A5XX_RB_WINDOW_OFFSET_WINDOW_OFFSET_DISABLE 0x80000000 -#define A5XX_RB_WINDOW_OFFSET_X__MASK 0x00007fff -#define A5XX_RB_WINDOW_OFFSET_X__SHIFT 0 -static inline uint32_t A5XX_RB_WINDOW_OFFSET_X(uint32_t val) -{ - return ((val) << A5XX_RB_WINDOW_OFFSET_X__SHIFT) & A5XX_RB_WINDOW_OFFSET_X__MASK; -} -#define A5XX_RB_WINDOW_OFFSET_Y__MASK 0x7fff0000 -#define A5XX_RB_WINDOW_OFFSET_Y__SHIFT 16 -static inline uint32_t A5XX_RB_WINDOW_OFFSET_Y(uint32_t val) -{ - return ((val) << A5XX_RB_WINDOW_OFFSET_Y__SHIFT) & A5XX_RB_WINDOW_OFFSET_Y__MASK; -} - -#define REG_A5XX_RB_SAMPLE_COUNT_CONTROL 0x0000e1d1 -#define A5XX_RB_SAMPLE_COUNT_CONTROL_COPY 0x00000002 - -#define REG_A5XX_RB_BLIT_CNTL 0x0000e210 -#define A5XX_RB_BLIT_CNTL_BUF__MASK 0x0000000f -#define A5XX_RB_BLIT_CNTL_BUF__SHIFT 0 -static inline uint32_t A5XX_RB_BLIT_CNTL_BUF(enum a5xx_blit_buf val) -{ - return ((val) << A5XX_RB_BLIT_CNTL_BUF__SHIFT) & A5XX_RB_BLIT_CNTL_BUF__MASK; -} - -#define REG_A5XX_RB_RESOLVE_CNTL_1 0x0000e211 -#define A5XX_RB_RESOLVE_CNTL_1_WINDOW_OFFSET_DISABLE 0x80000000 -#define A5XX_RB_RESOLVE_CNTL_1_X__MASK 0x00007fff -#define A5XX_RB_RESOLVE_CNTL_1_X__SHIFT 0 -static inline uint32_t A5XX_RB_RESOLVE_CNTL_1_X(uint32_t val) -{ - return ((val) << A5XX_RB_RESOLVE_CNTL_1_X__SHIFT) & A5XX_RB_RESOLVE_CNTL_1_X__MASK; -} -#define A5XX_RB_RESOLVE_CNTL_1_Y__MASK 0x7fff0000 
-#define A5XX_RB_RESOLVE_CNTL_1_Y__SHIFT 16 -static inline uint32_t A5XX_RB_RESOLVE_CNTL_1_Y(uint32_t val) -{ - return ((val) << A5XX_RB_RESOLVE_CNTL_1_Y__SHIFT) & A5XX_RB_RESOLVE_CNTL_1_Y__MASK; -} - -#define REG_A5XX_RB_RESOLVE_CNTL_2 0x0000e212 -#define A5XX_RB_RESOLVE_CNTL_2_WINDOW_OFFSET_DISABLE 0x80000000 -#define A5XX_RB_RESOLVE_CNTL_2_X__MASK 0x00007fff -#define A5XX_RB_RESOLVE_CNTL_2_X__SHIFT 0 -static inline uint32_t A5XX_RB_RESOLVE_CNTL_2_X(uint32_t val) -{ - return ((val) << A5XX_RB_RESOLVE_CNTL_2_X__SHIFT) & A5XX_RB_RESOLVE_CNTL_2_X__MASK; -} -#define A5XX_RB_RESOLVE_CNTL_2_Y__MASK 0x7fff0000 -#define A5XX_RB_RESOLVE_CNTL_2_Y__SHIFT 16 -static inline uint32_t A5XX_RB_RESOLVE_CNTL_2_Y(uint32_t val) -{ - return ((val) << A5XX_RB_RESOLVE_CNTL_2_Y__SHIFT) & A5XX_RB_RESOLVE_CNTL_2_Y__MASK; -} - -#define REG_A5XX_RB_RESOLVE_CNTL_3 0x0000e213 -#define A5XX_RB_RESOLVE_CNTL_3_TILED 0x00000001 - -#define REG_A5XX_RB_BLIT_DST_LO 0x0000e214 - -#define REG_A5XX_RB_BLIT_DST_HI 0x0000e215 - -#define REG_A5XX_RB_BLIT_DST_PITCH 0x0000e216 -#define A5XX_RB_BLIT_DST_PITCH__MASK 0xffffffff -#define A5XX_RB_BLIT_DST_PITCH__SHIFT 0 -static inline uint32_t A5XX_RB_BLIT_DST_PITCH(uint32_t val) -{ - assert(!(val & 0x3f)); - return ((val >> 6) << A5XX_RB_BLIT_DST_PITCH__SHIFT) & A5XX_RB_BLIT_DST_PITCH__MASK; -} - -#define REG_A5XX_RB_BLIT_DST_ARRAY_PITCH 0x0000e217 -#define A5XX_RB_BLIT_DST_ARRAY_PITCH__MASK 0xffffffff -#define A5XX_RB_BLIT_DST_ARRAY_PITCH__SHIFT 0 -static inline uint32_t A5XX_RB_BLIT_DST_ARRAY_PITCH(uint32_t val) -{ - assert(!(val & 0x3f)); - return ((val >> 6) << A5XX_RB_BLIT_DST_ARRAY_PITCH__SHIFT) & A5XX_RB_BLIT_DST_ARRAY_PITCH__MASK; -} - -#define REG_A5XX_RB_CLEAR_COLOR_DW0 0x0000e218 - -#define REG_A5XX_RB_CLEAR_COLOR_DW1 0x0000e219 - -#define REG_A5XX_RB_CLEAR_COLOR_DW2 0x0000e21a - -#define REG_A5XX_RB_CLEAR_COLOR_DW3 0x0000e21b - -#define REG_A5XX_RB_CLEAR_CNTL 0x0000e21c -#define A5XX_RB_CLEAR_CNTL_FAST_CLEAR 0x00000002 -#define 
A5XX_RB_CLEAR_CNTL_MSAA_RESOLVE 0x00000004 -#define A5XX_RB_CLEAR_CNTL_MASK__MASK 0x000000f0 -#define A5XX_RB_CLEAR_CNTL_MASK__SHIFT 4 -static inline uint32_t A5XX_RB_CLEAR_CNTL_MASK(uint32_t val) -{ - return ((val) << A5XX_RB_CLEAR_CNTL_MASK__SHIFT) & A5XX_RB_CLEAR_CNTL_MASK__MASK; -} - -#define REG_A5XX_RB_DEPTH_FLAG_BUFFER_BASE_LO 0x0000e240 - -#define REG_A5XX_RB_DEPTH_FLAG_BUFFER_BASE_HI 0x0000e241 - -#define REG_A5XX_RB_DEPTH_FLAG_BUFFER_PITCH 0x0000e242 - -static inline uint32_t REG_A5XX_RB_MRT_FLAG_BUFFER(uint32_t i0) { return 0x0000e243 + 0x4*i0; } - -static inline uint32_t REG_A5XX_RB_MRT_FLAG_BUFFER_ADDR_LO(uint32_t i0) { return 0x0000e243 + 0x4*i0; } - -static inline uint32_t REG_A5XX_RB_MRT_FLAG_BUFFER_ADDR_HI(uint32_t i0) { return 0x0000e244 + 0x4*i0; } - -static inline uint32_t REG_A5XX_RB_MRT_FLAG_BUFFER_PITCH(uint32_t i0) { return 0x0000e245 + 0x4*i0; } -#define A5XX_RB_MRT_FLAG_BUFFER_PITCH__MASK 0xffffffff -#define A5XX_RB_MRT_FLAG_BUFFER_PITCH__SHIFT 0 -static inline uint32_t A5XX_RB_MRT_FLAG_BUFFER_PITCH(uint32_t val) -{ - assert(!(val & 0x3f)); - return ((val >> 6) << A5XX_RB_MRT_FLAG_BUFFER_PITCH__SHIFT) & A5XX_RB_MRT_FLAG_BUFFER_PITCH__MASK; -} - -static inline uint32_t REG_A5XX_RB_MRT_FLAG_BUFFER_ARRAY_PITCH(uint32_t i0) { return 0x0000e246 + 0x4*i0; } -#define A5XX_RB_MRT_FLAG_BUFFER_ARRAY_PITCH__MASK 0xffffffff -#define A5XX_RB_MRT_FLAG_BUFFER_ARRAY_PITCH__SHIFT 0 -static inline uint32_t A5XX_RB_MRT_FLAG_BUFFER_ARRAY_PITCH(uint32_t val) -{ - assert(!(val & 0x3f)); - return ((val >> 6) << A5XX_RB_MRT_FLAG_BUFFER_ARRAY_PITCH__SHIFT) & A5XX_RB_MRT_FLAG_BUFFER_ARRAY_PITCH__MASK; -} - -#define REG_A5XX_RB_BLIT_FLAG_DST_LO 0x0000e263 - -#define REG_A5XX_RB_BLIT_FLAG_DST_HI 0x0000e264 - -#define REG_A5XX_RB_BLIT_FLAG_DST_PITCH 0x0000e265 -#define A5XX_RB_BLIT_FLAG_DST_PITCH__MASK 0xffffffff -#define A5XX_RB_BLIT_FLAG_DST_PITCH__SHIFT 0 -static inline uint32_t A5XX_RB_BLIT_FLAG_DST_PITCH(uint32_t val) -{ - assert(!(val & 0x3f)); - return ((val >> 
6) << A5XX_RB_BLIT_FLAG_DST_PITCH__SHIFT) & A5XX_RB_BLIT_FLAG_DST_PITCH__MASK; -} - -#define REG_A5XX_RB_BLIT_FLAG_DST_ARRAY_PITCH 0x0000e266 -#define A5XX_RB_BLIT_FLAG_DST_ARRAY_PITCH__MASK 0xffffffff -#define A5XX_RB_BLIT_FLAG_DST_ARRAY_PITCH__SHIFT 0 -static inline uint32_t A5XX_RB_BLIT_FLAG_DST_ARRAY_PITCH(uint32_t val) -{ - assert(!(val & 0x3f)); - return ((val >> 6) << A5XX_RB_BLIT_FLAG_DST_ARRAY_PITCH__SHIFT) & A5XX_RB_BLIT_FLAG_DST_ARRAY_PITCH__MASK; -} - -#define REG_A5XX_RB_SAMPLE_COUNT_ADDR_LO 0x0000e267 - -#define REG_A5XX_RB_SAMPLE_COUNT_ADDR_HI 0x0000e268 - -#define REG_A5XX_VPC_CNTL_0 0x0000e280 -#define A5XX_VPC_CNTL_0_STRIDE_IN_VPC__MASK 0x0000007f -#define A5XX_VPC_CNTL_0_STRIDE_IN_VPC__SHIFT 0 -static inline uint32_t A5XX_VPC_CNTL_0_STRIDE_IN_VPC(uint32_t val) -{ - return ((val) << A5XX_VPC_CNTL_0_STRIDE_IN_VPC__SHIFT) & A5XX_VPC_CNTL_0_STRIDE_IN_VPC__MASK; -} -#define A5XX_VPC_CNTL_0_VARYING 0x00000800 - -static inline uint32_t REG_A5XX_VPC_VARYING_INTERP(uint32_t i0) { return 0x0000e282 + 0x1*i0; } - -static inline uint32_t REG_A5XX_VPC_VARYING_INTERP_MODE(uint32_t i0) { return 0x0000e282 + 0x1*i0; } - -static inline uint32_t REG_A5XX_VPC_VARYING_PS_REPL(uint32_t i0) { return 0x0000e28a + 0x1*i0; } - -static inline uint32_t REG_A5XX_VPC_VARYING_PS_REPL_MODE(uint32_t i0) { return 0x0000e28a + 0x1*i0; } - -#define REG_A5XX_UNKNOWN_E292 0x0000e292 - -#define REG_A5XX_UNKNOWN_E293 0x0000e293 - -static inline uint32_t REG_A5XX_VPC_VAR(uint32_t i0) { return 0x0000e294 + 0x1*i0; } - -static inline uint32_t REG_A5XX_VPC_VAR_DISABLE(uint32_t i0) { return 0x0000e294 + 0x1*i0; } - -#define REG_A5XX_VPC_GS_SIV_CNTL 0x0000e298 - -#define REG_A5XX_UNKNOWN_E29A 0x0000e29a - -#define REG_A5XX_VPC_PACK 0x0000e29d -#define A5XX_VPC_PACK_NUMNONPOSVAR__MASK 0x000000ff -#define A5XX_VPC_PACK_NUMNONPOSVAR__SHIFT 0 -static inline uint32_t A5XX_VPC_PACK_NUMNONPOSVAR(uint32_t val) -{ - return ((val) << A5XX_VPC_PACK_NUMNONPOSVAR__SHIFT) & 
A5XX_VPC_PACK_NUMNONPOSVAR__MASK; -} -#define A5XX_VPC_PACK_PSIZELOC__MASK 0x0000ff00 -#define A5XX_VPC_PACK_PSIZELOC__SHIFT 8 -static inline uint32_t A5XX_VPC_PACK_PSIZELOC(uint32_t val) -{ - return ((val) << A5XX_VPC_PACK_PSIZELOC__SHIFT) & A5XX_VPC_PACK_PSIZELOC__MASK; -} - -#define REG_A5XX_VPC_FS_PRIMITIVEID_CNTL 0x0000e2a0 - -#define REG_A5XX_VPC_SO_BUF_CNTL 0x0000e2a1 -#define A5XX_VPC_SO_BUF_CNTL_BUF0 0x00000001 -#define A5XX_VPC_SO_BUF_CNTL_BUF1 0x00000008 -#define A5XX_VPC_SO_BUF_CNTL_BUF2 0x00000040 -#define A5XX_VPC_SO_BUF_CNTL_BUF3 0x00000200 -#define A5XX_VPC_SO_BUF_CNTL_ENABLE 0x00008000 - -#define REG_A5XX_VPC_SO_OVERRIDE 0x0000e2a2 -#define A5XX_VPC_SO_OVERRIDE_SO_DISABLE 0x00000001 - -#define REG_A5XX_VPC_SO_CNTL 0x0000e2a3 -#define A5XX_VPC_SO_CNTL_ENABLE 0x00010000 - -#define REG_A5XX_VPC_SO_PROG 0x0000e2a4 -#define A5XX_VPC_SO_PROG_A_BUF__MASK 0x00000003 -#define A5XX_VPC_SO_PROG_A_BUF__SHIFT 0 -static inline uint32_t A5XX_VPC_SO_PROG_A_BUF(uint32_t val) -{ - return ((val) << A5XX_VPC_SO_PROG_A_BUF__SHIFT) & A5XX_VPC_SO_PROG_A_BUF__MASK; -} -#define A5XX_VPC_SO_PROG_A_OFF__MASK 0x000007fc -#define A5XX_VPC_SO_PROG_A_OFF__SHIFT 2 -static inline uint32_t A5XX_VPC_SO_PROG_A_OFF(uint32_t val) -{ - assert(!(val & 0x3)); - return ((val >> 2) << A5XX_VPC_SO_PROG_A_OFF__SHIFT) & A5XX_VPC_SO_PROG_A_OFF__MASK; -} -#define A5XX_VPC_SO_PROG_A_EN 0x00000800 -#define A5XX_VPC_SO_PROG_B_BUF__MASK 0x00003000 -#define A5XX_VPC_SO_PROG_B_BUF__SHIFT 12 -static inline uint32_t A5XX_VPC_SO_PROG_B_BUF(uint32_t val) -{ - return ((val) << A5XX_VPC_SO_PROG_B_BUF__SHIFT) & A5XX_VPC_SO_PROG_B_BUF__MASK; -} -#define A5XX_VPC_SO_PROG_B_OFF__MASK 0x007fc000 -#define A5XX_VPC_SO_PROG_B_OFF__SHIFT 14 -static inline uint32_t A5XX_VPC_SO_PROG_B_OFF(uint32_t val) -{ - assert(!(val & 0x3)); - return ((val >> 2) << A5XX_VPC_SO_PROG_B_OFF__SHIFT) & A5XX_VPC_SO_PROG_B_OFF__MASK; -} -#define A5XX_VPC_SO_PROG_B_EN 0x00800000 - -static inline uint32_t REG_A5XX_VPC_SO(uint32_t i0) { 
return 0x0000e2a7 + 0x7*i0; } - -static inline uint32_t REG_A5XX_VPC_SO_BUFFER_BASE_LO(uint32_t i0) { return 0x0000e2a7 + 0x7*i0; } - -static inline uint32_t REG_A5XX_VPC_SO_BUFFER_BASE_HI(uint32_t i0) { return 0x0000e2a8 + 0x7*i0; } - -static inline uint32_t REG_A5XX_VPC_SO_BUFFER_SIZE(uint32_t i0) { return 0x0000e2a9 + 0x7*i0; } - -static inline uint32_t REG_A5XX_VPC_SO_NCOMP(uint32_t i0) { return 0x0000e2aa + 0x7*i0; } - -static inline uint32_t REG_A5XX_VPC_SO_BUFFER_OFFSET(uint32_t i0) { return 0x0000e2ab + 0x7*i0; } - -static inline uint32_t REG_A5XX_VPC_SO_FLUSH_BASE_LO(uint32_t i0) { return 0x0000e2ac + 0x7*i0; } - -static inline uint32_t REG_A5XX_VPC_SO_FLUSH_BASE_HI(uint32_t i0) { return 0x0000e2ad + 0x7*i0; } - -#define REG_A5XX_PC_PRIMITIVE_CNTL 0x0000e384 -#define A5XX_PC_PRIMITIVE_CNTL_STRIDE_IN_VPC__MASK 0x0000007f -#define A5XX_PC_PRIMITIVE_CNTL_STRIDE_IN_VPC__SHIFT 0 -static inline uint32_t A5XX_PC_PRIMITIVE_CNTL_STRIDE_IN_VPC(uint32_t val) -{ - return ((val) << A5XX_PC_PRIMITIVE_CNTL_STRIDE_IN_VPC__SHIFT) & A5XX_PC_PRIMITIVE_CNTL_STRIDE_IN_VPC__MASK; -} -#define A5XX_PC_PRIMITIVE_CNTL_PRIMITIVE_RESTART 0x00000100 -#define A5XX_PC_PRIMITIVE_CNTL_COUNT_PRIMITIVES 0x00000200 -#define A5XX_PC_PRIMITIVE_CNTL_PROVOKING_VTX_LAST 0x00000400 - -#define REG_A5XX_PC_PRIM_VTX_CNTL 0x0000e385 -#define A5XX_PC_PRIM_VTX_CNTL_PSIZE 0x00000800 - -#define REG_A5XX_PC_RASTER_CNTL 0x0000e388 -#define A5XX_PC_RASTER_CNTL_POLYMODE_FRONT_PTYPE__MASK 0x00000007 -#define A5XX_PC_RASTER_CNTL_POLYMODE_FRONT_PTYPE__SHIFT 0 -static inline uint32_t A5XX_PC_RASTER_CNTL_POLYMODE_FRONT_PTYPE(enum adreno_pa_su_sc_draw val) -{ - return ((val) << A5XX_PC_RASTER_CNTL_POLYMODE_FRONT_PTYPE__SHIFT) & A5XX_PC_RASTER_CNTL_POLYMODE_FRONT_PTYPE__MASK; -} -#define A5XX_PC_RASTER_CNTL_POLYMODE_BACK_PTYPE__MASK 0x00000038 -#define A5XX_PC_RASTER_CNTL_POLYMODE_BACK_PTYPE__SHIFT 3 -static inline uint32_t A5XX_PC_RASTER_CNTL_POLYMODE_BACK_PTYPE(enum adreno_pa_su_sc_draw val) -{ - return ((val) << 
A5XX_PC_RASTER_CNTL_POLYMODE_BACK_PTYPE__SHIFT) & A5XX_PC_RASTER_CNTL_POLYMODE_BACK_PTYPE__MASK; -} -#define A5XX_PC_RASTER_CNTL_POLYMODE_ENABLE 0x00000040 - -#define REG_A5XX_UNKNOWN_E389 0x0000e389 - -#define REG_A5XX_PC_RESTART_INDEX 0x0000e38c - -#define REG_A5XX_PC_GS_LAYERED 0x0000e38d - -#define REG_A5XX_PC_GS_PARAM 0x0000e38e -#define A5XX_PC_GS_PARAM_MAX_VERTICES__MASK 0x000003ff -#define A5XX_PC_GS_PARAM_MAX_VERTICES__SHIFT 0 -static inline uint32_t A5XX_PC_GS_PARAM_MAX_VERTICES(uint32_t val) -{ - return ((val) << A5XX_PC_GS_PARAM_MAX_VERTICES__SHIFT) & A5XX_PC_GS_PARAM_MAX_VERTICES__MASK; -} -#define A5XX_PC_GS_PARAM_INVOCATIONS__MASK 0x0000f800 -#define A5XX_PC_GS_PARAM_INVOCATIONS__SHIFT 11 -static inline uint32_t A5XX_PC_GS_PARAM_INVOCATIONS(uint32_t val) -{ - return ((val) << A5XX_PC_GS_PARAM_INVOCATIONS__SHIFT) & A5XX_PC_GS_PARAM_INVOCATIONS__MASK; -} -#define A5XX_PC_GS_PARAM_PRIMTYPE__MASK 0x01800000 -#define A5XX_PC_GS_PARAM_PRIMTYPE__SHIFT 23 -static inline uint32_t A5XX_PC_GS_PARAM_PRIMTYPE(enum adreno_pa_su_sc_draw val) -{ - return ((val) << A5XX_PC_GS_PARAM_PRIMTYPE__SHIFT) & A5XX_PC_GS_PARAM_PRIMTYPE__MASK; -} - -#define REG_A5XX_PC_HS_PARAM 0x0000e38f -#define A5XX_PC_HS_PARAM_VERTICES_OUT__MASK 0x0000003f -#define A5XX_PC_HS_PARAM_VERTICES_OUT__SHIFT 0 -static inline uint32_t A5XX_PC_HS_PARAM_VERTICES_OUT(uint32_t val) -{ - return ((val) << A5XX_PC_HS_PARAM_VERTICES_OUT__SHIFT) & A5XX_PC_HS_PARAM_VERTICES_OUT__MASK; -} -#define A5XX_PC_HS_PARAM_SPACING__MASK 0x00600000 -#define A5XX_PC_HS_PARAM_SPACING__SHIFT 21 -static inline uint32_t A5XX_PC_HS_PARAM_SPACING(enum a4xx_tess_spacing val) -{ - return ((val) << A5XX_PC_HS_PARAM_SPACING__SHIFT) & A5XX_PC_HS_PARAM_SPACING__MASK; -} -#define A5XX_PC_HS_PARAM_CW 0x00800000 -#define A5XX_PC_HS_PARAM_CONNECTED 0x01000000 - -#define REG_A5XX_PC_POWER_CNTL 0x0000e3b0 - -#define REG_A5XX_VFD_CONTROL_0 0x0000e400 -#define A5XX_VFD_CONTROL_0_VTXCNT__MASK 0x0000003f -#define 
A5XX_VFD_CONTROL_0_VTXCNT__SHIFT 0 -static inline uint32_t A5XX_VFD_CONTROL_0_VTXCNT(uint32_t val) -{ - return ((val) << A5XX_VFD_CONTROL_0_VTXCNT__SHIFT) & A5XX_VFD_CONTROL_0_VTXCNT__MASK; -} - -#define REG_A5XX_VFD_CONTROL_1 0x0000e401 -#define A5XX_VFD_CONTROL_1_REGID4VTX__MASK 0x000000ff -#define A5XX_VFD_CONTROL_1_REGID4VTX__SHIFT 0 -static inline uint32_t A5XX_VFD_CONTROL_1_REGID4VTX(uint32_t val) -{ - return ((val) << A5XX_VFD_CONTROL_1_REGID4VTX__SHIFT) & A5XX_VFD_CONTROL_1_REGID4VTX__MASK; -} -#define A5XX_VFD_CONTROL_1_REGID4INST__MASK 0x0000ff00 -#define A5XX_VFD_CONTROL_1_REGID4INST__SHIFT 8 -static inline uint32_t A5XX_VFD_CONTROL_1_REGID4INST(uint32_t val) -{ - return ((val) << A5XX_VFD_CONTROL_1_REGID4INST__SHIFT) & A5XX_VFD_CONTROL_1_REGID4INST__MASK; -} -#define A5XX_VFD_CONTROL_1_REGID4PRIMID__MASK 0x00ff0000 -#define A5XX_VFD_CONTROL_1_REGID4PRIMID__SHIFT 16 -static inline uint32_t A5XX_VFD_CONTROL_1_REGID4PRIMID(uint32_t val) -{ - return ((val) << A5XX_VFD_CONTROL_1_REGID4PRIMID__SHIFT) & A5XX_VFD_CONTROL_1_REGID4PRIMID__MASK; -} - -#define REG_A5XX_VFD_CONTROL_2 0x0000e402 -#define A5XX_VFD_CONTROL_2_REGID_PATCHID__MASK 0x000000ff -#define A5XX_VFD_CONTROL_2_REGID_PATCHID__SHIFT 0 -static inline uint32_t A5XX_VFD_CONTROL_2_REGID_PATCHID(uint32_t val) -{ - return ((val) << A5XX_VFD_CONTROL_2_REGID_PATCHID__SHIFT) & A5XX_VFD_CONTROL_2_REGID_PATCHID__MASK; -} - -#define REG_A5XX_VFD_CONTROL_3 0x0000e403 -#define A5XX_VFD_CONTROL_3_REGID_PATCHID__MASK 0x0000ff00 -#define A5XX_VFD_CONTROL_3_REGID_PATCHID__SHIFT 8 -static inline uint32_t A5XX_VFD_CONTROL_3_REGID_PATCHID(uint32_t val) -{ - return ((val) << A5XX_VFD_CONTROL_3_REGID_PATCHID__SHIFT) & A5XX_VFD_CONTROL_3_REGID_PATCHID__MASK; -} -#define A5XX_VFD_CONTROL_3_REGID_TESSX__MASK 0x00ff0000 -#define A5XX_VFD_CONTROL_3_REGID_TESSX__SHIFT 16 -static inline uint32_t A5XX_VFD_CONTROL_3_REGID_TESSX(uint32_t val) -{ - return ((val) << A5XX_VFD_CONTROL_3_REGID_TESSX__SHIFT) & 
A5XX_VFD_CONTROL_3_REGID_TESSX__MASK; -} -#define A5XX_VFD_CONTROL_3_REGID_TESSY__MASK 0xff000000 -#define A5XX_VFD_CONTROL_3_REGID_TESSY__SHIFT 24 -static inline uint32_t A5XX_VFD_CONTROL_3_REGID_TESSY(uint32_t val) -{ - return ((val) << A5XX_VFD_CONTROL_3_REGID_TESSY__SHIFT) & A5XX_VFD_CONTROL_3_REGID_TESSY__MASK; -} - -#define REG_A5XX_VFD_CONTROL_4 0x0000e404 - -#define REG_A5XX_VFD_CONTROL_5 0x0000e405 - -#define REG_A5XX_VFD_INDEX_OFFSET 0x0000e408 - -#define REG_A5XX_VFD_INSTANCE_START_OFFSET 0x0000e409 - -static inline uint32_t REG_A5XX_VFD_FETCH(uint32_t i0) { return 0x0000e40a + 0x4*i0; } - -static inline uint32_t REG_A5XX_VFD_FETCH_BASE_LO(uint32_t i0) { return 0x0000e40a + 0x4*i0; } - -static inline uint32_t REG_A5XX_VFD_FETCH_BASE_HI(uint32_t i0) { return 0x0000e40b + 0x4*i0; } - -static inline uint32_t REG_A5XX_VFD_FETCH_SIZE(uint32_t i0) { return 0x0000e40c + 0x4*i0; } - -static inline uint32_t REG_A5XX_VFD_FETCH_STRIDE(uint32_t i0) { return 0x0000e40d + 0x4*i0; } - -static inline uint32_t REG_A5XX_VFD_DECODE(uint32_t i0) { return 0x0000e48a + 0x2*i0; } - -static inline uint32_t REG_A5XX_VFD_DECODE_INSTR(uint32_t i0) { return 0x0000e48a + 0x2*i0; } -#define A5XX_VFD_DECODE_INSTR_IDX__MASK 0x0000001f -#define A5XX_VFD_DECODE_INSTR_IDX__SHIFT 0 -static inline uint32_t A5XX_VFD_DECODE_INSTR_IDX(uint32_t val) -{ - return ((val) << A5XX_VFD_DECODE_INSTR_IDX__SHIFT) & A5XX_VFD_DECODE_INSTR_IDX__MASK; -} -#define A5XX_VFD_DECODE_INSTR_INSTANCED 0x00020000 -#define A5XX_VFD_DECODE_INSTR_FORMAT__MASK 0x0ff00000 -#define A5XX_VFD_DECODE_INSTR_FORMAT__SHIFT 20 -static inline uint32_t A5XX_VFD_DECODE_INSTR_FORMAT(enum a5xx_vtx_fmt val) -{ - return ((val) << A5XX_VFD_DECODE_INSTR_FORMAT__SHIFT) & A5XX_VFD_DECODE_INSTR_FORMAT__MASK; -} -#define A5XX_VFD_DECODE_INSTR_SWAP__MASK 0x30000000 -#define A5XX_VFD_DECODE_INSTR_SWAP__SHIFT 28 -static inline uint32_t A5XX_VFD_DECODE_INSTR_SWAP(enum a3xx_color_swap val) -{ - return ((val) << A5XX_VFD_DECODE_INSTR_SWAP__SHIFT) 
& A5XX_VFD_DECODE_INSTR_SWAP__MASK; -} -#define A5XX_VFD_DECODE_INSTR_UNK30 0x40000000 -#define A5XX_VFD_DECODE_INSTR_FLOAT 0x80000000 - -static inline uint32_t REG_A5XX_VFD_DECODE_STEP_RATE(uint32_t i0) { return 0x0000e48b + 0x2*i0; } - -static inline uint32_t REG_A5XX_VFD_DEST_CNTL(uint32_t i0) { return 0x0000e4ca + 0x1*i0; } - -static inline uint32_t REG_A5XX_VFD_DEST_CNTL_INSTR(uint32_t i0) { return 0x0000e4ca + 0x1*i0; } -#define A5XX_VFD_DEST_CNTL_INSTR_WRITEMASK__MASK 0x0000000f -#define A5XX_VFD_DEST_CNTL_INSTR_WRITEMASK__SHIFT 0 -static inline uint32_t A5XX_VFD_DEST_CNTL_INSTR_WRITEMASK(uint32_t val) -{ - return ((val) << A5XX_VFD_DEST_CNTL_INSTR_WRITEMASK__SHIFT) & A5XX_VFD_DEST_CNTL_INSTR_WRITEMASK__MASK; -} -#define A5XX_VFD_DEST_CNTL_INSTR_REGID__MASK 0x00000ff0 -#define A5XX_VFD_DEST_CNTL_INSTR_REGID__SHIFT 4 -static inline uint32_t A5XX_VFD_DEST_CNTL_INSTR_REGID(uint32_t val) -{ - return ((val) << A5XX_VFD_DEST_CNTL_INSTR_REGID__SHIFT) & A5XX_VFD_DEST_CNTL_INSTR_REGID__MASK; -} - -#define REG_A5XX_VFD_POWER_CNTL 0x0000e4f0 - -#define REG_A5XX_SP_SP_CNTL 0x0000e580 - -#define REG_A5XX_SP_VS_CONFIG 0x0000e584 -#define A5XX_SP_VS_CONFIG_ENABLED 0x00000001 -#define A5XX_SP_VS_CONFIG_CONSTOBJECTOFFSET__MASK 0x000000fe -#define A5XX_SP_VS_CONFIG_CONSTOBJECTOFFSET__SHIFT 1 -static inline uint32_t A5XX_SP_VS_CONFIG_CONSTOBJECTOFFSET(uint32_t val) -{ - return ((val) << A5XX_SP_VS_CONFIG_CONSTOBJECTOFFSET__SHIFT) & A5XX_SP_VS_CONFIG_CONSTOBJECTOFFSET__MASK; -} -#define A5XX_SP_VS_CONFIG_SHADEROBJOFFSET__MASK 0x00007f00 -#define A5XX_SP_VS_CONFIG_SHADEROBJOFFSET__SHIFT 8 -static inline uint32_t A5XX_SP_VS_CONFIG_SHADEROBJOFFSET(uint32_t val) -{ - return ((val) << A5XX_SP_VS_CONFIG_SHADEROBJOFFSET__SHIFT) & A5XX_SP_VS_CONFIG_SHADEROBJOFFSET__MASK; -} - -#define REG_A5XX_SP_FS_CONFIG 0x0000e585 -#define A5XX_SP_FS_CONFIG_ENABLED 0x00000001 -#define A5XX_SP_FS_CONFIG_CONSTOBJECTOFFSET__MASK 0x000000fe -#define A5XX_SP_FS_CONFIG_CONSTOBJECTOFFSET__SHIFT 1 -static 
inline uint32_t A5XX_SP_FS_CONFIG_CONSTOBJECTOFFSET(uint32_t val) -{ - return ((val) << A5XX_SP_FS_CONFIG_CONSTOBJECTOFFSET__SHIFT) & A5XX_SP_FS_CONFIG_CONSTOBJECTOFFSET__MASK; -} -#define A5XX_SP_FS_CONFIG_SHADEROBJOFFSET__MASK 0x00007f00 -#define A5XX_SP_FS_CONFIG_SHADEROBJOFFSET__SHIFT 8 -static inline uint32_t A5XX_SP_FS_CONFIG_SHADEROBJOFFSET(uint32_t val) -{ - return ((val) << A5XX_SP_FS_CONFIG_SHADEROBJOFFSET__SHIFT) & A5XX_SP_FS_CONFIG_SHADEROBJOFFSET__MASK; -} - -#define REG_A5XX_SP_HS_CONFIG 0x0000e586 -#define A5XX_SP_HS_CONFIG_ENABLED 0x00000001 -#define A5XX_SP_HS_CONFIG_CONSTOBJECTOFFSET__MASK 0x000000fe -#define A5XX_SP_HS_CONFIG_CONSTOBJECTOFFSET__SHIFT 1 -static inline uint32_t A5XX_SP_HS_CONFIG_CONSTOBJECTOFFSET(uint32_t val) -{ - return ((val) << A5XX_SP_HS_CONFIG_CONSTOBJECTOFFSET__SHIFT) & A5XX_SP_HS_CONFIG_CONSTOBJECTOFFSET__MASK; -} -#define A5XX_SP_HS_CONFIG_SHADEROBJOFFSET__MASK 0x00007f00 -#define A5XX_SP_HS_CONFIG_SHADEROBJOFFSET__SHIFT 8 -static inline uint32_t A5XX_SP_HS_CONFIG_SHADEROBJOFFSET(uint32_t val) -{ - return ((val) << A5XX_SP_HS_CONFIG_SHADEROBJOFFSET__SHIFT) & A5XX_SP_HS_CONFIG_SHADEROBJOFFSET__MASK; -} - -#define REG_A5XX_SP_DS_CONFIG 0x0000e587 -#define A5XX_SP_DS_CONFIG_ENABLED 0x00000001 -#define A5XX_SP_DS_CONFIG_CONSTOBJECTOFFSET__MASK 0x000000fe -#define A5XX_SP_DS_CONFIG_CONSTOBJECTOFFSET__SHIFT 1 -static inline uint32_t A5XX_SP_DS_CONFIG_CONSTOBJECTOFFSET(uint32_t val) -{ - return ((val) << A5XX_SP_DS_CONFIG_CONSTOBJECTOFFSET__SHIFT) & A5XX_SP_DS_CONFIG_CONSTOBJECTOFFSET__MASK; -} -#define A5XX_SP_DS_CONFIG_SHADEROBJOFFSET__MASK 0x00007f00 -#define A5XX_SP_DS_CONFIG_SHADEROBJOFFSET__SHIFT 8 -static inline uint32_t A5XX_SP_DS_CONFIG_SHADEROBJOFFSET(uint32_t val) -{ - return ((val) << A5XX_SP_DS_CONFIG_SHADEROBJOFFSET__SHIFT) & A5XX_SP_DS_CONFIG_SHADEROBJOFFSET__MASK; -} - -#define REG_A5XX_SP_GS_CONFIG 0x0000e588 -#define A5XX_SP_GS_CONFIG_ENABLED 0x00000001 -#define A5XX_SP_GS_CONFIG_CONSTOBJECTOFFSET__MASK 
0x000000fe -#define A5XX_SP_GS_CONFIG_CONSTOBJECTOFFSET__SHIFT 1 -static inline uint32_t A5XX_SP_GS_CONFIG_CONSTOBJECTOFFSET(uint32_t val) -{ - return ((val) << A5XX_SP_GS_CONFIG_CONSTOBJECTOFFSET__SHIFT) & A5XX_SP_GS_CONFIG_CONSTOBJECTOFFSET__MASK; -} -#define A5XX_SP_GS_CONFIG_SHADEROBJOFFSET__MASK 0x00007f00 -#define A5XX_SP_GS_CONFIG_SHADEROBJOFFSET__SHIFT 8 -static inline uint32_t A5XX_SP_GS_CONFIG_SHADEROBJOFFSET(uint32_t val) -{ - return ((val) << A5XX_SP_GS_CONFIG_SHADEROBJOFFSET__SHIFT) & A5XX_SP_GS_CONFIG_SHADEROBJOFFSET__MASK; -} - -#define REG_A5XX_SP_CS_CONFIG 0x0000e589 -#define A5XX_SP_CS_CONFIG_ENABLED 0x00000001 -#define A5XX_SP_CS_CONFIG_CONSTOBJECTOFFSET__MASK 0x000000fe -#define A5XX_SP_CS_CONFIG_CONSTOBJECTOFFSET__SHIFT 1 -static inline uint32_t A5XX_SP_CS_CONFIG_CONSTOBJECTOFFSET(uint32_t val) -{ - return ((val) << A5XX_SP_CS_CONFIG_CONSTOBJECTOFFSET__SHIFT) & A5XX_SP_CS_CONFIG_CONSTOBJECTOFFSET__MASK; -} -#define A5XX_SP_CS_CONFIG_SHADEROBJOFFSET__MASK 0x00007f00 -#define A5XX_SP_CS_CONFIG_SHADEROBJOFFSET__SHIFT 8 -static inline uint32_t A5XX_SP_CS_CONFIG_SHADEROBJOFFSET(uint32_t val) -{ - return ((val) << A5XX_SP_CS_CONFIG_SHADEROBJOFFSET__SHIFT) & A5XX_SP_CS_CONFIG_SHADEROBJOFFSET__MASK; -} - -#define REG_A5XX_SP_VS_CONFIG_MAX_CONST 0x0000e58a - -#define REG_A5XX_SP_FS_CONFIG_MAX_CONST 0x0000e58b - -#define REG_A5XX_SP_VS_CTRL_REG0 0x0000e590 -#define A5XX_SP_VS_CTRL_REG0_THREADSIZE__MASK 0x00000008 -#define A5XX_SP_VS_CTRL_REG0_THREADSIZE__SHIFT 3 -static inline uint32_t A5XX_SP_VS_CTRL_REG0_THREADSIZE(enum a3xx_threadsize val) -{ - return ((val) << A5XX_SP_VS_CTRL_REG0_THREADSIZE__SHIFT) & A5XX_SP_VS_CTRL_REG0_THREADSIZE__MASK; -} -#define A5XX_SP_VS_CTRL_REG0_HALFREGFOOTPRINT__MASK 0x000003f0 -#define A5XX_SP_VS_CTRL_REG0_HALFREGFOOTPRINT__SHIFT 4 -static inline uint32_t A5XX_SP_VS_CTRL_REG0_HALFREGFOOTPRINT(uint32_t val) -{ - return ((val) << A5XX_SP_VS_CTRL_REG0_HALFREGFOOTPRINT__SHIFT) & A5XX_SP_VS_CTRL_REG0_HALFREGFOOTPRINT__MASK; -} 
-#define A5XX_SP_VS_CTRL_REG0_FULLREGFOOTPRINT__MASK 0x0000fc00 -#define A5XX_SP_VS_CTRL_REG0_FULLREGFOOTPRINT__SHIFT 10 -static inline uint32_t A5XX_SP_VS_CTRL_REG0_FULLREGFOOTPRINT(uint32_t val) -{ - return ((val) << A5XX_SP_VS_CTRL_REG0_FULLREGFOOTPRINT__SHIFT) & A5XX_SP_VS_CTRL_REG0_FULLREGFOOTPRINT__MASK; -} -#define A5XX_SP_VS_CTRL_REG0_VARYING 0x00010000 -#define A5XX_SP_VS_CTRL_REG0_PIXLODENABLE 0x00100000 -#define A5XX_SP_VS_CTRL_REG0_BRANCHSTACK__MASK 0xfe000000 -#define A5XX_SP_VS_CTRL_REG0_BRANCHSTACK__SHIFT 25 -static inline uint32_t A5XX_SP_VS_CTRL_REG0_BRANCHSTACK(uint32_t val) -{ - return ((val) << A5XX_SP_VS_CTRL_REG0_BRANCHSTACK__SHIFT) & A5XX_SP_VS_CTRL_REG0_BRANCHSTACK__MASK; -} - -#define REG_A5XX_SP_PRIMITIVE_CNTL 0x0000e592 -#define A5XX_SP_PRIMITIVE_CNTL_VSOUT__MASK 0x0000001f -#define A5XX_SP_PRIMITIVE_CNTL_VSOUT__SHIFT 0 -static inline uint32_t A5XX_SP_PRIMITIVE_CNTL_VSOUT(uint32_t val) -{ - return ((val) << A5XX_SP_PRIMITIVE_CNTL_VSOUT__SHIFT) & A5XX_SP_PRIMITIVE_CNTL_VSOUT__MASK; -} - -static inline uint32_t REG_A5XX_SP_VS_OUT(uint32_t i0) { return 0x0000e593 + 0x1*i0; } - -static inline uint32_t REG_A5XX_SP_VS_OUT_REG(uint32_t i0) { return 0x0000e593 + 0x1*i0; } -#define A5XX_SP_VS_OUT_REG_A_REGID__MASK 0x000000ff -#define A5XX_SP_VS_OUT_REG_A_REGID__SHIFT 0 -static inline uint32_t A5XX_SP_VS_OUT_REG_A_REGID(uint32_t val) -{ - return ((val) << A5XX_SP_VS_OUT_REG_A_REGID__SHIFT) & A5XX_SP_VS_OUT_REG_A_REGID__MASK; -} -#define A5XX_SP_VS_OUT_REG_A_COMPMASK__MASK 0x00000f00 -#define A5XX_SP_VS_OUT_REG_A_COMPMASK__SHIFT 8 -static inline uint32_t A5XX_SP_VS_OUT_REG_A_COMPMASK(uint32_t val) -{ - return ((val) << A5XX_SP_VS_OUT_REG_A_COMPMASK__SHIFT) & A5XX_SP_VS_OUT_REG_A_COMPMASK__MASK; -} -#define A5XX_SP_VS_OUT_REG_B_REGID__MASK 0x00ff0000 -#define A5XX_SP_VS_OUT_REG_B_REGID__SHIFT 16 -static inline uint32_t A5XX_SP_VS_OUT_REG_B_REGID(uint32_t val) -{ - return ((val) << A5XX_SP_VS_OUT_REG_B_REGID__SHIFT) & A5XX_SP_VS_OUT_REG_B_REGID__MASK; 
-} -#define A5XX_SP_VS_OUT_REG_B_COMPMASK__MASK 0x0f000000 -#define A5XX_SP_VS_OUT_REG_B_COMPMASK__SHIFT 24 -static inline uint32_t A5XX_SP_VS_OUT_REG_B_COMPMASK(uint32_t val) -{ - return ((val) << A5XX_SP_VS_OUT_REG_B_COMPMASK__SHIFT) & A5XX_SP_VS_OUT_REG_B_COMPMASK__MASK; -} - -static inline uint32_t REG_A5XX_SP_VS_VPC_DST(uint32_t i0) { return 0x0000e5a3 + 0x1*i0; } - -static inline uint32_t REG_A5XX_SP_VS_VPC_DST_REG(uint32_t i0) { return 0x0000e5a3 + 0x1*i0; } -#define A5XX_SP_VS_VPC_DST_REG_OUTLOC0__MASK 0x000000ff -#define A5XX_SP_VS_VPC_DST_REG_OUTLOC0__SHIFT 0 -static inline uint32_t A5XX_SP_VS_VPC_DST_REG_OUTLOC0(uint32_t val) -{ - return ((val) << A5XX_SP_VS_VPC_DST_REG_OUTLOC0__SHIFT) & A5XX_SP_VS_VPC_DST_REG_OUTLOC0__MASK; -} -#define A5XX_SP_VS_VPC_DST_REG_OUTLOC1__MASK 0x0000ff00 -#define A5XX_SP_VS_VPC_DST_REG_OUTLOC1__SHIFT 8 -static inline uint32_t A5XX_SP_VS_VPC_DST_REG_OUTLOC1(uint32_t val) -{ - return ((val) << A5XX_SP_VS_VPC_DST_REG_OUTLOC1__SHIFT) & A5XX_SP_VS_VPC_DST_REG_OUTLOC1__MASK; -} -#define A5XX_SP_VS_VPC_DST_REG_OUTLOC2__MASK 0x00ff0000 -#define A5XX_SP_VS_VPC_DST_REG_OUTLOC2__SHIFT 16 -static inline uint32_t A5XX_SP_VS_VPC_DST_REG_OUTLOC2(uint32_t val) -{ - return ((val) << A5XX_SP_VS_VPC_DST_REG_OUTLOC2__SHIFT) & A5XX_SP_VS_VPC_DST_REG_OUTLOC2__MASK; -} -#define A5XX_SP_VS_VPC_DST_REG_OUTLOC3__MASK 0xff000000 -#define A5XX_SP_VS_VPC_DST_REG_OUTLOC3__SHIFT 24 -static inline uint32_t A5XX_SP_VS_VPC_DST_REG_OUTLOC3(uint32_t val) -{ - return ((val) << A5XX_SP_VS_VPC_DST_REG_OUTLOC3__SHIFT) & A5XX_SP_VS_VPC_DST_REG_OUTLOC3__MASK; -} - -#define REG_A5XX_UNKNOWN_E5AB 0x0000e5ab - -#define REG_A5XX_SP_VS_OBJ_START_LO 0x0000e5ac - -#define REG_A5XX_SP_VS_OBJ_START_HI 0x0000e5ad - -#define REG_A5XX_SP_FS_CTRL_REG0 0x0000e5c0 -#define A5XX_SP_FS_CTRL_REG0_THREADSIZE__MASK 0x00000008 -#define A5XX_SP_FS_CTRL_REG0_THREADSIZE__SHIFT 3 -static inline uint32_t A5XX_SP_FS_CTRL_REG0_THREADSIZE(enum a3xx_threadsize val) -{ - return ((val) << 
A5XX_SP_FS_CTRL_REG0_THREADSIZE__SHIFT) & A5XX_SP_FS_CTRL_REG0_THREADSIZE__MASK; -} -#define A5XX_SP_FS_CTRL_REG0_HALFREGFOOTPRINT__MASK 0x000003f0 -#define A5XX_SP_FS_CTRL_REG0_HALFREGFOOTPRINT__SHIFT 4 -static inline uint32_t A5XX_SP_FS_CTRL_REG0_HALFREGFOOTPRINT(uint32_t val) -{ - return ((val) << A5XX_SP_FS_CTRL_REG0_HALFREGFOOTPRINT__SHIFT) & A5XX_SP_FS_CTRL_REG0_HALFREGFOOTPRINT__MASK; -} -#define A5XX_SP_FS_CTRL_REG0_FULLREGFOOTPRINT__MASK 0x0000fc00 -#define A5XX_SP_FS_CTRL_REG0_FULLREGFOOTPRINT__SHIFT 10 -static inline uint32_t A5XX_SP_FS_CTRL_REG0_FULLREGFOOTPRINT(uint32_t val) -{ - return ((val) << A5XX_SP_FS_CTRL_REG0_FULLREGFOOTPRINT__SHIFT) & A5XX_SP_FS_CTRL_REG0_FULLREGFOOTPRINT__MASK; -} -#define A5XX_SP_FS_CTRL_REG0_VARYING 0x00010000 -#define A5XX_SP_FS_CTRL_REG0_PIXLODENABLE 0x00100000 -#define A5XX_SP_FS_CTRL_REG0_BRANCHSTACK__MASK 0xfe000000 -#define A5XX_SP_FS_CTRL_REG0_BRANCHSTACK__SHIFT 25 -static inline uint32_t A5XX_SP_FS_CTRL_REG0_BRANCHSTACK(uint32_t val) -{ - return ((val) << A5XX_SP_FS_CTRL_REG0_BRANCHSTACK__SHIFT) & A5XX_SP_FS_CTRL_REG0_BRANCHSTACK__MASK; -} - -#define REG_A5XX_UNKNOWN_E5C2 0x0000e5c2 - -#define REG_A5XX_SP_FS_OBJ_START_LO 0x0000e5c3 - -#define REG_A5XX_SP_FS_OBJ_START_HI 0x0000e5c4 - -#define REG_A5XX_SP_BLEND_CNTL 0x0000e5c9 -#define A5XX_SP_BLEND_CNTL_ENABLED 0x00000001 -#define A5XX_SP_BLEND_CNTL_UNK8 0x00000100 -#define A5XX_SP_BLEND_CNTL_ALPHA_TO_COVERAGE 0x00000400 - -#define REG_A5XX_SP_FS_OUTPUT_CNTL 0x0000e5ca -#define A5XX_SP_FS_OUTPUT_CNTL_MRT__MASK 0x0000000f -#define A5XX_SP_FS_OUTPUT_CNTL_MRT__SHIFT 0 -static inline uint32_t A5XX_SP_FS_OUTPUT_CNTL_MRT(uint32_t val) -{ - return ((val) << A5XX_SP_FS_OUTPUT_CNTL_MRT__SHIFT) & A5XX_SP_FS_OUTPUT_CNTL_MRT__MASK; -} -#define A5XX_SP_FS_OUTPUT_CNTL_DEPTH_REGID__MASK 0x00001fe0 -#define A5XX_SP_FS_OUTPUT_CNTL_DEPTH_REGID__SHIFT 5 -static inline uint32_t A5XX_SP_FS_OUTPUT_CNTL_DEPTH_REGID(uint32_t val) -{ - return ((val) << 
A5XX_SP_FS_OUTPUT_CNTL_DEPTH_REGID__SHIFT) & A5XX_SP_FS_OUTPUT_CNTL_DEPTH_REGID__MASK; -} -#define A5XX_SP_FS_OUTPUT_CNTL_SAMPLEMASK_REGID__MASK 0x001fe000 -#define A5XX_SP_FS_OUTPUT_CNTL_SAMPLEMASK_REGID__SHIFT 13 -static inline uint32_t A5XX_SP_FS_OUTPUT_CNTL_SAMPLEMASK_REGID(uint32_t val) -{ - return ((val) << A5XX_SP_FS_OUTPUT_CNTL_SAMPLEMASK_REGID__SHIFT) & A5XX_SP_FS_OUTPUT_CNTL_SAMPLEMASK_REGID__MASK; -} - -static inline uint32_t REG_A5XX_SP_FS_OUTPUT(uint32_t i0) { return 0x0000e5cb + 0x1*i0; } - -static inline uint32_t REG_A5XX_SP_FS_OUTPUT_REG(uint32_t i0) { return 0x0000e5cb + 0x1*i0; } -#define A5XX_SP_FS_OUTPUT_REG_REGID__MASK 0x000000ff -#define A5XX_SP_FS_OUTPUT_REG_REGID__SHIFT 0 -static inline uint32_t A5XX_SP_FS_OUTPUT_REG_REGID(uint32_t val) -{ - return ((val) << A5XX_SP_FS_OUTPUT_REG_REGID__SHIFT) & A5XX_SP_FS_OUTPUT_REG_REGID__MASK; -} -#define A5XX_SP_FS_OUTPUT_REG_HALF_PRECISION 0x00000100 - -static inline uint32_t REG_A5XX_SP_FS_MRT(uint32_t i0) { return 0x0000e5d3 + 0x1*i0; } - -static inline uint32_t REG_A5XX_SP_FS_MRT_REG(uint32_t i0) { return 0x0000e5d3 + 0x1*i0; } -#define A5XX_SP_FS_MRT_REG_COLOR_FORMAT__MASK 0x000000ff -#define A5XX_SP_FS_MRT_REG_COLOR_FORMAT__SHIFT 0 -static inline uint32_t A5XX_SP_FS_MRT_REG_COLOR_FORMAT(enum a5xx_color_fmt val) -{ - return ((val) << A5XX_SP_FS_MRT_REG_COLOR_FORMAT__SHIFT) & A5XX_SP_FS_MRT_REG_COLOR_FORMAT__MASK; -} -#define A5XX_SP_FS_MRT_REG_COLOR_SINT 0x00000100 -#define A5XX_SP_FS_MRT_REG_COLOR_UINT 0x00000200 -#define A5XX_SP_FS_MRT_REG_COLOR_SRGB 0x00000400 - -#define REG_A5XX_UNKNOWN_E5DB 0x0000e5db - -#define REG_A5XX_SP_CS_CTRL_REG0 0x0000e5f0 -#define A5XX_SP_CS_CTRL_REG0_THREADSIZE__MASK 0x00000008 -#define A5XX_SP_CS_CTRL_REG0_THREADSIZE__SHIFT 3 -static inline uint32_t A5XX_SP_CS_CTRL_REG0_THREADSIZE(enum a3xx_threadsize val) -{ - return ((val) << A5XX_SP_CS_CTRL_REG0_THREADSIZE__SHIFT) & A5XX_SP_CS_CTRL_REG0_THREADSIZE__MASK; -} -#define A5XX_SP_CS_CTRL_REG0_HALFREGFOOTPRINT__MASK 
0x000003f0 -#define A5XX_SP_CS_CTRL_REG0_HALFREGFOOTPRINT__SHIFT 4 -static inline uint32_t A5XX_SP_CS_CTRL_REG0_HALFREGFOOTPRINT(uint32_t val) -{ - return ((val) << A5XX_SP_CS_CTRL_REG0_HALFREGFOOTPRINT__SHIFT) & A5XX_SP_CS_CTRL_REG0_HALFREGFOOTPRINT__MASK; -} -#define A5XX_SP_CS_CTRL_REG0_FULLREGFOOTPRINT__MASK 0x0000fc00 -#define A5XX_SP_CS_CTRL_REG0_FULLREGFOOTPRINT__SHIFT 10 -static inline uint32_t A5XX_SP_CS_CTRL_REG0_FULLREGFOOTPRINT(uint32_t val) -{ - return ((val) << A5XX_SP_CS_CTRL_REG0_FULLREGFOOTPRINT__SHIFT) & A5XX_SP_CS_CTRL_REG0_FULLREGFOOTPRINT__MASK; -} -#define A5XX_SP_CS_CTRL_REG0_VARYING 0x00010000 -#define A5XX_SP_CS_CTRL_REG0_PIXLODENABLE 0x00100000 -#define A5XX_SP_CS_CTRL_REG0_BRANCHSTACK__MASK 0xfe000000 -#define A5XX_SP_CS_CTRL_REG0_BRANCHSTACK__SHIFT 25 -static inline uint32_t A5XX_SP_CS_CTRL_REG0_BRANCHSTACK(uint32_t val) -{ - return ((val) << A5XX_SP_CS_CTRL_REG0_BRANCHSTACK__SHIFT) & A5XX_SP_CS_CTRL_REG0_BRANCHSTACK__MASK; -} - -#define REG_A5XX_UNKNOWN_E5F2 0x0000e5f2 - -#define REG_A5XX_SP_CS_OBJ_START_LO 0x0000e5f3 - -#define REG_A5XX_SP_CS_OBJ_START_HI 0x0000e5f4 - -#define REG_A5XX_SP_HS_CTRL_REG0 0x0000e600 -#define A5XX_SP_HS_CTRL_REG0_THREADSIZE__MASK 0x00000008 -#define A5XX_SP_HS_CTRL_REG0_THREADSIZE__SHIFT 3 -static inline uint32_t A5XX_SP_HS_CTRL_REG0_THREADSIZE(enum a3xx_threadsize val) -{ - return ((val) << A5XX_SP_HS_CTRL_REG0_THREADSIZE__SHIFT) & A5XX_SP_HS_CTRL_REG0_THREADSIZE__MASK; -} -#define A5XX_SP_HS_CTRL_REG0_HALFREGFOOTPRINT__MASK 0x000003f0 -#define A5XX_SP_HS_CTRL_REG0_HALFREGFOOTPRINT__SHIFT 4 -static inline uint32_t A5XX_SP_HS_CTRL_REG0_HALFREGFOOTPRINT(uint32_t val) -{ - return ((val) << A5XX_SP_HS_CTRL_REG0_HALFREGFOOTPRINT__SHIFT) & A5XX_SP_HS_CTRL_REG0_HALFREGFOOTPRINT__MASK; -} -#define A5XX_SP_HS_CTRL_REG0_FULLREGFOOTPRINT__MASK 0x0000fc00 -#define A5XX_SP_HS_CTRL_REG0_FULLREGFOOTPRINT__SHIFT 10 -static inline uint32_t A5XX_SP_HS_CTRL_REG0_FULLREGFOOTPRINT(uint32_t val) -{ - return ((val) << 
A5XX_SP_HS_CTRL_REG0_FULLREGFOOTPRINT__SHIFT) & A5XX_SP_HS_CTRL_REG0_FULLREGFOOTPRINT__MASK; -} -#define A5XX_SP_HS_CTRL_REG0_VARYING 0x00010000 -#define A5XX_SP_HS_CTRL_REG0_PIXLODENABLE 0x00100000 -#define A5XX_SP_HS_CTRL_REG0_BRANCHSTACK__MASK 0xfe000000 -#define A5XX_SP_HS_CTRL_REG0_BRANCHSTACK__SHIFT 25 -static inline uint32_t A5XX_SP_HS_CTRL_REG0_BRANCHSTACK(uint32_t val) -{ - return ((val) << A5XX_SP_HS_CTRL_REG0_BRANCHSTACK__SHIFT) & A5XX_SP_HS_CTRL_REG0_BRANCHSTACK__MASK; -} - -#define REG_A5XX_UNKNOWN_E602 0x0000e602 - -#define REG_A5XX_SP_HS_OBJ_START_LO 0x0000e603 - -#define REG_A5XX_SP_HS_OBJ_START_HI 0x0000e604 - -#define REG_A5XX_SP_DS_CTRL_REG0 0x0000e610 -#define A5XX_SP_DS_CTRL_REG0_THREADSIZE__MASK 0x00000008 -#define A5XX_SP_DS_CTRL_REG0_THREADSIZE__SHIFT 3 -static inline uint32_t A5XX_SP_DS_CTRL_REG0_THREADSIZE(enum a3xx_threadsize val) -{ - return ((val) << A5XX_SP_DS_CTRL_REG0_THREADSIZE__SHIFT) & A5XX_SP_DS_CTRL_REG0_THREADSIZE__MASK; -} -#define A5XX_SP_DS_CTRL_REG0_HALFREGFOOTPRINT__MASK 0x000003f0 -#define A5XX_SP_DS_CTRL_REG0_HALFREGFOOTPRINT__SHIFT 4 -static inline uint32_t A5XX_SP_DS_CTRL_REG0_HALFREGFOOTPRINT(uint32_t val) -{ - return ((val) << A5XX_SP_DS_CTRL_REG0_HALFREGFOOTPRINT__SHIFT) & A5XX_SP_DS_CTRL_REG0_HALFREGFOOTPRINT__MASK; -} -#define A5XX_SP_DS_CTRL_REG0_FULLREGFOOTPRINT__MASK 0x0000fc00 -#define A5XX_SP_DS_CTRL_REG0_FULLREGFOOTPRINT__SHIFT 10 -static inline uint32_t A5XX_SP_DS_CTRL_REG0_FULLREGFOOTPRINT(uint32_t val) -{ - return ((val) << A5XX_SP_DS_CTRL_REG0_FULLREGFOOTPRINT__SHIFT) & A5XX_SP_DS_CTRL_REG0_FULLREGFOOTPRINT__MASK; -} -#define A5XX_SP_DS_CTRL_REG0_VARYING 0x00010000 -#define A5XX_SP_DS_CTRL_REG0_PIXLODENABLE 0x00100000 -#define A5XX_SP_DS_CTRL_REG0_BRANCHSTACK__MASK 0xfe000000 -#define A5XX_SP_DS_CTRL_REG0_BRANCHSTACK__SHIFT 25 -static inline uint32_t A5XX_SP_DS_CTRL_REG0_BRANCHSTACK(uint32_t val) -{ - return ((val) << A5XX_SP_DS_CTRL_REG0_BRANCHSTACK__SHIFT) & A5XX_SP_DS_CTRL_REG0_BRANCHSTACK__MASK; -} - 
-#define REG_A5XX_UNKNOWN_E62B 0x0000e62b - -#define REG_A5XX_SP_DS_OBJ_START_LO 0x0000e62c - -#define REG_A5XX_SP_DS_OBJ_START_HI 0x0000e62d - -#define REG_A5XX_SP_GS_CTRL_REG0 0x0000e640 -#define A5XX_SP_GS_CTRL_REG0_THREADSIZE__MASK 0x00000008 -#define A5XX_SP_GS_CTRL_REG0_THREADSIZE__SHIFT 3 -static inline uint32_t A5XX_SP_GS_CTRL_REG0_THREADSIZE(enum a3xx_threadsize val) -{ - return ((val) << A5XX_SP_GS_CTRL_REG0_THREADSIZE__SHIFT) & A5XX_SP_GS_CTRL_REG0_THREADSIZE__MASK; -} -#define A5XX_SP_GS_CTRL_REG0_HALFREGFOOTPRINT__MASK 0x000003f0 -#define A5XX_SP_GS_CTRL_REG0_HALFREGFOOTPRINT__SHIFT 4 -static inline uint32_t A5XX_SP_GS_CTRL_REG0_HALFREGFOOTPRINT(uint32_t val) -{ - return ((val) << A5XX_SP_GS_CTRL_REG0_HALFREGFOOTPRINT__SHIFT) & A5XX_SP_GS_CTRL_REG0_HALFREGFOOTPRINT__MASK; -} -#define A5XX_SP_GS_CTRL_REG0_FULLREGFOOTPRINT__MASK 0x0000fc00 -#define A5XX_SP_GS_CTRL_REG0_FULLREGFOOTPRINT__SHIFT 10 -static inline uint32_t A5XX_SP_GS_CTRL_REG0_FULLREGFOOTPRINT(uint32_t val) -{ - return ((val) << A5XX_SP_GS_CTRL_REG0_FULLREGFOOTPRINT__SHIFT) & A5XX_SP_GS_CTRL_REG0_FULLREGFOOTPRINT__MASK; -} -#define A5XX_SP_GS_CTRL_REG0_VARYING 0x00010000 -#define A5XX_SP_GS_CTRL_REG0_PIXLODENABLE 0x00100000 -#define A5XX_SP_GS_CTRL_REG0_BRANCHSTACK__MASK 0xfe000000 -#define A5XX_SP_GS_CTRL_REG0_BRANCHSTACK__SHIFT 25 -static inline uint32_t A5XX_SP_GS_CTRL_REG0_BRANCHSTACK(uint32_t val) -{ - return ((val) << A5XX_SP_GS_CTRL_REG0_BRANCHSTACK__SHIFT) & A5XX_SP_GS_CTRL_REG0_BRANCHSTACK__MASK; -} - -#define REG_A5XX_UNKNOWN_E65B 0x0000e65b - -#define REG_A5XX_SP_GS_OBJ_START_LO 0x0000e65c - -#define REG_A5XX_SP_GS_OBJ_START_HI 0x0000e65d - -#define REG_A5XX_TPL1_TP_RAS_MSAA_CNTL 0x0000e704 -#define A5XX_TPL1_TP_RAS_MSAA_CNTL_SAMPLES__MASK 0x00000003 -#define A5XX_TPL1_TP_RAS_MSAA_CNTL_SAMPLES__SHIFT 0 -static inline uint32_t A5XX_TPL1_TP_RAS_MSAA_CNTL_SAMPLES(enum a3xx_msaa_samples val) -{ - return ((val) << A5XX_TPL1_TP_RAS_MSAA_CNTL_SAMPLES__SHIFT) & 
A5XX_TPL1_TP_RAS_MSAA_CNTL_SAMPLES__MASK; -} - -#define REG_A5XX_TPL1_TP_DEST_MSAA_CNTL 0x0000e705 -#define A5XX_TPL1_TP_DEST_MSAA_CNTL_SAMPLES__MASK 0x00000003 -#define A5XX_TPL1_TP_DEST_MSAA_CNTL_SAMPLES__SHIFT 0 -static inline uint32_t A5XX_TPL1_TP_DEST_MSAA_CNTL_SAMPLES(enum a3xx_msaa_samples val) -{ - return ((val) << A5XX_TPL1_TP_DEST_MSAA_CNTL_SAMPLES__SHIFT) & A5XX_TPL1_TP_DEST_MSAA_CNTL_SAMPLES__MASK; -} -#define A5XX_TPL1_TP_DEST_MSAA_CNTL_MSAA_DISABLE 0x00000004 - -#define REG_A5XX_TPL1_TP_BORDER_COLOR_BASE_ADDR_LO 0x0000e706 - -#define REG_A5XX_TPL1_TP_BORDER_COLOR_BASE_ADDR_HI 0x0000e707 - -#define REG_A5XX_TPL1_VS_TEX_COUNT 0x0000e700 - -#define REG_A5XX_TPL1_HS_TEX_COUNT 0x0000e701 - -#define REG_A5XX_TPL1_DS_TEX_COUNT 0x0000e702 - -#define REG_A5XX_TPL1_GS_TEX_COUNT 0x0000e703 - -#define REG_A5XX_TPL1_VS_TEX_SAMP_LO 0x0000e722 - -#define REG_A5XX_TPL1_VS_TEX_SAMP_HI 0x0000e723 - -#define REG_A5XX_TPL1_HS_TEX_SAMP_LO 0x0000e724 - -#define REG_A5XX_TPL1_HS_TEX_SAMP_HI 0x0000e725 - -#define REG_A5XX_TPL1_DS_TEX_SAMP_LO 0x0000e726 - -#define REG_A5XX_TPL1_DS_TEX_SAMP_HI 0x0000e727 - -#define REG_A5XX_TPL1_GS_TEX_SAMP_LO 0x0000e728 - -#define REG_A5XX_TPL1_GS_TEX_SAMP_HI 0x0000e729 - -#define REG_A5XX_TPL1_VS_TEX_CONST_LO 0x0000e72a - -#define REG_A5XX_TPL1_VS_TEX_CONST_HI 0x0000e72b - -#define REG_A5XX_TPL1_HS_TEX_CONST_LO 0x0000e72c - -#define REG_A5XX_TPL1_HS_TEX_CONST_HI 0x0000e72d - -#define REG_A5XX_TPL1_DS_TEX_CONST_LO 0x0000e72e - -#define REG_A5XX_TPL1_DS_TEX_CONST_HI 0x0000e72f - -#define REG_A5XX_TPL1_GS_TEX_CONST_LO 0x0000e730 - -#define REG_A5XX_TPL1_GS_TEX_CONST_HI 0x0000e731 - -#define REG_A5XX_TPL1_FS_TEX_COUNT 0x0000e750 - -#define REG_A5XX_TPL1_CS_TEX_COUNT 0x0000e751 - -#define REG_A5XX_TPL1_FS_TEX_SAMP_LO 0x0000e75a - -#define REG_A5XX_TPL1_FS_TEX_SAMP_HI 0x0000e75b - -#define REG_A5XX_TPL1_CS_TEX_SAMP_LO 0x0000e75c - -#define REG_A5XX_TPL1_CS_TEX_SAMP_HI 0x0000e75d - -#define REG_A5XX_TPL1_FS_TEX_CONST_LO 0x0000e75e - -#define 
REG_A5XX_TPL1_FS_TEX_CONST_HI 0x0000e75f - -#define REG_A5XX_TPL1_CS_TEX_CONST_LO 0x0000e760 - -#define REG_A5XX_TPL1_CS_TEX_CONST_HI 0x0000e761 - -#define REG_A5XX_TPL1_TP_FS_ROTATION_CNTL 0x0000e764 - -#define REG_A5XX_HLSQ_CONTROL_0_REG 0x0000e784 -#define A5XX_HLSQ_CONTROL_0_REG_FSTHREADSIZE__MASK 0x00000001 -#define A5XX_HLSQ_CONTROL_0_REG_FSTHREADSIZE__SHIFT 0 -static inline uint32_t A5XX_HLSQ_CONTROL_0_REG_FSTHREADSIZE(enum a3xx_threadsize val) -{ - return ((val) << A5XX_HLSQ_CONTROL_0_REG_FSTHREADSIZE__SHIFT) & A5XX_HLSQ_CONTROL_0_REG_FSTHREADSIZE__MASK; -} -#define A5XX_HLSQ_CONTROL_0_REG_CSTHREADSIZE__MASK 0x00000004 -#define A5XX_HLSQ_CONTROL_0_REG_CSTHREADSIZE__SHIFT 2 -static inline uint32_t A5XX_HLSQ_CONTROL_0_REG_CSTHREADSIZE(enum a3xx_threadsize val) -{ - return ((val) << A5XX_HLSQ_CONTROL_0_REG_CSTHREADSIZE__SHIFT) & A5XX_HLSQ_CONTROL_0_REG_CSTHREADSIZE__MASK; -} - -#define REG_A5XX_HLSQ_CONTROL_1_REG 0x0000e785 -#define A5XX_HLSQ_CONTROL_1_REG_PRIMALLOCTHRESHOLD__MASK 0x0000003f -#define A5XX_HLSQ_CONTROL_1_REG_PRIMALLOCTHRESHOLD__SHIFT 0 -static inline uint32_t A5XX_HLSQ_CONTROL_1_REG_PRIMALLOCTHRESHOLD(uint32_t val) -{ - return ((val) << A5XX_HLSQ_CONTROL_1_REG_PRIMALLOCTHRESHOLD__SHIFT) & A5XX_HLSQ_CONTROL_1_REG_PRIMALLOCTHRESHOLD__MASK; -} - -#define REG_A5XX_HLSQ_CONTROL_2_REG 0x0000e786 -#define A5XX_HLSQ_CONTROL_2_REG_FACEREGID__MASK 0x000000ff -#define A5XX_HLSQ_CONTROL_2_REG_FACEREGID__SHIFT 0 -static inline uint32_t A5XX_HLSQ_CONTROL_2_REG_FACEREGID(uint32_t val) -{ - return ((val) << A5XX_HLSQ_CONTROL_2_REG_FACEREGID__SHIFT) & A5XX_HLSQ_CONTROL_2_REG_FACEREGID__MASK; -} -#define A5XX_HLSQ_CONTROL_2_REG_SAMPLEID__MASK 0x0000ff00 -#define A5XX_HLSQ_CONTROL_2_REG_SAMPLEID__SHIFT 8 -static inline uint32_t A5XX_HLSQ_CONTROL_2_REG_SAMPLEID(uint32_t val) -{ - return ((val) << A5XX_HLSQ_CONTROL_2_REG_SAMPLEID__SHIFT) & A5XX_HLSQ_CONTROL_2_REG_SAMPLEID__MASK; -} -#define A5XX_HLSQ_CONTROL_2_REG_SAMPLEMASK__MASK 0x00ff0000 -#define 
A5XX_HLSQ_CONTROL_2_REG_SAMPLEMASK__SHIFT 16 -static inline uint32_t A5XX_HLSQ_CONTROL_2_REG_SAMPLEMASK(uint32_t val) -{ - return ((val) << A5XX_HLSQ_CONTROL_2_REG_SAMPLEMASK__SHIFT) & A5XX_HLSQ_CONTROL_2_REG_SAMPLEMASK__MASK; -} - -#define REG_A5XX_HLSQ_CONTROL_3_REG 0x0000e787 -#define A5XX_HLSQ_CONTROL_3_REG_FRAGCOORDXYREGID__MASK 0x000000ff -#define A5XX_HLSQ_CONTROL_3_REG_FRAGCOORDXYREGID__SHIFT 0 -static inline uint32_t A5XX_HLSQ_CONTROL_3_REG_FRAGCOORDXYREGID(uint32_t val) -{ - return ((val) << A5XX_HLSQ_CONTROL_3_REG_FRAGCOORDXYREGID__SHIFT) & A5XX_HLSQ_CONTROL_3_REG_FRAGCOORDXYREGID__MASK; -} - -#define REG_A5XX_HLSQ_CONTROL_4_REG 0x0000e788 -#define A5XX_HLSQ_CONTROL_4_REG_XYCOORDREGID__MASK 0x00ff0000 -#define A5XX_HLSQ_CONTROL_4_REG_XYCOORDREGID__SHIFT 16 -static inline uint32_t A5XX_HLSQ_CONTROL_4_REG_XYCOORDREGID(uint32_t val) -{ - return ((val) << A5XX_HLSQ_CONTROL_4_REG_XYCOORDREGID__SHIFT) & A5XX_HLSQ_CONTROL_4_REG_XYCOORDREGID__MASK; -} -#define A5XX_HLSQ_CONTROL_4_REG_ZWCOORDREGID__MASK 0xff000000 -#define A5XX_HLSQ_CONTROL_4_REG_ZWCOORDREGID__SHIFT 24 -static inline uint32_t A5XX_HLSQ_CONTROL_4_REG_ZWCOORDREGID(uint32_t val) -{ - return ((val) << A5XX_HLSQ_CONTROL_4_REG_ZWCOORDREGID__SHIFT) & A5XX_HLSQ_CONTROL_4_REG_ZWCOORDREGID__MASK; -} - -#define REG_A5XX_HLSQ_UPDATE_CNTL 0x0000e78a - -#define REG_A5XX_HLSQ_VS_CONFIG 0x0000e78b -#define A5XX_HLSQ_VS_CONFIG_ENABLED 0x00000001 -#define A5XX_HLSQ_VS_CONFIG_CONSTOBJECTOFFSET__MASK 0x000000fe -#define A5XX_HLSQ_VS_CONFIG_CONSTOBJECTOFFSET__SHIFT 1 -static inline uint32_t A5XX_HLSQ_VS_CONFIG_CONSTOBJECTOFFSET(uint32_t val) -{ - return ((val) << A5XX_HLSQ_VS_CONFIG_CONSTOBJECTOFFSET__SHIFT) & A5XX_HLSQ_VS_CONFIG_CONSTOBJECTOFFSET__MASK; -} -#define A5XX_HLSQ_VS_CONFIG_SHADEROBJOFFSET__MASK 0x00007f00 -#define A5XX_HLSQ_VS_CONFIG_SHADEROBJOFFSET__SHIFT 8 -static inline uint32_t A5XX_HLSQ_VS_CONFIG_SHADEROBJOFFSET(uint32_t val) -{ - return ((val) << A5XX_HLSQ_VS_CONFIG_SHADEROBJOFFSET__SHIFT) & 
A5XX_HLSQ_VS_CONFIG_SHADEROBJOFFSET__MASK; -} - -#define REG_A5XX_HLSQ_FS_CONFIG 0x0000e78c -#define A5XX_HLSQ_FS_CONFIG_ENABLED 0x00000001 -#define A5XX_HLSQ_FS_CONFIG_CONSTOBJECTOFFSET__MASK 0x000000fe -#define A5XX_HLSQ_FS_CONFIG_CONSTOBJECTOFFSET__SHIFT 1 -static inline uint32_t A5XX_HLSQ_FS_CONFIG_CONSTOBJECTOFFSET(uint32_t val) -{ - return ((val) << A5XX_HLSQ_FS_CONFIG_CONSTOBJECTOFFSET__SHIFT) & A5XX_HLSQ_FS_CONFIG_CONSTOBJECTOFFSET__MASK; -} -#define A5XX_HLSQ_FS_CONFIG_SHADEROBJOFFSET__MASK 0x00007f00 -#define A5XX_HLSQ_FS_CONFIG_SHADEROBJOFFSET__SHIFT 8 -static inline uint32_t A5XX_HLSQ_FS_CONFIG_SHADEROBJOFFSET(uint32_t val) -{ - return ((val) << A5XX_HLSQ_FS_CONFIG_SHADEROBJOFFSET__SHIFT) & A5XX_HLSQ_FS_CONFIG_SHADEROBJOFFSET__MASK; -} - -#define REG_A5XX_HLSQ_HS_CONFIG 0x0000e78d -#define A5XX_HLSQ_HS_CONFIG_ENABLED 0x00000001 -#define A5XX_HLSQ_HS_CONFIG_CONSTOBJECTOFFSET__MASK 0x000000fe -#define A5XX_HLSQ_HS_CONFIG_CONSTOBJECTOFFSET__SHIFT 1 -static inline uint32_t A5XX_HLSQ_HS_CONFIG_CONSTOBJECTOFFSET(uint32_t val) -{ - return ((val) << A5XX_HLSQ_HS_CONFIG_CONSTOBJECTOFFSET__SHIFT) & A5XX_HLSQ_HS_CONFIG_CONSTOBJECTOFFSET__MASK; -} -#define A5XX_HLSQ_HS_CONFIG_SHADEROBJOFFSET__MASK 0x00007f00 -#define A5XX_HLSQ_HS_CONFIG_SHADEROBJOFFSET__SHIFT 8 -static inline uint32_t A5XX_HLSQ_HS_CONFIG_SHADEROBJOFFSET(uint32_t val) -{ - return ((val) << A5XX_HLSQ_HS_CONFIG_SHADEROBJOFFSET__SHIFT) & A5XX_HLSQ_HS_CONFIG_SHADEROBJOFFSET__MASK; -} - -#define REG_A5XX_HLSQ_DS_CONFIG 0x0000e78e -#define A5XX_HLSQ_DS_CONFIG_ENABLED 0x00000001 -#define A5XX_HLSQ_DS_CONFIG_CONSTOBJECTOFFSET__MASK 0x000000fe -#define A5XX_HLSQ_DS_CONFIG_CONSTOBJECTOFFSET__SHIFT 1 -static inline uint32_t A5XX_HLSQ_DS_CONFIG_CONSTOBJECTOFFSET(uint32_t val) -{ - return ((val) << A5XX_HLSQ_DS_CONFIG_CONSTOBJECTOFFSET__SHIFT) & A5XX_HLSQ_DS_CONFIG_CONSTOBJECTOFFSET__MASK; -} -#define A5XX_HLSQ_DS_CONFIG_SHADEROBJOFFSET__MASK 0x00007f00 -#define A5XX_HLSQ_DS_CONFIG_SHADEROBJOFFSET__SHIFT 8 
-static inline uint32_t A5XX_HLSQ_DS_CONFIG_SHADEROBJOFFSET(uint32_t val) -{ - return ((val) << A5XX_HLSQ_DS_CONFIG_SHADEROBJOFFSET__SHIFT) & A5XX_HLSQ_DS_CONFIG_SHADEROBJOFFSET__MASK; -} - -#define REG_A5XX_HLSQ_GS_CONFIG 0x0000e78f -#define A5XX_HLSQ_GS_CONFIG_ENABLED 0x00000001 -#define A5XX_HLSQ_GS_CONFIG_CONSTOBJECTOFFSET__MASK 0x000000fe -#define A5XX_HLSQ_GS_CONFIG_CONSTOBJECTOFFSET__SHIFT 1 -static inline uint32_t A5XX_HLSQ_GS_CONFIG_CONSTOBJECTOFFSET(uint32_t val) -{ - return ((val) << A5XX_HLSQ_GS_CONFIG_CONSTOBJECTOFFSET__SHIFT) & A5XX_HLSQ_GS_CONFIG_CONSTOBJECTOFFSET__MASK; -} -#define A5XX_HLSQ_GS_CONFIG_SHADEROBJOFFSET__MASK 0x00007f00 -#define A5XX_HLSQ_GS_CONFIG_SHADEROBJOFFSET__SHIFT 8 -static inline uint32_t A5XX_HLSQ_GS_CONFIG_SHADEROBJOFFSET(uint32_t val) -{ - return ((val) << A5XX_HLSQ_GS_CONFIG_SHADEROBJOFFSET__SHIFT) & A5XX_HLSQ_GS_CONFIG_SHADEROBJOFFSET__MASK; -} - -#define REG_A5XX_HLSQ_CS_CONFIG 0x0000e790 -#define A5XX_HLSQ_CS_CONFIG_ENABLED 0x00000001 -#define A5XX_HLSQ_CS_CONFIG_CONSTOBJECTOFFSET__MASK 0x000000fe -#define A5XX_HLSQ_CS_CONFIG_CONSTOBJECTOFFSET__SHIFT 1 -static inline uint32_t A5XX_HLSQ_CS_CONFIG_CONSTOBJECTOFFSET(uint32_t val) -{ - return ((val) << A5XX_HLSQ_CS_CONFIG_CONSTOBJECTOFFSET__SHIFT) & A5XX_HLSQ_CS_CONFIG_CONSTOBJECTOFFSET__MASK; -} -#define A5XX_HLSQ_CS_CONFIG_SHADEROBJOFFSET__MASK 0x00007f00 -#define A5XX_HLSQ_CS_CONFIG_SHADEROBJOFFSET__SHIFT 8 -static inline uint32_t A5XX_HLSQ_CS_CONFIG_SHADEROBJOFFSET(uint32_t val) -{ - return ((val) << A5XX_HLSQ_CS_CONFIG_SHADEROBJOFFSET__SHIFT) & A5XX_HLSQ_CS_CONFIG_SHADEROBJOFFSET__MASK; -} - -#define REG_A5XX_HLSQ_VS_CNTL 0x0000e791 -#define A5XX_HLSQ_VS_CNTL_SSBO_ENABLE 0x00000001 -#define A5XX_HLSQ_VS_CNTL_INSTRLEN__MASK 0xfffffffe -#define A5XX_HLSQ_VS_CNTL_INSTRLEN__SHIFT 1 -static inline uint32_t A5XX_HLSQ_VS_CNTL_INSTRLEN(uint32_t val) -{ - return ((val) << A5XX_HLSQ_VS_CNTL_INSTRLEN__SHIFT) & A5XX_HLSQ_VS_CNTL_INSTRLEN__MASK; -} - -#define REG_A5XX_HLSQ_FS_CNTL 
0x0000e792 -#define A5XX_HLSQ_FS_CNTL_SSBO_ENABLE 0x00000001 -#define A5XX_HLSQ_FS_CNTL_INSTRLEN__MASK 0xfffffffe -#define A5XX_HLSQ_FS_CNTL_INSTRLEN__SHIFT 1 -static inline uint32_t A5XX_HLSQ_FS_CNTL_INSTRLEN(uint32_t val) -{ - return ((val) << A5XX_HLSQ_FS_CNTL_INSTRLEN__SHIFT) & A5XX_HLSQ_FS_CNTL_INSTRLEN__MASK; -} - -#define REG_A5XX_HLSQ_HS_CNTL 0x0000e793 -#define A5XX_HLSQ_HS_CNTL_SSBO_ENABLE 0x00000001 -#define A5XX_HLSQ_HS_CNTL_INSTRLEN__MASK 0xfffffffe -#define A5XX_HLSQ_HS_CNTL_INSTRLEN__SHIFT 1 -static inline uint32_t A5XX_HLSQ_HS_CNTL_INSTRLEN(uint32_t val) -{ - return ((val) << A5XX_HLSQ_HS_CNTL_INSTRLEN__SHIFT) & A5XX_HLSQ_HS_CNTL_INSTRLEN__MASK; -} - -#define REG_A5XX_HLSQ_DS_CNTL 0x0000e794 -#define A5XX_HLSQ_DS_CNTL_SSBO_ENABLE 0x00000001 -#define A5XX_HLSQ_DS_CNTL_INSTRLEN__MASK 0xfffffffe -#define A5XX_HLSQ_DS_CNTL_INSTRLEN__SHIFT 1 -static inline uint32_t A5XX_HLSQ_DS_CNTL_INSTRLEN(uint32_t val) -{ - return ((val) << A5XX_HLSQ_DS_CNTL_INSTRLEN__SHIFT) & A5XX_HLSQ_DS_CNTL_INSTRLEN__MASK; -} - -#define REG_A5XX_HLSQ_GS_CNTL 0x0000e795 -#define A5XX_HLSQ_GS_CNTL_SSBO_ENABLE 0x00000001 -#define A5XX_HLSQ_GS_CNTL_INSTRLEN__MASK 0xfffffffe -#define A5XX_HLSQ_GS_CNTL_INSTRLEN__SHIFT 1 -static inline uint32_t A5XX_HLSQ_GS_CNTL_INSTRLEN(uint32_t val) -{ - return ((val) << A5XX_HLSQ_GS_CNTL_INSTRLEN__SHIFT) & A5XX_HLSQ_GS_CNTL_INSTRLEN__MASK; -} - -#define REG_A5XX_HLSQ_CS_CNTL 0x0000e796 -#define A5XX_HLSQ_CS_CNTL_SSBO_ENABLE 0x00000001 -#define A5XX_HLSQ_CS_CNTL_INSTRLEN__MASK 0xfffffffe -#define A5XX_HLSQ_CS_CNTL_INSTRLEN__SHIFT 1 -static inline uint32_t A5XX_HLSQ_CS_CNTL_INSTRLEN(uint32_t val) -{ - return ((val) << A5XX_HLSQ_CS_CNTL_INSTRLEN__SHIFT) & A5XX_HLSQ_CS_CNTL_INSTRLEN__MASK; -} - -#define REG_A5XX_HLSQ_CS_KERNEL_GROUP_X 0x0000e7b9 - -#define REG_A5XX_HLSQ_CS_KERNEL_GROUP_Y 0x0000e7ba - -#define REG_A5XX_HLSQ_CS_KERNEL_GROUP_Z 0x0000e7bb - -#define REG_A5XX_HLSQ_CS_NDRANGE_0 0x0000e7b0 -#define A5XX_HLSQ_CS_NDRANGE_0_KERNELDIM__MASK 
0x00000003 -#define A5XX_HLSQ_CS_NDRANGE_0_KERNELDIM__SHIFT 0 -static inline uint32_t A5XX_HLSQ_CS_NDRANGE_0_KERNELDIM(uint32_t val) -{ - return ((val) << A5XX_HLSQ_CS_NDRANGE_0_KERNELDIM__SHIFT) & A5XX_HLSQ_CS_NDRANGE_0_KERNELDIM__MASK; -} -#define A5XX_HLSQ_CS_NDRANGE_0_LOCALSIZEX__MASK 0x00000ffc -#define A5XX_HLSQ_CS_NDRANGE_0_LOCALSIZEX__SHIFT 2 -static inline uint32_t A5XX_HLSQ_CS_NDRANGE_0_LOCALSIZEX(uint32_t val) -{ - return ((val) << A5XX_HLSQ_CS_NDRANGE_0_LOCALSIZEX__SHIFT) & A5XX_HLSQ_CS_NDRANGE_0_LOCALSIZEX__MASK; -} -#define A5XX_HLSQ_CS_NDRANGE_0_LOCALSIZEY__MASK 0x003ff000 -#define A5XX_HLSQ_CS_NDRANGE_0_LOCALSIZEY__SHIFT 12 -static inline uint32_t A5XX_HLSQ_CS_NDRANGE_0_LOCALSIZEY(uint32_t val) -{ - return ((val) << A5XX_HLSQ_CS_NDRANGE_0_LOCALSIZEY__SHIFT) & A5XX_HLSQ_CS_NDRANGE_0_LOCALSIZEY__MASK; -} -#define A5XX_HLSQ_CS_NDRANGE_0_LOCALSIZEZ__MASK 0xffc00000 -#define A5XX_HLSQ_CS_NDRANGE_0_LOCALSIZEZ__SHIFT 22 -static inline uint32_t A5XX_HLSQ_CS_NDRANGE_0_LOCALSIZEZ(uint32_t val) -{ - return ((val) << A5XX_HLSQ_CS_NDRANGE_0_LOCALSIZEZ__SHIFT) & A5XX_HLSQ_CS_NDRANGE_0_LOCALSIZEZ__MASK; -} - -#define REG_A5XX_HLSQ_CS_NDRANGE_1 0x0000e7b1 -#define A5XX_HLSQ_CS_NDRANGE_1_GLOBALSIZE_X__MASK 0xffffffff -#define A5XX_HLSQ_CS_NDRANGE_1_GLOBALSIZE_X__SHIFT 0 -static inline uint32_t A5XX_HLSQ_CS_NDRANGE_1_GLOBALSIZE_X(uint32_t val) -{ - return ((val) << A5XX_HLSQ_CS_NDRANGE_1_GLOBALSIZE_X__SHIFT) & A5XX_HLSQ_CS_NDRANGE_1_GLOBALSIZE_X__MASK; -} - -#define REG_A5XX_HLSQ_CS_NDRANGE_2 0x0000e7b2 -#define A5XX_HLSQ_CS_NDRANGE_2_GLOBALOFF_X__MASK 0xffffffff -#define A5XX_HLSQ_CS_NDRANGE_2_GLOBALOFF_X__SHIFT 0 -static inline uint32_t A5XX_HLSQ_CS_NDRANGE_2_GLOBALOFF_X(uint32_t val) -{ - return ((val) << A5XX_HLSQ_CS_NDRANGE_2_GLOBALOFF_X__SHIFT) & A5XX_HLSQ_CS_NDRANGE_2_GLOBALOFF_X__MASK; -} - -#define REG_A5XX_HLSQ_CS_NDRANGE_3 0x0000e7b3 -#define A5XX_HLSQ_CS_NDRANGE_3_GLOBALSIZE_Y__MASK 0xffffffff -#define A5XX_HLSQ_CS_NDRANGE_3_GLOBALSIZE_Y__SHIFT 0 -static 
inline uint32_t A5XX_HLSQ_CS_NDRANGE_3_GLOBALSIZE_Y(uint32_t val) -{ - return ((val) << A5XX_HLSQ_CS_NDRANGE_3_GLOBALSIZE_Y__SHIFT) & A5XX_HLSQ_CS_NDRANGE_3_GLOBALSIZE_Y__MASK; -} - -#define REG_A5XX_HLSQ_CS_NDRANGE_4 0x0000e7b4 -#define A5XX_HLSQ_CS_NDRANGE_4_GLOBALOFF_Y__MASK 0xffffffff -#define A5XX_HLSQ_CS_NDRANGE_4_GLOBALOFF_Y__SHIFT 0 -static inline uint32_t A5XX_HLSQ_CS_NDRANGE_4_GLOBALOFF_Y(uint32_t val) -{ - return ((val) << A5XX_HLSQ_CS_NDRANGE_4_GLOBALOFF_Y__SHIFT) & A5XX_HLSQ_CS_NDRANGE_4_GLOBALOFF_Y__MASK; -} - -#define REG_A5XX_HLSQ_CS_NDRANGE_5 0x0000e7b5 -#define A5XX_HLSQ_CS_NDRANGE_5_GLOBALSIZE_Z__MASK 0xffffffff -#define A5XX_HLSQ_CS_NDRANGE_5_GLOBALSIZE_Z__SHIFT 0 -static inline uint32_t A5XX_HLSQ_CS_NDRANGE_5_GLOBALSIZE_Z(uint32_t val) -{ - return ((val) << A5XX_HLSQ_CS_NDRANGE_5_GLOBALSIZE_Z__SHIFT) & A5XX_HLSQ_CS_NDRANGE_5_GLOBALSIZE_Z__MASK; -} - -#define REG_A5XX_HLSQ_CS_NDRANGE_6 0x0000e7b6 -#define A5XX_HLSQ_CS_NDRANGE_6_GLOBALOFF_Z__MASK 0xffffffff -#define A5XX_HLSQ_CS_NDRANGE_6_GLOBALOFF_Z__SHIFT 0 -static inline uint32_t A5XX_HLSQ_CS_NDRANGE_6_GLOBALOFF_Z(uint32_t val) -{ - return ((val) << A5XX_HLSQ_CS_NDRANGE_6_GLOBALOFF_Z__SHIFT) & A5XX_HLSQ_CS_NDRANGE_6_GLOBALOFF_Z__MASK; -} - -#define REG_A5XX_HLSQ_CS_CNTL_0 0x0000e7b7 -#define A5XX_HLSQ_CS_CNTL_0_WGIDCONSTID__MASK 0x000000ff -#define A5XX_HLSQ_CS_CNTL_0_WGIDCONSTID__SHIFT 0 -static inline uint32_t A5XX_HLSQ_CS_CNTL_0_WGIDCONSTID(uint32_t val) -{ - return ((val) << A5XX_HLSQ_CS_CNTL_0_WGIDCONSTID__SHIFT) & A5XX_HLSQ_CS_CNTL_0_WGIDCONSTID__MASK; -} -#define A5XX_HLSQ_CS_CNTL_0_UNK0__MASK 0x0000ff00 -#define A5XX_HLSQ_CS_CNTL_0_UNK0__SHIFT 8 -static inline uint32_t A5XX_HLSQ_CS_CNTL_0_UNK0(uint32_t val) -{ - return ((val) << A5XX_HLSQ_CS_CNTL_0_UNK0__SHIFT) & A5XX_HLSQ_CS_CNTL_0_UNK0__MASK; -} -#define A5XX_HLSQ_CS_CNTL_0_UNK1__MASK 0x00ff0000 -#define A5XX_HLSQ_CS_CNTL_0_UNK1__SHIFT 16 -static inline uint32_t A5XX_HLSQ_CS_CNTL_0_UNK1(uint32_t val) -{ - return ((val) << 
A5XX_HLSQ_CS_CNTL_0_UNK1__SHIFT) & A5XX_HLSQ_CS_CNTL_0_UNK1__MASK; -} -#define A5XX_HLSQ_CS_CNTL_0_LOCALIDREGID__MASK 0xff000000 -#define A5XX_HLSQ_CS_CNTL_0_LOCALIDREGID__SHIFT 24 -static inline uint32_t A5XX_HLSQ_CS_CNTL_0_LOCALIDREGID(uint32_t val) -{ - return ((val) << A5XX_HLSQ_CS_CNTL_0_LOCALIDREGID__SHIFT) & A5XX_HLSQ_CS_CNTL_0_LOCALIDREGID__MASK; -} - -#define REG_A5XX_HLSQ_CS_CNTL_1 0x0000e7b8 - -#define REG_A5XX_UNKNOWN_E7C0 0x0000e7c0 - -#define REG_A5XX_HLSQ_VS_CONSTLEN 0x0000e7c3 - -#define REG_A5XX_HLSQ_VS_INSTRLEN 0x0000e7c4 - -#define REG_A5XX_UNKNOWN_E7C5 0x0000e7c5 - -#define REG_A5XX_HLSQ_HS_CONSTLEN 0x0000e7c8 - -#define REG_A5XX_HLSQ_HS_INSTRLEN 0x0000e7c9 - -#define REG_A5XX_UNKNOWN_E7CA 0x0000e7ca - -#define REG_A5XX_HLSQ_DS_CONSTLEN 0x0000e7cd - -#define REG_A5XX_HLSQ_DS_INSTRLEN 0x0000e7ce - -#define REG_A5XX_UNKNOWN_E7CF 0x0000e7cf - -#define REG_A5XX_HLSQ_GS_CONSTLEN 0x0000e7d2 - -#define REG_A5XX_HLSQ_GS_INSTRLEN 0x0000e7d3 - -#define REG_A5XX_UNKNOWN_E7D4 0x0000e7d4 - -#define REG_A5XX_HLSQ_FS_CONSTLEN 0x0000e7d7 - -#define REG_A5XX_HLSQ_FS_INSTRLEN 0x0000e7d8 - -#define REG_A5XX_UNKNOWN_E7D9 0x0000e7d9 - -#define REG_A5XX_HLSQ_CS_CONSTLEN 0x0000e7dc - -#define REG_A5XX_HLSQ_CS_INSTRLEN 0x0000e7dd - -#define REG_A5XX_RB_2D_BLIT_CNTL 0x00002100 - -#define REG_A5XX_RB_2D_SRC_SOLID_DW0 0x00002101 - -#define REG_A5XX_RB_2D_SRC_SOLID_DW1 0x00002102 - -#define REG_A5XX_RB_2D_SRC_SOLID_DW2 0x00002103 - -#define REG_A5XX_RB_2D_SRC_SOLID_DW3 0x00002104 - -#define REG_A5XX_RB_2D_SRC_INFO 0x00002107 -#define A5XX_RB_2D_SRC_INFO_COLOR_FORMAT__MASK 0x000000ff -#define A5XX_RB_2D_SRC_INFO_COLOR_FORMAT__SHIFT 0 -static inline uint32_t A5XX_RB_2D_SRC_INFO_COLOR_FORMAT(enum a5xx_color_fmt val) -{ - return ((val) << A5XX_RB_2D_SRC_INFO_COLOR_FORMAT__SHIFT) & A5XX_RB_2D_SRC_INFO_COLOR_FORMAT__MASK; -} -#define A5XX_RB_2D_SRC_INFO_TILE_MODE__MASK 0x00000300 -#define A5XX_RB_2D_SRC_INFO_TILE_MODE__SHIFT 8 -static inline uint32_t 
A5XX_RB_2D_SRC_INFO_TILE_MODE(enum a5xx_tile_mode val) -{ - return ((val) << A5XX_RB_2D_SRC_INFO_TILE_MODE__SHIFT) & A5XX_RB_2D_SRC_INFO_TILE_MODE__MASK; -} -#define A5XX_RB_2D_SRC_INFO_COLOR_SWAP__MASK 0x00000c00 -#define A5XX_RB_2D_SRC_INFO_COLOR_SWAP__SHIFT 10 -static inline uint32_t A5XX_RB_2D_SRC_INFO_COLOR_SWAP(enum a3xx_color_swap val) -{ - return ((val) << A5XX_RB_2D_SRC_INFO_COLOR_SWAP__SHIFT) & A5XX_RB_2D_SRC_INFO_COLOR_SWAP__MASK; -} -#define A5XX_RB_2D_SRC_INFO_FLAGS 0x00001000 - -#define REG_A5XX_RB_2D_SRC_LO 0x00002108 - -#define REG_A5XX_RB_2D_SRC_HI 0x00002109 - -#define REG_A5XX_RB_2D_SRC_SIZE 0x0000210a -#define A5XX_RB_2D_SRC_SIZE_PITCH__MASK 0x0000ffff -#define A5XX_RB_2D_SRC_SIZE_PITCH__SHIFT 0 -static inline uint32_t A5XX_RB_2D_SRC_SIZE_PITCH(uint32_t val) -{ - assert(!(val & 0x3f)); - return ((val >> 6) << A5XX_RB_2D_SRC_SIZE_PITCH__SHIFT) & A5XX_RB_2D_SRC_SIZE_PITCH__MASK; -} -#define A5XX_RB_2D_SRC_SIZE_ARRAY_PITCH__MASK 0xffff0000 -#define A5XX_RB_2D_SRC_SIZE_ARRAY_PITCH__SHIFT 16 -static inline uint32_t A5XX_RB_2D_SRC_SIZE_ARRAY_PITCH(uint32_t val) -{ - assert(!(val & 0x3f)); - return ((val >> 6) << A5XX_RB_2D_SRC_SIZE_ARRAY_PITCH__SHIFT) & A5XX_RB_2D_SRC_SIZE_ARRAY_PITCH__MASK; -} - -#define REG_A5XX_RB_2D_DST_INFO 0x00002110 -#define A5XX_RB_2D_DST_INFO_COLOR_FORMAT__MASK 0x000000ff -#define A5XX_RB_2D_DST_INFO_COLOR_FORMAT__SHIFT 0 -static inline uint32_t A5XX_RB_2D_DST_INFO_COLOR_FORMAT(enum a5xx_color_fmt val) -{ - return ((val) << A5XX_RB_2D_DST_INFO_COLOR_FORMAT__SHIFT) & A5XX_RB_2D_DST_INFO_COLOR_FORMAT__MASK; -} -#define A5XX_RB_2D_DST_INFO_TILE_MODE__MASK 0x00000300 -#define A5XX_RB_2D_DST_INFO_TILE_MODE__SHIFT 8 -static inline uint32_t A5XX_RB_2D_DST_INFO_TILE_MODE(enum a5xx_tile_mode val) -{ - return ((val) << A5XX_RB_2D_DST_INFO_TILE_MODE__SHIFT) & A5XX_RB_2D_DST_INFO_TILE_MODE__MASK; -} -#define A5XX_RB_2D_DST_INFO_COLOR_SWAP__MASK 0x00000c00 -#define A5XX_RB_2D_DST_INFO_COLOR_SWAP__SHIFT 10 -static inline uint32_t 
A5XX_RB_2D_DST_INFO_COLOR_SWAP(enum a3xx_color_swap val) -{ - return ((val) << A5XX_RB_2D_DST_INFO_COLOR_SWAP__SHIFT) & A5XX_RB_2D_DST_INFO_COLOR_SWAP__MASK; -} -#define A5XX_RB_2D_DST_INFO_FLAGS 0x00001000 - -#define REG_A5XX_RB_2D_DST_LO 0x00002111 - -#define REG_A5XX_RB_2D_DST_HI 0x00002112 - -#define REG_A5XX_RB_2D_DST_SIZE 0x00002113 -#define A5XX_RB_2D_DST_SIZE_PITCH__MASK 0x0000ffff -#define A5XX_RB_2D_DST_SIZE_PITCH__SHIFT 0 -static inline uint32_t A5XX_RB_2D_DST_SIZE_PITCH(uint32_t val) -{ - assert(!(val & 0x3f)); - return ((val >> 6) << A5XX_RB_2D_DST_SIZE_PITCH__SHIFT) & A5XX_RB_2D_DST_SIZE_PITCH__MASK; -} -#define A5XX_RB_2D_DST_SIZE_ARRAY_PITCH__MASK 0xffff0000 -#define A5XX_RB_2D_DST_SIZE_ARRAY_PITCH__SHIFT 16 -static inline uint32_t A5XX_RB_2D_DST_SIZE_ARRAY_PITCH(uint32_t val) -{ - assert(!(val & 0x3f)); - return ((val >> 6) << A5XX_RB_2D_DST_SIZE_ARRAY_PITCH__SHIFT) & A5XX_RB_2D_DST_SIZE_ARRAY_PITCH__MASK; -} - -#define REG_A5XX_RB_2D_SRC_FLAGS_LO 0x00002140 - -#define REG_A5XX_RB_2D_SRC_FLAGS_HI 0x00002141 - -#define REG_A5XX_RB_2D_DST_FLAGS_LO 0x00002143 - -#define REG_A5XX_RB_2D_DST_FLAGS_HI 0x00002144 - -#define REG_A5XX_GRAS_2D_BLIT_CNTL 0x00002180 - -#define REG_A5XX_GRAS_2D_SRC_INFO 0x00002181 -#define A5XX_GRAS_2D_SRC_INFO_COLOR_FORMAT__MASK 0x000000ff -#define A5XX_GRAS_2D_SRC_INFO_COLOR_FORMAT__SHIFT 0 -static inline uint32_t A5XX_GRAS_2D_SRC_INFO_COLOR_FORMAT(enum a5xx_color_fmt val) -{ - return ((val) << A5XX_GRAS_2D_SRC_INFO_COLOR_FORMAT__SHIFT) & A5XX_GRAS_2D_SRC_INFO_COLOR_FORMAT__MASK; -} -#define A5XX_GRAS_2D_SRC_INFO_TILE_MODE__MASK 0x00000300 -#define A5XX_GRAS_2D_SRC_INFO_TILE_MODE__SHIFT 8 -static inline uint32_t A5XX_GRAS_2D_SRC_INFO_TILE_MODE(enum a5xx_tile_mode val) -{ - return ((val) << A5XX_GRAS_2D_SRC_INFO_TILE_MODE__SHIFT) & A5XX_GRAS_2D_SRC_INFO_TILE_MODE__MASK; -} -#define A5XX_GRAS_2D_SRC_INFO_COLOR_SWAP__MASK 0x00000c00 -#define A5XX_GRAS_2D_SRC_INFO_COLOR_SWAP__SHIFT 10 -static inline uint32_t 
A5XX_GRAS_2D_SRC_INFO_COLOR_SWAP(enum a3xx_color_swap val) -{ - return ((val) << A5XX_GRAS_2D_SRC_INFO_COLOR_SWAP__SHIFT) & A5XX_GRAS_2D_SRC_INFO_COLOR_SWAP__MASK; -} -#define A5XX_GRAS_2D_SRC_INFO_FLAGS 0x00001000 - -#define REG_A5XX_GRAS_2D_DST_INFO 0x00002182 -#define A5XX_GRAS_2D_DST_INFO_COLOR_FORMAT__MASK 0x000000ff -#define A5XX_GRAS_2D_DST_INFO_COLOR_FORMAT__SHIFT 0 -static inline uint32_t A5XX_GRAS_2D_DST_INFO_COLOR_FORMAT(enum a5xx_color_fmt val) -{ - return ((val) << A5XX_GRAS_2D_DST_INFO_COLOR_FORMAT__SHIFT) & A5XX_GRAS_2D_DST_INFO_COLOR_FORMAT__MASK; -} -#define A5XX_GRAS_2D_DST_INFO_TILE_MODE__MASK 0x00000300 -#define A5XX_GRAS_2D_DST_INFO_TILE_MODE__SHIFT 8 -static inline uint32_t A5XX_GRAS_2D_DST_INFO_TILE_MODE(enum a5xx_tile_mode val) -{ - return ((val) << A5XX_GRAS_2D_DST_INFO_TILE_MODE__SHIFT) & A5XX_GRAS_2D_DST_INFO_TILE_MODE__MASK; -} -#define A5XX_GRAS_2D_DST_INFO_COLOR_SWAP__MASK 0x00000c00 -#define A5XX_GRAS_2D_DST_INFO_COLOR_SWAP__SHIFT 10 -static inline uint32_t A5XX_GRAS_2D_DST_INFO_COLOR_SWAP(enum a3xx_color_swap val) -{ - return ((val) << A5XX_GRAS_2D_DST_INFO_COLOR_SWAP__SHIFT) & A5XX_GRAS_2D_DST_INFO_COLOR_SWAP__MASK; -} -#define A5XX_GRAS_2D_DST_INFO_FLAGS 0x00001000 - -#define REG_A5XX_UNKNOWN_2100 0x00002100 - -#define REG_A5XX_UNKNOWN_2180 0x00002180 - -#define REG_A5XX_UNKNOWN_2184 0x00002184 - -#define REG_A5XX_TEX_SAMP_0 0x00000000 -#define A5XX_TEX_SAMP_0_MIPFILTER_LINEAR_NEAR 0x00000001 -#define A5XX_TEX_SAMP_0_XY_MAG__MASK 0x00000006 -#define A5XX_TEX_SAMP_0_XY_MAG__SHIFT 1 -static inline uint32_t A5XX_TEX_SAMP_0_XY_MAG(enum a5xx_tex_filter val) -{ - return ((val) << A5XX_TEX_SAMP_0_XY_MAG__SHIFT) & A5XX_TEX_SAMP_0_XY_MAG__MASK; -} -#define A5XX_TEX_SAMP_0_XY_MIN__MASK 0x00000018 -#define A5XX_TEX_SAMP_0_XY_MIN__SHIFT 3 -static inline uint32_t A5XX_TEX_SAMP_0_XY_MIN(enum a5xx_tex_filter val) -{ - return ((val) << A5XX_TEX_SAMP_0_XY_MIN__SHIFT) & A5XX_TEX_SAMP_0_XY_MIN__MASK; -} -#define A5XX_TEX_SAMP_0_WRAP_S__MASK 
0x000000e0 -#define A5XX_TEX_SAMP_0_WRAP_S__SHIFT 5 -static inline uint32_t A5XX_TEX_SAMP_0_WRAP_S(enum a5xx_tex_clamp val) -{ - return ((val) << A5XX_TEX_SAMP_0_WRAP_S__SHIFT) & A5XX_TEX_SAMP_0_WRAP_S__MASK; -} -#define A5XX_TEX_SAMP_0_WRAP_T__MASK 0x00000700 -#define A5XX_TEX_SAMP_0_WRAP_T__SHIFT 8 -static inline uint32_t A5XX_TEX_SAMP_0_WRAP_T(enum a5xx_tex_clamp val) -{ - return ((val) << A5XX_TEX_SAMP_0_WRAP_T__SHIFT) & A5XX_TEX_SAMP_0_WRAP_T__MASK; -} -#define A5XX_TEX_SAMP_0_WRAP_R__MASK 0x00003800 -#define A5XX_TEX_SAMP_0_WRAP_R__SHIFT 11 -static inline uint32_t A5XX_TEX_SAMP_0_WRAP_R(enum a5xx_tex_clamp val) -{ - return ((val) << A5XX_TEX_SAMP_0_WRAP_R__SHIFT) & A5XX_TEX_SAMP_0_WRAP_R__MASK; -} -#define A5XX_TEX_SAMP_0_ANISO__MASK 0x0001c000 -#define A5XX_TEX_SAMP_0_ANISO__SHIFT 14 -static inline uint32_t A5XX_TEX_SAMP_0_ANISO(enum a5xx_tex_aniso val) -{ - return ((val) << A5XX_TEX_SAMP_0_ANISO__SHIFT) & A5XX_TEX_SAMP_0_ANISO__MASK; -} -#define A5XX_TEX_SAMP_0_LOD_BIAS__MASK 0xfff80000 -#define A5XX_TEX_SAMP_0_LOD_BIAS__SHIFT 19 -static inline uint32_t A5XX_TEX_SAMP_0_LOD_BIAS(float val) -{ - return ((((int32_t)(val * 256.0))) << A5XX_TEX_SAMP_0_LOD_BIAS__SHIFT) & A5XX_TEX_SAMP_0_LOD_BIAS__MASK; -} - -#define REG_A5XX_TEX_SAMP_1 0x00000001 -#define A5XX_TEX_SAMP_1_COMPARE_FUNC__MASK 0x0000000e -#define A5XX_TEX_SAMP_1_COMPARE_FUNC__SHIFT 1 -static inline uint32_t A5XX_TEX_SAMP_1_COMPARE_FUNC(enum adreno_compare_func val) -{ - return ((val) << A5XX_TEX_SAMP_1_COMPARE_FUNC__SHIFT) & A5XX_TEX_SAMP_1_COMPARE_FUNC__MASK; -} -#define A5XX_TEX_SAMP_1_CUBEMAPSEAMLESSFILTOFF 0x00000010 -#define A5XX_TEX_SAMP_1_UNNORM_COORDS 0x00000020 -#define A5XX_TEX_SAMP_1_MIPFILTER_LINEAR_FAR 0x00000040 -#define A5XX_TEX_SAMP_1_MAX_LOD__MASK 0x000fff00 -#define A5XX_TEX_SAMP_1_MAX_LOD__SHIFT 8 -static inline uint32_t A5XX_TEX_SAMP_1_MAX_LOD(float val) -{ - return ((((uint32_t)(val * 256.0))) << A5XX_TEX_SAMP_1_MAX_LOD__SHIFT) & A5XX_TEX_SAMP_1_MAX_LOD__MASK; -} -#define 
A5XX_TEX_SAMP_1_MIN_LOD__MASK 0xfff00000 -#define A5XX_TEX_SAMP_1_MIN_LOD__SHIFT 20 -static inline uint32_t A5XX_TEX_SAMP_1_MIN_LOD(float val) -{ - return ((((uint32_t)(val * 256.0))) << A5XX_TEX_SAMP_1_MIN_LOD__SHIFT) & A5XX_TEX_SAMP_1_MIN_LOD__MASK; -} - -#define REG_A5XX_TEX_SAMP_2 0x00000002 -#define A5XX_TEX_SAMP_2_BCOLOR_OFFSET__MASK 0xfffffff0 -#define A5XX_TEX_SAMP_2_BCOLOR_OFFSET__SHIFT 4 -static inline uint32_t A5XX_TEX_SAMP_2_BCOLOR_OFFSET(uint32_t val) -{ - return ((val) << A5XX_TEX_SAMP_2_BCOLOR_OFFSET__SHIFT) & A5XX_TEX_SAMP_2_BCOLOR_OFFSET__MASK; -} - -#define REG_A5XX_TEX_SAMP_3 0x00000003 - -#define REG_A5XX_TEX_CONST_0 0x00000000 -#define A5XX_TEX_CONST_0_TILE_MODE__MASK 0x00000003 -#define A5XX_TEX_CONST_0_TILE_MODE__SHIFT 0 -static inline uint32_t A5XX_TEX_CONST_0_TILE_MODE(enum a5xx_tile_mode val) -{ - return ((val) << A5XX_TEX_CONST_0_TILE_MODE__SHIFT) & A5XX_TEX_CONST_0_TILE_MODE__MASK; -} -#define A5XX_TEX_CONST_0_SRGB 0x00000004 -#define A5XX_TEX_CONST_0_SWIZ_X__MASK 0x00000070 -#define A5XX_TEX_CONST_0_SWIZ_X__SHIFT 4 -static inline uint32_t A5XX_TEX_CONST_0_SWIZ_X(enum a5xx_tex_swiz val) -{ - return ((val) << A5XX_TEX_CONST_0_SWIZ_X__SHIFT) & A5XX_TEX_CONST_0_SWIZ_X__MASK; -} -#define A5XX_TEX_CONST_0_SWIZ_Y__MASK 0x00000380 -#define A5XX_TEX_CONST_0_SWIZ_Y__SHIFT 7 -static inline uint32_t A5XX_TEX_CONST_0_SWIZ_Y(enum a5xx_tex_swiz val) -{ - return ((val) << A5XX_TEX_CONST_0_SWIZ_Y__SHIFT) & A5XX_TEX_CONST_0_SWIZ_Y__MASK; -} -#define A5XX_TEX_CONST_0_SWIZ_Z__MASK 0x00001c00 -#define A5XX_TEX_CONST_0_SWIZ_Z__SHIFT 10 -static inline uint32_t A5XX_TEX_CONST_0_SWIZ_Z(enum a5xx_tex_swiz val) -{ - return ((val) << A5XX_TEX_CONST_0_SWIZ_Z__SHIFT) & A5XX_TEX_CONST_0_SWIZ_Z__MASK; -} -#define A5XX_TEX_CONST_0_SWIZ_W__MASK 0x0000e000 -#define A5XX_TEX_CONST_0_SWIZ_W__SHIFT 13 -static inline uint32_t A5XX_TEX_CONST_0_SWIZ_W(enum a5xx_tex_swiz val) -{ - return ((val) << A5XX_TEX_CONST_0_SWIZ_W__SHIFT) & A5XX_TEX_CONST_0_SWIZ_W__MASK; -} -#define 
A5XX_TEX_CONST_0_MIPLVLS__MASK 0x000f0000 -#define A5XX_TEX_CONST_0_MIPLVLS__SHIFT 16 -static inline uint32_t A5XX_TEX_CONST_0_MIPLVLS(uint32_t val) -{ - return ((val) << A5XX_TEX_CONST_0_MIPLVLS__SHIFT) & A5XX_TEX_CONST_0_MIPLVLS__MASK; -} -#define A5XX_TEX_CONST_0_SAMPLES__MASK 0x00300000 -#define A5XX_TEX_CONST_0_SAMPLES__SHIFT 20 -static inline uint32_t A5XX_TEX_CONST_0_SAMPLES(enum a3xx_msaa_samples val) -{ - return ((val) << A5XX_TEX_CONST_0_SAMPLES__SHIFT) & A5XX_TEX_CONST_0_SAMPLES__MASK; -} -#define A5XX_TEX_CONST_0_FMT__MASK 0x3fc00000 -#define A5XX_TEX_CONST_0_FMT__SHIFT 22 -static inline uint32_t A5XX_TEX_CONST_0_FMT(enum a5xx_tex_fmt val) -{ - return ((val) << A5XX_TEX_CONST_0_FMT__SHIFT) & A5XX_TEX_CONST_0_FMT__MASK; -} -#define A5XX_TEX_CONST_0_SWAP__MASK 0xc0000000 -#define A5XX_TEX_CONST_0_SWAP__SHIFT 30 -static inline uint32_t A5XX_TEX_CONST_0_SWAP(enum a3xx_color_swap val) -{ - return ((val) << A5XX_TEX_CONST_0_SWAP__SHIFT) & A5XX_TEX_CONST_0_SWAP__MASK; -} - -#define REG_A5XX_TEX_CONST_1 0x00000001 -#define A5XX_TEX_CONST_1_WIDTH__MASK 0x00007fff -#define A5XX_TEX_CONST_1_WIDTH__SHIFT 0 -static inline uint32_t A5XX_TEX_CONST_1_WIDTH(uint32_t val) -{ - return ((val) << A5XX_TEX_CONST_1_WIDTH__SHIFT) & A5XX_TEX_CONST_1_WIDTH__MASK; -} -#define A5XX_TEX_CONST_1_HEIGHT__MASK 0x3fff8000 -#define A5XX_TEX_CONST_1_HEIGHT__SHIFT 15 -static inline uint32_t A5XX_TEX_CONST_1_HEIGHT(uint32_t val) -{ - return ((val) << A5XX_TEX_CONST_1_HEIGHT__SHIFT) & A5XX_TEX_CONST_1_HEIGHT__MASK; -} - -#define REG_A5XX_TEX_CONST_2 0x00000002 -#define A5XX_TEX_CONST_2_FETCHSIZE__MASK 0x0000000f -#define A5XX_TEX_CONST_2_FETCHSIZE__SHIFT 0 -static inline uint32_t A5XX_TEX_CONST_2_FETCHSIZE(enum a5xx_tex_fetchsize val) -{ - return ((val) << A5XX_TEX_CONST_2_FETCHSIZE__SHIFT) & A5XX_TEX_CONST_2_FETCHSIZE__MASK; -} -#define A5XX_TEX_CONST_2_PITCH__MASK 0x1fffff80 -#define A5XX_TEX_CONST_2_PITCH__SHIFT 7 -static inline uint32_t A5XX_TEX_CONST_2_PITCH(uint32_t val) -{ - return 
((val) << A5XX_TEX_CONST_2_PITCH__SHIFT) & A5XX_TEX_CONST_2_PITCH__MASK; -} -#define A5XX_TEX_CONST_2_TYPE__MASK 0x60000000 -#define A5XX_TEX_CONST_2_TYPE__SHIFT 29 -static inline uint32_t A5XX_TEX_CONST_2_TYPE(enum a5xx_tex_type val) -{ - return ((val) << A5XX_TEX_CONST_2_TYPE__SHIFT) & A5XX_TEX_CONST_2_TYPE__MASK; -} - -#define REG_A5XX_TEX_CONST_3 0x00000003 -#define A5XX_TEX_CONST_3_ARRAY_PITCH__MASK 0x00003fff -#define A5XX_TEX_CONST_3_ARRAY_PITCH__SHIFT 0 -static inline uint32_t A5XX_TEX_CONST_3_ARRAY_PITCH(uint32_t val) -{ - assert(!(val & 0xfff)); - return ((val >> 12) << A5XX_TEX_CONST_3_ARRAY_PITCH__SHIFT) & A5XX_TEX_CONST_3_ARRAY_PITCH__MASK; -} -#define A5XX_TEX_CONST_3_FLAG 0x10000000 - -#define REG_A5XX_TEX_CONST_4 0x00000004 -#define A5XX_TEX_CONST_4_BASE_LO__MASK 0xffffffe0 -#define A5XX_TEX_CONST_4_BASE_LO__SHIFT 5 -static inline uint32_t A5XX_TEX_CONST_4_BASE_LO(uint32_t val) -{ - assert(!(val & 0x1f)); - return ((val >> 5) << A5XX_TEX_CONST_4_BASE_LO__SHIFT) & A5XX_TEX_CONST_4_BASE_LO__MASK; -} - -#define REG_A5XX_TEX_CONST_5 0x00000005 -#define A5XX_TEX_CONST_5_BASE_HI__MASK 0x0001ffff -#define A5XX_TEX_CONST_5_BASE_HI__SHIFT 0 -static inline uint32_t A5XX_TEX_CONST_5_BASE_HI(uint32_t val) -{ - return ((val) << A5XX_TEX_CONST_5_BASE_HI__SHIFT) & A5XX_TEX_CONST_5_BASE_HI__MASK; -} -#define A5XX_TEX_CONST_5_DEPTH__MASK 0x3ffe0000 -#define A5XX_TEX_CONST_5_DEPTH__SHIFT 17 -static inline uint32_t A5XX_TEX_CONST_5_DEPTH(uint32_t val) -{ - return ((val) << A5XX_TEX_CONST_5_DEPTH__SHIFT) & A5XX_TEX_CONST_5_DEPTH__MASK; -} - -#define REG_A5XX_TEX_CONST_6 0x00000006 - -#define REG_A5XX_TEX_CONST_7 0x00000007 - -#define REG_A5XX_TEX_CONST_8 0x00000008 - -#define REG_A5XX_TEX_CONST_9 0x00000009 - -#define REG_A5XX_TEX_CONST_10 0x0000000a - -#define REG_A5XX_TEX_CONST_11 0x0000000b - -#define REG_A5XX_SSBO_0_0 0x00000000 -#define A5XX_SSBO_0_0_BASE_LO__MASK 0xffffffe0 -#define A5XX_SSBO_0_0_BASE_LO__SHIFT 5 -static inline uint32_t 
A5XX_SSBO_0_0_BASE_LO(uint32_t val) -{ - assert(!(val & 0x1f)); - return ((val >> 5) << A5XX_SSBO_0_0_BASE_LO__SHIFT) & A5XX_SSBO_0_0_BASE_LO__MASK; -} - -#define REG_A5XX_SSBO_0_1 0x00000001 -#define A5XX_SSBO_0_1_PITCH__MASK 0x003fffff -#define A5XX_SSBO_0_1_PITCH__SHIFT 0 -static inline uint32_t A5XX_SSBO_0_1_PITCH(uint32_t val) -{ - return ((val) << A5XX_SSBO_0_1_PITCH__SHIFT) & A5XX_SSBO_0_1_PITCH__MASK; -} - -#define REG_A5XX_SSBO_0_2 0x00000002 -#define A5XX_SSBO_0_2_ARRAY_PITCH__MASK 0x03fff000 -#define A5XX_SSBO_0_2_ARRAY_PITCH__SHIFT 12 -static inline uint32_t A5XX_SSBO_0_2_ARRAY_PITCH(uint32_t val) -{ - assert(!(val & 0xfff)); - return ((val >> 12) << A5XX_SSBO_0_2_ARRAY_PITCH__SHIFT) & A5XX_SSBO_0_2_ARRAY_PITCH__MASK; -} - -#define REG_A5XX_SSBO_0_3 0x00000003 -#define A5XX_SSBO_0_3_CPP__MASK 0x0000003f -#define A5XX_SSBO_0_3_CPP__SHIFT 0 -static inline uint32_t A5XX_SSBO_0_3_CPP(uint32_t val) -{ - return ((val) << A5XX_SSBO_0_3_CPP__SHIFT) & A5XX_SSBO_0_3_CPP__MASK; -} - -#define REG_A5XX_SSBO_1_0 0x00000000 -#define A5XX_SSBO_1_0_FMT__MASK 0x0000ff00 -#define A5XX_SSBO_1_0_FMT__SHIFT 8 -static inline uint32_t A5XX_SSBO_1_0_FMT(enum a5xx_tex_fmt val) -{ - return ((val) << A5XX_SSBO_1_0_FMT__SHIFT) & A5XX_SSBO_1_0_FMT__MASK; -} -#define A5XX_SSBO_1_0_WIDTH__MASK 0xffff0000 -#define A5XX_SSBO_1_0_WIDTH__SHIFT 16 -static inline uint32_t A5XX_SSBO_1_0_WIDTH(uint32_t val) -{ - return ((val) << A5XX_SSBO_1_0_WIDTH__SHIFT) & A5XX_SSBO_1_0_WIDTH__MASK; -} - -#define REG_A5XX_SSBO_1_1 0x00000001 -#define A5XX_SSBO_1_1_HEIGHT__MASK 0x0000ffff -#define A5XX_SSBO_1_1_HEIGHT__SHIFT 0 -static inline uint32_t A5XX_SSBO_1_1_HEIGHT(uint32_t val) -{ - return ((val) << A5XX_SSBO_1_1_HEIGHT__SHIFT) & A5XX_SSBO_1_1_HEIGHT__MASK; -} -#define A5XX_SSBO_1_1_DEPTH__MASK 0xffff0000 -#define A5XX_SSBO_1_1_DEPTH__SHIFT 16 -static inline uint32_t A5XX_SSBO_1_1_DEPTH(uint32_t val) -{ - return ((val) << A5XX_SSBO_1_1_DEPTH__SHIFT) & A5XX_SSBO_1_1_DEPTH__MASK; -} - -#define 
REG_A5XX_SSBO_2_0 0x00000000 -#define A5XX_SSBO_2_0_BASE_LO__MASK 0xffffffff -#define A5XX_SSBO_2_0_BASE_LO__SHIFT 0 -static inline uint32_t A5XX_SSBO_2_0_BASE_LO(uint32_t val) -{ - return ((val) << A5XX_SSBO_2_0_BASE_LO__SHIFT) & A5XX_SSBO_2_0_BASE_LO__MASK; -} - -#define REG_A5XX_SSBO_2_1 0x00000001 -#define A5XX_SSBO_2_1_BASE_HI__MASK 0xffffffff -#define A5XX_SSBO_2_1_BASE_HI__SHIFT 0 -static inline uint32_t A5XX_SSBO_2_1_BASE_HI(uint32_t val) -{ - return ((val) << A5XX_SSBO_2_1_BASE_HI__SHIFT) & A5XX_SSBO_2_1_BASE_HI__MASK; -} - - -#endif /* A5XX_XML */ diff -Nru mesa-18.3.3/src/gallium/drivers/freedreno/a5xx/fd5_blitter.c mesa-19.0.1/src/gallium/drivers/freedreno/a5xx/fd5_blitter.c --- mesa-18.3.3/src/gallium/drivers/freedreno/a5xx/fd5_blitter.c 2018-12-07 18:58:04.000000000 +0000 +++ mesa-19.0.1/src/gallium/drivers/freedreno/a5xx/fd5_blitter.c 2019-03-31 23:16:37.000000000 +0000 @@ -122,7 +122,8 @@ debug_assert(info->dst.box.height >= 0); debug_assert(info->dst.box.depth >= 0); - if (info->dst.resource->nr_samples + info->src.resource->nr_samples) + if ((info->dst.resource->nr_samples > 1) || + (info->src.resource->nr_samples > 1)) return false; if (info->scissor_enable) @@ -449,14 +450,13 @@ } } -void +bool fd5_blitter_blit(struct fd_context *ctx, const struct pipe_blit_info *info) { struct fd_batch *batch; if (!can_do_blit(info)) { - fd_blitter_blit(ctx, info); - return; + return false; } batch = fd_bc_alloc_batch(&ctx->screen->batch_cache, ctx, true); @@ -482,6 +482,8 @@ batch->needs_flush = true; fd_batch_flush(batch, false, false); + + return true; } unsigned diff -Nru mesa-18.3.3/src/gallium/drivers/freedreno/a5xx/fd5_blitter.h mesa-19.0.1/src/gallium/drivers/freedreno/a5xx/fd5_blitter.h --- mesa-18.3.3/src/gallium/drivers/freedreno/a5xx/fd5_blitter.h 2018-01-17 14:10:45.000000000 +0000 +++ mesa-19.0.1/src/gallium/drivers/freedreno/a5xx/fd5_blitter.h 2019-03-31 23:16:37.000000000 +0000 @@ -31,7 +31,7 @@ #include "freedreno_context.h" -void 
fd5_blitter_blit(struct fd_context *ctx, const struct pipe_blit_info *info); +bool fd5_blitter_blit(struct fd_context *ctx, const struct pipe_blit_info *info); unsigned fd5_tile_mode(const struct pipe_resource *tmpl); #endif /* FD5_BLIT_H_ */ diff -Nru mesa-18.3.3/src/gallium/drivers/freedreno/a5xx/fd5_context.c mesa-19.0.1/src/gallium/drivers/freedreno/a5xx/fd5_context.c --- mesa-18.3.3/src/gallium/drivers/freedreno/a5xx/fd5_context.c 2018-12-07 18:58:04.000000000 +0000 +++ mesa-19.0.1/src/gallium/drivers/freedreno/a5xx/fd5_context.c 2019-03-31 23:16:37.000000000 +0000 @@ -48,8 +48,6 @@ fd_context_destroy(pctx); - fd_bo_del(fd5_ctx->vs_pvt_mem); - fd_bo_del(fd5_ctx->fs_pvt_mem); fd_bo_del(fd5_ctx->vsc_size_mem); fd_bo_del(fd5_ctx->blit_mem); @@ -105,17 +103,11 @@ util_blitter_set_texture_multisample(fd5_ctx->base.blitter, true); - fd5_ctx->vs_pvt_mem = fd_bo_new(screen->dev, 0x2000, - DRM_FREEDRENO_GEM_TYPE_KMEM); - - fd5_ctx->fs_pvt_mem = fd_bo_new(screen->dev, 0x2000, - DRM_FREEDRENO_GEM_TYPE_KMEM); - fd5_ctx->vsc_size_mem = fd_bo_new(screen->dev, 0x1000, - DRM_FREEDRENO_GEM_TYPE_KMEM); + DRM_FREEDRENO_GEM_TYPE_KMEM, "vsc_size"); fd5_ctx->blit_mem = fd_bo_new(screen->dev, 0x1000, - DRM_FREEDRENO_GEM_TYPE_KMEM); + DRM_FREEDRENO_GEM_TYPE_KMEM, "blit"); fd_context_setup_common_vbos(&fd5_ctx->base); diff -Nru mesa-18.3.3/src/gallium/drivers/freedreno/a5xx/fd5_context.h mesa-19.0.1/src/gallium/drivers/freedreno/a5xx/fd5_context.h --- mesa-18.3.3/src/gallium/drivers/freedreno/a5xx/fd5_context.h 2018-12-07 18:58:04.000000000 +0000 +++ mesa-19.0.1/src/gallium/drivers/freedreno/a5xx/fd5_context.h 2019-03-31 23:16:37.000000000 +0000 @@ -31,13 +31,11 @@ #include "freedreno_context.h" -#include "ir3_shader.h" +#include "ir3/ir3_shader.h" struct fd5_context { struct fd_context base; - struct fd_bo *vs_pvt_mem, *fs_pvt_mem; - /* This only needs to be 4 * num_of_pipes bytes (ie. 32 bytes). We * could combine it with another allocation. 
*/ diff -Nru mesa-18.3.3/src/gallium/drivers/freedreno/a5xx/fd5_emit.c mesa-19.0.1/src/gallium/drivers/freedreno/a5xx/fd5_emit.c --- mesa-18.3.3/src/gallium/drivers/freedreno/a5xx/fd5_emit.c 2018-12-07 18:58:04.000000000 +0000 +++ mesa-19.0.1/src/gallium/drivers/freedreno/a5xx/fd5_emit.c 2019-03-31 23:16:37.000000000 +0000 @@ -51,7 +51,7 @@ * sizedwords: size of const value buffer */ static void -fd5_emit_const(struct fd_ringbuffer *ring, enum shader_t type, +fd5_emit_const(struct fd_ringbuffer *ring, gl_shader_stage type, uint32_t regid, uint32_t offset, uint32_t sizedwords, const uint32_t *dwords, struct pipe_resource *prsc) { @@ -90,7 +90,7 @@ } static void -fd5_emit_const_bo(struct fd_ringbuffer *ring, enum shader_t type, boolean write, +fd5_emit_const_bo(struct fd_ringbuffer *ring, gl_shader_stage type, boolean write, uint32_t regid, uint32_t num, struct pipe_resource **prscs, uint32_t *offsets) { uint32_t anum = align(num, 2); @@ -704,7 +704,7 @@ if (!emit->binning_pass) ir3_emit_fs_consts(fp, ring, ctx); - struct pipe_stream_output_info *info = &vp->shader->stream_output; + struct ir3_stream_output_info *info = &vp->shader->stream_output; if (info->num_outputs) { struct fd_streamout_stateobj *so = &ctx->streamout; diff -Nru mesa-18.3.3/src/gallium/drivers/freedreno/a5xx/fd5_emit.h mesa-19.0.1/src/gallium/drivers/freedreno/a5xx/fd5_emit.h --- mesa-18.3.3/src/gallium/drivers/freedreno/a5xx/fd5_emit.h 2018-12-07 18:58:04.000000000 +0000 +++ mesa-19.0.1/src/gallium/drivers/freedreno/a5xx/fd5_emit.h 2019-03-31 23:16:37.000000000 +0000 @@ -34,7 +34,7 @@ #include "fd5_format.h" #include "fd5_program.h" #include "fd5_screen.h" -#include "ir3_shader.h" +#include "ir3_gallium.h" struct fd_ringbuffer; diff -Nru mesa-18.3.3/src/gallium/drivers/freedreno/a5xx/fd5_gmem.c mesa-19.0.1/src/gallium/drivers/freedreno/a5xx/fd5_gmem.c --- mesa-18.3.3/src/gallium/drivers/freedreno/a5xx/fd5_gmem.c 2018-12-07 18:58:04.000000000 +0000 +++ 
mesa-19.0.1/src/gallium/drivers/freedreno/a5xx/fd5_gmem.c 2019-03-31 23:16:37.000000000 +0000 @@ -290,7 +290,7 @@ struct fd_vsc_pipe *pipe = &ctx->vsc_pipe[i]; if (!pipe->bo) { pipe->bo = fd_bo_new(ctx->dev, 0x20000, - DRM_FREEDRENO_GEM_TYPE_KMEM); + DRM_FREEDRENO_GEM_TYPE_KMEM, "vsc_pipe[%u]", i); } OUT_RELOCW(ring, pipe->bo, 0, 0, 0); /* VSC_PIPE_DATA_ADDRESS[i].LO/HI */ } diff -Nru mesa-18.3.3/src/gallium/drivers/freedreno/a5xx/fd5_program.c mesa-19.0.1/src/gallium/drivers/freedreno/a5xx/fd5_program.c --- mesa-18.3.3/src/gallium/drivers/freedreno/a5xx/fd5_program.c 2018-12-07 18:58:04.000000000 +0000 +++ mesa-19.0.1/src/gallium/drivers/freedreno/a5xx/fd5_program.c 2019-03-31 23:16:37.000000000 +0000 @@ -42,7 +42,7 @@ static struct ir3_shader * create_shader_stateobj(struct pipe_context *pctx, const struct pipe_shader_state *cso, - enum shader_t type) + gl_shader_stage type) { struct fd_context *ctx = fd_context(pctx); struct ir3_compiler *compiler = ctx->screen->compiler; @@ -53,7 +53,7 @@ fd5_fp_state_create(struct pipe_context *pctx, const struct pipe_shader_state *cso) { - return create_shader_stateobj(pctx, cso, SHADER_FRAGMENT); + return create_shader_stateobj(pctx, cso, MESA_SHADER_FRAGMENT); } static void @@ -67,7 +67,7 @@ fd5_vp_state_create(struct pipe_context *pctx, const struct pipe_shader_state *cso) { - return create_shader_stateobj(pctx, cso, SHADER_VERTEX); + return create_shader_stateobj(pctx, cso, MESA_SHADER_VERTEX); } static void @@ -105,7 +105,7 @@ CP_LOAD_STATE4_1_STATE_TYPE(ST4_SHADER)); OUT_RING(ring, CP_LOAD_STATE4_2_EXT_SRC_ADDR_HI(0)); } else { - OUT_RELOC(ring, so->bo, 0, + OUT_RELOCD(ring, so->bo, 0, CP_LOAD_STATE4_1_STATE_TYPE(ST4_SHADER), 0); } @@ -125,14 +125,14 @@ static void link_stream_out(struct ir3_shader_linkage *l, const struct ir3_shader_variant *v) { - const struct pipe_stream_output_info *strmout = &v->shader->stream_output; + const struct ir3_stream_output_info *strmout = &v->shader->stream_output; /* * First, any 
stream-out varyings not already in linkage map (ie. also * consumed by frag shader) need to be added: */ for (unsigned i = 0; i < strmout->num_outputs; i++) { - const struct pipe_stream_output *out = &strmout->output[i]; + const struct ir3_stream_output *out = &strmout->output[i]; unsigned k = out->register_index; unsigned compmask = (1 << (out->num_components + out->start_component)) - 1; @@ -173,14 +173,14 @@ emit_stream_out(struct fd_ringbuffer *ring, const struct ir3_shader_variant *v, struct ir3_shader_linkage *l) { - const struct pipe_stream_output_info *strmout = &v->shader->stream_output; + const struct ir3_stream_output_info *strmout = &v->shader->stream_output; unsigned ncomp[PIPE_MAX_SO_BUFFERS] = {0}; unsigned prog[align(l->max_loc, 2) / 2]; memset(prog, 0, sizeof(prog)); for (unsigned i = 0; i < strmout->num_outputs; i++) { - const struct pipe_stream_output *out = &strmout->output[i]; + const struct ir3_stream_output *out = &strmout->output[i]; unsigned k = out->register_index; unsigned idx; @@ -443,7 +443,7 @@ OUT_RING(ring, A5XX_SP_VS_CTRL_REG0_HALFREGFOOTPRINT(s[VS].i->max_half_reg + 1) | A5XX_SP_VS_CTRL_REG0_FULLREGFOOTPRINT(s[VS].i->max_reg + 1) | 0x6 | /* XXX seems to be always set? */ - A5XX_SP_VS_CTRL_REG0_BRANCHSTACK(0x3) | // XXX need to figure this out somehow.. + A5XX_SP_VS_CTRL_REG0_BRANCHSTACK(s[VS].v->branchstack) | COND(s[VS].v->num_samp > 0, A5XX_SP_VS_CTRL_REG0_PIXLODENABLE)); struct ir3_shader_linkage l = {0}; @@ -567,7 +567,7 @@ A5XX_SP_FS_CTRL_REG0_THREADSIZE(fssz) | A5XX_SP_FS_CTRL_REG0_HALFREGFOOTPRINT(s[FS].i->max_half_reg + 1) | A5XX_SP_FS_CTRL_REG0_FULLREGFOOTPRINT(s[FS].i->max_reg + 1) | - A5XX_SP_FS_CTRL_REG0_BRANCHSTACK(0x3) | // XXX need to figure this out somehow.. 
+ A5XX_SP_FS_CTRL_REG0_BRANCHSTACK(s[FS].v->branchstack) | COND(s[FS].v->num_samp > 0, A5XX_SP_FS_CTRL_REG0_PIXLODENABLE)); OUT_PKT4(ring, REG_A5XX_HLSQ_UPDATE_CNTL, 1); diff -Nru mesa-18.3.3/src/gallium/drivers/freedreno/a5xx/fd5_program.h mesa-19.0.1/src/gallium/drivers/freedreno/a5xx/fd5_program.h --- mesa-18.3.3/src/gallium/drivers/freedreno/a5xx/fd5_program.h 2018-09-27 19:13:54.000000000 +0000 +++ mesa-19.0.1/src/gallium/drivers/freedreno/a5xx/fd5_program.h 2019-03-31 23:16:37.000000000 +0000 @@ -29,7 +29,8 @@ #include "pipe/p_context.h" #include "freedreno_context.h" -#include "ir3_shader.h" + +#include "ir3/ir3_shader.h" struct fd5_emit; diff -Nru mesa-18.3.3/src/gallium/drivers/freedreno/a5xx/fd5_screen.c mesa-19.0.1/src/gallium/drivers/freedreno/a5xx/fd5_screen.c --- mesa-18.3.3/src/gallium/drivers/freedreno/a5xx/fd5_screen.c 2018-09-27 19:13:54.000000000 +0000 +++ mesa-19.0.1/src/gallium/drivers/freedreno/a5xx/fd5_screen.c 2019-03-31 23:16:37.000000000 +0000 @@ -33,7 +33,7 @@ #include "fd5_format.h" #include "fd5_resource.h" -#include "ir3_compiler.h" +#include "ir3/ir3_compiler.h" static bool valid_sample_count(unsigned sample_count) diff -Nru mesa-18.3.3/src/gallium/drivers/freedreno/a6xx/a6xx.xml.h mesa-19.0.1/src/gallium/drivers/freedreno/a6xx/a6xx.xml.h --- mesa-18.3.3/src/gallium/drivers/freedreno/a6xx/a6xx.xml.h 2018-12-07 18:58:04.000000000 +0000 +++ mesa-19.0.1/src/gallium/drivers/freedreno/a6xx/a6xx.xml.h 1970-01-01 00:00:00.000000000 +0000 @@ -1,5447 +0,0 @@ -#ifndef A6XX_XML -#define A6XX_XML - -/* Autogenerated file, DO NOT EDIT manually! 
- -This file was generated by the rules-ng-ng headergen tool in this git repository: -http://github.com/freedreno/envytools/ -git clone https://github.com/freedreno/envytools.git - -The rules-ng-ng source files this header was generated from are: -- /work/envytools/rnndb/adreno.xml ( 501 bytes, from 2018-07-10 14:59:32) -- /work/envytools/rnndb/freedreno_copyright.xml ( 1572 bytes, from 2018-07-10 14:59:32) -- /work/envytools/rnndb/adreno/a2xx.xml ( 37936 bytes, from 2018-10-08 20:10:47) -- /work/envytools/rnndb/adreno/adreno_common.xml ( 14201 bytes, from 2018-10-08 20:10:47) -- /work/envytools/rnndb/adreno/adreno_pm4.xml ( 42864 bytes, from 2018-10-08 20:14:26) -- /work/envytools/rnndb/adreno/a3xx.xml ( 83840 bytes, from 2018-07-10 14:59:32) -- /work/envytools/rnndb/adreno/a4xx.xml ( 112086 bytes, from 2018-07-10 14:59:32) -- /work/envytools/rnndb/adreno/a5xx.xml ( 147240 bytes, from 2018-09-28 22:41:49) -- /work/envytools/rnndb/adreno/a6xx.xml ( 140642 bytes, from 2018-10-12 21:46:25) -- /work/envytools/rnndb/adreno/a6xx_gmu.xml ( 10431 bytes, from 2018-09-28 22:41:49) -- /work/envytools/rnndb/adreno/ocmem.xml ( 1773 bytes, from 2018-07-10 14:59:32) - -Copyright (C) 2013-2018 by the following authors: -- Rob Clark (robclark) -- Ilia Mirkin (imirkin) - -Permission is hereby granted, free of charge, to any person obtaining -a copy of this software and associated documentation files (the -"Software"), to deal in the Software without restriction, including -without limitation the rights to use, copy, modify, merge, publish, -distribute, sublicense, and/or sell copies of the Software, and to -permit persons to whom the Software is furnished to do so, subject to -the following conditions: - -The above copyright notice and this permission notice (including the -next paragraph) shall be included in all copies or substantial -portions of the Software. 
- -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, -EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF -MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. -IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE -LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION -OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION -WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. -*/ - - -enum a6xx_color_fmt { - RB6_A8_UNORM = 2, - RB6_R8_UNORM = 3, - RB6_R8_SNORM = 4, - RB6_R8_UINT = 5, - RB6_R8_SINT = 6, - RB6_R4G4B4A4_UNORM = 8, - RB6_R5G5B5A1_UNORM = 10, - RB6_R5G6B5_UNORM = 14, - RB6_R8G8_UNORM = 15, - RB6_R8G8_SNORM = 16, - RB6_R8G8_UINT = 17, - RB6_R8G8_SINT = 18, - RB6_R16_UNORM = 21, - RB6_R16_SNORM = 22, - RB6_R16_FLOAT = 23, - RB6_R16_UINT = 24, - RB6_R16_SINT = 25, - RB6_R8G8B8A8_UNORM = 48, - RB6_R8G8B8_UNORM = 49, - RB6_R8G8B8A8_SNORM = 50, - RB6_R8G8B8A8_UINT = 51, - RB6_R8G8B8A8_SINT = 52, - RB6_R10G10B10A2_UNORM = 55, - RB6_R10G10B10A2_UINT = 58, - RB6_R11G11B10_FLOAT = 66, - RB6_R16G16_UNORM = 67, - RB6_R16G16_SNORM = 68, - RB6_R16G16_FLOAT = 69, - RB6_R16G16_UINT = 70, - RB6_R16G16_SINT = 71, - RB6_R32_FLOAT = 74, - RB6_R32_UINT = 75, - RB6_R32_SINT = 76, - RB6_R16G16B16A16_UNORM = 96, - RB6_R16G16B16A16_SNORM = 97, - RB6_R16G16B16A16_FLOAT = 98, - RB6_R16G16B16A16_UINT = 99, - RB6_R16G16B16A16_SINT = 100, - RB6_R32G32_FLOAT = 103, - RB6_R32G32_UINT = 104, - RB6_R32G32_SINT = 105, - RB6_R32G32B32A32_FLOAT = 130, - RB6_R32G32B32A32_UINT = 131, - RB6_R32G32B32A32_SINT = 132, - RB6_X8Z24_UNORM = 160, -}; - -enum a6xx_tile_mode { - TILE6_LINEAR = 0, - TILE6_2 = 2, - TILE6_3 = 3, -}; - -enum a6xx_vtx_fmt { - VFMT6_8_UNORM = 3, - VFMT6_8_SNORM = 4, - VFMT6_8_UINT = 5, - VFMT6_8_SINT = 6, - VFMT6_8_8_UNORM = 15, - VFMT6_8_8_SNORM = 16, - VFMT6_8_8_UINT = 17, - VFMT6_8_8_SINT = 18, - VFMT6_16_UNORM = 21, - VFMT6_16_SNORM = 22, - VFMT6_16_FLOAT = 23, - 
VFMT6_16_UINT = 24, - VFMT6_16_SINT = 25, - VFMT6_8_8_8_UNORM = 33, - VFMT6_8_8_8_SNORM = 34, - VFMT6_8_8_8_UINT = 35, - VFMT6_8_8_8_SINT = 36, - VFMT6_8_8_8_8_UNORM = 48, - VFMT6_8_8_8_8_SNORM = 50, - VFMT6_8_8_8_8_UINT = 51, - VFMT6_8_8_8_8_SINT = 52, - VFMT6_10_10_10_2_UNORM = 54, - VFMT6_10_10_10_2_SNORM = 57, - VFMT6_10_10_10_2_UINT = 58, - VFMT6_10_10_10_2_SINT = 59, - VFMT6_11_11_10_FLOAT = 66, - VFMT6_16_16_UNORM = 67, - VFMT6_16_16_SNORM = 68, - VFMT6_16_16_FLOAT = 69, - VFMT6_16_16_UINT = 70, - VFMT6_16_16_SINT = 71, - VFMT6_32_UNORM = 72, - VFMT6_32_SNORM = 73, - VFMT6_32_FLOAT = 74, - VFMT6_32_UINT = 75, - VFMT6_32_SINT = 76, - VFMT6_32_FIXED = 77, - VFMT6_16_16_16_UNORM = 88, - VFMT6_16_16_16_SNORM = 89, - VFMT6_16_16_16_FLOAT = 90, - VFMT6_16_16_16_UINT = 91, - VFMT6_16_16_16_SINT = 92, - VFMT6_16_16_16_16_UNORM = 96, - VFMT6_16_16_16_16_SNORM = 97, - VFMT6_16_16_16_16_FLOAT = 98, - VFMT6_16_16_16_16_UINT = 99, - VFMT6_16_16_16_16_SINT = 100, - VFMT6_32_32_UNORM = 101, - VFMT6_32_32_SNORM = 102, - VFMT6_32_32_FLOAT = 103, - VFMT6_32_32_UINT = 104, - VFMT6_32_32_SINT = 105, - VFMT6_32_32_FIXED = 106, - VFMT6_32_32_32_UNORM = 112, - VFMT6_32_32_32_SNORM = 113, - VFMT6_32_32_32_UINT = 114, - VFMT6_32_32_32_SINT = 115, - VFMT6_32_32_32_FLOAT = 116, - VFMT6_32_32_32_FIXED = 117, - VFMT6_32_32_32_32_UNORM = 128, - VFMT6_32_32_32_32_SNORM = 129, - VFMT6_32_32_32_32_FLOAT = 130, - VFMT6_32_32_32_32_UINT = 131, - VFMT6_32_32_32_32_SINT = 132, - VFMT6_32_32_32_32_FIXED = 133, -}; - -enum a6xx_tex_fmt { - TFMT6_A8_UNORM = 2, - TFMT6_8_UNORM = 3, - TFMT6_8_SNORM = 4, - TFMT6_8_UINT = 5, - TFMT6_8_SINT = 6, - TFMT6_4_4_4_4_UNORM = 8, - TFMT6_5_5_5_1_UNORM = 10, - TFMT6_5_6_5_UNORM = 14, - TFMT6_8_8_UNORM = 15, - TFMT6_8_8_SNORM = 16, - TFMT6_8_8_UINT = 17, - TFMT6_8_8_SINT = 18, - TFMT6_L8_A8_UNORM = 19, - TFMT6_16_UNORM = 21, - TFMT6_16_SNORM = 22, - TFMT6_16_FLOAT = 23, - TFMT6_16_UINT = 24, - TFMT6_16_SINT = 25, - TFMT6_8_8_8_8_UNORM = 48, - TFMT6_8_8_8_UNORM = 
49, - TFMT6_8_8_8_8_SNORM = 50, - TFMT6_8_8_8_8_UINT = 51, - TFMT6_8_8_8_8_SINT = 52, - TFMT6_9_9_9_E5_FLOAT = 53, - TFMT6_10_10_10_2_UNORM = 54, - TFMT6_10_10_10_2_UINT = 58, - TFMT6_11_11_10_FLOAT = 66, - TFMT6_16_16_UNORM = 67, - TFMT6_16_16_SNORM = 68, - TFMT6_16_16_FLOAT = 69, - TFMT6_16_16_UINT = 70, - TFMT6_16_16_SINT = 71, - TFMT6_32_FLOAT = 74, - TFMT6_32_UINT = 75, - TFMT6_32_SINT = 76, - TFMT6_16_16_16_16_UNORM = 96, - TFMT6_16_16_16_16_SNORM = 97, - TFMT6_16_16_16_16_FLOAT = 98, - TFMT6_16_16_16_16_UINT = 99, - TFMT6_16_16_16_16_SINT = 100, - TFMT6_32_32_FLOAT = 103, - TFMT6_32_32_UINT = 104, - TFMT6_32_32_SINT = 105, - TFMT6_32_32_32_UINT = 114, - TFMT6_32_32_32_SINT = 115, - TFMT6_32_32_32_FLOAT = 116, - TFMT6_32_32_32_32_FLOAT = 130, - TFMT6_32_32_32_32_UINT = 131, - TFMT6_32_32_32_32_SINT = 132, - TFMT6_X8Z24_UNORM = 160, - TFMT6_ETC2_RG11_UNORM = 171, - TFMT6_ETC2_RG11_SNORM = 172, - TFMT6_ETC2_R11_UNORM = 173, - TFMT6_ETC2_R11_SNORM = 174, - TFMT6_ETC1 = 175, - TFMT6_ETC2_RGB8 = 176, - TFMT6_ETC2_RGBA8 = 177, - TFMT6_ETC2_RGB8A1 = 178, - TFMT6_DXT1 = 179, - TFMT6_DXT3 = 180, - TFMT6_DXT5 = 181, - TFMT6_RGTC1_UNORM = 183, - TFMT6_RGTC1_SNORM = 184, - TFMT6_RGTC2_UNORM = 187, - TFMT6_RGTC2_SNORM = 188, - TFMT6_BPTC_UFLOAT = 190, - TFMT6_BPTC_FLOAT = 191, - TFMT6_BPTC = 192, - TFMT6_ASTC_4x4 = 193, - TFMT6_ASTC_5x4 = 194, - TFMT6_ASTC_5x5 = 195, - TFMT6_ASTC_6x5 = 196, - TFMT6_ASTC_6x6 = 197, - TFMT6_ASTC_8x5 = 198, - TFMT6_ASTC_8x6 = 199, - TFMT6_ASTC_8x8 = 200, - TFMT6_ASTC_10x5 = 201, - TFMT6_ASTC_10x6 = 202, - TFMT6_ASTC_10x8 = 203, - TFMT6_ASTC_10x10 = 204, - TFMT6_ASTC_12x10 = 205, - TFMT6_ASTC_12x12 = 206, -}; - -enum a6xx_tex_fetchsize { - TFETCH6_1_BYTE = 0, - TFETCH6_2_BYTE = 1, - TFETCH6_4_BYTE = 2, - TFETCH6_8_BYTE = 3, - TFETCH6_16_BYTE = 4, -}; - -enum a6xx_depth_format { - DEPTH6_NONE = 0, - DEPTH6_16 = 1, - DEPTH6_24_8 = 2, - DEPTH6_32 = 4, -}; - -enum a6xx_shader_id { - A6XX_TP0_TMO_DATA = 9, - A6XX_TP0_SMO_DATA = 10, - 
A6XX_TP0_MIPMAP_BASE_DATA = 11, - A6XX_TP1_TMO_DATA = 25, - A6XX_TP1_SMO_DATA = 26, - A6XX_TP1_MIPMAP_BASE_DATA = 27, - A6XX_SP_INST_DATA = 41, - A6XX_SP_LB_0_DATA = 42, - A6XX_SP_LB_1_DATA = 43, - A6XX_SP_LB_2_DATA = 44, - A6XX_SP_LB_3_DATA = 45, - A6XX_SP_LB_4_DATA = 46, - A6XX_SP_LB_5_DATA = 47, - A6XX_SP_CB_BINDLESS_DATA = 48, - A6XX_SP_CB_LEGACY_DATA = 49, - A6XX_SP_UAV_DATA = 50, - A6XX_SP_INST_TAG = 51, - A6XX_SP_CB_BINDLESS_TAG = 52, - A6XX_SP_TMO_UMO_TAG = 53, - A6XX_SP_SMO_TAG = 54, - A6XX_SP_STATE_DATA = 55, - A6XX_HLSQ_CHUNK_CVS_RAM = 73, - A6XX_HLSQ_CHUNK_CPS_RAM = 74, - A6XX_HLSQ_CHUNK_CVS_RAM_TAG = 75, - A6XX_HLSQ_CHUNK_CPS_RAM_TAG = 76, - A6XX_HLSQ_ICB_CVS_CB_BASE_TAG = 77, - A6XX_HLSQ_ICB_CPS_CB_BASE_TAG = 78, - A6XX_HLSQ_CVS_MISC_RAM = 80, - A6XX_HLSQ_CPS_MISC_RAM = 81, - A6XX_HLSQ_INST_RAM = 82, - A6XX_HLSQ_GFX_CVS_CONST_RAM = 83, - A6XX_HLSQ_GFX_CPS_CONST_RAM = 84, - A6XX_HLSQ_CVS_MISC_RAM_TAG = 85, - A6XX_HLSQ_CPS_MISC_RAM_TAG = 86, - A6XX_HLSQ_INST_RAM_TAG = 87, - A6XX_HLSQ_GFX_CVS_CONST_RAM_TAG = 88, - A6XX_HLSQ_GFX_CPS_CONST_RAM_TAG = 89, - A6XX_HLSQ_PWR_REST_RAM = 90, - A6XX_HLSQ_PWR_REST_TAG = 91, - A6XX_HLSQ_DATAPATH_META = 96, - A6XX_HLSQ_FRONTEND_META = 97, - A6XX_HLSQ_INDIRECT_META = 98, - A6XX_HLSQ_BACKEND_META = 99, -}; - -enum a6xx_debugbus_id { - A6XX_DBGBUS_CP = 1, - A6XX_DBGBUS_RBBM = 2, - A6XX_DBGBUS_VBIF = 3, - A6XX_DBGBUS_HLSQ = 4, - A6XX_DBGBUS_UCHE = 5, - A6XX_DBGBUS_DPM = 6, - A6XX_DBGBUS_TESS = 7, - A6XX_DBGBUS_PC = 8, - A6XX_DBGBUS_VFDP = 9, - A6XX_DBGBUS_VPC = 10, - A6XX_DBGBUS_TSE = 11, - A6XX_DBGBUS_RAS = 12, - A6XX_DBGBUS_VSC = 13, - A6XX_DBGBUS_COM = 14, - A6XX_DBGBUS_LRZ = 16, - A6XX_DBGBUS_A2D = 17, - A6XX_DBGBUS_CCUFCHE = 18, - A6XX_DBGBUS_GMU_CX = 19, - A6XX_DBGBUS_RBP = 20, - A6XX_DBGBUS_DCS = 21, - A6XX_DBGBUS_DBGC = 22, - A6XX_DBGBUS_CX = 23, - A6XX_DBGBUS_GMU_GX = 24, - A6XX_DBGBUS_TPFCHE = 25, - A6XX_DBGBUS_GBIF_GX = 26, - A6XX_DBGBUS_GPC = 29, - A6XX_DBGBUS_LARC = 30, - A6XX_DBGBUS_HLSQ_SPTP = 31, - 
A6XX_DBGBUS_RB_0 = 32, - A6XX_DBGBUS_RB_1 = 33, - A6XX_DBGBUS_UCHE_WRAPPER = 36, - A6XX_DBGBUS_CCU_0 = 40, - A6XX_DBGBUS_CCU_1 = 41, - A6XX_DBGBUS_VFD_0 = 56, - A6XX_DBGBUS_VFD_1 = 57, - A6XX_DBGBUS_VFD_2 = 58, - A6XX_DBGBUS_VFD_3 = 59, - A6XX_DBGBUS_SP_0 = 64, - A6XX_DBGBUS_SP_1 = 65, - A6XX_DBGBUS_TPL1_0 = 72, - A6XX_DBGBUS_TPL1_1 = 73, - A6XX_DBGBUS_TPL1_2 = 74, - A6XX_DBGBUS_TPL1_3 = 75, -}; - -enum a6xx_cp_perfcounter_select { - PERF_CP_ALWAYS_COUNT = 0, - PERF_CP_BUSY_GFX_CORE_IDLE = 1, - PERF_CP_BUSY_CYCLES = 2, - PERF_CP_NUM_PREEMPTIONS = 3, - PERF_CP_PREEMPTION_REACTION_DELAY = 4, - PERF_CP_PREEMPTION_SWITCH_OUT_TIME = 5, - PERF_CP_PREEMPTION_SWITCH_IN_TIME = 6, - PERF_CP_DEAD_DRAWS_IN_BIN_RENDER = 7, - PERF_CP_PREDICATED_DRAWS_KILLED = 8, - PERF_CP_MODE_SWITCH = 9, - PERF_CP_ZPASS_DONE = 10, - PERF_CP_CONTEXT_DONE = 11, - PERF_CP_CACHE_FLUSH = 12, - PERF_CP_LONG_PREEMPTIONS = 13, - PERF_CP_SQE_I_CACHE_STARVE = 14, - PERF_CP_SQE_IDLE = 15, - PERF_CP_SQE_PM4_STARVE_RB_IB = 16, - PERF_CP_SQE_PM4_STARVE_SDS = 17, - PERF_CP_SQE_MRB_STARVE = 18, - PERF_CP_SQE_RRB_STARVE = 19, - PERF_CP_SQE_VSD_STARVE = 20, - PERF_CP_VSD_DECODE_STARVE = 21, - PERF_CP_SQE_PIPE_OUT_STALL = 22, - PERF_CP_SQE_SYNC_STALL = 23, - PERF_CP_SQE_PM4_WFI_STALL = 24, - PERF_CP_SQE_SYS_WFI_STALL = 25, - PERF_CP_SQE_T4_EXEC = 26, - PERF_CP_SQE_LOAD_STATE_EXEC = 27, - PERF_CP_SQE_SAVE_SDS_STATE = 28, - PERF_CP_SQE_DRAW_EXEC = 29, - PERF_CP_SQE_CTXT_REG_BUNCH_EXEC = 30, - PERF_CP_SQE_EXEC_PROFILED = 31, - PERF_CP_MEMORY_POOL_EMPTY = 32, - PERF_CP_MEMORY_POOL_SYNC_STALL = 33, - PERF_CP_MEMORY_POOL_ABOVE_THRESH = 34, - PERF_CP_AHB_WR_STALL_PRE_DRAWS = 35, - PERF_CP_AHB_STALL_SQE_GMU = 36, - PERF_CP_AHB_STALL_SQE_WR_OTHER = 37, - PERF_CP_AHB_STALL_SQE_RD_OTHER = 38, - PERF_CP_CLUSTER0_EMPTY = 39, - PERF_CP_CLUSTER1_EMPTY = 40, - PERF_CP_CLUSTER2_EMPTY = 41, - PERF_CP_CLUSTER3_EMPTY = 42, - PERF_CP_CLUSTER4_EMPTY = 43, - PERF_CP_CLUSTER5_EMPTY = 44, - PERF_CP_PM4_DATA = 45, - PERF_CP_PM4_HEADERS = 
46, - PERF_CP_VBIF_READ_BEATS = 47, - PERF_CP_VBIF_WRITE_BEATS = 48, - PERF_CP_SQE_INSTR_COUNTER = 49, -}; - -enum a6xx_rbbm_perfcounter_select { - PERF_RBBM_ALWAYS_COUNT = 0, - PERF_RBBM_ALWAYS_ON = 1, - PERF_RBBM_TSE_BUSY = 2, - PERF_RBBM_RAS_BUSY = 3, - PERF_RBBM_PC_DCALL_BUSY = 4, - PERF_RBBM_PC_VSD_BUSY = 5, - PERF_RBBM_STATUS_MASKED = 6, - PERF_RBBM_COM_BUSY = 7, - PERF_RBBM_DCOM_BUSY = 8, - PERF_RBBM_VBIF_BUSY = 9, - PERF_RBBM_VSC_BUSY = 10, - PERF_RBBM_TESS_BUSY = 11, - PERF_RBBM_UCHE_BUSY = 12, - PERF_RBBM_HLSQ_BUSY = 13, -}; - -enum a6xx_pc_perfcounter_select { - PERF_PC_BUSY_CYCLES = 0, - PERF_PC_WORKING_CYCLES = 1, - PERF_PC_STALL_CYCLES_VFD = 2, - PERF_PC_STALL_CYCLES_TSE = 3, - PERF_PC_STALL_CYCLES_VPC = 4, - PERF_PC_STALL_CYCLES_UCHE = 5, - PERF_PC_STALL_CYCLES_TESS = 6, - PERF_PC_STALL_CYCLES_TSE_ONLY = 7, - PERF_PC_STALL_CYCLES_VPC_ONLY = 8, - PERF_PC_PASS1_TF_STALL_CYCLES = 9, - PERF_PC_STARVE_CYCLES_FOR_INDEX = 10, - PERF_PC_STARVE_CYCLES_FOR_TESS_FACTOR = 11, - PERF_PC_STARVE_CYCLES_FOR_VIZ_STREAM = 12, - PERF_PC_STARVE_CYCLES_FOR_POSITION = 13, - PERF_PC_STARVE_CYCLES_DI = 14, - PERF_PC_VIS_STREAMS_LOADED = 15, - PERF_PC_INSTANCES = 16, - PERF_PC_VPC_PRIMITIVES = 17, - PERF_PC_DEAD_PRIM = 18, - PERF_PC_LIVE_PRIM = 19, - PERF_PC_VERTEX_HITS = 20, - PERF_PC_IA_VERTICES = 21, - PERF_PC_IA_PRIMITIVES = 22, - PERF_PC_GS_PRIMITIVES = 23, - PERF_PC_HS_INVOCATIONS = 24, - PERF_PC_DS_INVOCATIONS = 25, - PERF_PC_VS_INVOCATIONS = 26, - PERF_PC_GS_INVOCATIONS = 27, - PERF_PC_DS_PRIMITIVES = 28, - PERF_PC_VPC_POS_DATA_TRANSACTION = 29, - PERF_PC_3D_DRAWCALLS = 30, - PERF_PC_2D_DRAWCALLS = 31, - PERF_PC_NON_DRAWCALL_GLOBAL_EVENTS = 32, - PERF_TESS_BUSY_CYCLES = 33, - PERF_TESS_WORKING_CYCLES = 34, - PERF_TESS_STALL_CYCLES_PC = 35, - PERF_TESS_STARVE_CYCLES_PC = 36, - PERF_PC_TSE_TRANSACTION = 37, - PERF_PC_TSE_VERTEX = 38, - PERF_PC_TESS_PC_UV_TRANS = 39, - PERF_PC_TESS_PC_UV_PATCHES = 40, - PERF_PC_TESS_FACTOR_TRANS = 41, -}; - -enum 
a6xx_vfd_perfcounter_select { - PERF_VFD_BUSY_CYCLES = 0, - PERF_VFD_STALL_CYCLES_UCHE = 1, - PERF_VFD_STALL_CYCLES_VPC_ALLOC = 2, - PERF_VFD_STALL_CYCLES_SP_INFO = 3, - PERF_VFD_STALL_CYCLES_SP_ATTR = 4, - PERF_VFD_STARVE_CYCLES_UCHE = 5, - PERF_VFD_RBUFFER_FULL = 6, - PERF_VFD_ATTR_INFO_FIFO_FULL = 7, - PERF_VFD_DECODED_ATTRIBUTE_BYTES = 8, - PERF_VFD_NUM_ATTRIBUTES = 9, - PERF_VFD_UPPER_SHADER_FIBERS = 10, - PERF_VFD_LOWER_SHADER_FIBERS = 11, - PERF_VFD_MODE_0_FIBERS = 12, - PERF_VFD_MODE_1_FIBERS = 13, - PERF_VFD_MODE_2_FIBERS = 14, - PERF_VFD_MODE_3_FIBERS = 15, - PERF_VFD_MODE_4_FIBERS = 16, - PERF_VFD_TOTAL_VERTICES = 17, - PERF_VFDP_STALL_CYCLES_VFD = 18, - PERF_VFDP_STALL_CYCLES_VFD_INDEX = 19, - PERF_VFDP_STALL_CYCLES_VFD_PROG = 20, - PERF_VFDP_STARVE_CYCLES_PC = 21, - PERF_VFDP_VS_STAGE_WAVES = 22, -}; - -enum a6xx_hlsq_perfcounter_select { - PERF_HLSQ_BUSY_CYCLES = 0, - PERF_HLSQ_STALL_CYCLES_UCHE = 1, - PERF_HLSQ_STALL_CYCLES_SP_STATE = 2, - PERF_HLSQ_STALL_CYCLES_SP_FS_STAGE = 3, - PERF_HLSQ_UCHE_LATENCY_CYCLES = 4, - PERF_HLSQ_UCHE_LATENCY_COUNT = 5, - PERF_HLSQ_FS_STAGE_1X_WAVES = 6, - PERF_HLSQ_FS_STAGE_2X_WAVES = 7, - PERF_HLSQ_QUADS = 8, - PERF_HLSQ_CS_INVOCATIONS = 9, - PERF_HLSQ_COMPUTE_DRAWCALLS = 10, - PERF_HLSQ_FS_DATA_WAIT_PROGRAMMING = 11, - PERF_HLSQ_DUAL_FS_PROG_ACTIVE = 12, - PERF_HLSQ_DUAL_VS_PROG_ACTIVE = 13, - PERF_HLSQ_FS_BATCH_COUNT_ZERO = 14, - PERF_HLSQ_VS_BATCH_COUNT_ZERO = 15, - PERF_HLSQ_WAVE_PENDING_NO_QUAD = 16, - PERF_HLSQ_WAVE_PENDING_NO_PRIM_BASE = 17, - PERF_HLSQ_STALL_CYCLES_VPC = 18, - PERF_HLSQ_PIXELS = 19, - PERF_HLSQ_DRAW_MODE_SWITCH_VSFS_SYNC = 20, -}; - -enum a6xx_vpc_perfcounter_select { - PERF_VPC_BUSY_CYCLES = 0, - PERF_VPC_WORKING_CYCLES = 1, - PERF_VPC_STALL_CYCLES_UCHE = 2, - PERF_VPC_STALL_CYCLES_VFD_WACK = 3, - PERF_VPC_STALL_CYCLES_HLSQ_PRIM_ALLOC = 4, - PERF_VPC_STALL_CYCLES_PC = 5, - PERF_VPC_STALL_CYCLES_SP_LM = 6, - PERF_VPC_STARVE_CYCLES_SP = 7, - PERF_VPC_STARVE_CYCLES_LRZ = 8, - 
PERF_VPC_PC_PRIMITIVES = 9, - PERF_VPC_SP_COMPONENTS = 10, - PERF_VPC_STALL_CYCLES_VPCRAM_POS = 11, - PERF_VPC_LRZ_ASSIGN_PRIMITIVES = 12, - PERF_VPC_RB_VISIBLE_PRIMITIVES = 13, - PERF_VPC_LM_TRANSACTION = 14, - PERF_VPC_STREAMOUT_TRANSACTION = 15, - PERF_VPC_VS_BUSY_CYCLES = 16, - PERF_VPC_PS_BUSY_CYCLES = 17, - PERF_VPC_VS_WORKING_CYCLES = 18, - PERF_VPC_PS_WORKING_CYCLES = 19, - PERF_VPC_STARVE_CYCLES_RB = 20, - PERF_VPC_NUM_VPCRAM_READ_POS = 21, - PERF_VPC_WIT_FULL_CYCLES = 22, - PERF_VPC_VPCRAM_FULL_CYCLES = 23, - PERF_VPC_LM_FULL_WAIT_FOR_INTP_END = 24, - PERF_VPC_NUM_VPCRAM_WRITE = 25, - PERF_VPC_NUM_VPCRAM_READ_SO = 26, - PERF_VPC_NUM_ATTR_REQ_LM = 27, -}; - -enum a6xx_tse_perfcounter_select { - PERF_TSE_BUSY_CYCLES = 0, - PERF_TSE_CLIPPING_CYCLES = 1, - PERF_TSE_STALL_CYCLES_RAS = 2, - PERF_TSE_STALL_CYCLES_LRZ_BARYPLANE = 3, - PERF_TSE_STALL_CYCLES_LRZ_ZPLANE = 4, - PERF_TSE_STARVE_CYCLES_PC = 5, - PERF_TSE_INPUT_PRIM = 6, - PERF_TSE_INPUT_NULL_PRIM = 7, - PERF_TSE_TRIVAL_REJ_PRIM = 8, - PERF_TSE_CLIPPED_PRIM = 9, - PERF_TSE_ZERO_AREA_PRIM = 10, - PERF_TSE_FACENESS_CULLED_PRIM = 11, - PERF_TSE_ZERO_PIXEL_PRIM = 12, - PERF_TSE_OUTPUT_NULL_PRIM = 13, - PERF_TSE_OUTPUT_VISIBLE_PRIM = 14, - PERF_TSE_CINVOCATION = 15, - PERF_TSE_CPRIMITIVES = 16, - PERF_TSE_2D_INPUT_PRIM = 17, - PERF_TSE_2D_ALIVE_CYCLES = 18, - PERF_TSE_CLIP_PLANES = 19, -}; - -enum a6xx_ras_perfcounter_select { - PERF_RAS_BUSY_CYCLES = 0, - PERF_RAS_SUPERTILE_ACTIVE_CYCLES = 1, - PERF_RAS_STALL_CYCLES_LRZ = 2, - PERF_RAS_STARVE_CYCLES_TSE = 3, - PERF_RAS_SUPER_TILES = 4, - PERF_RAS_8X4_TILES = 5, - PERF_RAS_MASKGEN_ACTIVE = 6, - PERF_RAS_FULLY_COVERED_SUPER_TILES = 7, - PERF_RAS_FULLY_COVERED_8X4_TILES = 8, - PERF_RAS_PRIM_KILLED_INVISILBE = 9, - PERF_RAS_SUPERTILE_GEN_ACTIVE_CYCLES = 10, - PERF_RAS_LRZ_INTF_WORKING_CYCLES = 11, - PERF_RAS_BLOCKS = 12, -}; - -enum a6xx_uche_perfcounter_select { - PERF_UCHE_BUSY_CYCLES = 0, - PERF_UCHE_STALL_CYCLES_ARBITER = 1, - PERF_UCHE_VBIF_LATENCY_CYCLES 
= 2, - PERF_UCHE_VBIF_LATENCY_SAMPLES = 3, - PERF_UCHE_VBIF_READ_BEATS_TP = 4, - PERF_UCHE_VBIF_READ_BEATS_VFD = 5, - PERF_UCHE_VBIF_READ_BEATS_HLSQ = 6, - PERF_UCHE_VBIF_READ_BEATS_LRZ = 7, - PERF_UCHE_VBIF_READ_BEATS_SP = 8, - PERF_UCHE_READ_REQUESTS_TP = 9, - PERF_UCHE_READ_REQUESTS_VFD = 10, - PERF_UCHE_READ_REQUESTS_HLSQ = 11, - PERF_UCHE_READ_REQUESTS_LRZ = 12, - PERF_UCHE_READ_REQUESTS_SP = 13, - PERF_UCHE_WRITE_REQUESTS_LRZ = 14, - PERF_UCHE_WRITE_REQUESTS_SP = 15, - PERF_UCHE_WRITE_REQUESTS_VPC = 16, - PERF_UCHE_WRITE_REQUESTS_VSC = 17, - PERF_UCHE_EVICTS = 18, - PERF_UCHE_BANK_REQ0 = 19, - PERF_UCHE_BANK_REQ1 = 20, - PERF_UCHE_BANK_REQ2 = 21, - PERF_UCHE_BANK_REQ3 = 22, - PERF_UCHE_BANK_REQ4 = 23, - PERF_UCHE_BANK_REQ5 = 24, - PERF_UCHE_BANK_REQ6 = 25, - PERF_UCHE_BANK_REQ7 = 26, - PERF_UCHE_VBIF_READ_BEATS_CH0 = 27, - PERF_UCHE_VBIF_READ_BEATS_CH1 = 28, - PERF_UCHE_GMEM_READ_BEATS = 29, - PERF_UCHE_TPH_REF_FULL = 30, - PERF_UCHE_TPH_VICTIM_FULL = 31, - PERF_UCHE_TPH_EXT_FULL = 32, - PERF_UCHE_VBIF_STALL_WRITE_DATA = 33, - PERF_UCHE_DCMP_LATENCY_SAMPLES = 34, - PERF_UCHE_DCMP_LATENCY_CYCLES = 35, - PERF_UCHE_VBIF_READ_BEATS_PC = 36, - PERF_UCHE_READ_REQUESTS_PC = 37, - PERF_UCHE_RAM_READ_REQ = 38, - PERF_UCHE_RAM_WRITE_REQ = 39, -}; - -enum a6xx_tp_perfcounter_select { - PERF_TP_BUSY_CYCLES = 0, - PERF_TP_STALL_CYCLES_UCHE = 1, - PERF_TP_LATENCY_CYCLES = 2, - PERF_TP_LATENCY_TRANS = 3, - PERF_TP_FLAG_CACHE_REQUEST_SAMPLES = 4, - PERF_TP_FLAG_CACHE_REQUEST_LATENCY = 5, - PERF_TP_L1_CACHELINE_REQUESTS = 6, - PERF_TP_L1_CACHELINE_MISSES = 7, - PERF_TP_SP_TP_TRANS = 8, - PERF_TP_TP_SP_TRANS = 9, - PERF_TP_OUTPUT_PIXELS = 10, - PERF_TP_FILTER_WORKLOAD_16BIT = 11, - PERF_TP_FILTER_WORKLOAD_32BIT = 12, - PERF_TP_QUADS_RECEIVED = 13, - PERF_TP_QUADS_OFFSET = 14, - PERF_TP_QUADS_SHADOW = 15, - PERF_TP_QUADS_ARRAY = 16, - PERF_TP_QUADS_GRADIENT = 17, - PERF_TP_QUADS_1D = 18, - PERF_TP_QUADS_2D = 19, - PERF_TP_QUADS_BUFFER = 20, - PERF_TP_QUADS_3D = 21, - 
PERF_TP_QUADS_CUBE = 22, - PERF_TP_DIVERGENT_QUADS_RECEIVED = 23, - PERF_TP_PRT_NON_RESIDENT_EVENTS = 24, - PERF_TP_OUTPUT_PIXELS_POINT = 25, - PERF_TP_OUTPUT_PIXELS_BILINEAR = 26, - PERF_TP_OUTPUT_PIXELS_MIP = 27, - PERF_TP_OUTPUT_PIXELS_ANISO = 28, - PERF_TP_OUTPUT_PIXELS_ZERO_LOD = 29, - PERF_TP_FLAG_CACHE_REQUESTS = 30, - PERF_TP_FLAG_CACHE_MISSES = 31, - PERF_TP_L1_5_L2_REQUESTS = 32, - PERF_TP_2D_OUTPUT_PIXELS = 33, - PERF_TP_2D_OUTPUT_PIXELS_POINT = 34, - PERF_TP_2D_OUTPUT_PIXELS_BILINEAR = 35, - PERF_TP_2D_FILTER_WORKLOAD_16BIT = 36, - PERF_TP_2D_FILTER_WORKLOAD_32BIT = 37, - PERF_TP_TPA2TPC_TRANS = 38, - PERF_TP_L1_MISSES_ASTC_1TILE = 39, - PERF_TP_L1_MISSES_ASTC_2TILE = 40, - PERF_TP_L1_MISSES_ASTC_4TILE = 41, - PERF_TP_L1_5_L2_COMPRESS_REQS = 42, - PERF_TP_L1_5_L2_COMPRESS_MISS = 43, - PERF_TP_L1_BANK_CONFLICT = 44, - PERF_TP_L1_5_MISS_LATENCY_CYCLES = 45, - PERF_TP_L1_5_MISS_LATENCY_TRANS = 46, - PERF_TP_QUADS_CONSTANT_MULTIPLIED = 47, - PERF_TP_FRONTEND_WORKING_CYCLES = 48, - PERF_TP_L1_TAG_WORKING_CYCLES = 49, - PERF_TP_L1_DATA_WRITE_WORKING_CYCLES = 50, - PERF_TP_PRE_L1_DECOM_WORKING_CYCLES = 51, - PERF_TP_BACKEND_WORKING_CYCLES = 52, - PERF_TP_FLAG_CACHE_WORKING_CYCLES = 53, - PERF_TP_L1_5_CACHE_WORKING_CYCLES = 54, - PERF_TP_STARVE_CYCLES_SP = 55, - PERF_TP_STARVE_CYCLES_UCHE = 56, -}; - -enum a6xx_sp_perfcounter_select { - PERF_SP_BUSY_CYCLES = 0, - PERF_SP_ALU_WORKING_CYCLES = 1, - PERF_SP_EFU_WORKING_CYCLES = 2, - PERF_SP_STALL_CYCLES_VPC = 3, - PERF_SP_STALL_CYCLES_TP = 4, - PERF_SP_STALL_CYCLES_UCHE = 5, - PERF_SP_STALL_CYCLES_RB = 6, - PERF_SP_NON_EXECUTION_CYCLES = 7, - PERF_SP_WAVE_CONTEXTS = 8, - PERF_SP_WAVE_CONTEXT_CYCLES = 9, - PERF_SP_FS_STAGE_WAVE_CYCLES = 10, - PERF_SP_FS_STAGE_WAVE_SAMPLES = 11, - PERF_SP_VS_STAGE_WAVE_CYCLES = 12, - PERF_SP_VS_STAGE_WAVE_SAMPLES = 13, - PERF_SP_FS_STAGE_DURATION_CYCLES = 14, - PERF_SP_VS_STAGE_DURATION_CYCLES = 15, - PERF_SP_WAVE_CTRL_CYCLES = 16, - PERF_SP_WAVE_LOAD_CYCLES = 17, - 
PERF_SP_WAVE_EMIT_CYCLES = 18, - PERF_SP_WAVE_NOP_CYCLES = 19, - PERF_SP_WAVE_WAIT_CYCLES = 20, - PERF_SP_WAVE_FETCH_CYCLES = 21, - PERF_SP_WAVE_IDLE_CYCLES = 22, - PERF_SP_WAVE_END_CYCLES = 23, - PERF_SP_WAVE_LONG_SYNC_CYCLES = 24, - PERF_SP_WAVE_SHORT_SYNC_CYCLES = 25, - PERF_SP_WAVE_JOIN_CYCLES = 26, - PERF_SP_LM_LOAD_INSTRUCTIONS = 27, - PERF_SP_LM_STORE_INSTRUCTIONS = 28, - PERF_SP_LM_ATOMICS = 29, - PERF_SP_GM_LOAD_INSTRUCTIONS = 30, - PERF_SP_GM_STORE_INSTRUCTIONS = 31, - PERF_SP_GM_ATOMICS = 32, - PERF_SP_VS_STAGE_TEX_INSTRUCTIONS = 33, - PERF_SP_VS_STAGE_EFU_INSTRUCTIONS = 34, - PERF_SP_VS_STAGE_FULL_ALU_INSTRUCTIONS = 35, - PERF_SP_VS_STAGE_HALF_ALU_INSTRUCTIONS = 36, - PERF_SP_FS_STAGE_TEX_INSTRUCTIONS = 37, - PERF_SP_FS_STAGE_CFLOW_INSTRUCTIONS = 38, - PERF_SP_FS_STAGE_EFU_INSTRUCTIONS = 39, - PERF_SP_FS_STAGE_FULL_ALU_INSTRUCTIONS = 40, - PERF_SP_FS_STAGE_HALF_ALU_INSTRUCTIONS = 41, - PERF_SP_FS_STAGE_BARY_INSTRUCTIONS = 42, - PERF_SP_VS_INSTRUCTIONS = 43, - PERF_SP_FS_INSTRUCTIONS = 44, - PERF_SP_ADDR_LOCK_COUNT = 45, - PERF_SP_UCHE_READ_TRANS = 46, - PERF_SP_UCHE_WRITE_TRANS = 47, - PERF_SP_EXPORT_VPC_TRANS = 48, - PERF_SP_EXPORT_RB_TRANS = 49, - PERF_SP_PIXELS_KILLED = 50, - PERF_SP_ICL1_REQUESTS = 51, - PERF_SP_ICL1_MISSES = 52, - PERF_SP_HS_INSTRUCTIONS = 53, - PERF_SP_DS_INSTRUCTIONS = 54, - PERF_SP_GS_INSTRUCTIONS = 55, - PERF_SP_CS_INSTRUCTIONS = 56, - PERF_SP_GPR_READ = 57, - PERF_SP_GPR_WRITE = 58, - PERF_SP_FS_STAGE_HALF_EFU_INSTRUCTIONS = 59, - PERF_SP_VS_STAGE_HALF_EFU_INSTRUCTIONS = 60, - PERF_SP_LM_BANK_CONFLICTS = 61, - PERF_SP_TEX_CONTROL_WORKING_CYCLES = 62, - PERF_SP_LOAD_CONTROL_WORKING_CYCLES = 63, - PERF_SP_FLOW_CONTROL_WORKING_CYCLES = 64, - PERF_SP_LM_WORKING_CYCLES = 65, - PERF_SP_DISPATCHER_WORKING_CYCLES = 66, - PERF_SP_SEQUENCER_WORKING_CYCLES = 67, - PERF_SP_LOW_EFFICIENCY_STARVED_BY_TP = 68, - PERF_SP_STARVE_CYCLES_HLSQ = 69, - PERF_SP_NON_EXECUTION_LS_CYCLES = 70, - PERF_SP_WORKING_EU = 71, - PERF_SP_ANY_EU_WORKING = 72, 
- PERF_SP_WORKING_EU_FS_STAGE = 73, - PERF_SP_ANY_EU_WORKING_FS_STAGE = 74, - PERF_SP_WORKING_EU_VS_STAGE = 75, - PERF_SP_ANY_EU_WORKING_VS_STAGE = 76, - PERF_SP_WORKING_EU_CS_STAGE = 77, - PERF_SP_ANY_EU_WORKING_CS_STAGE = 78, - PERF_SP_GPR_READ_PREFETCH = 79, - PERF_SP_GPR_READ_CONFLICT = 80, - PERF_SP_GPR_WRITE_CONFLICT = 81, - PERF_SP_GM_LOAD_LATENCY_CYCLES = 82, - PERF_SP_GM_LOAD_LATENCY_SAMPLES = 83, - PERF_SP_EXECUTABLE_WAVES = 84, -}; - -enum a6xx_rb_perfcounter_select { - PERF_RB_BUSY_CYCLES = 0, - PERF_RB_STALL_CYCLES_HLSQ = 1, - PERF_RB_STALL_CYCLES_FIFO0_FULL = 2, - PERF_RB_STALL_CYCLES_FIFO1_FULL = 3, - PERF_RB_STALL_CYCLES_FIFO2_FULL = 4, - PERF_RB_STARVE_CYCLES_SP = 5, - PERF_RB_STARVE_CYCLES_LRZ_TILE = 6, - PERF_RB_STARVE_CYCLES_CCU = 7, - PERF_RB_STARVE_CYCLES_Z_PLANE = 8, - PERF_RB_STARVE_CYCLES_BARY_PLANE = 9, - PERF_RB_Z_WORKLOAD = 10, - PERF_RB_HLSQ_ACTIVE = 11, - PERF_RB_Z_READ = 12, - PERF_RB_Z_WRITE = 13, - PERF_RB_C_READ = 14, - PERF_RB_C_WRITE = 15, - PERF_RB_TOTAL_PASS = 16, - PERF_RB_Z_PASS = 17, - PERF_RB_Z_FAIL = 18, - PERF_RB_S_FAIL = 19, - PERF_RB_BLENDED_FXP_COMPONENTS = 20, - PERF_RB_BLENDED_FP16_COMPONENTS = 21, - PERF_RB_PS_INVOCATIONS = 22, - PERF_RB_2D_ALIVE_CYCLES = 23, - PERF_RB_2D_STALL_CYCLES_A2D = 24, - PERF_RB_2D_STARVE_CYCLES_SRC = 25, - PERF_RB_2D_STARVE_CYCLES_SP = 26, - PERF_RB_2D_STARVE_CYCLES_DST = 27, - PERF_RB_2D_VALID_PIXELS = 28, - PERF_RB_3D_PIXELS = 29, - PERF_RB_BLENDER_WORKING_CYCLES = 30, - PERF_RB_ZPROC_WORKING_CYCLES = 31, - PERF_RB_CPROC_WORKING_CYCLES = 32, - PERF_RB_SAMPLER_WORKING_CYCLES = 33, - PERF_RB_STALL_CYCLES_CCU_COLOR_READ = 34, - PERF_RB_STALL_CYCLES_CCU_COLOR_WRITE = 35, - PERF_RB_STALL_CYCLES_CCU_DEPTH_READ = 36, - PERF_RB_STALL_CYCLES_CCU_DEPTH_WRITE = 37, - PERF_RB_STALL_CYCLES_VPC = 38, - PERF_RB_2D_INPUT_TRANS = 39, - PERF_RB_2D_OUTPUT_RB_DST_TRANS = 40, - PERF_RB_2D_OUTPUT_RB_SRC_TRANS = 41, - PERF_RB_BLENDED_FP32_COMPONENTS = 42, - PERF_RB_COLOR_PIX_TILES = 43, - 
PERF_RB_STALL_CYCLES_CCU = 44, - PERF_RB_EARLY_Z_ARB3_GRANT = 45, - PERF_RB_LATE_Z_ARB3_GRANT = 46, - PERF_RB_EARLY_Z_SKIP_GRANT = 47, -}; - -enum a6xx_vsc_perfcounter_select { - PERF_VSC_BUSY_CYCLES = 0, - PERF_VSC_WORKING_CYCLES = 1, - PERF_VSC_STALL_CYCLES_UCHE = 2, - PERF_VSC_EOT_NUM = 3, - PERF_VSC_INPUT_TILES = 4, -}; - -enum a6xx_ccu_perfcounter_select { - PERF_CCU_BUSY_CYCLES = 0, - PERF_CCU_STALL_CYCLES_RB_DEPTH_RETURN = 1, - PERF_CCU_STALL_CYCLES_RB_COLOR_RETURN = 2, - PERF_CCU_STARVE_CYCLES_FLAG_RETURN = 3, - PERF_CCU_DEPTH_BLOCKS = 4, - PERF_CCU_COLOR_BLOCKS = 5, - PERF_CCU_DEPTH_BLOCK_HIT = 6, - PERF_CCU_COLOR_BLOCK_HIT = 7, - PERF_CCU_PARTIAL_BLOCK_READ = 8, - PERF_CCU_GMEM_READ = 9, - PERF_CCU_GMEM_WRITE = 10, - PERF_CCU_DEPTH_READ_FLAG0_COUNT = 11, - PERF_CCU_DEPTH_READ_FLAG1_COUNT = 12, - PERF_CCU_DEPTH_READ_FLAG2_COUNT = 13, - PERF_CCU_DEPTH_READ_FLAG3_COUNT = 14, - PERF_CCU_DEPTH_READ_FLAG4_COUNT = 15, - PERF_CCU_DEPTH_READ_FLAG5_COUNT = 16, - PERF_CCU_DEPTH_READ_FLAG6_COUNT = 17, - PERF_CCU_DEPTH_READ_FLAG8_COUNT = 18, - PERF_CCU_COLOR_READ_FLAG0_COUNT = 19, - PERF_CCU_COLOR_READ_FLAG1_COUNT = 20, - PERF_CCU_COLOR_READ_FLAG2_COUNT = 21, - PERF_CCU_COLOR_READ_FLAG3_COUNT = 22, - PERF_CCU_COLOR_READ_FLAG4_COUNT = 23, - PERF_CCU_COLOR_READ_FLAG5_COUNT = 24, - PERF_CCU_COLOR_READ_FLAG6_COUNT = 25, - PERF_CCU_COLOR_READ_FLAG8_COUNT = 26, - PERF_CCU_2D_RD_REQ = 27, - PERF_CCU_2D_WR_REQ = 28, -}; - -enum a6xx_lrz_perfcounter_select { - PERF_LRZ_BUSY_CYCLES = 0, - PERF_LRZ_STARVE_CYCLES_RAS = 1, - PERF_LRZ_STALL_CYCLES_RB = 2, - PERF_LRZ_STALL_CYCLES_VSC = 3, - PERF_LRZ_STALL_CYCLES_VPC = 4, - PERF_LRZ_STALL_CYCLES_FLAG_PREFETCH = 5, - PERF_LRZ_STALL_CYCLES_UCHE = 6, - PERF_LRZ_LRZ_READ = 7, - PERF_LRZ_LRZ_WRITE = 8, - PERF_LRZ_READ_LATENCY = 9, - PERF_LRZ_MERGE_CACHE_UPDATING = 10, - PERF_LRZ_PRIM_KILLED_BY_MASKGEN = 11, - PERF_LRZ_PRIM_KILLED_BY_LRZ = 12, - PERF_LRZ_VISIBLE_PRIM_AFTER_LRZ = 13, - PERF_LRZ_FULL_8X8_TILES = 14, - 
PERF_LRZ_PARTIAL_8X8_TILES = 15, - PERF_LRZ_TILE_KILLED = 16, - PERF_LRZ_TOTAL_PIXEL = 17, - PERF_LRZ_VISIBLE_PIXEL_AFTER_LRZ = 18, - PERF_LRZ_FULLY_COVERED_TILES = 19, - PERF_LRZ_PARTIAL_COVERED_TILES = 20, - PERF_LRZ_FEEDBACK_ACCEPT = 21, - PERF_LRZ_FEEDBACK_DISCARD = 22, - PERF_LRZ_FEEDBACK_STALL = 23, - PERF_LRZ_STALL_CYCLES_RB_ZPLANE = 24, - PERF_LRZ_STALL_CYCLES_RB_BPLANE = 25, - PERF_LRZ_STALL_CYCLES_VC = 26, - PERF_LRZ_RAS_MASK_TRANS = 27, -}; - -enum a6xx_cmp_perfcounter_select { - PERF_CMPDECMP_STALL_CYCLES_ARB = 0, - PERF_CMPDECMP_VBIF_LATENCY_CYCLES = 1, - PERF_CMPDECMP_VBIF_LATENCY_SAMPLES = 2, - PERF_CMPDECMP_VBIF_READ_DATA_CCU = 3, - PERF_CMPDECMP_VBIF_WRITE_DATA_CCU = 4, - PERF_CMPDECMP_VBIF_READ_REQUEST = 5, - PERF_CMPDECMP_VBIF_WRITE_REQUEST = 6, - PERF_CMPDECMP_VBIF_READ_DATA = 7, - PERF_CMPDECMP_VBIF_WRITE_DATA = 8, - PERF_CMPDECMP_FLAG_FETCH_CYCLES = 9, - PERF_CMPDECMP_FLAG_FETCH_SAMPLES = 10, - PERF_CMPDECMP_DEPTH_WRITE_FLAG1_COUNT = 11, - PERF_CMPDECMP_DEPTH_WRITE_FLAG2_COUNT = 12, - PERF_CMPDECMP_DEPTH_WRITE_FLAG3_COUNT = 13, - PERF_CMPDECMP_DEPTH_WRITE_FLAG4_COUNT = 14, - PERF_CMPDECMP_DEPTH_WRITE_FLAG5_COUNT = 15, - PERF_CMPDECMP_DEPTH_WRITE_FLAG6_COUNT = 16, - PERF_CMPDECMP_DEPTH_WRITE_FLAG8_COUNT = 17, - PERF_CMPDECMP_COLOR_WRITE_FLAG1_COUNT = 18, - PERF_CMPDECMP_COLOR_WRITE_FLAG2_COUNT = 19, - PERF_CMPDECMP_COLOR_WRITE_FLAG3_COUNT = 20, - PERF_CMPDECMP_COLOR_WRITE_FLAG4_COUNT = 21, - PERF_CMPDECMP_COLOR_WRITE_FLAG5_COUNT = 22, - PERF_CMPDECMP_COLOR_WRITE_FLAG6_COUNT = 23, - PERF_CMPDECMP_COLOR_WRITE_FLAG8_COUNT = 24, - PERF_CMPDECMP_2D_STALL_CYCLES_VBIF_REQ = 25, - PERF_CMPDECMP_2D_STALL_CYCLES_VBIF_WR = 26, - PERF_CMPDECMP_2D_STALL_CYCLES_VBIF_RETURN = 27, - PERF_CMPDECMP_2D_RD_DATA = 28, - PERF_CMPDECMP_2D_WR_DATA = 29, - PERF_CMPDECMP_VBIF_READ_DATA_UCHE_CH0 = 30, - PERF_CMPDECMP_VBIF_READ_DATA_UCHE_CH1 = 31, - PERF_CMPDECMP_2D_OUTPUT_TRANS = 32, - PERF_CMPDECMP_VBIF_WRITE_DATA_UCHE = 33, - PERF_CMPDECMP_DEPTH_WRITE_FLAG0_COUNT = 34, 
- PERF_CMPDECMP_COLOR_WRITE_FLAG0_COUNT = 35, - PERF_CMPDECMP_COLOR_WRITE_FLAGALPHA_COUNT = 36, - PERF_CMPDECMP_2D_BUSY_CYCLES = 37, - PERF_CMPDECMP_2D_REORDER_STARVE_CYCLES = 38, - PERF_CMPDECMP_2D_PIXELS = 39, -}; - -enum a6xx_tex_filter { - A6XX_TEX_NEAREST = 0, - A6XX_TEX_LINEAR = 1, - A6XX_TEX_ANISO = 2, -}; - -enum a6xx_tex_clamp { - A6XX_TEX_REPEAT = 0, - A6XX_TEX_CLAMP_TO_EDGE = 1, - A6XX_TEX_MIRROR_REPEAT = 2, - A6XX_TEX_CLAMP_TO_BORDER = 3, - A6XX_TEX_MIRROR_CLAMP = 4, -}; - -enum a6xx_tex_aniso { - A6XX_TEX_ANISO_1 = 0, - A6XX_TEX_ANISO_2 = 1, - A6XX_TEX_ANISO_4 = 2, - A6XX_TEX_ANISO_8 = 3, - A6XX_TEX_ANISO_16 = 4, -}; - -enum a6xx_tex_swiz { - A6XX_TEX_X = 0, - A6XX_TEX_Y = 1, - A6XX_TEX_Z = 2, - A6XX_TEX_W = 3, - A6XX_TEX_ZERO = 4, - A6XX_TEX_ONE = 5, -}; - -enum a6xx_tex_type { - A6XX_TEX_1D = 0, - A6XX_TEX_2D = 1, - A6XX_TEX_CUBE = 2, - A6XX_TEX_3D = 3, -}; - -#define A6XX_RBBM_INT_0_MASK_RBBM_GPU_IDLE 0x00000001 -#define A6XX_RBBM_INT_0_MASK_CP_AHB_ERROR 0x00000002 -#define A6XX_RBBM_INT_0_MASK_RBBM_ATB_ASYNCFIFO_OVERFLOW 0x00000040 -#define A6XX_RBBM_INT_0_MASK_RBBM_GPC_ERROR 0x00000080 -#define A6XX_RBBM_INT_0_MASK_CP_SW 0x00000100 -#define A6XX_RBBM_INT_0_MASK_CP_HW_ERROR 0x00000200 -#define A6XX_RBBM_INT_0_MASK_CP_CCU_FLUSH_DEPTH_TS 0x00000400 -#define A6XX_RBBM_INT_0_MASK_CP_CCU_FLUSH_COLOR_TS 0x00000800 -#define A6XX_RBBM_INT_0_MASK_CP_CCU_RESOLVE_TS 0x00001000 -#define A6XX_RBBM_INT_0_MASK_CP_IB2 0x00002000 -#define A6XX_RBBM_INT_0_MASK_CP_IB1 0x00004000 -#define A6XX_RBBM_INT_0_MASK_CP_RB 0x00008000 -#define A6XX_RBBM_INT_0_MASK_CP_RB_DONE_TS 0x00020000 -#define A6XX_RBBM_INT_0_MASK_CP_WT_DONE_TS 0x00040000 -#define A6XX_RBBM_INT_0_MASK_CP_CACHE_FLUSH_TS 0x00100000 -#define A6XX_RBBM_INT_0_MASK_RBBM_ATB_BUS_OVERFLOW 0x00400000 -#define A6XX_RBBM_INT_0_MASK_RBBM_HANG_DETECT 0x00800000 -#define A6XX_RBBM_INT_0_MASK_UCHE_OOB_ACCESS 0x01000000 -#define A6XX_RBBM_INT_0_MASK_UCHE_TRAP_INTR 0x02000000 -#define A6XX_RBBM_INT_0_MASK_DEBBUS_INTR_0 
0x04000000 -#define A6XX_RBBM_INT_0_MASK_DEBBUS_INTR_1 0x08000000 -#define A6XX_RBBM_INT_0_MASK_ISDB_CPU_IRQ 0x40000000 -#define A6XX_RBBM_INT_0_MASK_ISDB_UNDER_DEBUG 0x80000000 -#define A6XX_CP_INT_CP_OPCODE_ERROR 0x00000001 -#define A6XX_CP_INT_CP_UCODE_ERROR 0x00000002 -#define A6XX_CP_INT_CP_HW_FAULT_ERROR 0x00000004 -#define A6XX_CP_INT_CP_REGISTER_PROTECTION_ERROR 0x00000010 -#define A6XX_CP_INT_CP_AHB_ERROR 0x00000020 -#define A6XX_CP_INT_CP_VSD_PARITY_ERROR 0x00000040 -#define A6XX_CP_INT_CP_ILLEGAL_INSTR_ERROR 0x00000080 -#define REG_A6XX_CP_RB_BASE 0x00000800 - -#define REG_A6XX_CP_RB_BASE_HI 0x00000801 - -#define REG_A6XX_CP_RB_CNTL 0x00000802 - -#define REG_A6XX_CP_RB_RPTR_ADDR_LO 0x00000804 - -#define REG_A6XX_CP_RB_RPTR_ADDR_HI 0x00000805 - -#define REG_A6XX_CP_RB_RPTR 0x00000806 - -#define REG_A6XX_CP_RB_WPTR 0x00000807 - -#define REG_A6XX_CP_SQE_CNTL 0x00000808 - -#define REG_A6XX_CP_HW_FAULT 0x00000821 - -#define REG_A6XX_CP_INTERRUPT_STATUS 0x00000823 - -#define REG_A6XX_CP_PROTECT_STATUS 0x00000824 - -#define REG_A6XX_CP_SQE_INSTR_BASE_LO 0x00000830 - -#define REG_A6XX_CP_SQE_INSTR_BASE_HI 0x00000831 - -#define REG_A6XX_CP_MISC_CNTL 0x00000840 - -#define REG_A6XX_CP_ROQ_THRESHOLDS_1 0x000008c1 - -#define REG_A6XX_CP_ROQ_THRESHOLDS_2 0x000008c2 - -#define REG_A6XX_CP_MEM_POOL_SIZE 0x000008c3 - -#define REG_A6XX_CP_CHICKEN_DBG 0x00000841 - -#define REG_A6XX_CP_ADDR_MODE_CNTL 0x00000842 - -#define REG_A6XX_CP_DBG_ECO_CNTL 0x00000843 - -#define REG_A6XX_CP_PROTECT_CNTL 0x0000084f - -static inline uint32_t REG_A6XX_CP_SCRATCH(uint32_t i0) { return 0x00000883 + 0x1*i0; } - -static inline uint32_t REG_A6XX_CP_SCRATCH_REG(uint32_t i0) { return 0x00000883 + 0x1*i0; } - -static inline uint32_t REG_A6XX_CP_PROTECT(uint32_t i0) { return 0x00000850 + 0x1*i0; } - -static inline uint32_t REG_A6XX_CP_PROTECT_REG(uint32_t i0) { return 0x00000850 + 0x1*i0; } -#define A6XX_CP_PROTECT_REG_BASE_ADDR__MASK 0x0003ffff -#define A6XX_CP_PROTECT_REG_BASE_ADDR__SHIFT 0 
-static inline uint32_t A6XX_CP_PROTECT_REG_BASE_ADDR(uint32_t val) -{ - return ((val) << A6XX_CP_PROTECT_REG_BASE_ADDR__SHIFT) & A6XX_CP_PROTECT_REG_BASE_ADDR__MASK; -} -#define A6XX_CP_PROTECT_REG_MASK_LEN__MASK 0x7ffc0000 -#define A6XX_CP_PROTECT_REG_MASK_LEN__SHIFT 18 -static inline uint32_t A6XX_CP_PROTECT_REG_MASK_LEN(uint32_t val) -{ - return ((val) << A6XX_CP_PROTECT_REG_MASK_LEN__SHIFT) & A6XX_CP_PROTECT_REG_MASK_LEN__MASK; -} -#define A6XX_CP_PROTECT_REG_READ 0x80000000 - -#define REG_A6XX_CP_CONTEXT_SWITCH_CNTL 0x000008a0 - -#define REG_A6XX_CP_CONTEXT_SWITCH_SMMU_INFO_LO 0x000008a1 - -#define REG_A6XX_CP_CONTEXT_SWITCH_SMMU_INFO_HI 0x000008a2 - -#define REG_A6XX_CP_CONTEXT_SWITCH_PRIV_NON_SECURE_RESTORE_ADDR_LO 0x000008a3 - -#define REG_A6XX_CP_CONTEXT_SWITCH_PRIV_NON_SECURE_RESTORE_ADDR_HI 0x000008a4 - -#define REG_A6XX_CP_CONTEXT_SWITCH_PRIV_SECURE_RESTORE_ADDR_LO 0x000008a5 - -#define REG_A6XX_CP_CONTEXT_SWITCH_PRIV_SECURE_RESTORE_ADDR_HI 0x000008a6 - -#define REG_A6XX_CP_CONTEXT_SWITCH_NON_PRIV_RESTORE_ADDR_LO 0x000008a7 - -#define REG_A6XX_CP_CONTEXT_SWITCH_NON_PRIV_RESTORE_ADDR_HI 0x000008a8 - -#define REG_A6XX_CP_PERFCTR_CP_SEL_0 0x000008d0 - -#define REG_A6XX_CP_PERFCTR_CP_SEL_1 0x000008d1 - -#define REG_A6XX_CP_PERFCTR_CP_SEL_2 0x000008d2 - -#define REG_A6XX_CP_PERFCTR_CP_SEL_3 0x000008d3 - -#define REG_A6XX_CP_PERFCTR_CP_SEL_4 0x000008d4 - -#define REG_A6XX_CP_PERFCTR_CP_SEL_5 0x000008d5 - -#define REG_A6XX_CP_PERFCTR_CP_SEL_6 0x000008d6 - -#define REG_A6XX_CP_PERFCTR_CP_SEL_7 0x000008d7 - -#define REG_A6XX_CP_PERFCTR_CP_SEL_8 0x000008d8 - -#define REG_A6XX_CP_PERFCTR_CP_SEL_9 0x000008d9 - -#define REG_A6XX_CP_PERFCTR_CP_SEL_10 0x000008da - -#define REG_A6XX_CP_PERFCTR_CP_SEL_11 0x000008db - -#define REG_A6XX_CP_PERFCTR_CP_SEL_12 0x000008dc - -#define REG_A6XX_CP_PERFCTR_CP_SEL_13 0x000008dd - -#define REG_A6XX_CP_CRASH_SCRIPT_BASE_LO 0x00000900 - -#define REG_A6XX_CP_CRASH_SCRIPT_BASE_HI 0x00000901 - -#define REG_A6XX_CP_CRASH_DUMP_CNTL 
0x00000902 - -#define REG_A6XX_CP_CRASH_DUMP_STATUS 0x00000903 - -#define REG_A6XX_CP_SQE_STAT_ADDR 0x00000908 - -#define REG_A6XX_CP_SQE_STAT_DATA 0x00000909 - -#define REG_A6XX_CP_DRAW_STATE_ADDR 0x0000090a - -#define REG_A6XX_CP_DRAW_STATE_DATA 0x0000090b - -#define REG_A6XX_CP_ROQ_DBG_ADDR 0x0000090c - -#define REG_A6XX_CP_ROQ_DBG_DATA 0x0000090d - -#define REG_A6XX_CP_MEM_POOL_DBG_ADDR 0x0000090e - -#define REG_A6XX_CP_MEM_POOL_DBG_DATA 0x0000090f - -#define REG_A6XX_CP_SQE_UCODE_DBG_ADDR 0x00000910 - -#define REG_A6XX_CP_SQE_UCODE_DBG_DATA 0x00000911 - -#define REG_A6XX_CP_IB1_BASE 0x00000928 - -#define REG_A6XX_CP_IB1_BASE_HI 0x00000929 - -#define REG_A6XX_CP_IB1_REM_SIZE 0x0000092a - -#define REG_A6XX_CP_IB2_BASE 0x0000092b - -#define REG_A6XX_CP_IB2_BASE_HI 0x0000092c - -#define REG_A6XX_CP_IB2_REM_SIZE 0x0000092d - -#define REG_A6XX_CP_ALWAYS_ON_COUNTER_LO 0x00000980 - -#define REG_A6XX_CP_ALWAYS_ON_COUNTER_HI 0x00000981 - -#define REG_A6XX_CP_AHB_CNTL 0x0000098d - -#define REG_A6XX_CP_APERTURE_CNTL_HOST 0x00000a00 - -#define REG_A6XX_CP_APERTURE_CNTL_CD 0x00000a03 - -#define REG_A6XX_VSC_ADDR_MODE_CNTL 0x00000c01 - -#define REG_A6XX_RBBM_INT_0_STATUS 0x00000201 - -#define REG_A6XX_RBBM_STATUS 0x00000210 -#define A6XX_RBBM_STATUS_GPU_BUSY_IGN_AHB 0x00800000 -#define A6XX_RBBM_STATUS_GPU_BUSY_IGN_AHB_CP 0x00400000 -#define A6XX_RBBM_STATUS_HLSQ_BUSY 0x00200000 -#define A6XX_RBBM_STATUS_VSC_BUSY 0x00100000 -#define A6XX_RBBM_STATUS_TPL1_BUSY 0x00080000 -#define A6XX_RBBM_STATUS_SP_BUSY 0x00040000 -#define A6XX_RBBM_STATUS_UCHE_BUSY 0x00020000 -#define A6XX_RBBM_STATUS_VPC_BUSY 0x00010000 -#define A6XX_RBBM_STATUS_VFD_BUSY 0x00008000 -#define A6XX_RBBM_STATUS_TESS_BUSY 0x00004000 -#define A6XX_RBBM_STATUS_PC_VSD_BUSY 0x00002000 -#define A6XX_RBBM_STATUS_PC_DCALL_BUSY 0x00001000 -#define A6XX_RBBM_STATUS_COM_DCOM_BUSY 0x00000800 -#define A6XX_RBBM_STATUS_LRZ_BUSY 0x00000400 -#define A6XX_RBBM_STATUS_A2D_BUSY 0x00000200 -#define A6XX_RBBM_STATUS_CCU_BUSY 
0x00000100 -#define A6XX_RBBM_STATUS_RB_BUSY 0x00000080 -#define A6XX_RBBM_STATUS_RAS_BUSY 0x00000040 -#define A6XX_RBBM_STATUS_TSE_BUSY 0x00000020 -#define A6XX_RBBM_STATUS_VBIF_BUSY 0x00000010 -#define A6XX_RBBM_STATUS_GFX_DBGC_BUSY 0x00000008 -#define A6XX_RBBM_STATUS_CP_BUSY 0x00000004 -#define A6XX_RBBM_STATUS_CP_AHB_BUSY_CP_MASTER 0x00000002 -#define A6XX_RBBM_STATUS_CP_AHB_BUSY_CX_MASTER 0x00000001 - -#define REG_A6XX_RBBM_STATUS3 0x00000213 - -#define REG_A6XX_RBBM_VBIF_GX_RESET_STATUS 0x00000215 - -#define REG_A6XX_RBBM_PERFCTR_CP_0_LO 0x00000400 - -#define REG_A6XX_RBBM_PERFCTR_CP_0_HI 0x00000401 - -#define REG_A6XX_RBBM_PERFCTR_CP_1_LO 0x00000402 - -#define REG_A6XX_RBBM_PERFCTR_CP_1_HI 0x00000403 - -#define REG_A6XX_RBBM_PERFCTR_CP_2_LO 0x00000404 - -#define REG_A6XX_RBBM_PERFCTR_CP_2_HI 0x00000405 - -#define REG_A6XX_RBBM_PERFCTR_CP_3_LO 0x00000406 - -#define REG_A6XX_RBBM_PERFCTR_CP_3_HI 0x00000407 - -#define REG_A6XX_RBBM_PERFCTR_CP_4_LO 0x00000408 - -#define REG_A6XX_RBBM_PERFCTR_CP_4_HI 0x00000409 - -#define REG_A6XX_RBBM_PERFCTR_CP_5_LO 0x0000040a - -#define REG_A6XX_RBBM_PERFCTR_CP_5_HI 0x0000040b - -#define REG_A6XX_RBBM_PERFCTR_CP_6_LO 0x0000040c - -#define REG_A6XX_RBBM_PERFCTR_CP_6_HI 0x0000040d - -#define REG_A6XX_RBBM_PERFCTR_CP_7_LO 0x0000040e - -#define REG_A6XX_RBBM_PERFCTR_CP_7_HI 0x0000040f - -#define REG_A6XX_RBBM_PERFCTR_CP_8_LO 0x00000410 - -#define REG_A6XX_RBBM_PERFCTR_CP_8_HI 0x00000411 - -#define REG_A6XX_RBBM_PERFCTR_CP_9_LO 0x00000412 - -#define REG_A6XX_RBBM_PERFCTR_CP_9_HI 0x00000413 - -#define REG_A6XX_RBBM_PERFCTR_CP_10_LO 0x00000414 - -#define REG_A6XX_RBBM_PERFCTR_CP_10_HI 0x00000415 - -#define REG_A6XX_RBBM_PERFCTR_CP_11_LO 0x00000416 - -#define REG_A6XX_RBBM_PERFCTR_CP_11_HI 0x00000417 - -#define REG_A6XX_RBBM_PERFCTR_CP_12_LO 0x00000418 - -#define REG_A6XX_RBBM_PERFCTR_CP_12_HI 0x00000419 - -#define REG_A6XX_RBBM_PERFCTR_CP_13_LO 0x0000041a - -#define REG_A6XX_RBBM_PERFCTR_CP_13_HI 0x0000041b - -#define 
REG_A6XX_RBBM_PERFCTR_RBBM_0_LO 0x0000041c - -#define REG_A6XX_RBBM_PERFCTR_RBBM_0_HI 0x0000041d - -#define REG_A6XX_RBBM_PERFCTR_RBBM_1_LO 0x0000041e - -#define REG_A6XX_RBBM_PERFCTR_RBBM_1_HI 0x0000041f - -#define REG_A6XX_RBBM_PERFCTR_RBBM_2_LO 0x00000420 - -#define REG_A6XX_RBBM_PERFCTR_RBBM_2_HI 0x00000421 - -#define REG_A6XX_RBBM_PERFCTR_RBBM_3_LO 0x00000422 - -#define REG_A6XX_RBBM_PERFCTR_RBBM_3_HI 0x00000423 - -#define REG_A6XX_RBBM_PERFCTR_PC_0_LO 0x00000424 - -#define REG_A6XX_RBBM_PERFCTR_PC_0_HI 0x00000425 - -#define REG_A6XX_RBBM_PERFCTR_PC_1_LO 0x00000426 - -#define REG_A6XX_RBBM_PERFCTR_PC_1_HI 0x00000427 - -#define REG_A6XX_RBBM_PERFCTR_PC_2_LO 0x00000428 - -#define REG_A6XX_RBBM_PERFCTR_PC_2_HI 0x00000429 - -#define REG_A6XX_RBBM_PERFCTR_PC_3_LO 0x0000042a - -#define REG_A6XX_RBBM_PERFCTR_PC_3_HI 0x0000042b - -#define REG_A6XX_RBBM_PERFCTR_PC_4_LO 0x0000042c - -#define REG_A6XX_RBBM_PERFCTR_PC_4_HI 0x0000042d - -#define REG_A6XX_RBBM_PERFCTR_PC_5_LO 0x0000042e - -#define REG_A6XX_RBBM_PERFCTR_PC_5_HI 0x0000042f - -#define REG_A6XX_RBBM_PERFCTR_PC_6_LO 0x00000430 - -#define REG_A6XX_RBBM_PERFCTR_PC_6_HI 0x00000431 - -#define REG_A6XX_RBBM_PERFCTR_PC_7_LO 0x00000432 - -#define REG_A6XX_RBBM_PERFCTR_PC_7_HI 0x00000433 - -#define REG_A6XX_RBBM_PERFCTR_VFD_0_LO 0x00000434 - -#define REG_A6XX_RBBM_PERFCTR_VFD_0_HI 0x00000435 - -#define REG_A6XX_RBBM_PERFCTR_VFD_1_LO 0x00000436 - -#define REG_A6XX_RBBM_PERFCTR_VFD_1_HI 0x00000437 - -#define REG_A6XX_RBBM_PERFCTR_VFD_2_LO 0x00000438 - -#define REG_A6XX_RBBM_PERFCTR_VFD_2_HI 0x00000439 - -#define REG_A6XX_RBBM_PERFCTR_VFD_3_LO 0x0000043a - -#define REG_A6XX_RBBM_PERFCTR_VFD_3_HI 0x0000043b - -#define REG_A6XX_RBBM_PERFCTR_VFD_4_LO 0x0000043c - -#define REG_A6XX_RBBM_PERFCTR_VFD_4_HI 0x0000043d - -#define REG_A6XX_RBBM_PERFCTR_VFD_5_LO 0x0000043e - -#define REG_A6XX_RBBM_PERFCTR_VFD_5_HI 0x0000043f - -#define REG_A6XX_RBBM_PERFCTR_VFD_6_LO 0x00000440 - -#define REG_A6XX_RBBM_PERFCTR_VFD_6_HI 0x00000441 - 
-#define REG_A6XX_RBBM_PERFCTR_VFD_7_LO 0x00000442 - -#define REG_A6XX_RBBM_PERFCTR_VFD_7_HI 0x00000443 - -#define REG_A6XX_RBBM_PERFCTR_HLSQ_0_LO 0x00000444 - -#define REG_A6XX_RBBM_PERFCTR_HLSQ_0_HI 0x00000445 - -#define REG_A6XX_RBBM_PERFCTR_HLSQ_1_LO 0x00000446 - -#define REG_A6XX_RBBM_PERFCTR_HLSQ_1_HI 0x00000447 - -#define REG_A6XX_RBBM_PERFCTR_HLSQ_2_LO 0x00000448 - -#define REG_A6XX_RBBM_PERFCTR_HLSQ_2_HI 0x00000449 - -#define REG_A6XX_RBBM_PERFCTR_HLSQ_3_LO 0x0000044a - -#define REG_A6XX_RBBM_PERFCTR_HLSQ_3_HI 0x0000044b - -#define REG_A6XX_RBBM_PERFCTR_HLSQ_4_LO 0x0000044c - -#define REG_A6XX_RBBM_PERFCTR_HLSQ_4_HI 0x0000044d - -#define REG_A6XX_RBBM_PERFCTR_HLSQ_5_LO 0x0000044e - -#define REG_A6XX_RBBM_PERFCTR_HLSQ_5_HI 0x0000044f - -#define REG_A6XX_RBBM_PERFCTR_VPC_0_LO 0x00000450 - -#define REG_A6XX_RBBM_PERFCTR_VPC_0_HI 0x00000451 - -#define REG_A6XX_RBBM_PERFCTR_VPC_1_LO 0x00000452 - -#define REG_A6XX_RBBM_PERFCTR_VPC_1_HI 0x00000453 - -#define REG_A6XX_RBBM_PERFCTR_VPC_2_LO 0x00000454 - -#define REG_A6XX_RBBM_PERFCTR_VPC_2_HI 0x00000455 - -#define REG_A6XX_RBBM_PERFCTR_VPC_3_LO 0x00000456 - -#define REG_A6XX_RBBM_PERFCTR_VPC_3_HI 0x00000457 - -#define REG_A6XX_RBBM_PERFCTR_VPC_4_LO 0x00000458 - -#define REG_A6XX_RBBM_PERFCTR_VPC_4_HI 0x00000459 - -#define REG_A6XX_RBBM_PERFCTR_VPC_5_LO 0x0000045a - -#define REG_A6XX_RBBM_PERFCTR_VPC_5_HI 0x0000045b - -#define REG_A6XX_RBBM_PERFCTR_CCU_0_LO 0x0000045c - -#define REG_A6XX_RBBM_PERFCTR_CCU_0_HI 0x0000045d - -#define REG_A6XX_RBBM_PERFCTR_CCU_1_LO 0x0000045e - -#define REG_A6XX_RBBM_PERFCTR_CCU_1_HI 0x0000045f - -#define REG_A6XX_RBBM_PERFCTR_CCU_2_LO 0x00000460 - -#define REG_A6XX_RBBM_PERFCTR_CCU_2_HI 0x00000461 - -#define REG_A6XX_RBBM_PERFCTR_CCU_3_LO 0x00000462 - -#define REG_A6XX_RBBM_PERFCTR_CCU_3_HI 0x00000463 - -#define REG_A6XX_RBBM_PERFCTR_CCU_4_LO 0x00000464 - -#define REG_A6XX_RBBM_PERFCTR_CCU_4_HI 0x00000465 - -#define REG_A6XX_RBBM_PERFCTR_TSE_0_LO 0x00000466 - -#define 
REG_A6XX_RBBM_PERFCTR_TSE_0_HI 0x00000467 - -#define REG_A6XX_RBBM_PERFCTR_TSE_1_LO 0x00000468 - -#define REG_A6XX_RBBM_PERFCTR_TSE_1_HI 0x00000469 - -#define REG_A6XX_RBBM_PERFCTR_TSE_2_LO 0x0000046a - -#define REG_A6XX_RBBM_PERFCTR_CCU_4_HI 0x00000465 - -#define REG_A6XX_RBBM_PERFCTR_TSE_0_LO 0x00000466 - -#define REG_A6XX_RBBM_PERFCTR_TSE_0_HI 0x00000467 - -#define REG_A6XX_RBBM_PERFCTR_TSE_1_LO 0x00000468 - -#define REG_A6XX_RBBM_PERFCTR_TSE_1_HI 0x00000469 - -#define REG_A6XX_RBBM_PERFCTR_TSE_2_LO 0x0000046a - -#define REG_A6XX_RBBM_PERFCTR_TSE_2_HI 0x0000046b - -#define REG_A6XX_RBBM_PERFCTR_TSE_3_LO 0x0000046c - -#define REG_A6XX_RBBM_PERFCTR_TSE_3_HI 0x0000046d - -#define REG_A6XX_RBBM_PERFCTR_RAS_0_LO 0x0000046e - -#define REG_A6XX_RBBM_PERFCTR_RAS_0_HI 0x0000046f - -#define REG_A6XX_RBBM_PERFCTR_RAS_1_LO 0x00000470 - -#define REG_A6XX_RBBM_PERFCTR_RAS_1_HI 0x00000471 - -#define REG_A6XX_RBBM_PERFCTR_RAS_2_LO 0x00000472 - -#define REG_A6XX_RBBM_PERFCTR_RAS_2_HI 0x00000473 - -#define REG_A6XX_RBBM_PERFCTR_RAS_3_LO 0x00000474 - -#define REG_A6XX_RBBM_PERFCTR_RAS_3_HI 0x00000475 - -#define REG_A6XX_RBBM_PERFCTR_UCHE_0_LO 0x00000476 - -#define REG_A6XX_RBBM_PERFCTR_UCHE_0_HI 0x00000477 - -#define REG_A6XX_RBBM_PERFCTR_UCHE_1_LO 0x00000478 - -#define REG_A6XX_RBBM_PERFCTR_UCHE_1_HI 0x00000479 - -#define REG_A6XX_RBBM_PERFCTR_UCHE_2_LO 0x0000047a - -#define REG_A6XX_RBBM_PERFCTR_UCHE_2_HI 0x0000047b - -#define REG_A6XX_RBBM_PERFCTR_UCHE_3_LO 0x0000047c - -#define REG_A6XX_RBBM_PERFCTR_UCHE_3_HI 0x0000047d - -#define REG_A6XX_RBBM_PERFCTR_UCHE_4_LO 0x0000047e - -#define REG_A6XX_RBBM_PERFCTR_UCHE_4_HI 0x0000047f - -#define REG_A6XX_RBBM_PERFCTR_UCHE_5_LO 0x00000480 - -#define REG_A6XX_RBBM_PERFCTR_UCHE_5_HI 0x00000481 - -#define REG_A6XX_RBBM_PERFCTR_UCHE_6_LO 0x00000482 - -#define REG_A6XX_RBBM_PERFCTR_UCHE_6_HI 0x00000483 - -#define REG_A6XX_RBBM_PERFCTR_UCHE_7_LO 0x00000484 - -#define REG_A6XX_RBBM_PERFCTR_UCHE_7_HI 0x00000485 - -#define 
REG_A6XX_RBBM_PERFCTR_UCHE_8_LO 0x00000486 - -#define REG_A6XX_RBBM_PERFCTR_UCHE_8_HI 0x00000487 - -#define REG_A6XX_RBBM_PERFCTR_UCHE_9_LO 0x00000488 - -#define REG_A6XX_RBBM_PERFCTR_UCHE_9_HI 0x00000489 - -#define REG_A6XX_RBBM_PERFCTR_UCHE_10_LO 0x0000048a - -#define REG_A6XX_RBBM_PERFCTR_UCHE_10_HI 0x0000048b - -#define REG_A6XX_RBBM_PERFCTR_UCHE_11_LO 0x0000048c - -#define REG_A6XX_RBBM_PERFCTR_UCHE_11_HI 0x0000048d - -#define REG_A6XX_RBBM_PERFCTR_TP_0_LO 0x0000048e - -#define REG_A6XX_RBBM_PERFCTR_TP_0_HI 0x0000048f - -#define REG_A6XX_RBBM_PERFCTR_TP_1_LO 0x00000490 - -#define REG_A6XX_RBBM_PERFCTR_TP_1_HI 0x00000491 - -#define REG_A6XX_RBBM_PERFCTR_TP_2_LO 0x00000492 - -#define REG_A6XX_RBBM_PERFCTR_TP_2_HI 0x00000493 - -#define REG_A6XX_RBBM_PERFCTR_TP_3_LO 0x00000494 - -#define REG_A6XX_RBBM_PERFCTR_TP_3_HI 0x00000495 - -#define REG_A6XX_RBBM_PERFCTR_TP_4_LO 0x00000496 - -#define REG_A6XX_RBBM_PERFCTR_TP_4_HI 0x00000497 - -#define REG_A6XX_RBBM_PERFCTR_TP_5_LO 0x00000498 - -#define REG_A6XX_RBBM_PERFCTR_TP_5_HI 0x00000499 - -#define REG_A6XX_RBBM_PERFCTR_TP_6_LO 0x0000049a - -#define REG_A6XX_RBBM_PERFCTR_TP_6_HI 0x0000049b - -#define REG_A6XX_RBBM_PERFCTR_TP_7_LO 0x0000049c - -#define REG_A6XX_RBBM_PERFCTR_TP_7_HI 0x0000049d - -#define REG_A6XX_RBBM_PERFCTR_TP_8_LO 0x0000049e - -#define REG_A6XX_RBBM_PERFCTR_TP_8_HI 0x0000049f - -#define REG_A6XX_RBBM_PERFCTR_TP_9_LO 0x000004a0 - -#define REG_A6XX_RBBM_PERFCTR_TP_9_HI 0x000004a1 - -#define REG_A6XX_RBBM_PERFCTR_TP_10_LO 0x000004a2 - -#define REG_A6XX_RBBM_PERFCTR_TP_10_HI 0x000004a3 - -#define REG_A6XX_RBBM_PERFCTR_TP_11_LO 0x000004a4 - -#define REG_A6XX_RBBM_PERFCTR_TP_11_HI 0x000004a5 - -#define REG_A6XX_RBBM_PERFCTR_SP_0_LO 0x000004a6 - -#define REG_A6XX_RBBM_PERFCTR_SP_0_HI 0x000004a7 - -#define REG_A6XX_RBBM_PERFCTR_SP_1_LO 0x000004a8 - -#define REG_A6XX_RBBM_PERFCTR_SP_1_HI 0x000004a9 - -#define REG_A6XX_RBBM_PERFCTR_SP_2_LO 0x000004aa - -#define REG_A6XX_RBBM_PERFCTR_SP_2_HI 0x000004ab - -#define 
REG_A6XX_RBBM_PERFCTR_SP_3_LO 0x000004ac - -#define REG_A6XX_RBBM_PERFCTR_SP_3_HI 0x000004ad - -#define REG_A6XX_RBBM_PERFCTR_SP_4_LO 0x000004ae - -#define REG_A6XX_RBBM_PERFCTR_SP_4_HI 0x000004af - -#define REG_A6XX_RBBM_PERFCTR_SP_5_LO 0x000004b0 - -#define REG_A6XX_RBBM_PERFCTR_SP_5_HI 0x000004b1 - -#define REG_A6XX_RBBM_PERFCTR_SP_6_LO 0x000004b2 - -#define REG_A6XX_RBBM_PERFCTR_SP_6_HI 0x000004b3 - -#define REG_A6XX_RBBM_PERFCTR_SP_7_LO 0x000004b4 - -#define REG_A6XX_RBBM_PERFCTR_SP_7_HI 0x000004b5 - -#define REG_A6XX_RBBM_PERFCTR_SP_8_LO 0x000004b6 - -#define REG_A6XX_RBBM_PERFCTR_SP_8_HI 0x000004b7 - -#define REG_A6XX_RBBM_PERFCTR_SP_9_LO 0x000004b8 - -#define REG_A6XX_RBBM_PERFCTR_SP_9_HI 0x000004b9 - -#define REG_A6XX_RBBM_PERFCTR_SP_10_LO 0x000004ba - -#define REG_A6XX_RBBM_PERFCTR_SP_10_HI 0x000004bb - -#define REG_A6XX_RBBM_PERFCTR_SP_11_LO 0x000004bc - -#define REG_A6XX_RBBM_PERFCTR_SP_11_HI 0x000004bd - -#define REG_A6XX_RBBM_PERFCTR_SP_12_LO 0x000004be - -#define REG_A6XX_RBBM_PERFCTR_SP_12_HI 0x000004bf - -#define REG_A6XX_RBBM_PERFCTR_SP_13_LO 0x000004c0 - -#define REG_A6XX_RBBM_PERFCTR_SP_13_HI 0x000004c1 - -#define REG_A6XX_RBBM_PERFCTR_SP_14_LO 0x000004c2 - -#define REG_A6XX_RBBM_PERFCTR_SP_14_HI 0x000004c3 - -#define REG_A6XX_RBBM_PERFCTR_SP_15_LO 0x000004c4 - -#define REG_A6XX_RBBM_PERFCTR_SP_15_HI 0x000004c5 - -#define REG_A6XX_RBBM_PERFCTR_SP_16_LO 0x000004c6 - -#define REG_A6XX_RBBM_PERFCTR_SP_16_HI 0x000004c7 - -#define REG_A6XX_RBBM_PERFCTR_SP_17_LO 0x000004c8 - -#define REG_A6XX_RBBM_PERFCTR_SP_17_HI 0x000004c9 - -#define REG_A6XX_RBBM_PERFCTR_SP_18_LO 0x000004ca - -#define REG_A6XX_RBBM_PERFCTR_SP_18_HI 0x000004cb - -#define REG_A6XX_RBBM_PERFCTR_SP_19_LO 0x000004cc - -#define REG_A6XX_RBBM_PERFCTR_SP_19_HI 0x000004cd - -#define REG_A6XX_RBBM_PERFCTR_SP_20_LO 0x000004ce - -#define REG_A6XX_RBBM_PERFCTR_SP_20_HI 0x000004cf - -#define REG_A6XX_RBBM_PERFCTR_SP_21_LO 0x000004d0 - -#define REG_A6XX_RBBM_PERFCTR_SP_21_HI 0x000004d1 - -#define 
REG_A6XX_RBBM_PERFCTR_SP_22_LO 0x000004d2 - -#define REG_A6XX_RBBM_PERFCTR_SP_22_HI 0x000004d3 - -#define REG_A6XX_RBBM_PERFCTR_SP_23_LO 0x000004d4 - -#define REG_A6XX_RBBM_PERFCTR_SP_23_HI 0x000004d5 - -#define REG_A6XX_RBBM_PERFCTR_RB_0_LO 0x000004d6 - -#define REG_A6XX_RBBM_PERFCTR_RB_0_HI 0x000004d7 - -#define REG_A6XX_RBBM_PERFCTR_RB_1_LO 0x000004d8 - -#define REG_A6XX_RBBM_PERFCTR_RB_1_HI 0x000004d9 - -#define REG_A6XX_RBBM_PERFCTR_RB_2_LO 0x000004da - -#define REG_A6XX_RBBM_PERFCTR_RB_2_HI 0x000004db - -#define REG_A6XX_RBBM_PERFCTR_RB_3_LO 0x000004dc - -#define REG_A6XX_RBBM_PERFCTR_RB_3_HI 0x000004dd - -#define REG_A6XX_RBBM_PERFCTR_RB_4_LO 0x000004de - -#define REG_A6XX_RBBM_PERFCTR_RB_4_HI 0x000004df - -#define REG_A6XX_RBBM_PERFCTR_RB_5_LO 0x000004e0 - -#define REG_A6XX_RBBM_PERFCTR_RB_5_HI 0x000004e1 - -#define REG_A6XX_RBBM_PERFCTR_RB_6_LO 0x000004e2 - -#define REG_A6XX_RBBM_PERFCTR_RB_6_HI 0x000004e3 - -#define REG_A6XX_RBBM_PERFCTR_RB_7_LO 0x000004e4 - -#define REG_A6XX_RBBM_PERFCTR_RB_7_HI 0x000004e5 - -#define REG_A6XX_RBBM_PERFCTR_VSC_0_LO 0x000004e6 - -#define REG_A6XX_RBBM_PERFCTR_VSC_0_HI 0x000004e7 - -#define REG_A6XX_RBBM_PERFCTR_VSC_1_LO 0x000004e8 - -#define REG_A6XX_RBBM_PERFCTR_VSC_1_HI 0x000004e9 - -#define REG_A6XX_RBBM_PERFCTR_LRZ_0_LO 0x000004ea - -#define REG_A6XX_RBBM_PERFCTR_LRZ_0_HI 0x000004eb - -#define REG_A6XX_RBBM_PERFCTR_LRZ_1_LO 0x000004ec - -#define REG_A6XX_RBBM_PERFCTR_LRZ_1_HI 0x000004ed - -#define REG_A6XX_RBBM_PERFCTR_LRZ_2_LO 0x000004ee - -#define REG_A6XX_RBBM_PERFCTR_LRZ_2_HI 0x000004ef - -#define REG_A6XX_RBBM_PERFCTR_LRZ_3_LO 0x000004f0 - -#define REG_A6XX_RBBM_PERFCTR_LRZ_3_HI 0x000004f1 - -#define REG_A6XX_RBBM_PERFCTR_CMP_0_LO 0x000004f2 - -#define REG_A6XX_RBBM_PERFCTR_CMP_0_HI 0x000004f3 - -#define REG_A6XX_RBBM_PERFCTR_CMP_1_LO 0x000004f4 - -#define REG_A6XX_RBBM_PERFCTR_CMP_1_HI 0x000004f5 - -#define REG_A6XX_RBBM_PERFCTR_CMP_2_LO 0x000004f6 - -#define REG_A6XX_RBBM_PERFCTR_CMP_2_HI 0x000004f7 - -#define 
REG_A6XX_RBBM_PERFCTR_CMP_3_LO 0x000004f8 - -#define REG_A6XX_RBBM_PERFCTR_CMP_3_HI 0x000004f9 - -#define REG_A6XX_RBBM_PERFCTR_CNTL 0x00000500 - -#define REG_A6XX_RBBM_PERFCTR_LOAD_CMD0 0x00000501 - -#define REG_A6XX_RBBM_PERFCTR_LOAD_CMD1 0x00000502 - -#define REG_A6XX_RBBM_PERFCTR_LOAD_CMD2 0x00000503 - -#define REG_A6XX_RBBM_PERFCTR_LOAD_CMD3 0x00000504 - -#define REG_A6XX_RBBM_PERFCTR_LOAD_VALUE_LO 0x00000505 - -#define REG_A6XX_RBBM_PERFCTR_LOAD_VALUE_HI 0x00000506 - -#define REG_A6XX_RBBM_PERFCTR_RBBM_SEL_0 0x00000507 - -#define REG_A6XX_RBBM_PERFCTR_RBBM_SEL_1 0x00000508 - -#define REG_A6XX_RBBM_PERFCTR_RBBM_SEL_2 0x00000509 - -#define REG_A6XX_RBBM_PERFCTR_RBBM_SEL_3 0x0000050a - -#define REG_A6XX_RBBM_PERFCTR_GPU_BUSY_MASKED 0x0000050b - -#define REG_A6XX_RBBM_ISDB_CNT 0x00000533 - -#define REG_A6XX_RBBM_SECVID_TRUST_CNTL 0x0000f400 - -#define REG_A6XX_RBBM_SECVID_TSB_TRUSTED_BASE_LO 0x0000f800 - -#define REG_A6XX_RBBM_SECVID_TSB_TRUSTED_BASE_HI 0x0000f801 - -#define REG_A6XX_RBBM_SECVID_TSB_TRUSTED_SIZE 0x0000f802 - -#define REG_A6XX_RBBM_SECVID_TSB_CNTL 0x0000f803 - -#define REG_A6XX_RBBM_SECVID_TSB_ADDR_MODE_CNTL 0x0000f810 - -#define REG_A6XX_RBBM_VBIF_CLIENT_QOS_CNTL 0x00000010 - -#define REG_A6XX_RBBM_INTERFACE_HANG_INT_CNTL 0x0000001f - -#define REG_A6XX_RBBM_INT_CLEAR_CMD 0x00000037 - -#define REG_A6XX_RBBM_INT_0_MASK 0x00000038 - -#define REG_A6XX_RBBM_SP_HYST_CNT 0x00000042 - -#define REG_A6XX_RBBM_SW_RESET_CMD 0x00000043 - -#define REG_A6XX_RBBM_RAC_THRESHOLD_CNT 0x00000044 - -#define REG_A6XX_RBBM_BLOCK_SW_RESET_CMD 0x00000045 - -#define REG_A6XX_RBBM_BLOCK_SW_RESET_CMD2 0x00000046 - -#define REG_A6XX_RBBM_CLOCK_CNTL 0x000000ae - -#define REG_A6XX_RBBM_CLOCK_CNTL_SP0 0x000000b0 - -#define REG_A6XX_RBBM_CLOCK_CNTL_SP1 0x000000b1 - -#define REG_A6XX_RBBM_CLOCK_CNTL_SP2 0x000000b2 - -#define REG_A6XX_RBBM_CLOCK_CNTL_SP3 0x000000b3 - -#define REG_A6XX_RBBM_CLOCK_CNTL2_SP0 0x000000b4 - -#define REG_A6XX_RBBM_CLOCK_CNTL2_SP1 0x000000b5 - -#define 
REG_A6XX_RBBM_CLOCK_CNTL2_SP2 0x000000b6 - -#define REG_A6XX_RBBM_CLOCK_CNTL2_SP3 0x000000b7 - -#define REG_A6XX_RBBM_CLOCK_DELAY_SP0 0x000000b8 - -#define REG_A6XX_RBBM_CLOCK_DELAY_SP1 0x000000b9 - -#define REG_A6XX_RBBM_CLOCK_DELAY_SP2 0x000000ba - -#define REG_A6XX_RBBM_CLOCK_DELAY_SP3 0x000000bb - -#define REG_A6XX_RBBM_CLOCK_HYST_SP0 0x000000bc - -#define REG_A6XX_RBBM_CLOCK_HYST_SP1 0x000000bd - -#define REG_A6XX_RBBM_CLOCK_HYST_SP2 0x000000be - -#define REG_A6XX_RBBM_CLOCK_HYST_SP3 0x000000bf - -#define REG_A6XX_RBBM_CLOCK_CNTL_TP0 0x000000c0 - -#define REG_A6XX_RBBM_CLOCK_CNTL_TP1 0x000000c1 - -#define REG_A6XX_RBBM_CLOCK_CNTL_TP2 0x000000c2 - -#define REG_A6XX_RBBM_CLOCK_CNTL_TP3 0x000000c3 - -#define REG_A6XX_RBBM_CLOCK_CNTL2_TP0 0x000000c4 - -#define REG_A6XX_RBBM_CLOCK_CNTL2_TP1 0x000000c5 - -#define REG_A6XX_RBBM_CLOCK_CNTL2_TP2 0x000000c6 - -#define REG_A6XX_RBBM_CLOCK_CNTL2_TP3 0x000000c7 - -#define REG_A6XX_RBBM_CLOCK_CNTL3_TP0 0x000000c8 - -#define REG_A6XX_RBBM_CLOCK_CNTL3_TP1 0x000000c9 - -#define REG_A6XX_RBBM_CLOCK_CNTL3_TP2 0x000000ca - -#define REG_A6XX_RBBM_CLOCK_CNTL3_TP3 0x000000cb - -#define REG_A6XX_RBBM_CLOCK_CNTL4_TP0 0x000000cc - -#define REG_A6XX_RBBM_CLOCK_CNTL4_TP1 0x000000cd - -#define REG_A6XX_RBBM_CLOCK_CNTL4_TP2 0x000000ce - -#define REG_A6XX_RBBM_CLOCK_CNTL4_TP3 0x000000cf - -#define REG_A6XX_RBBM_CLOCK_DELAY_TP0 0x000000d0 - -#define REG_A6XX_RBBM_CLOCK_DELAY_TP1 0x000000d1 - -#define REG_A6XX_RBBM_CLOCK_DELAY_TP2 0x000000d2 - -#define REG_A6XX_RBBM_CLOCK_DELAY_TP3 0x000000d3 - -#define REG_A6XX_RBBM_CLOCK_DELAY2_TP0 0x000000d4 - -#define REG_A6XX_RBBM_CLOCK_DELAY2_TP1 0x000000d5 - -#define REG_A6XX_RBBM_CLOCK_DELAY2_TP2 0x000000d6 - -#define REG_A6XX_RBBM_CLOCK_DELAY2_TP3 0x000000d7 - -#define REG_A6XX_RBBM_CLOCK_DELAY3_TP0 0x000000d8 - -#define REG_A6XX_RBBM_CLOCK_DELAY3_TP1 0x000000d9 - -#define REG_A6XX_RBBM_CLOCK_DELAY3_TP2 0x000000da - -#define REG_A6XX_RBBM_CLOCK_DELAY3_TP3 0x000000db - -#define 
REG_A6XX_RBBM_CLOCK_DELAY4_TP0 0x000000dc - -#define REG_A6XX_RBBM_CLOCK_DELAY4_TP1 0x000000dd - -#define REG_A6XX_RBBM_CLOCK_DELAY4_TP2 0x000000de - -#define REG_A6XX_RBBM_CLOCK_DELAY4_TP3 0x000000df - -#define REG_A6XX_RBBM_CLOCK_HYST_TP0 0x000000e0 - -#define REG_A6XX_RBBM_CLOCK_HYST_TP1 0x000000e1 - -#define REG_A6XX_RBBM_CLOCK_HYST_TP2 0x000000e2 - -#define REG_A6XX_RBBM_CLOCK_HYST_TP3 0x000000e3 - -#define REG_A6XX_RBBM_CLOCK_HYST2_TP0 0x000000e4 - -#define REG_A6XX_RBBM_CLOCK_HYST2_TP1 0x000000e5 - -#define REG_A6XX_RBBM_CLOCK_HYST2_TP2 0x000000e6 - -#define REG_A6XX_RBBM_CLOCK_HYST2_TP3 0x000000e7 - -#define REG_A6XX_RBBM_CLOCK_HYST3_TP0 0x000000e8 - -#define REG_A6XX_RBBM_CLOCK_HYST3_TP1 0x000000e9 - -#define REG_A6XX_RBBM_CLOCK_HYST3_TP2 0x000000ea - -#define REG_A6XX_RBBM_CLOCK_HYST3_TP3 0x000000eb - -#define REG_A6XX_RBBM_CLOCK_HYST4_TP0 0x000000ec - -#define REG_A6XX_RBBM_CLOCK_HYST4_TP1 0x000000ed - -#define REG_A6XX_RBBM_CLOCK_HYST4_TP2 0x000000ee - -#define REG_A6XX_RBBM_CLOCK_HYST4_TP3 0x000000ef - -#define REG_A6XX_RBBM_CLOCK_CNTL_RB0 0x000000f0 - -#define REG_A6XX_RBBM_CLOCK_CNTL_RB1 0x000000f1 - -#define REG_A6XX_RBBM_CLOCK_CNTL_RB2 0x000000f2 - -#define REG_A6XX_RBBM_CLOCK_CNTL_RB3 0x000000f3 - -#define REG_A6XX_RBBM_CLOCK_CNTL2_RB0 0x000000f4 - -#define REG_A6XX_RBBM_CLOCK_CNTL2_RB1 0x000000f5 - -#define REG_A6XX_RBBM_CLOCK_CNTL2_RB2 0x000000f6 - -#define REG_A6XX_RBBM_CLOCK_CNTL2_RB3 0x000000f7 - -#define REG_A6XX_RBBM_CLOCK_CNTL_CCU0 0x000000f8 - -#define REG_A6XX_RBBM_CLOCK_CNTL_CCU1 0x000000f9 - -#define REG_A6XX_RBBM_CLOCK_CNTL_CCU2 0x000000fa - -#define REG_A6XX_RBBM_CLOCK_CNTL_CCU3 0x000000fb - -#define REG_A6XX_RBBM_CLOCK_HYST_RB_CCU0 0x00000100 - -#define REG_A6XX_RBBM_CLOCK_HYST_RB_CCU1 0x00000101 - -#define REG_A6XX_RBBM_CLOCK_HYST_RB_CCU2 0x00000102 - -#define REG_A6XX_RBBM_CLOCK_HYST_RB_CCU3 0x00000103 - -#define REG_A6XX_RBBM_CLOCK_CNTL_RAC 0x00000104 - -#define REG_A6XX_RBBM_CLOCK_CNTL2_RAC 0x00000105 - -#define 
REG_A6XX_RBBM_CLOCK_DELAY_RAC 0x00000106 - -#define REG_A6XX_RBBM_CLOCK_HYST_RAC 0x00000107 - -#define REG_A6XX_RBBM_CLOCK_CNTL_TSE_RAS_RBBM 0x00000108 - -#define REG_A6XX_RBBM_CLOCK_DELAY_TSE_RAS_RBBM 0x00000109 - -#define REG_A6XX_RBBM_CLOCK_HYST_TSE_RAS_RBBM 0x0000010a - -#define REG_A6XX_RBBM_CLOCK_CNTL_UCHE 0x0000010b - -#define REG_A6XX_RBBM_CLOCK_CNTL2_UCHE 0x0000010c - -#define REG_A6XX_RBBM_CLOCK_CNTL3_UCHE 0x0000010d - -#define REG_A6XX_RBBM_CLOCK_CNTL4_UCHE 0x0000010e - -#define REG_A6XX_RBBM_CLOCK_DELAY_UCHE 0x0000010f - -#define REG_A6XX_RBBM_CLOCK_HYST_UCHE 0x00000110 - -#define REG_A6XX_RBBM_CLOCK_MODE_VFD 0x00000111 - -#define REG_A6XX_RBBM_CLOCK_DELAY_VFD 0x00000112 - -#define REG_A6XX_RBBM_CLOCK_HYST_VFD 0x00000113 - -#define REG_A6XX_RBBM_CLOCK_MODE_GPC 0x00000114 - -#define REG_A6XX_RBBM_CLOCK_DELAY_GPC 0x00000115 - -#define REG_A6XX_RBBM_CLOCK_HYST_GPC 0x00000116 - -#define REG_A6XX_RBBM_CLOCK_DELAY_HLSQ_2 0x00000117 - -#define REG_A6XX_RBBM_CLOCK_CNTL_GMU_GX 0x00000118 - -#define REG_A6XX_RBBM_CLOCK_DELAY_GMU_GX 0x00000119 - -#define REG_A6XX_RBBM_CLOCK_HYST_GMU_GX 0x0000011a - -#define REG_A6XX_RBBM_CLOCK_MODE_HLSQ 0x0000011b - -#define REG_A6XX_RBBM_CLOCK_DELAY_HLSQ 0x0000011c - -#define REG_A6XX_DBGC_CFG_DBGBUS_SEL_A 0x00000600 - -#define REG_A6XX_DBGC_CFG_DBGBUS_SEL_B 0x00000601 - -#define REG_A6XX_DBGC_CFG_DBGBUS_SEL_C 0x00000602 - -#define REG_A6XX_DBGC_CFG_DBGBUS_SEL_D 0x00000603 -#define A6XX_DBGC_CFG_DBGBUS_SEL_D_PING_INDEX__MASK 0x000000ff -#define A6XX_DBGC_CFG_DBGBUS_SEL_D_PING_INDEX__SHIFT 0 -static inline uint32_t A6XX_DBGC_CFG_DBGBUS_SEL_D_PING_INDEX(uint32_t val) -{ - return ((val) << A6XX_DBGC_CFG_DBGBUS_SEL_D_PING_INDEX__SHIFT) & A6XX_DBGC_CFG_DBGBUS_SEL_D_PING_INDEX__MASK; -} -#define A6XX_DBGC_CFG_DBGBUS_SEL_D_PING_BLK_SEL__MASK 0x0000ff00 -#define A6XX_DBGC_CFG_DBGBUS_SEL_D_PING_BLK_SEL__SHIFT 8 -static inline uint32_t A6XX_DBGC_CFG_DBGBUS_SEL_D_PING_BLK_SEL(uint32_t val) -{ - return ((val) << 
A6XX_DBGC_CFG_DBGBUS_SEL_D_PING_BLK_SEL__SHIFT) & A6XX_DBGC_CFG_DBGBUS_SEL_D_PING_BLK_SEL__MASK; -} - -#define REG_A6XX_DBGC_CFG_DBGBUS_CNTLT 0x00000604 -#define A6XX_DBGC_CFG_DBGBUS_CNTLT_TRACEEN__MASK 0x0000003f -#define A6XX_DBGC_CFG_DBGBUS_CNTLT_TRACEEN__SHIFT 0 -static inline uint32_t A6XX_DBGC_CFG_DBGBUS_CNTLT_TRACEEN(uint32_t val) -{ - return ((val) << A6XX_DBGC_CFG_DBGBUS_CNTLT_TRACEEN__SHIFT) & A6XX_DBGC_CFG_DBGBUS_CNTLT_TRACEEN__MASK; -} -#define A6XX_DBGC_CFG_DBGBUS_CNTLT_GRANU__MASK 0x00007000 -#define A6XX_DBGC_CFG_DBGBUS_CNTLT_GRANU__SHIFT 12 -static inline uint32_t A6XX_DBGC_CFG_DBGBUS_CNTLT_GRANU(uint32_t val) -{ - return ((val) << A6XX_DBGC_CFG_DBGBUS_CNTLT_GRANU__SHIFT) & A6XX_DBGC_CFG_DBGBUS_CNTLT_GRANU__MASK; -} -#define A6XX_DBGC_CFG_DBGBUS_CNTLT_SEGT__MASK 0xf0000000 -#define A6XX_DBGC_CFG_DBGBUS_CNTLT_SEGT__SHIFT 28 -static inline uint32_t A6XX_DBGC_CFG_DBGBUS_CNTLT_SEGT(uint32_t val) -{ - return ((val) << A6XX_DBGC_CFG_DBGBUS_CNTLT_SEGT__SHIFT) & A6XX_DBGC_CFG_DBGBUS_CNTLT_SEGT__MASK; -} - -#define REG_A6XX_DBGC_CFG_DBGBUS_CNTLM 0x00000605 -#define A6XX_DBGC_CFG_DBGBUS_CNTLM_ENABLE__MASK 0x0f000000 -#define A6XX_DBGC_CFG_DBGBUS_CNTLM_ENABLE__SHIFT 24 -static inline uint32_t A6XX_DBGC_CFG_DBGBUS_CNTLM_ENABLE(uint32_t val) -{ - return ((val) << A6XX_DBGC_CFG_DBGBUS_CNTLM_ENABLE__SHIFT) & A6XX_DBGC_CFG_DBGBUS_CNTLM_ENABLE__MASK; -} - -#define REG_A6XX_DBGC_CFG_DBGBUS_IVTL_0 0x00000608 - -#define REG_A6XX_DBGC_CFG_DBGBUS_IVTL_1 0x00000609 - -#define REG_A6XX_DBGC_CFG_DBGBUS_IVTL_2 0x0000060a - -#define REG_A6XX_DBGC_CFG_DBGBUS_IVTL_3 0x0000060b - -#define REG_A6XX_DBGC_CFG_DBGBUS_MASKL_0 0x0000060c - -#define REG_A6XX_DBGC_CFG_DBGBUS_MASKL_1 0x0000060d - -#define REG_A6XX_DBGC_CFG_DBGBUS_MASKL_2 0x0000060e - -#define REG_A6XX_DBGC_CFG_DBGBUS_MASKL_3 0x0000060f - -#define REG_A6XX_DBGC_CFG_DBGBUS_BYTEL_0 0x00000610 -#define A6XX_DBGC_CFG_DBGBUS_BYTEL_0_BYTEL0__MASK 0x0000000f -#define A6XX_DBGC_CFG_DBGBUS_BYTEL_0_BYTEL0__SHIFT 0 -static inline 
uint32_t A6XX_DBGC_CFG_DBGBUS_BYTEL_0_BYTEL0(uint32_t val) -{ - return ((val) << A6XX_DBGC_CFG_DBGBUS_BYTEL_0_BYTEL0__SHIFT) & A6XX_DBGC_CFG_DBGBUS_BYTEL_0_BYTEL0__MASK; -} -#define A6XX_DBGC_CFG_DBGBUS_BYTEL_0_BYTEL1__MASK 0x000000f0 -#define A6XX_DBGC_CFG_DBGBUS_BYTEL_0_BYTEL1__SHIFT 4 -static inline uint32_t A6XX_DBGC_CFG_DBGBUS_BYTEL_0_BYTEL1(uint32_t val) -{ - return ((val) << A6XX_DBGC_CFG_DBGBUS_BYTEL_0_BYTEL1__SHIFT) & A6XX_DBGC_CFG_DBGBUS_BYTEL_0_BYTEL1__MASK; -} -#define A6XX_DBGC_CFG_DBGBUS_BYTEL_0_BYTEL2__MASK 0x00000f00 -#define A6XX_DBGC_CFG_DBGBUS_BYTEL_0_BYTEL2__SHIFT 8 -static inline uint32_t A6XX_DBGC_CFG_DBGBUS_BYTEL_0_BYTEL2(uint32_t val) -{ - return ((val) << A6XX_DBGC_CFG_DBGBUS_BYTEL_0_BYTEL2__SHIFT) & A6XX_DBGC_CFG_DBGBUS_BYTEL_0_BYTEL2__MASK; -} -#define A6XX_DBGC_CFG_DBGBUS_BYTEL_0_BYTEL3__MASK 0x0000f000 -#define A6XX_DBGC_CFG_DBGBUS_BYTEL_0_BYTEL3__SHIFT 12 -static inline uint32_t A6XX_DBGC_CFG_DBGBUS_BYTEL_0_BYTEL3(uint32_t val) -{ - return ((val) << A6XX_DBGC_CFG_DBGBUS_BYTEL_0_BYTEL3__SHIFT) & A6XX_DBGC_CFG_DBGBUS_BYTEL_0_BYTEL3__MASK; -} -#define A6XX_DBGC_CFG_DBGBUS_BYTEL_0_BYTEL4__MASK 0x000f0000 -#define A6XX_DBGC_CFG_DBGBUS_BYTEL_0_BYTEL4__SHIFT 16 -static inline uint32_t A6XX_DBGC_CFG_DBGBUS_BYTEL_0_BYTEL4(uint32_t val) -{ - return ((val) << A6XX_DBGC_CFG_DBGBUS_BYTEL_0_BYTEL4__SHIFT) & A6XX_DBGC_CFG_DBGBUS_BYTEL_0_BYTEL4__MASK; -} -#define A6XX_DBGC_CFG_DBGBUS_BYTEL_0_BYTEL5__MASK 0x00f00000 -#define A6XX_DBGC_CFG_DBGBUS_BYTEL_0_BYTEL5__SHIFT 20 -static inline uint32_t A6XX_DBGC_CFG_DBGBUS_BYTEL_0_BYTEL5(uint32_t val) -{ - return ((val) << A6XX_DBGC_CFG_DBGBUS_BYTEL_0_BYTEL5__SHIFT) & A6XX_DBGC_CFG_DBGBUS_BYTEL_0_BYTEL5__MASK; -} -#define A6XX_DBGC_CFG_DBGBUS_BYTEL_0_BYTEL6__MASK 0x0f000000 -#define A6XX_DBGC_CFG_DBGBUS_BYTEL_0_BYTEL6__SHIFT 24 -static inline uint32_t A6XX_DBGC_CFG_DBGBUS_BYTEL_0_BYTEL6(uint32_t val) -{ - return ((val) << A6XX_DBGC_CFG_DBGBUS_BYTEL_0_BYTEL6__SHIFT) & A6XX_DBGC_CFG_DBGBUS_BYTEL_0_BYTEL6__MASK; 
-} -#define A6XX_DBGC_CFG_DBGBUS_BYTEL_0_BYTEL7__MASK 0xf0000000 -#define A6XX_DBGC_CFG_DBGBUS_BYTEL_0_BYTEL7__SHIFT 28 -static inline uint32_t A6XX_DBGC_CFG_DBGBUS_BYTEL_0_BYTEL7(uint32_t val) -{ - return ((val) << A6XX_DBGC_CFG_DBGBUS_BYTEL_0_BYTEL7__SHIFT) & A6XX_DBGC_CFG_DBGBUS_BYTEL_0_BYTEL7__MASK; -} - -#define REG_A6XX_DBGC_CFG_DBGBUS_BYTEL_1 0x00000611 -#define A6XX_DBGC_CFG_DBGBUS_BYTEL_1_BYTEL8__MASK 0x0000000f -#define A6XX_DBGC_CFG_DBGBUS_BYTEL_1_BYTEL8__SHIFT 0 -static inline uint32_t A6XX_DBGC_CFG_DBGBUS_BYTEL_1_BYTEL8(uint32_t val) -{ - return ((val) << A6XX_DBGC_CFG_DBGBUS_BYTEL_1_BYTEL8__SHIFT) & A6XX_DBGC_CFG_DBGBUS_BYTEL_1_BYTEL8__MASK; -} -#define A6XX_DBGC_CFG_DBGBUS_BYTEL_1_BYTEL9__MASK 0x000000f0 -#define A6XX_DBGC_CFG_DBGBUS_BYTEL_1_BYTEL9__SHIFT 4 -static inline uint32_t A6XX_DBGC_CFG_DBGBUS_BYTEL_1_BYTEL9(uint32_t val) -{ - return ((val) << A6XX_DBGC_CFG_DBGBUS_BYTEL_1_BYTEL9__SHIFT) & A6XX_DBGC_CFG_DBGBUS_BYTEL_1_BYTEL9__MASK; -} -#define A6XX_DBGC_CFG_DBGBUS_BYTEL_1_BYTEL10__MASK 0x00000f00 -#define A6XX_DBGC_CFG_DBGBUS_BYTEL_1_BYTEL10__SHIFT 8 -static inline uint32_t A6XX_DBGC_CFG_DBGBUS_BYTEL_1_BYTEL10(uint32_t val) -{ - return ((val) << A6XX_DBGC_CFG_DBGBUS_BYTEL_1_BYTEL10__SHIFT) & A6XX_DBGC_CFG_DBGBUS_BYTEL_1_BYTEL10__MASK; -} -#define A6XX_DBGC_CFG_DBGBUS_BYTEL_1_BYTEL11__MASK 0x0000f000 -#define A6XX_DBGC_CFG_DBGBUS_BYTEL_1_BYTEL11__SHIFT 12 -static inline uint32_t A6XX_DBGC_CFG_DBGBUS_BYTEL_1_BYTEL11(uint32_t val) -{ - return ((val) << A6XX_DBGC_CFG_DBGBUS_BYTEL_1_BYTEL11__SHIFT) & A6XX_DBGC_CFG_DBGBUS_BYTEL_1_BYTEL11__MASK; -} -#define A6XX_DBGC_CFG_DBGBUS_BYTEL_1_BYTEL12__MASK 0x000f0000 -#define A6XX_DBGC_CFG_DBGBUS_BYTEL_1_BYTEL12__SHIFT 16 -static inline uint32_t A6XX_DBGC_CFG_DBGBUS_BYTEL_1_BYTEL12(uint32_t val) -{ - return ((val) << A6XX_DBGC_CFG_DBGBUS_BYTEL_1_BYTEL12__SHIFT) & A6XX_DBGC_CFG_DBGBUS_BYTEL_1_BYTEL12__MASK; -} -#define A6XX_DBGC_CFG_DBGBUS_BYTEL_1_BYTEL13__MASK 0x00f00000 -#define 
A6XX_DBGC_CFG_DBGBUS_BYTEL_1_BYTEL13__SHIFT 20 -static inline uint32_t A6XX_DBGC_CFG_DBGBUS_BYTEL_1_BYTEL13(uint32_t val) -{ - return ((val) << A6XX_DBGC_CFG_DBGBUS_BYTEL_1_BYTEL13__SHIFT) & A6XX_DBGC_CFG_DBGBUS_BYTEL_1_BYTEL13__MASK; -} -#define A6XX_DBGC_CFG_DBGBUS_BYTEL_1_BYTEL14__MASK 0x0f000000 -#define A6XX_DBGC_CFG_DBGBUS_BYTEL_1_BYTEL14__SHIFT 24 -static inline uint32_t A6XX_DBGC_CFG_DBGBUS_BYTEL_1_BYTEL14(uint32_t val) -{ - return ((val) << A6XX_DBGC_CFG_DBGBUS_BYTEL_1_BYTEL14__SHIFT) & A6XX_DBGC_CFG_DBGBUS_BYTEL_1_BYTEL14__MASK; -} -#define A6XX_DBGC_CFG_DBGBUS_BYTEL_1_BYTEL15__MASK 0xf0000000 -#define A6XX_DBGC_CFG_DBGBUS_BYTEL_1_BYTEL15__SHIFT 28 -static inline uint32_t A6XX_DBGC_CFG_DBGBUS_BYTEL_1_BYTEL15(uint32_t val) -{ - return ((val) << A6XX_DBGC_CFG_DBGBUS_BYTEL_1_BYTEL15__SHIFT) & A6XX_DBGC_CFG_DBGBUS_BYTEL_1_BYTEL15__MASK; -} - -#define REG_A6XX_DBGC_CFG_DBGBUS_TRACE_BUF1 0x0000062f - -#define REG_A6XX_DBGC_CFG_DBGBUS_TRACE_BUF2 0x00000630 - -#define REG_A6XX_VSC_PERFCTR_VSC_SEL_0 0x00000cd8 - -#define REG_A6XX_VSC_PERFCTR_VSC_SEL_1 0x00000cd9 - -#define REG_A6XX_GRAS_ADDR_MODE_CNTL 0x00008601 - -#define REG_A6XX_GRAS_PERFCTR_TSE_SEL_0 0x00008610 - -#define REG_A6XX_GRAS_PERFCTR_TSE_SEL_1 0x00008611 - -#define REG_A6XX_GRAS_PERFCTR_TSE_SEL_2 0x00008612 - -#define REG_A6XX_GRAS_PERFCTR_TSE_SEL_3 0x00008613 - -#define REG_A6XX_GRAS_PERFCTR_RAS_SEL_0 0x00008614 - -#define REG_A6XX_GRAS_PERFCTR_RAS_SEL_1 0x00008615 - -#define REG_A6XX_GRAS_PERFCTR_RAS_SEL_2 0x00008616 - -#define REG_A6XX_GRAS_PERFCTR_RAS_SEL_3 0x00008617 - -#define REG_A6XX_GRAS_PERFCTR_LRZ_SEL_0 0x00008618 - -#define REG_A6XX_GRAS_PERFCTR_LRZ_SEL_1 0x00008619 - -#define REG_A6XX_GRAS_PERFCTR_LRZ_SEL_2 0x0000861a - -#define REG_A6XX_GRAS_PERFCTR_LRZ_SEL_3 0x0000861b - -#define REG_A6XX_RB_ADDR_MODE_CNTL 0x00008e05 - -#define REG_A6XX_RB_NC_MODE_CNTL 0x00008e08 - -#define REG_A6XX_RB_PERFCTR_RB_SEL_0 0x00008e10 - -#define REG_A6XX_RB_PERFCTR_RB_SEL_1 0x00008e11 - -#define 
REG_A6XX_RB_PERFCTR_RB_SEL_2 0x00008e12 - -#define REG_A6XX_RB_PERFCTR_RB_SEL_3 0x00008e13 - -#define REG_A6XX_RB_PERFCTR_RB_SEL_4 0x00008e14 - -#define REG_A6XX_RB_PERFCTR_RB_SEL_5 0x00008e15 - -#define REG_A6XX_RB_PERFCTR_RB_SEL_6 0x00008e16 - -#define REG_A6XX_RB_PERFCTR_RB_SEL_7 0x00008e17 - -#define REG_A6XX_RB_PERFCTR_CCU_SEL_0 0x00008e18 - -#define REG_A6XX_RB_PERFCTR_CCU_SEL_1 0x00008e19 - -#define REG_A6XX_RB_PERFCTR_CCU_SEL_2 0x00008e1a - -#define REG_A6XX_RB_PERFCTR_CCU_SEL_3 0x00008e1b - -#define REG_A6XX_RB_PERFCTR_CCU_SEL_4 0x00008e1c - -#define REG_A6XX_RB_PERFCTR_CMP_SEL_0 0x00008e2c - -#define REG_A6XX_RB_PERFCTR_CMP_SEL_1 0x00008e2d - -#define REG_A6XX_RB_PERFCTR_CMP_SEL_2 0x00008e2e - -#define REG_A6XX_RB_PERFCTR_CMP_SEL_3 0x00008e2f - -#define REG_A6XX_RB_RB_SUB_BLOCK_SEL_CNTL_CD 0x00008e3d - -#define REG_A6XX_RB_CONTEXT_SWITCH_GMEM_SAVE_RESTORE 0x00008e50 - -#define REG_A6XX_PC_DBG_ECO_CNTL 0x00009e00 - -#define REG_A6XX_PC_ADDR_MODE_CNTL 0x00009e01 - -#define REG_A6XX_PC_PERFCTR_PC_SEL_0 0x00009e34 - -#define REG_A6XX_PC_PERFCTR_PC_SEL_1 0x00009e35 - -#define REG_A6XX_PC_PERFCTR_PC_SEL_2 0x00009e36 - -#define REG_A6XX_PC_PERFCTR_PC_SEL_3 0x00009e37 - -#define REG_A6XX_PC_PERFCTR_PC_SEL_4 0x00009e38 - -#define REG_A6XX_PC_PERFCTR_PC_SEL_5 0x00009e39 - -#define REG_A6XX_PC_PERFCTR_PC_SEL_6 0x00009e3a - -#define REG_A6XX_PC_PERFCTR_PC_SEL_7 0x00009e3b - -#define REG_A6XX_HLSQ_ADDR_MODE_CNTL 0x0000be05 - -#define REG_A6XX_HLSQ_PERFCTR_HLSQ_SEL_0 0x0000be10 - -#define REG_A6XX_HLSQ_PERFCTR_HLSQ_SEL_1 0x0000be11 - -#define REG_A6XX_HLSQ_PERFCTR_HLSQ_SEL_2 0x0000be12 - -#define REG_A6XX_HLSQ_PERFCTR_HLSQ_SEL_3 0x0000be13 - -#define REG_A6XX_HLSQ_PERFCTR_HLSQ_SEL_4 0x0000be14 - -#define REG_A6XX_HLSQ_PERFCTR_HLSQ_SEL_5 0x0000be15 - -#define REG_A6XX_HLSQ_DBG_AHB_READ_APERTURE 0x0000c800 - -#define REG_A6XX_HLSQ_DBG_READ_SEL 0x0000d000 - -#define REG_A6XX_VFD_ADDR_MODE_CNTL 0x0000a601 - -#define REG_A6XX_VFD_PERFCTR_VFD_SEL_0 0x0000a610 - -#define 
REG_A6XX_VFD_PERFCTR_VFD_SEL_1 0x0000a611 - -#define REG_A6XX_VFD_PERFCTR_VFD_SEL_2 0x0000a612 - -#define REG_A6XX_VFD_PERFCTR_VFD_SEL_3 0x0000a613 - -#define REG_A6XX_VFD_PERFCTR_VFD_SEL_4 0x0000a614 - -#define REG_A6XX_VFD_PERFCTR_VFD_SEL_5 0x0000a615 - -#define REG_A6XX_VFD_PERFCTR_VFD_SEL_6 0x0000a616 - -#define REG_A6XX_VFD_PERFCTR_VFD_SEL_7 0x0000a617 - -#define REG_A6XX_VPC_ADDR_MODE_CNTL 0x00009601 - -#define REG_A6XX_VPC_PERFCTR_VPC_SEL_0 0x00009604 - -#define REG_A6XX_VPC_PERFCTR_VPC_SEL_1 0x00009605 - -#define REG_A6XX_VPC_PERFCTR_VPC_SEL_2 0x00009606 - -#define REG_A6XX_VPC_PERFCTR_VPC_SEL_3 0x00009607 - -#define REG_A6XX_VPC_PERFCTR_VPC_SEL_4 0x00009608 - -#define REG_A6XX_VPC_PERFCTR_VPC_SEL_5 0x00009609 - -#define REG_A6XX_UCHE_ADDR_MODE_CNTL 0x00000e00 - -#define REG_A6XX_UCHE_MODE_CNTL 0x00000e01 - -#define REG_A6XX_UCHE_WRITE_RANGE_MAX_LO 0x00000e05 - -#define REG_A6XX_UCHE_WRITE_RANGE_MAX_HI 0x00000e06 - -#define REG_A6XX_UCHE_WRITE_THRU_BASE_LO 0x00000e07 - -#define REG_A6XX_UCHE_WRITE_THRU_BASE_HI 0x00000e08 - -#define REG_A6XX_UCHE_TRAP_BASE_LO 0x00000e09 - -#define REG_A6XX_UCHE_TRAP_BASE_HI 0x00000e0a - -#define REG_A6XX_UCHE_GMEM_RANGE_MIN_LO 0x00000e0b - -#define REG_A6XX_UCHE_GMEM_RANGE_MIN_HI 0x00000e0c - -#define REG_A6XX_UCHE_GMEM_RANGE_MAX_LO 0x00000e0d - -#define REG_A6XX_UCHE_GMEM_RANGE_MAX_HI 0x00000e0e - -#define REG_A6XX_UCHE_CACHE_WAYS 0x00000e17 - -#define REG_A6XX_UCHE_FILTER_CNTL 0x00000e18 - -#define REG_A6XX_UCHE_CLIENT_PF 0x00000e19 -#define A6XX_UCHE_CLIENT_PF_PERFSEL__MASK 0x000000ff -#define A6XX_UCHE_CLIENT_PF_PERFSEL__SHIFT 0 -static inline uint32_t A6XX_UCHE_CLIENT_PF_PERFSEL(uint32_t val) -{ - return ((val) << A6XX_UCHE_CLIENT_PF_PERFSEL__SHIFT) & A6XX_UCHE_CLIENT_PF_PERFSEL__MASK; -} - -#define REG_A6XX_UCHE_PERFCTR_UCHE_SEL_0 0x00000e1c - -#define REG_A6XX_UCHE_PERFCTR_UCHE_SEL_1 0x00000e1d - -#define REG_A6XX_UCHE_PERFCTR_UCHE_SEL_2 0x00000e1e - -#define REG_A6XX_UCHE_PERFCTR_UCHE_SEL_3 0x00000e1f - -#define 
REG_A6XX_UCHE_PERFCTR_UCHE_SEL_4 0x00000e20 - -#define REG_A6XX_UCHE_PERFCTR_UCHE_SEL_5 0x00000e21 - -#define REG_A6XX_UCHE_PERFCTR_UCHE_SEL_6 0x00000e22 - -#define REG_A6XX_UCHE_PERFCTR_UCHE_SEL_7 0x00000e23 - -#define REG_A6XX_UCHE_PERFCTR_UCHE_SEL_8 0x00000e24 - -#define REG_A6XX_UCHE_PERFCTR_UCHE_SEL_9 0x00000e25 - -#define REG_A6XX_UCHE_PERFCTR_UCHE_SEL_10 0x00000e26 - -#define REG_A6XX_UCHE_PERFCTR_UCHE_SEL_11 0x00000e27 - -#define REG_A6XX_SP_ADDR_MODE_CNTL 0x0000ae01 - -#define REG_A6XX_SP_NC_MODE_CNTL 0x0000ae02 - -#define REG_A6XX_SP_PERFCTR_SP_SEL_0 0x0000ae10 - -#define REG_A6XX_SP_PERFCTR_SP_SEL_1 0x0000ae11 - -#define REG_A6XX_SP_PERFCTR_SP_SEL_2 0x0000ae12 - -#define REG_A6XX_SP_PERFCTR_SP_SEL_3 0x0000ae13 - -#define REG_A6XX_SP_PERFCTR_SP_SEL_4 0x0000ae14 - -#define REG_A6XX_SP_PERFCTR_SP_SEL_5 0x0000ae15 - -#define REG_A6XX_SP_PERFCTR_SP_SEL_6 0x0000ae16 - -#define REG_A6XX_SP_PERFCTR_SP_SEL_7 0x0000ae17 - -#define REG_A6XX_SP_PERFCTR_SP_SEL_8 0x0000ae18 - -#define REG_A6XX_SP_PERFCTR_SP_SEL_9 0x0000ae19 - -#define REG_A6XX_SP_PERFCTR_SP_SEL_10 0x0000ae1a - -#define REG_A6XX_SP_PERFCTR_SP_SEL_11 0x0000ae1b - -#define REG_A6XX_SP_PERFCTR_SP_SEL_12 0x0000ae1c - -#define REG_A6XX_SP_PERFCTR_SP_SEL_13 0x0000ae1d - -#define REG_A6XX_SP_PERFCTR_SP_SEL_14 0x0000ae1e - -#define REG_A6XX_SP_PERFCTR_SP_SEL_15 0x0000ae1f - -#define REG_A6XX_SP_PERFCTR_SP_SEL_16 0x0000ae20 - -#define REG_A6XX_SP_PERFCTR_SP_SEL_17 0x0000ae21 - -#define REG_A6XX_SP_PERFCTR_SP_SEL_18 0x0000ae22 - -#define REG_A6XX_SP_PERFCTR_SP_SEL_19 0x0000ae23 - -#define REG_A6XX_SP_PERFCTR_SP_SEL_20 0x0000ae24 - -#define REG_A6XX_SP_PERFCTR_SP_SEL_21 0x0000ae25 - -#define REG_A6XX_SP_PERFCTR_SP_SEL_22 0x0000ae26 - -#define REG_A6XX_SP_PERFCTR_SP_SEL_23 0x0000ae27 - -#define REG_A6XX_TPL1_ADDR_MODE_CNTL 0x0000b601 - -#define REG_A6XX_TPL1_NC_MODE_CNTL 0x0000b604 - -#define REG_A6XX_TPL1_PERFCTR_TP_SEL_0 0x0000b610 - -#define REG_A6XX_TPL1_PERFCTR_TP_SEL_1 0x0000b611 - -#define 
REG_A6XX_TPL1_PERFCTR_TP_SEL_2 0x0000b612 - -#define REG_A6XX_TPL1_PERFCTR_TP_SEL_3 0x0000b613 - -#define REG_A6XX_TPL1_PERFCTR_TP_SEL_4 0x0000b614 - -#define REG_A6XX_TPL1_PERFCTR_TP_SEL_5 0x0000b615 - -#define REG_A6XX_TPL1_PERFCTR_TP_SEL_6 0x0000b616 - -#define REG_A6XX_TPL1_PERFCTR_TP_SEL_7 0x0000b617 - -#define REG_A6XX_TPL1_PERFCTR_TP_SEL_8 0x0000b618 - -#define REG_A6XX_TPL1_PERFCTR_TP_SEL_9 0x0000b619 - -#define REG_A6XX_TPL1_PERFCTR_TP_SEL_10 0x0000b61a - -#define REG_A6XX_TPL1_PERFCTR_TP_SEL_11 0x0000b61b - -#define REG_A6XX_VBIF_VERSION 0x00003000 - -#define REG_A6XX_VBIF_CLKON 0x00003001 -#define A6XX_VBIF_CLKON_FORCE_ON_TESTBUS 0x00000002 - -#define REG_A6XX_VBIF_GATE_OFF_WRREQ_EN 0x0000302a - -#define REG_A6XX_VBIF_XIN_HALT_CTRL0 0x00003080 - -#define REG_A6XX_VBIF_XIN_HALT_CTRL1 0x00003081 - -#define REG_A6XX_VBIF_TEST_BUS_OUT_CTRL 0x00003084 - -#define REG_A6XX_VBIF_TEST_BUS1_CTRL0 0x00003085 - -#define REG_A6XX_VBIF_TEST_BUS1_CTRL1 0x00003086 -#define A6XX_VBIF_TEST_BUS1_CTRL1_DATA_SEL__MASK 0x0000000f -#define A6XX_VBIF_TEST_BUS1_CTRL1_DATA_SEL__SHIFT 0 -static inline uint32_t A6XX_VBIF_TEST_BUS1_CTRL1_DATA_SEL(uint32_t val) -{ - return ((val) << A6XX_VBIF_TEST_BUS1_CTRL1_DATA_SEL__SHIFT) & A6XX_VBIF_TEST_BUS1_CTRL1_DATA_SEL__MASK; -} - -#define REG_A6XX_VBIF_TEST_BUS2_CTRL0 0x00003087 - -#define REG_A6XX_VBIF_TEST_BUS2_CTRL1 0x00003088 -#define A6XX_VBIF_TEST_BUS2_CTRL1_DATA_SEL__MASK 0x000001ff -#define A6XX_VBIF_TEST_BUS2_CTRL1_DATA_SEL__SHIFT 0 -static inline uint32_t A6XX_VBIF_TEST_BUS2_CTRL1_DATA_SEL(uint32_t val) -{ - return ((val) << A6XX_VBIF_TEST_BUS2_CTRL1_DATA_SEL__SHIFT) & A6XX_VBIF_TEST_BUS2_CTRL1_DATA_SEL__MASK; -} - -#define REG_A6XX_VBIF_TEST_BUS_OUT 0x0000308c - -#define REG_A6XX_VBIF_PERF_CNT_SEL0 0x000030d0 - -#define REG_A6XX_VBIF_PERF_CNT_SEL1 0x000030d1 - -#define REG_A6XX_VBIF_PERF_CNT_SEL2 0x000030d2 - -#define REG_A6XX_VBIF_PERF_CNT_SEL3 0x000030d3 - -#define REG_A6XX_VBIF_PERF_CNT_LOW0 0x000030d8 - -#define 
REG_A6XX_VBIF_PERF_CNT_LOW1 0x000030d9 - -#define REG_A6XX_VBIF_PERF_CNT_LOW2 0x000030da - -#define REG_A6XX_VBIF_PERF_CNT_LOW3 0x000030db - -#define REG_A6XX_VBIF_PERF_CNT_HIGH0 0x000030e0 - -#define REG_A6XX_VBIF_PERF_CNT_HIGH1 0x000030e1 - -#define REG_A6XX_VBIF_PERF_CNT_HIGH2 0x000030e2 - -#define REG_A6XX_VBIF_PERF_CNT_HIGH3 0x000030e3 - -#define REG_A6XX_VBIF_PERF_PWR_CNT_EN0 0x00003100 - -#define REG_A6XX_VBIF_PERF_PWR_CNT_EN1 0x00003101 - -#define REG_A6XX_VBIF_PERF_PWR_CNT_EN2 0x00003102 - -#define REG_A6XX_VBIF_PERF_PWR_CNT_LOW0 0x00003110 - -#define REG_A6XX_VBIF_PERF_PWR_CNT_LOW1 0x00003111 - -#define REG_A6XX_VBIF_PERF_PWR_CNT_LOW2 0x00003112 - -#define REG_A6XX_VBIF_PERF_PWR_CNT_HIGH0 0x00003118 - -#define REG_A6XX_VBIF_PERF_PWR_CNT_HIGH1 0x00003119 - -#define REG_A6XX_VBIF_PERF_PWR_CNT_HIGH2 0x0000311a - -#define REG_A6XX_RB_WINDOW_OFFSET2 0x000088d4 -#define A6XX_RB_WINDOW_OFFSET2_WINDOW_OFFSET_DISABLE 0x80000000 -#define A6XX_RB_WINDOW_OFFSET2_X__MASK 0x00007fff -#define A6XX_RB_WINDOW_OFFSET2_X__SHIFT 0 -static inline uint32_t A6XX_RB_WINDOW_OFFSET2_X(uint32_t val) -{ - return ((val) << A6XX_RB_WINDOW_OFFSET2_X__SHIFT) & A6XX_RB_WINDOW_OFFSET2_X__MASK; -} -#define A6XX_RB_WINDOW_OFFSET2_Y__MASK 0x7fff0000 -#define A6XX_RB_WINDOW_OFFSET2_Y__SHIFT 16 -static inline uint32_t A6XX_RB_WINDOW_OFFSET2_Y(uint32_t val) -{ - return ((val) << A6XX_RB_WINDOW_OFFSET2_Y__SHIFT) & A6XX_RB_WINDOW_OFFSET2_Y__MASK; -} - -#define REG_A6XX_SP_WINDOW_OFFSET 0x0000b4d1 -#define A6XX_SP_WINDOW_OFFSET_WINDOW_OFFSET_DISABLE 0x80000000 -#define A6XX_SP_WINDOW_OFFSET_X__MASK 0x00007fff -#define A6XX_SP_WINDOW_OFFSET_X__SHIFT 0 -static inline uint32_t A6XX_SP_WINDOW_OFFSET_X(uint32_t val) -{ - return ((val) << A6XX_SP_WINDOW_OFFSET_X__SHIFT) & A6XX_SP_WINDOW_OFFSET_X__MASK; -} -#define A6XX_SP_WINDOW_OFFSET_Y__MASK 0x7fff0000 -#define A6XX_SP_WINDOW_OFFSET_Y__SHIFT 16 -static inline uint32_t A6XX_SP_WINDOW_OFFSET_Y(uint32_t val) -{ - return ((val) << 
A6XX_SP_WINDOW_OFFSET_Y__SHIFT) & A6XX_SP_WINDOW_OFFSET_Y__MASK; -} - -#define REG_A6XX_SP_TP_WINDOW_OFFSET 0x0000b307 -#define A6XX_SP_TP_WINDOW_OFFSET_WINDOW_OFFSET_DISABLE 0x80000000 -#define A6XX_SP_TP_WINDOW_OFFSET_X__MASK 0x00007fff -#define A6XX_SP_TP_WINDOW_OFFSET_X__SHIFT 0 -static inline uint32_t A6XX_SP_TP_WINDOW_OFFSET_X(uint32_t val) -{ - return ((val) << A6XX_SP_TP_WINDOW_OFFSET_X__SHIFT) & A6XX_SP_TP_WINDOW_OFFSET_X__MASK; -} -#define A6XX_SP_TP_WINDOW_OFFSET_Y__MASK 0x7fff0000 -#define A6XX_SP_TP_WINDOW_OFFSET_Y__SHIFT 16 -static inline uint32_t A6XX_SP_TP_WINDOW_OFFSET_Y(uint32_t val) -{ - return ((val) << A6XX_SP_TP_WINDOW_OFFSET_Y__SHIFT) & A6XX_SP_TP_WINDOW_OFFSET_Y__MASK; -} - -#define REG_A6XX_GRAS_BIN_CONTROL 0x000080a1 -#define A6XX_GRAS_BIN_CONTROL_BINW__MASK 0x000000ff -#define A6XX_GRAS_BIN_CONTROL_BINW__SHIFT 0 -static inline uint32_t A6XX_GRAS_BIN_CONTROL_BINW(uint32_t val) -{ - assert(!(val & 0x1f)); - return ((val >> 5) << A6XX_GRAS_BIN_CONTROL_BINW__SHIFT) & A6XX_GRAS_BIN_CONTROL_BINW__MASK; -} -#define A6XX_GRAS_BIN_CONTROL_BINH__MASK 0x0001ff00 -#define A6XX_GRAS_BIN_CONTROL_BINH__SHIFT 8 -static inline uint32_t A6XX_GRAS_BIN_CONTROL_BINH(uint32_t val) -{ - assert(!(val & 0xf)); - return ((val >> 4) << A6XX_GRAS_BIN_CONTROL_BINH__SHIFT) & A6XX_GRAS_BIN_CONTROL_BINH__MASK; -} -#define A6XX_GRAS_BIN_CONTROL_BINNING_PASS 0x00040000 -#define A6XX_GRAS_BIN_CONTROL_USE_VIZ 0x00200000 - -#define REG_A6XX_RB_BIN_CONTROL2 0x000088d3 -#define A6XX_RB_BIN_CONTROL2_BINW__MASK 0x000000ff -#define A6XX_RB_BIN_CONTROL2_BINW__SHIFT 0 -static inline uint32_t A6XX_RB_BIN_CONTROL2_BINW(uint32_t val) -{ - assert(!(val & 0x1f)); - return ((val >> 5) << A6XX_RB_BIN_CONTROL2_BINW__SHIFT) & A6XX_RB_BIN_CONTROL2_BINW__MASK; -} -#define A6XX_RB_BIN_CONTROL2_BINH__MASK 0x0001ff00 -#define A6XX_RB_BIN_CONTROL2_BINH__SHIFT 8 -static inline uint32_t A6XX_RB_BIN_CONTROL2_BINH(uint32_t val) -{ - assert(!(val & 0xf)); - return ((val >> 4) << 
A6XX_RB_BIN_CONTROL2_BINH__SHIFT) & A6XX_RB_BIN_CONTROL2_BINH__MASK; -} - -#define REG_A6XX_VSC_BIN_SIZE 0x00000c02 -#define A6XX_VSC_BIN_SIZE_WIDTH__MASK 0x000000ff -#define A6XX_VSC_BIN_SIZE_WIDTH__SHIFT 0 -static inline uint32_t A6XX_VSC_BIN_SIZE_WIDTH(uint32_t val) -{ - assert(!(val & 0x1f)); - return ((val >> 5) << A6XX_VSC_BIN_SIZE_WIDTH__SHIFT) & A6XX_VSC_BIN_SIZE_WIDTH__MASK; -} -#define A6XX_VSC_BIN_SIZE_HEIGHT__MASK 0x0001ff00 -#define A6XX_VSC_BIN_SIZE_HEIGHT__SHIFT 8 -static inline uint32_t A6XX_VSC_BIN_SIZE_HEIGHT(uint32_t val) -{ - assert(!(val & 0xf)); - return ((val >> 4) << A6XX_VSC_BIN_SIZE_HEIGHT__SHIFT) & A6XX_VSC_BIN_SIZE_HEIGHT__MASK; -} - -#define REG_A6XX_VSC_SIZE_ADDRESS_LO 0x00000c03 - -#define REG_A6XX_VSC_SIZE_ADDRESS_HI 0x00000c04 - -#define REG_A6XX_VSC_BIN_COUNT 0x00000c06 -#define A6XX_VSC_BIN_COUNT_NX__MASK 0x000007fe -#define A6XX_VSC_BIN_COUNT_NX__SHIFT 1 -static inline uint32_t A6XX_VSC_BIN_COUNT_NX(uint32_t val) -{ - return ((val) << A6XX_VSC_BIN_COUNT_NX__SHIFT) & A6XX_VSC_BIN_COUNT_NX__MASK; -} -#define A6XX_VSC_BIN_COUNT_NY__MASK 0x001ff800 -#define A6XX_VSC_BIN_COUNT_NY__SHIFT 11 -static inline uint32_t A6XX_VSC_BIN_COUNT_NY(uint32_t val) -{ - return ((val) << A6XX_VSC_BIN_COUNT_NY__SHIFT) & A6XX_VSC_BIN_COUNT_NY__MASK; -} - -static inline uint32_t REG_A6XX_VSC_PIPE_CONFIG(uint32_t i0) { return 0x00000c10 + 0x1*i0; } - -static inline uint32_t REG_A6XX_VSC_PIPE_CONFIG_REG(uint32_t i0) { return 0x00000c10 + 0x1*i0; } -#define A6XX_VSC_PIPE_CONFIG_REG_X__MASK 0x000003ff -#define A6XX_VSC_PIPE_CONFIG_REG_X__SHIFT 0 -static inline uint32_t A6XX_VSC_PIPE_CONFIG_REG_X(uint32_t val) -{ - return ((val) << A6XX_VSC_PIPE_CONFIG_REG_X__SHIFT) & A6XX_VSC_PIPE_CONFIG_REG_X__MASK; -} -#define A6XX_VSC_PIPE_CONFIG_REG_Y__MASK 0x000ffc00 -#define A6XX_VSC_PIPE_CONFIG_REG_Y__SHIFT 10 -static inline uint32_t A6XX_VSC_PIPE_CONFIG_REG_Y(uint32_t val) -{ - return ((val) << A6XX_VSC_PIPE_CONFIG_REG_Y__SHIFT) & A6XX_VSC_PIPE_CONFIG_REG_Y__MASK; -} 
-#define A6XX_VSC_PIPE_CONFIG_REG_W__MASK 0x03f00000 -#define A6XX_VSC_PIPE_CONFIG_REG_W__SHIFT 20 -static inline uint32_t A6XX_VSC_PIPE_CONFIG_REG_W(uint32_t val) -{ - return ((val) << A6XX_VSC_PIPE_CONFIG_REG_W__SHIFT) & A6XX_VSC_PIPE_CONFIG_REG_W__MASK; -} -#define A6XX_VSC_PIPE_CONFIG_REG_H__MASK 0xfc000000 -#define A6XX_VSC_PIPE_CONFIG_REG_H__SHIFT 26 -static inline uint32_t A6XX_VSC_PIPE_CONFIG_REG_H(uint32_t val) -{ - return ((val) << A6XX_VSC_PIPE_CONFIG_REG_H__SHIFT) & A6XX_VSC_PIPE_CONFIG_REG_H__MASK; -} - -#define REG_A6XX_VSC_PIPE_DATA2_ADDRESS_LO 0x00000c30 - -#define REG_A6XX_VSC_PIPE_DATA2_ADDRESS_HI 0x00000c31 - -#define REG_A6XX_VSC_PIPE_DATA2_PITCH 0x00000c32 - -#define REG_A6XX_VSC_PIPE_DATA2_ARRAY_PITCH 0x00000c33 -#define A6XX_VSC_PIPE_DATA2_ARRAY_PITCH__MASK 0xffffffff -#define A6XX_VSC_PIPE_DATA2_ARRAY_PITCH__SHIFT 0 -static inline uint32_t A6XX_VSC_PIPE_DATA2_ARRAY_PITCH(uint32_t val) -{ - assert(!(val & 0xf)); - return ((val >> 4) << A6XX_VSC_PIPE_DATA2_ARRAY_PITCH__SHIFT) & A6XX_VSC_PIPE_DATA2_ARRAY_PITCH__MASK; -} - -#define REG_A6XX_VSC_PIPE_DATA_ADDRESS_LO 0x00000c34 - -#define REG_A6XX_VSC_PIPE_DATA_ADDRESS_HI 0x00000c35 - -#define REG_A6XX_VSC_PIPE_DATA_PITCH 0x00000c36 - -#define REG_A6XX_VSC_PIPE_DATA_ARRAY_PITCH 0x00000c37 -#define A6XX_VSC_PIPE_DATA_ARRAY_PITCH__MASK 0xffffffff -#define A6XX_VSC_PIPE_DATA_ARRAY_PITCH__SHIFT 0 -static inline uint32_t A6XX_VSC_PIPE_DATA_ARRAY_PITCH(uint32_t val) -{ - assert(!(val & 0xf)); - return ((val >> 4) << A6XX_VSC_PIPE_DATA_ARRAY_PITCH__SHIFT) & A6XX_VSC_PIPE_DATA_ARRAY_PITCH__MASK; -} - -static inline uint32_t REG_A6XX_VSC_SIZE(uint32_t i0) { return 0x00000c78 + 0x1*i0; } - -static inline uint32_t REG_A6XX_VSC_SIZE_REG(uint32_t i0) { return 0x00000c78 + 0x1*i0; } - -#define REG_A6XX_UCHE_UNKNOWN_0E12 0x00000e12 - -#define REG_A6XX_GRAS_UNKNOWN_8000 0x00008000 - -#define REG_A6XX_GRAS_UNKNOWN_8001 0x00008001 - -#define REG_A6XX_GRAS_UNKNOWN_8004 0x00008004 - -#define REG_A6XX_GRAS_CNTL 
0x00008005 -#define A6XX_GRAS_CNTL_VARYING 0x00000001 -#define A6XX_GRAS_CNTL_UNK3 0x00000008 -#define A6XX_GRAS_CNTL_XCOORD 0x00000040 -#define A6XX_GRAS_CNTL_YCOORD 0x00000080 -#define A6XX_GRAS_CNTL_ZCOORD 0x00000100 -#define A6XX_GRAS_CNTL_WCOORD 0x00000200 - -#define REG_A6XX_GRAS_CL_GUARDBAND_CLIP_ADJ 0x00008006 -#define A6XX_GRAS_CL_GUARDBAND_CLIP_ADJ_HORZ__MASK 0x000003ff -#define A6XX_GRAS_CL_GUARDBAND_CLIP_ADJ_HORZ__SHIFT 0 -static inline uint32_t A6XX_GRAS_CL_GUARDBAND_CLIP_ADJ_HORZ(uint32_t val) -{ - return ((val) << A6XX_GRAS_CL_GUARDBAND_CLIP_ADJ_HORZ__SHIFT) & A6XX_GRAS_CL_GUARDBAND_CLIP_ADJ_HORZ__MASK; -} -#define A6XX_GRAS_CL_GUARDBAND_CLIP_ADJ_VERT__MASK 0x000ffc00 -#define A6XX_GRAS_CL_GUARDBAND_CLIP_ADJ_VERT__SHIFT 10 -static inline uint32_t A6XX_GRAS_CL_GUARDBAND_CLIP_ADJ_VERT(uint32_t val) -{ - return ((val) << A6XX_GRAS_CL_GUARDBAND_CLIP_ADJ_VERT__SHIFT) & A6XX_GRAS_CL_GUARDBAND_CLIP_ADJ_VERT__MASK; -} - -#define REG_A6XX_GRAS_CL_VPORT_XOFFSET_0 0x00008010 -#define A6XX_GRAS_CL_VPORT_XOFFSET_0__MASK 0xffffffff -#define A6XX_GRAS_CL_VPORT_XOFFSET_0__SHIFT 0 -static inline uint32_t A6XX_GRAS_CL_VPORT_XOFFSET_0(float val) -{ - return ((fui(val)) << A6XX_GRAS_CL_VPORT_XOFFSET_0__SHIFT) & A6XX_GRAS_CL_VPORT_XOFFSET_0__MASK; -} - -#define REG_A6XX_GRAS_CL_VPORT_XSCALE_0 0x00008011 -#define A6XX_GRAS_CL_VPORT_XSCALE_0__MASK 0xffffffff -#define A6XX_GRAS_CL_VPORT_XSCALE_0__SHIFT 0 -static inline uint32_t A6XX_GRAS_CL_VPORT_XSCALE_0(float val) -{ - return ((fui(val)) << A6XX_GRAS_CL_VPORT_XSCALE_0__SHIFT) & A6XX_GRAS_CL_VPORT_XSCALE_0__MASK; -} - -#define REG_A6XX_GRAS_CL_VPORT_YOFFSET_0 0x00008012 -#define A6XX_GRAS_CL_VPORT_YOFFSET_0__MASK 0xffffffff -#define A6XX_GRAS_CL_VPORT_YOFFSET_0__SHIFT 0 -static inline uint32_t A6XX_GRAS_CL_VPORT_YOFFSET_0(float val) -{ - return ((fui(val)) << A6XX_GRAS_CL_VPORT_YOFFSET_0__SHIFT) & A6XX_GRAS_CL_VPORT_YOFFSET_0__MASK; -} - -#define REG_A6XX_GRAS_CL_VPORT_YSCALE_0 0x00008013 -#define 
A6XX_GRAS_CL_VPORT_YSCALE_0__MASK 0xffffffff -#define A6XX_GRAS_CL_VPORT_YSCALE_0__SHIFT 0 -static inline uint32_t A6XX_GRAS_CL_VPORT_YSCALE_0(float val) -{ - return ((fui(val)) << A6XX_GRAS_CL_VPORT_YSCALE_0__SHIFT) & A6XX_GRAS_CL_VPORT_YSCALE_0__MASK; -} - -#define REG_A6XX_GRAS_CL_VPORT_ZOFFSET_0 0x00008014 -#define A6XX_GRAS_CL_VPORT_ZOFFSET_0__MASK 0xffffffff -#define A6XX_GRAS_CL_VPORT_ZOFFSET_0__SHIFT 0 -static inline uint32_t A6XX_GRAS_CL_VPORT_ZOFFSET_0(float val) -{ - return ((fui(val)) << A6XX_GRAS_CL_VPORT_ZOFFSET_0__SHIFT) & A6XX_GRAS_CL_VPORT_ZOFFSET_0__MASK; -} - -#define REG_A6XX_GRAS_CL_VPORT_ZSCALE_0 0x00008015 -#define A6XX_GRAS_CL_VPORT_ZSCALE_0__MASK 0xffffffff -#define A6XX_GRAS_CL_VPORT_ZSCALE_0__SHIFT 0 -static inline uint32_t A6XX_GRAS_CL_VPORT_ZSCALE_0(float val) -{ - return ((fui(val)) << A6XX_GRAS_CL_VPORT_ZSCALE_0__SHIFT) & A6XX_GRAS_CL_VPORT_ZSCALE_0__MASK; -} - -#define REG_A6XX_GRAS_SU_CNTL 0x00008090 -#define A6XX_GRAS_SU_CNTL_CULL_FRONT 0x00000001 -#define A6XX_GRAS_SU_CNTL_CULL_BACK 0x00000002 -#define A6XX_GRAS_SU_CNTL_FRONT_CW 0x00000004 -#define A6XX_GRAS_SU_CNTL_LINEHALFWIDTH__MASK 0x000007f8 -#define A6XX_GRAS_SU_CNTL_LINEHALFWIDTH__SHIFT 3 -static inline uint32_t A6XX_GRAS_SU_CNTL_LINEHALFWIDTH(float val) -{ - return ((((int32_t)(val * 4.0))) << A6XX_GRAS_SU_CNTL_LINEHALFWIDTH__SHIFT) & A6XX_GRAS_SU_CNTL_LINEHALFWIDTH__MASK; -} -#define A6XX_GRAS_SU_CNTL_POLY_OFFSET 0x00000800 -#define A6XX_GRAS_SU_CNTL_MSAA_ENABLE 0x00002000 - -#define REG_A6XX_GRAS_SU_POINT_MINMAX 0x00008091 -#define A6XX_GRAS_SU_POINT_MINMAX_MIN__MASK 0x0000ffff -#define A6XX_GRAS_SU_POINT_MINMAX_MIN__SHIFT 0 -static inline uint32_t A6XX_GRAS_SU_POINT_MINMAX_MIN(float val) -{ - return ((((uint32_t)(val * 16.0))) << A6XX_GRAS_SU_POINT_MINMAX_MIN__SHIFT) & A6XX_GRAS_SU_POINT_MINMAX_MIN__MASK; -} -#define A6XX_GRAS_SU_POINT_MINMAX_MAX__MASK 0xffff0000 -#define A6XX_GRAS_SU_POINT_MINMAX_MAX__SHIFT 16 -static inline uint32_t A6XX_GRAS_SU_POINT_MINMAX_MAX(float 
val) -{ - return ((((uint32_t)(val * 16.0))) << A6XX_GRAS_SU_POINT_MINMAX_MAX__SHIFT) & A6XX_GRAS_SU_POINT_MINMAX_MAX__MASK; -} - -#define REG_A6XX_GRAS_SU_POINT_SIZE 0x00008092 -#define A6XX_GRAS_SU_POINT_SIZE__MASK 0xffffffff -#define A6XX_GRAS_SU_POINT_SIZE__SHIFT 0 -static inline uint32_t A6XX_GRAS_SU_POINT_SIZE(float val) -{ - return ((((int32_t)(val * 16.0))) << A6XX_GRAS_SU_POINT_SIZE__SHIFT) & A6XX_GRAS_SU_POINT_SIZE__MASK; -} - -#define REG_A6XX_GRAS_SU_DEPTH_PLANE_CNTL 0x00008094 -#define A6XX_GRAS_SU_DEPTH_PLANE_CNTL_FRAG_WRITES_Z 0x00000001 - -#define REG_A6XX_GRAS_SU_POLY_OFFSET_SCALE 0x00008095 -#define A6XX_GRAS_SU_POLY_OFFSET_SCALE__MASK 0xffffffff -#define A6XX_GRAS_SU_POLY_OFFSET_SCALE__SHIFT 0 -static inline uint32_t A6XX_GRAS_SU_POLY_OFFSET_SCALE(float val) -{ - return ((fui(val)) << A6XX_GRAS_SU_POLY_OFFSET_SCALE__SHIFT) & A6XX_GRAS_SU_POLY_OFFSET_SCALE__MASK; -} - -#define REG_A6XX_GRAS_SU_POLY_OFFSET_OFFSET 0x00008096 -#define A6XX_GRAS_SU_POLY_OFFSET_OFFSET__MASK 0xffffffff -#define A6XX_GRAS_SU_POLY_OFFSET_OFFSET__SHIFT 0 -static inline uint32_t A6XX_GRAS_SU_POLY_OFFSET_OFFSET(float val) -{ - return ((fui(val)) << A6XX_GRAS_SU_POLY_OFFSET_OFFSET__SHIFT) & A6XX_GRAS_SU_POLY_OFFSET_OFFSET__MASK; -} - -#define REG_A6XX_GRAS_SU_POLY_OFFSET_OFFSET_CLAMP 0x00008097 -#define A6XX_GRAS_SU_POLY_OFFSET_OFFSET_CLAMP__MASK 0xffffffff -#define A6XX_GRAS_SU_POLY_OFFSET_OFFSET_CLAMP__SHIFT 0 -static inline uint32_t A6XX_GRAS_SU_POLY_OFFSET_OFFSET_CLAMP(float val) -{ - return ((fui(val)) << A6XX_GRAS_SU_POLY_OFFSET_OFFSET_CLAMP__SHIFT) & A6XX_GRAS_SU_POLY_OFFSET_OFFSET_CLAMP__MASK; -} - -#define REG_A6XX_GRAS_SU_DEPTH_BUFFER_INFO 0x00008098 -#define A6XX_GRAS_SU_DEPTH_BUFFER_INFO_DEPTH_FORMAT__MASK 0x00000007 -#define A6XX_GRAS_SU_DEPTH_BUFFER_INFO_DEPTH_FORMAT__SHIFT 0 -static inline uint32_t A6XX_GRAS_SU_DEPTH_BUFFER_INFO_DEPTH_FORMAT(enum a6xx_depth_format val) -{ - return ((val) << A6XX_GRAS_SU_DEPTH_BUFFER_INFO_DEPTH_FORMAT__SHIFT) & 
A6XX_GRAS_SU_DEPTH_BUFFER_INFO_DEPTH_FORMAT__MASK; -} - -#define REG_A6XX_GRAS_UNKNOWN_8099 0x00008099 - -#define REG_A6XX_GRAS_UNKNOWN_809B 0x0000809b - -#define REG_A6XX_GRAS_UNKNOWN_80A0 0x000080a0 - -#define REG_A6XX_GRAS_RAS_MSAA_CNTL 0x000080a2 -#define A6XX_GRAS_RAS_MSAA_CNTL_SAMPLES__MASK 0x00000003 -#define A6XX_GRAS_RAS_MSAA_CNTL_SAMPLES__SHIFT 0 -static inline uint32_t A6XX_GRAS_RAS_MSAA_CNTL_SAMPLES(enum a3xx_msaa_samples val) -{ - return ((val) << A6XX_GRAS_RAS_MSAA_CNTL_SAMPLES__SHIFT) & A6XX_GRAS_RAS_MSAA_CNTL_SAMPLES__MASK; -} - -#define REG_A6XX_GRAS_DEST_MSAA_CNTL 0x000080a3 -#define A6XX_GRAS_DEST_MSAA_CNTL_SAMPLES__MASK 0x00000003 -#define A6XX_GRAS_DEST_MSAA_CNTL_SAMPLES__SHIFT 0 -static inline uint32_t A6XX_GRAS_DEST_MSAA_CNTL_SAMPLES(enum a3xx_msaa_samples val) -{ - return ((val) << A6XX_GRAS_DEST_MSAA_CNTL_SAMPLES__SHIFT) & A6XX_GRAS_DEST_MSAA_CNTL_SAMPLES__MASK; -} -#define A6XX_GRAS_DEST_MSAA_CNTL_MSAA_DISABLE 0x00000004 - -#define REG_A6XX_GRAS_UNKNOWN_80A4 0x000080a4 - -#define REG_A6XX_GRAS_UNKNOWN_80A5 0x000080a5 - -#define REG_A6XX_GRAS_UNKNOWN_80A6 0x000080a6 - -#define REG_A6XX_GRAS_UNKNOWN_80AF 0x000080af - -#define REG_A6XX_GRAS_SC_SCREEN_SCISSOR_TL_0 0x000080b0 -#define A6XX_GRAS_SC_SCREEN_SCISSOR_TL_0_WINDOW_OFFSET_DISABLE 0x80000000 -#define A6XX_GRAS_SC_SCREEN_SCISSOR_TL_0_X__MASK 0x00007fff -#define A6XX_GRAS_SC_SCREEN_SCISSOR_TL_0_X__SHIFT 0 -static inline uint32_t A6XX_GRAS_SC_SCREEN_SCISSOR_TL_0_X(uint32_t val) -{ - return ((val) << A6XX_GRAS_SC_SCREEN_SCISSOR_TL_0_X__SHIFT) & A6XX_GRAS_SC_SCREEN_SCISSOR_TL_0_X__MASK; -} -#define A6XX_GRAS_SC_SCREEN_SCISSOR_TL_0_Y__MASK 0x7fff0000 -#define A6XX_GRAS_SC_SCREEN_SCISSOR_TL_0_Y__SHIFT 16 -static inline uint32_t A6XX_GRAS_SC_SCREEN_SCISSOR_TL_0_Y(uint32_t val) -{ - return ((val) << A6XX_GRAS_SC_SCREEN_SCISSOR_TL_0_Y__SHIFT) & A6XX_GRAS_SC_SCREEN_SCISSOR_TL_0_Y__MASK; -} - -#define REG_A6XX_GRAS_SC_SCREEN_SCISSOR_BR_0 0x000080b1 -#define 
A6XX_GRAS_SC_SCREEN_SCISSOR_BR_0_WINDOW_OFFSET_DISABLE 0x80000000 -#define A6XX_GRAS_SC_SCREEN_SCISSOR_BR_0_X__MASK 0x00007fff -#define A6XX_GRAS_SC_SCREEN_SCISSOR_BR_0_X__SHIFT 0 -static inline uint32_t A6XX_GRAS_SC_SCREEN_SCISSOR_BR_0_X(uint32_t val) -{ - return ((val) << A6XX_GRAS_SC_SCREEN_SCISSOR_BR_0_X__SHIFT) & A6XX_GRAS_SC_SCREEN_SCISSOR_BR_0_X__MASK; -} -#define A6XX_GRAS_SC_SCREEN_SCISSOR_BR_0_Y__MASK 0x7fff0000 -#define A6XX_GRAS_SC_SCREEN_SCISSOR_BR_0_Y__SHIFT 16 -static inline uint32_t A6XX_GRAS_SC_SCREEN_SCISSOR_BR_0_Y(uint32_t val) -{ - return ((val) << A6XX_GRAS_SC_SCREEN_SCISSOR_BR_0_Y__SHIFT) & A6XX_GRAS_SC_SCREEN_SCISSOR_BR_0_Y__MASK; -} - -#define REG_A6XX_GRAS_SC_VIEWPORT_SCISSOR_TL_0 0x000080d0 -#define A6XX_GRAS_SC_VIEWPORT_SCISSOR_TL_0_WINDOW_OFFSET_DISABLE 0x80000000 -#define A6XX_GRAS_SC_VIEWPORT_SCISSOR_TL_0_X__MASK 0x00007fff -#define A6XX_GRAS_SC_VIEWPORT_SCISSOR_TL_0_X__SHIFT 0 -static inline uint32_t A6XX_GRAS_SC_VIEWPORT_SCISSOR_TL_0_X(uint32_t val) -{ - return ((val) << A6XX_GRAS_SC_VIEWPORT_SCISSOR_TL_0_X__SHIFT) & A6XX_GRAS_SC_VIEWPORT_SCISSOR_TL_0_X__MASK; -} -#define A6XX_GRAS_SC_VIEWPORT_SCISSOR_TL_0_Y__MASK 0x7fff0000 -#define A6XX_GRAS_SC_VIEWPORT_SCISSOR_TL_0_Y__SHIFT 16 -static inline uint32_t A6XX_GRAS_SC_VIEWPORT_SCISSOR_TL_0_Y(uint32_t val) -{ - return ((val) << A6XX_GRAS_SC_VIEWPORT_SCISSOR_TL_0_Y__SHIFT) & A6XX_GRAS_SC_VIEWPORT_SCISSOR_TL_0_Y__MASK; -} - -#define REG_A6XX_GRAS_SC_VIEWPORT_SCISSOR_BR_0 0x000080d1 -#define A6XX_GRAS_SC_VIEWPORT_SCISSOR_BR_0_WINDOW_OFFSET_DISABLE 0x80000000 -#define A6XX_GRAS_SC_VIEWPORT_SCISSOR_BR_0_X__MASK 0x00007fff -#define A6XX_GRAS_SC_VIEWPORT_SCISSOR_BR_0_X__SHIFT 0 -static inline uint32_t A6XX_GRAS_SC_VIEWPORT_SCISSOR_BR_0_X(uint32_t val) -{ - return ((val) << A6XX_GRAS_SC_VIEWPORT_SCISSOR_BR_0_X__SHIFT) & A6XX_GRAS_SC_VIEWPORT_SCISSOR_BR_0_X__MASK; -} -#define A6XX_GRAS_SC_VIEWPORT_SCISSOR_BR_0_Y__MASK 0x7fff0000 -#define A6XX_GRAS_SC_VIEWPORT_SCISSOR_BR_0_Y__SHIFT 16 -static 
inline uint32_t A6XX_GRAS_SC_VIEWPORT_SCISSOR_BR_0_Y(uint32_t val) -{ - return ((val) << A6XX_GRAS_SC_VIEWPORT_SCISSOR_BR_0_Y__SHIFT) & A6XX_GRAS_SC_VIEWPORT_SCISSOR_BR_0_Y__MASK; -} - -#define REG_A6XX_GRAS_SC_WINDOW_SCISSOR_TL 0x000080f0 -#define A6XX_GRAS_SC_WINDOW_SCISSOR_TL_WINDOW_OFFSET_DISABLE 0x80000000 -#define A6XX_GRAS_SC_WINDOW_SCISSOR_TL_X__MASK 0x00007fff -#define A6XX_GRAS_SC_WINDOW_SCISSOR_TL_X__SHIFT 0 -static inline uint32_t A6XX_GRAS_SC_WINDOW_SCISSOR_TL_X(uint32_t val) -{ - return ((val) << A6XX_GRAS_SC_WINDOW_SCISSOR_TL_X__SHIFT) & A6XX_GRAS_SC_WINDOW_SCISSOR_TL_X__MASK; -} -#define A6XX_GRAS_SC_WINDOW_SCISSOR_TL_Y__MASK 0x7fff0000 -#define A6XX_GRAS_SC_WINDOW_SCISSOR_TL_Y__SHIFT 16 -static inline uint32_t A6XX_GRAS_SC_WINDOW_SCISSOR_TL_Y(uint32_t val) -{ - return ((val) << A6XX_GRAS_SC_WINDOW_SCISSOR_TL_Y__SHIFT) & A6XX_GRAS_SC_WINDOW_SCISSOR_TL_Y__MASK; -} - -#define REG_A6XX_GRAS_SC_WINDOW_SCISSOR_BR 0x000080f1 -#define A6XX_GRAS_SC_WINDOW_SCISSOR_BR_WINDOW_OFFSET_DISABLE 0x80000000 -#define A6XX_GRAS_SC_WINDOW_SCISSOR_BR_X__MASK 0x00007fff -#define A6XX_GRAS_SC_WINDOW_SCISSOR_BR_X__SHIFT 0 -static inline uint32_t A6XX_GRAS_SC_WINDOW_SCISSOR_BR_X(uint32_t val) -{ - return ((val) << A6XX_GRAS_SC_WINDOW_SCISSOR_BR_X__SHIFT) & A6XX_GRAS_SC_WINDOW_SCISSOR_BR_X__MASK; -} -#define A6XX_GRAS_SC_WINDOW_SCISSOR_BR_Y__MASK 0x7fff0000 -#define A6XX_GRAS_SC_WINDOW_SCISSOR_BR_Y__SHIFT 16 -static inline uint32_t A6XX_GRAS_SC_WINDOW_SCISSOR_BR_Y(uint32_t val) -{ - return ((val) << A6XX_GRAS_SC_WINDOW_SCISSOR_BR_Y__SHIFT) & A6XX_GRAS_SC_WINDOW_SCISSOR_BR_Y__MASK; -} - -#define REG_A6XX_GRAS_LRZ_CNTL 0x00008100 -#define A6XX_GRAS_LRZ_CNTL_ENABLE 0x00000001 -#define A6XX_GRAS_LRZ_CNTL_LRZ_WRITE 0x00000002 -#define A6XX_GRAS_LRZ_CNTL_GREATER 0x00000004 -#define A6XX_GRAS_LRZ_CNTL_UNK3 0x00000008 -#define A6XX_GRAS_LRZ_CNTL_UNK4 0x00000010 - -#define REG_A6XX_GRAS_UNKNOWN_8101 0x00008101 - -#define REG_A6XX_GRAS_2D_BLIT_INFO 0x00008102 -#define 
A6XX_GRAS_2D_BLIT_INFO_COLOR_FORMAT__MASK 0x000000ff -#define A6XX_GRAS_2D_BLIT_INFO_COLOR_FORMAT__SHIFT 0 -static inline uint32_t A6XX_GRAS_2D_BLIT_INFO_COLOR_FORMAT(enum a6xx_color_fmt val) -{ - return ((val) << A6XX_GRAS_2D_BLIT_INFO_COLOR_FORMAT__SHIFT) & A6XX_GRAS_2D_BLIT_INFO_COLOR_FORMAT__MASK; -} - -#define REG_A6XX_GRAS_LRZ_BUFFER_BASE_LO 0x00008103 - -#define REG_A6XX_GRAS_LRZ_BUFFER_BASE_HI 0x00008104 - -#define REG_A6XX_GRAS_LRZ_BUFFER_PITCH 0x00008105 -#define A6XX_GRAS_LRZ_BUFFER_PITCH_PITCH__MASK 0x000007ff -#define A6XX_GRAS_LRZ_BUFFER_PITCH_PITCH__SHIFT 0 -static inline uint32_t A6XX_GRAS_LRZ_BUFFER_PITCH_PITCH(uint32_t val) -{ - assert(!(val & 0x1f)); - return ((val >> 5) << A6XX_GRAS_LRZ_BUFFER_PITCH_PITCH__SHIFT) & A6XX_GRAS_LRZ_BUFFER_PITCH_PITCH__MASK; -} -#define A6XX_GRAS_LRZ_BUFFER_PITCH_ARRAY_PITCH__MASK 0x003ff800 -#define A6XX_GRAS_LRZ_BUFFER_PITCH_ARRAY_PITCH__SHIFT 11 -static inline uint32_t A6XX_GRAS_LRZ_BUFFER_PITCH_ARRAY_PITCH(uint32_t val) -{ - assert(!(val & 0x1f)); - return ((val >> 5) << A6XX_GRAS_LRZ_BUFFER_PITCH_ARRAY_PITCH__SHIFT) & A6XX_GRAS_LRZ_BUFFER_PITCH_ARRAY_PITCH__MASK; -} - -#define REG_A6XX_GRAS_LRZ_FAST_CLEAR_BUFFER_BASE_LO 0x00008106 - -#define REG_A6XX_GRAS_LRZ_FAST_CLEAR_BUFFER_BASE_HI 0x00008107 - -#define REG_A6XX_GRAS_UNKNOWN_8109 0x00008109 - -#define REG_A6XX_GRAS_UNKNOWN_8110 0x00008110 - -#define REG_A6XX_GRAS_2D_BLIT_CNTL 0x00008400 -#define A6XX_GRAS_2D_BLIT_CNTL_COLOR_FORMAT__MASK 0x0000ff00 -#define A6XX_GRAS_2D_BLIT_CNTL_COLOR_FORMAT__SHIFT 8 -static inline uint32_t A6XX_GRAS_2D_BLIT_CNTL_COLOR_FORMAT(enum a6xx_color_fmt val) -{ - return ((val) << A6XX_GRAS_2D_BLIT_CNTL_COLOR_FORMAT__SHIFT) & A6XX_GRAS_2D_BLIT_CNTL_COLOR_FORMAT__MASK; -} -#define A6XX_GRAS_2D_BLIT_CNTL_SCISSOR 0x00010000 - -#define REG_A6XX_GRAS_2D_SRC_TL_X 0x00008401 -#define A6XX_GRAS_2D_SRC_TL_X_X__MASK 0x00ffff00 -#define A6XX_GRAS_2D_SRC_TL_X_X__SHIFT 8 -static inline uint32_t A6XX_GRAS_2D_SRC_TL_X_X(uint32_t val) -{ - return 
((val) << A6XX_GRAS_2D_SRC_TL_X_X__SHIFT) & A6XX_GRAS_2D_SRC_TL_X_X__MASK; -} - -#define REG_A6XX_GRAS_2D_SRC_BR_X 0x00008402 -#define A6XX_GRAS_2D_SRC_BR_X_X__MASK 0x00ffff00 -#define A6XX_GRAS_2D_SRC_BR_X_X__SHIFT 8 -static inline uint32_t A6XX_GRAS_2D_SRC_BR_X_X(uint32_t val) -{ - return ((val) << A6XX_GRAS_2D_SRC_BR_X_X__SHIFT) & A6XX_GRAS_2D_SRC_BR_X_X__MASK; -} - -#define REG_A6XX_GRAS_2D_SRC_TL_Y 0x00008403 -#define A6XX_GRAS_2D_SRC_TL_Y_Y__MASK 0x00ffff00 -#define A6XX_GRAS_2D_SRC_TL_Y_Y__SHIFT 8 -static inline uint32_t A6XX_GRAS_2D_SRC_TL_Y_Y(uint32_t val) -{ - return ((val) << A6XX_GRAS_2D_SRC_TL_Y_Y__SHIFT) & A6XX_GRAS_2D_SRC_TL_Y_Y__MASK; -} - -#define REG_A6XX_GRAS_2D_SRC_BR_Y 0x00008404 -#define A6XX_GRAS_2D_SRC_BR_Y_Y__MASK 0x00ffff00 -#define A6XX_GRAS_2D_SRC_BR_Y_Y__SHIFT 8 -static inline uint32_t A6XX_GRAS_2D_SRC_BR_Y_Y(uint32_t val) -{ - return ((val) << A6XX_GRAS_2D_SRC_BR_Y_Y__SHIFT) & A6XX_GRAS_2D_SRC_BR_Y_Y__MASK; -} - -#define REG_A6XX_GRAS_2D_DST_TL 0x00008405 -#define A6XX_GRAS_2D_DST_TL_WINDOW_OFFSET_DISABLE 0x80000000 -#define A6XX_GRAS_2D_DST_TL_X__MASK 0x00007fff -#define A6XX_GRAS_2D_DST_TL_X__SHIFT 0 -static inline uint32_t A6XX_GRAS_2D_DST_TL_X(uint32_t val) -{ - return ((val) << A6XX_GRAS_2D_DST_TL_X__SHIFT) & A6XX_GRAS_2D_DST_TL_X__MASK; -} -#define A6XX_GRAS_2D_DST_TL_Y__MASK 0x7fff0000 -#define A6XX_GRAS_2D_DST_TL_Y__SHIFT 16 -static inline uint32_t A6XX_GRAS_2D_DST_TL_Y(uint32_t val) -{ - return ((val) << A6XX_GRAS_2D_DST_TL_Y__SHIFT) & A6XX_GRAS_2D_DST_TL_Y__MASK; -} - -#define REG_A6XX_GRAS_2D_DST_BR 0x00008406 -#define A6XX_GRAS_2D_DST_BR_WINDOW_OFFSET_DISABLE 0x80000000 -#define A6XX_GRAS_2D_DST_BR_X__MASK 0x00007fff -#define A6XX_GRAS_2D_DST_BR_X__SHIFT 0 -static inline uint32_t A6XX_GRAS_2D_DST_BR_X(uint32_t val) -{ - return ((val) << A6XX_GRAS_2D_DST_BR_X__SHIFT) & A6XX_GRAS_2D_DST_BR_X__MASK; -} -#define A6XX_GRAS_2D_DST_BR_Y__MASK 0x7fff0000 -#define A6XX_GRAS_2D_DST_BR_Y__SHIFT 16 -static inline uint32_t 
A6XX_GRAS_2D_DST_BR_Y(uint32_t val) -{ - return ((val) << A6XX_GRAS_2D_DST_BR_Y__SHIFT) & A6XX_GRAS_2D_DST_BR_Y__MASK; -} - -#define REG_A6XX_GRAS_RESOLVE_CNTL_1 0x0000840a -#define A6XX_GRAS_RESOLVE_CNTL_1_WINDOW_OFFSET_DISABLE 0x80000000 -#define A6XX_GRAS_RESOLVE_CNTL_1_X__MASK 0x00007fff -#define A6XX_GRAS_RESOLVE_CNTL_1_X__SHIFT 0 -static inline uint32_t A6XX_GRAS_RESOLVE_CNTL_1_X(uint32_t val) -{ - return ((val) << A6XX_GRAS_RESOLVE_CNTL_1_X__SHIFT) & A6XX_GRAS_RESOLVE_CNTL_1_X__MASK; -} -#define A6XX_GRAS_RESOLVE_CNTL_1_Y__MASK 0x7fff0000 -#define A6XX_GRAS_RESOLVE_CNTL_1_Y__SHIFT 16 -static inline uint32_t A6XX_GRAS_RESOLVE_CNTL_1_Y(uint32_t val) -{ - return ((val) << A6XX_GRAS_RESOLVE_CNTL_1_Y__SHIFT) & A6XX_GRAS_RESOLVE_CNTL_1_Y__MASK; -} - -#define REG_A6XX_GRAS_RESOLVE_CNTL_2 0x0000840b -#define A6XX_GRAS_RESOLVE_CNTL_2_WINDOW_OFFSET_DISABLE 0x80000000 -#define A6XX_GRAS_RESOLVE_CNTL_2_X__MASK 0x00007fff -#define A6XX_GRAS_RESOLVE_CNTL_2_X__SHIFT 0 -static inline uint32_t A6XX_GRAS_RESOLVE_CNTL_2_X(uint32_t val) -{ - return ((val) << A6XX_GRAS_RESOLVE_CNTL_2_X__SHIFT) & A6XX_GRAS_RESOLVE_CNTL_2_X__MASK; -} -#define A6XX_GRAS_RESOLVE_CNTL_2_Y__MASK 0x7fff0000 -#define A6XX_GRAS_RESOLVE_CNTL_2_Y__SHIFT 16 -static inline uint32_t A6XX_GRAS_RESOLVE_CNTL_2_Y(uint32_t val) -{ - return ((val) << A6XX_GRAS_RESOLVE_CNTL_2_Y__SHIFT) & A6XX_GRAS_RESOLVE_CNTL_2_Y__MASK; -} - -#define REG_A6XX_GRAS_UNKNOWN_8600 0x00008600 - -#define REG_A6XX_RB_BIN_CONTROL 0x00008800 -#define A6XX_RB_BIN_CONTROL_BINW__MASK 0x000000ff -#define A6XX_RB_BIN_CONTROL_BINW__SHIFT 0 -static inline uint32_t A6XX_RB_BIN_CONTROL_BINW(uint32_t val) -{ - assert(!(val & 0x1f)); - return ((val >> 5) << A6XX_RB_BIN_CONTROL_BINW__SHIFT) & A6XX_RB_BIN_CONTROL_BINW__MASK; -} -#define A6XX_RB_BIN_CONTROL_BINH__MASK 0x0001ff00 -#define A6XX_RB_BIN_CONTROL_BINH__SHIFT 8 -static inline uint32_t A6XX_RB_BIN_CONTROL_BINH(uint32_t val) -{ - assert(!(val & 0xf)); - return ((val >> 4) << 
A6XX_RB_BIN_CONTROL_BINH__SHIFT) & A6XX_RB_BIN_CONTROL_BINH__MASK; -} -#define A6XX_RB_BIN_CONTROL_BINNING_PASS 0x00040000 -#define A6XX_RB_BIN_CONTROL_USE_VIZ 0x00200000 - -#define REG_A6XX_RB_RENDER_CNTL 0x00008801 -#define A6XX_RB_RENDER_CNTL_UNK4 0x00000010 -#define A6XX_RB_RENDER_CNTL_BINNING 0x00000080 -#define A6XX_RB_RENDER_CNTL_FLAG_DEPTH 0x00004000 -#define A6XX_RB_RENDER_CNTL_FLAG_MRTS__MASK 0x00ff0000 -#define A6XX_RB_RENDER_CNTL_FLAG_MRTS__SHIFT 16 -static inline uint32_t A6XX_RB_RENDER_CNTL_FLAG_MRTS(uint32_t val) -{ - return ((val) << A6XX_RB_RENDER_CNTL_FLAG_MRTS__SHIFT) & A6XX_RB_RENDER_CNTL_FLAG_MRTS__MASK; -} - -#define REG_A6XX_RB_RAS_MSAA_CNTL 0x00008802 -#define A6XX_RB_RAS_MSAA_CNTL_SAMPLES__MASK 0x00000003 -#define A6XX_RB_RAS_MSAA_CNTL_SAMPLES__SHIFT 0 -static inline uint32_t A6XX_RB_RAS_MSAA_CNTL_SAMPLES(enum a3xx_msaa_samples val) -{ - return ((val) << A6XX_RB_RAS_MSAA_CNTL_SAMPLES__SHIFT) & A6XX_RB_RAS_MSAA_CNTL_SAMPLES__MASK; -} - -#define REG_A6XX_RB_DEST_MSAA_CNTL 0x00008803 -#define A6XX_RB_DEST_MSAA_CNTL_SAMPLES__MASK 0x00000003 -#define A6XX_RB_DEST_MSAA_CNTL_SAMPLES__SHIFT 0 -static inline uint32_t A6XX_RB_DEST_MSAA_CNTL_SAMPLES(enum a3xx_msaa_samples val) -{ - return ((val) << A6XX_RB_DEST_MSAA_CNTL_SAMPLES__SHIFT) & A6XX_RB_DEST_MSAA_CNTL_SAMPLES__MASK; -} -#define A6XX_RB_DEST_MSAA_CNTL_MSAA_DISABLE 0x00000004 - -#define REG_A6XX_RB_UNKNOWN_8804 0x00008804 - -#define REG_A6XX_RB_UNKNOWN_8805 0x00008805 - -#define REG_A6XX_RB_UNKNOWN_8806 0x00008806 - -#define REG_A6XX_RB_RENDER_CONTROL0 0x00008809 -#define A6XX_RB_RENDER_CONTROL0_VARYING 0x00000001 -#define A6XX_RB_RENDER_CONTROL0_UNK3 0x00000008 -#define A6XX_RB_RENDER_CONTROL0_XCOORD 0x00000040 -#define A6XX_RB_RENDER_CONTROL0_YCOORD 0x00000080 -#define A6XX_RB_RENDER_CONTROL0_ZCOORD 0x00000100 -#define A6XX_RB_RENDER_CONTROL0_WCOORD 0x00000200 -#define A6XX_RB_RENDER_CONTROL0_UNK10 0x00000400 - -#define REG_A6XX_RB_RENDER_CONTROL1 0x0000880a -#define 
A6XX_RB_RENDER_CONTROL1_SAMPLEMASK 0x00000001 -#define A6XX_RB_RENDER_CONTROL1_FACENESS 0x00000002 -#define A6XX_RB_RENDER_CONTROL1_SAMPLEID 0x00000008 - -#define REG_A6XX_RB_FS_OUTPUT_CNTL0 0x0000880b -#define A6XX_RB_FS_OUTPUT_CNTL0_FRAG_WRITES_Z 0x00000002 - -#define REG_A6XX_RB_FS_OUTPUT_CNTL1 0x0000880c -#define A6XX_RB_FS_OUTPUT_CNTL1_MRT__MASK 0x0000000f -#define A6XX_RB_FS_OUTPUT_CNTL1_MRT__SHIFT 0 -static inline uint32_t A6XX_RB_FS_OUTPUT_CNTL1_MRT(uint32_t val) -{ - return ((val) << A6XX_RB_FS_OUTPUT_CNTL1_MRT__SHIFT) & A6XX_RB_FS_OUTPUT_CNTL1_MRT__MASK; -} - -#define REG_A6XX_RB_RENDER_COMPONENTS 0x0000880d -#define A6XX_RB_RENDER_COMPONENTS_RT0__MASK 0x0000000f -#define A6XX_RB_RENDER_COMPONENTS_RT0__SHIFT 0 -static inline uint32_t A6XX_RB_RENDER_COMPONENTS_RT0(uint32_t val) -{ - return ((val) << A6XX_RB_RENDER_COMPONENTS_RT0__SHIFT) & A6XX_RB_RENDER_COMPONENTS_RT0__MASK; -} -#define A6XX_RB_RENDER_COMPONENTS_RT1__MASK 0x000000f0 -#define A6XX_RB_RENDER_COMPONENTS_RT1__SHIFT 4 -static inline uint32_t A6XX_RB_RENDER_COMPONENTS_RT1(uint32_t val) -{ - return ((val) << A6XX_RB_RENDER_COMPONENTS_RT1__SHIFT) & A6XX_RB_RENDER_COMPONENTS_RT1__MASK; -} -#define A6XX_RB_RENDER_COMPONENTS_RT2__MASK 0x00000f00 -#define A6XX_RB_RENDER_COMPONENTS_RT2__SHIFT 8 -static inline uint32_t A6XX_RB_RENDER_COMPONENTS_RT2(uint32_t val) -{ - return ((val) << A6XX_RB_RENDER_COMPONENTS_RT2__SHIFT) & A6XX_RB_RENDER_COMPONENTS_RT2__MASK; -} -#define A6XX_RB_RENDER_COMPONENTS_RT3__MASK 0x0000f000 -#define A6XX_RB_RENDER_COMPONENTS_RT3__SHIFT 12 -static inline uint32_t A6XX_RB_RENDER_COMPONENTS_RT3(uint32_t val) -{ - return ((val) << A6XX_RB_RENDER_COMPONENTS_RT3__SHIFT) & A6XX_RB_RENDER_COMPONENTS_RT3__MASK; -} -#define A6XX_RB_RENDER_COMPONENTS_RT4__MASK 0x000f0000 -#define A6XX_RB_RENDER_COMPONENTS_RT4__SHIFT 16 -static inline uint32_t A6XX_RB_RENDER_COMPONENTS_RT4(uint32_t val) -{ - return ((val) << A6XX_RB_RENDER_COMPONENTS_RT4__SHIFT) & A6XX_RB_RENDER_COMPONENTS_RT4__MASK; -} 
-#define A6XX_RB_RENDER_COMPONENTS_RT5__MASK 0x00f00000 -#define A6XX_RB_RENDER_COMPONENTS_RT5__SHIFT 20 -static inline uint32_t A6XX_RB_RENDER_COMPONENTS_RT5(uint32_t val) -{ - return ((val) << A6XX_RB_RENDER_COMPONENTS_RT5__SHIFT) & A6XX_RB_RENDER_COMPONENTS_RT5__MASK; -} -#define A6XX_RB_RENDER_COMPONENTS_RT6__MASK 0x0f000000 -#define A6XX_RB_RENDER_COMPONENTS_RT6__SHIFT 24 -static inline uint32_t A6XX_RB_RENDER_COMPONENTS_RT6(uint32_t val) -{ - return ((val) << A6XX_RB_RENDER_COMPONENTS_RT6__SHIFT) & A6XX_RB_RENDER_COMPONENTS_RT6__MASK; -} -#define A6XX_RB_RENDER_COMPONENTS_RT7__MASK 0xf0000000 -#define A6XX_RB_RENDER_COMPONENTS_RT7__SHIFT 28 -static inline uint32_t A6XX_RB_RENDER_COMPONENTS_RT7(uint32_t val) -{ - return ((val) << A6XX_RB_RENDER_COMPONENTS_RT7__SHIFT) & A6XX_RB_RENDER_COMPONENTS_RT7__MASK; -} - -#define REG_A6XX_RB_DITHER_CNTL 0x0000880e -#define A6XX_RB_DITHER_CNTL_DITHER_MODE_MRT0__MASK 0x00000003 -#define A6XX_RB_DITHER_CNTL_DITHER_MODE_MRT0__SHIFT 0 -static inline uint32_t A6XX_RB_DITHER_CNTL_DITHER_MODE_MRT0(enum adreno_rb_dither_mode val) -{ - return ((val) << A6XX_RB_DITHER_CNTL_DITHER_MODE_MRT0__SHIFT) & A6XX_RB_DITHER_CNTL_DITHER_MODE_MRT0__MASK; -} -#define A6XX_RB_DITHER_CNTL_DITHER_MODE_MRT1__MASK 0x0000000c -#define A6XX_RB_DITHER_CNTL_DITHER_MODE_MRT1__SHIFT 2 -static inline uint32_t A6XX_RB_DITHER_CNTL_DITHER_MODE_MRT1(enum adreno_rb_dither_mode val) -{ - return ((val) << A6XX_RB_DITHER_CNTL_DITHER_MODE_MRT1__SHIFT) & A6XX_RB_DITHER_CNTL_DITHER_MODE_MRT1__MASK; -} -#define A6XX_RB_DITHER_CNTL_DITHER_MODE_MRT2__MASK 0x00000030 -#define A6XX_RB_DITHER_CNTL_DITHER_MODE_MRT2__SHIFT 4 -static inline uint32_t A6XX_RB_DITHER_CNTL_DITHER_MODE_MRT2(enum adreno_rb_dither_mode val) -{ - return ((val) << A6XX_RB_DITHER_CNTL_DITHER_MODE_MRT2__SHIFT) & A6XX_RB_DITHER_CNTL_DITHER_MODE_MRT2__MASK; -} -#define A6XX_RB_DITHER_CNTL_DITHER_MODE_MRT3__MASK 0x000000c0 -#define A6XX_RB_DITHER_CNTL_DITHER_MODE_MRT3__SHIFT 6 -static inline uint32_t 
A6XX_RB_DITHER_CNTL_DITHER_MODE_MRT3(enum adreno_rb_dither_mode val) -{ - return ((val) << A6XX_RB_DITHER_CNTL_DITHER_MODE_MRT3__SHIFT) & A6XX_RB_DITHER_CNTL_DITHER_MODE_MRT3__MASK; -} -#define A6XX_RB_DITHER_CNTL_DITHER_MODE_MRT4__MASK 0x00000300 -#define A6XX_RB_DITHER_CNTL_DITHER_MODE_MRT4__SHIFT 8 -static inline uint32_t A6XX_RB_DITHER_CNTL_DITHER_MODE_MRT4(enum adreno_rb_dither_mode val) -{ - return ((val) << A6XX_RB_DITHER_CNTL_DITHER_MODE_MRT4__SHIFT) & A6XX_RB_DITHER_CNTL_DITHER_MODE_MRT4__MASK; -} -#define A6XX_RB_DITHER_CNTL_DITHER_MODE_MRT5__MASK 0x00000c00 -#define A6XX_RB_DITHER_CNTL_DITHER_MODE_MRT5__SHIFT 10 -static inline uint32_t A6XX_RB_DITHER_CNTL_DITHER_MODE_MRT5(enum adreno_rb_dither_mode val) -{ - return ((val) << A6XX_RB_DITHER_CNTL_DITHER_MODE_MRT5__SHIFT) & A6XX_RB_DITHER_CNTL_DITHER_MODE_MRT5__MASK; -} -#define A6XX_RB_DITHER_CNTL_DITHER_MODE_MRT6__MASK 0x00001000 -#define A6XX_RB_DITHER_CNTL_DITHER_MODE_MRT6__SHIFT 12 -static inline uint32_t A6XX_RB_DITHER_CNTL_DITHER_MODE_MRT6(enum adreno_rb_dither_mode val) -{ - return ((val) << A6XX_RB_DITHER_CNTL_DITHER_MODE_MRT6__SHIFT) & A6XX_RB_DITHER_CNTL_DITHER_MODE_MRT6__MASK; -} -#define A6XX_RB_DITHER_CNTL_DITHER_MODE_MRT7__MASK 0x0000c000 -#define A6XX_RB_DITHER_CNTL_DITHER_MODE_MRT7__SHIFT 14 -static inline uint32_t A6XX_RB_DITHER_CNTL_DITHER_MODE_MRT7(enum adreno_rb_dither_mode val) -{ - return ((val) << A6XX_RB_DITHER_CNTL_DITHER_MODE_MRT7__SHIFT) & A6XX_RB_DITHER_CNTL_DITHER_MODE_MRT7__MASK; -} - -#define REG_A6XX_RB_SRGB_CNTL 0x0000880f -#define A6XX_RB_SRGB_CNTL_SRGB_MRT0 0x00000001 -#define A6XX_RB_SRGB_CNTL_SRGB_MRT1 0x00000002 -#define A6XX_RB_SRGB_CNTL_SRGB_MRT2 0x00000004 -#define A6XX_RB_SRGB_CNTL_SRGB_MRT3 0x00000008 -#define A6XX_RB_SRGB_CNTL_SRGB_MRT4 0x00000010 -#define A6XX_RB_SRGB_CNTL_SRGB_MRT5 0x00000020 -#define A6XX_RB_SRGB_CNTL_SRGB_MRT6 0x00000040 -#define A6XX_RB_SRGB_CNTL_SRGB_MRT7 0x00000080 - -#define REG_A6XX_RB_UNKNOWN_8810 0x00008810 - -#define 
REG_A6XX_RB_UNKNOWN_8811 0x00008811 - -#define REG_A6XX_RB_UNKNOWN_8818 0x00008818 - -#define REG_A6XX_RB_UNKNOWN_8819 0x00008819 - -#define REG_A6XX_RB_UNKNOWN_881A 0x0000881a - -#define REG_A6XX_RB_UNKNOWN_881B 0x0000881b - -#define REG_A6XX_RB_UNKNOWN_881C 0x0000881c - -#define REG_A6XX_RB_UNKNOWN_881D 0x0000881d - -#define REG_A6XX_RB_UNKNOWN_881E 0x0000881e - -static inline uint32_t REG_A6XX_RB_MRT(uint32_t i0) { return 0x00008820 + 0x8*i0; } - -static inline uint32_t REG_A6XX_RB_MRT_CONTROL(uint32_t i0) { return 0x00008820 + 0x8*i0; } -#define A6XX_RB_MRT_CONTROL_BLEND 0x00000001 -#define A6XX_RB_MRT_CONTROL_BLEND2 0x00000002 -#define A6XX_RB_MRT_CONTROL_ROP_ENABLE 0x00000004 -#define A6XX_RB_MRT_CONTROL_ROP_CODE__MASK 0x00000078 -#define A6XX_RB_MRT_CONTROL_ROP_CODE__SHIFT 3 -static inline uint32_t A6XX_RB_MRT_CONTROL_ROP_CODE(enum a3xx_rop_code val) -{ - return ((val) << A6XX_RB_MRT_CONTROL_ROP_CODE__SHIFT) & A6XX_RB_MRT_CONTROL_ROP_CODE__MASK; -} -#define A6XX_RB_MRT_CONTROL_COMPONENT_ENABLE__MASK 0x00000780 -#define A6XX_RB_MRT_CONTROL_COMPONENT_ENABLE__SHIFT 7 -static inline uint32_t A6XX_RB_MRT_CONTROL_COMPONENT_ENABLE(uint32_t val) -{ - return ((val) << A6XX_RB_MRT_CONTROL_COMPONENT_ENABLE__SHIFT) & A6XX_RB_MRT_CONTROL_COMPONENT_ENABLE__MASK; -} - -static inline uint32_t REG_A6XX_RB_MRT_BLEND_CONTROL(uint32_t i0) { return 0x00008821 + 0x8*i0; } -#define A6XX_RB_MRT_BLEND_CONTROL_RGB_SRC_FACTOR__MASK 0x0000001f -#define A6XX_RB_MRT_BLEND_CONTROL_RGB_SRC_FACTOR__SHIFT 0 -static inline uint32_t A6XX_RB_MRT_BLEND_CONTROL_RGB_SRC_FACTOR(enum adreno_rb_blend_factor val) -{ - return ((val) << A6XX_RB_MRT_BLEND_CONTROL_RGB_SRC_FACTOR__SHIFT) & A6XX_RB_MRT_BLEND_CONTROL_RGB_SRC_FACTOR__MASK; -} -#define A6XX_RB_MRT_BLEND_CONTROL_RGB_BLEND_OPCODE__MASK 0x000000e0 -#define A6XX_RB_MRT_BLEND_CONTROL_RGB_BLEND_OPCODE__SHIFT 5 -static inline uint32_t A6XX_RB_MRT_BLEND_CONTROL_RGB_BLEND_OPCODE(enum a3xx_rb_blend_opcode val) -{ - return ((val) << 
A6XX_RB_MRT_BLEND_CONTROL_RGB_BLEND_OPCODE__SHIFT) & A6XX_RB_MRT_BLEND_CONTROL_RGB_BLEND_OPCODE__MASK; -} -#define A6XX_RB_MRT_BLEND_CONTROL_RGB_DEST_FACTOR__MASK 0x00001f00 -#define A6XX_RB_MRT_BLEND_CONTROL_RGB_DEST_FACTOR__SHIFT 8 -static inline uint32_t A6XX_RB_MRT_BLEND_CONTROL_RGB_DEST_FACTOR(enum adreno_rb_blend_factor val) -{ - return ((val) << A6XX_RB_MRT_BLEND_CONTROL_RGB_DEST_FACTOR__SHIFT) & A6XX_RB_MRT_BLEND_CONTROL_RGB_DEST_FACTOR__MASK; -} -#define A6XX_RB_MRT_BLEND_CONTROL_ALPHA_SRC_FACTOR__MASK 0x001f0000 -#define A6XX_RB_MRT_BLEND_CONTROL_ALPHA_SRC_FACTOR__SHIFT 16 -static inline uint32_t A6XX_RB_MRT_BLEND_CONTROL_ALPHA_SRC_FACTOR(enum adreno_rb_blend_factor val) -{ - return ((val) << A6XX_RB_MRT_BLEND_CONTROL_ALPHA_SRC_FACTOR__SHIFT) & A6XX_RB_MRT_BLEND_CONTROL_ALPHA_SRC_FACTOR__MASK; -} -#define A6XX_RB_MRT_BLEND_CONTROL_ALPHA_BLEND_OPCODE__MASK 0x00e00000 -#define A6XX_RB_MRT_BLEND_CONTROL_ALPHA_BLEND_OPCODE__SHIFT 21 -static inline uint32_t A6XX_RB_MRT_BLEND_CONTROL_ALPHA_BLEND_OPCODE(enum a3xx_rb_blend_opcode val) -{ - return ((val) << A6XX_RB_MRT_BLEND_CONTROL_ALPHA_BLEND_OPCODE__SHIFT) & A6XX_RB_MRT_BLEND_CONTROL_ALPHA_BLEND_OPCODE__MASK; -} -#define A6XX_RB_MRT_BLEND_CONTROL_ALPHA_DEST_FACTOR__MASK 0x1f000000 -#define A6XX_RB_MRT_BLEND_CONTROL_ALPHA_DEST_FACTOR__SHIFT 24 -static inline uint32_t A6XX_RB_MRT_BLEND_CONTROL_ALPHA_DEST_FACTOR(enum adreno_rb_blend_factor val) -{ - return ((val) << A6XX_RB_MRT_BLEND_CONTROL_ALPHA_DEST_FACTOR__SHIFT) & A6XX_RB_MRT_BLEND_CONTROL_ALPHA_DEST_FACTOR__MASK; -} - -static inline uint32_t REG_A6XX_RB_MRT_BUF_INFO(uint32_t i0) { return 0x00008822 + 0x8*i0; } -#define A6XX_RB_MRT_BUF_INFO_COLOR_FORMAT__MASK 0x000000ff -#define A6XX_RB_MRT_BUF_INFO_COLOR_FORMAT__SHIFT 0 -static inline uint32_t A6XX_RB_MRT_BUF_INFO_COLOR_FORMAT(enum a6xx_color_fmt val) -{ - return ((val) << A6XX_RB_MRT_BUF_INFO_COLOR_FORMAT__SHIFT) & A6XX_RB_MRT_BUF_INFO_COLOR_FORMAT__MASK; -} -#define 
A6XX_RB_MRT_BUF_INFO_COLOR_TILE_MODE__MASK 0x00000300 -#define A6XX_RB_MRT_BUF_INFO_COLOR_TILE_MODE__SHIFT 8 -static inline uint32_t A6XX_RB_MRT_BUF_INFO_COLOR_TILE_MODE(enum a6xx_tile_mode val) -{ - return ((val) << A6XX_RB_MRT_BUF_INFO_COLOR_TILE_MODE__SHIFT) & A6XX_RB_MRT_BUF_INFO_COLOR_TILE_MODE__MASK; -} -#define A6XX_RB_MRT_BUF_INFO_COLOR_SWAP__MASK 0x00006000 -#define A6XX_RB_MRT_BUF_INFO_COLOR_SWAP__SHIFT 13 -static inline uint32_t A6XX_RB_MRT_BUF_INFO_COLOR_SWAP(enum a3xx_color_swap val) -{ - return ((val) << A6XX_RB_MRT_BUF_INFO_COLOR_SWAP__SHIFT) & A6XX_RB_MRT_BUF_INFO_COLOR_SWAP__MASK; -} - -static inline uint32_t REG_A6XX_RB_MRT_PITCH(uint32_t i0) { return 0x00008823 + 0x8*i0; } -#define A6XX_RB_MRT_PITCH__MASK 0xffffffff -#define A6XX_RB_MRT_PITCH__SHIFT 0 -static inline uint32_t A6XX_RB_MRT_PITCH(uint32_t val) -{ - assert(!(val & 0x3f)); - return ((val >> 6) << A6XX_RB_MRT_PITCH__SHIFT) & A6XX_RB_MRT_PITCH__MASK; -} - -static inline uint32_t REG_A6XX_RB_MRT_ARRAY_PITCH(uint32_t i0) { return 0x00008824 + 0x8*i0; } -#define A6XX_RB_MRT_ARRAY_PITCH__MASK 0xffffffff -#define A6XX_RB_MRT_ARRAY_PITCH__SHIFT 0 -static inline uint32_t A6XX_RB_MRT_ARRAY_PITCH(uint32_t val) -{ - assert(!(val & 0x3f)); - return ((val >> 6) << A6XX_RB_MRT_ARRAY_PITCH__SHIFT) & A6XX_RB_MRT_ARRAY_PITCH__MASK; -} - -static inline uint32_t REG_A6XX_RB_MRT_BASE_LO(uint32_t i0) { return 0x00008825 + 0x8*i0; } - -static inline uint32_t REG_A6XX_RB_MRT_BASE_HI(uint32_t i0) { return 0x00008826 + 0x8*i0; } - -static inline uint32_t REG_A6XX_RB_MRT_BASE_GMEM(uint32_t i0) { return 0x00008827 + 0x8*i0; } - -#define REG_A6XX_RB_BLEND_RED_F32 0x00008860 -#define A6XX_RB_BLEND_RED_F32__MASK 0xffffffff -#define A6XX_RB_BLEND_RED_F32__SHIFT 0 -static inline uint32_t A6XX_RB_BLEND_RED_F32(float val) -{ - return ((fui(val)) << A6XX_RB_BLEND_RED_F32__SHIFT) & A6XX_RB_BLEND_RED_F32__MASK; -} - -#define REG_A6XX_RB_BLEND_GREEN_F32 0x00008861 -#define A6XX_RB_BLEND_GREEN_F32__MASK 0xffffffff -#define 
A6XX_RB_BLEND_GREEN_F32__SHIFT 0 -static inline uint32_t A6XX_RB_BLEND_GREEN_F32(float val) -{ - return ((fui(val)) << A6XX_RB_BLEND_GREEN_F32__SHIFT) & A6XX_RB_BLEND_GREEN_F32__MASK; -} - -#define REG_A6XX_RB_BLEND_BLUE_F32 0x00008862 -#define A6XX_RB_BLEND_BLUE_F32__MASK 0xffffffff -#define A6XX_RB_BLEND_BLUE_F32__SHIFT 0 -static inline uint32_t A6XX_RB_BLEND_BLUE_F32(float val) -{ - return ((fui(val)) << A6XX_RB_BLEND_BLUE_F32__SHIFT) & A6XX_RB_BLEND_BLUE_F32__MASK; -} - -#define REG_A6XX_RB_BLEND_ALPHA_F32 0x00008863 -#define A6XX_RB_BLEND_ALPHA_F32__MASK 0xffffffff -#define A6XX_RB_BLEND_ALPHA_F32__SHIFT 0 -static inline uint32_t A6XX_RB_BLEND_ALPHA_F32(float val) -{ - return ((fui(val)) << A6XX_RB_BLEND_ALPHA_F32__SHIFT) & A6XX_RB_BLEND_ALPHA_F32__MASK; -} - -#define REG_A6XX_RB_ALPHA_CONTROL 0x00008864 -#define A6XX_RB_ALPHA_CONTROL_ALPHA_REF__MASK 0x000000ff -#define A6XX_RB_ALPHA_CONTROL_ALPHA_REF__SHIFT 0 -static inline uint32_t A6XX_RB_ALPHA_CONTROL_ALPHA_REF(uint32_t val) -{ - return ((val) << A6XX_RB_ALPHA_CONTROL_ALPHA_REF__SHIFT) & A6XX_RB_ALPHA_CONTROL_ALPHA_REF__MASK; -} -#define A6XX_RB_ALPHA_CONTROL_ALPHA_TEST 0x00000100 -#define A6XX_RB_ALPHA_CONTROL_ALPHA_TEST_FUNC__MASK 0x00000e00 -#define A6XX_RB_ALPHA_CONTROL_ALPHA_TEST_FUNC__SHIFT 9 -static inline uint32_t A6XX_RB_ALPHA_CONTROL_ALPHA_TEST_FUNC(enum adreno_compare_func val) -{ - return ((val) << A6XX_RB_ALPHA_CONTROL_ALPHA_TEST_FUNC__SHIFT) & A6XX_RB_ALPHA_CONTROL_ALPHA_TEST_FUNC__MASK; -} - -#define REG_A6XX_RB_BLEND_CNTL 0x00008865 -#define A6XX_RB_BLEND_CNTL_ENABLE_BLEND__MASK 0x000000ff -#define A6XX_RB_BLEND_CNTL_ENABLE_BLEND__SHIFT 0 -static inline uint32_t A6XX_RB_BLEND_CNTL_ENABLE_BLEND(uint32_t val) -{ - return ((val) << A6XX_RB_BLEND_CNTL_ENABLE_BLEND__SHIFT) & A6XX_RB_BLEND_CNTL_ENABLE_BLEND__MASK; -} -#define A6XX_RB_BLEND_CNTL_INDEPENDENT_BLEND 0x00000100 -#define A6XX_RB_BLEND_CNTL_SAMPLE_MASK__MASK 0xffff0000 -#define A6XX_RB_BLEND_CNTL_SAMPLE_MASK__SHIFT 16 -static inline 
uint32_t A6XX_RB_BLEND_CNTL_SAMPLE_MASK(uint32_t val) -{ - return ((val) << A6XX_RB_BLEND_CNTL_SAMPLE_MASK__SHIFT) & A6XX_RB_BLEND_CNTL_SAMPLE_MASK__MASK; -} - -#define REG_A6XX_RB_DEPTH_PLANE_CNTL 0x00008870 -#define A6XX_RB_DEPTH_PLANE_CNTL_FRAG_WRITES_Z 0x00000001 - -#define REG_A6XX_RB_DEPTH_CNTL 0x00008871 -#define A6XX_RB_DEPTH_CNTL_Z_ENABLE 0x00000001 -#define A6XX_RB_DEPTH_CNTL_Z_WRITE_ENABLE 0x00000002 -#define A6XX_RB_DEPTH_CNTL_ZFUNC__MASK 0x0000001c -#define A6XX_RB_DEPTH_CNTL_ZFUNC__SHIFT 2 -static inline uint32_t A6XX_RB_DEPTH_CNTL_ZFUNC(enum adreno_compare_func val) -{ - return ((val) << A6XX_RB_DEPTH_CNTL_ZFUNC__SHIFT) & A6XX_RB_DEPTH_CNTL_ZFUNC__MASK; -} -#define A6XX_RB_DEPTH_CNTL_Z_TEST_ENABLE 0x00000040 - -#define REG_A6XX_RB_DEPTH_BUFFER_INFO 0x00008872 -#define A6XX_RB_DEPTH_BUFFER_INFO_DEPTH_FORMAT__MASK 0x00000007 -#define A6XX_RB_DEPTH_BUFFER_INFO_DEPTH_FORMAT__SHIFT 0 -static inline uint32_t A6XX_RB_DEPTH_BUFFER_INFO_DEPTH_FORMAT(enum a6xx_depth_format val) -{ - return ((val) << A6XX_RB_DEPTH_BUFFER_INFO_DEPTH_FORMAT__SHIFT) & A6XX_RB_DEPTH_BUFFER_INFO_DEPTH_FORMAT__MASK; -} - -#define REG_A6XX_RB_DEPTH_BUFFER_PITCH 0x00008873 -#define A6XX_RB_DEPTH_BUFFER_PITCH__MASK 0xffffffff -#define A6XX_RB_DEPTH_BUFFER_PITCH__SHIFT 0 -static inline uint32_t A6XX_RB_DEPTH_BUFFER_PITCH(uint32_t val) -{ - assert(!(val & 0x3f)); - return ((val >> 6) << A6XX_RB_DEPTH_BUFFER_PITCH__SHIFT) & A6XX_RB_DEPTH_BUFFER_PITCH__MASK; -} - -#define REG_A6XX_RB_DEPTH_BUFFER_ARRAY_PITCH 0x00008874 -#define A6XX_RB_DEPTH_BUFFER_ARRAY_PITCH__MASK 0xffffffff -#define A6XX_RB_DEPTH_BUFFER_ARRAY_PITCH__SHIFT 0 -static inline uint32_t A6XX_RB_DEPTH_BUFFER_ARRAY_PITCH(uint32_t val) -{ - assert(!(val & 0x3f)); - return ((val >> 6) << A6XX_RB_DEPTH_BUFFER_ARRAY_PITCH__SHIFT) & A6XX_RB_DEPTH_BUFFER_ARRAY_PITCH__MASK; -} - -#define REG_A6XX_RB_DEPTH_BUFFER_BASE_LO 0x00008875 - -#define REG_A6XX_RB_DEPTH_BUFFER_BASE_HI 0x00008876 - -#define REG_A6XX_RB_DEPTH_BUFFER_BASE_GMEM 
0x00008877 - -#define REG_A6XX_RB_UNKNOWN_8878 0x00008878 - -#define REG_A6XX_RB_UNKNOWN_8879 0x00008879 - -#define REG_A6XX_RB_STENCIL_CONTROL 0x00008880 -#define A6XX_RB_STENCIL_CONTROL_STENCIL_ENABLE 0x00000001 -#define A6XX_RB_STENCIL_CONTROL_STENCIL_ENABLE_BF 0x00000002 -#define A6XX_RB_STENCIL_CONTROL_STENCIL_READ 0x00000004 -#define A6XX_RB_STENCIL_CONTROL_FUNC__MASK 0x00000700 -#define A6XX_RB_STENCIL_CONTROL_FUNC__SHIFT 8 -static inline uint32_t A6XX_RB_STENCIL_CONTROL_FUNC(enum adreno_compare_func val) -{ - return ((val) << A6XX_RB_STENCIL_CONTROL_FUNC__SHIFT) & A6XX_RB_STENCIL_CONTROL_FUNC__MASK; -} -#define A6XX_RB_STENCIL_CONTROL_FAIL__MASK 0x00003800 -#define A6XX_RB_STENCIL_CONTROL_FAIL__SHIFT 11 -static inline uint32_t A6XX_RB_STENCIL_CONTROL_FAIL(enum adreno_stencil_op val) -{ - return ((val) << A6XX_RB_STENCIL_CONTROL_FAIL__SHIFT) & A6XX_RB_STENCIL_CONTROL_FAIL__MASK; -} -#define A6XX_RB_STENCIL_CONTROL_ZPASS__MASK 0x0001c000 -#define A6XX_RB_STENCIL_CONTROL_ZPASS__SHIFT 14 -static inline uint32_t A6XX_RB_STENCIL_CONTROL_ZPASS(enum adreno_stencil_op val) -{ - return ((val) << A6XX_RB_STENCIL_CONTROL_ZPASS__SHIFT) & A6XX_RB_STENCIL_CONTROL_ZPASS__MASK; -} -#define A6XX_RB_STENCIL_CONTROL_ZFAIL__MASK 0x000e0000 -#define A6XX_RB_STENCIL_CONTROL_ZFAIL__SHIFT 17 -static inline uint32_t A6XX_RB_STENCIL_CONTROL_ZFAIL(enum adreno_stencil_op val) -{ - return ((val) << A6XX_RB_STENCIL_CONTROL_ZFAIL__SHIFT) & A6XX_RB_STENCIL_CONTROL_ZFAIL__MASK; -} -#define A6XX_RB_STENCIL_CONTROL_FUNC_BF__MASK 0x00700000 -#define A6XX_RB_STENCIL_CONTROL_FUNC_BF__SHIFT 20 -static inline uint32_t A6XX_RB_STENCIL_CONTROL_FUNC_BF(enum adreno_compare_func val) -{ - return ((val) << A6XX_RB_STENCIL_CONTROL_FUNC_BF__SHIFT) & A6XX_RB_STENCIL_CONTROL_FUNC_BF__MASK; -} -#define A6XX_RB_STENCIL_CONTROL_FAIL_BF__MASK 0x03800000 -#define A6XX_RB_STENCIL_CONTROL_FAIL_BF__SHIFT 23 -static inline uint32_t A6XX_RB_STENCIL_CONTROL_FAIL_BF(enum adreno_stencil_op val) -{ - return ((val) << 
A6XX_RB_STENCIL_CONTROL_FAIL_BF__SHIFT) & A6XX_RB_STENCIL_CONTROL_FAIL_BF__MASK; -} -#define A6XX_RB_STENCIL_CONTROL_ZPASS_BF__MASK 0x1c000000 -#define A6XX_RB_STENCIL_CONTROL_ZPASS_BF__SHIFT 26 -static inline uint32_t A6XX_RB_STENCIL_CONTROL_ZPASS_BF(enum adreno_stencil_op val) -{ - return ((val) << A6XX_RB_STENCIL_CONTROL_ZPASS_BF__SHIFT) & A6XX_RB_STENCIL_CONTROL_ZPASS_BF__MASK; -} -#define A6XX_RB_STENCIL_CONTROL_ZFAIL_BF__MASK 0xe0000000 -#define A6XX_RB_STENCIL_CONTROL_ZFAIL_BF__SHIFT 29 -static inline uint32_t A6XX_RB_STENCIL_CONTROL_ZFAIL_BF(enum adreno_stencil_op val) -{ - return ((val) << A6XX_RB_STENCIL_CONTROL_ZFAIL_BF__SHIFT) & A6XX_RB_STENCIL_CONTROL_ZFAIL_BF__MASK; -} - -#define REG_A6XX_RB_STENCIL_INFO 0x00008881 -#define A6XX_RB_STENCIL_INFO_SEPARATE_STENCIL 0x00000001 - -#define REG_A6XX_RB_STENCIL_BUFFER_PITCH 0x00008882 -#define A6XX_RB_STENCIL_BUFFER_PITCH__MASK 0xffffffff -#define A6XX_RB_STENCIL_BUFFER_PITCH__SHIFT 0 -static inline uint32_t A6XX_RB_STENCIL_BUFFER_PITCH(uint32_t val) -{ - assert(!(val & 0x3f)); - return ((val >> 6) << A6XX_RB_STENCIL_BUFFER_PITCH__SHIFT) & A6XX_RB_STENCIL_BUFFER_PITCH__MASK; -} - -#define REG_A6XX_RB_STENCIL_BUFFER_ARRAY_PITCH 0x00008883 -#define A6XX_RB_STENCIL_BUFFER_ARRAY_PITCH__MASK 0xffffffff -#define A6XX_RB_STENCIL_BUFFER_ARRAY_PITCH__SHIFT 0 -static inline uint32_t A6XX_RB_STENCIL_BUFFER_ARRAY_PITCH(uint32_t val) -{ - assert(!(val & 0x3f)); - return ((val >> 6) << A6XX_RB_STENCIL_BUFFER_ARRAY_PITCH__SHIFT) & A6XX_RB_STENCIL_BUFFER_ARRAY_PITCH__MASK; -} - -#define REG_A6XX_RB_STENCIL_BUFFER_BASE_LO 0x00008884 - -#define REG_A6XX_RB_STENCIL_BUFFER_BASE_HI 0x00008885 - -#define REG_A6XX_RB_STENCIL_BUFFER_BASE_GMEM 0x00008886 - -#define REG_A6XX_RB_STENCILREF 0x00008887 -#define A6XX_RB_STENCILREF_REF__MASK 0x000000ff -#define A6XX_RB_STENCILREF_REF__SHIFT 0 -static inline uint32_t A6XX_RB_STENCILREF_REF(uint32_t val) -{ - return ((val) << A6XX_RB_STENCILREF_REF__SHIFT) & A6XX_RB_STENCILREF_REF__MASK; -} 
-#define A6XX_RB_STENCILREF_BFREF__MASK 0x0000ff00 -#define A6XX_RB_STENCILREF_BFREF__SHIFT 8 -static inline uint32_t A6XX_RB_STENCILREF_BFREF(uint32_t val) -{ - return ((val) << A6XX_RB_STENCILREF_BFREF__SHIFT) & A6XX_RB_STENCILREF_BFREF__MASK; -} - -#define REG_A6XX_RB_STENCILMASK 0x00008888 -#define A6XX_RB_STENCILMASK_MASK__MASK 0x000000ff -#define A6XX_RB_STENCILMASK_MASK__SHIFT 0 -static inline uint32_t A6XX_RB_STENCILMASK_MASK(uint32_t val) -{ - return ((val) << A6XX_RB_STENCILMASK_MASK__SHIFT) & A6XX_RB_STENCILMASK_MASK__MASK; -} -#define A6XX_RB_STENCILMASK_BFMASK__MASK 0x0000ff00 -#define A6XX_RB_STENCILMASK_BFMASK__SHIFT 8 -static inline uint32_t A6XX_RB_STENCILMASK_BFMASK(uint32_t val) -{ - return ((val) << A6XX_RB_STENCILMASK_BFMASK__SHIFT) & A6XX_RB_STENCILMASK_BFMASK__MASK; -} - -#define REG_A6XX_RB_STENCILWRMASK 0x00008889 -#define A6XX_RB_STENCILWRMASK_WRMASK__MASK 0x000000ff -#define A6XX_RB_STENCILWRMASK_WRMASK__SHIFT 0 -static inline uint32_t A6XX_RB_STENCILWRMASK_WRMASK(uint32_t val) -{ - return ((val) << A6XX_RB_STENCILWRMASK_WRMASK__SHIFT) & A6XX_RB_STENCILWRMASK_WRMASK__MASK; -} -#define A6XX_RB_STENCILWRMASK_BFWRMASK__MASK 0x0000ff00 -#define A6XX_RB_STENCILWRMASK_BFWRMASK__SHIFT 8 -static inline uint32_t A6XX_RB_STENCILWRMASK_BFWRMASK(uint32_t val) -{ - return ((val) << A6XX_RB_STENCILWRMASK_BFWRMASK__SHIFT) & A6XX_RB_STENCILWRMASK_BFWRMASK__MASK; -} - -#define REG_A6XX_RB_WINDOW_OFFSET 0x00008890 -#define A6XX_RB_WINDOW_OFFSET_WINDOW_OFFSET_DISABLE 0x80000000 -#define A6XX_RB_WINDOW_OFFSET_X__MASK 0x00007fff -#define A6XX_RB_WINDOW_OFFSET_X__SHIFT 0 -static inline uint32_t A6XX_RB_WINDOW_OFFSET_X(uint32_t val) -{ - return ((val) << A6XX_RB_WINDOW_OFFSET_X__SHIFT) & A6XX_RB_WINDOW_OFFSET_X__MASK; -} -#define A6XX_RB_WINDOW_OFFSET_Y__MASK 0x7fff0000 -#define A6XX_RB_WINDOW_OFFSET_Y__SHIFT 16 -static inline uint32_t A6XX_RB_WINDOW_OFFSET_Y(uint32_t val) -{ - return ((val) << A6XX_RB_WINDOW_OFFSET_Y__SHIFT) & A6XX_RB_WINDOW_OFFSET_Y__MASK; -} 
- -#define REG_A6XX_RB_SAMPLE_COUNT_CONTROL 0x00008891 -#define A6XX_RB_SAMPLE_COUNT_CONTROL_COPY 0x00000002 - -#define REG_A6XX_RB_LRZ_CNTL 0x00008898 -#define A6XX_RB_LRZ_CNTL_ENABLE 0x00000001 - -#define REG_A6XX_RB_UNKNOWN_88D0 0x000088d0 - -#define REG_A6XX_RB_BLIT_SCISSOR_TL 0x000088d1 -#define A6XX_RB_BLIT_SCISSOR_TL_WINDOW_OFFSET_DISABLE 0x80000000 -#define A6XX_RB_BLIT_SCISSOR_TL_X__MASK 0x00007fff -#define A6XX_RB_BLIT_SCISSOR_TL_X__SHIFT 0 -static inline uint32_t A6XX_RB_BLIT_SCISSOR_TL_X(uint32_t val) -{ - return ((val) << A6XX_RB_BLIT_SCISSOR_TL_X__SHIFT) & A6XX_RB_BLIT_SCISSOR_TL_X__MASK; -} -#define A6XX_RB_BLIT_SCISSOR_TL_Y__MASK 0x7fff0000 -#define A6XX_RB_BLIT_SCISSOR_TL_Y__SHIFT 16 -static inline uint32_t A6XX_RB_BLIT_SCISSOR_TL_Y(uint32_t val) -{ - return ((val) << A6XX_RB_BLIT_SCISSOR_TL_Y__SHIFT) & A6XX_RB_BLIT_SCISSOR_TL_Y__MASK; -} - -#define REG_A6XX_RB_BLIT_SCISSOR_BR 0x000088d2 -#define A6XX_RB_BLIT_SCISSOR_BR_WINDOW_OFFSET_DISABLE 0x80000000 -#define A6XX_RB_BLIT_SCISSOR_BR_X__MASK 0x00007fff -#define A6XX_RB_BLIT_SCISSOR_BR_X__SHIFT 0 -static inline uint32_t A6XX_RB_BLIT_SCISSOR_BR_X(uint32_t val) -{ - return ((val) << A6XX_RB_BLIT_SCISSOR_BR_X__SHIFT) & A6XX_RB_BLIT_SCISSOR_BR_X__MASK; -} -#define A6XX_RB_BLIT_SCISSOR_BR_Y__MASK 0x7fff0000 -#define A6XX_RB_BLIT_SCISSOR_BR_Y__SHIFT 16 -static inline uint32_t A6XX_RB_BLIT_SCISSOR_BR_Y(uint32_t val) -{ - return ((val) << A6XX_RB_BLIT_SCISSOR_BR_Y__SHIFT) & A6XX_RB_BLIT_SCISSOR_BR_Y__MASK; -} - -#define REG_A6XX_RB_BLIT_BASE_GMEM 0x000088d6 - -#define REG_A6XX_RB_BLIT_DST_INFO 0x000088d7 -#define A6XX_RB_BLIT_DST_INFO_TILE_MODE__MASK 0x00000003 -#define A6XX_RB_BLIT_DST_INFO_TILE_MODE__SHIFT 0 -static inline uint32_t A6XX_RB_BLIT_DST_INFO_TILE_MODE(enum a6xx_tile_mode val) -{ - return ((val) << A6XX_RB_BLIT_DST_INFO_TILE_MODE__SHIFT) & A6XX_RB_BLIT_DST_INFO_TILE_MODE__MASK; -} -#define A6XX_RB_BLIT_DST_INFO_FLAGS 0x00000004 -#define A6XX_RB_BLIT_DST_INFO_COLOR_FORMAT__MASK 0x00007f80 
-#define A6XX_RB_BLIT_DST_INFO_COLOR_FORMAT__SHIFT 7 -static inline uint32_t A6XX_RB_BLIT_DST_INFO_COLOR_FORMAT(enum a6xx_color_fmt val) -{ - return ((val) << A6XX_RB_BLIT_DST_INFO_COLOR_FORMAT__SHIFT) & A6XX_RB_BLIT_DST_INFO_COLOR_FORMAT__MASK; -} -#define A6XX_RB_BLIT_DST_INFO_COLOR_SWAP__MASK 0x00000060 -#define A6XX_RB_BLIT_DST_INFO_COLOR_SWAP__SHIFT 5 -static inline uint32_t A6XX_RB_BLIT_DST_INFO_COLOR_SWAP(enum a3xx_color_swap val) -{ - return ((val) << A6XX_RB_BLIT_DST_INFO_COLOR_SWAP__SHIFT) & A6XX_RB_BLIT_DST_INFO_COLOR_SWAP__MASK; -} - -#define REG_A6XX_RB_BLIT_DST_LO 0x000088d8 - -#define REG_A6XX_RB_BLIT_DST_HI 0x000088d9 - -#define REG_A6XX_RB_BLIT_DST_PITCH 0x000088da -#define A6XX_RB_BLIT_DST_PITCH__MASK 0xffffffff -#define A6XX_RB_BLIT_DST_PITCH__SHIFT 0 -static inline uint32_t A6XX_RB_BLIT_DST_PITCH(uint32_t val) -{ - assert(!(val & 0x3f)); - return ((val >> 6) << A6XX_RB_BLIT_DST_PITCH__SHIFT) & A6XX_RB_BLIT_DST_PITCH__MASK; -} - -#define REG_A6XX_RB_BLIT_DST_ARRAY_PITCH 0x000088db -#define A6XX_RB_BLIT_DST_ARRAY_PITCH__MASK 0xffffffff -#define A6XX_RB_BLIT_DST_ARRAY_PITCH__SHIFT 0 -static inline uint32_t A6XX_RB_BLIT_DST_ARRAY_PITCH(uint32_t val) -{ - assert(!(val & 0x3f)); - return ((val >> 6) << A6XX_RB_BLIT_DST_ARRAY_PITCH__SHIFT) & A6XX_RB_BLIT_DST_ARRAY_PITCH__MASK; -} - -#define REG_A6XX_RB_BLIT_FLAG_DST_LO 0x000088dc - -#define REG_A6XX_RB_BLIT_FLAG_DST_HI 0x000088dd - -#define REG_A6XX_RB_BLIT_CLEAR_COLOR_DW0 0x000088df - -#define REG_A6XX_RB_BLIT_CLEAR_COLOR_DW1 0x000088e0 - -#define REG_A6XX_RB_BLIT_CLEAR_COLOR_DW2 0x000088e1 - -#define REG_A6XX_RB_BLIT_CLEAR_COLOR_DW3 0x000088e2 - -#define REG_A6XX_RB_BLIT_INFO 0x000088e3 -#define A6XX_RB_BLIT_INFO_UNK0 0x00000001 -#define A6XX_RB_BLIT_INFO_GMEM 0x00000002 -#define A6XX_RB_BLIT_INFO_INTEGER 0x00000004 -#define A6XX_RB_BLIT_INFO_DEPTH 0x00000008 -#define A6XX_RB_BLIT_INFO_CLEAR_MASK__MASK 0x000000f0 -#define A6XX_RB_BLIT_INFO_CLEAR_MASK__SHIFT 4 -static inline uint32_t 
A6XX_RB_BLIT_INFO_CLEAR_MASK(uint32_t val) -{ - return ((val) << A6XX_RB_BLIT_INFO_CLEAR_MASK__SHIFT) & A6XX_RB_BLIT_INFO_CLEAR_MASK__MASK; -} - -#define REG_A6XX_RB_UNKNOWN_88F0 0x000088f0 - -#define REG_A6XX_RB_DEPTH_FLAG_BUFFER_BASE_LO 0x00008900 - -#define REG_A6XX_RB_DEPTH_FLAG_BUFFER_BASE_HI 0x00008901 - -#define REG_A6XX_RB_DEPTH_FLAG_BUFFER_PITCH 0x00008902 - -static inline uint32_t REG_A6XX_RB_MRT_FLAG_BUFFER(uint32_t i0) { return 0x00008903 + 0x3*i0; } - -static inline uint32_t REG_A6XX_RB_MRT_FLAG_BUFFER_ADDR_LO(uint32_t i0) { return 0x00008903 + 0x3*i0; } - -static inline uint32_t REG_A6XX_RB_MRT_FLAG_BUFFER_ADDR_HI(uint32_t i0) { return 0x00008904 + 0x3*i0; } - -static inline uint32_t REG_A6XX_RB_MRT_FLAG_BUFFER_PITCH(uint32_t i0) { return 0x00008905 + 0x3*i0; } -#define A6XX_RB_MRT_FLAG_BUFFER_PITCH_PITCH__MASK 0x000007ff -#define A6XX_RB_MRT_FLAG_BUFFER_PITCH_PITCH__SHIFT 0 -static inline uint32_t A6XX_RB_MRT_FLAG_BUFFER_PITCH_PITCH(uint32_t val) -{ - assert(!(val & 0x1f)); - return ((val >> 5) << A6XX_RB_MRT_FLAG_BUFFER_PITCH_PITCH__SHIFT) & A6XX_RB_MRT_FLAG_BUFFER_PITCH_PITCH__MASK; -} -#define A6XX_RB_MRT_FLAG_BUFFER_PITCH_ARRAY_PITCH__MASK 0x003ff800 -#define A6XX_RB_MRT_FLAG_BUFFER_PITCH_ARRAY_PITCH__SHIFT 11 -static inline uint32_t A6XX_RB_MRT_FLAG_BUFFER_PITCH_ARRAY_PITCH(uint32_t val) -{ - assert(!(val & 0x1f)); - return ((val >> 5) << A6XX_RB_MRT_FLAG_BUFFER_PITCH_ARRAY_PITCH__SHIFT) & A6XX_RB_MRT_FLAG_BUFFER_PITCH_ARRAY_PITCH__MASK; -} - -#define REG_A6XX_RB_SAMPLE_COUNT_ADDR_LO 0x00008927 - -#define REG_A6XX_RB_SAMPLE_COUNT_ADDR_HI 0x00008928 - -#define REG_A6XX_RB_2D_BLIT_CNTL 0x00008c00 -#define A6XX_RB_2D_BLIT_CNTL_COLOR_FORMAT__MASK 0x0000ff00 -#define A6XX_RB_2D_BLIT_CNTL_COLOR_FORMAT__SHIFT 8 -static inline uint32_t A6XX_RB_2D_BLIT_CNTL_COLOR_FORMAT(enum a6xx_color_fmt val) -{ - return ((val) << A6XX_RB_2D_BLIT_CNTL_COLOR_FORMAT__SHIFT) & A6XX_RB_2D_BLIT_CNTL_COLOR_FORMAT__MASK; -} -#define A6XX_RB_2D_BLIT_CNTL_SCISSOR 0x00010000 - 
-#define REG_A6XX_RB_UNKNOWN_8C01 0x00008c01 - -#define REG_A6XX_RB_2D_DST_INFO 0x00008c17 -#define A6XX_RB_2D_DST_INFO_COLOR_FORMAT__MASK 0x000000ff -#define A6XX_RB_2D_DST_INFO_COLOR_FORMAT__SHIFT 0 -static inline uint32_t A6XX_RB_2D_DST_INFO_COLOR_FORMAT(enum a6xx_color_fmt val) -{ - return ((val) << A6XX_RB_2D_DST_INFO_COLOR_FORMAT__SHIFT) & A6XX_RB_2D_DST_INFO_COLOR_FORMAT__MASK; -} -#define A6XX_RB_2D_DST_INFO_TILE_MODE__MASK 0x00000300 -#define A6XX_RB_2D_DST_INFO_TILE_MODE__SHIFT 8 -static inline uint32_t A6XX_RB_2D_DST_INFO_TILE_MODE(enum a6xx_tile_mode val) -{ - return ((val) << A6XX_RB_2D_DST_INFO_TILE_MODE__SHIFT) & A6XX_RB_2D_DST_INFO_TILE_MODE__MASK; -} -#define A6XX_RB_2D_DST_INFO_COLOR_SWAP__MASK 0x00000c00 -#define A6XX_RB_2D_DST_INFO_COLOR_SWAP__SHIFT 10 -static inline uint32_t A6XX_RB_2D_DST_INFO_COLOR_SWAP(enum a3xx_color_swap val) -{ - return ((val) << A6XX_RB_2D_DST_INFO_COLOR_SWAP__SHIFT) & A6XX_RB_2D_DST_INFO_COLOR_SWAP__MASK; -} -#define A6XX_RB_2D_DST_INFO_FLAGS 0x00001000 - -#define REG_A6XX_RB_2D_DST_LO 0x00008c18 - -#define REG_A6XX_RB_2D_DST_HI 0x00008c19 - -#define REG_A6XX_RB_2D_DST_SIZE 0x00008c1a -#define A6XX_RB_2D_DST_SIZE_PITCH__MASK 0x0000ffff -#define A6XX_RB_2D_DST_SIZE_PITCH__SHIFT 0 -static inline uint32_t A6XX_RB_2D_DST_SIZE_PITCH(uint32_t val) -{ - assert(!(val & 0x3f)); - return ((val >> 6) << A6XX_RB_2D_DST_SIZE_PITCH__SHIFT) & A6XX_RB_2D_DST_SIZE_PITCH__MASK; -} - -#define REG_A6XX_RB_2D_DST_FLAGS_LO 0x00008c20 - -#define REG_A6XX_RB_2D_DST_FLAGS_HI 0x00008c21 - -#define REG_A6XX_RB_2D_SRC_SOLID_C0 0x00008c2c - -#define REG_A6XX_RB_2D_SRC_SOLID_C1 0x00008c2d - -#define REG_A6XX_RB_2D_SRC_SOLID_C2 0x00008c2e - -#define REG_A6XX_RB_2D_SRC_SOLID_C3 0x00008c2f - -#define REG_A6XX_RB_UNKNOWN_8E01 0x00008e01 - -#define REG_A6XX_RB_UNKNOWN_8E04 0x00008e04 - -#define REG_A6XX_RB_CCU_CNTL 0x00008e07 - -#define REG_A6XX_VPC_UNKNOWN_9101 0x00009101 - -#define REG_A6XX_VPC_GS_SIV_CNTL 0x00009104 - -#define 
REG_A6XX_VPC_UNKNOWN_9107 0x00009107 - -#define REG_A6XX_VPC_UNKNOWN_9108 0x00009108 - -static inline uint32_t REG_A6XX_VPC_VARYING_INTERP(uint32_t i0) { return 0x00009200 + 0x1*i0; } - -static inline uint32_t REG_A6XX_VPC_VARYING_INTERP_MODE(uint32_t i0) { return 0x00009200 + 0x1*i0; } - -static inline uint32_t REG_A6XX_VPC_VARYING_PS_REPL(uint32_t i0) { return 0x00009208 + 0x1*i0; } - -static inline uint32_t REG_A6XX_VPC_VARYING_PS_REPL_MODE(uint32_t i0) { return 0x00009208 + 0x1*i0; } - -#define REG_A6XX_VPC_UNKNOWN_9210 0x00009210 - -#define REG_A6XX_VPC_UNKNOWN_9211 0x00009211 - -static inline uint32_t REG_A6XX_VPC_VAR(uint32_t i0) { return 0x00009212 + 0x1*i0; } - -static inline uint32_t REG_A6XX_VPC_VAR_DISABLE(uint32_t i0) { return 0x00009212 + 0x1*i0; } - -#define REG_A6XX_VPC_SO_CNTL 0x00009216 -#define A6XX_VPC_SO_CNTL_ENABLE 0x00010000 - -#define REG_A6XX_VPC_SO_PROG 0x00009217 -#define A6XX_VPC_SO_PROG_A_BUF__MASK 0x00000003 -#define A6XX_VPC_SO_PROG_A_BUF__SHIFT 0 -static inline uint32_t A6XX_VPC_SO_PROG_A_BUF(uint32_t val) -{ - return ((val) << A6XX_VPC_SO_PROG_A_BUF__SHIFT) & A6XX_VPC_SO_PROG_A_BUF__MASK; -} -#define A6XX_VPC_SO_PROG_A_OFF__MASK 0x000007fc -#define A6XX_VPC_SO_PROG_A_OFF__SHIFT 2 -static inline uint32_t A6XX_VPC_SO_PROG_A_OFF(uint32_t val) -{ - assert(!(val & 0x3)); - return ((val >> 2) << A6XX_VPC_SO_PROG_A_OFF__SHIFT) & A6XX_VPC_SO_PROG_A_OFF__MASK; -} -#define A6XX_VPC_SO_PROG_A_EN 0x00000800 -#define A6XX_VPC_SO_PROG_B_BUF__MASK 0x00003000 -#define A6XX_VPC_SO_PROG_B_BUF__SHIFT 12 -static inline uint32_t A6XX_VPC_SO_PROG_B_BUF(uint32_t val) -{ - return ((val) << A6XX_VPC_SO_PROG_B_BUF__SHIFT) & A6XX_VPC_SO_PROG_B_BUF__MASK; -} -#define A6XX_VPC_SO_PROG_B_OFF__MASK 0x007fc000 -#define A6XX_VPC_SO_PROG_B_OFF__SHIFT 14 -static inline uint32_t A6XX_VPC_SO_PROG_B_OFF(uint32_t val) -{ - assert(!(val & 0x3)); - return ((val >> 2) << A6XX_VPC_SO_PROG_B_OFF__SHIFT) & A6XX_VPC_SO_PROG_B_OFF__MASK; -} -#define A6XX_VPC_SO_PROG_B_EN 
0x00800000 - -static inline uint32_t REG_A6XX_VPC_SO(uint32_t i0) { return 0x0000921a + 0x7*i0; } - -static inline uint32_t REG_A6XX_VPC_SO_BUFFER_BASE_LO(uint32_t i0) { return 0x0000921a + 0x7*i0; } - -static inline uint32_t REG_A6XX_VPC_SO_BUFFER_BASE_HI(uint32_t i0) { return 0x0000921b + 0x7*i0; } - -static inline uint32_t REG_A6XX_VPC_SO_BUFFER_SIZE(uint32_t i0) { return 0x0000921c + 0x7*i0; } - -static inline uint32_t REG_A6XX_VPC_SO_NCOMP(uint32_t i0) { return 0x0000921d + 0x7*i0; } - -static inline uint32_t REG_A6XX_VPC_SO_BUFFER_OFFSET(uint32_t i0) { return 0x0000921e + 0x7*i0; } - -static inline uint32_t REG_A6XX_VPC_SO_FLUSH_BASE_LO(uint32_t i0) { return 0x0000921f + 0x7*i0; } - -static inline uint32_t REG_A6XX_VPC_SO_FLUSH_BASE_HI(uint32_t i0) { return 0x00009220 + 0x7*i0; } - -#define REG_A6XX_VPC_UNKNOWN_9236 0x00009236 - -#define REG_A6XX_VPC_UNKNOWN_9300 0x00009300 - -#define REG_A6XX_VPC_PACK 0x00009301 -#define A6XX_VPC_PACK_STRIDE_IN_VPC__MASK 0x000000ff -#define A6XX_VPC_PACK_STRIDE_IN_VPC__SHIFT 0 -static inline uint32_t A6XX_VPC_PACK_STRIDE_IN_VPC(uint32_t val) -{ - return ((val) << A6XX_VPC_PACK_STRIDE_IN_VPC__SHIFT) & A6XX_VPC_PACK_STRIDE_IN_VPC__MASK; -} -#define A6XX_VPC_PACK_NUMNONPOSVAR__MASK 0x0000ff00 -#define A6XX_VPC_PACK_NUMNONPOSVAR__SHIFT 8 -static inline uint32_t A6XX_VPC_PACK_NUMNONPOSVAR(uint32_t val) -{ - return ((val) << A6XX_VPC_PACK_NUMNONPOSVAR__SHIFT) & A6XX_VPC_PACK_NUMNONPOSVAR__MASK; -} -#define A6XX_VPC_PACK_PSIZELOC__MASK 0x00ff0000 -#define A6XX_VPC_PACK_PSIZELOC__SHIFT 16 -static inline uint32_t A6XX_VPC_PACK_PSIZELOC(uint32_t val) -{ - return ((val) << A6XX_VPC_PACK_PSIZELOC__SHIFT) & A6XX_VPC_PACK_PSIZELOC__MASK; -} - -#define REG_A6XX_VPC_CNTL_0 0x00009304 -#define A6XX_VPC_CNTL_0_NUMNONPOSVAR__MASK 0x000000ff -#define A6XX_VPC_CNTL_0_NUMNONPOSVAR__SHIFT 0 -static inline uint32_t A6XX_VPC_CNTL_0_NUMNONPOSVAR(uint32_t val) -{ - return ((val) << A6XX_VPC_CNTL_0_NUMNONPOSVAR__SHIFT) & 
A6XX_VPC_CNTL_0_NUMNONPOSVAR__MASK; -} -#define A6XX_VPC_CNTL_0_VARYING 0x00010000 - -#define REG_A6XX_VPC_SO_BUF_CNTL 0x00009305 -#define A6XX_VPC_SO_BUF_CNTL_BUF0 0x00000001 -#define A6XX_VPC_SO_BUF_CNTL_BUF1 0x00000008 -#define A6XX_VPC_SO_BUF_CNTL_BUF2 0x00000040 -#define A6XX_VPC_SO_BUF_CNTL_BUF3 0x00000200 -#define A6XX_VPC_SO_BUF_CNTL_ENABLE 0x00008000 - -#define REG_A6XX_VPC_SO_OVERRIDE 0x00009306 -#define A6XX_VPC_SO_OVERRIDE_SO_DISABLE 0x00000001 - -#define REG_A6XX_VPC_UNKNOWN_9600 0x00009600 - -#define REG_A6XX_VPC_UNKNOWN_9602 0x00009602 - -#define REG_A6XX_PC_UNKNOWN_9801 0x00009801 - -#define REG_A6XX_PC_RESTART_INDEX 0x00009803 - -#define REG_A6XX_PC_MODE_CNTL 0x00009804 - -#define REG_A6XX_PC_UNKNOWN_9805 0x00009805 - -#define REG_A6XX_PC_UNKNOWN_9806 0x00009806 - -#define REG_A6XX_PC_UNKNOWN_9980 0x00009980 - -#define REG_A6XX_PC_UNKNOWN_9981 0x00009981 - -#define REG_A6XX_PC_UNKNOWN_9990 0x00009990 - -#define REG_A6XX_PC_PRIMITIVE_CNTL_0 0x00009b00 -#define A6XX_PC_PRIMITIVE_CNTL_0_PRIMITIVE_RESTART 0x00000001 -#define A6XX_PC_PRIMITIVE_CNTL_0_PROVOKING_VTX_LAST 0x00000002 - -#define REG_A6XX_PC_PRIMITIVE_CNTL_1 0x00009b01 -#define A6XX_PC_PRIMITIVE_CNTL_1_STRIDE_IN_VPC__MASK 0x0000007f -#define A6XX_PC_PRIMITIVE_CNTL_1_STRIDE_IN_VPC__SHIFT 0 -static inline uint32_t A6XX_PC_PRIMITIVE_CNTL_1_STRIDE_IN_VPC(uint32_t val) -{ - return ((val) << A6XX_PC_PRIMITIVE_CNTL_1_STRIDE_IN_VPC__SHIFT) & A6XX_PC_PRIMITIVE_CNTL_1_STRIDE_IN_VPC__MASK; -} -#define A6XX_PC_PRIMITIVE_CNTL_1_PSIZE 0x00000100 - -#define REG_A6XX_PC_UNKNOWN_9B06 0x00009b06 - -#define REG_A6XX_PC_UNKNOWN_9B07 0x00009b07 - -#define REG_A6XX_PC_TESSFACTOR_ADDR_LO 0x00009e08 - -#define REG_A6XX_PC_TESSFACTOR_ADDR_HI 0x00009e09 - -#define REG_A6XX_PC_UNKNOWN_9E72 0x00009e72 - -#define REG_A6XX_VFD_CONTROL_0 0x0000a000 -#define A6XX_VFD_CONTROL_0_VTXCNT__MASK 0x0000003f -#define A6XX_VFD_CONTROL_0_VTXCNT__SHIFT 0 -static inline uint32_t A6XX_VFD_CONTROL_0_VTXCNT(uint32_t val) -{ - return 
((val) << A6XX_VFD_CONTROL_0_VTXCNT__SHIFT) & A6XX_VFD_CONTROL_0_VTXCNT__MASK; -} - -#define REG_A6XX_VFD_CONTROL_1 0x0000a001 -#define A6XX_VFD_CONTROL_1_REGID4VTX__MASK 0x000000ff -#define A6XX_VFD_CONTROL_1_REGID4VTX__SHIFT 0 -static inline uint32_t A6XX_VFD_CONTROL_1_REGID4VTX(uint32_t val) -{ - return ((val) << A6XX_VFD_CONTROL_1_REGID4VTX__SHIFT) & A6XX_VFD_CONTROL_1_REGID4VTX__MASK; -} -#define A6XX_VFD_CONTROL_1_REGID4INST__MASK 0x0000ff00 -#define A6XX_VFD_CONTROL_1_REGID4INST__SHIFT 8 -static inline uint32_t A6XX_VFD_CONTROL_1_REGID4INST(uint32_t val) -{ - return ((val) << A6XX_VFD_CONTROL_1_REGID4INST__SHIFT) & A6XX_VFD_CONTROL_1_REGID4INST__MASK; -} -#define A6XX_VFD_CONTROL_1_REGID4PRIMID__MASK 0x00ff0000 -#define A6XX_VFD_CONTROL_1_REGID4PRIMID__SHIFT 16 -static inline uint32_t A6XX_VFD_CONTROL_1_REGID4PRIMID(uint32_t val) -{ - return ((val) << A6XX_VFD_CONTROL_1_REGID4PRIMID__SHIFT) & A6XX_VFD_CONTROL_1_REGID4PRIMID__MASK; -} - -#define REG_A6XX_VFD_CONTROL_2 0x0000a002 -#define A6XX_VFD_CONTROL_2_REGID_PATCHID__MASK 0x000000ff -#define A6XX_VFD_CONTROL_2_REGID_PATCHID__SHIFT 0 -static inline uint32_t A6XX_VFD_CONTROL_2_REGID_PATCHID(uint32_t val) -{ - return ((val) << A6XX_VFD_CONTROL_2_REGID_PATCHID__SHIFT) & A6XX_VFD_CONTROL_2_REGID_PATCHID__MASK; -} - -#define REG_A6XX_VFD_CONTROL_3 0x0000a003 -#define A6XX_VFD_CONTROL_3_REGID_PATCHID__MASK 0x0000ff00 -#define A6XX_VFD_CONTROL_3_REGID_PATCHID__SHIFT 8 -static inline uint32_t A6XX_VFD_CONTROL_3_REGID_PATCHID(uint32_t val) -{ - return ((val) << A6XX_VFD_CONTROL_3_REGID_PATCHID__SHIFT) & A6XX_VFD_CONTROL_3_REGID_PATCHID__MASK; -} -#define A6XX_VFD_CONTROL_3_REGID_TESSX__MASK 0x00ff0000 -#define A6XX_VFD_CONTROL_3_REGID_TESSX__SHIFT 16 -static inline uint32_t A6XX_VFD_CONTROL_3_REGID_TESSX(uint32_t val) -{ - return ((val) << A6XX_VFD_CONTROL_3_REGID_TESSX__SHIFT) & A6XX_VFD_CONTROL_3_REGID_TESSX__MASK; -} -#define A6XX_VFD_CONTROL_3_REGID_TESSY__MASK 0xff000000 -#define 
A6XX_VFD_CONTROL_3_REGID_TESSY__SHIFT 24 -static inline uint32_t A6XX_VFD_CONTROL_3_REGID_TESSY(uint32_t val) -{ - return ((val) << A6XX_VFD_CONTROL_3_REGID_TESSY__SHIFT) & A6XX_VFD_CONTROL_3_REGID_TESSY__MASK; -} - -#define REG_A6XX_VFD_CONTROL_4 0x0000a004 - -#define REG_A6XX_VFD_CONTROL_5 0x0000a005 - -#define REG_A6XX_VFD_CONTROL_6 0x0000a006 - -#define REG_A6XX_VFD_MODE_CNTL 0x0000a007 -#define A6XX_VFD_MODE_CNTL_BINNING_PASS 0x00000001 - -#define REG_A6XX_VFD_UNKNOWN_A008 0x0000a008 - -#define REG_A6XX_VFD_UNKNOWN_A009 0x0000a009 - -#define REG_A6XX_VFD_INDEX_OFFSET 0x0000a00e - -#define REG_A6XX_VFD_INSTANCE_START_OFFSET 0x0000a00f - -static inline uint32_t REG_A6XX_VFD_FETCH(uint32_t i0) { return 0x0000a010 + 0x4*i0; } - -static inline uint32_t REG_A6XX_VFD_FETCH_BASE_LO(uint32_t i0) { return 0x0000a010 + 0x4*i0; } - -static inline uint32_t REG_A6XX_VFD_FETCH_BASE_HI(uint32_t i0) { return 0x0000a011 + 0x4*i0; } - -static inline uint32_t REG_A6XX_VFD_FETCH_SIZE(uint32_t i0) { return 0x0000a012 + 0x4*i0; } - -static inline uint32_t REG_A6XX_VFD_FETCH_STRIDE(uint32_t i0) { return 0x0000a013 + 0x4*i0; } - -static inline uint32_t REG_A6XX_VFD_DECODE(uint32_t i0) { return 0x0000a090 + 0x2*i0; } - -static inline uint32_t REG_A6XX_VFD_DECODE_INSTR(uint32_t i0) { return 0x0000a090 + 0x2*i0; } -#define A6XX_VFD_DECODE_INSTR_IDX__MASK 0x0000001f -#define A6XX_VFD_DECODE_INSTR_IDX__SHIFT 0 -static inline uint32_t A6XX_VFD_DECODE_INSTR_IDX(uint32_t val) -{ - return ((val) << A6XX_VFD_DECODE_INSTR_IDX__SHIFT) & A6XX_VFD_DECODE_INSTR_IDX__MASK; -} -#define A6XX_VFD_DECODE_INSTR_INSTANCED 0x00020000 -#define A6XX_VFD_DECODE_INSTR_FORMAT__MASK 0x0ff00000 -#define A6XX_VFD_DECODE_INSTR_FORMAT__SHIFT 20 -static inline uint32_t A6XX_VFD_DECODE_INSTR_FORMAT(enum a6xx_vtx_fmt val) -{ - return ((val) << A6XX_VFD_DECODE_INSTR_FORMAT__SHIFT) & A6XX_VFD_DECODE_INSTR_FORMAT__MASK; -} -#define A6XX_VFD_DECODE_INSTR_SWAP__MASK 0x30000000 -#define A6XX_VFD_DECODE_INSTR_SWAP__SHIFT 28 
-static inline uint32_t A6XX_VFD_DECODE_INSTR_SWAP(enum a3xx_color_swap val) -{ - return ((val) << A6XX_VFD_DECODE_INSTR_SWAP__SHIFT) & A6XX_VFD_DECODE_INSTR_SWAP__MASK; -} -#define A6XX_VFD_DECODE_INSTR_UNK30 0x40000000 -#define A6XX_VFD_DECODE_INSTR_FLOAT 0x80000000 - -static inline uint32_t REG_A6XX_VFD_DECODE_STEP_RATE(uint32_t i0) { return 0x0000a091 + 0x2*i0; } - -static inline uint32_t REG_A6XX_VFD_DEST_CNTL(uint32_t i0) { return 0x0000a0d0 + 0x1*i0; } - -static inline uint32_t REG_A6XX_VFD_DEST_CNTL_INSTR(uint32_t i0) { return 0x0000a0d0 + 0x1*i0; } -#define A6XX_VFD_DEST_CNTL_INSTR_WRITEMASK__MASK 0x0000000f -#define A6XX_VFD_DEST_CNTL_INSTR_WRITEMASK__SHIFT 0 -static inline uint32_t A6XX_VFD_DEST_CNTL_INSTR_WRITEMASK(uint32_t val) -{ - return ((val) << A6XX_VFD_DEST_CNTL_INSTR_WRITEMASK__SHIFT) & A6XX_VFD_DEST_CNTL_INSTR_WRITEMASK__MASK; -} -#define A6XX_VFD_DEST_CNTL_INSTR_REGID__MASK 0x00000ff0 -#define A6XX_VFD_DEST_CNTL_INSTR_REGID__SHIFT 4 -static inline uint32_t A6XX_VFD_DEST_CNTL_INSTR_REGID(uint32_t val) -{ - return ((val) << A6XX_VFD_DEST_CNTL_INSTR_REGID__SHIFT) & A6XX_VFD_DEST_CNTL_INSTR_REGID__MASK; -} - -#define REG_A6XX_SP_UNKNOWN_A0F8 0x0000a0f8 - -#define REG_A6XX_SP_PRIMITIVE_CNTL 0x0000a802 -#define A6XX_SP_PRIMITIVE_CNTL_VSOUT__MASK 0x0000001f -#define A6XX_SP_PRIMITIVE_CNTL_VSOUT__SHIFT 0 -static inline uint32_t A6XX_SP_PRIMITIVE_CNTL_VSOUT(uint32_t val) -{ - return ((val) << A6XX_SP_PRIMITIVE_CNTL_VSOUT__SHIFT) & A6XX_SP_PRIMITIVE_CNTL_VSOUT__MASK; -} - -static inline uint32_t REG_A6XX_SP_VS_OUT(uint32_t i0) { return 0x0000a803 + 0x1*i0; } - -static inline uint32_t REG_A6XX_SP_VS_OUT_REG(uint32_t i0) { return 0x0000a803 + 0x1*i0; } -#define A6XX_SP_VS_OUT_REG_A_REGID__MASK 0x000000ff -#define A6XX_SP_VS_OUT_REG_A_REGID__SHIFT 0 -static inline uint32_t A6XX_SP_VS_OUT_REG_A_REGID(uint32_t val) -{ - return ((val) << A6XX_SP_VS_OUT_REG_A_REGID__SHIFT) & A6XX_SP_VS_OUT_REG_A_REGID__MASK; -} -#define A6XX_SP_VS_OUT_REG_A_COMPMASK__MASK 
0x00000f00 -#define A6XX_SP_VS_OUT_REG_A_COMPMASK__SHIFT 8 -static inline uint32_t A6XX_SP_VS_OUT_REG_A_COMPMASK(uint32_t val) -{ - return ((val) << A6XX_SP_VS_OUT_REG_A_COMPMASK__SHIFT) & A6XX_SP_VS_OUT_REG_A_COMPMASK__MASK; -} -#define A6XX_SP_VS_OUT_REG_B_REGID__MASK 0x00ff0000 -#define A6XX_SP_VS_OUT_REG_B_REGID__SHIFT 16 -static inline uint32_t A6XX_SP_VS_OUT_REG_B_REGID(uint32_t val) -{ - return ((val) << A6XX_SP_VS_OUT_REG_B_REGID__SHIFT) & A6XX_SP_VS_OUT_REG_B_REGID__MASK; -} -#define A6XX_SP_VS_OUT_REG_B_COMPMASK__MASK 0x0f000000 -#define A6XX_SP_VS_OUT_REG_B_COMPMASK__SHIFT 24 -static inline uint32_t A6XX_SP_VS_OUT_REG_B_COMPMASK(uint32_t val) -{ - return ((val) << A6XX_SP_VS_OUT_REG_B_COMPMASK__SHIFT) & A6XX_SP_VS_OUT_REG_B_COMPMASK__MASK; -} - -static inline uint32_t REG_A6XX_SP_VS_VPC_DST(uint32_t i0) { return 0x0000a813 + 0x1*i0; } - -static inline uint32_t REG_A6XX_SP_VS_VPC_DST_REG(uint32_t i0) { return 0x0000a813 + 0x1*i0; } -#define A6XX_SP_VS_VPC_DST_REG_OUTLOC0__MASK 0x000000ff -#define A6XX_SP_VS_VPC_DST_REG_OUTLOC0__SHIFT 0 -static inline uint32_t A6XX_SP_VS_VPC_DST_REG_OUTLOC0(uint32_t val) -{ - return ((val) << A6XX_SP_VS_VPC_DST_REG_OUTLOC0__SHIFT) & A6XX_SP_VS_VPC_DST_REG_OUTLOC0__MASK; -} -#define A6XX_SP_VS_VPC_DST_REG_OUTLOC1__MASK 0x0000ff00 -#define A6XX_SP_VS_VPC_DST_REG_OUTLOC1__SHIFT 8 -static inline uint32_t A6XX_SP_VS_VPC_DST_REG_OUTLOC1(uint32_t val) -{ - return ((val) << A6XX_SP_VS_VPC_DST_REG_OUTLOC1__SHIFT) & A6XX_SP_VS_VPC_DST_REG_OUTLOC1__MASK; -} -#define A6XX_SP_VS_VPC_DST_REG_OUTLOC2__MASK 0x00ff0000 -#define A6XX_SP_VS_VPC_DST_REG_OUTLOC2__SHIFT 16 -static inline uint32_t A6XX_SP_VS_VPC_DST_REG_OUTLOC2(uint32_t val) -{ - return ((val) << A6XX_SP_VS_VPC_DST_REG_OUTLOC2__SHIFT) & A6XX_SP_VS_VPC_DST_REG_OUTLOC2__MASK; -} -#define A6XX_SP_VS_VPC_DST_REG_OUTLOC3__MASK 0xff000000 -#define A6XX_SP_VS_VPC_DST_REG_OUTLOC3__SHIFT 24 -static inline uint32_t A6XX_SP_VS_VPC_DST_REG_OUTLOC3(uint32_t val) -{ - return ((val) << 
A6XX_SP_VS_VPC_DST_REG_OUTLOC3__SHIFT) & A6XX_SP_VS_VPC_DST_REG_OUTLOC3__MASK; -} - -#define REG_A6XX_SP_VS_CTRL_REG0 0x0000a800 -#define A6XX_SP_VS_CTRL_REG0_HALFREGFOOTPRINT__MASK 0x0000007e -#define A6XX_SP_VS_CTRL_REG0_HALFREGFOOTPRINT__SHIFT 1 -static inline uint32_t A6XX_SP_VS_CTRL_REG0_HALFREGFOOTPRINT(uint32_t val) -{ - return ((val) << A6XX_SP_VS_CTRL_REG0_HALFREGFOOTPRINT__SHIFT) & A6XX_SP_VS_CTRL_REG0_HALFREGFOOTPRINT__MASK; -} -#define A6XX_SP_VS_CTRL_REG0_FULLREGFOOTPRINT__MASK 0x00001f80 -#define A6XX_SP_VS_CTRL_REG0_FULLREGFOOTPRINT__SHIFT 7 -static inline uint32_t A6XX_SP_VS_CTRL_REG0_FULLREGFOOTPRINT(uint32_t val) -{ - return ((val) << A6XX_SP_VS_CTRL_REG0_FULLREGFOOTPRINT__SHIFT) & A6XX_SP_VS_CTRL_REG0_FULLREGFOOTPRINT__MASK; -} -#define A6XX_SP_VS_CTRL_REG0_BRANCHSTACK__MASK 0x000fc000 -#define A6XX_SP_VS_CTRL_REG0_BRANCHSTACK__SHIFT 14 -static inline uint32_t A6XX_SP_VS_CTRL_REG0_BRANCHSTACK(uint32_t val) -{ - return ((val) << A6XX_SP_VS_CTRL_REG0_BRANCHSTACK__SHIFT) & A6XX_SP_VS_CTRL_REG0_BRANCHSTACK__MASK; -} -#define A6XX_SP_VS_CTRL_REG0_THREADSIZE__MASK 0x00100000 -#define A6XX_SP_VS_CTRL_REG0_THREADSIZE__SHIFT 20 -static inline uint32_t A6XX_SP_VS_CTRL_REG0_THREADSIZE(enum a3xx_threadsize val) -{ - return ((val) << A6XX_SP_VS_CTRL_REG0_THREADSIZE__SHIFT) & A6XX_SP_VS_CTRL_REG0_THREADSIZE__MASK; -} -#define A6XX_SP_VS_CTRL_REG0_VARYING 0x00400000 -#define A6XX_SP_VS_CTRL_REG0_PIXLODENABLE 0x04000000 -#define A6XX_SP_VS_CTRL_REG0_MERGEDREGS 0x80000000 - -#define REG_A6XX_SP_UNKNOWN_A81B 0x0000a81b - -#define REG_A6XX_SP_VS_OBJ_START_LO 0x0000a81c - -#define REG_A6XX_SP_VS_OBJ_START_HI 0x0000a81d - -#define REG_A6XX_SP_VS_TEX_COUNT 0x0000a822 - -#define REG_A6XX_SP_VS_CONFIG 0x0000a823 -#define A6XX_SP_VS_CONFIG_ENABLED 0x00000100 -#define A6XX_SP_VS_CONFIG_NTEX__MASK 0x0001fe00 -#define A6XX_SP_VS_CONFIG_NTEX__SHIFT 9 -static inline uint32_t A6XX_SP_VS_CONFIG_NTEX(uint32_t val) -{ - return ((val) << A6XX_SP_VS_CONFIG_NTEX__SHIFT) & 
A6XX_SP_VS_CONFIG_NTEX__MASK; -} -#define A6XX_SP_VS_CONFIG_NSAMP__MASK 0x01fe0000 -#define A6XX_SP_VS_CONFIG_NSAMP__SHIFT 17 -static inline uint32_t A6XX_SP_VS_CONFIG_NSAMP(uint32_t val) -{ - return ((val) << A6XX_SP_VS_CONFIG_NSAMP__SHIFT) & A6XX_SP_VS_CONFIG_NSAMP__MASK; -} - -#define REG_A6XX_SP_VS_INSTRLEN 0x0000a824 - -#define REG_A6XX_SP_HS_CTRL_REG0 0x0000a830 -#define A6XX_SP_HS_CTRL_REG0_HALFREGFOOTPRINT__MASK 0x0000007e -#define A6XX_SP_HS_CTRL_REG0_HALFREGFOOTPRINT__SHIFT 1 -static inline uint32_t A6XX_SP_HS_CTRL_REG0_HALFREGFOOTPRINT(uint32_t val) -{ - return ((val) << A6XX_SP_HS_CTRL_REG0_HALFREGFOOTPRINT__SHIFT) & A6XX_SP_HS_CTRL_REG0_HALFREGFOOTPRINT__MASK; -} -#define A6XX_SP_HS_CTRL_REG0_FULLREGFOOTPRINT__MASK 0x00001f80 -#define A6XX_SP_HS_CTRL_REG0_FULLREGFOOTPRINT__SHIFT 7 -static inline uint32_t A6XX_SP_HS_CTRL_REG0_FULLREGFOOTPRINT(uint32_t val) -{ - return ((val) << A6XX_SP_HS_CTRL_REG0_FULLREGFOOTPRINT__SHIFT) & A6XX_SP_HS_CTRL_REG0_FULLREGFOOTPRINT__MASK; -} -#define A6XX_SP_HS_CTRL_REG0_BRANCHSTACK__MASK 0x000fc000 -#define A6XX_SP_HS_CTRL_REG0_BRANCHSTACK__SHIFT 14 -static inline uint32_t A6XX_SP_HS_CTRL_REG0_BRANCHSTACK(uint32_t val) -{ - return ((val) << A6XX_SP_HS_CTRL_REG0_BRANCHSTACK__SHIFT) & A6XX_SP_HS_CTRL_REG0_BRANCHSTACK__MASK; -} -#define A6XX_SP_HS_CTRL_REG0_THREADSIZE__MASK 0x00100000 -#define A6XX_SP_HS_CTRL_REG0_THREADSIZE__SHIFT 20 -static inline uint32_t A6XX_SP_HS_CTRL_REG0_THREADSIZE(enum a3xx_threadsize val) -{ - return ((val) << A6XX_SP_HS_CTRL_REG0_THREADSIZE__SHIFT) & A6XX_SP_HS_CTRL_REG0_THREADSIZE__MASK; -} -#define A6XX_SP_HS_CTRL_REG0_VARYING 0x00400000 -#define A6XX_SP_HS_CTRL_REG0_PIXLODENABLE 0x04000000 -#define A6XX_SP_HS_CTRL_REG0_MERGEDREGS 0x80000000 - -#define REG_A6XX_SP_HS_UNKNOWN_A831 0x0000a831 - -#define REG_A6XX_SP_HS_OBJ_START_LO 0x0000a834 - -#define REG_A6XX_SP_HS_OBJ_START_HI 0x0000a835 - -#define REG_A6XX_SP_HS_TEX_COUNT 0x0000a83a - -#define REG_A6XX_SP_HS_CONFIG 0x0000a83b -#define 
A6XX_SP_HS_CONFIG_ENABLED 0x00000100 -#define A6XX_SP_HS_CONFIG_NTEX__MASK 0x0001fe00 -#define A6XX_SP_HS_CONFIG_NTEX__SHIFT 9 -static inline uint32_t A6XX_SP_HS_CONFIG_NTEX(uint32_t val) -{ - return ((val) << A6XX_SP_HS_CONFIG_NTEX__SHIFT) & A6XX_SP_HS_CONFIG_NTEX__MASK; -} -#define A6XX_SP_HS_CONFIG_NSAMP__MASK 0x01fe0000 -#define A6XX_SP_HS_CONFIG_NSAMP__SHIFT 17 -static inline uint32_t A6XX_SP_HS_CONFIG_NSAMP(uint32_t val) -{ - return ((val) << A6XX_SP_HS_CONFIG_NSAMP__SHIFT) & A6XX_SP_HS_CONFIG_NSAMP__MASK; -} - -#define REG_A6XX_SP_HS_INSTRLEN 0x0000a83c - -#define REG_A6XX_SP_DS_CTRL_REG0 0x0000a840 -#define A6XX_SP_DS_CTRL_REG0_HALFREGFOOTPRINT__MASK 0x0000007e -#define A6XX_SP_DS_CTRL_REG0_HALFREGFOOTPRINT__SHIFT 1 -static inline uint32_t A6XX_SP_DS_CTRL_REG0_HALFREGFOOTPRINT(uint32_t val) -{ - return ((val) << A6XX_SP_DS_CTRL_REG0_HALFREGFOOTPRINT__SHIFT) & A6XX_SP_DS_CTRL_REG0_HALFREGFOOTPRINT__MASK; -} -#define A6XX_SP_DS_CTRL_REG0_FULLREGFOOTPRINT__MASK 0x00001f80 -#define A6XX_SP_DS_CTRL_REG0_FULLREGFOOTPRINT__SHIFT 7 -static inline uint32_t A6XX_SP_DS_CTRL_REG0_FULLREGFOOTPRINT(uint32_t val) -{ - return ((val) << A6XX_SP_DS_CTRL_REG0_FULLREGFOOTPRINT__SHIFT) & A6XX_SP_DS_CTRL_REG0_FULLREGFOOTPRINT__MASK; -} -#define A6XX_SP_DS_CTRL_REG0_BRANCHSTACK__MASK 0x000fc000 -#define A6XX_SP_DS_CTRL_REG0_BRANCHSTACK__SHIFT 14 -static inline uint32_t A6XX_SP_DS_CTRL_REG0_BRANCHSTACK(uint32_t val) -{ - return ((val) << A6XX_SP_DS_CTRL_REG0_BRANCHSTACK__SHIFT) & A6XX_SP_DS_CTRL_REG0_BRANCHSTACK__MASK; -} -#define A6XX_SP_DS_CTRL_REG0_THREADSIZE__MASK 0x00100000 -#define A6XX_SP_DS_CTRL_REG0_THREADSIZE__SHIFT 20 -static inline uint32_t A6XX_SP_DS_CTRL_REG0_THREADSIZE(enum a3xx_threadsize val) -{ - return ((val) << A6XX_SP_DS_CTRL_REG0_THREADSIZE__SHIFT) & A6XX_SP_DS_CTRL_REG0_THREADSIZE__MASK; -} -#define A6XX_SP_DS_CTRL_REG0_VARYING 0x00400000 -#define A6XX_SP_DS_CTRL_REG0_PIXLODENABLE 0x04000000 -#define A6XX_SP_DS_CTRL_REG0_MERGEDREGS 0x80000000 - -#define 
REG_A6XX_SP_DS_OBJ_START_LO 0x0000a85c - -#define REG_A6XX_SP_DS_OBJ_START_HI 0x0000a85d - -#define REG_A6XX_SP_DS_TEX_COUNT 0x0000a862 - -#define REG_A6XX_SP_DS_CONFIG 0x0000a863 -#define A6XX_SP_DS_CONFIG_ENABLED 0x00000100 -#define A6XX_SP_DS_CONFIG_NTEX__MASK 0x0001fe00 -#define A6XX_SP_DS_CONFIG_NTEX__SHIFT 9 -static inline uint32_t A6XX_SP_DS_CONFIG_NTEX(uint32_t val) -{ - return ((val) << A6XX_SP_DS_CONFIG_NTEX__SHIFT) & A6XX_SP_DS_CONFIG_NTEX__MASK; -} -#define A6XX_SP_DS_CONFIG_NSAMP__MASK 0x01fe0000 -#define A6XX_SP_DS_CONFIG_NSAMP__SHIFT 17 -static inline uint32_t A6XX_SP_DS_CONFIG_NSAMP(uint32_t val) -{ - return ((val) << A6XX_SP_DS_CONFIG_NSAMP__SHIFT) & A6XX_SP_DS_CONFIG_NSAMP__MASK; -} - -#define REG_A6XX_SP_DS_INSTRLEN 0x0000a864 - -#define REG_A6XX_SP_GS_CTRL_REG0 0x0000a870 -#define A6XX_SP_GS_CTRL_REG0_HALFREGFOOTPRINT__MASK 0x0000007e -#define A6XX_SP_GS_CTRL_REG0_HALFREGFOOTPRINT__SHIFT 1 -static inline uint32_t A6XX_SP_GS_CTRL_REG0_HALFREGFOOTPRINT(uint32_t val) -{ - return ((val) << A6XX_SP_GS_CTRL_REG0_HALFREGFOOTPRINT__SHIFT) & A6XX_SP_GS_CTRL_REG0_HALFREGFOOTPRINT__MASK; -} -#define A6XX_SP_GS_CTRL_REG0_FULLREGFOOTPRINT__MASK 0x00001f80 -#define A6XX_SP_GS_CTRL_REG0_FULLREGFOOTPRINT__SHIFT 7 -static inline uint32_t A6XX_SP_GS_CTRL_REG0_FULLREGFOOTPRINT(uint32_t val) -{ - return ((val) << A6XX_SP_GS_CTRL_REG0_FULLREGFOOTPRINT__SHIFT) & A6XX_SP_GS_CTRL_REG0_FULLREGFOOTPRINT__MASK; -} -#define A6XX_SP_GS_CTRL_REG0_BRANCHSTACK__MASK 0x000fc000 -#define A6XX_SP_GS_CTRL_REG0_BRANCHSTACK__SHIFT 14 -static inline uint32_t A6XX_SP_GS_CTRL_REG0_BRANCHSTACK(uint32_t val) -{ - return ((val) << A6XX_SP_GS_CTRL_REG0_BRANCHSTACK__SHIFT) & A6XX_SP_GS_CTRL_REG0_BRANCHSTACK__MASK; -} -#define A6XX_SP_GS_CTRL_REG0_THREADSIZE__MASK 0x00100000 -#define A6XX_SP_GS_CTRL_REG0_THREADSIZE__SHIFT 20 -static inline uint32_t A6XX_SP_GS_CTRL_REG0_THREADSIZE(enum a3xx_threadsize val) -{ - return ((val) << A6XX_SP_GS_CTRL_REG0_THREADSIZE__SHIFT) & 
A6XX_SP_GS_CTRL_REG0_THREADSIZE__MASK; -} -#define A6XX_SP_GS_CTRL_REG0_VARYING 0x00400000 -#define A6XX_SP_GS_CTRL_REG0_PIXLODENABLE 0x04000000 -#define A6XX_SP_GS_CTRL_REG0_MERGEDREGS 0x80000000 - -#define REG_A6XX_SP_GS_UNKNOWN_A871 0x0000a871 - -#define REG_A6XX_SP_GS_OBJ_START_LO 0x0000a88d - -#define REG_A6XX_SP_GS_OBJ_START_HI 0x0000a88e - -#define REG_A6XX_SP_GS_TEX_COUNT 0x0000a893 - -#define REG_A6XX_SP_GS_CONFIG 0x0000a894 -#define A6XX_SP_GS_CONFIG_ENABLED 0x00000100 -#define A6XX_SP_GS_CONFIG_NTEX__MASK 0x0001fe00 -#define A6XX_SP_GS_CONFIG_NTEX__SHIFT 9 -static inline uint32_t A6XX_SP_GS_CONFIG_NTEX(uint32_t val) -{ - return ((val) << A6XX_SP_GS_CONFIG_NTEX__SHIFT) & A6XX_SP_GS_CONFIG_NTEX__MASK; -} -#define A6XX_SP_GS_CONFIG_NSAMP__MASK 0x01fe0000 -#define A6XX_SP_GS_CONFIG_NSAMP__SHIFT 17 -static inline uint32_t A6XX_SP_GS_CONFIG_NSAMP(uint32_t val) -{ - return ((val) << A6XX_SP_GS_CONFIG_NSAMP__SHIFT) & A6XX_SP_GS_CONFIG_NSAMP__MASK; -} - -#define REG_A6XX_SP_GS_INSTRLEN 0x0000a895 - -#define REG_A6XX_SP_VS_TEX_SAMP_LO 0x0000a8a0 - -#define REG_A6XX_SP_VS_TEX_SAMP_HI 0x0000a8a1 - -#define REG_A6XX_SP_HS_TEX_SAMP_LO 0x0000a8a2 - -#define REG_A6XX_SP_HS_TEX_SAMP_HI 0x0000a8a3 - -#define REG_A6XX_SP_DS_TEX_SAMP_LO 0x0000a8a4 - -#define REG_A6XX_SP_DS_TEX_SAMP_HI 0x0000a8a5 - -#define REG_A6XX_SP_GS_TEX_SAMP_LO 0x0000a8a6 - -#define REG_A6XX_SP_GS_TEX_SAMP_HI 0x0000a8a7 - -#define REG_A6XX_SP_VS_TEX_CONST_LO 0x0000a8a8 - -#define REG_A6XX_SP_VS_TEX_CONST_HI 0x0000a8a9 - -#define REG_A6XX_SP_HS_TEX_CONST_LO 0x0000a8aa - -#define REG_A6XX_SP_HS_TEX_CONST_HI 0x0000a8ab - -#define REG_A6XX_SP_DS_TEX_CONST_LO 0x0000a8ac - -#define REG_A6XX_SP_DS_TEX_CONST_HI 0x0000a8ad - -#define REG_A6XX_SP_GS_TEX_CONST_LO 0x0000a8ae - -#define REG_A6XX_SP_GS_TEX_CONST_HI 0x0000a8af - -#define REG_A6XX_SP_FS_CTRL_REG0 0x0000a980 -#define A6XX_SP_FS_CTRL_REG0_HALFREGFOOTPRINT__MASK 0x0000007e -#define A6XX_SP_FS_CTRL_REG0_HALFREGFOOTPRINT__SHIFT 1 -static inline uint32_t 
A6XX_SP_FS_CTRL_REG0_HALFREGFOOTPRINT(uint32_t val) -{ - return ((val) << A6XX_SP_FS_CTRL_REG0_HALFREGFOOTPRINT__SHIFT) & A6XX_SP_FS_CTRL_REG0_HALFREGFOOTPRINT__MASK; -} -#define A6XX_SP_FS_CTRL_REG0_FULLREGFOOTPRINT__MASK 0x00001f80 -#define A6XX_SP_FS_CTRL_REG0_FULLREGFOOTPRINT__SHIFT 7 -static inline uint32_t A6XX_SP_FS_CTRL_REG0_FULLREGFOOTPRINT(uint32_t val) -{ - return ((val) << A6XX_SP_FS_CTRL_REG0_FULLREGFOOTPRINT__SHIFT) & A6XX_SP_FS_CTRL_REG0_FULLREGFOOTPRINT__MASK; -} -#define A6XX_SP_FS_CTRL_REG0_BRANCHSTACK__MASK 0x000fc000 -#define A6XX_SP_FS_CTRL_REG0_BRANCHSTACK__SHIFT 14 -static inline uint32_t A6XX_SP_FS_CTRL_REG0_BRANCHSTACK(uint32_t val) -{ - return ((val) << A6XX_SP_FS_CTRL_REG0_BRANCHSTACK__SHIFT) & A6XX_SP_FS_CTRL_REG0_BRANCHSTACK__MASK; -} -#define A6XX_SP_FS_CTRL_REG0_THREADSIZE__MASK 0x00100000 -#define A6XX_SP_FS_CTRL_REG0_THREADSIZE__SHIFT 20 -static inline uint32_t A6XX_SP_FS_CTRL_REG0_THREADSIZE(enum a3xx_threadsize val) -{ - return ((val) << A6XX_SP_FS_CTRL_REG0_THREADSIZE__SHIFT) & A6XX_SP_FS_CTRL_REG0_THREADSIZE__MASK; -} -#define A6XX_SP_FS_CTRL_REG0_VARYING 0x00400000 -#define A6XX_SP_FS_CTRL_REG0_PIXLODENABLE 0x04000000 -#define A6XX_SP_FS_CTRL_REG0_MERGEDREGS 0x80000000 - -#define REG_A6XX_SP_UNKNOWN_A982 0x0000a982 - -#define REG_A6XX_SP_FS_OBJ_START_LO 0x0000a983 - -#define REG_A6XX_SP_FS_OBJ_START_HI 0x0000a984 - -#define REG_A6XX_SP_BLEND_CNTL 0x0000a989 -#define A6XX_SP_BLEND_CNTL_ENABLED 0x00000001 -#define A6XX_SP_BLEND_CNTL_UNK8 0x00000100 - -#define REG_A6XX_SP_SRGB_CNTL 0x0000a98a -#define A6XX_SP_SRGB_CNTL_SRGB_MRT0 0x00000001 -#define A6XX_SP_SRGB_CNTL_SRGB_MRT1 0x00000002 -#define A6XX_SP_SRGB_CNTL_SRGB_MRT2 0x00000004 -#define A6XX_SP_SRGB_CNTL_SRGB_MRT3 0x00000008 -#define A6XX_SP_SRGB_CNTL_SRGB_MRT4 0x00000010 -#define A6XX_SP_SRGB_CNTL_SRGB_MRT5 0x00000020 -#define A6XX_SP_SRGB_CNTL_SRGB_MRT6 0x00000040 -#define A6XX_SP_SRGB_CNTL_SRGB_MRT7 0x00000080 - -#define REG_A6XX_SP_FS_RENDER_COMPONENTS 0x0000a98b 
-#define A6XX_SP_FS_RENDER_COMPONENTS_RT0__MASK 0x0000000f -#define A6XX_SP_FS_RENDER_COMPONENTS_RT0__SHIFT 0 -static inline uint32_t A6XX_SP_FS_RENDER_COMPONENTS_RT0(uint32_t val) -{ - return ((val) << A6XX_SP_FS_RENDER_COMPONENTS_RT0__SHIFT) & A6XX_SP_FS_RENDER_COMPONENTS_RT0__MASK; -} -#define A6XX_SP_FS_RENDER_COMPONENTS_RT1__MASK 0x000000f0 -#define A6XX_SP_FS_RENDER_COMPONENTS_RT1__SHIFT 4 -static inline uint32_t A6XX_SP_FS_RENDER_COMPONENTS_RT1(uint32_t val) -{ - return ((val) << A6XX_SP_FS_RENDER_COMPONENTS_RT1__SHIFT) & A6XX_SP_FS_RENDER_COMPONENTS_RT1__MASK; -} -#define A6XX_SP_FS_RENDER_COMPONENTS_RT2__MASK 0x00000f00 -#define A6XX_SP_FS_RENDER_COMPONENTS_RT2__SHIFT 8 -static inline uint32_t A6XX_SP_FS_RENDER_COMPONENTS_RT2(uint32_t val) -{ - return ((val) << A6XX_SP_FS_RENDER_COMPONENTS_RT2__SHIFT) & A6XX_SP_FS_RENDER_COMPONENTS_RT2__MASK; -} -#define A6XX_SP_FS_RENDER_COMPONENTS_RT3__MASK 0x0000f000 -#define A6XX_SP_FS_RENDER_COMPONENTS_RT3__SHIFT 12 -static inline uint32_t A6XX_SP_FS_RENDER_COMPONENTS_RT3(uint32_t val) -{ - return ((val) << A6XX_SP_FS_RENDER_COMPONENTS_RT3__SHIFT) & A6XX_SP_FS_RENDER_COMPONENTS_RT3__MASK; -} -#define A6XX_SP_FS_RENDER_COMPONENTS_RT4__MASK 0x000f0000 -#define A6XX_SP_FS_RENDER_COMPONENTS_RT4__SHIFT 16 -static inline uint32_t A6XX_SP_FS_RENDER_COMPONENTS_RT4(uint32_t val) -{ - return ((val) << A6XX_SP_FS_RENDER_COMPONENTS_RT4__SHIFT) & A6XX_SP_FS_RENDER_COMPONENTS_RT4__MASK; -} -#define A6XX_SP_FS_RENDER_COMPONENTS_RT5__MASK 0x00f00000 -#define A6XX_SP_FS_RENDER_COMPONENTS_RT5__SHIFT 20 -static inline uint32_t A6XX_SP_FS_RENDER_COMPONENTS_RT5(uint32_t val) -{ - return ((val) << A6XX_SP_FS_RENDER_COMPONENTS_RT5__SHIFT) & A6XX_SP_FS_RENDER_COMPONENTS_RT5__MASK; -} -#define A6XX_SP_FS_RENDER_COMPONENTS_RT6__MASK 0x0f000000 -#define A6XX_SP_FS_RENDER_COMPONENTS_RT6__SHIFT 24 -static inline uint32_t A6XX_SP_FS_RENDER_COMPONENTS_RT6(uint32_t val) -{ - return ((val) << A6XX_SP_FS_RENDER_COMPONENTS_RT6__SHIFT) & 
A6XX_SP_FS_RENDER_COMPONENTS_RT6__MASK; -} -#define A6XX_SP_FS_RENDER_COMPONENTS_RT7__MASK 0xf0000000 -#define A6XX_SP_FS_RENDER_COMPONENTS_RT7__SHIFT 28 -static inline uint32_t A6XX_SP_FS_RENDER_COMPONENTS_RT7(uint32_t val) -{ - return ((val) << A6XX_SP_FS_RENDER_COMPONENTS_RT7__SHIFT) & A6XX_SP_FS_RENDER_COMPONENTS_RT7__MASK; -} - -#define REG_A6XX_SP_FS_OUTPUT_CNTL0 0x0000a98c -#define A6XX_SP_FS_OUTPUT_CNTL0_DEPTH_REGID__MASK 0x0000ff00 -#define A6XX_SP_FS_OUTPUT_CNTL0_DEPTH_REGID__SHIFT 8 -static inline uint32_t A6XX_SP_FS_OUTPUT_CNTL0_DEPTH_REGID(uint32_t val) -{ - return ((val) << A6XX_SP_FS_OUTPUT_CNTL0_DEPTH_REGID__SHIFT) & A6XX_SP_FS_OUTPUT_CNTL0_DEPTH_REGID__MASK; -} - -#define REG_A6XX_SP_FS_OUTPUT_CNTL1 0x0000a98d -#define A6XX_SP_FS_OUTPUT_CNTL1_MRT__MASK 0x0000000f -#define A6XX_SP_FS_OUTPUT_CNTL1_MRT__SHIFT 0 -static inline uint32_t A6XX_SP_FS_OUTPUT_CNTL1_MRT(uint32_t val) -{ - return ((val) << A6XX_SP_FS_OUTPUT_CNTL1_MRT__SHIFT) & A6XX_SP_FS_OUTPUT_CNTL1_MRT__MASK; -} - -static inline uint32_t REG_A6XX_SP_FS_MRT(uint32_t i0) { return 0x0000a996 + 0x1*i0; } - -static inline uint32_t REG_A6XX_SP_FS_MRT_REG(uint32_t i0) { return 0x0000a996 + 0x1*i0; } -#define A6XX_SP_FS_MRT_REG_COLOR_FORMAT__MASK 0x000000ff -#define A6XX_SP_FS_MRT_REG_COLOR_FORMAT__SHIFT 0 -static inline uint32_t A6XX_SP_FS_MRT_REG_COLOR_FORMAT(enum a6xx_color_fmt val) -{ - return ((val) << A6XX_SP_FS_MRT_REG_COLOR_FORMAT__SHIFT) & A6XX_SP_FS_MRT_REG_COLOR_FORMAT__MASK; -} -#define A6XX_SP_FS_MRT_REG_COLOR_SINT 0x00000100 -#define A6XX_SP_FS_MRT_REG_COLOR_UINT 0x00000200 - -#define REG_A6XX_SP_UNKNOWN_A99E 0x0000a99e - -#define REG_A6XX_SP_FS_TEX_COUNT 0x0000a9a7 - -#define REG_A6XX_SP_UNKNOWN_A9A8 0x0000a9a8 - -#define REG_A6XX_SP_FS_TEX_SAMP_LO 0x0000a9e0 - -#define REG_A6XX_SP_FS_TEX_SAMP_HI 0x0000a9e1 - -#define REG_A6XX_SP_CS_TEX_SAMP_LO 0x0000a9e2 - -#define REG_A6XX_SP_CS_TEX_SAMP_HI 0x0000a9e3 - -#define REG_A6XX_SP_FS_TEX_CONST_LO 0x0000a9e4 - -#define 
REG_A6XX_SP_FS_TEX_CONST_HI 0x0000a9e5 - -#define REG_A6XX_SP_CS_TEX_CONST_LO 0x0000a9e6 - -#define REG_A6XX_SP_CS_TEX_CONST_HI 0x0000a9e7 - -static inline uint32_t REG_A6XX_SP_FS_OUTPUT(uint32_t i0) { return 0x0000a98e + 0x1*i0; } - -static inline uint32_t REG_A6XX_SP_FS_OUTPUT_REG(uint32_t i0) { return 0x0000a98e + 0x1*i0; } -#define A6XX_SP_FS_OUTPUT_REG_REGID__MASK 0x000000ff -#define A6XX_SP_FS_OUTPUT_REG_REGID__SHIFT 0 -static inline uint32_t A6XX_SP_FS_OUTPUT_REG_REGID(uint32_t val) -{ - return ((val) << A6XX_SP_FS_OUTPUT_REG_REGID__SHIFT) & A6XX_SP_FS_OUTPUT_REG_REGID__MASK; -} -#define A6XX_SP_FS_OUTPUT_REG_HALF_PRECISION 0x00000100 - -#define REG_A6XX_SP_CS_CTRL_REG0 0x0000a9b0 -#define A6XX_SP_CS_CTRL_REG0_HALFREGFOOTPRINT__MASK 0x0000007e -#define A6XX_SP_CS_CTRL_REG0_HALFREGFOOTPRINT__SHIFT 1 -static inline uint32_t A6XX_SP_CS_CTRL_REG0_HALFREGFOOTPRINT(uint32_t val) -{ - return ((val) << A6XX_SP_CS_CTRL_REG0_HALFREGFOOTPRINT__SHIFT) & A6XX_SP_CS_CTRL_REG0_HALFREGFOOTPRINT__MASK; -} -#define A6XX_SP_CS_CTRL_REG0_FULLREGFOOTPRINT__MASK 0x00001f80 -#define A6XX_SP_CS_CTRL_REG0_FULLREGFOOTPRINT__SHIFT 7 -static inline uint32_t A6XX_SP_CS_CTRL_REG0_FULLREGFOOTPRINT(uint32_t val) -{ - return ((val) << A6XX_SP_CS_CTRL_REG0_FULLREGFOOTPRINT__SHIFT) & A6XX_SP_CS_CTRL_REG0_FULLREGFOOTPRINT__MASK; -} -#define A6XX_SP_CS_CTRL_REG0_BRANCHSTACK__MASK 0x000fc000 -#define A6XX_SP_CS_CTRL_REG0_BRANCHSTACK__SHIFT 14 -static inline uint32_t A6XX_SP_CS_CTRL_REG0_BRANCHSTACK(uint32_t val) -{ - return ((val) << A6XX_SP_CS_CTRL_REG0_BRANCHSTACK__SHIFT) & A6XX_SP_CS_CTRL_REG0_BRANCHSTACK__MASK; -} -#define A6XX_SP_CS_CTRL_REG0_THREADSIZE__MASK 0x00100000 -#define A6XX_SP_CS_CTRL_REG0_THREADSIZE__SHIFT 20 -static inline uint32_t A6XX_SP_CS_CTRL_REG0_THREADSIZE(enum a3xx_threadsize val) -{ - return ((val) << A6XX_SP_CS_CTRL_REG0_THREADSIZE__SHIFT) & A6XX_SP_CS_CTRL_REG0_THREADSIZE__MASK; -} -#define A6XX_SP_CS_CTRL_REG0_VARYING 0x00400000 -#define 
A6XX_SP_CS_CTRL_REG0_PIXLODENABLE 0x04000000 -#define A6XX_SP_CS_CTRL_REG0_MERGEDREGS 0x80000000 - -#define REG_A6XX_SP_CS_OBJ_START_LO 0x0000a9b4 - -#define REG_A6XX_SP_CS_OBJ_START_HI 0x0000a9b5 - -#define REG_A6XX_SP_CS_INSTRLEN 0x0000a9bc - -#define REG_A6XX_SP_UNKNOWN_AB00 0x0000ab00 - -#define REG_A6XX_SP_FS_CONFIG 0x0000ab04 -#define A6XX_SP_FS_CONFIG_ENABLED 0x00000100 -#define A6XX_SP_FS_CONFIG_NTEX__MASK 0x0001fe00 -#define A6XX_SP_FS_CONFIG_NTEX__SHIFT 9 -static inline uint32_t A6XX_SP_FS_CONFIG_NTEX(uint32_t val) -{ - return ((val) << A6XX_SP_FS_CONFIG_NTEX__SHIFT) & A6XX_SP_FS_CONFIG_NTEX__MASK; -} -#define A6XX_SP_FS_CONFIG_NSAMP__MASK 0x01fe0000 -#define A6XX_SP_FS_CONFIG_NSAMP__SHIFT 17 -static inline uint32_t A6XX_SP_FS_CONFIG_NSAMP(uint32_t val) -{ - return ((val) << A6XX_SP_FS_CONFIG_NSAMP__SHIFT) & A6XX_SP_FS_CONFIG_NSAMP__MASK; -} - -#define REG_A6XX_SP_FS_INSTRLEN 0x0000ab05 - -#define REG_A6XX_SP_UNKNOWN_AB20 0x0000ab20 - -#define REG_A6XX_SP_UNKNOWN_ACC0 0x0000acc0 - -#define REG_A6XX_SP_UNKNOWN_AE00 0x0000ae00 - -#define REG_A6XX_SP_UNKNOWN_AE03 0x0000ae03 - -#define REG_A6XX_SP_UNKNOWN_AE04 0x0000ae04 - -#define REG_A6XX_SP_UNKNOWN_AE0F 0x0000ae0f - -#define REG_A6XX_SP_UNKNOWN_B182 0x0000b182 - -#define REG_A6XX_SP_UNKNOWN_B183 0x0000b183 - -#define REG_A6XX_SP_TP_RAS_MSAA_CNTL 0x0000b300 -#define A6XX_SP_TP_RAS_MSAA_CNTL_SAMPLES__MASK 0x00000003 -#define A6XX_SP_TP_RAS_MSAA_CNTL_SAMPLES__SHIFT 0 -static inline uint32_t A6XX_SP_TP_RAS_MSAA_CNTL_SAMPLES(enum a3xx_msaa_samples val) -{ - return ((val) << A6XX_SP_TP_RAS_MSAA_CNTL_SAMPLES__SHIFT) & A6XX_SP_TP_RAS_MSAA_CNTL_SAMPLES__MASK; -} - -#define REG_A6XX_SP_TP_DEST_MSAA_CNTL 0x0000b301 -#define A6XX_SP_TP_DEST_MSAA_CNTL_SAMPLES__MASK 0x00000003 -#define A6XX_SP_TP_DEST_MSAA_CNTL_SAMPLES__SHIFT 0 -static inline uint32_t A6XX_SP_TP_DEST_MSAA_CNTL_SAMPLES(enum a3xx_msaa_samples val) -{ - return ((val) << A6XX_SP_TP_DEST_MSAA_CNTL_SAMPLES__SHIFT) & A6XX_SP_TP_DEST_MSAA_CNTL_SAMPLES__MASK; -} 
-#define A6XX_SP_TP_DEST_MSAA_CNTL_MSAA_DISABLE 0x00000004 - -#define REG_A6XX_SP_TP_BORDER_COLOR_BASE_ADDR_LO 0x0000b302 - -#define REG_A6XX_SP_TP_BORDER_COLOR_BASE_ADDR_HI 0x0000b303 - -#define REG_A6XX_SP_TP_UNKNOWN_B304 0x0000b304 - -#define REG_A6XX_SP_TP_UNKNOWN_B309 0x0000b309 - -#define REG_A6XX_SP_PS_2D_SRC_INFO 0x0000b4c0 -#define A6XX_SP_PS_2D_SRC_INFO_COLOR_FORMAT__MASK 0x000000ff -#define A6XX_SP_PS_2D_SRC_INFO_COLOR_FORMAT__SHIFT 0 -static inline uint32_t A6XX_SP_PS_2D_SRC_INFO_COLOR_FORMAT(enum a6xx_color_fmt val) -{ - return ((val) << A6XX_SP_PS_2D_SRC_INFO_COLOR_FORMAT__SHIFT) & A6XX_SP_PS_2D_SRC_INFO_COLOR_FORMAT__MASK; -} -#define A6XX_SP_PS_2D_SRC_INFO_TILE_MODE__MASK 0x00000300 -#define A6XX_SP_PS_2D_SRC_INFO_TILE_MODE__SHIFT 8 -static inline uint32_t A6XX_SP_PS_2D_SRC_INFO_TILE_MODE(enum a6xx_tile_mode val) -{ - return ((val) << A6XX_SP_PS_2D_SRC_INFO_TILE_MODE__SHIFT) & A6XX_SP_PS_2D_SRC_INFO_TILE_MODE__MASK; -} -#define A6XX_SP_PS_2D_SRC_INFO_COLOR_SWAP__MASK 0x00000c00 -#define A6XX_SP_PS_2D_SRC_INFO_COLOR_SWAP__SHIFT 10 -static inline uint32_t A6XX_SP_PS_2D_SRC_INFO_COLOR_SWAP(enum a3xx_color_swap val) -{ - return ((val) << A6XX_SP_PS_2D_SRC_INFO_COLOR_SWAP__SHIFT) & A6XX_SP_PS_2D_SRC_INFO_COLOR_SWAP__MASK; -} -#define A6XX_SP_PS_2D_SRC_INFO_FLAGS 0x00001000 -#define A6XX_SP_PS_2D_SRC_INFO_FILTER 0x00010000 - -#define REG_A6XX_SP_PS_2D_SRC_SIZE 0x0000b4c1 -#define A6XX_SP_PS_2D_SRC_SIZE_WIDTH__MASK 0x00007fff -#define A6XX_SP_PS_2D_SRC_SIZE_WIDTH__SHIFT 0 -static inline uint32_t A6XX_SP_PS_2D_SRC_SIZE_WIDTH(uint32_t val) -{ - return ((val) << A6XX_SP_PS_2D_SRC_SIZE_WIDTH__SHIFT) & A6XX_SP_PS_2D_SRC_SIZE_WIDTH__MASK; -} -#define A6XX_SP_PS_2D_SRC_SIZE_HEIGHT__MASK 0x3fff8000 -#define A6XX_SP_PS_2D_SRC_SIZE_HEIGHT__SHIFT 15 -static inline uint32_t A6XX_SP_PS_2D_SRC_SIZE_HEIGHT(uint32_t val) -{ - return ((val) << A6XX_SP_PS_2D_SRC_SIZE_HEIGHT__SHIFT) & A6XX_SP_PS_2D_SRC_SIZE_HEIGHT__MASK; -} - -#define REG_A6XX_SP_PS_2D_SRC_LO 0x0000b4c2 - 
-#define REG_A6XX_SP_PS_2D_SRC_HI 0x0000b4c3 - -#define REG_A6XX_SP_PS_2D_SRC_PITCH 0x0000b4c4 -#define A6XX_SP_PS_2D_SRC_PITCH_PITCH__MASK 0x01fffe00 -#define A6XX_SP_PS_2D_SRC_PITCH_PITCH__SHIFT 9 -static inline uint32_t A6XX_SP_PS_2D_SRC_PITCH_PITCH(uint32_t val) -{ - assert(!(val & 0x3f)); - return ((val >> 6) << A6XX_SP_PS_2D_SRC_PITCH_PITCH__SHIFT) & A6XX_SP_PS_2D_SRC_PITCH_PITCH__MASK; -} - -#define REG_A6XX_SP_PS_2D_SRC_FLAGS_LO 0x0000b4ca - -#define REG_A6XX_SP_PS_2D_SRC_FLAGS_HI 0x0000b4cb - -#define REG_A6XX_SP_UNKNOWN_B600 0x0000b600 - -#define REG_A6XX_SP_UNKNOWN_B605 0x0000b605 - -#define REG_A6XX_HLSQ_VS_CNTL 0x0000b800 -#define A6XX_HLSQ_VS_CNTL_CONSTLEN__MASK 0x000000ff -#define A6XX_HLSQ_VS_CNTL_CONSTLEN__SHIFT 0 -static inline uint32_t A6XX_HLSQ_VS_CNTL_CONSTLEN(uint32_t val) -{ - assert(!(val & 0x3)); - return ((val >> 2) << A6XX_HLSQ_VS_CNTL_CONSTLEN__SHIFT) & A6XX_HLSQ_VS_CNTL_CONSTLEN__MASK; -} - -#define REG_A6XX_HLSQ_HS_CNTL 0x0000b801 -#define A6XX_HLSQ_HS_CNTL_CONSTLEN__MASK 0x000000ff -#define A6XX_HLSQ_HS_CNTL_CONSTLEN__SHIFT 0 -static inline uint32_t A6XX_HLSQ_HS_CNTL_CONSTLEN(uint32_t val) -{ - assert(!(val & 0x3)); - return ((val >> 2) << A6XX_HLSQ_HS_CNTL_CONSTLEN__SHIFT) & A6XX_HLSQ_HS_CNTL_CONSTLEN__MASK; -} - -#define REG_A6XX_HLSQ_DS_CNTL 0x0000b802 -#define A6XX_HLSQ_DS_CNTL_CONSTLEN__MASK 0x000000ff -#define A6XX_HLSQ_DS_CNTL_CONSTLEN__SHIFT 0 -static inline uint32_t A6XX_HLSQ_DS_CNTL_CONSTLEN(uint32_t val) -{ - assert(!(val & 0x3)); - return ((val >> 2) << A6XX_HLSQ_DS_CNTL_CONSTLEN__SHIFT) & A6XX_HLSQ_DS_CNTL_CONSTLEN__MASK; -} - -#define REG_A6XX_HLSQ_GS_CNTL 0x0000b803 -#define A6XX_HLSQ_GS_CNTL_CONSTLEN__MASK 0x000000ff -#define A6XX_HLSQ_GS_CNTL_CONSTLEN__SHIFT 0 -static inline uint32_t A6XX_HLSQ_GS_CNTL_CONSTLEN(uint32_t val) -{ - assert(!(val & 0x3)); - return ((val >> 2) << A6XX_HLSQ_GS_CNTL_CONSTLEN__SHIFT) & A6XX_HLSQ_GS_CNTL_CONSTLEN__MASK; -} - -#define REG_A6XX_HLSQ_UNKNOWN_B980 0x0000b980 - -#define 
REG_A6XX_HLSQ_CONTROL_1_REG 0x0000b982 - -#define REG_A6XX_HLSQ_CONTROL_2_REG 0x0000b983 -#define A6XX_HLSQ_CONTROL_2_REG_FACEREGID__MASK 0x000000ff -#define A6XX_HLSQ_CONTROL_2_REG_FACEREGID__SHIFT 0 -static inline uint32_t A6XX_HLSQ_CONTROL_2_REG_FACEREGID(uint32_t val) -{ - return ((val) << A6XX_HLSQ_CONTROL_2_REG_FACEREGID__SHIFT) & A6XX_HLSQ_CONTROL_2_REG_FACEREGID__MASK; -} -#define A6XX_HLSQ_CONTROL_2_REG_SAMPLEID__MASK 0x0000ff00 -#define A6XX_HLSQ_CONTROL_2_REG_SAMPLEID__SHIFT 8 -static inline uint32_t A6XX_HLSQ_CONTROL_2_REG_SAMPLEID(uint32_t val) -{ - return ((val) << A6XX_HLSQ_CONTROL_2_REG_SAMPLEID__SHIFT) & A6XX_HLSQ_CONTROL_2_REG_SAMPLEID__MASK; -} -#define A6XX_HLSQ_CONTROL_2_REG_SAMPLEMASK__MASK 0x00ff0000 -#define A6XX_HLSQ_CONTROL_2_REG_SAMPLEMASK__SHIFT 16 -static inline uint32_t A6XX_HLSQ_CONTROL_2_REG_SAMPLEMASK(uint32_t val) -{ - return ((val) << A6XX_HLSQ_CONTROL_2_REG_SAMPLEMASK__SHIFT) & A6XX_HLSQ_CONTROL_2_REG_SAMPLEMASK__MASK; -} - -#define REG_A6XX_HLSQ_CONTROL_3_REG 0x0000b984 -#define A6XX_HLSQ_CONTROL_3_REG_FRAGCOORDXYREGID__MASK 0x000000ff -#define A6XX_HLSQ_CONTROL_3_REG_FRAGCOORDXYREGID__SHIFT 0 -static inline uint32_t A6XX_HLSQ_CONTROL_3_REG_FRAGCOORDXYREGID(uint32_t val) -{ - return ((val) << A6XX_HLSQ_CONTROL_3_REG_FRAGCOORDXYREGID__SHIFT) & A6XX_HLSQ_CONTROL_3_REG_FRAGCOORDXYREGID__MASK; -} - -#define REG_A6XX_HLSQ_CONTROL_4_REG 0x0000b985 -#define A6XX_HLSQ_CONTROL_4_REG_XYCOORDREGID__MASK 0x00ff0000 -#define A6XX_HLSQ_CONTROL_4_REG_XYCOORDREGID__SHIFT 16 -static inline uint32_t A6XX_HLSQ_CONTROL_4_REG_XYCOORDREGID(uint32_t val) -{ - return ((val) << A6XX_HLSQ_CONTROL_4_REG_XYCOORDREGID__SHIFT) & A6XX_HLSQ_CONTROL_4_REG_XYCOORDREGID__MASK; -} -#define A6XX_HLSQ_CONTROL_4_REG_ZWCOORDREGID__MASK 0xff000000 -#define A6XX_HLSQ_CONTROL_4_REG_ZWCOORDREGID__SHIFT 24 -static inline uint32_t A6XX_HLSQ_CONTROL_4_REG_ZWCOORDREGID(uint32_t val) -{ - return ((val) << A6XX_HLSQ_CONTROL_4_REG_ZWCOORDREGID__SHIFT) & 
A6XX_HLSQ_CONTROL_4_REG_ZWCOORDREGID__MASK; -} - -#define REG_A6XX_HLSQ_CONTROL_5_REG 0x0000b986 - -#define REG_A6XX_HLSQ_CS_NDRANGE_0 0x0000b990 -#define A6XX_HLSQ_CS_NDRANGE_0_KERNELDIM__MASK 0x00000003 -#define A6XX_HLSQ_CS_NDRANGE_0_KERNELDIM__SHIFT 0 -static inline uint32_t A6XX_HLSQ_CS_NDRANGE_0_KERNELDIM(uint32_t val) -{ - return ((val) << A6XX_HLSQ_CS_NDRANGE_0_KERNELDIM__SHIFT) & A6XX_HLSQ_CS_NDRANGE_0_KERNELDIM__MASK; -} -#define A6XX_HLSQ_CS_NDRANGE_0_LOCALSIZEX__MASK 0x00000ffc -#define A6XX_HLSQ_CS_NDRANGE_0_LOCALSIZEX__SHIFT 2 -static inline uint32_t A6XX_HLSQ_CS_NDRANGE_0_LOCALSIZEX(uint32_t val) -{ - return ((val) << A6XX_HLSQ_CS_NDRANGE_0_LOCALSIZEX__SHIFT) & A6XX_HLSQ_CS_NDRANGE_0_LOCALSIZEX__MASK; -} -#define A6XX_HLSQ_CS_NDRANGE_0_LOCALSIZEY__MASK 0x003ff000 -#define A6XX_HLSQ_CS_NDRANGE_0_LOCALSIZEY__SHIFT 12 -static inline uint32_t A6XX_HLSQ_CS_NDRANGE_0_LOCALSIZEY(uint32_t val) -{ - return ((val) << A6XX_HLSQ_CS_NDRANGE_0_LOCALSIZEY__SHIFT) & A6XX_HLSQ_CS_NDRANGE_0_LOCALSIZEY__MASK; -} -#define A6XX_HLSQ_CS_NDRANGE_0_LOCALSIZEZ__MASK 0xffc00000 -#define A6XX_HLSQ_CS_NDRANGE_0_LOCALSIZEZ__SHIFT 22 -static inline uint32_t A6XX_HLSQ_CS_NDRANGE_0_LOCALSIZEZ(uint32_t val) -{ - return ((val) << A6XX_HLSQ_CS_NDRANGE_0_LOCALSIZEZ__SHIFT) & A6XX_HLSQ_CS_NDRANGE_0_LOCALSIZEZ__MASK; -} - -#define REG_A6XX_HLSQ_CS_NDRANGE_1 0x0000b991 -#define A6XX_HLSQ_CS_NDRANGE_1_GLOBALSIZE_X__MASK 0xffffffff -#define A6XX_HLSQ_CS_NDRANGE_1_GLOBALSIZE_X__SHIFT 0 -static inline uint32_t A6XX_HLSQ_CS_NDRANGE_1_GLOBALSIZE_X(uint32_t val) -{ - return ((val) << A6XX_HLSQ_CS_NDRANGE_1_GLOBALSIZE_X__SHIFT) & A6XX_HLSQ_CS_NDRANGE_1_GLOBALSIZE_X__MASK; -} - -#define REG_A6XX_HLSQ_CS_NDRANGE_2 0x0000b992 -#define A6XX_HLSQ_CS_NDRANGE_2_GLOBALOFF_X__MASK 0xffffffff -#define A6XX_HLSQ_CS_NDRANGE_2_GLOBALOFF_X__SHIFT 0 -static inline uint32_t A6XX_HLSQ_CS_NDRANGE_2_GLOBALOFF_X(uint32_t val) -{ - return ((val) << A6XX_HLSQ_CS_NDRANGE_2_GLOBALOFF_X__SHIFT) & 
A6XX_HLSQ_CS_NDRANGE_2_GLOBALOFF_X__MASK; -} - -#define REG_A6XX_HLSQ_CS_NDRANGE_3 0x0000b993 -#define A6XX_HLSQ_CS_NDRANGE_3_GLOBALSIZE_Y__MASK 0xffffffff -#define A6XX_HLSQ_CS_NDRANGE_3_GLOBALSIZE_Y__SHIFT 0 -static inline uint32_t A6XX_HLSQ_CS_NDRANGE_3_GLOBALSIZE_Y(uint32_t val) -{ - return ((val) << A6XX_HLSQ_CS_NDRANGE_3_GLOBALSIZE_Y__SHIFT) & A6XX_HLSQ_CS_NDRANGE_3_GLOBALSIZE_Y__MASK; -} - -#define REG_A6XX_HLSQ_CS_NDRANGE_4 0x0000b994 -#define A6XX_HLSQ_CS_NDRANGE_4_GLOBALOFF_Y__MASK 0xffffffff -#define A6XX_HLSQ_CS_NDRANGE_4_GLOBALOFF_Y__SHIFT 0 -static inline uint32_t A6XX_HLSQ_CS_NDRANGE_4_GLOBALOFF_Y(uint32_t val) -{ - return ((val) << A6XX_HLSQ_CS_NDRANGE_4_GLOBALOFF_Y__SHIFT) & A6XX_HLSQ_CS_NDRANGE_4_GLOBALOFF_Y__MASK; -} - -#define REG_A6XX_HLSQ_CS_NDRANGE_5 0x0000b995 -#define A6XX_HLSQ_CS_NDRANGE_5_GLOBALSIZE_Z__MASK 0xffffffff -#define A6XX_HLSQ_CS_NDRANGE_5_GLOBALSIZE_Z__SHIFT 0 -static inline uint32_t A6XX_HLSQ_CS_NDRANGE_5_GLOBALSIZE_Z(uint32_t val) -{ - return ((val) << A6XX_HLSQ_CS_NDRANGE_5_GLOBALSIZE_Z__SHIFT) & A6XX_HLSQ_CS_NDRANGE_5_GLOBALSIZE_Z__MASK; -} - -#define REG_A6XX_HLSQ_CS_NDRANGE_6 0x0000b996 -#define A6XX_HLSQ_CS_NDRANGE_6_GLOBALOFF_Z__MASK 0xffffffff -#define A6XX_HLSQ_CS_NDRANGE_6_GLOBALOFF_Z__SHIFT 0 -static inline uint32_t A6XX_HLSQ_CS_NDRANGE_6_GLOBALOFF_Z(uint32_t val) -{ - return ((val) << A6XX_HLSQ_CS_NDRANGE_6_GLOBALOFF_Z__SHIFT) & A6XX_HLSQ_CS_NDRANGE_6_GLOBALOFF_Z__MASK; -} - -#define REG_A6XX_HLSQ_CS_CNTL_0 0x0000b997 -#define A6XX_HLSQ_CS_CNTL_0_WGIDCONSTID__MASK 0x000000ff -#define A6XX_HLSQ_CS_CNTL_0_WGIDCONSTID__SHIFT 0 -static inline uint32_t A6XX_HLSQ_CS_CNTL_0_WGIDCONSTID(uint32_t val) -{ - return ((val) << A6XX_HLSQ_CS_CNTL_0_WGIDCONSTID__SHIFT) & A6XX_HLSQ_CS_CNTL_0_WGIDCONSTID__MASK; -} -#define A6XX_HLSQ_CS_CNTL_0_UNK0__MASK 0x0000ff00 -#define A6XX_HLSQ_CS_CNTL_0_UNK0__SHIFT 8 -static inline uint32_t A6XX_HLSQ_CS_CNTL_0_UNK0(uint32_t val) -{ - return ((val) << A6XX_HLSQ_CS_CNTL_0_UNK0__SHIFT) & 
A6XX_HLSQ_CS_CNTL_0_UNK0__MASK; -} -#define A6XX_HLSQ_CS_CNTL_0_UNK1__MASK 0x00ff0000 -#define A6XX_HLSQ_CS_CNTL_0_UNK1__SHIFT 16 -static inline uint32_t A6XX_HLSQ_CS_CNTL_0_UNK1(uint32_t val) -{ - return ((val) << A6XX_HLSQ_CS_CNTL_0_UNK1__SHIFT) & A6XX_HLSQ_CS_CNTL_0_UNK1__MASK; -} -#define A6XX_HLSQ_CS_CNTL_0_LOCALIDREGID__MASK 0xff000000 -#define A6XX_HLSQ_CS_CNTL_0_LOCALIDREGID__SHIFT 24 -static inline uint32_t A6XX_HLSQ_CS_CNTL_0_LOCALIDREGID(uint32_t val) -{ - return ((val) << A6XX_HLSQ_CS_CNTL_0_LOCALIDREGID__SHIFT) & A6XX_HLSQ_CS_CNTL_0_LOCALIDREGID__MASK; -} - -#define REG_A6XX_HLSQ_CS_KERNEL_GROUP_X 0x0000b999 - -#define REG_A6XX_HLSQ_CS_KERNEL_GROUP_Y 0x0000b99a - -#define REG_A6XX_HLSQ_CS_KERNEL_GROUP_Z 0x0000b99b - -#define REG_A6XX_HLSQ_UPDATE_CNTL 0x0000bb08 - -#define REG_A6XX_HLSQ_FS_CNTL 0x0000bb10 -#define A6XX_HLSQ_FS_CNTL_CONSTLEN__MASK 0x000000ff -#define A6XX_HLSQ_FS_CNTL_CONSTLEN__SHIFT 0 -static inline uint32_t A6XX_HLSQ_FS_CNTL_CONSTLEN(uint32_t val) -{ - assert(!(val & 0x3)); - return ((val >> 2) << A6XX_HLSQ_FS_CNTL_CONSTLEN__SHIFT) & A6XX_HLSQ_FS_CNTL_CONSTLEN__MASK; -} - -#define REG_A6XX_HLSQ_UNKNOWN_BB11 0x0000bb11 - -#define REG_A6XX_HLSQ_UNKNOWN_BE00 0x0000be00 - -#define REG_A6XX_HLSQ_UNKNOWN_BE01 0x0000be01 - -#define REG_A6XX_HLSQ_UNKNOWN_BE04 0x0000be04 - -#define REG_A6XX_TEX_SAMP_0 0x00000000 -#define A6XX_TEX_SAMP_0_MIPFILTER_LINEAR_NEAR 0x00000001 -#define A6XX_TEX_SAMP_0_XY_MAG__MASK 0x00000006 -#define A6XX_TEX_SAMP_0_XY_MAG__SHIFT 1 -static inline uint32_t A6XX_TEX_SAMP_0_XY_MAG(enum a6xx_tex_filter val) -{ - return ((val) << A6XX_TEX_SAMP_0_XY_MAG__SHIFT) & A6XX_TEX_SAMP_0_XY_MAG__MASK; -} -#define A6XX_TEX_SAMP_0_XY_MIN__MASK 0x00000018 -#define A6XX_TEX_SAMP_0_XY_MIN__SHIFT 3 -static inline uint32_t A6XX_TEX_SAMP_0_XY_MIN(enum a6xx_tex_filter val) -{ - return ((val) << A6XX_TEX_SAMP_0_XY_MIN__SHIFT) & A6XX_TEX_SAMP_0_XY_MIN__MASK; -} -#define A6XX_TEX_SAMP_0_WRAP_S__MASK 0x000000e0 -#define 
A6XX_TEX_SAMP_0_WRAP_S__SHIFT 5 -static inline uint32_t A6XX_TEX_SAMP_0_WRAP_S(enum a6xx_tex_clamp val) -{ - return ((val) << A6XX_TEX_SAMP_0_WRAP_S__SHIFT) & A6XX_TEX_SAMP_0_WRAP_S__MASK; -} -#define A6XX_TEX_SAMP_0_WRAP_T__MASK 0x00000700 -#define A6XX_TEX_SAMP_0_WRAP_T__SHIFT 8 -static inline uint32_t A6XX_TEX_SAMP_0_WRAP_T(enum a6xx_tex_clamp val) -{ - return ((val) << A6XX_TEX_SAMP_0_WRAP_T__SHIFT) & A6XX_TEX_SAMP_0_WRAP_T__MASK; -} -#define A6XX_TEX_SAMP_0_WRAP_R__MASK 0x00003800 -#define A6XX_TEX_SAMP_0_WRAP_R__SHIFT 11 -static inline uint32_t A6XX_TEX_SAMP_0_WRAP_R(enum a6xx_tex_clamp val) -{ - return ((val) << A6XX_TEX_SAMP_0_WRAP_R__SHIFT) & A6XX_TEX_SAMP_0_WRAP_R__MASK; -} -#define A6XX_TEX_SAMP_0_ANISO__MASK 0x0001c000 -#define A6XX_TEX_SAMP_0_ANISO__SHIFT 14 -static inline uint32_t A6XX_TEX_SAMP_0_ANISO(enum a6xx_tex_aniso val) -{ - return ((val) << A6XX_TEX_SAMP_0_ANISO__SHIFT) & A6XX_TEX_SAMP_0_ANISO__MASK; -} -#define A6XX_TEX_SAMP_0_LOD_BIAS__MASK 0xfff80000 -#define A6XX_TEX_SAMP_0_LOD_BIAS__SHIFT 19 -static inline uint32_t A6XX_TEX_SAMP_0_LOD_BIAS(float val) -{ - return ((((int32_t)(val * 256.0))) << A6XX_TEX_SAMP_0_LOD_BIAS__SHIFT) & A6XX_TEX_SAMP_0_LOD_BIAS__MASK; -} - -#define REG_A6XX_TEX_SAMP_1 0x00000001 -#define A6XX_TEX_SAMP_1_COMPARE_FUNC__MASK 0x0000000e -#define A6XX_TEX_SAMP_1_COMPARE_FUNC__SHIFT 1 -static inline uint32_t A6XX_TEX_SAMP_1_COMPARE_FUNC(enum adreno_compare_func val) -{ - return ((val) << A6XX_TEX_SAMP_1_COMPARE_FUNC__SHIFT) & A6XX_TEX_SAMP_1_COMPARE_FUNC__MASK; -} -#define A6XX_TEX_SAMP_1_CUBEMAPSEAMLESSFILTOFF 0x00000010 -#define A6XX_TEX_SAMP_1_UNNORM_COORDS 0x00000020 -#define A6XX_TEX_SAMP_1_MIPFILTER_LINEAR_FAR 0x00000040 -#define A6XX_TEX_SAMP_1_MAX_LOD__MASK 0x000fff00 -#define A6XX_TEX_SAMP_1_MAX_LOD__SHIFT 8 -static inline uint32_t A6XX_TEX_SAMP_1_MAX_LOD(float val) -{ - return ((((uint32_t)(val * 256.0))) << A6XX_TEX_SAMP_1_MAX_LOD__SHIFT) & A6XX_TEX_SAMP_1_MAX_LOD__MASK; -} -#define 
A6XX_TEX_SAMP_1_MIN_LOD__MASK 0xfff00000 -#define A6XX_TEX_SAMP_1_MIN_LOD__SHIFT 20 -static inline uint32_t A6XX_TEX_SAMP_1_MIN_LOD(float val) -{ - return ((((uint32_t)(val * 256.0))) << A6XX_TEX_SAMP_1_MIN_LOD__SHIFT) & A6XX_TEX_SAMP_1_MIN_LOD__MASK; -} - -#define REG_A6XX_TEX_SAMP_2 0x00000002 -#define A6XX_TEX_SAMP_2_BCOLOR_OFFSET__MASK 0xfffffff0 -#define A6XX_TEX_SAMP_2_BCOLOR_OFFSET__SHIFT 4 -static inline uint32_t A6XX_TEX_SAMP_2_BCOLOR_OFFSET(uint32_t val) -{ - return ((val) << A6XX_TEX_SAMP_2_BCOLOR_OFFSET__SHIFT) & A6XX_TEX_SAMP_2_BCOLOR_OFFSET__MASK; -} - -#define REG_A6XX_TEX_SAMP_3 0x00000003 - -#define REG_A6XX_TEX_CONST_0 0x00000000 -#define A6XX_TEX_CONST_0_TILE_MODE__MASK 0x00000003 -#define A6XX_TEX_CONST_0_TILE_MODE__SHIFT 0 -static inline uint32_t A6XX_TEX_CONST_0_TILE_MODE(enum a6xx_tile_mode val) -{ - return ((val) << A6XX_TEX_CONST_0_TILE_MODE__SHIFT) & A6XX_TEX_CONST_0_TILE_MODE__MASK; -} -#define A6XX_TEX_CONST_0_SRGB 0x00000004 -#define A6XX_TEX_CONST_0_SWIZ_X__MASK 0x00000070 -#define A6XX_TEX_CONST_0_SWIZ_X__SHIFT 4 -static inline uint32_t A6XX_TEX_CONST_0_SWIZ_X(enum a6xx_tex_swiz val) -{ - return ((val) << A6XX_TEX_CONST_0_SWIZ_X__SHIFT) & A6XX_TEX_CONST_0_SWIZ_X__MASK; -} -#define A6XX_TEX_CONST_0_SWIZ_Y__MASK 0x00000380 -#define A6XX_TEX_CONST_0_SWIZ_Y__SHIFT 7 -static inline uint32_t A6XX_TEX_CONST_0_SWIZ_Y(enum a6xx_tex_swiz val) -{ - return ((val) << A6XX_TEX_CONST_0_SWIZ_Y__SHIFT) & A6XX_TEX_CONST_0_SWIZ_Y__MASK; -} -#define A6XX_TEX_CONST_0_SWIZ_Z__MASK 0x00001c00 -#define A6XX_TEX_CONST_0_SWIZ_Z__SHIFT 10 -static inline uint32_t A6XX_TEX_CONST_0_SWIZ_Z(enum a6xx_tex_swiz val) -{ - return ((val) << A6XX_TEX_CONST_0_SWIZ_Z__SHIFT) & A6XX_TEX_CONST_0_SWIZ_Z__MASK; -} -#define A6XX_TEX_CONST_0_SWIZ_W__MASK 0x0000e000 -#define A6XX_TEX_CONST_0_SWIZ_W__SHIFT 13 -static inline uint32_t A6XX_TEX_CONST_0_SWIZ_W(enum a6xx_tex_swiz val) -{ - return ((val) << A6XX_TEX_CONST_0_SWIZ_W__SHIFT) & A6XX_TEX_CONST_0_SWIZ_W__MASK; -} -#define 
A6XX_TEX_CONST_0_MIPLVLS__MASK 0x000f0000 -#define A6XX_TEX_CONST_0_MIPLVLS__SHIFT 16 -static inline uint32_t A6XX_TEX_CONST_0_MIPLVLS(uint32_t val) -{ - return ((val) << A6XX_TEX_CONST_0_MIPLVLS__SHIFT) & A6XX_TEX_CONST_0_MIPLVLS__MASK; -} -#define A6XX_TEX_CONST_0_FMT__MASK 0x3fc00000 -#define A6XX_TEX_CONST_0_FMT__SHIFT 22 -static inline uint32_t A6XX_TEX_CONST_0_FMT(enum a6xx_tex_fmt val) -{ - return ((val) << A6XX_TEX_CONST_0_FMT__SHIFT) & A6XX_TEX_CONST_0_FMT__MASK; -} -#define A6XX_TEX_CONST_0_SWAP__MASK 0xc0000000 -#define A6XX_TEX_CONST_0_SWAP__SHIFT 30 -static inline uint32_t A6XX_TEX_CONST_0_SWAP(enum a3xx_color_swap val) -{ - return ((val) << A6XX_TEX_CONST_0_SWAP__SHIFT) & A6XX_TEX_CONST_0_SWAP__MASK; -} - -#define REG_A6XX_TEX_CONST_1 0x00000001 -#define A6XX_TEX_CONST_1_WIDTH__MASK 0x00007fff -#define A6XX_TEX_CONST_1_WIDTH__SHIFT 0 -static inline uint32_t A6XX_TEX_CONST_1_WIDTH(uint32_t val) -{ - return ((val) << A6XX_TEX_CONST_1_WIDTH__SHIFT) & A6XX_TEX_CONST_1_WIDTH__MASK; -} -#define A6XX_TEX_CONST_1_HEIGHT__MASK 0x3fff8000 -#define A6XX_TEX_CONST_1_HEIGHT__SHIFT 15 -static inline uint32_t A6XX_TEX_CONST_1_HEIGHT(uint32_t val) -{ - return ((val) << A6XX_TEX_CONST_1_HEIGHT__SHIFT) & A6XX_TEX_CONST_1_HEIGHT__MASK; -} - -#define REG_A6XX_TEX_CONST_2 0x00000002 -#define A6XX_TEX_CONST_2_FETCHSIZE__MASK 0x0000000f -#define A6XX_TEX_CONST_2_FETCHSIZE__SHIFT 0 -static inline uint32_t A6XX_TEX_CONST_2_FETCHSIZE(enum a6xx_tex_fetchsize val) -{ - return ((val) << A6XX_TEX_CONST_2_FETCHSIZE__SHIFT) & A6XX_TEX_CONST_2_FETCHSIZE__MASK; -} -#define A6XX_TEX_CONST_2_PITCH__MASK 0x1fffff80 -#define A6XX_TEX_CONST_2_PITCH__SHIFT 7 -static inline uint32_t A6XX_TEX_CONST_2_PITCH(uint32_t val) -{ - return ((val) << A6XX_TEX_CONST_2_PITCH__SHIFT) & A6XX_TEX_CONST_2_PITCH__MASK; -} -#define A6XX_TEX_CONST_2_TYPE__MASK 0x60000000 -#define A6XX_TEX_CONST_2_TYPE__SHIFT 29 -static inline uint32_t A6XX_TEX_CONST_2_TYPE(enum a6xx_tex_type val) -{ - return ((val) << 
A6XX_TEX_CONST_2_TYPE__SHIFT) & A6XX_TEX_CONST_2_TYPE__MASK; -} - -#define REG_A6XX_TEX_CONST_3 0x00000003 -#define A6XX_TEX_CONST_3_ARRAY_PITCH__MASK 0x00003fff -#define A6XX_TEX_CONST_3_ARRAY_PITCH__SHIFT 0 -static inline uint32_t A6XX_TEX_CONST_3_ARRAY_PITCH(uint32_t val) -{ - assert(!(val & 0xfff)); - return ((val >> 12) << A6XX_TEX_CONST_3_ARRAY_PITCH__SHIFT) & A6XX_TEX_CONST_3_ARRAY_PITCH__MASK; -} -#define A6XX_TEX_CONST_3_FLAG 0x10000000 - -#define REG_A6XX_TEX_CONST_4 0x00000004 -#define A6XX_TEX_CONST_4_BASE_LO__MASK 0xffffffe0 -#define A6XX_TEX_CONST_4_BASE_LO__SHIFT 5 -static inline uint32_t A6XX_TEX_CONST_4_BASE_LO(uint32_t val) -{ - assert(!(val & 0x1f)); - return ((val >> 5) << A6XX_TEX_CONST_4_BASE_LO__SHIFT) & A6XX_TEX_CONST_4_BASE_LO__MASK; -} - -#define REG_A6XX_TEX_CONST_5 0x00000005 -#define A6XX_TEX_CONST_5_BASE_HI__MASK 0x0001ffff -#define A6XX_TEX_CONST_5_BASE_HI__SHIFT 0 -static inline uint32_t A6XX_TEX_CONST_5_BASE_HI(uint32_t val) -{ - return ((val) << A6XX_TEX_CONST_5_BASE_HI__SHIFT) & A6XX_TEX_CONST_5_BASE_HI__MASK; -} -#define A6XX_TEX_CONST_5_DEPTH__MASK 0x3ffe0000 -#define A6XX_TEX_CONST_5_DEPTH__SHIFT 17 -static inline uint32_t A6XX_TEX_CONST_5_DEPTH(uint32_t val) -{ - return ((val) << A6XX_TEX_CONST_5_DEPTH__SHIFT) & A6XX_TEX_CONST_5_DEPTH__MASK; -} - -#define REG_A6XX_TEX_CONST_6 0x00000006 - -#define REG_A6XX_TEX_CONST_7 0x00000007 -#define A6XX_TEX_CONST_7_FLAG_LO__MASK 0xffffffe0 -#define A6XX_TEX_CONST_7_FLAG_LO__SHIFT 5 -static inline uint32_t A6XX_TEX_CONST_7_FLAG_LO(uint32_t val) -{ - assert(!(val & 0x1f)); - return ((val >> 5) << A6XX_TEX_CONST_7_FLAG_LO__SHIFT) & A6XX_TEX_CONST_7_FLAG_LO__MASK; -} - -#define REG_A6XX_TEX_CONST_8 0x00000008 -#define A6XX_TEX_CONST_8_FLAG_HI__MASK 0x0001ffff -#define A6XX_TEX_CONST_8_FLAG_HI__SHIFT 0 -static inline uint32_t A6XX_TEX_CONST_8_FLAG_HI(uint32_t val) -{ - return ((val) << A6XX_TEX_CONST_8_FLAG_HI__SHIFT) & A6XX_TEX_CONST_8_FLAG_HI__MASK; -} - -#define REG_A6XX_TEX_CONST_9 
0x00000009 - -#define REG_A6XX_TEX_CONST_10 0x0000000a - -#define REG_A6XX_TEX_CONST_11 0x0000000b - -#define REG_A6XX_TEX_CONST_12 0x0000000c - -#define REG_A6XX_TEX_CONST_13 0x0000000d - -#define REG_A6XX_TEX_CONST_14 0x0000000e - -#define REG_A6XX_TEX_CONST_15 0x0000000f - -#define REG_A6XX_PDC_GPU_ENABLE_PDC 0x00001140 - -#define REG_A6XX_PDC_GPU_SEQ_START_ADDR 0x00001148 - -#define REG_A6XX_PDC_GPU_TCS0_CONTROL 0x00001540 - -#define REG_A6XX_PDC_GPU_TCS0_CMD_ENABLE_BANK 0x00001541 - -#define REG_A6XX_PDC_GPU_TCS0_CMD_WAIT_FOR_CMPL_BANK 0x00001542 - -#define REG_A6XX_PDC_GPU_TCS0_CMD0_MSGID 0x00001543 - -#define REG_A6XX_PDC_GPU_TCS0_CMD0_ADDR 0x00001544 - -#define REG_A6XX_PDC_GPU_TCS0_CMD0_DATA 0x00001545 - -#define REG_A6XX_PDC_GPU_TCS1_CONTROL 0x00001572 - -#define REG_A6XX_PDC_GPU_TCS1_CMD_ENABLE_BANK 0x00001573 - -#define REG_A6XX_PDC_GPU_TCS1_CMD_WAIT_FOR_CMPL_BANK 0x00001574 - -#define REG_A6XX_PDC_GPU_TCS1_CMD0_MSGID 0x00001575 - -#define REG_A6XX_PDC_GPU_TCS1_CMD0_ADDR 0x00001576 - -#define REG_A6XX_PDC_GPU_TCS1_CMD0_DATA 0x00001577 - -#define REG_A6XX_PDC_GPU_TCS2_CONTROL 0x000015a4 - -#define REG_A6XX_PDC_GPU_TCS2_CMD_ENABLE_BANK 0x000015a5 - -#define REG_A6XX_PDC_GPU_TCS2_CMD_WAIT_FOR_CMPL_BANK 0x000015a6 - -#define REG_A6XX_PDC_GPU_TCS2_CMD0_MSGID 0x000015a7 - -#define REG_A6XX_PDC_GPU_TCS2_CMD0_ADDR 0x000015a8 - -#define REG_A6XX_PDC_GPU_TCS2_CMD0_DATA 0x000015a9 - -#define REG_A6XX_PDC_GPU_TCS3_CONTROL 0x000015d6 - -#define REG_A6XX_PDC_GPU_TCS3_CMD_ENABLE_BANK 0x000015d7 - -#define REG_A6XX_PDC_GPU_TCS3_CMD_WAIT_FOR_CMPL_BANK 0x000015d8 - -#define REG_A6XX_PDC_GPU_TCS3_CMD0_MSGID 0x000015d9 - -#define REG_A6XX_PDC_GPU_TCS3_CMD0_ADDR 0x000015da - -#define REG_A6XX_PDC_GPU_TCS3_CMD0_DATA 0x000015db - -#define REG_A6XX_PDC_GPU_SEQ_MEM_0 0x00000000 - -#define REG_A6XX_CX_DBGC_CFG_DBGBUS_SEL_A 0x00000000 -#define A6XX_CX_DBGC_CFG_DBGBUS_SEL_A_PING_INDEX__MASK 0x000000ff -#define A6XX_CX_DBGC_CFG_DBGBUS_SEL_A_PING_INDEX__SHIFT 0 -static inline 
uint32_t A6XX_CX_DBGC_CFG_DBGBUS_SEL_A_PING_INDEX(uint32_t val) -{ - return ((val) << A6XX_CX_DBGC_CFG_DBGBUS_SEL_A_PING_INDEX__SHIFT) & A6XX_CX_DBGC_CFG_DBGBUS_SEL_A_PING_INDEX__MASK; -} -#define A6XX_CX_DBGC_CFG_DBGBUS_SEL_A_PING_BLK_SEL__MASK 0x0000ff00 -#define A6XX_CX_DBGC_CFG_DBGBUS_SEL_A_PING_BLK_SEL__SHIFT 8 -static inline uint32_t A6XX_CX_DBGC_CFG_DBGBUS_SEL_A_PING_BLK_SEL(uint32_t val) -{ - return ((val) << A6XX_CX_DBGC_CFG_DBGBUS_SEL_A_PING_BLK_SEL__SHIFT) & A6XX_CX_DBGC_CFG_DBGBUS_SEL_A_PING_BLK_SEL__MASK; -} - -#define REG_A6XX_CX_DBGC_CFG_DBGBUS_SEL_B 0x00000001 - -#define REG_A6XX_CX_DBGC_CFG_DBGBUS_SEL_C 0x00000002 - -#define REG_A6XX_CX_DBGC_CFG_DBGBUS_SEL_D 0x00000003 - -#define REG_A6XX_CX_DBGC_CFG_DBGBUS_CNTLT 0x00000004 -#define A6XX_CX_DBGC_CFG_DBGBUS_CNTLT_TRACEEN__MASK 0x0000003f -#define A6XX_CX_DBGC_CFG_DBGBUS_CNTLT_TRACEEN__SHIFT 0 -static inline uint32_t A6XX_CX_DBGC_CFG_DBGBUS_CNTLT_TRACEEN(uint32_t val) -{ - return ((val) << A6XX_CX_DBGC_CFG_DBGBUS_CNTLT_TRACEEN__SHIFT) & A6XX_CX_DBGC_CFG_DBGBUS_CNTLT_TRACEEN__MASK; -} -#define A6XX_CX_DBGC_CFG_DBGBUS_CNTLT_GRANU__MASK 0x00007000 -#define A6XX_CX_DBGC_CFG_DBGBUS_CNTLT_GRANU__SHIFT 12 -static inline uint32_t A6XX_CX_DBGC_CFG_DBGBUS_CNTLT_GRANU(uint32_t val) -{ - return ((val) << A6XX_CX_DBGC_CFG_DBGBUS_CNTLT_GRANU__SHIFT) & A6XX_CX_DBGC_CFG_DBGBUS_CNTLT_GRANU__MASK; -} -#define A6XX_CX_DBGC_CFG_DBGBUS_CNTLT_SEGT__MASK 0xf0000000 -#define A6XX_CX_DBGC_CFG_DBGBUS_CNTLT_SEGT__SHIFT 28 -static inline uint32_t A6XX_CX_DBGC_CFG_DBGBUS_CNTLT_SEGT(uint32_t val) -{ - return ((val) << A6XX_CX_DBGC_CFG_DBGBUS_CNTLT_SEGT__SHIFT) & A6XX_CX_DBGC_CFG_DBGBUS_CNTLT_SEGT__MASK; -} - -#define REG_A6XX_CX_DBGC_CFG_DBGBUS_CNTLM 0x00000005 -#define A6XX_CX_DBGC_CFG_DBGBUS_CNTLM_ENABLE__MASK 0x0f000000 -#define A6XX_CX_DBGC_CFG_DBGBUS_CNTLM_ENABLE__SHIFT 24 -static inline uint32_t A6XX_CX_DBGC_CFG_DBGBUS_CNTLM_ENABLE(uint32_t val) -{ - return ((val) << A6XX_CX_DBGC_CFG_DBGBUS_CNTLM_ENABLE__SHIFT) & 
A6XX_CX_DBGC_CFG_DBGBUS_CNTLM_ENABLE__MASK; -} - -#define REG_A6XX_CX_DBGC_CFG_DBGBUS_IVTL_0 0x00000008 - -#define REG_A6XX_CX_DBGC_CFG_DBGBUS_IVTL_1 0x00000009 - -#define REG_A6XX_CX_DBGC_CFG_DBGBUS_IVTL_2 0x0000000a - -#define REG_A6XX_CX_DBGC_CFG_DBGBUS_IVTL_3 0x0000000b - -#define REG_A6XX_CX_DBGC_CFG_DBGBUS_MASKL_0 0x0000000c - -#define REG_A6XX_CX_DBGC_CFG_DBGBUS_MASKL_1 0x0000000d - -#define REG_A6XX_CX_DBGC_CFG_DBGBUS_MASKL_2 0x0000000e - -#define REG_A6XX_CX_DBGC_CFG_DBGBUS_MASKL_3 0x0000000f - -#define REG_A6XX_CX_DBGC_CFG_DBGBUS_BYTEL_0 0x00000010 -#define A6XX_CX_DBGC_CFG_DBGBUS_BYTEL_0_BYTEL0__MASK 0x0000000f -#define A6XX_CX_DBGC_CFG_DBGBUS_BYTEL_0_BYTEL0__SHIFT 0 -static inline uint32_t A6XX_CX_DBGC_CFG_DBGBUS_BYTEL_0_BYTEL0(uint32_t val) -{ - return ((val) << A6XX_CX_DBGC_CFG_DBGBUS_BYTEL_0_BYTEL0__SHIFT) & A6XX_CX_DBGC_CFG_DBGBUS_BYTEL_0_BYTEL0__MASK; -} -#define A6XX_CX_DBGC_CFG_DBGBUS_BYTEL_0_BYTEL1__MASK 0x000000f0 -#define A6XX_CX_DBGC_CFG_DBGBUS_BYTEL_0_BYTEL1__SHIFT 4 -static inline uint32_t A6XX_CX_DBGC_CFG_DBGBUS_BYTEL_0_BYTEL1(uint32_t val) -{ - return ((val) << A6XX_CX_DBGC_CFG_DBGBUS_BYTEL_0_BYTEL1__SHIFT) & A6XX_CX_DBGC_CFG_DBGBUS_BYTEL_0_BYTEL1__MASK; -} -#define A6XX_CX_DBGC_CFG_DBGBUS_BYTEL_0_BYTEL2__MASK 0x00000f00 -#define A6XX_CX_DBGC_CFG_DBGBUS_BYTEL_0_BYTEL2__SHIFT 8 -static inline uint32_t A6XX_CX_DBGC_CFG_DBGBUS_BYTEL_0_BYTEL2(uint32_t val) -{ - return ((val) << A6XX_CX_DBGC_CFG_DBGBUS_BYTEL_0_BYTEL2__SHIFT) & A6XX_CX_DBGC_CFG_DBGBUS_BYTEL_0_BYTEL2__MASK; -} -#define A6XX_CX_DBGC_CFG_DBGBUS_BYTEL_0_BYTEL3__MASK 0x0000f000 -#define A6XX_CX_DBGC_CFG_DBGBUS_BYTEL_0_BYTEL3__SHIFT 12 -static inline uint32_t A6XX_CX_DBGC_CFG_DBGBUS_BYTEL_0_BYTEL3(uint32_t val) -{ - return ((val) << A6XX_CX_DBGC_CFG_DBGBUS_BYTEL_0_BYTEL3__SHIFT) & A6XX_CX_DBGC_CFG_DBGBUS_BYTEL_0_BYTEL3__MASK; -} -#define A6XX_CX_DBGC_CFG_DBGBUS_BYTEL_0_BYTEL4__MASK 0x000f0000 -#define A6XX_CX_DBGC_CFG_DBGBUS_BYTEL_0_BYTEL4__SHIFT 16 -static inline uint32_t 
A6XX_CX_DBGC_CFG_DBGBUS_BYTEL_0_BYTEL4(uint32_t val) -{ - return ((val) << A6XX_CX_DBGC_CFG_DBGBUS_BYTEL_0_BYTEL4__SHIFT) & A6XX_CX_DBGC_CFG_DBGBUS_BYTEL_0_BYTEL4__MASK; -} -#define A6XX_CX_DBGC_CFG_DBGBUS_BYTEL_0_BYTEL5__MASK 0x00f00000 -#define A6XX_CX_DBGC_CFG_DBGBUS_BYTEL_0_BYTEL5__SHIFT 20 -static inline uint32_t A6XX_CX_DBGC_CFG_DBGBUS_BYTEL_0_BYTEL5(uint32_t val) -{ - return ((val) << A6XX_CX_DBGC_CFG_DBGBUS_BYTEL_0_BYTEL5__SHIFT) & A6XX_CX_DBGC_CFG_DBGBUS_BYTEL_0_BYTEL5__MASK; -} -#define A6XX_CX_DBGC_CFG_DBGBUS_BYTEL_0_BYTEL6__MASK 0x0f000000 -#define A6XX_CX_DBGC_CFG_DBGBUS_BYTEL_0_BYTEL6__SHIFT 24 -static inline uint32_t A6XX_CX_DBGC_CFG_DBGBUS_BYTEL_0_BYTEL6(uint32_t val) -{ - return ((val) << A6XX_CX_DBGC_CFG_DBGBUS_BYTEL_0_BYTEL6__SHIFT) & A6XX_CX_DBGC_CFG_DBGBUS_BYTEL_0_BYTEL6__MASK; -} -#define A6XX_CX_DBGC_CFG_DBGBUS_BYTEL_0_BYTEL7__MASK 0xf0000000 -#define A6XX_CX_DBGC_CFG_DBGBUS_BYTEL_0_BYTEL7__SHIFT 28 -static inline uint32_t A6XX_CX_DBGC_CFG_DBGBUS_BYTEL_0_BYTEL7(uint32_t val) -{ - return ((val) << A6XX_CX_DBGC_CFG_DBGBUS_BYTEL_0_BYTEL7__SHIFT) & A6XX_CX_DBGC_CFG_DBGBUS_BYTEL_0_BYTEL7__MASK; -} - -#define REG_A6XX_CX_DBGC_CFG_DBGBUS_BYTEL_1 0x00000011 -#define A6XX_CX_DBGC_CFG_DBGBUS_BYTEL_1_BYTEL8__MASK 0x0000000f -#define A6XX_CX_DBGC_CFG_DBGBUS_BYTEL_1_BYTEL8__SHIFT 0 -static inline uint32_t A6XX_CX_DBGC_CFG_DBGBUS_BYTEL_1_BYTEL8(uint32_t val) -{ - return ((val) << A6XX_CX_DBGC_CFG_DBGBUS_BYTEL_1_BYTEL8__SHIFT) & A6XX_CX_DBGC_CFG_DBGBUS_BYTEL_1_BYTEL8__MASK; -} -#define A6XX_CX_DBGC_CFG_DBGBUS_BYTEL_1_BYTEL9__MASK 0x000000f0 -#define A6XX_CX_DBGC_CFG_DBGBUS_BYTEL_1_BYTEL9__SHIFT 4 -static inline uint32_t A6XX_CX_DBGC_CFG_DBGBUS_BYTEL_1_BYTEL9(uint32_t val) -{ - return ((val) << A6XX_CX_DBGC_CFG_DBGBUS_BYTEL_1_BYTEL9__SHIFT) & A6XX_CX_DBGC_CFG_DBGBUS_BYTEL_1_BYTEL9__MASK; -} -#define A6XX_CX_DBGC_CFG_DBGBUS_BYTEL_1_BYTEL10__MASK 0x00000f00 -#define A6XX_CX_DBGC_CFG_DBGBUS_BYTEL_1_BYTEL10__SHIFT 8 -static inline uint32_t 
A6XX_CX_DBGC_CFG_DBGBUS_BYTEL_1_BYTEL10(uint32_t val) -{ - return ((val) << A6XX_CX_DBGC_CFG_DBGBUS_BYTEL_1_BYTEL10__SHIFT) & A6XX_CX_DBGC_CFG_DBGBUS_BYTEL_1_BYTEL10__MASK; -} -#define A6XX_CX_DBGC_CFG_DBGBUS_BYTEL_1_BYTEL11__MASK 0x0000f000 -#define A6XX_CX_DBGC_CFG_DBGBUS_BYTEL_1_BYTEL11__SHIFT 12 -static inline uint32_t A6XX_CX_DBGC_CFG_DBGBUS_BYTEL_1_BYTEL11(uint32_t val) -{ - return ((val) << A6XX_CX_DBGC_CFG_DBGBUS_BYTEL_1_BYTEL11__SHIFT) & A6XX_CX_DBGC_CFG_DBGBUS_BYTEL_1_BYTEL11__MASK; -} -#define A6XX_CX_DBGC_CFG_DBGBUS_BYTEL_1_BYTEL12__MASK 0x000f0000 -#define A6XX_CX_DBGC_CFG_DBGBUS_BYTEL_1_BYTEL12__SHIFT 16 -static inline uint32_t A6XX_CX_DBGC_CFG_DBGBUS_BYTEL_1_BYTEL12(uint32_t val) -{ - return ((val) << A6XX_CX_DBGC_CFG_DBGBUS_BYTEL_1_BYTEL12__SHIFT) & A6XX_CX_DBGC_CFG_DBGBUS_BYTEL_1_BYTEL12__MASK; -} -#define A6XX_CX_DBGC_CFG_DBGBUS_BYTEL_1_BYTEL13__MASK 0x00f00000 -#define A6XX_CX_DBGC_CFG_DBGBUS_BYTEL_1_BYTEL13__SHIFT 20 -static inline uint32_t A6XX_CX_DBGC_CFG_DBGBUS_BYTEL_1_BYTEL13(uint32_t val) -{ - return ((val) << A6XX_CX_DBGC_CFG_DBGBUS_BYTEL_1_BYTEL13__SHIFT) & A6XX_CX_DBGC_CFG_DBGBUS_BYTEL_1_BYTEL13__MASK; -} -#define A6XX_CX_DBGC_CFG_DBGBUS_BYTEL_1_BYTEL14__MASK 0x0f000000 -#define A6XX_CX_DBGC_CFG_DBGBUS_BYTEL_1_BYTEL14__SHIFT 24 -static inline uint32_t A6XX_CX_DBGC_CFG_DBGBUS_BYTEL_1_BYTEL14(uint32_t val) -{ - return ((val) << A6XX_CX_DBGC_CFG_DBGBUS_BYTEL_1_BYTEL14__SHIFT) & A6XX_CX_DBGC_CFG_DBGBUS_BYTEL_1_BYTEL14__MASK; -} -#define A6XX_CX_DBGC_CFG_DBGBUS_BYTEL_1_BYTEL15__MASK 0xf0000000 -#define A6XX_CX_DBGC_CFG_DBGBUS_BYTEL_1_BYTEL15__SHIFT 28 -static inline uint32_t A6XX_CX_DBGC_CFG_DBGBUS_BYTEL_1_BYTEL15(uint32_t val) -{ - return ((val) << A6XX_CX_DBGC_CFG_DBGBUS_BYTEL_1_BYTEL15__SHIFT) & A6XX_CX_DBGC_CFG_DBGBUS_BYTEL_1_BYTEL15__MASK; -} - -#define REG_A6XX_CX_DBGC_CFG_DBGBUS_TRACE_BUF1 0x0000002f - -#define REG_A6XX_CX_DBGC_CFG_DBGBUS_TRACE_BUF2 0x00000030 - -#define REG_A6XX_CX_MISC_SYSTEM_CACHE_CNTL_0 0x00000001 - -#define 
REG_A6XX_CX_MISC_SYSTEM_CACHE_CNTL_1 0x00000002 - - -#endif /* A6XX_XML */ diff -Nru mesa-18.3.3/src/gallium/drivers/freedreno/a6xx/fd6_blend.c mesa-19.0.1/src/gallium/drivers/freedreno/a6xx/fd6_blend.c --- mesa-18.3.3/src/gallium/drivers/freedreno/a6xx/fd6_blend.c 2018-12-07 18:58:04.000000000 +0000 +++ mesa-19.0.1/src/gallium/drivers/freedreno/a6xx/fd6_blend.c 2019-03-31 23:16:37.000000000 +0000 @@ -138,8 +138,10 @@ } so->rb_blend_cntl = A6XX_RB_BLEND_CNTL_ENABLE_BLEND(mrt_blend) | + COND(cso->alpha_to_coverage, A6XX_RB_BLEND_CNTL_ALPHA_TO_COVERAGE) | COND(cso->independent_blend_enable, A6XX_RB_BLEND_CNTL_INDEPENDENT_BLEND); so->sp_blend_cntl = A6XX_SP_BLEND_CNTL_UNK8 | + COND(cso->alpha_to_coverage, A6XX_SP_BLEND_CNTL_ALPHA_TO_COVERAGE) | COND(mrt_blend, A6XX_SP_BLEND_CNTL_ENABLED); return so; diff -Nru mesa-18.3.3/src/gallium/drivers/freedreno/a6xx/fd6_blitter.c mesa-19.0.1/src/gallium/drivers/freedreno/a6xx/fd6_blitter.c --- mesa-18.3.3/src/gallium/drivers/freedreno/a6xx/fd6_blitter.c 2018-12-07 18:58:04.000000000 +0000 +++ mesa-19.0.1/src/gallium/drivers/freedreno/a6xx/fd6_blitter.c 2019-03-31 23:16:37.000000000 +0000 @@ -47,12 +47,24 @@ r->target == PIPE_TEXTURE_3D ? u_minify(r->depth0, lvl) : r->array_size; - return (b->x >= 0) && (b->x + b->width <= u_minify(r->width0, lvl)) && (b->y >= 0) && (b->y + b->height <= u_minify(r->height0, lvl)) && (b->z >= 0) && (b->z + b->depth <= last_layer); } +static bool +ok_format(enum pipe_format pfmt) +{ + enum a6xx_color_fmt fmt = fd6_pipe2color(pfmt); + if (fmt == ~0) + return false; + + if (fd6_ifmt(fmt) == 0) + return false; + + return true; +} + #define DEBUG_BLIT_FALLBACK 0 #define fail_if(cond) \ do { \ @@ -82,19 +94,14 @@ fail_if(util_format_is_compressed(info->src.format) != util_format_is_compressed(info->src.format)); + /* Fail if unsupported format: */ + fail_if(!ok_format(info->src.format)); + fail_if(!ok_format(info->dst.format)); + /* ... but only if they're the same compression format. 
*/ fail_if(util_format_is_compressed(info->src.format) && info->src.format != info->dst.format); - /* hw ignores {SRC,DST}_INFO.COLOR_SWAP if {SRC,DST}_INFO.TILE_MODE - * is set (not linear). We can kind of get around that when tiling/ - * untiling by setting both src and dst COLOR_SWAP=WZYX, but that - * means the formats must match: - */ - fail_if((fd_resource(info->dst.resource)->tile_mode || - fd_resource(info->src.resource)->tile_mode) && - info->dst.format != info->src.format); - /* src box can be inverted, which we don't support.. dst box cannot: */ fail_if((info->src.box.width < 0) || (info->src.box.height < 0)); @@ -106,7 +113,9 @@ debug_assert(info->dst.box.height >= 0); debug_assert(info->dst.box.depth >= 0); - fail_if(info->dst.resource->nr_samples + info->src.resource->nr_samples > 2); + /* non-multisampled could either have nr_samples == 0 or == 1 */ + fail_if(info->dst.resource->nr_samples > 1); + fail_if(info->src.resource->nr_samples > 1); fail_if(info->window_rectangle_include); @@ -139,6 +148,15 @@ OUT_RING(ring, 0x10000000); } +static uint32_t +blit_control(enum a6xx_color_fmt fmt) +{ + unsigned blit_cntl = 0xf00000; + blit_cntl |= A6XX_RB_2D_BLIT_CNTL_COLOR_FORMAT(fmt); + blit_cntl |= A6XX_RB_2D_BLIT_CNTL_IFMT(fd6_ifmt(fmt)); + return blit_cntl; +} + /* buffers need to be handled specially since x/width can exceed the bounds * supported by hw.. 
if necessary decompose into (potentially) two 2D blits */ @@ -198,7 +216,7 @@ OUT_PKT7(ring, CP_SET_MARKER, 1); OUT_RING(ring, A2XX_CP_SET_MARKER_0_MODE(RM6_BLIT2DSCALE)); - uint32_t blit_cntl = A6XX_RB_2D_BLIT_CNTL_COLOR_FORMAT(RB6_R8_UNORM) | 0x20f00000; + uint32_t blit_cntl = blit_control(RB6_R8_UNORM) | 0x20000000; OUT_PKT4(ring, REG_A6XX_RB_2D_BLIT_CNTL, 1); OUT_RING(ring, blit_cntl); @@ -271,13 +289,13 @@ OUT_RING(ring, 0x3f); OUT_WFI5(ring); - OUT_PKT4(ring, 0x8c01, 1); + OUT_PKT4(ring, REG_A6XX_RB_UNKNOWN_8C01, 1); OUT_RING(ring, 0); - OUT_PKT4(ring, 0xacc0, 1); + OUT_PKT4(ring, REG_A6XX_SP_2D_SRC_FORMAT, 1); OUT_RING(ring, 0xf180); - OUT_PKT4(ring, 0x8e04, 1); + OUT_PKT4(ring, REG_A6XX_RB_UNKNOWN_8E04, 1); OUT_RING(ring, 0x01000000); OUT_PKT7(ring, CP_BLIT, 1); @@ -285,7 +303,7 @@ OUT_WFI5(ring); - OUT_PKT4(ring, 0x8e04, 1); + OUT_PKT4(ring, REG_A6XX_RB_UNKNOWN_8E04, 1); OUT_RING(ring, 0); } } @@ -333,8 +351,8 @@ dtile = fd_resource_level_linear(info->dst.resource, info->dst.level) ? TILE6_LINEAR : dst->tile_mode; - sswap = fd6_pipe2swap(info->src.format); - dswap = fd6_pipe2swap(info->dst.format); + sswap = stile ? WZYX : fd6_pipe2swap(info->src.format); + dswap = dtile ? WZYX : fd6_pipe2swap(info->dst.format); if (util_format_is_compressed(info->src.format)) { debug_assert(info->src.format == info->dst.format); @@ -361,20 +379,10 @@ uint32_t width = DIV_ROUND_UP(u_minify(src->base.width0, info->src.level), blockwidth) * nelements; uint32_t height = DIV_ROUND_UP(u_minify(src->base.height0, info->src.level), blockheight); - /* if dtile, then dswap ignored by hw, and likewise if stile then sswap - * ignored by hw.. 
but in this case we have already rejected the blit - * if src and dst formats differ, so juse use WZYX for both src and - * dst swap mode (so we don't change component order) - */ - if (stile || dtile) { - debug_assert(info->src.format == info->dst.format); - sswap = dswap = WZYX; - } - OUT_PKT7(ring, CP_SET_MARKER, 1); OUT_RING(ring, A2XX_CP_SET_MARKER_0_MODE(RM6_BLIT2DSCALE)); - uint32_t blit_cntl = A6XX_RB_2D_BLIT_CNTL_COLOR_FORMAT(dfmt) | 0xf00000; + uint32_t blit_cntl = blit_control(dfmt); if (dtile != stile) blit_cntl |= 0x20000000; @@ -430,7 +438,7 @@ OUT_RING(ring, A6XX_RB_2D_DST_INFO_COLOR_FORMAT(dfmt) | A6XX_RB_2D_DST_INFO_TILE_MODE(dtile) | A6XX_RB_2D_DST_INFO_COLOR_SWAP(dswap)); - OUT_RELOC(ring, dst->bo, doff, 0, 0); /* RB_2D_DST_LO/HI */ + OUT_RELOCW(ring, dst->bo, doff, 0, 0); /* RB_2D_DST_LO/HI */ OUT_RING(ring, A6XX_RB_2D_DST_SIZE_PITCH(dpitch)); OUT_RING(ring, 0x00000000); OUT_RING(ring, 0x00000000); @@ -455,13 +463,25 @@ OUT_RING(ring, 0x3f); OUT_WFI5(ring); - OUT_PKT4(ring, 0x8c01, 1); + OUT_PKT4(ring, REG_A6XX_RB_UNKNOWN_8C01, 1); OUT_RING(ring, 0); - OUT_PKT4(ring, 0xacc0, 1); - OUT_RING(ring, 0xf180); + OUT_PKT4(ring, REG_A6XX_SP_2D_SRC_FORMAT, 1); + OUT_RING(ring, A6XX_SP_2D_SRC_FORMAT_COLOR_FORMAT(sfmt) | + COND(util_format_is_pure_sint(info->src.format), + A6XX_SP_2D_SRC_FORMAT_SINT) | + COND(util_format_is_pure_uint(info->src.format), + A6XX_SP_2D_SRC_FORMAT_UINT) | + COND(util_format_is_snorm(info->src.format), + A6XX_SP_2D_SRC_FORMAT_SINT | + A6XX_SP_2D_SRC_FORMAT_NORM) | + COND(util_format_is_unorm(info->src.format), +// TODO sometimes blob uses UINT+NORM but dEQP seems unhappy about that +// A6XX_SP_2D_SRC_FORMAT_UINT | + A6XX_SP_2D_SRC_FORMAT_NORM) | + 0xf000); - OUT_PKT4(ring, 0x8e04, 1); + OUT_PKT4(ring, REG_A6XX_RB_UNKNOWN_8E04, 1); OUT_RING(ring, 0x01000000); OUT_PKT7(ring, CP_BLIT, 1); @@ -469,25 +489,17 @@ OUT_WFI5(ring); - OUT_PKT4(ring, 0x8e04, 1); + OUT_PKT4(ring, REG_A6XX_RB_UNKNOWN_8E04, 1); OUT_RING(ring, 0); } } static 
void -fd6_blit(struct pipe_context *pctx, const struct pipe_blit_info *info) +emit_blit(struct fd_context *ctx, const struct pipe_blit_info *info) { - struct fd_context *ctx = fd_context(pctx); struct fd_batch *batch; - if (!can_do_blit(info)) { - fd_blitter_pipe_begin(ctx, info->render_condition_enable, false, FD_STAGE_BLIT); - fd_blitter_blit(ctx, info); - fd_blitter_pipe_end(ctx); - return; - } - - fd_fence_ref(pctx->screen, &ctx->last_fence, NULL); + fd_fence_ref(ctx->base.screen, &ctx->last_fence, NULL); batch = fd_bc_alloc_batch(&ctx->screen->batch_cache, ctx, true); @@ -526,40 +538,16 @@ fd_batch_reference(&batch, NULL); } -static void -fd6_resource_copy_region(struct pipe_context *pctx, - struct pipe_resource *dst, - unsigned dst_level, - unsigned dstx, unsigned dsty, unsigned dstz, - struct pipe_resource *src, - unsigned src_level, - const struct pipe_box *src_box) +static bool +fd6_blit(struct fd_context *ctx, const struct pipe_blit_info *info) { - struct pipe_blit_info info; - - debug_assert(src->format == dst->format); + if (!can_do_blit(info)) { + return false; + } - memset(&info, 0, sizeof info); - info.dst.resource = dst; - info.dst.level = dst_level; - info.dst.box.x = dstx; - info.dst.box.y = dsty; - info.dst.box.z = dstz; - info.dst.box.width = src_box->width; - info.dst.box.height = src_box->height; - assert(info.dst.box.width >= 0); - assert(info.dst.box.height >= 0); - info.dst.box.depth = 1; - info.dst.format = dst->format; - info.src.resource = src; - info.src.level = src_level; - info.src.box = *src_box; - info.src.format = src->format; - info.mask = util_format_get_mask(src->format); - info.filter = PIPE_TEX_FILTER_NEAREST; - info.scissor_enable = 0; + emit_blit(ctx, info); - fd6_blit(pctx, &info); + return true; } void @@ -568,8 +556,7 @@ if (fd_mesa_debug & FD_DBG_NOBLIT) return; - pctx->resource_copy_region = fd6_resource_copy_region; - pctx->blit = fd6_blit; + fd_context(pctx)->blit = fd6_blit; } unsigned @@ -578,5 +565,8 @@ /* 
basically just has to be a format we can blit, so uploads/downloads * via linear staging buffer works: */ - return TILE6_3; + if (ok_format(tmpl->format)) + return TILE6_3; + + return TILE6_LINEAR; } diff -Nru mesa-18.3.3/src/gallium/drivers/freedreno/a6xx/fd6_context.c mesa-19.0.1/src/gallium/drivers/freedreno/a6xx/fd6_context.c --- mesa-18.3.3/src/gallium/drivers/freedreno/a6xx/fd6_context.c 2018-12-07 18:58:04.000000000 +0000 +++ mesa-19.0.1/src/gallium/drivers/freedreno/a6xx/fd6_context.c 2019-03-31 23:16:37.000000000 +0000 @@ -48,8 +48,6 @@ fd_context_destroy(pctx); - fd_bo_del(fd6_ctx->vs_pvt_mem); - fd_bo_del(fd6_ctx->fs_pvt_mem); fd_bo_del(fd6_ctx->vsc_data); fd_bo_del(fd6_ctx->vsc_data2); fd_bo_del(fd6_ctx->blit_mem); @@ -104,27 +102,23 @@ if (!pctx) return NULL; + util_blitter_set_texture_multisample(fd6_ctx->base.blitter, true); + /* fd_context_init overwrites delete_rasterizer_state, so set this * here. */ pctx->delete_rasterizer_state = fd6_rasterizer_state_delete; pctx->delete_depth_stencil_alpha_state = fd6_depth_stencil_alpha_state_delete; - fd6_ctx->vs_pvt_mem = fd_bo_new(screen->dev, 0x2000, - DRM_FREEDRENO_GEM_TYPE_KMEM); - - fd6_ctx->fs_pvt_mem = fd_bo_new(screen->dev, 0x2000, - DRM_FREEDRENO_GEM_TYPE_KMEM); - fd6_ctx->vsc_data = fd_bo_new(screen->dev, (A6XX_VSC_DATA_PITCH * 32) + 0x100, - DRM_FREEDRENO_GEM_TYPE_KMEM); + DRM_FREEDRENO_GEM_TYPE_KMEM, "vsc_data"); fd6_ctx->vsc_data2 = fd_bo_new(screen->dev, A6XX_VSC_DATA2_PITCH * 32, - DRM_FREEDRENO_GEM_TYPE_KMEM); + DRM_FREEDRENO_GEM_TYPE_KMEM, "vsc_data2"); fd6_ctx->blit_mem = fd_bo_new(screen->dev, 0x1000, - DRM_FREEDRENO_GEM_TYPE_KMEM); + DRM_FREEDRENO_GEM_TYPE_KMEM, "blit"); fd_context_setup_common_vbos(&fd6_ctx->base); diff -Nru mesa-18.3.3/src/gallium/drivers/freedreno/a6xx/fd6_context.h mesa-19.0.1/src/gallium/drivers/freedreno/a6xx/fd6_context.h --- mesa-18.3.3/src/gallium/drivers/freedreno/a6xx/fd6_context.h 2018-12-07 18:58:04.000000000 +0000 +++ 
mesa-19.0.1/src/gallium/drivers/freedreno/a6xx/fd6_context.h 2019-03-31 23:16:37.000000000 +0000 @@ -32,15 +32,13 @@ #include "freedreno_context.h" -#include "ir3_shader.h" +#include "ir3/ir3_shader.h" #include "a6xx.xml.h" struct fd6_context { struct fd_context base; - struct fd_bo *vs_pvt_mem, *fs_pvt_mem; - /* Two buffers related to hw binning / visibility stream (VSC). * Compared to previous generations * (1) we cannot specify individual buffers per VSC, instead diff -Nru mesa-18.3.3/src/gallium/drivers/freedreno/a6xx/fd6_draw.c mesa-19.0.1/src/gallium/drivers/freedreno/a6xx/fd6_draw.c --- mesa-18.3.3/src/gallium/drivers/freedreno/a6xx/fd6_draw.c 2018-12-07 18:58:04.000000000 +0000 +++ mesa-19.0.1/src/gallium/drivers/freedreno/a6xx/fd6_draw.c 2019-03-31 23:16:37.000000000 +0000 @@ -254,18 +254,6 @@ return true; } -static bool is_z32(enum pipe_format format) -{ - switch (format) { - case PIPE_FORMAT_Z32_FLOAT_S8X24_UINT: - case PIPE_FORMAT_Z32_UNORM: - case PIPE_FORMAT_Z32_FLOAT: - return true; - default: - return false; - } -} - static void fd6_clear_lrz(struct fd_batch *batch, struct fd_resource *zsbuf, double depth) { @@ -317,7 +305,7 @@ OUT_RING(ring, 0x00000000); OUT_RING(ring, 0x00000000); - OUT_PKT4(ring, REG_A6XX_SP_UNKNOWN_ACC0, 1); + OUT_PKT4(ring, REG_A6XX_SP_2D_SRC_FORMAT, 1); OUT_RING(ring, 0x0000f410); OUT_PKT4(ring, REG_A6XX_GRAS_2D_BLIT_CNTL, 1); @@ -383,135 +371,48 @@ fd6_cache_flush(batch, ring); } +static bool is_z32(enum pipe_format format) +{ + switch (format) { + case PIPE_FORMAT_Z32_FLOAT_S8X24_UINT: + case PIPE_FORMAT_Z32_UNORM: + case PIPE_FORMAT_Z32_FLOAT: + return true; + default: + return false; + } +} + static bool fd6_clear(struct fd_context *ctx, unsigned buffers, const union pipe_color_union *color, double depth, unsigned stencil) { struct pipe_framebuffer_state *pfb = &ctx->batch->framebuffer; - struct pipe_scissor_state *scissor = fd_context_get_scissor(ctx); - struct fd_ringbuffer *ring = ctx->batch->draw; - - if ((buffers & 
(PIPE_CLEAR_DEPTH | PIPE_CLEAR_STENCIL)) && - is_z32(pfb->zsbuf->format)) + const bool has_depth = pfb->zsbuf; + unsigned color_buffers = buffers >> 2; + unsigned i; + + /* If we're clearing after draws, fallback to 3D pipe clears. We could + * use blitter clears in the draw batch but then we'd have to patch up the + * gmem offsets. This doesn't seem like a useful thing to optimize for + * however.*/ + if (ctx->batch->num_draws > 0) return false; - OUT_PKT4(ring, REG_A6XX_RB_BLIT_SCISSOR_TL, 2); - OUT_RING(ring, A6XX_RB_BLIT_SCISSOR_TL_X(scissor->minx) | - A6XX_RB_BLIT_SCISSOR_TL_Y(scissor->miny)); - OUT_RING(ring, A6XX_RB_BLIT_SCISSOR_BR_X(scissor->maxx - 1) | - A6XX_RB_BLIT_SCISSOR_BR_Y(scissor->maxy - 1)); - - if (buffers & PIPE_CLEAR_COLOR) { - for (int i = 0; i < pfb->nr_cbufs; i++) { - union util_color uc = {0}; - - if (!pfb->cbufs[i]) - continue; - - if (!(buffers & (PIPE_CLEAR_COLOR0 << i))) - continue; - - enum pipe_format pfmt = pfb->cbufs[i]->format; - - // XXX I think RB_CLEAR_COLOR_DWn wants to take into account SWAP?? 
- union pipe_color_union swapped; - switch (fd6_pipe2swap(pfmt)) { - case WZYX: - swapped.ui[0] = color->ui[0]; - swapped.ui[1] = color->ui[1]; - swapped.ui[2] = color->ui[2]; - swapped.ui[3] = color->ui[3]; - break; - case WXYZ: - swapped.ui[2] = color->ui[0]; - swapped.ui[1] = color->ui[1]; - swapped.ui[0] = color->ui[2]; - swapped.ui[3] = color->ui[3]; - break; - case ZYXW: - swapped.ui[3] = color->ui[0]; - swapped.ui[0] = color->ui[1]; - swapped.ui[1] = color->ui[2]; - swapped.ui[2] = color->ui[3]; - break; - case XYZW: - swapped.ui[3] = color->ui[0]; - swapped.ui[2] = color->ui[1]; - swapped.ui[1] = color->ui[2]; - swapped.ui[0] = color->ui[3]; - break; - } - - if (util_format_is_pure_uint(pfmt)) { - util_format_write_4ui(pfmt, swapped.ui, 0, &uc, 0, 0, 0, 1, 1); - } else if (util_format_is_pure_sint(pfmt)) { - util_format_write_4i(pfmt, swapped.i, 0, &uc, 0, 0, 0, 1, 1); - } else { - util_pack_color(swapped.f, pfmt, &uc); - } - - OUT_PKT4(ring, REG_A6XX_RB_BLIT_DST_INFO, 1); - OUT_RING(ring, A6XX_RB_BLIT_DST_INFO_TILE_MODE(TILE6_LINEAR) | - A6XX_RB_BLIT_DST_INFO_COLOR_FORMAT(fd6_pipe2color(pfmt))); - - OUT_PKT4(ring, REG_A6XX_RB_BLIT_INFO, 1); - OUT_RING(ring, A6XX_RB_BLIT_INFO_GMEM | - A6XX_RB_BLIT_INFO_CLEAR_MASK(0xf)); - - OUT_PKT4(ring, REG_A6XX_RB_BLIT_BASE_GMEM, 1); - OUT_RINGP(ring, i, &ctx->batch->gmem_patches); - - OUT_PKT4(ring, REG_A6XX_RB_UNKNOWN_88D0, 1); - OUT_RING(ring, 0); - - OUT_PKT4(ring, REG_A6XX_RB_BLIT_CLEAR_COLOR_DW0, 4); - OUT_RING(ring, uc.ui[0]); - OUT_RING(ring, uc.ui[1]); - OUT_RING(ring, uc.ui[2]); - OUT_RING(ring, uc.ui[3]); - - fd6_emit_blit(ctx->batch, ring); - } - } - - if (pfb->zsbuf && (buffers & (PIPE_CLEAR_DEPTH | PIPE_CLEAR_STENCIL))) { - enum pipe_format pfmt = pfb->zsbuf->format; - uint32_t clear = util_pack_z_stencil(pfmt, depth, stencil); - uint32_t mask = 0; - - if (buffers & PIPE_CLEAR_DEPTH) - mask |= 0x1; - - if (buffers & PIPE_CLEAR_STENCIL) - mask |= 0x2; - - OUT_PKT4(ring, REG_A6XX_RB_BLIT_DST_INFO, 1); - 
OUT_RING(ring, A6XX_RB_BLIT_DST_INFO_TILE_MODE(TILE6_LINEAR) | - A6XX_RB_BLIT_DST_INFO_COLOR_FORMAT(fd6_pipe2color(pfmt))); - - OUT_PKT4(ring, REG_A6XX_RB_BLIT_INFO, 1); - OUT_RING(ring, A6XX_RB_BLIT_INFO_GMEM | - // XXX UNK0 for separate stencil ?? - A6XX_RB_BLIT_INFO_DEPTH | - A6XX_RB_BLIT_INFO_CLEAR_MASK(mask)); - - OUT_PKT4(ring, REG_A6XX_RB_BLIT_BASE_GMEM, 1); - OUT_RINGP(ring, MAX_RENDER_TARGETS, &ctx->batch->gmem_patches); - - OUT_PKT4(ring, REG_A6XX_RB_UNKNOWN_88D0, 1); - OUT_RING(ring, 0); - - OUT_PKT4(ring, REG_A6XX_RB_BLIT_CLEAR_COLOR_DW0, 1); - OUT_RING(ring, clear); - - fd6_emit_blit(ctx->batch, ring); - - if (pfb->zsbuf && (buffers & PIPE_CLEAR_DEPTH)) { - struct fd_resource *zsbuf = fd_resource(pfb->zsbuf->texture); - if (zsbuf->lrz) { - zsbuf->lrz_valid = true; - fd6_clear_lrz(ctx->batch, zsbuf, depth); - } + foreach_bit(i, color_buffers) + ctx->batch->clear_color[i] = *color; + if (buffers & PIPE_CLEAR_DEPTH) + ctx->batch->clear_depth = depth; + if (buffers & PIPE_CLEAR_STENCIL) + ctx->batch->clear_stencil = stencil; + + ctx->batch->fast_cleared |= buffers; + + if (has_depth && (buffers & PIPE_CLEAR_DEPTH)) { + struct fd_resource *zsbuf = fd_resource(pfb->zsbuf->texture); + if (zsbuf->lrz && !is_z32(pfb->zsbuf->format)) { + zsbuf->lrz_valid = true; + fd6_clear_lrz(ctx->batch, zsbuf, depth); } } diff -Nru mesa-18.3.3/src/gallium/drivers/freedreno/a6xx/fd6_emit.c mesa-19.0.1/src/gallium/drivers/freedreno/a6xx/fd6_emit.c --- mesa-18.3.3/src/gallium/drivers/freedreno/a6xx/fd6_emit.c 2018-12-07 18:58:04.000000000 +0000 +++ mesa-19.0.1/src/gallium/drivers/freedreno/a6xx/fd6_emit.c 2019-03-31 23:16:37.000000000 +0000 @@ -46,16 +46,17 @@ #include "fd6_zsa.h" static uint32_t -shader_t_to_opcode(enum shader_t type) +shader_t_to_opcode(gl_shader_stage type) { switch (type) { - case SHADER_VERTEX: - case SHADER_TCS: - case SHADER_TES: - case SHADER_GEOM: + case MESA_SHADER_VERTEX: + case MESA_SHADER_TESS_CTRL: + case MESA_SHADER_TESS_EVAL: + case 
MESA_SHADER_GEOMETRY: return CP_LOAD_STATE6_GEOM; - case SHADER_FRAGMENT: - case SHADER_COMPUTE: + case MESA_SHADER_FRAGMENT: + case MESA_SHADER_COMPUTE: + case MESA_SHADER_KERNEL: return CP_LOAD_STATE6_FRAG; default: unreachable("bad shader type"); @@ -67,7 +68,7 @@ * sizedwords: size of const value buffer */ static void -fd6_emit_const(struct fd_ringbuffer *ring, enum shader_t type, +fd6_emit_const(struct fd_ringbuffer *ring, gl_shader_stage type, uint32_t regid, uint32_t offset, uint32_t sizedwords, const uint32_t *dwords, struct pipe_resource *prsc) { @@ -105,7 +106,7 @@ } static void -fd6_emit_const_bo(struct fd_ringbuffer *ring, enum shader_t type, boolean write, +fd6_emit_const_bo(struct fd_ringbuffer *ring, gl_shader_stage type, boolean write, uint32_t regid, uint32_t num, struct pipe_resource **prscs, uint32_t *offsets) { uint32_t anum = align(num, 2); @@ -394,13 +395,8 @@ static const struct fd6_pipe_sampler_view dummy_view = {}; const struct fd6_pipe_sampler_view *view = tex->textures[i] ? 
fd6_pipe_sampler_view(tex->textures[i]) : &dummy_view; - enum a6xx_tile_mode tile_mode = TILE6_LINEAR; - if (view->base.texture) - tile_mode = fd_resource(view->base.texture)->tile_mode; - - OUT_RING(state, view->texconst0 | - A6XX_TEX_CONST_0_TILE_MODE(tile_mode)); + OUT_RING(state, view->texconst0); OUT_RING(state, view->texconst1); OUT_RING(state, view->texconst2); OUT_RING(state, view->texconst3); @@ -677,12 +673,6 @@ OUT_RING(ring, A6XX_GRAS_SC_SCREEN_SCISSOR_TL_0_X(scissor->maxx - 1) | A6XX_GRAS_SC_SCREEN_SCISSOR_TL_0_Y(scissor->maxy - 1)); - OUT_PKT4(ring, REG_A6XX_GRAS_SC_VIEWPORT_SCISSOR_TL_0, 2); - OUT_RING(ring, A6XX_GRAS_SC_VIEWPORT_SCISSOR_TL_0_X(scissor->minx) | - A6XX_GRAS_SC_VIEWPORT_SCISSOR_TL_0_Y(scissor->miny)); - OUT_RING(ring, A6XX_GRAS_SC_VIEWPORT_SCISSOR_TL_0_X(scissor->maxx - 1) | - A6XX_GRAS_SC_VIEWPORT_SCISSOR_TL_0_Y(scissor->maxy - 1)); - ctx->batch->max_scissor.minx = MIN2(ctx->batch->max_scissor.minx, scissor->minx); ctx->batch->max_scissor.miny = MIN2(ctx->batch->max_scissor.miny, scissor->miny); ctx->batch->max_scissor.maxx = MAX2(ctx->batch->max_scissor.maxx, scissor->maxx); @@ -690,7 +680,8 @@ } if (dirty & FD_DIRTY_VIEWPORT) { - fd_wfi(ctx->batch, ring); + struct pipe_scissor_state *scissor = &ctx->viewport_scissor; + OUT_PKT4(ring, REG_A6XX_GRAS_CL_VPORT_XOFFSET_0, 6); OUT_RING(ring, A6XX_GRAS_CL_VPORT_XOFFSET_0(ctx->viewport.translate[0])); OUT_RING(ring, A6XX_GRAS_CL_VPORT_XSCALE_0(ctx->viewport.scale[0])); @@ -698,6 +689,19 @@ OUT_RING(ring, A6XX_GRAS_CL_VPORT_YSCALE_0(ctx->viewport.scale[1])); OUT_RING(ring, A6XX_GRAS_CL_VPORT_ZOFFSET_0(ctx->viewport.translate[2])); OUT_RING(ring, A6XX_GRAS_CL_VPORT_ZSCALE_0(ctx->viewport.scale[2])); + + OUT_PKT4(ring, REG_A6XX_GRAS_SC_VIEWPORT_SCISSOR_TL_0, 2); + OUT_RING(ring, A6XX_GRAS_SC_VIEWPORT_SCISSOR_TL_0_X(scissor->minx) | + A6XX_GRAS_SC_VIEWPORT_SCISSOR_TL_0_Y(scissor->miny)); + OUT_RING(ring, A6XX_GRAS_SC_VIEWPORT_SCISSOR_TL_0_X(scissor->maxx - 1) | + 
A6XX_GRAS_SC_VIEWPORT_SCISSOR_TL_0_Y(scissor->maxy - 1)); + + unsigned guardband_x = fd_calc_guardband(scissor->maxx - scissor->minx); + unsigned guardband_y = fd_calc_guardband(scissor->maxy - scissor->miny); + + OUT_PKT4(ring, REG_A6XX_GRAS_CL_GUARDBAND_CLIP_ADJ, 1); + OUT_RING(ring, A6XX_GRAS_CL_GUARDBAND_CLIP_ADJ_HORZ(guardband_x) | + A6XX_GRAS_CL_GUARDBAND_CLIP_ADJ_VERT(guardband_y)); } if (dirty & FD_DIRTY_PROG) { @@ -733,7 +737,6 @@ OUT_PKT4(ring, REG_A6XX_VFD_UNKNOWN_A008, 1); OUT_RING(ring, 0); - OUT_PKT4(ring, REG_A6XX_PC_PRIMITIVE_CNTL_0, 1); OUT_RING(ring, rasterizer->pc_primitive_cntl | COND(emit->info->primitive_restart && emit->info->index_size, @@ -761,6 +764,7 @@ struct fd_ringbuffer *vsconstobj = fd_submit_new_ringbuffer( ctx->batch->submit, 0x1000, FD_RINGBUFFER_STREAMING); + OUT_WFI5(vsconstobj); ir3_emit_vs_consts(vp, vsconstobj, ctx, emit->info); fd6_emit_add_group(emit, vsconstobj, FD6_GROUP_VS_CONST, 0x7); fd_ringbuffer_del(vsconstobj); @@ -770,12 +774,13 @@ struct fd_ringbuffer *fsconstobj = fd_submit_new_ringbuffer( ctx->batch->submit, 0x1000, FD_RINGBUFFER_STREAMING); + OUT_WFI5(fsconstobj); ir3_emit_fs_consts(fp, fsconstobj, ctx); fd6_emit_add_group(emit, fsconstobj, FD6_GROUP_FS_CONST, 0x6); fd_ringbuffer_del(fsconstobj); } - struct pipe_stream_output_info *info = &vp->shader->stream_output; + struct ir3_stream_output_info *info = &vp->shader->stream_output; if (info->num_outputs) { struct fd_streamout_stateobj *so = &ctx->streamout; @@ -871,14 +876,18 @@ OUT_RING(ring, blend_control); } - OUT_PKT4(ring, REG_A6XX_RB_BLEND_CNTL, 1); - OUT_RING(ring, blend->rb_blend_cntl | - A6XX_RB_BLEND_CNTL_SAMPLE_MASK(0xffff)); - OUT_PKT4(ring, REG_A6XX_SP_BLEND_CNTL, 1); OUT_RING(ring, blend->sp_blend_cntl); } + if (dirty & (FD_DIRTY_BLEND | FD_DIRTY_SAMPLE_MASK)) { + struct fd6_blend_stateobj *blend = fd6_blend_stateobj(ctx->blend); + + OUT_PKT4(ring, REG_A6XX_RB_BLEND_CNTL, 1); + OUT_RING(ring, blend->rb_blend_cntl | + 
A6XX_RB_BLEND_CNTL_SAMPLE_MASK(ctx->sample_mask)); + } + if (dirty & FD_DIRTY_BLEND_COLOR) { struct pipe_blend_color *bcolor = &ctx->blend_color; diff -Nru mesa-18.3.3/src/gallium/drivers/freedreno/a6xx/fd6_emit.h mesa-19.0.1/src/gallium/drivers/freedreno/a6xx/fd6_emit.h --- mesa-18.3.3/src/gallium/drivers/freedreno/a6xx/fd6_emit.h 2018-12-07 18:58:04.000000000 +0000 +++ mesa-19.0.1/src/gallium/drivers/freedreno/a6xx/fd6_emit.h 2019-03-31 23:16:37.000000000 +0000 @@ -34,7 +34,7 @@ #include "fd6_context.h" #include "fd6_format.h" #include "fd6_program.h" -#include "ir3_shader.h" +#include "ir3_gallium.h" struct fd_ringbuffer; @@ -155,14 +155,15 @@ } static inline enum a6xx_state_block -fd6_stage2shadersb(enum shader_t type) +fd6_stage2shadersb(gl_shader_stage type) { switch (type) { - case SHADER_VERTEX: + case MESA_SHADER_VERTEX: return SB6_VS_SHADER; - case SHADER_FRAGMENT: + case MESA_SHADER_FRAGMENT: return SB6_FS_SHADER; - case SHADER_COMPUTE: + case MESA_SHADER_COMPUTE: + case MESA_SHADER_KERNEL: return SB6_CS_SHADER; default: unreachable("bad shader type"); diff -Nru mesa-18.3.3/src/gallium/drivers/freedreno/a6xx/fd6_format.c mesa-19.0.1/src/gallium/drivers/freedreno/a6xx/fd6_format.c --- mesa-18.3.3/src/gallium/drivers/freedreno/a6xx/fd6_format.c 2018-12-07 18:58:04.000000000 +0000 +++ mesa-19.0.1/src/gallium/drivers/freedreno/a6xx/fd6_format.c 2019-03-31 23:16:37.000000000 +0000 @@ -29,6 +29,7 @@ #include "util/u_format.h" #include "fd6_format.h" +#include "freedreno_resource.h" /* Specifies the table of all the formats and their features. 
Also supplies @@ -419,8 +420,8 @@ } } -static inline enum a6xx_tex_swiz -tex_swiz(unsigned swiz) +enum a6xx_tex_swiz +fd6_pipe2swiz(unsigned swiz) { switch (swiz) { default: @@ -434,19 +435,37 @@ } uint32_t -fd6_tex_swiz(enum pipe_format format, unsigned swizzle_r, unsigned swizzle_g, +fd6_tex_swiz(struct pipe_resource *prsc, unsigned swizzle_r, unsigned swizzle_g, unsigned swizzle_b, unsigned swizzle_a) { const struct util_format_description *desc = - util_format_description(format); + util_format_description(prsc->format); unsigned char swiz[4] = { swizzle_r, swizzle_g, swizzle_b, swizzle_a, - }, rswiz[4]; + }, rswiz[4], *swizp; util_format_compose_swizzles(desc->swizzle, swiz, rswiz); - return A6XX_TEX_CONST_0_SWIZ_X(tex_swiz(rswiz[0])) | - A6XX_TEX_CONST_0_SWIZ_Y(tex_swiz(rswiz[1])) | - A6XX_TEX_CONST_0_SWIZ_Z(tex_swiz(rswiz[2])) | - A6XX_TEX_CONST_0_SWIZ_W(tex_swiz(rswiz[3])); + if (fd_resource(prsc)->tile_mode) { + /* for tiled modes, we don't get SWAP, so manually apply that + * extra step of swizzle: + */ + enum a3xx_color_swap swap = fd6_pipe2swap(prsc->format); + unsigned char swapswiz[][4] = { + [WZYX] = { 0, 1, 2, 3 }, + [WXYZ] = { 2, 1, 0, 3 }, + [ZYXW] = { 3, 0, 1, 2 }, + [XYZW] = { 3, 2, 1, 0 }, + }; + + util_format_compose_swizzles(swapswiz[swap], rswiz, swiz); + swizp = swiz; + } else { + swizp = rswiz; + } + + return A6XX_TEX_CONST_0_SWIZ_X(fd6_pipe2swiz(swizp[0])) | + A6XX_TEX_CONST_0_SWIZ_Y(fd6_pipe2swiz(swizp[1])) | + A6XX_TEX_CONST_0_SWIZ_Z(fd6_pipe2swiz(swizp[2])) | + A6XX_TEX_CONST_0_SWIZ_W(fd6_pipe2swiz(swizp[3])); } diff -Nru mesa-18.3.3/src/gallium/drivers/freedreno/a6xx/fd6_format.h mesa-19.0.1/src/gallium/drivers/freedreno/a6xx/fd6_format.h --- mesa-18.3.3/src/gallium/drivers/freedreno/a6xx/fd6_format.h 2018-12-07 18:58:04.000000000 +0000 +++ mesa-19.0.1/src/gallium/drivers/freedreno/a6xx/fd6_format.h 2019-03-31 23:16:37.000000000 +0000 @@ -38,8 +38,78 @@ enum a3xx_color_swap fd6_pipe2swap(enum pipe_format format); enum 
a6xx_tex_fetchsize fd6_pipe2fetchsize(enum pipe_format format); enum a6xx_depth_format fd6_pipe2depth(enum pipe_format format); +enum a6xx_tex_swiz fd6_pipe2swiz(unsigned swiz); -uint32_t fd6_tex_swiz(enum pipe_format format, unsigned swizzle_r, +uint32_t fd6_tex_swiz(struct pipe_resource *prsc, unsigned swizzle_r, unsigned swizzle_g, unsigned swizzle_b, unsigned swizzle_a); +static inline enum a6xx_2d_ifmt +fd6_ifmt(enum a6xx_color_fmt fmt) +{ + switch (fmt) { + case RB6_A8_UNORM: + case RB6_R8_UNORM: + case RB6_R8_SNORM: + case RB6_R8G8_UNORM: + case RB6_R8G8_SNORM: + case RB6_R8G8B8A8_UNORM: + case RB6_R8G8B8_UNORM: + case RB6_R8G8B8A8_SNORM: + return R2D_UNORM8; + + case RB6_R32_UINT: + case RB6_R32_SINT: + case RB6_R32G32_UINT: + case RB6_R32G32_SINT: + case RB6_R32G32B32A32_UINT: + case RB6_R32G32B32A32_SINT: + return R2D_INT32; + + case RB6_R16_UINT: + case RB6_R16_SINT: + case RB6_R16G16_UINT: + case RB6_R16G16_SINT: + case RB6_R16G16B16A16_UINT: + case RB6_R16G16B16A16_SINT: + return R2D_INT16; + + case RB6_R8_UINT: + case RB6_R8_SINT: + case RB6_R8G8_UINT: + case RB6_R8G8_SINT: + case RB6_R8G8B8A8_UINT: + case RB6_R8G8B8A8_SINT: + return R2D_INT8; + + case RB6_R16_UNORM: + case RB6_R16_SNORM: + case RB6_R16G16_UNORM: + case RB6_R16G16_SNORM: + case RB6_R16G16B16A16_UNORM: + case RB6_R16G16B16A16_SNORM: + case RB6_R32_FLOAT: + case RB6_R32G32_FLOAT: + case RB6_R32G32B32A32_FLOAT: + return R2D_FLOAT32; + + case RB6_R16_FLOAT: + case RB6_R16G16_FLOAT: + case RB6_R16G16B16A16_FLOAT: + return R2D_FLOAT16; + + case RB6_R4G4B4A4_UNORM: + case RB6_R5G5B5A1_UNORM: + case RB6_R5G6B5_UNORM: + case RB6_R10G10B10A2_UNORM: + case RB6_R10G10B10A2_UINT: + case RB6_R11G11B10_FLOAT: + case RB6_X8Z24_UNORM: + // ??? 
+ return 0; + default: + unreachable("bad format"); + return 0; + } +} + #endif /* FD6_UTIL_H_ */ diff -Nru mesa-18.3.3/src/gallium/drivers/freedreno/a6xx/fd6_gmem.c mesa-19.0.1/src/gallium/drivers/freedreno/a6xx/fd6_gmem.c --- mesa-18.3.3/src/gallium/drivers/freedreno/a6xx/fd6_gmem.c 2018-12-07 18:58:04.000000000 +0000 +++ mesa-19.0.1/src/gallium/drivers/freedreno/a6xx/fd6_gmem.c 2019-03-31 23:16:37.000000000 +0000 @@ -64,6 +64,7 @@ struct fd_resource_slice *slice = NULL; uint32_t stride = 0; uint32_t offset = 0; + uint32_t tile_mode; if (!pfb->cbufs[i]) continue; @@ -79,7 +80,6 @@ uint32_t base = gmem ? gmem->cbuf_base[i] : 0; slice = fd_resource_slice(rsc, psurf->u.tex.level); format = fd6_pipe2color(pformat); - swap = fd6_pipe2swap(pformat); sint = util_format_is_pure_sint(pformat); uint = util_format_is_pure_uint(pformat); @@ -89,14 +89,21 @@ offset = fd_resource_offset(rsc, psurf->u.tex.level, psurf->u.tex.first_layer); - stride = slice->pitch * rsc->cpp; + stride = slice->pitch * rsc->cpp * pfb->samples; + swap = rsc->tile_mode ? 
WZYX : fd6_pipe2swap(pformat); + + if (rsc->tile_mode && + fd_resource_level_linear(psurf->texture, psurf->u.tex.level)) + tile_mode = TILE6_LINEAR; + else + tile_mode = rsc->tile_mode; debug_assert(psurf->u.tex.first_layer == psurf->u.tex.last_layer); debug_assert((offset + slice->size0) <= fd_bo_size(rsc->bo)); OUT_PKT4(ring, REG_A6XX_RB_MRT_BUF_INFO(i), 6); OUT_RING(ring, A6XX_RB_MRT_BUF_INFO_COLOR_FORMAT(format) | - A6XX_RB_MRT_BUF_INFO_COLOR_TILE_MODE(rsc->tile_mode) | + A6XX_RB_MRT_BUF_INFO_COLOR_TILE_MODE(tile_mode) | A6XX_RB_MRT_BUF_INFO_COLOR_SWAP(swap)); OUT_RING(ring, A6XX_RB_MRT_PITCH(stride)); OUT_RING(ring, A6XX_RB_MRT_ARRAY_PITCH(slice->size0)); @@ -254,22 +261,6 @@ } static void -patch_gmem_bases(struct fd_batch *batch) -{ - struct fd_gmem_stateobj *gmem = &batch->ctx->gmem; - unsigned i; - - for (i = 0; i < fd_patch_num_elements(&batch->gmem_patches); i++) { - struct fd_cs_patch *patch = fd_patch_element(&batch->gmem_patches, i); - if (patch->val < MAX_RENDER_TARGETS) - *patch->cs = gmem->cbuf_base[patch->val]; - else - *patch->cs = gmem->zsbuf_base[0]; - } - util_dynarray_resize(&batch->gmem_patches, 0); -} - -static void update_render_cntl(struct fd_batch *batch, bool binning) { struct fd_ringbuffer *ring = batch->gmem; @@ -292,14 +283,13 @@ struct fd6_context *fd6_ctx = fd6_context(ctx); struct fd_gmem_stateobj *gmem = &ctx->gmem; struct fd_ringbuffer *ring = batch->gmem; - unsigned n = gmem->nbins_x * gmem->nbins_y; int i; OUT_PKT4(ring, REG_A6XX_VSC_BIN_SIZE, 3); OUT_RING(ring, A6XX_VSC_BIN_SIZE_WIDTH(gmem->bin_w) | A6XX_VSC_BIN_SIZE_HEIGHT(gmem->bin_h)); OUT_RELOCW(ring, fd6_ctx->vsc_data, - n * A6XX_VSC_DATA_PITCH, 0, 0); /* VSC_SIZE_ADDRESS_LO/HI */ + 32 * A6XX_VSC_DATA_PITCH, 0, 0); /* VSC_SIZE_ADDRESS_LO/HI */ OUT_PKT4(ring, REG_A6XX_VSC_BIN_COUNT, 1); OUT_RING(ring, A6XX_VSC_BIN_COUNT_NX(gmem->nbins_x) | @@ -431,25 +421,32 @@ } static void -disable_msaa(struct fd_ringbuffer *ring) +emit_msaa(struct fd_ringbuffer *ring, unsigned nr) { - 
// TODO MSAA + enum a3xx_msaa_samples samples = fd_msaa_samples(nr); + OUT_PKT4(ring, REG_A6XX_SP_TP_RAS_MSAA_CNTL, 2); - OUT_RING(ring, A6XX_SP_TP_RAS_MSAA_CNTL_SAMPLES(MSAA_ONE)); - OUT_RING(ring, A6XX_SP_TP_DEST_MSAA_CNTL_SAMPLES(MSAA_ONE) | - A6XX_SP_TP_DEST_MSAA_CNTL_MSAA_DISABLE); + OUT_RING(ring, A6XX_SP_TP_RAS_MSAA_CNTL_SAMPLES(samples)); + OUT_RING(ring, A6XX_SP_TP_DEST_MSAA_CNTL_SAMPLES(samples) | + COND(samples == MSAA_ONE, A6XX_SP_TP_DEST_MSAA_CNTL_MSAA_DISABLE)); OUT_PKT4(ring, REG_A6XX_GRAS_RAS_MSAA_CNTL, 2); - OUT_RING(ring, A6XX_GRAS_RAS_MSAA_CNTL_SAMPLES(MSAA_ONE)); - OUT_RING(ring, A6XX_GRAS_DEST_MSAA_CNTL_SAMPLES(MSAA_ONE) | - A6XX_GRAS_DEST_MSAA_CNTL_MSAA_DISABLE); + OUT_RING(ring, A6XX_GRAS_RAS_MSAA_CNTL_SAMPLES(samples)); + OUT_RING(ring, A6XX_GRAS_DEST_MSAA_CNTL_SAMPLES(samples) | + COND(samples == MSAA_ONE, A6XX_GRAS_DEST_MSAA_CNTL_MSAA_DISABLE)); OUT_PKT4(ring, REG_A6XX_RB_RAS_MSAA_CNTL, 2); - OUT_RING(ring, A6XX_RB_RAS_MSAA_CNTL_SAMPLES(MSAA_ONE)); - OUT_RING(ring, A6XX_RB_DEST_MSAA_CNTL_SAMPLES(MSAA_ONE) | - A6XX_RB_DEST_MSAA_CNTL_MSAA_DISABLE); + OUT_RING(ring, A6XX_RB_RAS_MSAA_CNTL_SAMPLES(samples)); + OUT_RING(ring, A6XX_RB_DEST_MSAA_CNTL_SAMPLES(samples) | + COND(samples == MSAA_ONE, A6XX_RB_DEST_MSAA_CNTL_MSAA_DISABLE)); + + OUT_PKT4(ring, REG_A6XX_RB_MSAA_CNTL, 1); + OUT_RING(ring, A6XX_RB_MSAA_CNTL_SAMPLES(samples)); } +static void prepare_tile_setup_ib(struct fd_batch *batch); +static void prepare_tile_fini_ib(struct fd_batch *batch); + /* before first tile */ static void fd6_emit_tile_init(struct fd_batch *batch) @@ -468,6 +465,9 @@ fd6_cache_flush(batch, ring); + prepare_tile_setup_ib(batch); + prepare_tile_fini_ib(batch); + OUT_PKT7(ring, CP_SKIP_IB2_ENABLE_GLOBAL, 1); OUT_RING(ring, 0x0); @@ -478,10 +478,7 @@ emit_zs(ring, pfb->zsbuf, &ctx->gmem); emit_mrt(ring, pfb, &ctx->gmem); - - patch_gmem_bases(batch); - - disable_msaa(ring); + emit_msaa(ring, pfb->samples); if (use_hw_binning(batch)) { set_bin_size(ring, gmem->bin_w, 
gmem->bin_h, @@ -552,9 +549,7 @@ OUT_RING(ring, A6XX_VPC_SO_OVERRIDE_SO_DISABLE); if (use_hw_binning(batch)) { - struct fd_gmem_stateobj *gmem = &ctx->gmem; struct fd_vsc_pipe *pipe = &ctx->vsc_pipe[tile->p]; - unsigned n = gmem->nbins_x * gmem->nbins_y; OUT_PKT7(ring, CP_WAIT_FOR_ME, 0); @@ -570,7 +565,7 @@ OUT_RELOC(ring, fd6_ctx->vsc_data, /* VSC_PIPE[p].DATA_ADDRESS */ (tile->p * A6XX_VSC_DATA_PITCH), 0, 0); OUT_RELOC(ring, fd6_ctx->vsc_data, /* VSC_SIZE_ADDRESS + (p * 4) */ - (tile->p * 4) + (n * A6XX_VSC_DATA_PITCH), 0, 0); + (tile->p * 4) + (32 * A6XX_VSC_DATA_PITCH), 0, 0); OUT_RELOC(ring, fd6_ctx->vsc_data2, (tile->p * A6XX_VSC_DATA2_PITCH), 0, 0); } else { @@ -583,9 +578,8 @@ } static void -set_blit_scissor(struct fd_batch *batch) +set_blit_scissor(struct fd_batch *batch, struct fd_ringbuffer *ring) { - struct fd_ringbuffer *ring = batch->gmem; struct pipe_scissor_state blit_scissor; struct pipe_framebuffer_state *pfb = &batch->framebuffer; @@ -604,34 +598,47 @@ } static void -emit_blit(struct fd_batch *batch, uint32_t base, +emit_blit(struct fd_batch *batch, + struct fd_ringbuffer *ring, + uint32_t base, struct pipe_surface *psurf, - struct fd_resource *rsc) + bool stencil) { - struct fd_ringbuffer *ring = batch->gmem; struct fd_resource_slice *slice; + struct fd_resource *rsc = fd_resource(psurf->texture); + enum pipe_format pfmt = psurf->format; uint32_t offset; + /* separate stencil case: */ + if (stencil) { + rsc = rsc->stencil; + pfmt = rsc->base.format; + } + slice = fd_resource_slice(rsc, psurf->u.tex.level); offset = fd_resource_offset(rsc, psurf->u.tex.level, psurf->u.tex.first_layer); debug_assert(psurf->u.tex.first_layer == psurf->u.tex.last_layer); - enum pipe_format pfmt = psurf->format; enum a6xx_color_fmt format = fd6_pipe2color(pfmt); uint32_t stride = slice->pitch * rsc->cpp; uint32_t size = slice->size0; - enum a3xx_color_swap swap = fd6_pipe2swap(pfmt); - - // TODO: tile mode - // bool tiled; - // tiled = rsc->tile_mode && - // 
!fd_resource_level_linear(psurf->texture, psurf->u.tex.level); + enum a3xx_color_swap swap = rsc->tile_mode ? WZYX : fd6_pipe2swap(pfmt); + enum a3xx_msaa_samples samples = + fd_msaa_samples(rsc->base.nr_samples); + uint32_t tile_mode; + + if (rsc->tile_mode && + fd_resource_level_linear(&rsc->base, psurf->u.tex.level)) + tile_mode = TILE6_LINEAR; + else + tile_mode = rsc->tile_mode; OUT_PKT4(ring, REG_A6XX_RB_BLIT_DST_INFO, 5); OUT_RING(ring, - A6XX_RB_BLIT_DST_INFO_TILE_MODE(TILE6_LINEAR) | + A6XX_RB_BLIT_DST_INFO_TILE_MODE(tile_mode) | + A6XX_RB_BLIT_DST_INFO_SAMPLES(samples) | A6XX_RB_BLIT_DST_INFO_COLOR_FORMAT(format) | A6XX_RB_BLIT_DST_INFO_COLOR_SWAP(swap)); OUT_RELOCW(ring, rsc->bo, offset, 0, 0); /* RB_BLIT_DST_LO/HI */ @@ -645,13 +652,14 @@ } static void -emit_restore_blit(struct fd_batch *batch, uint32_t base, +emit_restore_blit(struct fd_batch *batch, + struct fd_ringbuffer *ring, + uint32_t base, struct pipe_surface *psurf, - struct fd_resource *rsc, unsigned buffer) { - struct fd_ringbuffer *ring = batch->gmem; uint32_t info = 0; + bool stencil = false; switch (buffer) { case FD_BUFFER_COLOR: @@ -659,6 +667,7 @@ break; case FD_BUFFER_STENCIL: info |= A6XX_RB_BLIT_INFO_UNK0; + stencil = true; break; case FD_BUFFER_DEPTH: info |= A6XX_RB_BLIT_INFO_DEPTH | A6XX_RB_BLIT_INFO_UNK0; @@ -671,64 +680,244 @@ OUT_PKT4(ring, REG_A6XX_RB_BLIT_INFO, 1); OUT_RING(ring, info | A6XX_RB_BLIT_INFO_GMEM); - emit_blit(batch, base, psurf, rsc); + emit_blit(batch, ring, base, psurf, stencil); +} + +static void +emit_clears(struct fd_batch *batch, struct fd_ringbuffer *ring) +{ + struct pipe_framebuffer_state *pfb = &batch->framebuffer; + struct fd_gmem_stateobj *gmem = &batch->ctx->gmem; + enum a3xx_msaa_samples samples = fd_msaa_samples(pfb->samples); + + uint32_t buffers = batch->fast_cleared; + + if (buffers & PIPE_CLEAR_COLOR) { + + for (int i = 0; i < pfb->nr_cbufs; i++) { + union pipe_color_union *color = &batch->clear_color[i]; + union util_color uc = {0}; + + if 
(!pfb->cbufs[i]) + continue; + + if (!(buffers & (PIPE_CLEAR_COLOR0 << i))) + continue; + + enum pipe_format pfmt = pfb->cbufs[i]->format; + + // XXX I think RB_CLEAR_COLOR_DWn wants to take into account SWAP?? + union pipe_color_union swapped; + switch (fd6_pipe2swap(pfmt)) { + case WZYX: + swapped.ui[0] = color->ui[0]; + swapped.ui[1] = color->ui[1]; + swapped.ui[2] = color->ui[2]; + swapped.ui[3] = color->ui[3]; + break; + case WXYZ: + swapped.ui[2] = color->ui[0]; + swapped.ui[1] = color->ui[1]; + swapped.ui[0] = color->ui[2]; + swapped.ui[3] = color->ui[3]; + break; + case ZYXW: + swapped.ui[3] = color->ui[0]; + swapped.ui[0] = color->ui[1]; + swapped.ui[1] = color->ui[2]; + swapped.ui[2] = color->ui[3]; + break; + case XYZW: + swapped.ui[3] = color->ui[0]; + swapped.ui[2] = color->ui[1]; + swapped.ui[1] = color->ui[2]; + swapped.ui[0] = color->ui[3]; + break; + } + + if (util_format_is_pure_uint(pfmt)) { + util_format_write_4ui(pfmt, swapped.ui, 0, &uc, 0, 0, 0, 1, 1); + } else if (util_format_is_pure_sint(pfmt)) { + util_format_write_4i(pfmt, swapped.i, 0, &uc, 0, 0, 0, 1, 1); + } else { + util_pack_color(swapped.f, pfmt, &uc); + } + + OUT_PKT4(ring, REG_A6XX_RB_BLIT_DST_INFO, 1); + OUT_RING(ring, A6XX_RB_BLIT_DST_INFO_TILE_MODE(TILE6_LINEAR) | + A6XX_RB_BLIT_DST_INFO_SAMPLES(samples) | + A6XX_RB_BLIT_DST_INFO_COLOR_FORMAT(fd6_pipe2color(pfmt))); + + OUT_PKT4(ring, REG_A6XX_RB_BLIT_INFO, 1); + OUT_RING(ring, A6XX_RB_BLIT_INFO_GMEM | + A6XX_RB_BLIT_INFO_CLEAR_MASK(0xf)); + + OUT_PKT4(ring, REG_A6XX_RB_BLIT_BASE_GMEM, 1); + OUT_RING(ring, gmem->cbuf_base[i]); + + OUT_PKT4(ring, REG_A6XX_RB_UNKNOWN_88D0, 1); + OUT_RING(ring, 0); + + OUT_PKT4(ring, REG_A6XX_RB_BLIT_CLEAR_COLOR_DW0, 4); + OUT_RING(ring, uc.ui[0]); + OUT_RING(ring, uc.ui[1]); + OUT_RING(ring, uc.ui[2]); + OUT_RING(ring, uc.ui[3]); + + fd6_emit_blit(batch, ring); + } + } + + const bool has_depth = pfb->zsbuf; + const bool has_separate_stencil = + has_depth && 
fd_resource(pfb->zsbuf->texture)->stencil; + + /* First clear depth or combined depth/stencil. */ + if ((has_depth && (buffers & PIPE_CLEAR_DEPTH)) || + (!has_separate_stencil && (buffers & PIPE_CLEAR_STENCIL))) { + enum pipe_format pfmt = pfb->zsbuf->format; + uint32_t clear_value; + uint32_t mask = 0; + + if (has_separate_stencil) { + pfmt = util_format_get_depth_only(pfb->zsbuf->format); + clear_value = util_pack_z(pfmt, batch->clear_depth); + } else { + pfmt = pfb->zsbuf->format; + clear_value = util_pack_z_stencil(pfmt, batch->clear_depth, + batch->clear_stencil); + } + + if (buffers & PIPE_CLEAR_DEPTH) + mask |= 0x1; + + if (!has_separate_stencil && (buffers & PIPE_CLEAR_STENCIL)) + mask |= 0x2; + + OUT_PKT4(ring, REG_A6XX_RB_BLIT_DST_INFO, 1); + OUT_RING(ring, A6XX_RB_BLIT_DST_INFO_TILE_MODE(TILE6_LINEAR) | + A6XX_RB_BLIT_DST_INFO_SAMPLES(samples) | + A6XX_RB_BLIT_DST_INFO_COLOR_FORMAT(fd6_pipe2color(pfmt))); + + OUT_PKT4(ring, REG_A6XX_RB_BLIT_INFO, 1); + OUT_RING(ring, A6XX_RB_BLIT_INFO_GMEM | + // XXX UNK0 for separate stencil ?? + A6XX_RB_BLIT_INFO_DEPTH | + A6XX_RB_BLIT_INFO_CLEAR_MASK(mask)); + + OUT_PKT4(ring, REG_A6XX_RB_BLIT_BASE_GMEM, 1); + OUT_RING(ring, gmem->zsbuf_base[0]); + + OUT_PKT4(ring, REG_A6XX_RB_UNKNOWN_88D0, 1); + OUT_RING(ring, 0); + + OUT_PKT4(ring, REG_A6XX_RB_BLIT_CLEAR_COLOR_DW0, 1); + OUT_RING(ring, clear_value); + + fd6_emit_blit(batch, ring); + } + + /* Then clear the separate stencil buffer in case of 32 bit depth + * formats with separate stencil. 
*/ + if (has_separate_stencil && (buffers & PIPE_CLEAR_STENCIL)) { + OUT_PKT4(ring, REG_A6XX_RB_BLIT_DST_INFO, 1); + OUT_RING(ring, A6XX_RB_BLIT_DST_INFO_TILE_MODE(TILE6_LINEAR) | + A6XX_RB_BLIT_DST_INFO_SAMPLES(samples) | + A6XX_RB_BLIT_DST_INFO_COLOR_FORMAT(RB6_R8_UINT)); + + OUT_PKT4(ring, REG_A6XX_RB_BLIT_INFO, 1); + OUT_RING(ring, A6XX_RB_BLIT_INFO_GMEM | + //A6XX_RB_BLIT_INFO_UNK0 | + A6XX_RB_BLIT_INFO_DEPTH | + A6XX_RB_BLIT_INFO_CLEAR_MASK(0x1)); + + OUT_PKT4(ring, REG_A6XX_RB_BLIT_BASE_GMEM, 1); + OUT_RING(ring, gmem->zsbuf_base[1]); + + OUT_PKT4(ring, REG_A6XX_RB_UNKNOWN_88D0, 1); + OUT_RING(ring, 0); + + OUT_PKT4(ring, REG_A6XX_RB_BLIT_CLEAR_COLOR_DW0, 1); + OUT_RING(ring, batch->clear_stencil & 0xff); + + fd6_emit_blit(batch, ring); + } } /* * transfer from system memory to gmem */ static void -fd6_emit_tile_mem2gmem(struct fd_batch *batch, struct fd_tile *tile) +emit_restore_blits(struct fd_batch *batch, struct fd_ringbuffer *ring) { struct fd_context *ctx = batch->ctx; struct fd_gmem_stateobj *gmem = &ctx->gmem; struct pipe_framebuffer_state *pfb = &batch->framebuffer; - set_blit_scissor(batch); - - if (fd_gmem_needs_restore(batch, tile, FD_BUFFER_COLOR)) { + if (batch->restore & FD_BUFFER_COLOR) { unsigned i; for (i = 0; i < pfb->nr_cbufs; i++) { if (!pfb->cbufs[i]) continue; if (!(batch->restore & (PIPE_CLEAR_COLOR0 << i))) continue; - emit_restore_blit(batch, gmem->cbuf_base[i], pfb->cbufs[i], - fd_resource(pfb->cbufs[i]->texture), + emit_restore_blit(batch, ring, gmem->cbuf_base[i], pfb->cbufs[i], FD_BUFFER_COLOR); } } - if (fd_gmem_needs_restore(batch, tile, FD_BUFFER_DEPTH | FD_BUFFER_STENCIL)) { + if (batch->restore & (FD_BUFFER_DEPTH | FD_BUFFER_STENCIL)) { struct fd_resource *rsc = fd_resource(pfb->zsbuf->texture); - if (!rsc->stencil || fd_gmem_needs_restore(batch, tile, FD_BUFFER_DEPTH)) { - emit_restore_blit(batch, gmem->zsbuf_base[0], pfb->zsbuf, rsc, + if (!rsc->stencil || (batch->restore & FD_BUFFER_DEPTH)) { + emit_restore_blit(batch, 
ring, gmem->zsbuf_base[0], pfb->zsbuf, FD_BUFFER_DEPTH); } - if (rsc->stencil && fd_gmem_needs_restore(batch, tile, FD_BUFFER_STENCIL)) { - emit_restore_blit(batch, gmem->zsbuf_base[1], pfb->zsbuf, rsc->stencil, + if (rsc->stencil && (batch->restore & FD_BUFFER_STENCIL)) { + emit_restore_blit(batch, ring, gmem->zsbuf_base[1], pfb->zsbuf, FD_BUFFER_STENCIL); } } } +static void +prepare_tile_setup_ib(struct fd_batch *batch) +{ + batch->tile_setup = fd_submit_new_ringbuffer(batch->submit, 0x1000, + FD_RINGBUFFER_STREAMING); + + set_blit_scissor(batch, batch->tile_setup); + + emit_restore_blits(batch, batch->tile_setup); + emit_clears(batch, batch->tile_setup); +} + +/* + * transfer from system memory to gmem + */ +static void +fd6_emit_tile_mem2gmem(struct fd_batch *batch, struct fd_tile *tile) +{ +} + /* before IB to rendering cmds: */ static void fd6_emit_tile_renderprep(struct fd_batch *batch, struct fd_tile *tile) { + fd6_emit_ib(batch->gmem, batch->tile_setup); } static void -emit_resolve_blit(struct fd_batch *batch, uint32_t base, +emit_resolve_blit(struct fd_batch *batch, + struct fd_ringbuffer *ring, + uint32_t base, struct pipe_surface *psurf, - struct fd_resource *rsc, unsigned buffer) { - struct fd_ringbuffer *ring = batch->gmem; uint32_t info = 0; + bool stencil = false; - if (!rsc->valid) + if (!fd_resource(psurf->texture)->valid) return; switch (buffer) { @@ -736,6 +925,7 @@ break; case FD_BUFFER_STENCIL: info |= A6XX_RB_BLIT_INFO_UNK0; + stencil = true; break; case FD_BUFFER_DEPTH: info |= A6XX_RB_BLIT_INFO_DEPTH; @@ -748,7 +938,7 @@ OUT_PKT4(ring, REG_A6XX_RB_BLIT_INFO, 1); OUT_RING(ring, info); - emit_blit(batch, base, psurf, rsc); + emit_blit(batch, ring, base, psurf, stencil); } /* @@ -756,12 +946,16 @@ */ static void -fd6_emit_tile_gmem2mem(struct fd_batch *batch, struct fd_tile *tile) +prepare_tile_fini_ib(struct fd_batch *batch) { struct fd_context *ctx = batch->ctx; struct fd_gmem_stateobj *gmem = &ctx->gmem; struct pipe_framebuffer_state *pfb = 
&batch->framebuffer; - struct fd_ringbuffer *ring = batch->gmem; + struct fd_ringbuffer *ring; + + batch->tile_fini = fd_submit_new_ringbuffer(batch->submit, 0x1000, + FD_RINGBUFFER_STREAMING); + ring = batch->tile_fini; if (use_hw_binning(batch)) { OUT_PKT7(ring, CP_SET_MARKER, 1); @@ -783,17 +977,19 @@ OUT_RING(ring, A2XX_CP_SET_MARKER_0_MODE(RM6_RESOLVE) | 0x10); emit_marker6(ring, 7); - set_blit_scissor(batch); + set_blit_scissor(batch, ring); if (batch->resolve & (FD_BUFFER_DEPTH | FD_BUFFER_STENCIL)) { struct fd_resource *rsc = fd_resource(pfb->zsbuf->texture); if (!rsc->stencil || (batch->resolve & FD_BUFFER_DEPTH)) { - emit_resolve_blit(batch, gmem->zsbuf_base[0], pfb->zsbuf, rsc, + emit_resolve_blit(batch, ring, + gmem->zsbuf_base[0], pfb->zsbuf, FD_BUFFER_DEPTH); } if (rsc->stencil && (batch->resolve & FD_BUFFER_STENCIL)) { - emit_resolve_blit(batch, gmem->zsbuf_base[1], pfb->zsbuf, rsc->stencil, + emit_resolve_blit(batch, ring, + gmem->zsbuf_base[1], pfb->zsbuf, FD_BUFFER_STENCIL); } } @@ -805,14 +1001,19 @@ continue; if (!(batch->resolve & (PIPE_CLEAR_COLOR0 << i))) continue; - emit_resolve_blit(batch, gmem->cbuf_base[i], pfb->cbufs[i], - fd_resource(pfb->cbufs[i]->texture), + emit_resolve_blit(batch, ring, gmem->cbuf_base[i], pfb->cbufs[i], FD_BUFFER_COLOR); } } } static void +fd6_emit_tile_gmem2mem(struct fd_batch *batch, struct fd_tile *tile) +{ + fd6_emit_ib(batch->gmem, batch->tile_fini); +} + +static void fd6_emit_tile_fini(struct fd_batch *batch) { struct fd_ringbuffer *ring = batch->gmem; @@ -874,8 +1075,7 @@ emit_zs(ring, pfb->zsbuf, NULL); emit_mrt(ring, pfb, NULL); - - disable_msaa(ring); + emit_msaa(ring, pfb->samples); } static void diff -Nru mesa-18.3.3/src/gallium/drivers/freedreno/a6xx/fd6_image.c mesa-19.0.1/src/gallium/drivers/freedreno/a6xx/fd6_image.c --- mesa-18.3.3/src/gallium/drivers/freedreno/a6xx/fd6_image.c 2018-12-07 18:58:04.000000000 +0000 +++ mesa-19.0.1/src/gallium/drivers/freedreno/a6xx/fd6_image.c 2019-03-31 
23:16:37.000000000 +0000 @@ -43,6 +43,7 @@ }; struct fd6_image { + struct pipe_resource *prsc; enum pipe_format pfmt; enum a6xx_tex_fmt fmt; enum a6xx_tex_fetchsize fetchsize; @@ -70,6 +71,7 @@ return; } + img->prsc = prsc; img->pfmt = format; img->fmt = fd6_pipe2tex(format); img->fetchsize = fd6_pipe2fetchsize(format); @@ -112,7 +114,7 @@ OUT_RING(ring, CP_LOAD_STATE6_2_EXT_SRC_ADDR_HI(0)); OUT_RING(ring, A6XX_TEX_CONST_0_FMT(img->fmt) | - fd6_tex_swiz(img->pfmt, PIPE_SWIZZLE_X, PIPE_SWIZZLE_Y, + fd6_tex_swiz(img->prsc, PIPE_SWIZZLE_X, PIPE_SWIZZLE_Y, PIPE_SWIZZLE_Z, PIPE_SWIZZLE_W) | COND(img->srgb, A6XX_TEX_CONST_0_SRGB)); OUT_RING(ring, A6XX_TEX_CONST_1_WIDTH(img->width) | diff -Nru mesa-18.3.3/src/gallium/drivers/freedreno/a6xx/fd6_program.c mesa-19.0.1/src/gallium/drivers/freedreno/a6xx/fd6_program.c --- mesa-18.3.3/src/gallium/drivers/freedreno/a6xx/fd6_program.c 2018-12-07 18:58:04.000000000 +0000 +++ mesa-19.0.1/src/gallium/drivers/freedreno/a6xx/fd6_program.c 2019-03-31 23:16:37.000000000 +0000 @@ -41,7 +41,7 @@ static struct ir3_shader * create_shader_stateobj(struct pipe_context *pctx, const struct pipe_shader_state *cso, - enum shader_t type) + gl_shader_stage type) { struct fd_context *ctx = fd_context(pctx); struct ir3_compiler *compiler = ctx->screen->compiler; @@ -52,7 +52,7 @@ fd6_fp_state_create(struct pipe_context *pctx, const struct pipe_shader_state *cso) { - return create_shader_stateobj(pctx, cso, SHADER_FRAGMENT); + return create_shader_stateobj(pctx, cso, MESA_SHADER_FRAGMENT); } static void @@ -68,7 +68,7 @@ fd6_vp_state_create(struct pipe_context *pctx, const struct pipe_shader_state *cso) { - return create_shader_stateobj(pctx, cso, SHADER_VERTEX); + return create_shader_stateobj(pctx, cso, MESA_SHADER_VERTEX); } static void @@ -100,11 +100,12 @@ } switch (so->type) { - case SHADER_VERTEX: + case MESA_SHADER_VERTEX: opcode = CP_LOAD_STATE6_GEOM; break; - case SHADER_FRAGMENT: - case SHADER_COMPUTE: + case MESA_SHADER_FRAGMENT: + case 
MESA_SHADER_COMPUTE: + case MESA_SHADER_KERNEL: opcode = CP_LOAD_STATE6_FRAG; break; default: @@ -121,7 +122,7 @@ OUT_RING(ring, CP_LOAD_STATE6_1_EXT_SRC_ADDR(0)); OUT_RING(ring, CP_LOAD_STATE6_2_EXT_SRC_ADDR_HI(0)); } else { - OUT_RELOC(ring, so->bo, 0, 0, 0); + OUT_RELOCD(ring, so->bo, 0, 0, 0); } /* for how clever coverity is, it is sometimes rather dull, and @@ -140,14 +141,14 @@ static void link_stream_out(struct ir3_shader_linkage *l, const struct ir3_shader_variant *v) { - const struct pipe_stream_output_info *strmout = &v->shader->stream_output; + const struct ir3_stream_output_info *strmout = &v->shader->stream_output; /* * First, any stream-out varyings not already in linkage map (ie. also * consumed by frag shader) need to be added: */ for (unsigned i = 0; i < strmout->num_outputs; i++) { - const struct pipe_stream_output *out = &strmout->output[i]; + const struct ir3_stream_output *out = &strmout->output[i]; unsigned k = out->register_index; unsigned compmask = (1 << (out->num_components + out->start_component)) - 1; @@ -185,7 +186,7 @@ setup_stream_out(struct fd6_program_state *state, const struct ir3_shader_variant *v, struct ir3_shader_linkage *l) { - const struct pipe_stream_output_info *strmout = &v->shader->stream_output; + const struct ir3_stream_output_info *strmout = &v->shader->stream_output; struct fd6_streamout_state *tf = &state->tf; memset(tf, 0, sizeof(*tf)); @@ -195,7 +196,7 @@ debug_assert(tf->prog_count < ARRAY_SIZE(tf->prog)); for (unsigned i = 0; i < strmout->num_outputs; i++) { - const struct pipe_stream_output *out = &strmout->output[i]; + const struct ir3_stream_output *out = &strmout->output[i]; unsigned k = out->register_index; unsigned idx; @@ -402,7 +403,7 @@ OUT_RING(ring, A6XX_SP_VS_CTRL_REG0_THREADSIZE(fssz) | A6XX_SP_VS_CTRL_REG0_FULLREGFOOTPRINT(s[VS].i->max_reg + 1) | A6XX_SP_VS_CTRL_REG0_MERGEDREGS | - A6XX_SP_VS_CTRL_REG0_BRANCHSTACK(0x3) | // XXX need to figure this out somehow.. 
+ A6XX_SP_VS_CTRL_REG0_BRANCHSTACK(s[VS].v->branchstack) | COND(s[VS].v->num_samp > 0, A6XX_SP_VS_CTRL_REG0_PIXLODENABLE)); struct ir3_shader_linkage l = {0}; @@ -524,7 +525,7 @@ 0x1000000 | A6XX_SP_FS_CTRL_REG0_FULLREGFOOTPRINT(s[FS].i->max_reg + 1) | A6XX_SP_FS_CTRL_REG0_MERGEDREGS | - A6XX_SP_FS_CTRL_REG0_BRANCHSTACK(0x3) | // XXX need to figure this out somehow.. + A6XX_SP_FS_CTRL_REG0_BRANCHSTACK(s[FS].v->branchstack) | COND(s[FS].v->num_samp > 0, A6XX_SP_FS_CTRL_REG0_PIXLODENABLE)); OUT_PKT4(ring, REG_A6XX_SP_UNKNOWN_A982, 1); diff -Nru mesa-18.3.3/src/gallium/drivers/freedreno/a6xx/fd6_program.h mesa-19.0.1/src/gallium/drivers/freedreno/a6xx/fd6_program.h --- mesa-18.3.3/src/gallium/drivers/freedreno/a6xx/fd6_program.h 2018-12-07 18:58:04.000000000 +0000 +++ mesa-19.0.1/src/gallium/drivers/freedreno/a6xx/fd6_program.h 2019-03-31 23:16:37.000000000 +0000 @@ -30,7 +30,8 @@ #include "pipe/p_context.h" #include "freedreno_context.h" -#include "ir3_shader.h" + +#include "ir3/ir3_shader.h" #include "ir3_cache.h" struct fd6_streamout_state { diff -Nru mesa-18.3.3/src/gallium/drivers/freedreno/a6xx/fd6_rasterizer.c mesa-19.0.1/src/gallium/drivers/freedreno/a6xx/fd6_rasterizer.c --- mesa-18.3.3/src/gallium/drivers/freedreno/a6xx/fd6_rasterizer.c 2018-12-07 18:58:04.000000000 +0000 +++ mesa-19.0.1/src/gallium/drivers/freedreno/a6xx/fd6_rasterizer.c 2019-03-31 23:16:37.000000000 +0000 @@ -69,7 +69,9 @@ A6XX_GRAS_SU_POLY_OFFSET_OFFSET_CLAMP(cso->offset_clamp); so->gras_su_cntl = - A6XX_GRAS_SU_CNTL_LINEHALFWIDTH(cso->line_width/2.0); + A6XX_GRAS_SU_CNTL_LINEHALFWIDTH(cso->line_width/2.0) | + COND(cso->multisample, A6XX_GRAS_SU_CNTL_MSAA_ENABLE); + #if 0 so->pc_raster_cntl = A6XX_PC_RASTER_CNTL_POLYMODE_FRONT_PTYPE(fd_polygon_mode(cso->fill_front)) | diff -Nru mesa-18.3.3/src/gallium/drivers/freedreno/a6xx/fd6_resource.c mesa-19.0.1/src/gallium/drivers/freedreno/a6xx/fd6_resource.c --- mesa-18.3.3/src/gallium/drivers/freedreno/a6xx/fd6_resource.c 2018-12-07 
18:58:04.000000000 +0000 +++ mesa-19.0.1/src/gallium/drivers/freedreno/a6xx/fd6_resource.c 2019-03-31 23:16:37.000000000 +0000 @@ -27,22 +27,30 @@ #include "fd6_resource.h" -/* indexed by cpp: */ +/* indexed by cpp, including msaa 2x and 4x: */ static const struct { unsigned pitchalign; unsigned heightalign; } tile_alignment[] = { [1] = { 128, 32 }, - [2] = { 128, 16 }, - [3] = { 128, 16 }, + [2] = { 64, 32 }, + [3] = { 64, 32 }, [4] = { 64, 16 }, + [6] = { 64, 16 }, [8] = { 64, 16 }, [12] = { 64, 16 }, [16] = { 64, 16 }, + [24] = { 64, 16 }, + [32] = { 64, 16 }, + [48] = { 64, 16 }, + [64] = { 64, 16 }, + + /* special cases for r16: */ + [0] = { 128, 16 }, }; /* NOTE: good way to test this is: (for example) - * piglit/bin/texelFetch fs sampler2D 100x100x1-100x300x1 + * piglit/bin/texelFetch fs sampler3D 100x100x8 */ static uint32_t setup_slices(struct fd_resource *rsc, uint32_t alignment, enum pipe_format format) @@ -51,41 +59,61 @@ struct fd_screen *screen = fd_screen(prsc->screen); enum util_format_layout layout = util_format_description(format)->layout; uint32_t pitchalign = screen->gmem_alignw; - uint32_t heightalign; uint32_t level, size = 0; - uint32_t width = prsc->width0; - uint32_t height = prsc->height0; uint32_t depth = prsc->depth0; + /* linear dimensions: */ + uint32_t lwidth = prsc->width0; + uint32_t lheight = prsc->height0; + /* tile_mode dimensions: */ + uint32_t twidth = util_next_power_of_two(lwidth); + uint32_t theight = util_next_power_of_two(lheight); /* in layer_first layout, the level (slice) contains just one * layer (since in fact the layer contains the slices) */ uint32_t layers_in_level = rsc->layer_first ? 
1 : prsc->array_size; + int ta = rsc->cpp; + + /* The z16/r16 formats seem to not play by the normal tiling rules: */ + if ((rsc->cpp == 2) && (util_format_get_nr_components(format) == 1)) + ta = 0; - heightalign = tile_alignment[rsc->cpp].heightalign; + debug_assert(ta < ARRAY_SIZE(tile_alignment)); + debug_assert(tile_alignment[ta].pitchalign); for (level = 0; level <= prsc->last_level; level++) { struct fd_resource_slice *slice = fd_resource_slice(rsc, level); bool linear_level = fd_resource_level_linear(prsc, level); + uint32_t width, height; + + /* tiled levels of 3D textures are rounded up to PoT dimensions: */ + if ((prsc->target == PIPE_TEXTURE_3D) && rsc->tile_mode && !linear_level) { + width = twidth; + height = theight; + } else { + width = lwidth; + height = lheight; + } uint32_t aligned_height = height; uint32_t blocks; if (rsc->tile_mode && !linear_level) { - pitchalign = tile_alignment[rsc->cpp].pitchalign; - aligned_height = align(aligned_height, heightalign); + pitchalign = tile_alignment[ta].pitchalign; + aligned_height = align(aligned_height, + tile_alignment[ta].heightalign); } else { pitchalign = 64; - - /* The blits used for mem<->gmem work at a granularity of - * 32x32, which can cause faults due to over-fetch on the - * last level. The simple solution is to over-allocate a - * bit the last level to ensure any over-fetch is harmless. - * The pitch is already sufficiently aligned, but height - * may not be: - */ - if ((level == prsc->last_level) && (prsc->target != PIPE_BUFFER)) - aligned_height = align(aligned_height, 32); } + /* The blits used for mem<->gmem work at a granularity of + * 32x32, which can cause faults due to over-fetch on the + * last level. The simple solution is to over-allocate a + * bit the last level to ensure any over-fetch is harmless. 
+ * The pitch is already sufficiently aligned, but height + * may not be: + */ + if ((level == prsc->last_level) && (prsc->target != PIPE_BUFFER)) + aligned_height = align(aligned_height, 32); + if (layout == UTIL_FORMAT_LAYOUT_ASTC) slice->pitch = util_align_npot(width, pitchalign * util_format_get_blockwidth(format)); @@ -96,34 +124,36 @@ blocks = util_format_get_nblocks(format, slice->pitch, aligned_height); /* 1d array and 2d array textures must all have the same layer size - * for each miplevel on a3xx. 3d textures can have different layer + * for each miplevel on a6xx. 3d textures can have different layer * sizes for high levels, but the hw auto-sizer is buggy (or at least * different than what this code does), so as soon as the layer size * range gets into range, we stop reducing it. */ - if (prsc->target == PIPE_TEXTURE_3D && ( - level == 1 || - (level > 1 && rsc->slices[level - 1].size0 > 0xf000))) - slice->size0 = align(blocks * rsc->cpp, alignment); - else if (level == 0 || rsc->layer_first || alignment == 1) + if (prsc->target == PIPE_TEXTURE_3D) { + if (level <= 1 || (rsc->slices[level - 1].size0 > 0xf000)) { + slice->size0 = align(blocks * rsc->cpp, alignment); + } else { + slice->size0 = rsc->slices[level - 1].size0; + } + } else { slice->size0 = align(blocks * rsc->cpp, alignment); - else - slice->size0 = rsc->slices[level - 1].size0; + } + + size += slice->size0 * depth * layers_in_level; #if 0 - debug_printf("%s: %ux%ux%u@%u: %2u: stride=%4u, size=%7u, aligned_height=%3u\n", + debug_printf("%s: %ux%ux%u@%u:\t%2u: stride=%4u, size=%6u,%7u, aligned_height=%3u, blocks=%u\n", util_format_name(prsc->format), - prsc->width0, prsc->height0, prsc->depth0, rsc->cpp, + width, height, depth, rsc->cpp, level, slice->pitch * rsc->cpp, - slice->size0 * depth * layers_in_level, - aligned_height); + slice->size0, size, aligned_height, blocks); #endif - size += slice->size0 * depth * layers_in_level; - - width = u_minify(width, 1); - height = u_minify(height, 1); 
depth = u_minify(depth, 1); + lwidth = u_minify(lwidth, 1); + lheight = u_minify(lheight, 1); + twidth = u_minify(twidth, 1); + theight = u_minify(theight, 1); } return size; diff -Nru mesa-18.3.3/src/gallium/drivers/freedreno/a6xx/fd6_screen.c mesa-19.0.1/src/gallium/drivers/freedreno/a6xx/fd6_screen.c --- mesa-18.3.3/src/gallium/drivers/freedreno/a6xx/fd6_screen.c 2018-12-07 18:58:04.000000000 +0000 +++ mesa-19.0.1/src/gallium/drivers/freedreno/a6xx/fd6_screen.c 2019-03-31 23:16:37.000000000 +0000 @@ -29,11 +29,30 @@ #include "util/u_format.h" #include "fd6_screen.h" +#include "fd6_blitter.h" #include "fd6_context.h" #include "fd6_format.h" #include "fd6_resource.h" -#include "ir3_compiler.h" +#include "ir3/ir3_compiler.h" + +static bool +valid_sample_count(unsigned sample_count) +{ + switch (sample_count) { + case 0: + case 1: + case 2: + case 4: +// TODO seems 8x works, but increases lrz width or height.. but the +// blob I have doesn't seem to expose any egl configs w/ 8x, so +// just hide it for now and revisit later. 
+// case 8: + return true; + default: + return false; + } +} static boolean fd6_screen_is_format_supported(struct pipe_screen *pscreen, @@ -46,7 +65,7 @@ unsigned retval = 0; if ((target >= PIPE_MAX_TEXTURE_TYPES) || - (sample_count > 1)) { /* TODO add MSAA */ + !valid_sample_count(sample_count)) { DBG("not supported: format=%s, target=%d, sample_count=%d, usage=%x", util_format_name(format), target, sample_count, usage); return FALSE; @@ -60,11 +79,11 @@ retval |= PIPE_BIND_VERTEX_BUFFER; } - if ((usage & PIPE_BIND_SAMPLER_VIEW) && + if ((usage & (PIPE_BIND_SAMPLER_VIEW | PIPE_BIND_SHADER_IMAGE)) && (target == PIPE_BUFFER || util_format_get_blocksize(format) != 12) && (fd6_pipe2tex(format) != (enum a6xx_tex_fmt)~0)) { - retval |= PIPE_BIND_SAMPLER_VIEW; + retval |= usage & (PIPE_BIND_SAMPLER_VIEW | PIPE_BIND_SHADER_IMAGE); } if ((usage & (PIPE_BIND_RENDER_TARGET | @@ -116,4 +135,5 @@ pscreen->is_format_supported = fd6_screen_is_format_supported; screen->setup_slices = fd6_setup_slices; + screen->tile_mode = fd6_tile_mode; } diff -Nru mesa-18.3.3/src/gallium/drivers/freedreno/a6xx/fd6_texture.c mesa-19.0.1/src/gallium/drivers/freedreno/a6xx/fd6_texture.c --- mesa-18.3.3/src/gallium/drivers/freedreno/a6xx/fd6_texture.c 2018-12-07 18:58:04.000000000 +0000 +++ mesa-19.0.1/src/gallium/drivers/freedreno/a6xx/fd6_texture.c 2019-03-31 23:16:37.000000000 +0000 @@ -245,7 +245,8 @@ so->texconst0 = A6XX_TEX_CONST_0_FMT(fd6_pipe2tex(format)) | - fd6_tex_swiz(format, cso->swizzle_r, cso->swizzle_g, + A6XX_TEX_CONST_0_SAMPLES(fd_msaa_samples(prsc->nr_samples)) | + fd6_tex_swiz(prsc, cso->swizzle_r, cso->swizzle_g, cso->swizzle_b, cso->swizzle_a); /* NOTE: since we sample z24s8 using 8888_UINT format, the swizzle @@ -256,8 +257,12 @@ * Note that gallium expects stencil sampler to return (s,s,s,s) * which isn't quite true. To make that happen we'd have to massage * the swizzle. But in practice only the .x component is used. 
+ * + * Skip this in the tile case because tiled formats are not swapped + * and we have already applied the inverse swap in fd6_tex_swiz() + * to componsate for that. */ - if (format == PIPE_FORMAT_X24S8_UINT) { + if ((format == PIPE_FORMAT_X24S8_UINT) && !rsc->tile_mode) { so->texconst0 |= A6XX_TEX_CONST_0_SWAP(XYZW); } @@ -280,12 +285,17 @@ so->offset = cso->u.buf.offset; } else { unsigned miplevels; + enum a6xx_tile_mode tile_mode = TILE6_LINEAR; lvl = fd_sampler_first_level(cso); miplevels = fd_sampler_last_level(cso) - lvl; layers = cso->u.tex.last_layer - cso->u.tex.first_layer + 1; - so->texconst0 |= A6XX_TEX_CONST_0_MIPLVLS(miplevels); + if (!fd_resource_level_linear(prsc, lvl)) + tile_mode = fd_resource(prsc)->tile_mode; + + so->texconst0 |= A6XX_TEX_CONST_0_MIPLVLS(miplevels) | + A6XX_TEX_CONST_0_TILE_MODE(tile_mode); so->texconst1 = A6XX_TEX_CONST_1_WIDTH(u_minify(prsc->width0, lvl)) | A6XX_TEX_CONST_1_HEIGHT(u_minify(prsc->height0, lvl)); @@ -324,12 +334,12 @@ break; case PIPE_TEXTURE_3D: so->texconst3 = + A6XX_TEX_CONST_3_MIN_LAYERSZ(rsc->slices[prsc->last_level].size0) | A6XX_TEX_CONST_3_ARRAY_PITCH(rsc->slices[lvl].size0); so->texconst5 = A6XX_TEX_CONST_5_DEPTH(u_minify(prsc->depth0, lvl)); break; default: - so->texconst3 = 0x00000000; break; } diff -Nru mesa-18.3.3/src/gallium/drivers/freedreno/adreno_common.xml.h mesa-19.0.1/src/gallium/drivers/freedreno/adreno_common.xml.h --- mesa-18.3.3/src/gallium/drivers/freedreno/adreno_common.xml.h 2018-12-07 18:58:04.000000000 +0000 +++ mesa-19.0.1/src/gallium/drivers/freedreno/adreno_common.xml.h 1970-01-01 00:00:00.000000000 +0000 @@ -1,535 +0,0 @@ -#ifndef ADRENO_COMMON_XML -#define ADRENO_COMMON_XML - -/* Autogenerated file, DO NOT EDIT manually! 
- -This file was generated by the rules-ng-ng headergen tool in this git repository: -http://github.com/freedreno/envytools/ -git clone https://github.com/freedreno/envytools.git - -The rules-ng-ng source files this header was generated from are: -- /home/robclark/src/envytools/rnndb/adreno.xml ( 501 bytes, from 2018-07-03 19:37:13) -- /home/robclark/src/envytools/rnndb/freedreno_copyright.xml ( 1572 bytes, from 2018-07-03 19:37:13) -- /home/robclark/src/envytools/rnndb/adreno/a2xx.xml ( 37936 bytes, from 2018-10-08 11:43:51) -- /home/robclark/src/envytools/rnndb/adreno/adreno_common.xml ( 14201 bytes, from 2018-10-08 11:43:51) -- /home/robclark/src/envytools/rnndb/adreno/adreno_pm4.xml ( 42864 bytes, from 2018-10-08 21:57:22) -- /home/robclark/src/envytools/rnndb/adreno/a3xx.xml ( 83840 bytes, from 2018-07-03 19:37:13) -- /home/robclark/src/envytools/rnndb/adreno/a4xx.xml ( 112086 bytes, from 2018-07-03 19:37:13) -- /home/robclark/src/envytools/rnndb/adreno/a5xx.xml ( 147240 bytes, from 2018-10-08 21:57:22) -- /home/robclark/src/envytools/rnndb/adreno/a6xx.xml ( 140514 bytes, from 2018-10-08 21:57:35) -- /home/robclark/src/envytools/rnndb/adreno/a6xx_gmu.xml ( 10431 bytes, from 2018-09-14 13:03:07) -- /home/robclark/src/envytools/rnndb/adreno/ocmem.xml ( 1773 bytes, from 2018-07-03 19:37:13) - -Copyright (C) 2013-2018 by the following authors: -- Rob Clark (robclark) -- Ilia Mirkin (imirkin) - -Permission is hereby granted, free of charge, to any person obtaining -a copy of this software and associated documentation files (the -"Software"), to deal in the Software without restriction, including -without limitation the rights to use, copy, modify, merge, publish, -distribute, sublicense, and/or sell copies of the Software, and to -permit persons to whom the Software is furnished to do so, subject to -the following conditions: - -The above copyright notice and this permission notice (including the -next paragraph) shall be included in all copies or substantial 
-portions of the Software. - -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, -EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF -MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. -IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE -LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION -OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION -WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. -*/ - - -enum chip { - A2XX = 0, - A3XX = 0, - A4XX = 0, - A5XX = 0, - A6XX = 0, -}; - -enum adreno_pa_su_sc_draw { - PC_DRAW_POINTS = 0, - PC_DRAW_LINES = 1, - PC_DRAW_TRIANGLES = 2, -}; - -enum adreno_compare_func { - FUNC_NEVER = 0, - FUNC_LESS = 1, - FUNC_EQUAL = 2, - FUNC_LEQUAL = 3, - FUNC_GREATER = 4, - FUNC_NOTEQUAL = 5, - FUNC_GEQUAL = 6, - FUNC_ALWAYS = 7, -}; - -enum adreno_stencil_op { - STENCIL_KEEP = 0, - STENCIL_ZERO = 1, - STENCIL_REPLACE = 2, - STENCIL_INCR_CLAMP = 3, - STENCIL_DECR_CLAMP = 4, - STENCIL_INVERT = 5, - STENCIL_INCR_WRAP = 6, - STENCIL_DECR_WRAP = 7, -}; - -enum adreno_rb_blend_factor { - FACTOR_ZERO = 0, - FACTOR_ONE = 1, - FACTOR_SRC_COLOR = 4, - FACTOR_ONE_MINUS_SRC_COLOR = 5, - FACTOR_SRC_ALPHA = 6, - FACTOR_ONE_MINUS_SRC_ALPHA = 7, - FACTOR_DST_COLOR = 8, - FACTOR_ONE_MINUS_DST_COLOR = 9, - FACTOR_DST_ALPHA = 10, - FACTOR_ONE_MINUS_DST_ALPHA = 11, - FACTOR_CONSTANT_COLOR = 12, - FACTOR_ONE_MINUS_CONSTANT_COLOR = 13, - FACTOR_CONSTANT_ALPHA = 14, - FACTOR_ONE_MINUS_CONSTANT_ALPHA = 15, - FACTOR_SRC_ALPHA_SATURATE = 16, - FACTOR_SRC1_COLOR = 20, - FACTOR_ONE_MINUS_SRC1_COLOR = 21, - FACTOR_SRC1_ALPHA = 22, - FACTOR_ONE_MINUS_SRC1_ALPHA = 23, -}; - -enum adreno_rb_surface_endian { - ENDIAN_NONE = 0, - ENDIAN_8IN16 = 1, - ENDIAN_8IN32 = 2, - ENDIAN_16IN32 = 3, - ENDIAN_8IN64 = 4, - ENDIAN_8IN128 = 5, -}; - -enum adreno_rb_dither_mode { - DITHER_DISABLE = 0, - DITHER_ALWAYS = 1, - DITHER_IF_ALPHA_OFF = 2, -}; - -enum 
adreno_rb_depth_format { - DEPTHX_16 = 0, - DEPTHX_24_8 = 1, - DEPTHX_32 = 2, -}; - -enum adreno_rb_copy_control_mode { - RB_COPY_RESOLVE = 1, - RB_COPY_CLEAR = 2, - RB_COPY_DEPTH_STENCIL = 5, -}; - -enum a3xx_rop_code { - ROP_CLEAR = 0, - ROP_NOR = 1, - ROP_AND_INVERTED = 2, - ROP_COPY_INVERTED = 3, - ROP_AND_REVERSE = 4, - ROP_INVERT = 5, - ROP_XOR = 6, - ROP_NAND = 7, - ROP_AND = 8, - ROP_EQUIV = 9, - ROP_NOOP = 10, - ROP_OR_INVERTED = 11, - ROP_COPY = 12, - ROP_OR_REVERSE = 13, - ROP_OR = 14, - ROP_SET = 15, -}; - -enum a3xx_render_mode { - RB_RENDERING_PASS = 0, - RB_TILING_PASS = 1, - RB_RESOLVE_PASS = 2, - RB_COMPUTE_PASS = 3, -}; - -enum a3xx_msaa_samples { - MSAA_ONE = 0, - MSAA_TWO = 1, - MSAA_FOUR = 2, -}; - -enum a3xx_threadmode { - MULTI = 0, - SINGLE = 1, -}; - -enum a3xx_instrbuffermode { - CACHE = 0, - BUFFER = 1, -}; - -enum a3xx_threadsize { - TWO_QUADS = 0, - FOUR_QUADS = 1, -}; - -enum a3xx_color_swap { - WZYX = 0, - WXYZ = 1, - ZYXW = 2, - XYZW = 3, -}; - -enum a3xx_rb_blend_opcode { - BLEND_DST_PLUS_SRC = 0, - BLEND_SRC_MINUS_DST = 1, - BLEND_DST_MINUS_SRC = 2, - BLEND_MIN_DST_SRC = 3, - BLEND_MAX_DST_SRC = 4, -}; - -enum a4xx_tess_spacing { - EQUAL_SPACING = 0, - ODD_SPACING = 2, - EVEN_SPACING = 3, -}; - -#define REG_AXXX_CP_RB_BASE 0x000001c0 - -#define REG_AXXX_CP_RB_CNTL 0x000001c1 -#define AXXX_CP_RB_CNTL_BUFSZ__MASK 0x0000003f -#define AXXX_CP_RB_CNTL_BUFSZ__SHIFT 0 -static inline uint32_t AXXX_CP_RB_CNTL_BUFSZ(uint32_t val) -{ - return ((val) << AXXX_CP_RB_CNTL_BUFSZ__SHIFT) & AXXX_CP_RB_CNTL_BUFSZ__MASK; -} -#define AXXX_CP_RB_CNTL_BLKSZ__MASK 0x00003f00 -#define AXXX_CP_RB_CNTL_BLKSZ__SHIFT 8 -static inline uint32_t AXXX_CP_RB_CNTL_BLKSZ(uint32_t val) -{ - return ((val) << AXXX_CP_RB_CNTL_BLKSZ__SHIFT) & AXXX_CP_RB_CNTL_BLKSZ__MASK; -} -#define AXXX_CP_RB_CNTL_BUF_SWAP__MASK 0x00030000 -#define AXXX_CP_RB_CNTL_BUF_SWAP__SHIFT 16 -static inline uint32_t AXXX_CP_RB_CNTL_BUF_SWAP(uint32_t val) -{ - return ((val) << 
AXXX_CP_RB_CNTL_BUF_SWAP__SHIFT) & AXXX_CP_RB_CNTL_BUF_SWAP__MASK; -} -#define AXXX_CP_RB_CNTL_POLL_EN 0x00100000 -#define AXXX_CP_RB_CNTL_NO_UPDATE 0x08000000 -#define AXXX_CP_RB_CNTL_RPTR_WR_EN 0x80000000 - -#define REG_AXXX_CP_RB_RPTR_ADDR 0x000001c3 -#define AXXX_CP_RB_RPTR_ADDR_SWAP__MASK 0x00000003 -#define AXXX_CP_RB_RPTR_ADDR_SWAP__SHIFT 0 -static inline uint32_t AXXX_CP_RB_RPTR_ADDR_SWAP(uint32_t val) -{ - return ((val) << AXXX_CP_RB_RPTR_ADDR_SWAP__SHIFT) & AXXX_CP_RB_RPTR_ADDR_SWAP__MASK; -} -#define AXXX_CP_RB_RPTR_ADDR_ADDR__MASK 0xfffffffc -#define AXXX_CP_RB_RPTR_ADDR_ADDR__SHIFT 2 -static inline uint32_t AXXX_CP_RB_RPTR_ADDR_ADDR(uint32_t val) -{ - assert(!(val & 0x3)); - return ((val >> 2) << AXXX_CP_RB_RPTR_ADDR_ADDR__SHIFT) & AXXX_CP_RB_RPTR_ADDR_ADDR__MASK; -} - -#define REG_AXXX_CP_RB_RPTR 0x000001c4 - -#define REG_AXXX_CP_RB_WPTR 0x000001c5 - -#define REG_AXXX_CP_RB_WPTR_DELAY 0x000001c6 - -#define REG_AXXX_CP_RB_RPTR_WR 0x000001c7 - -#define REG_AXXX_CP_RB_WPTR_BASE 0x000001c8 - -#define REG_AXXX_CP_QUEUE_THRESHOLDS 0x000001d5 -#define AXXX_CP_QUEUE_THRESHOLDS_CSQ_IB1_START__MASK 0x0000000f -#define AXXX_CP_QUEUE_THRESHOLDS_CSQ_IB1_START__SHIFT 0 -static inline uint32_t AXXX_CP_QUEUE_THRESHOLDS_CSQ_IB1_START(uint32_t val) -{ - return ((val) << AXXX_CP_QUEUE_THRESHOLDS_CSQ_IB1_START__SHIFT) & AXXX_CP_QUEUE_THRESHOLDS_CSQ_IB1_START__MASK; -} -#define AXXX_CP_QUEUE_THRESHOLDS_CSQ_IB2_START__MASK 0x00000f00 -#define AXXX_CP_QUEUE_THRESHOLDS_CSQ_IB2_START__SHIFT 8 -static inline uint32_t AXXX_CP_QUEUE_THRESHOLDS_CSQ_IB2_START(uint32_t val) -{ - return ((val) << AXXX_CP_QUEUE_THRESHOLDS_CSQ_IB2_START__SHIFT) & AXXX_CP_QUEUE_THRESHOLDS_CSQ_IB2_START__MASK; -} -#define AXXX_CP_QUEUE_THRESHOLDS_CSQ_ST_START__MASK 0x000f0000 -#define AXXX_CP_QUEUE_THRESHOLDS_CSQ_ST_START__SHIFT 16 -static inline uint32_t AXXX_CP_QUEUE_THRESHOLDS_CSQ_ST_START(uint32_t val) -{ - return ((val) << AXXX_CP_QUEUE_THRESHOLDS_CSQ_ST_START__SHIFT) & 
AXXX_CP_QUEUE_THRESHOLDS_CSQ_ST_START__MASK; -} - -#define REG_AXXX_CP_MEQ_THRESHOLDS 0x000001d6 -#define AXXX_CP_MEQ_THRESHOLDS_MEQ_END__MASK 0x001f0000 -#define AXXX_CP_MEQ_THRESHOLDS_MEQ_END__SHIFT 16 -static inline uint32_t AXXX_CP_MEQ_THRESHOLDS_MEQ_END(uint32_t val) -{ - return ((val) << AXXX_CP_MEQ_THRESHOLDS_MEQ_END__SHIFT) & AXXX_CP_MEQ_THRESHOLDS_MEQ_END__MASK; -} -#define AXXX_CP_MEQ_THRESHOLDS_ROQ_END__MASK 0x1f000000 -#define AXXX_CP_MEQ_THRESHOLDS_ROQ_END__SHIFT 24 -static inline uint32_t AXXX_CP_MEQ_THRESHOLDS_ROQ_END(uint32_t val) -{ - return ((val) << AXXX_CP_MEQ_THRESHOLDS_ROQ_END__SHIFT) & AXXX_CP_MEQ_THRESHOLDS_ROQ_END__MASK; -} - -#define REG_AXXX_CP_CSQ_AVAIL 0x000001d7 -#define AXXX_CP_CSQ_AVAIL_RING__MASK 0x0000007f -#define AXXX_CP_CSQ_AVAIL_RING__SHIFT 0 -static inline uint32_t AXXX_CP_CSQ_AVAIL_RING(uint32_t val) -{ - return ((val) << AXXX_CP_CSQ_AVAIL_RING__SHIFT) & AXXX_CP_CSQ_AVAIL_RING__MASK; -} -#define AXXX_CP_CSQ_AVAIL_IB1__MASK 0x00007f00 -#define AXXX_CP_CSQ_AVAIL_IB1__SHIFT 8 -static inline uint32_t AXXX_CP_CSQ_AVAIL_IB1(uint32_t val) -{ - return ((val) << AXXX_CP_CSQ_AVAIL_IB1__SHIFT) & AXXX_CP_CSQ_AVAIL_IB1__MASK; -} -#define AXXX_CP_CSQ_AVAIL_IB2__MASK 0x007f0000 -#define AXXX_CP_CSQ_AVAIL_IB2__SHIFT 16 -static inline uint32_t AXXX_CP_CSQ_AVAIL_IB2(uint32_t val) -{ - return ((val) << AXXX_CP_CSQ_AVAIL_IB2__SHIFT) & AXXX_CP_CSQ_AVAIL_IB2__MASK; -} - -#define REG_AXXX_CP_STQ_AVAIL 0x000001d8 -#define AXXX_CP_STQ_AVAIL_ST__MASK 0x0000007f -#define AXXX_CP_STQ_AVAIL_ST__SHIFT 0 -static inline uint32_t AXXX_CP_STQ_AVAIL_ST(uint32_t val) -{ - return ((val) << AXXX_CP_STQ_AVAIL_ST__SHIFT) & AXXX_CP_STQ_AVAIL_ST__MASK; -} - -#define REG_AXXX_CP_MEQ_AVAIL 0x000001d9 -#define AXXX_CP_MEQ_AVAIL_MEQ__MASK 0x0000001f -#define AXXX_CP_MEQ_AVAIL_MEQ__SHIFT 0 -static inline uint32_t AXXX_CP_MEQ_AVAIL_MEQ(uint32_t val) -{ - return ((val) << AXXX_CP_MEQ_AVAIL_MEQ__SHIFT) & AXXX_CP_MEQ_AVAIL_MEQ__MASK; -} - -#define REG_AXXX_SCRATCH_UMSK 
0x000001dc -#define AXXX_SCRATCH_UMSK_UMSK__MASK 0x000000ff -#define AXXX_SCRATCH_UMSK_UMSK__SHIFT 0 -static inline uint32_t AXXX_SCRATCH_UMSK_UMSK(uint32_t val) -{ - return ((val) << AXXX_SCRATCH_UMSK_UMSK__SHIFT) & AXXX_SCRATCH_UMSK_UMSK__MASK; -} -#define AXXX_SCRATCH_UMSK_SWAP__MASK 0x00030000 -#define AXXX_SCRATCH_UMSK_SWAP__SHIFT 16 -static inline uint32_t AXXX_SCRATCH_UMSK_SWAP(uint32_t val) -{ - return ((val) << AXXX_SCRATCH_UMSK_SWAP__SHIFT) & AXXX_SCRATCH_UMSK_SWAP__MASK; -} - -#define REG_AXXX_SCRATCH_ADDR 0x000001dd - -#define REG_AXXX_CP_ME_RDADDR 0x000001ea - -#define REG_AXXX_CP_STATE_DEBUG_INDEX 0x000001ec - -#define REG_AXXX_CP_STATE_DEBUG_DATA 0x000001ed - -#define REG_AXXX_CP_INT_CNTL 0x000001f2 -#define AXXX_CP_INT_CNTL_SW_INT_MASK 0x00080000 -#define AXXX_CP_INT_CNTL_T0_PACKET_IN_IB_MASK 0x00800000 -#define AXXX_CP_INT_CNTL_OPCODE_ERROR_MASK 0x01000000 -#define AXXX_CP_INT_CNTL_PROTECTED_MODE_ERROR_MASK 0x02000000 -#define AXXX_CP_INT_CNTL_RESERVED_BIT_ERROR_MASK 0x04000000 -#define AXXX_CP_INT_CNTL_IB_ERROR_MASK 0x08000000 -#define AXXX_CP_INT_CNTL_IB2_INT_MASK 0x20000000 -#define AXXX_CP_INT_CNTL_IB1_INT_MASK 0x40000000 -#define AXXX_CP_INT_CNTL_RB_INT_MASK 0x80000000 - -#define REG_AXXX_CP_INT_STATUS 0x000001f3 - -#define REG_AXXX_CP_INT_ACK 0x000001f4 - -#define REG_AXXX_CP_ME_CNTL 0x000001f6 -#define AXXX_CP_ME_CNTL_BUSY 0x20000000 -#define AXXX_CP_ME_CNTL_HALT 0x10000000 - -#define REG_AXXX_CP_ME_STATUS 0x000001f7 - -#define REG_AXXX_CP_ME_RAM_WADDR 0x000001f8 - -#define REG_AXXX_CP_ME_RAM_RADDR 0x000001f9 - -#define REG_AXXX_CP_ME_RAM_DATA 0x000001fa - -#define REG_AXXX_CP_DEBUG 0x000001fc -#define AXXX_CP_DEBUG_PREDICATE_DISABLE 0x00800000 -#define AXXX_CP_DEBUG_PROG_END_PTR_ENABLE 0x01000000 -#define AXXX_CP_DEBUG_MIU_128BIT_WRITE_ENABLE 0x02000000 -#define AXXX_CP_DEBUG_PREFETCH_PASS_NOPS 0x04000000 -#define AXXX_CP_DEBUG_DYNAMIC_CLK_DISABLE 0x08000000 -#define AXXX_CP_DEBUG_PREFETCH_MATCH_DISABLE 0x10000000 -#define 
AXXX_CP_DEBUG_SIMPLE_ME_FLOW_CONTROL 0x40000000 -#define AXXX_CP_DEBUG_MIU_WRITE_PACK_DISABLE 0x80000000 - -#define REG_AXXX_CP_CSQ_RB_STAT 0x000001fd -#define AXXX_CP_CSQ_RB_STAT_RPTR__MASK 0x0000007f -#define AXXX_CP_CSQ_RB_STAT_RPTR__SHIFT 0 -static inline uint32_t AXXX_CP_CSQ_RB_STAT_RPTR(uint32_t val) -{ - return ((val) << AXXX_CP_CSQ_RB_STAT_RPTR__SHIFT) & AXXX_CP_CSQ_RB_STAT_RPTR__MASK; -} -#define AXXX_CP_CSQ_RB_STAT_WPTR__MASK 0x007f0000 -#define AXXX_CP_CSQ_RB_STAT_WPTR__SHIFT 16 -static inline uint32_t AXXX_CP_CSQ_RB_STAT_WPTR(uint32_t val) -{ - return ((val) << AXXX_CP_CSQ_RB_STAT_WPTR__SHIFT) & AXXX_CP_CSQ_RB_STAT_WPTR__MASK; -} - -#define REG_AXXX_CP_CSQ_IB1_STAT 0x000001fe -#define AXXX_CP_CSQ_IB1_STAT_RPTR__MASK 0x0000007f -#define AXXX_CP_CSQ_IB1_STAT_RPTR__SHIFT 0 -static inline uint32_t AXXX_CP_CSQ_IB1_STAT_RPTR(uint32_t val) -{ - return ((val) << AXXX_CP_CSQ_IB1_STAT_RPTR__SHIFT) & AXXX_CP_CSQ_IB1_STAT_RPTR__MASK; -} -#define AXXX_CP_CSQ_IB1_STAT_WPTR__MASK 0x007f0000 -#define AXXX_CP_CSQ_IB1_STAT_WPTR__SHIFT 16 -static inline uint32_t AXXX_CP_CSQ_IB1_STAT_WPTR(uint32_t val) -{ - return ((val) << AXXX_CP_CSQ_IB1_STAT_WPTR__SHIFT) & AXXX_CP_CSQ_IB1_STAT_WPTR__MASK; -} - -#define REG_AXXX_CP_CSQ_IB2_STAT 0x000001ff -#define AXXX_CP_CSQ_IB2_STAT_RPTR__MASK 0x0000007f -#define AXXX_CP_CSQ_IB2_STAT_RPTR__SHIFT 0 -static inline uint32_t AXXX_CP_CSQ_IB2_STAT_RPTR(uint32_t val) -{ - return ((val) << AXXX_CP_CSQ_IB2_STAT_RPTR__SHIFT) & AXXX_CP_CSQ_IB2_STAT_RPTR__MASK; -} -#define AXXX_CP_CSQ_IB2_STAT_WPTR__MASK 0x007f0000 -#define AXXX_CP_CSQ_IB2_STAT_WPTR__SHIFT 16 -static inline uint32_t AXXX_CP_CSQ_IB2_STAT_WPTR(uint32_t val) -{ - return ((val) << AXXX_CP_CSQ_IB2_STAT_WPTR__SHIFT) & AXXX_CP_CSQ_IB2_STAT_WPTR__MASK; -} - -#define REG_AXXX_CP_NON_PREFETCH_CNTRS 0x00000440 - -#define REG_AXXX_CP_STQ_ST_STAT 0x00000443 - -#define REG_AXXX_CP_ST_BASE 0x0000044d - -#define REG_AXXX_CP_ST_BUFSZ 0x0000044e - -#define REG_AXXX_CP_MEQ_STAT 0x0000044f - -#define 
REG_AXXX_CP_MIU_TAG_STAT 0x00000452 - -#define REG_AXXX_CP_BIN_MASK_LO 0x00000454 - -#define REG_AXXX_CP_BIN_MASK_HI 0x00000455 - -#define REG_AXXX_CP_BIN_SELECT_LO 0x00000456 - -#define REG_AXXX_CP_BIN_SELECT_HI 0x00000457 - -#define REG_AXXX_CP_IB1_BASE 0x00000458 - -#define REG_AXXX_CP_IB1_BUFSZ 0x00000459 - -#define REG_AXXX_CP_IB2_BASE 0x0000045a - -#define REG_AXXX_CP_IB2_BUFSZ 0x0000045b - -#define REG_AXXX_CP_STAT 0x0000047f -#define AXXX_CP_STAT_CP_BUSY 0x80000000 -#define AXXX_CP_STAT_VS_EVENT_FIFO_BUSY 0x40000000 -#define AXXX_CP_STAT_PS_EVENT_FIFO_BUSY 0x20000000 -#define AXXX_CP_STAT_CF_EVENT_FIFO_BUSY 0x10000000 -#define AXXX_CP_STAT_RB_EVENT_FIFO_BUSY 0x08000000 -#define AXXX_CP_STAT_ME_BUSY 0x04000000 -#define AXXX_CP_STAT_MIU_WR_C_BUSY 0x02000000 -#define AXXX_CP_STAT_CP_3D_BUSY 0x00800000 -#define AXXX_CP_STAT_CP_NRT_BUSY 0x00400000 -#define AXXX_CP_STAT_RBIU_SCRATCH_BUSY 0x00200000 -#define AXXX_CP_STAT_RCIU_ME_BUSY 0x00100000 -#define AXXX_CP_STAT_RCIU_PFP_BUSY 0x00080000 -#define AXXX_CP_STAT_MEQ_RING_BUSY 0x00040000 -#define AXXX_CP_STAT_PFP_BUSY 0x00020000 -#define AXXX_CP_STAT_ST_QUEUE_BUSY 0x00010000 -#define AXXX_CP_STAT_INDIRECT2_QUEUE_BUSY 0x00002000 -#define AXXX_CP_STAT_INDIRECTS_QUEUE_BUSY 0x00001000 -#define AXXX_CP_STAT_RING_QUEUE_BUSY 0x00000800 -#define AXXX_CP_STAT_CSF_BUSY 0x00000400 -#define AXXX_CP_STAT_CSF_ST_BUSY 0x00000200 -#define AXXX_CP_STAT_EVENT_BUSY 0x00000100 -#define AXXX_CP_STAT_CSF_INDIRECT2_BUSY 0x00000080 -#define AXXX_CP_STAT_CSF_INDIRECTS_BUSY 0x00000040 -#define AXXX_CP_STAT_CSF_RING_BUSY 0x00000020 -#define AXXX_CP_STAT_RCIU_BUSY 0x00000010 -#define AXXX_CP_STAT_RBIU_BUSY 0x00000008 -#define AXXX_CP_STAT_MIU_RD_RETURN_BUSY 0x00000004 -#define AXXX_CP_STAT_MIU_RD_REQ_BUSY 0x00000002 -#define AXXX_CP_STAT_MIU_WR_BUSY 0x00000001 - -#define REG_AXXX_CP_SCRATCH_REG0 0x00000578 - -#define REG_AXXX_CP_SCRATCH_REG1 0x00000579 - -#define REG_AXXX_CP_SCRATCH_REG2 0x0000057a - -#define REG_AXXX_CP_SCRATCH_REG3 
0x0000057b - -#define REG_AXXX_CP_SCRATCH_REG4 0x0000057c - -#define REG_AXXX_CP_SCRATCH_REG5 0x0000057d - -#define REG_AXXX_CP_SCRATCH_REG6 0x0000057e - -#define REG_AXXX_CP_SCRATCH_REG7 0x0000057f - -#define REG_AXXX_CP_ME_VS_EVENT_SRC 0x00000600 - -#define REG_AXXX_CP_ME_VS_EVENT_ADDR 0x00000601 - -#define REG_AXXX_CP_ME_VS_EVENT_DATA 0x00000602 - -#define REG_AXXX_CP_ME_VS_EVENT_ADDR_SWM 0x00000603 - -#define REG_AXXX_CP_ME_VS_EVENT_DATA_SWM 0x00000604 - -#define REG_AXXX_CP_ME_PS_EVENT_SRC 0x00000605 - -#define REG_AXXX_CP_ME_PS_EVENT_ADDR 0x00000606 - -#define REG_AXXX_CP_ME_PS_EVENT_DATA 0x00000607 - -#define REG_AXXX_CP_ME_PS_EVENT_ADDR_SWM 0x00000608 - -#define REG_AXXX_CP_ME_PS_EVENT_DATA_SWM 0x00000609 - -#define REG_AXXX_CP_ME_CF_EVENT_SRC 0x0000060a - -#define REG_AXXX_CP_ME_CF_EVENT_ADDR 0x0000060b - -#define REG_AXXX_CP_ME_CF_EVENT_DATA 0x0000060c - -#define REG_AXXX_CP_ME_NRT_ADDR 0x0000060d - -#define REG_AXXX_CP_ME_NRT_DATA 0x0000060e - -#define REG_AXXX_CP_ME_VS_FETCH_DONE_SRC 0x00000612 - -#define REG_AXXX_CP_ME_VS_FETCH_DONE_ADDR 0x00000613 - -#define REG_AXXX_CP_ME_VS_FETCH_DONE_DATA 0x00000614 - - -#endif /* ADRENO_COMMON_XML */ diff -Nru mesa-18.3.3/src/gallium/drivers/freedreno/adreno_pm4.xml.h mesa-19.0.1/src/gallium/drivers/freedreno/adreno_pm4.xml.h --- mesa-18.3.3/src/gallium/drivers/freedreno/adreno_pm4.xml.h 2018-12-07 18:58:04.000000000 +0000 +++ mesa-19.0.1/src/gallium/drivers/freedreno/adreno_pm4.xml.h 1970-01-01 00:00:00.000000000 +0000 @@ -1,1569 +0,0 @@ -#ifndef ADRENO_PM4_XML -#define ADRENO_PM4_XML - -/* Autogenerated file, DO NOT EDIT manually! 
- -This file was generated by the rules-ng-ng headergen tool in this git repository: -http://github.com/freedreno/envytools/ -git clone https://github.com/freedreno/envytools.git - -The rules-ng-ng source files this header was generated from are: -- /home/robclark/src/envytools/rnndb/adreno.xml ( 501 bytes, from 2018-07-03 19:37:13) -- /home/robclark/src/envytools/rnndb/freedreno_copyright.xml ( 1572 bytes, from 2018-07-03 19:37:13) -- /home/robclark/src/envytools/rnndb/adreno/a2xx.xml ( 37936 bytes, from 2018-10-08 11:43:51) -- /home/robclark/src/envytools/rnndb/adreno/adreno_common.xml ( 14201 bytes, from 2018-10-08 11:43:51) -- /home/robclark/src/envytools/rnndb/adreno/adreno_pm4.xml ( 42864 bytes, from 2018-10-08 21:57:22) -- /home/robclark/src/envytools/rnndb/adreno/a3xx.xml ( 83840 bytes, from 2018-07-03 19:37:13) -- /home/robclark/src/envytools/rnndb/adreno/a4xx.xml ( 112086 bytes, from 2018-07-03 19:37:13) -- /home/robclark/src/envytools/rnndb/adreno/a5xx.xml ( 147240 bytes, from 2018-10-08 21:57:22) -- /home/robclark/src/envytools/rnndb/adreno/a6xx.xml ( 140514 bytes, from 2018-10-08 21:57:35) -- /home/robclark/src/envytools/rnndb/adreno/a6xx_gmu.xml ( 10431 bytes, from 2018-09-14 13:03:07) -- /home/robclark/src/envytools/rnndb/adreno/ocmem.xml ( 1773 bytes, from 2018-07-03 19:37:13) - -Copyright (C) 2013-2018 by the following authors: -- Rob Clark (robclark) -- Ilia Mirkin (imirkin) - -Permission is hereby granted, free of charge, to any person obtaining -a copy of this software and associated documentation files (the -"Software"), to deal in the Software without restriction, including -without limitation the rights to use, copy, modify, merge, publish, -distribute, sublicense, and/or sell copies of the Software, and to -permit persons to whom the Software is furnished to do so, subject to -the following conditions: - -The above copyright notice and this permission notice (including the -next paragraph) shall be included in all copies or substantial 
-portions of the Software. - -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, -EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF -MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. -IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE -LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION -OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION -WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. -*/ - - -enum vgt_event_type { - VS_DEALLOC = 0, - PS_DEALLOC = 1, - VS_DONE_TS = 2, - PS_DONE_TS = 3, - CACHE_FLUSH_TS = 4, - CONTEXT_DONE = 5, - CACHE_FLUSH = 6, - HLSQ_FLUSH = 7, - VIZQUERY_START = 7, - VIZQUERY_END = 8, - SC_WAIT_WC = 9, - RST_PIX_CNT = 13, - RST_VTX_CNT = 14, - TILE_FLUSH = 15, - STAT_EVENT = 16, - CACHE_FLUSH_AND_INV_TS_EVENT = 20, - ZPASS_DONE = 21, - CACHE_FLUSH_AND_INV_EVENT = 22, - PERFCOUNTER_START = 23, - PERFCOUNTER_STOP = 24, - VS_FETCH_DONE = 27, - FACENESS_FLUSH = 28, - FLUSH_SO_0 = 17, - FLUSH_SO_1 = 18, - FLUSH_SO_2 = 19, - FLUSH_SO_3 = 20, - PC_CCU_INVALIDATE_DEPTH = 24, - PC_CCU_INVALIDATE_COLOR = 25, - UNK_1C = 28, - UNK_1D = 29, - BLIT = 30, - UNK_25 = 37, - LRZ_FLUSH = 38, - UNK_2C = 44, - UNK_2D = 45, -}; - -enum pc_di_primtype { - DI_PT_NONE = 0, - DI_PT_POINTLIST_PSIZE = 1, - DI_PT_LINELIST = 2, - DI_PT_LINESTRIP = 3, - DI_PT_TRILIST = 4, - DI_PT_TRIFAN = 5, - DI_PT_TRISTRIP = 6, - DI_PT_LINELOOP = 7, - DI_PT_RECTLIST = 8, - DI_PT_POINTLIST = 9, - DI_PT_LINE_ADJ = 10, - DI_PT_LINESTRIP_ADJ = 11, - DI_PT_TRI_ADJ = 12, - DI_PT_TRISTRIP_ADJ = 13, -}; - -enum pc_di_src_sel { - DI_SRC_SEL_DMA = 0, - DI_SRC_SEL_IMMEDIATE = 1, - DI_SRC_SEL_AUTO_INDEX = 2, - DI_SRC_SEL_RESERVED = 3, -}; - -enum pc_di_face_cull_sel { - DI_FACE_CULL_NONE = 0, - DI_FACE_CULL_FETCH = 1, - DI_FACE_BACKFACE_CULL = 2, - DI_FACE_FRONTFACE_CULL = 3, -}; - -enum pc_di_index_size { - INDEX_SIZE_IGN = 0, - INDEX_SIZE_16_BIT = 0, - INDEX_SIZE_32_BIT = 1, - 
INDEX_SIZE_8_BIT = 2, - INDEX_SIZE_INVALID = 0, -}; - -enum pc_di_vis_cull_mode { - IGNORE_VISIBILITY = 0, - USE_VISIBILITY = 1, -}; - -enum adreno_pm4_packet_type { - CP_TYPE0_PKT = 0, - CP_TYPE1_PKT = 0x40000000, - CP_TYPE2_PKT = 0x80000000, - CP_TYPE3_PKT = 0xc0000000, - CP_TYPE4_PKT = 0x40000000, - CP_TYPE7_PKT = 0x70000000, -}; - -enum adreno_pm4_type3_packets { - CP_ME_INIT = 72, - CP_NOP = 16, - CP_PREEMPT_ENABLE = 28, - CP_PREEMPT_TOKEN = 30, - CP_INDIRECT_BUFFER = 63, - CP_INDIRECT_BUFFER_PFD = 55, - CP_WAIT_FOR_IDLE = 38, - CP_WAIT_REG_MEM = 60, - CP_WAIT_REG_EQ = 82, - CP_WAIT_REG_GTE = 83, - CP_WAIT_UNTIL_READ = 92, - CP_WAIT_IB_PFD_COMPLETE = 93, - CP_REG_RMW = 33, - CP_SET_BIN_DATA = 47, - CP_SET_BIN_DATA5 = 47, - CP_REG_TO_MEM = 62, - CP_MEM_WRITE = 61, - CP_MEM_WRITE_CNTR = 79, - CP_COND_EXEC = 68, - CP_COND_WRITE = 69, - CP_COND_WRITE5 = 69, - CP_EVENT_WRITE = 70, - CP_EVENT_WRITE_SHD = 88, - CP_EVENT_WRITE_CFL = 89, - CP_EVENT_WRITE_ZPD = 91, - CP_RUN_OPENCL = 49, - CP_DRAW_INDX = 34, - CP_DRAW_INDX_2 = 54, - CP_DRAW_INDX_BIN = 52, - CP_DRAW_INDX_2_BIN = 53, - CP_VIZ_QUERY = 35, - CP_SET_STATE = 37, - CP_SET_CONSTANT = 45, - CP_IM_LOAD = 39, - CP_IM_LOAD_IMMEDIATE = 43, - CP_LOAD_CONSTANT_CONTEXT = 46, - CP_INVALIDATE_STATE = 59, - CP_SET_SHADER_BASES = 74, - CP_SET_BIN_MASK = 80, - CP_SET_BIN_SELECT = 81, - CP_CONTEXT_UPDATE = 94, - CP_INTERRUPT = 64, - CP_IM_STORE = 44, - CP_SET_DRAW_INIT_FLAGS = 75, - CP_SET_PROTECTED_MODE = 95, - CP_BOOTSTRAP_UCODE = 111, - CP_LOAD_STATE = 48, - CP_LOAD_STATE4 = 48, - CP_COND_INDIRECT_BUFFER_PFE = 58, - CP_COND_INDIRECT_BUFFER_PFD = 50, - CP_INDIRECT_BUFFER_PFE = 63, - CP_SET_BIN = 76, - CP_TEST_TWO_MEMS = 113, - CP_REG_WR_NO_CTXT = 120, - CP_RECORD_PFP_TIMESTAMP = 17, - CP_SET_SECURE_MODE = 102, - CP_WAIT_FOR_ME = 19, - CP_SET_DRAW_STATE = 67, - CP_DRAW_INDX_OFFSET = 56, - CP_DRAW_INDIRECT = 40, - CP_DRAW_INDX_INDIRECT = 41, - CP_DRAW_AUTO = 36, - CP_UNKNOWN_19 = 25, - CP_UNKNOWN_1A = 26, - CP_UNKNOWN_4E = 
78, - CP_WIDE_REG_WRITE = 116, - CP_SCRATCH_TO_REG = 77, - CP_REG_TO_SCRATCH = 74, - CP_WAIT_MEM_WRITES = 18, - CP_COND_REG_EXEC = 71, - CP_MEM_TO_REG = 66, - CP_EXEC_CS_INDIRECT = 65, - CP_EXEC_CS = 51, - CP_PERFCOUNTER_ACTION = 80, - CP_SMMU_TABLE_UPDATE = 83, - CP_SET_MARKER = 101, - CP_SET_PSEUDO_REG = 86, - CP_CONTEXT_REG_BUNCH = 92, - CP_YIELD_ENABLE = 28, - CP_SKIP_IB2_ENABLE_GLOBAL = 29, - CP_SKIP_IB2_ENABLE_LOCAL = 35, - CP_SET_SUBDRAW_SIZE = 53, - CP_SET_VISIBILITY_OVERRIDE = 100, - CP_PREEMPT_ENABLE_GLOBAL = 105, - CP_PREEMPT_ENABLE_LOCAL = 106, - CP_CONTEXT_SWITCH_YIELD = 107, - CP_SET_RENDER_MODE = 108, - CP_COMPUTE_CHECKPOINT = 110, - CP_MEM_TO_MEM = 115, - CP_BLIT = 44, - CP_REG_TEST = 57, - CP_SET_MODE = 99, - CP_LOAD_STATE6_GEOM = 50, - CP_LOAD_STATE6_FRAG = 52, - IN_IB_PREFETCH_END = 23, - IN_SUBBLK_PREFETCH = 31, - IN_INSTR_PREFETCH = 32, - IN_INSTR_MATCH = 71, - IN_CONST_PREFETCH = 73, - IN_INCR_UPDT_STATE = 85, - IN_INCR_UPDT_CONST = 86, - IN_INCR_UPDT_INSTR = 87, - PKT4 = 4, - CP_UNK_A6XX_14 = 20, - CP_UNK_A6XX_36 = 54, - CP_UNK_A6XX_55 = 85, - CP_REG_WRITE = 109, -}; - -enum adreno_state_block { - SB_VERT_TEX = 0, - SB_VERT_MIPADDR = 1, - SB_FRAG_TEX = 2, - SB_FRAG_MIPADDR = 3, - SB_VERT_SHADER = 4, - SB_GEOM_SHADER = 5, - SB_FRAG_SHADER = 6, - SB_COMPUTE_SHADER = 7, -}; - -enum adreno_state_type { - ST_SHADER = 0, - ST_CONSTANTS = 1, -}; - -enum adreno_state_src { - SS_DIRECT = 0, - SS_INVALID_ALL_IC = 2, - SS_INVALID_PART_IC = 3, - SS_INDIRECT = 4, - SS_INDIRECT_TCM = 5, - SS_INDIRECT_STM = 6, -}; - -enum a4xx_state_block { - SB4_VS_TEX = 0, - SB4_HS_TEX = 1, - SB4_DS_TEX = 2, - SB4_GS_TEX = 3, - SB4_FS_TEX = 4, - SB4_CS_TEX = 5, - SB4_VS_SHADER = 8, - SB4_HS_SHADER = 9, - SB4_DS_SHADER = 10, - SB4_GS_SHADER = 11, - SB4_FS_SHADER = 12, - SB4_CS_SHADER = 13, - SB4_SSBO = 14, - SB4_CS_SSBO = 15, -}; - -enum a4xx_state_type { - ST4_SHADER = 0, - ST4_CONSTANTS = 1, -}; - -enum a4xx_state_src { - SS4_DIRECT = 0, - SS4_INDIRECT = 2, -}; - -enum 
a6xx_state_block { - SB6_VS_TEX = 0, - SB6_HS_TEX = 1, - SB6_DS_TEX = 2, - SB6_GS_TEX = 3, - SB6_FS_TEX = 4, - SB6_CS_TEX = 5, - SB6_VS_SHADER = 8, - SB6_HS_SHADER = 9, - SB6_DS_SHADER = 10, - SB6_GS_SHADER = 11, - SB6_FS_SHADER = 12, - SB6_CS_SHADER = 13, - SB6_SSBO = 14, - SB6_CS_SSBO = 15, -}; - -enum a6xx_state_type { - ST6_SHADER = 0, - ST6_CONSTANTS = 1, -}; - -enum a6xx_state_src { - SS6_DIRECT = 0, - SS6_INDIRECT = 2, -}; - -enum a4xx_index_size { - INDEX4_SIZE_8_BIT = 0, - INDEX4_SIZE_16_BIT = 1, - INDEX4_SIZE_32_BIT = 2, -}; - -enum cp_cond_function { - WRITE_ALWAYS = 0, - WRITE_LT = 1, - WRITE_LE = 2, - WRITE_EQ = 3, - WRITE_NE = 4, - WRITE_GE = 5, - WRITE_GT = 6, -}; - -enum render_mode_cmd { - BYPASS = 1, - BINNING = 2, - GMEM = 3, - BLIT2D = 5, - BLIT2DSCALE = 7, - END2D = 8, -}; - -enum cp_blit_cmd { - BLIT_OP_FILL = 0, - BLIT_OP_COPY = 1, - BLIT_OP_SCALE = 3, -}; - -enum a6xx_render_mode { - RM6_BYPASS = 1, - RM6_BINNING = 2, - RM6_GMEM = 4, - RM6_BLIT2D = 5, - RM6_RESOLVE = 6, - RM6_BLIT2DSCALE = 12, -}; - -enum pseudo_reg { - SMMU_INFO = 0, - NON_SECURE_SAVE_ADDR = 1, - SECURE_SAVE_ADDR = 2, - NON_PRIV_SAVE_ADDR = 3, - COUNTER = 4, -}; - -#define REG_CP_LOAD_STATE_0 0x00000000 -#define CP_LOAD_STATE_0_DST_OFF__MASK 0x0000ffff -#define CP_LOAD_STATE_0_DST_OFF__SHIFT 0 -static inline uint32_t CP_LOAD_STATE_0_DST_OFF(uint32_t val) -{ - return ((val) << CP_LOAD_STATE_0_DST_OFF__SHIFT) & CP_LOAD_STATE_0_DST_OFF__MASK; -} -#define CP_LOAD_STATE_0_STATE_SRC__MASK 0x00070000 -#define CP_LOAD_STATE_0_STATE_SRC__SHIFT 16 -static inline uint32_t CP_LOAD_STATE_0_STATE_SRC(enum adreno_state_src val) -{ - return ((val) << CP_LOAD_STATE_0_STATE_SRC__SHIFT) & CP_LOAD_STATE_0_STATE_SRC__MASK; -} -#define CP_LOAD_STATE_0_STATE_BLOCK__MASK 0x00380000 -#define CP_LOAD_STATE_0_STATE_BLOCK__SHIFT 19 -static inline uint32_t CP_LOAD_STATE_0_STATE_BLOCK(enum adreno_state_block val) -{ - return ((val) << CP_LOAD_STATE_0_STATE_BLOCK__SHIFT) & 
CP_LOAD_STATE_0_STATE_BLOCK__MASK; -} -#define CP_LOAD_STATE_0_NUM_UNIT__MASK 0xffc00000 -#define CP_LOAD_STATE_0_NUM_UNIT__SHIFT 22 -static inline uint32_t CP_LOAD_STATE_0_NUM_UNIT(uint32_t val) -{ - return ((val) << CP_LOAD_STATE_0_NUM_UNIT__SHIFT) & CP_LOAD_STATE_0_NUM_UNIT__MASK; -} - -#define REG_CP_LOAD_STATE_1 0x00000001 -#define CP_LOAD_STATE_1_STATE_TYPE__MASK 0x00000003 -#define CP_LOAD_STATE_1_STATE_TYPE__SHIFT 0 -static inline uint32_t CP_LOAD_STATE_1_STATE_TYPE(enum adreno_state_type val) -{ - return ((val) << CP_LOAD_STATE_1_STATE_TYPE__SHIFT) & CP_LOAD_STATE_1_STATE_TYPE__MASK; -} -#define CP_LOAD_STATE_1_EXT_SRC_ADDR__MASK 0xfffffffc -#define CP_LOAD_STATE_1_EXT_SRC_ADDR__SHIFT 2 -static inline uint32_t CP_LOAD_STATE_1_EXT_SRC_ADDR(uint32_t val) -{ - assert(!(val & 0x3)); - return ((val >> 2) << CP_LOAD_STATE_1_EXT_SRC_ADDR__SHIFT) & CP_LOAD_STATE_1_EXT_SRC_ADDR__MASK; -} - -#define REG_CP_LOAD_STATE4_0 0x00000000 -#define CP_LOAD_STATE4_0_DST_OFF__MASK 0x00003fff -#define CP_LOAD_STATE4_0_DST_OFF__SHIFT 0 -static inline uint32_t CP_LOAD_STATE4_0_DST_OFF(uint32_t val) -{ - return ((val) << CP_LOAD_STATE4_0_DST_OFF__SHIFT) & CP_LOAD_STATE4_0_DST_OFF__MASK; -} -#define CP_LOAD_STATE4_0_STATE_SRC__MASK 0x00030000 -#define CP_LOAD_STATE4_0_STATE_SRC__SHIFT 16 -static inline uint32_t CP_LOAD_STATE4_0_STATE_SRC(enum a4xx_state_src val) -{ - return ((val) << CP_LOAD_STATE4_0_STATE_SRC__SHIFT) & CP_LOAD_STATE4_0_STATE_SRC__MASK; -} -#define CP_LOAD_STATE4_0_STATE_BLOCK__MASK 0x003c0000 -#define CP_LOAD_STATE4_0_STATE_BLOCK__SHIFT 18 -static inline uint32_t CP_LOAD_STATE4_0_STATE_BLOCK(enum a4xx_state_block val) -{ - return ((val) << CP_LOAD_STATE4_0_STATE_BLOCK__SHIFT) & CP_LOAD_STATE4_0_STATE_BLOCK__MASK; -} -#define CP_LOAD_STATE4_0_NUM_UNIT__MASK 0xffc00000 -#define CP_LOAD_STATE4_0_NUM_UNIT__SHIFT 22 -static inline uint32_t CP_LOAD_STATE4_0_NUM_UNIT(uint32_t val) -{ - return ((val) << CP_LOAD_STATE4_0_NUM_UNIT__SHIFT) & CP_LOAD_STATE4_0_NUM_UNIT__MASK; 
-} - -#define REG_CP_LOAD_STATE4_1 0x00000001 -#define CP_LOAD_STATE4_1_STATE_TYPE__MASK 0x00000003 -#define CP_LOAD_STATE4_1_STATE_TYPE__SHIFT 0 -static inline uint32_t CP_LOAD_STATE4_1_STATE_TYPE(enum a4xx_state_type val) -{ - return ((val) << CP_LOAD_STATE4_1_STATE_TYPE__SHIFT) & CP_LOAD_STATE4_1_STATE_TYPE__MASK; -} -#define CP_LOAD_STATE4_1_EXT_SRC_ADDR__MASK 0xfffffffc -#define CP_LOAD_STATE4_1_EXT_SRC_ADDR__SHIFT 2 -static inline uint32_t CP_LOAD_STATE4_1_EXT_SRC_ADDR(uint32_t val) -{ - assert(!(val & 0x3)); - return ((val >> 2) << CP_LOAD_STATE4_1_EXT_SRC_ADDR__SHIFT) & CP_LOAD_STATE4_1_EXT_SRC_ADDR__MASK; -} - -#define REG_CP_LOAD_STATE4_2 0x00000002 -#define CP_LOAD_STATE4_2_EXT_SRC_ADDR_HI__MASK 0xffffffff -#define CP_LOAD_STATE4_2_EXT_SRC_ADDR_HI__SHIFT 0 -static inline uint32_t CP_LOAD_STATE4_2_EXT_SRC_ADDR_HI(uint32_t val) -{ - return ((val) << CP_LOAD_STATE4_2_EXT_SRC_ADDR_HI__SHIFT) & CP_LOAD_STATE4_2_EXT_SRC_ADDR_HI__MASK; -} - -#define REG_CP_LOAD_STATE6_0 0x00000000 -#define CP_LOAD_STATE6_0_DST_OFF__MASK 0x00003fff -#define CP_LOAD_STATE6_0_DST_OFF__SHIFT 0 -static inline uint32_t CP_LOAD_STATE6_0_DST_OFF(uint32_t val) -{ - return ((val) << CP_LOAD_STATE6_0_DST_OFF__SHIFT) & CP_LOAD_STATE6_0_DST_OFF__MASK; -} -#define CP_LOAD_STATE6_0_STATE_TYPE__MASK 0x00004000 -#define CP_LOAD_STATE6_0_STATE_TYPE__SHIFT 14 -static inline uint32_t CP_LOAD_STATE6_0_STATE_TYPE(enum a6xx_state_type val) -{ - return ((val) << CP_LOAD_STATE6_0_STATE_TYPE__SHIFT) & CP_LOAD_STATE6_0_STATE_TYPE__MASK; -} -#define CP_LOAD_STATE6_0_STATE_SRC__MASK 0x00030000 -#define CP_LOAD_STATE6_0_STATE_SRC__SHIFT 16 -static inline uint32_t CP_LOAD_STATE6_0_STATE_SRC(enum a6xx_state_src val) -{ - return ((val) << CP_LOAD_STATE6_0_STATE_SRC__SHIFT) & CP_LOAD_STATE6_0_STATE_SRC__MASK; -} -#define CP_LOAD_STATE6_0_STATE_BLOCK__MASK 0x003c0000 -#define CP_LOAD_STATE6_0_STATE_BLOCK__SHIFT 18 -static inline uint32_t CP_LOAD_STATE6_0_STATE_BLOCK(enum a6xx_state_block val) -{ - return ((val) 
<< CP_LOAD_STATE6_0_STATE_BLOCK__SHIFT) & CP_LOAD_STATE6_0_STATE_BLOCK__MASK; -} -#define CP_LOAD_STATE6_0_NUM_UNIT__MASK 0xffc00000 -#define CP_LOAD_STATE6_0_NUM_UNIT__SHIFT 22 -static inline uint32_t CP_LOAD_STATE6_0_NUM_UNIT(uint32_t val) -{ - return ((val) << CP_LOAD_STATE6_0_NUM_UNIT__SHIFT) & CP_LOAD_STATE6_0_NUM_UNIT__MASK; -} - -#define REG_CP_LOAD_STATE6_1 0x00000001 -#define CP_LOAD_STATE6_1_EXT_SRC_ADDR__MASK 0xfffffffc -#define CP_LOAD_STATE6_1_EXT_SRC_ADDR__SHIFT 2 -static inline uint32_t CP_LOAD_STATE6_1_EXT_SRC_ADDR(uint32_t val) -{ - assert(!(val & 0x3)); - return ((val >> 2) << CP_LOAD_STATE6_1_EXT_SRC_ADDR__SHIFT) & CP_LOAD_STATE6_1_EXT_SRC_ADDR__MASK; -} - -#define REG_CP_LOAD_STATE6_2 0x00000002 -#define CP_LOAD_STATE6_2_EXT_SRC_ADDR_HI__MASK 0xffffffff -#define CP_LOAD_STATE6_2_EXT_SRC_ADDR_HI__SHIFT 0 -static inline uint32_t CP_LOAD_STATE6_2_EXT_SRC_ADDR_HI(uint32_t val) -{ - return ((val) << CP_LOAD_STATE6_2_EXT_SRC_ADDR_HI__SHIFT) & CP_LOAD_STATE6_2_EXT_SRC_ADDR_HI__MASK; -} - -#define REG_CP_DRAW_INDX_0 0x00000000 -#define CP_DRAW_INDX_0_VIZ_QUERY__MASK 0xffffffff -#define CP_DRAW_INDX_0_VIZ_QUERY__SHIFT 0 -static inline uint32_t CP_DRAW_INDX_0_VIZ_QUERY(uint32_t val) -{ - return ((val) << CP_DRAW_INDX_0_VIZ_QUERY__SHIFT) & CP_DRAW_INDX_0_VIZ_QUERY__MASK; -} - -#define REG_CP_DRAW_INDX_1 0x00000001 -#define CP_DRAW_INDX_1_PRIM_TYPE__MASK 0x0000003f -#define CP_DRAW_INDX_1_PRIM_TYPE__SHIFT 0 -static inline uint32_t CP_DRAW_INDX_1_PRIM_TYPE(enum pc_di_primtype val) -{ - return ((val) << CP_DRAW_INDX_1_PRIM_TYPE__SHIFT) & CP_DRAW_INDX_1_PRIM_TYPE__MASK; -} -#define CP_DRAW_INDX_1_SOURCE_SELECT__MASK 0x000000c0 -#define CP_DRAW_INDX_1_SOURCE_SELECT__SHIFT 6 -static inline uint32_t CP_DRAW_INDX_1_SOURCE_SELECT(enum pc_di_src_sel val) -{ - return ((val) << CP_DRAW_INDX_1_SOURCE_SELECT__SHIFT) & CP_DRAW_INDX_1_SOURCE_SELECT__MASK; -} -#define CP_DRAW_INDX_1_VIS_CULL__MASK 0x00000600 -#define CP_DRAW_INDX_1_VIS_CULL__SHIFT 9 -static inline uint32_t 
CP_DRAW_INDX_1_VIS_CULL(enum pc_di_vis_cull_mode val) -{ - return ((val) << CP_DRAW_INDX_1_VIS_CULL__SHIFT) & CP_DRAW_INDX_1_VIS_CULL__MASK; -} -#define CP_DRAW_INDX_1_INDEX_SIZE__MASK 0x00000800 -#define CP_DRAW_INDX_1_INDEX_SIZE__SHIFT 11 -static inline uint32_t CP_DRAW_INDX_1_INDEX_SIZE(enum pc_di_index_size val) -{ - return ((val) << CP_DRAW_INDX_1_INDEX_SIZE__SHIFT) & CP_DRAW_INDX_1_INDEX_SIZE__MASK; -} -#define CP_DRAW_INDX_1_NOT_EOP 0x00001000 -#define CP_DRAW_INDX_1_SMALL_INDEX 0x00002000 -#define CP_DRAW_INDX_1_PRE_DRAW_INITIATOR_ENABLE 0x00004000 -#define CP_DRAW_INDX_1_NUM_INSTANCES__MASK 0xff000000 -#define CP_DRAW_INDX_1_NUM_INSTANCES__SHIFT 24 -static inline uint32_t CP_DRAW_INDX_1_NUM_INSTANCES(uint32_t val) -{ - return ((val) << CP_DRAW_INDX_1_NUM_INSTANCES__SHIFT) & CP_DRAW_INDX_1_NUM_INSTANCES__MASK; -} - -#define REG_CP_DRAW_INDX_2 0x00000002 -#define CP_DRAW_INDX_2_NUM_INDICES__MASK 0xffffffff -#define CP_DRAW_INDX_2_NUM_INDICES__SHIFT 0 -static inline uint32_t CP_DRAW_INDX_2_NUM_INDICES(uint32_t val) -{ - return ((val) << CP_DRAW_INDX_2_NUM_INDICES__SHIFT) & CP_DRAW_INDX_2_NUM_INDICES__MASK; -} - -#define REG_CP_DRAW_INDX_3 0x00000003 -#define CP_DRAW_INDX_3_INDX_BASE__MASK 0xffffffff -#define CP_DRAW_INDX_3_INDX_BASE__SHIFT 0 -static inline uint32_t CP_DRAW_INDX_3_INDX_BASE(uint32_t val) -{ - return ((val) << CP_DRAW_INDX_3_INDX_BASE__SHIFT) & CP_DRAW_INDX_3_INDX_BASE__MASK; -} - -#define REG_CP_DRAW_INDX_4 0x00000004 -#define CP_DRAW_INDX_4_INDX_SIZE__MASK 0xffffffff -#define CP_DRAW_INDX_4_INDX_SIZE__SHIFT 0 -static inline uint32_t CP_DRAW_INDX_4_INDX_SIZE(uint32_t val) -{ - return ((val) << CP_DRAW_INDX_4_INDX_SIZE__SHIFT) & CP_DRAW_INDX_4_INDX_SIZE__MASK; -} - -#define REG_CP_DRAW_INDX_2_0 0x00000000 -#define CP_DRAW_INDX_2_0_VIZ_QUERY__MASK 0xffffffff -#define CP_DRAW_INDX_2_0_VIZ_QUERY__SHIFT 0 -static inline uint32_t CP_DRAW_INDX_2_0_VIZ_QUERY(uint32_t val) -{ - return ((val) << CP_DRAW_INDX_2_0_VIZ_QUERY__SHIFT) & 
CP_DRAW_INDX_2_0_VIZ_QUERY__MASK; -} - -#define REG_CP_DRAW_INDX_2_1 0x00000001 -#define CP_DRAW_INDX_2_1_PRIM_TYPE__MASK 0x0000003f -#define CP_DRAW_INDX_2_1_PRIM_TYPE__SHIFT 0 -static inline uint32_t CP_DRAW_INDX_2_1_PRIM_TYPE(enum pc_di_primtype val) -{ - return ((val) << CP_DRAW_INDX_2_1_PRIM_TYPE__SHIFT) & CP_DRAW_INDX_2_1_PRIM_TYPE__MASK; -} -#define CP_DRAW_INDX_2_1_SOURCE_SELECT__MASK 0x000000c0 -#define CP_DRAW_INDX_2_1_SOURCE_SELECT__SHIFT 6 -static inline uint32_t CP_DRAW_INDX_2_1_SOURCE_SELECT(enum pc_di_src_sel val) -{ - return ((val) << CP_DRAW_INDX_2_1_SOURCE_SELECT__SHIFT) & CP_DRAW_INDX_2_1_SOURCE_SELECT__MASK; -} -#define CP_DRAW_INDX_2_1_VIS_CULL__MASK 0x00000600 -#define CP_DRAW_INDX_2_1_VIS_CULL__SHIFT 9 -static inline uint32_t CP_DRAW_INDX_2_1_VIS_CULL(enum pc_di_vis_cull_mode val) -{ - return ((val) << CP_DRAW_INDX_2_1_VIS_CULL__SHIFT) & CP_DRAW_INDX_2_1_VIS_CULL__MASK; -} -#define CP_DRAW_INDX_2_1_INDEX_SIZE__MASK 0x00000800 -#define CP_DRAW_INDX_2_1_INDEX_SIZE__SHIFT 11 -static inline uint32_t CP_DRAW_INDX_2_1_INDEX_SIZE(enum pc_di_index_size val) -{ - return ((val) << CP_DRAW_INDX_2_1_INDEX_SIZE__SHIFT) & CP_DRAW_INDX_2_1_INDEX_SIZE__MASK; -} -#define CP_DRAW_INDX_2_1_NOT_EOP 0x00001000 -#define CP_DRAW_INDX_2_1_SMALL_INDEX 0x00002000 -#define CP_DRAW_INDX_2_1_PRE_DRAW_INITIATOR_ENABLE 0x00004000 -#define CP_DRAW_INDX_2_1_NUM_INSTANCES__MASK 0xff000000 -#define CP_DRAW_INDX_2_1_NUM_INSTANCES__SHIFT 24 -static inline uint32_t CP_DRAW_INDX_2_1_NUM_INSTANCES(uint32_t val) -{ - return ((val) << CP_DRAW_INDX_2_1_NUM_INSTANCES__SHIFT) & CP_DRAW_INDX_2_1_NUM_INSTANCES__MASK; -} - -#define REG_CP_DRAW_INDX_2_2 0x00000002 -#define CP_DRAW_INDX_2_2_NUM_INDICES__MASK 0xffffffff -#define CP_DRAW_INDX_2_2_NUM_INDICES__SHIFT 0 -static inline uint32_t CP_DRAW_INDX_2_2_NUM_INDICES(uint32_t val) -{ - return ((val) << CP_DRAW_INDX_2_2_NUM_INDICES__SHIFT) & CP_DRAW_INDX_2_2_NUM_INDICES__MASK; -} - -#define REG_CP_DRAW_INDX_OFFSET_0 0x00000000 -#define 
CP_DRAW_INDX_OFFSET_0_PRIM_TYPE__MASK 0x0000003f -#define CP_DRAW_INDX_OFFSET_0_PRIM_TYPE__SHIFT 0 -static inline uint32_t CP_DRAW_INDX_OFFSET_0_PRIM_TYPE(enum pc_di_primtype val) -{ - return ((val) << CP_DRAW_INDX_OFFSET_0_PRIM_TYPE__SHIFT) & CP_DRAW_INDX_OFFSET_0_PRIM_TYPE__MASK; -} -#define CP_DRAW_INDX_OFFSET_0_SOURCE_SELECT__MASK 0x000000c0 -#define CP_DRAW_INDX_OFFSET_0_SOURCE_SELECT__SHIFT 6 -static inline uint32_t CP_DRAW_INDX_OFFSET_0_SOURCE_SELECT(enum pc_di_src_sel val) -{ - return ((val) << CP_DRAW_INDX_OFFSET_0_SOURCE_SELECT__SHIFT) & CP_DRAW_INDX_OFFSET_0_SOURCE_SELECT__MASK; -} -#define CP_DRAW_INDX_OFFSET_0_VIS_CULL__MASK 0x00000300 -#define CP_DRAW_INDX_OFFSET_0_VIS_CULL__SHIFT 8 -static inline uint32_t CP_DRAW_INDX_OFFSET_0_VIS_CULL(enum pc_di_vis_cull_mode val) -{ - return ((val) << CP_DRAW_INDX_OFFSET_0_VIS_CULL__SHIFT) & CP_DRAW_INDX_OFFSET_0_VIS_CULL__MASK; -} -#define CP_DRAW_INDX_OFFSET_0_INDEX_SIZE__MASK 0x00000c00 -#define CP_DRAW_INDX_OFFSET_0_INDEX_SIZE__SHIFT 10 -static inline uint32_t CP_DRAW_INDX_OFFSET_0_INDEX_SIZE(enum a4xx_index_size val) -{ - return ((val) << CP_DRAW_INDX_OFFSET_0_INDEX_SIZE__SHIFT) & CP_DRAW_INDX_OFFSET_0_INDEX_SIZE__MASK; -} -#define CP_DRAW_INDX_OFFSET_0_TESS_MODE__MASK 0x01f00000 -#define CP_DRAW_INDX_OFFSET_0_TESS_MODE__SHIFT 20 -static inline uint32_t CP_DRAW_INDX_OFFSET_0_TESS_MODE(uint32_t val) -{ - return ((val) << CP_DRAW_INDX_OFFSET_0_TESS_MODE__SHIFT) & CP_DRAW_INDX_OFFSET_0_TESS_MODE__MASK; -} - -#define REG_CP_DRAW_INDX_OFFSET_1 0x00000001 -#define CP_DRAW_INDX_OFFSET_1_NUM_INSTANCES__MASK 0xffffffff -#define CP_DRAW_INDX_OFFSET_1_NUM_INSTANCES__SHIFT 0 -static inline uint32_t CP_DRAW_INDX_OFFSET_1_NUM_INSTANCES(uint32_t val) -{ - return ((val) << CP_DRAW_INDX_OFFSET_1_NUM_INSTANCES__SHIFT) & CP_DRAW_INDX_OFFSET_1_NUM_INSTANCES__MASK; -} - -#define REG_CP_DRAW_INDX_OFFSET_2 0x00000002 -#define CP_DRAW_INDX_OFFSET_2_NUM_INDICES__MASK 0xffffffff -#define CP_DRAW_INDX_OFFSET_2_NUM_INDICES__SHIFT 0 
-static inline uint32_t CP_DRAW_INDX_OFFSET_2_NUM_INDICES(uint32_t val) -{ - return ((val) << CP_DRAW_INDX_OFFSET_2_NUM_INDICES__SHIFT) & CP_DRAW_INDX_OFFSET_2_NUM_INDICES__MASK; -} - -#define REG_CP_DRAW_INDX_OFFSET_3 0x00000003 - -#define REG_CP_DRAW_INDX_OFFSET_4 0x00000004 -#define CP_DRAW_INDX_OFFSET_4_INDX_BASE__MASK 0xffffffff -#define CP_DRAW_INDX_OFFSET_4_INDX_BASE__SHIFT 0 -static inline uint32_t CP_DRAW_INDX_OFFSET_4_INDX_BASE(uint32_t val) -{ - return ((val) << CP_DRAW_INDX_OFFSET_4_INDX_BASE__SHIFT) & CP_DRAW_INDX_OFFSET_4_INDX_BASE__MASK; -} - -#define REG_CP_DRAW_INDX_OFFSET_5 0x00000005 -#define CP_DRAW_INDX_OFFSET_5_INDX_SIZE__MASK 0xffffffff -#define CP_DRAW_INDX_OFFSET_5_INDX_SIZE__SHIFT 0 -static inline uint32_t CP_DRAW_INDX_OFFSET_5_INDX_SIZE(uint32_t val) -{ - return ((val) << CP_DRAW_INDX_OFFSET_5_INDX_SIZE__SHIFT) & CP_DRAW_INDX_OFFSET_5_INDX_SIZE__MASK; -} - -#define REG_A4XX_CP_DRAW_INDIRECT_0 0x00000000 -#define A4XX_CP_DRAW_INDIRECT_0_PRIM_TYPE__MASK 0x0000003f -#define A4XX_CP_DRAW_INDIRECT_0_PRIM_TYPE__SHIFT 0 -static inline uint32_t A4XX_CP_DRAW_INDIRECT_0_PRIM_TYPE(enum pc_di_primtype val) -{ - return ((val) << A4XX_CP_DRAW_INDIRECT_0_PRIM_TYPE__SHIFT) & A4XX_CP_DRAW_INDIRECT_0_PRIM_TYPE__MASK; -} -#define A4XX_CP_DRAW_INDIRECT_0_SOURCE_SELECT__MASK 0x000000c0 -#define A4XX_CP_DRAW_INDIRECT_0_SOURCE_SELECT__SHIFT 6 -static inline uint32_t A4XX_CP_DRAW_INDIRECT_0_SOURCE_SELECT(enum pc_di_src_sel val) -{ - return ((val) << A4XX_CP_DRAW_INDIRECT_0_SOURCE_SELECT__SHIFT) & A4XX_CP_DRAW_INDIRECT_0_SOURCE_SELECT__MASK; -} -#define A4XX_CP_DRAW_INDIRECT_0_VIS_CULL__MASK 0x00000300 -#define A4XX_CP_DRAW_INDIRECT_0_VIS_CULL__SHIFT 8 -static inline uint32_t A4XX_CP_DRAW_INDIRECT_0_VIS_CULL(enum pc_di_vis_cull_mode val) -{ - return ((val) << A4XX_CP_DRAW_INDIRECT_0_VIS_CULL__SHIFT) & A4XX_CP_DRAW_INDIRECT_0_VIS_CULL__MASK; -} -#define A4XX_CP_DRAW_INDIRECT_0_INDEX_SIZE__MASK 0x00000c00 -#define A4XX_CP_DRAW_INDIRECT_0_INDEX_SIZE__SHIFT 10 
-static inline uint32_t A4XX_CP_DRAW_INDIRECT_0_INDEX_SIZE(enum a4xx_index_size val) -{ - return ((val) << A4XX_CP_DRAW_INDIRECT_0_INDEX_SIZE__SHIFT) & A4XX_CP_DRAW_INDIRECT_0_INDEX_SIZE__MASK; -} -#define A4XX_CP_DRAW_INDIRECT_0_TESS_MODE__MASK 0x01f00000 -#define A4XX_CP_DRAW_INDIRECT_0_TESS_MODE__SHIFT 20 -static inline uint32_t A4XX_CP_DRAW_INDIRECT_0_TESS_MODE(uint32_t val) -{ - return ((val) << A4XX_CP_DRAW_INDIRECT_0_TESS_MODE__SHIFT) & A4XX_CP_DRAW_INDIRECT_0_TESS_MODE__MASK; -} - -#define REG_A4XX_CP_DRAW_INDIRECT_1 0x00000001 -#define A4XX_CP_DRAW_INDIRECT_1_INDIRECT__MASK 0xffffffff -#define A4XX_CP_DRAW_INDIRECT_1_INDIRECT__SHIFT 0 -static inline uint32_t A4XX_CP_DRAW_INDIRECT_1_INDIRECT(uint32_t val) -{ - return ((val) << A4XX_CP_DRAW_INDIRECT_1_INDIRECT__SHIFT) & A4XX_CP_DRAW_INDIRECT_1_INDIRECT__MASK; -} - - -#define REG_A5XX_CP_DRAW_INDIRECT_2 0x00000002 -#define A5XX_CP_DRAW_INDIRECT_2_INDIRECT_HI__MASK 0xffffffff -#define A5XX_CP_DRAW_INDIRECT_2_INDIRECT_HI__SHIFT 0 -static inline uint32_t A5XX_CP_DRAW_INDIRECT_2_INDIRECT_HI(uint32_t val) -{ - return ((val) << A5XX_CP_DRAW_INDIRECT_2_INDIRECT_HI__SHIFT) & A5XX_CP_DRAW_INDIRECT_2_INDIRECT_HI__MASK; -} - -#define REG_A4XX_CP_DRAW_INDX_INDIRECT_0 0x00000000 -#define A4XX_CP_DRAW_INDX_INDIRECT_0_PRIM_TYPE__MASK 0x0000003f -#define A4XX_CP_DRAW_INDX_INDIRECT_0_PRIM_TYPE__SHIFT 0 -static inline uint32_t A4XX_CP_DRAW_INDX_INDIRECT_0_PRIM_TYPE(enum pc_di_primtype val) -{ - return ((val) << A4XX_CP_DRAW_INDX_INDIRECT_0_PRIM_TYPE__SHIFT) & A4XX_CP_DRAW_INDX_INDIRECT_0_PRIM_TYPE__MASK; -} -#define A4XX_CP_DRAW_INDX_INDIRECT_0_SOURCE_SELECT__MASK 0x000000c0 -#define A4XX_CP_DRAW_INDX_INDIRECT_0_SOURCE_SELECT__SHIFT 6 -static inline uint32_t A4XX_CP_DRAW_INDX_INDIRECT_0_SOURCE_SELECT(enum pc_di_src_sel val) -{ - return ((val) << A4XX_CP_DRAW_INDX_INDIRECT_0_SOURCE_SELECT__SHIFT) & A4XX_CP_DRAW_INDX_INDIRECT_0_SOURCE_SELECT__MASK; -} -#define A4XX_CP_DRAW_INDX_INDIRECT_0_VIS_CULL__MASK 0x00000300 -#define 
A4XX_CP_DRAW_INDX_INDIRECT_0_VIS_CULL__SHIFT 8 -static inline uint32_t A4XX_CP_DRAW_INDX_INDIRECT_0_VIS_CULL(enum pc_di_vis_cull_mode val) -{ - return ((val) << A4XX_CP_DRAW_INDX_INDIRECT_0_VIS_CULL__SHIFT) & A4XX_CP_DRAW_INDX_INDIRECT_0_VIS_CULL__MASK; -} -#define A4XX_CP_DRAW_INDX_INDIRECT_0_INDEX_SIZE__MASK 0x00000c00 -#define A4XX_CP_DRAW_INDX_INDIRECT_0_INDEX_SIZE__SHIFT 10 -static inline uint32_t A4XX_CP_DRAW_INDX_INDIRECT_0_INDEX_SIZE(enum a4xx_index_size val) -{ - return ((val) << A4XX_CP_DRAW_INDX_INDIRECT_0_INDEX_SIZE__SHIFT) & A4XX_CP_DRAW_INDX_INDIRECT_0_INDEX_SIZE__MASK; -} -#define A4XX_CP_DRAW_INDX_INDIRECT_0_TESS_MODE__MASK 0x01f00000 -#define A4XX_CP_DRAW_INDX_INDIRECT_0_TESS_MODE__SHIFT 20 -static inline uint32_t A4XX_CP_DRAW_INDX_INDIRECT_0_TESS_MODE(uint32_t val) -{ - return ((val) << A4XX_CP_DRAW_INDX_INDIRECT_0_TESS_MODE__SHIFT) & A4XX_CP_DRAW_INDX_INDIRECT_0_TESS_MODE__MASK; -} - - -#define REG_A4XX_CP_DRAW_INDX_INDIRECT_1 0x00000001 -#define A4XX_CP_DRAW_INDX_INDIRECT_1_INDX_BASE__MASK 0xffffffff -#define A4XX_CP_DRAW_INDX_INDIRECT_1_INDX_BASE__SHIFT 0 -static inline uint32_t A4XX_CP_DRAW_INDX_INDIRECT_1_INDX_BASE(uint32_t val) -{ - return ((val) << A4XX_CP_DRAW_INDX_INDIRECT_1_INDX_BASE__SHIFT) & A4XX_CP_DRAW_INDX_INDIRECT_1_INDX_BASE__MASK; -} - -#define REG_A4XX_CP_DRAW_INDX_INDIRECT_2 0x00000002 -#define A4XX_CP_DRAW_INDX_INDIRECT_2_INDX_SIZE__MASK 0xffffffff -#define A4XX_CP_DRAW_INDX_INDIRECT_2_INDX_SIZE__SHIFT 0 -static inline uint32_t A4XX_CP_DRAW_INDX_INDIRECT_2_INDX_SIZE(uint32_t val) -{ - return ((val) << A4XX_CP_DRAW_INDX_INDIRECT_2_INDX_SIZE__SHIFT) & A4XX_CP_DRAW_INDX_INDIRECT_2_INDX_SIZE__MASK; -} - -#define REG_A4XX_CP_DRAW_INDX_INDIRECT_3 0x00000003 -#define A4XX_CP_DRAW_INDX_INDIRECT_3_INDIRECT__MASK 0xffffffff -#define A4XX_CP_DRAW_INDX_INDIRECT_3_INDIRECT__SHIFT 0 -static inline uint32_t A4XX_CP_DRAW_INDX_INDIRECT_3_INDIRECT(uint32_t val) -{ - return ((val) << A4XX_CP_DRAW_INDX_INDIRECT_3_INDIRECT__SHIFT) & 
A4XX_CP_DRAW_INDX_INDIRECT_3_INDIRECT__MASK; -} - - -#define REG_A5XX_CP_DRAW_INDX_INDIRECT_1 0x00000001 -#define A5XX_CP_DRAW_INDX_INDIRECT_1_INDX_BASE_LO__MASK 0xffffffff -#define A5XX_CP_DRAW_INDX_INDIRECT_1_INDX_BASE_LO__SHIFT 0 -static inline uint32_t A5XX_CP_DRAW_INDX_INDIRECT_1_INDX_BASE_LO(uint32_t val) -{ - return ((val) << A5XX_CP_DRAW_INDX_INDIRECT_1_INDX_BASE_LO__SHIFT) & A5XX_CP_DRAW_INDX_INDIRECT_1_INDX_BASE_LO__MASK; -} - -#define REG_A5XX_CP_DRAW_INDX_INDIRECT_2 0x00000002 -#define A5XX_CP_DRAW_INDX_INDIRECT_2_INDX_BASE_HI__MASK 0xffffffff -#define A5XX_CP_DRAW_INDX_INDIRECT_2_INDX_BASE_HI__SHIFT 0 -static inline uint32_t A5XX_CP_DRAW_INDX_INDIRECT_2_INDX_BASE_HI(uint32_t val) -{ - return ((val) << A5XX_CP_DRAW_INDX_INDIRECT_2_INDX_BASE_HI__SHIFT) & A5XX_CP_DRAW_INDX_INDIRECT_2_INDX_BASE_HI__MASK; -} - -#define REG_A5XX_CP_DRAW_INDX_INDIRECT_3 0x00000003 -#define A5XX_CP_DRAW_INDX_INDIRECT_3_MAX_INDICES__MASK 0xffffffff -#define A5XX_CP_DRAW_INDX_INDIRECT_3_MAX_INDICES__SHIFT 0 -static inline uint32_t A5XX_CP_DRAW_INDX_INDIRECT_3_MAX_INDICES(uint32_t val) -{ - return ((val) << A5XX_CP_DRAW_INDX_INDIRECT_3_MAX_INDICES__SHIFT) & A5XX_CP_DRAW_INDX_INDIRECT_3_MAX_INDICES__MASK; -} - -#define REG_A5XX_CP_DRAW_INDX_INDIRECT_4 0x00000004 -#define A5XX_CP_DRAW_INDX_INDIRECT_4_INDIRECT_LO__MASK 0xffffffff -#define A5XX_CP_DRAW_INDX_INDIRECT_4_INDIRECT_LO__SHIFT 0 -static inline uint32_t A5XX_CP_DRAW_INDX_INDIRECT_4_INDIRECT_LO(uint32_t val) -{ - return ((val) << A5XX_CP_DRAW_INDX_INDIRECT_4_INDIRECT_LO__SHIFT) & A5XX_CP_DRAW_INDX_INDIRECT_4_INDIRECT_LO__MASK; -} - -#define REG_A5XX_CP_DRAW_INDX_INDIRECT_5 0x00000005 -#define A5XX_CP_DRAW_INDX_INDIRECT_5_INDIRECT_HI__MASK 0xffffffff -#define A5XX_CP_DRAW_INDX_INDIRECT_5_INDIRECT_HI__SHIFT 0 -static inline uint32_t A5XX_CP_DRAW_INDX_INDIRECT_5_INDIRECT_HI(uint32_t val) -{ - return ((val) << A5XX_CP_DRAW_INDX_INDIRECT_5_INDIRECT_HI__SHIFT) & A5XX_CP_DRAW_INDX_INDIRECT_5_INDIRECT_HI__MASK; -} - -static inline 
uint32_t REG_CP_SET_DRAW_STATE_(uint32_t i0) { return 0x00000000 + 0x3*i0; } - -static inline uint32_t REG_CP_SET_DRAW_STATE__0(uint32_t i0) { return 0x00000000 + 0x3*i0; } -#define CP_SET_DRAW_STATE__0_COUNT__MASK 0x0000ffff -#define CP_SET_DRAW_STATE__0_COUNT__SHIFT 0 -static inline uint32_t CP_SET_DRAW_STATE__0_COUNT(uint32_t val) -{ - return ((val) << CP_SET_DRAW_STATE__0_COUNT__SHIFT) & CP_SET_DRAW_STATE__0_COUNT__MASK; -} -#define CP_SET_DRAW_STATE__0_DIRTY 0x00010000 -#define CP_SET_DRAW_STATE__0_DISABLE 0x00020000 -#define CP_SET_DRAW_STATE__0_DISABLE_ALL_GROUPS 0x00040000 -#define CP_SET_DRAW_STATE__0_LOAD_IMMED 0x00080000 -#define CP_SET_DRAW_STATE__0_ENABLE_MASK__MASK 0x00f00000 -#define CP_SET_DRAW_STATE__0_ENABLE_MASK__SHIFT 20 -static inline uint32_t CP_SET_DRAW_STATE__0_ENABLE_MASK(uint32_t val) -{ - return ((val) << CP_SET_DRAW_STATE__0_ENABLE_MASK__SHIFT) & CP_SET_DRAW_STATE__0_ENABLE_MASK__MASK; -} -#define CP_SET_DRAW_STATE__0_GROUP_ID__MASK 0x1f000000 -#define CP_SET_DRAW_STATE__0_GROUP_ID__SHIFT 24 -static inline uint32_t CP_SET_DRAW_STATE__0_GROUP_ID(uint32_t val) -{ - return ((val) << CP_SET_DRAW_STATE__0_GROUP_ID__SHIFT) & CP_SET_DRAW_STATE__0_GROUP_ID__MASK; -} - -static inline uint32_t REG_CP_SET_DRAW_STATE__1(uint32_t i0) { return 0x00000001 + 0x3*i0; } -#define CP_SET_DRAW_STATE__1_ADDR_LO__MASK 0xffffffff -#define CP_SET_DRAW_STATE__1_ADDR_LO__SHIFT 0 -static inline uint32_t CP_SET_DRAW_STATE__1_ADDR_LO(uint32_t val) -{ - return ((val) << CP_SET_DRAW_STATE__1_ADDR_LO__SHIFT) & CP_SET_DRAW_STATE__1_ADDR_LO__MASK; -} - -static inline uint32_t REG_CP_SET_DRAW_STATE__2(uint32_t i0) { return 0x00000002 + 0x3*i0; } -#define CP_SET_DRAW_STATE__2_ADDR_HI__MASK 0xffffffff -#define CP_SET_DRAW_STATE__2_ADDR_HI__SHIFT 0 -static inline uint32_t CP_SET_DRAW_STATE__2_ADDR_HI(uint32_t val) -{ - return ((val) << CP_SET_DRAW_STATE__2_ADDR_HI__SHIFT) & CP_SET_DRAW_STATE__2_ADDR_HI__MASK; -} - -#define REG_CP_SET_BIN_0 0x00000000 - -#define 
REG_CP_SET_BIN_1 0x00000001 -#define CP_SET_BIN_1_X1__MASK 0x0000ffff -#define CP_SET_BIN_1_X1__SHIFT 0 -static inline uint32_t CP_SET_BIN_1_X1(uint32_t val) -{ - return ((val) << CP_SET_BIN_1_X1__SHIFT) & CP_SET_BIN_1_X1__MASK; -} -#define CP_SET_BIN_1_Y1__MASK 0xffff0000 -#define CP_SET_BIN_1_Y1__SHIFT 16 -static inline uint32_t CP_SET_BIN_1_Y1(uint32_t val) -{ - return ((val) << CP_SET_BIN_1_Y1__SHIFT) & CP_SET_BIN_1_Y1__MASK; -} - -#define REG_CP_SET_BIN_2 0x00000002 -#define CP_SET_BIN_2_X2__MASK 0x0000ffff -#define CP_SET_BIN_2_X2__SHIFT 0 -static inline uint32_t CP_SET_BIN_2_X2(uint32_t val) -{ - return ((val) << CP_SET_BIN_2_X2__SHIFT) & CP_SET_BIN_2_X2__MASK; -} -#define CP_SET_BIN_2_Y2__MASK 0xffff0000 -#define CP_SET_BIN_2_Y2__SHIFT 16 -static inline uint32_t CP_SET_BIN_2_Y2(uint32_t val) -{ - return ((val) << CP_SET_BIN_2_Y2__SHIFT) & CP_SET_BIN_2_Y2__MASK; -} - -#define REG_CP_SET_BIN_DATA_0 0x00000000 -#define CP_SET_BIN_DATA_0_BIN_DATA_ADDR__MASK 0xffffffff -#define CP_SET_BIN_DATA_0_BIN_DATA_ADDR__SHIFT 0 -static inline uint32_t CP_SET_BIN_DATA_0_BIN_DATA_ADDR(uint32_t val) -{ - return ((val) << CP_SET_BIN_DATA_0_BIN_DATA_ADDR__SHIFT) & CP_SET_BIN_DATA_0_BIN_DATA_ADDR__MASK; -} - -#define REG_CP_SET_BIN_DATA_1 0x00000001 -#define CP_SET_BIN_DATA_1_BIN_SIZE_ADDRESS__MASK 0xffffffff -#define CP_SET_BIN_DATA_1_BIN_SIZE_ADDRESS__SHIFT 0 -static inline uint32_t CP_SET_BIN_DATA_1_BIN_SIZE_ADDRESS(uint32_t val) -{ - return ((val) << CP_SET_BIN_DATA_1_BIN_SIZE_ADDRESS__SHIFT) & CP_SET_BIN_DATA_1_BIN_SIZE_ADDRESS__MASK; -} - -#define REG_CP_SET_BIN_DATA5_0 0x00000000 -#define CP_SET_BIN_DATA5_0_VSC_SIZE__MASK 0x003f0000 -#define CP_SET_BIN_DATA5_0_VSC_SIZE__SHIFT 16 -static inline uint32_t CP_SET_BIN_DATA5_0_VSC_SIZE(uint32_t val) -{ - return ((val) << CP_SET_BIN_DATA5_0_VSC_SIZE__SHIFT) & CP_SET_BIN_DATA5_0_VSC_SIZE__MASK; -} -#define CP_SET_BIN_DATA5_0_VSC_N__MASK 0x07c00000 -#define CP_SET_BIN_DATA5_0_VSC_N__SHIFT 22 -static inline uint32_t 
CP_SET_BIN_DATA5_0_VSC_N(uint32_t val) -{ - return ((val) << CP_SET_BIN_DATA5_0_VSC_N__SHIFT) & CP_SET_BIN_DATA5_0_VSC_N__MASK; -} - -#define REG_CP_SET_BIN_DATA5_1 0x00000001 -#define CP_SET_BIN_DATA5_1_BIN_DATA_ADDR_LO__MASK 0xffffffff -#define CP_SET_BIN_DATA5_1_BIN_DATA_ADDR_LO__SHIFT 0 -static inline uint32_t CP_SET_BIN_DATA5_1_BIN_DATA_ADDR_LO(uint32_t val) -{ - return ((val) << CP_SET_BIN_DATA5_1_BIN_DATA_ADDR_LO__SHIFT) & CP_SET_BIN_DATA5_1_BIN_DATA_ADDR_LO__MASK; -} - -#define REG_CP_SET_BIN_DATA5_2 0x00000002 -#define CP_SET_BIN_DATA5_2_BIN_DATA_ADDR_HI__MASK 0xffffffff -#define CP_SET_BIN_DATA5_2_BIN_DATA_ADDR_HI__SHIFT 0 -static inline uint32_t CP_SET_BIN_DATA5_2_BIN_DATA_ADDR_HI(uint32_t val) -{ - return ((val) << CP_SET_BIN_DATA5_2_BIN_DATA_ADDR_HI__SHIFT) & CP_SET_BIN_DATA5_2_BIN_DATA_ADDR_HI__MASK; -} - -#define REG_CP_SET_BIN_DATA5_3 0x00000003 -#define CP_SET_BIN_DATA5_3_BIN_SIZE_ADDRESS_LO__MASK 0xffffffff -#define CP_SET_BIN_DATA5_3_BIN_SIZE_ADDRESS_LO__SHIFT 0 -static inline uint32_t CP_SET_BIN_DATA5_3_BIN_SIZE_ADDRESS_LO(uint32_t val) -{ - return ((val) << CP_SET_BIN_DATA5_3_BIN_SIZE_ADDRESS_LO__SHIFT) & CP_SET_BIN_DATA5_3_BIN_SIZE_ADDRESS_LO__MASK; -} - -#define REG_CP_SET_BIN_DATA5_4 0x00000004 -#define CP_SET_BIN_DATA5_4_BIN_SIZE_ADDRESS_HI__MASK 0xffffffff -#define CP_SET_BIN_DATA5_4_BIN_SIZE_ADDRESS_HI__SHIFT 0 -static inline uint32_t CP_SET_BIN_DATA5_4_BIN_SIZE_ADDRESS_HI(uint32_t val) -{ - return ((val) << CP_SET_BIN_DATA5_4_BIN_SIZE_ADDRESS_HI__SHIFT) & CP_SET_BIN_DATA5_4_BIN_SIZE_ADDRESS_HI__MASK; -} - -#define REG_CP_SET_BIN_DATA5_5 0x00000005 -#define CP_SET_BIN_DATA5_5_BIN_DATA_ADDR2_LO__MASK 0xffffffff -#define CP_SET_BIN_DATA5_5_BIN_DATA_ADDR2_LO__SHIFT 0 -static inline uint32_t CP_SET_BIN_DATA5_5_BIN_DATA_ADDR2_LO(uint32_t val) -{ - return ((val) << CP_SET_BIN_DATA5_5_BIN_DATA_ADDR2_LO__SHIFT) & CP_SET_BIN_DATA5_5_BIN_DATA_ADDR2_LO__MASK; -} - -#define REG_CP_SET_BIN_DATA5_6 0x00000006 -#define 
CP_SET_BIN_DATA5_6_BIN_DATA_ADDR2_LO__MASK 0xffffffff -#define CP_SET_BIN_DATA5_6_BIN_DATA_ADDR2_LO__SHIFT 0 -static inline uint32_t CP_SET_BIN_DATA5_6_BIN_DATA_ADDR2_LO(uint32_t val) -{ - return ((val) << CP_SET_BIN_DATA5_6_BIN_DATA_ADDR2_LO__SHIFT) & CP_SET_BIN_DATA5_6_BIN_DATA_ADDR2_LO__MASK; -} - -#define REG_CP_REG_TO_MEM_0 0x00000000 -#define CP_REG_TO_MEM_0_REG__MASK 0x0000ffff -#define CP_REG_TO_MEM_0_REG__SHIFT 0 -static inline uint32_t CP_REG_TO_MEM_0_REG(uint32_t val) -{ - return ((val) << CP_REG_TO_MEM_0_REG__SHIFT) & CP_REG_TO_MEM_0_REG__MASK; -} -#define CP_REG_TO_MEM_0_CNT__MASK 0x3ff80000 -#define CP_REG_TO_MEM_0_CNT__SHIFT 19 -static inline uint32_t CP_REG_TO_MEM_0_CNT(uint32_t val) -{ - return ((val) << CP_REG_TO_MEM_0_CNT__SHIFT) & CP_REG_TO_MEM_0_CNT__MASK; -} -#define CP_REG_TO_MEM_0_64B 0x40000000 -#define CP_REG_TO_MEM_0_ACCUMULATE 0x80000000 - -#define REG_CP_REG_TO_MEM_1 0x00000001 -#define CP_REG_TO_MEM_1_DEST__MASK 0xffffffff -#define CP_REG_TO_MEM_1_DEST__SHIFT 0 -static inline uint32_t CP_REG_TO_MEM_1_DEST(uint32_t val) -{ - return ((val) << CP_REG_TO_MEM_1_DEST__SHIFT) & CP_REG_TO_MEM_1_DEST__MASK; -} - -#define REG_CP_REG_TO_MEM_2 0x00000002 -#define CP_REG_TO_MEM_2_DEST_HI__MASK 0xffffffff -#define CP_REG_TO_MEM_2_DEST_HI__SHIFT 0 -static inline uint32_t CP_REG_TO_MEM_2_DEST_HI(uint32_t val) -{ - return ((val) << CP_REG_TO_MEM_2_DEST_HI__SHIFT) & CP_REG_TO_MEM_2_DEST_HI__MASK; -} - -#define REG_CP_MEM_TO_REG_0 0x00000000 -#define CP_MEM_TO_REG_0_REG__MASK 0x0000ffff -#define CP_MEM_TO_REG_0_REG__SHIFT 0 -static inline uint32_t CP_MEM_TO_REG_0_REG(uint32_t val) -{ - return ((val) << CP_MEM_TO_REG_0_REG__SHIFT) & CP_MEM_TO_REG_0_REG__MASK; -} -#define CP_MEM_TO_REG_0_CNT__MASK 0x3ff80000 -#define CP_MEM_TO_REG_0_CNT__SHIFT 19 -static inline uint32_t CP_MEM_TO_REG_0_CNT(uint32_t val) -{ - return ((val) << CP_MEM_TO_REG_0_CNT__SHIFT) & CP_MEM_TO_REG_0_CNT__MASK; -} -#define CP_MEM_TO_REG_0_64B 0x40000000 -#define 
CP_MEM_TO_REG_0_ACCUMULATE 0x80000000 - -#define REG_CP_MEM_TO_REG_1 0x00000001 -#define CP_MEM_TO_REG_1_SRC__MASK 0xffffffff -#define CP_MEM_TO_REG_1_SRC__SHIFT 0 -static inline uint32_t CP_MEM_TO_REG_1_SRC(uint32_t val) -{ - return ((val) << CP_MEM_TO_REG_1_SRC__SHIFT) & CP_MEM_TO_REG_1_SRC__MASK; -} - -#define REG_CP_MEM_TO_REG_2 0x00000002 -#define CP_MEM_TO_REG_2_SRC_HI__MASK 0xffffffff -#define CP_MEM_TO_REG_2_SRC_HI__SHIFT 0 -static inline uint32_t CP_MEM_TO_REG_2_SRC_HI(uint32_t val) -{ - return ((val) << CP_MEM_TO_REG_2_SRC_HI__SHIFT) & CP_MEM_TO_REG_2_SRC_HI__MASK; -} - -#define REG_CP_MEM_TO_MEM_0 0x00000000 -#define CP_MEM_TO_MEM_0_NEG_A 0x00000001 -#define CP_MEM_TO_MEM_0_NEG_B 0x00000002 -#define CP_MEM_TO_MEM_0_NEG_C 0x00000004 -#define CP_MEM_TO_MEM_0_DOUBLE 0x20000000 - -#define REG_CP_COND_WRITE_0 0x00000000 -#define CP_COND_WRITE_0_FUNCTION__MASK 0x00000007 -#define CP_COND_WRITE_0_FUNCTION__SHIFT 0 -static inline uint32_t CP_COND_WRITE_0_FUNCTION(enum cp_cond_function val) -{ - return ((val) << CP_COND_WRITE_0_FUNCTION__SHIFT) & CP_COND_WRITE_0_FUNCTION__MASK; -} -#define CP_COND_WRITE_0_POLL_MEMORY 0x00000010 -#define CP_COND_WRITE_0_WRITE_MEMORY 0x00000100 - -#define REG_CP_COND_WRITE_1 0x00000001 -#define CP_COND_WRITE_1_POLL_ADDR__MASK 0xffffffff -#define CP_COND_WRITE_1_POLL_ADDR__SHIFT 0 -static inline uint32_t CP_COND_WRITE_1_POLL_ADDR(uint32_t val) -{ - return ((val) << CP_COND_WRITE_1_POLL_ADDR__SHIFT) & CP_COND_WRITE_1_POLL_ADDR__MASK; -} - -#define REG_CP_COND_WRITE_2 0x00000002 -#define CP_COND_WRITE_2_REF__MASK 0xffffffff -#define CP_COND_WRITE_2_REF__SHIFT 0 -static inline uint32_t CP_COND_WRITE_2_REF(uint32_t val) -{ - return ((val) << CP_COND_WRITE_2_REF__SHIFT) & CP_COND_WRITE_2_REF__MASK; -} - -#define REG_CP_COND_WRITE_3 0x00000003 -#define CP_COND_WRITE_3_MASK__MASK 0xffffffff -#define CP_COND_WRITE_3_MASK__SHIFT 0 -static inline uint32_t CP_COND_WRITE_3_MASK(uint32_t val) -{ - return ((val) << CP_COND_WRITE_3_MASK__SHIFT) & 
CP_COND_WRITE_3_MASK__MASK; -} - -#define REG_CP_COND_WRITE_4 0x00000004 -#define CP_COND_WRITE_4_WRITE_ADDR__MASK 0xffffffff -#define CP_COND_WRITE_4_WRITE_ADDR__SHIFT 0 -static inline uint32_t CP_COND_WRITE_4_WRITE_ADDR(uint32_t val) -{ - return ((val) << CP_COND_WRITE_4_WRITE_ADDR__SHIFT) & CP_COND_WRITE_4_WRITE_ADDR__MASK; -} - -#define REG_CP_COND_WRITE_5 0x00000005 -#define CP_COND_WRITE_5_WRITE_DATA__MASK 0xffffffff -#define CP_COND_WRITE_5_WRITE_DATA__SHIFT 0 -static inline uint32_t CP_COND_WRITE_5_WRITE_DATA(uint32_t val) -{ - return ((val) << CP_COND_WRITE_5_WRITE_DATA__SHIFT) & CP_COND_WRITE_5_WRITE_DATA__MASK; -} - -#define REG_CP_COND_WRITE5_0 0x00000000 -#define CP_COND_WRITE5_0_FUNCTION__MASK 0x00000007 -#define CP_COND_WRITE5_0_FUNCTION__SHIFT 0 -static inline uint32_t CP_COND_WRITE5_0_FUNCTION(enum cp_cond_function val) -{ - return ((val) << CP_COND_WRITE5_0_FUNCTION__SHIFT) & CP_COND_WRITE5_0_FUNCTION__MASK; -} -#define CP_COND_WRITE5_0_POLL_MEMORY 0x00000010 -#define CP_COND_WRITE5_0_WRITE_MEMORY 0x00000100 - -#define REG_CP_COND_WRITE5_1 0x00000001 -#define CP_COND_WRITE5_1_POLL_ADDR_LO__MASK 0xffffffff -#define CP_COND_WRITE5_1_POLL_ADDR_LO__SHIFT 0 -static inline uint32_t CP_COND_WRITE5_1_POLL_ADDR_LO(uint32_t val) -{ - return ((val) << CP_COND_WRITE5_1_POLL_ADDR_LO__SHIFT) & CP_COND_WRITE5_1_POLL_ADDR_LO__MASK; -} - -#define REG_CP_COND_WRITE5_2 0x00000002 -#define CP_COND_WRITE5_2_POLL_ADDR_HI__MASK 0xffffffff -#define CP_COND_WRITE5_2_POLL_ADDR_HI__SHIFT 0 -static inline uint32_t CP_COND_WRITE5_2_POLL_ADDR_HI(uint32_t val) -{ - return ((val) << CP_COND_WRITE5_2_POLL_ADDR_HI__SHIFT) & CP_COND_WRITE5_2_POLL_ADDR_HI__MASK; -} - -#define REG_CP_COND_WRITE5_3 0x00000003 -#define CP_COND_WRITE5_3_REF__MASK 0xffffffff -#define CP_COND_WRITE5_3_REF__SHIFT 0 -static inline uint32_t CP_COND_WRITE5_3_REF(uint32_t val) -{ - return ((val) << CP_COND_WRITE5_3_REF__SHIFT) & CP_COND_WRITE5_3_REF__MASK; -} - -#define REG_CP_COND_WRITE5_4 0x00000004 -#define 
CP_COND_WRITE5_4_MASK__MASK 0xffffffff -#define CP_COND_WRITE5_4_MASK__SHIFT 0 -static inline uint32_t CP_COND_WRITE5_4_MASK(uint32_t val) -{ - return ((val) << CP_COND_WRITE5_4_MASK__SHIFT) & CP_COND_WRITE5_4_MASK__MASK; -} - -#define REG_CP_COND_WRITE5_5 0x00000005 -#define CP_COND_WRITE5_5_WRITE_ADDR_LO__MASK 0xffffffff -#define CP_COND_WRITE5_5_WRITE_ADDR_LO__SHIFT 0 -static inline uint32_t CP_COND_WRITE5_5_WRITE_ADDR_LO(uint32_t val) -{ - return ((val) << CP_COND_WRITE5_5_WRITE_ADDR_LO__SHIFT) & CP_COND_WRITE5_5_WRITE_ADDR_LO__MASK; -} - -#define REG_CP_COND_WRITE5_6 0x00000006 -#define CP_COND_WRITE5_6_WRITE_ADDR_HI__MASK 0xffffffff -#define CP_COND_WRITE5_6_WRITE_ADDR_HI__SHIFT 0 -static inline uint32_t CP_COND_WRITE5_6_WRITE_ADDR_HI(uint32_t val) -{ - return ((val) << CP_COND_WRITE5_6_WRITE_ADDR_HI__SHIFT) & CP_COND_WRITE5_6_WRITE_ADDR_HI__MASK; -} - -#define REG_CP_COND_WRITE5_7 0x00000007 -#define CP_COND_WRITE5_7_WRITE_DATA__MASK 0xffffffff -#define CP_COND_WRITE5_7_WRITE_DATA__SHIFT 0 -static inline uint32_t CP_COND_WRITE5_7_WRITE_DATA(uint32_t val) -{ - return ((val) << CP_COND_WRITE5_7_WRITE_DATA__SHIFT) & CP_COND_WRITE5_7_WRITE_DATA__MASK; -} - -#define REG_CP_DISPATCH_COMPUTE_0 0x00000000 - -#define REG_CP_DISPATCH_COMPUTE_1 0x00000001 -#define CP_DISPATCH_COMPUTE_1_X__MASK 0xffffffff -#define CP_DISPATCH_COMPUTE_1_X__SHIFT 0 -static inline uint32_t CP_DISPATCH_COMPUTE_1_X(uint32_t val) -{ - return ((val) << CP_DISPATCH_COMPUTE_1_X__SHIFT) & CP_DISPATCH_COMPUTE_1_X__MASK; -} - -#define REG_CP_DISPATCH_COMPUTE_2 0x00000002 -#define CP_DISPATCH_COMPUTE_2_Y__MASK 0xffffffff -#define CP_DISPATCH_COMPUTE_2_Y__SHIFT 0 -static inline uint32_t CP_DISPATCH_COMPUTE_2_Y(uint32_t val) -{ - return ((val) << CP_DISPATCH_COMPUTE_2_Y__SHIFT) & CP_DISPATCH_COMPUTE_2_Y__MASK; -} - -#define REG_CP_DISPATCH_COMPUTE_3 0x00000003 -#define CP_DISPATCH_COMPUTE_3_Z__MASK 0xffffffff -#define CP_DISPATCH_COMPUTE_3_Z__SHIFT 0 -static inline uint32_t 
CP_DISPATCH_COMPUTE_3_Z(uint32_t val) -{ - return ((val) << CP_DISPATCH_COMPUTE_3_Z__SHIFT) & CP_DISPATCH_COMPUTE_3_Z__MASK; -} - -#define REG_CP_SET_RENDER_MODE_0 0x00000000 -#define CP_SET_RENDER_MODE_0_MODE__MASK 0x000001ff -#define CP_SET_RENDER_MODE_0_MODE__SHIFT 0 -static inline uint32_t CP_SET_RENDER_MODE_0_MODE(enum render_mode_cmd val) -{ - return ((val) << CP_SET_RENDER_MODE_0_MODE__SHIFT) & CP_SET_RENDER_MODE_0_MODE__MASK; -} - -#define REG_CP_SET_RENDER_MODE_1 0x00000001 -#define CP_SET_RENDER_MODE_1_ADDR_0_LO__MASK 0xffffffff -#define CP_SET_RENDER_MODE_1_ADDR_0_LO__SHIFT 0 -static inline uint32_t CP_SET_RENDER_MODE_1_ADDR_0_LO(uint32_t val) -{ - return ((val) << CP_SET_RENDER_MODE_1_ADDR_0_LO__SHIFT) & CP_SET_RENDER_MODE_1_ADDR_0_LO__MASK; -} - -#define REG_CP_SET_RENDER_MODE_2 0x00000002 -#define CP_SET_RENDER_MODE_2_ADDR_0_HI__MASK 0xffffffff -#define CP_SET_RENDER_MODE_2_ADDR_0_HI__SHIFT 0 -static inline uint32_t CP_SET_RENDER_MODE_2_ADDR_0_HI(uint32_t val) -{ - return ((val) << CP_SET_RENDER_MODE_2_ADDR_0_HI__SHIFT) & CP_SET_RENDER_MODE_2_ADDR_0_HI__MASK; -} - -#define REG_CP_SET_RENDER_MODE_3 0x00000003 -#define CP_SET_RENDER_MODE_3_VSC_ENABLE 0x00000008 -#define CP_SET_RENDER_MODE_3_GMEM_ENABLE 0x00000010 - -#define REG_CP_SET_RENDER_MODE_4 0x00000004 - -#define REG_CP_SET_RENDER_MODE_5 0x00000005 -#define CP_SET_RENDER_MODE_5_ADDR_1_LEN__MASK 0xffffffff -#define CP_SET_RENDER_MODE_5_ADDR_1_LEN__SHIFT 0 -static inline uint32_t CP_SET_RENDER_MODE_5_ADDR_1_LEN(uint32_t val) -{ - return ((val) << CP_SET_RENDER_MODE_5_ADDR_1_LEN__SHIFT) & CP_SET_RENDER_MODE_5_ADDR_1_LEN__MASK; -} - -#define REG_CP_SET_RENDER_MODE_6 0x00000006 -#define CP_SET_RENDER_MODE_6_ADDR_1_LO__MASK 0xffffffff -#define CP_SET_RENDER_MODE_6_ADDR_1_LO__SHIFT 0 -static inline uint32_t CP_SET_RENDER_MODE_6_ADDR_1_LO(uint32_t val) -{ - return ((val) << CP_SET_RENDER_MODE_6_ADDR_1_LO__SHIFT) & CP_SET_RENDER_MODE_6_ADDR_1_LO__MASK; -} - -#define REG_CP_SET_RENDER_MODE_7 0x00000007 
-#define CP_SET_RENDER_MODE_7_ADDR_1_HI__MASK 0xffffffff -#define CP_SET_RENDER_MODE_7_ADDR_1_HI__SHIFT 0 -static inline uint32_t CP_SET_RENDER_MODE_7_ADDR_1_HI(uint32_t val) -{ - return ((val) << CP_SET_RENDER_MODE_7_ADDR_1_HI__SHIFT) & CP_SET_RENDER_MODE_7_ADDR_1_HI__MASK; -} - -#define REG_CP_COMPUTE_CHECKPOINT_0 0x00000000 -#define CP_COMPUTE_CHECKPOINT_0_ADDR_0_LO__MASK 0xffffffff -#define CP_COMPUTE_CHECKPOINT_0_ADDR_0_LO__SHIFT 0 -static inline uint32_t CP_COMPUTE_CHECKPOINT_0_ADDR_0_LO(uint32_t val) -{ - return ((val) << CP_COMPUTE_CHECKPOINT_0_ADDR_0_LO__SHIFT) & CP_COMPUTE_CHECKPOINT_0_ADDR_0_LO__MASK; -} - -#define REG_CP_COMPUTE_CHECKPOINT_1 0x00000001 -#define CP_COMPUTE_CHECKPOINT_1_ADDR_0_HI__MASK 0xffffffff -#define CP_COMPUTE_CHECKPOINT_1_ADDR_0_HI__SHIFT 0 -static inline uint32_t CP_COMPUTE_CHECKPOINT_1_ADDR_0_HI(uint32_t val) -{ - return ((val) << CP_COMPUTE_CHECKPOINT_1_ADDR_0_HI__SHIFT) & CP_COMPUTE_CHECKPOINT_1_ADDR_0_HI__MASK; -} - -#define REG_CP_COMPUTE_CHECKPOINT_2 0x00000002 - -#define REG_CP_COMPUTE_CHECKPOINT_3 0x00000003 -#define CP_COMPUTE_CHECKPOINT_3_ADDR_1_LEN__MASK 0xffffffff -#define CP_COMPUTE_CHECKPOINT_3_ADDR_1_LEN__SHIFT 0 -static inline uint32_t CP_COMPUTE_CHECKPOINT_3_ADDR_1_LEN(uint32_t val) -{ - return ((val) << CP_COMPUTE_CHECKPOINT_3_ADDR_1_LEN__SHIFT) & CP_COMPUTE_CHECKPOINT_3_ADDR_1_LEN__MASK; -} - -#define REG_CP_COMPUTE_CHECKPOINT_4 0x00000004 - -#define REG_CP_COMPUTE_CHECKPOINT_5 0x00000005 -#define CP_COMPUTE_CHECKPOINT_5_ADDR_1_LO__MASK 0xffffffff -#define CP_COMPUTE_CHECKPOINT_5_ADDR_1_LO__SHIFT 0 -static inline uint32_t CP_COMPUTE_CHECKPOINT_5_ADDR_1_LO(uint32_t val) -{ - return ((val) << CP_COMPUTE_CHECKPOINT_5_ADDR_1_LO__SHIFT) & CP_COMPUTE_CHECKPOINT_5_ADDR_1_LO__MASK; -} - -#define REG_CP_COMPUTE_CHECKPOINT_6 0x00000006 -#define CP_COMPUTE_CHECKPOINT_6_ADDR_1_HI__MASK 0xffffffff -#define CP_COMPUTE_CHECKPOINT_6_ADDR_1_HI__SHIFT 0 -static inline uint32_t CP_COMPUTE_CHECKPOINT_6_ADDR_1_HI(uint32_t val) -{ - 
return ((val) << CP_COMPUTE_CHECKPOINT_6_ADDR_1_HI__SHIFT) & CP_COMPUTE_CHECKPOINT_6_ADDR_1_HI__MASK; -} - -#define REG_CP_COMPUTE_CHECKPOINT_7 0x00000007 - -#define REG_CP_PERFCOUNTER_ACTION_0 0x00000000 - -#define REG_CP_PERFCOUNTER_ACTION_1 0x00000001 -#define CP_PERFCOUNTER_ACTION_1_ADDR_0_LO__MASK 0xffffffff -#define CP_PERFCOUNTER_ACTION_1_ADDR_0_LO__SHIFT 0 -static inline uint32_t CP_PERFCOUNTER_ACTION_1_ADDR_0_LO(uint32_t val) -{ - return ((val) << CP_PERFCOUNTER_ACTION_1_ADDR_0_LO__SHIFT) & CP_PERFCOUNTER_ACTION_1_ADDR_0_LO__MASK; -} - -#define REG_CP_PERFCOUNTER_ACTION_2 0x00000002 -#define CP_PERFCOUNTER_ACTION_2_ADDR_0_HI__MASK 0xffffffff -#define CP_PERFCOUNTER_ACTION_2_ADDR_0_HI__SHIFT 0 -static inline uint32_t CP_PERFCOUNTER_ACTION_2_ADDR_0_HI(uint32_t val) -{ - return ((val) << CP_PERFCOUNTER_ACTION_2_ADDR_0_HI__SHIFT) & CP_PERFCOUNTER_ACTION_2_ADDR_0_HI__MASK; -} - -#define REG_CP_EVENT_WRITE_0 0x00000000 -#define CP_EVENT_WRITE_0_EVENT__MASK 0x000000ff -#define CP_EVENT_WRITE_0_EVENT__SHIFT 0 -static inline uint32_t CP_EVENT_WRITE_0_EVENT(enum vgt_event_type val) -{ - return ((val) << CP_EVENT_WRITE_0_EVENT__SHIFT) & CP_EVENT_WRITE_0_EVENT__MASK; -} -#define CP_EVENT_WRITE_0_TIMESTAMP 0x40000000 - -#define REG_CP_EVENT_WRITE_1 0x00000001 -#define CP_EVENT_WRITE_1_ADDR_0_LO__MASK 0xffffffff -#define CP_EVENT_WRITE_1_ADDR_0_LO__SHIFT 0 -static inline uint32_t CP_EVENT_WRITE_1_ADDR_0_LO(uint32_t val) -{ - return ((val) << CP_EVENT_WRITE_1_ADDR_0_LO__SHIFT) & CP_EVENT_WRITE_1_ADDR_0_LO__MASK; -} - -#define REG_CP_EVENT_WRITE_2 0x00000002 -#define CP_EVENT_WRITE_2_ADDR_0_HI__MASK 0xffffffff -#define CP_EVENT_WRITE_2_ADDR_0_HI__SHIFT 0 -static inline uint32_t CP_EVENT_WRITE_2_ADDR_0_HI(uint32_t val) -{ - return ((val) << CP_EVENT_WRITE_2_ADDR_0_HI__SHIFT) & CP_EVENT_WRITE_2_ADDR_0_HI__MASK; -} - -#define REG_CP_EVENT_WRITE_3 0x00000003 - -#define REG_CP_BLIT_0 0x00000000 -#define CP_BLIT_0_OP__MASK 0x0000000f -#define CP_BLIT_0_OP__SHIFT 0 -static 
inline uint32_t CP_BLIT_0_OP(enum cp_blit_cmd val) -{ - return ((val) << CP_BLIT_0_OP__SHIFT) & CP_BLIT_0_OP__MASK; -} - -#define REG_CP_BLIT_1 0x00000001 -#define CP_BLIT_1_SRC_X1__MASK 0x00003fff -#define CP_BLIT_1_SRC_X1__SHIFT 0 -static inline uint32_t CP_BLIT_1_SRC_X1(uint32_t val) -{ - return ((val) << CP_BLIT_1_SRC_X1__SHIFT) & CP_BLIT_1_SRC_X1__MASK; -} -#define CP_BLIT_1_SRC_Y1__MASK 0x3fff0000 -#define CP_BLIT_1_SRC_Y1__SHIFT 16 -static inline uint32_t CP_BLIT_1_SRC_Y1(uint32_t val) -{ - return ((val) << CP_BLIT_1_SRC_Y1__SHIFT) & CP_BLIT_1_SRC_Y1__MASK; -} - -#define REG_CP_BLIT_2 0x00000002 -#define CP_BLIT_2_SRC_X2__MASK 0x00003fff -#define CP_BLIT_2_SRC_X2__SHIFT 0 -static inline uint32_t CP_BLIT_2_SRC_X2(uint32_t val) -{ - return ((val) << CP_BLIT_2_SRC_X2__SHIFT) & CP_BLIT_2_SRC_X2__MASK; -} -#define CP_BLIT_2_SRC_Y2__MASK 0x3fff0000 -#define CP_BLIT_2_SRC_Y2__SHIFT 16 -static inline uint32_t CP_BLIT_2_SRC_Y2(uint32_t val) -{ - return ((val) << CP_BLIT_2_SRC_Y2__SHIFT) & CP_BLIT_2_SRC_Y2__MASK; -} - -#define REG_CP_BLIT_3 0x00000003 -#define CP_BLIT_3_DST_X1__MASK 0x00003fff -#define CP_BLIT_3_DST_X1__SHIFT 0 -static inline uint32_t CP_BLIT_3_DST_X1(uint32_t val) -{ - return ((val) << CP_BLIT_3_DST_X1__SHIFT) & CP_BLIT_3_DST_X1__MASK; -} -#define CP_BLIT_3_DST_Y1__MASK 0x3fff0000 -#define CP_BLIT_3_DST_Y1__SHIFT 16 -static inline uint32_t CP_BLIT_3_DST_Y1(uint32_t val) -{ - return ((val) << CP_BLIT_3_DST_Y1__SHIFT) & CP_BLIT_3_DST_Y1__MASK; -} - -#define REG_CP_BLIT_4 0x00000004 -#define CP_BLIT_4_DST_X2__MASK 0x00003fff -#define CP_BLIT_4_DST_X2__SHIFT 0 -static inline uint32_t CP_BLIT_4_DST_X2(uint32_t val) -{ - return ((val) << CP_BLIT_4_DST_X2__SHIFT) & CP_BLIT_4_DST_X2__MASK; -} -#define CP_BLIT_4_DST_Y2__MASK 0x3fff0000 -#define CP_BLIT_4_DST_Y2__SHIFT 16 -static inline uint32_t CP_BLIT_4_DST_Y2(uint32_t val) -{ - return ((val) << CP_BLIT_4_DST_Y2__SHIFT) & CP_BLIT_4_DST_Y2__MASK; -} - -#define REG_CP_EXEC_CS_0 0x00000000 - -#define 
REG_CP_EXEC_CS_1 0x00000001 -#define CP_EXEC_CS_1_NGROUPS_X__MASK 0xffffffff -#define CP_EXEC_CS_1_NGROUPS_X__SHIFT 0 -static inline uint32_t CP_EXEC_CS_1_NGROUPS_X(uint32_t val) -{ - return ((val) << CP_EXEC_CS_1_NGROUPS_X__SHIFT) & CP_EXEC_CS_1_NGROUPS_X__MASK; -} - -#define REG_CP_EXEC_CS_2 0x00000002 -#define CP_EXEC_CS_2_NGROUPS_Y__MASK 0xffffffff -#define CP_EXEC_CS_2_NGROUPS_Y__SHIFT 0 -static inline uint32_t CP_EXEC_CS_2_NGROUPS_Y(uint32_t val) -{ - return ((val) << CP_EXEC_CS_2_NGROUPS_Y__SHIFT) & CP_EXEC_CS_2_NGROUPS_Y__MASK; -} - -#define REG_CP_EXEC_CS_3 0x00000003 -#define CP_EXEC_CS_3_NGROUPS_Z__MASK 0xffffffff -#define CP_EXEC_CS_3_NGROUPS_Z__SHIFT 0 -static inline uint32_t CP_EXEC_CS_3_NGROUPS_Z(uint32_t val) -{ - return ((val) << CP_EXEC_CS_3_NGROUPS_Z__SHIFT) & CP_EXEC_CS_3_NGROUPS_Z__MASK; -} - -#define REG_A4XX_CP_EXEC_CS_INDIRECT_0 0x00000000 - - -#define REG_A4XX_CP_EXEC_CS_INDIRECT_1 0x00000001 -#define A4XX_CP_EXEC_CS_INDIRECT_1_ADDR__MASK 0xffffffff -#define A4XX_CP_EXEC_CS_INDIRECT_1_ADDR__SHIFT 0 -static inline uint32_t A4XX_CP_EXEC_CS_INDIRECT_1_ADDR(uint32_t val) -{ - return ((val) << A4XX_CP_EXEC_CS_INDIRECT_1_ADDR__SHIFT) & A4XX_CP_EXEC_CS_INDIRECT_1_ADDR__MASK; -} - -#define REG_A4XX_CP_EXEC_CS_INDIRECT_2 0x00000002 -#define A4XX_CP_EXEC_CS_INDIRECT_2_LOCALSIZEX__MASK 0x00000ffc -#define A4XX_CP_EXEC_CS_INDIRECT_2_LOCALSIZEX__SHIFT 2 -static inline uint32_t A4XX_CP_EXEC_CS_INDIRECT_2_LOCALSIZEX(uint32_t val) -{ - return ((val) << A4XX_CP_EXEC_CS_INDIRECT_2_LOCALSIZEX__SHIFT) & A4XX_CP_EXEC_CS_INDIRECT_2_LOCALSIZEX__MASK; -} -#define A4XX_CP_EXEC_CS_INDIRECT_2_LOCALSIZEY__MASK 0x003ff000 -#define A4XX_CP_EXEC_CS_INDIRECT_2_LOCALSIZEY__SHIFT 12 -static inline uint32_t A4XX_CP_EXEC_CS_INDIRECT_2_LOCALSIZEY(uint32_t val) -{ - return ((val) << A4XX_CP_EXEC_CS_INDIRECT_2_LOCALSIZEY__SHIFT) & A4XX_CP_EXEC_CS_INDIRECT_2_LOCALSIZEY__MASK; -} -#define A4XX_CP_EXEC_CS_INDIRECT_2_LOCALSIZEZ__MASK 0xffc00000 -#define 
A4XX_CP_EXEC_CS_INDIRECT_2_LOCALSIZEZ__SHIFT 22 -static inline uint32_t A4XX_CP_EXEC_CS_INDIRECT_2_LOCALSIZEZ(uint32_t val) -{ - return ((val) << A4XX_CP_EXEC_CS_INDIRECT_2_LOCALSIZEZ__SHIFT) & A4XX_CP_EXEC_CS_INDIRECT_2_LOCALSIZEZ__MASK; -} - - -#define REG_A5XX_CP_EXEC_CS_INDIRECT_1 0x00000001 -#define A5XX_CP_EXEC_CS_INDIRECT_1_ADDR_LO__MASK 0xffffffff -#define A5XX_CP_EXEC_CS_INDIRECT_1_ADDR_LO__SHIFT 0 -static inline uint32_t A5XX_CP_EXEC_CS_INDIRECT_1_ADDR_LO(uint32_t val) -{ - return ((val) << A5XX_CP_EXEC_CS_INDIRECT_1_ADDR_LO__SHIFT) & A5XX_CP_EXEC_CS_INDIRECT_1_ADDR_LO__MASK; -} - -#define REG_A5XX_CP_EXEC_CS_INDIRECT_2 0x00000002 -#define A5XX_CP_EXEC_CS_INDIRECT_2_ADDR_HI__MASK 0xffffffff -#define A5XX_CP_EXEC_CS_INDIRECT_2_ADDR_HI__SHIFT 0 -static inline uint32_t A5XX_CP_EXEC_CS_INDIRECT_2_ADDR_HI(uint32_t val) -{ - return ((val) << A5XX_CP_EXEC_CS_INDIRECT_2_ADDR_HI__SHIFT) & A5XX_CP_EXEC_CS_INDIRECT_2_ADDR_HI__MASK; -} - -#define REG_A5XX_CP_EXEC_CS_INDIRECT_3 0x00000003 -#define A5XX_CP_EXEC_CS_INDIRECT_3_LOCALSIZEX__MASK 0x00000ffc -#define A5XX_CP_EXEC_CS_INDIRECT_3_LOCALSIZEX__SHIFT 2 -static inline uint32_t A5XX_CP_EXEC_CS_INDIRECT_3_LOCALSIZEX(uint32_t val) -{ - return ((val) << A5XX_CP_EXEC_CS_INDIRECT_3_LOCALSIZEX__SHIFT) & A5XX_CP_EXEC_CS_INDIRECT_3_LOCALSIZEX__MASK; -} -#define A5XX_CP_EXEC_CS_INDIRECT_3_LOCALSIZEY__MASK 0x003ff000 -#define A5XX_CP_EXEC_CS_INDIRECT_3_LOCALSIZEY__SHIFT 12 -static inline uint32_t A5XX_CP_EXEC_CS_INDIRECT_3_LOCALSIZEY(uint32_t val) -{ - return ((val) << A5XX_CP_EXEC_CS_INDIRECT_3_LOCALSIZEY__SHIFT) & A5XX_CP_EXEC_CS_INDIRECT_3_LOCALSIZEY__MASK; -} -#define A5XX_CP_EXEC_CS_INDIRECT_3_LOCALSIZEZ__MASK 0xffc00000 -#define A5XX_CP_EXEC_CS_INDIRECT_3_LOCALSIZEZ__SHIFT 22 -static inline uint32_t A5XX_CP_EXEC_CS_INDIRECT_3_LOCALSIZEZ(uint32_t val) -{ - return ((val) << A5XX_CP_EXEC_CS_INDIRECT_3_LOCALSIZEZ__SHIFT) & A5XX_CP_EXEC_CS_INDIRECT_3_LOCALSIZEZ__MASK; -} - -#define REG_A2XX_CP_SET_MARKER_0 0x00000000 
-#define A2XX_CP_SET_MARKER_0_MARKER__MASK 0x0000000f -#define A2XX_CP_SET_MARKER_0_MARKER__SHIFT 0 -static inline uint32_t A2XX_CP_SET_MARKER_0_MARKER(uint32_t val) -{ - return ((val) << A2XX_CP_SET_MARKER_0_MARKER__SHIFT) & A2XX_CP_SET_MARKER_0_MARKER__MASK; -} -#define A2XX_CP_SET_MARKER_0_MODE__MASK 0x0000000f -#define A2XX_CP_SET_MARKER_0_MODE__SHIFT 0 -static inline uint32_t A2XX_CP_SET_MARKER_0_MODE(enum a6xx_render_mode val) -{ - return ((val) << A2XX_CP_SET_MARKER_0_MODE__SHIFT) & A2XX_CP_SET_MARKER_0_MODE__MASK; -} -#define A2XX_CP_SET_MARKER_0_IFPC 0x00000100 - -static inline uint32_t REG_A2XX_CP_SET_PSEUDO_REG_(uint32_t i0) { return 0x00000000 + 0x3*i0; } - -static inline uint32_t REG_A2XX_CP_SET_PSEUDO_REG__0(uint32_t i0) { return 0x00000000 + 0x3*i0; } -#define A2XX_CP_SET_PSEUDO_REG__0_PSEUDO_REG__MASK 0x00000007 -#define A2XX_CP_SET_PSEUDO_REG__0_PSEUDO_REG__SHIFT 0 -static inline uint32_t A2XX_CP_SET_PSEUDO_REG__0_PSEUDO_REG(enum pseudo_reg val) -{ - return ((val) << A2XX_CP_SET_PSEUDO_REG__0_PSEUDO_REG__SHIFT) & A2XX_CP_SET_PSEUDO_REG__0_PSEUDO_REG__MASK; -} - -static inline uint32_t REG_A2XX_CP_SET_PSEUDO_REG__1(uint32_t i0) { return 0x00000001 + 0x3*i0; } -#define A2XX_CP_SET_PSEUDO_REG__1_LO__MASK 0xffffffff -#define A2XX_CP_SET_PSEUDO_REG__1_LO__SHIFT 0 -static inline uint32_t A2XX_CP_SET_PSEUDO_REG__1_LO(uint32_t val) -{ - return ((val) << A2XX_CP_SET_PSEUDO_REG__1_LO__SHIFT) & A2XX_CP_SET_PSEUDO_REG__1_LO__MASK; -} - -static inline uint32_t REG_A2XX_CP_SET_PSEUDO_REG__2(uint32_t i0) { return 0x00000002 + 0x3*i0; } -#define A2XX_CP_SET_PSEUDO_REG__2_HI__MASK 0xffffffff -#define A2XX_CP_SET_PSEUDO_REG__2_HI__SHIFT 0 -static inline uint32_t A2XX_CP_SET_PSEUDO_REG__2_HI(uint32_t val) -{ - return ((val) << A2XX_CP_SET_PSEUDO_REG__2_HI__SHIFT) & A2XX_CP_SET_PSEUDO_REG__2_HI__MASK; -} - -#define REG_A2XX_CP_REG_TEST_0 0x00000000 -#define A2XX_CP_REG_TEST_0_REG__MASK 0x00000fff -#define A2XX_CP_REG_TEST_0_REG__SHIFT 0 -static inline uint32_t 
A2XX_CP_REG_TEST_0_REG(uint32_t val) -{ - return ((val) << A2XX_CP_REG_TEST_0_REG__SHIFT) & A2XX_CP_REG_TEST_0_REG__MASK; -} -#define A2XX_CP_REG_TEST_0_BIT__MASK 0x01f00000 -#define A2XX_CP_REG_TEST_0_BIT__SHIFT 20 -static inline uint32_t A2XX_CP_REG_TEST_0_BIT(uint32_t val) -{ - return ((val) << A2XX_CP_REG_TEST_0_BIT__SHIFT) & A2XX_CP_REG_TEST_0_BIT__MASK; -} -#define A2XX_CP_REG_TEST_0_UNK25 0x02000000 - - -#endif /* ADRENO_PM4_XML */ diff -Nru mesa-18.3.3/src/gallium/drivers/freedreno/Android.mk mesa-19.0.1/src/gallium/drivers/freedreno/Android.mk --- mesa-18.3.3/src/gallium/drivers/freedreno/Android.mk 2018-12-07 18:58:04.000000000 +0000 +++ mesa-19.0.1/src/gallium/drivers/freedreno/Android.mk 2019-03-31 23:16:37.000000000 +0000 @@ -27,7 +27,6 @@ LOCAL_SRC_FILES := \ $(C_SOURCES) \ - $(drm_SOURCES) \ $(a2xx_SOURCES) \ $(a3xx_SOURCES) \ $(a4xx_SOURCES) \ @@ -39,7 +38,8 @@ # -Wno-packed-bitfield-compat LOCAL_C_INCLUDES := \ - $(LOCAL_PATH)/ir3 + $(LOCAL_PATH)/ir3 \ + $(MESA_TOP)/include/drm-uapi LOCAL_GENERATED_SOURCES := $(MESA_GEN_NIR_H) diff -Nru mesa-18.3.3/src/gallium/drivers/freedreno/Automake.inc mesa-19.0.1/src/gallium/drivers/freedreno/Automake.inc --- mesa-18.3.3/src/gallium/drivers/freedreno/Automake.inc 2017-11-05 00:14:08.000000000 +0000 +++ mesa-19.0.1/src/gallium/drivers/freedreno/Automake.inc 2019-03-31 23:16:37.000000000 +0000 @@ -5,6 +5,8 @@ TARGET_LIB_DEPS += \ $(top_builddir)/src/gallium/winsys/freedreno/drm/libfreedrenodrm.la \ $(top_builddir)/src/gallium/drivers/freedreno/libfreedreno.la \ + $(top_builddir)/src/freedreno/libfreedreno_drm.la \ + $(top_builddir)/src/freedreno/libfreedreno_ir3.la \ $(FREEDRENO_LIBS) \ $(LIBDRM_LIBS) diff -Nru mesa-18.3.3/src/gallium/drivers/freedreno/disasm.h mesa-19.0.1/src/gallium/drivers/freedreno/disasm.h --- mesa-18.3.3/src/gallium/drivers/freedreno/disasm.h 2018-09-27 19:13:54.000000000 +0000 +++ mesa-19.0.1/src/gallium/drivers/freedreno/disasm.h 2019-03-31 23:16:37.000000000 +0000 @@ -27,66 +27,17 @@ 
#include #include +#include "compiler/shader_enums.h" #include "util/u_debug.h" -enum fd_shader_debug { - FD_DBG_SHADER_VS = 0x01, - FD_DBG_SHADER_FS = 0x02, - FD_DBG_SHADER_CS = 0x04, -}; - -extern enum fd_shader_debug fd_shader_debug; - -enum shader_t { - SHADER_VERTEX, - SHADER_TCS, - SHADER_TES, - SHADER_GEOM, - SHADER_FRAGMENT, - SHADER_COMPUTE, - SHADER_MAX, -}; - -static inline bool -shader_debug_enabled(enum shader_t type) -{ - switch (type) { - case SHADER_VERTEX: return !!(fd_shader_debug & FD_DBG_SHADER_VS); - case SHADER_FRAGMENT: return !!(fd_shader_debug & FD_DBG_SHADER_FS); - case SHADER_COMPUTE: return !!(fd_shader_debug & FD_DBG_SHADER_CS); - default: - debug_assert(0); - return false; - } -} - -static inline const char * -shader_stage_name(enum shader_t type) -{ - /* NOTE these names are chosen to match the INTEL_DEBUG output - * which frameretrace parses. Hurray accidental ABI! - */ - switch (type) { - case SHADER_VERTEX: return "vertex"; - case SHADER_TCS: return "tessellation control"; - case SHADER_TES: return "tessellation evaluation"; - case SHADER_GEOM: return "geometry"; - case SHADER_FRAGMENT: return "fragment"; - case SHADER_COMPUTE: return "compute"; - default: - debug_assert(0); - return NULL; - } -} - /* bitmask of debug flags */ enum debug_t { PRINT_RAW = 0x1, /* dump raw hexdump */ PRINT_VERBOSE = 0x2, }; -int disasm_a2xx(uint32_t *dwords, int sizedwords, int level, enum shader_t type); -int disasm_a3xx(uint32_t *dwords, int sizedwords, int level, FILE *out); +int disasm_a2xx(uint32_t *dwords, int sizedwords, int level, gl_shader_stage type); +int disasm_a3xx(uint32_t *dwords, int sizedwords, int level, FILE *out, unsigned gpu_id); void disasm_set_debug(enum debug_t debug); #endif /* DISASM_H_ */ diff -Nru mesa-18.3.3/src/gallium/drivers/freedreno/drm/freedreno_bo.c mesa-19.0.1/src/gallium/drivers/freedreno/drm/freedreno_bo.c --- mesa-18.3.3/src/gallium/drivers/freedreno/drm/freedreno_bo.c 2018-12-07 18:58:04.000000000 +0000 +++ 
mesa-19.0.1/src/gallium/drivers/freedreno/drm/freedreno_bo.c 1970-01-01 00:00:00.000000000 +0000 @@ -1,361 +0,0 @@ -/* - * Copyright (C) 2012-2018 Rob Clark - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * the rights to use, copy, modify, merge, publish, distribute, sublicense, - * and/or sell copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice (including the next - * paragraph) shall be included in all copies or substantial portions of the - * Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL - * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. 
- * - * Authors: - * Rob Clark - */ - -#include "os/os_mman.h" - -#include "freedreno_drmif.h" -#include "freedreno_priv.h" - -pthread_mutex_t table_lock = PTHREAD_MUTEX_INITIALIZER; -void bo_del(struct fd_bo *bo); - -/* set buffer name, and add to table, call w/ table_lock held: */ -static void set_name(struct fd_bo *bo, uint32_t name) -{ - bo->name = name; - /* add ourself into the handle table: */ - _mesa_hash_table_insert(bo->dev->name_table, &bo->name, bo); -} - -/* lookup a buffer, call w/ table_lock held: */ -static struct fd_bo * lookup_bo(struct hash_table *tbl, uint32_t key) -{ - struct fd_bo *bo = NULL; - struct hash_entry *entry = _mesa_hash_table_search(tbl, &key); - if (entry) { - /* found, incr refcnt and return: */ - bo = fd_bo_ref(entry->data); - - /* don't break the bucket if this bo was found in one */ - list_delinit(&bo->list); - } - return bo; -} - -/* allocate a new buffer object, call w/ table_lock held */ -static struct fd_bo * bo_from_handle(struct fd_device *dev, - uint32_t size, uint32_t handle) -{ - struct fd_bo *bo; - - bo = dev->funcs->bo_from_handle(dev, size, handle); - if (!bo) { - struct drm_gem_close req = { - .handle = handle, - }; - drmIoctl(dev->fd, DRM_IOCTL_GEM_CLOSE, &req); - return NULL; - } - bo->dev = fd_device_ref(dev); - bo->size = size; - bo->handle = handle; - p_atomic_set(&bo->refcnt, 1); - list_inithead(&bo->list); - /* add ourself into the handle table: */ - _mesa_hash_table_insert(dev->handle_table, &bo->handle, bo); - return bo; -} - -static struct fd_bo * -bo_new(struct fd_device *dev, uint32_t size, uint32_t flags, - struct fd_bo_cache *cache) -{ - struct fd_bo *bo = NULL; - uint32_t handle; - int ret; - - bo = fd_bo_cache_alloc(cache, &size, flags); - if (bo) - return bo; - - ret = dev->funcs->bo_new_handle(dev, size, flags, &handle); - if (ret) - return NULL; - - pthread_mutex_lock(&table_lock); - bo = bo_from_handle(dev, size, handle); - pthread_mutex_unlock(&table_lock); - - VG_BO_ALLOC(bo); - - return bo; 
-} - -struct fd_bo * -fd_bo_new(struct fd_device *dev, uint32_t size, uint32_t flags) -{ - struct fd_bo *bo = bo_new(dev, size, flags, &dev->bo_cache); - if (bo) - bo->bo_reuse = BO_CACHE; - return bo; -} - -/* internal function to allocate bo's that use the ringbuffer cache - * instead of the normal bo_cache. The purpose is, because cmdstream - * bo's get vmap'd on the kernel side, and that is expensive, we want - * to re-use cmdstream bo's for cmdstream and not unrelated purposes. - */ -struct fd_bo * -fd_bo_new_ring(struct fd_device *dev, uint32_t size, uint32_t flags) -{ - struct fd_bo *bo = bo_new(dev, size, flags, &dev->ring_cache); - if (bo) - bo->bo_reuse = RING_CACHE; - return bo; -} - -struct fd_bo * -fd_bo_from_handle(struct fd_device *dev, uint32_t handle, uint32_t size) -{ - struct fd_bo *bo = NULL; - - pthread_mutex_lock(&table_lock); - - bo = lookup_bo(dev->handle_table, handle); - if (bo) - goto out_unlock; - - bo = bo_from_handle(dev, size, handle); - - VG_BO_ALLOC(bo); - -out_unlock: - pthread_mutex_unlock(&table_lock); - - return bo; -} - -struct fd_bo * -fd_bo_from_dmabuf(struct fd_device *dev, int fd) -{ - int ret, size; - uint32_t handle; - struct fd_bo *bo; - - pthread_mutex_lock(&table_lock); - ret = drmPrimeFDToHandle(dev->fd, fd, &handle); - if (ret) { - pthread_mutex_unlock(&table_lock); - return NULL; - } - - bo = lookup_bo(dev->handle_table, handle); - if (bo) - goto out_unlock; - - /* lseek() to get bo size */ - size = lseek(fd, 0, SEEK_END); - lseek(fd, 0, SEEK_CUR); - - bo = bo_from_handle(dev, size, handle); - - VG_BO_ALLOC(bo); - -out_unlock: - pthread_mutex_unlock(&table_lock); - - return bo; -} - -struct fd_bo * fd_bo_from_name(struct fd_device *dev, uint32_t name) -{ - struct drm_gem_open req = { - .name = name, - }; - struct fd_bo *bo; - - pthread_mutex_lock(&table_lock); - - /* check name table first, to see if bo is already open: */ - bo = lookup_bo(dev->name_table, name); - if (bo) - goto out_unlock; - - if 
(drmIoctl(dev->fd, DRM_IOCTL_GEM_OPEN, &req)) { - ERROR_MSG("gem-open failed: %s", strerror(errno)); - goto out_unlock; - } - - bo = lookup_bo(dev->handle_table, req.handle); - if (bo) - goto out_unlock; - - bo = bo_from_handle(dev, req.size, req.handle); - if (bo) { - set_name(bo, name); - VG_BO_ALLOC(bo); - } - -out_unlock: - pthread_mutex_unlock(&table_lock); - - return bo; -} - -uint64_t fd_bo_get_iova(struct fd_bo *bo) -{ - if (!bo->iova) - bo->iova = bo->funcs->iova(bo); - return bo->iova; -} - -void fd_bo_put_iova(struct fd_bo *bo) -{ - /* currently a no-op */ -} - -struct fd_bo * fd_bo_ref(struct fd_bo *bo) -{ - p_atomic_inc(&bo->refcnt); - return bo; -} - -void fd_bo_del(struct fd_bo *bo) -{ - struct fd_device *dev = bo->dev; - - if (!atomic_dec_and_test(&bo->refcnt)) - return; - - pthread_mutex_lock(&table_lock); - - if ((bo->bo_reuse == BO_CACHE) && (fd_bo_cache_free(&dev->bo_cache, bo) == 0)) - goto out; - if ((bo->bo_reuse == RING_CACHE) && (fd_bo_cache_free(&dev->ring_cache, bo) == 0)) - goto out; - - bo_del(bo); - fd_device_del_locked(dev); -out: - pthread_mutex_unlock(&table_lock); -} - -/* Called under table_lock */ -void bo_del(struct fd_bo *bo) -{ - VG_BO_FREE(bo); - - if (bo->map) - os_munmap(bo->map, bo->size); - - /* TODO probably bo's in bucket list get removed from - * handle table?? 
- */ - - if (bo->handle) { - struct drm_gem_close req = { - .handle = bo->handle, - }; - _mesa_hash_table_remove_key(bo->dev->handle_table, &bo->handle); - if (bo->name) - _mesa_hash_table_remove_key(bo->dev->name_table, &bo->name); - drmIoctl(bo->dev->fd, DRM_IOCTL_GEM_CLOSE, &req); - } - - bo->funcs->destroy(bo); -} - -int fd_bo_get_name(struct fd_bo *bo, uint32_t *name) -{ - if (!bo->name) { - struct drm_gem_flink req = { - .handle = bo->handle, - }; - int ret; - - ret = drmIoctl(bo->dev->fd, DRM_IOCTL_GEM_FLINK, &req); - if (ret) { - return ret; - } - - pthread_mutex_lock(&table_lock); - set_name(bo, req.name); - pthread_mutex_unlock(&table_lock); - bo->bo_reuse = NO_CACHE; - } - - *name = bo->name; - - return 0; -} - -uint32_t fd_bo_handle(struct fd_bo *bo) -{ - return bo->handle; -} - -int fd_bo_dmabuf(struct fd_bo *bo) -{ - int ret, prime_fd; - - ret = drmPrimeHandleToFD(bo->dev->fd, bo->handle, DRM_CLOEXEC, - &prime_fd); - if (ret) { - ERROR_MSG("failed to get dmabuf fd: %d", ret); - return ret; - } - - bo->bo_reuse = NO_CACHE; - - return prime_fd; -} - -uint32_t fd_bo_size(struct fd_bo *bo) -{ - return bo->size; -} - -void * fd_bo_map(struct fd_bo *bo) -{ - if (!bo->map) { - uint64_t offset; - int ret; - - ret = bo->funcs->offset(bo, &offset); - if (ret) { - return NULL; - } - - bo->map = os_mmap(0, bo->size, PROT_READ | PROT_WRITE, MAP_SHARED, - bo->dev->fd, offset); - if (bo->map == MAP_FAILED) { - ERROR_MSG("mmap failed: %s", strerror(errno)); - bo->map = NULL; - } - } - return bo->map; -} - -/* a bit odd to take the pipe as an arg, but it's a, umm, quirk of kgsl.. 
*/ -int fd_bo_cpu_prep(struct fd_bo *bo, struct fd_pipe *pipe, uint32_t op) -{ - return bo->funcs->cpu_prep(bo, pipe, op); -} - -void fd_bo_cpu_fini(struct fd_bo *bo) -{ - bo->funcs->cpu_fini(bo); -} diff -Nru mesa-18.3.3/src/gallium/drivers/freedreno/drm/freedreno_bo_cache.c mesa-19.0.1/src/gallium/drivers/freedreno/drm/freedreno_bo_cache.c --- mesa-18.3.3/src/gallium/drivers/freedreno/drm/freedreno_bo_cache.c 2018-12-07 18:58:04.000000000 +0000 +++ mesa-19.0.1/src/gallium/drivers/freedreno/drm/freedreno_bo_cache.c 1970-01-01 00:00:00.000000000 +0000 @@ -1,218 +0,0 @@ -/* - * Copyright (C) 2012-2018 Rob Clark - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * the rights to use, copy, modify, merge, publish, distribute, sublicense, - * and/or sell copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice (including the next - * paragraph) shall be included in all copies or substantial portions of the - * Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL - * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. 
- * - * Authors: - * Rob Clark - */ - -#include "freedreno_drmif.h" -#include "freedreno_priv.h" - -void bo_del(struct fd_bo *bo); -extern pthread_mutex_t table_lock; - -static void -add_bucket(struct fd_bo_cache *cache, int size) -{ - unsigned int i = cache->num_buckets; - - assert(i < ARRAY_SIZE(cache->cache_bucket)); - - list_inithead(&cache->cache_bucket[i].list); - cache->cache_bucket[i].size = size; - cache->num_buckets++; -} - -/** - * @coarse: if true, only power-of-two bucket sizes, otherwise - * fill in for a bit smoother size curve.. - */ -void -fd_bo_cache_init(struct fd_bo_cache *cache, int coarse) -{ - unsigned long size, cache_max_size = 64 * 1024 * 1024; - - /* OK, so power of two buckets was too wasteful of memory. - * Give 3 other sizes between each power of two, to hopefully - * cover things accurately enough. (The alternative is - * probably to just go for exact matching of sizes, and assume - * that for things like composited window resize the tiled - * width/height alignment and rounding of sizes to pages will - * get us useful cache hit rates anyway) - */ - add_bucket(cache, 4096); - add_bucket(cache, 4096 * 2); - if (!coarse) - add_bucket(cache, 4096 * 3); - - /* Initialize the linked lists for BO reuse cache. */ - for (size = 4 * 4096; size <= cache_max_size; size *= 2) { - add_bucket(cache, size); - if (!coarse) { - add_bucket(cache, size + size * 1 / 4); - add_bucket(cache, size + size * 2 / 4); - add_bucket(cache, size + size * 3 / 4); - } - } -} - -/* Frees older cached buffers. 
Called under table_lock */ -void -fd_bo_cache_cleanup(struct fd_bo_cache *cache, time_t time) -{ - int i; - - if (cache->time == time) - return; - - for (i = 0; i < cache->num_buckets; i++) { - struct fd_bo_bucket *bucket = &cache->cache_bucket[i]; - struct fd_bo *bo; - - while (!LIST_IS_EMPTY(&bucket->list)) { - bo = LIST_ENTRY(struct fd_bo, bucket->list.next, list); - - /* keep things in cache for at least 1 second: */ - if (time && ((time - bo->free_time) <= 1)) - break; - - VG_BO_OBTAIN(bo); - list_del(&bo->list); - bo_del(bo); - } - } - - cache->time = time; -} - -static struct fd_bo_bucket * get_bucket(struct fd_bo_cache *cache, uint32_t size) -{ - int i; - - /* hmm, this is what intel does, but I suppose we could calculate our - * way to the correct bucket size rather than looping.. - */ - for (i = 0; i < cache->num_buckets; i++) { - struct fd_bo_bucket *bucket = &cache->cache_bucket[i]; - if (bucket->size >= size) { - return bucket; - } - } - - return NULL; -} - -static int is_idle(struct fd_bo *bo) -{ - return fd_bo_cpu_prep(bo, NULL, - DRM_FREEDRENO_PREP_READ | - DRM_FREEDRENO_PREP_WRITE | - DRM_FREEDRENO_PREP_NOSYNC) == 0; -} - -static struct fd_bo *find_in_bucket(struct fd_bo_bucket *bucket, uint32_t flags) -{ - struct fd_bo *bo = NULL; - - /* TODO .. if we had an ALLOC_FOR_RENDER flag like intel, we could - * skip the busy check.. if it is only going to be a render target - * then we probably don't need to stall.. - * - * NOTE that intel takes ALLOC_FOR_RENDER bo's from the list tail - * (MRU, since likely to be in GPU cache), rather than head (LRU).. - */ - pthread_mutex_lock(&table_lock); - if (!LIST_IS_EMPTY(&bucket->list)) { - bo = LIST_ENTRY(struct fd_bo, bucket->list.next, list); - /* TODO check for compatible flags? 
*/ - if (is_idle(bo)) { - list_del(&bo->list); - } else { - bo = NULL; - } - } - pthread_mutex_unlock(&table_lock); - - return bo; -} - -/* NOTE: size is potentially rounded up to bucket size: */ -struct fd_bo * -fd_bo_cache_alloc(struct fd_bo_cache *cache, uint32_t *size, uint32_t flags) -{ - struct fd_bo *bo = NULL; - struct fd_bo_bucket *bucket; - - *size = align(*size, 4096); - bucket = get_bucket(cache, *size); - - /* see if we can be green and recycle: */ -retry: - if (bucket) { - *size = bucket->size; - bo = find_in_bucket(bucket, flags); - if (bo) { - VG_BO_OBTAIN(bo); - if (bo->funcs->madvise(bo, TRUE) <= 0) { - /* we've lost the backing pages, delete and try again: */ - pthread_mutex_lock(&table_lock); - bo_del(bo); - pthread_mutex_unlock(&table_lock); - goto retry; - } - p_atomic_set(&bo->refcnt, 1); - fd_device_ref(bo->dev); - return bo; - } - } - - return NULL; -} - -int -fd_bo_cache_free(struct fd_bo_cache *cache, struct fd_bo *bo) -{ - struct fd_bo_bucket *bucket = get_bucket(cache, bo->size); - - /* see if we can be green and recycle: */ - if (bucket) { - struct timespec time; - - bo->funcs->madvise(bo, FALSE); - - clock_gettime(CLOCK_MONOTONIC, &time); - - bo->free_time = time.tv_sec; - VG_BO_RELEASE(bo); - list_addtail(&bo->list, &bucket->list); - fd_bo_cache_cleanup(cache, time.tv_sec); - - /* bo's in the bucket cache don't have a ref and - * don't hold a ref to the dev: - */ - fd_device_del_locked(bo->dev); - - return 0; - } - - return -1; -} diff -Nru mesa-18.3.3/src/gallium/drivers/freedreno/drm/freedreno_device.c mesa-19.0.1/src/gallium/drivers/freedreno/drm/freedreno_device.c --- mesa-18.3.3/src/gallium/drivers/freedreno/drm/freedreno_device.c 2018-12-07 18:58:04.000000000 +0000 +++ mesa-19.0.1/src/gallium/drivers/freedreno/drm/freedreno_device.c 1970-01-01 00:00:00.000000000 +0000 @@ -1,156 +0,0 @@ -/* - * Copyright (C) 2012-2018 Rob Clark - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this 
software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * the rights to use, copy, modify, merge, publish, distribute, sublicense, - * and/or sell copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice (including the next - * paragraph) shall be included in all copies or substantial portions of the - * Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL - * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. 
- * - * Authors: - * Rob Clark - */ - -#include -#include -#include - -#include "freedreno_drmif.h" -#include "freedreno_priv.h" - -static pthread_mutex_t table_lock = PTHREAD_MUTEX_INITIALIZER; - -static uint32_t -u32_hash(const void *key) -{ - return _mesa_hash_data(key, sizeof(uint32_t)); -} - -static bool -u32_equals(const void *key1, const void *key2) -{ - return *(const uint32_t *)key1 == *(const uint32_t *)key2; -} - - -struct fd_device * kgsl_device_new(int fd); -struct fd_device * msm_device_new(int fd); - -struct fd_device * fd_device_new(int fd) -{ - struct fd_device *dev; - drmVersionPtr version; - - /* figure out if we are kgsl or msm drm driver: */ - version = drmGetVersion(fd); - if (!version) { - ERROR_MSG("cannot get version: %s", strerror(errno)); - return NULL; - } - - if (!strcmp(version->name, "msm")) { - DEBUG_MSG("msm DRM device"); - if (version->version_major != 1) { - ERROR_MSG("unsupported version: %u.%u.%u", version->version_major, - version->version_minor, version->version_patchlevel); - dev = NULL; - goto out; - } - - dev = msm_device_new(fd); - dev->version = version->version_minor; -#if HAVE_FREEDRENO_KGSL - } else if (!strcmp(version->name, "kgsl")) { - DEBUG_MSG("kgsl DRM device"); - dev = kgsl_device_new(fd); -#endif - } else { - ERROR_MSG("unknown device: %s", version->name); - dev = NULL; - } - -out: - drmFreeVersion(version); - - if (!dev) - return NULL; - - p_atomic_set(&dev->refcnt, 1); - dev->fd = fd; - dev->handle_table = _mesa_hash_table_create(NULL, u32_hash, u32_equals); - dev->name_table = _mesa_hash_table_create(NULL, u32_hash, u32_equals); - fd_bo_cache_init(&dev->bo_cache, FALSE); - fd_bo_cache_init(&dev->ring_cache, TRUE); - - return dev; -} - -/* like fd_device_new() but creates it's own private dup() of the fd - * which is close()d when the device is finalized. 
- */ -struct fd_device * fd_device_new_dup(int fd) -{ - int dup_fd = dup(fd); - struct fd_device *dev = fd_device_new(dup_fd); - if (dev) - dev->closefd = 1; - else - close(dup_fd); - return dev; -} - -struct fd_device * fd_device_ref(struct fd_device *dev) -{ - p_atomic_inc(&dev->refcnt); - return dev; -} - -static void fd_device_del_impl(struct fd_device *dev) -{ - int close_fd = dev->closefd ? dev->fd : -1; - fd_bo_cache_cleanup(&dev->bo_cache, 0); - _mesa_hash_table_destroy(dev->handle_table, NULL); - _mesa_hash_table_destroy(dev->name_table, NULL); - dev->funcs->destroy(dev); - if (close_fd >= 0) - close(close_fd); -} - -void fd_device_del_locked(struct fd_device *dev) -{ - if (!atomic_dec_and_test(&dev->refcnt)) - return; - fd_device_del_impl(dev); -} - -void fd_device_del(struct fd_device *dev) -{ - if (!atomic_dec_and_test(&dev->refcnt)) - return; - pthread_mutex_lock(&table_lock); - fd_device_del_impl(dev); - pthread_mutex_unlock(&table_lock); -} - -int fd_device_fd(struct fd_device *dev) -{ - return dev->fd; -} - -enum fd_version fd_device_version(struct fd_device *dev) -{ - return dev->version; -} diff -Nru mesa-18.3.3/src/gallium/drivers/freedreno/drm/freedreno_drmif.h mesa-19.0.1/src/gallium/drivers/freedreno/drm/freedreno_drmif.h --- mesa-18.3.3/src/gallium/drivers/freedreno/drm/freedreno_drmif.h 2018-12-07 18:58:04.000000000 +0000 +++ mesa-19.0.1/src/gallium/drivers/freedreno/drm/freedreno_drmif.h 1970-01-01 00:00:00.000000000 +0000 @@ -1,126 +0,0 @@ -/* - * Copyright (C) 2012-2018 Rob Clark - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * the rights to use, copy, modify, merge, publish, distribute, sublicense, - * and/or sell copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following conditions: - * - 
* The above copyright notice and this permission notice (including the next - * paragraph) shall be included in all copies or substantial portions of the - * Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL - * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - * - * Authors: - * Rob Clark - */ - -#ifndef FREEDRENO_DRMIF_H_ -#define FREEDRENO_DRMIF_H_ - -#include - -struct fd_bo; -struct fd_pipe; -struct fd_device; - -enum fd_pipe_id { - FD_PIPE_3D = 1, - FD_PIPE_2D = 2, - /* some devices have two 2d blocks.. not really sure how to - * use that yet, so just ignoring the 2nd 2d pipe for now - */ - FD_PIPE_MAX -}; - -enum fd_param_id { - FD_DEVICE_ID, - FD_GMEM_SIZE, - FD_GPU_ID, - FD_CHIP_ID, - FD_MAX_FREQ, - FD_TIMESTAMP, - FD_NR_RINGS, /* # of rings == # of distinct priority levels */ -}; - -/* bo flags: */ -#define DRM_FREEDRENO_GEM_TYPE_SMI 0x00000001 -#define DRM_FREEDRENO_GEM_TYPE_KMEM 0x00000002 -#define DRM_FREEDRENO_GEM_TYPE_MEM_MASK 0x0000000f -#define DRM_FREEDRENO_GEM_CACHE_NONE 0x00000000 -#define DRM_FREEDRENO_GEM_CACHE_WCOMBINE 0x00100000 -#define DRM_FREEDRENO_GEM_CACHE_WTHROUGH 0x00200000 -#define DRM_FREEDRENO_GEM_CACHE_WBACK 0x00400000 -#define DRM_FREEDRENO_GEM_CACHE_WBACKWA 0x00800000 -#define DRM_FREEDRENO_GEM_CACHE_MASK 0x00f00000 -#define DRM_FREEDRENO_GEM_GPUREADONLY 0x01000000 - -/* bo access flags: (keep aligned to MSM_PREP_x) */ -#define DRM_FREEDRENO_PREP_READ 0x01 -#define DRM_FREEDRENO_PREP_WRITE 0x02 -#define DRM_FREEDRENO_PREP_NOSYNC 0x04 - -/* device functions: - */ - -struct fd_device * fd_device_new(int fd); -struct fd_device * 
fd_device_new_dup(int fd); -struct fd_device * fd_device_ref(struct fd_device *dev); -void fd_device_del(struct fd_device *dev); -int fd_device_fd(struct fd_device *dev); - -enum fd_version { - FD_VERSION_MADVISE = 1, /* kernel supports madvise */ - FD_VERSION_UNLIMITED_CMDS = 1, /* submits w/ >4 cmd buffers (growable ringbuffer) */ - FD_VERSION_FENCE_FD = 2, /* submit command supports in/out fences */ - FD_VERSION_SUBMIT_QUEUES = 3, /* submit queues and multiple priority levels */ - FD_VERSION_BO_IOVA = 3, /* supports fd_bo_get/put_iova() */ -}; -enum fd_version fd_device_version(struct fd_device *dev); - -/* pipe functions: - */ - -struct fd_pipe * fd_pipe_new(struct fd_device *dev, enum fd_pipe_id id); -struct fd_pipe * fd_pipe_new2(struct fd_device *dev, enum fd_pipe_id id, uint32_t prio); -struct fd_pipe * fd_pipe_ref(struct fd_pipe *pipe); -void fd_pipe_del(struct fd_pipe *pipe); -int fd_pipe_get_param(struct fd_pipe *pipe, enum fd_param_id param, - uint64_t *value); -int fd_pipe_wait(struct fd_pipe *pipe, uint32_t timestamp); -/* timeout in nanosec */ -int fd_pipe_wait_timeout(struct fd_pipe *pipe, uint32_t timestamp, - uint64_t timeout); - - -/* buffer-object functions: - */ - -struct fd_bo * fd_bo_new(struct fd_device *dev, - uint32_t size, uint32_t flags); -struct fd_bo *fd_bo_from_handle(struct fd_device *dev, - uint32_t handle, uint32_t size); -struct fd_bo * fd_bo_from_name(struct fd_device *dev, uint32_t name); -struct fd_bo * fd_bo_from_dmabuf(struct fd_device *dev, int fd); -uint64_t fd_bo_get_iova(struct fd_bo *bo); -void fd_bo_put_iova(struct fd_bo *bo); -struct fd_bo * fd_bo_ref(struct fd_bo *bo); -void fd_bo_del(struct fd_bo *bo); -int fd_bo_get_name(struct fd_bo *bo, uint32_t *name); -uint32_t fd_bo_handle(struct fd_bo *bo); -int fd_bo_dmabuf(struct fd_bo *bo); -uint32_t fd_bo_size(struct fd_bo *bo); -void * fd_bo_map(struct fd_bo *bo); -int fd_bo_cpu_prep(struct fd_bo *bo, struct fd_pipe *pipe, uint32_t op); -void fd_bo_cpu_fini(struct fd_bo 
*bo); - -#endif /* FREEDRENO_DRMIF_H_ */ diff -Nru mesa-18.3.3/src/gallium/drivers/freedreno/drm/freedreno_pipe.c mesa-19.0.1/src/gallium/drivers/freedreno/drm/freedreno_pipe.c --- mesa-18.3.3/src/gallium/drivers/freedreno/drm/freedreno_pipe.c 2018-12-07 18:58:04.000000000 +0000 +++ mesa-19.0.1/src/gallium/drivers/freedreno/drm/freedreno_pipe.c 1970-01-01 00:00:00.000000000 +0000 @@ -1,100 +0,0 @@ -/* - * Copyright (C) 2012-2018 Rob Clark - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * the rights to use, copy, modify, merge, publish, distribute, sublicense, - * and/or sell copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice (including the next - * paragraph) shall be included in all copies or substantial portions of the - * Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL - * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. 
- * - * Authors: - * Rob Clark - */ - -#include "freedreno_drmif.h" -#include "freedreno_priv.h" - -/** - * priority of zero is highest priority, and higher numeric values are - * lower priorities - */ -struct fd_pipe * -fd_pipe_new2(struct fd_device *dev, enum fd_pipe_id id, uint32_t prio) -{ - struct fd_pipe *pipe; - uint64_t val; - - if (id > FD_PIPE_MAX) { - ERROR_MSG("invalid pipe id: %d", id); - return NULL; - } - - if ((prio != 1) && (fd_device_version(dev) < FD_VERSION_SUBMIT_QUEUES)) { - ERROR_MSG("invalid priority!"); - return NULL; - } - - pipe = dev->funcs->pipe_new(dev, id, prio); - if (!pipe) { - ERROR_MSG("allocation failed"); - return NULL; - } - - pipe->dev = dev; - pipe->id = id; - p_atomic_set(&pipe->refcnt, 1); - - fd_pipe_get_param(pipe, FD_GPU_ID, &val); - pipe->gpu_id = val; - - return pipe; -} - -struct fd_pipe * -fd_pipe_new(struct fd_device *dev, enum fd_pipe_id id) -{ - return fd_pipe_new2(dev, id, 1); -} - -struct fd_pipe * fd_pipe_ref(struct fd_pipe *pipe) -{ - p_atomic_inc(&pipe->refcnt); - return pipe; -} - -void fd_pipe_del(struct fd_pipe *pipe) -{ - if (!atomic_dec_and_test(&pipe->refcnt)) - return; - pipe->funcs->destroy(pipe); -} - -int fd_pipe_get_param(struct fd_pipe *pipe, - enum fd_param_id param, uint64_t *value) -{ - return pipe->funcs->get_param(pipe, param, value); -} - -int fd_pipe_wait(struct fd_pipe *pipe, uint32_t timestamp) -{ - return fd_pipe_wait_timeout(pipe, timestamp, ~0); -} - -int fd_pipe_wait_timeout(struct fd_pipe *pipe, uint32_t timestamp, - uint64_t timeout) -{ - return pipe->funcs->wait(pipe, timestamp, timeout); -} diff -Nru mesa-18.3.3/src/gallium/drivers/freedreno/drm/freedreno_priv.h mesa-19.0.1/src/gallium/drivers/freedreno/drm/freedreno_priv.h --- mesa-18.3.3/src/gallium/drivers/freedreno/drm/freedreno_priv.h 2018-12-07 18:58:04.000000000 +0000 +++ mesa-19.0.1/src/gallium/drivers/freedreno/drm/freedreno_priv.h 1970-01-01 00:00:00.000000000 +0000 @@ -1,258 +0,0 @@ -/* - * Copyright (C) 2012-2018 Rob 
Clark - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * the rights to use, copy, modify, merge, publish, distribute, sublicense, - * and/or sell copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice (including the next - * paragraph) shall be included in all copies or substantial portions of the - * Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL - * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. 
- * - * Authors: - * Rob Clark - */ - -#ifndef FREEDRENO_PRIV_H_ -#define FREEDRENO_PRIV_H_ - -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#include - -#include "util/hash_table.h" -#include "util/list.h" -#include "util/u_debug.h" -#include "util/u_atomic.h" -#include "util/u_math.h" -#include "util/u_debug.h" - -#include "freedreno_drmif.h" -#include "freedreno_ringbuffer.h" - -#define atomic_dec_and_test(x) (__sync_add_and_fetch (x, -1) == 0) - -struct fd_device_funcs { - int (*bo_new_handle)(struct fd_device *dev, uint32_t size, - uint32_t flags, uint32_t *handle); - struct fd_bo * (*bo_from_handle)(struct fd_device *dev, - uint32_t size, uint32_t handle); - struct fd_pipe * (*pipe_new)(struct fd_device *dev, enum fd_pipe_id id, - unsigned prio); - void (*destroy)(struct fd_device *dev); -}; - -struct fd_bo_bucket { - uint32_t size; - struct list_head list; -}; - -struct fd_bo_cache { - struct fd_bo_bucket cache_bucket[14 * 4]; - int num_buckets; - time_t time; -}; - -struct fd_device { - int fd; - enum fd_version version; - int32_t refcnt; - - /* tables to keep track of bo's, to avoid "evil-twin" fd_bo objects: - * - * handle_table: maps handle to fd_bo - * name_table: maps flink name to fd_bo - * - * We end up needing two tables, because DRM_IOCTL_GEM_OPEN always - * returns a new handle. So we need to figure out if the bo is already - * open in the process first, before calling gem-open. 
- */ - struct hash_table *handle_table, *name_table; - - const struct fd_device_funcs *funcs; - - struct fd_bo_cache bo_cache; - struct fd_bo_cache ring_cache; - - int closefd; /* call close(fd) upon destruction */ - - /* just for valgrind: */ - int bo_size; -}; - -void fd_bo_cache_init(struct fd_bo_cache *cache, int coarse); -void fd_bo_cache_cleanup(struct fd_bo_cache *cache, time_t time); -struct fd_bo * fd_bo_cache_alloc(struct fd_bo_cache *cache, - uint32_t *size, uint32_t flags); -int fd_bo_cache_free(struct fd_bo_cache *cache, struct fd_bo *bo); - -/* for where @table_lock is already held: */ -void fd_device_del_locked(struct fd_device *dev); - -struct fd_pipe_funcs { - struct fd_ringbuffer * (*ringbuffer_new_object)(struct fd_pipe *pipe, uint32_t size); - struct fd_submit * (*submit_new)(struct fd_pipe *pipe); - int (*get_param)(struct fd_pipe *pipe, enum fd_param_id param, uint64_t *value); - int (*wait)(struct fd_pipe *pipe, uint32_t timestamp, uint64_t timeout); - void (*destroy)(struct fd_pipe *pipe); -}; - -struct fd_pipe { - struct fd_device *dev; - enum fd_pipe_id id; - uint32_t gpu_id; - int32_t refcnt; - const struct fd_pipe_funcs *funcs; -}; - -struct fd_submit_funcs { - struct fd_ringbuffer * (*new_ringbuffer)(struct fd_submit *submit, - uint32_t size, enum fd_ringbuffer_flags flags); - int (*flush)(struct fd_submit *submit, int in_fence_fd, - int *out_fence_fd, uint32_t *out_fence); - void (*destroy)(struct fd_submit *submit); -}; - -struct fd_submit { - struct fd_pipe *pipe; - const struct fd_submit_funcs *funcs; -}; - -struct fd_ringbuffer_funcs { - void (*grow)(struct fd_ringbuffer *ring, uint32_t size); - void (*emit_reloc)(struct fd_ringbuffer *ring, - const struct fd_reloc *reloc); - uint32_t (*emit_reloc_ring)(struct fd_ringbuffer *ring, - struct fd_ringbuffer *target, uint32_t cmd_idx); - uint32_t (*cmd_count)(struct fd_ringbuffer *ring); - void (*destroy)(struct fd_ringbuffer *ring); -}; - -struct fd_bo_funcs { - int (*offset)(struct 
fd_bo *bo, uint64_t *offset); - int (*cpu_prep)(struct fd_bo *bo, struct fd_pipe *pipe, uint32_t op); - void (*cpu_fini)(struct fd_bo *bo); - int (*madvise)(struct fd_bo *bo, int willneed); - uint64_t (*iova)(struct fd_bo *bo); - void (*destroy)(struct fd_bo *bo); -}; - -struct fd_bo { - struct fd_device *dev; - uint32_t size; - uint32_t handle; - uint32_t name; - int32_t refcnt; - uint64_t iova; - void *map; - const struct fd_bo_funcs *funcs; - - enum { - NO_CACHE = 0, - BO_CACHE = 1, - RING_CACHE = 2, - } bo_reuse; - - struct list_head list; /* bucket-list entry */ - time_t free_time; /* time when added to bucket-list */ -}; - -struct fd_bo *fd_bo_new_ring(struct fd_device *dev, - uint32_t size, uint32_t flags); - -#define enable_debug 0 /* TODO make dynamic */ - -#define INFO_MSG(fmt, ...) \ - do { debug_printf("[I] "fmt " (%s:%d)\n", \ - ##__VA_ARGS__, __FUNCTION__, __LINE__); } while (0) -#define DEBUG_MSG(fmt, ...) \ - do if (enable_debug) { debug_printf("[D] "fmt " (%s:%d)\n", \ - ##__VA_ARGS__, __FUNCTION__, __LINE__); } while (0) -#define WARN_MSG(fmt, ...) \ - do { debug_printf("[W] "fmt " (%s:%d)\n", \ - ##__VA_ARGS__, __FUNCTION__, __LINE__); } while (0) -#define ERROR_MSG(fmt, ...) \ - do { debug_printf("[E] " fmt " (%s:%d)\n", \ - ##__VA_ARGS__, __FUNCTION__, __LINE__); } while (0) - -#define U642VOID(x) ((void *)(unsigned long)(x)) -#define VOID2U64(x) ((uint64_t)(unsigned long)(x)) - -#if HAVE_VALGRIND -# include - -/* - * For tracking the backing memory (if valgrind enabled, we force a mmap - * for the purposes of tracking) - */ -static inline void VG_BO_ALLOC(struct fd_bo *bo) -{ - if (bo && RUNNING_ON_VALGRIND) { - VALGRIND_MALLOCLIKE_BLOCK(fd_bo_map(bo), bo->size, 0, 1); - } -} - -static inline void VG_BO_FREE(struct fd_bo *bo) -{ - VALGRIND_FREELIKE_BLOCK(bo->map, 0); -} - -/* - * For tracking bo structs that are in the buffer-cache, so that valgrind - * doesn't attribute ownership to the first one to allocate the recycled - * bo. 
- * - * Note that the list_head in fd_bo is used to track the buffers in cache - * so disable error reporting on the range while they are in cache so - * valgrind doesn't squawk about list traversal. - * - */ -static inline void VG_BO_RELEASE(struct fd_bo *bo) -{ - if (RUNNING_ON_VALGRIND) { - VALGRIND_DISABLE_ADDR_ERROR_REPORTING_IN_RANGE(bo, bo->dev->bo_size); - VALGRIND_MAKE_MEM_NOACCESS(bo, bo->dev->bo_size); - VALGRIND_FREELIKE_BLOCK(bo->map, 0); - } -} -static inline void VG_BO_OBTAIN(struct fd_bo *bo) -{ - if (RUNNING_ON_VALGRIND) { - VALGRIND_MAKE_MEM_DEFINED(bo, bo->dev->bo_size); - VALGRIND_ENABLE_ADDR_ERROR_REPORTING_IN_RANGE(bo, bo->dev->bo_size); - VALGRIND_MALLOCLIKE_BLOCK(bo->map, bo->size, 0, 1); - } -} -#else -static inline void VG_BO_ALLOC(struct fd_bo *bo) {} -static inline void VG_BO_FREE(struct fd_bo *bo) {} -static inline void VG_BO_RELEASE(struct fd_bo *bo) {} -static inline void VG_BO_OBTAIN(struct fd_bo *bo) {} -#endif - -#define FD_DEFINE_CAST(parent, child) \ -static inline struct child * to_ ## child (struct parent *x) \ -{ return (struct child *)x; } - - -#endif /* FREEDRENO_PRIV_H_ */ diff -Nru mesa-18.3.3/src/gallium/drivers/freedreno/drm/freedreno_ringbuffer.c mesa-19.0.1/src/gallium/drivers/freedreno/drm/freedreno_ringbuffer.c --- mesa-18.3.3/src/gallium/drivers/freedreno/drm/freedreno_ringbuffer.c 2018-12-07 18:58:04.000000000 +0000 +++ mesa-19.0.1/src/gallium/drivers/freedreno/drm/freedreno_ringbuffer.c 1970-01-01 00:00:00.000000000 +0000 @@ -1,114 +0,0 @@ -/* - * Copyright (C) 2012-2018 Rob Clark - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * the rights to use, copy, modify, merge, publish, distribute, sublicense, - * and/or sell copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the 
following conditions: - * - * The above copyright notice and this permission notice (including the next - * paragraph) shall be included in all copies or substantial portions of the - * Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL - * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - * - * Authors: - * Rob Clark - */ - -#include - -#include "freedreno_drmif.h" -#include "freedreno_ringbuffer.h" -#include "freedreno_priv.h" - -struct fd_submit * -fd_submit_new(struct fd_pipe *pipe) -{ - return pipe->funcs->submit_new(pipe); -} - -void -fd_submit_del(struct fd_submit *submit) -{ - return submit->funcs->destroy(submit); -} - -int -fd_submit_flush(struct fd_submit *submit, int in_fence_fd, int *out_fence_fd, - uint32_t *out_fence) -{ - return submit->funcs->flush(submit, in_fence_fd, out_fence_fd, out_fence); -} - -struct fd_ringbuffer * -fd_submit_new_ringbuffer(struct fd_submit *submit, uint32_t size, - enum fd_ringbuffer_flags flags) -{ - debug_assert(!(flags & _FD_RINGBUFFER_OBJECT)); - if (flags & FD_RINGBUFFER_STREAMING) { - debug_assert(!(flags & FD_RINGBUFFER_GROWABLE)); - debug_assert(!(flags & FD_RINGBUFFER_PRIMARY)); - } - return submit->funcs->new_ringbuffer(submit, size, flags); -} - -struct fd_ringbuffer * -fd_ringbuffer_new_object(struct fd_pipe *pipe, uint32_t size) -{ - return pipe->funcs->ringbuffer_new_object(pipe, size); -} - -void fd_ringbuffer_del(struct fd_ringbuffer *ring) -{ - if (!atomic_dec_and_test(&ring->refcnt)) - return; - - ring->funcs->destroy(ring); -} - -struct fd_ringbuffer * -fd_ringbuffer_ref(struct fd_ringbuffer *ring) -{ - 
p_atomic_inc(&ring->refcnt); - return ring; -} - -void fd_ringbuffer_grow(struct fd_ringbuffer *ring, uint32_t ndwords) -{ - assert(ring->funcs->grow); /* unsupported on kgsl */ - - /* there is an upper bound on IB size, which appears to be 0x100000 */ - if (ring->size < 0x100000) - ring->size *= 2; - - ring->funcs->grow(ring, ring->size); -} - -void fd_ringbuffer_reloc(struct fd_ringbuffer *ring, - const struct fd_reloc *reloc) -{ - ring->funcs->emit_reloc(ring, reloc); -} - -uint32_t fd_ringbuffer_cmd_count(struct fd_ringbuffer *ring) -{ - if (!ring->funcs->cmd_count) - return 1; - return ring->funcs->cmd_count(ring); -} - -uint32_t -fd_ringbuffer_emit_reloc_ring_full(struct fd_ringbuffer *ring, - struct fd_ringbuffer *target, uint32_t cmd_idx) -{ - return ring->funcs->emit_reloc_ring(ring, target, cmd_idx); -} diff -Nru mesa-18.3.3/src/gallium/drivers/freedreno/drm/freedreno_ringbuffer.h mesa-19.0.1/src/gallium/drivers/freedreno/drm/freedreno_ringbuffer.h --- mesa-18.3.3/src/gallium/drivers/freedreno/drm/freedreno_ringbuffer.h 2018-12-07 18:58:04.000000000 +0000 +++ mesa-19.0.1/src/gallium/drivers/freedreno/drm/freedreno_ringbuffer.h 1970-01-01 00:00:00.000000000 +0000 @@ -1,159 +0,0 @@ -/* - * Copyright (C) 2012-2018 Rob Clark - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * the rights to use, copy, modify, merge, publish, distribute, sublicense, - * and/or sell copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice (including the next - * paragraph) shall be included in all copies or substantial portions of the - * Software. 
- * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL - * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - * - * Authors: - * Rob Clark - */ - -#ifndef FREEDRENO_RINGBUFFER_H_ -#define FREEDRENO_RINGBUFFER_H_ - -#include "util/u_debug.h" - -#include "freedreno_drmif.h" - -struct fd_submit; -struct fd_ringbuffer; - -enum fd_ringbuffer_flags { - - /* Primary ringbuffer for a submit, ie. an IB1 level rb - * which kernel must setup RB->IB1 CP_INDIRECT_BRANCH - * packets. - */ - FD_RINGBUFFER_PRIMARY = 0x1, - - /* Hint that the stateobj will be used for streaming state - * that is used once or a few times and then discarded. - * - * For sub-allocation, non streaming stateobj's should be - * sub-allocated from a page size buffer, so one long lived - * state obj doesn't prevent other pages from being freed. - * (Ie. it would be no worse than allocating a page sized - * bo for each small non-streaming stateobj). - * - * But streaming stateobj's could be sub-allocated from a - * larger buffer to reduce the alloc/del overhead. - */ - FD_RINGBUFFER_STREAMING = 0x2, - - /* Indicates that "growable" cmdstream can be used, - * consisting of multiple physical cmdstream buffers - */ - FD_RINGBUFFER_GROWABLE = 0x4, - - /* Internal use only: */ - _FD_RINGBUFFER_OBJECT = 0x8, -}; - -/* A submit object manages/tracks all the state buildup for a "submit" - * ioctl to the kernel. Additionally, with the exception of long-lived - * non-STREAMING stateobj rb's, rb's are allocated from the submit. 
- */ -struct fd_submit * fd_submit_new(struct fd_pipe *pipe); - -/* NOTE: all ringbuffer's create from the submit should be unref'd - * before destroying the submit. - */ -void fd_submit_del(struct fd_submit *submit); - -/* Allocate a new rb from the submit. */ -struct fd_ringbuffer * fd_submit_new_ringbuffer(struct fd_submit *submit, - uint32_t size, enum fd_ringbuffer_flags flags); - -/* in_fence_fd: -1 for no in-fence, else fence fd - * out_fence_fd: NULL for no output-fence requested, else ptr to return out-fence - */ -int fd_submit_flush(struct fd_submit *submit, - int in_fence_fd, int *out_fence_fd, - uint32_t *out_fence); - -struct fd_ringbuffer_funcs; - -/* the ringbuffer object is not opaque so that OUT_RING() type stuff - * can be inlined. Note that users should not make assumptions about - * the size of this struct. - */ -struct fd_ringbuffer { - uint32_t *cur, *end, *start; - const struct fd_ringbuffer_funcs *funcs; - -// size or end coudl probably go away - int size; - int32_t refcnt; - enum fd_ringbuffer_flags flags; -}; - -/* Allocate a new long-lived state object, not associated with - * a submit: - */ -struct fd_ringbuffer * fd_ringbuffer_new_object(struct fd_pipe *pipe, - uint32_t size); - -struct fd_ringbuffer *fd_ringbuffer_ref(struct fd_ringbuffer *ring); -void fd_ringbuffer_del(struct fd_ringbuffer *ring); - -void fd_ringbuffer_grow(struct fd_ringbuffer *ring, uint32_t ndwords); - -static inline void fd_ringbuffer_emit(struct fd_ringbuffer *ring, - uint32_t data) -{ - (*ring->cur++) = data; -} - -struct fd_reloc { - struct fd_bo *bo; -#define FD_RELOC_READ 0x0001 -#define FD_RELOC_WRITE 0x0002 - uint32_t flags; - uint32_t offset; - uint32_t or; - int32_t shift; - uint32_t orhi; /* used for a5xx+ */ -}; - -/* NOTE: relocs are 2 dwords on a5xx+ */ - -void fd_ringbuffer_reloc(struct fd_ringbuffer *ring, const struct fd_reloc *reloc); -uint32_t fd_ringbuffer_cmd_count(struct fd_ringbuffer *ring); -uint32_t fd_ringbuffer_emit_reloc_ring_full(struct 
fd_ringbuffer *ring, - struct fd_ringbuffer *target, uint32_t cmd_idx); - -static inline uint32_t -offset_bytes(void *end, void *start) -{ - return ((char *)end) - ((char *)start); -} - -static inline uint32_t -fd_ringbuffer_size(struct fd_ringbuffer *ring) -{ - /* only really needed for stateobj ringbuffers, and won't really - * do what you expect for growable rb's.. so lets just restrict - * this to stateobj's for now: - */ - debug_assert(!(ring->flags & FD_RINGBUFFER_GROWABLE)); - return offset_bytes(ring->cur, ring->start); -} - - -#endif /* FREEDRENO_RINGBUFFER_H_ */ diff -Nru mesa-18.3.3/src/gallium/drivers/freedreno/drm/msm_bo.c mesa-19.0.1/src/gallium/drivers/freedreno/drm/msm_bo.c --- mesa-18.3.3/src/gallium/drivers/freedreno/drm/msm_bo.c 2018-12-07 18:58:04.000000000 +0000 +++ mesa-19.0.1/src/gallium/drivers/freedreno/drm/msm_bo.c 1970-01-01 00:00:00.000000000 +0000 @@ -1,170 +0,0 @@ -/* - * Copyright (C) 2012-2018 Rob Clark - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * the rights to use, copy, modify, merge, publish, distribute, sublicense, - * and/or sell copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice (including the next - * paragraph) shall be included in all copies or substantial portions of the - * Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL - * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - * - * Authors: - * Rob Clark - */ - -#include "msm_priv.h" - -static int bo_allocate(struct msm_bo *msm_bo) -{ - struct fd_bo *bo = &msm_bo->base; - if (!msm_bo->offset) { - struct drm_msm_gem_info req = { - .handle = bo->handle, - }; - int ret; - - /* if the buffer is already backed by pages then this - * doesn't actually do anything (other than giving us - * the offset) - */ - ret = drmCommandWriteRead(bo->dev->fd, DRM_MSM_GEM_INFO, - &req, sizeof(req)); - if (ret) { - ERROR_MSG("alloc failed: %s", strerror(errno)); - return ret; - } - - msm_bo->offset = req.offset; - } - - return 0; -} - -static int msm_bo_offset(struct fd_bo *bo, uint64_t *offset) -{ - struct msm_bo *msm_bo = to_msm_bo(bo); - int ret = bo_allocate(msm_bo); - if (ret) - return ret; - *offset = msm_bo->offset; - return 0; -} - -static int msm_bo_cpu_prep(struct fd_bo *bo, struct fd_pipe *pipe, uint32_t op) -{ - struct drm_msm_gem_cpu_prep req = { - .handle = bo->handle, - .op = op, - }; - - get_abs_timeout(&req.timeout, 5000000000); - - return drmCommandWrite(bo->dev->fd, DRM_MSM_GEM_CPU_PREP, &req, sizeof(req)); -} - -static void msm_bo_cpu_fini(struct fd_bo *bo) -{ - struct drm_msm_gem_cpu_fini req = { - .handle = bo->handle, - }; - - drmCommandWrite(bo->dev->fd, DRM_MSM_GEM_CPU_FINI, &req, sizeof(req)); -} - -static int msm_bo_madvise(struct fd_bo *bo, int willneed) -{ - struct drm_msm_gem_madvise req = { - .handle = bo->handle, - .madv = willneed ? 
MSM_MADV_WILLNEED : MSM_MADV_DONTNEED, - }; - int ret; - - /* older kernels do not support this: */ - if (bo->dev->version < FD_VERSION_MADVISE) - return willneed; - - ret = drmCommandWriteRead(bo->dev->fd, DRM_MSM_GEM_MADVISE, &req, sizeof(req)); - if (ret) - return ret; - - return req.retained; -} - -static uint64_t msm_bo_iova(struct fd_bo *bo) -{ - struct drm_msm_gem_info req = { - .handle = bo->handle, - .flags = MSM_INFO_IOVA, - }; - int ret; - - ret = drmCommandWriteRead(bo->dev->fd, DRM_MSM_GEM_INFO, &req, sizeof(req)); - debug_assert(ret == 0); - - return req.offset; -} - -static void msm_bo_destroy(struct fd_bo *bo) -{ - struct msm_bo *msm_bo = to_msm_bo(bo); - free(msm_bo); - -} - -static const struct fd_bo_funcs funcs = { - .offset = msm_bo_offset, - .cpu_prep = msm_bo_cpu_prep, - .cpu_fini = msm_bo_cpu_fini, - .madvise = msm_bo_madvise, - .iova = msm_bo_iova, - .destroy = msm_bo_destroy, -}; - -/* allocate a buffer handle: */ -int msm_bo_new_handle(struct fd_device *dev, - uint32_t size, uint32_t flags, uint32_t *handle) -{ - struct drm_msm_gem_new req = { - .size = size, - .flags = MSM_BO_WC, // TODO figure out proper flags.. 
- }; - int ret; - - ret = drmCommandWriteRead(dev->fd, DRM_MSM_GEM_NEW, - &req, sizeof(req)); - if (ret) - return ret; - - *handle = req.handle; - - return 0; -} - -/* allocate a new buffer object */ -struct fd_bo * msm_bo_from_handle(struct fd_device *dev, - uint32_t size, uint32_t handle) -{ - struct msm_bo *msm_bo; - struct fd_bo *bo; - - msm_bo = calloc(1, sizeof(*msm_bo)); - if (!msm_bo) - return NULL; - - bo = &msm_bo->base; - bo->funcs = &funcs; - - return bo; -} diff -Nru mesa-18.3.3/src/gallium/drivers/freedreno/drm/msm_device.c mesa-19.0.1/src/gallium/drivers/freedreno/drm/msm_device.c --- mesa-18.3.3/src/gallium/drivers/freedreno/drm/msm_device.c 2018-12-07 18:58:04.000000000 +0000 +++ mesa-19.0.1/src/gallium/drivers/freedreno/drm/msm_device.c 1970-01-01 00:00:00.000000000 +0000 @@ -1,61 +0,0 @@ -/* - * Copyright (C) 2012-2018 Rob Clark - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * the rights to use, copy, modify, merge, publish, distribute, sublicense, - * and/or sell copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice (including the next - * paragraph) shall be included in all copies or substantial portions of the - * Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL - * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. 
- * - * Authors: - * Rob Clark - */ - -#include -#include -#include - -#include "msm_priv.h" - -static void msm_device_destroy(struct fd_device *dev) -{ - struct msm_device *msm_dev = to_msm_device(dev); - free(msm_dev); -} - -static const struct fd_device_funcs funcs = { - .bo_new_handle = msm_bo_new_handle, - .bo_from_handle = msm_bo_from_handle, - .pipe_new = msm_pipe_new, - .destroy = msm_device_destroy, -}; - -struct fd_device * msm_device_new(int fd) -{ - struct msm_device *msm_dev; - struct fd_device *dev; - - msm_dev = calloc(1, sizeof(*msm_dev)); - if (!msm_dev) - return NULL; - - dev = &msm_dev->base; - dev->funcs = &funcs; - - dev->bo_size = sizeof(struct msm_bo); - - return dev; -} diff -Nru mesa-18.3.3/src/gallium/drivers/freedreno/drm/msm_drm.h mesa-19.0.1/src/gallium/drivers/freedreno/drm/msm_drm.h --- mesa-18.3.3/src/gallium/drivers/freedreno/drm/msm_drm.h 2018-12-07 18:58:04.000000000 +0000 +++ mesa-19.0.1/src/gallium/drivers/freedreno/drm/msm_drm.h 1970-01-01 00:00:00.000000000 +0000 @@ -1,308 +0,0 @@ -/* - * Copyright (C) 2013 Red Hat - * Author: Rob Clark - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * the rights to use, copy, modify, merge, publish, distribute, sublicense, - * and/or sell copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice (including the next - * paragraph) shall be included in all copies or substantial portions of the - * Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL - * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ - -#ifndef __MSM_DRM_H__ -#define __MSM_DRM_H__ - -#include "drm.h" - -#if defined(__cplusplus) -extern "C" { -#endif - -/* Please note that modifications to all structs defined here are - * subject to backwards-compatibility constraints: - * 1) Do not use pointers, use __u64 instead for 32 bit / 64 bit - * user/kernel compatibility - * 2) Keep fields aligned to their size - * 3) Because of how drm_ioctl() works, we can add new fields at - * the end of an ioctl if some care is taken: drm_ioctl() will - * zero out the new fields at the tail of the ioctl, so a zero - * value should have a backwards compatible meaning. And for - * output params, userspace won't see the newly added output - * fields.. so that has to be somehow ok. - */ - -#define MSM_PIPE_NONE 0x00 -#define MSM_PIPE_2D0 0x01 -#define MSM_PIPE_2D1 0x02 -#define MSM_PIPE_3D0 0x10 - -/* The pipe-id just uses the lower bits, so can be OR'd with flags in - * the upper 16 bits (which could be extended further, if needed, maybe - * we extend/overload the pipe-id some day to deal with multiple rings, - * but even then I don't think we need the full lower 16 bits). - */ -#define MSM_PIPE_ID_MASK 0xffff -#define MSM_PIPE_ID(x) ((x) & MSM_PIPE_ID_MASK) -#define MSM_PIPE_FLAGS(x) ((x) & ~MSM_PIPE_ID_MASK) - -/* timeouts are specified in clock-monotonic absolute times (to simplify - * restarting interrupted ioctls). The following struct is logically the - * same as 'struct timespec' but 32/64b ABI safe. 
- */ -struct drm_msm_timespec { - __s64 tv_sec; /* seconds */ - __s64 tv_nsec; /* nanoseconds */ -}; - -#define MSM_PARAM_GPU_ID 0x01 -#define MSM_PARAM_GMEM_SIZE 0x02 -#define MSM_PARAM_CHIP_ID 0x03 -#define MSM_PARAM_MAX_FREQ 0x04 -#define MSM_PARAM_TIMESTAMP 0x05 -#define MSM_PARAM_GMEM_BASE 0x06 -#define MSM_PARAM_NR_RINGS 0x07 - -struct drm_msm_param { - __u32 pipe; /* in, MSM_PIPE_x */ - __u32 param; /* in, MSM_PARAM_x */ - __u64 value; /* out (get_param) or in (set_param) */ -}; - -/* - * GEM buffers: - */ - -#define MSM_BO_SCANOUT 0x00000001 /* scanout capable */ -#define MSM_BO_GPU_READONLY 0x00000002 -#define MSM_BO_CACHE_MASK 0x000f0000 -/* cache modes */ -#define MSM_BO_CACHED 0x00010000 -#define MSM_BO_WC 0x00020000 -#define MSM_BO_UNCACHED 0x00040000 - -#define MSM_BO_FLAGS (MSM_BO_SCANOUT | \ - MSM_BO_GPU_READONLY | \ - MSM_BO_CACHED | \ - MSM_BO_WC | \ - MSM_BO_UNCACHED) - -struct drm_msm_gem_new { - __u64 size; /* in */ - __u32 flags; /* in, mask of MSM_BO_x */ - __u32 handle; /* out */ -}; - -#define MSM_INFO_IOVA 0x01 - -#define MSM_INFO_FLAGS (MSM_INFO_IOVA) - -struct drm_msm_gem_info { - __u32 handle; /* in */ - __u32 flags; /* in - combination of MSM_INFO_* flags */ - __u64 offset; /* out, mmap() offset or iova */ -}; - -#define MSM_PREP_READ 0x01 -#define MSM_PREP_WRITE 0x02 -#define MSM_PREP_NOSYNC 0x04 - -#define MSM_PREP_FLAGS (MSM_PREP_READ | MSM_PREP_WRITE | MSM_PREP_NOSYNC) - -struct drm_msm_gem_cpu_prep { - __u32 handle; /* in */ - __u32 op; /* in, mask of MSM_PREP_x */ - struct drm_msm_timespec timeout; /* in */ -}; - -struct drm_msm_gem_cpu_fini { - __u32 handle; /* in */ -}; - -/* - * Cmdstream Submission: - */ - -/* The value written into the cmdstream is logically: - * - * ((relocbuf->gpuaddr + reloc_offset) << shift) | or - * - * When we have GPU's w/ >32bit ptrs, it should be possible to deal - * with this by emit'ing two reloc entries with appropriate shift - * values. Or a new MSM_SUBMIT_CMD_x type would also be an option. 
- * - * NOTE that reloc's must be sorted by order of increasing submit_offset, - * otherwise EINVAL. - */ -struct drm_msm_gem_submit_reloc { - __u32 submit_offset; /* in, offset from submit_bo */ - __u32 or; /* in, value OR'd with result */ - __s32 shift; /* in, amount of left shift (can be negative) */ - __u32 reloc_idx; /* in, index of reloc_bo buffer */ - __u64 reloc_offset; /* in, offset from start of reloc_bo */ -}; - -/* submit-types: - * BUF - this cmd buffer is executed normally. - * IB_TARGET_BUF - this cmd buffer is an IB target. Reloc's are - * processed normally, but the kernel does not setup an IB to - * this buffer in the first-level ringbuffer - * CTX_RESTORE_BUF - only executed if there has been a GPU context - * switch since the last SUBMIT ioctl - */ -#define MSM_SUBMIT_CMD_BUF 0x0001 -#define MSM_SUBMIT_CMD_IB_TARGET_BUF 0x0002 -#define MSM_SUBMIT_CMD_CTX_RESTORE_BUF 0x0003 -struct drm_msm_gem_submit_cmd { - __u32 type; /* in, one of MSM_SUBMIT_CMD_x */ - __u32 submit_idx; /* in, index of submit_bo cmdstream buffer */ - __u32 submit_offset; /* in, offset into submit_bo */ - __u32 size; /* in, cmdstream size */ - __u32 pad; - __u32 nr_relocs; /* in, number of submit_reloc's */ - __u64 relocs; /* in, ptr to array of submit_reloc's */ -}; - -/* Each buffer referenced elsewhere in the cmdstream submit (ie. the - * cmdstream buffer(s) themselves or reloc entries) has one (and only - * one) entry in the submit->bos[] table. - * - * As a optimization, the current buffer (gpu virtual address) can be - * passed back through the 'presumed' field. If on a subsequent reloc, - * userspace passes back a 'presumed' address that is still valid, - * then patching the cmdstream for this entry is skipped. This can - * avoid kernel needing to map/access the cmdstream bo in the common - * case. 
- */ -#define MSM_SUBMIT_BO_READ 0x0001 -#define MSM_SUBMIT_BO_WRITE 0x0002 - -#define MSM_SUBMIT_BO_FLAGS (MSM_SUBMIT_BO_READ | MSM_SUBMIT_BO_WRITE) - -struct drm_msm_gem_submit_bo { - __u32 flags; /* in, mask of MSM_SUBMIT_BO_x */ - __u32 handle; /* in, GEM handle */ - __u64 presumed; /* in/out, presumed buffer address */ -}; - -/* Valid submit ioctl flags: */ -#define MSM_SUBMIT_NO_IMPLICIT 0x80000000 /* disable implicit sync */ -#define MSM_SUBMIT_FENCE_FD_IN 0x40000000 /* enable input fence_fd */ -#define MSM_SUBMIT_FENCE_FD_OUT 0x20000000 /* enable output fence_fd */ -#define MSM_SUBMIT_SUDO 0x10000000 /* run submitted cmds from RB */ -#define MSM_SUBMIT_FLAGS ( \ - MSM_SUBMIT_NO_IMPLICIT | \ - MSM_SUBMIT_FENCE_FD_IN | \ - MSM_SUBMIT_FENCE_FD_OUT | \ - MSM_SUBMIT_SUDO | \ - 0) - -/* Each cmdstream submit consists of a table of buffers involved, and - * one or more cmdstream buffers. This allows for conditional execution - * (context-restore), and IB buffers needed for per tile/bin draw cmds. - */ -struct drm_msm_gem_submit { - __u32 flags; /* MSM_PIPE_x | MSM_SUBMIT_x */ - __u32 fence; /* out */ - __u32 nr_bos; /* in, number of submit_bo's */ - __u32 nr_cmds; /* in, number of submit_cmd's */ - __u64 bos; /* in, ptr to array of submit_bo's */ - __u64 cmds; /* in, ptr to array of submit_cmd's */ - __s32 fence_fd; /* in/out fence fd (see MSM_SUBMIT_FENCE_FD_IN/OUT) */ - __u32 queueid; /* in, submitqueue id */ -}; - -/* The normal way to synchronize with the GPU is just to CPU_PREP on - * a buffer if you need to access it from the CPU (other cmdstream - * submission from same or other contexts, PAGE_FLIP ioctl, etc, all - * handle the required synchronization under the hood). This ioctl - * mainly just exists as a way to implement the gallium pipe_fence - * APIs without requiring a dummy bo to synchronize on. 
- */ -struct drm_msm_wait_fence { - __u32 fence; /* in */ - __u32 pad; - struct drm_msm_timespec timeout; /* in */ - __u32 queueid; /* in, submitqueue id */ -}; - -/* madvise provides a way to tell the kernel in case a buffers contents - * can be discarded under memory pressure, which is useful for userspace - * bo cache where we want to optimistically hold on to buffer allocate - * and potential mmap, but allow the pages to be discarded under memory - * pressure. - * - * Typical usage would involve madvise(DONTNEED) when buffer enters BO - * cache, and madvise(WILLNEED) if trying to recycle buffer from BO cache. - * In the WILLNEED case, 'retained' indicates to userspace whether the - * backing pages still exist. - */ -#define MSM_MADV_WILLNEED 0 /* backing pages are needed, status returned in 'retained' */ -#define MSM_MADV_DONTNEED 1 /* backing pages not needed */ -#define __MSM_MADV_PURGED 2 /* internal state */ - -struct drm_msm_gem_madvise { - __u32 handle; /* in, GEM handle */ - __u32 madv; /* in, MSM_MADV_x */ - __u32 retained; /* out, whether backing store still exists */ -}; - -/* - * Draw queues allow the user to set specific submission parameter. Command - * submissions specify a specific submitqueue to use. 
ID 0 is reserved for - * backwards compatibility as a "default" submitqueue - */ - -#define MSM_SUBMITQUEUE_FLAGS (0) - -struct drm_msm_submitqueue { - __u32 flags; /* in, MSM_SUBMITQUEUE_x */ - __u32 prio; /* in, Priority level */ - __u32 id; /* out, identifier */ -}; - -#define DRM_MSM_GET_PARAM 0x00 -/* placeholder: -#define DRM_MSM_SET_PARAM 0x01 - */ -#define DRM_MSM_GEM_NEW 0x02 -#define DRM_MSM_GEM_INFO 0x03 -#define DRM_MSM_GEM_CPU_PREP 0x04 -#define DRM_MSM_GEM_CPU_FINI 0x05 -#define DRM_MSM_GEM_SUBMIT 0x06 -#define DRM_MSM_WAIT_FENCE 0x07 -#define DRM_MSM_GEM_MADVISE 0x08 -/* placeholder: -#define DRM_MSM_GEM_SVM_NEW 0x09 - */ -#define DRM_MSM_SUBMITQUEUE_NEW 0x0A -#define DRM_MSM_SUBMITQUEUE_CLOSE 0x0B - -#define DRM_IOCTL_MSM_GET_PARAM DRM_IOWR(DRM_COMMAND_BASE + DRM_MSM_GET_PARAM, struct drm_msm_param) -#define DRM_IOCTL_MSM_GEM_NEW DRM_IOWR(DRM_COMMAND_BASE + DRM_MSM_GEM_NEW, struct drm_msm_gem_new) -#define DRM_IOCTL_MSM_GEM_INFO DRM_IOWR(DRM_COMMAND_BASE + DRM_MSM_GEM_INFO, struct drm_msm_gem_info) -#define DRM_IOCTL_MSM_GEM_CPU_PREP DRM_IOW (DRM_COMMAND_BASE + DRM_MSM_GEM_CPU_PREP, struct drm_msm_gem_cpu_prep) -#define DRM_IOCTL_MSM_GEM_CPU_FINI DRM_IOW (DRM_COMMAND_BASE + DRM_MSM_GEM_CPU_FINI, struct drm_msm_gem_cpu_fini) -#define DRM_IOCTL_MSM_GEM_SUBMIT DRM_IOWR(DRM_COMMAND_BASE + DRM_MSM_GEM_SUBMIT, struct drm_msm_gem_submit) -#define DRM_IOCTL_MSM_WAIT_FENCE DRM_IOW (DRM_COMMAND_BASE + DRM_MSM_WAIT_FENCE, struct drm_msm_wait_fence) -#define DRM_IOCTL_MSM_GEM_MADVISE DRM_IOWR(DRM_COMMAND_BASE + DRM_MSM_GEM_MADVISE, struct drm_msm_gem_madvise) -#define DRM_IOCTL_MSM_SUBMITQUEUE_NEW DRM_IOWR(DRM_COMMAND_BASE + DRM_MSM_SUBMITQUEUE_NEW, struct drm_msm_submitqueue) -#define DRM_IOCTL_MSM_SUBMITQUEUE_CLOSE DRM_IOW (DRM_COMMAND_BASE + DRM_MSM_SUBMITQUEUE_CLOSE, __u32) - -#if defined(__cplusplus) -} -#endif - -#endif /* __MSM_DRM_H__ */ diff -Nru mesa-18.3.3/src/gallium/drivers/freedreno/drm/msm_pipe.c 
mesa-19.0.1/src/gallium/drivers/freedreno/drm/msm_pipe.c --- mesa-18.3.3/src/gallium/drivers/freedreno/drm/msm_pipe.c 2018-12-07 18:58:04.000000000 +0000 +++ mesa-19.0.1/src/gallium/drivers/freedreno/drm/msm_pipe.c 1970-01-01 00:00:00.000000000 +0000 @@ -1,223 +0,0 @@ -/* - * Copyright (C) 2012-2018 Rob Clark - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * the rights to use, copy, modify, merge, publish, distribute, sublicense, - * and/or sell copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice (including the next - * paragraph) shall be included in all copies or substantial portions of the - * Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL - * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. 
- * - * Authors: - * Rob Clark - */ - -#include "util/slab.h" - -#include "freedreno_util.h" -#include "msm_priv.h" - -static int query_param(struct fd_pipe *pipe, uint32_t param, - uint64_t *value) -{ - struct msm_pipe *msm_pipe = to_msm_pipe(pipe); - struct drm_msm_param req = { - .pipe = msm_pipe->pipe, - .param = param, - }; - int ret; - - ret = drmCommandWriteRead(pipe->dev->fd, DRM_MSM_GET_PARAM, - &req, sizeof(req)); - if (ret) - return ret; - - *value = req.value; - - return 0; -} - -static int msm_pipe_get_param(struct fd_pipe *pipe, - enum fd_param_id param, uint64_t *value) -{ - struct msm_pipe *msm_pipe = to_msm_pipe(pipe); - switch(param) { - case FD_DEVICE_ID: // XXX probably get rid of this.. - case FD_GPU_ID: - *value = msm_pipe->gpu_id; - return 0; - case FD_GMEM_SIZE: - *value = msm_pipe->gmem; - return 0; - case FD_CHIP_ID: - *value = msm_pipe->chip_id; - return 0; - case FD_MAX_FREQ: - return query_param(pipe, MSM_PARAM_MAX_FREQ, value); - case FD_TIMESTAMP: - return query_param(pipe, MSM_PARAM_TIMESTAMP, value); - case FD_NR_RINGS: - return query_param(pipe, MSM_PARAM_NR_RINGS, value); - default: - ERROR_MSG("invalid param id: %d", param); - return -1; - } -} - -static int msm_pipe_wait(struct fd_pipe *pipe, uint32_t timestamp, - uint64_t timeout) -{ - struct fd_device *dev = pipe->dev; - struct drm_msm_wait_fence req = { - .fence = timestamp, - .queueid = to_msm_pipe(pipe)->queue_id, - }; - int ret; - - get_abs_timeout(&req.timeout, timeout); - - ret = drmCommandWrite(dev->fd, DRM_MSM_WAIT_FENCE, &req, sizeof(req)); - if (ret) { - ERROR_MSG("wait-fence failed! 
%d (%s)", ret, strerror(errno)); - return ret; - } - - return 0; -} - -static int open_submitqueue(struct fd_pipe *pipe, uint32_t prio) -{ - struct drm_msm_submitqueue req = { - .flags = 0, - .prio = prio, - }; - uint64_t nr_rings = 1; - int ret; - - if (fd_device_version(pipe->dev) < FD_VERSION_SUBMIT_QUEUES) { - to_msm_pipe(pipe)->queue_id = 0; - return 0; - } - - msm_pipe_get_param(pipe, FD_NR_RINGS, &nr_rings); - - req.prio = MIN2(req.prio, MAX2(nr_rings, 1) - 1); - - ret = drmCommandWriteRead(pipe->dev->fd, DRM_MSM_SUBMITQUEUE_NEW, - &req, sizeof(req)); - if (ret) { - ERROR_MSG("could not create submitqueue! %d (%s)", ret, strerror(errno)); - return ret; - } - - to_msm_pipe(pipe)->queue_id = req.id; - return 0; -} - -static void close_submitqueue(struct fd_pipe *pipe, uint32_t queue_id) -{ - if (fd_device_version(pipe->dev) < FD_VERSION_SUBMIT_QUEUES) - return; - - drmCommandWrite(pipe->dev->fd, DRM_MSM_SUBMITQUEUE_CLOSE, - &queue_id, sizeof(queue_id)); -} - -static void msm_pipe_destroy(struct fd_pipe *pipe) -{ - struct msm_pipe *msm_pipe = to_msm_pipe(pipe); - close_submitqueue(pipe, msm_pipe->queue_id); - free(msm_pipe); -} - -static const struct fd_pipe_funcs sp_funcs = { - .ringbuffer_new_object = msm_ringbuffer_sp_new_object, - .submit_new = msm_submit_sp_new, - .get_param = msm_pipe_get_param, - .wait = msm_pipe_wait, - .destroy = msm_pipe_destroy, -}; - -static const struct fd_pipe_funcs legacy_funcs = { - .ringbuffer_new_object = msm_ringbuffer_new_object, - .submit_new = msm_submit_new, - .get_param = msm_pipe_get_param, - .wait = msm_pipe_wait, - .destroy = msm_pipe_destroy, -}; - -static uint64_t get_param(struct fd_pipe *pipe, uint32_t param) -{ - uint64_t value; - int ret = query_param(pipe, param, &value); - if (ret) { - ERROR_MSG("get-param failed! 
%d (%s)", ret, strerror(errno)); - return 0; - } - return value; -} - -struct fd_pipe * msm_pipe_new(struct fd_device *dev, - enum fd_pipe_id id, uint32_t prio) -{ - static const uint32_t pipe_id[] = { - [FD_PIPE_3D] = MSM_PIPE_3D0, - [FD_PIPE_2D] = MSM_PIPE_2D0, - }; - struct msm_pipe *msm_pipe = NULL; - struct fd_pipe *pipe = NULL; - - msm_pipe = calloc(1, sizeof(*msm_pipe)); - if (!msm_pipe) { - ERROR_MSG("allocation failed"); - goto fail; - } - - pipe = &msm_pipe->base; - - // TODO once kernel changes are in place, this switch will be - // based on kernel version: - if (fd_mesa_debug & FD_DBG_SOFTPIN) { - pipe->funcs = &sp_funcs; - } else { - pipe->funcs = &legacy_funcs; - } - - /* initialize before get_param(): */ - pipe->dev = dev; - msm_pipe->pipe = pipe_id[id]; - - /* these params should be supported since the first version of drm/msm: */ - msm_pipe->gpu_id = get_param(pipe, MSM_PARAM_GPU_ID); - msm_pipe->gmem = get_param(pipe, MSM_PARAM_GMEM_SIZE); - msm_pipe->chip_id = get_param(pipe, MSM_PARAM_CHIP_ID); - - if (! 
msm_pipe->gpu_id) - goto fail; - - INFO_MSG("Pipe Info:"); - INFO_MSG(" GPU-id: %d", msm_pipe->gpu_id); - INFO_MSG(" Chip-id: 0x%08x", msm_pipe->chip_id); - INFO_MSG(" GMEM size: 0x%08x", msm_pipe->gmem); - - if (open_submitqueue(pipe, prio)) - goto fail; - - return pipe; -fail: - if (pipe) - fd_pipe_del(pipe); - return NULL; -} diff -Nru mesa-18.3.3/src/gallium/drivers/freedreno/drm/msm_priv.h mesa-19.0.1/src/gallium/drivers/freedreno/drm/msm_priv.h --- mesa-18.3.3/src/gallium/drivers/freedreno/drm/msm_priv.h 2018-12-07 18:58:04.000000000 +0000 +++ mesa-19.0.1/src/gallium/drivers/freedreno/drm/msm_priv.h 1970-01-01 00:00:00.000000000 +0000 @@ -1,140 +0,0 @@ -/* - * Copyright (C) 2012-2018 Rob Clark - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * the rights to use, copy, modify, merge, publish, distribute, sublicense, - * and/or sell copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice (including the next - * paragraph) shall be included in all copies or substantial portions of the - * Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL - * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. 
- * - * Authors: - * Rob Clark - */ - -#ifndef MSM_PRIV_H_ -#define MSM_PRIV_H_ - -#include "freedreno_priv.h" - -#ifndef __user -# define __user -#endif - -#include "msm_drm.h" - -struct msm_device { - struct fd_device base; - struct fd_bo_cache ring_cache; -}; -FD_DEFINE_CAST(fd_device, msm_device); - -struct fd_device * msm_device_new(int fd); - -struct msm_pipe { - struct fd_pipe base; - uint32_t pipe; - uint32_t gpu_id; - uint32_t gmem; - uint32_t chip_id; - uint32_t queue_id; -}; -FD_DEFINE_CAST(fd_pipe, msm_pipe); - -struct fd_pipe * msm_pipe_new(struct fd_device *dev, - enum fd_pipe_id id, uint32_t prio); - -struct fd_ringbuffer * msm_ringbuffer_new_object(struct fd_pipe *pipe, uint32_t size); -struct fd_ringbuffer * msm_ringbuffer_sp_new_object(struct fd_pipe *pipe, uint32_t size); - -struct fd_submit * msm_submit_new(struct fd_pipe *pipe); -struct fd_submit * msm_submit_sp_new(struct fd_pipe *pipe); - -struct msm_bo { - struct fd_bo base; - uint64_t offset; - /* to avoid excess hashtable lookups, cache the ring this bo was - * last emitted on (since that will probably also be the next ring - * it is emitted on) - */ - unsigned current_submit_seqno; - uint32_t idx; -}; -FD_DEFINE_CAST(fd_bo, msm_bo); - -int msm_bo_new_handle(struct fd_device *dev, - uint32_t size, uint32_t flags, uint32_t *handle); -struct fd_bo * msm_bo_from_handle(struct fd_device *dev, - uint32_t size, uint32_t handle); - -static inline void -msm_dump_submit(struct drm_msm_gem_submit *req) -{ - for (unsigned i = 0; i < req->nr_bos; i++) { - struct drm_msm_gem_submit_bo *bos = U642VOID(req->bos); - struct drm_msm_gem_submit_bo *bo = &bos[i]; - ERROR_MSG(" bos[%d]: handle=%u, flags=%x", i, bo->handle, bo->flags); - } - for (unsigned i = 0; i < req->nr_cmds; i++) { - struct drm_msm_gem_submit_cmd *cmds = U642VOID(req->cmds); - struct drm_msm_gem_submit_cmd *cmd = &cmds[i]; - struct drm_msm_gem_submit_reloc *relocs = U642VOID(cmd->relocs); - ERROR_MSG(" cmd[%d]: type=%u, submit_idx=%u, 
submit_offset=%u, size=%u", - i, cmd->type, cmd->submit_idx, cmd->submit_offset, cmd->size); - for (unsigned j = 0; j < cmd->nr_relocs; j++) { - struct drm_msm_gem_submit_reloc *r = &relocs[j]; - ERROR_MSG(" reloc[%d]: submit_offset=%u, or=%08x, shift=%d, reloc_idx=%u" - ", reloc_offset=%"PRIu64, j, r->submit_offset, r->or, r->shift, - r->reloc_idx, r->reloc_offset); - } - } -} - -static inline void get_abs_timeout(struct drm_msm_timespec *tv, uint64_t ns) -{ - struct timespec t; - uint32_t s = ns / 1000000000; - clock_gettime(CLOCK_MONOTONIC, &t); - tv->tv_sec = t.tv_sec + s; - tv->tv_nsec = t.tv_nsec + ns - (s * 1000000000); -} - -/* - * Stupid/simple growable array implementation: - */ - -static inline void * -grow(void *ptr, uint16_t nr, uint16_t *max, uint16_t sz) -{ - if ((nr + 1) > *max) { - if ((*max * 2) < (nr + 1)) - *max = nr + 5; - else - *max = *max * 2; - ptr = realloc(ptr, *max * sz); - } - return ptr; -} - -#define DECLARE_ARRAY(type, name) \ - unsigned short nr_ ## name, max_ ## name; \ - type * name; - -#define APPEND(x, name) ({ \ - (x)->name = grow((x)->name, (x)->nr_ ## name, &(x)->max_ ## name, sizeof((x)->name[0])); \ - (x)->nr_ ## name ++; \ -}) - -#endif /* MSM_PRIV_H_ */ diff -Nru mesa-18.3.3/src/gallium/drivers/freedreno/drm/msm_ringbuffer.c mesa-19.0.1/src/gallium/drivers/freedreno/drm/msm_ringbuffer.c --- mesa-18.3.3/src/gallium/drivers/freedreno/drm/msm_ringbuffer.c 2019-02-01 12:03:20.000000000 +0000 +++ mesa-19.0.1/src/gallium/drivers/freedreno/drm/msm_ringbuffer.c 1970-01-01 00:00:00.000000000 +0000 @@ -1,724 +0,0 @@ -/* - * Copyright (C) 2012-2018 Rob Clark - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * the rights to use, copy, modify, merge, publish, distribute, sublicense, - * and/or sell copies of the Software, and to permit persons to 
whom the - * Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice (including the next - * paragraph) shall be included in all copies or substantial portions of the - * Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL - * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - * - * Authors: - * Rob Clark - */ - -#include -#include - -#include "util/hash_table.h" -#include "util/set.h" -#include "util/slab.h" - -#include "drm/freedreno_ringbuffer.h" -#include "msm_priv.h" - -/* The legacy implementation of submit/ringbuffer, which still does the - * traditional reloc and cmd tracking - */ - - -#define INIT_SIZE 0x1000 - -static pthread_mutex_t idx_lock = PTHREAD_MUTEX_INITIALIZER; - - -struct msm_submit { - struct fd_submit base; - - DECLARE_ARRAY(struct drm_msm_gem_submit_bo, submit_bos); - DECLARE_ARRAY(struct fd_bo *, bos); - - unsigned seqno; - - /* maps fd_bo to idx in bos table: */ - struct hash_table *bo_table; - - struct slab_mempool ring_pool; - - /* hash-set of associated rings: */ - struct set *ring_set; - - struct fd_ringbuffer *primary; - - /* Allow for sub-allocation of stateobj ring buffers (ie. sharing - * the same underlying bo).. - * - * We also rely on previous stateobj having been fully constructed - * so we can reclaim extra space at it's end. - */ - struct fd_ringbuffer *suballoc_ring; -}; -FD_DEFINE_CAST(fd_submit, msm_submit); - -/* for FD_RINGBUFFER_GROWABLE rb's, tracks the 'finalized' cmdstream buffers - * and sizes. Ie. 
a finalized buffer can have no more commands appended to - * it. - */ -struct msm_cmd { - struct fd_bo *ring_bo; - unsigned size; - DECLARE_ARRAY(struct drm_msm_gem_submit_reloc, relocs); -}; - -static struct msm_cmd * -cmd_new(struct fd_bo *ring_bo) -{ - struct msm_cmd *cmd = malloc(sizeof(*cmd)); - cmd->ring_bo = fd_bo_ref(ring_bo); - cmd->size = 0; - cmd->nr_relocs = cmd->max_relocs = 0; - cmd->relocs = NULL; - return cmd; -} - -static void -cmd_free(struct msm_cmd *cmd) -{ - fd_bo_del(cmd->ring_bo); - free(cmd->relocs); - free(cmd); -} - -/* for _FD_RINGBUFFER_OBJECT rb's we need to track the bo's and flags to - * later copy into the submit when the stateobj rb is later referenced by - * a regular rb: - */ -struct msm_reloc_bo { - struct fd_bo *bo; - unsigned flags; -}; - -struct msm_ringbuffer { - struct fd_ringbuffer base; - - /* for FD_RINGBUFFER_STREAMING rb's which are sub-allocated */ - unsigned offset; - - union { - /* for _FD_RINGBUFFER_OBJECT case: */ - struct { - struct fd_pipe *pipe; - DECLARE_ARRAY(struct msm_reloc_bo, reloc_bos); - struct set *ring_set; - }; - /* for other cases: */ - struct { - struct fd_submit *submit; - DECLARE_ARRAY(struct msm_cmd *, cmds); - }; - } u; - - struct msm_cmd *cmd; /* current cmd */ - struct fd_bo *ring_bo; -}; -FD_DEFINE_CAST(fd_ringbuffer, msm_ringbuffer); - -static void finalize_current_cmd(struct fd_ringbuffer *ring); -static struct fd_ringbuffer * msm_ringbuffer_init( - struct msm_ringbuffer *msm_ring, - uint32_t size, enum fd_ringbuffer_flags flags); - -/* add (if needed) bo to submit and return index: */ -static uint32_t -append_bo(struct msm_submit *submit, struct fd_bo *bo, uint32_t flags) -{ - struct msm_bo *msm_bo = to_msm_bo(bo); - uint32_t idx; - pthread_mutex_lock(&idx_lock); - if (likely(msm_bo->current_submit_seqno == submit->seqno)) { - idx = msm_bo->idx; - } else { - uint32_t hash = _mesa_hash_pointer(bo); - struct hash_entry *entry; - - entry = _mesa_hash_table_search_pre_hashed(submit->bo_table, 
hash, bo); - if (entry) { - /* found */ - idx = (uint32_t)(uintptr_t)entry->data; - } else { - idx = APPEND(submit, submit_bos); - idx = APPEND(submit, bos); - - submit->submit_bos[idx].flags = 0; - submit->submit_bos[idx].handle = bo->handle; - submit->submit_bos[idx].presumed = 0; - - submit->bos[idx] = fd_bo_ref(bo); - - _mesa_hash_table_insert_pre_hashed(submit->bo_table, hash, bo, - (void *)(uintptr_t)idx); - } - msm_bo->current_submit_seqno = submit->seqno; - msm_bo->idx = idx; - } - pthread_mutex_unlock(&idx_lock); - if (flags & FD_RELOC_READ) - submit->submit_bos[idx].flags |= MSM_SUBMIT_BO_READ; - if (flags & FD_RELOC_WRITE) - submit->submit_bos[idx].flags |= MSM_SUBMIT_BO_WRITE; - return idx; -} - -static void -append_ring(struct set *set, struct fd_ringbuffer *ring) -{ - uint32_t hash = _mesa_hash_pointer(ring); - - if (!_mesa_set_search_pre_hashed(set, hash, ring)) { - fd_ringbuffer_ref(ring); - _mesa_set_add_pre_hashed(set, hash, ring); - } -} - -static void -msm_submit_suballoc_ring_bo(struct fd_submit *submit, - struct msm_ringbuffer *msm_ring, uint32_t size) -{ - struct msm_submit *msm_submit = to_msm_submit(submit); - unsigned suballoc_offset = 0; - struct fd_bo *suballoc_bo = NULL; - - if (msm_submit->suballoc_ring) { - struct msm_ringbuffer *suballoc_ring = - to_msm_ringbuffer(msm_submit->suballoc_ring); - - suballoc_bo = suballoc_ring->ring_bo; - suballoc_offset = fd_ringbuffer_size(msm_submit->suballoc_ring) + - suballoc_ring->offset; - - suballoc_offset = align(suballoc_offset, 0x10); - - if ((size + suballoc_offset) > suballoc_bo->size) { - suballoc_bo = NULL; - } - } - - if (!suballoc_bo) { - // TODO possibly larger size for streaming bo? 
- msm_ring->ring_bo = fd_bo_new_ring( - submit->pipe->dev, 0x8000, 0); - msm_ring->offset = 0; - } else { - msm_ring->ring_bo = fd_bo_ref(suballoc_bo); - msm_ring->offset = suballoc_offset; - } - - struct fd_ringbuffer *old_suballoc_ring = msm_submit->suballoc_ring; - - msm_submit->suballoc_ring = fd_ringbuffer_ref(&msm_ring->base); - - if (old_suballoc_ring) - fd_ringbuffer_del(old_suballoc_ring); -} - -static struct fd_ringbuffer * -msm_submit_new_ringbuffer(struct fd_submit *submit, uint32_t size, - enum fd_ringbuffer_flags flags) -{ - struct msm_submit *msm_submit = to_msm_submit(submit); - struct msm_ringbuffer *msm_ring; - - msm_ring = slab_alloc_st(&msm_submit->ring_pool); - - msm_ring->u.submit = submit; - - /* NOTE: needs to be before _suballoc_ring_bo() since it could - * increment the refcnt of the current ring - */ - msm_ring->base.refcnt = 1; - - if (flags & FD_RINGBUFFER_STREAMING) { - msm_submit_suballoc_ring_bo(submit, msm_ring, size); - } else { - if (flags & FD_RINGBUFFER_GROWABLE) - size = INIT_SIZE; - - msm_ring->offset = 0; - msm_ring->ring_bo = fd_bo_new_ring(submit->pipe->dev, size, 0); - } - - if (!msm_ringbuffer_init(msm_ring, size, flags)) - return NULL; - - if (flags & FD_RINGBUFFER_PRIMARY) { - debug_assert(!msm_submit->primary); - msm_submit->primary = fd_ringbuffer_ref(&msm_ring->base); - } - - return &msm_ring->base; -} - -static struct drm_msm_gem_submit_reloc * -handle_stateobj_relocs(struct msm_submit *submit, struct msm_ringbuffer *ring) -{ - struct msm_cmd *cmd = ring->cmd; - struct drm_msm_gem_submit_reloc *relocs; - - relocs = malloc(cmd->nr_relocs * sizeof(*relocs)); - - for (unsigned i = 0; i < cmd->nr_relocs; i++) { - unsigned idx = cmd->relocs[i].reloc_idx; - struct fd_bo *bo = ring->u.reloc_bos[idx].bo; - unsigned flags = 0; - - if (ring->u.reloc_bos[idx].flags & MSM_SUBMIT_BO_READ) - flags |= FD_RELOC_READ; - if (ring->u.reloc_bos[idx].flags & MSM_SUBMIT_BO_WRITE) - flags |= FD_RELOC_WRITE; - - relocs[i] = cmd->relocs[i]; 
- relocs[i].reloc_idx = append_bo(submit, bo, flags); - } - - return relocs; -} - -static int -msm_submit_flush(struct fd_submit *submit, int in_fence_fd, - int *out_fence_fd, uint32_t *out_fence) -{ - struct msm_submit *msm_submit = to_msm_submit(submit); - struct msm_pipe *msm_pipe = to_msm_pipe(submit->pipe); - struct drm_msm_gem_submit req = { - .flags = msm_pipe->pipe, - .queueid = msm_pipe->queue_id, - }; - int ret; - - debug_assert(msm_submit->primary); - - finalize_current_cmd(msm_submit->primary); - append_ring(msm_submit->ring_set, msm_submit->primary); - - struct set_entry *entry; - unsigned nr_cmds = 0; - unsigned nr_objs = 0; - - set_foreach(msm_submit->ring_set, entry) { - struct fd_ringbuffer *ring = (void *)entry->key; - if (ring->flags & _FD_RINGBUFFER_OBJECT) { - nr_cmds += 1; - nr_objs += 1; - } else { - if (ring != msm_submit->primary) - finalize_current_cmd(ring); - nr_cmds += to_msm_ringbuffer(ring)->u.nr_cmds; - } - } - - void *obj_relocs[nr_objs]; - struct drm_msm_gem_submit_cmd cmds[nr_cmds]; - unsigned i = 0, o = 0; - - set_foreach(msm_submit->ring_set, entry) { - struct fd_ringbuffer *ring = (void *)entry->key; - struct msm_ringbuffer *msm_ring = to_msm_ringbuffer(ring); - - debug_assert(i < nr_cmds); - - // TODO handle relocs: - if (ring->flags & _FD_RINGBUFFER_OBJECT) { - - debug_assert(o < nr_objs); - - void *relocs = handle_stateobj_relocs(msm_submit, msm_ring); - obj_relocs[o++] = relocs; - - cmds[i].type = MSM_SUBMIT_CMD_IB_TARGET_BUF; - cmds[i].submit_idx = - append_bo(msm_submit, msm_ring->ring_bo, FD_RELOC_READ); - cmds[i].submit_offset = msm_ring->offset; - cmds[i].size = offset_bytes(ring->cur, ring->start); - cmds[i].pad = 0; - cmds[i].nr_relocs = msm_ring->cmd->nr_relocs; - cmds[i].relocs = VOID2U64(relocs); - - i++; - } else { - for (unsigned j = 0; j < msm_ring->u.nr_cmds; j++) { - if (ring->flags & FD_RINGBUFFER_PRIMARY) { - cmds[i].type = MSM_SUBMIT_CMD_BUF; - } else { - cmds[i].type = MSM_SUBMIT_CMD_IB_TARGET_BUF; - } - 
cmds[i].submit_idx = append_bo(msm_submit, - msm_ring->u.cmds[j]->ring_bo, FD_RELOC_READ); - cmds[i].submit_offset = msm_ring->offset; - cmds[i].size = msm_ring->u.cmds[j]->size; - cmds[i].pad = 0; - cmds[i].nr_relocs = msm_ring->u.cmds[j]->nr_relocs; - cmds[i].relocs = VOID2U64(msm_ring->u.cmds[j]->relocs); - - i++; - } - } - } - - if (in_fence_fd != -1) { - req.flags |= MSM_SUBMIT_FENCE_FD_IN | MSM_SUBMIT_NO_IMPLICIT; - req.fence_fd = in_fence_fd; - } - - if (out_fence_fd) { - req.flags |= MSM_SUBMIT_FENCE_FD_OUT; - } - - /* needs to be after get_cmd() as that could create bos/cmds table: */ - req.bos = VOID2U64(msm_submit->submit_bos), - req.nr_bos = msm_submit->nr_submit_bos; - req.cmds = VOID2U64(cmds), - req.nr_cmds = nr_cmds; - - DEBUG_MSG("nr_cmds=%u, nr_bos=%u", req.nr_cmds, req.nr_bos); - - ret = drmCommandWriteRead(submit->pipe->dev->fd, DRM_MSM_GEM_SUBMIT, - &req, sizeof(req)); - if (ret) { - ERROR_MSG("submit failed: %d (%s)", ret, strerror(errno)); - msm_dump_submit(&req); - } else if (!ret) { - if (out_fence) - *out_fence = req.fence; - - if (out_fence_fd) - *out_fence_fd = req.fence_fd; - } - - for (unsigned o = 0; o < nr_objs; o++) - free(obj_relocs[o]); - - return ret; -} - -static void -unref_rings(struct set_entry *entry) -{ - struct fd_ringbuffer *ring = (void *)entry->key; - fd_ringbuffer_del(ring); -} - -static void -msm_submit_destroy(struct fd_submit *submit) -{ - struct msm_submit *msm_submit = to_msm_submit(submit); - - if (msm_submit->primary) - fd_ringbuffer_del(msm_submit->primary); - if (msm_submit->suballoc_ring) - fd_ringbuffer_del(msm_submit->suballoc_ring); - - _mesa_hash_table_destroy(msm_submit->bo_table, NULL); - _mesa_set_destroy(msm_submit->ring_set, unref_rings); - - // TODO it would be nice to have a way to debug_assert() if all - // rb's haven't been free'd back to the slab, because that is - // an indication that we are leaking bo's - slab_destroy(&msm_submit->ring_pool); - - for (unsigned i = 0; i < msm_submit->nr_bos; 
i++) - fd_bo_del(msm_submit->bos[i]); - - free(msm_submit->submit_bos); - free(msm_submit->bos); - free(msm_submit); -} - -static const struct fd_submit_funcs submit_funcs = { - .new_ringbuffer = msm_submit_new_ringbuffer, - .flush = msm_submit_flush, - .destroy = msm_submit_destroy, -}; - -struct fd_submit * -msm_submit_new(struct fd_pipe *pipe) -{ - struct msm_submit *msm_submit = calloc(1, sizeof(*msm_submit)); - struct fd_submit *submit; - static unsigned submit_cnt = 0; - - msm_submit->seqno = ++submit_cnt; - msm_submit->bo_table = _mesa_hash_table_create(NULL, - _mesa_hash_pointer, _mesa_key_pointer_equal); - msm_submit->ring_set = _mesa_set_create(NULL, - _mesa_hash_pointer, _mesa_key_pointer_equal); - // TODO tune size: - slab_create(&msm_submit->ring_pool, sizeof(struct msm_ringbuffer), 16); - - submit = &msm_submit->base; - submit->pipe = pipe; - submit->funcs = &submit_funcs; - - return submit; -} - - -static void -finalize_current_cmd(struct fd_ringbuffer *ring) -{ - struct msm_ringbuffer *msm_ring = to_msm_ringbuffer(ring); - - debug_assert(!(ring->flags & _FD_RINGBUFFER_OBJECT)); - - if (!msm_ring->cmd) - return; - - debug_assert(msm_ring->cmd->ring_bo == msm_ring->ring_bo); - - unsigned idx = APPEND(&msm_ring->u, cmds); - - msm_ring->u.cmds[idx] = msm_ring->cmd; - msm_ring->cmd = NULL; - - msm_ring->u.cmds[idx]->size = offset_bytes(ring->cur, ring->start); -} - -static void -msm_ringbuffer_grow(struct fd_ringbuffer *ring, uint32_t size) -{ - struct msm_ringbuffer *msm_ring = to_msm_ringbuffer(ring); - struct fd_pipe *pipe = msm_ring->u.submit->pipe; - - debug_assert(ring->flags & FD_RINGBUFFER_GROWABLE); - - finalize_current_cmd(ring); - - fd_bo_del(msm_ring->ring_bo); - msm_ring->ring_bo = fd_bo_new_ring(pipe->dev, size, 0); - msm_ring->cmd = cmd_new(msm_ring->ring_bo); - - ring->start = fd_bo_map(msm_ring->ring_bo); - ring->end = &(ring->start[size/4]); - ring->cur = ring->start; - ring->size = size; -} - -static void 
-msm_ringbuffer_emit_reloc(struct fd_ringbuffer *ring, - const struct fd_reloc *reloc) -{ - struct msm_ringbuffer *msm_ring = to_msm_ringbuffer(ring); - struct fd_pipe *pipe; - unsigned reloc_idx; - - if (ring->flags & _FD_RINGBUFFER_OBJECT) { - unsigned idx = APPEND(&msm_ring->u, reloc_bos); - - msm_ring->u.reloc_bos[idx].bo = fd_bo_ref(reloc->bo); - msm_ring->u.reloc_bos[idx].flags = reloc->flags; - - /* this gets fixed up at submit->flush() time, since this state- - * object rb can be used with many different submits - */ - reloc_idx = idx; - - pipe = msm_ring->u.pipe; - } else { - struct msm_submit *msm_submit = - to_msm_submit(msm_ring->u.submit); - - reloc_idx = append_bo(msm_submit, reloc->bo, reloc->flags); - - pipe = msm_ring->u.submit->pipe; - } - - struct drm_msm_gem_submit_reloc *r; - unsigned idx = APPEND(msm_ring->cmd, relocs); - - r = &msm_ring->cmd->relocs[idx]; - - r->reloc_idx = reloc_idx; - r->reloc_offset = reloc->offset; - r->or = reloc->or; - r->shift = reloc->shift; - r->submit_offset = offset_bytes(ring->cur, ring->start) + - msm_ring->offset; - - ring->cur++; - - if (pipe->gpu_id >= 500) { - idx = APPEND(msm_ring->cmd, relocs); - r = &msm_ring->cmd->relocs[idx]; - - r->reloc_idx = reloc_idx; - r->reloc_offset = reloc->offset; - r->or = reloc->orhi; - r->shift = reloc->shift - 32; - r->submit_offset = offset_bytes(ring->cur, ring->start) + - msm_ring->offset; - - ring->cur++; - } -} - -static void -append_stateobj_rings(struct msm_submit *submit, struct fd_ringbuffer *target) -{ - struct msm_ringbuffer *msm_target = to_msm_ringbuffer(target); - - debug_assert(target->flags & _FD_RINGBUFFER_OBJECT); - - struct set_entry *entry; - set_foreach(msm_target->u.ring_set, entry) { - struct fd_ringbuffer *ring = (void *)entry->key; - - append_ring(submit->ring_set, ring); - - if (ring->flags & _FD_RINGBUFFER_OBJECT) { - append_stateobj_rings(submit, ring); - } - } -} - -static uint32_t -msm_ringbuffer_emit_reloc_ring(struct fd_ringbuffer *ring, - 
struct fd_ringbuffer *target, uint32_t cmd_idx) -{ - struct msm_ringbuffer *msm_target = to_msm_ringbuffer(target); - struct msm_ringbuffer *msm_ring = to_msm_ringbuffer(ring); - struct fd_bo *bo; - uint32_t size; - - if ((target->flags & FD_RINGBUFFER_GROWABLE) && - (cmd_idx < msm_target->u.nr_cmds)) { - bo = msm_target->u.cmds[cmd_idx]->ring_bo; - size = msm_target->u.cmds[cmd_idx]->size; - } else { - bo = msm_target->ring_bo; - size = offset_bytes(target->cur, target->start); - } - - msm_ringbuffer_emit_reloc(ring, &(struct fd_reloc){ - .bo = bo, - .flags = FD_RELOC_READ, - .offset = msm_target->offset, - }); - - if ((target->flags & _FD_RINGBUFFER_OBJECT) && - !(ring->flags & _FD_RINGBUFFER_OBJECT)) { - struct msm_submit *msm_submit = to_msm_submit(msm_ring->u.submit); - - append_stateobj_rings(msm_submit, target); - } - - if (ring->flags & _FD_RINGBUFFER_OBJECT) { - append_ring(msm_ring->u.ring_set, target); - } else { - struct msm_submit *msm_submit = to_msm_submit(msm_ring->u.submit); - append_ring(msm_submit->ring_set, target); - } - - return size; -} - -static uint32_t -msm_ringbuffer_cmd_count(struct fd_ringbuffer *ring) -{ - if (ring->flags & FD_RINGBUFFER_GROWABLE) - return to_msm_ringbuffer(ring)->u.nr_cmds + 1; - return 1; -} - -static void -msm_ringbuffer_destroy(struct fd_ringbuffer *ring) -{ - struct msm_ringbuffer *msm_ring = to_msm_ringbuffer(ring); - - fd_bo_del(msm_ring->ring_bo); - if (msm_ring->cmd) - cmd_free(msm_ring->cmd); - - if (ring->flags & _FD_RINGBUFFER_OBJECT) { - for (unsigned i = 0; i < msm_ring->u.nr_reloc_bos; i++) { - fd_bo_del(msm_ring->u.reloc_bos[i].bo); - } - - _mesa_set_destroy(msm_ring->u.ring_set, unref_rings); - - free(msm_ring->u.reloc_bos); - free(msm_ring); - } else { - struct fd_submit *submit = msm_ring->u.submit; - - for (unsigned i = 0; i < msm_ring->u.nr_cmds; i++) { - cmd_free(msm_ring->u.cmds[i]); - } - - free(msm_ring->u.cmds); - slab_free_st(&to_msm_submit(submit)->ring_pool, msm_ring); - } -} - -static 
const struct fd_ringbuffer_funcs ring_funcs = { - .grow = msm_ringbuffer_grow, - .emit_reloc = msm_ringbuffer_emit_reloc, - .emit_reloc_ring = msm_ringbuffer_emit_reloc_ring, - .cmd_count = msm_ringbuffer_cmd_count, - .destroy = msm_ringbuffer_destroy, -}; - -static inline struct fd_ringbuffer * -msm_ringbuffer_init(struct msm_ringbuffer *msm_ring, uint32_t size, - enum fd_ringbuffer_flags flags) -{ - struct fd_ringbuffer *ring = &msm_ring->base; - - debug_assert(msm_ring->ring_bo); - - uint8_t *base = fd_bo_map(msm_ring->ring_bo); - ring->start = (void *)(base + msm_ring->offset); - ring->end = &(ring->start[size/4]); - ring->cur = ring->start; - - ring->size = size; - ring->flags = flags; - - ring->funcs = &ring_funcs; - - msm_ring->u.cmds = NULL; - msm_ring->u.nr_cmds = msm_ring->u.max_cmds = 0; - - msm_ring->cmd = cmd_new(msm_ring->ring_bo); - - return ring; -} - -struct fd_ringbuffer * -msm_ringbuffer_new_object(struct fd_pipe *pipe, uint32_t size) -{ - struct msm_ringbuffer *msm_ring = malloc(sizeof(*msm_ring)); - - msm_ring->u.pipe = pipe; - msm_ring->offset = 0; - msm_ring->ring_bo = fd_bo_new_ring(pipe->dev, size, 0); - msm_ring->base.refcnt = 1; - - msm_ring->u.reloc_bos = NULL; - msm_ring->u.nr_reloc_bos = msm_ring->u.max_reloc_bos = 0; - - msm_ring->u.ring_set = _mesa_set_create(NULL, - _mesa_hash_pointer, _mesa_key_pointer_equal); - - return msm_ringbuffer_init(msm_ring, size, _FD_RINGBUFFER_OBJECT); -} diff -Nru mesa-18.3.3/src/gallium/drivers/freedreno/drm/msm_ringbuffer_sp.c mesa-19.0.1/src/gallium/drivers/freedreno/drm/msm_ringbuffer_sp.c --- mesa-18.3.3/src/gallium/drivers/freedreno/drm/msm_ringbuffer_sp.c 2018-12-07 18:58:04.000000000 +0000 +++ mesa-19.0.1/src/gallium/drivers/freedreno/drm/msm_ringbuffer_sp.c 1970-01-01 00:00:00.000000000 +0000 @@ -1,551 +0,0 @@ -/* - * Copyright (C) 2018 Rob Clark - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the 
"Software"), - * to deal in the Software without restriction, including without limitation - * the rights to use, copy, modify, merge, publish, distribute, sublicense, - * and/or sell copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice (including the next - * paragraph) shall be included in all copies or substantial portions of the - * Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL - * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - * - * Authors: - * Rob Clark - */ - -#include -#include - -#include "util/hash_table.h" -#include "util/slab.h" - -#include "drm/freedreno_ringbuffer.h" -#include "msm_priv.h" - -/* A "softpin" implementation of submit/ringbuffer, which lowers CPU overhead - * by avoiding the additional tracking necessary to build cmds/relocs tables - * (but still builds a bos table) - */ - - -#define INIT_SIZE 0x1000 - -static pthread_mutex_t idx_lock = PTHREAD_MUTEX_INITIALIZER; - - -struct msm_submit_sp { - struct fd_submit base; - - DECLARE_ARRAY(struct drm_msm_gem_submit_bo, submit_bos); - DECLARE_ARRAY(struct fd_bo *, bos); - - unsigned seqno; - - /* maps fd_bo to idx in bos table: */ - struct hash_table *bo_table; - - struct slab_mempool ring_pool; - - struct fd_ringbuffer *primary; - - /* Allow for sub-allocation of stateobj ring buffers (ie. sharing - * the same underlying bo).. - * - * We also rely on previous stateobj having been fully constructed - * so we can reclaim extra space at it's end. 
- */ - struct fd_ringbuffer *suballoc_ring; -}; -FD_DEFINE_CAST(fd_submit, msm_submit_sp); - -/* for FD_RINGBUFFER_GROWABLE rb's, tracks the 'finalized' cmdstream buffers - * and sizes. Ie. a finalized buffer can have no more commands appended to - * it. - */ -struct msm_cmd_sp { - struct fd_bo *ring_bo; - unsigned size; -}; - -/* for _FD_RINGBUFFER_OBJECT rb's we need to track the bo's and flags to - * later copy into the submit when the stateobj rb is later referenced by - * a regular rb: - */ -struct msm_reloc_bo_sp { - struct fd_bo *bo; - unsigned flags; -}; - -struct msm_ringbuffer_sp { - struct fd_ringbuffer base; - - /* for FD_RINGBUFFER_STREAMING rb's which are sub-allocated */ - unsigned offset; - -// TODO check disasm.. hopefully compilers CSE can realize that -// reloc_bos and cmds are at the same offsets and optimize some -// divergent cases into single case - union { - /* for _FD_RINGBUFFER_OBJECT case: */ - struct { - struct fd_pipe *pipe; - DECLARE_ARRAY(struct msm_reloc_bo_sp, reloc_bos); - }; - /* for other cases: */ - struct { - struct fd_submit *submit; - DECLARE_ARRAY(struct msm_cmd_sp, cmds); - }; - } u; - - struct fd_bo *ring_bo; -}; -FD_DEFINE_CAST(fd_ringbuffer, msm_ringbuffer_sp); - -static void finalize_current_cmd(struct fd_ringbuffer *ring); -static struct fd_ringbuffer * msm_ringbuffer_sp_init( - struct msm_ringbuffer_sp *msm_ring, - uint32_t size, enum fd_ringbuffer_flags flags); - -/* add (if needed) bo to submit and return index: */ -static uint32_t -append_bo(struct msm_submit_sp *submit, struct fd_bo *bo, uint32_t flags) -{ - struct msm_bo *msm_bo = to_msm_bo(bo); - uint32_t idx; - pthread_mutex_lock(&idx_lock); - if (likely(msm_bo->current_submit_seqno == submit->seqno)) { - idx = msm_bo->idx; - } else { - uint32_t hash = _mesa_hash_pointer(bo); - struct hash_entry *entry; - - entry = _mesa_hash_table_search_pre_hashed(submit->bo_table, hash, bo); - if (entry) { - /* found */ - idx = (uint32_t)(uintptr_t)entry->data; - } else { - 
idx = APPEND(submit, submit_bos); - idx = APPEND(submit, bos); - - submit->submit_bos[idx].flags = 0; - submit->submit_bos[idx].handle = bo->handle; - submit->submit_bos[idx].presumed = 0; - - submit->bos[idx] = fd_bo_ref(bo); - - _mesa_hash_table_insert_pre_hashed(submit->bo_table, hash, bo, - (void *)(uintptr_t)idx); - } - msm_bo->current_submit_seqno = submit->seqno; - msm_bo->idx = idx; - } - pthread_mutex_unlock(&idx_lock); - if (flags & FD_RELOC_READ) - submit->submit_bos[idx].flags |= MSM_SUBMIT_BO_READ; - if (flags & FD_RELOC_WRITE) - submit->submit_bos[idx].flags |= MSM_SUBMIT_BO_WRITE; - return idx; -} - -static void -msm_submit_suballoc_ring_bo(struct fd_submit *submit, - struct msm_ringbuffer_sp *msm_ring, uint32_t size) -{ - struct msm_submit_sp *msm_submit = to_msm_submit_sp(submit); - unsigned suballoc_offset = 0; - struct fd_bo *suballoc_bo = NULL; - - if (msm_submit->suballoc_ring) { - struct msm_ringbuffer_sp *suballoc_ring = - to_msm_ringbuffer_sp(msm_submit->suballoc_ring); - - suballoc_bo = suballoc_ring->ring_bo; - suballoc_offset = fd_ringbuffer_size(msm_submit->suballoc_ring) + - suballoc_ring->offset; - - suballoc_offset = align(suballoc_offset, 0x10); - - if ((size + suballoc_offset) > suballoc_bo->size) { - suballoc_bo = NULL; - } - } - - if (!suballoc_bo) { - // TODO possibly larger size for streaming bo? 
- msm_ring->ring_bo = fd_bo_new_ring( - submit->pipe->dev, 0x8000, 0); - msm_ring->offset = 0; - } else { - msm_ring->ring_bo = fd_bo_ref(suballoc_bo); - msm_ring->offset = suballoc_offset; - } - - struct fd_ringbuffer *old_suballoc_ring = msm_submit->suballoc_ring; - - msm_submit->suballoc_ring = fd_ringbuffer_ref(&msm_ring->base); - - if (old_suballoc_ring) - fd_ringbuffer_del(old_suballoc_ring); -} - -static struct fd_ringbuffer * -msm_submit_sp_new_ringbuffer(struct fd_submit *submit, uint32_t size, - enum fd_ringbuffer_flags flags) -{ - struct msm_submit_sp *msm_submit = to_msm_submit_sp(submit); - struct msm_ringbuffer_sp *msm_ring; - - msm_ring = slab_alloc_st(&msm_submit->ring_pool); - - msm_ring->u.submit = submit; - - /* NOTE: needs to be before _suballoc_ring_bo() since it could - * increment the refcnt of the current ring - */ - msm_ring->base.refcnt = 1; - - if (flags & FD_RINGBUFFER_STREAMING) { - msm_submit_suballoc_ring_bo(submit, msm_ring, size); - } else { - if (flags & FD_RINGBUFFER_GROWABLE) - size = INIT_SIZE; - - msm_ring->offset = 0; - msm_ring->ring_bo = fd_bo_new_ring(submit->pipe->dev, size, 0); - } - - if (!msm_ringbuffer_sp_init(msm_ring, size, flags)) - return NULL; - - if (flags & FD_RINGBUFFER_PRIMARY) { - debug_assert(!msm_submit->primary); - msm_submit->primary = fd_ringbuffer_ref(&msm_ring->base); - } - - return &msm_ring->base; -} - -static int -msm_submit_sp_flush(struct fd_submit *submit, int in_fence_fd, - int *out_fence_fd, uint32_t *out_fence) -{ - struct msm_submit_sp *msm_submit = to_msm_submit_sp(submit); - struct msm_pipe *msm_pipe = to_msm_pipe(submit->pipe); - struct drm_msm_gem_submit req = { - .flags = msm_pipe->pipe, - .queueid = msm_pipe->queue_id, - }; - int ret; - - debug_assert(msm_submit->primary); - finalize_current_cmd(msm_submit->primary); - - struct msm_ringbuffer_sp *primary = to_msm_ringbuffer_sp(msm_submit->primary); - struct drm_msm_gem_submit_cmd cmds[primary->u.nr_cmds]; - - for (unsigned i = 0; i < 
primary->u.nr_cmds; i++) { - cmds[i].type = MSM_SUBMIT_CMD_BUF; - cmds[i].submit_idx = - append_bo(msm_submit, primary->u.cmds[i].ring_bo, FD_RELOC_READ); - cmds[i].submit_offset = primary->offset; - cmds[i].size = primary->u.cmds[i].size; - cmds[i].pad = 0; - cmds[i].nr_relocs = 0; - } - - if (in_fence_fd != -1) { - req.flags |= MSM_SUBMIT_FENCE_FD_IN | MSM_SUBMIT_NO_IMPLICIT; - req.fence_fd = in_fence_fd; - } - - if (out_fence_fd) { - req.flags |= MSM_SUBMIT_FENCE_FD_OUT; - } - - /* needs to be after get_cmd() as that could create bos/cmds table: */ - req.bos = VOID2U64(msm_submit->submit_bos), - req.nr_bos = msm_submit->nr_submit_bos; - req.cmds = VOID2U64(cmds), - req.nr_cmds = primary->u.nr_cmds; - - DEBUG_MSG("nr_cmds=%u, nr_bos=%u", req.nr_cmds, req.nr_bos); - - ret = drmCommandWriteRead(submit->pipe->dev->fd, DRM_MSM_GEM_SUBMIT, - &req, sizeof(req)); - if (ret) { - ERROR_MSG("submit failed: %d (%s)", ret, strerror(errno)); - msm_dump_submit(&req); - } else if (!ret) { - if (out_fence) - *out_fence = req.fence; - - if (out_fence_fd) - *out_fence_fd = req.fence_fd; - } - - return ret; -} - -static void -msm_submit_sp_destroy(struct fd_submit *submit) -{ - struct msm_submit_sp *msm_submit = to_msm_submit_sp(submit); - - if (msm_submit->primary) - fd_ringbuffer_del(msm_submit->primary); - if (msm_submit->suballoc_ring) - fd_ringbuffer_del(msm_submit->suballoc_ring); - - _mesa_hash_table_destroy(msm_submit->bo_table, NULL); - - // TODO it would be nice to have a way to debug_assert() if all - // rb's haven't been free'd back to the slab, because that is - // an indication that we are leaking bo's - slab_destroy(&msm_submit->ring_pool); - - for (unsigned i = 0; i < msm_submit->nr_bos; i++) - fd_bo_del(msm_submit->bos[i]); - - free(msm_submit->submit_bos); - free(msm_submit->bos); - free(msm_submit); -} - -static const struct fd_submit_funcs submit_funcs = { - .new_ringbuffer = msm_submit_sp_new_ringbuffer, - .flush = msm_submit_sp_flush, - .destroy = 
msm_submit_sp_destroy, -}; - -struct fd_submit * -msm_submit_sp_new(struct fd_pipe *pipe) -{ - struct msm_submit_sp *msm_submit = calloc(1, sizeof(*msm_submit)); - struct fd_submit *submit; - static unsigned submit_cnt = 0; - - msm_submit->seqno = ++submit_cnt; - msm_submit->bo_table = _mesa_hash_table_create(NULL, - _mesa_hash_pointer, _mesa_key_pointer_equal); - // TODO tune size: - slab_create(&msm_submit->ring_pool, sizeof(struct msm_ringbuffer_sp), 16); - - submit = &msm_submit->base; - submit->pipe = pipe; - submit->funcs = &submit_funcs; - - return submit; -} - - -static void -finalize_current_cmd(struct fd_ringbuffer *ring) -{ - debug_assert(!(ring->flags & _FD_RINGBUFFER_OBJECT)); - - struct msm_ringbuffer_sp *msm_ring = to_msm_ringbuffer_sp(ring); - unsigned idx = APPEND(&msm_ring->u, cmds); - - msm_ring->u.cmds[idx].ring_bo = fd_bo_ref(msm_ring->ring_bo); - msm_ring->u.cmds[idx].size = offset_bytes(ring->cur, ring->start); -} - -static void -msm_ringbuffer_sp_grow(struct fd_ringbuffer *ring, uint32_t size) -{ - struct msm_ringbuffer_sp *msm_ring = to_msm_ringbuffer_sp(ring); - struct fd_pipe *pipe = msm_ring->u.submit->pipe; - - debug_assert(ring->flags & FD_RINGBUFFER_GROWABLE); - - finalize_current_cmd(ring); - - fd_bo_del(msm_ring->ring_bo); - msm_ring->ring_bo = fd_bo_new_ring(pipe->dev, size, 0); - - ring->start = fd_bo_map(msm_ring->ring_bo); - ring->end = &(ring->start[size/4]); - ring->cur = ring->start; - ring->size = size; -} - -static void -msm_ringbuffer_sp_emit_reloc(struct fd_ringbuffer *ring, - const struct fd_reloc *reloc) -{ - struct msm_ringbuffer_sp *msm_ring = to_msm_ringbuffer_sp(ring); - struct fd_pipe *pipe; - - if (ring->flags & _FD_RINGBUFFER_OBJECT) { - unsigned idx = APPEND(&msm_ring->u, reloc_bos); - - msm_ring->u.reloc_bos[idx].bo = fd_bo_ref(reloc->bo); - msm_ring->u.reloc_bos[idx].flags = reloc->flags; - - pipe = msm_ring->u.pipe; - } else { - struct msm_submit_sp *msm_submit = - to_msm_submit_sp(msm_ring->u.submit); - - 
append_bo(msm_submit, reloc->bo, reloc->flags); - - pipe = msm_ring->u.submit->pipe; - } - - uint64_t iova = fd_bo_get_iova(reloc->bo) + reloc->offset; - uint32_t dword = iova; - int shift = reloc->shift; - - if (shift < 0) - dword >>= -shift; - else - dword <<= shift; - - (*ring->cur++) = dword | reloc->or; - - if (pipe->gpu_id >= 500) { - dword = iova >> 32; - shift -= 32; - - if (shift < 0) - dword >>= -shift; - else - dword <<= shift; - - (*ring->cur++) = dword | reloc->orhi; - } -} - -static uint32_t -msm_ringbuffer_sp_emit_reloc_ring(struct fd_ringbuffer *ring, - struct fd_ringbuffer *target, uint32_t cmd_idx) -{ - struct msm_ringbuffer_sp *msm_target = to_msm_ringbuffer_sp(target); - struct fd_bo *bo; - uint32_t size; - - if ((target->flags & FD_RINGBUFFER_GROWABLE) && - (cmd_idx < msm_target->u.nr_cmds)) { - bo = msm_target->u.cmds[cmd_idx].ring_bo; - size = msm_target->u.cmds[cmd_idx].size; - } else { - bo = msm_target->ring_bo; - size = offset_bytes(target->cur, target->start); - } - - msm_ringbuffer_sp_emit_reloc(ring, &(struct fd_reloc){ - .bo = bo, - .flags = FD_RELOC_READ, - .offset = msm_target->offset, - }); - - if ((target->flags & _FD_RINGBUFFER_OBJECT) && - !(ring->flags & _FD_RINGBUFFER_OBJECT)) { - // TODO it would be nice to know whether we have already - // seen this target before. 
But hopefully we hit the - // append_bo() fast path enough for this to not matter: - struct msm_ringbuffer_sp *msm_ring = to_msm_ringbuffer_sp(ring); - struct msm_submit_sp *msm_submit = to_msm_submit_sp(msm_ring->u.submit); - - for (unsigned i = 0; i < msm_target->u.nr_reloc_bos; i++) { - append_bo(msm_submit, msm_target->u.reloc_bos[i].bo, - msm_target->u.reloc_bos[i].flags); - } - } - - return size; -} - -static uint32_t -msm_ringbuffer_sp_cmd_count(struct fd_ringbuffer *ring) -{ - if (ring->flags & FD_RINGBUFFER_GROWABLE) - return to_msm_ringbuffer_sp(ring)->u.nr_cmds + 1; - return 1; -} - -static void -msm_ringbuffer_sp_destroy(struct fd_ringbuffer *ring) -{ - struct msm_ringbuffer_sp *msm_ring = to_msm_ringbuffer_sp(ring); - - fd_bo_del(msm_ring->ring_bo); - - if (ring->flags & _FD_RINGBUFFER_OBJECT) { - for (unsigned i = 0; i < msm_ring->u.nr_reloc_bos; i++) { - fd_bo_del(msm_ring->u.reloc_bos[i].bo); - } - - free(msm_ring); - } else { - struct fd_submit *submit = msm_ring->u.submit; - - for (unsigned i = 0; i < msm_ring->u.nr_cmds; i++) { - fd_bo_del(msm_ring->u.cmds[i].ring_bo); - } - - slab_free_st(&to_msm_submit_sp(submit)->ring_pool, msm_ring); - } -} - -static const struct fd_ringbuffer_funcs ring_funcs = { - .grow = msm_ringbuffer_sp_grow, - .emit_reloc = msm_ringbuffer_sp_emit_reloc, - .emit_reloc_ring = msm_ringbuffer_sp_emit_reloc_ring, - .cmd_count = msm_ringbuffer_sp_cmd_count, - .destroy = msm_ringbuffer_sp_destroy, -}; - -static inline struct fd_ringbuffer * -msm_ringbuffer_sp_init(struct msm_ringbuffer_sp *msm_ring, uint32_t size, - enum fd_ringbuffer_flags flags) -{ - struct fd_ringbuffer *ring = &msm_ring->base; - - debug_assert(msm_ring->ring_bo); - - uint8_t *base = fd_bo_map(msm_ring->ring_bo); - ring->start = (void *)(base + msm_ring->offset); - ring->end = &(ring->start[size/4]); - ring->cur = ring->start; - - ring->size = size; - ring->flags = flags; - - ring->funcs = &ring_funcs; - - // TODO initializing these could probably be 
conditional on flags - // since unneed for FD_RINGBUFFER_STAGING case.. - msm_ring->u.cmds = NULL; - msm_ring->u.nr_cmds = msm_ring->u.max_cmds = 0; - - msm_ring->u.reloc_bos = NULL; - msm_ring->u.nr_reloc_bos = msm_ring->u.max_reloc_bos = 0; - - return ring; -} - -struct fd_ringbuffer * -msm_ringbuffer_sp_new_object(struct fd_pipe *pipe, uint32_t size) -{ - struct msm_ringbuffer_sp *msm_ring = malloc(sizeof(*msm_ring)); - - msm_ring->u.pipe = pipe; - msm_ring->offset = 0; - msm_ring->ring_bo = fd_bo_new_ring(pipe->dev, size, 0); - msm_ring->base.refcnt = 1; - - return msm_ringbuffer_sp_init(msm_ring, size, _FD_RINGBUFFER_OBJECT); -} diff -Nru mesa-18.3.3/src/gallium/drivers/freedreno/freedreno_batch.c mesa-19.0.1/src/gallium/drivers/freedreno/freedreno_batch.c --- mesa-18.3.3/src/gallium/drivers/freedreno/freedreno_batch.c 2018-12-07 18:58:04.000000000 +0000 +++ mesa-19.0.1/src/gallium/drivers/freedreno/freedreno_batch.c 2019-03-31 23:16:37.000000000 +0000 @@ -76,23 +76,28 @@ batch->fence = fd_fence_create(batch); batch->cleared = 0; + batch->fast_cleared = 0; batch->invalidated = 0; batch->restore = batch->resolve = 0; batch->needs_flush = false; batch->flushed = false; batch->gmem_reason = 0; batch->num_draws = 0; + batch->num_vertices = 0; batch->stage = FD_STAGE_NULL; fd_reset_wfi(batch); util_dynarray_init(&batch->draw_patches, NULL); + if (is_a2xx(ctx->screen)) { + util_dynarray_init(&batch->shader_patches, NULL); + util_dynarray_init(&batch->gmem_patches, NULL); + } + if (is_a3xx(ctx->screen)) util_dynarray_init(&batch->rbrc_patches, NULL); - util_dynarray_init(&batch->gmem_patches, NULL); - assert(batch->resources->entries == 0); util_dynarray_init(&batch->samples, NULL); @@ -144,20 +149,34 @@ debug_assert(!batch->binning); debug_assert(!batch->gmem); } + if (batch->lrz_clear) { fd_ringbuffer_del(batch->lrz_clear); batch->lrz_clear = NULL; } + if (batch->tile_setup) { + fd_ringbuffer_del(batch->tile_setup); + batch->tile_setup = NULL; + } + + if 
(batch->tile_fini) { + fd_ringbuffer_del(batch->tile_fini); + batch->tile_fini = NULL; + } + fd_submit_del(batch->submit); util_dynarray_fini(&batch->draw_patches); + if (is_a2xx(batch->ctx->screen)) { + util_dynarray_fini(&batch->shader_patches); + util_dynarray_fini(&batch->gmem_patches); + } + if (is_a3xx(batch->ctx->screen)) util_dynarray_fini(&batch->rbrc_patches); - util_dynarray_fini(&batch->gmem_patches); - while (batch->samples.size > 0) { struct fd_hw_sample *samp = util_dynarray_pop(&batch->samples, struct fd_hw_sample *); diff -Nru mesa-18.3.3/src/gallium/drivers/freedreno/freedreno_batch_cache.c mesa-19.0.1/src/gallium/drivers/freedreno/freedreno_batch_cache.c --- mesa-18.3.3/src/gallium/drivers/freedreno/freedreno_batch_cache.c 2018-12-07 18:58:04.000000000 +0000 +++ mesa-19.0.1/src/gallium/drivers/freedreno/freedreno_batch_cache.c 2019-03-31 23:16:37.000000000 +0000 @@ -81,7 +81,8 @@ struct { struct pipe_resource *texture; union pipe_surface_desc u; - uint16_t pos, format; + uint8_t pos, samples; + uint16_t format; } surf[0]; }; @@ -401,6 +402,7 @@ key->surf[idx].texture = psurf->texture; key->surf[idx].u = psurf->u; key->surf[idx].pos = pos; + key->surf[idx].samples = MAX2(1, psurf->nr_samples); key->surf[idx].format = psurf->format; } diff -Nru mesa-18.3.3/src/gallium/drivers/freedreno/freedreno_batch.h mesa-19.0.1/src/gallium/drivers/freedreno/freedreno_batch.h --- mesa-18.3.3/src/gallium/drivers/freedreno/freedreno_batch.h 2018-12-07 18:58:04.000000000 +0000 +++ mesa-19.0.1/src/gallium/drivers/freedreno/freedreno_batch.h 2019-03-31 23:16:37.000000000 +0000 @@ -95,7 +95,7 @@ FD_BUFFER_DEPTH = PIPE_CLEAR_DEPTH, FD_BUFFER_STENCIL = PIPE_CLEAR_STENCIL, FD_BUFFER_ALL = FD_BUFFER_COLOR | FD_BUFFER_DEPTH | FD_BUFFER_STENCIL, - } invalidated, cleared, restore, resolve; + } invalidated, cleared, fast_cleared, restore, resolve; /* is this a non-draw batch (ie compute/blit which has no pfb state)? 
*/ bool nondraw : 1; @@ -124,6 +124,7 @@ FD_GMEM_LOGICOP_ENABLED = 0x20, } gmem_reason; unsigned num_draws; /* number of draws in current batch */ + unsigned num_vertices; /* number of vertices in current batch */ /* Track the maximal bounds of the scissor of all the draws within a * batch. Used at the tile rendering step (fd_gmem_render_tiles(), @@ -136,11 +137,6 @@ */ struct util_dynarray draw_patches; - /* Keep track of blitter GMEM offsets that need to be patched up once we - * know the gmem layout: - */ - struct util_dynarray gmem_patches; - /* Keep track of writes to RB_RENDER_CONTROL which need to be patched * once we know whether or not to use GMEM, and GMEM tile pitch. * @@ -149,6 +145,18 @@ */ struct util_dynarray rbrc_patches; + /* Keep track of GMEM related values that need to be patched up once we + * know the gmem layout: + */ + struct util_dynarray gmem_patches; + + /* Keep track of pointer to start of MEM exports for a20x binning shaders + * + * this is so the end of the shader can be cut off at the right point + * depending on the GMEM configuration + */ + struct util_dynarray shader_patches; + struct pipe_framebuffer_state framebuffer; struct fd_submit *submit; @@ -162,6 +170,12 @@ // TODO maybe more generically split out clear and clear_binning rings? 
struct fd_ringbuffer *lrz_clear; + struct fd_ringbuffer *tile_setup; + struct fd_ringbuffer *tile_fini; + + union pipe_color_union clear_color[MAX_RENDER_TARGETS]; + double clear_depth; + unsigned clear_stencil; /** * hw query related state: diff -Nru mesa-18.3.3/src/gallium/drivers/freedreno/freedreno_blitter.c mesa-19.0.1/src/gallium/drivers/freedreno/freedreno_blitter.c --- mesa-18.3.3/src/gallium/drivers/freedreno/freedreno_blitter.c 2017-12-19 17:21:05.000000000 +0000 +++ mesa-19.0.1/src/gallium/drivers/freedreno/freedreno_blitter.c 2019-03-31 23:16:37.000000000 +0000 @@ -25,9 +25,11 @@ */ #include "util/u_blitter.h" +#include "util/u_surface.h" #include "freedreno_blitter.h" #include "freedreno_context.h" +#include "freedreno_resource.h" /* generic blit using u_blitter.. slightly modified version of util_blitter_blit * which also handles PIPE_BUFFER: @@ -80,7 +82,7 @@ src_templ->swizzle_a = PIPE_SWIZZLE_W; } -void +bool fd_blitter_blit(struct fd_context *ctx, const struct pipe_blit_info *info) { struct pipe_resource *dst = info->dst.resource; @@ -88,6 +90,16 @@ struct pipe_context *pipe = &ctx->base; struct pipe_surface *dst_view, dst_templ; struct pipe_sampler_view src_templ, *src_view; + bool discard = false; + + if (!info->scissor_enable && !info->alpha_blend) { + discard = util_texrange_covers_whole_level(info->dst.resource, + info->dst.level, info->dst.box.x, info->dst.box.y, + info->dst.box.z, info->dst.box.width, + info->dst.box.height, info->dst.box.depth); + } + + fd_blitter_pipe_begin(ctx, info->render_condition_enable, discard, FD_STAGE_BLIT); /* Initialize the surface. 
*/ default_dst_texture(&dst_templ, dst, info->dst.level, @@ -109,4 +121,97 @@ pipe_surface_reference(&dst_view, NULL); pipe_sampler_view_reference(&src_view, NULL); + + fd_blitter_pipe_end(ctx); + + /* The fallback blitter must never fail: */ + return true; +} + +/** + * _copy_region using pipe (3d engine) + */ +static bool +fd_blitter_pipe_copy_region(struct fd_context *ctx, + struct pipe_resource *dst, + unsigned dst_level, + unsigned dstx, unsigned dsty, unsigned dstz, + struct pipe_resource *src, + unsigned src_level, + const struct pipe_box *src_box) +{ + /* not until we allow rendertargets to be buffers */ + if (dst->target == PIPE_BUFFER || src->target == PIPE_BUFFER) + return false; + + if (!util_blitter_is_copy_supported(ctx->blitter, dst, src)) + return false; + + /* TODO we could discard if dst box covers dst level fully.. */ + fd_blitter_pipe_begin(ctx, false, false, FD_STAGE_BLIT); + util_blitter_copy_texture(ctx->blitter, + dst, dst_level, dstx, dsty, dstz, + src, src_level, src_box); + fd_blitter_pipe_end(ctx); + + return true; +} + +/** + * Copy a block of pixels from one resource to another. + * The resource must be of the same format. 
+ */ +void +fd_resource_copy_region(struct pipe_context *pctx, + struct pipe_resource *dst, + unsigned dst_level, + unsigned dstx, unsigned dsty, unsigned dstz, + struct pipe_resource *src, + unsigned src_level, + const struct pipe_box *src_box) +{ + struct fd_context *ctx = fd_context(pctx); + + if (ctx->blit) { + struct pipe_blit_info info; + + memset(&info, 0, sizeof info); + info.dst.resource = dst; + info.dst.level = dst_level; + info.dst.box.x = dstx; + info.dst.box.y = dsty; + info.dst.box.z = dstz; + info.dst.box.width = src_box->width; + info.dst.box.height = src_box->height; + assert(info.dst.box.width >= 0); + assert(info.dst.box.height >= 0); + info.dst.box.depth = 1; + info.dst.format = dst->format; + info.src.resource = src; + info.src.level = src_level; + info.src.box = *src_box; + info.src.format = src->format; + info.mask = util_format_get_mask(src->format); + info.filter = PIPE_TEX_FILTER_NEAREST; + info.scissor_enable = 0; + + if (ctx->blit(ctx, &info)) + return; + } + + /* TODO if we have 2d core, or other DMA engine that could be used + * for simple copies and reasonably easily synchronized with the 3d + * core, this is where we'd plug it in.. 
+ */ + + /* try blit on 3d pipe: */ + if (fd_blitter_pipe_copy_region(ctx, + dst, dst_level, dstx, dsty, dstz, + src, src_level, src_box)) + return; + + /* else fallback to pure sw: */ + util_resource_copy_region(pctx, + dst, dst_level, dstx, dsty, dstz, + src, src_level, src_box); } diff -Nru mesa-18.3.3/src/gallium/drivers/freedreno/freedreno_blitter.h mesa-19.0.1/src/gallium/drivers/freedreno/freedreno_blitter.h --- mesa-18.3.3/src/gallium/drivers/freedreno/freedreno_blitter.h 2017-12-19 17:21:05.000000000 +0000 +++ mesa-19.0.1/src/gallium/drivers/freedreno/freedreno_blitter.h 2019-03-31 23:16:37.000000000 +0000 @@ -31,6 +31,14 @@ #include "freedreno_context.h" -void fd_blitter_blit(struct fd_context *ctx, const struct pipe_blit_info *info); +bool fd_blitter_blit(struct fd_context *ctx, const struct pipe_blit_info *info); + +void fd_resource_copy_region(struct pipe_context *pctx, + struct pipe_resource *dst, + unsigned dst_level, + unsigned dstx, unsigned dsty, unsigned dstz, + struct pipe_resource *src, + unsigned src_level, + const struct pipe_box *src_box); #endif /* FREEDRENO_BLIT_H_ */ diff -Nru mesa-18.3.3/src/gallium/drivers/freedreno/freedreno_context.c mesa-19.0.1/src/gallium/drivers/freedreno/freedreno_context.c --- mesa-18.3.3/src/gallium/drivers/freedreno/freedreno_context.c 2018-12-07 18:58:04.000000000 +0000 +++ mesa-19.0.1/src/gallium/drivers/freedreno/freedreno_context.c 2019-03-31 23:16:37.000000000 +0000 @@ -333,9 +333,6 @@ slab_create_child(&ctx->transfer_pool, &screen->transfer_pool); - if (!ctx->blit) - ctx->blit = fd_blitter_blit; - fd_draw_init(pctx); fd_resource_context_init(pctx); fd_query_context_init(pctx); diff -Nru mesa-18.3.3/src/gallium/drivers/freedreno/freedreno_context.h mesa-19.0.1/src/gallium/drivers/freedreno/freedreno_context.h --- mesa-18.3.3/src/gallium/drivers/freedreno/freedreno_context.h 2018-12-07 18:58:04.000000000 +0000 +++ mesa-19.0.1/src/gallium/drivers/freedreno/freedreno_context.h 2019-03-31 23:16:37.000000000 
+0000 @@ -56,14 +56,6 @@ struct fd_program_stateobj { void *vp, *fp; - - /* rest only used by fd2.. split out: */ - uint8_t num_exports; - /* Indexed by semantic name or TGSI_SEMANTIC_COUNT + semantic index - * for TGSI_SEMANTIC_GENERIC. Special vs exports (position and point- - * size) are not included in this - */ - uint8_t export_linkage[63]; }; struct fd_constbuf_stateobj { @@ -289,6 +281,7 @@ struct pipe_framebuffer_state framebuffer; struct pipe_poly_stipple stipple; struct pipe_viewport_state viewport; + struct pipe_scissor_state viewport_scissor; struct fd_constbuf_stateobj constbuf[PIPE_SHADER_TYPES]; struct fd_shaderbuf_stateobj shaderbuf[PIPE_SHADER_TYPES]; struct fd_shaderimg_stateobj shaderimg[PIPE_SHADER_TYPES]; @@ -324,11 +317,11 @@ void (*launch_grid)(struct fd_context *ctx, const struct pipe_grid_info *info); /* constant emit: (note currently not used/needed for a2xx) */ - void (*emit_const)(struct fd_ringbuffer *ring, enum shader_t type, + void (*emit_const)(struct fd_ringbuffer *ring, gl_shader_stage type, uint32_t regid, uint32_t offset, uint32_t sizedwords, const uint32_t *dwords, struct pipe_resource *prsc); /* emit bo addresses as constant: */ - void (*emit_const_bo)(struct fd_ringbuffer *ring, enum shader_t type, boolean write, + void (*emit_const_bo)(struct fd_ringbuffer *ring, gl_shader_stage type, boolean write, uint32_t regid, uint32_t num, struct pipe_resource **prscs, uint32_t *offsets); /* indirect-branch emit: */ @@ -342,7 +335,7 @@ void (*query_set_stage)(struct fd_batch *batch, enum fd_render_stage stage); /* blitter: */ - void (*blit)(struct fd_context *ctx, const struct pipe_blit_info *info); + bool (*blit)(struct fd_context *ctx, const struct pipe_blit_info *info); /* simple gpu "memcpy": */ void (*mem_to_mem)(struct fd_ringbuffer *ring, struct pipe_resource *dst, diff -Nru mesa-18.3.3/src/gallium/drivers/freedreno/freedreno_draw.c mesa-19.0.1/src/gallium/drivers/freedreno/freedreno_draw.c --- 
mesa-18.3.3/src/gallium/drivers/freedreno/freedreno_draw.c 2018-12-07 18:58:04.000000000 +0000 +++ mesa-19.0.1/src/gallium/drivers/freedreno/freedreno_draw.c 2019-03-31 23:16:37.000000000 +0000 @@ -144,9 +144,13 @@ } else { batch->invalidated |= FD_BUFFER_DEPTH; } - buffers |= FD_BUFFER_DEPTH; - resource_written(batch, pfb->zsbuf->texture); batch->gmem_reason |= FD_GMEM_DEPTH_ENABLED; + if (fd_depth_write_enabled(ctx)) { + buffers |= FD_BUFFER_DEPTH; + resource_written(batch, pfb->zsbuf->texture); + } else { + resource_read(batch, pfb->zsbuf->texture); + } } if (fd_stencil_enabled(ctx)) { @@ -155,18 +159,9 @@ } else { batch->invalidated |= FD_BUFFER_STENCIL; } + batch->gmem_reason |= FD_GMEM_STENCIL_ENABLED; buffers |= FD_BUFFER_STENCIL; resource_written(batch, pfb->zsbuf->texture); - batch->gmem_reason |= FD_GMEM_STENCIL_ENABLED; - } - } - - if (ctx->dirty & FD_DIRTY_FRAMEBUFFER) { - for (i = 0; i < pfb->nr_cbufs; i++) { - if (!pfb->cbufs[i]) - continue; - - resource_written(batch, pfb->cbufs[i]->texture); } } @@ -191,6 +186,9 @@ if (fd_blend_enabled(ctx, i)) batch->gmem_reason |= FD_GMEM_BLEND_ENABLED; + + if (ctx->dirty & FD_DIRTY_FRAMEBUFFER) + resource_written(batch, pfb->cbufs[i]->texture); } /* Mark SSBOs as being written.. 
we don't actually know which ones are @@ -291,6 +289,8 @@ if (ctx->draw_vbo(ctx, info, index_offset)) batch->needs_flush = true; + batch->num_vertices += info->count * info->instance_count; + for (i = 0; i < ctx->streamout.num_targets; i++) ctx->streamout.offsets[i] += info->count; @@ -410,7 +410,7 @@ * the depth buffer, etc) */ cleared_buffers = buffers & (FD_BUFFER_ALL & ~batch->restore); - batch->cleared |= cleared_buffers; + batch->cleared |= buffers; batch->invalidated |= cleared_buffers; batch->resolve |= buffers; diff -Nru mesa-18.3.3/src/gallium/drivers/freedreno/freedreno_draw.h mesa-19.0.1/src/gallium/drivers/freedreno/freedreno_draw.h --- mesa-18.3.3/src/gallium/drivers/freedreno/freedreno_draw.h 2018-12-07 18:58:04.000000000 +0000 +++ mesa-19.0.1/src/gallium/drivers/freedreno/freedreno_draw.h 2019-03-31 23:16:37.000000000 +0000 @@ -73,9 +73,33 @@ } if (is_a20x(batch->ctx->screen)) { - OUT_PKT3(ring, CP_DRAW_INDX, idx_buffer ? 4 : 2); + /* a20x has a different draw command for drawing with binning data + * note: if we do patching we will have to insert a NOP + * + * binning data is is 1 byte/vertex (8x8x4 bin position of vertex) + * base ptr set by the CP_SET_DRAW_INIT_FLAGS command + * + * TODO: investigate the faceness_cull_select parameter to see how + * it is used with hw binning to use "faceness" bits + */ + uint32_t size = 2; + if (vismode) + size += 2; + if (idx_buffer) + size += 2; + + BEGIN_RING(ring, size+1); + if (vismode) + util_dynarray_append(&batch->draw_patches, uint32_t*, ring->cur); + + OUT_PKT3(ring, vismode ? CP_DRAW_INDX_BIN : CP_DRAW_INDX, size); OUT_RING(ring, 0x00000000); - OUT_RING(ring, DRAW_A20X(primtype, src_sel, idx_type, vismode, count)); + OUT_RING(ring, DRAW_A20X(primtype, DI_FACE_CULL_NONE, src_sel, + idx_type, vismode, vismode, count)); + if (vismode == USE_VISIBILITY) { + OUT_RING(ring, batch->num_vertices); + OUT_RING(ring, count); + } } else { OUT_PKT3(ring, CP_DRAW_INDX, idx_buffer ? 
5 : 3); OUT_RING(ring, 0x00000000); /* viz query info. */ diff -Nru mesa-18.3.3/src/gallium/drivers/freedreno/freedreno_gmem.c mesa-19.0.1/src/gallium/drivers/freedreno/freedreno_gmem.c --- mesa-18.3.3/src/gallium/drivers/freedreno/freedreno_gmem.c 2018-12-07 18:58:04.000000000 +0000 +++ mesa-19.0.1/src/gallium/drivers/freedreno/freedreno_gmem.c 2019-03-31 23:16:37.000000000 +0000 @@ -77,24 +77,25 @@ static uint32_t total_size(uint8_t cbuf_cpp[], uint8_t zsbuf_cpp[2], - uint32_t bin_w, uint32_t bin_h, struct fd_gmem_stateobj *gmem) + uint32_t bin_w, uint32_t bin_h, uint32_t gmem_align, + struct fd_gmem_stateobj *gmem) { uint32_t total = 0, i; for (i = 0; i < MAX_RENDER_TARGETS; i++) { if (cbuf_cpp[i]) { - gmem->cbuf_base[i] = align(total, 0x4000); + gmem->cbuf_base[i] = align(total, gmem_align); total = gmem->cbuf_base[i] + cbuf_cpp[i] * bin_w * bin_h; } } if (zsbuf_cpp[0]) { - gmem->zsbuf_base[0] = align(total, 0x4000); + gmem->zsbuf_base[0] = align(total, gmem_align); total = gmem->zsbuf_base[0] + zsbuf_cpp[0] * bin_w * bin_h; } if (zsbuf_cpp[1]) { - gmem->zsbuf_base[1] = align(total, 0x4000); + gmem->zsbuf_base[1] = align(total, gmem_align); total = gmem->zsbuf_base[1] + zsbuf_cpp[1] * bin_w * bin_h; } @@ -116,11 +117,13 @@ uint32_t minx, miny, width, height; uint32_t nbins_x = 1, nbins_y = 1; uint32_t bin_w, bin_h; + uint32_t gmem_align = 0x4000; uint32_t max_width = bin_width(screen); uint8_t cbuf_cpp[MAX_RENDER_TARGETS] = {0}, zsbuf_cpp[2] = {0}; uint32_t i, j, t, xoff, yoff; uint32_t tpp_x, tpp_y; - bool has_zs = !!(batch->resolve & (FD_BUFFER_DEPTH | FD_BUFFER_STENCIL)); + bool has_zs = !!(batch->gmem_reason & (FD_GMEM_DEPTH_ENABLED | + FD_GMEM_STENCIL_ENABLED | FD_GMEM_CLEARS_DEPTH_STENCIL)); int tile_n[npipes]; if (has_zs) { @@ -128,6 +131,10 @@ zsbuf_cpp[0] = rsc->cpp; if (rsc->stencil) zsbuf_cpp[1] = rsc->stencil->cpp; + } else { + /* we might have a zsbuf, but it isn't used */ + batch->restore &= ~(FD_BUFFER_DEPTH | FD_BUFFER_STENCIL); + batch->resolve 
&= ~(FD_BUFFER_DEPTH | FD_BUFFER_STENCIL); } for (i = 0; i < pfb->nr_cbufs; i++) { if (pfb->cbufs[i]) @@ -177,10 +184,18 @@ zsbuf_cpp[0], width, height); } + if (is_a20x(screen) && batch->cleared) { + /* under normal circumstances the requirement would be 4K + * but the fast clear path requires an alignment of 32K + */ + gmem_align = 0x8000; + } + /* then find a bin width/height that satisfies the memory * constraints: */ - while (total_size(cbuf_cpp, zsbuf_cpp, bin_w, bin_h, gmem) > gmem_size) { + while (total_size(cbuf_cpp, zsbuf_cpp, bin_w, bin_h, gmem_align, gmem) > + gmem_size) { if (bin_w > bin_h) { nbins_x++; bin_w = align(width / nbins_x, gmem_alignw); @@ -214,12 +229,21 @@ #define div_round_up(v, a) (((v) + (a) - 1) / (a)) /* figure out number of tiles per pipe: */ - tpp_x = tpp_y = 1; - while (div_round_up(nbins_y, tpp_y) > screen->num_vsc_pipes) - tpp_y += 2; - while ((div_round_up(nbins_y, tpp_y) * - div_round_up(nbins_x, tpp_x)) > screen->num_vsc_pipes) - tpp_x += 1; + if (is_a20x(ctx->screen)) { + /* for a20x we want to minimize the number of "pipes" + * binning data has 3 bits for x/y (8x8) but the edges are used to + * cull off-screen vertices with hw binning, so we have 6x6 pipes + */ + tpp_x = 6; + tpp_y = 6; + } else { + tpp_x = tpp_y = 1; + while (div_round_up(nbins_y, tpp_y) > screen->num_vsc_pipes) + tpp_y += 2; + while ((div_round_up(nbins_y, tpp_y) * + div_round_up(nbins_x, tpp_x)) > screen->num_vsc_pipes) + tpp_x += 1; + } gmem->maxpw = tpp_x; gmem->maxph = tpp_y; @@ -246,6 +270,9 @@ xoff += tpp_x; } + /* number of pipes to use for a20x */ + gmem->num_vsc_pipes = MAX2(1, i); + for (; i < npipes; i++) { struct fd_vsc_pipe *pipe = &ctx->vsc_pipe[i]; pipe->x = pipe->y = pipe->w = pipe->h = 0; @@ -280,11 +307,12 @@ /* pipe number: */ p = ((i / tpp_y) * div_round_up(nbins_x, tpp_x)) + (j / tpp_x); + assert(p < gmem->num_vsc_pipes); /* clip bin width: */ bw = MIN2(bin_w, minx + width - xoff); - - tile->n = tile_n[p]++; + tile->n = 
!is_a20x(ctx->screen) ? tile_n[p]++ : + ((i % tpp_y + 1) << 3 | (j % tpp_x + 1)); tile->p = p; tile->bin_w = bw; tile->bin_h = bh; diff -Nru mesa-18.3.3/src/gallium/drivers/freedreno/freedreno_gmem.h mesa-19.0.1/src/gallium/drivers/freedreno/freedreno_gmem.h --- mesa-18.3.3/src/gallium/drivers/freedreno/freedreno_gmem.h 2018-12-07 18:58:04.000000000 +0000 +++ mesa-19.0.1/src/gallium/drivers/freedreno/freedreno_gmem.h 2019-03-31 23:16:37.000000000 +0000 @@ -57,6 +57,7 @@ uint16_t minx, miny; uint16_t width, height; uint16_t maxpw, maxph; /* maximum pipe width/height */ + uint8_t num_vsc_pipes; /* number of pipes for a20x */ }; struct fd_batch; diff -Nru mesa-18.3.3/src/gallium/drivers/freedreno/freedreno_program.c mesa-19.0.1/src/gallium/drivers/freedreno/freedreno_program.c --- mesa-18.3.3/src/gallium/drivers/freedreno/freedreno_program.c 2018-12-07 18:58:04.000000000 +0000 +++ mesa-19.0.1/src/gallium/drivers/freedreno/freedreno_program.c 2019-03-31 23:16:37.000000000 +0000 @@ -67,7 +67,7 @@ "VERT \n" "DCL IN[0] \n" "DCL IN[1] \n" - "DCL OUT[0], TEXCOORD[0] \n" + "DCL OUT[0], GENERIC[0] \n" "DCL OUT[1], POSITION \n" " 0: MOV OUT[0], IN[0] \n" " 0: MOV OUT[1], IN[1] \n" @@ -129,15 +129,14 @@ pctx->bind_fs_state = fd_fp_state_bind; pctx->bind_vs_state = fd_vp_state_bind; - // XXX for now, let a2xx keep it's own hand-rolled shaders - // for solid and blit progs: - if (ctx->screen->gpu_id < 300) - return; - ctx->solid_prog.fp = assemble_tgsi(pctx, solid_fp, true); ctx->solid_prog.vp = assemble_tgsi(pctx, solid_vp, false); ctx->blit_prog[0].vp = assemble_tgsi(pctx, blit_vp, false); ctx->blit_prog[0].fp = fd_prog_blit(pctx, 1, false); + + if (ctx->screen->gpu_id < 300) + return; + for (i = 1; i < ctx->screen->max_rts; i++) { ctx->blit_prog[i].vp = ctx->blit_prog[0].vp; ctx->blit_prog[i].fp = fd_prog_blit(pctx, i + 1, false); diff -Nru mesa-18.3.3/src/gallium/drivers/freedreno/freedreno_resource.c mesa-19.0.1/src/gallium/drivers/freedreno/freedreno_resource.c --- 
mesa-18.3.3/src/gallium/drivers/freedreno/freedreno_resource.c 2018-12-07 18:58:04.000000000 +0000 +++ mesa-19.0.1/src/gallium/drivers/freedreno/freedreno_resource.c 2019-03-31 23:16:37.000000000 +0000 @@ -35,6 +35,7 @@ #include "freedreno_resource.h" #include "freedreno_batch_cache.h" +#include "freedreno_blitter.h" #include "freedreno_fence.h" #include "freedreno_screen.h" #include "freedreno_surface.h" @@ -42,6 +43,7 @@ #include "freedreno_query_hw.h" #include "freedreno_util.h" +#include #include /* XXX this should go away, needed for 'struct winsys_handle' */ @@ -97,9 +99,12 @@ static void realloc_bo(struct fd_resource *rsc, uint32_t size) { + struct pipe_resource *prsc = &rsc->base; struct fd_screen *screen = fd_screen(rsc->base.screen); uint32_t flags = DRM_FREEDRENO_GEM_CACHE_WCOMBINE | - DRM_FREEDRENO_GEM_TYPE_KMEM; /* TODO */ + DRM_FREEDRENO_GEM_TYPE_KMEM | + COND(prsc->bind & PIPE_BIND_SCANOUT, DRM_FREEDRENO_GEM_SCANOUT); + /* TODO other flags? */ /* if we start using things other than write-combine, * be sure to check for PIPE_RESOURCE_FLAG_MAP_COHERENT @@ -108,7 +113,8 @@ if (rsc->bo) fd_bo_del(rsc->bo); - rsc->bo = fd_bo_new(screen->dev, size, flags); + rsc->bo = fd_bo_new(screen->dev, size, flags, "%ux%ux%u@%u:%x", + prsc->width0, prsc->height0, prsc->depth0, rsc->cpp, prsc->bind); rsc->seqno = p_atomic_inc_return(&screen->rsc_seqno); util_range_set_empty(&rsc->valid_buffer_range); fd_bc_invalidate_resource(rsc, true); @@ -117,15 +123,15 @@ static void do_blit(struct fd_context *ctx, const struct pipe_blit_info *blit, bool fallback) { + struct pipe_context *pctx = &ctx->base; + /* TODO size threshold too?? 
*/ if (!fallback) { /* do blit on gpu: */ - fd_blitter_pipe_begin(ctx, false, true, FD_STAGE_BLIT); - ctx->blit(ctx, blit); - fd_blitter_pipe_end(ctx); + pctx->blit(pctx, blit); } else { /* do blit on cpu: */ - util_resource_copy_region(&ctx->base, + util_resource_copy_region(pctx, blit->dst.resource, blit->dst.level, blit->dst.box.x, blit->dst.box.y, blit->dst.box.z, blit->src.resource, blit->src.level, &blit->src.box); @@ -289,8 +295,16 @@ tmpl.width0 = box->width; tmpl.height0 = box->height; - tmpl.depth0 = box->depth; - tmpl.array_size = 1; + /* for array textures, box->depth is the array_size, otherwise + * for 3d textures, it is the depth: + */ + if (tmpl.array_size > 1) { + tmpl.array_size = box->depth; + tmpl.depth0 = 1; + } else { + tmpl.array_size = 1; + tmpl.depth0 = box->depth; + } tmpl.last_level = 0; tmpl.bind |= PIPE_BIND_LINEAR; @@ -342,17 +356,6 @@ do_blit(ctx, &blit, false); } -static unsigned -fd_resource_layer_offset(struct fd_resource *rsc, - struct fd_resource_slice *slice, - unsigned layer) -{ - if (rsc->layer_first) - return layer * rsc->layer_size; - else - return layer * slice->size0; -} - static void fd_resource_transfer_flush_region(struct pipe_context *pctx, struct pipe_transfer *ptrans, const struct pipe_box *box) @@ -496,7 +499,21 @@ if (usage & PIPE_TRANSFER_READ) { fd_blit_to_staging(ctx, trans); - fd_bo_cpu_prep(rsc->bo, ctx->pipe, DRM_FREEDRENO_PREP_READ); + + struct fd_batch *batch = NULL; + fd_batch_reference(&batch, staging_rsc->write_batch); + + /* we can't fd_bo_cpu_prep() until the blit to staging + * is submitted to kernel.. 
in that case write_batch + * wouldn't be NULL yet: + */ + if (batch) { + fd_batch_sync(batch); + fd_batch_reference(&batch, NULL); + } + + fd_bo_cpu_prep(staging_rsc->bo, ctx->pipe, + DRM_FREEDRENO_PREP_READ); } buf = fd_bo_map(staging_rsc->bo); @@ -621,10 +638,10 @@ } buf = fd_bo_map(rsc->bo); - offset = slice->offset + + offset = box->y / util_format_get_blockheight(format) * ptrans->stride + box->x / util_format_get_blockwidth(format) * rsc->cpp + - fd_resource_layer_offset(rsc, slice, box->z); + fd_resource_offset(rsc, level, box->z); if (usage & PIPE_TRANSFER_WRITE) rsc->valid = true; @@ -646,10 +663,23 @@ fd_bc_invalidate_resource(rsc, true); if (rsc->bo) fd_bo_del(rsc->bo); + if (rsc->scanout) + renderonly_scanout_destroy(rsc->scanout, fd_screen(pscreen)->ro); + util_range_destroy(&rsc->valid_buffer_range); FREE(rsc); } +static uint64_t +fd_resource_modifier(struct fd_resource *rsc) +{ + if (!rsc->tile_mode) + return DRM_FORMAT_MOD_LINEAR; + + /* TODO invent a modifier for tiled but not UBWC buffers: */ + return DRM_FORMAT_MOD_INVALID; +} + static boolean fd_resource_get_handle(struct pipe_screen *pscreen, struct pipe_context *pctx, @@ -659,7 +689,9 @@ { struct fd_resource *rsc = fd_resource(prsc); - return fd_screen_bo_get_handle(pscreen, rsc->bo, + handle->modifier = fd_resource_modifier(rsc); + + return fd_screen_bo_get_handle(pscreen, rsc->bo, rsc->scanout, rsc->slices[0].pitch * rsc->cpp, handle); } @@ -794,19 +826,65 @@ } } +static bool +find_modifier(uint64_t needle, const uint64_t *haystack, int count) +{ + int i; + + for (i = 0; i < count; i++) { + if (haystack[i] == needle) + return true; + } + + return false; +} + /** * Create a new texture object, using the given template info. 
*/ static struct pipe_resource * -fd_resource_create(struct pipe_screen *pscreen, - const struct pipe_resource *tmpl) +fd_resource_create_with_modifiers(struct pipe_screen *pscreen, + const struct pipe_resource *tmpl, + const uint64_t *modifiers, int count) { struct fd_screen *screen = fd_screen(pscreen); - struct fd_resource *rsc = CALLOC_STRUCT(fd_resource); - struct pipe_resource *prsc = &rsc->base; + struct fd_resource *rsc; + struct pipe_resource *prsc; enum pipe_format format = tmpl->format; uint32_t size; + /* when using kmsro, scanout buffers are allocated on the display device + * create_with_modifiers() doesn't give us usage flags, so we have to + * assume that all calls with modifiers are scanout-possible + */ + if (screen->ro && + ((tmpl->bind & PIPE_BIND_SCANOUT) || + !(count == 1 && modifiers[0] == DRM_FORMAT_MOD_INVALID))) { + struct pipe_resource scanout_templat = *tmpl; + struct renderonly_scanout *scanout; + struct winsys_handle handle; + + scanout = renderonly_scanout_for_resource(&scanout_templat, + screen->ro, &handle); + if (!scanout) + return NULL; + + renderonly_scanout_destroy(scanout, screen->ro); + + assert(handle.type == WINSYS_HANDLE_TYPE_FD); + rsc = fd_resource(pscreen->resource_from_handle(pscreen, tmpl, + &handle, + PIPE_HANDLE_USAGE_FRAMEBUFFER_WRITE)); + close(handle.handle); + if (!rsc) + return NULL; + + return &rsc->base; + } + + rsc = CALLOC_STRUCT(fd_resource); + prsc = &rsc->base; + DBG("%p: target=%d, format=%s, %ux%ux%u, array_size=%u, last_level=%u, " "nr_samples=%u, usage=%u, bind=%x, flags=%x", prsc, tmpl->target, util_format_name(format), @@ -824,10 +902,26 @@ PIPE_BIND_LINEAR | \ PIPE_BIND_DISPLAY_TARGET) + bool linear = find_modifier(DRM_FORMAT_MOD_LINEAR, modifiers, count); + if (tmpl->bind & LINEAR) + linear = true; + + /* Normally, for non-shared buffers, allow buffer compression if + * not shared, otherwise only allow if QCOM_COMPRESSED modifier + * is requested: + * + * TODO we should probably also limit tiled 
in a similar way, + * except we don't have a format modifier for tiled. (We probably + * should.) + */ + bool allow_ubwc = find_modifier(DRM_FORMAT_MOD_INVALID, modifiers, count); + if (tmpl->bind & PIPE_BIND_SHARED) + allow_ubwc = find_modifier(DRM_FORMAT_MOD_QCOM_COMPRESSED, modifiers, count); + if (screen->tile_mode && (tmpl->target != PIPE_BUFFER) && (tmpl->bind & PIPE_BIND_SAMPLER_VIEW) && - !(tmpl->bind & LINEAR)) { + !linear) { rsc->tile_mode = screen->tile_mode(tmpl); } @@ -839,8 +933,7 @@ rsc->internal_format = format; rsc->cpp = util_format_get_blocksize(format); - prsc->nr_samples = MAX2(1, prsc->nr_samples); - rsc->cpp *= prsc->nr_samples; + rsc->cpp *= fd_resource_nr_samples(prsc); assert(rsc->cpp); @@ -851,6 +944,15 @@ DRM_FREEDRENO_GEM_TYPE_KMEM; /* TODO */ unsigned lrz_pitch = align(DIV_ROUND_UP(tmpl->width0, 8), 64); unsigned lrz_height = DIV_ROUND_UP(tmpl->height0, 8); + + /* LRZ buffer is super-sampled: */ + switch (prsc->nr_samples) { + case 4: + lrz_pitch *= 2; + case 2: + lrz_height *= 2; + } + unsigned size = lrz_pitch * lrz_height * 2; size += 0x1000; /* for GRAS_LRZ_FAST_CLEAR_BUFFER */ @@ -858,11 +960,14 @@ rsc->lrz_height = lrz_height; rsc->lrz_width = lrz_pitch; rsc->lrz_pitch = lrz_pitch; - rsc->lrz = fd_bo_new(screen->dev, size, flags); + rsc->lrz = fd_bo_new(screen->dev, size, flags, "lrz"); } size = screen->setup_slices(rsc); + if (allow_ubwc && screen->fill_ubwc_buffer_sizes && rsc->tile_mode) + size += screen->fill_ubwc_buffer_sizes(rsc); + /* special case for hw-query buffer, which we need to allocate before we * know the size: */ @@ -887,6 +992,34 @@ return NULL; } +static struct pipe_resource * +fd_resource_create(struct pipe_screen *pscreen, + const struct pipe_resource *tmpl) +{ + const uint64_t mod = DRM_FORMAT_MOD_INVALID; + return fd_resource_create_with_modifiers(pscreen, tmpl, &mod, 1); +} + +static bool +is_supported_modifier(struct pipe_screen *pscreen, enum pipe_format pfmt, + uint64_t mod) +{ + int count; + + /* Get 
the count of supported modifiers: */ + pscreen->query_dmabuf_modifiers(pscreen, pfmt, 0, NULL, NULL, &count); + + /* Get the supported modifiers: */ + uint64_t modifiers[count]; + pscreen->query_dmabuf_modifiers(pscreen, pfmt, 0, modifiers, NULL, &count); + + for (int i = 0; i < count; i++) + if (modifiers[i] == mod) + return true; + + return false; +} + /** * Create a texture from a winsys_handle. The handle is often created in * another process by first creating a pipe texture and then calling @@ -897,6 +1030,7 @@ const struct pipe_resource *tmpl, struct winsys_handle *handle, unsigned usage) { + struct fd_screen *screen = fd_screen(pscreen); struct fd_resource *rsc = CALLOC_STRUCT(fd_resource); struct fd_resource_slice *slice = &rsc->slices[0]; struct pipe_resource *prsc = &rsc->base; @@ -924,9 +1058,9 @@ if (!rsc->bo) goto fail; - prsc->nr_samples = MAX2(1, prsc->nr_samples); rsc->internal_format = tmpl->format; - rsc->cpp = prsc->nr_samples * util_format_get_blocksize(tmpl->format); + rsc->cpp = util_format_get_blocksize(tmpl->format); + rsc->cpp *= fd_resource_nr_samples(prsc); slice->pitch = handle->stride / rsc->cpp; slice->offset = handle->offset; slice->size0 = handle->stride * prsc->height0; @@ -935,8 +1069,27 @@ (slice->pitch & (pitchalign - 1))) goto fail; + if (handle->modifier == DRM_FORMAT_MOD_QCOM_COMPRESSED) { + if (!is_supported_modifier(pscreen, tmpl->format, + DRM_FORMAT_MOD_QCOM_COMPRESSED)) { + DBG("bad modifier: %lx", handle->modifier); + goto fail; + } + debug_assert(screen->fill_ubwc_buffer_sizes); + screen->fill_ubwc_buffer_sizes(rsc); + } else if (handle->modifier && + (handle->modifier != DRM_FORMAT_MOD_INVALID)) { + goto fail; + } + assert(rsc->cpp); + if (screen->ro) { + rsc->scanout = + renderonly_create_gpu_import_for_resource(prsc, screen->ro, NULL); + /* failure is expected in some cases.. 
*/ + } + return prsc; fail: @@ -944,68 +1097,6 @@ return NULL; } -/** - * _copy_region using pipe (3d engine) - */ -static bool -fd_blitter_pipe_copy_region(struct fd_context *ctx, - struct pipe_resource *dst, - unsigned dst_level, - unsigned dstx, unsigned dsty, unsigned dstz, - struct pipe_resource *src, - unsigned src_level, - const struct pipe_box *src_box) -{ - /* not until we allow rendertargets to be buffers */ - if (dst->target == PIPE_BUFFER || src->target == PIPE_BUFFER) - return false; - - if (!util_blitter_is_copy_supported(ctx->blitter, dst, src)) - return false; - - /* TODO we could discard if dst box covers dst level fully.. */ - fd_blitter_pipe_begin(ctx, false, false, FD_STAGE_BLIT); - util_blitter_copy_texture(ctx->blitter, - dst, dst_level, dstx, dsty, dstz, - src, src_level, src_box); - fd_blitter_pipe_end(ctx); - - return true; -} - -/** - * Copy a block of pixels from one resource to another. - * The resource must be of the same format. - * Resources with nr_samples > 1 are not allowed. - */ -static void -fd_resource_copy_region(struct pipe_context *pctx, - struct pipe_resource *dst, - unsigned dst_level, - unsigned dstx, unsigned dsty, unsigned dstz, - struct pipe_resource *src, - unsigned src_level, - const struct pipe_box *src_box) -{ - struct fd_context *ctx = fd_context(pctx); - - /* TODO if we have 2d core, or other DMA engine that could be used - * for simple copies and reasonably easily synchronized with the 3d - * core, this is where we'd plug it in.. 
- */ - - /* try blit on 3d pipe: */ - if (fd_blitter_pipe_copy_region(ctx, - dst, dst_level, dstx, dsty, dstz, - src, src_level, src_box)) - return; - - /* else fallback to pure sw: */ - util_resource_copy_region(pctx, - dst, dst_level, dstx, dsty, dstz, - src, src_level, src_box); -} - bool fd_render_condition_check(struct pipe_context *pctx) { @@ -1034,22 +1125,10 @@ { struct fd_context *ctx = fd_context(pctx); struct pipe_blit_info info = *blit_info; - bool discard = false; if (info.render_condition_enable && !fd_render_condition_check(pctx)) return; - if (!info.scissor_enable && !info.alpha_blend) { - discard = util_texrange_covers_whole_level(info.dst.resource, - info.dst.level, info.dst.box.x, info.dst.box.y, - info.dst.box.z, info.dst.box.width, - info.dst.box.height, info.dst.box.depth); - } - - if (util_try_blit_via_copy_region(pctx, &info)) { - return; /* done */ - } - if (info.mask & PIPE_MASK_S) { DBG("cannot blit stencil, skipping"); info.mask &= ~PIPE_MASK_S; @@ -1062,9 +1141,8 @@ return; } - fd_blitter_pipe_begin(ctx, info.render_condition_enable, discard, FD_STAGE_BLIT); - ctx->blit(ctx, &info); - fd_blitter_pipe_end(ctx); + if (!(ctx->blit && ctx->blit(ctx, &info))) + fd_blitter_blit(ctx, &info); } void @@ -1116,24 +1194,30 @@ static void fd_invalidate_resource(struct pipe_context *pctx, struct pipe_resource *prsc) { + struct fd_context *ctx = fd_context(pctx); struct fd_resource *rsc = fd_resource(prsc); /* * TODO I guess we could track that the resource is invalidated and * use that as a hint to realloc rather than stall in _transfer_map(), * even in the non-DISCARD_WHOLE_RESOURCE case? 
+ * + * Note: we set dirty bits to trigger invalidate logic fd_draw_vbo */ if (rsc->write_batch) { struct fd_batch *batch = rsc->write_batch; struct pipe_framebuffer_state *pfb = &batch->framebuffer; - if (pfb->zsbuf && pfb->zsbuf->texture == prsc) + if (pfb->zsbuf && pfb->zsbuf->texture == prsc) { batch->resolve &= ~(FD_BUFFER_DEPTH | FD_BUFFER_STENCIL); + ctx->dirty |= FD_DIRTY_ZSA; + } for (unsigned i = 0; i < pfb->nr_cbufs; i++) { if (pfb->cbufs[i] && pfb->cbufs[i]->texture == prsc) { batch->resolve &= ~(PIPE_CLEAR_COLOR0 << i); + ctx->dirty |= FD_DIRTY_FRAMEBUFFER; } } } @@ -1181,6 +1265,10 @@ bool fake_rgtc = screen->gpu_id < 400; pscreen->resource_create = u_transfer_helper_resource_create; + /* NOTE: u_transfer_helper does not yet support the _with_modifiers() + * variant: + */ + pscreen->resource_create_with_modifiers = fd_resource_create_with_modifiers; pscreen->resource_from_handle = fd_resource_from_handle; pscreen->resource_get_handle = fd_resource_get_handle; pscreen->resource_destroy = u_transfer_helper_resource_destroy; @@ -1192,6 +1280,50 @@ screen->setup_slices = fd_setup_slices; } +static void +fd_get_sample_position(struct pipe_context *context, + unsigned sample_count, unsigned sample_index, + float *pos_out) +{ + /* The following is copied from nouveau/nv50 except for position + * values, which are taken from blob driver */ + static const uint8_t pos1[1][2] = { { 0x8, 0x8 } }; + static const uint8_t pos2[2][2] = { + { 0xc, 0xc }, { 0x4, 0x4 } }; + static const uint8_t pos4[4][2] = { + { 0x6, 0x2 }, { 0xe, 0x6 }, + { 0x2, 0xa }, { 0xa, 0xe } }; + /* TODO needs to be verified on supported hw */ + static const uint8_t pos8[8][2] = { + { 0x9, 0x5 }, { 0x7, 0xb }, + { 0xd, 0x9 }, { 0x5, 0x3 }, + { 0x3, 0xd }, { 0x1, 0x7 }, + { 0xb, 0xf }, { 0xf, 0x1 } }; + + const uint8_t (*ptr)[2]; + + switch (sample_count) { + case 1: + ptr = pos1; + break; + case 2: + ptr = pos2; + break; + case 4: + ptr = pos4; + break; + case 8: + ptr = pos8; + break; + 
default: + assert(0); + return; + } + + pos_out[0] = ptr[sample_index][0] / 16.0f; + pos_out[1] = ptr[sample_index][1] / 16.0f; +} + void fd_resource_context_init(struct pipe_context *pctx) { @@ -1206,4 +1338,5 @@ pctx->blit = fd_blit; pctx->flush_resource = fd_flush_resource; pctx->invalidate_resource = fd_invalidate_resource; + pctx->get_sample_position = fd_get_sample_position; } diff -Nru mesa-18.3.3/src/gallium/drivers/freedreno/freedreno_resource.h mesa-19.0.1/src/gallium/drivers/freedreno/freedreno_resource.h --- mesa-18.3.3/src/gallium/drivers/freedreno/freedreno_resource.h 2018-12-07 18:58:04.000000000 +0000 +++ mesa-19.0.1/src/gallium/drivers/freedreno/freedreno_resource.h 2019-03-31 23:16:37.000000000 +0000 @@ -41,7 +41,7 @@ * programmed with the start address of each mipmap level, and hw * derives the layer offset within the level. * - * Texture Layout on a4xx: + * Texture Layout on a4xx+: * * For cubemap and 2d array, each layer contains all of it's mipmap * levels (layer_first layout). @@ -72,6 +72,7 @@ /* buffer range that has been initialized */ struct util_range valid_buffer_range; bool valid; + struct renderonly_scanout *scanout; /* reference to the resource holding stencil data for a z32_s8 texture */ /* TODO rename to secondary or auxiliary? 
*/ @@ -99,7 +100,6 @@ uint16_t seqno; unsigned tile_mode : 2; - unsigned preferred_tile_mode : 2; /* * LRZ @@ -178,6 +178,15 @@ return false; } +/* access # of samples, with 0 normalized to 1 (which is what we care about + * most of the time) + */ +static inline unsigned +fd_resource_nr_samples(struct pipe_resource *prsc) +{ + return MAX2(1, prsc->nr_samples); +} + void fd_blitter_pipe_begin(struct fd_context *ctx, bool render_cond, bool discard, enum fd_render_stage stage); void fd_blitter_pipe_end(struct fd_context *ctx); diff -Nru mesa-18.3.3/src/gallium/drivers/freedreno/freedreno_screen.c mesa-19.0.1/src/gallium/drivers/freedreno/freedreno_screen.c --- mesa-18.3.3/src/gallium/drivers/freedreno/freedreno_screen.c 2018-12-07 18:58:04.000000000 +0000 +++ mesa-19.0.1/src/gallium/drivers/freedreno/freedreno_screen.c 2019-03-31 23:16:37.000000000 +0000 @@ -39,6 +39,7 @@ #include "util/os_time.h" +#include #include #include #include @@ -58,13 +59,14 @@ #include "ir3/ir3_nir.h" +#include "a2xx/ir2.h" /* XXX this should go away */ #include "state_tracker/drm_driver.h" static const struct debug_named_value debug_options[] = { {"msgs", FD_DBG_MSGS, "Print debug messages"}, - {"disasm", FD_DBG_DISASM, "Dump TGSI and adreno shader disassembly"}, + {"disasm", FD_DBG_DISASM, "Dump TGSI and adreno shader disassembly (a2xx only, see IR3_SHADER_DEBUG)"}, {"dclear", FD_DBG_DCLEAR, "Mark all state dirty after clear"}, {"ddraw", FD_DBG_DDRAW, "Mark all state dirty after draw"}, {"noscis", FD_DBG_NOSCIS, "Disable scissor optimization"}, @@ -72,7 +74,6 @@ {"nobypass", FD_DBG_NOBYPASS, "Disable GMEM bypass"}, {"fraghalf", FD_DBG_FRAGHALF, "Use half-precision in fragment shader"}, {"nobin", FD_DBG_NOBIN, "Disable hw binning"}, - {"optmsgs", FD_DBG_OPTMSGS,"Enable optimizer debug messages"}, {"glsl120", FD_DBG_GLSL120,"Temporary flag to force GLSL 1.20 (rather than 1.30) on a3xx+"}, {"shaderdb", FD_DBG_SHADERDB, "Enable shaderdb output"}, {"flush", FD_DBG_FLUSH, "Force flush after 
every draw"}, @@ -96,17 +97,6 @@ bool fd_binning_enabled = true; static bool glsl120 = false; -static const struct debug_named_value shader_debug_options[] = { - {"vs", FD_DBG_SHADER_VS, "Print shader disasm for vertex shaders"}, - {"fs", FD_DBG_SHADER_FS, "Print shader disasm for fragment shaders"}, - {"cs", FD_DBG_SHADER_CS, "Print shader disasm for compute shaders"}, - DEBUG_NAMED_VALUE_END -}; - -DEBUG_GET_ONCE_FLAGS_OPTION(fd_shader_debug, "FD_SHADER_DEBUG", shader_debug_options, 0) - -enum fd_shader_debug fd_shader_debug = 0; - static const char * fd_screen_get_name(struct pipe_screen *pscreen) { @@ -157,6 +147,9 @@ if (screen->dev) fd_device_del(screen->dev); + if (screen->ro) + FREE(screen->ro); + fd_bc_fini(&screen->batch_cache); slab_destroy_parent(&screen->transfer_pool); @@ -237,6 +230,9 @@ case PIPE_CAP_TEXTURE_MULTISAMPLE: return is_a5xx(screen) || is_a6xx(screen); + case PIPE_CAP_SURFACE_SAMPLE_COUNT: + return is_a6xx(screen); + case PIPE_CAP_DEPTH_CLIP_DISABLE: return is_a3xx(screen) || is_a4xx(screen); @@ -321,6 +317,9 @@ case PIPE_CAP_MAX_VIEWPORTS: return 1; + case PIPE_CAP_MAX_VARYINGS: + return 16; + case PIPE_CAP_SHAREABLE_SHADERS: case PIPE_CAP_GLSL_OPTIMIZE_CONSERVATIVELY: /* manage the variants for these ourself, to avoid breaking precompile: */ @@ -505,16 +504,9 @@ case PIPE_SHADER_CAP_MAX_SAMPLER_VIEWS: return 16; case PIPE_SHADER_CAP_PREFERRED_IR: - if (is_ir3(screen)) - return PIPE_SHADER_IR_NIR; - return PIPE_SHADER_IR_TGSI; + return PIPE_SHADER_IR_NIR; case PIPE_SHADER_CAP_SUPPORTED_IRS: - if (is_ir3(screen)) { - return (1 << PIPE_SHADER_IR_NIR) | (1 << PIPE_SHADER_IR_TGSI); - } else { - return (1 << PIPE_SHADER_IR_TGSI); - } - return 0; + return (1 << PIPE_SHADER_IR_NIR) | (1 << PIPE_SHADER_IR_TGSI); case PIPE_SHADER_CAP_MAX_UNROLL_ITERATIONS_HINT: return 32; case PIPE_SHADER_CAP_SCALAR_ISA: @@ -645,12 +637,13 @@ if (is_ir3(screen)) return ir3_get_compiler_options(screen->compiler); - return NULL; + return ir2_get_compiler_options(); 
} boolean fd_screen_bo_get_handle(struct pipe_screen *pscreen, struct fd_bo *bo, + struct renderonly_scanout *scanout, unsigned stride, struct winsys_handle *whandle) { @@ -659,6 +652,8 @@ if (whandle->type == WINSYS_HANDLE_TYPE_SHARED) { return fd_bo_get_name(bo, &whandle->handle) == 0; } else if (whandle->type == WINSYS_HANDLE_TYPE_KMS) { + if (renderonly_get_handle(scanout, whandle)) + return TRUE; whandle->handle = fd_bo_handle(bo); return TRUE; } else if (whandle->type == WINSYS_HANDLE_TYPE_FD) { @@ -669,6 +664,37 @@ } } +static void +fd_screen_query_dmabuf_modifiers(struct pipe_screen *pscreen, + enum pipe_format format, + int max, uint64_t *modifiers, + unsigned int *external_only, + int *count) +{ + struct fd_screen *screen = fd_screen(pscreen); + int i, num = 0; + + max = MIN2(max, screen->num_supported_modifiers); + + if (!max) { + max = screen->num_supported_modifiers; + external_only = NULL; + modifiers = NULL; + } + + for (i = 0; i < max; i++) { + if (modifiers) + modifiers[num] = screen->supported_modifiers[i]; + + if (external_only) + external_only[num] = 0; + + num++; + } + + *count = num; +} + struct fd_bo * fd_screen_bo_from_handle(struct pipe_screen *pscreen, struct winsys_handle *whandle) @@ -696,14 +722,13 @@ } struct pipe_screen * -fd_screen_create(struct fd_device *dev) +fd_screen_create(struct fd_device *dev, struct renderonly *ro) { struct fd_screen *screen = CALLOC_STRUCT(fd_screen); struct pipe_screen *pscreen; uint64_t val; fd_mesa_debug = debug_get_option_fd_mesa_debug(); - fd_shader_debug = debug_get_option_fd_shader_debug(); if (fd_mesa_debug & FD_DBG_NOBIN) fd_binning_enabled = false; @@ -718,6 +743,14 @@ screen->dev = dev; screen->refcnt = 1; + if (ro) { + screen->ro = renderonly_dup(ro); + if (!screen->ro) { + DBG("could not create renderonly object"); + goto fail; + } + } + // maybe this should be in context? 
screen->pipe = fd_pipe_new(screen->dev, FD_PIPE_3D); if (!screen->pipe) { @@ -796,6 +829,8 @@ * send a patch ;-) */ switch (screen->gpu_id) { + case 200: + case 201: case 205: case 220: fd2_screen_init(pscreen); @@ -867,6 +902,17 @@ pscreen->fence_finish = fd_fence_finish; pscreen->fence_get_fd = fd_fence_get_fd; + pscreen->query_dmabuf_modifiers = fd_screen_query_dmabuf_modifiers; + + if (!screen->supported_modifiers) { + static const uint64_t supported_modifiers[] = { + DRM_FORMAT_MOD_LINEAR, + }; + + screen->supported_modifiers = supported_modifiers; + screen->num_supported_modifiers = ARRAY_SIZE(supported_modifiers); + } + slab_create_parent(&screen->transfer_pool, sizeof(struct fd_transfer), 16); return pscreen; diff -Nru mesa-18.3.3/src/gallium/drivers/freedreno/freedreno_screen.h mesa-19.0.1/src/gallium/drivers/freedreno/freedreno_screen.h --- mesa-18.3.3/src/gallium/drivers/freedreno/freedreno_screen.h 2018-12-07 18:58:04.000000000 +0000 +++ mesa-19.0.1/src/gallium/drivers/freedreno/freedreno_screen.h 2019-03-31 23:16:37.000000000 +0000 @@ -34,6 +34,7 @@ #include "util/u_memory.h" #include "util/slab.h" #include "os/os_thread.h" +#include "renderonly/renderonly.h" #include "freedreno_batch_cache.h" #include "freedreno_perfcntr.h" @@ -87,6 +88,7 @@ */ struct fd_pipe *pipe; + uint32_t (*fill_ubwc_buffer_sizes)(struct fd_resource *rsc); uint32_t (*setup_slices)(struct fd_resource *rsc); unsigned (*tile_mode)(const struct pipe_resource *prsc); @@ -97,6 +99,11 @@ bool reorder; uint16_t rsc_seqno; + + unsigned num_supported_modifiers; + const uint64_t *supported_modifiers; + + struct renderonly *ro; }; static inline struct fd_screen * @@ -107,12 +114,14 @@ boolean fd_screen_bo_get_handle(struct pipe_screen *pscreen, struct fd_bo *bo, + struct renderonly_scanout *scanout, unsigned stride, struct winsys_handle *whandle); struct fd_bo * fd_screen_bo_from_handle(struct pipe_screen *pscreen, struct winsys_handle *whandle); -struct pipe_screen * fd_screen_create(struct 
fd_device *dev); +struct pipe_screen * +fd_screen_create(struct fd_device *dev, struct renderonly *ro); static inline boolean is_a20x(struct fd_screen *screen) @@ -120,6 +129,12 @@ return (screen->gpu_id >= 200) && (screen->gpu_id < 210); } +static inline boolean +is_a2xx(struct fd_screen *screen) +{ + return (screen->gpu_id >= 200) && (screen->gpu_id < 300); +} + /* is a3xx patch revision 0? */ /* TODO a306.0 probably doesn't need this.. be more clever?? */ static inline boolean diff -Nru mesa-18.3.3/src/gallium/drivers/freedreno/freedreno_state.c mesa-19.0.1/src/gallium/drivers/freedreno/freedreno_state.c --- mesa-18.3.3/src/gallium/drivers/freedreno/freedreno_state.c 2018-12-07 18:58:04.000000000 +0000 +++ mesa-19.0.1/src/gallium/drivers/freedreno/freedreno_state.c 2019-03-31 23:16:37.000000000 +0000 @@ -288,7 +288,36 @@ const struct pipe_viewport_state *viewport) { struct fd_context *ctx = fd_context(pctx); + struct pipe_scissor_state *scissor = &ctx->viewport_scissor; + float minx, miny, maxx, maxy; + ctx->viewport = *viewport; + + /* see si_get_scissor_from_viewport(): */ + + /* Convert (-1, -1) and (1, 1) from clip space into window space. */ + minx = -viewport->scale[0] + viewport->translate[0]; + miny = -viewport->scale[1] + viewport->translate[1]; + maxx = viewport->scale[0] + viewport->translate[0]; + maxy = viewport->scale[1] + viewport->translate[1]; + + /* Handle inverted viewports. */ + if (minx > maxx) { + swap(minx, maxx); + } + if (miny > maxy) { + swap(miny, maxy); + } + + debug_assert(miny >= 0); + debug_assert(maxy >= 0); + + /* Convert to integer and round up the max bounds. 
*/ + scissor->minx = minx; + scissor->miny = miny; + scissor->maxx = ceilf(maxx); + scissor->maxy = ceilf(maxy); + ctx->dirty |= FD_DIRTY_VIEWPORT; } diff -Nru mesa-18.3.3/src/gallium/drivers/freedreno/freedreno_state.h mesa-19.0.1/src/gallium/drivers/freedreno/freedreno_state.h --- mesa-18.3.3/src/gallium/drivers/freedreno/freedreno_state.h 2018-12-07 18:58:04.000000000 +0000 +++ mesa-19.0.1/src/gallium/drivers/freedreno/freedreno_state.h 2019-03-31 23:16:37.000000000 +0000 @@ -35,6 +35,11 @@ return ctx->zsa && ctx->zsa->depth.enabled; } +static inline bool fd_depth_write_enabled(struct fd_context *ctx) +{ + return ctx->zsa && ctx->zsa->depth.writemask; +} + static inline bool fd_stencil_enabled(struct fd_context *ctx) { return ctx->zsa && ctx->zsa->stencil[0].enabled; diff -Nru mesa-18.3.3/src/gallium/drivers/freedreno/freedreno_surface.c mesa-19.0.1/src/gallium/drivers/freedreno/freedreno_surface.c --- mesa-18.3.3/src/gallium/drivers/freedreno/freedreno_surface.c 2018-12-07 18:58:04.000000000 +0000 +++ mesa-19.0.1/src/gallium/drivers/freedreno/freedreno_surface.c 2019-03-31 23:16:37.000000000 +0000 @@ -53,6 +53,7 @@ psurf->format = surf_tmpl->format; psurf->width = u_minify(ptex->width0, level); psurf->height = u_minify(ptex->height0, level); + psurf->nr_samples = surf_tmpl->nr_samples; if (ptex->target == PIPE_BUFFER) { psurf->u.buf.first_element = surf_tmpl->u.buf.first_element; diff -Nru mesa-18.3.3/src/gallium/drivers/freedreno/freedreno_surface.h mesa-19.0.1/src/gallium/drivers/freedreno/freedreno_surface.h --- mesa-18.3.3/src/gallium/drivers/freedreno/freedreno_surface.h 2018-12-07 18:58:04.000000000 +0000 +++ mesa-19.0.1/src/gallium/drivers/freedreno/freedreno_surface.h 2019-03-31 23:16:37.000000000 +0000 @@ -31,11 +31,6 @@ struct fd_surface { struct pipe_surface base; - uint32_t offset; - uint32_t pitch; - uint32_t width; - uint16_t height; - uint16_t depth; }; static inline struct fd_surface * diff -Nru 
mesa-18.3.3/src/gallium/drivers/freedreno/freedreno_texture.c mesa-19.0.1/src/gallium/drivers/freedreno/freedreno_texture.c --- mesa-18.3.3/src/gallium/drivers/freedreno/freedreno_texture.c 2018-12-07 18:58:04.000000000 +0000 +++ mesa-19.0.1/src/gallium/drivers/freedreno/freedreno_texture.c 2019-03-31 23:16:37.000000000 +0000 @@ -31,6 +31,7 @@ #include "freedreno_texture.h" #include "freedreno_context.h" +#include "freedreno_resource.h" #include "freedreno_util.h" static void @@ -83,7 +84,7 @@ tex->num_textures = util_last_bit(tex->valid_textures); for (i = 0; i < tex->num_textures; i++) { - uint nr_samples = tex->textures[i]->texture->nr_samples; + uint nr_samples = fd_resource_nr_samples(tex->textures[i]->texture); samplers |= (nr_samples >> 1) << (i * 2); } diff -Nru mesa-18.3.3/src/gallium/drivers/freedreno/freedreno_util.h mesa-19.0.1/src/gallium/drivers/freedreno/freedreno_util.h --- mesa-18.3.3/src/gallium/drivers/freedreno/freedreno_util.h 2018-12-07 18:58:04.000000000 +0000 +++ mesa-19.0.1/src/gallium/drivers/freedreno/freedreno_util.h 2019-03-31 23:16:37.000000000 +0000 @@ -70,7 +70,7 @@ #define FD_DBG_NOBYPASS 0x0040 #define FD_DBG_FRAGHALF 0x0080 #define FD_DBG_NOBIN 0x0100 -#define FD_DBG_OPTMSGS 0x0200 +/* unused 0x0200 */ #define FD_DBG_GLSL120 0x0400 #define FD_DBG_SHADERDB 0x0800 #define FD_DBG_FLUSH 0x1000 @@ -114,15 +114,19 @@ } static inline uint32_t DRAW_A20X(enum pc_di_primtype prim_type, + enum pc_di_face_cull_sel faceness_cull_select, enum pc_di_src_sel source_select, enum pc_di_index_size index_size, - enum pc_di_vis_cull_mode vis_cull_mode, + bool pre_fetch_cull_enable, + bool grp_cull_enable, uint16_t count) { return (prim_type << 0) | (source_select << 6) | + (faceness_cull_select << 8) | ((index_size & 1) << 11) | ((index_size >> 1) << 13) | - (vis_cull_mode << 9) | + (pre_fetch_cull_enable << 14) | + (grp_cull_enable << 15) | (count << 16); } @@ -194,6 +198,18 @@ return true; } +/* Note sure if this is same on all gens, but seems to be 
same on the later + * gen's + */ +static inline unsigned +fd_calc_guardband(unsigned x) +{ + float l = log2(x); + if (l <= 8) + return 511; + return 511 - ((l - 8) * 65); +} + #define LOG_DWORDS 0 static inline void emit_marker(struct fd_ringbuffer *ring, int scratch_idx); @@ -228,8 +244,8 @@ */ static inline void -OUT_RELOC(struct fd_ringbuffer *ring, struct fd_bo *bo, - uint32_t offset, uint64_t or, int32_t shift) +__out_reloc(struct fd_ringbuffer *ring, struct fd_bo *bo, + uint32_t offset, uint64_t or, int32_t shift, uint32_t flags) { if (LOG_DWORDS) { DBG("ring[%p]: OUT_RELOC %04x: %p+%u << %d", ring, @@ -238,7 +254,7 @@ debug_assert(offset < fd_bo_size(bo)); fd_ringbuffer_reloc(ring, &(struct fd_reloc){ .bo = bo, - .flags = FD_RELOC_READ, + .flags = flags, .offset = offset, .or = or, .shift = shift, @@ -247,22 +263,24 @@ } static inline void +OUT_RELOC(struct fd_ringbuffer *ring, struct fd_bo *bo, + uint32_t offset, uint64_t or, int32_t shift) +{ + __out_reloc(ring, bo, offset, or, shift, FD_RELOC_READ); +} + +static inline void OUT_RELOCW(struct fd_ringbuffer *ring, struct fd_bo *bo, uint32_t offset, uint64_t or, int32_t shift) { - if (LOG_DWORDS) { - DBG("ring[%p]: OUT_RELOCW %04x: %p+%u << %d", ring, - (uint32_t)(ring->cur - ring->start), bo, offset, shift); - } - debug_assert(offset < fd_bo_size(bo)); - fd_ringbuffer_reloc(ring, &(struct fd_reloc){ - .bo = bo, - .flags = FD_RELOC_READ | FD_RELOC_WRITE, - .offset = offset, - .or = or, - .shift = shift, - .orhi = or >> 32, - }); + __out_reloc(ring, bo, offset, or, shift, FD_RELOC_READ | FD_RELOC_WRITE); +} + +static inline void +OUT_RELOCD(struct fd_ringbuffer *ring, struct fd_bo *bo, + uint32_t offset, uint64_t or, int32_t shift) +{ + __out_reloc(ring, bo, offset, or, shift, FD_RELOC_READ | FD_RELOC_DUMP); } static inline void @@ -411,18 +429,6 @@ OUT_RING(ring, ++marker_cnt); } -/* helper to get numeric value from environment variable.. 
mostly - * just leaving this here because it is helpful to brute-force figure - * out unknown formats, etc, which blob driver does not support: - */ -static inline uint32_t env2u(const char *envvar) -{ - char *str = getenv(envvar); - if (str) - return strtoul(str, NULL, 0); - return 0; -} - static inline uint32_t pack_rgba(enum pipe_format format, const float *rgba) { @@ -453,9 +459,11 @@ switch (samples) { default: debug_assert(0); + case 0: case 1: return MSAA_ONE; case 2: return MSAA_TWO; case 4: return MSAA_FOUR; + case 8: return MSAA_EIGHT; } } @@ -464,14 +472,15 @@ */ static inline enum a4xx_state_block -fd4_stage2shadersb(enum shader_t type) +fd4_stage2shadersb(gl_shader_stage type) { switch (type) { - case SHADER_VERTEX: + case MESA_SHADER_VERTEX: return SB4_VS_SHADER; - case SHADER_FRAGMENT: + case MESA_SHADER_FRAGMENT: return SB4_FS_SHADER; - case SHADER_COMPUTE: + case MESA_SHADER_COMPUTE: + case MESA_SHADER_KERNEL: return SB4_CS_SHADER; default: unreachable("bad shader type"); diff -Nru mesa-18.3.3/src/gallium/drivers/freedreno/ir3/disasm-a3xx.c mesa-19.0.1/src/gallium/drivers/freedreno/ir3/disasm-a3xx.c --- mesa-18.3.3/src/gallium/drivers/freedreno/ir3/disasm-a3xx.c 2018-09-27 19:13:54.000000000 +0000 +++ mesa-19.0.1/src/gallium/drivers/freedreno/ir3/disasm-a3xx.c 1970-01-01 00:00:00.000000000 +0000 @@ -1,1033 +0,0 @@ -/* - * Copyright (c) 2013 Rob Clark - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * the rights to use, copy, modify, merge, publish, distribute, sublicense, - * and/or sell copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice (including the next - * paragraph) shall be included in all copies or substantial 
portions of the - * Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL - * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ - -#include -#include -#include -#include -#include -#include - -#include - -#include "disasm.h" -#include "instr-a3xx.h" - -static enum debug_t debug; - -#define printf debug_printf - -static const char *levels[] = { - "", - "\t", - "\t\t", - "\t\t\t", - "\t\t\t\t", - "\t\t\t\t\t", - "\t\t\t\t\t\t", - "\t\t\t\t\t\t\t", - "\t\t\t\t\t\t\t\t", - "\t\t\t\t\t\t\t\t\t", - "x", - "x", - "x", - "x", - "x", - "x", -}; - -static const char *component = "xyzw"; - -static const char *type[] = { - [TYPE_F16] = "f16", - [TYPE_F32] = "f32", - [TYPE_U16] = "u16", - [TYPE_U32] = "u32", - [TYPE_S16] = "s16", - [TYPE_S32] = "s32", - [TYPE_U8] = "u8", - [TYPE_S8] = "s8", -}; - -struct disasm_ctx { - FILE *out; - int level; - - /* current instruction repeat flag: */ - unsigned repeat; -}; - -static void print_reg(struct disasm_ctx *ctx, reg_t reg, bool full, bool r, - bool c, bool im, bool neg, bool abs, bool addr_rel) -{ - const char type = c ? 'c' : 'r'; - - // XXX I prefer - and || for neg/abs, but preserving format used - // by libllvm-a3xx for easy diffing.. - - if (abs && neg) - fprintf(ctx->out, "(absneg)"); - else if (neg) - fprintf(ctx->out, "(neg)"); - else if (abs) - fprintf(ctx->out, "(abs)"); - - if (r) - fprintf(ctx->out, "(r)"); - - if (im) { - fprintf(ctx->out, "%d", reg.iim_val); - } else if (addr_rel) { - /* I would just use %+d but trying to make it diff'able with - * libllvm-a3xx... - */ - if (reg.iim_val < 0) - fprintf(ctx->out, "%s%c", full ? 
"" : "h", type, -reg.iim_val); - else if (reg.iim_val > 0) - fprintf(ctx->out, "%s%c", full ? "" : "h", type, reg.iim_val); - else - fprintf(ctx->out, "%s%c", full ? "" : "h", type); - } else if ((reg.num == REG_A0) && !c) { - fprintf(ctx->out, "a0.%c", component[reg.comp]); - } else if ((reg.num == REG_P0) && !c) { - fprintf(ctx->out, "p0.%c", component[reg.comp]); - } else { - fprintf(ctx->out, "%s%c%d.%c", full ? "" : "h", type, reg.num & 0x3f, component[reg.comp]); - } -} - - -static void print_reg_dst(struct disasm_ctx *ctx, reg_t reg, bool full, bool addr_rel) -{ - print_reg(ctx, reg, full, false, false, false, false, false, addr_rel); -} - -static void print_reg_src(struct disasm_ctx *ctx, reg_t reg, bool full, bool r, - bool c, bool im, bool neg, bool abs, bool addr_rel) -{ - print_reg(ctx, reg, full, r, c, im, neg, abs, addr_rel); -} - -/* TODO switch to using reginfo struct everywhere, since more readable - * than passing a bunch of bools to print_reg_src - */ - -struct reginfo { - reg_t reg; - bool full; - bool r; - bool c; - bool im; - bool neg; - bool abs; - bool addr_rel; -}; - -static void print_src(struct disasm_ctx *ctx, struct reginfo *info) -{ - print_reg_src(ctx, info->reg, info->full, info->r, info->c, info->im, - info->neg, info->abs, info->addr_rel); -} - -//static void print_dst(struct disasm_ctx *ctx, struct reginfo *info) -//{ -// print_reg_dst(ctx, info->reg, info->full, info->addr_rel); -//} - -static void print_instr_cat0(struct disasm_ctx *ctx, instr_t *instr) -{ - instr_cat0_t *cat0 = &instr->cat0; - - switch (cat0->opc) { - case OPC_KILL: - fprintf(ctx->out, " %sp0.%c", cat0->inv ? "!" : "", - component[cat0->comp]); - break; - case OPC_BR: - fprintf(ctx->out, " %sp0.%c, #%d", cat0->inv ? "!" 
: "", - component[cat0->comp], cat0->a3xx.immed); - break; - case OPC_JUMP: - case OPC_CALL: - fprintf(ctx->out, " #%d", cat0->a3xx.immed); - break; - } - - if ((debug & PRINT_VERBOSE) && (cat0->dummy2|cat0->dummy3|cat0->dummy4)) - fprintf(ctx->out, "\t{0: %x,%x,%x}", cat0->dummy2, cat0->dummy3, cat0->dummy4); -} - -static void print_instr_cat1(struct disasm_ctx *ctx, instr_t *instr) -{ - instr_cat1_t *cat1 = &instr->cat1; - - if (cat1->ul) - fprintf(ctx->out, "(ul)"); - - if (cat1->src_type == cat1->dst_type) { - if ((cat1->src_type == TYPE_S16) && (((reg_t)cat1->dst).num == REG_A0)) { - /* special case (nmemonic?): */ - fprintf(ctx->out, "mova"); - } else { - fprintf(ctx->out, "mov.%s%s", type[cat1->src_type], type[cat1->dst_type]); - } - } else { - fprintf(ctx->out, "cov.%s%s", type[cat1->src_type], type[cat1->dst_type]); - } - - fprintf(ctx->out, " "); - - if (cat1->even) - fprintf(ctx->out, "(even)"); - - if (cat1->pos_inf) - fprintf(ctx->out, "(pos_infinity)"); - - print_reg_dst(ctx, (reg_t)(cat1->dst), type_size(cat1->dst_type) == 32, - cat1->dst_rel); - - fprintf(ctx->out, ", "); - - /* ugg, have to special case this.. vs print_reg().. */ - if (cat1->src_im) { - if (type_float(cat1->src_type)) - fprintf(ctx->out, "(%f)", cat1->fim_val); - else if (type_uint(cat1->src_type)) - fprintf(ctx->out, "0x%08x", cat1->uim_val); - else - fprintf(ctx->out, "%d", cat1->iim_val); - } else if (cat1->src_rel && !cat1->src_c) { - /* I would just use %+d but trying to make it diff'able with - * libllvm-a3xx... - */ - char type = cat1->src_rel_c ? 
'c' : 'r'; - if (cat1->off < 0) - fprintf(ctx->out, "%c", type, -cat1->off); - else if (cat1->off > 0) - fprintf(ctx->out, "%c", type, cat1->off); - else - fprintf(ctx->out, "%c", type); - } else { - print_reg_src(ctx, (reg_t)(cat1->src), type_size(cat1->src_type) == 32, - cat1->src_r, cat1->src_c, cat1->src_im, false, false, false); - } - - if ((debug & PRINT_VERBOSE) && (cat1->must_be_0)) - fprintf(ctx->out, "\t{1: %x}", cat1->must_be_0); -} - -static void print_instr_cat2(struct disasm_ctx *ctx, instr_t *instr) -{ - instr_cat2_t *cat2 = &instr->cat2; - static const char *cond[] = { - "lt", - "le", - "gt", - "ge", - "eq", - "ne", - "?6?", - }; - - switch (_OPC(2, cat2->opc)) { - case OPC_CMPS_F: - case OPC_CMPS_U: - case OPC_CMPS_S: - case OPC_CMPV_F: - case OPC_CMPV_U: - case OPC_CMPV_S: - fprintf(ctx->out, ".%s", cond[cat2->cond]); - break; - } - - fprintf(ctx->out, " "); - if (cat2->ei) - fprintf(ctx->out, "(ei)"); - print_reg_dst(ctx, (reg_t)(cat2->dst), cat2->full ^ cat2->dst_half, false); - fprintf(ctx->out, ", "); - - if (cat2->c1.src1_c) { - print_reg_src(ctx, (reg_t)(cat2->c1.src1), cat2->full, cat2->src1_r, - cat2->c1.src1_c, cat2->src1_im, cat2->src1_neg, - cat2->src1_abs, false); - } else if (cat2->rel1.src1_rel) { - print_reg_src(ctx, (reg_t)(cat2->rel1.src1), cat2->full, cat2->src1_r, - cat2->rel1.src1_c, cat2->src1_im, cat2->src1_neg, - cat2->src1_abs, cat2->rel1.src1_rel); - } else { - print_reg_src(ctx, (reg_t)(cat2->src1), cat2->full, cat2->src1_r, - false, cat2->src1_im, cat2->src1_neg, - cat2->src1_abs, false); - } - - switch (_OPC(2, cat2->opc)) { - case OPC_ABSNEG_F: - case OPC_ABSNEG_S: - case OPC_CLZ_B: - case OPC_CLZ_S: - case OPC_SIGN_F: - case OPC_FLOOR_F: - case OPC_CEIL_F: - case OPC_RNDNE_F: - case OPC_RNDAZ_F: - case OPC_TRUNC_F: - case OPC_NOT_B: - case OPC_BFREV_B: - case OPC_SETRM: - case OPC_CBITS_B: - /* these only have one src reg */ - break; - default: - fprintf(ctx->out, ", "); - if (cat2->c2.src2_c) { - print_reg_src(ctx, 
(reg_t)(cat2->c2.src2), cat2->full, cat2->src2_r, - cat2->c2.src2_c, cat2->src2_im, cat2->src2_neg, - cat2->src2_abs, false); - } else if (cat2->rel2.src2_rel) { - print_reg_src(ctx, (reg_t)(cat2->rel2.src2), cat2->full, cat2->src2_r, - cat2->rel2.src2_c, cat2->src2_im, cat2->src2_neg, - cat2->src2_abs, cat2->rel2.src2_rel); - } else { - print_reg_src(ctx, (reg_t)(cat2->src2), cat2->full, cat2->src2_r, - false, cat2->src2_im, cat2->src2_neg, - cat2->src2_abs, false); - } - break; - } -} - -static void print_instr_cat3(struct disasm_ctx *ctx, instr_t *instr) -{ - instr_cat3_t *cat3 = &instr->cat3; - bool full = instr_cat3_full(cat3); - - fprintf(ctx->out, " "); - print_reg_dst(ctx, (reg_t)(cat3->dst), full ^ cat3->dst_half, false); - fprintf(ctx->out, ", "); - if (cat3->c1.src1_c) { - print_reg_src(ctx, (reg_t)(cat3->c1.src1), full, - cat3->src1_r, cat3->c1.src1_c, false, cat3->src1_neg, - false, false); - } else if (cat3->rel1.src1_rel) { - print_reg_src(ctx, (reg_t)(cat3->rel1.src1), full, - cat3->src1_r, cat3->rel1.src1_c, false, cat3->src1_neg, - false, cat3->rel1.src1_rel); - } else { - print_reg_src(ctx, (reg_t)(cat3->src1), full, - cat3->src1_r, false, false, cat3->src1_neg, - false, false); - } - fprintf(ctx->out, ", "); - print_reg_src(ctx, (reg_t)cat3->src2, full, - cat3->src2_r, cat3->src2_c, false, cat3->src2_neg, - false, false); - fprintf(ctx->out, ", "); - if (cat3->c2.src3_c) { - print_reg_src(ctx, (reg_t)(cat3->c2.src3), full, - cat3->src3_r, cat3->c2.src3_c, false, cat3->src3_neg, - false, false); - } else if (cat3->rel2.src3_rel) { - print_reg_src(ctx, (reg_t)(cat3->rel2.src3), full, - cat3->src3_r, cat3->rel2.src3_c, false, cat3->src3_neg, - false, cat3->rel2.src3_rel); - } else { - print_reg_src(ctx, (reg_t)(cat3->src3), full, - cat3->src3_r, false, false, cat3->src3_neg, - false, false); - } -} - -static void print_instr_cat4(struct disasm_ctx *ctx, instr_t *instr) -{ - instr_cat4_t *cat4 = &instr->cat4; - - fprintf(ctx->out, " "); - 
print_reg_dst(ctx, (reg_t)(cat4->dst), cat4->full ^ cat4->dst_half, false); - fprintf(ctx->out, ", "); - - if (cat4->c.src_c) { - print_reg_src(ctx, (reg_t)(cat4->c.src), cat4->full, - cat4->src_r, cat4->c.src_c, cat4->src_im, - cat4->src_neg, cat4->src_abs, false); - } else if (cat4->rel.src_rel) { - print_reg_src(ctx, (reg_t)(cat4->rel.src), cat4->full, - cat4->src_r, cat4->rel.src_c, cat4->src_im, - cat4->src_neg, cat4->src_abs, cat4->rel.src_rel); - } else { - print_reg_src(ctx, (reg_t)(cat4->src), cat4->full, - cat4->src_r, false, cat4->src_im, - cat4->src_neg, cat4->src_abs, false); - } - - if ((debug & PRINT_VERBOSE) && (cat4->dummy1|cat4->dummy2)) - fprintf(ctx->out, "\t{4: %x,%x}", cat4->dummy1, cat4->dummy2); -} - -static void print_instr_cat5(struct disasm_ctx *ctx, instr_t *instr) -{ - static const struct { - bool src1, src2, samp, tex; - } info[0x1f] = { - [opc_op(OPC_ISAM)] = { true, false, true, true, }, - [opc_op(OPC_ISAML)] = { true, true, true, true, }, - [opc_op(OPC_ISAMM)] = { true, false, true, true, }, - [opc_op(OPC_SAM)] = { true, false, true, true, }, - [opc_op(OPC_SAMB)] = { true, true, true, true, }, - [opc_op(OPC_SAML)] = { true, true, true, true, }, - [opc_op(OPC_SAMGQ)] = { true, false, true, true, }, - [opc_op(OPC_GETLOD)] = { true, false, true, true, }, - [opc_op(OPC_CONV)] = { true, true, true, true, }, - [opc_op(OPC_CONVM)] = { true, true, true, true, }, - [opc_op(OPC_GETSIZE)] = { true, false, false, true, }, - [opc_op(OPC_GETBUF)] = { false, false, false, true, }, - [opc_op(OPC_GETPOS)] = { true, false, false, true, }, - [opc_op(OPC_GETINFO)] = { false, false, false, true, }, - [opc_op(OPC_DSX)] = { true, false, false, false, }, - [opc_op(OPC_DSY)] = { true, false, false, false, }, - [opc_op(OPC_GATHER4R)] = { true, false, true, true, }, - [opc_op(OPC_GATHER4G)] = { true, false, true, true, }, - [opc_op(OPC_GATHER4B)] = { true, false, true, true, }, - [opc_op(OPC_GATHER4A)] = { true, false, true, true, }, - [opc_op(OPC_SAMGP0)] = 
{ true, false, true, true, }, - [opc_op(OPC_SAMGP1)] = { true, false, true, true, }, - [opc_op(OPC_SAMGP2)] = { true, false, true, true, }, - [opc_op(OPC_SAMGP3)] = { true, false, true, true, }, - [opc_op(OPC_DSXPP_1)] = { true, false, false, false, }, - [opc_op(OPC_DSYPP_1)] = { true, false, false, false, }, - [opc_op(OPC_RGETPOS)] = { false, false, false, false, }, - [opc_op(OPC_RGETINFO)] = { false, false, false, false, }, - }; - instr_cat5_t *cat5 = &instr->cat5; - int i; - - if (cat5->is_3d) fprintf(ctx->out, ".3d"); - if (cat5->is_a) fprintf(ctx->out, ".a"); - if (cat5->is_o) fprintf(ctx->out, ".o"); - if (cat5->is_p) fprintf(ctx->out, ".p"); - if (cat5->is_s) fprintf(ctx->out, ".s"); - if (cat5->is_s2en) fprintf(ctx->out, ".s2en"); - - fprintf(ctx->out, " "); - - switch (_OPC(5, cat5->opc)) { - case OPC_DSXPP_1: - case OPC_DSYPP_1: - break; - default: - fprintf(ctx->out, "(%s)", type[cat5->type]); - break; - } - - fprintf(ctx->out, "("); - for (i = 0; i < 4; i++) - if (cat5->wrmask & (1 << i)) - fprintf(ctx->out, "%c", "xyzw"[i]); - fprintf(ctx->out, ")"); - - print_reg_dst(ctx, (reg_t)(cat5->dst), type_size(cat5->type) == 32, false); - - if (info[cat5->opc].src1) { - fprintf(ctx->out, ", "); - print_reg_src(ctx, (reg_t)(cat5->src1), cat5->full, false, false, false, - false, false, false); - } - - if (cat5->is_s2en) { - fprintf(ctx->out, ", "); - print_reg_src(ctx, (reg_t)(cat5->s2en.src2), cat5->full, false, false, false, - false, false, false); - fprintf(ctx->out, ", "); - print_reg_src(ctx, (reg_t)(cat5->s2en.src3), false, false, false, false, - false, false, false); - } else { - if (cat5->is_o || info[cat5->opc].src2) { - fprintf(ctx->out, ", "); - print_reg_src(ctx, (reg_t)(cat5->norm.src2), cat5->full, - false, false, false, false, false, false); - } - if (info[cat5->opc].samp) - fprintf(ctx->out, ", s#%d", cat5->norm.samp); - if (info[cat5->opc].tex) - fprintf(ctx->out, ", t#%d", cat5->norm.tex); - } - - if (debug & PRINT_VERBOSE) { - if 
(cat5->is_s2en) { - if ((debug & PRINT_VERBOSE) && (cat5->s2en.dummy1|cat5->s2en.dummy2|cat5->dummy2)) - fprintf(ctx->out, "\t{5: %x,%x,%x}", cat5->s2en.dummy1, cat5->s2en.dummy2, cat5->dummy2); - } else { - if ((debug & PRINT_VERBOSE) && (cat5->norm.dummy1|cat5->dummy2)) - fprintf(ctx->out, "\t{5: %x,%x}", cat5->norm.dummy1, cat5->dummy2); - } - } -} - -static void print_instr_cat6(struct disasm_ctx *ctx, instr_t *instr) -{ - instr_cat6_t *cat6 = &instr->cat6; - char sd = 0, ss = 0; /* dst/src address space */ - bool nodst = false; - struct reginfo dst, src1, src2; - int src1off = 0, dstoff = 0; - - memset(&dst, 0, sizeof(dst)); - memset(&src1, 0, sizeof(src1)); - memset(&src2, 0, sizeof(src2)); - - switch (_OPC(6, cat6->opc)) { - case OPC_RESINFO: - case OPC_RESFMT: - dst.full = type_size(cat6->type) == 32; - src1.full = type_size(cat6->type) == 32; - src2.full = type_size(cat6->type) == 32; - break; - case OPC_L2G: - case OPC_G2L: - dst.full = true; - src1.full = true; - src2.full = true; - break; - case OPC_STG: - case OPC_STL: - case OPC_STP: - case OPC_STI: - case OPC_STLW: - case OPC_STIB: - dst.full = true; - src1.full = type_size(cat6->type) == 32; - src2.full = type_size(cat6->type) == 32; - break; - default: - dst.full = type_size(cat6->type) == 32; - src1.full = true; - src2.full = true; - break; - } - - switch (_OPC(6, cat6->opc)) { - case OPC_PREFETCH: - break; - case OPC_RESINFO: - fprintf(ctx->out, ".%dd", cat6->ldgb.d + 1); - break; - case OPC_LDGB: - fprintf(ctx->out, ".%s", cat6->ldgb.typed ? "typed" : "untyped"); - fprintf(ctx->out, ".%dd", cat6->ldgb.d + 1); - fprintf(ctx->out, ".%s", type[cat6->type]); - fprintf(ctx->out, ".%d", cat6->ldgb.type_size + 1); - break; - case OPC_STGB: - case OPC_STIB: - fprintf(ctx->out, ".%s", cat6->stgb.typed ? 
"typed" : "untyped"); - fprintf(ctx->out, ".%dd", cat6->stgb.d + 1); - fprintf(ctx->out, ".%s", type[cat6->type]); - fprintf(ctx->out, ".%d", cat6->stgb.type_size + 1); - break; - case OPC_ATOMIC_ADD: - case OPC_ATOMIC_SUB: - case OPC_ATOMIC_XCHG: - case OPC_ATOMIC_INC: - case OPC_ATOMIC_DEC: - case OPC_ATOMIC_CMPXCHG: - case OPC_ATOMIC_MIN: - case OPC_ATOMIC_MAX: - case OPC_ATOMIC_AND: - case OPC_ATOMIC_OR: - case OPC_ATOMIC_XOR: - ss = cat6->g ? 'g' : 'l'; - fprintf(ctx->out, ".%s", cat6->ldgb.typed ? "typed" : "untyped"); - fprintf(ctx->out, ".%dd", cat6->ldgb.d + 1); - fprintf(ctx->out, ".%s", type[cat6->type]); - fprintf(ctx->out, ".%d", cat6->ldgb.type_size + 1); - fprintf(ctx->out, ".%c", ss); - break; - default: - dst.im = cat6->g && !cat6->dst_off; - fprintf(ctx->out, ".%s", type[cat6->type]); - break; - } - fprintf(ctx->out, " "); - - switch (_OPC(6, cat6->opc)) { - case OPC_STG: - sd = 'g'; - break; - case OPC_STP: - sd = 'p'; - break; - case OPC_STL: - case OPC_STLW: - sd = 'l'; - break; - - case OPC_LDG: - case OPC_LDC: - ss = 'g'; - break; - case OPC_LDP: - ss = 'p'; - break; - case OPC_LDL: - case OPC_LDLW: - case OPC_LDLV: - ss = 'l'; - break; - - case OPC_L2G: - ss = 'l'; - sd = 'g'; - break; - - case OPC_G2L: - ss = 'g'; - sd = 'l'; - break; - - case OPC_PREFETCH: - ss = 'g'; - nodst = true; - break; - - case OPC_STI: - dst.full = false; // XXX or inverts?? 
- break; - } - - if ((_OPC(6, cat6->opc) == OPC_STGB) || (_OPC(6, cat6->opc) == OPC_STIB)) { - struct reginfo src3; - - memset(&src3, 0, sizeof(src3)); - - src1.reg = (reg_t)(cat6->stgb.src1); - src2.reg = (reg_t)(cat6->stgb.src2); - src2.im = cat6->stgb.src2_im; - src3.reg = (reg_t)(cat6->stgb.src3); - src3.im = cat6->stgb.src3_im; - src3.full = true; - - fprintf(ctx->out, "g[%u], ", cat6->stgb.dst_ssbo); - print_src(ctx, &src1); - fprintf(ctx->out, ", "); - print_src(ctx, &src2); - fprintf(ctx->out, ", "); - print_src(ctx, &src3); - - if (debug & PRINT_VERBOSE) - fprintf(ctx->out, " (pad0=%x, pad3=%x)", cat6->stgb.pad0, cat6->stgb.pad3); - - return; - } - - if (is_atomic(_OPC(6, cat6->opc))) { - - src1.reg = (reg_t)(cat6->ldgb.src1); - src1.im = cat6->ldgb.src1_im; - src2.reg = (reg_t)(cat6->ldgb.src2); - src2.im = cat6->ldgb.src2_im; - dst.reg = (reg_t)(cat6->ldgb.dst); - - print_src(ctx, &dst); - fprintf(ctx->out, ", "); - if (ss == 'g') { - struct reginfo src3; - memset(&src3, 0, sizeof(src3)); - - src3.reg = (reg_t)(cat6->ldgb.src3); - src3.full = true; - - /* For images, the ".typed" variant is used and src2 is - * the ivecN coordinates, ie ivec2 for 2d. - * - * For SSBOs, the ".untyped" variant is used and src2 is - * a simple dword offset.. src3 appears to be - * uvec2(offset * 4, 0). Not sure the point of that. - */ - - fprintf(ctx->out, "g[%u], ", cat6->ldgb.src_ssbo); - print_src(ctx, &src1); /* value */ - fprintf(ctx->out, ", "); - print_src(ctx, &src2); /* offset/coords */ - fprintf(ctx->out, ", "); - print_src(ctx, &src3); /* 64b byte offset.. 
*/ - - if (debug & PRINT_VERBOSE) { - fprintf(ctx->out, " (pad0=%x, pad3=%x, mustbe0=%x)", cat6->ldgb.pad0, - cat6->ldgb.pad3, cat6->ldgb.mustbe0); - } - } else { /* ss == 'l' */ - fprintf(ctx->out, "l["); - print_src(ctx, &src1); /* simple byte offset */ - fprintf(ctx->out, "], "); - print_src(ctx, &src2); /* value */ - - if (debug & PRINT_VERBOSE) { - fprintf(ctx->out, " (src3=%x, pad0=%x, pad3=%x, mustbe0=%x)", - cat6->ldgb.src3, cat6->ldgb.pad0, - cat6->ldgb.pad3, cat6->ldgb.mustbe0); - } - } - - return; - } else if (_OPC(6, cat6->opc) == OPC_RESINFO) { - dst.reg = (reg_t)(cat6->ldgb.dst); - - print_src(ctx, &dst); - fprintf(ctx->out, ", "); - fprintf(ctx->out, "g[%u]", cat6->ldgb.src_ssbo); - - return; - } else if (_OPC(6, cat6->opc) == OPC_LDGB) { - - src1.reg = (reg_t)(cat6->ldgb.src1); - src1.im = cat6->ldgb.src1_im; - src2.reg = (reg_t)(cat6->ldgb.src2); - src2.im = cat6->ldgb.src2_im; - dst.reg = (reg_t)(cat6->ldgb.dst); - - print_src(ctx, &dst); - fprintf(ctx->out, ", "); - fprintf(ctx->out, "g[%u], ", cat6->ldgb.src_ssbo); - print_src(ctx, &src1); - fprintf(ctx->out, ", "); - print_src(ctx, &src2); - - if (debug & PRINT_VERBOSE) - fprintf(ctx->out, " (pad0=%x, pad3=%x, mustbe0=%x)", cat6->ldgb.pad0, cat6->ldgb.pad3, cat6->ldgb.mustbe0); - - return; - } - if (cat6->dst_off) { - dst.reg = (reg_t)(cat6->c.dst); - dstoff = cat6->c.off; - } else { - dst.reg = (reg_t)(cat6->d.dst); - } - - if (cat6->src_off) { - src1.reg = (reg_t)(cat6->a.src1); - src1.im = cat6->a.src1_im; - src2.reg = (reg_t)(cat6->a.src2); - src2.im = cat6->a.src2_im; - src1off = cat6->a.off; - } else { - src1.reg = (reg_t)(cat6->b.src1); - src1.im = cat6->b.src1_im; - src2.reg = (reg_t)(cat6->b.src2); - src2.im = cat6->b.src2_im; - } - - if (!nodst) { - if (sd) - fprintf(ctx->out, "%c[", sd); - /* note: dst might actually be a src (ie. 
address to store to) */ - print_src(ctx, &dst); - if (dstoff) - fprintf(ctx->out, "%+d", dstoff); - if (sd) - fprintf(ctx->out, "]"); - fprintf(ctx->out, ", "); - } - - if (ss) - fprintf(ctx->out, "%c[", ss); - - /* can have a larger than normal immed, so hack: */ - if (src1.im) { - fprintf(ctx->out, "%u", src1.reg.dummy13); - } else { - print_src(ctx, &src1); - } - - if (src1off) - fprintf(ctx->out, "%+d", src1off); - if (ss) - fprintf(ctx->out, "]"); - - switch (_OPC(6, cat6->opc)) { - case OPC_RESINFO: - case OPC_RESFMT: - break; - default: - fprintf(ctx->out, ", "); - print_src(ctx, &src2); - break; - } -} - -static void print_instr_cat7(struct disasm_ctx *ctx, instr_t *instr) -{ - instr_cat7_t *cat7 = &instr->cat7; - - if (cat7->g) - fprintf(ctx->out, ".g"); - if (cat7->l) - fprintf(ctx->out, ".l"); - - if (_OPC(7, cat7->opc) == OPC_FENCE) { - if (cat7->r) - fprintf(ctx->out, ".r"); - if (cat7->w) - fprintf(ctx->out, ".w"); - } -} - -/* size of largest OPC field of all the instruction categories: */ -#define NOPC_BITS 6 - -static const struct opc_info { - uint16_t cat; - uint16_t opc; - const char *name; - void (*print)(struct disasm_ctx *ctx, instr_t *instr); -} opcs[1 << (3+NOPC_BITS)] = { -#define OPC(cat, opc, name) [(opc)] = { (cat), (opc), #name, print_instr_cat##cat } - /* category 0: */ - OPC(0, OPC_NOP, nop), - OPC(0, OPC_BR, br), - OPC(0, OPC_JUMP, jump), - OPC(0, OPC_CALL, call), - OPC(0, OPC_RET, ret), - OPC(0, OPC_KILL, kill), - OPC(0, OPC_END, end), - OPC(0, OPC_EMIT, emit), - OPC(0, OPC_CUT, cut), - OPC(0, OPC_CHMASK, chmask), - OPC(0, OPC_CHSH, chsh), - OPC(0, OPC_FLOW_REV, flow_rev), - - /* category 1: */ - OPC(1, OPC_MOV, ), - - /* category 2: */ - OPC(2, OPC_ADD_F, add.f), - OPC(2, OPC_MIN_F, min.f), - OPC(2, OPC_MAX_F, max.f), - OPC(2, OPC_MUL_F, mul.f), - OPC(2, OPC_SIGN_F, sign.f), - OPC(2, OPC_CMPS_F, cmps.f), - OPC(2, OPC_ABSNEG_F, absneg.f), - OPC(2, OPC_CMPV_F, cmpv.f), - OPC(2, OPC_FLOOR_F, floor.f), - OPC(2, OPC_CEIL_F, ceil.f), - 
OPC(2, OPC_RNDNE_F, rndne.f), - OPC(2, OPC_RNDAZ_F, rndaz.f), - OPC(2, OPC_TRUNC_F, trunc.f), - OPC(2, OPC_ADD_U, add.u), - OPC(2, OPC_ADD_S, add.s), - OPC(2, OPC_SUB_U, sub.u), - OPC(2, OPC_SUB_S, sub.s), - OPC(2, OPC_CMPS_U, cmps.u), - OPC(2, OPC_CMPS_S, cmps.s), - OPC(2, OPC_MIN_U, min.u), - OPC(2, OPC_MIN_S, min.s), - OPC(2, OPC_MAX_U, max.u), - OPC(2, OPC_MAX_S, max.s), - OPC(2, OPC_ABSNEG_S, absneg.s), - OPC(2, OPC_AND_B, and.b), - OPC(2, OPC_OR_B, or.b), - OPC(2, OPC_NOT_B, not.b), - OPC(2, OPC_XOR_B, xor.b), - OPC(2, OPC_CMPV_U, cmpv.u), - OPC(2, OPC_CMPV_S, cmpv.s), - OPC(2, OPC_MUL_U, mul.u), - OPC(2, OPC_MUL_S, mul.s), - OPC(2, OPC_MULL_U, mull.u), - OPC(2, OPC_BFREV_B, bfrev.b), - OPC(2, OPC_CLZ_S, clz.s), - OPC(2, OPC_CLZ_B, clz.b), - OPC(2, OPC_SHL_B, shl.b), - OPC(2, OPC_SHR_B, shr.b), - OPC(2, OPC_ASHR_B, ashr.b), - OPC(2, OPC_BARY_F, bary.f), - OPC(2, OPC_MGEN_B, mgen.b), - OPC(2, OPC_GETBIT_B, getbit.b), - OPC(2, OPC_SETRM, setrm), - OPC(2, OPC_CBITS_B, cbits.b), - OPC(2, OPC_SHB, shb), - OPC(2, OPC_MSAD, msad), - - /* category 3: */ - OPC(3, OPC_MAD_U16, mad.u16), - OPC(3, OPC_MADSH_U16, madsh.u16), - OPC(3, OPC_MAD_S16, mad.s16), - OPC(3, OPC_MADSH_M16, madsh.m16), - OPC(3, OPC_MAD_U24, mad.u24), - OPC(3, OPC_MAD_S24, mad.s24), - OPC(3, OPC_MAD_F16, mad.f16), - OPC(3, OPC_MAD_F32, mad.f32), - OPC(3, OPC_SEL_B16, sel.b16), - OPC(3, OPC_SEL_B32, sel.b32), - OPC(3, OPC_SEL_S16, sel.s16), - OPC(3, OPC_SEL_S32, sel.s32), - OPC(3, OPC_SEL_F16, sel.f16), - OPC(3, OPC_SEL_F32, sel.f32), - OPC(3, OPC_SAD_S16, sad.s16), - OPC(3, OPC_SAD_S32, sad.s32), - - /* category 4: */ - OPC(4, OPC_RCP, rcp), - OPC(4, OPC_RSQ, rsq), - OPC(4, OPC_LOG2, log2), - OPC(4, OPC_EXP2, exp2), - OPC(4, OPC_SIN, sin), - OPC(4, OPC_COS, cos), - OPC(4, OPC_SQRT, sqrt), - - /* category 5: */ - OPC(5, OPC_ISAM, isam), - OPC(5, OPC_ISAML, isaml), - OPC(5, OPC_ISAMM, isamm), - OPC(5, OPC_SAM, sam), - OPC(5, OPC_SAMB, samb), - OPC(5, OPC_SAML, saml), - OPC(5, OPC_SAMGQ, samgq), - 
OPC(5, OPC_GETLOD, getlod), - OPC(5, OPC_CONV, conv), - OPC(5, OPC_CONVM, convm), - OPC(5, OPC_GETSIZE, getsize), - OPC(5, OPC_GETBUF, getbuf), - OPC(5, OPC_GETPOS, getpos), - OPC(5, OPC_GETINFO, getinfo), - OPC(5, OPC_DSX, dsx), - OPC(5, OPC_DSY, dsy), - OPC(5, OPC_GATHER4R, gather4r), - OPC(5, OPC_GATHER4G, gather4g), - OPC(5, OPC_GATHER4B, gather4b), - OPC(5, OPC_GATHER4A, gather4a), - OPC(5, OPC_SAMGP0, samgp0), - OPC(5, OPC_SAMGP1, samgp1), - OPC(5, OPC_SAMGP2, samgp2), - OPC(5, OPC_SAMGP3, samgp3), - OPC(5, OPC_DSXPP_1, dsxpp.1), - OPC(5, OPC_DSYPP_1, dsypp.1), - OPC(5, OPC_RGETPOS, rgetpos), - OPC(5, OPC_RGETINFO, rgetinfo), - - - /* category 6: */ - OPC(6, OPC_LDG, ldg), - OPC(6, OPC_LDL, ldl), - OPC(6, OPC_LDP, ldp), - OPC(6, OPC_STG, stg), - OPC(6, OPC_STL, stl), - OPC(6, OPC_STP, stp), - OPC(6, OPC_STI, sti), - OPC(6, OPC_G2L, g2l), - OPC(6, OPC_L2G, l2g), - OPC(6, OPC_PREFETCH, prefetch), - OPC(6, OPC_LDLW, ldlw), - OPC(6, OPC_STLW, stlw), - OPC(6, OPC_RESFMT, resfmt), - OPC(6, OPC_RESINFO, resinfo), - OPC(6, OPC_ATOMIC_ADD, atomic.add), - OPC(6, OPC_ATOMIC_SUB, atomic.sub), - OPC(6, OPC_ATOMIC_XCHG, atomic.xchg), - OPC(6, OPC_ATOMIC_INC, atomic.inc), - OPC(6, OPC_ATOMIC_DEC, atomic.dec), - OPC(6, OPC_ATOMIC_CMPXCHG, atomic.cmpxchg), - OPC(6, OPC_ATOMIC_MIN, atomic.min), - OPC(6, OPC_ATOMIC_MAX, atomic.max), - OPC(6, OPC_ATOMIC_AND, atomic.and), - OPC(6, OPC_ATOMIC_OR, atomic.or), - OPC(6, OPC_ATOMIC_XOR, atomic.xor), - OPC(6, OPC_LDGB, ldgb), - OPC(6, OPC_STGB, stgb), - OPC(6, OPC_STIB, stib), - OPC(6, OPC_LDC, ldc), - OPC(6, OPC_LDLV, ldlv), - - OPC(7, OPC_BAR, bar), - OPC(7, OPC_FENCE, fence), - -#undef OPC -}; - -#define GETINFO(instr) (&(opcs[((instr)->opc_cat << NOPC_BITS) | instr_opc(instr)])) - -// XXX hack.. 
probably should move this table somewhere common: -#include "ir3.h" -const char *ir3_instr_name(struct ir3_instruction *instr) -{ - if (opc_cat(instr->opc) == -1) return "??meta??"; - return opcs[instr->opc].name; -} - -static bool print_instr(struct disasm_ctx *ctx, uint32_t *dwords, int n) -{ - instr_t *instr = (instr_t *)dwords; - uint32_t opc = instr_opc(instr); - const char *name; - - if (debug & PRINT_VERBOSE) - fprintf(ctx->out, "%s%04d[%08xx_%08xx] ", levels[ctx->level], n, dwords[1], dwords[0]); - - /* NOTE: order flags are printed is a bit fugly.. but for now I - * try to match the order in llvm-a3xx disassembler for easy - * diff'ing.. - */ - - ctx->repeat = instr_repeat(instr); - - if (instr->sync) - fprintf(ctx->out, "(sy)"); - if (instr->ss && ((instr->opc_cat <= 4) || (instr->opc_cat == 7))) - fprintf(ctx->out, "(ss)"); - if (instr->jmp_tgt) - fprintf(ctx->out, "(jp)"); - if (instr_sat(instr)) - fprintf(ctx->out, "(sat)"); - if (ctx->repeat) - fprintf(ctx->out, "(rpt%d)", ctx->repeat); - if (instr->ul && ((2 <= instr->opc_cat) && (instr->opc_cat <= 4))) - fprintf(ctx->out, "(ul)"); - - name = GETINFO(instr)->name; - - if (name) { - fprintf(ctx->out, "%s", name); - GETINFO(instr)->print(ctx, instr); - } else { - fprintf(ctx->out, "unknown(%d,%d)", instr->opc_cat, opc); - } - - fprintf(ctx->out, "\n"); - - return (instr->opc_cat == 0) && (opc == OPC_END); -} - -int disasm_a3xx(uint32_t *dwords, int sizedwords, int level, FILE *out) -{ - struct disasm_ctx ctx; - int i; - - assert((sizedwords % 2) == 0); - - memset(&ctx, 0, sizeof(ctx)); - ctx.out = out; - ctx.level = level; - - for (i = 0; i < sizedwords; i += 2) - print_instr(&ctx, &dwords[i], i/2); - - return 0; -} diff -Nru mesa-18.3.3/src/gallium/drivers/freedreno/ir3/instr-a3xx.h mesa-19.0.1/src/gallium/drivers/freedreno/ir3/instr-a3xx.h --- mesa-18.3.3/src/gallium/drivers/freedreno/ir3/instr-a3xx.h 2018-09-27 19:13:54.000000000 +0000 +++ mesa-19.0.1/src/gallium/drivers/freedreno/ir3/instr-a3xx.h 
1970-01-01 00:00:00.000000000 +0000 @@ -1,869 +0,0 @@ -/* - * Copyright (c) 2013 Rob Clark - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * the rights to use, copy, modify, merge, publish, distribute, sublicense, - * and/or sell copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice (including the next - * paragraph) shall be included in all copies or substantial portions of the - * Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL - * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. 
- */ - -#ifndef INSTR_A3XX_H_ -#define INSTR_A3XX_H_ - -#define PACKED __attribute__((__packed__)) - -#include -#include -#include - -/* size of largest OPC field of all the instruction categories: */ -#define NOPC_BITS 6 - -#define _OPC(cat, opc) (((cat) << NOPC_BITS) | opc) - -typedef enum { - /* category 0: */ - OPC_NOP = _OPC(0, 0), - OPC_BR = _OPC(0, 1), - OPC_JUMP = _OPC(0, 2), - OPC_CALL = _OPC(0, 3), - OPC_RET = _OPC(0, 4), - OPC_KILL = _OPC(0, 5), - OPC_END = _OPC(0, 6), - OPC_EMIT = _OPC(0, 7), - OPC_CUT = _OPC(0, 8), - OPC_CHMASK = _OPC(0, 9), - OPC_CHSH = _OPC(0, 10), - OPC_FLOW_REV = _OPC(0, 11), - - /* category 1: */ - OPC_MOV = _OPC(1, 0), - - /* category 2: */ - OPC_ADD_F = _OPC(2, 0), - OPC_MIN_F = _OPC(2, 1), - OPC_MAX_F = _OPC(2, 2), - OPC_MUL_F = _OPC(2, 3), - OPC_SIGN_F = _OPC(2, 4), - OPC_CMPS_F = _OPC(2, 5), - OPC_ABSNEG_F = _OPC(2, 6), - OPC_CMPV_F = _OPC(2, 7), - /* 8 - invalid */ - OPC_FLOOR_F = _OPC(2, 9), - OPC_CEIL_F = _OPC(2, 10), - OPC_RNDNE_F = _OPC(2, 11), - OPC_RNDAZ_F = _OPC(2, 12), - OPC_TRUNC_F = _OPC(2, 13), - /* 14-15 - invalid */ - OPC_ADD_U = _OPC(2, 16), - OPC_ADD_S = _OPC(2, 17), - OPC_SUB_U = _OPC(2, 18), - OPC_SUB_S = _OPC(2, 19), - OPC_CMPS_U = _OPC(2, 20), - OPC_CMPS_S = _OPC(2, 21), - OPC_MIN_U = _OPC(2, 22), - OPC_MIN_S = _OPC(2, 23), - OPC_MAX_U = _OPC(2, 24), - OPC_MAX_S = _OPC(2, 25), - OPC_ABSNEG_S = _OPC(2, 26), - /* 27 - invalid */ - OPC_AND_B = _OPC(2, 28), - OPC_OR_B = _OPC(2, 29), - OPC_NOT_B = _OPC(2, 30), - OPC_XOR_B = _OPC(2, 31), - /* 32 - invalid */ - OPC_CMPV_U = _OPC(2, 33), - OPC_CMPV_S = _OPC(2, 34), - /* 35-47 - invalid */ - OPC_MUL_U = _OPC(2, 48), - OPC_MUL_S = _OPC(2, 49), - OPC_MULL_U = _OPC(2, 50), - OPC_BFREV_B = _OPC(2, 51), - OPC_CLZ_S = _OPC(2, 52), - OPC_CLZ_B = _OPC(2, 53), - OPC_SHL_B = _OPC(2, 54), - OPC_SHR_B = _OPC(2, 55), - OPC_ASHR_B = _OPC(2, 56), - OPC_BARY_F = _OPC(2, 57), - OPC_MGEN_B = _OPC(2, 58), - OPC_GETBIT_B = _OPC(2, 59), - OPC_SETRM = _OPC(2, 60), - OPC_CBITS_B = 
_OPC(2, 61), - OPC_SHB = _OPC(2, 62), - OPC_MSAD = _OPC(2, 63), - - /* category 3: */ - OPC_MAD_U16 = _OPC(3, 0), - OPC_MADSH_U16 = _OPC(3, 1), - OPC_MAD_S16 = _OPC(3, 2), - OPC_MADSH_M16 = _OPC(3, 3), /* should this be .s16? */ - OPC_MAD_U24 = _OPC(3, 4), - OPC_MAD_S24 = _OPC(3, 5), - OPC_MAD_F16 = _OPC(3, 6), - OPC_MAD_F32 = _OPC(3, 7), - OPC_SEL_B16 = _OPC(3, 8), - OPC_SEL_B32 = _OPC(3, 9), - OPC_SEL_S16 = _OPC(3, 10), - OPC_SEL_S32 = _OPC(3, 11), - OPC_SEL_F16 = _OPC(3, 12), - OPC_SEL_F32 = _OPC(3, 13), - OPC_SAD_S16 = _OPC(3, 14), - OPC_SAD_S32 = _OPC(3, 15), - - /* category 4: */ - OPC_RCP = _OPC(4, 0), - OPC_RSQ = _OPC(4, 1), - OPC_LOG2 = _OPC(4, 2), - OPC_EXP2 = _OPC(4, 3), - OPC_SIN = _OPC(4, 4), - OPC_COS = _OPC(4, 5), - OPC_SQRT = _OPC(4, 6), - // 7-63 - invalid - - /* category 5: */ - OPC_ISAM = _OPC(5, 0), - OPC_ISAML = _OPC(5, 1), - OPC_ISAMM = _OPC(5, 2), - OPC_SAM = _OPC(5, 3), - OPC_SAMB = _OPC(5, 4), - OPC_SAML = _OPC(5, 5), - OPC_SAMGQ = _OPC(5, 6), - OPC_GETLOD = _OPC(5, 7), - OPC_CONV = _OPC(5, 8), - OPC_CONVM = _OPC(5, 9), - OPC_GETSIZE = _OPC(5, 10), - OPC_GETBUF = _OPC(5, 11), - OPC_GETPOS = _OPC(5, 12), - OPC_GETINFO = _OPC(5, 13), - OPC_DSX = _OPC(5, 14), - OPC_DSY = _OPC(5, 15), - OPC_GATHER4R = _OPC(5, 16), - OPC_GATHER4G = _OPC(5, 17), - OPC_GATHER4B = _OPC(5, 18), - OPC_GATHER4A = _OPC(5, 19), - OPC_SAMGP0 = _OPC(5, 20), - OPC_SAMGP1 = _OPC(5, 21), - OPC_SAMGP2 = _OPC(5, 22), - OPC_SAMGP3 = _OPC(5, 23), - OPC_DSXPP_1 = _OPC(5, 24), - OPC_DSYPP_1 = _OPC(5, 25), - OPC_RGETPOS = _OPC(5, 26), - OPC_RGETINFO = _OPC(5, 27), - - /* category 6: */ - OPC_LDG = _OPC(6, 0), /* load-global */ - OPC_LDL = _OPC(6, 1), - OPC_LDP = _OPC(6, 2), - OPC_STG = _OPC(6, 3), /* store-global */ - OPC_STL = _OPC(6, 4), - OPC_STP = _OPC(6, 5), - OPC_STI = _OPC(6, 6), - OPC_G2L = _OPC(6, 7), - OPC_L2G = _OPC(6, 8), - OPC_PREFETCH = _OPC(6, 9), - OPC_LDLW = _OPC(6, 10), - OPC_STLW = _OPC(6, 11), - OPC_RESFMT = _OPC(6, 14), - OPC_RESINFO = _OPC(6, 15), - 
OPC_ATOMIC_ADD = _OPC(6, 16), - OPC_ATOMIC_SUB = _OPC(6, 17), - OPC_ATOMIC_XCHG = _OPC(6, 18), - OPC_ATOMIC_INC = _OPC(6, 19), - OPC_ATOMIC_DEC = _OPC(6, 20), - OPC_ATOMIC_CMPXCHG = _OPC(6, 21), - OPC_ATOMIC_MIN = _OPC(6, 22), - OPC_ATOMIC_MAX = _OPC(6, 23), - OPC_ATOMIC_AND = _OPC(6, 24), - OPC_ATOMIC_OR = _OPC(6, 25), - OPC_ATOMIC_XOR = _OPC(6, 26), - OPC_LDGB = _OPC(6, 27), - OPC_STGB = _OPC(6, 28), - OPC_STIB = _OPC(6, 29), - OPC_LDC = _OPC(6, 30), - OPC_LDLV = _OPC(6, 31), - - /* category 7: */ - OPC_BAR = _OPC(7, 0), - OPC_FENCE = _OPC(7, 1), - - /* meta instructions (category -1): */ - /* placeholder instr to mark shader inputs: */ - OPC_META_INPUT = _OPC(-1, 0), - /* The "fan-in" and "fan-out" instructions are used for keeping - * track of instructions that write to multiple dst registers - * (fan-out) like texture sample instructions, or read multiple - * consecutive scalar registers (fan-in) (bary.f, texture samp) - */ - OPC_META_FO = _OPC(-1, 2), - OPC_META_FI = _OPC(-1, 3), - -} opc_t; - -#define opc_cat(opc) ((int)((opc) >> NOPC_BITS)) -#define opc_op(opc) ((unsigned)((opc) & ((1 << NOPC_BITS) - 1))) - -typedef enum { - TYPE_F16 = 0, - TYPE_F32 = 1, - TYPE_U16 = 2, - TYPE_U32 = 3, - TYPE_S16 = 4, - TYPE_S32 = 5, - TYPE_U8 = 6, - TYPE_S8 = 7, // XXX I assume? 
-} type_t; - -static inline uint32_t type_size(type_t type) -{ - switch (type) { - case TYPE_F32: - case TYPE_U32: - case TYPE_S32: - return 32; - case TYPE_F16: - case TYPE_U16: - case TYPE_S16: - return 16; - case TYPE_U8: - case TYPE_S8: - return 8; - default: - assert(0); /* invalid type */ - return 0; - } -} - -static inline int type_float(type_t type) -{ - return (type == TYPE_F32) || (type == TYPE_F16); -} - -static inline int type_uint(type_t type) -{ - return (type == TYPE_U32) || (type == TYPE_U16) || (type == TYPE_U8); -} - -static inline int type_sint(type_t type) -{ - return (type == TYPE_S32) || (type == TYPE_S16) || (type == TYPE_S8); -} - -typedef union PACKED { - /* normal gpr or const src register: */ - struct PACKED { - uint32_t comp : 2; - uint32_t num : 10; - }; - /* for immediate val: */ - int32_t iim_val : 11; - /* to make compiler happy: */ - uint32_t dummy32; - uint32_t dummy10 : 10; - int32_t idummy10 : 10; - uint32_t dummy11 : 11; - uint32_t dummy12 : 12; - uint32_t dummy13 : 13; - uint32_t dummy8 : 8; -} reg_t; - -/* special registers: */ -#define REG_A0 61 /* address register */ -#define REG_P0 62 /* predicate register */ - -static inline int reg_special(reg_t reg) -{ - return (reg.num == REG_A0) || (reg.num == REG_P0); -} - -typedef struct PACKED { - /* dword0: */ - union PACKED { - struct PACKED { - int16_t immed : 16; - uint32_t dummy1 : 16; - } a3xx; - struct PACKED { - int32_t immed : 20; - uint32_t dummy1 : 12; - } a4xx; - struct PACKED { - int32_t immed : 32; - } a5xx; - }; - - /* dword1: */ - uint32_t dummy2 : 8; - uint32_t repeat : 3; - uint32_t dummy3 : 1; - uint32_t ss : 1; - uint32_t dummy4 : 7; - uint32_t inv : 1; - uint32_t comp : 2; - uint32_t opc : 4; - uint32_t jmp_tgt : 1; - uint32_t sync : 1; - uint32_t opc_cat : 3; -} instr_cat0_t; - -typedef struct PACKED { - /* dword0: */ - union PACKED { - /* for normal src register: */ - struct PACKED { - uint32_t src : 11; - /* at least low bit of pad must be zero or it will - * 
look like a address relative src - */ - uint32_t pad : 21; - }; - /* for address relative: */ - struct PACKED { - int32_t off : 10; - uint32_t src_rel_c : 1; - uint32_t src_rel : 1; - uint32_t unknown : 20; - }; - /* for immediate: */ - int32_t iim_val; - uint32_t uim_val; - float fim_val; - }; - - /* dword1: */ - uint32_t dst : 8; - uint32_t repeat : 3; - uint32_t src_r : 1; - uint32_t ss : 1; - uint32_t ul : 1; - uint32_t dst_type : 3; - uint32_t dst_rel : 1; - uint32_t src_type : 3; - uint32_t src_c : 1; - uint32_t src_im : 1; - uint32_t even : 1; - uint32_t pos_inf : 1; - uint32_t must_be_0 : 2; - uint32_t jmp_tgt : 1; - uint32_t sync : 1; - uint32_t opc_cat : 3; -} instr_cat1_t; - -typedef struct PACKED { - /* dword0: */ - union PACKED { - struct PACKED { - uint32_t src1 : 11; - uint32_t must_be_zero1: 2; - uint32_t src1_im : 1; /* immediate */ - uint32_t src1_neg : 1; /* negate */ - uint32_t src1_abs : 1; /* absolute value */ - }; - struct PACKED { - uint32_t src1 : 10; - uint32_t src1_c : 1; /* relative-const */ - uint32_t src1_rel : 1; /* relative address */ - uint32_t must_be_zero : 1; - uint32_t dummy : 3; - } rel1; - struct PACKED { - uint32_t src1 : 12; - uint32_t src1_c : 1; /* const */ - uint32_t dummy : 3; - } c1; - }; - - union PACKED { - struct PACKED { - uint32_t src2 : 11; - uint32_t must_be_zero2: 2; - uint32_t src2_im : 1; /* immediate */ - uint32_t src2_neg : 1; /* negate */ - uint32_t src2_abs : 1; /* absolute value */ - }; - struct PACKED { - uint32_t src2 : 10; - uint32_t src2_c : 1; /* relative-const */ - uint32_t src2_rel : 1; /* relative address */ - uint32_t must_be_zero : 1; - uint32_t dummy : 3; - } rel2; - struct PACKED { - uint32_t src2 : 12; - uint32_t src2_c : 1; /* const */ - uint32_t dummy : 3; - } c2; - }; - - /* dword1: */ - uint32_t dst : 8; - uint32_t repeat : 2; - uint32_t sat : 1; - uint32_t src1_r : 1; - uint32_t ss : 1; - uint32_t ul : 1; /* dunno */ - uint32_t dst_half : 1; /* or widen/narrow.. ie. 
dst hrN <-> rN */ - uint32_t ei : 1; - uint32_t cond : 3; - uint32_t src2_r : 1; - uint32_t full : 1; /* not half */ - uint32_t opc : 6; - uint32_t jmp_tgt : 1; - uint32_t sync : 1; - uint32_t opc_cat : 3; -} instr_cat2_t; - -typedef struct PACKED { - /* dword0: */ - union PACKED { - struct PACKED { - uint32_t src1 : 11; - uint32_t must_be_zero1: 2; - uint32_t src2_c : 1; - uint32_t src1_neg : 1; - uint32_t src2_r : 1; - }; - struct PACKED { - uint32_t src1 : 10; - uint32_t src1_c : 1; - uint32_t src1_rel : 1; - uint32_t must_be_zero : 1; - uint32_t dummy : 3; - } rel1; - struct PACKED { - uint32_t src1 : 12; - uint32_t src1_c : 1; - uint32_t dummy : 3; - } c1; - }; - - union PACKED { - struct PACKED { - uint32_t src3 : 11; - uint32_t must_be_zero2: 2; - uint32_t src3_r : 1; - uint32_t src2_neg : 1; - uint32_t src3_neg : 1; - }; - struct PACKED { - uint32_t src3 : 10; - uint32_t src3_c : 1; - uint32_t src3_rel : 1; - uint32_t must_be_zero : 1; - uint32_t dummy : 3; - } rel2; - struct PACKED { - uint32_t src3 : 12; - uint32_t src3_c : 1; - uint32_t dummy : 3; - } c2; - }; - - /* dword1: */ - uint32_t dst : 8; - uint32_t repeat : 2; - uint32_t sat : 1; - uint32_t src1_r : 1; - uint32_t ss : 1; - uint32_t ul : 1; - uint32_t dst_half : 1; /* or widen/narrow.. ie. dst hrN <-> rN */ - uint32_t src2 : 8; - uint32_t opc : 4; - uint32_t jmp_tgt : 1; - uint32_t sync : 1; - uint32_t opc_cat : 3; -} instr_cat3_t; - -static inline bool instr_cat3_full(instr_cat3_t *cat3) -{ - switch (_OPC(3, cat3->opc)) { - case OPC_MAD_F16: - case OPC_MAD_U16: - case OPC_MAD_S16: - case OPC_SEL_B16: - case OPC_SEL_S16: - case OPC_SEL_F16: - case OPC_SAD_S16: - case OPC_SAD_S32: // really?? 
- return false; - default: - return true; - } -} - -typedef struct PACKED { - /* dword0: */ - union PACKED { - struct PACKED { - uint32_t src : 11; - uint32_t must_be_zero1: 2; - uint32_t src_im : 1; /* immediate */ - uint32_t src_neg : 1; /* negate */ - uint32_t src_abs : 1; /* absolute value */ - }; - struct PACKED { - uint32_t src : 10; - uint32_t src_c : 1; /* relative-const */ - uint32_t src_rel : 1; /* relative address */ - uint32_t must_be_zero : 1; - uint32_t dummy : 3; - } rel; - struct PACKED { - uint32_t src : 12; - uint32_t src_c : 1; /* const */ - uint32_t dummy : 3; - } c; - }; - uint32_t dummy1 : 16; /* seem to be ignored */ - - /* dword1: */ - uint32_t dst : 8; - uint32_t repeat : 2; - uint32_t sat : 1; - uint32_t src_r : 1; - uint32_t ss : 1; - uint32_t ul : 1; - uint32_t dst_half : 1; /* or widen/narrow.. ie. dst hrN <-> rN */ - uint32_t dummy2 : 5; /* seem to be ignored */ - uint32_t full : 1; /* not half */ - uint32_t opc : 6; - uint32_t jmp_tgt : 1; - uint32_t sync : 1; - uint32_t opc_cat : 3; -} instr_cat4_t; - -typedef struct PACKED { - /* dword0: */ - union PACKED { - /* normal case: */ - struct PACKED { - uint32_t full : 1; /* not half */ - uint32_t src1 : 8; - uint32_t src2 : 8; - uint32_t dummy1 : 4; /* seem to be ignored */ - uint32_t samp : 4; - uint32_t tex : 7; - } norm; - /* s2en case: */ - struct PACKED { - uint32_t full : 1; /* not half */ - uint32_t src1 : 8; - uint32_t src2 : 11; - uint32_t dummy1 : 1; - uint32_t src3 : 8; - uint32_t dummy2 : 3; - } s2en; - /* same in either case: */ - // XXX I think, confirm this - struct PACKED { - uint32_t full : 1; /* not half */ - uint32_t src1 : 8; - uint32_t pad : 23; - }; - }; - - /* dword1: */ - uint32_t dst : 8; - uint32_t wrmask : 4; /* write-mask */ - uint32_t type : 3; - uint32_t dummy2 : 1; /* seems to be ignored */ - uint32_t is_3d : 1; - - uint32_t is_a : 1; - uint32_t is_s : 1; - uint32_t is_s2en : 1; - uint32_t is_o : 1; - uint32_t is_p : 1; - - uint32_t opc : 5; - uint32_t 
jmp_tgt : 1; - uint32_t sync : 1; - uint32_t opc_cat : 3; -} instr_cat5_t; - -/* dword0 encoding for src_off: [src1 + off], src2: */ -typedef struct PACKED { - /* dword0: */ - uint32_t mustbe1 : 1; - int32_t off : 13; - uint32_t src1 : 8; - uint32_t src1_im : 1; - uint32_t src2_im : 1; - uint32_t src2 : 8; - - /* dword1: */ - uint32_t dword1; -} instr_cat6a_t; - -/* dword0 encoding for !src_off: [src1], src2 */ -typedef struct PACKED { - /* dword0: */ - uint32_t mustbe0 : 1; - uint32_t src1 : 13; - uint32_t ignore0 : 8; - uint32_t src1_im : 1; - uint32_t src2_im : 1; - uint32_t src2 : 8; - - /* dword1: */ - uint32_t dword1; -} instr_cat6b_t; - -/* dword1 encoding for dst_off: */ -typedef struct PACKED { - /* dword0: */ - uint32_t dword0; - - /* note: there is some weird stuff going on where sometimes - * cat6->a.off is involved.. but that seems like a bug in - * the blob, since it is used even if !cat6->src_off - * It would make sense for there to be some more bits to - * bring us to 11 bits worth of offset, but not sure.. - */ - int32_t off : 8; - uint32_t mustbe1 : 1; - uint32_t dst : 8; - uint32_t pad1 : 15; -} instr_cat6c_t; - -/* dword1 encoding for !dst_off: */ -typedef struct PACKED { - /* dword0: */ - uint32_t dword0; - - uint32_t dst : 8; - uint32_t mustbe0 : 1; - uint32_t idx : 8; - uint32_t pad0 : 15; -} instr_cat6d_t; - -/* ldgb and atomics.. 
- * - * ldgb: pad0=0, pad3=1 - * atomic .g: pad0=1, pad3=1 - * .l: pad0=1, pad3=0 - */ -typedef struct PACKED { - /* dword0: */ - uint32_t pad0 : 1; - uint32_t src3 : 8; - uint32_t d : 2; - uint32_t typed : 1; - uint32_t type_size : 2; - uint32_t src1 : 8; - uint32_t src1_im : 1; - uint32_t src2_im : 1; - uint32_t src2 : 8; - - /* dword1: */ - uint32_t dst : 8; - uint32_t mustbe0 : 1; - uint32_t src_ssbo : 8; - uint32_t pad2 : 3; // type - uint32_t g : 1; - uint32_t pad3 : 1; - uint32_t pad4 : 10; // opc/jmp_tgt/sync/opc_cat -} instr_cat6ldgb_t; - -/* stgb, pad0=0, pad3=2 - */ -typedef struct PACKED { - /* dword0: */ - uint32_t mustbe1 : 1; // ??? - uint32_t src1 : 8; - uint32_t d : 2; - uint32_t typed : 1; - uint32_t type_size : 2; - uint32_t pad0 : 9; - uint32_t src2_im : 1; - uint32_t src2 : 8; - - /* dword1: */ - uint32_t src3 : 8; - uint32_t src3_im : 1; - uint32_t dst_ssbo : 8; - uint32_t pad2 : 3; // type - uint32_t pad3 : 2; - uint32_t pad4 : 10; // opc/jmp_tgt/sync/opc_cat -} instr_cat6stgb_t; - -typedef union PACKED { - instr_cat6a_t a; - instr_cat6b_t b; - instr_cat6c_t c; - instr_cat6d_t d; - instr_cat6ldgb_t ldgb; - instr_cat6stgb_t stgb; - struct PACKED { - /* dword0: */ - uint32_t src_off : 1; - uint32_t pad1 : 31; - - /* dword1: */ - uint32_t pad2 : 8; - uint32_t dst_off : 1; - uint32_t pad3 : 8; - uint32_t type : 3; - uint32_t g : 1; /* or in some cases it means dst immed */ - uint32_t pad4 : 1; - uint32_t opc : 5; - uint32_t jmp_tgt : 1; - uint32_t sync : 1; - uint32_t opc_cat : 3; - }; -} instr_cat6_t; - -typedef struct PACKED { - /* dword0: */ - uint32_t pad1 : 32; - - /* dword1: */ - uint32_t pad2 : 12; - uint32_t ss : 1; /* maybe in the encoding, but blob only uses (sy) */ - uint32_t pad3 : 6; - uint32_t w : 1; /* write */ - uint32_t r : 1; /* read */ - uint32_t l : 1; /* local */ - uint32_t g : 1; /* global */ - uint32_t opc : 4; /* presumed, but only a couple known OPCs */ - uint32_t jmp_tgt : 1; /* (jp) */ - uint32_t sync : 1; /* (sy) */ - 
uint32_t opc_cat : 3; -} instr_cat7_t; - -typedef union PACKED { - instr_cat0_t cat0; - instr_cat1_t cat1; - instr_cat2_t cat2; - instr_cat3_t cat3; - instr_cat4_t cat4; - instr_cat5_t cat5; - instr_cat6_t cat6; - instr_cat7_t cat7; - struct PACKED { - /* dword0: */ - uint32_t pad1 : 32; - - /* dword1: */ - uint32_t pad2 : 12; - uint32_t ss : 1; /* cat1-cat4 (cat0??) and cat7 (?) */ - uint32_t ul : 1; /* cat2-cat4 (and cat1 in blob.. which may be bug??) */ - uint32_t pad3 : 13; - uint32_t jmp_tgt : 1; - uint32_t sync : 1; - uint32_t opc_cat : 3; - - }; -} instr_t; - -static inline uint32_t instr_repeat(instr_t *instr) -{ - switch (instr->opc_cat) { - case 0: return instr->cat0.repeat; - case 1: return instr->cat1.repeat; - case 2: return instr->cat2.repeat; - case 3: return instr->cat3.repeat; - case 4: return instr->cat4.repeat; - default: return 0; - } -} - -static inline bool instr_sat(instr_t *instr) -{ - switch (instr->opc_cat) { - case 2: return instr->cat2.sat; - case 3: return instr->cat3.sat; - case 4: return instr->cat4.sat; - default: return false; - } -} - -static inline uint32_t instr_opc(instr_t *instr) -{ - switch (instr->opc_cat) { - case 0: return instr->cat0.opc; - case 1: return 0; - case 2: return instr->cat2.opc; - case 3: return instr->cat3.opc; - case 4: return instr->cat4.opc; - case 5: return instr->cat5.opc; - case 6: return instr->cat6.opc; - case 7: return instr->cat7.opc; - default: return 0; - } -} - -static inline bool is_mad(opc_t opc) -{ - switch (opc) { - case OPC_MAD_U16: - case OPC_MAD_S16: - case OPC_MAD_U24: - case OPC_MAD_S24: - case OPC_MAD_F16: - case OPC_MAD_F32: - return true; - default: - return false; - } -} - -static inline bool is_madsh(opc_t opc) -{ - switch (opc) { - case OPC_MADSH_U16: - case OPC_MADSH_M16: - return true; - default: - return false; - } -} - -static inline bool is_atomic(opc_t opc) -{ - switch (opc) { - case OPC_ATOMIC_ADD: - case OPC_ATOMIC_SUB: - case OPC_ATOMIC_XCHG: - case OPC_ATOMIC_INC: - case 
OPC_ATOMIC_DEC: - case OPC_ATOMIC_CMPXCHG: - case OPC_ATOMIC_MIN: - case OPC_ATOMIC_MAX: - case OPC_ATOMIC_AND: - case OPC_ATOMIC_OR: - case OPC_ATOMIC_XOR: - return true; - default: - return false; - } -} - -static inline bool is_ssbo(opc_t opc) -{ - switch (opc) { - case OPC_RESFMT: - case OPC_RESINFO: - case OPC_LDGB: - case OPC_STGB: - case OPC_STIB: - return true; - default: - return false; - } -} - -#endif /* INSTR_A3XX_H_ */ diff -Nru mesa-18.3.3/src/gallium/drivers/freedreno/ir3/ir3.c mesa-19.0.1/src/gallium/drivers/freedreno/ir3/ir3.c --- mesa-18.3.3/src/gallium/drivers/freedreno/ir3/ir3.c 2018-12-07 18:58:04.000000000 +0000 +++ mesa-19.0.1/src/gallium/drivers/freedreno/ir3/ir3.c 1970-01-01 00:00:00.000000000 +0000 @@ -1,940 +0,0 @@ -/* - * Copyright (c) 2012 Rob Clark - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * the rights to use, copy, modify, merge, publish, distribute, sublicense, - * and/or sell copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice (including the next - * paragraph) shall be included in all copies or substantial portions of the - * Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL - * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. 
- */ - -#include "ir3.h" - -#include -#include -#include -#include -#include -#include - -#include "util/ralloc.h" - -#include "freedreno_util.h" -#include "instr-a3xx.h" - -/* simple allocator to carve allocations out of an up-front allocated heap, - * so that we can free everything easily in one shot. - */ -void * ir3_alloc(struct ir3 *shader, int sz) -{ - return rzalloc_size(shader, sz); /* TODO: don't use rzalloc */ -} - -struct ir3 * ir3_create(struct ir3_compiler *compiler, - unsigned nin, unsigned nout) -{ - struct ir3 *shader = rzalloc(compiler, struct ir3); - - shader->compiler = compiler; - shader->ninputs = nin; - shader->inputs = ir3_alloc(shader, sizeof(shader->inputs[0]) * nin); - - shader->noutputs = nout; - shader->outputs = ir3_alloc(shader, sizeof(shader->outputs[0]) * nout); - - list_inithead(&shader->block_list); - list_inithead(&shader->array_list); - - return shader; -} - -void ir3_destroy(struct ir3 *shader) -{ - ralloc_free(shader); -} - -#define iassert(cond) do { \ - if (!(cond)) { \ - debug_assert(cond); \ - return -1; \ - } } while (0) - -#define iassert_type(reg, full) do { \ - if ((full)) { \ - iassert(!((reg)->flags & IR3_REG_HALF)); \ - } else { \ - iassert((reg)->flags & IR3_REG_HALF); \ - } } while (0); - -static uint32_t reg(struct ir3_register *reg, struct ir3_info *info, - uint32_t repeat, uint32_t valid_flags) -{ - reg_t val = { .dummy32 = 0 }; - - if (reg->flags & ~valid_flags) { - debug_printf("INVALID FLAGS: %x vs %x\n", - reg->flags, valid_flags); - } - - if (!(reg->flags & IR3_REG_R)) - repeat = 0; - - if (reg->flags & IR3_REG_IMMED) { - val.iim_val = reg->iim_val; - } else { - unsigned components; - int16_t max; - - if (reg->flags & IR3_REG_RELATIV) { - components = reg->size; - val.idummy10 = reg->array.offset; - max = (reg->array.offset + repeat + components - 1) >> 2; - } else { - components = util_last_bit(reg->wrmask); - val.comp = reg->num & 0x3; - val.num = reg->num >> 2; - max = (reg->num + repeat + components - 
1) >> 2; - } - - if (reg->flags & IR3_REG_CONST) { - info->max_const = MAX2(info->max_const, max); - } else if (val.num == 63) { - /* ignore writes to dummy register r63.x */ - } else if (max < 48) { - if (reg->flags & IR3_REG_HALF) { - if (info->gpu_id >= 600) { - /* starting w/ a6xx, half regs conflict with full regs: */ - info->max_reg = MAX2(info->max_reg, (max+1)/2); - } else { - info->max_half_reg = MAX2(info->max_half_reg, max); - } - } else { - info->max_reg = MAX2(info->max_reg, max); - } - } - } - - return val.dummy32; -} - -static int emit_cat0(struct ir3_instruction *instr, void *ptr, - struct ir3_info *info) -{ - instr_cat0_t *cat0 = ptr; - - if (info->gpu_id >= 500) { - cat0->a5xx.immed = instr->cat0.immed; - } else if (info->gpu_id >= 400) { - cat0->a4xx.immed = instr->cat0.immed; - } else { - cat0->a3xx.immed = instr->cat0.immed; - } - cat0->repeat = instr->repeat; - cat0->ss = !!(instr->flags & IR3_INSTR_SS); - cat0->inv = instr->cat0.inv; - cat0->comp = instr->cat0.comp; - cat0->opc = instr->opc; - cat0->jmp_tgt = !!(instr->flags & IR3_INSTR_JP); - cat0->sync = !!(instr->flags & IR3_INSTR_SY); - cat0->opc_cat = 0; - - return 0; -} - -static int emit_cat1(struct ir3_instruction *instr, void *ptr, - struct ir3_info *info) -{ - struct ir3_register *dst = instr->regs[0]; - struct ir3_register *src = instr->regs[1]; - instr_cat1_t *cat1 = ptr; - - iassert(instr->regs_count == 2); - iassert_type(dst, type_size(instr->cat1.dst_type) == 32); - if (!(src->flags & IR3_REG_IMMED)) - iassert_type(src, type_size(instr->cat1.src_type) == 32); - - if (src->flags & IR3_REG_IMMED) { - cat1->iim_val = src->iim_val; - cat1->src_im = 1; - } else if (src->flags & IR3_REG_RELATIV) { - cat1->off = reg(src, info, instr->repeat, - IR3_REG_R | IR3_REG_CONST | IR3_REG_HALF | IR3_REG_RELATIV); - cat1->src_rel = 1; - cat1->src_rel_c = !!(src->flags & IR3_REG_CONST); - } else { - cat1->src = reg(src, info, instr->repeat, - IR3_REG_R | IR3_REG_CONST | IR3_REG_HALF); - 
cat1->src_c = !!(src->flags & IR3_REG_CONST); - } - - cat1->dst = reg(dst, info, instr->repeat, - IR3_REG_RELATIV | IR3_REG_EVEN | - IR3_REG_R | IR3_REG_POS_INF | IR3_REG_HALF); - cat1->repeat = instr->repeat; - cat1->src_r = !!(src->flags & IR3_REG_R); - cat1->ss = !!(instr->flags & IR3_INSTR_SS); - cat1->ul = !!(instr->flags & IR3_INSTR_UL); - cat1->dst_type = instr->cat1.dst_type; - cat1->dst_rel = !!(dst->flags & IR3_REG_RELATIV); - cat1->src_type = instr->cat1.src_type; - cat1->even = !!(dst->flags & IR3_REG_EVEN); - cat1->pos_inf = !!(dst->flags & IR3_REG_POS_INF); - cat1->jmp_tgt = !!(instr->flags & IR3_INSTR_JP); - cat1->sync = !!(instr->flags & IR3_INSTR_SY); - cat1->opc_cat = 1; - - return 0; -} - -static int emit_cat2(struct ir3_instruction *instr, void *ptr, - struct ir3_info *info) -{ - struct ir3_register *dst = instr->regs[0]; - struct ir3_register *src1 = instr->regs[1]; - struct ir3_register *src2 = instr->regs[2]; - instr_cat2_t *cat2 = ptr; - unsigned absneg = ir3_cat2_absneg(instr->opc); - - iassert((instr->regs_count == 2) || (instr->regs_count == 3)); - - if (src1->flags & IR3_REG_RELATIV) { - iassert(src1->array.offset < (1 << 10)); - cat2->rel1.src1 = reg(src1, info, instr->repeat, - IR3_REG_RELATIV | IR3_REG_CONST | IR3_REG_R | - IR3_REG_HALF | absneg); - cat2->rel1.src1_c = !!(src1->flags & IR3_REG_CONST); - cat2->rel1.src1_rel = 1; - } else if (src1->flags & IR3_REG_CONST) { - iassert(src1->num < (1 << 12)); - cat2->c1.src1 = reg(src1, info, instr->repeat, - IR3_REG_CONST | IR3_REG_R | IR3_REG_HALF); - cat2->c1.src1_c = 1; - } else { - iassert(src1->num < (1 << 11)); - cat2->src1 = reg(src1, info, instr->repeat, - IR3_REG_IMMED | IR3_REG_R | IR3_REG_HALF | - absneg); - } - cat2->src1_im = !!(src1->flags & IR3_REG_IMMED); - cat2->src1_neg = !!(src1->flags & (IR3_REG_FNEG | IR3_REG_SNEG | IR3_REG_BNOT)); - cat2->src1_abs = !!(src1->flags & (IR3_REG_FABS | IR3_REG_SABS)); - cat2->src1_r = !!(src1->flags & IR3_REG_R); - - if (src2) { - 
iassert((src2->flags & IR3_REG_IMMED) || - !((src1->flags ^ src2->flags) & IR3_REG_HALF)); - - if (src2->flags & IR3_REG_RELATIV) { - iassert(src2->array.offset < (1 << 10)); - cat2->rel2.src2 = reg(src2, info, instr->repeat, - IR3_REG_RELATIV | IR3_REG_CONST | IR3_REG_R | - IR3_REG_HALF | absneg); - cat2->rel2.src2_c = !!(src2->flags & IR3_REG_CONST); - cat2->rel2.src2_rel = 1; - } else if (src2->flags & IR3_REG_CONST) { - iassert(src2->num < (1 << 12)); - cat2->c2.src2 = reg(src2, info, instr->repeat, - IR3_REG_CONST | IR3_REG_R | IR3_REG_HALF); - cat2->c2.src2_c = 1; - } else { - iassert(src2->num < (1 << 11)); - cat2->src2 = reg(src2, info, instr->repeat, - IR3_REG_IMMED | IR3_REG_R | IR3_REG_HALF | - absneg); - } - - cat2->src2_im = !!(src2->flags & IR3_REG_IMMED); - cat2->src2_neg = !!(src2->flags & (IR3_REG_FNEG | IR3_REG_SNEG | IR3_REG_BNOT)); - cat2->src2_abs = !!(src2->flags & (IR3_REG_FABS | IR3_REG_SABS)); - cat2->src2_r = !!(src2->flags & IR3_REG_R); - } - - cat2->dst = reg(dst, info, instr->repeat, - IR3_REG_R | IR3_REG_EI | IR3_REG_HALF); - cat2->repeat = instr->repeat; - cat2->sat = !!(instr->flags & IR3_INSTR_SAT); - cat2->ss = !!(instr->flags & IR3_INSTR_SS); - cat2->ul = !!(instr->flags & IR3_INSTR_UL); - cat2->dst_half = !!((src1->flags ^ dst->flags) & IR3_REG_HALF); - cat2->ei = !!(dst->flags & IR3_REG_EI); - cat2->cond = instr->cat2.condition; - cat2->full = ! 
(src1->flags & IR3_REG_HALF); - cat2->opc = instr->opc; - cat2->jmp_tgt = !!(instr->flags & IR3_INSTR_JP); - cat2->sync = !!(instr->flags & IR3_INSTR_SY); - cat2->opc_cat = 2; - - return 0; -} - -static int emit_cat3(struct ir3_instruction *instr, void *ptr, - struct ir3_info *info) -{ - struct ir3_register *dst = instr->regs[0]; - struct ir3_register *src1 = instr->regs[1]; - struct ir3_register *src2 = instr->regs[2]; - struct ir3_register *src3 = instr->regs[3]; - unsigned absneg = ir3_cat3_absneg(instr->opc); - instr_cat3_t *cat3 = ptr; - uint32_t src_flags = 0; - - switch (instr->opc) { - case OPC_MAD_F16: - case OPC_MAD_U16: - case OPC_MAD_S16: - case OPC_SEL_B16: - case OPC_SEL_S16: - case OPC_SEL_F16: - case OPC_SAD_S16: - case OPC_SAD_S32: // really?? - src_flags |= IR3_REG_HALF; - break; - default: - break; - } - - iassert(instr->regs_count == 4); - iassert(!((src1->flags ^ src_flags) & IR3_REG_HALF)); - iassert(!((src2->flags ^ src_flags) & IR3_REG_HALF)); - iassert(!((src3->flags ^ src_flags) & IR3_REG_HALF)); - - if (src1->flags & IR3_REG_RELATIV) { - iassert(src1->array.offset < (1 << 10)); - cat3->rel1.src1 = reg(src1, info, instr->repeat, - IR3_REG_RELATIV | IR3_REG_CONST | IR3_REG_R | - IR3_REG_HALF | absneg); - cat3->rel1.src1_c = !!(src1->flags & IR3_REG_CONST); - cat3->rel1.src1_rel = 1; - } else if (src1->flags & IR3_REG_CONST) { - iassert(src1->num < (1 << 12)); - cat3->c1.src1 = reg(src1, info, instr->repeat, - IR3_REG_CONST | IR3_REG_R | IR3_REG_HALF); - cat3->c1.src1_c = 1; - } else { - iassert(src1->num < (1 << 11)); - cat3->src1 = reg(src1, info, instr->repeat, - IR3_REG_R | IR3_REG_HALF | absneg); - } - - cat3->src1_neg = !!(src1->flags & (IR3_REG_FNEG | IR3_REG_SNEG | IR3_REG_BNOT)); - cat3->src1_r = !!(src1->flags & IR3_REG_R); - - cat3->src2 = reg(src2, info, instr->repeat, - IR3_REG_CONST | IR3_REG_R | IR3_REG_HALF | absneg); - cat3->src2_c = !!(src2->flags & IR3_REG_CONST); - cat3->src2_neg = !!(src2->flags & (IR3_REG_FNEG | 
IR3_REG_SNEG | IR3_REG_BNOT)); - cat3->src2_r = !!(src2->flags & IR3_REG_R); - - - if (src3->flags & IR3_REG_RELATIV) { - iassert(src3->array.offset < (1 << 10)); - cat3->rel2.src3 = reg(src3, info, instr->repeat, - IR3_REG_RELATIV | IR3_REG_CONST | IR3_REG_R | - IR3_REG_HALF | absneg); - cat3->rel2.src3_c = !!(src3->flags & IR3_REG_CONST); - cat3->rel2.src3_rel = 1; - } else if (src3->flags & IR3_REG_CONST) { - iassert(src3->num < (1 << 12)); - cat3->c2.src3 = reg(src3, info, instr->repeat, - IR3_REG_CONST | IR3_REG_R | IR3_REG_HALF); - cat3->c2.src3_c = 1; - } else { - iassert(src3->num < (1 << 11)); - cat3->src3 = reg(src3, info, instr->repeat, - IR3_REG_R | IR3_REG_HALF | absneg); - } - - cat3->src3_neg = !!(src3->flags & (IR3_REG_FNEG | IR3_REG_SNEG | IR3_REG_BNOT)); - cat3->src3_r = !!(src3->flags & IR3_REG_R); - - cat3->dst = reg(dst, info, instr->repeat, IR3_REG_R | IR3_REG_HALF); - cat3->repeat = instr->repeat; - cat3->sat = !!(instr->flags & IR3_INSTR_SAT); - cat3->ss = !!(instr->flags & IR3_INSTR_SS); - cat3->ul = !!(instr->flags & IR3_INSTR_UL); - cat3->dst_half = !!((src_flags ^ dst->flags) & IR3_REG_HALF); - cat3->opc = instr->opc; - cat3->jmp_tgt = !!(instr->flags & IR3_INSTR_JP); - cat3->sync = !!(instr->flags & IR3_INSTR_SY); - cat3->opc_cat = 3; - - return 0; -} - -static int emit_cat4(struct ir3_instruction *instr, void *ptr, - struct ir3_info *info) -{ - struct ir3_register *dst = instr->regs[0]; - struct ir3_register *src = instr->regs[1]; - instr_cat4_t *cat4 = ptr; - - iassert(instr->regs_count == 2); - - if (src->flags & IR3_REG_RELATIV) { - iassert(src->array.offset < (1 << 10)); - cat4->rel.src = reg(src, info, instr->repeat, - IR3_REG_RELATIV | IR3_REG_CONST | IR3_REG_FNEG | - IR3_REG_FABS | IR3_REG_R | IR3_REG_HALF); - cat4->rel.src_c = !!(src->flags & IR3_REG_CONST); - cat4->rel.src_rel = 1; - } else if (src->flags & IR3_REG_CONST) { - iassert(src->num < (1 << 12)); - cat4->c.src = reg(src, info, instr->repeat, - IR3_REG_CONST | 
IR3_REG_FNEG | IR3_REG_FABS | - IR3_REG_R | IR3_REG_HALF); - cat4->c.src_c = 1; - } else { - iassert(src->num < (1 << 11)); - cat4->src = reg(src, info, instr->repeat, - IR3_REG_IMMED | IR3_REG_FNEG | IR3_REG_FABS | - IR3_REG_R | IR3_REG_HALF); - } - - cat4->src_im = !!(src->flags & IR3_REG_IMMED); - cat4->src_neg = !!(src->flags & IR3_REG_FNEG); - cat4->src_abs = !!(src->flags & IR3_REG_FABS); - cat4->src_r = !!(src->flags & IR3_REG_R); - - cat4->dst = reg(dst, info, instr->repeat, IR3_REG_R | IR3_REG_HALF); - cat4->repeat = instr->repeat; - cat4->sat = !!(instr->flags & IR3_INSTR_SAT); - cat4->ss = !!(instr->flags & IR3_INSTR_SS); - cat4->ul = !!(instr->flags & IR3_INSTR_UL); - cat4->dst_half = !!((src->flags ^ dst->flags) & IR3_REG_HALF); - cat4->full = ! (src->flags & IR3_REG_HALF); - cat4->opc = instr->opc; - cat4->jmp_tgt = !!(instr->flags & IR3_INSTR_JP); - cat4->sync = !!(instr->flags & IR3_INSTR_SY); - cat4->opc_cat = 4; - - return 0; -} - -static int emit_cat5(struct ir3_instruction *instr, void *ptr, - struct ir3_info *info) -{ - struct ir3_register *dst = instr->regs[0]; - struct ir3_register *src1 = instr->regs[1]; - struct ir3_register *src2 = instr->regs[2]; - struct ir3_register *src3 = instr->regs[3]; - instr_cat5_t *cat5 = ptr; - - iassert_type(dst, type_size(instr->cat5.type) == 32) - - assume(src1 || !src2); - assume(src2 || !src3); - - if (src1) { - cat5->full = ! 
(src1->flags & IR3_REG_HALF); - cat5->src1 = reg(src1, info, instr->repeat, IR3_REG_HALF); - } - - if (instr->flags & IR3_INSTR_S2EN) { - if (src2) { - iassert(!((src1->flags ^ src2->flags) & IR3_REG_HALF)); - cat5->s2en.src2 = reg(src2, info, instr->repeat, IR3_REG_HALF); - } - if (src3) { - iassert(src3->flags & IR3_REG_HALF); - cat5->s2en.src3 = reg(src3, info, instr->repeat, IR3_REG_HALF); - } - iassert(!(instr->cat5.samp | instr->cat5.tex)); - } else { - iassert(!src3); - if (src2) { - iassert(!((src1->flags ^ src2->flags) & IR3_REG_HALF)); - cat5->norm.src2 = reg(src2, info, instr->repeat, IR3_REG_HALF); - } - cat5->norm.samp = instr->cat5.samp; - cat5->norm.tex = instr->cat5.tex; - } - - cat5->dst = reg(dst, info, instr->repeat, IR3_REG_R | IR3_REG_HALF); - cat5->wrmask = dst->wrmask; - cat5->type = instr->cat5.type; - cat5->is_3d = !!(instr->flags & IR3_INSTR_3D); - cat5->is_a = !!(instr->flags & IR3_INSTR_A); - cat5->is_s = !!(instr->flags & IR3_INSTR_S); - cat5->is_s2en = !!(instr->flags & IR3_INSTR_S2EN); - cat5->is_o = !!(instr->flags & IR3_INSTR_O); - cat5->is_p = !!(instr->flags & IR3_INSTR_P); - cat5->opc = instr->opc; - cat5->jmp_tgt = !!(instr->flags & IR3_INSTR_JP); - cat5->sync = !!(instr->flags & IR3_INSTR_SY); - cat5->opc_cat = 5; - - return 0; -} - -static int emit_cat6(struct ir3_instruction *instr, void *ptr, - struct ir3_info *info) -{ - struct ir3_register *dst, *src1, *src2; - instr_cat6_t *cat6 = ptr; - bool type_full = type_size(instr->cat6.type) == 32; - - cat6->type = instr->cat6.type; - cat6->opc = instr->opc; - cat6->jmp_tgt = !!(instr->flags & IR3_INSTR_JP); - cat6->sync = !!(instr->flags & IR3_INSTR_SY); - cat6->g = !!(instr->flags & IR3_INSTR_G); - cat6->opc_cat = 6; - - switch (instr->opc) { - case OPC_RESINFO: - case OPC_RESFMT: - iassert_type(instr->regs[0], type_full); /* dst */ - iassert_type(instr->regs[1], type_full); /* src1 */ - break; - case OPC_L2G: - case OPC_G2L: - iassert_type(instr->regs[0], true); /* dst */ - 
iassert_type(instr->regs[1], true); /* src1 */ - break; - case OPC_STG: - case OPC_STL: - case OPC_STP: - case OPC_STI: - case OPC_STLW: - case OPC_STIB: - /* no dst, so regs[0] is dummy */ - iassert_type(instr->regs[1], true); /* dst */ - iassert_type(instr->regs[2], type_full); /* src1 */ - iassert_type(instr->regs[3], true); /* src2 */ - break; - default: - iassert_type(instr->regs[0], type_full); /* dst */ - iassert_type(instr->regs[1], true); /* src1 */ - if (instr->regs_count > 2) - iassert_type(instr->regs[2], true); /* src1 */ - break; - } - - /* the "dst" for a store instruction is (from the perspective - * of data flow in the shader, ie. register use/def, etc) in - * fact a register that is read by the instruction, rather - * than written: - */ - if (is_store(instr)) { - iassert(instr->regs_count >= 3); - - dst = instr->regs[1]; - src1 = instr->regs[2]; - src2 = (instr->regs_count >= 4) ? instr->regs[3] : NULL; - } else { - iassert(instr->regs_count >= 2); - - dst = instr->regs[0]; - src1 = instr->regs[1]; - src2 = (instr->regs_count >= 3) ? instr->regs[2] : NULL; - } - - /* TODO we need a more comprehensive list about which instructions - * can be encoded which way. Or possibly use IR3_INSTR_0 flag to - * indicate to use the src_off encoding even if offset is zero - * (but then what to do about dst_off?) - */ - if (is_atomic(instr->opc)) { - instr_cat6ldgb_t *ldgb = ptr; - - /* maybe these two bits both determine the instruction encoding? 
*/ - cat6->src_off = false; - - ldgb->d = instr->cat6.d - 1; - ldgb->typed = instr->cat6.typed; - ldgb->type_size = instr->cat6.iim_val - 1; - - ldgb->dst = reg(dst, info, instr->repeat, IR3_REG_R | IR3_REG_HALF); - - if (ldgb->g) { - struct ir3_register *src3 = instr->regs[3]; - struct ir3_register *src4 = instr->regs[4]; - - /* first src is src_ssbo: */ - iassert(src1->flags & IR3_REG_IMMED); - ldgb->src_ssbo = src1->uim_val; - - ldgb->src1 = reg(src2, info, instr->repeat, IR3_REG_IMMED); - ldgb->src1_im = !!(src2->flags & IR3_REG_IMMED); - ldgb->src2 = reg(src3, info, instr->repeat, IR3_REG_IMMED); - ldgb->src2_im = !!(src3->flags & IR3_REG_IMMED); - - ldgb->src3 = reg(src4, info, instr->repeat, 0); - ldgb->pad0 = 0x1; - ldgb->pad3 = 0x1; - } else { - ldgb->src1 = reg(src1, info, instr->repeat, IR3_REG_IMMED); - ldgb->src1_im = !!(src1->flags & IR3_REG_IMMED); - ldgb->src2 = reg(src2, info, instr->repeat, IR3_REG_IMMED); - ldgb->src2_im = !!(src2->flags & IR3_REG_IMMED); - ldgb->pad0 = 0x1; - ldgb->pad3 = 0x0; - } - - return 0; - } else if (instr->opc == OPC_LDGB) { - struct ir3_register *src3 = instr->regs[3]; - instr_cat6ldgb_t *ldgb = ptr; - - /* maybe these two bits both determine the instruction encoding? 
*/ - cat6->src_off = false; - - ldgb->d = instr->cat6.d - 1; - ldgb->typed = instr->cat6.typed; - ldgb->type_size = instr->cat6.iim_val - 1; - - ldgb->dst = reg(dst, info, instr->repeat, IR3_REG_R | IR3_REG_HALF); - - /* first src is src_ssbo: */ - iassert(src1->flags & IR3_REG_IMMED); - ldgb->src_ssbo = src1->uim_val; - - /* then next two are src1/src2: */ - ldgb->src1 = reg(src2, info, instr->repeat, IR3_REG_IMMED); - ldgb->src1_im = !!(src2->flags & IR3_REG_IMMED); - ldgb->src2 = reg(src3, info, instr->repeat, IR3_REG_IMMED); - ldgb->src2_im = !!(src3->flags & IR3_REG_IMMED); - - ldgb->pad0 = 0x0; - ldgb->pad3 = 0x1; - - return 0; - } else if (instr->opc == OPC_RESINFO) { - instr_cat6ldgb_t *ldgb = ptr; - - ldgb->d = instr->cat6.d - 1; - - ldgb->dst = reg(dst, info, instr->repeat, IR3_REG_R | IR3_REG_HALF); - - /* first src is src_ssbo: */ - iassert(src1->flags & IR3_REG_IMMED); - ldgb->src_ssbo = src1->uim_val; - - return 0; - } else if ((instr->opc == OPC_STGB) || (instr->opc == OPC_STIB)) { - struct ir3_register *src3 = instr->regs[4]; - instr_cat6stgb_t *stgb = ptr; - - /* maybe these two bits both determine the instruction encoding? 
*/ - cat6->src_off = true; - stgb->pad3 = 0x2; - - stgb->d = instr->cat6.d - 1; - stgb->typed = instr->cat6.typed; - stgb->type_size = instr->cat6.iim_val - 1; - - /* first src is dst_ssbo: */ - iassert(dst->flags & IR3_REG_IMMED); - stgb->dst_ssbo = dst->uim_val; - - /* then src1/src2/src3: */ - stgb->src1 = reg(src1, info, instr->repeat, 0); - stgb->src2 = reg(src2, info, instr->repeat, IR3_REG_IMMED); - stgb->src2_im = !!(src2->flags & IR3_REG_IMMED); - stgb->src3 = reg(src3, info, instr->repeat, IR3_REG_IMMED); - stgb->src3_im = !!(src3->flags & IR3_REG_IMMED); - - return 0; - } else if (instr->cat6.src_offset || (instr->opc == OPC_LDG) || - (instr->opc == OPC_LDL)) { - instr_cat6a_t *cat6a = ptr; - - cat6->src_off = true; - - cat6a->src1 = reg(src1, info, instr->repeat, IR3_REG_IMMED); - cat6a->src1_im = !!(src1->flags & IR3_REG_IMMED); - if (src2) { - cat6a->src2 = reg(src2, info, instr->repeat, IR3_REG_IMMED); - cat6a->src2_im = !!(src2->flags & IR3_REG_IMMED); - } - cat6a->off = instr->cat6.src_offset; - } else { - instr_cat6b_t *cat6b = ptr; - - cat6->src_off = false; - - cat6b->src1 = reg(src1, info, instr->repeat, IR3_REG_IMMED | IR3_REG_HALF); - cat6b->src1_im = !!(src1->flags & IR3_REG_IMMED); - if (src2) { - cat6b->src2 = reg(src2, info, instr->repeat, IR3_REG_IMMED); - cat6b->src2_im = !!(src2->flags & IR3_REG_IMMED); - } - } - - if (instr->cat6.dst_offset || (instr->opc == OPC_STG) || - (instr->opc == OPC_STL)) { - instr_cat6c_t *cat6c = ptr; - cat6->dst_off = true; - cat6c->dst = reg(dst, info, instr->repeat, IR3_REG_R | IR3_REG_HALF); - cat6c->off = instr->cat6.dst_offset; - } else { - instr_cat6d_t *cat6d = ptr; - cat6->dst_off = false; - cat6d->dst = reg(dst, info, instr->repeat, IR3_REG_R | IR3_REG_HALF); - } - - return 0; -} - -static int emit_cat7(struct ir3_instruction *instr, void *ptr, - struct ir3_info *info) -{ - instr_cat7_t *cat7 = ptr; - - cat7->ss = !!(instr->flags & IR3_INSTR_SS); - cat7->w = instr->cat7.w; - cat7->r = 
instr->cat7.r; - cat7->l = instr->cat7.l; - cat7->g = instr->cat7.g; - cat7->opc = instr->opc; - cat7->jmp_tgt = !!(instr->flags & IR3_INSTR_JP); - cat7->sync = !!(instr->flags & IR3_INSTR_SY); - cat7->opc_cat = 7; - - return 0; -} - -static int (*emit[])(struct ir3_instruction *instr, void *ptr, - struct ir3_info *info) = { - emit_cat0, emit_cat1, emit_cat2, emit_cat3, emit_cat4, emit_cat5, emit_cat6, - emit_cat7, -}; - -void * ir3_assemble(struct ir3 *shader, struct ir3_info *info, - uint32_t gpu_id) -{ - uint32_t *ptr, *dwords; - - info->gpu_id = gpu_id; - info->max_reg = -1; - info->max_half_reg = -1; - info->max_const = -1; - info->instrs_count = 0; - info->sizedwords = 0; - info->ss = info->sy = 0; - - list_for_each_entry (struct ir3_block, block, &shader->block_list, node) { - list_for_each_entry (struct ir3_instruction, instr, &block->instr_list, node) { - info->sizedwords += 2; - } - } - - /* need an integer number of instruction "groups" (sets of 16 - * instructions on a4xx or sets of 4 instructions on a3xx), - * so pad out w/ NOPs if needed: (NOTE each instruction is 64bits) - */ - if (gpu_id >= 400) { - info->sizedwords = align(info->sizedwords, 16 * 2); - } else { - info->sizedwords = align(info->sizedwords, 4 * 2); - } - - ptr = dwords = calloc(4, info->sizedwords); - - list_for_each_entry (struct ir3_block, block, &shader->block_list, node) { - list_for_each_entry (struct ir3_instruction, instr, &block->instr_list, node) { - int ret = emit[opc_cat(instr->opc)](instr, dwords, info); - if (ret) - goto fail; - info->instrs_count += 1 + instr->repeat; - dwords += 2; - - if (instr->flags & IR3_INSTR_SS) - info->ss++; - - if (instr->flags & IR3_INSTR_SY) - info->sy++; - } - } - - return ptr; - -fail: - free(ptr); - return NULL; -} - -static struct ir3_register * reg_create(struct ir3 *shader, - int num, int flags) -{ - struct ir3_register *reg = - ir3_alloc(shader, sizeof(struct ir3_register)); - reg->wrmask = 1; - reg->flags = flags; - reg->num = num; - 
return reg; -} - -static void insert_instr(struct ir3_block *block, - struct ir3_instruction *instr) -{ - struct ir3 *shader = block->shader; -#ifdef DEBUG - instr->serialno = ++shader->instr_count; -#endif - list_addtail(&instr->node, &block->instr_list); - - if (is_input(instr)) - array_insert(shader, shader->baryfs, instr); -} - -struct ir3_block * ir3_block_create(struct ir3 *shader) -{ - struct ir3_block *block = ir3_alloc(shader, sizeof(*block)); -#ifdef DEBUG - block->serialno = ++shader->block_count; -#endif - block->shader = shader; - list_inithead(&block->node); - list_inithead(&block->instr_list); - return block; -} - -static struct ir3_instruction *instr_create(struct ir3_block *block, int nreg) -{ - struct ir3_instruction *instr; - unsigned sz = sizeof(*instr) + (nreg * sizeof(instr->regs[0])); - char *ptr = ir3_alloc(block->shader, sz); - - instr = (struct ir3_instruction *)ptr; - ptr += sizeof(*instr); - instr->regs = (struct ir3_register **)ptr; - -#ifdef DEBUG - instr->regs_max = nreg; -#endif - - return instr; -} - -struct ir3_instruction * ir3_instr_create2(struct ir3_block *block, - opc_t opc, int nreg) -{ - struct ir3_instruction *instr = instr_create(block, nreg); - instr->block = block; - instr->opc = opc; - insert_instr(block, instr); - return instr; -} - -struct ir3_instruction * ir3_instr_create(struct ir3_block *block, opc_t opc) -{ - /* NOTE: we could be slightly more clever, at least for non-meta, - * and choose # of regs based on category. 
- */ - return ir3_instr_create2(block, opc, 4); -} - -struct ir3_instruction * ir3_instr_clone(struct ir3_instruction *instr) -{ - struct ir3_instruction *new_instr = instr_create(instr->block, - instr->regs_count); - struct ir3_register **regs; - unsigned i; - - regs = new_instr->regs; - *new_instr = *instr; - new_instr->regs = regs; - - insert_instr(instr->block, new_instr); - - /* clone registers: */ - new_instr->regs_count = 0; - for (i = 0; i < instr->regs_count; i++) { - struct ir3_register *reg = instr->regs[i]; - struct ir3_register *new_reg = - ir3_reg_create(new_instr, reg->num, reg->flags); - *new_reg = *reg; - } - - return new_instr; -} - -/* Add a false dependency to instruction, to ensure it is scheduled first: */ -void ir3_instr_add_dep(struct ir3_instruction *instr, struct ir3_instruction *dep) -{ - array_insert(instr, instr->deps, dep); -} - -struct ir3_register * ir3_reg_create(struct ir3_instruction *instr, - int num, int flags) -{ - struct ir3 *shader = instr->block->shader; - struct ir3_register *reg = reg_create(shader, num, flags); -#ifdef DEBUG - debug_assert(instr->regs_count < instr->regs_max); -#endif - instr->regs[instr->regs_count++] = reg; - return reg; -} - -struct ir3_register * ir3_reg_clone(struct ir3 *shader, - struct ir3_register *reg) -{ - struct ir3_register *new_reg = reg_create(shader, 0, 0); - *new_reg = *reg; - return new_reg; -} - -void -ir3_instr_set_address(struct ir3_instruction *instr, - struct ir3_instruction *addr) -{ - if (instr->address != addr) { - struct ir3 *ir = instr->block->shader; - instr->address = addr; - array_insert(ir, ir->indirects, instr); - } -} - -void -ir3_block_clear_mark(struct ir3_block *block) -{ - list_for_each_entry (struct ir3_instruction, instr, &block->instr_list, node) - instr->flags &= ~IR3_INSTR_MARK; -} - -void -ir3_clear_mark(struct ir3 *ir) -{ - list_for_each_entry (struct ir3_block, block, &ir->block_list, node) { - ir3_block_clear_mark(block); - } -} - -/* note: this will destroy 
instr->depth, don't do it until after sched! */ -unsigned -ir3_count_instructions(struct ir3 *ir) -{ - unsigned cnt = 0; - list_for_each_entry (struct ir3_block, block, &ir->block_list, node) { - list_for_each_entry (struct ir3_instruction, instr, &block->instr_list, node) { - instr->ip = cnt++; - } - block->start_ip = list_first_entry(&block->instr_list, struct ir3_instruction, node)->ip; - block->end_ip = list_last_entry(&block->instr_list, struct ir3_instruction, node)->ip; - } - return cnt; -} - -struct ir3_array * -ir3_lookup_array(struct ir3 *ir, unsigned id) -{ - list_for_each_entry (struct ir3_array, arr, &ir->array_list, node) - if (arr->id == id) - return arr; - return NULL; -} diff -Nru mesa-18.3.3/src/gallium/drivers/freedreno/ir3/ir3_cache.c mesa-19.0.1/src/gallium/drivers/freedreno/ir3/ir3_cache.c --- mesa-18.3.3/src/gallium/drivers/freedreno/ir3/ir3_cache.c 2018-12-07 18:58:04.000000000 +0000 +++ mesa-19.0.1/src/gallium/drivers/freedreno/ir3/ir3_cache.c 2019-03-31 23:16:37.000000000 +0000 @@ -1,5 +1,3 @@ -/* -*- mode: C; c-file-style: "k&r"; tab-width 4; indent-tabs-mode: t; -*- */ - /* * Copyright (C) 2015 Rob Clark * @@ -30,7 +28,7 @@ #include "util/hash_table.h" #include "ir3_cache.h" -#include "ir3_shader.h" +#include "ir3_gallium.h" static uint32_t diff -Nru mesa-18.3.3/src/gallium/drivers/freedreno/ir3/ir3_cache.h mesa-19.0.1/src/gallium/drivers/freedreno/ir3/ir3_cache.h --- mesa-18.3.3/src/gallium/drivers/freedreno/ir3/ir3_cache.h 2018-12-07 18:58:04.000000000 +0000 +++ mesa-19.0.1/src/gallium/drivers/freedreno/ir3/ir3_cache.h 2019-03-31 23:16:37.000000000 +0000 @@ -27,7 +27,7 @@ #ifndef IR3_CACHE_H_ #define IR3_CACHE_H_ -#include "ir3_shader.h" +#include "ir3/ir3_shader.h" /* * An in-memory cache for mapping shader state objects plus shader key to diff -Nru mesa-18.3.3/src/gallium/drivers/freedreno/ir3/ir3_cmdline.c mesa-19.0.1/src/gallium/drivers/freedreno/ir3/ir3_cmdline.c --- mesa-18.3.3/src/gallium/drivers/freedreno/ir3/ir3_cmdline.c 
2018-12-07 18:58:04.000000000 +0000 +++ mesa-19.0.1/src/gallium/drivers/freedreno/ir3/ir3_cmdline.c 2019-03-31 23:16:37.000000000 +0000 @@ -37,12 +37,11 @@ #include "tgsi/tgsi_text.h" #include "tgsi/tgsi_dump.h" -#include "freedreno_util.h" - -#include "ir3_compiler.h" -#include "ir3_nir.h" -#include "instr-a3xx.h" -#include "ir3.h" +#include "ir3/ir3_compiler.h" +#include "ir3/ir3_gallium.h" +#include "ir3/ir3_nir.h" +#include "ir3/instr-a3xx.h" +#include "ir3/ir3.h" #include "compiler/glsl/standalone.h" #include "compiler/glsl/glsl_to_nir.h" @@ -103,29 +102,42 @@ load_glsl(unsigned num_files, char* const* files, gl_shader_stage stage) { static const struct standalone_options options = { - .glsl_version = 140, + .glsl_version = 460, .do_link = true, }; struct gl_shader_program *prog; + const nir_shader_compiler_options *nir_options = + ir3_get_compiler_options(compiler); prog = standalone_compile_shader(&options, num_files, files); if (!prog) errx(1, "couldn't parse `%s'", files[0]); - nir_shader *nir = glsl_to_nir(prog, stage, ir3_get_compiler_options(compiler)); + nir_shader *nir = glsl_to_nir(prog, stage, nir_options); /* required NIR passes: */ - /* TODO cmdline args for some of the conditional lowering passes? 
*/ + if (nir_options->lower_all_io_to_temps || + nir->info.stage == MESA_SHADER_VERTEX || + nir->info.stage == MESA_SHADER_GEOMETRY) { + NIR_PASS_V(nir, nir_lower_io_to_temporaries, + nir_shader_get_entrypoint(nir), + true, true); + } else if (nir->info.stage == MESA_SHADER_FRAGMENT) { + NIR_PASS_V(nir, nir_lower_io_to_temporaries, + nir_shader_get_entrypoint(nir), + true, false); + } - NIR_PASS_V(nir, nir_lower_io_to_temporaries, - nir_shader_get_entrypoint(nir), - true, true); NIR_PASS_V(nir, nir_lower_global_vars_to_local); NIR_PASS_V(nir, nir_split_var_copies); NIR_PASS_V(nir, nir_lower_var_copies); NIR_PASS_V(nir, nir_split_var_copies); NIR_PASS_V(nir, nir_lower_var_copies); + nir_print_shader(nir, stdout); + NIR_PASS_V(nir, gl_nir_lower_atomics, prog, true); + NIR_PASS_V(nir, nir_lower_atomics_to_ssbo, 8); + nir_print_shader(nir, stdout); switch (stage) { case MESA_SHADER_VERTEX: @@ -152,6 +164,9 @@ &nir->num_outputs, ir3_glsl_type_size); break; + case MESA_SHADER_COMPUTE: + case MESA_SHADER_KERNEL: + break; default: errx(1, "unhandled shader stage: %d", stage); } @@ -215,6 +230,7 @@ .variable_pointers = true, }, .lower_workgroup_access_to_offsets = true, + .lower_ubo_ssbo_access_to_offsets = true, .debug = { .func = debug_func, } @@ -282,7 +298,7 @@ while (n < argc) { if (!strcmp(argv[n], "--verbose")) { - fd_mesa_debug |= FD_DBG_MSGS | FD_DBG_OPTMSGS | FD_DBG_DISASM; + ir3_shader_debug |= IR3_DBG_OPTMSGS | IR3_DBG_DISASM; n++; continue; } @@ -337,7 +353,7 @@ } if (!strcmp(argv[n], "--stream-out")) { - struct pipe_stream_output_info *so = &s.stream_output; + struct ir3_stream_output_info *so = &s.stream_output; debug_printf(" %s", argv[n]); /* TODO more dynamic config based on number of outputs, etc * rather than just hard-code for first output: @@ -396,6 +412,12 @@ errx(1, "in SPIR-V mode, an entry point must be specified"); entry = argv[n]; n++; + } else if (strcmp(ext, ".comp") == 0) { + if (s.from_tgsi || from_spirv) + errx(1, "cannot mix 
GLSL/TGSI/SPIRV"); + if (num_files >= ARRAY_SIZE(filenames)) + errx(1, "too many GLSL files"); + stage = MESA_SHADER_COMPUTE; } else if (strcmp(ext, ".frag") == 0) { if (s.from_tgsi || from_spirv) errx(1, "cannot mix GLSL/TGSI/SPIRV"); @@ -431,16 +453,16 @@ return ret; } - if (fd_mesa_debug & FD_DBG_OPTMSGS) + if (ir3_shader_debug & IR3_DBG_OPTMSGS) debug_printf("%s\n", (char *)ptr); if (!tgsi_text_translate(ptr, toks, ARRAY_SIZE(toks))) errx(1, "could not parse `%s'", filenames[0]); - if (fd_mesa_debug & FD_DBG_OPTMSGS) + if (ir3_shader_debug & IR3_DBG_OPTMSGS) tgsi_dump(toks, 0); - nir = ir3_tgsi_to_nir(toks); + nir = ir3_tgsi_to_nir(compiler, toks); NIR_PASS_V(nir, nir_lower_global_vars_to_local); } else if (from_spirv) { nir = load_spirv(filenames[0], entry, stage); @@ -463,20 +485,7 @@ v.key = key; v.shader = &s; - - switch (nir->info.stage) { - case MESA_SHADER_FRAGMENT: - s.type = v.type = SHADER_FRAGMENT; - break; - case MESA_SHADER_VERTEX: - s.type = v.type = SHADER_VERTEX; - break; - case MESA_SHADER_COMPUTE: - s.type = v.type = SHADER_COMPUTE; - break; - default: - errx(1, "unhandled shader stage: %d", nir->info.stage); - } + s.type = v.type = nir->info.stage; info = "NIR compiler"; ret = ir3_compile_shader_nir(s.compiler, &v); diff -Nru mesa-18.3.3/src/gallium/drivers/freedreno/ir3/ir3_compiler.c mesa-19.0.1/src/gallium/drivers/freedreno/ir3/ir3_compiler.c --- mesa-18.3.3/src/gallium/drivers/freedreno/ir3/ir3_compiler.c 2018-12-07 18:58:04.000000000 +0000 +++ mesa-19.0.1/src/gallium/drivers/freedreno/ir3/ir3_compiler.c 1970-01-01 00:00:00.000000000 +0000 @@ -1,56 +0,0 @@ -/* - * Copyright (C) 2015 Rob Clark - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * the rights to use, copy, modify, merge, publish, distribute, sublicense, - * and/or sell copies of the 
Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice (including the next - * paragraph) shall be included in all copies or substantial portions of the - * Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL - * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - * - * Authors: - * Rob Clark - */ - -#include "util/ralloc.h" - -#include "ir3_compiler.h" - -struct ir3_compiler * ir3_compiler_create(struct fd_device *dev, uint32_t gpu_id) -{ - struct ir3_compiler *compiler = rzalloc(NULL, struct ir3_compiler); - - compiler->dev = dev; - compiler->gpu_id = gpu_id; - compiler->set = ir3_ra_alloc_reg_set(compiler); - - if (compiler->gpu_id >= 400) { - /* need special handling for "flat" */ - compiler->flat_bypass = true; - compiler->levels_add_one = false; - compiler->unminify_coords = false; - compiler->txf_ms_with_isaml = false; - compiler->array_index_add_half = true; - } else { - /* no special handling for "flat" */ - compiler->flat_bypass = false; - compiler->levels_add_one = true; - compiler->unminify_coords = true; - compiler->txf_ms_with_isaml = true; - compiler->array_index_add_half = false; - } - - return compiler; -} diff -Nru mesa-18.3.3/src/gallium/drivers/freedreno/ir3/ir3_compiler.h mesa-19.0.1/src/gallium/drivers/freedreno/ir3/ir3_compiler.h --- mesa-18.3.3/src/gallium/drivers/freedreno/ir3/ir3_compiler.h 2018-12-07 18:58:04.000000000 +0000 +++ mesa-19.0.1/src/gallium/drivers/freedreno/ir3/ir3_compiler.h 1970-01-01 00:00:00.000000000 +0000 @@ 
-1,73 +0,0 @@ -/* - * Copyright (C) 2013 Rob Clark - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * the rights to use, copy, modify, merge, publish, distribute, sublicense, - * and/or sell copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice (including the next - * paragraph) shall be included in all copies or substantial portions of the - * Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL - * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. 
- * - * Authors: - * Rob Clark - */ - -#ifndef IR3_COMPILER_H_ -#define IR3_COMPILER_H_ - -#include "ir3_shader.h" - -struct ir3_ra_reg_set; - -struct ir3_compiler { - struct fd_device *dev; - uint32_t gpu_id; - struct ir3_ra_reg_set *set; - uint32_t shader_count; - - /* - * Configuration options for things that are handled differently on - * different generations: - */ - - /* a4xx (and later) drops SP_FS_FLAT_SHAD_MODE_REG_* for flat-interpolate - * so we need to use ldlv.u32 to load the varying directly: - */ - bool flat_bypass; - - /* on a3xx, we need to add one to # of array levels: - */ - bool levels_add_one; - - /* on a3xx, we need to scale up integer coords for isaml based - * on LoD: - */ - bool unminify_coords; - - /* on a3xx do txf_ms w/ isaml and scaled coords: */ - bool txf_ms_with_isaml; - - /* on a4xx, for array textures we need to add 0.5 to the array - * index coordinate: - */ - bool array_index_add_half; -}; - -struct ir3_compiler * ir3_compiler_create(struct fd_device *dev, uint32_t gpu_id); - -int ir3_compile_shader_nir(struct ir3_compiler *compiler, - struct ir3_shader_variant *so); - -#endif /* IR3_COMPILER_H_ */ diff -Nru mesa-18.3.3/src/gallium/drivers/freedreno/ir3/ir3_compiler_nir.c mesa-19.0.1/src/gallium/drivers/freedreno/ir3/ir3_compiler_nir.c --- mesa-18.3.3/src/gallium/drivers/freedreno/ir3/ir3_compiler_nir.c 2018-12-07 18:58:04.000000000 +0000 +++ mesa-19.0.1/src/gallium/drivers/freedreno/ir3/ir3_compiler_nir.c 1970-01-01 00:00:00.000000000 +0000 @@ -1,3823 +0,0 @@ -/* - * Copyright (C) 2015 Rob Clark - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * the rights to use, copy, modify, merge, publish, distribute, sublicense, - * and/or sell copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the 
following conditions: - * - * The above copyright notice and this permission notice (including the next - * paragraph) shall be included in all copies or substantial portions of the - * Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL - * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - * - * Authors: - * Rob Clark - */ - -#include - -#include "pipe/p_state.h" -#include "util/u_string.h" -#include "util/u_memory.h" -#include "util/u_inlines.h" - -#include "freedreno_util.h" - -#include "ir3_compiler.h" -#include "ir3_shader.h" -#include "ir3_nir.h" - -#include "instr-a3xx.h" -#include "ir3.h" - - -struct ir3_context { - struct ir3_compiler *compiler; - - struct nir_shader *s; - - struct nir_instr *cur_instr; /* current instruction, just for debug */ - - struct ir3 *ir; - struct ir3_shader_variant *so; - - struct ir3_block *block; /* the current block */ - struct ir3_block *in_block; /* block created for shader inputs */ - - nir_function_impl *impl; - - /* For fragment shaders, varyings are not actual shader inputs, - * instead the hw passes a varying-coord which is used with - * bary.f. - * - * But NIR doesn't know that, it still declares varyings as - * inputs. So we do all the input tracking normally and fix - * things up after compile_instructions() - * - * NOTE that frag_vcoord is the hardware position (possibly it - * is actually an index or tag or some such.. it is *not* - * values that can be directly used for gl_FragCoord..) 
- */ - struct ir3_instruction *frag_vcoord; - - /* for fragment shaders, for gl_FrontFacing and gl_FragCoord: */ - struct ir3_instruction *frag_face, *frag_coord; - - /* For vertex shaders, keep track of the system values sources */ - struct ir3_instruction *vertex_id, *basevertex, *instance_id; - - /* For fragment shaders: */ - struct ir3_instruction *samp_id, *samp_mask_in; - - /* Compute shader inputs: */ - struct ir3_instruction *local_invocation_id, *work_group_id; - - /* mapping from nir_register to defining instruction: */ - struct hash_table *def_ht; - - unsigned num_arrays; - - /* a common pattern for indirect addressing is to request the - * same address register multiple times. To avoid generating - * duplicate instruction sequences (which our backend does not - * try to clean up, since that should be done as the NIR stage) - * we cache the address value generated for a given src value: - * - * Note that we have to cache these per alignment, since same - * src used for an array of vec1 cannot be also used for an - * array of vec4. - */ - struct hash_table *addr_ht[4]; - - /* last dst array, for indirect we need to insert a var-store. - */ - struct ir3_instruction **last_dst; - unsigned last_dst_n; - - /* maps nir_block to ir3_block, mostly for the purposes of - * figuring out the blocks successors - */ - struct hash_table *block_ht; - - /* on a4xx, bitmask of samplers which need astc+srgb workaround: */ - unsigned astc_srgb; - - unsigned samples; /* bitmask of x,y sample shifts */ - - unsigned max_texture_index; - - /* set if we encounter something we can't handle yet, so we - * can bail cleanly and fallback to TGSI compiler f/e - */ - bool error; -}; - -/* gpu pointer size in units of 32bit registers/slots */ -static unsigned pointer_size(struct ir3_context *ctx) -{ - return (ctx->compiler->gpu_id >= 500) ? 
2 : 1; -} - -static struct ir3_instruction * create_immed(struct ir3_block *block, uint32_t val); -static struct ir3_block * get_block(struct ir3_context *ctx, const nir_block *nblock); - - -static struct ir3_context * -compile_init(struct ir3_compiler *compiler, - struct ir3_shader_variant *so) -{ - struct ir3_context *ctx = rzalloc(NULL, struct ir3_context); - - if (compiler->gpu_id >= 400) { - if (so->type == SHADER_VERTEX) { - ctx->astc_srgb = so->key.vastc_srgb; - } else if (so->type == SHADER_FRAGMENT) { - ctx->astc_srgb = so->key.fastc_srgb; - } - - } else { - if (so->type == SHADER_VERTEX) { - ctx->samples = so->key.vsamples; - } else if (so->type == SHADER_FRAGMENT) { - ctx->samples = so->key.fsamples; - } - } - - ctx->compiler = compiler; - ctx->so = so; - ctx->def_ht = _mesa_hash_table_create(ctx, - _mesa_hash_pointer, _mesa_key_pointer_equal); - ctx->block_ht = _mesa_hash_table_create(ctx, - _mesa_hash_pointer, _mesa_key_pointer_equal); - - /* TODO: maybe generate some sort of bitmask of what key - * lowers vs what shader has (ie. no need to lower - * texture clamp lowering if no texture sample instrs).. - * although should be done further up the stack to avoid - * creating duplicate variants.. 
- */ - - if (ir3_key_lowers_nir(&so->key)) { - nir_shader *s = nir_shader_clone(ctx, so->shader->nir); - ctx->s = ir3_optimize_nir(so->shader, s, &so->key); - } else { - /* fast-path for shader key that lowers nothing in NIR: */ - ctx->s = so->shader->nir; - } - - /* this needs to be the last pass run, so do this here instead of - * in ir3_optimize_nir(): - */ - NIR_PASS_V(ctx->s, nir_lower_locals_to_regs); - NIR_PASS_V(ctx->s, nir_convert_from_ssa, true); - - if (fd_mesa_debug & FD_DBG_DISASM) { - DBG("dump nir%dv%d: type=%d, k={cts=%u,hp=%u}", - so->shader->id, so->id, so->type, - so->key.color_two_side, so->key.half_precision); - nir_print_shader(ctx->s, stdout); - } - - if (shader_debug_enabled(so->type)) { - fprintf(stderr, "NIR (final form) for %s shader:\n", - shader_stage_name(so->type)); - nir_print_shader(ctx->s, stderr); - } - - ir3_nir_scan_driver_consts(ctx->s, &so->const_layout); - - so->num_uniforms = ctx->s->num_uniforms; - so->num_ubos = ctx->s->info.num_ubos; - - /* Layout of constant registers, each section aligned to vec4. Note - * that pointer size (ubo, etc) changes depending on generation. - * - * user consts - * UBO addresses - * SSBO sizes - * if (vertex shader) { - * driver params (IR3_DP_*) - * if (stream_output.num_outputs > 0) - * stream-out addresses - * } - * immediates - * - * Immediates go last mostly because they are inserted in the CP pass - * after the nir -> ir3 frontend. 
- */ - unsigned constoff = align(ctx->s->num_uniforms, 4); - unsigned ptrsz = pointer_size(ctx); - - memset(&so->constbase, ~0, sizeof(so->constbase)); - - if (so->num_ubos > 0) { - so->constbase.ubo = constoff; - constoff += align(ctx->s->info.num_ubos * ptrsz, 4) / 4; - } - - if (so->const_layout.ssbo_size.count > 0) { - unsigned cnt = so->const_layout.ssbo_size.count; - so->constbase.ssbo_sizes = constoff; - constoff += align(cnt, 4) / 4; - } - - if (so->const_layout.image_dims.count > 0) { - unsigned cnt = so->const_layout.image_dims.count; - so->constbase.image_dims = constoff; - constoff += align(cnt, 4) / 4; - } - - unsigned num_driver_params = 0; - if (so->type == SHADER_VERTEX) { - num_driver_params = IR3_DP_VS_COUNT; - } else if (so->type == SHADER_COMPUTE) { - num_driver_params = IR3_DP_CS_COUNT; - } - - so->constbase.driver_param = constoff; - constoff += align(num_driver_params, 4) / 4; - - if ((so->type == SHADER_VERTEX) && - (compiler->gpu_id < 500) && - so->shader->stream_output.num_outputs > 0) { - so->constbase.tfbo = constoff; - constoff += align(PIPE_MAX_SO_BUFFERS * ptrsz, 4) / 4; - } - - so->constbase.immediate = constoff; - - return ctx; -} - -static void -compile_error(struct ir3_context *ctx, const char *format, ...) 
-{ - struct hash_table *errors = NULL; - va_list ap; - va_start(ap, format); - if (ctx->cur_instr) { - errors = _mesa_hash_table_create(NULL, - _mesa_hash_pointer, - _mesa_key_pointer_equal); - char *msg = ralloc_vasprintf(errors, format, ap); - _mesa_hash_table_insert(errors, ctx->cur_instr, msg); - } else { - _debug_vprintf(format, ap); - } - va_end(ap); - nir_print_shader_annotated(ctx->s, stdout, errors); - ralloc_free(errors); - ctx->error = true; - debug_assert(0); -} - -#define compile_assert(ctx, cond) do { \ - if (!(cond)) compile_error((ctx), "failed assert: "#cond"\n"); \ - } while (0) - -static void -compile_free(struct ir3_context *ctx) -{ - ralloc_free(ctx); -} - -static void -declare_array(struct ir3_context *ctx, nir_register *reg) -{ - struct ir3_array *arr = rzalloc(ctx, struct ir3_array); - arr->id = ++ctx->num_arrays; - /* NOTE: sometimes we get non array regs, for example for arrays of - * length 1. See fs-const-array-of-struct-of-array.shader_test. So - * treat a non-array as if it was an array of length 1. - * - * It would be nice if there was a nir pass to convert arrays of - * length 1 to ssa. 
- */ - arr->length = reg->num_components * MAX2(1, reg->num_array_elems); - compile_assert(ctx, arr->length > 0); - arr->r = reg; - list_addtail(&arr->node, &ctx->ir->array_list); -} - -static struct ir3_array * -get_array(struct ir3_context *ctx, nir_register *reg) -{ - list_for_each_entry (struct ir3_array, arr, &ctx->ir->array_list, node) { - if (arr->r == reg) - return arr; - } - compile_error(ctx, "bogus reg: %s\n", reg->name); - return NULL; -} - -/* relative (indirect) if address!=NULL */ -static struct ir3_instruction * -create_array_load(struct ir3_context *ctx, struct ir3_array *arr, int n, - struct ir3_instruction *address) -{ - struct ir3_block *block = ctx->block; - struct ir3_instruction *mov; - struct ir3_register *src; - - mov = ir3_instr_create(block, OPC_MOV); - mov->cat1.src_type = TYPE_U32; - mov->cat1.dst_type = TYPE_U32; - mov->barrier_class = IR3_BARRIER_ARRAY_R; - mov->barrier_conflict = IR3_BARRIER_ARRAY_W; - ir3_reg_create(mov, 0, 0); - src = ir3_reg_create(mov, 0, IR3_REG_ARRAY | - COND(address, IR3_REG_RELATIV)); - src->instr = arr->last_write; - src->size = arr->length; - src->array.id = arr->id; - src->array.offset = n; - - if (address) - ir3_instr_set_address(mov, address); - - return mov; -} - -/* relative (indirect) if address!=NULL */ -static void -create_array_store(struct ir3_context *ctx, struct ir3_array *arr, int n, - struct ir3_instruction *src, struct ir3_instruction *address) -{ - struct ir3_block *block = ctx->block; - struct ir3_instruction *mov; - struct ir3_register *dst; - - /* if not relative store, don't create an extra mov, since that - * ends up being difficult for cp to remove. 
- */ - if (!address) { - dst = src->regs[0]; - - src->barrier_class |= IR3_BARRIER_ARRAY_W; - src->barrier_conflict |= IR3_BARRIER_ARRAY_R | IR3_BARRIER_ARRAY_W; - - dst->flags |= IR3_REG_ARRAY; - dst->instr = arr->last_write; - dst->size = arr->length; - dst->array.id = arr->id; - dst->array.offset = n; - - arr->last_write = src; - - array_insert(block, block->keeps, src); - - return; - } - - mov = ir3_instr_create(block, OPC_MOV); - mov->cat1.src_type = TYPE_U32; - mov->cat1.dst_type = TYPE_U32; - mov->barrier_class = IR3_BARRIER_ARRAY_W; - mov->barrier_conflict = IR3_BARRIER_ARRAY_R | IR3_BARRIER_ARRAY_W; - dst = ir3_reg_create(mov, 0, IR3_REG_ARRAY | - COND(address, IR3_REG_RELATIV)); - dst->instr = arr->last_write; - dst->size = arr->length; - dst->array.id = arr->id; - dst->array.offset = n; - ir3_reg_create(mov, 0, IR3_REG_SSA)->instr = src; - - if (address) - ir3_instr_set_address(mov, address); - - arr->last_write = mov; - - /* the array store may only matter to something in an earlier - * block (ie. loops), but since arrays are not in SSA, depth - * pass won't know this.. 
so keep all array stores: - */ - array_insert(block, block->keeps, mov); -} - -static inline type_t utype_for_size(unsigned bit_size) -{ - switch (bit_size) { - case 32: return TYPE_U32; - case 16: return TYPE_U16; - case 8: return TYPE_U8; - default: unreachable("bad bitsize"); return ~0; - } -} - -static inline type_t utype_src(nir_src src) -{ return utype_for_size(nir_src_bit_size(src)); } - -static inline type_t utype_dst(nir_dest dst) -{ return utype_for_size(nir_dest_bit_size(dst)); } - -/* allocate a n element value array (to be populated by caller) and - * insert in def_ht - */ -static struct ir3_instruction ** -get_dst_ssa(struct ir3_context *ctx, nir_ssa_def *dst, unsigned n) -{ - struct ir3_instruction **value = - ralloc_array(ctx->def_ht, struct ir3_instruction *, n); - _mesa_hash_table_insert(ctx->def_ht, dst, value); - return value; -} - -static struct ir3_instruction ** -get_dst(struct ir3_context *ctx, nir_dest *dst, unsigned n) -{ - struct ir3_instruction **value; - - if (dst->is_ssa) { - value = get_dst_ssa(ctx, &dst->ssa, n); - } else { - value = ralloc_array(ctx, struct ir3_instruction *, n); - } - - /* NOTE: in non-ssa case, we don't really need to store last_dst - * but this helps us catch cases where put_dst() call is forgotten - */ - compile_assert(ctx, !ctx->last_dst); - ctx->last_dst = value; - ctx->last_dst_n = n; - - return value; -} - -static struct ir3_instruction * get_addr(struct ir3_context *ctx, struct ir3_instruction *src, int align); - -static struct ir3_instruction * const * -get_src(struct ir3_context *ctx, nir_src *src) -{ - if (src->is_ssa) { - struct hash_entry *entry; - entry = _mesa_hash_table_search(ctx->def_ht, src->ssa); - compile_assert(ctx, entry); - return entry->data; - } else { - nir_register *reg = src->reg.reg; - struct ir3_array *arr = get_array(ctx, reg); - unsigned num_components = arr->r->num_components; - struct ir3_instruction *addr = NULL; - struct ir3_instruction **value = - ralloc_array(ctx, struct 
ir3_instruction *, num_components); - - if (src->reg.indirect) - addr = get_addr(ctx, get_src(ctx, src->reg.indirect)[0], - reg->num_components); - - for (unsigned i = 0; i < num_components; i++) { - unsigned n = src->reg.base_offset * reg->num_components + i; - compile_assert(ctx, n < arr->length); - value[i] = create_array_load(ctx, arr, n, addr); - } - - return value; - } -} - -static void -put_dst(struct ir3_context *ctx, nir_dest *dst) -{ - unsigned bit_size = nir_dest_bit_size(*dst); - - if (bit_size < 32) { - for (unsigned i = 0; i < ctx->last_dst_n; i++) { - struct ir3_instruction *dst = ctx->last_dst[i]; - dst->regs[0]->flags |= IR3_REG_HALF; - if (ctx->last_dst[i]->opc == OPC_META_FO) - dst->regs[1]->instr->regs[0]->flags |= IR3_REG_HALF; - } - } - - if (!dst->is_ssa) { - nir_register *reg = dst->reg.reg; - struct ir3_array *arr = get_array(ctx, reg); - unsigned num_components = ctx->last_dst_n; - struct ir3_instruction *addr = NULL; - - if (dst->reg.indirect) - addr = get_addr(ctx, get_src(ctx, dst->reg.indirect)[0], - reg->num_components); - - for (unsigned i = 0; i < num_components; i++) { - unsigned n = dst->reg.base_offset * reg->num_components + i; - compile_assert(ctx, n < arr->length); - if (!ctx->last_dst[i]) - continue; - create_array_store(ctx, arr, n, ctx->last_dst[i], addr); - } - - ralloc_free(ctx->last_dst); - } - ctx->last_dst = NULL; - ctx->last_dst_n = 0; -} - -static struct ir3_instruction * -create_immed_typed(struct ir3_block *block, uint32_t val, type_t type) -{ - struct ir3_instruction *mov; - unsigned flags = (type_size(type) < 32) ? 
IR3_REG_HALF : 0; - - mov = ir3_instr_create(block, OPC_MOV); - mov->cat1.src_type = type; - mov->cat1.dst_type = type; - ir3_reg_create(mov, 0, flags); - ir3_reg_create(mov, 0, IR3_REG_IMMED)->uim_val = val; - - return mov; -} - -static struct ir3_instruction * -create_immed(struct ir3_block *block, uint32_t val) -{ - return create_immed_typed(block, val, TYPE_U32); -} - -static struct ir3_instruction * -create_addr(struct ir3_block *block, struct ir3_instruction *src, int align) -{ - struct ir3_instruction *instr, *immed; - - /* TODO in at least some cases, the backend could probably be - * made clever enough to propagate IR3_REG_HALF.. - */ - instr = ir3_COV(block, src, TYPE_U32, TYPE_S16); - instr->regs[0]->flags |= IR3_REG_HALF; - - switch(align){ - case 1: - /* src *= 1: */ - break; - case 2: - /* src *= 2 => src <<= 1: */ - immed = create_immed(block, 1); - immed->regs[0]->flags |= IR3_REG_HALF; - - instr = ir3_SHL_B(block, instr, 0, immed, 0); - instr->regs[0]->flags |= IR3_REG_HALF; - instr->regs[1]->flags |= IR3_REG_HALF; - break; - case 3: - /* src *= 3: */ - immed = create_immed(block, 3); - immed->regs[0]->flags |= IR3_REG_HALF; - - instr = ir3_MULL_U(block, instr, 0, immed, 0); - instr->regs[0]->flags |= IR3_REG_HALF; - instr->regs[1]->flags |= IR3_REG_HALF; - break; - case 4: - /* src *= 4 => src <<= 2: */ - immed = create_immed(block, 2); - immed->regs[0]->flags |= IR3_REG_HALF; - - instr = ir3_SHL_B(block, instr, 0, immed, 0); - instr->regs[0]->flags |= IR3_REG_HALF; - instr->regs[1]->flags |= IR3_REG_HALF; - break; - default: - unreachable("bad align"); - return NULL; - } - - instr = ir3_MOV(block, instr, TYPE_S16); - instr->regs[0]->num = regid(REG_A0, 0); - instr->regs[0]->flags |= IR3_REG_HALF; - instr->regs[1]->flags |= IR3_REG_HALF; - - return instr; -} - -/* caches addr values to avoid generating multiple cov/shl/mova - * sequences for each use of a given NIR level src as address - */ -static struct ir3_instruction * -get_addr(struct 
ir3_context *ctx, struct ir3_instruction *src, int align) -{ - struct ir3_instruction *addr; - unsigned idx = align - 1; - - compile_assert(ctx, idx < ARRAY_SIZE(ctx->addr_ht)); - - if (!ctx->addr_ht[idx]) { - ctx->addr_ht[idx] = _mesa_hash_table_create(ctx, - _mesa_hash_pointer, _mesa_key_pointer_equal); - } else { - struct hash_entry *entry; - entry = _mesa_hash_table_search(ctx->addr_ht[idx], src); - if (entry) - return entry->data; - } - - addr = create_addr(ctx->block, src, align); - _mesa_hash_table_insert(ctx->addr_ht[idx], src, addr); - - return addr; -} - -static struct ir3_instruction * -get_predicate(struct ir3_context *ctx, struct ir3_instruction *src) -{ - struct ir3_block *b = ctx->block; - struct ir3_instruction *cond; - - /* NOTE: only cmps.*.* can write p0.x: */ - cond = ir3_CMPS_S(b, src, 0, create_immed(b, 0), 0); - cond->cat2.condition = IR3_COND_NE; - - /* condition always goes in predicate register: */ - cond->regs[0]->num = regid(REG_P0, 0); - - return cond; -} - -static struct ir3_instruction * -create_uniform(struct ir3_context *ctx, unsigned n) -{ - struct ir3_instruction *mov; - - mov = ir3_instr_create(ctx->block, OPC_MOV); - /* TODO get types right? 
*/ - mov->cat1.src_type = TYPE_F32; - mov->cat1.dst_type = TYPE_F32; - ir3_reg_create(mov, 0, 0); - ir3_reg_create(mov, n, IR3_REG_CONST); - - return mov; -} - -static struct ir3_instruction * -create_uniform_indirect(struct ir3_context *ctx, int n, - struct ir3_instruction *address) -{ - struct ir3_instruction *mov; - - mov = ir3_instr_create(ctx->block, OPC_MOV); - mov->cat1.src_type = TYPE_U32; - mov->cat1.dst_type = TYPE_U32; - ir3_reg_create(mov, 0, 0); - ir3_reg_create(mov, 0, IR3_REG_CONST | IR3_REG_RELATIV)->array.offset = n; - - ir3_instr_set_address(mov, address); - - return mov; -} - -static struct ir3_instruction * -create_collect(struct ir3_context *ctx, struct ir3_instruction *const *arr, - unsigned arrsz) -{ - struct ir3_block *block = ctx->block; - struct ir3_instruction *collect; - - if (arrsz == 0) - return NULL; - - unsigned flags = arr[0]->regs[0]->flags & IR3_REG_HALF; - - collect = ir3_instr_create2(block, OPC_META_FI, 1 + arrsz); - ir3_reg_create(collect, 0, flags); /* dst */ - for (unsigned i = 0; i < arrsz; i++) { - struct ir3_instruction *elem = arr[i]; - - /* Since arrays are pre-colored in RA, we can't assume that - * things will end up in the right place. (Ie. if a collect - * joins elements from two different arrays.) So insert an - * extra mov. - * - * We could possibly skip this if all the collected elements - * are contiguous elements in a single array.. not sure how - * likely that is to happen. - * - * Fixes a problem with glamor shaders, that in effect do - * something like: - * - * if (foo) - * texcoord = .. - * else - * texcoord = .. - * color = texture2D(tex, texcoord); - * - * In this case, texcoord will end up as nir registers (which - * translate to ir3 array's of length 1. And we can't assume - * the two (or more) arrays will get allocated in consecutive - * scalar registers. - * - */ - if (elem->regs[0]->flags & IR3_REG_ARRAY) { - type_t type = (flags & IR3_REG_HALF) ? 
TYPE_U16 : TYPE_U32; - elem = ir3_MOV(block, elem, type); - } - - compile_assert(ctx, (elem->regs[0]->flags & IR3_REG_HALF) == flags); - ir3_reg_create(collect, 0, IR3_REG_SSA | flags)->instr = elem; - } - - return collect; -} - -static struct ir3_instruction * -create_indirect_load(struct ir3_context *ctx, unsigned arrsz, int n, - struct ir3_instruction *address, struct ir3_instruction *collect) -{ - struct ir3_block *block = ctx->block; - struct ir3_instruction *mov; - struct ir3_register *src; - - mov = ir3_instr_create(block, OPC_MOV); - mov->cat1.src_type = TYPE_U32; - mov->cat1.dst_type = TYPE_U32; - ir3_reg_create(mov, 0, 0); - src = ir3_reg_create(mov, 0, IR3_REG_SSA | IR3_REG_RELATIV); - src->instr = collect; - src->size = arrsz; - src->array.offset = n; - - ir3_instr_set_address(mov, address); - - return mov; -} - -static struct ir3_instruction * -create_input_compmask(struct ir3_context *ctx, unsigned n, unsigned compmask) -{ - struct ir3_instruction *in; - - in = ir3_instr_create(ctx->in_block, OPC_META_INPUT); - in->inout.block = ctx->in_block; - ir3_reg_create(in, n, 0); - - in->regs[0]->wrmask = compmask; - - return in; -} - -static struct ir3_instruction * -create_input(struct ir3_context *ctx, unsigned n) -{ - return create_input_compmask(ctx, n, 0x1); -} - -static struct ir3_instruction * -create_frag_input(struct ir3_context *ctx, bool use_ldlv) -{ - struct ir3_block *block = ctx->block; - struct ir3_instruction *instr; - /* actual inloc is assigned and fixed up later: */ - struct ir3_instruction *inloc = create_immed(block, 0); - - if (use_ldlv) { - instr = ir3_LDLV(block, inloc, 0, create_immed(block, 1), 0); - instr->cat6.type = TYPE_U32; - instr->cat6.iim_val = 1; - } else { - instr = ir3_BARY_F(block, inloc, 0, ctx->frag_vcoord, 0); - instr->regs[2]->wrmask = 0x3; - } - - return instr; -} - -static struct ir3_instruction * -create_driver_param(struct ir3_context *ctx, enum ir3_driver_param dp) -{ - /* first four vec4 sysval's reserved for 
UBOs: */ - /* NOTE: dp is in scalar, but there can be >4 dp components: */ - unsigned n = ctx->so->constbase.driver_param; - unsigned r = regid(n + dp / 4, dp % 4); - return create_uniform(ctx, r); -} - -/* helper for instructions that produce multiple consecutive scalar - * outputs which need to have a split/fanout meta instruction inserted - */ -static void -split_dest(struct ir3_block *block, struct ir3_instruction **dst, - struct ir3_instruction *src, unsigned base, unsigned n) -{ - struct ir3_instruction *prev = NULL; - - if ((n == 1) && (src->regs[0]->wrmask == 0x1)) { - dst[0] = src; - return; - } - - for (int i = 0, j = 0; i < n; i++) { - struct ir3_instruction *split = ir3_instr_create(block, OPC_META_FO); - ir3_reg_create(split, 0, IR3_REG_SSA); - ir3_reg_create(split, 0, IR3_REG_SSA)->instr = src; - split->fo.off = i + base; - - if (prev) { - split->cp.left = prev; - split->cp.left_cnt++; - prev->cp.right = split; - prev->cp.right_cnt++; - } - prev = split; - - if (src->regs[0]->wrmask & (1 << (i + base))) - dst[j++] = split; - } -} - -/* - * Adreno uses uint rather than having dedicated bool type, - * which (potentially) requires some conversion, in particular - * when using output of an bool instr to int input, or visa - * versa. - * - * | Adreno | NIR | - * -------+---------+-------+- - * true | 1 | ~0 | - * false | 0 | 0 | - * - * To convert from an adreno bool (uint) to nir, use: - * - * absneg.s dst, (neg)src - * - * To convert back in the other direction: - * - * absneg.s dst, (abs)arc - * - * The CP step can clean up the absneg.s that cancel each other - * out, and with a slight bit of extra cleverness (to recognize - * the instructions which produce either a 0 or 1) can eliminate - * the absneg.s's completely when an instruction that wants - * 0/1 consumes the result. For example, when a nir 'bcsel' - * consumes the result of 'feq'. 
So we should be able to get by - * without a boolean resolve step, and without incuring any - * extra penalty in instruction count. - */ - -/* NIR bool -> native (adreno): */ -static struct ir3_instruction * -ir3_b2n(struct ir3_block *block, struct ir3_instruction *instr) -{ - return ir3_ABSNEG_S(block, instr, IR3_REG_SABS); -} - -/* native (adreno) -> NIR bool: */ -static struct ir3_instruction * -ir3_n2b(struct ir3_block *block, struct ir3_instruction *instr) -{ - return ir3_ABSNEG_S(block, instr, IR3_REG_SNEG); -} - -/* - * alu/sfu instructions: - */ - -static struct ir3_instruction * -create_cov(struct ir3_context *ctx, struct ir3_instruction *src, - unsigned src_bitsize, nir_op op) -{ - type_t src_type, dst_type; - - switch (op) { - case nir_op_f2f32: - case nir_op_f2f16_rtne: - case nir_op_f2f16_rtz: - case nir_op_f2f16: - case nir_op_f2i32: - case nir_op_f2i16: - case nir_op_f2i8: - case nir_op_f2u32: - case nir_op_f2u16: - case nir_op_f2u8: - switch (src_bitsize) { - case 32: - src_type = TYPE_F32; - break; - case 16: - src_type = TYPE_F16; - break; - default: - compile_error(ctx, "invalid src bit size: %u", src_bitsize); - } - break; - - case nir_op_i2f32: - case nir_op_i2f16: - case nir_op_i2i32: - case nir_op_i2i16: - case nir_op_i2i8: - switch (src_bitsize) { - case 32: - src_type = TYPE_S32; - break; - case 16: - src_type = TYPE_S16; - break; - case 8: - src_type = TYPE_S8; - break; - default: - compile_error(ctx, "invalid src bit size: %u", src_bitsize); - } - break; - - case nir_op_u2f32: - case nir_op_u2f16: - case nir_op_u2u32: - case nir_op_u2u16: - case nir_op_u2u8: - switch (src_bitsize) { - case 32: - src_type = TYPE_U32; - break; - case 16: - src_type = TYPE_U16; - break; - case 8: - src_type = TYPE_U8; - break; - default: - compile_error(ctx, "invalid src bit size: %u", src_bitsize); - } - break; - - default: - compile_error(ctx, "invalid conversion op: %u", op); - } - - switch (op) { - case nir_op_f2f32: - case nir_op_i2f32: - case 
nir_op_u2f32: - dst_type = TYPE_F32; - break; - - case nir_op_f2f16_rtne: - case nir_op_f2f16_rtz: - case nir_op_f2f16: - /* TODO how to handle rounding mode? */ - case nir_op_i2f16: - case nir_op_u2f16: - dst_type = TYPE_F16; - break; - - case nir_op_f2i32: - case nir_op_i2i32: - dst_type = TYPE_S32; - break; - - case nir_op_f2i16: - case nir_op_i2i16: - dst_type = TYPE_S16; - break; - - case nir_op_f2i8: - case nir_op_i2i8: - dst_type = TYPE_S8; - break; - - case nir_op_f2u32: - case nir_op_u2u32: - dst_type = TYPE_U32; - break; - - case nir_op_f2u16: - case nir_op_u2u16: - dst_type = TYPE_U16; - break; - - case nir_op_f2u8: - case nir_op_u2u8: - dst_type = TYPE_U8; - break; - - default: - compile_error(ctx, "invalid conversion op: %u", op); - } - - return ir3_COV(ctx->block, src, src_type, dst_type); -} - -static void -emit_alu(struct ir3_context *ctx, nir_alu_instr *alu) -{ - const nir_op_info *info = &nir_op_infos[alu->op]; - struct ir3_instruction **dst, *src[info->num_inputs]; - unsigned bs[info->num_inputs]; /* bit size */ - struct ir3_block *b = ctx->block; - unsigned dst_sz, wrmask; - - if (alu->dest.dest.is_ssa) { - dst_sz = alu->dest.dest.ssa.num_components; - wrmask = (1 << dst_sz) - 1; - } else { - dst_sz = alu->dest.dest.reg.reg->num_components; - wrmask = alu->dest.write_mask; - } - - dst = get_dst(ctx, &alu->dest.dest, dst_sz); - - /* Vectors are special in that they have non-scalarized writemasks, - * and just take the first swizzle channel for each argument in - * order into each writemask channel. 
- */ - if ((alu->op == nir_op_vec2) || - (alu->op == nir_op_vec3) || - (alu->op == nir_op_vec4)) { - - for (int i = 0; i < info->num_inputs; i++) { - nir_alu_src *asrc = &alu->src[i]; - - compile_assert(ctx, !asrc->abs); - compile_assert(ctx, !asrc->negate); - - src[i] = get_src(ctx, &asrc->src)[asrc->swizzle[0]]; - if (!src[i]) - src[i] = create_immed(ctx->block, 0); - dst[i] = ir3_MOV(b, src[i], TYPE_U32); - } - - put_dst(ctx, &alu->dest.dest); - return; - } - - /* We also get mov's with more than one component for mov's so - * handle those specially: - */ - if ((alu->op == nir_op_imov) || (alu->op == nir_op_fmov)) { - type_t type = (alu->op == nir_op_imov) ? TYPE_U32 : TYPE_F32; - nir_alu_src *asrc = &alu->src[0]; - struct ir3_instruction *const *src0 = get_src(ctx, &asrc->src); - - for (unsigned i = 0; i < dst_sz; i++) { - if (wrmask & (1 << i)) { - dst[i] = ir3_MOV(b, src0[asrc->swizzle[i]], type); - } else { - dst[i] = NULL; - } - } - - put_dst(ctx, &alu->dest.dest); - return; - } - - /* General case: We can just grab the one used channel per src. 
*/ - for (int i = 0; i < info->num_inputs; i++) { - unsigned chan = ffs(alu->dest.write_mask) - 1; - nir_alu_src *asrc = &alu->src[i]; - - compile_assert(ctx, !asrc->abs); - compile_assert(ctx, !asrc->negate); - - src[i] = get_src(ctx, &asrc->src)[asrc->swizzle[chan]]; - bs[i] = nir_src_bit_size(asrc->src); - - compile_assert(ctx, src[i]); - } - - switch (alu->op) { - case nir_op_f2f32: - case nir_op_f2f16_rtne: - case nir_op_f2f16_rtz: - case nir_op_f2f16: - case nir_op_f2i32: - case nir_op_f2i16: - case nir_op_f2i8: - case nir_op_f2u32: - case nir_op_f2u16: - case nir_op_f2u8: - case nir_op_i2f32: - case nir_op_i2f16: - case nir_op_i2i32: - case nir_op_i2i16: - case nir_op_i2i8: - case nir_op_u2f32: - case nir_op_u2f16: - case nir_op_u2u32: - case nir_op_u2u16: - case nir_op_u2u8: - dst[0] = create_cov(ctx, src[0], bs[0], alu->op); - break; - case nir_op_f2b: - dst[0] = ir3_CMPS_F(b, src[0], 0, create_immed(b, fui(0.0)), 0); - dst[0]->cat2.condition = IR3_COND_NE; - dst[0] = ir3_n2b(b, dst[0]); - break; - case nir_op_b2f: - dst[0] = ir3_COV(b, ir3_b2n(b, src[0]), TYPE_U32, TYPE_F32); - break; - case nir_op_b2i: - dst[0] = ir3_b2n(b, src[0]); - break; - case nir_op_i2b: - dst[0] = ir3_CMPS_S(b, src[0], 0, create_immed(b, 0), 0); - dst[0]->cat2.condition = IR3_COND_NE; - dst[0] = ir3_n2b(b, dst[0]); - break; - - case nir_op_fneg: - dst[0] = ir3_ABSNEG_F(b, src[0], IR3_REG_FNEG); - break; - case nir_op_fabs: - dst[0] = ir3_ABSNEG_F(b, src[0], IR3_REG_FABS); - break; - case nir_op_fmax: - dst[0] = ir3_MAX_F(b, src[0], 0, src[1], 0); - break; - case nir_op_fmin: - dst[0] = ir3_MIN_F(b, src[0], 0, src[1], 0); - break; - case nir_op_fsat: - /* if there is just a single use of the src, and it supports - * (sat) bit, we can just fold the (sat) flag back to the - * src instruction and create a mov. This is easier for cp - * to eliminate. 
- * - * TODO probably opc_cat==4 is ok too - */ - if (alu->src[0].src.is_ssa && - (list_length(&alu->src[0].src.ssa->uses) == 1) && - ((opc_cat(src[0]->opc) == 2) || (opc_cat(src[0]->opc) == 3))) { - src[0]->flags |= IR3_INSTR_SAT; - dst[0] = ir3_MOV(b, src[0], TYPE_U32); - } else { - /* otherwise generate a max.f that saturates.. blob does - * similar (generating a cat2 mov using max.f) - */ - dst[0] = ir3_MAX_F(b, src[0], 0, src[0], 0); - dst[0]->flags |= IR3_INSTR_SAT; - } - break; - case nir_op_fmul: - dst[0] = ir3_MUL_F(b, src[0], 0, src[1], 0); - break; - case nir_op_fadd: - dst[0] = ir3_ADD_F(b, src[0], 0, src[1], 0); - break; - case nir_op_fsub: - dst[0] = ir3_ADD_F(b, src[0], 0, src[1], IR3_REG_FNEG); - break; - case nir_op_ffma: - dst[0] = ir3_MAD_F32(b, src[0], 0, src[1], 0, src[2], 0); - break; - case nir_op_fddx: - dst[0] = ir3_DSX(b, src[0], 0); - dst[0]->cat5.type = TYPE_F32; - break; - case nir_op_fddy: - dst[0] = ir3_DSY(b, src[0], 0); - dst[0]->cat5.type = TYPE_F32; - break; - break; - case nir_op_flt: - dst[0] = ir3_CMPS_F(b, src[0], 0, src[1], 0); - dst[0]->cat2.condition = IR3_COND_LT; - dst[0] = ir3_n2b(b, dst[0]); - break; - case nir_op_fge: - dst[0] = ir3_CMPS_F(b, src[0], 0, src[1], 0); - dst[0]->cat2.condition = IR3_COND_GE; - dst[0] = ir3_n2b(b, dst[0]); - break; - case nir_op_feq: - dst[0] = ir3_CMPS_F(b, src[0], 0, src[1], 0); - dst[0]->cat2.condition = IR3_COND_EQ; - dst[0] = ir3_n2b(b, dst[0]); - break; - case nir_op_fne: - dst[0] = ir3_CMPS_F(b, src[0], 0, src[1], 0); - dst[0]->cat2.condition = IR3_COND_NE; - dst[0] = ir3_n2b(b, dst[0]); - break; - case nir_op_fceil: - dst[0] = ir3_CEIL_F(b, src[0], 0); - break; - case nir_op_ffloor: - dst[0] = ir3_FLOOR_F(b, src[0], 0); - break; - case nir_op_ftrunc: - dst[0] = ir3_TRUNC_F(b, src[0], 0); - break; - case nir_op_fround_even: - dst[0] = ir3_RNDNE_F(b, src[0], 0); - break; - case nir_op_fsign: - dst[0] = ir3_SIGN_F(b, src[0], 0); - break; - - case nir_op_fsin: - dst[0] = ir3_SIN(b, 
src[0], 0); - break; - case nir_op_fcos: - dst[0] = ir3_COS(b, src[0], 0); - break; - case nir_op_frsq: - dst[0] = ir3_RSQ(b, src[0], 0); - break; - case nir_op_frcp: - dst[0] = ir3_RCP(b, src[0], 0); - break; - case nir_op_flog2: - dst[0] = ir3_LOG2(b, src[0], 0); - break; - case nir_op_fexp2: - dst[0] = ir3_EXP2(b, src[0], 0); - break; - case nir_op_fsqrt: - dst[0] = ir3_SQRT(b, src[0], 0); - break; - - case nir_op_iabs: - dst[0] = ir3_ABSNEG_S(b, src[0], IR3_REG_SABS); - break; - case nir_op_iadd: - dst[0] = ir3_ADD_U(b, src[0], 0, src[1], 0); - break; - case nir_op_iand: - dst[0] = ir3_AND_B(b, src[0], 0, src[1], 0); - break; - case nir_op_imax: - dst[0] = ir3_MAX_S(b, src[0], 0, src[1], 0); - break; - case nir_op_umax: - dst[0] = ir3_MAX_U(b, src[0], 0, src[1], 0); - break; - case nir_op_imin: - dst[0] = ir3_MIN_S(b, src[0], 0, src[1], 0); - break; - case nir_op_umin: - dst[0] = ir3_MIN_U(b, src[0], 0, src[1], 0); - break; - case nir_op_imul: - /* - * dst = (al * bl) + (ah * bl << 16) + (al * bh << 16) - * mull.u tmp0, a, b ; mul low, i.e. al * bl - * madsh.m16 tmp1, a, b, tmp0 ; mul-add shift high mix, i.e. ah * bl << 16 - * madsh.m16 dst, b, a, tmp1 ; i.e. al * bh << 16 - */ - dst[0] = ir3_MADSH_M16(b, src[1], 0, src[0], 0, - ir3_MADSH_M16(b, src[0], 0, src[1], 0, - ir3_MULL_U(b, src[0], 0, src[1], 0), 0), 0); - break; - case nir_op_ineg: - dst[0] = ir3_ABSNEG_S(b, src[0], IR3_REG_SNEG); - break; - case nir_op_inot: - dst[0] = ir3_NOT_B(b, src[0], 0); - break; - case nir_op_ior: - dst[0] = ir3_OR_B(b, src[0], 0, src[1], 0); - break; - case nir_op_ishl: - dst[0] = ir3_SHL_B(b, src[0], 0, src[1], 0); - break; - case nir_op_ishr: - dst[0] = ir3_ASHR_B(b, src[0], 0, src[1], 0); - break; - case nir_op_isign: { - /* maybe this would be sane to lower in nir.. 
*/ - struct ir3_instruction *neg, *pos; - - neg = ir3_CMPS_S(b, src[0], 0, create_immed(b, 0), 0); - neg->cat2.condition = IR3_COND_LT; - - pos = ir3_CMPS_S(b, src[0], 0, create_immed(b, 0), 0); - pos->cat2.condition = IR3_COND_GT; - - dst[0] = ir3_SUB_U(b, pos, 0, neg, 0); - - break; - } - case nir_op_isub: - dst[0] = ir3_SUB_U(b, src[0], 0, src[1], 0); - break; - case nir_op_ixor: - dst[0] = ir3_XOR_B(b, src[0], 0, src[1], 0); - break; - case nir_op_ushr: - dst[0] = ir3_SHR_B(b, src[0], 0, src[1], 0); - break; - case nir_op_ilt: - dst[0] = ir3_CMPS_S(b, src[0], 0, src[1], 0); - dst[0]->cat2.condition = IR3_COND_LT; - dst[0] = ir3_n2b(b, dst[0]); - break; - case nir_op_ige: - dst[0] = ir3_CMPS_S(b, src[0], 0, src[1], 0); - dst[0]->cat2.condition = IR3_COND_GE; - dst[0] = ir3_n2b(b, dst[0]); - break; - case nir_op_ieq: - dst[0] = ir3_CMPS_S(b, src[0], 0, src[1], 0); - dst[0]->cat2.condition = IR3_COND_EQ; - dst[0] = ir3_n2b(b, dst[0]); - break; - case nir_op_ine: - dst[0] = ir3_CMPS_S(b, src[0], 0, src[1], 0); - dst[0]->cat2.condition = IR3_COND_NE; - dst[0] = ir3_n2b(b, dst[0]); - break; - case nir_op_ult: - dst[0] = ir3_CMPS_U(b, src[0], 0, src[1], 0); - dst[0]->cat2.condition = IR3_COND_LT; - dst[0] = ir3_n2b(b, dst[0]); - break; - case nir_op_uge: - dst[0] = ir3_CMPS_U(b, src[0], 0, src[1], 0); - dst[0]->cat2.condition = IR3_COND_GE; - dst[0] = ir3_n2b(b, dst[0]); - break; - - case nir_op_bcsel: { - struct ir3_instruction *cond = ir3_b2n(b, src[0]); - compile_assert(ctx, bs[1] == bs[2]); - /* the boolean condition is 32b even if src[1] and src[2] are - * half-precision, but sel.b16 wants all three src's to be the - * same type. 
- */ - if (bs[1] < 32) - cond = ir3_COV(b, cond, TYPE_U32, TYPE_U16); - dst[0] = ir3_SEL_B32(b, src[1], 0, cond, 0, src[2], 0); - break; - } - case nir_op_bit_count: - dst[0] = ir3_CBITS_B(b, src[0], 0); - break; - case nir_op_ifind_msb: { - struct ir3_instruction *cmp; - dst[0] = ir3_CLZ_S(b, src[0], 0); - cmp = ir3_CMPS_S(b, dst[0], 0, create_immed(b, 0), 0); - cmp->cat2.condition = IR3_COND_GE; - dst[0] = ir3_SEL_B32(b, - ir3_SUB_U(b, create_immed(b, 31), 0, dst[0], 0), 0, - cmp, 0, dst[0], 0); - break; - } - case nir_op_ufind_msb: - dst[0] = ir3_CLZ_B(b, src[0], 0); - dst[0] = ir3_SEL_B32(b, - ir3_SUB_U(b, create_immed(b, 31), 0, dst[0], 0), 0, - src[0], 0, dst[0], 0); - break; - case nir_op_find_lsb: - dst[0] = ir3_BFREV_B(b, src[0], 0); - dst[0] = ir3_CLZ_B(b, dst[0], 0); - break; - case nir_op_bitfield_reverse: - dst[0] = ir3_BFREV_B(b, src[0], 0); - break; - - default: - compile_error(ctx, "Unhandled ALU op: %s\n", - nir_op_infos[alu->op].name); - break; - } - - put_dst(ctx, &alu->dest.dest); -} - -/* handles direct/indirect UBO reads: */ -static void -emit_intrinsic_load_ubo(struct ir3_context *ctx, nir_intrinsic_instr *intr, - struct ir3_instruction **dst) -{ - struct ir3_block *b = ctx->block; - struct ir3_instruction *base_lo, *base_hi, *addr, *src0, *src1; - nir_const_value *const_offset; - /* UBO addresses are the first driver params: */ - unsigned ubo = regid(ctx->so->constbase.ubo, 0); - const unsigned ptrsz = pointer_size(ctx); - - int off = 0; - - /* First src is ubo index, which could either be an immed or not: */ - src0 = get_src(ctx, &intr->src[0])[0]; - if (is_same_type_mov(src0) && - (src0->regs[1]->flags & IR3_REG_IMMED)) { - base_lo = create_uniform(ctx, ubo + (src0->regs[1]->iim_val * ptrsz)); - base_hi = create_uniform(ctx, ubo + (src0->regs[1]->iim_val * ptrsz) + 1); - } else { - base_lo = create_uniform_indirect(ctx, ubo, get_addr(ctx, src0, 4)); - base_hi = create_uniform_indirect(ctx, ubo + 1, get_addr(ctx, src0, 4)); - } - - /* note: 
on 32bit gpu's base_hi is ignored and DCE'd */ - addr = base_lo; - - const_offset = nir_src_as_const_value(intr->src[1]); - if (const_offset) { - off += const_offset->u32[0]; - } else { - /* For load_ubo_indirect, second src is indirect offset: */ - src1 = get_src(ctx, &intr->src[1])[0]; - - /* and add offset to addr: */ - addr = ir3_ADD_S(b, addr, 0, src1, 0); - } - - /* if offset is to large to encode in the ldg, split it out: */ - if ((off + (intr->num_components * 4)) > 1024) { - /* split out the minimal amount to improve the odds that - * cp can fit the immediate in the add.s instruction: - */ - unsigned off2 = off + (intr->num_components * 4) - 1024; - addr = ir3_ADD_S(b, addr, 0, create_immed(b, off2), 0); - off -= off2; - } - - if (ptrsz == 2) { - struct ir3_instruction *carry; - - /* handle 32b rollover, ie: - * if (addr < base_lo) - * base_hi++ - */ - carry = ir3_CMPS_U(b, addr, 0, base_lo, 0); - carry->cat2.condition = IR3_COND_LT; - base_hi = ir3_ADD_S(b, base_hi, 0, carry, 0); - - addr = create_collect(ctx, (struct ir3_instruction*[]){ addr, base_hi }, 2); - } - - for (int i = 0; i < intr->num_components; i++) { - struct ir3_instruction *load = - ir3_LDG(b, addr, 0, create_immed(b, 1), 0); - load->cat6.type = TYPE_U32; - load->cat6.src_offset = off + i * 4; /* byte offset */ - dst[i] = load; - } -} - -/* src[] = { buffer_index, offset }. No const_index */ -static void -emit_intrinsic_load_ssbo(struct ir3_context *ctx, nir_intrinsic_instr *intr, - struct ir3_instruction **dst) -{ - struct ir3_block *b = ctx->block; - struct ir3_instruction *ldgb, *src0, *src1, *offset; - nir_const_value *const_offset; - - /* can this be non-const buffer_index? how do we handle that? */ - const_offset = nir_src_as_const_value(intr->src[0]); - compile_assert(ctx, const_offset); - - offset = get_src(ctx, &intr->src[1])[0]; - - /* src0 is uvec2(offset*4, 0), src1 is offset.. 
nir already *= 4: */ - src0 = create_collect(ctx, (struct ir3_instruction*[]){ - offset, - create_immed(b, 0), - }, 2); - src1 = ir3_SHR_B(b, offset, 0, create_immed(b, 2), 0); - - ldgb = ir3_LDGB(b, create_immed(b, const_offset->u32[0]), 0, - src0, 0, src1, 0); - ldgb->regs[0]->wrmask = MASK(intr->num_components); - ldgb->cat6.iim_val = intr->num_components; - ldgb->cat6.d = 4; - ldgb->cat6.type = TYPE_U32; - ldgb->barrier_class = IR3_BARRIER_BUFFER_R; - ldgb->barrier_conflict = IR3_BARRIER_BUFFER_W; - - split_dest(b, dst, ldgb, 0, intr->num_components); -} - -/* src[] = { value, block_index, offset }. const_index[] = { write_mask } */ -static void -emit_intrinsic_store_ssbo(struct ir3_context *ctx, nir_intrinsic_instr *intr) -{ - struct ir3_block *b = ctx->block; - struct ir3_instruction *stgb, *src0, *src1, *src2, *offset; - nir_const_value *const_offset; - /* TODO handle wrmask properly, see _store_shared().. but I think - * it is more a PITA than that, since blob ends up loading the - * masked components and writing them back out. - */ - unsigned wrmask = intr->const_index[0]; - unsigned ncomp = ffs(~wrmask) - 1; - - /* can this be non-const buffer_index? how do we handle that? */ - const_offset = nir_src_as_const_value(intr->src[1]); - compile_assert(ctx, const_offset); - - offset = get_src(ctx, &intr->src[2])[0]; - - /* src0 is value, src1 is offset, src2 is uvec2(offset*4, 0).. 
- * nir already *= 4: - */ - src0 = create_collect(ctx, get_src(ctx, &intr->src[0]), ncomp); - src1 = ir3_SHR_B(b, offset, 0, create_immed(b, 2), 0); - src2 = create_collect(ctx, (struct ir3_instruction*[]){ - offset, - create_immed(b, 0), - }, 2); - - stgb = ir3_STGB(b, create_immed(b, const_offset->u32[0]), 0, - src0, 0, src1, 0, src2, 0); - stgb->cat6.iim_val = ncomp; - stgb->cat6.d = 4; - stgb->cat6.type = TYPE_U32; - stgb->barrier_class = IR3_BARRIER_BUFFER_W; - stgb->barrier_conflict = IR3_BARRIER_BUFFER_R | IR3_BARRIER_BUFFER_W; - - array_insert(b, b->keeps, stgb); -} - -/* src[] = { block_index } */ -static void -emit_intrinsic_ssbo_size(struct ir3_context *ctx, nir_intrinsic_instr *intr, - struct ir3_instruction **dst) -{ - /* SSBO size stored as a const starting at ssbo_sizes: */ - unsigned blk_idx = nir_src_as_const_value(intr->src[0])->u32[0]; - unsigned idx = regid(ctx->so->constbase.ssbo_sizes, 0) + - ctx->so->const_layout.ssbo_size.off[blk_idx]; - - debug_assert(ctx->so->const_layout.ssbo_size.mask & (1 << blk_idx)); - - dst[0] = create_uniform(ctx, idx); -} - -/* - * SSBO atomic intrinsics - * - * All of the SSBO atomic memory operations read a value from memory, - * compute a new value using one of the operations below, write the new - * value to memory, and return the original value read. - * - * All operations take 3 sources except CompSwap that takes 4. These - * sources represent: - * - * 0: The SSBO buffer index. - * 1: The offset into the SSBO buffer of the variable that the atomic - * operation will operate on. - * 2: The data parameter to the atomic function (i.e. the value to add - * in ssbo_atomic_add, etc). - * 3: For CompSwap only: the second data parameter. 
- */ -static struct ir3_instruction * -emit_intrinsic_atomic_ssbo(struct ir3_context *ctx, nir_intrinsic_instr *intr) -{ - struct ir3_block *b = ctx->block; - struct ir3_instruction *atomic, *ssbo, *src0, *src1, *src2, *offset; - nir_const_value *const_offset; - type_t type = TYPE_U32; - - /* can this be non-const buffer_index? how do we handle that? */ - const_offset = nir_src_as_const_value(intr->src[0]); - compile_assert(ctx, const_offset); - ssbo = create_immed(b, const_offset->u32[0]); - - offset = get_src(ctx, &intr->src[1])[0]; - - /* src0 is data (or uvec2(data, compare)) - * src1 is offset - * src2 is uvec2(offset*4, 0) (appears to be 64b byte offset) - * - * Note that nir already multiplies the offset by four - */ - src0 = get_src(ctx, &intr->src[2])[0]; - src1 = ir3_SHR_B(b, offset, 0, create_immed(b, 2), 0); - src2 = create_collect(ctx, (struct ir3_instruction*[]){ - offset, - create_immed(b, 0), - }, 2); - - switch (intr->intrinsic) { - case nir_intrinsic_ssbo_atomic_add: - atomic = ir3_ATOMIC_ADD_G(b, ssbo, 0, src0, 0, src1, 0, src2, 0); - break; - case nir_intrinsic_ssbo_atomic_imin: - atomic = ir3_ATOMIC_MIN_G(b, ssbo, 0, src0, 0, src1, 0, src2, 0); - type = TYPE_S32; - break; - case nir_intrinsic_ssbo_atomic_umin: - atomic = ir3_ATOMIC_MIN_G(b, ssbo, 0, src0, 0, src1, 0, src2, 0); - break; - case nir_intrinsic_ssbo_atomic_imax: - atomic = ir3_ATOMIC_MAX_G(b, ssbo, 0, src0, 0, src1, 0, src2, 0); - type = TYPE_S32; - break; - case nir_intrinsic_ssbo_atomic_umax: - atomic = ir3_ATOMIC_MAX_G(b, ssbo, 0, src0, 0, src1, 0, src2, 0); - break; - case nir_intrinsic_ssbo_atomic_and: - atomic = ir3_ATOMIC_AND_G(b, ssbo, 0, src0, 0, src1, 0, src2, 0); - break; - case nir_intrinsic_ssbo_atomic_or: - atomic = ir3_ATOMIC_OR_G(b, ssbo, 0, src0, 0, src1, 0, src2, 0); - break; - case nir_intrinsic_ssbo_atomic_xor: - atomic = ir3_ATOMIC_XOR_G(b, ssbo, 0, src0, 0, src1, 0, src2, 0); - break; - case nir_intrinsic_ssbo_atomic_exchange: - atomic = ir3_ATOMIC_XCHG_G(b, 
ssbo, 0, src0, 0, src1, 0, src2, 0); - break; - case nir_intrinsic_ssbo_atomic_comp_swap: - /* for cmpxchg, src0 is [ui]vec2(data, compare): */ - src0 = create_collect(ctx, (struct ir3_instruction*[]){ - get_src(ctx, &intr->src[3])[0], - src0, - }, 2); - atomic = ir3_ATOMIC_CMPXCHG_G(b, ssbo, 0, src0, 0, src1, 0, src2, 0); - break; - default: - unreachable("boo"); - } - - atomic->cat6.iim_val = 1; - atomic->cat6.d = 4; - atomic->cat6.type = type; - atomic->barrier_class = IR3_BARRIER_BUFFER_W; - atomic->barrier_conflict = IR3_BARRIER_BUFFER_R | IR3_BARRIER_BUFFER_W; - - /* even if nothing consume the result, we can't DCE the instruction: */ - array_insert(b, b->keeps, atomic); - - return atomic; -} - -/* src[] = { offset }. const_index[] = { base } */ -static void -emit_intrinsic_load_shared(struct ir3_context *ctx, nir_intrinsic_instr *intr, - struct ir3_instruction **dst) -{ - struct ir3_block *b = ctx->block; - struct ir3_instruction *ldl, *offset; - unsigned base; - - offset = get_src(ctx, &intr->src[0])[0]; - base = nir_intrinsic_base(intr); - - ldl = ir3_LDL(b, offset, 0, create_immed(b, intr->num_components), 0); - ldl->cat6.src_offset = base; - ldl->cat6.type = utype_dst(intr->dest); - ldl->regs[0]->wrmask = MASK(intr->num_components); - - ldl->barrier_class = IR3_BARRIER_SHARED_R; - ldl->barrier_conflict = IR3_BARRIER_SHARED_W; - - split_dest(b, dst, ldl, 0, intr->num_components); -} - -/* src[] = { value, offset }. const_index[] = { base, write_mask } */ -static void -emit_intrinsic_store_shared(struct ir3_context *ctx, nir_intrinsic_instr *intr) -{ - struct ir3_block *b = ctx->block; - struct ir3_instruction *stl, *offset; - struct ir3_instruction * const *value; - unsigned base, wrmask; - - value = get_src(ctx, &intr->src[0]); - offset = get_src(ctx, &intr->src[1])[0]; - - base = nir_intrinsic_base(intr); - wrmask = nir_intrinsic_write_mask(intr); - - /* Combine groups of consecutive enabled channels in one write - * message. 
We use ffs to find the first enabled channel and then ffs on - * the bit-inverse, down-shifted writemask to determine the length of - * the block of enabled bits. - * - * (trick stolen from i965's fs_visitor::nir_emit_cs_intrinsic()) - */ - while (wrmask) { - unsigned first_component = ffs(wrmask) - 1; - unsigned length = ffs(~(wrmask >> first_component)) - 1; - - stl = ir3_STL(b, offset, 0, - create_collect(ctx, &value[first_component], length), 0, - create_immed(b, length), 0); - stl->cat6.dst_offset = first_component + base; - stl->cat6.type = utype_src(intr->src[0]); - stl->barrier_class = IR3_BARRIER_SHARED_W; - stl->barrier_conflict = IR3_BARRIER_SHARED_R | IR3_BARRIER_SHARED_W; - - array_insert(b, b->keeps, stl); - - /* Clear the bits in the writemask that we just wrote, then try - * again to see if more channels are left. - */ - wrmask &= (15 << (first_component + length)); - } -} - -/* - * CS shared variable atomic intrinsics - * - * All of the shared variable atomic memory operations read a value from - * memory, compute a new value using one of the operations below, write the - * new value to memory, and return the original value read. - * - * All operations take 2 sources except CompSwap that takes 3. These - * sources represent: - * - * 0: The offset into the shared variable storage region that the atomic - * operation will operate on. - * 1: The data parameter to the atomic function (i.e. the value to add - * in shared_atomic_add, etc). - * 2: For CompSwap only: the second data parameter. 
- */ -static struct ir3_instruction * -emit_intrinsic_atomic_shared(struct ir3_context *ctx, nir_intrinsic_instr *intr) -{ - struct ir3_block *b = ctx->block; - struct ir3_instruction *atomic, *src0, *src1; - type_t type = TYPE_U32; - - src0 = get_src(ctx, &intr->src[0])[0]; /* offset */ - src1 = get_src(ctx, &intr->src[1])[0]; /* value */ - - switch (intr->intrinsic) { - case nir_intrinsic_shared_atomic_add: - atomic = ir3_ATOMIC_ADD(b, src0, 0, src1, 0); - break; - case nir_intrinsic_shared_atomic_imin: - atomic = ir3_ATOMIC_MIN(b, src0, 0, src1, 0); - type = TYPE_S32; - break; - case nir_intrinsic_shared_atomic_umin: - atomic = ir3_ATOMIC_MIN(b, src0, 0, src1, 0); - break; - case nir_intrinsic_shared_atomic_imax: - atomic = ir3_ATOMIC_MAX(b, src0, 0, src1, 0); - type = TYPE_S32; - break; - case nir_intrinsic_shared_atomic_umax: - atomic = ir3_ATOMIC_MAX(b, src0, 0, src1, 0); - break; - case nir_intrinsic_shared_atomic_and: - atomic = ir3_ATOMIC_AND(b, src0, 0, src1, 0); - break; - case nir_intrinsic_shared_atomic_or: - atomic = ir3_ATOMIC_OR(b, src0, 0, src1, 0); - break; - case nir_intrinsic_shared_atomic_xor: - atomic = ir3_ATOMIC_XOR(b, src0, 0, src1, 0); - break; - case nir_intrinsic_shared_atomic_exchange: - atomic = ir3_ATOMIC_XCHG(b, src0, 0, src1, 0); - break; - case nir_intrinsic_shared_atomic_comp_swap: - /* for cmpxchg, src1 is [ui]vec2(data, compare): */ - src1 = create_collect(ctx, (struct ir3_instruction*[]){ - get_src(ctx, &intr->src[2])[0], - src1, - }, 2); - atomic = ir3_ATOMIC_CMPXCHG(b, src0, 0, src1, 0); - break; - default: - unreachable("boo"); - } - - atomic->cat6.iim_val = 1; - atomic->cat6.d = 1; - atomic->cat6.type = type; - atomic->barrier_class = IR3_BARRIER_SHARED_W; - atomic->barrier_conflict = IR3_BARRIER_SHARED_R | IR3_BARRIER_SHARED_W; - - /* even if nothing consume the result, we can't DCE the instruction: */ - array_insert(b, b->keeps, atomic); - - return atomic; -} - -/* Images get mapped into SSBO/image state (for 
store/atomic) and texture - * state block (for load). To simplify things, invert the image id and - * map it from end of state block, ie. image 0 becomes num-1, image 1 - * becomes num-2, etc. This potentially avoids needing to re-emit texture - * state when switching shaders. - * - * TODO is max # of samplers and SSBOs the same. This shouldn't be hard- - * coded. Also, since all the gl shader stages (ie. everything but CS) - * share the same SSBO/image state block, this might require some more - * logic if we supported images in anything other than FS.. - */ -static unsigned -get_image_slot(struct ir3_context *ctx, nir_deref_instr *deref) -{ - unsigned int loc = 0; - unsigned inner_size = 1; - - while (deref->deref_type != nir_deref_type_var) { - assert(deref->deref_type == nir_deref_type_array); - nir_const_value *const_index = nir_src_as_const_value(deref->arr.index); - assert(const_index); - - /* Go to the next instruction */ - deref = nir_deref_instr_parent(deref); - - assert(glsl_type_is_array(deref->type)); - const unsigned array_len = glsl_get_length(deref->type); - loc += MIN2(const_index->u32[0], array_len - 1) * inner_size; - - /* Update the inner size */ - inner_size *= array_len; - } - - loc += deref->var->data.driver_location; - - /* TODO figure out real limit per generation, and don't hardcode: */ - const unsigned max_samplers = 16; - return max_samplers - loc - 1; -} - -/* see tex_info() for equiv logic for texture instructions.. it would be - * nice if this could be better unified.. 
- */ -static unsigned -get_image_coords(const nir_variable *var, unsigned *flagsp) -{ - const struct glsl_type *type = glsl_without_array(var->type); - unsigned coords, flags = 0; - - switch (glsl_get_sampler_dim(type)) { - case GLSL_SAMPLER_DIM_1D: - case GLSL_SAMPLER_DIM_BUF: - coords = 1; - break; - case GLSL_SAMPLER_DIM_2D: - case GLSL_SAMPLER_DIM_RECT: - case GLSL_SAMPLER_DIM_EXTERNAL: - case GLSL_SAMPLER_DIM_MS: - coords = 2; - break; - case GLSL_SAMPLER_DIM_3D: - case GLSL_SAMPLER_DIM_CUBE: - flags |= IR3_INSTR_3D; - coords = 3; - break; - default: - unreachable("bad sampler dim"); - return 0; - } - - if (glsl_sampler_type_is_array(type)) { - /* note: unlike tex_info(), adjust # of coords to include array idx: */ - coords++; - flags |= IR3_INSTR_A; - } - - if (flagsp) - *flagsp = flags; - - return coords; -} - -static type_t -get_image_type(const nir_variable *var) -{ - switch (glsl_get_sampler_result_type(glsl_without_array(var->type))) { - case GLSL_TYPE_UINT: - return TYPE_U32; - case GLSL_TYPE_INT: - return TYPE_S32; - case GLSL_TYPE_FLOAT: - return TYPE_F32; - default: - unreachable("bad sampler type."); - return 0; - } -} - -static struct ir3_instruction * -get_image_offset(struct ir3_context *ctx, const nir_variable *var, - struct ir3_instruction * const *coords, bool byteoff) -{ - struct ir3_block *b = ctx->block; - struct ir3_instruction *offset; - unsigned ncoords = get_image_coords(var, NULL); - - /* to calculate the byte offset (yes, uggg) we need (up to) three - * const values to know the bytes per pixel, and y and z stride: - */ - unsigned cb = regid(ctx->so->constbase.image_dims, 0) + - ctx->so->const_layout.image_dims.off[var->data.driver_location]; - - debug_assert(ctx->so->const_layout.image_dims.mask & - (1 << var->data.driver_location)); - - /* offset = coords.x * bytes_per_pixel: */ - offset = ir3_MUL_S(b, coords[0], 0, create_uniform(ctx, cb + 0), 0); - if (ncoords > 1) { - /* offset += coords.y * y_pitch: */ - offset = ir3_MAD_S24(b, 
create_uniform(ctx, cb + 1), 0, - coords[1], 0, offset, 0); - } - if (ncoords > 2) { - /* offset += coords.z * z_pitch: */ - offset = ir3_MAD_S24(b, create_uniform(ctx, cb + 2), 0, - coords[2], 0, offset, 0); - } - - if (!byteoff) { - /* Some cases, like atomics, seem to use dword offset instead - * of byte offsets.. blob just puts an extra shr.b in there - * in those cases: - */ - offset = ir3_SHR_B(b, offset, 0, create_immed(b, 2), 0); - } - - return create_collect(ctx, (struct ir3_instruction*[]){ - offset, - create_immed(b, 0), - }, 2); -} - -/* src[] = { deref, coord, sample_index }. const_index[] = {} */ -static void -emit_intrinsic_load_image(struct ir3_context *ctx, nir_intrinsic_instr *intr, - struct ir3_instruction **dst) -{ - struct ir3_block *b = ctx->block; - const nir_variable *var = nir_intrinsic_get_var(intr, 0); - struct ir3_instruction *sam; - struct ir3_instruction * const *src0 = get_src(ctx, &intr->src[1]); - struct ir3_instruction *coords[4]; - unsigned flags, ncoords = get_image_coords(var, &flags); - unsigned tex_idx = get_image_slot(ctx, nir_src_as_deref(intr->src[0])); - type_t type = get_image_type(var); - - /* hmm, this seems a bit odd, but it is what blob does and (at least - * a5xx) just faults on bogus addresses otherwise: - */ - if (flags & IR3_INSTR_3D) { - flags &= ~IR3_INSTR_3D; - flags |= IR3_INSTR_A; - } - - for (unsigned i = 0; i < ncoords; i++) - coords[i] = src0[i]; - - if (ncoords == 1) - coords[ncoords++] = create_immed(b, 0); - - sam = ir3_SAM(b, OPC_ISAM, type, TGSI_WRITEMASK_XYZW, flags, - tex_idx, tex_idx, create_collect(ctx, coords, ncoords), NULL); - - sam->barrier_class = IR3_BARRIER_IMAGE_R; - sam->barrier_conflict = IR3_BARRIER_IMAGE_W; - - split_dest(b, dst, sam, 0, 4); -} - -/* src[] = { deref, coord, sample_index, value }. 
const_index[] = {} */ -static void -emit_intrinsic_store_image(struct ir3_context *ctx, nir_intrinsic_instr *intr) -{ - struct ir3_block *b = ctx->block; - const nir_variable *var = nir_intrinsic_get_var(intr, 0); - struct ir3_instruction *stib, *offset; - struct ir3_instruction * const *value = get_src(ctx, &intr->src[3]); - struct ir3_instruction * const *coords = get_src(ctx, &intr->src[1]); - unsigned ncoords = get_image_coords(var, NULL); - unsigned tex_idx = get_image_slot(ctx, nir_src_as_deref(intr->src[0])); - - /* src0 is value - * src1 is coords - * src2 is 64b byte offset - */ - - offset = get_image_offset(ctx, var, coords, true); - - /* NOTE: stib seems to take byte offset, but stgb.typed can be used - * too and takes a dword offset.. not quite sure yet why blob uses - * one over the other in various cases. - */ - - stib = ir3_STIB(b, create_immed(b, tex_idx), 0, - create_collect(ctx, value, 4), 0, - create_collect(ctx, coords, ncoords), 0, - offset, 0); - stib->cat6.iim_val = 4; - stib->cat6.d = ncoords; - stib->cat6.type = get_image_type(var); - stib->cat6.typed = true; - stib->barrier_class = IR3_BARRIER_IMAGE_W; - stib->barrier_conflict = IR3_BARRIER_IMAGE_R | IR3_BARRIER_IMAGE_W; - - array_insert(b, b->keeps, stib); -} - -static void -emit_intrinsic_image_size(struct ir3_context *ctx, nir_intrinsic_instr *intr, - struct ir3_instruction **dst) -{ - struct ir3_block *b = ctx->block; - const nir_variable *var = nir_intrinsic_get_var(intr, 0); - unsigned tex_idx = get_image_slot(ctx, nir_src_as_deref(intr->src[0])); - struct ir3_instruction *sam, *lod; - unsigned flags, ncoords = get_image_coords(var, &flags); - - lod = create_immed(b, 0); - sam = ir3_SAM(b, OPC_GETSIZE, TYPE_U32, TGSI_WRITEMASK_XYZW, flags, - tex_idx, tex_idx, lod, NULL); - - /* Array size actually ends up in .w rather than .z. This doesn't - * matter for miplevel 0, but for higher mips the value in z is - * minified whereas w stays. 
Also, the value in TEX_CONST_3_DEPTH is - * returned, which means that we have to add 1 to it for arrays for - * a3xx. - * - * Note use a temporary dst and then copy, since the size of the dst - * array that is passed in is based on nir's understanding of the - * result size, not the hardware's - */ - struct ir3_instruction *tmp[4]; - - split_dest(b, tmp, sam, 0, 4); - - /* get_size instruction returns size in bytes instead of texels - * for imageBuffer, so we need to divide it by the pixel size - * of the image format. - * - * TODO: This is at least true on a5xx. Check other gens. - */ - enum glsl_sampler_dim dim = - glsl_get_sampler_dim(glsl_without_array(var->type)); - if (dim == GLSL_SAMPLER_DIM_BUF) { - /* Since all the possible values the divisor can take are - * power-of-two (4, 8, or 16), the division is implemented - * as a shift-right. - * During shader setup, the log2 of the image format's - * bytes-per-pixel should have been emitted in 2nd slot of - * image_dims. See ir3_shader::emit_image_dims(). - */ - unsigned cb = regid(ctx->so->constbase.image_dims, 0) + - ctx->so->const_layout.image_dims.off[var->data.driver_location]; - struct ir3_instruction *aux = create_uniform(ctx, cb + 1); - - tmp[0] = ir3_SHR_B(b, tmp[0], 0, aux, 0); - } - - for (unsigned i = 0; i < ncoords; i++) - dst[i] = tmp[i]; - - if (flags & IR3_INSTR_A) { - if (ctx->compiler->levels_add_one) { - dst[ncoords-1] = ir3_ADD_U(b, tmp[3], 0, create_immed(b, 1), 0); - } else { - dst[ncoords-1] = ir3_MOV(b, tmp[3], TYPE_U32); - } - } -} - -/* src[] = { deref, coord, sample_index, value, compare }. 
const_index[] = {} */ -static struct ir3_instruction * -emit_intrinsic_atomic_image(struct ir3_context *ctx, nir_intrinsic_instr *intr) -{ - struct ir3_block *b = ctx->block; - const nir_variable *var = nir_intrinsic_get_var(intr, 0); - struct ir3_instruction *atomic, *image, *src0, *src1, *src2; - struct ir3_instruction * const *coords = get_src(ctx, &intr->src[1]); - unsigned ncoords = get_image_coords(var, NULL); - - image = create_immed(b, get_image_slot(ctx, nir_src_as_deref(intr->src[0]))); - - /* src0 is value (or uvec2(value, compare)) - * src1 is coords - * src2 is 64b byte offset - */ - src0 = get_src(ctx, &intr->src[3])[0]; - src1 = create_collect(ctx, coords, ncoords); - src2 = get_image_offset(ctx, var, coords, false); - - switch (intr->intrinsic) { - case nir_intrinsic_image_deref_atomic_add: - atomic = ir3_ATOMIC_ADD_G(b, image, 0, src0, 0, src1, 0, src2, 0); - break; - case nir_intrinsic_image_deref_atomic_min: - atomic = ir3_ATOMIC_MIN_G(b, image, 0, src0, 0, src1, 0, src2, 0); - break; - case nir_intrinsic_image_deref_atomic_max: - atomic = ir3_ATOMIC_MAX_G(b, image, 0, src0, 0, src1, 0, src2, 0); - break; - case nir_intrinsic_image_deref_atomic_and: - atomic = ir3_ATOMIC_AND_G(b, image, 0, src0, 0, src1, 0, src2, 0); - break; - case nir_intrinsic_image_deref_atomic_or: - atomic = ir3_ATOMIC_OR_G(b, image, 0, src0, 0, src1, 0, src2, 0); - break; - case nir_intrinsic_image_deref_atomic_xor: - atomic = ir3_ATOMIC_XOR_G(b, image, 0, src0, 0, src1, 0, src2, 0); - break; - case nir_intrinsic_image_deref_atomic_exchange: - atomic = ir3_ATOMIC_XCHG_G(b, image, 0, src0, 0, src1, 0, src2, 0); - break; - case nir_intrinsic_image_deref_atomic_comp_swap: - /* for cmpxchg, src0 is [ui]vec2(data, compare): */ - src0 = create_collect(ctx, (struct ir3_instruction*[]){ - get_src(ctx, &intr->src[4])[0], - src0, - }, 2); - atomic = ir3_ATOMIC_CMPXCHG_G(b, image, 0, src0, 0, src1, 0, src2, 0); - break; - default: - unreachable("boo"); - } - - atomic->cat6.iim_val = 
1; - atomic->cat6.d = ncoords; - atomic->cat6.type = get_image_type(var); - atomic->cat6.typed = true; - atomic->barrier_class = IR3_BARRIER_IMAGE_W; - atomic->barrier_conflict = IR3_BARRIER_IMAGE_R | IR3_BARRIER_IMAGE_W; - - /* even if nothing consume the result, we can't DCE the instruction: */ - array_insert(b, b->keeps, atomic); - - return atomic; -} - -static void -emit_intrinsic_barrier(struct ir3_context *ctx, nir_intrinsic_instr *intr) -{ - struct ir3_block *b = ctx->block; - struct ir3_instruction *barrier; - - switch (intr->intrinsic) { - case nir_intrinsic_barrier: - barrier = ir3_BAR(b); - barrier->cat7.g = true; - barrier->cat7.l = true; - barrier->flags = IR3_INSTR_SS | IR3_INSTR_SY; - barrier->barrier_class = IR3_BARRIER_EVERYTHING; - break; - case nir_intrinsic_memory_barrier: - barrier = ir3_FENCE(b); - barrier->cat7.g = true; - barrier->cat7.r = true; - barrier->cat7.w = true; - barrier->barrier_class = IR3_BARRIER_IMAGE_W | - IR3_BARRIER_BUFFER_W; - barrier->barrier_conflict = - IR3_BARRIER_IMAGE_R | IR3_BARRIER_IMAGE_W | - IR3_BARRIER_BUFFER_R | IR3_BARRIER_BUFFER_W; - break; - case nir_intrinsic_memory_barrier_atomic_counter: - case nir_intrinsic_memory_barrier_buffer: - barrier = ir3_FENCE(b); - barrier->cat7.g = true; - barrier->cat7.r = true; - barrier->cat7.w = true; - barrier->barrier_class = IR3_BARRIER_BUFFER_W; - barrier->barrier_conflict = IR3_BARRIER_BUFFER_R | - IR3_BARRIER_BUFFER_W; - break; - case nir_intrinsic_memory_barrier_image: - // TODO double check if this should have .g set - barrier = ir3_FENCE(b); - barrier->cat7.g = true; - barrier->cat7.r = true; - barrier->cat7.w = true; - barrier->barrier_class = IR3_BARRIER_IMAGE_W; - barrier->barrier_conflict = IR3_BARRIER_IMAGE_R | - IR3_BARRIER_IMAGE_W; - break; - case nir_intrinsic_memory_barrier_shared: - barrier = ir3_FENCE(b); - barrier->cat7.g = true; - barrier->cat7.l = true; - barrier->cat7.r = true; - barrier->cat7.w = true; - barrier->barrier_class = IR3_BARRIER_SHARED_W; 
- barrier->barrier_conflict = IR3_BARRIER_SHARED_R | - IR3_BARRIER_SHARED_W; - break; - case nir_intrinsic_group_memory_barrier: - barrier = ir3_FENCE(b); - barrier->cat7.g = true; - barrier->cat7.l = true; - barrier->cat7.r = true; - barrier->cat7.w = true; - barrier->barrier_class = IR3_BARRIER_SHARED_W | - IR3_BARRIER_IMAGE_W | - IR3_BARRIER_BUFFER_W; - barrier->barrier_conflict = - IR3_BARRIER_SHARED_R | IR3_BARRIER_SHARED_W | - IR3_BARRIER_IMAGE_R | IR3_BARRIER_IMAGE_W | - IR3_BARRIER_BUFFER_R | IR3_BARRIER_BUFFER_W; - break; - default: - unreachable("boo"); - } - - /* make sure barrier doesn't get DCE'd */ - array_insert(b, b->keeps, barrier); -} - -static void add_sysval_input_compmask(struct ir3_context *ctx, - gl_system_value slot, unsigned compmask, - struct ir3_instruction *instr) -{ - struct ir3_shader_variant *so = ctx->so; - unsigned r = regid(so->inputs_count, 0); - unsigned n = so->inputs_count++; - - so->inputs[n].sysval = true; - so->inputs[n].slot = slot; - so->inputs[n].compmask = compmask; - so->inputs[n].regid = r; - so->inputs[n].interpolate = INTERP_MODE_FLAT; - so->total_in++; - - ctx->ir->ninputs = MAX2(ctx->ir->ninputs, r + 1); - ctx->ir->inputs[r] = instr; -} - -static void add_sysval_input(struct ir3_context *ctx, gl_system_value slot, - struct ir3_instruction *instr) -{ - add_sysval_input_compmask(ctx, slot, 0x1, instr); -} - -static void -emit_intrinsic(struct ir3_context *ctx, nir_intrinsic_instr *intr) -{ - const nir_intrinsic_info *info = &nir_intrinsic_infos[intr->intrinsic]; - struct ir3_instruction **dst; - struct ir3_instruction * const *src; - struct ir3_block *b = ctx->block; - nir_const_value *const_offset; - int idx, comp; - - if (info->has_dest) { - unsigned n = nir_intrinsic_dest_components(intr); - dst = get_dst(ctx, &intr->dest, n); - } else { - dst = NULL; - } - - switch (intr->intrinsic) { - case nir_intrinsic_load_uniform: - idx = nir_intrinsic_base(intr); - const_offset = nir_src_as_const_value(intr->src[0]); - if 
(const_offset) { - idx += const_offset->u32[0]; - for (int i = 0; i < intr->num_components; i++) { - unsigned n = idx * 4 + i; - dst[i] = create_uniform(ctx, n); - } - } else { - src = get_src(ctx, &intr->src[0]); - for (int i = 0; i < intr->num_components; i++) { - int n = idx * 4 + i; - dst[i] = create_uniform_indirect(ctx, n, - get_addr(ctx, src[0], 4)); - } - /* NOTE: if relative addressing is used, we set - * constlen in the compiler (to worst-case value) - * since we don't know in the assembler what the max - * addr reg value can be: - */ - ctx->so->constlen = ctx->s->num_uniforms; - } - break; - case nir_intrinsic_load_ubo: - emit_intrinsic_load_ubo(ctx, intr, dst); - break; - case nir_intrinsic_load_input: - idx = nir_intrinsic_base(intr); - comp = nir_intrinsic_component(intr); - const_offset = nir_src_as_const_value(intr->src[0]); - if (const_offset) { - idx += const_offset->u32[0]; - for (int i = 0; i < intr->num_components; i++) { - unsigned n = idx * 4 + i + comp; - dst[i] = ctx->ir->inputs[n]; - } - } else { - src = get_src(ctx, &intr->src[0]); - struct ir3_instruction *collect = - create_collect(ctx, ctx->ir->inputs, ctx->ir->ninputs); - struct ir3_instruction *addr = get_addr(ctx, src[0], 4); - for (int i = 0; i < intr->num_components; i++) { - unsigned n = idx * 4 + i + comp; - dst[i] = create_indirect_load(ctx, ctx->ir->ninputs, - n, addr, collect); - } - } - break; - case nir_intrinsic_load_ssbo: - emit_intrinsic_load_ssbo(ctx, intr, dst); - break; - case nir_intrinsic_store_ssbo: - emit_intrinsic_store_ssbo(ctx, intr); - break; - case nir_intrinsic_get_buffer_size: - emit_intrinsic_ssbo_size(ctx, intr, dst); - break; - case nir_intrinsic_ssbo_atomic_add: - case nir_intrinsic_ssbo_atomic_imin: - case nir_intrinsic_ssbo_atomic_umin: - case nir_intrinsic_ssbo_atomic_imax: - case nir_intrinsic_ssbo_atomic_umax: - case nir_intrinsic_ssbo_atomic_and: - case nir_intrinsic_ssbo_atomic_or: - case nir_intrinsic_ssbo_atomic_xor: - case 
nir_intrinsic_ssbo_atomic_exchange: - case nir_intrinsic_ssbo_atomic_comp_swap: - dst[0] = emit_intrinsic_atomic_ssbo(ctx, intr); - break; - case nir_intrinsic_load_shared: - emit_intrinsic_load_shared(ctx, intr, dst); - break; - case nir_intrinsic_store_shared: - emit_intrinsic_store_shared(ctx, intr); - break; - case nir_intrinsic_shared_atomic_add: - case nir_intrinsic_shared_atomic_imin: - case nir_intrinsic_shared_atomic_umin: - case nir_intrinsic_shared_atomic_imax: - case nir_intrinsic_shared_atomic_umax: - case nir_intrinsic_shared_atomic_and: - case nir_intrinsic_shared_atomic_or: - case nir_intrinsic_shared_atomic_xor: - case nir_intrinsic_shared_atomic_exchange: - case nir_intrinsic_shared_atomic_comp_swap: - dst[0] = emit_intrinsic_atomic_shared(ctx, intr); - break; - case nir_intrinsic_image_deref_load: - emit_intrinsic_load_image(ctx, intr, dst); - break; - case nir_intrinsic_image_deref_store: - emit_intrinsic_store_image(ctx, intr); - break; - case nir_intrinsic_image_deref_size: - emit_intrinsic_image_size(ctx, intr, dst); - break; - case nir_intrinsic_image_deref_atomic_add: - case nir_intrinsic_image_deref_atomic_min: - case nir_intrinsic_image_deref_atomic_max: - case nir_intrinsic_image_deref_atomic_and: - case nir_intrinsic_image_deref_atomic_or: - case nir_intrinsic_image_deref_atomic_xor: - case nir_intrinsic_image_deref_atomic_exchange: - case nir_intrinsic_image_deref_atomic_comp_swap: - dst[0] = emit_intrinsic_atomic_image(ctx, intr); - break; - case nir_intrinsic_barrier: - case nir_intrinsic_memory_barrier: - case nir_intrinsic_group_memory_barrier: - case nir_intrinsic_memory_barrier_atomic_counter: - case nir_intrinsic_memory_barrier_buffer: - case nir_intrinsic_memory_barrier_image: - case nir_intrinsic_memory_barrier_shared: - emit_intrinsic_barrier(ctx, intr); - /* note that blk ptr no longer valid, make that obvious: */ - b = NULL; - break; - case nir_intrinsic_store_output: - idx = nir_intrinsic_base(intr); - comp = 
nir_intrinsic_component(intr); - const_offset = nir_src_as_const_value(intr->src[1]); - compile_assert(ctx, const_offset != NULL); - idx += const_offset->u32[0]; - - src = get_src(ctx, &intr->src[0]); - for (int i = 0; i < intr->num_components; i++) { - unsigned n = idx * 4 + i + comp; - ctx->ir->outputs[n] = src[i]; - } - break; - case nir_intrinsic_load_base_vertex: - case nir_intrinsic_load_first_vertex: - if (!ctx->basevertex) { - ctx->basevertex = create_driver_param(ctx, IR3_DP_VTXID_BASE); - add_sysval_input(ctx, SYSTEM_VALUE_FIRST_VERTEX, ctx->basevertex); - } - dst[0] = ctx->basevertex; - break; - case nir_intrinsic_load_vertex_id_zero_base: - case nir_intrinsic_load_vertex_id: - if (!ctx->vertex_id) { - gl_system_value sv = (intr->intrinsic == nir_intrinsic_load_vertex_id) ? - SYSTEM_VALUE_VERTEX_ID : SYSTEM_VALUE_VERTEX_ID_ZERO_BASE; - ctx->vertex_id = create_input(ctx, 0); - add_sysval_input(ctx, sv, ctx->vertex_id); - } - dst[0] = ctx->vertex_id; - break; - case nir_intrinsic_load_instance_id: - if (!ctx->instance_id) { - ctx->instance_id = create_input(ctx, 0); - add_sysval_input(ctx, SYSTEM_VALUE_INSTANCE_ID, - ctx->instance_id); - } - dst[0] = ctx->instance_id; - break; - case nir_intrinsic_load_sample_id: - case nir_intrinsic_load_sample_id_no_per_sample: - if (!ctx->samp_id) { - ctx->samp_id = create_input(ctx, 0); - ctx->samp_id->regs[0]->flags |= IR3_REG_HALF; - add_sysval_input(ctx, SYSTEM_VALUE_SAMPLE_ID, - ctx->samp_id); - } - dst[0] = ir3_COV(b, ctx->samp_id, TYPE_U16, TYPE_U32); - break; - case nir_intrinsic_load_sample_mask_in: - if (!ctx->samp_mask_in) { - ctx->samp_mask_in = create_input(ctx, 0); - add_sysval_input(ctx, SYSTEM_VALUE_SAMPLE_MASK_IN, - ctx->samp_mask_in); - } - dst[0] = ctx->samp_mask_in; - break; - case nir_intrinsic_load_user_clip_plane: - idx = nir_intrinsic_ucp_id(intr); - for (int i = 0; i < intr->num_components; i++) { - unsigned n = idx * 4 + i; - dst[i] = create_driver_param(ctx, IR3_DP_UCP0_X + n); - } - break; - 
case nir_intrinsic_load_front_face: - if (!ctx->frag_face) { - ctx->so->frag_face = true; - ctx->frag_face = create_input(ctx, 0); - add_sysval_input(ctx, SYSTEM_VALUE_FRONT_FACE, ctx->frag_face); - ctx->frag_face->regs[0]->flags |= IR3_REG_HALF; - } - /* for fragface, we get -1 for back and 0 for front. However this is - * the inverse of what nir expects (where ~0 is true). - */ - dst[0] = ir3_COV(b, ctx->frag_face, TYPE_S16, TYPE_S32); - dst[0] = ir3_NOT_B(b, dst[0], 0); - break; - case nir_intrinsic_load_local_invocation_id: - if (!ctx->local_invocation_id) { - ctx->local_invocation_id = create_input_compmask(ctx, 0, 0x7); - add_sysval_input_compmask(ctx, SYSTEM_VALUE_LOCAL_INVOCATION_ID, - 0x7, ctx->local_invocation_id); - } - split_dest(b, dst, ctx->local_invocation_id, 0, 3); - break; - case nir_intrinsic_load_work_group_id: - if (!ctx->work_group_id) { - ctx->work_group_id = create_input_compmask(ctx, 0, 0x7); - add_sysval_input_compmask(ctx, SYSTEM_VALUE_WORK_GROUP_ID, - 0x7, ctx->work_group_id); - ctx->work_group_id->regs[0]->flags |= IR3_REG_HIGH; - } - split_dest(b, dst, ctx->work_group_id, 0, 3); - break; - case nir_intrinsic_load_num_work_groups: - for (int i = 0; i < intr->num_components; i++) { - dst[i] = create_driver_param(ctx, IR3_DP_NUM_WORK_GROUPS_X + i); - } - break; - case nir_intrinsic_load_local_group_size: - for (int i = 0; i < intr->num_components; i++) { - dst[i] = create_driver_param(ctx, IR3_DP_LOCAL_GROUP_SIZE_X + i); - } - break; - case nir_intrinsic_discard_if: - case nir_intrinsic_discard: { - struct ir3_instruction *cond, *kill; - - if (intr->intrinsic == nir_intrinsic_discard_if) { - /* conditional discard: */ - src = get_src(ctx, &intr->src[0]); - cond = ir3_b2n(b, src[0]); - } else { - /* unconditional discard: */ - cond = create_immed(b, 1); - } - - /* NOTE: only cmps.*.* can write p0.x: */ - cond = ir3_CMPS_S(b, cond, 0, create_immed(b, 0), 0); - cond->cat2.condition = IR3_COND_NE; - - /* condition always goes in predicate 
register: */ - cond->regs[0]->num = regid(REG_P0, 0); - - kill = ir3_KILL(b, cond, 0); - array_insert(ctx->ir, ctx->ir->predicates, kill); - - array_insert(b, b->keeps, kill); - ctx->so->has_kill = true; - - break; - } - default: - compile_error(ctx, "Unhandled intrinsic type: %s\n", - nir_intrinsic_infos[intr->intrinsic].name); - break; - } - - if (info->has_dest) - put_dst(ctx, &intr->dest); -} - -static void -emit_load_const(struct ir3_context *ctx, nir_load_const_instr *instr) -{ - struct ir3_instruction **dst = get_dst_ssa(ctx, &instr->def, - instr->def.num_components); - type_t type = (instr->def.bit_size < 32) ? TYPE_U16 : TYPE_U32; - - for (int i = 0; i < instr->def.num_components; i++) - dst[i] = create_immed_typed(ctx->block, instr->value.u32[i], type); -} - -static void -emit_undef(struct ir3_context *ctx, nir_ssa_undef_instr *undef) -{ - struct ir3_instruction **dst = get_dst_ssa(ctx, &undef->def, - undef->def.num_components); - type_t type = (undef->def.bit_size < 32) ? TYPE_U16 : TYPE_U32; - - /* backend doesn't want undefined instructions, so just plug - * in 0.0.. - */ - for (int i = 0; i < undef->def.num_components; i++) - dst[i] = create_immed_typed(ctx->block, fui(0.0), type); -} - -/* - * texture fetch/sample instructions: - */ - -static void -tex_info(nir_tex_instr *tex, unsigned *flagsp, unsigned *coordsp) -{ - unsigned coords, flags = 0; - - /* note: would use tex->coord_components.. except txs.. 
also, - * since array index goes after shadow ref, we don't want to - * count it: - */ - switch (tex->sampler_dim) { - case GLSL_SAMPLER_DIM_1D: - case GLSL_SAMPLER_DIM_BUF: - coords = 1; - break; - case GLSL_SAMPLER_DIM_2D: - case GLSL_SAMPLER_DIM_RECT: - case GLSL_SAMPLER_DIM_EXTERNAL: - case GLSL_SAMPLER_DIM_MS: - coords = 2; - break; - case GLSL_SAMPLER_DIM_3D: - case GLSL_SAMPLER_DIM_CUBE: - coords = 3; - flags |= IR3_INSTR_3D; - break; - default: - unreachable("bad sampler_dim"); - } - - if (tex->is_shadow && tex->op != nir_texop_lod) - flags |= IR3_INSTR_S; - - if (tex->is_array && tex->op != nir_texop_lod) - flags |= IR3_INSTR_A; - - *flagsp = flags; - *coordsp = coords; -} - -static void -emit_tex(struct ir3_context *ctx, nir_tex_instr *tex) -{ - struct ir3_block *b = ctx->block; - struct ir3_instruction **dst, *sam, *src0[12], *src1[4]; - struct ir3_instruction * const *coord, * const *off, * const *ddx, * const *ddy; - struct ir3_instruction *lod, *compare, *proj, *sample_index; - bool has_bias = false, has_lod = false, has_proj = false, has_off = false; - unsigned i, coords, flags; - unsigned nsrc0 = 0, nsrc1 = 0; - type_t type; - opc_t opc = 0; - - coord = off = ddx = ddy = NULL; - lod = proj = compare = sample_index = NULL; - - /* TODO: might just be one component for gathers? 
*/ - dst = get_dst(ctx, &tex->dest, 4); - - for (unsigned i = 0; i < tex->num_srcs; i++) { - switch (tex->src[i].src_type) { - case nir_tex_src_coord: - coord = get_src(ctx, &tex->src[i].src); - break; - case nir_tex_src_bias: - lod = get_src(ctx, &tex->src[i].src)[0]; - has_bias = true; - break; - case nir_tex_src_lod: - lod = get_src(ctx, &tex->src[i].src)[0]; - has_lod = true; - break; - case nir_tex_src_comparator: /* shadow comparator */ - compare = get_src(ctx, &tex->src[i].src)[0]; - break; - case nir_tex_src_projector: - proj = get_src(ctx, &tex->src[i].src)[0]; - has_proj = true; - break; - case nir_tex_src_offset: - off = get_src(ctx, &tex->src[i].src); - has_off = true; - break; - case nir_tex_src_ddx: - ddx = get_src(ctx, &tex->src[i].src); - break; - case nir_tex_src_ddy: - ddy = get_src(ctx, &tex->src[i].src); - break; - case nir_tex_src_ms_index: - sample_index = get_src(ctx, &tex->src[i].src)[0]; - break; - default: - compile_error(ctx, "Unhandled NIR tex src type: %d\n", - tex->src[i].src_type); - return; - } - } - - switch (tex->op) { - case nir_texop_tex: opc = has_lod ? OPC_SAML : OPC_SAM; break; - case nir_texop_txb: opc = OPC_SAMB; break; - case nir_texop_txl: opc = OPC_SAML; break; - case nir_texop_txd: opc = OPC_SAMGQ; break; - case nir_texop_txf: opc = OPC_ISAML; break; - case nir_texop_lod: opc = OPC_GETLOD; break; - case nir_texop_tg4: - /* NOTE: a4xx might need to emulate gather w/ txf (this is - * what blob does, seems gather is broken?), and a3xx did - * not support it (but probably could also emulate). 
- */ - switch (tex->component) { - case 0: opc = OPC_GATHER4R; break; - case 1: opc = OPC_GATHER4G; break; - case 2: opc = OPC_GATHER4B; break; - case 3: opc = OPC_GATHER4A; break; - } - break; - case nir_texop_txf_ms: opc = OPC_ISAMM; break; - case nir_texop_txs: - case nir_texop_query_levels: - case nir_texop_texture_samples: - case nir_texop_samples_identical: - case nir_texop_txf_ms_mcs: - compile_error(ctx, "Unhandled NIR tex type: %d\n", tex->op); - return; - } - - tex_info(tex, &flags, &coords); - - /* - * lay out the first argument in the proper order: - * - actual coordinates first - * - shadow reference - * - array index - * - projection w - * - starting at offset 4, dpdx.xy, dpdy.xy - * - * bias/lod go into the second arg - */ - - /* insert tex coords: */ - for (i = 0; i < coords; i++) - src0[i] = coord[i]; - - nsrc0 = i; - - /* NOTE a3xx (and possibly a4xx?) might be different, using isaml - * with scaled x coord according to requested sample: - */ - if (tex->op == nir_texop_txf_ms) { - if (ctx->compiler->txf_ms_with_isaml) { - /* the samples are laid out in x dimension as - * 0 1 2 3 - * x_ms = (x << ms) + sample_index; - */ - struct ir3_instruction *ms; - ms = create_immed(b, (ctx->samples >> (2 * tex->texture_index)) & 3); - - src0[0] = ir3_SHL_B(b, src0[0], 0, ms, 0); - src0[0] = ir3_ADD_U(b, src0[0], 0, sample_index, 0); - - opc = OPC_ISAML; - } else { - src0[nsrc0++] = sample_index; - } - } - - /* scale up integer coords for TXF based on the LOD */ - if (ctx->compiler->unminify_coords && (opc == OPC_ISAML)) { - assert(has_lod); - for (i = 0; i < coords; i++) - src0[i] = ir3_SHL_B(b, src0[i], 0, lod, 0); - } - - if (coords == 1) { - /* hw doesn't do 1d, so we treat it as 2d with - * height of 1, and patch up the y coord. - * TODO: y coord should be (int)0 in some cases.. 
- */ - src0[nsrc0++] = create_immed(b, fui(0.5)); - } - - if (tex->is_shadow && tex->op != nir_texop_lod) - src0[nsrc0++] = compare; - - if (tex->is_array && tex->op != nir_texop_lod) { - struct ir3_instruction *idx = coord[coords]; - - /* the array coord for cube arrays needs 0.5 added to it */ - if (ctx->compiler->array_index_add_half && (opc != OPC_ISAML)) - idx = ir3_ADD_F(b, idx, 0, create_immed(b, fui(0.5)), 0); - - src0[nsrc0++] = idx; - } - - if (has_proj) { - src0[nsrc0++] = proj; - flags |= IR3_INSTR_P; - } - - /* pad to 4, then ddx/ddy: */ - if (tex->op == nir_texop_txd) { - while (nsrc0 < 4) - src0[nsrc0++] = create_immed(b, fui(0.0)); - for (i = 0; i < coords; i++) - src0[nsrc0++] = ddx[i]; - if (coords < 2) - src0[nsrc0++] = create_immed(b, fui(0.0)); - for (i = 0; i < coords; i++) - src0[nsrc0++] = ddy[i]; - if (coords < 2) - src0[nsrc0++] = create_immed(b, fui(0.0)); - } - - /* - * second argument (if applicable): - * - offsets - * - lod - * - bias - */ - if (has_off | has_lod | has_bias) { - if (has_off) { - unsigned off_coords = coords; - if (tex->sampler_dim == GLSL_SAMPLER_DIM_CUBE) - off_coords--; - for (i = 0; i < off_coords; i++) - src1[nsrc1++] = off[i]; - if (off_coords < 2) - src1[nsrc1++] = create_immed(b, fui(0.0)); - flags |= IR3_INSTR_O; - } - - if (has_lod | has_bias) - src1[nsrc1++] = lod; - } - - switch (tex->dest_type) { - case nir_type_invalid: - case nir_type_float: - type = TYPE_F32; - break; - case nir_type_int: - type = TYPE_S32; - break; - case nir_type_uint: - case nir_type_bool: - type = TYPE_U32; - break; - default: - unreachable("bad dest_type"); - } - - if (opc == OPC_GETLOD) - type = TYPE_U32; - - unsigned tex_idx = tex->texture_index; - - ctx->max_texture_index = MAX2(ctx->max_texture_index, tex_idx); - - struct ir3_instruction *col0 = create_collect(ctx, src0, nsrc0); - struct ir3_instruction *col1 = create_collect(ctx, src1, nsrc1); - - sam = ir3_SAM(b, opc, type, TGSI_WRITEMASK_XYZW, flags, - tex_idx, tex_idx, col0, 
col1); - - if ((ctx->astc_srgb & (1 << tex_idx)) && !nir_tex_instr_is_query(tex)) { - /* only need first 3 components: */ - sam->regs[0]->wrmask = 0x7; - split_dest(b, dst, sam, 0, 3); - - /* we need to sample the alpha separately with a non-ASTC - * texture state: - */ - sam = ir3_SAM(b, opc, type, TGSI_WRITEMASK_W, flags, - tex_idx, tex_idx, col0, col1); - - array_insert(ctx->ir, ctx->ir->astc_srgb, sam); - - /* fixup .w component: */ - split_dest(b, &dst[3], sam, 3, 1); - } else { - /* normal (non-workaround) case: */ - split_dest(b, dst, sam, 0, 4); - } - - /* GETLOD returns results in 4.8 fixed point */ - if (opc == OPC_GETLOD) { - struct ir3_instruction *factor = create_immed(b, fui(1.0 / 256)); - - compile_assert(ctx, tex->dest_type == nir_type_float); - for (i = 0; i < 2; i++) { - dst[i] = ir3_MUL_F(b, ir3_COV(b, dst[i], TYPE_U32, TYPE_F32), 0, - factor, 0); - } - } - - put_dst(ctx, &tex->dest); -} - -static void -emit_tex_query_levels(struct ir3_context *ctx, nir_tex_instr *tex) -{ - struct ir3_block *b = ctx->block; - struct ir3_instruction **dst, *sam; - - dst = get_dst(ctx, &tex->dest, 1); - - sam = ir3_SAM(b, OPC_GETINFO, TYPE_U32, TGSI_WRITEMASK_Z, 0, - tex->texture_index, tex->texture_index, NULL, NULL); - - /* even though there is only one component, since it ends - * up in .z rather than .x, we need a split_dest() - */ - split_dest(b, dst, sam, 0, 3); - - /* The # of levels comes from getinfo.z. We need to add 1 to it, since - * the value in TEX_CONST_0 is zero-based. - */ - if (ctx->compiler->levels_add_one) - dst[0] = ir3_ADD_U(b, dst[0], 0, create_immed(b, 1), 0); - - put_dst(ctx, &tex->dest); -} - -static void -emit_tex_txs(struct ir3_context *ctx, nir_tex_instr *tex) -{ - struct ir3_block *b = ctx->block; - struct ir3_instruction **dst, *sam; - struct ir3_instruction *lod; - unsigned flags, coords; - - tex_info(tex, &flags, &coords); - - /* Actually we want the number of dimensions, not coordinates. This - * distinction only matters for cubes. 
- */ - if (tex->sampler_dim == GLSL_SAMPLER_DIM_CUBE) - coords = 2; - - dst = get_dst(ctx, &tex->dest, 4); - - compile_assert(ctx, tex->num_srcs == 1); - compile_assert(ctx, tex->src[0].src_type == nir_tex_src_lod); - - lod = get_src(ctx, &tex->src[0].src)[0]; - - sam = ir3_SAM(b, OPC_GETSIZE, TYPE_U32, TGSI_WRITEMASK_XYZW, flags, - tex->texture_index, tex->texture_index, lod, NULL); - - split_dest(b, dst, sam, 0, 4); - - /* Array size actually ends up in .w rather than .z. This doesn't - * matter for miplevel 0, but for higher mips the value in z is - * minified whereas w stays. Also, the value in TEX_CONST_3_DEPTH is - * returned, which means that we have to add 1 to it for arrays. - */ - if (tex->is_array) { - if (ctx->compiler->levels_add_one) { - dst[coords] = ir3_ADD_U(b, dst[3], 0, create_immed(b, 1), 0); - } else { - dst[coords] = ir3_MOV(b, dst[3], TYPE_U32); - } - } - - put_dst(ctx, &tex->dest); -} - -static void -emit_jump(struct ir3_context *ctx, nir_jump_instr *jump) -{ - switch (jump->type) { - case nir_jump_break: - case nir_jump_continue: - case nir_jump_return: - /* I *think* we can simply just ignore this, and use the - * successor block link to figure out where we need to - * jump to for break/continue - */ - break; - default: - compile_error(ctx, "Unhandled NIR jump type: %d\n", jump->type); - break; - } -} - -static void -emit_instr(struct ir3_context *ctx, nir_instr *instr) -{ - switch (instr->type) { - case nir_instr_type_alu: - emit_alu(ctx, nir_instr_as_alu(instr)); - break; - case nir_instr_type_deref: - /* ignored, handled as part of the intrinsic they are src to */ - break; - case nir_instr_type_intrinsic: - emit_intrinsic(ctx, nir_instr_as_intrinsic(instr)); - break; - case nir_instr_type_load_const: - emit_load_const(ctx, nir_instr_as_load_const(instr)); - break; - case nir_instr_type_ssa_undef: - emit_undef(ctx, nir_instr_as_ssa_undef(instr)); - break; - case nir_instr_type_tex: { - nir_tex_instr *tex = nir_instr_as_tex(instr); - /* 
couple tex instructions get special-cased: - */ - switch (tex->op) { - case nir_texop_txs: - emit_tex_txs(ctx, tex); - break; - case nir_texop_query_levels: - emit_tex_query_levels(ctx, tex); - break; - default: - emit_tex(ctx, tex); - break; - } - break; - } - case nir_instr_type_jump: - emit_jump(ctx, nir_instr_as_jump(instr)); - break; - case nir_instr_type_phi: - /* we have converted phi webs to regs in NIR by now */ - compile_error(ctx, "Unexpected NIR instruction type: %d\n", instr->type); - break; - case nir_instr_type_call: - case nir_instr_type_parallel_copy: - compile_error(ctx, "Unhandled NIR instruction type: %d\n", instr->type); - break; - } -} - -static struct ir3_block * -get_block(struct ir3_context *ctx, const nir_block *nblock) -{ - struct ir3_block *block; - struct hash_entry *hentry; - unsigned i; - - hentry = _mesa_hash_table_search(ctx->block_ht, nblock); - if (hentry) - return hentry->data; - - block = ir3_block_create(ctx->ir); - block->nblock = nblock; - _mesa_hash_table_insert(ctx->block_ht, nblock, block); - - block->predecessors_count = nblock->predecessors->entries; - block->predecessors = ralloc_array_size(block, - sizeof(block->predecessors[0]), block->predecessors_count); - i = 0; - set_foreach(nblock->predecessors, sentry) { - block->predecessors[i++] = get_block(ctx, sentry->key); - } - - return block; -} - -static void -emit_block(struct ir3_context *ctx, nir_block *nblock) -{ - struct ir3_block *block = get_block(ctx, nblock); - - for (int i = 0; i < ARRAY_SIZE(block->successors); i++) { - if (nblock->successors[i]) { - block->successors[i] = - get_block(ctx, nblock->successors[i]); - } - } - - ctx->block = block; - list_addtail(&block->node, &ctx->ir->block_list); - - /* re-emit addr register in each block if needed: */ - for (int i = 0; i < ARRAY_SIZE(ctx->addr_ht); i++) { - _mesa_hash_table_destroy(ctx->addr_ht[i], NULL); - ctx->addr_ht[i] = NULL; - } - - nir_foreach_instr(instr, nblock) { - ctx->cur_instr = instr; - 
emit_instr(ctx, instr); - ctx->cur_instr = NULL; - if (ctx->error) - return; - } -} - -static void emit_cf_list(struct ir3_context *ctx, struct exec_list *list); - -static void -emit_if(struct ir3_context *ctx, nir_if *nif) -{ - struct ir3_instruction *condition = get_src(ctx, &nif->condition)[0]; - - ctx->block->condition = - get_predicate(ctx, ir3_b2n(condition->block, condition)); - - emit_cf_list(ctx, &nif->then_list); - emit_cf_list(ctx, &nif->else_list); -} - -static void -emit_loop(struct ir3_context *ctx, nir_loop *nloop) -{ - emit_cf_list(ctx, &nloop->body); -} - -static void -emit_cf_list(struct ir3_context *ctx, struct exec_list *list) -{ - foreach_list_typed(nir_cf_node, node, node, list) { - switch (node->type) { - case nir_cf_node_block: - emit_block(ctx, nir_cf_node_as_block(node)); - break; - case nir_cf_node_if: - emit_if(ctx, nir_cf_node_as_if(node)); - break; - case nir_cf_node_loop: - emit_loop(ctx, nir_cf_node_as_loop(node)); - break; - case nir_cf_node_function: - compile_error(ctx, "TODO\n"); - break; - } - } -} - -/* emit stream-out code. At this point, the current block is the original - * (nir) end block, and nir ensures that all flow control paths terminate - * into the end block. We re-purpose the original end block to generate - * the 'if (vtxcnt < maxvtxcnt)' condition, then append the conditional - * block holding stream-out write instructions, followed by the new end - * block: - * - * blockOrigEnd { - * p0.x = (vtxcnt < maxvtxcnt) - * // succs: blockStreamOut, blockNewEnd - * } - * blockStreamOut { - * ... stream-out instructions ... 
- * // succs: blockNewEnd - * } - * blockNewEnd { - * } - */ -static void -emit_stream_out(struct ir3_context *ctx) -{ - struct ir3_shader_variant *v = ctx->so; - struct ir3 *ir = ctx->ir; - struct pipe_stream_output_info *strmout = - &ctx->so->shader->stream_output; - struct ir3_block *orig_end_block, *stream_out_block, *new_end_block; - struct ir3_instruction *vtxcnt, *maxvtxcnt, *cond; - struct ir3_instruction *bases[PIPE_MAX_SO_BUFFERS]; - - /* create vtxcnt input in input block at top of shader, - * so that it is seen as live over the entire duration - * of the shader: - */ - vtxcnt = create_input(ctx, 0); - add_sysval_input(ctx, SYSTEM_VALUE_VERTEX_CNT, vtxcnt); - - maxvtxcnt = create_driver_param(ctx, IR3_DP_VTXCNT_MAX); - - /* at this point, we are at the original 'end' block, - * re-purpose this block to stream-out condition, then - * append stream-out block and new-end block - */ - orig_end_block = ctx->block; - -// TODO these blocks need to update predecessors.. -// maybe w/ store_global intrinsic, we could do this -// stuff in nir->nir pass - - stream_out_block = ir3_block_create(ir); - list_addtail(&stream_out_block->node, &ir->block_list); - - new_end_block = ir3_block_create(ir); - list_addtail(&new_end_block->node, &ir->block_list); - - orig_end_block->successors[0] = stream_out_block; - orig_end_block->successors[1] = new_end_block; - stream_out_block->successors[0] = new_end_block; - - /* setup 'if (vtxcnt < maxvtxcnt)' condition: */ - cond = ir3_CMPS_S(ctx->block, vtxcnt, 0, maxvtxcnt, 0); - cond->regs[0]->num = regid(REG_P0, 0); - cond->cat2.condition = IR3_COND_LT; - - /* condition goes on previous block to the conditional, - * since it is used to pick which of the two successor - * paths to take: - */ - orig_end_block->condition = cond; - - /* switch to stream_out_block to generate the stream-out - * instructions: - */ - ctx->block = stream_out_block; - - /* Calculate base addresses based on vtxcnt. 
Instructions - * generated for bases not used in following loop will be - * stripped out in the backend. - */ - for (unsigned i = 0; i < PIPE_MAX_SO_BUFFERS; i++) { - unsigned stride = strmout->stride[i]; - struct ir3_instruction *base, *off; - - base = create_uniform(ctx, regid(v->constbase.tfbo, i)); - - /* 24-bit should be enough: */ - off = ir3_MUL_U(ctx->block, vtxcnt, 0, - create_immed(ctx->block, stride * 4), 0); - - bases[i] = ir3_ADD_S(ctx->block, off, 0, base, 0); - } - - /* Generate the per-output store instructions: */ - for (unsigned i = 0; i < strmout->num_outputs; i++) { - for (unsigned j = 0; j < strmout->output[i].num_components; j++) { - unsigned c = j + strmout->output[i].start_component; - struct ir3_instruction *base, *out, *stg; - - base = bases[strmout->output[i].output_buffer]; - out = ctx->ir->outputs[regid(strmout->output[i].register_index, c)]; - - stg = ir3_STG(ctx->block, base, 0, out, 0, - create_immed(ctx->block, 1), 0); - stg->cat6.type = TYPE_U32; - stg->cat6.dst_offset = (strmout->output[i].dst_offset + j) * 4; - - array_insert(ctx->block, ctx->block->keeps, stg); - } - } - - /* and finally switch to the new_end_block: */ - ctx->block = new_end_block; -} - -static void -emit_function(struct ir3_context *ctx, nir_function_impl *impl) -{ - nir_metadata_require(impl, nir_metadata_block_index); - - emit_cf_list(ctx, &impl->body); - emit_block(ctx, impl->end_block); - - /* at this point, we should have a single empty block, - * into which we emit the 'end' instruction. - */ - compile_assert(ctx, list_empty(&ctx->block->instr_list)); - - /* If stream-out (aka transform-feedback) enabled, emit the - * stream-out instructions, followed by a new empty block (into - * which the 'end' instruction lands). - * - * NOTE: it is done in this order, rather than inserting before - * we emit end_block, because NIR guarantees that all blocks - * flow into end_block, and that end_block has no successors. 
- * So by re-purposing end_block as the first block of stream- - * out, we guarantee that all exit paths flow into the stream- - * out instructions. - */ - if ((ctx->compiler->gpu_id < 500) && - (ctx->so->shader->stream_output.num_outputs > 0) && - !ctx->so->binning_pass) { - debug_assert(ctx->so->type == SHADER_VERTEX); - emit_stream_out(ctx); - } - - ir3_END(ctx->block); -} - -static struct ir3_instruction * -create_frag_coord(struct ir3_context *ctx, unsigned comp) -{ - struct ir3_block *block = ctx->block; - struct ir3_instruction *instr; - - if (!ctx->frag_coord) { - ctx->frag_coord = create_input_compmask(ctx, 0, 0xf); - /* defer add_sysval_input() until after all inputs created */ - } - - split_dest(block, &instr, ctx->frag_coord, comp, 1); - - switch (comp) { - case 0: /* .x */ - case 1: /* .y */ - /* for frag_coord, we get unsigned values.. we need - * to subtract (integer) 8 and divide by 16 (right- - * shift by 4) then convert to float: - * - * sub.s tmp, src, 8 - * shr.b tmp, tmp, 4 - * mov.u32f32 dst, tmp - * - */ - instr = ir3_SUB_S(block, instr, 0, - create_immed(block, 8), 0); - instr = ir3_SHR_B(block, instr, 0, - create_immed(block, 4), 0); - instr = ir3_COV(block, instr, TYPE_U32, TYPE_F32); - - return instr; - case 2: /* .z */ - case 3: /* .w */ - default: - /* seems that we can use these as-is: */ - return instr; - } -} - -static void -setup_input(struct ir3_context *ctx, nir_variable *in) -{ - struct ir3_shader_variant *so = ctx->so; - unsigned array_len = MAX2(glsl_get_length(in->type), 1); - unsigned ncomp = glsl_get_components(in->type); - unsigned n = in->data.driver_location; - unsigned slot = in->data.location; - - DBG("; in: slot=%u, len=%ux%u, drvloc=%u", - slot, array_len, ncomp, n); - - /* let's pretend things other than vec4 don't exist: */ - ncomp = MAX2(ncomp, 4); - - /* skip unread inputs, we could end up with (for example), unsplit - * matrix/etc inputs in the case they are not read, so just silently - * skip these. 
- */ - if (ncomp > 4) - return; - - compile_assert(ctx, ncomp == 4); - - so->inputs[n].slot = slot; - so->inputs[n].compmask = (1 << ncomp) - 1; - so->inputs_count = MAX2(so->inputs_count, n + 1); - so->inputs[n].interpolate = in->data.interpolation; - - if (ctx->so->type == SHADER_FRAGMENT) { - for (int i = 0; i < ncomp; i++) { - struct ir3_instruction *instr = NULL; - unsigned idx = (n * 4) + i; - - if (slot == VARYING_SLOT_POS) { - so->inputs[n].bary = false; - so->frag_coord = true; - instr = create_frag_coord(ctx, i); - } else if (slot == VARYING_SLOT_PNTC) { - /* see for example st_nir_fixup_varying_slots().. this is - * maybe a bit mesa/st specific. But we need things to line - * up for this in fdN_program: - * unsigned texmask = 1 << (slot - VARYING_SLOT_VAR0); - * if (emit->sprite_coord_enable & texmask) { - * ... - * } - */ - so->inputs[n].slot = VARYING_SLOT_VAR8; - so->inputs[n].bary = true; - instr = create_frag_input(ctx, false); - } else { - bool use_ldlv = false; - - /* detect the special case for front/back colors where - * we need to do flat vs smooth shading depending on - * rast state: - */ - if (in->data.interpolation == INTERP_MODE_NONE) { - switch (slot) { - case VARYING_SLOT_COL0: - case VARYING_SLOT_COL1: - case VARYING_SLOT_BFC0: - case VARYING_SLOT_BFC1: - so->inputs[n].rasterflat = true; - break; - default: - break; - } - } - - if (ctx->compiler->flat_bypass) { - if ((so->inputs[n].interpolate == INTERP_MODE_FLAT) || - (so->inputs[n].rasterflat && ctx->so->key.rasterflat)) - use_ldlv = true; - } - - so->inputs[n].bary = true; - - instr = create_frag_input(ctx, use_ldlv); - } - - compile_assert(ctx, idx < ctx->ir->ninputs); - - ctx->ir->inputs[idx] = instr; - } - } else if (ctx->so->type == SHADER_VERTEX) { - for (int i = 0; i < ncomp; i++) { - unsigned idx = (n * 4) + i; - compile_assert(ctx, idx < ctx->ir->ninputs); - ctx->ir->inputs[idx] = create_input(ctx, idx); - } - } else { - compile_error(ctx, "unknown shader type: %d\n", 
ctx->so->type); - } - - if (so->inputs[n].bary || (ctx->so->type == SHADER_VERTEX)) { - so->total_in += ncomp; - } -} - -static void -setup_output(struct ir3_context *ctx, nir_variable *out) -{ - struct ir3_shader_variant *so = ctx->so; - unsigned array_len = MAX2(glsl_get_length(out->type), 1); - unsigned ncomp = glsl_get_components(out->type); - unsigned n = out->data.driver_location; - unsigned slot = out->data.location; - unsigned comp = 0; - - DBG("; out: slot=%u, len=%ux%u, drvloc=%u", - slot, array_len, ncomp, n); - - /* let's pretend things other than vec4 don't exist: */ - ncomp = MAX2(ncomp, 4); - compile_assert(ctx, ncomp == 4); - - if (ctx->so->type == SHADER_FRAGMENT) { - switch (slot) { - case FRAG_RESULT_DEPTH: - comp = 2; /* tgsi will write to .z component */ - so->writes_pos = true; - break; - case FRAG_RESULT_COLOR: - so->color0_mrt = 1; - break; - default: - if (slot >= FRAG_RESULT_DATA0) - break; - compile_error(ctx, "unknown FS output name: %s\n", - gl_frag_result_name(slot)); - } - } else if (ctx->so->type == SHADER_VERTEX) { - switch (slot) { - case VARYING_SLOT_POS: - so->writes_pos = true; - break; - case VARYING_SLOT_PSIZ: - so->writes_psize = true; - break; - case VARYING_SLOT_COL0: - case VARYING_SLOT_COL1: - case VARYING_SLOT_BFC0: - case VARYING_SLOT_BFC1: - case VARYING_SLOT_FOGC: - case VARYING_SLOT_CLIP_DIST0: - case VARYING_SLOT_CLIP_DIST1: - case VARYING_SLOT_CLIP_VERTEX: - break; - default: - if (slot >= VARYING_SLOT_VAR0) - break; - if ((VARYING_SLOT_TEX0 <= slot) && (slot <= VARYING_SLOT_TEX7)) - break; - compile_error(ctx, "unknown VS output name: %s\n", - gl_varying_slot_name(slot)); - } - } else { - compile_error(ctx, "unknown shader type: %d\n", ctx->so->type); - } - - compile_assert(ctx, n < ARRAY_SIZE(so->outputs)); - - so->outputs[n].slot = slot; - so->outputs[n].regid = regid(n, comp); - so->outputs_count = MAX2(so->outputs_count, n + 1); - - for (int i = 0; i < ncomp; i++) { - unsigned idx = (n * 4) + i; - 
compile_assert(ctx, idx < ctx->ir->noutputs); - ctx->ir->outputs[idx] = create_immed(ctx->block, fui(0.0)); - } -} - -static int -max_drvloc(struct exec_list *vars) -{ - int drvloc = -1; - nir_foreach_variable(var, vars) { - drvloc = MAX2(drvloc, (int)var->data.driver_location); - } - return drvloc; -} - -static const unsigned max_sysvals[SHADER_MAX] = { - [SHADER_FRAGMENT] = 24, // TODO - [SHADER_VERTEX] = 16, - [SHADER_COMPUTE] = 16, // TODO how many do we actually need? -}; - -static void -emit_instructions(struct ir3_context *ctx) -{ - unsigned ninputs, noutputs; - nir_function_impl *fxn = nir_shader_get_entrypoint(ctx->s); - - ninputs = (max_drvloc(&ctx->s->inputs) + 1) * 4; - noutputs = (max_drvloc(&ctx->s->outputs) + 1) * 4; - - /* we need to leave room for sysvals: - */ - ninputs += max_sysvals[ctx->so->type]; - - ctx->ir = ir3_create(ctx->compiler, ninputs, noutputs); - - /* Create inputs in first block: */ - ctx->block = get_block(ctx, nir_start_block(fxn)); - ctx->in_block = ctx->block; - list_addtail(&ctx->block->node, &ctx->ir->block_list); - - ninputs -= max_sysvals[ctx->so->type]; - - /* for fragment shader, the vcoord input register is used as the - * base for bary.f varying fetch instrs: - */ - struct ir3_instruction *vcoord = NULL; - if (ctx->so->type == SHADER_FRAGMENT) { - struct ir3_instruction *xy[2]; - - vcoord = create_input_compmask(ctx, 0, 0x3); - split_dest(ctx->block, xy, vcoord, 0, 2); - - ctx->frag_vcoord = create_collect(ctx, xy, 2); - } - - /* Setup inputs: */ - nir_foreach_variable(var, &ctx->s->inputs) { - setup_input(ctx, var); - } - - /* Defer add_sysval_input() stuff until after setup_inputs(), - * because sysvals need to be appended after varyings: - */ - if (vcoord) { - add_sysval_input_compmask(ctx, SYSTEM_VALUE_VARYING_COORD, - 0x3, vcoord); - } - - if (ctx->frag_coord) { - add_sysval_input_compmask(ctx, SYSTEM_VALUE_FRAG_COORD, - 0xf, ctx->frag_coord); - } - - /* Setup outputs: */ - nir_foreach_variable(var, 
&ctx->s->outputs) { - setup_output(ctx, var); - } - - /* Setup registers (which should only be arrays): */ - nir_foreach_register(reg, &ctx->s->registers) { - declare_array(ctx, reg); - } - - /* NOTE: need to do something more clever when we support >1 fxn */ - nir_foreach_register(reg, &fxn->registers) { - declare_array(ctx, reg); - } - /* And emit the body: */ - ctx->impl = fxn; - emit_function(ctx, fxn); -} - -/* from NIR perspective, we actually have varying inputs. But the varying - * inputs, from an IR standpoint, are just bary.f/ldlv instructions. The - * only actual inputs are the sysvals. - */ -static void -fixup_frag_inputs(struct ir3_context *ctx) -{ - struct ir3_shader_variant *so = ctx->so; - struct ir3 *ir = ctx->ir; - unsigned i = 0; - - /* sysvals should appear at the end of the inputs, drop everything else: */ - while ((i < so->inputs_count) && !so->inputs[i].sysval) - i++; - - /* at IR level, inputs are always blocks of 4 scalars: */ - i *= 4; - - ir->inputs = &ir->inputs[i]; - ir->ninputs -= i; -} - -/* Fixup tex sampler state for astc/srgb workaround instructions. We - * need to assign the tex state indexes for these after we know the - * max tex index. - */ -static void -fixup_astc_srgb(struct ir3_context *ctx) -{ - struct ir3_shader_variant *so = ctx->so; - /* indexed by original tex idx, value is newly assigned alpha sampler - * state tex idx. Zero is invalid since there is at least one sampler - * if we get here. 
- */ - unsigned alt_tex_state[16] = {0}; - unsigned tex_idx = ctx->max_texture_index + 1; - unsigned idx = 0; - - so->astc_srgb.base = tex_idx; - - for (unsigned i = 0; i < ctx->ir->astc_srgb_count; i++) { - struct ir3_instruction *sam = ctx->ir->astc_srgb[i]; - - compile_assert(ctx, sam->cat5.tex < ARRAY_SIZE(alt_tex_state)); - - if (alt_tex_state[sam->cat5.tex] == 0) { - /* assign new alternate/alpha tex state slot: */ - alt_tex_state[sam->cat5.tex] = tex_idx++; - so->astc_srgb.orig_idx[idx++] = sam->cat5.tex; - so->astc_srgb.count++; - } - - sam->cat5.tex = alt_tex_state[sam->cat5.tex]; - } -} - -static void -fixup_binning_pass(struct ir3_context *ctx) -{ - struct ir3_shader_variant *so = ctx->so; - struct ir3 *ir = ctx->ir; - unsigned i, j; - - for (i = 0, j = 0; i < so->outputs_count; i++) { - unsigned slot = so->outputs[i].slot; - - /* throw away everything but first position/psize */ - if ((slot == VARYING_SLOT_POS) || (slot == VARYING_SLOT_PSIZ)) { - if (i != j) { - so->outputs[j] = so->outputs[i]; - ir->outputs[(j*4)+0] = ir->outputs[(i*4)+0]; - ir->outputs[(j*4)+1] = ir->outputs[(i*4)+1]; - ir->outputs[(j*4)+2] = ir->outputs[(i*4)+2]; - ir->outputs[(j*4)+3] = ir->outputs[(i*4)+3]; - } - j++; - } - } - so->outputs_count = j; - ir->noutputs = j * 4; -} - -int -ir3_compile_shader_nir(struct ir3_compiler *compiler, - struct ir3_shader_variant *so) -{ - struct ir3_context *ctx; - struct ir3 *ir; - struct ir3_instruction **inputs; - unsigned i, actual_in, inloc; - int ret = 0, max_bary; - - assert(!so->ir); - - ctx = compile_init(compiler, so); - if (!ctx) { - DBG("INIT failed!"); - ret = -1; - goto out; - } - - emit_instructions(ctx); - - if (ctx->error) { - DBG("EMIT failed!"); - ret = -1; - goto out; - } - - ir = so->ir = ctx->ir; - - /* keep track of the inputs from TGSI perspective.. 
*/ - inputs = ir->inputs; - - /* but fixup actual inputs for frag shader: */ - if (so->type == SHADER_FRAGMENT) - fixup_frag_inputs(ctx); - - /* at this point, for binning pass, throw away unneeded outputs: */ - if (so->binning_pass && (ctx->compiler->gpu_id < 600)) - fixup_binning_pass(ctx); - - /* if we want half-precision outputs, mark the output registers - * as half: - */ - if (so->key.half_precision) { - for (i = 0; i < ir->noutputs; i++) { - struct ir3_instruction *out = ir->outputs[i]; - - if (!out) - continue; - - /* if frag shader writes z, that needs to be full precision: */ - if (so->outputs[i/4].slot == FRAG_RESULT_DEPTH) - continue; - - out->regs[0]->flags |= IR3_REG_HALF; - /* output could be a fanout (ie. texture fetch output) - * in which case we need to propagate the half-reg flag - * up to the definer so that RA sees it: - */ - if (out->opc == OPC_META_FO) { - out = out->regs[1]->instr; - out->regs[0]->flags |= IR3_REG_HALF; - } - - if (out->opc == OPC_MOV) { - out->cat1.dst_type = half_type(out->cat1.dst_type); - } - } - } - - if (fd_mesa_debug & FD_DBG_OPTMSGS) { - printf("BEFORE CP:\n"); - ir3_print(ir); - } - - ir3_cp(ir, so); - - /* at this point, for binning pass, throw away unneeded outputs: - * Note that for a6xx and later, we do this after ir3_cp to ensure - * that the uniform/constant layout for BS and VS matches, so that - * we can re-use same VS_CONST state group. - */ - if (so->binning_pass && (ctx->compiler->gpu_id >= 600)) - fixup_binning_pass(ctx); - - /* Insert mov if there's same instruction for each output. - * eg. 
dEQP-GLES31.functional.shaders.opaque_type_indexing.sampler.const_expression.vertex.sampler2dshadow - */ - for (int i = ir->noutputs - 1; i >= 0; i--) { - if (!ir->outputs[i]) - continue; - for (unsigned j = 0; j < i; j++) { - if (ir->outputs[i] == ir->outputs[j]) { - ir->outputs[i] = - ir3_MOV(ir->outputs[i]->block, ir->outputs[i], TYPE_F32); - } - } - } - - if (fd_mesa_debug & FD_DBG_OPTMSGS) { - printf("BEFORE GROUPING:\n"); - ir3_print(ir); - } - - ir3_sched_add_deps(ir); - - /* Group left/right neighbors, inserting mov's where needed to - * solve conflicts: - */ - ir3_group(ir); - - if (fd_mesa_debug & FD_DBG_OPTMSGS) { - printf("AFTER GROUPING:\n"); - ir3_print(ir); - } - - ir3_depth(ir); - - if (fd_mesa_debug & FD_DBG_OPTMSGS) { - printf("AFTER DEPTH:\n"); - ir3_print(ir); - } - - ret = ir3_sched(ir); - if (ret) { - DBG("SCHED failed!"); - goto out; - } - - if (fd_mesa_debug & FD_DBG_OPTMSGS) { - printf("AFTER SCHED:\n"); - ir3_print(ir); - } - - ret = ir3_ra(ir, so->type, so->frag_coord, so->frag_face); - if (ret) { - DBG("RA failed!"); - goto out; - } - - if (fd_mesa_debug & FD_DBG_OPTMSGS) { - printf("AFTER RA:\n"); - ir3_print(ir); - } - - /* fixup input/outputs: */ - for (i = 0; i < so->outputs_count; i++) { - so->outputs[i].regid = ir->outputs[i*4]->regs[0]->num; - } - - /* Note that some or all channels of an input may be unused: */ - actual_in = 0; - inloc = 0; - for (i = 0; i < so->inputs_count; i++) { - unsigned j, reg = regid(63,0), compmask = 0, maxcomp = 0; - so->inputs[i].ncomp = 0; - so->inputs[i].inloc = inloc; - for (j = 0; j < 4; j++) { - struct ir3_instruction *in = inputs[(i*4) + j]; - if (in && !(in->flags & IR3_INSTR_UNUSED)) { - compmask |= (1 << j); - reg = in->regs[0]->num - j; - actual_in++; - so->inputs[i].ncomp++; - if ((so->type == SHADER_FRAGMENT) && so->inputs[i].bary) { - /* assign inloc: */ - assert(in->regs[1]->flags & IR3_REG_IMMED); - in->regs[1]->iim_val = inloc + j; - maxcomp = j + 1; - } - } - } - if ((so->type == 
SHADER_FRAGMENT) && compmask && so->inputs[i].bary) { - so->varying_in++; - so->inputs[i].compmask = (1 << maxcomp) - 1; - inloc += maxcomp; - } else if (!so->inputs[i].sysval) { - so->inputs[i].compmask = compmask; - } - so->inputs[i].regid = reg; - } - - if (ctx->astc_srgb) - fixup_astc_srgb(ctx); - - /* We need to do legalize after (for frag shader's) the "bary.f" - * offsets (inloc) have been assigned. - */ - ir3_legalize(ir, &so->num_samp, &so->has_ssbo, &max_bary); - - if (fd_mesa_debug & FD_DBG_OPTMSGS) { - printf("AFTER LEGALIZE:\n"); - ir3_print(ir); - } - - /* Note that actual_in counts inputs that are not bary.f'd for FS: */ - if (so->type == SHADER_VERTEX) - so->total_in = actual_in; - else - so->total_in = max_bary + 1; - -out: - if (ret) { - if (so->ir) - ir3_destroy(so->ir); - so->ir = NULL; - } - compile_free(ctx); - - return ret; -} diff -Nru mesa-18.3.3/src/gallium/drivers/freedreno/ir3/ir3_cp.c mesa-19.0.1/src/gallium/drivers/freedreno/ir3/ir3_cp.c --- mesa-18.3.3/src/gallium/drivers/freedreno/ir3/ir3_cp.c 2018-12-07 18:58:04.000000000 +0000 +++ mesa-19.0.1/src/gallium/drivers/freedreno/ir3/ir3_cp.c 1970-01-01 00:00:00.000000000 +0000 @@ -1,653 +0,0 @@ -/* - * Copyright (C) 2014 Rob Clark - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * the rights to use, copy, modify, merge, publish, distribute, sublicense, - * and/or sell copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice (including the next - * paragraph) shall be included in all copies or substantial portions of the - * Software. 
- * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL - * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - * - * Authors: - * Rob Clark - */ - -#include "freedreno_util.h" - -#include "ir3.h" -#include "ir3_shader.h" - -/* - * Copy Propagate: - */ - -struct ir3_cp_ctx { - struct ir3 *shader; - struct ir3_shader_variant *so; - unsigned immediate_idx; -}; - -/* is it a type preserving mov, with ok flags? */ -static bool is_eligible_mov(struct ir3_instruction *instr, bool allow_flags) -{ - if (is_same_type_mov(instr)) { - struct ir3_register *dst = instr->regs[0]; - struct ir3_register *src = instr->regs[1]; - struct ir3_instruction *src_instr = ssa(src); - - /* only if mov src is SSA (not const/immed): */ - if (!src_instr) - return false; - - /* no indirect: */ - if (dst->flags & IR3_REG_RELATIV) - return false; - if (src->flags & IR3_REG_RELATIV) - return false; - - if (src->flags & IR3_REG_ARRAY) - return false; - - if (!allow_flags) - if (src->flags & (IR3_REG_FABS | IR3_REG_FNEG | - IR3_REG_SABS | IR3_REG_SNEG | IR3_REG_BNOT)) - return false; - - /* TODO: remove this hack: */ - if (src_instr->opc == OPC_META_FO) - return false; - - return true; - } - return false; -} - -static unsigned cp_flags(unsigned flags) -{ - /* only considering these flags (at least for now): */ - flags &= (IR3_REG_CONST | IR3_REG_IMMED | - IR3_REG_FNEG | IR3_REG_FABS | - IR3_REG_SNEG | IR3_REG_SABS | - IR3_REG_BNOT | IR3_REG_RELATIV); - return flags; -} - -static bool valid_flags(struct ir3_instruction *instr, unsigned n, - unsigned flags) -{ - unsigned valid_flags; - flags = cp_flags(flags); - - /* If 
destination is indirect, then source cannot be.. at least - * I don't think so.. - */ - if ((instr->regs[0]->flags & IR3_REG_RELATIV) && - (flags & IR3_REG_RELATIV)) - return false; - - /* TODO it seems to *mostly* work to cp RELATIV, except we get some - * intermittent piglit variable-indexing fails. Newer blob driver - * doesn't seem to cp these. Possibly this is hw workaround? Not - * sure, but until that is understood better, lets just switch off - * cp for indirect src's: - */ - if (flags & IR3_REG_RELATIV) - return false; - - switch (opc_cat(instr->opc)) { - case 1: - valid_flags = IR3_REG_IMMED | IR3_REG_CONST | IR3_REG_RELATIV; - if (flags & ~valid_flags) - return false; - break; - case 2: - valid_flags = ir3_cat2_absneg(instr->opc) | - IR3_REG_CONST | IR3_REG_RELATIV; - - if (ir3_cat2_int(instr->opc)) - valid_flags |= IR3_REG_IMMED; - - if (flags & ~valid_flags) - return false; - - if (flags & (IR3_REG_CONST | IR3_REG_IMMED)) { - unsigned m = (n ^ 1) + 1; - /* cannot deal w/ const in both srcs: - * (note that some cat2 actually only have a single src) - */ - if (m < instr->regs_count) { - struct ir3_register *reg = instr->regs[m]; - if ((flags & IR3_REG_CONST) && (reg->flags & IR3_REG_CONST)) - return false; - if ((flags & IR3_REG_IMMED) && (reg->flags & IR3_REG_IMMED)) - return false; - } - /* cannot be const + ABS|NEG: */ - if (flags & (IR3_REG_FABS | IR3_REG_FNEG | - IR3_REG_SABS | IR3_REG_SNEG | IR3_REG_BNOT)) - return false; - } - break; - case 3: - valid_flags = ir3_cat3_absneg(instr->opc) | - IR3_REG_CONST | IR3_REG_RELATIV; - - if (flags & ~valid_flags) - return false; - - if (flags & (IR3_REG_CONST | IR3_REG_RELATIV)) { - /* cannot deal w/ const/relativ in 2nd src: */ - if (n == 1) - return false; - } - - if (flags & IR3_REG_CONST) { - /* cannot be const + ABS|NEG: */ - if (flags & (IR3_REG_FABS | IR3_REG_FNEG | - IR3_REG_SABS | IR3_REG_SNEG | IR3_REG_BNOT)) - return false; - } - break; - case 4: - /* seems like blob compiler avoids const as src.. 
*/ - /* TODO double check if this is still the case on a4xx */ - if (flags & (IR3_REG_CONST | IR3_REG_IMMED)) - return false; - if (flags & (IR3_REG_SABS | IR3_REG_SNEG)) - return false; - break; - case 5: - /* no flags allowed */ - if (flags) - return false; - break; - case 6: - valid_flags = IR3_REG_IMMED; - if (flags & ~valid_flags) - return false; - - if (flags & IR3_REG_IMMED) { - /* doesn't seem like we can have immediate src for store - * instructions: - * - * TODO this restriction could also apply to load instructions, - * but for load instructions this arg is the address (and not - * really sure any good way to test a hard-coded immed addr src) - */ - if (is_store(instr) && (n == 1)) - return false; - - if ((instr->opc == OPC_LDL) && (n != 1)) - return false; - - if ((instr->opc == OPC_STL) && (n != 2)) - return false; - - /* disallow CP into anything but the SSBO slot argument for - * atomics: - */ - if (is_atomic(instr->opc) && (n != 0)) - return false; - - if (is_atomic(instr->opc) && !(instr->flags & IR3_INSTR_G)) - return false; - } - - break; - } - - return true; -} - -/* propagate register flags from src to dst.. negates need special - * handling to cancel each other out. 
- */ -static void combine_flags(unsigned *dstflags, struct ir3_instruction *src) -{ - unsigned srcflags = src->regs[1]->flags; - - /* if what we are combining into already has (abs) flags, - * we can drop (neg) from src: - */ - if (*dstflags & IR3_REG_FABS) - srcflags &= ~IR3_REG_FNEG; - if (*dstflags & IR3_REG_SABS) - srcflags &= ~IR3_REG_SNEG; - - if (srcflags & IR3_REG_FABS) - *dstflags |= IR3_REG_FABS; - if (srcflags & IR3_REG_SABS) - *dstflags |= IR3_REG_SABS; - if (srcflags & IR3_REG_FNEG) - *dstflags ^= IR3_REG_FNEG; - if (srcflags & IR3_REG_SNEG) - *dstflags ^= IR3_REG_SNEG; - if (srcflags & IR3_REG_BNOT) - *dstflags ^= IR3_REG_BNOT; - - *dstflags &= ~IR3_REG_SSA; - *dstflags |= srcflags & IR3_REG_SSA; - *dstflags |= srcflags & IR3_REG_CONST; - *dstflags |= srcflags & IR3_REG_IMMED; - *dstflags |= srcflags & IR3_REG_RELATIV; - *dstflags |= srcflags & IR3_REG_ARRAY; - - /* if src of the src is boolean we can drop the (abs) since we know - * the source value is already a postitive integer. This cleans - * up the absnegs that get inserted when converting between nir and - * native boolean (see ir3_b2n/n2b) - */ - struct ir3_instruction *srcsrc = ssa(src->regs[1]); - if (srcsrc && is_bool(srcsrc)) - *dstflags &= ~IR3_REG_SABS; -} - -static struct ir3_register * -lower_immed(struct ir3_cp_ctx *ctx, struct ir3_register *reg, unsigned new_flags) -{ - unsigned swiz, idx, i; - - reg = ir3_reg_clone(ctx->shader, reg); - - /* in some cases, there are restrictions on (abs)/(neg) plus const.. 
- * so just evaluate those and clear the flags: - */ - if (new_flags & IR3_REG_SABS) { - reg->iim_val = abs(reg->iim_val); - new_flags &= ~IR3_REG_SABS; - } - - if (new_flags & IR3_REG_FABS) { - reg->fim_val = fabs(reg->fim_val); - new_flags &= ~IR3_REG_FABS; - } - - if (new_flags & IR3_REG_SNEG) { - reg->iim_val = -reg->iim_val; - new_flags &= ~IR3_REG_SNEG; - } - - if (new_flags & IR3_REG_FNEG) { - reg->fim_val = -reg->fim_val; - new_flags &= ~IR3_REG_FNEG; - } - - /* Reallocate for 4 more elements whenever it's necessary */ - if (ctx->immediate_idx == ctx->so->immediates_size * 4) { - ctx->so->immediates_size += 4; - ctx->so->immediates = realloc (ctx->so->immediates, - ctx->so->immediates_size * sizeof (ctx->so->immediates[0])); - } - - for (i = 0; i < ctx->immediate_idx; i++) { - swiz = i % 4; - idx = i / 4; - - if (ctx->so->immediates[idx].val[swiz] == reg->uim_val) { - break; - } - } - - if (i == ctx->immediate_idx) { - /* need to generate a new immediate: */ - swiz = i % 4; - idx = i / 4; - ctx->so->immediates[idx].val[swiz] = reg->uim_val; - ctx->so->immediates_count = idx + 1; - ctx->immediate_idx++; - } - - new_flags &= ~IR3_REG_IMMED; - new_flags |= IR3_REG_CONST; - reg->flags = new_flags; - reg->num = i + (4 * ctx->so->constbase.immediate); - - return reg; -} - -static void -unuse(struct ir3_instruction *instr) -{ - debug_assert(instr->use_count > 0); - - if (--instr->use_count == 0) { - struct ir3_block *block = instr->block; - - instr->barrier_class = 0; - instr->barrier_conflict = 0; - - /* we don't want to remove anything in keeps (which could - * be things like array store's) - */ - for (unsigned i = 0; i < block->keeps_count; i++) { - debug_assert(block->keeps[i] != instr); - } - } -} - -/** - * Handle cp for a given src register. 
This additionally handles - * the cases of collapsing immedate/const (which replace the src - * register with a non-ssa src) or collapsing mov's from relative - * src (which needs to also fixup the address src reference by the - * instruction). - */ -static void -reg_cp(struct ir3_cp_ctx *ctx, struct ir3_instruction *instr, - struct ir3_register *reg, unsigned n) -{ - struct ir3_instruction *src = ssa(reg); - - if (is_eligible_mov(src, true)) { - /* simple case, no immed/const/relativ, only mov's w/ ssa src: */ - struct ir3_register *src_reg = src->regs[1]; - unsigned new_flags = reg->flags; - - combine_flags(&new_flags, src); - - if (valid_flags(instr, n, new_flags)) { - if (new_flags & IR3_REG_ARRAY) { - debug_assert(!(reg->flags & IR3_REG_ARRAY)); - reg->array = src_reg->array; - } - reg->flags = new_flags; - reg->instr = ssa(src_reg); - - instr->barrier_class |= src->barrier_class; - instr->barrier_conflict |= src->barrier_conflict; - - unuse(src); - reg->instr->use_count++; - } - - } else if (is_same_type_mov(src) && - /* cannot collapse const/immed/etc into meta instrs: */ - !is_meta(instr)) { - /* immed/const/etc cases, which require some special handling: */ - struct ir3_register *src_reg = src->regs[1]; - unsigned new_flags = reg->flags; - - combine_flags(&new_flags, src); - - if (!valid_flags(instr, n, new_flags)) { - /* See if lowering an immediate to const would help. */ - if (valid_flags(instr, n, (new_flags & ~IR3_REG_IMMED) | IR3_REG_CONST)) { - debug_assert(new_flags & IR3_REG_IMMED); - instr->regs[n + 1] = lower_immed(ctx, src_reg, new_flags); - return; - } - - /* special case for "normal" mad instructions, we can - * try swapping the first two args if that fits better. - * - * the "plain" MAD's (ie. 
the ones that don't shift first - * src prior to multiply) can swap their first two srcs if - * src[0] is !CONST and src[1] is CONST: - */ - if ((n == 1) && is_mad(instr->opc) && - !(instr->regs[0 + 1]->flags & (IR3_REG_CONST | IR3_REG_RELATIV)) && - valid_flags(instr, 0, new_flags & ~IR3_REG_IMMED)) { - /* swap src[0] and src[1]: */ - struct ir3_register *tmp; - tmp = instr->regs[0 + 1]; - instr->regs[0 + 1] = instr->regs[1 + 1]; - instr->regs[1 + 1] = tmp; - - n = 0; - } else { - return; - } - } - - /* Here we handle the special case of mov from - * CONST and/or RELATIV. These need to be handled - * specially, because in the case of move from CONST - * there is no src ir3_instruction so we need to - * replace the ir3_register. And in the case of - * RELATIV we need to handle the address register - * dependency. - */ - if (src_reg->flags & IR3_REG_CONST) { - /* an instruction cannot reference two different - * address registers: - */ - if ((src_reg->flags & IR3_REG_RELATIV) && - conflicts(instr->address, reg->instr->address)) - return; - - /* This seems to be a hw bug, or something where the timings - * just somehow don't work out. This restriction may only - * apply if the first src is also CONST. - */ - if ((opc_cat(instr->opc) == 3) && (n == 2) && - (src_reg->flags & IR3_REG_RELATIV) && - (src_reg->array.offset == 0)) - return; - - src_reg = ir3_reg_clone(instr->block->shader, src_reg); - src_reg->flags = new_flags; - instr->regs[n+1] = src_reg; - - if (src_reg->flags & IR3_REG_RELATIV) - ir3_instr_set_address(instr, reg->instr->address); - - return; - } - - if ((src_reg->flags & IR3_REG_RELATIV) && - !conflicts(instr->address, reg->instr->address)) { - src_reg = ir3_reg_clone(instr->block->shader, src_reg); - src_reg->flags = new_flags; - instr->regs[n+1] = src_reg; - ir3_instr_set_address(instr, reg->instr->address); - - return; - } - - /* NOTE: seems we can only do immed integers, so don't - * need to care about float. 
But we do need to handle - * abs/neg *before* checking that the immediate requires - * few enough bits to encode: - * - * TODO: do we need to do something to avoid accidentally - * catching a float immed? - */ - if (src_reg->flags & IR3_REG_IMMED) { - int32_t iim_val = src_reg->iim_val; - - debug_assert((opc_cat(instr->opc) == 1) || - (opc_cat(instr->opc) == 6) || - ir3_cat2_int(instr->opc) || - (is_mad(instr->opc) && (n == 0))); - - if (new_flags & IR3_REG_SABS) - iim_val = abs(iim_val); - - if (new_flags & IR3_REG_SNEG) - iim_val = -iim_val; - - if (new_flags & IR3_REG_BNOT) - iim_val = ~iim_val; - - /* other than category 1 (mov) we can only encode up to 10 bits: */ - if ((instr->opc == OPC_MOV) || - !((iim_val & ~0x3ff) && (-iim_val & ~0x3ff))) { - new_flags &= ~(IR3_REG_SABS | IR3_REG_SNEG | IR3_REG_BNOT); - src_reg = ir3_reg_clone(instr->block->shader, src_reg); - src_reg->flags = new_flags; - src_reg->iim_val = iim_val; - instr->regs[n+1] = src_reg; - } else if (valid_flags(instr, n, (new_flags & ~IR3_REG_IMMED) | IR3_REG_CONST)) { - /* See if lowering an immediate to const would help. */ - instr->regs[n+1] = lower_immed(ctx, src_reg, new_flags); - } - - return; - } - } -} - -/* Handle special case of eliminating output mov, and similar cases where - * there isn't a normal "consuming" instruction. In this case we cannot - * collapse flags (ie. 
output mov from const, or w/ abs/neg flags, cannot - * be eliminated) - */ -static struct ir3_instruction * -eliminate_output_mov(struct ir3_instruction *instr) -{ - if (is_eligible_mov(instr, false)) { - struct ir3_register *reg = instr->regs[1]; - if (!(reg->flags & IR3_REG_ARRAY)) { - struct ir3_instruction *src_instr = ssa(reg); - debug_assert(src_instr); - return src_instr; - } - } - return instr; -} - -/** - * Find instruction src's which are mov's that can be collapsed, replacing - * the mov dst with the mov src - */ -static void -instr_cp(struct ir3_cp_ctx *ctx, struct ir3_instruction *instr) -{ - struct ir3_register *reg; - - if (instr->regs_count == 0) - return; - - if (ir3_instr_check_mark(instr)) - return; - - /* walk down the graph from each src: */ - foreach_src_n(reg, n, instr) { - struct ir3_instruction *src = ssa(reg); - - if (!src) - continue; - - instr_cp(ctx, src); - - /* TODO non-indirect access we could figure out which register - * we actually want and allow cp.. - */ - if (reg->flags & IR3_REG_ARRAY) - continue; - - /* Don't CP absneg into meta instructions, that won't end well: */ - if (is_meta(instr) && (src->opc != OPC_MOV)) - continue; - - reg_cp(ctx, instr, reg, n); - } - - if (instr->regs[0]->flags & IR3_REG_ARRAY) { - struct ir3_instruction *src = ssa(instr->regs[0]); - if (src) - instr_cp(ctx, src); - } - - if (instr->address) { - instr_cp(ctx, instr->address); - ir3_instr_set_address(instr, eliminate_output_mov(instr->address)); - } - - /* we can end up with extra cmps.s from frontend, which uses a - * - * cmps.s p0.x, cond, 0 - * - * as a way to mov into the predicate register. But frequently 'cond' - * is itself a cmps.s/cmps.f/cmps.u. So detect this special case and - * just re-write the instruction writing predicate register to get rid - * of the double cmps. 
- */ - if ((instr->opc == OPC_CMPS_S) && - (instr->regs[0]->num == regid(REG_P0, 0)) && - ssa(instr->regs[1]) && - (instr->regs[2]->flags & IR3_REG_IMMED) && - (instr->regs[2]->iim_val == 0)) { - struct ir3_instruction *cond = ssa(instr->regs[1]); - switch (cond->opc) { - case OPC_CMPS_S: - case OPC_CMPS_F: - case OPC_CMPS_U: - instr->opc = cond->opc; - instr->flags = cond->flags; - instr->cat2 = cond->cat2; - instr->address = cond->address; - instr->regs[1] = cond->regs[1]; - instr->regs[2] = cond->regs[2]; - instr->barrier_class |= cond->barrier_class; - instr->barrier_conflict |= cond->barrier_conflict; - unuse(cond); - break; - default: - break; - } - } -} - -void -ir3_cp(struct ir3 *ir, struct ir3_shader_variant *so) -{ - struct ir3_cp_ctx ctx = { - .shader = ir, - .so = so, - }; - - /* This is a bit annoying, and probably wouldn't be necessary if we - * tracked a reverse link from producing instruction to consumer. - * But we need to know when we've eliminated the last consumer of - * a mov, so we need to do a pass to first count consumers of a - * mov. 
- */ - list_for_each_entry (struct ir3_block, block, &ir->block_list, node) { - list_for_each_entry (struct ir3_instruction, instr, &block->instr_list, node) { - struct ir3_instruction *src; - - /* by the way, we don't account for false-dep's, so the CP - * pass should always happen before false-dep's are inserted - */ - debug_assert(instr->deps_count == 0); - - foreach_ssa_src(src, instr) { - src->use_count++; - } - } - } - - ir3_clear_mark(ir); - - for (unsigned i = 0; i < ir->noutputs; i++) { - if (ir->outputs[i]) { - instr_cp(&ctx, ir->outputs[i]); - ir->outputs[i] = eliminate_output_mov(ir->outputs[i]); - } - } - - list_for_each_entry (struct ir3_block, block, &ir->block_list, node) { - if (block->condition) { - instr_cp(&ctx, block->condition); - block->condition = eliminate_output_mov(block->condition); - } - - for (unsigned i = 0; i < block->keeps_count; i++) { - instr_cp(&ctx, block->keeps[i]); - block->keeps[i] = eliminate_output_mov(block->keeps[i]); - } - } -} diff -Nru mesa-18.3.3/src/gallium/drivers/freedreno/ir3/ir3_depth.c mesa-19.0.1/src/gallium/drivers/freedreno/ir3/ir3_depth.c --- mesa-18.3.3/src/gallium/drivers/freedreno/ir3/ir3_depth.c 2018-12-07 18:58:04.000000000 +0000 +++ mesa-19.0.1/src/gallium/drivers/freedreno/ir3/ir3_depth.c 1970-01-01 00:00:00.000000000 +0000 @@ -1,245 +0,0 @@ -/* - * Copyright (C) 2014 Rob Clark - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * the rights to use, copy, modify, merge, publish, distribute, sublicense, - * and/or sell copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice (including the next - * paragraph) shall be included in all copies or substantial portions of the - * Software. 
- * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL - * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - * - * Authors: - * Rob Clark - */ - -#include "util/u_math.h" - -#include "ir3.h" - -/* - * Instruction Depth: - * - * Calculates weighted instruction depth, ie. the sum of # of needed - * instructions plus delay slots back to original input (ie INPUT or - * CONST). That is to say, an instructions depth is: - * - * depth(instr) { - * d = 0; - * // for each src register: - * foreach (src in instr->regs[1..n]) - * d = max(d, delayslots(src->instr, n) + depth(src->instr)); - * return d + 1; - * } - * - * After an instruction's depth is calculated, it is inserted into the - * blocks depth sorted list, which is used by the scheduling pass. - */ - -/* generally don't count false dependencies, since this can just be - * something like a barrier, or SSBO store. The exception is array - * dependencies if the assigner is an array write and the consumer - * reads the same array. 
- */ -static bool -ignore_dep(struct ir3_instruction *assigner, - struct ir3_instruction *consumer, unsigned n) -{ - if (!__is_false_dep(consumer, n)) - return false; - - if (assigner->barrier_class & IR3_BARRIER_ARRAY_W) { - struct ir3_register *dst = assigner->regs[0]; - struct ir3_register *src; - - debug_assert(dst->flags & IR3_REG_ARRAY); - - foreach_src(src, consumer) { - if ((src->flags & IR3_REG_ARRAY) && - (dst->array.id == src->array.id)) { - return false; - } - } - } - - return true; -} - -/* calculate required # of delay slots between the instruction that - * assigns a value and the one that consumes - */ -int ir3_delayslots(struct ir3_instruction *assigner, - struct ir3_instruction *consumer, unsigned n) -{ - if (ignore_dep(assigner, consumer, n)) - return 0; - - /* worst case is cat1-3 (alu) -> cat4/5 needing 6 cycles, normal - * alu -> alu needs 3 cycles, cat4 -> alu and texture fetch - * handled with sync bits - */ - - if (is_meta(assigner)) - return 0; - - if (writes_addr(assigner)) - return 6; - - /* handled via sync flags: */ - if (is_sfu(assigner) || is_tex(assigner) || is_mem(assigner)) - return 0; - - /* assigner must be alu: */ - if (is_flow(consumer) || is_sfu(consumer) || is_tex(consumer) || - is_mem(consumer)) { - return 6; - } else if ((is_mad(consumer->opc) || is_madsh(consumer->opc)) && - (n == 3)) { - /* special case, 3rd src to cat3 not required on first cycle */ - return 1; - } else { - return 3; - } -} - -void -ir3_insert_by_depth(struct ir3_instruction *instr, struct list_head *list) -{ - /* remove from existing spot in list: */ - list_delinit(&instr->node); - - /* find where to re-insert instruction: */ - list_for_each_entry (struct ir3_instruction, pos, list, node) { - if (pos->depth > instr->depth) { - list_add(&instr->node, &pos->node); - return; - } - } - /* if we get here, we didn't find an insertion spot: */ - list_addtail(&instr->node, list); -} - -static void -ir3_instr_depth(struct ir3_instruction *instr, unsigned boost, 
bool falsedep) -{ - struct ir3_instruction *src; - - /* don't mark falsedep's as used, but otherwise process them normally: */ - if (!falsedep) - instr->flags &= ~IR3_INSTR_UNUSED; - - if (ir3_instr_check_mark(instr)) - return; - - instr->depth = 0; - - foreach_ssa_src_n(src, i, instr) { - unsigned sd; - - /* visit child to compute it's depth: */ - ir3_instr_depth(src, boost, __is_false_dep(instr, i)); - - /* for array writes, no need to delay on previous write: */ - if (i == 0) - continue; - - sd = ir3_delayslots(src, instr, i) + src->depth; - sd += boost; - - instr->depth = MAX2(instr->depth, sd); - } - - if (!is_meta(instr)) - instr->depth++; - - ir3_insert_by_depth(instr, &instr->block->instr_list); -} - -static bool -remove_unused_by_block(struct ir3_block *block) -{ - bool progress = false; - list_for_each_entry_safe (struct ir3_instruction, instr, &block->instr_list, node) { - if (instr->opc == OPC_END) - continue; - if (instr->flags & IR3_INSTR_UNUSED) { - list_delinit(&instr->node); - progress = true; - } - } - return progress; -} - -static bool -compute_depth_and_remove_unused(struct ir3 *ir) -{ - unsigned i; - bool progress = false; - - ir3_clear_mark(ir); - - /* initially mark everything as unused, we'll clear the flag as we - * visit the instructions: - */ - list_for_each_entry (struct ir3_block, block, &ir->block_list, node) { - list_for_each_entry (struct ir3_instruction, instr, &block->instr_list, node) { - instr->flags |= IR3_INSTR_UNUSED; - } - } - - for (i = 0; i < ir->noutputs; i++) - if (ir->outputs[i]) - ir3_instr_depth(ir->outputs[i], 0, false); - - list_for_each_entry (struct ir3_block, block, &ir->block_list, node) { - for (i = 0; i < block->keeps_count; i++) - ir3_instr_depth(block->keeps[i], 0, false); - - /* We also need to account for if-condition: */ - if (block->condition) - ir3_instr_depth(block->condition, 6, false); - } - - /* mark un-used instructions: */ - list_for_each_entry (struct ir3_block, block, &ir->block_list, node) { - 
progress |= remove_unused_by_block(block); - } - - /* note that we can end up with unused indirects, but we should - * not end up with unused predicates. - */ - for (i = 0; i < ir->indirects_count; i++) { - struct ir3_instruction *instr = ir->indirects[i]; - if (instr && (instr->flags & IR3_INSTR_UNUSED)) - ir->indirects[i] = NULL; - } - - /* cleanup unused inputs: */ - for (i = 0; i < ir->ninputs; i++) { - struct ir3_instruction *in = ir->inputs[i]; - if (in && (in->flags & IR3_INSTR_UNUSED)) - ir->inputs[i] = NULL; - } - - return progress; -} - -void -ir3_depth(struct ir3 *ir) -{ - bool progress; - do { - progress = compute_depth_and_remove_unused(ir); - } while (progress); -} diff -Nru mesa-18.3.3/src/gallium/drivers/freedreno/ir3/ir3_gallium.c mesa-19.0.1/src/gallium/drivers/freedreno/ir3/ir3_gallium.c --- mesa-18.3.3/src/gallium/drivers/freedreno/ir3/ir3_gallium.c 1970-01-01 00:00:00.000000000 +0000 +++ mesa-19.0.1/src/gallium/drivers/freedreno/ir3/ir3_gallium.c 2019-03-31 23:16:37.000000000 +0000 @@ -0,0 +1,643 @@ +/* + * Copyright (C) 2014 Rob Clark + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + * Authors: + * Rob Clark + */ + +#include "pipe/p_state.h" +#include "util/u_string.h" +#include "util/u_memory.h" +#include "util/u_inlines.h" +#include "util/u_format.h" +#include "tgsi/tgsi_dump.h" +#include "tgsi/tgsi_parse.h" + +#include "nir/tgsi_to_nir.h" + +#include "freedreno_context.h" +#include "freedreno_util.h" + +#include "ir3/ir3_shader.h" +#include "ir3/ir3_gallium.h" +#include "ir3/ir3_compiler.h" +#include "ir3/ir3_nir.h" + +static void +dump_shader_info(struct ir3_shader_variant *v, struct pipe_debug_callback *debug) +{ + if (!unlikely(fd_mesa_debug & FD_DBG_SHADERDB)) + return; + + pipe_debug_message(debug, SHADER_INFO, "\n" + "SHADER-DB: %s prog %d/%d: %u instructions, %u dwords\n" + "SHADER-DB: %s prog %d/%d: %u half, %u full\n" + "SHADER-DB: %s prog %d/%d: %u const, %u constlen\n" + "SHADER-DB: %s prog %d/%d: %u (ss), %u (sy)\n", + ir3_shader_stage(v->shader), + v->shader->id, v->id, + v->info.instrs_count, + v->info.sizedwords, + ir3_shader_stage(v->shader), + v->shader->id, v->id, + v->info.max_half_reg + 1, + v->info.max_reg + 1, + ir3_shader_stage(v->shader), + v->shader->id, v->id, + v->info.max_const + 1, + v->constlen, + ir3_shader_stage(v->shader), + v->shader->id, v->id, + v->info.ss, v->info.sy); +} + +struct ir3_shader_variant * +ir3_shader_variant(struct ir3_shader *shader, struct ir3_shader_key key, + bool binning_pass, struct pipe_debug_callback *debug) +{ + struct ir3_shader_variant *v; + bool created = false; + + /* some shader key values only apply to vertex or frag shader, + * so normalize the key to avoid constructing multiple identical + * variants: + */ + ir3_normalize_key(&key, shader->type); + + v = ir3_shader_get_variant(shader, &key, 
binning_pass, &created); + + if (created) { + dump_shader_info(v, debug); + } + + return v; +} + +static void +copy_stream_out(struct ir3_stream_output_info *i, + const struct pipe_stream_output_info *p) +{ + STATIC_ASSERT(ARRAY_SIZE(i->stride) == ARRAY_SIZE(p->stride)); + STATIC_ASSERT(ARRAY_SIZE(i->output) == ARRAY_SIZE(p->output)); + + i->num_outputs = p->num_outputs; + for (int n = 0; n < ARRAY_SIZE(i->stride); n++) + i->stride[n] = p->stride[n]; + + for (int n = 0; n < ARRAY_SIZE(i->output); n++) { + i->output[n].register_index = p->output[n].register_index; + i->output[n].start_component = p->output[n].start_component; + i->output[n].num_components = p->output[n].num_components; + i->output[n].output_buffer = p->output[n].output_buffer; + i->output[n].dst_offset = p->output[n].dst_offset; + i->output[n].stream = p->output[n].stream; + } +} + +struct ir3_shader * +ir3_shader_create(struct ir3_compiler *compiler, + const struct pipe_shader_state *cso, gl_shader_stage type, + struct pipe_debug_callback *debug) +{ + nir_shader *nir; + if (cso->type == PIPE_SHADER_IR_NIR) { + /* we take ownership of the reference: */ + nir = cso->ir.nir; + } else { + debug_assert(cso->type == PIPE_SHADER_IR_TGSI); + if (ir3_shader_debug & IR3_DBG_DISASM) { + tgsi_dump(cso->tokens, 0); + } + nir = ir3_tgsi_to_nir(compiler, cso->tokens); + } + + struct ir3_shader *shader = ir3_shader_from_nir(compiler, nir); + + copy_stream_out(&shader->stream_output, &cso->stream_output); + + if (fd_mesa_debug & FD_DBG_SHADERDB) { + /* if shader-db run, create a standard variant immediately + * (as otherwise nothing will trigger the shader to be + * actually compiled) + */ + static struct ir3_shader_key key; + memset(&key, 0, sizeof(key)); + ir3_shader_variant(shader, key, false, debug); + } + return shader; +} + +/* a bit annoying that compute-shader and normal shader state objects + * aren't a bit more aligned. 
+ */ +struct ir3_shader * +ir3_shader_create_compute(struct ir3_compiler *compiler, + const struct pipe_compute_state *cso, + struct pipe_debug_callback *debug) +{ + nir_shader *nir; + if (cso->ir_type == PIPE_SHADER_IR_NIR) { + /* we take ownership of the reference: */ + nir = (nir_shader *)cso->prog; + } else { + debug_assert(cso->ir_type == PIPE_SHADER_IR_TGSI); + if (ir3_shader_debug & IR3_DBG_DISASM) { + tgsi_dump(cso->prog, 0); + } + nir = ir3_tgsi_to_nir(compiler, cso->prog); + } + + struct ir3_shader *shader = ir3_shader_from_nir(compiler, nir); + + return shader; +} + +struct nir_shader * +ir3_tgsi_to_nir(struct ir3_compiler *compiler, const struct tgsi_token *tokens) +{ + return tgsi_to_nir(tokens, ir3_get_compiler_options(compiler)); +} + +/* This has to reach into the fd_context a bit more than the rest of + * ir3, but it needs to be aligned with the compiler, so both agree + * on which const regs hold what. And the logic is identical between + * a3xx/a4xx, the only difference is small details in the actual + * CP_LOAD_STATE packets (which is handled inside the generation + * specific ctx->emit_const(_bo)() fxns) + */ + +#include "freedreno_resource.h" + +static inline bool +is_stateobj(struct fd_ringbuffer *ring) +{ + /* XXX this is an ugly way to differentiate.. 
*/ + return !!(ring->flags & FD_RINGBUFFER_STREAMING); +} + +static inline void +ring_wfi(struct fd_batch *batch, struct fd_ringbuffer *ring) +{ + /* when we emit const state via ring (IB2) we need a WFI, but when + * it is emit'd via stateobj, we don't + */ + if (is_stateobj(ring)) + return; + + fd_wfi(batch, ring); +} + +static void +emit_user_consts(struct fd_context *ctx, const struct ir3_shader_variant *v, + struct fd_ringbuffer *ring, struct fd_constbuf_stateobj *constbuf) +{ + const unsigned index = 0; /* user consts are index 0 */ + + if (constbuf->enabled_mask & (1 << index)) { + struct pipe_constant_buffer *cb = &constbuf->cb[index]; + unsigned size = align(cb->buffer_size, 4) / 4; /* size in dwords */ + + /* in particular, with binning shader we may end up with + * unused consts, ie. we could end up w/ constlen that is + * smaller than first_driver_param. In that case truncate + * the user consts early to avoid HLSQ lockup caused by + * writing too many consts + */ + uint32_t max_const = MIN2(v->num_uniforms, v->constlen); + + // I expect that size should be a multiple of vec4's: + assert(size == align(size, 4)); + + /* and even if the start of the const buffer is before + * first_immediate, the end may not be: + */ + size = MIN2(size, 4 * max_const); + + if (size > 0) { + ring_wfi(ctx->batch, ring); + ctx->emit_const(ring, v->type, 0, + cb->buffer_offset, size, + cb->user_buffer, cb->buffer); + } + } +} + +static void +emit_ubos(struct fd_context *ctx, const struct ir3_shader_variant *v, + struct fd_ringbuffer *ring, struct fd_constbuf_stateobj *constbuf) +{ + uint32_t offset = v->constbase.ubo; + if (v->constlen > offset) { + uint32_t params = v->num_ubos; + uint32_t offsets[params]; + struct pipe_resource *prscs[params]; + + for (uint32_t i = 0; i < params; i++) { + const uint32_t index = i + 1; /* UBOs start at index 1 */ + struct pipe_constant_buffer *cb = &constbuf->cb[index]; + assert(!cb->user_buffer); + + if ((constbuf->enabled_mask & (1 << 
index)) && cb->buffer) { + offsets[i] = cb->buffer_offset; + prscs[i] = cb->buffer; + } else { + offsets[i] = 0; + prscs[i] = NULL; + } + } + + ring_wfi(ctx->batch, ring); + ctx->emit_const_bo(ring, v->type, false, offset * 4, params, prscs, offsets); + } +} + +static void +emit_ssbo_sizes(struct fd_context *ctx, const struct ir3_shader_variant *v, + struct fd_ringbuffer *ring, struct fd_shaderbuf_stateobj *sb) +{ + uint32_t offset = v->constbase.ssbo_sizes; + if (v->constlen > offset) { + uint32_t sizes[align(v->const_layout.ssbo_size.count, 4)]; + unsigned mask = v->const_layout.ssbo_size.mask; + + while (mask) { + unsigned index = u_bit_scan(&mask); + unsigned off = v->const_layout.ssbo_size.off[index]; + sizes[off] = sb->sb[index].buffer_size; + } + + ring_wfi(ctx->batch, ring); + ctx->emit_const(ring, v->type, offset * 4, + 0, ARRAY_SIZE(sizes), sizes, NULL); + } +} + +static void +emit_image_dims(struct fd_context *ctx, const struct ir3_shader_variant *v, + struct fd_ringbuffer *ring, struct fd_shaderimg_stateobj *si) +{ + uint32_t offset = v->constbase.image_dims; + if (v->constlen > offset) { + uint32_t dims[align(v->const_layout.image_dims.count, 4)]; + unsigned mask = v->const_layout.image_dims.mask; + + while (mask) { + struct pipe_image_view *img; + struct fd_resource *rsc; + unsigned index = u_bit_scan(&mask); + unsigned off = v->const_layout.image_dims.off[index]; + + img = &si->si[index]; + rsc = fd_resource(img->resource); + + dims[off + 0] = util_format_get_blocksize(img->format); + if (img->resource->target != PIPE_BUFFER) { + unsigned lvl = img->u.tex.level; + /* note for 2d/cube/etc images, even if re-interpreted + * as a different color format, the pixel size should + * be the same, so use original dimensions for y and z + * stride: + */ + dims[off + 1] = rsc->slices[lvl].pitch * rsc->cpp; + /* see corresponding logic in fd_resource_offset(): */ + if (rsc->layer_first) { + dims[off + 2] = rsc->layer_size; + } else { + dims[off + 2] = 
rsc->slices[lvl].size0; + } + } else { + /* For buffer-backed images, the log2 of the format's + * bytes-per-pixel is placed on the 2nd slot. This is useful + * when emitting image_size instructions, for which we need + * to divide by bpp for image buffers. Since the bpp + * can only be power-of-two, the division is implemented + * as a SHR, and for that it is handy to have the log2 of + * bpp as a constant. (log2 = first-set-bit - 1) + */ + dims[off + 1] = ffs(dims[off + 0]) - 1; + } + } + + ring_wfi(ctx->batch, ring); + ctx->emit_const(ring, v->type, offset * 4, + 0, ARRAY_SIZE(dims), dims, NULL); + } +} + +static void +emit_immediates(struct fd_context *ctx, const struct ir3_shader_variant *v, + struct fd_ringbuffer *ring) +{ + int size = v->immediates_count; + uint32_t base = v->constbase.immediate; + + /* truncate size to avoid writing constants that shader + * does not use: + */ + size = MIN2(size + base, v->constlen) - base; + + /* convert out of vec4: */ + base *= 4; + size *= 4; + + if (size > 0) { + ring_wfi(ctx->batch, ring); + ctx->emit_const(ring, v->type, base, + 0, size, v->immediates[0].val, NULL); + } +} + +/* emit stream-out buffers: */ +static void +emit_tfbos(struct fd_context *ctx, const struct ir3_shader_variant *v, + struct fd_ringbuffer *ring) +{ + /* streamout addresses after driver-params: */ + uint32_t offset = v->constbase.tfbo; + if (v->constlen > offset) { + struct fd_streamout_stateobj *so = &ctx->streamout; + struct ir3_stream_output_info *info = &v->shader->stream_output; + uint32_t params = 4; + uint32_t offsets[params]; + struct pipe_resource *prscs[params]; + + for (uint32_t i = 0; i < params; i++) { + struct pipe_stream_output_target *target = so->targets[i]; + + if (target) { + offsets[i] = (so->offsets[i] * info->stride[i] * 4) + + target->buffer_offset; + prscs[i] = target->buffer; + } else { + offsets[i] = 0; + prscs[i] = NULL; + } + } + + ring_wfi(ctx->batch, ring); + ctx->emit_const_bo(ring, v->type, true, offset * 4, 
params, prscs, offsets); + } +} + +static uint32_t +max_tf_vtx(struct fd_context *ctx, const struct ir3_shader_variant *v) +{ + struct fd_streamout_stateobj *so = &ctx->streamout; + struct ir3_stream_output_info *info = &v->shader->stream_output; + uint32_t maxvtxcnt = 0x7fffffff; + + if (ctx->screen->gpu_id >= 500) + return 0; + if (v->binning_pass) + return 0; + if (v->shader->stream_output.num_outputs == 0) + return 0; + if (so->num_targets == 0) + return 0; + + /* offset to write to is: + * + * total_vtxcnt = vtxcnt + offsets[i] + * offset = total_vtxcnt * stride[i] + * + * offset = vtxcnt * stride[i] ; calculated in shader + * + offsets[i] * stride[i] ; calculated at emit_tfbos() + * + * assuming for each vtx, each target buffer will have data written + * up to 'offset + stride[i]', that leaves maxvtxcnt as: + * + * buffer_size = (maxvtxcnt * stride[i]) + stride[i] + * maxvtxcnt = (buffer_size - stride[i]) / stride[i] + * + * but shader is actually doing a less-than (rather than less-than- + * equal) check, so we can drop the -stride[i]. + * + * TODO is assumption about `offset + stride[i]` legit? + */ + for (unsigned i = 0; i < so->num_targets; i++) { + struct pipe_stream_output_target *target = so->targets[i]; + unsigned stride = info->stride[i] * 4; /* convert dwords->bytes */ + if (target) { + uint32_t max = target->buffer_size / stride; + maxvtxcnt = MIN2(maxvtxcnt, max); + } + } + + return maxvtxcnt; +} + +static void +emit_common_consts(const struct ir3_shader_variant *v, struct fd_ringbuffer *ring, + struct fd_context *ctx, enum pipe_shader_type t) +{ + enum fd_dirty_shader_state dirty = ctx->dirty_shader[t]; + + /* When we use CP_SET_DRAW_STATE objects to emit constant state, + * if we emit any of it we need to emit all. 
This is because + * we are using the same state-group-id each time for uniform + * state, and if previous update is never evaluated (due to no + * visible primitives in the current tile) then the new stateobj + * completely replaces the old one. + * + * Possibly if we split up different parts of the const state to + * different state-objects we could avoid this. + */ + if (dirty && is_stateobj(ring)) + dirty = ~0; + + if (dirty & (FD_DIRTY_SHADER_PROG | FD_DIRTY_SHADER_CONST)) { + struct fd_constbuf_stateobj *constbuf; + bool shader_dirty; + + constbuf = &ctx->constbuf[t]; + shader_dirty = !!(dirty & FD_DIRTY_SHADER_PROG); + + emit_user_consts(ctx, v, ring, constbuf); + emit_ubos(ctx, v, ring, constbuf); + if (shader_dirty) + emit_immediates(ctx, v, ring); + } + + if (dirty & (FD_DIRTY_SHADER_PROG | FD_DIRTY_SHADER_SSBO)) { + struct fd_shaderbuf_stateobj *sb = &ctx->shaderbuf[t]; + emit_ssbo_sizes(ctx, v, ring, sb); + } + + if (dirty & (FD_DIRTY_SHADER_PROG | FD_DIRTY_SHADER_IMAGE)) { + struct fd_shaderimg_stateobj *si = &ctx->shaderimg[t]; + emit_image_dims(ctx, v, ring, si); + } +} + +void +ir3_emit_vs_consts(const struct ir3_shader_variant *v, struct fd_ringbuffer *ring, + struct fd_context *ctx, const struct pipe_draw_info *info) +{ + debug_assert(v->type == MESA_SHADER_VERTEX); + + emit_common_consts(v, ring, ctx, PIPE_SHADER_VERTEX); + + /* emit driver params every time: */ + /* TODO skip emit if shader doesn't use driver params to avoid WFI.. */ + if (info) { + uint32_t offset = v->constbase.driver_param; + if (v->constlen > offset) { + uint32_t vertex_params[IR3_DP_VS_COUNT] = { + [IR3_DP_VTXID_BASE] = info->index_size ? 
+ info->index_bias : info->start, + [IR3_DP_VTXCNT_MAX] = max_tf_vtx(ctx, v), + }; + /* if no user-clip-planes, we don't need to emit the + * entire thing: + */ + uint32_t vertex_params_size = 4; + + if (v->key.ucp_enables) { + struct pipe_clip_state *ucp = &ctx->ucp; + unsigned pos = IR3_DP_UCP0_X; + for (unsigned i = 0; pos <= IR3_DP_UCP7_W; i++) { + for (unsigned j = 0; j < 4; j++) { + vertex_params[pos] = fui(ucp->ucp[i][j]); + pos++; + } + } + vertex_params_size = ARRAY_SIZE(vertex_params); + } + + ring_wfi(ctx->batch, ring); + + bool needs_vtxid_base = + ir3_find_sysval_regid(v, SYSTEM_VALUE_VERTEX_ID_ZERO_BASE) != regid(63, 0); + + /* for indirect draw, we need to copy VTXID_BASE from + * indirect-draw parameters buffer.. which is annoying + * and means we can't easily emit these consts in cmd + * stream so need to copy them to bo. + */ + if (info->indirect && needs_vtxid_base) { + struct pipe_draw_indirect_info *indirect = info->indirect; + struct pipe_resource *vertex_params_rsc = + pipe_buffer_create(&ctx->screen->base, + PIPE_BIND_CONSTANT_BUFFER, PIPE_USAGE_STREAM, + vertex_params_size * 4); + unsigned src_off = info->indirect->offset;; + void *ptr; + + ptr = fd_bo_map(fd_resource(vertex_params_rsc)->bo); + memcpy(ptr, vertex_params, vertex_params_size * 4); + + if (info->index_size) { + /* indexed draw, index_bias is 4th field: */ + src_off += 3 * 4; + } else { + /* non-indexed draw, start is 3rd field: */ + src_off += 2 * 4; + } + + /* copy index_bias or start from draw params: */ + ctx->mem_to_mem(ring, vertex_params_rsc, 0, + indirect->buffer, src_off, 1); + + ctx->emit_const(ring, MESA_SHADER_VERTEX, offset * 4, 0, + vertex_params_size, NULL, vertex_params_rsc); + + pipe_resource_reference(&vertex_params_rsc, NULL); + } else { + ctx->emit_const(ring, MESA_SHADER_VERTEX, offset * 4, 0, + vertex_params_size, vertex_params, NULL); + } + + /* if needed, emit stream-out buffer addresses: */ + if (vertex_params[IR3_DP_VTXCNT_MAX] > 0) { + emit_tfbos(ctx, 
v, ring); + } + } + } +} + +void +ir3_emit_fs_consts(const struct ir3_shader_variant *v, struct fd_ringbuffer *ring, + struct fd_context *ctx) +{ + debug_assert(v->type == MESA_SHADER_FRAGMENT); + + emit_common_consts(v, ring, ctx, PIPE_SHADER_FRAGMENT); +} + +/* emit compute-shader consts: */ +void +ir3_emit_cs_consts(const struct ir3_shader_variant *v, struct fd_ringbuffer *ring, + struct fd_context *ctx, const struct pipe_grid_info *info) +{ + debug_assert(gl_shader_stage_is_compute(v->type)); + + emit_common_consts(v, ring, ctx, PIPE_SHADER_COMPUTE); + + /* emit compute-shader driver-params: */ + uint32_t offset = v->constbase.driver_param; + if (v->constlen > offset) { + ring_wfi(ctx->batch, ring); + + if (info->indirect) { + struct pipe_resource *indirect = NULL; + unsigned indirect_offset; + + /* This is a bit awkward, but CP_LOAD_STATE.EXT_SRC_ADDR needs + * to be aligned more strongly than 4 bytes. So in this case + * we need a temporary buffer to copy NumWorkGroups.xyz to. + * + * TODO if previous compute job is writing to info->indirect, + * we might need a WFI.. but since we currently flush for each + * compute job, we are probably ok for now. 
+ */ + if (info->indirect_offset & 0xf) { + indirect = pipe_buffer_create(&ctx->screen->base, + PIPE_BIND_COMMAND_ARGS_BUFFER, PIPE_USAGE_STREAM, + 0x1000); + indirect_offset = 0; + + ctx->mem_to_mem(ring, indirect, 0, info->indirect, + info->indirect_offset, 3); + } else { + pipe_resource_reference(&indirect, info->indirect); + indirect_offset = info->indirect_offset; + } + + ctx->emit_const(ring, MESA_SHADER_COMPUTE, offset * 4, + indirect_offset, 4, NULL, indirect); + + pipe_resource_reference(&indirect, NULL); + } else { + uint32_t compute_params[IR3_DP_CS_COUNT] = { + [IR3_DP_NUM_WORK_GROUPS_X] = info->grid[0], + [IR3_DP_NUM_WORK_GROUPS_Y] = info->grid[1], + [IR3_DP_NUM_WORK_GROUPS_Z] = info->grid[2], + [IR3_DP_LOCAL_GROUP_SIZE_X] = info->block[0], + [IR3_DP_LOCAL_GROUP_SIZE_Y] = info->block[1], + [IR3_DP_LOCAL_GROUP_SIZE_Z] = info->block[2], + }; + + ctx->emit_const(ring, MESA_SHADER_COMPUTE, offset * 4, 0, + ARRAY_SIZE(compute_params), compute_params, NULL); + } + } +} diff -Nru mesa-18.3.3/src/gallium/drivers/freedreno/ir3/ir3_gallium.h mesa-19.0.1/src/gallium/drivers/freedreno/ir3/ir3_gallium.h --- mesa-18.3.3/src/gallium/drivers/freedreno/ir3/ir3_gallium.h 1970-01-01 00:00:00.000000000 +0000 +++ mesa-19.0.1/src/gallium/drivers/freedreno/ir3/ir3_gallium.h 2019-03-31 23:16:37.000000000 +0000 @@ -0,0 +1,55 @@ +/* + * Copyright (C) 2014 Rob Clark + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * 
Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + * Authors: + * Rob Clark + */ + +#ifndef IR3_GALLIUM_H_ +#define IR3_GALLIUM_H_ + +#include "pipe/p_state.h" +#include "ir3/ir3_shader.h" + +struct ir3_shader * ir3_shader_create(struct ir3_compiler *compiler, + const struct pipe_shader_state *cso, gl_shader_stage type, + struct pipe_debug_callback *debug); +struct ir3_shader * +ir3_shader_create_compute(struct ir3_compiler *compiler, + const struct pipe_compute_state *cso, + struct pipe_debug_callback *debug); +struct ir3_shader_variant * ir3_shader_variant(struct ir3_shader *shader, + struct ir3_shader_key key, bool binning_pass, + struct pipe_debug_callback *debug); +struct nir_shader * ir3_tgsi_to_nir(struct ir3_compiler *compiler, + const struct tgsi_token *tokens); + +struct fd_ringbuffer; +struct fd_context; +void ir3_emit_vs_consts(const struct ir3_shader_variant *v, struct fd_ringbuffer *ring, + struct fd_context *ctx, const struct pipe_draw_info *info); +void ir3_emit_fs_consts(const struct ir3_shader_variant *v, struct fd_ringbuffer *ring, + struct fd_context *ctx); +void ir3_emit_cs_consts(const struct ir3_shader_variant *v, struct fd_ringbuffer *ring, + struct fd_context *ctx, const struct pipe_grid_info *info); + +#endif /* IR3_GALLIUM_H_ */ diff -Nru mesa-18.3.3/src/gallium/drivers/freedreno/ir3/ir3_group.c mesa-19.0.1/src/gallium/drivers/freedreno/ir3/ir3_group.c --- mesa-18.3.3/src/gallium/drivers/freedreno/ir3/ir3_group.c 2018-12-07 18:58:04.000000000 +0000 +++ 
mesa-19.0.1/src/gallium/drivers/freedreno/ir3/ir3_group.c 1970-01-01 00:00:00.000000000 +0000 @@ -1,276 +0,0 @@ -/* - * Copyright (C) 2014 Rob Clark - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * the rights to use, copy, modify, merge, publish, distribute, sublicense, - * and/or sell copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice (including the next - * paragraph) shall be included in all copies or substantial portions of the - * Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL - * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - * - * Authors: - * Rob Clark - */ - -#include "freedreno_util.h" - -#include "ir3.h" - -/* - * Find/group instruction neighbors: - */ - -/* bleh.. we need to do the same group_n() thing for both inputs/outputs - * (where we have a simple instr[] array), and fanin nodes (where we have - * an extra indirection via reg->instr). 
- */ -struct group_ops { - struct ir3_instruction *(*get)(void *arr, int idx); - void (*insert_mov)(void *arr, int idx, struct ir3_instruction *instr); -}; - -static struct ir3_instruction *arr_get(void *arr, int idx) -{ - return ((struct ir3_instruction **)arr)[idx]; -} -static void arr_insert_mov_out(void *arr, int idx, struct ir3_instruction *instr) -{ - ((struct ir3_instruction **)arr)[idx] = - ir3_MOV(instr->block, instr, TYPE_F32); -} -static void arr_insert_mov_in(void *arr, int idx, struct ir3_instruction *instr) -{ - /* so, we can't insert a mov in front of a meta:in.. and the downstream - * instruction already has a pointer to 'instr'. So we cheat a bit and - * morph the meta:in instruction into a mov and insert a new meta:in - * in front. - */ - struct ir3_instruction *in; - - debug_assert(instr->regs_count == 1); - - in = ir3_instr_create(instr->block, OPC_META_INPUT); - in->inout.block = instr->block; - ir3_reg_create(in, instr->regs[0]->num, 0); - - /* create src reg for meta:in and fixup to now be a mov: */ - ir3_reg_create(instr, 0, IR3_REG_SSA)->instr = in; - instr->opc = OPC_MOV; - instr->cat1.src_type = TYPE_F32; - instr->cat1.dst_type = TYPE_F32; - - ((struct ir3_instruction **)arr)[idx] = in; -} -static struct group_ops arr_ops_out = { arr_get, arr_insert_mov_out }; -static struct group_ops arr_ops_in = { arr_get, arr_insert_mov_in }; - -static struct ir3_instruction *instr_get(void *arr, int idx) -{ - return ssa(((struct ir3_instruction *)arr)->regs[idx+1]); -} -static void -instr_insert_mov(void *arr, int idx, struct ir3_instruction *instr) -{ - ((struct ir3_instruction *)arr)->regs[idx+1]->instr = - ir3_MOV(instr->block, instr, TYPE_F32); -} -static struct group_ops instr_ops = { instr_get, instr_insert_mov }; - -/* verify that cur != instr, but cur is also not in instr's neighbor-list: */ -static bool -in_neighbor_list(struct ir3_instruction *instr, struct ir3_instruction *cur, int pos) -{ - int idx = 0; - - if (!instr) - return false; - - 
if (instr == cur) - return true; - - for (instr = ir3_neighbor_first(instr); instr; instr = instr->cp.right) - if ((idx++ != pos) && (instr == cur)) - return true; - - return false; -} - -static void -group_n(struct group_ops *ops, void *arr, unsigned n) -{ - unsigned i, j; - - /* first pass, figure out what has conflicts and needs a mov - * inserted. Do this up front, before starting to setup - * left/right neighbor pointers. Trying to do it in a single - * pass could result in a situation where we can't even setup - * the mov's right neighbor ptr if the next instr also needs - * a mov. - */ -restart: - for (i = 0; i < n; i++) { - struct ir3_instruction *instr = ops->get(arr, i); - if (instr) { - struct ir3_instruction *left = (i > 0) ? ops->get(arr, i - 1) : NULL; - struct ir3_instruction *right = (i < (n-1)) ? ops->get(arr, i + 1) : NULL; - bool conflict; - - /* check for left/right neighbor conflicts: */ - conflict = conflicts(instr->cp.left, left) || - conflicts(instr->cp.right, right); - - /* Mixing array elements and higher register classes - * (ie. groups) doesn't really work out in RA. See: - * - * https://trello.com/c/DqeDkeVf/156-bug-with-stk-70frag - */ - if (instr->regs[0]->flags & IR3_REG_ARRAY) - conflict = true; - - /* we also can't have an instr twice in the group: */ - for (j = i + 1; (j < n) && !conflict; j++) - if (in_neighbor_list(ops->get(arr, j), instr, i)) - conflict = true; - - if (conflict) { - ops->insert_mov(arr, i, instr); - /* inserting the mov may have caused a conflict - * against the previous: - */ - goto restart; - } - } - } - - /* second pass, now that we've inserted mov's, fixup left/right - * neighbors. This is guaranteed to succeed, since by definition - * the newly inserted mov's cannot conflict with anything. - */ - for (i = 0; i < n; i++) { - struct ir3_instruction *instr = ops->get(arr, i); - if (instr) { - struct ir3_instruction *left = (i > 0) ? ops->get(arr, i - 1) : NULL; - struct ir3_instruction *right = (i < (n-1)) ? 
ops->get(arr, i + 1) : NULL; - - debug_assert(!conflicts(instr->cp.left, left)); - if (left) { - instr->cp.left_cnt++; - instr->cp.left = left; - } - - debug_assert(!conflicts(instr->cp.right, right)); - if (right) { - instr->cp.right_cnt++; - instr->cp.right = right; - } - } - } -} - -static void -instr_find_neighbors(struct ir3_instruction *instr) -{ - struct ir3_instruction *src; - - if (ir3_instr_check_mark(instr)) - return; - - if (instr->opc == OPC_META_FI) - group_n(&instr_ops, instr, instr->regs_count - 1); - - foreach_ssa_src(src, instr) - instr_find_neighbors(src); -} - -/* a bit of sadness.. we can't have "holes" in inputs from PoV of - * register assignment, they still need to be grouped together. So - * we need to insert dummy/padding instruction for grouping, and - * then take it back out again before anyone notices. - */ -static void -pad_and_group_input(struct ir3_instruction **input, unsigned n) -{ - int i, mask = 0; - struct ir3_block *block = NULL; - - for (i = n - 1; i >= 0; i--) { - struct ir3_instruction *instr = input[i]; - if (instr) { - block = instr->block; - } else if (block) { - instr = ir3_NOP(block); - ir3_reg_create(instr, 0, IR3_REG_SSA); /* dummy dst */ - input[i] = instr; - mask |= (1 << i); - } - } - - group_n(&arr_ops_in, input, n); - - for (i = 0; i < n; i++) { - if (mask & (1 << i)) - input[i] = NULL; - } -} - -static void -find_neighbors(struct ir3 *ir) -{ - unsigned i; - - /* shader inputs/outputs themselves must be contiguous as well: - * - * NOTE: group inputs first, since we only insert mov's - * *before* the conflicted instr (and that would go badly - * for inputs). By doing inputs first, we should never - * have a conflict on inputs.. pushing any conflict to - * resolve to the outputs, for stuff like: - * - * MOV OUT[n], IN[m].wzyx - * - * NOTE: we assume here inputs/outputs are grouped in vec4. 
- * This logic won't quite cut it if we don't align smaller - * on vec4 boundaries - */ - for (i = 0; i < ir->ninputs; i += 4) - pad_and_group_input(&ir->inputs[i], 4); - for (i = 0; i < ir->noutputs; i += 4) - group_n(&arr_ops_out, &ir->outputs[i], 4); - - for (i = 0; i < ir->noutputs; i++) { - if (ir->outputs[i]) { - struct ir3_instruction *instr = ir->outputs[i]; - instr_find_neighbors(instr); - } - } - - list_for_each_entry (struct ir3_block, block, &ir->block_list, node) { - for (i = 0; i < block->keeps_count; i++) { - struct ir3_instruction *instr = block->keeps[i]; - instr_find_neighbors(instr); - } - - /* We also need to account for if-condition: */ - if (block->condition) - instr_find_neighbors(block->condition); - } -} - -void -ir3_group(struct ir3 *ir) -{ - ir3_clear_mark(ir); - find_neighbors(ir); -} diff -Nru mesa-18.3.3/src/gallium/drivers/freedreno/ir3/ir3.h mesa-19.0.1/src/gallium/drivers/freedreno/ir3/ir3.h --- mesa-18.3.3/src/gallium/drivers/freedreno/ir3/ir3.h 2018-12-07 18:58:04.000000000 +0000 +++ mesa-19.0.1/src/gallium/drivers/freedreno/ir3/ir3.h 1970-01-01 00:00:00.000000000 +0000 @@ -1,1393 +0,0 @@ -/* - * Copyright (c) 2013 Rob Clark - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * the rights to use, copy, modify, merge, publish, distribute, sublicense, - * and/or sell copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice (including the next - * paragraph) shall be included in all copies or substantial portions of the - * Software. 
- * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL - * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ - -#ifndef IR3_H_ -#define IR3_H_ - -#include -#include - -#include "util/u_debug.h" -#include "util/list.h" - -#include "instr-a3xx.h" -#include "disasm.h" /* TODO move 'enum shader_t' somewhere else.. */ - -/* low level intermediate representation of an adreno shader program */ - -struct ir3_compiler; -struct ir3; -struct ir3_instruction; -struct ir3_block; - -struct ir3_info { - uint32_t gpu_id; - uint16_t sizedwords; - uint16_t instrs_count; /* expanded to account for rpt's */ - /* NOTE: max_reg, etc, does not include registers not touched - * by the shader (ie. vertex fetched via VFD_DECODE but not - * touched by shader) - */ - int8_t max_reg; /* highest GPR # used by shader */ - int8_t max_half_reg; - int16_t max_const; - - /* number of sync bits: */ - uint16_t ss, sy; -}; - -struct ir3_register { - enum { - IR3_REG_CONST = 0x001, - IR3_REG_IMMED = 0x002, - IR3_REG_HALF = 0x004, - /* high registers are used for some things in compute shaders, - * for example. Seems to be for things that are global to all - * threads in a wave, so possibly these are global/shared by - * all the threads in the wave? - */ - IR3_REG_HIGH = 0x008, - IR3_REG_RELATIV= 0x010, - IR3_REG_R = 0x020, - /* Most instructions, it seems, can do float abs/neg but not - * integer. The CP pass needs to know what is intended (int or - * float) in order to do the right thing. For this reason the - * abs/neg flags are split out into float and int variants. 
In - * addition, .b (bitwise) operations, the negate is actually a - * bitwise not, so split that out into a new flag to make it - * more clear. - */ - IR3_REG_FNEG = 0x040, - IR3_REG_FABS = 0x080, - IR3_REG_SNEG = 0x100, - IR3_REG_SABS = 0x200, - IR3_REG_BNOT = 0x400, - IR3_REG_EVEN = 0x800, - IR3_REG_POS_INF= 0x1000, - /* (ei) flag, end-input? Set on last bary, presumably to signal - * that the shader needs no more input: - */ - IR3_REG_EI = 0x2000, - /* meta-flags, for intermediate stages of IR, ie. - * before register assignment is done: - */ - IR3_REG_SSA = 0x4000, /* 'instr' is ptr to assigning instr */ - IR3_REG_ARRAY = 0x8000, - - } flags; - - /* normal registers: - * the component is in the low two bits of the reg #, so - * rN.x becomes: (N << 2) | x - */ - int num; - union { - /* immediate: */ - int32_t iim_val; - uint32_t uim_val; - float fim_val; - /* relative: */ - struct { - uint16_t id; - int16_t offset; - } array; - }; - - /* For IR3_REG_SSA, src registers contain ptr back to assigning - * instruction. - * - * For IR3_REG_ARRAY, the pointer is back to the last dependent - * array access (although the net effect is the same, it points - * back to a previous instruction that we depend on). - */ - struct ir3_instruction *instr; - - union { - /* used for cat5 instructions, but also for internal/IR level - * tracking of what registers are read/written by an instruction. - * wrmask may be a bad name since it is used to represent both - * src and dst that touch multiple adjacent registers. - */ - unsigned wrmask; - /* for relative addressing, 32bits for array size is too small, - * but otoh we don't need to deal with disjoint sets, so instead - * use a simple size field (number of scalar components). 
- */ - unsigned size; - }; -}; - -/* - * Stupid/simple growable array implementation: - */ -#define DECLARE_ARRAY(type, name) \ - unsigned name ## _count, name ## _sz; \ - type * name; - -#define array_insert(ctx, arr, val) do { \ - if (arr ## _count == arr ## _sz) { \ - arr ## _sz = MAX2(2 * arr ## _sz, 16); \ - arr = reralloc_size(ctx, arr, arr ## _sz * sizeof(arr[0])); \ - } \ - arr[arr ##_count++] = val; \ - } while (0) - -struct ir3_instruction { - struct ir3_block *block; - opc_t opc; - enum { - /* (sy) flag is set on first instruction, and after sample - * instructions (probably just on RAW hazard). - */ - IR3_INSTR_SY = 0x001, - /* (ss) flag is set on first instruction, and first instruction - * to depend on the result of "long" instructions (RAW hazard): - * - * rcp, rsq, log2, exp2, sin, cos, sqrt - * - * It seems to synchronize until all in-flight instructions are - * completed, for example: - * - * rsq hr1.w, hr1.w - * add.f hr2.z, (neg)hr2.z, hc0.y - * mul.f hr2.w, (neg)hr2.y, (neg)hr2.y - * rsq hr2.x, hr2.x - * (rpt1)nop - * mad.f16 hr2.w, hr2.z, hr2.z, hr2.w - * nop - * mad.f16 hr2.w, (neg)hr0.w, (neg)hr0.w, hr2.w - * (ss)(rpt2)mul.f hr1.x, (r)hr1.x, hr1.w - * (rpt2)mul.f hr0.x, (neg)(r)hr0.x, hr2.x - * - * The last mul.f does not have (ss) set, presumably because the - * (ss) on the previous instruction does the job. - * - * The blob driver also seems to set it on WAR hazards, although - * not really clear if this is needed or just blob compiler being - * sloppy. 
So far I haven't found a case where removing the (ss) - * causes problems for WAR hazard, but I could just be getting - * lucky: - * - * rcp r1.y, r3.y - * (ss)(rpt2)mad.f32 r3.y, (r)c9.x, r1.x, (r)r3.z - * - */ - IR3_INSTR_SS = 0x002, - /* (jp) flag is set on jump targets: - */ - IR3_INSTR_JP = 0x004, - IR3_INSTR_UL = 0x008, - IR3_INSTR_3D = 0x010, - IR3_INSTR_A = 0x020, - IR3_INSTR_O = 0x040, - IR3_INSTR_P = 0x080, - IR3_INSTR_S = 0x100, - IR3_INSTR_S2EN = 0x200, - IR3_INSTR_G = 0x400, - IR3_INSTR_SAT = 0x800, - /* meta-flags, for intermediate stages of IR, ie. - * before register assignment is done: - */ - IR3_INSTR_MARK = 0x1000, - IR3_INSTR_UNUSED= 0x2000, - } flags; - int repeat; -#ifdef DEBUG - unsigned regs_max; -#endif - unsigned regs_count; - struct ir3_register **regs; - union { - struct { - char inv; - char comp; - int immed; - struct ir3_block *target; - } cat0; - struct { - type_t src_type, dst_type; - } cat1; - struct { - enum { - IR3_COND_LT = 0, - IR3_COND_LE = 1, - IR3_COND_GT = 2, - IR3_COND_GE = 3, - IR3_COND_EQ = 4, - IR3_COND_NE = 5, - } condition; - } cat2; - struct { - unsigned samp, tex; - type_t type; - } cat5; - struct { - type_t type; - int src_offset; - int dst_offset; - int iim_val : 3; /* for ldgb/stgb, # of components */ - int d : 3; - bool typed : 1; - } cat6; - struct { - unsigned w : 1; /* write */ - unsigned r : 1; /* read */ - unsigned l : 1; /* local */ - unsigned g : 1; /* global */ - } cat7; - /* for meta-instructions, just used to hold extra data - * before instruction scheduling, etc - */ - struct { - int off; /* component/offset */ - } fo; - struct { - struct ir3_block *block; - } inout; - }; - - /* transient values used during various algorithms: */ - union { - /* The instruction depth is the max dependency distance to output. - * - * You can also think of it as the "cost", if we did any sort of - * optimization for register footprint. Ie. 
a value that is just - * result of moving a const to a reg would have a low cost, so to - * it could make sense to duplicate the instruction at various - * points where the result is needed to reduce register footprint. - */ - unsigned depth; - /* When we get to the RA stage, we no longer need depth, but - * we do need instruction's position/name: - */ - struct { - uint16_t ip; - uint16_t name; - }; - }; - - /* used for per-pass extra instruction data. - */ - void *data; - - /* Used during CP and RA stages. For fanin and shader inputs/ - * outputs where we need a sequence of consecutive registers, - * keep track of each src instructions left (ie 'n-1') and right - * (ie 'n+1') neighbor. The front-end must insert enough mov's - * to ensure that each instruction has at most one left and at - * most one right neighbor. During the copy-propagation pass, - * we only remove mov's when we can preserve this constraint. - * And during the RA stage, we use the neighbor information to - * allocate a block of registers in one shot. - * - * TODO: maybe just add something like: - * struct ir3_instruction_ref { - * struct ir3_instruction *instr; - * unsigned cnt; - * } - * - * Or can we get away without the refcnt stuff? It seems like - * it should be overkill.. the problem is if, potentially after - * already eliminating some mov's, if you have a single mov that - * needs to be grouped with it's neighbors in two different - * places (ex. shader output and a fanin). - */ - struct { - struct ir3_instruction *left, *right; - uint16_t left_cnt, right_cnt; - } cp; - - /* an instruction can reference at most one address register amongst - * it's src/dst registers. Beyond that, you need to insert mov's. - * - * NOTE: do not write this directly, use ir3_instr_set_address() - */ - struct ir3_instruction *address; - - /* Tracking for additional dependent instructions. Used to handle - * barriers, WAR hazards for arrays/SSBOs/etc. 
- */ - DECLARE_ARRAY(struct ir3_instruction *, deps); - - /* - * From PoV of instruction scheduling, not execution (ie. ignores global/ - * local distinction): - * shared image atomic SSBO everything - * barrier()/ - R/W R/W R/W R/W X - * groupMemoryBarrier() - * memoryBarrier() - R/W R/W - * (but only images declared coherent?) - * memoryBarrierAtomic() - R/W - * memoryBarrierBuffer() - R/W - * memoryBarrierImage() - R/W - * memoryBarrierShared() - R/W - * - * TODO I think for SSBO/image/shared, in cases where we can determine - * which variable is accessed, we don't need to care about accesses to - * different variables (unless declared coherent??) - */ - enum { - IR3_BARRIER_EVERYTHING = 1 << 0, - IR3_BARRIER_SHARED_R = 1 << 1, - IR3_BARRIER_SHARED_W = 1 << 2, - IR3_BARRIER_IMAGE_R = 1 << 3, - IR3_BARRIER_IMAGE_W = 1 << 4, - IR3_BARRIER_BUFFER_R = 1 << 5, - IR3_BARRIER_BUFFER_W = 1 << 6, - IR3_BARRIER_ARRAY_R = 1 << 7, - IR3_BARRIER_ARRAY_W = 1 << 8, - } barrier_class, barrier_conflict; - - /* Entry in ir3_block's instruction list: */ - struct list_head node; - - int use_count; /* currently just updated/used by cp */ - -#ifdef DEBUG - uint32_t serialno; -#endif -}; - -static inline struct ir3_instruction * -ir3_neighbor_first(struct ir3_instruction *instr) -{ - int cnt = 0; - while (instr->cp.left) { - instr = instr->cp.left; - if (++cnt > 0xffff) { - debug_assert(0); - break; - } - } - return instr; -} - -static inline int ir3_neighbor_count(struct ir3_instruction *instr) -{ - int num = 1; - - debug_assert(!instr->cp.left); - - while (instr->cp.right) { - num++; - instr = instr->cp.right; - if (num > 0xffff) { - debug_assert(0); - break; - } - } - - return num; -} - -struct ir3 { - struct ir3_compiler *compiler; - - unsigned ninputs, noutputs; - struct ir3_instruction **inputs; - struct ir3_instruction **outputs; - - /* Track bary.f (and ldlv) instructions.. 
this is needed in - * scheduling to ensure that all varying fetches happen before - * any potential kill instructions. The hw gets grumpy if all - * threads in a group are killed before the last bary.f gets - * a chance to signal end of input (ei). - */ - DECLARE_ARRAY(struct ir3_instruction *, baryfs); - - /* Track all indirect instructions (read and write). To avoid - * deadlock scenario where an address register gets scheduled, - * but other dependent src instructions cannot be scheduled due - * to dependency on a *different* address register value, the - * scheduler needs to ensure that all dependencies other than - * the instruction other than the address register are scheduled - * before the one that writes the address register. Having a - * convenient list of instructions that reference some address - * register simplifies this. - */ - DECLARE_ARRAY(struct ir3_instruction *, indirects); - - /* and same for instructions that consume predicate register: */ - DECLARE_ARRAY(struct ir3_instruction *, predicates); - - /* Track texture sample instructions which need texture state - * patched in (for astc-srgb workaround): - */ - DECLARE_ARRAY(struct ir3_instruction *, astc_srgb); - - /* List of blocks: */ - struct list_head block_list; - - /* List of ir3_array's: */ - struct list_head array_list; - -#ifdef DEBUG - unsigned block_count, instr_count; -#endif -}; - -struct ir3_array { - struct list_head node; - unsigned length; - unsigned id; - - struct nir_register *r; - - /* To avoid array write's from getting DCE'd, keep track of the - * most recent write. Any array access depends on the most - * recent write. This way, nothing depends on writes after the - * last read. 
But all the writes that happen before that have - * something depending on them - */ - struct ir3_instruction *last_write; - - /* extra stuff used in RA pass: */ - unsigned base; /* base vreg name */ - unsigned reg; /* base physical reg */ - uint16_t start_ip, end_ip; -}; - -struct ir3_array * ir3_lookup_array(struct ir3 *ir, unsigned id); - -struct ir3_block { - struct list_head node; - struct ir3 *shader; - - const struct nir_block *nblock; - - struct list_head instr_list; /* list of ir3_instruction */ - - /* each block has either one or two successors.. in case of - * two successors, 'condition' decides which one to follow. - * A block preceding an if/else has two successors. - */ - struct ir3_instruction *condition; - struct ir3_block *successors[2]; - - unsigned predecessors_count; - struct ir3_block **predecessors; - - uint16_t start_ip, end_ip; - - /* Track instructions which do not write a register but other- - * wise must not be discarded (such as kill, stg, etc) - */ - DECLARE_ARRAY(struct ir3_instruction *, keeps); - - /* used for per-pass extra block data. Mainly used right - * now in RA step to track livein/liveout. 
- */ - void *data; - -#ifdef DEBUG - uint32_t serialno; -#endif -}; - -static inline uint32_t -block_id(struct ir3_block *block) -{ -#ifdef DEBUG - return block->serialno; -#else - return (uint32_t)(unsigned long)block; -#endif -} - -struct ir3 * ir3_create(struct ir3_compiler *compiler, - unsigned nin, unsigned nout); -void ir3_destroy(struct ir3 *shader); -void * ir3_assemble(struct ir3 *shader, - struct ir3_info *info, uint32_t gpu_id); -void * ir3_alloc(struct ir3 *shader, int sz); - -struct ir3_block * ir3_block_create(struct ir3 *shader); - -struct ir3_instruction * ir3_instr_create(struct ir3_block *block, opc_t opc); -struct ir3_instruction * ir3_instr_create2(struct ir3_block *block, - opc_t opc, int nreg); -struct ir3_instruction * ir3_instr_clone(struct ir3_instruction *instr); -void ir3_instr_add_dep(struct ir3_instruction *instr, struct ir3_instruction *dep); -const char *ir3_instr_name(struct ir3_instruction *instr); - -struct ir3_register * ir3_reg_create(struct ir3_instruction *instr, - int num, int flags); -struct ir3_register * ir3_reg_clone(struct ir3 *shader, - struct ir3_register *reg); - -void ir3_instr_set_address(struct ir3_instruction *instr, - struct ir3_instruction *addr); - -static inline bool ir3_instr_check_mark(struct ir3_instruction *instr) -{ - if (instr->flags & IR3_INSTR_MARK) - return true; /* already visited */ - instr->flags |= IR3_INSTR_MARK; - return false; -} - -void ir3_block_clear_mark(struct ir3_block *block); -void ir3_clear_mark(struct ir3 *shader); - -unsigned ir3_count_instructions(struct ir3 *ir); - -static inline int ir3_instr_regno(struct ir3_instruction *instr, - struct ir3_register *reg) -{ - unsigned i; - for (i = 0; i < instr->regs_count; i++) - if (reg == instr->regs[i]) - return i; - return -1; -} - - -#define MAX_ARRAYS 16 - -/* comp: - * 0 - x - * 1 - y - * 2 - z - * 3 - w - */ -static inline uint32_t regid(int num, int comp) -{ - return (num << 2) | (comp & 0x3); -} - -static inline uint32_t reg_num(struct 
ir3_register *reg) -{ - return reg->num >> 2; -} - -static inline uint32_t reg_comp(struct ir3_register *reg) -{ - return reg->num & 0x3; -} - -static inline bool is_flow(struct ir3_instruction *instr) -{ - return (opc_cat(instr->opc) == 0); -} - -static inline bool is_kill(struct ir3_instruction *instr) -{ - return instr->opc == OPC_KILL; -} - -static inline bool is_nop(struct ir3_instruction *instr) -{ - return instr->opc == OPC_NOP; -} - -/* Is it a non-transformative (ie. not type changing) mov? This can - * also include absneg.s/absneg.f, which for the most part can be - * treated as a mov (single src argument). - */ -static inline bool is_same_type_mov(struct ir3_instruction *instr) -{ - struct ir3_register *dst; - - switch (instr->opc) { - case OPC_MOV: - if (instr->cat1.src_type != instr->cat1.dst_type) - return false; - break; - case OPC_ABSNEG_F: - case OPC_ABSNEG_S: - if (instr->flags & IR3_INSTR_SAT) - return false; - break; - default: - return false; - } - - dst = instr->regs[0]; - - /* mov's that write to a0.x or p0.x are special: */ - if (dst->num == regid(REG_P0, 0)) - return false; - if (dst->num == regid(REG_A0, 0)) - return false; - - if (dst->flags & (IR3_REG_RELATIV | IR3_REG_ARRAY)) - return false; - - return true; -} - -static inline bool is_alu(struct ir3_instruction *instr) -{ - return (1 <= opc_cat(instr->opc)) && (opc_cat(instr->opc) <= 3); -} - -static inline bool is_sfu(struct ir3_instruction *instr) -{ - return (opc_cat(instr->opc) == 4); -} - -static inline bool is_tex(struct ir3_instruction *instr) -{ - return (opc_cat(instr->opc) == 5); -} - -static inline bool is_mem(struct ir3_instruction *instr) -{ - return (opc_cat(instr->opc) == 6); -} - -static inline bool is_barrier(struct ir3_instruction *instr) -{ - return (opc_cat(instr->opc) == 7); -} - -static inline bool -is_store(struct ir3_instruction *instr) -{ - /* these instructions, the "destination" register is - * actually a source, the address to store to. 
- */ - switch (instr->opc) { - case OPC_STG: - case OPC_STGB: - case OPC_STIB: - case OPC_STP: - case OPC_STL: - case OPC_STLW: - case OPC_L2G: - case OPC_G2L: - return true; - default: - return false; - } -} - -static inline bool is_load(struct ir3_instruction *instr) -{ - switch (instr->opc) { - case OPC_LDG: - case OPC_LDGB: - case OPC_LDL: - case OPC_LDP: - case OPC_L2G: - case OPC_LDLW: - case OPC_LDC: - case OPC_LDLV: - /* probably some others too.. */ - return true; - default: - return false; - } -} - -static inline bool is_input(struct ir3_instruction *instr) -{ - /* in some cases, ldlv is used to fetch varying without - * interpolation.. fortunately inloc is the first src - * register in either case - */ - switch (instr->opc) { - case OPC_LDLV: - case OPC_BARY_F: - return true; - default: - return false; - } -} - -static inline bool is_bool(struct ir3_instruction *instr) -{ - switch (instr->opc) { - case OPC_CMPS_F: - case OPC_CMPS_S: - case OPC_CMPS_U: - return true; - default: - return false; - } -} - -static inline bool is_meta(struct ir3_instruction *instr) -{ - /* TODO how should we count PHI (and maybe fan-in/out) which - * might actually contribute some instructions to the final - * result? 
- */ - return (opc_cat(instr->opc) == -1); -} - -static inline bool writes_addr(struct ir3_instruction *instr) -{ - if (instr->regs_count > 0) { - struct ir3_register *dst = instr->regs[0]; - return reg_num(dst) == REG_A0; - } - return false; -} - -static inline bool writes_pred(struct ir3_instruction *instr) -{ - if (instr->regs_count > 0) { - struct ir3_register *dst = instr->regs[0]; - return reg_num(dst) == REG_P0; - } - return false; -} - -/* returns defining instruction for reg */ -/* TODO better name */ -static inline struct ir3_instruction *ssa(struct ir3_register *reg) -{ - if (reg->flags & (IR3_REG_SSA | IR3_REG_ARRAY)) { - return reg->instr; - } - return NULL; -} - -static inline bool conflicts(struct ir3_instruction *a, - struct ir3_instruction *b) -{ - return (a && b) && (a != b); -} - -static inline bool reg_gpr(struct ir3_register *r) -{ - if (r->flags & (IR3_REG_CONST | IR3_REG_IMMED)) - return false; - if ((reg_num(r) == REG_A0) || (reg_num(r) == REG_P0)) - return false; - return true; -} - -static inline type_t half_type(type_t type) -{ - switch (type) { - case TYPE_F32: return TYPE_F16; - case TYPE_U32: return TYPE_U16; - case TYPE_S32: return TYPE_S16; - case TYPE_F16: - case TYPE_U16: - case TYPE_S16: - return type; - default: - assert(0); - return ~0; - } -} - -/* some cat2 instructions (ie. 
those which are not float) can embed an - * immediate: - */ -static inline bool ir3_cat2_int(opc_t opc) -{ - switch (opc) { - case OPC_ADD_U: - case OPC_ADD_S: - case OPC_SUB_U: - case OPC_SUB_S: - case OPC_CMPS_U: - case OPC_CMPS_S: - case OPC_MIN_U: - case OPC_MIN_S: - case OPC_MAX_U: - case OPC_MAX_S: - case OPC_CMPV_U: - case OPC_CMPV_S: - case OPC_MUL_U: - case OPC_MUL_S: - case OPC_MULL_U: - case OPC_CLZ_S: - case OPC_ABSNEG_S: - case OPC_AND_B: - case OPC_OR_B: - case OPC_NOT_B: - case OPC_XOR_B: - case OPC_BFREV_B: - case OPC_CLZ_B: - case OPC_SHL_B: - case OPC_SHR_B: - case OPC_ASHR_B: - case OPC_MGEN_B: - case OPC_GETBIT_B: - case OPC_CBITS_B: - case OPC_BARY_F: - return true; - - default: - return false; - } -} - - -/* map cat2 instruction to valid abs/neg flags: */ -static inline unsigned ir3_cat2_absneg(opc_t opc) -{ - switch (opc) { - case OPC_ADD_F: - case OPC_MIN_F: - case OPC_MAX_F: - case OPC_MUL_F: - case OPC_SIGN_F: - case OPC_CMPS_F: - case OPC_ABSNEG_F: - case OPC_CMPV_F: - case OPC_FLOOR_F: - case OPC_CEIL_F: - case OPC_RNDNE_F: - case OPC_RNDAZ_F: - case OPC_TRUNC_F: - case OPC_BARY_F: - return IR3_REG_FABS | IR3_REG_FNEG; - - case OPC_ADD_U: - case OPC_ADD_S: - case OPC_SUB_U: - case OPC_SUB_S: - case OPC_CMPS_U: - case OPC_CMPS_S: - case OPC_MIN_U: - case OPC_MIN_S: - case OPC_MAX_U: - case OPC_MAX_S: - case OPC_CMPV_U: - case OPC_CMPV_S: - case OPC_MUL_U: - case OPC_MUL_S: - case OPC_MULL_U: - case OPC_CLZ_S: - return 0; - - case OPC_ABSNEG_S: - return IR3_REG_SABS | IR3_REG_SNEG; - - case OPC_AND_B: - case OPC_OR_B: - case OPC_NOT_B: - case OPC_XOR_B: - case OPC_BFREV_B: - case OPC_CLZ_B: - case OPC_SHL_B: - case OPC_SHR_B: - case OPC_ASHR_B: - case OPC_MGEN_B: - case OPC_GETBIT_B: - case OPC_CBITS_B: - return IR3_REG_BNOT; - - default: - return 0; - } -} - -/* map cat3 instructions to valid abs/neg flags: */ -static inline unsigned ir3_cat3_absneg(opc_t opc) -{ - switch (opc) { - case OPC_MAD_F16: - case OPC_MAD_F32: - case OPC_SEL_F16: 
- case OPC_SEL_F32: - return IR3_REG_FNEG; - - case OPC_MAD_U16: - case OPC_MADSH_U16: - case OPC_MAD_S16: - case OPC_MADSH_M16: - case OPC_MAD_U24: - case OPC_MAD_S24: - case OPC_SEL_S16: - case OPC_SEL_S32: - case OPC_SAD_S16: - case OPC_SAD_S32: - /* neg *may* work on 3rd src.. */ - - case OPC_SEL_B16: - case OPC_SEL_B32: - - default: - return 0; - } -} - -#define MASK(n) ((1 << (n)) - 1) - -/* iterator for an instructions's sources (reg), also returns src #: */ -#define foreach_src_n(__srcreg, __n, __instr) \ - if ((__instr)->regs_count) \ - for (unsigned __cnt = (__instr)->regs_count - 1, __n = 0; __n < __cnt; __n++) \ - if ((__srcreg = (__instr)->regs[__n + 1])) - -/* iterator for an instructions's sources (reg): */ -#define foreach_src(__srcreg, __instr) \ - foreach_src_n(__srcreg, __i, __instr) - -static inline unsigned __ssa_src_cnt(struct ir3_instruction *instr) -{ - unsigned cnt = instr->regs_count + instr->deps_count; - if (instr->address) - cnt++; - return cnt; -} - -static inline struct ir3_instruction * __ssa_src_n(struct ir3_instruction *instr, unsigned n) -{ - if (n == (instr->regs_count + instr->deps_count)) - return instr->address; - if (n >= instr->regs_count) - return instr->deps[n - instr->regs_count]; - return ssa(instr->regs[n]); -} - -static inline bool __is_false_dep(struct ir3_instruction *instr, unsigned n) -{ - if (n == (instr->regs_count + instr->deps_count)) - return false; - if (n >= instr->regs_count) - return true; - return false; -} - -#define __src_cnt(__instr) ((__instr)->address ? 
(__instr)->regs_count : (__instr)->regs_count - 1) - -/* iterator for an instruction's SSA sources (instr), also returns src #: */ -#define foreach_ssa_src_n(__srcinst, __n, __instr) \ - for (unsigned __cnt = __ssa_src_cnt(__instr), __n = 0; __n < __cnt; __n++) \ - if ((__srcinst = __ssa_src_n(__instr, __n))) - -/* iterator for an instruction's SSA sources (instr): */ -#define foreach_ssa_src(__srcinst, __instr) \ - foreach_ssa_src_n(__srcinst, __i, __instr) - - -/* dump: */ -void ir3_print(struct ir3 *ir); -void ir3_print_instr(struct ir3_instruction *instr); - -/* depth calculation: */ -int ir3_delayslots(struct ir3_instruction *assigner, - struct ir3_instruction *consumer, unsigned n); -void ir3_insert_by_depth(struct ir3_instruction *instr, struct list_head *list); -void ir3_depth(struct ir3 *ir); - -/* copy-propagate: */ -struct ir3_shader_variant; -void ir3_cp(struct ir3 *ir, struct ir3_shader_variant *so); - -/* group neighbors and insert mov's to resolve conflicts: */ -void ir3_group(struct ir3 *ir); - -/* scheduling: */ -void ir3_sched_add_deps(struct ir3 *ir); -int ir3_sched(struct ir3 *ir); - -/* register assignment: */ -struct ir3_ra_reg_set * ir3_ra_alloc_reg_set(struct ir3_compiler *compiler); -int ir3_ra(struct ir3 *ir3, enum shader_t type, - bool frag_coord, bool frag_face); - -/* legalize: */ -void ir3_legalize(struct ir3 *ir, int *num_samp, bool *has_ssbo, int *max_bary); - -/* ************************************************************************* */ -/* instruction helpers */ - -/* creates SSA src of correct type (ie. 
half vs full precision) */ -static inline struct ir3_register * __ssa_src(struct ir3_instruction *instr, - struct ir3_instruction *src, unsigned flags) -{ - struct ir3_register *reg; - if (src->regs[0]->flags & IR3_REG_HALF) - flags |= IR3_REG_HALF; - reg = ir3_reg_create(instr, 0, IR3_REG_SSA | flags); - reg->instr = src; - return reg; -} - -static inline struct ir3_instruction * -ir3_MOV(struct ir3_block *block, struct ir3_instruction *src, type_t type) -{ - struct ir3_instruction *instr = ir3_instr_create(block, OPC_MOV); - ir3_reg_create(instr, 0, 0); /* dst */ - if (src->regs[0]->flags & IR3_REG_ARRAY) { - struct ir3_register *src_reg = __ssa_src(instr, src, IR3_REG_ARRAY); - src_reg->array = src->regs[0]->array; - } else { - __ssa_src(instr, src, 0); - } - debug_assert(!(src->regs[0]->flags & IR3_REG_RELATIV)); - instr->cat1.src_type = type; - instr->cat1.dst_type = type; - return instr; -} - -static inline struct ir3_instruction * -ir3_COV(struct ir3_block *block, struct ir3_instruction *src, - type_t src_type, type_t dst_type) -{ - struct ir3_instruction *instr = ir3_instr_create(block, OPC_MOV); - unsigned dst_flags = (type_size(dst_type) < 32) ? IR3_REG_HALF : 0; - unsigned src_flags = (type_size(src_type) < 32) ? 
IR3_REG_HALF : 0; - - debug_assert((src->regs[0]->flags & IR3_REG_HALF) == src_flags); - - ir3_reg_create(instr, 0, dst_flags); /* dst */ - __ssa_src(instr, src, 0); - instr->cat1.src_type = src_type; - instr->cat1.dst_type = dst_type; - debug_assert(!(src->regs[0]->flags & IR3_REG_ARRAY)); - return instr; -} - -static inline struct ir3_instruction * -ir3_NOP(struct ir3_block *block) -{ - return ir3_instr_create(block, OPC_NOP); -} - -#define INSTR0(name) \ -static inline struct ir3_instruction * \ -ir3_##name(struct ir3_block *block) \ -{ \ - struct ir3_instruction *instr = \ - ir3_instr_create(block, OPC_##name); \ - return instr; \ -} - -#define INSTR1(name) \ -static inline struct ir3_instruction * \ -ir3_##name(struct ir3_block *block, \ - struct ir3_instruction *a, unsigned aflags) \ -{ \ - struct ir3_instruction *instr = \ - ir3_instr_create(block, OPC_##name); \ - ir3_reg_create(instr, 0, 0); /* dst */ \ - __ssa_src(instr, a, aflags); \ - return instr; \ -} - -#define INSTR2(name) \ -static inline struct ir3_instruction * \ -ir3_##name(struct ir3_block *block, \ - struct ir3_instruction *a, unsigned aflags, \ - struct ir3_instruction *b, unsigned bflags) \ -{ \ - struct ir3_instruction *instr = \ - ir3_instr_create(block, OPC_##name); \ - ir3_reg_create(instr, 0, 0); /* dst */ \ - __ssa_src(instr, a, aflags); \ - __ssa_src(instr, b, bflags); \ - return instr; \ -} - -#define INSTR3(name) \ -static inline struct ir3_instruction * \ -ir3_##name(struct ir3_block *block, \ - struct ir3_instruction *a, unsigned aflags, \ - struct ir3_instruction *b, unsigned bflags, \ - struct ir3_instruction *c, unsigned cflags) \ -{ \ - struct ir3_instruction *instr = \ - ir3_instr_create(block, OPC_##name); \ - ir3_reg_create(instr, 0, 0); /* dst */ \ - __ssa_src(instr, a, aflags); \ - __ssa_src(instr, b, bflags); \ - __ssa_src(instr, c, cflags); \ - return instr; \ -} - -#define INSTR4(name) \ -static inline struct ir3_instruction * \ -ir3_##name(struct ir3_block *block, \ - 
struct ir3_instruction *a, unsigned aflags, \ - struct ir3_instruction *b, unsigned bflags, \ - struct ir3_instruction *c, unsigned cflags, \ - struct ir3_instruction *d, unsigned dflags) \ -{ \ - struct ir3_instruction *instr = \ - ir3_instr_create2(block, OPC_##name, 5); \ - ir3_reg_create(instr, 0, 0); /* dst */ \ - __ssa_src(instr, a, aflags); \ - __ssa_src(instr, b, bflags); \ - __ssa_src(instr, c, cflags); \ - __ssa_src(instr, d, dflags); \ - return instr; \ -} - -#define INSTR4F(f, name) \ -static inline struct ir3_instruction * \ -ir3_##name##_##f(struct ir3_block *block, \ - struct ir3_instruction *a, unsigned aflags, \ - struct ir3_instruction *b, unsigned bflags, \ - struct ir3_instruction *c, unsigned cflags, \ - struct ir3_instruction *d, unsigned dflags) \ -{ \ - struct ir3_instruction *instr = \ - ir3_instr_create2(block, OPC_##name, 5); \ - ir3_reg_create(instr, 0, 0); /* dst */ \ - __ssa_src(instr, a, aflags); \ - __ssa_src(instr, b, bflags); \ - __ssa_src(instr, c, cflags); \ - __ssa_src(instr, d, dflags); \ - instr->flags |= IR3_INSTR_##f; \ - return instr; \ -} - -/* cat0 instructions: */ -INSTR0(BR) -INSTR0(JUMP) -INSTR1(KILL) -INSTR0(END) - -/* cat2 instructions, most 2 src but some 1 src: */ -INSTR2(ADD_F) -INSTR2(MIN_F) -INSTR2(MAX_F) -INSTR2(MUL_F) -INSTR1(SIGN_F) -INSTR2(CMPS_F) -INSTR1(ABSNEG_F) -INSTR2(CMPV_F) -INSTR1(FLOOR_F) -INSTR1(CEIL_F) -INSTR1(RNDNE_F) -INSTR1(RNDAZ_F) -INSTR1(TRUNC_F) -INSTR2(ADD_U) -INSTR2(ADD_S) -INSTR2(SUB_U) -INSTR2(SUB_S) -INSTR2(CMPS_U) -INSTR2(CMPS_S) -INSTR2(MIN_U) -INSTR2(MIN_S) -INSTR2(MAX_U) -INSTR2(MAX_S) -INSTR1(ABSNEG_S) -INSTR2(AND_B) -INSTR2(OR_B) -INSTR1(NOT_B) -INSTR2(XOR_B) -INSTR2(CMPV_U) -INSTR2(CMPV_S) -INSTR2(MUL_U) -INSTR2(MUL_S) -INSTR2(MULL_U) -INSTR1(BFREV_B) -INSTR1(CLZ_S) -INSTR1(CLZ_B) -INSTR2(SHL_B) -INSTR2(SHR_B) -INSTR2(ASHR_B) -INSTR2(BARY_F) -INSTR2(MGEN_B) -INSTR2(GETBIT_B) -INSTR1(SETRM) -INSTR1(CBITS_B) -INSTR2(SHB) -INSTR2(MSAD) - -/* cat3 instructions: */ -INSTR3(MAD_U16) 
-INSTR3(MADSH_U16) -INSTR3(MAD_S16) -INSTR3(MADSH_M16) -INSTR3(MAD_U24) -INSTR3(MAD_S24) -INSTR3(MAD_F16) -INSTR3(MAD_F32) -INSTR3(SEL_B16) -INSTR3(SEL_B32) -INSTR3(SEL_S16) -INSTR3(SEL_S32) -INSTR3(SEL_F16) -INSTR3(SEL_F32) -INSTR3(SAD_S16) -INSTR3(SAD_S32) - -/* cat4 instructions: */ -INSTR1(RCP) -INSTR1(RSQ) -INSTR1(LOG2) -INSTR1(EXP2) -INSTR1(SIN) -INSTR1(COS) -INSTR1(SQRT) - -/* cat5 instructions: */ -INSTR1(DSX) -INSTR1(DSY) - -static inline struct ir3_instruction * -ir3_SAM(struct ir3_block *block, opc_t opc, type_t type, - unsigned wrmask, unsigned flags, unsigned samp, unsigned tex, - struct ir3_instruction *src0, struct ir3_instruction *src1) -{ - struct ir3_instruction *sam; - struct ir3_register *reg; - - sam = ir3_instr_create(block, opc); - sam->flags |= flags; - ir3_reg_create(sam, 0, 0)->wrmask = wrmask; - if (src0) { - reg = ir3_reg_create(sam, 0, IR3_REG_SSA); - reg->wrmask = (1 << (src0->regs_count - 1)) - 1; - reg->instr = src0; - } - if (src1) { - reg = ir3_reg_create(sam, 0, IR3_REG_SSA); - reg->instr = src1; - reg->wrmask = (1 << (src1->regs_count - 1)) - 1; - } - sam->cat5.samp = samp; - sam->cat5.tex = tex; - sam->cat5.type = type; - - return sam; -} - -/* cat6 instructions: */ -INSTR2(LDLV) -INSTR2(LDG) -INSTR2(LDL) -INSTR3(STG) -INSTR3(STL) -INSTR3(LDGB) -INSTR4(STGB) -INSTR4(STIB) -INSTR1(RESINFO) -INSTR1(RESFMT) -INSTR2(ATOMIC_ADD) -INSTR2(ATOMIC_SUB) -INSTR2(ATOMIC_XCHG) -INSTR2(ATOMIC_INC) -INSTR2(ATOMIC_DEC) -INSTR2(ATOMIC_CMPXCHG) -INSTR2(ATOMIC_MIN) -INSTR2(ATOMIC_MAX) -INSTR2(ATOMIC_AND) -INSTR2(ATOMIC_OR) -INSTR2(ATOMIC_XOR) -INSTR4F(G, ATOMIC_ADD) -INSTR4F(G, ATOMIC_SUB) -INSTR4F(G, ATOMIC_XCHG) -INSTR4F(G, ATOMIC_INC) -INSTR4F(G, ATOMIC_DEC) -INSTR4F(G, ATOMIC_CMPXCHG) -INSTR4F(G, ATOMIC_MIN) -INSTR4F(G, ATOMIC_MAX) -INSTR4F(G, ATOMIC_AND) -INSTR4F(G, ATOMIC_OR) -INSTR4F(G, ATOMIC_XOR) - -/* cat7 instructions: */ -INSTR0(BAR) -INSTR0(FENCE) - -/* ************************************************************************* */ -/* 
split this out or find some helper to use.. like main/bitset.h.. */ - -#include - -#define MAX_REG 256 - -typedef uint8_t regmask_t[2 * MAX_REG / 8]; - -static inline unsigned regmask_idx(struct ir3_register *reg) -{ - unsigned num = (reg->flags & IR3_REG_RELATIV) ? reg->array.offset : reg->num; - debug_assert(num < MAX_REG); - if (reg->flags & IR3_REG_HALF) - num += MAX_REG; - return num; -} - -static inline void regmask_init(regmask_t *regmask) -{ - memset(regmask, 0, sizeof(*regmask)); -} - -static inline void regmask_set(regmask_t *regmask, struct ir3_register *reg) -{ - unsigned idx = regmask_idx(reg); - if (reg->flags & IR3_REG_RELATIV) { - unsigned i; - for (i = 0; i < reg->size; i++, idx++) - (*regmask)[idx / 8] |= 1 << (idx % 8); - } else { - unsigned mask; - for (mask = reg->wrmask; mask; mask >>= 1, idx++) - if (mask & 1) - (*regmask)[idx / 8] |= 1 << (idx % 8); - } -} - -static inline void regmask_or(regmask_t *dst, regmask_t *a, regmask_t *b) -{ - unsigned i; - for (i = 0; i < ARRAY_SIZE(*dst); i++) - (*dst)[i] = (*a)[i] | (*b)[i]; -} - -/* set bits in a if not set in b, conceptually: - * a |= (reg & ~b) - */ -static inline void regmask_set_if_not(regmask_t *a, - struct ir3_register *reg, regmask_t *b) -{ - unsigned idx = regmask_idx(reg); - if (reg->flags & IR3_REG_RELATIV) { - unsigned i; - for (i = 0; i < reg->size; i++, idx++) - if (!((*b)[idx / 8] & (1 << (idx % 8)))) - (*a)[idx / 8] |= 1 << (idx % 8); - } else { - unsigned mask; - for (mask = reg->wrmask; mask; mask >>= 1, idx++) - if (mask & 1) - if (!((*b)[idx / 8] & (1 << (idx % 8)))) - (*a)[idx / 8] |= 1 << (idx % 8); - } -} - -static inline bool regmask_get(regmask_t *regmask, - struct ir3_register *reg) -{ - unsigned idx = regmask_idx(reg); - if (reg->flags & IR3_REG_RELATIV) { - unsigned i; - for (i = 0; i < reg->size; i++, idx++) - if ((*regmask)[idx / 8] & (1 << (idx % 8))) - return true; - } else { - unsigned mask; - for (mask = reg->wrmask; mask; mask >>= 1, idx++) - if (mask & 1) - if 
((*regmask)[idx / 8] & (1 << (idx % 8))) - return true; - } - return false; -} - -/* ************************************************************************* */ - -#endif /* IR3_H_ */ diff -Nru mesa-18.3.3/src/gallium/drivers/freedreno/ir3/ir3_legalize.c mesa-19.0.1/src/gallium/drivers/freedreno/ir3/ir3_legalize.c --- mesa-18.3.3/src/gallium/drivers/freedreno/ir3/ir3_legalize.c 2018-12-07 18:58:04.000000000 +0000 +++ mesa-19.0.1/src/gallium/drivers/freedreno/ir3/ir3_legalize.c 1970-01-01 00:00:00.000000000 +0000 @@ -1,497 +0,0 @@ -/* - * Copyright (C) 2014 Rob Clark - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * the rights to use, copy, modify, merge, publish, distribute, sublicense, - * and/or sell copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice (including the next - * paragraph) shall be included in all copies or substantial portions of the - * Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL - * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - * - * Authors: - * Rob Clark - */ - -#include "util/u_math.h" - -#include "freedreno_util.h" - -#include "ir3.h" - -/* - * Legalize: - * - * We currently require that scheduling ensures that we have enough nop's - * in all the right places. 
The legalize step mostly handles fixing up - * instruction flags ((ss)/(sy)/(ei)), and collapses sequences of nop's - * into fewer nop's w/ rpt flag. - */ - -struct ir3_legalize_ctx { - int num_samp; - bool has_ssbo; - int max_bary; -}; - -struct ir3_legalize_state { - regmask_t needs_ss; - regmask_t needs_ss_war; /* write after read */ - regmask_t needs_sy; -}; - -struct ir3_legalize_block_data { - bool valid; - struct ir3_legalize_state state; -}; - -/* We want to evaluate each block from the position of any other - * predecessor block, in order that the flags set are the union of - * all possible program paths. - * - * To do this, we need to know the output state (needs_ss/ss_war/sy) - * of all predecessor blocks. The tricky thing is loops, which mean - * that we can't simply recursively process each predecessor block - * before legalizing the current block. - * - * How we handle that is by looping over all the blocks until the - * results converge. If the output state of a given block changes - * in a given pass, this means that all successor blocks are not - * yet fully legalized. 
- */ - -static bool -legalize_block(struct ir3_legalize_ctx *ctx, struct ir3_block *block) -{ - struct ir3_legalize_block_data *bd = block->data; - - if (bd->valid) - return false; - - struct ir3_instruction *last_input = NULL; - struct ir3_instruction *last_rel = NULL; - struct ir3_instruction *last_n = NULL; - struct list_head instr_list; - struct ir3_legalize_state prev_state = bd->state; - struct ir3_legalize_state *state = &bd->state; - - /* our input state is the OR of all predecessor blocks' state: */ - for (unsigned i = 0; i < block->predecessors_count; i++) { - struct ir3_legalize_block_data *pbd = block->predecessors[i]->data; - struct ir3_legalize_state *pstate = &pbd->state; - - /* Our input (ss)/(sy) state is based on OR'ing the output - * state of all our predecessor blocks - */ - regmask_or(&state->needs_ss, - &state->needs_ss, &pstate->needs_ss); - regmask_or(&state->needs_ss_war, - &state->needs_ss_war, &pstate->needs_ss_war); - regmask_or(&state->needs_sy, - &state->needs_sy, &pstate->needs_sy); - } - - /* remove all the instructions from the list, we'll be adding - * them back in as we go - */ - list_replace(&block->instr_list, &instr_list); - list_inithead(&block->instr_list); - - list_for_each_entry_safe (struct ir3_instruction, n, &instr_list, node) { - struct ir3_register *reg; - unsigned i; - - n->flags &= ~(IR3_INSTR_SS | IR3_INSTR_SY); - - if (is_meta(n)) - continue; - - if (is_input(n)) { - struct ir3_register *inloc = n->regs[1]; - assert(inloc->flags & IR3_REG_IMMED); - ctx->max_bary = MAX2(ctx->max_bary, inloc->iim_val); - } - - if (last_n && is_barrier(last_n)) - n->flags |= IR3_INSTR_SS | IR3_INSTR_SY; - - /* NOTE: consider dst register too.. it could happen that - * texture sample instruction (for example) writes some - * components which are unused. 
A subsequent instruction - * that writes the same register can race w/ the sam instr - * resulting in undefined results: - */ - for (i = 0; i < n->regs_count; i++) { - reg = n->regs[i]; - - if (reg_gpr(reg)) { - - /* TODO: we probably only need (ss) for alu - * instr consuming sfu result.. need to make - * some tests for both this and (sy).. - */ - if (regmask_get(&state->needs_ss, reg)) { - n->flags |= IR3_INSTR_SS; - regmask_init(&state->needs_ss_war); - regmask_init(&state->needs_ss); - } - - if (regmask_get(&state->needs_sy, reg)) { - n->flags |= IR3_INSTR_SY; - regmask_init(&state->needs_sy); - } - } - - /* TODO: is it valid to have address reg loaded from a - * relative src (ie. mova a0, c)? If so, the - * last_rel check below should be moved ahead of this: - */ - if (reg->flags & IR3_REG_RELATIV) - last_rel = n; - } - - if (n->regs_count > 0) { - reg = n->regs[0]; - if (regmask_get(&state->needs_ss_war, reg)) { - n->flags |= IR3_INSTR_SS; - regmask_init(&state->needs_ss_war); - regmask_init(&state->needs_ss); - } - - if (last_rel && (reg->num == regid(REG_A0, 0))) { - last_rel->flags |= IR3_INSTR_UL; - last_rel = NULL; - } - } - - /* cat5+ does not have an (ss) bit, if needed we need to - * insert a nop to carry the sync flag. 
Would be kinda - * clever if we were aware of this during scheduling, but - * this should be a pretty rare case: - */ - if ((n->flags & IR3_INSTR_SS) && (opc_cat(n->opc) >= 5)) { - struct ir3_instruction *nop; - nop = ir3_NOP(block); - nop->flags |= IR3_INSTR_SS; - n->flags &= ~IR3_INSTR_SS; - } - - /* need to be able to set (ss) on first instruction: */ - if (list_empty(&block->instr_list) && (opc_cat(n->opc) >= 5)) - ir3_NOP(block); - - if (is_nop(n) && !list_empty(&block->instr_list)) { - struct ir3_instruction *last = list_last_entry(&block->instr_list, - struct ir3_instruction, node); - if (is_nop(last) && (last->repeat < 5)) { - last->repeat++; - last->flags |= n->flags; - continue; - } - } - - list_addtail(&n->node, &block->instr_list); - - if (is_sfu(n)) - regmask_set(&state->needs_ss, n->regs[0]); - - if (is_tex(n)) { - /* this ends up being the # of samp instructions.. but that - * is ok, everything else only cares whether it is zero or - * not. We do this here, rather than when we encounter a - * SAMP decl, because (especially in binning pass shader) - * the samp instruction(s) could get eliminated if the - * result is not used. - */ - ctx->num_samp = MAX2(ctx->num_samp, n->cat5.samp + 1); - regmask_set(&state->needs_sy, n->regs[0]); - } else if (n->opc == OPC_RESINFO) { - regmask_set(&state->needs_ss, n->regs[0]); - ir3_NOP(block)->flags |= IR3_INSTR_SS; - } else if (is_load(n)) { - /* seems like ldlv needs (ss) bit instead?? which is odd but - * makes a bunch of flat-varying tests start working on a4xx. 
- */ - if ((n->opc == OPC_LDLV) || (n->opc == OPC_LDL)) - regmask_set(&state->needs_ss, n->regs[0]); - else - regmask_set(&state->needs_sy, n->regs[0]); - } else if (is_atomic(n->opc)) { - if (n->flags & IR3_INSTR_G) - regmask_set(&state->needs_sy, n->regs[0]); - else - regmask_set(&state->needs_ss, n->regs[0]); - } - - if (is_ssbo(n->opc) || (is_atomic(n->opc) && (n->flags & IR3_INSTR_G))) - ctx->has_ssbo = true; - - /* both tex/sfu appear to not always immediately consume - * their src register(s): - */ - if (is_tex(n) || is_sfu(n) || is_mem(n)) { - foreach_src(reg, n) { - if (reg_gpr(reg)) - regmask_set(&state->needs_ss_war, reg); - } - } - - if (is_input(n)) - last_input = n; - - last_n = n; - } - - if (last_input) { - /* special hack.. if using ldlv to bypass interpolation, - * we need to insert a dummy bary.f on which we can set - * the (ei) flag: - */ - if (is_mem(last_input) && (last_input->opc == OPC_LDLV)) { - struct ir3_instruction *baryf; - - /* (ss)bary.f (ei)r63.x, 0, r0.x */ - baryf = ir3_instr_create(block, OPC_BARY_F); - baryf->flags |= IR3_INSTR_SS; - ir3_reg_create(baryf, regid(63, 0), 0); - ir3_reg_create(baryf, 0, IR3_REG_IMMED)->iim_val = 0; - ir3_reg_create(baryf, regid(0, 0), 0); - - /* insert the dummy bary.f after last_input: */ - list_delinit(&baryf->node); - list_add(&baryf->node, &last_input->node); - - last_input = baryf; - } - last_input->regs[0]->flags |= IR3_REG_EI; - } - - if (last_rel) - last_rel->flags |= IR3_INSTR_UL; - - bd->valid = true; - - if (memcmp(&prev_state, state, sizeof(*state))) { - /* our output state changed, this invalidates all of our - * successors: - */ - for (unsigned i = 0; i < ARRAY_SIZE(block->successors); i++) { - if (!block->successors[i]) - break; - struct ir3_legalize_block_data *pbd = block->successors[i]->data; - pbd->valid = false; - } - } - - return true; -} - -/* NOTE: branch instructions are always the last instruction(s) - * in the block. 
We take advantage of this as we resolve the - * branches, since "if (foo) break;" constructs turn into - * something like: - * - * block3 { - * ... - * 0029:021: mov.s32s32 r62.x, r1.y - * 0082:022: br !p0.x, target=block5 - * 0083:023: br p0.x, target=block4 - * // succs: if _[0029:021: mov.s32s32] block4; else block5; - * } - * block4 { - * 0084:024: jump, target=block6 - * // succs: block6; - * } - * block5 { - * 0085:025: jump, target=block7 - * // succs: block7; - * } - * - * ie. only instruction in block4/block5 is a jump, so when - * resolving branches we can easily detect this by checking - * that the first instruction in the target block is itself - * a jump, and setup the br directly to the jump's target - * (and strip back out the now unreached jump) - * - * TODO sometimes we end up with things like: - * - * br !p0.x, #2 - * br p0.x, #12 - * add.u r0.y, r0.y, 1 - * - * If we swapped the order of the branches, we could drop one. - */ -static struct ir3_block * -resolve_dest_block(struct ir3_block *block) -{ - /* special case for last block: */ - if (!block->successors[0]) - return block; - - /* NOTE that we may or may not have inserted the jump - * in the target block yet, so conditions to resolve - * the dest to the dest block's successor are: - * - * (1) successor[1] == NULL && - * (2) (block-is-empty || only-instr-is-jump) - */ - if (block->successors[1] == NULL) { - if (list_empty(&block->instr_list)) { - return block->successors[0]; - } else if (list_length(&block->instr_list) == 1) { - struct ir3_instruction *instr = list_first_entry( - &block->instr_list, struct ir3_instruction, node); - if (instr->opc == OPC_JUMP) - return block->successors[0]; - } - } - return block; -} - -static bool -resolve_jump(struct ir3_instruction *instr) -{ - struct ir3_block *tblock = - resolve_dest_block(instr->cat0.target); - struct ir3_instruction *target; - - if (tblock != instr->cat0.target) { - list_delinit(&instr->cat0.target->node); - instr->cat0.target = tblock; 
- return true; - } - - target = list_first_entry(&tblock->instr_list, - struct ir3_instruction, node); - - /* TODO maybe a less fragile way to do this. But we are expecting - * a pattern from sched_block() that looks like: - * - * br !p0.x, #else-block - * br p0.x, #if-block - * - * if the first branch target is +2, or if 2nd branch target is +1 - * then we can just drop the jump. - */ - unsigned next_block; - if (instr->cat0.inv == true) - next_block = 2; - else - next_block = 1; - - if ((!target) || (target->ip == (instr->ip + next_block))) { - list_delinit(&instr->node); - return true; - } else { - instr->cat0.immed = - (int)target->ip - (int)instr->ip; - } - return false; -} - -/* resolve jumps, removing jumps/branches to immediately following - * instruction which we end up with from earlier stages. Since - * removing an instruction can invalidate earlier instruction's - * branch offsets, we need to do this iteratively until no more - * branches are removed. - */ -static bool -resolve_jumps(struct ir3 *ir) -{ - list_for_each_entry (struct ir3_block, block, &ir->block_list, node) - list_for_each_entry (struct ir3_instruction, instr, &block->instr_list, node) - if (is_flow(instr) && instr->cat0.target) - if (resolve_jump(instr)) - return true; - - return false; -} - -/* we want to mark points where divergent flow control re-converges - * with (jp) flags. For now, since we don't do any optimization for - * things that start out as a 'do {} while()', re-convergence points - * will always be a branch or jump target. Note that this is overly - * conservative, since unconditional jump targets are not convergence - * points, we are just assuming that the other path to reach the jump - * target was divergent. If we were clever enough to optimize the - * jump at end of a loop back to a conditional branch into a single - * conditional branch, ie. 
like: - * - * add.f r1.w, r0.x, (neg)(r)c2.x <= loop start - * mul.f r1.z, r1.z, r0.x - * mul.f r1.y, r1.y, r0.x - * mul.f r0.z, r1.x, r0.x - * mul.f r0.w, r0.y, r0.x - * cmps.f.ge r0.x, (r)c2.y, (r)r1.w - * add.s r0.x, (r)r0.x, (r)-1 - * sel.f32 r0.x, (r)c3.y, (r)r0.x, c3.x - * cmps.f.eq p0.x, r0.x, c3.y - * mov.f32f32 r0.x, r1.w - * mov.f32f32 r0.y, r0.w - * mov.f32f32 r1.x, r0.z - * (rpt2)nop - * br !p0.x, #-13 - * (jp)mul.f r0.x, c263.y, r1.y - * - * Then we'd have to be more clever, as the convergence point is no - * longer a branch or jump target. - */ -static void -mark_convergence_points(struct ir3 *ir) -{ - list_for_each_entry (struct ir3_block, block, &ir->block_list, node) { - list_for_each_entry (struct ir3_instruction, instr, &block->instr_list, node) { - if (is_flow(instr) && instr->cat0.target) { - struct ir3_instruction *target = - list_first_entry(&instr->cat0.target->instr_list, - struct ir3_instruction, node); - target->flags |= IR3_INSTR_JP; - } - } - } -} - -void -ir3_legalize(struct ir3 *ir, int *num_samp, bool *has_ssbo, int *max_bary) -{ - struct ir3_legalize_ctx *ctx = rzalloc(ir, struct ir3_legalize_ctx); - bool progress; - - ctx->max_bary = -1; - - /* allocate per-block data: */ - list_for_each_entry (struct ir3_block, block, &ir->block_list, node) { - block->data = rzalloc(ctx, struct ir3_legalize_block_data); - } - - /* process each block: */ - do { - progress = false; - list_for_each_entry (struct ir3_block, block, &ir->block_list, node) { - progress |= legalize_block(ctx, block); - } - } while (progress); - - *num_samp = ctx->num_samp; - *has_ssbo = ctx->has_ssbo; - *max_bary = ctx->max_bary; - - do { - ir3_count_instructions(ir); - } while(resolve_jumps(ir)); - - mark_convergence_points(ir); - - ralloc_free(ctx); -} diff -Nru mesa-18.3.3/src/gallium/drivers/freedreno/ir3/ir3_nir.c mesa-19.0.1/src/gallium/drivers/freedreno/ir3/ir3_nir.c --- mesa-18.3.3/src/gallium/drivers/freedreno/ir3/ir3_nir.c 2018-12-07 18:58:04.000000000 +0000 +++ 
mesa-19.0.1/src/gallium/drivers/freedreno/ir3/ir3_nir.c 1970-01-01 00:00:00.000000000 +0000 @@ -1,272 +0,0 @@ -/* - * Copyright (C) 2015 Rob Clark - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * the rights to use, copy, modify, merge, publish, distribute, sublicense, - * and/or sell copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice (including the next - * paragraph) shall be included in all copies or substantial portions of the - * Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL - * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. 
- * - * Authors: - * Rob Clark - */ - - -#include "freedreno_util.h" - -#include "ir3_nir.h" -#include "ir3_compiler.h" -#include "ir3_shader.h" - -#include "nir/tgsi_to_nir.h" - - -static const nir_shader_compiler_options options = { - .lower_fpow = true, - .lower_scmp = true, - .lower_flrp32 = true, - .lower_flrp64 = true, - .lower_ffract = true, - .lower_fmod32 = true, - .lower_fmod64 = true, - .lower_fdiv = true, - .lower_ldexp = true, - .fuse_ffma = true, - .native_integers = true, - .vertex_id_zero_based = true, - .lower_extract_byte = true, - .lower_extract_word = true, - .lower_all_io_to_temps = true, - .lower_helper_invocation = true, -}; - -struct nir_shader * -ir3_tgsi_to_nir(const struct tgsi_token *tokens) -{ - return tgsi_to_nir(tokens, &options); -} - -const nir_shader_compiler_options * -ir3_get_compiler_options(struct ir3_compiler *compiler) -{ - return &options; -} - -/* for given shader key, are any steps handled in nir? */ -bool -ir3_key_lowers_nir(const struct ir3_shader_key *key) -{ - return key->fsaturate_s | key->fsaturate_t | key->fsaturate_r | - key->vsaturate_s | key->vsaturate_t | key->vsaturate_r | - key->ucp_enables | key->color_two_side | - key->fclamp_color | key->vclamp_color; -} - -#define OPT(nir, pass, ...) ({ \ - bool this_progress = false; \ - NIR_PASS(this_progress, nir, pass, ##__VA_ARGS__); \ - this_progress; \ -}) - -#define OPT_V(nir, pass, ...) 
NIR_PASS_V(nir, pass, ##__VA_ARGS__) - -static void -ir3_optimize_loop(nir_shader *s) -{ - bool progress; - do { - progress = false; - - OPT_V(s, nir_lower_vars_to_ssa); - progress |= OPT(s, nir_opt_copy_prop_vars); - progress |= OPT(s, nir_opt_dead_write_vars); - progress |= OPT(s, nir_lower_alu_to_scalar); - progress |= OPT(s, nir_lower_phis_to_scalar); - - progress |= OPT(s, nir_copy_prop); - progress |= OPT(s, nir_opt_dce); - progress |= OPT(s, nir_opt_cse); - static int gcm = -1; - if (gcm == -1) - gcm = env2u("GCM"); - if (gcm == 1) - progress |= OPT(s, nir_opt_gcm, true); - else if (gcm == 2) - progress |= OPT(s, nir_opt_gcm, false); - progress |= OPT(s, nir_opt_peephole_select, 16); - progress |= OPT(s, nir_opt_intrinsics); - progress |= OPT(s, nir_opt_algebraic); - progress |= OPT(s, nir_opt_constant_folding); - progress |= OPT(s, nir_opt_dead_cf); - if (OPT(s, nir_opt_trivial_continues)) { - progress |= true; - /* If nir_opt_trivial_continues makes progress, then we need to clean - * things up if we want any hope of nir_opt_if or nir_opt_loop_unroll - * to make progress. 
- */ - OPT(s, nir_copy_prop); - OPT(s, nir_opt_dce); - } - progress |= OPT(s, nir_opt_if); - progress |= OPT(s, nir_opt_remove_phis); - progress |= OPT(s, nir_opt_undef); - - } while (progress); -} - -struct nir_shader * -ir3_optimize_nir(struct ir3_shader *shader, nir_shader *s, - const struct ir3_shader_key *key) -{ - struct nir_lower_tex_options tex_options = { - .lower_rect = 0, - }; - - if (key) { - switch (shader->type) { - case SHADER_FRAGMENT: - tex_options.saturate_s = key->fsaturate_s; - tex_options.saturate_t = key->fsaturate_t; - tex_options.saturate_r = key->fsaturate_r; - break; - case SHADER_VERTEX: - tex_options.saturate_s = key->vsaturate_s; - tex_options.saturate_t = key->vsaturate_t; - tex_options.saturate_r = key->vsaturate_r; - break; - default: - /* TODO */ - break; - } - } - - if (shader->compiler->gpu_id >= 400) { - /* a4xx seems to have *no* sam.p */ - tex_options.lower_txp = ~0; /* lower all txp */ - } else { - /* a3xx just needs to avoid sam.p for 3d tex */ - tex_options.lower_txp = (1 << GLSL_SAMPLER_DIM_3D); - } - - if (fd_mesa_debug & FD_DBG_DISASM) { - debug_printf("----------------------\n"); - nir_print_shader(s, stdout); - debug_printf("----------------------\n"); - } - - OPT_V(s, nir_opt_global_to_local); - OPT_V(s, nir_lower_regs_to_ssa); - - if (key) { - if (s->info.stage == MESA_SHADER_VERTEX) { - OPT_V(s, nir_lower_clip_vs, key->ucp_enables); - if (key->vclamp_color) - OPT_V(s, nir_lower_clamp_color_outputs); - } else if (s->info.stage == MESA_SHADER_FRAGMENT) { - OPT_V(s, nir_lower_clip_fs, key->ucp_enables); - if (key->fclamp_color) - OPT_V(s, nir_lower_clamp_color_outputs); - } - if (key->color_two_side) { - OPT_V(s, nir_lower_two_sided_color); - } - } else { - /* only want to do this the first time (when key is null) - * and not again on any potential 2nd variant lowering pass: - */ - OPT_V(s, ir3_nir_apply_trig_workarounds); - } - - OPT_V(s, nir_lower_tex, &tex_options); - OPT_V(s, nir_lower_load_const_to_scalar); - if 
(shader->compiler->gpu_id < 500) - OPT_V(s, ir3_nir_lower_tg4_to_tex); - - ir3_optimize_loop(s); - - /* do idiv lowering after first opt loop to give a chance for - * divide by immed power-of-two to be caught first: - */ - if (OPT(s, nir_lower_idiv)) - ir3_optimize_loop(s); - - OPT_V(s, nir_remove_dead_variables, nir_var_local); - - OPT_V(s, nir_move_load_const); - - if (fd_mesa_debug & FD_DBG_DISASM) { - debug_printf("----------------------\n"); - nir_print_shader(s, stdout); - debug_printf("----------------------\n"); - } - - nir_sweep(s); - - return s; -} - -void -ir3_nir_scan_driver_consts(nir_shader *shader, - struct ir3_driver_const_layout *layout) -{ - nir_foreach_function(function, shader) { - if (!function->impl) - continue; - - nir_foreach_block(block, function->impl) { - nir_foreach_instr(instr, block) { - if (instr->type != nir_instr_type_intrinsic) - continue; - - nir_intrinsic_instr *intr = - nir_instr_as_intrinsic(instr); - unsigned idx; - - switch (intr->intrinsic) { - case nir_intrinsic_get_buffer_size: - idx = nir_src_as_const_value(intr->src[0])->u32[0]; - if (layout->ssbo_size.mask & (1 << idx)) - break; - layout->ssbo_size.mask |= (1 << idx); - layout->ssbo_size.off[idx] = - layout->ssbo_size.count; - layout->ssbo_size.count += 1; /* one const per */ - break; - case nir_intrinsic_image_deref_atomic_add: - case nir_intrinsic_image_deref_atomic_min: - case nir_intrinsic_image_deref_atomic_max: - case nir_intrinsic_image_deref_atomic_and: - case nir_intrinsic_image_deref_atomic_or: - case nir_intrinsic_image_deref_atomic_xor: - case nir_intrinsic_image_deref_atomic_exchange: - case nir_intrinsic_image_deref_atomic_comp_swap: - case nir_intrinsic_image_deref_store: - case nir_intrinsic_image_deref_size: - idx = nir_intrinsic_get_var(intr, 0)->data.driver_location; - if (layout->image_dims.mask & (1 << idx)) - break; - layout->image_dims.mask |= (1 << idx); - layout->image_dims.off[idx] = - layout->image_dims.count; - layout->image_dims.count += 3; 
/* three const per */ - break; - default: - break; - } - } - } - } -} diff -Nru mesa-18.3.3/src/gallium/drivers/freedreno/ir3/ir3_nir.h mesa-19.0.1/src/gallium/drivers/freedreno/ir3/ir3_nir.h --- mesa-18.3.3/src/gallium/drivers/freedreno/ir3/ir3_nir.h 2018-12-07 18:58:04.000000000 +0000 +++ mesa-19.0.1/src/gallium/drivers/freedreno/ir3/ir3_nir.h 1970-01-01 00:00:00.000000000 +0000 @@ -1,46 +0,0 @@ -/* - * Copyright (C) 2015 Rob Clark - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * the rights to use, copy, modify, merge, publish, distribute, sublicense, - * and/or sell copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice (including the next - * paragraph) shall be included in all copies or substantial portions of the - * Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL - * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. 
- * - * Authors: - * Rob Clark - */ - -#ifndef IR3_NIR_H_ -#define IR3_NIR_H_ - -#include "compiler/nir/nir.h" -#include "compiler/shader_enums.h" - -#include "ir3_shader.h" - -void ir3_nir_scan_driver_consts(nir_shader *shader, struct ir3_driver_const_layout *layout); - -bool ir3_nir_apply_trig_workarounds(nir_shader *shader); -bool ir3_nir_lower_tg4_to_tex(nir_shader *shader); - -struct nir_shader * ir3_tgsi_to_nir(const struct tgsi_token *tokens); -const nir_shader_compiler_options * ir3_get_compiler_options(struct ir3_compiler *compiler); -bool ir3_key_lowers_nir(const struct ir3_shader_key *key); -struct nir_shader * ir3_optimize_nir(struct ir3_shader *shader, nir_shader *s, - const struct ir3_shader_key *key); - -#endif /* IR3_NIR_H_ */ diff -Nru mesa-18.3.3/src/gallium/drivers/freedreno/ir3/ir3_nir_lower_tg4_to_tex.c mesa-19.0.1/src/gallium/drivers/freedreno/ir3/ir3_nir_lower_tg4_to_tex.c --- mesa-18.3.3/src/gallium/drivers/freedreno/ir3/ir3_nir_lower_tg4_to_tex.c 2018-09-27 19:13:54.000000000 +0000 +++ mesa-19.0.1/src/gallium/drivers/freedreno/ir3/ir3_nir_lower_tg4_to_tex.c 1970-01-01 00:00:00.000000000 +0000 @@ -1,138 +0,0 @@ -/* - * Copyright © 2017 Ilia Mirkin - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * the rights to use, copy, modify, merge, publish, distribute, sublicense, - * and/or sell copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice (including the next - * paragraph) shall be included in all copies or substantial portions of the - * Software. 
- * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL - * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING - * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS - * IN THE SOFTWARE. - */ - -#include "ir3_nir.h" -#include "compiler/nir/nir_builder.h" - -/* A4XX has a broken GATHER4 operation. It performs the texture swizzle on the - * gather results, rather than before. As a result, it must be emulated with - * direct texture calls. - */ - -static bool -lower_tg4(nir_block *block, nir_builder *b, void *mem_ctx) -{ - bool progress = false; - - static const int offsets[3][2] = { {0, 1}, {1, 1}, {1, 0} }; - - nir_foreach_instr_safe(instr, block) { - if (instr->type != nir_instr_type_tex) - continue; - - nir_tex_instr *tg4 = (nir_tex_instr *)instr; - - if (tg4->op != nir_texop_tg4) - continue; - - b->cursor = nir_before_instr(&tg4->instr); - - nir_ssa_def *results[4]; - int offset_index = nir_tex_instr_src_index(tg4, nir_tex_src_offset); - for (int i = 0; i < 4; i++) { - int num_srcs = tg4->num_srcs + 1 /* lod */; - if (offset_index < 0 && i < 3) - num_srcs++; - - nir_tex_instr *tex = nir_tex_instr_create(b->shader, num_srcs); - tex->op = nir_texop_txl; - tex->sampler_dim = tg4->sampler_dim; - tex->coord_components = tg4->coord_components; - tex->is_array = tg4->is_array; - tex->is_shadow = tg4->is_shadow; - tex->is_new_style_shadow = tg4->is_new_style_shadow; - tex->texture_index = tg4->texture_index; - tex->sampler_index = tg4->sampler_index; - tex->dest_type = tg4->dest_type; - - for (int j = 0; j < tg4->num_srcs; j++) { - nir_src_copy(&tex->src[j].src, &tg4->src[j].src, tex); - tex->src[j].src_type = tg4->src[j].src_type; - } - if (i != 3) { - nir_ssa_def 
*offset = - nir_vec2(b, nir_imm_int(b, offsets[i][0]), - nir_imm_int(b, offsets[i][1])); - if (offset_index < 0) { - tex->src[tg4->num_srcs].src = nir_src_for_ssa(offset); - tex->src[tg4->num_srcs].src_type = nir_tex_src_offset; - } else { - assert(nir_tex_instr_src_size(tex, offset_index) == 2); - nir_ssa_def *orig = nir_ssa_for_src( - b, tex->src[offset_index].src, 2); - tex->src[offset_index].src = - nir_src_for_ssa(nir_iadd(b, orig, offset)); - } - } - tex->src[num_srcs - 1].src = nir_src_for_ssa(nir_imm_float(b, 0)); - tex->src[num_srcs - 1].src_type = nir_tex_src_lod; - - nir_ssa_dest_init(&tex->instr, &tex->dest, - nir_tex_instr_dest_size(tex), 32, NULL); - nir_builder_instr_insert(b, &tex->instr); - - results[i] = nir_channel(b, &tex->dest.ssa, tg4->component); - } - - nir_ssa_def *result = nir_vec4(b, results[0], results[1], results[2], results[3]); - nir_ssa_def_rewrite_uses(&tg4->dest.ssa, nir_src_for_ssa(result)); - - nir_instr_remove(&tg4->instr); - - progress = true; - } - - return progress; -} - -static bool -lower_tg4_func(nir_function_impl *impl) -{ - void *mem_ctx = ralloc_parent(impl); - nir_builder b; - nir_builder_init(&b, impl); - - bool progress = false; - nir_foreach_block_safe(block, impl) { - progress |= lower_tg4(block, &b, mem_ctx); - } - - if (progress) - nir_metadata_preserve(impl, nir_metadata_block_index | - nir_metadata_dominance); - - return progress; -} - -bool -ir3_nir_lower_tg4_to_tex(nir_shader *shader) -{ - bool progress = false; - - nir_foreach_function(function, shader) { - if (function->impl) - progress |= lower_tg4_func(function->impl); - } - - return progress; -} diff -Nru mesa-18.3.3/src/gallium/drivers/freedreno/ir3/ir3_nir_trig.py mesa-19.0.1/src/gallium/drivers/freedreno/ir3/ir3_nir_trig.py --- mesa-18.3.3/src/gallium/drivers/freedreno/ir3/ir3_nir_trig.py 2018-09-27 19:13:54.000000000 +0000 +++ mesa-19.0.1/src/gallium/drivers/freedreno/ir3/ir3_nir_trig.py 1970-01-01 00:00:00.000000000 +0000 @@ -1,51 +0,0 @@ -# -# 
Copyright (C) 2016 Intel Corporation -# -# Permission is hereby granted, free of charge, to any person obtaining a -# copy of this software and associated documentation files (the "Software"), -# to deal in the Software without restriction, including without limitation -# the rights to use, copy, modify, merge, publish, distribute, sublicense, -# and/or sell copies of the Software, and to permit persons to whom the -# Software is furnished to do so, subject to the following conditions: -# -# The above copyright notice and this permission notice (including the next -# paragraph) shall be included in all copies or substantial portions of the -# Software. -# -# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL -# THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING -# FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS -# IN THE SOFTWARE. 
- -from __future__ import print_function - -import argparse -import sys - -trig_workarounds = [ - (('fsin', 'x'), ('fsin', ('fsub', ('fmul', 6.283185, ('ffract', ('fadd', ('fmul', 0.159155, 'x'), 0.5))), 3.141593))), - (('fcos', 'x'), ('fcos', ('fsub', ('fmul', 6.283185, ('ffract', ('fadd', ('fmul', 0.159155, 'x'), 0.5))), 3.141593))), -] - - -def main(): - parser = argparse.ArgumentParser() - parser.add_argument('-p', '--import-path', required=True) - args = parser.parse_args() - sys.path.insert(0, args.import_path) - run() - - -def run(): - import nir_algebraic # pylint: disable=import-error - - print('#include "ir3_nir.h"') - print(nir_algebraic.AlgebraicPass("ir3_nir_apply_trig_workarounds", - trig_workarounds).render()) - - -if __name__ == '__main__': - main() diff -Nru mesa-18.3.3/src/gallium/drivers/freedreno/ir3/ir3_print.c mesa-19.0.1/src/gallium/drivers/freedreno/ir3/ir3_print.c --- mesa-18.3.3/src/gallium/drivers/freedreno/ir3/ir3_print.c 2018-12-07 18:58:04.000000000 +0000 +++ mesa-19.0.1/src/gallium/drivers/freedreno/ir3/ir3_print.c 1970-01-01 00:00:00.000000000 +0000 @@ -1,264 +0,0 @@ -/* - * Copyright (C) 2014 Rob Clark - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * the rights to use, copy, modify, merge, publish, distribute, sublicense, - * and/or sell copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice (including the next - * paragraph) shall be included in all copies or substantial portions of the - * Software. 
- * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL - * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - * - * Authors: - * Rob Clark - */ - -#include -#include - -#include "ir3.h" - -#define PTRID(x) ((unsigned long)(x)) - -static void print_instr_name(struct ir3_instruction *instr) -{ - if (!instr) - return; -#ifdef DEBUG - printf("%04u:", instr->serialno); -#endif - printf("%04u:", instr->name); - printf("%04u:", instr->ip); - printf("%03u: ", instr->depth); - - if (instr->flags & IR3_INSTR_SY) - printf("(sy)"); - if (instr->flags & IR3_INSTR_SS) - printf("(ss)"); - - if (is_meta(instr)) { - switch (instr->opc) { - case OPC_META_INPUT: printf("_meta:in"); break; - case OPC_META_FO: printf("_meta:fo"); break; - case OPC_META_FI: printf("_meta:fi"); break; - - /* shouldn't hit here.. 
just for debugging: */ - default: printf("_meta:%d", instr->opc); break; - } - } else if (instr->opc == OPC_MOV) { - static const char *type[] = { - [TYPE_F16] = "f16", - [TYPE_F32] = "f32", - [TYPE_U16] = "u16", - [TYPE_U32] = "u32", - [TYPE_S16] = "s16", - [TYPE_S32] = "s32", - [TYPE_U8] = "u8", - [TYPE_S8] = "s8", - }; - if (instr->cat1.src_type == instr->cat1.dst_type) - printf("mov"); - else - printf("cov"); - printf(".%s%s", type[instr->cat1.src_type], type[instr->cat1.dst_type]); - } else { - printf("%s", ir3_instr_name(instr)); - if (instr->flags & IR3_INSTR_3D) - printf(".3d"); - if (instr->flags & IR3_INSTR_A) - printf(".a"); - if (instr->flags & IR3_INSTR_O) - printf(".o"); - if (instr->flags & IR3_INSTR_P) - printf(".p"); - if (instr->flags & IR3_INSTR_S) - printf(".s"); - if (instr->flags & IR3_INSTR_S2EN) - printf(".s2en"); - } -} - -static void print_reg_name(struct ir3_register *reg) -{ - if ((reg->flags & (IR3_REG_FABS | IR3_REG_SABS)) && - (reg->flags & (IR3_REG_FNEG | IR3_REG_SNEG | IR3_REG_BNOT))) - printf("(absneg)"); - else if (reg->flags & (IR3_REG_FNEG | IR3_REG_SNEG | IR3_REG_BNOT)) - printf("(neg)"); - else if (reg->flags & (IR3_REG_FABS | IR3_REG_SABS)) - printf("(abs)"); - - if (reg->flags & IR3_REG_IMMED) { - printf("imm[%f,%d,0x%x]", reg->fim_val, reg->iim_val, reg->iim_val); - } else if (reg->flags & IR3_REG_ARRAY) { - printf("arr[id=%u, offset=%d, size=%u", reg->array.id, - reg->array.offset, reg->size); - /* for ARRAY we could have null src, for example first write - * instruction.. 
- */ - if (reg->instr) { - printf(", _["); - print_instr_name(reg->instr); - printf("]"); - } - printf("]"); - } else if (reg->flags & IR3_REG_SSA) { - printf("_["); - print_instr_name(reg->instr); - printf("]"); - } else if (reg->flags & IR3_REG_RELATIV) { - if (reg->flags & IR3_REG_HALF) - printf("h"); - if (reg->flags & IR3_REG_CONST) - printf("c", reg->array.offset); - else - printf("\x1b[0;31mr\x1b[0m (%u)", reg->array.offset, reg->size); - } else { - if (reg->flags & IR3_REG_HALF) - printf("h"); - if (reg->flags & IR3_REG_CONST) - printf("c%u.%c", reg_num(reg), "xyzw"[reg_comp(reg)]); - else - printf("\x1b[0;31mr%u.%c\x1b[0m", reg_num(reg), "xyzw"[reg_comp(reg)]); - } -} - -static void -tab(int lvl) -{ - for (int i = 0; i < lvl; i++) - printf("\t"); -} - -static void -print_instr(struct ir3_instruction *instr, int lvl) -{ - unsigned i; - - tab(lvl); - - print_instr_name(instr); - for (i = 0; i < instr->regs_count; i++) { - struct ir3_register *reg = instr->regs[i]; - printf(i ? ", " : " "); - print_reg_name(reg); - } - - if (instr->address) { - printf(", address=_"); - printf("["); - print_instr_name(instr->address); - printf("]"); - } - - if (instr->cp.left) { - printf(", left=_"); - printf("["); - print_instr_name(instr->cp.left); - printf("]"); - } - - if (instr->cp.right) { - printf(", right=_"); - printf("["); - print_instr_name(instr->cp.right); - printf("]"); - } - - if (instr->opc == OPC_META_FO) { - printf(", off=%d", instr->fo.off); - } - - if (is_flow(instr) && instr->cat0.target) { - /* the predicate register src is implied: */ - if (instr->opc == OPC_BR) { - printf(" %sp0.x", instr->cat0.inv ? "!" 
: ""); - } - printf(", target=block%u", block_id(instr->cat0.target)); - } - - if (instr->deps_count) { - printf(", false-deps:"); - for (unsigned i = 0; i < instr->deps_count; i++) { - if (i > 0) - printf(", "); - printf("_["); - print_instr_name(instr->deps[i]); - printf("]"); - } - } - - printf("\n"); -} - -void ir3_print_instr(struct ir3_instruction *instr) -{ - print_instr(instr, 0); -} - -static void -print_block(struct ir3_block *block, int lvl) -{ - tab(lvl); printf("block%u {\n", block_id(block)); - - if (block->predecessors_count > 0) { - tab(lvl+1); - printf("pred: "); - for (unsigned i = 0; i < block->predecessors_count; i++) { - if (i) - printf(", "); - printf("block%u", block_id(block->predecessors[i])); - } - printf("\n"); - } - - list_for_each_entry (struct ir3_instruction, instr, &block->instr_list, node) { - print_instr(instr, lvl+1); - } - - tab(lvl+1); printf("/* keeps:\n"); - for (unsigned i = 0; i < block->keeps_count; i++) { - print_instr(block->keeps[i], lvl+2); - } - tab(lvl+1); printf(" */\n"); - - if (block->successors[1]) { - /* leading into if/else: */ - tab(lvl+1); - printf("/* succs: if _["); - print_instr_name(block->condition); - printf("] block%u; else block%u; */\n", - block_id(block->successors[0]), - block_id(block->successors[1])); - } else if (block->successors[0]) { - tab(lvl+1); - printf("/* succs: block%u; */\n", - block_id(block->successors[0])); - } - tab(lvl); printf("}\n"); -} - -void -ir3_print(struct ir3 *ir) -{ - list_for_each_entry (struct ir3_block, block, &ir->block_list, node) - print_block(block, 0); - - for (unsigned i = 0; i < ir->noutputs; i++) { - if (!ir->outputs[i]) - continue; - printf("out%d: ", i); - print_instr(ir->outputs[i], 0); - } -} diff -Nru mesa-18.3.3/src/gallium/drivers/freedreno/ir3/ir3_ra.c mesa-19.0.1/src/gallium/drivers/freedreno/ir3/ir3_ra.c --- mesa-18.3.3/src/gallium/drivers/freedreno/ir3/ir3_ra.c 2018-12-07 18:58:04.000000000 +0000 +++ 
mesa-19.0.1/src/gallium/drivers/freedreno/ir3/ir3_ra.c 1970-01-01 00:00:00.000000000 +0000 @@ -1,1126 +0,0 @@ -/* - * Copyright (C) 2014 Rob Clark - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * the rights to use, copy, modify, merge, publish, distribute, sublicense, - * and/or sell copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice (including the next - * paragraph) shall be included in all copies or substantial portions of the - * Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL - * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - * - * Authors: - * Rob Clark - */ - -#include "util/u_math.h" -#include "util/register_allocate.h" -#include "util/ralloc.h" -#include "util/bitset.h" - -#include "freedreno_util.h" - -#include "ir3.h" -#include "ir3_compiler.h" - -/* - * Register Assignment: - * - * Uses the register_allocate util, which implements graph coloring - * algo with interference classes. To handle the cases where we need - * consecutive registers (for example, texture sample instructions), - * we model these as larger (double/quad/etc) registers which conflict - * with the corresponding registers in other classes. - * - * Additionally we create additional classes for half-regs, which - * do not conflict with the full-reg classes. 
We do need at least - * sizes 1-4 (to deal w/ texture sample instructions output to half- - * reg). At the moment we don't create the higher order half-reg - * classes as half-reg frequently does not have enough precision - * for texture coords at higher resolutions. - * - * There are some additional cases that we need to handle specially, - * as the graph coloring algo doesn't understand "partial writes". - * For example, a sequence like: - * - * add r0.z, ... - * sam (f32)(xy)r0.x, ... - * ... - * sam (f32)(xyzw)r0.w, r0.x, ... ; 3d texture, so r0.xyz are coord - * - * In this scenario, we treat r0.xyz as class size 3, which is written - * (from a use/def perspective) at the 'add' instruction and ignore the - * subsequent partial writes to r0.xy. So the 'add r0.z, ...' is the - * defining instruction, as it is the first to partially write r0.xyz. - * - * Note i965 has a similar scenario, which they solve with a virtual - * LOAD_PAYLOAD instruction which gets turned into multiple MOV's after - * register assignment. But for us that is horrible from a scheduling - * standpoint. Instead what we do is use idea of 'definer' instruction. - * Ie. the first instruction (lowest ip) to write to the variable is the - * one we consider from use/def perspective when building interference - * graph. (Other instructions which write other variable components - * just define the variable some more.) - * - * Arrays of arbitrary size are handled via pre-coloring a consecutive - * sequence of registers. Additional scalar (single component) reg - * names are allocated starting at ctx->class_base[total_class_count] - * (see arr->base), which are pre-colored. In the use/def graph direct - * access is treated as a single element use/def, and indirect access - * is treated as use or def of all array elements. (Only the first - * def is tracked, in case of multiple indirect writes, etc.) 
- * - * TODO arrays that fit in one of the pre-defined class sizes should - * not need to be pre-colored, but instead could be given a normal - * vreg name. (Ignoring this for now since it is a good way to work - * out the kinks with arbitrary sized arrays.) - * - * TODO might be easier for debugging to split this into two passes, - * the first assigning vreg names in a way that we could ir3_print() - * the result. - */ - -static const unsigned class_sizes[] = { - 1, 2, 3, 4, - 4 + 4, /* txd + 1d/2d */ - 4 + 6, /* txd + 3d */ -}; -#define class_count ARRAY_SIZE(class_sizes) - -static const unsigned half_class_sizes[] = { - 1, 2, 3, 4, -}; -#define half_class_count ARRAY_SIZE(half_class_sizes) - -/* seems to just be used for compute shaders? Seems like vec1 and vec3 - * are sufficient (for now?) - */ -static const unsigned high_class_sizes[] = { - 1, 3, -}; -#define high_class_count ARRAY_SIZE(high_class_sizes) - -#define total_class_count (class_count + half_class_count + high_class_count) - -/* Below a0.x are normal regs. RA doesn't need to assign a0.x/p0.x. 
*/ -#define NUM_REGS (4 * 48) /* r0 to r47 */ -#define NUM_HIGH_REGS (4 * 8) /* r48 to r55 */ -#define FIRST_HIGH_REG (4 * 48) -/* Number of virtual regs in a given class: */ -#define CLASS_REGS(i) (NUM_REGS - (class_sizes[i] - 1)) -#define HALF_CLASS_REGS(i) (NUM_REGS - (half_class_sizes[i] - 1)) -#define HIGH_CLASS_REGS(i) (NUM_HIGH_REGS - (high_class_sizes[i] - 1)) - -#define HALF_OFFSET (class_count) -#define HIGH_OFFSET (class_count + half_class_count) - -/* register-set, created one time, used for all shaders: */ -struct ir3_ra_reg_set { - struct ra_regs *regs; - unsigned int classes[class_count]; - unsigned int half_classes[half_class_count]; - unsigned int high_classes[high_class_count]; - /* maps flat virtual register space to base gpr: */ - uint16_t *ra_reg_to_gpr; - /* maps cls,gpr to flat virtual register space: */ - uint16_t **gpr_to_ra_reg; -}; - -static void -build_q_values(unsigned int **q_values, unsigned off, - const unsigned *sizes, unsigned count) -{ - for (unsigned i = 0; i < count; i++) { - q_values[i + off] = rzalloc_array(q_values, unsigned, total_class_count); - - /* From register_allocate.c: - * - * q(B,C) (indexed by C, B is this register class) in - * Runeson/Nyström paper. This is "how many registers of B could - * the worst choice register from C conflict with". - * - * If we just let the register allocation algorithm compute these - * values, is extremely expensive. However, since all of our - * registers are laid out, we can very easily compute them - * ourselves. View the register from C as fixed starting at GRF n - * somewhere in the middle, and the register from B as sliding back - * and forth. Then the first register to conflict from B is the - * one starting at n - class_size[B] + 1 and the last register to - * conflict will start at n + class_size[B] - 1. Therefore, the - * number of conflicts from B is class_size[B] + class_size[C] - 1. 
- * - * +-+-+-+-+-+-+ +-+-+-+-+-+-+ - * B | | | | | |n| --> | | | | | | | - * +-+-+-+-+-+-+ +-+-+-+-+-+-+ - * +-+-+-+-+-+ - * C |n| | | | | - * +-+-+-+-+-+ - * - * (Idea copied from brw_fs_reg_allocate.cpp) - */ - for (unsigned j = 0; j < count; j++) - q_values[i + off][j + off] = sizes[i] + sizes[j] - 1; - } -} - -/* One-time setup of RA register-set, which describes all the possible - * "virtual" registers and their interferences. Ie. double register - * occupies (and conflicts with) two single registers, and so forth. - * Since registers do not need to be aligned to their class size, they - * can conflict with other registers in the same class too. Ie: - * - * Single (base) | Double - * --------------+--------------- - * R0 | D0 - * R1 | D0 D1 - * R2 | D1 D2 - * R3 | D2 - * .. and so on.. - * - * (NOTE the disassembler uses notation like r0.x/y/z/w but those are - * really just four scalar registers. Don't let that confuse you.) - */ -struct ir3_ra_reg_set * -ir3_ra_alloc_reg_set(struct ir3_compiler *compiler) -{ - struct ir3_ra_reg_set *set = rzalloc(compiler, struct ir3_ra_reg_set); - unsigned ra_reg_count, reg, first_half_reg, first_high_reg, base; - unsigned int **q_values; - - /* calculate # of regs across all classes: */ - ra_reg_count = 0; - for (unsigned i = 0; i < class_count; i++) - ra_reg_count += CLASS_REGS(i); - for (unsigned i = 0; i < half_class_count; i++) - ra_reg_count += HALF_CLASS_REGS(i); - for (unsigned i = 0; i < high_class_count; i++) - ra_reg_count += HIGH_CLASS_REGS(i); - - /* allocate and populate q_values: */ - q_values = ralloc_array(set, unsigned *, total_class_count); - - build_q_values(q_values, 0, class_sizes, class_count); - build_q_values(q_values, HALF_OFFSET, half_class_sizes, half_class_count); - build_q_values(q_values, HIGH_OFFSET, high_class_sizes, high_class_count); - - /* allocate the reg-set.. 
*/ - set->regs = ra_alloc_reg_set(set, ra_reg_count, true); - set->ra_reg_to_gpr = ralloc_array(set, uint16_t, ra_reg_count); - set->gpr_to_ra_reg = ralloc_array(set, uint16_t *, total_class_count); - - /* .. and classes */ - reg = 0; - for (unsigned i = 0; i < class_count; i++) { - set->classes[i] = ra_alloc_reg_class(set->regs); - - set->gpr_to_ra_reg[i] = ralloc_array(set, uint16_t, CLASS_REGS(i)); - - for (unsigned j = 0; j < CLASS_REGS(i); j++) { - ra_class_add_reg(set->regs, set->classes[i], reg); - - set->ra_reg_to_gpr[reg] = j; - set->gpr_to_ra_reg[i][j] = reg; - - for (unsigned br = j; br < j + class_sizes[i]; br++) - ra_add_transitive_reg_conflict(set->regs, br, reg); - - reg++; - } - } - - first_half_reg = reg; - base = HALF_OFFSET; - - for (unsigned i = 0; i < half_class_count; i++) { - set->half_classes[i] = ra_alloc_reg_class(set->regs); - - set->gpr_to_ra_reg[base + i] = - ralloc_array(set, uint16_t, HALF_CLASS_REGS(i)); - - for (unsigned j = 0; j < HALF_CLASS_REGS(i); j++) { - ra_class_add_reg(set->regs, set->half_classes[i], reg); - - set->ra_reg_to_gpr[reg] = j; - set->gpr_to_ra_reg[base + i][j] = reg; - - for (unsigned br = j; br < j + half_class_sizes[i]; br++) - ra_add_transitive_reg_conflict(set->regs, br + first_half_reg, reg); - - reg++; - } - } - - first_high_reg = reg; - base = HIGH_OFFSET; - - for (unsigned i = 0; i < high_class_count; i++) { - set->high_classes[i] = ra_alloc_reg_class(set->regs); - - set->gpr_to_ra_reg[base + i] = - ralloc_array(set, uint16_t, HIGH_CLASS_REGS(i)); - - for (unsigned j = 0; j < HIGH_CLASS_REGS(i); j++) { - ra_class_add_reg(set->regs, set->high_classes[i], reg); - - set->ra_reg_to_gpr[reg] = j; - set->gpr_to_ra_reg[base + i][j] = reg; - - for (unsigned br = j; br < j + high_class_sizes[i]; br++) - ra_add_transitive_reg_conflict(set->regs, br + first_high_reg, reg); - - reg++; - } - } - - /* starting a6xx, half precision regs conflict w/ full precision regs: */ - if (compiler->gpu_id >= 600) { - /* because 
of transitivity, we can get away with just setting up - * conflicts between the first class of full and half regs: - */ - for (unsigned j = 0; j < CLASS_REGS(0) / 2; j++) { - unsigned freg = set->gpr_to_ra_reg[0][j]; - unsigned hreg0 = set->gpr_to_ra_reg[HALF_OFFSET][(j * 2) + 0]; - unsigned hreg1 = set->gpr_to_ra_reg[HALF_OFFSET][(j * 2) + 1]; - - ra_add_transitive_reg_conflict(set->regs, freg, hreg0); - ra_add_transitive_reg_conflict(set->regs, freg, hreg1); - } - - // TODO also need to update q_values, but for now: - ra_set_finalize(set->regs, NULL); - } else { - ra_set_finalize(set->regs, q_values); - } - - ralloc_free(q_values); - - return set; -} - -/* additional block-data (per-block) */ -struct ir3_ra_block_data { - BITSET_WORD *def; /* variables defined before used in block */ - BITSET_WORD *use; /* variables used before defined in block */ - BITSET_WORD *livein; /* which defs reach entry point of block */ - BITSET_WORD *liveout; /* which defs reach exit point of block */ -}; - -/* additional instruction-data (per-instruction) */ -struct ir3_ra_instr_data { - /* cached instruction 'definer' info: */ - struct ir3_instruction *defn; - int off, sz, cls; -}; - -/* register-assign context, per-shader */ -struct ir3_ra_ctx { - struct ir3 *ir; - enum shader_t type; - bool frag_face; - - struct ir3_ra_reg_set *set; - struct ra_graph *g; - unsigned alloc_count; - /* one per class, plus one slot for arrays: */ - unsigned class_alloc_count[total_class_count + 1]; - unsigned class_base[total_class_count + 1]; - unsigned instr_cnt; - unsigned *def, *use; /* def/use table */ - struct ir3_ra_instr_data *instrd; -}; - -/* does it conflict? 
*/ -static inline bool -intersects(unsigned a_start, unsigned a_end, unsigned b_start, unsigned b_end) -{ - return !((a_start >= b_end) || (b_start >= a_end)); -} - -static bool -is_half(struct ir3_instruction *instr) -{ - return !!(instr->regs[0]->flags & IR3_REG_HALF); -} - -static bool -is_high(struct ir3_instruction *instr) -{ - return !!(instr->regs[0]->flags & IR3_REG_HIGH); -} - -static int -size_to_class(unsigned sz, bool half, bool high) -{ - if (high) { - for (unsigned i = 0; i < high_class_count; i++) - if (high_class_sizes[i] >= sz) - return i + HIGH_OFFSET; - } else if (half) { - for (unsigned i = 0; i < half_class_count; i++) - if (half_class_sizes[i] >= sz) - return i + HALF_OFFSET; - } else { - for (unsigned i = 0; i < class_count; i++) - if (class_sizes[i] >= sz) - return i; - } - debug_assert(0); - return -1; -} - -static bool -writes_gpr(struct ir3_instruction *instr) -{ - if (is_store(instr)) - return false; - /* is dest a normal temp register: */ - struct ir3_register *reg = instr->regs[0]; - if (reg->flags & (IR3_REG_CONST | IR3_REG_IMMED)) - return false; - if ((reg->num == regid(REG_A0, 0)) || - (reg->num == regid(REG_P0, 0))) - return false; - return true; -} - -static bool -instr_before(struct ir3_instruction *a, struct ir3_instruction *b) -{ - if (a->flags & IR3_INSTR_UNUSED) - return false; - return (a->ip < b->ip); -} - -static struct ir3_instruction * -get_definer(struct ir3_ra_ctx *ctx, struct ir3_instruction *instr, - int *sz, int *off) -{ - struct ir3_ra_instr_data *id = &ctx->instrd[instr->ip]; - struct ir3_instruction *d = NULL; - - if (id->defn) { - *sz = id->sz; - *off = id->off; - return id->defn; - } - - if (instr->opc == OPC_META_FI) { - /* What about the case where collect is subset of array, we - * need to find the distance between where actual array starts - * and fanin.. that probably doesn't happen currently. 
- */ - struct ir3_register *src; - int dsz, doff; - - /* note: don't use foreach_ssa_src as this gets called once - * while assigning regs (which clears SSA flag) - */ - foreach_src_n(src, n, instr) { - struct ir3_instruction *dd; - if (!src->instr) - continue; - - dd = get_definer(ctx, src->instr, &dsz, &doff); - - if ((!d) || instr_before(dd, d)) { - d = dd; - *sz = dsz; - *off = doff - n; - } - } - - } else if (instr->cp.right || instr->cp.left) { - /* covers also the meta:fo case, which ends up w/ single - * scalar instructions for each component: - */ - struct ir3_instruction *f = ir3_neighbor_first(instr); - - /* by definition, the entire sequence forms one linked list - * of single scalar register nodes (even if some of them may - * be fanouts from a texture sample (for example) instr. We - * just need to walk the list finding the first element of - * the group defined (lowest ip) - */ - int cnt = 0; - - /* need to skip over unused in the group: */ - while (f && (f->flags & IR3_INSTR_UNUSED)) { - f = f->cp.right; - cnt++; - } - - while (f) { - if ((!d) || instr_before(f, d)) - d = f; - if (f == instr) - *off = cnt; - f = f->cp.right; - cnt++; - } - - *sz = cnt; - - } else { - /* second case is looking directly at the instruction which - * produces multiple values (eg, texture sample), rather - * than the fanout nodes that point back to that instruction. - * This isn't quite right, because it may be part of a larger - * group, such as: - * - * sam (f32)(xyzw)r0.x, ... - * add r1.x, ... - * add r1.y, ... - * sam (f32)(xyzw)r2.x, r0.w <-- (r0.w, r1.x, r1.y) - * - * need to come up with a better way to handle that case. 
- */ - if (instr->address) { - *sz = instr->regs[0]->size; - } else { - *sz = util_last_bit(instr->regs[0]->wrmask); - } - *off = 0; - d = instr; - } - - if (d->opc == OPC_META_FO) { - struct ir3_instruction *dd; - int dsz, doff; - - dd = get_definer(ctx, d->regs[1]->instr, &dsz, &doff); - - /* by definition, should come before: */ - debug_assert(instr_before(dd, d)); - - *sz = MAX2(*sz, dsz); - - debug_assert(instr->opc == OPC_META_FO); - *off = MAX2(*off, instr->fo.off); - - d = dd; - } - - id->defn = d; - id->sz = *sz; - id->off = *off; - - return d; -} - -static void -ra_block_find_definers(struct ir3_ra_ctx *ctx, struct ir3_block *block) -{ - list_for_each_entry (struct ir3_instruction, instr, &block->instr_list, node) { - struct ir3_ra_instr_data *id = &ctx->instrd[instr->ip]; - if (instr->regs_count == 0) - continue; - /* couple special cases: */ - if (writes_addr(instr) || writes_pred(instr)) { - id->cls = -1; - } else if (instr->regs[0]->flags & IR3_REG_ARRAY) { - id->cls = total_class_count; - } else { - id->defn = get_definer(ctx, instr, &id->sz, &id->off); - id->cls = size_to_class(id->sz, is_half(id->defn), is_high(id->defn)); - } - } -} - -/* give each instruction a name (and ip), and count up the # of names - * of each class - */ -static void -ra_block_name_instructions(struct ir3_ra_ctx *ctx, struct ir3_block *block) -{ - list_for_each_entry (struct ir3_instruction, instr, &block->instr_list, node) { - struct ir3_ra_instr_data *id = &ctx->instrd[instr->ip]; - -#ifdef DEBUG - instr->name = ~0; -#endif - - ctx->instr_cnt++; - - if (instr->regs_count == 0) - continue; - - if (!writes_gpr(instr)) - continue; - - if (id->defn != instr) - continue; - - /* arrays which don't fit in one of the pre-defined class - * sizes are pre-colored: - */ - if ((id->cls >= 0) && (id->cls < total_class_count)) { - instr->name = ctx->class_alloc_count[id->cls]++; - ctx->alloc_count++; - } - } -} - -static void -ra_init(struct ir3_ra_ctx *ctx) -{ - unsigned n, base; - - 
ir3_clear_mark(ctx->ir); - n = ir3_count_instructions(ctx->ir); - - ctx->instrd = rzalloc_array(NULL, struct ir3_ra_instr_data, n); - - list_for_each_entry (struct ir3_block, block, &ctx->ir->block_list, node) { - ra_block_find_definers(ctx, block); - } - - list_for_each_entry (struct ir3_block, block, &ctx->ir->block_list, node) { - ra_block_name_instructions(ctx, block); - } - - /* figure out the base register name for each class. The - * actual ra name is class_base[cls] + instr->name; - */ - ctx->class_base[0] = 0; - for (unsigned i = 1; i <= total_class_count; i++) { - ctx->class_base[i] = ctx->class_base[i-1] + - ctx->class_alloc_count[i-1]; - } - - /* and vreg names for array elements: */ - base = ctx->class_base[total_class_count]; - list_for_each_entry (struct ir3_array, arr, &ctx->ir->array_list, node) { - arr->base = base; - ctx->class_alloc_count[total_class_count] += arr->length; - base += arr->length; - } - ctx->alloc_count += ctx->class_alloc_count[total_class_count]; - - ctx->g = ra_alloc_interference_graph(ctx->set->regs, ctx->alloc_count); - ralloc_steal(ctx->g, ctx->instrd); - ctx->def = rzalloc_array(ctx->g, unsigned, ctx->alloc_count); - ctx->use = rzalloc_array(ctx->g, unsigned, ctx->alloc_count); -} - -static unsigned -__ra_name(struct ir3_ra_ctx *ctx, int cls, struct ir3_instruction *defn) -{ - unsigned name; - debug_assert(cls >= 0); - debug_assert(cls < total_class_count); /* we shouldn't get arrays here.. 
*/ - name = ctx->class_base[cls] + defn->name; - debug_assert(name < ctx->alloc_count); - return name; -} - -static int -ra_name(struct ir3_ra_ctx *ctx, struct ir3_ra_instr_data *id) -{ - /* TODO handle name mapping for arrays */ - return __ra_name(ctx, id->cls, id->defn); -} - -static void -ra_destroy(struct ir3_ra_ctx *ctx) -{ - ralloc_free(ctx->g); -} - -static void -ra_block_compute_live_ranges(struct ir3_ra_ctx *ctx, struct ir3_block *block) -{ - struct ir3_ra_block_data *bd; - unsigned bitset_words = BITSET_WORDS(ctx->alloc_count); - -#define def(name, instr) \ - do { \ - /* defined on first write: */ \ - if (!ctx->def[name]) \ - ctx->def[name] = instr->ip; \ - ctx->use[name] = instr->ip; \ - BITSET_SET(bd->def, name); \ - } while(0); - -#define use(name, instr) \ - do { \ - ctx->use[name] = MAX2(ctx->use[name], instr->ip); \ - if (!BITSET_TEST(bd->def, name)) \ - BITSET_SET(bd->use, name); \ - } while(0); - - bd = rzalloc(ctx->g, struct ir3_ra_block_data); - - bd->def = rzalloc_array(bd, BITSET_WORD, bitset_words); - bd->use = rzalloc_array(bd, BITSET_WORD, bitset_words); - bd->livein = rzalloc_array(bd, BITSET_WORD, bitset_words); - bd->liveout = rzalloc_array(bd, BITSET_WORD, bitset_words); - - block->data = bd; - - list_for_each_entry (struct ir3_instruction, instr, &block->instr_list, node) { - struct ir3_instruction *src; - struct ir3_register *reg; - - if (instr->regs_count == 0) - continue; - - /* There are a couple special cases to deal with here: - * - * fanout: used to split values from a higher class to a lower - * class, for example split the results of a texture fetch - * into individual scalar values; We skip over these from - * a 'def' perspective, and for a 'use' we walk the chain - * up to the defining instruction. - * - * fanin: used to collect values from lower class and assemble - * them together into a higher class, for example arguments - * to texture sample instructions; We consider these to be - * defined at the earliest fanin source. 
- * - * Most of this is handled in the get_definer() helper. - * - * In either case, we trace the instruction back to the original - * definer and consider that as the def/use ip. - */ - - if (writes_gpr(instr)) { - struct ir3_ra_instr_data *id = &ctx->instrd[instr->ip]; - struct ir3_register *dst = instr->regs[0]; - - if (dst->flags & IR3_REG_ARRAY) { - struct ir3_array *arr = - ir3_lookup_array(ctx->ir, dst->array.id); - unsigned i; - - arr->start_ip = MIN2(arr->start_ip, instr->ip); - arr->end_ip = MAX2(arr->end_ip, instr->ip); - - /* set the node class now.. in case we don't encounter - * this array dst again. From register_alloc algo's - * perspective, these are all single/scalar regs: - */ - for (i = 0; i < arr->length; i++) { - unsigned name = arr->base + i; - ra_set_node_class(ctx->g, name, ctx->set->classes[0]); - } - - /* indirect write is treated like a write to all array - * elements, since we don't know which one is actually - * written: - */ - if (dst->flags & IR3_REG_RELATIV) { - for (i = 0; i < arr->length; i++) { - unsigned name = arr->base + i; - def(name, instr); - } - } else { - unsigned name = arr->base + dst->array.offset; - def(name, instr); - } - - } else if (id->defn == instr) { - unsigned name = ra_name(ctx, id); - - /* since we are in SSA at this point: */ - debug_assert(!BITSET_TEST(bd->use, name)); - - def(name, id->defn); - - if (is_high(id->defn)) { - ra_set_node_class(ctx->g, name, - ctx->set->high_classes[id->cls - HIGH_OFFSET]); - } else if (is_half(id->defn)) { - ra_set_node_class(ctx->g, name, - ctx->set->half_classes[id->cls - HALF_OFFSET]); - } else { - ra_set_node_class(ctx->g, name, - ctx->set->classes[id->cls]); - } - } - } - - foreach_src(reg, instr) { - if (reg->flags & IR3_REG_ARRAY) { - struct ir3_array *arr = - ir3_lookup_array(ctx->ir, reg->array.id); - arr->start_ip = MIN2(arr->start_ip, instr->ip); - arr->end_ip = MAX2(arr->end_ip, instr->ip); - - /* indirect read is treated like a read fromall array - * elements, 
since we don't know which one is actually - * read: - */ - if (reg->flags & IR3_REG_RELATIV) { - unsigned i; - for (i = 0; i < arr->length; i++) { - unsigned name = arr->base + i; - use(name, instr); - } - } else { - unsigned name = arr->base + reg->array.offset; - use(name, instr); - /* NOTE: arrays are not SSA so unconditionally - * set use bit: - */ - BITSET_SET(bd->use, name); - debug_assert(reg->array.offset < arr->length); - } - } else if ((src = ssa(reg)) && writes_gpr(src)) { - unsigned name = ra_name(ctx, &ctx->instrd[src->ip]); - use(name, instr); - } - } - } -} - -static bool -ra_compute_livein_liveout(struct ir3_ra_ctx *ctx) -{ - unsigned bitset_words = BITSET_WORDS(ctx->alloc_count); - bool progress = false; - - list_for_each_entry (struct ir3_block, block, &ctx->ir->block_list, node) { - struct ir3_ra_block_data *bd = block->data; - - /* update livein: */ - for (unsigned i = 0; i < bitset_words; i++) { - BITSET_WORD new_livein = - (bd->use[i] | (bd->liveout[i] & ~bd->def[i])); - - if (new_livein & ~bd->livein[i]) { - bd->livein[i] |= new_livein; - progress = true; - } - } - - /* update liveout: */ - for (unsigned j = 0; j < ARRAY_SIZE(block->successors); j++) { - struct ir3_block *succ = block->successors[j]; - struct ir3_ra_block_data *succ_bd; - - if (!succ) - continue; - - succ_bd = succ->data; - - for (unsigned i = 0; i < bitset_words; i++) { - BITSET_WORD new_liveout = - (succ_bd->livein[i] & ~bd->liveout[i]); - - if (new_liveout) { - bd->liveout[i] |= new_liveout; - progress = true; - } - } - } - } - - return progress; -} - -static void -print_bitset(const char *name, BITSET_WORD *bs, unsigned cnt) -{ - bool first = true; - debug_printf(" %s:", name); - for (unsigned i = 0; i < cnt; i++) { - if (BITSET_TEST(bs, i)) { - if (!first) - debug_printf(","); - debug_printf(" %04u", i); - first = false; - } - } - debug_printf("\n"); -} - -static void -ra_add_interference(struct ir3_ra_ctx *ctx) -{ - struct ir3 *ir = ctx->ir; - - /* initialize array live 
ranges: */ - list_for_each_entry (struct ir3_array, arr, &ir->array_list, node) { - arr->start_ip = ~0; - arr->end_ip = 0; - } - - /* compute live ranges (use/def) on a block level, also updating - * block's def/use bitmasks (used below to calculate per-block - * livein/liveout): - */ - list_for_each_entry (struct ir3_block, block, &ir->block_list, node) { - ra_block_compute_live_ranges(ctx, block); - } - - /* update per-block livein/liveout: */ - while (ra_compute_livein_liveout(ctx)) {} - - if (fd_mesa_debug & FD_DBG_OPTMSGS) { - debug_printf("AFTER LIVEIN/OUT:\n"); - ir3_print(ir); - list_for_each_entry (struct ir3_block, block, &ir->block_list, node) { - struct ir3_ra_block_data *bd = block->data; - debug_printf("block%u:\n", block_id(block)); - print_bitset(" def", bd->def, ctx->alloc_count); - print_bitset(" use", bd->use, ctx->alloc_count); - print_bitset(" l/i", bd->livein, ctx->alloc_count); - print_bitset(" l/o", bd->liveout, ctx->alloc_count); - } - list_for_each_entry (struct ir3_array, arr, &ir->array_list, node) { - debug_printf("array%u:\n", arr->id); - debug_printf(" length: %u\n", arr->length); - debug_printf(" start_ip: %u\n", arr->start_ip); - debug_printf(" end_ip: %u\n", arr->end_ip); - } - } - - /* extend start/end ranges based on livein/liveout info from cfg: */ - list_for_each_entry (struct ir3_block, block, &ir->block_list, node) { - struct ir3_ra_block_data *bd = block->data; - - for (unsigned i = 0; i < ctx->alloc_count; i++) { - if (BITSET_TEST(bd->livein, i)) { - ctx->def[i] = MIN2(ctx->def[i], block->start_ip); - ctx->use[i] = MAX2(ctx->use[i], block->start_ip); - } - - if (BITSET_TEST(bd->liveout, i)) { - ctx->def[i] = MIN2(ctx->def[i], block->end_ip); - ctx->use[i] = MAX2(ctx->use[i], block->end_ip); - } - } - - list_for_each_entry (struct ir3_array, arr, &ctx->ir->array_list, node) { - for (unsigned i = 0; i < arr->length; i++) { - if (BITSET_TEST(bd->livein, i + arr->base)) { - arr->start_ip = MIN2(arr->start_ip, block->start_ip); 
- } - if (BITSET_TEST(bd->livein, i + arr->base)) { - arr->end_ip = MAX2(arr->end_ip, block->end_ip); - } - } - } - } - - /* need to fix things up to keep outputs live: */ - for (unsigned i = 0; i < ir->noutputs; i++) { - struct ir3_instruction *instr = ir->outputs[i]; - unsigned name = ra_name(ctx, &ctx->instrd[instr->ip]); - ctx->use[name] = ctx->instr_cnt; - } - - for (unsigned i = 0; i < ctx->alloc_count; i++) { - for (unsigned j = 0; j < ctx->alloc_count; j++) { - if (intersects(ctx->def[i], ctx->use[i], - ctx->def[j], ctx->use[j])) { - ra_add_node_interference(ctx->g, i, j); - } - } - } -} - -/* some instructions need fix-up if dst register is half precision: */ -static void fixup_half_instr_dst(struct ir3_instruction *instr) -{ - switch (opc_cat(instr->opc)) { - case 1: /* move instructions */ - instr->cat1.dst_type = half_type(instr->cat1.dst_type); - break; - case 3: - switch (instr->opc) { - case OPC_MAD_F32: - instr->opc = OPC_MAD_F16; - break; - case OPC_SEL_B32: - instr->opc = OPC_SEL_B16; - break; - case OPC_SEL_S32: - instr->opc = OPC_SEL_S16; - break; - case OPC_SEL_F32: - instr->opc = OPC_SEL_F16; - break; - case OPC_SAD_S32: - instr->opc = OPC_SAD_S16; - break; - /* instructions may already be fixed up: */ - case OPC_MAD_F16: - case OPC_SEL_B16: - case OPC_SEL_S16: - case OPC_SEL_F16: - case OPC_SAD_S16: - break; - default: - assert(0); - break; - } - break; - case 5: - instr->cat5.type = half_type(instr->cat5.type); - break; - } -} -/* some instructions need fix-up if src register is half precision: */ -static void fixup_half_instr_src(struct ir3_instruction *instr) -{ - switch (instr->opc) { - case OPC_MOV: - instr->cat1.src_type = half_type(instr->cat1.src_type); - break; - default: - break; - } -} - -/* NOTE: instr could be NULL for IR3_REG_ARRAY case, for the first - * array access(es) which do not have any previous access to depend - * on from scheduling point of view - */ -static void -reg_assign(struct ir3_ra_ctx *ctx, struct ir3_register 
*reg, - struct ir3_instruction *instr) -{ - struct ir3_ra_instr_data *id; - - if (reg->flags & IR3_REG_ARRAY) { - struct ir3_array *arr = - ir3_lookup_array(ctx->ir, reg->array.id); - unsigned name = arr->base + reg->array.offset; - unsigned r = ra_get_node_reg(ctx->g, name); - unsigned num = ctx->set->ra_reg_to_gpr[r]; - - if (reg->flags & IR3_REG_RELATIV) { - reg->array.offset = num; - } else { - reg->num = num; - reg->flags &= ~IR3_REG_SSA; - } - - reg->flags &= ~IR3_REG_ARRAY; - } else if ((id = &ctx->instrd[instr->ip]) && id->defn) { - unsigned name = ra_name(ctx, id); - unsigned r = ra_get_node_reg(ctx->g, name); - unsigned num = ctx->set->ra_reg_to_gpr[r] + id->off; - - debug_assert(!(reg->flags & IR3_REG_RELATIV)); - - if (is_high(id->defn)) - num += FIRST_HIGH_REG; - - reg->num = num; - reg->flags &= ~IR3_REG_SSA; - - if (is_half(id->defn)) - reg->flags |= IR3_REG_HALF; - } -} - -static void -ra_block_alloc(struct ir3_ra_ctx *ctx, struct ir3_block *block) -{ - list_for_each_entry (struct ir3_instruction, instr, &block->instr_list, node) { - struct ir3_register *reg; - - if (instr->regs_count == 0) - continue; - - if (writes_gpr(instr)) { - reg_assign(ctx, instr->regs[0], instr); - if (instr->regs[0]->flags & IR3_REG_HALF) - fixup_half_instr_dst(instr); - } - - foreach_src_n(reg, n, instr) { - struct ir3_instruction *src = reg->instr; - /* Note: reg->instr could be null for IR3_REG_ARRAY */ - if (!(src || (reg->flags & IR3_REG_ARRAY))) - continue; - reg_assign(ctx, instr->regs[n+1], src); - if (instr->regs[n+1]->flags & IR3_REG_HALF) - fixup_half_instr_src(instr); - } - } -} - -static int -ra_alloc(struct ir3_ra_ctx *ctx) -{ - /* pre-assign array elements: - */ - list_for_each_entry (struct ir3_array, arr, &ctx->ir->array_list, node) { - unsigned base = 0; - - if (arr->end_ip == 0) - continue; - - /* figure out what else we conflict with which has already - * been assigned: - */ -retry: - list_for_each_entry (struct ir3_array, arr2, &ctx->ir->array_list, 
node) { - if (arr2 == arr) - break; - if (arr2->end_ip == 0) - continue; - /* if it intersects with liverange AND register range.. */ - if (intersects(arr->start_ip, arr->end_ip, - arr2->start_ip, arr2->end_ip) && - intersects(base, base + arr->length, - arr2->reg, arr2->reg + arr2->length)) { - base = MAX2(base, arr2->reg + arr2->length); - goto retry; - } - } - - arr->reg = base; - - for (unsigned i = 0; i < arr->length; i++) { - unsigned name, reg; - - name = arr->base + i; - reg = ctx->set->gpr_to_ra_reg[0][base++]; - - ra_set_node_reg(ctx->g, name, reg); - } - } - - if (!ra_allocate(ctx->g)) - return -1; - - list_for_each_entry (struct ir3_block, block, &ctx->ir->block_list, node) { - ra_block_alloc(ctx, block); - } - - return 0; -} - -int ir3_ra(struct ir3 *ir, enum shader_t type, - bool frag_coord, bool frag_face) -{ - struct ir3_ra_ctx ctx = { - .ir = ir, - .type = type, - .frag_face = frag_face, - .set = ir->compiler->set, - }; - int ret; - - ra_init(&ctx); - ra_add_interference(&ctx); - ret = ra_alloc(&ctx); - ra_destroy(&ctx); - - return ret; -} diff -Nru mesa-18.3.3/src/gallium/drivers/freedreno/ir3/ir3_sched.c mesa-19.0.1/src/gallium/drivers/freedreno/ir3/ir3_sched.c --- mesa-18.3.3/src/gallium/drivers/freedreno/ir3/ir3_sched.c 2018-12-07 18:58:04.000000000 +0000 +++ mesa-19.0.1/src/gallium/drivers/freedreno/ir3/ir3_sched.c 1970-01-01 00:00:00.000000000 +0000 @@ -1,818 +0,0 @@ -/* - * Copyright (C) 2014 Rob Clark - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * the rights to use, copy, modify, merge, publish, distribute, sublicense, - * and/or sell copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice (including the next - * 
paragraph) shall be included in all copies or substantial portions of the - * Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL - * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - * - * Authors: - * Rob Clark - */ - - -#include "util/u_math.h" - -#include "ir3.h" - -/* - * Instruction Scheduling: - * - * A recursive depth based scheduling algo. Recursively find an eligible - * instruction to schedule from the deepest instruction (recursing through - * it's unscheduled src instructions). Normally this would result in a - * lot of re-traversal of the same instructions, so we cache results in - * instr->data (and clear cached results that would be no longer valid - * after scheduling an instruction). - * - * There are a few special cases that need to be handled, since sched - * is currently independent of register allocation. Usages of address - * register (a0.x) or predicate register (p0.x) must be serialized. Ie. - * if you have two pairs of instructions that write the same special - * register and then read it, then those pairs cannot be interleaved. - * To solve this, when we are in such a scheduling "critical section", - * and we encounter a conflicting write to a special register, we try - * to schedule any remaining instructions that use that value first. 
- */ - -struct ir3_sched_ctx { - struct ir3_block *block; /* the current block */ - struct list_head depth_list; /* depth sorted unscheduled instrs */ - struct ir3_instruction *scheduled; /* last scheduled instr XXX remove*/ - struct ir3_instruction *addr; /* current a0.x user, if any */ - struct ir3_instruction *pred; /* current p0.x user, if any */ - bool error; -}; - -static bool is_sfu_or_mem(struct ir3_instruction *instr) -{ - return is_sfu(instr) || is_mem(instr); -} - -#define NULL_INSTR ((void *)~0) - -static void -clear_cache(struct ir3_sched_ctx *ctx, struct ir3_instruction *instr) -{ - list_for_each_entry (struct ir3_instruction, instr2, &ctx->depth_list, node) { - if ((instr2->data == instr) || (instr2->data == NULL_INSTR) || !instr) - instr2->data = NULL; - } -} - -static void -schedule(struct ir3_sched_ctx *ctx, struct ir3_instruction *instr) -{ - debug_assert(ctx->block == instr->block); - - /* maybe there is a better way to handle this than just stuffing - * a nop.. ideally we'd know about this constraint in the - * scheduling and depth calculation.. - */ - if (ctx->scheduled && is_sfu_or_mem(ctx->scheduled) && is_sfu_or_mem(instr)) - ir3_NOP(ctx->block); - - /* remove from depth list: - */ - list_delinit(&instr->node); - - if (writes_addr(instr)) { - debug_assert(ctx->addr == NULL); - ctx->addr = instr; - } - - if (writes_pred(instr)) { - debug_assert(ctx->pred == NULL); - ctx->pred = instr; - } - - instr->flags |= IR3_INSTR_MARK; - - list_addtail(&instr->node, &instr->block->instr_list); - ctx->scheduled = instr; - - if (writes_addr(instr) || writes_pred(instr) || is_input(instr)) { - clear_cache(ctx, NULL); - } else { - /* invalidate only the necessary entries.. 
*/ - clear_cache(ctx, instr); - } -} - -static struct ir3_instruction * -deepest(struct ir3_instruction **srcs, unsigned nsrcs) -{ - struct ir3_instruction *d = NULL; - unsigned i = 0, id = 0; - - while ((i < nsrcs) && !(d = srcs[id = i])) - i++; - - if (!d) - return NULL; - - for (; i < nsrcs; i++) - if (srcs[i] && (srcs[i]->depth > d->depth)) - d = srcs[id = i]; - - srcs[id] = NULL; - - return d; -} - -/** - * @block: the block to search in, starting from end; in first pass, - * this will be the block the instruction would be inserted into - * (but has not yet, ie. it only contains already scheduled - * instructions). For intra-block scheduling (second pass), this - * would be one of the predecessor blocks. - * @instr: the instruction to search for - * @maxd: max distance, bail after searching this # of instruction - * slots, since it means the instruction we are looking for is - * far enough away - * @pred: if true, recursively search into predecessor blocks to - * find the worst case (shortest) distance (only possible after - * individual blocks are all scheduled - */ -static unsigned -distance(struct ir3_block *block, struct ir3_instruction *instr, - unsigned maxd, bool pred) -{ - unsigned d = 0; - - list_for_each_entry_rev (struct ir3_instruction, n, &block->instr_list, node) { - if ((n == instr) || (d >= maxd)) - return d; - /* NOTE: don't count branch/jump since we don't know yet if they will - * be eliminated later in resolve_jumps().. really should do that - * earlier so we don't have this constraint. - */ - if (is_alu(n) || (is_flow(n) && (n->opc != OPC_JUMP) && (n->opc != OPC_BR))) - d++; - } - - /* if coming from a predecessor block, assume it is assigned far - * enough away.. we'll fix up later. 
- */ - if (!pred) - return maxd; - - if (pred && (block->data != block)) { - /* Search into predecessor blocks, finding the one with the - * shortest distance, since that will be the worst case - */ - unsigned min = maxd - d; - - /* (ab)use block->data to prevent recursion: */ - block->data = block; - - for (unsigned i = 0; i < block->predecessors_count; i++) { - unsigned n; - - n = distance(block->predecessors[i], instr, min, pred); - - min = MIN2(min, n); - } - - block->data = NULL; - d += min; - } - - return d; -} - -/* calculate delay for specified src: */ -static unsigned -delay_calc_srcn(struct ir3_block *block, - struct ir3_instruction *assigner, - struct ir3_instruction *consumer, - unsigned srcn, bool soft, bool pred) -{ - unsigned delay = 0; - - if (is_meta(assigner)) { - struct ir3_instruction *src; - foreach_ssa_src(src, assigner) { - unsigned d; - d = delay_calc_srcn(block, src, consumer, srcn, soft, pred); - delay = MAX2(delay, d); - } - } else { - if (soft) { - if (is_sfu(assigner)) { - delay = 4; - } else { - delay = ir3_delayslots(assigner, consumer, srcn); - } - } else { - delay = ir3_delayslots(assigner, consumer, srcn); - } - delay -= distance(block, assigner, delay, pred); - } - - return delay; -} - -/* calculate delay for instruction (maximum of delay for all srcs): */ -static unsigned -delay_calc(struct ir3_block *block, struct ir3_instruction *instr, - bool soft, bool pred) -{ - unsigned delay = 0; - struct ir3_instruction *src; - - foreach_ssa_src_n(src, i, instr) { - unsigned d; - d = delay_calc_srcn(block, src, instr, i, soft, pred); - delay = MAX2(delay, d); - } - - return delay; -} - -struct ir3_sched_notes { - /* there is at least one kill which could be scheduled, except - * for unscheduled bary.f's: - */ - bool blocked_kill; - /* there is at least one instruction that could be scheduled, - * except for conflicting address/predicate register usage: - */ - bool addr_conflict, pred_conflict; -}; - -static bool is_scheduled(struct 
ir3_instruction *instr) -{ - return !!(instr->flags & IR3_INSTR_MARK); -} - -/* could an instruction be scheduled if specified ssa src was scheduled? */ -static bool -could_sched(struct ir3_instruction *instr, struct ir3_instruction *src) -{ - struct ir3_instruction *other_src; - foreach_ssa_src(other_src, instr) { - /* if dependency not scheduled, we aren't ready yet: */ - if ((src != other_src) && !is_scheduled(other_src)) { - return false; - } - } - return true; -} - -/* Check if instruction is ok to schedule. Make sure it is not blocked - * by use of addr/predicate register, etc. - */ -static bool -check_instr(struct ir3_sched_ctx *ctx, struct ir3_sched_notes *notes, - struct ir3_instruction *instr) -{ - /* For instructions that write address register we need to - * make sure there is at least one instruction that uses the - * addr value which is otherwise ready. - * - * TODO if any instructions use pred register and have other - * src args, we would need to do the same for writes_pred().. - */ - if (writes_addr(instr)) { - struct ir3 *ir = instr->block->shader; - bool ready = false; - for (unsigned i = 0; (i < ir->indirects_count) && !ready; i++) { - struct ir3_instruction *indirect = ir->indirects[i]; - if (!indirect) - continue; - if (indirect->address != instr) - continue; - ready = could_sched(indirect, instr); - } - - /* nothing could be scheduled, so keep looking: */ - if (!ready) - return false; - } - - /* if this is a write to address/predicate register, and that - * register is currently in use, we need to defer until it is - * free: - */ - if (writes_addr(instr) && ctx->addr) { - debug_assert(ctx->addr != instr); - notes->addr_conflict = true; - return false; - } - - if (writes_pred(instr) && ctx->pred) { - debug_assert(ctx->pred != instr); - notes->pred_conflict = true; - return false; - } - - /* if the instruction is a kill, we need to ensure *every* - * bary.f is scheduled. 
The hw seems unhappy if the thread - * gets killed before the end-input (ei) flag is hit. - * - * We could do this by adding each bary.f instruction as - * virtual ssa src for the kill instruction. But we have - * fixed length instr->regs[]. - * - * TODO this wouldn't be quite right if we had multiple - * basic blocks, if any block was conditional. We'd need - * to schedule the bary.f's outside of any block which - * was conditional that contained a kill.. I think.. - */ - if (is_kill(instr)) { - struct ir3 *ir = instr->block->shader; - - for (unsigned i = 0; i < ir->baryfs_count; i++) { - struct ir3_instruction *baryf = ir->baryfs[i]; - if (baryf->flags & IR3_INSTR_UNUSED) - continue; - if (!is_scheduled(baryf)) { - notes->blocked_kill = true; - return false; - } - } - } - - return true; -} - -/* Find the best instruction to schedule from specified instruction or - * recursively it's ssa sources. - */ -static struct ir3_instruction * -find_instr_recursive(struct ir3_sched_ctx *ctx, struct ir3_sched_notes *notes, - struct ir3_instruction *instr) -{ - struct ir3_instruction *srcs[__ssa_src_cnt(instr)]; - struct ir3_instruction *src; - unsigned nsrcs = 0; - - if (is_scheduled(instr)) - return NULL; - - /* use instr->data to cache the results of recursing up the - * instr src's. Otherwise the recursive algo can scale quite - * badly w/ shader size. But this takes some care to clear - * the cache appropriately when instructions are scheduled. 
- */ - if (instr->data) { - if (instr->data == NULL_INSTR) - return NULL; - return instr->data; - } - - /* find unscheduled srcs: */ - foreach_ssa_src(src, instr) { - if (!is_scheduled(src)) { - debug_assert(nsrcs < ARRAY_SIZE(srcs)); - srcs[nsrcs++] = src; - } - } - - /* if all our src's are already scheduled: */ - if (nsrcs == 0) { - if (check_instr(ctx, notes, instr)) { - instr->data = instr; - return instr; - } - return NULL; - } - - while ((src = deepest(srcs, nsrcs))) { - struct ir3_instruction *candidate; - - candidate = find_instr_recursive(ctx, notes, src); - if (!candidate) - continue; - - if (check_instr(ctx, notes, candidate)) { - instr->data = candidate; - return candidate; - } - } - - instr->data = NULL_INSTR; - return NULL; -} - -/* find instruction to schedule: */ -static struct ir3_instruction * -find_eligible_instr(struct ir3_sched_ctx *ctx, struct ir3_sched_notes *notes, - bool soft) -{ - struct ir3_instruction *best_instr = NULL; - unsigned min_delay = ~0; - - /* TODO we'd really rather use the list/array of block outputs. But we - * don't have such a thing. Recursing *every* instruction in the list - * will result in a lot of repeated traversal, since instructions will - * get traversed both when they appear as ssa src to a later instruction - * as well as where they appear in the depth_list. - */ - list_for_each_entry_rev (struct ir3_instruction, instr, &ctx->depth_list, node) { - struct ir3_instruction *candidate; - unsigned delay; - - candidate = find_instr_recursive(ctx, notes, instr); - if (!candidate) - continue; - - delay = delay_calc(ctx->block, candidate, soft, false); - if (delay < min_delay) { - best_instr = candidate; - min_delay = delay; - } - - if (min_delay == 0) - break; - } - - return best_instr; -} - -/* "spill" the address register by remapping any unscheduled - * instructions which depend on the current address register - * to a clone of the instruction which wrote the address reg. 
- */ -static struct ir3_instruction * -split_addr(struct ir3_sched_ctx *ctx) -{ - struct ir3 *ir; - struct ir3_instruction *new_addr = NULL; - unsigned i; - - debug_assert(ctx->addr); - - ir = ctx->addr->block->shader; - - for (i = 0; i < ir->indirects_count; i++) { - struct ir3_instruction *indirect = ir->indirects[i]; - - if (!indirect) - continue; - - /* skip instructions already scheduled: */ - if (is_scheduled(indirect)) - continue; - - /* remap remaining instructions using current addr - * to new addr: - */ - if (indirect->address == ctx->addr) { - if (!new_addr) { - new_addr = ir3_instr_clone(ctx->addr); - /* original addr is scheduled, but new one isn't: */ - new_addr->flags &= ~IR3_INSTR_MARK; - } - ir3_instr_set_address(indirect, new_addr); - } - } - - /* all remaining indirects remapped to new addr: */ - ctx->addr = NULL; - - return new_addr; -} - -/* "spill" the predicate register by remapping any unscheduled - * instructions which depend on the current predicate register - * to a clone of the instruction which wrote the address reg. - */ -static struct ir3_instruction * -split_pred(struct ir3_sched_ctx *ctx) -{ - struct ir3 *ir; - struct ir3_instruction *new_pred = NULL; - unsigned i; - - debug_assert(ctx->pred); - - ir = ctx->pred->block->shader; - - for (i = 0; i < ir->predicates_count; i++) { - struct ir3_instruction *predicated = ir->predicates[i]; - - /* skip instructions already scheduled: */ - if (is_scheduled(predicated)) - continue; - - /* remap remaining instructions using current pred - * to new pred: - * - * TODO is there ever a case when pred isn't first - * (and only) src? 
- */ - if (ssa(predicated->regs[1]) == ctx->pred) { - if (!new_pred) { - new_pred = ir3_instr_clone(ctx->pred); - /* original pred is scheduled, but new one isn't: */ - new_pred->flags &= ~IR3_INSTR_MARK; - } - predicated->regs[1]->instr = new_pred; - } - } - - /* all remaining predicated remapped to new pred: */ - ctx->pred = NULL; - - return new_pred; -} - -static void -sched_block(struct ir3_sched_ctx *ctx, struct ir3_block *block) -{ - struct list_head unscheduled_list; - - ctx->block = block; - - /* addr/pred writes are per-block: */ - ctx->addr = NULL; - ctx->pred = NULL; - - /* move all instructions to the unscheduled list, and - * empty the block's instruction list (to which we will - * be inserting). - */ - list_replace(&block->instr_list, &unscheduled_list); - list_inithead(&block->instr_list); - list_inithead(&ctx->depth_list); - - /* first a pre-pass to schedule all meta:input instructions - * (which need to appear first so that RA knows the register is - * occupied), and move remaining to depth sorted list: - */ - list_for_each_entry_safe (struct ir3_instruction, instr, &unscheduled_list, node) { - if (instr->opc == OPC_META_INPUT) { - schedule(ctx, instr); - } else { - ir3_insert_by_depth(instr, &ctx->depth_list); - } - } - - while (!list_empty(&ctx->depth_list)) { - struct ir3_sched_notes notes = {0}; - struct ir3_instruction *instr; - - instr = find_eligible_instr(ctx, ¬es, true); - if (!instr) - instr = find_eligible_instr(ctx, ¬es, false); - - if (instr) { - unsigned delay = delay_calc(ctx->block, instr, false, false); - - /* and if we run out of instructions that can be scheduled, - * then it is time for nop's: - */ - debug_assert(delay <= 6); - while (delay > 0) { - ir3_NOP(block); - delay--; - } - - schedule(ctx, instr); - } else { - struct ir3_instruction *new_instr = NULL; - - /* nothing available to schedule.. 
if we are blocked on - * address/predicate register conflict, then break the - * deadlock by cloning the instruction that wrote that - * reg: - */ - if (notes.addr_conflict) { - new_instr = split_addr(ctx); - } else if (notes.pred_conflict) { - new_instr = split_pred(ctx); - } else { - debug_assert(0); - ctx->error = true; - return; - } - - if (new_instr) { - /* clearing current addr/pred can change what is - * available to schedule, so clear cache.. - */ - clear_cache(ctx, NULL); - - ir3_insert_by_depth(new_instr, &ctx->depth_list); - /* the original instr that wrote addr/pred may have - * originated from a different block: - */ - new_instr->block = block; - } - } - } - - /* And lastly, insert branch/jump instructions to take us to - * the next block. Later we'll strip back out the branches - * that simply jump to next instruction. - */ - if (block->successors[1]) { - /* if/else, conditional branches to "then" or "else": */ - struct ir3_instruction *br; - unsigned delay = 6; - - debug_assert(ctx->pred); - debug_assert(block->condition); - - delay -= distance(ctx->block, ctx->pred, delay, false); - - while (delay > 0) { - ir3_NOP(block); - delay--; - } - - /* create "else" branch first (since "then" block should - * frequently/always end up being a fall-thru): - */ - br = ir3_BR(block); - br->cat0.inv = true; - br->cat0.target = block->successors[1]; - - /* NOTE: we have to hard code delay of 6 above, since - * we want to insert the nop's before constructing the - * branch. Throw in an assert so we notice if this - * ever breaks on future generation: - */ - debug_assert(ir3_delayslots(ctx->pred, br, 0) == 6); - - br = ir3_BR(block); - br->cat0.target = block->successors[0]; - - } else if (block->successors[0]) { - /* otherwise unconditional jump to next block: */ - struct ir3_instruction *jmp; - - jmp = ir3_JUMP(block); - jmp->cat0.target = block->successors[0]; - } - - /* NOTE: if we kept track of the predecessors, we could do a better - * job w/ (jp) flags.. 
every node w/ > predecessor is a join point. - * Note that as we eliminate blocks which contain only an unconditional - * jump we probably need to propagate (jp) flag.. - */ -} - -/* After scheduling individual blocks, we still could have cases where - * one (or more) paths into a block, a value produced by a previous - * has too few delay slots to be legal. We can't deal with this in the - * first pass, because loops (ie. we can't ensure all predecessor blocks - * are already scheduled in the first pass). All we can really do at - * this point is stuff in extra nop's until things are legal. - */ -static void -sched_intra_block(struct ir3_sched_ctx *ctx, struct ir3_block *block) -{ - unsigned n = 0; - - ctx->block = block; - - list_for_each_entry_safe (struct ir3_instruction, instr, &block->instr_list, node) { - unsigned delay = 0; - - for (unsigned i = 0; i < block->predecessors_count; i++) { - unsigned d = delay_calc(block->predecessors[i], instr, false, true); - delay = MAX2(d, delay); - } - - while (delay > n) { - struct ir3_instruction *nop = ir3_NOP(block); - - /* move to before instr: */ - list_delinit(&nop->node); - list_addtail(&nop->node, &instr->node); - - n++; - } - - /* we can bail once we hit worst case delay: */ - if (++n > 6) - break; - } -} - -int ir3_sched(struct ir3 *ir) -{ - struct ir3_sched_ctx ctx = {0}; - - ir3_clear_mark(ir); - - list_for_each_entry (struct ir3_block, block, &ir->block_list, node) { - sched_block(&ctx, block); - } - - list_for_each_entry (struct ir3_block, block, &ir->block_list, node) { - sched_intra_block(&ctx, block); - } - - if (ctx.error) - return -1; - return 0; -} - -/* does instruction 'prior' need to be scheduled before 'instr'? 
*/ -static bool -depends_on(struct ir3_instruction *instr, struct ir3_instruction *prior) -{ - /* TODO for dependencies that are related to a specific object, ie - * a specific SSBO/image/array, we could relax this constraint to - * make accesses to unrelated objects not depend on each other (at - * least as long as not declared coherent) - */ - if (((instr->barrier_class & IR3_BARRIER_EVERYTHING) && prior->barrier_class) || - ((prior->barrier_class & IR3_BARRIER_EVERYTHING) && instr->barrier_class)) - return true; - return !!(instr->barrier_class & prior->barrier_conflict); -} - -static void -add_barrier_deps(struct ir3_block *block, struct ir3_instruction *instr) -{ - struct list_head *prev = instr->node.prev; - struct list_head *next = instr->node.next; - - /* add dependencies on previous instructions that must be scheduled - * prior to the current instruction - */ - while (prev != &block->instr_list) { - struct ir3_instruction *pi = - LIST_ENTRY(struct ir3_instruction, prev, node); - - prev = prev->prev; - - if (is_meta(pi)) - continue; - - if (instr->barrier_class == pi->barrier_class) { - ir3_instr_add_dep(instr, pi); - break; - } - - if (depends_on(instr, pi)) - ir3_instr_add_dep(instr, pi); - } - - /* add dependencies on this instruction to following instructions - * that must be scheduled after the current instruction: - */ - while (next != &block->instr_list) { - struct ir3_instruction *ni = - LIST_ENTRY(struct ir3_instruction, next, node); - - next = next->next; - - if (is_meta(ni)) - continue; - - if (instr->barrier_class == ni->barrier_class) { - ir3_instr_add_dep(ni, instr); - break; - } - - if (depends_on(ni, instr)) - ir3_instr_add_dep(ni, instr); - } -} - -/* before scheduling a block, we need to add any necessary false-dependencies - * to ensure that: - * - * (1) barriers are scheduled in the right order wrt instructions related - * to the barrier - * - * (2) reads that come before a write actually get scheduled before the - * write - */ -static 
void -calculate_deps(struct ir3_block *block) -{ - list_for_each_entry (struct ir3_instruction, instr, &block->instr_list, node) { - if (instr->barrier_class) { - add_barrier_deps(block, instr); - } - } -} - -void -ir3_sched_add_deps(struct ir3 *ir) -{ - list_for_each_entry (struct ir3_block, block, &ir->block_list, node) { - calculate_deps(block); - } -} diff -Nru mesa-18.3.3/src/gallium/drivers/freedreno/ir3/ir3_shader.c mesa-19.0.1/src/gallium/drivers/freedreno/ir3/ir3_shader.c --- mesa-18.3.3/src/gallium/drivers/freedreno/ir3/ir3_shader.c 2018-12-07 18:58:04.000000000 +0000 +++ mesa-19.0.1/src/gallium/drivers/freedreno/ir3/ir3_shader.c 1970-01-01 00:00:00.000000000 +0000 @@ -1,1025 +0,0 @@ -/* - * Copyright (C) 2014 Rob Clark - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * the rights to use, copy, modify, merge, publish, distribute, sublicense, - * and/or sell copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice (including the next - * paragraph) shall be included in all copies or substantial portions of the - * Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL - * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. 
- * - * Authors: - * Rob Clark - */ - -#include "pipe/p_state.h" -#include "util/u_string.h" -#include "util/u_memory.h" -#include "util/u_inlines.h" -#include "util/u_format.h" -#include "tgsi/tgsi_dump.h" -#include "tgsi/tgsi_parse.h" - -#include "freedreno_context.h" -#include "freedreno_util.h" - -#include "ir3_shader.h" -#include "ir3_compiler.h" -#include "ir3_nir.h" - -int -ir3_glsl_type_size(const struct glsl_type *type) -{ - return glsl_count_attribute_slots(type, false); -} - -static void -delete_variant(struct ir3_shader_variant *v) -{ - if (v->ir) - ir3_destroy(v->ir); - if (v->bo) - fd_bo_del(v->bo); - if (v->immediates) - free(v->immediates); - free(v); -} - -/* for vertex shader, the inputs are loaded into registers before the shader - * is executed, so max_regs from the shader instructions might not properly - * reflect the # of registers actually used, especially in case passthrough - * varyings. - * - * Likewise, for fragment shader, we can have some regs which are passed - * input values but never touched by the resulting shader (ie. as result - * of dead code elimination or simply because we don't know how to turn - * the reg off. 
- */ -static void -fixup_regfootprint(struct ir3_shader_variant *v) -{ - unsigned i; - - for (i = 0; i < v->inputs_count; i++) { - /* skip frag inputs fetch via bary.f since their reg's are - * not written by gpu before shader starts (and in fact the - * regid's might not even be valid) - */ - if (v->inputs[i].bary) - continue; - - /* ignore high regs that are global to all threads in a warp - * (they exist by default) (a5xx+) - */ - if (v->inputs[i].regid >= regid(48,0)) - continue; - - if (v->inputs[i].compmask) { - unsigned n = util_last_bit(v->inputs[i].compmask) - 1; - int32_t regid = (v->inputs[i].regid + n) >> 2; - v->info.max_reg = MAX2(v->info.max_reg, regid); - } - } - - for (i = 0; i < v->outputs_count; i++) { - int32_t regid = (v->outputs[i].regid + 3) >> 2; - v->info.max_reg = MAX2(v->info.max_reg, regid); - } -} - -/* wrapper for ir3_assemble() which does some info fixup based on - * shader state. Non-static since used by ir3_cmdline too. - */ -void * ir3_shader_assemble(struct ir3_shader_variant *v, uint32_t gpu_id) -{ - void *bin; - - bin = ir3_assemble(v->ir, &v->info, gpu_id); - if (!bin) - return NULL; - - if (gpu_id >= 400) { - v->instrlen = v->info.sizedwords / (2 * 16); - } else { - v->instrlen = v->info.sizedwords / (2 * 4); - } - - /* NOTE: if relative addressing is used, we set constlen in - * the compiler (to worst-case value) since we don't know in - * the assembler what the max addr reg value can be: - */ - v->constlen = MIN2(255, MAX2(v->constlen, v->info.max_const + 1)); - - fixup_regfootprint(v); - - return bin; -} - -static void -assemble_variant(struct ir3_shader_variant *v) -{ - struct ir3_compiler *compiler = v->shader->compiler; - uint32_t gpu_id = compiler->gpu_id; - uint32_t sz, *bin; - - bin = ir3_shader_assemble(v, gpu_id); - sz = v->info.sizedwords * 4; - - v->bo = fd_bo_new(compiler->dev, sz, - DRM_FREEDRENO_GEM_CACHE_WCOMBINE | - DRM_FREEDRENO_GEM_TYPE_KMEM); - - memcpy(fd_bo_map(v->bo), bin, sz); - - if (fd_mesa_debug & 
FD_DBG_DISASM) { - struct ir3_shader_key key = v->key; - printf("disassemble: type=%d, k={bp=%u,cts=%u,hp=%u}", v->type, - v->binning_pass, key.color_two_side, key.half_precision); - ir3_shader_disasm(v, bin, stdout); - } - - if (shader_debug_enabled(v->shader->type)) { - fprintf(stderr, "Native code for unnamed %s shader %s:\n", - shader_stage_name(v->shader->type), v->shader->nir->info.name); - if (v->shader->type == SHADER_FRAGMENT) - fprintf(stderr, "SIMD0\n"); - ir3_shader_disasm(v, bin, stderr); - } - - free(bin); - - /* no need to keep the ir around beyond this point: */ - ir3_destroy(v->ir); - v->ir = NULL; -} - -static void -dump_shader_info(struct ir3_shader_variant *v, struct pipe_debug_callback *debug) -{ - if (!unlikely(fd_mesa_debug & FD_DBG_SHADERDB)) - return; - - pipe_debug_message(debug, SHADER_INFO, "\n" - "SHADER-DB: %s prog %d/%d: %u instructions, %u dwords\n" - "SHADER-DB: %s prog %d/%d: %u half, %u full\n" - "SHADER-DB: %s prog %d/%d: %u const, %u constlen\n" - "SHADER-DB: %s prog %d/%d: %u (ss), %u (sy)\n", - ir3_shader_stage(v->shader), - v->shader->id, v->id, - v->info.instrs_count, - v->info.sizedwords, - ir3_shader_stage(v->shader), - v->shader->id, v->id, - v->info.max_half_reg + 1, - v->info.max_reg + 1, - ir3_shader_stage(v->shader), - v->shader->id, v->id, - v->info.max_const + 1, - v->constlen, - ir3_shader_stage(v->shader), - v->shader->id, v->id, - v->info.ss, v->info.sy); -} - -static struct ir3_shader_variant * -create_variant(struct ir3_shader *shader, struct ir3_shader_key key, - bool binning_pass) -{ - struct ir3_shader_variant *v = CALLOC_STRUCT(ir3_shader_variant); - int ret; - - if (!v) - return NULL; - - v->id = ++shader->variant_count; - v->shader = shader; - v->binning_pass = binning_pass; - v->key = key; - v->type = shader->type; - - ret = ir3_compile_shader_nir(shader->compiler, v); - if (ret) { - debug_error("compile failed!"); - goto fail; - } - - assemble_variant(v); - if (!v->bo) { - debug_error("assemble 
failed!"); - goto fail; - } - - return v; - -fail: - delete_variant(v); - return NULL; -} - -static inline struct ir3_shader_variant * -shader_variant(struct ir3_shader *shader, struct ir3_shader_key key, - struct pipe_debug_callback *debug) -{ - struct ir3_shader_variant *v; - - /* some shader key values only apply to vertex or frag shader, - * so normalize the key to avoid constructing multiple identical - * variants: - */ - switch (shader->type) { - case SHADER_FRAGMENT: - if (key.has_per_samp) { - key.vsaturate_s = 0; - key.vsaturate_t = 0; - key.vsaturate_r = 0; - key.vastc_srgb = 0; - key.vsamples = 0; - } - break; - case SHADER_VERTEX: - key.color_two_side = false; - key.half_precision = false; - key.rasterflat = false; - if (key.has_per_samp) { - key.fsaturate_s = 0; - key.fsaturate_t = 0; - key.fsaturate_r = 0; - key.fastc_srgb = 0; - key.fsamples = 0; - } - break; - default: - /* TODO */ - break; - } - - for (v = shader->variants; v; v = v->next) - if (ir3_shader_key_equal(&key, &v->key)) - return v; - - /* compile new variant if it doesn't exist already: */ - v = create_variant(shader, key, false); - if (v) { - v->next = shader->variants; - shader->variants = v; - dump_shader_info(v, debug); - } - - return v; -} - - -struct ir3_shader_variant * -ir3_shader_variant(struct ir3_shader *shader, struct ir3_shader_key key, - bool binning_pass, struct pipe_debug_callback *debug) -{ - struct ir3_shader_variant *v = - shader_variant(shader, key, debug); - - if (binning_pass) { - if (!v->binning) - v->binning = create_variant(shader, key, true); - return v->binning; - } - - return v; -} - -void -ir3_shader_destroy(struct ir3_shader *shader) -{ - struct ir3_shader_variant *v, *t; - for (v = shader->variants; v; ) { - t = v; - v = v->next; - delete_variant(t); - } - ralloc_free(shader->nir); - free(shader); -} - -struct ir3_shader * -ir3_shader_create(struct ir3_compiler *compiler, - const struct pipe_shader_state *cso, enum shader_t type, - struct 
pipe_debug_callback *debug) -{ - struct ir3_shader *shader = CALLOC_STRUCT(ir3_shader); - shader->compiler = compiler; - shader->id = ++shader->compiler->shader_count; - shader->type = type; - - nir_shader *nir; - if (cso->type == PIPE_SHADER_IR_NIR) { - /* we take ownership of the reference: */ - nir = cso->ir.nir; - } else { - debug_assert(cso->type == PIPE_SHADER_IR_TGSI); - if (fd_mesa_debug & FD_DBG_DISASM) { - DBG("dump tgsi: type=%d", shader->type); - tgsi_dump(cso->tokens, 0); - } - nir = ir3_tgsi_to_nir(cso->tokens); - } - NIR_PASS_V(nir, nir_lower_io, nir_var_all, ir3_glsl_type_size, - (nir_lower_io_options)0); - /* do first pass optimization, ignoring the key: */ - shader->nir = ir3_optimize_nir(shader, nir, NULL); - if (fd_mesa_debug & FD_DBG_DISASM) { - DBG("dump nir%d: type=%d", shader->id, shader->type); - nir_print_shader(shader->nir, stdout); - } - - shader->stream_output = cso->stream_output; - if (fd_mesa_debug & FD_DBG_SHADERDB) { - /* if shader-db run, create a standard variant immediately - * (as otherwise nothing will trigger the shader to be - * actually compiled) - */ - static struct ir3_shader_key key; - memset(&key, 0, sizeof(key)); - ir3_shader_variant(shader, key, false, debug); - } - return shader; -} - -/* a bit annoying that compute-shader and normal shader state objects - * aren't a bit more aligned. 
- */ -struct ir3_shader * -ir3_shader_create_compute(struct ir3_compiler *compiler, - const struct pipe_compute_state *cso, - struct pipe_debug_callback *debug) -{ - struct ir3_shader *shader = CALLOC_STRUCT(ir3_shader); - - shader->compiler = compiler; - shader->id = ++shader->compiler->shader_count; - shader->type = SHADER_COMPUTE; - - nir_shader *nir; - if (cso->ir_type == PIPE_SHADER_IR_NIR) { - /* we take ownership of the reference: */ - nir = (nir_shader *)cso->prog; - - NIR_PASS_V(nir, nir_lower_io, nir_var_all, ir3_glsl_type_size, - (nir_lower_io_options)0); - } else { - debug_assert(cso->ir_type == PIPE_SHADER_IR_TGSI); - if (fd_mesa_debug & FD_DBG_DISASM) { - DBG("dump tgsi: type=%d", shader->type); - tgsi_dump(cso->prog, 0); - } - nir = ir3_tgsi_to_nir(cso->prog); - } - - /* do first pass optimization, ignoring the key: */ - shader->nir = ir3_optimize_nir(shader, nir, NULL); - if (fd_mesa_debug & FD_DBG_DISASM) { - printf("dump nir%d: type=%d\n", shader->id, shader->type); - nir_print_shader(shader->nir, stdout); - } - - return shader; -} - -static void dump_reg(FILE *out, const char *name, uint32_t r) -{ - if (r != regid(63,0)) - fprintf(out, "; %s: r%d.%c\n", name, r >> 2, "xyzw"[r & 0x3]); -} - -static void dump_output(FILE *out, struct ir3_shader_variant *so, - unsigned slot, const char *name) -{ - uint32_t regid; - regid = ir3_find_output_regid(so, slot); - dump_reg(out, name, regid); -} - -void -ir3_shader_disasm(struct ir3_shader_variant *so, uint32_t *bin, FILE *out) -{ - struct ir3 *ir = so->ir; - struct ir3_register *reg; - const char *type = ir3_shader_stage(so->shader); - uint8_t regid; - unsigned i; - - for (i = 0; i < ir->ninputs; i++) { - if (!ir->inputs[i]) { - fprintf(out, "; in%d unused\n", i); - continue; - } - reg = ir->inputs[i]->regs[0]; - regid = reg->num; - fprintf(out, "@in(%sr%d.%c)\tin%d\n", - (reg->flags & IR3_REG_HALF) ? 
"h" : "", - (regid >> 2), "xyzw"[regid & 0x3], i); - } - - for (i = 0; i < ir->noutputs; i++) { - if (!ir->outputs[i]) { - fprintf(out, "; out%d unused\n", i); - continue; - } - /* kill shows up as a virtual output.. skip it! */ - if (is_kill(ir->outputs[i])) - continue; - reg = ir->outputs[i]->regs[0]; - regid = reg->num; - fprintf(out, "@out(%sr%d.%c)\tout%d\n", - (reg->flags & IR3_REG_HALF) ? "h" : "", - (regid >> 2), "xyzw"[regid & 0x3], i); - } - - for (i = 0; i < so->immediates_count; i++) { - fprintf(out, "@const(c%d.x)\t", so->constbase.immediate + i); - fprintf(out, "0x%08x, 0x%08x, 0x%08x, 0x%08x\n", - so->immediates[i].val[0], - so->immediates[i].val[1], - so->immediates[i].val[2], - so->immediates[i].val[3]); - } - - disasm_a3xx(bin, so->info.sizedwords, 0, out); - - switch (so->type) { - case SHADER_VERTEX: - fprintf(out, "; %s: outputs:", type); - for (i = 0; i < so->outputs_count; i++) { - uint8_t regid = so->outputs[i].regid; - fprintf(out, " r%d.%c (%s)", - (regid >> 2), "xyzw"[regid & 0x3], - gl_varying_slot_name(so->outputs[i].slot)); - } - fprintf(out, "\n"); - fprintf(out, "; %s: inputs:", type); - for (i = 0; i < so->inputs_count; i++) { - uint8_t regid = so->inputs[i].regid; - fprintf(out, " r%d.%c (cm=%x,il=%u,b=%u)", - (regid >> 2), "xyzw"[regid & 0x3], - so->inputs[i].compmask, - so->inputs[i].inloc, - so->inputs[i].bary); - } - fprintf(out, "\n"); - break; - case SHADER_FRAGMENT: - fprintf(out, "; %s: outputs:", type); - for (i = 0; i < so->outputs_count; i++) { - uint8_t regid = so->outputs[i].regid; - fprintf(out, " r%d.%c (%s)", - (regid >> 2), "xyzw"[regid & 0x3], - gl_frag_result_name(so->outputs[i].slot)); - } - fprintf(out, "\n"); - fprintf(out, "; %s: inputs:", type); - for (i = 0; i < so->inputs_count; i++) { - uint8_t regid = so->inputs[i].regid; - fprintf(out, " r%d.%c (%s,cm=%x,il=%u,b=%u)", - (regid >> 2), "xyzw"[regid & 0x3], - gl_varying_slot_name(so->inputs[i].slot), - so->inputs[i].compmask, - so->inputs[i].inloc, - 
so->inputs[i].bary); - } - fprintf(out, "\n"); - break; - default: - /* TODO */ - break; - } - - /* print generic shader info: */ - fprintf(out, "; %s prog %d/%d: %u instructions, %d half, %d full\n", - type, so->shader->id, so->id, - so->info.instrs_count, - so->info.max_half_reg + 1, - so->info.max_reg + 1); - - fprintf(out, "; %d const, %u constlen\n", - so->info.max_const + 1, - so->constlen); - - fprintf(out, "; %u (ss), %u (sy)\n", so->info.ss, so->info.sy); - - /* print shader type specific info: */ - switch (so->type) { - case SHADER_VERTEX: - dump_output(out, so, VARYING_SLOT_POS, "pos"); - dump_output(out, so, VARYING_SLOT_PSIZ, "psize"); - break; - case SHADER_FRAGMENT: - dump_reg(out, "pos (bary)", - ir3_find_sysval_regid(so, SYSTEM_VALUE_VARYING_COORD)); - dump_output(out, so, FRAG_RESULT_DEPTH, "posz"); - if (so->color0_mrt) { - dump_output(out, so, FRAG_RESULT_COLOR, "color"); - } else { - dump_output(out, so, FRAG_RESULT_DATA0, "data0"); - dump_output(out, so, FRAG_RESULT_DATA1, "data1"); - dump_output(out, so, FRAG_RESULT_DATA2, "data2"); - dump_output(out, so, FRAG_RESULT_DATA3, "data3"); - dump_output(out, so, FRAG_RESULT_DATA4, "data4"); - dump_output(out, so, FRAG_RESULT_DATA5, "data5"); - dump_output(out, so, FRAG_RESULT_DATA6, "data6"); - dump_output(out, so, FRAG_RESULT_DATA7, "data7"); - } - /* these two are hard-coded since we don't know how to - * program them to anything but all 0's... - */ - if (so->frag_coord) - fprintf(out, "; fragcoord: r0.x\n"); - if (so->frag_face) - fprintf(out, "; fragface: hr0.x\n"); - break; - default: - /* TODO */ - break; - } - - fprintf(out, "\n"); -} - -uint64_t -ir3_shader_outputs(const struct ir3_shader *so) -{ - return so->nir->info.outputs_written; -} - -/* This has to reach into the fd_context a bit more than the rest of - * ir3, but it needs to be aligned with the compiler, so both agree - * on which const regs hold what. 
And the logic is identical between - * a3xx/a4xx, the only difference is small details in the actual - * CP_LOAD_STATE packets (which is handled inside the generation - * specific ctx->emit_const(_bo)() fxns) - */ - -#include "freedreno_resource.h" - -static inline bool -is_stateobj(struct fd_ringbuffer *ring) -{ - /* XXX this is an ugly way to differentiate.. */ - return !!(ring->flags & FD_RINGBUFFER_STREAMING); -} - -static inline void -ring_wfi(struct fd_batch *batch, struct fd_ringbuffer *ring) -{ - /* when we emit const state via ring (IB2) we need a WFI, but when - * it is emit'd via stateobj, we don't - */ - if (is_stateobj(ring)) - return; - - fd_wfi(batch, ring); -} - -static void -emit_user_consts(struct fd_context *ctx, const struct ir3_shader_variant *v, - struct fd_ringbuffer *ring, struct fd_constbuf_stateobj *constbuf) -{ - const unsigned index = 0; /* user consts are index 0 */ - - if (constbuf->enabled_mask & (1 << index)) { - struct pipe_constant_buffer *cb = &constbuf->cb[index]; - unsigned size = align(cb->buffer_size, 4) / 4; /* size in dwords */ - - /* in particular, with binning shader we may end up with - * unused consts, ie. we could end up w/ constlen that is - * smaller than first_driver_param. 
In that case truncate - * the user consts early to avoid HLSQ lockup caused by - * writing too many consts - */ - uint32_t max_const = MIN2(v->num_uniforms, v->constlen); - - // I expect that size should be a multiple of vec4's: - assert(size == align(size, 4)); - - /* and even if the start of the const buffer is before - * first_immediate, the end may not be: - */ - size = MIN2(size, 4 * max_const); - - if (size > 0) { - ring_wfi(ctx->batch, ring); - ctx->emit_const(ring, v->type, 0, - cb->buffer_offset, size, - cb->user_buffer, cb->buffer); - } - } -} - -static void -emit_ubos(struct fd_context *ctx, const struct ir3_shader_variant *v, - struct fd_ringbuffer *ring, struct fd_constbuf_stateobj *constbuf) -{ - uint32_t offset = v->constbase.ubo; - if (v->constlen > offset) { - uint32_t params = v->num_ubos; - uint32_t offsets[params]; - struct pipe_resource *prscs[params]; - - for (uint32_t i = 0; i < params; i++) { - const uint32_t index = i + 1; /* UBOs start at index 1 */ - struct pipe_constant_buffer *cb = &constbuf->cb[index]; - assert(!cb->user_buffer); - - if ((constbuf->enabled_mask & (1 << index)) && cb->buffer) { - offsets[i] = cb->buffer_offset; - prscs[i] = cb->buffer; - } else { - offsets[i] = 0; - prscs[i] = NULL; - } - } - - ring_wfi(ctx->batch, ring); - ctx->emit_const_bo(ring, v->type, false, offset * 4, params, prscs, offsets); - } -} - -static void -emit_ssbo_sizes(struct fd_context *ctx, const struct ir3_shader_variant *v, - struct fd_ringbuffer *ring, struct fd_shaderbuf_stateobj *sb) -{ - uint32_t offset = v->constbase.ssbo_sizes; - if (v->constlen > offset) { - uint32_t sizes[align(v->const_layout.ssbo_size.count, 4)]; - unsigned mask = v->const_layout.ssbo_size.mask; - - while (mask) { - unsigned index = u_bit_scan(&mask); - unsigned off = v->const_layout.ssbo_size.off[index]; - sizes[off] = sb->sb[index].buffer_size; - } - - ring_wfi(ctx->batch, ring); - ctx->emit_const(ring, v->type, offset * 4, - 0, ARRAY_SIZE(sizes), sizes, NULL); - } -} 
- -static void -emit_image_dims(struct fd_context *ctx, const struct ir3_shader_variant *v, - struct fd_ringbuffer *ring, struct fd_shaderimg_stateobj *si) -{ - uint32_t offset = v->constbase.image_dims; - if (v->constlen > offset) { - uint32_t dims[align(v->const_layout.image_dims.count, 4)]; - unsigned mask = v->const_layout.image_dims.mask; - - while (mask) { - struct pipe_image_view *img; - struct fd_resource *rsc; - unsigned index = u_bit_scan(&mask); - unsigned off = v->const_layout.image_dims.off[index]; - - img = &si->si[index]; - rsc = fd_resource(img->resource); - - dims[off + 0] = util_format_get_blocksize(img->format); - if (img->resource->target != PIPE_BUFFER) { - unsigned lvl = img->u.tex.level; - /* note for 2d/cube/etc images, even if re-interpreted - * as a different color format, the pixel size should - * be the same, so use original dimensions for y and z - * stride: - */ - dims[off + 1] = rsc->slices[lvl].pitch * rsc->cpp; - /* see corresponding logic in fd_resource_offset(): */ - if (rsc->layer_first) { - dims[off + 2] = rsc->layer_size; - } else { - dims[off + 2] = rsc->slices[lvl].size0; - } - } else { - /* For buffer-backed images, the log2 of the format's - * bytes-per-pixel is placed on the 2nd slot. This is useful - * when emitting image_size instructions, for which we need - * to divide by bpp for image buffers. Since the bpp - * can only be power-of-two, the division is implemented - * as a SHR, and for that it is handy to have the log2 of - * bpp as a constant. 
(log2 = first-set-bit - 1) - */ - dims[off + 1] = ffs(dims[off + 0]) - 1; - } - } - - ring_wfi(ctx->batch, ring); - ctx->emit_const(ring, v->type, offset * 4, - 0, ARRAY_SIZE(dims), dims, NULL); - } -} - -static void -emit_immediates(struct fd_context *ctx, const struct ir3_shader_variant *v, - struct fd_ringbuffer *ring) -{ - int size = v->immediates_count; - uint32_t base = v->constbase.immediate; - - /* truncate size to avoid writing constants that shader - * does not use: - */ - size = MIN2(size + base, v->constlen) - base; - - /* convert out of vec4: */ - base *= 4; - size *= 4; - - if (size > 0) { - ring_wfi(ctx->batch, ring); - ctx->emit_const(ring, v->type, base, - 0, size, v->immediates[0].val, NULL); - } -} - -/* emit stream-out buffers: */ -static void -emit_tfbos(struct fd_context *ctx, const struct ir3_shader_variant *v, - struct fd_ringbuffer *ring) -{ - /* streamout addresses after driver-params: */ - uint32_t offset = v->constbase.tfbo; - if (v->constlen > offset) { - struct fd_streamout_stateobj *so = &ctx->streamout; - struct pipe_stream_output_info *info = &v->shader->stream_output; - uint32_t params = 4; - uint32_t offsets[params]; - struct pipe_resource *prscs[params]; - - for (uint32_t i = 0; i < params; i++) { - struct pipe_stream_output_target *target = so->targets[i]; - - if (target) { - offsets[i] = (so->offsets[i] * info->stride[i] * 4) + - target->buffer_offset; - prscs[i] = target->buffer; - } else { - offsets[i] = 0; - prscs[i] = NULL; - } - } - - ring_wfi(ctx->batch, ring); - ctx->emit_const_bo(ring, v->type, true, offset * 4, params, prscs, offsets); - } -} - -static uint32_t -max_tf_vtx(struct fd_context *ctx, const struct ir3_shader_variant *v) -{ - struct fd_streamout_stateobj *so = &ctx->streamout; - struct pipe_stream_output_info *info = &v->shader->stream_output; - uint32_t maxvtxcnt = 0x7fffffff; - - if (ctx->screen->gpu_id >= 500) - return 0; - if (v->binning_pass) - return 0; - if (v->shader->stream_output.num_outputs == 0) 
- return 0; - if (so->num_targets == 0) - return 0; - - /* offset to write to is: - * - * total_vtxcnt = vtxcnt + offsets[i] - * offset = total_vtxcnt * stride[i] - * - * offset = vtxcnt * stride[i] ; calculated in shader - * + offsets[i] * stride[i] ; calculated at emit_tfbos() - * - * assuming for each vtx, each target buffer will have data written - * up to 'offset + stride[i]', that leaves maxvtxcnt as: - * - * buffer_size = (maxvtxcnt * stride[i]) + stride[i] - * maxvtxcnt = (buffer_size - stride[i]) / stride[i] - * - * but shader is actually doing a less-than (rather than less-than- - * equal) check, so we can drop the -stride[i]. - * - * TODO is assumption about `offset + stride[i]` legit? - */ - for (unsigned i = 0; i < so->num_targets; i++) { - struct pipe_stream_output_target *target = so->targets[i]; - unsigned stride = info->stride[i] * 4; /* convert dwords->bytes */ - if (target) { - uint32_t max = target->buffer_size / stride; - maxvtxcnt = MIN2(maxvtxcnt, max); - } - } - - return maxvtxcnt; -} - -static void -emit_common_consts(const struct ir3_shader_variant *v, struct fd_ringbuffer *ring, - struct fd_context *ctx, enum pipe_shader_type t) -{ - enum fd_dirty_shader_state dirty = ctx->dirty_shader[t]; - - /* When we use CP_SET_DRAW_STATE objects to emit constant state, - * if we emit any of it we need to emit all. This is because - * we are using the same state-group-id each time for uniform - * state, and if previous update is never evaluated (due to no - * visible primitives in the current tile) then the new stateobj - * completely replaces the old one. - * - * Possibly if we split up different parts of the const state to - * different state-objects we could avoid this. 
- */ - if (dirty && is_stateobj(ring)) - dirty = ~0; - - if (dirty & (FD_DIRTY_SHADER_PROG | FD_DIRTY_SHADER_CONST)) { - struct fd_constbuf_stateobj *constbuf; - bool shader_dirty; - - constbuf = &ctx->constbuf[t]; - shader_dirty = !!(dirty & FD_DIRTY_SHADER_PROG); - - emit_user_consts(ctx, v, ring, constbuf); - emit_ubos(ctx, v, ring, constbuf); - if (shader_dirty) - emit_immediates(ctx, v, ring); - } - - if (dirty & (FD_DIRTY_SHADER_PROG | FD_DIRTY_SHADER_SSBO)) { - struct fd_shaderbuf_stateobj *sb = &ctx->shaderbuf[t]; - emit_ssbo_sizes(ctx, v, ring, sb); - } - - if (dirty & (FD_DIRTY_SHADER_PROG | FD_DIRTY_SHADER_IMAGE)) { - struct fd_shaderimg_stateobj *si = &ctx->shaderimg[t]; - emit_image_dims(ctx, v, ring, si); - } -} - -void -ir3_emit_vs_consts(const struct ir3_shader_variant *v, struct fd_ringbuffer *ring, - struct fd_context *ctx, const struct pipe_draw_info *info) -{ - debug_assert(v->type == SHADER_VERTEX); - - emit_common_consts(v, ring, ctx, PIPE_SHADER_VERTEX); - - /* emit driver params every time: */ - /* TODO skip emit if shader doesn't use driver params to avoid WFI.. */ - if (info) { - uint32_t offset = v->constbase.driver_param; - if (v->constlen > offset) { - uint32_t vertex_params[IR3_DP_VS_COUNT] = { - [IR3_DP_VTXID_BASE] = info->index_size ? 
- info->index_bias : info->start, - [IR3_DP_VTXCNT_MAX] = max_tf_vtx(ctx, v), - }; - /* if no user-clip-planes, we don't need to emit the - * entire thing: - */ - uint32_t vertex_params_size = 4; - - if (v->key.ucp_enables) { - struct pipe_clip_state *ucp = &ctx->ucp; - unsigned pos = IR3_DP_UCP0_X; - for (unsigned i = 0; pos <= IR3_DP_UCP7_W; i++) { - for (unsigned j = 0; j < 4; j++) { - vertex_params[pos] = fui(ucp->ucp[i][j]); - pos++; - } - } - vertex_params_size = ARRAY_SIZE(vertex_params); - } - - ring_wfi(ctx->batch, ring); - - bool needs_vtxid_base = - ir3_find_sysval_regid(v, SYSTEM_VALUE_VERTEX_ID_ZERO_BASE) != regid(63, 0); - - /* for indirect draw, we need to copy VTXID_BASE from - * indirect-draw parameters buffer.. which is annoying - * and means we can't easily emit these consts in cmd - * stream so need to copy them to bo. - */ - if (info->indirect && needs_vtxid_base) { - struct pipe_draw_indirect_info *indirect = info->indirect; - struct pipe_resource *vertex_params_rsc = - pipe_buffer_create(&ctx->screen->base, - PIPE_BIND_CONSTANT_BUFFER, PIPE_USAGE_STREAM, - vertex_params_size * 4); - unsigned src_off = info->indirect->offset;; - void *ptr; - - ptr = fd_bo_map(fd_resource(vertex_params_rsc)->bo); - memcpy(ptr, vertex_params, vertex_params_size * 4); - - if (info->index_size) { - /* indexed draw, index_bias is 4th field: */ - src_off += 3 * 4; - } else { - /* non-indexed draw, start is 3rd field: */ - src_off += 2 * 4; - } - - /* copy index_bias or start from draw params: */ - ctx->mem_to_mem(ring, vertex_params_rsc, 0, - indirect->buffer, src_off, 1); - - ctx->emit_const(ring, SHADER_VERTEX, offset * 4, 0, - vertex_params_size, NULL, vertex_params_rsc); - - pipe_resource_reference(&vertex_params_rsc, NULL); - } else { - ctx->emit_const(ring, SHADER_VERTEX, offset * 4, 0, - vertex_params_size, vertex_params, NULL); - } - - /* if needed, emit stream-out buffer addresses: */ - if (vertex_params[IR3_DP_VTXCNT_MAX] > 0) { - emit_tfbos(ctx, v, ring); 
- } - } - } -} - -void -ir3_emit_fs_consts(const struct ir3_shader_variant *v, struct fd_ringbuffer *ring, - struct fd_context *ctx) -{ - debug_assert(v->type == SHADER_FRAGMENT); - - emit_common_consts(v, ring, ctx, PIPE_SHADER_FRAGMENT); -} - -/* emit compute-shader consts: */ -void -ir3_emit_cs_consts(const struct ir3_shader_variant *v, struct fd_ringbuffer *ring, - struct fd_context *ctx, const struct pipe_grid_info *info) -{ - debug_assert(v->type == SHADER_COMPUTE); - - emit_common_consts(v, ring, ctx, PIPE_SHADER_COMPUTE); - - /* emit compute-shader driver-params: */ - uint32_t offset = v->constbase.driver_param; - if (v->constlen > offset) { - ring_wfi(ctx->batch, ring); - - if (info->indirect) { - struct pipe_resource *indirect = NULL; - unsigned indirect_offset; - - /* This is a bit awkward, but CP_LOAD_STATE.EXT_SRC_ADDR needs - * to be aligned more strongly than 4 bytes. So in this case - * we need a temporary buffer to copy NumWorkGroups.xyz to. - * - * TODO if previous compute job is writing to info->indirect, - * we might need a WFI.. but since we currently flush for each - * compute job, we are probably ok for now. 
- */ - if (info->indirect_offset & 0xf) { - indirect = pipe_buffer_create(&ctx->screen->base, - PIPE_BIND_COMMAND_ARGS_BUFFER, PIPE_USAGE_STREAM, - 0x1000); - indirect_offset = 0; - - ctx->mem_to_mem(ring, indirect, 0, info->indirect, - info->indirect_offset, 3); - } else { - pipe_resource_reference(&indirect, info->indirect); - indirect_offset = info->indirect_offset; - } - - ctx->emit_const(ring, SHADER_COMPUTE, offset * 4, - indirect_offset, 4, NULL, indirect); - - pipe_resource_reference(&indirect, NULL); - } else { - uint32_t compute_params[IR3_DP_CS_COUNT] = { - [IR3_DP_NUM_WORK_GROUPS_X] = info->grid[0], - [IR3_DP_NUM_WORK_GROUPS_Y] = info->grid[1], - [IR3_DP_NUM_WORK_GROUPS_Z] = info->grid[2], - [IR3_DP_LOCAL_GROUP_SIZE_X] = info->block[0], - [IR3_DP_LOCAL_GROUP_SIZE_Y] = info->block[1], - [IR3_DP_LOCAL_GROUP_SIZE_Z] = info->block[2], - }; - - ctx->emit_const(ring, SHADER_COMPUTE, offset * 4, 0, - ARRAY_SIZE(compute_params), compute_params, NULL); - } - } -} diff -Nru mesa-18.3.3/src/gallium/drivers/freedreno/ir3/ir3_shader.h mesa-19.0.1/src/gallium/drivers/freedreno/ir3/ir3_shader.h --- mesa-18.3.3/src/gallium/drivers/freedreno/ir3/ir3_shader.h 2018-12-07 18:58:04.000000000 +0000 +++ mesa-19.0.1/src/gallium/drivers/freedreno/ir3/ir3_shader.h 1970-01-01 00:00:00.000000000 +0000 @@ -1,538 +0,0 @@ -/* - * Copyright (C) 2014 Rob Clark - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * the rights to use, copy, modify, merge, publish, distribute, sublicense, - * and/or sell copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice (including the next - * paragraph) shall be included in all copies or substantial portions of the - * Software. 
- * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL - * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - * - * Authors: - * Rob Clark - */ - -#ifndef IR3_SHADER_H_ -#define IR3_SHADER_H_ - -#include "pipe/p_state.h" -#include "compiler/shader_enums.h" -#include "util/bitscan.h" - -#include "ir3.h" -#include "disasm.h" - -struct glsl_type; - -/* driver param indices: */ -enum ir3_driver_param { - /* compute shader driver params: */ - IR3_DP_NUM_WORK_GROUPS_X = 0, - IR3_DP_NUM_WORK_GROUPS_Y = 1, - IR3_DP_NUM_WORK_GROUPS_Z = 2, - IR3_DP_LOCAL_GROUP_SIZE_X = 4, - IR3_DP_LOCAL_GROUP_SIZE_Y = 5, - IR3_DP_LOCAL_GROUP_SIZE_Z = 6, - /* NOTE: gl_NumWorkGroups should be vec4 aligned because - * glDispatchComputeIndirect() needs to load these from - * the info->indirect buffer. Keep that in mind when/if - * adding any addition CS driver params. - */ - IR3_DP_CS_COUNT = 8, /* must be aligned to vec4 */ - - /* vertex shader driver params: */ - IR3_DP_VTXID_BASE = 0, - IR3_DP_VTXCNT_MAX = 1, - /* user-clip-plane components, up to 8x vec4's: */ - IR3_DP_UCP0_X = 4, - /* .... 
*/ - IR3_DP_UCP7_W = 35, - IR3_DP_VS_COUNT = 36 /* must be aligned to vec4 */ -}; - -/** - * For consts needed to pass internal values to shader which may or may not - * be required, rather than allocating worst-case const space, we scan the - * shader and allocate consts as-needed: - * - * + SSBO sizes: only needed if shader has a get_buffer_size intrinsic - * for a given SSBO - * - * + Image dimensions: needed to calculate pixel offset, but only for - * images that have a image_store intrinsic - */ -struct ir3_driver_const_layout { - struct { - uint32_t mask; /* bitmask of SSBOs that have get_buffer_size */ - uint32_t count; /* number of consts allocated */ - /* one const allocated per SSBO which has get_buffer_size, - * ssbo_sizes.off[ssbo_id] is offset from start of ssbo_sizes - * consts: - */ - uint32_t off[PIPE_MAX_SHADER_BUFFERS]; - } ssbo_size; - - struct { - uint32_t mask; /* bitmask of images that have image_store */ - uint32_t count; /* number of consts allocated */ - /* three const allocated per image which has image_store: - * + cpp (bytes per pixel) - * + pitch (y pitch) - * + array_pitch (z pitch) - */ - uint32_t off[PIPE_MAX_SHADER_IMAGES]; - } image_dims; -}; - -/* Configuration key used to identify a shader variant.. different - * shader variants can be used to implement features not supported - * in hw (two sided color), binning-pass vertex shader, etc. - */ -struct ir3_shader_key { - union { - struct { - /* - * Combined Vertex/Fragment shader parameters: - */ - unsigned ucp_enables : 8; - - /* do we need to check {v,f}saturate_{s,t,r}? 
*/ - unsigned has_per_samp : 1; - - /* - * Vertex shader variant parameters: - */ - unsigned vclamp_color : 1; - - /* - * Fragment shader variant parameters: - */ - unsigned color_two_side : 1; - unsigned half_precision : 1; - /* used when shader needs to handle flat varyings (a4xx) - * for front/back color inputs to frag shader: - */ - unsigned rasterflat : 1; - unsigned fclamp_color : 1; - }; - uint32_t global; - }; - - /* bitmask of sampler which needs coords clamped for vertex - * shader: - */ - uint16_t vsaturate_s, vsaturate_t, vsaturate_r; - - /* bitmask of sampler which needs coords clamped for frag - * shader: - */ - uint16_t fsaturate_s, fsaturate_t, fsaturate_r; - - /* bitmask of ms shifts */ - uint32_t vsamples, fsamples; - - /* bitmask of samplers which need astc srgb workaround: */ - uint16_t vastc_srgb, fastc_srgb; -}; - -static inline bool -ir3_shader_key_equal(struct ir3_shader_key *a, struct ir3_shader_key *b) -{ - /* slow-path if we need to check {v,f}saturate_{s,t,r} */ - if (a->has_per_samp || b->has_per_samp) - return memcmp(a, b, sizeof(struct ir3_shader_key)) == 0; - return a->global == b->global; -} - -/* will the two keys produce different lowering for a fragment shader? 
*/ -static inline bool -ir3_shader_key_changes_fs(struct ir3_shader_key *key, struct ir3_shader_key *last_key) -{ - if (last_key->has_per_samp || key->has_per_samp) { - if ((last_key->fsaturate_s != key->fsaturate_s) || - (last_key->fsaturate_t != key->fsaturate_t) || - (last_key->fsaturate_r != key->fsaturate_r) || - (last_key->fsamples != key->fsamples) || - (last_key->fastc_srgb != key->fastc_srgb)) - return true; - } - - if (last_key->fclamp_color != key->fclamp_color) - return true; - - if (last_key->color_two_side != key->color_two_side) - return true; - - if (last_key->half_precision != key->half_precision) - return true; - - if (last_key->rasterflat != key->rasterflat) - return true; - - if (last_key->ucp_enables != key->ucp_enables) - return true; - - return false; -} - -/* will the two keys produce different lowering for a vertex shader? */ -static inline bool -ir3_shader_key_changes_vs(struct ir3_shader_key *key, struct ir3_shader_key *last_key) -{ - if (last_key->has_per_samp || key->has_per_samp) { - if ((last_key->vsaturate_s != key->vsaturate_s) || - (last_key->vsaturate_t != key->vsaturate_t) || - (last_key->vsaturate_r != key->vsaturate_r) || - (last_key->vsamples != key->vsamples) || - (last_key->vastc_srgb != key->vastc_srgb)) - return true; - } - - if (last_key->vclamp_color != key->vclamp_color) - return true; - - if (last_key->ucp_enables != key->ucp_enables) - return true; - - return false; -} - -struct ir3_shader_variant { - struct fd_bo *bo; - - /* variant id (for debug) */ - uint32_t id; - - struct ir3_shader_key key; - - /* vertex shaders can have an extra version for hwbinning pass, - * which is pointed to by so->binning: - */ - bool binning_pass; - struct ir3_shader_variant *binning; - - struct ir3_driver_const_layout const_layout; - struct ir3_info info; - struct ir3 *ir; - - /* the instructions length is in units of instruction groups - * (4 instructions for a3xx, 16 instructions for a4xx.. 
each - * instruction is 2 dwords): - */ - unsigned instrlen; - - /* the constants length is in units of vec4's, and is the sum of - * the uniforms and the built-in compiler constants - */ - unsigned constlen; - - /* number of uniforms (in vec4), not including built-in compiler - * constants, etc. - */ - unsigned num_uniforms; - - unsigned num_ubos; - - /* About Linkage: - * + Let the frag shader determine the position/compmask for the - * varyings, since it is the place where we know if the varying - * is actually used, and if so, which components are used. So - * what the hw calls "outloc" is taken from the "inloc" of the - * frag shader. - * + From the vert shader, we only need the output regid - */ - - bool frag_coord, frag_face, color0_mrt; - - /* NOTE: for input/outputs, slot is: - * gl_vert_attrib - for VS inputs - * gl_varying_slot - for VS output / FS input - * gl_frag_result - for FS output - */ - - /* varyings/outputs: */ - unsigned outputs_count; - struct { - uint8_t slot; - uint8_t regid; - } outputs[16 + 2]; /* +POSITION +PSIZE */ - bool writes_pos, writes_psize; - - /* attributes (VS) / varyings (FS): - * Note that sysval's should come *after* normal inputs. - */ - unsigned inputs_count; - struct { - uint8_t slot; - uint8_t regid; - uint8_t compmask; - uint8_t ncomp; - /* location of input (ie. offset passed to bary.f, etc). This - * matches the SP_VS_VPC_DST_REG.OUTLOCn value (a3xx and a4xx - * have the OUTLOCn value offset by 8, presumably to account - * for gl_Position/gl_PointSize) - */ - uint8_t inloc; - /* vertex shader specific: */ - bool sysval : 1; /* slot is a gl_system_value */ - /* fragment shader specific: */ - bool bary : 1; /* fetched varying (vs one loaded into reg) */ - bool rasterflat : 1; /* special handling for emit->rasterflat */ - enum glsl_interp_mode interpolate; - } inputs[16 + 2]; /* +POSITION +FACE */ - - /* sum of input components (scalar). 
For frag shaders, it only counts - * the varying inputs: - */ - unsigned total_in; - - /* For frag shaders, the total number of inputs (not scalar, - * ie. SP_VS_PARAM_REG.TOTALVSOUTVAR) - */ - unsigned varying_in; - - /* number of samplers/textures (which are currently 1:1): */ - int num_samp; - - /* do we have one or more SSBO instructions: */ - bool has_ssbo; - - /* do we have kill instructions: */ - bool has_kill; - - /* Layout of constant registers, each section (in vec4). Pointer size - * is 32b (a3xx, a4xx), or 64b (a5xx+), which effects the size of the - * UBO and stream-out consts. - */ - struct { - /* user const start at zero */ - unsigned ubo; - /* NOTE that a3xx might need a section for SSBO addresses too */ - unsigned ssbo_sizes; - unsigned image_dims; - unsigned driver_param; - unsigned tfbo; - unsigned immediate; - } constbase; - - unsigned immediates_count; - unsigned immediates_size; - struct { - uint32_t val[4]; - } *immediates; - - /* for astc srgb workaround, the number/base of additional - * alpha tex states we need, and index of original tex states - */ - struct { - unsigned base, count; - unsigned orig_idx[16]; - } astc_srgb; - - /* shader variants form a linked list: */ - struct ir3_shader_variant *next; - - /* replicated here to avoid passing extra ptrs everywhere: */ - enum shader_t type; - struct ir3_shader *shader; -}; - -struct ir3_shader { - enum shader_t type; - - /* shader id (for debug): */ - uint32_t id; - uint32_t variant_count; - - /* so we know when we can disable TGSI related hacks: */ - bool from_tgsi; - - struct ir3_compiler *compiler; - - struct nir_shader *nir; - struct pipe_stream_output_info stream_output; - - struct ir3_shader_variant *variants; -}; - -void * ir3_shader_assemble(struct ir3_shader_variant *v, uint32_t gpu_id); - -struct ir3_shader * ir3_shader_create(struct ir3_compiler *compiler, - const struct pipe_shader_state *cso, enum shader_t type, - struct pipe_debug_callback *debug); -struct ir3_shader * 
-ir3_shader_create_compute(struct ir3_compiler *compiler, - const struct pipe_compute_state *cso, - struct pipe_debug_callback *debug); -void ir3_shader_destroy(struct ir3_shader *shader); -struct ir3_shader_variant * ir3_shader_variant(struct ir3_shader *shader, - struct ir3_shader_key key, bool binning_pass, - struct pipe_debug_callback *debug); -void ir3_shader_disasm(struct ir3_shader_variant *so, uint32_t *bin, FILE *out); -uint64_t ir3_shader_outputs(const struct ir3_shader *so); - -struct fd_ringbuffer; -struct fd_context; -void ir3_emit_vs_consts(const struct ir3_shader_variant *v, struct fd_ringbuffer *ring, - struct fd_context *ctx, const struct pipe_draw_info *info); -void ir3_emit_fs_consts(const struct ir3_shader_variant *v, struct fd_ringbuffer *ring, - struct fd_context *ctx); -void ir3_emit_cs_consts(const struct ir3_shader_variant *v, struct fd_ringbuffer *ring, - struct fd_context *ctx, const struct pipe_grid_info *info); - -int -ir3_glsl_type_size(const struct glsl_type *type); - -static inline const char * -ir3_shader_stage(struct ir3_shader *shader) -{ - switch (shader->type) { - case SHADER_VERTEX: return "VERT"; - case SHADER_FRAGMENT: return "FRAG"; - case SHADER_COMPUTE: return "CL"; - default: - unreachable("invalid type"); - return NULL; - } -} - -/* - * Helper/util: - */ - -#include "pipe/p_shader_tokens.h" - -static inline int -ir3_find_output(const struct ir3_shader_variant *so, gl_varying_slot slot) -{ - int j; - - for (j = 0; j < so->outputs_count; j++) - if (so->outputs[j].slot == slot) - return j; - - /* it seems optional to have a OUT.BCOLOR[n] for each OUT.COLOR[n] - * in the vertex shader.. but the fragment shader doesn't know this - * so it will always have both IN.COLOR[n] and IN.BCOLOR[n]. So - * at link time if there is no matching OUT.BCOLOR[n], we must map - * OUT.COLOR[n] to IN.BCOLOR[n]. 
And visa versa if there is only - * a OUT.BCOLOR[n] but no matching OUT.COLOR[n] - */ - if (slot == VARYING_SLOT_BFC0) { - slot = VARYING_SLOT_COL0; - } else if (slot == VARYING_SLOT_BFC1) { - slot = VARYING_SLOT_COL1; - } else if (slot == VARYING_SLOT_COL0) { - slot = VARYING_SLOT_BFC0; - } else if (slot == VARYING_SLOT_COL1) { - slot = VARYING_SLOT_BFC1; - } else { - return 0; - } - - for (j = 0; j < so->outputs_count; j++) - if (so->outputs[j].slot == slot) - return j; - - debug_assert(0); - - return 0; -} - -static inline int -ir3_next_varying(const struct ir3_shader_variant *so, int i) -{ - while (++i < so->inputs_count) - if (so->inputs[i].compmask && so->inputs[i].bary) - break; - return i; -} - -struct ir3_shader_linkage { - uint8_t max_loc; - uint8_t cnt; - struct { - uint8_t regid; - uint8_t compmask; - uint8_t loc; - } var[32]; -}; - -static inline void -ir3_link_add(struct ir3_shader_linkage *l, uint8_t regid, uint8_t compmask, uint8_t loc) -{ - int i = l->cnt++; - - debug_assert(i < ARRAY_SIZE(l->var)); - - l->var[i].regid = regid; - l->var[i].compmask = compmask; - l->var[i].loc = loc; - l->max_loc = MAX2(l->max_loc, loc + util_last_bit(compmask)); -} - -static inline void -ir3_link_shaders(struct ir3_shader_linkage *l, - const struct ir3_shader_variant *vs, - const struct ir3_shader_variant *fs) -{ - int j = -1, k; - - while (l->cnt < ARRAY_SIZE(l->var)) { - j = ir3_next_varying(fs, j); - - if (j >= fs->inputs_count) - break; - - if (fs->inputs[j].inloc >= fs->total_in) - continue; - - k = ir3_find_output(vs, fs->inputs[j].slot); - - ir3_link_add(l, vs->outputs[k].regid, - fs->inputs[j].compmask, fs->inputs[j].inloc); - } -} - -static inline uint32_t -ir3_find_output_regid(const struct ir3_shader_variant *so, unsigned slot) -{ - int j; - for (j = 0; j < so->outputs_count; j++) - if (so->outputs[j].slot == slot) - return so->outputs[j].regid; - return regid(63, 0); -} - -static inline uint32_t -ir3_find_sysval_regid(const struct ir3_shader_variant 
*so, unsigned slot) -{ - int j; - for (j = 0; j < so->inputs_count; j++) - if (so->inputs[j].sysval && (so->inputs[j].slot == slot)) - return so->inputs[j].regid; - return regid(63, 0); -} - -/* calculate register footprint in terms of half-regs (ie. one full - * reg counts as two half-regs). - */ -static inline uint32_t -ir3_shader_halfregs(const struct ir3_shader_variant *v) -{ - return (2 * (v->info.max_reg + 1)) + (v->info.max_half_reg + 1); -} - -#endif /* IR3_SHADER_H_ */ diff -Nru mesa-18.3.3/src/gallium/drivers/freedreno/Makefile.am mesa-19.0.1/src/gallium/drivers/freedreno/Makefile.am --- mesa-18.3.3/src/gallium/drivers/freedreno/Makefile.am 2018-12-07 18:58:04.000000000 +0000 +++ mesa-19.0.1/src/gallium/drivers/freedreno/Makefile.am 2019-03-31 23:16:37.000000000 +0000 @@ -3,50 +3,26 @@ AM_CFLAGS = \ -Wno-packed-bitfield-compat \ + -I$(top_srcdir)/include/drm-uapi \ -I$(top_srcdir)/src/gallium/drivers/freedreno/ir3 \ + -I$(top_srcdir)/src/freedreno \ + -I$(top_srcdir)/src/freedreno/registers \ -I$(top_builddir)/src/compiler/nir \ -I$(top_srcdir)/src/compiler/nir \ - $(GALLIUM_DRIVER_CFLAGS) \ $(LIBDRM_CFLAGS) \ - $(VALGRIND_CFLAGS) - -MKDIR_GEN = $(AM_V_at)$(MKDIR_P) $(@D) -ir3/ir3_nir_trig.c: ir3/ir3_nir_trig.py $(top_srcdir)/src/compiler/nir/nir_algebraic.py - $(MKDIR_GEN) - $(AM_V_GEN) $(PYTHON) $(PYTHON_FLAGS) $(srcdir)/ir3/ir3_nir_trig.py -p $(top_srcdir)/src/compiler/nir > $@ || ($(RM) $@; false) + $(GALLIUM_DRIVER_CFLAGS) noinst_LTLIBRARIES = libfreedreno.la libfreedreno_la_SOURCES = \ $(C_SOURCES) \ - $(drm_SOURCES) \ $(a2xx_SOURCES) \ $(a3xx_SOURCES) \ $(a4xx_SOURCES) \ $(a5xx_SOURCES) \ $(a6xx_SOURCES) \ - $(ir3_SOURCES) \ - $(ir3_GENERATED_FILES) - -BUILT_SOURCES := $(ir3_GENERATED_FILES) -CLEANFILES := $(BUILT_SOURCES) -EXTRA_DIST = ir3/ir3_nir_trig.py - -noinst_PROGRAMS = ir3_compiler - -# XXX: Required due to the C++ sources in libnir -nodist_EXTRA_ir3_compiler_SOURCES = dummy.cpp -ir3_compiler_SOURCES = \ - ir3/ir3_cmdline.c - 
-ir3_compiler_LDADD = \ - libfreedreno.la \ - $(top_builddir)/src/gallium/auxiliary/libgallium.la \ - $(top_builddir)/src/compiler/nir/libnir.la \ - $(top_builddir)/src/compiler/glsl/libstandalone.la \ - $(top_builddir)/src/util/libmesautil.la \ - $(top_builddir)/src/mesa/libmesagallium.la \ - $(GALLIUM_COMMON_LIB_DEPS) \ - $(LIBDRM_LIBS) + $(ir3_SOURCES) -EXTRA_DIST += meson.build +EXTRA_DIST = \ + ir3/ir3_cmdline.c \ + meson.build diff -Nru mesa-18.3.3/src/gallium/drivers/freedreno/Makefile.sources mesa-19.0.1/src/gallium/drivers/freedreno/Makefile.sources --- mesa-18.3.3/src/gallium/drivers/freedreno/Makefile.sources 2018-12-07 18:58:04.000000000 +0000 +++ mesa-19.0.1/src/gallium/drivers/freedreno/Makefile.sources 2019-03-31 23:16:37.000000000 +0000 @@ -1,6 +1,4 @@ C_SOURCES := \ - adreno_common.xml.h \ - adreno_pm4.xml.h \ disasm.h \ freedreno_batch.c \ freedreno_batch.h \ @@ -40,30 +38,10 @@ freedreno_util.c \ freedreno_util.h -drm_SOURCES := \ - drm/freedreno_bo.c \ - drm/freedreno_bo_cache.c \ - drm/freedreno_device.c \ - drm/freedreno_drmif.h \ - drm/freedreno_pipe.c \ - drm/freedreno_priv.h \ - drm/freedreno_ringbuffer.c \ - drm/freedreno_ringbuffer.h \ - drm/msm_bo.c \ - drm/msm_device.c \ - drm/msm_drm.h \ - drm/msm_pipe.c \ - drm/msm_priv.h \ - drm/msm_ringbuffer.c \ - drm/msm_ringbuffer_sp.c - a2xx_SOURCES := \ - a2xx/a2xx.xml.h \ a2xx/disasm-a2xx.c \ a2xx/fd2_blend.c \ a2xx/fd2_blend.h \ - a2xx/fd2_compiler.c \ - a2xx/fd2_compiler.h \ a2xx/fd2_context.c \ a2xx/fd2_context.h \ a2xx/fd2_draw.c \ @@ -72,10 +50,15 @@ a2xx/fd2_emit.h \ a2xx/fd2_gmem.c \ a2xx/fd2_gmem.h \ + a2xx/fd2_perfcntr.c \ a2xx/fd2_program.c \ a2xx/fd2_program.h \ + a2xx/fd2_query.c \ + a2xx/fd2_query.h \ a2xx/fd2_rasterizer.c \ a2xx/fd2_rasterizer.h \ + a2xx/fd2_resource.c \ + a2xx/fd2_resource.h \ a2xx/fd2_screen.c \ a2xx/fd2_screen.h \ a2xx/fd2_texture.c \ @@ -85,11 +68,16 @@ a2xx/fd2_zsa.c \ a2xx/fd2_zsa.h \ a2xx/instr-a2xx.h \ - a2xx/ir-a2xx.c \ - a2xx/ir-a2xx.h + a2xx/ir2.c \ + 
a2xx/ir2.h \ + a2xx/ir2_assemble.c \ + a2xx/ir2_cp.c \ + a2xx/ir2_nir.c \ + a2xx/ir2_nir_lower_scalar.c \ + a2xx/ir2_private.h \ + a2xx/ir2_ra.c a3xx_SOURCES := \ - a3xx/a3xx.xml.h \ a3xx/fd3_blend.c \ a3xx/fd3_blend.h \ a3xx/fd3_context.c \ @@ -116,7 +104,6 @@ a3xx/fd3_zsa.h a4xx_SOURCES := \ - a4xx/a4xx.xml.h \ a4xx/fd4_blend.c \ a4xx/fd4_blend.h \ a4xx/fd4_context.c \ @@ -143,7 +130,6 @@ a4xx/fd4_zsa.h a5xx_SOURCES := \ - a5xx/a5xx.xml.h \ a5xx/fd5_blend.c \ a5xx/fd5_blend.h \ a5xx/fd5_blitter.c \ @@ -179,7 +165,6 @@ a5xx/fd5_zsa.h a6xx_SOURCES := \ - a6xx/a6xx.xml.h \ a6xx/fd6_blend.c \ a6xx/fd6_blend.h \ a6xx/fd6_blitter.c \ @@ -212,27 +197,8 @@ a6xx/fd6_zsa.h ir3_SOURCES := \ - ir3/disasm-a3xx.c \ - ir3/instr-a3xx.h \ - ir3/ir3.c \ ir3/ir3_cache.c \ ir3/ir3_cache.h \ - ir3/ir3_compiler_nir.c \ - ir3/ir3_compiler.c \ - ir3/ir3_compiler.h \ - ir3/ir3_cp.c \ - ir3/ir3_depth.c \ - ir3/ir3_group.c \ - ir3/ir3.h \ - ir3/ir3_legalize.c \ - ir3/ir3_nir.c \ - ir3/ir3_nir.h \ - ir3/ir3_nir_lower_tg4_to_tex.c \ - ir3/ir3_print.c \ - ir3/ir3_ra.c \ - ir3/ir3_sched.c \ - ir3/ir3_shader.c \ - ir3/ir3_shader.h + ir3/ir3_gallium.c \ + ir3/ir3_gallium.h -ir3_GENERATED_FILES := \ - ir3/ir3_nir_trig.c diff -Nru mesa-18.3.3/src/gallium/drivers/freedreno/meson.build mesa-19.0.1/src/gallium/drivers/freedreno/meson.build --- mesa-18.3.3/src/gallium/drivers/freedreno/meson.build 2018-12-07 18:58:04.000000000 +0000 +++ mesa-19.0.1/src/gallium/drivers/freedreno/meson.build 2019-03-31 23:16:37.000000000 +0000 @@ -18,21 +18,7 @@ # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE # SOFTWARE. 
-ir3_nir_trig_c = custom_target( - 'ir3_nir_trig.c', - input : 'ir3/ir3_nir_trig.py', - output : 'ir3_nir_trig.c', - command : [ - prog_python, '@INPUT@', - '-p', join_paths(meson.source_root(), 'src/compiler/nir/'), - ], - capture : true, - depend_files : nir_algebraic_py, -) - files_libfreedreno = files( - 'adreno_common.xml.h', - 'adreno_pm4.xml.h', 'disasm.h', 'freedreno_batch.c', 'freedreno_batch.h', @@ -71,27 +57,9 @@ 'freedreno_texture.h', 'freedreno_util.c', 'freedreno_util.h', - 'drm/freedreno_bo.c', - 'drm/freedreno_bo_cache.c', - 'drm/freedreno_device.c', - 'drm/freedreno_drmif.h', - 'drm/freedreno_pipe.c', - 'drm/freedreno_priv.h', - 'drm/freedreno_ringbuffer.c', - 'drm/freedreno_ringbuffer.h', - 'drm/msm_bo.c', - 'drm/msm_device.c', - 'drm/msm_drm.h', - 'drm/msm_pipe.c', - 'drm/msm_priv.h', - 'drm/msm_ringbuffer.c', - 'drm/msm_ringbuffer_sp.c', - 'a2xx/a2xx.xml.h', 'a2xx/disasm-a2xx.c', 'a2xx/fd2_blend.c', 'a2xx/fd2_blend.h', - 'a2xx/fd2_compiler.c', - 'a2xx/fd2_compiler.h', 'a2xx/fd2_context.c', 'a2xx/fd2_context.h', 'a2xx/fd2_draw.c', @@ -100,10 +68,15 @@ 'a2xx/fd2_emit.h', 'a2xx/fd2_gmem.c', 'a2xx/fd2_gmem.h', + 'a2xx/fd2_perfcntr.c', 'a2xx/fd2_program.c', 'a2xx/fd2_program.h', + 'a2xx/fd2_query.c', + 'a2xx/fd2_query.h', 'a2xx/fd2_rasterizer.c', 'a2xx/fd2_rasterizer.h', + 'a2xx/fd2_resource.c', + 'a2xx/fd2_resource.h', 'a2xx/fd2_screen.c', 'a2xx/fd2_screen.h', 'a2xx/fd2_texture.c', @@ -113,9 +86,14 @@ 'a2xx/fd2_zsa.c', 'a2xx/fd2_zsa.h', 'a2xx/instr-a2xx.h', - 'a2xx/ir-a2xx.c', - 'a2xx/ir-a2xx.h', - 'a3xx/a3xx.xml.h', + 'a2xx/ir2.c', + 'a2xx/ir2.h', + 'a2xx/ir2_assemble.c', + 'a2xx/ir2_cp.c', + 'a2xx/ir2_nir.c', + 'a2xx/ir2_nir_lower_scalar.c', + 'a2xx/ir2_private.h', + 'a2xx/ir2_ra.c', 'a3xx/fd3_blend.c', 'a3xx/fd3_blend.h', 'a3xx/fd3_context.c', @@ -140,7 +118,6 @@ 'a3xx/fd3_texture.h', 'a3xx/fd3_zsa.c', 'a3xx/fd3_zsa.h', - 'a4xx/a4xx.xml.h', 'a4xx/fd4_blend.c', 'a4xx/fd4_blend.h', 'a4xx/fd4_context.c', @@ -165,7 +142,6 @@ 'a4xx/fd4_texture.h', 
'a4xx/fd4_zsa.c', 'a4xx/fd4_zsa.h', - 'a5xx/a5xx.xml.h', 'a5xx/fd5_blend.c', 'a5xx/fd5_blend.h', 'a5xx/fd5_blitter.c', @@ -199,7 +175,6 @@ 'a5xx/fd5_texture.h', 'a5xx/fd5_zsa.c', 'a5xx/fd5_zsa.h', - 'a6xx/a6xx.xml.h', 'a6xx/fd6_blend.c', 'a6xx/fd6_blend.h', 'a6xx/fd6_blitter.c', @@ -230,32 +205,15 @@ 'a6xx/fd6_texture.h', 'a6xx/fd6_zsa.c', 'a6xx/fd6_zsa.h', - 'ir3/disasm-a3xx.c', - 'ir3/instr-a3xx.h', - 'ir3/ir3.c', 'ir3/ir3_cache.c', 'ir3/ir3_cache.h', - 'ir3/ir3_compiler_nir.c', - 'ir3/ir3_compiler.c', - 'ir3/ir3_compiler.h', - 'ir3/ir3_cp.c', - 'ir3/ir3_depth.c', - 'ir3/ir3_group.c', - 'ir3/ir3.h', - 'ir3/ir3_legalize.c', - 'ir3/ir3_nir.c', - 'ir3/ir3_nir.h', - 'ir3/ir3_nir_lower_tg4_to_tex.c', - 'ir3/ir3_print.c', - 'ir3/ir3_ra.c', - 'ir3/ir3_sched.c', - 'ir3/ir3_shader.c', - 'ir3/ir3_shader.h', + 'ir3/ir3_gallium.c', + 'ir3/ir3_gallium.h', ) freedreno_includes = [ inc_src, inc_include, inc_gallium, inc_gallium_aux, - include_directories('ir3') + inc_freedreno, include_directories('ir3'), ] freedreno_c_args = [] @@ -270,20 +228,21 @@ libfreedreno = static_library( 'freedreno', - [files_libfreedreno, ir3_nir_trig_c], + [files_libfreedreno], include_directories : freedreno_includes, c_args : [freedreno_c_args, c_vis_args], cpp_args : [freedreno_cpp_args, cpp_vis_args], - dependencies : [ - dep_libdrm, - dep_valgrind, - idep_nir_headers - ], + dependencies : [dep_libdrm, idep_nir_headers], ) driver_freedreno = declare_dependency( compile_args : '-DGALLIUM_FREEDRENO', - link_with : [libfreedrenowinsys, libfreedreno], + link_with : [ + libfreedrenowinsys, + libfreedreno, + libfreedreno_drm, + libfreedreno_ir3, + ], dependencies : idep_nir, ) @@ -292,13 +251,13 @@ 'ir3/ir3_cmdline.c', include_directories : freedreno_includes, dependencies : [ - dep_libdrm, - dep_valgrind, dep_thread, idep_nir, ], link_with : [ libfreedreno, + libfreedreno_drm, + libfreedreno_ir3, libgallium, libglsl_standalone, libmesa_util, diff -Nru 
mesa-18.3.3/src/gallium/drivers/i915/i915_screen.c mesa-19.0.1/src/gallium/drivers/i915/i915_screen.c --- mesa-18.3.3/src/gallium/drivers/i915/i915_screen.c 2018-12-07 18:58:04.000000000 +0000 +++ mesa-19.0.1/src/gallium/drivers/i915/i915_screen.c 2019-03-31 23:16:37.000000000 +0000 @@ -402,6 +402,8 @@ return 0; case PIPE_CAP_ENDIANNESS: return PIPE_ENDIAN_LITTLE; + case PIPE_CAP_MAX_VARYINGS: + return 10; case PIPE_CAP_VENDOR_ID: return 0x8086; diff -Nru mesa-18.3.3/src/gallium/drivers/imx/Automake.inc mesa-19.0.1/src/gallium/drivers/imx/Automake.inc --- mesa-18.3.3/src/gallium/drivers/imx/Automake.inc 2017-11-05 00:14:08.000000000 +0000 +++ mesa-19.0.1/src/gallium/drivers/imx/Automake.inc 1970-01-01 00:00:00.000000000 +0000 @@ -1,9 +0,0 @@ -if HAVE_GALLIUM_IMX - -TARGET_DRIVERS += imx-drm -TARGET_CPPFLAGS += -DGALLIUM_IMX -TARGET_LIB_DEPS += \ - $(top_builddir)/src/gallium/winsys/imx/drm/libimxdrm.la \ - $(LIBDRM_LIBS) - -endif diff -Nru mesa-18.3.3/src/gallium/drivers/imx/Makefile.am mesa-19.0.1/src/gallium/drivers/imx/Makefile.am --- mesa-18.3.3/src/gallium/drivers/imx/Makefile.am 2017-11-05 00:14:08.000000000 +0000 +++ mesa-19.0.1/src/gallium/drivers/imx/Makefile.am 1970-01-01 00:00:00.000000000 +0000 @@ -1,8 +0,0 @@ -include $(top_srcdir)/src/gallium/Automake.inc - -AM_CPPFLAGS = \ - $(GALLIUM_CFLAGS) - -noinst_LTLIBRARIES = libimx.la - -libimx_la_SOURCES = diff -Nru mesa-18.3.3/src/gallium/drivers/kmsro/Android.mk mesa-19.0.1/src/gallium/drivers/kmsro/Android.mk --- mesa-18.3.3/src/gallium/drivers/kmsro/Android.mk 1970-01-01 00:00:00.000000000 +0000 +++ mesa-19.0.1/src/gallium/drivers/kmsro/Android.mk 2019-03-31 23:16:37.000000000 +0000 @@ -0,0 +1,41 @@ +# Copyright (C) 2014 Emil Velikov +# +# Permission is hereby granted, free of charge, to any person obtaining a +# copy of this software and associated documentation files (the "Software"), +# to deal in the Software without restriction, including without limitation +# the rights to use, copy, modify, merge, 
publish, distribute, sublicense, +# and/or sell copies of the Software, and to permit persons to whom the +# Software is furnished to do so, subject to the following conditions: +# +# The above copyright notice and this permission notice shall be included +# in all copies or substantial portions of the Software. +# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL +# THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING +# FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER +# DEALINGS IN THE SOFTWARE. + +LOCAL_PATH := $(call my-dir) + +# get C_SOURCES +include $(LOCAL_PATH)/Makefile.sources + +include $(CLEAR_VARS) + +LOCAL_SRC_FILES := \ + $(C_SOURCES) + +LOCAL_MODULE := libmesa_pipe_kmsro + +include $(GALLIUM_COMMON_MK) +include $(BUILD_STATIC_LIBRARY) + +ifneq ($(HAVE_GALLIUM_KMSRO),) +GALLIUM_TARGET_DRIVERS += pl111 +GALLIUM_TARGET_DRIVERS += hx8357d +GALLIUM_TARGET_DRIVERS += imx +$(eval GALLIUM_LIBS += $(LOCAL_MODULE) libmesa_winsys_kmsro) +endif diff -Nru mesa-18.3.3/src/gallium/drivers/kmsro/Automake.inc mesa-19.0.1/src/gallium/drivers/kmsro/Automake.inc --- mesa-18.3.3/src/gallium/drivers/kmsro/Automake.inc 1970-01-01 00:00:00.000000000 +0000 +++ mesa-19.0.1/src/gallium/drivers/kmsro/Automake.inc 2019-03-31 23:16:37.000000000 +0000 @@ -0,0 +1,10 @@ +if HAVE_GALLIUM_KMSRO + +TARGET_DRIVERS += pl111 +TARGET_DRIVERS += hx8357d +TARGET_CPPFLAGS += -DGALLIUM_KMSRO +TARGET_LIB_DEPS += \ + $(top_builddir)/src/gallium/winsys/kmsro/drm/libkmsrodrm.la \ + $(LIBDRM_LIBS) + +endif diff -Nru mesa-18.3.3/src/gallium/drivers/kmsro/Makefile.am mesa-19.0.1/src/gallium/drivers/kmsro/Makefile.am --- mesa-18.3.3/src/gallium/drivers/kmsro/Makefile.am 1970-01-01 00:00:00.000000000 +0000 +++ 
mesa-19.0.1/src/gallium/drivers/kmsro/Makefile.am 2019-03-31 23:16:37.000000000 +0000 @@ -0,0 +1,8 @@ +include $(top_srcdir)/src/gallium/Automake.inc + +AM_CPPFLAGS = \ + $(GALLIUM_CFLAGS) + +noinst_LTLIBRARIES = libkmsro.la + +libkmsro_la_SOURCES = $(C_SOURCES) diff -Nru mesa-18.3.3/src/gallium/drivers/kmsro/Makefile.sources mesa-19.0.1/src/gallium/drivers/kmsro/Makefile.sources --- mesa-18.3.3/src/gallium/drivers/kmsro/Makefile.sources 1970-01-01 00:00:00.000000000 +0000 +++ mesa-19.0.1/src/gallium/drivers/kmsro/Makefile.sources 2019-03-31 23:16:37.000000000 +0000 @@ -0,0 +1,2 @@ +C_SOURCES := + diff -Nru mesa-18.3.3/src/gallium/drivers/llvmpipe/lp_screen.c mesa-19.0.1/src/gallium/drivers/llvmpipe/lp_screen.c --- mesa-18.3.3/src/gallium/drivers/llvmpipe/lp_screen.c 2018-12-07 18:58:04.000000000 +0000 +++ mesa-19.0.1/src/gallium/drivers/llvmpipe/lp_screen.c 2019-03-31 23:16:37.000000000 +0000 @@ -310,6 +310,8 @@ return 1; case PIPE_CAP_CLEAR_TEXTURE: return 1; + case PIPE_CAP_MAX_VARYINGS: + return 32; case PIPE_CAP_MULTISAMPLE_Z_RESOLVE: case PIPE_CAP_RESOURCE_FROM_USER_MEMORY: case PIPE_CAP_DEVICE_RESET_STATUS_QUERY: diff -Nru mesa-18.3.3/src/gallium/drivers/llvmpipe/lp_test_arit.c mesa-19.0.1/src/gallium/drivers/llvmpipe/lp_test_arit.c --- mesa-18.3.3/src/gallium/drivers/llvmpipe/lp_test_arit.c 2017-11-05 00:14:08.000000000 +0000 +++ mesa-19.0.1/src/gallium/drivers/llvmpipe/lp_test_arit.c 2019-03-31 23:16:37.000000000 +0000 @@ -458,7 +458,8 @@ continue; } - if (test->ref == &nearbyintf && length == 2 && + if (!util_cpu_caps.has_neon && + test->ref == &nearbyintf && length == 2 && ref != roundf(testval)) { /* FIXME: The generic (non SSE) path in lp_build_iround, which is * always taken for length==2 regardless of native round support, diff -Nru mesa-18.3.3/src/gallium/drivers/llvmpipe/lp_test_format.c mesa-19.0.1/src/gallium/drivers/llvmpipe/lp_test_format.c --- mesa-18.3.3/src/gallium/drivers/llvmpipe/lp_test_format.c 2018-09-27 19:13:54.000000000 +0000 +++ 
mesa-19.0.1/src/gallium/drivers/llvmpipe/lp_test_format.c 2019-03-31 23:16:37.000000000 +0000 @@ -44,8 +44,6 @@ #include "lp_test.h" -#define USE_TEXTURE_CACHE 1 - static struct lp_build_format_cache *cache_ptr; void @@ -80,7 +78,8 @@ static LLVMValueRef add_fetch_rgba_test(struct gallivm_state *gallivm, unsigned verbose, const struct util_format_description *desc, - struct lp_type type) + struct lp_type type, + unsigned use_cache) { char name[256]; LLVMContextRef context = gallivm->context; @@ -114,7 +113,7 @@ i = LLVMGetParam(func, 2); j = LLVMGetParam(func, 3); - if (cache_ptr) { + if (use_cache) { cache = LLVMGetParam(func, 4); } @@ -137,7 +136,8 @@ PIPE_ALIGN_STACK static boolean test_format_float(unsigned verbose, FILE *fp, - const struct util_format_description *desc) + const struct util_format_description *desc, + unsigned use_cache) { LLVMContextRef context; struct gallivm_state *gallivm; @@ -152,7 +152,8 @@ context = LLVMContextCreate(); gallivm = gallivm_create("test_module_float", context); - fetch = add_fetch_rgba_test(gallivm, verbose, desc, lp_float32_vec4_type()); + fetch = add_fetch_rgba_test(gallivm, verbose, desc, + lp_float32_vec4_type(), use_cache); gallivm_compile_module(gallivm); @@ -181,7 +182,7 @@ memset(unpacked, 0, sizeof unpacked); - fetch_ptr(unpacked, packed, j, i, cache_ptr); + fetch_ptr(unpacked, packed, j, i, use_cache ? 
cache_ptr : NULL); for(k = 0; k < 4; ++k) { if (util_double_inf_sign(test->unpacked[i][j][k]) != util_inf_sign(unpacked[k])) { @@ -236,7 +237,8 @@ PIPE_ALIGN_STACK static boolean test_format_unorm8(unsigned verbose, FILE *fp, - const struct util_format_description *desc) + const struct util_format_description *desc, + unsigned use_cache) { LLVMContextRef context; struct gallivm_state *gallivm; @@ -251,7 +253,8 @@ context = LLVMContextCreate(); gallivm = gallivm_create("test_module_unorm8", context); - fetch = add_fetch_rgba_test(gallivm, verbose, desc, lp_unorm8_vec4_type()); + fetch = add_fetch_rgba_test(gallivm, verbose, desc, + lp_unorm8_vec4_type(), use_cache); gallivm_compile_module(gallivm); @@ -280,7 +283,7 @@ memset(unpacked, 0, sizeof unpacked); - fetch_ptr(unpacked, packed, j, i, cache_ptr); + fetch_ptr(unpacked, packed, j, i, use_cache ? cache_ptr : NULL); match = TRUE; for(k = 0; k < 4; ++k) { @@ -335,15 +338,16 @@ static boolean test_one(unsigned verbose, FILE *fp, - const struct util_format_description *format_desc) + const struct util_format_description *format_desc, + unsigned use_cache) { boolean success = TRUE; - if (!test_format_float(verbose, fp, format_desc)) { + if (!test_format_float(verbose, fp, format_desc, use_cache)) { success = FALSE; } - if (!test_format_unorm8(verbose, fp, format_desc)) { + if (!test_format_unorm8(verbose, fp, format_desc, use_cache)) { success = FALSE; } @@ -356,49 +360,52 @@ { enum pipe_format format; boolean success = TRUE; + unsigned use_cache; -#if USE_TEXTURE_CACHE cache_ptr = align_malloc(sizeof(struct lp_build_format_cache), 16); -#endif - - for (format = 1; format < PIPE_FORMAT_COUNT; ++format) { - const struct util_format_description *format_desc; - format_desc = util_format_description(format); - if (!format_desc) { - continue; - } + for (use_cache = 0; use_cache < 2; use_cache++) { + for (format = 1; format < PIPE_FORMAT_COUNT; ++format) { + const struct util_format_description *format_desc; + + format_desc 
= util_format_description(format); + if (!format_desc) { + continue; + } + /* + * TODO: test more + */ - /* - * TODO: test more - */ + if (format_desc->colorspace == UTIL_FORMAT_COLORSPACE_ZS) { + continue; + } - if (format_desc->colorspace == UTIL_FORMAT_COLORSPACE_ZS) { - continue; - } + if (util_format_is_pure_integer(format)) + continue; - if (util_format_is_pure_integer(format)) - continue; + /* only have util fetch func for etc1 */ + if (format_desc->layout == UTIL_FORMAT_LAYOUT_ETC && + format != PIPE_FORMAT_ETC1_RGB8) { + continue; + } - /* only have util fetch func for etc1 */ - if (format_desc->layout == UTIL_FORMAT_LAYOUT_ETC && - format != PIPE_FORMAT_ETC1_RGB8) { - continue; - } + /* missing fetch funcs */ + if (format_desc->layout == UTIL_FORMAT_LAYOUT_ASTC) { + continue; + } - /* missing fetch funcs */ - if (format_desc->layout == UTIL_FORMAT_LAYOUT_ASTC) { - continue; - } + /* only test twice with formats which can use cache */ + if (format_desc->layout != UTIL_FORMAT_LAYOUT_S3TC && use_cache) { + continue; + } - if (!test_one(verbose, fp, format_desc)) { - success = FALSE; + if (!test_one(verbose, fp, format_desc, use_cache)) { + success = FALSE; + } } } -#if USE_TEXTURE_CACHE align_free(cache_ptr); -#endif return success; } diff -Nru mesa-18.3.3/src/gallium/drivers/llvmpipe/meson.build mesa-19.0.1/src/gallium/drivers/llvmpipe/meson.build --- mesa-18.3.3/src/gallium/drivers/llvmpipe/meson.build 2018-01-12 19:24:23.000000000 +0000 +++ mesa-19.0.1/src/gallium/drivers/llvmpipe/meson.build 2019-03-31 23:16:37.000000000 +0000 @@ -119,7 +119,8 @@ dependencies : [dep_llvm, dep_dl, dep_thread, dep_clock], include_directories : [inc_gallium, inc_gallium_aux, inc_include, inc_src], link_with : [libllvmpipe, libgallium, libmesa_util], - ) + ), + suite : ['llvmpipe'], ) endforeach endif diff -Nru mesa-18.3.3/src/gallium/drivers/nouveau/codegen/lib/gk104.asm mesa-19.0.1/src/gallium/drivers/nouveau/codegen/lib/gk104.asm --- 
mesa-18.3.3/src/gallium/drivers/nouveau/codegen/lib/gk104.asm 2017-11-05 00:14:08.000000000 +0000 +++ mesa-19.0.1/src/gallium/drivers/nouveau/codegen/lib/gk104.asm 2019-03-31 23:16:37.000000000 +0000 @@ -543,6 +543,8 @@ $p1 suldgb b32 $r3 cv zero u8 g[$r4d] $r2 $p0 long mov b32 $r3 0x3f800000 long nop +sched 0x00 0x00 0x00 0x00 0x00 0x00 0x00 +long nop long ret @@ -554,7 +556,144 @@ // SIZE: 9 * 8 bytes // gk104_rcp_f64: - long nop + // Step 1: classify input according to exponent and value, and calculate + // result for 0/inf/nan. $r2 holds the exponent value, which starts at + // bit 52 (bit 20 of the upper half) and is 11 bits in length + ext u32 $r2 $r1 0xb14 + add b32 $r3 $r2 0xffffffff + joinat #rcp_rejoin + // We want to check whether the exponent is 0 or 0x7ff (i.e. NaN, inf, + // denorm, or 0). Do this by substracting 1 from the exponent, which will + // mean that it's > 0x7fd in those cases when doing unsigned comparison + set $p0 0x1 gt u32 $r3 0x7fd + // $r3: 0 for norms, 0x36 for denorms, -1 for others + long mov b32 $r3 0x0 + sched 0x2f 0x04 0x2d 0x2b 0x2f 0x28 0x28 + join (not $p0) nop + // Process all special values: NaN, inf, denorm, 0 + mov b32 $r3 0xffffffff + // A number is NaN if its abs value is greater than or unordered with inf + set $p0 0x1 gtu f64 abs $r0d 0x7ff0000000000000 + (not $p0) bra #rcp_inf_or_denorm_or_zero + // NaN -> NaN, the next line sets the "quiet" bit of the result. 
This + // behavior is both seen on the CPU and the blob + join or b32 $r1 $r1 0x80000 +rcp_inf_or_denorm_or_zero: + and b32 $r4 $r1 0x7ff00000 + // Other values with nonzero in exponent field should be inf + set $p0 0x1 eq s32 $r4 0x0 + sched 0x2b 0x04 0x2f 0x2d 0x2b 0x2f 0x20 + $p0 bra #rcp_denorm_or_zero + // +/-Inf -> +/-0 + xor b32 $r1 $r1 0x7ff00000 + join mov b32 $r0 0x0 +rcp_denorm_or_zero: + set $p0 0x1 gtu f64 abs $r0d 0x0 + $p0 bra #rcp_denorm + // +/-0 -> +/-Inf + join or b32 $r1 $r1 0x7ff00000 +rcp_denorm: + // non-0 denorms: multiply with 2^54 (the 0x36 in $r3), join with norms + mul rn f64 $r0d $r0d 0x4350000000000000 + sched 0x2f 0x28 0x2b 0x28 0x28 0x04 0x28 + join mov b32 $r3 0x36 +rcp_rejoin: + // All numbers with -1 in $r3 have their result ready in $r0d, return them + // others need further calculation + set $p0 0x1 lt s32 $r3 0x0 + $p0 bra #rcp_end + // Step 2: Before the real calculation goes on, renormalize the values to + // range [1, 2) by setting exponent field to 0x3ff (the exponent of 1) + // result in $r6d. The exponent will be recovered later. 
+ ext u32 $r2 $r1 0xb14 + and b32 $r7 $r1 0x800fffff + add b32 $r7 $r7 0x3ff00000 + long mov b32 $r6 $r0 + sched 0x2b 0x04 0x28 0x28 0x2a 0x2b 0x2e + // Step 3: Convert new value to float (no overflow will occur due to step + // 2), calculate rcp and do newton-raphson step once + cvt rz f32 $r5 f64 $r6d + long rcp f32 $r4 $r5 + mov b32 $r0 0xbf800000 + fma rn f32 $r5 $r4 $r5 $r0 + fma rn f32 $r0 neg $r4 $r5 $r4 + // Step 4: convert result $r0 back to double, do newton-raphson steps + cvt f64 $r0d f32 $r0 + cvt f64 $r6d neg f64 $r6d + sched 0x2e 0x29 0x29 0x29 0x29 0x29 0x29 + cvt f64 $r8d f32 0x3f800000 + // 4 Newton-Raphson Steps, tmp in $r4d, result in $r0d + // The formula used here (and above) is: + // RCP_{n + 1} = 2 * RCP_{n} - x * RCP_{n} * RCP_{n} + // The following code uses 2 FMAs for each step, and it will basically + // looks like: + // tmp = -src * RCP_{n} + 1 + // RCP_{n + 1} = RCP_{n} * tmp + RCP_{n} + fma rn f64 $r4d $r6d $r0d $r8d + fma rn f64 $r0d $r0d $r4d $r0d + fma rn f64 $r4d $r6d $r0d $r8d + fma rn f64 $r0d $r0d $r4d $r0d + fma rn f64 $r4d $r6d $r0d $r8d + fma rn f64 $r0d $r0d $r4d $r0d + sched 0x29 0x20 0x28 0x28 0x28 0x28 0x28 + fma rn f64 $r4d $r6d $r0d $r8d + fma rn f64 $r0d $r0d $r4d $r0d + // Step 5: Exponent recovery and final processing + // The exponent is recovered by adding what we added to the exponent. + // Suppose we want to calculate rcp(x), but we have rcp(cx), then + // rcp(x) = c * rcp(cx) + // The delta in exponent comes from two sources: + // 1) The renormalization in step 2. The delta is: + // 0x3ff - $r2 + // 2) (For the denorm input) The 2^54 we multiplied at rcp_denorm, stored + // in $r3 + // These 2 sources are calculated in the first two lines below, and then + // added to the exponent extracted from the result above. + // Note that after processing, the new exponent may >= 0x7ff (inf) + // or <= 0 (denorm). 
Those cases will be handled respectively below + subr b32 $r2 $r2 0x3ff + long add b32 $r4 $r2 $r3 + ext u32 $r3 $r1 0xb14 + // New exponent in $r3 + long add b32 $r3 $r3 $r4 + add b32 $r2 $r3 0xffffffff + sched 0x28 0x2b 0x28 0x2b 0x28 0x28 0x2b + // (exponent-1) < 0x7fe (unsigned) means the result is in norm range + // (same logic as in step 1) + set $p0 0x1 lt u32 $r2 0x7fe + (not $p0) bra #rcp_result_inf_or_denorm + // Norms: convert exponents back and return + shl b32 $r4 $r4 clamp 0x14 + long add b32 $r1 $r4 $r1 + bra #rcp_end +rcp_result_inf_or_denorm: + // New exponent >= 0x7ff means that result is inf + set $p0 0x1 ge s32 $r3 0x7ff + (not $p0) bra #rcp_result_denorm + sched 0x20 0x25 0x28 0x2b 0x23 0x25 0x2f + // Infinity + and b32 $r1 $r1 0x80000000 + long mov b32 $r0 0x0 + add b32 $r1 $r1 0x7ff00000 + bra #rcp_end +rcp_result_denorm: + // Denorm result comes from huge input. The greatest possible fp64, i.e. + // 0x7fefffffffffffff's rcp is 0x0004000000000000, 1/4 of the smallest + // normal value. Other rcp result should be greater than that. If we + // set the exponent field to 1, we can recover the result by multiplying + // it with 1/2 or 1/4. 1/2 is used if the "exponent" $r3 is 0, otherwise + // 1/4 ($r3 should be -1 then). This is quite tricky but greatly simplifies + // the logic here. + set $p0 0x1 ne u32 $r3 0x0 + and b32 $r1 $r1 0x800fffff + // 0x3e800000: 1/4 + $p0 cvt f64 $r6d f32 0x3e800000 + sched 0x2f 0x28 0x2c 0x2e 0x2a 0x20 0x27 + // 0x3f000000: 1/2 + (not $p0) cvt f64 $r6d f32 0x3f000000 + add b32 $r1 $r1 0x00100000 + mul rn f64 $r0d $r0d $r6d +rcp_end: long ret // RSQ F64: Newton Raphson rsqrt(x): r_{i+1} = r_i * (1.5 - 0.5 * x * r_i * r_i) @@ -565,7 +704,67 @@ // SIZE: 14 * 8 bytes // gk104_rsq_f64: - long nop + // Before getting initial result rsqrt64h, two special cases should be + // handled first. + // 1. 
NaN: set the highest bit in mantissa so it'll be surely recognized + // as NaN in rsqrt64h + set $p0 0x1 gtu f64 abs $r0d 0x7ff0000000000000 + $p0 or b32 $r1 $r1 0x00080000 + and b32 $r2 $r1 0x7fffffff + sched 0x27 0x20 0x28 0x2c 0x25 0x28 0x28 + // 2. denorms and small normal values: using their original value will + // lose precision either at rsqrt64h or the first step in newton-raphson + // steps below. Take 2 as a threshold in exponent field, and multiply + // with 2^54 if the exponent is smaller or equal. (will multiply 2^27 + // to recover in the end) + ext u32 $r3 $r1 0xb14 + set $p1 0x1 le u32 $r3 0x2 + long or b32 $r2 $r0 $r2 + $p1 mul rn f64 $r0d $r0d 0x4350000000000000 + rsqrt64h $r5 $r1 + // rsqrt64h will give correct result for 0/inf/nan, the following logic + // checks whether the input is one of those (exponent is 0x7ff or all 0 + // except for the sign bit) + set b32 $r6 ne u32 $r3 0x7ff + long and b32 $r2 $r2 $r6 + sched 0x28 0x2b 0x20 0x27 0x28 0x2e 0x28 + set $p0 0x1 ne u32 $r2 0x0 + $p0 bra #rsq_norm + // For 0/inf/nan, make sure the sign bit agrees with input and return + and b32 $r1 $r1 0x80000000 + long mov b32 $r0 0x0 + long or b32 $r1 $r1 $r5 + long ret +rsq_norm: + // For others, do 4 Newton-Raphson steps with the formula: + // RSQ_{n + 1} = RSQ_{n} * (1.5 - 0.5 * x * RSQ_{n} * RSQ_{n}) + // In the code below, each step is written as: + // tmp1 = 0.5 * x * RSQ_{n} + // tmp2 = -RSQ_{n} * tmp1 + 0.5 + // RSQ_{n + 1} = RSQ_{n} * tmp2 + RSQ_{n} + long mov b32 $r4 0x0 + sched 0x2f 0x29 0x29 0x29 0x29 0x29 0x29 + // 0x3f000000: 1/2 + cvt f64 $r8d f32 0x3f000000 + mul rn f64 $r2d $r0d $r8d + mul rn f64 $r0d $r2d $r4d + fma rn f64 $r6d neg $r4d $r0d $r8d + fma rn f64 $r4d $r4d $r6d $r4d + mul rn f64 $r0d $r2d $r4d + fma rn f64 $r6d neg $r4d $r0d $r8d + sched 0x29 0x29 0x29 0x29 0x29 0x29 0x29 + fma rn f64 $r4d $r4d $r6d $r4d + mul rn f64 $r0d $r2d $r4d + fma rn f64 $r6d neg $r4d $r0d $r8d + fma rn f64 $r4d $r4d $r6d $r4d + mul rn f64 $r0d $r2d 
$r4d + fma rn f64 $r6d neg $r4d $r0d $r8d + fma rn f64 $r4d $r4d $r6d $r4d + sched 0x29 0x20 0x28 0x2e 0x00 0x00 0x00 + // Multiply 2^27 to result for small inputs to recover + $p1 mul rn f64 $r4d $r4d 0x41a0000000000000 + long mov b32 $r1 $r5 + long mov b32 $r0 $r4 long ret // diff -Nru mesa-18.3.3/src/gallium/drivers/nouveau/codegen/lib/gk104.asm.h mesa-19.0.1/src/gallium/drivers/nouveau/codegen/lib/gk104.asm.h --- mesa-18.3.3/src/gallium/drivers/nouveau/codegen/lib/gk104.asm.h 2017-11-05 00:14:08.000000000 +0000 +++ mesa-19.0.1/src/gallium/drivers/nouveau/codegen/lib/gk104.asm.h 2019-03-31 23:16:37.000000000 +0000 @@ -481,12 +481,132 @@ 0xd40040000840c785, 0x18fe00000000dde2, 0x4000000000001de4, - 0x9000000000001de7, -/* 0x0f08: gk104_rcp_f64 */ + 0x2000000000000007, 0x4000000000001de4, 0x9000000000001de7, -/* 0x0f18: gk104_rsq_f64 */ - 0x4000000000001de4, +/* 0x0f18: gk104_rcp_f64 */ + 0x7000c02c50109c03, + 0x0bfffffffc20dc02, + 0x6000000280000007, + 0x1a0ec01ff431dc03, + 0x180000000000dde2, + 0x228282f2b2d042f7, + 0x40000000000021f4, + 0x1bfffffffc00dde2, + 0x1e0edffc0001dc81, + 0x40000000200021e7, + 0x3800200000105c52, +/* 0x0f70: rcp_inf_or_denorm_or_zero */ + 0x39ffc00000111c02, + 0x190e0000fc41dc23, + 0x2202f2b2d2f042b7, + 0x40000000400001e7, + 0x39ffc00000105c82, + 0x1800000000001df2, +/* 0x0fa0: rcp_denorm_or_zero */ + 0x1e0ec0000001dc81, + 0x40000000200001e7, + 0x39ffc00000105c52, +/* 0x0fb8: rcp_denorm */ + 0x5000d0d400001c01, + 0x2280428282b282f7, + 0x18000000d800ddf2, +/* 0x0fd0: rcp_rejoin */ + 0x188e0000fc31dc23, + 0x40000006000001e7, + 0x7000c02c50109c03, + 0x3a003ffffc11dc02, + 0x08ffc0000071dc02, + 0x2800000000019de4, + 0x22e2b2a2828042b7, + 0x1006000019a15c04, + 0xc800000010511c00, + 0x1afe000000001de2, + 0x3000000014415c00, + 0x3008000014401e00, + 0x1000000001301c04, + 0x1000000019b19d04, + 0x22929292929292e7, + 0x1000cfe001321c04, + 0x2010000000611c01, + 0x2000000010001c01, + 0x2010000000611c01, + 0x2000000010001c01, + 0x2010000000611c01, + 
0x2000000010001c01, + 0x2282828282820297, + 0x2010000000611c01, + 0x2000000010001c01, + 0x0800000ffc209e02, + 0x480000000c211c03, + 0x7000c02c5010dc03, + 0x480000001030dc03, + 0x0bfffffffc309c02, + 0x22b28282b282b287, + 0x188ec01ff821dc03, + 0x40000000600021e7, + 0x6000c00050411c03, + 0x4800000004405c03, + 0x40000001c0001de7, +/* 0x10f0: rcp_result_inf_or_denorm */ + 0x1b0ec01ffc31dc23, + 0x40000000a00021e7, + 0x22f25232b2825207, + 0x3a00000000105c02, + 0x1800000000001de2, + 0x09ffc00000105c02, + 0x40000000e0001de7, +/* 0x1128: rcp_result_denorm */ + 0x1a8e0000fc31dc03, + 0x3a003ffffc105c02, + 0x1000cfa001318004, + 0x227202a2e2c282f7, + 0x1000cfc00131a004, + 0x0800400000105c02, + 0x5000000018001c01, +/* 0x1160: rcp_end */ + 0x9000000000001de7, +/* 0x1168: gk104_rsq_f64 */ + 0x1e0edffc0001dc81, + 0x3800200000104042, + 0x39fffffffc109c02, + 0x22828252c2820277, + 0x7000c02c5010dc03, + 0x198ec0000833dc03, + 0x6800000008009c43, + 0x5000d0d400000401, + 0xc80000001c115c00, + 0x128ec01ffc319c03, + 0x6800000018209c03, + 0x2282e2827202b287, + 0x1a8e0000fc21dc03, + 0x40000000800001e7, + 0x3a00000000105c02, + 0x1800000000001de2, + 0x6800000014105c43, + 0x9000000000001de7, +/* 0x11f8: rsq_norm */ + 0x1800000000011de2, + 0x22929292929292f7, + 0x1000cfc001321c04, + 0x5000000020009c01, + 0x5000000010201c01, + 0x2010000000419e01, + 0x2008000018411c01, + 0x5000000010201c01, + 0x2010000000419e01, + 0x2292929292929297, + 0x2008000018411c01, + 0x5000000010201c01, + 0x2010000000419e01, + 0x2008000018411c01, + 0x5000000010201c01, + 0x2010000000419e01, + 0x2008000018411c01, + 0x20000002e2820297, + 0x5000d06800410401, + 0x2800000014005de4, + 0x2800000010001de4, 0x9000000000001de7, 0xc800000003f01cc5, 0x2c00000100005c04, @@ -495,7 +615,7 @@ 0x680100000c1fdc03, 0x4000000a60001c47, 0x180000004000dde2, -/* 0x0f60: spill_cfstack */ +/* 0x12e0: spill_cfstack */ 0x78000009c0000007, 0x0c0000000430dd02, 0x4003ffffa0001ca7, @@ -543,14 +663,14 @@ 0x4000000100001ea7, 0x480100000c001c03, 
0x0800000000105c42, -/* 0x10d8: shared_loop */ +/* 0x1458: shared_loop */ 0xc100000000309c85, 0x9400000500009c85, 0x0c00000010001d02, 0x0800000000105d42, 0x0c0000001030dd02, 0x4003ffff40001ca7, -/* 0x1108: shared_done */ +/* 0x1488: shared_done */ 0x2800406420001de4, 0x2800406430005de4, 0xe000000000001c45, @@ -564,7 +684,7 @@ 0x480000000c209c03, 0x4801000008001c03, 0x0800000000105c42, -/* 0x1170: search_cstack */ +/* 0x14f0: search_cstack */ 0x280040646000dde4, 0x8400000020009f05, 0x190ec0002821dc03, @@ -573,17 +693,17 @@ 0x0800000000105c42, 0x0c0000004030dd02, 0x00029dff0ffc5cbf, -/* 0x11b0: entry_found */ +/* 0x1530: entry_found */ 0x8400000000009f85, 0x2800406400001de4, 0x2800406410005de4, 0x9400000010009c85, 0x4000000000001df4, -/* 0x11d8: end_exit */ +/* 0x1558: end_exit */ 0x9800000003ffdcc5, 0xd000000000008007, 0xa000000000004007, -/* 0x11f0: end_cont */ +/* 0x1570: end_cont */ 0xd000000000008007, 0x3400c3fffc201c04, 0xc000000003f01ec5, @@ -593,6 +713,6 @@ uint64_t gk104_builtin_offsets[] = { 0x0000000000000000, 0x00000000000000f0, - 0x0000000000000f08, 0x0000000000000f18, + 0x0000000000001168, }; diff -Nru mesa-18.3.3/src/gallium/drivers/nouveau/codegen/lib/gk110.asm mesa-19.0.1/src/gallium/drivers/nouveau/codegen/lib/gk110.asm --- mesa-18.3.3/src/gallium/drivers/nouveau/codegen/lib/gk110.asm 2017-11-05 00:14:08.000000000 +0000 +++ mesa-19.0.1/src/gallium/drivers/nouveau/codegen/lib/gk110.asm 2019-03-31 23:16:37.000000000 +0000 @@ -83,12 +83,229 @@ $p0 sub b32 $r1 $r1 $r2 $p0 add b32 $r0 $r0 0x1 $p3 cvt s32 $r0 neg s32 $r0 - sched 0x04 0x2e 0x04 0x28 0x04 0x20 0x2c + sched 0x04 0x2e 0x28 0x04 0x28 0x28 0x28 $p2 cvt s32 $r1 neg s32 $r1 ret +// RCP F64 +// +// INPUT: $r0d +// OUTPUT: $r0d +// CLOBBER: $r2 - $r9, $p0 +// +// The core of RCP and RSQ implementation is Newton-Raphson step, which is +// used to find successively better approximation from an imprecise initial +// value (single precision rcp in RCP and rsqrt64h in RSQ). 
+// gk110_rcp_f64: + // Step 1: classify input according to exponent and value, and calculate + // result for 0/inf/nan. $r2 holds the exponent value, which starts at + // bit 52 (bit 20 of the upper half) and is 11 bits in length + ext u32 $r2 $r1 0xb14 + add b32 $r3 $r2 0xffffffff + joinat #rcp_rejoin + // We want to check whether the exponent is 0 or 0x7ff (i.e. NaN, inf, + // denorm, or 0). Do this by substracting 1 from the exponent, which will + // mean that it's > 0x7fd in those cases when doing unsigned comparison + set b32 $p0 0x1 gt u32 $r3 0x7fd + // $r3: 0 for norms, 0x36 for denorms, -1 for others + mov b32 $r3 0x0 + sched 0x2f 0x04 0x2d 0x2b 0x2f 0x28 0x28 + join (not $p0) nop + // Process all special values: NaN, inf, denorm, 0 + mov b32 $r3 0xffffffff + // A number is NaN if its abs value is greater than or unordered with inf + set $p0 0x1 gtu f64 abs $r0d 0x7ff0000000000000 + (not $p0) bra #rcp_inf_or_denorm_or_zero + // NaN -> NaN, the next line sets the "quiet" bit of the result. 
This + // behavior is both seen on the CPU and the blob + join or b32 $r1 $r1 0x80000 +rcp_inf_or_denorm_or_zero: + and b32 $r4 $r1 0x7ff00000 + // Other values with nonzero in exponent field should be inf + set b32 $p0 0x1 eq s32 $r4 0x0 + sched 0x2b 0x04 0x2f 0x2d 0x2b 0x2f 0x20 + $p0 bra #rcp_denorm_or_zero + // +/-Inf -> +/-0 + xor b32 $r1 $r1 0x7ff00000 + join mov b32 $r0 0x0 +rcp_denorm_or_zero: + set $p0 0x1 gtu f64 abs $r0d 0x0 + $p0 bra #rcp_denorm + // +/-0 -> +/-Inf + join or b32 $r1 $r1 0x7ff00000 +rcp_denorm: + // non-0 denorms: multiply with 2^54 (the 0x36 in $r3), join with norms + mul rn f64 $r0d $r0d 0x4350000000000000 + sched 0x2f 0x28 0x2b 0x28 0x28 0x04 0x28 + join mov b32 $r3 0x36 +rcp_rejoin: + // All numbers with -1 in $r3 have their result ready in $r0d, return them + // others need further calculation + set b32 $p0 0x1 lt s32 $r3 0x0 + $p0 bra #rcp_end + // Step 2: Before the real calculation goes on, renormalize the values to + // range [1, 2) by setting exponent field to 0x3ff (the exponent of 1) + // result in $r6d. The exponent will be recovered later. 
+ ext u32 $r2 $r1 0xb14 + and b32 $r7 $r1 0x800fffff + add b32 $r7 $r7 0x3ff00000 + mov b32 $r6 $r0 + sched 0x2b 0x04 0x28 0x28 0x2a 0x2b 0x2e + // Step 3: Convert new value to float (no overflow will occur due to step + // 2), calculate rcp and do newton-raphson step once + cvt rz f32 $r5 f64 $r6d + rcp f32 $r4 $r5 + mov b32 $r0 0xbf800000 + fma rn f32 $r5 $r4 $r5 $r0 + fma rn f32 $r0 neg $r4 $r5 $r4 + // Step 4: convert result $r0 back to double, do newton-raphson steps + cvt f64 $r0d f32 $r0 + cvt f64 $r6d f64 neg $r6d + sched 0x2e 0x29 0x29 0x29 0x29 0x29 0x29 + cvt f64 $r8d f32 0x3f800000 + // 4 Newton-Raphson Steps, tmp in $r4d, result in $r0d + // The formula used here (and above) is: + // RCP_{n + 1} = 2 * RCP_{n} - x * RCP_{n} * RCP_{n} + // The following code uses 2 FMAs for each step, and it will basically + // looks like: + // tmp = -src * RCP_{n} + 1 + // RCP_{n + 1} = RCP_{n} * tmp + RCP_{n} + fma rn f64 $r4d $r6d $r0d $r8d + fma rn f64 $r0d $r0d $r4d $r0d + fma rn f64 $r4d $r6d $r0d $r8d + fma rn f64 $r0d $r0d $r4d $r0d + fma rn f64 $r4d $r6d $r0d $r8d + fma rn f64 $r0d $r0d $r4d $r0d + sched 0x29 0x20 0x28 0x28 0x28 0x28 0x28 + fma rn f64 $r4d $r6d $r0d $r8d + fma rn f64 $r0d $r0d $r4d $r0d + // Step 5: Exponent recovery and final processing + // The exponent is recovered by adding what we added to the exponent. + // Suppose we want to calculate rcp(x), but we have rcp(cx), then + // rcp(x) = c * rcp(cx) + // The delta in exponent comes from two sources: + // 1) The renormalization in step 2. The delta is: + // 0x3ff - $r2 + // 2) (For the denorm input) The 2^54 we multiplied at rcp_denorm, stored + // in $r3 + // These 2 sources are calculated in the first two lines below, and then + // added to the exponent extracted from the result above. + // Note that after processing, the new exponent may >= 0x7ff (inf) + // or <= 0 (denorm). 
Those cases will be handled respectively below + subr b32 $r2 $r2 0x3ff + add b32 $r4 $r2 $r3 + ext u32 $r3 $r1 0xb14 + // New exponent in $r3 + add b32 $r3 $r3 $r4 + add b32 $r2 $r3 0xffffffff + sched 0x28 0x2b 0x28 0x2b 0x28 0x28 0x2b + // (exponent-1) < 0x7fe (unsigned) means the result is in norm range + // (same logic as in step 1) + set b32 $p0 0x1 lt u32 $r2 0x7fe + (not $p0) bra #rcp_result_inf_or_denorm + // Norms: convert exponents back and return + shl b32 $r4 $r4 clamp 0x14 + add b32 $r1 $r4 $r1 + bra #rcp_end +rcp_result_inf_or_denorm: + // New exponent >= 0x7ff means that result is inf + set b32 $p0 0x1 ge s32 $r3 0x7ff + (not $p0) bra #rcp_result_denorm + sched 0x20 0x25 0x28 0x2b 0x23 0x25 0x2f + // Infinity + and b32 $r1 $r1 0x80000000 + mov b32 $r0 0x0 + add b32 $r1 $r1 0x7ff00000 + bra #rcp_end +rcp_result_denorm: + // Denorm result comes from huge input. The greatest possible fp64, i.e. + // 0x7fefffffffffffff's rcp is 0x0004000000000000, 1/4 of the smallest + // normal value. Other rcp result should be greater than that. If we + // set the exponent field to 1, we can recover the result by multiplying + // it with 1/2 or 1/4. 1/2 is used if the "exponent" $r3 is 0, otherwise + // 1/4 ($r3 should be -1 then). This is quite tricky but greatly simplifies + // the logic here. + set b32 $p0 0x1 ne u32 $r3 0x0 + and b32 $r1 $r1 0x800fffff + // 0x3e800000: 1/4 + $p0 cvt f64 $r6d f32 0x3e800000 + sched 0x2f 0x28 0x2c 0x2e 0x2a 0x20 0x27 + // 0x3f000000: 1/2 + (not $p0) cvt f64 $r6d f32 0x3f000000 + add b32 $r1 $r1 0x00100000 + mul rn f64 $r0d $r0d $r6d +rcp_end: + ret + +// RSQ F64 +// +// INPUT: $r0d +// OUTPUT: $r0d +// CLOBBER: $r2 - $r9, $p0 - $p1 +// gk110_rsq_f64: + // Before getting initial result rsqrt64h, two special cases should be + // handled first. + // 1. 
NaN: set the highest bit in mantissa so it'll be surely recognized + // as NaN in rsqrt64h + set $p0 0x1 gtu f64 abs $r0d 0x7ff0000000000000 + $p0 or b32 $r1 $r1 0x00080000 + and b32 $r2 $r1 0x7fffffff + sched 0x27 0x20 0x28 0x2c 0x25 0x28 0x28 + // 2. denorms and small normal values: using their original value will + // lose precision either at rsqrt64h or the first step in newton-raphson + // steps below. Take 2 as a threshold in exponent field, and multiply + // with 2^54 if the exponent is smaller or equal. (will multiply 2^27 + // to recover in the end) + ext u32 $r3 $r1 0xb14 + set b32 $p1 0x1 le u32 $r3 0x2 + or b32 $r2 $r0 $r2 + $p1 mul rn f64 $r0d $r0d 0x4350000000000000 + rsqrt64h f32 $r5 $r1 + // rsqrt64h will give correct result for 0/inf/nan, the following logic + // checks whether the input is one of those (exponent is 0x7ff or all 0 + // except for the sign bit) + set b32 $r6 ne u32 $r3 0x7ff + and b32 $r2 $r2 $r6 + sched 0x28 0x2b 0x20 0x27 0x28 0x2e 0x28 + set b32 $p0 0x1 ne u32 $r2 0x0 + $p0 bra #rsq_norm + // For 0/inf/nan, make sure the sign bit agrees with input and return + and b32 $r1 $r1 0x80000000 + mov b32 $r0 0x0 + or b32 $r1 $r1 $r5 + ret +rsq_norm: + // For others, do 4 Newton-Raphson steps with the formula: + // RSQ_{n + 1} = RSQ_{n} * (1.5 - 0.5 * x * RSQ_{n} * RSQ_{n}) + // In the code below, each step is written as: + // tmp1 = 0.5 * x * RSQ_{n} + // tmp2 = -RSQ_{n} * tmp1 + 0.5 + // RSQ_{n + 1} = RSQ_{n} * tmp2 + RSQ_{n} + mov b32 $r4 0x0 + sched 0x2f 0x29 0x29 0x29 0x29 0x29 0x29 + // 0x3f000000: 1/2 + cvt f64 $r8d f32 0x3f000000 + mul rn f64 $r2d $r0d $r8d + mul rn f64 $r0d $r2d $r4d + fma rn f64 $r6d neg $r4d $r0d $r8d + fma rn f64 $r4d $r4d $r6d $r4d + mul rn f64 $r0d $r2d $r4d + fma rn f64 $r6d neg $r4d $r0d $r8d + sched 0x29 0x29 0x29 0x29 0x29 0x29 0x29 + fma rn f64 $r4d $r4d $r6d $r4d + mul rn f64 $r0d $r2d $r4d + fma rn f64 $r6d neg $r4d $r0d $r8d + fma rn f64 $r4d $r4d $r6d $r4d + mul rn f64 $r0d $r2d $r4d + fma rn f64 
$r6d neg $r4d $r0d $r8d + fma rn f64 $r4d $r4d $r6d $r4d + sched 0x29 0x20 0x28 0x2e 0x00 0x00 0x00 + // Multiply 2^27 to result for small inputs to recover + $p1 mul rn f64 $r4d $r4d 0x41a0000000000000 + mov b32 $r1 $r5 + mov b32 $r0 $r4 ret .section #gk110_builtin_offsets diff -Nru mesa-18.3.3/src/gallium/drivers/nouveau/codegen/lib/gk110.asm.h mesa-19.0.1/src/gallium/drivers/nouveau/codegen/lib/gk110.asm.h --- mesa-18.3.3/src/gallium/drivers/nouveau/codegen/lib/gk110.asm.h 2017-11-05 00:14:08.000000000 +0000 +++ mesa-19.0.1/src/gallium/drivers/nouveau/codegen/lib/gk110.asm.h 2019-03-31 23:16:37.000000000 +0000 @@ -65,11 +65,132 @@ 0xe088000001000406, 0x4000000000800001, 0xe6010000000ce802, - 0x08b08010a010b810, + 0x08a0a0a010a0b810, 0xe60100000088e806, 0x19000000001c003c, /* 0x0218: gk110_rcp_f64 */ -/* 0x0218: gk110_rsq_f64 */ + 0xc00000058a1c0409, + 0x407fffffff9c080d, + 0x1480000050000000, + 0xb3401c03fe9c0c1d, + 0xe4c03c007f9c000e, + 0x08a0a0bcacb410bc, + 0x8580000000603c02, + 0x747fffffff9fc00e, + 0xb4601fff801c021d, + 0x120000000420003c, + 0x21000400005c0404, +/* 0x0270: rcp_inf_or_denorm_or_zero */ + 0x203ff800001c0410, + 0xb3281c00001c101d, + 0x0880bcacb4bc10ac, + 0x120000000800003c, + 0x223ff800001c0404, + 0xe4c03c007fdc0002, +/* 0x02a0: rcp_denorm_or_zero */ + 0xb4601c00001c021d, + 0x120000000400003c, + 0x213ff800005c0404, +/* 0x02b8: rcp_denorm */ + 0xc400021a801c0001, + 0x08a010a0a0aca0bc, + 0x740000001b5fc00e, +/* 0x02d0: rcp_rejoin */ + 0xb3181c00001c0c1d, + 0x12000000c000003c, + 0xc00000058a1c0409, + 0x204007ffff9c041c, + 0x401ff800001c1c1d, + 0xe4c03c00001c001a, + 0x08b8aca8a0a010ac, + 0xe5400c00031c3816, + 0x84000000021c1412, + 0x745fc000001fc002, + 0xcc000000029c1016, + 0xcc081000029c1002, + 0xe5400000001c2c02, + 0xe5410000031c3c1a, + 0x08a4a4a4a4a4a4b8, + 0xc54001fc001c2c21, + 0xdb802000001c1812, + 0xdb800000021c0002, + 0xdb802000001c1812, + 0xdb800000021c0002, + 0xdb802000001c1812, + 0xdb800000021c0002, + 0x08a0a0a0a0a080a4, + 
0xdb802000001c1812, + 0xdb800000021c0002, + 0x48000001ff9c0809, + 0xe0800000019c0812, + 0xc00000058a1c040d, + 0xe0800000021c0c0e, + 0x407fffffff9c0c09, + 0x08aca0a0aca0aca0, + 0xb3101c03ff1c081d, + 0x120000000c20003c, + 0xc24000000a1c1011, + 0xe0800000009c1006, + 0x12000000381c003c, +/* 0x03f0: rcp_result_inf_or_denorm */ + 0xb3681c03ff9c0c1d, + 0x120000001420003c, + 0x08bc948caca09480, + 0x20400000001c0404, + 0xe4c03c007f9c0002, + 0x403ff800001c0405, + 0x120000001c1c003c, +/* 0x0428: rcp_result_denorm */ + 0xb3501c00001c0c1d, + 0x204007ffff9c0404, + 0xc54001f400002c19, + 0x089c80a8b8b0a0bc, + 0xc54001f800202c19, + 0x40000800001c0405, + 0xe4000000031c0002, +/* 0x0460: rcp_end */ + 0x19000000001c003c, +/* 0x0468: gk110_rsq_f64 */ + 0xb4601fff801c021d, + 0x2100040000000404, + 0x203fffffff9c0408, + 0x08a0a094b0a0809c, + 0xc00000058a1c040d, + 0xb3301c00011c0c3d, + 0xe2001000011c000a, + 0xc400021a80040001, + 0x84000000039c0416, + 0xb2d01c03ff9c0c19, + 0xe2000000031c080a, + 0x08a0b8a09c80aca0, + 0xb3501c00001c081d, + 0x120000001000003c, + 0x20400000001c0404, + 0xe4c03c007f9c0002, + 0xe2001000029c0406, + 0x19000000001c003c, +/* 0x04f8: rsq_norm */ + 0xe4c03c007f9c0012, + 0x08a4a4a4a4a4a4bc, + 0xc54001f8001c2c21, + 0xe4000000041c000a, + 0xe4000000021c0802, + 0xdb882000001c101a, + 0xdb801000031c1012, + 0xe4000000021c0802, + 0xdb882000001c101a, + 0x08a4a4a4a4a4a4a4, + 0xdb801000031c1012, + 0xe4000000021c0802, + 0xdb882000001c101a, + 0xdb801000031c1012, + 0xe4000000021c0802, + 0xdb882000001c101a, + 0xdb801000031c1012, + 0x08000000b8a080a4, + 0xc400020d00041011, + 0xe4c03c00029c0006, + 0xe4c03c00021c0002, 0x19000000001c003c, }; @@ -77,5 +198,5 @@ 0x0000000000000000, 0x00000000000000f0, 0x0000000000000218, - 0x0000000000000218, + 0x0000000000000468, }; diff -Nru mesa-18.3.3/src/gallium/drivers/nouveau/codegen/lib/gm107.asm mesa-19.0.1/src/gallium/drivers/nouveau/codegen/lib/gm107.asm --- mesa-18.3.3/src/gallium/drivers/nouveau/codegen/lib/gm107.asm 2018-09-27 19:13:54.000000000 
+0000 +++ mesa-19.0.1/src/gallium/drivers/nouveau/codegen/lib/gm107.asm 2019-03-31 23:16:37.000000000 +0000 @@ -100,10 +100,253 @@ ret nop 0 -// STUB +// RCP F64 +// +// INPUT: $r0d +// OUTPUT: $r0d +// CLOBBER: $r2 - $r9, $p0 +// +// The core of RCP and RSQ implementation is Newton-Raphson step, which is +// used to find successively better approximation from an imprecise initial +// value (single precision rcp in RCP and rsqrt64h in RSQ). +// gm107_rcp_f64: -gm107_rsq_f64: + // Step 1: classify input according to exponent and value, and calculate + // result for 0/inf/nan. $r2 holds the exponent value, which starts at + // bit 52 (bit 20 of the upper half) and is 11 bits in length + sched (st 0x0) (st 0x0) (st 0x0) + bfe u32 $r2 $r1 0xb14 + iadd32i $r3 $r2 -1 + ssy #rcp_rejoin + // We want to check whether the exponent is 0 or 0x7ff (i.e. NaN, inf, + // denorm, or 0). Do this by substracting 1 from the exponent, which will + // mean that it's > 0x7fd in those cases when doing unsigned comparison + sched (st 0x0) (st 0x0) (st 0x0) + isetp gt u32 and $p0 1 $r3 0x7fd 1 + // $r3: 0 for norms, 0x36 for denorms, -1 for others + mov $r3 0x0 0xf + not $p0 sync + // Process all special values: NaN, inf, denorm, 0 + sched (st 0x0) (st 0x0) (st 0x0) + mov32i $r3 0xffffffff 0xf + // A number is NaN if its abs value is greater than or unordered with inf + dsetp gtu and $p0 1 abs $r0 0x7ff0000000000000 1 + not $p0 bra #rcp_inf_or_denorm_or_zero + // NaN -> NaN, the next line sets the "quiet" bit of the result. 
This + // behavior is both seen on the CPU and the blob + sched (st 0x0) (st 0x0) (st 0x0) + lop32i or $r1 $r1 0x80000 + sync +rcp_inf_or_denorm_or_zero: + lop32i and $r4 $r1 0x7ff00000 + sched (st 0x0) (st 0x0) (st 0x0) + // Other values with nonzero in exponent field should be inf + isetp eq and $p0 1 $r4 0x0 1 + $p0 bra #rcp_denorm_or_zero + // +/-Inf -> +/-0 + lop32i xor $r1 $r1 0x7ff00000 + sched (st 0x0) (st 0x0) (st 0x0) + mov $r0 0x0 0xf + sync +rcp_denorm_or_zero: + dsetp gtu and $p0 1 abs $r0 0x0 1 + sched (st 0x0) (st 0x0) (st 0x0) + $p0 bra #rcp_denorm + // +/-0 -> +/-Inf + lop32i or $r1 $r1 0x7ff00000 + sync +rcp_denorm: + // non-0 denorms: multiply with 2^54 (the 0x36 in $r3), join with norms + sched (st 0x0) (st 0x0) (st 0x0) + dmul $r0 $r0 0x4350000000000000 + mov $r3 0x36 0xf + sync +rcp_rejoin: + // All numbers with -1 in $r3 have their result ready in $r0d, return them + // others need further calculation + sched (st 0x0) (st 0x0) (st 0x0) + isetp lt and $p0 1 $r3 0x0 1 + $p0 bra #rcp_end + // Step 2: Before the real calculation goes on, renormalize the values to + // range [1, 2) by setting exponent field to 0x3ff (the exponent of 1) + // result in $r6d. The exponent will be recovered later. 
+ bfe u32 $r2 $r1 0xb14 + sched (st 0x0) (st 0x0) (st 0x0) + lop32i and $r7 $r1 0x800fffff + iadd32i $r7 $r7 0x3ff00000 + mov $r6 $r0 0xf + // Step 3: Convert new value to float (no overflow will occur due to step + // 2), calculate rcp and do newton-raphson step once + sched (st 0x0) (st 0x0) (st 0x0) + f2f ftz f64 f32 $r5 $r6 + mufu rcp $r4 $r5 + mov32i $r0 0xbf800000 0xf + sched (st 0x0) (st 0x0) (st 0x0) + ffma $r5 $r4 $r5 $r0 + ffma $r0 $r5 neg $r4 $r4 + // Step 4: convert result $r0 back to double, do newton-raphson steps + f2f f32 f64 $r0 $r0 + sched (st 0x0) (st 0x0) (st 0x0) + f2f f64 f64 $r6 neg $r6 + f2f f32 f64 $r8 0x3f800000 + // 4 Newton-Raphson Steps, tmp in $r4d, result in $r0d + // The formula used here (and above) is: + // RCP_{n + 1} = 2 * RCP_{n} - x * RCP_{n} * RCP_{n} + // The following code uses 2 FMAs for each step, and it will basically + // looks like: + // tmp = -src * RCP_{n} + 1 + // RCP_{n + 1} = RCP_{n} * tmp + RCP_{n} + dfma $r4 $r6 $r0 $r8 + sched (st 0x0) (st 0x0) (st 0x0) + dfma $r0 $r0 $r4 $r0 + dfma $r4 $r6 $r0 $r8 + dfma $r0 $r0 $r4 $r0 sched (st 0x0) (st 0x0) (st 0x0) + dfma $r4 $r6 $r0 $r8 + dfma $r0 $r0 $r4 $r0 + dfma $r4 $r6 $r0 $r8 + sched (st 0x0) (st 0x0) (st 0x0) + dfma $r0 $r0 $r4 $r0 + // Step 5: Exponent recovery and final processing + // The exponent is recovered by adding what we added to the exponent. + // Suppose we want to calculate rcp(x), but we have rcp(cx), then + // rcp(x) = c * rcp(cx) + // The delta in exponent comes from two sources: + // 1) The renormalization in step 2. The delta is: + // 0x3ff - $r2 + // 2) (For the denorm input) The 2^54 we multiplied at rcp_denorm, stored + // in $r3 + // These 2 sources are calculated in the first two lines below, and then + // added to the exponent extracted from the result above. + // Note that after processing, the new exponent may >= 0x7ff (inf) + // or <= 0 (denorm). 
Those cases will be handled respectively below + iadd $r2 neg $r2 0x3ff + iadd $r4 $r2 $r3 + sched (st 0x0) (st 0x0) (st 0x0) + bfe u32 $r3 $r1 0xb14 + // New exponent in $r3 + iadd $r3 $r3 $r4 + iadd32i $r2 $r3 -1 + // (exponent-1) < 0x7fe (unsigned) means the result is in norm range + // (same logic as in step 1) + sched (st 0x0) (st 0x0) (st 0x0) + isetp lt u32 and $p0 1 $r2 0x7fe 1 + not $p0 bra #rcp_result_inf_or_denorm + // Norms: convert exponents back and return + shl $r4 $r4 0x14 + sched (st 0x0) (st 0x0) (st 0x0) + iadd $r1 $r4 $r1 + bra #rcp_end +rcp_result_inf_or_denorm: + // New exponent >= 0x7ff means that result is inf + isetp ge and $p0 1 $r3 0x7ff 1 + sched (st 0x0) (st 0x0) (st 0x0) + not $p0 bra #rcp_result_denorm + // Infinity + lop32i and $r1 $r1 0x80000000 + mov $r0 0x0 0xf + sched (st 0x0) (st 0x0) (st 0x0) + iadd32i $r1 $r1 0x7ff00000 + bra #rcp_end +rcp_result_denorm: + // Denorm result comes from huge input. The greatest possible fp64, i.e. + // 0x7fefffffffffffff's rcp is 0x0004000000000000, 1/4 of the smallest + // normal value. Other rcp result should be greater than that. If we + // set the exponent field to 1, we can recover the result by multiplying + // it with 1/2 or 1/4. 1/2 is used if the "exponent" $r3 is 0, otherwise + // 1/4 ($r3 should be -1 then). This is quite tricky but greatly simplifies + // the logic here. + isetp ne u32 and $p0 1 $r3 0x0 1 + sched (st 0x0) (st 0x0) (st 0x0) + lop32i and $r1 $r1 0x800fffff + // 0x3e800000: 1/4 + $p0 f2f f32 f64 $r6 0x3e800000 + // 0x3f000000: 1/2 + not $p0 f2f f32 f64 $r6 0x3f000000 + sched (st 0x0) (st 0x0) (st 0x0) + iadd32i $r1 $r1 0x00100000 + dmul $r0 $r0 $r6 +rcp_end: + ret + +// RSQ F64 +// +// INPUT: $r0d +// OUTPUT: $r0d +// CLOBBER: $r2 - $r9, $p0 - $p1 +// +gm107_rsq_f64: + // Before getting initial result rsqrt64h, two special cases should be + // handled first. + // 1. 
NaN: set the highest bit in mantissa so it'll be surely recognized + // as NaN in rsqrt64h + sched (st 0xd wr 0x0 wt 0x3f) (st 0xd wt 0x1) (st 0xd) + dsetp gtu and $p0 1 abs $r0 0x7ff0000000000000 1 + $p0 lop32i or $r1 $r1 0x00080000 + lop32i and $r2 $r1 0x7fffffff + // 2. denorms and small normal values: using their original value will + // lose precision either at rsqrt64h or the first step in newton-raphson + // steps below. Take 2 as a threshold in exponent field, and multiply + // with 2^54 if the exponent is smaller or equal. (will multiply 2^27 + // to recover in the end) + sched (st 0xd) (st 0xd) (st 0xd) + bfe u32 $r3 $r1 0xb14 + isetp le u32 and $p1 1 $r3 0x2 1 + lop or 1 $r2 $r0 $r2 + sched (st 0xd wr 0x0) (st 0xd wr 0x0 wt 0x1) (st 0xd) + $p1 dmul $r0 $r0 0x4350000000000000 + mufu rsq64h $r5 $r1 + // rsqrt64h will give correct result for 0/inf/nan, the following logic + // checks whether the input is one of those (exponent is 0x7ff or all 0 + // except for the sign bit) + iset ne u32 and $r6 $r3 0x7ff 1 + sched (st 0xd) (st 0xd) (st 0xd) + lop and 1 $r2 $r2 $r6 + isetp ne u32 and $p0 1 $r2 0x0 1 + $p0 bra #rsq_norm + // For 0/inf/nan, make sure the sign bit agrees with input and return + sched (st 0xd) (st 0xd) (st 0xd wt 0x1) + lop32i and $r1 $r1 0x80000000 + mov $r0 0x0 0xf + lop or 1 $r1 $r1 $r5 + sched (st 0xd) (st 0xf) (st 0xf) + ret + nop 0 + nop 0 +rsq_norm: + // For others, do 4 Newton-Raphson steps with the formula: + // RSQ_{n + 1} = RSQ_{n} * (1.5 - 0.5 * x * RSQ_{n} * RSQ_{n}) + // In the code below, each step is written as: + // tmp1 = 0.5 * x * RSQ_{n} + // tmp2 = -RSQ_{n} * tmp1 + 0.5 + // RSQ_{n + 1} = RSQ_{n} * tmp2 + RSQ_{n} + sched (st 0xd) (st 0xd wr 0x1) (st 0xd wr 0x1 rd 0x0 wt 0x3) + mov $r4 0x0 0xf + // 0x3f000000: 1/2 + f2f f32 f64 $r8 0x3f000000 + dmul $r2 $r0 $r8 + sched (st 0xd wr 0x0 wt 0x3) (st 0xd wr 0x0 wt 0x1) (st 0xd wr 0x0 wt 0x1) + dmul $r0 $r2 $r4 + dfma $r6 $r0 neg $r4 $r8 + dfma $r4 $r4 $r6 $r4 + sched (st 0xd wr 
0x0 wt 0x1) (st 0xd wr 0x0 wt 0x1) (st 0xd wr 0x0 wt 0x1) + dmul $r0 $r2 $r4 + dfma $r6 $r0 neg $r4 $r8 + dfma $r4 $r4 $r6 $r4 + sched (st 0xd wr 0x0 wt 0x1) (st 0xd wr 0x0 wt 0x1) (st 0xd wr 0x0 wt 0x1) + dmul $r0 $r2 $r4 + dfma $r6 $r0 neg $r4 $r8 + dfma $r4 $r4 $r6 $r4 + sched (st 0xd wr 0x0 wt 0x1) (st 0xd wr 0x0 wt 0x1) (st 0xd wr 0x0 wt 0x1) + dmul $r0 $r2 $r4 + dfma $r6 $r0 neg $r4 $r8 + dfma $r4 $r4 $r6 $r4 + // Multiply 2^27 to result for small inputs to recover + sched (st 0xd wr 0x0 wt 0x1) (st 0xd wt 0x1) (st 0xd) + $p1 dmul $r4 $r4 0x41a0000000000000 + mov $r1 $r5 0xf + mov $r0 $r4 0xf + sched (st 0xd) (st 0xf) (st 0xf) ret nop 0 nop 0 diff -Nru mesa-18.3.3/src/gallium/drivers/nouveau/codegen/lib/gm107.asm.h mesa-19.0.1/src/gallium/drivers/nouveau/codegen/lib/gm107.asm.h --- mesa-18.3.3/src/gallium/drivers/nouveau/codegen/lib/gm107.asm.h 2018-09-27 19:13:54.000000000 +0000 +++ mesa-19.0.1/src/gallium/drivers/nouveau/codegen/lib/gm107.asm.h 2019-03-31 23:16:37.000000000 +0000 @@ -82,8 +82,156 @@ 0xe32000000007000f, 0x50b0000000070f00, /* 0x0280: gm107_rcp_f64 */ -/* 0x0280: gm107_rsq_f64 */ 0x001f8000fc0007e0, + 0x38000000b1470102, + 0x1c0ffffffff70203, + 0xe29000000e000000, + 0x001f8000fc0007e0, + 0x366803807fd70307, + 0x5c9807800ff70003, + 0xf0f800000008000f, + 0x001f8000fc0007e0, + 0x010ffffffff7f003, + 0x368c03fff0070087, + 0xe24000000188000f, + 0x001f8000fc0007e0, + 0x0420008000070101, + 0xf0f800000007000f, +/* 0x02f8: rcp_inf_or_denorm_or_zero */ + 0x0407ff0000070104, + 0x001f8000fc0007e0, + 0x5b6503800ff70407, + 0xe24000000200000f, + 0x0447ff0000070101, + 0x001f8000fc0007e0, + 0x5c9807800ff70000, + 0xf0f800000007000f, +/* 0x0338: rcp_denorm_or_zero */ + 0x5b8c03800ff70087, + 0x001f8000fc0007e0, + 0xe24000000100000f, + 0x0427ff0000070101, + 0xf0f800000007000f, +/* 0x0360: rcp_denorm */ + 0x001f8000fc0007e0, + 0x3880004350070000, + 0x3898078003670003, + 0xf0f800000007000f, +/* 0x0380: rcp_rejoin */ + 0x001f8000fc0007e0, + 0x5b6303800ff70307, + 
0xe24000001c00000f, + 0x38000000b1470102, + 0x001f8000fc0007e0, + 0x040800fffff70107, + 0x1c03ff0000070707, + 0x5c98078000070006, + 0x001f8000fc0007e0, + 0x5ca8100000670e05, + 0x5080000000470504, + 0x010bf8000007f000, + 0x001f8000fc0007e0, + 0x5980000000570405, + 0x5981020000470500, + 0x5ca8000000070b00, + 0x001f8000fc0007e0, + 0x5ca8200000670f06, + 0x38a8003f80070b08, + 0x5b70040000070604, + 0x001f8000fc0007e0, + 0x5b70000000470000, + 0x5b70040000070604, + 0x5b70000000470000, + 0x001f8000fc0007e0, + 0x5b70040000070604, + 0x5b70000000470000, + 0x5b70040000070604, + 0x001f8000fc0007e0, + 0x5b70000000470000, + 0x381200003ff70202, + 0x5c10000000370204, + 0x001f8000fc0007e0, + 0x38000000b1470103, + 0x5c10000000470303, + 0x1c0ffffffff70302, + 0x001f8000fc0007e0, + 0x366203807fe70207, + 0xe24000000208000f, + 0x3848000001470404, + 0x001f8000fc0007e0, + 0x5c10000000170401, + 0xe24000000807000f, +/* 0x04d8: rcp_result_inf_or_denorm */ + 0x366d03807ff70307, + 0x001f8000fc0007e0, + 0xe24000000288000f, + 0x0408000000070101, + 0x5c9807800ff70000, + 0x001f8000fc0007e0, + 0x1c07ff0000070101, + 0xe24000000407000f, +/* 0x0518: rcp_result_denorm */ + 0x5b6a03800ff70307, + 0x001f8000fc0007e0, + 0x040800fffff70101, + 0x38a8003e80000b06, + 0x38a8003f00080b06, + 0x001f8000fc0007e0, + 0x1c00010000070101, + 0x5c80000000670000, +/* 0x0558: rcp_end */ + 0xe32000000007000f, +/* 0x0560: gm107_rsq_f64 */ + 0x001fb401fda1ff0d, + 0x368c03fff0070087, + 0x0420008000000101, + 0x0407fffffff70102, + 0x001fb400fda007ed, + 0x38000000b1470103, + 0x366603800027030f, + 0x5c47020000270002, + 0x001fb401e1a0070d, + 0x3880004350010000, + 0x5080000000770105, + 0x365a03807ff70306, + 0x001fb400fda007ed, + 0x5c47000000670202, + 0x5b6a03800ff70207, + 0xe24000000400000f, + 0x003fb400fda007ed, + 0x0408000000070101, + 0x5c9807800ff70000, + 0x5c47020000570101, + 0x001fbc00fde007ed, + 0xe32000000007000f, + 0x50b0000000070f00, + 0x50b0000000070f00, +/* 0x0620: rsq_norm */ + 0x0060b400e5a007ed, + 0x5c9807800ff70004, + 
0x38a8003f00070b08, + 0x5c80000000870002, + 0x003c3401e1a01f0d, + 0x5c80000000470200, + 0x5b71040000470006, + 0x5b70020000670404, + 0x003c3401e1a00f0d, + 0x5c80000000470200, + 0x5b71040000470006, + 0x5b70020000670404, + 0x003c3401e1a00f0d, + 0x5c80000000470200, + 0x5b71040000470006, + 0x5b70020000670404, + 0x003c3401e1a00f0d, + 0x5c80000000470200, + 0x5b71040000470006, + 0x5b70020000670404, + 0x001fb401fda00f0d, + 0x38800041a0010404, + 0x5c98078000570001, + 0x5c98078000470000, + 0x001fbc00fde007ed, 0xe32000000007000f, 0x50b0000000070f00, 0x50b0000000070f00, @@ -93,5 +241,5 @@ 0x0000000000000000, 0x0000000000000120, 0x0000000000000280, - 0x0000000000000280, + 0x0000000000000560, }; diff -Nru mesa-18.3.3/src/gallium/drivers/nouveau/codegen/nv50_ir.cpp mesa-19.0.1/src/gallium/drivers/nouveau/codegen/nv50_ir.cpp --- mesa-18.3.3/src/gallium/drivers/nouveau/codegen/nv50_ir.cpp 2018-09-27 19:13:54.000000000 +0000 +++ mesa-19.0.1/src/gallium/drivers/nouveau/codegen/nv50_ir.cpp 2019-03-31 23:16:37.000000000 +0000 @@ -1119,6 +1119,7 @@ binSize = 0; maxGPR = -1; + fp64 = false; main = new Function(this, "MAIN", ~0); calls.insert(&main->call); diff -Nru mesa-18.3.3/src/gallium/drivers/nouveau/codegen/nv50_ir_emit_gm107.cpp mesa-19.0.1/src/gallium/drivers/nouveau/codegen/nv50_ir_emit_gm107.cpp --- mesa-18.3.3/src/gallium/drivers/nouveau/codegen/nv50_ir_emit_gm107.cpp 2018-12-07 18:58:04.000000000 +0000 +++ mesa-19.0.1/src/gallium/drivers/nouveau/codegen/nv50_ir_emit_gm107.cpp 2019-03-31 23:16:37.000000000 +0000 @@ -192,6 +192,7 @@ void emitTEXs(int); void emitTEX(); + void emitTEXS(); void emitTLD(); void emitTLD4(); void emitTXD(); @@ -2718,6 +2719,104 @@ emitGPR(pos); } +static uint8_t +getTEXSMask(uint8_t mask) +{ + switch (mask) { + case 0x1: return 0x0; + case 0x2: return 0x1; + case 0x3: return 0x4; + case 0x4: return 0x2; + case 0x7: return 0x0; + case 0x8: return 0x3; + case 0x9: return 0x5; + case 0xa: return 0x6; + case 0xb: return 0x1; + case 0xc: return 0x7; + case 
0xd: return 0x2; + case 0xe: return 0x3; + case 0xf: return 0x4; + default: + assert(!"invalid mask"); + return 0; + } +} + +static uint8_t +getTEXSTarget(const TexInstruction *tex) +{ + assert(tex->op == OP_TEX || tex->op == OP_TXL); + + switch (tex->tex.target.getEnum()) { + case TEX_TARGET_1D: + assert(tex->tex.levelZero); + return 0x0; + case TEX_TARGET_2D: + case TEX_TARGET_RECT: + if (tex->tex.levelZero) + return 0x2; + if (tex->op == OP_TXL) + return 0x3; + return 0x1; + case TEX_TARGET_2D_SHADOW: + case TEX_TARGET_RECT_SHADOW: + if (tex->tex.levelZero) + return 0x6; + if (tex->op == OP_TXL) + return 0x5; + return 0x4; + case TEX_TARGET_2D_ARRAY: + if (tex->tex.levelZero) + return 0x8; + return 0x7; + case TEX_TARGET_2D_ARRAY_SHADOW: + assert(tex->tex.levelZero); + return 0x9; + case TEX_TARGET_3D: + if (tex->tex.levelZero) + return 0xb; + assert(tex->op != OP_TXL); + return 0xa; + case TEX_TARGET_CUBE: + assert(!tex->tex.levelZero); + if (tex->op == OP_TXL) + return 0xd; + return 0xc; + default: + assert(false); + return 0x0; + } +} + +static uint8_t +getTLDSTarget(const TexInstruction *tex) +{ + switch (tex->tex.target.getEnum()) { + case TEX_TARGET_1D: + if (tex->tex.levelZero) + return 0x0; + return 0x1; + case TEX_TARGET_2D: + case TEX_TARGET_RECT: + if (tex->tex.levelZero) + return tex->tex.useOffsets ? 0x4 : 0x2; + return tex->tex.useOffsets ? 
0xc : 0x5; + case TEX_TARGET_2D_MS: + assert(tex->tex.levelZero); + return 0x6; + case TEX_TARGET_3D: + assert(tex->tex.levelZero); + return 0x7; + case TEX_TARGET_2D_ARRAY: + assert(tex->tex.levelZero); + return 0x8; + + default: + assert(false); + return 0x0; + } +} + void CodeEmitterGM107::emitTEX() { @@ -2761,6 +2860,50 @@ } void +CodeEmitterGM107::emitTEXS() +{ + const TexInstruction *insn = this->insn->asTex(); + assert(!insn->tex.derivAll); + + switch (insn->op) { + case OP_TEX: + case OP_TXL: + emitInsn (0xd8000000); + emitField(0x35, 4, getTEXSTarget(insn)); + emitField(0x32, 3, getTEXSMask(insn->tex.mask)); + break; + case OP_TXF: + emitInsn (0xda000000); + emitField(0x35, 4, getTLDSTarget(insn)); + emitField(0x32, 3, getTEXSMask(insn->tex.mask)); + break; + case OP_TXG: + assert(insn->tex.useOffsets != 4); + emitInsn (0xdf000000); + emitField(0x34, 2, insn->tex.gatherComp); + emitField(0x33, 1, insn->tex.useOffsets == 1); + emitField(0x32, 1, insn->tex.target.isShadow()); + break; + default: + unreachable("unknown op in emitTEXS()"); + break; + } + + emitField(0x31, 1, insn->tex.liveOnly); + emitField(0x24, 13, insn->tex.r); + if (insn->defExists(1)) + emitGPR(0x1c, insn->def(1)); + else + emitGPR(0x1c); + if (insn->srcExists(1)) + emitGPR(0x14, insn->getSrc(1)); + else + emitGPR(0x14); + emitGPR (0x08, insn->src(0)); + emitGPR (0x00, insn->def(0)); +} + +void CodeEmitterGM107::emitTLD() { const TexInstruction *insn = this->insn->asTex(); @@ -3474,15 +3617,26 @@ emitPIXLD(); break; case OP_TEX: - case OP_TXB: case OP_TXL: + if (insn->asTex()->tex.scalar) + emitTEXS(); + else + emitTEX(); + break; + case OP_TXB: emitTEX(); break; case OP_TXF: - emitTLD(); + if (insn->asTex()->tex.scalar) + emitTEXS(); + else + emitTLD(); break; case OP_TXG: - emitTLD4(); + if (insn->asTex()->tex.scalar) + emitTEXS(); + else + emitTLD4(); break; case OP_TXD: emitTXD(); diff -Nru mesa-18.3.3/src/gallium/drivers/nouveau/codegen/nv50_ir_from_tgsi.cpp 
mesa-19.0.1/src/gallium/drivers/nouveau/codegen/nv50_ir_from_tgsi.cpp --- mesa-18.3.3/src/gallium/drivers/nouveau/codegen/nv50_ir_from_tgsi.cpp 2018-12-07 18:58:04.000000000 +0000 +++ mesa-19.0.1/src/gallium/drivers/nouveau/codegen/nv50_ir_from_tgsi.cpp 2019-03-31 23:16:37.000000000 +0000 @@ -968,6 +968,7 @@ NV50_IR_OPCODE_CASE(ATOMUMAX, ATOM); NV50_IR_OPCODE_CASE(ATOMIMIN, ATOM); NV50_IR_OPCODE_CASE(ATOMIMAX, ATOM); + NV50_IR_OPCODE_CASE(ATOMFADD, ATOM); NV50_IR_OPCODE_CASE(TEX2, TEX); NV50_IR_OPCODE_CASE(TXB2, TXB); @@ -1010,6 +1011,7 @@ case TGSI_OPCODE_ATOMIMIN: return NV50_IR_SUBOP_ATOM_MIN; case TGSI_OPCODE_ATOMUMAX: return NV50_IR_SUBOP_ATOM_MAX; case TGSI_OPCODE_ATOMIMAX: return NV50_IR_SUBOP_ATOM_MAX; + case TGSI_OPCODE_ATOMFADD: return NV50_IR_SUBOP_ATOM_ADD; case TGSI_OPCODE_IMUL_HI: case TGSI_OPCODE_UMUL_HI: return NV50_IR_SUBOP_MUL_HIGH; @@ -1085,6 +1087,8 @@ }; std::vector memoryFiles; + std::vector bufferAtomics; + private: int inferSysValDirection(unsigned sn) const; bool scanDeclaration(const struct tgsi_full_declaration *); @@ -1135,6 +1139,7 @@ //resources.resize(scan.file_max[TGSI_FILE_RESOURCE] + 1); tempArrayId.resize(scan.file_max[TGSI_FILE_TEMPORARY] + 1); memoryFiles.resize(scan.file_max[TGSI_FILE_MEMORY] + 1); + bufferAtomics.resize(scan.file_max[TGSI_FILE_BUFFER] + 1); info->immd.bufSize = 0; @@ -1481,11 +1486,14 @@ tempArrayInfo.insert(std::make_pair(arrayId, std::make_pair( first, last - first + 1))); break; + case TGSI_FILE_BUFFER: + for (i = first; i <= last; ++i) + bufferAtomics[i] = decl->Declaration.Atomic; + break; case TGSI_FILE_ADDRESS: case TGSI_FILE_CONSTANT: case TGSI_FILE_IMMEDIATE: case TGSI_FILE_SAMPLER: - case TGSI_FILE_BUFFER: case TGSI_FILE_IMAGE: break; default: @@ -1619,6 +1627,7 @@ case TGSI_OPCODE_ATOMIMIN: case TGSI_OPCODE_ATOMUMAX: case TGSI_OPCODE_ATOMIMAX: + case TGSI_OPCODE_ATOMFADD: case TGSI_OPCODE_LOAD: info->io.globalAccess |= (insn.getOpcode() == TGSI_OPCODE_LOAD) ? 
0x1 : 0x2; @@ -2717,7 +2726,11 @@ } Instruction *ld = mkLoad(TYPE_U32, dst0[c], sym, off); - ld->cache = tgsi.getCacheMode(); + if (tgsi.getSrc(0).getFile() == TGSI_FILE_BUFFER && + code->bufferAtomics[r]) + ld->cache = nv50_ir::CACHE_CG; + else + ld->cache = tgsi.getCacheMode(); if (ind) ld->setIndirect(0, 1, ind); } @@ -3834,6 +3847,7 @@ case TGSI_OPCODE_ATOMIMIN: case TGSI_OPCODE_ATOMUMAX: case TGSI_OPCODE_ATOMIMAX: + case TGSI_OPCODE_ATOMFADD: handleATOM(dst0, dstTy, tgsi::opcodeToSubOp(tgsi.getOpcode())); break; case TGSI_OPCODE_RESQ: diff -Nru mesa-18.3.3/src/gallium/drivers/nouveau/codegen/nv50_ir.h mesa-19.0.1/src/gallium/drivers/nouveau/codegen/nv50_ir.h --- mesa-18.3.3/src/gallium/drivers/nouveau/codegen/nv50_ir.h 2018-12-07 18:58:04.000000000 +0000 +++ mesa-19.0.1/src/gallium/drivers/nouveau/codegen/nv50_ir.h 2019-03-31 23:16:37.000000000 +0000 @@ -1058,6 +1058,8 @@ enum TexQuery query; const struct ImgFormatDesc *format; + + bool scalar; // for GM107s TEXS, TLDS, TLD4S } tex; ValueRef dPdx[3]; @@ -1309,6 +1311,7 @@ uint32_t tlsSize; // size required for FILE_MEMORY_LOCAL int maxGPR; + bool fp64; MemoryPool mem_Instruction; MemoryPool mem_CmpInstruction; diff -Nru mesa-18.3.3/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nvc0.cpp mesa-19.0.1/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nvc0.cpp --- mesa-18.3.3/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nvc0.cpp 2018-12-07 18:58:04.000000000 +0000 +++ mesa-19.0.1/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nvc0.cpp 2019-03-31 23:16:37.000000000 +0000 @@ -84,6 +84,38 @@ } void +NVC0LegalizeSSA::handleRCPRSQLib(Instruction *i, Value *src[]) +{ + FlowInstruction *call; + Value *def[2]; + int builtin; + + def[0] = bld.mkMovToReg(0, src[0])->getDef(0); + def[1] = bld.mkMovToReg(1, src[1])->getDef(0); + + if (i->op == OP_RCP) + builtin = NVC0_BUILTIN_RCP_F64; + else + builtin = NVC0_BUILTIN_RSQ_F64; + + call = bld.mkFlow(OP_CALL, NULL, CC_ALWAYS, NULL); + def[0] = bld.getSSA(); 
+ def[1] = bld.getSSA(); + bld.mkMovFromReg(def[0], 0); + bld.mkMovFromReg(def[1], 1); + bld.mkClobber(FILE_GPR, 0x3fc, 2); + bld.mkClobber(FILE_PREDICATE, i->op == OP_RSQ ? 0x3 : 0x1, 0); + bld.mkOp2(OP_MERGE, TYPE_U64, i->getDef(0), def[0], def[1]); + + call->fixed = 1; + call->absolute = call->builtin = 1; + call->target.builtin = builtin; + delete_Instruction(prog, i); + + prog->fp64 = true; +} + +void NVC0LegalizeSSA::handleRCPRSQ(Instruction *i) { assert(i->dType == TYPE_F64); @@ -96,6 +128,12 @@ Value *src[2], *dst[2], *def = i->getDef(0); bld.mkSplit(src, 4, i->getSrc(0)); + int chip = prog->getTarget()->getChipset(); + if (chip >= NVISA_GK104_CHIPSET) { + handleRCPRSQLib(i, src); + return; + } + // 2. We don't care about the low 32 bits of the destination. Stick a 0 in. dst[0] = bld.loadImm(NULL, 0); dst[1] = bld.getSSA(); @@ -1063,22 +1101,6 @@ } } - if (chipset >= NVISA_GK104_CHIPSET) { - // - // If TEX requires more than 4 sources, the 2nd register tuple must be - // aligned to 4, even if it consists of just a single 4-byte register. - // - // XXX HACK: We insert 0 sources to avoid the 5 or 6 regs case. 
- // - int s = i->srcCount(0xff, true); - if (s > 4 && s < 7) { - if (i->srcExists(s)) // move potential predicate out of the way - i->moveSources(s, 7 - s); - while (s < 7) - i->setSrc(s++, bld.loadImm(NULL, 0)); - } - } - return true; } @@ -1887,7 +1909,8 @@ su->op == OP_SULDB || su->op == OP_SUSTB || su->op == OP_SUREDB; const int slot = su->tex.r; const int dim = su->tex.target.getDim(); - const int arg = dim + (su->tex.target.isArray() || su->tex.target.isCube()); + const bool array = su->tex.target.isArray() || su->tex.target.isCube(); + const int arg = dim + array; int c; Value *zero = bld.mkImm(0); Value *p1 = NULL; @@ -1896,6 +1919,7 @@ Value *bf, *eau, *off; Value *addr, *pred; Value *ind = su->getIndirectR(); + Value *y, *z; off = bld.getScratch(4); bf = bld.getScratch(4); @@ -1926,34 +1950,42 @@ for (; c < 3; ++c) src[c] = zero; + if (dim == 2 && !array) { + v = loadSuInfo32(ind, slot, NVC0_SU_INFO_UNK1C, su->tex.bindless); + src[2] = bld.mkOp2v(OP_SHR, TYPE_U32, bld.getSSA(), + v, bld.loadImm(NULL, 16)); + + v = loadSuInfo32(ind, slot, NVC0_SU_INFO_DIM(2), su->tex.bindless); + bld.mkOp3(OP_SUCLAMP, TYPE_S32, src[2], src[2], v, zero) + ->subOp = NV50_IR_SUBOP_SUCLAMP_SD(0, 2); + } + // set predicate output if (su->tex.target == TEX_TARGET_BUFFER) { src[0]->getInsn()->setFlagsDef(1, pred); } else - if (su->tex.target.isArray() || su->tex.target.isCube()) { + if (array) { p1 = bld.getSSA(1, FILE_PREDICATE); src[dim]->getInsn()->setFlagsDef(1, p1); } // calculate pixel offset if (dim == 1) { + y = z = zero; if (su->tex.target != TEX_TARGET_BUFFER) bld.mkOp2(OP_AND, TYPE_U32, off, src[0], bld.loadImm(NULL, 0xffff)); - } else - if (dim == 3) { + } else { + y = src[1]; + z = src[2]; + v = loadSuInfo32(ind, slot, NVC0_SU_INFO_UNK1C, su->tex.bindless); bld.mkOp3(OP_MADSP, TYPE_U32, off, src[2], v, src[1]) - ->subOp = NV50_IR_SUBOP_MADSP(4,2,8); // u16l u16l u16l + ->subOp = NV50_IR_SUBOP_MADSP(4,4,8); // u16l u16l u16l v = loadSuInfo32(ind, slot, 
NVC0_SU_INFO_PITCH, su->tex.bindless); bld.mkOp3(OP_MADSP, TYPE_U32, off, off, v, src[0]) - ->subOp = NV50_IR_SUBOP_MADSP(0,2,8); // u32 u16l u16l - } else { - assert(dim == 2); - v = loadSuInfo32(ind, slot, NVC0_SU_INFO_PITCH, su->tex.bindless); - bld.mkOp3(OP_MADSP, TYPE_U32, off, src[1], v, src[0]) - ->subOp = (su->tex.target.isArray() || su->tex.target.isCube()) ? - NV50_IR_SUBOP_MADSP_SD : NV50_IR_SUBOP_MADSP(4,2,8); // u16l u16l u16l + ->subOp = array ? + NV50_IR_SUBOP_MADSP_SD : NV50_IR_SUBOP_MADSP(0,2,8); // u32 u16l u16l } // calculate effective address part 1 @@ -1966,19 +1998,15 @@ ->subOp = NV50_IR_SUBOP_V1(7,6,8|2); } } else { - Value *y = src[1]; - Value *z = src[2]; uint16_t subOp = 0; switch (dim) { case 1: - y = zero; - z = zero; break; case 2: - z = off; - if (!su->tex.target.isArray() && !su->tex.target.isCube()) { - z = loadSuInfo32(ind, slot, NVC0_SU_INFO_UNK1C, su->tex.bindless); + if (array) { + z = off; + } else { subOp = NV50_IR_SUBOP_SUBFM_3D; } break; @@ -2001,7 +2029,7 @@ eau = bld.mkOp3v(OP_SUEAU, TYPE_U32, bld.getScratch(4), off, bf, v); } // add array layer offset - if (su->tex.target.isArray() || su->tex.target.isCube()) { + if (array) { v = loadSuInfo32(ind, slot, NVC0_SU_INFO_ARRAY, su->tex.bindless); if (dim == 1) bld.mkOp3(OP_MADSP, TYPE_U32, eau, src[1], v, eau) diff -Nru mesa-18.3.3/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nvc0.h mesa-19.0.1/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nvc0.h --- mesa-18.3.3/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nvc0.h 2018-12-07 18:58:04.000000000 +0000 +++ mesa-19.0.1/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nvc0.h 2019-03-31 23:16:37.000000000 +0000 @@ -62,6 +62,7 @@ // we want to insert calls to the builtin library only after optimization void handleDIV(Instruction *); // integer division, modulus + void handleRCPRSQLib(Instruction *, Value *[]); void handleRCPRSQ(Instruction *); // double precision float recip/rsqrt void handleFTZ(Instruction 
*); void handleSET(CmpInstruction *); diff -Nru mesa-18.3.3/src/gallium/drivers/nouveau/codegen/nv50_ir_peephole.cpp mesa-19.0.1/src/gallium/drivers/nouveau/codegen/nv50_ir_peephole.cpp --- mesa-18.3.3/src/gallium/drivers/nouveau/codegen/nv50_ir_peephole.cpp 2019-02-01 12:03:20.000000000 +0000 +++ mesa-19.0.1/src/gallium/drivers/nouveau/codegen/nv50_ir_peephole.cpp 2019-03-31 23:16:37.000000000 +0000 @@ -744,6 +744,7 @@ // restrictions, so move it into a separate LValue. bld.setPosition(i, false); i->op = OP_ADD; + i->dnz = 0; i->setSrc(1, bld.mkMov(bld.getSSA(type), i->getSrc(0), type)->getDef(0)); i->setSrc(0, i->getSrc(2)); i->src(0).mod = i->src(2).mod; @@ -1100,6 +1101,7 @@ if (imm0.isNegative()) i->src(t).mod = i->src(t).mod ^ Modifier(NV50_IR_MOD_NEG); i->op = OP_ADD; + i->dnz = 0; i->setSrc(s, i->getSrc(t)); i->src(s).mod = i->src(t).mod; } else @@ -1140,6 +1142,7 @@ i->setSrc(1, i->getSrc(2)); i->src(1).mod = i->src(2).mod; i->setSrc(2, NULL); + i->dnz = 0; i->op = OP_ADD; } else if (!isFloatType(i->dType) && !i->subOp && !i->src(t).mod && !i->src(2).mod) { @@ -1914,7 +1917,7 @@ if (minmax->src(0).mod == minmax->src(1).mod) { if (minmax->def(0).mayReplace(minmax->src(0))) { minmax->def(0).replace(minmax->src(0), false); - minmax->bb->remove(minmax); + delete_Instruction(prog, minmax); } else { minmax->op = OP_CVT; minmax->setSrc(1, NULL); diff -Nru mesa-18.3.3/src/gallium/drivers/nouveau/codegen/nv50_ir_print.cpp mesa-19.0.1/src/gallium/drivers/nouveau/codegen/nv50_ir_print.cpp --- mesa-18.3.3/src/gallium/drivers/nouveau/codegen/nv50_ir_print.cpp 2018-12-07 18:58:04.000000000 +0000 +++ mesa-19.0.1/src/gallium/drivers/nouveau/codegen/nv50_ir_print.cpp 2019-03-31 23:16:37.000000000 +0000 @@ -359,6 +359,31 @@ "samp sc" }; +static const char *texMaskStr[16] = +{ + "____", + "r___", + "_g__", + "rg__", + "__b_", + "r_b_", + "_gb_", + "rgb_", + "___a", + "r__a", + "_g_a", + "rg_a", + "__ba", + "r_ba", + "_gba", + "rgba", +}; + +static const char 
*gatherCompStr[4] = +{ + "r", "g", "b", "a", +}; + #define PRINT(args...) \ do { \ pos += snprintf(&buf[pos], size - pos, args); \ @@ -587,7 +612,10 @@ if (asFlow()->target.bb) PRINT(" %sBB:%i", colour[TXT_BRA], asFlow()->target.bb->getId()); } else { - PRINT("%s ", operationStr[op]); + if (asTex()) + PRINT("%s%s ", operationStr[op], asTex()->tex.scalar ? "s" : ""); + else + PRINT("%s ", operationStr[op]); if (op == OP_LINTERP || op == OP_PINTERP) PRINT("%s ", interpStr[ipa]); switch (op) { @@ -651,10 +679,14 @@ } if (perPatch) PRINT("patch "); - if (asTex()) - PRINT("%s %s$r%u $s%u %s", asTex()->tex.target.getName(), - colour[TXT_MEM], asTex()->tex.r, asTex()->tex.s, - colour[TXT_INSN]); + if (asTex()) { + PRINT("%s %s$r%u $s%u ", asTex()->tex.target.getName(), + colour[TXT_MEM], asTex()->tex.r, asTex()->tex.s); + if (op == OP_TXG) + PRINT("%s ", gatherCompStr[asTex()->tex.gatherComp]); + PRINT("%s %s", texMaskStr[asTex()->tex.mask], colour[TXT_INSN]); + } + if (postFactor) PRINT("x2^%i ", postFactor); PRINT("%s%s", dnz ? "dnz " : (ftz ? 
"ftz " : ""), DataTypeStr[dType]); diff -Nru mesa-18.3.3/src/gallium/drivers/nouveau/codegen/nv50_ir_ra.cpp mesa-19.0.1/src/gallium/drivers/nouveau/codegen/nv50_ir_ra.cpp --- mesa-18.3.3/src/gallium/drivers/nouveau/codegen/nv50_ir_ra.cpp 2018-09-27 19:13:54.000000000 +0000 +++ mesa-19.0.1/src/gallium/drivers/nouveau/codegen/nv50_ir_ra.cpp 2019-03-31 23:16:37.000000000 +0000 @@ -55,7 +55,7 @@ void periodicMask(DataFile f, uint32_t lock, uint32_t unlock); void intersect(DataFile f, const RegisterSet *); - bool assign(int32_t& reg, DataFile f, unsigned int size); + bool assign(int32_t& reg, DataFile f, unsigned int size, unsigned int maxReg); void release(DataFile f, int32_t reg, unsigned int size); void occupy(DataFile f, int32_t reg, unsigned int size); void occupy(const Value *); @@ -66,10 +66,8 @@ inline int getMaxAssigned(DataFile f) const { return fill[f]; } - inline unsigned int getFileSize(DataFile f, uint8_t regSize) const + inline unsigned int getFileSize(DataFile f) const { - if (restrictedGPR16Range && f == FILE_GPR && regSize == 2) - return (last[f] + 1) / 2; return last[f] + 1; } @@ -162,9 +160,9 @@ } bool -RegisterSet::assign(int32_t& reg, DataFile f, unsigned int size) +RegisterSet::assign(int32_t& reg, DataFile f, unsigned int size, unsigned int maxReg) { - reg = bits[f].findFreeRange(size); + reg = bits[f].findFreeRange(size, maxReg); if (reg < 0) return false; fill[f] = MAX2(fill[f], (int32_t)(reg + size - 1)); @@ -261,6 +259,7 @@ bool insertConstraintMoves(); void condenseDefs(Instruction *); + void condenseDefs(Instruction *, const int first, const int last); void condenseSrcs(Instruction *, const int first, const int last); void addHazard(Instruction *i, const ValueRef *src); @@ -274,6 +273,9 @@ void texConstraintNVE0(TexInstruction *); void texConstraintGM107(TexInstruction *); + bool isScalarTexGM107(TexInstruction *); + void handleScalarTexGM107(TexInstruction *); + std::list constrList; const Target *targ; @@ -745,6 +747,7 @@ public: uint32_t 
degree; uint16_t degreeLimit; // if deg < degLimit, node is trivially colourable + uint16_t maxReg; uint16_t colors; DataFile f; @@ -800,7 +803,21 @@ Function *func; Program *prog; - static uint8_t relDegree[17][17]; + struct RelDegree { + uint8_t data[17][17]; + + RelDegree() { + for (int i = 1; i <= 16; ++i) + for (int j = 1; j <= 16; ++j) + data[i][j] = j * ((i + j - 1) / j); + } + + const uint8_t* operator[](std::size_t i) const { + return data[i]; + } + }; + + static const RelDegree relDegree; RegisterSet regs; @@ -812,7 +829,7 @@ std::list mustSpill; }; -uint8_t GCRA::relDegree[17][17]; +const GCRA::RelDegree GCRA::relDegree; GCRA::RIG_Node::RIG_Node() : Node(NULL), next(this), prev(this) { @@ -842,9 +859,11 @@ static bool isShortRegOp(Instruction *insn) { - // Immediates are always in src1. Every other situation can be resolved by + // Immediates are always in src1 (except zeroes, which end up getting + // replaced with a zero reg). Every other situation can be resolved by // using a long encoding. - return insn->srcExists(1) && insn->src(1).getFile() == FILE_IMMEDIATE; + return insn->srcExists(1) && insn->src(1).getFile() == FILE_IMMEDIATE && + insn->getSrc(1)->reg.data.u64; } // Check if this LValue is ever used in an instruction that can't be encoded @@ -880,12 +899,12 @@ weight = std::numeric_limits::infinity(); degree = 0; - int size = regs.getFileSize(f, lval->reg.size); + maxReg = regs.getFileSize(f); // On nv50, we lose a bit of gpr encoding when there's an embedded // immediate. 
- if (regs.restrictedGPR16Range && f == FILE_GPR && isShortRegVal(lval)) - size /= 2; - degreeLimit = size; + if (regs.restrictedGPR16Range && f == FILE_GPR && (lval->reg.size == 2 || isShortRegVal(lval))) + maxReg /= 2; + degreeLimit = maxReg; degreeLimit -= relDegree[1][colors] - 1; livei.insert(lval->livei); @@ -945,6 +964,8 @@ // add val's definitions to rep and extend the live interval of its RIG node rep->defs.insert(rep->defs.end(), val->defs.begin(), val->defs.end()); nRep->livei.unify(nVal->livei); + nRep->degreeLimit = MIN2(nRep->degreeLimit, nVal->degreeLimit); + nRep->maxReg = MIN2(nRep->maxReg, nVal->maxReg); return true; } @@ -1148,11 +1169,6 @@ spill(spill) { prog = func->getProgram(); - - // initialize relative degrees array - i takes away from j - for (int i = 1; i <= 16; ++i) - for (int j = 1; j <= 16; ++j) - relDegree[i][j] = j * ((i + j - 1) / j); } GCRA::~GCRA() @@ -1318,13 +1334,17 @@ } else if (!DLLIST_EMPTY(&hi)) { RIG_Node *best = hi.next; + unsigned bestMaxReg = best->maxReg; float bestScore = best->weight / (float)best->degree; - // spill candidate + // Spill candidate. First go through the ones with the highest max + // register, then the ones with lower. That way the ones with the + // lowest requirement will be allocated first, since it's a stack. 
for (RIG_Node *it = best->next; it != &hi; it = it->next) { float score = it->weight / (float)it->degree; - if (score < bestScore) { + if (score < bestScore || it->maxReg > bestMaxReg) { best = it; bestScore = score; + bestMaxReg = it->maxReg; } } if (isinf(bestScore)) { @@ -1425,7 +1445,7 @@ LValue *lval = node->getValue(); if (prog->dbgFlags & NV50_IR_DEBUG_REG_ALLOC) regs.print(node->f); - bool ret = regs.assign(node->reg, node->f, node->colors); + bool ret = regs.assign(node->reg, node->f, node->colors, node->maxReg); if (ret) { INFO_DBG(prog->dbgFlags, REG_ALLOC, "assigned reg %i\n", node->reg); lval->compMask = node->getCompMask(); @@ -2048,24 +2068,35 @@ void RegAlloc::InsertConstraintsPass::condenseDefs(Instruction *insn) { - uint8_t size = 0; int n; - for (n = 0; insn->defExists(n) && insn->def(n).getFile() == FILE_GPR; ++n) - size += insn->getDef(n)->reg.size; - if (n < 2) + for (n = 0; insn->defExists(n) && insn->def(n).getFile() == FILE_GPR; ++n); + condenseDefs(insn, 0, n - 1); +} + +void +RegAlloc::InsertConstraintsPass::condenseDefs(Instruction *insn, + const int a, const int b) +{ + uint8_t size = 0; + if (a >= b) return; + for (int s = a; s <= b; ++s) + size += insn->getDef(s)->reg.size; + if (!size) + return; + LValue *lval = new_LValue(func, FILE_GPR); lval->reg.size = size; Instruction *split = new_Instruction(func, OP_SPLIT, typeOfSize(size)); split->setSrc(0, lval); - for (int d = 0; d < n; ++d) { - split->setDef(d, insn->getDef(d)); + for (int d = a; d <= b; ++d) { + split->setDef(d - a, insn->getDef(d)); insn->setDef(d, NULL); } - insn->setDef(0, lval); + insn->setDef(a, lval); - for (int k = 1, d = n; insn->defExists(d); ++d, ++k) { + for (int k = a + 1, d = b + 1; insn->defExists(d); ++d, ++k) { insn->setDef(k, insn->getDef(d)); insn->setDef(d, NULL); } @@ -2075,6 +2106,7 @@ insn->bb->insertAfter(insn, split); constrList.push_back(split); } + void RegAlloc::InsertConstraintsPass::condenseSrcs(Instruction *insn, const int a, const int b) @@ 
-2106,6 +2138,159 @@ constrList.push_back(merge); } +bool +RegAlloc::InsertConstraintsPass::isScalarTexGM107(TexInstruction *tex) +{ + if (tex->tex.sIndirectSrc >= 0 || + tex->tex.rIndirectSrc >= 0 || + tex->tex.derivAll) + return false; + + if (tex->tex.mask == 5 || tex->tex.mask == 6) + return false; + + switch (tex->op) { + case OP_TEX: + case OP_TXF: + case OP_TXG: + case OP_TXL: + break; + default: + return false; + } + + // legal variants: + // TEXS.1D.LZ + // TEXS.2D + // TEXS.2D.LZ + // TEXS.2D.LL + // TEXS.2D.DC + // TEXS.2D.LL.DC + // TEXS.2D.LZ.DC + // TEXS.A2D + // TEXS.A2D.LZ + // TEXS.A2D.LZ.DC + // TEXS.3D + // TEXS.3D.LZ + // TEXS.CUBE + // TEXS.CUBE.LL + + // TLDS.1D.LZ + // TLDS.1D.LL + // TLDS.2D.LZ + // TLSD.2D.LZ.AOFFI + // TLDS.2D.LZ.MZ + // TLDS.2D.LL + // TLDS.2D.LL.AOFFI + // TLDS.A2D.LZ + // TLDS.3D.LZ + + // TLD4S: all 2D/RECT variants and only offset + + switch (tex->op) { + case OP_TEX: + if (tex->tex.useOffsets) + return false; + + switch (tex->tex.target.getEnum()) { + case TEX_TARGET_1D: + case TEX_TARGET_2D_ARRAY_SHADOW: + return tex->tex.levelZero; + case TEX_TARGET_CUBE: + return !tex->tex.levelZero; + case TEX_TARGET_2D: + case TEX_TARGET_2D_ARRAY: + case TEX_TARGET_2D_SHADOW: + case TEX_TARGET_3D: + case TEX_TARGET_RECT: + case TEX_TARGET_RECT_SHADOW: + return true; + default: + return false; + } + + case OP_TXL: + if (tex->tex.useOffsets) + return false; + + switch (tex->tex.target.getEnum()) { + case TEX_TARGET_2D: + case TEX_TARGET_2D_SHADOW: + case TEX_TARGET_RECT: + case TEX_TARGET_RECT_SHADOW: + case TEX_TARGET_CUBE: + return true; + default: + return false; + } + + case OP_TXF: + switch (tex->tex.target.getEnum()) { + case TEX_TARGET_1D: + return !tex->tex.useOffsets; + case TEX_TARGET_2D: + case TEX_TARGET_RECT: + return true; + case TEX_TARGET_2D_ARRAY: + case TEX_TARGET_2D_MS: + case TEX_TARGET_3D: + return !tex->tex.useOffsets && tex->tex.levelZero; + default: + return false; + } + + case OP_TXG: + if 
(tex->tex.useOffsets > 1) + return false; + if (tex->tex.mask != 0x3 && tex->tex.mask != 0xf) + return false; + + switch (tex->tex.target.getEnum()) { + case TEX_TARGET_2D: + case TEX_TARGET_2D_MS: + case TEX_TARGET_2D_SHADOW: + case TEX_TARGET_RECT: + case TEX_TARGET_RECT_SHADOW: + return true; + default: + return false; + } + + default: + return false; + } +} + +void +RegAlloc::InsertConstraintsPass::handleScalarTexGM107(TexInstruction *tex) +{ + int defCount = tex->defCount(0xff); + int srcCount = tex->srcCount(0xff); + + tex->tex.scalar = true; + + // 1. handle defs + if (defCount > 3) + condenseDefs(tex, 2, 3); + if (defCount > 1) + condenseDefs(tex, 0, 1); + + // 2. handle srcs + // special case for TXF.A2D + if (tex->op == OP_TXF && tex->tex.target == TEX_TARGET_2D_ARRAY) { + assert(srcCount >= 3); + condenseSrcs(tex, 1, 2); + } else { + if (srcCount > 3) + condenseSrcs(tex, 2, 3); + // only if we have more than 2 sources + if (srcCount > 2) + condenseSrcs(tex, 0, 1); + } + + assert(!tex->defExists(2) && !tex->srcExists(2)); +} + void RegAlloc::InsertConstraintsPass::texConstraintGM107(TexInstruction *tex) { @@ -2113,6 +2298,13 @@ if (isTextureOp(tex->op)) textureMask(tex); + + if (isScalarTexGM107(tex)) { + handleScalarTexGM107(tex); + return; + } + + assert(!tex->tex.scalar); condenseDefs(tex); if (isSurfaceOp(tex->op)) { @@ -2149,9 +2341,19 @@ if (!tex->tex.target.isArray() && tex->tex.useOffsets) s++; } - n = tex->srcCount(0xff) - s; + n = tex->srcCount(0xff, true) - s; + // TODO: Is this necessary? Perhaps just has to be aligned to the + // level that the first arg is, not necessarily to 4. This + // requirement has not been rigorously verified, as it has been on + // Kepler. 
+ if (n > 0 && n < 3) { + if (tex->srcExists(n + s)) // move potential predicate out of the way + tex->moveSources(n + s, 3 - n); + while (n < 3) + tex->setSrc(s + n++, new_LValue(func, FILE_GPR)); + } } else { - s = tex->srcCount(0xff); + s = tex->srcCount(0xff, true); n = 0; } @@ -2174,14 +2376,18 @@ } else if (isTextureOp(tex->op)) { int n = tex->srcCount(0xff, true); - if (n > 4) { - condenseSrcs(tex, 0, 3); - if (n > 5) // NOTE: first call modified positions already - condenseSrcs(tex, 4 - (4 - 1), n - 1 - (4 - 1)); - } else - if (n > 1) { - condenseSrcs(tex, 0, n - 1); + int s = n > 4 ? 4 : n; + if (n > 4 && n < 7) { + if (tex->srcExists(n)) // move potential predicate out of the way + tex->moveSources(n, 7 - n); + + while (n < 7) + tex->setSrc(n++, new_LValue(func, FILE_GPR)); } + if (s > 1) + condenseSrcs(tex, 0, s - 1); + if (n > 4) + condenseSrcs(tex, 1, n - s); } } @@ -2318,6 +2524,7 @@ assert(cst->getSrc(s)->defs.size() == 1); // still SSA Instruction *defi = cst->getSrc(s)->defs.front()->getInsn(); + bool imm = defi->op == OP_MOV && defi->src(0).getFile() == FILE_IMMEDIATE; bool load = defi->op == OP_LOAD && diff -Nru mesa-18.3.3/src/gallium/drivers/nouveau/codegen/nv50_ir_target.cpp mesa-19.0.1/src/gallium/drivers/nouveau/codegen/nv50_ir_target.cpp --- mesa-18.3.3/src/gallium/drivers/nouveau/codegen/nv50_ir_target.cpp 2018-12-07 18:58:04.000000000 +0000 +++ mesa-19.0.1/src/gallium/drivers/nouveau/codegen/nv50_ir_target.cpp 2019-03-31 23:16:37.000000000 +0000 @@ -399,6 +399,7 @@ } } } + info->io.fp64 |= fp64; info->bin.relocData = emit->getRelocInfo(); info->bin.fixupData = emit->getFixupInfo(); diff -Nru mesa-18.3.3/src/gallium/drivers/nouveau/codegen/nv50_ir_target_nv50.cpp mesa-19.0.1/src/gallium/drivers/nouveau/codegen/nv50_ir_target_nv50.cpp --- mesa-18.3.3/src/gallium/drivers/nouveau/codegen/nv50_ir_target_nv50.cpp 2018-12-07 18:58:04.000000000 +0000 +++ mesa-19.0.1/src/gallium/drivers/nouveau/codegen/nv50_ir_target_nv50.cpp 2019-03-31 
23:16:37.000000000 +0000 @@ -203,7 +203,7 @@ { switch (file) { case FILE_NULL: return 0; - case FILE_GPR: return 256; // in 16-bit units ** + case FILE_GPR: return 254; // in 16-bit units ** case FILE_PREDICATE: return 0; case FILE_FLAGS: return 4; case FILE_ADDRESS: return 4; diff -Nru mesa-18.3.3/src/gallium/drivers/nouveau/codegen/nv50_ir_util.cpp mesa-19.0.1/src/gallium/drivers/nouveau/codegen/nv50_ir_util.cpp --- mesa-18.3.3/src/gallium/drivers/nouveau/codegen/nv50_ir_util.cpp 2017-11-05 00:14:08.000000000 +0000 +++ mesa-19.0.1/src/gallium/drivers/nouveau/codegen/nv50_ir_util.cpp 2019-03-31 23:16:37.000000000 +0000 @@ -311,12 +311,12 @@ } } -int BitSet::findFreeRange(unsigned int count) const +int BitSet::findFreeRange(unsigned int count, unsigned int max) const { const uint32_t m = (1 << count) - 1; - int pos = size; + int pos = max; unsigned int i; - const unsigned int end = (size + 31) / 32; + const unsigned int end = (max + 31) / 32; if (count == 1) { for (i = 0; i < end; ++i) { @@ -373,7 +373,7 @@ pos += i * 32; - return ((pos + count) <= size) ? pos : -1; + return ((pos + count) <= max) ? pos : -1; } void BitSet::print() const diff -Nru mesa-18.3.3/src/gallium/drivers/nouveau/codegen/nv50_ir_util.h mesa-19.0.1/src/gallium/drivers/nouveau/codegen/nv50_ir_util.h --- mesa-18.3.3/src/gallium/drivers/nouveau/codegen/nv50_ir_util.h 2017-11-05 00:14:08.000000000 +0000 +++ mesa-19.0.1/src/gallium/drivers/nouveau/codegen/nv50_ir_util.h 2019-03-31 23:16:37.000000000 +0000 @@ -539,8 +539,11 @@ return data[i / 32] & (((1 << n) - 1) << (i % 32)); } - // Find a range of size (<= 32) clear bits aligned to roundup_pow2(size). - int findFreeRange(unsigned int size) const; + // Find a range of count (<= 32) clear bits aligned to roundup_pow2(count). 
+ int findFreeRange(unsigned int count, unsigned int max) const; + inline int findFreeRange(unsigned int count) const { + return findFreeRange(count, size); + } BitSet& operator|=(const BitSet&); diff -Nru mesa-18.3.3/src/gallium/drivers/nouveau/nouveau_screen.h mesa-19.0.1/src/gallium/drivers/nouveau/nouveau_screen.h --- mesa-18.3.3/src/gallium/drivers/nouveau/nouveau_screen.h 2018-12-07 18:58:04.000000000 +0000 +++ mesa-19.0.1/src/gallium/drivers/nouveau/nouveau_screen.h 2019-03-31 23:16:37.000000000 +0000 @@ -3,6 +3,7 @@ #include "pipe/p_screen.h" #include "util/disk_cache.h" +#include "util/u_atomic.h" #include "util/u_memory.h" #ifdef DEBUG @@ -106,10 +107,10 @@ #ifdef NOUVEAU_ENABLE_DRIVER_STATISTICS # define NOUVEAU_DRV_STAT(s, n, v) do { \ - (s)->stats.named.n += (v); \ + p_atomic_add(&(s)->stats.named.n, (v)); \ } while(0) -# define NOUVEAU_DRV_STAT_RES(r, n, v) do { \ - nouveau_screen((r)->base.screen)->stats.named.n += (v); \ +# define NOUVEAU_DRV_STAT_RES(r, n, v) do { \ + p_atomic_add(&nouveau_screen((r)->base.screen)->stats.named.n, v); \ } while(0) # define NOUVEAU_DRV_STAT_IFD(x) x #else diff -Nru mesa-18.3.3/src/gallium/drivers/nouveau/nv30/nv30_context.c mesa-19.0.1/src/gallium/drivers/nouveau/nv30/nv30_context.c --- mesa-18.3.3/src/gallium/drivers/nouveau/nv30/nv30_context.c 2017-11-05 00:14:08.000000000 +0000 +++ mesa-19.0.1/src/gallium/drivers/nouveau/nv30/nv30_context.c 2019-03-31 23:16:37.000000000 +0000 @@ -221,9 +221,7 @@ /*XXX: *cough* per-context pushbufs */ push = screen->base.pushbuf; nv30->base.pushbuf = push; - nv30->base.pushbuf->user_priv = &nv30->bufctx; /* hack at validate time */ - nv30->base.pushbuf->rsvd_kick = 16; /* hack in screen before first space */ - nv30->base.pushbuf->kick_notify = nv30_context_kick_notify; + push->kick_notify = nv30_context_kick_notify; nv30->base.invalidate_resource_storage = nv30_invalidate_resource_storage; diff -Nru mesa-18.3.3/src/gallium/drivers/nouveau/nv30/nv30_fragprog.c 
mesa-19.0.1/src/gallium/drivers/nouveau/nv30/nv30_fragprog.c --- mesa-18.3.3/src/gallium/drivers/nouveau/nv30/nv30_fragprog.c 2017-11-05 00:14:08.000000000 +0000 +++ mesa-19.0.1/src/gallium/drivers/nouveau/nv30/nv30_fragprog.c 2019-03-31 23:16:37.000000000 +0000 @@ -171,7 +171,7 @@ * code */ if (fp != nv30->state.fragprog) - PUSH_RESET(nv30->base.pushbuf, BUFCTX_FRAGPROG); + nouveau_bufctx_reset(nv30->bufctx, BUFCTX_FRAGPROG); nv30->fragprog.program = fp; nv30->dirty |= NV30_NEW_FRAGPROG; diff -Nru mesa-18.3.3/src/gallium/drivers/nouveau/nv30/nv30_miptree.c mesa-19.0.1/src/gallium/drivers/nouveau/nv30/nv30_miptree.c --- mesa-18.3.3/src/gallium/drivers/nouveau/nv30/nv30_miptree.c 2018-04-03 17:32:26.000000000 +0000 +++ mesa-19.0.1/src/gallium/drivers/nouveau/nv30/nv30_miptree.c 2019-03-31 23:16:37.000000000 +0000 @@ -116,8 +116,22 @@ rect->x0 = util_format_get_nblocksx(pt->format, x) << mt->ms_x; rect->y0 = util_format_get_nblocksy(pt->format, y) << mt->ms_y; - rect->x1 = rect->x0 + (w << mt->ms_x); - rect->y1 = rect->y0 + (h << mt->ms_y); + rect->x1 = rect->x0 + (util_format_get_nblocksx(pt->format, w) << mt->ms_x); + rect->y1 = rect->y0 + (util_format_get_nblocksy(pt->format, h) << mt->ms_y); + + /* XXX There's some indication that swizzled formats > 4 bytes are treated + * differently. However that only applies to RGBA16_FLOAT, RGBA32_FLOAT, + * and the DXT* formats. The former aren't properly supported yet, and the + * latter avoid swizzled layouts. 
+ + if (mt->swizzled && rect->cpp > 4) { + unsigned scale = rect->cpp / 4; + rect->w *= scale; + rect->x0 *= scale; + rect->x1 *= scale; + rect->cpp = 4; + } + */ } void @@ -265,6 +279,7 @@ { struct nv30_context *nv30 = nv30_context(pipe); struct nouveau_device *dev = nv30->screen->base.device; + struct nv30_miptree *mt = nv30_miptree(pt); struct nv30_transfer *tx; unsigned access = 0; int ret; @@ -285,10 +300,11 @@ tx->nblocksy = util_format_get_nblocksy(pt->format, box->height); define_rect(pt, level, box->z, box->x, box->y, - tx->nblocksx, tx->nblocksy, &tx->img); + box->width, box->height, &tx->img); ret = nouveau_bo_new(dev, NOUVEAU_BO_GART | NOUVEAU_BO_MAP, 0, - tx->base.layer_stride, NULL, &tx->tmp.bo); + tx->base.layer_stride * tx->base.box.depth, NULL, + &tx->tmp.bo); if (ret) { pipe_resource_reference(&tx->base.resource, NULL); FREE(tx); @@ -308,8 +324,25 @@ tx->tmp.y1 = tx->tmp.h; tx->tmp.z = 0; - if (usage & PIPE_TRANSFER_READ) - nv30_transfer_rect(nv30, NEAREST, &tx->img, &tx->tmp); + if (usage & PIPE_TRANSFER_READ) { + bool is_3d = mt->base.base.target == PIPE_TEXTURE_3D; + unsigned offset = tx->img.offset; + unsigned z = tx->img.z; + unsigned i; + for (i = 0; i < box->depth; ++i) { + nv30_transfer_rect(nv30, NEAREST, &tx->img, &tx->tmp); + if (is_3d && mt->swizzled) + tx->img.z++; + else if (is_3d) + tx->img.offset += mt->level[level].zslice_size; + else + tx->img.offset += mt->layer_size; + tx->tmp.offset += tx->base.layer_stride; + } + tx->img.z = z; + tx->img.offset = offset; + tx->tmp.offset = 0; + } if (tx->tmp.bo->map) { *ptransfer = &tx->base; @@ -338,9 +371,21 @@ { struct nv30_context *nv30 = nv30_context(pipe); struct nv30_transfer *tx = nv30_transfer(ptx); + struct nv30_miptree *mt = nv30_miptree(tx->base.resource); + unsigned i; if (ptx->usage & PIPE_TRANSFER_WRITE) { - nv30_transfer_rect(nv30, NEAREST, &tx->tmp, &tx->img); + bool is_3d = mt->base.base.target == PIPE_TEXTURE_3D; + for (i = 0; i < tx->base.box.depth; ++i) { + 
nv30_transfer_rect(nv30, NEAREST, &tx->tmp, &tx->img); + if (is_3d && mt->swizzled) + tx->img.z++; + else if (is_3d) + tx->img.offset += mt->level[tx->base.level].zslice_size; + else + tx->img.offset += mt->layer_size; + tx->tmp.offset += tx->base.layer_stride; + } /* Allow the copies above to finish executing before freeing the source */ nouveau_fence_work(nv30->screen->base.fence.current, @@ -404,8 +449,7 @@ !util_is_power_of_two_or_zero(pt->width0) || !util_is_power_of_two_or_zero(pt->height0) || !util_is_power_of_two_or_zero(pt->depth0) || - util_format_is_compressed(pt->format) || - util_format_is_float(pt->format) || mt->ms_mode) { + mt->ms_mode) { mt->uniform_pitch = util_format_get_nblocksx(pt->format, w) * blocksz; mt->uniform_pitch = align(mt->uniform_pitch, 64); if (pt->bind & PIPE_BIND_SCANOUT) { @@ -418,14 +462,20 @@ } } - if (!mt->uniform_pitch) + if (util_format_is_compressed(pt->format)) { + // Compressed (DXT) formats are packed tightly. We don't mark them as + // swizzled, since their layout is largely linear. However we do end up + // omitting the LINEAR flag when texturing them, as the levels are not + // uniformly sized (for POT sizes). 
+ } else if (!mt->uniform_pitch) { mt->swizzled = true; + } size = 0; for (l = 0; l <= pt->last_level; l++) { struct nv30_miptree_level *lvl = &mt->level[l]; unsigned nbx = util_format_get_nblocksx(pt->format, w); - unsigned nby = util_format_get_nblocksx(pt->format, h); + unsigned nby = util_format_get_nblocksy(pt->format, h); lvl->offset = size; lvl->pitch = mt->uniform_pitch; diff -Nru mesa-18.3.3/src/gallium/drivers/nouveau/nv30/nv30_screen.c mesa-19.0.1/src/gallium/drivers/nouveau/nv30/nv30_screen.c --- mesa-18.3.3/src/gallium/drivers/nouveau/nv30/nv30_screen.c 2018-12-07 18:58:04.000000000 +0000 +++ mesa-19.0.1/src/gallium/drivers/nouveau/nv30/nv30_screen.c 2019-03-31 23:16:37.000000000 +0000 @@ -77,6 +77,11 @@ return 1; case PIPE_CAP_MAX_VERTEX_ATTRIB_STRIDE: return 2048; + case PIPE_CAP_MAX_TEXTURE_UPLOAD_MEMORY_BUDGET: + return 8 * 1024 * 1024; + case PIPE_CAP_MAX_VARYINGS: + return 8; + /* supported capabilities */ case PIPE_CAP_ANISOTROPIC_FILTER: case PIPE_CAP_POINT_SPRITE: @@ -241,7 +246,6 @@ case PIPE_CAP_CONSERVATIVE_RASTER_POST_DEPTH_COVERAGE: case PIPE_CAP_MAX_CONSERVATIVE_RASTER_SUBPIXEL_PRECISION_BIAS: case PIPE_CAP_PROGRAMMABLE_SAMPLE_LOCATIONS: - case PIPE_CAP_MAX_TEXTURE_UPLOAD_MEMORY_BUDGET: return 0; case PIPE_CAP_MAX_GS_INVOCATIONS: @@ -434,6 +438,12 @@ if (MAX2(1, sample_count) != MAX2(1, storage_sample_count)) return false; + /* No way to render to a swizzled 3d texture. We don't necessarily know if + * it's swizzled or not here, but we have to assume anyways. 
+ */ + if (target == PIPE_TEXTURE_3D && (bindings & PIPE_BIND_RENDER_TARGET)) + return false; + /* shared is always supported */ bindings &= ~PIPE_BIND_SHARED; diff -Nru mesa-18.3.3/src/gallium/drivers/nouveau/nv30/nv30_texture.c mesa-19.0.1/src/gallium/drivers/nouveau/nv30/nv30_texture.c --- mesa-18.3.3/src/gallium/drivers/nouveau/nv30/nv30_texture.c 2017-11-05 00:14:08.000000000 +0000 +++ mesa-19.0.1/src/gallium/drivers/nouveau/nv30/nv30_texture.c 2019-03-31 23:16:37.000000000 +0000 @@ -287,7 +287,7 @@ so->npot_size0 = (pt->width0 << 16) | pt->height0; if (eng3d->oclass >= NV40_3D_CLASS) { so->npot_size1 = (pt->depth0 << 20) | mt->uniform_pitch; - if (!mt->swizzled) + if (mt->uniform_pitch) so->fmt |= NV40_3D_TEX_FORMAT_LINEAR; so->fmt |= 0x00008000; so->fmt |= (pt->last_level + 1) << NV40_3D_TEX_FORMAT_MIPMAP_COUNT__SHIFT; diff -Nru mesa-18.3.3/src/gallium/drivers/nouveau/nv50/nv50_context.c mesa-19.0.1/src/gallium/drivers/nouveau/nv50/nv50_context.c --- mesa-18.3.3/src/gallium/drivers/nouveau/nv50/nv50_context.c 2017-11-05 00:14:08.000000000 +0000 +++ mesa-19.0.1/src/gallium/drivers/nouveau/nv50/nv50_context.c 2019-03-31 23:16:37.000000000 +0000 @@ -379,6 +379,15 @@ util_dynarray_init(&nv50->global_residents, NULL); + // Make sure that the first TSC entry has SRGB conversion bit set, since we + // use it as a fallback. + if (!screen->tsc.entries[0]) + nv50_upload_tsc0(nv50); + + // And mark samplers as dirty so that the first slot would get bound to the + // zero entry if it's not otherwise set. 
+ nv50->dirty_3d |= NV50_NEW_3D_SAMPLERS; + return pipe; out_err: diff -Nru mesa-18.3.3/src/gallium/drivers/nouveau/nv50/nv50_context.h mesa-19.0.1/src/gallium/drivers/nouveau/nv50/nv50_context.h --- mesa-18.3.3/src/gallium/drivers/nouveau/nv50/nv50_context.h 2017-11-05 00:14:08.000000000 +0000 +++ mesa-19.0.1/src/gallium/drivers/nouveau/nv50/nv50_context.h 2019-03-31 23:16:37.000000000 +0000 @@ -256,6 +256,7 @@ void nv50_validate_textures(struct nv50_context *); void nv50_validate_samplers(struct nv50_context *); void nv50_upload_ms_info(struct nouveau_pushbuf *); +void nv50_upload_tsc0(struct nv50_context *); struct pipe_sampler_view * nv50_create_texture_view(struct pipe_context *, diff -Nru mesa-18.3.3/src/gallium/drivers/nouveau/nv50/nv50_query.c mesa-19.0.1/src/gallium/drivers/nouveau/nv50/nv50_query.c --- mesa-18.3.3/src/gallium/drivers/nouveau/nv50/nv50_query.c 2017-11-05 00:14:08.000000000 +0000 +++ mesa-19.0.1/src/gallium/drivers/nouveau/nv50/nv50_query.c 2019-03-31 23:16:37.000000000 +0000 @@ -98,12 +98,10 @@ case PIPE_QUERY_OCCLUSION_COUNTER: case PIPE_QUERY_OCCLUSION_PREDICATE: case PIPE_QUERY_OCCLUSION_PREDICATE_CONSERVATIVE: + if (hq->state == NV50_HW_QUERY_STATE_READY) + wait = true; if (likely(!condition)) { - if (unlikely(hq->nesting)) - cond = wait ? NV50_3D_COND_MODE_NOT_EQUAL : - NV50_3D_COND_MODE_ALWAYS; - else - cond = NV50_3D_COND_MODE_RES_NON_ZERO; + cond = wait ? NV50_3D_COND_MODE_NOT_EQUAL : NV50_3D_COND_MODE_ALWAYS; } else { cond = wait ? 
NV50_3D_COND_MODE_EQUAL : NV50_3D_COND_MODE_ALWAYS; } @@ -129,7 +127,7 @@ PUSH_SPACE(push, 9); - if (wait) { + if (wait && hq->state != NV50_HW_QUERY_STATE_READY) { BEGIN_NV04(push, SUBC_3D(NV50_GRAPH_SERIALIZE), 1); PUSH_DATA (push, 0); } diff -Nru mesa-18.3.3/src/gallium/drivers/nouveau/nv50/nv50_query_hw.c mesa-19.0.1/src/gallium/drivers/nouveau/nv50/nv50_query_hw.c --- mesa-18.3.3/src/gallium/drivers/nouveau/nv50/nv50_query_hw.c 2017-11-05 00:14:08.000000000 +0000 +++ mesa-19.0.1/src/gallium/drivers/nouveau/nv50/nv50_query_hw.c 2019-03-31 23:16:37.000000000 +0000 @@ -29,11 +29,6 @@ #include "nv50/nv50_query_hw_sm.h" #include "nv_object.xml.h" -#define NV50_HW_QUERY_STATE_READY 0 -#define NV50_HW_QUERY_STATE_ACTIVE 1 -#define NV50_HW_QUERY_STATE_ENDED 2 -#define NV50_HW_QUERY_STATE_FLUSHED 3 - /* XXX: Nested queries, and simultaneous queries on multiple gallium contexts * (since we use only a single GPU channel per screen) will not work properly. * @@ -158,8 +153,7 @@ case PIPE_QUERY_OCCLUSION_COUNTER: case PIPE_QUERY_OCCLUSION_PREDICATE: case PIPE_QUERY_OCCLUSION_PREDICATE_CONSERVATIVE: - hq->nesting = nv50->screen->num_occlusion_queries_active++; - if (hq->nesting) { + if (nv50->screen->num_occlusion_queries_active++) { nv50_hw_query_get(push, q, 0x10, 0x0100f002); } else { PUSH_SPACE(push, 4); diff -Nru mesa-18.3.3/src/gallium/drivers/nouveau/nv50/nv50_query_hw.h mesa-19.0.1/src/gallium/drivers/nouveau/nv50/nv50_query_hw.h --- mesa-18.3.3/src/gallium/drivers/nouveau/nv50/nv50_query_hw.h 2017-11-05 00:14:08.000000000 +0000 +++ mesa-19.0.1/src/gallium/drivers/nouveau/nv50/nv50_query_hw.h 2019-03-31 23:16:37.000000000 +0000 @@ -6,6 +6,11 @@ #include "nv50_query.h" +#define NV50_HW_QUERY_STATE_READY 0 +#define NV50_HW_QUERY_STATE_ACTIVE 1 +#define NV50_HW_QUERY_STATE_ENDED 2 +#define NV50_HW_QUERY_STATE_FLUSHED 3 + #define NVA0_HW_QUERY_STREAM_OUTPUT_BUFFER_OFFSET (PIPE_QUERY_TYPES + 0) struct nv50_hw_query; @@ -29,7 +34,6 @@ uint8_t state; bool is64bit; uint8_t 
rotate; - int nesting; /* only used for occlusion queries */ struct nouveau_mm_allocation *mm; struct nouveau_fence *fence; }; diff -Nru mesa-18.3.3/src/gallium/drivers/nouveau/nv50/nv50_screen.c mesa-19.0.1/src/gallium/drivers/nouveau/nv50/nv50_screen.c --- mesa-18.3.3/src/gallium/drivers/nouveau/nv50/nv50_screen.c 2018-12-07 18:58:04.000000000 +0000 +++ mesa-19.0.1/src/gallium/drivers/nouveau/nv50/nv50_screen.c 2019-03-31 23:16:37.000000000 +0000 @@ -136,6 +136,8 @@ return 0; case PIPE_CAP_MAX_VERTEX_ATTRIB_STRIDE: return 2048; + case PIPE_CAP_MAX_VERTEX_ELEMENT_SRC_OFFSET: + return 2047; case PIPE_CAP_CONSTANT_BUFFER_OFFSET_ALIGNMENT: return 256; case PIPE_CAP_TEXTURE_BUFFER_OFFSET_ALIGNMENT: @@ -152,6 +154,10 @@ return (class_3d >= NVA3_3D_CLASS) ? 4 : 0; case PIPE_CAP_MAX_WINDOW_RECTANGLES: return NV50_MAX_WINDOW_RECTANGLES; + case PIPE_CAP_MAX_TEXTURE_UPLOAD_MEMORY_BUDGET: + return 16 * 1024 * 1024; + case PIPE_CAP_MAX_VARYINGS: + return 15; /* supported caps */ case PIPE_CAP_TEXTURE_MIRROR_CLAMP: @@ -211,6 +217,7 @@ case PIPE_CAP_TGSI_CLOCK: case PIPE_CAP_CAN_BIND_CONST_BUFFER_AS_VERTEX: case PIPE_CAP_ALLOW_MAPPED_BUFFERS_DURING_EXECUTION: + case PIPE_CAP_DEST_SURFACE_SRGB_CONTROL: return 1; case PIPE_CAP_SEAMLESS_CUBE_MAP: return 1; /* class_3d >= NVA0_3D_CLASS; */ @@ -301,10 +308,14 @@ case PIPE_CAP_CONSERVATIVE_RASTER_POST_DEPTH_COVERAGE: case PIPE_CAP_MAX_CONSERVATIVE_RASTER_SUBPIXEL_PRECISION_BIAS: case PIPE_CAP_PROGRAMMABLE_SAMPLE_LOCATIONS: - case PIPE_CAP_MAX_TEXTURE_UPLOAD_MEMORY_BUDGET: case PIPE_CAP_MAX_COMBINED_SHADER_BUFFERS: case PIPE_CAP_MAX_COMBINED_HW_ATOMIC_COUNTERS: case PIPE_CAP_MAX_COMBINED_HW_ATOMIC_COUNTER_BUFFERS: + case PIPE_CAP_SURFACE_SAMPLE_COUNT: + case PIPE_CAP_TGSI_ATOMFADD: + case PIPE_CAP_QUERY_PIPELINE_STATISTICS_SINGLE: + case PIPE_CAP_RGB_OVERRIDE_DST_ALPHA_BLEND: + case PIPE_CAP_GLSL_TESS_LEVELS_AS_INPUTS: return 0; case PIPE_CAP_VENDOR_ID: diff -Nru mesa-18.3.3/src/gallium/drivers/nouveau/nv50/nv50_state.c 
mesa-19.0.1/src/gallium/drivers/nouveau/nv50/nv50_state.c --- mesa-18.3.3/src/gallium/drivers/nouveau/nv50/nv50_state.c 2018-12-07 18:58:04.000000000 +0000 +++ mesa-19.0.1/src/gallium/drivers/nouveau/nv50/nv50_state.c 2019-03-31 23:16:37.000000000 +0000 @@ -967,7 +967,7 @@ util_copy_framebuffer_state(&nv50->framebuffer, fb); - nv50->dirty_3d |= NV50_NEW_3D_FRAMEBUFFER; + nv50->dirty_3d |= NV50_NEW_3D_FRAMEBUFFER | NV50_NEW_3D_TEXTURES; } static void diff -Nru mesa-18.3.3/src/gallium/drivers/nouveau/nv50/nv50_tex.c mesa-19.0.1/src/gallium/drivers/nouveau/nv50/nv50_tex.c --- mesa-18.3.3/src/gallium/drivers/nouveau/nv50/nv50_tex.c 2017-11-05 00:14:08.000000000 +0000 +++ mesa-19.0.1/src/gallium/drivers/nouveau/nv50/nv50_tex.c 2019-03-31 23:16:37.000000000 +0000 @@ -380,6 +380,16 @@ } nv50->state.num_samplers[s] = nv50->num_samplers[s]; + // TXF, in unlinked tsc mode, will always use sampler 0. So we have to + // ensure that it remains bound. Its contents don't matter, all samplers we + // ever create have the SRGB_CONVERSION bit set, so as long as the first + // entry is initialized, we're good to go. This is the only bit that has + // any effect on what TXF does. 
+ if (!nv50->samplers[s][0]) { + BEGIN_NV04(push, NV50_3D(BIND_TSC(s)), 1); + PUSH_DATA (push, 1); + } + return need_flush; } @@ -451,3 +461,14 @@ BEGIN_NI04(push, NV50_3D(CB_DATA(0)), ARRAY_SIZE(msaa_sample_xy_offsets)); PUSH_DATAp(push, msaa_sample_xy_offsets, ARRAY_SIZE(msaa_sample_xy_offsets)); } + +void nv50_upload_tsc0(struct nv50_context *nv50) +{ + struct nouveau_pushbuf *push = nv50->base.pushbuf; + u32 data[8] = { G80_TSC_0_SRGB_CONVERSION }; + nv50_sifc_linear_u8(&nv50->base, nv50->screen->txc, + 65536 /* + tsc->id * 32 */, + NOUVEAU_BO_VRAM, 32, data); + BEGIN_NV04(push, NV50_3D(TSC_FLUSH), 1); + PUSH_DATA (push, 0); +} diff -Nru mesa-18.3.3/src/gallium/drivers/nouveau/nvc0/nvc0_compute.c mesa-19.0.1/src/gallium/drivers/nouveau/nvc0/nvc0_compute.c --- mesa-18.3.3/src/gallium/drivers/nouveau/nvc0/nvc0_compute.c 2018-09-27 19:13:54.000000000 +0000 +++ mesa-19.0.1/src/gallium/drivers/nouveau/nvc0/nvc0_compute.c 2019-03-31 23:16:37.000000000 +0000 @@ -423,6 +423,7 @@ nvc0_launch_grid(struct pipe_context *pipe, const struct pipe_grid_info *info) { struct nvc0_context *nvc0 = nvc0_context(pipe); + struct nvc0_screen *screen = nvc0->screen; struct nouveau_pushbuf *push = nvc0->base.pushbuf; struct nvc0_program *cp = nvc0->compprog; int ret; @@ -463,12 +464,14 @@ PUSH_DATA (push, (info->block[1] << 16) | info->block[0]); PUSH_DATA (push, info->block[2]); + nouveau_pushbuf_space(push, 32, 2, 1); + PUSH_REFN(push, screen->text, NV_VRAM_DOMAIN(&screen->base) | NOUVEAU_BO_RD); + if (unlikely(info->indirect)) { struct nv04_resource *res = nv04_resource(info->indirect); uint32_t offset = res->offset + info->indirect_offset; unsigned macro = NVC0_CP_MACRO_LAUNCH_GRID_INDIRECT; - nouveau_pushbuf_space(push, 16, 0, 1); PUSH_REFN(push, res->bo, NOUVEAU_BO_RD | res->domain); PUSH_DATA(push, NVC0_FIFO_PKHDR_1I(1, macro, 3)); nouveau_pushbuf_data(push, res->bo, offset, diff -Nru mesa-18.3.3/src/gallium/drivers/nouveau/nvc0/nvc0_context.c 
mesa-19.0.1/src/gallium/drivers/nouveau/nvc0/nvc0_context.c --- mesa-18.3.3/src/gallium/drivers/nouveau/nvc0/nvc0_context.c 2018-01-12 19:24:23.000000000 +0000 +++ mesa-19.0.1/src/gallium/drivers/nouveau/nvc0/nvc0_context.c 2019-03-31 23:16:37.000000000 +0000 @@ -449,11 +449,9 @@ flags = NV_VRAM_DOMAIN(&screen->base) | NOUVEAU_BO_RD; - BCTX_REFN_bo(nvc0->bufctx_3d, 3D_TEXT, flags, screen->text); BCTX_REFN_bo(nvc0->bufctx_3d, 3D_SCREEN, flags, screen->uniform_bo); BCTX_REFN_bo(nvc0->bufctx_3d, 3D_SCREEN, flags, screen->txc); if (screen->compute) { - BCTX_REFN_bo(nvc0->bufctx_cp, CP_TEXT, flags, screen->text); BCTX_REFN_bo(nvc0->bufctx_cp, CP_SCREEN, flags, screen->uniform_bo); BCTX_REFN_bo(nvc0->bufctx_cp, CP_SCREEN, flags, screen->txc); } @@ -478,6 +476,24 @@ util_dynarray_init(&nvc0->global_residents, NULL); + // Make sure that the first TSC entry has SRGB conversion bit set, since we + // use it as a fallback on Fermi for TXF, and on Kepler+ generations for + // FBFETCH handling (which also uses TXF). + // + // NOTE: Preliminary testing suggests that this isn't necessary at all at + // least on GM20x (untested on Kepler). However this is ~free, so no reason + // not to do it. 
+ if (!screen->tsc.entries[0]) + nvc0_upload_tsc0(nvc0); + + // On Fermi, mark samplers dirty so that the proper binding can happen + if (screen->base.class_3d < NVE4_3D_CLASS) { + for (int s = 0; s < 6; s++) + nvc0->samplers_dirty[s] = 1; + nvc0->dirty_3d |= NVC0_NEW_3D_SAMPLERS; + nvc0->dirty_cp |= NVC0_NEW_CP_SAMPLERS; + } + return pipe; out_err: diff -Nru mesa-18.3.3/src/gallium/drivers/nouveau/nvc0/nvc0_context.h mesa-19.0.1/src/gallium/drivers/nouveau/nvc0/nvc0_context.h --- mesa-18.3.3/src/gallium/drivers/nouveau/nvc0/nvc0_context.h 2018-12-07 18:58:04.000000000 +0000 +++ mesa-19.0.1/src/gallium/drivers/nouveau/nvc0/nvc0_context.h 2019-03-31 23:16:37.000000000 +0000 @@ -365,6 +365,7 @@ void nvc0_validate_suf(struct nvc0_context *nvc0, int s); void nvc0_validate_textures(struct nvc0_context *); void nvc0_validate_samplers(struct nvc0_context *); +void nvc0_upload_tsc0(struct nvc0_context *); void nve4_set_tex_handles(struct nvc0_context *); void nvc0_validate_surfaces(struct nvc0_context *); void nve4_set_surface_info(struct nouveau_pushbuf *, @@ -433,6 +434,7 @@ /* nvc0_push.c */ void nvc0_push_vbo(struct nvc0_context *, const struct pipe_draw_info *); +void nvc0_push_vbo_indirect(struct nvc0_context *, const struct pipe_draw_info *); /* nve4_compute.c */ void nve4_launch_grid(struct pipe_context *, const struct pipe_grid_info *); diff -Nru mesa-18.3.3/src/gallium/drivers/nouveau/nvc0/nvc0_program.c mesa-19.0.1/src/gallium/drivers/nouveau/nvc0/nvc0_program.c --- mesa-18.3.3/src/gallium/drivers/nouveau/nvc0/nvc0_program.c 2018-09-27 19:13:54.000000000 +0000 +++ mesa-19.0.1/src/gallium/drivers/nouveau/nvc0/nvc0_program.c 2019-03-31 23:16:37.000000000 +0000 @@ -831,16 +831,6 @@ NOUVEAU_ERR("Error allocating TEXT area: %d\n", ret); return false; } - nouveau_bufctx_reset(nvc0->bufctx_3d, NVC0_BIND_3D_TEXT); - BCTX_REFN_bo(nvc0->bufctx_3d, 3D_TEXT, - NV_VRAM_DOMAIN(&screen->base) | NOUVEAU_BO_RD, - screen->text); - if (screen->compute) { - 
nouveau_bufctx_reset(nvc0->bufctx_cp, NVC0_BIND_CP_TEXT); - BCTX_REFN_bo(nvc0->bufctx_cp, CP_TEXT, - NV_VRAM_DOMAIN(&screen->base) | NOUVEAU_BO_RD, - screen->text); - } /* Re-upload the builtin function into the new code segment. */ nvc0_program_library_upload(nvc0); diff -Nru mesa-18.3.3/src/gallium/drivers/nouveau/nvc0/nvc0_query.c mesa-19.0.1/src/gallium/drivers/nouveau/nvc0/nvc0_query.c --- mesa-18.3.3/src/gallium/drivers/nouveau/nvc0/nvc0_query.c 2018-04-11 19:02:35.000000000 +0000 +++ mesa-19.0.1/src/gallium/drivers/nouveau/nvc0/nvc0_query.c 2019-03-31 23:16:37.000000000 +0000 @@ -121,12 +121,10 @@ case PIPE_QUERY_OCCLUSION_COUNTER: case PIPE_QUERY_OCCLUSION_PREDICATE: case PIPE_QUERY_OCCLUSION_PREDICATE_CONSERVATIVE: + if (hq->state == NVC0_HW_QUERY_STATE_READY) + wait = true; if (likely(!condition)) { - if (unlikely(hq->nesting)) - cond = wait ? NVC0_3D_COND_MODE_NOT_EQUAL : - NVC0_3D_COND_MODE_ALWAYS; - else - cond = NVC0_3D_COND_MODE_RES_NON_ZERO; + cond = wait ? NVC0_3D_COND_MODE_NOT_EQUAL : NVC0_3D_COND_MODE_ALWAYS; } else { cond = wait ? 
NVC0_3D_COND_MODE_EQUAL : NVC0_3D_COND_MODE_ALWAYS; } @@ -151,7 +149,7 @@ return; } - if (wait) + if (wait && hq->state != NVC0_HW_QUERY_STATE_READY) nvc0_hw_query_fifo_wait(nvc0, q); PUSH_SPACE(push, 10); diff -Nru mesa-18.3.3/src/gallium/drivers/nouveau/nvc0/nvc0_query_hw.c mesa-19.0.1/src/gallium/drivers/nouveau/nvc0/nvc0_query_hw.c --- mesa-18.3.3/src/gallium/drivers/nouveau/nvc0/nvc0_query_hw.c 2018-04-11 19:02:35.000000000 +0000 +++ mesa-19.0.1/src/gallium/drivers/nouveau/nvc0/nvc0_query_hw.c 2019-03-31 23:16:37.000000000 +0000 @@ -28,11 +28,6 @@ #include "nvc0/nvc0_query_hw_metric.h" #include "nvc0/nvc0_query_hw_sm.h" -#define NVC0_HW_QUERY_STATE_READY 0 -#define NVC0_HW_QUERY_STATE_ACTIVE 1 -#define NVC0_HW_QUERY_STATE_ENDED 2 -#define NVC0_HW_QUERY_STATE_FLUSHED 3 - #define NVC0_HW_QUERY_ALLOC_SPACE 256 bool @@ -158,14 +153,18 @@ case PIPE_QUERY_OCCLUSION_COUNTER: case PIPE_QUERY_OCCLUSION_PREDICATE: case PIPE_QUERY_OCCLUSION_PREDICATE_CONSERVATIVE: - hq->nesting = nvc0->screen->num_occlusion_queries_active++; - if (hq->nesting) { + if (nvc0->screen->num_occlusion_queries_active++) { nvc0_hw_query_get(push, q, 0x10, 0x0100f002); } else { PUSH_SPACE(push, 3); BEGIN_NVC0(push, NVC0_3D(COUNTER_RESET), 1); PUSH_DATA (push, NVC0_3D_COUNTER_RESET_SAMPLECNT); IMMED_NVC0(push, NVC0_3D(SAMPLECNT_ENABLE), 1); + /* Given that the counter is reset, the contents at 0x10 are + * equivalent to doing the query -- we would get hq->sequence as the + * payload and 0 as the reported value. This is already set up above + * as in the hq->rotate case. 
+ */ } break; case PIPE_QUERY_PRIMITIVES_GENERATED: @@ -199,6 +198,7 @@ nvc0_hw_query_get(push, q, 0xc0 + 0x70, 0x0980a002); /* ROP, PIXELS */ nvc0_hw_query_get(push, q, 0xc0 + 0x80, 0x0d808002); /* TCP, LAUNCHES */ nvc0_hw_query_get(push, q, 0xc0 + 0x90, 0x0e809002); /* TEP, LAUNCHES */ + ((uint64_t *)hq->data)[(12 + 10) * 2] = 0; break; default: break; @@ -271,6 +271,7 @@ nvc0_hw_query_get(push, q, 0x70, 0x0980a002); /* ROP, PIXELS */ nvc0_hw_query_get(push, q, 0x80, 0x0d808002); /* TCP, LAUNCHES */ nvc0_hw_query_get(push, q, 0x90, 0x0e809002); /* TEP, LAUNCHES */ + ((uint64_t *)hq->data)[10 * 2] = 0; break; case PIPE_QUERY_TIMESTAMP_DISJOINT: /* This query is not issued on GPU because disjoint is forced to false */ diff -Nru mesa-18.3.3/src/gallium/drivers/nouveau/nvc0/nvc0_query_hw.h mesa-19.0.1/src/gallium/drivers/nouveau/nvc0/nvc0_query_hw.h --- mesa-18.3.3/src/gallium/drivers/nouveau/nvc0/nvc0_query_hw.h 2017-11-05 00:14:08.000000000 +0000 +++ mesa-19.0.1/src/gallium/drivers/nouveau/nvc0/nvc0_query_hw.h 2019-03-31 23:16:37.000000000 +0000 @@ -6,6 +6,11 @@ #include "nvc0_query.h" +#define NVC0_HW_QUERY_STATE_READY 0 +#define NVC0_HW_QUERY_STATE_ACTIVE 1 +#define NVC0_HW_QUERY_STATE_ENDED 2 +#define NVC0_HW_QUERY_STATE_FLUSHED 3 + #define NVC0_HW_QUERY_TFB_BUFFER_OFFSET (PIPE_QUERY_TYPES + 0) struct nvc0_hw_query; @@ -29,7 +34,6 @@ uint8_t state; boolean is64bit; uint8_t rotate; - int nesting; /* only used for occlusion queries */ struct nouveau_mm_allocation *mm; struct nouveau_fence *fence; }; diff -Nru mesa-18.3.3/src/gallium/drivers/nouveau/nvc0/nvc0_screen.c mesa-19.0.1/src/gallium/drivers/nouveau/nvc0/nvc0_screen.c --- mesa-18.3.3/src/gallium/drivers/nouveau/nvc0/nvc0_screen.c 2018-12-07 18:58:04.000000000 +0000 +++ mesa-19.0.1/src/gallium/drivers/nouveau/nvc0/nvc0_screen.c 2019-03-31 23:16:37.000000000 +0000 @@ -154,6 +154,8 @@ return 1 << 27; case PIPE_CAP_MAX_VERTEX_ATTRIB_STRIDE: return 2048; + case PIPE_CAP_MAX_VERTEX_ELEMENT_SRC_OFFSET: + return 
2047; case PIPE_CAP_CONSTANT_BUFFER_OFFSET_ALIGNMENT: return 256; case PIPE_CAP_TEXTURE_BUFFER_OFFSET_ALIGNMENT: @@ -178,6 +180,15 @@ return NVC0_MAX_WINDOW_RECTANGLES; case PIPE_CAP_MAX_CONSERVATIVE_RASTER_SUBPIXEL_PRECISION_BIAS: return class_3d >= GM200_3D_CLASS ? 8 : 0; + case PIPE_CAP_MAX_TEXTURE_UPLOAD_MEMORY_BUDGET: + return 64 * 1024 * 1024; + case PIPE_CAP_MAX_VARYINGS: + /* NOTE: These only count our slots for GENERIC varyings. + * The address space may be larger, but the actual hard limit seems to be + * less than what the address space layout permits, so don't add TEXCOORD, + * COLOR, etc. here. + */ + return 0x1f0 / 16; /* supported caps */ case PIPE_CAP_TEXTURE_MIRROR_CLAMP: @@ -262,6 +273,7 @@ case PIPE_CAP_CAN_BIND_CONST_BUFFER_AS_VERTEX: case PIPE_CAP_ALLOW_MAPPED_BUFFERS_DURING_EXECUTION: case PIPE_CAP_QUERY_SO_OVERFLOW: + case PIPE_CAP_DEST_SURFACE_SRGB_CONTROL: return 1; case PIPE_CAP_PREFER_BLIT_BASED_TEXTURE_TRANSFER: return nouveau_screen(pscreen)->vram_domain & NOUVEAU_BO_VRAM ? 
1 : 0; @@ -271,6 +283,8 @@ case PIPE_CAP_TGSI_BALLOT: case PIPE_CAP_BINDLESS_TEXTURE: return class_3d >= NVE4_3D_CLASS; + case PIPE_CAP_TGSI_ATOMFADD: + return class_3d < GM107_3D_CLASS; /* needs additional lowering */ case PIPE_CAP_POLYGON_MODE_FILL_RECTANGLE: case PIPE_CAP_TGSI_VS_LAYER_VIEWPORT: case PIPE_CAP_TGSI_TES_LAYER_VIEWPORT: @@ -324,10 +338,13 @@ case PIPE_CAP_CONSTBUF0_FLAGS: case PIPE_CAP_PACKED_UNIFORMS: case PIPE_CAP_CONSERVATIVE_RASTER_PRE_SNAP_POINTS_LINES: - case PIPE_CAP_MAX_TEXTURE_UPLOAD_MEMORY_BUDGET: case PIPE_CAP_MAX_COMBINED_SHADER_BUFFERS: case PIPE_CAP_MAX_COMBINED_HW_ATOMIC_COUNTERS: case PIPE_CAP_MAX_COMBINED_HW_ATOMIC_COUNTER_BUFFERS: + case PIPE_CAP_SURFACE_SAMPLE_COUNT: + case PIPE_CAP_QUERY_PIPELINE_STATISTICS_SINGLE: + case PIPE_CAP_RGB_OVERRIDE_DST_ALPHA_BLEND: + case PIPE_CAP_GLSL_TESS_LEVELS_AS_INPUTS: return 0; case PIPE_CAP_VENDOR_ID: @@ -384,18 +401,6 @@ case PIPE_SHADER_CAP_MAX_CONTROL_FLOW_DEPTH: return 16; case PIPE_SHADER_CAP_MAX_INPUTS: - if (shader == PIPE_SHADER_VERTEX) - return 32; - /* NOTE: These only count our slots for GENERIC varyings. - * The address space may be larger, but the actual hard limit seems to be - * less than what the address space layout permits, so don't add TEXCOORD, - * COLOR, etc. here. - */ - if (shader == PIPE_SHADER_FRAGMENT) - return 0x1f0 / 16; - /* Actually this counts CLIPVERTEX, which occupies the last generic slot, - * and excludes 0x60 per-patch inputs. 
- */ return 0x200 / 16; case PIPE_SHADER_CAP_MAX_OUTPUTS: return 32; @@ -625,7 +630,6 @@ nouveau_heap_destroy(&screen->lib_code); nouveau_heap_destroy(&screen->text_heap); - FREE(screen->default_tsc); FREE(screen->tic.entries); nouveau_object_del(&screen->eng3d); @@ -1279,8 +1283,8 @@ for (i = 0; i < NVC0_MAX_VIEWPORTS; i++) { BEGIN_NVC0(push, NVC0_3D(SCISSOR_ENABLE(i)), 3); PUSH_DATA (push, 1); - PUSH_DATA (push, 8192 << 16); - PUSH_DATA (push, 8192 << 16); + PUSH_DATA (push, 16384 << 16); + PUSH_DATA (push, 16384 << 16); } #define MK_MACRO(m, n) i = nvc0_graph_set_macro(screen, m, i, sizeof(n), n); @@ -1384,9 +1388,6 @@ if (!nvc0_blitter_create(screen)) goto fail; - screen->default_tsc = CALLOC_STRUCT(nv50_tsc_entry); - screen->default_tsc->tsc[0] = G80_TSC_0_SRGB_CONVERSION; - nouveau_fence_new(&screen->base, &screen->base.fence.current); return &screen->base; diff -Nru mesa-18.3.3/src/gallium/drivers/nouveau/nvc0/nvc0_screen.h mesa-19.0.1/src/gallium/drivers/nouveau/nvc0/nvc0_screen.h --- mesa-18.3.3/src/gallium/drivers/nouveau/nvc0/nvc0_screen.h 2018-09-27 19:13:54.000000000 +0000 +++ mesa-19.0.1/src/gallium/drivers/nouveau/nvc0/nvc0_screen.h 2019-03-31 23:16:37.000000000 +0000 @@ -89,8 +89,6 @@ struct nvc0_blitter *blitter; - struct nv50_tsc_entry *default_tsc; - struct { void **entries; int next; diff -Nru mesa-18.3.3/src/gallium/drivers/nouveau/nvc0/nvc0_state.c mesa-19.0.1/src/gallium/drivers/nouveau/nvc0/nvc0_state.c --- mesa-18.3.3/src/gallium/drivers/nouveau/nvc0/nvc0_state.c 2018-12-07 18:58:04.000000000 +0000 +++ mesa-19.0.1/src/gallium/drivers/nouveau/nvc0/nvc0_state.c 2019-03-31 23:16:37.000000000 +0000 @@ -852,7 +852,9 @@ util_copy_framebuffer_state(&nvc0->framebuffer, fb); - nvc0->dirty_3d |= NVC0_NEW_3D_FRAMEBUFFER | NVC0_NEW_3D_SAMPLE_LOCATIONS; + nvc0->dirty_3d |= NVC0_NEW_3D_FRAMEBUFFER | NVC0_NEW_3D_SAMPLE_LOCATIONS | + NVC0_NEW_3D_TEXTURES; + nvc0->dirty_cp |= NVC0_NEW_CP_TEXTURES; } static void diff -Nru 
mesa-18.3.3/src/gallium/drivers/nouveau/nvc0/nvc0_state_validate.c mesa-19.0.1/src/gallium/drivers/nouveau/nvc0/nvc0_state_validate.c --- mesa-18.3.3/src/gallium/drivers/nouveau/nvc0/nvc0_state_validate.c 2018-09-27 19:13:54.000000000 +0000 +++ mesa-19.0.1/src/gallium/drivers/nouveau/nvc0/nvc0_state_validate.c 2019-03-31 23:16:37.000000000 +0000 @@ -1,4 +1,4 @@ - +#include "util/u_format.h" #include "util/u_framebuffer.h" #include "util/u_math.h" #include "util/u_viewport.h" @@ -831,20 +831,6 @@ pipe_sampler_view_reference(&nvc0->fbtexture, NULL); nvc0->fbtexture = new_view; - if (screen->default_tsc->id < 0) { - struct nv50_tsc_entry *tsc = nv50_tsc_entry(screen->default_tsc); - tsc->id = nvc0_screen_tsc_alloc(screen, tsc); - nvc0->base.push_data(&nvc0->base, screen->txc, 65536 + tsc->id * 32, - NV_VRAM_DOMAIN(&screen->base), 32, tsc->tsc); - screen->tsc.lock[tsc->id / 32] |= 1 << (tsc->id % 32); - - IMMED_NVC0(push, NVC0_3D(TSC_FLUSH), 0); - if (screen->base.class_3d < NVE4_3D_CLASS) { - BEGIN_NVC0(push, NVC0_3D(BIND_TSC2(0)), 1); - PUSH_DATA (push, (tsc->id << 12) | 1); - } - } - if (new_view) { struct nv50_tic_entry *tic = nv50_tic_entry(new_view); assert(tic->id < 0); @@ -860,7 +846,7 @@ PUSH_DATA (push, screen->uniform_bo->offset + NVC0_CB_AUX_INFO(4)); BEGIN_1IC0(push, NVC0_3D(CB_POS), 1 + 1); PUSH_DATA (push, NVC0_CB_AUX_FB_TEX_INFO); - PUSH_DATA (push, (screen->default_tsc->id << 20) | tic->id); + PUSH_DATA (push, (0 << 20) | tic->id); } else { BEGIN_NVC0(push, NVC0_3D(BIND_TIC2(0)), 1); PUSH_DATA (push, (tic->id << 9) | 1); diff -Nru mesa-18.3.3/src/gallium/drivers/nouveau/nvc0/nvc0_surface.c mesa-19.0.1/src/gallium/drivers/nouveau/nvc0/nvc0_surface.c --- mesa-18.3.3/src/gallium/drivers/nouveau/nvc0/nvc0_surface.c 2018-10-21 19:21:32.000000000 +0000 +++ mesa-19.0.1/src/gallium/drivers/nouveau/nvc0/nvc0_surface.c 2019-03-31 23:16:37.000000000 +0000 @@ -1178,6 +1178,7 @@ nvc0->cond_cond, nvc0->cond_mode); nouveau_bufctx_reset(nvc0->bufctx_3d, 
NVC0_BIND_3D_VTX_TMP); + nouveau_bufctx_reset(nvc0->bufctx_3d, NVC0_BIND_3D_TEXT); nouveau_bufctx_reset(nvc0->bufctx_3d, NVC0_BIND_3D_FB); nouveau_bufctx_reset(nvc0->bufctx_3d, NVC0_BIND_3D_TEX(4, 0)); nouveau_bufctx_reset(nvc0->bufctx_3d, NVC0_BIND_3D_TEX(4, 1)); @@ -1200,6 +1201,7 @@ static void nvc0_blit_3d(struct nvc0_context *nvc0, const struct pipe_blit_info *info) { + struct nvc0_screen *screen = nvc0->screen; struct nvc0_blitctx *blit = nvc0->blit; struct nouveau_pushbuf *push = nvc0->base.pushbuf; struct pipe_resource *src = info->src.resource; @@ -1301,6 +1303,8 @@ BCTX_REFN_bo(nvc0->bufctx_3d, 3D_VTX_TMP, NOUVEAU_BO_GART | NOUVEAU_BO_RD, vtxbuf_bo); + BCTX_REFN_bo(nvc0->bufctx_3d, 3D_TEXT, + NV_VRAM_DOMAIN(&screen->base) | NOUVEAU_BO_RD, screen->text); nouveau_pushbuf_validate(push); BEGIN_NVC0(push, NVC0_3D(VERTEX_ARRAY_FETCH(0)), 4); diff -Nru mesa-18.3.3/src/gallium/drivers/nouveau/nvc0/nvc0_tex.c mesa-19.0.1/src/gallium/drivers/nouveau/nvc0/nvc0_tex.c --- mesa-18.3.3/src/gallium/drivers/nouveau/nvc0/nvc0_tex.c 2018-09-27 19:13:54.000000000 +0000 +++ mesa-19.0.1/src/gallium/drivers/nouveau/nvc0/nvc0_tex.c 2019-03-31 23:16:37.000000000 +0000 @@ -657,6 +657,19 @@ nvc0->state.num_samplers[s] = nvc0->num_samplers[s]; + // TXF, in unlinked tsc mode, will always use sampler 0. So we have to + // ensure that it remains bound. Its contents don't matter, all samplers we + // ever create have the SRGB_CONVERSION bit set, so as long as the first + // entry is initialized, we're good to go. This is the only bit that has + // any effect on what TXF does. + if ((nvc0->samplers_dirty[s] & 1) && !nvc0->samplers[s][0]) { + if (n == 0) + n = 1; + // We're guaranteed that the first command refers to the first slot, so + // we're not overwriting a valid entry. 
+ commands[0] = (0 << 12) | (0 << 4) | 1; + } + if (n) { if (unlikely(s == 5)) BEGIN_NIC0(push, NVC0_CP(BIND_TSC), n); @@ -728,6 +741,18 @@ nvc0->dirty_cp |= NVC0_NEW_CP_SAMPLERS; } +void +nvc0_upload_tsc0(struct nvc0_context *nvc0) +{ + struct nouveau_pushbuf *push = nvc0->base.pushbuf; + u32 data[8] = { G80_TSC_0_SRGB_CONVERSION }; + nvc0->base.push_data(&nvc0->base, nvc0->screen->txc, + 65536 /*+ tsc->id * 32*/, + NV_VRAM_DOMAIN(&nvc0->screen->base), 32, data); + BEGIN_NVC0(push, NVC0_3D(TSC_FLUSH), 1); + PUSH_DATA (push, 0); +} + /* Upload the "diagonal" entries for the possible texture sources ($t == $s). * At some point we might want to get a list of the combinations used by a * shader and fill in those entries instead of having it extract the handles. @@ -1026,21 +1051,13 @@ } else { struct nv50_miptree *mt = nv50_miptree(&res->base); struct nv50_miptree_level *lvl = &mt->level[view->u.tex.level]; - const unsigned z = view->u.tex.first_layer; + unsigned z = view->u.tex.first_layer; - if (z) { - if (mt->layout_3d) { - address += nvc0_mt_zslice_offset(mt, view->u.tex.level, z); - /* doesn't work if z passes z-tile boundary */ - if (depth > 1) { - pipe_debug_message(&nvc0->base.debug, CONFORMANCE, - "3D images are not really supported!"); - debug_printf("3D images are not really supported!\n"); - } - } else { - address += mt->layer_stride * z; - } + if (!mt->layout_3d) { + address += mt->layer_stride * z; + z = 0; } + address += lvl->offset; info[0] = address >> 8; @@ -1055,7 +1072,8 @@ info[6] = depth - 1; info[6] |= (lvl->tile_mode & 0xf00) << 21; info[6] |= NVC0_TILE_SHIFT_Z(lvl->tile_mode) << 22; - info[7] = 0; + info[7] = mt->layout_3d ? 
1 : 0; + info[7] |= z << 16; info[14] = mt->ms_x; info[15] = mt->ms_y; } diff -Nru mesa-18.3.3/src/gallium/drivers/nouveau/nvc0/nvc0_vbo.c mesa-19.0.1/src/gallium/drivers/nouveau/nvc0/nvc0_vbo.c --- mesa-18.3.3/src/gallium/drivers/nouveau/nvc0/nvc0_vbo.c 2018-01-12 19:24:23.000000000 +0000 +++ mesa-19.0.1/src/gallium/drivers/nouveau/nvc0/nvc0_vbo.c 2019-03-31 23:16:37.000000000 +0000 @@ -919,6 +919,7 @@ struct nvc0_context *nvc0 = nvc0_context(pipe); struct nouveau_pushbuf *push = nvc0->base.pushbuf; struct nvc0_screen *screen = nvc0->screen; + unsigned vram_domain = NV_VRAM_DOMAIN(&screen->base); int s; /* NOTE: caller must ensure that (min_index + index_bias) is >= 0 */ @@ -982,6 +983,9 @@ resident->flags); } + BCTX_REFN_bo(nvc0->bufctx_3d, 3D_TEXT, vram_domain | NOUVEAU_BO_RD, + screen->text); + nvc0_state_validate_3d(nvc0, ~0); if (nvc0->vertprog->vp.need_draw_parameters && !info->indirect) { @@ -1036,7 +1040,10 @@ } if (nvc0->state.vbo_mode) { - nvc0_push_vbo(nvc0, info); + if (info->indirect) + nvc0_push_vbo_indirect(nvc0, info); + else + nvc0_push_vbo(nvc0, info); goto cleanup; } @@ -1092,6 +1099,7 @@ nouveau_pushbuf_bufctx(push, NULL); + nouveau_bufctx_reset(nvc0->bufctx_3d, NVC0_BIND_3D_TEXT); nouveau_bufctx_reset(nvc0->bufctx_3d, NVC0_BIND_3D_IDX); nouveau_bufctx_reset(nvc0->bufctx_3d, NVC0_BIND_3D_BINDLESS); } diff -Nru mesa-18.3.3/src/gallium/drivers/nouveau/nvc0/nvc0_vbo_translate.c mesa-19.0.1/src/gallium/drivers/nouveau/nvc0/nvc0_vbo_translate.c --- mesa-18.3.3/src/gallium/drivers/nouveau/nvc0/nvc0_vbo_translate.c 2017-11-05 00:14:08.000000000 +0000 +++ mesa-19.0.1/src/gallium/drivers/nouveau/nvc0/nvc0_vbo_translate.c 2019-03-31 23:16:37.000000000 +0000 @@ -466,6 +466,83 @@ } } +typedef struct { + uint32_t count; + uint32_t primCount; + uint32_t first; + uint32_t baseInstance; +} DrawArraysIndirectCommand; + +typedef struct { + uint32_t count; + uint32_t primCount; + uint32_t firstIndex; + int32_t baseVertex; + uint32_t baseInstance; +} 
DrawElementsIndirectCommand; + +void +nvc0_push_vbo_indirect(struct nvc0_context *nvc0, const struct pipe_draw_info *info) +{ + /* The strategy here is to just read the commands from the indirect buffer + * and do the draws. This is suboptimal, but will only happen in the case + * that conversion is required for FIXED or DOUBLE inputs. + */ + struct nvc0_screen *screen = nvc0->screen; + struct nouveau_pushbuf *push = nvc0->base.pushbuf; + struct nv04_resource *buf = nv04_resource(info->indirect->buffer); + struct nv04_resource *buf_count = nv04_resource(info->indirect->indirect_draw_count); + unsigned i; + + unsigned draw_count = info->indirect->draw_count; + if (buf_count) { + uint32_t *count = nouveau_resource_map_offset( + &nvc0->base, buf_count, info->indirect->indirect_draw_count_offset, + NOUVEAU_BO_RD); + draw_count = *count; + } + + uint8_t *buf_data = nouveau_resource_map_offset( + &nvc0->base, buf, info->indirect->offset, NOUVEAU_BO_RD); + struct pipe_draw_info single = *info; + single.indirect = NULL; + for (i = 0; i < draw_count; i++, buf_data += info->indirect->stride) { + if (info->index_size) { + DrawElementsIndirectCommand *cmd = (void *)buf_data; + single.start = info->start + cmd->firstIndex; + single.count = cmd->count; + single.start_instance = cmd->baseInstance; + single.instance_count = cmd->primCount; + single.index_bias = cmd->baseVertex; + } else { + DrawArraysIndirectCommand *cmd = (void *)buf_data; + single.start = cmd->first; + single.count = cmd->count; + single.start_instance = cmd->baseInstance; + single.instance_count = cmd->primCount; + } + + if (nvc0->vertprog->vp.need_draw_parameters) { + PUSH_SPACE(push, 9); + BEGIN_NVC0(push, NVC0_3D(CB_SIZE), 3); + PUSH_DATA (push, NVC0_CB_AUX_SIZE); + PUSH_DATAh(push, screen->uniform_bo->offset + NVC0_CB_AUX_INFO(0)); + PUSH_DATA (push, screen->uniform_bo->offset + NVC0_CB_AUX_INFO(0)); + BEGIN_1IC0(push, NVC0_3D(CB_POS), 1 + 3); + PUSH_DATA (push, NVC0_CB_AUX_DRAW_INFO); + PUSH_DATA (push, 
single.index_bias); + PUSH_DATA (push, single.start_instance); + PUSH_DATA (push, single.drawid + i); + } + + nvc0_push_vbo(nvc0, &single); + } + + nouveau_resource_unmap(buf); + if (buf_count) + nouveau_resource_unmap(buf_count); +} + void nvc0_push_vbo(struct nvc0_context *nvc0, const struct pipe_draw_info *info) { diff -Nru mesa-18.3.3/src/gallium/drivers/nouveau/nvc0/nve4_compute.c mesa-19.0.1/src/gallium/drivers/nouveau/nvc0/nve4_compute.c --- mesa-18.3.3/src/gallium/drivers/nouveau/nvc0/nve4_compute.c 2018-12-07 18:58:04.000000000 +0000 +++ mesa-19.0.1/src/gallium/drivers/nouveau/nvc0/nve4_compute.c 2019-03-31 23:16:37.000000000 +0000 @@ -696,6 +696,7 @@ nve4_launch_grid(struct pipe_context *pipe, const struct pipe_grid_info *info) { struct nvc0_context *nvc0 = nvc0_context(pipe); + struct nvc0_screen *screen = nvc0->screen; struct nouveau_pushbuf *push = nvc0->base.pushbuf; void *desc; uint64_t desc_gpuaddr; @@ -769,6 +770,8 @@ } /* upload descriptor and flush */ + nouveau_pushbuf_space(push, 32, 1, 0); + PUSH_REFN(push, screen->text, NV_VRAM_DOMAIN(&screen->base) | NOUVEAU_BO_RD); BEGIN_NVC0(push, NVE4_CP(LAUNCH_DESC_ADDRESS), 1); PUSH_DATA (push, desc_gpuaddr >> 8); BEGIN_NVC0(push, NVE4_CP(LAUNCH), 1); diff -Nru mesa-18.3.3/src/gallium/drivers/pl111/Android.mk mesa-19.0.1/src/gallium/drivers/pl111/Android.mk --- mesa-18.3.3/src/gallium/drivers/pl111/Android.mk 2017-11-05 00:14:08.000000000 +0000 +++ mesa-19.0.1/src/gallium/drivers/pl111/Android.mk 1970-01-01 00:00:00.000000000 +0000 @@ -1,39 +0,0 @@ -# Copyright (C) 2014 Emil Velikov -# -# Permission is hereby granted, free of charge, to any person obtaining a -# copy of this software and associated documentation files (the "Software"), -# to deal in the Software without restriction, including without limitation -# the rights to use, copy, modify, merge, publish, distribute, sublicense, -# and/or sell copies of the Software, and to permit persons to whom the -# Software is furnished to do so, subject to 
the following conditions: -# -# The above copyright notice and this permission notice shall be included -# in all copies or substantial portions of the Software. -# -# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL -# THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING -# FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER -# DEALINGS IN THE SOFTWARE. - -LOCAL_PATH := $(call my-dir) - -# get C_SOURCES -include $(LOCAL_PATH)/Makefile.sources - -include $(CLEAR_VARS) - -LOCAL_SRC_FILES := \ - $(C_SOURCES) - -LOCAL_MODULE := libmesa_pipe_pl111 - -include $(GALLIUM_COMMON_MK) -include $(BUILD_STATIC_LIBRARY) - -ifneq ($(HAVE_GALLIUM_PL111),) -GALLIUM_TARGET_DRIVERS += pl111 -$(eval GALLIUM_LIBS += $(LOCAL_MODULE) libmesa_winsys_pl111) -endif diff -Nru mesa-18.3.3/src/gallium/drivers/pl111/Automake.inc mesa-19.0.1/src/gallium/drivers/pl111/Automake.inc --- mesa-18.3.3/src/gallium/drivers/pl111/Automake.inc 2017-11-05 00:14:08.000000000 +0000 +++ mesa-19.0.1/src/gallium/drivers/pl111/Automake.inc 1970-01-01 00:00:00.000000000 +0000 @@ -1,9 +0,0 @@ -if HAVE_GALLIUM_PL111 - -TARGET_DRIVERS += pl111 -TARGET_CPPFLAGS += -DGALLIUM_PL111 -TARGET_LIB_DEPS += \ - $(top_builddir)/src/gallium/winsys/pl111/drm/libpl111drm.la \ - $(LIBDRM_LIBS) - -endif diff -Nru mesa-18.3.3/src/gallium/drivers/pl111/Makefile.am mesa-19.0.1/src/gallium/drivers/pl111/Makefile.am --- mesa-18.3.3/src/gallium/drivers/pl111/Makefile.am 2017-11-05 00:14:08.000000000 +0000 +++ mesa-19.0.1/src/gallium/drivers/pl111/Makefile.am 1970-01-01 00:00:00.000000000 +0000 @@ -1,8 +0,0 @@ -include $(top_srcdir)/src/gallium/Automake.inc - -AM_CPPFLAGS = \ - $(GALLIUM_CFLAGS) - -noinst_LTLIBRARIES = libpl111.la - 
-libpl111_la_SOURCES = $(C_SOURCES) diff -Nru mesa-18.3.3/src/gallium/drivers/pl111/Makefile.sources mesa-19.0.1/src/gallium/drivers/pl111/Makefile.sources --- mesa-18.3.3/src/gallium/drivers/pl111/Makefile.sources 2017-11-05 00:14:08.000000000 +0000 +++ mesa-19.0.1/src/gallium/drivers/pl111/Makefile.sources 1970-01-01 00:00:00.000000000 +0000 @@ -1,2 +0,0 @@ -C_SOURCES := - diff -Nru mesa-18.3.3/src/gallium/drivers/r300/meson.build mesa-19.0.1/src/gallium/drivers/r300/meson.build --- mesa-18.3.3/src/gallium/drivers/r300/meson.build 2018-01-12 19:24:23.000000000 +0000 +++ mesa-19.0.1/src/gallium/drivers/r300/meson.build 2019-03-31 23:16:37.000000000 +0000 @@ -158,6 +158,7 @@ ], link_with : [libr300, libgallium, libmesa_util], dependencies : [dep_m, dep_clock, dep_dl, dep_thread, dep_unwind], - ) + ), + suite : ['r300'], ) endif diff -Nru mesa-18.3.3/src/gallium/drivers/r300/r300_context.c mesa-19.0.1/src/gallium/drivers/r300/r300_context.c --- mesa-18.3.3/src/gallium/drivers/r300/r300_context.c 2018-07-14 15:13:00.000000000 +0000 +++ mesa-19.0.1/src/gallium/drivers/r300/r300_context.c 2019-03-31 23:16:37.000000000 +0000 @@ -393,7 +393,7 @@ if (!r300->ctx) goto fail; - r300->cs = rws->cs_create(r300->ctx, RING_GFX, r300_flush_callback, r300); + r300->cs = rws->cs_create(r300->ctx, RING_GFX, r300_flush_callback, r300, false); if (r300->cs == NULL) goto fail; diff -Nru mesa-18.3.3/src/gallium/drivers/r300/r300_screen.c mesa-19.0.1/src/gallium/drivers/r300/r300_screen.c --- mesa-18.3.3/src/gallium/drivers/r300/r300_screen.c 2018-12-07 18:58:04.000000000 +0000 +++ mesa-19.0.1/src/gallium/drivers/r300/r300_screen.c 2019-03-31 23:16:37.000000000 +0000 @@ -304,6 +304,9 @@ case PIPE_CAP_MAX_VERTEX_ATTRIB_STRIDE: return 2048; + case PIPE_CAP_MAX_VARYINGS: + return 10; + case PIPE_CAP_VENDOR_ID: return 0x1002; case PIPE_CAP_DEVICE_ID: diff -Nru mesa-18.3.3/src/gallium/drivers/r300/r300_texture.c mesa-19.0.1/src/gallium/drivers/r300/r300_texture.c --- 
mesa-18.3.3/src/gallium/drivers/r300/r300_texture.c 2018-01-17 14:10:45.000000000 +0000 +++ mesa-19.0.1/src/gallium/drivers/r300/r300_texture.c 2019-03-31 23:16:37.000000000 +0000 @@ -1182,7 +1182,7 @@ return NULL; } - buffer = rws->buffer_from_handle(rws, whandle, &stride, NULL); + buffer = rws->buffer_from_handle(rws, whandle, 0, &stride, NULL); if (!buffer) return NULL; diff -Nru mesa-18.3.3/src/gallium/drivers/r600/evergreen_compute.c mesa-19.0.1/src/gallium/drivers/r600/evergreen_compute.c --- mesa-18.3.3/src/gallium/drivers/r600/evergreen_compute.c 2018-09-27 19:13:54.000000000 +0000 +++ mesa-19.0.1/src/gallium/drivers/r600/evergreen_compute.c 2019-03-31 23:16:37.000000000 +0000 @@ -438,7 +438,9 @@ /* Upload code + ROdata */ shader->code_bo = r600_compute_buffer_alloc_vram(rctx->screen, shader->bc.ndw * 4); - p = r600_buffer_map_sync_with_rings(&rctx->b, shader->code_bo, PIPE_TRANSFER_WRITE); + p = r600_buffer_map_sync_with_rings( + &rctx->b, shader->code_bo, + PIPE_TRANSFER_WRITE | RADEON_TRANSFER_TEMPORARY); //TODO: use util_memcpy_cpu_to_le32 ? 
memcpy(p, shader->bc.bytecode, shader->bc.ndw * 4); rctx->b.ws->buffer_unmap(shader->code_bo->buf); diff -Nru mesa-18.3.3/src/gallium/drivers/r600/r600_asm.c mesa-19.0.1/src/gallium/drivers/r600/r600_asm.c --- mesa-18.3.3/src/gallium/drivers/r600/r600_asm.c 2018-09-27 19:13:54.000000000 +0000 +++ mesa-19.0.1/src/gallium/drivers/r600/r600_asm.c 2019-03-31 23:16:37.000000000 +0000 @@ -2772,7 +2772,9 @@ return NULL; } - bytecode = r600_buffer_map_sync_with_rings(&rctx->b, shader->buffer, PIPE_TRANSFER_WRITE | PIPE_TRANSFER_UNSYNCHRONIZED); + bytecode = r600_buffer_map_sync_with_rings + (&rctx->b, shader->buffer, + PIPE_TRANSFER_WRITE | PIPE_TRANSFER_UNSYNCHRONIZED | RADEON_TRANSFER_TEMPORARY); bytecode += shader->offset / 4; if (R600_BIG_ENDIAN) { diff -Nru mesa-18.3.3/src/gallium/drivers/r600/r600_pipe.c mesa-19.0.1/src/gallium/drivers/r600/r600_pipe.c --- mesa-18.3.3/src/gallium/drivers/r600/r600_pipe.c 2018-12-07 18:58:04.000000000 +0000 +++ mesa-19.0.1/src/gallium/drivers/r600/r600_pipe.c 2019-03-31 23:16:37.000000000 +0000 @@ -212,7 +212,7 @@ } rctx->b.gfx.cs = ws->cs_create(rctx->b.ctx, RING_GFX, - r600_context_gfx_flush, rctx); + r600_context_gfx_flush, rctx, false); rctx->b.gfx.flush = r600_context_gfx_flush; rctx->allocator_fetch_shader = @@ -536,6 +536,9 @@ case PIPE_CAP_MAX_TEXEL_OFFSET: return 7; + case PIPE_CAP_MAX_VARYINGS: + return 32; + case PIPE_CAP_TEXTURE_BORDER_COLOR_QUIRK: return PIPE_QUIRK_TEXTURE_BORDER_COLOR_SWIZZLE_R600; case PIPE_CAP_ENDIANNESS: diff -Nru mesa-18.3.3/src/gallium/drivers/r600/r600_pipe_common.c mesa-19.0.1/src/gallium/drivers/r600/r600_pipe_common.c --- mesa-18.3.3/src/gallium/drivers/r600/r600_pipe_common.c 2018-11-05 12:21:01.000000000 +0000 +++ mesa-19.0.1/src/gallium/drivers/r600/r600_pipe_common.c 2019-03-31 23:16:37.000000000 +0000 @@ -715,7 +715,7 @@ if (rscreen->info.num_sdma_rings && !(rscreen->debug_flags & DBG_NO_ASYNC_DMA)) { rctx->dma.cs = rctx->ws->cs_create(rctx->ctx, RING_DMA, r600_flush_dma_ring, - rctx); + 
rctx, false); rctx->dma.flush = r600_flush_dma_ring; } diff -Nru mesa-18.3.3/src/gallium/drivers/r600/r600_shader.c mesa-19.0.1/src/gallium/drivers/r600/r600_shader.c --- mesa-18.3.3/src/gallium/drivers/r600/r600_shader.c 2018-12-07 18:58:04.000000000 +0000 +++ mesa-19.0.1/src/gallium/drivers/r600/r600_shader.c 2019-03-31 23:16:37.000000000 +0000 @@ -141,7 +141,9 @@ if (shader->bo == NULL) { return -ENOMEM; } - ptr = r600_buffer_map_sync_with_rings(&rctx->b, shader->bo, PIPE_TRANSFER_WRITE); + ptr = r600_buffer_map_sync_with_rings( + &rctx->b, shader->bo, + PIPE_TRANSFER_WRITE | RADEON_TRANSFER_TEMPORARY); if (R600_BIG_ENDIAN) { for (i = 0; i < shader->shader.bc.ndw; ++i) { ptr[i] = util_cpu_to_le32(shader->shader.bc.bytecode[i]); diff -Nru mesa-18.3.3/src/gallium/drivers/r600/r600_state_common.c mesa-19.0.1/src/gallium/drivers/r600/r600_state_common.c --- mesa-18.3.3/src/gallium/drivers/r600/r600_state_common.c 2018-09-27 19:13:54.000000000 +0000 +++ mesa-19.0.1/src/gallium/drivers/r600/r600_state_common.c 2019-03-31 23:16:37.000000000 +0000 @@ -1020,7 +1020,9 @@ rctx->vs_shader = (struct r600_pipe_shader_selector *)state; r600_update_vs_writes_viewport_index(&rctx->b, r600_get_vs_info(rctx)); - rctx->b.streamout.stride_in_dw = rctx->vs_shader->so.stride; + + if (rctx->vs_shader->so.num_outputs) + rctx->b.streamout.stride_in_dw = rctx->vs_shader->so.stride; } static void r600_bind_gs_state(struct pipe_context *ctx, void *state) @@ -1035,7 +1037,9 @@ if (!state) return; - rctx->b.streamout.stride_in_dw = rctx->gs_shader->so.stride; + + if (rctx->gs_shader->so.num_outputs) + rctx->b.streamout.stride_in_dw = rctx->gs_shader->so.stride; } static void r600_bind_tcs_state(struct pipe_context *ctx, void *state) @@ -1057,7 +1061,9 @@ if (!state) return; - rctx->b.streamout.stride_in_dw = rctx->tes_shader->so.stride; + + if (rctx->tes_shader->so.num_outputs) + rctx->b.streamout.stride_in_dw = rctx->tes_shader->so.stride; } void r600_delete_shader_selector(struct 
pipe_context *ctx, @@ -2917,6 +2923,7 @@ switch (desc->nr_channels) { case 1: result = FMT_8; + is_srgb_valid = TRUE; goto out_word4; case 2: result = FMT_8_8; diff -Nru mesa-18.3.3/src/gallium/drivers/r600/r600_texture.c mesa-19.0.1/src/gallium/drivers/r600/r600_texture.c --- mesa-18.3.3/src/gallium/drivers/r600/r600_texture.c 2018-12-07 18:58:04.000000000 +0000 +++ mesa-19.0.1/src/gallium/drivers/r600/r600_texture.c 2019-03-31 23:16:37.000000000 +0000 @@ -1108,7 +1108,9 @@ templ->depth0 != 1 || templ->last_level != 0) return NULL; - buf = rscreen->ws->buffer_from_handle(rscreen->ws, whandle, &stride, &offset); + buf = rscreen->ws->buffer_from_handle(rscreen->ws, whandle, + rscreen->info.max_alignment, + &stride, &offset); if (!buf) return NULL; @@ -1852,6 +1854,7 @@ return NULL; buf = rscreen->ws->buffer_from_handle(rscreen->ws, whandle, + rscreen->info.max_alignment, &stride, &offset); if (!buf) { free(memobj); diff -Nru mesa-18.3.3/src/gallium/drivers/r600/radeon_uvd.c mesa-19.0.1/src/gallium/drivers/r600/radeon_uvd.c --- mesa-18.3.3/src/gallium/drivers/r600/radeon_uvd.c 2018-09-27 19:13:54.000000000 +0000 +++ mesa-19.0.1/src/gallium/drivers/r600/radeon_uvd.c 2019-03-31 23:16:37.000000000 +0000 @@ -152,7 +152,8 @@ buf = &dec->msg_fb_it_buffers[dec->cur_buffer]; /* and map it for CPU access */ - ptr = dec->ws->buffer_map(buf->res->buf, dec->cs, PIPE_TRANSFER_WRITE); + ptr = dec->ws->buffer_map(buf->res->buf, dec->cs, + PIPE_TRANSFER_WRITE | RADEON_TRANSFER_TEMPORARY); /* calc buffer offsets */ dec->msg = (struct ruvd_msg *)ptr; @@ -1068,7 +1069,7 @@ dec->bs_size = 0; dec->bs_ptr = dec->ws->buffer_map( dec->bs_buffers[dec->cur_buffer].res->buf, - dec->cs, PIPE_TRANSFER_WRITE); + dec->cs, PIPE_TRANSFER_WRITE | RADEON_TRANSFER_TEMPORARY); } /** @@ -1121,7 +1122,8 @@ } dec->bs_ptr = dec->ws->buffer_map(buf->res->buf, dec->cs, - PIPE_TRANSFER_WRITE); + PIPE_TRANSFER_WRITE | + RADEON_TRANSFER_TEMPORARY); if (!dec->bs_ptr) return; @@ -1332,7 +1334,7 @@ 
dec->stream_handle = rvid_alloc_stream_handle(); dec->screen = context->screen; dec->ws = ws; - dec->cs = ws->cs_create(rctx->ctx, RING_UVD, NULL, NULL); + dec->cs = ws->cs_create(rctx->ctx, RING_UVD, NULL, NULL, false); if (!dec->cs) { RVID_ERR("Can't get command submission context.\n"); goto error; diff -Nru mesa-18.3.3/src/gallium/drivers/r600/radeon_vce.c mesa-19.0.1/src/gallium/drivers/r600/radeon_vce.c --- mesa-18.3.3/src/gallium/drivers/r600/radeon_vce.c 2018-09-27 19:13:54.000000000 +0000 +++ mesa-19.0.1/src/gallium/drivers/r600/radeon_vce.c 2019-03-31 23:16:37.000000000 +0000 @@ -353,7 +353,9 @@ struct rvid_buffer *fb = feedback; if (size) { - uint32_t *ptr = enc->ws->buffer_map(fb->res->buf, enc->cs, PIPE_TRANSFER_READ_WRITE); + uint32_t *ptr = enc->ws->buffer_map( + fb->res->buf, enc->cs, + PIPE_TRANSFER_READ_WRITE | RADEON_TRANSFER_TEMPORARY); if (ptr[1]) { *size = ptr[4] - ptr[9]; @@ -428,7 +430,7 @@ enc->screen = context->screen; enc->ws = ws; - enc->cs = ws->cs_create(rctx->ctx, RING_VCE, rvce_cs_flush, enc); + enc->cs = ws->cs_create(rctx->ctx, RING_VCE, rvce_cs_flush, enc, false); if (!enc->cs) { RVID_ERR("Can't get command submission context.\n"); goto error; diff -Nru mesa-18.3.3/src/gallium/drivers/r600/radeon_video.c mesa-19.0.1/src/gallium/drivers/r600/radeon_video.c --- mesa-18.3.3/src/gallium/drivers/r600/radeon_video.c 2018-09-27 19:13:54.000000000 +0000 +++ mesa-19.0.1/src/gallium/drivers/r600/radeon_video.c 2019-03-31 23:16:37.000000000 +0000 @@ -97,11 +97,13 @@ if (!rvid_create_buffer(screen, new_buf, new_size, new_buf->usage)) goto error; - src = ws->buffer_map(old_buf.res->buf, cs, PIPE_TRANSFER_READ); + src = ws->buffer_map(old_buf.res->buf, cs, + PIPE_TRANSFER_READ | RADEON_TRANSFER_TEMPORARY); if (!src) goto error; - dst = ws->buffer_map(new_buf->res->buf, cs, PIPE_TRANSFER_WRITE); + dst = ws->buffer_map(new_buf->res->buf, cs, + PIPE_TRANSFER_WRITE | RADEON_TRANSFER_TEMPORARY); if (!dst) goto error; diff -Nru 
mesa-18.3.3/src/gallium/drivers/r600/sb/sb_ir.h mesa-19.0.1/src/gallium/drivers/r600/sb/sb_ir.h --- mesa-18.3.3/src/gallium/drivers/r600/sb/sb_ir.h 2018-02-16 12:24:09.000000000 +0000 +++ mesa-19.0.1/src/gallium/drivers/r600/sb/sb_ir.h 2019-03-31 23:16:37.000000000 +0000 @@ -1012,7 +1012,7 @@ class alu_node : public node { protected: - alu_node() : node(NT_OP, NST_ALU_INST) { memset(&bc, 0, sizeof(bc_alu)); }; + alu_node() : node(NT_OP, NST_ALU_INST) { memset(&bc, 0, sizeof(bc_alu)); } public: bc_alu bc; diff -Nru mesa-18.3.3/src/gallium/drivers/radeon/r600_perfcounter.c mesa-19.0.1/src/gallium/drivers/radeon/r600_perfcounter.c --- mesa-18.3.3/src/gallium/drivers/radeon/r600_perfcounter.c 2018-04-11 19:02:35.000000000 +0000 +++ mesa-19.0.1/src/gallium/drivers/radeon/r600_perfcounter.c 1970-01-01 00:00:00.000000000 +0000 @@ -1,639 +0,0 @@ -/* - * Copyright 2015 Advanced Micro Devices, Inc. - * All Rights Reserved. - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * the rights to use, copy, modify, merge, publish, distribute, sublicense, - * and/or sell copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice (including the next - * paragraph) shall be included in all copies or substantial portions of the - * Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL - * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ - -#include "util/u_memory.h" -#include "radeonsi/si_query.h" -#include "radeonsi/si_pipe.h" -#include "amd/common/sid.h" - -/* Max counters per HW block */ -#define SI_QUERY_MAX_COUNTERS 16 - -static struct si_perfcounter_block * -lookup_counter(struct si_perfcounters *pc, unsigned index, - unsigned *base_gid, unsigned *sub_index) -{ - struct si_perfcounter_block *block = pc->blocks; - unsigned bid; - - *base_gid = 0; - for (bid = 0; bid < pc->num_blocks; ++bid, ++block) { - unsigned total = block->num_groups * block->num_selectors; - - if (index < total) { - *sub_index = index; - return block; - } - - index -= total; - *base_gid += block->num_groups; - } - - return NULL; -} - -static struct si_perfcounter_block * -lookup_group(struct si_perfcounters *pc, unsigned *index) -{ - unsigned bid; - struct si_perfcounter_block *block = pc->blocks; - - for (bid = 0; bid < pc->num_blocks; ++bid, ++block) { - if (*index < block->num_groups) - return block; - *index -= block->num_groups; - } - - return NULL; -} - -struct si_pc_group { - struct si_pc_group *next; - struct si_perfcounter_block *block; - unsigned sub_gid; /* only used during init */ - unsigned result_base; /* only used during init */ - int se; - int instance; - unsigned num_counters; - unsigned selectors[SI_QUERY_MAX_COUNTERS]; -}; - -struct si_pc_counter { - unsigned base; - unsigned qwords; - unsigned stride; /* in uint64s */ -}; - -#define SI_PC_SHADERS_WINDOWING (1 << 31) - -struct si_query_pc { - struct si_query_hw b; - - unsigned shaders; - unsigned num_counters; - struct si_pc_counter *counters; - struct si_pc_group *groups; -}; - -static void si_pc_query_destroy(struct si_screen *sscreen, - struct si_query *rquery) -{ - struct 
si_query_pc *query = (struct si_query_pc *)rquery; - - while (query->groups) { - struct si_pc_group *group = query->groups; - query->groups = group->next; - FREE(group); - } - - FREE(query->counters); - - si_query_hw_destroy(sscreen, rquery); -} - -static bool si_pc_query_prepare_buffer(struct si_screen *screen, - struct si_query_hw *hwquery, - struct r600_resource *buffer) -{ - /* no-op */ - return true; -} - -static void si_pc_query_emit_start(struct si_context *sctx, - struct si_query_hw *hwquery, - struct r600_resource *buffer, uint64_t va) -{ - struct si_perfcounters *pc = sctx->screen->perfcounters; - struct si_query_pc *query = (struct si_query_pc *)hwquery; - struct si_pc_group *group; - int current_se = -1; - int current_instance = -1; - - if (query->shaders) - pc->emit_shaders(sctx, query->shaders); - - for (group = query->groups; group; group = group->next) { - struct si_perfcounter_block *block = group->block; - - if (group->se != current_se || group->instance != current_instance) { - current_se = group->se; - current_instance = group->instance; - pc->emit_instance(sctx, group->se, group->instance); - } - - pc->emit_select(sctx, block, group->num_counters, group->selectors); - } - - if (current_se != -1 || current_instance != -1) - pc->emit_instance(sctx, -1, -1); - - pc->emit_start(sctx, buffer, va); -} - -static void si_pc_query_emit_stop(struct si_context *sctx, - struct si_query_hw *hwquery, - struct r600_resource *buffer, uint64_t va) -{ - struct si_perfcounters *pc = sctx->screen->perfcounters; - struct si_query_pc *query = (struct si_query_pc *)hwquery; - struct si_pc_group *group; - - pc->emit_stop(sctx, buffer, va); - - for (group = query->groups; group; group = group->next) { - struct si_perfcounter_block *block = group->block; - unsigned se = group->se >= 0 ? 
group->se : 0; - unsigned se_end = se + 1; - - if ((block->flags & SI_PC_BLOCK_SE) && (group->se < 0)) - se_end = sctx->screen->info.max_se; - - do { - unsigned instance = group->instance >= 0 ? group->instance : 0; - - do { - pc->emit_instance(sctx, se, instance); - pc->emit_read(sctx, block, - group->num_counters, group->selectors, - buffer, va); - va += sizeof(uint64_t) * group->num_counters; - } while (group->instance < 0 && ++instance < block->num_instances); - } while (++se < se_end); - } - - pc->emit_instance(sctx, -1, -1); -} - -static void si_pc_query_clear_result(struct si_query_hw *hwquery, - union pipe_query_result *result) -{ - struct si_query_pc *query = (struct si_query_pc *)hwquery; - - memset(result, 0, sizeof(result->batch[0]) * query->num_counters); -} - -static void si_pc_query_add_result(struct si_screen *sscreen, - struct si_query_hw *hwquery, - void *buffer, - union pipe_query_result *result) -{ - struct si_query_pc *query = (struct si_query_pc *)hwquery; - uint64_t *results = buffer; - unsigned i, j; - - for (i = 0; i < query->num_counters; ++i) { - struct si_pc_counter *counter = &query->counters[i]; - - for (j = 0; j < counter->qwords; ++j) { - uint32_t value = results[counter->base + j * counter->stride]; - result->batch[i].u64 += value; - } - } -} - -static struct si_query_ops batch_query_ops = { - .destroy = si_pc_query_destroy, - .begin = si_query_hw_begin, - .end = si_query_hw_end, - .get_result = si_query_hw_get_result -}; - -static struct si_query_hw_ops batch_query_hw_ops = { - .prepare_buffer = si_pc_query_prepare_buffer, - .emit_start = si_pc_query_emit_start, - .emit_stop = si_pc_query_emit_stop, - .clear_result = si_pc_query_clear_result, - .add_result = si_pc_query_add_result, -}; - -static struct si_pc_group *get_group_state(struct si_screen *screen, - struct si_query_pc *query, - struct si_perfcounter_block *block, - unsigned sub_gid) -{ - struct si_pc_group *group = query->groups; - - while (group) { - if (group->block == 
block && group->sub_gid == sub_gid) - return group; - group = group->next; - } - - group = CALLOC_STRUCT(si_pc_group); - if (!group) - return NULL; - - group->block = block; - group->sub_gid = sub_gid; - - if (block->flags & SI_PC_BLOCK_SHADER) { - unsigned sub_gids = block->num_instances; - unsigned shader_id; - unsigned shaders; - unsigned query_shaders; - - if (block->flags & SI_PC_BLOCK_SE_GROUPS) - sub_gids = sub_gids * screen->info.max_se; - shader_id = sub_gid / sub_gids; - sub_gid = sub_gid % sub_gids; - - shaders = screen->perfcounters->shader_type_bits[shader_id]; - - query_shaders = query->shaders & ~SI_PC_SHADERS_WINDOWING; - if (query_shaders && query_shaders != shaders) { - fprintf(stderr, "si_perfcounter: incompatible shader groups\n"); - FREE(group); - return NULL; - } - query->shaders = shaders; - } - - if (block->flags & SI_PC_BLOCK_SHADER_WINDOWED && !query->shaders) { - // A non-zero value in query->shaders ensures that the shader - // masking is reset unless the user explicitly requests one. 
- query->shaders = SI_PC_SHADERS_WINDOWING; - } - - if (block->flags & SI_PC_BLOCK_SE_GROUPS) { - group->se = sub_gid / block->num_instances; - sub_gid = sub_gid % block->num_instances; - } else { - group->se = -1; - } - - if (block->flags & SI_PC_BLOCK_INSTANCE_GROUPS) { - group->instance = sub_gid; - } else { - group->instance = -1; - } - - group->next = query->groups; - query->groups = group; - - return group; -} - -struct pipe_query *si_create_batch_query(struct pipe_context *ctx, - unsigned num_queries, - unsigned *query_types) -{ - struct si_screen *screen = - (struct si_screen *)ctx->screen; - struct si_perfcounters *pc = screen->perfcounters; - struct si_perfcounter_block *block; - struct si_pc_group *group; - struct si_query_pc *query; - unsigned base_gid, sub_gid, sub_index; - unsigned i, j; - - if (!pc) - return NULL; - - query = CALLOC_STRUCT(si_query_pc); - if (!query) - return NULL; - - query->b.b.ops = &batch_query_ops; - query->b.ops = &batch_query_hw_ops; - - query->num_counters = num_queries; - - /* Collect selectors per group */ - for (i = 0; i < num_queries; ++i) { - unsigned sub_gid; - - if (query_types[i] < SI_QUERY_FIRST_PERFCOUNTER) - goto error; - - block = lookup_counter(pc, query_types[i] - SI_QUERY_FIRST_PERFCOUNTER, - &base_gid, &sub_index); - if (!block) - goto error; - - sub_gid = sub_index / block->num_selectors; - sub_index = sub_index % block->num_selectors; - - group = get_group_state(screen, query, block, sub_gid); - if (!group) - goto error; - - if (group->num_counters >= block->num_counters) { - fprintf(stderr, - "perfcounter group %s: too many selected\n", - block->basename); - goto error; - } - group->selectors[group->num_counters] = sub_index; - ++group->num_counters; - } - - /* Compute result bases and CS size per group */ - query->b.num_cs_dw_end = pc->num_stop_cs_dwords; - query->b.num_cs_dw_end += pc->num_instance_cs_dwords; - - i = 0; - for (group = query->groups; group; group = group->next) { - struct 
si_perfcounter_block *block = group->block; - unsigned read_dw; - unsigned instances = 1; - - if ((block->flags & SI_PC_BLOCK_SE) && group->se < 0) - instances = screen->info.max_se; - if (group->instance < 0) - instances *= block->num_instances; - - group->result_base = i; - query->b.result_size += sizeof(uint64_t) * instances * group->num_counters; - i += instances * group->num_counters; - - read_dw = 6 * group->num_counters; - query->b.num_cs_dw_end += instances * read_dw; - query->b.num_cs_dw_end += instances * pc->num_instance_cs_dwords; - } - - if (query->shaders) { - if (query->shaders == SI_PC_SHADERS_WINDOWING) - query->shaders = 0xffffffff; - } - - /* Map user-supplied query array to result indices */ - query->counters = CALLOC(num_queries, sizeof(*query->counters)); - for (i = 0; i < num_queries; ++i) { - struct si_pc_counter *counter = &query->counters[i]; - struct si_perfcounter_block *block; - - block = lookup_counter(pc, query_types[i] - SI_QUERY_FIRST_PERFCOUNTER, - &base_gid, &sub_index); - - sub_gid = sub_index / block->num_selectors; - sub_index = sub_index % block->num_selectors; - - group = get_group_state(screen, query, block, sub_gid); - assert(group != NULL); - - for (j = 0; j < group->num_counters; ++j) { - if (group->selectors[j] == sub_index) - break; - } - - counter->base = group->result_base + j; - counter->stride = group->num_counters; - - counter->qwords = 1; - if ((block->flags & SI_PC_BLOCK_SE) && group->se < 0) - counter->qwords = screen->info.max_se; - if (group->instance < 0) - counter->qwords *= block->num_instances; - } - - if (!si_query_hw_init(screen, &query->b)) - goto error; - - return (struct pipe_query *)query; - -error: - si_pc_query_destroy(screen, &query->b.b); - return NULL; -} - -static bool si_init_block_names(struct si_screen *screen, - struct si_perfcounter_block *block) -{ - unsigned i, j, k; - unsigned groups_shader = 1, groups_se = 1, groups_instance = 1; - unsigned namelen; - char *groupname; - char *p; - - if 
(block->flags & SI_PC_BLOCK_INSTANCE_GROUPS) - groups_instance = block->num_instances; - if (block->flags & SI_PC_BLOCK_SE_GROUPS) - groups_se = screen->info.max_se; - if (block->flags & SI_PC_BLOCK_SHADER) - groups_shader = screen->perfcounters->num_shader_types; - - namelen = strlen(block->basename); - block->group_name_stride = namelen + 1; - if (block->flags & SI_PC_BLOCK_SHADER) - block->group_name_stride += 3; - if (block->flags & SI_PC_BLOCK_SE_GROUPS) { - assert(groups_se <= 10); - block->group_name_stride += 1; - - if (block->flags & SI_PC_BLOCK_INSTANCE_GROUPS) - block->group_name_stride += 1; - } - if (block->flags & SI_PC_BLOCK_INSTANCE_GROUPS) { - assert(groups_instance <= 100); - block->group_name_stride += 2; - } - - block->group_names = MALLOC(block->num_groups * block->group_name_stride); - if (!block->group_names) - return false; - - groupname = block->group_names; - for (i = 0; i < groups_shader; ++i) { - const char *shader_suffix = screen->perfcounters->shader_type_suffixes[i]; - unsigned shaderlen = strlen(shader_suffix); - for (j = 0; j < groups_se; ++j) { - for (k = 0; k < groups_instance; ++k) { - strcpy(groupname, block->basename); - p = groupname + namelen; - - if (block->flags & SI_PC_BLOCK_SHADER) { - strcpy(p, shader_suffix); - p += shaderlen; - } - - if (block->flags & SI_PC_BLOCK_SE_GROUPS) { - p += sprintf(p, "%d", j); - if (block->flags & SI_PC_BLOCK_INSTANCE_GROUPS) - *p++ = '_'; - } - - if (block->flags & SI_PC_BLOCK_INSTANCE_GROUPS) - p += sprintf(p, "%d", k); - - groupname += block->group_name_stride; - } - } - } - - assert(block->num_selectors <= 1000); - block->selector_name_stride = block->group_name_stride + 4; - block->selector_names = MALLOC(block->num_groups * block->num_selectors * - block->selector_name_stride); - if (!block->selector_names) - return false; - - groupname = block->group_names; - p = block->selector_names; - for (i = 0; i < block->num_groups; ++i) { - for (j = 0; j < block->num_selectors; ++j) { - 
sprintf(p, "%s_%03d", groupname, j); - p += block->selector_name_stride; - } - groupname += block->group_name_stride; - } - - return true; -} - -int si_get_perfcounter_info(struct si_screen *screen, - unsigned index, - struct pipe_driver_query_info *info) -{ - struct si_perfcounters *pc = screen->perfcounters; - struct si_perfcounter_block *block; - unsigned base_gid, sub; - - if (!pc) - return 0; - - if (!info) { - unsigned bid, num_queries = 0; - - for (bid = 0; bid < pc->num_blocks; ++bid) { - num_queries += pc->blocks[bid].num_selectors * - pc->blocks[bid].num_groups; - } - - return num_queries; - } - - block = lookup_counter(pc, index, &base_gid, &sub); - if (!block) - return 0; - - if (!block->selector_names) { - if (!si_init_block_names(screen, block)) - return 0; - } - info->name = block->selector_names + sub * block->selector_name_stride; - info->query_type = SI_QUERY_FIRST_PERFCOUNTER + index; - info->max_value.u64 = 0; - info->type = PIPE_DRIVER_QUERY_TYPE_UINT64; - info->result_type = PIPE_DRIVER_QUERY_RESULT_TYPE_AVERAGE; - info->group_id = base_gid + sub / block->num_selectors; - info->flags = PIPE_DRIVER_QUERY_FLAG_BATCH; - if (sub > 0 && sub + 1 < block->num_selectors * block->num_groups) - info->flags |= PIPE_DRIVER_QUERY_FLAG_DONT_LIST; - return 1; -} - -int si_get_perfcounter_group_info(struct si_screen *screen, - unsigned index, - struct pipe_driver_query_group_info *info) -{ - struct si_perfcounters *pc = screen->perfcounters; - struct si_perfcounter_block *block; - - if (!pc) - return 0; - - if (!info) - return pc->num_groups; - - block = lookup_group(pc, &index); - if (!block) - return 0; - - if (!block->group_names) { - if (!si_init_block_names(screen, block)) - return 0; - } - info->name = block->group_names + index * block->group_name_stride; - info->num_queries = block->num_selectors; - info->max_active_queries = block->num_counters; - return 1; -} - -void si_perfcounters_destroy(struct si_screen *sscreen) -{ - if (sscreen->perfcounters) 
- sscreen->perfcounters->cleanup(sscreen); -} - -bool si_perfcounters_init(struct si_perfcounters *pc, - unsigned num_blocks) -{ - pc->blocks = CALLOC(num_blocks, sizeof(struct si_perfcounter_block)); - if (!pc->blocks) - return false; - - pc->separate_se = debug_get_bool_option("RADEON_PC_SEPARATE_SE", false); - pc->separate_instance = debug_get_bool_option("RADEON_PC_SEPARATE_INSTANCE", false); - - return true; -} - -void si_perfcounters_add_block(struct si_screen *sscreen, - struct si_perfcounters *pc, - const char *name, unsigned flags, - unsigned counters, unsigned selectors, - unsigned instances, void *data) -{ - struct si_perfcounter_block *block = &pc->blocks[pc->num_blocks]; - - assert(counters <= SI_QUERY_MAX_COUNTERS); - - block->basename = name; - block->flags = flags; - block->num_counters = counters; - block->num_selectors = selectors; - block->num_instances = MAX2(instances, 1); - block->data = data; - - if (pc->separate_se && (block->flags & SI_PC_BLOCK_SE)) - block->flags |= SI_PC_BLOCK_SE_GROUPS; - if (pc->separate_instance && block->num_instances > 1) - block->flags |= SI_PC_BLOCK_INSTANCE_GROUPS; - - if (block->flags & SI_PC_BLOCK_INSTANCE_GROUPS) { - block->num_groups = block->num_instances; - } else { - block->num_groups = 1; - } - - if (block->flags & SI_PC_BLOCK_SE_GROUPS) - block->num_groups *= sscreen->info.max_se; - if (block->flags & SI_PC_BLOCK_SHADER) - block->num_groups *= pc->num_shader_types; - - ++pc->num_blocks; - pc->num_groups += block->num_groups; -} - -void si_perfcounters_do_destroy(struct si_perfcounters *pc) -{ - unsigned i; - - for (i = 0; i < pc->num_blocks; ++i) { - FREE(pc->blocks[i].group_names); - FREE(pc->blocks[i].selector_names); - } - FREE(pc->blocks); - FREE(pc); -} diff -Nru mesa-18.3.3/src/gallium/drivers/radeon/radeon_uvd.c mesa-19.0.1/src/gallium/drivers/radeon/radeon_uvd.c --- mesa-18.3.3/src/gallium/drivers/radeon/radeon_uvd.c 2018-12-07 18:58:04.000000000 +0000 +++ 
mesa-19.0.1/src/gallium/drivers/radeon/radeon_uvd.c 2019-03-31 23:16:37.000000000 +0000 @@ -148,7 +148,8 @@ buf = &dec->msg_fb_it_buffers[dec->cur_buffer]; /* and map it for CPU access */ - ptr = dec->ws->buffer_map(buf->res->buf, dec->cs, PIPE_TRANSFER_WRITE); + ptr = dec->ws->buffer_map(buf->res->buf, dec->cs, + PIPE_TRANSFER_WRITE | RADEON_TRANSFER_TEMPORARY); /* calc buffer offsets */ dec->msg = (struct ruvd_msg *)ptr; @@ -1015,7 +1016,7 @@ dec->bs_size = 0; dec->bs_ptr = dec->ws->buffer_map( dec->bs_buffers[dec->cur_buffer].res->buf, - dec->cs, PIPE_TRANSFER_WRITE); + dec->cs, PIPE_TRANSFER_WRITE | RADEON_TRANSFER_TEMPORARY); } /** @@ -1060,8 +1061,9 @@ return; } - dec->bs_ptr = dec->ws->buffer_map(buf->res->buf, dec->cs, - PIPE_TRANSFER_WRITE); + dec->bs_ptr = dec->ws->buffer_map( + buf->res->buf, dec->cs, + PIPE_TRANSFER_WRITE | RADEON_TRANSFER_TEMPORARY); if (!dec->bs_ptr) return; @@ -1268,7 +1270,7 @@ dec->stream_handle = si_vid_alloc_stream_handle(); dec->screen = context->screen; dec->ws = ws; - dec->cs = ws->cs_create(sctx->ctx, RING_UVD, NULL, NULL); + dec->cs = ws->cs_create(sctx->ctx, RING_UVD, NULL, NULL, false); if (!dec->cs) { RVID_ERR("Can't get command submission context.\n"); goto error; diff -Nru mesa-18.3.3/src/gallium/drivers/radeon/radeon_uvd_enc_1_1.c mesa-19.0.1/src/gallium/drivers/radeon/radeon_uvd_enc_1_1.c --- mesa-18.3.3/src/gallium/drivers/radeon/radeon_uvd_enc_1_1.c 2018-09-27 19:13:54.000000000 +0000 +++ mesa-19.0.1/src/gallium/drivers/radeon/radeon_uvd_enc_1_1.c 2019-03-31 23:16:37.000000000 +0000 @@ -835,10 +835,10 @@ static void radeon_uvd_enc_ctx(struct radeon_uvd_encoder *enc) { - struct si_screen *rscreen = (struct si_screen *) enc->screen; + struct si_screen *sscreen = (struct si_screen *) enc->screen; enc->enc_pic.ctx_buf.swizzle_mode = 0; - if (rscreen->info.chip_class < GFX9) { + if (sscreen->info.chip_class < GFX9) { enc->enc_pic.ctx_buf.rec_luma_pitch = (enc->luma->u.legacy.level[0].nblk_x * enc->luma->bpe); 
enc->enc_pic.ctx_buf.rec_chroma_pitch = @@ -950,7 +950,7 @@ static void radeon_uvd_enc_encode_params_hevc(struct radeon_uvd_encoder *enc) { - struct si_screen *rscreen = (struct si_screen *) enc->screen; + struct si_screen *sscreen = (struct si_screen *) enc->screen; switch (enc->enc_pic.picture_type) { case PIPE_H265_ENC_PICTURE_TYPE_I: case PIPE_H265_ENC_PICTURE_TYPE_IDR: @@ -970,7 +970,7 @@ } enc->enc_pic.enc_params.allowed_max_bitstream_size = enc->bs_size; - if (rscreen->info.chip_class < GFX9) { + if (sscreen->info.chip_class < GFX9) { enc->enc_pic.enc_params.input_pic_luma_pitch = (enc->luma->u.legacy.level[0].nblk_x * enc->luma->bpe); enc->enc_pic.enc_params.input_pic_chroma_pitch = @@ -998,7 +998,7 @@ RADEON_ENC_CS(enc->enc_pic.enc_params.pic_type); RADEON_ENC_CS(enc->enc_pic.enc_params.allowed_max_bitstream_size); - if (rscreen->info.chip_class < GFX9) { + if (sscreen->info.chip_class < GFX9) { RADEON_ENC_READ(enc->handle, RADEON_DOMAIN_VRAM, enc->luma->u.legacy.level[0].offset); RADEON_ENC_READ(enc->handle, RADEON_DOMAIN_VRAM, diff -Nru mesa-18.3.3/src/gallium/drivers/radeon/radeon_uvd_enc.c mesa-19.0.1/src/gallium/drivers/radeon/radeon_uvd_enc.c --- mesa-18.3.3/src/gallium/drivers/radeon/radeon_uvd_enc.c 2018-04-11 19:02:35.000000000 +0000 +++ mesa-19.0.1/src/gallium/drivers/radeon/radeon_uvd_enc.c 2019-03-31 23:16:37.000000000 +0000 @@ -263,9 +263,9 @@ if (NULL != size) { radeon_uvd_enc_feedback_t *fb_data = - (radeon_uvd_enc_feedback_t *) enc->ws->buffer_map(fb->res->buf, - enc->cs, - PIPE_TRANSFER_READ_WRITE); + (radeon_uvd_enc_feedback_t *) enc->ws->buffer_map( + fb->res->buf, enc->cs, + PIPE_TRANSFER_READ_WRITE | RADEON_TRANSFER_TEMPORARY); if (!fb_data->status) *size = fb_data->bitstream_size; @@ -314,7 +314,7 @@ enc->screen = context->screen; enc->ws = ws; enc->cs = - ws->cs_create(sctx->ctx, RING_UVD_ENC, radeon_uvd_enc_cs_flush, enc); + ws->cs_create(sctx->ctx, RING_UVD_ENC, radeon_uvd_enc_cs_flush, enc, false); if (!enc->cs) { RVID_ERR("Can't 
get command submission context.\n"); diff -Nru mesa-18.3.3/src/gallium/drivers/radeon/radeon_uvd_enc.h mesa-19.0.1/src/gallium/drivers/radeon/radeon_uvd_enc.h --- mesa-18.3.3/src/gallium/drivers/radeon/radeon_uvd_enc.h 2018-09-27 19:13:54.000000000 +0000 +++ mesa-19.0.1/src/gallium/drivers/radeon/radeon_uvd_enc.h 2019-03-31 23:16:37.000000000 +0000 @@ -464,6 +464,6 @@ }; void radeon_uvd_enc_1_1_init(struct radeon_uvd_encoder *enc); -bool si_radeon_uvd_enc_supported(struct si_screen *rscreen); +bool si_radeon_uvd_enc_supported(struct si_screen *sscreen); #endif // _RADEON_UVD_ENC_H diff -Nru mesa-18.3.3/src/gallium/drivers/radeon/radeon_vce.c mesa-19.0.1/src/gallium/drivers/radeon/radeon_vce.c --- mesa-18.3.3/src/gallium/drivers/radeon/radeon_vce.c 2018-09-27 19:13:54.000000000 +0000 +++ mesa-19.0.1/src/gallium/drivers/radeon/radeon_vce.c 2019-03-31 23:16:37.000000000 +0000 @@ -352,7 +352,9 @@ struct rvid_buffer *fb = feedback; if (size) { - uint32_t *ptr = enc->ws->buffer_map(fb->res->buf, enc->cs, PIPE_TRANSFER_READ_WRITE); + uint32_t *ptr = enc->ws->buffer_map( + fb->res->buf, enc->cs, + PIPE_TRANSFER_READ_WRITE | RADEON_TRANSFER_TEMPORARY); if (ptr[1]) { *size = ptr[4] - ptr[9]; @@ -438,7 +440,7 @@ enc->screen = context->screen; enc->ws = ws; - enc->cs = ws->cs_create(sctx->ctx, RING_VCE, rvce_cs_flush, enc); + enc->cs = ws->cs_create(sctx->ctx, RING_VCE, rvce_cs_flush, enc, false); if (!enc->cs) { RVID_ERR("Can't get command submission context.\n"); goto error; diff -Nru mesa-18.3.3/src/gallium/drivers/radeon/radeon_vcn_dec.c mesa-19.0.1/src/gallium/drivers/radeon/radeon_vcn_dec.c --- mesa-18.3.3/src/gallium/drivers/radeon/radeon_vcn_dec.c 2018-12-07 18:58:04.000000000 +0000 +++ mesa-19.0.1/src/gallium/drivers/radeon/radeon_vcn_dec.c 2019-03-31 23:16:37.000000000 +0000 @@ -822,8 +822,8 @@ decode->bsd_size = align(dec->bs_size, 128); decode->dpb_size = dec->dpb.res->buf->size; decode->dt_size = - r600_resource(((struct vl_video_buffer 
*)target)->resources[0])->buf->size + - r600_resource(((struct vl_video_buffer *)target)->resources[1])->buf->size; + si_resource(((struct vl_video_buffer *)target)->resources[0])->buf->size + + si_resource(((struct vl_video_buffer *)target)->resources[1])->buf->size; decode->sct_size = 0; decode->sc_coeff_size = 0; @@ -941,7 +941,9 @@ si_vid_clear_buffer(dec->base.context, &dec->ctx); /* ctx needs probs table */ - ptr = dec->ws->buffer_map(dec->ctx.res->buf, dec->cs, PIPE_TRANSFER_WRITE); + ptr = dec->ws->buffer_map( + dec->ctx.res->buf, dec->cs, + PIPE_TRANSFER_WRITE | RADEON_TRANSFER_TEMPORARY); fill_probs_table(ptr); dec->ws->buffer_unmap(dec->ctx.res->buf); } @@ -1034,7 +1036,8 @@ buf = &dec->msg_fb_it_probs_buffers[dec->cur_buffer]; /* and map it for CPU access */ - ptr = dec->ws->buffer_map(buf->res->buf, dec->cs, PIPE_TRANSFER_WRITE); + ptr = dec->ws->buffer_map(buf->res->buf, dec->cs, + PIPE_TRANSFER_WRITE | RADEON_TRANSFER_TEMPORARY); /* calc buffer offsets */ dec->msg = ptr; @@ -1312,7 +1315,7 @@ dec->bs_size = 0; dec->bs_ptr = dec->ws->buffer_map( dec->bs_buffers[dec->cur_buffer].res->buf, - dec->cs, PIPE_TRANSFER_WRITE); + dec->cs, PIPE_TRANSFER_WRITE | RADEON_TRANSFER_TEMPORARY); } /** @@ -1357,8 +1360,9 @@ return; } - dec->bs_ptr = dec->ws->buffer_map(buf->res->buf, dec->cs, - PIPE_TRANSFER_WRITE); + dec->bs_ptr = dec->ws->buffer_map( + buf->res->buf, dec->cs, + PIPE_TRANSFER_WRITE | RADEON_TRANSFER_TEMPORARY); if (!dec->bs_ptr) return; @@ -1507,7 +1511,7 @@ dec->stream_handle = si_vid_alloc_stream_handle(); dec->screen = context->screen; dec->ws = ws; - dec->cs = ws->cs_create(sctx->ctx, ring, NULL, NULL); + dec->cs = ws->cs_create(sctx->ctx, ring, NULL, NULL, false); if (!dec->cs) { RVID_ERR("Can't get command submission context.\n"); goto error; @@ -1543,7 +1547,9 @@ void *ptr; buf = &dec->msg_fb_it_probs_buffers[i]; - ptr = dec->ws->buffer_map(buf->res->buf, dec->cs, PIPE_TRANSFER_WRITE); + ptr = dec->ws->buffer_map( + buf->res->buf, dec->cs, + 
PIPE_TRANSFER_WRITE | RADEON_TRANSFER_TEMPORARY); ptr += FB_BUFFER_OFFSET + FB_BUFFER_SIZE; fill_probs_table(ptr); dec->ws->buffer_unmap(buf->res->buf); diff -Nru mesa-18.3.3/src/gallium/drivers/radeon/radeon_vcn_enc.c mesa-19.0.1/src/gallium/drivers/radeon/radeon_vcn_enc.c --- mesa-18.3.3/src/gallium/drivers/radeon/radeon_vcn_enc.c 2018-04-11 19:02:35.000000000 +0000 +++ mesa-19.0.1/src/gallium/drivers/radeon/radeon_vcn_enc.c 2019-03-31 23:16:37.000000000 +0000 @@ -244,7 +244,9 @@ struct rvid_buffer *fb = feedback; if (size) { - uint32_t *ptr = enc->ws->buffer_map(fb->res->buf, enc->cs, PIPE_TRANSFER_READ_WRITE); + uint32_t *ptr = enc->ws->buffer_map( + fb->res->buf, enc->cs, + PIPE_TRANSFER_READ_WRITE | RADEON_TRANSFER_TEMPORARY); if (ptr[1]) *size = ptr[6]; else @@ -286,7 +288,8 @@ enc->bits_in_shifter = 0; enc->screen = context->screen; enc->ws = ws; - enc->cs = ws->cs_create(sctx->ctx, RING_VCN_ENC, radeon_enc_cs_flush, enc); + enc->cs = ws->cs_create(sctx->ctx, RING_VCN_ENC, radeon_enc_cs_flush, + enc, false); if (!enc->cs) { RVID_ERR("Can't get command submission context.\n"); diff -Nru mesa-18.3.3/src/gallium/drivers/radeon/radeon_video.c mesa-19.0.1/src/gallium/drivers/radeon/radeon_video.c --- mesa-18.3.3/src/gallium/drivers/radeon/radeon_video.c 2018-12-07 18:58:04.000000000 +0000 +++ mesa-19.0.1/src/gallium/drivers/radeon/radeon_video.c 2019-03-31 23:16:37.000000000 +0000 @@ -63,8 +63,8 @@ * able to move buffers around individually, so request a * non-sub-allocated buffer. 
*/ - buffer->res = r600_resource(pipe_buffer_create(screen, PIPE_BIND_SHARED, - usage, size)); + buffer->res = si_resource(pipe_buffer_create(screen, PIPE_BIND_SHARED, + usage, size)); return buffer->res != NULL; } @@ -72,7 +72,7 @@ /* destroy a buffer */ void si_vid_destroy_buffer(struct rvid_buffer *buffer) { - r600_resource_reference(&buffer->res, NULL); + si_resource_reference(&buffer->res, NULL); } /* reallocate a buffer, preserving its content */ @@ -88,11 +88,13 @@ if (!si_vid_create_buffer(screen, new_buf, new_size, new_buf->usage)) goto error; - src = ws->buffer_map(old_buf.res->buf, cs, PIPE_TRANSFER_READ); + src = ws->buffer_map(old_buf.res->buf, cs, + PIPE_TRANSFER_READ | RADEON_TRANSFER_TEMPORARY); if (!src) goto error; - dst = ws->buffer_map(new_buf->res->buf, cs, PIPE_TRANSFER_WRITE); + dst = ws->buffer_map(new_buf->res->buf, cs, + PIPE_TRANSFER_WRITE | RADEON_TRANSFER_TEMPORARY); if (!dst) goto error; diff -Nru mesa-18.3.3/src/gallium/drivers/radeon/radeon_video.h mesa-19.0.1/src/gallium/drivers/radeon/radeon_video.h --- mesa-18.3.3/src/gallium/drivers/radeon/radeon_video.h 2018-09-27 19:13:54.000000000 +0000 +++ mesa-19.0.1/src/gallium/drivers/radeon/radeon_video.h 2019-03-31 23:16:37.000000000 +0000 @@ -40,7 +40,7 @@ struct rvid_buffer { unsigned usage; - struct r600_resource *res; + struct si_resource *res; }; /* generate an stream handle */ diff -Nru mesa-18.3.3/src/gallium/drivers/radeon/radeon_winsys.h mesa-19.0.1/src/gallium/drivers/radeon/radeon_winsys.h --- mesa-18.3.3/src/gallium/drivers/radeon/radeon_winsys.h 2018-12-07 18:58:04.000000000 +0000 +++ mesa-19.0.1/src/gallium/drivers/radeon/radeon_winsys.h 2019-03-31 23:16:37.000000000 +0000 @@ -52,7 +52,9 @@ enum radeon_bo_domain { /* bitfield */ RADEON_DOMAIN_GTT = 2, RADEON_DOMAIN_VRAM = 4, - RADEON_DOMAIN_VRAM_GTT = RADEON_DOMAIN_VRAM | RADEON_DOMAIN_GTT + RADEON_DOMAIN_VRAM_GTT = RADEON_DOMAIN_VRAM | RADEON_DOMAIN_GTT, + RADEON_DOMAIN_GDS = 8, + RADEON_DOMAIN_OA = 16, }; enum 
radeon_bo_flag { /* bitfield */ @@ -76,6 +78,15 @@ RADEON_USAGE_SYNCHRONIZED = 8 }; +enum radeon_transfer_flags { + /* Indicates that the caller will unmap the buffer. + * + * Not unmapping buffers is an important performance optimization for + * OpenGL (avoids kernel overhead for frequently mapped buffers). + */ + RADEON_TRANSFER_TEMPORARY = (PIPE_TRANSFER_DRV_PRV << 0), +}; + #define RADEON_SPARSE_PAGE_SIZE (64 * 1024) enum ring_type { @@ -294,9 +305,12 @@ * Map the entire data store of a buffer object into the client's address * space. * + * Callers are expected to unmap buffers again if and only if the + * RADEON_TRANSFER_TEMPORARY flag is set in \p usage. + * * \param buf A winsys buffer object to map. * \param cs A command stream to flush if the buffer is referenced by it. - * \param usage A bitmask of the PIPE_TRANSFER_* flags. + * \param usage A bitmask of the PIPE_TRANSFER_* and RADEON_TRANSFER_* flags. * \return The pointer at the beginning of the buffer. */ void *(*buffer_map)(struct pb_buffer *buf, @@ -352,6 +366,7 @@ */ struct pb_buffer *(*buffer_from_handle)(struct radeon_winsys *ws, struct winsys_handle *whandle, + unsigned vm_alignment, unsigned *stride, unsigned *offset); /** @@ -464,10 +479,11 @@ * \param user User pointer that will be passed to the flush callback. */ struct radeon_cmdbuf *(*cs_create)(struct radeon_winsys_ctx *ctx, - enum ring_type ring_type, - void (*flush)(void *ctx, unsigned flags, - struct pipe_fence_handle **fence), - void *flush_ctx); + enum ring_type ring_type, + void (*flush)(void *ctx, unsigned flags, + struct pipe_fence_handle **fence), + void *flush_ctx, + bool stop_exec_on_failure); /** * Destroy a command stream. 
diff -Nru mesa-18.3.3/src/gallium/drivers/radeonsi/cik_sdma.c mesa-19.0.1/src/gallium/drivers/radeonsi/cik_sdma.c --- mesa-18.3.3/src/gallium/drivers/radeonsi/cik_sdma.c 2018-12-07 18:58:04.000000000 +0000 +++ mesa-19.0.1/src/gallium/drivers/radeonsi/cik_sdma.c 2019-03-31 23:16:37.000000000 +0000 @@ -35,20 +35,20 @@ { struct radeon_cmdbuf *cs = ctx->dma_cs; unsigned i, ncopy, csize; - struct r600_resource *rdst = r600_resource(dst); - struct r600_resource *rsrc = r600_resource(src); + struct si_resource *sdst = si_resource(dst); + struct si_resource *ssrc = si_resource(src); /* Mark the buffer range of destination as valid (initialized), * so that transfer_map knows it should wait for the GPU when mapping * that range. */ - util_range_add(&rdst->valid_buffer_range, dst_offset, + util_range_add(&sdst->valid_buffer_range, dst_offset, dst_offset + size); - dst_offset += rdst->gpu_address; - src_offset += rsrc->gpu_address; + dst_offset += sdst->gpu_address; + src_offset += ssrc->gpu_address; ncopy = DIV_ROUND_UP(size, CIK_SDMA_COPY_MAX_SIZE); - si_need_dma_space(ctx, ncopy * 7, rdst, rsrc); + si_need_dma_space(ctx, ncopy * 7, sdst, ssrc); for (i = 0; i < ncopy; i++) { csize = MIN2(size, CIK_SDMA_COPY_MAX_SIZE); diff -Nru mesa-18.3.3/src/gallium/drivers/radeonsi/driinfo_radeonsi.h mesa-19.0.1/src/gallium/drivers/radeonsi/driinfo_radeonsi.h --- mesa-18.3.3/src/gallium/drivers/radeonsi/driinfo_radeonsi.h 2018-12-07 18:58:04.000000000 +0000 +++ mesa-19.0.1/src/gallium/drivers/radeonsi/driinfo_radeonsi.h 2019-03-31 23:16:37.000000000 +0000 @@ -1,4 +1,8 @@ // DriConf options specific to radeonsi +DRI_CONF_SECTION_QUALITY + DRI_CONF_ADAPTIVE_SYNC("true") +DRI_CONF_SECTION_END + DRI_CONF_SECTION_PERFORMANCE DRI_CONF_RADEONSI_ENABLE_SISCHED("false") DRI_CONF_RADEONSI_ASSUME_NO_Z_FIGHTS("false") @@ -8,4 +12,5 @@ DRI_CONF_SECTION_DEBUG DRI_CONF_RADEONSI_CLEAR_DB_CACHE_BEFORE_CLEAR("false") + DRI_CONF_RADEONSI_ENABLE_NIR("false") DRI_CONF_SECTION_END diff -Nru 
mesa-18.3.3/src/gallium/drivers/radeonsi/Makefile.sources mesa-19.0.1/src/gallium/drivers/radeonsi/Makefile.sources --- mesa-18.3.3/src/gallium/drivers/radeonsi/Makefile.sources 2018-12-07 18:58:04.000000000 +0000 +++ mesa-19.0.1/src/gallium/drivers/radeonsi/Makefile.sources 2019-03-31 23:16:37.000000000 +0000 @@ -49,7 +49,6 @@ si_test_dma_perf.c \ si_texture.c \ si_uvd.c \ - ../radeon/r600_perfcounter.c \ ../radeon/radeon_uvd.c \ ../radeon/radeon_uvd.h \ ../radeon/radeon_vcn_dec_jpeg.c \ diff -Nru mesa-18.3.3/src/gallium/drivers/radeonsi/meson.build mesa-19.0.1/src/gallium/drivers/radeonsi/meson.build --- mesa-18.3.3/src/gallium/drivers/radeonsi/meson.build 2018-12-07 18:58:04.000000000 +0000 +++ mesa-19.0.1/src/gallium/drivers/radeonsi/meson.build 2019-03-31 23:16:37.000000000 +0000 @@ -65,7 +65,6 @@ 'si_test_dma_perf.c', 'si_texture.c', 'si_uvd.c', - '../radeon/r600_perfcounter.c', '../radeon/radeon_uvd.c', '../radeon/radeon_uvd.h', '../radeon/radeon_vcn_enc_1_2.c', diff -Nru mesa-18.3.3/src/gallium/drivers/radeonsi/si_blit.c mesa-19.0.1/src/gallium/drivers/radeonsi/si_blit.c --- mesa-18.3.3/src/gallium/drivers/radeonsi/si_blit.c 2018-12-07 18:58:04.000000000 +0000 +++ mesa-19.0.1/src/gallium/drivers/radeonsi/si_blit.c 2019-03-31 23:16:37.000000000 +0000 @@ -902,6 +902,7 @@ { struct si_context *sctx = (struct si_context *)ctx; struct si_texture *ssrc = (struct si_texture*)src; + struct si_texture *sdst = (struct si_texture*)dst; struct pipe_surface *dst_view, dst_templ; struct pipe_sampler_view src_templ, *src_view; unsigned dst_width, dst_height, src_width0, src_height0; @@ -914,6 +915,17 @@ return; } + if (!util_format_is_compressed(src->format) && + !util_format_is_compressed(dst->format) && + !util_format_is_depth_or_stencil(src->format) && + src->nr_samples <= 1 && + !sdst->dcc_offset && + !(dst->target != src->target && + (src->target == PIPE_TEXTURE_1D_ARRAY || dst->target == PIPE_TEXTURE_1D_ARRAY))) { + si_compute_copy_image(sctx, dst, dst_level, src, 
src_level, dstx, dsty, dstz, src_box); + return; + } + assert(u_max_sample(dst) == u_max_sample(src)); /* The driver doesn't decompress resources automatically while @@ -1012,36 +1024,8 @@ * Note that some chips avoid this issue by using SDMA. */ if (util_format_is_snorm8(dst_templ.format)) { - switch (dst_templ.format) { - case PIPE_FORMAT_R8_SNORM: - dst_templ.format = src_templ.format = PIPE_FORMAT_R8_SINT; - break; - case PIPE_FORMAT_R8G8_SNORM: - dst_templ.format = src_templ.format = PIPE_FORMAT_R8G8_SINT; - break; - case PIPE_FORMAT_R8G8B8X8_SNORM: - dst_templ.format = src_templ.format = PIPE_FORMAT_R8G8B8X8_SINT; - break; - case PIPE_FORMAT_R8G8B8A8_SNORM: - /* There are no SINT variants for ABGR and XBGR, so we have to use RGBA. */ - case PIPE_FORMAT_A8B8G8R8_SNORM: - case PIPE_FORMAT_X8B8G8R8_SNORM: - dst_templ.format = src_templ.format = PIPE_FORMAT_R8G8B8A8_SINT; - break; - case PIPE_FORMAT_A8_SNORM: - dst_templ.format = src_templ.format = PIPE_FORMAT_A8_SINT; - break; - case PIPE_FORMAT_L8_SNORM: - dst_templ.format = src_templ.format = PIPE_FORMAT_L8_SINT; - break; - case PIPE_FORMAT_L8A8_SNORM: - dst_templ.format = src_templ.format = PIPE_FORMAT_L8A8_SINT; - break; - case PIPE_FORMAT_I8_SNORM: - dst_templ.format = src_templ.format = PIPE_FORMAT_I8_SINT; - break; - default:; /* fall through */ - } + dst_templ.format = src_templ.format = + util_format_snorm8_to_sint8(dst_templ.format); } vi_disable_dcc_if_incompatible_format(sctx, dst, dst_level, @@ -1193,7 +1177,7 @@ templ.depth0 = 1; templ.array_size = 1; templ.usage = PIPE_USAGE_DEFAULT; - templ.flags = SI_RESOURCE_FLAG_FORCE_TILING | + templ.flags = SI_RESOURCE_FLAG_FORCE_MSAA_TILING | SI_RESOURCE_FLAG_DISABLE_DCC; /* The src and dst microtile modes must be the same. 
*/ diff -Nru mesa-18.3.3/src/gallium/drivers/radeonsi/si_buffer.c mesa-19.0.1/src/gallium/drivers/radeonsi/si_buffer.c --- mesa-18.3.3/src/gallium/drivers/radeonsi/si_buffer.c 2018-12-07 18:58:04.000000000 +0000 +++ mesa-19.0.1/src/gallium/drivers/radeonsi/si_buffer.c 2019-03-31 23:16:37.000000000 +0000 @@ -44,7 +44,7 @@ } void *si_buffer_map_sync_with_rings(struct si_context *sctx, - struct r600_resource *resource, + struct si_resource *resource, unsigned usage) { enum radeon_bo_usage rusage = RADEON_USAGE_READWRITE; @@ -101,7 +101,7 @@ } void si_init_resource_fields(struct si_screen *sscreen, - struct r600_resource *res, + struct si_resource *res, uint64_t size, unsigned alignment) { struct si_texture *tex = (struct si_texture*)res; @@ -201,7 +201,7 @@ } bool si_alloc_resource(struct si_screen *sscreen, - struct r600_resource *res) + struct si_resource *res) { struct pb_buffer *old_buf, *new_buf; @@ -248,12 +248,12 @@ static void si_buffer_destroy(struct pipe_screen *screen, struct pipe_resource *buf) { - struct r600_resource *rbuffer = r600_resource(buf); + struct si_resource *buffer = si_resource(buf); threaded_resource_deinit(buf); - util_range_destroy(&rbuffer->valid_buffer_range); - pb_reference(&rbuffer->buf, NULL); - FREE(rbuffer); + util_range_destroy(&buffer->valid_buffer_range); + pb_reference(&buffer->buf, NULL); + FREE(buffer); } /* Reallocate the buffer a update all resource bindings where the buffer is @@ -264,32 +264,32 @@ */ static bool si_invalidate_buffer(struct si_context *sctx, - struct r600_resource *rbuffer) + struct si_resource *buf) { /* Shared buffers can't be reallocated. */ - if (rbuffer->b.is_shared) + if (buf->b.is_shared) return false; /* Sparse buffers can't be reallocated. */ - if (rbuffer->flags & RADEON_FLAG_SPARSE) + if (buf->flags & RADEON_FLAG_SPARSE) return false; /* In AMD_pinned_memory, the user pointer association only gets * broken when the buffer is explicitly re-allocated. 
*/ - if (rbuffer->b.is_user_ptr) + if (buf->b.is_user_ptr) return false; /* Check if mapping this buffer would cause waiting for the GPU. */ - if (si_rings_is_buffer_referenced(sctx, rbuffer->buf, RADEON_USAGE_READWRITE) || - !sctx->ws->buffer_wait(rbuffer->buf, 0, RADEON_USAGE_READWRITE)) { - uint64_t old_va = rbuffer->gpu_address; + if (si_rings_is_buffer_referenced(sctx, buf->buf, RADEON_USAGE_READWRITE) || + !sctx->ws->buffer_wait(buf->buf, 0, RADEON_USAGE_READWRITE)) { + uint64_t old_va = buf->gpu_address; /* Reallocate the buffer in the same pipe_resource. */ - si_alloc_resource(sctx->screen, rbuffer); - si_rebind_buffer(sctx, &rbuffer->b.b, old_va); + si_alloc_resource(sctx->screen, buf); + si_rebind_buffer(sctx, &buf->b.b, old_va); } else { - util_range_set_empty(&rbuffer->valid_buffer_range); + util_range_set_empty(&buf->valid_buffer_range); } return true; @@ -301,22 +301,22 @@ struct pipe_resource *src) { struct si_context *sctx = (struct si_context*)ctx; - struct r600_resource *rdst = r600_resource(dst); - struct r600_resource *rsrc = r600_resource(src); - uint64_t old_gpu_address = rdst->gpu_address; - - pb_reference(&rdst->buf, rsrc->buf); - rdst->gpu_address = rsrc->gpu_address; - rdst->b.b.bind = rsrc->b.b.bind; - rdst->b.max_forced_staging_uploads = rsrc->b.max_forced_staging_uploads; - rdst->max_forced_staging_uploads = rsrc->max_forced_staging_uploads; - rdst->flags = rsrc->flags; - - assert(rdst->vram_usage == rsrc->vram_usage); - assert(rdst->gart_usage == rsrc->gart_usage); - assert(rdst->bo_size == rsrc->bo_size); - assert(rdst->bo_alignment == rsrc->bo_alignment); - assert(rdst->domains == rsrc->domains); + struct si_resource *sdst = si_resource(dst); + struct si_resource *ssrc = si_resource(src); + uint64_t old_gpu_address = sdst->gpu_address; + + pb_reference(&sdst->buf, ssrc->buf); + sdst->gpu_address = ssrc->gpu_address; + sdst->b.b.bind = ssrc->b.b.bind; + sdst->b.max_forced_staging_uploads = ssrc->b.max_forced_staging_uploads; + 
sdst->max_forced_staging_uploads = ssrc->max_forced_staging_uploads; + sdst->flags = ssrc->flags; + + assert(sdst->vram_usage == ssrc->vram_usage); + assert(sdst->gart_usage == ssrc->gart_usage); + assert(sdst->bo_size == ssrc->bo_size); + assert(sdst->bo_alignment == ssrc->bo_alignment); + assert(sdst->domains == ssrc->domains); si_rebind_buffer(sctx, dst, old_gpu_address); } @@ -325,11 +325,11 @@ struct pipe_resource *resource) { struct si_context *sctx = (struct si_context*)ctx; - struct r600_resource *rbuffer = r600_resource(resource); + struct si_resource *buf = si_resource(resource); /* We currently only do anyting here for buffers */ if (resource->target == PIPE_BUFFER) - (void)si_invalidate_buffer(sctx, rbuffer); + (void)si_invalidate_buffer(sctx, buf); } static void *si_buffer_get_transfer(struct pipe_context *ctx, @@ -337,7 +337,7 @@ unsigned usage, const struct pipe_box *box, struct pipe_transfer **ptransfer, - void *data, struct r600_resource *staging, + void *data, struct si_resource *staging, unsigned offset) { struct si_context *sctx = (struct si_context*)ctx; @@ -370,7 +370,7 @@ struct pipe_transfer **ptransfer) { struct si_context *sctx = (struct si_context*)ctx; - struct r600_resource *rbuffer = r600_resource(resource); + struct si_resource *buf = si_resource(resource); uint8_t *data; assert(box->x + box->width <= resource->width0); @@ -386,7 +386,7 @@ * * So don't ever use staging buffers. 
*/ - if (rbuffer->b.is_user_ptr) + if (buf->b.is_user_ptr) usage |= PIPE_TRANSFER_PERSISTENT; /* See if the buffer range being mapped has never been initialized, @@ -394,8 +394,8 @@ if (!(usage & (PIPE_TRANSFER_UNSYNCHRONIZED | TC_TRANSFER_MAP_NO_INFER_UNSYNCHRONIZED)) && usage & PIPE_TRANSFER_WRITE && - !rbuffer->b.is_shared && - !util_ranges_intersect(&rbuffer->valid_buffer_range, box->x, box->x + box->width)) { + !buf->b.is_shared && + !util_ranges_intersect(&buf->valid_buffer_range, box->x, box->x + box->width)) { usage |= PIPE_TRANSFER_UNSYNCHRONIZED; } @@ -414,8 +414,8 @@ !(usage & PIPE_TRANSFER_PERSISTENT) && /* Try not to decrement the counter if it's not positive. Still racy, * but it makes it harder to wrap the counter from INT_MIN to INT_MAX. */ - rbuffer->max_forced_staging_uploads > 0 && - p_atomic_dec_return(&rbuffer->max_forced_staging_uploads) >= 0) { + buf->max_forced_staging_uploads > 0 && + p_atomic_dec_return(&buf->max_forced_staging_uploads) >= 0) { usage &= ~(PIPE_TRANSFER_DISCARD_WHOLE_RESOURCE | PIPE_TRANSFER_UNSYNCHRONIZED); usage |= PIPE_TRANSFER_DISCARD_RANGE; @@ -427,7 +427,7 @@ TC_TRANSFER_MAP_NO_INVALIDATE))) { assert(usage & PIPE_TRANSFER_WRITE); - if (si_invalidate_buffer(sctx, rbuffer)) { + if (si_invalidate_buffer(sctx, buf)) { /* At this point, the buffer is always idle. */ usage |= PIPE_TRANSFER_UNSYNCHRONIZED; } else { @@ -439,18 +439,18 @@ if ((usage & PIPE_TRANSFER_DISCARD_RANGE) && ((!(usage & (PIPE_TRANSFER_UNSYNCHRONIZED | PIPE_TRANSFER_PERSISTENT))) || - (rbuffer->flags & RADEON_FLAG_SPARSE))) { + (buf->flags & RADEON_FLAG_SPARSE))) { assert(usage & PIPE_TRANSFER_WRITE); /* Check if mapping this buffer would cause waiting for the GPU. 
*/ - if (rbuffer->flags & RADEON_FLAG_SPARSE || + if (buf->flags & RADEON_FLAG_SPARSE || force_discard_range || - si_rings_is_buffer_referenced(sctx, rbuffer->buf, RADEON_USAGE_READWRITE) || - !sctx->ws->buffer_wait(rbuffer->buf, 0, RADEON_USAGE_READWRITE)) { + si_rings_is_buffer_referenced(sctx, buf->buf, RADEON_USAGE_READWRITE) || + !sctx->ws->buffer_wait(buf->buf, 0, RADEON_USAGE_READWRITE)) { /* Do a wait-free write-only transfer using a temporary buffer. */ unsigned offset; - struct r600_resource *staging = NULL; + struct si_resource *staging = NULL; u_upload_alloc(ctx->stream_uploader, 0, box->width + (box->x % SI_MAP_BUFFER_ALIGNMENT), @@ -462,7 +462,7 @@ data += box->x % SI_MAP_BUFFER_ALIGNMENT; return si_buffer_get_transfer(ctx, resource, usage, box, ptransfer, data, staging, offset); - } else if (rbuffer->flags & RADEON_FLAG_SPARSE) { + } else if (buf->flags & RADEON_FLAG_SPARSE) { return NULL; } } else { @@ -473,13 +473,13 @@ /* Use a staging buffer in cached GTT for reads. */ else if (((usage & PIPE_TRANSFER_READ) && !(usage & PIPE_TRANSFER_PERSISTENT) && - (rbuffer->domains & RADEON_DOMAIN_VRAM || - rbuffer->flags & RADEON_FLAG_GTT_WC)) || - (rbuffer->flags & RADEON_FLAG_SPARSE)) { - struct r600_resource *staging; + (buf->domains & RADEON_DOMAIN_VRAM || + buf->flags & RADEON_FLAG_GTT_WC)) || + (buf->flags & RADEON_FLAG_SPARSE)) { + struct si_resource *staging; assert(!(usage & TC_TRANSFER_MAP_THREADED_UNSYNC)); - staging = r600_resource(pipe_buffer_create( + staging = si_resource(pipe_buffer_create( ctx->screen, 0, PIPE_USAGE_STAGING, box->width + (box->x % SI_MAP_BUFFER_ALIGNMENT))); if (staging) { @@ -491,19 +491,19 @@ data = si_buffer_map_sync_with_rings(sctx, staging, usage & ~PIPE_TRANSFER_UNSYNCHRONIZED); if (!data) { - r600_resource_reference(&staging, NULL); + si_resource_reference(&staging, NULL); return NULL; } data += box->x % SI_MAP_BUFFER_ALIGNMENT; return si_buffer_get_transfer(ctx, resource, usage, box, ptransfer, data, staging, 0); - } 
else if (rbuffer->flags & RADEON_FLAG_SPARSE) { + } else if (buf->flags & RADEON_FLAG_SPARSE) { return NULL; } } - data = si_buffer_map_sync_with_rings(sctx, rbuffer, usage); + data = si_buffer_map_sync_with_rings(sctx, buf, usage); if (!data) { return NULL; } @@ -518,17 +518,20 @@ const struct pipe_box *box) { struct si_transfer *stransfer = (struct si_transfer*)transfer; - struct r600_resource *rbuffer = r600_resource(transfer->resource); + struct si_resource *buf = si_resource(transfer->resource); if (stransfer->staging) { + unsigned src_offset = stransfer->offset + + transfer->box.x % SI_MAP_BUFFER_ALIGNMENT + + (box->x - transfer->box.x); + /* Copy the staging buffer into the original one. */ si_copy_buffer((struct si_context*)ctx, transfer->resource, - &stransfer->staging->b.b, box->x, - stransfer->offset + box->x % SI_MAP_BUFFER_ALIGNMENT, + &stransfer->staging->b.b, box->x, src_offset, box->width); } - util_range_add(&rbuffer->valid_buffer_range, box->x, + util_range_add(&buf->valid_buffer_range, box->x, box->x + box->width); } @@ -557,7 +560,7 @@ !(transfer->usage & PIPE_TRANSFER_FLUSH_EXPLICIT)) si_buffer_do_flush_region(ctx, transfer, &transfer->box); - r600_resource_reference(&stransfer->staging, NULL); + si_resource_reference(&stransfer->staging, NULL); assert(stransfer->b.staging == NULL); /* for threaded context only */ pipe_resource_reference(&transfer->resource, NULL); @@ -597,27 +600,27 @@ si_buffer_transfer_unmap, /* transfer_unmap */ }; -static struct r600_resource * +static struct si_resource * si_alloc_buffer_struct(struct pipe_screen *screen, const struct pipe_resource *templ) { - struct r600_resource *rbuffer; + struct si_resource *buf; - rbuffer = MALLOC_STRUCT(r600_resource); + buf = MALLOC_STRUCT(si_resource); - rbuffer->b.b = *templ; - rbuffer->b.b.next = NULL; - pipe_reference_init(&rbuffer->b.b.reference, 1); - rbuffer->b.b.screen = screen; - - rbuffer->b.vtbl = &si_buffer_vtbl; - threaded_resource_init(&rbuffer->b.b); - - rbuffer->buf 
= NULL; - rbuffer->bind_history = 0; - rbuffer->TC_L2_dirty = false; - util_range_init(&rbuffer->valid_buffer_range); - return rbuffer; + buf->b.b = *templ; + buf->b.b.next = NULL; + pipe_reference_init(&buf->b.b.reference, 1); + buf->b.b.screen = screen; + + buf->b.vtbl = &si_buffer_vtbl; + threaded_resource_init(&buf->b.b); + + buf->buf = NULL; + buf->bind_history = 0; + buf->TC_L2_dirty = false; + util_range_init(&buf->valid_buffer_range); + return buf; } static struct pipe_resource *si_buffer_create(struct pipe_screen *screen, @@ -625,21 +628,21 @@ unsigned alignment) { struct si_screen *sscreen = (struct si_screen*)screen; - struct r600_resource *rbuffer = si_alloc_buffer_struct(screen, templ); + struct si_resource *buf = si_alloc_buffer_struct(screen, templ); if (templ->flags & PIPE_RESOURCE_FLAG_SPARSE) - rbuffer->b.b.flags |= SI_RESOURCE_FLAG_UNMAPPABLE; + buf->b.b.flags |= SI_RESOURCE_FLAG_UNMAPPABLE; - si_init_resource_fields(sscreen, rbuffer, templ->width0, alignment); + si_init_resource_fields(sscreen, buf, templ->width0, alignment); if (templ->flags & PIPE_RESOURCE_FLAG_SPARSE) - rbuffer->flags |= RADEON_FLAG_SPARSE; + buf->flags |= RADEON_FLAG_SPARSE; - if (!si_alloc_resource(sscreen, rbuffer)) { - FREE(rbuffer); + if (!si_alloc_resource(sscreen, buf)) { + FREE(buf); return NULL; } - return &rbuffer->b.b; + return &buf->b.b; } struct pipe_resource *pipe_aligned_buffer_create(struct pipe_screen *screen, @@ -661,11 +664,11 @@ return si_buffer_create(screen, &buffer, alignment); } -struct r600_resource *si_aligned_buffer_create(struct pipe_screen *screen, +struct si_resource *si_aligned_buffer_create(struct pipe_screen *screen, unsigned flags, unsigned usage, unsigned size, unsigned alignment) { - return r600_resource(pipe_aligned_buffer_create(screen, flags, usage, + return si_resource(pipe_aligned_buffer_create(screen, flags, usage, size, alignment)); } @@ -676,26 +679,26 @@ { struct si_screen *sscreen = (struct si_screen*)screen; struct radeon_winsys 
*ws = sscreen->ws; - struct r600_resource *rbuffer = si_alloc_buffer_struct(screen, templ); + struct si_resource *buf = si_alloc_buffer_struct(screen, templ); - rbuffer->domains = RADEON_DOMAIN_GTT; - rbuffer->flags = 0; - rbuffer->b.is_user_ptr = true; - util_range_add(&rbuffer->valid_buffer_range, 0, templ->width0); - util_range_add(&rbuffer->b.valid_buffer_range, 0, templ->width0); + buf->domains = RADEON_DOMAIN_GTT; + buf->flags = 0; + buf->b.is_user_ptr = true; + util_range_add(&buf->valid_buffer_range, 0, templ->width0); + util_range_add(&buf->b.valid_buffer_range, 0, templ->width0); /* Convert a user pointer to a buffer. */ - rbuffer->buf = ws->buffer_from_ptr(ws, user_memory, templ->width0); - if (!rbuffer->buf) { - FREE(rbuffer); + buf->buf = ws->buffer_from_ptr(ws, user_memory, templ->width0); + if (!buf->buf) { + FREE(buf); return NULL; } - rbuffer->gpu_address = ws->buffer_get_virtual_address(rbuffer->buf); - rbuffer->vram_usage = 0; - rbuffer->gart_usage = templ->width0; + buf->gpu_address = ws->buffer_get_virtual_address(buf->buf); + buf->vram_usage = 0; + buf->gart_usage = templ->width0; - return &rbuffer->b.b; + return &buf->b.b; } static struct pipe_resource *si_resource_create(struct pipe_screen *screen, @@ -714,7 +717,7 @@ bool commit) { struct si_context *ctx = (struct si_context *)pctx; - struct r600_resource *res = r600_resource(resource); + struct si_resource *res = si_resource(resource); /* * Since buffer commitment changes cannot be pipelined, we need to diff -Nru mesa-18.3.3/src/gallium/drivers/radeonsi/si_build_pm4.h mesa-19.0.1/src/gallium/drivers/radeonsi/si_build_pm4.h --- mesa-18.3.3/src/gallium/drivers/radeonsi/si_build_pm4.h 2018-12-07 18:58:04.000000000 +0000 +++ mesa-19.0.1/src/gallium/drivers/radeonsi/si_build_pm4.h 2019-03-31 23:16:37.000000000 +0000 @@ -100,12 +100,18 @@ } static inline void radeon_set_uconfig_reg_idx(struct radeon_cmdbuf *cs, + struct si_screen *screen, unsigned reg, unsigned idx, unsigned value) { assert(reg 
>= CIK_UCONFIG_REG_OFFSET && reg < CIK_UCONFIG_REG_END); assert(cs->current.cdw + 3 <= cs->current.max_dw); - radeon_emit(cs, PKT3(PKT3_SET_UCONFIG_REG, 1, 0)); + assert(idx != 0); + unsigned opcode = PKT3_SET_UCONFIG_REG_INDEX; + if (screen->info.chip_class < GFX9 || + (screen->info.chip_class == GFX9 && screen->info.me_fw_version < 26)) + opcode = PKT3_SET_UCONFIG_REG; + radeon_emit(cs, PKT3(opcode, 1, 0)); radeon_emit(cs, (reg - CIK_UCONFIG_REG_OFFSET) >> 2 | (idx << 28)); radeon_emit(cs, value); } diff -Nru mesa-18.3.3/src/gallium/drivers/radeonsi/si_clear.c mesa-19.0.1/src/gallium/drivers/radeonsi/si_clear.c --- mesa-18.3.3/src/gallium/drivers/radeonsi/si_clear.c 2018-12-07 18:58:04.000000000 +0000 +++ mesa-19.0.1/src/gallium/drivers/radeonsi/si_clear.c 2019-03-31 23:16:37.000000000 +0000 @@ -34,6 +34,15 @@ SI_CLEAR_SURFACE = SI_SAVE_FRAMEBUFFER | SI_SAVE_FRAGMENT_STATE, }; +enum si_dcc_clear_code +{ + DCC_CLEAR_COLOR_0000 = 0x00000000, + DCC_CLEAR_COLOR_0001 = 0x40404040, + DCC_CLEAR_COLOR_1110 = 0x80808080, + DCC_CLEAR_COLOR_1111 = 0xC0C0C0C0, + DCC_CLEAR_COLOR_REG = 0x20202020, +}; + static void si_alloc_separate_cmask(struct si_screen *sscreen, struct si_texture *tex) { @@ -133,7 +142,7 @@ return false; *eliminate_needed = true; - *clear_value = 0x20202020U; /* use CB clear color registers */ + *clear_value = DCC_CLEAR_COLOR_REG; if (desc->layout != UTIL_FORMAT_LAYOUT_PLAIN) return true; /* need ELIMINATE_FAST_CLEAR */ @@ -203,15 +212,22 @@ } /* This doesn't need ELIMINATE_FAST_CLEAR. - * CB uses both the DCC clear codes and the CB clear color registers, - * so they must match. + * On chips predating Raven2, the DCC clear codes and the CB clear + * color registers must match. 
*/ *eliminate_needed = false; - if (color_value) - *clear_value |= 0x80808080U; - if (alpha_value) - *clear_value |= 0x40404040U; + if (color_value) { + if (alpha_value) + *clear_value = DCC_CLEAR_COLOR_1111; + else + *clear_value = DCC_CLEAR_COLOR_1110; + } else { + if (alpha_value) + *clear_value = DCC_CLEAR_COLOR_0001; + else + *clear_value = DCC_CLEAR_COLOR_0000; + } return true; } @@ -532,6 +548,12 @@ *buffers &= ~clear_bit; + /* Chips with DCC constant encoding don't need to set the clear + * color registers for DCC clear values 0 and 1. + */ + if (sctx->screen->has_dcc_constant_encode && !eliminate_needed) + continue; + if (si_set_clear_color(tex, fb->cbufs[i]->format, color)) { sctx->framebuffer.dirty_cbufs |= 1 << i; si_mark_atom_dirty(sctx, &sctx->atoms.s.framebuffer); diff -Nru mesa-18.3.3/src/gallium/drivers/radeonsi/si_compute_blit.c mesa-19.0.1/src/gallium/drivers/radeonsi/si_compute_blit.c --- mesa-18.3.3/src/gallium/drivers/radeonsi/si_compute_blit.c 2018-12-07 18:58:04.000000000 +0000 +++ mesa-19.0.1/src/gallium/drivers/radeonsi/si_compute_blit.c 2019-03-31 23:16:37.000000000 +0000 @@ -24,6 +24,7 @@ */ #include "si_pipe.h" +#include "util/u_format.h" /* Note: Compute shaders always use SI_COMPUTE_DST_CACHE_POLICY for dst * and L2_STREAM for src. 
@@ -57,6 +58,20 @@ } } +static void si_compute_internal_begin(struct si_context *sctx) +{ + sctx->flags &= ~SI_CONTEXT_START_PIPELINE_STATS; + sctx->flags |= SI_CONTEXT_STOP_PIPELINE_STATS; + sctx->render_cond_force_off = true; +} + +static void si_compute_internal_end(struct si_context *sctx) +{ + sctx->flags &= ~SI_CONTEXT_STOP_PIPELINE_STATS; + sctx->flags |= SI_CONTEXT_START_PIPELINE_STATS; + sctx->render_cond_force_off = false; +} + static void si_compute_do_clear_or_copy(struct si_context *sctx, struct pipe_resource *dst, unsigned dst_offset, @@ -76,10 +91,10 @@ assert(dst->target != PIPE_BUFFER || dst_offset + size <= dst->width0); assert(!src || src_offset + size <= src->width0); + si_compute_internal_begin(sctx); sctx->flags |= SI_CONTEXT_PS_PARTIAL_FLUSH | SI_CONTEXT_CS_PARTIAL_FLUSH | si_get_flush_flags(sctx, coher, SI_COMPUTE_DST_CACHE_POLICY); - si_emit_cache_flush(sctx); /* Save states. */ void *saved_cs = sctx->cs_shader_state.program; @@ -112,12 +127,20 @@ sb[0].buffer_offset = dst_offset; sb[0].buffer_size = size; + bool shader_dst_stream_policy = SI_COMPUTE_DST_CACHE_POLICY != L2_LRU; + if (src) { sb[1].buffer = src; sb[1].buffer_offset = src_offset; sb[1].buffer_size = size; ctx->set_shader_buffers(ctx, PIPE_SHADER_COMPUTE, 0, 2, sb); + + if (!sctx->cs_copy_buffer) { + sctx->cs_copy_buffer = si_create_dma_compute_shader(&sctx->b, + SI_COMPUTE_COPY_DW_PER_THREAD, + shader_dst_stream_policy, true); + } ctx->bind_compute_state(ctx, sctx->cs_copy_buffer); } else { assert(clear_value_size >= 4 && @@ -128,6 +151,12 @@ sctx->cs_user_data[i] = clear_value[i % (clear_value_size / 4)]; ctx->set_shader_buffers(ctx, PIPE_SHADER_COMPUTE, 0, 1, sb); + + if (!sctx->cs_clear_buffer) { + sctx->cs_clear_buffer = si_create_dma_compute_shader(&sctx->b, + SI_COMPUTE_CLEAR_DW_PER_THREAD, + shader_dst_stream_policy, false); + } ctx->bind_compute_state(ctx, sctx->cs_clear_buffer); } @@ -138,11 +167,12 @@ (cache_policy == L2_BYPASS ? 
SI_CONTEXT_WRITEBACK_GLOBAL_L2 : 0); if (cache_policy != L2_BYPASS) - r600_resource(dst)->TC_L2_dirty = true; + si_resource(dst)->TC_L2_dirty = true; /* Restore states. */ ctx->bind_compute_state(ctx, saved_cs); ctx->set_shader_buffers(ctx, PIPE_SHADER_COMPUTE, 0, src ? 2 : 1, saved_sb); + si_compute_internal_end(sctx); } void si_clear_buffer(struct si_context *sctx, struct pipe_resource *dst, @@ -219,8 +249,8 @@ clear_value_size, coher); } else { assert(clear_value_size == 4); - si_cp_dma_clear_buffer(sctx, dst, offset, - aligned_size, *clear_value, coher, + si_cp_dma_clear_buffer(sctx, sctx->gfx_cs, dst, offset, + aligned_size, *clear_value, 0, coher, get_cache_policy(sctx, coher, size)); } @@ -267,8 +297,8 @@ /* Only use compute for VRAM copies on dGPUs. */ if (sctx->screen->info.has_dedicated_vram && - r600_resource(dst)->domains & RADEON_DOMAIN_VRAM && - r600_resource(src)->domains & RADEON_DOMAIN_VRAM && + si_resource(dst)->domains & RADEON_DOMAIN_VRAM && + si_resource(src)->domains & RADEON_DOMAIN_VRAM && size > 32 * 1024 && dst_offset % 4 == 0 && src_offset % 4 == 0 && size % 4 == 0) { si_compute_do_clear_or_copy(sctx, dst, dst_offset, src, src_offset, @@ -279,6 +309,118 @@ } } +void si_compute_copy_image(struct si_context *sctx, + struct pipe_resource *dst, + unsigned dst_level, + struct pipe_resource *src, + unsigned src_level, + unsigned dstx, unsigned dsty, unsigned dstz, + const struct pipe_box *src_box) +{ + struct pipe_context *ctx = &sctx->b; + unsigned width = src_box->width; + unsigned height = src_box->height; + unsigned depth = src_box->depth; + + unsigned data[] = {src_box->x, src_box->y, src_box->z, 0, dstx, dsty, dstz, 0}; + + if (width == 0 || height == 0) + return; + + si_compute_internal_begin(sctx); + sctx->flags |= SI_CONTEXT_CS_PARTIAL_FLUSH | + si_get_flush_flags(sctx, SI_COHERENCY_SHADER, L2_STREAM); + si_make_CB_shader_coherent(sctx, dst->nr_samples, true); + + struct pipe_constant_buffer saved_cb = {}; + 
si_get_pipe_constant_buffer(sctx, PIPE_SHADER_COMPUTE, 0, &saved_cb); + + struct si_images *images = &sctx->images[PIPE_SHADER_COMPUTE]; + struct pipe_image_view saved_image[2] = {0}; + util_copy_image_view(&saved_image[0], &images->views[0]); + util_copy_image_view(&saved_image[1], &images->views[1]); + + void *saved_cs = sctx->cs_shader_state.program; + + struct pipe_constant_buffer cb = {}; + cb.buffer_size = sizeof(data); + cb.user_buffer = data; + ctx->set_constant_buffer(ctx, PIPE_SHADER_COMPUTE, 0, &cb); + + struct pipe_image_view image[2] = {0}; + image[0].resource = src; + image[0].shader_access = image[0].access = PIPE_IMAGE_ACCESS_READ; + image[0].format = util_format_linear(src->format); + image[0].u.tex.level = src_level; + image[0].u.tex.first_layer = 0; + image[0].u.tex.last_layer = + src->target == PIPE_TEXTURE_3D ? u_minify(src->depth0, src_level) - 1 + : (unsigned)(src->array_size - 1); + image[1].resource = dst; + image[1].shader_access = image[1].access = PIPE_IMAGE_ACCESS_WRITE; + image[1].format = util_format_linear(dst->format); + image[1].u.tex.level = dst_level; + image[1].u.tex.first_layer = 0; + image[1].u.tex.last_layer = + dst->target == PIPE_TEXTURE_3D ? u_minify(dst->depth0, dst_level) - 1 + : (unsigned)(dst->array_size - 1); + + if (src->format == PIPE_FORMAT_R9G9B9E5_FLOAT) + image[0].format = image[1].format = PIPE_FORMAT_R32_UINT; + + /* SNORM8 blitting has precision issues on some chips. Use the SINT + * equivalent instead, which doesn't force DCC decompression. + * Note that some chips avoid this issue by using SDMA. 
+ */ + if (util_format_is_snorm8(dst->format)) { + image[0].format = image[1].format = + util_format_snorm8_to_sint8(dst->format); + } + + ctx->set_shader_images(ctx, PIPE_SHADER_COMPUTE, 0, 2, image); + + struct pipe_grid_info info = {0}; + + if (dst->target == PIPE_TEXTURE_1D_ARRAY && src->target == PIPE_TEXTURE_1D_ARRAY) { + if (!sctx->cs_copy_image_1d_array) + sctx->cs_copy_image_1d_array = + si_create_copy_image_compute_shader_1d_array(ctx); + ctx->bind_compute_state(ctx, sctx->cs_copy_image_1d_array); + info.block[0] = 64; + sctx->compute_last_block[0] = width % 64; + info.block[1] = 1; + info.block[2] = 1; + info.grid[0] = DIV_ROUND_UP(width, 64); + info.grid[1] = depth; + info.grid[2] = 1; + } else { + if (!sctx->cs_copy_image) + sctx->cs_copy_image = si_create_copy_image_compute_shader(ctx); + ctx->bind_compute_state(ctx, sctx->cs_copy_image); + info.block[0] = 8; + sctx->compute_last_block[0] = width % 8; + info.block[1] = 8; + sctx->compute_last_block[1] = height % 8; + info.block[2] = 1; + info.grid[0] = DIV_ROUND_UP(width, 8); + info.grid[1] = DIV_ROUND_UP(height, 8); + info.grid[2] = depth; + } + + ctx->launch_grid(ctx, &info); + + sctx->compute_last_block[0] = 0; + sctx->compute_last_block[1] = 0; + + sctx->flags |= SI_CONTEXT_CS_PARTIAL_FLUSH | + (sctx->chip_class <= VI ? 
SI_CONTEXT_WRITEBACK_GLOBAL_L2 : 0) | + si_get_flush_flags(sctx, SI_COHERENCY_SHADER, L2_STREAM); + ctx->bind_compute_state(ctx, saved_cs); + ctx->set_shader_images(ctx, PIPE_SHADER_COMPUTE, 0, 2, saved_image); + ctx->set_constant_buffer(ctx, PIPE_SHADER_COMPUTE, 0, &saved_cb); + si_compute_internal_end(sctx); +} + void si_init_compute_blit_functions(struct si_context *sctx) { sctx->b.clear_buffer = si_pipe_clear_buffer; diff -Nru mesa-18.3.3/src/gallium/drivers/radeonsi/si_compute.c mesa-19.0.1/src/gallium/drivers/radeonsi/si_compute.c --- mesa-18.3.3/src/gallium/drivers/radeonsi/si_compute.c 2018-12-07 18:58:04.000000000 +0000 +++ mesa-19.0.1/src/gallium/drivers/radeonsi/si_compute.c 2019-03-31 23:16:37.000000000 +0000 @@ -32,9 +32,9 @@ #include "si_build_pm4.h" #include "si_compute.h" -#define COMPUTE_DBG(rscreen, fmt, args...) \ +#define COMPUTE_DBG(sscreen, fmt, args...) \ do { \ - if ((rscreen->debug_flags & DBG(COMPUTE))) fprintf(stderr, fmt, ##args); \ + if ((sscreen->debug_flags & DBG(COMPUTE))) fprintf(stderr, fmt, ##args); \ } while (0); struct dispatch_packet { @@ -308,7 +308,7 @@ uint64_t va; uint32_t offset; pipe_resource_reference(&program->global_buffers[first + i], resources[i]); - va = r600_resource(resources[i])->gpu_address; + va = si_resource(resources[i])->gpu_address; offset = util_le32_to_cpu(*handles[i]); va += offset; va = util_cpu_to_le64(va); @@ -378,7 +378,7 @@ scratch_bo_size = sctx->compute_scratch_buffer->b.b.width0; if (scratch_bo_size < scratch_needed) { - r600_resource_reference(&sctx->compute_scratch_buffer, NULL); + si_resource_reference(&sctx->compute_scratch_buffer, NULL); sctx->compute_scratch_buffer = si_aligned_buffer_create(&sctx->screen->b, @@ -398,7 +398,7 @@ if (si_shader_binary_upload(sctx->screen, shader)) return false; - r600_resource_reference(&shader->scratch_bo, + si_resource_reference(&shader->scratch_bo, sctx->compute_scratch_buffer); } @@ -582,7 +582,7 @@ AMD_CODE_PROPERTY_ENABLE_SGPR_DISPATCH_PTR)) { struct 
dispatch_packet dispatch; unsigned dispatch_offset; - struct r600_resource *dispatch_buf = NULL; + struct si_resource *dispatch_buf = NULL; uint64_t dispatch_va; /* Upload dispatch ptr */ @@ -620,7 +620,7 @@ radeon_emit(cs, S_008F04_BASE_ADDRESS_HI(dispatch_va >> 32) | S_008F04_STRIDE(0)); - r600_resource_reference(&dispatch_buf, NULL); + si_resource_reference(&dispatch_buf, NULL); user_sgpr += 2; } @@ -651,7 +651,7 @@ { struct radeon_cmdbuf *cs = sctx->gfx_cs; struct si_compute *program = sctx->cs_shader_state.program; - struct r600_resource *input_buffer = NULL; + struct si_resource *input_buffer = NULL; unsigned kernel_args_size; unsigned num_work_size_bytes = program->use_code_object_v2 ? 0 : 36; uint32_t kernel_args_offset = 0; @@ -704,7 +704,7 @@ S_008F04_STRIDE(0)); } - r600_resource_reference(&input_buffer, NULL); + si_resource_reference(&input_buffer, NULL); return true; } @@ -724,12 +724,12 @@ if (info->indirect) { if (program->uses_grid_size) { - uint64_t base_va = r600_resource(info->indirect)->gpu_address; + uint64_t base_va = si_resource(info->indirect)->gpu_address; uint64_t va = base_va + info->indirect_offset; int i; radeon_add_to_buffer_list(sctx, sctx->gfx_cs, - r600_resource(info->indirect), + si_resource(info->indirect), RADEON_USAGE_READ, RADEON_PRIO_DRAW_INDIRECT); for (i = 0; i < 3; ++i) { @@ -797,11 +797,6 @@ radeon_set_sh_reg(cs, R_00B854_COMPUTE_RESOURCE_LIMITS, compute_resource_limits); - radeon_set_sh_reg_seq(cs, R_00B81C_COMPUTE_NUM_THREAD_X, 3); - radeon_emit(cs, S_00B81C_NUM_THREAD_FULL(info->block[0])); - radeon_emit(cs, S_00B820_NUM_THREAD_FULL(info->block[1])); - radeon_emit(cs, S_00B824_NUM_THREAD_FULL(info->block[2])); - unsigned dispatch_initiator = S_00B800_COMPUTE_SHADER_EN(1) | S_00B800_FORCE_START_AT_000(1) | @@ -809,11 +804,38 @@ * allow launching waves out-of-order. 
(same as Vulkan) */ S_00B800_ORDER_MODE(sctx->chip_class >= CIK); + uint *last_block = sctx->compute_last_block; + bool partial_block_en = last_block[0] || last_block[1] || last_block[2]; + + radeon_set_sh_reg_seq(cs, R_00B81C_COMPUTE_NUM_THREAD_X, 3); + + if (partial_block_en) { + unsigned partial[3]; + + /* If no partial_block, these should be an entire block size, not 0. */ + partial[0] = last_block[0] ? last_block[0] : info->block[0]; + partial[1] = last_block[1] ? last_block[1] : info->block[1]; + partial[2] = last_block[2] ? last_block[2] : info->block[2]; + + radeon_emit(cs, S_00B81C_NUM_THREAD_FULL(info->block[0]) | + S_00B81C_NUM_THREAD_PARTIAL(partial[0])); + radeon_emit(cs, S_00B820_NUM_THREAD_FULL(info->block[1]) | + S_00B820_NUM_THREAD_PARTIAL(partial[1])); + radeon_emit(cs, S_00B824_NUM_THREAD_FULL(info->block[2]) | + S_00B824_NUM_THREAD_PARTIAL(partial[2])); + + dispatch_initiator |= S_00B800_PARTIAL_TG_EN(1); + } else { + radeon_emit(cs, S_00B81C_NUM_THREAD_FULL(info->block[0])); + radeon_emit(cs, S_00B820_NUM_THREAD_FULL(info->block[1])); + radeon_emit(cs, S_00B824_NUM_THREAD_FULL(info->block[2])); + } + if (info->indirect) { - uint64_t base_va = r600_resource(info->indirect)->gpu_address; + uint64_t base_va = si_resource(info->indirect)->gpu_address; radeon_add_to_buffer_list(sctx, sctx->gfx_cs, - r600_resource(info->indirect), + si_resource(info->indirect), RADEON_USAGE_READ, RADEON_PRIO_DRAW_INDIRECT); radeon_emit(cs, PKT3(PKT3_SET_BASE, 2, 0) | @@ -881,9 +903,9 @@ /* Indirect buffers use TC L2 on GFX9, but not older hw. 
*/ if (sctx->chip_class <= VI && - r600_resource(info->indirect)->TC_L2_dirty) { + si_resource(info->indirect)->TC_L2_dirty) { sctx->flags |= SI_CONTEXT_WRITEBACK_GLOBAL_L2; - r600_resource(info->indirect)->TC_L2_dirty = false; + si_resource(info->indirect)->TC_L2_dirty = false; } } @@ -915,8 +937,8 @@ /* Global buffers */ for (i = 0; i < MAX_GLOBAL_BUFFERS; i++) { - struct r600_resource *buffer = - r600_resource(program->global_buffers[i]); + struct si_resource *buffer = + si_resource(program->global_buffers[i]); if (!buffer) { continue; } diff -Nru mesa-18.3.3/src/gallium/drivers/radeonsi/si_cp_dma.c mesa-19.0.1/src/gallium/drivers/radeonsi/si_cp_dma.c --- mesa-18.3.3/src/gallium/drivers/radeonsi/si_cp_dma.c 2018-12-07 18:58:04.000000000 +0000 +++ mesa-19.0.1/src/gallium/drivers/radeonsi/si_cp_dma.c 2019-03-31 23:16:37.000000000 +0000 @@ -54,11 +54,10 @@ * a buffer. The size must fit in bits [20:0]. If CP_DMA_CLEAR is set, src_va is a 32-bit * clear value. */ -static void si_emit_cp_dma(struct si_context *sctx, uint64_t dst_va, - uint64_t src_va, unsigned size, unsigned flags, - enum si_cache_policy cache_policy) +static void si_emit_cp_dma(struct si_context *sctx, struct radeon_cmdbuf *cs, + uint64_t dst_va, uint64_t src_va, unsigned size, + unsigned flags, enum si_cache_policy cache_policy) { - struct radeon_cmdbuf *cs = sctx->gfx_cs; uint32_t header = 0, command = 0; assert(size <= cp_dma_max_byte_count(sctx)); @@ -146,7 +145,7 @@ * DMA request, however, the CP will see the sync flag and still wait * for all DMAs to complete. 
*/ - si_emit_cp_dma(sctx, 0, 0, 0, CP_DMA_SYNC, L2_BYPASS); + si_emit_cp_dma(sctx, sctx->gfx_cs, 0, 0, 0, CP_DMA_SYNC, L2_BYPASS); } static void si_cp_dma_prepare(struct si_context *sctx, struct pipe_resource *dst, @@ -176,11 +175,11 @@ if (!(user_flags & SI_CPDMA_SKIP_BO_LIST_UPDATE)) { if (dst) radeon_add_to_buffer_list(sctx, sctx->gfx_cs, - r600_resource(dst), + si_resource(dst), RADEON_USAGE_WRITE, RADEON_PRIO_CP_DMA); if (src) radeon_add_to_buffer_list(sctx, sctx->gfx_cs, - r600_resource(src), + si_resource(src), RADEON_USAGE_READ, RADEON_PRIO_CP_DMA); } @@ -190,7 +189,8 @@ if (!(user_flags & SI_CPDMA_SKIP_GFX_SYNC) && sctx->flags) si_emit_cache_flush(sctx); - if (!(user_flags & SI_CPDMA_SKIP_SYNC_BEFORE) && *is_first) + if (!(user_flags & SI_CPDMA_SKIP_SYNC_BEFORE) && *is_first && + !(*packet_flags & CP_DMA_CLEAR)) *packet_flags |= CP_DMA_RAW_WAIT; *is_first = false; @@ -207,13 +207,13 @@ } } -void si_cp_dma_clear_buffer(struct si_context *sctx, struct pipe_resource *dst, - uint64_t offset, uint64_t size, unsigned value, - enum si_coherency coher, - enum si_cache_policy cache_policy) +void si_cp_dma_clear_buffer(struct si_context *sctx, struct radeon_cmdbuf *cs, + struct pipe_resource *dst, uint64_t offset, + uint64_t size, unsigned value, unsigned user_flags, + enum si_coherency coher, enum si_cache_policy cache_policy) { - struct r600_resource *rdst = r600_resource(dst); - uint64_t va = (rdst ? rdst->gpu_address : 0) + offset; + struct si_resource *sdst = si_resource(dst); + uint64_t va = (sdst ? sdst->gpu_address : 0) + offset; bool is_first = true; assert(size && size % 4 == 0); @@ -221,30 +221,32 @@ /* Mark the buffer range of destination as valid (initialized), * so that transfer_map knows it should wait for the GPU when mapping * that range. */ - if (rdst) - util_range_add(&rdst->valid_buffer_range, offset, offset + size); + if (sdst) + util_range_add(&sdst->valid_buffer_range, offset, offset + size); /* Flush the caches. 
*/ - sctx->flags |= SI_CONTEXT_PS_PARTIAL_FLUSH | - SI_CONTEXT_CS_PARTIAL_FLUSH | - si_get_flush_flags(sctx, coher, cache_policy); + if (sdst && !(user_flags & SI_CPDMA_SKIP_GFX_SYNC)) { + sctx->flags |= SI_CONTEXT_PS_PARTIAL_FLUSH | + SI_CONTEXT_CS_PARTIAL_FLUSH | + si_get_flush_flags(sctx, coher, cache_policy); + } while (size) { unsigned byte_count = MIN2(size, cp_dma_max_byte_count(sctx)); - unsigned dma_flags = CP_DMA_CLEAR | (rdst ? 0 : CP_DMA_DST_IS_GDS); + unsigned dma_flags = CP_DMA_CLEAR | (sdst ? 0 : CP_DMA_DST_IS_GDS); - si_cp_dma_prepare(sctx, dst, NULL, byte_count, size, 0, coher, - &is_first, &dma_flags); + si_cp_dma_prepare(sctx, dst, NULL, byte_count, size, user_flags, + coher, &is_first, &dma_flags); /* Emit the clear packet. */ - si_emit_cp_dma(sctx, va, value, byte_count, dma_flags, cache_policy); + si_emit_cp_dma(sctx, cs, va, value, byte_count, dma_flags, cache_policy); size -= byte_count; va += byte_count; } - if (rdst && cache_policy != L2_BYPASS) - rdst->TC_L2_dirty = true; + if (sdst && cache_policy != L2_BYPASS) + sdst->TC_L2_dirty = true; /* If it's not a framebuffer fast clear... */ if (coher == SI_COHERENCY_SHADER) @@ -273,7 +275,7 @@ */ if (!sctx->scratch_buffer || sctx->scratch_buffer->b.b.width0 < scratch_size) { - r600_resource_reference(&sctx->scratch_buffer, NULL); + si_resource_reference(&sctx->scratch_buffer, NULL); sctx->scratch_buffer = si_aligned_buffer_create(&sctx->screen->b, SI_RESOURCE_FLAG_UNMAPPABLE, @@ -290,7 +292,7 @@ coher, is_first, &dma_flags); va = sctx->scratch_buffer->gpu_address; - si_emit_cp_dma(sctx, va, va + SI_CPDMA_ALIGNMENT, size, dma_flags, + si_emit_cp_dma(sctx, sctx->gfx_cs, va, va + SI_CPDMA_ALIGNMENT, size, dma_flags, cache_policy); } @@ -321,14 +323,14 @@ /* Mark the buffer range of destination as valid (initialized), * so that transfer_map knows it should wait for the GPU when mapping * that range. 
*/ - util_range_add(&r600_resource(dst)->valid_buffer_range, dst_offset, + util_range_add(&si_resource(dst)->valid_buffer_range, dst_offset, dst_offset + size); } - dst_offset += r600_resource(dst)->gpu_address; + dst_offset += si_resource(dst)->gpu_address; } if (src) - src_offset += r600_resource(src)->gpu_address; + src_offset += si_resource(src)->gpu_address; /* The workarounds aren't needed on Fiji and beyond. */ if (sctx->family <= CHIP_CARRIZO || @@ -373,7 +375,7 @@ size + skipped_size + realign_size, user_flags, coher, &is_first, &dma_flags); - si_emit_cp_dma(sctx, main_dst_offset, main_src_offset, + si_emit_cp_dma(sctx, sctx->gfx_cs, main_dst_offset, main_src_offset, byte_count, dma_flags, cache_policy); size -= byte_count; @@ -389,7 +391,7 @@ skipped_size + realign_size, user_flags, coher, &is_first, &dma_flags); - si_emit_cp_dma(sctx, dst_offset, src_offset, skipped_size, + si_emit_cp_dma(sctx, sctx->gfx_cs, dst_offset, src_offset, skipped_size, dma_flags, cache_policy); } @@ -400,7 +402,7 @@ } if (dst && cache_policy != L2_BYPASS) - r600_resource(dst)->TC_L2_dirty = true; + si_resource(dst)->TC_L2_dirty = true; /* If it's not a prefetch or GDS copy... 
*/ if (dst && src && (dst != src || dst_offset != src_offset)) @@ -553,11 +555,11 @@ src = pipe_buffer_create(ctx->screen, 0, PIPE_USAGE_DEFAULT, 16); dst = pipe_buffer_create(ctx->screen, 0, PIPE_USAGE_DEFAULT, 16); - si_cp_dma_clear_buffer(sctx, src, 0, 4, 0xabcdef01, SI_COHERENCY_SHADER, L2_BYPASS); - si_cp_dma_clear_buffer(sctx, src, 4, 4, 0x23456789, SI_COHERENCY_SHADER, L2_BYPASS); - si_cp_dma_clear_buffer(sctx, src, 8, 4, 0x87654321, SI_COHERENCY_SHADER, L2_BYPASS); - si_cp_dma_clear_buffer(sctx, src, 12, 4, 0xfedcba98, SI_COHERENCY_SHADER, L2_BYPASS); - si_cp_dma_clear_buffer(sctx, dst, 0, 16, 0xdeadbeef, SI_COHERENCY_SHADER, L2_BYPASS); + si_cp_dma_clear_buffer(sctx, sctx->gfx_cs, src, 0, 4, 0xabcdef01, 0, SI_COHERENCY_SHADER, L2_BYPASS); + si_cp_dma_clear_buffer(sctx, sctx->gfx_cs, src, 4, 4, 0x23456789, 0, SI_COHERENCY_SHADER, L2_BYPASS); + si_cp_dma_clear_buffer(sctx, sctx->gfx_cs, src, 8, 4, 0x87654321, 0, SI_COHERENCY_SHADER, L2_BYPASS); + si_cp_dma_clear_buffer(sctx, sctx->gfx_cs, src, 12, 4, 0xfedcba98, 0, SI_COHERENCY_SHADER, L2_BYPASS); + si_cp_dma_clear_buffer(sctx, sctx->gfx_cs, dst, 0, 16, 0xdeadbeef, 0, SI_COHERENCY_SHADER, L2_BYPASS); si_cp_dma_copy_buffer(sctx, NULL, src, offset, 0, 16, 0, SI_COHERENCY_NONE, L2_BYPASS); si_cp_dma_copy_buffer(sctx, dst, NULL, 0, offset, 16, 0, SI_COHERENCY_NONE, L2_BYPASS); @@ -567,7 +569,7 @@ r[0] == 0xabcdef01 && r[1] == 0x23456789 && r[2] == 0x87654321 && r[3] == 0xfedcba98 ? 
"pass" : "fail"); - si_cp_dma_clear_buffer(sctx, NULL, offset, 16, 0xc1ea4146, SI_COHERENCY_NONE, L2_BYPASS); + si_cp_dma_clear_buffer(sctx, sctx->gfx_cs, NULL, offset, 16, 0xc1ea4146, 0, SI_COHERENCY_NONE, L2_BYPASS); si_cp_dma_copy_buffer(sctx, dst, NULL, 0, offset, 16, 0, SI_COHERENCY_NONE, L2_BYPASS); pipe_buffer_read(ctx, dst, 0, sizeof(r), r); @@ -579,3 +581,28 @@ pipe_resource_reference(&dst, NULL); exit(0); } + +void si_cp_write_data(struct si_context *sctx, struct si_resource *buf, + unsigned offset, unsigned size, unsigned dst_sel, + unsigned engine, const void *data) +{ + struct radeon_cmdbuf *cs = sctx->gfx_cs; + + assert(offset % 4 == 0); + assert(size % 4 == 0); + + if (sctx->chip_class == SI && dst_sel == V_370_MEM) + dst_sel = V_370_MEM_GRBM; + + radeon_add_to_buffer_list(sctx, cs, buf, + RADEON_USAGE_WRITE, RADEON_PRIO_CP_DMA); + uint64_t va = buf->gpu_address + offset; + + radeon_emit(cs, PKT3(PKT3_WRITE_DATA, 2 + size/4, 0)); + radeon_emit(cs, S_370_DST_SEL(dst_sel) | + S_370_WR_CONFIRM(1) | + S_370_ENGINE_SEL(engine)); + radeon_emit(cs, va); + radeon_emit(cs, va >> 32); + radeon_emit_array(cs, (const uint32_t*)data, size/4); +} diff -Nru mesa-18.3.3/src/gallium/drivers/radeonsi/si_debug.c mesa-19.0.1/src/gallium/drivers/radeonsi/si_debug.c --- mesa-18.3.3/src/gallium/drivers/radeonsi/si_debug.c 2018-09-27 19:13:54.000000000 +0000 +++ mesa-19.0.1/src/gallium/drivers/radeonsi/si_debug.c 2019-03-31 23:16:37.000000000 +0000 @@ -93,7 +93,7 @@ void si_destroy_saved_cs(struct si_saved_cs *scs) { si_clear_saved_cs(&scs->gfx); - r600_resource_reference(&scs->trace_buf, NULL); + si_resource_reference(&scs->trace_buf, NULL); free(scs); } @@ -612,7 +612,7 @@ uint32_t *gpu_list; /** Reference of buffer where the list is uploaded, so that gpu_list * is kept live. 
*/ - struct r600_resource *buf; + struct si_resource *buf; const char *shader_name; const char *elem_name; @@ -628,7 +628,7 @@ si_log_chunk_desc_list_destroy(void *data) { struct si_log_chunk_desc_list *chunk = data; - r600_resource_reference(&chunk->buf, NULL); + si_resource_reference(&chunk->buf, NULL); FREE(chunk); } @@ -747,7 +747,7 @@ chunk->slot_remap = slot_remap; chunk->chip_class = screen->info.chip_class; - r600_resource_reference(&chunk->buf, desc->buffer); + si_resource_reference(&chunk->buf, desc->buffer); chunk->gpu_list = desc->gpu_list; for (unsigned i = 0; i < num_elements; ++i) { @@ -1052,23 +1052,30 @@ void si_log_draw_state(struct si_context *sctx, struct u_log_context *log) { + struct si_shader_ctx_state *tcs_shader; + if (!log) return; + tcs_shader = &sctx->tcs_shader; + if (sctx->tes_shader.cso && !sctx->tcs_shader.cso) + tcs_shader = &sctx->fixed_func_tcs_shader; + si_dump_framebuffer(sctx, log); si_dump_gfx_shader(sctx, &sctx->vs_shader, log); - si_dump_gfx_shader(sctx, &sctx->tcs_shader, log); + si_dump_gfx_shader(sctx, tcs_shader, log); si_dump_gfx_shader(sctx, &sctx->tes_shader, log); si_dump_gfx_shader(sctx, &sctx->gs_shader, log); si_dump_gfx_shader(sctx, &sctx->ps_shader, log); si_dump_descriptor_list(sctx->screen, &sctx->descriptors[SI_DESCS_RW_BUFFERS], - "", "RW buffers", 4, SI_NUM_RW_BUFFERS, + "", "RW buffers", 4, + sctx->descriptors[SI_DESCS_RW_BUFFERS].num_active_slots, si_identity, log); si_dump_gfx_descriptors(sctx, &sctx->vs_shader, log); - si_dump_gfx_descriptors(sctx, &sctx->tcs_shader, log); + si_dump_gfx_descriptors(sctx, tcs_shader, log); si_dump_gfx_descriptors(sctx, &sctx->tes_shader, log); si_dump_gfx_descriptors(sctx, &sctx->gs_shader, log); si_dump_gfx_descriptors(sctx, &sctx->ps_shader, log); diff -Nru mesa-18.3.3/src/gallium/drivers/radeonsi/si_descriptors.c mesa-19.0.1/src/gallium/drivers/radeonsi/si_descriptors.c --- mesa-18.3.3/src/gallium/drivers/radeonsi/si_descriptors.c 2018-09-27 19:13:54.000000000 +0000 
+++ mesa-19.0.1/src/gallium/drivers/radeonsi/si_descriptors.c 2019-03-31 23:16:37.000000000 +0000 @@ -134,7 +134,7 @@ static void si_release_descriptors(struct si_descriptors *desc) { - r600_resource_reference(&desc->buffer, NULL); + si_resource_reference(&desc->buffer, NULL); FREE(desc->list); } @@ -159,7 +159,7 @@ desc->element_dw_size]; /* The buffer is already in the buffer list. */ - r600_resource_reference(&desc->buffer, NULL); + si_resource_reference(&desc->buffer, NULL); desc->gpu_list = NULL; desc->gpu_address = si_desc_extract_buffer_address(descriptor); si_mark_atom_dirty(sctx, &sctx->atoms.s.shader_pointers); @@ -209,7 +209,7 @@ /* SAMPLER VIEWS */ static inline enum radeon_bo_priority -si_get_sampler_view_priority(struct r600_resource *res) +si_get_sampler_view_priority(struct si_resource *res) { if (res->b.b.target == PIPE_BUFFER) return RADEON_PRIO_SAMPLER_BUFFER; @@ -290,7 +290,7 @@ } /* Set buffer descriptor fields that can be changed by reallocations. */ -static void si_set_buf_desc_address(struct r600_resource *buf, +static void si_set_buf_desc_address(struct si_resource *buf, uint64_t offset, uint32_t *state) { uint64_t va = buf->gpu_address + offset; @@ -497,7 +497,7 @@ bool disallow_early_out) { struct si_samplers *samplers = &sctx->samplers[shader]; - struct si_sampler_view *rview = (struct si_sampler_view*)view; + struct si_sampler_view *sview = (struct si_sampler_view*)view; struct si_descriptors *descs = si_sampler_and_image_descriptors(sctx, shader); unsigned desc_slot = si_get_sampler_slot(slot); uint32_t *desc = descs->list + desc_slot * 16; @@ -508,7 +508,7 @@ if (view) { struct si_texture *tex = (struct si_texture *)view->texture; - si_set_sampler_view_desc(sctx, rview, + si_set_sampler_view_desc(sctx, sview, samplers->sampler_states[slot], desc); if (tex->buffer.b.b.target == PIPE_BUFFER) { @@ -539,7 +539,7 @@ * updated. 
*/ si_sampler_view_add_buffer(sctx, view->texture, RADEON_USAGE_READ, - rview->is_stencil_sampler, true); + sview->is_stencil_sampler, true); } else { pipe_sampler_view_reference(&samplers->views[slot], NULL); memcpy(desc, null_texture_descriptor, 8*4); @@ -667,7 +667,7 @@ static void si_mark_image_range_valid(const struct pipe_image_view *view) { - struct r600_resource *res = r600_resource(view->resource); + struct si_resource *res = si_resource(view->resource); assert(res && res->b.b.target == PIPE_BUFFER); @@ -682,9 +682,9 @@ uint32_t *desc, uint32_t *fmask_desc) { struct si_screen *screen = ctx->screen; - struct r600_resource *res; + struct si_resource *res; - res = r600_resource(view->resource); + res = si_resource(view->resource); if (res->b.b.target == PIPE_BUFFER) { if (view->access & PIPE_IMAGE_ACCESS_WRITE) @@ -771,7 +771,7 @@ { struct si_images *images = &ctx->images[shader]; struct si_descriptors *descs = si_sampler_and_image_descriptors(ctx, shader); - struct r600_resource *res; + struct si_resource *res; unsigned desc_slot = si_get_image_slot(slot); uint32_t *desc = descs->list + desc_slot * 8; @@ -780,7 +780,7 @@ return; } - res = r600_resource(view->resource); + res = si_resource(view->resource); if (&images->views[slot] != view) util_copy_image_view(&images->views[slot], view); @@ -1026,7 +1026,7 @@ int i = u_bit_scan(&mask); radeon_add_to_buffer_list(sctx, sctx->gfx_cs, - r600_resource(buffers->buffers[i]), + si_resource(buffers->buffers[i]), i < SI_NUM_SHADER_BUFFERS ? buffers->shader_usage : buffers->shader_usage_constbuf, i < SI_NUM_SHADER_BUFFERS ? 
buffers->priority : @@ -1041,7 +1041,7 @@ { pipe_resource_reference(buf, buffers->buffers[idx]); if (*buf) { - struct r600_resource *res = r600_resource(*buf); + struct si_resource *res = si_resource(*buf); const uint32_t *desc = descs->list + idx * 4; uint64_t va; @@ -1071,7 +1071,7 @@ continue; radeon_add_to_buffer_list(sctx, sctx->gfx_cs, - r600_resource(sctx->vertex_buffer[vb].buffer.resource), + si_resource(sctx->vertex_buffer[vb].buffer.resource), RADEON_USAGE_READ, RADEON_PRIO_VERTEX_BUFFER); } @@ -1126,22 +1126,22 @@ for (i = 0; i < count; i++) { struct pipe_vertex_buffer *vb; - struct r600_resource *rbuffer; + struct si_resource *buf; unsigned vbo_index = velems->vertex_buffer_index[i]; uint32_t *desc = &ptr[i*4]; vb = &sctx->vertex_buffer[vbo_index]; - rbuffer = r600_resource(vb->buffer.resource); - if (!rbuffer) { + buf = si_resource(vb->buffer.resource); + if (!buf) { memset(desc, 0, 16); continue; } int64_t offset = (int64_t)((int)vb->buffer_offset) + velems->src_offset[i]; - uint64_t va = rbuffer->gpu_address + offset; + uint64_t va = buf->gpu_address + offset; - int64_t num_records = (int64_t)rbuffer->b.b.width0 - offset; + int64_t num_records = (int64_t)buf->b.b.width0 - offset; if (sctx->chip_class != VI && vb->stride) { /* Round up by rounding down and adding 1 */ num_records = (num_records - velems->format_size[i]) / @@ -1157,7 +1157,7 @@ if (first_vb_use_mask & (1 << i)) { radeon_add_to_buffer_list(sctx, sctx->gfx_cs, - r600_resource(vb->buffer.resource), + si_resource(vb->buffer.resource), RADEON_USAGE_READ, RADEON_PRIO_VERTEX_BUFFER); } } @@ -1189,7 +1189,7 @@ return &sctx->descriptors[si_const_and_shader_buffer_descriptors_idx(shader)]; } -void si_upload_const_buffer(struct si_context *sctx, struct r600_resource **rbuffer, +void si_upload_const_buffer(struct si_context *sctx, struct si_resource **buf, const uint8_t *ptr, unsigned size, uint32_t *const_offset) { void *tmp; @@ -1197,8 +1197,8 @@ u_upload_alloc(sctx->b.const_uploader, 0, size, 
si_optimal_tcc_alignment(sctx, size), const_offset, - (struct pipe_resource**)rbuffer, &tmp); - if (*rbuffer) + (struct pipe_resource**)buf, &tmp); + if (*buf) util_memcpy_cpu_to_le32(tmp, ptr, size); } @@ -1226,19 +1226,17 @@ unsigned buffer_offset; si_upload_const_buffer(sctx, - (struct r600_resource**)&buffer, input->user_buffer, + (struct si_resource**)&buffer, input->user_buffer, input->buffer_size, &buffer_offset); if (!buffer) { /* Just unbind on failure. */ si_set_constant_buffer(sctx, buffers, descriptors_idx, slot, NULL); return; } - va = r600_resource(buffer)->gpu_address + buffer_offset; + va = si_resource(buffer)->gpu_address + buffer_offset; } else { pipe_resource_reference(&buffer, input->buffer); - va = r600_resource(buffer)->gpu_address + input->buffer_offset; - /* Only track usage for non-user buffers. */ - r600_resource(buffer)->bind_history |= PIPE_BIND_CONSTANT_BUFFER; + va = si_resource(buffer)->gpu_address + input->buffer_offset; } /* Set the descriptor. */ @@ -1256,7 +1254,7 @@ buffers->buffers[slot] = buffer; radeon_add_to_gfx_buffer_list_check_mem(sctx, - r600_resource(buffer), + si_resource(buffer), buffers->shader_usage_constbuf, buffers->priority_constbuf, true); buffers->enabled_mask |= 1u << slot; @@ -1269,13 +1267,6 @@ sctx->descriptors_dirty |= 1u << descriptors_idx; } -void si_set_rw_buffer(struct si_context *sctx, - uint slot, const struct pipe_constant_buffer *input) -{ - si_set_constant_buffer(sctx, &sctx->rw_buffers, - SI_DESCS_RW_BUFFERS, slot, input); -} - static void si_pipe_set_constant_buffer(struct pipe_context *ctx, enum pipe_shader_type shader, uint slot, const struct pipe_constant_buffer *input) @@ -1286,11 +1277,14 @@ return; if (slot == 0 && input && input->buffer && - !(r600_resource(input->buffer)->flags & RADEON_FLAG_32BIT)) { + !(si_resource(input->buffer)->flags & RADEON_FLAG_32BIT)) { assert(!"constant buffer 0 must have a 32-bit VM address, use const_uploader"); return; } + if (input && input->buffer) + 
si_resource(input->buffer)->bind_history |= PIPE_BIND_CONSTANT_BUFFER; + slot = si_get_constbuf_slot(slot); si_set_constant_buffer(sctx, &sctx->const_and_shader_buffers[shader], si_const_and_shader_buffer_descriptors_idx(shader), @@ -1310,6 +1304,49 @@ /* SHADER BUFFERS */ +static void si_set_shader_buffer(struct si_context *sctx, + struct si_buffer_resources *buffers, + unsigned descriptors_idx, + uint slot, const struct pipe_shader_buffer *sbuffer, + enum radeon_bo_priority priority) +{ + struct si_descriptors *descs = &sctx->descriptors[descriptors_idx]; + uint32_t *desc = descs->list + slot * 4; + + if (!sbuffer || !sbuffer->buffer) { + pipe_resource_reference(&buffers->buffers[slot], NULL); + memset(desc, 0, sizeof(uint32_t) * 4); + buffers->enabled_mask &= ~(1u << slot); + sctx->descriptors_dirty |= 1u << descriptors_idx; + return; + } + + struct si_resource *buf = si_resource(sbuffer->buffer); + uint64_t va = buf->gpu_address + sbuffer->buffer_offset; + + desc[0] = va; + desc[1] = S_008F04_BASE_ADDRESS_HI(va >> 32) | + S_008F04_STRIDE(0); + desc[2] = sbuffer->buffer_size; + desc[3] = S_008F0C_DST_SEL_X(V_008F0C_SQ_SEL_X) | + S_008F0C_DST_SEL_Y(V_008F0C_SQ_SEL_Y) | + S_008F0C_DST_SEL_Z(V_008F0C_SQ_SEL_Z) | + S_008F0C_DST_SEL_W(V_008F0C_SQ_SEL_W) | + S_008F0C_NUM_FORMAT(V_008F0C_BUF_NUM_FORMAT_FLOAT) | + S_008F0C_DATA_FORMAT(V_008F0C_BUF_DATA_FORMAT_32); + + pipe_resource_reference(&buffers->buffers[slot], &buf->b.b); + radeon_add_to_gfx_buffer_list_check_mem(sctx, buf, + buffers->shader_usage, + priority, true); + + buffers->enabled_mask |= 1u << slot; + sctx->descriptors_dirty |= 1u << descriptors_idx; + + util_range_add(&buf->valid_buffer_range, sbuffer->buffer_offset, + sbuffer->buffer_offset + sbuffer->buffer_size); +} + static void si_set_shader_buffers(struct pipe_context *ctx, enum pipe_shader_type shader, unsigned start_slot, unsigned count, @@ -1317,53 +1354,20 @@ { struct si_context *sctx = (struct si_context *)ctx; struct si_buffer_resources 
*buffers = &sctx->const_and_shader_buffers[shader]; - struct si_descriptors *descs = si_const_and_shader_buffer_descriptors(sctx, shader); + unsigned descriptors_idx = si_const_and_shader_buffer_descriptors_idx(shader); unsigned i; assert(start_slot + count <= SI_NUM_SHADER_BUFFERS); for (i = 0; i < count; ++i) { const struct pipe_shader_buffer *sbuffer = sbuffers ? &sbuffers[i] : NULL; - struct r600_resource *buf; unsigned slot = si_get_shaderbuf_slot(start_slot + i); - uint32_t *desc = descs->list + slot * 4; - uint64_t va; - if (!sbuffer || !sbuffer->buffer) { - pipe_resource_reference(&buffers->buffers[slot], NULL); - memset(desc, 0, sizeof(uint32_t) * 4); - buffers->enabled_mask &= ~(1u << slot); - sctx->descriptors_dirty |= - 1u << si_const_and_shader_buffer_descriptors_idx(shader); - continue; - } - - buf = r600_resource(sbuffer->buffer); - va = buf->gpu_address + sbuffer->buffer_offset; - - desc[0] = va; - desc[1] = S_008F04_BASE_ADDRESS_HI(va >> 32) | - S_008F04_STRIDE(0); - desc[2] = sbuffer->buffer_size; - desc[3] = S_008F0C_DST_SEL_X(V_008F0C_SQ_SEL_X) | - S_008F0C_DST_SEL_Y(V_008F0C_SQ_SEL_Y) | - S_008F0C_DST_SEL_Z(V_008F0C_SQ_SEL_Z) | - S_008F0C_DST_SEL_W(V_008F0C_SQ_SEL_W) | - S_008F0C_NUM_FORMAT(V_008F0C_BUF_NUM_FORMAT_FLOAT) | - S_008F0C_DATA_FORMAT(V_008F0C_BUF_DATA_FORMAT_32); + if (sbuffer && sbuffer->buffer) + si_resource(sbuffer->buffer)->bind_history |= PIPE_BIND_SHADER_BUFFER; - pipe_resource_reference(&buffers->buffers[slot], &buf->b.b); - radeon_add_to_gfx_buffer_list_check_mem(sctx, buf, - buffers->shader_usage, - buffers->priority, true); - buf->bind_history |= PIPE_BIND_SHADER_BUFFER; - - buffers->enabled_mask |= 1u << slot; - sctx->descriptors_dirty |= - 1u << si_const_and_shader_buffer_descriptors_idx(shader); - - util_range_add(&buf->valid_buffer_range, sbuffer->buffer_offset, - sbuffer->buffer_offset + sbuffer->buffer_size); + si_set_shader_buffer(sctx, buffers, descriptors_idx, slot, sbuffer, + buffers->priority); } } @@ -1386,6 
+1390,20 @@ /* RING BUFFERS */ +void si_set_rw_buffer(struct si_context *sctx, + uint slot, const struct pipe_constant_buffer *input) +{ + si_set_constant_buffer(sctx, &sctx->rw_buffers, SI_DESCS_RW_BUFFERS, + slot, input); +} + +void si_set_rw_shader_buffer(struct si_context *sctx, uint slot, + const struct pipe_shader_buffer *sbuffer) +{ + si_set_shader_buffer(sctx, &sctx->rw_buffers, SI_DESCS_RW_BUFFERS, + slot, sbuffer, RADEON_PRIO_SHADER_RW_BUFFER); +} + void si_set_ring_buffer(struct si_context *sctx, uint slot, struct pipe_resource *buffer, unsigned stride, unsigned num_records, @@ -1404,7 +1422,7 @@ if (buffer) { uint64_t va; - va = r600_resource(buffer)->gpu_address + offset; + va = si_resource(buffer)->gpu_address + offset; switch (element_size) { default: @@ -1468,7 +1486,7 @@ pipe_resource_reference(&buffers->buffers[slot], buffer); radeon_add_to_buffer_list(sctx, sctx->gfx_cs, - r600_resource(buffer), + si_resource(buffer), buffers->shader_usage, buffers->priority); buffers->enabled_mask |= 1u << slot; } else { @@ -1490,7 +1508,7 @@ uint64_t offset_within_buffer = old_desc_va - old_buf_va; /* Update the descriptor. */ - si_set_buf_desc_address(r600_resource(new_buf), offset_within_buffer, + si_set_buf_desc_address(si_resource(new_buf), offset_within_buffer, desc); } @@ -1593,7 +1611,7 @@ sctx->descriptors_dirty |= 1u << descriptors_idx; radeon_add_to_gfx_buffer_list_check_mem(sctx, - r600_resource(buf), + si_resource(buf), usage, priority, true); } } @@ -1605,7 +1623,7 @@ void si_rebind_buffer(struct si_context *sctx, struct pipe_resource *buf, uint64_t old_va) { - struct r600_resource *rbuffer = r600_resource(buf); + struct si_resource *buffer = si_resource(buf); unsigned i, shader; unsigned num_elems = sctx->vertex_elements ? sctx->vertex_elements->count : 0; @@ -1617,7 +1635,7 @@ */ /* Vertex buffers. 
*/ - if (rbuffer->bind_history & PIPE_BIND_VERTEX_BUFFER) { + if (buffer->bind_history & PIPE_BIND_VERTEX_BUFFER) { for (i = 0; i < num_elems; i++) { int vb = sctx->vertex_elements->vertex_buffer_index[i]; @@ -1634,7 +1652,7 @@ } /* Streamout buffers. (other internal buffers can't be invalidated) */ - if (rbuffer->bind_history & PIPE_BIND_STREAM_OUTPUT) { + if (buffer->bind_history & PIPE_BIND_STREAM_OUTPUT) { for (i = SI_VS_STREAMOUT_BUF0; i <= SI_VS_STREAMOUT_BUF3; i++) { struct si_buffer_resources *buffers = &sctx->rw_buffers; struct si_descriptors *descs = @@ -1648,7 +1666,7 @@ sctx->descriptors_dirty |= 1u << SI_DESCS_RW_BUFFERS; radeon_add_to_gfx_buffer_list_check_mem(sctx, - rbuffer, buffers->shader_usage, + buffer, buffers->shader_usage, RADEON_PRIO_SHADER_RW_BUFFER, true); @@ -1662,7 +1680,7 @@ } /* Constant and shader buffers. */ - if (rbuffer->bind_history & PIPE_BIND_CONSTANT_BUFFER) { + if (buffer->bind_history & PIPE_BIND_CONSTANT_BUFFER) { for (shader = 0; shader < SI_NUM_SHADERS; shader++) si_reset_buffer_resources(sctx, &sctx->const_and_shader_buffers[shader], si_const_and_shader_buffer_descriptors_idx(shader), @@ -1672,7 +1690,7 @@ sctx->const_and_shader_buffers[shader].priority_constbuf); } - if (rbuffer->bind_history & PIPE_BIND_SHADER_BUFFER) { + if (buffer->bind_history & PIPE_BIND_SHADER_BUFFER) { for (shader = 0; shader < SI_NUM_SHADERS; shader++) si_reset_buffer_resources(sctx, &sctx->const_and_shader_buffers[shader], si_const_and_shader_buffer_descriptors_idx(shader), @@ -1682,7 +1700,7 @@ sctx->const_and_shader_buffers[shader].priority); } - if (rbuffer->bind_history & PIPE_BIND_SAMPLER_VIEW) { + if (buffer->bind_history & PIPE_BIND_SAMPLER_VIEW) { /* Texture buffers - update bindings. 
*/ for (shader = 0; shader < SI_NUM_SHADERS; shader++) { struct si_samplers *samplers = &sctx->samplers[shader]; @@ -1702,7 +1720,7 @@ 1u << si_sampler_and_image_descriptors_idx(shader); radeon_add_to_gfx_buffer_list_check_mem(sctx, - rbuffer, RADEON_USAGE_READ, + buffer, RADEON_USAGE_READ, RADEON_PRIO_SAMPLER_BUFFER, true); } @@ -1711,7 +1729,7 @@ } /* Shader images */ - if (rbuffer->bind_history & PIPE_BIND_SHADER_IMAGE) { + if (buffer->bind_history & PIPE_BIND_SHADER_IMAGE) { for (shader = 0; shader < SI_NUM_SHADERS; ++shader) { struct si_images *images = &sctx->images[shader]; struct si_descriptors *descs = @@ -1734,7 +1752,7 @@ 1u << si_sampler_and_image_descriptors_idx(shader); radeon_add_to_gfx_buffer_list_check_mem( - sctx, rbuffer, + sctx, buffer, RADEON_USAGE_READWRITE, RADEON_PRIO_SAMPLER_BUFFER, true); } @@ -1743,7 +1761,7 @@ } /* Bindless texture handles */ - if (rbuffer->texture_handle_allocated) { + if (buffer->texture_handle_allocated) { struct si_descriptors *descs = &sctx->bindless_descriptors; util_dynarray_foreach(&sctx->resident_tex_handles, @@ -1752,7 +1770,7 @@ unsigned desc_slot = (*tex_handle)->desc_slot; if (view->texture == buf) { - si_set_buf_desc_address(rbuffer, + si_set_buf_desc_address(buffer, view->u.buf.offset, descs->list + desc_slot * 16 + 4); @@ -1761,7 +1779,7 @@ sctx->bindless_descriptors_dirty = true; radeon_add_to_gfx_buffer_list_check_mem( - sctx, rbuffer, + sctx, buffer, RADEON_USAGE_READ, RADEON_PRIO_SAMPLER_BUFFER, true); } @@ -1769,7 +1787,7 @@ } /* Bindless image handles */ - if (rbuffer->image_handle_allocated) { + if (buffer->image_handle_allocated) { struct si_descriptors *descs = &sctx->bindless_descriptors; util_dynarray_foreach(&sctx->resident_img_handles, @@ -1781,7 +1799,7 @@ if (view->access & PIPE_IMAGE_ACCESS_WRITE) si_mark_image_range_valid(view); - si_set_buf_desc_address(rbuffer, + si_set_buf_desc_address(buffer, view->u.buf.offset, descs->list + desc_slot * 16 + 4); @@ -1790,7 +1808,7 @@ 
sctx->bindless_descriptors_dirty = true; radeon_add_to_gfx_buffer_list_check_mem( - sctx, rbuffer, + sctx, buffer, RADEON_USAGE_READWRITE, RADEON_PRIO_SAMPLER_BUFFER, true); } @@ -1803,7 +1821,6 @@ unsigned num_dwords) { struct si_descriptors *desc = &sctx->bindless_descriptors; - struct radeon_cmdbuf *cs = sctx->gfx_cs; unsigned desc_slot_offset = desc_slot * 16; uint32_t *data; uint64_t va; @@ -1811,13 +1828,8 @@ data = desc->list + desc_slot_offset; va = desc->gpu_address + desc_slot_offset * 4; - radeon_emit(cs, PKT3(PKT3_WRITE_DATA, 2 + num_dwords, 0)); - radeon_emit(cs, S_370_DST_SEL(V_370_TC_L2) | - S_370_WR_CONFIRM(1) | - S_370_ENGINE_SEL(V_370_ME)); - radeon_emit(cs, va); - radeon_emit(cs, va >> 32); - radeon_emit_array(cs, data, num_dwords); + si_cp_write_data(sctx, desc->buffer, va - desc->buffer->gpu_address, + num_dwords * 4, V_370_TC_L2, V_370_ME, data); } static void si_upload_bindless_descriptors(struct si_context *sctx) @@ -2055,7 +2067,7 @@ unsigned sh_offset, unsigned pointer_count) { - radeon_emit(cs, PKT3(PKT3_SET_SH_REG, pointer_count * (HAVE_32BIT_POINTERS ? 
1 : 2), 0)); + radeon_emit(cs, PKT3(PKT3_SET_SH_REG, pointer_count, 0)); radeon_emit(cs, (sh_offset - SI_SH_REG_OFFSET) >> 2); } @@ -2065,10 +2077,7 @@ { radeon_emit(cs, va); - if (HAVE_32BIT_POINTERS) - assert(va == 0 || (va >> 32) == sscreen->info.address32_hi); - else - radeon_emit(cs, va >> 32); + assert(va == 0 || (va >> 32) == sscreen->info.address32_hi); } static void si_emit_shader_pointer(struct si_context *sctx, @@ -2106,25 +2115,6 @@ } } -static void si_emit_disjoint_shader_pointers(struct si_context *sctx, - unsigned pointer_mask, - unsigned sh_base) -{ - if (!sh_base) - return; - - struct radeon_cmdbuf *cs = sctx->gfx_cs; - unsigned mask = sctx->shader_pointers_dirty & pointer_mask; - - while (mask) { - struct si_descriptors *descs = &sctx->descriptors[u_bit_scan(&mask)]; - unsigned sh_offset = sh_base + descs->shader_userdata_offset; - - si_emit_shader_pointer_head(cs, sh_offset, 1); - si_emit_shader_pointer_body(sctx->screen, cs, descs->gpu_address); - } -} - static void si_emit_global_shader_pointers(struct si_context *sctx, struct si_descriptors *descs) { @@ -2164,17 +2154,10 @@ sh_base[PIPE_SHADER_TESS_EVAL]); si_emit_consecutive_shader_pointers(sctx, SI_DESCS_SHADER_MASK(FRAGMENT), sh_base[PIPE_SHADER_FRAGMENT]); - if (HAVE_32BIT_POINTERS || sctx->chip_class <= VI) { - si_emit_consecutive_shader_pointers(sctx, SI_DESCS_SHADER_MASK(TESS_CTRL), - sh_base[PIPE_SHADER_TESS_CTRL]); - si_emit_consecutive_shader_pointers(sctx, SI_DESCS_SHADER_MASK(GEOMETRY), - sh_base[PIPE_SHADER_GEOMETRY]); - } else { - si_emit_disjoint_shader_pointers(sctx, SI_DESCS_SHADER_MASK(TESS_CTRL), - sh_base[PIPE_SHADER_TESS_CTRL]); - si_emit_disjoint_shader_pointers(sctx, SI_DESCS_SHADER_MASK(GEOMETRY), - sh_base[PIPE_SHADER_GEOMETRY]); - } + si_emit_consecutive_shader_pointers(sctx, SI_DESCS_SHADER_MASK(TESS_CTRL), + sh_base[PIPE_SHADER_TESS_CTRL]); + si_emit_consecutive_shader_pointers(sctx, SI_DESCS_SHADER_MASK(GEOMETRY), + sh_base[PIPE_SHADER_GEOMETRY]); 
sctx->shader_pointers_dirty &= ~u_bit_consecutive(SI_DESCS_RW_BUFFERS, SI_DESCS_FIRST_COMPUTE); @@ -2313,7 +2296,7 @@ bool *desc_dirty) { struct si_descriptors *desc = &sctx->bindless_descriptors; - struct r600_resource *buf = r600_resource(resource); + struct si_resource *buf = si_resource(resource); unsigned desc_slot_offset = desc_slot * 16; uint32_t *desc_list = desc->list + desc_slot_offset + 4; uint64_t old_desc_va; @@ -2379,7 +2362,7 @@ pipe_sampler_view_reference(&tex_handle->view, view); - r600_resource(sview->base.texture)->texture_handle_allocated = true; + si_resource(sview->base.texture)->texture_handle_allocated = true; return handle; } @@ -2525,7 +2508,7 @@ util_copy_image_view(&img_handle->view, view); - r600_resource(view->resource)->image_handle_allocated = true; + si_resource(view->resource)->image_handle_allocated = true; return handle; } @@ -2555,7 +2538,7 @@ struct si_context *sctx = (struct si_context *)ctx; struct si_image_handle *img_handle; struct pipe_image_view *view; - struct r600_resource *res; + struct si_resource *res; struct hash_entry *entry; entry = _mesa_hash_table_search(sctx->img_handles, @@ -2565,7 +2548,7 @@ img_handle = (struct si_image_handle *)entry->data; view = &img_handle->view; - res = r600_resource(view->resource); + res = si_resource(view->resource); if (resident) { if (res->b.b.target != PIPE_BUFFER) { @@ -2665,10 +2648,6 @@ { int i; -#if !HAVE_32BIT_POINTERS - STATIC_ASSERT(GFX9_SGPR_2ND_SAMPLERS_AND_IMAGES % 2 == 0); -#endif - for (i = 0; i < SI_NUM_SHADERS; i++) { bool is_2nd = sctx->chip_class >= GFX9 && (i == PIPE_SHADER_TESS_CTRL || @@ -2699,7 +2678,6 @@ desc->slot_index_to_bind_directly = si_get_constbuf_slot(0); if (is_2nd) { -#if HAVE_32BIT_POINTERS if (i == PIPE_SHADER_TESS_CTRL) { rel_dw_offset = (R_00B40C_SPI_SHADER_USER_DATA_ADDR_HI_HS - R_00B430_SPI_SHADER_USER_DATA_LS_0) / 4; @@ -2707,9 +2685,6 @@ rel_dw_offset = (R_00B20C_SPI_SHADER_USER_DATA_ADDR_HI_GS - R_00B330_SPI_SHADER_USER_DATA_ES_0) / 4; } 
-#else - rel_dw_offset = GFX9_SGPR_2ND_SAMPLERS_AND_IMAGES; -#endif } else { rel_dw_offset = SI_SGPR_SAMPLERS_AND_IMAGES; } @@ -2831,7 +2806,7 @@ for (i = 0; i < SI_NUM_DESCS; ++i) si_release_descriptors(&sctx->descriptors[i]); - r600_resource_reference(&sctx->vb_descriptors_buffer, NULL); + si_resource_reference(&sctx->vb_descriptors_buffer, NULL); sctx->vb_descriptors_gpu_list = NULL; /* points into a mapped buffer */ si_release_bindless_descriptors(sctx); diff -Nru mesa-18.3.3/src/gallium/drivers/radeonsi/si_dma.c mesa-19.0.1/src/gallium/drivers/radeonsi/si_dma.c --- mesa-18.3.3/src/gallium/drivers/radeonsi/si_dma.c 2018-12-07 18:58:04.000000000 +0000 +++ mesa-19.0.1/src/gallium/drivers/radeonsi/si_dma.c 2019-03-31 23:16:37.000000000 +0000 @@ -37,17 +37,17 @@ { struct radeon_cmdbuf *cs = ctx->dma_cs; unsigned i, ncopy, count, max_size, sub_cmd, shift; - struct r600_resource *rdst = r600_resource(dst); - struct r600_resource *rsrc = r600_resource(src); + struct si_resource *sdst = si_resource(dst); + struct si_resource *ssrc = si_resource(src); /* Mark the buffer range of destination as valid (initialized), * so that transfer_map knows it should wait for the GPU when mapping * that range. 
*/ - util_range_add(&rdst->valid_buffer_range, dst_offset, + util_range_add(&sdst->valid_buffer_range, dst_offset, dst_offset + size); - dst_offset += rdst->gpu_address; - src_offset += rsrc->gpu_address; + dst_offset += sdst->gpu_address; + src_offset += ssrc->gpu_address; /* see whether we should use the dword-aligned or byte-aligned copy */ if (!(dst_offset % 4) && !(src_offset % 4) && !(size % 4)) { @@ -61,7 +61,7 @@ } ncopy = DIV_ROUND_UP(size, max_size); - si_need_dma_space(ctx, ncopy * 5, rdst, rsrc); + si_need_dma_space(ctx, ncopy * 5, sdst, ssrc); for (i = 0; i < ncopy; i++) { count = MIN2(size, max_size); diff -Nru mesa-18.3.3/src/gallium/drivers/radeonsi/si_dma_cs.c mesa-19.0.1/src/gallium/drivers/radeonsi/si_dma_cs.c --- mesa-18.3.3/src/gallium/drivers/radeonsi/si_dma_cs.c 2018-12-07 18:58:04.000000000 +0000 +++ mesa-19.0.1/src/gallium/drivers/radeonsi/si_dma_cs.c 2019-03-31 23:16:37.000000000 +0000 @@ -36,7 +36,7 @@ radeon_emit(cs, 0xf0000000); /* NOP */ } -void si_dma_emit_timestamp(struct si_context *sctx, struct r600_resource *dst, +void si_dma_emit_timestamp(struct si_context *sctx, struct si_resource *dst, uint64_t offset) { struct radeon_cmdbuf *cs = sctx->dma_cs; @@ -69,7 +69,7 @@ { struct radeon_cmdbuf *cs = sctx->dma_cs; unsigned i, ncopy, csize; - struct r600_resource *rdst = r600_resource(dst); + struct si_resource *sdst = si_resource(dst); assert(offset % 4 == 0); assert(size); @@ -83,14 +83,14 @@ /* Mark the buffer range of destination as valid (initialized), * so that transfer_map knows it should wait for the GPU when mapping * that range. 
*/ - util_range_add(&rdst->valid_buffer_range, offset, offset + size); + util_range_add(&sdst->valid_buffer_range, offset, offset + size); - offset += rdst->gpu_address; + offset += sdst->gpu_address; if (sctx->chip_class == SI) { /* the same maximum size as for copying */ ncopy = DIV_ROUND_UP(size, SI_DMA_COPY_MAX_DWORD_ALIGNED_SIZE); - si_need_dma_space(sctx, ncopy * 4, rdst, NULL); + si_need_dma_space(sctx, ncopy * 4, sdst, NULL); for (i = 0; i < ncopy; i++) { csize = MIN2(size, SI_DMA_COPY_MAX_DWORD_ALIGNED_SIZE); @@ -108,7 +108,7 @@ /* The following code is for CI, VI, Vega/Raven, etc. */ /* the same maximum size as for copying */ ncopy = DIV_ROUND_UP(size, CIK_SDMA_COPY_MAX_SIZE); - si_need_dma_space(sctx, ncopy * 5, rdst, NULL); + si_need_dma_space(sctx, ncopy * 5, sdst, NULL); for (i = 0; i < ncopy; i++) { csize = MIN2(size, CIK_SDMA_COPY_MAX_SIZE); @@ -124,7 +124,7 @@ } void si_need_dma_space(struct si_context *ctx, unsigned num_dw, - struct r600_resource *dst, struct r600_resource *src) + struct si_resource *dst, struct si_resource *src) { uint64_t vram = ctx->dma_cs->used_vram; uint64_t gtt = ctx->dma_cs->used_gart; diff -Nru mesa-18.3.3/src/gallium/drivers/radeonsi/si_fence.c mesa-19.0.1/src/gallium/drivers/radeonsi/si_fence.c --- mesa-18.3.3/src/gallium/drivers/radeonsi/si_fence.c 2018-12-07 18:58:04.000000000 +0000 +++ mesa-19.0.1/src/gallium/drivers/radeonsi/si_fence.c 2019-03-31 23:16:37.000000000 +0000 @@ -33,7 +33,7 @@ #include "si_build_pm4.h" struct si_fine_fence { - struct r600_resource *buf; + struct si_resource *buf; unsigned offset; }; @@ -69,7 +69,7 @@ void si_cp_release_mem(struct si_context *ctx, unsigned event, unsigned event_flags, unsigned dst_sel, unsigned int_sel, unsigned data_sel, - struct r600_resource *buf, uint64_t va, + struct si_resource *buf, uint64_t va, uint32_t new_fence, unsigned query_type) { struct radeon_cmdbuf *cs = ctx->gfx_cs; @@ -93,7 +93,7 @@ query_type != PIPE_QUERY_OCCLUSION_COUNTER && query_type != 
PIPE_QUERY_OCCLUSION_PREDICATE && query_type != PIPE_QUERY_OCCLUSION_PREDICATE_CONSERVATIVE) { - struct r600_resource *scratch = ctx->eop_bug_scratch; + struct si_resource *scratch = ctx->eop_bug_scratch; assert(16 * ctx->screen->info.num_render_backends <= scratch->b.b.width0); @@ -117,7 +117,7 @@ } else { if (ctx->chip_class == CIK || ctx->chip_class == VI) { - struct r600_resource *scratch = ctx->eop_bug_scratch; + struct si_resource *scratch = ctx->eop_bug_scratch; uint64_t va = scratch->gpu_address; /* Two EOP events are required to make all engines go idle @@ -160,13 +160,11 @@ return dwords; } -void si_cp_wait_mem(struct si_context *ctx, +void si_cp_wait_mem(struct si_context *ctx, struct radeon_cmdbuf *cs, uint64_t va, uint32_t ref, uint32_t mask, unsigned flags) { - struct radeon_cmdbuf *cs = ctx->gfx_cs; - radeon_emit(cs, PKT3(PKT3_WAIT_REG_MEM, 5, 0)); - radeon_emit(cs, WAIT_REG_MEM_EQUAL | WAIT_REG_MEM_MEM_SPACE(1) | flags); + radeon_emit(cs, WAIT_REG_MEM_MEM_SPACE(1) | flags); radeon_emit(cs, va); radeon_emit(cs, va >> 32); radeon_emit(cs, ref); /* reference value */ @@ -195,17 +193,17 @@ struct pipe_fence_handle *src) { struct radeon_winsys *ws = ((struct si_screen*)screen)->ws; - struct si_multi_fence **rdst = (struct si_multi_fence **)dst; - struct si_multi_fence *rsrc = (struct si_multi_fence *)src; + struct si_multi_fence **sdst = (struct si_multi_fence **)dst; + struct si_multi_fence *ssrc = (struct si_multi_fence *)src; - if (pipe_reference(&(*rdst)->reference, &rsrc->reference)) { - ws->fence_reference(&(*rdst)->gfx, NULL); - ws->fence_reference(&(*rdst)->sdma, NULL); - tc_unflushed_batch_token_reference(&(*rdst)->tc_token, NULL); - r600_resource_reference(&(*rdst)->fine.buf, NULL); - FREE(*rdst); + if (pipe_reference(&(*sdst)->reference, &ssrc->reference)) { + ws->fence_reference(&(*sdst)->gfx, NULL); + ws->fence_reference(&(*sdst)->sdma, NULL); + tc_unflushed_batch_token_reference(&(*sdst)->tc_token, NULL); + 
si_resource_reference(&(*sdst)->fine.buf, NULL); + FREE(*sdst); } - *rdst = rsrc; + *sdst = ssrc; } static struct si_multi_fence *si_create_multi_fence() @@ -261,24 +259,19 @@ *fence_ptr = 0; - uint64_t fence_va = fine->buf->gpu_address + fine->offset; - - radeon_add_to_buffer_list(ctx, ctx->gfx_cs, fine->buf, - RADEON_USAGE_WRITE, RADEON_PRIO_QUERY); if (flags & PIPE_FLUSH_TOP_OF_PIPE) { - struct radeon_cmdbuf *cs = ctx->gfx_cs; - radeon_emit(cs, PKT3(PKT3_WRITE_DATA, 3, 0)); - radeon_emit(cs, S_370_DST_SEL(V_370_MEM_ASYNC) | - S_370_WR_CONFIRM(1) | - S_370_ENGINE_SEL(V_370_PFP)); - radeon_emit(cs, fence_va); - radeon_emit(cs, fence_va >> 32); - radeon_emit(cs, 0x80000000); + uint32_t value = 0x80000000; + + si_cp_write_data(ctx, fine->buf, fine->offset, 4, + V_370_MEM, V_370_PFP, &value); } else if (flags & PIPE_FLUSH_BOTTOM_OF_PIPE) { + uint64_t fence_va = fine->buf->gpu_address + fine->offset; + + radeon_add_to_buffer_list(ctx, ctx->gfx_cs, fine->buf, + RADEON_USAGE_WRITE, RADEON_PRIO_QUERY); si_cp_release_mem(ctx, V_028A90_BOTTOM_OF_PIPE_TS, 0, - EOP_DST_SEL_MEM, - EOP_INT_SEL_SEND_DATA_AFTER_WR_CONFIRM, + EOP_DST_SEL_MEM, EOP_INT_SEL_NONE, EOP_DATA_SEL_VALUE_32BIT, NULL, fence_va, 0x80000000, PIPE_QUERY_GPU_FINISHED); @@ -293,15 +286,15 @@ uint64_t timeout) { struct radeon_winsys *rws = ((struct si_screen*)screen)->ws; - struct si_multi_fence *rfence = (struct si_multi_fence *)fence; + struct si_multi_fence *sfence = (struct si_multi_fence *)fence; struct si_context *sctx; int64_t abs_timeout = os_time_get_absolute_timeout(timeout); ctx = threaded_context_unwrap_sync(ctx); sctx = (struct si_context*)(ctx ? ctx : NULL); - if (!util_queue_fence_is_signalled(&rfence->ready)) { - if (rfence->tc_token) { + if (!util_queue_fence_is_signalled(&sfence->ready)) { + if (sfence->tc_token) { /* Ensure that si_flush_from_st will be called for * this fence, but only if we're in the API thread * where the context is current. 
@@ -310,7 +303,7 @@ * be in flight in the driver thread, so the fence * may not be ready yet when this call returns. */ - threaded_context_flush(ctx, rfence->tc_token, + threaded_context_flush(ctx, sfence->tc_token, timeout == 0); } @@ -318,9 +311,9 @@ return false; if (timeout == PIPE_TIMEOUT_INFINITE) { - util_queue_fence_wait(&rfence->ready); + util_queue_fence_wait(&sfence->ready); } else { - if (!util_queue_fence_wait_timeout(&rfence->ready, abs_timeout)) + if (!util_queue_fence_wait_timeout(&sfence->ready, abs_timeout)) return false; } @@ -330,8 +323,8 @@ } } - if (rfence->sdma) { - if (!rws->fence_wait(rws, rfence->sdma, timeout)) + if (sfence->sdma) { + if (!rws->fence_wait(rws, sfence->sdma, timeout)) return false; /* Recompute the timeout after waiting. */ @@ -341,19 +334,19 @@ } } - if (!rfence->gfx) + if (!sfence->gfx) return true; - if (rfence->fine.buf && - si_fine_fence_signaled(rws, &rfence->fine)) { - rws->fence_reference(&rfence->gfx, NULL); - r600_resource_reference(&rfence->fine.buf, NULL); + if (sfence->fine.buf && + si_fine_fence_signaled(rws, &sfence->fine)) { + rws->fence_reference(&sfence->gfx, NULL); + si_resource_reference(&sfence->fine.buf, NULL); return true; } /* Flush the gfx IB if it hasn't been flushed yet. */ - if (sctx && rfence->gfx_unflushed.ctx == sctx && - rfence->gfx_unflushed.ib_index == sctx->num_gfx_cs_flushes) { + if (sctx && sfence->gfx_unflushed.ctx == sctx && + sfence->gfx_unflushed.ib_index == sctx->num_gfx_cs_flushes) { /* Section 4.1.2 (Signaling) of the OpenGL 4.6 (Core profile) * spec says: * @@ -380,7 +373,7 @@ (timeout ? 
0 : PIPE_FLUSH_ASYNC) | RADEON_FLUSH_START_NEXT_GFX_IB_NOW, NULL); - rfence->gfx_unflushed.ctx = NULL; + sfence->gfx_unflushed.ctx = NULL; if (!timeout) return false; @@ -392,13 +385,13 @@ } } - if (rws->fence_wait(rws, rfence->gfx, timeout)) + if (rws->fence_wait(rws, sfence->gfx, timeout)) return true; /* Re-check in case the GPU is slow or hangs, but the commands before * the fine-grained fence have completed. */ - if (rfence->fine.buf && - si_fine_fence_signaled(rws, &rfence->fine)) + if (sfence->fine.buf && + si_fine_fence_signaled(rws, &sfence->fine)) return true; return false; @@ -410,12 +403,12 @@ { struct si_screen *sscreen = (struct si_screen*)ctx->screen; struct radeon_winsys *ws = sscreen->ws; - struct si_multi_fence *rfence; + struct si_multi_fence *sfence; *pfence = NULL; - rfence = si_create_multi_fence(); - if (!rfence) + sfence = si_create_multi_fence(); + if (!sfence) return; switch (type) { @@ -423,14 +416,14 @@ if (!sscreen->info.has_fence_to_handle) goto finish; - rfence->gfx = ws->fence_import_sync_file(ws, fd); + sfence->gfx = ws->fence_import_sync_file(ws, fd); break; case PIPE_FD_TYPE_SYNCOBJ: if (!sscreen->info.has_syncobj) goto finish; - rfence->gfx = ws->fence_import_syncobj(ws, fd); + sfence->gfx = ws->fence_import_syncobj(ws, fd); break; default: @@ -438,12 +431,12 @@ } finish: - if (!rfence->gfx) { - FREE(rfence); + if (!sfence->gfx) { + FREE(sfence); return; } - *pfence = (struct pipe_fence_handle*)rfence; + *pfence = (struct pipe_fence_handle*)sfence; } static int si_fence_get_fd(struct pipe_screen *screen, @@ -451,26 +444,26 @@ { struct si_screen *sscreen = (struct si_screen*)screen; struct radeon_winsys *ws = sscreen->ws; - struct si_multi_fence *rfence = (struct si_multi_fence *)fence; + struct si_multi_fence *sfence = (struct si_multi_fence *)fence; int gfx_fd = -1, sdma_fd = -1; if (!sscreen->info.has_fence_to_handle) return -1; - util_queue_fence_wait(&rfence->ready); + util_queue_fence_wait(&sfence->ready); /* Deferred fences 
aren't supported. */ - assert(!rfence->gfx_unflushed.ctx); - if (rfence->gfx_unflushed.ctx) + assert(!sfence->gfx_unflushed.ctx); + if (sfence->gfx_unflushed.ctx) return -1; - if (rfence->sdma) { - sdma_fd = ws->fence_export_sync_file(ws, rfence->sdma); + if (sfence->sdma) { + sdma_fd = ws->fence_export_sync_file(ws, sfence->sdma); if (sdma_fd == -1) return -1; } - if (rfence->gfx) { - gfx_fd = ws->fence_export_sync_file(ws, rfence->gfx); + if (sfence->gfx) { + gfx_fd = ws->fence_export_sync_file(ws, sfence->gfx); if (gfx_fd == -1) { if (sdma_fd != -1) close(sdma_fd); @@ -591,15 +584,15 @@ struct pipe_fence_handle *fence) { struct si_context *sctx = (struct si_context *)ctx; - struct si_multi_fence *rfence = (struct si_multi_fence *)fence; + struct si_multi_fence *sfence = (struct si_multi_fence *)fence; /* We should have at least one syncobj to signal */ - assert(rfence->sdma || rfence->gfx); + assert(sfence->sdma || sfence->gfx); - if (rfence->sdma) - si_add_syncobj_signal(sctx, rfence->sdma); - if (rfence->gfx) - si_add_syncobj_signal(sctx, rfence->gfx); + if (sfence->sdma) + si_add_syncobj_signal(sctx, sfence->sdma); + if (sfence->gfx) + si_add_syncobj_signal(sctx, sfence->gfx); /** * The spec does not require a flush here. We insert a flush @@ -618,13 +611,13 @@ struct pipe_fence_handle *fence) { struct si_context *sctx = (struct si_context *)ctx; - struct si_multi_fence *rfence = (struct si_multi_fence *)fence; + struct si_multi_fence *sfence = (struct si_multi_fence *)fence; - util_queue_fence_wait(&rfence->ready); + util_queue_fence_wait(&sfence->ready); /* Unflushed fences from the same context are no-ops. 
*/ - if (rfence->gfx_unflushed.ctx && - rfence->gfx_unflushed.ctx == sctx) + if (sfence->gfx_unflushed.ctx && + sfence->gfx_unflushed.ctx == sctx) return; /* All unflushed commands will not start execution before @@ -634,10 +627,10 @@ */ si_flush_from_st(ctx, NULL, PIPE_FLUSH_ASYNC); - if (rfence->sdma) - si_add_fence_dependency(sctx, rfence->sdma); - if (rfence->gfx) - si_add_fence_dependency(sctx, rfence->gfx); + if (sfence->sdma) + si_add_fence_dependency(sctx, sfence->sdma); + if (sfence->gfx) + si_add_fence_dependency(sctx, sfence->gfx); } void si_init_fence_functions(struct si_context *ctx) diff -Nru mesa-18.3.3/src/gallium/drivers/radeonsi/si_get.c mesa-19.0.1/src/gallium/drivers/radeonsi/si_get.c --- mesa-18.3.3/src/gallium/drivers/radeonsi/si_get.c 2018-12-07 18:58:04.000000000 +0000 +++ mesa-19.0.1/src/gallium/drivers/radeonsi/si_get.c 2019-03-31 23:16:37.000000000 +0000 @@ -254,6 +254,9 @@ case PIPE_CAP_MAX_SHADER_PATCH_VARYINGS: return 30; + case PIPE_CAP_MAX_VARYINGS: + return 32; + case PIPE_CAP_TEXTURE_BORDER_COLOR_QUIRK: return sscreen->info.chip_class <= VI ? PIPE_QUIRK_TEXTURE_BORDER_COLOR_SWIZZLE_R600 : 0; @@ -455,15 +458,6 @@ !sscreen->llvm_has_working_vgpr_indexing) return 0; - /* Doing indirect indexing on GFX9 with LLVM 6.0 hangs. - * This means we don't support INTERP instructions with - * indirect indexing on inputs. - */ - if (shader == PIPE_SHADER_FRAGMENT && - !sscreen->llvm_has_working_vgpr_indexing && - HAVE_LLVM < 0x0700) - return 0; - /* TCS and TES load inputs directly from LDS or offchip * memory, so indirect indexing is always supported. 
* PS has to support indirect indexing, because we can't diff -Nru mesa-18.3.3/src/gallium/drivers/radeonsi/si_gfx_cs.c mesa-19.0.1/src/gallium/drivers/radeonsi/si_gfx_cs.c --- mesa-18.3.3/src/gallium/drivers/radeonsi/si_gfx_cs.c 2018-12-07 18:58:04.000000000 +0000 +++ mesa-19.0.1/src/gallium/drivers/radeonsi/si_gfx_cs.c 2019-03-31 23:16:37.000000000 +0000 @@ -33,7 +33,7 @@ struct radeon_cmdbuf *cs = ctx->gfx_cs; /* There is no need to flush the DMA IB here, because - * r600_need_dma_space always flushes the GFX IB if there is + * si_need_dma_space always flushes the GFX IB if there is * a conflict, which means any unflushed DMA commands automatically * precede the GFX IB (= they had no dependency on the GFX IB when * they were submitted). @@ -177,7 +177,7 @@ pipe_reference_init(&ctx->current_saved_cs->reference, 1); - ctx->current_saved_cs->trace_buf = r600_resource( + ctx->current_saved_cs->trace_buf = si_resource( pipe_buffer_create(ctx->b.screen, 0, PIPE_USAGE_STAGING, 8)); if (!ctx->current_saved_cs->trace_buf) { free(ctx->current_saved_cs); diff -Nru mesa-18.3.3/src/gallium/drivers/radeonsi/si_gpu_load.c mesa-19.0.1/src/gallium/drivers/radeonsi/si_gpu_load.c --- mesa-18.3.3/src/gallium/drivers/radeonsi/si_gpu_load.c 2018-04-11 19:02:35.000000000 +0000 +++ mesa-19.0.1/src/gallium/drivers/radeonsi/si_gpu_load.c 2019-03-31 23:16:37.000000000 +0000 @@ -213,8 +213,8 @@ } } -#define BUSY_INDEX(rscreen, field) (&rscreen->mmio_counters.named.field.busy - \ - rscreen->mmio_counters.array) +#define BUSY_INDEX(sscreen, field) (&sscreen->mmio_counters.named.field.busy - \ + sscreen->mmio_counters.array) static unsigned busy_index_from_type(struct si_screen *sscreen, unsigned type) diff -Nru mesa-18.3.3/src/gallium/drivers/radeonsi/si_perfcounter.c mesa-19.0.1/src/gallium/drivers/radeonsi/si_perfcounter.c --- mesa-18.3.3/src/gallium/drivers/radeonsi/si_perfcounter.c 2018-12-07 18:58:04.000000000 +0000 +++ mesa-19.0.1/src/gallium/drivers/radeonsi/si_perfcounter.c 2019-03-31 
23:16:37.000000000 +0000 @@ -27,6 +27,24 @@ #include "util/u_memory.h" +enum si_pc_block_flags { + /* This block is part of the shader engine */ + SI_PC_BLOCK_SE = (1 << 0), + + /* Expose per-instance groups instead of summing all instances (within + * an SE). */ + SI_PC_BLOCK_INSTANCE_GROUPS = (1 << 1), + + /* Expose per-SE groups instead of summing instances across SEs. */ + SI_PC_BLOCK_SE_GROUPS = (1 << 2), + + /* Shader block */ + SI_PC_BLOCK_SHADER = (1 << 3), + + /* Non-shader block with perfcounters windowed by shaders. */ + SI_PC_BLOCK_SHADER_WINDOWED = (1 << 4), +}; + enum si_pc_reg_layout { /* All secondary selector dwords follow as one block after the primary * selector dwords for the counters that have secondary selectors. @@ -69,12 +87,24 @@ unsigned layout; }; -struct si_pc_block { +struct si_pc_block_gfxdescr { struct si_pc_block_base *b; unsigned selectors; unsigned instances; }; +struct si_pc_block { + const struct si_pc_block_gfxdescr *b; + unsigned num_instances; + + unsigned num_groups; + char *group_names; + unsigned group_name_stride; + + char *selector_names; + unsigned selector_name_stride; +}; + /* The order is chosen to be compatible with GPUPerfStudio's hardcoding of * performance counter group IDs. */ @@ -93,6 +123,42 @@ S_036780_CS_EN(1), }; +/* Max counters per HW block */ +#define SI_QUERY_MAX_COUNTERS 16 + +#define SI_PC_SHADERS_WINDOWING (1 << 31) + +struct si_query_group { + struct si_query_group *next; + struct si_pc_block *block; + unsigned sub_gid; /* only used during init */ + unsigned result_base; /* only used during init */ + int se; + int instance; + unsigned num_counters; + unsigned selectors[SI_QUERY_MAX_COUNTERS]; +}; + +struct si_query_counter { + unsigned base; + unsigned qwords; + unsigned stride; /* in uint64s */ +}; + +struct si_query_pc { + struct si_query b; + struct si_query_buffer buffer; + + /* Size of the results in memory, in bytes. 
*/ + unsigned result_size; + + unsigned shaders; + unsigned num_counters; + struct si_query_counter *counters; + struct si_query_group *groups; +}; + + static struct si_pc_block_base cik_CB = { .name = "CB", .num_counters = 4, @@ -344,7 +410,7 @@ * blindly once it believes it has identified the hardware, so the order of * blocks here matters. */ -static struct si_pc_block groups_CIK[] = { +static struct si_pc_block_gfxdescr groups_CIK[] = { { &cik_CB, 226}, { &cik_CPF, 17 }, { &cik_DB, 257}, @@ -371,7 +437,7 @@ }; -static struct si_pc_block groups_VI[] = { +static struct si_pc_block_gfxdescr groups_VI[] = { { &cik_CB, 405}, { &cik_CPF, 19 }, { &cik_DB, 257}, @@ -398,7 +464,7 @@ }; -static struct si_pc_block groups_gfx9[] = { +static struct si_pc_block_gfxdescr groups_gfx9[] = { { &cik_CB, 438}, { &cik_CPF, 32 }, { &cik_DB, 328}, @@ -422,6 +488,58 @@ { &cik_CPC, 35 }, }; +static bool si_pc_block_has_per_se_groups(const struct si_perfcounters *pc, + const struct si_pc_block *block) +{ + return block->b->b->flags & SI_PC_BLOCK_SE_GROUPS || + (block->b->b->flags & SI_PC_BLOCK_SE && pc->separate_se); +} + +static bool si_pc_block_has_per_instance_groups(const struct si_perfcounters *pc, + const struct si_pc_block *block) +{ + return block->b->b->flags & SI_PC_BLOCK_INSTANCE_GROUPS || + (block->num_instances > 1 && pc->separate_instance); +} + +static struct si_pc_block * +lookup_counter(struct si_perfcounters *pc, unsigned index, + unsigned *base_gid, unsigned *sub_index) +{ + struct si_pc_block *block = pc->blocks; + unsigned bid; + + *base_gid = 0; + for (bid = 0; bid < pc->num_blocks; ++bid, ++block) { + unsigned total = block->num_groups * block->b->selectors; + + if (index < total) { + *sub_index = index; + return block; + } + + index -= total; + *base_gid += block->num_groups; + } + + return NULL; +} + +static struct si_pc_block * +lookup_group(struct si_perfcounters *pc, unsigned *index) +{ + unsigned bid; + struct si_pc_block *block = pc->blocks; + + for (bid = 
0; bid < pc->num_blocks; ++bid, ++block) { + if (*index < block->num_groups) + return block; + *index -= block->num_groups; + } + + return NULL; +} + static void si_pc_emit_instance(struct si_context *sctx, int se, int instance) { @@ -454,11 +572,10 @@ } static void si_pc_emit_select(struct si_context *sctx, - struct si_perfcounter_block *group, + struct si_pc_block *block, unsigned count, unsigned *selectors) { - struct si_pc_block *sigroup = (struct si_pc_block *)group->data; - struct si_pc_block_base *regs = sigroup->b; + struct si_pc_block_base *regs = block->b->b; struct radeon_cmdbuf *cs = sctx->gfx_cs; unsigned idx; unsigned layout_multi = regs->layout & SI_PC_MULTI_MASK; @@ -550,7 +667,7 @@ } static void si_pc_emit_start(struct si_context *sctx, - struct r600_resource *buffer, uint64_t va) + struct si_resource *buffer, uint64_t va) { struct radeon_cmdbuf *cs = sctx->gfx_cs; @@ -576,16 +693,15 @@ /* Note: The buffer was already added in si_pc_emit_start, so we don't have to * do it again in here. 
*/ static void si_pc_emit_stop(struct si_context *sctx, - struct r600_resource *buffer, uint64_t va) + struct si_resource *buffer, uint64_t va) { struct radeon_cmdbuf *cs = sctx->gfx_cs; si_cp_release_mem(sctx, V_028A90_BOTTOM_OF_PIPE_TS, 0, - EOP_DST_SEL_MEM, - EOP_INT_SEL_SEND_DATA_AFTER_WR_CONFIRM, + EOP_DST_SEL_MEM, EOP_INT_SEL_NONE, EOP_DATA_SEL_VALUE_32BIT, buffer, va, 0, SI_NOT_QUERY); - si_cp_wait_mem(sctx, va, 0, 0xffffffff, 0); + si_cp_wait_mem(sctx, cs, va, 0, 0xffffffff, WAIT_REG_MEM_EQUAL); radeon_emit(cs, PKT3(PKT3_EVENT_WRITE, 0, 0)); radeon_emit(cs, EVENT_TYPE(V_028A90_PERFCOUNTER_SAMPLE) | EVENT_INDEX(0)); @@ -597,12 +713,10 @@ } static void si_pc_emit_read(struct si_context *sctx, - struct si_perfcounter_block *group, - unsigned count, unsigned *selectors, - struct r600_resource *buffer, uint64_t va) + struct si_pc_block *block, + unsigned count, uint64_t va) { - struct si_pc_block *sigroup = (struct si_pc_block *)group->data; - struct si_pc_block_base *regs = sigroup->b; + struct si_pc_block_base *regs = block->b->b; struct radeon_cmdbuf *cs = sctx->gfx_cs; unsigned idx; unsigned reg = regs->counter0_lo; @@ -642,16 +756,537 @@ } } -static void si_pc_cleanup(struct si_screen *sscreen) +static void si_pc_query_destroy(struct si_screen *sscreen, + struct si_query *squery) +{ + struct si_query_pc *query = (struct si_query_pc *)squery; + + while (query->groups) { + struct si_query_group *group = query->groups; + query->groups = group->next; + FREE(group); + } + + FREE(query->counters); + + si_query_buffer_destroy(sscreen, &query->buffer); + FREE(query); +} + +static void si_pc_query_resume(struct si_context *sctx, struct si_query *squery) +/* + struct si_query_hw *hwquery, + struct si_resource *buffer, uint64_t va)*/ +{ + struct si_query_pc *query = (struct si_query_pc *)squery; + int current_se = -1; + int current_instance = -1; + + if (!si_query_buffer_alloc(sctx, &query->buffer, NULL, query->result_size)) + return; + si_need_gfx_cs_space(sctx); + + 
if (query->shaders) + si_pc_emit_shaders(sctx, query->shaders); + + for (struct si_query_group *group = query->groups; group; group = group->next) { + struct si_pc_block *block = group->block; + + if (group->se != current_se || group->instance != current_instance) { + current_se = group->se; + current_instance = group->instance; + si_pc_emit_instance(sctx, group->se, group->instance); + } + + si_pc_emit_select(sctx, block, group->num_counters, group->selectors); + } + + if (current_se != -1 || current_instance != -1) + si_pc_emit_instance(sctx, -1, -1); + + uint64_t va = query->buffer.buf->gpu_address + query->buffer.results_end; + si_pc_emit_start(sctx, query->buffer.buf, va); +} + +static void si_pc_query_suspend(struct si_context *sctx, struct si_query *squery) +{ + struct si_query_pc *query = (struct si_query_pc *)squery; + + if (!query->buffer.buf) + return; + + uint64_t va = query->buffer.buf->gpu_address + query->buffer.results_end; + query->buffer.results_end += query->result_size; + + si_pc_emit_stop(sctx, query->buffer.buf, va); + + for (struct si_query_group *group = query->groups; group; group = group->next) { + struct si_pc_block *block = group->block; + unsigned se = group->se >= 0 ? group->se : 0; + unsigned se_end = se + 1; + + if ((block->b->b->flags & SI_PC_BLOCK_SE) && (group->se < 0)) + se_end = sctx->screen->info.max_se; + + do { + unsigned instance = group->instance >= 0 ? 
group->instance : 0; + + do { + si_pc_emit_instance(sctx, se, instance); + si_pc_emit_read(sctx, block, group->num_counters, va); + va += sizeof(uint64_t) * group->num_counters; + } while (group->instance < 0 && ++instance < block->num_instances); + } while (++se < se_end); + } + + si_pc_emit_instance(sctx, -1, -1); +} + +static bool si_pc_query_begin(struct si_context *ctx, struct si_query *squery) +{ + struct si_query_pc *query = (struct si_query_pc *)squery; + + si_query_buffer_reset(ctx, &query->buffer); + + LIST_ADDTAIL(&query->b.active_list, &ctx->active_queries); + ctx->num_cs_dw_queries_suspend += query->b.num_cs_dw_suspend; + + si_pc_query_resume(ctx, squery); + + return true; +} + +static bool si_pc_query_end(struct si_context *ctx, struct si_query *squery) +{ + struct si_query_pc *query = (struct si_query_pc *)squery; + + si_pc_query_suspend(ctx, squery); + + LIST_DEL(&squery->active_list); + ctx->num_cs_dw_queries_suspend -= squery->num_cs_dw_suspend; + + return query->buffer.buf != NULL; +} + +static void si_pc_query_add_result(struct si_query_pc *query, + void *buffer, + union pipe_query_result *result) +{ + uint64_t *results = buffer; + unsigned i, j; + + for (i = 0; i < query->num_counters; ++i) { + struct si_query_counter *counter = &query->counters[i]; + + for (j = 0; j < counter->qwords; ++j) { + uint32_t value = results[counter->base + j * counter->stride]; + result->batch[i].u64 += value; + } + } +} + +static bool si_pc_query_get_result(struct si_context *sctx, struct si_query *squery, + bool wait, union pipe_query_result *result) +{ + struct si_query_pc *query = (struct si_query_pc *)squery; + + memset(result, 0, sizeof(result->batch[0]) * query->num_counters); + + for (struct si_query_buffer *qbuf = &query->buffer; qbuf; qbuf = qbuf->previous) { + unsigned usage = PIPE_TRANSFER_READ | + (wait ? 
0 : PIPE_TRANSFER_DONTBLOCK); + unsigned results_base = 0; + void *map; + + if (squery->b.flushed) + map = sctx->ws->buffer_map(qbuf->buf->buf, NULL, usage); + else + map = si_buffer_map_sync_with_rings(sctx, qbuf->buf, usage); + + if (!map) + return false; + + while (results_base != qbuf->results_end) { + si_pc_query_add_result(query, map + results_base, result); + results_base += query->result_size; + } + } + + return true; +} + +static const struct si_query_ops batch_query_ops = { + .destroy = si_pc_query_destroy, + .begin = si_pc_query_begin, + .end = si_pc_query_end, + .get_result = si_pc_query_get_result, + + .suspend = si_pc_query_suspend, + .resume = si_pc_query_resume, +}; + +static struct si_query_group *get_group_state(struct si_screen *screen, + struct si_query_pc *query, + struct si_pc_block *block, + unsigned sub_gid) +{ + struct si_query_group *group = query->groups; + + while (group) { + if (group->block == block && group->sub_gid == sub_gid) + return group; + group = group->next; + } + + group = CALLOC_STRUCT(si_query_group); + if (!group) + return NULL; + + group->block = block; + group->sub_gid = sub_gid; + + if (block->b->b->flags & SI_PC_BLOCK_SHADER) { + unsigned sub_gids = block->num_instances; + unsigned shader_id; + unsigned shaders; + unsigned query_shaders; + + if (si_pc_block_has_per_se_groups(screen->perfcounters, block)) + sub_gids = sub_gids * screen->info.max_se; + shader_id = sub_gid / sub_gids; + sub_gid = sub_gid % sub_gids; + + shaders = si_pc_shader_type_bits[shader_id]; + + query_shaders = query->shaders & ~SI_PC_SHADERS_WINDOWING; + if (query_shaders && query_shaders != shaders) { + fprintf(stderr, "si_perfcounter: incompatible shader groups\n"); + FREE(group); + return NULL; + } + query->shaders = shaders; + } + + if (block->b->b->flags & SI_PC_BLOCK_SHADER_WINDOWED && !query->shaders) { + // A non-zero value in query->shaders ensures that the shader + // masking is reset unless the user explicitly requests one. 
+ query->shaders = SI_PC_SHADERS_WINDOWING; + } + + if (si_pc_block_has_per_se_groups(screen->perfcounters, block)) { + group->se = sub_gid / block->num_instances; + sub_gid = sub_gid % block->num_instances; + } else { + group->se = -1; + } + + if (si_pc_block_has_per_instance_groups(screen->perfcounters, block)) { + group->instance = sub_gid; + } else { + group->instance = -1; + } + + group->next = query->groups; + query->groups = group; + + return group; +} + +struct pipe_query *si_create_batch_query(struct pipe_context *ctx, + unsigned num_queries, + unsigned *query_types) +{ + struct si_screen *screen = + (struct si_screen *)ctx->screen; + struct si_perfcounters *pc = screen->perfcounters; + struct si_pc_block *block; + struct si_query_group *group; + struct si_query_pc *query; + unsigned base_gid, sub_gid, sub_index; + unsigned i, j; + + if (!pc) + return NULL; + + query = CALLOC_STRUCT(si_query_pc); + if (!query) + return NULL; + + query->b.ops = &batch_query_ops; + + query->num_counters = num_queries; + + /* Collect selectors per group */ + for (i = 0; i < num_queries; ++i) { + unsigned sub_gid; + + if (query_types[i] < SI_QUERY_FIRST_PERFCOUNTER) + goto error; + + block = lookup_counter(pc, query_types[i] - SI_QUERY_FIRST_PERFCOUNTER, + &base_gid, &sub_index); + if (!block) + goto error; + + sub_gid = sub_index / block->b->selectors; + sub_index = sub_index % block->b->selectors; + + group = get_group_state(screen, query, block, sub_gid); + if (!group) + goto error; + + if (group->num_counters >= block->b->b->num_counters) { + fprintf(stderr, + "perfcounter group %s: too many selected\n", + block->b->b->name); + goto error; + } + group->selectors[group->num_counters] = sub_index; + ++group->num_counters; + } + + /* Compute result bases and CS size per group */ + query->b.num_cs_dw_suspend = pc->num_stop_cs_dwords; + query->b.num_cs_dw_suspend += pc->num_instance_cs_dwords; + + i = 0; + for (group = query->groups; group; group = group->next) { + struct 
si_pc_block *block = group->block; + unsigned read_dw; + unsigned instances = 1; + + if ((block->b->b->flags & SI_PC_BLOCK_SE) && group->se < 0) + instances = screen->info.max_se; + if (group->instance < 0) + instances *= block->num_instances; + + group->result_base = i; + query->result_size += sizeof(uint64_t) * instances * group->num_counters; + i += instances * group->num_counters; + + read_dw = 6 * group->num_counters; + query->b.num_cs_dw_suspend += instances * read_dw; + query->b.num_cs_dw_suspend += instances * pc->num_instance_cs_dwords; + } + + if (query->shaders) { + if (query->shaders == SI_PC_SHADERS_WINDOWING) + query->shaders = 0xffffffff; + } + + /* Map user-supplied query array to result indices */ + query->counters = CALLOC(num_queries, sizeof(*query->counters)); + for (i = 0; i < num_queries; ++i) { + struct si_query_counter *counter = &query->counters[i]; + struct si_pc_block *block; + + block = lookup_counter(pc, query_types[i] - SI_QUERY_FIRST_PERFCOUNTER, + &base_gid, &sub_index); + + sub_gid = sub_index / block->b->selectors; + sub_index = sub_index % block->b->selectors; + + group = get_group_state(screen, query, block, sub_gid); + assert(group != NULL); + + for (j = 0; j < group->num_counters; ++j) { + if (group->selectors[j] == sub_index) + break; + } + + counter->base = group->result_base + j; + counter->stride = group->num_counters; + + counter->qwords = 1; + if ((block->b->b->flags & SI_PC_BLOCK_SE) && group->se < 0) + counter->qwords = screen->info.max_se; + if (group->instance < 0) + counter->qwords *= block->num_instances; + } + + return (struct pipe_query *)query; + +error: + si_pc_query_destroy(screen, &query->b); + return NULL; +} + +static bool si_init_block_names(struct si_screen *screen, + struct si_pc_block *block) +{ + bool per_instance_groups = si_pc_block_has_per_instance_groups(screen->perfcounters, block); + bool per_se_groups = si_pc_block_has_per_se_groups(screen->perfcounters, block); + unsigned i, j, k; + unsigned 
groups_shader = 1, groups_se = 1, groups_instance = 1; + unsigned namelen; + char *groupname; + char *p; + + if (per_instance_groups) + groups_instance = block->num_instances; + if (per_se_groups) + groups_se = screen->info.max_se; + if (block->b->b->flags & SI_PC_BLOCK_SHADER) + groups_shader = ARRAY_SIZE(si_pc_shader_type_bits); + + namelen = strlen(block->b->b->name); + block->group_name_stride = namelen + 1; + if (block->b->b->flags & SI_PC_BLOCK_SHADER) + block->group_name_stride += 3; + if (per_se_groups) { + assert(groups_se <= 10); + block->group_name_stride += 1; + + if (per_instance_groups) + block->group_name_stride += 1; + } + if (per_instance_groups) { + assert(groups_instance <= 100); + block->group_name_stride += 2; + } + + block->group_names = MALLOC(block->num_groups * block->group_name_stride); + if (!block->group_names) + return false; + + groupname = block->group_names; + for (i = 0; i < groups_shader; ++i) { + const char *shader_suffix = si_pc_shader_type_suffixes[i]; + unsigned shaderlen = strlen(shader_suffix); + for (j = 0; j < groups_se; ++j) { + for (k = 0; k < groups_instance; ++k) { + strcpy(groupname, block->b->b->name); + p = groupname + namelen; + + if (block->b->b->flags & SI_PC_BLOCK_SHADER) { + strcpy(p, shader_suffix); + p += shaderlen; + } + + if (per_se_groups) { + p += sprintf(p, "%d", j); + if (per_instance_groups) + *p++ = '_'; + } + + if (per_instance_groups) + p += sprintf(p, "%d", k); + + groupname += block->group_name_stride; + } + } + } + + assert(block->b->selectors <= 1000); + block->selector_name_stride = block->group_name_stride + 4; + block->selector_names = MALLOC(block->num_groups * block->b->selectors * + block->selector_name_stride); + if (!block->selector_names) + return false; + + groupname = block->group_names; + p = block->selector_names; + for (i = 0; i < block->num_groups; ++i) { + for (j = 0; j < block->b->selectors; ++j) { + sprintf(p, "%s_%03d", groupname, j); + p += block->selector_name_stride; + } + 
groupname += block->group_name_stride; + } + + return true; +} + +int si_get_perfcounter_info(struct si_screen *screen, + unsigned index, + struct pipe_driver_query_info *info) +{ + struct si_perfcounters *pc = screen->perfcounters; + struct si_pc_block *block; + unsigned base_gid, sub; + + if (!pc) + return 0; + + if (!info) { + unsigned bid, num_queries = 0; + + for (bid = 0; bid < pc->num_blocks; ++bid) { + num_queries += pc->blocks[bid].b->selectors * + pc->blocks[bid].num_groups; + } + + return num_queries; + } + + block = lookup_counter(pc, index, &base_gid, &sub); + if (!block) + return 0; + + if (!block->selector_names) { + if (!si_init_block_names(screen, block)) + return 0; + } + info->name = block->selector_names + sub * block->selector_name_stride; + info->query_type = SI_QUERY_FIRST_PERFCOUNTER + index; + info->max_value.u64 = 0; + info->type = PIPE_DRIVER_QUERY_TYPE_UINT64; + info->result_type = PIPE_DRIVER_QUERY_RESULT_TYPE_AVERAGE; + info->group_id = base_gid + sub / block->b->selectors; + info->flags = PIPE_DRIVER_QUERY_FLAG_BATCH; + if (sub > 0 && sub + 1 < block->b->selectors * block->num_groups) + info->flags |= PIPE_DRIVER_QUERY_FLAG_DONT_LIST; + return 1; +} + +int si_get_perfcounter_group_info(struct si_screen *screen, + unsigned index, + struct pipe_driver_query_group_info *info) { - si_perfcounters_do_destroy(sscreen->perfcounters); - sscreen->perfcounters = NULL; + struct si_perfcounters *pc = screen->perfcounters; + struct si_pc_block *block; + + if (!pc) + return 0; + + if (!info) + return pc->num_groups; + + block = lookup_group(pc, &index); + if (!block) + return 0; + + if (!block->group_names) { + if (!si_init_block_names(screen, block)) + return 0; + } + info->name = block->group_names + index * block->group_name_stride; + info->num_queries = block->b->selectors; + info->max_active_queries = block->b->b->num_counters; + return 1; +} + +void si_destroy_perfcounters(struct si_screen *screen) +{ + struct si_perfcounters *pc = 
screen->perfcounters; + unsigned i; + + if (!pc) + return; + + for (i = 0; i < pc->num_blocks; ++i) { + FREE(pc->blocks[i].group_names); + FREE(pc->blocks[i].selector_names); + } + FREE(pc->blocks); + FREE(pc); + screen->perfcounters = NULL; } void si_init_perfcounters(struct si_screen *screen) { struct si_perfcounters *pc; - struct si_pc_block *blocks; + const struct si_pc_block_gfxdescr *blocks; unsigned num_blocks; unsigned i; @@ -680,52 +1315,50 @@ screen->info.max_sh_per_se); } - pc = CALLOC_STRUCT(si_perfcounters); + screen->perfcounters = pc = CALLOC_STRUCT(si_perfcounters); if (!pc) return; pc->num_stop_cs_dwords = 14 + si_cp_write_fence_dwords(screen); pc->num_instance_cs_dwords = 3; - pc->num_shader_types = ARRAY_SIZE(si_pc_shader_type_bits); - pc->shader_type_suffixes = si_pc_shader_type_suffixes; - pc->shader_type_bits = si_pc_shader_type_bits; - - pc->emit_instance = si_pc_emit_instance; - pc->emit_shaders = si_pc_emit_shaders; - pc->emit_select = si_pc_emit_select; - pc->emit_start = si_pc_emit_start; - pc->emit_stop = si_pc_emit_stop; - pc->emit_read = si_pc_emit_read; - pc->cleanup = si_pc_cleanup; + pc->separate_se = debug_get_bool_option("RADEON_PC_SEPARATE_SE", false); + pc->separate_instance = debug_get_bool_option("RADEON_PC_SEPARATE_INSTANCE", false); - if (!si_perfcounters_init(pc, num_blocks)) + pc->blocks = CALLOC(num_blocks, sizeof(struct si_pc_block)); + if (!pc->blocks) goto error; + pc->num_blocks = num_blocks; for (i = 0; i < num_blocks; ++i) { - struct si_pc_block *block = &blocks[i]; - unsigned instances = block->instances; + struct si_pc_block *block = &pc->blocks[i]; + block->b = &blocks[i]; + block->num_instances = MAX2(1, block->b->instances); + + if (!strcmp(block->b->b->name, "CB") || + !strcmp(block->b->b->name, "DB")) + block->num_instances = screen->info.max_se; + else if (!strcmp(block->b->b->name, "TCC")) + block->num_instances = screen->info.num_tcc_blocks; + else if (!strcmp(block->b->b->name, "IA")) + 
block->num_instances = MAX2(1, screen->info.max_se / 2); - if (!strcmp(block->b->name, "CB") || - !strcmp(block->b->name, "DB")) - instances = screen->info.max_se; - else if (!strcmp(block->b->name, "TCC")) - instances = screen->info.num_tcc_blocks; - else if (!strcmp(block->b->name, "IA")) - instances = MAX2(1, screen->info.max_se / 2); - - si_perfcounters_add_block(screen, pc, - block->b->name, - block->b->flags, - block->b->num_counters, - block->selectors, - instances, - block); + if (si_pc_block_has_per_instance_groups(pc, block)) { + block->num_groups = block->num_instances; + } else { + block->num_groups = 1; + } + + if (si_pc_block_has_per_se_groups(pc, block)) + block->num_groups *= screen->info.max_se; + if (block->b->b->flags & SI_PC_BLOCK_SHADER) + block->num_groups *= ARRAY_SIZE(si_pc_shader_type_bits); + + pc->num_groups += block->num_groups; } - screen->perfcounters = pc; return; error: - si_perfcounters_do_destroy(pc); + si_destroy_perfcounters(screen); } diff -Nru mesa-18.3.3/src/gallium/drivers/radeonsi/si_pipe.c mesa-19.0.1/src/gallium/drivers/radeonsi/si_pipe.c --- mesa-18.3.3/src/gallium/drivers/radeonsi/si_pipe.c 2018-12-07 18:58:04.000000000 +0000 +++ mesa-19.0.1/src/gallium/drivers/radeonsi/si_pipe.c 2019-03-31 23:16:37.000000000 +0000 @@ -103,6 +103,8 @@ { "testvmfaultshader", DBG(TEST_VMFAULT_SHADER), "Invoke a shader VM fault test and exit." }, { "testdmaperf", DBG(TEST_DMA_PERF), "Test DMA performance" }, { "testgds", DBG(TEST_GDS), "Test GDS." }, + { "testgdsmm", DBG(TEST_GDS_MM), "Test GDS memory management." }, + { "testgdsoamm", DBG(TEST_GDS_OA_MM), "Test GDS OA memory management." }, DEBUG_NAMED_VALUE_END /* must be last */ }; @@ -125,7 +127,7 @@ (create_low_opt_compiler ? 
AC_TM_CREATE_LOW_OPT : 0); ac_init_llvm_once(); - ac_init_llvm_compiler(compiler, true, sscreen->info.family, tm_options); + ac_init_llvm_compiler(compiler, sscreen->info.family, tm_options); compiler->passes = ac_create_llvm_passes(compiler->tm); if (compiler->low_opt_tm) @@ -161,11 +163,11 @@ pipe_resource_reference(&sctx->tess_rings, NULL); pipe_resource_reference(&sctx->null_const_buf.buffer, NULL); pipe_resource_reference(&sctx->sample_pos_buffer, NULL); - r600_resource_reference(&sctx->border_color_buffer, NULL); + si_resource_reference(&sctx->border_color_buffer, NULL); free(sctx->border_color_table); - r600_resource_reference(&sctx->scratch_buffer, NULL); - r600_resource_reference(&sctx->compute_scratch_buffer, NULL); - r600_resource_reference(&sctx->wait_mem_scratch, NULL); + si_resource_reference(&sctx->scratch_buffer, NULL); + si_resource_reference(&sctx->compute_scratch_buffer, NULL); + si_resource_reference(&sctx->wait_mem_scratch, NULL); si_pm4_free_state(sctx, sctx->init_config, ~0); if (sctx->init_config_gs_rings) @@ -199,6 +201,10 @@ sctx->b.delete_compute_state(&sctx->b, sctx->cs_clear_buffer); if (sctx->cs_copy_buffer) sctx->b.delete_compute_state(&sctx->b, sctx->cs_copy_buffer); + if (sctx->cs_copy_image) + sctx->b.delete_compute_state(&sctx->b, sctx->cs_copy_image); + if (sctx->cs_copy_image_1d_array) + sctx->b.delete_compute_state(&sctx->b, sctx->cs_copy_image_1d_array); if (sctx->blitter) util_blitter_destroy(sctx->blitter); @@ -240,7 +246,7 @@ sctx->ws->fence_reference(&sctx->last_gfx_fence, NULL); sctx->ws->fence_reference(&sctx->last_sdma_fence, NULL); - r600_resource_reference(&sctx->eop_bug_scratch, NULL); + si_resource_reference(&sctx->eop_bug_scratch, NULL); si_destroy_compiler(&sctx->compiler); @@ -373,6 +379,7 @@ struct si_screen* sscreen = (struct si_screen *)screen; struct radeon_winsys *ws = sscreen->ws; int shader, i; + bool stop_exec_on_failure = (flags & PIPE_CONTEXT_LOSE_CONTEXT_ON_RESET) != 0; if (!sctx) return NULL; @@ 
-411,7 +418,7 @@ if (sctx->chip_class == CIK || sctx->chip_class == VI || sctx->chip_class == GFX9) { - sctx->eop_bug_scratch = r600_resource( + sctx->eop_bug_scratch = si_resource( pipe_buffer_create(&sscreen->b, 0, PIPE_USAGE_DEFAULT, 16 * sscreen->info.num_render_backends)); if (!sctx->eop_bug_scratch) @@ -450,8 +457,8 @@ if (sscreen->info.num_sdma_rings && !(sscreen->debug_flags & DBG(NO_ASYNC_DMA))) { sctx->dma_cs = sctx->ws->cs_create(sctx->ctx, RING_DMA, - (void*)si_flush_dma_cs, - sctx); + (void*)si_flush_dma_cs, + sctx, stop_exec_on_failure); } si_init_buffer_functions(sctx); @@ -472,7 +479,7 @@ } sctx->gfx_cs = ws->cs_create(sctx->ctx, RING_GFX, - (void*)si_flush_gfx_cs, sctx); + (void*)si_flush_gfx_cs, sctx, stop_exec_on_failure); /* Border colors. */ sctx->border_color_table = malloc(SI_MAX_BORDER_COLORS * @@ -480,7 +487,7 @@ if (!sctx->border_color_table) goto fail; - sctx->border_color_buffer = r600_resource( + sctx->border_color_buffer = si_resource( pipe_buffer_create(screen, 0, PIPE_USAGE_DEFAULT, SI_MAX_BORDER_COLORS * sizeof(*sctx->border_color_table))); @@ -498,7 +505,6 @@ si_init_state_functions(sctx); si_init_shader_functions(sctx); si_init_viewport_functions(sctx); - si_init_ia_multi_vgt_param_table(sctx); if (sctx->chip_class >= CIK) cik_init_sdma_functions(sctx); @@ -508,39 +514,24 @@ if (sscreen->debug_flags & DBG(FORCE_DMA)) sctx->b.resource_copy_region = sctx->dma_copy; - bool dst_stream_policy = SI_COMPUTE_DST_CACHE_POLICY != L2_LRU; - sctx->cs_clear_buffer = si_create_dma_compute_shader(&sctx->b, - SI_COMPUTE_CLEAR_DW_PER_THREAD, - dst_stream_policy, false); - sctx->cs_copy_buffer = si_create_dma_compute_shader(&sctx->b, - SI_COMPUTE_COPY_DW_PER_THREAD, - dst_stream_policy, true); - sctx->blitter = util_blitter_create(&sctx->b); if (sctx->blitter == NULL) goto fail; - sctx->blitter->draw_rectangle = si_draw_rectangle; sctx->blitter->skip_viewport_restore = true; + si_init_draw_functions(sctx); + sctx->sample_mask = 0xffff; if 
(sctx->chip_class >= GFX9) { - sctx->wait_mem_scratch = r600_resource( + sctx->wait_mem_scratch = si_resource( pipe_buffer_create(screen, 0, PIPE_USAGE_DEFAULT, 4)); if (!sctx->wait_mem_scratch) goto fail; /* Initialize the memory. */ - struct radeon_cmdbuf *cs = sctx->gfx_cs; - radeon_emit(cs, PKT3(PKT3_WRITE_DATA, 3, 0)); - radeon_emit(cs, S_370_DST_SEL(V_370_MEMORY_SYNC) | - S_370_WR_CONFIRM(1) | - S_370_ENGINE_SEL(V_370_ME)); - radeon_emit(cs, sctx->wait_mem_scratch->gpu_address); - radeon_emit(cs, sctx->wait_mem_scratch->gpu_address >> 32); - radeon_emit(cs, sctx->wait_mem_number); - radeon_add_to_buffer_list(sctx, cs, sctx->wait_mem_scratch, - RADEON_USAGE_WRITE, RADEON_PRIO_FENCE); + si_cp_write_data(sctx, sctx->wait_mem_scratch, 0, 4, + V_370_MEM, V_370_ME, &sctx->wait_mem_number); } /* CIK cannot unbind a constant buffer (S_BUFFER_LOAD doesn't skip loads @@ -702,7 +693,7 @@ mtx_destroy(&sscreen->shader_parts_mutex); si_destroy_shader_cache(sscreen); - si_perfcounters_destroy(sscreen); + si_destroy_perfcounters(sscreen); si_gpu_load_kill_thread(sscreen); mtx_destroy(&sscreen->gpu_load_mutex); @@ -722,39 +713,6 @@ sscreen->info.family); } -static void si_handle_env_var_force_family(struct si_screen *sscreen) -{ - const char *family = debug_get_option("SI_FORCE_FAMILY", NULL); - unsigned i; - - if (!family) - return; - - for (i = CHIP_TAHITI; i < CHIP_LAST; i++) { - if (!strcmp(family, ac_get_llvm_processor_name(i))) { - /* Override family and chip_class. */ - sscreen->info.family = i; - sscreen->info.name = "GCN-NOOP"; - - if (i >= CHIP_VEGA10) - sscreen->info.chip_class = GFX9; - else if (i >= CHIP_TONGA) - sscreen->info.chip_class = VI; - else if (i >= CHIP_BONAIRE) - sscreen->info.chip_class = CIK; - else - sscreen->info.chip_class = SI; - - /* Don't submit any IBs. 
*/ - setenv("RADEON_NOOP", "1", 1); - return; - } - } - - fprintf(stderr, "radeonsi: Unknown family: %s\n", family); - exit(1); -} - static void si_test_vmfault(struct si_screen *sscreen) { struct pipe_context *ctx = sscreen->aux_context; @@ -767,7 +725,7 @@ exit(1); } - r600_resource(buf)->gpu_address = 0; /* cause a VM fault */ + si_resource(buf)->gpu_address = 0; /* cause a VM fault */ if (sscreen->debug_flags & DBG(TEST_VMFAULT_CP)) { si_cp_dma_copy_buffer(sctx, buf, buf, 0, 4, 4, 0, @@ -787,6 +745,41 @@ exit(0); } +static void si_test_gds_memory_management(struct si_context *sctx, + unsigned alloc_size, unsigned alignment, + enum radeon_bo_domain domain) +{ + struct radeon_winsys *ws = sctx->ws; + struct radeon_cmdbuf *cs[8]; + struct pb_buffer *gds_bo[ARRAY_SIZE(cs)]; + + for (unsigned i = 0; i < ARRAY_SIZE(cs); i++) { + cs[i] = ws->cs_create(sctx->ctx, RING_COMPUTE, + NULL, NULL, false); + gds_bo[i] = ws->buffer_create(ws, alloc_size, alignment, domain, 0); + assert(gds_bo[i]); + } + + for (unsigned iterations = 0; iterations < 20000; iterations++) { + for (unsigned i = 0; i < ARRAY_SIZE(cs); i++) { + /* This clears GDS with CP DMA. + * + * We don't care if GDS is present. Just add some packet + * to make the GPU busy for a moment. + */ + si_cp_dma_clear_buffer(sctx, cs[i], NULL, 0, alloc_size, 0, + SI_CPDMA_SKIP_BO_LIST_UPDATE | + SI_CPDMA_SKIP_CHECK_CS_SPACE | + SI_CPDMA_SKIP_GFX_SYNC, 0, 0); + + ws->cs_add_buffer(cs[i], gds_bo[i], domain, + RADEON_USAGE_READWRITE, 0); + ws->cs_flush(cs[i], PIPE_FLUSH_ASYNC, NULL); + } + } + exit(0); +} + static void si_disk_cache_create(struct si_screen *sscreen) { /* Don't use the cache if shader dumping is enabled. 
*/ @@ -840,7 +833,6 @@ sscreen->ws = ws; ws->query_info(ws, &sscreen->info); - si_handle_env_var_force_family(sscreen); if (sscreen->info.chip_class >= GFX9) { sscreen->se_tile_repeat = 32 * sscreen->info.max_se; @@ -873,7 +865,8 @@ sscreen->debug_flags |= DBG(FS_CORRECT_DERIVS_AFTER_KILL); if (driQueryOptionb(config->options, "radeonsi_enable_sisched")) sscreen->debug_flags |= DBG(SI_SCHED); - + if (driQueryOptionb(config->options, "radeonsi_enable_nir")) + sscreen->debug_flags |= DBG(NIR); if (sscreen->debug_flags & DBG(INFO)) ac_print_gpu_info(&sscreen->info); @@ -1029,22 +1022,28 @@ sscreen->info.family == CHIP_RAVEN; sscreen->has_ls_vgpr_init_bug = sscreen->info.family == CHIP_VEGA10 || sscreen->info.family == CHIP_RAVEN; + sscreen->has_dcc_constant_encode = sscreen->info.family == CHIP_RAVEN2; + /* Only enable primitive binning on APUs by default. */ + sscreen->dpbb_allowed = sscreen->info.family == CHIP_RAVEN || + sscreen->info.family == CHIP_RAVEN2; + + sscreen->dfsm_allowed = sscreen->info.family == CHIP_RAVEN || + sscreen->info.family == CHIP_RAVEN2; + + /* Process DPBB enable flags. */ if (sscreen->debug_flags & DBG(DPBB)) { sscreen->dpbb_allowed = true; - } else { - /* Only enable primitive binning on APUs by default. */ - /* TODO: Investigate if binning is profitable on Vega12. */ - sscreen->dpbb_allowed = !(sscreen->debug_flags & DBG(NO_DPBB)) && - (sscreen->info.family == CHIP_RAVEN || - sscreen->info.family == CHIP_RAVEN2); + if (sscreen->debug_flags & DBG(DFSM)) + sscreen->dfsm_allowed = true; } - if (sscreen->debug_flags & DBG(DFSM)) { - sscreen->dfsm_allowed = sscreen->dpbb_allowed; - } else { - sscreen->dfsm_allowed = sscreen->dpbb_allowed && - !(sscreen->debug_flags & DBG(NO_DFSM)); + /* Process DPBB disable flags. 
*/ + if (sscreen->debug_flags & DBG(NO_DPBB)) { + sscreen->dpbb_allowed = false; + sscreen->dfsm_allowed = false; + } else if (sscreen->debug_flags & DBG(NO_DFSM)) { + sscreen->dfsm_allowed = false; } /* While it would be nice not to have this flag, we are constrained @@ -1135,5 +1134,14 @@ if (sscreen->debug_flags & DBG(TEST_GDS)) si_test_gds((struct si_context*)sscreen->aux_context); + if (sscreen->debug_flags & DBG(TEST_GDS_MM)) { + si_test_gds_memory_management((struct si_context*)sscreen->aux_context, + 32 * 1024, 4, RADEON_DOMAIN_GDS); + } + if (sscreen->debug_flags & DBG(TEST_GDS_OA_MM)) { + si_test_gds_memory_management((struct si_context*)sscreen->aux_context, + 4, 1, RADEON_DOMAIN_OA); + } + return &sscreen->b; } diff -Nru mesa-18.3.3/src/gallium/drivers/radeonsi/si_pipe.h mesa-19.0.1/src/gallium/drivers/radeonsi/si_pipe.h --- mesa-18.3.3/src/gallium/drivers/radeonsi/si_pipe.h 2018-12-07 18:58:04.000000000 +0000 +++ mesa-19.0.1/src/gallium/drivers/radeonsi/si_pipe.h 2019-03-31 23:16:37.000000000 +0000 @@ -47,6 +47,7 @@ * the number shouldn't be a commonly-used one. 
*/ #define SI_BASE_VERTEX_UNKNOWN INT_MIN #define SI_RESTART_INDEX_UNKNOWN INT_MIN +#define SI_INSTANCE_COUNT_UNKNOWN INT_MIN #define SI_NUM_SMOOTH_AA_SAMPLES 8 #define SI_MAX_POINT_SIZE 2048 #define SI_GS_PER_ES 128 @@ -103,7 +104,7 @@ #define SI_RESOURCE_FLAG_TRANSFER (PIPE_RESOURCE_FLAG_DRV_PRIV << 0) #define SI_RESOURCE_FLAG_FLUSHED_DEPTH (PIPE_RESOURCE_FLAG_DRV_PRIV << 1) -#define SI_RESOURCE_FLAG_FORCE_TILING (PIPE_RESOURCE_FLAG_DRV_PRIV << 2) +#define SI_RESOURCE_FLAG_FORCE_MSAA_TILING (PIPE_RESOURCE_FLAG_DRV_PRIV << 2) #define SI_RESOURCE_FLAG_DISABLE_DCC (PIPE_RESOURCE_FLAG_DRV_PRIV << 3) #define SI_RESOURCE_FLAG_UNMAPPABLE (PIPE_RESOURCE_FLAG_DRV_PRIV << 4) #define SI_RESOURCE_FLAG_READ_ONLY (PIPE_RESOURCE_FLAG_DRV_PRIV << 5) @@ -174,6 +175,8 @@ DBG_TEST_VMFAULT_SHADER, DBG_TEST_DMA_PERF, DBG_TEST_GDS, + DBG_TEST_GDS_MM, + DBG_TEST_GDS_OA_MM, }; #define DBG_ALL_SHADERS (((1 << (DBG_CS + 1)) - 1)) @@ -199,7 +202,7 @@ /* Only 32-bit buffer allocations are supported, gallium doesn't support more * at the moment. */ -struct r600_resource { +struct si_resource { struct threaded_resource b; /* Winsys objects. */ @@ -249,12 +252,12 @@ struct si_transfer { struct threaded_transfer b; - struct r600_resource *staging; + struct si_resource *staging; unsigned offset; }; struct si_texture { - struct r600_resource buffer; + struct si_resource buffer; struct radeon_surf surface; uint64_t size; @@ -264,7 +267,7 @@ uint64_t fmask_offset; uint64_t cmask_offset; uint64_t cmask_base_address_reg; - struct r600_resource *cmask_buffer; + struct si_resource *cmask_buffer; uint64_t dcc_offset; /* 0 = disabled */ unsigned cb_color_info; /* fast clear enable bit */ unsigned color_clear_value[2]; @@ -307,9 +310,9 @@ * target == 2D and last_level == 0. If enabled, dcc_offset contains * the absolute GPUVM address, not the relative one. 
*/ - struct r600_resource *dcc_separate_buffer; + struct si_resource *dcc_separate_buffer; /* When DCC is temporarily disabled, the separate buffer is here. */ - struct r600_resource *last_dcc_separate_buffer; + struct si_resource *last_dcc_separate_buffer; /* Estimate of how much this color buffer is written to in units of * full-screen draws: ps_invocations / (width * height) * Shader kills, late Z, and blending with trivial discards make it @@ -445,6 +448,7 @@ bool clear_db_cache_before_clear; bool has_msaa_sample_loc_bug; bool has_ls_vgpr_init_bug; + bool has_dcc_constant_encode; bool dpbb_allowed; bool dfsm_allowed; bool llvm_has_working_vgpr_indexing; @@ -660,7 +664,7 @@ struct pipe_stream_output_target b; /* The buffer where BUFFER_FILLED_SIZE is stored. */ - struct r600_resource *buf_filled_size; + struct si_resource *buf_filled_size; unsigned buf_filled_size_offset; bool buf_filled_size_valid; @@ -754,7 +758,7 @@ struct pipe_reference reference; struct si_context *ctx; struct radeon_saved_cs gfx; - struct r600_resource *trace_buf; + struct si_resource *trace_buf; unsigned trace_id; unsigned gfx_last_dw; @@ -774,7 +778,7 @@ struct radeon_cmdbuf *dma_cs; struct pipe_fence_handle *last_gfx_fence; struct pipe_fence_handle *last_sdma_fence; - struct r600_resource *eop_bug_scratch; + struct si_resource *eop_bug_scratch; struct u_upload_mgr *cached_gtt_allocator; struct threaded_context *tc; struct u_suballocator *allocator_zeroed_memory; @@ -796,11 +800,13 @@ void *vs_blit_texcoord; void *cs_clear_buffer; void *cs_copy_buffer; + void *cs_copy_image; + void *cs_copy_image_1d_array; struct si_screen *screen; struct pipe_debug_callback debug; struct ac_llvm_compiler compiler; /* only non-threaded compilation */ struct si_shader_ctx_state fixed_func_tcs_shader; - struct r600_resource *wait_mem_scratch; + struct si_resource *wait_mem_scratch; unsigned wait_mem_number; uint16_t prefetch_L2_mask; @@ -866,7 +872,7 @@ /* vertex buffer descriptors */ uint32_t 
*vb_descriptors_gpu_list; - struct r600_resource *vb_descriptors_buffer; + struct si_resource *vb_descriptors_buffer; unsigned vb_descriptors_offset; /* shader descriptors */ @@ -885,13 +891,35 @@ struct pipe_resource *gsvs_ring; struct pipe_resource *tess_rings; union pipe_color_union *border_color_table; /* in CPU memory, any endian */ - struct r600_resource *border_color_buffer; + struct si_resource *border_color_buffer; union pipe_color_union *border_color_map; /* in VRAM (slow access), little endian */ unsigned border_color_count; unsigned num_vs_blit_sgprs; uint32_t vs_blit_sh_data[SI_VS_BLIT_SGPRS_POS_TEXCOORD]; uint32_t cs_user_data[4]; + /** + * last_block allows disabling threads at the farthermost grid boundary. + * Full blocks as specified by "block" are launched, but the threads + * outside of "last_block" dimensions are disabled. + * + * If a block touches the grid boundary in the i-th axis, threads with + * THREAD_ID[i] >= last_block[i] are disabled. + * + * If last_block[i] is 0, it has the same behavior as last_block[i] = block[i], + * meaning no effect. + * + * It's equivalent to doing this at the beginning of the compute shader: + * + * for (i = 0; i < 3; i++) { + * if (block_id[i] == grid[i] - 1 && + * last_block[i] && last_block[i] >= thread_id[i]) + * return; + * } + * (this could be moved into pipe_grid_info) + */ + uint compute_last_block[3]; + /* Vertex and index buffers. 
*/ bool vertex_buffers_dirty; bool vertex_buffer_pointer_dirty; @@ -922,6 +950,7 @@ int last_index_size; int last_base_vertex; int last_start_instance; + int last_instance_count; int last_drawid; int last_sh_base_reg; int last_primitive_restart_en; @@ -935,11 +964,11 @@ enum pipe_prim_type current_rast_prim; /* primitive type after TES, GS */ /* Scratch buffer */ - struct r600_resource *scratch_buffer; + struct si_resource *scratch_buffer; unsigned scratch_waves; unsigned spi_tmpring_size; - struct r600_resource *compute_scratch_buffer; + struct si_resource *compute_scratch_buffer; /* Emitted derived tessellation state. */ /* Local shader (VS), or HS if LS-HS are merged. */ @@ -1107,17 +1136,17 @@ struct pb_buffer *buf, enum radeon_bo_usage usage); void *si_buffer_map_sync_with_rings(struct si_context *sctx, - struct r600_resource *resource, + struct si_resource *resource, unsigned usage); void si_init_resource_fields(struct si_screen *sscreen, - struct r600_resource *res, + struct si_resource *res, uint64_t size, unsigned alignment); bool si_alloc_resource(struct si_screen *sscreen, - struct r600_resource *res); + struct si_resource *res); struct pipe_resource *pipe_aligned_buffer_create(struct pipe_screen *screen, unsigned flags, unsigned usage, unsigned size, unsigned alignment); -struct r600_resource *si_aligned_buffer_create(struct pipe_screen *screen, +struct si_resource *si_aligned_buffer_create(struct pipe_screen *screen, unsigned flags, unsigned usage, unsigned size, unsigned alignment); void si_replace_buffer_storage(struct pipe_context *ctx, @@ -1143,6 +1172,13 @@ void si_copy_buffer(struct si_context *sctx, struct pipe_resource *dst, struct pipe_resource *src, uint64_t dst_offset, uint64_t src_offset, unsigned size); +void si_compute_copy_image(struct si_context *sctx, + struct pipe_resource *dst, + unsigned dst_level, + struct pipe_resource *src, + unsigned src_level, + unsigned dstx, unsigned dsty, unsigned dstz, + const struct pipe_box *src_box); 
void si_init_compute_blit_functions(struct si_context *sctx); /* si_cp_dma.c */ @@ -1158,10 +1194,10 @@ SI_CPDMA_SKIP_BO_LIST_UPDATE) void si_cp_dma_wait_for_idle(struct si_context *sctx); -void si_cp_dma_clear_buffer(struct si_context *sctx, struct pipe_resource *dst, - uint64_t offset, uint64_t size, unsigned value, - enum si_coherency coher, - enum si_cache_policy cache_policy); +void si_cp_dma_clear_buffer(struct si_context *sctx, struct radeon_cmdbuf *cs, + struct pipe_resource *dst, uint64_t offset, + uint64_t size, unsigned value, unsigned user_flags, + enum si_coherency coher, enum si_cache_policy cache_policy); void si_cp_dma_copy_buffer(struct si_context *sctx, struct pipe_resource *dst, struct pipe_resource *src, uint64_t dst_offset, uint64_t src_offset, unsigned size, @@ -1171,6 +1207,9 @@ uint64_t offset, unsigned size); void cik_emit_prefetch_L2(struct si_context *sctx, bool vertex_stage_only); void si_test_gds(struct si_context *sctx); +void si_cp_write_data(struct si_context *sctx, struct si_resource *buf, + unsigned offset, unsigned size, unsigned dst_sel, + unsigned engine, const void *data); /* si_debug.c */ void si_save_cs(struct radeon_winsys *ws, struct radeon_cmdbuf *cs, @@ -1190,12 +1229,12 @@ void si_init_dma_functions(struct si_context *sctx); /* si_dma_cs.c */ -void si_dma_emit_timestamp(struct si_context *sctx, struct r600_resource *dst, +void si_dma_emit_timestamp(struct si_context *sctx, struct si_resource *dst, uint64_t offset); void si_sdma_clear_buffer(struct si_context *sctx, struct pipe_resource *dst, uint64_t offset, uint64_t size, unsigned clear_value); void si_need_dma_space(struct si_context *ctx, unsigned num_dw, - struct r600_resource *dst, struct r600_resource *src); + struct si_resource *dst, struct si_resource *src); void si_flush_dma_cs(struct si_context *ctx, unsigned flags, struct pipe_fence_handle **fence); void si_screen_clear_buffer(struct si_screen *sscreen, struct pipe_resource *dst, @@ -1205,10 +1244,10 @@ void 
si_cp_release_mem(struct si_context *ctx, unsigned event, unsigned event_flags, unsigned dst_sel, unsigned int_sel, unsigned data_sel, - struct r600_resource *buf, uint64_t va, + struct si_resource *buf, uint64_t va, uint32_t new_fence, unsigned query_type); unsigned si_cp_write_fence_dwords(struct si_screen *screen); -void si_cp_wait_mem(struct si_context *ctx, +void si_cp_wait_mem(struct si_context *ctx, struct radeon_cmdbuf *cs, uint64_t va, uint32_t ref, uint32_t mask, unsigned flags); void si_init_fence_functions(struct si_context *ctx); void si_init_screen_fence_functions(struct si_screen *screen); @@ -1224,7 +1263,7 @@ void si_begin_new_gfx_cs(struct si_context *ctx); void si_need_gfx_cs_space(struct si_context *ctx); -/* r600_gpu_load.c */ +/* si_gpu_load.c */ void si_gpu_load_kill_thread(struct si_screen *sscreen); uint64_t si_begin_counter(struct si_screen *sscreen, unsigned type); unsigned si_end_counter(struct si_screen *sscreen, unsigned type, @@ -1233,11 +1272,9 @@ /* si_compute.c */ void si_init_compute_functions(struct si_context *sctx); -/* r600_perfcounters.c */ -void si_perfcounters_destroy(struct si_screen *sscreen); - /* si_perfcounters.c */ void si_init_perfcounters(struct si_screen *screen); +void si_destroy_perfcounters(struct si_screen *screen); /* si_pipe.c */ bool si_check_device_reset(struct si_context *sctx); @@ -1255,6 +1292,8 @@ void *si_create_dma_compute_shader(struct pipe_context *ctx, unsigned num_dwords_per_thread, bool dst_stream_cache_policy, bool is_copy); +void *si_create_copy_image_compute_shader(struct pipe_context *ctx); +void *si_create_copy_image_compute_shader_1d_array(struct pipe_context *ctx); void *si_create_query_result_cs(struct si_context *sctx); /* si_test_dma.c */ @@ -1326,13 +1365,13 @@ * common helpers */ -static inline struct r600_resource *r600_resource(struct pipe_resource *r) +static inline struct si_resource *si_resource(struct pipe_resource *r) { - return (struct r600_resource*)r; + return (struct 
si_resource*)r; } static inline void -r600_resource_reference(struct r600_resource **ptr, struct r600_resource *res) +si_resource_reference(struct si_resource **ptr, struct si_resource *res) { pipe_resource_reference((struct pipe_resource **)ptr, (struct pipe_resource *)res); @@ -1364,8 +1403,8 @@ { if (r) { /* Add memory usage for need_gfx_cs_space */ - sctx->vram += r600_resource(r)->vram_usage; - sctx->gtt += r600_resource(r)->gart_usage; + sctx->vram += si_resource(r)->vram_usage; + sctx->gtt += si_resource(r)->gart_usage; } } @@ -1373,6 +1412,7 @@ si_invalidate_draw_sh_constants(struct si_context *sctx) { sctx->last_base_vertex = SI_BASE_VERTEX_UNKNOWN; + sctx->last_instance_count = SI_INSTANCE_COUNT_UNKNOWN; } static inline unsigned @@ -1610,15 +1650,15 @@ */ static inline void radeon_add_to_buffer_list(struct si_context *sctx, struct radeon_cmdbuf *cs, - struct r600_resource *rbo, + struct si_resource *bo, enum radeon_bo_usage usage, enum radeon_bo_priority priority) { assert(usage); sctx->ws->cs_add_buffer( - cs, rbo->buf, + cs, bo->buf, (enum radeon_bo_usage)(usage | RADEON_USAGE_SYNCHRONIZED), - rbo->domains, priority); + bo->domains, priority); } /** @@ -1640,18 +1680,18 @@ */ static inline void radeon_add_to_gfx_buffer_list_check_mem(struct si_context *sctx, - struct r600_resource *rbo, + struct si_resource *bo, enum radeon_bo_usage usage, enum radeon_bo_priority priority, bool check_mem) { if (check_mem && !radeon_cs_memory_below_limit(sctx->screen, sctx->gfx_cs, - sctx->vram + rbo->vram_usage, - sctx->gtt + rbo->gart_usage)) + sctx->vram + bo->vram_usage, + sctx->gtt + bo->gart_usage)) si_flush_gfx_cs(sctx, RADEON_FLUSH_ASYNC_START_NEXT_GFX_IB_NOW, NULL); - radeon_add_to_buffer_list(sctx, sctx->gfx_cs, rbo, usage, priority); + radeon_add_to_buffer_list(sctx, sctx->gfx_cs, bo, usage, priority); } #define PRINT_ERR(fmt, args...) 
\ diff -Nru mesa-18.3.3/src/gallium/drivers/radeonsi/si_pm4.c mesa-19.0.1/src/gallium/drivers/radeonsi/si_pm4.c --- mesa-18.3.3/src/gallium/drivers/radeonsi/si_pm4.c 2018-12-07 18:58:04.000000000 +0000 +++ mesa-19.0.1/src/gallium/drivers/radeonsi/si_pm4.c 2019-03-31 23:16:37.000000000 +0000 @@ -85,14 +85,14 @@ } void si_pm4_add_bo(struct si_pm4_state *state, - struct r600_resource *bo, + struct si_resource *bo, enum radeon_bo_usage usage, enum radeon_bo_priority priority) { unsigned idx = state->nbo++; assert(idx < SI_PM4_MAX_BO); - r600_resource_reference(&state->bo[idx], bo); + si_resource_reference(&state->bo[idx], bo); state->bo_usage[idx] = usage; state->bo_priority[idx] = priority; } @@ -100,8 +100,8 @@ void si_pm4_clear_state(struct si_pm4_state *state) { for (int i = 0; i < state->nbo; ++i) - r600_resource_reference(&state->bo[i], NULL); - r600_resource_reference(&state->indirect_buffer, NULL); + si_resource_reference(&state->bo[i], NULL); + si_resource_reference(&state->indirect_buffer, NULL); state->nbo = 0; state->ndw = 0; } @@ -133,7 +133,7 @@ if (!state->indirect_buffer) { radeon_emit_array(cs, state->pm4, state->ndw); } else { - struct r600_resource *ib = state->indirect_buffer; + struct si_resource *ib = state->indirect_buffer; radeon_add_to_buffer_list(sctx, sctx->gfx_cs, ib, RADEON_USAGE_READ, @@ -168,7 +168,7 @@ assert(state->ndw); assert(aligned_ndw <= SI_PM4_MAX_DW); - r600_resource_reference(&state->indirect_buffer, NULL); + si_resource_reference(&state->indirect_buffer, NULL); /* TODO: this hangs with 1024 or higher alignment on GFX9. 
*/ state->indirect_buffer = si_aligned_buffer_create(screen, 0, diff -Nru mesa-18.3.3/src/gallium/drivers/radeonsi/si_pm4.h mesa-19.0.1/src/gallium/drivers/radeonsi/si_pm4.h --- mesa-18.3.3/src/gallium/drivers/radeonsi/si_pm4.h 2018-12-07 18:58:04.000000000 +0000 +++ mesa-19.0.1/src/gallium/drivers/radeonsi/si_pm4.h 2019-03-31 23:16:37.000000000 +0000 @@ -43,7 +43,7 @@ struct si_pm4_state { /* optional indirect buffer */ - struct r600_resource *indirect_buffer; + struct si_resource *indirect_buffer; /* PKT3_SET_*_REG handling */ unsigned last_opcode; @@ -56,7 +56,7 @@ /* BO's referenced by this state */ unsigned nbo; - struct r600_resource *bo[SI_PM4_MAX_BO]; + struct si_resource *bo[SI_PM4_MAX_BO]; enum radeon_bo_usage bo_usage[SI_PM4_MAX_BO]; enum radeon_bo_priority bo_priority[SI_PM4_MAX_BO]; @@ -71,7 +71,7 @@ void si_pm4_set_reg(struct si_pm4_state *state, unsigned reg, uint32_t val); void si_pm4_add_bo(struct si_pm4_state *state, - struct r600_resource *bo, + struct si_resource *bo, enum radeon_bo_usage usage, enum radeon_bo_priority priority); void si_pm4_upload_indirect_buffer(struct si_context *sctx, diff -Nru mesa-18.3.3/src/gallium/drivers/radeonsi/si_query.c mesa-19.0.1/src/gallium/drivers/radeonsi/si_query.c --- mesa-18.3.3/src/gallium/drivers/radeonsi/si_query.c 2018-12-07 18:58:04.000000000 +0000 +++ mesa-19.0.1/src/gallium/drivers/radeonsi/si_query.c 2019-03-31 23:16:37.000000000 +0000 @@ -34,6 +34,8 @@ #define SI_MAX_STREAMS 4 +static const struct si_query_ops query_hw_ops; + struct si_hw_query_params { unsigned start_offset; unsigned end_offset; @@ -57,9 +59,9 @@ }; static void si_query_sw_destroy(struct si_screen *sscreen, - struct si_query *rquery) + struct si_query *squery) { - struct si_query_sw *query = (struct si_query_sw *)rquery; + struct si_query_sw *query = (struct si_query_sw *)squery; sscreen->b.fence_reference(&sscreen->b, &query->fence, NULL); FREE(query); @@ -106,9 +108,9 @@ } static bool si_query_sw_begin(struct si_context *sctx, - 
struct si_query *rquery) + struct si_query *squery) { - struct si_query_sw *query = (struct si_query_sw *)rquery; + struct si_query_sw *query = (struct si_query_sw *)squery; enum radeon_value_id ws_id; switch(query->b.type) { @@ -267,9 +269,9 @@ } static bool si_query_sw_end(struct si_context *sctx, - struct si_query *rquery) + struct si_query *squery) { - struct si_query_sw *query = (struct si_query_sw *)rquery; + struct si_query_sw *query = (struct si_query_sw *)squery; enum radeon_value_id ws_id; switch(query->b.type) { @@ -432,11 +434,11 @@ } static bool si_query_sw_get_result(struct si_context *sctx, - struct si_query *rquery, + struct si_query *squery, bool wait, union pipe_query_result *result) { - struct si_query_sw *query = (struct si_query_sw *)rquery; + struct si_query_sw *query = (struct si_query_sw *)squery; switch (query->b.type) { case PIPE_QUERY_TIMESTAMP_DISJOINT: @@ -447,7 +449,7 @@ return true; case PIPE_QUERY_GPU_FINISHED: { struct pipe_screen *screen = sctx->b.screen; - struct pipe_context *ctx = rquery->b.flushed ? NULL : &sctx->b; + struct pipe_context *ctx = squery->b.flushed ? NULL : &sctx->b; result->b = screen->fence_finish(screen, ctx, query->fence, wait ? PIPE_TIMEOUT_INFINITE : 0); @@ -497,7 +499,7 @@ } -static struct si_query_ops sw_query_ops = { +static const struct si_query_ops sw_query_ops = { .destroy = si_query_sw_destroy, .begin = si_query_sw_begin, .end = si_query_sw_end, @@ -519,72 +521,121 @@ return (struct pipe_query *)query; } -void si_query_hw_destroy(struct si_screen *sscreen, - struct si_query *rquery) +void si_query_buffer_destroy(struct si_screen *sscreen, struct si_query_buffer *buffer) { - struct si_query_hw *query = (struct si_query_hw *)rquery; - struct si_query_buffer *prev = query->buffer.previous; + struct si_query_buffer *prev = buffer->previous; /* Release all query buffers. 
*/ while (prev) { struct si_query_buffer *qbuf = prev; prev = prev->previous; - r600_resource_reference(&qbuf->buf, NULL); + si_resource_reference(&qbuf->buf, NULL); FREE(qbuf); } - r600_resource_reference(&query->buffer.buf, NULL); - r600_resource_reference(&query->workaround_buf, NULL); - FREE(rquery); + si_resource_reference(&buffer->buf, NULL); } -static struct r600_resource *si_new_query_buffer(struct si_screen *sscreen, - struct si_query_hw *query) +void si_query_buffer_reset(struct si_context *sctx, struct si_query_buffer *buffer) { - unsigned buf_size = MAX2(query->result_size, - sscreen->info.min_alloc_size); + /* Discard all query buffers except for the oldest. */ + while (buffer->previous) { + struct si_query_buffer *qbuf = buffer->previous; + buffer->previous = qbuf->previous; - /* Queries are normally read by the CPU after - * being written by the gpu, hence staging is probably a good - * usage pattern. - */ - struct r600_resource *buf = r600_resource( - pipe_buffer_create(&sscreen->b, 0, - PIPE_USAGE_STAGING, buf_size)); - if (!buf) - return NULL; + si_resource_reference(&buffer->buf, NULL); + buffer->buf = qbuf->buf; /* move ownership */ + FREE(qbuf); + } + buffer->results_end = 0; - if (!query->ops->prepare_buffer(sscreen, query, buf)) { - r600_resource_reference(&buf, NULL); - return NULL; + if (!buffer->buf) + return; + + /* Discard even the oldest buffer if it can't be mapped without a stall. 
*/ + if (si_rings_is_buffer_referenced(sctx, buffer->buf->buf, RADEON_USAGE_READWRITE) || + !sctx->ws->buffer_wait(buffer->buf->buf, 0, RADEON_USAGE_READWRITE)) { + si_resource_reference(&buffer->buf, NULL); + } else { + buffer->unprepared = true; + } +} + +bool si_query_buffer_alloc(struct si_context *sctx, struct si_query_buffer *buffer, + bool (*prepare_buffer)(struct si_context *, struct si_query_buffer*), + unsigned size) +{ + bool unprepared = buffer->unprepared; + buffer->unprepared = false; + + if (!buffer->buf || buffer->results_end + size > buffer->buf->b.b.width0) { + if (buffer->buf) { + struct si_query_buffer *qbuf = MALLOC_STRUCT(si_query_buffer); + memcpy(qbuf, buffer, sizeof(*qbuf)); + buffer->previous = qbuf; + } + buffer->results_end = 0; + + /* Queries are normally read by the CPU after + * being written by the gpu, hence staging is probably a good + * usage pattern. + */ + struct si_screen *screen = sctx->screen; + unsigned buf_size = MAX2(size, screen->info.min_alloc_size); + buffer->buf = si_resource( + pipe_buffer_create(&screen->b, 0, PIPE_USAGE_STAGING, buf_size)); + if (unlikely(!buffer->buf)) + return false; + unprepared = true; } - return buf; + if (unprepared && prepare_buffer) { + if (unlikely(!prepare_buffer(sctx, buffer))) { + si_resource_reference(&buffer->buf, NULL); + return false; + } + } + + return true; } -static bool si_query_hw_prepare_buffer(struct si_screen *sscreen, - struct si_query_hw *query, - struct r600_resource *buffer) + +void si_query_hw_destroy(struct si_screen *sscreen, + struct si_query *squery) { - /* Callers ensure that the buffer is currently unused by the GPU. 
*/ - uint32_t *results = sscreen->ws->buffer_map(buffer->buf, NULL, + struct si_query_hw *query = (struct si_query_hw *)squery; + + si_query_buffer_destroy(sscreen, &query->buffer); + si_resource_reference(&query->workaround_buf, NULL); + FREE(squery); +} + +static bool si_query_hw_prepare_buffer(struct si_context *sctx, + struct si_query_buffer *qbuf) +{ + static const struct si_query_hw si_query_hw_s; + struct si_query_hw *query = container_of(qbuf, &si_query_hw_s, buffer); + struct si_screen *screen = sctx->screen; + + /* The caller ensures that the buffer is currently unused by the GPU. */ + uint32_t *results = screen->ws->buffer_map(qbuf->buf->buf, NULL, PIPE_TRANSFER_WRITE | PIPE_TRANSFER_UNSYNCHRONIZED); if (!results) return false; - memset(results, 0, buffer->b.b.width0); + memset(results, 0, qbuf->buf->b.b.width0); if (query->b.type == PIPE_QUERY_OCCLUSION_COUNTER || query->b.type == PIPE_QUERY_OCCLUSION_PREDICATE || query->b.type == PIPE_QUERY_OCCLUSION_PREDICATE_CONSERVATIVE) { - unsigned max_rbs = sscreen->info.num_render_backends; - unsigned enabled_rb_mask = sscreen->info.enabled_rb_mask; + unsigned max_rbs = screen->info.num_render_backends; + unsigned enabled_rb_mask = screen->info.enabled_rb_mask; unsigned num_results; unsigned i, j; /* Set top bits for unused backends. 
*/ - num_results = buffer->b.b.width0 / query->result_size; + num_results = qbuf->buf->b.b.width0 / query->result_size; for (j = 0; j < num_results; j++) { for (i = 0; i < max_rbs; i++) { if (!(enabled_rb_mask & (1<buffer.buf = si_new_query_buffer(sscreen, query); - if (!query->buffer.buf) - return false; - - return true; -} - static struct pipe_query *si_query_hw_create(struct si_screen *sscreen, unsigned query_type, unsigned index) @@ -665,20 +698,19 @@ case PIPE_QUERY_OCCLUSION_PREDICATE_CONSERVATIVE: query->result_size = 16 * sscreen->info.num_render_backends; query->result_size += 16; /* for the fence + alignment */ - query->num_cs_dw_end = 6 + si_cp_write_fence_dwords(sscreen); + query->b.num_cs_dw_suspend = 6 + si_cp_write_fence_dwords(sscreen); break; case SI_QUERY_TIME_ELAPSED_SDMA: /* GET_GLOBAL_TIMESTAMP only works if the offset is a multiple of 32. */ query->result_size = 64; - query->num_cs_dw_end = 0; break; case PIPE_QUERY_TIME_ELAPSED: query->result_size = 24; - query->num_cs_dw_end = 8 + si_cp_write_fence_dwords(sscreen); + query->b.num_cs_dw_suspend = 8 + si_cp_write_fence_dwords(sscreen); break; case PIPE_QUERY_TIMESTAMP: query->result_size = 16; - query->num_cs_dw_end = 8 + si_cp_write_fence_dwords(sscreen); + query->b.num_cs_dw_suspend = 8 + si_cp_write_fence_dwords(sscreen); query->flags = SI_QUERY_HW_FLAG_NO_START; break; case PIPE_QUERY_PRIMITIVES_EMITTED: @@ -687,19 +719,19 @@ case PIPE_QUERY_SO_OVERFLOW_PREDICATE: /* NumPrimitivesWritten, PrimitiveStorageNeeded. */ query->result_size = 32; - query->num_cs_dw_end = 6; + query->b.num_cs_dw_suspend = 6; query->stream = index; break; case PIPE_QUERY_SO_OVERFLOW_ANY_PREDICATE: /* NumPrimitivesWritten, PrimitiveStorageNeeded. */ query->result_size = 32 * SI_MAX_STREAMS; - query->num_cs_dw_end = 6 * SI_MAX_STREAMS; + query->b.num_cs_dw_suspend = 6 * SI_MAX_STREAMS; break; case PIPE_QUERY_PIPELINE_STATISTICS: /* 11 values on GCN. 
*/ query->result_size = 11 * 16; query->result_size += 8; /* for the fence + alignment */ - query->num_cs_dw_end = 6 + si_cp_write_fence_dwords(sscreen); + query->b.num_cs_dw_suspend = 6 + si_cp_write_fence_dwords(sscreen); break; default: assert(0); @@ -707,11 +739,6 @@ return NULL; } - if (!si_query_hw_init(sscreen, query)) { - FREE(query); - return NULL; - } - return (struct pipe_query *)query; } @@ -765,7 +792,7 @@ static void si_query_hw_do_emit_start(struct si_context *sctx, struct si_query_hw *query, - struct r600_resource *buffer, + struct si_resource *buffer, uint64_t va) { struct radeon_cmdbuf *cs = sctx->gfx_cs; @@ -816,8 +843,9 @@ { uint64_t va; - if (!query->buffer.buf) - return; // previous buffer allocation failure + if (!si_query_buffer_alloc(sctx, &query->buffer, query->ops->prepare_buffer, + query->result_size)) + return; si_update_occlusion_query_state(sctx, query->b.type, 1); si_update_prims_generated_query_state(sctx, query->b.type, 1); @@ -825,28 +853,13 @@ if (query->b.type != SI_QUERY_TIME_ELAPSED_SDMA) si_need_gfx_cs_space(sctx); - /* Get a new query buffer if needed. 
*/ - if (query->buffer.results_end + query->result_size > query->buffer.buf->b.b.width0) { - struct si_query_buffer *qbuf = MALLOC_STRUCT(si_query_buffer); - *qbuf = query->buffer; - query->buffer.results_end = 0; - query->buffer.previous = qbuf; - query->buffer.buf = si_new_query_buffer(sctx->screen, query); - if (!query->buffer.buf) - return; - } - - /* emit begin query */ va = query->buffer.buf->gpu_address + query->buffer.results_end; - query->ops->emit_start(sctx, query, query->buffer.buf, va); - - sctx->num_cs_dw_queries_suspend += query->num_cs_dw_end; } static void si_query_hw_do_emit_stop(struct si_context *sctx, struct si_query_hw *query, - struct r600_resource *buffer, + struct si_resource *buffer, uint64_t va) { struct radeon_cmdbuf *cs = sctx->gfx_cs; @@ -883,9 +896,8 @@ va += 8; /* fall through */ case PIPE_QUERY_TIMESTAMP: - si_cp_release_mem(sctx, V_028A90_BOTTOM_OF_PIPE_TS, - 0, EOP_DST_SEL_MEM, - EOP_INT_SEL_SEND_DATA_AFTER_WR_CONFIRM, + si_cp_release_mem(sctx, V_028A90_BOTTOM_OF_PIPE_TS, 0, + EOP_DST_SEL_MEM, EOP_INT_SEL_NONE, EOP_DATA_SEL_TIMESTAMP, NULL, va, 0, query->b.type); fence_va = va + 8; @@ -910,8 +922,7 @@ if (fence_va) { si_cp_release_mem(sctx, V_028A90_BOTTOM_OF_PIPE_TS, 0, - EOP_DST_SEL_MEM, - EOP_INT_SEL_SEND_DATA_AFTER_WR_CONFIRM, + EOP_DST_SEL_MEM, EOP_INT_SEL_NONE, EOP_DATA_SEL_VALUE_32BIT, query->buffer.buf, fence_va, 0x80000000, query->b.type); @@ -923,12 +934,16 @@ { uint64_t va; - if (!query->buffer.buf) - return; // previous buffer allocation failure - /* The queries which need begin already called this in begin_query. 
*/ - if (query->flags & SI_QUERY_HW_FLAG_NO_START) + if (query->flags & SI_QUERY_HW_FLAG_NO_START) { si_need_gfx_cs_space(sctx); + if (!si_query_buffer_alloc(sctx, &query->buffer, query->ops->prepare_buffer, + query->result_size)) + return; + } + + if (!query->buffer.buf) + return; // previous buffer allocation failure /* emit end query */ va = query->buffer.buf->gpu_address + query->buffer.results_end; @@ -937,15 +952,12 @@ query->buffer.results_end += query->result_size; - if (!(query->flags & SI_QUERY_HW_FLAG_NO_START)) - sctx->num_cs_dw_queries_suspend -= query->num_cs_dw_end; - si_update_occlusion_query_state(sctx, query->b.type, -1); si_update_prims_generated_query_state(sctx, query->b.type, -1); } static void emit_set_predicate(struct si_context *ctx, - struct r600_resource *buf, uint64_t va, + struct si_resource *buf, uint64_t va, uint32_t op) { struct radeon_cmdbuf *cs = ctx->gfx_cs; @@ -1061,51 +1073,24 @@ static void si_destroy_query(struct pipe_context *ctx, struct pipe_query *query) { struct si_context *sctx = (struct si_context *)ctx; - struct si_query *rquery = (struct si_query *)query; + struct si_query *squery = (struct si_query *)query; - rquery->ops->destroy(sctx->screen, rquery); + squery->ops->destroy(sctx->screen, squery); } static boolean si_begin_query(struct pipe_context *ctx, struct pipe_query *query) { struct si_context *sctx = (struct si_context *)ctx; - struct si_query *rquery = (struct si_query *)query; + struct si_query *squery = (struct si_query *)query; - return rquery->ops->begin(sctx, rquery); -} - -void si_query_hw_reset_buffers(struct si_context *sctx, - struct si_query_hw *query) -{ - struct si_query_buffer *prev = query->buffer.previous; - - /* Discard the old query buffers. 
*/ - while (prev) { - struct si_query_buffer *qbuf = prev; - prev = prev->previous; - r600_resource_reference(&qbuf->buf, NULL); - FREE(qbuf); - } - - query->buffer.results_end = 0; - query->buffer.previous = NULL; - - /* Obtain a new buffer if the current one can't be mapped without a stall. */ - if (si_rings_is_buffer_referenced(sctx, query->buffer.buf->buf, RADEON_USAGE_READWRITE) || - !sctx->ws->buffer_wait(query->buffer.buf->buf, 0, RADEON_USAGE_READWRITE)) { - r600_resource_reference(&query->buffer.buf, NULL); - query->buffer.buf = si_new_query_buffer(sctx->screen, query); - } else { - if (!query->ops->prepare_buffer(sctx->screen, query, query->buffer.buf)) - r600_resource_reference(&query->buffer.buf, NULL); - } + return squery->ops->begin(sctx, squery); } bool si_query_hw_begin(struct si_context *sctx, - struct si_query *rquery) + struct si_query *squery) { - struct si_query_hw *query = (struct si_query_hw *)rquery; + struct si_query_hw *query = (struct si_query_hw *)squery; if (query->flags & SI_QUERY_HW_FLAG_NO_START) { assert(0); @@ -1113,38 +1098,41 @@ } if (!(query->flags & SI_QUERY_HW_FLAG_BEGIN_RESUMES)) - si_query_hw_reset_buffers(sctx, query); + si_query_buffer_reset(sctx, &query->buffer); - r600_resource_reference(&query->workaround_buf, NULL); + si_resource_reference(&query->workaround_buf, NULL); si_query_hw_emit_start(sctx, query); if (!query->buffer.buf) return false; - LIST_ADDTAIL(&query->list, &sctx->active_queries); + LIST_ADDTAIL(&query->b.active_list, &sctx->active_queries); + sctx->num_cs_dw_queries_suspend += query->b.num_cs_dw_suspend; return true; } static bool si_end_query(struct pipe_context *ctx, struct pipe_query *query) { struct si_context *sctx = (struct si_context *)ctx; - struct si_query *rquery = (struct si_query *)query; + struct si_query *squery = (struct si_query *)query; - return rquery->ops->end(sctx, rquery); + return squery->ops->end(sctx, squery); } bool si_query_hw_end(struct si_context *sctx, - struct si_query 
*rquery) + struct si_query *squery) { - struct si_query_hw *query = (struct si_query_hw *)rquery; + struct si_query_hw *query = (struct si_query_hw *)squery; if (query->flags & SI_QUERY_HW_FLAG_NO_START) - si_query_hw_reset_buffers(sctx, query); + si_query_buffer_reset(sctx, &query->buffer); si_query_hw_emit_stop(sctx, query); - if (!(query->flags & SI_QUERY_HW_FLAG_NO_START)) - LIST_DELINIT(&query->list); + if (!(query->flags & SI_QUERY_HW_FLAG_NO_START)) { + LIST_DELINIT(&query->b.active_list); + sctx->num_cs_dw_queries_suspend -= query->b.num_cs_dw_suspend; + } if (!query->buffer.buf) return false; @@ -1153,7 +1141,7 @@ } static void si_get_hw_query_params(struct si_context *sctx, - struct si_query_hw *rquery, int index, + struct si_query_hw *squery, int index, struct si_hw_query_params *params) { unsigned max_rbs = sctx->screen->info.num_render_backends; @@ -1161,7 +1149,7 @@ params->pair_stride = 0; params->pair_count = 1; - switch (rquery->b.type) { + switch (squery->b.type) { case PIPE_QUERY_OCCLUSION_COUNTER: case PIPE_QUERY_OCCLUSION_PREDICATE: case PIPE_QUERY_OCCLUSION_PREDICATE_CONSERVATIVE: @@ -1207,7 +1195,7 @@ * fence: it is initialized as 0, and the high bit is set by * the write of the streamout stats event. 
*/ - params->fence_offset = rquery->result_size - 4; + params->fence_offset = squery->result_size - 4; break; case PIPE_QUERY_PIPELINE_STATISTICS: { @@ -1351,14 +1339,35 @@ } } +void si_query_hw_suspend(struct si_context *sctx, struct si_query *query) +{ + si_query_hw_emit_stop(sctx, (struct si_query_hw *)query); +} + +void si_query_hw_resume(struct si_context *sctx, struct si_query *query) +{ + si_query_hw_emit_start(sctx, (struct si_query_hw *)query); +} + +static const struct si_query_ops query_hw_ops = { + .destroy = si_query_hw_destroy, + .begin = si_query_hw_begin, + .end = si_query_hw_end, + .get_result = si_query_hw_get_result, + .get_result_resource = si_query_hw_get_result_resource, + + .suspend = si_query_hw_suspend, + .resume = si_query_hw_resume, +}; + static boolean si_get_query_result(struct pipe_context *ctx, struct pipe_query *query, boolean wait, union pipe_query_result *result) { struct si_context *sctx = (struct si_context *)ctx; - struct si_query *rquery = (struct si_query *)query; + struct si_query *squery = (struct si_query *)query; - return rquery->ops->get_result(sctx, rquery, wait, result); + return squery->ops->get_result(sctx, squery, wait, result); } static void si_get_query_result_resource(struct pipe_context *ctx, @@ -1370,9 +1379,9 @@ unsigned offset) { struct si_context *sctx = (struct si_context *)ctx; - struct si_query *rquery = (struct si_query *)query; + struct si_query *squery = (struct si_query *)query; - rquery->ops->get_result_resource(sctx, rquery, wait, result_type, index, + squery->ops->get_result_resource(sctx, squery, wait, result_type, index, resource, offset); } @@ -1383,11 +1392,11 @@ } bool si_query_hw_get_result(struct si_context *sctx, - struct si_query *rquery, + struct si_query *squery, bool wait, union pipe_query_result *result) { struct si_screen *sscreen = sctx->screen; - struct si_query_hw *query = (struct si_query_hw *)rquery; + struct si_query_hw *query = (struct si_query_hw *)squery; struct 
si_query_buffer *qbuf; query->ops->clear_result(query, result); @@ -1398,7 +1407,7 @@ unsigned results_base = 0; void *map; - if (rquery->b.flushed) + if (squery->b.flushed) map = sctx->ws->buffer_map(qbuf->buf->buf, NULL, usage); else map = si_buffer_map_sync_with_rings(sctx, qbuf->buf, usage); @@ -1414,9 +1423,9 @@ } /* Convert the time to expected units. */ - if (rquery->type == PIPE_QUERY_TIME_ELAPSED || - rquery->type == SI_QUERY_TIME_ELAPSED_SDMA || - rquery->type == PIPE_QUERY_TIMESTAMP) { + if (squery->type == PIPE_QUERY_TIME_ELAPSED || + squery->type == SI_QUERY_TIME_ELAPSED_SDMA || + squery->type == PIPE_QUERY_TIMESTAMP) { result->u64 = (1000000 * result->u64) / sscreen->info.clock_crystal_freq; } return true; @@ -1436,14 +1445,14 @@ } static void si_query_hw_get_result_resource(struct si_context *sctx, - struct si_query *rquery, + struct si_query *squery, bool wait, enum pipe_query_value_type result_type, int index, struct pipe_resource *resource, unsigned offset) { - struct si_query_hw *query = (struct si_query_hw *)rquery; + struct si_query_hw *query = (struct si_query_hw *)squery; struct si_query_buffer *qbuf; struct si_query_buffer *qbuf_prev; struct pipe_resource *tmp_buffer = NULL; @@ -1558,7 +1567,7 @@ ssbo[2].buffer_offset = offset; ssbo[2].buffer_size = 8; - r600_resource(resource)->TC_L2_dirty = true; + si_resource(resource)->TC_L2_dirty = true; } sctx->b.set_shader_buffers(&sctx->b, PIPE_SHADER_COMPUTE, 0, 3, ssbo); @@ -1573,7 +1582,8 @@ va = qbuf->buf->gpu_address + qbuf->results_end - query->result_size; va += params.fence_offset; - si_cp_wait_mem(sctx, va, 0x80000000, 0x80000000, 0); + si_cp_wait_mem(sctx, sctx->gfx_cs, va, 0x80000000, + 0x80000000, WAIT_REG_MEM_EQUAL); } sctx->b.launch_grid(&sctx->b, &grid); @@ -1590,7 +1600,7 @@ enum pipe_render_cond_flag mode) { struct si_context *sctx = (struct si_context *)ctx; - struct si_query_hw *rquery = (struct si_query_hw *)query; + struct si_query_hw *squery = (struct si_query_hw *)query; struct 
si_atom *atom = &sctx->atoms.s.render_cond; if (query) { @@ -1603,21 +1613,21 @@ if (((sctx->chip_class == VI && sctx->screen->info.pfp_fw_feature < 49) || (sctx->chip_class == GFX9 && sctx->screen->info.pfp_fw_feature < 38)) && !condition && - (rquery->b.type == PIPE_QUERY_SO_OVERFLOW_ANY_PREDICATE || - (rquery->b.type == PIPE_QUERY_SO_OVERFLOW_PREDICATE && - (rquery->buffer.previous || - rquery->buffer.results_end > rquery->result_size)))) { + (squery->b.type == PIPE_QUERY_SO_OVERFLOW_ANY_PREDICATE || + (squery->b.type == PIPE_QUERY_SO_OVERFLOW_PREDICATE && + (squery->buffer.previous || + squery->buffer.results_end > squery->result_size)))) { needs_workaround = true; } - if (needs_workaround && !rquery->workaround_buf) { + if (needs_workaround && !squery->workaround_buf) { bool old_force_off = sctx->render_cond_force_off; sctx->render_cond_force_off = true; u_suballocator_alloc( sctx->allocator_zeroed_memory, 8, 8, - &rquery->workaround_offset, - (struct pipe_resource **)&rquery->workaround_buf); + &squery->workaround_offset, + (struct pipe_resource **)&squery->workaround_buf); /* Reset to NULL to avoid a redundant SET_PREDICATION * from launching the compute grid. @@ -1626,7 +1636,7 @@ ctx->get_query_result_resource( ctx, query, true, PIPE_QUERY_TYPE_U64, 0, - &rquery->workaround_buf->b.b, rquery->workaround_offset); + &squery->workaround_buf->b.b, squery->workaround_offset); /* Settings this in the render cond atom is too late, * so set it here. 
*/ @@ -1646,26 +1656,21 @@ void si_suspend_queries(struct si_context *sctx) { - struct si_query_hw *query; + struct si_query *query; - LIST_FOR_EACH_ENTRY(query, &sctx->active_queries, list) { - si_query_hw_emit_stop(sctx, query); - } - assert(sctx->num_cs_dw_queries_suspend == 0); + LIST_FOR_EACH_ENTRY(query, &sctx->active_queries, active_list) + query->ops->suspend(sctx, query); } void si_resume_queries(struct si_context *sctx) { - struct si_query_hw *query; - - assert(sctx->num_cs_dw_queries_suspend == 0); + struct si_query *query; /* Check CS space here. Resuming must not be interrupted by flushes. */ si_need_gfx_cs_space(sctx); - LIST_FOR_EACH_ENTRY(query, &sctx->active_queries, list) { - si_query_hw_emit_start(sctx, query); - } + LIST_FOR_EACH_ENTRY(query, &sctx->active_queries, active_list) + query->ops->resume(sctx, query); } #define XFULL(name_, query_type_, type_, result_type_, group_id_) \ diff -Nru mesa-18.3.3/src/gallium/drivers/radeonsi/si_query.h mesa-19.0.1/src/gallium/drivers/radeonsi/si_query.h --- mesa-18.3.3/src/gallium/drivers/radeonsi/si_query.h 2018-12-07 18:58:04.000000000 +0000 +++ mesa-19.0.1/src/gallium/drivers/radeonsi/si_query.h 2019-03-31 23:16:37.000000000 +0000 @@ -34,8 +34,9 @@ struct si_screen; struct si_context; struct si_query; +struct si_query_buffer; struct si_query_hw; -struct r600_resource; +struct si_resource; enum { SI_QUERY_DRAW_CALLS = PIPE_QUERY_DRIVER_SPECIFIC, @@ -133,14 +134,23 @@ int index, struct pipe_resource *resource, unsigned offset); + + void (*suspend)(struct si_context *, struct si_query *); + void (*resume)(struct si_context *, struct si_query *); }; struct si_query { struct threaded_query b; - struct si_query_ops *ops; + const struct si_query_ops *ops; - /* The type of query */ + /* The PIPE_QUERY_xxx type of query */ unsigned type; + + /* The number of dwords for suspend. */ + unsigned num_cs_dw_suspend; + + /* Linked list of queries that must be suspended at end of CS. 
*/ + struct list_head active_list; }; enum { @@ -151,15 +161,13 @@ }; struct si_query_hw_ops { - bool (*prepare_buffer)(struct si_screen *, - struct si_query_hw *, - struct r600_resource *); + bool (*prepare_buffer)(struct si_context *, struct si_query_buffer *); void (*emit_start)(struct si_context *, struct si_query_hw *, - struct r600_resource *buffer, uint64_t va); + struct si_resource *buffer, uint64_t va); void (*emit_stop)(struct si_context *, struct si_query_hw *, - struct r600_resource *buffer, uint64_t va); + struct si_resource *buffer, uint64_t va); void (*clear_result)(struct si_query_hw *, union pipe_query_result *); void (*add_result)(struct si_screen *screen, struct si_query_hw *, void *buffer, @@ -168,15 +176,23 @@ struct si_query_buffer { /* The buffer where query results are stored. */ - struct r600_resource *buf; - /* Offset of the next free result after current query data */ - unsigned results_end; + struct si_resource *buf; /* If a query buffer is full, a new buffer is created and the old one * is put in here. When we calculate the result, we sum up the samples * from all buffers. */ struct si_query_buffer *previous; + /* Offset of the next free result after current query data */ + unsigned results_end; + bool unprepared; }; +void si_query_buffer_destroy(struct si_screen *sctx, struct si_query_buffer *buffer); +void si_query_buffer_reset(struct si_context *sctx, struct si_query_buffer *buffer); +bool si_query_buffer_alloc(struct si_context *sctx, struct si_query_buffer *buffer, + bool (*prepare_buffer)(struct si_context *, struct si_query_buffer*), + unsigned size); + + struct si_query_hw { struct si_query b; struct si_query_hw_ops *ops; @@ -187,105 +203,37 @@ /* Size of the result in memory for both begin_query and end_query, * this can be one or two numbers, or it could even be a size of a structure. */ unsigned result_size; - /* The number of dwords for end_query. 
*/ - unsigned num_cs_dw_end; - /* Linked list of queries */ - struct list_head list; /* For transform feedback: which stream the query is for */ unsigned stream; /* Workaround via compute shader */ - struct r600_resource *workaround_buf; + struct si_resource *workaround_buf; unsigned workaround_offset; }; -bool si_query_hw_init(struct si_screen *sscreen, - struct si_query_hw *query); void si_query_hw_destroy(struct si_screen *sscreen, - struct si_query *rquery); + struct si_query *squery); bool si_query_hw_begin(struct si_context *sctx, - struct si_query *rquery); + struct si_query *squery); bool si_query_hw_end(struct si_context *sctx, - struct si_query *rquery); + struct si_query *squery); bool si_query_hw_get_result(struct si_context *sctx, - struct si_query *rquery, + struct si_query *squery, bool wait, union pipe_query_result *result); +void si_query_hw_suspend(struct si_context *sctx, struct si_query *query); +void si_query_hw_resume(struct si_context *sctx, struct si_query *query); -/* Performance counters */ -enum { - /* This block is part of the shader engine */ - SI_PC_BLOCK_SE = (1 << 0), - - /* Expose per-instance groups instead of summing all instances (within - * an SE). */ - SI_PC_BLOCK_INSTANCE_GROUPS = (1 << 1), - - /* Expose per-SE groups instead of summing instances across SEs. */ - SI_PC_BLOCK_SE_GROUPS = (1 << 2), - - /* Shader block */ - SI_PC_BLOCK_SHADER = (1 << 3), - - /* Non-shader block with perfcounters windowed by shaders. */ - SI_PC_BLOCK_SHADER_WINDOWED = (1 << 4), -}; - -/* Describes a hardware block with performance counters. Multiple instances of - * each block, possibly per-SE, may exist on the chip. Depending on the block - * and on the user's configuration, we either - * (a) expose every instance as a performance counter group, - * (b) expose a single performance counter group that reports the sum over all - * instances, or - * (c) expose one performance counter group per instance, but summed over all - * shader engines. 
- */ -struct si_perfcounter_block { - const char *basename; - unsigned flags; - unsigned num_counters; - unsigned num_selectors; - unsigned num_instances; - - unsigned num_groups; - char *group_names; - unsigned group_name_stride; - - char *selector_names; - unsigned selector_name_stride; - - void *data; -}; +/* Performance counters */ struct si_perfcounters { unsigned num_groups; unsigned num_blocks; - struct si_perfcounter_block *blocks; + struct si_pc_block *blocks; unsigned num_stop_cs_dwords; unsigned num_instance_cs_dwords; - unsigned num_shader_types; - const char * const *shader_type_suffixes; - const unsigned *shader_type_bits; - - void (*emit_instance)(struct si_context *, - int se, int instance); - void (*emit_shaders)(struct si_context *, unsigned shaders); - void (*emit_select)(struct si_context *, - struct si_perfcounter_block *, - unsigned count, unsigned *selectors); - void (*emit_start)(struct si_context *, - struct r600_resource *buffer, uint64_t va); - void (*emit_stop)(struct si_context *, - struct r600_resource *buffer, uint64_t va); - void (*emit_read)(struct si_context *, - struct si_perfcounter_block *, - unsigned count, unsigned *selectors, - struct r600_resource *buffer, uint64_t va); - - void (*cleanup)(struct si_screen *); - bool separate_se; bool separate_instance; }; @@ -301,16 +249,6 @@ unsigned index, struct pipe_driver_query_group_info *info); -bool si_perfcounters_init(struct si_perfcounters *, unsigned num_blocks); -void si_perfcounters_add_block(struct si_screen *, - struct si_perfcounters *, - const char *name, unsigned flags, - unsigned counters, unsigned selectors, - unsigned instances, void *data); -void si_perfcounters_do_destroy(struct si_perfcounters *); -void si_query_hw_reset_buffers(struct si_context *sctx, - struct si_query_hw *query); - struct si_qbo_state { void *saved_compute; struct pipe_constant_buffer saved_const0; diff -Nru mesa-18.3.3/src/gallium/drivers/radeonsi/si_shader.c 
mesa-19.0.1/src/gallium/drivers/radeonsi/si_shader.c --- mesa-18.3.3/src/gallium/drivers/radeonsi/si_shader.c 2019-02-01 12:03:20.000000000 +0000 +++ mesa-19.0.1/src/gallium/drivers/radeonsi/si_shader.c 2019-03-31 23:16:37.000000000 +0000 @@ -86,6 +86,8 @@ union si_shader_part_key *key); static void si_build_ps_epilog_function(struct si_shader_context *ctx, union si_shader_part_key *key); +static void si_fix_resource_usage(struct si_screen *sscreen, + struct si_shader *shader); /* Ideally pass the sample mask input to the PS epilog as v14, which * is its usual location, so that the shader doesn't have to add v_mov. @@ -2318,18 +2320,9 @@ ptr = LLVMBuildPtrToInt(ctx->ac.builder, ptr, ctx->ac.intptr, ""); LLVMValueRef desc0, desc1; - if (HAVE_32BIT_POINTERS) { - desc0 = ptr; - desc1 = LLVMConstInt(ctx->i32, - S_008F04_BASE_ADDRESS_HI(ctx->screen->info.address32_hi), 0); - } else { - ptr = LLVMBuildBitCast(ctx->ac.builder, ptr, ctx->v2i32, ""); - desc0 = LLVMBuildExtractElement(ctx->ac.builder, ptr, ctx->i32_0, ""); - desc1 = LLVMBuildExtractElement(ctx->ac.builder, ptr, ctx->i32_1, ""); - /* Mask out all bits except BASE_ADDRESS_HI. 
*/ - desc1 = LLVMBuildAnd(ctx->ac.builder, desc1, - LLVMConstInt(ctx->i32, ~C_008F04_BASE_ADDRESS_HI, 0), ""); - } + desc0 = ptr; + desc1 = LLVMConstInt(ctx->i32, + S_008F04_BASE_ADDRESS_HI(ctx->screen->info.address32_hi), 0); LLVMValueRef desc_elems[] = { desc0, @@ -3271,21 +3264,9 @@ unsigned param, unsigned return_index) { LLVMBuilderRef builder = ctx->ac.builder; - LLVMValueRef ptr, lo, hi; - - if (HAVE_32BIT_POINTERS) { - ptr = LLVMGetParam(ctx->main_fn, param); - ptr = LLVMBuildPtrToInt(builder, ptr, ctx->i32, ""); - return LLVMBuildInsertValue(builder, ret, ptr, return_index, ""); - } - - ptr = LLVMGetParam(ctx->main_fn, param); - ptr = LLVMBuildPtrToInt(builder, ptr, ctx->i64, ""); - ptr = LLVMBuildBitCast(builder, ptr, ctx->v2i32, ""); - lo = LLVMBuildExtractElement(builder, ptr, ctx->i32_0, ""); - hi = LLVMBuildExtractElement(builder, ptr, ctx->i32_1, ""); - ret = LLVMBuildInsertValue(builder, ret, lo, return_index, ""); - return LLVMBuildInsertValue(builder, ret, hi, return_index + 1, ""); + LLVMValueRef ptr = LLVMGetParam(ctx->main_fn, param); + ptr = LLVMBuildPtrToInt(builder, ptr, ctx->i32, ""); + return LLVMBuildInsertValue(builder, ret, ptr, return_index, ""); } /* This only writes the tessellation factor levels. 
*/ @@ -3386,8 +3367,7 @@ LLVMValueRef ret = ctx->return_value; ret = si_insert_input_ptr(ctx, ret, 0, 0); - if (HAVE_32BIT_POINTERS) - ret = si_insert_input_ptr(ctx, ret, 1, 1); + ret = si_insert_input_ptr(ctx, ret, 1, 1); ret = si_insert_input_ret(ctx, ret, ctx->param_tcs_offchip_offset, 2); ret = si_insert_input_ret(ctx, ret, ctx->param_merged_wave_info, 3); ret = si_insert_input_ret(ctx, ret, ctx->param_tcs_factor_offset, 4); @@ -3402,11 +3382,6 @@ ret = si_insert_input_ret(ctx, ret, ctx->param_vs_state_bits, 8 + SI_SGPR_VS_STATE_BITS); -#if !HAVE_32BIT_POINTERS - ret = si_insert_input_ptr(ctx, ret, ctx->param_vs_state_bits + 4, - 8 + GFX9_SGPR_2ND_SAMPLERS_AND_IMAGES); -#endif - ret = si_insert_input_ret(ctx, ret, ctx->param_tcs_offchip_layout, 8 + GFX9_SGPR_TCS_OFFCHIP_LAYOUT); ret = si_insert_input_ret(ctx, ret, ctx->param_tcs_out_lds_offsets, @@ -3430,8 +3405,7 @@ LLVMValueRef ret = ctx->return_value; ret = si_insert_input_ptr(ctx, ret, 0, 0); - if (HAVE_32BIT_POINTERS) - ret = si_insert_input_ptr(ctx, ret, 1, 1); + ret = si_insert_input_ptr(ctx, ret, 1, 1); ret = si_insert_input_ret(ctx, ret, ctx->param_gs2vs_offset, 2); ret = si_insert_input_ret(ctx, ret, ctx->param_merged_wave_info, 3); ret = si_insert_input_ret(ctx, ret, ctx->param_merged_scratch_offset, 5); @@ -3442,11 +3416,6 @@ ctx->param_bindless_samplers_and_images, 8 + SI_SGPR_BINDLESS_SAMPLERS_AND_IMAGES); -#if !HAVE_32BIT_POINTERS - ret = si_insert_input_ptr(ctx, ret, ctx->param_vs_state_bits + 4, - 8 + GFX9_SGPR_2ND_SAMPLERS_AND_IMAGES); -#endif - unsigned vgpr; if (ctx->type == PIPE_SHADER_VERTEX) vgpr = 8 + GFX9_VSGS_NUM_USER_SGPR; @@ -4592,6 +4561,30 @@ } } +static void declare_vs_blit_inputs(struct si_shader_context *ctx, + struct si_function_info *fninfo, + unsigned vs_blit_property) +{ + ctx->param_vs_blit_inputs = fninfo->num_params; + add_arg(fninfo, ARG_SGPR, ctx->i32); /* i16 x1, y1 */ + add_arg(fninfo, ARG_SGPR, ctx->i32); /* i16 x2, y2 */ + add_arg(fninfo, ARG_SGPR, ctx->f32); /* 
depth */ + + if (vs_blit_property == SI_VS_BLIT_SGPRS_POS_COLOR) { + add_arg(fninfo, ARG_SGPR, ctx->f32); /* color0 */ + add_arg(fninfo, ARG_SGPR, ctx->f32); /* color1 */ + add_arg(fninfo, ARG_SGPR, ctx->f32); /* color2 */ + add_arg(fninfo, ARG_SGPR, ctx->f32); /* color3 */ + } else if (vs_blit_property == SI_VS_BLIT_SGPRS_POS_TEXCOORD) { + add_arg(fninfo, ARG_SGPR, ctx->f32); /* texcoord.x1 */ + add_arg(fninfo, ARG_SGPR, ctx->f32); /* texcoord.y1 */ + add_arg(fninfo, ARG_SGPR, ctx->f32); /* texcoord.x2 */ + add_arg(fninfo, ARG_SGPR, ctx->f32); /* texcoord.y2 */ + add_arg(fninfo, ARG_SGPR, ctx->f32); /* texcoord.z */ + add_arg(fninfo, ARG_SGPR, ctx->f32); /* texcoord.w */ + } +} + static void declare_tes_input_vgprs(struct si_shader_context *ctx, struct si_function_info *fninfo) { @@ -4636,24 +4629,7 @@ declare_global_desc_pointers(ctx, &fninfo); if (vs_blit_property) { - ctx->param_vs_blit_inputs = fninfo.num_params; - add_arg(&fninfo, ARG_SGPR, ctx->i32); /* i16 x1, y1 */ - add_arg(&fninfo, ARG_SGPR, ctx->i32); /* i16 x2, y2 */ - add_arg(&fninfo, ARG_SGPR, ctx->f32); /* depth */ - - if (vs_blit_property == SI_VS_BLIT_SGPRS_POS_COLOR) { - add_arg(&fninfo, ARG_SGPR, ctx->f32); /* color0 */ - add_arg(&fninfo, ARG_SGPR, ctx->f32); /* color1 */ - add_arg(&fninfo, ARG_SGPR, ctx->f32); /* color2 */ - add_arg(&fninfo, ARG_SGPR, ctx->f32); /* color3 */ - } else if (vs_blit_property == SI_VS_BLIT_SGPRS_POS_TEXCOORD) { - add_arg(&fninfo, ARG_SGPR, ctx->f32); /* texcoord.x1 */ - add_arg(&fninfo, ARG_SGPR, ctx->f32); /* texcoord.y1 */ - add_arg(&fninfo, ARG_SGPR, ctx->f32); /* texcoord.x2 */ - add_arg(&fninfo, ARG_SGPR, ctx->f32); /* texcoord.y2 */ - add_arg(&fninfo, ARG_SGPR, ctx->f32); /* texcoord.z */ - add_arg(&fninfo, ARG_SGPR, ctx->f32); /* texcoord.w */ - } + declare_vs_blit_inputs(ctx, &fninfo, vs_blit_property); /* VGPRs */ declare_vs_input_vgprs(ctx, &fninfo, &num_prolog_vgprs); @@ -4710,13 +4686,8 @@ case SI_SHADER_MERGED_VERTEX_TESSCTRL: /* Merged stages have 8 
system SGPRs at the beginning. */ /* SPI_SHADER_USER_DATA_ADDR_LO/HI_HS */ - if (HAVE_32BIT_POINTERS) { - declare_per_stage_desc_pointers(ctx, &fninfo, - ctx->type == PIPE_SHADER_TESS_CTRL); - } else { - declare_const_and_shader_buffers(ctx, &fninfo, - ctx->type == PIPE_SHADER_TESS_CTRL); - } + declare_per_stage_desc_pointers(ctx, &fninfo, + ctx->type == PIPE_SHADER_TESS_CTRL); ctx->param_tcs_offchip_offset = add_arg(&fninfo, ARG_SGPR, ctx->i32); ctx->param_merged_wave_info = add_arg(&fninfo, ARG_SGPR, ctx->i32); ctx->param_tcs_factor_offset = add_arg(&fninfo, ARG_SGPR, ctx->i32); @@ -4729,15 +4700,9 @@ ctx->type == PIPE_SHADER_VERTEX); declare_vs_specific_input_sgprs(ctx, &fninfo); - if (!HAVE_32BIT_POINTERS) { - declare_samplers_and_images(ctx, &fninfo, - ctx->type == PIPE_SHADER_TESS_CTRL); - } ctx->param_tcs_offchip_layout = add_arg(&fninfo, ARG_SGPR, ctx->i32); ctx->param_tcs_out_lds_offsets = add_arg(&fninfo, ARG_SGPR, ctx->i32); ctx->param_tcs_out_lds_layout = add_arg(&fninfo, ARG_SGPR, ctx->i32); - if (!HAVE_32BIT_POINTERS) /* Align to 2 dwords. */ - add_arg(&fninfo, ARG_SGPR, ctx->i32); /* unused */ ctx->param_vertex_buffers = add_arg(&fninfo, ARG_SGPR, ac_array_in_const32_addr_space(ctx->v4i32)); @@ -4771,13 +4736,8 @@ case SI_SHADER_MERGED_VERTEX_OR_TESSEVAL_GEOMETRY: /* Merged stages have 8 system SGPRs at the beginning. 
*/ /* SPI_SHADER_USER_DATA_ADDR_LO/HI_GS */ - if (HAVE_32BIT_POINTERS) { - declare_per_stage_desc_pointers(ctx, &fninfo, - ctx->type == PIPE_SHADER_GEOMETRY); - } else { - declare_const_and_shader_buffers(ctx, &fninfo, - ctx->type == PIPE_SHADER_GEOMETRY); - } + declare_per_stage_desc_pointers(ctx, &fninfo, + ctx->type == PIPE_SHADER_GEOMETRY); ctx->param_gs2vs_offset = add_arg(&fninfo, ARG_SGPR, ctx->i32); ctx->param_merged_wave_info = add_arg(&fninfo, ARG_SGPR, ctx->i32); ctx->param_tcs_offchip_offset = add_arg(&fninfo, ARG_SGPR, ctx->i32); @@ -4796,14 +4756,8 @@ ctx->param_tcs_offchip_layout = add_arg(&fninfo, ARG_SGPR, ctx->i32); ctx->param_tes_offchip_addr = add_arg(&fninfo, ARG_SGPR, ctx->i32); /* Declare as many input SGPRs as the VS has. */ - if (!HAVE_32BIT_POINTERS) - add_arg(&fninfo, ARG_SGPR, ctx->i32); /* unused */ } - if (!HAVE_32BIT_POINTERS) { - declare_samplers_and_images(ctx, &fninfo, - ctx->type == PIPE_SHADER_GEOMETRY); - } if (ctx->type == PIPE_SHADER_VERTEX) { ctx->param_vertex_buffers = add_arg(&fninfo, ARG_SGPR, ac_array_in_const32_addr_space(ctx->v4i32)); @@ -5288,7 +5242,7 @@ !mainb->rodata_size); assert(!epilog || !epilog->rodata_size); - r600_resource_reference(&shader->bo, NULL); + si_resource_reference(&shader->bo, NULL); shader->bo = si_aligned_buffer_create(&sscreen->b, sscreen->cpdma_prefetch_writes_memory ? 0 : SI_RESOURCE_FLAG_READ_ONLY, @@ -5301,7 +5255,8 @@ /* Upload. */ ptr = sscreen->ws->buffer_map(shader->bo->buf, NULL, PIPE_TRANSFER_READ_WRITE | - PIPE_TRANSFER_UNSYNCHRONIZED); + PIPE_TRANSFER_UNSYNCHRONIZED | + RADEON_TRANSFER_TEMPORARY); /* Don't use util_memcpy_cpu_to_le32. LLVM binaries are * endian-independent. 
*/ @@ -5836,6 +5791,8 @@ if (r != 0) { FREE(shader); shader = NULL; + } else { + si_fix_resource_usage(sscreen, shader); } return shader; } @@ -7164,20 +7121,9 @@ LLVMValueRef ptr[2], list; bool merged_shader = is_merged_shader(ctx); - if (HAVE_32BIT_POINTERS) { - ptr[0] = LLVMGetParam(ctx->main_fn, (merged_shader ? 8 : 0) + SI_SGPR_RW_BUFFERS); - list = LLVMBuildIntToPtr(ctx->ac.builder, ptr[0], - ac_array_in_const32_addr_space(ctx->v4i32), ""); - return list; - } - - /* Get the pointer to rw buffers. */ ptr[0] = LLVMGetParam(ctx->main_fn, (merged_shader ? 8 : 0) + SI_SGPR_RW_BUFFERS); - ptr[1] = LLVMGetParam(ctx->main_fn, (merged_shader ? 8 : 0) + SI_SGPR_RW_BUFFERS + 1); - list = ac_build_gather_values(&ctx->ac, ptr, 2); - list = LLVMBuildBitCast(ctx->ac.builder, list, ctx->i64, ""); - list = LLVMBuildIntToPtr(ctx->ac.builder, list, - ac_array_in_const_addr_space(ctx->v4i32), ""); + list = LLVMBuildIntToPtr(ctx->ac.builder, ptr[0], + ac_array_in_const32_addr_space(ctx->v4i32), ""); return list; } @@ -7405,8 +7351,6 @@ add_arg(&fninfo, ARG_SGPR, ctx->i32); add_arg(&fninfo, ARG_SGPR, ctx->i32); add_arg(&fninfo, ARG_SGPR, ctx->i32); - if (!HAVE_32BIT_POINTERS) - add_arg(&fninfo, ARG_SGPR, ctx->ac.intptr); ctx->param_tcs_offchip_layout = add_arg(&fninfo, ARG_SGPR, ctx->i32); add_arg(&fninfo, ARG_SGPR, ctx->i32); ctx->param_tcs_out_lds_layout = add_arg(&fninfo, ARG_SGPR, ctx->i32); @@ -8174,9 +8118,9 @@ void si_shader_destroy(struct si_shader *shader) { if (shader->scratch_bo) - r600_resource_reference(&shader->scratch_bo, NULL); + si_resource_reference(&shader->scratch_bo, NULL); - r600_resource_reference(&shader->bo, NULL); + si_resource_reference(&shader->bo, NULL); if (!shader->is_binary_shared) ac_shader_binary_clean(&shader->binary); diff -Nru mesa-18.3.3/src/gallium/drivers/radeonsi/si_shader.h mesa-19.0.1/src/gallium/drivers/radeonsi/si_shader.h --- mesa-18.3.3/src/gallium/drivers/radeonsi/si_shader.h 2018-12-07 18:58:04.000000000 +0000 +++ 
mesa-19.0.1/src/gallium/drivers/radeonsi/si_shader.h 2019-03-31 23:16:37.000000000 +0000 @@ -158,21 +158,9 @@ /* SGPR user data indices */ enum { SI_SGPR_RW_BUFFERS, /* rings (& stream-out, VS only) */ -#if !HAVE_32BIT_POINTERS - SI_SGPR_RW_BUFFERS_HI, -#endif SI_SGPR_BINDLESS_SAMPLERS_AND_IMAGES, -#if !HAVE_32BIT_POINTERS - SI_SGPR_BINDLESS_SAMPLERS_AND_IMAGES_HI, -#endif SI_SGPR_CONST_AND_SHADER_BUFFERS, /* or just a constant buffer 0 pointer */ -#if !HAVE_32BIT_POINTERS - SI_SGPR_CONST_AND_SHADER_BUFFERS_HI, -#endif SI_SGPR_SAMPLERS_AND_IMAGES, -#if !HAVE_32BIT_POINTERS - SI_SGPR_SAMPLERS_AND_IMAGES_HI, -#endif SI_NUM_RESOURCE_SGPRS, /* API VS, TES without GS, GS copy shader */ @@ -200,35 +188,20 @@ GFX6_TCS_NUM_USER_SGPR, /* GFX9: Merged shaders. */ -#if HAVE_32BIT_POINTERS /* 2ND_CONST_AND_SHADER_BUFFERS is set in USER_DATA_ADDR_LO (SGPR0). */ /* 2ND_SAMPLERS_AND_IMAGES is set in USER_DATA_ADDR_HI (SGPR1). */ GFX9_MERGED_NUM_USER_SGPR = SI_VS_NUM_USER_SGPR, -#else - /* 2ND_CONST_AND_SHADER_BUFFERS is set in USER_DATA_ADDR_LO/HI (SGPR[0:1]). */ - GFX9_SGPR_2ND_SAMPLERS_AND_IMAGES = SI_VS_NUM_USER_SGPR, - GFX9_SGPR_2ND_SAMPLERS_AND_IMAGES_HI, - GFX9_MERGED_NUM_USER_SGPR, -#endif /* GFX9: Merged LS-HS (VS-TCS) only. 
*/ GFX9_SGPR_TCS_OFFCHIP_LAYOUT = GFX9_MERGED_NUM_USER_SGPR, GFX9_SGPR_TCS_OUT_OFFSETS, GFX9_SGPR_TCS_OUT_LAYOUT, -#if !HAVE_32BIT_POINTERS - GFX9_SGPR_align_for_vb_pointer, -#endif GFX9_TCS_NUM_USER_SGPR, /* GS limits */ GFX6_GS_NUM_USER_SGPR = SI_NUM_RESOURCE_SGPRS, -#if HAVE_32BIT_POINTERS GFX9_VSGS_NUM_USER_SGPR = SI_VS_NUM_USER_SGPR, GFX9_TESGS_NUM_USER_SGPR = SI_TES_NUM_USER_SGPR, -#else - GFX9_VSGS_NUM_USER_SGPR = GFX9_MERGED_NUM_USER_SGPR, - GFX9_TESGS_NUM_USER_SGPR = GFX9_MERGED_NUM_USER_SGPR, -#endif SI_GSCOPY_NUM_USER_SGPR = SI_NUM_VS_STATE_RESOURCE_SGPRS, /* PS only */ @@ -627,8 +600,8 @@ struct si_shader_part *epilog; struct si_pm4_state *pm4; - struct r600_resource *bo; - struct r600_resource *scratch_bo; + struct si_resource *bo; + struct si_resource *scratch_bo; struct si_shader_key key; struct util_queue_fence ready; bool compilation_failed; @@ -735,7 +708,6 @@ void si_nir_scan_shader(const struct nir_shader *nir, struct tgsi_shader_info *info); void si_nir_scan_tess_ctrl(const struct nir_shader *nir, - const struct tgsi_shader_info *info, struct tgsi_tessctrl_info *out); void si_lower_nir(struct si_shader_selector *sel); diff -Nru mesa-18.3.3/src/gallium/drivers/radeonsi/si_shaderlib_tgsi.c mesa-19.0.1/src/gallium/drivers/radeonsi/si_shaderlib_tgsi.c --- mesa-18.3.3/src/gallium/drivers/radeonsi/si_shaderlib_tgsi.c 2018-12-07 18:58:04.000000000 +0000 +++ mesa-19.0.1/src/gallium/drivers/radeonsi/si_shaderlib_tgsi.c 2019-03-31 23:16:37.000000000 +0000 @@ -439,3 +439,80 @@ return sctx->b.create_compute_state(&sctx->b, &state); } + +/* Create a compute shader implementing copy_image. + * Luckily, this works with all texture targets except 1D_ARRAY. 
+ */ +void *si_create_copy_image_compute_shader(struct pipe_context *ctx) +{ + static const char text[] = + "COMP\n" + "PROPERTY CS_FIXED_BLOCK_WIDTH 8\n" + "PROPERTY CS_FIXED_BLOCK_HEIGHT 8\n" + "PROPERTY CS_FIXED_BLOCK_DEPTH 1\n" + "DCL SV[0], THREAD_ID\n" + "DCL SV[1], BLOCK_ID\n" + "DCL IMAGE[0], 2D_ARRAY, PIPE_FORMAT_R32G32B32A32_FLOAT, WR\n" + "DCL IMAGE[1], 2D_ARRAY, PIPE_FORMAT_R32G32B32A32_FLOAT, WR\n" + "DCL CONST[0][0..1]\n" // 0:xyzw 1:xyzw + "DCL TEMP[0..4], LOCAL\n" + "IMM[0] UINT32 {8, 1, 0, 0}\n" + "MOV TEMP[0].xyz, CONST[0][0].xyzw\n" + "UMAD TEMP[1].xyz, SV[1].xyzz, IMM[0].xxyy, SV[0].xyzz\n" + "UADD TEMP[2].xyz, TEMP[1].xyzx, TEMP[0].xyzx\n" + "LOAD TEMP[3], IMAGE[0], TEMP[2].xyzx, 2D_ARRAY, PIPE_FORMAT_R32G32B32A32_FLOAT\n" + "MOV TEMP[4].xyz, CONST[0][1].xyzw\n" + "UADD TEMP[2].xyz, TEMP[1].xyzx, TEMP[4].xyzx\n" + "STORE IMAGE[1], TEMP[2].xyzz, TEMP[3], 2D_ARRAY, PIPE_FORMAT_R32G32B32A32_FLOAT\n" + "END\n"; + + struct tgsi_token tokens[1024]; + struct pipe_compute_state state = {0}; + + if (!tgsi_text_translate(text, tokens, ARRAY_SIZE(tokens))) { + assert(false); + return NULL; + } + + state.ir_type = PIPE_SHADER_IR_TGSI; + state.prog = tokens; + + return ctx->create_compute_state(ctx, &state); +} + +void *si_create_copy_image_compute_shader_1d_array(struct pipe_context *ctx) +{ + static const char text[] = + "COMP\n" + "PROPERTY CS_FIXED_BLOCK_WIDTH 64\n" + "PROPERTY CS_FIXED_BLOCK_HEIGHT 1\n" + "PROPERTY CS_FIXED_BLOCK_DEPTH 1\n" + "DCL SV[0], THREAD_ID\n" + "DCL SV[1], BLOCK_ID\n" + "DCL IMAGE[0], 1D_ARRAY, PIPE_FORMAT_R32G32B32A32_FLOAT, WR\n" + "DCL IMAGE[1], 1D_ARRAY, PIPE_FORMAT_R32G32B32A32_FLOAT, WR\n" + "DCL CONST[0][0..1]\n" // 0:xyzw 1:xyzw + "DCL TEMP[0..4], LOCAL\n" + "IMM[0] UINT32 {64, 1, 0, 0}\n" + "MOV TEMP[0].xy, CONST[0][0].xzzw\n" + "UMAD TEMP[1].xy, SV[1].xyzz, IMM[0].xyyy, SV[0].xyzz\n" + "UADD TEMP[2].xy, TEMP[1].xyzx, TEMP[0].xyzx\n" + "LOAD TEMP[3], IMAGE[0], TEMP[2].xyzx, 1D_ARRAY, PIPE_FORMAT_R32G32B32A32_FLOAT\n" + 
"MOV TEMP[4].xy, CONST[0][1].xzzw\n" + "UADD TEMP[2].xy, TEMP[1].xyzx, TEMP[4].xyzx\n" + "STORE IMAGE[1], TEMP[2].xyzz, TEMP[3], 1D_ARRAY, PIPE_FORMAT_R32G32B32A32_FLOAT\n" + "END\n"; + + struct tgsi_token tokens[1024]; + struct pipe_compute_state state = {0}; + + if (!tgsi_text_translate(text, tokens, ARRAY_SIZE(tokens))) { + assert(false); + return NULL; + } + + state.ir_type = PIPE_SHADER_IR_TGSI; + state.prog = tokens; + + return ctx->create_compute_state(ctx, &state); +} diff -Nru mesa-18.3.3/src/gallium/drivers/radeonsi/si_shader_nir.c mesa-19.0.1/src/gallium/drivers/radeonsi/si_shader_nir.c --- mesa-18.3.3/src/gallium/drivers/radeonsi/si_shader_nir.c 2019-02-01 12:03:20.000000000 +0000 +++ mesa-19.0.1/src/gallium/drivers/radeonsi/si_shader_nir.c 2019-03-31 23:16:37.000000000 +0000 @@ -143,16 +143,39 @@ case nir_intrinsic_load_tess_level_outer: info->reads_tess_factors = true; break; - case nir_intrinsic_image_deref_load: + case nir_intrinsic_image_deref_load: { + nir_variable *var = intrinsic_get_var(intr); + if (var->data.bindless) { + info->uses_bindless_images = true; + + if (glsl_get_sampler_dim(var->type) == GLSL_SAMPLER_DIM_BUF) + info->uses_bindless_buffer_load = true; + else + info->uses_bindless_image_load = true; + } + break; + } case nir_intrinsic_image_deref_size: case nir_intrinsic_image_deref_samples: { nir_variable *var = intrinsic_get_var(intr); if (var->data.bindless) info->uses_bindless_images = true; + break; + } + case nir_intrinsic_image_deref_store: { + const nir_deref_instr *image_deref = nir_instr_as_deref(intr->src[0].ssa->parent_instr); + nir_variable *var = intrinsic_get_var(intr); + if (var->data.bindless) { + info->uses_bindless_images = true; + if (glsl_get_sampler_dim(image_deref->type) == GLSL_SAMPLER_DIM_BUF) + info->uses_bindless_buffer_store = true; + else + info->uses_bindless_image_store = true; + } + info->writes_memory = true; break; } - case nir_intrinsic_image_deref_store: case nir_intrinsic_image_deref_atomic_add: 
case nir_intrinsic_image_deref_atomic_min: case nir_intrinsic_image_deref_atomic_max: @@ -162,10 +185,16 @@ case nir_intrinsic_image_deref_atomic_exchange: case nir_intrinsic_image_deref_atomic_comp_swap: { nir_variable *var = intrinsic_get_var(intr); - if (var->data.bindless) + if (var->data.bindless) { info->uses_bindless_images = true; - /* fall-through */ + if (glsl_get_sampler_dim(var->type) == GLSL_SAMPLER_DIM_BUF) + info->uses_bindless_buffer_atomic = true; + else + info->uses_bindless_image_atomic = true; + } + info->writes_memory = true; + break; } case nir_intrinsic_store_ssbo: case nir_intrinsic_ssbo_atomic_add: @@ -250,7 +279,6 @@ } void si_nir_scan_tess_ctrl(const struct nir_shader *nir, - const struct tgsi_shader_info *info, struct tgsi_tessctrl_info *out) { memset(out, 0, sizeof(*out)); @@ -258,14 +286,8 @@ if (nir->info.stage != MESA_SHADER_TESS_CTRL) return; - /* Initial value = true. Here the pass will accumulate results from - * multiple segments surrounded by barriers. If tess factors aren't - * written at all, it's a shader bug and we don't care if this will be - * true. - */ - out->tessfactors_are_def_in_all_invocs = true; - - /* TODO: Implement scanning of tess factors, see tgsi backend. */ + out->tessfactors_are_def_in_all_invocs = + ac_are_tessfactors_def_in_all_invocs(nir); } void si_nir_scan_shader(const struct nir_shader *nir, @@ -340,7 +362,7 @@ } } - if (nir->info.stage == MESA_SHADER_COMPUTE) { + if (gl_shader_stage_is_compute(nir->info.stage)) { info->properties[TGSI_PROPERTY_CS_FIXED_BLOCK_WIDTH] = nir->info.cs.local_size[0]; info->properties[TGSI_PROPERTY_CS_FIXED_BLOCK_HEIGHT] = nir->info.cs.local_size[1]; info->properties[TGSI_PROPERTY_CS_FIXED_BLOCK_DEPTH] = nir->info.cs.local_size[2]; @@ -654,7 +676,8 @@ * so we don't need to worry about the ordering. 
*/ if (variable->interface_type != NULL) { - if (variable->data.mode == nir_var_uniform) { + if (variable->data.mode == nir_var_uniform || + variable->data.mode == nir_var_mem_ubo) { unsigned block_count; if (base_type != GLSL_TYPE_INTERFACE) { @@ -678,7 +701,7 @@ _mesa_set_add(ubo_set, variable->interface_type); } - if (variable->data.mode == nir_var_shader_storage) { + if (variable->data.mode == nir_var_mem_ssbo) { /* TODO: make this more accurate */ info->shader_buffers_declared = u_bit_consecutive(0, SI_NUM_SHADER_BUFFERS); @@ -795,8 +818,6 @@ ac_lower_indirect_derefs(sel->nir, sel->screen->info.chip_class); - NIR_PASS_V(sel->nir, nir_lower_load_const_to_scalar); - bool progress; do { progress = false; @@ -813,7 +834,7 @@ NIR_PASS(progress, sel->nir, nir_opt_if); NIR_PASS(progress, sel->nir, nir_opt_dead_cf); NIR_PASS(progress, sel->nir, nir_opt_cse); - NIR_PASS(progress, sel->nir, nir_opt_peephole_select, 8); + NIR_PASS(progress, sel->nir, nir_opt_peephole_select, 8, true); /* Needed for algebraic lowering */ NIR_PASS(progress, sel->nir, nir_opt_algebraic); @@ -825,6 +846,8 @@ NIR_PASS(progress, sel->nir, nir_opt_loop_unroll, 0); } } while (progress); + + NIR_PASS_V(sel->nir, nir_lower_bool_to_int32); } static void declare_nir_input_vs(struct si_shader_context *ctx, @@ -916,6 +939,12 @@ /* dynamic_index is the bindless handle */ if (image) { + /* For simplicity, bindless image descriptors use fixed + * 16-dword slots for now. 
+ */ + dynamic_index = LLVMBuildMul(ctx->ac.builder, dynamic_index, + LLVMConstInt(ctx->i32, 2, 0), ""); + return si_load_image_desc(ctx, list, dynamic_index, desc_type, dcc_off, true); } @@ -1028,7 +1057,7 @@ ctx->num_images = util_last_bit(info->images_declared); if (ctx->shader->selector->info.properties[TGSI_PROPERTY_CS_LOCAL_SIZE]) { - assert(nir->info.stage == MESA_SHADER_COMPUTE); + assert(gl_shader_stage_is_compute(nir->info.stage)); si_declare_compute_memory(ctx); } ac_nir_translate(&ctx->ac, &ctx->abi, nir); diff -Nru mesa-18.3.3/src/gallium/drivers/radeonsi/si_shader_tgsi_alu.c mesa-19.0.1/src/gallium/drivers/radeonsi/si_shader_tgsi_alu.c --- mesa-18.3.3/src/gallium/drivers/radeonsi/si_shader_tgsi_alu.c 2018-12-07 18:58:04.000000000 +0000 +++ mesa-19.0.1/src/gallium/drivers/radeonsi/si_shader_tgsi_alu.c 2019-03-31 23:16:37.000000000 +0000 @@ -496,36 +496,23 @@ { struct si_shader_context *ctx = si_shader_context(bld_base); - if (HAVE_LLVM < 0x0700) { - LLVMValueRef bfe_sm5 = - ac_build_bfe(&ctx->ac, emit_data->args[0], - emit_data->args[1], emit_data->args[2], - emit_data->info->opcode == TGSI_OPCODE_IBFE); + /* FIXME: LLVM 7 returns incorrect result when count is 0. + * https://bugs.freedesktop.org/show_bug.cgi?id=107276 + */ + LLVMValueRef zero = ctx->i32_0; + LLVMValueRef bfe_sm5 = + ac_build_bfe(&ctx->ac, emit_data->args[0], + emit_data->args[1], emit_data->args[2], + emit_data->info->opcode == TGSI_OPCODE_IBFE); - /* Correct for GLSL semantics. */ - LLVMValueRef cond = LLVMBuildICmp(ctx->ac.builder, LLVMIntUGE, emit_data->args[2], - LLVMConstInt(ctx->i32, 32, 0), ""); - emit_data->output[emit_data->chan] = - LLVMBuildSelect(ctx->ac.builder, cond, emit_data->args[0], bfe_sm5, ""); - } else { - /* FIXME: LLVM 7 returns incorrect result when count is 0. 
- * https://bugs.freedesktop.org/show_bug.cgi?id=107276 - */ - LLVMValueRef zero = ctx->i32_0; - LLVMValueRef bfe_sm5 = - ac_build_bfe(&ctx->ac, emit_data->args[0], - emit_data->args[1], emit_data->args[2], - emit_data->info->opcode == TGSI_OPCODE_IBFE); - - /* Correct for GLSL semantics. */ - LLVMValueRef cond = LLVMBuildICmp(ctx->ac.builder, LLVMIntUGE, emit_data->args[2], - LLVMConstInt(ctx->i32, 32, 0), ""); - LLVMValueRef cond2 = LLVMBuildICmp(ctx->ac.builder, LLVMIntEQ, emit_data->args[2], - zero, ""); - bfe_sm5 = LLVMBuildSelect(ctx->ac.builder, cond, emit_data->args[0], bfe_sm5, ""); - emit_data->output[emit_data->chan] = - LLVMBuildSelect(ctx->ac.builder, cond2, zero, bfe_sm5, ""); - } + /* Correct for GLSL semantics. */ + LLVMValueRef cond = LLVMBuildICmp(ctx->ac.builder, LLVMIntUGE, emit_data->args[2], + LLVMConstInt(ctx->i32, 32, 0), ""); + LLVMValueRef cond2 = LLVMBuildICmp(ctx->ac.builder, LLVMIntEQ, emit_data->args[2], + zero, ""); + bfe_sm5 = LLVMBuildSelect(ctx->ac.builder, cond, emit_data->args[0], bfe_sm5, ""); + emit_data->output[emit_data->chan] = + LLVMBuildSelect(ctx->ac.builder, cond2, zero, bfe_sm5, ""); } /* this is ffs in C */ diff -Nru mesa-18.3.3/src/gallium/drivers/radeonsi/si_shader_tgsi_mem.c mesa-19.0.1/src/gallium/drivers/radeonsi/si_shader_tgsi_mem.c --- mesa-18.3.3/src/gallium/drivers/radeonsi/si_shader_tgsi_mem.c 2018-12-07 18:58:04.000000000 +0000 +++ mesa-19.0.1/src/gallium/drivers/radeonsi/si_shader_tgsi_mem.c 2019-03-31 23:16:37.000000000 +0000 @@ -396,20 +396,53 @@ * For LOAD, set this to (store | atomic) slot usage in the shader. * For STORE, set this to (load | atomic) slot usage in the shader. * \param images_reverse_access_mask Same as above, but for images. + * \param bindless_buffer_reverse_access_mask Same as above, but for bindless image buffers. + * \param bindless_image_reverse_access_mask Same as above, but for bindless images. 
*/ static bool is_oneway_access_only(const struct tgsi_full_instruction *inst, const struct tgsi_shader_info *info, unsigned shader_buffers_reverse_access_mask, - unsigned images_reverse_access_mask) + unsigned images_reverse_access_mask, + bool bindless_buffer_reverse_access_mask, + bool bindless_image_reverse_access_mask) { + enum tgsi_file_type resource_file; + unsigned resource_index; + bool resource_indirect; + + if (inst->Instruction.Opcode == TGSI_OPCODE_STORE) { + resource_file = inst->Dst[0].Register.File; + resource_index = inst->Dst[0].Register.Index; + resource_indirect = inst->Dst[0].Register.Indirect; + } else { + resource_file = inst->Src[0].Register.File; + resource_index = inst->Src[0].Register.Index; + resource_indirect = inst->Src[0].Register.Indirect; + } + + assert(resource_file == TGSI_FILE_BUFFER || + resource_file == TGSI_FILE_IMAGE || + /* bindless image */ + resource_file == TGSI_FILE_INPUT || + resource_file == TGSI_FILE_OUTPUT || + resource_file == TGSI_FILE_CONSTANT || + resource_file == TGSI_FILE_TEMPORARY || + resource_file == TGSI_FILE_IMMEDIATE); + + assert(resource_file != TGSI_FILE_BUFFER || + inst->Memory.Texture == TGSI_TEXTURE_BUFFER); + + bool bindless = resource_file != TGSI_FILE_BUFFER && + resource_file != TGSI_FILE_IMAGE; + /* RESTRICT means NOALIAS. * If there are no writes, we can assume the accessed memory is read-only. * If there are no reads, we can assume the accessed memory is write-only. 
*/ - if (inst->Memory.Qualifier & TGSI_MEMORY_RESTRICT) { + if (inst->Memory.Qualifier & TGSI_MEMORY_RESTRICT && !bindless) { unsigned reverse_access_mask; - if (inst->Src[0].Register.File == TGSI_FILE_BUFFER) { + if (resource_file == TGSI_FILE_BUFFER) { reverse_access_mask = shader_buffers_reverse_access_mask; } else if (inst->Memory.Texture == TGSI_TEXTURE_BUFFER) { reverse_access_mask = info->images_buffers & @@ -419,12 +452,12 @@ images_reverse_access_mask; } - if (inst->Src[0].Register.Indirect) { + if (resource_indirect) { if (!reverse_access_mask) return true; } else { if (!(reverse_access_mask & - (1u << inst->Src[0].Register.Index))) + (1u << resource_index))) return true; } } @@ -437,15 +470,15 @@ * Same for the case when there are no writes/reads for non-buffer * images. */ - if (inst->Src[0].Register.File == TGSI_FILE_BUFFER || - (inst->Memory.Texture == TGSI_TEXTURE_BUFFER && - (inst->Src[0].Register.File == TGSI_FILE_IMAGE || - tgsi_is_bindless_image_file(inst->Src[0].Register.File)))) { + if (resource_file == TGSI_FILE_BUFFER || + inst->Memory.Texture == TGSI_TEXTURE_BUFFER) { if (!shader_buffers_reverse_access_mask && - !(info->images_buffers & images_reverse_access_mask)) + !(info->images_buffers & images_reverse_access_mask) && + !bindless_buffer_reverse_access_mask) return true; } else { - if (!(~info->images_buffers & images_reverse_access_mask)) + if (!(~info->images_buffers & images_reverse_access_mask) && + !bindless_image_reverse_access_mask) return true; } return false; @@ -474,8 +507,7 @@ bool ubo = inst->Src[0].Register.File == TGSI_FILE_CONSTBUF; args.resource = shader_buffer_fetch_rsrc(ctx, &inst->Src[0], ubo); voffset = ac_to_integer(&ctx->ac, lp_build_emit_fetch(bld_base, inst, 1, 0)); - } else if (inst->Src[0].Register.File == TGSI_FILE_IMAGE || - tgsi_is_bindless_image_file(inst->Src[0].Register.File)) { + } else { unsigned target = inst->Memory.Texture; image_fetch_rsrc(bld_base, &inst->Src[0], false, target, &args.resource); @@ 
-499,7 +531,11 @@ info->shader_buffers_store | info->shader_buffers_atomic, info->images_store | - info->images_atomic); + info->images_atomic, + info->uses_bindless_buffer_store | + info->uses_bindless_buffer_atomic, + info->uses_bindless_image_store | + info->uses_bindless_image_atomic); args.cache_policy = get_cache_policy(ctx, inst, false, false, false); if (inst->Src[0].Register.File == TGSI_FILE_BUFFER) { @@ -650,42 +686,42 @@ struct tgsi_full_src_register resource_reg = tgsi_full_src_register_from_dst(&inst->Dst[0]); unsigned target = inst->Memory.Texture; + + if (inst->Dst[0].Register.File == TGSI_FILE_MEMORY) { + store_emit_memory(ctx, emit_data); + return; + } + bool writeonly_memory = is_oneway_access_only(inst, info, info->shader_buffers_load | info->shader_buffers_atomic, info->images_load | - info->images_atomic); - bool is_image = inst->Dst[0].Register.File == TGSI_FILE_IMAGE || - tgsi_is_bindless_image_file(inst->Dst[0].Register.File); - LLVMValueRef chans[4], value; + info->images_atomic, + info->uses_bindless_buffer_load | + info->uses_bindless_buffer_atomic, + info->uses_bindless_image_load | + info->uses_bindless_image_atomic); + LLVMValueRef chans[4]; LLVMValueRef vindex = ctx->i32_0; LLVMValueRef voffset = ctx->i32_0; struct ac_image_args args = {}; - if (inst->Dst[0].Register.File == TGSI_FILE_MEMORY) { - store_emit_memory(ctx, emit_data); - return; - } - for (unsigned chan = 0; chan < 4; ++chan) chans[chan] = lp_build_emit_fetch(bld_base, inst, 1, chan); - value = ac_build_gather_values(&ctx->ac, chans, 4); - if (inst->Dst[0].Register.File == TGSI_FILE_BUFFER) { args.resource = shader_buffer_fetch_rsrc(ctx, &resource_reg, false); voffset = ac_to_integer(&ctx->ac, lp_build_emit_fetch(bld_base, inst, 0, 0)); - } else if (is_image) { + } else { image_fetch_rsrc(bld_base, &resource_reg, true, target, &args.resource); image_fetch_coords(bld_base, inst, 0, args.resource, args.coords); vindex = args.coords[0]; /* for buffers only */ - } else { - 
unreachable("unexpected register file"); } if (inst->Memory.Qualifier & TGSI_MEMORY_VOLATILE) ac_build_waitcnt(&ctx->ac, VM_CNT); + bool is_image = inst->Dst[0].Register.File != TGSI_FILE_BUFFER; args.cache_policy = get_cache_policy(ctx, inst, false, /* atomic */ is_image, /* may_store_unaligned */ @@ -693,27 +729,46 @@ if (inst->Dst[0].Register.File == TGSI_FILE_BUFFER) { store_emit_buffer(ctx, args.resource, inst->Dst[0].Register.WriteMask, - value, voffset, args.cache_policy, writeonly_memory); + ac_build_gather_values(&ctx->ac, chans, 4), + voffset, args.cache_policy, writeonly_memory); return; } if (target == TGSI_TEXTURE_BUFFER) { - LLVMValueRef buf_args[] = { - value, + unsigned num_channels = util_last_bit(inst->Dst[0].Register.WriteMask); + num_channels = util_next_power_of_two(num_channels); + + LLVMValueRef buf_args[6] = { + ac_build_gather_values(&ctx->ac, chans, 4), args.resource, vindex, ctx->i32_0, /* voffset */ - LLVMConstInt(ctx->i1, !!(args.cache_policy & ac_glc), 0), - LLVMConstInt(ctx->i1, !!(args.cache_policy & ac_slc), 0), }; + if (HAVE_LLVM >= 0x0800) { + buf_args[4] = ctx->i32_0; /* soffset */ + buf_args[5] = LLVMConstInt(ctx->i1, args.cache_policy, 0); + } else { + buf_args[4] = LLVMConstInt(ctx->i1, !!(args.cache_policy & ac_glc), 0); + buf_args[5] = LLVMConstInt(ctx->i1, !!(args.cache_policy & ac_slc), 0); + } + + const char *types[] = { "f32", "v2f32", "v4f32" }; + char name[128]; + + snprintf(name, sizeof(name), "%s.%s", + HAVE_LLVM >= 0x0800 ? 
"llvm.amdgcn.struct.buffer.store.format" : + "llvm.amdgcn.buffer.store.format", + types[CLAMP(num_channels, 1, 3) - 1]); + emit_data->output[emit_data->chan] = ac_build_intrinsic( - &ctx->ac, "llvm.amdgcn.buffer.store.format.v4f32", + &ctx->ac, + name, ctx->voidt, buf_args, 6, ac_get_store_intr_attribs(writeonly_memory)); } else { args.opcode = ac_image_store; - args.data[0] = value; + args.data[0] = ac_build_gather_values(&ctx->ac, chans, 4); args.dim = ac_image_dim_from_tgsi_target(ctx->screen, inst->Memory.Texture); args.attributes = ac_get_store_intr_attribs(writeonly_memory); args.dmask = 0xf; @@ -822,19 +877,45 @@ if (inst->Src[0].Register.File == TGSI_FILE_BUFFER) { args.resource = shader_buffer_fetch_rsrc(ctx, &inst->Src[0], false); voffset = ac_to_integer(&ctx->ac, lp_build_emit_fetch(bld_base, inst, 1, 0)); - } else if (inst->Src[0].Register.File == TGSI_FILE_IMAGE || - tgsi_is_bindless_image_file(inst->Src[0].Register.File)) { + } else { image_fetch_rsrc(bld_base, &inst->Src[0], true, inst->Memory.Texture, &args.resource); image_fetch_coords(bld_base, inst, 1, args.resource, args.coords); vindex = args.coords[0]; /* for buffers only */ } - if (inst->Src[0].Register.File == TGSI_FILE_BUFFER || + if (HAVE_LLVM >= 0x0800 && + inst->Src[0].Register.File != TGSI_FILE_BUFFER && inst->Memory.Texture == TGSI_TEXTURE_BUFFER) { LLVMValueRef buf_args[7]; unsigned num_args = 0; + buf_args[num_args++] = args.data[0]; + if (inst->Instruction.Opcode == TGSI_OPCODE_ATOMCAS) + buf_args[num_args++] = args.data[1]; + + buf_args[num_args++] = args.resource; + buf_args[num_args++] = vindex; + buf_args[num_args++] = voffset; + buf_args[num_args++] = ctx->i32_0; /* soffset */ + buf_args[num_args++] = LLVMConstInt(ctx->i32, args.cache_policy & ac_slc, 0); + + char intrinsic_name[64]; + snprintf(intrinsic_name, sizeof(intrinsic_name), + "llvm.amdgcn.struct.buffer.atomic.%s", action->intr_name); + emit_data->output[emit_data->chan] = + ac_to_float(&ctx->ac, + 
ac_build_intrinsic(&ctx->ac, intrinsic_name, + ctx->i32, buf_args, num_args, 0)); + return; + } + + if (inst->Src[0].Register.File == TGSI_FILE_BUFFER || + (HAVE_LLVM < 0x0800 && + inst->Memory.Texture == TGSI_TEXTURE_BUFFER)) { + LLVMValueRef buf_args[7]; + unsigned num_args = 0; + buf_args[num_args++] = args.data[0]; if (inst->Instruction.Opcode == TGSI_OPCODE_ATOMCAS) buf_args[num_args++] = args.data[1]; diff -Nru mesa-18.3.3/src/gallium/drivers/radeonsi/si_state.c mesa-19.0.1/src/gallium/drivers/radeonsi/si_state.c --- mesa-18.3.3/src/gallium/drivers/radeonsi/si_state.c 2018-12-07 18:58:04.000000000 +0000 +++ mesa-19.0.1/src/gallium/drivers/radeonsi/si_state.c 2019-03-31 23:16:37.000000000 +0000 @@ -121,7 +121,7 @@ S_028424_OVERWRITE_COMBINER_MRT_SHARING_DISABLE(1) | S_028424_OVERWRITE_COMBINER_WATERMARK(watermark) | S_028424_OVERWRITE_COMBINER_DISABLE(oc_disable) | - S_028424_DISABLE_CONSTANT_ENCODE_REG(sctx->family == CHIP_RAVEN2)); + S_028424_DISABLE_CONSTANT_ENCODE_REG(sctx->screen->has_dcc_constant_encode)); } /* RB+ register settings. */ @@ -2151,7 +2151,7 @@ unsigned retval = 0; if (target >= PIPE_MAX_TEXTURE_TYPES) { - PRINT_ERR("r600: unsupported texture type %d\n", target); + PRINT_ERR("radeonsi: unsupported texture type %d\n", target); return false; } @@ -3570,7 +3570,7 @@ * @param state 256-bit descriptor; only the high 128 bits are filled in */ void -si_make_buffer_descriptor(struct si_screen *screen, struct r600_resource *buf, +si_make_buffer_descriptor(struct si_screen *screen, struct si_resource *buf, enum pipe_format format, unsigned offset, unsigned size, uint32_t *state) @@ -3613,14 +3613,11 @@ * - For VMEM and inst.IDXEN == 0 or STRIDE == 0, it's in byte units. * - For VMEM and inst.IDXEN == 1 and STRIDE != 0, it's in units of STRIDE. 
*/ - if (screen->info.chip_class >= GFX9) - /* When vindex == 0, LLVM sets IDXEN = 0, thus changing units + if (screen->info.chip_class >= GFX9 && HAVE_LLVM < 0x0800) + /* When vindex == 0, LLVM < 8.0 sets IDXEN = 0, thus changing units * from STRIDE to bytes. This works around it by setting * NUM_RECORDS to at least the size of one element, so that * the first element is readable when IDXEN == 0. - * - * TODO: Fix this in LLVM, but do we need a new intrinsic where - * IDXEN is enforced? */ num_records = num_records ? MAX2(num_records, stride) : 0; else if (screen->info.chip_class == VI) @@ -4064,7 +4061,7 @@ /* Buffer resource. */ if (texture->target == PIPE_BUFFER) { si_make_buffer_descriptor(sctx->screen, - r600_resource(texture), + si_resource(texture), state->format, state->u.buf.offset, state->u.buf.size, @@ -4584,7 +4581,7 @@ unsigned num_divisors = util_last_bit(v->instance_divisor_is_fetched); v->instance_divisor_factor_buffer = - (struct r600_resource*) + (struct si_resource*) pipe_buffer_create(&sscreen->b, 0, PIPE_USAGE_DEFAULT, num_divisors * sizeof(divisor_factors[0])); if (!v->instance_divisor_factor_buffer) { @@ -4633,7 +4630,7 @@ if (sctx->vertex_elements == state) sctx->vertex_elements = NULL; - r600_resource_reference(&v->instance_divisor_factor_buffer, NULL); + si_resource_reference(&v->instance_divisor_factor_buffer, NULL); FREE(state); } @@ -4658,7 +4655,7 @@ dsti->stride = src->stride; si_context_add_resource_size(sctx, buf); if (buf) - r600_resource(buf)->bind_history |= PIPE_BIND_VERTEX_BUFFER; + si_resource(buf)->bind_history |= PIPE_BIND_VERTEX_BUFFER; } } else { for (i = 0; i < count; i++) { @@ -4687,7 +4684,7 @@ cb.user_buffer = NULL; cb.buffer_size = sizeof(array); - si_upload_const_buffer(sctx, (struct r600_resource**)&cb.buffer, + si_upload_const_buffer(sctx, (struct si_resource**)&cb.buffer, (void*)array, sizeof(array), &cb.buffer_offset); @@ -4828,8 +4825,6 @@ sctx->b.set_active_query_state = si_set_active_query_state; - 
sctx->b.draw_vbo = si_draw_vbo; - si_init_config(sctx); } diff -Nru mesa-18.3.3/src/gallium/drivers/radeonsi/si_state_draw.c mesa-19.0.1/src/gallium/drivers/radeonsi/si_state_draw.c --- mesa-18.3.3/src/gallium/drivers/radeonsi/si_state_draw.c 2019-02-01 12:03:20.000000000 +0000 +++ mesa-19.0.1/src/gallium/drivers/radeonsi/si_state_draw.c 2019-03-31 23:16:37.000000000 +0000 @@ -221,7 +221,7 @@ assert(num_tcs_input_cp <= 32); assert(num_tcs_output_cp <= 32); - uint64_t ring_va = r600_resource(sctx->tess_rings)->gpu_address; + uint64_t ring_va = si_resource(sctx->tess_rings)->gpu_address; assert((ring_va & u_bit_consecutive(0, 19)) == 0); tcs_in_layout = S_VS_STATE_LS_OUT_PATCH_SIZE(input_patch_size / 4) | @@ -315,10 +315,12 @@ switch (info->mode) { case PIPE_PRIM_PATCHES: return info->count / info->vertices_per_patch; + case PIPE_PRIM_POLYGON: + return info->count >= 3; case SI_PRIM_RECTANGLE_LIST: return info->count / 3; default: - return u_prims_for_vertices(info->mode, info->count); + return u_decomposed_prims_for_vertices(info->mode, info->count); } } @@ -458,7 +460,7 @@ S_030960_EN_INST_OPT_ADV(sscreen->info.chip_class >= GFX9); } -void si_init_ia_multi_vgt_param_table(struct si_context *sctx) +static void si_init_ia_multi_vgt_param_table(struct si_context *sctx) { for (int prim = 0; prim <= SI_PRIM_RECTANGLE_LIST; prim++) for (int uses_instancing = 0; uses_instancing < 2; uses_instancing++) @@ -621,7 +623,9 @@ /* Draw state. 
*/ if (ia_multi_vgt_param != sctx->last_multi_vgt_param) { if (sctx->chip_class >= GFX9) - radeon_set_uconfig_reg_idx(cs, R_030960_IA_MULTI_VGT_PARAM, 4, ia_multi_vgt_param); + radeon_set_uconfig_reg_idx(cs, sctx->screen, + R_030960_IA_MULTI_VGT_PARAM, 4, + ia_multi_vgt_param); else if (sctx->chip_class >= CIK) radeon_set_context_reg_idx(cs, R_028AA8_IA_MULTI_VGT_PARAM, 1, ia_multi_vgt_param); else @@ -631,7 +635,8 @@ } if (prim != sctx->last_prim) { if (sctx->chip_class >= CIK) - radeon_set_uconfig_reg_idx(cs, R_030908_VGT_PRIMITIVE_TYPE, 1, prim); + radeon_set_uconfig_reg_idx(cs, sctx->screen, + R_030908_VGT_PRIMITIVE_TYPE, 1, prim); else radeon_set_config_reg(cs, R_008958_VGT_PRIMITIVE_TYPE, prim); @@ -719,8 +724,9 @@ } if (sctx->chip_class >= GFX9) { - radeon_set_uconfig_reg_idx(cs, R_03090C_VGT_INDEX_TYPE, - 2, index_type); + radeon_set_uconfig_reg_idx(cs, sctx->screen, + R_03090C_VGT_INDEX_TYPE, 2, + index_type); } else { radeon_emit(cs, PKT3(PKT3_INDEX_TYPE, 0, 0)); radeon_emit(cs, index_type); @@ -731,10 +737,10 @@ index_max_size = (indexbuf->width0 - index_offset) / index_size; - index_va = r600_resource(indexbuf)->gpu_address + index_offset; + index_va = si_resource(indexbuf)->gpu_address + index_offset; radeon_add_to_buffer_list(sctx, sctx->gfx_cs, - r600_resource(indexbuf), + si_resource(indexbuf), RADEON_USAGE_READ, RADEON_PRIO_INDEX_BUFFER); } else { /* On CI and later, non-indexed draws overwrite VGT_INDEX_TYPE, @@ -745,7 +751,7 @@ } if (indirect) { - uint64_t indirect_va = r600_resource(indirect->buffer)->gpu_address; + uint64_t indirect_va = si_resource(indirect->buffer)->gpu_address; assert(indirect_va % 8 == 0); @@ -757,7 +763,7 @@ radeon_emit(cs, indirect_va >> 32); radeon_add_to_buffer_list(sctx, sctx->gfx_cs, - r600_resource(indirect->buffer), + si_resource(indirect->buffer), RADEON_USAGE_READ, RADEON_PRIO_DRAW_INDIRECT); unsigned di_src_sel = index_size ? 
V_0287F0_DI_SRC_SEL_DMA @@ -786,8 +792,8 @@ uint64_t count_va = 0; if (indirect->indirect_draw_count) { - struct r600_resource *params_buf = - r600_resource(indirect->indirect_draw_count); + struct si_resource *params_buf = + si_resource(indirect->indirect_draw_count); radeon_add_to_buffer_list( sctx, sctx->gfx_cs, params_buf, @@ -812,10 +818,15 @@ radeon_emit(cs, di_src_sel); } } else { + unsigned instance_count = info->instance_count; int base_vertex; - radeon_emit(cs, PKT3(PKT3_NUM_INSTANCES, 0, 0)); - radeon_emit(cs, info->instance_count); + if (sctx->last_instance_count == SI_INSTANCE_COUNT_UNKNOWN || + sctx->last_instance_count != instance_count) { + radeon_emit(cs, PKT3(PKT3_NUM_INSTANCES, 0, 0)); + radeon_emit(cs, instance_count); + sctx->last_instance_count = instance_count; + } /* Base vertex and start instance. */ base_vertex = index_size ? info->index_bias : info->start; @@ -1050,7 +1061,8 @@ EOP_DATA_SEL_VALUE_32BIT, sctx->wait_mem_scratch, va, sctx->wait_mem_number, SI_NOT_QUERY); - si_cp_wait_mem(sctx, va, sctx->wait_mem_number, 0xffffffff, 0); + si_cp_wait_mem(sctx, cs, va, sctx->wait_mem_number, 0xffffffff, + WAIT_REG_MEM_EQUAL); } /* Make sure ME is idle (it executes most packets) before continuing. @@ -1251,7 +1263,7 @@ si_emit_draw_registers(sctx, info, num_patches); } -void si_draw_vbo(struct pipe_context *ctx, const struct pipe_draw_info *info) +static void si_draw_vbo(struct pipe_context *ctx, const struct pipe_draw_info *info) { struct si_context *sctx = (struct si_context *)ctx; struct si_state_rasterizer *rs = sctx->queued.named.rasterizer; @@ -1406,11 +1418,11 @@ /* info->start will be added by the drawing code */ index_offset -= start_offset; } else if (sctx->chip_class <= CIK && - r600_resource(indexbuf)->TC_L2_dirty) { + si_resource(indexbuf)->TC_L2_dirty) { /* VI reads index buffers through TC L2, so it doesn't * need this. 
*/ sctx->flags |= SI_CONTEXT_WRITEBACK_GLOBAL_L2; - r600_resource(indexbuf)->TC_L2_dirty = false; + si_resource(indexbuf)->TC_L2_dirty = false; } } @@ -1422,15 +1434,15 @@ /* Indirect buffers use TC L2 on GFX9, but not older hw. */ if (sctx->chip_class <= VI) { - if (r600_resource(indirect->buffer)->TC_L2_dirty) { + if (si_resource(indirect->buffer)->TC_L2_dirty) { sctx->flags |= SI_CONTEXT_WRITEBACK_GLOBAL_L2; - r600_resource(indirect->buffer)->TC_L2_dirty = false; + si_resource(indirect->buffer)->TC_L2_dirty = false; } if (indirect->indirect_draw_count && - r600_resource(indirect->indirect_draw_count)->TC_L2_dirty) { + si_resource(indirect->indirect_draw_count)->TC_L2_dirty) { sctx->flags |= SI_CONTEXT_WRITEBACK_GLOBAL_L2; - r600_resource(indirect->indirect_draw_count)->TC_L2_dirty = false; + si_resource(indirect->indirect_draw_count)->TC_L2_dirty = false; } } } @@ -1531,13 +1543,14 @@ pipe_resource_reference(&indexbuf, NULL); } -void si_draw_rectangle(struct blitter_context *blitter, - void *vertex_elements_cso, - blitter_get_vs_func get_vs, - int x1, int y1, int x2, int y2, - float depth, unsigned num_instances, - enum blitter_attrib_type type, - const union blitter_attrib *attrib) +static void +si_draw_rectangle(struct blitter_context *blitter, + void *vertex_elements_cso, + blitter_get_vs_func get_vs, + int x1, int y1, int x2, int y2, + float depth, unsigned num_instances, + enum blitter_attrib_type type, + const union blitter_attrib *attrib) { struct pipe_context *pipe = util_blitter_get_pipe(blitter); struct si_context *sctx = (struct si_context*)pipe; @@ -1579,19 +1592,23 @@ void si_trace_emit(struct si_context *sctx) { struct radeon_cmdbuf *cs = sctx->gfx_cs; - uint64_t va = sctx->current_saved_cs->trace_buf->gpu_address; uint32_t trace_id = ++sctx->current_saved_cs->trace_id; - radeon_emit(cs, PKT3(PKT3_WRITE_DATA, 3, 0)); - radeon_emit(cs, S_370_DST_SEL(V_370_MEMORY_SYNC) | - S_370_WR_CONFIRM(1) | - S_370_ENGINE_SEL(V_370_ME)); - radeon_emit(cs, va); - 
radeon_emit(cs, va >> 32); - radeon_emit(cs, trace_id); + si_cp_write_data(sctx, sctx->current_saved_cs->trace_buf, + 0, 4, V_370_MEM, V_370_ME, &trace_id); + radeon_emit(cs, PKT3(PKT3_NOP, 0, 0)); radeon_emit(cs, AC_ENCODE_TRACE_POINT(trace_id)); if (sctx->log) u_log_flush(sctx->log); } + +void si_init_draw_functions(struct si_context *sctx) +{ + sctx->b.draw_vbo = si_draw_vbo; + + sctx->blitter->draw_rectangle = si_draw_rectangle; + + si_init_ia_multi_vgt_param_table(sctx); +} diff -Nru mesa-18.3.3/src/gallium/drivers/radeonsi/si_state.h mesa-19.0.1/src/gallium/drivers/radeonsi/si_state.h --- mesa-18.3.3/src/gallium/drivers/radeonsi/si_state.h 2018-12-07 18:58:04.000000000 +0000 +++ mesa-19.0.1/src/gallium/drivers/radeonsi/si_state.h 2019-03-31 23:16:37.000000000 +0000 @@ -132,7 +132,7 @@ struct si_vertex_elements { - struct r600_resource *instance_divisor_factor_buffer; + struct si_resource *instance_divisor_factor_buffer; uint32_t rsrc_word3[SI_MAX_ATTRIBS]; uint16_t src_offset[SI_MAX_ATTRIBS]; uint8_t fix_fetch[SI_MAX_ATTRIBS]; @@ -384,7 +384,7 @@ uint32_t *gpu_list; /* The buffer where the descriptors have been uploaded. */ - struct r600_resource *buffer; + struct si_resource *buffer; uint64_t gpu_address; /* The maximum number of descriptors. 
*/ @@ -465,7 +465,7 @@ void si_release_all_descriptors(struct si_context *sctx); void si_all_descriptors_begin_new_cs(struct si_context *sctx); void si_all_resident_buffers_begin_new_cs(struct si_context *sctx); -void si_upload_const_buffer(struct si_context *sctx, struct r600_resource **rbuffer, +void si_upload_const_buffer(struct si_context *sctx, struct si_resource **buf, const uint8_t *ptr, unsigned size, uint32_t *const_offset); void si_update_all_texture_descriptors(struct si_context *sctx); void si_shader_change_notify(struct si_context *sctx); @@ -474,6 +474,8 @@ void si_emit_compute_shader_pointers(struct si_context *sctx); void si_set_rw_buffer(struct si_context *sctx, uint slot, const struct pipe_constant_buffer *input); +void si_set_rw_shader_buffer(struct si_context *sctx, uint slot, + const struct pipe_shader_buffer *sbuffer); void si_set_active_descriptors(struct si_context *sctx, unsigned desc_idx, uint64_t new_active_mask); void si_set_active_descriptors_for_shader(struct si_context *sctx, @@ -490,7 +492,7 @@ void si_init_state_functions(struct si_context *sctx); void si_init_screen_state_functions(struct si_screen *sscreen); void -si_make_buffer_descriptor(struct si_screen *screen, struct r600_resource *buf, +si_make_buffer_descriptor(struct si_screen *screen, struct si_resource *buf, enum pipe_format format, unsigned offset, unsigned size, uint32_t *state); @@ -541,17 +543,9 @@ uint64_t *samplers_and_images); /* si_state_draw.c */ -void si_init_ia_multi_vgt_param_table(struct si_context *sctx); void si_emit_cache_flush(struct si_context *sctx); -void si_draw_vbo(struct pipe_context *ctx, const struct pipe_draw_info *dinfo); -void si_draw_rectangle(struct blitter_context *blitter, - void *vertex_elements_cso, - blitter_get_vs_func get_vs, - int x1, int y1, int x2, int y2, - float depth, unsigned num_instances, - enum blitter_attrib_type type, - const union blitter_attrib *attrib); void si_trace_emit(struct si_context *sctx); +void 
si_init_draw_functions(struct si_context *sctx); /* si_state_msaa.c */ void si_init_msaa_functions(struct si_context *sctx); diff -Nru mesa-18.3.3/src/gallium/drivers/radeonsi/si_state_shaders.c mesa-19.0.1/src/gallium/drivers/radeonsi/si_state_shaders.c --- mesa-18.3.3/src/gallium/drivers/radeonsi/si_state_shaders.c 2019-02-01 12:03:20.000000000 +0000 +++ mesa-19.0.1/src/gallium/drivers/radeonsi/si_state_shaders.c 2019-03-31 23:16:37.000000000 +0000 @@ -337,10 +337,10 @@ /* SHADER STATES */ static void si_set_tesseval_regs(struct si_screen *sscreen, - struct si_shader_selector *tes, + const struct si_shader_selector *tes, struct si_pm4_state *pm4) { - struct tgsi_shader_info *info = &tes->info; + const struct tgsi_shader_info *info = &tes->info; unsigned tes_prim_mode = info->properties[TGSI_PROPERTY_TES_PRIM_MODE]; unsigned tes_spacing = info->properties[TGSI_PROPERTY_TES_SPACING]; bool tes_vertex_order_cw = info->properties[TGSI_PROPERTY_TES_VERTEX_ORDER_CW]; @@ -464,12 +464,7 @@ static unsigned si_get_num_vs_user_sgprs(unsigned num_always_on_user_sgprs) { /* Add the pointer to VBO descriptors. */ - if (HAVE_32BIT_POINTERS) { - return num_always_on_user_sgprs + 1; - } else { - assert(num_always_on_user_sgprs % 2 == 0); - return num_always_on_user_sgprs + 2; - } + return num_always_on_user_sgprs + 1; } static void si_shader_ls(struct si_screen *sscreen, struct si_shader *shader) @@ -2243,7 +2238,7 @@ sel->nir = state->ir.nir; si_nir_scan_shader(sel->nir, &sel->info); - si_nir_scan_tess_ctrl(sel->nir, &sel->info, &sel->tcs_info); + si_nir_scan_tess_ctrl(sel->nir, &sel->tcs_info); si_lower_nir(sel); } @@ -3094,7 +3089,7 @@ /* Update the shader state to use the new shader bo. 
*/ si_shader_init_pm4_state(sctx->screen, shader); - r600_resource_reference(&shader->scratch_bo, sctx->scratch_buffer); + si_resource_reference(&shader->scratch_bo, sctx->scratch_buffer); si_shader_unlock(shader); return 1; @@ -3204,7 +3199,7 @@ if (scratch_needed_size > 0) { if (scratch_needed_size > current_scratch_buffer_size) { /* Create a bigger scratch buffer */ - r600_resource_reference(&sctx->scratch_buffer, NULL); + si_resource_reference(&sctx->scratch_buffer, NULL); sctx->scratch_buffer = si_aligned_buffer_create(&sctx->screen->b, @@ -3254,10 +3249,10 @@ si_init_config_add_vgt_flush(sctx); - si_pm4_add_bo(sctx->init_config, r600_resource(sctx->tess_rings), + si_pm4_add_bo(sctx->init_config, si_resource(sctx->tess_rings), RADEON_USAGE_READWRITE, RADEON_PRIO_SHADER_RINGS); - uint64_t factor_va = r600_resource(sctx->tess_rings)->gpu_address + + uint64_t factor_va = si_resource(sctx->tess_rings)->gpu_address + sctx->screen->tess_offchip_ring_size; /* Append these registers to the init config state. 
*/ diff -Nru mesa-18.3.3/src/gallium/drivers/radeonsi/si_state_streamout.c mesa-19.0.1/src/gallium/drivers/radeonsi/si_state_streamout.c --- mesa-18.3.3/src/gallium/drivers/radeonsi/si_state_streamout.c 2018-09-27 19:13:54.000000000 +0000 +++ mesa-19.0.1/src/gallium/drivers/radeonsi/si_state_streamout.c 2019-03-31 23:16:37.000000000 +0000 @@ -43,7 +43,7 @@ { struct si_context *sctx = (struct si_context *)ctx; struct si_streamout_target *t; - struct r600_resource *rbuffer = r600_resource(buffer); + struct si_resource *buf = si_resource(buffer); t = CALLOC_STRUCT(si_streamout_target); if (!t) { @@ -64,7 +64,7 @@ t->b.buffer_offset = buffer_offset; t->b.buffer_size = buffer_size; - util_range_add(&rbuffer->valid_buffer_range, buffer_offset, + util_range_add(&buf->valid_buffer_range, buffer_offset, buffer_offset + buffer_size); return &t->b; } @@ -74,7 +74,7 @@ { struct si_streamout_target *t = (struct si_streamout_target*)target; pipe_resource_reference(&t->b.buffer, NULL); - r600_resource_reference(&t->buf_filled_size, NULL); + si_resource_reference(&t->buf_filled_size, NULL); FREE(t); } @@ -93,10 +93,8 @@ const unsigned *offsets) { struct si_context *sctx = (struct si_context *)ctx; - struct si_buffer_resources *buffers = &sctx->rw_buffers; - struct si_descriptors *descs = &sctx->descriptors[SI_DESCS_RW_BUFFERS]; unsigned old_num_targets = sctx->streamout.num_targets; - unsigned i, bufidx; + unsigned i; /* We are going to unbind the buffers. Mark which caches need to be flushed. */ if (sctx->streamout.num_targets && sctx->streamout.begin_emitted) { @@ -111,7 +109,7 @@ */ for (i = 0; i < sctx->streamout.num_targets; i++) if (sctx->streamout.targets[i]) - r600_resource(sctx->streamout.targets[i]->b.buffer)->TC_L2_dirty = true; + si_resource(sctx->streamout.targets[i]->b.buffer)->TC_L2_dirty = true; /* Invalidate the scalar cache in case a streamout buffer is * going to be used as a constant buffer. 
@@ -175,57 +173,20 @@ /* Set the shader resources.*/ for (i = 0; i < num_targets; i++) { - bufidx = SI_VS_STREAMOUT_BUF0 + i; - if (targets[i]) { - struct pipe_resource *buffer = targets[i]->buffer; - uint64_t va = r600_resource(buffer)->gpu_address; - - /* Set the descriptor. - * - * On VI, the format must be non-INVALID, otherwise - * the buffer will be considered not bound and store - * instructions will be no-ops. - */ - uint32_t *desc = descs->list + bufidx*4; - desc[0] = va; - desc[1] = S_008F04_BASE_ADDRESS_HI(va >> 32); - desc[2] = 0xffffffff; - desc[3] = S_008F0C_DST_SEL_X(V_008F0C_SQ_SEL_X) | - S_008F0C_DST_SEL_Y(V_008F0C_SQ_SEL_Y) | - S_008F0C_DST_SEL_Z(V_008F0C_SQ_SEL_Z) | - S_008F0C_DST_SEL_W(V_008F0C_SQ_SEL_W) | - S_008F0C_DATA_FORMAT(V_008F0C_BUF_DATA_FORMAT_32); - - /* Set the resource. */ - pipe_resource_reference(&buffers->buffers[bufidx], - buffer); - radeon_add_to_gfx_buffer_list_check_mem(sctx, - r600_resource(buffer), - buffers->shader_usage, - RADEON_PRIO_SHADER_RW_BUFFER, - true); - r600_resource(buffer)->bind_history |= PIPE_BIND_STREAM_OUTPUT; - - buffers->enabled_mask |= 1u << bufidx; + struct pipe_shader_buffer sbuf; + sbuf.buffer = targets[i]->buffer; + sbuf.buffer_offset = 0; + sbuf.buffer_size = targets[i]->buffer_offset + + targets[i]->buffer_size; + si_set_rw_shader_buffer(sctx, SI_VS_STREAMOUT_BUF0 + i, &sbuf); + si_resource(targets[i]->buffer)->bind_history |= PIPE_BIND_STREAM_OUTPUT; } else { - /* Clear the descriptor and unset the resource. */ - memset(descs->list + bufidx*4, 0, - sizeof(uint32_t) * 4); - pipe_resource_reference(&buffers->buffers[bufidx], - NULL); - buffers->enabled_mask &= ~(1u << bufidx); + si_set_rw_shader_buffer(sctx, SI_VS_STREAMOUT_BUF0 + i, NULL); } } - for (; i < old_num_targets; i++) { - bufidx = SI_VS_STREAMOUT_BUF0 + i; - /* Clear the descriptor and unset the resource. 
*/ - memset(descs->list + bufidx*4, 0, sizeof(uint32_t) * 4); - pipe_resource_reference(&buffers->buffers[bufidx], NULL); - buffers->enabled_mask &= ~(1u << bufidx); - } - - sctx->descriptors_dirty |= 1u << SI_DESCS_RW_BUFFERS; + for (; i < old_num_targets; i++) + si_set_rw_shader_buffer(sctx, SI_VS_STREAMOUT_BUF0 + i, NULL); } static void si_flush_vgt_streamout(struct si_context *sctx) diff -Nru mesa-18.3.3/src/gallium/drivers/radeonsi/si_state_viewport.c mesa-19.0.1/src/gallium/drivers/radeonsi/si_state_viewport.c --- mesa-18.3.3/src/gallium/drivers/radeonsi/si_state_viewport.c 2019-02-01 12:03:20.000000000 +0000 +++ mesa-19.0.1/src/gallium/drivers/radeonsi/si_state_viewport.c 2019-03-31 23:16:37.000000000 +0000 @@ -185,6 +185,16 @@ const unsigned hw_screen_offset_alignment = ctx->chip_class >= VI ? 16 : MAX2(ctx->screen->se_tile_repeat, 16); + /* Indexed by quantization modes */ + static unsigned max_viewport_size[] = {65535, 16383, 4095}; + + /* Ensure that the whole viewport stays representable in + * absolute coordinates. + * See comment in si_set_viewport_states. + */ + assert(vp_as_scissor.maxx <= max_viewport_size[vp_as_scissor.quant_mode] && + vp_as_scissor.maxy <= max_viewport_size[vp_as_scissor.quant_mode]); + hw_screen_offset_x = CLAMP(hw_screen_offset_x, 0, MAX_PA_SU_HARDWARE_SCREEN_OFFSET); hw_screen_offset_y = CLAMP(hw_screen_offset_y, 0, MAX_PA_SU_HARDWARE_SCREEN_OFFSET); @@ -219,7 +229,6 @@ * * The viewport range is [-max_viewport_size/2, max_viewport_size/2]. 
*/ - static unsigned max_viewport_size[] = {65535, 16383, 4095}; assert(vp_as_scissor.quant_mode < ARRAY_SIZE(max_viewport_size)); max_range = max_viewport_size[vp_as_scissor.quant_mode] / 2; left = (-max_range - vp.translate[0]) / vp.scale[0]; @@ -333,6 +342,8 @@ unsigned h = scissor->maxy - scissor->miny; unsigned max_extent = MAX2(w, h); + int max_corner = MAX2(scissor->maxx, scissor->maxy); + unsigned center_x = (scissor->maxx + scissor->minx) / 2; unsigned center_y = (scissor->maxy + scissor->miny) / 2; unsigned max_center = MAX2(center_x, center_y); @@ -358,7 +369,22 @@ if (ctx->family == CHIP_RAVEN) max_extent = 16384; /* Use QUANT_MODE == 16_8. */ - if (max_extent <= 1024) /* 4K scanline area for guardband */ + /* Another constraint is that all coordinates in the viewport + * are representable in fixed point with respect to the + * surface origin. + * + * It means that PA_SU_HARDWARE_SCREEN_OFFSET can't be given + * an offset that would make the upper corner of the viewport + * greater than the maximum representable number post + * quantization, ie 2^quant_bits. + * + * This does not matter for 14.10 and 16.8 formats since the + * offset is already limited at 8k, but it means we can't use + * 12.12 if we are drawing to some pixels outside the lower + * 4k x 4k of the render target. 
+ */ + + if (max_extent <= 1024 && max_corner < 4096) /* 4K scanline area for guardband */ scissor->quant_mode = SI_QUANT_MODE_12_12_FIXED_POINT_1_4096TH; else if (max_extent <= 4096) /* 16K scanline area for guardband */ scissor->quant_mode = SI_QUANT_MODE_14_10_FIXED_POINT_1_1024TH; diff -Nru mesa-18.3.3/src/gallium/drivers/radeonsi/si_test_dma_perf.c mesa-19.0.1/src/gallium/drivers/radeonsi/si_test_dma_perf.c --- mesa-18.3.3/src/gallium/drivers/radeonsi/si_test_dma_perf.c 2018-12-07 18:58:04.000000000 +0000 +++ mesa-19.0.1/src/gallium/drivers/radeonsi/si_test_dma_perf.c 2019-03-31 23:16:37.000000000 +0000 @@ -181,7 +181,8 @@ si_cp_dma_copy_buffer(sctx, dst, src, 0, 0, size, 0, SI_COHERENCY_NONE, cache_policy); } else { - si_cp_dma_clear_buffer(sctx, dst, 0, size, clear_value, + si_cp_dma_clear_buffer(sctx, sctx->gfx_cs, dst, 0, size, + clear_value, 0, SI_COHERENCY_NONE, cache_policy); } } else if (test_sdma) { diff -Nru mesa-18.3.3/src/gallium/drivers/radeonsi/si_texture.c mesa-19.0.1/src/gallium/drivers/radeonsi/si_texture.c --- mesa-18.3.3/src/gallium/drivers/radeonsi/si_texture.c 2018-12-07 18:58:04.000000000 +0000 +++ mesa-19.0.1/src/gallium/drivers/radeonsi/si_texture.c 2019-03-31 23:16:37.000000000 +0000 @@ -303,7 +303,7 @@ flags |= RADEON_SURF_SHAREABLE; if (is_imported) flags |= RADEON_SURF_IMPORTED | RADEON_SURF_SHAREABLE; - if (!(ptex->flags & SI_RESOURCE_FLAG_FORCE_TILING)) + if (!(ptex->flags & SI_RESOURCE_FLAG_FORCE_MSAA_TILING)) flags |= RADEON_SURF_OPTIMIZE_FOR_SPACE; r = sscreen->ws->surface_init(sscreen->ws, ptex, flags, bpe, @@ -431,7 +431,7 @@ tex->cb_color_info &= ~S_028C70_FAST_CLEAR(1); if (tex->cmask_buffer != &tex->buffer) - r600_resource_reference(&tex->cmask_buffer, NULL); + si_resource_reference(&tex->cmask_buffer, NULL); tex->cmask_buffer = NULL; @@ -482,7 +482,7 @@ * context 1 & 2 read garbage, because DCC is disabled, yet there are * compressed tiled * - * \param sctx the current context if you have one, or rscreen->aux_context + * 
\param sctx the current context if you have one, or sscreen->aux_context * if you don't. */ bool si_texture_disable_dcc(struct si_context *sctx, @@ -577,12 +577,12 @@ if (tex->cmask_buffer == &tex->buffer) tex->cmask_buffer = NULL; else - r600_resource_reference(&tex->cmask_buffer, NULL); + si_resource_reference(&tex->cmask_buffer, NULL); if (new_tex->cmask_buffer == &new_tex->buffer) tex->cmask_buffer = &tex->buffer; else - r600_resource_reference(&tex->cmask_buffer, new_tex->cmask_buffer); + si_resource_reference(&tex->cmask_buffer, new_tex->cmask_buffer); tex->dcc_offset = new_tex->dcc_offset; tex->cb_color_info = new_tex->cb_color_info; @@ -606,9 +606,9 @@ tex->separate_dcc_dirty = new_tex->separate_dcc_dirty; tex->dcc_gather_statistics = new_tex->dcc_gather_statistics; - r600_resource_reference(&tex->dcc_separate_buffer, + si_resource_reference(&tex->dcc_separate_buffer, new_tex->dcc_separate_buffer); - r600_resource_reference(&tex->last_dcc_separate_buffer, + si_resource_reference(&tex->last_dcc_separate_buffer, new_tex->last_dcc_separate_buffer); if (new_bind_flag == PIPE_BIND_LINEAR) { @@ -726,7 +726,7 @@ { struct si_screen *sscreen = (struct si_screen*)screen; struct si_context *sctx; - struct r600_resource *res = r600_resource(resource); + struct si_resource *res = si_resource(resource); struct si_texture *tex = (struct si_texture*)resource; struct radeon_bo_metadata metadata; bool update_metadata = false; @@ -865,16 +865,16 @@ struct pipe_resource *ptex) { struct si_texture *tex = (struct si_texture*)ptex; - struct r600_resource *resource = &tex->buffer; + struct si_resource *resource = &tex->buffer; si_texture_reference(&tex->flushed_depth_texture, NULL); if (tex->cmask_buffer != &tex->buffer) { - r600_resource_reference(&tex->cmask_buffer, NULL); + si_resource_reference(&tex->cmask_buffer, NULL); } pb_reference(&resource->buf, NULL); - r600_resource_reference(&tex->dcc_separate_buffer, NULL); - r600_resource_reference(&tex->last_dcc_separate_buffer, 
NULL); + si_resource_reference(&tex->dcc_separate_buffer, NULL); + si_resource_reference(&tex->last_dcc_separate_buffer, NULL); FREE(tex); } @@ -1117,7 +1117,7 @@ struct radeon_surf *surface) { struct si_texture *tex; - struct r600_resource *resource; + struct si_resource *resource; struct si_screen *sscreen = (struct si_screen*)screen; tex = CALLOC_STRUCT(si_texture); @@ -1293,7 +1293,7 @@ const struct pipe_resource *templ, bool tc_compatible_htile) { const struct util_format_description *desc = util_format_description(templ->format); - bool force_tiling = templ->flags & SI_RESOURCE_FLAG_FORCE_TILING; + bool force_tiling = templ->flags & SI_RESOURCE_FLAG_FORCE_MSAA_TILING; bool is_depth_stencil = util_format_is_depth_or_stencil(templ->format) && !(templ->flags & SI_RESOURCE_FLAG_FLUSHED_DEPTH); @@ -1487,7 +1487,9 @@ templ->depth0 != 1 || templ->last_level != 0) return NULL; - buf = sscreen->ws->buffer_from_handle(sscreen->ws, whandle, &stride, &offset); + buf = sscreen->ws->buffer_from_handle(sscreen->ws, whandle, + sscreen->info.max_alignment, + &stride, &offset); if (!buf) return NULL; @@ -1635,7 +1637,7 @@ struct si_context *sctx = (struct si_context*)ctx; struct si_texture *tex = (struct si_texture*)texture; struct si_transfer *trans; - struct r600_resource *buf; + struct si_resource *buf; unsigned offset = 0; char *map; bool use_staging_texture = false; @@ -1796,6 +1798,12 @@ buf = &tex->buffer; } + /* Always unmap texture CPU mappings on 32-bit architectures, so that + * we don't run out of the CPU address space. 
+ */ + if (sizeof(void*) == 4) + usage |= RADEON_TRANSFER_TEMPORARY; + if (!(map = si_buffer_map_sync_with_rings(sctx, buf, usage))) goto fail_trans; @@ -1803,7 +1811,7 @@ return map + offset; fail_trans: - r600_resource_reference(&trans->staging, NULL); + si_resource_reference(&trans->staging, NULL); pipe_resource_reference(&trans->b.b.resource, NULL); FREE(trans); return NULL; @@ -1817,6 +1825,16 @@ struct pipe_resource *texture = transfer->resource; struct si_texture *tex = (struct si_texture*)texture; + /* Always unmap texture CPU mappings on 32-bit architectures, so that + * we don't run out of the CPU address space. + */ + if (sizeof(void*) == 4) { + struct si_resource *buf = + stransfer->staging ? stransfer->staging : &tex->buffer; + + sctx->ws->buffer_unmap(buf->buf); + } + if ((transfer->usage & PIPE_TRANSFER_WRITE) && stransfer->staging) { if (tex->is_depth && tex->buffer.b.b.nr_samples <= 1) { ctx->resource_copy_region(ctx, texture, transfer->level, @@ -1830,7 +1848,7 @@ if (stransfer->staging) { sctx->num_alloc_tex_transfer_bytes += stransfer->staging->buf->size; - r600_resource_reference(&stransfer->staging, NULL); + si_resource_reference(&stransfer->staging, NULL); } /* Heuristic for {upload, draw, upload, draw, ..}: @@ -2281,11 +2299,10 @@ union pipe_query_result result; /* Read the results. */ - ctx->get_query_result(ctx, sctx->dcc_stats[i].ps_stats[2], + struct pipe_query *query = sctx->dcc_stats[i].ps_stats[2]; + ctx->get_query_result(ctx, query, true, &result); - si_query_hw_reset_buffers(sctx, - (struct si_query_hw*) - sctx->dcc_stats[i].ps_stats[2]); + si_query_buffer_reset(sctx, &((struct si_query_hw*)query)->buffer); /* Compute the approximate number of fullscreen draws. 
*/ tex->ps_draw_ratio = @@ -2338,6 +2355,7 @@ return NULL; buf = sscreen->ws->buffer_from_handle(sscreen->ws, whandle, + sscreen->info.max_alignment, &stride, &offset); if (!buf) { free(memobj); diff -Nru mesa-18.3.3/src/gallium/drivers/softpipe/sp_screen.c mesa-19.0.1/src/gallium/drivers/softpipe/sp_screen.c --- mesa-18.3.3/src/gallium/drivers/softpipe/sp_screen.c 2018-12-07 18:58:04.000000000 +0000 +++ mesa-19.0.1/src/gallium/drivers/softpipe/sp_screen.c 2019-03-31 23:16:37.000000000 +0000 @@ -265,6 +265,8 @@ return 1; case PIPE_CAP_CLEAR_TEXTURE: return 1; + case PIPE_CAP_MAX_VARYINGS: + return TGSI_EXEC_MAX_INPUT_ATTRIBS; case PIPE_CAP_MULTISAMPLE_Z_RESOLVE: case PIPE_CAP_RESOURCE_FROM_USER_MEMORY: case PIPE_CAP_DEVICE_RESET_STATUS_QUERY: diff -Nru mesa-18.3.3/src/gallium/drivers/softpipe/sp_tile_cache.c mesa-19.0.1/src/gallium/drivers/softpipe/sp_tile_cache.c --- mesa-18.3.3/src/gallium/drivers/softpipe/sp_tile_cache.c 2017-11-05 00:14:08.000000000 +0000 +++ mesa-19.0.1/src/gallium/drivers/softpipe/sp_tile_cache.c 2019-03-31 23:16:37.000000000 +0000 @@ -373,17 +373,18 @@ if (util_format_is_pure_uint(tc->surface->format)) { pipe_put_tile_ui_format(pt, tc->transfer_map[layer], x, y, TILE_SIZE, TILE_SIZE, - pt->resource->format, + tc->surface->format, (unsigned *) tc->tile->data.colorui128); } else if (util_format_is_pure_sint(tc->surface->format)) { pipe_put_tile_i_format(pt, tc->transfer_map[layer], x, y, TILE_SIZE, TILE_SIZE, - pt->resource->format, + tc->surface->format, (int *) tc->tile->data.colori128); } else { - pipe_put_tile_rgba(pt, tc->transfer_map[layer], - x, y, TILE_SIZE, TILE_SIZE, - (float *) tc->tile->data.color); + pipe_put_tile_rgba_format(pt, tc->transfer_map[layer], + x, y, TILE_SIZE, TILE_SIZE, + tc->surface->format, + (float *) tc->tile->data.color); } } numCleared++; diff -Nru mesa-18.3.3/src/gallium/drivers/svga/svga_cmd.c mesa-19.0.1/src/gallium/drivers/svga/svga_cmd.c --- mesa-18.3.3/src/gallium/drivers/svga/svga_cmd.c 2018-12-07 
18:58:04.000000000 +0000 +++ mesa-19.0.1/src/gallium/drivers/svga/svga_cmd.c 2019-03-31 23:16:37.000000000 +0000 @@ -1693,7 +1693,7 @@ return PIPE_ERROR_OUT_OF_MEMORY; swc->surface_relocation(swc, &cmd->sid, &cmd->mobid, surface, - SVGA_RELOC_READ | SVGA_RELOC_INTERNAL); + SVGA_RELOC_READ); swc->commit(swc); diff -Nru mesa-18.3.3/src/gallium/drivers/svga/svga_format.c mesa-19.0.1/src/gallium/drivers/svga/svga_format.c --- mesa-18.3.3/src/gallium/drivers/svga/svga_format.c 2018-12-07 18:58:04.000000000 +0000 +++ mesa-19.0.1/src/gallium/drivers/svga/svga_format.c 2019-03-31 23:16:37.000000000 +0000 @@ -370,6 +370,11 @@ { PIPE_FORMAT_A1B5G5R5_UNORM, SVGA3D_FORMAT_INVALID, SVGA3D_FORMAT_INVALID, SVGA3D_FORMAT_INVALID, 0 }, { PIPE_FORMAT_X1B5G5R5_UNORM, SVGA3D_FORMAT_INVALID, SVGA3D_FORMAT_INVALID, SVGA3D_FORMAT_INVALID, 0 }, { PIPE_FORMAT_A4B4G4R4_UNORM, SVGA3D_FORMAT_INVALID, SVGA3D_FORMAT_INVALID, SVGA3D_FORMAT_INVALID, 0 }, + { PIPE_FORMAT_R8_SRGB, SVGA3D_FORMAT_INVALID, SVGA3D_FORMAT_INVALID, SVGA3D_FORMAT_INVALID, 0 }, + { PIPE_FORMAT_A8L8_SINT, SVGA3D_FORMAT_INVALID, SVGA3D_FORMAT_INVALID, SVGA3D_FORMAT_INVALID, 0 }, + { PIPE_FORMAT_G8R8_SINT, SVGA3D_FORMAT_INVALID, SVGA3D_FORMAT_INVALID, SVGA3D_FORMAT_INVALID, 0 }, + { PIPE_FORMAT_A8B8G8R8_SINT, SVGA3D_FORMAT_INVALID, SVGA3D_FORMAT_INVALID, SVGA3D_FORMAT_INVALID, 0 }, + { PIPE_FORMAT_X8B8G8R8_SINT, SVGA3D_FORMAT_INVALID, SVGA3D_FORMAT_INVALID, SVGA3D_FORMAT_INVALID, 0 }, }; diff -Nru mesa-18.3.3/src/gallium/drivers/svga/svga_screen.c mesa-19.0.1/src/gallium/drivers/svga/svga_screen.c --- mesa-18.3.3/src/gallium/drivers/svga/svga_screen.c 2018-12-07 18:58:04.000000000 +0000 +++ mesa-19.0.1/src/gallium/drivers/svga/svga_screen.c 2019-03-31 23:16:37.000000000 +0000 @@ -350,6 +350,8 @@ case PIPE_CAP_MAX_TEXTURE_GATHER_COMPONENTS: return sws->have_sm4_1 ? 1 : 0; /* only single-channel textures */ + case PIPE_CAP_MAX_VARYINGS: + return sws->have_vgpu10 ? 
VGPU10_MAX_FS_INPUTS : 10; /* Unsupported features */ case PIPE_CAP_TEXTURE_MIRROR_CLAMP: diff -Nru mesa-18.3.3/src/gallium/drivers/swr/rasterizer/jitter/builder_gfx_mem.cpp mesa-19.0.1/src/gallium/drivers/swr/rasterizer/jitter/builder_gfx_mem.cpp --- mesa-18.3.3/src/gallium/drivers/swr/rasterizer/jitter/builder_gfx_mem.cpp 2018-09-27 19:13:54.000000000 +0000 +++ mesa-19.0.1/src/gallium/drivers/swr/rasterizer/jitter/builder_gfx_mem.cpp 2019-03-31 23:16:37.000000000 +0000 @@ -52,7 +52,7 @@ void BuilderGfxMem::AssertGFXMemoryParams(Value* ptr, Builder::JIT_MEM_CLIENT usage) { - SWR_ASSERT(!(ptr->getType() == mInt64Ty && usage == MEM_CLIENT_INTERNAL), + SWR_ASSERT(!(ptr->getType() == mInt64Ty && usage == JIT_MEM_CLIENT::MEM_CLIENT_INTERNAL), "Internal memory should not be gfxptr_t."); } diff -Nru mesa-18.3.3/src/gallium/drivers/swr/rasterizer/jitter/builder_gfx_mem.h mesa-19.0.1/src/gallium/drivers/swr/rasterizer/jitter/builder_gfx_mem.h --- mesa-18.3.3/src/gallium/drivers/swr/rasterizer/jitter/builder_gfx_mem.h 2018-09-27 19:13:54.000000000 +0000 +++ mesa-19.0.1/src/gallium/drivers/swr/rasterizer/jitter/builder_gfx_mem.h 2019-03-31 23:16:37.000000000 +0000 @@ -51,21 +51,21 @@ virtual LoadInst* LOAD(Value* Ptr, const char* Name, Type* Ty = nullptr, - JIT_MEM_CLIENT usage = MEM_CLIENT_INTERNAL); + JIT_MEM_CLIENT usage = JIT_MEM_CLIENT::MEM_CLIENT_INTERNAL); virtual LoadInst* LOAD(Value* Ptr, const Twine& Name = "", Type* Ty = nullptr, - JIT_MEM_CLIENT usage = MEM_CLIENT_INTERNAL); + JIT_MEM_CLIENT usage = JIT_MEM_CLIENT::MEM_CLIENT_INTERNAL); virtual LoadInst* LOAD(Value* Ptr, bool isVolatile, const Twine& Name = "", Type* Ty = nullptr, - JIT_MEM_CLIENT usage = MEM_CLIENT_INTERNAL); + JIT_MEM_CLIENT usage = JIT_MEM_CLIENT::MEM_CLIENT_INTERNAL); virtual LoadInst* LOAD(Value* BasePtr, const std::initializer_list& offset, const llvm::Twine& Name = "", Type* Ty = nullptr, - JIT_MEM_CLIENT usage = MEM_CLIENT_INTERNAL); + JIT_MEM_CLIENT usage = 
JIT_MEM_CLIENT::MEM_CLIENT_INTERNAL); virtual CallInst* MASKED_LOAD(Value* Ptr, @@ -74,36 +74,36 @@ Value* PassThru = nullptr, const Twine& Name = "", Type* Ty = nullptr, - JIT_MEM_CLIENT usage = MEM_CLIENT_INTERNAL); + JIT_MEM_CLIENT usage = JIT_MEM_CLIENT::MEM_CLIENT_INTERNAL); virtual Value* GATHERPS(Value* src, Value* pBase, Value* indices, Value* mask, uint8_t scale = 1, - JIT_MEM_CLIENT usage = MEM_CLIENT_INTERNAL); + JIT_MEM_CLIENT usage = JIT_MEM_CLIENT::MEM_CLIENT_INTERNAL); virtual Value* GATHERDD(Value* src, Value* pBase, Value* indices, Value* mask, uint8_t scale = 1, - JIT_MEM_CLIENT usage = MEM_CLIENT_INTERNAL); + JIT_MEM_CLIENT usage = JIT_MEM_CLIENT::MEM_CLIENT_INTERNAL); virtual void SCATTERPS(Value* pDst, Value* vSrc, Value* vOffsets, Value* vMask, - JIT_MEM_CLIENT usage = MEM_CLIENT_INTERNAL); + JIT_MEM_CLIENT usage = JIT_MEM_CLIENT::MEM_CLIENT_INTERNAL); Value* TranslateGfxAddressForRead(Value* xpGfxAddress, Type* PtrTy = nullptr, const Twine& Name = "", - JIT_MEM_CLIENT usage = MEM_CLIENT_INTERNAL); + JIT_MEM_CLIENT usage = JIT_MEM_CLIENT::MEM_CLIENT_INTERNAL); Value* TranslateGfxAddressForWrite(Value* xpGfxAddress, Type* PtrTy = nullptr, const Twine& Name = "", - JIT_MEM_CLIENT usage = MEM_CLIENT_INTERNAL); + JIT_MEM_CLIENT usage = JIT_MEM_CLIENT::MEM_CLIENT_INTERNAL); protected: diff -Nru mesa-18.3.3/src/gallium/drivers/swr/rasterizer/jitter/builder.h mesa-19.0.1/src/gallium/drivers/swr/rasterizer/jitter/builder.h --- mesa-18.3.3/src/gallium/drivers/swr/rasterizer/jitter/builder.h 2018-09-27 19:13:54.000000000 +0000 +++ mesa-19.0.1/src/gallium/drivers/swr/rasterizer/jitter/builder.h 2019-03-31 23:16:37.000000000 +0000 @@ -161,7 +161,6 @@ #include "builder_math.h" #include "builder_mem.h" - protected: void SetPrivateContext(Value* pPrivateContext) { mpPrivateContext = pPrivateContext; diff -Nru mesa-18.3.3/src/gallium/drivers/swr/rasterizer/jitter/builder_mem.cpp mesa-19.0.1/src/gallium/drivers/swr/rasterizer/jitter/builder_mem.cpp --- 
mesa-18.3.3/src/gallium/drivers/swr/rasterizer/jitter/builder_mem.cpp 2018-09-27 19:13:54.000000000 +0000 +++ mesa-19.0.1/src/gallium/drivers/swr/rasterizer/jitter/builder_mem.cpp 2019-03-31 23:16:37.000000000 +0000 @@ -29,7 +29,6 @@ ******************************************************************************/ #include "jit_pch.hpp" #include "builder.h" -#include "common/rdtsc_buckets.h" #include diff -Nru mesa-18.3.3/src/gallium/drivers/swr/rasterizer/jitter/builder_mem.h mesa-19.0.1/src/gallium/drivers/swr/rasterizer/jitter/builder_mem.h --- mesa-18.3.3/src/gallium/drivers/swr/rasterizer/jitter/builder_mem.h 2018-09-27 19:13:54.000000000 +0000 +++ mesa-19.0.1/src/gallium/drivers/swr/rasterizer/jitter/builder_mem.h 2019-03-31 23:16:37.000000000 +0000 @@ -30,13 +30,13 @@ #pragma once public: -typedef enum _JIT_MEM_CLIENT +enum class JIT_MEM_CLIENT { MEM_CLIENT_INTERNAL, GFX_MEM_CLIENT_FETCH, GFX_MEM_CLIENT_SAMPLER, GFX_MEM_CLIENT_SHADER, -} JIT_MEM_CLIENT; +}; protected: virtual Value* OFFSET_TO_NEXT_COMPONENT(Value* base, Constant* offset); @@ -56,23 +56,23 @@ Value* IN_BOUNDS_GEP(Value* ptr, const std::initializer_list& indexList); virtual LoadInst* - LOAD(Value* Ptr, const char* Name, Type* Ty = nullptr, JIT_MEM_CLIENT usage = MEM_CLIENT_INTERNAL); + LOAD(Value* Ptr, const char* Name, Type* Ty = nullptr, JIT_MEM_CLIENT usage = JIT_MEM_CLIENT::MEM_CLIENT_INTERNAL); virtual LoadInst* LOAD(Value* Ptr, const Twine& Name = "", Type* Ty = nullptr, - JIT_MEM_CLIENT usage = MEM_CLIENT_INTERNAL); + JIT_MEM_CLIENT usage = JIT_MEM_CLIENT::MEM_CLIENT_INTERNAL); virtual LoadInst* - LOAD(Type* Ty, Value* Ptr, const Twine& Name = "", JIT_MEM_CLIENT usage = MEM_CLIENT_INTERNAL); + LOAD(Type* Ty, Value* Ptr, const Twine& Name = "", JIT_MEM_CLIENT usage = JIT_MEM_CLIENT::MEM_CLIENT_INTERNAL); virtual LoadInst* LOAD(Value* Ptr, bool isVolatile, const Twine& Name = "", Type* Ty = nullptr, - JIT_MEM_CLIENT usage = MEM_CLIENT_INTERNAL); + JIT_MEM_CLIENT usage = 
JIT_MEM_CLIENT::MEM_CLIENT_INTERNAL); virtual LoadInst* LOAD(Value* BasePtr, const std::initializer_list& offset, const llvm::Twine& Name = "", Type* Ty = nullptr, - JIT_MEM_CLIENT usage = MEM_CLIENT_INTERNAL); + JIT_MEM_CLIENT usage = JIT_MEM_CLIENT::MEM_CLIENT_INTERNAL); virtual CallInst* MASKED_LOAD(Value* Ptr, unsigned Align, @@ -80,7 +80,7 @@ Value* PassThru = nullptr, const Twine& Name = "", Type* Ty = nullptr, - JIT_MEM_CLIENT usage = MEM_CLIENT_INTERNAL) + JIT_MEM_CLIENT usage = JIT_MEM_CLIENT::MEM_CLIENT_INTERNAL) { return IRB()->CreateMaskedLoad(Ptr, Align, Mask, PassThru, Name); } @@ -101,14 +101,14 @@ Value* mask, Value* vGatherComponents[], bool bPackedOutput, - JIT_MEM_CLIENT usage = MEM_CLIENT_INTERNAL); + JIT_MEM_CLIENT usage = JIT_MEM_CLIENT::MEM_CLIENT_INTERNAL); virtual Value* GATHERPS(Value* src, Value* pBase, Value* indices, Value* mask, uint8_t scale = 1, - JIT_MEM_CLIENT usage = MEM_CLIENT_INTERNAL); + JIT_MEM_CLIENT usage = JIT_MEM_CLIENT::MEM_CLIENT_INTERNAL); void GATHER4PS(const SWR_FORMAT_INFO& info, Value* pSrcBase, @@ -116,14 +116,14 @@ Value* mask, Value* vGatherComponents[], bool bPackedOutput, - JIT_MEM_CLIENT usage = MEM_CLIENT_INTERNAL); + JIT_MEM_CLIENT usage = JIT_MEM_CLIENT::MEM_CLIENT_INTERNAL); virtual Value* GATHERDD(Value* src, Value* pBase, Value* indices, Value* mask, uint8_t scale = 1, - JIT_MEM_CLIENT usage = MEM_CLIENT_INTERNAL); + JIT_MEM_CLIENT usage = JIT_MEM_CLIENT::MEM_CLIENT_INTERNAL); void GATHER4DD(const SWR_FORMAT_INFO& info, Value* pSrcBase, @@ -131,7 +131,7 @@ Value* mask, Value* vGatherComponents[], bool bPackedOutput, - JIT_MEM_CLIENT usage = MEM_CLIENT_INTERNAL); + JIT_MEM_CLIENT usage = JIT_MEM_CLIENT::MEM_CLIENT_INTERNAL); Value* GATHERPD(Value* src, Value* pBase, Value* indices, Value* mask, uint8_t scale = 1); @@ -141,7 +141,7 @@ Value* vSrc, Value* vOffsets, Value* vMask, - JIT_MEM_CLIENT usage = MEM_CLIENT_INTERNAL); + JIT_MEM_CLIENT usage = JIT_MEM_CLIENT::MEM_CLIENT_INTERNAL); void 
Shuffle8bpcGather4(const SWR_FORMAT_INFO& info, Value* vGatherInput, diff -Nru mesa-18.3.3/src/gallium/drivers/swr/rasterizer/jitter/builder_misc.cpp mesa-19.0.1/src/gallium/drivers/swr/rasterizer/jitter/builder_misc.cpp --- mesa-18.3.3/src/gallium/drivers/swr/rasterizer/jitter/builder_misc.cpp 2018-11-05 12:21:01.000000000 +0000 +++ mesa-19.0.1/src/gallium/drivers/swr/rasterizer/jitter/builder_misc.cpp 2019-03-31 23:16:37.000000000 +0000 @@ -765,6 +765,119 @@ Value* Builder::VPOPCNT(Value* a) { return POPCNT(VMOVMSK(a)); } ////////////////////////////////////////////////////////////////////////// + /// @brief Float / Fixed-point conversions + ////////////////////////////////////////////////////////////////////////// + Value* Builder::VCVT_F32_FIXED_SI(Value* vFloat, + uint32_t numIntBits, + uint32_t numFracBits, + const llvm::Twine& name) + { + SWR_ASSERT((numIntBits + numFracBits) <= 32, "Can only handle 32-bit fixed-point values"); + Value* fixed = nullptr; + { + // Do round to nearest int on fractional bits first + // Not entirely perfect for negative numbers, but close enough + vFloat = VROUND(FMUL(vFloat, VIMMED1(float(1 << numFracBits))), + C(_MM_FROUND_TO_NEAREST_INT)); + vFloat = FMUL(vFloat, VIMMED1(1.0f / float(1 << numFracBits))); + + // TODO: Handle INF, NAN, overflow / underflow, etc. 
+ + Value* vSgn = FCMP_OLT(vFloat, VIMMED1(0.0f)); + Value* vFloatInt = BITCAST(vFloat, mSimdInt32Ty); + Value* vFixed = AND(vFloatInt, VIMMED1((1 << 23) - 1)); + vFixed = OR(vFixed, VIMMED1(1 << 23)); + vFixed = SELECT(vSgn, NEG(vFixed), vFixed); + + Value* vExp = LSHR(SHL(vFloatInt, VIMMED1(1)), VIMMED1(24)); + vExp = SUB(vExp, VIMMED1(127)); + + Value* vExtraBits = SUB(VIMMED1(23 - numFracBits), vExp); + + fixed = ASHR(vFixed, vExtraBits, name); + } + + return fixed; + } + + Value* Builder::VCVT_FIXED_SI_F32(Value* vFixed, + uint32_t numIntBits, + uint32_t numFracBits, + const llvm::Twine& name) + { + SWR_ASSERT((numIntBits + numFracBits) <= 32, "Can only handle 32-bit fixed-point values"); + uint32_t extraBits = 32 - numIntBits - numFracBits; + if (numIntBits && extraBits) + { + // Sign extend + Value* shftAmt = VIMMED1(extraBits); + vFixed = ASHR(SHL(vFixed, shftAmt), shftAmt); + } + + Value* fVal = VIMMED1(0.0f); + Value* fFrac = VIMMED1(0.0f); + if (numIntBits) + { + fVal = SI_TO_FP(ASHR(vFixed, VIMMED1(numFracBits)), mSimdFP32Ty, name); + } + + if (numFracBits) + { + fFrac = UI_TO_FP(AND(vFixed, VIMMED1((1 << numFracBits) - 1)), mSimdFP32Ty); + fFrac = FDIV(fFrac, VIMMED1(float(1 << numFracBits)), name); + } + + return FADD(fVal, fFrac, name); + } + + Value* Builder::VCVT_F32_FIXED_UI(Value* vFloat, + uint32_t numIntBits, + uint32_t numFracBits, + const llvm::Twine& name) + { + SWR_ASSERT((numIntBits + numFracBits) <= 32, "Can only handle 32-bit fixed-point values"); + Value* fixed = nullptr; + // KNOB_SIM_FAST_MATH? Below works correctly from a precision + // standpoint... 
+ { + fixed = FP_TO_UI(VROUND(FMUL(vFloat, VIMMED1(float(1 << numFracBits))), + C(_MM_FROUND_TO_NEAREST_INT)), + mSimdInt32Ty); + } + return fixed; + } + + Value* Builder::VCVT_FIXED_UI_F32(Value* vFixed, + uint32_t numIntBits, + uint32_t numFracBits, + const llvm::Twine& name) + { + SWR_ASSERT((numIntBits + numFracBits) <= 32, "Can only handle 32-bit fixed-point values"); + uint32_t extraBits = 32 - numIntBits - numFracBits; + if (numIntBits && extraBits) + { + // Sign extend + Value* shftAmt = VIMMED1(extraBits); + vFixed = ASHR(SHL(vFixed, shftAmt), shftAmt); + } + + Value* fVal = VIMMED1(0.0f); + Value* fFrac = VIMMED1(0.0f); + if (numIntBits) + { + fVal = UI_TO_FP(LSHR(vFixed, VIMMED1(numFracBits)), mSimdFP32Ty, name); + } + + if (numFracBits) + { + fFrac = UI_TO_FP(AND(vFixed, VIMMED1((1 << numFracBits) - 1)), mSimdFP32Ty); + fFrac = FDIV(fFrac, VIMMED1(float(1 << numFracBits)), name); + } + + return FADD(fVal, fFrac, name); + } + + ////////////////////////////////////////////////////////////////////////// /// @brief C functions called by LLVM IR ////////////////////////////////////////////////////////////////////////// diff -Nru mesa-18.3.3/src/gallium/drivers/swr/rasterizer/jitter/builder_misc.h mesa-19.0.1/src/gallium/drivers/swr/rasterizer/jitter/builder_misc.h --- mesa-18.3.3/src/gallium/drivers/swr/rasterizer/jitter/builder_misc.h 2018-09-27 19:13:54.000000000 +0000 +++ mesa-19.0.1/src/gallium/drivers/swr/rasterizer/jitter/builder_misc.h 2019-03-31 23:16:37.000000000 +0000 @@ -123,6 +123,28 @@ Value* VMOVMSK(Value* mask); ////////////////////////////////////////////////////////////////////////// +/// @brief Float / Fixed-point conversions +////////////////////////////////////////////////////////////////////////// +// Signed +Value* VCVT_F32_FIXED_SI(Value* vFloat, + uint32_t numIntBits, + uint32_t numFracBits, + const llvm::Twine& name = ""); +Value* VCVT_FIXED_SI_F32(Value* vFixed, + uint32_t numIntBits, + uint32_t numFracBits, + const llvm::Twine& 
name = ""); +// Unsigned +Value* VCVT_F32_FIXED_UI(Value* vFloat, + uint32_t numIntBits, + uint32_t numFracBits, + const llvm::Twine& name = ""); +Value* VCVT_FIXED_UI_F32(Value* vFixed, + uint32_t numIntBits, + uint32_t numFracBits, + const llvm::Twine& name = ""); + +////////////////////////////////////////////////////////////////////////// /// @brief functions that build IR to call x86 intrinsics directly, or /// emulate them with other instructions if not available on the host ////////////////////////////////////////////////////////////////////////// diff -Nru mesa-18.3.3/src/gallium/drivers/swr/rasterizer/jitter/fetch_jit.cpp mesa-19.0.1/src/gallium/drivers/swr/rasterizer/jitter/fetch_jit.cpp --- mesa-18.3.3/src/gallium/drivers/swr/rasterizer/jitter/fetch_jit.cpp 2018-11-05 12:21:01.000000000 +0000 +++ mesa-19.0.1/src/gallium/drivers/swr/rasterizer/jitter/fetch_jit.cpp 2019-03-31 23:16:37.000000000 +0000 @@ -202,7 +202,7 @@ break; case R32_UINT: (fetchState.bDisableIndexOOBCheck) - ? vIndices = LOAD(indices, "", PointerType::get(mSimdInt32Ty, 0), GFX_MEM_CLIENT_FETCH) + ? 
vIndices = LOAD(indices, "", PointerType::get(mSimdInt32Ty, 0), JIT_MEM_CLIENT::GFX_MEM_CLIENT_FETCH) : vIndices = GetSimdValid32bitIndices(indices, pLastIndex); break; // incoming type is already 32bit int default: @@ -368,7 +368,7 @@ // gather SIMD full pixels per lane then shift/mask to move each component to their // own vector void FetchJit::CreateGatherOddFormats( - SWR_FORMAT format, Value* pMask, Value* pBase, Value* pOffsets, Value* pResult[4]) + SWR_FORMAT format, Value* pMask, Value* xpBase, Value* pOffsets, Value* pResult[4]) { const SWR_FORMAT_INFO& info = GetFormatInfo(format); @@ -378,7 +378,7 @@ Value* pGather; if (info.bpp == 32) { - pGather = GATHERDD(VIMMED1(0), pBase, pOffsets, pMask); + pGather = GATHERDD(VIMMED1(0), xpBase, pOffsets, pMask, 1, JIT_MEM_CLIENT::GFX_MEM_CLIENT_FETCH); } else { @@ -386,29 +386,40 @@ Value* pMem = ALLOCA(mSimdInt32Ty); STORE(VIMMED1(0u), pMem); - pBase = BITCAST(pBase, PointerType::get(mInt8Ty, 0)); - Value* pDstMem = BITCAST(pMem, mInt32PtrTy); + Value* pDstMem = POINTER_CAST(pMem, mInt32PtrTy); for (uint32_t lane = 0; lane < mVWidth; ++lane) { // Get index Value* index = VEXTRACT(pOffsets, C(lane)); Value* mask = VEXTRACT(pMask, C(lane)); + + // use branch around load based on mask + // Needed to avoid page-faults on unmasked lanes + BasicBlock* pCurrentBB = IRB()->GetInsertBlock(); + BasicBlock* pMaskedLoadBlock = + BasicBlock::Create(JM()->mContext, "MaskedLaneLoad", pCurrentBB->getParent()); + BasicBlock* pEndLoadBB = BasicBlock::Create(JM()->mContext, "AfterMaskedLoad", pCurrentBB->getParent()); + + COND_BR(mask, pMaskedLoadBlock, pEndLoadBB); + + JM()->mBuilder.SetInsertPoint(pMaskedLoadBlock); + switch (info.bpp) { case 8: { Value* pDst = BITCAST(GEP(pDstMem, C(lane)), PointerType::get(mInt8Ty, 0)); - Value* pSrc = BITCAST(GEP(pBase, index), PointerType::get(mInt8Ty, 0)); - STORE(LOAD(SELECT(mask, pSrc, pDst)), pDst); + Value* xpSrc = ADD(xpBase, Z_EXT(index, xpBase->getType())); + STORE(LOAD(xpSrc, "", 
mInt8PtrTy, JIT_MEM_CLIENT::GFX_MEM_CLIENT_FETCH), pDst); break; } case 16: { Value* pDst = BITCAST(GEP(pDstMem, C(lane)), PointerType::get(mInt16Ty, 0)); - Value* pSrc = BITCAST(GEP(pBase, index), PointerType::get(mInt16Ty, 0)); - STORE(LOAD(SELECT(mask, pSrc, pDst)), pDst); + Value* xpSrc = ADD(xpBase, Z_EXT(index, xpBase->getType())); + STORE(LOAD(xpSrc, "", mInt16PtrTy, JIT_MEM_CLIENT::GFX_MEM_CLIENT_FETCH), pDst); break; } break; @@ -417,13 +428,13 @@ { // First 16-bits of data Value* pDst = BITCAST(GEP(pDstMem, C(lane)), PointerType::get(mInt16Ty, 0)); - Value* pSrc = BITCAST(GEP(pBase, index), PointerType::get(mInt16Ty, 0)); - STORE(LOAD(SELECT(mask, pSrc, pDst)), pDst); + Value* xpSrc = ADD(xpBase, Z_EXT(index, xpBase->getType())); + STORE(LOAD(xpSrc, "", mInt16PtrTy, JIT_MEM_CLIENT::GFX_MEM_CLIENT_FETCH), pDst); // Last 8-bits of data pDst = BITCAST(GEP(pDst, C(1)), PointerType::get(mInt8Ty, 0)); - pSrc = BITCAST(GEP(pSrc, C(1)), PointerType::get(mInt8Ty, 0)); - STORE(LOAD(SELECT(mask, pSrc, pDst)), pDst); + xpSrc = ADD(xpSrc, C(2)); + STORE(LOAD(xpSrc, "", mInt8PtrTy, JIT_MEM_CLIENT::GFX_MEM_CLIENT_FETCH), pDst); break; } @@ -431,6 +442,9 @@ SWR_INVALID("Shouldn't have BPP = %d now", info.bpp); break; } + + BR(pEndLoadBB); + JM()->mBuilder.SetInsertPoint(pEndLoadBB); } pGather = LOAD(pMem); @@ -550,9 +564,6 @@ Value* stream = LOAD(streams, {ied.StreamIndex, SWR_VERTEX_BUFFER_STATE_xpData}); - // VGATHER* takes an *i8 src pointer - Value* pStreamBase = INT_TO_PTR(stream, PointerType::get(mInt8Ty, 0)); - Value* stride = LOAD(streams, {ied.StreamIndex, SWR_VERTEX_BUFFER_STATE_pitch}); Value* vStride = VBROADCAST(stride); @@ -619,8 +630,9 @@ // do 64bit address offset calculations. 
// calculate byte offset to the start of the VB - Value* baseOffset = MUL(Z_EXT(startOffset, mInt64Ty), Z_EXT(stride, mInt64Ty)); - pStreamBase = GEP(pStreamBase, baseOffset); + Value* baseOffset = MUL(Z_EXT(startOffset, mInt64Ty), Z_EXT(stride, mInt64Ty)); + + // VGATHER* takes an *i8 src pointer so that's what stream is Value* pStreamBaseGFX = ADD(stream, baseOffset); // if we have a start offset, subtract from max vertex. Used for OOB check @@ -698,7 +710,7 @@ { Value* pResults[4]; CreateGatherOddFormats( - (SWR_FORMAT)ied.Format, vGatherMask, pStreamBase, vOffsets, pResults); + (SWR_FORMAT)ied.Format, vGatherMask, pStreamBaseGFX, vOffsets, pResults); ConvertFormat((SWR_FORMAT)ied.Format, pResults); for (uint32_t c = 0; c < 4; c += 1) @@ -733,7 +745,7 @@ // if we have at least one component out of x or y to fetch if (isComponentEnabled(compMask, 0) || isComponentEnabled(compMask, 1)) { - vGatherResult[0] = GATHERPS(gatherSrc, pStreamBase, vOffsets, vGatherMask); + vGatherResult[0] = GATHERPS(gatherSrc, pStreamBaseGFX, vOffsets, vGatherMask); // e.g. result of first 8x32bit integer gather for 16bit components // 256i - 0 1 2 3 4 5 6 7 // xyxy xyxy xyxy xyxy xyxy xyxy xyxy xyxy @@ -744,9 +756,9 @@ if (isComponentEnabled(compMask, 2) || isComponentEnabled(compMask, 3)) { // offset base to the next components(zw) in the vertex to gather - pStreamBase = GEP(pStreamBase, C((char)4)); + pStreamBaseGFX = ADD(pStreamBaseGFX, C((int64_t)4)); - vGatherResult[1] = GATHERPS(gatherSrc, pStreamBase, vOffsets, vGatherMask); + vGatherResult[1] = GATHERPS(gatherSrc, pStreamBaseGFX, vOffsets, vGatherMask); // e.g. result of second 8x32bit integer gather for 16bit components // 256i - 0 1 2 3 4 5 6 7 // zwzw zwzw zwzw zwzw zwzw zwzw zwzw zwzw @@ -783,18 +795,18 @@ { // Gather a SIMD of vertices // APIs allow a 4GB range for offsets - // However, GATHERPS uses signed 32-bit offsets, so only a 2GB range :( - // But, we know that elements must be aligned for FETCH. 
:) - // Right shift the offset by a bit and then scale by 2 to remove the - // sign extension. - Value* vShiftedOffsets = LSHR(vOffsets, 1); + // However, GATHERPS uses signed 32-bit offsets, so +/- 2GB range :( + // Add 2GB to the base pointer and 2GB to the offsets. This makes + // "negative" (large) offsets into positive offsets and small offsets + // into negative offsets. + Value* vNewOffsets = ADD(vOffsets, VIMMED1(0x80000000)); vVertexElements[currentVertexElement++] = GATHERPS(gatherSrc, - pStreamBaseGFX, - vShiftedOffsets, + ADD(pStreamBaseGFX, C((uintptr_t)0x80000000U)), + vNewOffsets, vGatherMask, - 2, - GFX_MEM_CLIENT_FETCH); + 1, + JIT_MEM_CLIENT::GFX_MEM_CLIENT_FETCH); } else { @@ -811,7 +823,6 @@ } // offset base to the next component in the vertex to gather - pStreamBase = GEP(pStreamBase, C((char)4)); pStreamBaseGFX = ADD(pStreamBaseGFX, C((int64_t)4)); } } @@ -854,9 +865,9 @@ mVWidth / 2, ConstantFP::get(IRB()->getDoubleTy(), 0.0f)); Value* pGatherLo = - GATHERPD(vZeroDouble, pStreamBase, vOffsetsLo, vMaskLo); + GATHERPD(vZeroDouble, pStreamBaseGFX, vOffsetsLo, vMaskLo); Value* pGatherHi = - GATHERPD(vZeroDouble, pStreamBase, vOffsetsHi, vMaskHi); + GATHERPD(vZeroDouble, pStreamBaseGFX, vOffsetsHi, vMaskHi); pGatherLo = VCVTPD2PS(pGatherLo); pGatherHi = VCVTPD2PS(pGatherHi); @@ -880,7 +891,7 @@ } // offset base to the next component in the vertex to gather - pStreamBase = GEP(pStreamBase, C((char)8)); + pStreamBaseGFX = ADD(pStreamBaseGFX, C((int64_t)8)); } } break; @@ -936,7 +947,8 @@ // if we have at least one component to fetch if (compMask) { - Value* vGatherResult = GATHERDD(gatherSrc, pStreamBase, vOffsets, vGatherMask); + Value* vGatherResult = GATHERDD( + gatherSrc, pStreamBaseGFX, vOffsets, vGatherMask, 1, JIT_MEM_CLIENT::GFX_MEM_CLIENT_FETCH); // e.g. 
result of an 8x32bit integer gather for 8bit components // 256i - 0 1 2 3 4 5 6 7 // xyzw xyzw xyzw xyzw xyzw xyzw xyzw xyzw @@ -965,7 +977,7 @@ // if we have at least one component out of x or y to fetch if (isComponentEnabled(compMask, 0) || isComponentEnabled(compMask, 1)) { - vGatherResult[0] = GATHERDD(gatherSrc, pStreamBase, vOffsets, vGatherMask); + vGatherResult[0] = GATHERDD(gatherSrc, pStreamBaseGFX, vOffsets, vGatherMask, 1, JIT_MEM_CLIENT::GFX_MEM_CLIENT_FETCH); // e.g. result of first 8x32bit integer gather for 16bit components // 256i - 0 1 2 3 4 5 6 7 // xyxy xyxy xyxy xyxy xyxy xyxy xyxy xyxy @@ -976,9 +988,9 @@ if (isComponentEnabled(compMask, 2) || isComponentEnabled(compMask, 3)) { // offset base to the next components(zw) in the vertex to gather - pStreamBase = GEP(pStreamBase, C((char)4)); + pStreamBaseGFX = ADD(pStreamBaseGFX, C((int64_t)4)); - vGatherResult[1] = GATHERDD(gatherSrc, pStreamBase, vOffsets, vGatherMask); + vGatherResult[1] = GATHERDD(gatherSrc, pStreamBaseGFX, vOffsets, vGatherMask, 1, JIT_MEM_CLIENT::GFX_MEM_CLIENT_FETCH); // e.g. result of second 8x32bit integer gather for 16bit components // 256i - 0 1 2 3 4 5 6 7 // zwzw zwzw zwzw zwzw zwzw zwzw zwzw zwzw @@ -1015,7 +1027,7 @@ if (compCtrl[i] == StoreSrc) { Value* pGather = - GATHERDD(gatherSrc, pStreamBase, vOffsets, vGatherMask); + GATHERDD(gatherSrc, pStreamBaseGFX, vOffsets, vGatherMask, 1, JIT_MEM_CLIENT::GFX_MEM_CLIENT_FETCH); if (conversionType == CONVERT_USCALED) { @@ -1053,7 +1065,7 @@ } // offset base to the next component in the vertex to gather - pStreamBase = GEP(pStreamBase, C((char)4)); + pStreamBaseGFX = ADD(pStreamBaseGFX, C((int64_t)4)); } } break; @@ -1112,7 +1124,7 @@ // if valid, load the index. 
if not, load 0 from the stack Value* pValid = SELECT(mask, pIndex, pZeroIndex); - Value* index = LOAD(pValid, "valid index", Ty, GFX_MEM_CLIENT_FETCH); + Value* index = LOAD(pValid, "valid index", Ty, JIT_MEM_CLIENT::GFX_MEM_CLIENT_FETCH); // zero extended index to 32 bits and insert into the correct simd lane index = Z_EXT(index, mInt32Ty); @@ -1187,7 +1199,7 @@ VIMMED1(0), "vIndices", PointerType::get(mSimdInt32Ty, 0), - GFX_MEM_CLIENT_FETCH); + JIT_MEM_CLIENT::GFX_MEM_CLIENT_FETCH); } ////////////////////////////////////////////////////////////////////////// diff -Nru mesa-18.3.3/src/gallium/drivers/swr/rasterizer/jitter/JitManager.cpp mesa-19.0.1/src/gallium/drivers/swr/rasterizer/jitter/JitManager.cpp --- mesa-18.3.3/src/gallium/drivers/swr/rasterizer/jitter/JitManager.cpp 2018-09-27 19:13:54.000000000 +0000 +++ mesa-19.0.1/src/gallium/drivers/swr/rasterizer/jitter/JitManager.cpp 2019-03-31 23:16:37.000000000 +0000 @@ -63,39 +63,29 @@ mContext(), mBuilder(mContext), mIsModuleFinalized(true), mJitNumber(0), mVWidth(simdWidth), mArch(arch) { + mpCurrentModule = nullptr; + mpExec = nullptr; + InitializeNativeTarget(); InitializeNativeTargetAsmPrinter(); InitializeNativeTargetDisassembler(); - TargetOptions tOpts; - tOpts.AllowFPOpFusion = FPOpFusion::Fast; - tOpts.NoInfsFPMath = false; - tOpts.NoNaNsFPMath = false; - tOpts.UnsafeFPMath = false; - - // tOpts.PrintMachineCode = true; - - std::unique_ptr newModule(new Module("", mContext)); - mpCurrentModule = newModule.get(); - - StringRef hostCPUName; - // force JIT to use the same CPU arch as the rest of swr if (mArch.AVX512F()) { #if USE_SIMD16_SHADERS if (mArch.AVX512ER()) { - hostCPUName = StringRef("knl"); + mHostCpuName = StringRef("knl"); } else { - hostCPUName = StringRef("skylake-avx512"); + mHostCpuName = StringRef("skylake-avx512"); } mUsingAVX512 = true; #else - hostCPUName = StringRef("core-avx2"); + mHostCpuName = StringRef("core-avx2"); #endif if (mVWidth == 0) { @@ -104,7 +94,7 @@ } else if 
(mArch.AVX2()) { - hostCPUName = StringRef("core-avx2"); + mHostCpuName = StringRef("core-avx2"); if (mVWidth == 0) { mVWidth = 8; @@ -114,11 +104,11 @@ { if (mArch.F16C()) { - hostCPUName = StringRef("core-avx-i"); + mHostCpuName = StringRef("core-avx-i"); } else { - hostCPUName = StringRef("corei7-avx"); + mHostCpuName = StringRef("corei7-avx"); } if (mVWidth == 0) { @@ -131,31 +121,21 @@ } - auto optLevel = CodeGenOpt::Aggressive; + mOptLevel = CodeGenOpt::Aggressive; if (KNOB_JIT_OPTIMIZATION_LEVEL >= CodeGenOpt::None && KNOB_JIT_OPTIMIZATION_LEVEL <= CodeGenOpt::Aggressive) { - optLevel = CodeGenOpt::Level(KNOB_JIT_OPTIMIZATION_LEVEL); + mOptLevel = CodeGenOpt::Level(KNOB_JIT_OPTIMIZATION_LEVEL); } - mpCurrentModule->setTargetTriple(sys::getProcessTriple()); - mpExec = EngineBuilder(std::move(newModule)) - .setTargetOptions(tOpts) - .setOptLevel(optLevel) - .setMCPU(hostCPUName) - .create(); - if (KNOB_JIT_ENABLE_CACHE) { - mCache.Init(this, hostCPUName, optLevel); - mpExec->setObjectCache(&mCache); + mCache.Init(this, mHostCpuName, mOptLevel); } -#if LLVM_USE_INTEL_JITEVENTS - JITEventListener* vTune = JITEventListener::createIntelJITEventListener(); - mpExec->RegisterJITEventListener(vTune); -#endif + SetupNewModule(); + mIsModuleFinalized = true; // fetch function signature #if USE_SIMD16_SHADERS @@ -198,6 +178,35 @@ #endif } +void JitManager::CreateExecEngine(std::unique_ptr pModule) +{ + TargetOptions tOpts; + tOpts.AllowFPOpFusion = FPOpFusion::Fast; + tOpts.NoInfsFPMath = false; + tOpts.NoNaNsFPMath = false; + tOpts.UnsafeFPMath = false; + + // tOpts.PrintMachineCode = true; + + mpExec = EngineBuilder(std::move(pModule)) + .setTargetOptions(tOpts) + .setOptLevel(mOptLevel) + .setMCPU(mHostCpuName) + .create(); + + if (KNOB_JIT_ENABLE_CACHE) + { + mpExec->setObjectCache(&mCache); + } + +#if LLVM_USE_INTEL_JITEVENTS + JITEventListener* vTune = JITEventListener::createIntelJITEventListener(); + mpExec->RegisterJITEventListener(vTune); +#endif + + 
mvExecEngines.push_back(mpExec); +} + ////////////////////////////////////////////////////////////////////////// /// @brief Create new LLVM module. void JitManager::SetupNewModule() @@ -207,7 +216,7 @@ std::unique_ptr newModule(new Module("", mContext)); mpCurrentModule = newModule.get(); mpCurrentModule->setTargetTriple(sys::getProcessTriple()); - mpExec->addModule(std::move(newModule)); + CreateExecEngine(std::move(newModule)); mIsModuleFinalized = false; } @@ -443,7 +452,7 @@ ////////////////////////////////////////////////////////////////////////// /// @brief Dump function to file. -void JitManager::DumpToFile(Module* M, const char* fileName) +void JitManager::DumpToFile(Module* M, const char* fileName, llvm::AssemblyAnnotationWriter* annotater) { if (KNOB_DUMP_SHADER_IR) { @@ -458,7 +467,7 @@ sprintf(fName, "%s.%s.ll", funcName, fileName); #endif raw_fd_ostream fd(fName, EC, llvm::sys::fs::F_None); - M->print(fd, nullptr); + M->print(fd, annotater); fd.flush(); } } @@ -573,7 +582,7 @@ uint64_t GetObjectCRC() const { return m_objCRC; } private: - static const uint64_t JC_MAGIC_NUMBER = 0xfedcba9876543211ULL + 4; + static const uint64_t JC_MAGIC_NUMBER = 0xfedcba9876543210ULL + 6; static const size_t JC_STR_MAX_LEN = 32; static const uint32_t JC_PLATFORM_KEY = (LLVM_VERSION_MAJOR << 24) | (LLVM_VERSION_MINOR << 16) | (LLVM_VERSION_PATCH << 8) | @@ -625,6 +634,15 @@ { mCacheDir = KNOB_JIT_CACHE_DIR; } + + // Create cache dir at startup to allow jitter to write debug.ll files + // to that directory. + if (!llvm::sys::fs::exists(mCacheDir.str()) && + llvm::sys::fs::create_directories(mCacheDir.str())) + { + SWR_INVALID("Unable to create directory: %s", mCacheDir.c_str()); + } + } int ExecUnhookedProcess(const std::string& CmdLine, std::string* pStdOut, std::string* pStdErr) @@ -632,6 +650,26 @@ return ExecCmd(CmdLine, "", pStdOut, pStdErr); } +/// Calculate actual directory where module will be cached. +/// This is always a subdirectory of mCacheDir. 
Full absolute +/// path name will be stored in mCurrentModuleCacheDir +void JitCache::CalcModuleCacheDir() +{ + mModuleCacheDir.clear(); + + llvm::SmallString moduleDir = mCacheDir; + + // Create 4 levels of directory hierarchy based on CRC, 256 entries each + uint8_t* pCRC = (uint8_t*)&mCurrentModuleCRC; + for (uint32_t i = 0; i < 4; ++i) + { + llvm::sys::path::append(moduleDir, std::to_string((int)pCRC[i])); + } + + mModuleCacheDir = moduleDir; +} + + /// notifyObjectCompiled - Provides a pointer to compiled code for Module M. void JitCache::notifyObjectCompiled(const llvm::Module* M, llvm::MemoryBufferRef Obj) { @@ -641,16 +679,22 @@ return; } - if (!llvm::sys::fs::exists(mCacheDir.str()) && - llvm::sys::fs::create_directories(mCacheDir.str())) + if (!mModuleCacheDir.size()) { - SWR_INVALID("Unable to create directory: %s", mCacheDir.c_str()); + SWR_INVALID("Unset module cache directory"); + return; + } + + if (!llvm::sys::fs::exists(mModuleCacheDir.str()) && + llvm::sys::fs::create_directories(mModuleCacheDir.str())) + { + SWR_INVALID("Unable to create directory: %s", mModuleCacheDir.c_str()); return; } JitCacheFileHeader header; - llvm::SmallString filePath = mCacheDir; + llvm::SmallString filePath = mModuleCacheDir; llvm::sys::path::append(filePath, moduleID); llvm::SmallString objPath = filePath; @@ -690,12 +734,14 @@ return nullptr; } - if (!llvm::sys::fs::exists(mCacheDir)) + CalcModuleCacheDir(); + + if (!llvm::sys::fs::exists(mModuleCacheDir)) { return nullptr; } - llvm::SmallString filePath = mCacheDir; + llvm::SmallString filePath = mModuleCacheDir; llvm::sys::path::append(filePath, moduleID); llvm::SmallString objFilePath = filePath; @@ -758,3 +804,26 @@ return pBuf; } + +void InterleaveAssemblyAnnotater::emitInstructionAnnot(const llvm::Instruction *pInst, llvm::formatted_raw_ostream &OS) +{ + auto dbgLoc = pInst->getDebugLoc(); + if(dbgLoc) + { + unsigned int line = dbgLoc.getLine(); + if(line != mCurrentLineNo) + { + if(line > 0 && line <= 
mAssembly.size()) + { + // HACK: here we assume that OS is a formatted_raw_ostream(ods()) + // and modify the color accordingly. We can't do the color + // modification on OS because formatted_raw_ostream strips + // the color information. The only way to fix this behavior + // is to patch LLVM. + OS << "\n; " << line << ": " << mAssembly[line-1] << "\n"; + } + mCurrentLineNo = line; + } + } +} + diff -Nru mesa-18.3.3/src/gallium/drivers/swr/rasterizer/jitter/JitManager.h mesa-19.0.1/src/gallium/drivers/swr/rasterizer/jitter/JitManager.h --- mesa-18.3.3/src/gallium/drivers/swr/rasterizer/jitter/JitManager.h 2018-09-27 19:13:54.000000000 +0000 +++ mesa-19.0.1/src/gallium/drivers/swr/rasterizer/jitter/JitManager.h 2019-03-31 23:16:37.000000000 +0000 @@ -31,6 +31,7 @@ #include "jit_pch.hpp" #include "common/isa.hpp" +#include ////////////////////////////////////////////////////////////////////////// @@ -112,9 +113,15 @@ private: std::string mCpu; llvm::SmallString mCacheDir; + llvm::SmallString mModuleCacheDir; uint32_t mCurrentModuleCRC = 0; JitManager* mpJitMgr = nullptr; llvm::CodeGenOpt::Level mOptLevel = llvm::CodeGenOpt::None; + + /// Calculate actual directory where module will be cached. + /// This is always a subdirectory of mCacheDir. 
Full absolute + /// path name will be stored in mCurrentModuleCacheDir + void CalcModuleCacheDir(); }; ////////////////////////////////////////////////////////////////////////// @@ -123,12 +130,21 @@ struct JitManager { JitManager(uint32_t w, const char* arch, const char* core); - ~JitManager(){}; + ~JitManager() + { + for (auto* pExec : mvExecEngines) + { + delete pExec; + } + } - JitLLVMContext mContext; ///< LLVM compiler - llvm::IRBuilder<> mBuilder; ///< LLVM IR Builder - llvm::ExecutionEngine* mpExec; - JitCache mCache; + JitLLVMContext mContext; ///< LLVM compiler + llvm::IRBuilder<> mBuilder; ///< LLVM IR Builder + llvm::ExecutionEngine* mpExec; + std::vector mvExecEngines; + JitCache mCache; + llvm::StringRef mHostCpuName; + llvm::CodeGenOpt::Level mOptLevel; // Need to be rebuilt after a JIT and before building new IR llvm::Module* mpCurrentModule; @@ -147,11 +163,14 @@ // Debugging support std::unordered_map mDebugStructMap; + void CreateExecEngine(std::unique_ptr M); void SetupNewModule(); void DumpAsm(llvm::Function* pFunction, const char* fileName); static void DumpToFile(llvm::Function* f, const char* fileName); - static void DumpToFile(llvm::Module* M, const char* fileName); + static void DumpToFile(llvm::Module* M, + const char* fileName, + llvm::AssemblyAnnotationWriter* annotater = nullptr); static std::string GetOutputDir(); // Debugging support methods @@ -178,3 +197,14 @@ uint32_t lineNum, const std::vector>& members); }; + +class InterleaveAssemblyAnnotater : public llvm::AssemblyAnnotationWriter +{ +public: + void emitInstructionAnnot(const llvm::Instruction* pInst, + llvm::formatted_raw_ostream& OS) override; + std::vector mAssembly; + +private: + uint32_t mCurrentLineNo = 0; +}; diff -Nru mesa-18.3.3/src/gallium/drivers/swr/swr_screen.cpp mesa-19.0.1/src/gallium/drivers/swr/swr_screen.cpp --- mesa-18.3.3/src/gallium/drivers/swr/swr_screen.cpp 2018-12-07 18:58:04.000000000 +0000 +++ mesa-19.0.1/src/gallium/drivers/swr/swr_screen.cpp 
2019-03-31 23:16:37.000000000 +0000 @@ -369,6 +369,8 @@ return 32; case PIPE_CAP_MAX_SHADER_BUFFER_SIZE: return 1 << 27; + case PIPE_CAP_MAX_VARYINGS: + return 32; case PIPE_CAP_VENDOR_ID: return 0xFFFFFFFF; @@ -844,7 +846,9 @@ size_t total_size = (uint64_t)res->swr.depth * res->swr.qpitch * res->swr.pitch * res->swr.numSamples; - if (total_size > SWR_MAX_TEXTURE_SIZE) + + // Let non-sampled textures (e.g. buffer objects) bypass the size limit + if (swr_resource_is_texture(&res->base) && total_size > SWR_MAX_TEXTURE_SIZE) return false; if (allocate) { diff -Nru mesa-18.3.3/src/gallium/drivers/v3d/v3d_blit.c mesa-19.0.1/src/gallium/drivers/v3d/v3d_blit.c --- mesa-18.3.3/src/gallium/drivers/v3d/v3d_blit.c 2018-09-27 19:13:54.000000000 +0000 +++ mesa-19.0.1/src/gallium/drivers/v3d/v3d_blit.c 2019-03-31 23:16:37.000000000 +0000 @@ -25,6 +25,7 @@ #include "util/u_surface.h" #include "util/u_blitter.h" #include "v3d_context.h" +#include "v3d_tiling.h" #if 0 static struct pipe_surface * @@ -183,10 +184,11 @@ util_blitter_save_sample_mask(v3d->blitter, v3d->sample_mask); util_blitter_save_framebuffer(v3d->blitter, &v3d->framebuffer); util_blitter_save_fragment_sampler_states(v3d->blitter, - v3d->fragtex.num_samplers, - (void **)v3d->fragtex.samplers); + v3d->tex[PIPE_SHADER_FRAGMENT].num_samplers, + (void **)v3d->tex[PIPE_SHADER_FRAGMENT].samplers); util_blitter_save_fragment_sampler_views(v3d->blitter, - v3d->fragtex.num_textures, v3d->fragtex.textures); + v3d->tex[PIPE_SHADER_FRAGMENT].num_textures, + v3d->tex[PIPE_SHADER_FRAGMENT].textures); util_blitter_save_so_targets(v3d->blitter, v3d->streamout.num_targets, v3d->streamout.targets); } @@ -316,12 +318,206 @@ pipe_sampler_view_reference(&src_view, NULL); } +/* Disable level 0 write, just write following mipmaps */ +#define V3D_TFU_IOA_DIMTW (1 << 0) +#define V3D_TFU_IOA_FORMAT_SHIFT 3 +#define V3D_TFU_IOA_FORMAT_LINEARTILE 3 +#define V3D_TFU_IOA_FORMAT_UBLINEAR_1_COLUMN 4 +#define V3D_TFU_IOA_FORMAT_UBLINEAR_2_COLUMN 5 
+#define V3D_TFU_IOA_FORMAT_UIF_NO_XOR 6 +#define V3D_TFU_IOA_FORMAT_UIF_XOR 7 + +#define V3D_TFU_ICFG_NUMMM_SHIFT 5 +#define V3D_TFU_ICFG_TTYPE_SHIFT 9 + +#define V3D_TFU_ICFG_OPAD_SHIFT 22 + +#define V3D_TFU_ICFG_FORMAT_SHIFT 18 +#define V3D_TFU_ICFG_FORMAT_RASTER 0 +#define V3D_TFU_ICFG_FORMAT_SAND_128 1 +#define V3D_TFU_ICFG_FORMAT_SAND_256 2 +#define V3D_TFU_ICFG_FORMAT_LINEARTILE 11 +#define V3D_TFU_ICFG_FORMAT_UBLINEAR_1_COLUMN 12 +#define V3D_TFU_ICFG_FORMAT_UBLINEAR_2_COLUMN 13 +#define V3D_TFU_ICFG_FORMAT_UIF_NO_XOR 14 +#define V3D_TFU_ICFG_FORMAT_UIF_XOR 15 + +static bool +v3d_tfu(struct pipe_context *pctx, + struct pipe_resource *pdst, + struct pipe_resource *psrc, + unsigned int src_level, + unsigned int base_level, + unsigned int last_level, + unsigned int src_layer, + unsigned int dst_layer) +{ + struct v3d_context *v3d = v3d_context(pctx); + struct v3d_screen *screen = v3d->screen; + struct v3d_resource *src = v3d_resource(psrc); + struct v3d_resource *dst = v3d_resource(pdst); + struct v3d_resource_slice *src_base_slice = &src->slices[src_level]; + struct v3d_resource_slice *dst_base_slice = &dst->slices[base_level]; + int msaa_scale = pdst->nr_samples > 1 ? 2 : 1; + int width = u_minify(pdst->width0, base_level) * msaa_scale; + int height = u_minify(pdst->height0, base_level) * msaa_scale; + + if (psrc->format != pdst->format) + return false; + if (psrc->nr_samples != pdst->nr_samples) + return false; + + uint32_t tex_format = v3d_get_tex_format(&screen->devinfo, + pdst->format); + + if (!v3d_tfu_supports_tex_format(&screen->devinfo, tex_format)) + return false; + + if (pdst->target != PIPE_TEXTURE_2D || psrc->target != PIPE_TEXTURE_2D) + return false; + + /* Can't write to raster. 
*/ + if (dst_base_slice->tiling == VC5_TILING_RASTER) + return false; + + v3d_flush_jobs_writing_resource(v3d, psrc); + v3d_flush_jobs_reading_resource(v3d, pdst); + + struct drm_v3d_submit_tfu tfu = { + .ios = (height << 16) | width, + .bo_handles = { + dst->bo->handle, + src != dst ? src->bo->handle : 0 + }, + .in_sync = v3d->out_sync, + .out_sync = v3d->out_sync, + }; + uint32_t src_offset = (src->bo->offset + + v3d_layer_offset(psrc, src_level, src_layer)); + tfu.iia |= src_offset; + if (src_base_slice->tiling == VC5_TILING_RASTER) { + tfu.icfg |= (V3D_TFU_ICFG_FORMAT_RASTER << + V3D_TFU_ICFG_FORMAT_SHIFT); + } else { + tfu.icfg |= ((V3D_TFU_ICFG_FORMAT_LINEARTILE + + (src_base_slice->tiling - VC5_TILING_LINEARTILE)) << + V3D_TFU_ICFG_FORMAT_SHIFT); + } + + uint32_t dst_offset = (dst->bo->offset + + v3d_layer_offset(pdst, src_level, dst_layer)); + tfu.ioa |= dst_offset; + if (last_level != base_level) + tfu.ioa |= V3D_TFU_IOA_DIMTW; + tfu.ioa |= ((V3D_TFU_IOA_FORMAT_LINEARTILE + + (dst_base_slice->tiling - VC5_TILING_LINEARTILE)) << + V3D_TFU_IOA_FORMAT_SHIFT); + + tfu.icfg |= tex_format << V3D_TFU_ICFG_TTYPE_SHIFT; + tfu.icfg |= (last_level - base_level) << V3D_TFU_ICFG_NUMMM_SHIFT; + + switch (src_base_slice->tiling) { + case VC5_TILING_UIF_NO_XOR: + case VC5_TILING_UIF_XOR: + tfu.iis |= (src_base_slice->padded_height / + (2 * v3d_utile_height(src->cpp))); + break; + case VC5_TILING_RASTER: + tfu.iis |= src_base_slice->stride / src->cpp; + break; + case VC5_TILING_LINEARTILE: + case VC5_TILING_UBLINEAR_1_COLUMN: + case VC5_TILING_UBLINEAR_2_COLUMN: + break; + } + + /* If we're writing level 0 (!IOA_DIMTW), then we need to supply the + * OPAD field for the destination (how many extra UIF blocks beyond + * those necessary to cover the height). When filling mipmaps, the + * miplevel 1+ tiling state is inferred. 
+ */ + if (dst_base_slice->tiling == VC5_TILING_UIF_NO_XOR || + dst_base_slice->tiling == VC5_TILING_UIF_XOR) { + int uif_block_h = 2 * v3d_utile_height(dst->cpp); + int implicit_padded_height = align(height, uif_block_h); + + tfu.icfg |= (((dst_base_slice->padded_height - + implicit_padded_height) / uif_block_h) << + V3D_TFU_ICFG_OPAD_SHIFT); + } + + int ret = v3d_ioctl(screen->fd, DRM_IOCTL_V3D_SUBMIT_TFU, &tfu); + if (ret != 0) { + fprintf(stderr, "Failed to submit TFU job: %d\n", ret); + return false; + } + + dst->writes++; + + return true; +} + +boolean +v3d_generate_mipmap(struct pipe_context *pctx, + struct pipe_resource *prsc, + enum pipe_format format, + unsigned int base_level, + unsigned int last_level, + unsigned int first_layer, + unsigned int last_layer) +{ + if (format != prsc->format) + return false; + + /* We could maybe support looping over layers for array textures, but + * we definitely don't support 3D. + */ + if (first_layer != last_layer) + return false; + + return v3d_tfu(pctx, + prsc, prsc, + base_level, + base_level, last_level, + first_layer, first_layer); +} + +static bool +v3d_tfu_blit(struct pipe_context *pctx, const struct pipe_blit_info *info) +{ + int dst_width = u_minify(info->dst.resource->width0, info->dst.level); + int dst_height = u_minify(info->dst.resource->height0, info->dst.level); + + if ((info->mask & PIPE_MASK_RGBA) == 0) + return false; + + if (info->dst.box.x != 0 || + info->dst.box.y != 0 || + info->dst.box.width != dst_width || + info->dst.box.height != dst_height || + info->src.box.x != 0 || + info->src.box.y != 0 || + info->src.box.width != info->dst.box.width || + info->src.box.height != info->dst.box.height) { + return false; + } + + if (info->dst.format != info->src.format) + return false; + + return v3d_tfu(pctx, info->dst.resource, info->src.resource, + info->src.level, + info->dst.level, info->dst.level, + info->src.box.z, info->dst.box.z); +} + /* Optimal hardware path for blitting pixels. 
* Scaling, format conversion, up- and downsampling (resolve) are allowed. */ void v3d_blit(struct pipe_context *pctx, const struct pipe_blit_info *blit_info) { + struct v3d_context *v3d = v3d_context(pctx); struct pipe_blit_info info = *blit_info; if (info.mask & PIPE_MASK_S) { @@ -329,10 +525,16 @@ info.mask &= ~PIPE_MASK_S; } -#if 0 - if (v3d_tile_blit(pctx, blit_info)) - return; -#endif + if (v3d_tfu_blit(pctx, blit_info)) + info.mask &= ~PIPE_MASK_RGBA; + + if (info.mask) + v3d_render_blit(pctx, &info); - v3d_render_blit(pctx, &info); + /* Flush our blit jobs immediately. They're unlikely to get reused by + * normal drawing or other blits, and without flushing we can easily + * run into unexpected OOMs when blits are used for a large series of + * texture uploads before using the textures. + */ + v3d_flush_jobs_writing_resource(v3d, info.dst.resource); } diff -Nru mesa-18.3.3/src/gallium/drivers/v3d/v3d_bufmgr.c mesa-19.0.1/src/gallium/drivers/v3d/v3d_bufmgr.c --- mesa-18.3.3/src/gallium/drivers/v3d/v3d_bufmgr.c 2018-09-27 19:13:54.000000000 +0000 +++ mesa-19.0.1/src/gallium/drivers/v3d/v3d_bufmgr.c 2019-03-31 23:16:37.000000000 +0000 @@ -331,7 +331,6 @@ static struct v3d_bo * v3d_bo_open_handle(struct v3d_screen *screen, - uint32_t winsys_stride, uint32_t handle, uint32_t size) { struct v3d_bo *bo; @@ -355,8 +354,7 @@ bo->private = false; #ifdef USE_V3D_SIMULATOR - v3d_simulator_open_from_handle(screen->fd, winsys_stride, - bo->handle, bo->size); + v3d_simulator_open_from_handle(screen->fd, bo->handle, bo->size); bo->map = malloc(bo->size); #endif @@ -376,14 +374,16 @@ util_hash_table_set(screen->bo_handles, (void *)(uintptr_t)handle, bo); + screen->bo_count++; + screen->bo_size += bo->size; + done: mtx_unlock(&screen->bo_handles_mutex); return bo; } struct v3d_bo * -v3d_bo_open_name(struct v3d_screen *screen, uint32_t name, - uint32_t winsys_stride) +v3d_bo_open_name(struct v3d_screen *screen, uint32_t name) { struct drm_gem_open o = { .name = name @@ -395,11 
+395,11 @@ return NULL; } - return v3d_bo_open_handle(screen, winsys_stride, o.handle, o.size); + return v3d_bo_open_handle(screen, o.handle, o.size); } struct v3d_bo * -v3d_bo_open_dmabuf(struct v3d_screen *screen, int fd, uint32_t winsys_stride) +v3d_bo_open_dmabuf(struct v3d_screen *screen, int fd) { uint32_t handle; int ret = drmPrimeFDToHandle(screen->fd, fd, &handle); @@ -416,7 +416,7 @@ return NULL; } - return v3d_bo_open_handle(screen, winsys_stride, handle, size); + return v3d_bo_open_handle(screen, handle, size); } int diff -Nru mesa-18.3.3/src/gallium/drivers/v3d/v3d_bufmgr.h mesa-19.0.1/src/gallium/drivers/v3d/v3d_bufmgr.h --- mesa-18.3.3/src/gallium/drivers/v3d/v3d_bufmgr.h 2018-09-27 19:13:54.000000000 +0000 +++ mesa-19.0.1/src/gallium/drivers/v3d/v3d_bufmgr.h 2019-03-31 23:16:37.000000000 +0000 @@ -60,10 +60,8 @@ const char *name); void v3d_bo_last_unreference(struct v3d_bo *bo); void v3d_bo_last_unreference_locked_timed(struct v3d_bo *bo, time_t time); -struct v3d_bo *v3d_bo_open_name(struct v3d_screen *screen, uint32_t name, - uint32_t winsys_stride); -struct v3d_bo *v3d_bo_open_dmabuf(struct v3d_screen *screen, int fd, - uint32_t winsys_stride); +struct v3d_bo *v3d_bo_open_name(struct v3d_screen *screen, uint32_t name); +struct v3d_bo *v3d_bo_open_dmabuf(struct v3d_screen *screen, int fd); bool v3d_bo_flink(struct v3d_bo *bo, uint32_t *name); int v3d_bo_get_dmabuf(struct v3d_bo *bo); diff -Nru mesa-18.3.3/src/gallium/drivers/v3d/v3d_context.c mesa-19.0.1/src/gallium/drivers/v3d/v3d_context.c --- mesa-18.3.3/src/gallium/drivers/v3d/v3d_context.c 2018-12-07 18:58:04.000000000 +0000 +++ mesa-19.0.1/src/gallium/drivers/v3d/v3d_context.c 2019-03-31 23:16:37.000000000 +0000 @@ -37,6 +37,7 @@ #include "v3d_screen.h" #include "v3d_context.h" #include "v3d_resource.h" +#include "broadcom/compiler/v3d_compiler.h" void v3d_flush(struct pipe_context *pctx) @@ -66,6 +67,28 @@ } static void +v3d_memory_barrier(struct pipe_context *pctx, unsigned int flags) +{ + 
struct v3d_context *v3d = v3d_context(pctx); + + /* We only need to flush jobs writing to SSBOs/images. */ + perf_debug("Flushing all jobs for glMemoryBarrier(), could do better"); + v3d_flush(pctx); +} + +static void +v3d_set_debug_callback(struct pipe_context *pctx, + const struct pipe_debug_callback *cb) +{ + struct v3d_context *v3d = v3d_context(pctx); + + if (cb) + v3d->debug = *cb; + else + memset(&v3d->debug, 0, sizeof(v3d->debug)); +} + +static void v3d_invalidate_resource(struct pipe_context *pctx, struct pipe_resource *prsc) { struct v3d_context *v3d = v3d_context(pctx); @@ -98,6 +121,8 @@ if (v3d->uploader) u_upload_destroy(v3d->uploader); + if (v3d->state_uploader) + u_upload_destroy(v3d->state_uploader); slab_destroy_child(&v3d->transfer_pool); @@ -109,6 +134,27 @@ ralloc_free(v3d); } +static void +v3d_get_sample_position(struct pipe_context *pctx, + unsigned sample_count, unsigned sample_index, + float *xy) +{ + struct v3d_context *v3d = v3d_context(pctx); + + if (sample_count <= 1) { + xy[0] = 0.5; + xy[1] = 0.5; + } else { + static const int xoffsets_v33[] = { 1, -3, 3, -1 }; + static const int xoffsets_v42[] = { -1, 3, -3, 1 }; + const int *xoffsets = (v3d->screen->devinfo.ver >= 42 ? 
+ xoffsets_v42 : xoffsets_v33); + + xy[0] = 0.5 + xoffsets[sample_index] * .125; + xy[1] = .125 + sample_index * .25; + } +} + struct pipe_context * v3d_context_create(struct pipe_screen *pscreen, void *priv, unsigned flags) { @@ -137,7 +183,10 @@ pctx->priv = priv; pctx->destroy = v3d_context_destroy; pctx->flush = v3d_pipe_flush; + pctx->memory_barrier = v3d_memory_barrier; + pctx->set_debug_callback = v3d_set_debug_callback; pctx->invalidate_resource = v3d_invalidate_resource; + pctx->get_sample_position = v3d_get_sample_position; if (screen->devinfo.ver >= 41) { v3d41_draw_init(pctx); @@ -159,6 +208,10 @@ v3d->uploader = u_upload_create_default(&v3d->base); v3d->base.stream_uploader = v3d->uploader; v3d->base.const_uploader = v3d->uploader; + v3d->state_uploader = u_upload_create(&v3d->base, + 4096, + PIPE_BIND_CONSTANT_BUFFER, + PIPE_USAGE_STREAM, 0); v3d->blitter = util_blitter_create(pctx); if (!v3d->blitter) @@ -172,7 +225,7 @@ V3D_DEBUG |= saved_shaderdb_flag; - v3d->sample_mask = (1 << VC5_MAX_SAMPLES) - 1; + v3d->sample_mask = (1 << V3D_MAX_SAMPLES) - 1; v3d->active_queries = true; return &v3d->base; diff -Nru mesa-18.3.3/src/gallium/drivers/v3d/v3d_context.h mesa-19.0.1/src/gallium/drivers/v3d/v3d_context.h --- mesa-18.3.3/src/gallium/drivers/v3d/v3d_context.h 2018-12-07 18:58:04.000000000 +0000 +++ mesa-19.0.1/src/gallium/drivers/v3d/v3d_context.h 2019-03-31 23:16:37.000000000 +0000 @@ -38,6 +38,7 @@ #include "xf86drm.h" #include "v3d_drm.h" #include "v3d_screen.h" +#include "broadcom/common/v3d_limits.h" struct v3d_job; struct v3d_bo; @@ -58,6 +59,7 @@ #define VC5_DIRTY_ZSA (1 << 2) #define VC5_DIRTY_FRAGTEX (1 << 3) #define VC5_DIRTY_VERTTEX (1 << 4) +#define VC5_DIRTY_SHADER_IMAGE (1 << 5) #define VC5_DIRTY_BLEND_COLOR (1 << 7) #define VC5_DIRTY_STENCIL_REF (1 << 8) @@ -82,9 +84,39 @@ #define VC5_DIRTY_OQ (1 << 28) #define VC5_DIRTY_CENTROID_FLAGS (1 << 29) #define VC5_DIRTY_NOPERSPECTIVE_FLAGS (1 << 30) +#define VC5_DIRTY_SSBO (1 << 31) #define 
VC5_MAX_FS_INPUTS 64 +enum v3d_sampler_state_variant { + V3D_SAMPLER_STATE_BORDER_0, + V3D_SAMPLER_STATE_F16, + V3D_SAMPLER_STATE_F16_UNORM, + V3D_SAMPLER_STATE_F16_SNORM, + V3D_SAMPLER_STATE_F16_BGRA, + V3D_SAMPLER_STATE_F16_BGRA_UNORM, + V3D_SAMPLER_STATE_F16_BGRA_SNORM, + V3D_SAMPLER_STATE_F16_A, + V3D_SAMPLER_STATE_F16_A_SNORM, + V3D_SAMPLER_STATE_F16_A_UNORM, + V3D_SAMPLER_STATE_F16_LA, + V3D_SAMPLER_STATE_F16_LA_UNORM, + V3D_SAMPLER_STATE_F16_LA_SNORM, + V3D_SAMPLER_STATE_32, + V3D_SAMPLER_STATE_32_UNORM, + V3D_SAMPLER_STATE_32_SNORM, + V3D_SAMPLER_STATE_32_A, + V3D_SAMPLER_STATE_32_A_UNORM, + V3D_SAMPLER_STATE_32_A_SNORM, + V3D_SAMPLER_STATE_1010102U, + V3D_SAMPLER_STATE_16U, + V3D_SAMPLER_STATE_16I, + V3D_SAMPLER_STATE_8I, + V3D_SAMPLER_STATE_8U, + + V3D_SAMPLER_STATE_VARIANT_COUNT, +}; + struct v3d_sampler_view { struct pipe_sampler_view base; uint32_t p0; @@ -95,6 +127,14 @@ uint8_t texture_shader_state[32]; /* V3D 4.x: Texture state struct. */ struct v3d_bo *bo; + + enum v3d_sampler_state_variant sampler_variant; + + /* Actual texture to be read by this sampler view. May be different + * from base.texture in the case of having a shadow tiled copy of a + * raster texture. + */ + struct pipe_resource *texture; }; struct v3d_sampler_state { @@ -105,15 +145,18 @@ /* V3D 3.x: Packed texture state. */ uint8_t texture_shader_state[32]; /* V3D 4.x: Sampler state struct. 
*/ - struct v3d_bo *bo; + struct pipe_resource *sampler_state; + uint32_t sampler_state_offset[V3D_SAMPLER_STATE_VARIANT_COUNT]; + + bool border_color_variants; }; struct v3d_texture_stateobj { - struct pipe_sampler_view *textures[PIPE_MAX_SAMPLERS]; + struct pipe_sampler_view *textures[V3D_MAX_TEXTURE_SAMPLERS]; unsigned num_textures; - struct pipe_sampler_state *samplers[PIPE_MAX_SAMPLERS]; + struct pipe_sampler_state *samplers[V3D_MAX_TEXTURE_SAMPLERS]; unsigned num_samplers; - struct v3d_cl_reloc texture_state[PIPE_MAX_SAMPLERS]; + struct v3d_cl_reloc texture_state[V3D_MAX_TEXTURE_SAMPLERS]; }; struct v3d_shader_uniform_info { @@ -143,7 +186,8 @@ }; struct v3d_compiled_shader { - struct v3d_bo *bo; + struct pipe_resource *resource; + uint32_t offset; union { struct v3d_prog_data *base; @@ -181,11 +225,12 @@ }; struct v3d_vertex_stateobj { - struct pipe_vertex_element pipe[VC5_MAX_ATTRIBUTES]; + struct pipe_vertex_element pipe[V3D_MAX_VS_INPUTS / 4]; unsigned num_elements; - uint8_t attrs[16 * VC5_MAX_ATTRIBUTES]; - struct v3d_bo *default_attribute_values; + uint8_t attrs[16 * (V3D_MAX_VS_INPUTS / 4)]; + struct pipe_resource *defaults; + uint32_t defaults_offset; }; struct v3d_streamout_stateobj { @@ -195,6 +240,11 @@ unsigned num_targets; }; +struct v3d_ssbo_stateobj { + struct pipe_shader_buffer sb[PIPE_MAX_SHADER_BUFFERS]; + uint32_t enabled_mask; +}; + /* Hash table key for v3d->jobs */ struct v3d_job_key { struct pipe_surface *cbufs[4]; @@ -208,6 +258,18 @@ VC5_EZ_DISABLED, }; +struct v3d_image_view { + struct pipe_image_view base; + /* V3D 4.x texture shader state struct */ + struct pipe_resource *tex_state; + uint32_t tex_state_offset; +}; + +struct v3d_shaderimg_stateobj { + struct v3d_image_view si[PIPE_MAX_SHADER_IMAGES]; + uint32_t enabled_mask; +}; + /** * A complete bin/render job. * @@ -300,6 +362,11 @@ */ bool needs_flush; + /* Set if any shader has dirtied cachelines in the TMU that need to be + * flushed before job end. 
+ */ + bool tmu_dirty_rcl; + /** * Set if a packet enabling TF has been emitted in the job (V3D 4.x). */ @@ -365,10 +432,19 @@ /** Maximum index buffer valid for the current shader_rec. */ uint32_t max_index; - /** Sync object that our RCL will update as its out_sync. */ + /** Sync object that our RCL or TFU job will update as its out_sync. */ uint32_t out_sync; + /* Stream uploader used by gallium internals. This could also be used + * by driver internals, but we tend to use the v3d_cl.h interfaces + * instead. + */ struct u_upload_mgr *uploader; + /* State uploader used inside the driver. This is for packing bits of + * long-term state inside buffers, since the kernel interfaces + * allocate a page at a time. + */ + struct u_upload_mgr *state_uploader; /** @{ Current pipeline state objects */ struct pipe_scissor_state scissor; @@ -376,8 +452,6 @@ struct v3d_rasterizer_state *rasterizer; struct v3d_depth_stencil_alpha_state *zsa; - struct v3d_texture_stateobj verttex, fragtex; - struct v3d_program_stateobj prog; struct v3d_vertex_stateobj *vtx; @@ -413,10 +487,14 @@ struct pipe_poly_stipple stipple; struct pipe_clip_state clip; struct pipe_viewport_state viewport; + struct v3d_ssbo_stateobj ssbo[PIPE_SHADER_TYPES]; + struct v3d_shaderimg_stateobj shaderimg[PIPE_SHADER_TYPES]; struct v3d_constbuf_stateobj constbuf[PIPE_SHADER_TYPES]; + struct v3d_texture_stateobj tex[PIPE_SHADER_TYPES]; struct v3d_vertexbuf_stateobj vertexbuf; struct v3d_streamout_stateobj streamout; struct v3d_bo *current_oq; + struct pipe_debug_callback debug; /** @} */ }; @@ -448,8 +526,13 @@ #define perf_debug(...) 
do { \ if (unlikely(V3D_DEBUG & V3D_DEBUG_PERF)) \ fprintf(stderr, __VA_ARGS__); \ + if (unlikely(v3d->debug.debug_message)) \ + pipe_debug_message(&v3d->debug, PERF_INFO, __VA_ARGS__); \ } while (0) +#define foreach_bit(b, mask) \ + for (uint32_t _m = (mask), b; _m && ({(b) = u_bit_scan(&_m); 1;});) + static inline struct v3d_context * v3d_context(struct pipe_context *pcontext) { @@ -476,12 +559,8 @@ void v3d_simulator_init(struct v3d_screen *screen); void v3d_simulator_destroy(struct v3d_screen *screen); -int v3d_simulator_flush(struct v3d_context *v3d, - struct drm_v3d_submit_cl *args, - struct v3d_job *job); int v3d_simulator_ioctl(int fd, unsigned long request, void *arg); -void v3d_simulator_open_from_handle(int fd, uint32_t winsys_stride, - int handle, uint32_t size); +void v3d_simulator_open_from_handle(int fd, int handle, uint32_t size); static inline int v3d_ioctl(int fd, unsigned long request, void *arg) @@ -495,8 +574,7 @@ void v3d_set_shader_uniform_dirty_flags(struct v3d_compiled_shader *shader); struct v3d_cl_reloc v3d_write_uniforms(struct v3d_context *v3d, struct v3d_compiled_shader *shader, - struct v3d_constbuf_stateobj *cb, - struct v3d_texture_stateobj *texstate); + enum pipe_shader_type stage); void v3d_flush(struct pipe_context *pctx); void v3d_job_init(struct v3d_context *v3d); @@ -530,10 +608,19 @@ uint32_t format, uint32_t *type, uint32_t *bpp); +bool v3d_tfu_supports_tex_format(const struct v3d_device_info *devinfo, + uint32_t tex_format); void v3d_init_query_functions(struct v3d_context *v3d); void v3d_blit(struct pipe_context *pctx, const struct pipe_blit_info *blit_info); void v3d_blitter_save(struct v3d_context *v3d); +boolean v3d_generate_mipmap(struct pipe_context *pctx, + struct pipe_resource *prsc, + enum pipe_format format, + unsigned int base_level, + unsigned int last_level, + unsigned int first_layer, + unsigned int last_layer); struct v3d_fence *v3d_fence_create(struct v3d_context *v3d); diff -Nru 
mesa-18.3.3/src/gallium/drivers/v3d/v3d_formats.c mesa-19.0.1/src/gallium/drivers/v3d/v3d_formats.c --- mesa-18.3.3/src/gallium/drivers/v3d/v3d_formats.c 2018-09-27 19:13:54.000000000 +0000 +++ mesa-19.0.1/src/gallium/drivers/v3d/v3d_formats.c 2019-03-31 23:16:37.000000000 +0000 @@ -142,3 +142,14 @@ type, bpp); } } + +bool +v3d_tfu_supports_tex_format(const struct v3d_device_info *devinfo, + uint32_t tex_format) +{ + if (devinfo->ver >= 41) { + return v3d41_tfu_supports_tex_format(tex_format); + } else { + return v3d33_tfu_supports_tex_format(tex_format); + } +} diff -Nru mesa-18.3.3/src/gallium/drivers/v3d/v3d_job.c mesa-19.0.1/src/gallium/drivers/v3d/v3d_job.c --- mesa-18.3.3/src/gallium/drivers/v3d/v3d_job.c 2018-12-07 18:58:04.000000000 +0000 +++ mesa-19.0.1/src/gallium/drivers/v3d/v3d_job.c 2019-03-31 23:16:37.000000000 +0000 @@ -62,7 +62,7 @@ } } - for (int i = 0; i < VC5_MAX_DRAW_BUFFERS; i++) { + for (int i = 0; i < V3D_MAX_DRAW_BUFFERS; i++) { if (job->cbufs[i]) { remove_from_ht(v3d->write_jobs, job->cbufs[i]->texture); pipe_surface_reference(&job->cbufs[i], NULL); @@ -204,7 +204,7 @@ tile_size_index++; int max_bpp = RENDER_TARGET_MAXIMUM_32BPP; - for (int i = 0; i < VC5_MAX_DRAW_BUFFERS; i++) { + for (int i = 0; i < V3D_MAX_DRAW_BUFFERS; i++) { if (job->cbufs[i]) { struct v3d_surface *surf = v3d_surface(job->cbufs[i]); max_bpp = MAX2(max_bpp, surf->internal_bpp); @@ -222,7 +222,7 @@ /** * Returns a v3d_job struture for tracking V3D rendering to a particular FBO. * - * If we've already started rendering to this FBO, then return old same job, + * If we've already started rendering to this FBO, then return the same job, * otherwise make a new one. If we're beginning rendering to an FBO, make * sure that any previous reads of the FBO (or writes to its color/Z surfaces) * have been flushed. 
@@ -251,7 +251,7 @@ */ struct v3d_job *job = v3d_job_create(v3d); - for (int i = 0; i < VC5_MAX_DRAW_BUFFERS; i++) { + for (int i = 0; i < V3D_MAX_DRAW_BUFFERS; i++) { if (cbufs[i]) { v3d_flush_jobs_reading_resource(v3d, cbufs[i]->texture); pipe_surface_reference(&job->cbufs[i], cbufs[i]); @@ -267,9 +267,7 @@ job->msaa = true; } - v3d_job_set_tile_buffer_size(job); - - for (int i = 0; i < VC5_MAX_DRAW_BUFFERS; i++) { + for (int i = 0; i < V3D_MAX_DRAW_BUFFERS; i++) { if (cbufs[i]) _mesa_hash_table_insert(v3d->write_jobs, cbufs[i]->texture, job); @@ -303,6 +301,11 @@ struct pipe_surface *zsbuf = v3d->framebuffer.zsbuf; struct v3d_job *job = v3d_get_job(v3d, cbufs, zsbuf); + if (v3d->framebuffer.samples >= 1) + job->msaa = true; + + v3d_job_set_tile_buffer_size(job); + /* The dirty flags are tracking what's been updated while v3d->job has * been bound, so set them all to ~0 when switching between jobs. We * also need to reset all state at the start of rendering. @@ -385,7 +388,15 @@ v3d33_bcl_epilogue(v3d, job); } + /* While the RCL will implicitly depend on the last RCL to have + * finished, we also need to block on any previous TFU job we may have + * dispatched. + */ + job->submit.in_sync_rcl = v3d->out_sync; + + /* Update the sync object for the last rendering by our context. */ job->submit.out_sync = v3d->out_sync; + job->submit.bcl_end = job->bcl.bo->offset + cl_offset(&job->bcl); job->submit.rcl_end = job->rcl.bo->offset + cl_offset(&job->rcl); @@ -406,11 +417,7 @@ if (!(V3D_DEBUG & V3D_DEBUG_NORAST)) { int ret; -#ifndef USE_V3D_SIMULATOR - ret = drmIoctl(v3d->fd, DRM_IOCTL_V3D_SUBMIT_CL, &job->submit); -#else - ret = v3d_simulator_flush(v3d, &job->submit, job); -#endif + ret = v3d_ioctl(v3d->fd, DRM_IOCTL_V3D_SUBMIT_CL, &job->submit); static bool warned = false; if (ret && !warned) { fprintf(stderr, "Draw call returned %s. 
" diff -Nru mesa-18.3.3/src/gallium/drivers/v3d/v3d_program.c mesa-19.0.1/src/gallium/drivers/v3d/v3d_program.c --- mesa-18.3.3/src/gallium/drivers/v3d/v3d_program.c 2018-12-07 18:58:04.000000000 +0000 +++ mesa-19.0.1/src/gallium/drivers/v3d/v3d_program.c 2019-03-31 23:16:37.000000000 +0000 @@ -27,6 +27,7 @@ #include "util/u_memory.h" #include "util/ralloc.h" #include "util/hash_table.h" +#include "util/u_upload_mgr.h" #include "tgsi/tgsi_dump.h" #include "tgsi/tgsi_parse.h" #include "compiler/nir/nir.h" @@ -37,6 +38,12 @@ #include "broadcom/cle/v3d_packet_v33_pack.h" #include "mesa/state_tracker/st_glsl_types.h" +static struct v3d_compiled_shader * +v3d_get_compiled_shader(struct v3d_context *v3d, struct v3d_key *key); +static void +v3d_setup_shared_precompile_key(struct v3d_uncompiled_shader *uncompiled, + struct v3d_key *key); + static gl_varying_slot v3d_get_slot_for_driver_location(nir_shader *s, uint32_t driver_location) { @@ -174,6 +181,70 @@ return st_glsl_storage_type_size(type, false); } +/** + * Precompiles a shader variant at shader state creation time if + * V3D_DEBUG=precompile is set. 
Used for shader-db + * (https://gitlab.freedesktop.org/mesa/shader-db) + */ +static void +v3d_shader_precompile(struct v3d_context *v3d, + struct v3d_uncompiled_shader *so) +{ + nir_shader *s = so->base.ir.nir; + + if (s->info.stage == MESA_SHADER_FRAGMENT) { + struct v3d_fs_key key = { + .base.shader_state = so, + }; + + nir_foreach_variable(var, &s->outputs) { + if (var->data.location == FRAG_RESULT_COLOR) { + key.nr_cbufs = 1; + } else if (var->data.location == FRAG_RESULT_DATA0) { + key.nr_cbufs = MAX2(key.nr_cbufs, + var->data.location - + FRAG_RESULT_DATA0 + 1); + } + } + + v3d_setup_shared_precompile_key(so, &key.base); + v3d_get_compiled_shader(v3d, &key.base); + } else { + struct v3d_vs_key key = { + .base.shader_state = so, + }; + + v3d_setup_shared_precompile_key(so, &key.base); + + /* Compile VS: All outputs */ + nir_foreach_variable(var, &s->outputs) { + unsigned array_len = MAX2(glsl_get_length(var->type), 1); + assert(array_len == 1); + (void)array_len; + + int slot = var->data.location; + for (int i = 0; i < glsl_get_components(var->type); i++) { + int swiz = var->data.location_frac + i; + key.fs_inputs[key.num_fs_inputs++] = + v3d_slot_from_slot_and_component(slot, + swiz); + } + } + + v3d_get_compiled_shader(v3d, &key.base); + + /* Compile VS bin shader: only position (XXX: include TF) */ + key.is_coord = true; + key.num_fs_inputs = 0; + for (int i = 0; i < 4; i++) { + key.fs_inputs[key.num_fs_inputs++] = + v3d_slot_from_slot_and_component(VARYING_SLOT_POS, + i); + } + v3d_get_compiled_shader(v3d, &key.base); + } +} + static void * v3d_shader_state_create(struct pipe_context *pctx, const struct pipe_shader_state *cso) @@ -225,7 +296,7 @@ v3d_optimize_nir(s); - NIR_PASS_V(s, nir_remove_dead_variables, nir_var_local); + NIR_PASS_V(s, nir_remove_dead_variables, nir_var_function_temp); /* Garbage collect dead instructions */ nir_sweep(s); @@ -244,9 +315,20 @@ fprintf(stderr, "\n"); } + if (V3D_DEBUG & V3D_DEBUG_PRECOMPILE) + v3d_shader_precompile(v3d, 
so); + return so; } +static void +v3d_shader_debug_output(const char *message, void *data) +{ + struct v3d_context *v3d = data; + + pipe_debug_message(&v3d->debug, SHADER_INFO, "%s", message); +} + static struct v3d_compiled_shader * v3d_get_compiled_shader(struct v3d_context *v3d, struct v3d_key *key) { @@ -276,34 +358,19 @@ uint64_t *qpu_insts; uint32_t shader_size; - switch (s->info.stage) { - case MESA_SHADER_VERTEX: - shader->prog_data.vs = rzalloc(shader, struct v3d_vs_prog_data); - - qpu_insts = v3d_compile_vs(v3d->screen->compiler, - (struct v3d_vs_key *)key, - shader->prog_data.vs, s, - program_id, variant_id, - &shader_size); - break; - case MESA_SHADER_FRAGMENT: - shader->prog_data.fs = rzalloc(shader, struct v3d_fs_prog_data); - - qpu_insts = v3d_compile_fs(v3d->screen->compiler, - (struct v3d_fs_key *)key, - shader->prog_data.fs, s, - program_id, variant_id, - &shader_size); - break; - default: - unreachable("bad stage"); - } + qpu_insts = v3d_compile(v3d->screen->compiler, key, + &shader->prog_data.base, s, + v3d_shader_debug_output, + v3d, + program_id, variant_id, &shader_size); + ralloc_steal(shader, shader->prog_data.base); v3d_set_shader_uniform_dirty_flags(shader); - shader->bo = v3d_bo_alloc(v3d->screen, shader_size, "shader"); - v3d_bo_map(shader->bo); - memcpy(shader->bo->map, qpu_insts, shader_size); + if (shader_size) { + u_upload_data(v3d->state_uploader, 0, shader_size, 8, + qpu_insts, &shader->offset, &shader->resource); + } free(qpu_insts); @@ -331,6 +398,13 @@ } static void +v3d_free_compiled_shader(struct v3d_compiled_shader *shader) +{ + pipe_resource_reference(&shader->resource, NULL); + ralloc_free(shader); +} + +static void v3d_setup_shared_key(struct v3d_context *v3d, struct v3d_key *key, struct v3d_texture_stateobj *texstate) { @@ -379,8 +453,6 @@ } if (sampler) { - key->tex[i].compare_mode = sampler_state->compare_mode; - key->tex[i].compare_func = sampler_state->compare_func; key->tex[i].clamp_s = sampler_state->wrap_s == 
PIPE_TEX_WRAP_CLAMP; key->tex[i].clamp_t = @@ -394,6 +466,23 @@ } static void +v3d_setup_shared_precompile_key(struct v3d_uncompiled_shader *uncompiled, + struct v3d_key *key) +{ + nir_shader *s = uncompiled->base.ir.nir; + + for (int i = 0; i < s->info.num_textures; i++) { + key->tex[i].return_size = 16; + key->tex[i].return_channels = 2; + + key->tex[i].swizzle[0] = PIPE_SWIZZLE_X; + key->tex[i].swizzle[1] = PIPE_SWIZZLE_Y; + key->tex[i].swizzle[2] = PIPE_SWIZZLE_Z; + key->tex[i].swizzle[3] = PIPE_SWIZZLE_W; + } +} + +static void v3d_update_compiled_fs(struct v3d_context *v3d, uint8_t prim_mode) { struct v3d_job *job = v3d->job; @@ -412,7 +501,7 @@ } memset(key, 0, sizeof(*key)); - v3d_setup_shared_key(v3d, &key->base, &v3d->fragtex); + v3d_setup_shared_key(v3d, &key->base, &v3d->tex[PIPE_SHADER_FRAGMENT]); key->base.shader_state = v3d->prog.bind_fs; key->is_points = (prim_mode == PIPE_PRIM_POINTS); key->is_lines = (prim_mode >= PIPE_PRIM_LINES && @@ -426,7 +515,7 @@ if (job->msaa) { key->msaa = v3d->rasterizer->base.multisample; key->sample_coverage = (v3d->rasterizer->base.multisample && - v3d->sample_mask != (1 << VC5_MAX_SAMPLES) - 1); + v3d->sample_mask != (1 << V3D_MAX_SAMPLES) - 1); key->sample_alpha_to_coverage = v3d->blend->base.alpha_to_coverage; key->sample_alpha_to_one = v3d->blend->base.alpha_to_one; } @@ -523,7 +612,7 @@ } memset(key, 0, sizeof(*key)); - v3d_setup_shared_key(v3d, &key->base, &v3d->verttex); + v3d_setup_shared_key(v3d, &key->base, &v3d->tex[PIPE_SHADER_VERTEX]); key->base.shader_state = v3d->prog.bind_vs; key->num_fs_inputs = v3d->prog.fs->prog_data.fs->base.num_inputs; STATIC_ASSERT(sizeof(key->fs_inputs) == @@ -606,12 +695,11 @@ if (key->shader_state == so) { struct v3d_compiled_shader *shader = entry->data; _mesa_hash_table_remove(ht, entry); - v3d_bo_unreference(&shader->bo); if (shader == *last_compile) *last_compile = NULL; - ralloc_free(shader); + v3d_free_compiled_shader(shader); } } @@ -677,15 +765,13 @@ 
hash_table_foreach(v3d->fs_cache, entry) { struct v3d_compiled_shader *shader = entry->data; - v3d_bo_unreference(&shader->bo); - ralloc_free(shader); + v3d_free_compiled_shader(shader); _mesa_hash_table_remove(v3d->fs_cache, entry); } hash_table_foreach(v3d->vs_cache, entry) { struct v3d_compiled_shader *shader = entry->data; - v3d_bo_unreference(&shader->bo); - ralloc_free(shader); + v3d_free_compiled_shader(shader); _mesa_hash_table_remove(v3d->vs_cache, entry); } diff -Nru mesa-18.3.3/src/gallium/drivers/v3d/v3d_resource.c mesa-19.0.1/src/gallium/drivers/v3d/v3d_resource.c --- mesa-18.3.3/src/gallium/drivers/v3d/v3d_resource.c 2018-12-07 18:58:04.000000000 +0000 +++ mesa-19.0.1/src/gallium/drivers/v3d/v3d_resource.c 2019-03-31 23:16:37.000000000 +0000 @@ -146,37 +146,13 @@ slab_free(&v3d->transfer_pool, ptrans); } -static void * -v3d_resource_transfer_map(struct pipe_context *pctx, - struct pipe_resource *prsc, - unsigned level, unsigned usage, - const struct pipe_box *box, - struct pipe_transfer **pptrans) +static void +v3d_map_usage_prep(struct pipe_context *pctx, + struct pipe_resource *prsc, + unsigned usage) { struct v3d_context *v3d = v3d_context(pctx); struct v3d_resource *rsc = v3d_resource(prsc); - struct v3d_transfer *trans; - struct pipe_transfer *ptrans; - enum pipe_format format = prsc->format; - char *buf; - - /* MSAA maps should have been handled by u_transfer_helper. */ - assert(prsc->nr_samples <= 1); - - /* Upgrade DISCARD_RANGE to WHOLE_RESOURCE if the whole resource is - * being mapped. 
- */ - if ((usage & PIPE_TRANSFER_DISCARD_RANGE) && - !(usage & PIPE_TRANSFER_UNSYNCHRONIZED) && - !(prsc->flags & PIPE_RESOURCE_FLAG_MAP_PERSISTENT) && - prsc->last_level == 0 && - prsc->width0 == box->width && - prsc->height0 == box->height && - prsc->depth0 == box->depth && - prsc->array_size == 1 && - rsc->bo->private) { - usage |= PIPE_TRANSFER_DISCARD_WHOLE_RESOURCE; - } if (usage & PIPE_TRANSFER_DISCARD_WHOLE_RESOURCE) { if (v3d_resource_bo_alloc(rsc)) { @@ -209,6 +185,41 @@ rsc->writes++; rsc->initialized_buffers = ~0; } +} + +static void * +v3d_resource_transfer_map(struct pipe_context *pctx, + struct pipe_resource *prsc, + unsigned level, unsigned usage, + const struct pipe_box *box, + struct pipe_transfer **pptrans) +{ + struct v3d_context *v3d = v3d_context(pctx); + struct v3d_resource *rsc = v3d_resource(prsc); + struct v3d_transfer *trans; + struct pipe_transfer *ptrans; + enum pipe_format format = prsc->format; + char *buf; + + /* MSAA maps should have been handled by u_transfer_helper. */ + assert(prsc->nr_samples <= 1); + + /* Upgrade DISCARD_RANGE to WHOLE_RESOURCE if the whole resource is + * being mapped. 
+ */ + if ((usage & PIPE_TRANSFER_DISCARD_RANGE) && + !(usage & PIPE_TRANSFER_UNSYNCHRONIZED) && + !(prsc->flags & PIPE_RESOURCE_FLAG_MAP_PERSISTENT) && + prsc->last_level == 0 && + prsc->width0 == box->width && + prsc->height0 == box->height && + prsc->depth0 == box->depth && + prsc->array_size == 1 && + rsc->bo->private) { + usage |= PIPE_TRANSFER_DISCARD_WHOLE_RESOURCE; + } + + v3d_map_usage_prep(pctx, prsc, usage); trans = slab_alloc(&v3d->transfer_pool); if (!trans) @@ -296,11 +307,60 @@ } static void +v3d_texture_subdata(struct pipe_context *pctx, + struct pipe_resource *prsc, + unsigned level, + unsigned usage, + const struct pipe_box *box, + const void *data, + unsigned stride, + unsigned layer_stride) +{ + struct v3d_resource *rsc = v3d_resource(prsc); + struct v3d_resource_slice *slice = &rsc->slices[level]; + + /* For a direct mapping, we can just take the u_transfer path. */ + if (!rsc->tiled) { + return u_default_texture_subdata(pctx, prsc, level, usage, box, + data, stride, layer_stride); + } + + /* Otherwise, map and store the texture data directly into the tiled + * texture. Note that gallium's texture_subdata may be called with + * obvious usage flags missing! 
+ */ + v3d_map_usage_prep(pctx, prsc, usage | (PIPE_TRANSFER_WRITE | + PIPE_TRANSFER_DISCARD_RANGE)); + + void *buf; + if (usage & PIPE_TRANSFER_UNSYNCHRONIZED) + buf = v3d_bo_map_unsynchronized(rsc->bo); + else + buf = v3d_bo_map(rsc->bo); + + for (int i = 0; i < box->depth; i++) { + v3d_store_tiled_image(buf + + v3d_layer_offset(&rsc->base, + level, + box->z + i), + slice->stride, + (void *)data + layer_stride * i, + stride, + slice->tiling, rsc->cpp, slice->padded_height, + box); + } +} + +static void v3d_resource_destroy(struct pipe_screen *pscreen, struct pipe_resource *prsc) { + struct v3d_screen *screen = v3d_screen(pscreen); struct v3d_resource *rsc = v3d_resource(prsc); + if (rsc->scanout) + renderonly_scanout_destroy(rsc->scanout, screen->ro); + v3d_bo_unreference(&rsc->bo); free(rsc); } @@ -312,6 +372,7 @@ struct winsys_handle *whandle, unsigned usage) { + struct v3d_screen *screen = v3d_screen(pscreen); struct v3d_resource *rsc = v3d_resource(prsc); struct v3d_bo *bo = rsc->bo; @@ -339,6 +400,10 @@ case WINSYS_HANDLE_TYPE_SHARED: return v3d_bo_flink(bo, &whandle->handle); case WINSYS_HANDLE_TYPE_KMS: + if (screen->ro) { + assert(rsc->scanout); + return renderonly_get_handle(rsc->scanout, whandle); + } whandle->handle = bo->handle; return TRUE; case WINSYS_HANDLE_TYPE_FD: @@ -396,7 +461,7 @@ } static void -v3d_setup_slices(struct v3d_resource *rsc) +v3d_setup_slices(struct v3d_resource *rsc, uint32_t winsys_stride) { struct pipe_resource *prsc = &rsc->base; uint32_t width = prsc->width0; @@ -423,6 +488,12 @@ */ bool uif_top = msaa; + /* Check some easy mistakes to make in a resource_create() call that + * will break our setup. 
+ */ + assert(prsc->array_size != 0); + assert(prsc->depth0 != 0); + for (int i = prsc->last_level; i >= 0; i--) { struct v3d_resource_slice *slice = &rsc->slices[i]; @@ -498,7 +569,10 @@ } slice->offset = offset; - slice->stride = level_width * rsc->cpp; + if (winsys_stride) + slice->stride = winsys_stride; + else + slice->stride = level_width * rsc->cpp; slice->padded_height = level_height; slice->size = level_height * slice->stride; @@ -630,6 +704,43 @@ const uint64_t *modifiers, int count) { + struct v3d_screen *screen = v3d_screen(pscreen); + + /* If we're in a renderonly setup, use the other device to perform our + * (linear) allocation and just import it to v3d. The other device + * may be using CMA, and V3D can import from CMA but doesn't do CMA + * allocations on its own. + * + * We always allocate this way for SHARED, because get_handle will + * need a resource on the display fd. + */ + if (screen->ro && (tmpl->bind & (PIPE_BIND_SCANOUT | + PIPE_BIND_SHARED))) { + struct winsys_handle handle; + struct pipe_resource scanout_tmpl = *tmpl; + struct renderonly_scanout *scanout = + renderonly_scanout_for_resource(&scanout_tmpl, + screen->ro, + &handle); + if (!scanout) { + fprintf(stderr, "Failed to create scanout resource\n"); + return NULL; + } + assert(handle.type == WINSYS_HANDLE_TYPE_FD); + /* The fd is all we need. Destroy the old scanout (and its + * GEM handle on kms_fd) before resource_from_handle()'s + * renderonly_create_gpu_import_for_resource() call which will + * also get a kms_fd GEM handle for the fd. 
+ */ + renderonly_scanout_destroy(scanout, screen->ro); + struct pipe_resource *prsc = + pscreen->resource_from_handle(pscreen, tmpl, + &handle, + PIPE_HANDLE_USAGE_FRAMEBUFFER_WRITE); + close(handle.handle); + return prsc; + } + bool linear_ok = find_modifier(DRM_FORMAT_MOD_LINEAR, modifiers, count); struct v3d_resource *rsc = v3d_resource_setup(pscreen, tmpl); struct pipe_resource *prsc = &rsc->base; @@ -669,14 +780,15 @@ rsc->tiled = false; } else { fprintf(stderr, "Unsupported modifier requested\n"); - return NULL; + goto fail; } rsc->internal_format = prsc->format; - v3d_setup_slices(rsc); + v3d_setup_slices(rsc, 0); + if (!v3d_resource_bo_alloc(rsc)) - goto fail; + goto fail; return prsc; fail: @@ -711,9 +823,11 @@ rsc->tiled = false; break; case DRM_FORMAT_MOD_BROADCOM_UIF: - case DRM_FORMAT_MOD_INVALID: rsc->tiled = true; break; + case DRM_FORMAT_MOD_INVALID: + rsc->tiled = screen->ro == NULL; + break; default: fprintf(stderr, "Attempt to import unsupported modifier 0x%llx\n", @@ -730,12 +844,10 @@ switch (whandle->type) { case WINSYS_HANDLE_TYPE_SHARED: - rsc->bo = v3d_bo_open_name(screen, - whandle->handle, whandle->stride); + rsc->bo = v3d_bo_open_name(screen, whandle->handle); break; case WINSYS_HANDLE_TYPE_FD: - rsc->bo = v3d_bo_open_dmabuf(screen, - whandle->handle, whandle->stride); + rsc->bo = v3d_bo_open_dmabuf(screen, whandle->handle); break; default: fprintf(stderr, @@ -749,9 +861,24 @@ rsc->internal_format = prsc->format; - v3d_setup_slices(rsc); + v3d_setup_slices(rsc, whandle->stride); v3d_debug_resource_layout(rsc, "import"); + if (screen->ro) { + /* Make sure that renderonly has a handle to our buffer in the + * display's fd, so that a later renderonly_get_handle() + * returns correct handles or GEM names. 
+ */ + rsc->scanout = + renderonly_create_gpu_import_for_resource(prsc, + screen->ro, + NULL); + if (!rsc->scanout) { + fprintf(stderr, "Failed to create scanout resource.\n"); + goto fail; + } + } + if (whandle->stride != slice->stride) { static bool warned = false; if (!warned) { @@ -774,6 +901,62 @@ return NULL; } +void +v3d_update_shadow_texture(struct pipe_context *pctx, + struct pipe_sampler_view *pview) +{ + struct v3d_context *v3d = v3d_context(pctx); + struct v3d_sampler_view *view = v3d_sampler_view(pview); + struct v3d_resource *shadow = v3d_resource(view->texture); + struct v3d_resource *orig = v3d_resource(pview->texture); + + assert(view->texture != pview->texture); + + if (shadow->writes == orig->writes && orig->bo->private) + return; + + perf_debug("Updating %dx%d@%d shadow for linear texture\n", + orig->base.width0, orig->base.height0, + pview->u.tex.first_level); + + for (int i = 0; i <= shadow->base.last_level; i++) { + unsigned width = u_minify(shadow->base.width0, i); + unsigned height = u_minify(shadow->base.height0, i); + struct pipe_blit_info info = { + .dst = { + .resource = &shadow->base, + .level = i, + .box = { + .x = 0, + .y = 0, + .z = 0, + .width = width, + .height = height, + .depth = 1, + }, + .format = shadow->base.format, + }, + .src = { + .resource = &orig->base, + .level = pview->u.tex.first_level + i, + .box = { + .x = 0, + .y = 0, + .z = 0, + .width = width, + .height = height, + .depth = 1, + }, + .format = orig->base.format, + }, + .mask = util_format_get_mask(orig->base.format), + }; + pctx->blit(pctx, &info); + } + + shadow->writes = orig->writes; +} + static struct pipe_surface * v3d_create_surface(struct pipe_context *pctx, struct pipe_resource *ptex, @@ -810,6 +993,12 @@ surface->format = v3d_get_rt_format(&screen->devinfo, psurf->format); + const struct util_format_description *desc = + util_format_description(psurf->format); + + surface->swap_rb = (desc->swizzle[0] == PIPE_SWIZZLE_Z && + psurf->format != 
PIPE_FORMAT_B5G6R5_UNORM); + if (util_format_is_depth_or_stencil(psurf->format)) { switch (psurf->format) { case PIPE_FORMAT_Z16_UNORM: @@ -920,10 +1109,11 @@ pctx->transfer_flush_region = u_transfer_helper_transfer_flush_region; pctx->transfer_unmap = u_transfer_helper_transfer_unmap; pctx->buffer_subdata = u_default_buffer_subdata; - pctx->texture_subdata = u_default_texture_subdata; + pctx->texture_subdata = v3d_texture_subdata; pctx->create_surface = v3d_create_surface; pctx->surface_destroy = v3d_surface_destroy; pctx->resource_copy_region = util_resource_copy_region; pctx->blit = v3d_blit; + pctx->generate_mipmap = v3d_generate_mipmap; pctx->flush_resource = v3d_flush_resource; } diff -Nru mesa-18.3.3/src/gallium/drivers/v3d/v3d_resource.h mesa-19.0.1/src/gallium/drivers/v3d/v3d_resource.h --- mesa-18.3.3/src/gallium/drivers/v3d/v3d_resource.h 2018-09-27 19:13:54.000000000 +0000 +++ mesa-19.0.1/src/gallium/drivers/v3d/v3d_resource.h 2019-03-31 23:16:37.000000000 +0000 @@ -104,6 +104,13 @@ */ uint8_t internal_bpp; + /** + * If the R and B channels should be swapped. On V3D 3.x, we do it in + * the shader and the blend equation. On V3D 4.1+, we can use the new + * TLB load/store flags instead of recompiling. 
+ */ + bool swap_rb; + uint32_t padded_height_of_output_image_in_uif_blocks; /* If the resource being referenced is separate stencil, then this is @@ -115,7 +122,8 @@ struct v3d_resource { struct pipe_resource base; struct v3d_bo *bo; - struct v3d_resource_slice slices[VC5_MAX_MIP_LEVELS]; + struct renderonly_scanout *scanout; + struct v3d_resource_slice slices[V3D_MAX_MIP_LEVELS]; uint32_t cube_map_stride; uint32_t size; int cpp; @@ -168,6 +176,8 @@ void v3d_resource_context_init(struct pipe_context *pctx); struct pipe_resource *v3d_resource_create(struct pipe_screen *pscreen, const struct pipe_resource *tmpl); +void v3d_update_shadow_texture(struct pipe_context *pctx, + struct pipe_sampler_view *view); uint32_t v3d_layer_offset(struct pipe_resource *prsc, uint32_t level, uint32_t layer); diff -Nru mesa-18.3.3/src/gallium/drivers/v3d/v3d_screen.c mesa-19.0.1/src/gallium/drivers/v3d/v3d_screen.c --- mesa-18.3.3/src/gallium/drivers/v3d/v3d_screen.c 2019-02-01 12:03:20.000000000 +0000 +++ mesa-19.0.1/src/gallium/drivers/v3d/v3d_screen.c 2019-03-31 23:16:37.000000000 +0000 @@ -70,6 +70,7 @@ util_hash_table_destroy(screen->bo_handles); v3d_bufmgr_destroy(pscreen); slab_destroy_parent(&screen->transfer_pool); + free(screen->ro); if (using_v3d_simulator) v3d_simulator_destroy(screen); @@ -81,6 +82,20 @@ ralloc_free(pscreen); } +static bool +v3d_has_feature(struct v3d_screen *screen, enum drm_v3d_param feature) +{ + struct drm_v3d_get_param p = { + .param = feature, + }; + int ret = v3d_ioctl(screen->fd, DRM_IOCTL_V3D_GET_PARAM, &p); + + if (ret != 0) + return false; + + return p.value; +} + static int v3d_screen_get_param(struct pipe_screen *pscreen, enum pipe_cap param) { @@ -108,23 +123,34 @@ case PIPE_CAP_STREAM_OUTPUT_PAUSE_RESUME: case PIPE_CAP_COMPUTE: case PIPE_CAP_DRAW_INDIRECT: + case PIPE_CAP_MULTI_DRAW_INDIRECT: case PIPE_CAP_QUADS_FOLLOW_PROVOKING_VERTEX_CONVENTION: case PIPE_CAP_SIGNED_VERTEX_BUFFER_OFFSET: case PIPE_CAP_TGSI_CAN_READ_OUTPUTS: case 
PIPE_CAP_TGSI_PACK_HALF_FLOAT: + case PIPE_CAP_TEXTURE_HALF_FLOAT_LINEAR: + case PIPE_CAP_FRAMEBUFFER_NO_ATTACHMENT: return 1; + case PIPE_CAP_GENERATE_MIPMAP: + return v3d_has_feature(screen, DRM_V3D_PARAM_SUPPORTS_TFU); + case PIPE_CAP_INDEP_BLEND_ENABLE: return screen->devinfo.ver >= 40; case PIPE_CAP_CONSTANT_BUFFER_OFFSET_ALIGNMENT: return 256; + case PIPE_CAP_MAX_TEXTURE_GATHER_COMPONENTS: + if (screen->devinfo.ver < 40) + return 0; + return 4; + case PIPE_CAP_SHADER_BUFFER_OFFSET_ALIGNMENT: return 4; case PIPE_CAP_GLSL_FEATURE_LEVEL: - return 400; + return 330; case PIPE_CAP_GLSL_FEATURE_LEVEL_COMPATIBILITY: return 140; @@ -152,11 +178,14 @@ case PIPE_CAP_MAX_STREAM_OUTPUT_BUFFERS: return 4; + case PIPE_CAP_MAX_VARYINGS: + return V3D_MAX_FS_INPUTS / 4; + /* Texturing. */ case PIPE_CAP_MAX_TEXTURE_2D_LEVELS: case PIPE_CAP_MAX_TEXTURE_CUBE_LEVELS: case PIPE_CAP_MAX_TEXTURE_3D_LEVELS: - return VC5_MAX_MIP_LEVELS; + return V3D_MAX_MIP_LEVELS; case PIPE_CAP_MAX_TEXTURE_ARRAY_LAYERS: return 2048; @@ -215,6 +244,8 @@ v3d_screen_get_shader_param(struct pipe_screen *pscreen, unsigned shader, enum pipe_shader_cap param) { + struct v3d_screen *screen = v3d_screen(pscreen); + if (shader != PIPE_SHADER_VERTEX && shader != PIPE_SHADER_FRAGMENT) { return 0; @@ -233,14 +264,14 @@ case PIPE_SHADER_CAP_MAX_INPUTS: if (shader == PIPE_SHADER_FRAGMENT) - return VC5_MAX_FS_INPUTS / 4; + return V3D_MAX_FS_INPUTS / 4; else - return VC5_MAX_ATTRIBUTES; + return V3D_MAX_VS_INPUTS / 4; case PIPE_SHADER_CAP_MAX_OUTPUTS: if (shader == PIPE_SHADER_FRAGMENT) return 4; else - return VC5_MAX_FS_INPUTS / 4; + return V3D_MAX_FS_INPUTS / 4; case PIPE_SHADER_CAP_MAX_TEMPS: return 256; /* GL_MAX_PROGRAM_TEMPORARIES_ARB */ case PIPE_SHADER_CAP_MAX_CONST_BUFFER_SIZE: @@ -273,9 +304,17 @@ return 1; case PIPE_SHADER_CAP_MAX_TEXTURE_SAMPLERS: case PIPE_SHADER_CAP_MAX_SAMPLER_VIEWS: - case PIPE_SHADER_CAP_MAX_SHADER_IMAGES: + return V3D_MAX_TEXTURE_SAMPLERS; + case PIPE_SHADER_CAP_MAX_SHADER_BUFFERS: 
- return VC5_MAX_TEXTURE_SAMPLERS; + return PIPE_MAX_SHADER_BUFFERS; + + case PIPE_SHADER_CAP_MAX_SHADER_IMAGES: + if (screen->devinfo.ver < 41) + return 0; + else + return PIPE_MAX_SHADER_IMAGES; + case PIPE_SHADER_CAP_PREFERRED_IR: return PIPE_SHADER_IR_NIR; case PIPE_SHADER_CAP_SUPPORTED_IRS: @@ -305,7 +344,7 @@ if (MAX2(1, sample_count) != MAX2(1, storage_sample_count)) return false; - if (sample_count > 1 && sample_count != VC5_MAX_SAMPLES) + if (sample_count > 1 && sample_count != V3D_MAX_SAMPLES) return FALSE; if (target >= PIPE_MAX_TEXTURE_TYPES) { @@ -372,7 +411,11 @@ } } + /* FORMAT_NONE gets allowed for ARB_framebuffer_no_attachments's probe + * of FRAMEBUFFER_MAX_SAMPLES + */ if ((usage & PIPE_BIND_RENDER_TARGET) && + format != PIPE_FORMAT_NONE && !v3d_rt_format_supported(&screen->devinfo, format)) { return FALSE; } @@ -467,7 +510,7 @@ } struct pipe_screen * -v3d_screen_create(int fd) +v3d_screen_create(int fd, struct renderonly *ro) { struct v3d_screen *screen = rzalloc(NULL, struct v3d_screen); struct pipe_screen *pscreen; @@ -482,6 +525,14 @@ pscreen->is_format_supported = v3d_screen_is_format_supported; screen->fd = fd; + if (ro) { + screen->ro = renderonly_dup(ro); + if (!screen->ro) { + fprintf(stderr, "Failed to dup renderonly object\n"); + ralloc_free(screen); + return NULL; + } + } list_inithead(&screen->bo_cache.time_list); (void)mtx_init(&screen->bo_handles_mutex, mtx_plain); screen->bo_handles = util_hash_table_create(handle_hash, handle_compare); diff -Nru mesa-18.3.3/src/gallium/drivers/v3d/v3d_screen.h mesa-19.0.1/src/gallium/drivers/v3d/v3d_screen.h --- mesa-18.3.3/src/gallium/drivers/v3d/v3d_screen.h 2018-09-27 19:13:54.000000000 +0000 +++ mesa-19.0.1/src/gallium/drivers/v3d/v3d_screen.h 2019-03-31 23:16:37.000000000 +0000 @@ -25,6 +25,7 @@ #define VC5_SCREEN_H #include "pipe/p_screen.h" +#include "renderonly/renderonly.h" #include "os/os_thread.h" #include "state_tracker/drm_driver.h" #include "util/list.h" @@ -34,12 +35,6 @@ struct 
v3d_bo; -#define VC5_MAX_MIP_LEVELS 12 -#define VC5_MAX_TEXTURE_SAMPLERS 32 -#define VC5_MAX_SAMPLES 4 -#define VC5_MAX_DRAW_BUFFERS 4 -#define VC5_MAX_ATTRIBUTES 16 - /* These are tunable parameters in the HW design, but all the V3D * implementations agree. */ @@ -55,6 +50,7 @@ struct v3d_screen { struct pipe_screen base; + struct renderonly *ro; int fd; struct v3d_device_info devinfo; @@ -90,7 +86,7 @@ return (struct v3d_screen *)screen; } -struct pipe_screen *v3d_screen_create(int fd); +struct pipe_screen *v3d_screen_create(int fd, struct renderonly *ro); void v3d_fence_init(struct v3d_screen *screen); diff -Nru mesa-18.3.3/src/gallium/drivers/v3d/v3d_simulator.c mesa-19.0.1/src/gallium/drivers/v3d/v3d_simulator.c --- mesa-18.3.3/src/gallium/drivers/v3d/v3d_simulator.c 2018-12-07 18:58:04.000000000 +0000 +++ mesa-19.0.1/src/gallium/drivers/v3d/v3d_simulator.c 2019-03-31 23:16:37.000000000 +0000 @@ -54,6 +54,7 @@ #include "util/set.h" #include "util/u_memory.h" #include "util/u_mm.h" +#include "drm-uapi/i915_drm.h" #include "v3d_simulator_wrapper.h" #include "v3d_screen.h" @@ -76,7 +77,7 @@ struct mem_block *heap; struct mem_block *overflow; - /** Mapping from GEM handle to struct v3d_simulator_bo * */ + /** Mapping from GEM fd to struct v3d_simulator_file * */ struct hash_table *fd_map; int refcount; @@ -93,6 +94,9 @@ struct mem_block *gmp; void *gmp_vaddr; + + /** Actual GEM fd is i915, so we should use their create ioctl. */ + bool is_i915; }; /** Wrapper for drm_v3d_bo tracking the simulator-specific state. 
*/ @@ -102,10 +106,9 @@ /** Area for this BO within sim_state->mem */ struct mem_block *block; uint32_t size; - void *vaddr; - - void *winsys_map; - uint32_t winsys_stride; + uint64_t mmap_offset; + void *sim_vaddr; + void *gem_vaddr; int handle; }; @@ -177,10 +180,50 @@ set_gmp_flags(file, sim_bo->block->ofs, size, 0x3); sim_bo->size = size; - sim_bo->vaddr = sim_state.mem + sim_bo->block->ofs - sim_state.mem_base; - memset(sim_bo->vaddr, 0xd0, size); - *(uint32_t *)(sim_bo->vaddr + sim_bo->size) = BO_SENTINEL; + /* Allocate space for the buffer in simulator memory. */ + sim_bo->sim_vaddr = sim_state.mem + sim_bo->block->ofs - sim_state.mem_base; + memset(sim_bo->sim_vaddr, 0xd0, size); + + *(uint32_t *)(sim_bo->sim_vaddr + sim_bo->size) = BO_SENTINEL; + + /* Map the GEM buffer for copy in/out to the simulator. i915 blocks + * dumb mmap on render nodes, so use their ioctl directly if we're on + * one. + */ + int ret; + if (file->is_i915) { + struct drm_i915_gem_mmap_gtt map = { + .handle = handle, + }; + + /* We could potentially use non-gtt (cached) for LLC systems, + * but the copy-in/out won't be the limiting factor on + * simulation anyway. + */ + ret = drmIoctl(fd, DRM_IOCTL_I915_GEM_MMAP_GTT, &map); + sim_bo->mmap_offset = map.offset; + } else { + struct drm_mode_map_dumb map = { + .handle = handle, + }; + + ret = drmIoctl(fd, DRM_IOCTL_MODE_MAP_DUMB, &map); + sim_bo->mmap_offset = map.offset; + } + if (ret) { + fprintf(stderr, "Failed to get MMAP offset: %d\n", ret); + abort(); + } + + sim_bo->gem_vaddr = mmap(NULL, sim_bo->size, + PROT_READ | PROT_WRITE, MAP_SHARED, + fd, sim_bo->mmap_offset); + if (sim_bo->gem_vaddr == MAP_FAILED) { + fprintf(stderr, "mmap of bo %d (offset 0x%016llx, size %d) failed\n", + handle, (long long)sim_bo->mmap_offset, sim_bo->size); + abort(); + } /* A handle of 0 is used for v3d_gem.c internal allocations that * don't need to go in the lookup table. 
@@ -200,18 +243,16 @@ { struct v3d_simulator_file *sim_file = sim_bo->file; - if (sim_bo->winsys_map) - munmap(sim_bo->winsys_map, sim_bo->size); - set_gmp_flags(sim_file, sim_bo->block->ofs, sim_bo->size, 0x0); + if (sim_bo->gem_vaddr) + munmap(sim_bo->gem_vaddr, sim_bo->size); + mtx_lock(&sim_state.mutex); u_mmFreeMem(sim_bo->block); if (sim_bo->handle) { - struct hash_entry *entry = - _mesa_hash_table_search(sim_file->bo_map, - int_to_key(sim_bo->handle)); - _mesa_hash_table_remove(sim_file->bo_map, entry); + _mesa_hash_table_remove_key(sim_file->bo_map, + int_to_key(sim_bo->handle)); } mtx_unlock(&sim_state.mutex); ralloc_free(sim_bo); @@ -228,237 +269,90 @@ return entry ? entry->data : NULL; } -static int -v3d_simulator_pin_bos(int fd, struct v3d_job *job) -{ - struct v3d_simulator_file *file = v3d_get_simulator_file_for_fd(fd); - - set_foreach(job->bos, entry) { - struct v3d_bo *bo = (struct v3d_bo *)entry->key; - struct v3d_simulator_bo *sim_bo = - v3d_get_simulator_bo(file, bo->handle); - - v3d_bo_map(bo); - memcpy(sim_bo->vaddr, bo->map, bo->size); - } - - return 0; -} - -static int -v3d_simulator_unpin_bos(int fd, struct v3d_job *job) +static void +v3d_simulator_copy_in_handle(struct v3d_simulator_file *file, int handle) { - struct v3d_simulator_file *file = v3d_get_simulator_file_for_fd(fd); + struct v3d_simulator_bo *sim_bo = v3d_get_simulator_bo(file, handle); - set_foreach(job->bos, entry) { - struct v3d_bo *bo = (struct v3d_bo *)entry->key; - struct v3d_simulator_bo *sim_bo = - v3d_get_simulator_bo(file, bo->handle); - - if (*(uint32_t *)(sim_bo->vaddr + - sim_bo->size) != BO_SENTINEL) { - fprintf(stderr, "Buffer overflow in %s\n", bo->name); - } - - v3d_bo_map(bo); - memcpy(bo->map, sim_bo->vaddr, bo->size); - } + if (!sim_bo) + return; - return 0; + memcpy(sim_bo->sim_vaddr, sim_bo->gem_vaddr, sim_bo->size); } -#if 0 static void -v3d_dump_to_file(struct v3d_exec_info *exec) +v3d_simulator_copy_out_handle(struct v3d_simulator_file *file, int handle) 
{ - static int dumpno = 0; - struct drm_v3d_get_hang_state *state; - struct drm_v3d_get_hang_state_bo *bo_state; - unsigned int dump_version = 0; + struct v3d_simulator_bo *sim_bo = v3d_get_simulator_bo(file, handle); - if (!(v3d_debug & VC5_DEBUG_DUMP)) + if (!sim_bo) return; - state = calloc(1, sizeof(*state)); + memcpy(sim_bo->gem_vaddr, sim_bo->sim_vaddr, sim_bo->size); - int unref_count = 0; - list_for_each_entry_safe(struct drm_v3d_bo, bo, &exec->unref_list, - unref_head) { - unref_count++; - } - - /* Add one more for the overflow area that isn't wrapped in a BO. */ - state->bo_count = exec->bo_count + unref_count + 1; - bo_state = calloc(state->bo_count, sizeof(*bo_state)); - - char *filename = NULL; - asprintf(&filename, "v3d-dri-%d.dump", dumpno++); - FILE *f = fopen(filename, "w+"); - if (!f) { - fprintf(stderr, "Couldn't open %s: %s", filename, - strerror(errno)); - return; - } - - fwrite(&dump_version, sizeof(dump_version), 1, f); - - state->ct0ca = exec->ct0ca; - state->ct0ea = exec->ct0ea; - state->ct1ca = exec->ct1ca; - state->ct1ea = exec->ct1ea; - state->start_bin = exec->ct0ca; - state->start_render = exec->ct1ca; - fwrite(state, sizeof(*state), 1, f); - - int i; - for (i = 0; i < exec->bo_count; i++) { - struct drm_gem_cma_object *cma_bo = exec->bo[i]; - bo_state[i].handle = i; /* Not used by the parser. */ - bo_state[i].paddr = cma_bo->paddr; - bo_state[i].size = cma_bo->base.size; - } - - list_for_each_entry_safe(struct drm_v3d_bo, bo, &exec->unref_list, - unref_head) { - struct drm_gem_cma_object *cma_bo = &bo->base; - bo_state[i].handle = 0; - bo_state[i].paddr = cma_bo->paddr; - bo_state[i].size = cma_bo->base.size; - i++; + if (*(uint32_t *)(sim_bo->sim_vaddr + + sim_bo->size) != BO_SENTINEL) { + fprintf(stderr, "Buffer overflow in handle %d\n", + handle); } +} - /* Add the static overflow memory area. 
*/ - bo_state[i].handle = exec->bo_count; - bo_state[i].paddr = sim_state.overflow->ofs; - bo_state[i].size = sim_state.overflow->size; - i++; +static int +v3d_simulator_pin_bos(struct v3d_simulator_file *file, + struct drm_v3d_submit_cl *submit) +{ + uint32_t *bo_handles = (uint32_t *)(uintptr_t)submit->bo_handles; - fwrite(bo_state, sizeof(*bo_state), state->bo_count, f); + for (int i = 0; i < submit->bo_handle_count; i++) + v3d_simulator_copy_in_handle(file, bo_handles[i]); - for (int i = 0; i < exec->bo_count; i++) { - struct drm_gem_cma_object *cma_bo = exec->bo[i]; - fwrite(cma_bo->vaddr, cma_bo->base.size, 1, f); - } + return 0; +} - list_for_each_entry_safe(struct drm_v3d_bo, bo, &exec->unref_list, - unref_head) { - struct drm_gem_cma_object *cma_bo = &bo->base; - fwrite(cma_bo->vaddr, cma_bo->base.size, 1, f); - } +static int +v3d_simulator_unpin_bos(struct v3d_simulator_file *file, + struct drm_v3d_submit_cl *submit) +{ + uint32_t *bo_handles = (uint32_t *)(uintptr_t)submit->bo_handles; - void *overflow = calloc(1, sim_state.overflow->size); - fwrite(overflow, 1, sim_state.overflow->size, f); - free(overflow); + for (int i = 0; i < submit->bo_handle_count; i++) + v3d_simulator_copy_out_handle(file, bo_handles[i]); - free(state); - free(bo_state); - fclose(f); + return 0; } -#endif -int -v3d_simulator_flush(struct v3d_context *v3d, - struct drm_v3d_submit_cl *submit, struct v3d_job *job) +static int +v3d_simulator_submit_cl_ioctl(int fd, struct drm_v3d_submit_cl *submit) { - struct v3d_screen *screen = v3d->screen; - int fd = screen->fd; struct v3d_simulator_file *file = v3d_get_simulator_file_for_fd(fd); - struct v3d_surface *csurf = v3d_surface(v3d->framebuffer.cbufs[0]); - struct v3d_resource *ctex = csurf ? v3d_resource(csurf->base.texture) : NULL; - struct v3d_simulator_bo *csim_bo = ctex ? v3d_get_simulator_bo(file, ctex->bo->handle) : NULL; - uint32_t winsys_stride = ctex ? csim_bo->winsys_stride : 0; - uint32_t sim_stride = ctex ? 
ctex->slices[0].stride : 0; - uint32_t row_len = MIN2(sim_stride, winsys_stride); int ret; - if (ctex && csim_bo->winsys_map) { -#if 0 - fprintf(stderr, "%dx%d %d %d %d\n", - ctex->base.b.width0, ctex->base.b.height0, - winsys_stride, - sim_stride, - ctex->bo->size); -#endif - - for (int y = 0; y < ctex->base.height0; y++) { - memcpy(ctex->bo->map + y * sim_stride, - csim_bo->winsys_map + y * winsys_stride, - row_len); - } - } - - ret = v3d_simulator_pin_bos(fd, job); + ret = v3d_simulator_pin_bos(file, submit); if (ret) return ret; - //v3d_dump_to_file(&exec); - if (sim_state.ver >= 41) - v3d41_simulator_flush(sim_state.v3d, submit, file->gmp->ofs); + v3d41_simulator_submit_cl_ioctl(sim_state.v3d, submit, file->gmp->ofs); else - v3d33_simulator_flush(sim_state.v3d, submit, file->gmp->ofs); + v3d33_simulator_submit_cl_ioctl(sim_state.v3d, submit, file->gmp->ofs); - ret = v3d_simulator_unpin_bos(fd, job); + ret = v3d_simulator_unpin_bos(file, submit); if (ret) return ret; - if (ctex && csim_bo->winsys_map) { - for (int y = 0; y < ctex->base.height0; y++) { - memcpy(csim_bo->winsys_map + y * winsys_stride, - ctex->bo->map + y * sim_stride, - row_len); - } - } - return 0; } /** - * Map the underlying GEM object from the real hardware GEM handle. - */ -static void * -v3d_simulator_map_winsys_bo(int fd, struct v3d_simulator_bo *sim_bo) -{ - int ret; - void *map; - - struct drm_mode_map_dumb map_dumb = { - .handle = sim_bo->handle, - }; - ret = drmIoctl(fd, DRM_IOCTL_MODE_MAP_DUMB, &map_dumb); - if (ret != 0) { - fprintf(stderr, "map ioctl failure\n"); - abort(); - } - - map = mmap(NULL, sim_bo->size, PROT_READ | PROT_WRITE, MAP_SHARED, - fd, map_dumb.offset); - if (map == MAP_FAILED) { - fprintf(stderr, - "mmap of bo %d (offset 0x%016llx, size %d) failed\n", - sim_bo->handle, (long long)map_dumb.offset, - (int)sim_bo->size); - abort(); - } - - return map; -} - -/** * Do fixups after a BO has been opened from a handle. 
* * This could be done at DRM_IOCTL_GEM_OPEN/DRM_IOCTL_GEM_PRIME_FD_TO_HANDLE * time, but we're still using drmPrimeFDToHandle() so we have this helper to * be called afterward instead. */ -void v3d_simulator_open_from_handle(int fd, uint32_t winsys_stride, - int handle, uint32_t size) +void v3d_simulator_open_from_handle(int fd, int handle, uint32_t size) { - struct v3d_simulator_bo *sim_bo = - v3d_create_simulator_bo(fd, handle, size); - - sim_bo->winsys_stride = winsys_stride; - sim_bo->winsys_map = v3d_simulator_map_winsys_bo(fd, sim_bo); + v3d_create_simulator_bo(fd, handle, size); } /** @@ -469,22 +363,38 @@ static int v3d_simulator_create_bo_ioctl(int fd, struct drm_v3d_create_bo *args) { + struct v3d_simulator_file *file = v3d_get_simulator_file_for_fd(fd); + + /* i915 bans dumb create on render nodes, so we have to use their + * native ioctl in case we're on a render node. + */ int ret; - struct drm_mode_create_dumb create = { - .width = 128, - .bpp = 8, - .height = (args->size + 127) / 128, - }; + if (file->is_i915) { + struct drm_i915_gem_create create = { + .size = args->size, + }; + ret = drmIoctl(fd, DRM_IOCTL_I915_GEM_CREATE, &create); + + args->handle = create.handle; + } else { + struct drm_mode_create_dumb create = { + .width = 128, + .bpp = 8, + .height = (args->size + 127) / 128, + }; - ret = drmIoctl(fd, DRM_IOCTL_MODE_CREATE_DUMB, &create); - assert(create.size >= args->size); + ret = drmIoctl(fd, DRM_IOCTL_MODE_CREATE_DUMB, &create); + assert(ret != 0 || create.size >= args->size); - args->handle = create.handle; + args->handle = create.handle; + } - struct v3d_simulator_bo *sim_bo = - v3d_create_simulator_bo(fd, create.handle, args->size); + if (ret == 0) { + struct v3d_simulator_bo *sim_bo = + v3d_create_simulator_bo(fd, args->handle, args->size); - args->offset = sim_bo->block->ofs; + args->offset = sim_bo->block->ofs; + } return ret; } @@ -492,20 +402,19 @@ /** * Simulated ioctl(fd, DRM_VC5_MMAP_BO) implementation. 
* - * We just pass this straight through to dumb mmap. + * We've already grabbed the mmap offset when we created the sim bo, so just + * return it. */ static int v3d_simulator_mmap_bo_ioctl(int fd, struct drm_v3d_mmap_bo *args) { - int ret; - struct drm_mode_map_dumb map = { - .handle = args->handle, - }; + struct v3d_simulator_file *file = v3d_get_simulator_file_for_fd(fd); + struct v3d_simulator_bo *sim_bo = v3d_get_simulator_bo(file, + args->handle); - ret = drmIoctl(fd, DRM_IOCTL_MODE_MAP_DUMB, &map); - args->offset = map.offset; + args->offset = sim_bo->mmap_offset; - return ret; + return 0; } static int @@ -543,10 +452,33 @@ return v3d33_simulator_get_param_ioctl(sim_state.v3d, args); } +static int +v3d_simulator_submit_tfu_ioctl(int fd, struct drm_v3d_submit_tfu *args) +{ + struct v3d_simulator_file *file = v3d_get_simulator_file_for_fd(fd); + int ret; + + v3d_simulator_copy_in_handle(file, args->bo_handles[0]); + v3d_simulator_copy_in_handle(file, args->bo_handles[1]); + v3d_simulator_copy_in_handle(file, args->bo_handles[2]); + v3d_simulator_copy_in_handle(file, args->bo_handles[3]); + + if (sim_state.ver >= 41) + ret = v3d41_simulator_submit_tfu_ioctl(sim_state.v3d, args); + else + ret = v3d33_simulator_submit_tfu_ioctl(sim_state.v3d, args); + + v3d_simulator_copy_out_handle(file, args->bo_handles[0]); + + return ret; +} + int v3d_simulator_ioctl(int fd, unsigned long request, void *args) { switch (request) { + case DRM_IOCTL_V3D_SUBMIT_CL: + return v3d_simulator_submit_cl_ioctl(fd, args); case DRM_IOCTL_V3D_CREATE_BO: return v3d_simulator_create_bo_ioctl(fd, args); case DRM_IOCTL_V3D_MMAP_BO: @@ -568,6 +500,9 @@ case DRM_IOCTL_GEM_CLOSE: return v3d_simulator_gem_close_ioctl(fd, args); + case DRM_IOCTL_V3D_SUBMIT_TFU: + return v3d_simulator_submit_tfu_ioctl(fd, args); + case DRM_IOCTL_GEM_OPEN: case DRM_IOCTL_GEM_FLINK: return drmIoctl(fd, request, args); @@ -627,6 +562,11 @@ screen->sim_file = rzalloc(screen, struct v3d_simulator_file); struct 
v3d_simulator_file *sim_file = screen->sim_file; + drmVersionPtr version = drmGetVersion(screen->fd); + if (version && strncmp(version->name, "i915", version->name_len) == 0) + sim_file->is_i915 = true; + drmFreeVersion(version); + screen->sim_file->bo_map = _mesa_hash_table_create(screen->sim_file, _mesa_hash_pointer, diff -Nru mesa-18.3.3/src/gallium/drivers/v3d/v3d_simulator_wrapper.cpp mesa-19.0.1/src/gallium/drivers/v3d/v3d_simulator_wrapper.cpp --- mesa-18.3.3/src/gallium/drivers/v3d/v3d_simulator_wrapper.cpp 2018-09-27 19:13:54.000000000 +0000 +++ mesa-19.0.1/src/gallium/drivers/v3d/v3d_simulator_wrapper.cpp 2019-03-31 23:16:37.000000000 +0000 @@ -83,6 +83,11 @@ return ident->tech_version * 10 + ident->revision; } +void +v3d_hw_set_isr(struct v3d_hw *hw, void (*isr)(uint32_t status)) +{ + hw->set_isr(isr); } +} #endif /* USE_V3D_SIMULATOR */ diff -Nru mesa-18.3.3/src/gallium/drivers/v3d/v3d_simulator_wrapper.h mesa-19.0.1/src/gallium/drivers/v3d/v3d_simulator_wrapper.h --- mesa-18.3.3/src/gallium/drivers/v3d/v3d_simulator_wrapper.h 2018-09-27 19:13:54.000000000 +0000 +++ mesa-19.0.1/src/gallium/drivers/v3d/v3d_simulator_wrapper.h 2019-03-31 23:16:37.000000000 +0000 @@ -38,6 +38,7 @@ void v3d_hw_write_reg(struct v3d_hw *hw, uint32_t reg, uint32_t val); void v3d_hw_tick(struct v3d_hw *hw); int v3d_hw_get_version(struct v3d_hw *hw); +void v3d_hw_set_isr(struct v3d_hw *hw, void (*isr)(uint32_t status)); #ifdef __cplusplus } diff -Nru mesa-18.3.3/src/gallium/drivers/v3d/v3d_tiling.c mesa-19.0.1/src/gallium/drivers/v3d/v3d_tiling.c --- mesa-18.3.3/src/gallium/drivers/v3d/v3d_tiling.c 2018-09-27 19:13:54.000000000 +0000 +++ mesa-19.0.1/src/gallium/drivers/v3d/v3d_tiling.c 2019-03-31 23:16:37.000000000 +0000 @@ -31,6 +31,7 @@ #include "v3d_screen.h" #include "v3d_context.h" #include "v3d_tiling.h" +#include "broadcom/common/v3d_cpu_tiling.h" /** Return the width in pixels of a 64-byte microtile. 
*/ uint32_t @@ -78,9 +79,8 @@ v3d_get_utile_pixel_offset(uint32_t cpp, uint32_t x, uint32_t y) { uint32_t utile_w = v3d_utile_width(cpp); - uint32_t utile_h = v3d_utile_height(cpp); - assert(x < utile_w && y < utile_h); + assert(x < utile_w && y < v3d_utile_height(cpp)); return x * cpp + y * utile_w * cpp; } @@ -211,15 +211,19 @@ return v3d_get_uif_pixel_offset(cpp, image_h, x, y, false); } +/* Loads/stores non-utile-aligned boxes by walking over the destination + * rectangle, computing the address on the GPU, and storing/loading a pixel at + * a time. + */ static inline void -v3d_move_pixels_general_percpp(void *gpu, uint32_t gpu_stride, - void *cpu, uint32_t cpu_stride, - int cpp, uint32_t image_h, - const struct pipe_box *box, - uint32_t (*get_pixel_offset)(uint32_t cpp, - uint32_t image_h, - uint32_t x, uint32_t y), - bool is_load) +v3d_move_pixels_unaligned(void *gpu, uint32_t gpu_stride, + void *cpu, uint32_t cpu_stride, + int cpp, uint32_t image_h, + const struct pipe_box *box, + uint32_t (*get_pixel_offset)(uint32_t cpp, + uint32_t image_h, + uint32_t x, uint32_t y), + bool is_load) { for (uint32_t y = 0; y < box->height; y++) { void *cpu_row = cpu + y * cpu_stride; @@ -248,6 +252,107 @@ } } +/* Breaks the image down into utiles and calls either the fast whole-utile + * load/store functions, or the unaligned fallback case. 
+ */ +static inline void +v3d_move_pixels_general_percpp(void *gpu, uint32_t gpu_stride, + void *cpu, uint32_t cpu_stride, + int cpp, uint32_t image_h, + const struct pipe_box *box, + uint32_t (*get_pixel_offset)(uint32_t cpp, + uint32_t image_h, + uint32_t x, uint32_t y), + bool is_load) +{ + uint32_t utile_w = v3d_utile_width(cpp); + uint32_t utile_h = v3d_utile_height(cpp); + uint32_t utile_gpu_stride = utile_w * cpp; + uint32_t x1 = box->x; + uint32_t y1 = box->y; + uint32_t x2 = box->x + box->width; + uint32_t y2 = box->y + box->height; + uint32_t align_x1 = align(x1, utile_w); + uint32_t align_y1 = align(y1, utile_h); + uint32_t align_x2 = x2 & ~(utile_w - 1); + uint32_t align_y2 = y2 & ~(utile_h - 1); + + /* Load/store all the whole utiles first. */ + for (uint32_t y = align_y1; y < align_y2; y += utile_h) { + void *cpu_row = cpu + (y - box->y) * cpu_stride; + + for (uint32_t x = align_x1; x < align_x2; x += utile_w) { + void *utile_gpu = (gpu + + get_pixel_offset(cpp, image_h, x, y)); + void *utile_cpu = cpu_row + (x - box->x) * cpp; + + if (is_load) { + v3d_load_utile(utile_cpu, cpu_stride, + utile_gpu, utile_gpu_stride); + } else { + v3d_store_utile(utile_gpu, utile_gpu_stride, + utile_cpu, cpu_stride); + } + } + } + + /* If there were no aligned utiles in the middle, load/store the whole + * thing unaligned. + */ + if (align_y2 <= align_y1 || + align_x2 <= align_x1) { + v3d_move_pixels_unaligned(gpu, gpu_stride, + cpu, cpu_stride, + cpp, image_h, + box, + get_pixel_offset, is_load); + return; + } + + /* Load/store the partial utiles. 
*/ + struct pipe_box partial_boxes[4] = { + /* Top */ + { + .x = x1, + .width = x2 - x1, + .y = y1, + .height = align_y1 - y1, + }, + /* Bottom */ + { + .x = x1, + .width = x2 - x1, + .y = align_y2, + .height = y2 - align_y2, + }, + /* Left */ + { + .x = x1, + .width = align_x1 - x1, + .y = align_y1, + .height = align_y2 - align_y1, + }, + /* Right */ + { + .x = align_x2, + .width = x2 - align_x2, + .y = align_y1, + .height = align_y2 - align_y1, + }, + }; + for (int i = 0; i < ARRAY_SIZE(partial_boxes); i++) { + void *partial_cpu = (cpu + + (partial_boxes[i].y - y1) * cpu_stride + + (partial_boxes[i].x - x1) * cpp); + + v3d_move_pixels_unaligned(gpu, gpu_stride, + partial_cpu, cpu_stride, + cpp, image_h, + &partial_boxes[i], + get_pixel_offset, is_load); + } +} + static inline void v3d_move_pixels_general(void *gpu, uint32_t gpu_stride, void *cpu, uint32_t cpu_stride, diff -Nru mesa-18.3.3/src/gallium/drivers/v3d/v3d_tiling.h mesa-19.0.1/src/gallium/drivers/v3d/v3d_tiling.h --- mesa-18.3.3/src/gallium/drivers/v3d/v3d_tiling.h 2018-09-27 19:13:54.000000000 +0000 +++ mesa-19.0.1/src/gallium/drivers/v3d/v3d_tiling.h 2019-03-31 23:16:37.000000000 +0000 @@ -27,8 +27,6 @@ uint32_t v3d_utile_width(int cpp) ATTRIBUTE_CONST; uint32_t v3d_utile_height(int cpp) ATTRIBUTE_CONST; bool v3d_size_is_lt(uint32_t width, uint32_t height, int cpp) ATTRIBUTE_CONST; -void v3d_load_utile(void *dst, void *src, uint32_t dst_stride, uint32_t cpp); -void v3d_store_utile(void *dst, void *src, uint32_t src_stride, uint32_t cpp); void v3d_load_tiled_image(void *dst, uint32_t dst_stride, void *src, uint32_t src_stride, enum v3d_tiling_mode tiling_format, int cpp, diff -Nru mesa-18.3.3/src/gallium/drivers/v3d/v3d_uniforms.c mesa-19.0.1/src/gallium/drivers/v3d/v3d_uniforms.c --- mesa-18.3.3/src/gallium/drivers/v3d/v3d_uniforms.c 2018-09-27 19:13:54.000000000 +0000 +++ mesa-19.0.1/src/gallium/drivers/v3d/v3d_uniforms.c 2019-03-31 23:16:37.000000000 +0000 @@ -28,82 +28,6 @@ #include 
"compiler/v3d_compiler.h" #include "broadcom/cle/v3d_packet_v33_pack.h" -#if 0 - -#define SWIZ(x,y,z,w) { \ - PIPE_SWIZZLE_##x, \ - PIPE_SWIZZLE_##y, \ - PIPE_SWIZZLE_##z, \ - PIPE_SWIZZLE_##w \ -} - -static void -write_texture_border_color(struct v3d_job *job, - struct v3d_cl_out **uniforms, - struct v3d_texture_stateobj *texstate, - uint32_t unit) -{ - struct pipe_sampler_state *sampler = texstate->samplers[unit]; - struct pipe_sampler_view *texture = texstate->textures[unit]; - struct v3d_resource *rsc = v3d_resource(texture->texture); - union util_color uc; - - const struct util_format_description *tex_format_desc = - util_format_description(texture->format); - - float border_color[4]; - for (int i = 0; i < 4; i++) - border_color[i] = sampler->border_color.f[i]; - if (util_format_is_srgb(texture->format)) { - for (int i = 0; i < 3; i++) - border_color[i] = - util_format_linear_to_srgb_float(border_color[i]); - } - - /* Turn the border color into the layout of channels that it would - * have when stored as texture contents. - */ - float storage_color[4]; - util_format_unswizzle_4f(storage_color, - border_color, - tex_format_desc->swizzle); - - /* Now, pack so that when the v3d_format-sampled texture contents are - * replaced with our border color, the v3d_get_format_swizzle() - * swizzling will get the right channels. 
- */ - if (util_format_is_depth_or_stencil(texture->format)) { - uc.ui[0] = util_pack_z(PIPE_FORMAT_Z24X8_UNORM, - sampler->border_color.f[0]) << 8; - } else { - switch (rsc->v3d_format) { - default: - case VC5_TEXTURE_TYPE_RGBA8888: - util_pack_color(storage_color, - PIPE_FORMAT_R8G8B8A8_UNORM, &uc); - break; - case VC5_TEXTURE_TYPE_RGBA4444: - util_pack_color(storage_color, - PIPE_FORMAT_A8B8G8R8_UNORM, &uc); - break; - case VC5_TEXTURE_TYPE_RGB565: - util_pack_color(storage_color, - PIPE_FORMAT_B8G8R8A8_UNORM, &uc); - break; - case VC5_TEXTURE_TYPE_ALPHA: - uc.ui[0] = float_to_ubyte(storage_color[0]) << 24; - break; - case VC5_TEXTURE_TYPE_LUMALPHA: - uc.ui[0] = ((float_to_ubyte(storage_color[1]) << 24) | - (float_to_ubyte(storage_color[0]) << 0)); - break; - } - } - - cl_aligned_u32(uniforms, uc.ui[0]); -} -#endif - static uint32_t get_texrect_scale(struct v3d_texture_stateobj *texstate, enum quniform_contents contents, @@ -147,6 +71,30 @@ } } +static uint32_t +get_image_size(struct v3d_shaderimg_stateobj *shaderimg, + enum quniform_contents contents, + uint32_t data) +{ + struct v3d_image_view *image = &shaderimg->si[data]; + + switch (contents) { + case QUNIFORM_IMAGE_WIDTH: + return u_minify(image->base.resource->width0, + image->base.u.tex.level); + case QUNIFORM_IMAGE_HEIGHT: + return u_minify(image->base.resource->height0, + image->base.u.tex.level); + case QUNIFORM_IMAGE_DEPTH: + return u_minify(image->base.resource->depth0, + image->base.u.tex.level); + case QUNIFORM_IMAGE_ARRAY_SIZE: + return image->base.resource->array_size; + default: + unreachable("Bad texture size field"); + } +} + static struct v3d_bo * v3d_upload_ubo(struct v3d_context *v3d, struct v3d_compiled_shader *shader, @@ -224,17 +172,34 @@ struct v3d_texture_stateobj *texstate, uint32_t data) { - /* Extract the texture unit from the top bits, and the compiler's + int unit = v3d_tmu_config_data_get_unit(data); + struct pipe_sampler_view *psview = texstate->textures[unit]; + struct 
v3d_sampler_view *sview = v3d_sampler_view(psview); + struct v3d_resource *rsc = v3d_resource(sview->texture); + + cl_aligned_reloc(&job->indirect, uniforms, sview->bo, + v3d_tmu_config_data_get_value(data)); + v3d_job_add_bo(job, rsc->bo); +} + +static void +write_image_tmu_p0(struct v3d_job *job, + struct v3d_cl_out **uniforms, + struct v3d_shaderimg_stateobj *img, + uint32_t data) +{ + /* Extract the image unit from the top bits, and the compiler's * packed p0 from the bottom. */ uint32_t unit = data >> 24; uint32_t p0 = data & 0x00ffffff; - struct pipe_sampler_view *psview = texstate->textures[unit]; - struct v3d_sampler_view *sview = v3d_sampler_view(psview); - struct v3d_resource *rsc = v3d_resource(psview->texture); + struct v3d_image_view *iview = &img->si[unit]; + struct v3d_resource *rsc = v3d_resource(iview->base.resource); - cl_aligned_reloc(&job->indirect, uniforms, sview->bo, p0); + cl_aligned_reloc(&job->indirect, uniforms, + v3d_resource(iview->tex_state)->bo, + iview->tex_state_offset | p0); v3d_job_add_bo(job, rsc->bo); } @@ -245,23 +210,28 @@ struct v3d_texture_stateobj *texstate, uint32_t data) { - /* Extract the texture unit from the top bits, and the compiler's - * packed p1 from the bottom. 
- */ - uint32_t unit = data >> 24; - uint32_t p0 = data & 0x00ffffff; - + uint32_t unit = v3d_tmu_config_data_get_unit(data); struct pipe_sampler_state *psampler = texstate->samplers[unit]; struct v3d_sampler_state *sampler = v3d_sampler_state(psampler); + struct pipe_sampler_view *psview = texstate->textures[unit]; + struct v3d_sampler_view *sview = v3d_sampler_view(psview); + int variant = 0; + + if (sampler->border_color_variants) + variant = sview->sampler_variant; - cl_aligned_reloc(&job->indirect, uniforms, sampler->bo, p0); + cl_aligned_reloc(&job->indirect, uniforms, + v3d_resource(sampler->sampler_state)->bo, + sampler->sampler_state_offset[variant] | + v3d_tmu_config_data_get_value(data)); } struct v3d_cl_reloc v3d_write_uniforms(struct v3d_context *v3d, struct v3d_compiled_shader *shader, - struct v3d_constbuf_stateobj *cb, - struct v3d_texture_stateobj *texstate) + enum pipe_shader_type stage) { + struct v3d_constbuf_stateobj *cb = &v3d->constbuf[stage]; + struct v3d_texture_stateobj *texstate = &v3d->tex[stage]; struct v3d_uniform_list *uinfo = &shader->prog_data.base->uniforms; struct v3d_job *job = v3d->job; const uint32_t *gallium_uniforms = cb->cb[0].user_buffer; @@ -279,14 +249,14 @@ cl_start(&job->indirect); for (int i = 0; i < uinfo->count; i++) { + uint32_t data = uinfo->data[i]; switch (uinfo->contents[i]) { case QUNIFORM_CONSTANT: - cl_aligned_u32(&uniforms, uinfo->data[i]); + cl_aligned_u32(&uniforms, data); break; case QUNIFORM_UNIFORM: - cl_aligned_u32(&uniforms, - gallium_uniforms[uinfo->data[i]]); + cl_aligned_u32(&uniforms, gallium_uniforms[data]); break; case QUNIFORM_VIEWPORT_X_SCALE: cl_aligned_f(&uniforms, v3d->viewport.scale[0] * 256.0f); @@ -304,37 +274,33 @@ case QUNIFORM_USER_CLIP_PLANE: cl_aligned_f(&uniforms, - v3d->clip.ucp[uinfo->data[i] / 4][uinfo->data[i] % 4]); + v3d->clip.ucp[data / 4][data % 4]); break; case QUNIFORM_TMU_CONFIG_P0: - write_tmu_p0(job, &uniforms, texstate, - uinfo->data[i]); + write_tmu_p0(job, 
&uniforms, texstate, data); break; case QUNIFORM_TMU_CONFIG_P1: - write_tmu_p1(job, &uniforms, texstate, - uinfo->data[i]); + write_tmu_p1(job, &uniforms, texstate, data); break; - case QUNIFORM_TEXTURE_CONFIG_P1: - write_texture_p1(job, &uniforms, texstate, - uinfo->data[i]); + case QUNIFORM_IMAGE_TMU_CONFIG_P0: + write_image_tmu_p0(job, &uniforms, + &v3d->shaderimg[stage], data); break; -#if 0 - case QUNIFORM_TEXTURE_FIRST_LEVEL: - write_texture_first_level(job, &uniforms, texstate, - uinfo->data[i]); + case QUNIFORM_TEXTURE_CONFIG_P1: + write_texture_p1(job, &uniforms, texstate, + data); break; -#endif case QUNIFORM_TEXRECT_SCALE_X: case QUNIFORM_TEXRECT_SCALE_Y: cl_aligned_u32(&uniforms, get_texrect_scale(texstate, uinfo->contents[i], - uinfo->data[i])); + data)); break; case QUNIFORM_TEXTURE_WIDTH: @@ -345,7 +311,17 @@ cl_aligned_u32(&uniforms, get_texture_size(texstate, uinfo->contents[i], - uinfo->data[i])); + data)); + break; + + case QUNIFORM_IMAGE_WIDTH: + case QUNIFORM_IMAGE_HEIGHT: + case QUNIFORM_IMAGE_DEPTH: + case QUNIFORM_IMAGE_ARRAY_SIZE: + cl_aligned_u32(&uniforms, + get_image_size(&v3d->shaderimg[stage], + uinfo->contents[i], + data)); break; case QUNIFORM_ALPHA_REF: @@ -353,16 +329,12 @@ v3d->zsa->base.alpha.ref_value); break; - case QUNIFORM_SAMPLE_MASK: - cl_aligned_u32(&uniforms, v3d->sample_mask); - break; - case QUNIFORM_UBO_ADDR: - if (uinfo->data[i] == 0) { + if (data == 0) { cl_aligned_reloc(&job->indirect, &uniforms, ubo, 0); } else { - int ubo_index = uinfo->data[i]; + int ubo_index = data; struct v3d_resource *rsc = v3d_resource(cb->cb[ubo_index].buffer); @@ -372,13 +344,24 @@ } break; - case QUNIFORM_TEXTURE_FIRST_LEVEL: - cl_aligned_f(&uniforms, - texstate->textures[uinfo->data[i]]->u.tex.first_level); + case QUNIFORM_SSBO_OFFSET: { + struct pipe_shader_buffer *sb = + &v3d->ssbo[stage].sb[data]; + + cl_aligned_reloc(&job->indirect, &uniforms, + v3d_resource(sb->buffer)->bo, + sb->buffer_offset); + break; + } + + case 
QUNIFORM_GET_BUFFER_SIZE: + cl_aligned_u32(&uniforms, + v3d->ssbo[stage].sb[data].buffer_size); break; - case QUNIFORM_TEXTURE_BORDER_COLOR: - /* XXX */ + case QUNIFORM_TEXTURE_FIRST_LEVEL: + cl_aligned_f(&uniforms, + texstate->textures[data]->u.tex.first_level); break; case QUNIFORM_SPILL_OFFSET: @@ -397,15 +380,17 @@ write_texture_p0(job, &uniforms, texstate, uinfo->contents[i] - QUNIFORM_TEXTURE_CONFIG_P0_0, - uinfo->data[i]); + data); break; } #if 0 uint32_t written_val = *((uint32_t *)uniforms - 1); - fprintf(stderr, "shader %p[%d]: 0x%08x / 0x%08x (%f)\n", + fprintf(stderr, "shader %p[%d]: 0x%08x / 0x%08x (%f) ", shader, i, __gen_address_offset(&uniform_stream) + i * 4, written_val, uif(written_val)); + vir_dump_uniform(uinfo->contents[i], data); + fprintf(stderr, "\n"); #endif } @@ -444,7 +429,6 @@ case QUNIFORM_TMU_CONFIG_P0: case QUNIFORM_TMU_CONFIG_P1: case QUNIFORM_TEXTURE_CONFIG_P1: - case QUNIFORM_TEXTURE_BORDER_COLOR: case QUNIFORM_TEXTURE_FIRST_LEVEL: case QUNIFORM_TEXRECT_SCALE_X: case QUNIFORM_TEXRECT_SCALE_Y: @@ -461,12 +445,21 @@ dirty |= VC5_DIRTY_FRAGTEX | VC5_DIRTY_VERTTEX; break; - case QUNIFORM_ALPHA_REF: - dirty |= VC5_DIRTY_ZSA; + case QUNIFORM_SSBO_OFFSET: + case QUNIFORM_GET_BUFFER_SIZE: + dirty |= VC5_DIRTY_SSBO; break; - case QUNIFORM_SAMPLE_MASK: - dirty |= VC5_DIRTY_SAMPLE_STATE; + case QUNIFORM_IMAGE_TMU_CONFIG_P0: + case QUNIFORM_IMAGE_WIDTH: + case QUNIFORM_IMAGE_HEIGHT: + case QUNIFORM_IMAGE_DEPTH: + case QUNIFORM_IMAGE_ARRAY_SIZE: + dirty |= VC5_DIRTY_SHADER_IMAGE; + break; + + case QUNIFORM_ALPHA_REF: + dirty |= VC5_DIRTY_ZSA; break; default: diff -Nru mesa-18.3.3/src/gallium/drivers/v3d/v3dx_context.h mesa-19.0.1/src/gallium/drivers/v3d/v3dx_context.h --- mesa-18.3.3/src/gallium/drivers/v3d/v3dx_context.h 2018-09-27 19:13:54.000000000 +0000 +++ mesa-19.0.1/src/gallium/drivers/v3d/v3dx_context.h 2019-03-31 23:16:37.000000000 +0000 @@ -39,9 +39,13 @@ void v3dX(simulator_init_regs)(struct v3d_hw *v3d); int 
v3dX(simulator_get_param_ioctl)(struct v3d_hw *v3d, struct drm_v3d_get_param *args); -void v3dX(simulator_flush)(struct v3d_hw *v3d, struct drm_v3d_submit_cl *submit, - uint32_t gmp_ofs); +void v3dX(simulator_submit_cl_ioctl)(struct v3d_hw *v3d, + struct drm_v3d_submit_cl *args, + uint32_t gmp_offset); +int v3dX(simulator_submit_tfu_ioctl)(struct v3d_hw *v3d, + struct drm_v3d_submit_tfu *args); const struct v3d_format *v3dX(get_format_desc)(enum pipe_format f); void v3dX(get_internal_type_bpp_for_output_format)(uint32_t format, uint32_t *type, uint32_t *bpp); +bool v3dX(tfu_supports_tex_format)(uint32_t tex_format); diff -Nru mesa-18.3.3/src/gallium/drivers/v3d/v3dx_draw.c mesa-19.0.1/src/gallium/drivers/v3d/v3dx_draw.c --- mesa-18.3.3/src/gallium/drivers/v3d/v3dx_draw.c 2018-09-27 19:13:54.000000000 +0000 +++ mesa-19.0.1/src/gallium/drivers/v3d/v3dx_draw.c 2019-03-31 23:16:37.000000000 +0000 @@ -119,18 +119,42 @@ } static void -v3d_predraw_check_textures(struct pipe_context *pctx, - struct v3d_texture_stateobj *stage_tex) +v3d_predraw_check_stage_inputs(struct pipe_context *pctx, + enum pipe_shader_type s) { struct v3d_context *v3d = v3d_context(pctx); - for (int i = 0; i < stage_tex->num_textures; i++) { - struct pipe_sampler_view *view = stage_tex->textures[i]; - if (!view) + /* XXX perf: If we're reading from the output of TF in this job, we + * should instead be using the wait for transform feedback + * functionality. + */ + + /* Flush writes to textures we're sampling. */ + for (int i = 0; i < v3d->tex[s].num_textures; i++) { + struct pipe_sampler_view *pview = v3d->tex[s].textures[i]; + if (!pview) continue; + struct v3d_sampler_view *view = v3d_sampler_view(pview); + + if (view->texture != view->base.texture) + v3d_update_shadow_texture(pctx, &view->base); v3d_flush_jobs_writing_resource(v3d, view->texture); } + + /* Flush writes to UBOs. 
*/ + foreach_bit(i, v3d->constbuf[s].enabled_mask) { + struct pipe_constant_buffer *cb = &v3d->constbuf[s].cb[i]; + if (cb->buffer) + v3d_flush_jobs_writing_resource(v3d, cb->buffer); + } + + /* Flush writes to our image views */ + foreach_bit(i, v3d->shaderimg[s].enabled_mask) { + struct v3d_image_view *view = &v3d->shaderimg[s].si[i]; + + v3d_flush_jobs_writing_resource(v3d, view->base.resource); + } } static void @@ -146,16 +170,13 @@ /* Upload the uniforms to the indirect CL first */ struct v3d_cl_reloc fs_uniforms = v3d_write_uniforms(v3d, v3d->prog.fs, - &v3d->constbuf[PIPE_SHADER_FRAGMENT], - &v3d->fragtex); + PIPE_SHADER_FRAGMENT); struct v3d_cl_reloc vs_uniforms = v3d_write_uniforms(v3d, v3d->prog.vs, - &v3d->constbuf[PIPE_SHADER_VERTEX], - &v3d->verttex); + PIPE_SHADER_VERTEX); struct v3d_cl_reloc cs_uniforms = v3d_write_uniforms(v3d, v3d->prog.cs, - &v3d->constbuf[PIPE_SHADER_VERTEX], - &v3d->verttex); + PIPE_SHADER_VERTEX); /* See GFXH-930 workaround below */ uint32_t num_elements_to_emit = MAX2(vtx->num_elements, 1); @@ -166,6 +187,10 @@ cl_packet_length(GL_SHADER_STATE_ATTRIBUTE_RECORD), 32); + /* XXX perf: We should move most of the SHADER_STATE_RECORD setup to + * compile time, so that we mostly just have to OR the VS and FS + * records together at draw time. + */ cl_emit(&job->indirect, GL_SHADER_STATE_RECORD, shader) { shader.enable_clipping = true; /* VC5_DIRTY_PRIM_MODE | VC5_DIRTY_RASTERIZER */ @@ -178,8 +203,13 @@ * shader needs to write the Z value (even just discards). */ shader.fragment_shader_does_z_writes = - (v3d->prog.fs->prog_data.fs->writes_z || - v3d->prog.fs->prog_data.fs->discard); + v3d->prog.fs->prog_data.fs->writes_z; + /* Set if the EZ test must be disabled (due to shader side + * effects and the early_z flag not being present in the + * shader). 
+ */ + shader.turn_off_early_z_test = + v3d->prog.fs->prog_data.fs->disable_ez; shader.fragment_shader_uses_real_pixel_centre_w_in_addition_to_centroid_w2 = v3d->prog.fs->prog_data.fs->uses_center_w; @@ -192,21 +222,27 @@ shader.fragment_shader_propagate_nans = true; shader.coordinate_shader_code_address = - cl_address(v3d->prog.cs->bo, 0); + cl_address(v3d_resource(v3d->prog.cs->resource)->bo, + v3d->prog.cs->offset); shader.vertex_shader_code_address = - cl_address(v3d->prog.vs->bo, 0); + cl_address(v3d_resource(v3d->prog.vs->resource)->bo, + v3d->prog.vs->offset); shader.fragment_shader_code_address = - cl_address(v3d->prog.fs->bo, 0); + cl_address(v3d_resource(v3d->prog.fs->resource)->bo, + v3d->prog.fs->offset); /* XXX: Use combined input/output size flag in the common * case. */ - shader.coordinate_shader_has_separate_input_and_output_vpm_blocks = true; - shader.vertex_shader_has_separate_input_and_output_vpm_blocks = true; + shader.coordinate_shader_has_separate_input_and_output_vpm_blocks = + v3d->prog.cs->prog_data.vs->separate_segments; + shader.vertex_shader_has_separate_input_and_output_vpm_blocks = + v3d->prog.vs->prog_data.vs->separate_segments; + shader.coordinate_shader_input_vpm_segment_size = - MAX2(v3d->prog.cs->prog_data.vs->vpm_input_size, 1); + v3d->prog.cs->prog_data.vs->vpm_input_size; shader.vertex_shader_input_vpm_segment_size = - MAX2(v3d->prog.vs->prog_data.vs->vpm_input_size, 1); + v3d->prog.vs->prog_data.vs->vpm_input_size; shader.coordinate_shader_output_vpm_segment_size = v3d->prog.cs->prog_data.vs->vpm_output_size; @@ -259,7 +295,8 @@ v3d->prog.vs->prog_data.vs->uses_iid; shader.address_of_default_attribute_values = - cl_address(vtx->default_attribute_values, 0); + cl_address(v3d_resource(vtx->defaults)->bo, + vtx->defaults_offset); } for (int i = 0; i < vtx->num_elements; i++) { @@ -285,7 +322,7 @@ attr.maximum_index = 0xffffff; #endif } - STATIC_ASSERT(sizeof(vtx->attrs) >= VC5_MAX_ATTRIBUTES * size); + 
STATIC_ASSERT(sizeof(vtx->attrs) >= V3D_MAX_VS_INPUTS / 4 * size); } if (vtx->num_elements == 0) { @@ -431,8 +468,11 @@ /* Before setting up the draw, flush anything writing to the textures * that we read from. */ - v3d_predraw_check_textures(pctx, &v3d->verttex); - v3d_predraw_check_textures(pctx, &v3d->fragtex); + for (int s = 0; s < PIPE_SHADER_TYPES; s++) + v3d_predraw_check_stage_inputs(pctx, s); + + if (info->indirect) + v3d_flush_jobs_writing_resource(v3d, info->indirect->buffer); struct v3d_job *job = v3d_get_job_for_fbo(v3d); @@ -444,12 +484,29 @@ * on the last submitted render, rather than tracking the last * rendering to each texture's BO. */ - if (v3d->verttex.num_textures) { + if (v3d->tex[PIPE_SHADER_VERTEX].num_textures) { perf_debug("Blocking binner on last render " "due to vertex texturing.\n"); job->submit.in_sync_bcl = v3d->out_sync; } + /* Mark SSBOs as being written. We don't actually know which ones are + * read vs written, so just assume the worst + */ + for (int s = 0; s < PIPE_SHADER_TYPES; s++) { + foreach_bit(i, v3d->ssbo[s].enabled_mask) { + v3d_job_add_write_resource(job, + v3d->ssbo[s].sb[i].buffer); + job->tmu_dirty_rcl = true; + } + + foreach_bit(i, v3d->shaderimg[s].enabled_mask) { + v3d_job_add_write_resource(job, + v3d->shaderimg[s].si[i].base.resource); + job->tmu_dirty_rcl = true; + } + } + /* Get space to emit our draw call into the BCL, using a branch to * jump to a new BO if necessary. 
*/ @@ -531,7 +588,23 @@ } #endif - if (info->instance_count > 1) { + if (info->indirect) { + cl_emit(&job->bcl, INDIRECT_INDEXED_INSTANCED_PRIM_LIST, prim) { + prim.index_type = ffs(info->index_size) - 1; +#if V3D_VERSION < 40 + prim.address_of_indices_list = + cl_address(rsc->bo, offset); +#endif /* V3D_VERSION < 40 */ + prim.mode = info->mode | prim_tf_enable; + prim.enable_primitive_restarts = info->primitive_restart; + + prim.number_of_draw_indirect_indexed_records = info->indirect->draw_count; + + prim.stride_in_multiples_of_4_bytes = info->indirect->stride >> 2; + prim.address = cl_address(v3d_resource(info->indirect->buffer)->bo, + info->indirect->offset); + } + } else if (info->instance_count > 1) { cl_emit(&job->bcl, INDEXED_INSTANCED_PRIM_LIST, prim) { prim.index_type = ffs(info->index_size) - 1; #if V3D_VERSION >= 40 @@ -568,7 +641,16 @@ if (info->has_user_indices) pipe_resource_reference(&prsc, NULL); } else { - if (info->instance_count > 1) { + if (info->indirect) { + cl_emit(&job->bcl, INDIRECT_VERTEX_ARRAY_INSTANCED_PRIMS, prim) { + prim.mode = info->mode | prim_tf_enable; + prim.number_of_draw_indirect_array_records = info->indirect->draw_count; + + prim.stride_in_multiples_of_4_bytes = info->indirect->stride >> 2; + prim.address = cl_address(v3d_resource(info->indirect->buffer)->bo, + info->indirect->offset); + } + } else if (info->instance_count > 1) { cl_emit(&job->bcl, VERTEX_ARRAY_INSTANCED_PRIMS, prim) { prim.mode = info->mode | prim_tf_enable; prim.index_of_first_vertex = info->start; @@ -623,7 +705,7 @@ rsc->initialized_buffers |= PIPE_CLEAR_STENCIL; } - for (int i = 0; i < VC5_MAX_DRAW_BUFFERS; i++) { + for (int i = 0; i < V3D_MAX_DRAW_BUFFERS; i++) { uint32_t bit = PIPE_CLEAR_COLOR0 << i; int blend_rt = v3d->blend->base.independent_blend_enable ? 
i : 0; @@ -703,7 +785,7 @@ buffers &= ~PIPE_CLEAR_DEPTHSTENCIL; } - for (int i = 0; i < VC5_MAX_DRAW_BUFFERS; i++) { + for (int i = 0; i < V3D_MAX_DRAW_BUFFERS; i++) { uint32_t bit = PIPE_CLEAR_COLOR0 << i; if (!(buffers & bit)) continue; diff -Nru mesa-18.3.3/src/gallium/drivers/v3d/v3dx_emit.c mesa-19.0.1/src/gallium/drivers/v3d/v3dx_emit.c --- mesa-18.3.3/src/gallium/drivers/v3d/v3dx_emit.c 2018-12-07 18:58:04.000000000 +0000 +++ mesa-19.0.1/src/gallium/drivers/v3d/v3dx_emit.c 2019-03-31 23:16:37.000000000 +0000 @@ -291,7 +291,7 @@ if (blend->independent_blend_enable) config.render_target_mask = 1 << rt; else - config.render_target_mask = (1 << VC5_MAX_DRAW_BUFFERS) - 1; + config.render_target_mask = (1 << V3D_MAX_DRAW_BUFFERS) - 1; #else assert(rt == 0); #endif @@ -588,7 +588,7 @@ #endif if (blend->base.independent_blend_enable) { - for (int i = 0; i < VC5_MAX_DRAW_BUFFERS; i++) + for (int i = 0; i < V3D_MAX_DRAW_BUFFERS; i++) emit_rt_blend(v3d, job, &blend->base, i); } else { emit_rt_blend(v3d, job, &blend->base, 0); @@ -653,10 +653,10 @@ * the view, so we merge them together at draw time. 
*/ if (v3d->dirty & VC5_DIRTY_FRAGTEX) - emit_textures(v3d, &v3d->fragtex); + emit_textures(v3d, &v3d->tex[PIPE_SHADER_FRAGMENT]); if (v3d->dirty & VC5_DIRTY_VERTTEX) - emit_textures(v3d, &v3d->verttex); + emit_textures(v3d, &v3d->tex[PIPE_SHADER_VERTEX]); #endif if (v3d->dirty & VC5_DIRTY_FLAT_SHADE_FLAGS) { diff -Nru mesa-18.3.3/src/gallium/drivers/v3d/v3dx_format_table.c mesa-19.0.1/src/gallium/drivers/v3d/v3dx_format_table.c --- mesa-18.3.3/src/gallium/drivers/v3d/v3dx_format_table.c 2018-09-27 19:13:54.000000000 +0000 +++ mesa-19.0.1/src/gallium/drivers/v3d/v3dx_format_table.c 2019-03-31 23:16:37.000000000 +0000 @@ -65,6 +65,8 @@ FORMAT(B8G8R8X8_SRGB, SRGB8_ALPHA8, RGBA8, SWIZ_ZYX1, 16, 0), FORMAT(R8G8B8A8_UNORM, RGBA8, RGBA8, SWIZ_XYZW, 16, 0), FORMAT(R8G8B8X8_UNORM, RGBA8, RGBA8, SWIZ_XYZ1, 16, 0), + FORMAT(R8G8B8A8_SRGB, SRGB8_ALPHA8, RGBA8, SWIZ_XYZW, 16, 0), + FORMAT(R8G8B8X8_SRGB, SRGB8_ALPHA8, RGBA8, SWIZ_XYZ1, 16, 0), FORMAT(R8G8B8A8_SNORM, NO, RGBA8_SNORM, SWIZ_XYZW, 16, 0), FORMAT(R8G8B8X8_SNORM, NO, RGBA8_SNORM, SWIZ_XYZ1, 16, 0), FORMAT(R10G10B10A2_UNORM, RGB10_A2, RGB10_A2, SWIZ_XYZW, 16, 0), @@ -145,12 +147,13 @@ #if V3D_VERSION >= 40 FORMAT(S8_UINT_Z24_UNORM, D24S8, DEPTH24_X8, SWIZ_XXXX, 32, 1), FORMAT(X8Z24_UNORM, D24S8, DEPTH24_X8, SWIZ_XXXX, 32, 1), - FORMAT(S8X24_UINT, S8, DEPTH_COMP32F, SWIZ_XXXX, 32, 1), + FORMAT(S8X24_UINT, S8, RGBA8UI, SWIZ_XXXX, 16, 1), FORMAT(Z32_FLOAT, D32F, DEPTH_COMP32F, SWIZ_XXXX, 32, 1), FORMAT(Z16_UNORM, D16, DEPTH_COMP16,SWIZ_XXXX, 32, 1), /* Pretend we support this, but it'll be separate Z32F depth and S8. 
*/ FORMAT(Z32_FLOAT_S8X24_UINT, D32F, DEPTH_COMP32F, SWIZ_XXXX, 32, 1), + FORMAT(X32_S8X24_UINT, S8, R8UI, SWIZ_XXXX, 16, 1), #else FORMAT(S8_UINT_Z24_UNORM, ZS_DEPTH24_STENCIL8, DEPTH24_X8, SWIZ_XXXX, 32, 1), FORMAT(X8Z24_UNORM, ZS_DEPTH24_STENCIL8, DEPTH24_X8, SWIZ_XXXX, 32, 1), @@ -316,3 +319,34 @@ break; } } + +bool +v3dX(tfu_supports_tex_format)(enum V3DX(Texture_Data_Formats) format) +{ + switch (format) { + case TEXTURE_DATA_FORMAT_R8: + case TEXTURE_DATA_FORMAT_R8_SNORM: + case TEXTURE_DATA_FORMAT_RG8: + case TEXTURE_DATA_FORMAT_RG8_SNORM: + case TEXTURE_DATA_FORMAT_RGBA8: + case TEXTURE_DATA_FORMAT_RGBA8_SNORM: + case TEXTURE_DATA_FORMAT_RGB565: + case TEXTURE_DATA_FORMAT_RGBA4: + case TEXTURE_DATA_FORMAT_RGB5_A1: + case TEXTURE_DATA_FORMAT_RGB10_A2: + case TEXTURE_DATA_FORMAT_R16: + case TEXTURE_DATA_FORMAT_R16_SNORM: + case TEXTURE_DATA_FORMAT_RG16: + case TEXTURE_DATA_FORMAT_RG16_SNORM: + case TEXTURE_DATA_FORMAT_RGBA16: + case TEXTURE_DATA_FORMAT_RGBA16_SNORM: + case TEXTURE_DATA_FORMAT_R16F: + case TEXTURE_DATA_FORMAT_RG16F: + case TEXTURE_DATA_FORMAT_RGBA16F: + case TEXTURE_DATA_FORMAT_R11F_G11F_B10F: + case TEXTURE_DATA_FORMAT_R4: + return true; + default: + return false; + } +} diff -Nru mesa-18.3.3/src/gallium/drivers/v3d/v3dx_rcl.c mesa-19.0.1/src/gallium/drivers/v3d/v3dx_rcl.c --- mesa-18.3.3/src/gallium/drivers/v3d/v3dx_rcl.c 2018-09-27 19:13:54.000000000 +0000 +++ mesa-19.0.1/src/gallium/drivers/v3d/v3dx_rcl.c 2019-03-31 23:16:37.000000000 +0000 @@ -74,6 +74,7 @@ load.input_image_format = V3D_OUTPUT_IMAGE_FORMAT_S8; else load.input_image_format = surf->format; + load.r_b_swap = surf->swap_rb; if (surf->tiling == VC5_TILING_UIF_NO_XOR || surf->tiling == VC5_TILING_UIF_XOR) { @@ -137,6 +138,7 @@ else store.output_image_format = surf->format; + store.r_b_swap = surf->swap_rb; store.memory_format = surf->tiling; if (surf->tiling == VC5_TILING_UIF_NO_XOR || @@ -205,7 +207,7 @@ { uint32_t loads_pending = job->load; - for (int i = 0; i < 
VC5_MAX_DRAW_BUFFERS; i++) { + for (int i = 0; i < V3D_MAX_DRAW_BUFFERS; i++) { uint32_t bit = PIPE_CLEAR_COLOR0 << i; if (!(loads_pending & bit)) continue; @@ -303,7 +305,7 @@ * perspective. Non-MSAA surfaces will use * STORE_MULTI_SAMPLE_RESOLVED_TILE_COLOR_BUFFER_EXTENDED. */ - for (int i = 0; i < VC5_MAX_DRAW_BUFFERS; i++) { + for (int i = 0; i < V3D_MAX_DRAW_BUFFERS; i++) { uint32_t bit = PIPE_CLEAR_COLOR0 << i; if (!(job->store & bit)) continue; @@ -372,6 +374,15 @@ } } #else /* V3D_VERSION >= 40 */ + /* If we're emitting an RCL with GL_ARB_framebuffer_no_attachments, + * we still need to emit some sort of store. + */ + if (!job->store) { + cl_emit(cl, STORE_TILE_BUFFER_GENERAL, store) { + store.buffer_to_store = NONE; + } + } + assert(!stores_pending); /* GFXH-1461/GFXH-1689: The per-buffer store command's clear @@ -496,7 +507,7 @@ v3d_job_add_bo(job, job->rcl.bo); int nr_cbufs = 0; - for (int i = 0; i < VC5_MAX_DRAW_BUFFERS; i++) { + for (int i = 0; i < V3D_MAX_DRAW_BUFFERS; i++) { if (job->cbufs[i]) nr_cbufs = i + 1; } @@ -759,7 +770,10 @@ v3d_rcl_emit_generic_per_tile_list(job, nr_cbufs - 1); - /* XXX: Use Morton order */ + /* XXX perf: We should expose GL_MESA_tile_raster_order to improve X11 + * performance, but we should use Morton order otherwise to improve + * cache locality. 
+ */ uint32_t supertile_w_in_pixels = job->tile_width * supertile_w; uint32_t supertile_h_in_pixels = job->tile_height * supertile_h; uint32_t min_x_supertile = job->draw_min_x / supertile_w_in_pixels; @@ -781,5 +795,20 @@ } } + if (job->tmu_dirty_rcl) { + cl_emit(&job->rcl, L1_CACHE_FLUSH_CONTROL, flush) { + flush.tmu_config_cache_clear = 0xf; + flush.tmu_data_cache_clear = 0xf; + flush.uniforms_cache_clear = 0xf; + flush.instruction_cache_clear = 0xf; + } + + cl_emit(&job->rcl, L2T_CACHE_FLUSH_CONTROL, flush) { + flush.l2t_flush_mode = L2T_FLUSH_MODE_CLEAN; + flush.l2t_flush_start = cl_address(NULL, 0); + flush.l2t_flush_end = cl_address(NULL, ~0); + } + } + cl_emit(&job->rcl, END_OF_RENDERING, end); } diff -Nru mesa-18.3.3/src/gallium/drivers/v3d/v3dx_simulator.c mesa-19.0.1/src/gallium/drivers/v3d/v3dx_simulator.c --- mesa-18.3.3/src/gallium/drivers/v3d/v3dx_simulator.c 2018-09-27 19:13:54.000000000 +0000 +++ mesa-19.0.1/src/gallium/drivers/v3d/v3dx_simulator.c 2019-03-31 23:16:37.000000000 +0000 @@ -49,7 +49,7 @@ #define V3D_READ(reg) v3d_hw_read_reg(v3d, reg) static void -v3d_flush_l3(struct v3d_hw *v3d) +v3d_invalidate_l3(struct v3d_hw *v3d) { if (!v3d_hw_has_gca(v3d)) return; @@ -62,10 +62,13 @@ #endif } -/* Invalidates the L2 cache. This is a read-only cache. */ +/* Invalidates the L2C cache. This is a read-only cache for uniforms and instructions. */ static void -v3d_flush_l2(struct v3d_hw *v3d) +v3d_invalidate_l2c(struct v3d_hw *v3d) { + if (V3D_VERSION >= 33) + return; + V3D_WRITE(V3D_CTL_0_L2CACTL, V3D_CTL_0_L2CACTL_L2CCLR_SET | V3D_CTL_0_L2CACTL_L2CENA_SET); @@ -73,7 +76,7 @@ /* Invalidates texture L2 cachelines */ static void -v3d_flush_l2t(struct v3d_hw *v3d) +v3d_invalidate_l2t(struct v3d_hw *v3d) { V3D_WRITE(V3D_CTL_0_L2TFLSTA, 0); V3D_WRITE(V3D_CTL_0_L2TFLEND, ~0); @@ -84,18 +87,44 @@ /* Invalidates the slice caches. These are read-only caches. 
*/ static void -v3d_flush_slices(struct v3d_hw *v3d) +v3d_invalidate_slices(struct v3d_hw *v3d) { V3D_WRITE(V3D_CTL_0_SLCACTL, ~0); } static void -v3d_flush_caches(struct v3d_hw *v3d) +v3d_invalidate_caches(struct v3d_hw *v3d) { - v3d_flush_l3(v3d); - v3d_flush_l2(v3d); - v3d_flush_l2t(v3d); - v3d_flush_slices(v3d); + v3d_invalidate_l3(v3d); + v3d_invalidate_l2c(v3d); + v3d_invalidate_l2t(v3d); + v3d_invalidate_slices(v3d); +} + +int +v3dX(simulator_submit_tfu_ioctl)(struct v3d_hw *v3d, + struct drm_v3d_submit_tfu *args) +{ + int last_vtct = V3D_READ(V3D_TFU_CS) & V3D_TFU_CS_CVTCT_SET; + + V3D_WRITE(V3D_TFU_IIA, args->iia); + V3D_WRITE(V3D_TFU_IIS, args->iis); + V3D_WRITE(V3D_TFU_ICA, args->ica); + V3D_WRITE(V3D_TFU_IUA, args->iua); + V3D_WRITE(V3D_TFU_IOA, args->ioa); + V3D_WRITE(V3D_TFU_IOS, args->ios); + V3D_WRITE(V3D_TFU_COEF0, args->coef[0]); + V3D_WRITE(V3D_TFU_COEF1, args->coef[1]); + V3D_WRITE(V3D_TFU_COEF2, args->coef[2]); + V3D_WRITE(V3D_TFU_COEF3, args->coef[3]); + + V3D_WRITE(V3D_TFU_ICFG, args->icfg); + + while ((V3D_READ(V3D_TFU_CS) & V3D_TFU_CS_CVTCT_SET) == last_vtct) { + v3d_hw_tick(v3d); + } + + return 0; } int @@ -112,6 +141,12 @@ [DRM_V3D_PARAM_V3D_CORE0_IDENT2] = V3D_CTL_0_IDENT2, }; + switch (args->param) { + case DRM_V3D_PARAM_SUPPORTS_TFU: + args->value = 1; + return 0; + } + if (args->param < ARRAY_SIZE(reg_map) && reg_map[args->param]) { args->value = V3D_READ(reg_map[args->param]); return 0; @@ -122,6 +157,32 @@ abort(); } +static struct v3d_hw *v3d_isr_hw; + +static void +v3d_isr(uint32_t hub_status) +{ + struct v3d_hw *v3d = v3d_isr_hw; + + /* Check the per-core bits */ + if (hub_status & (1 << 0)) { + uint32_t core_status = V3D_READ(V3D_CTL_0_INT_STS); + + if (core_status & V3D_CTL_0_INT_STS_INT_GMPV_SET) { + fprintf(stderr, "GMP violation at 0x%08x\n", + V3D_READ(V3D_GMP_0_VIO_ADDR)); + abort(); + } else { + fprintf(stderr, + "Unexpected ISR with core status 0x%08x\n", + core_status); + } + abort(); + } + + return; +} + void 
v3dX(simulator_init_regs)(struct v3d_hw *v3d) { @@ -136,11 +197,19 @@ */ V3D_WRITE(V3D_CTL_0_MISCCFG, V3D_CTL_1_MISCCFG_OVRTMUOUT_SET); #endif + + uint32_t core_interrupts = V3D_CTL_0_INT_STS_INT_GMPV_SET; + V3D_WRITE(V3D_CTL_0_INT_MSK_SET, ~core_interrupts); + V3D_WRITE(V3D_CTL_0_INT_MSK_CLR, core_interrupts); + + v3d_isr_hw = v3d; + v3d_hw_set_isr(v3d, v3d_isr); } void -v3dX(simulator_flush)(struct v3d_hw *v3d, struct drm_v3d_submit_cl *submit, - uint32_t gmp_ofs) +v3dX(simulator_submit_cl_ioctl)(struct v3d_hw *v3d, + struct drm_v3d_submit_cl *submit, + uint32_t gmp_ofs) { /* Completely reset the GMP. */ V3D_WRITE(V3D_GMP_0_CFG, @@ -152,7 +221,7 @@ ; } - v3d_flush_caches(v3d); + v3d_invalidate_caches(v3d); if (submit->qma) { V3D_WRITE(V3D_CLE_0_CT0QMA, submit->qma); @@ -168,14 +237,17 @@ V3D_WRITE(V3D_CLE_0_CT0QBA, submit->bcl_start); V3D_WRITE(V3D_CLE_0_CT0QEA, submit->bcl_end); - /* Wait for bin to complete before firing render, as it seems the - * simulator doesn't implement the semaphores. + /* Wait for bin to complete before firing render. The kernel's + * scheduler implements this using the GPU scheduler blocking on the + * bin fence completing. (We don't use HW semaphores). 
*/ while (V3D_READ(V3D_CLE_0_CT0CA) != V3D_READ(V3D_CLE_0_CT0EA)) { v3d_hw_tick(v3d); } + v3d_invalidate_caches(v3d); + V3D_WRITE(V3D_CLE_0_CT1QBA, submit->rcl_start); V3D_WRITE(V3D_CLE_0_CT1QEA, submit->rcl_end); diff -Nru mesa-18.3.3/src/gallium/drivers/v3d/v3dx_state.c mesa-19.0.1/src/gallium/drivers/v3d/v3dx_state.c --- mesa-18.3.3/src/gallium/drivers/v3d/v3dx_state.c 2018-09-27 19:13:54.000000000 +0000 +++ mesa-19.0.1/src/gallium/drivers/v3d/v3dx_state.c 2019-03-31 23:16:37.000000000 +0000 @@ -30,10 +30,12 @@ #include "util/u_memory.h" #include "util/u_half.h" #include "util/u_helpers.h" +#include "util/u_upload_mgr.h" #include "v3d_context.h" #include "v3d_tiling.h" #include "broadcom/common/v3d_macros.h" +#include "broadcom/compiler/v3d_compiler.h" #include "broadcom/cle/v3dx_pack.h" static void @@ -77,7 +79,7 @@ v3d_set_sample_mask(struct pipe_context *pctx, unsigned sample_mask) { struct v3d_context *v3d = v3d_context(pctx); - v3d->sample_mask = sample_mask & ((1 << VC5_MAX_SAMPLES) - 1); + v3d->sample_mask = sample_mask & ((1 << V3D_MAX_SAMPLES) - 1); v3d->dirty |= VC5_DIRTY_SAMPLE_STATE; } @@ -130,7 +132,7 @@ so->base = *cso; if (cso->independent_blend_enable) { - for (int i = 0; i < VC5_MAX_DRAW_BUFFERS; i++) { + for (int i = 0; i < V3D_MAX_DRAW_BUFFERS; i++) { so->blend_enables |= cso->rt[i].blend_enable << i; /* V3D 4.x is when we got independent blend enables. */ @@ -139,7 +141,7 @@ } } else { if (cso->rt[0].blend_enable) - so->blend_enables = (1 << VC5_MAX_DRAW_BUFFERS) - 1; + so->blend_enables = (1 << V3D_MAX_DRAW_BUFFERS) - 1; } return so; @@ -404,12 +406,12 @@ /* Set up the default attribute values in case any of the vertex * elements use them. 
*/ - so->default_attribute_values = v3d_bo_alloc(v3d->screen, - VC5_MAX_ATTRIBUTES * - 4 * sizeof(float), - "default_attributes"); - uint32_t *attrs = v3d_bo_map(so->default_attribute_values); - for (int i = 0; i < VC5_MAX_ATTRIBUTES; i++) { + uint32_t *attrs; + u_upload_alloc(v3d->state_uploader, 0, + V3D_MAX_VS_INPUTS * sizeof(float), 16, + &so->defaults_offset, &so->defaults, (void **)&attrs); + + for (int i = 0; i < V3D_MAX_VS_INPUTS / 4; i++) { attrs[i * 4 + 0] = 0; attrs[i * 4 + 1] = 0; attrs[i * 4 + 2] = 0; @@ -421,6 +423,7 @@ } } + u_upload_unmap(v3d->state_uploader); return so; } @@ -429,7 +432,7 @@ { struct v3d_vertex_stateobj *so = hwcso; - v3d_bo_unreference(&so->default_attribute_values); + pipe_resource_reference(&so->defaults, NULL); free(so); } @@ -481,17 +484,17 @@ struct pipe_surface *cbuf = v3d->framebuffer.cbufs[i]; if (!cbuf) continue; + struct v3d_surface *v3d_cbuf = v3d_surface(cbuf); const struct util_format_description *desc = util_format_description(cbuf->format); /* For BGRA8 formats (DRI window system default format), we - * need to swap R and B, since the HW's format is RGBA8. + * need to swap R and B, since the HW's format is RGBA8. On + * V3D 4.1+, the RCL can swap R and B on load/store. 
*/ - if (desc->swizzle[0] == PIPE_SWIZZLE_Z && - cbuf->format != PIPE_FORMAT_B5G6R5_UNORM) { + if (v3d->screen->devinfo.ver < 41 && v3d_cbuf->swap_rb) v3d->swap_color_rb |= 1 << i; - } if (desc->swizzle[3] == PIPE_SWIZZLE_1) v3d->blend_dst_alpha_one |= 1 << i; @@ -500,64 +503,34 @@ v3d->dirty |= VC5_DIRTY_FRAMEBUFFER; } -static struct v3d_texture_stateobj * -v3d_get_stage_tex(struct v3d_context *v3d, enum pipe_shader_type shader) -{ - switch (shader) { - case PIPE_SHADER_FRAGMENT: - v3d->dirty |= VC5_DIRTY_FRAGTEX; - return &v3d->fragtex; - break; - case PIPE_SHADER_VERTEX: - v3d->dirty |= VC5_DIRTY_VERTTEX; - return &v3d->verttex; - break; - default: - fprintf(stderr, "Unknown shader target %d\n", shader); - abort(); - } -} - -static uint32_t translate_wrap(uint32_t pipe_wrap, bool using_nearest) +static enum V3DX(Wrap_Mode) +translate_wrap(uint32_t pipe_wrap, bool using_nearest) { switch (pipe_wrap) { case PIPE_TEX_WRAP_REPEAT: - return 0; + return V3D_WRAP_MODE_REPEAT; case PIPE_TEX_WRAP_CLAMP_TO_EDGE: - return 1; + return V3D_WRAP_MODE_CLAMP; case PIPE_TEX_WRAP_MIRROR_REPEAT: - return 2; + return V3D_WRAP_MODE_MIRROR; case PIPE_TEX_WRAP_CLAMP_TO_BORDER: - return 3; + return V3D_WRAP_MODE_BORDER; case PIPE_TEX_WRAP_CLAMP: - return (using_nearest ? 1 : 3); + return (using_nearest ? 
+ V3D_WRAP_MODE_CLAMP : + V3D_WRAP_MODE_BORDER); default: unreachable("Unknown wrap mode"); } } - -static void * -v3d_create_sampler_state(struct pipe_context *pctx, - const struct pipe_sampler_state *cso) -{ - MAYBE_UNUSED struct v3d_context *v3d = v3d_context(pctx); - struct v3d_sampler_state *so = CALLOC_STRUCT(v3d_sampler_state); - - if (!so) - return NULL; - - memcpy(so, cso, sizeof(*cso)); - - bool either_nearest = - (cso->mag_img_filter == PIPE_TEX_MIPFILTER_NEAREST || - cso->min_img_filter == PIPE_TEX_MIPFILTER_NEAREST); - #if V3D_VERSION >= 40 - so->bo = v3d_bo_alloc(v3d->screen, cl_packet_length(SAMPLER_STATE), - "sampler"); - void *map = v3d_bo_map(so->bo); - +static void +v3d_upload_sampler_state_variant(void *map, + const struct pipe_sampler_state *cso, + enum v3d_sampler_state_variant variant, + bool either_nearest) +{ v3dx_pack(map, SAMPLER_STATE, sampler) { sampler.wrap_i_border = false; @@ -603,29 +576,190 @@ sampler.maximum_anisotropy = 1; } - sampler.border_color_mode = V3D_BORDER_COLOR_FOLLOWS; - /* XXX: The border color field is in the TMU blending format - * (32, f16, or i16), and we need to customize it based on - * that. - * - * XXX: for compat alpha formats, we need the alpha field to - * be in the red channel. 
- */ - sampler.border_color_red = - util_float_to_half(cso->border_color.f[0]); - sampler.border_color_green = - util_float_to_half(cso->border_color.f[1]); - sampler.border_color_blue = - util_float_to_half(cso->border_color.f[2]); - sampler.border_color_alpha = - util_float_to_half(cso->border_color.f[3]); + if (variant == V3D_SAMPLER_STATE_BORDER_0) { + sampler.border_color_mode = V3D_BORDER_COLOR_0000; + } else { + sampler.border_color_mode = V3D_BORDER_COLOR_FOLLOWS; + + union pipe_color_union border; + + /* First, reswizzle the border color for any + * mismatching we're doing between the texture's + * channel order in hardware (R) versus what it is at + * the GL level (ALPHA) + */ + switch (variant) { + case V3D_SAMPLER_STATE_F16_BGRA: + case V3D_SAMPLER_STATE_F16_BGRA_UNORM: + case V3D_SAMPLER_STATE_F16_BGRA_SNORM: + border.i[0] = cso->border_color.i[2]; + border.i[1] = cso->border_color.i[1]; + border.i[2] = cso->border_color.i[0]; + border.i[3] = cso->border_color.i[3]; + break; + + case V3D_SAMPLER_STATE_F16_A: + case V3D_SAMPLER_STATE_F16_A_UNORM: + case V3D_SAMPLER_STATE_F16_A_SNORM: + case V3D_SAMPLER_STATE_32_A: + case V3D_SAMPLER_STATE_32_A_UNORM: + case V3D_SAMPLER_STATE_32_A_SNORM: + border.i[0] = cso->border_color.i[3]; + border.i[1] = 0; + border.i[2] = 0; + border.i[3] = 0; + break; + + case V3D_SAMPLER_STATE_F16_LA: + case V3D_SAMPLER_STATE_F16_LA_UNORM: + case V3D_SAMPLER_STATE_F16_LA_SNORM: + border.i[0] = cso->border_color.i[0]; + border.i[1] = cso->border_color.i[3]; + border.i[2] = 0; + border.i[3] = 0; + break; + + default: + border = cso->border_color; + } + + /* Perform any clamping. 
*/ + switch (variant) { + case V3D_SAMPLER_STATE_F16_UNORM: + case V3D_SAMPLER_STATE_F16_BGRA_UNORM: + case V3D_SAMPLER_STATE_F16_A_UNORM: + case V3D_SAMPLER_STATE_F16_LA_UNORM: + case V3D_SAMPLER_STATE_32_UNORM: + case V3D_SAMPLER_STATE_32_A_UNORM: + for (int i = 0; i < 4; i++) + border.f[i] = CLAMP(border.f[i], 0, 1); + break; + + case V3D_SAMPLER_STATE_F16_SNORM: + case V3D_SAMPLER_STATE_F16_BGRA_SNORM: + case V3D_SAMPLER_STATE_F16_A_SNORM: + case V3D_SAMPLER_STATE_F16_LA_SNORM: + case V3D_SAMPLER_STATE_32_SNORM: + case V3D_SAMPLER_STATE_32_A_SNORM: + for (int i = 0; i < 4; i++) + border.f[i] = CLAMP(border.f[i], -1, 1); + break; + + case V3D_SAMPLER_STATE_1010102U: + border.ui[0] = CLAMP(border.ui[0], + 0, (1 << 10) - 1); + border.ui[1] = CLAMP(border.ui[1], + 0, (1 << 10) - 1); + border.ui[2] = CLAMP(border.ui[2], + 0, (1 << 10) - 1); + border.ui[3] = CLAMP(border.ui[3], + 0, 3); + break; + + case V3D_SAMPLER_STATE_16U: + for (int i = 0; i < 4; i++) + border.ui[i] = CLAMP(border.ui[i], + 0, 0xffff); + break; + + case V3D_SAMPLER_STATE_16I: + for (int i = 0; i < 4; i++) + border.i[i] = CLAMP(border.i[i], + -32768, 32767); + break; + + case V3D_SAMPLER_STATE_8U: + for (int i = 0; i < 4; i++) + border.ui[i] = CLAMP(border.ui[i], + 0, 0xff); + break; + + case V3D_SAMPLER_STATE_8I: + for (int i = 0; i < 4; i++) + border.i[i] = CLAMP(border.i[i], + -128, 127); + break; + + default: + break; + } + + if (variant >= V3D_SAMPLER_STATE_32) { + sampler.border_color_word_0 = border.ui[0]; + sampler.border_color_word_1 = border.ui[1]; + sampler.border_color_word_2 = border.ui[2]; + sampler.border_color_word_3 = border.ui[3]; + } else { + sampler.border_color_word_0 = + util_float_to_half(border.f[0]); + sampler.border_color_word_1 = + util_float_to_half(border.f[1]); + sampler.border_color_word_2 = + util_float_to_half(border.f[2]); + sampler.border_color_word_3 = + util_float_to_half(border.f[3]); + } + } + } +} +#endif + +static void * +v3d_create_sampler_state(struct 
pipe_context *pctx, + const struct pipe_sampler_state *cso) +{ + MAYBE_UNUSED struct v3d_context *v3d = v3d_context(pctx); + struct v3d_sampler_state *so = CALLOC_STRUCT(v3d_sampler_state); + + if (!so) + return NULL; + + memcpy(so, cso, sizeof(*cso)); + + bool either_nearest = + (cso->mag_img_filter == PIPE_TEX_MIPFILTER_NEAREST || + cso->min_img_filter == PIPE_TEX_MIPFILTER_NEAREST); + + enum V3DX(Wrap_Mode) wrap_s = translate_wrap(cso->wrap_s, + either_nearest); + enum V3DX(Wrap_Mode) wrap_t = translate_wrap(cso->wrap_t, + either_nearest); + enum V3DX(Wrap_Mode) wrap_r = translate_wrap(cso->wrap_r, + either_nearest); + + bool uses_border_color = (wrap_s == V3D_WRAP_MODE_BORDER || + wrap_t == V3D_WRAP_MODE_BORDER || + wrap_r == V3D_WRAP_MODE_BORDER); + so->border_color_variants = (uses_border_color && + (cso->border_color.ui[0] != 0 || + cso->border_color.ui[1] != 0 || + cso->border_color.ui[2] != 0 || + cso->border_color.ui[3] != 0)); + +#if V3D_VERSION >= 40 + void *map; + int sampler_align = so->border_color_variants ? 32 : 8; + int sampler_size = align(cl_packet_length(SAMPLER_STATE), sampler_align); + int num_variants = (so->border_color_variants ? 
ARRAY_SIZE(so->sampler_state_offset) : 1); + u_upload_alloc(v3d->state_uploader, 0, + sampler_size * num_variants, + sampler_align, + &so->sampler_state_offset[0], + &so->sampler_state, + &map); + + for (int i = 0; i < num_variants; i++) { + so->sampler_state_offset[i] = + so->sampler_state_offset[0] + i * sampler_size; + v3d_upload_sampler_state_variant(map + i * sampler_size, + cso, i, either_nearest); } #else /* V3D_VERSION < 40 */ v3dx_pack(&so->p0, TEXTURE_UNIFORM_PARAMETER_0_CFG_MODE1, p0) { - p0.s_wrap_mode = translate_wrap(cso->wrap_s, either_nearest); - p0.t_wrap_mode = translate_wrap(cso->wrap_t, either_nearest); - p0.r_wrap_mode = translate_wrap(cso->wrap_r, either_nearest); + p0.s_wrap_mode = wrap_s; + p0.t_wrap_mode = wrap_t; + p0.r_wrap_mode = wrap_r; } v3dx_pack(&so->texture_shader_state, TEXTURE_SHADER_STATE, tex) { @@ -642,7 +776,7 @@ unsigned nr, void **hwcso) { struct v3d_context *v3d = v3d_context(pctx); - struct v3d_texture_stateobj *stage_tex = v3d_get_stage_tex(v3d, shader); + struct v3d_texture_stateobj *stage_tex = &v3d->tex[shader]; assert(start == 0); unsigned i; @@ -668,7 +802,7 @@ struct pipe_sampler_state *psampler = hwcso; struct v3d_sampler_state *sampler = v3d_sampler_state(psampler); - v3d_bo_unreference(&sampler->bo); + pipe_resource_reference(&sampler->sampler_state, NULL); free(psampler); } @@ -692,6 +826,69 @@ } #endif +static void +v3d_setup_texture_shader_state(struct V3DX(TEXTURE_SHADER_STATE) *tex, + struct pipe_resource *prsc, + int base_level, int last_level, + int first_layer, int last_layer) +{ + struct v3d_resource *rsc = v3d_resource(prsc); + int msaa_scale = prsc->nr_samples > 1 ? 2 : 1; + + tex->image_width = prsc->width0 * msaa_scale; + tex->image_height = prsc->height0 * msaa_scale; + +#if V3D_VERSION >= 40 + /* On 4.x, the height of a 1D texture is redefined to be the + * upper 14 bits of the width (which is only usable with txf). 
+ */ + if (prsc->target == PIPE_TEXTURE_1D || + prsc->target == PIPE_TEXTURE_1D_ARRAY) { + tex->image_height = tex->image_width >> 14; + } +#endif + + if (prsc->target == PIPE_TEXTURE_3D) { + tex->image_depth = prsc->depth0; + } else { + tex->image_depth = (last_layer - first_layer) + 1; + } + + tex->base_level = base_level; +#if V3D_VERSION >= 40 + tex->max_level = last_level; + /* Note that we don't have a job to reference the texture's sBO + * at state create time, so any time this sampler view is used + * we need to add the texture to the job. + */ + tex->texture_base_pointer = + cl_address(NULL, + rsc->bo->offset + + v3d_layer_offset(prsc, 0, first_layer)); +#endif + tex->array_stride_64_byte_aligned = rsc->cube_map_stride / 64; + + /* Since other platform devices may produce UIF images even + * when they're not big enough for V3D to assume they're UIF, + * we force images with level 0 as UIF to be always treated + * that way. + */ + tex->level_0_is_strictly_uif = + (rsc->slices[0].tiling == VC5_TILING_UIF_XOR || + rsc->slices[0].tiling == VC5_TILING_UIF_NO_XOR); + tex->level_0_xor_enable = (rsc->slices[0].tiling == VC5_TILING_UIF_XOR); + + if (tex->level_0_is_strictly_uif) + tex->level_0_ub_pad = rsc->slices[0].ub_pad; + +#if V3D_VERSION >= 40 + if (tex->uif_xor_disable || + tex->level_0_is_strictly_uif) { + tex->extended = true; + } +#endif /* V3D_VERSION >= 40 */ +} + static struct pipe_sampler_view * v3d_create_sampler_view(struct pipe_context *pctx, struct pipe_resource *prsc, const struct pipe_sampler_view *cso) @@ -726,61 +923,149 @@ so->base.reference.count = 1; so->base.context = pctx; - int msaa_scale = prsc->nr_samples > 1 ? 2 : 1; + if (rsc->separate_stencil && + cso->format == PIPE_FORMAT_X32_S8X24_UINT) { + rsc = rsc->separate_stencil; + prsc = &rsc->base; + } + + /* If we're sampling depth from depth/stencil, demote the format to + * just depth. u_format will end up giving the answers for the + * stencil channel, otherwise. 
+ */ + enum pipe_format sample_format = cso->format; + if (sample_format == PIPE_FORMAT_S8_UINT_Z24_UNORM) + sample_format = PIPE_FORMAT_X8Z24_UNORM; #if V3D_VERSION >= 40 - so->bo = v3d_bo_alloc(v3d->screen, - cl_packet_length(TEXTURE_SHADER_STATE), "sampler"); - void *map = v3d_bo_map(so->bo); + const struct util_format_description *desc = + util_format_description(sample_format); - v3dx_pack(map, TEXTURE_SHADER_STATE, tex) { -#else /* V3D_VERSION < 40 */ - STATIC_ASSERT(sizeof(so->texture_shader_state) >= - cl_packet_length(TEXTURE_SHADER_STATE)); - v3dx_pack(&so->texture_shader_state, TEXTURE_SHADER_STATE, tex) { + if (util_format_is_pure_integer(sample_format) && + !util_format_has_depth(desc)) { + int chan = util_format_get_first_non_void_channel(sample_format); + if (util_format_is_pure_uint(sample_format)) { + switch (desc->channel[chan].size) { + case 32: + so->sampler_variant = V3D_SAMPLER_STATE_32; + break; + case 16: + so->sampler_variant = V3D_SAMPLER_STATE_16U; + break; + case 10: + so->sampler_variant = V3D_SAMPLER_STATE_1010102U; + break; + case 8: + so->sampler_variant = V3D_SAMPLER_STATE_8U; + break; + } + } else { + switch (desc->channel[chan].size) { + case 32: + so->sampler_variant = V3D_SAMPLER_STATE_32; + break; + case 16: + so->sampler_variant = V3D_SAMPLER_STATE_16I; + break; + case 8: + so->sampler_variant = V3D_SAMPLER_STATE_8I; + break; + } + } + } else { + if (v3d_get_tex_return_size(&screen->devinfo, sample_format, + PIPE_TEX_COMPARE_NONE) == 32) { + if (util_format_is_alpha(sample_format)) + so->sampler_variant = V3D_SAMPLER_STATE_32_A; + else + so->sampler_variant = V3D_SAMPLER_STATE_32; + } else { + if (util_format_is_luminance_alpha(sample_format)) + so->sampler_variant = V3D_SAMPLER_STATE_F16_LA; + else if (util_format_is_alpha(sample_format)) + so->sampler_variant = V3D_SAMPLER_STATE_F16_A; + else if (fmt_swizzle[0] == PIPE_SWIZZLE_Z) + so->sampler_variant = V3D_SAMPLER_STATE_F16_BGRA; + else + so->sampler_variant = 
V3D_SAMPLER_STATE_F16; + + } + + if (util_format_is_unorm(sample_format)) { + so->sampler_variant += (V3D_SAMPLER_STATE_F16_UNORM - + V3D_SAMPLER_STATE_F16); + } else if (util_format_is_snorm(sample_format)){ + so->sampler_variant += (V3D_SAMPLER_STATE_F16_SNORM - + V3D_SAMPLER_STATE_F16); + } + } #endif - tex.image_width = prsc->width0 * msaa_scale; - tex.image_height = prsc->height0 * msaa_scale; + /* V3D still doesn't support sampling from raster textures, so we will + * have to copy to a temporary tiled texture. + */ + if (!rsc->tiled && !(prsc->target == PIPE_TEXTURE_1D || + prsc->target == PIPE_TEXTURE_1D_ARRAY)) { + struct v3d_resource *shadow_parent = rsc; + struct pipe_resource tmpl = { + .target = prsc->target, + .format = prsc->format, + .width0 = u_minify(prsc->width0, + cso->u.tex.first_level), + .height0 = u_minify(prsc->height0, + cso->u.tex.first_level), + .depth0 = 1, + .array_size = 1, + .bind = PIPE_BIND_SAMPLER_VIEW | PIPE_BIND_RENDER_TARGET, + .last_level = cso->u.tex.last_level - cso->u.tex.first_level, + .nr_samples = prsc->nr_samples, + }; -#if V3D_VERSION >= 40 - /* On 4.x, the height of a 1D texture is redefined to be the - * upper 14 bits of the width (which is only usable with txf). + /* Create the shadow texture. The rest of the sampler view + * setup will use the shadow. */ - if (prsc->target == PIPE_TEXTURE_1D || - prsc->target == PIPE_TEXTURE_1D_ARRAY) { - tex.image_height = tex.image_width >> 14; + prsc = v3d_resource_create(pctx->screen, &tmpl); + if (!prsc) { + free(so); + return NULL; } + rsc = v3d_resource(prsc); + + /* Flag it as needing update of the contents from the parent. 
*/ + rsc->writes = shadow_parent->writes - 1; + assert(rsc->tiled); + + so->texture = prsc; + } else { + pipe_resource_reference(&so->texture, prsc); + } + + void *map; +#if V3D_VERSION >= 40 + so->bo = v3d_bo_alloc(v3d->screen, + cl_packet_length(TEXTURE_SHADER_STATE), "sampler"); + map = v3d_bo_map(so->bo); +#else /* V3D_VERSION < 40 */ + STATIC_ASSERT(sizeof(so->texture_shader_state) >= + cl_packet_length(TEXTURE_SHADER_STATE)); + map = &so->texture_shader_state; #endif - if (prsc->target == PIPE_TEXTURE_3D) { - tex.image_depth = prsc->depth0; - } else { - tex.image_depth = (cso->u.tex.last_layer - - cso->u.tex.first_layer) + 1; - } + v3dx_pack(map, TEXTURE_SHADER_STATE, tex) { + v3d_setup_texture_shader_state(&tex, prsc, + cso->u.tex.first_level, + cso->u.tex.last_level, + cso->u.tex.first_layer, + cso->u.tex.last_layer); tex.srgb = util_format_is_srgb(cso->format); - tex.base_level = cso->u.tex.first_level; #if V3D_VERSION >= 40 - tex.max_level = cso->u.tex.last_level; - /* Note that we don't have a job to reference the texture's sBO - * at state create time, so any time this sampler view is used - * we need to add the texture to the job. - */ - tex.texture_base_pointer = cl_address(NULL, - rsc->bo->offset + - rsc->slices[0].offset + - cso->u.tex.first_layer * - rsc->cube_map_stride), - tex.swizzle_r = translate_swizzle(so->swizzle[0]); tex.swizzle_g = translate_swizzle(so->swizzle[1]); tex.swizzle_b = translate_swizzle(so->swizzle[2]); tex.swizzle_a = translate_swizzle(so->swizzle[3]); #endif - tex.array_stride_64_byte_aligned = rsc->cube_map_stride / 64; if (prsc->nr_samples > 1 && V3D_VERSION < 40) { /* Using texture views to reinterpret formats on our @@ -829,28 +1114,6 @@ tex.texture_type = v3d_get_tex_format(&screen->devinfo, cso->format); } - - /* Since other platform devices may produce UIF images even - * when they're not big enough for V3D to assume they're UIF, - * we force images with level 0 as UIF to be always treated - * that way. 
- */ - tex.level_0_is_strictly_uif = (rsc->slices[0].tiling == - VC5_TILING_UIF_XOR || - rsc->slices[0].tiling == - VC5_TILING_UIF_NO_XOR); - tex.level_0_xor_enable = (rsc->slices[0].tiling == - VC5_TILING_UIF_XOR); - - if (tex.level_0_is_strictly_uif) - tex.level_0_ub_pad = rsc->slices[0].ub_pad; - -#if V3D_VERSION >= 40 - if (tex.uif_xor_disable || - tex.level_0_is_strictly_uif) { - tex.extended = true; - } -#endif /* V3D_VERSION >= 40 */ }; return &so->base; @@ -864,6 +1127,7 @@ v3d_bo_unreference(&sview->bo); pipe_resource_reference(&psview->texture, NULL); + pipe_resource_reference(&sview->texture, NULL); free(psview); } @@ -874,7 +1138,7 @@ struct pipe_sampler_view **views) { struct v3d_context *v3d = v3d_context(pctx); - struct v3d_texture_stateobj *stage_tex = v3d_get_stage_tex(v3d, shader); + struct v3d_texture_stateobj *stage_tex = &v3d->tex[shader]; unsigned i; unsigned new_nr = 0; @@ -950,6 +1214,144 @@ ctx->dirty |= VC5_DIRTY_STREAMOUT; } +static void +v3d_set_shader_buffers(struct pipe_context *pctx, + enum pipe_shader_type shader, + unsigned start, unsigned count, + const struct pipe_shader_buffer *buffers) +{ + struct v3d_context *v3d = v3d_context(pctx); + struct v3d_ssbo_stateobj *so = &v3d->ssbo[shader]; + unsigned mask = 0; + + if (buffers) { + for (unsigned i = 0; i < count; i++) { + unsigned n = i + start; + struct pipe_shader_buffer *buf = &so->sb[n]; + + if ((buf->buffer == buffers[i].buffer) && + (buf->buffer_offset == buffers[i].buffer_offset) && + (buf->buffer_size == buffers[i].buffer_size)) + continue; + + mask |= 1 << n; + + buf->buffer_offset = buffers[i].buffer_offset; + buf->buffer_size = buffers[i].buffer_size; + pipe_resource_reference(&buf->buffer, buffers[i].buffer); + + if (buf->buffer) + so->enabled_mask |= 1 << n; + else + so->enabled_mask &= ~(1 << n); + } + } else { + mask = ((1 << count) - 1) << start; + + for (unsigned i = 0; i < count; i++) { + unsigned n = i + start; + struct pipe_shader_buffer *buf = &so->sb[n]; + + 
pipe_resource_reference(&buf->buffer, NULL); + } + + so->enabled_mask &= ~mask; + } + + v3d->dirty |= VC5_DIRTY_SSBO; +} + +static void +v3d_create_image_view_texture_shader_state(struct v3d_context *v3d, + struct v3d_shaderimg_stateobj *so, + int img) +{ +#if V3D_VERSION >= 40 + struct v3d_image_view *iview = &so->si[img]; + + void *map; + u_upload_alloc(v3d->uploader, 0, cl_packet_length(TEXTURE_SHADER_STATE), + 32, + &iview->tex_state_offset, + &iview->tex_state, + &map); + + struct pipe_resource *prsc = iview->base.resource; + + v3dx_pack(map, TEXTURE_SHADER_STATE, tex) { + v3d_setup_texture_shader_state(&tex, prsc, + iview->base.u.tex.level, + iview->base.u.tex.level, + iview->base.u.tex.first_layer, + iview->base.u.tex.last_layer); + + tex.swizzle_r = translate_swizzle(PIPE_SWIZZLE_X); + tex.swizzle_g = translate_swizzle(PIPE_SWIZZLE_Y); + tex.swizzle_b = translate_swizzle(PIPE_SWIZZLE_Z); + tex.swizzle_a = translate_swizzle(PIPE_SWIZZLE_W); + + tex.texture_type = v3d_get_tex_format(&v3d->screen->devinfo, + iview->base.format); + }; +#else /* V3D_VERSION < 40 */ + /* V3D 3.x doesn't use support shader image load/store operations on + * textures, so it would get lowered in the shader to general memory + * acceses. 
+ */ +#endif +} + +static void +v3d_set_shader_images(struct pipe_context *pctx, + enum pipe_shader_type shader, + unsigned start, unsigned count, + const struct pipe_image_view *images) +{ + struct v3d_context *v3d = v3d_context(pctx); + struct v3d_shaderimg_stateobj *so = &v3d->shaderimg[shader]; + + if (images) { + for (unsigned i = 0; i < count; i++) { + unsigned n = i + start; + struct v3d_image_view *iview = &so->si[n]; + + if ((iview->base.resource == images[i].resource) && + (iview->base.format == images[i].format) && + (iview->base.access == images[i].access) && + !memcmp(&iview->base.u, &images[i].u, + sizeof(iview->base.u))) + continue; + + util_copy_image_view(&iview->base, &images[i]); + + if (iview->base.resource) { + so->enabled_mask |= 1 << n; + v3d_create_image_view_texture_shader_state(v3d, + so, + n); + } else { + so->enabled_mask &= ~(1 << n); + pipe_resource_reference(&iview->tex_state, NULL); + } + } + } else { + for (unsigned i = 0; i < count; i++) { + unsigned n = i + start; + struct v3d_image_view *iview = &so->si[n]; + + pipe_resource_reference(&iview->base.resource, NULL); + pipe_resource_reference(&iview->tex_state, NULL); + } + + if (count == 32) + so->enabled_mask = 0; + else + so->enabled_mask &= ~(((1 << count) - 1) << start); + } + + v3d->dirty |= VC5_DIRTY_SHADER_IMAGE; +} + void v3dX(state_init)(struct pipe_context *pctx) { @@ -989,6 +1391,9 @@ pctx->sampler_view_destroy = v3d_sampler_view_destroy; pctx->set_sampler_views = v3d_set_sampler_views; + pctx->set_shader_buffers = v3d_set_shader_buffers; + pctx->set_shader_images = v3d_set_shader_images; + pctx->create_stream_output_target = v3d_create_stream_output_target; pctx->stream_output_target_destroy = v3d_stream_output_target_destroy; pctx->set_stream_output_targets = v3d_set_stream_output_targets; diff -Nru mesa-18.3.3/src/gallium/drivers/vc4/vc4_bufmgr.c mesa-19.0.1/src/gallium/drivers/vc4/vc4_bufmgr.c --- mesa-18.3.3/src/gallium/drivers/vc4/vc4_bufmgr.c 2018-02-27 
16:44:19.000000000 +0000 +++ mesa-19.0.1/src/gallium/drivers/vc4/vc4_bufmgr.c 2019-03-31 23:16:37.000000000 +0000 @@ -386,7 +386,6 @@ static struct vc4_bo * vc4_bo_open_handle(struct vc4_screen *screen, - uint32_t winsys_stride, uint32_t handle, uint32_t size) { struct vc4_bo *bo; @@ -410,8 +409,7 @@ bo->private = false; #ifdef USE_VC4_SIMULATOR - vc4_simulator_open_from_handle(screen->fd, winsys_stride, - bo->handle, bo->size); + vc4_simulator_open_from_handle(screen->fd, bo->handle, bo->size); bo->map = malloc(bo->size); #endif @@ -423,8 +421,7 @@ } struct vc4_bo * -vc4_bo_open_name(struct vc4_screen *screen, uint32_t name, - uint32_t winsys_stride) +vc4_bo_open_name(struct vc4_screen *screen, uint32_t name) { struct drm_gem_open o = { .name = name @@ -436,11 +433,11 @@ return NULL; } - return vc4_bo_open_handle(screen, winsys_stride, o.handle, o.size); + return vc4_bo_open_handle(screen, o.handle, o.size); } struct vc4_bo * -vc4_bo_open_dmabuf(struct vc4_screen *screen, int fd, uint32_t winsys_stride) +vc4_bo_open_dmabuf(struct vc4_screen *screen, int fd) { uint32_t handle; int ret = drmPrimeFDToHandle(screen->fd, fd, &handle); @@ -457,7 +454,7 @@ return NULL; } - return vc4_bo_open_handle(screen, winsys_stride, handle, size); + return vc4_bo_open_handle(screen, handle, size); } int diff -Nru mesa-18.3.3/src/gallium/drivers/vc4/vc4_bufmgr.h mesa-19.0.1/src/gallium/drivers/vc4/vc4_bufmgr.h --- mesa-18.3.3/src/gallium/drivers/vc4/vc4_bufmgr.h 2018-02-27 16:44:19.000000000 +0000 +++ mesa-19.0.1/src/gallium/drivers/vc4/vc4_bufmgr.h 2019-03-31 23:16:37.000000000 +0000 @@ -66,10 +66,8 @@ uint32_t size); void vc4_bo_last_unreference(struct vc4_bo *bo); void vc4_bo_last_unreference_locked_timed(struct vc4_bo *bo, time_t time); -struct vc4_bo *vc4_bo_open_name(struct vc4_screen *screen, uint32_t name, - uint32_t winsys_stride); -struct vc4_bo *vc4_bo_open_dmabuf(struct vc4_screen *screen, int fd, - uint32_t winsys_stride); +struct vc4_bo *vc4_bo_open_name(struct 
vc4_screen *screen, uint32_t name); +struct vc4_bo *vc4_bo_open_dmabuf(struct vc4_screen *screen, int fd); bool vc4_bo_flink(struct vc4_bo *bo, uint32_t *name); int vc4_bo_get_dmabuf(struct vc4_bo *bo); diff -Nru mesa-18.3.3/src/gallium/drivers/vc4/vc4_context.c mesa-19.0.1/src/gallium/drivers/vc4/vc4_context.c --- mesa-18.3.3/src/gallium/drivers/vc4/vc4_context.c 2018-12-07 18:58:04.000000000 +0000 +++ mesa-19.0.1/src/gallium/drivers/vc4/vc4_context.c 2019-03-31 23:16:37.000000000 +0000 @@ -85,6 +85,18 @@ } static void +vc4_set_debug_callback(struct pipe_context *pctx, + const struct pipe_debug_callback *cb) +{ + struct vc4_context *vc4 = vc4_context(pctx); + + if (cb) + vc4->debug = *cb; + else + memset(&vc4->debug, 0, sizeof(vc4->debug)); +} + +static void vc4_invalidate_resource(struct pipe_context *pctx, struct pipe_resource *prsc) { struct vc4_context *vc4 = vc4_context(pctx); @@ -164,6 +176,7 @@ pctx->priv = priv; pctx->destroy = vc4_context_destroy; pctx->flush = vc4_pipe_flush; + pctx->set_debug_callback = vc4_set_debug_callback; pctx->invalidate_resource = vc4_invalidate_resource; pctx->texture_barrier = vc4_texture_barrier; diff -Nru mesa-18.3.3/src/gallium/drivers/vc4/vc4_context.h mesa-19.0.1/src/gallium/drivers/vc4/vc4_context.h --- mesa-18.3.3/src/gallium/drivers/vc4/vc4_context.h 2018-09-27 19:13:54.000000000 +0000 +++ mesa-19.0.1/src/gallium/drivers/vc4/vc4_context.h 2019-03-31 23:16:37.000000000 +0000 @@ -405,6 +405,7 @@ struct pipe_viewport_state viewport; struct vc4_constbuf_stateobj constbuf[PIPE_SHADER_TYPES]; struct vc4_vertexbuf_stateobj vertexbuf; + struct pipe_debug_callback debug; struct vc4_hwperfmon *perfmon; /** @} */ @@ -451,6 +452,8 @@ #define perf_debug(...) 
do { \ if (unlikely(vc4_debug & VC4_DEBUG_PERF)) \ fprintf(stderr, __VA_ARGS__); \ + if (unlikely(vc4->debug.debug_message)) \ + pipe_debug_message(&vc4->debug, PERF_INFO, __VA_ARGS__); \ } while (0) static inline struct vc4_context * @@ -486,12 +489,8 @@ void vc4_query_init(struct pipe_context *pctx); void vc4_simulator_init(struct vc4_screen *screen); void vc4_simulator_destroy(struct vc4_screen *screen); -int vc4_simulator_flush(struct vc4_context *vc4, - struct drm_vc4_submit_cl *args, - struct vc4_job *job); int vc4_simulator_ioctl(int fd, unsigned long request, void *arg); -void vc4_simulator_open_from_handle(int fd, uint32_t winsys_stride, - int handle, uint32_t size); +void vc4_simulator_open_from_handle(int fd, int handle, uint32_t size); static inline int vc4_ioctl(int fd, unsigned long request, void *arg) diff -Nru mesa-18.3.3/src/gallium/drivers/vc4/vc4_job.c mesa-19.0.1/src/gallium/drivers/vc4/vc4_job.c --- mesa-18.3.3/src/gallium/drivers/vc4/vc4_job.c 2018-12-07 18:58:04.000000000 +0000 +++ mesa-19.0.1/src/gallium/drivers/vc4/vc4_job.c 2019-03-31 23:16:37.000000000 +0000 @@ -492,11 +492,7 @@ if (!(vc4_debug & VC4_DEBUG_NORAST)) { int ret; -#ifndef USE_VC4_SIMULATOR - ret = drmIoctl(vc4->fd, DRM_IOCTL_VC4_SUBMIT_CL, &submit); -#else - ret = vc4_simulator_flush(vc4, &submit, job); -#endif + ret = vc4_ioctl(vc4->fd, DRM_IOCTL_VC4_SUBMIT_CL, &submit); static bool warned = false; if (ret && !warned) { fprintf(stderr, "Draw call returned %s. 
" diff -Nru mesa-18.3.3/src/gallium/drivers/vc4/vc4_nir_lower_blend.c mesa-19.0.1/src/gallium/drivers/vc4/vc4_nir_lower_blend.c --- mesa-18.3.3/src/gallium/drivers/vc4/vc4_nir_lower_blend.c 2017-11-05 00:14:08.000000000 +0000 +++ mesa-19.0.1/src/gallium/drivers/vc4/vc4_nir_lower_blend.c 2019-03-31 23:16:37.000000000 +0000 @@ -42,6 +42,7 @@ #include "util/u_format.h" #include "vc4_qir.h" #include "compiler/nir/nir_builder.h" +#include "compiler/nir/nir_format_convert.h" #include "vc4_context.h" static bool @@ -67,37 +68,6 @@ return &load->dest.ssa; } -static nir_ssa_def * -vc4_nir_srgb_decode(nir_builder *b, nir_ssa_def *srgb) -{ - nir_ssa_def *is_low = nir_flt(b, srgb, nir_imm_float(b, 0.04045)); - nir_ssa_def *low = nir_fmul(b, srgb, nir_imm_float(b, 1.0 / 12.92)); - nir_ssa_def *high = nir_fpow(b, - nir_fmul(b, - nir_fadd(b, srgb, - nir_imm_float(b, 0.055)), - nir_imm_float(b, 1.0 / 1.055)), - nir_imm_float(b, 2.4)); - - return nir_bcsel(b, is_low, low, high); -} - -static nir_ssa_def * -vc4_nir_srgb_encode(nir_builder *b, nir_ssa_def *linear) -{ - nir_ssa_def *is_low = nir_flt(b, linear, nir_imm_float(b, 0.0031308)); - nir_ssa_def *low = nir_fmul(b, linear, nir_imm_float(b, 12.92)); - nir_ssa_def *high = nir_fsub(b, - nir_fmul(b, - nir_imm_float(b, 1.055), - nir_fpow(b, - linear, - nir_imm_float(b, 0.41666))), - nir_imm_float(b, 0.055)); - - return nir_bcsel(b, is_low, low, high); -} - static nir_ssa_def * vc4_blend_channel_f(nir_builder *b, nir_ssa_def **src, @@ -130,7 +100,7 @@ return nir_load_system_value(b, nir_intrinsic_load_blend_const_color_r_float + channel, - 0); + 0, 32); case PIPE_BLENDFACTOR_CONST_ALPHA: return nir_load_blend_const_color_a_float(b); case PIPE_BLENDFACTOR_ZERO: @@ -148,7 +118,7 @@ nir_load_system_value(b, nir_intrinsic_load_blend_const_color_r_float + channel, - 0)); + 0, 32)); case PIPE_BLENDFACTOR_INV_CONST_ALPHA: return nir_fsub(b, nir_imm_float(b, 1.0), nir_load_blend_const_color_a_float(b)); @@ -501,14 +471,14 @@ /* Turn dst 
color to linear. */ for (int i = 0; i < 3; i++) - dst_color[i] = vc4_nir_srgb_decode(b, dst_color[i]); + dst_color[i] = nir_format_srgb_to_linear(b, dst_color[i]); nir_ssa_def *blend_color[4]; vc4_do_blending_f(c, b, blend_color, src_color, dst_color); /* sRGB encode the output color */ for (int i = 0; i < 3; i++) - blend_color[i] = vc4_nir_srgb_encode(b, blend_color[i]); + blend_color[i] = nir_format_linear_to_srgb(b, blend_color[i]); packed_color = vc4_nir_swizzle_and_pack(c, b, blend_color); } else { diff -Nru mesa-18.3.3/src/gallium/drivers/vc4/vc4_nir_lower_io.c mesa-19.0.1/src/gallium/drivers/vc4/vc4_nir_lower_io.c --- mesa-18.3.3/src/gallium/drivers/vc4/vc4_nir_lower_io.c 2017-11-05 00:14:08.000000000 +0000 +++ mesa-19.0.1/src/gallium/drivers/vc4/vc4_nir_lower_io.c 2019-03-31 23:16:37.000000000 +0000 @@ -330,7 +330,8 @@ nir_intrinsic_instr *intr_comp = nir_intrinsic_instr_create(c->s, intr->intrinsic); intr_comp->num_components = 1; - nir_ssa_dest_init(&intr_comp->instr, &intr_comp->dest, 1, 32, NULL); + nir_ssa_dest_init(&intr_comp->instr, &intr_comp->dest, 1, + intr->dest.ssa.bit_size, NULL); /* Convert the uniform offset to bytes. 
If it happens * to be a constant, constant-folding will clean up diff -Nru mesa-18.3.3/src/gallium/drivers/vc4/vc4_program.c mesa-19.0.1/src/gallium/drivers/vc4/vc4_program.c --- mesa-18.3.3/src/gallium/drivers/vc4/vc4_program.c 2018-12-07 18:58:04.000000000 +0000 +++ mesa-19.0.1/src/gallium/drivers/vc4/vc4_program.c 2019-03-31 23:16:37.000000000 +0000 @@ -1004,24 +1004,24 @@ enum qpu_cond cond; switch (compare_instr->op) { - case nir_op_feq: - case nir_op_ieq: + case nir_op_feq32: + case nir_op_ieq32: case nir_op_seq: cond = QPU_COND_ZS; break; - case nir_op_fne: - case nir_op_ine: + case nir_op_fne32: + case nir_op_ine32: case nir_op_sne: cond = QPU_COND_ZC; break; - case nir_op_fge: - case nir_op_ige: - case nir_op_uge: + case nir_op_fge32: + case nir_op_ige32: + case nir_op_uge32: case nir_op_sge: cond = QPU_COND_NC; break; - case nir_op_flt: - case nir_op_ilt: + case nir_op_flt32: + case nir_op_ilt32: case nir_op_slt: cond = QPU_COND_NS; break; @@ -1048,7 +1048,7 @@ qir_uniform_f(c, 1.0), qir_uniform_f(c, 0.0)); break; - case nir_op_bcsel: + case nir_op_b32csel: *dest = qir_SEL(c, cond, ntq_get_alu_src(c, sel_instr, 1), ntq_get_alu_src(c, sel_instr, 2)); @@ -1208,14 +1208,14 @@ case nir_op_u2f32: result = qir_ITOF(c, src[0]); break; - case nir_op_b2f: + case nir_op_b2f32: result = qir_AND(c, src[0], qir_uniform_f(c, 1.0)); break; - case nir_op_b2i: + case nir_op_b2i32: result = qir_AND(c, src[0], qir_uniform_ui(c, 1)); break; - case nir_op_i2b: - case nir_op_f2b: + case nir_op_i2b32: + case nir_op_f2b32: qir_SF(c, src[0]); result = qir_MOV(c, qir_SEL(c, QPU_COND_ZC, qir_uniform_ui(c, ~0), @@ -1264,21 +1264,21 @@ case nir_op_sne: case nir_op_sge: case nir_op_slt: - case nir_op_feq: - case nir_op_fne: - case nir_op_fge: - case nir_op_flt: - case nir_op_ieq: - case nir_op_ine: - case nir_op_ige: - case nir_op_uge: - case nir_op_ilt: + case nir_op_feq32: + case nir_op_fne32: + case nir_op_fge32: + case nir_op_flt32: + case nir_op_ieq32: + case nir_op_ine32: + case 
nir_op_ige32: + case nir_op_uge32: + case nir_op_ilt32: if (!ntq_emit_comparison(c, &result, instr, instr)) { fprintf(stderr, "Bad comparison instruction\n"); } break; - case nir_op_bcsel: + case nir_op_b32csel: result = ntq_emit_bcsel(c, instr, src); break; case nir_op_fcsel: @@ -1591,14 +1591,14 @@ NIR_PASS(progress, s, nir_opt_dce); NIR_PASS(progress, s, nir_opt_dead_cf); NIR_PASS(progress, s, nir_opt_cse); - NIR_PASS(progress, s, nir_opt_peephole_select, 8); + NIR_PASS(progress, s, nir_opt_peephole_select, 8, true); NIR_PASS(progress, s, nir_opt_algebraic); NIR_PASS(progress, s, nir_opt_constant_folding); NIR_PASS(progress, s, nir_opt_undef); NIR_PASS(progress, s, nir_opt_loop_unroll, nir_var_shader_in | nir_var_shader_out | - nir_var_local); + nir_var_function_temp); } while (progress); } @@ -2363,7 +2363,8 @@ if (stage == QSTAGE_FRAG) { NIR_PASS_V(c->s, nir_lower_clip_fs, c->key->ucp_enables); } else { - NIR_PASS_V(c->s, nir_lower_clip_vs, c->key->ucp_enables); + NIR_PASS_V(c->s, nir_lower_clip_vs, + c->key->ucp_enables, false); NIR_PASS_V(c->s, nir_lower_io_to_scalar, nir_var_shader_out); } @@ -2384,6 +2385,8 @@ vc4_optimize_nir(c->s); + NIR_PASS_V(c->s, nir_lower_bool_to_int32); + NIR_PASS_V(c->s, nir_convert_from_ssa, true); if (vc4_debug & VC4_DEBUG_SHADERDB) { @@ -2514,7 +2517,7 @@ vc4_optimize_nir(s); - NIR_PASS_V(s, nir_remove_dead_variables, nir_var_local); + NIR_PASS_V(s, nir_remove_dead_variables, nir_var_function_temp); /* Garbage collect dead instructions */ nir_sweep(s); diff -Nru mesa-18.3.3/src/gallium/drivers/vc4/vc4_query.c mesa-19.0.1/src/gallium/drivers/vc4/vc4_query.c --- mesa-18.3.3/src/gallium/drivers/vc4/vc4_query.c 2018-03-08 23:00:46.000000000 +0000 +++ mesa-19.0.1/src/gallium/drivers/vc4/vc4_query.c 2019-03-31 23:16:37.000000000 +0000 @@ -132,7 +132,7 @@ /* We can't mix HW and non-HW queries. 
*/ if (nhwqueries && nhwqueries != num_queries) - return NULL; + goto err_free_query; if (!nhwqueries) return (struct pipe_query *)query; diff -Nru mesa-18.3.3/src/gallium/drivers/vc4/vc4_resource.c mesa-19.0.1/src/gallium/drivers/vc4/vc4_resource.c --- mesa-18.3.3/src/gallium/drivers/vc4/vc4_resource.c 2018-12-07 18:58:04.000000000 +0000 +++ mesa-19.0.1/src/gallium/drivers/vc4/vc4_resource.c 2019-03-31 23:16:37.000000000 +0000 @@ -319,8 +319,10 @@ return vc4_bo_flink(rsc->bo, &whandle->handle); case WINSYS_HANDLE_TYPE_KMS: - if (screen->ro && renderonly_get_handle(rsc->scanout, whandle)) - return TRUE; + if (screen->ro) { + assert(rsc->scanout); + return renderonly_get_handle(rsc->scanout, whandle); + } whandle->handle = rsc->bo->handle; return TRUE; case WINSYS_HANDLE_TYPE_FD: @@ -622,12 +624,10 @@ switch (whandle->type) { case WINSYS_HANDLE_TYPE_SHARED: - rsc->bo = vc4_bo_open_name(screen, - whandle->handle, whandle->stride); + rsc->bo = vc4_bo_open_name(screen, whandle->handle); break; case WINSYS_HANDLE_TYPE_FD: - rsc->bo = vc4_bo_open_dmabuf(screen, - whandle->handle, whandle->stride); + rsc->bo = vc4_bo_open_dmabuf(screen, whandle->handle); break; default: fprintf(stderr, @@ -1013,6 +1013,7 @@ vc4_update_shadow_baselevel_texture(struct pipe_context *pctx, struct pipe_sampler_view *pview) { + struct vc4_context *vc4 = vc4_context(pctx); struct vc4_sampler_view *view = vc4_sampler_view(pview); struct vc4_resource *shadow = vc4_resource(view->texture); struct vc4_resource *orig = vc4_resource(pview->texture); diff -Nru mesa-18.3.3/src/gallium/drivers/vc4/vc4_screen.c mesa-19.0.1/src/gallium/drivers/vc4/vc4_screen.c --- mesa-18.3.3/src/gallium/drivers/vc4/vc4_screen.c 2019-02-01 12:03:20.000000000 +0000 +++ mesa-19.0.1/src/gallium/drivers/vc4/vc4_screen.c 2019-03-31 23:16:37.000000000 +0000 @@ -178,6 +178,9 @@ /* Note: Not supported in hardware, just faking it. 
*/ return 5; + case PIPE_CAP_MAX_VARYINGS: + return 8; + case PIPE_CAP_VENDOR_ID: return 0x14E4; case PIPE_CAP_ACCELERATED: diff -Nru mesa-18.3.3/src/gallium/drivers/vc4/vc4_simulator.c mesa-19.0.1/src/gallium/drivers/vc4/vc4_simulator.c --- mesa-18.3.3/src/gallium/drivers/vc4/vc4_simulator.c 2018-03-13 20:41:43.000000000 +0000 +++ mesa-19.0.1/src/gallium/drivers/vc4/vc4_simulator.c 2019-03-31 23:16:37.000000000 +0000 @@ -99,10 +99,13 @@ /** Area for this BO within sim_state->mem */ struct mem_block *block; - void *winsys_map; - uint32_t winsys_stride; int handle; + + /* Mapping of the underlying GEM object that we copy in/out of + * simulator memory. + */ + void *gem_vaddr; }; static void * @@ -143,6 +146,7 @@ sim_bo->file = file; sim_bo->handle = handle; + /* Allocate space for the buffer in simulator memory. */ mtx_lock(&sim_state.mutex); sim_bo->block = u_mmAllocMem(sim_state.heap, size + 4, PAGE_ALIGN2, 0); mtx_unlock(&sim_state.mutex); @@ -162,6 +166,25 @@ mtx_lock(&sim_state.mutex); _mesa_hash_table_insert(file->bo_map, int_to_key(handle), bo); mtx_unlock(&sim_state.mutex); + + /* Map the GEM buffer for copy in/out to the simulator. 
*/ + struct drm_mode_map_dumb map = { + .handle = handle, + }; + int ret = drmIoctl(fd, DRM_IOCTL_MODE_MAP_DUMB, &map); + if (ret) { + fprintf(stderr, "Failed to get MMAP offset: %d\n", + errno); + abort(); + } + sim_bo->gem_vaddr = mmap(NULL, obj->base.size, + PROT_READ | PROT_WRITE, MAP_SHARED, + fd, map.offset); + if (sim_bo->gem_vaddr == MAP_FAILED) { + fprintf(stderr, "mmap of bo %d (offset 0x%016llx, size %d) failed\n", + handle, (long long)map.offset, (int)obj->base.size); + abort(); + } } return sim_bo; @@ -174,16 +197,19 @@ struct drm_vc4_bo *bo = &sim_bo->base; struct drm_gem_cma_object *obj = &bo->base; - if (sim_bo->winsys_map) - munmap(sim_bo->winsys_map, obj->base.size); + if (bo->validated_shader) { + free(bo->validated_shader->texture_samples); + free(bo->validated_shader); + } + + if (sim_bo->gem_vaddr) + munmap(sim_bo->gem_vaddr, obj->base.size); mtx_lock(&sim_state.mutex); u_mmFreeMem(sim_bo->block); if (sim_bo->handle) { - struct hash_entry *entry = - _mesa_hash_table_search(sim_file->bo_map, - int_to_key(sim_bo->handle)); - _mesa_hash_table_remove(sim_file->bo_map, entry); + _mesa_hash_table_remove_key(sim_file->bo_map, + int_to_key(sim_bo->handle)); } mtx_unlock(&sim_state.mutex); ralloc_free(sim_bo); @@ -210,41 +236,23 @@ } static int -vc4_simulator_pin_bos(struct drm_device *dev, struct vc4_job *job, +vc4_simulator_pin_bos(struct vc4_simulator_file *file, struct vc4_exec_info *exec) { - int fd = dev->screen->fd; - struct vc4_simulator_file *file = vc4_get_simulator_file_for_fd(fd); struct drm_vc4_submit_cl *args = exec->args; - struct vc4_bo **bos = job->bo_pointers.base; + uint32_t *bo_handles = (uint32_t *)(uintptr_t)args->bo_handles; exec->bo_count = args->bo_handle_count; exec->bo = calloc(exec->bo_count, sizeof(void *)); for (int i = 0; i < exec->bo_count; i++) { - struct vc4_bo *bo = bos[i]; struct vc4_simulator_bo *sim_bo = - vc4_get_simulator_bo(file, bo->handle); + vc4_get_simulator_bo(file, bo_handles[i]); struct drm_vc4_bo *drm_bo 
= &sim_bo->base; struct drm_gem_cma_object *obj = &drm_bo->base; - drm_bo->bo = bo; -#if 0 - fprintf(stderr, "bo hindex %d: %s\n", i, bo->name); -#endif - - vc4_bo_map(bo); - memcpy(obj->vaddr, bo->map, bo->size); + memcpy(obj->vaddr, sim_bo->gem_vaddr, obj->base.size); exec->bo[i] = obj; - - /* The kernel does this validation at shader create ioctl - * time. - */ - if (strcmp(bo->name, "code") == 0) { - drm_bo->validated_shader = vc4_validate_shader(obj); - if (!drm_bo->validated_shader) - abort(); - } } return 0; } @@ -255,16 +263,13 @@ for (int i = 0; i < exec->bo_count; i++) { struct drm_gem_cma_object *obj = exec->bo[i]; struct drm_vc4_bo *drm_bo = to_vc4_bo(&obj->base); - struct vc4_bo *bo = drm_bo->bo; + struct vc4_simulator_bo *sim_bo = + (struct vc4_simulator_bo *)drm_bo; assert(*(uint32_t *)(obj->vaddr + obj->base.size) == BO_SENTINEL); - memcpy(bo->map, obj->vaddr, bo->size); - - if (drm_bo->validated_shader) { - free(drm_bo->validated_shader->texture_samples); - free(drm_bo->validated_shader); - } + if (sim_bo->gem_vaddr) + memcpy(sim_bo->gem_vaddr, obj->vaddr, obj->base.size); } free(exec->bo); @@ -359,19 +364,10 @@ fclose(f); } -int -vc4_simulator_flush(struct vc4_context *vc4, - struct drm_vc4_submit_cl *args, struct vc4_job *job) +static int +vc4_simulator_submit_cl_ioctl(int fd, struct drm_vc4_submit_cl *args) { - struct vc4_screen *screen = vc4->screen; - int fd = screen->fd; struct vc4_simulator_file *file = vc4_get_simulator_file_for_fd(fd); - struct vc4_surface *csurf = vc4_surface(vc4->framebuffer.cbufs[0]); - struct vc4_resource *ctex = csurf ? vc4_resource(csurf->base.texture) : NULL; - struct vc4_simulator_bo *csim_bo = ctex ? vc4_get_simulator_bo(file, ctex->bo->handle) : NULL; - uint32_t winsys_stride = ctex ? csim_bo->winsys_stride : 0; - uint32_t sim_stride = ctex ? 
ctex->slices[0].stride : 0; - uint32_t row_len = MIN2(sim_stride, winsys_stride); struct vc4_exec_info exec; struct drm_device *dev = &file->dev; int ret; @@ -379,25 +375,9 @@ memset(&exec, 0, sizeof(exec)); list_inithead(&exec.unref_list); - if (ctex && csim_bo->winsys_map) { -#if 0 - fprintf(stderr, "%dx%d %d %d %d\n", - ctex->base.b.width0, ctex->base.b.height0, - winsys_stride, - sim_stride, - ctex->bo->size); -#endif - - for (int y = 0; y < ctex->base.height0; y++) { - memcpy(ctex->bo->map + y * sim_stride, - csim_bo->winsys_map + y * winsys_stride, - row_len); - } - } - exec.args = args; - ret = vc4_simulator_pin_bos(dev, job, &exec); + ret = vc4_simulator_pin_bos(file, &exec); if (ret) return ret; @@ -448,65 +428,19 @@ vc4_free_simulator_bo(sim_bo); } - if (ctex && csim_bo->winsys_map) { - for (int y = 0; y < ctex->base.height0; y++) { - memcpy(csim_bo->winsys_map + y * winsys_stride, - ctex->bo->map + y * sim_stride, - row_len); - } - } - return 0; } /** - * Map the underlying GEM object from the real hardware GEM handle. - */ -static void * -vc4_simulator_map_winsys_bo(int fd, struct vc4_simulator_bo *sim_bo) -{ - struct drm_vc4_bo *bo = &sim_bo->base; - struct drm_gem_cma_object *obj = &bo->base; - int ret; - void *map; - - struct drm_mode_map_dumb map_dumb = { - .handle = sim_bo->handle, - }; - ret = drmIoctl(fd, DRM_IOCTL_MODE_MAP_DUMB, &map_dumb); - if (ret != 0) { - fprintf(stderr, "map ioctl failure\n"); - abort(); - } - - map = mmap(NULL, obj->base.size, PROT_READ | PROT_WRITE, MAP_SHARED, - fd, map_dumb.offset); - if (map == MAP_FAILED) { - fprintf(stderr, - "mmap of bo %d (offset 0x%016llx, size %d) failed\n", - sim_bo->handle, (long long)map_dumb.offset, - (int)obj->base.size); - abort(); - } - - return map; -} - -/** * Do fixups after a BO has been opened from a handle. 
* * This could be done at DRM_IOCTL_GEM_OPEN/DRM_IOCTL_GEM_PRIME_FD_TO_HANDLE * time, but we're still using drmPrimeFDToHandle() so we have this helper to * be called afterward instead. */ -void vc4_simulator_open_from_handle(int fd, uint32_t winsys_stride, - int handle, uint32_t size) +void vc4_simulator_open_from_handle(int fd, int handle, uint32_t size) { - struct vc4_simulator_bo *sim_bo = - vc4_create_simulator_bo(fd, handle, size); - - sim_bo->winsys_stride = winsys_stride; - sim_bo->winsys_map = vc4_simulator_map_winsys_bo(fd, sim_bo); + vc4_create_simulator_bo(fd, handle, size); } /** @@ -558,19 +492,22 @@ args->handle = create.handle; - vc4_create_simulator_bo(fd, create.handle, args->size); + struct vc4_simulator_bo *sim_bo = + vc4_create_simulator_bo(fd, create.handle, args->size); + struct drm_vc4_bo *drm_bo = &sim_bo->base; + struct drm_gem_cma_object *obj = &drm_bo->base; - struct drm_mode_map_dumb map = { - .handle = create.handle - }; - ret = drmIoctl(fd, DRM_IOCTL_MODE_MAP_DUMB, &map); - if (ret) - return ret; + /* Copy into the simulator's BO for validation. */ + memcpy(obj->vaddr, (void *)(uintptr_t)args->data, args->size); + + /* Copy into the GEM BO to prevent the simulator_pin_bos() from + * smashing it. 
+ */ + memcpy(sim_bo->gem_vaddr, (void *)(uintptr_t)args->data, args->size); - void *shader = mmap(NULL, args->size, PROT_READ | PROT_WRITE, MAP_SHARED, - fd, map.offset); - memcpy(shader, (void *)(uintptr_t)args->data, args->size); - munmap(shader, args->size); + drm_bo->validated_shader = vc4_validate_shader(obj); + if (!drm_bo->validated_shader) + return -EINVAL; return 0; } @@ -643,6 +580,8 @@ vc4_simulator_ioctl(int fd, unsigned long request, void *args) { switch (request) { + case DRM_IOCTL_VC4_SUBMIT_CL: + return vc4_simulator_submit_cl_ioctl(fd, args); case DRM_IOCTL_VC4_CREATE_BO: return vc4_simulator_create_bo_ioctl(fd, args); case DRM_IOCTL_VC4_CREATE_SHADER_BO: diff -Nru mesa-18.3.3/src/gallium/drivers/vc4/vc4_simulator_validate.h mesa-19.0.1/src/gallium/drivers/vc4/vc4_simulator_validate.h --- mesa-18.3.3/src/gallium/drivers/vc4/vc4_simulator_validate.h 2017-11-05 00:14:08.000000000 +0000 +++ mesa-19.0.1/src/gallium/drivers/vc4/vc4_simulator_validate.h 2019-03-31 23:16:37.000000000 +0000 @@ -94,7 +94,6 @@ struct drm_vc4_bo { struct drm_gem_cma_object base; - struct vc4_bo *bo; struct vc4_validated_shader_info *validated_shader; struct list_head unref_head; }; diff -Nru mesa-18.3.3/src/gallium/drivers/vc4/vc4_tiling_lt.c mesa-19.0.1/src/gallium/drivers/vc4/vc4_tiling_lt.c --- mesa-18.3.3/src/gallium/drivers/vc4/vc4_tiling_lt.c 2019-02-01 12:03:20.000000000 +0000 +++ mesa-19.0.1/src/gallium/drivers/vc4/vc4_tiling_lt.c 2019-03-31 23:16:37.000000000 +0000 @@ -26,7 +26,7 @@ * Helper functions from vc4_tiling.c that will be compiled for using NEON * assembly or not. * - * If VC4_BUILD_NEON is set, then the functions will be suffixed with _neon. + * If V3D_BUILD_NEON is set, then the functions will be suffixed with _neon. * They will only use NEON assembly if __ARM_ARCH is also set, to keep the x86 * sim build working. 
*/ @@ -34,8 +34,9 @@ #include #include "pipe/p_state.h" #include "vc4_tiling.h" +#include "broadcom/common/v3d_cpu_tiling.h" -#ifdef VC4_BUILD_NEON +#ifdef V3D_BUILD_NEON #define NEON_TAG(x) x ## _neon #else #define NEON_TAG(x) x ## _base @@ -63,217 +64,6 @@ } } -static void -vc4_load_utile(void *cpu, void *gpu, uint32_t cpu_stride, uint32_t cpp) -{ - uint32_t gpu_stride = vc4_utile_stride(cpp); -#if defined(VC4_BUILD_NEON) && defined(PIPE_ARCH_ARM) - if (gpu_stride == 8) { - __asm__ volatile ( - /* Load from the GPU in one shot, no interleave, to - * d0-d7. - */ - "vldm %[gpu], {q0, q1, q2, q3}\n" - /* Store each 8-byte line to cpu-side destination, - * incrementing it by the stride each time. - */ - "vst1.8 d0, [%[cpu]], %[cpu_stride]\n" - "vst1.8 d1, [%[cpu]], %[cpu_stride]\n" - "vst1.8 d2, [%[cpu]], %[cpu_stride]\n" - "vst1.8 d3, [%[cpu]], %[cpu_stride]\n" - "vst1.8 d4, [%[cpu]], %[cpu_stride]\n" - "vst1.8 d5, [%[cpu]], %[cpu_stride]\n" - "vst1.8 d6, [%[cpu]], %[cpu_stride]\n" - "vst1.8 d7, [%[cpu]]\n" - : [cpu] "+r"(cpu) - : [gpu] "r"(gpu), - [cpu_stride] "r"(cpu_stride) - : "q0", "q1", "q2", "q3"); - } else { - assert(gpu_stride == 16); - void *cpu2 = cpu + 8; - __asm__ volatile ( - /* Load from the GPU in one shot, no interleave, to - * d0-d7. - */ - "vldm %[gpu], {q0, q1, q2, q3};\n" - /* Store each 16-byte line in 2 parts to the cpu-side - * destination. (vld1 can only store one d-register - * at a time). 
- */ - "vst1.8 d0, [%[cpu]], %[cpu_stride]\n" - "vst1.8 d1, [%[cpu2]],%[cpu_stride]\n" - "vst1.8 d2, [%[cpu]], %[cpu_stride]\n" - "vst1.8 d3, [%[cpu2]],%[cpu_stride]\n" - "vst1.8 d4, [%[cpu]], %[cpu_stride]\n" - "vst1.8 d5, [%[cpu2]],%[cpu_stride]\n" - "vst1.8 d6, [%[cpu]]\n" - "vst1.8 d7, [%[cpu2]]\n" - : [cpu] "+r"(cpu), - [cpu2] "+r"(cpu2) - : [gpu] "r"(gpu), - [cpu_stride] "r"(cpu_stride) - : "q0", "q1", "q2", "q3"); - } -#elif defined (PIPE_ARCH_AARCH64) - if (gpu_stride == 8) { - __asm__ volatile ( - /* Load from the GPU in one shot, no interleave, to - * d0-d7. - */ - "ld1 {v0.2d, v1.2d, v2.2d, v3.2d}, [%[gpu]]\n" - /* Store each 8-byte line to cpu-side destination, - * incrementing it by the stride each time. - */ - "st1 {v0.D}[0], [%[cpu]], %[cpu_stride]\n" - "st1 {v0.D}[1], [%[cpu]], %[cpu_stride]\n" - "st1 {v1.D}[0], [%[cpu]], %[cpu_stride]\n" - "st1 {v1.D}[1], [%[cpu]], %[cpu_stride]\n" - "st1 {v2.D}[0], [%[cpu]], %[cpu_stride]\n" - "st1 {v2.D}[1], [%[cpu]], %[cpu_stride]\n" - "st1 {v3.D}[0], [%[cpu]], %[cpu_stride]\n" - "st1 {v3.D}[1], [%[cpu]]\n" - : [cpu] "+r"(cpu) - : [gpu] "r"(gpu), - [cpu_stride] "r"(cpu_stride) - : "v0", "v1", "v2", "v3"); - } else { - assert(gpu_stride == 16); - void *cpu2 = cpu + 8; - __asm__ volatile ( - /* Load from the GPU in one shot, no interleave, to - * d0-d7. - */ - "ld1 {v0.2d, v1.2d, v2.2d, v3.2d}, [%[gpu]]\n" - /* Store each 16-byte line in 2 parts to the cpu-side - * destination. (vld1 can only store one d-register - * at a time). 
- */ - "st1 {v0.D}[0], [%[cpu]], %[cpu_stride]\n" - "st1 {v0.D}[1], [%[cpu2]],%[cpu_stride]\n" - "st1 {v1.D}[0], [%[cpu]], %[cpu_stride]\n" - "st1 {v1.D}[1], [%[cpu2]],%[cpu_stride]\n" - "st1 {v2.D}[0], [%[cpu]], %[cpu_stride]\n" - "st1 {v2.D}[1], [%[cpu2]],%[cpu_stride]\n" - "st1 {v3.D}[0], [%[cpu]]\n" - "st1 {v3.D}[1], [%[cpu2]]\n" - : [cpu] "+r"(cpu), - [cpu2] "+r"(cpu2) - : [gpu] "r"(gpu), - [cpu_stride] "r"(cpu_stride) - : "v0", "v1", "v2", "v3"); - } -#else - for (uint32_t gpu_offset = 0; gpu_offset < 64; gpu_offset += gpu_stride) { - memcpy(cpu, gpu + gpu_offset, gpu_stride); - cpu += cpu_stride; - } -#endif -} - -static void -vc4_store_utile(void *gpu, void *cpu, uint32_t cpu_stride, uint32_t cpp) -{ - uint32_t gpu_stride = vc4_utile_stride(cpp); - -#if defined(VC4_BUILD_NEON) && defined(PIPE_ARCH_ARM) - if (gpu_stride == 8) { - __asm__ volatile ( - /* Load each 8-byte line from cpu-side source, - * incrementing it by the stride each time. - */ - "vld1.8 d0, [%[cpu]], %[cpu_stride]\n" - "vld1.8 d1, [%[cpu]], %[cpu_stride]\n" - "vld1.8 d2, [%[cpu]], %[cpu_stride]\n" - "vld1.8 d3, [%[cpu]], %[cpu_stride]\n" - "vld1.8 d4, [%[cpu]], %[cpu_stride]\n" - "vld1.8 d5, [%[cpu]], %[cpu_stride]\n" - "vld1.8 d6, [%[cpu]], %[cpu_stride]\n" - "vld1.8 d7, [%[cpu]]\n" - /* Load from the GPU in one shot, no interleave, to - * d0-d7. - */ - "vstm %[gpu], {q0, q1, q2, q3}\n" - : [cpu] "r"(cpu) - : [gpu] "r"(gpu), - [cpu_stride] "r"(cpu_stride) - : "q0", "q1", "q2", "q3"); - } else { - assert(gpu_stride == 16); - void *cpu2 = cpu + 8; - __asm__ volatile ( - /* Load each 16-byte line in 2 parts from the cpu-side - * destination. (vld1 can only store one d-register - * at a time). 
- */ - "vld1.8 d0, [%[cpu]], %[cpu_stride]\n" - "vld1.8 d1, [%[cpu2]],%[cpu_stride]\n" - "vld1.8 d2, [%[cpu]], %[cpu_stride]\n" - "vld1.8 d3, [%[cpu2]],%[cpu_stride]\n" - "vld1.8 d4, [%[cpu]], %[cpu_stride]\n" - "vld1.8 d5, [%[cpu2]],%[cpu_stride]\n" - "vld1.8 d6, [%[cpu]]\n" - "vld1.8 d7, [%[cpu2]]\n" - /* Store to the GPU in one shot, no interleave. */ - "vstm %[gpu], {q0, q1, q2, q3}\n" - : [cpu] "+r"(cpu), - [cpu2] "+r"(cpu2) - : [gpu] "r"(gpu), - [cpu_stride] "r"(cpu_stride) - : "q0", "q1", "q2", "q3"); - } -#elif defined (PIPE_ARCH_AARCH64) - if (gpu_stride == 8) { - __asm__ volatile ( - /* Load each 8-byte line from cpu-side source, - * incrementing it by the stride each time. - */ - "ld1 {v0.D}[0], [%[cpu]], %[cpu_stride]\n" - "ld1 {v0.D}[1], [%[cpu]], %[cpu_stride]\n" - "ld1 {v1.D}[0], [%[cpu]], %[cpu_stride]\n" - "ld1 {v1.D}[1], [%[cpu]], %[cpu_stride]\n" - "ld1 {v2.D}[0], [%[cpu]], %[cpu_stride]\n" - "ld1 {v2.D}[1], [%[cpu]], %[cpu_stride]\n" - "ld1 {v3.D}[0], [%[cpu]], %[cpu_stride]\n" - "ld1 {v3.D}[1], [%[cpu]]\n" - /* Store to the GPU in one shot, no interleave. */ - "st1 {v0.2d, v1.2d, v2.2d, v3.2d}, [%[gpu]]\n" - : [cpu] "+r"(cpu) - : [gpu] "r"(gpu), - [cpu_stride] "r"(cpu_stride) - : "v0", "v1", "v2", "v3"); - } else { - assert(gpu_stride == 16); - void *cpu2 = cpu + 8; - __asm__ volatile ( - /* Load each 16-byte line in 2 parts from the cpu-side - * destination. (vld1 can only store one d-register - * at a time). - */ - "ld1 {v0.D}[0], [%[cpu]], %[cpu_stride]\n" - "ld1 {v0.D}[1], [%[cpu2]],%[cpu_stride]\n" - "ld1 {v1.D}[0], [%[cpu]], %[cpu_stride]\n" - "ld1 {v1.D}[1], [%[cpu2]],%[cpu_stride]\n" - "ld1 {v2.D}[0], [%[cpu]], %[cpu_stride]\n" - "ld1 {v2.D}[1], [%[cpu2]],%[cpu_stride]\n" - "ld1 {v3.D}[0], [%[cpu]]\n" - "ld1 {v3.D}[1], [%[cpu2]]\n" - /* Store to the GPU in one shot, no interleave. 
*/ - "st1 {v0.2d, v1.2d, v2.2d, v3.2d}, [%[gpu]]\n" - : [cpu] "+r"(cpu), - [cpu2] "+r"(cpu2) - : [gpu] "r"(gpu), - [cpu_stride] "r"(cpu_stride) - : "v0", "v1", "v2", "v3"); - } -#else - for (uint32_t gpu_offset = 0; gpu_offset < 64; gpu_offset += gpu_stride) { - memcpy(gpu + gpu_offset, cpu, gpu_stride); - cpu += cpu_stride; - } -#endif - -} /** * Returns the X value into the address bits for LT tiling. * @@ -349,6 +139,7 @@ { uint32_t utile_w = vc4_utile_width(cpp); uint32_t utile_h = vc4_utile_height(cpp); + uint32_t utile_stride = vc4_utile_stride(cpp); uint32_t xstart = box->x; uint32_t ystart = box->y; @@ -357,15 +148,17 @@ void *gpu_tile = gpu + ((ystart + y) * gpu_stride + (xstart + x) * 64 / utile_w); if (to_cpu) { - vc4_load_utile(cpu + (cpu_stride * y + + v3d_load_utile(cpu + (cpu_stride * y + x * cpp), + cpu_stride, gpu_tile, - cpu_stride, cpp); + utile_stride); } else { - vc4_store_utile(gpu_tile, + v3d_store_utile(gpu_tile, + utile_stride, cpu + (cpu_stride * y + x * cpp), - cpu_stride, cpp); + cpu_stride); } } } diff -Nru mesa-18.3.3/src/gallium/drivers/vc4/vc4_tiling_lt_neon.c mesa-19.0.1/src/gallium/drivers/vc4/vc4_tiling_lt_neon.c --- mesa-18.3.3/src/gallium/drivers/vc4/vc4_tiling_lt_neon.c 2017-11-05 00:14:08.000000000 +0000 +++ mesa-19.0.1/src/gallium/drivers/vc4/vc4_tiling_lt_neon.c 2019-03-31 23:16:37.000000000 +0000 @@ -26,5 +26,5 @@ * single file. 
*/ -#define VC4_BUILD_NEON +#define V3D_BUILD_NEON #include "vc4_tiling_lt.c" diff -Nru mesa-18.3.3/src/gallium/drivers/virgl/virgl_buffer.c mesa-19.0.1/src/gallium/drivers/virgl/virgl_buffer.c --- mesa-18.3.3/src/gallium/drivers/virgl/virgl_buffer.c 2018-12-07 18:58:04.000000000 +0000 +++ mesa-19.0.1/src/gallium/drivers/virgl/virgl_buffer.c 2019-03-31 23:16:37.000000000 +0000 @@ -27,17 +27,6 @@ #include "virgl_resource.h" #include "virgl_screen.h" -static void virgl_buffer_destroy(struct pipe_screen *screen, - struct pipe_resource *buf) -{ - struct virgl_screen *vs = virgl_screen(screen); - struct virgl_buffer *vbuf = virgl_buffer(buf); - - util_range_destroy(&vbuf->valid_buffer_range); - vs->vws->resource_unref(vs->vws, vbuf->base.hw_res); - FREE(vbuf); -} - static void *virgl_buffer_transfer_map(struct pipe_context *ctx, struct pipe_resource *resource, unsigned level, @@ -47,52 +36,40 @@ { struct virgl_context *vctx = virgl_context(ctx); struct virgl_screen *vs = virgl_screen(ctx->screen); - struct virgl_buffer *vbuf = virgl_buffer(resource); + struct virgl_resource *vbuf = virgl_resource(resource); struct virgl_transfer *trans; void *ptr; bool readback; - uint32_t offset; bool doflushwait = false; - if ((usage & PIPE_TRANSFER_READ) && (vbuf->on_list == TRUE)) + if (usage & PIPE_TRANSFER_READ) doflushwait = true; else - doflushwait = virgl_res_needs_flush_wait(vctx, &vbuf->base, usage); + doflushwait = virgl_res_needs_flush_wait(vctx, vbuf, usage); if (doflushwait) ctx->flush(ctx, NULL, 0); - trans = slab_alloc(&vctx->texture_transfer_pool); - if (!trans) - return NULL; + trans = virgl_resource_create_transfer(ctx, resource, &vbuf->metadata, level, + usage, box); - trans->base.resource = resource; - trans->base.level = level; - trans->base.usage = usage; - trans->base.box = *box; - trans->base.stride = 0; - trans->base.layer_stride = 0; - - offset = box->x; - - readback = virgl_res_needs_readback(vctx, &vbuf->base, usage); + readback = 
virgl_res_needs_readback(vctx, vbuf, usage); if (readback) - vs->vws->transfer_get(vs->vws, vbuf->base.hw_res, box, trans->base.stride, trans->base.layer_stride, offset, level); + vs->vws->transfer_get(vs->vws, vbuf->hw_res, box, trans->base.stride, + trans->l_stride, trans->offset, level); if (!(usage & PIPE_TRANSFER_UNSYNCHRONIZED)) doflushwait = true; if (doflushwait || readback) - vs->vws->resource_wait(vs->vws, vbuf->base.hw_res); + vs->vws->resource_wait(vs->vws, vbuf->hw_res); - ptr = vs->vws->resource_map(vs->vws, vbuf->base.hw_res); + ptr = vs->vws->resource_map(vs->vws, vbuf->hw_res); if (!ptr) { return NULL; } - trans->offset = offset; *transfer = &trans->base; - return ptr + trans->offset; } @@ -101,73 +78,61 @@ { struct virgl_context *vctx = virgl_context(ctx); struct virgl_transfer *trans = virgl_transfer(transfer); - struct virgl_buffer *vbuf = virgl_buffer(transfer->resource); + struct virgl_resource *vbuf = virgl_resource(transfer->resource); if (trans->base.usage & PIPE_TRANSFER_WRITE) { - if (!(transfer->usage & PIPE_TRANSFER_FLUSH_EXPLICIT)) { - struct virgl_screen *vs = virgl_screen(ctx->screen); - vctx->num_transfers++; - vs->vws->transfer_put(vs->vws, vbuf->base.hw_res, - &transfer->box, trans->base.stride, trans->base.layer_stride, trans->offset, transfer->level); - + struct virgl_screen *vs = virgl_screen(ctx->screen); + if (transfer->usage & PIPE_TRANSFER_FLUSH_EXPLICIT) { + if (trans->range.end <= trans->range.start) + goto out; + + transfer->box.x += trans->range.start; + transfer->box.width = trans->range.end - trans->range.start; + trans->offset = transfer->box.x; } + + vctx->num_transfers++; + vs->vws->transfer_put(vs->vws, vbuf->hw_res, + &transfer->box, trans->base.stride, + trans->l_stride, trans->offset, + transfer->level); + } - slab_free(&vctx->texture_transfer_pool, trans); +out: + virgl_resource_destroy_transfer(vctx, trans); } static void virgl_buffer_transfer_flush_region(struct pipe_context *ctx, struct pipe_transfer 
*transfer, const struct pipe_box *box) { - struct virgl_context *vctx = virgl_context(ctx); - struct virgl_buffer *vbuf = virgl_buffer(transfer->resource); - - if (!vbuf->on_list) { - struct pipe_resource *res = NULL; - - list_addtail(&vbuf->flush_list, &vctx->to_flush_bufs); - vbuf->on_list = TRUE; - pipe_resource_reference(&res, &vbuf->base.u.b); - } - - util_range_add(&vbuf->valid_buffer_range, transfer->box.x + box->x, - transfer->box.x + box->x + box->width); + struct virgl_resource *vbuf = virgl_resource(transfer->resource); + struct virgl_transfer *trans = virgl_transfer(transfer); - vbuf->base.clean = FALSE; + /* + * FIXME: This is not optimal. For example, + * + * glMapBufferRange(.., 0, 100, GL_MAP_FLUSH_EXPLICIT_BIT) + * glFlushMappedBufferRange(.., 25, 30) + * glFlushMappedBufferRange(.., 65, 70) + * + * We'll end up flushing 25 --> 70. + */ + util_range_add(&trans->range, box->x, box->x + box->width); + vbuf->clean = FALSE; } static const struct u_resource_vtbl virgl_buffer_vtbl = { u_default_resource_get_handle, /* get_handle */ - virgl_buffer_destroy, /* resource_destroy */ + virgl_resource_destroy, /* resource_destroy */ virgl_buffer_transfer_map, /* transfer_map */ virgl_buffer_transfer_flush_region, /* transfer_flush_region */ virgl_buffer_transfer_unmap, /* transfer_unmap */ }; -struct pipe_resource *virgl_buffer_create(struct virgl_screen *vs, - const struct pipe_resource *template) +void virgl_buffer_init(struct virgl_resource *res) { - struct virgl_buffer *buf; - uint32_t size; - uint32_t vbind; - buf = CALLOC_STRUCT(virgl_buffer); - buf->base.clean = TRUE; - buf->base.u.b = *template; - buf->base.u.b.screen = &vs->base; - buf->base.u.vtbl = &virgl_buffer_vtbl; - pipe_reference_init(&buf->base.u.b.reference, 1); - util_range_init(&buf->valid_buffer_range); - - vbind = pipe_to_virgl_bind(template->bind); - size = template->width0; - - /* SSBOs and texture buffers can written to by host compute shaders. 
*/ - if (vbind == VIRGL_BIND_SHADER_BUFFER || vbind == VIRGL_BIND_SAMPLER_VIEW) - buf->base.clean = FALSE; - buf->base.hw_res = vs->vws->resource_create(vs->vws, template->target, template->format, vbind, template->width0, 1, 1, 1, 0, 0, size); - - util_range_set_empty(&buf->valid_buffer_range); - return &buf->base.u.b; + res->u.vtbl = &virgl_buffer_vtbl; } diff -Nru mesa-18.3.3/src/gallium/drivers/virgl/virgl_context.c mesa-19.0.1/src/gallium/drivers/virgl/virgl_context.c --- mesa-18.3.3/src/gallium/drivers/virgl/virgl_context.c 2019-02-01 12:03:20.000000000 +0000 +++ mesa-19.0.1/src/gallium/drivers/virgl/virgl_context.c 2019-03-31 23:16:37.000000000 +0000 @@ -21,6 +21,7 @@ * USE OR OTHER DEALINGS IN THE SOFTWARE. */ +#include #include "pipe/p_shader_tokens.h" #include "pipe/p_context.h" @@ -59,29 +60,6 @@ return ++next_handle; } -static void virgl_buffer_flush(struct virgl_context *vctx, - struct virgl_buffer *vbuf) -{ - struct virgl_screen *rs = virgl_screen(vctx->base.screen); - struct pipe_box box; - - assert(vbuf->on_list); - - box.height = 1; - box.depth = 1; - box.y = 0; - box.z = 0; - - box.x = vbuf->valid_buffer_range.start; - box.width = MIN2(vbuf->valid_buffer_range.end - vbuf->valid_buffer_range.start, vbuf->base.u.b.width0); - - vctx->num_transfers++; - rs->vws->transfer_put(rs->vws, vbuf->base.hw_res, - &box, 0, 0, box.x, 0); - - util_range_set_empty(&vbuf->valid_buffer_range); -} - static void virgl_attach_res_framebuffer(struct virgl_context *vctx) { struct virgl_winsys *vws = virgl_screen(vctx->base.screen)->vws; @@ -251,6 +229,11 @@ if (!surf) return NULL; + assert(ctx->screen->get_param(ctx->screen, + PIPE_CAP_DEST_SURFACE_SRGB_CONTROL) || + (util_format_is_srgb(templ->format) == + util_format_is_srgb(resource->format))); + res->clean = FALSE; handle = virgl_object_assign_handle(); pipe_reference_init(&surf->base.reference, 1); @@ -344,19 +327,27 @@ const struct pipe_rasterizer_state *rs_state) { struct virgl_context *vctx = virgl_context(ctx); 
- uint32_t handle; - handle = virgl_object_assign_handle(); + struct virgl_rasterizer_state *vrs = CALLOC_STRUCT(virgl_rasterizer_state); - virgl_encode_rasterizer_state(vctx, handle, rs_state); - return (void *)(unsigned long)handle; + if (!vrs) + return NULL; + vrs->rs = *rs_state; + vrs->handle = virgl_object_assign_handle(); + + virgl_encode_rasterizer_state(vctx, vrs->handle, rs_state); + return (void *)vrs; } static void virgl_bind_rasterizer_state(struct pipe_context *ctx, void *rs_state) { struct virgl_context *vctx = virgl_context(ctx); - uint32_t handle = (unsigned long)rs_state; - + uint32_t handle = 0; + if (rs_state) { + struct virgl_rasterizer_state *vrs = rs_state; + vctx->rs_state = *vrs; + handle = vrs->handle; + } virgl_encode_bind_object(vctx, handle, VIRGL_OBJECT_RASTERIZER); } @@ -364,8 +355,9 @@ void *rs_state) { struct virgl_context *vctx = virgl_context(ctx); - uint32_t handle = (unsigned long)rs_state; - virgl_encode_delete_object(vctx, handle, VIRGL_OBJECT_RASTERIZER); + struct virgl_rasterizer_state *vrs = rs_state; + virgl_encode_delete_object(vctx, vrs->handle, VIRGL_OBJECT_RASTERIZER); + FREE(vrs); } static void virgl_set_framebuffer_state(struct pipe_context *ctx, @@ -455,10 +447,8 @@ vctx->vertex_array_dirty = TRUE; } -static void virgl_hw_set_vertex_buffers(struct pipe_context *ctx) +static void virgl_hw_set_vertex_buffers(struct virgl_context *vctx) { - struct virgl_context *vctx = virgl_context(ctx); - if (vctx->vertex_array_dirty) { struct virgl_vertex_elements_state *ve = vctx->vertex_elements; @@ -489,10 +479,9 @@ virgl_encoder_set_blend_color(vctx, color); } -static void virgl_hw_set_index_buffer(struct pipe_context *ctx, +static void virgl_hw_set_index_buffer(struct virgl_context *vctx, struct virgl_indexbuf *ib) { - struct virgl_context *vctx = virgl_context(ctx); virgl_encoder_set_index_buffer(vctx, ib); virgl_attach_res_index_buffer(vctx, ib); } @@ -531,14 +520,13 @@ struct virgl_context *vctx = virgl_context(ctx); struct 
virgl_screen *vs = virgl_screen(ctx->screen); struct virgl_resource *grres = virgl_resource(res); - struct virgl_buffer *vbuf = virgl_buffer(res); grres->clean = FALSE; - if (virgl_res_needs_flush_wait(vctx, &vbuf->base, usage)) { + if (virgl_res_needs_flush_wait(vctx, grres, usage)) { ctx->flush(ctx, NULL, 0); - vs->vws->resource_wait(vs->vws, vbuf->base.hw_res); + vs->vws->resource_wait(vs->vws, grres->hw_res); } virgl_encoder_inline_write(vctx, grres, level, usage, @@ -721,6 +709,7 @@ return; if (!(rs->caps.caps.v1.prim_mask & (1 << dinfo->mode))) { + util_primconvert_save_rasterizer_state(vctx->primconvert, &vctx->rs_state.rs); util_primconvert_draw_vbo(vctx->primconvert, dinfo); return; } @@ -740,9 +729,9 @@ u_upload_unmap(vctx->uploader); vctx->num_draws++; - virgl_hw_set_vertex_buffers(ctx); + virgl_hw_set_vertex_buffers(vctx); if (info.index_size) - virgl_hw_set_index_buffer(ctx, &ib); + virgl_hw_set_index_buffer(vctx, &ib); virgl_encoder_draw_vbo(vctx, &info); @@ -750,13 +739,20 @@ } -static void virgl_flush_eq(struct virgl_context *ctx, void *closure) +static void virgl_flush_eq(struct virgl_context *ctx, void *closure, + struct pipe_fence_handle **fence) { struct virgl_screen *rs = virgl_screen(ctx->base.screen); + int out_fence_fd = -1; /* send the buffer to the remote side for decoding */ ctx->num_transfers = ctx->num_draws = 0; - rs->vws->submit_cmd(rs->vws, ctx->cbuf); + + rs->vws->submit_cmd(rs->vws, ctx->cbuf, ctx->cbuf->in_fence_fd, + ctx->cbuf->needs_out_fence_fd ? 
&out_fence_fd : NULL); + + if (fence) + *fence = rs->vws->cs_create_fence(rs->vws, out_fence_fd); virgl_encoder_set_sub_ctx(ctx, ctx->hw_sub_ctx_id); @@ -769,21 +765,17 @@ enum pipe_flush_flags flags) { struct virgl_context *vctx = virgl_context(ctx); - struct virgl_screen *rs = virgl_screen(ctx->screen); - struct virgl_buffer *buf, *tmp; - if (fence) - *fence = rs->vws->cs_create_fence(rs->vws); + if (flags & PIPE_FLUSH_FENCE_FD) + vctx->cbuf->needs_out_fence_fd = true; - LIST_FOR_EACH_ENTRY_SAFE(buf, tmp, &vctx->to_flush_bufs, flush_list) { - struct pipe_resource *res = &buf->base.u.b; - virgl_buffer_flush(vctx, buf); - list_del(&buf->flush_list); - buf->on_list = FALSE; - pipe_resource_reference(&res, NULL); + virgl_flush_eq(vctx, vctx, fence); + if (vctx->cbuf->in_fence_fd != -1) { + close(vctx->cbuf->in_fence_fd); + vctx->cbuf->in_fence_fd = -1; } - virgl_flush_eq(vctx, vctx); + vctx->cbuf->needs_out_fence_fd = false; } static struct pipe_sampler_view *virgl_create_sampler_view(struct pipe_context *ctx, @@ -1002,6 +994,11 @@ struct virgl_resource *dres = virgl_resource(blit->dst.resource); struct virgl_resource *sres = virgl_resource(blit->src.resource); + assert(ctx->screen->get_param(ctx->screen, + PIPE_CAP_DEST_SURFACE_SRGB_CONTROL) || + (util_format_is_srgb(blit->dst.resource->format) == + util_format_is_srgb(blit->dst.format))); + dres->clean = FALSE; virgl_encode_blit(vctx, dres, sres, blit); @@ -1057,6 +1054,28 @@ virgl_encode_set_shader_buffers(vctx, shader, start_slot, count, buffers); } +static void virgl_create_fence_fd(struct pipe_context *ctx, + struct pipe_fence_handle **fence, + int fd, + enum pipe_fd_type type) +{ + assert(type == PIPE_FD_TYPE_NATIVE_SYNC); + struct virgl_screen *rs = virgl_screen(ctx->screen); + + if (rs->vws->cs_create_fence) + *fence = rs->vws->cs_create_fence(rs->vws, fd); +} + +static void virgl_fence_server_sync(struct pipe_context *ctx, + struct pipe_fence_handle *fence) +{ + struct virgl_context *vctx = 
virgl_context(ctx); + struct virgl_screen *rs = virgl_screen(ctx->screen); + + if (rs->vws->fence_server_sync) + rs->vws->fence_server_sync(rs->vws, vctx->cbuf, fence); +} + static void virgl_set_shader_images(struct pipe_context *ctx, enum pipe_shader_type shader, unsigned start_slot, unsigned count, @@ -1149,14 +1168,14 @@ vctx->framebuffer.zsbuf = NULL; vctx->framebuffer.nr_cbufs = 0; virgl_encoder_destroy_sub_ctx(vctx, vctx->hw_sub_ctx_id); - virgl_flush_eq(vctx, vctx); + virgl_flush_eq(vctx, vctx, NULL); rs->vws->cmd_buf_destroy(vctx->cbuf); if (vctx->uploader) u_upload_destroy(vctx->uploader); util_primconvert_destroy(vctx->primconvert); - slab_destroy_child(&vctx->texture_transfer_pool); + slab_destroy_child(&vctx->transfer_pool); FREE(vctx); } @@ -1205,6 +1224,7 @@ struct virgl_context *vctx; struct virgl_screen *rs = virgl_screen(pscreen); vctx = CALLOC_STRUCT(virgl_context); + const char *host_debug_flagstring; vctx->cbuf = rs->vws->cmd_buf_create(rs->vws); if (!vctx->cbuf) { @@ -1284,6 +1304,8 @@ vctx->base.resource_copy_region = virgl_resource_copy_region; vctx->base.flush_resource = virgl_flush_resource; vctx->base.blit = virgl_blit; + vctx->base.create_fence_fd = virgl_create_fence_fd; + vctx->base.fence_server_sync = virgl_fence_server_sync; vctx->base.set_shader_buffers = virgl_set_shader_buffers; vctx->base.set_hw_atomic_buffers = virgl_set_hw_atomic_buffers; @@ -1294,8 +1316,7 @@ virgl_init_query_functions(vctx); virgl_init_so_functions(vctx); - list_inithead(&vctx->to_flush_bufs); - slab_create_child(&vctx->texture_transfer_pool, &rs->texture_transfer_pool); + slab_create_child(&vctx->transfer_pool, &rs->transfer_pool); vctx->primconvert = util_primconvert_create(&vctx->base, rs->caps.caps.v1.prim_mask); vctx->uploader = u_upload_create(&vctx->base, 1024 * 1024, @@ -1309,6 +1330,13 @@ virgl_encoder_create_sub_ctx(vctx, vctx->hw_sub_ctx_id); virgl_encoder_set_sub_ctx(vctx, vctx->hw_sub_ctx_id); + + if (rs->caps.caps.v2.capability_bits & 
VIRGL_CAP_GUEST_MAY_INIT_LOG) { + host_debug_flagstring = getenv("VIRGL_HOST_DEBUG"); + if (host_debug_flagstring) + virgl_encode_host_debug_flagstring(vctx, host_debug_flagstring); + } + return &vctx->base; fail: return NULL; diff -Nru mesa-18.3.3/src/gallium/drivers/virgl/virgl_context.h mesa-19.0.1/src/gallium/drivers/virgl/virgl_context.h --- mesa-18.3.3/src/gallium/drivers/virgl/virgl_context.h 2019-02-01 12:03:20.000000000 +0000 +++ mesa-19.0.1/src/gallium/drivers/virgl/virgl_context.h 2019-03-31 23:16:37.000000000 +0000 @@ -49,6 +49,11 @@ uint32_t enabled_mask; }; +struct virgl_rasterizer_state { + struct pipe_rasterizer_state rs; + uint32_t handle; +}; + struct virgl_context { struct pipe_context base; struct virgl_cmd_buf *cbuf; @@ -58,7 +63,7 @@ struct pipe_framebuffer_state framebuffer; - struct slab_child_pool texture_transfer_pool; + struct slab_child_pool transfer_pool; struct u_upload_mgr *uploader; @@ -66,6 +71,7 @@ unsigned num_vertex_buffers; boolean vertex_array_dirty; + struct virgl_rasterizer_state rs_state; struct virgl_so_target so_targets[PIPE_MAX_SO_BUFFERS]; unsigned num_so_targets; @@ -75,7 +81,6 @@ struct pipe_resource *images[PIPE_SHADER_TYPES][PIPE_MAX_SHADER_BUFFERS]; int num_transfers; int num_draws; - struct list_head to_flush_bufs; struct pipe_resource *atomic_buffers[PIPE_MAX_HW_ATOMIC_BUFFERS]; diff -Nru mesa-18.3.3/src/gallium/drivers/virgl/virgl_encode.c mesa-19.0.1/src/gallium/drivers/virgl/virgl_encode.c --- mesa-18.3.3/src/gallium/drivers/virgl/virgl_encode.c 2018-12-07 18:58:04.000000000 +0000 +++ mesa-19.0.1/src/gallium/drivers/virgl/virgl_encode.c 2019-03-31 23:16:37.000000000 +0000 @@ -1054,3 +1054,27 @@ virgl_encoder_write_dword(ctx->cbuf, flags); return 0; } + +int virgl_encode_host_debug_flagstring(struct virgl_context *ctx, + const char *flagstring) +{ + unsigned long slen = strlen(flagstring) + 1; + uint32_t sslen; + uint32_t string_length; + + if (!slen) + return 0; + + if (slen > 4 * 0xffff) { + 
debug_printf("VIRGL: host debug flag string too long, will be truncated\n"); + slen = 4 * 0xffff; + } + + sslen = (uint32_t )(slen + 3) / 4; + string_length = (uint32_t)MIN2(sslen * 4, slen); + + virgl_encoder_write_cmd_dword(ctx, VIRGL_CMD0(VIRGL_CCMD_SET_DEBUG_FLAGS, 0, sslen)); + virgl_encoder_write_block(ctx->cbuf, (const uint8_t *)flagstring, string_length); + + return 0; +} diff -Nru mesa-18.3.3/src/gallium/drivers/virgl/virgl_encode.h mesa-19.0.1/src/gallium/drivers/virgl/virgl_encode.h --- mesa-18.3.3/src/gallium/drivers/virgl/virgl_encode.h 2018-12-07 18:58:04.000000000 +0000 +++ mesa-19.0.1/src/gallium/drivers/virgl/virgl_encode.h 2019-03-31 23:16:37.000000000 +0000 @@ -276,4 +276,7 @@ const struct pipe_grid_info *grid_info); int virgl_encode_texture_barrier(struct virgl_context *ctx, unsigned flags); + +int virgl_encode_host_debug_flagstring(struct virgl_context *ctx, + const char *envname); #endif diff -Nru mesa-18.3.3/src/gallium/drivers/virgl/virgl_hw.h mesa-19.0.1/src/gallium/drivers/virgl/virgl_hw.h --- mesa-18.3.3/src/gallium/drivers/virgl/virgl_hw.h 2018-12-07 18:58:04.000000000 +0000 +++ mesa-19.0.1/src/gallium/drivers/virgl/virgl_hw.h 2019-03-31 23:16:37.000000000 +0000 @@ -231,6 +231,8 @@ #define VIRGL_CAP_SHADER_CLOCK (1 << 11) #define VIRGL_CAP_TEXTURE_BARRIER (1 << 12) #define VIRGL_CAP_TGSI_COMPONENTS (1 << 13) +#define VIRGL_CAP_GUEST_MAY_INIT_LOG (1 << 14) +#define VIRGL_CAP_SRGB_WRITE_CONTROL (1 << 15) /* virgl bind flags - these are compatible with mesa 10.5 gallium. * but are fixed, no other should be passed to virgl either. 
diff -Nru mesa-18.3.3/src/gallium/drivers/virgl/virgl_protocol.h mesa-19.0.1/src/gallium/drivers/virgl/virgl_protocol.h --- mesa-18.3.3/src/gallium/drivers/virgl/virgl_protocol.h 2018-12-07 18:58:04.000000000 +0000 +++ mesa-19.0.1/src/gallium/drivers/virgl/virgl_protocol.h 2019-03-31 23:16:37.000000000 +0000 @@ -92,6 +92,7 @@ VIRGL_CCMD_SET_FRAMEBUFFER_STATE_NO_ATTACH, VIRGL_CCMD_TEXTURE_BARRIER, VIRGL_CCMD_SET_ATOMIC_BUFFERS, + VIRGL_CCMD_SET_DEBUG_FLAGS, }; /* @@ -222,7 +223,7 @@ #define VIRGL_OBJ_SHADER_OFFSET_VAL(x) (((x) & 0x7fffffff) << 0) /* start contains full length in VAL - also implies continuations */ /* continuation contains offset in VAL */ -#define VIRGL_OBJ_SHADER_OFFSET_CONT (0x1 << 31) +#define VIRGL_OBJ_SHADER_OFFSET_CONT (0x1u << 31) #define VIRGL_OBJ_SHADER_NUM_TOKENS 4 #define VIRGL_OBJ_SHADER_SO_NUM_OUTPUTS 5 #define VIRGL_OBJ_SHADER_SO_STRIDE(x) (6 + (x)) diff -Nru mesa-18.3.3/src/gallium/drivers/virgl/virgl_resource.c mesa-19.0.1/src/gallium/drivers/virgl/virgl_resource.c --- mesa-18.3.3/src/gallium/drivers/virgl/virgl_resource.c 2018-12-07 18:58:04.000000000 +0000 +++ mesa-19.0.1/src/gallium/drivers/virgl/virgl_resource.c 2019-03-31 23:16:37.000000000 +0000 @@ -20,7 +20,9 @@ * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE * USE OR OTHER DEALINGS IN THE SOFTWARE. 
*/ +#include "util/u_format.h" #include "util/u_inlines.h" +#include "util/u_memory.h" #include "virgl_context.h" #include "virgl_resource.h" #include "virgl_screen.h" @@ -55,11 +57,37 @@ static struct pipe_resource *virgl_resource_create(struct pipe_screen *screen, const struct pipe_resource *templ) { - struct virgl_screen *vs = virgl_screen(screen); - if (templ->target == PIPE_BUFFER) - return virgl_buffer_create(vs, templ); - else - return virgl_texture_create(vs, templ); + unsigned vbind; + struct virgl_screen *vs = virgl_screen(screen); + struct virgl_resource *res = CALLOC_STRUCT(virgl_resource); + + res->clean = TRUE; + res->u.b = *templ; + res->u.b.screen = &vs->base; + pipe_reference_init(&res->u.b.reference, 1); + vbind = pipe_to_virgl_bind(templ->bind); + virgl_resource_layout(&res->u.b, &res->metadata); + res->hw_res = vs->vws->resource_create(vs->vws, templ->target, + templ->format, vbind, + templ->width0, + templ->height0, + templ->depth0, + templ->array_size, + templ->last_level, + templ->nr_samples, + res->metadata.total_size); + if (!res->hw_res) { + FREE(res); + return NULL; + } + + if (templ->target == PIPE_BUFFER) + virgl_buffer_init(res); + else + virgl_texture_init(res); + + return &res->u.b; + } static struct pipe_resource *virgl_resource_from_handle(struct pipe_screen *screen, @@ -67,11 +95,24 @@ struct winsys_handle *whandle, unsigned usage) { - struct virgl_screen *vs = virgl_screen(screen); - if (templ->target == PIPE_BUFFER) - return NULL; - else - return virgl_texture_from_handle(vs, templ, whandle); + struct virgl_screen *vs = virgl_screen(screen); + if (templ->target == PIPE_BUFFER) + return NULL; + + struct virgl_resource *res = CALLOC_STRUCT(virgl_resource); + res->u.b = *templ; + res->u.b.screen = &vs->base; + pipe_reference_init(&res->u.b.reference, 1); + + res->hw_res = vs->vws->resource_create_from_handle(vs->vws, whandle); + if (!res->hw_res) { + FREE(res); + return NULL; + } + + virgl_texture_init(res); + + return &res->u.b; } 
void virgl_init_screen_resource_functions(struct pipe_screen *screen) @@ -110,3 +151,128 @@ ctx->buffer_subdata = virgl_buffer_subdata; ctx->texture_subdata = u_default_texture_subdata; } + +void virgl_resource_layout(struct pipe_resource *pt, + struct virgl_resource_metadata *metadata) +{ + unsigned level, nblocksy; + unsigned width = pt->width0; + unsigned height = pt->height0; + unsigned depth = pt->depth0; + unsigned buffer_size = 0; + + for (level = 0; level <= pt->last_level; level++) { + unsigned slices; + + if (pt->target == PIPE_TEXTURE_CUBE) + slices = 6; + else if (pt->target == PIPE_TEXTURE_3D) + slices = depth; + else + slices = pt->array_size; + + nblocksy = util_format_get_nblocksy(pt->format, height); + metadata->stride[level] = util_format_get_stride(pt->format, width); + metadata->layer_stride[level] = nblocksy * metadata->stride[level]; + metadata->level_offset[level] = buffer_size; + + buffer_size += slices * metadata->layer_stride[level]; + + width = u_minify(width, 1); + height = u_minify(height, 1); + depth = u_minify(depth, 1); + } + + if (pt->nr_samples <= 1) + metadata->total_size = buffer_size; + else /* don't create guest backing store for MSAA */ + metadata->total_size = 0; +} + +struct virgl_transfer * +virgl_resource_create_transfer(struct pipe_context *ctx, + struct pipe_resource *pres, + const struct virgl_resource_metadata *metadata, + unsigned level, unsigned usage, + const struct pipe_box *box) +{ + struct virgl_transfer *trans; + enum pipe_format format = pres->format; + struct virgl_context *vctx = virgl_context(ctx); + const unsigned blocksy = box->y / util_format_get_blockheight(format); + const unsigned blocksx = box->x / util_format_get_blockwidth(format); + + unsigned offset = metadata->level_offset[level]; + if (pres->target == PIPE_TEXTURE_CUBE || + pres->target == PIPE_TEXTURE_CUBE_ARRAY || + pres->target == PIPE_TEXTURE_3D || + pres->target == PIPE_TEXTURE_2D_ARRAY) { + offset += box->z * metadata->layer_stride[level]; 
+ } + else if (pres->target == PIPE_TEXTURE_1D_ARRAY) { + offset += box->z * metadata->stride[level]; + assert(box->y == 0); + } else if (pres->target == PIPE_BUFFER) { + assert(box->y == 0 && box->z == 0); + } else { + assert(box->z == 0); + } + + offset += blocksy * metadata->stride[level]; + offset += blocksx * util_format_get_blocksize(format); + + trans = slab_alloc(&vctx->transfer_pool); + if (!trans) + return NULL; + + trans->base.resource = pres; + trans->base.level = level; + trans->base.usage = usage; + trans->base.box = *box; + trans->base.stride = metadata->stride[level]; + trans->base.layer_stride = metadata->layer_stride[level]; + trans->offset = offset; + util_range_init(&trans->range); + + if (trans->base.resource->target != PIPE_TEXTURE_3D && + trans->base.resource->target != PIPE_TEXTURE_CUBE && + trans->base.resource->target != PIPE_TEXTURE_1D_ARRAY && + trans->base.resource->target != PIPE_TEXTURE_2D_ARRAY && + trans->base.resource->target != PIPE_TEXTURE_CUBE_ARRAY) + trans->l_stride = 0; + else + trans->l_stride = trans->base.layer_stride; + + return trans; +} + +void virgl_resource_destroy_transfer(struct virgl_context *vctx, + struct virgl_transfer *trans) +{ + util_range_destroy(&trans->range); + slab_free(&vctx->transfer_pool, trans); +} + +void virgl_resource_destroy(struct pipe_screen *screen, + struct pipe_resource *resource) +{ + struct virgl_screen *vs = virgl_screen(screen); + struct virgl_resource *res = virgl_resource(resource); + vs->vws->resource_unref(vs->vws, res->hw_res); + FREE(res); +} + +boolean virgl_resource_get_handle(struct pipe_screen *screen, + struct pipe_resource *resource, + struct winsys_handle *whandle) +{ + struct virgl_screen *vs = virgl_screen(screen); + struct virgl_resource *res = virgl_resource(resource); + + if (res->u.b.target == PIPE_BUFFER) + return FALSE; + + return vs->vws->resource_get_handle(vs->vws, res->hw_res, + res->metadata.stride[0], + whandle); +} diff -Nru 
mesa-18.3.3/src/gallium/drivers/virgl/virgl_resource.h mesa-19.0.1/src/gallium/drivers/virgl/virgl_resource.h --- mesa-18.3.3/src/gallium/drivers/virgl/virgl_resource.h 2018-09-27 19:13:54.000000000 +0000 +++ mesa-19.0.1/src/gallium/drivers/virgl/virgl_resource.h 2019-03-31 23:16:37.000000000 +0000 @@ -36,39 +36,25 @@ struct virgl_screen; struct virgl_context; +struct virgl_resource_metadata +{ + unsigned long level_offset[VR_MAX_TEXTURE_2D_LEVELS]; + unsigned stride[VR_MAX_TEXTURE_2D_LEVELS]; + unsigned layer_stride[VR_MAX_TEXTURE_2D_LEVELS]; + uint32_t total_size; +}; + struct virgl_resource { struct u_resource u; - struct virgl_hw_res *hw_res; boolean clean; -}; - -struct virgl_buffer { - struct virgl_resource base; - - struct list_head flush_list; - boolean on_list; - - /* The buffer range which is initialized (with a write transfer, - * streamout, DMA, or as a random access target). The rest of - * the buffer is considered invalid and can be mapped unsynchronized. - * - * This allows unsychronized mapping of a buffer range which hasn't - * been used yet. It's for applications which forget to use - * the unsynchronized map flag and expect the driver to figure it out. 
- */ - struct util_range valid_buffer_range; -}; - -struct virgl_texture { - struct virgl_resource base; - - unsigned long level_offset[VR_MAX_TEXTURE_2D_LEVELS]; - unsigned stride[VR_MAX_TEXTURE_2D_LEVELS]; + struct virgl_hw_res *hw_res; + struct virgl_resource_metadata metadata; }; struct virgl_transfer { struct pipe_transfer base; - uint32_t offset; + uint32_t offset, l_stride; + struct util_range range; struct virgl_resource *resolve_tmp; }; @@ -79,35 +65,19 @@ void virgl_init_context_resource_functions(struct pipe_context *ctx); -struct pipe_resource *virgl_texture_create(struct virgl_screen *vs, - const struct pipe_resource *templ); - -struct pipe_resource *virgl_texture_from_handle(struct virgl_screen *vs, - const struct pipe_resource *templ, - struct winsys_handle *whandle); +void virgl_texture_init(struct virgl_resource *res); static inline struct virgl_resource *virgl_resource(struct pipe_resource *r) { return (struct virgl_resource *)r; } -static inline struct virgl_buffer *virgl_buffer(struct pipe_resource *r) -{ - return (struct virgl_buffer *)r; -} - -static inline struct virgl_texture *virgl_texture(struct pipe_resource *r) -{ - return (struct virgl_texture *)r; -} - static inline struct virgl_transfer *virgl_transfer(struct pipe_transfer *trans) { return (struct virgl_transfer *)trans; } -struct pipe_resource *virgl_buffer_create(struct virgl_screen *vs, - const struct pipe_resource *templ); +void virgl_buffer_init(struct virgl_resource *res); static inline unsigned pipe_to_virgl_bind(unsigned pbind) { @@ -145,4 +115,24 @@ bool virgl_res_needs_readback(struct virgl_context *vctx, struct virgl_resource *res, unsigned usage); + +void virgl_resource_layout(struct pipe_resource *pt, + struct virgl_resource_metadata *metadata); + +struct virgl_transfer * +virgl_resource_create_transfer(struct pipe_context *ctx, + struct pipe_resource *pres, + const struct virgl_resource_metadata *metadata, + unsigned level, unsigned usage, + const struct pipe_box *box); 
+ +void virgl_resource_destroy_transfer(struct virgl_context *vctx, + struct virgl_transfer *trans); + +void virgl_resource_destroy(struct pipe_screen *screen, + struct pipe_resource *resource); + +boolean virgl_resource_get_handle(struct pipe_screen *screen, + struct pipe_resource *resource, + struct winsys_handle *whandle); #endif diff -Nru mesa-18.3.3/src/gallium/drivers/virgl/virgl_screen.c mesa-19.0.1/src/gallium/drivers/virgl/virgl_screen.c --- mesa-18.3.3/src/gallium/drivers/virgl/virgl_screen.c 2018-12-07 18:58:04.000000000 +0000 +++ mesa-19.0.1/src/gallium/drivers/virgl/virgl_screen.c 2019-03-31 23:16:37.000000000 +0000 @@ -255,6 +255,13 @@ return vscreen->caps.caps.v2.max_combined_atomic_counters; case PIPE_CAP_MAX_COMBINED_HW_ATOMIC_COUNTER_BUFFERS: return vscreen->caps.caps.v2.max_combined_atomic_counter_buffers; + case PIPE_CAP_TEXTURE_FLOAT_LINEAR: + case PIPE_CAP_TEXTURE_HALF_FLOAT_LINEAR: + return 1; /* TODO: need to introduce a hw-cap for this */ + case PIPE_CAP_MAX_VARYINGS: + if (vscreen->caps.caps.v1.glsl_level < 150) + return vscreen->caps.caps.v2.max_vertex_attribs; + return 32; case PIPE_CAP_TEXTURE_GATHER_SM5: case PIPE_CAP_BUFFER_MAP_PERSISTENT_COHERENT: case PIPE_CAP_FAKE_SW_MSAA: @@ -267,8 +274,6 @@ case PIPE_CAP_MULTISAMPLE_Z_RESOLVE: case PIPE_CAP_RESOURCE_FROM_USER_MEMORY: case PIPE_CAP_DEVICE_RESET_STATUS_QUERY: - case PIPE_CAP_TEXTURE_FLOAT_LINEAR: - case PIPE_CAP_TEXTURE_HALF_FLOAT_LINEAR: case PIPE_CAP_DEPTH_BOUNDS_TEST: case PIPE_CAP_SHAREABLE_SHADERS: case PIPE_CAP_CLEAR_TEXTURE: @@ -340,7 +345,9 @@ case PIPE_CAP_VIDEO_MEMORY: return 0; case PIPE_CAP_NATIVE_FENCE_FD: - return 0; + return vscreen->vws->supports_fences; + case PIPE_CAP_DEST_SURFACE_SRGB_CONTROL: + return vscreen->caps.caps.v2.capability_bits & VIRGL_CAP_SRGB_WRITE_CONTROL; default: return u_pipe_screen_get_param_defaults(screen, param); } @@ -721,6 +728,15 @@ return vws->fence_wait(vws, fence, timeout); } +static int virgl_fence_get_fd(struct pipe_screen *screen, + 
struct pipe_fence_handle *fence) +{ + struct virgl_screen *vscreen = virgl_screen(screen); + struct virgl_winsys *vws = vscreen->vws; + + return vws->fence_get_fd(vws, fence); +} + static uint64_t virgl_get_timestamp(struct pipe_screen *_screen) { @@ -733,7 +749,7 @@ struct virgl_screen *vscreen = virgl_screen(screen); struct virgl_winsys *vws = vscreen->vws; - slab_destroy_parent(&vscreen->texture_transfer_pool); + slab_destroy_parent(&vscreen->transfer_pool); if (vws) vws->destroy(vws); @@ -765,6 +781,7 @@ screen->base.fence_reference = virgl_fence_reference; //screen->base.fence_signalled = virgl_fence_signalled; screen->base.fence_finish = virgl_fence_finish; + screen->base.fence_get_fd = virgl_fence_get_fd; virgl_init_screen_resource_functions(&screen->base); @@ -772,7 +789,7 @@ screen->refcnt = 1; - slab_create_parent(&screen->texture_transfer_pool, sizeof(struct virgl_transfer), 16); + slab_create_parent(&screen->transfer_pool, sizeof(struct virgl_transfer), 16); return &screen->base; } diff -Nru mesa-18.3.3/src/gallium/drivers/virgl/virgl_screen.h mesa-19.0.1/src/gallium/drivers/virgl/virgl_screen.h --- mesa-18.3.3/src/gallium/drivers/virgl/virgl_screen.h 2018-12-07 18:58:04.000000000 +0000 +++ mesa-19.0.1/src/gallium/drivers/virgl/virgl_screen.h 2019-03-31 23:16:37.000000000 +0000 @@ -43,7 +43,7 @@ struct virgl_drm_caps caps; - struct slab_parent_pool texture_transfer_pool; + struct slab_parent_pool transfer_pool; uint32_t sub_ctx_id; }; diff -Nru mesa-18.3.3/src/gallium/drivers/virgl/virgl_texture.c mesa-19.0.1/src/gallium/drivers/virgl/virgl_texture.c --- mesa-18.3.3/src/gallium/drivers/virgl/virgl_texture.c 2018-12-07 18:58:04.000000000 +0000 +++ mesa-19.0.1/src/gallium/drivers/virgl/virgl_texture.c 2019-03-31 23:16:37.000000000 +0000 @@ -94,31 +94,6 @@ } } -static unsigned -vrend_get_tex_image_offset(const struct virgl_texture *res, - unsigned level, unsigned layer) -{ - const struct pipe_resource *pres = &res->base.u.b; - const unsigned hgt = 
u_minify(pres->height0, level); - const unsigned nblocksy = util_format_get_nblocksy(pres->format, hgt); - unsigned offset = res->level_offset[level]; - - if (pres->target == PIPE_TEXTURE_CUBE || - pres->target == PIPE_TEXTURE_CUBE_ARRAY || - pres->target == PIPE_TEXTURE_3D || - pres->target == PIPE_TEXTURE_2D_ARRAY) { - offset += layer * nblocksy * res->stride[level]; - } - else if (pres->target == PIPE_TEXTURE_1D_ARRAY) { - offset += layer * res->stride[level]; - } - else { - assert(layer == 0); - } - - return offset; -} - static void *virgl_texture_transfer_map(struct pipe_context *ctx, struct pipe_resource *resource, unsigned level, @@ -128,41 +103,19 @@ { struct virgl_context *vctx = virgl_context(ctx); struct virgl_screen *vs = virgl_screen(ctx->screen); - struct virgl_texture *vtex = virgl_texture(resource); - enum pipe_format format = resource->format; + struct virgl_resource *vtex = virgl_resource(resource); struct virgl_transfer *trans; void *ptr; boolean readback = TRUE; - uint32_t offset; struct virgl_hw_res *hw_res; - const unsigned h = u_minify(vtex->base.u.b.height0, level); - const unsigned nblocksy = util_format_get_nblocksy(format, h); - uint32_t l_stride; bool doflushwait; - doflushwait = virgl_res_needs_flush_wait(vctx, &vtex->base, usage); + doflushwait = virgl_res_needs_flush_wait(vctx, vtex, usage); if (doflushwait) ctx->flush(ctx, NULL, 0); - trans = slab_alloc(&vctx->texture_transfer_pool); - if (!trans) - return NULL; - - trans->base.resource = resource; - trans->base.level = level; - trans->base.usage = usage; - trans->base.box = *box; - trans->base.stride = vtex->stride[level]; - trans->base.layer_stride = trans->base.stride * nblocksy; - - if (resource->target != PIPE_TEXTURE_3D && - resource->target != PIPE_TEXTURE_CUBE && - resource->target != PIPE_TEXTURE_1D_ARRAY && - resource->target != PIPE_TEXTURE_2D_ARRAY && - resource->target != PIPE_TEXTURE_CUBE_ARRAY) - l_stride = 0; - else - l_stride = trans->base.layer_stride; + trans = 
virgl_resource_create_transfer(ctx, resource, &vtex->metadata, + level, usage, box); if (resource->nr_samples > 1) { struct pipe_resource tmp_resource; @@ -175,34 +128,30 @@ ctx->flush(ctx, NULL, 0); /* we want to do a resolve blit into the temporary */ hw_res = trans->resolve_tmp->hw_res; - offset = 0; - trans->base.stride = ((struct virgl_texture*)trans->resolve_tmp)->stride[level]; - trans->base.layer_stride = trans->base.stride * nblocksy; + struct virgl_resource_metadata *data = &trans->resolve_tmp->metadata; + trans->base.stride = data->stride[level]; + trans->base.layer_stride = data->layer_stride[level]; + trans->offset = 0; } else { - offset = vrend_get_tex_image_offset(vtex, level, box->z); - - offset += box->y / util_format_get_blockheight(format) * trans->base.stride + - box->x / util_format_get_blockwidth(format) * util_format_get_blocksize(format); - hw_res = vtex->base.hw_res; + hw_res = vtex->hw_res; trans->resolve_tmp = NULL; } - readback = virgl_res_needs_readback(vctx, &vtex->base, usage); + readback = virgl_res_needs_readback(vctx, vtex, usage); if (readback) - vs->vws->transfer_get(vs->vws, hw_res, box, trans->base.stride, l_stride, offset, level); + vs->vws->transfer_get(vs->vws, hw_res, box, trans->base.stride, + trans->l_stride, trans->offset, level); if (doflushwait || readback) - vs->vws->resource_wait(vs->vws, vtex->base.hw_res); + vs->vws->resource_wait(vs->vws, vtex->hw_res); ptr = vs->vws->resource_map(vs->vws, hw_res); if (!ptr) { - slab_free(&vctx->texture_transfer_pool, trans); + slab_free(&vctx->transfer_pool, trans); return NULL; } - trans->offset = offset; *transfer = &trans->base; - return ptr + trans->offset; } @@ -211,25 +160,17 @@ { struct virgl_context *vctx = virgl_context(ctx); struct virgl_transfer *trans = virgl_transfer(transfer); - struct virgl_texture *vtex = virgl_texture(transfer->resource); - uint32_t l_stride; - - if (transfer->resource->target != PIPE_TEXTURE_3D && - transfer->resource->target != 
PIPE_TEXTURE_CUBE && - transfer->resource->target != PIPE_TEXTURE_1D_ARRAY && - transfer->resource->target != PIPE_TEXTURE_2D_ARRAY && - transfer->resource->target != PIPE_TEXTURE_CUBE_ARRAY) - l_stride = 0; - else - l_stride = trans->base.layer_stride; + struct virgl_resource *vtex = virgl_resource(transfer->resource); if (trans->base.usage & PIPE_TRANSFER_WRITE) { if (!(transfer->usage & PIPE_TRANSFER_FLUSH_EXPLICIT)) { struct virgl_screen *vs = virgl_screen(ctx->screen); - vtex->base.clean = FALSE; + vtex->clean = FALSE; vctx->num_transfers++; - vs->vws->transfer_put(vs->vws, vtex->base.hw_res, - &transfer->box, trans->base.stride, l_stride, trans->offset, transfer->level); + vs->vws->transfer_put(vs->vws, vtex->hw_res, + &transfer->box, trans->base.stride, + trans->l_stride, trans->offset, + transfer->level); } } @@ -237,111 +178,19 @@ if (trans->resolve_tmp) pipe_resource_reference((struct pipe_resource **)&trans->resolve_tmp, NULL); - slab_free(&vctx->texture_transfer_pool, trans); -} - - -static void -vrend_resource_layout(struct virgl_texture *res, - uint32_t *total_size) -{ - struct pipe_resource *pt = &res->base.u.b; - unsigned level; - unsigned width = pt->width0; - unsigned height = pt->height0; - unsigned depth = pt->depth0; - unsigned buffer_size = 0; - - for (level = 0; level <= pt->last_level; level++) { - unsigned slices; - - if (pt->target == PIPE_TEXTURE_CUBE) - slices = 6; - else if (pt->target == PIPE_TEXTURE_3D) - slices = depth; - else - slices = pt->array_size; - - res->stride[level] = util_format_get_stride(pt->format, width); - res->level_offset[level] = buffer_size; - - buffer_size += (util_format_get_nblocksy(pt->format, height) * - slices * res->stride[level]); - - width = u_minify(width, 1); - height = u_minify(height, 1); - depth = u_minify(depth, 1); - } - - if (pt->nr_samples <= 1) - *total_size = buffer_size; - else /* don't create guest backing store for MSAA */ - *total_size = 0; -} - -static boolean 
virgl_texture_get_handle(struct pipe_screen *screen, - struct pipe_resource *ptex, - struct winsys_handle *whandle) -{ - struct virgl_screen *vs = virgl_screen(screen); - struct virgl_texture *vtex = virgl_texture(ptex); - - return vs->vws->resource_get_handle(vs->vws, vtex->base.hw_res, vtex->stride[0], whandle); -} - -static void virgl_texture_destroy(struct pipe_screen *screen, - struct pipe_resource *res) -{ - struct virgl_screen *vs = virgl_screen(screen); - struct virgl_texture *vtex = virgl_texture(res); - vs->vws->resource_unref(vs->vws, vtex->base.hw_res); - FREE(vtex); + virgl_resource_destroy_transfer(vctx, trans); } static const struct u_resource_vtbl virgl_texture_vtbl = { - virgl_texture_get_handle, /* get_handle */ - virgl_texture_destroy, /* resource_destroy */ + virgl_resource_get_handle, /* get_handle */ + virgl_resource_destroy, /* resource_destroy */ virgl_texture_transfer_map, /* transfer_map */ NULL, /* transfer_flush_region */ virgl_texture_transfer_unmap, /* transfer_unmap */ }; -struct pipe_resource * -virgl_texture_from_handle(struct virgl_screen *vs, - const struct pipe_resource *template, - struct winsys_handle *whandle) +void virgl_texture_init(struct virgl_resource *res) { - struct virgl_texture *tex = CALLOC_STRUCT(virgl_texture); - tex->base.u.b = *template; - tex->base.u.b.screen = &vs->base; - pipe_reference_init(&tex->base.u.b.reference, 1); - tex->base.u.vtbl = &virgl_texture_vtbl; - - tex->base.hw_res = vs->vws->resource_create_from_handle(vs->vws, whandle); - return &tex->base.u.b; -} - -struct pipe_resource *virgl_texture_create(struct virgl_screen *vs, - const struct pipe_resource *template) -{ - struct virgl_texture *tex; - uint32_t size; - unsigned vbind; - - tex = CALLOC_STRUCT(virgl_texture); - tex->base.clean = TRUE; - tex->base.u.b = *template; - tex->base.u.b.screen = &vs->base; - pipe_reference_init(&tex->base.u.b.reference, 1); - tex->base.u.vtbl = &virgl_texture_vtbl; - vrend_resource_layout(tex, &size); - - vbind = 
pipe_to_virgl_bind(template->bind); - tex->base.hw_res = vs->vws->resource_create(vs->vws, template->target, template->format, vbind, template->width0, template->height0, template->depth0, template->array_size, template->last_level, template->nr_samples, size); - if (!tex->base.hw_res) { - FREE(tex); - return NULL; - } - return &tex->base.u.b; + res->u.vtbl = &virgl_texture_vtbl; } diff -Nru mesa-18.3.3/src/gallium/drivers/virgl/virgl_winsys.h mesa-19.0.1/src/gallium/drivers/virgl/virgl_winsys.h --- mesa-18.3.3/src/gallium/drivers/virgl/virgl_winsys.h 2018-12-07 18:58:04.000000000 +0000 +++ mesa-19.0.1/src/gallium/drivers/virgl/virgl_winsys.h 2019-03-31 23:16:37.000000000 +0000 @@ -40,10 +40,13 @@ struct virgl_cmd_buf { unsigned cdw; uint32_t *buf; + int in_fence_fd; + bool needs_out_fence_fd; }; struct virgl_winsys { unsigned pci_id; + int supports_fences; /* In/Out fences are supported */ void (*destroy)(struct virgl_winsys *vws); @@ -83,7 +86,8 @@ void (*cmd_buf_destroy)(struct virgl_cmd_buf *buf); void (*emit_res)(struct virgl_winsys *vws, struct virgl_cmd_buf *buf, struct virgl_hw_res *res, boolean write_buffer); - int (*submit_cmd)(struct virgl_winsys *vws, struct virgl_cmd_buf *buf); + int (*submit_cmd)(struct virgl_winsys *vws, struct virgl_cmd_buf *buf, + int32_t in_fence_fd, int32_t *out_fence_fd); boolean (*res_is_referenced)(struct virgl_winsys *vws, struct virgl_cmd_buf *buf, @@ -92,7 +96,7 @@ int (*get_caps)(struct virgl_winsys *vws, struct virgl_drm_caps *caps); /* fence */ - struct pipe_fence_handle *(*cs_create_fence)(struct virgl_winsys *vws); + struct pipe_fence_handle *(*cs_create_fence)(struct virgl_winsys *vws, int fd); bool (*fence_wait)(struct virgl_winsys *vws, struct pipe_fence_handle *fence, uint64_t timeout); @@ -107,6 +111,12 @@ unsigned level, unsigned layer, void *winsys_drawable_handle, struct pipe_box *sub_box); + void (*fence_server_sync)(struct virgl_winsys *vws, + struct virgl_cmd_buf *cbuf, + struct pipe_fence_handle *fence); + 
+ int (*fence_get_fd)(struct virgl_winsys *vws, + struct pipe_fence_handle *fence); }; /* this defaults all newer caps, diff -Nru mesa-18.3.3/src/gallium/include/pipe/p_context.h mesa-19.0.1/src/gallium/include/pipe/p_context.h --- mesa-18.3.3/src/gallium/include/pipe/p_context.h 2018-12-07 18:58:04.000000000 +0000 +++ mesa-19.0.1/src/gallium/include/pipe/p_context.h 2019-03-31 23:16:37.000000000 +0000 @@ -793,7 +793,7 @@ * Invalidate the contents of the resource. This is used to * * (1) implement EGL's semantic of undefined depth/stencil - * contenst after a swapbuffers. This allows a tiled renderer (for + * contents after a swapbuffers. This allows a tiled renderer (for * example) to not store the depth buffer. * * (2) implement GL's InvalidateBufferData. For backwards compatibility, diff -Nru mesa-18.3.3/src/gallium/include/pipe/p_defines.h mesa-19.0.1/src/gallium/include/pipe/p_defines.h --- mesa-18.3.3/src/gallium/include/pipe/p_defines.h 2018-12-07 18:58:04.000000000 +0000 +++ mesa-19.0.1/src/gallium/include/pipe/p_defines.h 2019-03-31 23:16:37.000000000 +0000 @@ -341,7 +341,13 @@ * PIPE_RESOURCE_FLAG_MAP_COHERENT must be set when creating * the resource. */ - PIPE_TRANSFER_COHERENT = (1 << 14) + PIPE_TRANSFER_COHERENT = (1 << 14), + + /** + * This and higher bits are reserved for private use by drivers. Drivers + * should use this as (PIPE_TRANSFER_DRV_PRV << i). + */ + PIPE_TRANSFER_DRV_PRV = (1 << 24) }; /** @@ -401,6 +407,9 @@ */ #define PIPE_CONTEXT_LOW_PRIORITY (1 << 5) +/** Stop execution if the device is reset. */ +#define PIPE_CONTEXT_LOSE_CONTEXT_ON_RESET (1 << 6) + /** * Flags for pipe_context::memory_barrier. 
*/ @@ -554,12 +563,30 @@ PIPE_QUERY_SO_OVERFLOW_ANY_PREDICATE, PIPE_QUERY_GPU_FINISHED, PIPE_QUERY_PIPELINE_STATISTICS, + PIPE_QUERY_PIPELINE_STATISTICS_SINGLE, PIPE_QUERY_TYPES, /* start of driver queries, see pipe_screen::get_driver_query_info */ PIPE_QUERY_DRIVER_SPECIFIC = 256, }; /** + * Index for PIPE_QUERY_PIPELINE_STATISTICS subqueries. + */ +enum pipe_statistics_query_index { + PIPE_STAT_QUERY_IA_VERTICES, + PIPE_STAT_QUERY_IA_PRIMITIVES, + PIPE_STAT_QUERY_VS_INVOCATIONS, + PIPE_STAT_QUERY_GS_INVOCATIONS, + PIPE_STAT_QUERY_GS_PRIMITIVES, + PIPE_STAT_QUERY_C_INVOCATIONS, + PIPE_STAT_QUERY_C_PRIMITIVES, + PIPE_STAT_QUERY_PS_INVOCATIONS, + PIPE_STAT_QUERY_HS_INVOCATIONS, + PIPE_STAT_QUERY_DS_INVOCATIONS, + PIPE_STAT_QUERY_CS_INVOCATIONS, +}; + +/** * Conditional rendering modes */ enum pipe_render_cond_flag { @@ -780,6 +807,7 @@ PIPE_CAP_TGSI_CAN_READ_OUTPUTS, PIPE_CAP_NATIVE_FENCE_FD, PIPE_CAP_GLSL_OPTIMIZE_CONSERVATIVELY, + PIPE_CAP_GLSL_TESS_LEVELS_AS_INPUTS, PIPE_CAP_TGSI_FS_FBFETCH, PIPE_CAP_TGSI_MUL_ZERO_WINS, PIPE_CAP_DOUBLES, @@ -823,6 +851,12 @@ PIPE_CAP_MAX_COMBINED_HW_ATOMIC_COUNTER_BUFFERS, PIPE_CAP_MAX_TEXTURE_UPLOAD_MEMORY_BUDGET, PIPE_CAP_MAX_VERTEX_ELEMENT_SRC_OFFSET, + PIPE_CAP_SURFACE_SAMPLE_COUNT, + PIPE_CAP_TGSI_ATOMFADD, + PIPE_CAP_QUERY_PIPELINE_STATISTICS_SINGLE, + PIPE_CAP_RGB_OVERRIDE_DST_ALPHA_BLEND, + PIPE_CAP_DEST_SURFACE_SRGB_CONTROL, + PIPE_CAP_MAX_VARYINGS, }; /** diff -Nru mesa-18.3.3/src/gallium/include/pipe/p_format.h mesa-19.0.1/src/gallium/include/pipe/p_format.h --- mesa-18.3.3/src/gallium/include/pipe/p_format.h 2018-02-08 14:40:56.000000000 +0000 +++ mesa-19.0.1/src/gallium/include/pipe/p_format.h 2019-03-31 23:16:37.000000000 +0000 @@ -396,6 +396,13 @@ PIPE_FORMAT_X1B5G5R5_UNORM = 310, PIPE_FORMAT_A4B4G4R4_UNORM = 311, + PIPE_FORMAT_R8_SRGB = 312, + + PIPE_FORMAT_A8L8_SINT = 313, + PIPE_FORMAT_G8R8_SINT = 314, + PIPE_FORMAT_A8B8G8R8_SINT = 315, + PIPE_FORMAT_X8B8G8R8_SINT = 316, + PIPE_FORMAT_COUNT }; diff -Nru 
mesa-18.3.3/src/gallium/include/pipe/p_shader_tokens.h mesa-19.0.1/src/gallium/include/pipe/p_shader_tokens.h --- mesa-18.3.3/src/gallium/include/pipe/p_shader_tokens.h 2018-12-07 18:58:04.000000000 +0000 +++ mesa-19.0.1/src/gallium/include/pipe/p_shader_tokens.h 2019-03-31 23:16:37.000000000 +0000 @@ -442,7 +442,7 @@ TGSI_OPCODE_BGNSUB = 100, TGSI_OPCODE_ENDLOOP = 101, TGSI_OPCODE_ENDSUB = 102, - /* gap */ + TGSI_OPCODE_ATOMFADD = 103, TGSI_OPCODE_TXQS = 104, TGSI_OPCODE_RESQ = 105, TGSI_OPCODE_READ_FIRST = 106, diff -Nru mesa-18.3.3/src/gallium/include/pipe/p_state.h mesa-19.0.1/src/gallium/include/pipe/p_state.h --- mesa-18.3.3/src/gallium/include/pipe/p_state.h 2018-12-07 18:58:04.000000000 +0000 +++ mesa-19.0.1/src/gallium/include/pipe/p_state.h 2019-03-31 23:16:37.000000000 +0000 @@ -443,6 +443,13 @@ uint16_t width; /**< logical width in pixels */ uint16_t height; /**< logical height in pixels */ + /** + * Number of samples for the surface. This will be 0 if rendering + * should use the resource's nr_samples, or another value if the resource + * is bound using FramebufferTexture2DMultisampleEXT. 
+ */ + unsigned nr_samples:8; + union pipe_surface_desc u; }; diff -Nru mesa-18.3.3/src/gallium/include/pipe/p_video_enums.h mesa-19.0.1/src/gallium/include/pipe/p_video_enums.h --- mesa-18.3.3/src/gallium/include/pipe/p_video_enums.h 2018-04-16 21:31:06.000000000 +0000 +++ mesa-19.0.1/src/gallium/include/pipe/p_video_enums.h 2019-03-31 23:16:37.000000000 +0000 @@ -70,7 +70,8 @@ PIPE_VIDEO_PROFILE_HEVC_MAIN_444, PIPE_VIDEO_PROFILE_JPEG_BASELINE, PIPE_VIDEO_PROFILE_VP9_PROFILE0, - PIPE_VIDEO_PROFILE_VP9_PROFILE2 + PIPE_VIDEO_PROFILE_VP9_PROFILE2, + PIPE_VIDEO_PROFILE_MAX }; /* Video caps, can be different for each codec/profile */ diff -Nru mesa-18.3.3/src/gallium/Makefile.am mesa-19.0.1/src/gallium/Makefile.am --- mesa-18.3.3/src/gallium/Makefile.am 2018-09-27 19:13:54.000000000 +0000 +++ mesa-19.0.1/src/gallium/Makefile.am 2019-03-31 23:16:37.000000000 +0000 @@ -56,12 +56,8 @@ SUBDIRS += drivers/etnaviv winsys/etnaviv/drm endif -if HAVE_GALLIUM_IMX -SUBDIRS += drivers/imx winsys/imx/drm -endif - -if HAVE_GALLIUM_PL111 -SUBDIRS += drivers/pl111 winsys/pl111/drm +if HAVE_GALLIUM_KMSRO +SUBDIRS += drivers/kmsro winsys/kmsro/drm endif ## swrast/softpipe diff -Nru mesa-18.3.3/src/gallium/meson.build mesa-19.0.1/src/gallium/meson.build --- mesa-18.3.3/src/gallium/meson.build 2018-09-27 19:13:54.000000000 +0000 +++ mesa-19.0.1/src/gallium/meson.build 2019-03-31 23:16:37.000000000 +0000 @@ -89,27 +89,22 @@ else driver_vc4 = declare_dependency() endif -if with_gallium_pl111 - subdir('winsys/pl111/drm') -else - driver_pl111 = declare_dependency() -endif -if with_gallium_v3d - subdir('winsys/v3d/drm') - subdir('drivers/v3d') -else - driver_v3d = declare_dependency() -endif if with_gallium_etnaviv subdir('winsys/etnaviv/drm') subdir('drivers/etnaviv') else driver_etnaviv = declare_dependency() endif -if with_gallium_imx - subdir('winsys/imx/drm') +if with_gallium_kmsro + subdir('winsys/kmsro/drm') else - driver_imx = declare_dependency() + driver_kmsro = declare_dependency() 
+endif +if with_gallium_v3d + subdir('winsys/v3d/drm') + subdir('drivers/v3d') +else + driver_v3d = declare_dependency() endif if with_gallium_tegra subdir('winsys/tegra/drm') diff -Nru mesa-18.3.3/src/gallium/state_trackers/clover/meson.build mesa-19.0.1/src/gallium/state_trackers/clover/meson.build --- mesa-18.3.3/src/gallium/state_trackers/clover/meson.build 2019-02-01 12:03:20.000000000 +0000 +++ mesa-19.0.1/src/gallium/state_trackers/clover/meson.build 2019-03-31 23:16:37.000000000 +0000 @@ -53,7 +53,7 @@ '-DLIBCLC_LIBEXECDIR="@0@/"'.format(dep_clc.get_pkgconfig_variable('libexecdir')), '-DCLANG_RESOURCE_DIR="@0@"'.format(join_paths( dep_llvm.get_configtool_variable('libdir'), 'clang', - dep_llvm.get_configtool_variable('version'), 'include', + dep_llvm.version(), 'include', )), ], dependencies : [dep_llvm, dep_elf], diff -Nru mesa-18.3.3/src/gallium/state_trackers/dri/dri2.c mesa-19.0.1/src/gallium/state_trackers/dri/dri2.c --- mesa-18.3.3/src/gallium/state_trackers/dri/dri2.c 2018-12-07 18:58:04.000000000 +0000 +++ mesa-19.0.1/src/gallium/state_trackers/dri/dri2.c 2019-03-31 23:16:37.000000000 +0000 @@ -66,305 +66,72 @@ return (struct dri2_buffer *) driBufferPriv; } -static const int fourcc_formats[] = { - __DRI_IMAGE_FOURCC_ARGB2101010, - __DRI_IMAGE_FOURCC_XRGB2101010, - __DRI_IMAGE_FOURCC_ABGR2101010, - __DRI_IMAGE_FOURCC_XBGR2101010, - __DRI_IMAGE_FOURCC_ARGB8888, - __DRI_IMAGE_FOURCC_ABGR8888, - __DRI_IMAGE_FOURCC_SARGB8888, - __DRI_IMAGE_FOURCC_XRGB8888, - __DRI_IMAGE_FOURCC_XBGR8888, - __DRI_IMAGE_FOURCC_ARGB1555, - __DRI_IMAGE_FOURCC_RGB565, - __DRI_IMAGE_FOURCC_R8, - __DRI_IMAGE_FOURCC_R16, - __DRI_IMAGE_FOURCC_GR88, - __DRI_IMAGE_FOURCC_GR1616, - __DRI_IMAGE_FOURCC_YUV410, - __DRI_IMAGE_FOURCC_YUV411, - __DRI_IMAGE_FOURCC_YUV420, - __DRI_IMAGE_FOURCC_YUV422, - __DRI_IMAGE_FOURCC_YUV444, - __DRI_IMAGE_FOURCC_YVU410, - __DRI_IMAGE_FOURCC_YVU411, - __DRI_IMAGE_FOURCC_YVU420, - __DRI_IMAGE_FOURCC_YVU422, - __DRI_IMAGE_FOURCC_YVU444, - 
__DRI_IMAGE_FOURCC_NV12, - __DRI_IMAGE_FOURCC_NV16, - __DRI_IMAGE_FOURCC_YUYV -}; - -static int convert_fourcc(int format, int *dri_components_p) -{ +struct dri2_format_mapping { + int dri_fourcc; + int dri_format; int dri_components; - switch(format) { - case __DRI_IMAGE_FOURCC_ARGB1555: - format = __DRI_IMAGE_FORMAT_ARGB1555; - dri_components = __DRI_IMAGE_COMPONENTS_RGBA; - break; - case __DRI_IMAGE_FOURCC_RGB565: - format = __DRI_IMAGE_FORMAT_RGB565; - dri_components = __DRI_IMAGE_COMPONENTS_RGB; - break; - case __DRI_IMAGE_FOURCC_ARGB8888: - format = __DRI_IMAGE_FORMAT_ARGB8888; - dri_components = __DRI_IMAGE_COMPONENTS_RGBA; - break; - case __DRI_IMAGE_FOURCC_XRGB8888: - format = __DRI_IMAGE_FORMAT_XRGB8888; - dri_components = __DRI_IMAGE_COMPONENTS_RGB; - break; - case __DRI_IMAGE_FOURCC_ABGR8888: - format = __DRI_IMAGE_FORMAT_ABGR8888; - dri_components = __DRI_IMAGE_COMPONENTS_RGBA; - break; - case __DRI_IMAGE_FOURCC_XBGR8888: - format = __DRI_IMAGE_FORMAT_XBGR8888; - dri_components = __DRI_IMAGE_COMPONENTS_RGB; - break; - case __DRI_IMAGE_FOURCC_ARGB2101010: - format = __DRI_IMAGE_FORMAT_ARGB2101010; - dri_components = __DRI_IMAGE_COMPONENTS_RGBA; - break; - case __DRI_IMAGE_FOURCC_XRGB2101010: - format = __DRI_IMAGE_FORMAT_XRGB2101010; - dri_components = __DRI_IMAGE_COMPONENTS_RGB; - break; - case __DRI_IMAGE_FOURCC_ABGR2101010: - format = __DRI_IMAGE_FORMAT_ABGR2101010; - dri_components = __DRI_IMAGE_COMPONENTS_RGBA; - break; - case __DRI_IMAGE_FOURCC_XBGR2101010: - format = __DRI_IMAGE_FORMAT_XBGR2101010; - dri_components = __DRI_IMAGE_COMPONENTS_RGB; - break; - case __DRI_IMAGE_FOURCC_R8: - format = __DRI_IMAGE_FORMAT_R8; - dri_components = __DRI_IMAGE_COMPONENTS_R; - break; - case __DRI_IMAGE_FOURCC_GR88: - format = __DRI_IMAGE_FORMAT_GR88; - dri_components = __DRI_IMAGE_COMPONENTS_RG; - break; - case __DRI_IMAGE_FOURCC_R16: - format = __DRI_IMAGE_FORMAT_R16; - dri_components = __DRI_IMAGE_COMPONENTS_R; - break; - case __DRI_IMAGE_FOURCC_GR1616: - 
format = __DRI_IMAGE_FORMAT_GR1616; - dri_components = __DRI_IMAGE_COMPONENTS_RG; - break; - case __DRI_IMAGE_FOURCC_YUYV: - format = __DRI_IMAGE_FORMAT_YUYV; - dri_components = __DRI_IMAGE_COMPONENTS_Y_XUXV; - break; - /* - * For multi-planar YUV formats, we return the format of the first - * plane only. Since there is only one caller which supports multi- - * planar YUV it gets to figure out the remaining planes on it's - * own. - */ - case __DRI_IMAGE_FOURCC_YUV420: - case __DRI_IMAGE_FOURCC_YVU420: - format = __DRI_IMAGE_FORMAT_R8; - dri_components = __DRI_IMAGE_COMPONENTS_Y_U_V; - break; - case __DRI_IMAGE_FOURCC_NV12: - format = __DRI_IMAGE_FORMAT_R8; - dri_components = __DRI_IMAGE_COMPONENTS_Y_UV; - break; - default: - return -1; - } - *dri_components_p = dri_components; - return format; -} - -/* NOTE this probably isn't going to do the right thing for YUV images - * (but I think the same can be said for intel_query_image()). I think - * only needed for exporting dmabuf's, so I think I won't loose much - * sleep over it. 
- */ -static int convert_to_fourcc(int format) -{ - switch(format) { - case __DRI_IMAGE_FORMAT_ARGB1555: - format = __DRI_IMAGE_FOURCC_ARGB1555; - break; - case __DRI_IMAGE_FORMAT_RGB565: - format = __DRI_IMAGE_FOURCC_RGB565; - break; - case __DRI_IMAGE_FORMAT_ARGB8888: - format = __DRI_IMAGE_FOURCC_ARGB8888; - break; - case __DRI_IMAGE_FORMAT_XRGB8888: - format = __DRI_IMAGE_FOURCC_XRGB8888; - break; - case __DRI_IMAGE_FORMAT_ABGR8888: - format = __DRI_IMAGE_FOURCC_ABGR8888; - break; - case __DRI_IMAGE_FORMAT_XBGR8888: - format = __DRI_IMAGE_FOURCC_XBGR8888; - break; - case __DRI_IMAGE_FORMAT_ARGB2101010: - format = __DRI_IMAGE_FOURCC_ARGB2101010; - break; - case __DRI_IMAGE_FORMAT_XRGB2101010: - format = __DRI_IMAGE_FOURCC_XRGB2101010; - break; - case __DRI_IMAGE_FORMAT_ABGR2101010: - format = __DRI_IMAGE_FOURCC_ABGR2101010; - break; - case __DRI_IMAGE_FORMAT_XBGR2101010: - format = __DRI_IMAGE_FOURCC_XBGR2101010; - break; - case __DRI_IMAGE_FORMAT_R8: - format = __DRI_IMAGE_FOURCC_R8; - break; - case __DRI_IMAGE_FORMAT_GR88: - format = __DRI_IMAGE_FOURCC_GR88; - break; - default: - return -1; - } - return format; -} + enum pipe_format pipe_format; +}; -static enum pipe_format dri2_format_to_pipe_format (int format) -{ - enum pipe_format pf; +static const struct dri2_format_mapping dri2_format_table[] = { + { __DRI_IMAGE_FOURCC_ARGB2101010, __DRI_IMAGE_FORMAT_ARGB2101010, + __DRI_IMAGE_COMPONENTS_RGBA, PIPE_FORMAT_B10G10R10A2_UNORM }, + { __DRI_IMAGE_FOURCC_XRGB2101010, __DRI_IMAGE_FORMAT_XRGB2101010, + __DRI_IMAGE_COMPONENTS_RGB, PIPE_FORMAT_B10G10R10X2_UNORM }, + { __DRI_IMAGE_FOURCC_ABGR2101010, __DRI_IMAGE_FORMAT_ABGR2101010, + __DRI_IMAGE_COMPONENTS_RGBA, PIPE_FORMAT_R10G10B10A2_UNORM }, + { __DRI_IMAGE_FOURCC_XBGR2101010, __DRI_IMAGE_FORMAT_XBGR2101010, + __DRI_IMAGE_COMPONENTS_RGB, PIPE_FORMAT_R10G10B10X2_UNORM }, + { __DRI_IMAGE_FOURCC_ARGB8888, __DRI_IMAGE_FORMAT_ARGB8888, + __DRI_IMAGE_COMPONENTS_RGBA, PIPE_FORMAT_BGRA8888_UNORM }, + { 
__DRI_IMAGE_FOURCC_ABGR8888, __DRI_IMAGE_FORMAT_ABGR8888, + __DRI_IMAGE_COMPONENTS_RGBA, PIPE_FORMAT_RGBA8888_UNORM }, + { __DRI_IMAGE_FOURCC_SARGB8888, __DRI_IMAGE_FORMAT_SARGB8, + __DRI_IMAGE_COMPONENTS_RGBA, PIPE_FORMAT_BGRA8888_SRGB }, + { __DRI_IMAGE_FOURCC_XRGB8888, __DRI_IMAGE_FORMAT_XRGB8888, + __DRI_IMAGE_COMPONENTS_RGB, PIPE_FORMAT_BGRX8888_UNORM }, + { __DRI_IMAGE_FOURCC_XBGR8888, __DRI_IMAGE_FORMAT_XBGR8888, + __DRI_IMAGE_COMPONENTS_RGB, PIPE_FORMAT_RGBX8888_UNORM }, + { __DRI_IMAGE_FOURCC_ARGB1555, __DRI_IMAGE_FORMAT_ARGB1555, + __DRI_IMAGE_COMPONENTS_RGBA, PIPE_FORMAT_B5G5R5A1_UNORM }, + { __DRI_IMAGE_FOURCC_RGB565, __DRI_IMAGE_FORMAT_RGB565, + __DRI_IMAGE_COMPONENTS_RGB, PIPE_FORMAT_B5G6R5_UNORM }, + { __DRI_IMAGE_FOURCC_R8, __DRI_IMAGE_FORMAT_R8, + __DRI_IMAGE_COMPONENTS_R, PIPE_FORMAT_R8_UNORM }, + { __DRI_IMAGE_FOURCC_R16, __DRI_IMAGE_FORMAT_R16, + __DRI_IMAGE_COMPONENTS_R, PIPE_FORMAT_R16_UNORM }, + { __DRI_IMAGE_FOURCC_GR88, __DRI_IMAGE_FORMAT_GR88, + __DRI_IMAGE_COMPONENTS_RG, PIPE_FORMAT_RG88_UNORM }, + { __DRI_IMAGE_FOURCC_GR1616, __DRI_IMAGE_FORMAT_GR88, + __DRI_IMAGE_COMPONENTS_RG, PIPE_FORMAT_RG1616_UNORM }, + { __DRI_IMAGE_FOURCC_YUV420, __DRI_IMAGE_FORMAT_NONE, + __DRI_IMAGE_COMPONENTS_Y_U_V, PIPE_FORMAT_IYUV }, + { __DRI_IMAGE_FOURCC_YVU420, __DRI_IMAGE_FORMAT_NONE, + __DRI_IMAGE_COMPONENTS_Y_U_V, PIPE_FORMAT_YV12 }, + { __DRI_IMAGE_FOURCC_NV12, __DRI_IMAGE_FORMAT_NONE, + __DRI_IMAGE_COMPONENTS_Y_UV, PIPE_FORMAT_NV12 }, + { __DRI_IMAGE_FOURCC_YUYV, __DRI_IMAGE_FORMAT_YUYV, + __DRI_IMAGE_COMPONENTS_Y_XUXV, PIPE_FORMAT_YUYV }, +}; - switch (format) { - case __DRI_IMAGE_FORMAT_ARGB1555: - pf = PIPE_FORMAT_B5G5R5A1_UNORM; - break; - case __DRI_IMAGE_FORMAT_RGB565: - pf = PIPE_FORMAT_B5G6R5_UNORM; - break; - case __DRI_IMAGE_FORMAT_XRGB8888: - pf = PIPE_FORMAT_BGRX8888_UNORM; - break; - case __DRI_IMAGE_FORMAT_ARGB8888: - pf = PIPE_FORMAT_BGRA8888_UNORM; - break; - case __DRI_IMAGE_FORMAT_XBGR8888: - pf = PIPE_FORMAT_RGBX8888_UNORM; - break; 
- case __DRI_IMAGE_FORMAT_ABGR8888: - pf = PIPE_FORMAT_RGBA8888_UNORM; - break; - case __DRI_IMAGE_FORMAT_XRGB2101010: - pf = PIPE_FORMAT_B10G10R10X2_UNORM; - break; - case __DRI_IMAGE_FORMAT_ARGB2101010: - pf = PIPE_FORMAT_B10G10R10A2_UNORM; - break; - case __DRI_IMAGE_FORMAT_XBGR2101010: - pf = PIPE_FORMAT_R10G10B10X2_UNORM; - break; - case __DRI_IMAGE_FORMAT_ABGR2101010: - pf = PIPE_FORMAT_R10G10B10A2_UNORM; - break; - case __DRI_IMAGE_FORMAT_R8: - pf = PIPE_FORMAT_R8_UNORM; - break; - case __DRI_IMAGE_FORMAT_GR88: - pf = PIPE_FORMAT_RG88_UNORM; - break; - case __DRI_IMAGE_FORMAT_R16: - pf = PIPE_FORMAT_R16_UNORM; - break; - case __DRI_IMAGE_FORMAT_GR1616: - pf = PIPE_FORMAT_R16G16_UNORM; - break; - case __DRI_IMAGE_FORMAT_YUYV: - pf = PIPE_FORMAT_YUYV; - break; - default: - pf = PIPE_FORMAT_NONE; - break; +static const struct dri2_format_mapping * +dri2_get_mapping_by_fourcc(int fourcc) { + for (unsigned i = 0; i < ARRAY_SIZE(dri2_format_table); i++) { + if (dri2_format_table[i].dri_fourcc == fourcc) + return &dri2_format_table[i]; } - return pf; + return NULL; } -static enum pipe_format fourcc_to_pipe_format(int fourcc) -{ - enum pipe_format pf; - - switch (fourcc) { - case __DRI_IMAGE_FOURCC_R8: - pf = PIPE_FORMAT_R8_UNORM; - break; - case __DRI_IMAGE_FOURCC_GR88: - pf = PIPE_FORMAT_RG88_UNORM; - break; - case __DRI_IMAGE_FOURCC_ARGB1555: - pf = PIPE_FORMAT_B5G5R5A1_UNORM; - break; - case __DRI_IMAGE_FOURCC_R16: - pf = PIPE_FORMAT_R16_UNORM; - break; - case __DRI_IMAGE_FOURCC_GR1616: - pf = PIPE_FORMAT_RG1616_UNORM; - break; - case __DRI_IMAGE_FOURCC_RGB565: - pf = PIPE_FORMAT_B5G6R5_UNORM; - break; - case __DRI_IMAGE_FOURCC_ARGB8888: - pf = PIPE_FORMAT_BGRA8888_UNORM; - break; - case __DRI_IMAGE_FOURCC_XRGB8888: - pf = PIPE_FORMAT_BGRX8888_UNORM; - break; - case __DRI_IMAGE_FOURCC_ABGR8888: - pf = PIPE_FORMAT_RGBA8888_UNORM; - break; - case __DRI_IMAGE_FOURCC_XBGR8888: - pf = PIPE_FORMAT_RGBX8888_UNORM; - break; - case __DRI_IMAGE_FOURCC_ARGB2101010: - pf = 
PIPE_FORMAT_B10G10R10A2_UNORM; - break; - case __DRI_IMAGE_FOURCC_XRGB2101010: - pf = PIPE_FORMAT_B10G10R10X2_UNORM; - break; - case __DRI_IMAGE_FOURCC_ABGR2101010: - pf = PIPE_FORMAT_R10G10B10A2_UNORM; - break; - case __DRI_IMAGE_FOURCC_XBGR2101010: - pf = PIPE_FORMAT_R10G10B10X2_UNORM; - break; - - case __DRI_IMAGE_FOURCC_NV12: - pf = PIPE_FORMAT_NV12; - break; - case __DRI_IMAGE_FOURCC_YUYV: - pf = PIPE_FORMAT_YUYV; - break; - case __DRI_IMAGE_FOURCC_YUV420: - case __DRI_IMAGE_FOURCC_YVU420: - pf = PIPE_FORMAT_YV12; - break; - - case __DRI_IMAGE_FOURCC_SARGB8888: - case __DRI_IMAGE_FOURCC_YUV410: - case __DRI_IMAGE_FOURCC_YUV411: - case __DRI_IMAGE_FOURCC_YUV422: - case __DRI_IMAGE_FOURCC_YUV444: - case __DRI_IMAGE_FOURCC_NV16: - case __DRI_IMAGE_FOURCC_YVU410: - case __DRI_IMAGE_FOURCC_YVU411: - case __DRI_IMAGE_FOURCC_YVU422: - case __DRI_IMAGE_FOURCC_YVU444: - default: - pf = PIPE_FORMAT_NONE; +static const struct dri2_format_mapping * +dri2_get_mapping_by_format(int format) { + for (unsigned i = 0; i < ARRAY_SIZE(dri2_format_table); i++) { + if (dri2_format_table[i].dri_format == format) + return &dri2_format_table[i]; } - return pf; + return NULL; } /** @@ -1011,7 +778,7 @@ static __DRIimage * dri2_create_image_from_winsys(__DRIscreen *_screen, - int width, int height, int format, + int width, int height, enum pipe_format pf, int num_handles, struct winsys_handle *whandle, void *loaderPrivate) { @@ -1019,14 +786,28 @@ struct pipe_screen *pscreen = screen->base.screen; __DRIimage *img; struct pipe_resource templ; - unsigned tex_usage; - enum pipe_format pf; + unsigned tex_usage = 0; int i; - tex_usage = PIPE_BIND_RENDER_TARGET | PIPE_BIND_SAMPLER_VIEW; + if (pscreen->is_format_supported(pscreen, pf, screen->target, 0, 0, + PIPE_BIND_RENDER_TARGET)) + tex_usage |= PIPE_BIND_RENDER_TARGET; + if (pscreen->is_format_supported(pscreen, pf, screen->target, 0, 0, + PIPE_BIND_SAMPLER_VIEW)) + tex_usage |= PIPE_BIND_SAMPLER_VIEW; + + if (!tex_usage && 
util_format_is_yuv(pf)) { + /* YUV format sampling can be emulated by the Mesa state tracker by + * using multiple R8/RG88 samplers. So try to rewrite the pipe format. + */ + pf = PIPE_FORMAT_R8_UNORM; + + if (pscreen->is_format_supported(pscreen, pf, screen->target, 0, 0, + PIPE_BIND_SAMPLER_VIEW)) + tex_usage |= PIPE_BIND_SAMPLER_VIEW; + } - pf = dri2_format_to_pipe_format (format); - if (pf == PIPE_FORMAT_NONE) + if (!tex_usage) return NULL; img = CALLOC_STRUCT(__DRIimageRec); @@ -1080,7 +861,6 @@ img->level = 0; img->layer = 0; - img->dri_format = format; img->use = 0; img->loader_private = loaderPrivate; @@ -1092,22 +872,31 @@ int width, int height, int format, int name, int pitch, void *loaderPrivate) { + const struct dri2_format_mapping *map = dri2_get_mapping_by_format(format); struct winsys_handle whandle; - enum pipe_format pf; + __DRIimage *img; + + if (!map) + return NULL; memset(&whandle, 0, sizeof(whandle)); whandle.type = WINSYS_HANDLE_TYPE_SHARED; whandle.handle = name; whandle.modifier = DRM_FORMAT_MOD_INVALID; - pf = dri2_format_to_pipe_format (format); - if (pf == PIPE_FORMAT_NONE) + whandle.stride = pitch * util_format_get_blocksize(map->pipe_format); + + img = dri2_create_image_from_winsys(_screen, width, height, map->pipe_format, + 1, &whandle, loaderPrivate); + + if (!img) return NULL; - whandle.stride = pitch * util_format_get_blocksize(pf); + img->dri_components = map->dri_components; + img->dri_fourcc = map->dri_fourcc; + img->dri_format = map->dri_format; - return dri2_create_image_from_winsys(_screen, width, height, format, - 1, &whandle, loaderPrivate); + return img; } static __DRIimage * @@ -1115,14 +904,19 @@ int width, int height, int fourcc, uint64_t modifier, int *fds, int num_fds, int *strides, int *offsets, unsigned *error, - int *dri_components, void *loaderPrivate) + void *loaderPrivate) { struct winsys_handle whandles[3]; - int format; + const struct dri2_format_mapping *map = dri2_get_mapping_by_fourcc(fourcc); __DRIimage 
*img = NULL; unsigned err = __DRI_IMAGE_ERROR_SUCCESS; int expected_num_fds, i; + if (!map) { + err = __DRI_IMAGE_ERROR_BAD_MATCH; + goto exit; + } + switch (fourcc) { case __DRI_IMAGE_FOURCC_YUV420: case __DRI_IMAGE_FOURCC_YVU420: @@ -1141,12 +935,6 @@ goto exit; } - format = convert_fourcc(fourcc, dri_components); - if (format == -1) { - err = __DRI_IMAGE_ERROR_BAD_MATCH; - goto exit; - } - memset(whandles, 0, sizeof(whandles)); for (i = 0; i < num_fds; i++) { @@ -1168,12 +956,19 @@ whandles[1] = whandles[2]; whandles[2] = tmp; fourcc = __DRI_IMAGE_FOURCC_YUV420; + map = dri2_get_mapping_by_fourcc(fourcc); } - img = dri2_create_image_from_winsys(_screen, width, height, format, + img = dri2_create_image_from_winsys(_screen, width, height, map->pipe_format, num_fds, whandles, loaderPrivate); - if(img == NULL) + if(img == NULL) { err = __DRI_IMAGE_ERROR_BAD_ALLOC; + goto exit; + } + + img->dri_components = map->dri_components; + img->dri_fourcc = fourcc; + img->dri_format = map->dri_format; exit: if (error) @@ -1190,16 +985,14 @@ const unsigned count, void *loaderPrivate) { + const struct dri2_format_mapping *map = dri2_get_mapping_by_format(format); struct dri_screen *screen = dri_screen(_screen); __DRIimage *img; struct pipe_resource templ; unsigned tex_usage; - enum pipe_format pf; - /* createImageWithModifiers doesn't supply usage, and we should not get - * here with both modifiers and a usage flag. 
- */ - assert(!(use && (modifiers != NULL))); + if (!map) + return NULL; tex_usage = PIPE_BIND_RENDER_TARGET | PIPE_BIND_SAMPLER_VIEW; @@ -1215,17 +1008,13 @@ tex_usage |= PIPE_BIND_CURSOR; } - pf = dri2_format_to_pipe_format (format); - if (pf == PIPE_FORMAT_NONE) - return NULL; - img = CALLOC_STRUCT(__DRIimageRec); if (!img) return NULL; memset(&templ, 0, sizeof(templ)); templ.bind = tex_usage; - templ.format = pf; + templ.format = map->pipe_format; templ.target = PIPE_TEXTURE_2D; templ.last_level = 0; templ.width0 = width; @@ -1251,6 +1040,7 @@ img->level = 0; img->layer = 0; img->dri_format = format; + img->dri_fourcc = map->dri_fourcc; img->dri_components = 0; img->use = use; @@ -1276,7 +1066,7 @@ void *loaderPrivate) { return dri2_create_image_common(dri_screen, width, height, format, - 0 /* use */, modifiers, count, + __DRI_IMAGE_USE_SHARE, modifiers, count, loaderPrivate); } @@ -1345,8 +1135,18 @@ *value = image->dri_components; return GL_TRUE; case __DRI_IMAGE_ATTRIB_FOURCC: - *value = convert_to_fourcc(image->dri_format); - return *value != -1; + if (image->dri_fourcc) { + *value = image->dri_fourcc; + } else { + const struct dri2_format_mapping *map; + + map = dri2_get_mapping_by_format(image->dri_format); + if (!map) + return GL_FALSE; + + *value = map->dri_fourcc; + } + return GL_TRUE; case __DRI_IMAGE_ATTRIB_NUM_PLANES: *value = 1; return GL_TRUE; @@ -1429,15 +1229,14 @@ int *names, int num_names, int *strides, int *offsets, void *loaderPrivate) { + const struct dri2_format_mapping *map = dri2_get_mapping_by_format(format); __DRIimage *img; - int dri_components; struct winsys_handle whandle; - if (num_names != 1) + if (!map) return NULL; - format = convert_fourcc(format, &dri_components); - if (format == -1) + if (num_names != 1) return NULL; memset(&whandle, 0, sizeof(whandle)); @@ -1447,12 +1246,15 @@ whandle.offset = offsets[0]; whandle.modifier = DRM_FORMAT_MOD_INVALID; - img = dri2_create_image_from_winsys(screen, width, height, format, + img = 
dri2_create_image_from_winsys(screen, width, height, map->pipe_format, 1, &whandle, loaderPrivate); if (img == NULL) return NULL; - img->dri_components = dri_components; + img->dri_components = map->dri_components; + img->dri_fourcc = map->dri_fourcc; + img->dri_format = map->pipe_format; + return img; } @@ -1485,18 +1287,9 @@ int *fds, int num_fds, int *strides, int *offsets, void *loaderPrivate) { - __DRIimage *img; - int dri_components; - - img = dri2_create_image_from_fd(screen, width, height, fourcc, + return dri2_create_image_from_fd(screen, width, height, fourcc, DRM_FORMAT_MOD_INVALID, fds, num_fds, - strides, offsets, NULL, - &dri_components, loaderPrivate); - if (img == NULL) - return NULL; - - img->dri_components = dri_components; - return img; + strides, offsets, NULL, loaderPrivate); } static boolean @@ -1505,24 +1298,26 @@ { struct dri_screen *screen = dri_screen(_screen); struct pipe_screen *pscreen = screen->base.screen; - const unsigned bind = PIPE_BIND_RENDER_TARGET | PIPE_BIND_SAMPLER_VIEW; int i, j; - for (i = 0, j = 0; (i < ARRAY_SIZE(fourcc_formats)) && + for (i = 0, j = 0; (i < ARRAY_SIZE(dri2_format_table)) && (j < max || max == 0); i++) { + const struct dri2_format_mapping *map = &dri2_format_table[i]; + /* The sRGB format is not a real FourCC as defined by drm_fourcc.h, so we * must not leak it out to clients. 
*/ - if (fourcc_formats[i] == __DRI_IMAGE_FOURCC_SARGB8888) + if (dri2_format_table[i].dri_fourcc == __DRI_IMAGE_FOURCC_SARGB8888) continue; - if (pscreen->is_format_supported(pscreen, - fourcc_to_pipe_format( - fourcc_formats[i]), - screen->target, - 0, 0, bind)) { + if (pscreen->is_format_supported(pscreen, map->pipe_format, + screen->target, 0, 0, + PIPE_BIND_RENDER_TARGET) || + pscreen->is_format_supported(pscreen, map->pipe_format, + screen->target, 0, 0, + PIPE_BIND_SAMPLER_VIEW)) { if (j < max) - formats[j] = fourcc_formats[i]; + formats[j] = map->dri_fourcc; j++; } } @@ -1537,12 +1332,19 @@ { struct dri_screen *screen = dri_screen(_screen); struct pipe_screen *pscreen = screen->base.screen; - enum pipe_format format = fourcc_to_pipe_format(fourcc); - const unsigned usage = PIPE_BIND_RENDER_TARGET | PIPE_BIND_SAMPLER_VIEW; + const struct dri2_format_mapping *map = dri2_get_mapping_by_fourcc(fourcc); + enum pipe_format format; + + if (!map) + return false; + + format = map->pipe_format; if (pscreen->query_dmabuf_modifiers != NULL && - pscreen->is_format_supported(pscreen, format, screen->target, 0, 0, - usage)) { + (pscreen->is_format_supported(pscreen, format, screen->target, 0, 0, + PIPE_BIND_RENDER_TARGET) || + pscreen->is_format_supported(pscreen, format, screen->target, 0, 0, + PIPE_BIND_SAMPLER_VIEW))) { pscreen->query_dmabuf_modifiers(pscreen, format, max, modifiers, external_only, count); return true; @@ -1563,12 +1365,10 @@ void *loaderPrivate) { __DRIimage *img; - int dri_components; img = dri2_create_image_from_fd(screen, width, height, fourcc, DRM_FORMAT_MOD_INVALID, fds, num_fds, - strides, offsets, error, - &dri_components, loaderPrivate); + strides, offsets, error, loaderPrivate); if (img == NULL) return NULL; @@ -1576,7 +1376,6 @@ img->sample_range = sample_range; img->horizontal_siting = horizontal_siting; img->vertical_siting = vertical_siting; - img->dri_components = dri_components; *error = __DRI_IMAGE_ERROR_SUCCESS; return img; @@ 
-1595,11 +1394,10 @@ void *loaderPrivate) { __DRIimage *img; - int dri_components; img = dri2_create_image_from_fd(screen, width, height, fourcc, modifier, fds, num_fds, strides, offsets, - error, &dri_components, loaderPrivate); + error, loaderPrivate); if (img == NULL) return NULL; @@ -1607,7 +1405,6 @@ img->sample_range = sample_range; img->horizontal_siting = horizontal_siting; img->vertical_siting = vertical_siting; - img->dri_components = dri_components; *error = __DRI_IMAGE_ERROR_SUCCESS; return img; diff -Nru mesa-18.3.3/src/gallium/state_trackers/dri/dri_drawable.c mesa-19.0.1/src/gallium/state_trackers/dri/dri_drawable.c --- mesa-18.3.3/src/gallium/state_trackers/dri/dri_drawable.c 2018-02-23 13:07:51.000000000 +0000 +++ mesa-19.0.1/src/gallium/state_trackers/dri/dri_drawable.c 2019-03-31 23:16:37.000000000 +0000 @@ -524,13 +524,6 @@ dri_postprocessing(ctx, drawable, ST_ATTACHMENT_BACK_LEFT); - if (ctx->hud) { - hud_run(ctx->hud, ctx->st->cso_context, - drawable->textures[ST_ATTACHMENT_BACK_LEFT]); - } - - pipe->flush_resource(pipe, drawable->textures[ST_ATTACHMENT_BACK_LEFT]); - if (pipe->invalidate_resource && (flags & __DRI2_FLUSH_INVALIDATE_ANCILLARY)) { if (drawable->textures[ST_ATTACHMENT_DEPTH_STENCIL]) @@ -538,6 +531,13 @@ if (drawable->msaa_textures[ST_ATTACHMENT_DEPTH_STENCIL]) pipe->invalidate_resource(pipe, drawable->msaa_textures[ST_ATTACHMENT_DEPTH_STENCIL]); } + + if (ctx->hud) { + hud_run(ctx->hud, ctx->st->cso_context, + drawable->textures[ST_ATTACHMENT_BACK_LEFT]); + } + + pipe->flush_resource(pipe, drawable->textures[ST_ATTACHMENT_BACK_LEFT]); } flush_flags = 0; diff -Nru mesa-18.3.3/src/gallium/state_trackers/dri/dri_screen.h mesa-19.0.1/src/gallium/state_trackers/dri/dri_screen.h --- mesa-18.3.3/src/gallium/state_trackers/dri/dri_screen.h 2018-12-07 18:58:04.000000000 +0000 +++ mesa-19.0.1/src/gallium/state_trackers/dri/dri_screen.h 2019-03-31 23:16:37.000000000 +0000 @@ -103,6 +103,7 @@ unsigned level; unsigned layer; uint32_t 
dri_format; + uint32_t dri_fourcc; uint32_t dri_components; unsigned use; diff -Nru mesa-18.3.3/src/gallium/state_trackers/nine/basetexture9.c mesa-19.0.1/src/gallium/state_trackers/nine/basetexture9.c --- mesa-18.3.3/src/gallium/state_trackers/nine/basetexture9.c 2018-09-27 19:13:54.000000000 +0000 +++ mesa-19.0.1/src/gallium/state_trackers/nine/basetexture9.c 2019-03-31 23:16:37.000000000 +0000 @@ -28,7 +28,7 @@ #include "cubetexture9.h" #include "volumetexture9.h" -#ifdef DEBUG +#if defined(DEBUG) || !defined(NDEBUG) #include "nine_pipe.h" #include "nine_dump.h" #endif @@ -605,7 +605,7 @@ BASETEX_REGISTER_UPDATE(This); } -#ifdef DEBUG +#if defined(DEBUG) || !defined(NDEBUG) void NineBaseTexture9_Dump( struct NineBaseTexture9 *This ) { @@ -620,4 +620,4 @@ This->base.info.array_size, This->base.info.last_level, This->managed.lod, This->managed.lod_resident); } -#endif /* DEBUG */ +#endif /* DEBUG || !NDEBUG */ diff -Nru mesa-18.3.3/src/gallium/state_trackers/nine/basetexture9.h mesa-19.0.1/src/gallium/state_trackers/nine/basetexture9.h --- mesa-18.3.3/src/gallium/state_trackers/nine/basetexture9.h 2017-11-05 00:14:08.000000000 +0000 +++ mesa-19.0.1/src/gallium/state_trackers/nine/basetexture9.h 2019-03-31 23:16:37.000000000 +0000 @@ -150,7 +150,7 @@ nine_bind(slot, tex); } -#ifdef DEBUG +#if defined(DEBUG) || !defined(NDEBUG) void NineBaseTexture9_Dump( struct NineBaseTexture9 *This ); #else diff -Nru mesa-18.3.3/src/gallium/state_trackers/nine/nine_debug.c mesa-19.0.1/src/gallium/state_trackers/nine/nine_debug.c --- mesa-18.3.3/src/gallium/state_trackers/nine/nine_debug.c 2017-11-05 00:14:08.000000000 +0000 +++ mesa-19.0.1/src/gallium/state_trackers/nine/nine_debug.c 2019-03-31 23:16:37.000000000 +0000 @@ -93,18 +93,18 @@ for (func += 4; func != f; ++func) { *ptr++ = tolower(*func); } *ptr = '\0'; if (tid) - debug_printf("nine:0x%08lx:%s:%s: ", tid, klass, ++f); + _debug_printf("nine:0x%08lx:%s:%s: ", tid, klass, ++f); else - debug_printf("nine:%s:%s: ", klass, 
++f); + _debug_printf("nine:%s:%s: ", klass, ++f); } else if (func) { if (tid) - debug_printf("nine:0x%08lx:%s ", tid, func); + _debug_printf("nine:0x%08lx:%s ", tid, func); else - debug_printf("nine:%s ", func); + _debug_printf("nine:%s ", func); } va_start(ap, fmt); - debug_vprintf(fmt, ap); + _debug_vprintf(fmt, ap); va_end(ap); } } @@ -116,5 +116,5 @@ { const char *r = strrchr(file, '/'); if (r == NULL) { r = strrchr(file, '\\'); } - debug_printf("nine:%s:%d: %s STUB!\n", r ? ++r : file, line, func); + _debug_printf("nine:%s:%d: %s STUB!\n", r ? ++r : file, line, func); } diff -Nru mesa-18.3.3/src/gallium/state_trackers/nine/nine_debug.h mesa-19.0.1/src/gallium/state_trackers/nine/nine_debug.h --- mesa-18.3.3/src/gallium/state_trackers/nine/nine_debug.h 2017-11-05 00:14:08.000000000 +0000 +++ mesa-19.0.1/src/gallium/state_trackers/nine/nine_debug.h 2019-03-31 23:16:37.000000000 +0000 @@ -33,7 +33,7 @@ #define ERR(fmt, ...) _nine_debug_printf(DBG_ERROR, __FUNCTION__, fmt, ## __VA_ARGS__) -#ifdef DEBUG +#if defined(DEBUG) || !defined(NDEBUG) #define WARN(fmt, ...) _nine_debug_printf(DBG_WARN, __FUNCTION__, fmt, ## __VA_ARGS__) #define WARN_ONCE(fmt, ...) \ do { \ @@ -48,7 +48,7 @@ #define WARN_ONCE(fmt, ...) #endif -#ifdef DEBUG +#if defined(DEBUG) || !defined(NDEBUG) #define DBG_FLAG(flag, fmt, ...) \ _nine_debug_printf(flag, __FUNCTION__, fmt, ## __VA_ARGS__) #else @@ -90,7 +90,7 @@ const char *func, unsigned line ); -#ifdef DEBUG +#if defined(DEBUG) || !defined(NDEBUG) #define STUB(ret) \ do { \ _nine_stub(__FILE__, __FUNCTION__, __LINE__); \ @@ -104,7 +104,7 @@ * macro is designed to be used in conditionals ala * if (user_error(required condition)) { assertion failed } * It also prints debug message if the assertion fails. */ -#ifdef DEBUG +#if defined(DEBUG) || !defined(NDEBUG) #define user_error(x) \ (!(x) ? (DBG_FLAG(DBG_USER, "User assertion failed: `%s'\n", #x), TRUE) \ : FALSE) @@ -112,7 +112,7 @@ #define user_error(x) (!(x) ? 
TRUE : FALSE) #endif -#ifdef DEBUG +#if defined(DEBUG) || !defined(NDEBUG) #define user_warn(x) \ if ((x)) { DBG_FLAG(DBG_USER, "User warning: `%s'\n", #x); } #else diff -Nru mesa-18.3.3/src/gallium/state_trackers/nine/nine_dump.c mesa-19.0.1/src/gallium/state_trackers/nine/nine_dump.c --- mesa-18.3.3/src/gallium/state_trackers/nine/nine_dump.c 2017-11-05 00:14:08.000000000 +0000 +++ mesa-19.0.1/src/gallium/state_trackers/nine/nine_dump.c 2019-03-31 23:16:37.000000000 +0000 @@ -8,7 +8,7 @@ #include "nine_dump.h" -#ifdef DEBUG +#if defined(DEBUG) || !defined(NDEBUG) static char __thread tls[128]; @@ -810,4 +810,4 @@ FREE(s); } -#endif /* DEBUG */ +#endif /* DEBUG || !NDEBUG */ diff -Nru mesa-18.3.3/src/gallium/state_trackers/nine/nine_dump.h mesa-19.0.1/src/gallium/state_trackers/nine/nine_dump.h --- mesa-18.3.3/src/gallium/state_trackers/nine/nine_dump.h 2017-11-05 00:14:08.000000000 +0000 +++ mesa-19.0.1/src/gallium/state_trackers/nine/nine_dump.h 2019-03-31 23:16:37.000000000 +0000 @@ -16,7 +16,7 @@ const char *nine_D3DLOCK_to_str(DWORD); const char *nine_D3DSAMP_to_str(DWORD); -#ifdef DEBUG +#if defined(DEBUG) || !defined(NDEBUG) void nine_dump_D3DADAPTER_IDENTIFIER9(unsigned, const D3DADAPTER_IDENTIFIER9 *); @@ -29,7 +29,7 @@ void nine_dump_D3DTSS_value(unsigned, D3DTEXTURESTAGESTATETYPE, DWORD); -#else /* !DEBUG */ +#else /* !DEBUG && NDEBUG */ static inline void nine_dump_D3DADAPTER_IDENTIFIER9(unsigned ch, const D3DADAPTER_IDENTIFIER9 *id) @@ -47,6 +47,6 @@ nine_dump_D3DTSS_value(unsigned ch, D3DTEXTURESTAGESTATETYPE tss, DWORD value) { } -#endif /* DEBUG */ +#endif /* DEBUG || !NDEBUG */ #endif /* _NINE_DUMP_H_H_ */ diff -Nru mesa-18.3.3/src/gallium/state_trackers/nine/nine_ff.c mesa-19.0.1/src/gallium/state_trackers/nine/nine_ff.c --- mesa-18.3.3/src/gallium/state_trackers/nine/nine_ff.c 2018-12-07 18:58:04.000000000 +0000 +++ mesa-19.0.1/src/gallium/state_trackers/nine/nine_ff.c 2019-03-31 23:16:37.000000000 +0000 @@ -2138,7 +2138,7 @@ { struct 
nine_context *context = &device->context; - if (device->ff.num_vs > 100) { + if (device->ff.num_vs > 1024) { /* could destroy the bound one here, so unbind */ context->pipe->bind_vs_state(context->pipe, NULL); util_hash_table_foreach(device->ff.ht_vs, nine_ff_ht_delete_cb, NULL); @@ -2152,7 +2152,7 @@ { struct nine_context *context = &device->context; - if (device->ff.num_ps > 100) { + if (device->ff.num_ps > 1024) { /* could destroy the bound one here, so unbind */ context->pipe->bind_fs_state(context->pipe, NULL); util_hash_table_foreach(device->ff.ht_ps, nine_ff_ht_delete_cb, NULL); @@ -2491,7 +2491,7 @@ for (k = 0; k < 4; k++) D->m[i][k] *= det; -#ifdef DEBUG +#if defined(DEBUG) || !defined(NDEBUG) { D3DMATRIX I; diff -Nru mesa-18.3.3/src/gallium/state_trackers/nine/nine_pipe.h mesa-19.0.1/src/gallium/state_trackers/nine/nine_pipe.h --- mesa-18.3.3/src/gallium/state_trackers/nine/nine_pipe.h 2018-09-27 19:13:54.000000000 +0000 +++ mesa-19.0.1/src/gallium/state_trackers/nine/nine_pipe.h 2019-03-31 23:16:37.000000000 +0000 @@ -377,6 +377,10 @@ if (levels) *levels = 1; + /* Ignores multisamplequality */ + if (*multisample == D3DMULTISAMPLE_NONE) + return D3D_OK; + if (*multisample == D3DMULTISAMPLE_NONMASKABLE) { if (depth_stencil_format(format)) bind = d3d9_get_pipe_depth_format_bindings(format); diff -Nru mesa-18.3.3/src/gallium/state_trackers/nine/nine_queue.c mesa-19.0.1/src/gallium/state_trackers/nine/nine_queue.c --- mesa-18.3.3/src/gallium/state_trackers/nine/nine_queue.c 2017-11-05 00:14:08.000000000 +0000 +++ mesa-19.0.1/src/gallium/state_trackers/nine/nine_queue.c 2019-03-31 23:16:37.000000000 +0000 @@ -265,8 +265,12 @@ nine_queue_delete(struct nine_queue_pool *ctx) { unsigned i; + mtx_destroy(&ctx->mutex_pop); + cnd_destroy(&ctx->event_pop); + mtx_destroy(&ctx->mutex_push); + cnd_destroy(&ctx->event_push); for (i = 0; i < NINE_CMD_BUFS; i++) FREE(ctx->pool[i].mem_pool); diff -Nru mesa-18.3.3/src/gallium/state_trackers/nine/nine_state.c 
mesa-19.0.1/src/gallium/state_trackers/nine/nine_state.c --- mesa-18.3.3/src/gallium/state_trackers/nine/nine_state.c 2019-02-01 12:03:20.000000000 +0000 +++ mesa-19.0.1/src/gallium/state_trackers/nine/nine_state.c 2019-03-31 23:16:37.000000000 +0000 @@ -157,7 +157,7 @@ (void) mtx_init(&ctx->thread_running, mtx_plain); (void) mtx_init(&ctx->thread_resume, mtx_plain); -#if DEBUG +#if defined(DEBUG) || !defined(NDEBUG) u_thread_setname("Main thread"); #endif @@ -234,7 +234,12 @@ nine_csmt_wait_processed(ctx); nine_queue_delete(ctx->pool); + + mtx_destroy(&ctx->thread_resume); + mtx_destroy(&ctx->thread_running); + mtx_destroy(&ctx->mutex_processed); + cnd_destroy(&ctx->event_processed); FREE(ctx); diff -Nru mesa-18.3.3/src/gallium/state_trackers/nine/surface9.c mesa-19.0.1/src/gallium/state_trackers/nine/surface9.c --- mesa-18.3.3/src/gallium/state_trackers/nine/surface9.c 2019-02-01 12:03:20.000000000 +0000 +++ mesa-19.0.1/src/gallium/state_trackers/nine/surface9.c 2019-03-31 23:16:37.000000000 +0000 @@ -272,7 +272,7 @@ assert(This->surface[1]); } -#ifdef DEBUG +#if defined(DEBUG) || !defined(NDEBUG) void NineSurface9_Dump( struct NineSurface9 *This ) { @@ -300,7 +300,7 @@ NineUnknown_Release(NineUnknown(tex)); } } -#endif /* DEBUG */ +#endif /* DEBUG || !NDEBUG */ HRESULT NINE_WINAPI NineSurface9_GetContainer( struct NineSurface9 *This, diff -Nru mesa-18.3.3/src/gallium/state_trackers/nine/surface9.h mesa-19.0.1/src/gallium/state_trackers/nine/surface9.h --- mesa-18.3.3/src/gallium/state_trackers/nine/surface9.h 2017-11-05 00:14:08.000000000 +0000 +++ mesa-19.0.1/src/gallium/state_trackers/nine/surface9.h 2019-03-31 23:16:37.000000000 +0000 @@ -139,7 +139,7 @@ return This->base.usage == 0 && !This->texture; } -#ifdef DEBUG +#if defined(DEBUG) || !defined(NDEBUG) void NineSurface9_Dump( struct NineSurface9 *This ); #else diff -Nru mesa-18.3.3/src/gallium/state_trackers/nine/swapchain9.c mesa-19.0.1/src/gallium/state_trackers/nine/swapchain9.c --- 
mesa-18.3.3/src/gallium/state_trackers/nine/swapchain9.c 2018-12-07 18:58:04.000000000 +0000 +++ mesa-19.0.1/src/gallium/state_trackers/nine/swapchain9.c 2019-03-31 23:16:37.000000000 +0000 @@ -28,6 +28,7 @@ #include "nine_pipe.h" #include "nine_dump.h" +#include "util/u_atomic.h" #include "util/u_inlines.h" #include "util/u_surface.h" #include "hud/hud_context.h" @@ -50,6 +51,7 @@ D3DDISPLAYMODEEX *mode ) { HRESULT hr; + int i; DBG("This=%p pDevice=%p pPresent=%p pCTX=%p hFocusWindow=%p\n", This, pParams->device, pPresent, pCTX, hFocusWindow); @@ -65,8 +67,7 @@ This->mode = NULL; ID3DPresent_AddRef(pPresent); - if (!This->actx->thread_submit && - This->base.device->minor_version_num > 2) { + if (This->base.device->minor_version_num > 2) { D3DPRESENT_PARAMETERS2 params2; memset(¶ms2, 0, sizeof(D3DPRESENT_PARAMETERS2)); @@ -80,6 +81,11 @@ This->rendering_done = FALSE; This->pool = NULL; + for (i = 0; i < D3DPRESENT_BACK_BUFFERS_MAX_EX + 1; i++) { + This->pending_presentation[i] = calloc(1, sizeof(BOOL)); + if (!This->pending_presentation[i]) + return E_OUTOFMEMORY; + } return NineSwapChain9_Resize(This, pPresentationParameters, mode); } @@ -122,6 +128,40 @@ return ret; } +static void +D3DWindowBuffer_release(struct NineSwapChain9 *This, + D3DWindowBuffer *present_handle) +{ + int i; + /* Add it to the 'pending release' list */ + for (i = 0; i < D3DPRESENT_BACK_BUFFERS_MAX_EX + 1; i++) { + if (!This->present_handles_pending_release[i]) { + This->present_handles_pending_release[i] = present_handle; + break; + } + } + if (i == (D3DPRESENT_BACK_BUFFERS_MAX_EX + 1)) { + ERR("Server not releasing buffers...\n"); + assert(false); + } + + /* Destroy elements of the list released by the server */ + for (i = 0; i < D3DPRESENT_BACK_BUFFERS_MAX_EX + 1; i++) { + if (This->present_handles_pending_release[i] && + ID3DPresent_IsBufferReleased(This->present, This->present_handles_pending_release[i])) { + /* WaitBufferReleased also waits the presentation feedback + * (which should 
arrive at about the same time), + * while IsBufferReleased doesn't. DestroyD3DWindowBuffer unfortunately + * checks it to release immediately all data, else the release + * is postponed for This->present release. To avoid leaks (we may handle + * a lot of resize), call WaitBufferReleased. */ + ID3DPresent_WaitBufferReleased(This->present, This->present_handles_pending_release[i]); + ID3DPresent_DestroyD3DWindowBuffer(This->present, This->present_handles_pending_release[i]); + This->present_handles_pending_release[i] = NULL; + } + } +} + static int NineSwapChain9_GetBackBufferCountForParams( struct NineSwapChain9 *This, D3DPRESENT_PARAMETERS *pParams ); @@ -285,7 +325,7 @@ This->enable_threadpool = FALSE; for (i = 0; i < oldBufferCount; i++) { - ID3DPresent_DestroyD3DWindowBuffer(This->present, This->present_handles[i]); + D3DWindowBuffer_release(This, This->present_handles[i]); This->present_handles[i] = NULL; if (This->present_buffers[i]) pipe_resource_reference(&(This->present_buffers[i]), NULL); @@ -508,6 +548,16 @@ if (This->pool) _mesa_threadpool_destroy(This, This->pool); + for (i = 0; i < D3DPRESENT_BACK_BUFFERS_MAX_EX + 1; i++) { + if (This->pending_presentation[i]) + FREE(This->pending_presentation[i]); + } + + for (i = 0; i < D3DPRESENT_BACK_BUFFERS_MAX_EX + 1; i++) { + if (This->present_handles_pending_release[i]) + ID3DPresent_DestroyD3DWindowBuffer(This->present, This->present_handles_pending_release[i]); + } + for (i = 0; i < This->num_back_buffers; i++) { if (This->buffers[i]) NineUnknown_Detach(NineUnknown(This->buffers[i])); @@ -619,6 +669,7 @@ struct pipe_fence_handle *fence_to_wait; ID3DPresent *present; D3DWindowBuffer *present_handle; + BOOL *pending_presentation; HWND hDestWindowOverride; }; @@ -630,6 +681,7 @@ work->screen->fence_reference(work->screen, &(work->fence_to_wait), NULL); } ID3DPresent_PresentBuffer(work->present, work->present_handle, work->hDestWindowOverride, NULL, NULL, NULL, 0); + p_atomic_set(work->pending_presentation, 
FALSE); free(work); } @@ -643,6 +695,8 @@ work->present = This->present; work->present_handle = This->present_handles[0]; work->hDestWindowOverride = hDestWindowOverride; + work->pending_presentation = This->pending_presentation[0]; + p_atomic_set(work->pending_presentation, TRUE); This->tasks[0] = _mesa_threadpool_queue_task(This->pool, work_present, work); return; @@ -661,7 +715,7 @@ struct pipe_fence_handle *fence; HRESULT hr; struct pipe_blit_info blit; - int target_width, target_height, target_depth; + int target_width, target_height, target_depth, i; DBG("present: This=%p pSourceRect=%p pDestRect=%p " "pDirtyRegion=%p hDestWindowOverride=%p" @@ -696,9 +750,51 @@ if (This->params.SwapEffect == D3DSWAPEFFECT_DISCARD) handle_draw_cursor_and_hud(This, resource); - ID3DPresent_GetWindowInfo(This->present, hDestWindowOverride, &target_width, &target_height, &target_depth); + hr = ID3DPresent_GetWindowInfo(This->present, hDestWindowOverride, &target_width, &target_height, &target_depth); (void)target_depth; + /* Can happen with old Wine (presentation can still succeed), + * or at window destruction. */ + if (FAILED(hr) || target_width == 0 || target_height == 0) { + target_width = resource->width0; + target_height = resource->height0; + } + + /* Switch to using presentation buffers on window resize. + * Note: Most apps should resize the d3d back buffers when + * a window resize is detected, which will result in a call to + * NineSwapChain9_Resize. Thus everything will get released, + * and it will switch back to not using separate presentation + * buffers. 
*/ + if (!This->present_buffers[0] && + (target_width != resource->width0 || target_height != resource->height0)) { + BOOL failure = false; + struct pipe_resource *new_resource[This->num_back_buffers]; + D3DWindowBuffer *new_handles[This->num_back_buffers]; + for (i = 0; i < This->num_back_buffers; i++) { + /* Note: if (!new_handles[i]), new_resource[i] + * gets released and contains NULL */ + create_present_buffer(This, target_width, target_height, &new_resource[i], &new_handles[i]); + if (!new_handles[i]) + failure = true; + } + if (failure) { + for (i = 0; i < This->num_back_buffers; i++) { + if (new_resource[i]) + pipe_resource_reference(&new_resource[i], NULL); + if (new_handles[i]) + D3DWindowBuffer_release(This, new_handles[i]); + } + } else { + for (i = 0; i < This->num_back_buffers; i++) { + D3DWindowBuffer_release(This, This->present_handles[i]); + This->present_handles[i] = new_handles[i]; + pipe_resource_reference(&This->present_buffers[i], new_resource[i]); + pipe_resource_reference(&new_resource[i], NULL); + } + } + } + pipe = NineDevice9_GetPipe(This->base.device); if (This->present_buffers[0]) { @@ -723,13 +819,7 @@ create_present_buffer(This, target_width, target_height, &new_resource, &new_handle); /* Switch to the new buffer */ if (new_handle) { - /* WaitBufferReleased also waits the presentation feedback, - * while IsBufferReleased doesn't. DestroyD3DWindowBuffer unfortunately - * checks it to release immediately all data, else the release - * is postponed for This->present release. To avoid leaks (we may handle - * a lot of resize), call WaitBufferReleased. 
*/ - ID3DPresent_WaitBufferReleased(This->present, This->present_handles[0]); - ID3DPresent_DestroyD3DWindowBuffer(This->present, This->present_handles[0]); + D3DWindowBuffer_release(This, This->present_handles[0]); This->present_handles[0] = new_handle; pipe_resource_reference(&This->present_buffers[0], new_resource); pipe_resource_reference(&new_resource, NULL); @@ -817,6 +907,7 @@ struct pipe_resource *res = NULL; D3DWindowBuffer *handle_temp; struct threadpool_task *task_temp; + BOOL *pending_presentation_temp; int i; HRESULT hr; @@ -850,14 +941,14 @@ if (This->base.device->minor_version_num > 2 && This->params.SwapEffect == D3DSWAPEFFECT_DISCARD && - This->params.PresentationInterval == D3DPRESENT_INTERVAL_IMMEDIATE && - !This->actx->thread_submit) { + This->params.PresentationInterval == D3DPRESENT_INTERVAL_IMMEDIATE) { int next_buffer = -1; while (next_buffer == -1) { /* Find a free backbuffer */ for (i = 1; i < This->num_back_buffers; i++) { - if (ID3DPresent_IsBufferReleased(This->present, This->present_handles[i])) { + if (!p_atomic_read(This->pending_presentation[i]) && + ID3DPresent_IsBufferReleased(This->present, This->present_handles[i])) { DBG("Found buffer released: %d\n", i); next_buffer = i; break; @@ -868,6 +959,17 @@ ID3DPresent_WaitBufferReleaseEvent(This->present); } } + + /* Free the task (we already checked it is finished) */ + if (This->tasks[next_buffer]) + _mesa_threadpool_wait_for_task(This->pool, &(This->tasks[next_buffer])); + assert(!*This->pending_presentation[next_buffer] && !This->tasks[next_buffer]); + This->tasks[next_buffer] = This->tasks[0]; + This->tasks[0] = NULL; + pending_presentation_temp = This->pending_presentation[next_buffer]; + This->pending_presentation[next_buffer] = This->pending_presentation[0]; + This->pending_presentation[0] = pending_presentation_temp; + /* Switch with the released buffer */ pipe_resource_reference(&res, This->buffers[0]->base.resource); NineSurface9_SetResourceResize( @@ -886,9 +988,6 @@ 
handle_temp = This->present_handles[0]; This->present_handles[0] = This->present_handles[next_buffer]; This->present_handles[next_buffer] = handle_temp; - - /* Path not yet compatible with thread_submit */ - assert(!This->tasks[0] && !This->tasks[next_buffer]); } else { switch (This->params.SwapEffect) { case D3DSWAPEFFECT_OVERLAY: /* Not implemented, fallback to FLIP */ @@ -923,6 +1022,11 @@ This->tasks[i-1] = This->tasks[i]; } This->tasks[This->num_back_buffers - 1] = task_temp; + pending_presentation_temp = This->pending_presentation[0]; + for (i = 1; i < This->num_back_buffers; i++) { + This->pending_presentation[i-1] = This->pending_presentation[i]; + } + This->pending_presentation[This->num_back_buffers - 1] = pending_presentation_temp; break; case D3DSWAPEFFECT_COPY: @@ -932,6 +1036,7 @@ if (This->tasks[0]) _mesa_threadpool_wait_for_task(This->pool, &(This->tasks[0])); + assert(!*This->pending_presentation[0]); ID3DPresent_WaitBufferReleased(This->present, This->present_handles[0]); } @@ -1159,15 +1264,17 @@ * without releasing them: * . Buffer on screen. * . Buffer scheduled kernel side to be next on screen. - * . Last buffer sent. - * For some reasons, 5 buffers are actually needed, because in - * case a pageflip is missed because rendering wasn't finished, - * the Xserver will hold 4 buffers. */ - if (!This->actx->thread_submit && - This->base.device->minor_version_num > 2 && - pParams->PresentationInterval == D3DPRESENT_INTERVAL_IMMEDIATE && - count < 5) - count = 5; + * . Last buffer sent. */ + if (This->base.device->minor_version_num > 2 && + pParams->PresentationInterval == D3DPRESENT_INTERVAL_IMMEDIATE) { + if (This->actx->thread_submit && count < 4) + count = 4; + /* When thread_submit is not used, 5 buffers are actually needed, + * because in case a pageflip is missed because rendering wasn't finished, + * the Xserver will hold 4 buffers. 
*/ + else if (!This->actx->thread_submit && count < 5) + count = 5; + } } return count; diff -Nru mesa-18.3.3/src/gallium/state_trackers/nine/swapchain9.h mesa-19.0.1/src/gallium/state_trackers/nine/swapchain9.h --- mesa-18.3.3/src/gallium/state_trackers/nine/swapchain9.h 2017-11-05 00:14:08.000000000 +0000 +++ mesa-19.0.1/src/gallium/state_trackers/nine/swapchain9.h 2019-03-31 23:16:37.000000000 +0000 @@ -57,6 +57,7 @@ struct NineSurface9 *buffers[D3DPRESENT_BACK_BUFFERS_MAX_EX + 1]; /* 0 to BackBufferCount-1 : the back buffers. BackBufferCount : additional buffer */ struct pipe_resource *present_buffers[D3DPRESENT_BACK_BUFFERS_MAX_EX + 1]; D3DWindowBuffer *present_handles[D3DPRESENT_BACK_BUFFERS_MAX_EX + 1]; + D3DWindowBuffer *present_handles_pending_release[D3DPRESENT_BACK_BUFFERS_MAX_EX + 1]; struct pipe_fence_handle *swap_fences[DRI_SWAP_FENCES_MAX]; unsigned int cur_fences; @@ -72,6 +73,7 @@ struct threadpool *pool; struct threadpool_task *tasks[D3DPRESENT_BACK_BUFFERS_MAX_EX + 1]; + BOOL *pending_presentation[D3DPRESENT_BACK_BUFFERS_MAX_EX + 1]; BOOL enable_threadpool; }; diff -Nru mesa-18.3.3/src/gallium/state_trackers/nine/threadpool.c mesa-19.0.1/src/gallium/state_trackers/nine/threadpool.c --- mesa-18.3.3/src/gallium/state_trackers/nine/threadpool.c 2018-12-07 18:58:04.000000000 +0000 +++ mesa-19.0.1/src/gallium/state_trackers/nine/threadpool.c 2019-03-31 23:16:37.000000000 +0000 @@ -52,10 +52,8 @@ while (!pool->workqueue && !pool->shutdown) pthread_cond_wait(&pool->new_work, &pool->m); - if (pool->shutdown) { - pthread_mutex_unlock(&pool->m); - return NULL; - } + if (pool->shutdown) + break; /* Pull the first task from the list. 
We don't free it -- it now lacks * a reference other than the worker creator's, whose responsibility it diff -Nru mesa-18.3.3/src/gallium/state_trackers/nine/volume9.c mesa-19.0.1/src/gallium/state_trackers/nine/volume9.c --- mesa-18.3.3/src/gallium/state_trackers/nine/volume9.c 2019-02-01 12:03:20.000000000 +0000 +++ mesa-19.0.1/src/gallium/state_trackers/nine/volume9.c 2019-03-31 23:16:37.000000000 +0000 @@ -186,7 +186,7 @@ NineVolume9_MarkContainerDirty( struct NineVolume9 *This ) { struct NineBaseTexture9 *tex; -#ifdef DEBUG +#if defined(DEBUG) || !defined(NDEBUG) /* This is always contained by a NineVolumeTexture9. */ GUID id = IID_IDirect3DVolumeTexture9; REFIID ref = &id; diff -Nru mesa-18.3.3/src/gallium/state_trackers/va/context.c mesa-19.0.1/src/gallium/state_trackers/va/context.c --- mesa-18.3.3/src/gallium/state_trackers/va/context.c 2018-04-16 21:31:06.000000000 +0000 +++ mesa-19.0.1/src/gallium/state_trackers/va/context.c 2019-03-31 23:16:37.000000000 +0000 @@ -175,7 +175,7 @@ ctx->version_minor = 1; *ctx->vtable = vtable; *ctx->vtable_vpp = vtable_vpp; - ctx->max_profiles = PIPE_VIDEO_PROFILE_MPEG4_AVC_HIGH - PIPE_VIDEO_PROFILE_UNKNOWN; + ctx->max_profiles = PIPE_VIDEO_PROFILE_MAX - PIPE_VIDEO_PROFILE_UNKNOWN - 1; ctx->max_entrypoints = 2; ctx->max_attributes = 1; ctx->max_image_formats = VL_VA_MAX_IMAGE_FORMATS; diff -Nru mesa-18.3.3/src/gallium/state_trackers/va/picture_vp9.c mesa-19.0.1/src/gallium/state_trackers/va/picture_vp9.c --- mesa-18.3.3/src/gallium/state_trackers/va/picture_vp9.c 2018-04-16 21:31:06.000000000 +0000 +++ mesa-19.0.1/src/gallium/state_trackers/va/picture_vp9.c 2019-03-31 23:16:37.000000000 +0000 @@ -28,6 +28,8 @@ #include "vl/vl_vlc.h" #include "va_private.h" +#define NUM_VP9_REFS 8 + void vlVaHandlePictureParameterBufferVP9(vlVaDriver *drv, vlVaContext *context, vlVaBuffer *buf) { VADecPictureParameterBufferVP9 *vp9 = buf->data; @@ -79,8 +81,11 @@ context->desc.vp9.picture_parameter.bit_depth = vp9->bit_depth; - for (i = 0 
; i < 8 ; i++) + for (i = 0 ; i < NUM_VP9_REFS ; i++) vlVaGetReferenceFrame(drv, vp9->reference_frames[i], &context->desc.vp9.ref[i]); + + if (!context->decoder && !context->templat.max_references) + context->templat.max_references = NUM_VP9_REFS; } void vlVaHandleSliceParameterBufferVP9(vlVaContext *context, vlVaBuffer *buf) diff -Nru mesa-18.3.3/src/gallium/state_trackers/va/surface.c mesa-19.0.1/src/gallium/state_trackers/va/surface.c --- mesa-18.3.3/src/gallium/state_trackers/va/surface.c 2018-12-07 18:58:04.000000000 +0000 +++ mesa-19.0.1/src/gallium/state_trackers/va/surface.c 2019-03-31 23:16:37.000000000 +0000 @@ -146,9 +146,40 @@ VAStatus vlVaQuerySurfaceStatus(VADriverContextP ctx, VASurfaceID render_target, VASurfaceStatus *status) { + vlVaDriver *drv; + vlVaSurface *surf; + vlVaContext *context; + if (!ctx) return VA_STATUS_ERROR_INVALID_CONTEXT; + drv = VL_VA_DRIVER(ctx); + if (!drv) + return VA_STATUS_ERROR_INVALID_CONTEXT; + + mtx_lock(&drv->mutex); + + surf = handle_table_get(drv->htab, render_target); + if (!surf || !surf->buffer) { + mtx_unlock(&drv->mutex); + return VA_STATUS_ERROR_INVALID_SURFACE; + } + + context = handle_table_get(drv->htab, surf->ctx); + if (!context) { + mtx_unlock(&drv->mutex); + return VA_STATUS_ERROR_INVALID_CONTEXT; + } + + if (context->decoder->entrypoint == PIPE_VIDEO_ENTRYPOINT_ENCODE) { + if(surf->feedback == NULL) + *status=VASurfaceReady; + else + *status=VASurfaceRendering; + } + + mtx_unlock(&drv->mutex); + return VA_STATUS_SUCCESS; } diff -Nru mesa-18.3.3/src/gallium/state_trackers/xa/meson.build mesa-19.0.1/src/gallium/state_trackers/xa/meson.build --- mesa-18.3.3/src/gallium/state_trackers/xa/meson.build 2018-09-27 19:13:54.000000000 +0000 +++ mesa-19.0.1/src/gallium/state_trackers/xa/meson.build 2019-03-31 23:16:37.000000000 +0000 @@ -18,7 +18,7 @@ # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE # SOFTWARE. 
-xa_version = ['2', '4', '0'] +xa_version = ['2', '5', '0'] xa_conf = configuration_data() xa_conf.set('XA_MAJOR', xa_version[0]) diff -Nru mesa-18.3.3/src/gallium/state_trackers/xa/xa_composite.c mesa-19.0.1/src/gallium/state_trackers/xa/xa_composite.c --- mesa-18.3.3/src/gallium/state_trackers/xa/xa_composite.c 2017-11-05 00:14:08.000000000 +0000 +++ mesa-19.0.1/src/gallium/state_trackers/xa/xa_composite.c 2019-03-31 23:16:37.000000000 +0000 @@ -112,12 +112,6 @@ boolean supported = FALSE; /* - * Temporarily disable component alpha since it appears buggy. - */ - if (mask_pic && mask_pic->component_alpha) - return FALSE; - - /* * our default in case something goes wrong */ *blend = xa_blends[XA_BLEND_OP_OVER]; @@ -126,9 +120,16 @@ if (xa_blends[i].op == op) { *blend = xa_blends[i]; supported = TRUE; + break; } } + /* + * No component alpha yet. + */ + if (mask_pic && mask_pic->component_alpha && blend->alpha_src) + return FALSE; + if (!dst_pic->srf) return supported; @@ -150,21 +151,6 @@ blend->rgb_src = PIPE_BLENDFACTOR_ZERO; } - /* - * If the source alpha is being used, then we should only be in a case where - * the source blend factor is 0, and the source blend value is the mask - * channels multiplied by the source picture's alpha. - */ - if (mask_pic && mask_pic->component_alpha && - xa_format_rgb(mask_pic->pict_format) && - blend->alpha_src) { - if (blend->rgb_dst == PIPE_BLENDFACTOR_SRC_ALPHA) { - blend->rgb_dst = PIPE_BLENDFACTOR_SRC_COLOR; - } else if (blend->rgb_dst == PIPE_BLENDFACTOR_INV_SRC_ALPHA) { - blend->rgb_dst = PIPE_BLENDFACTOR_INV_SRC_COLOR; - } - } - return supported; } @@ -214,43 +200,53 @@ return 1; } +/** + * xa_src_pict_is_accelerated - Check whether we support acceleration + * of the given src_pict type + * + * \param src_pic[in]: Pointer to a union xa_source_pict to check. + * + * \returns TRUE if accelerated, FALSE otherwise. 
+ */ +static boolean +xa_src_pict_is_accelerated(const union xa_source_pict *src_pic) +{ + if (!src_pic) + return TRUE; + + if (src_pic->type == xa_src_pict_solid_fill || + src_pic->type == xa_src_pict_float_solid_fill) + return TRUE; + + return FALSE; +} + XA_EXPORT int xa_composite_check_accelerated(const struct xa_composite *comp) { - struct xa_composite_blend blend; struct xa_picture *src_pic = comp->src; + struct xa_picture *mask_pic = comp->mask; + struct xa_composite_blend blend; if (!xa_is_filter_accelerated(src_pic) || !xa_is_filter_accelerated(comp->mask)) { return -XA_ERR_INVAL; } + if (!xa_src_pict_is_accelerated(src_pic->src_pict) || + (mask_pic && !xa_src_pict_is_accelerated(mask_pic->src_pict))) + return -XA_ERR_INVAL; - if (src_pic->src_pict) { - if (src_pic->src_pict->type != xa_src_pict_solid_fill) - return -XA_ERR_INVAL; - - /* - * Currently we don't support solid fill with a mask. - * We can easily do that, but that would require shader, - * sampler view setup and vertex setup modification. - */ - if (comp->mask) - return -XA_ERR_INVAL; - } - - if (blend_for_op(&blend, comp->op, comp->src, comp->mask, comp->dst)) { - struct xa_picture *mask = comp->mask; - if (mask && mask->component_alpha && - xa_format_rgb(mask->pict_format)) { - if (blend.alpha_src && blend.rgb_src != PIPE_BLENDFACTOR_ZERO) { - return -XA_ERR_INVAL; - } - } + if (!blend_for_op(&blend, comp->op, comp->src, comp->mask, comp->dst)) + return -XA_ERR_INVAL; - return XA_ERR_NONE; - } - return -XA_ERR_INVAL; + /* + * No component alpha yet. 
+ */ + if (mask_pic && mask_pic->component_alpha && blend.alpha_src) + return -XA_ERR_INVAL; + + return XA_ERR_NONE; } static int @@ -293,7 +289,7 @@ src_hw_format = xa_surface_format(src); src_pic_format = src_pic->pict_format; - set_alpha = (xa_format_type_is_color(src_pic_format) && + set_alpha = (xa_format_type_is_color(src_hw_format) && xa_format_a(src_pic_format) == 0); if (set_alpha) @@ -324,6 +320,61 @@ return ret; } +static void +xa_src_in_mask(float src[4], const float mask[4]) +{ + src[0] *= mask[3]; + src[1] *= mask[3]; + src[2] *= mask[3]; + src[3] *= mask[3]; +} + +/** + * xa_handle_src_pict - Set up xa_context state and fragment shader + * input based on scr_pict type + * + * \param ctx[in, out]: Pointer to the xa context. + * \param src_pict[in]: Pointer to the union xa_source_pict to consider. + * \param is_mask[in]: Whether we're considering a mask picture. + * + * \returns TRUE if succesful, FALSE otherwise. + * + * This function computes some xa_context state used to determine whether + * to upload the solid color and also the solid color itself used as an input + * to the fragment shader. 
+ */ +static boolean +xa_handle_src_pict(struct xa_context *ctx, + const union xa_source_pict *src_pict, + boolean is_mask) +{ + float solid_color[4]; + + switch(src_pict->type) { + case xa_src_pict_solid_fill: + xa_pixel_to_float4(src_pict->solid_fill.color, solid_color); + break; + case xa_src_pict_float_solid_fill: + memcpy(solid_color, src_pict->float_solid_fill.color, + sizeof(solid_color)); + break; + default: + return FALSE; + } + + if (is_mask && ctx->has_solid_src) + xa_src_in_mask(ctx->solid_color, solid_color); + else + memcpy(ctx->solid_color, solid_color, sizeof(solid_color)); + + if (is_mask) + ctx->has_solid_mask = TRUE; + else + ctx->has_solid_src = TRUE; + + return TRUE; +} + static int bind_shaders(struct xa_context *ctx, const struct xa_composite *comp) { @@ -331,48 +382,54 @@ struct xa_shader shader; struct xa_picture *src_pic = comp->src; struct xa_picture *mask_pic = comp->mask; + struct xa_picture *dst_pic = comp->dst; + + ctx->has_solid_src = FALSE; + ctx->has_solid_mask = FALSE; - ctx->has_solid_color = FALSE; + if (dst_pic && xa_format_type(dst_pic->pict_format) != + xa_format_type(xa_surface_format(dst_pic->srf))) + return -XA_ERR_INVAL; if (src_pic) { if (src_pic->wrap == xa_wrap_clamp_to_border && src_pic->has_transform) fs_traits |= FS_SRC_REPEAT_NONE; - if (src_pic->src_pict) { - if (src_pic->src_pict->type == xa_src_pict_solid_fill) { - fs_traits |= FS_SOLID_FILL | FS_FILL; - vs_traits |= VS_SOLID_FILL; - xa_pixel_to_float4(src_pic->src_pict->solid_fill.color, - ctx->solid_color); - ctx->has_solid_color = TRUE; - } - } else { - fs_traits |= FS_COMPOSITE; - vs_traits |= VS_COMPOSITE; - } + fs_traits |= FS_COMPOSITE; + vs_traits |= VS_COMPOSITE; - fs_traits |= picture_format_fixups(src_pic, 0); + if (src_pic->src_pict) { + if (!xa_handle_src_pict(ctx, src_pic->src_pict, false)) + return -XA_ERR_INVAL; + fs_traits |= FS_SRC_SRC; + vs_traits |= VS_SRC_SRC; + } else + fs_traits |= picture_format_fixups(src_pic, 0); } if (mask_pic) { 
vs_traits |= VS_MASK; fs_traits |= FS_MASK; - if (mask_pic->wrap == xa_wrap_clamp_to_border && - mask_pic->has_transform) - fs_traits |= FS_MASK_REPEAT_NONE; - - if (mask_pic->component_alpha) { - struct xa_composite_blend blend; - if (!blend_for_op(&blend, comp->op, src_pic, mask_pic, NULL)) - return -XA_ERR_INVAL; - - if (blend.alpha_src) { - fs_traits |= FS_CA_SRCALPHA; - } else - fs_traits |= FS_CA_FULL; - } + if (mask_pic->component_alpha) + fs_traits |= FS_CA; + if (mask_pic->src_pict) { + if (!xa_handle_src_pict(ctx, mask_pic->src_pict, true)) + return -XA_ERR_INVAL; + + if (ctx->has_solid_src) { + vs_traits &= ~VS_MASK; + fs_traits &= ~FS_MASK; + } else { + vs_traits |= VS_MASK_SRC; + fs_traits |= FS_MASK_SRC; + } + } else { + if (mask_pic->wrap == xa_wrap_clamp_to_border && + mask_pic->has_transform) + fs_traits |= FS_MASK_REPEAT_NONE; - fs_traits |= picture_format_fixups(mask_pic, 1); + fs_traits |= picture_format_fixups(mask_pic, 1); + } } if (ctx->srf->format == PIPE_FORMAT_L8_UNORM || @@ -396,42 +453,35 @@ struct pipe_context *pipe = ctx->pipe; struct xa_picture *src_pic = comp->src; struct xa_picture *mask_pic = comp->mask; + int num_samplers = 0; - ctx->num_bound_samplers = 0; - + xa_ctx_sampler_views_destroy(ctx); memset(&src_sampler, 0, sizeof(struct pipe_sampler_state)); memset(&mask_sampler, 0, sizeof(struct pipe_sampler_state)); - if (src_pic) { - if (ctx->has_solid_color) { - samplers[0] = NULL; - pipe_sampler_view_reference(&ctx->bound_sampler_views[0], NULL); - } else { - unsigned src_wrap = xa_repeat_to_gallium(src_pic->wrap); - int filter; - - (void) xa_filter_to_gallium(src_pic->filter, &filter); - - src_sampler.wrap_s = src_wrap; - src_sampler.wrap_t = src_wrap; - src_sampler.min_img_filter = filter; - src_sampler.mag_img_filter = filter; - src_sampler.min_mip_filter = PIPE_TEX_MIPFILTER_NEAREST; - src_sampler.normalized_coords = 1; - samplers[0] = &src_sampler; - ctx->num_bound_samplers = 1; - u_sampler_view_default_template(&view_templ, 
- src_pic->srf->tex, - src_pic->srf->tex->format); - src_view = pipe->create_sampler_view(pipe, src_pic->srf->tex, - &view_templ); - pipe_sampler_view_reference(&ctx->bound_sampler_views[0], NULL); - ctx->bound_sampler_views[0] = src_view; - } + if (src_pic && !ctx->has_solid_src) { + unsigned src_wrap = xa_repeat_to_gallium(src_pic->wrap); + int filter; + + (void) xa_filter_to_gallium(src_pic->filter, &filter); + + src_sampler.wrap_s = src_wrap; + src_sampler.wrap_t = src_wrap; + src_sampler.min_img_filter = filter; + src_sampler.mag_img_filter = filter; + src_sampler.min_mip_filter = PIPE_TEX_MIPFILTER_NEAREST; + src_sampler.normalized_coords = 1; + samplers[0] = &src_sampler; + u_sampler_view_default_template(&view_templ, + src_pic->srf->tex,+ src_pic->srf->tex->format); + src_view = pipe->create_sampler_view(pipe, src_pic->srf->tex, + &view_templ); + ctx->bound_sampler_views[0] = src_view; + num_samplers++; } - if (mask_pic) { - unsigned mask_wrap = xa_repeat_to_gallium(mask_pic->wrap); + if (mask_pic && !ctx->has_solid_mask) { + unsigned mask_wrap = xa_repeat_to_gallium(mask_pic->wrap); int filter; (void) xa_filter_to_gallium(mask_pic->filter, &filter); @@ -442,31 +492,21 @@ mask_sampler.mag_img_filter = filter; src_sampler.min_mip_filter = PIPE_TEX_MIPFILTER_NEAREST; mask_sampler.normalized_coords = 1; - samplers[1] = &mask_sampler; - ctx->num_bound_samplers = 2; + samplers[num_samplers] = &mask_sampler; u_sampler_view_default_template(&view_templ, mask_pic->srf->tex, mask_pic->srf->tex->format); src_view = pipe->create_sampler_view(pipe, mask_pic->srf->tex, &view_templ); - pipe_sampler_view_reference(&ctx->bound_sampler_views[1], NULL); - ctx->bound_sampler_views[1] = src_view; - - - /* - * If src is a solid color, we have no src view, so set up a - * dummy one that will not be used anyway. 
- */ - if (ctx->bound_sampler_views[0] == NULL) - pipe_sampler_view_reference(&ctx->bound_sampler_views[0], - src_view); - + ctx->bound_sampler_views[num_samplers] = src_view; + num_samplers++; } - cso_set_samplers(ctx->cso, PIPE_SHADER_FRAGMENT, ctx->num_bound_samplers, + cso_set_samplers(ctx->cso, PIPE_SHADER_FRAGMENT, num_samplers, (const struct pipe_sampler_state **)samplers); - cso_set_sampler_views(ctx->cso, PIPE_SHADER_FRAGMENT, ctx->num_bound_samplers, + cso_set_sampler_views(ctx->cso, PIPE_SHADER_FRAGMENT, num_samplers, ctx->bound_sampler_views); + ctx->num_bound_samplers = num_samplers; } XA_EXPORT int @@ -476,9 +516,6 @@ struct xa_surface *dst_srf = comp->dst->srf; int ret; - if (comp->mask && !comp->mask->srf) - return -XA_ERR_INVAL; - ret = xa_ctx_srf_create(ctx, dst_srf); if (ret != XA_ERR_NONE) return ret; @@ -511,8 +548,8 @@ int dstX, int dstY, int width, int height) { if (ctx->num_bound_samplers == 0 ) { /* solid fill */ - renderer_solid(ctx, dstX, dstY, dstX + width, dstY + height, - ctx->solid_color); + xa_scissor_update(ctx, dstX, dstY, dstX + width, dstY + height); + renderer_solid(ctx, dstX, dstY, dstX + width, dstY + height); } else { const struct xa_composite *comp = ctx->comp; int pos[6] = {srcX, srcY, maskX, maskY, dstX, dstY}; @@ -537,7 +574,8 @@ renderer_draw_flush(ctx); ctx->comp = NULL; - ctx->has_solid_color = FALSE; + ctx->has_solid_src = FALSE; + ctx->has_solid_mask = FALSE; xa_ctx_sampler_views_destroy(ctx); } diff -Nru mesa-18.3.3/src/gallium/state_trackers/xa/xa_composite.h mesa-19.0.1/src/gallium/state_trackers/xa/xa_composite.h --- mesa-18.3.3/src/gallium/state_trackers/xa/xa_composite.h 2017-11-05 00:14:08.000000000 +0000 +++ mesa-19.0.1/src/gallium/state_trackers/xa/xa_composite.h 2019-03-31 23:16:37.000000000 +0000 @@ -74,18 +74,34 @@ * Src picture types. 
*/ enum xa_composite_src_pict_type { - xa_src_pict_solid_fill + xa_src_pict_solid_fill, + xa_src_pict_float_solid_fill }; + +/* + * struct xa_pict_solid_fill - Description of a solid_fill picture + * Deprecated. Use struct xa_pict_float_solid_fill instead. + */ struct xa_pict_solid_fill { enum xa_composite_src_pict_type type; unsigned int class; uint32_t color; }; +/* + * struct xa_pict_solid_fill - Description of a solid_fill picture + * with color channels represented by floats. + */ +struct xa_pict_float_solid_fill { + enum xa_composite_src_pict_type type; + float color[4]; /* R, G, B, A */ +}; + union xa_source_pict { - unsigned int type; + enum xa_composite_src_pict_type type; struct xa_pict_solid_fill solid_fill; + struct xa_pict_float_solid_fill float_solid_fill; }; struct xa_picture { diff -Nru mesa-18.3.3/src/gallium/state_trackers/xa/xa_context.c mesa-19.0.1/src/gallium/state_trackers/xa/xa_context.c --- mesa-18.3.3/src/gallium/state_trackers/xa/xa_context.c 2018-12-07 18:58:04.000000000 +0000 +++ mesa-19.0.1/src/gallium/state_trackers/xa/xa_context.c 2019-03-31 23:16:37.000000000 +0000 @@ -309,7 +309,7 @@ xa_pixel_to_float4_a8(fg, ctx->solid_color); else xa_pixel_to_float4(fg, ctx->solid_color); - ctx->has_solid_color = 1; + ctx->has_solid_src = 1; ctx->dst = dst; @@ -321,8 +321,8 @@ exa->solid_color[2], exa->solid_color[3]); #endif - vs_traits = VS_SOLID_FILL; - fs_traits = FS_SOLID_FILL; + vs_traits = VS_SRC_SRC | VS_COMPOSITE; + fs_traits = FS_SRC_SRC | VS_COMPOSITE; renderer_bind_destination(ctx, ctx->srf); bind_solid_blend_state(ctx); @@ -343,7 +343,7 @@ xa_solid(struct xa_context *ctx, int x, int y, int width, int height) { xa_scissor_update(ctx, x, y, x + width, y + height); - renderer_solid(ctx, x, y, x + width, y + height, ctx->solid_color); + renderer_solid(ctx, x, y, x + width, y + height); } XA_EXPORT void @@ -351,7 +351,7 @@ { renderer_draw_flush(ctx); ctx->comp = NULL; - ctx->has_solid_color = FALSE; + ctx->has_solid_src = FALSE; 
ctx->num_bound_samplers = 0; } diff -Nru mesa-18.3.3/src/gallium/state_trackers/xa/xa_priv.h mesa-19.0.1/src/gallium/state_trackers/xa/xa_priv.h --- mesa-18.3.3/src/gallium/state_trackers/xa/xa_priv.h 2018-12-07 18:58:04.000000000 +0000 +++ mesa-19.0.1/src/gallium/state_trackers/xa/xa_priv.h 2019-03-31 23:16:37.000000000 +0000 @@ -113,7 +113,8 @@ int simple_copy; - int has_solid_color; + int has_solid_src; + int has_solid_mask; float solid_color[4]; unsigned int num_bound_samplers; @@ -145,35 +146,27 @@ enum xa_vs_traits { VS_COMPOSITE = 1 << 0, VS_MASK = 1 << 1, - VS_SOLID_FILL = 1 << 2, - VS_LINGRAD_FILL = 1 << 3, - VS_RADGRAD_FILL = 1 << 4, - VS_YUV = 1 << 5, - - VS_FILL = (VS_SOLID_FILL | VS_LINGRAD_FILL | VS_RADGRAD_FILL) + VS_SRC_SRC = 1 << 2, + VS_MASK_SRC = 1 << 3, + VS_YUV = 1 << 4, }; enum xa_fs_traits { FS_COMPOSITE = 1 << 0, FS_MASK = 1 << 1, - FS_SOLID_FILL = 1 << 2, - FS_LINGRAD_FILL = 1 << 3, - FS_RADGRAD_FILL = 1 << 4, - FS_CA_FULL = 1 << 5, /* src.rgba * mask.rgba */ - FS_CA_SRCALPHA = 1 << 6, /* src.aaaa * mask.rgba */ - FS_YUV = 1 << 7, - FS_SRC_REPEAT_NONE = 1 << 8, - FS_MASK_REPEAT_NONE = 1 << 9, - FS_SRC_SWIZZLE_RGB = 1 << 10, - FS_MASK_SWIZZLE_RGB = 1 << 11, - FS_SRC_SET_ALPHA = 1 << 12, - FS_MASK_SET_ALPHA = 1 << 13, - FS_SRC_LUMINANCE = 1 << 14, - FS_MASK_LUMINANCE = 1 << 15, - FS_DST_LUMINANCE = 1 << 16, - - FS_FILL = (FS_SOLID_FILL | FS_LINGRAD_FILL | FS_RADGRAD_FILL), - FS_COMPONENT_ALPHA = (FS_CA_FULL | FS_CA_SRCALPHA) + FS_SRC_SRC = 1 << 2, + FS_MASK_SRC = 1 << 3, + FS_YUV = 1 << 4, + FS_SRC_REPEAT_NONE = 1 << 5, + FS_MASK_REPEAT_NONE = 1 << 6, + FS_SRC_SWIZZLE_RGB = 1 << 7, + FS_MASK_SWIZZLE_RGB = 1 << 8, + FS_SRC_SET_ALPHA = 1 << 9, + FS_MASK_SET_ALPHA = 1 << 10, + FS_SRC_LUMINANCE = 1 << 11, + FS_MASK_LUMINANCE = 1 << 12, + FS_DST_LUMINANCE = 1 << 13, + FS_CA = 1 << 14, }; struct xa_shader { @@ -282,7 +275,7 @@ void renderer_begin_solid(struct xa_context *r); void renderer_solid(struct xa_context *r, - int x0, int y0, int x1, int 
y1, float *color); + int x0, int y0, int x1, int y1); void renderer_begin_textures(struct xa_context *r); diff -Nru mesa-18.3.3/src/gallium/state_trackers/xa/xa_renderer.c mesa-19.0.1/src/gallium/state_trackers/xa/xa_renderer.c --- mesa-18.3.3/src/gallium/state_trackers/xa/xa_renderer.c 2018-12-07 18:58:04.000000000 +0000 +++ mesa-19.0.1/src/gallium/state_trackers/xa/xa_renderer.c 2019-03-31 23:16:37.000000000 +0000 @@ -46,14 +46,14 @@ int shader_type, const float *params, int param_bytes); static inline boolean -is_affine(float *matrix) +is_affine(const float *matrix) { return floatIsZero(matrix[2]) && floatIsZero(matrix[5]) && floatsEqual(matrix[8], 1); } static inline void -map_point(float *mat, float x, float y, float *out_x, float *out_y) +map_point(const float *mat, float x, float y, float *out_x, float *out_y) { if (!mat) { *out_x = x; @@ -137,7 +137,7 @@ } static inline void -add_vertex_color(struct xa_context *r, float x, float y, float color[4]) +add_vertex_none(struct xa_context *r, float x, float y) { float *vertex = r->buffer + r->buffer_size; @@ -146,12 +146,7 @@ vertex[2] = 0.f; /*z */ vertex[3] = 1.f; /*w */ - vertex[4] = color[0]; /*r */ - vertex[5] = color[1]; /*g */ - vertex[6] = color[2]; /*b */ - vertex[7] = color[3]; /*a */ - - r->buffer_size += 8; + r->buffer_size += 4; } static inline void @@ -197,47 +192,55 @@ } static void -add_vertex_data1(struct xa_context *r, - float srcX, float srcY, float dstX, float dstY, - float width, float height, - struct pipe_resource *src, const float *src_matrix) -{ - float s0, t0, s1, t1, s2, t2, s3, t3; - float pt0[2], pt1[2], pt2[2], pt3[2]; - - pt0[0] = srcX; - pt0[1] = srcY; - pt1[0] = (srcX + width); - pt1[1] = srcY; - pt2[0] = (srcX + width); - pt2[1] = (srcY + height); - pt3[0] = srcX; - pt3[1] = (srcY + height); +compute_src_coords(float sx, float sy, const struct pipe_resource *src, + const float *src_matrix, + float width, float height, + float tc0[2], float tc1[2], float tc2[2], float tc3[2]) +{ + 
tc0[0] = sx; + tc0[1] = sy; + tc1[0] = sx + width; + tc1[1] = sy; + tc2[0] = sx + width; + tc2[1] = sy + height; + tc3[0] = sx; + tc3[1] = sy + height; if (src_matrix) { - map_point((float *)src_matrix, pt0[0], pt0[1], &pt0[0], &pt0[1]); - map_point((float *)src_matrix, pt1[0], pt1[1], &pt1[0], &pt1[1]); - map_point((float *)src_matrix, pt2[0], pt2[1], &pt2[0], &pt2[1]); - map_point((float *)src_matrix, pt3[0], pt3[1], &pt3[0], &pt3[1]); + map_point(src_matrix, tc0[0], tc0[1], &tc0[0], &tc0[1]); + map_point(src_matrix, tc1[0], tc1[1], &tc1[0], &tc1[1]); + map_point(src_matrix, tc2[0], tc2[1], &tc2[0], &tc2[1]); + map_point(src_matrix, tc3[0], tc3[1], &tc3[0], &tc3[1]); } - s0 = pt0[0] / src->width0; - s1 = pt1[0] / src->width0; - s2 = pt2[0] / src->width0; - s3 = pt3[0] / src->width0; - t0 = pt0[1] / src->height0; - t1 = pt1[1] / src->height0; - t2 = pt2[1] / src->height0; - t3 = pt3[1] / src->height0; + tc0[0] /= src->width0; + tc1[0] /= src->width0; + tc2[0] /= src->width0; + tc3[0] /= src->width0; + tc0[1] /= src->height0; + tc1[1] /= src->height0; + tc2[1] /= src->height0; + tc3[1] /= src->height0; +} + +static void +add_vertex_data1(struct xa_context *r, + float srcX, float srcY, float dstX, float dstY, + float width, float height, + const struct pipe_resource *src, const float *src_matrix) +{ + float tc0[2], tc1[2], tc2[2], tc3[2]; + compute_src_coords(srcX, srcY, src, src_matrix, width, height, + tc0, tc1, tc2, tc3); /* 1st vertex */ - add_vertex_1tex(r, dstX, dstY, s0, t0); + add_vertex_1tex(r, dstX, dstY, tc0[0], tc0[1]); /* 2nd vertex */ - add_vertex_1tex(r, dstX + width, dstY, s1, t1); + add_vertex_1tex(r, dstX + width, dstY, tc1[0], tc1[1]); /* 3rd vertex */ - add_vertex_1tex(r, dstX + width, dstY + height, s2, t2); + add_vertex_1tex(r, dstX + width, dstY + height, tc2[0], tc2[1]); /* 4th vertex */ - add_vertex_1tex(r, dstX, dstY + height, s3, t3); + add_vertex_1tex(r, dstX, dstY + height, tc3[0], tc3[1]); } static void @@ -248,53 +251,26 @@ struct 
pipe_resource *mask, const float *src_matrix, const float *mask_matrix) { - float src_s0, src_t0, src_s1, src_t1; - float mask_s0, mask_t0, mask_s1, mask_t1; - float spt0[2], spt1[2]; - float mpt0[2], mpt1[2]; - - spt0[0] = srcX; - spt0[1] = srcY; - spt1[0] = srcX + width; - spt1[1] = srcY + height; - - mpt0[0] = maskX; - mpt0[1] = maskY; - mpt1[0] = maskX + width; - mpt1[1] = maskY + height; - - if (src_matrix) { - map_point((float *)src_matrix, spt0[0], spt0[1], &spt0[0], &spt0[1]); - map_point((float *)src_matrix, spt1[0], spt1[1], &spt1[0], &spt1[1]); - } + float spt0[2], spt1[2], spt2[2], spt3[2]; + float mpt0[2], mpt1[2], mpt2[2], mpt3[2]; - if (mask_matrix) { - map_point((float *)mask_matrix, mpt0[0], mpt0[1], &mpt0[0], &mpt0[1]); - map_point((float *)mask_matrix, mpt1[0], mpt1[1], &mpt1[0], &mpt1[1]); - } - - src_s0 = spt0[0] / src->width0; - src_t0 = spt0[1] / src->height0; - src_s1 = spt1[0] / src->width0; - src_t1 = spt1[1] / src->height0; - - mask_s0 = mpt0[0] / mask->width0; - mask_t0 = mpt0[1] / mask->height0; - mask_s1 = mpt1[0] / mask->width0; - mask_t1 = mpt1[1] / mask->height0; + compute_src_coords(srcX, srcY, src, src_matrix, width, height, + spt0, spt1, spt2, spt3); + compute_src_coords(maskX, maskY, mask, mask_matrix, width, height, + mpt0, mpt1, mpt2, mpt3); /* 1st vertex */ add_vertex_2tex(r, dstX, dstY, - src_s0, src_t0, mask_s0, mask_t0); + spt0[0], spt0[1], mpt0[0], mpt0[1]); /* 2nd vertex */ add_vertex_2tex(r, dstX + width, dstY, - src_s1, src_t0, mask_s1, mask_t0); + spt1[0], spt1[1], mpt1[0], mpt1[1]); /* 3rd vertex */ add_vertex_2tex(r, dstX + width, dstY + height, - src_s1, src_t1, mask_s1, mask_t1); + spt2[0], spt2[1], mpt2[0], mpt2[1]); /* 4th vertex */ add_vertex_2tex(r, dstX, dstY + height, - src_s0, src_t1, mask_s0, mask_t1); + spt3[0], spt3[1], mpt3[0], mpt3[1]); } static void @@ -554,27 +530,29 @@ renderer_begin_solid(struct xa_context *r) { r->buffer_size = 0; - r->attrs_per_vertex = 2; + r->attrs_per_vertex = 1; + 
renderer_set_constants(r, PIPE_SHADER_FRAGMENT, r->solid_color, + 4 * sizeof(float)); } void renderer_solid(struct xa_context *r, - int x0, int y0, int x1, int y1, float *color) + int x0, int y0, int x1, int y1) { /* * debug_printf("solid rect[(%d, %d), (%d, %d)], rgba[%f, %f, %f, %f]\n", * x0, y0, x1, y1, color[0], color[1], color[2], color[3]); */ - renderer_draw_conditional(r, 4 * 8); + renderer_draw_conditional(r, 4 * 4); /* 1st vertex */ - add_vertex_color(r, x0, y0, color); + add_vertex_none(r, x0, y0); /* 2nd vertex */ - add_vertex_color(r, x1, y0, color); + add_vertex_none(r, x1, y0); /* 3rd vertex */ - add_vertex_color(r, x1, y1, color); + add_vertex_none(r, x1, y1); /* 4th vertex */ - add_vertex_color(r, x0, y1, color); + add_vertex_none(r, x0, y1); } void @@ -588,6 +566,9 @@ { r->attrs_per_vertex = 1 + r->num_bound_samplers; r->buffer_size = 0; + if (r->has_solid_src || r->has_solid_mask) + renderer_set_constants(r, PIPE_SHADER_FRAGMENT, r->solid_color, + 4 * sizeof(float)); } void @@ -617,11 +598,19 @@ switch(r->attrs_per_vertex) { case 2: renderer_draw_conditional(r, 4 * 8); - add_vertex_data1(r, - pos[0], pos[1], /* src */ - pos[4], pos[5], /* dst */ - width, height, - sampler_view[0]->texture, src_matrix); + if (!r->has_solid_src) { + add_vertex_data1(r, + pos[0], pos[1], /* src */ + pos[4], pos[5], /* dst */ + width, height, + sampler_view[0]->texture, src_matrix); + } else { + add_vertex_data1(r, + pos[2], pos[3], /* mask */ + pos[4], pos[5], /* dst */ + width, height, + sampler_view[0]->texture, mask_matrix); + } break; case 3: renderer_draw_conditional(r, 4 * 12); diff -Nru mesa-18.3.3/src/gallium/state_trackers/xa/xa_tgsi.c mesa-19.0.1/src/gallium/state_trackers/xa/xa_tgsi.c --- mesa-18.3.3/src/gallium/state_trackers/xa/xa_tgsi.c 2017-11-05 00:14:08.000000000 +0000 +++ mesa-19.0.1/src/gallium/state_trackers/xa/xa_tgsi.c 2019-03-31 23:16:37.000000000 +0000 @@ -48,19 +48,18 @@ * CONST[1] = (-1, -1, 0, 0) * * OUT[0] = vertex pos - * OUT[1] = src 
tex coord | solid fill color + * OUT[1] = src tex coord * OUT[2] = mask tex coord * OUT[3] = dst tex coord */ -/* Fragment shader: - * SAMP[0] = src - * SAMP[1] = mask - * SAMP[2] = dst - * IN[0] = pos src | solid fill color - * IN[1] = pos mask - * IN[2] = pos dst - * CONST[0] = (0, 0, 0, 1) +/* Fragment shader. Samplers are allocated when needed. + * SAMP[0] = sampler for first texture (src or mask if src is solid) + * SAMP[1] = sampler for second texture (mask or none) + * IN[0] = first texture coordinates if present + * IN[1] = second texture coordinates if present + * CONST[0] = Solid color (src if src solid or mask if mask solid + * or src in mask if both solid). * * OUT[0] = color */ @@ -71,21 +70,19 @@ const char *strings[] = { "FS_COMPOSITE", /* = 1 << 0, */ "FS_MASK", /* = 1 << 1, */ - "FS_SOLID_FILL", /* = 1 << 2, */ - "FS_LINGRAD_FILL", /* = 1 << 3, */ - "FS_RADGRAD_FILL", /* = 1 << 4, */ - "FS_CA_FULL", /* = 1 << 5, *//* src.rgba * mask.rgba */ - "FS_CA_SRCALPHA", /* = 1 << 6, *//* src.aaaa * mask.rgba */ - "FS_YUV", /* = 1 << 7, */ - "FS_SRC_REPEAT_NONE", /* = 1 << 8, */ - "FS_MASK_REPEAT_NONE", /* = 1 << 9, */ - "FS_SRC_SWIZZLE_RGB", /* = 1 << 10, */ - "FS_MASK_SWIZZLE_RGB", /* = 1 << 11, */ - "FS_SRC_SET_ALPHA", /* = 1 << 12, */ - "FS_MASK_SET_ALPHA", /* = 1 << 13, */ - "FS_SRC_LUMINANCE", /* = 1 << 14, */ - "FS_MASK_LUMINANCE", /* = 1 << 15, */ - "FS_DST_LUMINANCE", /* = 1 << 15, */ + "FS_SRC_SRC", /* = 1 << 2, */ + "FS_MASK_SRC", /* = 1 << 3, */ + "FS_YUV", /* = 1 << 4, */ + "FS_SRC_REPEAT_NONE", /* = 1 << 5, */ + "FS_MASK_REPEAT_NONE", /* = 1 << 6, */ + "FS_SRC_SWIZZLE_RGB", /* = 1 << 7, */ + "FS_MASK_SWIZZLE_RGB", /* = 1 << 8, */ + "FS_SRC_SET_ALPHA", /* = 1 << 9, */ + "FS_MASK_SET_ALPHA", /* = 1 << 10, */ + "FS_SRC_LUMINANCE", /* = 1 << 11, */ + "FS_MASK_LUMINANCE", /* = 1 << 12, */ + "FS_DST_LUMINANCE", /* = 1 << 13, */ + "FS_CA", /* = 1 << 14, */ }; int i, k; @@ -111,18 +108,20 @@ struct ureg_dst dst, struct ureg_src src, struct ureg_src 
mask, - unsigned component_alpha, unsigned mask_luminance) + unsigned mask_luminance, boolean component_alpha) { - if (component_alpha == FS_CA_FULL) { - ureg_MUL(ureg, dst, src, mask); - } else if (component_alpha == FS_CA_SRCALPHA) { - ureg_MUL(ureg, dst, ureg_scalar(src, TGSI_SWIZZLE_W), mask); - } else { - if (mask_luminance) - ureg_MUL(ureg, dst, src, ureg_scalar(mask, TGSI_SWIZZLE_X)); - else - ureg_MUL(ureg, dst, src, ureg_scalar(mask, TGSI_SWIZZLE_W)); - } + if (mask_luminance) + if (component_alpha) { + ureg_MOV(ureg, dst, src); + ureg_MUL(ureg, ureg_writemask(dst, TGSI_WRITEMASK_W), + src, ureg_scalar(mask, TGSI_SWIZZLE_X)); + } else { + ureg_MUL(ureg, dst, src, ureg_scalar(mask, TGSI_SWIZZLE_X)); + } + else if (!component_alpha) + ureg_MUL(ureg, dst, src, ureg_scalar(mask, TGSI_SWIZZLE_W)); + else + ureg_MUL(ureg, dst, src, mask); } static struct ureg_src @@ -139,125 +138,6 @@ return ret; } -static void -linear_gradient(struct ureg_program *ureg, - struct ureg_dst out, - struct ureg_src pos, - struct ureg_src sampler, - struct ureg_src coords, - struct ureg_src const0124, - struct ureg_src matrow0, - struct ureg_src matrow1, struct ureg_src matrow2) -{ - struct ureg_dst temp0 = ureg_DECL_temporary(ureg); - struct ureg_dst temp1 = ureg_DECL_temporary(ureg); - struct ureg_dst temp2 = ureg_DECL_temporary(ureg); - struct ureg_dst temp3 = ureg_DECL_temporary(ureg); - struct ureg_dst temp4 = ureg_DECL_temporary(ureg); - struct ureg_dst temp5 = ureg_DECL_temporary(ureg); - - ureg_MOV(ureg, ureg_writemask(temp0, TGSI_WRITEMASK_XY), pos); - ureg_MOV(ureg, - ureg_writemask(temp0, TGSI_WRITEMASK_Z), - ureg_scalar(const0124, TGSI_SWIZZLE_Y)); - - ureg_DP3(ureg, temp1, matrow0, ureg_src(temp0)); - ureg_DP3(ureg, temp2, matrow1, ureg_src(temp0)); - ureg_DP3(ureg, temp3, matrow2, ureg_src(temp0)); - ureg_RCP(ureg, temp3, ureg_src(temp3)); - ureg_MUL(ureg, temp1, ureg_src(temp1), ureg_src(temp3)); - ureg_MUL(ureg, temp2, ureg_src(temp2), ureg_src(temp3)); - - 
ureg_MOV(ureg, ureg_writemask(temp4, TGSI_WRITEMASK_X), ureg_src(temp1)); - ureg_MOV(ureg, ureg_writemask(temp4, TGSI_WRITEMASK_Y), ureg_src(temp2)); - - ureg_MUL(ureg, temp0, - ureg_scalar(coords, TGSI_SWIZZLE_Y), - ureg_scalar(ureg_src(temp4), TGSI_SWIZZLE_Y)); - ureg_MAD(ureg, temp1, - ureg_scalar(coords, TGSI_SWIZZLE_X), - ureg_scalar(ureg_src(temp4), TGSI_SWIZZLE_X), ureg_src(temp0)); - - ureg_MUL(ureg, temp2, ureg_src(temp1), ureg_scalar(coords, TGSI_SWIZZLE_Z)); - - ureg_TEX(ureg, out, TGSI_TEXTURE_1D, ureg_src(temp2), sampler); - - ureg_release_temporary(ureg, temp0); - ureg_release_temporary(ureg, temp1); - ureg_release_temporary(ureg, temp2); - ureg_release_temporary(ureg, temp3); - ureg_release_temporary(ureg, temp4); - ureg_release_temporary(ureg, temp5); -} - -static void -radial_gradient(struct ureg_program *ureg, - struct ureg_dst out, - struct ureg_src pos, - struct ureg_src sampler, - struct ureg_src coords, - struct ureg_src const0124, - struct ureg_src matrow0, - struct ureg_src matrow1, struct ureg_src matrow2) -{ - struct ureg_dst temp0 = ureg_DECL_temporary(ureg); - struct ureg_dst temp1 = ureg_DECL_temporary(ureg); - struct ureg_dst temp2 = ureg_DECL_temporary(ureg); - struct ureg_dst temp3 = ureg_DECL_temporary(ureg); - struct ureg_dst temp4 = ureg_DECL_temporary(ureg); - struct ureg_dst temp5 = ureg_DECL_temporary(ureg); - - ureg_MOV(ureg, ureg_writemask(temp0, TGSI_WRITEMASK_XY), pos); - ureg_MOV(ureg, - ureg_writemask(temp0, TGSI_WRITEMASK_Z), - ureg_scalar(const0124, TGSI_SWIZZLE_Y)); - - ureg_DP3(ureg, temp1, matrow0, ureg_src(temp0)); - ureg_DP3(ureg, temp2, matrow1, ureg_src(temp0)); - ureg_DP3(ureg, temp3, matrow2, ureg_src(temp0)); - ureg_RCP(ureg, temp3, ureg_src(temp3)); - ureg_MUL(ureg, temp1, ureg_src(temp1), ureg_src(temp3)); - ureg_MUL(ureg, temp2, ureg_src(temp2), ureg_src(temp3)); - - ureg_MOV(ureg, ureg_writemask(temp5, TGSI_WRITEMASK_X), ureg_src(temp1)); - ureg_MOV(ureg, ureg_writemask(temp5, TGSI_WRITEMASK_Y), 
ureg_src(temp2)); - - ureg_MUL(ureg, temp0, ureg_scalar(coords, TGSI_SWIZZLE_Y), - ureg_scalar(ureg_src(temp5), TGSI_SWIZZLE_Y)); - ureg_MAD(ureg, temp1, - ureg_scalar(coords, TGSI_SWIZZLE_X), - ureg_scalar(ureg_src(temp5), TGSI_SWIZZLE_X), ureg_src(temp0)); - ureg_ADD(ureg, temp1, ureg_src(temp1), ureg_src(temp1)); - ureg_MUL(ureg, temp3, - ureg_scalar(ureg_src(temp5), TGSI_SWIZZLE_Y), - ureg_scalar(ureg_src(temp5), TGSI_SWIZZLE_Y)); - ureg_MAD(ureg, temp4, - ureg_scalar(ureg_src(temp5), TGSI_SWIZZLE_X), - ureg_scalar(ureg_src(temp5), TGSI_SWIZZLE_X), ureg_src(temp3)); - ureg_MOV(ureg, temp4, ureg_negate(ureg_src(temp4))); - ureg_MUL(ureg, temp2, ureg_scalar(coords, TGSI_SWIZZLE_Z), ureg_src(temp4)); - ureg_MUL(ureg, temp0, - ureg_scalar(const0124, TGSI_SWIZZLE_W), ureg_src(temp2)); - ureg_MUL(ureg, temp3, ureg_src(temp1), ureg_src(temp1)); - ureg_ADD(ureg, temp2, ureg_src(temp3), ureg_negate(ureg_src(temp0))); - ureg_RSQ(ureg, temp2, ureg_abs(ureg_src(temp2))); - ureg_RCP(ureg, temp2, ureg_src(temp2)); - ureg_ADD(ureg, temp1, ureg_src(temp2), ureg_negate(ureg_src(temp1))); - ureg_ADD(ureg, temp0, - ureg_scalar(coords, TGSI_SWIZZLE_Z), - ureg_scalar(coords, TGSI_SWIZZLE_Z)); - ureg_RCP(ureg, temp0, ureg_src(temp0)); - ureg_MUL(ureg, temp2, ureg_src(temp1), ureg_src(temp0)); - ureg_TEX(ureg, out, TGSI_TEXTURE_1D, ureg_src(temp2), sampler); - - ureg_release_temporary(ureg, temp0); - ureg_release_temporary(ureg, temp1); - ureg_release_temporary(ureg, temp2); - ureg_release_temporary(ureg, temp3); - ureg_release_temporary(ureg, temp4); - ureg_release_temporary(ureg, temp5); -} - static void * create_vs(struct pipe_context *pipe, unsigned vs_traits) { @@ -265,10 +145,11 @@ struct ureg_src src; struct ureg_dst dst; struct ureg_src const0, const1; - boolean is_fill = (vs_traits & VS_FILL) != 0; boolean is_composite = (vs_traits & VS_COMPOSITE) != 0; boolean has_mask = (vs_traits & VS_MASK) != 0; boolean is_yuv = (vs_traits & VS_YUV) != 0; + boolean is_src_src = 
(vs_traits & VS_SRC_SRC) != 0; + boolean is_mask_src = (vs_traits & VS_MASK_SRC) != 0; unsigned input_slot = 0; ureg = ureg_create(PIPE_SHADER_VERTEX); @@ -279,8 +160,6 @@ const1 = ureg_DECL_constant(ureg, 1); /* it has to be either a fill or a composite op */ - debug_assert((is_fill ^ is_composite) ^ is_yuv); - src = ureg_DECL_vs_input(ureg, input_slot++); dst = ureg_DECL_output(ureg, TGSI_SEMANTIC_POSITION, 0); src = vs_normalize_coords(ureg, src, const0, const1); @@ -293,21 +172,17 @@ } if (is_composite) { - src = ureg_DECL_vs_input(ureg, input_slot++); - dst = ureg_DECL_output(ureg, TGSI_SEMANTIC_GENERIC, 0); - ureg_MOV(ureg, dst, src); - } - - if (is_fill) { - src = ureg_DECL_vs_input(ureg, input_slot++); - dst = ureg_DECL_output(ureg, TGSI_SEMANTIC_COLOR, 0); - ureg_MOV(ureg, dst, src); - } - - if (has_mask) { - src = ureg_DECL_vs_input(ureg, input_slot++); - dst = ureg_DECL_output(ureg, TGSI_SEMANTIC_GENERIC, 1); - ureg_MOV(ureg, dst, src); + if (!is_src_src || (has_mask && !is_mask_src)) { + src = ureg_DECL_vs_input(ureg, input_slot++); + dst = ureg_DECL_output(ureg, TGSI_SEMANTIC_GENERIC, 0); + ureg_MOV(ureg, dst, src); + } + + if (!is_src_src && (has_mask && !is_mask_src)) { + src = ureg_DECL_vs_input(ureg, input_slot++); + dst = ureg_DECL_output(ureg, TGSI_SEMANTIC_GENERIC, 1); + ureg_MOV(ureg, dst, src); + } } ureg_END(ureg); @@ -383,7 +258,7 @@ struct ureg_dst dst, struct ureg_src coords, struct ureg_src sampler, - struct ureg_src imm0, + const struct ureg_src *imm0, boolean repeat_none, boolean swizzle, boolean set_alpha) { if (repeat_none) { @@ -394,11 +269,11 @@ TGSI_SWIZZLE_X, TGSI_SWIZZLE_Y, TGSI_SWIZZLE_X, - TGSI_SWIZZLE_Y), ureg_scalar(imm0, + TGSI_SWIZZLE_Y), ureg_scalar(*imm0, TGSI_SWIZZLE_X)); ureg_SLT(ureg, tmp0, ureg_swizzle(coords, TGSI_SWIZZLE_X, TGSI_SWIZZLE_Y, - TGSI_SWIZZLE_X, TGSI_SWIZZLE_Y), ureg_scalar(imm0, + TGSI_SWIZZLE_X, TGSI_SWIZZLE_Y), ureg_scalar(*imm0, TGSI_SWIZZLE_W)); ureg_MIN(ureg, tmp0, ureg_src(tmp0), ureg_src(tmp1)); 
ureg_MIN(ureg, tmp0, ureg_scalar(ureg_src(tmp0), TGSI_SWIZZLE_X), @@ -412,7 +287,7 @@ if (set_alpha) ureg_MOV(ureg, ureg_writemask(tmp1, TGSI_WRITEMASK_W), - ureg_scalar(imm0, TGSI_SWIZZLE_W)); + ureg_scalar(*imm0, TGSI_SWIZZLE_W)); ureg_MUL(ureg, dst, ureg_src(tmp1), ureg_src(tmp0)); ureg_release_temporary(ureg, tmp0); ureg_release_temporary(ureg, tmp1); @@ -432,7 +307,32 @@ if (set_alpha) ureg_MOV(ureg, ureg_writemask(dst, TGSI_WRITEMASK_W), - ureg_scalar(imm0, TGSI_SWIZZLE_W)); + ureg_scalar(*imm0, TGSI_SWIZZLE_W)); + } +} + +static void +read_input(struct ureg_program *ureg, + struct ureg_dst dst, + const struct ureg_src *imm0, + boolean repeat_none, boolean swizzle, boolean set_alpha, + boolean is_src, unsigned *cur_constant, unsigned *cur_sampler) +{ + struct ureg_src input, sampler; + + if (is_src) { + input = ureg_DECL_constant(ureg, (*cur_constant)++); + ureg_MOV(ureg, dst, input); + } else { + sampler = ureg_DECL_sampler(ureg, *cur_sampler); + ureg_DECL_sampler_view(ureg, *cur_sampler, TGSI_TEXTURE_2D, + TGSI_RETURN_TYPE_FLOAT, TGSI_RETURN_TYPE_FLOAT, + TGSI_RETURN_TYPE_FLOAT, TGSI_RETURN_TYPE_FLOAT); + input = ureg_DECL_fs_input(ureg, + TGSI_SEMANTIC_GENERIC, (*cur_sampler)++, + TGSI_INTERPOLATE_PERSPECTIVE); + xrender_tex(ureg, dst, input, sampler, imm0, + repeat_none, swizzle, set_alpha); } } @@ -440,18 +340,10 @@ create_fs(struct pipe_context *pipe, unsigned fs_traits) { struct ureg_program *ureg; - struct ureg_src /*dst_sampler, */ src_sampler, mask_sampler; - struct ureg_src /*dst_pos, */ src_input, mask_pos; struct ureg_dst src, mask; struct ureg_dst out; struct ureg_src imm0 = { 0 }; unsigned has_mask = (fs_traits & FS_MASK) != 0; - unsigned is_fill = (fs_traits & FS_FILL) != 0; - unsigned is_composite = (fs_traits & FS_COMPOSITE) != 0; - unsigned is_solid = (fs_traits & FS_SOLID_FILL) != 0; - unsigned is_lingrad = (fs_traits & FS_LINGRAD_FILL) != 0; - unsigned is_radgrad = (fs_traits & FS_RADGRAD_FILL) != 0; - unsigned comp_alpha_mask = fs_traits 
& FS_COMPONENT_ALPHA; unsigned is_yuv = (fs_traits & FS_YUV) != 0; unsigned src_repeat_none = (fs_traits & FS_SRC_REPEAT_NONE) != 0; unsigned mask_repeat_none = (fs_traits & FS_MASK_REPEAT_NONE) != 0; @@ -462,6 +354,11 @@ unsigned src_luminance = (fs_traits & FS_SRC_LUMINANCE) != 0; unsigned mask_luminance = (fs_traits & FS_MASK_LUMINANCE) != 0; unsigned dst_luminance = (fs_traits & FS_DST_LUMINANCE) != 0; + unsigned is_src_src = (fs_traits & FS_SRC_SRC) != 0; + unsigned is_mask_src = (fs_traits & FS_MASK_SRC) != 0; + boolean component_alpha = (fs_traits & FS_CA) != 0; + unsigned cur_sampler = 0; + unsigned cur_constant = 0; #if 0 print_fs_traits(fs_traits); @@ -473,9 +370,8 @@ if (ureg == NULL) return 0; - /* it has to be either a fill, a composite op or a yuv conversion */ - debug_assert((is_fill ^ is_composite) ^ is_yuv); - (void)is_yuv; + if (is_yuv) + return create_yuv_shader(pipe, ureg); out = ureg_DECL_output(ureg, TGSI_SEMANTIC_COLOR, 0); @@ -483,86 +379,13 @@ src_set_alpha || mask_set_alpha || src_luminance) { imm0 = ureg_imm4f(ureg, 0, 0, 0, 1); } - if (is_composite) { - src_sampler = ureg_DECL_sampler(ureg, 0); - ureg_DECL_sampler_view(ureg, 0, TGSI_TEXTURE_2D, - TGSI_RETURN_TYPE_FLOAT, TGSI_RETURN_TYPE_FLOAT, - TGSI_RETURN_TYPE_FLOAT, TGSI_RETURN_TYPE_FLOAT); - src_input = ureg_DECL_fs_input(ureg, - TGSI_SEMANTIC_GENERIC, 0, - TGSI_INTERPOLATE_PERSPECTIVE); - } else if (is_fill) { - if (is_solid) - src_input = ureg_DECL_fs_input(ureg, - TGSI_SEMANTIC_COLOR, 0, - TGSI_INTERPOLATE_PERSPECTIVE); - else - src_input = ureg_DECL_fs_input(ureg, - TGSI_SEMANTIC_POSITION, 0, - TGSI_INTERPOLATE_PERSPECTIVE); - } else { - debug_assert(is_yuv); - return create_yuv_shader(pipe, ureg); - } - if (has_mask) { - mask_sampler = ureg_DECL_sampler(ureg, 1); - ureg_DECL_sampler_view(ureg, 1, TGSI_TEXTURE_2D, - TGSI_RETURN_TYPE_FLOAT, TGSI_RETURN_TYPE_FLOAT, - TGSI_RETURN_TYPE_FLOAT, TGSI_RETURN_TYPE_FLOAT); - mask_pos = ureg_DECL_fs_input(ureg, - TGSI_SEMANTIC_GENERIC, 1, - 
TGSI_INTERPOLATE_PERSPECTIVE); - } -#if 0 /* unused right now */ - dst_sampler = ureg_DECL_sampler(ureg, 2); - ureg_DECL_sampler_view(ureg, 2, TGSI_TEXTURE_2D, - TGSI_RETURN_TYPE_FLOAT, TGSI_RETURN_TYPE_FLOAT, - TGSI_RETURN_TYPE_FLOAT, TGSI_RETURN_TYPE_FLOAT); - dst_pos = ureg_DECL_fs_input(ureg, - TGSI_SEMANTIC_POSITION, 2, - TGSI_INTERPOLATE_PERSPECTIVE); -#endif + src = (has_mask || src_luminance || dst_luminance) ? + ureg_DECL_temporary(ureg) : out; + + read_input(ureg, src, &imm0, src_repeat_none, src_swizzle, + src_set_alpha, is_src_src, &cur_constant, &cur_sampler); - if (is_composite) { - if (has_mask || src_luminance || dst_luminance) - src = ureg_DECL_temporary(ureg); - else - src = out; - xrender_tex(ureg, src, src_input, src_sampler, imm0, - src_repeat_none, src_swizzle, src_set_alpha); - } else if (is_fill) { - if (is_solid) { - if (has_mask || src_luminance || dst_luminance) - src = ureg_dst(src_input); - else - ureg_MOV(ureg, out, src_input); - } else if (is_lingrad || is_radgrad) { - struct ureg_src coords, const0124, matrow0, matrow1, matrow2; - - if (has_mask || src_luminance || dst_luminance) - src = ureg_DECL_temporary(ureg); - else - src = out; - - coords = ureg_DECL_constant(ureg, 0); - const0124 = ureg_DECL_constant(ureg, 1); - matrow0 = ureg_DECL_constant(ureg, 2); - matrow1 = ureg_DECL_constant(ureg, 3); - matrow2 = ureg_DECL_constant(ureg, 4); - - if (is_lingrad) { - linear_gradient(ureg, src, - src_input, src_sampler, - coords, const0124, matrow0, matrow1, matrow2); - } else if (is_radgrad) { - radial_gradient(ureg, src, - src_input, src_sampler, - coords, const0124, matrow0, matrow1, matrow2); - } - } else - debug_assert(!"Unknown fill type!"); - } if (src_luminance) { ureg_MOV(ureg, src, ureg_scalar(ureg_src(src), TGSI_SWIZZLE_X)); ureg_MOV(ureg, ureg_writemask(src, TGSI_WRITEMASK_XYZ), @@ -573,13 +396,12 @@ if (has_mask) { mask = ureg_DECL_temporary(ureg); - xrender_tex(ureg, mask, mask_pos, mask_sampler, imm0, - mask_repeat_none, 
mask_swizzle, mask_set_alpha); - /* src IN mask */ + read_input(ureg, mask, &imm0, mask_repeat_none, + mask_swizzle, mask_set_alpha, is_mask_src, &cur_constant, + &cur_sampler); src_in_mask(ureg, (dst_luminance) ? src : out, ureg_src(src), - ureg_src(mask), - comp_alpha_mask, mask_luminance); + ureg_src(mask), mask_luminance, component_alpha); ureg_release_temporary(ureg, mask); } diff -Nru mesa-18.3.3/src/gallium/state_trackers/xa/xa_tracker.c mesa-19.0.1/src/gallium/state_trackers/xa/xa_tracker.c --- mesa-18.3.3/src/gallium/state_trackers/xa/xa_tracker.c 2018-12-07 18:58:04.000000000 +0000 +++ mesa-19.0.1/src/gallium/state_trackers/xa/xa_tracker.c 2019-03-31 23:16:37.000000000 +0000 @@ -89,6 +89,15 @@ fdesc.xa_format = xa_format; switch (xa_format) { + case xa_format_a8: + if (xa->screen->is_format_supported(xa->screen, PIPE_FORMAT_R8_UNORM, + PIPE_TEXTURE_2D, 0, 0, + stype_bind[xa_type_a] | + PIPE_BIND_RENDER_TARGET)) + fdesc.format = PIPE_FORMAT_R8_UNORM; + else + fdesc.format = PIPE_FORMAT_L8_UNORM; + break; case xa_format_a8r8g8b8: fdesc.format = PIPE_FORMAT_B8G8R8A8_UNORM; break; @@ -101,15 +110,21 @@ case xa_format_x1r5g5b5: fdesc.format = PIPE_FORMAT_B5G5R5A1_UNORM; break; - case xa_format_a8: - if (xa->screen->is_format_supported(xa->screen, PIPE_FORMAT_R8_UNORM, - PIPE_TEXTURE_2D, 0, 0, - stype_bind[xa_type_a] | - PIPE_BIND_RENDER_TARGET)) - fdesc.format = PIPE_FORMAT_R8_UNORM; - else - fdesc.format = PIPE_FORMAT_L8_UNORM; - break; + case xa_format_a4r4g4b4: + fdesc.format = PIPE_FORMAT_B4G4R4A4_UNORM; + break; + case xa_format_a2b10g10r10: + fdesc.format = PIPE_FORMAT_R10G10B10A2_UNORM; + break; + case xa_format_x2b10g10r10: + fdesc.format = PIPE_FORMAT_R10G10B10X2_UNORM; + break; + case xa_format_b8g8r8a8: + fdesc.format = PIPE_FORMAT_A8R8G8B8_UNORM; + break; + case xa_format_b8g8r8x8: + fdesc.format = PIPE_FORMAT_X8R8G8B8_UNORM; + break; case xa_format_z24: fdesc.format = PIPE_FORMAT_Z24X8_UNORM; break; diff -Nru 
mesa-18.3.3/src/gallium/state_trackers/xa/xa_tracker.h.in mesa-19.0.1/src/gallium/state_trackers/xa/xa_tracker.h.in --- mesa-18.3.3/src/gallium/state_trackers/xa/xa_tracker.h.in 2017-11-23 00:32:52.000000000 +0000 +++ mesa-19.0.1/src/gallium/state_trackers/xa/xa_tracker.h.in 2019-03-31 23:16:37.000000000 +0000 @@ -126,6 +126,11 @@ xa_format_x8r8g8b8 = xa_format(32, xa_type_argb, 0, 8, 8, 8), xa_format_r5g6b5 = xa_format(16, xa_type_argb, 0, 5, 6, 5), xa_format_x1r5g5b5 = xa_format(16, xa_type_argb, 0, 5, 5, 5), + xa_format_a4r4g4b4 = xa_format(16, xa_type_argb, 4, 4, 4, 4), + xa_format_a2b10g10r10 = xa_format(32, xa_type_abgr, 2, 10, 10, 10), + xa_format_x2b10g10r10 = xa_format(32, xa_type_abgr, 0, 10, 10, 10), + xa_format_b8g8r8a8 = xa_format(32, xa_type_bgra, 8, 8, 8, 8), + xa_format_b8g8r8x8 = xa_format(32, xa_type_bgra, 0, 8, 8, 8), xa_format_z16 = xa_format_c(16, xa_type_z, 16, 0), xa_format_z32 = xa_format_c(32, xa_type_z, 32, 0), diff -Nru mesa-18.3.3/src/gallium/state_trackers/xvmc/attributes.c mesa-19.0.1/src/gallium/state_trackers/xvmc/attributes.c --- mesa-18.3.3/src/gallium/state_trackers/xvmc/attributes.c 2017-11-05 00:14:08.000000000 +0000 +++ mesa-19.0.1/src/gallium/state_trackers/xvmc/attributes.c 2019-03-31 23:16:37.000000000 +0000 @@ -90,15 +90,15 @@ if (!attr) return XvMCBadContext; - if (strcmp(attr, XV_BRIGHTNESS)) + if (strcmp(attr, XV_BRIGHTNESS) == 0) context_priv->procamp.brightness = value / 1000.0f; - else if (strcmp(attr, XV_CONTRAST)) + else if (strcmp(attr, XV_CONTRAST) == 0) context_priv->procamp.contrast = value / 1000.0f + 1.0f; - else if (strcmp(attr, XV_SATURATION)) + else if (strcmp(attr, XV_SATURATION) == 0) context_priv->procamp.saturation = value / 1000.0f + 1.0f; - else if (strcmp(attr, XV_HUE)) + else if (strcmp(attr, XV_HUE) == 0) context_priv->procamp.hue = value / 1000.0f; - else if (strcmp(attr, XV_COLORSPACE)) + else if (strcmp(attr, XV_COLORSPACE) == 0) context_priv->color_standard = value ? 
VL_CSC_COLOR_STANDARD_BT_601 : VL_CSC_COLOR_STANDARD_BT_709; @@ -134,15 +134,15 @@ if (!attr) return XvMCBadContext; - if (strcmp(attr, XV_BRIGHTNESS)) + if (strcmp(attr, XV_BRIGHTNESS) == 0) *value = context_priv->procamp.brightness * 1000; - else if (strcmp(attr, XV_CONTRAST)) + else if (strcmp(attr, XV_CONTRAST) == 0) *value = context_priv->procamp.contrast * 1000 - 1000; - else if (strcmp(attr, XV_SATURATION)) + else if (strcmp(attr, XV_SATURATION) == 0) *value = context_priv->procamp.saturation * 1000 + 1000; - else if (strcmp(attr, XV_HUE)) + else if (strcmp(attr, XV_HUE) == 0) *value = context_priv->procamp.hue * 1000; - else if (strcmp(attr, XV_COLORSPACE)) + else if (strcmp(attr, XV_COLORSPACE) == 0) *value = context_priv->color_standard == VL_CSC_COLOR_STANDARD_BT_709; else return BadName; diff -Nru mesa-18.3.3/src/gallium/state_trackers/xvmc/tests/xvmc_bench.c mesa-19.0.1/src/gallium/state_trackers/xvmc/tests/xvmc_bench.c --- mesa-18.3.3/src/gallium/state_trackers/xvmc/tests/xvmc_bench.c 2017-11-05 00:14:08.000000000 +0000 +++ mesa-19.0.1/src/gallium/state_trackers/xvmc/tests/xvmc_bench.c 2019-03-31 23:16:37.000000000 +0000 @@ -123,11 +123,11 @@ while (token && !fail) { - if (strcmp(token, "i")) + if (strcmp(token, "i") == 0) config->mb_types |= MB_TYPE_I; - else if (strcmp(token, "p")) + else if (strcmp(token, "p") == 0) config->mb_types |= MB_TYPE_P; - else if (strcmp(token, "b")) + else if (strcmp(token, "b") == 0) config->mb_types |= MB_TYPE_B; else fail = 1; diff -Nru mesa-18.3.3/src/gallium/targets/d3dadapter9/description.c mesa-19.0.1/src/gallium/targets/d3dadapter9/description.c --- mesa-18.3.3/src/gallium/targets/d3dadapter9/description.c 2017-11-05 00:14:08.000000000 +0000 +++ mesa-19.0.1/src/gallium/targets/d3dadapter9/description.c 2019-03-31 23:16:37.000000000 +0000 @@ -20,6 +20,7 @@ * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE * USE OR OTHER DEALINGS IN THE SOFTWARE. 
*/ +#include #include #include "adapter9.h" @@ -239,7 +240,7 @@ DBG("unknown vendor 0x4%x, emulating 0x4%x\n", drvid->VendorId, fallback_ven); drvid->VendorId = fallback_ven; drvid->DeviceId = fallback_dev; - strncpy(drvid->Description, fallback_name, sizeof(drvid->Description)); + snprintf(drvid->Description, sizeof(drvid->Description), "%s", fallback_name); } /* fill in driver name and version */ @@ -277,46 +278,54 @@ case HW_VENDOR_INTEL: for (i = 0; i < sizeof(cards_intel) / sizeof(cards_intel[0]); i++) { if (strstr(drvid->Description, cards_intel[i].mesaname)) { - strncpy(drvid->Description, cards_intel[i].d3d9name, sizeof(drvid->Description)); + snprintf(drvid->Description, sizeof(drvid->Description), + "%s", cards_intel[i].d3d9name); return; } } /* use a fall-back if nothing matches */ DBG("Unknown card name %s!\n", drvid->DeviceName); - strncpy(drvid->Description, cards_intel[0].d3d9name, sizeof(drvid->Description)); + snprintf(drvid->Description, sizeof(drvid->Description), + "%s", cards_intel[0].d3d9name); break; case HW_VENDOR_VMWARE: for (i = 0; i < sizeof(cards_vmware) / sizeof(cards_vmware[0]); i++) { if (strstr(drvid->Description, cards_vmware[i].mesaname)) { - strncpy(drvid->Description, cards_vmware[i].d3d9name, sizeof(drvid->Description)); + snprintf(drvid->Description, sizeof(drvid->Description), + "%s", cards_vmware[i].d3d9name); return; } } /* use a fall-back if nothing matches */ DBG("Unknown card name %s!\n", drvid->DeviceName); - strncpy(drvid->Description, cards_vmware[0].d3d9name, sizeof(drvid->Description)); + snprintf(drvid->Description, sizeof(drvid->Description), + "%s", cards_vmware[0].d3d9name); break; case HW_VENDOR_AMD: for (i = 0; i < sizeof(cards_amd) / sizeof(cards_amd[0]); i++) { if (strstr(drvid->Description, cards_amd[i].mesaname)) { - strncpy(drvid->Description, cards_amd[i].d3d9name, sizeof(drvid->Description)); + snprintf(drvid->Description, sizeof(drvid->Description), + "%s", cards_amd[i].d3d9name); return; } } /* use a 
fall-back if nothing matches */ DBG("Unknown card name %s!\n", drvid->DeviceName); - strncpy(drvid->Description, cards_amd[0].d3d9name, sizeof(drvid->Description)); + snprintf(drvid->Description, sizeof(drvid->Description), + "%s", cards_amd[0].d3d9name); break; case HW_VENDOR_NVIDIA: for (i = 0; i < sizeof(cards_nvidia) / sizeof(cards_nvidia[0]); i++) { if (strstr(drvid->Description, cards_nvidia[i].mesaname)) { - strncpy(drvid->Description, cards_nvidia[i].d3d9name, sizeof(drvid->Description)); + snprintf(drvid->Description, sizeof(drvid->Description), + "%s", cards_nvidia[i].d3d9name); return; } } /* use a fall-back if nothing matches */ DBG("Unknown card name %s!\n", drvid->DeviceName); - strncpy(drvid->Description, cards_nvidia[0].d3d9name, sizeof(drvid->Description)); + snprintf(drvid->Description, sizeof(drvid->Description), + "%s", cards_nvidia[0].d3d9name); break; default: break; diff -Nru mesa-18.3.3/src/gallium/targets/d3dadapter9/drm.c mesa-19.0.1/src/gallium/targets/d3dadapter9/drm.c --- mesa-18.3.3/src/gallium/targets/d3dadapter9/drm.c 2018-12-07 18:58:04.000000000 +0000 +++ mesa-19.0.1/src/gallium/targets/d3dadapter9/drm.c 2019-03-31 23:16:37.000000000 +0000 @@ -149,8 +149,8 @@ &drvid->SubSysId, &drvid->Revision); snprintf(drvid->DeviceName, sizeof(drvid->DeviceName), "Gallium 0.4 with %s", ctx->hal->get_vendor(ctx->hal)); - strncpy(drvid->Description, ctx->hal->get_name(ctx->hal), - sizeof(drvid->Description)); + snprintf(drvid->Description, sizeof(drvid->Description), + "%s", ctx->hal->get_name(ctx->hal)); if (override_vendorid > 0) { found = FALSE; @@ -163,8 +163,8 @@ fallback_cards[i].device_id); drvid->VendorId = fallback_cards[i].vendor_id; drvid->DeviceId = fallback_cards[i].device_id; - strncpy(drvid->Description, fallback_cards[i].name, - sizeof(drvid->Description)); + snprintf(drvid->Description, sizeof(drvid->Description), + "%s", fallback_cards[i].name); found = TRUE; break; } @@ -279,9 +279,6 @@ DBG("You have set a non standard 
throttling value in combination with thread_submit." "We advise to use a throttling value of -2/0"); } - if (ctx->base.thread_submit && !different_device) - DBG("You have set thread_submit but do not use a different device than the server." - "You should not expect any benefit."); if (driCheckOption(&userInitOptions, "override_vendorid", DRI_INT)) { override_vendorid = driQueryOptioni(&userInitOptions, "override_vendorid"); diff -Nru mesa-18.3.3/src/gallium/targets/d3dadapter9/meson.build mesa-19.0.1/src/gallium/targets/d3dadapter9/meson.build --- mesa-18.3.3/src/gallium/targets/d3dadapter9/meson.build 2018-12-07 18:58:04.000000000 +0000 +++ mesa-19.0.1/src/gallium/targets/d3dadapter9/meson.build 2019-03-31 23:16:37.000000000 +0000 @@ -68,5 +68,5 @@ description : 'Native D3D driver modules', version : '.'.join(nine_version), requires_private : 'libdrm >= ' + dep_libdrm.version(), - variables : ['moduledir=${prefix}/@0@'.format(d3d_drivers_path)], + variables : ['moduledir=@0@'.format(d3d_drivers_path)], ) diff -Nru mesa-18.3.3/src/gallium/targets/dri/Makefile.am mesa-19.0.1/src/gallium/targets/dri/Makefile.am --- mesa-18.3.3/src/gallium/targets/dri/Makefile.am 2018-09-27 19:13:54.000000000 +0000 +++ mesa-19.0.1/src/gallium/targets/dri/Makefile.am 2019-03-31 23:16:37.000000000 +0000 @@ -78,12 +78,11 @@ include $(top_srcdir)/src/gallium/drivers/v3d/Automake.inc include $(top_srcdir)/src/gallium/drivers/vc4/Automake.inc -include $(top_srcdir)/src/gallium/drivers/pl111/Automake.inc +include $(top_srcdir)/src/gallium/drivers/kmsro/Automake.inc include $(top_srcdir)/src/gallium/drivers/virgl/Automake.inc include $(top_srcdir)/src/gallium/drivers/etnaviv/Automake.inc -include $(top_srcdir)/src/gallium/drivers/imx/Automake.inc include $(top_srcdir)/src/gallium/drivers/softpipe/Automake.inc include $(top_srcdir)/src/gallium/drivers/llvmpipe/Automake.inc diff -Nru mesa-18.3.3/src/gallium/targets/dri/meson.build mesa-19.0.1/src/gallium/targets/dri/meson.build --- 
mesa-18.3.3/src/gallium/targets/dri/meson.build 2018-12-07 18:58:04.000000000 +0000 +++ mesa-19.0.1/src/gallium/targets/dri/meson.build 2019-03-31 23:16:37.000000000 +0000 @@ -56,13 +56,15 @@ dependencies : [ dep_selinux, dep_expat, dep_libdrm, dep_llvm, dep_thread, driver_swrast, driver_r300, driver_r600, driver_radeonsi, driver_nouveau, - driver_pl111, driver_v3d, driver_vc4, driver_freedreno, driver_etnaviv, - driver_imx, driver_tegra, driver_i915, driver_svga, driver_virgl, + driver_kmsro, driver_v3d, driver_vc4, driver_freedreno, driver_etnaviv, + driver_tegra, driver_i915, driver_svga, driver_virgl, driver_swr, ], ) -foreach d : [[with_gallium_pl111, 'pl111_dri.so'], +foreach d : [[with_gallium_kmsro, 'pl111_dri.so'], + [with_gallium_kmsro, 'hx8357d_dri.so'], + [with_gallium_kmsro, 'imx-drm_dri.so'], [with_gallium_radeonsi, 'radeonsi_dri.so'], [with_gallium_nouveau, 'nouveau_dri.so'], [with_gallium_freedreno, ['msm_dri.so', 'kgsl_dri.so']], @@ -71,7 +73,6 @@ [with_gallium_v3d, 'v3d_dri.so'], [with_gallium_vc4, 'vc4_dri.so'], [with_gallium_etnaviv, 'etnaviv_dri.so'], - [with_gallium_imx, 'imx-drm_dri.so'], [with_gallium_tegra, 'tegra_dri.so'], [with_gallium_i915, 'i915_dri.so'], [with_gallium_r300, 'r300_dri.so'], diff -Nru mesa-18.3.3/src/gallium/targets/dri/target.c mesa-19.0.1/src/gallium/targets/dri/target.c --- mesa-18.3.3/src/gallium/targets/dri/target.c 2018-09-27 19:13:54.000000000 +0000 +++ mesa-19.0.1/src/gallium/targets/dri/target.c 2019-03-31 23:16:37.000000000 +0000 @@ -77,7 +77,8 @@ #if defined(GALLIUM_VC4) DEFINE_LOADER_DRM_ENTRYPOINT(vc4) -#if defined(GALLIUM_PL111) +#if defined(GALLIUM_KMSRO) +DEFINE_LOADER_DRM_ENTRYPOINT(hx8357d) DEFINE_LOADER_DRM_ENTRYPOINT(pl111) #endif #endif diff -Nru mesa-18.3.3/src/gallium/targets/omx/meson.build mesa-19.0.1/src/gallium/targets/omx/meson.build --- mesa-18.3.3/src/gallium/targets/omx/meson.build 2018-03-08 23:00:46.000000000 +0000 +++ mesa-19.0.1/src/gallium/targets/omx/meson.build 2019-03-31 
23:16:37.000000000 +0000 @@ -32,7 +32,7 @@ libomx_gallium = shared_library( 'omx_mesa', - 'target.c', + ['target.c', xmlpool_options_h], c_args : c_vis_args, cpp_args : cpp_vis_args, link_args : [omx_link_args, ld_args_gc_sections], diff -Nru mesa-18.3.3/src/gallium/targets/pipe-loader/Makefile.am mesa-19.0.1/src/gallium/targets/pipe-loader/Makefile.am --- mesa-18.3.3/src/gallium/targets/pipe-loader/Makefile.am 2018-04-16 21:31:06.000000000 +0000 +++ mesa-19.0.1/src/gallium/targets/pipe-loader/Makefile.am 2019-03-31 23:16:37.000000000 +0000 @@ -157,6 +157,8 @@ $(PIPE_LIBS) \ $(top_builddir)/src/gallium/winsys/freedreno/drm/libfreedrenodrm.la \ $(top_builddir)/src/gallium/drivers/freedreno/libfreedreno.la \ + $(top_builddir)/src/freedreno/libfreedreno_drm.la \ + $(top_builddir)/src/freedreno/libfreedreno_ir3.la \ $(LIBDRM_LIBS) \ $(FREEDRENO_LIBS) diff -Nru mesa-18.3.3/src/gallium/targets/pipe-loader/pipe_msm.c mesa-19.0.1/src/gallium/targets/pipe-loader/pipe_msm.c --- mesa-18.3.3/src/gallium/targets/pipe-loader/pipe_msm.c 2017-11-05 00:14:08.000000000 +0000 +++ mesa-19.0.1/src/gallium/targets/pipe-loader/pipe_msm.c 2019-03-31 23:16:37.000000000 +0000 @@ -8,7 +8,7 @@ { struct pipe_screen *screen; - screen = fd_drm_screen_create(fd); + screen = fd_drm_screen_create(fd, NULL); if (!screen) return NULL; diff -Nru mesa-18.3.3/src/gallium/targets/va/meson.build mesa-19.0.1/src/gallium/targets/va/meson.build --- mesa-18.3.3/src/gallium/targets/va/meson.build 2018-12-07 18:58:04.000000000 +0000 +++ mesa-19.0.1/src/gallium/targets/va/meson.build 2019-03-31 23:16:37.000000000 +0000 @@ -33,7 +33,7 @@ libva_gallium = shared_library( 'gallium_drv_video', - 'target.c', + ['target.c', xmlpool_options_h], c_args : c_vis_args, cpp_args : cpp_vis_args, link_args : [va_link_args, ld_args_gc_sections], diff -Nru mesa-18.3.3/src/gallium/targets/vdpau/meson.build mesa-19.0.1/src/gallium/targets/vdpau/meson.build --- mesa-18.3.3/src/gallium/targets/vdpau/meson.build 2019-02-01 
12:03:20.000000000 +0000 +++ mesa-19.0.1/src/gallium/targets/vdpau/meson.build 2019-03-31 23:16:37.000000000 +0000 @@ -38,7 +38,7 @@ libvdpau_gallium = shared_library( 'vdpau_gallium', - 'target.c', + ['target.c', xmlpool_options_h], c_args : c_vis_args, cpp_args : cpp_vis_args, link_args : [vdpau_link_args, ld_args_gc_sections], diff -Nru mesa-18.3.3/src/gallium/targets/xa/meson.build mesa-19.0.1/src/gallium/targets/xa/meson.build --- mesa-18.3.3/src/gallium/targets/xa/meson.build 2018-04-11 19:02:35.000000000 +0000 +++ mesa-19.0.1/src/gallium/targets/xa/meson.build 2019-03-31 23:16:37.000000000 +0000 @@ -34,7 +34,7 @@ libxatracker = shared_library( 'xatracker', - 'target.c', + ['target.c', xmlpool_options_h], c_args : c_vis_args, cpp_args : cpp_vis_args, link_args : [xa_link_args, ld_args_gc_sections], diff -Nru mesa-18.3.3/src/gallium/targets/xvmc/meson.build mesa-19.0.1/src/gallium/targets/xvmc/meson.build --- mesa-18.3.3/src/gallium/targets/xvmc/meson.build 2018-12-07 18:58:04.000000000 +0000 +++ mesa-19.0.1/src/gallium/targets/xvmc/meson.build 2019-03-31 23:16:37.000000000 +0000 @@ -33,7 +33,7 @@ libxvmc_gallium = shared_library( 'XvMCgallium', - 'target.c', + ['target.c', xmlpool_options_h], c_args : c_vis_args, cpp_args : cpp_vis_args, link_args : [xvmc_link_args, ld_args_gc_sections], diff -Nru mesa-18.3.3/src/gallium/tests/meson.build mesa-19.0.1/src/gallium/tests/meson.build --- mesa-18.3.3/src/gallium/tests/meson.build 2018-04-19 04:33:31.000000000 +0000 +++ mesa-19.0.1/src/gallium/tests/meson.build 2019-03-31 23:16:37.000000000 +0000 @@ -19,7 +19,5 @@ # SOFTWARE. 
subdir('trivial') -if with_gallium_softpipe - subdir('unit') -endif +subdir('unit') subdir('graw') diff -Nru mesa-18.3.3/src/gallium/tests/trivial/compute.c mesa-19.0.1/src/gallium/tests/trivial/compute.c --- mesa-18.3.3/src/gallium/tests/trivial/compute.c 2018-09-27 19:13:54.000000000 +0000 +++ mesa-19.0.1/src/gallium/tests/trivial/compute.c 2019-03-31 23:16:37.000000000 +0000 @@ -240,7 +240,7 @@ util_format_get_nblocksy(tex->format, tex->height0)); struct pipe_transfer *xfer; char *map; - int x, y, i; + int x = 0, y, i; int err = 0; if (!check) diff -Nru mesa-18.3.3/src/gallium/tests/unit/meson.build mesa-19.0.1/src/gallium/tests/unit/meson.build --- mesa-18.3.3/src/gallium/tests/unit/meson.build 2018-04-19 04:33:31.000000000 +0000 +++ mesa-19.0.1/src/gallium/tests/unit/meson.build 2019-03-31 23:16:37.000000000 +0000 @@ -20,12 +20,16 @@ foreach t : ['pipe_barrier_test', 'u_cache_test', 'u_half_test', 'u_format_test', 'u_format_compatible_test', 'translate_test'] - executable( + exe = executable( t, '@0@.c'.format(t), - include_directories : [inc_common, inc_gallium_drivers, inc_gallium_winsys], - link_with : [libgallium, libmesa_util, libws_null], - dependencies : [driver_swrast, dep_thread], + include_directories : inc_common, + link_with : [libgallium, libmesa_util], + dependencies : [dep_thread], install : false, ) + # u_cache_test is slow, and translate_test fails. + if not ['u_cache_test', 'translate_test'].contains(t) + test(t, exe, suite: 'gallium') + endif endforeach diff -Nru mesa-18.3.3/src/gallium/tests/unit/u_format_test.c mesa-19.0.1/src/gallium/tests/unit/u_format_test.c --- mesa-18.3.3/src/gallium/tests/unit/u_format_test.c 2018-07-29 21:31:02.000000000 +0000 +++ mesa-19.0.1/src/gallium/tests/unit/u_format_test.c 2019-03-31 23:16:37.000000000 +0000 @@ -668,6 +668,47 @@ } +/* Touch-test that the unorm/snorm flags are set up right by codegen. 
*/ +static boolean +test_format_norm_flags(const struct util_format_description *format_desc) +{ + boolean success = TRUE; + +#define FORMAT_CASE(format, unorm, snorm) \ + case format: \ + success = (format_desc->is_unorm == unorm && \ + format_desc->is_snorm == snorm); \ + break + + switch (format_desc->format) { + FORMAT_CASE(PIPE_FORMAT_R8G8B8A8_UNORM, TRUE, FALSE); + FORMAT_CASE(PIPE_FORMAT_R8G8B8A8_SRGB, TRUE, FALSE); + FORMAT_CASE(PIPE_FORMAT_R8G8B8A8_SNORM, FALSE, TRUE); + FORMAT_CASE(PIPE_FORMAT_R32_FLOAT, FALSE, FALSE); + FORMAT_CASE(PIPE_FORMAT_X8Z24_UNORM, TRUE, FALSE); + FORMAT_CASE(PIPE_FORMAT_S8X24_UINT, FALSE, FALSE); + FORMAT_CASE(PIPE_FORMAT_DXT1_RGB, TRUE, FALSE); + FORMAT_CASE(PIPE_FORMAT_ETC2_RGB8, TRUE, FALSE); + FORMAT_CASE(PIPE_FORMAT_ETC2_R11_SNORM, FALSE, TRUE); + FORMAT_CASE(PIPE_FORMAT_ASTC_4x4, TRUE, FALSE); + FORMAT_CASE(PIPE_FORMAT_BPTC_RGBA_UNORM, TRUE, FALSE); + FORMAT_CASE(PIPE_FORMAT_BPTC_RGB_FLOAT, FALSE, FALSE); + default: + success = !(format_desc->is_unorm && format_desc->is_snorm); + break; + } +#undef FORMAT_CASE + + if (!success) { + printf("FAILED: %s (unorm %s, snorm %s)\n", + format_desc->short_name, + format_desc->is_unorm ? "yes" : "no", + format_desc->is_snorm ? 
"yes" : "no"); + } + + return success; +} + typedef boolean (*test_func_t)(const struct util_format_description *format_desc, const struct util_format_test_case *test); @@ -698,6 +739,22 @@ return success; } +static boolean +test_format_metadata(const struct util_format_description *format_desc, + boolean (*func)(const struct util_format_description *format_desc), + const char *suffix) +{ + boolean success = TRUE; + + printf("Testing util_format_%s_%s ...\n", format_desc->short_name, suffix); + fflush(stdout); + + if (!func(format_desc)) { + success = FALSE; + } + + return success; +} static boolean test_all(void) @@ -724,6 +781,11 @@ } \ } +# define TEST_FORMAT_METADATA(name) \ + if (!test_format_metadata(format_desc, &test_format_##name, #name)) { \ + success = FALSE; \ + } \ + TEST_ONE_FUNC(fetch_rgba_float); TEST_ONE_FUNC(pack_rgba_float); TEST_ONE_FUNC(unpack_rgba_float); @@ -737,7 +799,10 @@ TEST_ONE_FUNC(unpack_s_8uint); TEST_ONE_FUNC(pack_s_8uint); + TEST_FORMAT_METADATA(norm_flags); + # undef TEST_ONE_FUNC +# undef TEST_ONE_FORMAT } return success; diff -Nru mesa-18.3.3/src/gallium/winsys/amdgpu/drm/amdgpu_bo.c mesa-19.0.1/src/gallium/winsys/amdgpu/drm/amdgpu_bo.c --- mesa-18.3.3/src/gallium/winsys/amdgpu/drm/amdgpu_bo.c 2018-12-07 18:58:04.000000000 +0000 +++ mesa-19.0.1/src/gallium/winsys/amdgpu/drm/amdgpu_bo.c 2019-03-31 23:16:37.000000000 +0000 @@ -56,6 +56,7 @@ unsigned alignment, enum radeon_bo_domain domain, enum radeon_bo_flag flags); +static void amdgpu_bo_unmap(struct pb_buffer *buf); static bool amdgpu_bo_wait(struct pb_buffer *_buf, uint64_t timeout, enum radeon_bo_usage usage) @@ -173,6 +174,12 @@ assert(bo->bo && "must not be called for slab entries"); + if (!bo->is_user_ptr && bo->cpu_ptr) { + bo->cpu_ptr = NULL; + amdgpu_bo_unmap(&bo->base); + } + assert(bo->is_user_ptr || bo->u.real.map_count == 0); + if (ws->debug_all_bos) { simple_mtx_lock(&ws->global_bo_list_lock); LIST_DEL(&bo->u.real.global_list_item); @@ -184,8 +191,10 @@ 
util_hash_table_remove(ws->bo_export_table, bo->bo); simple_mtx_unlock(&ws->bo_export_table_lock); - amdgpu_bo_va_op(bo->bo, 0, bo->base.size, bo->va, 0, AMDGPU_VA_OP_UNMAP); - amdgpu_va_range_free(bo->u.real.va_handle); + if (bo->initial_domain & RADEON_DOMAIN_VRAM_GTT) { + amdgpu_bo_va_op(bo->bo, 0, bo->base.size, bo->va, 0, AMDGPU_VA_OP_UNMAP); + amdgpu_va_range_free(bo->u.real.va_handle); + } amdgpu_bo_free(bo->bo); amdgpu_bo_remove_fences(bo); @@ -195,14 +204,7 @@ else if (bo->initial_domain & RADEON_DOMAIN_GTT) ws->allocated_gtt -= align64(bo->base.size, ws->info.gart_page_size); - if (bo->u.real.map_count >= 1) { - if (bo->initial_domain & RADEON_DOMAIN_VRAM) - ws->mapped_vram -= bo->base.size; - else if (bo->initial_domain & RADEON_DOMAIN_GTT) - ws->mapped_gtt -= bo->base.size; - ws->num_mapped_buffers--; - } - + simple_mtx_destroy(&bo->lock); FREE(bo); } @@ -218,6 +220,37 @@ amdgpu_bo_destroy(_buf); } +static void amdgpu_clean_up_buffer_managers(struct amdgpu_winsys *ws) +{ + for (unsigned i = 0; i < NUM_SLAB_ALLOCATORS; i++) + pb_slabs_reclaim(&ws->bo_slabs[i]); + + pb_cache_release_all_buffers(&ws->bo_cache); +} + +static bool amdgpu_bo_do_map(struct amdgpu_winsys_bo *bo, void **cpu) +{ + assert(!bo->sparse && bo->bo && !bo->is_user_ptr); + int r = amdgpu_bo_cpu_map(bo->bo, cpu); + if (r) { + /* Clean up buffer managers and try again. 
*/ + amdgpu_clean_up_buffer_managers(bo->ws); + r = amdgpu_bo_cpu_map(bo->bo, cpu); + if (r) + return false; + } + + if (p_atomic_inc_return(&bo->u.real.map_count) == 1) { + if (bo->initial_domain & RADEON_DOMAIN_VRAM) + bo->ws->mapped_vram += bo->base.size; + else if (bo->initial_domain & RADEON_DOMAIN_GTT) + bo->ws->mapped_gtt += bo->base.size; + bo->ws->num_mapped_buffers++; + } + + return true; +} + static void *amdgpu_bo_map(struct pb_buffer *buf, struct radeon_cmdbuf *rcs, enum pipe_transfer_usage usage) @@ -225,9 +258,6 @@ struct amdgpu_winsys_bo *bo = (struct amdgpu_winsys_bo*)buf; struct amdgpu_winsys_bo *real; struct amdgpu_cs *cs = (struct amdgpu_cs*)rcs; - int r; - void *cpu = NULL; - uint64_t offset = 0; assert(!bo->sparse); @@ -312,9 +342,9 @@ } } - /* If the buffer is created from user memory, return the user pointer. */ - if (bo->user_ptr) - return bo->user_ptr; + /* Buffer synchronization has been checked, now actually map the buffer. */ + void *cpu = NULL; + uint64_t offset = 0; if (bo->bo) { real = bo; @@ -323,22 +353,31 @@ offset = bo->va - real->va; } - r = amdgpu_bo_cpu_map(real->bo, &cpu); - if (r) { - /* Clear the cache and try again. */ - pb_cache_release_all_buffers(&real->ws->bo_cache); - r = amdgpu_bo_cpu_map(real->bo, &cpu); - if (r) - return NULL; + if (usage & RADEON_TRANSFER_TEMPORARY) { + if (real->is_user_ptr) { + cpu = real->cpu_ptr; + } else { + if (!amdgpu_bo_do_map(real, &cpu)) + return NULL; + } + } else { + cpu = p_atomic_read(&real->cpu_ptr); + if (!cpu) { + simple_mtx_lock(&real->lock); + /* Must re-check due to the possibility of a race. Re-check need not + * be atomic thanks to the lock. 
*/ + cpu = real->cpu_ptr; + if (!cpu) { + if (!amdgpu_bo_do_map(real, &cpu)) { + simple_mtx_unlock(&real->lock); + return NULL; + } + p_atomic_set(&real->cpu_ptr, cpu); + } + simple_mtx_unlock(&real->lock); + } } - if (p_atomic_inc_return(&real->u.real.map_count) == 1) { - if (real->initial_domain & RADEON_DOMAIN_VRAM) - real->ws->mapped_vram += real->base.size; - else if (real->initial_domain & RADEON_DOMAIN_GTT) - real->ws->mapped_gtt += real->base.size; - real->ws->num_mapped_buffers++; - } return (uint8_t*)cpu + offset; } @@ -349,12 +388,15 @@ assert(!bo->sparse); - if (bo->user_ptr) + if (bo->is_user_ptr) return; real = bo->bo ? bo : bo->u.slab.real; - + assert(real->u.real.map_count != 0 && "too many unmaps"); if (p_atomic_dec_zero(&real->u.real.map_count)) { + assert(!real->cpu_ptr && + "too many unmaps or forgot RADEON_TRANSFER_TEMPORARY flag"); + if (real->initial_domain & RADEON_DOMAIN_VRAM) real->ws->mapped_vram -= real->base.size; else if (real->initial_domain & RADEON_DOMAIN_GTT) @@ -384,6 +426,27 @@ } } +static uint64_t amdgpu_get_optimal_vm_alignment(struct amdgpu_winsys *ws, + uint64_t size, unsigned alignment) +{ + uint64_t vm_alignment = alignment; + + /* Increase the VM alignment for faster address translation. */ + if (size >= ws->info.pte_fragment_size) + vm_alignment = MAX2(vm_alignment, ws->info.pte_fragment_size); + + /* Gfx9: Increase the VM alignment to the most significant bit set + * in the size for faster address translation. + */ + if (ws->info.chip_class >= GFX9) { + unsigned msb = util_last_bit64(size); /* 0 = no bit is set */ + uint64_t msb_alignment = msb ? 
1ull << (msb - 1) : 0; + + vm_alignment = MAX2(vm_alignment, msb_alignment); + } + return vm_alignment; +} + static struct amdgpu_winsys_bo *amdgpu_create_bo(struct amdgpu_winsys *ws, uint64_t size, unsigned alignment, @@ -396,11 +459,12 @@ uint64_t va = 0; struct amdgpu_winsys_bo *bo; amdgpu_va_handle va_handle; - unsigned va_gap_size; int r; /* VRAM or GTT must be specified, but not both at the same time. */ - assert(util_bitcount(initial_domain & RADEON_DOMAIN_VRAM_GTT) == 1); + assert(util_bitcount(initial_domain & (RADEON_DOMAIN_VRAM_GTT | + RADEON_DOMAIN_GDS | + RADEON_DOMAIN_OA)) == 1); bo = CALLOC_STRUCT(amdgpu_winsys_bo); if (!bo) { @@ -418,6 +482,10 @@ request.preferred_heap |= AMDGPU_GEM_DOMAIN_VRAM; if (initial_domain & RADEON_DOMAIN_GTT) request.preferred_heap |= AMDGPU_GEM_DOMAIN_GTT; + if (initial_domain & RADEON_DOMAIN_GDS) + request.preferred_heap |= AMDGPU_GEM_DOMAIN_GDS; + if (initial_domain & RADEON_DOMAIN_OA) + request.preferred_heap |= AMDGPU_GEM_DOMAIN_OA; /* Since VRAM and GTT have almost the same performance on APUs, we could * just set GTT. However, in order to decrease GTT(RAM) usage, which is @@ -447,27 +515,31 @@ goto error_bo_alloc; } - va_gap_size = ws->check_vm ? MAX2(4 * alignment, 64 * 1024) : 0; - if (size > ws->info.pte_fragment_size) - alignment = MAX2(alignment, ws->info.pte_fragment_size); - r = amdgpu_va_range_alloc(ws->dev, amdgpu_gpu_va_range_general, - size + va_gap_size, alignment, 0, &va, &va_handle, - (flags & RADEON_FLAG_32BIT ? AMDGPU_VA_RANGE_32_BIT : 0) | - AMDGPU_VA_RANGE_HIGH); - if (r) - goto error_va_alloc; + if (initial_domain & RADEON_DOMAIN_VRAM_GTT) { + unsigned va_gap_size = ws->check_vm ? MAX2(4 * alignment, 64 * 1024) : 0; - unsigned vm_flags = AMDGPU_VM_PAGE_READABLE | - AMDGPU_VM_PAGE_EXECUTABLE; + r = amdgpu_va_range_alloc(ws->dev, amdgpu_gpu_va_range_general, + size + va_gap_size, + amdgpu_get_optimal_vm_alignment(ws, size, alignment), + 0, &va, &va_handle, + (flags & RADEON_FLAG_32BIT ? 
AMDGPU_VA_RANGE_32_BIT : 0) | + AMDGPU_VA_RANGE_HIGH); + if (r) + goto error_va_alloc; - if (!(flags & RADEON_FLAG_READ_ONLY)) - vm_flags |= AMDGPU_VM_PAGE_WRITEABLE; + unsigned vm_flags = AMDGPU_VM_PAGE_READABLE | + AMDGPU_VM_PAGE_EXECUTABLE; - r = amdgpu_bo_va_op_raw(ws->dev, buf_handle, 0, size, va, vm_flags, + if (!(flags & RADEON_FLAG_READ_ONLY)) + vm_flags |= AMDGPU_VM_PAGE_WRITEABLE; + + r = amdgpu_bo_va_op_raw(ws->dev, buf_handle, 0, size, va, vm_flags, AMDGPU_VA_OP_MAP); - if (r) - goto error_va_map; + if (r) + goto error_va_map; + } + simple_mtx_init(&bo->lock, mtx_plain); pipe_reference_init(&bo->base.reference, 1); bo->base.alignment = alignment; bo->base.usage = 0; @@ -486,7 +558,7 @@ else if (initial_domain & RADEON_DOMAIN_GTT) ws->allocated_gtt += align64(size, ws->info.gart_page_size); - amdgpu_bo_export(bo->bo, amdgpu_bo_handle_type_kms_noimport, &bo->u.real.kms_handle); + amdgpu_bo_export(bo->bo, amdgpu_bo_handle_type_kms, &bo->u.real.kms_handle); amdgpu_add_buffer_to_global_list(bo); @@ -522,13 +594,27 @@ return amdgpu_bo_can_reclaim(&bo->base); } +static struct pb_slabs *get_slabs(struct amdgpu_winsys *ws, uint64_t size) +{ + /* Find the correct slab allocator for the given size. 
*/ + for (unsigned i = 0; i < NUM_SLAB_ALLOCATORS; i++) { + struct pb_slabs *slabs = &ws->bo_slabs[i]; + + if (size <= 1 << (slabs->min_order + slabs->num_orders - 1)) + return slabs; + } + + assert(0); + return NULL; +} + static void amdgpu_bo_slab_destroy(struct pb_buffer *_buf) { struct amdgpu_winsys_bo *bo = amdgpu_winsys_bo(_buf); assert(!bo->bo); - pb_slab_free(&bo->ws->bo_slabs, &bo->u.slab.entry); + pb_slab_free(get_slabs(bo->ws, bo->base.size), &bo->u.slab.entry); } static const struct pb_vtbl amdgpu_winsys_bo_slab_vtbl = { @@ -545,19 +631,37 @@ enum radeon_bo_domain domains = radeon_domain_from_heap(heap); enum radeon_bo_flag flags = radeon_flags_from_heap(heap); uint32_t base_id; + unsigned slab_size = 0; if (!slab) return NULL; - unsigned slab_size = 1 << AMDGPU_SLAB_BO_SIZE_LOG2; + /* Determine the slab buffer size. */ + for (unsigned i = 0; i < NUM_SLAB_ALLOCATORS; i++) { + struct pb_slabs *slabs = &ws->bo_slabs[i]; + unsigned max_entry_size = 1 << (slabs->min_order + slabs->num_orders - 1); + + if (entry_size <= max_entry_size) { + /* The slab size is twice the size of the largest possible entry. */ + slab_size = max_entry_size * 2; + + /* The largest slab should have the same size as the PTE fragment + * size to get faster address translation. 
+ */ + if (i == NUM_SLAB_ALLOCATORS - 1 && + slab_size < ws->info.pte_fragment_size) + slab_size = ws->info.pte_fragment_size; + break; + } + } + assert(slab_size != 0); + slab->buffer = amdgpu_winsys_bo(amdgpu_bo_create(&ws->base, slab_size, slab_size, domains, flags)); if (!slab->buffer) goto fail; - assert(slab->buffer->bo); - slab->base.num_entries = slab->buffer->base.size / entry_size; slab->base.num_free = slab->base.num_entries; slab->entries = CALLOC(slab->base.num_entries, sizeof(*slab->entries)); @@ -571,6 +675,7 @@ for (unsigned i = 0; i < slab->base.num_entries; ++i) { struct amdgpu_winsys_bo *bo = &slab->entries[i]; + simple_mtx_init(&bo->lock, mtx_plain); bo->base.alignment = entry_size; bo->base.usage = slab->buffer->base.usage; bo->base.size = entry_size; @@ -581,7 +686,15 @@ bo->unique_id = base_id + i; bo->u.slab.entry.slab = &slab->base; bo->u.slab.entry.group_index = group_index; - bo->u.slab.real = slab->buffer; + + if (slab->buffer->bo) { + /* The slab is not suballocated. */ + bo->u.slab.real = slab->buffer; + } else { + /* The slab is allocated out of a bigger slab. 
*/ + bo->u.slab.real = slab->buffer->u.slab.real; + assert(bo->u.slab.real->bo); + } LIST_ADDTAIL(&bo->u.slab.entry.head, &slab->base.free); } @@ -599,8 +712,10 @@ { struct amdgpu_slab *slab = amdgpu_slab(pslab); - for (unsigned i = 0; i < slab->base.num_entries; ++i) + for (unsigned i = 0; i < slab->base.num_entries; ++i) { amdgpu_bo_remove_fences(&slab->entries[i]); + simple_mtx_destroy(&slab->entries[i].lock); + } FREE(slab->entries); amdgpu_winsys_bo_reference(&slab->buffer, NULL); @@ -858,8 +973,8 @@ } amdgpu_va_range_free(bo->u.sparse.va_handle); - simple_mtx_destroy(&bo->u.sparse.commit_lock); FREE(bo->u.sparse.commitments); + simple_mtx_destroy(&bo->lock); FREE(bo); } @@ -889,6 +1004,7 @@ if (!bo) return NULL; + simple_mtx_init(&bo->lock, mtx_plain); pipe_reference_init(&bo->base.reference, 1); bo->base.alignment = RADEON_SPARSE_PAGE_SIZE; bo->base.size = size; @@ -905,7 +1021,6 @@ if (!bo->u.sparse.commitments) goto error_alloc_commitments; - simple_mtx_init(&bo->u.sparse.commit_lock, mtx_plain); LIST_INITHEAD(&bo->u.sparse.backing); /* For simplicity, we always map a multiple of the page size. */ @@ -928,9 +1043,9 @@ error_va_map: amdgpu_va_range_free(bo->u.sparse.va_handle); error_va_alloc: - simple_mtx_destroy(&bo->u.sparse.commit_lock); FREE(bo->u.sparse.commitments); error_alloc_commitments: + simple_mtx_destroy(&bo->lock); FREE(bo); return NULL; } @@ -955,7 +1070,7 @@ va_page = offset / RADEON_SPARSE_PAGE_SIZE; end_va_page = va_page + DIV_ROUND_UP(size, RADEON_SPARSE_PAGE_SIZE); - simple_mtx_lock(&bo->u.sparse.commit_lock); + simple_mtx_lock(&bo->lock); #if DEBUG_SPARSE_COMMITS sparse_dump(bo, __func__); @@ -1059,7 +1174,7 @@ } out: - simple_mtx_unlock(&bo->u.sparse.commit_lock); + simple_mtx_unlock(&bo->lock); return ok; } @@ -1193,22 +1308,28 @@ /* Sparse buffers must have NO_CPU_ACCESS set. 
*/ assert(!(flags & RADEON_FLAG_SPARSE) || flags & RADEON_FLAG_NO_CPU_ACCESS); + struct pb_slabs *last_slab = &ws->bo_slabs[NUM_SLAB_ALLOCATORS - 1]; + unsigned max_slab_entry_size = 1 << (last_slab->min_order + last_slab->num_orders - 1); + /* Sub-allocate small buffers from slabs. */ if (!(flags & (RADEON_FLAG_NO_SUBALLOC | RADEON_FLAG_SPARSE)) && - size <= (1 << AMDGPU_SLAB_MAX_SIZE_LOG2) && - alignment <= MAX2(1 << AMDGPU_SLAB_MIN_SIZE_LOG2, util_next_power_of_two(size))) { + size <= max_slab_entry_size && + /* The alignment must be at most the size of the smallest slab entry or + * the next power of two. */ + alignment <= MAX2(1 << ws->bo_slabs[0].min_order, util_next_power_of_two(size))) { struct pb_slab_entry *entry; int heap = radeon_get_heap_index(domain, flags); if (heap < 0 || heap >= RADEON_MAX_SLAB_HEAPS) goto no_slab; - entry = pb_slab_alloc(&ws->bo_slabs, size, heap); + struct pb_slabs *slabs = get_slabs(ws, size); + entry = pb_slab_alloc(slabs, size, heap); if (!entry) { - /* Clear the cache and try again. */ - pb_cache_release_all_buffers(&ws->bo_cache); + /* Clean up buffer managers and try again. */ + amdgpu_clean_up_buffer_managers(ws); - entry = pb_slab_alloc(&ws->bo_slabs, size, heap); + entry = pb_slab_alloc(slabs, size, heap); } if (!entry) return NULL; @@ -1235,8 +1356,10 @@ * BOs. Aligning this here helps the cached bufmgr. Especially small BOs, * like constant/uniform buffers, can benefit from better and more reuse. */ - size = align64(size, ws->info.gart_page_size); - alignment = align(alignment, ws->info.gart_page_size); + if (domain & RADEON_DOMAIN_VRAM_GTT) { + size = align64(size, ws->info.gart_page_size); + alignment = align(alignment, ws->info.gart_page_size); + } bool use_reusable_pool = flags & RADEON_FLAG_NO_INTERPROCESS_SHARING; @@ -1254,9 +1377,9 @@ /* Create a new one. */ bo = amdgpu_create_bo(ws, size, alignment, domain, flags, heap); if (!bo) { - /* Clear the cache and try again. 
*/ - pb_slabs_reclaim(&ws->bo_slabs); - pb_cache_release_all_buffers(&ws->bo_cache); + /* Clean up buffer managers and try again. */ + amdgpu_clean_up_buffer_managers(ws); + bo = amdgpu_create_bo(ws, size, alignment, domain, flags, heap); if (!bo) return NULL; @@ -1268,6 +1391,7 @@ static struct pb_buffer *amdgpu_bo_from_handle(struct radeon_winsys *rws, struct winsys_handle *whandle, + unsigned vm_alignment, unsigned *stride, unsigned *offset) { @@ -1325,8 +1449,10 @@ goto error; r = amdgpu_va_range_alloc(ws->dev, amdgpu_gpu_va_range_general, - result.alloc_size, 1 << 20, 0, &va, &va_handle, - AMDGPU_VA_RANGE_HIGH); + result.alloc_size, + amdgpu_get_optimal_vm_alignment(ws, result.alloc_size, + vm_alignment), + 0, &va, &va_handle, AMDGPU_VA_RANGE_HIGH); if (r) goto error; @@ -1344,6 +1470,7 @@ initial |= RADEON_DOMAIN_GTT; /* Initialize the structure. */ + simple_mtx_init(&bo->lock, mtx_plain); pipe_reference_init(&bo->base.reference, 1); bo->base.alignment = info.phys_alignment; bo->bo = result.buf_handle; @@ -1361,7 +1488,7 @@ else if (bo->initial_domain & RADEON_DOMAIN_GTT) ws->allocated_gtt += align64(bo->base.size, ws->info.gart_page_size); - amdgpu_bo_export(bo->bo, amdgpu_bo_handle_type_kms_noimport, &bo->u.real.kms_handle); + amdgpu_bo_export(bo->bo, amdgpu_bo_handle_type_kms, &bo->u.real.kms_handle); amdgpu_add_buffer_to_global_list(bo); @@ -1445,21 +1572,25 @@ goto error; if (amdgpu_va_range_alloc(ws->dev, amdgpu_gpu_va_range_general, - aligned_size, 1 << 12, 0, &va, &va_handle, - AMDGPU_VA_RANGE_HIGH)) + aligned_size, + amdgpu_get_optimal_vm_alignment(ws, aligned_size, + ws->info.gart_page_size), + 0, &va, &va_handle, AMDGPU_VA_RANGE_HIGH)) goto error_va_alloc; if (amdgpu_bo_va_op(buf_handle, 0, aligned_size, va, 0, AMDGPU_VA_OP_MAP)) goto error_va_map; /* Initialize it. 
*/ + bo->is_user_ptr = true; pipe_reference_init(&bo->base.reference, 1); + simple_mtx_init(&bo->lock, mtx_plain); bo->bo = buf_handle; bo->base.alignment = 0; bo->base.size = size; bo->base.vtbl = &amdgpu_winsys_bo_vtbl; bo->ws = ws; - bo->user_ptr = pointer; + bo->cpu_ptr = pointer; bo->va = va; bo->u.real.va_handle = va_handle; bo->initial_domain = RADEON_DOMAIN_GTT; @@ -1469,7 +1600,7 @@ amdgpu_add_buffer_to_global_list(bo); - amdgpu_bo_export(bo->bo, amdgpu_bo_handle_type_kms_noimport, &bo->u.real.kms_handle); + amdgpu_bo_export(bo->bo, amdgpu_bo_handle_type_kms, &bo->u.real.kms_handle); return (struct pb_buffer*)bo; @@ -1486,7 +1617,7 @@ static bool amdgpu_bo_is_user_ptr(struct pb_buffer *buf) { - return ((struct amdgpu_winsys_bo*)buf)->user_ptr != NULL; + return ((struct amdgpu_winsys_bo*)buf)->is_user_ptr; } static bool amdgpu_bo_is_suballocated(struct pb_buffer *buf) diff -Nru mesa-18.3.3/src/gallium/winsys/amdgpu/drm/amdgpu_bo.h mesa-19.0.1/src/gallium/winsys/amdgpu/drm/amdgpu_bo.h --- mesa-18.3.3/src/gallium/winsys/amdgpu/drm/amdgpu_bo.h 2018-12-07 18:58:04.000000000 +0000 +++ mesa-19.0.1/src/gallium/winsys/amdgpu/drm/amdgpu_bo.h 2019-03-31 23:16:37.000000000 +0000 @@ -74,7 +74,6 @@ struct amdgpu_winsys_bo *real; } slab; struct { - simple_mtx_t commit_lock; amdgpu_va_handle va_handle; enum radeon_bo_flag flags; @@ -89,10 +88,12 @@ } u; struct amdgpu_winsys *ws; - void *user_ptr; /* from buffer_from_ptr */ + void *cpu_ptr; /* for user_ptr and permanent maps */ amdgpu_bo_handle bo; /* NULL for slab entries and sparse buffers */ bool sparse; + bool is_user_ptr; + bool is_local; uint32_t unique_id; uint64_t va; enum radeon_bo_domain initial_domain; @@ -114,7 +115,7 @@ unsigned max_fences; struct pipe_fence_handle **fences; - bool is_local; + simple_mtx_t lock; }; struct amdgpu_slab { diff -Nru mesa-18.3.3/src/gallium/winsys/amdgpu/drm/amdgpu_cs.c mesa-19.0.1/src/gallium/winsys/amdgpu/drm/amdgpu_cs.c --- mesa-18.3.3/src/gallium/winsys/amdgpu/drm/amdgpu_cs.c 
2018-12-07 18:58:04.000000000 +0000 +++ mesa-19.0.1/src/gallium/winsys/amdgpu/drm/amdgpu_cs.c 2019-03-31 23:16:37.000000000 +0000 @@ -172,45 +172,45 @@ uint64_t seq_no, uint64_t *user_fence_cpu_address) { - struct amdgpu_fence *rfence = (struct amdgpu_fence*)fence; + struct amdgpu_fence *afence = (struct amdgpu_fence*)fence; - rfence->fence.fence = seq_no; - rfence->user_fence_cpu_address = user_fence_cpu_address; - util_queue_fence_signal(&rfence->submitted); + afence->fence.fence = seq_no; + afence->user_fence_cpu_address = user_fence_cpu_address; + util_queue_fence_signal(&afence->submitted); } static void amdgpu_fence_signalled(struct pipe_fence_handle *fence) { - struct amdgpu_fence *rfence = (struct amdgpu_fence*)fence; + struct amdgpu_fence *afence = (struct amdgpu_fence*)fence; - rfence->signalled = true; - util_queue_fence_signal(&rfence->submitted); + afence->signalled = true; + util_queue_fence_signal(&afence->submitted); } bool amdgpu_fence_wait(struct pipe_fence_handle *fence, uint64_t timeout, bool absolute) { - struct amdgpu_fence *rfence = (struct amdgpu_fence*)fence; + struct amdgpu_fence *afence = (struct amdgpu_fence*)fence; uint32_t expired; int64_t abs_timeout; uint64_t *user_fence_cpu; int r; - if (rfence->signalled) + if (afence->signalled) return true; /* Handle syncobjs. */ - if (amdgpu_fence_is_syncobj(rfence)) { + if (amdgpu_fence_is_syncobj(afence)) { /* Absolute timeouts are only be used by BO fences, which aren't * backed by syncobjs. */ assert(!absolute); - if (amdgpu_cs_syncobj_wait(rfence->ws->dev, &rfence->syncobj, 1, + if (amdgpu_cs_syncobj_wait(afence->ws->dev, &afence->syncobj, 1, timeout, 0, NULL)) return false; - rfence->signalled = true; + afence->signalled = true; return true; } @@ -222,13 +222,13 @@ /* The fence might not have a number assigned if its IB is being * submitted in the other thread right now. Wait until the submission * is done. 
*/ - if (!util_queue_fence_wait_timeout(&rfence->submitted, abs_timeout)) + if (!util_queue_fence_wait_timeout(&afence->submitted, abs_timeout)) return false; - user_fence_cpu = rfence->user_fence_cpu_address; + user_fence_cpu = afence->user_fence_cpu_address; if (user_fence_cpu) { - if (*user_fence_cpu >= rfence->fence.fence) { - rfence->signalled = true; + if (*user_fence_cpu >= afence->fence.fence) { + afence->signalled = true; return true; } @@ -238,7 +238,7 @@ } /* Now use the libdrm query. */ - r = amdgpu_cs_query_fence_status(&rfence->fence, + r = amdgpu_cs_query_fence_status(&afence->fence, abs_timeout, AMDGPU_QUERY_FENCE_TIMEOUT_IS_ABSOLUTE, &expired); @@ -250,7 +250,7 @@ if (expired) { /* This variable can only transition from false to true, so it doesn't * matter if threads race for it. */ - rfence->signalled = true; + afence->signalled = true; return true; } return false; @@ -598,7 +598,7 @@ /* We delay adding the backing buffers until we really have to. However, * we cannot delay accounting for memory use. 
*/ - simple_mtx_lock(&bo->u.sparse.commit_lock); + simple_mtx_lock(&bo->lock); list_for_each_entry(struct amdgpu_sparse_backing, backing, &bo->u.sparse.backing, list) { if (bo->initial_domain & RADEON_DOMAIN_VRAM) @@ -607,7 +607,7 @@ acs->main.base.used_gart += backing->bo->base.size; } - simple_mtx_unlock(&bo->u.sparse.commit_lock); + simple_mtx_unlock(&bo->lock); return idx; } @@ -923,7 +923,8 @@ enum ring_type ring_type, void (*flush)(void *ctx, unsigned flags, struct pipe_fence_handle **fence), - void *flush_ctx) + void *flush_ctx, + bool stop_exec_on_failure) { struct amdgpu_ctx *ctx = (struct amdgpu_ctx*)rwctx; struct amdgpu_cs *cs; @@ -939,6 +940,7 @@ cs->flush_cs = flush; cs->flush_data = flush_ctx; cs->ring_type = ring_type; + cs->stop_exec_on_failure = stop_exec_on_failure; struct amdgpu_cs_fence_info fence_info; fence_info.handle = cs->ctx->user_fence_bo; @@ -1217,8 +1219,6 @@ { struct amdgpu_cs_context *cs = acs->csc; - cs->num_fence_dependencies = 0; - amdgpu_add_fence_dependencies_bo_list(acs, cs->fence, cs->num_real_buffers, cs->real_buffers); amdgpu_add_fence_dependencies_bo_list(acs, cs->fence, cs->num_slab_buffers, cs->slab_buffers); amdgpu_add_fence_dependencies_bo_list(acs, cs->fence, cs->num_sparse_buffers, cs->sparse_buffers); @@ -1265,7 +1265,7 @@ struct amdgpu_cs_buffer *buffer = &cs->sparse_buffers[i]; struct amdgpu_winsys_bo *bo = buffer->bo; - simple_mtx_lock(&bo->u.sparse.commit_lock); + simple_mtx_lock(&bo->lock); list_for_each_entry(struct amdgpu_sparse_backing, backing, &bo->u.sparse.backing, list) { /* We can directly add the buffer here, because we know that each @@ -1274,7 +1274,7 @@ int idx = amdgpu_do_add_real_buffer(cs, backing->bo); if (idx < 0) { fprintf(stderr, "%s: failed to add buffer\n", __FUNCTION__); - simple_mtx_unlock(&bo->u.sparse.commit_lock); + simple_mtx_unlock(&bo->lock); return false; } @@ -1283,7 +1283,7 @@ p_atomic_inc(&backing->bo->num_active_ioctls); } - simple_mtx_unlock(&bo->u.sparse.commit_lock); + 
simple_mtx_unlock(&bo->lock); } return true; @@ -1295,7 +1295,7 @@ struct amdgpu_winsys *ws = acs->ctx->ws; struct amdgpu_cs_context *cs = acs->cst; int i, r; - amdgpu_bo_list_handle bo_list = NULL; + uint32_t bo_list = 0; uint64_t seq_no = 0; bool has_user_fence = amdgpu_cs_has_user_fence(cs); bool use_bo_list_create = ws->info.drm_minor < 27; @@ -1306,27 +1306,28 @@ /* The buffer list contains all buffers. This is a slow path that * ensures that no buffer is missing in the BO list. */ + unsigned num_handles = 0; + struct drm_amdgpu_bo_list_entry *list = + alloca(ws->num_buffers * sizeof(struct drm_amdgpu_bo_list_entry)); struct amdgpu_winsys_bo *bo; - amdgpu_bo_handle *handles; - unsigned num = 0; simple_mtx_lock(&ws->global_bo_list_lock); - handles = alloca(sizeof(handles[0]) * ws->num_buffers); - LIST_FOR_EACH_ENTRY(bo, &ws->global_bo_list, u.real.global_list_item) { - assert(num < ws->num_buffers); - handles[num++] = bo->bo; + if (bo->is_local) + continue; + + list[num_handles].bo_handle = bo->u.real.kms_handle; + list[num_handles].bo_priority = 0; + ++num_handles; } - r = amdgpu_bo_list_create(ws->dev, ws->num_buffers, - handles, NULL, &bo_list); + r = amdgpu_bo_list_create_raw(ws->dev, ws->num_buffers, list, &bo_list); simple_mtx_unlock(&ws->global_bo_list_lock); if (r) { fprintf(stderr, "amdgpu: buffer list creation failed (%d)\n", r); goto cleanup; } - } else if (!use_bo_list_create) { - /* Standard path passing the buffer list via the CS ioctl. */ + } else { if (!amdgpu_add_sparse_backing_buffers(cs)) { fprintf(stderr, "amdgpu: amdgpu_add_sparse_backing_buffers failed\n"); r = -ENOMEM; @@ -1350,52 +1351,27 @@ ++num_handles; } - bo_list_in.operation = ~0; - bo_list_in.list_handle = ~0; - bo_list_in.bo_number = num_handles; - bo_list_in.bo_info_size = sizeof(struct drm_amdgpu_bo_list_entry); - bo_list_in.bo_info_ptr = (uint64_t)(uintptr_t)list; - } else { - /* Legacy path creating the buffer list handle and passing it to the CS ioctl. 
*/ - unsigned num_handles; - - if (!amdgpu_add_sparse_backing_buffers(cs)) { - fprintf(stderr, "amdgpu: amdgpu_add_sparse_backing_buffers failed\n"); - r = -ENOMEM; - goto cleanup; - } - - amdgpu_bo_handle *handles = alloca(sizeof(*handles) * cs->num_real_buffers); - uint8_t *flags = alloca(sizeof(*flags) * cs->num_real_buffers); - - num_handles = 0; - for (i = 0; i < cs->num_real_buffers; ++i) { - struct amdgpu_cs_buffer *buffer = &cs->real_buffers[i]; - - if (buffer->bo->is_local) - continue; - - assert(buffer->u.real.priority_usage != 0); - - handles[num_handles] = buffer->bo->bo; - flags[num_handles] = (util_last_bit(buffer->u.real.priority_usage) - 1) / 2; - ++num_handles; - } - - if (num_handles) { - r = amdgpu_bo_list_create(ws->dev, num_handles, - handles, flags, &bo_list); + if (use_bo_list_create) { + /* Legacy path creating the buffer list handle and passing it to the CS ioctl. */ + r = amdgpu_bo_list_create_raw(ws->dev, num_handles, list, &bo_list); if (r) { fprintf(stderr, "amdgpu: buffer list creation failed (%d)\n", r); goto cleanup; } + } else { + /* Standard path passing the buffer list via the CS ioctl. */ + bo_list_in.operation = ~0; + bo_list_in.list_handle = ~0; + bo_list_in.bo_number = num_handles; + bo_list_in.bo_info_size = sizeof(struct drm_amdgpu_bo_list_entry); + bo_list_in.bo_info_ptr = (uint64_t)(uintptr_t)list; } } if (acs->ring_type == RING_GFX) ws->gfx_bo_list_counter += cs->num_real_buffers; - if (acs->ctx->num_rejected_cs) { + if (acs->stop_exec_on_failure && acs->ctx->num_rejected_cs) { r = -ECANCELED; } else { struct drm_amdgpu_cs_chunk chunks[6]; @@ -1499,8 +1475,8 @@ assert(num_chunks <= ARRAY_SIZE(chunks)); - r = amdgpu_cs_submit_raw(ws->dev, acs->ctx->ctx, bo_list, - num_chunks, chunks, &seq_no); + r = amdgpu_cs_submit_raw2(ws->dev, acs->ctx->ctx, bo_list, + num_chunks, chunks, &seq_no); } if (r) { @@ -1525,7 +1501,7 @@ /* Cleanup. 
*/ if (bo_list) - amdgpu_bo_list_destroy(bo_list); + amdgpu_bo_list_destroy_raw(ws->dev, bo_list); cleanup: /* If there was an error, signal the fence, because it won't be signalled diff -Nru mesa-18.3.3/src/gallium/winsys/amdgpu/drm/amdgpu_cs.h mesa-19.0.1/src/gallium/winsys/amdgpu/drm/amdgpu_cs.h --- mesa-18.3.3/src/gallium/winsys/amdgpu/drm/amdgpu_cs.h 2018-09-27 19:13:54.000000000 +0000 +++ mesa-19.0.1/src/gallium/winsys/amdgpu/drm/amdgpu_cs.h 2019-03-31 23:16:37.000000000 +0000 @@ -129,6 +129,7 @@ /* Flush CS. */ void (*flush_cs)(void *ctx, unsigned flags, struct pipe_fence_handle **fence); void *flush_data; + bool stop_exec_on_failure; struct util_queue_fence flush_completed; struct pipe_fence_handle *next_fence; @@ -169,11 +170,11 @@ static inline void amdgpu_fence_reference(struct pipe_fence_handle **dst, struct pipe_fence_handle *src) { - struct amdgpu_fence **rdst = (struct amdgpu_fence **)dst; - struct amdgpu_fence *rsrc = (struct amdgpu_fence *)src; + struct amdgpu_fence **adst = (struct amdgpu_fence **)dst; + struct amdgpu_fence *asrc = (struct amdgpu_fence *)src; - if (pipe_reference(&(*rdst)->reference, &rsrc->reference)) { - struct amdgpu_fence *fence = *rdst; + if (pipe_reference(&(*adst)->reference, &asrc->reference)) { + struct amdgpu_fence *fence = *adst; if (amdgpu_fence_is_syncobj(fence)) amdgpu_cs_destroy_syncobj(fence->ws->dev, fence->syncobj); @@ -183,7 +184,7 @@ util_queue_fence_destroy(&fence->submitted); FREE(fence); } - *rdst = rsrc; + *adst = asrc; } int amdgpu_lookup_buffer(struct amdgpu_cs_context *cs, struct amdgpu_winsys_bo *bo); diff -Nru mesa-18.3.3/src/gallium/winsys/amdgpu/drm/amdgpu_winsys.c mesa-19.0.1/src/gallium/winsys/amdgpu/drm/amdgpu_winsys.c --- mesa-18.3.3/src/gallium/winsys/amdgpu/drm/amdgpu_winsys.c 2018-12-07 18:58:04.000000000 +0000 +++ mesa-19.0.1/src/gallium/winsys/amdgpu/drm/amdgpu_winsys.c 2019-03-31 23:16:37.000000000 +0000 @@ -38,6 +38,7 @@ #include #include #include +#include "amd/common/ac_llvm_util.h" 
#include "amd/common/sid.h" #include "amd/common/gfx9d.h" @@ -50,6 +51,39 @@ DEBUG_GET_ONCE_BOOL_OPTION(all_bos, "RADEON_ALL_BOS", false) +static void handle_env_var_force_family(struct amdgpu_winsys *ws) +{ + const char *family = debug_get_option("SI_FORCE_FAMILY", NULL); + unsigned i; + + if (!family) + return; + + for (i = CHIP_TAHITI; i < CHIP_LAST; i++) { + if (!strcmp(family, ac_get_llvm_processor_name(i))) { + /* Override family and chip_class. */ + ws->info.family = i; + ws->info.name = "GCN-NOOP"; + + if (i >= CHIP_VEGA10) + ws->info.chip_class = GFX9; + else if (i >= CHIP_TONGA) + ws->info.chip_class = VI; + else if (i >= CHIP_BONAIRE) + ws->info.chip_class = CIK; + else + ws->info.chip_class = SI; + + /* Don't submit any IBs. */ + setenv("RADEON_NOOP", "1", 1); + return; + } + } + + fprintf(stderr, "radeonsi: Unknown family: %s\n", family); + exit(1); +} + /* Helper function to do the ioctls needed for setup and init. */ static bool do_winsys_init(struct amdgpu_winsys *ws, const struct pipe_screen_config *config, @@ -58,6 +92,8 @@ if (!ac_query_gpu_info(fd, ws->dev, &ws->info, &ws->amdinfo)) goto fail; + handle_env_var_force_family(ws); + ws->addrlib = amdgpu_addr_create(&ws->info, &ws->amdinfo, &ws->info.max_alignment); if (!ws->addrlib) { fprintf(stderr, "amdgpu: Cannot create addrlib.\n"); @@ -95,7 +131,10 @@ util_queue_destroy(&ws->cs_queue); simple_mtx_destroy(&ws->bo_fence_lock); - pb_slabs_deinit(&ws->bo_slabs); + for (unsigned i = 0; i < NUM_SLAB_ALLOCATORS; i++) { + if (ws->bo_slabs[i].groups) + pb_slabs_deinit(&ws->bo_slabs[i]); + } pb_cache_deinit(&ws->bo_cache); util_hash_table_destroy(ws->bo_export_table); simple_mtx_destroy(&ws->global_bo_list_lock); @@ -307,16 +346,33 @@ (ws->info.vram_size + ws->info.gart_size) / 8, amdgpu_bo_destroy, amdgpu_bo_can_reclaim); - if (!pb_slabs_init(&ws->bo_slabs, - AMDGPU_SLAB_MIN_SIZE_LOG2, AMDGPU_SLAB_MAX_SIZE_LOG2, - RADEON_MAX_SLAB_HEAPS, - ws, - amdgpu_bo_can_reclaim_slab, - amdgpu_bo_slab_alloc, - 
amdgpu_bo_slab_free)) - goto fail_cache; + unsigned min_slab_order = 9; /* 512 bytes */ + unsigned max_slab_order = 18; /* 256 KB - higher numbers increase memory usage */ + unsigned num_slab_orders_per_allocator = (max_slab_order - min_slab_order) / + NUM_SLAB_ALLOCATORS; + + /* Divide the size order range among slab managers. */ + for (unsigned i = 0; i < NUM_SLAB_ALLOCATORS; i++) { + unsigned min_order = min_slab_order; + unsigned max_order = MIN2(min_order + num_slab_orders_per_allocator, + max_slab_order); + + if (!pb_slabs_init(&ws->bo_slabs[i], + min_order, max_order, + RADEON_MAX_SLAB_HEAPS, + ws, + amdgpu_bo_can_reclaim_slab, + amdgpu_bo_slab_alloc, + amdgpu_bo_slab_free)) { + amdgpu_winsys_destroy(&ws->base); + simple_mtx_unlock(&dev_tab_mutex); + return NULL; + } + + min_slab_order = max_order + 1; + } - ws->info.min_alloc_size = 1 << AMDGPU_SLAB_MIN_SIZE_LOG2; + ws->info.min_alloc_size = 1 << ws->bo_slabs[0].min_order; /* init reference */ pipe_reference_init(&ws->reference, 1); diff -Nru mesa-18.3.3/src/gallium/winsys/amdgpu/drm/amdgpu_winsys.h mesa-19.0.1/src/gallium/winsys/amdgpu/drm/amdgpu_winsys.h --- mesa-18.3.3/src/gallium/winsys/amdgpu/drm/amdgpu_winsys.h 2018-09-27 19:13:54.000000000 +0000 +++ mesa-19.0.1/src/gallium/winsys/amdgpu/drm/amdgpu_winsys.h 2019-03-31 23:16:37.000000000 +0000 @@ -31,22 +31,24 @@ #include "pipebuffer/pb_cache.h" #include "pipebuffer/pb_slab.h" #include "gallium/drivers/radeon/radeon_winsys.h" -#include "addrlib/addrinterface.h" +#include "addrlib/inc/addrinterface.h" #include "util/simple_mtx.h" #include "util/u_queue.h" #include struct amdgpu_cs; -#define AMDGPU_SLAB_MIN_SIZE_LOG2 9 /* 512 bytes */ -#define AMDGPU_SLAB_MAX_SIZE_LOG2 16 /* 64 KB */ -#define AMDGPU_SLAB_BO_SIZE_LOG2 17 /* 128 KB */ +#define NUM_SLAB_ALLOCATORS 3 struct amdgpu_winsys { struct radeon_winsys base; struct pipe_reference reference; struct pb_cache bo_cache; - struct pb_slabs bo_slabs; + + /* Each slab buffer can only contain suballocations 
of equal sizes, so we + * need to layer the allocators, so that we don't waste too much memory. + */ + struct pb_slabs bo_slabs[NUM_SLAB_ALLOCATORS]; amdgpu_device_handle dev; diff -Nru mesa-18.3.3/src/gallium/winsys/amdgpu/drm/Makefile.am mesa-19.0.1/src/gallium/winsys/amdgpu/drm/Makefile.am --- mesa-18.3.3/src/gallium/winsys/amdgpu/drm/Makefile.am 2018-01-24 16:24:53.000000000 +0000 +++ mesa-19.0.1/src/gallium/winsys/amdgpu/drm/Makefile.am 2019-03-31 23:16:37.000000000 +0000 @@ -4,6 +4,7 @@ AM_CFLAGS = \ $(GALLIUM_WINSYS_CFLAGS) \ $(AMDGPU_CFLAGS) \ + $(LLVM_CFLAGS) \ -I$(top_srcdir)/src/amd/ AM_CXXFLAGS = $(AM_CFLAGS) diff -Nru mesa-18.3.3/src/gallium/winsys/amdgpu/drm/meson.build mesa-19.0.1/src/gallium/winsys/amdgpu/drm/meson.build --- mesa-18.3.3/src/gallium/winsys/amdgpu/drm/meson.build 2017-11-14 18:46:21.000000000 +0000 +++ mesa-19.0.1/src/gallium/winsys/amdgpu/drm/meson.build 2019-03-31 23:16:37.000000000 +0000 @@ -31,5 +31,5 @@ c_args : [c_vis_args], cpp_args : [cpp_vis_args], link_with : libamdgpu_addrlib, - dependencies : dep_libdrm_amdgpu, + dependencies : [dep_llvm, dep_libdrm_amdgpu], ) diff -Nru mesa-18.3.3/src/gallium/winsys/freedreno/drm/freedreno_drm_public.h mesa-19.0.1/src/gallium/winsys/freedreno/drm/freedreno_drm_public.h --- mesa-18.3.3/src/gallium/winsys/freedreno/drm/freedreno_drm_public.h 2017-11-05 00:14:08.000000000 +0000 +++ mesa-19.0.1/src/gallium/winsys/freedreno/drm/freedreno_drm_public.h 2019-03-31 23:16:37.000000000 +0000 @@ -3,7 +3,8 @@ #define __FREEDRENO_DRM_PUBLIC_H__ struct pipe_screen; +struct renderonly; -struct pipe_screen *fd_drm_screen_create(int drmFD); +struct pipe_screen *fd_drm_screen_create(int drmFD, struct renderonly *ro); #endif diff -Nru mesa-18.3.3/src/gallium/winsys/freedreno/drm/freedreno_drm_winsys.c mesa-19.0.1/src/gallium/winsys/freedreno/drm/freedreno_drm_winsys.c --- mesa-18.3.3/src/gallium/winsys/freedreno/drm/freedreno_drm_winsys.c 2018-12-07 18:58:04.000000000 +0000 +++ 
mesa-19.0.1/src/gallium/winsys/freedreno/drm/freedreno_drm_winsys.c 2019-03-31 23:16:37.000000000 +0000 @@ -85,7 +85,7 @@ } struct pipe_screen * -fd_drm_screen_create(int fd) +fd_drm_screen_create(int fd, struct renderonly *ro) { struct pipe_screen *pscreen = NULL; @@ -104,7 +104,7 @@ if (!dev) goto unlock; - pscreen = fd_screen_create(dev); + pscreen = fd_screen_create(dev, ro); if (pscreen) { int fd = fd_device_fd(dev); diff -Nru mesa-18.3.3/src/gallium/winsys/freedreno/drm/Makefile.am mesa-19.0.1/src/gallium/winsys/freedreno/drm/Makefile.am --- mesa-18.3.3/src/gallium/winsys/freedreno/drm/Makefile.am 2018-01-24 16:24:53.000000000 +0000 +++ mesa-19.0.1/src/gallium/winsys/freedreno/drm/Makefile.am 2019-03-31 23:16:37.000000000 +0000 @@ -25,6 +25,8 @@ AM_CFLAGS = \ -I$(top_srcdir)/src/gallium/drivers \ + -I$(top_srcdir)/src/freedreno \ + -I$(top_srcdir)/src/freedreno/registers \ $(GALLIUM_WINSYS_CFLAGS) \ $(FREEDRENO_CFLAGS) diff -Nru mesa-18.3.3/src/gallium/winsys/freedreno/drm/meson.build mesa-19.0.1/src/gallium/winsys/freedreno/drm/meson.build --- mesa-18.3.3/src/gallium/winsys/freedreno/drm/meson.build 2018-12-07 18:58:04.000000000 +0000 +++ mesa-19.0.1/src/gallium/winsys/freedreno/drm/meson.build 2019-03-31 23:16:37.000000000 +0000 @@ -23,6 +23,7 @@ files('freedreno_drm_public.h', 'freedreno_drm_winsys.c'), include_directories : [ inc_src, inc_include, inc_gallium, inc_gallium_aux, inc_gallium_drivers, + inc_freedreno, ], c_args : [c_vis_args], dependencies : [dep_libdrm], diff -Nru mesa-18.3.3/src/gallium/winsys/imx/drm/Android.mk mesa-19.0.1/src/gallium/winsys/imx/drm/Android.mk --- mesa-18.3.3/src/gallium/winsys/imx/drm/Android.mk 2017-11-05 00:14:08.000000000 +0000 +++ mesa-19.0.1/src/gallium/winsys/imx/drm/Android.mk 1970-01-01 00:00:00.000000000 +0000 @@ -1,40 +0,0 @@ -# Copyright (C) 2016 Linaro, Ltd, Rob Herring -# -# Permission is hereby granted, free of charge, to any person obtaining a -# copy of this software and associated documentation files (the 
"Software"), -# to deal in the Software without restriction, including without limitation -# the rights to use, copy, modify, merge, publish, distribute, sublicense, -# and/or sell copies of the Software, and to permit persons to whom the -# Software is furnished to do so, subject to the following conditions: -# -# The above copyright notice and this permission notice shall be included -# in all copies or substantial portions of the Software. -# -# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL -# THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING -# FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER -# DEALINGS IN THE SOFTWARE. - -LOCAL_PATH := $(call my-dir) - -include $(LOCAL_PATH)/Makefile.sources - -include $(CLEAR_VARS) - -LOCAL_SRC_FILES := $(C_SOURCES) - -LOCAL_SHARED_LIBRARIES := libdrm_etnaviv - -LOCAL_MODULE := libmesa_winsys_imx - -include $(GALLIUM_COMMON_MK) -include $(BUILD_STATIC_LIBRARY) - -ifneq ($(HAVE_GALLIUM_IMX),) -GALLIUM_TARGET_DRIVERS += imx-drm -$(eval GALLIUM_LIBS += $(LOCAL_MODULE) libmesa_winsys_etnaviv) -$(eval GALLIUM_SHARED_LIBS += $(LOCAL_SHARED_LIBRARIES)) -endif diff -Nru mesa-18.3.3/src/gallium/winsys/imx/drm/imx_drm_public.h mesa-19.0.1/src/gallium/winsys/imx/drm/imx_drm_public.h --- mesa-18.3.3/src/gallium/winsys/imx/drm/imx_drm_public.h 2017-11-05 00:14:08.000000000 +0000 +++ mesa-19.0.1/src/gallium/winsys/imx/drm/imx_drm_public.h 1970-01-01 00:00:00.000000000 +0000 @@ -1,34 +0,0 @@ -/* - * Copyright (C) 2016 Christian Gmeiner - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, 
including without limitation - * the rights to use, copy, modify, merge, publish, distribute, sublicense, - * and/or sell copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice (including the next - * paragraph) shall be included in all copies or substantial portions of the - * Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL - * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - * - * Authors: - * Christian Gmeiner - */ - -#ifndef __IMX_DRM_PUBLIC_H__ -#define __IMX_DRM_PUBLIC_H__ - -struct pipe_screen; - -struct pipe_screen *imx_drm_screen_create(int fd); - -#endif /* __IMX_DRM_PUBLIC_H__ */ diff -Nru mesa-18.3.3/src/gallium/winsys/imx/drm/imx_drm_winsys.c mesa-19.0.1/src/gallium/winsys/imx/drm/imx_drm_winsys.c --- mesa-18.3.3/src/gallium/winsys/imx/drm/imx_drm_winsys.c 2017-11-05 00:14:08.000000000 +0000 +++ mesa-19.0.1/src/gallium/winsys/imx/drm/imx_drm_winsys.c 1970-01-01 00:00:00.000000000 +0000 @@ -1,50 +0,0 @@ -/* - * Copyright (C) 2016 Christian Gmeiner - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * the rights to use, copy, modify, merge, publish, distribute, sublicense, - * and/or sell copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following conditions: - * - * The above 
copyright notice and this permission notice (including the next - * paragraph) shall be included in all copies or substantial portions of the - * Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL - * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - * - * Authors: - * Christian Gmeiner - */ - -#include "imx_drm_public.h" -#include "etnaviv/drm/etnaviv_drm_public.h" -#include "renderonly/renderonly.h" - -#include -#include - -struct pipe_screen *imx_drm_screen_create(int fd) -{ - struct renderonly ro = { - .create_for_resource = renderonly_create_kms_dumb_buffer_for_resource, - .kms_fd = fd, - .gpu_fd = open("/dev/dri/renderD128", O_RDWR | O_CLOEXEC) - }; - - if (ro.gpu_fd < 0) - return NULL; - - struct pipe_screen *screen = etna_drm_screen_create_renderonly(&ro); - if (!screen) - close(ro.gpu_fd); - - return screen; -} diff -Nru mesa-18.3.3/src/gallium/winsys/imx/drm/Makefile.am mesa-19.0.1/src/gallium/winsys/imx/drm/Makefile.am --- mesa-18.3.3/src/gallium/winsys/imx/drm/Makefile.am 2018-01-24 16:24:53.000000000 +0000 +++ mesa-19.0.1/src/gallium/winsys/imx/drm/Makefile.am 1970-01-01 00:00:00.000000000 +0000 @@ -1,35 +0,0 @@ -# Copyright © 2012 Intel Corporation -# -# Permission is hereby granted, free of charge, to any person obtaining a -# copy of this software and associated documentation files (the "Software"), -# to deal in the Software without restriction, including without limitation -# the rights to use, copy, modify, merge, publish, distribute, sublicense, -# and/or sell copies of the Software, and to permit persons to whom the -# Software is furnished to do so, 
subject to the following conditions: -# -# The above copyright notice and this permission notice (including the next -# paragraph) shall be included in all copies or substantial portions of the -# Software. -# -# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, -# EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF -# MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND -# NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT -# HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, -# WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER -# DEALINGS IN THE SOFTWARE. - -include Makefile.sources -include $(top_srcdir)/src/gallium/Automake.inc - -AM_CFLAGS = \ - -I$(top_srcdir)/src/gallium/drivers \ - -I$(top_srcdir)/src/gallium/winsys \ - $(GALLIUM_WINSYS_CFLAGS) - -noinst_LTLIBRARIES = libimxdrm.la - -libimxdrm_la_SOURCES = $(C_SOURCES) - -EXTRA_DIST = meson.build diff -Nru mesa-18.3.3/src/gallium/winsys/imx/drm/Makefile.sources mesa-19.0.1/src/gallium/winsys/imx/drm/Makefile.sources --- mesa-18.3.3/src/gallium/winsys/imx/drm/Makefile.sources 2017-11-05 00:14:08.000000000 +0000 +++ mesa-19.0.1/src/gallium/winsys/imx/drm/Makefile.sources 1970-01-01 00:00:00.000000000 +0000 @@ -1,3 +0,0 @@ -C_SOURCES := \ - imx_drm_public.h \ - imx_drm_winsys.c diff -Nru mesa-18.3.3/src/gallium/winsys/imx/drm/meson.build mesa-19.0.1/src/gallium/winsys/imx/drm/meson.build --- mesa-18.3.3/src/gallium/winsys/imx/drm/meson.build 2017-12-13 19:58:47.000000000 +0000 +++ mesa-19.0.1/src/gallium/winsys/imx/drm/meson.build 1970-01-01 00:00:00.000000000 +0000 @@ -1,33 +0,0 @@ -# Copyright © 2017 Intel Corporation - -# Permission is hereby granted, free of charge, to any person obtaining a copy -# of this software and associated documentation files (the "Software"), to deal -# in the Software without restriction, including without limitation the rights -# to use, copy, 
modify, merge, publish, distribute, sublicense, and/or sell -# copies of the Software, and to permit persons to whom the Software is -# furnished to do so, subject to the following conditions: - -# The above copyright notice and this permission notice shall be included in -# all copies or substantial portions of the Software. - -# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE -# SOFTWARE. - -libimxdrm = static_library( - 'imxdrm', - 'imx_drm_winsys.c', - include_directories : [ - inc_include, inc_src, inc_gallium, inc_gallium_aux, - include_directories('../..'), - ], -) - -driver_imx = declare_dependency( - compile_args : '-DGALLIUM_IMX', - link_with : libimxdrm, -) diff -Nru mesa-18.3.3/src/gallium/winsys/kmsro/drm/Android.mk mesa-19.0.1/src/gallium/winsys/kmsro/drm/Android.mk --- mesa-18.3.3/src/gallium/winsys/kmsro/drm/Android.mk 1970-01-01 00:00:00.000000000 +0000 +++ mesa-19.0.1/src/gallium/winsys/kmsro/drm/Android.mk 2019-03-31 23:16:37.000000000 +0000 @@ -0,0 +1,33 @@ +# Copyright (C) 2014 Emil Velikov +# +# Permission is hereby granted, free of charge, to any person obtaining a +# copy of this software and associated documentation files (the "Software"), +# to deal in the Software without restriction, including without limitation +# the rights to use, copy, modify, merge, publish, distribute, sublicense, +# and/or sell copies of the Software, and to permit persons to whom the +# Software is furnished to do so, subject to the following conditions: +# +# The above copyright notice and this permission notice shall be included +# in all copies or 
substantial portions of the Software. +# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL +# THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING +# FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER +# DEALINGS IN THE SOFTWARE. + +LOCAL_PATH := $(call my-dir) + +# get C_SOURCES +include $(LOCAL_PATH)/Makefile.sources + +include $(CLEAR_VARS) + +LOCAL_SRC_FILES := $(C_SOURCES) + +LOCAL_MODULE := libmesa_winsys_kmsro + +include $(GALLIUM_COMMON_MK) +include $(BUILD_STATIC_LIBRARY) diff -Nru mesa-18.3.3/src/gallium/winsys/kmsro/drm/kmsro_drm_public.h mesa-19.0.1/src/gallium/winsys/kmsro/drm/kmsro_drm_public.h --- mesa-18.3.3/src/gallium/winsys/kmsro/drm/kmsro_drm_public.h 1970-01-01 00:00:00.000000000 +0000 +++ mesa-19.0.1/src/gallium/winsys/kmsro/drm/kmsro_drm_public.h 2019-03-31 23:16:37.000000000 +0000 @@ -0,0 +1,34 @@ +/* + * Copyright (C) 2016 Christian Gmeiner + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. 
+ * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + * Authors: + * Christian Gmeiner + */ + +#ifndef __KMSRO_DRM_PUBLIC_H__ +#define __KMSRO_DRM_PUBLIC_H__ + +struct pipe_screen; + +struct pipe_screen *kmsro_drm_screen_create(int fd); + +#endif /* __KMSRO_DRM_PUBLIC_H__ */ diff -Nru mesa-18.3.3/src/gallium/winsys/kmsro/drm/kmsro_drm_winsys.c mesa-19.0.1/src/gallium/winsys/kmsro/drm/kmsro_drm_winsys.c --- mesa-18.3.3/src/gallium/winsys/kmsro/drm/kmsro_drm_winsys.c 1970-01-01 00:00:00.000000000 +0000 +++ mesa-19.0.1/src/gallium/winsys/kmsro/drm/kmsro_drm_winsys.c 2019-03-31 23:16:37.000000000 +0000 @@ -0,0 +1,86 @@ +/* + * Copyright (C) 2016 Christian Gmeiner + * Copyright (C) 2017 Broadcom + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#include +#include + +#include "kmsro_drm_public.h" +#include "vc4/drm/vc4_drm_public.h" +#include "etnaviv/drm/etnaviv_drm_public.h" +#include "freedreno/drm/freedreno_drm_public.h" +#include "xf86drm.h" + +#include "pipe/p_screen.h" +#include "renderonly/renderonly.h" + +struct pipe_screen *kmsro_drm_screen_create(int fd) +{ + struct pipe_screen *screen = NULL; + struct renderonly ro = { + .kms_fd = fd, + .gpu_fd = -1, + }; + +#if defined(GALLIUM_VC4) + ro.gpu_fd = drmOpenWithType("vc4", NULL, DRM_NODE_RENDER); + if (ro.gpu_fd >= 0) { + /* Passes the vc4-allocated BO through to the KMS-only DRM device using + * PRIME buffer sharing. The VC4 BO must be linear, which the SCANOUT + * flag on allocation will have ensured. 
+ */ + ro.create_for_resource = renderonly_create_gpu_import_for_resource, + screen = vc4_drm_screen_create_renderonly(&ro); + if (!screen) + close(ro.gpu_fd); + + return screen; + } +#endif + +#if defined(GALLIUM_ETNAVIV) + ro.gpu_fd = drmOpenWithType("etnaviv", NULL, DRM_NODE_RENDER); + if (ro.gpu_fd >= 0) { + ro.create_for_resource = renderonly_create_kms_dumb_buffer_for_resource, + screen = etna_drm_screen_create_renderonly(&ro); + if (!screen) + close(ro.gpu_fd); + + return screen; + } +#endif + +#if defined(GALLIUM_FREEDRENO) + ro.gpu_fd = drmOpenWithType("msm", NULL, DRM_NODE_RENDER); + if (ro.gpu_fd >= 0) { + ro.create_for_resource = renderonly_create_kms_dumb_buffer_for_resource, + screen = fd_drm_screen_create(ro.gpu_fd, &ro); + if (!screen) + close(ro.gpu_fd); + + return screen; + } +#endif + + return screen; +} diff -Nru mesa-18.3.3/src/gallium/winsys/kmsro/drm/Makefile.am mesa-19.0.1/src/gallium/winsys/kmsro/drm/Makefile.am --- mesa-18.3.3/src/gallium/winsys/kmsro/drm/Makefile.am 1970-01-01 00:00:00.000000000 +0000 +++ mesa-19.0.1/src/gallium/winsys/kmsro/drm/Makefile.am 2019-03-31 23:16:37.000000000 +0000 @@ -0,0 +1,48 @@ +# Copyright © 2012 Intel Corporation +# +# Permission is hereby granted, free of charge, to any person obtaining a +# copy of this software and associated documentation files (the "Software"), +# to deal in the Software without restriction, including without limitation +# the rights to use, copy, modify, merge, publish, distribute, sublicense, +# and/or sell copies of the Software, and to permit persons to whom the +# Software is furnished to do so, subject to the following conditions: +# +# The above copyright notice and this permission notice (including the next +# paragraph) shall be included in all copies or substantial portions of the +# Software. 
+# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, +# EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF +# MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND +# NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT +# HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, +# WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER +# DEALINGS IN THE SOFTWARE. + +include Makefile.sources +include $(top_srcdir)/src/gallium/Automake.inc + +AM_CFLAGS = \ + -I$(top_srcdir)/src/gallium/drivers \ + -I$(top_srcdir)/src/gallium/winsys \ + $(GALLIUM_WINSYS_CFLAGS) \ + $(LIBDRM_CFLAGS) + +if HAVE_GALLIUM_ETNAVIV +AM_CFLAGS += -DGALLIUM_ETNAVIV +endif + +if HAVE_GALLIUM_VC4 +AM_CFLAGS += -DGALLIUM_VC4 +endif + +if HAVE_GALLIUM_FREEDRENO +AM_CFLAGS += -DGALLIUM_FREEDRENO +endif + +noinst_LTLIBRARIES = libkmsrodrm.la + +libkmsrodrm_la_SOURCES = $(C_SOURCES) + +EXTRA_DIST = meson.build diff -Nru mesa-18.3.3/src/gallium/winsys/kmsro/drm/Makefile.sources mesa-19.0.1/src/gallium/winsys/kmsro/drm/Makefile.sources --- mesa-18.3.3/src/gallium/winsys/kmsro/drm/Makefile.sources 1970-01-01 00:00:00.000000000 +0000 +++ mesa-19.0.1/src/gallium/winsys/kmsro/drm/Makefile.sources 2019-03-31 23:16:37.000000000 +0000 @@ -0,0 +1,3 @@ +C_SOURCES := \ + kmsro_drm_public.h \ + kmsro_drm_winsys.c diff -Nru mesa-18.3.3/src/gallium/winsys/kmsro/drm/meson.build mesa-19.0.1/src/gallium/winsys/kmsro/drm/meson.build --- mesa-18.3.3/src/gallium/winsys/kmsro/drm/meson.build 1970-01-01 00:00:00.000000000 +0000 +++ mesa-19.0.1/src/gallium/winsys/kmsro/drm/meson.build 2019-03-31 23:16:37.000000000 +0000 @@ -0,0 +1,46 @@ +# Copyright © 2017 Broadcom +# +# Permission is hereby granted, free of charge, to any person obtaining a copy +# of this software and associated documentation files (the "Software"), to deal +# in the Software without restriction, including without limitation the rights 
+# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +# copies of the Software, and to permit persons to whom the Software is +# furnished to do so, subject to the following conditions: +# +# The above copyright notice and this permission notice shall be included in +# all copies or substantial portions of the Software. +# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +# SOFTWARE. + +kmsro_c_args = [] +if with_gallium_etnaviv + kmsro_c_args += '-DGALLIUM_ETNAVIV' +endif +if with_gallium_vc4 + kmsro_c_args += '-DGALLIUM_VC4' +endif +if with_gallium_freedreno + kmsro_c_args += '-DGALLIUM_FREEDRENO' +endif + +libkmsrowinsys = static_library( + 'kmsrowinsys', + files('kmsro_drm_winsys.c'), + include_directories : [ + inc_src, inc_include, + inc_gallium, inc_gallium_aux, inc_gallium_winsys, + ], + c_args : [c_vis_args, kmsro_c_args], + dependencies: dep_libdrm, +) + +driver_kmsro = declare_dependency( + compile_args : '-DGALLIUM_KMSRO', + link_with : libkmsrowinsys, +) diff -Nru mesa-18.3.3/src/gallium/winsys/pl111/drm/Android.mk mesa-19.0.1/src/gallium/winsys/pl111/drm/Android.mk --- mesa-18.3.3/src/gallium/winsys/pl111/drm/Android.mk 2017-11-05 00:14:08.000000000 +0000 +++ mesa-19.0.1/src/gallium/winsys/pl111/drm/Android.mk 1970-01-01 00:00:00.000000000 +0000 @@ -1,33 +0,0 @@ -# Copyright (C) 2014 Emil Velikov -# -# Permission is hereby granted, free of charge, to any person obtaining a -# copy of this software and associated documentation files (the "Software"), -# to deal in the Software without restriction, including without 
limitation -# the rights to use, copy, modify, merge, publish, distribute, sublicense, -# and/or sell copies of the Software, and to permit persons to whom the -# Software is furnished to do so, subject to the following conditions: -# -# The above copyright notice and this permission notice shall be included -# in all copies or substantial portions of the Software. -# -# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL -# THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING -# FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER -# DEALINGS IN THE SOFTWARE. - -LOCAL_PATH := $(call my-dir) - -# get C_SOURCES -include $(LOCAL_PATH)/Makefile.sources - -include $(CLEAR_VARS) - -LOCAL_SRC_FILES := $(C_SOURCES) - -LOCAL_MODULE := libmesa_winsys_pl111 - -include $(GALLIUM_COMMON_MK) -include $(BUILD_STATIC_LIBRARY) diff -Nru mesa-18.3.3/src/gallium/winsys/pl111/drm/Makefile.am mesa-19.0.1/src/gallium/winsys/pl111/drm/Makefile.am --- mesa-18.3.3/src/gallium/winsys/pl111/drm/Makefile.am 2018-01-24 16:24:53.000000000 +0000 +++ mesa-19.0.1/src/gallium/winsys/pl111/drm/Makefile.am 1970-01-01 00:00:00.000000000 +0000 @@ -1,36 +0,0 @@ -# Copyright © 2012 Intel Corporation -# -# Permission is hereby granted, free of charge, to any person obtaining a -# copy of this software and associated documentation files (the "Software"), -# to deal in the Software without restriction, including without limitation -# the rights to use, copy, modify, merge, publish, distribute, sublicense, -# and/or sell copies of the Software, and to permit persons to whom the -# Software is furnished to do so, subject to the following conditions: -# -# The above copyright notice and this permission notice (including the next 
-# paragraph) shall be included in all copies or substantial portions of the -# Software. -# -# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, -# EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF -# MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND -# NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT -# HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, -# WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER -# DEALINGS IN THE SOFTWARE. - -include Makefile.sources -include $(top_srcdir)/src/gallium/Automake.inc - -AM_CFLAGS = \ - -I$(top_srcdir)/src/gallium/drivers \ - -I$(top_srcdir)/src/gallium/winsys \ - $(GALLIUM_WINSYS_CFLAGS) \ - $(LIBDRM_CFLAGS) - -noinst_LTLIBRARIES = libpl111drm.la - -libpl111drm_la_SOURCES = $(C_SOURCES) - -EXTRA_DIST = meson.build diff -Nru mesa-18.3.3/src/gallium/winsys/pl111/drm/Makefile.sources mesa-19.0.1/src/gallium/winsys/pl111/drm/Makefile.sources --- mesa-18.3.3/src/gallium/winsys/pl111/drm/Makefile.sources 2017-11-05 00:14:08.000000000 +0000 +++ mesa-19.0.1/src/gallium/winsys/pl111/drm/Makefile.sources 1970-01-01 00:00:00.000000000 +0000 @@ -1,3 +0,0 @@ -C_SOURCES := \ - pl111_drm_public.h \ - pl111_drm_winsys.c diff -Nru mesa-18.3.3/src/gallium/winsys/pl111/drm/meson.build mesa-19.0.1/src/gallium/winsys/pl111/drm/meson.build --- mesa-18.3.3/src/gallium/winsys/pl111/drm/meson.build 2017-12-13 19:58:47.000000000 +0000 +++ mesa-19.0.1/src/gallium/winsys/pl111/drm/meson.build 1970-01-01 00:00:00.000000000 +0000 @@ -1,36 +0,0 @@ -# Copyright © 2017 Broadcom -# -# Permission is hereby granted, free of charge, to any person obtaining a copy -# of this software and associated documentation files (the "Software"), to deal -# in the Software without restriction, including without limitation the rights -# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -# copies of the Software, 
and to permit persons to whom the Software is -# furnished to do so, subject to the following conditions: -# -# The above copyright notice and this permission notice shall be included in -# all copies or substantial portions of the Software. -# -# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE -# SOFTWARE. - -libpl111winsys = static_library( - 'pl111winsys', - files('pl111_drm_winsys.c'), - include_directories : [ - inc_src, inc_include, - inc_gallium, inc_gallium_aux, inc_gallium_winsys, - ], - c_args : [c_vis_args], - dependencies: dep_libdrm, - link_with : libvc4winsys, -) - -driver_pl111 = declare_dependency( - compile_args : '-DGALLIUM_PL111', - link_with : libpl111winsys, -) diff -Nru mesa-18.3.3/src/gallium/winsys/pl111/drm/pl111_drm_public.h mesa-19.0.1/src/gallium/winsys/pl111/drm/pl111_drm_public.h --- mesa-18.3.3/src/gallium/winsys/pl111/drm/pl111_drm_public.h 2017-11-05 00:14:08.000000000 +0000 +++ mesa-19.0.1/src/gallium/winsys/pl111/drm/pl111_drm_public.h 1970-01-01 00:00:00.000000000 +0000 @@ -1,34 +0,0 @@ -/* - * Copyright (C) 2016 Christian Gmeiner - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * the rights to use, copy, modify, merge, publish, distribute, sublicense, - * and/or sell copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice 
and this permission notice (including the next - * paragraph) shall be included in all copies or substantial portions of the - * Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL - * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - * - * Authors: - * Christian Gmeiner - */ - -#ifndef __PL111_DRM_PUBLIC_H__ -#define __PL111_DRM_PUBLIC_H__ - -struct pipe_screen; - -struct pipe_screen *pl111_drm_screen_create(int fd); - -#endif /* __PL111_DRM_PUBLIC_H__ */ diff -Nru mesa-18.3.3/src/gallium/winsys/pl111/drm/pl111_drm_winsys.c mesa-19.0.1/src/gallium/winsys/pl111/drm/pl111_drm_winsys.c --- mesa-18.3.3/src/gallium/winsys/pl111/drm/pl111_drm_winsys.c 2017-11-05 00:14:08.000000000 +0000 +++ mesa-19.0.1/src/gallium/winsys/pl111/drm/pl111_drm_winsys.c 1970-01-01 00:00:00.000000000 +0000 @@ -1,55 +0,0 @@ -/* - * Copyright (C) 2016 Christian Gmeiner - * Copyright (C) 2017 Broadcom - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * the rights to use, copy, modify, merge, publish, distribute, sublicense, - * and/or sell copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice (including the next - * paragraph) shall be included in all copies or substantial portions of the - * Software. 
- * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL - * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ - -#include -#include - -#include "pl111_drm_public.h" -#include "vc4/drm/vc4_drm_public.h" -#include "xf86drm.h" - -#include "pipe/p_screen.h" -#include "renderonly/renderonly.h" - -struct pipe_screen *pl111_drm_screen_create(int fd) -{ - struct renderonly ro = { - /* Passes the vc4-allocated BO through to the pl111 DRM device using - * PRIME buffer sharing. The VC4 BO must be linear, which the SCANOUT - * flag on allocation will have ensured. - */ - .create_for_resource = renderonly_create_gpu_import_for_resource, - .kms_fd = fd, - .gpu_fd = drmOpenWithType("vc4", NULL, DRM_NODE_RENDER), - }; - - if (ro.gpu_fd < 0) - return NULL; - - struct pipe_screen *screen = vc4_drm_screen_create_renderonly(&ro); - if (!screen) - close(ro.gpu_fd); - - return screen; -} diff -Nru mesa-18.3.3/src/gallium/winsys/radeon/drm/radeon_drm_bo.c mesa-19.0.1/src/gallium/winsys/radeon/drm/radeon_drm_bo.c --- mesa-18.3.3/src/gallium/winsys/radeon/drm/radeon_drm_bo.c 2018-09-27 19:13:54.000000000 +0000 +++ mesa-19.0.1/src/gallium/winsys/radeon/drm/radeon_drm_bo.c 2019-03-31 23:16:37.000000000 +0000 @@ -1134,6 +1134,7 @@ static struct pb_buffer *radeon_winsys_bo_from_handle(struct radeon_winsys *rws, struct winsys_handle *whandle, + unsigned vm_alignment, unsigned *stride, unsigned *offset) { @@ -1239,7 +1240,7 @@ if (ws->info.r600_has_virtual_memory && !bo->va) { struct drm_radeon_gem_va va; - bo->va = radeon_bomgr_find_va64(ws, bo->base.size, 1 << 20); + bo->va = radeon_bomgr_find_va64(ws, 
bo->base.size, vm_alignment); va.handle = bo->handle; va.operation = RADEON_VA_MAP; diff -Nru mesa-18.3.3/src/gallium/winsys/radeon/drm/radeon_drm_cs.c mesa-19.0.1/src/gallium/winsys/radeon/drm/radeon_drm_cs.c --- mesa-18.3.3/src/gallium/winsys/radeon/drm/radeon_drm_cs.c 2018-09-27 19:13:54.000000000 +0000 +++ mesa-19.0.1/src/gallium/winsys/radeon/drm/radeon_drm_cs.c 2019-03-31 23:16:37.000000000 +0000 @@ -150,7 +150,8 @@ enum ring_type ring_type, void (*flush)(void *ctx, unsigned flags, struct pipe_fence_handle **fence), - void *flush_ctx) + void *flush_ctx, + bool stop_exec_on_failure) { struct radeon_drm_winsys *ws = (struct radeon_drm_winsys*)ctx; struct radeon_drm_cs *cs; diff -Nru mesa-18.3.3/src/gallium/winsys/radeon/drm/radeon_drm_winsys.c mesa-19.0.1/src/gallium/winsys/radeon/drm/radeon_drm_winsys.c --- mesa-18.3.3/src/gallium/winsys/radeon/drm/radeon_drm_winsys.c 2018-12-07 18:58:04.000000000 +0000 +++ mesa-19.0.1/src/gallium/winsys/radeon/drm/radeon_drm_winsys.c 2019-03-31 23:16:37.000000000 +0000 @@ -589,6 +589,7 @@ /* 2D tiling on CIK is supported since DRM 2.35.0 */ ws->info.has_2d_tiling = ws->info.chip_class <= SI || ws->info.drm_minor >= 35; ws->info.has_read_registers_query = ws->info.drm_minor >= 42; + ws->info.max_alignment = 1024*1024; ws->check_vm = strstr(debug_get_option("R600_DEBUG", ""), "check_vm") != NULL; diff -Nru mesa-18.3.3/src/gallium/winsys/sw/xlib/xlib_sw_winsys.c mesa-19.0.1/src/gallium/winsys/sw/xlib/xlib_sw_winsys.c --- mesa-18.3.3/src/gallium/winsys/sw/xlib/xlib_sw_winsys.c 2017-11-05 00:14:08.000000000 +0000 +++ mesa-19.0.1/src/gallium/winsys/sw/xlib/xlib_sw_winsys.c 2019-03-31 23:16:37.000000000 +0000 @@ -396,6 +396,7 @@ { struct xlib_displaytarget *xlib_dt; unsigned nblocksy, size; + int ignore; xlib_dt = CALLOC_STRUCT(xlib_displaytarget); if (!xlib_dt) @@ -410,7 +411,8 @@ xlib_dt->stride = align(util_format_get_stride(format, width), alignment); size = xlib_dt->stride * nblocksy; - if (!debug_get_option_xlib_no_shm()) { + 
if (!debug_get_option_xlib_no_shm() && + XQueryExtension(xlib_dt->display, "MIT-SHM", &ignore, &ignore, &ignore)) { xlib_dt->data = alloc_shm(xlib_dt, size); if (xlib_dt->data) { xlib_dt->shm = True; diff -Nru mesa-18.3.3/src/gallium/winsys/v3d/drm/v3d_drm_public.h mesa-19.0.1/src/gallium/winsys/v3d/drm/v3d_drm_public.h --- mesa-18.3.3/src/gallium/winsys/v3d/drm/v3d_drm_public.h 2018-09-27 19:13:54.000000000 +0000 +++ mesa-19.0.1/src/gallium/winsys/v3d/drm/v3d_drm_public.h 2019-03-31 23:16:37.000000000 +0000 @@ -25,7 +25,9 @@ #define __VC5_DRM_PUBLIC_H__ struct pipe_screen; +struct renderonly; struct pipe_screen *v3d_drm_screen_create(int drmFD); +struct pipe_screen *v3d_drm_screen_create_renderonly(struct renderonly *ro); #endif /* __VC5_DRM_PUBLIC_H__ */ diff -Nru mesa-18.3.3/src/gallium/winsys/v3d/drm/v3d_drm_winsys.c mesa-19.0.1/src/gallium/winsys/v3d/drm/v3d_drm_winsys.c --- mesa-18.3.3/src/gallium/winsys/v3d/drm/v3d_drm_winsys.c 2018-09-27 19:13:54.000000000 +0000 +++ mesa-19.0.1/src/gallium/winsys/v3d/drm/v3d_drm_winsys.c 2019-03-31 23:16:37.000000000 +0000 @@ -31,5 +31,11 @@ struct pipe_screen * v3d_drm_screen_create(int fd) { - return v3d_screen_create(fcntl(fd, F_DUPFD_CLOEXEC, 3)); + return v3d_screen_create(fcntl(fd, F_DUPFD_CLOEXEC, 3), NULL); +} + +struct pipe_screen * +v3d_drm_screen_create_renderonly(struct renderonly *ro) +{ + return v3d_screen_create(ro->gpu_fd, ro); } diff -Nru mesa-18.3.3/src/gallium/winsys/virgl/drm/virgl_drm_winsys.c mesa-19.0.1/src/gallium/winsys/virgl/drm/virgl_drm_winsys.c --- mesa-18.3.3/src/gallium/winsys/virgl/drm/virgl_drm_winsys.c 2018-12-07 18:58:04.000000000 +0000 +++ mesa-19.0.1/src/gallium/winsys/virgl/drm/virgl_drm_winsys.c 2019-03-31 23:16:37.000000000 +0000 @@ -38,11 +38,17 @@ #include "virgl/virgl_public.h" #include +#include #include "virtgpu_drm.h" #include "virgl_drm_winsys.h" #include "virgl_drm_public.h" + +#define VIRGL_DRM_VERSION(major, minor) ((major) << 16 | (minor)) +#define 
VIRGL_DRM_VERSION_FENCE_FD VIRGL_DRM_VERSION(1, 0) + + static inline boolean can_cache_resource(struct virgl_hw_res *res) { return res->cacheable == TRUE; @@ -70,6 +76,9 @@ if (res->ptr) os_munmap(res->ptr, res->size); + if (res->fence_fd != -1) + close(res->fence_fd); + memset(&args, 0, sizeof(args)); args.handle = res->bo_handle; drmIoctl(qdws->fd, DRM_IOCTL_GEM_CLOSE, &args); @@ -222,6 +231,7 @@ res->stride = stride; pipe_reference_init(&res->reference, 1); res->num_cs_references = 0; + res->fence_fd = -1; return res; } @@ -457,6 +467,7 @@ res->stride = info_arg.stride; pipe_reference_init(&res->reference, 1); res->num_cs_references = 0; + res->fence_fd = -1; util_hash_table_set(qdws->bo_handles, (void *)(uintptr_t)handle, res); @@ -577,6 +588,7 @@ } cbuf->base.buf = cbuf->buf; + cbuf->base.in_fence_fd = -1; return &cbuf->base; } @@ -687,7 +699,8 @@ } static int virgl_drm_winsys_submit_cmd(struct virgl_winsys *qws, - struct virgl_cmd_buf *_cbuf) + struct virgl_cmd_buf *_cbuf, + int in_fence_fd, int *out_fence_fd) { struct virgl_drm_winsys *qdws = virgl_drm_winsys(qws); struct virgl_drm_cmd_buf *cbuf = virgl_drm_cmd_buf(_cbuf); @@ -702,12 +715,24 @@ eb.size = cbuf->base.cdw * 4; eb.num_bo_handles = cbuf->cres; eb.bo_handles = (unsigned long)(void *)cbuf->res_hlist; + eb.fence_fd = -1; + + if (in_fence_fd != -1) { + eb.flags |= VIRTGPU_EXECBUF_FENCE_FD_IN; + eb.fence_fd = in_fence_fd; + } + + if (out_fence_fd != NULL) + eb.flags |= VIRTGPU_EXECBUF_FENCE_FD_OUT; ret = drmIoctl(qdws->fd, DRM_IOCTL_VIRTGPU_EXECBUFFER, &eb); if (ret == -1) fprintf(stderr,"got error from kernel - expect bad rendering %d\n", errno); cbuf->base.cdw = 0; + if (out_fence_fd != NULL) + *out_fence_fd = eb.fence_fd; + virgl_drm_release_all_res(qdws, cbuf); memset(cbuf->is_handle_added, 0, sizeof(cbuf->is_handle_added)); @@ -759,7 +784,7 @@ } static struct pipe_fence_handle * -virgl_cs_create_fence(struct virgl_winsys *vws) +virgl_cs_create_fence(struct virgl_winsys *vws, int fd) { struct 
virgl_hw_res *res; @@ -769,6 +794,7 @@ VIRGL_BIND_CUSTOM, 8, 1, 1, 0, 0, 0, 8); + res->fence_fd = fd; return (struct pipe_fence_handle *)res; } @@ -793,6 +819,12 @@ return TRUE; } virgl_drm_resource_wait(vws, res); + + if (res->fence_fd != -1) { + int ret = sync_wait(res->fence_fd, timeout / 1000000); + return ret == 0; + } + return TRUE; } @@ -805,11 +837,51 @@ virgl_hw_res(src)); } +static void virgl_fence_server_sync(struct virgl_winsys *vws, + struct virgl_cmd_buf *cbuf, + struct pipe_fence_handle *fence) +{ + struct virgl_hw_res *hw_res = virgl_hw_res(fence); + + /* if not an external fence, then nothing more to do without preemption: */ + if (hw_res->fence_fd == -1) + return; + + sync_accumulate("virgl", &cbuf->in_fence_fd, hw_res->fence_fd); +} + +static int virgl_fence_get_fd(struct virgl_winsys *vws, + struct pipe_fence_handle *fence) +{ + struct virgl_hw_res *hw_res = virgl_hw_res(fence); + + return dup(hw_res->fence_fd); +} + +static int virgl_drm_get_version(int fd) +{ + int ret; + drmVersionPtr version; + + version = drmGetVersion(fd); + + if (!version) + ret = -EFAULT; + else if (version->version_major != 0) + ret = -EINVAL; + else + ret = version->version_minor; + + drmFreeVersion(version); + + return ret; +} static struct virgl_winsys * virgl_drm_winsys_create(int drmFD) { struct virgl_drm_winsys *qdws; + int drm_version; int ret; int gl = 0; struct drm_virtgpu_getparam getparam = {0}; @@ -820,6 +892,10 @@ if (ret < 0 || !gl) return NULL; + drm_version = virgl_drm_get_version(drmFD); + if (drm_version < 0) + return NULL; + qdws = CALLOC_STRUCT(virgl_drm_winsys); if (!qdws) return NULL; @@ -851,6 +927,9 @@ qdws->base.cs_create_fence = virgl_cs_create_fence; qdws->base.fence_wait = virgl_fence_wait; qdws->base.fence_reference = virgl_fence_reference; + qdws->base.fence_server_sync = virgl_fence_server_sync; + qdws->base.fence_get_fd = virgl_fence_get_fd; + qdws->base.supports_fences = drm_version >= VIRGL_DRM_VERSION_FENCE_FD; qdws->base.get_caps = 
virgl_drm_get_caps; diff -Nru mesa-18.3.3/src/gallium/winsys/virgl/drm/virgl_drm_winsys.h mesa-19.0.1/src/gallium/winsys/virgl/drm/virgl_drm_winsys.h --- mesa-18.3.3/src/gallium/winsys/virgl/drm/virgl_drm_winsys.h 2018-03-08 23:00:46.000000000 +0000 +++ mesa-19.0.1/src/gallium/winsys/virgl/drm/virgl_drm_winsys.h 2019-03-31 23:16:37.000000000 +0000 @@ -50,6 +50,7 @@ int64_t start, end; boolean flinked; uint32_t flink; + int fence_fd; }; struct virgl_drm_winsys diff -Nru mesa-18.3.3/src/gallium/winsys/virgl/drm/virtgpu_drm.h mesa-19.0.1/src/gallium/winsys/virgl/drm/virtgpu_drm.h --- mesa-18.3.3/src/gallium/winsys/virgl/drm/virtgpu_drm.h 2018-03-08 23:00:46.000000000 +0000 +++ mesa-19.0.1/src/gallium/winsys/virgl/drm/virtgpu_drm.h 2019-03-31 23:16:37.000000000 +0000 @@ -44,6 +44,16 @@ #define DRM_VIRTGPU_WAIT 0x08 #define DRM_VIRTGPU_GET_CAPS 0x09 +/* + * virtgpu execbuffer flags + */ +#define VIRTGPU_EXECBUF_FENCE_FD_IN 0x01 +#define VIRTGPU_EXECBUF_FENCE_FD_OUT 0x02 +#define VIRTGPU_EXECBUF_FLAGS (\ + VIRTGPU_EXECBUF_FENCE_FD_IN |\ + VIRTGPU_EXECBUF_FENCE_FD_OUT |\ + 0) + struct drm_virtgpu_map { uint64_t offset; /* use for mmap system call */ uint32_t handle; @@ -56,7 +66,7 @@ uint64_t command; /* void* */ uint64_t bo_handles; uint32_t num_bo_handles; - uint32_t pad; + int32_t fence_fd; }; #define VIRTGPU_PARAM_3D_FEATURES 1 /* do we have 3D features in the hw */ @@ -130,7 +140,7 @@ DRM_IOWR(DRM_COMMAND_BASE + DRM_VIRTGPU_MAP, struct drm_virtgpu_map) #define DRM_IOCTL_VIRTGPU_EXECBUFFER \ - DRM_IOW(DRM_COMMAND_BASE + DRM_VIRTGPU_EXECBUFFER,\ + DRM_IOWR(DRM_COMMAND_BASE + DRM_VIRTGPU_EXECBUFFER,\ struct drm_virtgpu_execbuffer) #define DRM_IOCTL_VIRTGPU_GETPARAM \ diff -Nru mesa-18.3.3/src/gallium/winsys/virgl/vtest/virgl_vtest_socket.c mesa-19.0.1/src/gallium/winsys/virgl/vtest/virgl_vtest_socket.c --- mesa-18.3.3/src/gallium/winsys/virgl/vtest/virgl_vtest_socket.c 2018-12-07 18:58:04.000000000 +0000 +++ 
mesa-19.0.1/src/gallium/winsys/virgl/vtest/virgl_vtest_socket.c 2019-03-31 23:16:37.000000000 +0000 @@ -30,8 +30,6 @@ #include #include -/* connect to remote socket */ -#define VTEST_SOCKET_NAME "/tmp/.virgl_test" #include "virgl_vtest_winsys.h" #include "virgl_vtest_public.h" @@ -163,7 +161,7 @@ memset(&un, 0, sizeof(un)); un.sun_family = AF_UNIX; - snprintf(un.sun_path, sizeof(un.sun_path), "%s", VTEST_SOCKET_NAME); + snprintf(un.sun_path, sizeof(un.sun_path), "%s", VTEST_DEFAULT_SOCKET_NAME); do { ret = 0; diff -Nru mesa-18.3.3/src/gallium/winsys/virgl/vtest/virgl_vtest_winsys.c mesa-19.0.1/src/gallium/winsys/virgl/vtest/virgl_vtest_winsys.c --- mesa-18.3.3/src/gallium/winsys/virgl/vtest/virgl_vtest_winsys.c 2019-02-01 12:03:20.000000000 +0000 +++ mesa-19.0.1/src/gallium/winsys/virgl/vtest/virgl_vtest_winsys.c 2019-03-31 23:16:37.000000000 +0000 @@ -344,7 +344,7 @@ struct virgl_hw_res *res, *curr_res; struct list_head *curr, *next; int64_t now; - int ret; + int ret = -1; /* only store binds for vertex/index/const buffers */ if (bind != VIRGL_BIND_CONSTANT_BUFFER && bind != VIRGL_BIND_INDEX_BUFFER && @@ -427,6 +427,7 @@ } cbuf->ws = vws; cbuf->base.buf = cbuf->buf; + cbuf->base.in_fence_fd = -1; return &cbuf->base; } @@ -501,7 +502,8 @@ } static int virgl_vtest_winsys_submit_cmd(struct virgl_winsys *vws, - struct virgl_cmd_buf *_cbuf) + struct virgl_cmd_buf *_cbuf, + int in_fence_fd, int *out_fence_fd) { struct virgl_vtest_winsys *vtws = virgl_vtest_winsys(vws); struct virgl_vtest_cmd_buf *cbuf = virgl_vtest_cmd_buf(_cbuf); @@ -510,6 +512,9 @@ if (cbuf->base.cdw == 0) return 0; + assert(in_fence_fd == -1); + assert(out_fence_fd == NULL); + ret = virgl_vtest_submit_cmd(vtws, cbuf); virgl_vtest_release_all_res(vtws, cbuf); @@ -552,7 +557,7 @@ } static struct pipe_fence_handle * -virgl_cs_create_fence(struct virgl_winsys *vws) +virgl_cs_create_fence(struct virgl_winsys *vws, int fd) { struct virgl_hw_res *res; @@ -694,6 +699,7 @@ vtws->base.cs_create_fence = 
virgl_cs_create_fence; vtws->base.fence_wait = virgl_fence_wait; vtws->base.fence_reference = virgl_fence_reference; + vtws->base.supports_fences = 0; vtws->base.flush_frontbuffer = virgl_vtest_flush_frontbuffer; diff -Nru mesa-18.3.3/src/gbm/backends/dri/gbm_dri.c mesa-19.0.1/src/gbm/backends/dri/gbm_dri.c --- mesa-18.3.3/src/gbm/backends/dri/gbm_dri.c 2018-12-07 18:58:04.000000000 +0000 +++ mesa-19.0.1/src/gbm/backends/dri/gbm_dri.c 2019-03-31 23:16:37.000000000 +0000 @@ -304,28 +304,6 @@ static const __DRIextension ** dri_open_driver(struct gbm_dri_device *dri) { - const __DRIextension **extensions = NULL; - char path[PATH_MAX], *search_paths, *p, *next, *end; - char *get_extensions_name; - - search_paths = NULL; - /* don't allow setuid apps to use LIBGL_DRIVERS_PATH or GBM_DRIVERS_PATH */ - if (geteuid() == getuid()) { - /* Read GBM_DRIVERS_PATH first for compatibility, but LIBGL_DRIVERS_PATH - * is recommended over GBM_DRIVERS_PATH. - */ - search_paths = getenv("GBM_DRIVERS_PATH"); - - /* Read LIBGL_DRIVERS_PATH if GBM_DRIVERS_PATH was not set. - * LIBGL_DRIVERS_PATH is recommended over GBM_DRIVERS_PATH. - */ - if (search_paths == NULL) { - search_paths = getenv("LIBGL_DRIVERS_PATH"); - } - } - if (search_paths == NULL) - search_paths = DEFAULT_DRIVER_DIR; - /* Temporarily work around dri driver libs that need symbols in libglapi * but don't automatically link it in. 
*/ @@ -334,56 +312,18 @@ */ dlopen("libglapi.so.0", RTLD_LAZY | RTLD_GLOBAL); - dri->driver = NULL; - end = search_paths + strlen(search_paths); - for (p = search_paths; p < end && dri->driver == NULL; p = next + 1) { - int len; - next = strchr(p, ':'); - if (next == NULL) - next = end; - - len = next - p; -#if GLX_USE_TLS - snprintf(path, sizeof path, - "%.*s/tls/%s_dri.so", len, p, dri->driver_name); - dri->driver = dlopen(path, RTLD_NOW | RTLD_GLOBAL); -#endif - if (dri->driver == NULL) { - snprintf(path, sizeof path, - "%.*s/%s_dri.so", len, p, dri->driver_name); - dri->driver = dlopen(path, RTLD_NOW | RTLD_GLOBAL); - } - /* not need continue to loop all paths once the driver is found */ - if (dri->driver != NULL) - break; - } - - if (dri->driver == NULL) { - fprintf(stderr, "gbm: failed to open any driver (search paths %s)\n", - search_paths); - fprintf(stderr, "gbm: Last dlopen error: %s\n", dlerror()); - return NULL; - } - - get_extensions_name = loader_get_extensions_name(dri->driver_name); - if (get_extensions_name) { - const __DRIextension **(*get_extensions)(void); - - get_extensions = dlsym(dri->driver, get_extensions_name); - free(get_extensions_name); - - if (get_extensions) - extensions = get_extensions(); - } - - if (!extensions) - extensions = dlsym(dri->driver, __DRI_DRIVER_EXTENSIONS); - if (extensions == NULL) { - fprintf(stderr, "gbm: driver exports no extensions (%s)", dlerror()); - dlclose(dri->driver); - } - - return extensions; + static const char *search_path_vars[] = { + /* Read GBM_DRIVERS_PATH first for compatibility, but LIBGL_DRIVERS_PATH + * is recommended over GBM_DRIVERS_PATH. + */ + "GBM_DRIVERS_PATH", + /* Read LIBGL_DRIVERS_PATH if GBM_DRIVERS_PATH was not set. + * LIBGL_DRIVERS_PATH is recommended over GBM_DRIVERS_PATH. 
+ */ + "LIBGL_DRIVERS_PATH", + NULL + }; + return loader_open_driver(dri->driver_name, &dri->driver, search_path_vars); } static int @@ -594,22 +534,6 @@ }, }; -/* The two GBM_BO_FORMAT_[XA]RGB8888 formats alias the GBM_FORMAT_* - * formats of the same name. We want to accept them whenever someone - * has a GBM format, but never return them to the user. */ -static int -gbm_format_canonicalize(uint32_t gbm_format) -{ - switch (gbm_format) { - case GBM_BO_FORMAT_XRGB8888: - return GBM_FORMAT_XRGB8888; - case GBM_BO_FORMAT_ARGB8888: - return GBM_FORMAT_ARGB8888; - default: - return gbm_format; - } -} - static int gbm_format_to_dri_format(uint32_t gbm_format) { diff -Nru mesa-18.3.3/src/gbm/gbm-symbols-check mesa-19.0.1/src/gbm/gbm-symbols-check --- mesa-18.3.3/src/gbm/gbm-symbols-check 2018-04-03 17:32:26.000000000 +0000 +++ mesa-19.0.1/src/gbm/gbm-symbols-check 2019-03-31 23:16:37.000000000 +0000 @@ -38,6 +38,7 @@ gbm_bo_set_user_data gbm_bo_get_user_data gbm_bo_destroy +gbm_format_get_name gbm_surface_create gbm_surface_create_with_modifiers gbm_surface_lock_front_buffer diff -Nru mesa-18.3.3/src/gbm/main/gbm.c mesa-19.0.1/src/gbm/main/gbm.c --- mesa-18.3.3/src/gbm/main/gbm.c 2017-11-05 00:14:08.000000000 +0000 +++ mesa-19.0.1/src/gbm/main/gbm.c 2019-03-31 23:16:37.000000000 +0000 @@ -448,14 +448,14 @@ * \param gbm The gbm device returned from gbm_create_device() * \param width The width for the buffer * \param height The height for the buffer - * \param format The format to use for the buffer + * \param format The format to use for the buffer, from GBM_FORMAT_* or + * GBM_BO_FORMAT_* tokens * \param usage The union of the usage flags for this buffer * * \return A newly allocated buffer that should be freed with gbm_bo_destroy() * when no longer needed. If an error occurs during allocation %NULL will be * returned and errno set. 
* - * \sa enum gbm_bo_format for the list of formats * \sa enum gbm_bo_flags for the list of usage flags */ GBM_EXPORT struct gbm_bo * @@ -695,3 +695,39 @@ { return surf->gbm->surface_has_free_buffers(surf); } + +/* The two GBM_BO_FORMAT_[XA]RGB8888 formats alias the GBM_FORMAT_* + * formats of the same name. We want to accept them whenever someone + * has a GBM format, but never return them to the user. */ +uint32_t +gbm_format_canonicalize(uint32_t gbm_format) +{ + switch (gbm_format) { + case GBM_BO_FORMAT_XRGB8888: + return GBM_FORMAT_XRGB8888; + case GBM_BO_FORMAT_ARGB8888: + return GBM_FORMAT_ARGB8888; + default: + return gbm_format; + } +} + +/** + * Returns a string representing the fourcc format name. + * + * \param desc Caller-provided storage for the format name string. + * \return String containing the fourcc of the format. + */ +GBM_EXPORT char * +gbm_format_get_name(uint32_t gbm_format, struct gbm_format_name_desc *desc) +{ + gbm_format = gbm_format_canonicalize(gbm_format); + + desc->name[0] = gbm_format; + desc->name[1] = gbm_format >> 8; + desc->name[2] = gbm_format >> 16; + desc->name[3] = gbm_format >> 24; + desc->name[4] = 0; + + return desc->name; +} diff -Nru mesa-18.3.3/src/gbm/main/gbm.h mesa-19.0.1/src/gbm/main/gbm.h --- mesa-18.3.3/src/gbm/main/gbm.h 2018-04-03 17:32:26.000000000 +0000 +++ mesa-19.0.1/src/gbm/main/gbm.h 2019-03-31 23:16:37.000000000 +0000 @@ -190,6 +190,9 @@ #define GBM_FORMAT_YUV444 __gbm_fourcc_code('Y', 'U', '2', '4') /* non-subsampled Cb (1) and Cr (2) planes */ #define GBM_FORMAT_YVU444 __gbm_fourcc_code('Y', 'V', '2', '4') /* non-subsampled Cr (1) and Cb (2) planes */ +struct gbm_format_name_desc { + char name[5]; +}; /** * Flags to indicate the intended use for the buffer - these are passed into @@ -399,6 +402,9 @@ void gbm_surface_destroy(struct gbm_surface *surface); +char * +gbm_format_get_name(uint32_t gbm_format, struct gbm_format_name_desc *desc); + #ifdef __cplusplus } #endif diff -Nru 
mesa-18.3.3/src/gbm/main/gbmint.h mesa-19.0.1/src/gbm/main/gbmint.h --- mesa-18.3.3/src/gbm/main/gbmint.h 2017-11-05 00:14:08.000000000 +0000 +++ mesa-19.0.1/src/gbm/main/gbmint.h 2019-03-31 23:16:37.000000000 +0000 @@ -133,4 +133,7 @@ struct gbm_device *(*create_device)(int fd); }; +uint32_t +gbm_format_canonicalize(uint32_t gbm_format); + #endif diff -Nru mesa-18.3.3/src/gbm/Makefile.am mesa-19.0.1/src/gbm/Makefile.am --- mesa-18.3.3/src/gbm/Makefile.am 2018-01-24 16:24:53.000000000 +0000 +++ mesa-19.0.1/src/gbm/Makefile.am 2019-03-31 23:16:37.000000000 +0000 @@ -42,7 +42,6 @@ $(gbm_dri_FILES) AM_CFLAGS += \ - -DDEFAULT_DRIVER_DIR='"$(DRI_DRIVER_SEARCH_DIR)"' \ $(LIBDRM_CFLAGS) \ $(PTHREADSTUBS_CFLAGS) diff -Nru mesa-18.3.3/src/gbm/meson.build mesa-19.0.1/src/gbm/meson.build --- mesa-18.3.3/src/gbm/meson.build 2018-12-07 18:58:04.000000000 +0000 +++ mesa-19.0.1/src/gbm/meson.build 2019-03-31 23:16:37.000000000 +0000 @@ -37,7 +37,6 @@ if with_dri2 files_gbm += files('backends/dri/gbm_dri.c', 'backends/dri/gbm_driint.h') deps_gbm += dep_libdrm # TODO: pthread-stubs - args_gbm += '-DDEFAULT_DRIVER_DIR="@0@"'.format(dri_search_path) endif if with_platform_wayland deps_gbm += dep_wayland_server @@ -72,6 +71,7 @@ 'gbm-symbols-check', find_program('gbm-symbols-check'), env : env_test, - args : libgbm + args : libgbm, + suite : ['gbm'], ) endif diff -Nru mesa-18.3.3/src/glx/dri2_glx.c mesa-19.0.1/src/glx/dri2_glx.c --- mesa-18.3.3/src/glx/dri2_glx.c 2018-03-08 23:00:46.000000000 +0000 +++ mesa-19.0.1/src/glx/dri2_glx.c 2019-03-31 23:16:37.000000000 +0000 @@ -1252,13 +1252,7 @@ driverName = loader_driverName; } - psc->driver = driOpenDriver(driverName); - if (psc->driver == NULL) { - ErrorMessageF("driver pointer missing\n"); - goto handle_error; - } - - extensions = driGetDriverExtensions(psc->driver, driverName); + extensions = driOpenDriver(driverName, &psc->driver); if (extensions == NULL) goto handle_error; diff -Nru mesa-18.3.3/src/glx/dri3_glx.c 
mesa-19.0.1/src/glx/dri3_glx.c --- mesa-18.3.3/src/glx/dri3_glx.c 2018-04-03 17:32:26.000000000 +0000 +++ mesa-19.0.1/src/glx/dri3_glx.c 2019-03-31 23:16:37.000000000 +0000 @@ -861,13 +861,7 @@ goto handle_error; } - psc->driver = driOpenDriver(driverName); - if (psc->driver == NULL) { - ErrorMessageF("driver pointer missing\n"); - goto handle_error; - } - - extensions = driGetDriverExtensions(psc->driver, driverName); + extensions = driOpenDriver(driverName, &psc->driver); if (extensions == NULL) goto handle_error; diff -Nru mesa-18.3.3/src/glx/dri_common.c mesa-19.0.1/src/glx/dri_common.c --- mesa-18.3.3/src/glx/dri_common.c 2017-12-02 01:35:56.000000000 +0000 +++ mesa-19.0.1/src/glx/dri_common.c 2019-03-31 23:16:37.000000000 +0000 @@ -77,11 +77,6 @@ #define GL_LIB_NAME "libGL.so.1" #endif -#ifndef DEFAULT_DRIVER_DIR -/* this is normally defined in Mesa/configs/default with DRI_DRIVER_SEARCH_PATH */ -#define DEFAULT_DRIVER_DIR "/usr/local/lib/dri" -#endif - /** * Try to \c dlopen the named driver. * @@ -90,97 +85,32 @@ * order to find the driver. * * \param driverName - a name like "i965", "radeon", "nouveau", etc. + * \param out_driver_handle - Address to return the resulting dlopen() handle. * * \returns - * A handle from \c dlopen, or \c NULL if driver file not found. + * The __DRIextension entrypoint table for the driver, or \c NULL if driver + * file not found. 
*/ -_X_HIDDEN void * -driOpenDriver(const char *driverName) +_X_HIDDEN const __DRIextension ** +driOpenDriver(const char *driverName, void **out_driver_handle) { - void *glhandle, *handle; - const char *libPaths, *p, *next; - char realDriverName[200]; - int len; + void *glhandle; /* Attempt to make sure libGL symbols will be visible to the driver */ glhandle = dlopen(GL_LIB_NAME, RTLD_NOW | RTLD_GLOBAL); - libPaths = NULL; - if (geteuid() == getuid()) { - /* don't allow setuid apps to use LIBGL_DRIVERS_PATH */ - libPaths = getenv("LIBGL_DRIVERS_PATH"); - if (!libPaths) - libPaths = getenv("LIBGL_DRIVERS_DIR"); /* deprecated */ - } - if (libPaths == NULL) - libPaths = DEFAULT_DRIVER_DIR; - - handle = NULL; - for (p = libPaths; *p; p = next) { - next = strchr(p, ':'); - if (next == NULL) { - len = strlen(p); - next = p + len; - } - else { - len = next - p; - next++; - } - -#ifdef GLX_USE_TLS - snprintf(realDriverName, sizeof realDriverName, - "%.*s/tls/%s_dri.so", len, p, driverName); - InfoMessageF("OpenDriver: trying %s\n", realDriverName); - handle = dlopen(realDriverName, RTLD_NOW | RTLD_GLOBAL); -#endif - - if (handle == NULL) { - snprintf(realDriverName, sizeof realDriverName, - "%.*s/%s_dri.so", len, p, driverName); - InfoMessageF("OpenDriver: trying %s\n", realDriverName); - handle = dlopen(realDriverName, RTLD_NOW | RTLD_GLOBAL); - } - - if (handle != NULL) - break; - else - InfoMessageF("dlopen %s failed (%s)\n", realDriverName, dlerror()); - } + static const char *search_path_vars[] = { + "LIBGL_DRIVERS_PATH", + "LIBGL_DRIVERS_DIR", /* deprecated */ + NULL + }; - if (!handle) - ErrorMessageF("unable to load driver: %s_dri.so\n", driverName); + const __DRIextension **extensions = + loader_open_driver(driverName, out_driver_handle, search_path_vars); if (glhandle) dlclose(glhandle); - return handle; -} - -_X_HIDDEN const __DRIextension ** -driGetDriverExtensions(void *handle, const char *driver_name) -{ - const __DRIextension **extensions = NULL; - const 
__DRIextension **(*get_extensions)(void); - char *get_extensions_name = loader_get_extensions_name(driver_name); - - if (get_extensions_name) { - get_extensions = dlsym(handle, get_extensions_name); - if (get_extensions) { - free(get_extensions_name); - return get_extensions(); - } else { - InfoMessageF("driver does not expose %s(): %s\n", - get_extensions_name, dlerror()); - free(get_extensions_name); - } - } - - extensions = dlsym(handle, __DRI_DRIVER_EXTENSIONS); - if (extensions == NULL) { - ErrorMessageF("driver exports no extensions (%s)\n", dlerror()); - return NULL; - } - return extensions; } diff -Nru mesa-18.3.3/src/glx/dri_common.h mesa-19.0.1/src/glx/dri_common.h --- mesa-18.3.3/src/glx/dri_common.h 2017-11-14 18:46:21.000000000 +0000 +++ mesa-19.0.1/src/glx/dri_common.h 2019-03-31 23:16:37.000000000 +0000 @@ -69,10 +69,8 @@ #define ErrorMessageF(...) dri_message(_LOADER_WARNING, __VA_ARGS__) #define CriticalErrorMessageF(...) dri_message(_LOADER_FATAL, __VA_ARGS__) -extern void *driOpenDriver(const char *driverName); - -extern const __DRIextension ** -driGetDriverExtensions(void *handle, const char *driver_name); +extern const __DRIextension **driOpenDriver(const char *driverName, + void **out_driver_handle); extern bool dri2_convert_glx_attribs(unsigned num_attribs, const uint32_t *attribs, diff -Nru mesa-18.3.3/src/glx/dri_glx.c mesa-19.0.1/src/glx/dri_glx.c --- mesa-18.3.3/src/glx/dri_glx.c 2017-11-05 00:14:08.000000000 +0000 +++ mesa-19.0.1/src/glx/dri_glx.c 2019-03-31 23:16:37.000000000 +0000 @@ -199,15 +199,9 @@ static char * get_driver_config(const char *driverName) { - void *handle = driOpenDriver(driverName); - const __DRIextension **extensions; - - if (!handle) - return NULL; - + void *handle; char *config = NULL; - - extensions = driGetDriverExtensions(handle, driverName); + const __DRIextension **extensions = driOpenDriver(driverName, &handle); if (extensions) { for (int i = 0; extensions[i]; i++) { if (strcmp(extensions[i]->name, 
__DRI_CONFIG_OPTIONS) != 0) @@ -918,11 +912,7 @@ goto cleanup; } - psc->driver = driOpenDriver(driverName); - if (psc->driver == NULL) - goto cleanup; - - extensions = dlsym(psc->driver, __DRI_DRIVER_EXTENSIONS); + extensions = driOpenDriver(driverName, &psc->driver); if (extensions == NULL) { ErrorMessageF("driver exports no extensions (%s)\n", dlerror()); goto cleanup; diff -Nru mesa-18.3.3/src/glx/drisw_glx.c mesa-19.0.1/src/glx/drisw_glx.c --- mesa-18.3.3/src/glx/drisw_glx.c 2019-02-01 12:03:20.000000000 +0000 +++ mesa-19.0.1/src/glx/drisw_glx.c 2019-03-31 23:16:37.000000000 +0000 @@ -147,6 +147,9 @@ if (pdp->ximage) XDestroyImage(pdp->ximage); + if (pdp->shminfo.shmid > 0) + XShmDetach(dpy, &pdp->shminfo); + free(pdp->visinfo); XFreeGC(dpy, pdp->gc); @@ -764,17 +767,6 @@ #define SWRAST_DRIVER_NAME "swrast" -static void * -driOpenSwrast(void) -{ - void *driver = NULL; - - if (driver == NULL) - driver = driOpenDriver(SWRAST_DRIVER_NAME); - - return driver; -} - static const struct glx_screen_vtable drisw_screen_vtable = { .create_context = drisw_create_context, .create_context_attribs = drisw_create_context_attribs, @@ -853,11 +845,7 @@ return NULL; } - psc->driver = driOpenSwrast(); - if (psc->driver == NULL) - goto handle_error; - - extensions = driGetDriverExtensions(psc->driver, SWRAST_DRIVER_NAME); + extensions = driOpenDriver(SWRAST_DRIVER_NAME, &psc->driver); if (extensions == NULL) goto handle_error; diff -Nru mesa-18.3.3/src/glx/glxcmds.c mesa-19.0.1/src/glx/glxcmds.c --- mesa-18.3.3/src/glx/glxcmds.c 2019-02-01 12:03:20.000000000 +0000 +++ mesa-19.0.1/src/glx/glxcmds.c 2019-03-31 23:16:37.000000000 +0000 @@ -272,6 +272,44 @@ return True; } +/** + * Determine if a context uses direct rendering. + * + * \param dpy Display where the context was created. + * \param contextID ID of the context to be tested. + * \param error Out parameter, set to True on error if not NULL + * + * \returns \c True if the context is direct rendering or not. 
+ */ +static Bool +__glXIsDirect(Display * dpy, GLXContextID contextID, Bool *error) +{ + CARD8 opcode; + xcb_connection_t *c; + xcb_generic_error_t *err; + xcb_glx_is_direct_reply_t *reply; + Bool is_direct; + + opcode = __glXSetupForCommand(dpy); + if (!opcode) { + return False; + } + + c = XGetXCBConnection(dpy); + reply = xcb_glx_is_direct_reply(c, xcb_glx_is_direct(c, contextID), &err); + is_direct = (reply != NULL && reply->is_direct) ? True : False; + + if (err != NULL) { + if (error) + *error = True; + __glXSendErrorForXcb(dpy, err); + free(err); + } + + free(reply); + + return is_direct; +} /** * Create a new context. @@ -376,6 +414,21 @@ gc->share_xid = shareList ? shareList->xid : None; gc->imported = GL_FALSE; + /* Unlike most X resource creation requests, we're about to return a handle + * with client-side state, not just an XID. To simplify error handling + * elsewhere in libGL, force a round-trip here to ensure the CreateContext + * request above succeeded. + */ + { + Bool error = False; + int isDirect = __glXIsDirect(dpy, gc->xid, &error); + + if (error != False || isDirect != gc->isDirect) { + gc->vtable->destroy(gc); + gc = NULL; + } + } + return (GLXContext) gc; } @@ -613,42 +666,6 @@ /** - * Determine if a context uses direct rendering. - * - * \param dpy Display where the context was created. - * \param contextID ID of the context to be tested. - * - * \returns \c True if the context is direct rendering or not. - */ -static Bool -__glXIsDirect(Display * dpy, GLXContextID contextID) -{ - CARD8 opcode; - xcb_connection_t *c; - xcb_generic_error_t *err; - xcb_glx_is_direct_reply_t *reply; - Bool is_direct; - - opcode = __glXSetupForCommand(dpy); - if (!opcode) { - return False; - } - - c = XGetXCBConnection(dpy); - reply = xcb_glx_is_direct_reply(c, xcb_glx_is_direct(c, contextID), &err); - is_direct = (reply != NULL && reply->is_direct) ? 
True : False; - - if (err != NULL) { - __glXSendErrorForXcb(dpy, err); - free(err); - } - - free(reply); - - return is_direct; -} - -/** * \todo * Shouldn't this function \b always return \c False when * \c GLX_DIRECT_RENDERING is not defined? Do we really need to bother with @@ -668,7 +685,7 @@ #ifdef GLX_USE_APPLEGL /* TODO: indirect on darwin */ return False; #else - return __glXIsDirect(dpy, gc->xid); + return __glXIsDirect(dpy, gc->xid, NULL); #endif } @@ -1428,7 +1445,7 @@ return NULL; } - if (__glXIsDirect(dpy, contextID)) + if (__glXIsDirect(dpy, contextID, NULL)) return NULL; opcode = __glXSetupForCommand(dpy); diff -Nru mesa-18.3.3/src/glx/Makefile.am mesa-19.0.1/src/glx/Makefile.am --- mesa-18.3.3/src/glx/Makefile.am 2018-12-07 18:58:04.000000000 +0000 +++ mesa-19.0.1/src/glx/Makefile.am 2019-03-31 23:16:37.000000000 +0000 @@ -35,7 +35,6 @@ -I$(top_srcdir)/src/mapi/glapi \ $(VISIBILITY_CFLAGS) \ -D_REENTRANT \ - -DDEFAULT_DRIVER_DIR=\"$(DRI_DRIVER_SEARCH_DIR)\" \ $(DEFINES) \ $(LIBDRM_CFLAGS) \ $(DRI2PROTO_CFLAGS) \ diff -Nru mesa-18.3.3/src/glx/meson.build mesa-19.0.1/src/glx/meson.build --- mesa-18.3.3/src/glx/meson.build 2019-02-01 12:03:20.000000000 +0000 +++ mesa-19.0.1/src/glx/meson.build 2019-03-31 23:16:37.000000000 +0000 @@ -134,7 +134,6 @@ gl_lib_cargs = [ '-D_REENTRANT', - '-DDEFAULT_DRIVER_DIR="@0@"'.format(dri_search_path), ] libglx = static_library( diff -Nru mesa-18.3.3/src/glx/SConscript mesa-19.0.1/src/glx/SConscript --- mesa-18.3.3/src/glx/SConscript 2018-12-07 18:58:04.000000000 +0000 +++ mesa-19.0.1/src/glx/SConscript 2019-03-31 23:16:37.000000000 +0000 @@ -24,7 +24,6 @@ env.Append(CPPDEFINES = [ '_REENTRANT', - #('DEFAULT_DRIVER_DIR', 'DRI_DRIVER_SEARCH_DIR') ]) env.Prepend(LIBS = [ diff -Nru mesa-18.3.3/src/glx/tests/meson.build mesa-19.0.1/src/glx/tests/meson.build --- mesa-18.3.3/src/glx/tests/meson.build 2019-02-01 12:03:20.000000000 +0000 +++ mesa-19.0.1/src/glx/tests/meson.build 2019-03-31 23:16:37.000000000 +0000 @@ -50,5 
+50,6 @@ ], dependencies : [dep_libdrm, dep_glproto, dep_thread, idep_gtest] ), + suite : ['glx'], ) endif diff -Nru mesa-18.3.3/src/intel/Android.isl.mk mesa-19.0.1/src/intel/Android.isl.mk --- mesa-18.3.3/src/intel/Android.isl.mk 2018-02-16 12:24:09.000000000 +0000 +++ mesa-19.0.1/src/intel/Android.isl.mk 2019-03-31 23:16:37.000000000 +0000 @@ -199,6 +199,47 @@ include $(BUILD_STATIC_LIBRARY) # --------------------------------------- +# Build libmesa_isl_tiled_memcpy +# --------------------------------------- + +include $(CLEAR_VARS) + +LOCAL_MODULE := libmesa_isl_tiled_memcpy + +LOCAL_C_INCLUDES := \ + $(MESA_TOP)/src/gallium/include \ + $(MESA_TOP)/src/mapi \ + $(MESA_TOP)/src/mesa + +LOCAL_SRC_FILES := $(ISL_TILED_MEMCPY_FILES) + +include $(MESA_COMMON_MK) +include $(BUILD_STATIC_LIBRARY) + +# --------------------------------------- +# Build libmesa_isl_tiled_memcpy_sse41 +# --------------------------------------- + +ifeq ($(ARCH_X86_HAVE_SSE4_1),true) +include $(CLEAR_VARS) + +LOCAL_MODULE := libmesa_isl_tiled_memcpy_sse41 + +LOCAL_C_INCLUDES := \ + $(MESA_TOP)/src/gallium/include \ + $(MESA_TOP)/src/mapi \ + $(MESA_TOP)/src/mesa + +LOCAL_SRC_FILES := $(ISL_TILED_MEMCPY_SSE41_FILES) + +LOCAL_CFLAGS += \ + -DUSE_SSE41 -msse4.1 -mstackrealign + +include $(MESA_COMMON_MK) +include $(BUILD_STATIC_LIBRARY) +endif + +# --------------------------------------- # Build libmesa_isl # --------------------------------------- @@ -227,7 +268,15 @@ libmesa_isl_gen9 \ libmesa_isl_gen10 \ libmesa_isl_gen11 \ - libmesa_genxml + libmesa_genxml \ + libmesa_isl_tiled_memcpy + +ifeq ($(ARCH_X86_HAVE_SSE4_1),true) +LOCAL_CFLAGS += \ + -DUSE_SSE41 +LOCAL_WHOLE_STATIC_LIBRARIES += \ + libmesa_isl_tiled_memcpy_sse41 +endif # Autogenerated sources diff -Nru mesa-18.3.3/src/intel/Android.vulkan.mk mesa-19.0.1/src/intel/Android.vulkan.mk --- mesa-18.3.3/src/intel/Android.vulkan.mk 2018-12-07 18:58:04.000000000 +0000 +++ mesa-19.0.1/src/intel/Android.vulkan.mk 2019-03-31 
23:16:37.000000000 +0000 @@ -23,9 +23,10 @@ include $(CLEAR_VARS) include $(LOCAL_PATH)/Makefile.sources -VK_ENTRYPOINTS_SCRIPT := $(MESA_PYTHON2) $(LOCAL_PATH)/vulkan/anv_entrypoints_gen.py - -VK_EXTENSIONS_SCRIPT := $(MESA_PYTHON2) $(LOCAL_PATH)/vulkan/anv_extensions_gen.py +ANV_ENTRYPOINTS_GEN_SCRIPT := $(LOCAL_PATH)/vulkan/anv_entrypoints_gen.py +ANV_EXTENSIONS_GEN_SCRIPT := $(LOCAL_PATH)/vulkan/anv_extensions_gen.py +ANV_EXTENSIONS_SCRIPT := $(LOCAL_PATH)/vulkan/anv_extensions.py +VULKAN_API_XML := $(MESA_TOP)/src/vulkan/registry/vk.xml VULKAN_COMMON_INCLUDES := \ $(MESA_TOP)/include \ @@ -38,6 +39,7 @@ $(MESA_TOP)/src/intel \ $(MESA_TOP)/include/drm-uapi \ $(MESA_TOP)/src/intel/vulkan \ + $(MESA_TOP)/src/compiler \ frameworks/native/vulkan/include # libmesa_anv_entrypoints with header and dummy.c @@ -64,10 +66,13 @@ @echo "Gen Dummy: $(PRIVATE_MODULE) <= $(notdir $(@))" $(hide) touch $@ -$(intermediates)/vulkan/anv_entrypoints.h: $(intermediates)/vulkan/dummy.c - $(VK_ENTRYPOINTS_SCRIPT) \ +$(intermediates)/vulkan/anv_entrypoints.h: $(intermediates)/vulkan/dummy.c \ + $(ANV_ENTRYPOINTS_GEN_SCRIPT) \ + $(ANV_EXTENSIONS_SCRIPT) \ + $(VULKAN_API_XML) + $(MESA_PYTHON2) $(ANV_ENTRYPOINTS_GEN_SCRIPT) \ --outdir $(dir $@) \ - --xml $(MESA_TOP)/src/vulkan/registry/vk.xml + --xml $(VULKAN_API_XML) LOCAL_EXPORT_C_INCLUDE_DIRS := \ $(intermediates) @@ -241,22 +246,28 @@ LOCAL_GENERATED_SOURCES += $(intermediates)/vulkan/anv_extensions.c LOCAL_GENERATED_SOURCES += $(intermediates)/vulkan/anv_extensions.h -$(intermediates)/vulkan/anv_entrypoints.c: +$(intermediates)/vulkan/anv_entrypoints.c: $(ANV_ENTRYPOINTS_GEN_SCRIPT) \ + $(ANV_EXTENSIONS_SCRIPT) \ + $(VULKAN_API_XML) @mkdir -p $(dir $@) - $(VK_ENTRYPOINTS_SCRIPT) \ - --xml $(MESA_TOP)/src/vulkan/registry/vk.xml \ + $(MESA_PYTHON2) $(ANV_ENTRYPOINTS_GEN_SCRIPT) \ + --xml $(VULKAN_API_XML) \ --outdir $(dir $@) -$(intermediates)/vulkan/anv_extensions.c: +$(intermediates)/vulkan/anv_extensions.c: 
$(ANV_EXTENSIONS_GEN_SCRIPT) \ + $(ANV_EXTENSIONS_SCRIPT) \ + $(VULKAN_API_XML) @mkdir -p $(dir $@) - $(VK_EXTENSIONS_SCRIPT) \ - --xml $(MESA_TOP)/src/vulkan/registry/vk.xml \ + $(MESA_PYTHON2) $(ANV_EXTENSIONS_GEN_SCRIPT) \ + --xml $(VULKAN_API_XML) \ --out-c $@ -$(intermediates)/vulkan/anv_extensions.h: +$(intermediates)/vulkan/anv_extensions.h: $(ANV_EXTENSIONS_GEN_SCRIPT) \ + $(ANV_EXTENSIONS_SCRIPT) \ + $(VULKAN_API_XML) @mkdir -p $(dir $@) - $(VK_EXTENSIONS_SCRIPT) \ - --xml $(MESA_TOP)/src/vulkan/registry/vk.xml \ + $(MESA_PYTHON2) $(ANV_EXTENSIONS_GEN_SCRIPT) \ + --xml $(VULKAN_API_XML) \ --out-h $@ LOCAL_SHARED_LIBRARIES := $(ANV_SHARED_LIBRARIES) @@ -308,7 +319,7 @@ libmesa_intel_compiler \ libmesa_anv_entrypoints -LOCAL_SHARED_LIBRARIES := $(ANV_SHARED_LIBRARIES) libz libsync liblog +LOCAL_SHARED_LIBRARIES := $(ANV_SHARED_LIBRARIES) libexpat libz libsync liblog include $(MESA_COMMON_MK) include $(BUILD_SHARED_LIBRARY) diff -Nru mesa-18.3.3/src/intel/blorp/blorp_blit.c mesa-19.0.1/src/intel/blorp/blorp_blit.c --- mesa-18.3.3/src/intel/blorp/blorp_blit.c 2018-12-07 18:58:04.000000000 +0000 +++ mesa-19.0.1/src/intel/blorp/blorp_blit.c 2019-03-31 23:16:37.000000000 +0000 @@ -588,10 +588,11 @@ } static nir_ssa_def * -blorp_nir_manual_blend_average(nir_builder *b, struct brw_blorp_blit_vars *v, - nir_ssa_def *pos, unsigned tex_samples, - enum isl_aux_usage tex_aux_usage, - nir_alu_type dst_type) +blorp_nir_combine_samples(nir_builder *b, struct brw_blorp_blit_vars *v, + nir_ssa_def *pos, unsigned tex_samples, + enum isl_aux_usage tex_aux_usage, + nir_alu_type dst_type, + enum blorp_filter filter) { /* If non-null, this is the outer-most if statement */ nir_if *outer_if = NULL; @@ -603,6 +604,35 @@ if (tex_aux_usage == ISL_AUX_USAGE_MCS) mcs = blorp_blit_txf_ms_mcs(b, v, pos); + nir_op combine_op; + switch (filter) { + case BLORP_FILTER_AVERAGE: + assert(dst_type == nir_type_float); + combine_op = nir_op_fadd; + break; + + case BLORP_FILTER_MIN_SAMPLE: + 
switch (dst_type) { + case nir_type_int: combine_op = nir_op_imin; break; + case nir_type_uint: combine_op = nir_op_umin; break; + case nir_type_float: combine_op = nir_op_fmin; break; + default: unreachable("Invalid dst_type"); + } + break; + + case BLORP_FILTER_MAX_SAMPLE: + switch (dst_type) { + case nir_type_int: combine_op = nir_op_imax; break; + case nir_type_uint: combine_op = nir_op_umax; break; + case nir_type_float: combine_op = nir_op_fmax; break; + default: unreachable("Invalid dst_type"); + } + break; + + default: + unreachable("Invalid filter"); + } + /* We add together samples using a binary tree structure, e.g. for 4x MSAA: * * result = ((sample[0] + sample[1]) + (sample[2] + sample[3])) / 4 @@ -689,18 +719,22 @@ assert(stack_depth >= 2); --stack_depth; - assert(dst_type == nir_type_float); texture_data[stack_depth - 1] = - nir_fadd(b, texture_data[stack_depth - 1], - texture_data[stack_depth]); + nir_build_alu(b, combine_op, + texture_data[stack_depth - 1], + texture_data[stack_depth], + NULL, NULL); } } /* We should have just 1 sample on the stack now. */ assert(stack_depth == 1); - texture_data[0] = nir_fmul(b, texture_data[0], - nir_imm_float(b, 1.0 / tex_samples)); + if (filter == BLORP_FILTER_AVERAGE) { + assert(dst_type == nir_type_float); + texture_data[0] = nir_fmul(b, texture_data[0], + nir_imm_float(b, 1.0 / tex_samples)); + } nir_store_var(b, color, texture_data[0], 0xf); @@ -1351,6 +1385,8 @@ break; case BLORP_FILTER_AVERAGE: + case BLORP_FILTER_MIN_SAMPLE: + case BLORP_FILTER_MAX_SAMPLE: assert(!key->src_tiled_w); assert(key->tex_samples == key->src_samples); assert(key->tex_layout == key->src_layout); @@ -1369,15 +1405,17 @@ * to multiply our X and Y coordinates each by 2 and then add 1. 
*/ assert(key->src_coords_normalized); + assert(key->filter == BLORP_FILTER_AVERAGE); src_pos = nir_fadd(&b, nir_i2f32(&b, src_pos), nir_imm_float(&b, 0.5f)); color = blorp_nir_tex(&b, &v, key, src_pos); } else { /* Gen7+ hardware doesn't automaticaly blend. */ - color = blorp_nir_manual_blend_average(&b, &v, src_pos, key->src_samples, - key->tex_aux_usage, - key->texture_data_type); + color = blorp_nir_combine_samples(&b, &v, src_pos, key->src_samples, + key->tex_aux_usage, + key->texture_data_type, + key->filter); } break; @@ -1428,11 +1466,13 @@ } static bool -brw_blorp_get_blit_kernel(struct blorp_context *blorp, +brw_blorp_get_blit_kernel(struct blorp_batch *batch, struct blorp_params *params, const struct brw_blorp_blit_prog_key *prog_key) { - if (blorp->lookup_shader(blorp, prog_key, sizeof(*prog_key), + struct blorp_context *blorp = batch->blorp; + + if (blorp->lookup_shader(batch, prog_key, sizeof(*prog_key), ¶ms->wm_prog_kernel, ¶ms->wm_prog_data)) return true; @@ -1455,7 +1495,7 @@ &prog_data); bool result = - blorp->upload_shader(blorp, prog_key, sizeof(*prog_key), + blorp->upload_shader(batch, prog_key, sizeof(*prog_key), program, prog_data.base.program_size, &prog_data.base, sizeof(prog_data), ¶ms->wm_prog_kernel, ¶ms->wm_prog_data); @@ -1518,6 +1558,9 @@ { bool ok UNUSED; + /* It would be insane to try and do this on a compressed surface */ + assert(info->aux_usage == ISL_AUX_USAGE_NONE); + /* Just bail if we have nothing to do. */ if (info->surf.dim == ISL_SURF_DIM_2D && info->view.base_level == 0 && info->view.base_array_layer == 0 && @@ -2037,10 +2080,10 @@ /* For some texture types, we need to pass the layer through the sampler. 
*/ params->wm_inputs.src_z = params->src.z_offset; - if (!brw_blorp_get_blit_kernel(batch->blorp, params, wm_prog_key)) + if (!brw_blorp_get_blit_kernel(batch, params, wm_prog_key)) return 0; - if (!blorp_ensure_sf_program(batch->blorp, params)) + if (!blorp_ensure_sf_program(batch, params)) return 0; unsigned result = 0; diff -Nru mesa-18.3.3/src/intel/blorp/blorp.c mesa-19.0.1/src/intel/blorp/blorp.c --- mesa-18.3.3/src/intel/blorp/blorp.c 2018-09-27 19:13:54.000000000 +0000 +++ mesa-19.0.1/src/intel/blorp/blorp.c 2019-03-31 23:16:37.000000000 +0000 @@ -247,9 +247,10 @@ }; bool -blorp_ensure_sf_program(struct blorp_context *blorp, +blorp_ensure_sf_program(struct blorp_batch *batch, struct blorp_params *params) { + struct blorp_context *blorp = batch->blorp; const struct brw_wm_prog_data *wm_prog_data = params->wm_prog_data; assert(params->wm_prog_data); @@ -276,7 +277,7 @@ memcpy(key.key.interp_mode, wm_prog_data->interp_mode, sizeof(key.key.interp_mode)); - if (blorp->lookup_shader(blorp, &key, sizeof(key), + if (blorp->lookup_shader(batch, &key, sizeof(key), ¶ms->sf_prog_kernel, ¶ms->sf_prog_data)) return true; @@ -293,7 +294,7 @@ &prog_data_tmp, &vue_map, &program_size); bool result = - blorp->upload_shader(blorp, &key, sizeof(key), program, program_size, + blorp->upload_shader(batch, &key, sizeof(key), program, program_size, (void *)&prog_data_tmp, sizeof(prog_data_tmp), ¶ms->sf_prog_kernel, ¶ms->sf_prog_data); diff -Nru mesa-18.3.3/src/intel/blorp/blorp_clear.c mesa-19.0.1/src/intel/blorp/blorp_clear.c --- mesa-18.3.3/src/intel/blorp/blorp_clear.c 2018-12-07 18:58:04.000000000 +0000 +++ mesa-19.0.1/src/intel/blorp/blorp_clear.c 2019-03-31 23:16:37.000000000 +0000 @@ -43,18 +43,20 @@ }; static bool -blorp_params_get_clear_kernel(struct blorp_context *blorp, +blorp_params_get_clear_kernel(struct blorp_batch *batch, struct blorp_params *params, bool use_replicated_data, bool clear_rgb_as_red) { + struct blorp_context *blorp = batch->blorp; + const struct 
brw_blorp_const_color_prog_key blorp_key = { .shader_type = BLORP_SHADER_TYPE_CLEAR, .use_simd16_replicated_data = use_replicated_data, .clear_rgb_as_red = clear_rgb_as_red, }; - if (blorp->lookup_shader(blorp, &blorp_key, sizeof(blorp_key), + if (blorp->lookup_shader(batch, &blorp_key, sizeof(blorp_key), ¶ms->wm_prog_kernel, ¶ms->wm_prog_data)) return true; @@ -104,7 +106,7 @@ &prog_data); bool result = - blorp->upload_shader(blorp, &blorp_key, sizeof(blorp_key), + blorp->upload_shader(batch, &blorp_key, sizeof(blorp_key), program, prog_data.base.program_size, &prog_data.base, sizeof(prog_data), ¶ms->wm_prog_kernel, ¶ms->wm_prog_data); @@ -126,9 +128,10 @@ * vertex shader. */ static bool -blorp_params_get_layer_offset_vs(struct blorp_context *blorp, +blorp_params_get_layer_offset_vs(struct blorp_batch *batch, struct blorp_params *params) { + struct blorp_context *blorp = batch->blorp; struct layer_offset_vs_key blorp_key = { .shader_type = BLORP_SHADER_TYPE_LAYER_OFFSET_VS, }; @@ -136,7 +139,7 @@ if (params->wm_prog_data) blorp_key.num_inputs = params->wm_prog_data->num_varying_inputs; - if (blorp->lookup_shader(blorp, &blorp_key, sizeof(blorp_key), + if (blorp->lookup_shader(batch, &blorp_key, sizeof(blorp_key), ¶ms->vs_prog_kernel, ¶ms->vs_prog_data)) return true; @@ -194,7 +197,7 @@ blorp_compile_vs(blorp, mem_ctx, b.shader, &vs_prog_data); bool result = - blorp->upload_shader(blorp, &blorp_key, sizeof(blorp_key), + blorp->upload_shader(batch, &blorp_key, sizeof(blorp_key), program, vs_prog_data.base.base.program_size, &vs_prog_data.base.base, sizeof(vs_prog_data), ¶ms->vs_prog_kernel, ¶ms->vs_prog_data); @@ -351,7 +354,7 @@ get_fast_clear_rect(batch->blorp->isl_dev, surf->aux_surf, ¶ms.x0, ¶ms.y0, ¶ms.x1, ¶ms.y1); - if (!blorp_params_get_clear_kernel(batch->blorp, ¶ms, true, false)) + if (!blorp_params_get_clear_kernel(batch, ¶ms, true, false)) return; brw_blorp_surface_info_init(batch->blorp, ¶ms.dst, surf, level, @@ -453,12 +456,12 @@ } } - if 
(!blorp_params_get_clear_kernel(batch->blorp, ¶ms, + if (!blorp_params_get_clear_kernel(batch, ¶ms, use_simd16_replicated_data, clear_rgb_as_red)) return; - if (!blorp_ensure_sf_program(batch->blorp, ¶ms)) + if (!blorp_ensure_sf_program(batch, ¶ms)) return; while (num_layers > 0) { @@ -589,7 +592,7 @@ * we disable statistics in 3DSTATE_WM. Give it the usual clear shader * to work around the issue. */ - if (!blorp_params_get_clear_kernel(batch->blorp, ¶ms, false, false)) + if (!blorp_params_get_clear_kernel(batch, ¶ms, false, false)) return; } @@ -829,7 +832,7 @@ * is tiled or not, we have to assume it may be linear. This means no * SIMD16_REPDATA for us. :-( */ - if (!blorp_params_get_clear_kernel(batch->blorp, ¶ms, false, false)) + if (!blorp_params_get_clear_kernel(batch, ¶ms, false, false)) return; } @@ -847,7 +850,7 @@ params.stencil_ref = stencil_value; } - if (!blorp_params_get_layer_offset_vs(batch->blorp, ¶ms)) + if (!blorp_params_get_layer_offset_vs(batch, ¶ms)) return; params.vs_inputs.base_layer = start_layer; @@ -914,7 +917,7 @@ * color" message. 
*/ - if (!blorp_params_get_clear_kernel(batch->blorp, ¶ms, true, false)) + if (!blorp_params_get_clear_kernel(batch, ¶ms, true, false)) return; batch->blorp->exec(batch, ¶ms); @@ -936,9 +939,10 @@ }; static bool -blorp_params_get_mcs_partial_resolve_kernel(struct blorp_context *blorp, +blorp_params_get_mcs_partial_resolve_kernel(struct blorp_batch *batch, struct blorp_params *params) { + struct blorp_context *blorp = batch->blorp; const struct blorp_mcs_partial_resolve_key blorp_key = { .shader_type = BLORP_SHADER_TYPE_MCS_PARTIAL_RESOLVE, .indirect_clear_color = params->dst.clear_color_addr.buffer != NULL, @@ -946,7 +950,7 @@ .num_samples = params->num_samples, }; - if (blorp->lookup_shader(blorp, &blorp_key, sizeof(blorp_key), + if (blorp->lookup_shader(batch, &blorp_key, sizeof(blorp_key), ¶ms->wm_prog_kernel, ¶ms->wm_prog_data)) return true; @@ -1002,7 +1006,7 @@ &prog_data); bool result = - blorp->upload_shader(blorp, &blorp_key, sizeof(blorp_key), + blorp->upload_shader(batch, &blorp_key, sizeof(blorp_key), program, prog_data.base.program_size, &prog_data.base, sizeof(prog_data), ¶ms->wm_prog_kernel, ¶ms->wm_prog_data); @@ -1039,7 +1043,7 @@ memcpy(¶ms.wm_inputs.clear_color, surf->clear_color.f32, sizeof(float) * 4); - if (!blorp_params_get_mcs_partial_resolve_kernel(batch->blorp, ¶ms)) + if (!blorp_params_get_mcs_partial_resolve_kernel(batch, ¶ms)) return; batch->blorp->exec(batch, ¶ms); @@ -1192,7 +1196,7 @@ memset(¶ms.wm_inputs.clear_color, 0, sizeof(params.wm_inputs.clear_color)); - if (!blorp_params_get_clear_kernel(batch->blorp, ¶ms, true, false)) + if (!blorp_params_get_clear_kernel(batch, ¶ms, true, false)) return; batch->blorp->exec(batch, ¶ms); diff -Nru mesa-18.3.3/src/intel/blorp/blorp_genX_exec.h mesa-19.0.1/src/intel/blorp/blorp_genX_exec.h --- mesa-18.3.3/src/intel/blorp/blorp_genX_exec.h 2019-02-01 12:03:20.000000000 +0000 +++ mesa-19.0.1/src/intel/blorp/blorp_genX_exec.h 2019-03-31 23:16:37.000000000 +0000 @@ -82,6 +82,10 @@ 
blorp_surface_reloc(struct blorp_batch *batch, uint32_t ss_offset, struct blorp_address address, uint32_t delta); +static uint64_t +blorp_get_surface_address(struct blorp_batch *batch, + struct blorp_address address); + #if GEN_GEN >= 7 && GEN_GEN < 10 static struct blorp_address blorp_get_surface_base_address(struct blorp_batch *batch); @@ -311,7 +315,7 @@ vb[idx].BufferPitch = stride; #if GEN_GEN >= 6 - vb[idx].VertexBufferMOCS = addr.mocs; + vb[idx].MOCS = addr.mocs; #endif #if GEN_GEN >= 7 @@ -347,13 +351,13 @@ blorp_emit_input_varying_data(batch, params, &addrs[1], &size); blorp_fill_vertex_buffer_state(batch, vb, 1, addrs[1], size, 0); + blorp_vf_invalidate_for_vb_48b_transitions(batch, addrs, num_vbs); + const unsigned num_dwords = 1 + num_vbs * GENX(VERTEX_BUFFER_STATE_length); uint32_t *dw = blorp_emitn(batch, GENX(3DSTATE_VERTEX_BUFFERS), num_dwords); if (!dw) return; - blorp_vf_invalidate_for_vb_48b_transitions(batch, addrs, num_vbs); - for (unsigned i = 0; i < num_vbs; i++) { GENX(VERTEX_BUFFER_STATE_pack)(batch, dw, &vb[i]); dw += GENX(VERTEX_BUFFER_STATE_length); @@ -1363,6 +1367,13 @@ isl_surf_fill_state(batch->blorp->isl_dev, state, .surf = &surf, .view = &surface->view, .aux_surf = &surface->aux_surf, .aux_usage = aux_usage, + .address = + blorp_get_surface_address(batch, surface->addr), + .aux_address = aux_usage == ISL_AUX_USAGE_NONE ? 0 : + blorp_get_surface_address(batch, surface->aux_addr), + .clear_address = !use_clear_address ? 
0 : + blorp_get_surface_address(batch, + surface->clear_color_addr), .mocs = surface->addr.mocs, .clear_color = surface->clear_color, .use_clear_address = use_clear_address, diff -Nru mesa-18.3.3/src/intel/blorp/blorp.h mesa-19.0.1/src/intel/blorp/blorp.h --- mesa-18.3.3/src/intel/blorp/blorp.h 2018-09-27 19:13:54.000000000 +0000 +++ mesa-19.0.1/src/intel/blorp/blorp.h 2019-03-31 23:16:37.000000000 +0000 @@ -45,10 +45,10 @@ const struct brw_compiler *compiler; - bool (*lookup_shader)(struct blorp_context *blorp, + bool (*lookup_shader)(struct blorp_batch *batch, const void *key, uint32_t key_size, uint32_t *kernel_out, void *prog_data_out); - bool (*upload_shader)(struct blorp_context *blorp, + bool (*upload_shader)(struct blorp_batch *batch, const void *key, uint32_t key_size, const void *kernel, uint32_t kernel_size, const struct brw_stage_prog_data *prog_data, @@ -91,8 +91,8 @@ struct blorp_address { void *buffer; + uint64_t offset; unsigned reloc_flags; - uint32_t offset; uint32_t mocs; }; @@ -125,6 +125,8 @@ BLORP_FILTER_BILINEAR, BLORP_FILTER_SAMPLE_0, BLORP_FILTER_AVERAGE, + BLORP_FILTER_MIN_SAMPLE, + BLORP_FILTER_MAX_SAMPLE, }; void diff -Nru mesa-18.3.3/src/intel/blorp/blorp_priv.h mesa-19.0.1/src/intel/blorp/blorp_priv.h --- mesa-18.3.3/src/intel/blorp/blorp_priv.h 2018-09-27 19:13:54.000000000 +0000 +++ mesa-19.0.1/src/intel/blorp/blorp_priv.h 2019-03-31 23:16:37.000000000 +0000 @@ -360,7 +360,7 @@ struct brw_vs_prog_data *vs_prog_data); bool -blorp_ensure_sf_program(struct blorp_context *blorp, +blorp_ensure_sf_program(struct blorp_batch *batch, struct blorp_params *params); /** \} */ diff -Nru mesa-18.3.3/src/intel/common/gen_batch_decoder.c mesa-19.0.1/src/intel/common/gen_batch_decoder.c --- mesa-18.3.3/src/intel/common/gen_batch_decoder.c 2018-12-07 18:58:04.000000000 +0000 +++ mesa-19.0.1/src/intel/common/gen_batch_decoder.c 2019-03-31 23:16:37.000000000 +0000 @@ -24,6 +24,7 @@ #include "common/gen_decoder.h" #include "gen_disasm.h" #include 
"util/macros.h" +#include "main/macros.h" /* Needed for ROUND_DOWN_TO */ #include @@ -45,6 +46,7 @@ ctx->fp = fp; ctx->flags = flags; ctx->max_vbo_decoded_lines = -1; /* No limit! */ + ctx->engine = I915_ENGINE_CLASS_RENDER; if (xml_path == NULL) ctx->spec = gen_spec_load(devinfo); @@ -168,7 +170,8 @@ uint32_t pitch, int max_lines) { - const uint32_t *dw_end = bo.map + MIN2(bo.size, read_length); + const uint32_t *dw_end = + bo.map + ROUND_DOWN_TO(MIN2(bo.size, read_length), 4); int column_count = 0, line_count = -1; for (const uint32_t *dw = bo.map; dw < dw_end; dw++) { @@ -192,10 +195,16 @@ fprintf(ctx->fp, "\n"); } +static struct gen_group * +gen_ctx_find_instruction(struct gen_batch_decode_ctx *ctx, const uint32_t *p) +{ + return gen_spec_find_instruction(ctx->spec, ctx->engine, p); +} + static void handle_state_base_address(struct gen_batch_decode_ctx *ctx, const uint32_t *p) { - struct gen_group *inst = gen_spec_find_instruction(ctx->spec, p); + struct gen_group *inst = gen_ctx_find_instruction(ctx, p); struct gen_field_iterator iter; gen_field_iterator_init(&iter, inst, p, 0, false); @@ -309,7 +318,7 @@ handle_media_interface_descriptor_load(struct gen_batch_decode_ctx *ctx, const uint32_t *p) { - struct gen_group *inst = gen_spec_find_instruction(ctx->spec, p); + struct gen_group *inst = gen_ctx_find_instruction(ctx, p); struct gen_group *desc = gen_spec_find_struct(ctx->spec, "INTERFACE_DESCRIPTOR_DATA"); @@ -373,7 +382,7 @@ handle_3dstate_vertex_buffers(struct gen_batch_decode_ctx *ctx, const uint32_t *p) { - struct gen_group *inst = gen_spec_find_instruction(ctx->spec, p); + struct gen_group *inst = gen_ctx_find_instruction(ctx, p); struct gen_group *vbs = gen_spec_find_struct(ctx->spec, "VERTEX_BUFFER_STATE"); struct gen_batch_decode_bo vb = {}; @@ -402,7 +411,7 @@ ready = true; } else if (strcmp(vbs_iter.name, "End Address") == 0) { if (vb.map && vbs_iter.raw_value >= vb.addr) - vb_size = vbs_iter.raw_value - vb.addr; + vb_size = (vbs_iter.raw_value + 
1) - vb.addr; else vb_size = 0; ready = true; @@ -436,7 +445,7 @@ handle_3dstate_index_buffer(struct gen_batch_decode_ctx *ctx, const uint32_t *p) { - struct gen_group *inst = gen_spec_find_instruction(ctx->spec, p); + struct gen_group *inst = gen_ctx_find_instruction(ctx, p); struct gen_batch_decode_bo ib = {}; uint32_t ib_size = 0; @@ -486,7 +495,7 @@ static void decode_single_ksp(struct gen_batch_decode_ctx *ctx, const uint32_t *p) { - struct gen_group *inst = gen_spec_find_instruction(ctx->spec, p); + struct gen_group *inst = gen_ctx_find_instruction(ctx, p); uint64_t ksp = 0; bool is_simd8 = false; /* vertex shaders on Gen8+ only */ @@ -528,7 +537,7 @@ static void decode_ps_kernels(struct gen_batch_decode_ctx *ctx, const uint32_t *p) { - struct gen_group *inst = gen_spec_find_instruction(ctx->spec, p); + struct gen_group *inst = gen_ctx_find_instruction(ctx, p); uint64_t ksp[3] = {0, 0, 0}; bool enabled[3] = {false, false, false}; @@ -576,7 +585,7 @@ static void decode_3dstate_constant(struct gen_batch_decode_ctx *ctx, const uint32_t *p) { - struct gen_group *inst = gen_spec_find_instruction(ctx->spec, p); + struct gen_group *inst = gen_ctx_find_instruction(ctx, p); struct gen_group *body = gen_spec_find_struct(ctx->spec, "3DSTATE_CONSTANT_BODY"); @@ -658,7 +667,7 @@ const char *struct_type, const uint32_t *p, int count) { - struct gen_group *inst = gen_spec_find_instruction(ctx->spec, p); + struct gen_group *inst = gen_ctx_find_instruction(ctx, p); uint32_t state_offset = 0; @@ -802,7 +811,7 @@ struct gen_group *inst; for (p = batch; p < end; p += length) { - inst = gen_spec_find_instruction(ctx->spec, p); + inst = gen_ctx_find_instruction(ctx, p); length = gen_group_get_length(inst, p); assert(inst == NULL || length > 0); length = MAX2(1, length); diff -Nru mesa-18.3.3/src/intel/common/gen_decoder.c mesa-19.0.1/src/intel/common/gen_decoder.c --- mesa-18.3.3/src/intel/common/gen_decoder.c 2018-12-07 18:58:04.000000000 +0000 +++ 
mesa-19.0.1/src/intel/common/gen_decoder.c 2019-03-31 23:16:37.000000000 +0000 @@ -165,6 +165,9 @@ group->fixed_length = fixed_length; group->dword_length_field = NULL; group->dw_length = 0; + group->engine_mask = I915_ENGINE_CLASS_TO_MASK(I915_ENGINE_CLASS_RENDER) | + I915_ENGINE_CLASS_TO_MASK(I915_ENGINE_CLASS_VIDEO) | + I915_ENGINE_CLASS_TO_MASK(I915_ENGINE_CLASS_COPY); group->bias = 1; for (int i = 0; atts[i]; i += 2) { @@ -173,6 +176,28 @@ group->dw_length = strtoul(atts[i + 1], &p, 0); } else if (strcmp(atts[i], "bias") == 0) { group->bias = strtoul(atts[i + 1], &p, 0); + } else if (strcmp(atts[i], "engine") == 0) { + void *mem_ctx = ralloc_context(NULL); + char *tmp = ralloc_strdup(mem_ctx, atts[i + 1]); + char *save_ptr; + char *tok = strtok_r(tmp, "|", &save_ptr); + + group->engine_mask = 0; + while (tok != NULL) { + if (strcmp(tok, "render") == 0) { + group->engine_mask |= I915_ENGINE_CLASS_TO_MASK(I915_ENGINE_CLASS_RENDER); + } else if (strcmp(tok, "video") == 0) { + group->engine_mask |= I915_ENGINE_CLASS_TO_MASK(I915_ENGINE_CLASS_VIDEO); + } else if (strcmp(tok, "blitter") == 0) { + group->engine_mask |= I915_ENGINE_CLASS_TO_MASK(I915_ENGINE_CLASS_COPY); + } else { + fprintf(stderr, "unknown engine class defined for instruction \"%s\": %s\n", name, atts[i + 1]); + } + + tok = strtok_r(NULL, "|", &save_ptr); + } + + ralloc_free(mem_ctx); } } @@ -708,12 +733,15 @@ } struct gen_group * -gen_spec_find_instruction(struct gen_spec *spec, const uint32_t *p) +gen_spec_find_instruction(struct gen_spec *spec, + enum drm_i915_gem_engine_class engine, + const uint32_t *p) { hash_table_foreach(spec->commands, entry) { struct gen_group *command = entry->data; uint32_t opcode = *p & command->opcode_mask; - if (opcode == command->opcode) + if ((command->engine_mask & I915_ENGINE_CLASS_TO_MASK(engine)) && + opcode == command->opcode) return command; } diff -Nru mesa-18.3.3/src/intel/common/gen_decoder.h mesa-19.0.1/src/intel/common/gen_decoder.h --- 
mesa-18.3.3/src/intel/common/gen_decoder.h 2018-12-07 18:58:04.000000000 +0000 +++ mesa-19.0.1/src/intel/common/gen_decoder.h 2019-03-31 23:16:37.000000000 +0000 @@ -30,6 +30,9 @@ #include "dev/gen_device_info.h" #include "util/hash_table.h" +#include "util/bitset.h" + +#include "drm-uapi/i915_drm.h" #ifdef __cplusplus extern "C" { @@ -40,6 +43,8 @@ struct gen_field; union gen_field_value; +#define I915_ENGINE_CLASS_TO_MASK(x) BITSET_BIT(x) + static inline uint32_t gen_make_gen(uint32_t major, uint32_t minor) { return (major << 8) | minor; @@ -51,7 +56,9 @@ const char *path); void gen_spec_destroy(struct gen_spec *spec); uint32_t gen_spec_get_gen(struct gen_spec *spec); -struct gen_group *gen_spec_find_instruction(struct gen_spec *spec, const uint32_t *p); +struct gen_group *gen_spec_find_instruction(struct gen_spec *spec, + enum drm_i915_gem_engine_class engine, + const uint32_t *p); struct gen_group *gen_spec_find_register(struct gen_spec *spec, uint32_t offset); struct gen_group *gen_spec_find_register_by_name(struct gen_spec *spec, const char *name); struct gen_enum *gen_spec_find_enum(struct gen_spec *spec, const char *name); @@ -102,6 +109,7 @@ struct gen_field *dword_length_field; /* specific */ uint32_t dw_length; + uint32_t engine_mask; /* specific */ uint32_t bias; /* specific */ uint32_t group_offset, group_count; uint32_t group_size; @@ -227,6 +235,8 @@ uint64_t instruction_base; int max_vbo_decoded_lines; + + enum drm_i915_gem_engine_class engine; }; void gen_batch_decode_ctx_init(struct gen_batch_decode_ctx *ctx, diff -Nru mesa-18.3.3/src/intel/common/gen_l3_config.c mesa-19.0.1/src/intel/common/gen_l3_config.c --- mesa-18.3.3/src/intel/common/gen_l3_config.c 2018-03-26 16:53:06.000000000 +0000 +++ mesa-19.0.1/src/intel/common/gen_l3_config.c 2019-03-31 23:16:37.000000000 +0000 @@ -134,15 +134,15 @@ /** * ICL validated L3 configurations. \sa icl_l3_configs. 
+ * Zeroth entry in below table has been commented out intentionally + * due to known issues with this configuration. Many other entries + * suggested by h/w specification aren't added here because they + * do under allocation of L3 cache with below partitioning. */ static const struct gen_l3_config icl_l3_configs[] = { /* SLM URB ALL DC RO IS C T */ - {{ 0, 64, 64, 0, 0, 0, 0, 0 }}, - {{ 0, 64, 0, 16, 48, 0, 0, 0 }}, - {{ 0, 48, 0, 16, 64, 0, 0, 0 }}, - {{ 0, 32, 0, 0, 96, 0, 0, 0 }}, - {{ 0, 32, 96, 0, 0, 0, 0, 0 }}, - {{ 0, 32, 0, 16, 80, 0, 0, 0 }}, + /*{{ 0, 16, 80, 0, 0, 0, 0, 0 }},*/ + {{ 0, 32, 64, 0, 0, 0, 0, 0 }}, {{ 0 }} }; @@ -309,7 +309,8 @@ get_l3_way_size(const struct gen_device_info *devinfo) { const unsigned way_size_per_bank = - devinfo->gen >= 9 && devinfo->l3_banks == 1 ? 4 : 2; + (devinfo->gen >= 9 && devinfo->l3_banks == 1) || devinfo->gen == 11 ? + 4 : 2; assert(devinfo->l3_banks); return way_size_per_bank * devinfo->l3_banks; diff -Nru mesa-18.3.3/src/intel/common/gen_urb_config.c mesa-19.0.1/src/intel/common/gen_urb_config.c --- mesa-18.3.3/src/intel/common/gen_urb_config.c 2018-04-16 21:31:06.000000000 +0000 +++ mesa-19.0.1/src/intel/common/gen_urb_config.c 2019-03-31 23:16:37.000000000 +0000 @@ -195,8 +195,14 @@ } /* Lay out the URB in pipeline order: push constants, VS, HS, DS, GS. */ - start[0] = push_constant_chunks; - for (int i = MESA_SHADER_TESS_CTRL; i <= MESA_SHADER_GEOMETRY; i++) { - start[i] = start[i - 1] + chunks[i - 1]; + int next = push_constant_chunks; + for (int i = MESA_SHADER_VERTEX; i <= MESA_SHADER_GEOMETRY; i++) { + if (entries[i]) { + start[i] = next; + next += chunks[i]; + } else { + /* Just put disabled stages at the beginning. 
*/ + start[i] = 0; + } } } diff -Nru mesa-18.3.3/src/intel/compiler/brw_compiler.c mesa-19.0.1/src/intel/compiler/brw_compiler.c --- mesa-18.3.3/src/intel/compiler/brw_compiler.c 2018-12-07 18:58:04.000000000 +0000 +++ mesa-19.0.1/src/intel/compiler/brw_compiler.c 2019-03-31 23:16:37.000000000 +0000 @@ -42,6 +42,7 @@ .lower_fdiv = true, \ .lower_flrp64 = true, \ .lower_ldexp = true, \ + .lower_cs_local_id_from_index = true, \ .lower_device_index_to_zero = true, \ .native_integers = true, \ .use_interpolated_input_intrinsics = true, \ diff -Nru mesa-18.3.3/src/intel/compiler/brw_compiler.h mesa-19.0.1/src/intel/compiler/brw_compiler.h --- mesa-18.3.3/src/intel/compiler/brw_compiler.h 2018-12-07 18:58:04.000000000 +0000 +++ mesa-19.0.1/src/intel/compiler/brw_compiler.h 2019-03-31 23:16:37.000000000 +0000 @@ -195,6 +195,7 @@ uint32_t y_uv_image_mask; uint32_t yx_xuxv_image_mask; uint32_t xy_uxvx_image_mask; + uint32_t ayuv_image_mask; }; /** @@ -642,19 +643,6 @@ return prog_data->param + old_nr_params; } -static inline void -brw_mark_surface_used(struct brw_stage_prog_data *prog_data, - unsigned surf_index) -{ - /* A binding table index is 8 bits and the top 3 values are reserved for - * special things (stateless and SLM). 
- */ - assert(surf_index <= 252); - - prog_data->binding_table.size_bytes = - MAX2(prog_data->binding_table.size_bytes, (surf_index + 1) * 4); -} - enum brw_barycentric_mode { BRW_BARYCENTRIC_PERSPECTIVE_PIXEL = 0, BRW_BARYCENTRIC_PERSPECTIVE_CENTROID = 1, @@ -1238,7 +1226,7 @@ void *mem_ctx, const struct brw_vs_prog_key *key, struct brw_vs_prog_data *prog_data, - const struct nir_shader *shader, + struct nir_shader *shader, int shader_time_index, char **error_str); @@ -1253,7 +1241,7 @@ void *mem_ctx, const struct brw_tcs_prog_key *key, struct brw_tcs_prog_data *prog_data, - const struct nir_shader *nir, + struct nir_shader *nir, int shader_time_index, char **error_str); @@ -1268,7 +1256,7 @@ const struct brw_tes_prog_key *key, const struct brw_vue_map *input_vue_map, struct brw_tes_prog_data *prog_data, - const struct nir_shader *shader, + struct nir_shader *shader, struct gl_program *prog, int shader_time_index, char **error_str); @@ -1283,7 +1271,7 @@ void *mem_ctx, const struct brw_gs_prog_key *key, struct brw_gs_prog_data *prog_data, - const struct nir_shader *shader, + struct nir_shader *shader, struct gl_program *prog, int shader_time_index, char **error_str); @@ -1330,7 +1318,7 @@ void *mem_ctx, const struct brw_wm_prog_key *key, struct brw_wm_prog_data *prog_data, - const struct nir_shader *shader, + struct nir_shader *shader, struct gl_program *prog, int shader_time_index8, int shader_time_index16, diff -Nru mesa-18.3.3/src/intel/compiler/brw_disasm.c mesa-19.0.1/src/intel/compiler/brw_disasm.c --- mesa-18.3.3/src/intel/compiler/brw_disasm.c 2018-12-07 18:58:04.000000000 +0000 +++ mesa-19.0.1/src/intel/compiler/brw_disasm.c 2019-03-31 23:16:37.000000000 +0000 @@ -80,6 +80,22 @@ opcode == BRW_OPCODE_XOR; } +static bool +is_send(unsigned opcode) +{ + return opcode == BRW_OPCODE_SEND || + opcode == BRW_OPCODE_SENDC || + opcode == BRW_OPCODE_SENDS || + opcode == BRW_OPCODE_SENDSC; +} + +static bool +is_split_send(UNUSED const struct gen_device_info *devinfo, 
unsigned opcode) +{ + return opcode == BRW_OPCODE_SENDS || + opcode == BRW_OPCODE_SENDSC; +} + const char *const conditional_modifier[16] = { [BRW_CONDITIONAL_NONE] = "", [BRW_CONDITIONAL_Z] = ".z", @@ -289,7 +305,7 @@ [BRW_SFID_MESSAGE_GATEWAY] = "gateway", [BRW_SFID_URB] = "urb", [BRW_SFID_THREAD_SPAWNER] = "thread_spawner", - [GEN6_SFID_DATAPORT_SAMPLER_CACHE] = "sampler", + [GEN6_SFID_DATAPORT_SAMPLER_CACHE] = "dp_sampler", [GEN6_SFID_DATAPORT_RENDER_CACHE] = "render", [GEN6_SFID_DATAPORT_CONSTANT_CACHE] = "const", [GEN7_SFID_DATAPORT_DATA_CACHE] = "data", @@ -713,7 +729,28 @@ unsigned elem_size = brw_reg_type_to_size(type); int err = 0; - if (brw_inst_access_mode(devinfo, inst) == BRW_ALIGN_1) { + if (is_split_send(devinfo, brw_inst_opcode(devinfo, inst))) { + /* These are fixed for split sends */ + type = BRW_REGISTER_TYPE_UD; + elem_size = 4; + if (brw_inst_dst_address_mode(devinfo, inst) == BRW_ADDRESS_DIRECT) { + err |= reg(file, brw_inst_send_dst_reg_file(devinfo, inst), + brw_inst_dst_da_reg_nr(devinfo, inst)); + unsigned subreg_nr = brw_inst_dst_da16_subreg_nr(devinfo, inst); + if (subreg_nr) + format(file, ".%u", subreg_nr); + string(file, brw_reg_type_to_letters(type)); + } else { + string(file, "g[a0"); + if (brw_inst_dst_ia_subreg_nr(devinfo, inst)) + format(file, ".%"PRIu64, brw_inst_dst_ia_subreg_nr(devinfo, inst) / + elem_size); + if (brw_inst_send_dst_ia16_addr_imm(devinfo, inst)) + format(file, " %d", brw_inst_send_dst_ia16_addr_imm(devinfo, inst)); + string(file, "]<"); + string(file, brw_reg_type_to_letters(type)); + } + } else if (brw_inst_access_mode(devinfo, inst) == BRW_ALIGN_1) { if (brw_inst_dst_address_mode(devinfo, inst) == BRW_ADDRESS_DIRECT) { err |= reg(file, brw_inst_dst_reg_file(devinfo, inst), brw_inst_dst_da_reg_nr(devinfo, inst)); @@ -1316,9 +1353,60 @@ } static int +src_sends_da(FILE *file, + const struct gen_device_info *devinfo, + enum brw_reg_type type, + unsigned _reg_nr, + unsigned _reg_subnr) +{ + int err = 0; + + err 
|= reg(file, BRW_GENERAL_REGISTER_FILE, _reg_nr); + if (err == -1) + return 0; + if (_reg_subnr) + format(file, ".1"); + string(file, brw_reg_type_to_letters(type)); + + return err; +} + +static int +src_sends_ia(FILE *file, + const struct gen_device_info *devinfo, + enum brw_reg_type type, + int _addr_imm, + unsigned _addr_subreg_nr) +{ + string(file, "g[a0"); + if (_addr_subreg_nr) + format(file, ".1"); + if (_addr_imm) + format(file, " %d", _addr_imm); + string(file, "]"); + string(file, brw_reg_type_to_letters(type)); + + return 0; +} + +static int src0(FILE *file, const struct gen_device_info *devinfo, const brw_inst *inst) { - if (brw_inst_src0_reg_file(devinfo, inst) == BRW_IMMEDIATE_VALUE) { + if (is_split_send(devinfo, brw_inst_opcode(devinfo, inst))) { + if (brw_inst_send_src0_address_mode(devinfo, inst) == BRW_ADDRESS_DIRECT) { + return src_sends_da(file, + devinfo, + BRW_REGISTER_TYPE_UD, + brw_inst_src0_da_reg_nr(devinfo, inst), + brw_inst_src0_da16_subreg_nr(devinfo, inst)); + } else { + return src_sends_ia(file, + devinfo, + BRW_REGISTER_TYPE_UD, + brw_inst_send_src0_ia16_addr_imm(devinfo, inst), + brw_inst_src0_ia_subreg_nr(devinfo, inst)); + } + } else if (brw_inst_src0_reg_file(devinfo, inst) == BRW_IMMEDIATE_VALUE) { return imm(file, devinfo, brw_inst_src0_type(devinfo, inst), inst); } else if (brw_inst_access_mode(devinfo, inst) == BRW_ALIGN_1) { if (brw_inst_src0_address_mode(devinfo, inst) == BRW_ADDRESS_DIRECT) { @@ -1373,7 +1461,13 @@ static int src1(FILE *file, const struct gen_device_info *devinfo, const brw_inst *inst) { - if (brw_inst_src1_reg_file(devinfo, inst) == BRW_IMMEDIATE_VALUE) { + if (is_split_send(devinfo, brw_inst_opcode(devinfo, inst))) { + return src_sends_da(file, + devinfo, + BRW_REGISTER_TYPE_UD, + brw_inst_send_src1_reg_nr(devinfo, inst), + 0 /* subreg_nr */); + } else if (brw_inst_src1_reg_file(devinfo, inst) == BRW_IMMEDIATE_VALUE) { return imm(file, devinfo, brw_inst_src1_type(devinfo, inst), inst); } else if 
(brw_inst_access_mode(devinfo, inst) == BRW_ALIGN_1) { if (brw_inst_src1_address_mode(devinfo, inst) == BRW_ADDRESS_DIRECT) { @@ -1485,9 +1579,9 @@ string(file, "("); err |= control(file, "predicate inverse", pred_inv, brw_inst_pred_inv(devinfo, inst), NULL); - format(file, "f%"PRIu64, devinfo->gen >= 7 ? brw_inst_flag_reg_nr(devinfo, inst) : 0); - if (brw_inst_flag_subreg_nr(devinfo, inst)) - format(file, ".%"PRIu64, brw_inst_flag_subreg_nr(devinfo, inst)); + format(file, "f%"PRIu64".%"PRIu64, + devinfo->gen >= 7 ? brw_inst_flag_reg_nr(devinfo, inst) : 0, + brw_inst_flag_subreg_nr(devinfo, inst)); if (brw_inst_access_mode(devinfo, inst) == BRW_ALIGN_1) { err |= control(file, "predicate control align1", pred_ctrl_align1, brw_inst_pred_control(devinfo, inst), NULL); @@ -1509,7 +1603,7 @@ string(file, " "); err |= control(file, "function", math_function, brw_inst_math_function(devinfo, inst), NULL); - } else if (opcode != BRW_OPCODE_SEND && opcode != BRW_OPCODE_SENDC) { + } else if (!is_send(opcode)) { err |= control(file, "conditional modifier", conditional_modifier, brw_inst_cond_modifier(devinfo, inst), NULL); @@ -1522,10 +1616,9 @@ opcode != BRW_OPCODE_CSEL && opcode != BRW_OPCODE_IF && opcode != BRW_OPCODE_WHILE))) { - format(file, ".f%"PRIu64, - devinfo->gen >= 7 ? brw_inst_flag_reg_nr(devinfo, inst) : 0); - if (brw_inst_flag_subreg_nr(devinfo, inst)) - format(file, ".%"PRIu64, brw_inst_flag_subreg_nr(devinfo, inst)); + format(file, ".f%"PRIu64".%"PRIu64, + devinfo->gen >= 7 ? 
brw_inst_flag_reg_nr(devinfo, inst) : 0, + brw_inst_flag_subreg_nr(devinfo, inst)); } } @@ -1599,20 +1692,47 @@ } } - if (opcode == BRW_OPCODE_SEND || opcode == BRW_OPCODE_SENDC) { + if (is_send(opcode)) { enum brw_message_target sfid = brw_inst_sfid(devinfo, inst); - if (brw_inst_src1_reg_file(devinfo, inst) != BRW_IMMEDIATE_VALUE) { - /* show the indirect descriptor source */ - pad(file, 48); - err |= src1(file, devinfo, inst); + bool has_imm_desc = false, has_imm_ex_desc = false; + uint32_t imm_desc = 0, imm_ex_desc = 0; + if (is_split_send(devinfo, opcode)) { pad(file, 64); + if (brw_inst_send_sel_reg32_desc(devinfo, inst)) { + /* show the indirect descriptor source */ + err |= src_sends_ia(file, devinfo, BRW_REGISTER_TYPE_UD, 0, 0); + } else { + has_imm_desc = true; + imm_desc = brw_inst_send_desc(devinfo, inst); + fprintf(file, "0x%08"PRIx32, imm_desc); + } + + pad(file, 80); + if (brw_inst_send_sel_reg32_ex_desc(devinfo, inst)) { + /* show the indirect descriptor source */ + err |= src_sends_ia(file, devinfo, BRW_REGISTER_TYPE_UD, 0, + brw_inst_send_ex_desc_ia_subreg_nr(devinfo, inst)); + } else { + has_imm_ex_desc = true; + imm_ex_desc = brw_inst_send_ex_desc(devinfo, inst); + fprintf(file, "0x%08"PRIx32, imm_ex_desc); + } } else { - pad(file, 48); - } + if (brw_inst_src1_reg_file(devinfo, inst) != BRW_IMMEDIATE_VALUE) { + /* show the indirect descriptor source */ + pad(file, 48); + err |= src1(file, devinfo, inst); + pad(file, 64); + } else { + has_imm_desc = true; + imm_desc = brw_inst_send_desc(devinfo, inst); + pad(file, 48); + } - /* Print message descriptor as immediate source */ - fprintf(file, "0x%08"PRIx64, inst->data[1] >> 32); + /* Print message descriptor as immediate source */ + fprintf(file, "0x%08"PRIx64, inst->data[1] >> 32); + } newline(file); pad(file, 16); @@ -1623,7 +1743,7 @@ sfid, &space); string(file, " MsgDesc:"); - if (brw_inst_src1_reg_file(devinfo, inst) != BRW_IMMEDIATE_VALUE) { + if (!has_imm_desc) { format(file, " indirect"); } 
else { switch (sfid) { @@ -1642,21 +1762,24 @@ case BRW_SFID_SAMPLER: if (devinfo->gen >= 5) { err |= control(file, "sampler message", gen5_sampler_msg_type, - brw_inst_sampler_msg_type(devinfo, inst), &space); + brw_sampler_desc_msg_type(devinfo, imm_desc), + &space); err |= control(file, "sampler simd mode", gen5_sampler_simd_mode, - brw_inst_sampler_simd_mode(devinfo, inst), &space); - format(file, " Surface = %"PRIu64" Sampler = %"PRIu64, - brw_inst_binding_table_index(devinfo, inst), - brw_inst_sampler(devinfo, inst)); + brw_sampler_desc_simd_mode(devinfo, imm_desc), + &space); + format(file, " Surface = %u Sampler = %u", + brw_sampler_desc_binding_table_index(devinfo, imm_desc), + brw_sampler_desc_sampler(devinfo, imm_desc)); } else { - format(file, " (%"PRIu64", %"PRIu64", %"PRIu64", ", - brw_inst_binding_table_index(devinfo, inst), - brw_inst_sampler(devinfo, inst), - brw_inst_sampler_msg_type(devinfo, inst)); + format(file, " (%u, %u, %u, ", + brw_sampler_desc_binding_table_index(devinfo, imm_desc), + brw_sampler_desc_sampler(devinfo, imm_desc), + brw_sampler_desc_msg_type(devinfo, imm_desc)); if (!devinfo->is_g4x) { err |= control(file, "sampler target format", sampler_target_format, - brw_inst_sampler_return_format(devinfo, inst), NULL); + brw_sampler_desc_return_format(devinfo, imm_desc), + NULL); } string(file, ")"); } @@ -1665,29 +1788,31 @@ case GEN6_SFID_DATAPORT_CONSTANT_CACHE: /* aka BRW_SFID_DATAPORT_READ on Gen4-5 */ if (devinfo->gen >= 6) { - format(file, " (%"PRIu64", %"PRIu64", %"PRIu64", %"PRIu64")", - brw_inst_binding_table_index(devinfo, inst), - brw_inst_dp_msg_control(devinfo, inst), - brw_inst_dp_msg_type(devinfo, inst), - devinfo->gen >= 7 ? 0 : brw_inst_dp_write_commit(devinfo, inst)); + format(file, " (%u, %u, %u, %u)", + brw_dp_desc_binding_table_index(devinfo, imm_desc), + brw_dp_desc_msg_control(devinfo, imm_desc), + brw_dp_desc_msg_type(devinfo, imm_desc), + devinfo->gen >= 7 ? 
0u : + brw_dp_write_desc_write_commit(devinfo, imm_desc)); } else { bool is_965 = devinfo->gen == 4 && !devinfo->is_g4x; err |= control(file, "DP read message type", is_965 ? gen4_dp_read_port_msg_type : g45_dp_read_port_msg_type, - brw_inst_dp_read_msg_type(devinfo, inst), + brw_dp_read_desc_msg_type(devinfo, imm_desc), &space); - format(file, " MsgCtrl = 0x%"PRIx64, - brw_inst_dp_read_msg_control(devinfo, inst)); + format(file, " MsgCtrl = 0x%u", + brw_dp_read_desc_msg_control(devinfo, imm_desc)); - format(file, " Surface = %"PRIu64, brw_inst_binding_table_index(devinfo, inst)); + format(file, " Surface = %u", + brw_dp_desc_binding_table_index(devinfo, imm_desc)); } break; case GEN6_SFID_DATAPORT_RENDER_CACHE: { /* aka BRW_SFID_DATAPORT_WRITE on Gen4-5 */ - unsigned msg_type = brw_inst_dp_write_msg_type(devinfo, inst); + unsigned msg_type = brw_dp_write_desc_msg_type(devinfo, imm_desc); err |= control(file, "DP rc message type", dp_rc_msg_type(devinfo), msg_type, &space); @@ -1701,16 +1826,18 @@ brw_inst_rt_message_type(devinfo, inst), &space); if (devinfo->gen >= 6 && brw_inst_rt_slot_group(devinfo, inst)) string(file, " Hi"); - if (brw_inst_rt_last(devinfo, inst)) + if (brw_dp_write_desc_last_render_target(devinfo, imm_desc)) string(file, " LastRT"); - if (devinfo->gen < 7 && brw_inst_dp_write_commit(devinfo, inst)) + if (devinfo->gen < 7 && + brw_dp_write_desc_write_commit(devinfo, imm_desc)) string(file, " WriteCommit"); } else { - format(file, " MsgCtrl = 0x%"PRIx64, - brw_inst_dp_write_msg_control(devinfo, inst)); + format(file, " MsgCtrl = 0x%u", + brw_dp_write_desc_msg_control(devinfo, imm_desc)); } - format(file, " Surface = %"PRIu64, brw_inst_binding_table_index(devinfo, inst)); + format(file, " Surface = %u", + brw_dp_desc_binding_table_index(devinfo, imm_desc)); break; } @@ -1767,17 +1894,20 @@ err |= control(file, "DP DC0 message type", dp_dc0_msg_type_gen7, - brw_inst_dp_msg_type(devinfo, inst), &space); + brw_dp_desc_msg_type(devinfo, imm_desc), 
&space); - format(file, ", %"PRIu64", ", brw_inst_binding_table_index(devinfo, inst)); + format(file, ", %u, ", + brw_dp_desc_binding_table_index(devinfo, imm_desc)); switch (brw_inst_dp_msg_type(devinfo, inst)) { case GEN7_DATAPORT_DC_UNTYPED_ATOMIC_OP: control(file, "atomic op", aop, - brw_inst_imm_ud(devinfo, inst) >> 8 & 0xf, &space); + brw_dp_desc_msg_control(devinfo, imm_desc) & 0xf, + &space); break; default: - format(file, "%"PRIu64, brw_inst_dp_msg_control(devinfo, inst)); + format(file, "%u", + brw_dp_desc_msg_control(devinfo, imm_desc)); } format(file, ")"); break; @@ -1788,14 +1918,14 @@ if (devinfo->gen >= 7) { format(file, " ("); - unsigned msg_ctrl = brw_inst_dp_msg_control(devinfo, inst); + unsigned msg_ctrl = brw_dp_desc_msg_control(devinfo, imm_desc); err |= control(file, "DP DC1 message type", dp_dc1_msg_type_hsw, - brw_inst_dp_msg_type(devinfo, inst), &space); + brw_dp_desc_msg_type(devinfo, imm_desc), &space); - format(file, ", Surface = %"PRIu64", ", - brw_inst_binding_table_index(devinfo, inst)); + format(file, ", Surface = %u, ", + brw_dp_desc_binding_table_index(devinfo, imm_desc)); switch (brw_inst_dp_msg_type(devinfo, inst)) { case HSW_DATAPORT_DC_PORT1_UNTYPED_ATOMIC_OP: @@ -1848,9 +1978,15 @@ if (space) string(file, " "); - format(file, "mlen %"PRIu64, brw_inst_mlen(devinfo, inst)); - format(file, " rlen %"PRIu64, brw_inst_rlen(devinfo, inst)); } + if (has_imm_desc) + format(file, "mlen %u", brw_message_desc_mlen(devinfo, imm_desc)); + if (has_imm_ex_desc) { + format(file, " ex_mlen %u", + brw_message_ex_desc_ex_mlen(devinfo, imm_ex_desc)); + } + if (has_imm_desc) + format(file, " rlen %u", brw_message_desc_rlen(devinfo, imm_desc)); } pad(file, 64); if (opcode != BRW_OPCODE_NOP && opcode != BRW_OPCODE_NENOP) { @@ -1893,7 +2029,7 @@ err |= control(file, "acc write control", accwr, brw_inst_acc_wr_control(devinfo, inst), &space); } - if (opcode == BRW_OPCODE_SEND || opcode == BRW_OPCODE_SENDC) + if (is_send(opcode)) err |= control(file, 
"end of thread", end_of_thread, brw_inst_eot(devinfo, inst), &space); if (space) diff -Nru mesa-18.3.3/src/intel/compiler/brw_eu_defines.h mesa-19.0.1/src/intel/compiler/brw_eu_defines.h --- mesa-18.3.3/src/intel/compiler/brw_eu_defines.h 2018-12-07 18:58:04.000000000 +0000 +++ mesa-19.0.1/src/intel/compiler/brw_eu_defines.h 2019-03-31 23:16:37.000000000 +0000 @@ -41,14 +41,14 @@ /* Using the GNU statement expression extension */ #define SET_FIELD(value, field) \ ({ \ - uint32_t fieldval = (value) << field ## _SHIFT; \ + uint32_t fieldval = (uint32_t)(value) << field ## _SHIFT; \ assert((fieldval & ~ field ## _MASK) == 0); \ fieldval & field ## _MASK; \ }) #define SET_BITS(value, high, low) \ ({ \ - const uint32_t fieldval = (value) << (low); \ + const uint32_t fieldval = (uint32_t)(value) << (low); \ assert((fieldval & ~INTEL_MASK(high, low)) == 0); \ fieldval & INTEL_MASK(high, low); \ }) @@ -316,6 +316,13 @@ SHADER_OPCODE_COS, /** + * A generic "send" opcode. The first two sources are the message + * descriptor and extended message descriptor respectively. The third + * and optional fourth sources are the message payload + */ + SHADER_OPCODE_SEND, + + /** * Texture sampling opcodes. * * LOGICAL opcodes are eventually translated to the matching non-LOGICAL @@ -355,6 +362,7 @@ SHADER_OPCODE_SAMPLEINFO_LOGICAL, SHADER_OPCODE_IMAGE_SIZE, + SHADER_OPCODE_IMAGE_SIZE_LOGICAL, /** * Combines multiple sources of size 1 into a larger virtual GRF. 
@@ -518,13 +526,10 @@ FS_OPCODE_UNIFORM_PULL_CONSTANT_LOAD, FS_OPCODE_UNIFORM_PULL_CONSTANT_LOAD_GEN7, FS_OPCODE_VARYING_PULL_CONSTANT_LOAD_GEN4, - FS_OPCODE_VARYING_PULL_CONSTANT_LOAD_GEN7, FS_OPCODE_VARYING_PULL_CONSTANT_LOAD_LOGICAL, FS_OPCODE_DISCARD_JUMP, FS_OPCODE_SET_SAMPLE_ID, FS_OPCODE_PACK_HALF_2x16_SPLIT, - FS_OPCODE_UNPACK_HALF_2x16_SPLIT_X, - FS_OPCODE_UNPACK_HALF_2x16_SPLIT_Y, FS_OPCODE_PLACEHOLDER_HALT, FS_OPCODE_INTERPOLATE_AT_SAMPLE, FS_OPCODE_INTERPOLATE_AT_SHARED_OFFSET, @@ -811,6 +816,8 @@ TEX_LOGICAL_SRC_LOD, /** dPdy if the operation takes explicit derivatives */ TEX_LOGICAL_SRC_LOD2, + /** Min LOD */ + TEX_LOGICAL_SRC_MIN_LOD, /** Sample index */ TEX_LOGICAL_SRC_SAMPLE_INDEX, /** MCS data */ diff -Nru mesa-18.3.3/src/intel/compiler/brw_eu_emit.c mesa-19.0.1/src/intel/compiler/brw_eu_emit.c --- mesa-18.3.3/src/intel/compiler/brw_eu_emit.c 2019-02-01 12:03:20.000000000 +0000 +++ mesa-19.0.1/src/intel/compiler/brw_eu_emit.c 2019-03-31 23:16:37.000000000 +0000 @@ -91,51 +91,65 @@ if (dest.file == BRW_MESSAGE_REGISTER_FILE) assert((dest.nr & ~BRW_MRF_COMPR4) < BRW_MAX_MRF(devinfo->gen)); - else if (dest.file != BRW_ARCHITECTURE_REGISTER_FILE) + else if (dest.file == BRW_GENERAL_REGISTER_FILE) assert(dest.nr < 128); gen7_convert_mrf_to_grf(p, &dest); - brw_inst_set_dst_file_type(devinfo, inst, dest.file, dest.type); - brw_inst_set_dst_address_mode(devinfo, inst, dest.address_mode); - - if (dest.address_mode == BRW_ADDRESS_DIRECT) { + if (brw_inst_opcode(devinfo, inst) == BRW_OPCODE_SENDS || + brw_inst_opcode(devinfo, inst) == BRW_OPCODE_SENDSC) { + assert(dest.file == BRW_GENERAL_REGISTER_FILE || + dest.file == BRW_ARCHITECTURE_REGISTER_FILE); + assert(dest.address_mode == BRW_ADDRESS_DIRECT); + assert(dest.subnr % 16 == 0); + assert(dest.hstride == BRW_HORIZONTAL_STRIDE_1 && + dest.vstride == dest.width + 1); + assert(!dest.negate && !dest.abs); brw_inst_set_dst_da_reg_nr(devinfo, inst, dest.nr); - - if (brw_inst_access_mode(devinfo, inst) == 
BRW_ALIGN_1) { - brw_inst_set_dst_da1_subreg_nr(devinfo, inst, dest.subnr); - if (dest.hstride == BRW_HORIZONTAL_STRIDE_0) - dest.hstride = BRW_HORIZONTAL_STRIDE_1; - brw_inst_set_dst_hstride(devinfo, inst, dest.hstride); - } else { - brw_inst_set_dst_da16_subreg_nr(devinfo, inst, dest.subnr / 16); - brw_inst_set_da16_writemask(devinfo, inst, dest.writemask); - if (dest.file == BRW_GENERAL_REGISTER_FILE || - dest.file == BRW_MESSAGE_REGISTER_FILE) { - assert(dest.writemask != 0); - } - /* From the Ivybridge PRM, Vol 4, Part 3, Section 5.2.4.1: - * Although Dst.HorzStride is a don't care for Align16, HW needs - * this to be programmed as "01". - */ - brw_inst_set_dst_hstride(devinfo, inst, 1); - } + brw_inst_set_dst_da16_subreg_nr(devinfo, inst, dest.subnr / 16); + brw_inst_set_send_dst_reg_file(devinfo, inst, dest.file); } else { - brw_inst_set_dst_ia_subreg_nr(devinfo, inst, dest.subnr); + brw_inst_set_dst_file_type(devinfo, inst, dest.file, dest.type); + brw_inst_set_dst_address_mode(devinfo, inst, dest.address_mode); - /* These are different sizes in align1 vs align16: - */ - if (brw_inst_access_mode(devinfo, inst) == BRW_ALIGN_1) { - brw_inst_set_dst_ia1_addr_imm(devinfo, inst, - dest.indirect_offset); - if (dest.hstride == BRW_HORIZONTAL_STRIDE_0) - dest.hstride = BRW_HORIZONTAL_STRIDE_1; - brw_inst_set_dst_hstride(devinfo, inst, dest.hstride); + if (dest.address_mode == BRW_ADDRESS_DIRECT) { + brw_inst_set_dst_da_reg_nr(devinfo, inst, dest.nr); + + if (brw_inst_access_mode(devinfo, inst) == BRW_ALIGN_1) { + brw_inst_set_dst_da1_subreg_nr(devinfo, inst, dest.subnr); + if (dest.hstride == BRW_HORIZONTAL_STRIDE_0) + dest.hstride = BRW_HORIZONTAL_STRIDE_1; + brw_inst_set_dst_hstride(devinfo, inst, dest.hstride); + } else { + brw_inst_set_dst_da16_subreg_nr(devinfo, inst, dest.subnr / 16); + brw_inst_set_da16_writemask(devinfo, inst, dest.writemask); + if (dest.file == BRW_GENERAL_REGISTER_FILE || + dest.file == BRW_MESSAGE_REGISTER_FILE) { + assert(dest.writemask 
!= 0); + } + /* From the Ivybridge PRM, Vol 4, Part 3, Section 5.2.4.1: + * Although Dst.HorzStride is a don't care for Align16, HW needs + * this to be programmed as "01". + */ + brw_inst_set_dst_hstride(devinfo, inst, 1); + } } else { - brw_inst_set_dst_ia16_addr_imm(devinfo, inst, - dest.indirect_offset); - /* even ignored in da16, still need to set as '01' */ - brw_inst_set_dst_hstride(devinfo, inst, 1); + brw_inst_set_dst_ia_subreg_nr(devinfo, inst, dest.subnr); + + /* These are different sizes in align1 vs align16: + */ + if (brw_inst_access_mode(devinfo, inst) == BRW_ALIGN_1) { + brw_inst_set_dst_ia1_addr_imm(devinfo, inst, + dest.indirect_offset); + if (dest.hstride == BRW_HORIZONTAL_STRIDE_0) + dest.hstride = BRW_HORIZONTAL_STRIDE_1; + brw_inst_set_dst_hstride(devinfo, inst, dest.hstride); + } else { + brw_inst_set_dst_ia16_addr_imm(devinfo, inst, + dest.indirect_offset); + /* even ignored in da16, still need to set as '01' */ + brw_inst_set_dst_hstride(devinfo, inst, 1); + } } } @@ -170,13 +184,16 @@ if (reg.file == BRW_MESSAGE_REGISTER_FILE) assert((reg.nr & ~BRW_MRF_COMPR4) < BRW_MAX_MRF(devinfo->gen)); - else if (reg.file != BRW_ARCHITECTURE_REGISTER_FILE) + else if (reg.file == BRW_GENERAL_REGISTER_FILE) assert(reg.nr < 128); gen7_convert_mrf_to_grf(p, ®); - if (devinfo->gen >= 6 && (brw_inst_opcode(devinfo, inst) == BRW_OPCODE_SEND || - brw_inst_opcode(devinfo, inst) == BRW_OPCODE_SENDC)) { + if (devinfo->gen >= 6 && + (brw_inst_opcode(devinfo, inst) == BRW_OPCODE_SEND || + brw_inst_opcode(devinfo, inst) == BRW_OPCODE_SENDC || + brw_inst_opcode(devinfo, inst) == BRW_OPCODE_SENDS || + brw_inst_opcode(devinfo, inst) == BRW_OPCODE_SENDSC)) { /* Any source modifiers or regions will be ignored, since this just * identifies the MRF/GRF to start reading the message contents from. * Check for some likely failures. 
@@ -186,84 +203,96 @@ assert(reg.address_mode == BRW_ADDRESS_DIRECT); } - brw_inst_set_src0_file_type(devinfo, inst, reg.file, reg.type); - brw_inst_set_src0_abs(devinfo, inst, reg.abs); - brw_inst_set_src0_negate(devinfo, inst, reg.negate); - brw_inst_set_src0_address_mode(devinfo, inst, reg.address_mode); - - if (reg.file == BRW_IMMEDIATE_VALUE) { - if (reg.type == BRW_REGISTER_TYPE_DF || - brw_inst_opcode(devinfo, inst) == BRW_OPCODE_DIM) - brw_inst_set_imm_df(devinfo, inst, reg.df); - else if (reg.type == BRW_REGISTER_TYPE_UQ || - reg.type == BRW_REGISTER_TYPE_Q) - brw_inst_set_imm_uq(devinfo, inst, reg.u64); - else - brw_inst_set_imm_ud(devinfo, inst, reg.ud); + if (brw_inst_opcode(devinfo, inst) == BRW_OPCODE_SENDS || + brw_inst_opcode(devinfo, inst) == BRW_OPCODE_SENDSC) { + assert(reg.file == BRW_GENERAL_REGISTER_FILE); + assert(reg.address_mode == BRW_ADDRESS_DIRECT); + assert(reg.subnr % 16 == 0); + assert(reg.hstride == BRW_HORIZONTAL_STRIDE_1 && + reg.vstride == reg.width + 1); + assert(!reg.negate && !reg.abs); + brw_inst_set_src0_da_reg_nr(devinfo, inst, reg.nr); + brw_inst_set_src0_da16_subreg_nr(devinfo, inst, reg.subnr / 16); + } else { + brw_inst_set_src0_file_type(devinfo, inst, reg.file, reg.type); + brw_inst_set_src0_abs(devinfo, inst, reg.abs); + brw_inst_set_src0_negate(devinfo, inst, reg.negate); + brw_inst_set_src0_address_mode(devinfo, inst, reg.address_mode); + + if (reg.file == BRW_IMMEDIATE_VALUE) { + if (reg.type == BRW_REGISTER_TYPE_DF || + brw_inst_opcode(devinfo, inst) == BRW_OPCODE_DIM) + brw_inst_set_imm_df(devinfo, inst, reg.df); + else if (reg.type == BRW_REGISTER_TYPE_UQ || + reg.type == BRW_REGISTER_TYPE_Q) + brw_inst_set_imm_uq(devinfo, inst, reg.u64); + else + brw_inst_set_imm_ud(devinfo, inst, reg.ud); - if (type_sz(reg.type) < 8) { - brw_inst_set_src1_reg_file(devinfo, inst, - BRW_ARCHITECTURE_REGISTER_FILE); - brw_inst_set_src1_reg_hw_type(devinfo, inst, - brw_inst_src0_reg_hw_type(devinfo, inst)); - } - } else { - if 
(reg.address_mode == BRW_ADDRESS_DIRECT) { - brw_inst_set_src0_da_reg_nr(devinfo, inst, reg.nr); - if (brw_inst_access_mode(devinfo, inst) == BRW_ALIGN_1) { - brw_inst_set_src0_da1_subreg_nr(devinfo, inst, reg.subnr); - } else { - brw_inst_set_src0_da16_subreg_nr(devinfo, inst, reg.subnr / 16); - } + if (type_sz(reg.type) < 8) { + brw_inst_set_src1_reg_file(devinfo, inst, + BRW_ARCHITECTURE_REGISTER_FILE); + brw_inst_set_src1_reg_hw_type(devinfo, inst, + brw_inst_src0_reg_hw_type(devinfo, inst)); + } } else { - brw_inst_set_src0_ia_subreg_nr(devinfo, inst, reg.subnr); + if (reg.address_mode == BRW_ADDRESS_DIRECT) { + brw_inst_set_src0_da_reg_nr(devinfo, inst, reg.nr); + if (brw_inst_access_mode(devinfo, inst) == BRW_ALIGN_1) { + brw_inst_set_src0_da1_subreg_nr(devinfo, inst, reg.subnr); + } else { + brw_inst_set_src0_da16_subreg_nr(devinfo, inst, reg.subnr / 16); + } + } else { + brw_inst_set_src0_ia_subreg_nr(devinfo, inst, reg.subnr); - if (brw_inst_access_mode(devinfo, inst) == BRW_ALIGN_1) { - brw_inst_set_src0_ia1_addr_imm(devinfo, inst, reg.indirect_offset); - } else { - brw_inst_set_src0_ia16_addr_imm(devinfo, inst, reg.indirect_offset); - } - } + if (brw_inst_access_mode(devinfo, inst) == BRW_ALIGN_1) { + brw_inst_set_src0_ia1_addr_imm(devinfo, inst, reg.indirect_offset); + } else { + brw_inst_set_src0_ia16_addr_imm(devinfo, inst, reg.indirect_offset); + } + } - if (brw_inst_access_mode(devinfo, inst) == BRW_ALIGN_1) { - if (reg.width == BRW_WIDTH_1 && - brw_inst_exec_size(devinfo, inst) == BRW_EXECUTE_1) { - brw_inst_set_src0_hstride(devinfo, inst, BRW_HORIZONTAL_STRIDE_0); - brw_inst_set_src0_width(devinfo, inst, BRW_WIDTH_1); - brw_inst_set_src0_vstride(devinfo, inst, BRW_VERTICAL_STRIDE_0); - } else { - brw_inst_set_src0_hstride(devinfo, inst, reg.hstride); - brw_inst_set_src0_width(devinfo, inst, reg.width); - brw_inst_set_src0_vstride(devinfo, inst, reg.vstride); - } - } else { - brw_inst_set_src0_da16_swiz_x(devinfo, inst, - BRW_GET_SWZ(reg.swizzle, 
BRW_CHANNEL_X)); - brw_inst_set_src0_da16_swiz_y(devinfo, inst, - BRW_GET_SWZ(reg.swizzle, BRW_CHANNEL_Y)); - brw_inst_set_src0_da16_swiz_z(devinfo, inst, - BRW_GET_SWZ(reg.swizzle, BRW_CHANNEL_Z)); - brw_inst_set_src0_da16_swiz_w(devinfo, inst, - BRW_GET_SWZ(reg.swizzle, BRW_CHANNEL_W)); - - if (reg.vstride == BRW_VERTICAL_STRIDE_8) { - /* This is an oddity of the fact we're using the same - * descriptions for registers in align_16 as align_1: - */ - brw_inst_set_src0_vstride(devinfo, inst, BRW_VERTICAL_STRIDE_4); - } else if (devinfo->gen == 7 && !devinfo->is_haswell && - reg.type == BRW_REGISTER_TYPE_DF && - reg.vstride == BRW_VERTICAL_STRIDE_2) { - /* From SNB PRM: - * - * "For Align16 access mode, only encodings of 0000 and 0011 - * are allowed. Other codes are reserved." - * - * Presumably the DevSNB behavior applies to IVB as well. - */ - brw_inst_set_src0_vstride(devinfo, inst, BRW_VERTICAL_STRIDE_4); + if (brw_inst_access_mode(devinfo, inst) == BRW_ALIGN_1) { + if (reg.width == BRW_WIDTH_1 && + brw_inst_exec_size(devinfo, inst) == BRW_EXECUTE_1) { + brw_inst_set_src0_hstride(devinfo, inst, BRW_HORIZONTAL_STRIDE_0); + brw_inst_set_src0_width(devinfo, inst, BRW_WIDTH_1); + brw_inst_set_src0_vstride(devinfo, inst, BRW_VERTICAL_STRIDE_0); + } else { + brw_inst_set_src0_hstride(devinfo, inst, reg.hstride); + brw_inst_set_src0_width(devinfo, inst, reg.width); + brw_inst_set_src0_vstride(devinfo, inst, reg.vstride); + } } else { - brw_inst_set_src0_vstride(devinfo, inst, reg.vstride); + brw_inst_set_src0_da16_swiz_x(devinfo, inst, + BRW_GET_SWZ(reg.swizzle, BRW_CHANNEL_X)); + brw_inst_set_src0_da16_swiz_y(devinfo, inst, + BRW_GET_SWZ(reg.swizzle, BRW_CHANNEL_Y)); + brw_inst_set_src0_da16_swiz_z(devinfo, inst, + BRW_GET_SWZ(reg.swizzle, BRW_CHANNEL_Z)); + brw_inst_set_src0_da16_swiz_w(devinfo, inst, + BRW_GET_SWZ(reg.swizzle, BRW_CHANNEL_W)); + + if (reg.vstride == BRW_VERTICAL_STRIDE_8) { + /* This is an oddity of the fact we're using the same + * descriptions 
for registers in align_16 as align_1: + */ + brw_inst_set_src0_vstride(devinfo, inst, BRW_VERTICAL_STRIDE_4); + } else if (devinfo->gen == 7 && !devinfo->is_haswell && + reg.type == BRW_REGISTER_TYPE_DF && + reg.vstride == BRW_VERTICAL_STRIDE_2) { + /* From SNB PRM: + * + * "For Align16 access mode, only encodings of 0000 and 0011 + * are allowed. Other codes are reserved." + * + * Presumably the DevSNB behavior applies to IVB as well. + */ + brw_inst_set_src0_vstride(devinfo, inst, BRW_VERTICAL_STRIDE_4); + } else { + brw_inst_set_src0_vstride(devinfo, inst, reg.vstride); + } } } } @@ -275,85 +304,98 @@ { const struct gen_device_info *devinfo = p->devinfo; - if (reg.file != BRW_ARCHITECTURE_REGISTER_FILE) + if (reg.file == BRW_GENERAL_REGISTER_FILE) assert(reg.nr < 128); - /* From the IVB PRM Vol. 4, Pt. 3, Section 3.3.3.5: - * - * "Accumulator registers may be accessed explicitly as src0 - * operands only." - */ - assert(reg.file != BRW_ARCHITECTURE_REGISTER_FILE || - reg.nr != BRW_ARF_ACCUMULATOR); - - gen7_convert_mrf_to_grf(p, ®); - assert(reg.file != BRW_MESSAGE_REGISTER_FILE); + if (brw_inst_opcode(devinfo, inst) == BRW_OPCODE_SENDS || + brw_inst_opcode(devinfo, inst) == BRW_OPCODE_SENDSC) { + assert(reg.file == BRW_GENERAL_REGISTER_FILE || + reg.file == BRW_ARCHITECTURE_REGISTER_FILE); + assert(reg.address_mode == BRW_ADDRESS_DIRECT); + assert(reg.subnr == 0); + assert(reg.hstride == BRW_HORIZONTAL_STRIDE_1 && + reg.vstride == reg.width + 1); + assert(!reg.negate && !reg.abs); + brw_inst_set_send_src1_reg_nr(devinfo, inst, reg.nr); + brw_inst_set_send_src1_reg_file(devinfo, inst, reg.file); + } else { + /* From the IVB PRM Vol. 4, Pt. 3, Section 3.3.3.5: + * + * "Accumulator registers may be accessed explicitly as src0 + * operands only." 
+ */ + assert(reg.file != BRW_ARCHITECTURE_REGISTER_FILE || + reg.nr != BRW_ARF_ACCUMULATOR); - brw_inst_set_src1_file_type(devinfo, inst, reg.file, reg.type); - brw_inst_set_src1_abs(devinfo, inst, reg.abs); - brw_inst_set_src1_negate(devinfo, inst, reg.negate); + gen7_convert_mrf_to_grf(p, ®); + assert(reg.file != BRW_MESSAGE_REGISTER_FILE); - /* Only src1 can be immediate in two-argument instructions. - */ - assert(brw_inst_src0_reg_file(devinfo, inst) != BRW_IMMEDIATE_VALUE); + brw_inst_set_src1_file_type(devinfo, inst, reg.file, reg.type); + brw_inst_set_src1_abs(devinfo, inst, reg.abs); + brw_inst_set_src1_negate(devinfo, inst, reg.negate); - if (reg.file == BRW_IMMEDIATE_VALUE) { - /* two-argument instructions can only use 32-bit immediates */ - assert(type_sz(reg.type) < 8); - brw_inst_set_imm_ud(devinfo, inst, reg.ud); - } else { - /* This is a hardware restriction, which may or may not be lifted - * in the future: + /* Only src1 can be immediate in two-argument instructions. */ - assert (reg.address_mode == BRW_ADDRESS_DIRECT); - /* assert (reg.file == BRW_GENERAL_REGISTER_FILE); */ + assert(brw_inst_src0_reg_file(devinfo, inst) != BRW_IMMEDIATE_VALUE); - brw_inst_set_src1_da_reg_nr(devinfo, inst, reg.nr); - if (brw_inst_access_mode(devinfo, inst) == BRW_ALIGN_1) { - brw_inst_set_src1_da1_subreg_nr(devinfo, inst, reg.subnr); + if (reg.file == BRW_IMMEDIATE_VALUE) { + /* two-argument instructions can only use 32-bit immediates */ + assert(type_sz(reg.type) < 8); + brw_inst_set_imm_ud(devinfo, inst, reg.ud); } else { - brw_inst_set_src1_da16_subreg_nr(devinfo, inst, reg.subnr / 16); - } + /* This is a hardware restriction, which may or may not be lifted + * in the future: + */ + assert (reg.address_mode == BRW_ADDRESS_DIRECT); + /* assert (reg.file == BRW_GENERAL_REGISTER_FILE); */ - if (brw_inst_access_mode(devinfo, inst) == BRW_ALIGN_1) { - if (reg.width == BRW_WIDTH_1 && - brw_inst_exec_size(devinfo, inst) == BRW_EXECUTE_1) { - 
brw_inst_set_src1_hstride(devinfo, inst, BRW_HORIZONTAL_STRIDE_0); - brw_inst_set_src1_width(devinfo, inst, BRW_WIDTH_1); - brw_inst_set_src1_vstride(devinfo, inst, BRW_VERTICAL_STRIDE_0); - } else { - brw_inst_set_src1_hstride(devinfo, inst, reg.hstride); - brw_inst_set_src1_width(devinfo, inst, reg.width); - brw_inst_set_src1_vstride(devinfo, inst, reg.vstride); - } - } else { - brw_inst_set_src1_da16_swiz_x(devinfo, inst, - BRW_GET_SWZ(reg.swizzle, BRW_CHANNEL_X)); - brw_inst_set_src1_da16_swiz_y(devinfo, inst, - BRW_GET_SWZ(reg.swizzle, BRW_CHANNEL_Y)); - brw_inst_set_src1_da16_swiz_z(devinfo, inst, - BRW_GET_SWZ(reg.swizzle, BRW_CHANNEL_Z)); - brw_inst_set_src1_da16_swiz_w(devinfo, inst, - BRW_GET_SWZ(reg.swizzle, BRW_CHANNEL_W)); - - if (reg.vstride == BRW_VERTICAL_STRIDE_8) { - /* This is an oddity of the fact we're using the same - * descriptions for registers in align_16 as align_1: - */ - brw_inst_set_src1_vstride(devinfo, inst, BRW_VERTICAL_STRIDE_4); - } else if (devinfo->gen == 7 && !devinfo->is_haswell && - reg.type == BRW_REGISTER_TYPE_DF && - reg.vstride == BRW_VERTICAL_STRIDE_2) { - /* From SNB PRM: - * - * "For Align16 access mode, only encodings of 0000 and 0011 - * are allowed. Other codes are reserved." - * - * Presumably the DevSNB behavior applies to IVB as well. 
- */ - brw_inst_set_src1_vstride(devinfo, inst, BRW_VERTICAL_STRIDE_4); + brw_inst_set_src1_da_reg_nr(devinfo, inst, reg.nr); + if (brw_inst_access_mode(devinfo, inst) == BRW_ALIGN_1) { + brw_inst_set_src1_da1_subreg_nr(devinfo, inst, reg.subnr); + } else { + brw_inst_set_src1_da16_subreg_nr(devinfo, inst, reg.subnr / 16); + } + + if (brw_inst_access_mode(devinfo, inst) == BRW_ALIGN_1) { + if (reg.width == BRW_WIDTH_1 && + brw_inst_exec_size(devinfo, inst) == BRW_EXECUTE_1) { + brw_inst_set_src1_hstride(devinfo, inst, BRW_HORIZONTAL_STRIDE_0); + brw_inst_set_src1_width(devinfo, inst, BRW_WIDTH_1); + brw_inst_set_src1_vstride(devinfo, inst, BRW_VERTICAL_STRIDE_0); + } else { + brw_inst_set_src1_hstride(devinfo, inst, reg.hstride); + brw_inst_set_src1_width(devinfo, inst, reg.width); + brw_inst_set_src1_vstride(devinfo, inst, reg.vstride); + } } else { - brw_inst_set_src1_vstride(devinfo, inst, reg.vstride); + brw_inst_set_src1_da16_swiz_x(devinfo, inst, + BRW_GET_SWZ(reg.swizzle, BRW_CHANNEL_X)); + brw_inst_set_src1_da16_swiz_y(devinfo, inst, + BRW_GET_SWZ(reg.swizzle, BRW_CHANNEL_Y)); + brw_inst_set_src1_da16_swiz_z(devinfo, inst, + BRW_GET_SWZ(reg.swizzle, BRW_CHANNEL_Z)); + brw_inst_set_src1_da16_swiz_w(devinfo, inst, + BRW_GET_SWZ(reg.swizzle, BRW_CHANNEL_W)); + + if (reg.vstride == BRW_VERTICAL_STRIDE_8) { + /* This is an oddity of the fact we're using the same + * descriptions for registers in align_16 as align_1: + */ + brw_inst_set_src1_vstride(devinfo, inst, BRW_VERTICAL_STRIDE_4); + } else if (devinfo->gen == 7 && !devinfo->is_haswell && + reg.type == BRW_REGISTER_TYPE_DF && + reg.vstride == BRW_VERTICAL_STRIDE_2) { + /* From SNB PRM: + * + * "For Align16 access mode, only encodings of 0000 and 0011 + * are allowed. Other codes are reserved." + * + * Presumably the DevSNB behavior applies to IVB as well. 
+ */ + brw_inst_set_src1_vstride(devinfo, inst, BRW_VERTICAL_STRIDE_4); + } else { + brw_inst_set_src1_vstride(devinfo, inst, reg.vstride); + } } } } @@ -654,9 +696,9 @@ gen7_convert_mrf_to_grf(p, &dest); assert(dest.nr < 128); - assert(src0.nr < 128); - assert(src1.nr < 128); - assert(src2.nr < 128); + assert(src0.file != BRW_IMMEDIATE_VALUE || src0.nr < 128); + assert(src1.file != BRW_IMMEDIATE_VALUE || src1.nr < 128); + assert(src2.file != BRW_IMMEDIATE_VALUE || src2.nr < 128); assert(dest.address_mode == BRW_ADDRESS_DIRECT); assert(src0.address_mode == BRW_ADDRESS_DIRECT); assert(src1.address_mode == BRW_ADDRESS_DIRECT); @@ -833,7 +875,15 @@ struct brw_reg src0, \ struct brw_reg src1, \ struct brw_reg src2) \ -{ \ +{ \ + if (p->current->access_mode == BRW_ALIGN_16) { \ + if (src0.vstride == BRW_VERTICAL_STRIDE_0) \ + src0.swizzle = BRW_SWIZZLE_XXXX; \ + if (src1.vstride == BRW_VERTICAL_STRIDE_0) \ + src1.swizzle = BRW_SWIZZLE_XXXX; \ + if (src2.vstride == BRW_VERTICAL_STRIDE_0) \ + src2.swizzle = BRW_SWIZZLE_XXXX; \ + } \ return brw_alu3(p, BRW_OPCODE_##OP, dest, src0, src1, src2); \ } @@ -855,6 +905,15 @@ assert(src1.type == BRW_REGISTER_TYPE_DF); \ assert(src2.type == BRW_REGISTER_TYPE_DF); \ } \ + \ + if (p->current->access_mode == BRW_ALIGN_16) { \ + if (src0.vstride == BRW_VERTICAL_STRIDE_0) \ + src0.swizzle = BRW_SWIZZLE_XXXX; \ + if (src1.vstride == BRW_VERTICAL_STRIDE_0) \ + src1.swizzle = BRW_SWIZZLE_XXXX; \ + if (src2.vstride == BRW_VERTICAL_STRIDE_0) \ + src2.swizzle = BRW_SWIZZLE_XXXX; \ + } \ return brw_alu3(p, BRW_OPCODE_##OP, dest, src0, src1, src2); \ } @@ -2456,14 +2515,106 @@ brw_set_src1(p, send, addr); } - if (dst.width < BRW_EXECUTE_8) - brw_inst_set_exec_size(devinfo, send, dst.width); - brw_set_dest(p, send, dst); brw_set_src0(p, send, retype(payload, BRW_REGISTER_TYPE_UD)); brw_inst_set_sfid(devinfo, send, sfid); } +void +brw_send_indirect_split_message(struct brw_codegen *p, + unsigned sfid, + struct brw_reg dst, + struct brw_reg 
payload0, + struct brw_reg payload1, + struct brw_reg desc, + unsigned desc_imm, + struct brw_reg ex_desc, + unsigned ex_desc_imm) +{ + const struct gen_device_info *devinfo = p->devinfo; + struct brw_inst *send; + + dst = retype(dst, BRW_REGISTER_TYPE_UW); + + assert(desc.type == BRW_REGISTER_TYPE_UD); + + if (desc.file == BRW_IMMEDIATE_VALUE) { + desc.ud |= desc_imm; + } else { + struct brw_reg addr = retype(brw_address_reg(0), BRW_REGISTER_TYPE_UD); + + brw_push_insn_state(p); + brw_set_default_access_mode(p, BRW_ALIGN_1); + brw_set_default_mask_control(p, BRW_MASK_DISABLE); + brw_set_default_exec_size(p, BRW_EXECUTE_1); + brw_set_default_predicate_control(p, BRW_PREDICATE_NONE); + + /* Load the indirect descriptor to an address register using OR so the + * caller can specify additional descriptor bits with the desc_imm + * immediate. + */ + brw_OR(p, addr, desc, brw_imm_ud(desc_imm)); + + brw_pop_insn_state(p); + desc = addr; + } + + if (ex_desc.file == BRW_IMMEDIATE_VALUE) { + ex_desc.ud |= ex_desc_imm; + } else { + struct brw_reg addr = retype(brw_address_reg(2), BRW_REGISTER_TYPE_UD); + + brw_push_insn_state(p); + brw_set_default_access_mode(p, BRW_ALIGN_1); + brw_set_default_mask_control(p, BRW_MASK_DISABLE); + brw_set_default_exec_size(p, BRW_EXECUTE_1); + brw_set_default_predicate_control(p, BRW_PREDICATE_NONE); + + /* Load the indirect extended descriptor to an address register using OR + * so the caller can specify additional descriptor bits with the + * desc_imm immediate. + * + * Even though the instruction dispatcher always pulls the SFID from the + * instruction itself, the extended descriptor sent to the actual unit + * gets the SFID from the extended descriptor which comes from the + * address register. If we don't OR it in, the external unit gets + * confused and hangs the GPU. 
+ */ + brw_OR(p, addr, ex_desc, brw_imm_ud(ex_desc_imm | sfid)); + + brw_pop_insn_state(p); + ex_desc = addr; + } + + send = next_insn(p, BRW_OPCODE_SENDS); + brw_set_dest(p, send, dst); + brw_set_src0(p, send, retype(payload0, BRW_REGISTER_TYPE_UD)); + brw_set_src1(p, send, retype(payload1, BRW_REGISTER_TYPE_UD)); + + if (desc.file == BRW_IMMEDIATE_VALUE) { + brw_inst_set_send_sel_reg32_desc(devinfo, send, 0); + brw_inst_set_send_desc(devinfo, send, desc.ud); + } else { + assert(desc.file == BRW_ARCHITECTURE_REGISTER_FILE); + assert(desc.nr == BRW_ARF_ADDRESS); + assert(desc.subnr == 0); + brw_inst_set_send_sel_reg32_desc(devinfo, send, 1); + } + + if (ex_desc.file == BRW_IMMEDIATE_VALUE) { + brw_inst_set_send_sel_reg32_ex_desc(devinfo, send, 0); + brw_inst_set_send_ex_desc(devinfo, send, ex_desc.ud); + } else { + assert(ex_desc.file == BRW_ARCHITECTURE_REGISTER_FILE); + assert(ex_desc.nr == BRW_ARF_ADDRESS); + assert((ex_desc.subnr & 0x3) == 0); + brw_inst_set_send_sel_reg32_ex_desc(devinfo, send, 1); + brw_inst_set_send_ex_desc_ia_subreg_nr(devinfo, send, ex_desc.subnr >> 2); + } + + brw_inst_set_sfid(devinfo, send, sfid); +} + static void brw_send_indirect_surface_message(struct brw_codegen *p, unsigned sfid, @@ -2724,45 +2875,14 @@ static unsigned brw_surface_payload_size(struct brw_codegen *p, unsigned num_channels, - bool has_simd4x2, - bool has_simd16) + unsigned exec_size /**< 0 for SIMD4x2 */) { - if (has_simd4x2 && brw_get_default_access_mode(p) == BRW_ALIGN_16) - return 1; - else if (has_simd16 && brw_get_default_exec_size(p) == BRW_EXECUTE_16) - return 2 * num_channels; - else + if (exec_size == 0) + return 1; /* SIMD4x2 */ + else if (exec_size <= 8) return num_channels; -} - -static uint32_t -brw_dp_untyped_atomic_desc(struct brw_codegen *p, - unsigned atomic_op, - bool response_expected) -{ - const struct gen_device_info *devinfo = p->devinfo; - unsigned msg_control = - atomic_op | /* Atomic Operation Type: BRW_AOP_* */ - (response_expected ? 
1 << 5 : 0); /* Return data expected */ - unsigned msg_type; - - if (devinfo->gen >= 8 || devinfo->is_haswell) { - if (brw_get_default_access_mode(p) == BRW_ALIGN_1) { - if (brw_get_default_exec_size(p) != BRW_EXECUTE_16) - msg_control |= 1 << 4; /* SIMD8 mode */ - - msg_type = HSW_DATAPORT_DC_PORT1_UNTYPED_ATOMIC_OP; - } else { - msg_type = HSW_DATAPORT_DC_PORT1_UNTYPED_ATOMIC_OP_SIMD4X2; - } - } else { - if (brw_get_default_exec_size(p) != BRW_EXECUTE_16) - msg_control |= 1 << 4; /* SIMD8 mode */ - - msg_type = GEN7_DATAPORT_DC_UNTYPED_ATOMIC_OP; - } - - return brw_dp_surface_desc(devinfo, msg_type, msg_control); + else + return 2 * num_channels; } void @@ -2779,12 +2899,17 @@ const unsigned sfid = (devinfo->gen >= 8 || devinfo->is_haswell ? HSW_SFID_DATAPORT_DATA_CACHE_1 : GEN7_SFID_DATAPORT_DATA_CACHE); - const unsigned response_length = brw_surface_payload_size( - p, response_expected, devinfo->gen >= 8 || devinfo->is_haswell, true); + const bool align1 = brw_get_default_access_mode(p) == BRW_ALIGN_1; + /* SIMD4x2 untyped atomic instructions only exist on HSW+ */ + const bool has_simd4x2 = devinfo->gen >= 8 || devinfo->is_haswell; + const unsigned exec_size = align1 ? 1 << brw_get_default_exec_size(p) : + has_simd4x2 ? 
0 : 8; + const unsigned response_length = + brw_surface_payload_size(p, response_expected, exec_size); const unsigned desc = brw_message_desc(devinfo, msg_length, response_length, header_present) | - brw_dp_untyped_atomic_desc(p, atomic_op, response_expected); - const bool align1 = brw_get_default_access_mode(p) == BRW_ALIGN_1; + brw_dp_untyped_atomic_desc(devinfo, exec_size, atomic_op, + response_expected); /* Mask out unused components -- This is especially important in Align16 * mode on generations that don't have native support for SIMD4x2 atomics, * because unused but enabled components will cause the dataport to perform @@ -2797,74 +2922,6 @@ payload, surface, desc); } -static uint32_t -brw_dp_untyped_atomic_float_desc(struct brw_codegen *p, - unsigned atomic_op, - bool response_expected) -{ - const struct gen_device_info *devinfo = p->devinfo; - const unsigned msg_type = GEN9_DATAPORT_DC_PORT1_UNTYPED_ATOMIC_FLOAT_OP; - unsigned msg_control = - atomic_op | /* Atomic Operation Type: BRW_AOP_F* */ - (response_expected ? 
1 << 5 : 0); /* Return data expected */ - - assert(devinfo->gen >= 9); - assert(brw_get_default_access_mode(p) == BRW_ALIGN_1); - - if (brw_get_default_exec_size(p) != BRW_EXECUTE_16) - msg_control |= 1 << 4; /* SIMD8 mode */ - - return brw_dp_surface_desc(devinfo, msg_type, msg_control); -} - -void -brw_untyped_atomic_float(struct brw_codegen *p, - struct brw_reg dst, - struct brw_reg payload, - struct brw_reg surface, - unsigned atomic_op, - unsigned msg_length, - bool response_expected, - bool header_present) -{ - const struct gen_device_info *devinfo = p->devinfo; - - assert(devinfo->gen >= 9); - assert(brw_get_default_access_mode(p) == BRW_ALIGN_1); - - const unsigned sfid = HSW_SFID_DATAPORT_DATA_CACHE_1; - const unsigned response_length = brw_surface_payload_size( - p, response_expected, true, true); - const unsigned desc = - brw_message_desc(devinfo, msg_length, response_length, header_present) | - brw_dp_untyped_atomic_float_desc(p, atomic_op, response_expected); - - brw_send_indirect_surface_message(p, sfid, - brw_writemask(dst, WRITEMASK_XYZW), - payload, surface, desc); -} - -static uint32_t -brw_dp_untyped_surface_read_desc(struct brw_codegen *p, - unsigned num_channels) -{ - const struct gen_device_info *devinfo = p->devinfo; - const unsigned msg_type = (devinfo->gen >= 8 || devinfo->is_haswell ? - HSW_DATAPORT_DC_PORT1_UNTYPED_SURFACE_READ : - GEN7_DATAPORT_DC_UNTYPED_SURFACE_READ); - /* Set mask of 32-bit channels to drop. */ - unsigned msg_control = 0xf & (0xf << num_channels); - - if (brw_get_default_access_mode(p) == BRW_ALIGN_1) { - if (brw_get_default_exec_size(p) == BRW_EXECUTE_16) - msg_control |= 1 << 4; /* SIMD16 mode */ - else - msg_control |= 2 << 4; /* SIMD8 mode */ - } - - return brw_dp_surface_desc(devinfo, msg_type, msg_control); -} - void brw_untyped_surface_read(struct brw_codegen *p, struct brw_reg dst, @@ -2877,41 +2934,17 @@ const unsigned sfid = (devinfo->gen >= 8 || devinfo->is_haswell ? 
HSW_SFID_DATAPORT_DATA_CACHE_1 : GEN7_SFID_DATAPORT_DATA_CACHE); + const bool align1 = brw_get_default_access_mode(p) == BRW_ALIGN_1; + const unsigned exec_size = align1 ? 1 << brw_get_default_exec_size(p) : 0; const unsigned response_length = - brw_surface_payload_size(p, num_channels, true, true); + brw_surface_payload_size(p, num_channels, exec_size); const unsigned desc = brw_message_desc(devinfo, msg_length, response_length, false) | - brw_dp_untyped_surface_read_desc(p, num_channels); + brw_dp_untyped_surface_rw_desc(devinfo, exec_size, num_channels, false); brw_send_indirect_surface_message(p, sfid, dst, payload, surface, desc); } -static uint32_t -brw_dp_untyped_surface_write_desc(struct brw_codegen *p, - unsigned num_channels) -{ - const struct gen_device_info *devinfo = p->devinfo; - const unsigned msg_type = (devinfo->gen >= 8 || devinfo->is_haswell ? - HSW_DATAPORT_DC_PORT1_UNTYPED_SURFACE_WRITE : - GEN7_DATAPORT_DC_UNTYPED_SURFACE_WRITE); - /* Set mask of 32-bit channels to drop. */ - unsigned msg_control = 0xf & (0xf << num_channels); - - if (brw_get_default_access_mode(p) == BRW_ALIGN_1) { - if (brw_get_default_exec_size(p) == BRW_EXECUTE_16) - msg_control |= 1 << 4; /* SIMD16 mode */ - else - msg_control |= 2 << 4; /* SIMD8 mode */ - } else { - if (devinfo->gen >= 8 || devinfo->is_haswell) - msg_control |= 0 << 4; /* SIMD4x2 mode */ - else - msg_control |= 2 << 4; /* SIMD8 mode */ - } - - return brw_dp_surface_desc(devinfo, msg_type, msg_control); -} - void brw_untyped_surface_write(struct brw_codegen *p, struct brw_reg payload, @@ -2924,124 +2957,21 @@ const unsigned sfid = (devinfo->gen >= 8 || devinfo->is_haswell ? HSW_SFID_DATAPORT_DATA_CACHE_1 : GEN7_SFID_DATAPORT_DATA_CACHE); + const bool align1 = brw_get_default_access_mode(p) == BRW_ALIGN_1; + /* SIMD4x2 untyped surface write instructions only exist on HSW+ */ + const bool has_simd4x2 = devinfo->gen >= 8 || devinfo->is_haswell; + const unsigned exec_size = align1 ? 
1 << brw_get_default_exec_size(p) : + has_simd4x2 ? 0 : 8; const unsigned desc = brw_message_desc(devinfo, msg_length, 0, header_present) | - brw_dp_untyped_surface_write_desc(p, num_channels); - const bool align1 = brw_get_default_access_mode(p) == BRW_ALIGN_1; + brw_dp_untyped_surface_rw_desc(devinfo, exec_size, num_channels, true); /* Mask out unused components -- See comment in brw_untyped_atomic(). */ - const unsigned mask = devinfo->gen == 7 && !devinfo->is_haswell && !align1 ? - WRITEMASK_X : WRITEMASK_XYZW; + const unsigned mask = !has_simd4x2 && !align1 ? WRITEMASK_X : WRITEMASK_XYZW; brw_send_indirect_surface_message(p, sfid, brw_writemask(brw_null_reg(), mask), payload, surface, desc); } -static unsigned -brw_byte_scattered_data_element_from_bit_size(unsigned bit_size) -{ - switch (bit_size) { - case 8: - return GEN7_BYTE_SCATTERED_DATA_ELEMENT_BYTE; - case 16: - return GEN7_BYTE_SCATTERED_DATA_ELEMENT_WORD; - case 32: - return GEN7_BYTE_SCATTERED_DATA_ELEMENT_DWORD; - default: - unreachable("Unsupported bit_size for byte scattered messages"); - } -} - -static uint32_t -brw_dp_byte_scattered_desc(struct brw_codegen *p, unsigned bit_size, - unsigned msg_type) -{ - const struct gen_device_info *devinfo = p->devinfo; - unsigned msg_control = - brw_byte_scattered_data_element_from_bit_size(bit_size) << 2; - - if (brw_get_default_exec_size(p) == BRW_EXECUTE_16) - msg_control |= 1; /* SIMD16 mode */ - else - msg_control |= 0; /* SIMD8 mode */ - - return brw_dp_surface_desc(devinfo, msg_type, msg_control); -} - -void -brw_byte_scattered_read(struct brw_codegen *p, - struct brw_reg dst, - struct brw_reg payload, - struct brw_reg surface, - unsigned msg_length, - unsigned bit_size) -{ - const struct gen_device_info *devinfo = p->devinfo; - assert(devinfo->gen > 7 || devinfo->is_haswell); - assert(brw_get_default_access_mode(p) == BRW_ALIGN_1); - const unsigned response_length = - brw_surface_payload_size(p, 1, true, true); - const unsigned desc = - 
brw_message_desc(devinfo, msg_length, response_length, false) | - brw_dp_byte_scattered_desc(p, bit_size, - HSW_DATAPORT_DC_PORT0_BYTE_SCATTERED_READ); - - brw_send_indirect_surface_message(p, GEN7_SFID_DATAPORT_DATA_CACHE, - dst, payload, surface, desc); -} - -void -brw_byte_scattered_write(struct brw_codegen *p, - struct brw_reg payload, - struct brw_reg surface, - unsigned msg_length, - unsigned bit_size, - bool header_present) -{ - const struct gen_device_info *devinfo = p->devinfo; - assert(devinfo->gen > 7 || devinfo->is_haswell); - assert(brw_get_default_access_mode(p) == BRW_ALIGN_1); - const unsigned desc = - brw_message_desc(devinfo, msg_length, 0, header_present) | - brw_dp_byte_scattered_desc(p, bit_size, - HSW_DATAPORT_DC_PORT0_BYTE_SCATTERED_WRITE); - - brw_send_indirect_surface_message(p, GEN7_SFID_DATAPORT_DATA_CACHE, - brw_writemask(brw_null_reg(), - WRITEMASK_XYZW), - payload, surface, desc); -} - -static uint32_t -brw_dp_typed_atomic_desc(struct brw_codegen *p, - unsigned atomic_op, - bool response_expected) -{ - const struct gen_device_info *devinfo = p->devinfo; - unsigned msg_control = - atomic_op | /* Atomic Operation Type: BRW_AOP_* */ - (response_expected ? 
1 << 5 : 0); /* Return data expected */ - unsigned msg_type; - - if (devinfo->gen >= 8 || devinfo->is_haswell) { - if (brw_get_default_access_mode(p) == BRW_ALIGN_1) { - if ((brw_get_default_group(p) / 8) % 2 == 1) - msg_control |= 1 << 4; /* Use high 8 slots of the sample mask */ - - msg_type = HSW_DATAPORT_DC_PORT1_TYPED_ATOMIC_OP; - } else { - msg_type = HSW_DATAPORT_DC_PORT1_TYPED_ATOMIC_OP_SIMD4X2; - } - - } else { - if ((brw_get_default_group(p) / 8) % 2 == 1) - msg_control |= 1 << 4; /* Use high 8 slots of the sample mask */ - - msg_type = GEN7_DATAPORT_RC_TYPED_ATOMIC_OP; - } - - return brw_dp_surface_desc(devinfo, msg_type, msg_control); -} - void brw_typed_atomic(struct brw_codegen *p, struct brw_reg dst, @@ -3055,12 +2985,19 @@ const unsigned sfid = (devinfo->gen >= 8 || devinfo->is_haswell ? HSW_SFID_DATAPORT_DATA_CACHE_1 : GEN6_SFID_DATAPORT_RENDER_CACHE); - const unsigned response_length = brw_surface_payload_size( - p, response_expected, devinfo->gen >= 8 || devinfo->is_haswell, false); + const bool align1 = brw_get_default_access_mode(p) == BRW_ALIGN_1; + /* SIMD4x2 typed atomic instructions only exist on HSW+ */ + const bool has_simd4x2 = devinfo->gen >= 8 || devinfo->is_haswell; + const unsigned exec_size = align1 ? 1 << brw_get_default_exec_size(p) : + has_simd4x2 ? 0 : 8; + /* Typed atomics don't support SIMD16 */ + assert(exec_size <= 8); + const unsigned response_length = + brw_surface_payload_size(p, response_expected, exec_size); const unsigned desc = brw_message_desc(devinfo, msg_length, response_length, header_present) | - brw_dp_typed_atomic_desc(p, atomic_op, response_expected); - const bool align1 = brw_get_default_access_mode(p) == BRW_ALIGN_1; + brw_dp_typed_atomic_desc(devinfo, exec_size, brw_get_default_group(p), + atomic_op, response_expected); /* Mask out unused components -- See comment in brw_untyped_atomic(). */ const unsigned mask = align1 ? 
WRITEMASK_XYZW : WRITEMASK_X; @@ -3068,36 +3005,6 @@ payload, surface, desc); } -static uint32_t -brw_dp_typed_surface_read_desc(struct brw_codegen *p, - unsigned num_channels) -{ - const struct gen_device_info *devinfo = p->devinfo; - /* Set mask of unused channels. */ - unsigned msg_control = 0xf & (0xf << num_channels); - unsigned msg_type; - - if (devinfo->gen >= 8 || devinfo->is_haswell) { - if (brw_get_default_access_mode(p) == BRW_ALIGN_1) { - if ((brw_get_default_group(p) / 8) % 2 == 1) - msg_control |= 2 << 4; /* Use high 8 slots of the sample mask */ - else - msg_control |= 1 << 4; /* Use low 8 slots of the sample mask */ - } - - msg_type = HSW_DATAPORT_DC_PORT1_TYPED_SURFACE_READ; - } else { - if (brw_get_default_access_mode(p) == BRW_ALIGN_1) { - if ((brw_get_default_group(p) / 8) % 2 == 1) - msg_control |= 1 << 5; /* Use high 8 slots of the sample mask */ - } - - msg_type = GEN7_DATAPORT_RC_TYPED_SURFACE_READ; - } - - return brw_dp_surface_desc(devinfo, msg_type, msg_control); -} - void brw_typed_surface_read(struct brw_codegen *p, struct brw_reg dst, @@ -3111,46 +3018,21 @@ const unsigned sfid = (devinfo->gen >= 8 || devinfo->is_haswell ? HSW_SFID_DATAPORT_DATA_CACHE_1 : GEN6_SFID_DATAPORT_RENDER_CACHE); - const unsigned response_length = brw_surface_payload_size( - p, num_channels, devinfo->gen >= 8 || devinfo->is_haswell, false); + const bool align1 = brw_get_default_access_mode(p) == BRW_ALIGN_1; + /* SIMD4x2 typed read instructions only exist on HSW+ */ + const bool has_simd4x2 = devinfo->gen >= 8 || devinfo->is_haswell; + const unsigned exec_size = align1 ? 1 << brw_get_default_exec_size(p) : + has_simd4x2 ? 
0 : 8; + const unsigned response_length = + brw_surface_payload_size(p, num_channels, exec_size); const unsigned desc = brw_message_desc(devinfo, msg_length, response_length, header_present) | - brw_dp_typed_surface_read_desc(p, num_channels); + brw_dp_typed_surface_rw_desc(devinfo, exec_size, brw_get_default_group(p), + num_channels, false); brw_send_indirect_surface_message(p, sfid, dst, payload, surface, desc); } -static uint32_t -brw_dp_typed_surface_write_desc(struct brw_codegen *p, - unsigned num_channels) -{ - const struct gen_device_info *devinfo = p->devinfo; - /* Set mask of unused channels. */ - unsigned msg_control = 0xf & (0xf << num_channels); - unsigned msg_type; - - if (devinfo->gen >= 8 || devinfo->is_haswell) { - if (brw_get_default_access_mode(p) == BRW_ALIGN_1) { - if ((brw_get_default_group(p) / 8) % 2 == 1) - msg_control |= 2 << 4; /* Use high 8 slots of the sample mask */ - else - msg_control |= 1 << 4; /* Use low 8 slots of the sample mask */ - } - - msg_type = HSW_DATAPORT_DC_PORT1_TYPED_SURFACE_WRITE; - - } else { - if (brw_get_default_access_mode(p) == BRW_ALIGN_1) { - if ((brw_get_default_group(p) / 8) % 2 == 1) - msg_control |= 1 << 5; /* Use high 8 slots of the sample mask */ - } - - msg_type = GEN7_DATAPORT_RC_TYPED_SURFACE_WRITE; - } - - return brw_dp_surface_desc(devinfo, msg_type, msg_control); -} - void brw_typed_surface_write(struct brw_codegen *p, struct brw_reg payload, @@ -3163,13 +3045,17 @@ const unsigned sfid = (devinfo->gen >= 8 || devinfo->is_haswell ? HSW_SFID_DATAPORT_DATA_CACHE_1 : GEN6_SFID_DATAPORT_RENDER_CACHE); + const bool align1 = brw_get_default_access_mode(p) == BRW_ALIGN_1; + /* SIMD4x2 typed read instructions only exist on HSW+ */ + const bool has_simd4x2 = devinfo->gen >= 8 || devinfo->is_haswell; + const unsigned exec_size = align1 ? 1 << brw_get_default_exec_size(p) : + has_simd4x2 ? 
0 : 8; const unsigned desc = brw_message_desc(devinfo, msg_length, 0, header_present) | - brw_dp_typed_surface_write_desc(p, num_channels); - const bool align1 = brw_get_default_access_mode(p) == BRW_ALIGN_1; + brw_dp_typed_surface_rw_desc(devinfo, exec_size, brw_get_default_group(p), + num_channels, true); /* Mask out unused components -- See comment in brw_untyped_atomic(). */ - const unsigned mask = (devinfo->gen == 7 && !devinfo->is_haswell && !align1 ? - WRITEMASK_X : WRITEMASK_XYZW); + const unsigned mask = !has_simd4x2 && !align1 ? WRITEMASK_X : WRITEMASK_XYZW; brw_send_indirect_surface_message(p, sfid, brw_writemask(brw_null_reg(), mask), payload, surface, desc); @@ -3295,6 +3181,14 @@ brw_push_insn_state(p); + /* The flag register is only used on Gen7 in align1 mode, so avoid setting + * unnecessary bits in the instruction words, get the information we need + * and reset the default flag register. This allows more instructions to be + * compacted. + */ + const unsigned flag_subreg = p->current->flag_subreg; + brw_set_default_flag_reg(p, 0, 0); + if (brw_get_default_access_mode(p) == BRW_ALIGN_1) { brw_set_default_mask_control(p, BRW_MASK_DISABLE); @@ -3328,8 +3222,7 @@ */ inst = brw_FBL(p, vec1(dst), exec_mask); } else { - const struct brw_reg flag = brw_flag_reg(p->current->flag_subreg / 2, - p->current->flag_subreg % 2); + const struct brw_reg flag = brw_flag_subreg(flag_subreg); brw_set_default_exec_size(p, BRW_EXECUTE_1); brw_MOV(p, retype(flag, BRW_REGISTER_TYPE_UD), brw_imm_ud(0)); @@ -3349,6 +3242,8 @@ brw_inst_set_group(devinfo, inst, lower_size * i + 8 * qtr_control); brw_inst_set_cond_modifier(devinfo, inst, BRW_CONDITIONAL_Z); brw_inst_set_exec_size(devinfo, inst, cvt(lower_size) - 1); + brw_inst_set_flag_reg_nr(devinfo, inst, flag_subreg / 2); + brw_inst_set_flag_subreg_nr(devinfo, inst, flag_subreg % 2); } /* Find the first bit set in the exec_size-wide portion of the flag @@ -3554,7 +3449,8 @@ brw_set_src0(p, send, brw_vec1_reg(payload.file, 
payload.nr, 0)); brw_set_desc(p, send, (brw_message_desc(devinfo, 2, 0, false) | - brw_dp_untyped_atomic_desc(p, BRW_AOP_ADD, false))); + brw_dp_untyped_atomic_desc(devinfo, 1, BRW_AOP_ADD, + false))); brw_inst_set_sfid(devinfo, send, sfid); brw_inst_set_binding_table_index(devinfo, send, surf_index); diff -Nru mesa-18.3.3/src/intel/compiler/brw_eu.h mesa-19.0.1/src/intel/compiler/brw_eu.h --- mesa-18.3.3/src/intel/compiler/brw_eu.h 2018-12-07 18:58:04.000000000 +0000 +++ mesa-19.0.1/src/intel/compiler/brw_eu.h 2019-03-31 23:16:37.000000000 +0000 @@ -266,6 +266,46 @@ } } +static inline unsigned +brw_message_desc_mlen(const struct gen_device_info *devinfo, uint32_t desc) +{ + if (devinfo->gen >= 5) + return GET_BITS(desc, 28, 25); + else + return GET_BITS(desc, 23, 20); +} + +static inline unsigned +brw_message_desc_rlen(const struct gen_device_info *devinfo, uint32_t desc) +{ + if (devinfo->gen >= 5) + return GET_BITS(desc, 24, 20); + else + return GET_BITS(desc, 19, 16); +} + +static inline bool +brw_message_desc_header_present(const struct gen_device_info *devinfo, + uint32_t desc) +{ + assert(devinfo->gen >= 5); + return GET_BITS(desc, 19, 19); +} + +static inline unsigned +brw_message_ex_desc(const struct gen_device_info *devinfo, + unsigned ex_msg_length) +{ + return SET_BITS(ex_msg_length, 9, 6); +} + +static inline unsigned +brw_message_ex_desc_ex_mlen(const struct gen_device_info *devinfo, + uint32_t ex_desc) +{ + return GET_BITS(ex_desc, 9, 6); +} + /** * Construct a message descriptor immediate with the specified sampler * function controls. 
@@ -293,6 +333,103 @@ SET_BITS(msg_type, 15, 14)); } +static inline unsigned +brw_sampler_desc_binding_table_index(const struct gen_device_info *devinfo, + uint32_t desc) +{ + return GET_BITS(desc, 7, 0); +} + +static inline unsigned +brw_sampler_desc_sampler(const struct gen_device_info *devinfo, uint32_t desc) +{ + return GET_BITS(desc, 11, 8); +} + +static inline unsigned +brw_sampler_desc_msg_type(const struct gen_device_info *devinfo, uint32_t desc) +{ + if (devinfo->gen >= 7) + return GET_BITS(desc, 16, 12); + else if (devinfo->gen >= 5 || devinfo->is_g4x) + return GET_BITS(desc, 15, 12); + else + return GET_BITS(desc, 15, 14); +} + +static inline unsigned +brw_sampler_desc_simd_mode(const struct gen_device_info *devinfo, uint32_t desc) +{ + assert(devinfo->gen >= 5); + if (devinfo->gen >= 7) + return GET_BITS(desc, 18, 17); + else + return GET_BITS(desc, 17, 16); +} + +static inline unsigned +brw_sampler_desc_return_format(const struct gen_device_info *devinfo, + uint32_t desc) +{ + assert(devinfo->gen == 4 && !devinfo->is_g4x); + return GET_BITS(desc, 13, 12); +} + +/** + * Construct a message descriptor for the dataport + */ +static inline uint32_t +brw_dp_desc(const struct gen_device_info *devinfo, + unsigned binding_table_index, + unsigned msg_type, + unsigned msg_control) +{ + /* Prior to gen6, things are too inconsistent; use the dp_read/write_desc + * helpers instead. 
+ */ + assert(devinfo->gen >= 6); + const unsigned desc = SET_BITS(binding_table_index, 7, 0); + if (devinfo->gen >= 8) { + return (desc | SET_BITS(msg_control, 13, 8) | + SET_BITS(msg_type, 18, 14)); + } else if (devinfo->gen >= 7) { + return (desc | SET_BITS(msg_control, 13, 8) | + SET_BITS(msg_type, 17, 14)); + } else { + return (desc | SET_BITS(msg_control, 12, 8) | + SET_BITS(msg_type, 16, 13)); + } +} + +static inline unsigned +brw_dp_desc_binding_table_index(const struct gen_device_info *devinfo, + uint32_t desc) +{ + return GET_BITS(desc, 7, 0); +} + +static inline unsigned +brw_dp_desc_msg_type(const struct gen_device_info *devinfo, uint32_t desc) +{ + assert(devinfo->gen >= 6); + if (devinfo->gen >= 8) + return GET_BITS(desc, 18, 14); + else if (devinfo->gen >= 7) + return GET_BITS(desc, 17, 14); + else + return GET_BITS(desc, 16, 13); +} + +static inline unsigned +brw_dp_desc_msg_control(const struct gen_device_info *devinfo, uint32_t desc) +{ + assert(devinfo->gen >= 6); + if (devinfo->gen >= 7) + return GET_BITS(desc, 13, 8); + else + return GET_BITS(desc, 12, 8); +} + /** * Construct a message descriptor immediate with the specified dataport read * function controls. 
@@ -304,23 +441,43 @@ unsigned msg_type, unsigned target_cache) { - const unsigned desc = SET_BITS(binding_table_index, 7, 0); - if (devinfo->gen >= 7) - return (desc | SET_BITS(msg_control, 13, 8) | - SET_BITS(msg_type, 17, 14)); - else if (devinfo->gen >= 6) - return (desc | SET_BITS(msg_control, 12, 8) | - SET_BITS(msg_type, 16, 13)); + if (devinfo->gen >= 6) + return brw_dp_desc(devinfo, binding_table_index, msg_type, msg_control); else if (devinfo->gen >= 5 || devinfo->is_g4x) - return (desc | SET_BITS(msg_control, 10, 8) | + return (SET_BITS(binding_table_index, 7, 0) | + SET_BITS(msg_control, 10, 8) | SET_BITS(msg_type, 13, 11) | SET_BITS(target_cache, 15, 14)); else - return (desc | SET_BITS(msg_control, 11, 8) | + return (SET_BITS(binding_table_index, 7, 0) | + SET_BITS(msg_control, 11, 8) | SET_BITS(msg_type, 13, 12) | SET_BITS(target_cache, 15, 14)); } +static inline unsigned +brw_dp_read_desc_msg_type(const struct gen_device_info *devinfo, uint32_t desc) +{ + if (devinfo->gen >= 6) + return brw_dp_desc_msg_type(devinfo, desc); + else if (devinfo->gen >= 5 || devinfo->is_g4x) + return GET_BITS(desc, 13, 11); + else + return GET_BITS(desc, 13, 12); +} + +static inline unsigned +brw_dp_read_desc_msg_control(const struct gen_device_info *devinfo, + uint32_t desc) +{ + if (devinfo->gen >= 6) + return brw_dp_desc_msg_control(devinfo, desc); + else if (devinfo->gen >= 5 || devinfo->is_g4x) + return GET_BITS(desc, 10, 8); + else + return GET_BITS(desc, 11, 8); +} + /** * Construct a message descriptor immediate with the specified dataport write * function controls. 
@@ -333,23 +490,60 @@ unsigned last_render_target, unsigned send_commit_msg) { - const unsigned desc = SET_BITS(binding_table_index, 7, 0); - if (devinfo->gen >= 7) - return (desc | SET_BITS(msg_control, 13, 8) | - SET_BITS(last_render_target, 12, 12) | - SET_BITS(msg_type, 17, 14)); - else if (devinfo->gen >= 6) - return (desc | SET_BITS(msg_control, 12, 8) | - SET_BITS(last_render_target, 12, 12) | - SET_BITS(msg_type, 16, 13) | - SET_BITS(send_commit_msg, 17, 17)); + assert(devinfo->gen <= 6 || !send_commit_msg); + if (devinfo->gen >= 6) + return brw_dp_desc(devinfo, binding_table_index, msg_type, msg_control) | + SET_BITS(last_render_target, 12, 12) | + SET_BITS(send_commit_msg, 17, 17); else - return (desc | SET_BITS(msg_control, 11, 8) | + return (SET_BITS(binding_table_index, 7, 0) | + SET_BITS(msg_control, 11, 8) | SET_BITS(last_render_target, 11, 11) | SET_BITS(msg_type, 14, 12) | SET_BITS(send_commit_msg, 15, 15)); } +static inline unsigned +brw_dp_write_desc_msg_type(const struct gen_device_info *devinfo, + uint32_t desc) +{ + if (devinfo->gen >= 6) + return brw_dp_desc_msg_type(devinfo, desc); + else + return GET_BITS(desc, 14, 12); +} + +static inline unsigned +brw_dp_write_desc_msg_control(const struct gen_device_info *devinfo, + uint32_t desc) +{ + if (devinfo->gen >= 6) + return brw_dp_desc_msg_control(devinfo, desc); + else + return GET_BITS(desc, 11, 8); +} + +static inline bool +brw_dp_write_desc_last_render_target(const struct gen_device_info *devinfo, + uint32_t desc) +{ + if (devinfo->gen >= 6) + return GET_BITS(desc, 12, 12); + else + return GET_BITS(desc, 11, 11); +} + +static inline bool +brw_dp_write_desc_write_commit(const struct gen_device_info *devinfo, + uint32_t desc) +{ + assert(devinfo->gen <= 6); + if (devinfo->gen >= 6) + return GET_BITS(desc, 17, 17); + else + return GET_BITS(desc, 15, 15); +} + /** * Construct a message descriptor immediate with the specified dataport * surface function controls. 
@@ -360,13 +554,221 @@ unsigned msg_control) { assert(devinfo->gen >= 7); - if (devinfo->gen >= 8) { - return (SET_BITS(msg_control, 13, 8) | - SET_BITS(msg_type, 18, 14)); + /* We'll OR in the binding table index later */ + return brw_dp_desc(devinfo, 0, msg_type, msg_control); +} + +static inline uint32_t +brw_dp_untyped_atomic_desc(const struct gen_device_info *devinfo, + unsigned exec_size, /**< 0 for SIMD4x2 */ + unsigned atomic_op, + bool response_expected) +{ + assert(exec_size <= 8 || exec_size == 16); + + unsigned msg_type; + if (devinfo->gen >= 8 || devinfo->is_haswell) { + if (exec_size > 0) { + msg_type = HSW_DATAPORT_DC_PORT1_UNTYPED_ATOMIC_OP; + } else { + msg_type = HSW_DATAPORT_DC_PORT1_UNTYPED_ATOMIC_OP_SIMD4X2; + } } else { - return (SET_BITS(msg_control, 13, 8) | - SET_BITS(msg_type, 17, 14)); + msg_type = GEN7_DATAPORT_DC_UNTYPED_ATOMIC_OP; + } + + const unsigned msg_control = + SET_BITS(atomic_op, 3, 0) | + SET_BITS(0 < exec_size && exec_size <= 8, 4, 4) | + SET_BITS(response_expected, 5, 5); + + return brw_dp_surface_desc(devinfo, msg_type, msg_control); +} + +static inline uint32_t +brw_dp_untyped_atomic_float_desc(const struct gen_device_info *devinfo, + unsigned exec_size, + unsigned atomic_op, + bool response_expected) +{ + assert(exec_size <= 8 || exec_size == 16); + assert(devinfo->gen >= 9); + + assert(exec_size > 0); + const unsigned msg_type = GEN9_DATAPORT_DC_PORT1_UNTYPED_ATOMIC_FLOAT_OP; + + const unsigned msg_control = + SET_BITS(atomic_op, 1, 0) | + SET_BITS(exec_size <= 8, 4, 4) | + SET_BITS(response_expected, 5, 5); + + return brw_dp_surface_desc(devinfo, msg_type, msg_control); +} + +static inline unsigned +brw_mdc_cmask(unsigned num_channels) +{ + /* See also MDC_CMASK in the SKL PRM Vol 2d. 
*/ + return 0xf & (0xf << num_channels); +} + +static inline uint32_t +brw_dp_untyped_surface_rw_desc(const struct gen_device_info *devinfo, + unsigned exec_size, /**< 0 for SIMD4x2 */ + unsigned num_channels, + bool write) +{ + assert(exec_size <= 8 || exec_size == 16); + + unsigned msg_type; + if (write) { + if (devinfo->gen >= 8 || devinfo->is_haswell) { + msg_type = HSW_DATAPORT_DC_PORT1_UNTYPED_SURFACE_WRITE; + } else { + msg_type = GEN7_DATAPORT_DC_UNTYPED_SURFACE_WRITE; + } + } else { + /* Read */ + if (devinfo->gen >= 8 || devinfo->is_haswell) { + msg_type = HSW_DATAPORT_DC_PORT1_UNTYPED_SURFACE_READ; + } else { + msg_type = GEN7_DATAPORT_DC_UNTYPED_SURFACE_READ; + } + } + + /* SIMD4x2 is only valid for read messages on IVB; use SIMD8 instead */ + if (write && devinfo->gen == 7 && !devinfo->is_haswell && exec_size == 0) + exec_size = 8; + + /* See also MDC_SM3 in the SKL PRM Vol 2d. */ + const unsigned simd_mode = exec_size == 0 ? 0 : /* SIMD4x2 */ + exec_size <= 8 ? 2 : 1; + + const unsigned msg_control = + SET_BITS(brw_mdc_cmask(num_channels), 3, 0) | + SET_BITS(simd_mode, 5, 4); + + return brw_dp_surface_desc(devinfo, msg_type, msg_control); +} + +static inline unsigned +brw_mdc_ds(unsigned bit_size) +{ + switch (bit_size) { + case 8: + return GEN7_BYTE_SCATTERED_DATA_ELEMENT_BYTE; + case 16: + return GEN7_BYTE_SCATTERED_DATA_ELEMENT_WORD; + case 32: + return GEN7_BYTE_SCATTERED_DATA_ELEMENT_DWORD; + default: + unreachable("Unsupported bit_size for byte scattered messages"); + } +} + +static inline uint32_t +brw_dp_byte_scattered_rw_desc(const struct gen_device_info *devinfo, + unsigned exec_size, + unsigned bit_size, + bool write) +{ + assert(exec_size <= 8 || exec_size == 16); + + assert(devinfo->gen > 7 || devinfo->is_haswell); + const unsigned msg_type = + write ? 
HSW_DATAPORT_DC_PORT0_BYTE_SCATTERED_WRITE : + HSW_DATAPORT_DC_PORT0_BYTE_SCATTERED_READ; + + assert(exec_size > 0); + const unsigned msg_control = + SET_BITS(exec_size == 16, 0, 0) | + SET_BITS(brw_mdc_ds(bit_size), 3, 2); + + return brw_dp_surface_desc(devinfo, msg_type, msg_control); +} + +static inline uint32_t +brw_dp_typed_atomic_desc(const struct gen_device_info *devinfo, + unsigned exec_size, + unsigned exec_group, + unsigned atomic_op, + bool response_expected) +{ + assert(exec_size > 0 || exec_group == 0); + assert(exec_group % 8 == 0); + + unsigned msg_type; + if (devinfo->gen >= 8 || devinfo->is_haswell) { + if (exec_size == 0) { + msg_type = HSW_DATAPORT_DC_PORT1_TYPED_ATOMIC_OP_SIMD4X2; + } else { + msg_type = HSW_DATAPORT_DC_PORT1_TYPED_ATOMIC_OP; + } + } else { + /* SIMD4x2 typed surface R/W messages only exist on HSW+ */ + assert(exec_size > 0); + msg_type = GEN7_DATAPORT_RC_TYPED_ATOMIC_OP; } + + const bool high_sample_mask = (exec_group / 8) % 2 == 1; + + const unsigned msg_control = + SET_BITS(atomic_op, 3, 0) | + SET_BITS(high_sample_mask, 4, 4) | + SET_BITS(response_expected, 5, 5); + + return brw_dp_surface_desc(devinfo, msg_type, msg_control); +} + +static inline uint32_t +brw_dp_typed_surface_rw_desc(const struct gen_device_info *devinfo, + unsigned exec_size, + unsigned exec_group, + unsigned num_channels, + bool write) +{ + assert(exec_size > 0 || exec_group == 0); + assert(exec_group % 8 == 0); + + /* Typed surface reads and writes don't support SIMD16 */ + assert(exec_size <= 8); + + unsigned msg_type; + if (write) { + if (devinfo->gen >= 8 || devinfo->is_haswell) { + msg_type = HSW_DATAPORT_DC_PORT1_TYPED_SURFACE_WRITE; + } else { + msg_type = GEN7_DATAPORT_RC_TYPED_SURFACE_WRITE; + } + } else { + if (devinfo->gen >= 8 || devinfo->is_haswell) { + msg_type = HSW_DATAPORT_DC_PORT1_TYPED_SURFACE_READ; + } else { + msg_type = GEN7_DATAPORT_RC_TYPED_SURFACE_READ; + } + } + + /* See also MDC_SG3 in the SKL PRM Vol 2d. 
*/ + unsigned msg_control; + if (devinfo->gen >= 8 || devinfo->is_haswell) { + /* See also MDC_SG3 in the SKL PRM Vol 2d. */ + const unsigned slot_group = exec_size == 0 ? 0 : /* SIMD4x2 */ + 1 + ((exec_group / 8) % 2); + + msg_control = + SET_BITS(brw_mdc_cmask(num_channels), 3, 0) | + SET_BITS(slot_group, 5, 4); + } else { + /* SIMD4x2 typed surface R/W messages only exist on HSW+ */ + assert(exec_size > 0); + const unsigned slot_group = ((exec_group / 8) % 2); + + msg_control = + SET_BITS(brw_mdc_cmask(num_channels), 3, 0) | + SET_BITS(slot_group, 5, 5); + } + + return brw_dp_surface_desc(devinfo, msg_type, msg_control); } /** @@ -409,6 +811,17 @@ struct brw_reg desc, unsigned desc_imm); +void +brw_send_indirect_split_message(struct brw_codegen *p, + unsigned sfid, + struct brw_reg dst, + struct brw_reg payload0, + struct brw_reg payload1, + struct brw_reg desc, + unsigned desc_imm, + struct brw_reg ex_desc, + unsigned ex_desc_imm); + void brw_ff_sync(struct brw_codegen *p, struct brw_reg dest, unsigned msg_reg_nr, @@ -578,17 +991,6 @@ bool header_present); void -brw_untyped_atomic_float(struct brw_codegen *p, - struct brw_reg dst, - struct brw_reg payload, - struct brw_reg surface, - unsigned atomic_op, - unsigned msg_length, - bool response_expected, - bool header_present); - - -void brw_untyped_surface_read(struct brw_codegen *p, struct brw_reg dst, struct brw_reg payload, @@ -632,22 +1034,6 @@ bool header_present); void -brw_byte_scattered_read(struct brw_codegen *p, - struct brw_reg dst, - struct brw_reg payload, - struct brw_reg surface, - unsigned msg_length, - unsigned bit_size); - -void -brw_byte_scattered_write(struct brw_codegen *p, - struct brw_reg payload, - struct brw_reg surface, - unsigned msg_length, - unsigned bit_size, - bool header_present); - -void brw_memory_fence(struct brw_codegen *p, struct brw_reg dst, enum opcode send_op); diff -Nru mesa-18.3.3/src/intel/compiler/brw_eu_validate.c mesa-19.0.1/src/intel/compiler/brw_eu_validate.c --- 
mesa-18.3.3/src/intel/compiler/brw_eu_validate.c 2018-09-27 19:13:54.000000000 +0000 +++ mesa-19.0.1/src/intel/compiler/brw_eu_validate.c 2019-03-31 23:16:37.000000000 +0000 @@ -1,5 +1,5 @@ /* - * Copyright © 2015 Intel Corporation + * Copyright © 2015-2019 Intel Corporation * * Permission is hereby granted, free of charge, to any person obtaining a * copy of this software and associated documentation files (the "Software"), @@ -24,6 +24,18 @@ /** @file brw_eu_validate.c * * This file implements a pass that validates shader assembly. + * + * The restrictions implemented herein are intended to verify that instructions + * in shader assembly do not violate restrictions documented in the graphics + * programming reference manuals. + * + * The restrictions are difficult for humans to quickly verify due to their + * complexity and abundance. + * + * It is critical that this code is thoroughly unit tested because false + * results will lead developers astray, which is worse than having no validator + * at all. Functional changes to this file without corresponding unit tests (in + * test_eu_validate.cpp) will be rejected. */ #include "brw_eu.h" @@ -90,6 +102,18 @@ } } +static bool +inst_is_split_send(const struct gen_device_info *devinfo, const brw_inst *inst) +{ + switch (brw_inst_opcode(devinfo, inst)) { + case BRW_OPCODE_SENDS: + case BRW_OPCODE_SENDSC: + return true; + default: + return false; + } +} + static unsigned signed_type(unsigned type) { @@ -236,6 +260,12 @@ if (num_sources == 3) return (struct string){}; + /* Nothing to test. Split sends can only encode a file in sources that are + * allowed to be NULL. 
+ */ + if (inst_is_split_send(devinfo, inst)) + return (struct string){}; + if (num_sources >= 1) ERROR_IF(src0_is_null(devinfo, inst), "src0 is null"); @@ -251,7 +281,41 @@ { struct string error_msg = { .str = NULL, .len = 0 }; - if (brw_inst_opcode(devinfo, inst) == BRW_OPCODE_SEND) { + if (inst_is_split_send(devinfo, inst)) { + ERROR_IF(brw_inst_send_src1_reg_file(devinfo, inst) == BRW_ARCHITECTURE_REGISTER_FILE && + brw_inst_send_src1_reg_nr(devinfo, inst) != BRW_ARF_NULL, + "src1 of split send must be a GRF or NULL"); + + ERROR_IF(brw_inst_eot(devinfo, inst) && + brw_inst_src0_da_reg_nr(devinfo, inst) < 112, + "send with EOT must use g112-g127"); + ERROR_IF(brw_inst_eot(devinfo, inst) && + brw_inst_send_src1_reg_file(devinfo, inst) == BRW_GENERAL_REGISTER_FILE && + brw_inst_send_src1_reg_nr(devinfo, inst) < 112, + "send with EOT must use g112-g127"); + + if (brw_inst_send_src1_reg_file(devinfo, inst) == BRW_GENERAL_REGISTER_FILE) { + /* Assume minimums if we don't know */ + unsigned mlen = 1; + if (!brw_inst_send_sel_reg32_desc(devinfo, inst)) { + const uint32_t desc = brw_inst_send_desc(devinfo, inst); + mlen = brw_message_desc_mlen(devinfo, desc); + } + + unsigned ex_mlen = 1; + if (!brw_inst_send_sel_reg32_ex_desc(devinfo, inst)) { + const uint32_t ex_desc = brw_inst_send_ex_desc(devinfo, inst); + ex_mlen = brw_message_ex_desc_ex_mlen(devinfo, ex_desc); + } + const unsigned src0_reg_nr = brw_inst_src0_da_reg_nr(devinfo, inst); + const unsigned src1_reg_nr = brw_inst_send_src1_reg_nr(devinfo, inst); + ERROR_IF((src0_reg_nr <= src1_reg_nr && + src1_reg_nr < src0_reg_nr + mlen) || + (src1_reg_nr <= src0_reg_nr && + src0_reg_nr < src1_reg_nr + ex_mlen), + "split send payloads must not overlap"); + } + } else if (inst_is_send(devinfo, inst)) { ERROR_IF(brw_inst_src0_address_mode(devinfo, inst) != BRW_ADDRESS_DIRECT, "send must use direct addressing"); @@ -521,6 +585,12 @@ if (num_sources == 3) return (struct string){}; + /* Split sends don't have the bits in the 
instruction to encode regions so + * there's nothing to check. + */ + if (inst_is_split_send(devinfo, inst)) + return (struct string){}; + if (brw_inst_access_mode(devinfo, inst) == BRW_ALIGN_16) { if (desc->ndst != 0 && !dst_is_null(devinfo, inst)) ERROR_IF(brw_inst_dst_hstride(devinfo, inst) != BRW_HORIZONTAL_STRIDE_1, @@ -1111,6 +1181,10 @@ if (num_sources == 3 || num_sources == 0) return (struct string){}; + /* Split sends don't have types so there's no doubles there. */ + if (inst_is_split_send(devinfo, inst)) + return (struct string){}; + enum brw_reg_type exec_type = execution_type(devinfo, inst); unsigned exec_type_size = brw_reg_type_to_size(exec_type); diff -Nru mesa-18.3.3/src/intel/compiler/brw_fs_builder.h mesa-19.0.1/src/intel/compiler/brw_fs_builder.h --- mesa-18.3.3/src/intel/compiler/brw_fs_builder.h 2018-10-21 19:21:32.000000000 +0000 +++ mesa-19.0.1/src/intel/compiler/brw_fs_builder.h 2019-03-31 23:16:37.000000000 +0000 @@ -114,11 +114,25 @@ fs_builder group(unsigned n, unsigned i) const { - assert(force_writemask_all || - (n <= dispatch_width() && i < dispatch_width() / n)); fs_builder bld = *this; + + if (n <= dispatch_width() && i < dispatch_width() / n) { + bld._group += i * n; + } else { + /* The requested channel group isn't a subset of the channel group + * of this builder, which means that the resulting instructions + * would use (potentially undefined) channel enable signals not + * specified by the parent builder. That's only valid if the + * instruction doesn't have per-channel semantics, in which case + * we should clear off the default group index in order to prevent + * emitting instructions with channel group not aligned to their + * own execution size. 
+ */ + assert(force_writemask_all); + bld._group = 0; + } + bld._dispatch_width = n; - bld._group += i * n; return bld; } @@ -412,6 +426,21 @@ return src_reg(component(dst, 0)); } + src_reg + move_to_vgrf(const src_reg &src, unsigned num_components) const + { + src_reg *const src_comps = new src_reg[num_components]; + for (unsigned i = 0; i < num_components; i++) + src_comps[i] = offset(src, dispatch_width(), i); + + const dst_reg dst = vgrf(src.type, num_components); + LOAD_PAYLOAD(dst, src_comps, num_components, 0); + + delete[] src_comps; + + return src_reg(dst); + } + void emit_scan(enum opcode opcode, const dst_reg &tmp, unsigned cluster_size, brw_conditional_mod mod) const @@ -437,43 +466,13 @@ if (cluster_size > 1) { const fs_builder ubld = exec_all().group(dispatch_width() / 2, 0); - dst_reg left = horiz_stride(tmp, 2); - dst_reg right = horiz_stride(horiz_offset(tmp, 1), 2); - - /* From the Cherryview PRM Vol. 7, "Register Region Restrictiosn": - * - * "When source or destination datatype is 64b or operation is - * integer DWord multiply, regioning in Align1 must follow - * these rules: - * - * [...] - * - * 3. Source and Destination offset must be the same, except - * the case of scalar source." - * - * In order to work around this, we create a temporary register - * and shift left over to match right. If we have a 64-bit type, - * we have to use two integer MOVs instead of a 64-bit MOV. 
- */ - if (need_matching_subreg_offset(opcode, tmp.type)) { - dst_reg tmp2 = vgrf(tmp.type); - dst_reg new_left = horiz_stride(horiz_offset(tmp2, 1), 2); - if (type_sz(tmp.type) > 4) { - ubld.MOV(subscript(new_left, BRW_REGISTER_TYPE_D, 0), - subscript(left, BRW_REGISTER_TYPE_D, 0)); - ubld.MOV(subscript(new_left, BRW_REGISTER_TYPE_D, 1), - subscript(left, BRW_REGISTER_TYPE_D, 1)); - } else { - ubld.MOV(new_left, left); - } - left = new_left; - } + const dst_reg left = horiz_stride(tmp, 2); + const dst_reg right = horiz_stride(horiz_offset(tmp, 1), 2); set_condmod(mod, ubld.emit(opcode, right, left, right)); } if (cluster_size > 2) { - if (type_sz(tmp.type) <= 4 && - !need_matching_subreg_offset(opcode, tmp.type)) { + if (type_sz(tmp.type) <= 4) { const fs_builder ubld = exec_all().group(dispatch_width() / 4, 0); src_reg left = horiz_stride(horiz_offset(tmp, 1), 4); @@ -773,38 +772,6 @@ } } - - /* From the Cherryview PRM Vol. 7, "Register Region Restrictiosn": - * - * "When source or destination datatype is 64b or operation is - * integer DWord multiply, regioning in Align1 must follow - * these rules: - * - * [...] - * - * 3. Source and Destination offset must be the same, except - * the case of scalar source." - * - * This helper just detects when we're in this case. 
- */ - bool - need_matching_subreg_offset(enum opcode opcode, - enum brw_reg_type type) const - { - if (!shader->devinfo->is_cherryview && - !gen_device_info_is_9lp(shader->devinfo)) - return false; - - if (type_sz(type) > 4) - return true; - - if (opcode == BRW_OPCODE_MUL && - !brw_reg_type_is_floating_point(type)) - return true; - - return false; - } - bblock_t *block; exec_node *cursor; diff -Nru mesa-18.3.3/src/intel/compiler/brw_fs_cmod_propagation.cpp mesa-19.0.1/src/intel/compiler/brw_fs_cmod_propagation.cpp --- mesa-18.3.3/src/intel/compiler/brw_fs_cmod_propagation.cpp 2018-09-27 19:13:54.000000000 +0000 +++ mesa-19.0.1/src/intel/compiler/brw_fs_cmod_propagation.cpp 2019-03-31 23:16:37.000000000 +0000 @@ -255,6 +255,13 @@ if (inst->opcode == BRW_OPCODE_AND) break; + /* Not safe to use inequality operators if the types are different + */ + if (scan_inst->dst.type != inst->src[0].type && + inst->conditional_mod != BRW_CONDITIONAL_Z && + inst->conditional_mod != BRW_CONDITIONAL_NZ) + break; + /* Comparisons operate differently for ints and floats */ if (scan_inst->dst.type != inst->dst.type && (scan_inst->dst.type == BRW_REGISTER_TYPE_F || diff -Nru mesa-18.3.3/src/intel/compiler/brw_fs_combine_constants.cpp mesa-19.0.1/src/intel/compiler/brw_fs_combine_constants.cpp --- mesa-18.3.3/src/intel/compiler/brw_fs_combine_constants.cpp 2017-11-05 00:14:08.000000000 +0000 +++ mesa-19.0.1/src/intel/compiler/brw_fs_combine_constants.cpp 2019-03-31 23:16:37.000000000 +0000 @@ -294,13 +294,14 @@ for (int i = 0; i < table.len; i++) { foreach_list_typed(reg_link, link, link, table.imm[i].uses) { fs_reg *reg = link->reg; + assert((isnan(reg->f) && isnan(table.imm[i].val)) || + fabsf(reg->f) == fabs(table.imm[i].val)); + reg->file = VGRF; - reg->nr = table.imm[i].nr; reg->offset = table.imm[i].subreg_offset; reg->stride = 0; reg->negate = signbit(reg->f) != signbit(table.imm[i].val); - assert((isnan(reg->f) && isnan(table.imm[i].val)) || - fabsf(reg->f) == 
fabs(table.imm[i].val)); + reg->nr = table.imm[i].nr; } } diff -Nru mesa-18.3.3/src/intel/compiler/brw_fs_copy_propagation.cpp mesa-19.0.1/src/intel/compiler/brw_fs_copy_propagation.cpp --- mesa-18.3.3/src/intel/compiler/brw_fs_copy_propagation.cpp 2019-02-01 12:03:20.000000000 +0000 +++ mesa-19.0.1/src/intel/compiler/brw_fs_copy_propagation.cpp 2019-03-31 23:16:37.000000000 +0000 @@ -371,6 +371,20 @@ return true; } +static bool +instruction_requires_packed_data(fs_inst *inst) +{ + switch (inst->opcode) { + case FS_OPCODE_DDX_FINE: + case FS_OPCODE_DDX_COARSE: + case FS_OPCODE_DDY_FINE: + case FS_OPCODE_DDY_COARSE: + return true; + default: + return false; + } +} + bool fs_visitor::try_copy_propagate(fs_inst *inst, int arg, acp_entry *entry) { @@ -417,6 +431,13 @@ inst->opcode == SHADER_OPCODE_GEN4_SCRATCH_WRITE) return false; + /* Some instructions implemented in the generator backend, such as + * derivatives, assume that their operands are packed so we can't + * generally propagate strided regions to them. + */ + if (instruction_requires_packed_data(inst) && entry->src.stride > 1) + return false; + /* Bail if the result of composing both strides would exceed the * hardware limit. 
*/ diff -Nru mesa-18.3.3/src/intel/compiler/brw_fs.cpp mesa-19.0.1/src/intel/compiler/brw_fs.cpp --- mesa-18.3.3/src/intel/compiler/brw_fs.cpp 2019-02-01 12:03:20.000000000 +0000 +++ mesa-19.0.1/src/intel/compiler/brw_fs.cpp 2019-03-31 23:16:37.000000000 +0000 @@ -213,31 +213,10 @@ } bool -fs_inst::equals(fs_inst *inst) const -{ - return (opcode == inst->opcode && - dst.equals(inst->dst) && - src[0].equals(inst->src[0]) && - src[1].equals(inst->src[1]) && - src[2].equals(inst->src[2]) && - saturate == inst->saturate && - predicate == inst->predicate && - conditional_mod == inst->conditional_mod && - mlen == inst->mlen && - base_mrf == inst->base_mrf && - target == inst->target && - eot == inst->eot && - header_size == inst->header_size && - shadow_compare == inst->shadow_compare && - exec_size == inst->exec_size && - offset == inst->offset); -} - -bool fs_inst::is_send_from_grf() const { switch (opcode) { - case FS_OPCODE_VARYING_PULL_CONSTANT_LOAD_GEN7: + case SHADER_OPCODE_SEND: case SHADER_OPCODE_SHADER_TIME_ADD: case FS_OPCODE_INTERPOLATE_AT_SAMPLE: case FS_OPCODE_INTERPOLATE_AT_SHARED_OFFSET: @@ -251,6 +230,7 @@ case SHADER_OPCODE_TYPED_ATOMIC: case SHADER_OPCODE_TYPED_SURFACE_READ: case SHADER_OPCODE_TYPED_SURFACE_WRITE: + case SHADER_OPCODE_IMAGE_SIZE: case SHADER_OPCODE_URB_WRITE_SIMD8: case SHADER_OPCODE_URB_WRITE_SIMD8_PER_SLOT: case SHADER_OPCODE_URB_WRITE_SIMD8_MASKED: @@ -271,6 +251,62 @@ } } +bool +fs_inst::is_control_source(unsigned arg) const +{ + switch (opcode) { + case FS_OPCODE_UNIFORM_PULL_CONSTANT_LOAD: + case FS_OPCODE_UNIFORM_PULL_CONSTANT_LOAD_GEN7: + case FS_OPCODE_VARYING_PULL_CONSTANT_LOAD_GEN4: + return arg == 0; + + case SHADER_OPCODE_BROADCAST: + case SHADER_OPCODE_SHUFFLE: + case SHADER_OPCODE_QUAD_SWIZZLE: + case FS_OPCODE_INTERPOLATE_AT_SAMPLE: + case FS_OPCODE_INTERPOLATE_AT_SHARED_OFFSET: + case FS_OPCODE_INTERPOLATE_AT_PER_SLOT_OFFSET: + case SHADER_OPCODE_IMAGE_SIZE: + case SHADER_OPCODE_GET_BUFFER_SIZE: + return arg == 1; + + 
case SHADER_OPCODE_MOV_INDIRECT: + case SHADER_OPCODE_CLUSTER_BROADCAST: + case SHADER_OPCODE_TEX: + case FS_OPCODE_TXB: + case SHADER_OPCODE_TXD: + case SHADER_OPCODE_TXF: + case SHADER_OPCODE_TXF_LZ: + case SHADER_OPCODE_TXF_CMS: + case SHADER_OPCODE_TXF_CMS_W: + case SHADER_OPCODE_TXF_UMS: + case SHADER_OPCODE_TXF_MCS: + case SHADER_OPCODE_TXL: + case SHADER_OPCODE_TXL_LZ: + case SHADER_OPCODE_TXS: + case SHADER_OPCODE_LOD: + case SHADER_OPCODE_TG4: + case SHADER_OPCODE_TG4_OFFSET: + case SHADER_OPCODE_SAMPLEINFO: + case SHADER_OPCODE_UNTYPED_ATOMIC: + case SHADER_OPCODE_UNTYPED_ATOMIC_FLOAT: + case SHADER_OPCODE_UNTYPED_SURFACE_READ: + case SHADER_OPCODE_UNTYPED_SURFACE_WRITE: + case SHADER_OPCODE_BYTE_SCATTERED_READ: + case SHADER_OPCODE_BYTE_SCATTERED_WRITE: + case SHADER_OPCODE_TYPED_ATOMIC: + case SHADER_OPCODE_TYPED_SURFACE_READ: + case SHADER_OPCODE_TYPED_SURFACE_WRITE: + return arg == 1 || arg == 2; + + case SHADER_OPCODE_SEND: + return arg == 0 || arg == 1; + + default: + return false; + } +} + /** * Returns true if this instruction's sources and destinations cannot * safely be the same register. 
@@ -399,7 +435,7 @@ } bool -fs_inst::can_do_source_mods(const struct gen_device_info *devinfo) +fs_inst::can_do_source_mods(const struct gen_device_info *devinfo) const { if (devinfo->gen == 6 && is_math()) return false; @@ -756,6 +792,7 @@ case SHADER_OPCODE_TXF_LOGICAL: case SHADER_OPCODE_TXL_LOGICAL: case SHADER_OPCODE_TXS_LOGICAL: + case SHADER_OPCODE_IMAGE_SIZE_LOGICAL: case FS_OPCODE_TXB_LOGICAL: case SHADER_OPCODE_TXF_CMS_LOGICAL: case SHADER_OPCODE_TXF_CMS_W_LOGICAL: @@ -868,6 +905,14 @@ fs_inst::size_read(int arg) const { switch (opcode) { + case SHADER_OPCODE_SEND: + if (arg == 2) { + return mlen * REG_SIZE; + } else if (arg == 3) { + return ex_mlen * REG_SIZE; + } + break; + case FS_OPCODE_FB_WRITE: case FS_OPCODE_REP_FB_WRITE: if (arg == 0) { @@ -892,6 +937,7 @@ case SHADER_OPCODE_TYPED_ATOMIC: case SHADER_OPCODE_TYPED_SURFACE_READ: case SHADER_OPCODE_TYPED_SURFACE_WRITE: + case SHADER_OPCODE_IMAGE_SIZE: case FS_OPCODE_INTERPOLATE_AT_SAMPLE: case FS_OPCODE_INTERPOLATE_AT_SHARED_OFFSET: case SHADER_OPCODE_BYTE_SCATTERED_WRITE: @@ -2394,8 +2440,6 @@ inst->src[i].nr = dst.nr; inst->src[i].offset = (base & (block_sz - 1)) + inst->src[i].offset % 4; - - brw_mark_surface_used(prog_data, index); } if (inst->opcode == SHADER_OPCODE_MOV_INDIRECT && @@ -2409,8 +2453,6 @@ inst->src[1], pull_index * 4); inst->remove(block); - - brw_mark_surface_used(prog_data, index); } } invalidate_live_intervals(); @@ -2421,9 +2463,45 @@ { bool progress = false; - foreach_block_and_inst(block, fs_inst, inst, cfg) { + foreach_block_and_inst_safe(block, fs_inst, inst, cfg) { switch (inst->opcode) { case BRW_OPCODE_MOV: + if (!devinfo->has_64bit_types && + (inst->dst.type == BRW_REGISTER_TYPE_DF || + inst->dst.type == BRW_REGISTER_TYPE_UQ || + inst->dst.type == BRW_REGISTER_TYPE_Q)) { + assert(inst->dst.type == inst->src[0].type); + assert(!inst->saturate); + assert(!inst->src[0].abs); + assert(!inst->src[0].negate); + const brw::fs_builder ibld(this, block, inst); + + if 
(inst->src[0].file == IMM) { + ibld.MOV(subscript(inst->dst, BRW_REGISTER_TYPE_UD, 1), + brw_imm_ud(inst->src[0].u64 >> 32)); + ibld.MOV(subscript(inst->dst, BRW_REGISTER_TYPE_UD, 0), + brw_imm_ud(inst->src[0].u64)); + } else { + ibld.MOV(subscript(inst->dst, BRW_REGISTER_TYPE_UD, 1), + subscript(inst->src[0], BRW_REGISTER_TYPE_UD, 1)); + ibld.MOV(subscript(inst->dst, BRW_REGISTER_TYPE_UD, 0), + subscript(inst->src[0], BRW_REGISTER_TYPE_UD, 0)); + } + + inst->remove(block); + progress = true; + } + + if ((inst->conditional_mod == BRW_CONDITIONAL_Z || + inst->conditional_mod == BRW_CONDITIONAL_NZ) && + inst->dst.is_null() && + (inst->src[0].abs || inst->src[0].negate)) { + inst->src[0].abs = false; + inst->src[0].negate = false; + progress = true; + break; + } + if (inst->src[0].file != IMM) break; @@ -2449,16 +2527,16 @@ break; case BRW_OPCODE_MUL: - if (inst->src[1].file != IMM) - continue; + if (inst->src[1].file != IMM) + continue; - /* a * 1.0 = a */ - if (inst->src[1].is_one()) { - inst->opcode = BRW_OPCODE_MOV; - inst->src[1] = reg_undef; - progress = true; - break; - } + /* a * 1.0 = a */ + if (inst->src[1].is_one()) { + inst->opcode = BRW_OPCODE_MOV; + inst->src[1] = reg_undef; + progress = true; + break; + } /* a * -1.0 = -a */ if (inst->src[1].is_negative_one()) { @@ -2486,7 +2564,7 @@ progress = true; break; } - break; + break; case BRW_OPCODE_ADD: if (inst->src[1].file != IMM) continue; @@ -2528,18 +2606,39 @@ } break; case BRW_OPCODE_CMP: - if (inst->conditional_mod == BRW_CONDITIONAL_GE && - inst->src[0].abs && - inst->src[0].negate && - inst->src[1].is_zero()) { + if ((inst->conditional_mod == BRW_CONDITIONAL_Z || + inst->conditional_mod == BRW_CONDITIONAL_NZ) && + inst->src[1].is_zero() && + (inst->src[0].abs || inst->src[0].negate)) { inst->src[0].abs = false; inst->src[0].negate = false; - inst->conditional_mod = BRW_CONDITIONAL_Z; progress = true; break; } break; case BRW_OPCODE_SEL: + if (!devinfo->has_64bit_types && + (inst->dst.type == 
BRW_REGISTER_TYPE_DF || + inst->dst.type == BRW_REGISTER_TYPE_UQ || + inst->dst.type == BRW_REGISTER_TYPE_Q)) { + assert(inst->dst.type == inst->src[0].type); + assert(!inst->saturate); + assert(!inst->src[0].abs && !inst->src[0].negate); + assert(!inst->src[1].abs && !inst->src[1].negate); + const brw::fs_builder ibld(this, block, inst); + + set_predicate(inst->predicate, + ibld.SEL(subscript(inst->dst, BRW_REGISTER_TYPE_UD, 0), + subscript(inst->src[0], BRW_REGISTER_TYPE_UD, 0), + subscript(inst->src[1], BRW_REGISTER_TYPE_UD, 0))); + set_predicate(inst->predicate, + ibld.SEL(subscript(inst->dst, BRW_REGISTER_TYPE_UD, 1), + subscript(inst->src[0], BRW_REGISTER_TYPE_UD, 1), + subscript(inst->src[1], BRW_REGISTER_TYPE_UD, 1))); + + inst->remove(block); + progress = true; + } if (inst->src[0].equals(inst->src[1])) { inst->opcode = BRW_OPCODE_MOV; inst->src[1] = reg_undef; @@ -2817,8 +2916,8 @@ bool progress = false; int depth = 0; - int remap[alloc.count]; - memset(remap, -1, sizeof(int) * alloc.count); + unsigned remap[alloc.count]; + memset(remap, ~0u, sizeof(unsigned) * alloc.count); foreach_block_and_inst(block, fs_inst, inst, cfg) { if (inst->opcode == BRW_OPCODE_IF || inst->opcode == BRW_OPCODE_DO) { @@ -2831,20 +2930,20 @@ /* Rewrite instruction sources. 
*/ for (int i = 0; i < inst->sources; i++) { if (inst->src[i].file == VGRF && - remap[inst->src[i].nr] != -1 && + remap[inst->src[i].nr] != ~0u && remap[inst->src[i].nr] != inst->src[i].nr) { inst->src[i].nr = remap[inst->src[i].nr]; progress = true; } } - const int dst = inst->dst.nr; + const unsigned dst = inst->dst.nr; if (depth == 0 && inst->dst.file == VGRF && alloc.sizes[inst->dst.nr] * REG_SIZE == inst->size_written && !inst->is_partial_write()) { - if (remap[dst] == -1) { + if (remap[dst] == ~0u) { remap[dst] = dst; } else { remap[dst] = alloc.allocate(regs_written(inst)); @@ -2852,7 +2951,7 @@ progress = true; } } else if (inst->dst.file == VGRF && - remap[dst] != -1 && + remap[dst] != ~0u && remap[dst] != dst) { inst->dst.nr = remap[dst]; progress = true; @@ -2863,7 +2962,7 @@ invalidate_live_intervals(); for (unsigned i = 0; i < ARRAY_SIZE(delta_xy); i++) { - if (delta_xy[i].file == VGRF && remap[delta_xy[i].nr] != -1) { + if (delta_xy[i].file == VGRF && remap[delta_xy[i].nr] != ~0u) { delta_xy[i].nr = remap[delta_xy[i].nr]; } } @@ -3018,6 +3117,7 @@ if (csel_inst != NULL) { progress = true; + csel_inst->saturate = inst->saturate; inst->remove(block); } @@ -3357,7 +3457,13 @@ if (inst->opcode == BRW_OPCODE_MOV && inst->dst.file == MRF) { fs_inst *prev_inst = last_mrf_move[inst->dst.nr]; - if (prev_inst && inst->equals(prev_inst)) { + if (prev_inst && prev_inst->opcode == BRW_OPCODE_MOV && + inst->dst.equals(prev_inst->dst) && + inst->src[0].equals(prev_inst->src[0]) && + inst->saturate == prev_inst->saturate && + inst->predicate == prev_inst->predicate && + inst->conditional_mod == prev_inst->conditional_mod && + inst->exec_size == prev_inst->exec_size) { inst->remove(block); progress = true; continue; @@ -3551,7 +3657,7 @@ fs_visitor::insert_gen4_post_send_dependency_workarounds(bblock_t *block, fs_inst *inst) { int write_len = regs_written(inst); - int first_write_grf = inst->dst.nr; + unsigned first_write_grf = inst->dst.nr; bool 
needs_dep[BRW_MAX_MRF(devinfo->gen)]; assert(write_len < (int)sizeof(needs_dep) - 1); @@ -4501,11 +4607,72 @@ return sampler.file != IMM || sampler.ud >= 16; } +static unsigned +sampler_msg_type(const gen_device_info *devinfo, + opcode opcode, bool shadow_compare) +{ + assert(devinfo->gen >= 5); + switch (opcode) { + case SHADER_OPCODE_TEX: + return shadow_compare ? GEN5_SAMPLER_MESSAGE_SAMPLE_COMPARE : + GEN5_SAMPLER_MESSAGE_SAMPLE; + case FS_OPCODE_TXB: + return shadow_compare ? GEN5_SAMPLER_MESSAGE_SAMPLE_BIAS_COMPARE : + GEN5_SAMPLER_MESSAGE_SAMPLE_BIAS; + case SHADER_OPCODE_TXL: + return shadow_compare ? GEN5_SAMPLER_MESSAGE_SAMPLE_LOD_COMPARE : + GEN5_SAMPLER_MESSAGE_SAMPLE_LOD; + case SHADER_OPCODE_TXL_LZ: + return shadow_compare ? GEN9_SAMPLER_MESSAGE_SAMPLE_C_LZ : + GEN9_SAMPLER_MESSAGE_SAMPLE_LZ; + case SHADER_OPCODE_TXS: + case SHADER_OPCODE_IMAGE_SIZE: + return GEN5_SAMPLER_MESSAGE_SAMPLE_RESINFO; + case SHADER_OPCODE_TXD: + assert(!shadow_compare || devinfo->gen >= 8 || devinfo->is_haswell); + return shadow_compare ? HSW_SAMPLER_MESSAGE_SAMPLE_DERIV_COMPARE : + GEN5_SAMPLER_MESSAGE_SAMPLE_DERIVS; + case SHADER_OPCODE_TXF: + return GEN5_SAMPLER_MESSAGE_SAMPLE_LD; + case SHADER_OPCODE_TXF_LZ: + assert(devinfo->gen >= 9); + return GEN9_SAMPLER_MESSAGE_SAMPLE_LD_LZ; + case SHADER_OPCODE_TXF_CMS_W: + assert(devinfo->gen >= 9); + return GEN9_SAMPLER_MESSAGE_SAMPLE_LD2DMS_W; + case SHADER_OPCODE_TXF_CMS: + return devinfo->gen >= 7 ? GEN7_SAMPLER_MESSAGE_SAMPLE_LD2DMS : + GEN5_SAMPLER_MESSAGE_SAMPLE_LD; + case SHADER_OPCODE_TXF_UMS: + assert(devinfo->gen >= 7); + return GEN7_SAMPLER_MESSAGE_SAMPLE_LD2DSS; + case SHADER_OPCODE_TXF_MCS: + assert(devinfo->gen >= 7); + return GEN7_SAMPLER_MESSAGE_SAMPLE_LD_MCS; + case SHADER_OPCODE_LOD: + return GEN5_SAMPLER_MESSAGE_LOD; + case SHADER_OPCODE_TG4: + assert(devinfo->gen >= 7); + return shadow_compare ? 
GEN7_SAMPLER_MESSAGE_SAMPLE_GATHER4_C : + GEN7_SAMPLER_MESSAGE_SAMPLE_GATHER4; + break; + case SHADER_OPCODE_TG4_OFFSET: + assert(devinfo->gen >= 7); + return shadow_compare ? GEN7_SAMPLER_MESSAGE_SAMPLE_GATHER4_PO_C : + GEN7_SAMPLER_MESSAGE_SAMPLE_GATHER4_PO; + case SHADER_OPCODE_SAMPLEINFO: + return GEN6_SAMPLER_MESSAGE_SAMPLE_SAMPLEINFO; + default: + unreachable("not reached"); + } +} + static void lower_sampler_logical_send_gen7(const fs_builder &bld, fs_inst *inst, opcode op, const fs_reg &coordinate, const fs_reg &shadow_c, fs_reg lod, const fs_reg &lod2, + const fs_reg &min_lod, const fs_reg &sample_index, const fs_reg &mcs, const fs_reg &surface, @@ -4515,6 +4682,7 @@ unsigned grad_components) { const gen_device_info *devinfo = bld.shader->devinfo; + const brw_stage_prog_data *prog_data = bld.shader->stage_prog_data; unsigned reg_width = bld.dispatch_width() / 8; unsigned header_size = 0, length = 0; fs_reg sources[MAX_SAMPLER_MESSAGE_SIZE]; @@ -4625,6 +4793,11 @@ bld.MOV(retype(sources[length], BRW_REGISTER_TYPE_UD), lod); length++; break; + case SHADER_OPCODE_IMAGE_SIZE: + /* We need an LOD; just use 0 */ + bld.MOV(retype(sources[length], BRW_REGISTER_TYPE_UD), brw_imm_ud(0)); + length++; + break; case SHADER_OPCODE_TXF: /* Unfortunately, the parameters for LD are intermixed: u, lod, v, r. * On Gen9 they are u, v, lod, r @@ -4716,7 +4889,16 @@ bld.MOV(sources[length++], offset(coordinate, bld, i)); } - int mlen; + if (min_lod.file != BAD_FILE) { + /* Account for all of the missing coordinate sources */ + length += 4 - coord_components; + if (op == SHADER_OPCODE_TXD) + length += (3 - grad_components) * 2; + + bld.MOV(sources[length++], min_lod); + } + + unsigned mlen; if (reg_width == 2) mlen = length * reg_width - header_size; else @@ -4727,14 +4909,81 @@ bld.LOAD_PAYLOAD(src_payload, sources, length, header_size); /* Generate the SEND. 
*/ - inst->opcode = op; - inst->src[0] = src_payload; - inst->src[1] = surface; - inst->src[2] = sampler; - inst->resize_sources(3); + inst->opcode = SHADER_OPCODE_SEND; inst->mlen = mlen; inst->header_size = header_size; + const unsigned msg_type = + sampler_msg_type(devinfo, op, inst->shadow_compare); + const unsigned simd_mode = + inst->exec_size <= 8 ? BRW_SAMPLER_SIMD_MODE_SIMD8 : + BRW_SAMPLER_SIMD_MODE_SIMD16; + + uint32_t base_binding_table_index; + switch (op) { + case SHADER_OPCODE_TG4: + case SHADER_OPCODE_TG4_OFFSET: + base_binding_table_index = prog_data->binding_table.gather_texture_start; + break; + case SHADER_OPCODE_IMAGE_SIZE: + base_binding_table_index = prog_data->binding_table.image_start; + break; + default: + base_binding_table_index = prog_data->binding_table.texture_start; + break; + } + + inst->sfid = BRW_SFID_SAMPLER; + if (surface.file == IMM && sampler.file == IMM) { + inst->desc = brw_sampler_desc(devinfo, + surface.ud + base_binding_table_index, + sampler.ud % 16, + msg_type, + simd_mode, + 0 /* return_format unused on gen7+ */); + inst->src[0] = brw_imm_ud(0); + } else { + /* Immediate portion of the descriptor */ + inst->desc = brw_sampler_desc(devinfo, + 0, /* surface */ + 0, /* sampler */ + msg_type, + simd_mode, + 0 /* return_format unused on gen7+ */); + const fs_builder ubld = bld.group(1, 0).exec_all(); + fs_reg desc = ubld.vgrf(BRW_REGISTER_TYPE_UD); + if (surface.equals(sampler)) { + /* This case is common in GL */ + ubld.MUL(desc, surface, brw_imm_ud(0x101)); + } else { + if (sampler.file == IMM) { + ubld.OR(desc, surface, brw_imm_ud(sampler.ud << 8)); + } else { + ubld.SHL(desc, sampler, brw_imm_ud(8)); + ubld.OR(desc, desc, surface); + } + } + if (base_binding_table_index) + ubld.ADD(desc, desc, brw_imm_ud(base_binding_table_index)); + ubld.AND(desc, desc, brw_imm_ud(0xfff)); + + inst->src[0] = component(desc, 0); + } + inst->src[1] = brw_imm_ud(0); /* ex_desc */ + + inst->src[2] = src_payload; + inst->resize_sources(3); 
+ + if (inst->eot) { + /* EOT sampler messages don't make sense to split because it would + * involve ending half of the thread early. + */ + assert(inst->group == 0); + /* We need to use SENDC for EOT sampler messages */ + inst->check_tdr = true; + inst->send_has_side_effects = true; + } + /* Message length > MAX_SAMPLER_MESSAGE_SIZE disallowed by hardware. */ assert(inst->mlen <= MAX_SAMPLER_MESSAGE_SIZE); } @@ -4747,6 +4996,7 @@ const fs_reg &shadow_c = inst->src[TEX_LOGICAL_SRC_SHADOW_C]; const fs_reg &lod = inst->src[TEX_LOGICAL_SRC_LOD]; const fs_reg &lod2 = inst->src[TEX_LOGICAL_SRC_LOD2]; + const fs_reg &min_lod = inst->src[TEX_LOGICAL_SRC_MIN_LOD]; const fs_reg &sample_index = inst->src[TEX_LOGICAL_SRC_SAMPLE_INDEX]; const fs_reg &mcs = inst->src[TEX_LOGICAL_SRC_MCS]; const fs_reg &surface = inst->src[TEX_LOGICAL_SRC_SURFACE]; @@ -4759,7 +5009,8 @@ if (devinfo->gen >= 7) { lower_sampler_logical_send_gen7(bld, inst, op, coordinate, - shadow_c, lod, lod2, sample_index, + shadow_c, lod, lod2, min_lod, + sample_index, mcs, surface, sampler, tg4_offset, coord_components, grad_components); } else if (devinfo->gen >= 5) { @@ -4790,8 +5041,7 @@ } static void -lower_surface_logical_send(const fs_builder &bld, fs_inst *inst, opcode op, - const fs_reg &sample_mask) +lower_surface_logical_send(const fs_builder &bld, fs_inst *inst) { const gen_device_info *devinfo = bld.shader->devinfo; @@ -4801,10 +5051,17 @@ const fs_reg &surface = inst->src[2]; const UNUSED fs_reg &dims = inst->src[3]; const fs_reg &arg = inst->src[4]; + assert(arg.file == IMM); /* Calculate the total number of components of the payload. 
*/ const unsigned addr_sz = inst->components_read(0); const unsigned src_sz = inst->components_read(1); + + const bool is_typed_access = + inst->opcode == SHADER_OPCODE_TYPED_SURFACE_READ_LOGICAL || + inst->opcode == SHADER_OPCODE_TYPED_SURFACE_WRITE_LOGICAL || + inst->opcode == SHADER_OPCODE_TYPED_ATOMIC_LOGICAL; + /* From the BDW PRM Volume 7, page 147: * * "For the Data Cache Data Port*, the header must be present for the @@ -4815,28 +5072,43 @@ * messages prior to Gen9, since we have to provide a header anyway. On * Gen11+ the header has been removed so we can only use predication. */ - const unsigned header_sz = devinfo->gen < 9 && - (op == SHADER_OPCODE_TYPED_SURFACE_READ || - op == SHADER_OPCODE_TYPED_SURFACE_WRITE || - op == SHADER_OPCODE_TYPED_ATOMIC) ? 1 : 0; - const unsigned sz = header_sz + addr_sz + src_sz; - - /* Allocate space for the payload. */ - fs_reg *const components = new fs_reg[sz]; - const fs_reg payload = bld.vgrf(BRW_REGISTER_TYPE_UD, sz); - unsigned n = 0; - - /* Construct the payload. */ - if (header_sz) - components[n++] = emit_surface_header(bld, sample_mask); + const unsigned header_sz = devinfo->gen < 9 && is_typed_access ? 1 : 0; - for (unsigned i = 0; i < addr_sz; i++) - components[n++] = offset(addr, bld, i); + const bool has_side_effects = inst->has_side_effects(); + fs_reg sample_mask = has_side_effects ? bld.sample_mask_reg() : + fs_reg(brw_imm_d(0xffff)); + + fs_reg payload, payload2; + unsigned mlen, ex_mlen = 0; + if (devinfo->gen >= 9) { + /* We have split sends on gen9 and above */ + assert(header_sz == 0); + payload = bld.move_to_vgrf(addr, addr_sz); + payload2 = bld.move_to_vgrf(src, src_sz); + mlen = addr_sz * (inst->exec_size / 8); + ex_mlen = src_sz * (inst->exec_size / 8); + } else { + /* Allocate space for the payload. */ + const unsigned sz = header_sz + addr_sz + src_sz; + payload = bld.vgrf(BRW_REGISTER_TYPE_UD, sz); + fs_reg *const components = new fs_reg[sz]; + unsigned n = 0; + + /* Construct the payload. 
*/ + if (header_sz) + components[n++] = emit_surface_header(bld, sample_mask); + + for (unsigned i = 0; i < addr_sz; i++) + components[n++] = offset(addr, bld, i); - for (unsigned i = 0; i < src_sz; i++) - components[n++] = offset(src, bld, i); + for (unsigned i = 0; i < src_sz; i++) + components[n++] = offset(src, bld, i); - bld.LOAD_PAYLOAD(payload, components, sz, header_sz); + bld.LOAD_PAYLOAD(payload, components, sz, header_sz); + mlen = header_sz + (addr_sz + src_sz) * inst->exec_size / 8; + + delete[] components; + } /* Predicate the instruction on the sample mask if no header is * provided. @@ -4864,17 +5136,128 @@ } } + uint32_t sfid; + switch (inst->opcode) { + case SHADER_OPCODE_BYTE_SCATTERED_WRITE_LOGICAL: + case SHADER_OPCODE_BYTE_SCATTERED_READ_LOGICAL: + /* Byte scattered opcodes go through the normal data cache */ + sfid = GEN7_SFID_DATAPORT_DATA_CACHE; + break; + + case SHADER_OPCODE_UNTYPED_SURFACE_READ_LOGICAL: + case SHADER_OPCODE_UNTYPED_SURFACE_WRITE_LOGICAL: + case SHADER_OPCODE_UNTYPED_ATOMIC_LOGICAL: + case SHADER_OPCODE_UNTYPED_ATOMIC_FLOAT_LOGICAL: + /* Untyped Surface messages go through the data cache but the SFID value + * changed on Haswell. + */ + sfid = (devinfo->gen >= 8 || devinfo->is_haswell ? + HSW_SFID_DATAPORT_DATA_CACHE_1 : + GEN7_SFID_DATAPORT_DATA_CACHE); + break; + + case SHADER_OPCODE_TYPED_SURFACE_READ_LOGICAL: + case SHADER_OPCODE_TYPED_SURFACE_WRITE_LOGICAL: + case SHADER_OPCODE_TYPED_ATOMIC_LOGICAL: + /* Typed surface messages go through the render cache on IVB and the + * data cache on HSW+. + */ + sfid = (devinfo->gen >= 8 || devinfo->is_haswell ? 
+ HSW_SFID_DATAPORT_DATA_CACHE_1 : + GEN6_SFID_DATAPORT_RENDER_CACHE); + break; + + default: + unreachable("Unsupported surface opcode"); + } + + uint32_t desc; + switch (inst->opcode) { + case SHADER_OPCODE_UNTYPED_SURFACE_READ_LOGICAL: + desc = brw_dp_untyped_surface_rw_desc(devinfo, inst->exec_size, + arg.ud, /* num_channels */ + false /* write */); + break; + + case SHADER_OPCODE_UNTYPED_SURFACE_WRITE_LOGICAL: + desc = brw_dp_untyped_surface_rw_desc(devinfo, inst->exec_size, + arg.ud, /* num_channels */ + true /* write */); + break; + + case SHADER_OPCODE_BYTE_SCATTERED_READ_LOGICAL: + desc = brw_dp_byte_scattered_rw_desc(devinfo, inst->exec_size, + arg.ud, /* bit_size */ + false /* write */); + break; + + case SHADER_OPCODE_BYTE_SCATTERED_WRITE_LOGICAL: + desc = brw_dp_byte_scattered_rw_desc(devinfo, inst->exec_size, + arg.ud, /* bit_size */ + true /* write */); + break; + + case SHADER_OPCODE_UNTYPED_ATOMIC_LOGICAL: + desc = brw_dp_untyped_atomic_desc(devinfo, inst->exec_size, + arg.ud, /* atomic_op */ + !inst->dst.is_null()); + break; + + case SHADER_OPCODE_UNTYPED_ATOMIC_FLOAT_LOGICAL: + desc = brw_dp_untyped_atomic_float_desc(devinfo, inst->exec_size, + arg.ud, /* atomic_op */ + !inst->dst.is_null()); + break; + + case SHADER_OPCODE_TYPED_SURFACE_READ_LOGICAL: + desc = brw_dp_typed_surface_rw_desc(devinfo, inst->exec_size, inst->group, + arg.ud, /* num_channels */ + false /* write */); + break; + + case SHADER_OPCODE_TYPED_SURFACE_WRITE_LOGICAL: + desc = brw_dp_typed_surface_rw_desc(devinfo, inst->exec_size, inst->group, + arg.ud, /* num_channels */ + true /* write */); + break; + + case SHADER_OPCODE_TYPED_ATOMIC_LOGICAL: + desc = brw_dp_typed_atomic_desc(devinfo, inst->exec_size, inst->group, + arg.ud, /* atomic_op */ + !inst->dst.is_null()); + break; + + default: + unreachable("Unknown surface logical instruction"); + } + /* Update the original instruction. 
*/ - inst->opcode = op; - inst->mlen = header_sz + (addr_sz + src_sz) * inst->exec_size / 8; + inst->opcode = SHADER_OPCODE_SEND; + inst->mlen = mlen; + inst->ex_mlen = ex_mlen; inst->header_size = header_sz; + inst->send_has_side_effects = has_side_effects; + inst->send_is_volatile = !has_side_effects; - inst->src[0] = payload; - inst->src[1] = surface; - inst->src[2] = arg; - inst->resize_sources(3); + /* Set up SFID and descriptors */ + inst->sfid = sfid; + inst->desc = desc; + if (surface.file == IMM) { + inst->desc |= surface.ud & 0xff; + inst->src[0] = brw_imm_ud(0); + } else { + const fs_builder ubld = bld.exec_all().group(1, 0); + fs_reg tmp = ubld.vgrf(BRW_REGISTER_TYPE_UD); + ubld.AND(tmp, surface, brw_imm_ud(0xff)); + inst->src[0] = component(tmp, 0); + } + inst->src[1] = brw_imm_ud(0); /* ex_desc */ + + /* Finally, the payload */ + inst->src[2] = payload; + inst->src[3] = payload2; - delete[] components; + inst->resize_sources(4); } static void @@ -4883,16 +5266,37 @@ const gen_device_info *devinfo = bld.shader->devinfo; if (devinfo->gen >= 7) { + fs_reg index = inst->src[0]; /* We are switching the instruction from an ALU-like instruction to a * send-from-grf instruction. Since sends can't handle strides or * source modifiers, we have to make a copy of the offset source. */ - fs_reg tmp = bld.vgrf(BRW_REGISTER_TYPE_UD); - bld.MOV(tmp, inst->src[1]); - inst->src[1] = tmp; + fs_reg offset = bld.vgrf(BRW_REGISTER_TYPE_UD); + bld.MOV(offset, inst->src[1]); + + const unsigned simd_mode = + inst->exec_size <= 8 ? 
BRW_SAMPLER_SIMD_MODE_SIMD8 : + BRW_SAMPLER_SIMD_MODE_SIMD16; - inst->opcode = FS_OPCODE_VARYING_PULL_CONSTANT_LOAD_GEN7; + inst->opcode = SHADER_OPCODE_SEND; inst->mlen = inst->exec_size / 8; + inst->resize_sources(3); + + inst->sfid = BRW_SFID_SAMPLER; + inst->desc = brw_sampler_desc(devinfo, 0, 0, + GEN5_SAMPLER_MESSAGE_SAMPLE_LD, + simd_mode, 0); + if (index.file == IMM) { + inst->desc |= index.ud & 0xff; + inst->src[0] = brw_imm_ud(0); + } else { + const fs_builder ubld = bld.exec_all().group(1, 0); + fs_reg tmp = ubld.vgrf(BRW_REGISTER_TYPE_UD); + ubld.AND(tmp, index, brw_imm_ud(0xff)); + inst->src[0] = component(tmp, 0); + } + inst->src[1] = brw_imm_ud(0); /* ex_desc */ + inst->src[2] = offset; /* payload */ } else { const fs_reg payload(MRF, FIRST_PULL_LOAD_MRF(devinfo->gen), BRW_REGISTER_TYPE_UD); @@ -4978,6 +5382,10 @@ lower_sampler_logical_send(ibld, inst, SHADER_OPCODE_TXS); break; + case SHADER_OPCODE_IMAGE_SIZE_LOGICAL: + lower_sampler_logical_send(ibld, inst, SHADER_OPCODE_IMAGE_SIZE); + break; + case FS_OPCODE_TXB_LOGICAL: lower_sampler_logical_send(ibld, inst, FS_OPCODE_TXB); break; @@ -5015,57 +5423,15 @@ break; case SHADER_OPCODE_UNTYPED_SURFACE_READ_LOGICAL: - lower_surface_logical_send(ibld, inst, - SHADER_OPCODE_UNTYPED_SURFACE_READ, - fs_reg()); - break; - case SHADER_OPCODE_UNTYPED_SURFACE_WRITE_LOGICAL: - lower_surface_logical_send(ibld, inst, - SHADER_OPCODE_UNTYPED_SURFACE_WRITE, - ibld.sample_mask_reg()); - break; - case SHADER_OPCODE_BYTE_SCATTERED_READ_LOGICAL: - lower_surface_logical_send(ibld, inst, - SHADER_OPCODE_BYTE_SCATTERED_READ, - fs_reg()); - break; - case SHADER_OPCODE_BYTE_SCATTERED_WRITE_LOGICAL: - lower_surface_logical_send(ibld, inst, - SHADER_OPCODE_BYTE_SCATTERED_WRITE, - ibld.sample_mask_reg()); - break; - case SHADER_OPCODE_UNTYPED_ATOMIC_LOGICAL: - lower_surface_logical_send(ibld, inst, - SHADER_OPCODE_UNTYPED_ATOMIC, - ibld.sample_mask_reg()); - break; - case SHADER_OPCODE_UNTYPED_ATOMIC_FLOAT_LOGICAL: - 
lower_surface_logical_send(ibld, inst, - SHADER_OPCODE_UNTYPED_ATOMIC_FLOAT, - ibld.sample_mask_reg()); - break; - case SHADER_OPCODE_TYPED_SURFACE_READ_LOGICAL: - lower_surface_logical_send(ibld, inst, - SHADER_OPCODE_TYPED_SURFACE_READ, - brw_imm_d(0xffff)); - break; - case SHADER_OPCODE_TYPED_SURFACE_WRITE_LOGICAL: - lower_surface_logical_send(ibld, inst, - SHADER_OPCODE_TYPED_SURFACE_WRITE, - ibld.sample_mask_reg()); - break; - case SHADER_OPCODE_TYPED_ATOMIC_LOGICAL: - lower_surface_logical_send(ibld, inst, - SHADER_OPCODE_TYPED_ATOMIC, - ibld.sample_mask_reg()); + lower_surface_logical_send(ibld, inst); break; case FS_OPCODE_VARYING_PULL_CONSTANT_LOAD_LOGICAL: @@ -5296,6 +5662,14 @@ get_sampler_lowered_simd_width(const struct gen_device_info *devinfo, const fs_inst *inst) { + /* If we have a min_lod parameter on anything other than a simple sample + * message, it will push it over 5 arguments and we have to fall back to + * SIMD8. + */ + if (inst->opcode != SHADER_OPCODE_TEX && + inst->components_read(TEX_LOGICAL_SRC_MIN_LOD)) + return 8; + /* Calculate the number of coordinate components that have to be present * assuming that additional arguments follow the texel coordinates in the * message payload. 
On IVB+ there is no need for padding, on ILK-SNB we @@ -5444,10 +5818,7 @@ case FS_OPCODE_DDX_FINE: case FS_OPCODE_DDY_COARSE: case FS_OPCODE_UNIFORM_PULL_CONSTANT_LOAD: - case FS_OPCODE_VARYING_PULL_CONSTANT_LOAD_GEN7: case FS_OPCODE_PACK_HALF_2x16_SPLIT: - case FS_OPCODE_UNPACK_HALF_2x16_SPLIT_X: - case FS_OPCODE_UNPACK_HALF_2x16_SPLIT_Y: case FS_OPCODE_INTERPOLATE_AT_SAMPLE: case FS_OPCODE_INTERPOLATE_AT_SHARED_OFFSET: case FS_OPCODE_INTERPOLATE_AT_PER_SLOT_OFFSET: @@ -5967,6 +6338,10 @@ fprintf(file, "(mlen: %d) ", inst->mlen); } + if (inst->ex_mlen) { + fprintf(file, "(ex_mlen: %d) ", inst->ex_mlen); + } + if (inst->eot) { fprintf(file, "(EOT) "); } @@ -6078,6 +6453,11 @@ brw_vf_to_float((inst->src[i].ud >> 16) & 0xff), brw_vf_to_float((inst->src[i].ud >> 24) & 0xff)); break; + case BRW_REGISTER_TYPE_V: + case BRW_REGISTER_TYPE_UV: + fprintf(file, "%08x%s", inst->src[i].ud, + inst->src[i].type == BRW_REGISTER_TYPE_V ? "V" : "UV"); + break; default: fprintf(file, "???"); break; @@ -6444,18 +6824,68 @@ OPT(dead_code_eliminate); } - if (OPT(lower_conversions)) { + if (OPT(lower_regioning)) { OPT(opt_copy_propagation); OPT(dead_code_eliminate); OPT(lower_simd_width); } + OPT(fixup_sends_duplicate_payload); + lower_uniform_pull_constant_loads(); validate(); } /** + * From the Skylake PRM Vol. 2a docs for sends: + * + * "It is required that the second block of GRFs does not overlap with the + * first block." + * + * There are plenty of cases where we may accidentally violate this due to + * having, for instance, both sources be the constant 0. This little pass + * just adds a new vgrf for the second payload and copies it over. 
+ */ +bool +fs_visitor::fixup_sends_duplicate_payload() +{ + bool progress = false; + + foreach_block_and_inst_safe (block, fs_inst, inst, cfg) { + if (inst->opcode == SHADER_OPCODE_SEND && inst->ex_mlen > 0 && + regions_overlap(inst->src[2], inst->mlen * REG_SIZE, + inst->src[3], inst->ex_mlen * REG_SIZE)) { + fs_reg tmp = fs_reg(VGRF, alloc.allocate(inst->ex_mlen), + BRW_REGISTER_TYPE_UD); + /* Sadly, we've lost all notion of channels and bit sizes at this + * point. Just WE_all it. + */ + const fs_builder ibld = bld.at(block, inst).exec_all().group(16, 0); + fs_reg copy_src = retype(inst->src[3], BRW_REGISTER_TYPE_UD); + fs_reg copy_dst = tmp; + for (unsigned i = 0; i < inst->ex_mlen; i += 2) { + if (inst->ex_mlen == i + 1) { + /* Only one register left; do SIMD8 */ + ibld.group(8, 0).MOV(copy_dst, copy_src); + } else { + ibld.MOV(copy_dst, copy_src); + } + copy_src = offset(copy_src, ibld, 1); + copy_dst = offset(copy_dst, ibld, 1); + } + inst->src[3] = tmp; + progress = true; + } + } + + if (progress) + invalidate_live_intervals(); + + return progress; +} + +/** * Three source instruction must have a GRF/MRF destination register. * ARF NULL is not allowed. Fix that up by allocating a temporary GRF. 
*/ @@ -7161,7 +7591,7 @@ void *mem_ctx, const struct brw_wm_prog_key *key, struct brw_wm_prog_data *prog_data, - const nir_shader *src_shader, + nir_shader *shader, struct gl_program *prog, int shader_time_index8, int shader_time_index16, int shader_time_index32, bool allow_spilling, @@ -7170,7 +7600,6 @@ { const struct gen_device_info *devinfo = compiler->devinfo; - nir_shader *shader = nir_shader_clone(mem_ctx, src_shader); shader = brw_nir_apply_sampler_key(shader, compiler, &key->tex, true); brw_nir_lower_fs_inputs(shader, devinfo, key); brw_nir_lower_fs_outputs(shader); diff -Nru mesa-18.3.3/src/intel/compiler/brw_fs_cse.cpp mesa-19.0.1/src/intel/compiler/brw_fs_cse.cpp --- mesa-18.3.3/src/intel/compiler/brw_fs_cse.cpp 2018-09-27 19:13:54.000000000 +0000 +++ mesa-19.0.1/src/intel/compiler/brw_fs_cse.cpp 2019-03-31 23:16:37.000000000 +0000 @@ -74,7 +74,6 @@ case FS_OPCODE_FB_READ_LOGICAL: case FS_OPCODE_UNIFORM_PULL_CONSTANT_LOAD: case FS_OPCODE_VARYING_PULL_CONSTANT_LOAD_LOGICAL: - case FS_OPCODE_VARYING_PULL_CONSTANT_LOAD_GEN7: case FS_OPCODE_LINTERP: case SHADER_OPCODE_FIND_LIVE_CHANNEL: case SHADER_OPCODE_BROADCAST: @@ -184,8 +183,13 @@ a->dst.type == b->dst.type && a->offset == b->offset && a->mlen == b->mlen && + a->ex_mlen == b->ex_mlen && + a->sfid == b->sfid && + a->desc == b->desc && a->size_written == b->size_written && a->base_mrf == b->base_mrf && + a->check_tdr == b->check_tdr && + a->send_has_side_effects == b->send_has_side_effects && a->eot == b->eot && a->header_size == b->header_size && a->shadow_compare == b->shadow_compare && diff -Nru mesa-18.3.3/src/intel/compiler/brw_fs_generator.cpp mesa-19.0.1/src/intel/compiler/brw_fs_generator.cpp --- mesa-18.3.3/src/intel/compiler/brw_fs_generator.cpp 2019-02-01 12:03:20.000000000 +0000 +++ mesa-19.0.1/src/intel/compiler/brw_fs_generator.cpp 2019-03-31 23:16:37.000000000 +0000 @@ -90,9 +90,16 @@ * different execution size when the number of components * written to each destination GRF is not the 
same. */ - const unsigned width = MIN2(reg_width, phys_width); - brw_reg = brw_vecn_reg(width, brw_file_from_reg(reg), reg->nr, 0); - brw_reg = stride(brw_reg, width * reg->stride, width, reg->stride); + if (reg->stride > 4) { + assert(reg != &inst->dst); + assert(reg->stride * type_sz(reg->type) <= REG_SIZE); + brw_reg = brw_vecn_reg(1, brw_file_from_reg(reg), reg->nr, 0); + brw_reg = stride(brw_reg, reg->stride, 1, 0); + } else { + const unsigned width = MIN2(reg_width, phys_width); + brw_reg = brw_vecn_reg(width, brw_file_from_reg(reg), reg->nr, 0); + brw_reg = stride(brw_reg, width * reg->stride, width, reg->stride); + } if (devinfo->gen == 7 && !devinfo->is_haswell) { /* From the IvyBridge PRM (EU Changes by Processor Generation, page 13): @@ -251,6 +258,40 @@ } void +fs_generator::generate_send(fs_inst *inst, + struct brw_reg dst, + struct brw_reg desc, + struct brw_reg ex_desc, + struct brw_reg payload, + struct brw_reg payload2) +{ + const bool dst_is_null = dst.file == BRW_ARCHITECTURE_REGISTER_FILE && + dst.nr == BRW_ARF_NULL; + const unsigned rlen = dst_is_null ? 0 : inst->size_written / REG_SIZE; + + uint32_t desc_imm = inst->desc | + brw_message_desc(devinfo, inst->mlen, rlen, inst->header_size); + + uint32_t ex_desc_imm = brw_message_ex_desc(devinfo, inst->ex_mlen); + + if (ex_desc.file != BRW_IMMEDIATE_VALUE || ex_desc.ud || ex_desc_imm) { + /* If we have any sort of extended descriptor, then we need SENDS. This + * also covers the dual-payload case because ex_mlen goes in ex_desc. 
+ */ + brw_send_indirect_split_message(p, inst->sfid, dst, payload, payload2, + desc, desc_imm, ex_desc, ex_desc_imm); + if (inst->check_tdr) + brw_inst_set_opcode(p->devinfo, brw_last_inst, BRW_OPCODE_SENDSC); + } else { + brw_send_indirect_message(p, inst->sfid, dst, payload, desc, desc_imm); + if (inst->check_tdr) + brw_inst_set_opcode(p->devinfo, brw_last_inst, BRW_OPCODE_SENDC); + } + + brw_inst_set_eot(p->devinfo, brw_last_inst, inst->eot); +} + +void fs_generator::fire_fb_write(fs_inst *inst, struct brw_reg payload, struct brw_reg implied_header, @@ -315,8 +356,6 @@ if (devinfo->gen >= 6) brw_inst_set_rt_slot_group(devinfo, insn, inst->group / 16); - - brw_mark_surface_used(&prog_data->base, surf_index); } void @@ -373,8 +412,6 @@ gen9_fb_READ(p, dst, payload, surf_index, inst->header_size, inst->size_written / REG_SIZE, prog_data->persample_dispatch); - - brw_mark_surface_used(&prog_data->base, surf_index); } void @@ -440,7 +477,8 @@ if (type_sz(reg.type) > 4 && ((devinfo->gen == 7 && !devinfo->is_haswell) || - devinfo->is_cherryview || gen_device_info_is_9lp(devinfo))) { + devinfo->is_cherryview || gen_device_info_is_9lp(devinfo) || + !devinfo->has_64bit_types)) { /* IVB has an issue (which we found empirically) where it reads two * address register components per channel for indirectly addressed * 64-bit sources. 
@@ -938,15 +976,14 @@ inst->header_size > 0, simd_mode, BRW_SAMPLER_RETURN_FORMAT_SINT32); - - brw_mark_surface_used(prog_data, surf_index.ud); } void -fs_generator::generate_tex(fs_inst *inst, struct brw_reg dst, struct brw_reg src, +fs_generator::generate_tex(fs_inst *inst, struct brw_reg dst, struct brw_reg surface_index, struct brw_reg sampler_index) { + assert(devinfo->gen < 7); assert(inst->size_written % REG_SIZE == 0); int msg_type = -1; uint32_t simd_mode; @@ -1015,71 +1052,26 @@ msg_type = GEN5_SAMPLER_MESSAGE_SAMPLE_LOD; } break; - case SHADER_OPCODE_TXL_LZ: - assert(devinfo->gen >= 9); - if (inst->shadow_compare) { - msg_type = GEN9_SAMPLER_MESSAGE_SAMPLE_C_LZ; - } else { - msg_type = GEN9_SAMPLER_MESSAGE_SAMPLE_LZ; - } - break; case SHADER_OPCODE_TXS: - case SHADER_OPCODE_IMAGE_SIZE: msg_type = GEN5_SAMPLER_MESSAGE_SAMPLE_RESINFO; break; case SHADER_OPCODE_TXD: - if (inst->shadow_compare) { - /* Gen7.5+. Otherwise, lowered in NIR */ - assert(devinfo->gen >= 8 || devinfo->is_haswell); - msg_type = HSW_SAMPLER_MESSAGE_SAMPLE_DERIV_COMPARE; - } else { - msg_type = GEN5_SAMPLER_MESSAGE_SAMPLE_DERIVS; - } + assert(!inst->shadow_compare); + msg_type = GEN5_SAMPLER_MESSAGE_SAMPLE_DERIVS; break; case SHADER_OPCODE_TXF: msg_type = GEN5_SAMPLER_MESSAGE_SAMPLE_LD; break; - case SHADER_OPCODE_TXF_LZ: - assert(devinfo->gen >= 9); - msg_type = GEN9_SAMPLER_MESSAGE_SAMPLE_LD_LZ; - break; - case SHADER_OPCODE_TXF_CMS_W: - assert(devinfo->gen >= 9); - msg_type = GEN9_SAMPLER_MESSAGE_SAMPLE_LD2DMS_W; - break; case SHADER_OPCODE_TXF_CMS: - if (devinfo->gen >= 7) - msg_type = GEN7_SAMPLER_MESSAGE_SAMPLE_LD2DMS; - else - msg_type = GEN5_SAMPLER_MESSAGE_SAMPLE_LD; - break; - case SHADER_OPCODE_TXF_UMS: - assert(devinfo->gen >= 7); - msg_type = GEN7_SAMPLER_MESSAGE_SAMPLE_LD2DSS; - break; - case SHADER_OPCODE_TXF_MCS: - assert(devinfo->gen >= 7); - msg_type = GEN7_SAMPLER_MESSAGE_SAMPLE_LD_MCS; + msg_type = GEN5_SAMPLER_MESSAGE_SAMPLE_LD; break; case SHADER_OPCODE_LOD: 
msg_type = GEN5_SAMPLER_MESSAGE_LOD; break; case SHADER_OPCODE_TG4: - if (inst->shadow_compare) { - assert(devinfo->gen >= 7); - msg_type = GEN7_SAMPLER_MESSAGE_SAMPLE_GATHER4_C; - } else { - assert(devinfo->gen >= 6); - msg_type = GEN7_SAMPLER_MESSAGE_SAMPLE_GATHER4; - } - break; - case SHADER_OPCODE_TG4_OFFSET: - assert(devinfo->gen >= 7); - if (inst->shadow_compare) { - msg_type = GEN7_SAMPLER_MESSAGE_SAMPLE_GATHER4_PO_C; - } else { - msg_type = GEN7_SAMPLER_MESSAGE_SAMPLE_GATHER4_PO; - } + assert(devinfo->gen == 6); + assert(!inst->shadow_compare); + msg_type = GEN7_SAMPLER_MESSAGE_SAMPLE_GATHER4; break; case SHADER_OPCODE_SAMPLEINFO: msg_type = GEN6_SAMPLER_MESSAGE_SAMPLE_SAMPLEINFO; @@ -1158,16 +1150,14 @@ dst = vec16(dst); } - assert(devinfo->gen < 7 || inst->header_size == 0 || - src.file == BRW_GENERAL_REGISTER_FILE); - assert(sampler_index.type == BRW_REGISTER_TYPE_UD); /* Load the message header if present. If there's a texture offset, * we need to set it up explicitly and load the offset bitfield. * Otherwise, we can use an implied move from g0 to the first message reg. */ - if (inst->header_size != 0 && devinfo->gen < 7) { + struct brw_reg src = brw_null_reg(); + if (inst->header_size != 0) { if (devinfo->gen < 6 && !inst->offset) { /* Set up an implied move from g0 to the MRF. 
*/ src = retype(brw_vec8_grf(0, 0), BRW_REGISTER_TYPE_UW); @@ -1196,85 +1186,28 @@ uint32_t base_binding_table_index; switch (inst->opcode) { case SHADER_OPCODE_TG4: - case SHADER_OPCODE_TG4_OFFSET: base_binding_table_index = prog_data->binding_table.gather_texture_start; break; - case SHADER_OPCODE_IMAGE_SIZE: - base_binding_table_index = prog_data->binding_table.image_start; - break; default: base_binding_table_index = prog_data->binding_table.texture_start; break; } - if (surface_index.file == BRW_IMMEDIATE_VALUE && - sampler_index.file == BRW_IMMEDIATE_VALUE) { - uint32_t surface = surface_index.ud; - uint32_t sampler = sampler_index.ud; - - brw_SAMPLE(p, - retype(dst, BRW_REGISTER_TYPE_UW), - inst->base_mrf, - src, - surface + base_binding_table_index, - sampler % 16, - msg_type, - inst->size_written / REG_SIZE, - inst->mlen, - inst->header_size != 0, - simd_mode, - return_format); + assert(surface_index.file == BRW_IMMEDIATE_VALUE); + assert(sampler_index.file == BRW_IMMEDIATE_VALUE); - brw_mark_surface_used(prog_data, surface + base_binding_table_index); - } else { - /* Non-const sampler index */ - - struct brw_reg addr = vec1(retype(brw_address_reg(0), BRW_REGISTER_TYPE_UD)); - struct brw_reg surface_reg = vec1(retype(surface_index, BRW_REGISTER_TYPE_UD)); - struct brw_reg sampler_reg = vec1(retype(sampler_index, BRW_REGISTER_TYPE_UD)); - - brw_push_insn_state(p); - brw_set_default_mask_control(p, BRW_MASK_DISABLE); - brw_set_default_access_mode(p, BRW_ALIGN_1); - brw_set_default_exec_size(p, BRW_EXECUTE_1); - - if (brw_regs_equal(&surface_reg, &sampler_reg)) { - brw_MUL(p, addr, sampler_reg, brw_imm_uw(0x101)); - } else { - if (sampler_reg.file == BRW_IMMEDIATE_VALUE) { - brw_OR(p, addr, surface_reg, brw_imm_ud(sampler_reg.ud << 8)); - } else { - brw_SHL(p, addr, sampler_reg, brw_imm_ud(8)); - brw_OR(p, addr, addr, surface_reg); - } - } - if (base_binding_table_index) - brw_ADD(p, addr, addr, brw_imm_ud(base_binding_table_index)); - brw_AND(p, addr, addr, 
brw_imm_ud(0xfff)); - - brw_pop_insn_state(p); - - /* dst = send(offset, a0.0 | ) */ - brw_send_indirect_message( - p, BRW_SFID_SAMPLER, dst, src, addr, - brw_message_desc(devinfo, inst->mlen, inst->size_written / REG_SIZE, - inst->header_size) | - brw_sampler_desc(devinfo, - 0 /* surface */, - 0 /* sampler */, - msg_type, - simd_mode, - return_format)); - - /* visitor knows more than we do about the surface limit required, - * so has already done marking. - */ - } - - if (is_combined_send) { - brw_inst_set_eot(p->devinfo, brw_last_inst, true); - brw_inst_set_opcode(p->devinfo, brw_last_inst, BRW_OPCODE_SENDC); - } + brw_SAMPLE(p, + retype(dst, BRW_REGISTER_TYPE_UW), + inst->base_mrf, + src, + surface_index.ud + base_binding_table_index, + sampler_index.ud % 16, + msg_type, + inst->size_written / REG_SIZE, + inst->mlen, + inst->header_size != 0, + simd_mode, + return_format); } @@ -1596,75 +1529,6 @@ } void -fs_generator::generate_varying_pull_constant_load_gen7(fs_inst *inst, - struct brw_reg dst, - struct brw_reg index, - struct brw_reg offset) -{ - assert(devinfo->gen >= 7); - /* Varying-offset pull constant loads are treated as a normal expression on - * gen7, so the fact that it's a send message is hidden at the IR level. 
- */ - assert(inst->header_size == 0); - assert(inst->mlen); - assert(index.type == BRW_REGISTER_TYPE_UD); - - uint32_t simd_mode, rlen; - if (inst->exec_size == 16) { - rlen = 8; - simd_mode = BRW_SAMPLER_SIMD_MODE_SIMD16; - } else { - assert(inst->exec_size == 8); - rlen = 4; - simd_mode = BRW_SAMPLER_SIMD_MODE_SIMD8; - } - - if (index.file == BRW_IMMEDIATE_VALUE) { - - uint32_t surf_index = index.ud; - - brw_inst *send = brw_next_insn(p, BRW_OPCODE_SEND); - brw_inst_set_sfid(devinfo, send, BRW_SFID_SAMPLER); - brw_set_dest(p, send, retype(dst, BRW_REGISTER_TYPE_UW)); - brw_set_src0(p, send, offset); - brw_set_desc(p, send, - brw_message_desc(devinfo, inst->mlen, rlen, false) | - brw_sampler_desc(devinfo, surf_index, - 0, /* LD message ignores sampler unit */ - GEN5_SAMPLER_MESSAGE_SAMPLE_LD, - simd_mode, 0)); - - } else { - - struct brw_reg addr = vec1(retype(brw_address_reg(0), BRW_REGISTER_TYPE_UD)); - - brw_push_insn_state(p); - brw_set_default_mask_control(p, BRW_MASK_DISABLE); - - /* a0.0 = surf_index & 0xff */ - brw_inst *insn_and = brw_next_insn(p, BRW_OPCODE_AND); - brw_inst_set_exec_size(p->devinfo, insn_and, BRW_EXECUTE_1); - brw_set_dest(p, insn_and, addr); - brw_set_src0(p, insn_and, vec1(retype(index, BRW_REGISTER_TYPE_UD))); - brw_set_src1(p, insn_and, brw_imm_ud(0x0ff)); - - brw_pop_insn_state(p); - - /* dst = send(offset, a0.0 | ) */ - brw_send_indirect_message( - p, BRW_SFID_SAMPLER, retype(dst, BRW_REGISTER_TYPE_UW), - offset, addr, - brw_message_desc(devinfo, inst->mlen, rlen, false) | - brw_sampler_desc(devinfo, - 0 /* surface */, - 0 /* sampler */, - GEN5_SAMPLER_MESSAGE_SAMPLE_LD, - simd_mode, - 0)); - } -} - -void fs_generator::generate_pixel_interpolator_query(fs_inst *inst, struct brw_reg dst, struct brw_reg src, @@ -1756,35 +1620,6 @@ } void -fs_generator::generate_unpack_half_2x16_split(fs_inst *inst, - struct brw_reg dst, - struct brw_reg src) -{ - assert(devinfo->gen >= 7); - assert(dst.type == BRW_REGISTER_TYPE_F); - assert(src.type 
== BRW_REGISTER_TYPE_UD); - - /* From the Ivybridge PRM, Vol4, Part3, Section 6.26 f16to32: - * - * Because this instruction does not have a 16-bit floating-point type, - * the source data type must be Word (W). The destination type must be - * F (Float). - */ - struct brw_reg src_w = spread(retype(src, BRW_REGISTER_TYPE_W), 2); - - /* Each channel of src has the form of unpackHalf2x16's input: 0xhhhhllll. - * For the Y case, we wish to access only the upper word; therefore - * a 16-bit subregister offset is needed. - */ - assert(inst->opcode == FS_OPCODE_UNPACK_HALF_2x16_SPLIT_X || - inst->opcode == FS_OPCODE_UNPACK_HALF_2x16_SPLIT_Y); - if (inst->opcode == FS_OPCODE_UNPACK_HALF_2x16_SPLIT_Y) - src_w.subnr += 2; - - brw_F16TO32(p, dst, src_w); -} - -void fs_generator::generate_shader_time_add(fs_inst *, struct brw_reg payload, struct brw_reg offset, @@ -1818,9 +1653,6 @@ brw_shader_time_add(p, payload, prog_data->binding_table.shader_time_start); brw_pop_insn_state(p); - - brw_mark_surface_used(prog_data, - prog_data->binding_table.shader_time_start); } void @@ -1846,7 +1678,7 @@ struct disasm_info *disasm_info = disasm_initialize(devinfo, cfg); foreach_block_and_inst (block, fs_inst, inst, cfg) { - struct brw_reg src[3], dst; + struct brw_reg src[4], dst; unsigned int last_insn_offset = p->next_insn_offset; bool multiple_instructions_emitted = false; @@ -2169,6 +2001,12 @@ src[0].subnr = 4 * type_sz(src[0].type); brw_MOV(p, dst, stride(src[0], 8, 4, 1)); break; + + case SHADER_OPCODE_SEND: + generate_send(inst, dst, src[0], src[1], src[2], + inst->ex_mlen > 0 ? 
src[3] : brw_null_reg()); + break; + case SHADER_OPCODE_GET_BUFFER_SIZE: generate_get_buffer_size(inst, dst, src[0], src[1]); break; @@ -2176,23 +2014,14 @@ case FS_OPCODE_TXB: case SHADER_OPCODE_TXD: case SHADER_OPCODE_TXF: - case SHADER_OPCODE_TXF_LZ: case SHADER_OPCODE_TXF_CMS: - case SHADER_OPCODE_TXF_CMS_W: - case SHADER_OPCODE_TXF_UMS: - case SHADER_OPCODE_TXF_MCS: case SHADER_OPCODE_TXL: - case SHADER_OPCODE_TXL_LZ: case SHADER_OPCODE_TXS: case SHADER_OPCODE_LOD: case SHADER_OPCODE_TG4: - case SHADER_OPCODE_TG4_OFFSET: case SHADER_OPCODE_SAMPLEINFO: - generate_tex(inst, dst, src[0], src[1], src[2]); - break; - - case SHADER_OPCODE_IMAGE_SIZE: - generate_tex(inst, dst, src[0], src[1], brw_imm_ud(0)); + assert(inst->src[0].file == BAD_FILE); + generate_tex(inst, dst, src[1], src[2]); break; case FS_OPCODE_DDX_COARSE: @@ -2249,10 +2078,6 @@ generate_varying_pull_constant_load_gen4(inst, dst, src[0]); break; - case FS_OPCODE_VARYING_PULL_CONSTANT_LOAD_GEN7: - generate_varying_pull_constant_load_gen7(inst, dst, src[0], src[1]); - break; - case FS_OPCODE_REP_FB_WRITE: case FS_OPCODE_FB_WRITE: generate_fb_write(inst, src[0]); @@ -2270,73 +2095,12 @@ generate_shader_time_add(inst, src[0], src[1], src[2]); break; - case SHADER_OPCODE_UNTYPED_ATOMIC: - assert(src[2].file == BRW_IMMEDIATE_VALUE); - brw_untyped_atomic(p, dst, src[0], src[1], src[2].ud, - inst->mlen, !inst->dst.is_null(), - inst->header_size); - break; - - case SHADER_OPCODE_UNTYPED_ATOMIC_FLOAT: - assert(src[2].file == BRW_IMMEDIATE_VALUE); - brw_untyped_atomic_float(p, dst, src[0], src[1], src[2].ud, - inst->mlen, !inst->dst.is_null(), - inst->header_size); - break; - - case SHADER_OPCODE_UNTYPED_SURFACE_READ: - assert(!inst->header_size); - assert(src[2].file == BRW_IMMEDIATE_VALUE); - brw_untyped_surface_read(p, dst, src[0], src[1], - inst->mlen, src[2].ud); - break; - - case SHADER_OPCODE_UNTYPED_SURFACE_WRITE: - assert(src[2].file == BRW_IMMEDIATE_VALUE); - brw_untyped_surface_write(p, src[0], 
src[1], - inst->mlen, src[2].ud, - inst->header_size); - break; - - case SHADER_OPCODE_BYTE_SCATTERED_READ: - assert(!inst->header_size); - assert(src[2].file == BRW_IMMEDIATE_VALUE); - brw_byte_scattered_read(p, dst, src[0], src[1], - inst->mlen, src[2].ud); - break; - - case SHADER_OPCODE_BYTE_SCATTERED_WRITE: - assert(src[2].file == BRW_IMMEDIATE_VALUE); - brw_byte_scattered_write(p, src[0], src[1], - inst->mlen, src[2].ud, - inst->header_size); - break; - - case SHADER_OPCODE_TYPED_ATOMIC: - assert(src[2].file == BRW_IMMEDIATE_VALUE); - brw_typed_atomic(p, dst, src[0], src[1], - src[2].ud, inst->mlen, !inst->dst.is_null(), - inst->header_size); - break; - - case SHADER_OPCODE_TYPED_SURFACE_READ: - assert(src[2].file == BRW_IMMEDIATE_VALUE); - brw_typed_surface_read(p, dst, src[0], src[1], - inst->mlen, src[2].ud, - inst->header_size); - break; - - case SHADER_OPCODE_TYPED_SURFACE_WRITE: - assert(src[2].file == BRW_IMMEDIATE_VALUE); - brw_typed_surface_write(p, src[0], src[1], inst->mlen, src[2].ud, - inst->header_size); - break; - case SHADER_OPCODE_MEMORY_FENCE: brw_memory_fence(p, dst, BRW_OPCODE_SEND); break; case SHADER_OPCODE_INTERLOCK: + assert(devinfo->gen >= 9); /* The interlock is basically a memory fence issued via sendc */ brw_memory_fence(p, dst, BRW_OPCODE_SENDC); break; @@ -2421,11 +2185,6 @@ generate_pack_half_2x16_split(inst, dst, src[0], src[1]); break; - case FS_OPCODE_UNPACK_HALF_2x16_SPLIT_X: - case FS_OPCODE_UNPACK_HALF_2x16_SPLIT_Y: - generate_unpack_half_2x16_split(inst, dst, src[0]); - break; - case FS_OPCODE_PLACEHOLDER_HALT: /* This is the place where the final HALT needs to be inserted if * we've emitted any discards. If not, this will emit no code. 
diff -Nru mesa-18.3.3/src/intel/compiler/brw_fs.h mesa-19.0.1/src/intel/compiler/brw_fs.h --- mesa-18.3.3/src/intel/compiler/brw_fs.h 2019-02-01 12:03:20.000000000 +0000 +++ mesa-19.0.1/src/intel/compiler/brw_fs.h 2019-03-31 23:16:37.000000000 +0000 @@ -103,6 +103,7 @@ void setup_vs_payload(); void setup_gs_payload(); void setup_cs_payload(); + bool fixup_sends_duplicate_payload(); void fixup_3src_null_dest(); void assign_curb_setup(); void calculate_urb_setup(); @@ -119,7 +120,7 @@ void setup_payload_interference(struct ra_graph *g, int payload_reg_count, int first_payload_node); int choose_spill_reg(struct ra_graph *g); - void spill_reg(int spill_reg); + void spill_reg(unsigned spill_reg); void split_virtual_grfs(); bool compact_virtual_grfs(); void assign_constant_locations(); @@ -164,7 +165,7 @@ void lower_uniform_pull_constant_loads(); bool lower_load_payload(); bool lower_pack(); - bool lower_conversions(); + bool lower_regioning(); bool lower_logical_sends(); bool lower_integer_multiplication(); bool lower_minmax(); @@ -218,6 +219,8 @@ nir_intrinsic_instr *instr); fs_reg get_nir_image_intrinsic_image(const brw::fs_builder &bld, nir_intrinsic_instr *instr); + fs_reg get_nir_ssbo_intrinsic_index(const brw::fs_builder &bld, + nir_intrinsic_instr *instr); void nir_emit_intrinsic(const brw::fs_builder &bld, nir_intrinsic_instr *instr); void nir_emit_tes_intrinsic(const brw::fs_builder &bld, @@ -404,6 +407,12 @@ struct brw_reg payload, struct brw_reg implied_header, GLuint nr); + void generate_send(fs_inst *inst, + struct brw_reg dst, + struct brw_reg desc, + struct brw_reg ex_desc, + struct brw_reg payload, + struct brw_reg payload2); void generate_fb_write(fs_inst *inst, struct brw_reg payload); void generate_fb_read(fs_inst *inst, struct brw_reg dst, struct brw_reg payload); @@ -413,7 +422,7 @@ void generate_barrier(fs_inst *inst, struct brw_reg src); bool generate_linterp(fs_inst *inst, struct brw_reg dst, struct brw_reg *src); - void generate_tex(fs_inst 
*inst, struct brw_reg dst, struct brw_reg src, + void generate_tex(fs_inst *inst, struct brw_reg dst, struct brw_reg surface_index, struct brw_reg sampler_index); void generate_get_buffer_size(fs_inst *inst, struct brw_reg dst, @@ -436,10 +445,6 @@ void generate_varying_pull_constant_load_gen4(fs_inst *inst, struct brw_reg dst, struct brw_reg index); - void generate_varying_pull_constant_load_gen7(fs_inst *inst, - struct brw_reg dst, - struct brw_reg index, - struct brw_reg offset); void generate_mov_dispatch_to_flags(fs_inst *inst); void generate_pixel_interpolator_query(fs_inst *inst, @@ -459,9 +464,6 @@ struct brw_reg dst, struct brw_reg x, struct brw_reg y); - void generate_unpack_half_2x16_split(fs_inst *inst, - struct brw_reg dst, - struct brw_reg src); void generate_shader_time_add(fs_inst *inst, struct brw_reg payload, @@ -534,24 +536,8 @@ } } - /** - * Remove any modifiers from the \p i-th source region of the instruction, - * including negate, abs and any implicit type conversion to the execution - * type. Instead any source modifiers will be implemented as a separate - * MOV instruction prior to the original instruction. 
- */ - inline bool - lower_src_modifiers(fs_visitor *v, bblock_t *block, fs_inst *inst, unsigned i) - { - assert(inst->components_read(i) == 1); - const fs_builder ibld(v, block, inst); - const fs_reg tmp = ibld.vgrf(get_exec_type(inst)); - - ibld.MOV(tmp, inst->src[i]); - inst->src[i] = tmp; - - return true; - } + bool + lower_src_modifiers(fs_visitor *v, bblock_t *block, fs_inst *inst, unsigned i); } void shuffle_from_32bit_read(const brw::fs_builder &bld, diff -Nru mesa-18.3.3/src/intel/compiler/brw_fs_lower_conversions.cpp mesa-19.0.1/src/intel/compiler/brw_fs_lower_conversions.cpp --- mesa-18.3.3/src/intel/compiler/brw_fs_lower_conversions.cpp 2018-09-27 19:13:54.000000000 +0000 +++ mesa-19.0.1/src/intel/compiler/brw_fs_lower_conversions.cpp 1970-01-01 00:00:00.000000000 +0000 @@ -1,132 +0,0 @@ -/* - * Copyright © 2015 Connor Abbott - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * the rights to use, copy, modify, merge, publish, distribute, sublicense, - * and/or sell copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice (including the next - * paragraph) shall be included in all copies or substantial portions of the - * Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL - * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING - * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS - * IN THE SOFTWARE. 
- */ - -#include "brw_fs.h" -#include "brw_cfg.h" -#include "brw_fs_builder.h" - -using namespace brw; - -static bool -supports_type_conversion(const fs_inst *inst) { - switch (inst->opcode) { - case BRW_OPCODE_MOV: - case SHADER_OPCODE_MOV_INDIRECT: - return true; - case BRW_OPCODE_SEL: - return inst->dst.type == get_exec_type(inst); - default: - /* FIXME: We assume the opcodes don't explicitly mentioned - * before just work fine with arbitrary conversions. - */ - return true; - } -} - -/* From the SKL PRM Vol 2a, "Move": - * - * "A mov with the same source and destination type, no source modifier, - * and no saturation is a raw move. A packed byte destination region (B - * or UB type with HorzStride == 1 and ExecSize > 1) can only be written - * using raw move." - */ -static bool -is_byte_raw_mov (const fs_inst *inst) -{ - return type_sz(inst->dst.type) == 1 && - inst->opcode == BRW_OPCODE_MOV && - inst->src[0].type == inst->dst.type && - !inst->saturate && - !inst->src[0].negate && - !inst->src[0].abs; -} - -bool -fs_visitor::lower_conversions() -{ - bool progress = false; - - foreach_block_and_inst(block, fs_inst, inst, cfg) { - const fs_builder ibld(this, block, inst); - fs_reg dst = inst->dst; - bool saturate = inst->saturate; - - if (supports_type_conversion(inst)) { - if (type_sz(inst->dst.type) < get_exec_type_size(inst) && - !is_byte_raw_mov(inst)) { - /* From the Broadwell PRM, 3D Media GPGPU, "Double Precision Float to - * Single Precision Float": - * - * The upper Dword of every Qword will be written with undefined - * value when converting DF to F. - * - * So we need to allocate a temporary that's two registers, and then do - * a strided MOV to get the lower DWord of every Qword that has the - * result. - * - * This restriction applies, in general, whenever we convert to - * a type with a smaller bit-size. 
- */ - fs_reg temp = ibld.vgrf(get_exec_type(inst)); - fs_reg strided_temp = subscript(temp, dst.type, 0); - - assert(inst->size_written == inst->dst.component_size(inst->exec_size)); - inst->dst = strided_temp; - inst->saturate = false; - /* As it is an strided destination, we write n-times more being n the - * size ratio between source and destination types. Update - * size_written accordingly. - */ - inst->size_written = inst->dst.component_size(inst->exec_size); - - fs_inst *mov = ibld.at(block, inst->next).MOV(dst, strided_temp); - mov->saturate = saturate; - mov->predicate = inst->predicate; - - progress = true; - } - } else { - fs_reg temp0 = ibld.vgrf(get_exec_type(inst)); - - assert(inst->size_written == inst->dst.component_size(inst->exec_size)); - inst->dst = temp0; - /* As it is an strided destination, we write n-times more being n the - * size ratio between source and destination types. Update - * size_written accordingly. - */ - inst->size_written = inst->dst.component_size(inst->exec_size); - inst->saturate = false; - /* Now, do the conversion to original destination's type. In next iteration, - * we will lower it if it is a d2f conversion. 
- */ - ibld.at(block, inst->next).MOV(dst, temp0)->saturate = saturate; - - progress = true; - } - } - - if (progress) - invalidate_live_intervals(); - - return progress; -} diff -Nru mesa-18.3.3/src/intel/compiler/brw_fs_lower_regioning.cpp mesa-19.0.1/src/intel/compiler/brw_fs_lower_regioning.cpp --- mesa-18.3.3/src/intel/compiler/brw_fs_lower_regioning.cpp 1970-01-01 00:00:00.000000000 +0000 +++ mesa-19.0.1/src/intel/compiler/brw_fs_lower_regioning.cpp 2019-03-31 23:16:37.000000000 +0000 @@ -0,0 +1,421 @@ +/* + * Copyright © 2018 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + */ + +#include "brw_fs.h" +#include "brw_cfg.h" +#include "brw_fs_builder.h" + +using namespace brw; + +namespace { + /* From the SKL PRM Vol 2a, "Move": + * + * "A mov with the same source and destination type, no source modifier, + * and no saturation is a raw move. 
A packed byte destination region (B + * or UB type with HorzStride == 1 and ExecSize > 1) can only be written + * using raw move." + */ + bool + is_byte_raw_mov(const fs_inst *inst) + { + return type_sz(inst->dst.type) == 1 && + inst->opcode == BRW_OPCODE_MOV && + inst->src[0].type == inst->dst.type && + !inst->saturate && + !inst->src[0].negate && + !inst->src[0].abs; + } + + /* + * Return an acceptable byte stride for the destination of an instruction + * that requires it to have some particular alignment. + */ + unsigned + required_dst_byte_stride(const fs_inst *inst) + { + if (inst->dst.is_accumulator()) { + /* If the destination is an accumulator, insist that we leave the + * stride alone. We cannot "fix" accumulator destinations by writing + * to a temporary and emitting a MOV into the original destination. + * For multiply instructions (our one use of the accumulator), the + * MUL writes the full 66 bits of the accumulator whereas the MOV we + * would emit only writes 33 bits and leaves the top 33 bits + * undefined. + * + * It's safe to just require the original stride here because the + * lowering pass will detect the mismatch in has_invalid_src_region + * and fix the sources of the multiply instead of the destination. + */ + return inst->dst.stride * type_sz(inst->dst.type); + } else if (type_sz(inst->dst.type) < get_exec_type_size(inst) && + !is_byte_raw_mov(inst)) { + return get_exec_type_size(inst); + } else { + unsigned stride = inst->dst.stride * type_sz(inst->dst.type); + + for (unsigned i = 0; i < inst->sources; i++) { + if (!is_uniform(inst->src[i]) && !inst->is_control_source(i)) + stride = MAX2(stride, inst->src[i].stride * + type_sz(inst->src[i].type)); + } + + return stride; + } + } + + /* + * Return an acceptable byte sub-register offset for the destination of an + * instruction that requires it to be aligned to the sub-register offset of + * the sources. 
+ */ + unsigned + required_dst_byte_offset(const fs_inst *inst) + { + for (unsigned i = 0; i < inst->sources; i++) { + if (!is_uniform(inst->src[i]) && !inst->is_control_source(i)) + if (reg_offset(inst->src[i]) % REG_SIZE != + reg_offset(inst->dst) % REG_SIZE) + return 0; + } + + return reg_offset(inst->dst) % REG_SIZE; + } + + /* + * Return whether the instruction has an unsupported channel bit layout + * specified for the i-th source region. + */ + bool + has_invalid_src_region(const gen_device_info *devinfo, const fs_inst *inst, + unsigned i) + { + if (is_unordered(inst) || inst->is_control_source(i)) { + return false; + } else { + const unsigned dst_byte_stride = inst->dst.stride * type_sz(inst->dst.type); + const unsigned src_byte_stride = inst->src[i].stride * + type_sz(inst->src[i].type); + const unsigned dst_byte_offset = reg_offset(inst->dst) % REG_SIZE; + const unsigned src_byte_offset = reg_offset(inst->src[i]) % REG_SIZE; + + return has_dst_aligned_region_restriction(devinfo, inst) && + !is_uniform(inst->src[i]) && + (src_byte_stride != dst_byte_stride || + src_byte_offset != dst_byte_offset); + } + } + + /* + * Return whether the instruction has an unsupported channel bit layout + * specified for the destination region. 
+ */ + bool + has_invalid_dst_region(const gen_device_info *devinfo, + const fs_inst *inst) + { + if (is_unordered(inst)) { + return false; + } else { + const brw_reg_type exec_type = get_exec_type(inst); + const unsigned dst_byte_offset = reg_offset(inst->dst) % REG_SIZE; + const unsigned dst_byte_stride = inst->dst.stride * type_sz(inst->dst.type); + const bool is_narrowing_conversion = !is_byte_raw_mov(inst) && + type_sz(inst->dst.type) < type_sz(exec_type); + + return (has_dst_aligned_region_restriction(devinfo, inst) && + (required_dst_byte_stride(inst) != dst_byte_stride || + required_dst_byte_offset(inst) != dst_byte_offset)) || + (is_narrowing_conversion && + required_dst_byte_stride(inst) != dst_byte_stride); + } + } + + /* + * Return whether the instruction has unsupported source modifiers + * specified for the i-th source region. + */ + bool + has_invalid_src_modifiers(const gen_device_info *devinfo, const fs_inst *inst, + unsigned i) + { + return !inst->can_do_source_mods(devinfo) && + (inst->src[i].negate || inst->src[i].abs); + } + + /* + * Return whether the instruction has an unsupported type conversion + * specified for the destination. + */ + bool + has_invalid_conversion(const gen_device_info *devinfo, const fs_inst *inst) + { + switch (inst->opcode) { + case BRW_OPCODE_MOV: + return false; + case BRW_OPCODE_SEL: + return inst->dst.type != get_exec_type(inst); + case SHADER_OPCODE_BROADCAST: + case SHADER_OPCODE_MOV_INDIRECT: + /* The source and destination types of these may be hard-coded to + * integer at codegen time due to hardware limitations of 64-bit + * types. + */ + return ((devinfo->gen == 7 && !devinfo->is_haswell) || + devinfo->is_cherryview || gen_device_info_is_9lp(devinfo)) && + type_sz(inst->src[0].type) > 4 && + inst->dst.type != inst->src[0].type; + default: + /* FIXME: We assume the opcodes don't explicitly mentioned before + * just work fine with arbitrary conversions. 
+ */ + return false; + } + } + + /** + * Return whether the instruction has non-standard semantics for the + * conditional mod which don't cause the flag register to be updated with + * the comparison result. + */ + bool + has_inconsistent_cmod(const fs_inst *inst) + { + return inst->opcode == BRW_OPCODE_SEL || + inst->opcode == BRW_OPCODE_CSEL || + inst->opcode == BRW_OPCODE_IF || + inst->opcode == BRW_OPCODE_WHILE; + } + + bool + lower_instruction(fs_visitor *v, bblock_t *block, fs_inst *inst); +} + +namespace brw { + /** + * Remove any modifiers from the \p i-th source region of the instruction, + * including negate, abs and any implicit type conversion to the execution + * type. Instead any source modifiers will be implemented as a separate + * MOV instruction prior to the original instruction. + */ + bool + lower_src_modifiers(fs_visitor *v, bblock_t *block, fs_inst *inst, unsigned i) + { + assert(inst->components_read(i) == 1); + const fs_builder ibld(v, block, inst); + const fs_reg tmp = ibld.vgrf(get_exec_type(inst)); + + lower_instruction(v, block, ibld.MOV(tmp, inst->src[i])); + inst->src[i] = tmp; + + return true; + } +} + +namespace { + /** + * Remove any modifiers from the destination region of the instruction, + * including saturate, conditional mod and any implicit type conversion + * from the execution type. Instead any destination modifiers will be + * implemented as a separate MOV instruction after the original + * instruction. + */ + bool + lower_dst_modifiers(fs_visitor *v, bblock_t *block, fs_inst *inst) + { + const fs_builder ibld(v, block, inst); + const brw_reg_type type = get_exec_type(inst); + /* Not strictly necessary, but if possible use a temporary with the same + * channel alignment as the current destination in order to avoid + * violating the restrictions enforced later on by lower_src_region() + * and lower_dst_region(), which would introduce additional copy + * instructions into the program unnecessarily. 
+ */ + const unsigned stride = + type_sz(inst->dst.type) * inst->dst.stride <= type_sz(type) ? 1 : + type_sz(inst->dst.type) * inst->dst.stride / type_sz(type); + const fs_reg tmp = horiz_stride(ibld.vgrf(type, stride), stride); + + /* Emit a MOV taking care of all the destination modifiers. */ + fs_inst *mov = ibld.at(block, inst->next).MOV(inst->dst, tmp); + mov->saturate = inst->saturate; + if (!has_inconsistent_cmod(inst)) + mov->conditional_mod = inst->conditional_mod; + if (inst->opcode != BRW_OPCODE_SEL) { + mov->predicate = inst->predicate; + mov->predicate_inverse = inst->predicate_inverse; + } + mov->flag_subreg = inst->flag_subreg; + lower_instruction(v, block, mov); + + /* Point the original instruction at the temporary, and clean up any + * destination modifiers. + */ + assert(inst->size_written == inst->dst.component_size(inst->exec_size)); + inst->dst = tmp; + inst->size_written = inst->dst.component_size(inst->exec_size); + inst->saturate = false; + if (!has_inconsistent_cmod(inst)) + inst->conditional_mod = BRW_CONDITIONAL_NONE; + + assert(!inst->flags_written() || !mov->predicate); + return true; + } + + /** + * Remove any non-trivial shuffling of data from the \p i-th source region + * of the instruction. Instead implement the region as a series of integer + * copies into a temporary with the same channel layout as the destination. + */ + bool + lower_src_region(fs_visitor *v, bblock_t *block, fs_inst *inst, unsigned i) + { + assert(inst->components_read(i) == 1); + const fs_builder ibld(v, block, inst); + const unsigned stride = type_sz(inst->dst.type) * inst->dst.stride / + type_sz(inst->src[i].type); + assert(stride > 0); + const fs_reg tmp = horiz_stride(ibld.vgrf(inst->src[i].type, stride), + stride); + + /* Emit a series of 32-bit integer copies with any source modifiers + * cleaned up (because their semantics are dependent on the type). 
+ */ + const brw_reg_type raw_type = brw_int_type(MIN2(type_sz(tmp.type), 4), + false); + const unsigned n = type_sz(tmp.type) / type_sz(raw_type); + fs_reg raw_src = inst->src[i]; + raw_src.negate = false; + raw_src.abs = false; + + for (unsigned j = 0; j < n; j++) + ibld.MOV(subscript(tmp, raw_type, j), subscript(raw_src, raw_type, j)); + + /* Point the original instruction at the temporary, making sure to keep + * any source modifiers in the instruction. + */ + fs_reg lower_src = tmp; + lower_src.negate = inst->src[i].negate; + lower_src.abs = inst->src[i].abs; + inst->src[i] = lower_src; + + return true; + } + + /** + * Remove any non-trivial shuffling of data from the destination region of + * the instruction. Instead implement the region as a series of integer + * copies from a temporary with a channel layout compatible with the + * sources. + */ + bool + lower_dst_region(fs_visitor *v, bblock_t *block, fs_inst *inst) + { + /* We cannot replace the result of an integer multiply which writes the + * accumulator because MUL+MACH pairs act on the accumulator as a 66-bit + * value whereas the MOV will act on only 32 or 33 bits of the + * accumulator. + */ + assert(inst->opcode != BRW_OPCODE_MUL || !inst->dst.is_accumulator() || + brw_reg_type_is_floating_point(inst->dst.type)); + + const fs_builder ibld(v, block, inst); + const unsigned stride = required_dst_byte_stride(inst) / + type_sz(inst->dst.type); + assert(stride > 0); + const fs_reg tmp = horiz_stride(ibld.vgrf(inst->dst.type, stride), + stride); + + /* Emit a series of 32-bit integer copies from the temporary into the + * original destination. 
+ */ + const brw_reg_type raw_type = brw_int_type(MIN2(type_sz(tmp.type), 4), + false); + const unsigned n = type_sz(tmp.type) / type_sz(raw_type); + + if (inst->predicate && inst->opcode != BRW_OPCODE_SEL) { + /* Note that in general we cannot simply predicate the copies on the + * same flag register as the original instruction, since it may have + * been overwritten by the instruction itself. Instead initialize + * the temporary with the previous contents of the destination + * register. + */ + for (unsigned j = 0; j < n; j++) + ibld.MOV(subscript(tmp, raw_type, j), + subscript(inst->dst, raw_type, j)); + } + + for (unsigned j = 0; j < n; j++) + ibld.at(block, inst->next).MOV(subscript(inst->dst, raw_type, j), + subscript(tmp, raw_type, j)); + + /* Point the original instruction at the temporary, making sure to keep + * any destination modifiers in the instruction. + */ + assert(inst->size_written == inst->dst.component_size(inst->exec_size)); + inst->dst = tmp; + inst->size_written = inst->dst.component_size(inst->exec_size); + + return true; + } + + /** + * Legalize the source and destination regioning controls of the specified + * instruction. 
+ */ + bool + lower_instruction(fs_visitor *v, bblock_t *block, fs_inst *inst) + { + const gen_device_info *devinfo = v->devinfo; + bool progress = false; + + if (has_invalid_conversion(devinfo, inst)) + progress |= lower_dst_modifiers(v, block, inst); + + if (has_invalid_dst_region(devinfo, inst)) + progress |= lower_dst_region(v, block, inst); + + for (unsigned i = 0; i < inst->sources; i++) { + if (has_invalid_src_modifiers(devinfo, inst, i)) + progress |= lower_src_modifiers(v, block, inst, i); + + if (has_invalid_src_region(devinfo, inst, i)) + progress |= lower_src_region(v, block, inst, i); + } + + return progress; + } +} + +bool +fs_visitor::lower_regioning() +{ + bool progress = false; + + foreach_block_and_inst_safe(block, fs_inst, inst, cfg) + progress |= lower_instruction(this, block, inst); + + if (progress) + invalidate_live_intervals(); + + return progress; +} diff -Nru mesa-18.3.3/src/intel/compiler/brw_fs_nir.cpp mesa-19.0.1/src/intel/compiler/brw_fs_nir.cpp --- mesa-18.3.3/src/intel/compiler/brw_fs_nir.cpp 2018-12-07 18:58:04.000000000 +0000 +++ mesa-19.0.1/src/intel/compiler/brw_fs_nir.cpp 2019-03-31 23:16:37.000000000 +0000 @@ -26,6 +26,7 @@ #include "brw_fs_surface_builder.h" #include "brw_nir.h" #include "util/u_math.h" +#include "util/bitscan.h" using namespace brw; using namespace brw::surface_access; @@ -511,8 +512,16 @@ src0->op != nir_op_extract_i8 && src0->op != nir_op_extract_i16) return false; - nir_const_value *element = nir_src_as_const_value(src0->src[1].src); - assert(element != NULL); + /* If either opcode has source modifiers, bail. + * + * TODO: We can potentially handle source modifiers if both of the opcodes + * we're combining are signed integers. 
+ */ + if (instr->src[0].abs || instr->src[0].negate || + src0->src[0].abs || src0->src[0].negate) + return false; + + unsigned element = nir_src_as_uint(src0->src[1].src); /* Element type to extract.*/ const brw_reg_type type = brw_int_type( @@ -526,7 +535,7 @@ op0 = offset(op0, bld, src0->src[0].swizzle[0]); set_saturate(instr->dest.saturate, - bld.MOV(result, subscript(op0, type, element->u32[0]))); + bld.MOV(result, subscript(op0, type, element))); return true; } @@ -544,14 +553,18 @@ if (src0->intrinsic != nir_intrinsic_load_front_face) return false; - nir_const_value *value1 = nir_src_as_const_value(instr->src[1].src); - if (!value1 || fabsf(value1->f32[0]) != 1.0f) + if (!nir_src_is_const(instr->src[1].src) || + !nir_src_is_const(instr->src[2].src)) return false; - nir_const_value *value2 = nir_src_as_const_value(instr->src[2].src); - if (!value2 || fabsf(value2->f32[0]) != 1.0f) + const float value1 = nir_src_as_float(instr->src[1].src); + const float value2 = nir_src_as_float(instr->src[2].src); + if (fabsf(value1) != 1.0f || fabsf(value2) != 1.0f) return false; + /* nir_opt_algebraic should have gotten rid of bcsel(b, a, a) */ + assert(value1 == -value2); + fs_reg tmp = vgrf(glsl_type::int_type); if (devinfo->gen >= 6) { @@ -569,7 +582,7 @@ * surely be TRIANGLES */ - if (value1->f32[0] == -1.0f) { + if (value1 == -1.0f) { g0.negate = true; } @@ -590,7 +603,7 @@ * surely be TRIANGLES */ - if (value1->f32[0] == -1.0f) { + if (value1 == -1.0f) { g1_6.negate = true; } @@ -784,8 +797,13 @@ inst->saturate = instr->dest.saturate; break; - case nir_op_b2i: - case nir_op_b2f: + case nir_op_b2i8: + case nir_op_b2i16: + case nir_op_b2i32: + case nir_op_b2i64: + case nir_op_b2f16: + case nir_op_b2f32: + case nir_op_b2f64: op[0].type = BRW_REGISTER_TYPE_D; op[0].negate = !op[0].negate; /* fallthrough */ @@ -796,30 +814,6 @@ case nir_op_i2i64: case nir_op_u2f64: case nir_op_u2u64: - /* CHV PRM, vol07, 3D Media GPGPU Engine, Register Region Restrictions: - * - * "When 
source or destination is 64b (...), regioning in Align1 - * must follow these rules: - * - * 1. Source and destination horizontal stride must be aligned to - * the same qword. - * (...)" - * - * This means that conversions from bit-sizes smaller than 64-bit to - * 64-bit need to have the source data elements aligned to 64-bit. - * This restriction does not apply to BDW and later. - */ - if (nir_dest_bit_size(instr->dest.dest) == 64 && - nir_src_bit_size(instr->src[0].src) < 64 && - (devinfo->is_cherryview || gen_device_info_is_9lp(devinfo))) { - fs_reg tmp = bld.vgrf(result.type, 1); - tmp = subscript(tmp, op[0].type, 0); - inst = bld.MOV(tmp, op[0]); - inst = bld.MOV(result, tmp); - inst->saturate = instr->dest.saturate; - break; - } - /* fallthrough */ case nir_op_f2f32: case nir_op_f2i32: case nir_op_f2u32: @@ -973,6 +967,11 @@ inst->saturate = instr->dest.saturate; break; + case nir_op_uadd_sat: + inst = bld.ADD(result, op[0], op[1]); + inst->saturate = true; + break; + case nir_op_fmul: inst = bld.MUL(result, op[0], op[1]); inst->saturate = instr->dest.saturate; @@ -1042,10 +1041,10 @@ break; } - case nir_op_flt: - case nir_op_fge: - case nir_op_feq: - case nir_op_fne: { + case nir_op_flt32: + case nir_op_fge32: + case nir_op_feq32: + case nir_op_fne32: { fs_reg dest = result; const uint32_t bit_size = nir_src_bit_size(instr->src[0].src); @@ -1054,16 +1053,16 @@ brw_conditional_mod cond; switch (instr->op) { - case nir_op_flt: + case nir_op_flt32: cond = BRW_CONDITIONAL_L; break; - case nir_op_fge: + case nir_op_fge32: cond = BRW_CONDITIONAL_GE; break; - case nir_op_feq: + case nir_op_feq32: cond = BRW_CONDITIONAL_Z; break; - case nir_op_fne: + case nir_op_fne32: cond = BRW_CONDITIONAL_NZ; break; default: @@ -1086,12 +1085,12 @@ break; } - case nir_op_ilt: - case nir_op_ult: - case nir_op_ige: - case nir_op_uge: - case nir_op_ieq: - case nir_op_ine: { + case nir_op_ilt32: + case nir_op_ult32: + case nir_op_ige32: + case nir_op_uge32: + case nir_op_ieq32: + 
case nir_op_ine32: { fs_reg dest = result; const uint32_t bit_size = nir_src_bit_size(instr->src[0].src); @@ -1100,18 +1099,18 @@ brw_conditional_mod cond; switch (instr->op) { - case nir_op_ilt: - case nir_op_ult: + case nir_op_ilt32: + case nir_op_ult32: cond = BRW_CONDITIONAL_L; break; - case nir_op_ige: - case nir_op_uge: + case nir_op_ige32: + case nir_op_uge32: cond = BRW_CONDITIONAL_GE; break; - case nir_op_ieq: + case nir_op_ieq32: cond = BRW_CONDITIONAL_Z; break; - case nir_op_ine: + case nir_op_ine32: cond = BRW_CONDITIONAL_NZ; break; default: @@ -1164,18 +1163,18 @@ case nir_op_fdot2: case nir_op_fdot3: case nir_op_fdot4: - case nir_op_ball_fequal2: - case nir_op_ball_iequal2: - case nir_op_ball_fequal3: - case nir_op_ball_iequal3: - case nir_op_ball_fequal4: - case nir_op_ball_iequal4: - case nir_op_bany_fnequal2: - case nir_op_bany_inequal2: - case nir_op_bany_fnequal3: - case nir_op_bany_inequal3: - case nir_op_bany_fnequal4: - case nir_op_bany_inequal4: + case nir_op_b32all_fequal2: + case nir_op_b32all_iequal2: + case nir_op_b32all_fequal3: + case nir_op_b32all_iequal3: + case nir_op_b32all_fequal4: + case nir_op_b32all_iequal4: + case nir_op_b32any_fnequal2: + case nir_op_b32any_inequal2: + case nir_op_b32any_fnequal3: + case nir_op_b32any_inequal3: + case nir_op_b32any_fnequal4: + case nir_op_b32any_inequal4: unreachable("Lowered by nir_lower_alu_reductions"); case nir_op_fnoise1_1: @@ -1209,15 +1208,15 @@ inst->saturate = instr->dest.saturate; break; - case nir_op_i2b: - case nir_op_f2b: { + case nir_op_i2b32: + case nir_op_f2b32: { uint32_t bit_size = nir_src_bit_size(instr->src[0].src); if (bit_size == 64) { /* two-argument instructions can't take 64-bit immediates */ fs_reg zero; fs_reg tmp; - if (instr->op == nir_op_f2b) { + if (instr->op == nir_op_f2b32) { zero = vgrf(glsl_type::double_type); tmp = vgrf(glsl_type::double_type); bld.MOV(zero, setup_imm_df(bld, 0.0)); @@ -1236,10 +1235,10 @@ } else { fs_reg zero; if (bit_size == 32) { - zero = 
instr->op == nir_op_f2b ? brw_imm_f(0.0f) : brw_imm_d(0); + zero = instr->op == nir_op_f2b32 ? brw_imm_f(0.0f) : brw_imm_d(0); } else { assert(bit_size == 16); - zero = instr->op == nir_op_f2b ? + zero = instr->op == nir_op_f2b32 ? retype(brw_imm_w(0), BRW_REGISTER_TYPE_HF) : brw_imm_w(0); } bld.CMP(result, op[0], zero, BRW_CONDITIONAL_NZ); @@ -1329,11 +1328,13 @@ unreachable("not reached: should be handled by lower_packing_builtins"); case nir_op_unpack_half_2x16_split_x: - inst = bld.emit(FS_OPCODE_UNPACK_HALF_2x16_SPLIT_X, result, op[0]); + inst = bld.emit(BRW_OPCODE_F16TO32, result, + subscript(op[0], BRW_REGISTER_TYPE_UW, 0)); inst->saturate = instr->dest.saturate; break; case nir_op_unpack_half_2x16_split_y: - inst = bld.emit(FS_OPCODE_UNPACK_HALF_2x16_SPLIT_Y, result, op[0]); + inst = bld.emit(BRW_OPCODE_F16TO32, result, + subscript(op[0], BRW_REGISTER_TYPE_UW, 1)); inst->saturate = instr->dest.saturate; break; @@ -1449,36 +1450,14 @@ unreachable("not reached: should have been lowered"); case nir_op_ishl: + bld.SHL(result, op[0], op[1]); + break; case nir_op_ishr: - case nir_op_ushr: { - fs_reg shift_count = op[1]; - - if (devinfo->is_cherryview || gen_device_info_is_9lp(devinfo)) { - if (op[1].file == VGRF && - (result.type == BRW_REGISTER_TYPE_Q || - result.type == BRW_REGISTER_TYPE_UQ)) { - shift_count = fs_reg(VGRF, alloc.allocate(dispatch_width / 4), - BRW_REGISTER_TYPE_UD); - shift_count.stride = 2; - bld.MOV(shift_count, op[1]); - } - } - - switch (instr->op) { - case nir_op_ishl: - bld.SHL(result, op[0], shift_count); - break; - case nir_op_ishr: - bld.ASR(result, op[0], shift_count); - break; - case nir_op_ushr: - bld.SHR(result, op[0], shift_count); - break; - default: - unreachable("not reached"); - } + bld.ASR(result, op[0], op[1]); + break; + case nir_op_ushr: + bld.SHR(result, op[0], op[1]); break; - } case nir_op_pack_half_2x16_split: bld.emit(FS_OPCODE_PACK_HALF_2x16_SPLIT, result, op[0], op[1]); @@ -1494,7 +1473,7 @@ inst->saturate = 
instr->dest.saturate; break; - case nir_op_bcsel: + case nir_op_b32csel: if (optimize_frontfacing_ternary(instr, result)) return; @@ -1505,8 +1484,7 @@ case nir_op_extract_u8: case nir_op_extract_i8: { - nir_const_value *byte = nir_src_as_const_value(instr->src[1].src); - assert(byte != NULL); + unsigned byte = nir_src_as_uint(instr->src[1].src); /* The PRMs say: * @@ -1515,20 +1493,29 @@ * Use two instructions and a word or DWord intermediate integer type. */ if (nir_dest_bit_size(instr->dest.dest) == 64) { - const brw_reg_type type = brw_int_type(2, instr->op == nir_op_extract_i8); + const brw_reg_type type = brw_int_type(1, instr->op == nir_op_extract_i8); if (instr->op == nir_op_extract_i8) { /* If we need to sign extend, extract to a word first */ fs_reg w_temp = bld.vgrf(BRW_REGISTER_TYPE_W); - bld.MOV(w_temp, subscript(op[0], type, byte->u32[0])); + bld.MOV(w_temp, subscript(op[0], type, byte)); bld.MOV(result, w_temp); + } else if (byte & 1) { + /* Extract the high byte from the word containing the desired byte + * offset. 
+ */ + bld.SHR(result, + subscript(op[0], BRW_REGISTER_TYPE_UW, byte / 2), + brw_imm_uw(8)); } else { /* Otherwise use an AND with 0xff and a word type */ - bld.AND(result, subscript(op[0], type, byte->u32[0] / 2), brw_imm_uw(0xff)); + bld.AND(result, + subscript(op[0], BRW_REGISTER_TYPE_UW, byte / 2), + brw_imm_uw(0xff)); } } else { const brw_reg_type type = brw_int_type(1, instr->op == nir_op_extract_i8); - bld.MOV(result, subscript(op[0], type, byte->u32[0])); + bld.MOV(result, subscript(op[0], type, byte)); } break; } @@ -1536,9 +1523,8 @@ case nir_op_extract_u16: case nir_op_extract_i16: { const brw_reg_type type = brw_int_type(2, instr->op == nir_op_extract_i16); - nir_const_value *word = nir_src_as_const_value(instr->src[1].src); - assert(word != NULL); - bld.MOV(result, subscript(op[0], type, word->u32[0])); + unsigned word = nir_src_as_uint(instr->src[1].src); + bld.MOV(result, subscript(op[0], type, word)); break; } @@ -1649,9 +1635,9 @@ fs_reg fs_visitor::get_nir_src_imm(const nir_src &src) { - nir_const_value *val = nir_src_as_const_value(src); assert(nir_src_bit_size(src) == 32); - return val ? fs_reg(brw_imm_d(val->i32[0])) : get_nir_src(src); + return nir_src_is_const(src) ? + fs_reg(brw_imm_d(nir_src_as_int(src))) : get_nir_src(src); } fs_reg @@ -1872,7 +1858,7 @@ } /* Store the control data bits in the message payload and send it. 
*/ - int mlen = 2; + unsigned mlen = 2; if (channel_mask.file != BAD_FILE) mlen += 4; /* channel masks, plus 3 extra copies of the data */ if (per_slot_offset.file != BAD_FILE) @@ -1880,7 +1866,7 @@ fs_reg payload = bld.vgrf(BRW_REGISTER_TYPE_UD, mlen); fs_reg *sources = ralloc_array(mem_ctx, fs_reg, mlen); - int i = 0; + unsigned i = 0; sources[i++] = fs_reg(retype(brw_vec8_grf(1, 0), BRW_REGISTER_TYPE_UD)); if (per_slot_offset.file != BAD_FILE) sources[i++] = per_slot_offset; @@ -2049,19 +2035,16 @@ unsigned first_component) { struct brw_gs_prog_data *gs_prog_data = brw_gs_prog_data(prog_data); - - nir_const_value *vertex_const = nir_src_as_const_value(vertex_src); - nir_const_value *offset_const = nir_src_as_const_value(offset_src); const unsigned push_reg_count = gs_prog_data->base.urb_read_length * 8; /* TODO: figure out push input layout for invocations == 1 */ /* TODO: make this work with 64-bit inputs */ if (gs_prog_data->invocations == 1 && type_sz(dst.type) <= 4 && - offset_const != NULL && vertex_const != NULL && - 4 * (base_offset + offset_const->u32[0]) < push_reg_count) { - int imm_offset = (base_offset + offset_const->u32[0]) * 4 + - vertex_const->u32[0] * push_reg_count; + nir_src_is_const(offset_src) && nir_src_is_const(vertex_src) && + 4 * (base_offset + nir_src_as_uint(offset_src)) < push_reg_count) { + int imm_offset = (base_offset + nir_src_as_uint(offset_src)) * 4 + + nir_src_as_uint(vertex_src) * push_reg_count; for (unsigned i = 0; i < num_components; i++) { bld.MOV(offset(dst, bld, i), fs_reg(ATTR, imm_offset + i + first_component, dst.type)); @@ -2076,10 +2059,10 @@ fs_reg icp_handle = bld.vgrf(BRW_REGISTER_TYPE_UD, 1); if (gs_prog_data->invocations == 1) { - if (vertex_const) { + if (nir_src_is_const(vertex_src)) { /* The vertex index is constant; just select the proper URB handle. 
*/ icp_handle = - retype(brw_vec8_grf(first_icp_handle + vertex_const->i32[0], 0), + retype(brw_vec8_grf(first_icp_handle + nir_src_as_uint(vertex_src), 0), BRW_REGISTER_TYPE_UD); } else { /* The vertex index is non-constant. We need to use indirect @@ -2120,12 +2103,11 @@ } else { assert(gs_prog_data->invocations > 1); - if (vertex_const) { - assert(devinfo->gen >= 9 || vertex_const->i32[0] <= 5); + if (nir_src_is_const(vertex_src)) { + unsigned vertex = nir_src_as_uint(vertex_src); + assert(devinfo->gen >= 9 || vertex <= 5); bld.MOV(icp_handle, - retype(brw_vec1_grf(first_icp_handle + - vertex_const->i32[0] / 8, - vertex_const->i32[0] % 8), + retype(brw_vec1_grf(first_icp_handle + vertex / 8, vertex % 8), BRW_REGISTER_TYPE_UD)); } else { /* The vertex index is non-constant. We need to use indirect @@ -2169,7 +2151,7 @@ } for (unsigned iter = 0; iter < num_iterations; iter++) { - if (offset_const) { + if (nir_src_is_const(offset_src)) { /* Constant indexing - use global offset. */ if (first_component != 0) { unsigned read_components = num_components + first_component; @@ -2187,7 +2169,7 @@ inst->size_written = num_components * tmp_dst.component_size(inst->exec_size); } - inst->offset = base_offset + offset_const->u32[0]; + inst->offset = base_offset + nir_src_as_uint(offset_src); inst->mlen = 1; } else { /* Indirect indexing - use per-slot offsets as well. */ @@ -2225,7 +2207,7 @@ if (num_iterations > 1) { num_components = orig_num_components - 2; - if(offset_const) { + if(nir_src_is_const(offset_src)) { base_offset++; } else { fs_reg new_indirect = bld.vgrf(BRW_REGISTER_TYPE_UD, 1); @@ -2240,121 +2222,19 @@ fs_visitor::get_indirect_offset(nir_intrinsic_instr *instr) { nir_src *offset_src = nir_get_io_offset_src(instr); - nir_const_value *const_value = nir_src_as_const_value(*offset_src); - if (const_value) { + if (nir_src_is_const(*offset_src)) { /* The only constant offset we should find is 0. 
brw_nir.c's * add_const_offset_to_base() will fold other constant offsets * into instr->const_index[0]. */ - assert(const_value->u32[0] == 0); + assert(nir_src_as_uint(*offset_src) == 0); return fs_reg(); } return get_nir_src(*offset_src); } -static void -do_untyped_vector_read(const fs_builder &bld, - const fs_reg dest, - const fs_reg surf_index, - const fs_reg offset_reg, - unsigned num_components) -{ - if (type_sz(dest.type) <= 2) { - assert(dest.stride == 1); - boolean is_const_offset = offset_reg.file == BRW_IMMEDIATE_VALUE; - - if (is_const_offset) { - uint32_t start = offset_reg.ud & ~3; - uint32_t end = offset_reg.ud + num_components * type_sz(dest.type); - end = ALIGN(end, 4); - assert (end - start <= 16); - - /* At this point we have 16-bit component/s that have constant - * offset aligned to 4-bytes that can be read with untyped_reads. - * untyped_read message requires 32-bit aligned offsets. - */ - unsigned first_component = (offset_reg.ud & 3) / type_sz(dest.type); - unsigned num_components_32bit = (end - start) / 4; - - fs_reg read_result = - emit_untyped_read(bld, surf_index, brw_imm_ud(start), - 1 /* dims */, - num_components_32bit, - BRW_PREDICATE_NONE); - shuffle_from_32bit_read(bld, dest, read_result, first_component, - num_components); - } else { - fs_reg read_offset = bld.vgrf(BRW_REGISTER_TYPE_UD); - for (unsigned i = 0; i < num_components; i++) { - if (i == 0) { - bld.MOV(read_offset, offset_reg); - } else { - bld.ADD(read_offset, offset_reg, - brw_imm_ud(i * type_sz(dest.type))); - } - /* Non constant offsets are not guaranteed to be aligned 32-bits - * so they are read using one byte_scattered_read message - * for each component. 
- */ - fs_reg read_result = - emit_byte_scattered_read(bld, surf_index, read_offset, - 1 /* dims */, 1, - type_sz(dest.type) * 8 /* bit_size */, - BRW_PREDICATE_NONE); - bld.MOV(offset(dest, bld, i), - subscript (read_result, dest.type, 0)); - } - } - } else if (type_sz(dest.type) == 4) { - fs_reg read_result = emit_untyped_read(bld, surf_index, offset_reg, - 1 /* dims */, - num_components, - BRW_PREDICATE_NONE); - read_result.type = dest.type; - for (unsigned i = 0; i < num_components; i++) - bld.MOV(offset(dest, bld, i), offset(read_result, bld, i)); - } else if (type_sz(dest.type) == 8) { - /* Reading a dvec, so we need to: - * - * 1. Multiply num_components by 2, to account for the fact that we - * need to read 64-bit components. - * 2. Shuffle the result of the load to form valid 64-bit elements - * 3. Emit a second load (for components z/w) if needed. - */ - fs_reg read_offset = bld.vgrf(BRW_REGISTER_TYPE_UD); - bld.MOV(read_offset, offset_reg); - - int iters = num_components <= 2 ? 1 : 2; - - /* Load the dvec, the first iteration loads components x/y, the second - * iteration, if needed, loads components z/w - */ - for (int it = 0; it < iters; it++) { - /* Compute number of components to read in this iteration */ - int iter_components = MIN2(2, num_components); - num_components -= iter_components; - - /* Read. Since this message reads 32-bit components, we need to - * read twice as many components. 
- */ - fs_reg read_result = emit_untyped_read(bld, surf_index, read_offset, - 1 /* dims */, - iter_components * 2, - BRW_PREDICATE_NONE); - - /* Shuffle the 32-bit load result into valid 64-bit data */ - shuffle_from_32bit_read(bld, offset(dest, bld, it * 2), - read_result, 0, iter_components); - - bld.ADD(read_offset, read_offset, brw_imm_ud(16)); - } - } else { - unreachable("Unsupported type"); - } -} - void fs_visitor::nir_emit_vs_intrinsic(const fs_builder &bld, nir_intrinsic_instr *instr) @@ -2375,9 +2255,7 @@ unsigned first_component = nir_intrinsic_component(instr); unsigned num_components = instr->num_components; - nir_const_value *const_offset = nir_src_as_const_value(instr->src[0]); - assert(const_offset && "Indirect input loads not allowed"); - src = offset(src, bld, const_offset->u32[0]); + src = offset(src, bld, nir_src_as_uint(instr->src[0])); if (type_sz(dest.type) == 8) first_component /= 2; @@ -2464,18 +2342,17 @@ unsigned imm_offset = instr->const_index[0]; const nir_src &vertex_src = instr->src[0]; - nir_const_value *vertex_const = nir_src_as_const_value(vertex_src); fs_inst *inst; fs_reg icp_handle; - if (vertex_const) { + if (nir_src_is_const(vertex_src)) { /* Emit a MOV to resolve <0,1,0> regioning. */ icp_handle = bld.vgrf(BRW_REGISTER_TYPE_UD, 1); + unsigned vertex = nir_src_as_uint(vertex_src); bld.MOV(icp_handle, - retype(brw_vec1_grf(1 + (vertex_const->i32[0] >> 3), - vertex_const->i32[0] & 7), + retype(brw_vec1_grf(1 + (vertex >> 3), vertex & 7), BRW_REGISTER_TYPE_UD)); } else if (tcs_prog_data->instances == 1 && vertex_src.is_ssa && @@ -3036,10 +2913,6 @@ wm_prog_data->binding_table.render_target_read_start - wm_prog_data->base.binding_table.texture_start; - brw_mark_surface_used( - bld.shader->stage_prog_data, - wm_prog_data->binding_table.render_target_read_start + target); - /* Calculate the fragment coordinates. 
*/ const fs_reg coords = bld.vgrf(BRW_REGISTER_TYPE_UD, 3); bld.MOV(offset(coords, bld, 0), pixel_x); @@ -3070,7 +2943,7 @@ /* Emit the instruction. */ const fs_reg srcs[] = { coords, fs_reg(), brw_imm_ud(0), fs_reg(), - sample, mcs, + fs_reg(), sample, mcs, brw_imm_ud(surface), brw_imm_ud(0), fs_reg(), brw_imm_ud(3), brw_imm_ud(0) }; STATIC_ASSERT(ARRAY_SIZE(srcs) == TEX_LOGICAL_NUM_SRCS); @@ -3189,10 +3062,9 @@ case nir_intrinsic_store_output: { const fs_reg src = get_nir_src(instr->src[0]); - const nir_const_value *const_offset = nir_src_as_const_value(instr->src[1]); - assert(const_offset && "Indirect output stores not allowed"); + const unsigned store_offset = nir_src_as_uint(instr->src[1]); const unsigned location = nir_intrinsic_base(instr) + - SET_FIELD(const_offset->u32[0], BRW_NIR_FRAG_OUTPUT_LOCATION); + SET_FIELD(store_offset, BRW_NIR_FRAG_OUTPUT_LOCATION); const fs_reg new_dest = retype(alloc_frag_output(this, location), src.type); @@ -3207,9 +3079,8 @@ const unsigned l = GET_FIELD(nir_intrinsic_base(instr), BRW_NIR_FRAG_OUTPUT_LOCATION); assert(l >= FRAG_RESULT_DATA0); - nir_const_value *const_offset = nir_src_as_const_value(instr->src[0]); - assert(const_offset && "Indirect output loads not allowed"); - const unsigned target = l - FRAG_RESULT_DATA0 + const_offset->u32[0]; + const unsigned load_offset = nir_src_as_uint(instr->src[0]); + const unsigned target = l - FRAG_RESULT_DATA0 + load_offset; const fs_reg tmp = bld.vgrf(dest.type, 4); if (reinterpret_cast(key)->coherent_fb_fetch) @@ -3298,10 +3169,8 @@ const glsl_interp_mode interpolation = (enum glsl_interp_mode) nir_intrinsic_interp_mode(instr); - nir_const_value *const_sample = nir_src_as_const_value(instr->src[0]); - - if (const_sample) { - unsigned msg_data = const_sample->i32[0] << 4; + if (nir_src_is_const(instr->src[0])) { + unsigned msg_data = nir_src_as_uint(instr->src[0]) << 4; emit_pixel_interpolater_send(bld, FS_OPCODE_INTERPOLATE_AT_SAMPLE, @@ -3370,6 +3239,7 @@ nir_const_value 
*const_offset = nir_src_as_const_value(instr->src[0]); if (const_offset) { + assert(nir_src_bit_size(instr->src[0]) == 32); unsigned off_x = MIN2((int)(const_offset->f32[0] * 16), 7) & 0xf; unsigned off_y = MIN2((int)(const_offset->f32[1] * 16), 7) & 0xf; @@ -3474,12 +3344,11 @@ static int get_op_for_atomic_add(nir_intrinsic_instr *instr, unsigned src) { - const nir_const_value *const val = nir_src_as_const_value(instr->src[src]); - - if (val != NULL) { - if (val->i32[0] == 1) + if (nir_src_is_const(instr->src[src])) { + int64_t add_val = nir_src_as_int(instr->src[src]); + if (add_val == 1) return BRW_AOP_INC; - else if (val->i32[0] == -1) + else if (add_val == -1) return BRW_AOP_DEC; } @@ -3525,7 +3394,6 @@ cs_prog_data->uses_num_work_groups = true; fs_reg surf_index = brw_imm_ud(surface); - brw_mark_surface_used(prog_data, surface); /* Read the 3 GLuint components of gl_NumWorkGroups */ for (unsigned i = 0; i < 3; i++) { @@ -3583,93 +3451,64 @@ case nir_intrinsic_load_shared: { assert(devinfo->gen >= 7); + assert(stage == MESA_SHADER_COMPUTE); - fs_reg surf_index = brw_imm_ud(GEN7_BTI_SLM); + const unsigned bit_size = nir_dest_bit_size(instr->dest); + fs_reg offset_reg = retype(get_nir_src(instr->src[0]), + BRW_REGISTER_TYPE_UD); - /* Get the offset to read from */ - fs_reg offset_reg; - nir_const_value *const_offset = nir_src_as_const_value(instr->src[0]); - if (const_offset) { - offset_reg = brw_imm_ud(instr->const_index[0] + const_offset->u32[0]); - } else { - offset_reg = vgrf(glsl_type::uint_type); - bld.ADD(offset_reg, - retype(get_nir_src(instr->src[0]), BRW_REGISTER_TYPE_UD), - brw_imm_ud(instr->const_index[0])); - } + /* Make dest unsigned because that's what the temporary will be */ + dest.type = brw_reg_type_from_bit_size(bit_size, BRW_REGISTER_TYPE_UD); /* Read the vector */ - do_untyped_vector_read(bld, dest, surf_index, offset_reg, - instr->num_components); + if (nir_intrinsic_align(instr) >= 4) { + assert(nir_dest_bit_size(instr->dest) == 32); + 
fs_reg read_result = emit_untyped_read(bld, brw_imm_ud(GEN7_BTI_SLM), + offset_reg, 1 /* dims */, + instr->num_components, + BRW_PREDICATE_NONE); + for (unsigned i = 0; i < instr->num_components; i++) + bld.MOV(offset(dest, bld, i), offset(read_result, bld, i)); + } else { + assert(nir_dest_bit_size(instr->dest) <= 32); + assert(nir_dest_num_components(instr->dest) == 1); + fs_reg read_result = + emit_byte_scattered_read(bld, brw_imm_ud(GEN7_BTI_SLM), offset_reg, + 1 /* dims */, 1, bit_size, + BRW_PREDICATE_NONE); + bld.MOV(dest, read_result); + } break; } case nir_intrinsic_store_shared: { assert(devinfo->gen >= 7); + assert(stage == MESA_SHADER_COMPUTE); - /* Block index */ - fs_reg surf_index = brw_imm_ud(GEN7_BTI_SLM); - - /* Value */ + const unsigned bit_size = nir_src_bit_size(instr->src[0]); fs_reg val_reg = get_nir_src(instr->src[0]); + fs_reg offset_reg = retype(get_nir_src(instr->src[1]), + BRW_REGISTER_TYPE_UD); - /* Writemask */ - unsigned writemask = instr->const_index[1]; - - /* get_nir_src() retypes to integer. Be wary of 64-bit types though - * since the untyped writes below operate in units of 32-bits, which - * means that we need to write twice as many components each time. - * Also, we have to suffle 64-bit data to be in the appropriate layout - * expected by our 32-bit write messages. - */ - unsigned type_size = 4; - if (nir_src_bit_size(instr->src[0]) == 64) { - type_size = 8; - val_reg = shuffle_for_32bit_write(bld, val_reg, 0, - instr->num_components); - } - - unsigned type_slots = type_size / 4; - - /* Combine groups of consecutive enabled channels in one write - * message. We use ffs to find the first enabled channel and then ffs on - * the bit-inverse, down-shifted writemask to determine the length of - * the block of enabled bits. - */ - while (writemask) { - unsigned first_component = ffs(writemask) - 1; - unsigned length = ffs(~(writemask >> first_component)) - 1; - - /* We can't write more than 2 64-bit components at once. 
Limit the - * length of the write to what we can do and let the next iteration - * handle the rest - */ - if (type_size > 4) - length = MIN2(2, length); - - fs_reg offset_reg; - nir_const_value *const_offset = nir_src_as_const_value(instr->src[1]); - if (const_offset) { - offset_reg = brw_imm_ud(instr->const_index[0] + const_offset->u32[0] + - type_size * first_component); - } else { - offset_reg = vgrf(glsl_type::uint_type); - bld.ADD(offset_reg, - retype(get_nir_src(instr->src[1]), BRW_REGISTER_TYPE_UD), - brw_imm_ud(instr->const_index[0] + type_size * first_component)); - } + val_reg.type = brw_reg_type_from_bit_size(bit_size, BRW_REGISTER_TYPE_UD); - emit_untyped_write(bld, surf_index, offset_reg, - offset(val_reg, bld, first_component * type_slots), - 1 /* dims */, length * type_slots, + assert(nir_intrinsic_write_mask(instr) == + (1u << instr->num_components) - 1); + if (nir_intrinsic_align(instr) >= 4) { + assert(nir_src_bit_size(instr->src[0]) == 32); + assert(nir_src_num_components(instr->src[0]) <= 4); + emit_untyped_write(bld, brw_imm_ud(GEN7_BTI_SLM), offset_reg, val_reg, + 1 /* dims */, instr->num_components, BRW_PREDICATE_NONE); - - /* Clear the bits in the writemask that we just wrote, then try - * again to see if more channels are left. - */ - writemask &= (15 << (first_component + length)); + } else { + assert(nir_src_bit_size(instr->src[0]) <= 32); + assert(nir_src_num_components(instr->src[0]) == 1); + fs_reg write_src = bld.vgrf(BRW_REGISTER_TYPE_UD); + bld.MOV(write_src, val_reg); + emit_byte_scattered_write(bld, brw_imm_ud(GEN7_BTI_SLM), offset_reg, + write_src, 1 /* dims */, bit_size, + BRW_PREDICATE_NONE); } - break; } @@ -3762,6 +3601,27 @@ return bld.emit_uniformize(image); } +fs_reg +fs_visitor::get_nir_ssbo_intrinsic_index(const brw::fs_builder &bld, + nir_intrinsic_instr *instr) +{ + /* SSBO stores are weird in that their index is in src[1] */ + const unsigned src = instr->intrinsic == nir_intrinsic_store_ssbo ? 
1 : 0; + + fs_reg surf_index; + if (nir_src_is_const(instr->src[src])) { + unsigned index = stage_prog_data->binding_table.ssbo_start + + nir_src_as_uint(instr->src[src]); + surf_index = brw_imm_ud(index); + } else { + surf_index = vgrf(glsl_type::uint_type); + bld.ADD(surf_index, get_nir_src(instr->src[src]), + brw_imm_ud(stage_prog_data->binding_table.ssbo_start)); + } + + return surf_index; +} + static unsigned image_intrinsic_coord_components(nir_intrinsic_instr *instr) { @@ -3889,18 +3749,20 @@ BRW_REGISTER_TYPE_UD); image = bld.emit_uniformize(image); + fs_reg srcs[TEX_LOGICAL_NUM_SRCS]; + srcs[TEX_LOGICAL_SRC_SURFACE] = image; + srcs[TEX_LOGICAL_SRC_SAMPLER] = brw_imm_d(0); + srcs[TEX_LOGICAL_SRC_COORD_COMPONENTS] = brw_imm_d(0); + srcs[TEX_LOGICAL_SRC_GRAD_COMPONENTS] = brw_imm_d(0); + /* Since the image size is always uniform, we can just emit a SIMD8 * query instruction and splat the result out. */ const fs_builder ubld = bld.exec_all().group(8, 0); - /* The LOD also serves as the message payload */ - fs_reg lod = ubld.vgrf(BRW_REGISTER_TYPE_UD); - ubld.MOV(lod, brw_imm_ud(0)); - fs_reg tmp = ubld.vgrf(BRW_REGISTER_TYPE_UD, 4); - fs_inst *inst = ubld.emit(SHADER_OPCODE_IMAGE_SIZE, tmp, lod, image); - inst->mlen = 1; + fs_inst *inst = ubld.emit(SHADER_OPCODE_IMAGE_SIZE_LOGICAL, + tmp, srcs, ARRAY_SIZE(srcs)); inst->size_written = 4 * REG_SIZE; for (unsigned c = 0; c < instr->dest.ssa.num_components; ++c) { @@ -3981,13 +3843,13 @@ fs_reg src(UNIFORM, instr->const_index[0] / 4, dest.type); - nir_const_value *const_offset = nir_src_as_const_value(instr->src[0]); - if (const_offset) { - assert(const_offset->u32[0] % type_sz(dest.type) == 0); + if (nir_src_is_const(instr->src[0])) { + unsigned load_offset = nir_src_as_uint(instr->src[0]); + assert(load_offset % type_sz(dest.type) == 0); /* For 16-bit types we add the module of the const_index[0] * offset to access to not 32-bit aligned element */ - src.offset = const_offset->u32[0] + instr->const_index[0] % 4; 
+ src.offset = load_offset + instr->const_index[0] % 4; for (unsigned j = 0; j < instr->num_components; j++) { bld.MOV(offset(dest, bld, j), offset(src, bld, j)); @@ -4037,14 +3899,11 @@ } case nir_intrinsic_load_ubo: { - nir_const_value *const_index = nir_src_as_const_value(instr->src[0]); fs_reg surf_index; - - if (const_index) { + if (nir_src_is_const(instr->src[0])) { const unsigned index = stage_prog_data->binding_table.ubo_start + - const_index->u32[0]; + nir_src_as_uint(instr->src[0]); surf_index = brw_imm_ud(index); - brw_mark_surface_used(prog_data, index); } else { /* The block index is not a constant. Evaluate the index expression * per-channel and add the base UBO index; we have to select a value @@ -4054,17 +3913,9 @@ bld.ADD(surf_index, get_nir_src(instr->src[0]), brw_imm_ud(stage_prog_data->binding_table.ubo_start)); surf_index = bld.emit_uniformize(surf_index); - - /* Assume this may touch any UBO. It would be nice to provide - * a tighter bound, but the array information is already lowered away. - */ - brw_mark_surface_used(prog_data, - stage_prog_data->binding_table.ubo_start + - nir->info.num_ubos - 1); } - nir_const_value *const_offset = nir_src_as_const_value(instr->src[1]); - if (const_offset == NULL) { + if (!nir_src_is_const(instr->src[1])) { fs_reg base_offset = retype(get_nir_src(instr->src[1]), BRW_REGISTER_TYPE_UD); @@ -4081,11 +3932,12 @@ * and we have to split it if necessary. 
*/ const unsigned type_size = type_sz(dest.type); + const unsigned load_offset = nir_src_as_uint(instr->src[1]); /* See if we've selected this as a push constant candidate */ - if (const_index) { - const unsigned ubo_block = const_index->u32[0]; - const unsigned offset_256b = const_offset->u32[0] / 32; + if (nir_src_is_const(instr->src[0])) { + const unsigned ubo_block = nir_src_as_uint(instr->src[0]); + const unsigned offset_256b = load_offset / 32; fs_reg push_reg; for (int i = 0; i < 4; i++) { @@ -4095,7 +3947,7 @@ offset_256b < range->start + range->length) { push_reg = fs_reg(UNIFORM, UBO_START + i, dest.type); - push_reg.offset = const_offset->u32[0] - 32 * range->start; + push_reg.offset = load_offset - 32 * range->start; break; } } @@ -4114,7 +3966,7 @@ const fs_reg packed_consts = ubld.vgrf(BRW_REGISTER_TYPE_UD); for (unsigned c = 0; c < instr->num_components;) { - const unsigned base = const_offset->u32[0] + c * type_size; + const unsigned base = load_offset + c * type_size; /* Number of usable components in the next block-aligned load. 
*/ const unsigned count = MIN2(instr->num_components - c, (block_sz - base % block_sz) / type_size); @@ -4139,40 +3991,32 @@ case nir_intrinsic_load_ssbo: { assert(devinfo->gen >= 7); - nir_const_value *const_uniform_block = - nir_src_as_const_value(instr->src[0]); + const unsigned bit_size = nir_dest_bit_size(instr->dest); + fs_reg surf_index = get_nir_ssbo_intrinsic_index(bld, instr); + fs_reg offset_reg = retype(get_nir_src(instr->src[1]), + BRW_REGISTER_TYPE_UD); - fs_reg surf_index; - if (const_uniform_block) { - unsigned index = stage_prog_data->binding_table.ssbo_start + - const_uniform_block->u32[0]; - surf_index = brw_imm_ud(index); - brw_mark_surface_used(prog_data, index); - } else { - surf_index = vgrf(glsl_type::uint_type); - bld.ADD(surf_index, get_nir_src(instr->src[0]), - brw_imm_ud(stage_prog_data->binding_table.ssbo_start)); + /* Make dest unsigned because that's what the temporary will be */ + dest.type = brw_reg_type_from_bit_size(bit_size, BRW_REGISTER_TYPE_UD); - /* Assume this may touch any UBO. It would be nice to provide - * a tighter bound, but the array information is already lowered away. 
- */ - brw_mark_surface_used(prog_data, - stage_prog_data->binding_table.ssbo_start + - nir->info.num_ssbos - 1); - } - - fs_reg offset_reg; - nir_const_value *const_offset = nir_src_as_const_value(instr->src[1]); - if (const_offset) { - offset_reg = brw_imm_ud(const_offset->u32[0]); + /* Read the vector */ + if (nir_intrinsic_align(instr) >= 4) { + assert(nir_dest_bit_size(instr->dest) == 32); + fs_reg read_result = emit_untyped_read(bld, surf_index, offset_reg, + 1 /* dims */, + instr->num_components, + BRW_PREDICATE_NONE); + for (unsigned i = 0; i < instr->num_components; i++) + bld.MOV(offset(dest, bld, i), offset(read_result, bld, i)); } else { - offset_reg = retype(get_nir_src(instr->src[1]), BRW_REGISTER_TYPE_UD); + assert(nir_dest_bit_size(instr->dest) <= 32); + assert(nir_dest_num_components(instr->dest) == 1); + fs_reg read_result = + emit_byte_scattered_read(bld, surf_index, offset_reg, + 1 /* dims */, 1, bit_size, + BRW_PREDICATE_NONE); + bld.MOV(dest, read_result); } - - /* Read the vector */ - do_untyped_vector_read(bld, dest, surf_index, offset_reg, - instr->num_components); - break; } @@ -4182,143 +4026,30 @@ if (stage == MESA_SHADER_FRAGMENT) brw_wm_prog_data(prog_data)->has_side_effects = true; - /* Block index */ - fs_reg surf_index; - nir_const_value *const_uniform_block = - nir_src_as_const_value(instr->src[1]); - if (const_uniform_block) { - unsigned index = stage_prog_data->binding_table.ssbo_start + - const_uniform_block->u32[0]; - surf_index = brw_imm_ud(index); - brw_mark_surface_used(prog_data, index); - } else { - surf_index = vgrf(glsl_type::uint_type); - bld.ADD(surf_index, get_nir_src(instr->src[1]), - brw_imm_ud(stage_prog_data->binding_table.ssbo_start)); - - brw_mark_surface_used(prog_data, - stage_prog_data->binding_table.ssbo_start + - nir->info.num_ssbos - 1); - } - - /* Value */ + const unsigned bit_size = nir_src_bit_size(instr->src[0]); fs_reg val_reg = get_nir_src(instr->src[0]); + fs_reg surf_index = 
get_nir_ssbo_intrinsic_index(bld, instr); + fs_reg offset_reg = retype(get_nir_src(instr->src[2]), + BRW_REGISTER_TYPE_UD); - /* Writemask */ - unsigned writemask = instr->const_index[0]; - - /* get_nir_src() retypes to integer. Be wary of 64-bit types though - * since the untyped writes below operate in units of 32-bits, which - * means that we need to write twice as many components each time. - * Also, we have to suffle 64-bit data to be in the appropriate layout - * expected by our 32-bit write messages. - */ - unsigned bit_size = nir_src_bit_size(instr->src[0]); - unsigned type_size = bit_size / 8; - - /* Combine groups of consecutive enabled channels in one write - * message. We use ffs to find the first enabled channel and then ffs on - * the bit-inverse, down-shifted writemask to determine the num_components - * of the block of enabled bits. - */ - while (writemask) { - unsigned first_component = ffs(writemask) - 1; - unsigned num_components = ffs(~(writemask >> first_component)) - 1; - fs_reg write_src = offset(val_reg, bld, first_component); - - nir_const_value *const_offset = nir_src_as_const_value(instr->src[2]); - - if (type_size > 4) { - /* We can't write more than 2 64-bit components at once. Limit - * the num_components of the write to what we can do and let the next - * iteration handle the rest. - */ - num_components = MIN2(2, num_components); - write_src = shuffle_for_32bit_write(bld, write_src, 0, - num_components); - } else if (type_size < 4) { - /* For 16-bit types we pack two consecutive values into a 32-bit - * word and use an untyped write message. For single values or not - * 32-bit-aligned we need to use byte-scattered writes because - * untyped writes works with 32-bit components with 32-bit - * alignment. byte_scattered_write messages only support one - * 16-bit component at a time. As VK_KHR_relaxed_block_layout - * could be enabled we can not guarantee that not constant offsets - * to be 32-bit aligned for 16-bit types. 
For example an array, of - * 16-bit vec3 with array element stride of 6. - * - * In the case of 32-bit aligned constant offsets if there is - * a 3-components vector we submit one untyped-write message - * of 32-bit (first two components), and one byte-scattered - * write message (the last component). - */ - - if ( !const_offset || ((const_offset->u32[0] + - type_size * first_component) % 4)) { - /* If we use a .yz writemask we also need to emit 2 - * byte-scattered write messages because of y-component not - * being aligned to 32-bit. - */ - num_components = 1; - } else if (num_components * type_size > 4 && - (num_components * type_size % 4)) { - /* If the pending components size is not a multiple of 4 bytes - * we left the not aligned components for following emits of - * length == 1 with byte_scattered_write. - */ - num_components -= (num_components * type_size % 4) / type_size; - } else if (num_components * type_size < 4) { - num_components = 1; - } - /* For num_components == 1 we are also shuffling the component - * because byte scattered writes of 16-bit need values to be dword - * aligned. Shuffling only one component would be the same as - * striding it. - */ - write_src = shuffle_for_32bit_write(bld, write_src, 0, - num_components); - } - - fs_reg offset_reg; - - if (const_offset) { - offset_reg = brw_imm_ud(const_offset->u32[0] + - type_size * first_component); - } else { - offset_reg = vgrf(glsl_type::uint_type); - bld.ADD(offset_reg, - retype(get_nir_src(instr->src[2]), BRW_REGISTER_TYPE_UD), - brw_imm_ud(type_size * first_component)); - } - - if (type_size < 4 && num_components == 1) { - /* Untyped Surface messages have a fixed 32-bit size, so we need - * to rely on byte scattered in order to write 16-bit elements. - * The byte_scattered_write message needs that every written 16-bit - * type to be aligned 32-bits (stride=2). 
- */ - emit_byte_scattered_write(bld, surf_index, offset_reg, - write_src, - 1 /* dims */, - bit_size, - BRW_PREDICATE_NONE); - } else { - assert(num_components * type_size <= 16); - assert((num_components * type_size) % 4 == 0); - assert(offset_reg.file != BRW_IMMEDIATE_VALUE || - offset_reg.ud % 4 == 0); - unsigned num_slots = (num_components * type_size) / 4; - - emit_untyped_write(bld, surf_index, offset_reg, - write_src, - 1 /* dims */, num_slots, - BRW_PREDICATE_NONE); - } + val_reg.type = brw_reg_type_from_bit_size(bit_size, BRW_REGISTER_TYPE_UD); - /* Clear the bits in the writemask that we just wrote, then try - * again to see if more channels are left. - */ - writemask &= (15 << (first_component + num_components)); + assert(nir_intrinsic_write_mask(instr) == + (1u << instr->num_components) - 1); + if (nir_intrinsic_align(instr) >= 4) { + assert(nir_src_bit_size(instr->src[0]) == 32); + assert(nir_src_num_components(instr->src[0]) <= 4); + emit_untyped_write(bld, surf_index, offset_reg, val_reg, + 1 /* dims */, instr->num_components, + BRW_PREDICATE_NONE); + } else { + assert(nir_src_bit_size(instr->src[0]) <= 32); + assert(nir_src_num_components(instr->src[0]) == 1); + fs_reg write_src = bld.vgrf(BRW_REGISTER_TYPE_UD); + bld.MOV(write_src, val_reg); + emit_byte_scattered_write(bld, surf_index, offset_reg, + write_src, 1 /* dims */, bit_size, + BRW_PREDICATE_NONE); } break; } @@ -4326,9 +4057,7 @@ case nir_intrinsic_store_output: { fs_reg src = get_nir_src(instr->src[0]); - nir_const_value *const_offset = nir_src_as_const_value(instr->src[1]); - assert(const_offset && "Indirect output stores not allowed"); - + unsigned store_offset = nir_src_as_uint(instr->src[1]); unsigned num_components = instr->num_components; unsigned first_component = nir_intrinsic_component(instr); if (nir_src_bit_size(instr->src[0]) == 64) { @@ -4337,7 +4066,7 @@ } fs_reg new_dest = retype(offset(outputs[instr->const_index[0]], bld, - 4 * const_offset->u32[0]), src.type); + 4 * 
store_offset), src.type); for (unsigned j = 0; j < num_components; j++) { bld.MOV(offset(new_dest, bld, j + first_component), offset(src, bld, j)); @@ -4386,8 +4115,8 @@ break; case nir_intrinsic_get_buffer_size: { - nir_const_value *const_uniform_block = nir_src_as_const_value(instr->src[0]); - unsigned ssbo_index = const_uniform_block ? const_uniform_block->u32[0] : 0; + unsigned ssbo_index = nir_src_is_const(instr->src[0]) ? + nir_src_as_uint(instr->src[0]) : 0; /* A resinfo's sampler message is used to get the buffer size. The * SIMD8's writeback message consists of four registers and SIMD16's @@ -4440,8 +4169,6 @@ ubld.ADD(buffer_size, size_aligned4, negate(size_padding)); bld.MOV(retype(dest, ret_payload.type), component(buffer_size, 0)); - - brw_mark_surface_used(prog_data, index); break; } @@ -4623,11 +4350,10 @@ case nir_intrinsic_quad_broadcast: { const fs_reg value = get_nir_src(instr->src[0]); - nir_const_value *index = nir_src_as_const_value(instr->src[1]); - assert(nir_src_bit_size(instr->src[1]) == 32); + const unsigned index = nir_src_as_uint(instr->src[1]); bld.emit(SHADER_OPCODE_CLUSTER_BROADCAST, retype(dest, value.type), - value, brw_imm_ud(index->u32[0]), brw_imm_ud(4)); + value, brw_imm_ud(index), brw_imm_ud(4)); break; } @@ -4641,34 +4367,9 @@ const fs_reg tmp_left = horiz_stride(tmp, 2); const fs_reg tmp_right = horiz_stride(horiz_offset(tmp, 1), 2); - /* From the Cherryview PRM Vol. 7, "Register Region Restrictiosn": - * - * "When source or destination datatype is 64b or operation is - * integer DWord multiply, regioning in Align1 must follow - * these rules: - * - * [...] - * - * 3. Source and Destination offset must be the same, except - * the case of scalar source." - * - * In order to work around this, we have to emit two 32-bit MOVs instead - * of a single 64-bit MOV to do the shuffle. 
- */ - if (type_sz(value.type) > 4 && - (devinfo->is_cherryview || gen_device_info_is_9lp(devinfo))) { - ubld.MOV(subscript(tmp_left, BRW_REGISTER_TYPE_D, 0), - subscript(src_right, BRW_REGISTER_TYPE_D, 0)); - ubld.MOV(subscript(tmp_left, BRW_REGISTER_TYPE_D, 1), - subscript(src_right, BRW_REGISTER_TYPE_D, 1)); - ubld.MOV(subscript(tmp_right, BRW_REGISTER_TYPE_D, 0), - subscript(src_left, BRW_REGISTER_TYPE_D, 0)); - ubld.MOV(subscript(tmp_right, BRW_REGISTER_TYPE_D, 1), - subscript(src_left, BRW_REGISTER_TYPE_D, 1)); - } else { - ubld.MOV(tmp_left, src_right); - ubld.MOV(tmp_right, src_left); - } + ubld.MOV(tmp_left, src_right); + ubld.MOV(tmp_right, src_left); + bld.MOV(retype(dest, value.type), tmp); break; } @@ -4835,26 +4536,7 @@ if (nir_intrinsic_infos[instr->intrinsic].has_dest) dest = get_nir_dest(instr->dest); - fs_reg surface; - nir_const_value *const_surface = nir_src_as_const_value(instr->src[0]); - if (const_surface) { - unsigned surf_index = stage_prog_data->binding_table.ssbo_start + - const_surface->u32[0]; - surface = brw_imm_ud(surf_index); - brw_mark_surface_used(prog_data, surf_index); - } else { - surface = vgrf(glsl_type::uint_type); - bld.ADD(surface, get_nir_src(instr->src[0]), - brw_imm_ud(stage_prog_data->binding_table.ssbo_start)); - - /* Assume this may touch any SSBO. This is the same we do for other - * UBO/SSBO accesses with non-constant surface. 
- */ - brw_mark_surface_used(prog_data, - stage_prog_data->binding_table.ssbo_start + - nir->info.num_ssbos - 1); - } - + fs_reg surface = get_nir_ssbo_intrinsic_index(bld, instr); fs_reg offset = get_nir_src(instr->src[1]); fs_reg data1; if (op != BRW_AOP_INC && op != BRW_AOP_DEC && op != BRW_AOP_PREDEC) @@ -4885,26 +4567,7 @@ if (nir_intrinsic_infos[instr->intrinsic].has_dest) dest = get_nir_dest(instr->dest); - fs_reg surface; - nir_const_value *const_surface = nir_src_as_const_value(instr->src[0]); - if (const_surface) { - unsigned surf_index = stage_prog_data->binding_table.ssbo_start + - const_surface->u32[0]; - surface = brw_imm_ud(surf_index); - brw_mark_surface_used(prog_data, surf_index); - } else { - surface = vgrf(glsl_type::uint_type); - bld.ADD(surface, get_nir_src(instr->src[0]), - brw_imm_ud(stage_prog_data->binding_table.ssbo_start)); - - /* Assume this may touch any SSBO. This is the same we do for other - * UBO/SSBO accesses with non-constant surface. - */ - brw_mark_surface_used(prog_data, - stage_prog_data->binding_table.ssbo_start + - nir->info.num_ssbos - 1); - } - + fs_reg surface = get_nir_ssbo_intrinsic_index(bld, instr); fs_reg offset = get_nir_src(instr->src[1]); fs_reg data1 = get_nir_src(instr->src[2]); fs_reg data2; @@ -4940,9 +4603,9 @@ data2 = get_nir_src(instr->src[2]); /* Get the offset */ - nir_const_value *const_offset = nir_src_as_const_value(instr->src[0]); - if (const_offset) { - offset = brw_imm_ud(instr->const_index[0] + const_offset->u32[0]); + if (nir_src_is_const(instr->src[0])) { + offset = brw_imm_ud(instr->const_index[0] + + nir_src_as_uint(instr->src[0])); } else { offset = vgrf(glsl_type::uint_type); bld.ADD(offset, @@ -4977,9 +4640,9 @@ data2 = get_nir_src(instr->src[2]); /* Get the offset */ - nir_const_value *const_offset = nir_src_as_const_value(instr->src[0]); - if (const_offset) { - offset = brw_imm_ud(instr->const_index[0] + const_offset->u32[0]); + if (nir_src_is_const(instr->src[0])) { + offset = 
brw_imm_ud(instr->const_index[0] + + nir_src_as_uint(instr->src[0])); } else { offset = vgrf(glsl_type::uint_type); bld.ADD(offset, @@ -5062,6 +4725,10 @@ break; } break; + case nir_tex_src_min_lod: + srcs[TEX_LOGICAL_SRC_MIN_LOD] = + retype(get_nir_src_imm(instr->src[i].src), BRW_REGISTER_TYPE_F); + break; case nir_tex_src_ms_index: srcs[TEX_LOGICAL_SRC_SAMPLE_INDEX] = retype(src, BRW_REGISTER_TYPE_UD); break; @@ -5069,6 +4736,7 @@ case nir_tex_src_offset: { nir_const_value *const_offset = nir_src_as_const_value(instr->src[i].src); + assert(nir_src_bit_size(instr->src[i].src) == 32); unsigned offset_bits = 0; if (const_offset && brw_texture_offset(const_offset->i32, @@ -5086,15 +4754,6 @@ unreachable("should be lowered"); case nir_tex_src_texture_offset: { - /* Figure out the highest possible texture index and mark it as used */ - uint32_t max_used = texture + instr->texture_array_size - 1; - if (instr->op == nir_texop_tg4 && devinfo->gen < 8) { - max_used += stage_prog_data->binding_table.gather_texture_start; - } else { - max_used += stage_prog_data->binding_table.texture_start; - } - brw_mark_surface_used(prog_data, max_used); - /* Emit code to evaluate the actual indexing expression */ fs_reg tmp = vgrf(glsl_type::uint_type); bld.ADD(tmp, src, brw_imm_ud(texture)); @@ -5116,9 +4775,7 @@ break; case nir_tex_src_plane: { - nir_const_value *const_plane = - nir_src_as_const_value(instr->src[i].src); - const uint32_t plane = const_plane->u32[0]; + const uint32_t plane = nir_src_as_uint(instr->src[i].src); const uint32_t texture_index = instr->texture_index + stage_prog_data->binding_table.plane_start[plane] - diff -Nru mesa-18.3.3/src/intel/compiler/brw_fs_reg_allocate.cpp mesa-19.0.1/src/intel/compiler/brw_fs_reg_allocate.cpp --- mesa-18.3.3/src/intel/compiler/brw_fs_reg_allocate.cpp 2018-07-14 15:13:03.000000000 +0000 +++ mesa-19.0.1/src/intel/compiler/brw_fs_reg_allocate.cpp 2019-03-31 23:16:37.000000000 +0000 @@ -617,7 +617,9 @@ * highest register that works. 
*/ if (inst->eot) { - int size = alloc.sizes[inst->src[0].nr]; + const int vgrf = inst->opcode == SHADER_OPCODE_SEND ? + inst->src[2].nr : inst->src[0].nr; + int size = alloc.sizes[vgrf]; int reg = compiler->fs_reg_sets[rsi].class_to_ra_reg_range[size] - 1; /* If something happened to spill, we want to push the EOT send @@ -626,32 +628,30 @@ */ reg -= BRW_MAX_MRF(devinfo->gen) - first_used_mrf; - ra_set_node_reg(g, inst->src[0].nr, reg); + ra_set_node_reg(g, vgrf, reg); break; } } } - if (dispatch_width > 8) { - /* In 16-wide dispatch we have an issue where a compressed - * instruction is actually two instructions executed simultaneiously. - * It's actually ok to have the source and destination registers be - * the same. In this case, each instruction over-writes its own - * source and there's no problem. The real problem here is if the - * source and destination registers are off by one. Then you can end - * up in a scenario where the first instruction over-writes the - * source of the second instruction. Since the compiler doesn't know - * about this level of granularity, we simply make the source and - * destination interfere. - */ - foreach_block_and_inst(block, fs_inst, inst, cfg) { - if (inst->dst.file != VGRF) - continue; + /* In 16-wide instructions we have an issue where a compressed + * instruction is actually two instructions executed simultaneously. + * It's actually ok to have the source and destination registers be + * the same. In this case, each instruction over-writes its own + * source and there's no problem. The real problem here is if the + * source and destination registers are off by one. Then you can end + * up in a scenario where the first instruction over-writes the + * source of the second instruction. Since the compiler doesn't know + * about this level of granularity, we simply make the source and + * destination interfere. 
+ */ + foreach_block_and_inst(block, fs_inst, inst, cfg) { + if (inst->exec_size < 16 || inst->dst.file != VGRF) + continue; - for (int i = 0; i < inst->sources; ++i) { - if (inst->src[i].file == VGRF) { - ra_add_node_interference(g, inst->dst.nr, inst->src[i].nr); - } + for (int i = 0; i < inst->sources; ++i) { + if (inst->src[i].file == VGRF) { + ra_add_node_interference(g, inst->dst.nr, inst->src[i].nr); } } } @@ -667,15 +667,14 @@ * messages adding a node interference to the grf127_send_hack_node. * This node has a fixed asignment to grf127. * - * We don't apply it to SIMD16 because previous code avoids any register - * overlap between sources and destination. + * We don't apply it to SIMD16 instructions because previous code avoids + * any register overlap between sources and destination. */ ra_set_node_reg(g, grf127_send_hack_node, 127); - if (dispatch_width == 8) { - foreach_block_and_inst(block, fs_inst, inst, cfg) { - if (inst->is_send_from_grf() && inst->dst.file == VGRF) - ra_add_node_interference(g, inst->dst.nr, grf127_send_hack_node); - } + foreach_block_and_inst(block, fs_inst, inst, cfg) { + if (inst->exec_size < 16 && inst->is_send_from_grf() && + inst->dst.file == VGRF) + ra_add_node_interference(g, inst->dst.nr, grf127_send_hack_node); } if (spilled_any_registers) { @@ -695,6 +694,33 @@ } } + /* From the Skylake PRM Vol. 2a docs for sends: + * + * "It is required that the second block of GRFs does not overlap with + * the first block." + * + * Normally, this is taken care of by fixup_sends_duplicate_payload() but + * in the case where one of the registers is an undefined value, the + * register allocator may decide that they don't interfere even though + * they're used as sources in the same instruction. We also need to add + * interference here. 
+ */ + if (devinfo->gen >= 9) { + foreach_block_and_inst(block, fs_inst, inst, cfg) { + if (inst->opcode == SHADER_OPCODE_SEND && inst->ex_mlen > 0 && + inst->src[2].file == VGRF && + inst->src[3].file == VGRF && + inst->src[2].nr != inst->src[3].nr) { + for (unsigned i = 0; i < inst->mlen; i++) { + for (unsigned j = 0; j < inst->ex_mlen; j++) { + ra_add_node_interference(g, inst->src[2].nr + i, + inst->src[3].nr + j); + } + } + } + } + } + /* Debug of register spilling: Go spill everything. */ if (unlikely(spill_all)) { int reg = choose_spill_reg(g); @@ -914,7 +940,7 @@ } void -fs_visitor::spill_reg(int spill_reg) +fs_visitor::spill_reg(unsigned spill_reg) { int size = alloc.sizes[spill_reg]; unsigned int spill_offset = last_scratch; diff -Nru mesa-18.3.3/src/intel/compiler/brw_fs_register_coalesce.cpp mesa-19.0.1/src/intel/compiler/brw_fs_register_coalesce.cpp --- mesa-18.3.3/src/intel/compiler/brw_fs_register_coalesce.cpp 2017-11-05 00:14:08.000000000 +0000 +++ mesa-19.0.1/src/intel/compiler/brw_fs_register_coalesce.cpp 2019-03-31 23:16:37.000000000 +0000 @@ -158,7 +158,7 @@ int src_size = 0; int channels_remaining = 0; - int src_reg = -1, dst_reg = -1; + unsigned src_reg = ~0u, dst_reg = ~0u; int dst_reg_offset[MAX_VGRF_SIZE]; fs_inst *mov[MAX_VGRF_SIZE]; int dst_var[MAX_VGRF_SIZE]; @@ -221,7 +221,7 @@ if (dst_reg_offset[i] != dst_reg_offset[0] + i) { /* Registers are out-of-order. 
*/ can_coalesce = false; - src_reg = -1; + src_reg = ~0u; break; } @@ -231,7 +231,7 @@ if (!can_coalesce_vars(live_intervals, cfg, inst, dst_var[i], src_var[i])) { can_coalesce = false; - src_reg = -1; + src_reg = ~0u; break; } } @@ -278,7 +278,7 @@ MAX2(live_intervals->end[dst_var[i]], live_intervals->end[src_var[i]]); } - src_reg = -1; + src_reg = ~0u; } if (progress) { diff -Nru mesa-18.3.3/src/intel/compiler/brw_fs_sel_peephole.cpp mesa-19.0.1/src/intel/compiler/brw_fs_sel_peephole.cpp --- mesa-18.3.3/src/intel/compiler/brw_fs_sel_peephole.cpp 2017-11-05 00:14:08.000000000 +0000 +++ mesa-19.0.1/src/intel/compiler/brw_fs_sel_peephole.cpp 2019-03-31 23:16:37.000000000 +0000 @@ -198,8 +198,7 @@ */ fs_reg src0(then_mov[i]->src[0]); if (src0.file == IMM) { - src0 = vgrf(glsl_type::float_type); - src0.type = then_mov[i]->src[0].type; + src0 = ibld.vgrf(then_mov[i]->src[0].type); ibld.MOV(src0, then_mov[i]->src[0]); } diff -Nru mesa-18.3.3/src/intel/compiler/brw_inst.h mesa-19.0.1/src/intel/compiler/brw_inst.h --- mesa-18.3.3/src/intel/compiler/brw_inst.h 2018-12-07 18:58:04.000000000 +0000 +++ mesa-19.0.1/src/intel/compiler/brw_inst.h 2019-03-31 23:16:37.000000000 +0000 @@ -455,6 +455,19 @@ FC(gen4_pop_count, 115, 112, devinfo->gen < 6) /** @} */ +/** + * SEND instructions: + * @{ + */ +FC(send_ex_desc_ia_subreg_nr, 82, 80, devinfo->gen >= 9) +FC(send_src0_address_mode, 79, 79, devinfo->gen >= 9) +FC(send_sel_reg32_desc, 77, 77, devinfo->gen >= 9) +FC(send_sel_reg32_ex_desc, 61, 61, devinfo->gen >= 9) +FC(send_src1_reg_nr, 51, 44, devinfo->gen >= 9) +FC(send_src1_reg_file, 36, 36, devinfo->gen >= 9) +FC(send_dst_reg_file, 35, 35, devinfo->gen >= 9) +/** @} */ + /* Message descriptor bits */ #define MD(x) ((x) + 96) @@ -513,11 +526,21 @@ brw_inst *inst, uint32_t value) { assert(devinfo->gen >= 9); - brw_inst_set_bits(inst, 94, 91, (value >> 28) & ((1u << 4) - 1)); - brw_inst_set_bits(inst, 88, 85, (value >> 24) & ((1u << 4) - 1)); - brw_inst_set_bits(inst, 83, 80, 
(value >> 20) & ((1u << 4) - 1)); - brw_inst_set_bits(inst, 67, 64, (value >> 16) & ((1u << 4) - 1)); - assert((value & ((1u << 16) - 1)) == 0); + if (brw_inst_opcode(devinfo, inst) == BRW_OPCODE_SEND || + brw_inst_opcode(devinfo, inst) == BRW_OPCODE_SENDC) { + brw_inst_set_bits(inst, 94, 91, GET_BITS(value, 31, 28)); + brw_inst_set_bits(inst, 88, 85, GET_BITS(value, 27, 24)); + brw_inst_set_bits(inst, 83, 80, GET_BITS(value, 23, 20)); + brw_inst_set_bits(inst, 67, 64, GET_BITS(value, 19, 16)); + assert(GET_BITS(value, 15, 0) == 0); + } else { + assert(brw_inst_opcode(devinfo, inst) == BRW_OPCODE_SENDS || + brw_inst_opcode(devinfo, inst) == BRW_OPCODE_SENDSC); + brw_inst_set_bits(inst, 95, 80, GET_BITS(value, 31, 16)); + assert(GET_BITS(value, 15, 10) == 0); + brw_inst_set_bits(inst, 67, 64, GET_BITS(value, 9, 6)); + assert(GET_BITS(value, 5, 0) == 0); + } } /** @@ -530,10 +553,18 @@ const brw_inst *inst) { assert(devinfo->gen >= 9); - return (brw_inst_bits(inst, 94, 91) << 28 | - brw_inst_bits(inst, 88, 85) << 24 | - brw_inst_bits(inst, 83, 80) << 20 | - brw_inst_bits(inst, 67, 64) << 16); + if (brw_inst_opcode(devinfo, inst) == BRW_OPCODE_SEND || + brw_inst_opcode(devinfo, inst) == BRW_OPCODE_SENDC) { + return (brw_inst_bits(inst, 94, 91) << 28 | + brw_inst_bits(inst, 88, 85) << 24 | + brw_inst_bits(inst, 83, 80) << 20 | + brw_inst_bits(inst, 67, 64) << 16); + } else { + assert(brw_inst_opcode(devinfo, inst) == BRW_OPCODE_SENDS || + brw_inst_opcode(devinfo, inst) == BRW_OPCODE_SENDSC); + return (brw_inst_bits(inst, 95, 80) << 16 | + brw_inst_bits(inst, 67, 64) << 6); + } } /** @@ -933,10 +964,11 @@ { \ assert((value & ~0x3ff) == 0); \ if (devinfo->gen >= 8) { \ - brw_inst_set_bits(inst, g8_high, g8_low, value & 0x1ff); \ - brw_inst_set_bits(inst, g8_nine, g8_nine, value >> 9); \ + assert(GET_BITS(value, 3, 0) == 0); \ + brw_inst_set_bits(inst, g8_high, g8_low, GET_BITS(value, 8, 4)); \ + brw_inst_set_bits(inst, g8_nine, g8_nine, GET_BITS(value, 9, 9)); \ } else { 
\ - brw_inst_set_bits(inst, g4_high, g4_low, value >> 9); \ + brw_inst_set_bits(inst, g4_high, g4_low, value); \ } \ } \ static inline unsigned \ @@ -944,7 +976,7 @@ const brw_inst *inst) \ { \ if (devinfo->gen >= 8) { \ - return brw_inst_bits(inst, g8_high, g8_low) | \ + return (brw_inst_bits(inst, g8_high, g8_low) << 4) | \ (brw_inst_bits(inst, g8_nine, g8_nine) << 9); \ } else { \ return brw_inst_bits(inst, g4_high, g4_low); \ @@ -955,9 +987,11 @@ * Compared to Align1, these are missing the low 4 bits. * -Gen 4- ----Gen8---- */ -BRW_IA16_ADDR_IMM(src1, 105, 96, 121, 104, 100) -BRW_IA16_ADDR_IMM(src0, 73, 64, 95, 72, 68) -BRW_IA16_ADDR_IMM(dst, 57, 52, 47, 56, 52) +BRW_IA16_ADDR_IMM(src1, 105, 96, 121, 104, 100) +BRW_IA16_ADDR_IMM(src0, 73, 64, 95, 72, 68) +BRW_IA16_ADDR_IMM(dst, 57, 52, 47, 56, 52) +BRW_IA16_ADDR_IMM(send_src0, -1, -1, 78, 72, 68) +BRW_IA16_ADDR_IMM(send_dst, -1, -1, 62, 56, 52) /** * Fetch a set of contiguous bits from the instruction. diff -Nru mesa-18.3.3/src/intel/compiler/brw_ir_allocator.h mesa-19.0.1/src/intel/compiler/brw_ir_allocator.h --- mesa-18.3.3/src/intel/compiler/brw_ir_allocator.h 2017-11-05 00:14:08.000000000 +0000 +++ mesa-19.0.1/src/intel/compiler/brw_ir_allocator.h 2019-03-31 23:16:37.000000000 +0000 @@ -47,6 +47,7 @@ unsigned allocate(unsigned size) { + assert(size > 0); if (capacity <= count) { capacity = MAX2(16, capacity * 2); sizes = (unsigned *)realloc(sizes, capacity * sizeof(unsigned)); diff -Nru mesa-18.3.3/src/intel/compiler/brw_ir_fs.h mesa-19.0.1/src/intel/compiler/brw_ir_fs.h --- mesa-18.3.3/src/intel/compiler/brw_ir_fs.h 2019-02-01 12:03:20.000000000 +0000 +++ mesa-19.0.1/src/intel/compiler/brw_ir_fs.h 2019-03-31 23:16:37.000000000 +0000 @@ -347,18 +347,24 @@ void resize_sources(uint8_t num_sources); - bool equals(fs_inst *inst) const; bool is_send_from_grf() const; bool is_partial_write() const; bool is_copy_payload(const brw::simple_allocator &grf_alloc) const; unsigned components_read(unsigned i) const; 
unsigned size_read(int arg) const; - bool can_do_source_mods(const struct gen_device_info *devinfo); + bool can_do_source_mods(const struct gen_device_info *devinfo) const; bool can_do_cmod(); bool can_change_types() const; bool has_source_and_destination_hazard() const; /** + * Return whether \p arg is a control source of a virtual instruction which + * shouldn't contribute to the execution type and usual regioning + * restriction calculations of arithmetic instructions. + */ + bool is_control_source(unsigned arg) const; + + /** * Return the subset of flag registers read by the instruction as a bitset * with byte granularity. */ @@ -462,7 +468,8 @@ brw_reg_type exec_type = BRW_REGISTER_TYPE_B; for (int i = 0; i < inst->sources; i++) { - if (inst->src[i].file != BAD_FILE) { + if (inst->src[i].file != BAD_FILE && + !inst->is_control_source(i)) { const brw_reg_type t = get_exec_type(inst->src[i].type); if (type_sz(t) > type_sz(exec_type)) exec_type = t; @@ -477,6 +484,27 @@ assert(exec_type != BRW_REGISTER_TYPE_B); + /* Promotion of the execution type to 32-bit for conversions from or to + * half-float seems to be consistent with the following text from the + * Cherryview PRM Vol. 7, "Execution Data Type": + * + * "When single precision and half precision floats are mixed between + * source operands or between source and destination operand [..] single + * precision float is the execution datatype." + * + * and from "Register Region Restrictions": + * + * "Conversion between Integer and HF (Half Float) must be DWord aligned + * and strided by a DWord on the destination." + */ + if (type_sz(exec_type) == 2 && + inst->dst.type != exec_type) { + if (exec_type == BRW_REGISTER_TYPE_HF) + exec_type = BRW_REGISTER_TYPE_F; + else if (inst->dst.type == BRW_REGISTER_TYPE_HF) + exec_type = BRW_REGISTER_TYPE_D; + } + return exec_type; } @@ -487,6 +515,16 @@ } /** + * Return whether the instruction isn't an ALU instruction and cannot be + * assumed to complete in-order. 
+ */ +static inline bool +is_unordered(const fs_inst *inst) +{ + return inst->mlen || inst->is_send_from_grf() || inst->is_math(); +} + +/** * Return whether the following regioning restriction applies to the specified * instruction. From the Cherryview PRM Vol 7. "Register Region * Restrictions": diff -Nru mesa-18.3.3/src/intel/compiler/brw_nir_analyze_boolean_resolves.c mesa-19.0.1/src/intel/compiler/brw_nir_analyze_boolean_resolves.c --- mesa-18.3.3/src/intel/compiler/brw_nir_analyze_boolean_resolves.c 2017-11-05 00:14:08.000000000 +0000 +++ mesa-19.0.1/src/intel/compiler/brw_nir_analyze_boolean_resolves.c 2019-03-31 23:16:37.000000000 +0000 @@ -109,18 +109,18 @@ uint8_t resolve_status; nir_alu_instr *alu = nir_instr_as_alu(instr); switch (alu->op) { - case nir_op_ball_fequal2: - case nir_op_ball_iequal2: - case nir_op_ball_fequal3: - case nir_op_ball_iequal3: - case nir_op_ball_fequal4: - case nir_op_ball_iequal4: - case nir_op_bany_fnequal2: - case nir_op_bany_inequal2: - case nir_op_bany_fnequal3: - case nir_op_bany_inequal3: - case nir_op_bany_fnequal4: - case nir_op_bany_inequal4: + case nir_op_b32all_fequal2: + case nir_op_b32all_iequal2: + case nir_op_b32all_fequal3: + case nir_op_b32all_iequal3: + case nir_op_b32all_fequal4: + case nir_op_b32all_iequal4: + case nir_op_b32any_fnequal2: + case nir_op_b32any_inequal2: + case nir_op_b32any_fnequal3: + case nir_op_b32any_inequal3: + case nir_op_b32any_fnequal4: + case nir_op_b32any_inequal4: /* These are only implemented by the vec4 backend and its * implementation emits resolved booleans. 
At some point in the * future, this may change and we'll have to remove some of the diff -Nru mesa-18.3.3/src/intel/compiler/brw_nir_analyze_ubo_ranges.c mesa-19.0.1/src/intel/compiler/brw_nir_analyze_ubo_ranges.c --- mesa-18.3.3/src/intel/compiler/brw_nir_analyze_ubo_ranges.c 2018-12-07 18:58:04.000000000 +0000 +++ mesa-19.0.1/src/intel/compiler/brw_nir_analyze_ubo_ranges.c 2019-03-31 23:16:37.000000000 +0000 @@ -147,12 +147,11 @@ continue; /* Not a uniform or UBO intrinsic */ } - nir_const_value *block_const = nir_src_as_const_value(intrin->src[0]); - nir_const_value *offset_const = nir_src_as_const_value(intrin->src[1]); - - if (block_const && offset_const) { - const int block = block_const->u32[0]; - const int offset = offset_const->u32[0] / 32; + if (nir_src_is_const(intrin->src[0]) && + nir_src_is_const(intrin->src[1])) { + const int block = nir_src_as_uint(intrin->src[0]); + const unsigned byte_offset = nir_src_as_uint(intrin->src[1]); + const int offset = byte_offset / 32; /* Avoid shifting by larger than the width of our bitfield, as this * is undefined in C. Even if we require multiple bits to represent @@ -166,8 +165,8 @@ /* The value might span multiple 32-byte chunks. */ const int bytes = nir_intrinsic_dest_components(intrin) * (nir_dest_bit_size(intrin->dest) / 8); - const int start = ROUND_DOWN_TO(offset_const->u32[0], 32); - const int end = ALIGN(offset_const->u32[0] + bytes, 32); + const int start = ROUND_DOWN_TO(byte_offset, 32); + const int end = ALIGN(byte_offset + bytes, 32); const int chunks = (end - start) / 32; /* TODO: should we count uses in loops as higher benefit? 
*/ diff -Nru mesa-18.3.3/src/intel/compiler/brw_nir.c mesa-19.0.1/src/intel/compiler/brw_nir.c --- mesa-18.3.3/src/intel/compiler/brw_nir.c 2018-12-07 18:58:04.000000000 +0000 +++ mesa-19.0.1/src/intel/compiler/brw_nir.c 2019-03-31 23:16:37.000000000 +0000 @@ -527,7 +527,7 @@ if (compiler->glsl_compiler_options[stage].EmitNoIndirectOutput) indirect_mask |= nir_var_shader_out; if (compiler->glsl_compiler_options[stage].EmitNoIndirectTemp) - indirect_mask |= nir_var_local; + indirect_mask |= nir_var_function_temp; return indirect_mask; } @@ -542,8 +542,9 @@ bool progress; do { progress = false; - OPT(nir_split_array_vars, nir_var_local); - OPT(nir_shrink_vec_array_vars, nir_var_local); + OPT(nir_split_array_vars, nir_var_function_temp); + OPT(nir_shrink_vec_array_vars, nir_var_function_temp); + OPT(nir_opt_deref); OPT(nir_lower_vars_to_ssa); if (allow_copies) { /* Only run this pass in the first call to brw_nir_optimize. Later @@ -568,8 +569,20 @@ OPT(nir_copy_prop); OPT(nir_opt_dce); OPT(nir_opt_cse); - OPT(nir_opt_peephole_select, 0); + + /* For indirect loads of uniforms (push constants), we assume that array + * indices will nearly always be in bounds and the cost of the load is + * low. Therefore there shouldn't be a performance benefit to avoid it. + * However, in vec4 tessellation shaders, these loads operate by + * actually pulling from memory. 
+ */ + const bool is_vec4_tessellation = !is_scalar && + (nir->info.stage == MESA_SHADER_TESS_CTRL || + nir->info.stage == MESA_SHADER_TESS_EVAL); + OPT(nir_opt_peephole_select, 0, !is_vec4_tessellation); + OPT(nir_opt_intrinsics); + OPT(nir_opt_idiv_const, 32); OPT(nir_opt_algebraic); OPT(nir_opt_constant_folding); OPT(nir_opt_dead_cf); @@ -587,22 +600,13 @@ } OPT(nir_opt_remove_phis); OPT(nir_opt_undef); - OPT(nir_lower_doubles, nir_lower_drcp | - nir_lower_dsqrt | - nir_lower_drsq | - nir_lower_dtrunc | - nir_lower_dfloor | - nir_lower_dceil | - nir_lower_dfract | - nir_lower_dround_even | - nir_lower_dmod); OPT(nir_lower_pack); } while (progress); /* Workaround Gfxbench unused local sampler variable which will trigger an * assert in the opt_large_constants pass. */ - OPT(nir_remove_dead_variables, nir_var_local); + OPT(nir_remove_dead_variables, nir_var_function_temp); return nir; } @@ -643,6 +647,76 @@ const bool is_scalar = compiler->scalar_stage[nir->info.stage]; + if (is_scalar) { + OPT(nir_lower_alu_to_scalar); + } + + /* Run opt_algebraic before int64 lowering so we can hopefully get rid + * of some int64 instructions. + */ + OPT(nir_opt_algebraic); + + /* Lower 64-bit operations before nir_optimize so that loop unrolling sees + * their actual cost. 
+ */ + nir_lower_int64_options int64_options = + nir_lower_imul64 | + nir_lower_isign64 | + nir_lower_divmod64 | + nir_lower_imul_high64; + nir_lower_doubles_options fp64_options = + nir_lower_drcp | + nir_lower_dsqrt | + nir_lower_drsq | + nir_lower_dtrunc | + nir_lower_dfloor | + nir_lower_dceil | + nir_lower_dfract | + nir_lower_dround_even | + nir_lower_dmod; + + if (!devinfo->has_64bit_types) { + int64_options |= nir_lower_mov64 | + nir_lower_icmp64 | + nir_lower_iadd64 | + nir_lower_iabs64 | + nir_lower_ineg64 | + nir_lower_logic64 | + nir_lower_minmax64 | + nir_lower_shift64; + fp64_options |= nir_lower_fp64_full_software; + } + + bool lowered_64bit_ops = false; + do { + progress = false; + + OPT(nir_lower_int64, int64_options); + OPT(nir_lower_doubles, fp64_options); + + /* Necessary to lower add -> sub and div -> mul/rcp */ + OPT(nir_opt_algebraic); + + lowered_64bit_ops |= progress; + } while (progress); + + if (lowered_64bit_ops) { + OPT(nir_lower_constant_initializers, nir_var_function_temp); + OPT(nir_lower_returns); + OPT(nir_inline_functions); + OPT(nir_opt_deref); + } + + const nir_function *entry_point = nir_shader_get_entrypoint(nir)->function; + foreach_list_typed_safe(nir_function, func, node, &nir->functions) { + if (func != entry_point) { + exec_node_remove(&func->node); + } + } + assert(exec_list_length(&nir->functions) == 1); + + OPT(nir_lower_constant_initializers, ~nir_var_function_temp); + if (nir->info.stage == MESA_SHADER_GEOMETRY) OPT(nir_lower_gs_intrinsics); @@ -656,6 +730,9 @@ .lower_txf_offset = true, .lower_rect_offset = true, .lower_txd_cube_map = true, + .lower_txb_shadow_clamp = true, + .lower_txd_shadow_clamp = true, + .lower_txd_offset_clamp = true, }; OPT(nir_lower_tex, &tex_options); @@ -664,19 +741,7 @@ OPT(nir_lower_global_vars_to_local); OPT(nir_split_var_copies); - OPT(nir_split_struct_vars, nir_var_local); - - /* Run opt_algebraic before int64 lowering so we can hopefully get rid - * of some int64 instructions. 
- */ - OPT(nir_opt_algebraic); - - /* Lower int64 instructions before nir_optimize so that loop unrolling - * sees their actual cost. - */ - OPT(nir_lower_int64, nir_lower_imul64 | - nir_lower_isign64 | - nir_lower_divmod64); + OPT(nir_split_struct_vars, nir_var_function_temp); nir = brw_nir_optimize(nir, compiler, is_scalar, true); @@ -714,6 +779,19 @@ brw_nir_no_indirect_mask(compiler, nir->info.stage); OPT(nir_lower_indirect_derefs, indirect_mask); + OPT(brw_nir_lower_mem_access_bit_sizes); + + /* Lower array derefs of vectors for SSBO and UBO loads. For both UBOs and + * SSBOs, our back-end is capable of loading an entire vec4 at a time and + * we would like to take advantage of that whenever possible regardless of + * whether or not the app gives us full loads. This should allow the + * optimizer to combine UBO and SSBO load operations and save us some send + * messages. + */ + OPT(nir_lower_array_deref_of_vec, + nir_var_mem_ubo | nir_var_mem_ssbo, + nir_lower_direct_array_deref_of_vec_load); + /* Get rid of split copies */ nir = brw_nir_optimize(nir, compiler, is_scalar, false); @@ -740,6 +818,9 @@ *consumer = brw_nir_optimize(*consumer, compiler, c_is_scalar, false); } + if (nir_link_opt_varyings(*producer, *consumer)) + *consumer = brw_nir_optimize(*consumer, compiler, c_is_scalar, false); + NIR_PASS_V(*producer, nir_remove_dead_variables, nir_var_shader_out); NIR_PASS_V(*consumer, nir_remove_dead_variables, nir_var_shader_in); @@ -759,6 +840,23 @@ *producer = brw_nir_optimize(*producer, compiler, p_is_scalar, false); *consumer = brw_nir_optimize(*consumer, compiler, c_is_scalar, false); } + + NIR_PASS_V(*producer, nir_lower_io_to_vector, nir_var_shader_out); + NIR_PASS_V(*consumer, nir_lower_io_to_vector, nir_var_shader_in); + + if ((*producer)->info.stage != MESA_SHADER_TESS_CTRL) { + /* Calling lower_io_to_vector creates output variable writes with + * write-masks. 
On non-TCS outputs, the back-end can't handle it and we + * need to call nir_lower_io_to_temporaries to get rid of them. This, + * in turn, creates temporary variables and extra copy_deref intrinsics + * that we need to clean up. + */ + NIR_PASS_V(*producer, nir_lower_io_to_temporaries, + nir_shader_get_entrypoint(*producer), true, false); + NIR_PASS_V(*producer, nir_lower_global_vars_to_local); + NIR_PASS_V(*producer, nir_split_var_copies); + NIR_PASS_V(*producer, nir_lower_var_copies); + } } /* Prepare the given shader for codegen @@ -793,11 +891,13 @@ OPT(nir_opt_algebraic_late); - OPT(nir_lower_to_source_mods); + OPT(nir_lower_to_source_mods, nir_lower_all_source_mods); OPT(nir_copy_prop); OPT(nir_opt_dce); OPT(nir_opt_move_comparisons); + OPT(nir_lower_bool_to_int32); + OPT(nir_lower_locals_to_regs); if (unlikely(debug_enabled)) { @@ -847,7 +947,9 @@ bool is_scalar) { const struct gen_device_info *devinfo = compiler->devinfo; - nir_lower_tex_options tex_options = { 0 }; + nir_lower_tex_options tex_options = { + .lower_txd_clamp_if_sampler_index_not_lt_16 = true, + }; /* Iron Lake and prior require lowering of all rectangle textures */ if (devinfo->gen < 6) @@ -877,6 +979,7 @@ tex_options.lower_y_u_v_external = key_tex->y_u_v_image_mask; tex_options.lower_yx_xuxv_external = key_tex->yx_xuxv_image_mask; tex_options.lower_xy_uxvx_external = key_tex->xy_uxvx_image_mask; + tex_options.lower_ayuv_external = key_tex->ayuv_image_mask; if (nir_lower_tex(nir, &tex_options)) { nir_validate_shader(nir, "after nir_lower_tex"); @@ -973,8 +1076,7 @@ nir_intrinsic_instr *load; nir_intrinsic_instr *store; nir_ssa_def *zero = nir_imm_int(&b, 0); - nir_ssa_def *invoc_id = - nir_load_system_value(&b, nir_intrinsic_load_invocation_id, 0); + nir_ssa_def *invoc_id = nir_load_invocation_id(&b); nir->info.inputs_read = key->outputs_written & ~(VARYING_BIT_TESS_LEVEL_INNER | VARYING_BIT_TESS_LEVEL_OUTER); diff -Nru mesa-18.3.3/src/intel/compiler/brw_nir.h 
mesa-19.0.1/src/intel/compiler/brw_nir.h --- mesa-18.3.3/src/intel/compiler/brw_nir.h 2018-12-07 18:58:04.000000000 +0000 +++ mesa-19.0.1/src/intel/compiler/brw_nir.h 2019-03-31 23:16:37.000000000 +0000 @@ -119,6 +119,8 @@ void brw_nir_rewrite_image_intrinsic(nir_intrinsic_instr *intrin, nir_ssa_def *index); +bool brw_nir_lower_mem_access_bit_sizes(nir_shader *shader); + nir_shader *brw_postprocess_nir(nir_shader *nir, const struct brw_compiler *compiler, bool is_scalar); diff -Nru mesa-18.3.3/src/intel/compiler/brw_nir_lower_cs_intrinsics.c mesa-19.0.1/src/intel/compiler/brw_nir_lower_cs_intrinsics.c --- mesa-18.3.3/src/intel/compiler/brw_nir_lower_cs_intrinsics.c 2018-03-08 23:00:46.000000000 +0000 +++ mesa-19.0.1/src/intel/compiler/brw_nir_lower_cs_intrinsics.c 2019-03-31 23:16:37.000000000 +0000 @@ -70,39 +70,6 @@ break; } - case nir_intrinsic_load_local_invocation_id: { - /* We lower gl_LocalInvocationID from gl_LocalInvocationIndex based - * on this formula: - * - * gl_LocalInvocationID.x = - * gl_LocalInvocationIndex % gl_WorkGroupSize.x; - * gl_LocalInvocationID.y = - * (gl_LocalInvocationIndex / gl_WorkGroupSize.x) % - * gl_WorkGroupSize.y; - * gl_LocalInvocationID.z = - * (gl_LocalInvocationIndex / - * (gl_WorkGroupSize.x * gl_WorkGroupSize.y)) % - * gl_WorkGroupSize.z; - */ - unsigned *size = nir->info.cs.local_size; - - nir_ssa_def *local_index = nir_load_local_invocation_index(b); - - nir_const_value uvec3; - memset(&uvec3, 0, sizeof(uvec3)); - uvec3.u32[0] = 1; - uvec3.u32[1] = size[0]; - uvec3.u32[2] = size[0] * size[1]; - nir_ssa_def *div_val = nir_build_imm(b, 3, 32, uvec3); - uvec3.u32[0] = size[0]; - uvec3.u32[1] = size[1]; - uvec3.u32[2] = size[2]; - nir_ssa_def *mod_val = nir_build_imm(b, 3, 32, uvec3); - - sysval = nir_umod(b, nir_udiv(b, local_index, div_val), mod_val); - break; - } - case nir_intrinsic_load_subgroup_id: if (state->local_workgroup_size > 8) continue; diff -Nru mesa-18.3.3/src/intel/compiler/brw_nir_lower_image_load_store.c 
mesa-19.0.1/src/intel/compiler/brw_nir_lower_image_load_store.c --- mesa-18.3.3/src/intel/compiler/brw_nir_lower_image_load_store.c 2018-12-07 18:58:04.000000000 +0000 +++ mesa-19.0.1/src/intel/compiler/brw_nir_lower_image_load_store.c 2019-03-31 23:16:37.000000000 +0000 @@ -544,38 +544,16 @@ break; case ISL_SFLOAT: - if (image.bits[0] == 16) { - nir_ssa_def *f16comps[4]; - for (unsigned i = 0; i < image.chans; i++) { - f16comps[i] = nir_pack_half_2x16_split(b, nir_channel(b, color, i), - nir_imm_float(b, 0)); - } - color = nir_vec(b, f16comps, image.chans); - } + if (image.bits[0] == 16) + color = nir_format_float_to_half(b, color); break; case ISL_UINT: - if (image.bits[0] < 32) { - nir_const_value max; - for (unsigned i = 0; i < image.chans; i++) { - assert(image.bits[i] < 32); - max.u32[i] = (1u << image.bits[i]) - 1; - } - color = nir_umin(b, color, nir_build_imm(b, image.chans, 32, max)); - } + color = nir_format_clamp_uint(b, color, image.bits); break; case ISL_SINT: - if (image.bits[0] < 32) { - nir_const_value min, max; - for (unsigned i = 0; i < image.chans; i++) { - assert(image.bits[i] < 32); - max.i32[i] = (1 << (image.bits[i] - 1)) - 1; - min.i32[i] = -(1 << (image.bits[i] - 1)); - } - color = nir_imin(b, color, nir_build_imm(b, image.chans, 32, max)); - color = nir_imax(b, color, nir_build_imm(b, image.chans, 32, min)); - } + color = nir_format_clamp_sint(b, color, image.bits); break; default: diff -Nru mesa-18.3.3/src/intel/compiler/brw_nir_lower_mem_access_bit_sizes.c mesa-19.0.1/src/intel/compiler/brw_nir_lower_mem_access_bit_sizes.c --- mesa-18.3.3/src/intel/compiler/brw_nir_lower_mem_access_bit_sizes.c 1970-01-01 00:00:00.000000000 +0000 +++ mesa-19.0.1/src/intel/compiler/brw_nir_lower_mem_access_bit_sizes.c 2019-03-31 23:16:37.000000000 +0000 @@ -0,0 +1,313 @@ +/* + * Copyright © 2018 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files 
(the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + */ + +#include "brw_nir.h" +#include "compiler/nir/nir_builder.h" +#include "util/u_math.h" +#include "util/bitscan.h" + +static nir_ssa_def * +dup_mem_intrinsic(nir_builder *b, nir_intrinsic_instr *intrin, + nir_ssa_def *store_src, int offset, + unsigned num_components, unsigned bit_size, + unsigned align) +{ + const nir_intrinsic_info *info = &nir_intrinsic_infos[intrin->intrinsic]; + + nir_intrinsic_instr *dup = + nir_intrinsic_instr_create(b->shader, intrin->intrinsic); + + nir_src *intrin_offset_src = nir_get_io_offset_src(intrin); + for (unsigned i = 0; i < info->num_srcs; i++) { + assert(intrin->src[i].is_ssa); + if (i == 0 && store_src) { + assert(!info->has_dest); + assert(&intrin->src[i] != intrin_offset_src); + dup->src[i] = nir_src_for_ssa(store_src); + } else if (&intrin->src[i] == intrin_offset_src) { + dup->src[i] = nir_src_for_ssa(nir_iadd_imm(b, intrin->src[i].ssa, + offset)); + } else { + dup->src[i] = nir_src_for_ssa(intrin->src[i].ssa); + } + } + + dup->num_components = 
num_components; + + for (unsigned i = 0; i < info->num_indices; i++) + dup->const_index[i] = intrin->const_index[i]; + + nir_intrinsic_set_align(dup, align, 0); + + if (info->has_dest) { + assert(intrin->dest.is_ssa); + nir_ssa_dest_init(&dup->instr, &dup->dest, + num_components, bit_size, + intrin->dest.ssa.name); + } else { + nir_intrinsic_set_write_mask(dup, (1 << num_components) - 1); + } + + nir_builder_instr_insert(b, &dup->instr); + + return info->has_dest ? &dup->dest.ssa : NULL; +} + +static bool +lower_mem_load_bit_size(nir_builder *b, nir_intrinsic_instr *intrin) +{ + assert(intrin->dest.is_ssa); + if (intrin->dest.ssa.bit_size == 32) + return false; + + const unsigned bit_size = intrin->dest.ssa.bit_size; + const unsigned num_components = intrin->dest.ssa.num_components; + const unsigned bytes_read = num_components * (bit_size / 8); + const unsigned align = nir_intrinsic_align(intrin); + + nir_ssa_def *result[4] = { NULL, }; + + nir_src *offset_src = nir_get_io_offset_src(intrin); + if (bit_size < 32 && nir_src_is_const(*offset_src)) { + /* The offset is constant so we can use a 32-bit load and just shift it + * around as needed. + */ + const int load_offset = nir_src_as_uint(*offset_src) % 4; + assert(load_offset % (bit_size / 8) == 0); + const unsigned load_comps32 = DIV_ROUND_UP(bytes_read + load_offset, 4); + /* A 16-bit vec4 is a 32-bit vec2. We add an extra component in case + * we offset into a component with load_offset. 
+ */ + assert(load_comps32 <= 3); + + nir_ssa_def *load = dup_mem_intrinsic(b, intrin, NULL, -load_offset, + load_comps32, 32, 4); + nir_ssa_def *unpacked[3]; + for (unsigned i = 0; i < load_comps32; i++) + unpacked[i] = nir_unpack_bits(b, nir_channel(b, load, i), bit_size); + + assert(load_offset % (bit_size / 8) == 0); + const unsigned divisor = 32 / bit_size; + + for (unsigned i = 0; i < num_components; i++) { + unsigned load_i = i + load_offset / (bit_size / 8); + result[i] = nir_channel(b, unpacked[load_i / divisor], + load_i % divisor); + } + } else { + /* Otherwise, we have to break it into smaller loads */ + unsigned res_idx = 0; + int load_offset = 0; + while (load_offset < bytes_read) { + const unsigned bytes_left = bytes_read - load_offset; + unsigned load_bit_size, load_comps; + if (align < 4) { + load_comps = 1; + /* Choose a byte, word, or dword */ + load_bit_size = util_next_power_of_two(MIN2(bytes_left, 4)) * 8; + } else { + assert(load_offset % 4 == 0); + load_bit_size = 32; + load_comps = DIV_ROUND_UP(MIN2(bytes_left, 16), 4); + } + + nir_ssa_def *load = dup_mem_intrinsic(b, intrin, NULL, load_offset, + load_comps, load_bit_size, + align); + + nir_ssa_def *unpacked = nir_bitcast_vector(b, load, bit_size); + for (unsigned i = 0; i < unpacked->num_components; i++) { + if (res_idx < num_components) + result[res_idx++] = nir_channel(b, unpacked, i); + } + + load_offset += load_comps * (load_bit_size / 8); + } + } + + nir_ssa_def *vec_result = nir_vec(b, result, num_components); + nir_ssa_def_rewrite_uses(&intrin->dest.ssa, + nir_src_for_ssa(vec_result)); + nir_instr_remove(&intrin->instr); + + return true; +} + +static bool +lower_mem_store_bit_size(nir_builder *b, nir_intrinsic_instr *intrin) +{ + assert(intrin->src[0].is_ssa); + nir_ssa_def *value = intrin->src[0].ssa; + + assert(intrin->num_components == value->num_components); + const unsigned bit_size = value->bit_size; + const unsigned num_components = intrin->num_components; + const unsigned 
bytes_written = num_components * (bit_size / 8); + const unsigned align_mul = nir_intrinsic_align_mul(intrin); + const unsigned align_offset = nir_intrinsic_align_offset(intrin); + const unsigned align = nir_intrinsic_align(intrin); + + nir_component_mask_t writemask = nir_intrinsic_write_mask(intrin); + assert(writemask < (1 << num_components)); + + if ((value->bit_size <= 32 && num_components == 1) || + (value->bit_size == 32 && writemask == (1 << num_components) - 1)) + return false; + + nir_src *offset_src = nir_get_io_offset_src(intrin); + const bool offset_is_const = nir_src_is_const(*offset_src); + const unsigned const_offset = + offset_is_const ? nir_src_as_uint(*offset_src) : 0; + + assert(num_components * (bit_size / 8) <= 32); + uint32_t byte_mask = 0; + for (unsigned i = 0; i < num_components; i++) { + if (writemask & (1 << i)) + byte_mask |= ((1 << (bit_size / 8)) - 1) << i * (bit_size / 8); + } + + while (byte_mask) { + const int start = ffs(byte_mask) - 1; + assert(start % (bit_size / 8) == 0); + + int end; + for (end = start + 1; end < bytes_written; end++) { + if (!(byte_mask & (1 << end))) + break; + } + /* The size of the current contiguous chunk in bytes */ + const unsigned chunk_bytes = end - start; + + const bool is_dword_aligned = + (align_mul >= 4 && (align_offset + start) % 4 == 0) || + (offset_is_const && (start + const_offset) % 4 == 0); + + unsigned store_comps, store_bit_size, store_align; + if (chunk_bytes >= 4 && is_dword_aligned) { + store_align = MAX2(align, 4); + store_bit_size = 32; + store_comps = MIN2(chunk_bytes, 16) / 4; + } else { + store_align = align; + store_comps = 1; + store_bit_size = MIN2(chunk_bytes, 4) * 8; + /* The bit size must be a power of two */ + if (store_bit_size == 24) + store_bit_size = 16; + } + + const unsigned store_bytes = store_comps * (store_bit_size / 8); + assert(store_bytes % (bit_size / 8) == 0); + const unsigned store_first_src_comp = start / (bit_size / 8); + const unsigned store_src_comps = 
store_bytes / (bit_size / 8); + assert(store_first_src_comp + store_src_comps <= num_components); + + unsigned src_swiz[4]; + for (unsigned i = 0; i < store_src_comps; i++) + src_swiz[i] = store_first_src_comp + i; + nir_ssa_def *store_value = + nir_swizzle(b, value, src_swiz, store_src_comps, false); + nir_ssa_def *packed = nir_bitcast_vector(b, store_value, store_bit_size); + + dup_mem_intrinsic(b, intrin, packed, start, + store_comps, store_bit_size, store_align); + + byte_mask &= ~(((1u << store_bytes) - 1) << start); + } + + nir_instr_remove(&intrin->instr); + + return true; +} + +static bool +lower_mem_access_bit_sizes_impl(nir_function_impl *impl) +{ + bool progress = false; + + nir_builder b; + nir_builder_init(&b, impl); + + nir_foreach_block(block, impl) { + nir_foreach_instr_safe(instr, block) { + if (instr->type != nir_instr_type_intrinsic) + continue; + + b.cursor = nir_after_instr(instr); + + nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(instr); + switch (intrin->intrinsic) { + case nir_intrinsic_load_ssbo: + case nir_intrinsic_load_shared: + if (lower_mem_load_bit_size(&b, intrin)) + progress = true; + break; + + case nir_intrinsic_store_ssbo: + case nir_intrinsic_store_shared: + if (lower_mem_store_bit_size(&b, intrin)) + progress = true; + break; + + default: + break; + } + } + } + + if (progress) { + nir_metadata_preserve(impl, nir_metadata_block_index | + nir_metadata_dominance); + } + + return progress; +} + +/** + * This pass loads arbitrary SSBO and shared memory load/store operations to + * intrinsics which are natively handleable by GEN hardware. In particular, + * we have two general types of memory load/store messages: + * + * - Untyped surface read/write: These can load/store between one and four + * dword components to/from a dword-aligned offset. + * + * - Byte scattered read/write: These can load/store a single byte, word, or + * dword scalar to/from an unaligned byte offset. 
+ * + * Neither type of message can do a write-masked store. This pass converts + * all nir load/store intrinsics into a series of either 8 or 32-bit + * load/store intrinsics with a number of components that we can directly + * handle in hardware and with a trivial write-mask. + */ +bool +brw_nir_lower_mem_access_bit_sizes(nir_shader *shader) +{ + bool progress = false; + + nir_foreach_function(func, shader) { + if (func->impl && lower_mem_access_bit_sizes_impl(func->impl)) + progress = true; + } + + return progress; +} diff -Nru mesa-18.3.3/src/intel/compiler/brw_reg.h mesa-19.0.1/src/intel/compiler/brw_reg.h --- mesa-18.3.3/src/intel/compiler/brw_reg.h 2018-10-21 19:21:32.000000000 +0000 +++ mesa-19.0.1/src/intel/compiler/brw_reg.h 2019-03-31 23:16:37.000000000 +0000 @@ -221,15 +221,15 @@ unsigned negate:1; /* source only */ unsigned abs:1; /* source only */ unsigned address_mode:1; /* relative addressing, hopefully! */ - unsigned pad0:1; + unsigned pad0:17; unsigned subnr:5; /* :1 in align16 */ - unsigned nr:16; }; uint32_t bits; }; union { struct { + unsigned nr; unsigned swizzle:8; /* src only, align16 only */ unsigned writemask:4; /* dest only, align16 only */ int indirect_offset:10; /* relative addressing offset */ @@ -251,8 +251,7 @@ static inline bool brw_regs_equal(const struct brw_reg *a, const struct brw_reg *b) { - const bool df = a->type == BRW_REGISTER_TYPE_DF && a->file == IMM; - return a->bits == b->bits && (df ? 
a->u64 == b->u64 : a->ud == b->ud); + return a->bits == b->bits && a->u64 == b->u64; } static inline bool diff -Nru mesa-18.3.3/src/intel/compiler/brw_schedule_instructions.cpp mesa-19.0.1/src/intel/compiler/brw_schedule_instructions.cpp --- mesa-18.3.3/src/intel/compiler/brw_schedule_instructions.cpp 2018-12-07 18:58:04.000000000 +0000 +++ mesa-19.0.1/src/intel/compiler/brw_schedule_instructions.cpp 2019-03-31 23:16:37.000000000 +0000 @@ -323,7 +323,6 @@ break; case FS_OPCODE_VARYING_PULL_CONSTANT_LOAD_GEN4: - case FS_OPCODE_VARYING_PULL_CONSTANT_LOAD_GEN7: case FS_OPCODE_UNIFORM_PULL_CONSTANT_LOAD: case FS_OPCODE_UNIFORM_PULL_CONSTANT_LOAD_GEN7: case VS_OPCODE_PULL_CONSTANT_LOAD: @@ -414,6 +413,102 @@ latency = is_haswell ? 300 : 600; break; + case SHADER_OPCODE_SEND: + switch (inst->sfid) { + case BRW_SFID_SAMPLER: { + unsigned msg_type = (inst->desc >> 12) & 0x1f; + switch (msg_type) { + case GEN5_SAMPLER_MESSAGE_SAMPLE_RESINFO: + case GEN6_SAMPLER_MESSAGE_SAMPLE_SAMPLEINFO: + /* See also SHADER_OPCODE_TXS */ + latency = 100; + break; + + default: + /* See also SHADER_OPCODE_TEX */ + latency = 200; + break; + } + break; + } + + case GEN6_SFID_DATAPORT_RENDER_CACHE: + switch ((inst->desc >> 14) & 0x1f) { + case GEN7_DATAPORT_RC_TYPED_SURFACE_WRITE: + case GEN7_DATAPORT_RC_TYPED_SURFACE_READ: + /* See also SHADER_OPCODE_TYPED_SURFACE_READ */ + assert(!is_haswell); + latency = 600; + break; + + case GEN7_DATAPORT_RC_TYPED_ATOMIC_OP: + /* See also SHADER_OPCODE_TYPED_ATOMIC */ + assert(!is_haswell); + latency = 14000; + break; + + default: + unreachable("Unknown render cache message"); + } + break; + + case GEN7_SFID_DATAPORT_DATA_CACHE: + switch ((inst->desc >> 14) & 0x1f) { + case HSW_DATAPORT_DC_PORT0_BYTE_SCATTERED_READ: + case HSW_DATAPORT_DC_PORT0_BYTE_SCATTERED_WRITE: + /* We have no data for this but assume it's roughly the same as + * untyped surface read/write. 
+ */ + latency = 300; + break; + + case GEN7_DATAPORT_DC_UNTYPED_SURFACE_READ: + case GEN7_DATAPORT_DC_UNTYPED_SURFACE_WRITE: + /* See also SHADER_OPCODE_UNTYPED_SURFACE_READ */ + assert(!is_haswell); + latency = 600; + break; + + case GEN7_DATAPORT_DC_UNTYPED_ATOMIC_OP: + /* See also SHADER_OPCODE_UNTYPED_ATOMIC */ + assert(!is_haswell); + latency = 14000; + break; + + default: + unreachable("Unknown data cache message"); + } + break; + + case HSW_SFID_DATAPORT_DATA_CACHE_1: + switch ((inst->desc >> 14) & 0x1f) { + case HSW_DATAPORT_DC_PORT1_UNTYPED_SURFACE_READ: + case HSW_DATAPORT_DC_PORT1_UNTYPED_SURFACE_WRITE: + case HSW_DATAPORT_DC_PORT1_TYPED_SURFACE_READ: + case HSW_DATAPORT_DC_PORT1_TYPED_SURFACE_WRITE: + /* See also SHADER_OPCODE_UNTYPED_SURFACE_READ */ + latency = 300; + break; + + case HSW_DATAPORT_DC_PORT1_UNTYPED_ATOMIC_OP: + case HSW_DATAPORT_DC_PORT1_UNTYPED_ATOMIC_OP_SIMD4X2: + case HSW_DATAPORT_DC_PORT1_TYPED_ATOMIC_OP_SIMD4X2: + case HSW_DATAPORT_DC_PORT1_TYPED_ATOMIC_OP: + case GEN9_DATAPORT_DC_PORT1_UNTYPED_ATOMIC_FLOAT_OP: + /* See also SHADER_OPCODE_UNTYPED_ATOMIC */ + latency = 14000; + break; + + default: + unreachable("Unknown data cache message"); + } + break; + + default: + unreachable("Unknown SFID"); + } + break; + default: /* 2 cycles: * mul(8) g4<1>F g2<0,1,0>F 0.5F { align1 WE_normal 1Q }; @@ -430,7 +525,7 @@ class instruction_scheduler { public: instruction_scheduler(backend_shader *s, int grf_count, - int hw_reg_count, int block_count, + unsigned hw_reg_count, int block_count, instruction_scheduler_mode mode) { this->bs = s; @@ -511,7 +606,7 @@ bool post_reg_alloc; int instructions_to_schedule; int grf_count; - int hw_reg_count; + unsigned hw_reg_count; int reg_pressure; int block_idx; exec_list instructions; @@ -665,7 +760,7 @@ int payload_last_use_ip[hw_reg_count]; v->calculate_payload_ranges(hw_reg_count, payload_last_use_ip); - for (int i = 0; i < hw_reg_count; i++) { + for (unsigned i = 0; i < hw_reg_count; i++) { if 
(payload_last_use_ip[i] == -1) continue; @@ -973,7 +1068,7 @@ * After register allocation, reg_offsets are gone and we track individual * GRF registers. */ - schedule_node *last_grf_write[grf_count * 16]; + schedule_node **last_grf_write; schedule_node *last_mrf_write[BRW_MAX_MRF(v->devinfo->gen)]; schedule_node *last_conditional_mod[8] = {}; schedule_node *last_accumulator_write = NULL; @@ -984,7 +1079,7 @@ */ schedule_node *last_fixed_grf_write = NULL; - memset(last_grf_write, 0, sizeof(last_grf_write)); + last_grf_write = (schedule_node **)calloc(sizeof(schedule_node *), grf_count * 16); memset(last_mrf_write, 0, sizeof(last_mrf_write)); /* top-to-bottom dependencies: RAW and WAW. */ @@ -1111,7 +1206,7 @@ } /* bottom-to-top dependencies: WAR */ - memset(last_grf_write, 0, sizeof(last_grf_write)); + memset(last_grf_write, 0, sizeof(schedule_node *) * grf_count * 16); memset(last_mrf_write, 0, sizeof(last_mrf_write)); memset(last_conditional_mod, 0, sizeof(last_conditional_mod)); last_accumulator_write = NULL; @@ -1227,6 +1322,8 @@ last_accumulator_write = n; } } + + free(last_grf_write); } void diff -Nru mesa-18.3.3/src/intel/compiler/brw_shader.cpp mesa-19.0.1/src/intel/compiler/brw_shader.cpp --- mesa-18.3.3/src/intel/compiler/brw_shader.cpp 2018-12-07 18:58:04.000000000 +0000 +++ mesa-19.0.1/src/intel/compiler/brw_shader.cpp 2019-03-31 23:16:37.000000000 +0000 @@ -206,6 +206,9 @@ case SHADER_OPCODE_COS: return "cos"; + case SHADER_OPCODE_SEND: + return "send"; + case SHADER_OPCODE_TEX: return "tex"; case SHADER_OPCODE_TEX_LOGICAL: @@ -269,6 +272,8 @@ case SHADER_OPCODE_IMAGE_SIZE: return "image_size"; + case SHADER_OPCODE_IMAGE_SIZE_LOGICAL: + return "image_size_logical"; case SHADER_OPCODE_SHADER_TIME_ADD: return "shader_time_add"; @@ -402,8 +407,6 @@ return "uniform_pull_const_gen7"; case FS_OPCODE_VARYING_PULL_CONSTANT_LOAD_GEN4: return "varying_pull_const_gen4"; - case FS_OPCODE_VARYING_PULL_CONSTANT_LOAD_GEN7: - return "varying_pull_const_gen7"; case 
FS_OPCODE_VARYING_PULL_CONSTANT_LOAD_LOGICAL: return "varying_pull_const_logical"; @@ -415,10 +418,6 @@ case FS_OPCODE_PACK_HALF_2x16_SPLIT: return "pack_half_2x16_split"; - case FS_OPCODE_UNPACK_HALF_2x16_SPLIT_X: - return "unpack_half_2x16_split_x"; - case FS_OPCODE_UNPACK_HALF_2x16_SPLIT_Y: - return "unpack_half_2x16_split_y"; case FS_OPCODE_PLACEHOLDER_HALT: return "placeholder_halt"; @@ -1001,6 +1000,9 @@ backend_instruction::has_side_effects() const { switch (opcode) { + case SHADER_OPCODE_SEND: + return send_has_side_effects; + case SHADER_OPCODE_UNTYPED_ATOMIC: case SHADER_OPCODE_UNTYPED_ATOMIC_LOGICAL: case SHADER_OPCODE_UNTYPED_ATOMIC_FLOAT: @@ -1037,6 +1039,9 @@ backend_instruction::is_volatile() const { switch (opcode) { + case SHADER_OPCODE_SEND: + return send_is_volatile; + case SHADER_OPCODE_UNTYPED_SURFACE_READ: case SHADER_OPCODE_UNTYPED_SURFACE_READ_LOGICAL: case SHADER_OPCODE_TYPED_SURFACE_READ: @@ -1189,7 +1194,7 @@ const struct brw_tes_prog_key *key, const struct brw_vue_map *input_vue_map, struct brw_tes_prog_data *prog_data, - const nir_shader *src_shader, + nir_shader *nir, struct gl_program *prog, int shader_time_index, char **error_str) @@ -1198,7 +1203,6 @@ const bool is_scalar = compiler->scalar_stage[MESA_SHADER_TESS_EVAL]; const unsigned *assembly; - nir_shader *nir = nir_shader_clone(mem_ctx, src_shader); nir->info.inputs_read = key->inputs_read; nir->info.patch_inputs_read = key->patch_inputs_read; diff -Nru mesa-18.3.3/src/intel/compiler/brw_shader.h mesa-19.0.1/src/intel/compiler/brw_shader.h --- mesa-18.3.3/src/intel/compiler/brw_shader.h 2018-12-07 18:58:04.000000000 +0000 +++ mesa-19.0.1/src/intel/compiler/brw_shader.h 2019-03-31 23:16:37.000000000 +0000 @@ -156,8 +156,11 @@ uint32_t offset; /**< spill/unspill offset or texture offset bitfield */ uint8_t mlen; /**< SEND message length */ + uint8_t ex_mlen; /**< SENDS extended message length */ int8_t base_mrf; /**< First MRF in the SEND message, if mlen is nonzero. 
*/ uint8_t target; /**< MRT target. */ + uint8_t sfid; /**< SFID for SEND instructions */ + uint32_t desc; /**< SEND[S] message descriptor immediate */ unsigned size_written; /**< Data written to the destination register in bytes. */ enum opcode opcode; /* BRW_OPCODE_* or FS_OPCODE_* */ @@ -170,6 +173,9 @@ bool no_dd_check:1; bool saturate:1; bool shadow_compare:1; + bool check_tdr:1; /**< Only valid for SEND; turns it into a SENDC */ + bool send_has_side_effects:1; /**< Only valid for SHADER_OPCODE_SEND */ + bool send_is_volatile:1; /**< Only valid for SHADER_OPCODE_SEND */ bool eot:1; /* Chooses which flag subregister (f0.0 to f1.1) is used for conditional diff -Nru mesa-18.3.3/src/intel/compiler/brw_vec4_cmod_propagation.cpp mesa-19.0.1/src/intel/compiler/brw_vec4_cmod_propagation.cpp --- mesa-18.3.3/src/intel/compiler/brw_vec4_cmod_propagation.cpp 2018-07-14 15:13:03.000000000 +0000 +++ mesa-19.0.1/src/intel/compiler/brw_vec4_cmod_propagation.cpp 2019-03-31 23:16:37.000000000 +0000 @@ -47,7 +47,7 @@ } static bool -opt_cmod_propagation_local(bblock_t *block) +opt_cmod_propagation_local(bblock_t *block, vec4_visitor *v) { bool progress = false; int ip = block->end_ip + 1; @@ -146,12 +146,109 @@ scan_inst->dst, scan_inst->size_written)) { if ((scan_inst->predicate && scan_inst->opcode != BRW_OPCODE_SEL) || scan_inst->dst.offset != inst->src[0].offset || - writemasks_incompatible(scan_inst, inst) || scan_inst->exec_size != inst->exec_size || scan_inst->group != inst->group) { break; } + /* If scan_inst is a CMP that produces a single value and inst is + * a CMP.NZ that consumes only that value, remove inst. 
+ */ + if (inst->conditional_mod == BRW_CONDITIONAL_NZ && + (inst->src[0].type == BRW_REGISTER_TYPE_D || + inst->src[0].type == BRW_REGISTER_TYPE_UD) && + (inst->opcode == BRW_OPCODE_CMP || + inst->opcode == BRW_OPCODE_MOV) && + scan_inst->opcode == BRW_OPCODE_CMP && + ((inst->src[0].swizzle == BRW_SWIZZLE_XXXX && + scan_inst->dst.writemask == WRITEMASK_X) || + (inst->src[0].swizzle == BRW_SWIZZLE_YYYY && + scan_inst->dst.writemask == WRITEMASK_Y) || + (inst->src[0].swizzle == BRW_SWIZZLE_ZZZZ && + scan_inst->dst.writemask == WRITEMASK_Z) || + (inst->src[0].swizzle == BRW_SWIZZLE_WWWW && + scan_inst->dst.writemask == WRITEMASK_W))) { + if (inst->dst.writemask != scan_inst->dst.writemask) { + src_reg temp(v, glsl_type::vec4_type, 1); + + /* Given a sequence like: + * + * cmp.ge.f0(8) g21<1>.zF g20<4>.xF g18<4>.xF + * ... + * cmp.nz.f0(8) null<1>D g21<4>.zD 0D + * + * Replace it with something like: + * + * cmp.ge.f0(8) g22<1>.zF g20<4>.xF g18<4>.xF + * mov(8) g21<1>.xF g22<1>.zzzzF + * + * The added MOV will most likely be removed later. In the + * worst case, it should be cheaper to schedule. + */ + temp.swizzle = brw_swizzle_for_mask(inst->dst.writemask); + temp.type = scan_inst->src[0].type; + + vec4_instruction *mov = v->MOV(scan_inst->dst, temp); + + /* Modify the source swizzles on scan_inst. 
If scan_inst + * was + * + * cmp.ge.f0(8) g21<1>.zF g20<4>.wzyxF g18<4>.yxwzF + * + * replace it with + * + * cmp.ge.f0(8) g21<1>.zF g20<4>.yyyyF g18<4>.wwwwF + */ + unsigned src0_chan; + unsigned src1_chan; + switch (scan_inst->dst.writemask) { + case WRITEMASK_X: + src0_chan = BRW_GET_SWZ(scan_inst->src[0].swizzle, 0); + src1_chan = BRW_GET_SWZ(scan_inst->src[1].swizzle, 0); + break; + case WRITEMASK_Y: + src0_chan = BRW_GET_SWZ(scan_inst->src[0].swizzle, 1); + src1_chan = BRW_GET_SWZ(scan_inst->src[1].swizzle, 1); + break; + case WRITEMASK_Z: + src0_chan = BRW_GET_SWZ(scan_inst->src[0].swizzle, 2); + src1_chan = BRW_GET_SWZ(scan_inst->src[1].swizzle, 2); + break; + case WRITEMASK_W: + src0_chan = BRW_GET_SWZ(scan_inst->src[0].swizzle, 3); + src1_chan = BRW_GET_SWZ(scan_inst->src[1].swizzle, 3); + break; + default: + unreachable("Impossible writemask"); + } + + scan_inst->src[0].swizzle = BRW_SWIZZLE4(src0_chan, + src0_chan, + src0_chan, + src0_chan); + + /* There's no swizzle on immediate value sources. */ + if (scan_inst->src[1].file != IMM) { + scan_inst->src[1].swizzle = BRW_SWIZZLE4(src1_chan, + src1_chan, + src1_chan, + src1_chan); + } + + scan_inst->dst = dst_reg(temp); + scan_inst->dst.writemask = inst->dst.writemask; + + scan_inst->insert_after(block, mov); + } + + inst->remove(block); + progress = true; + break; + } + + if (writemasks_incompatible(scan_inst, inst)) + break; + /* CMP's result is the same regardless of dest type. 
*/ if (inst->conditional_mod == BRW_CONDITIONAL_NZ && scan_inst->opcode == BRW_OPCODE_CMP && @@ -256,7 +353,7 @@ bool progress = false; foreach_block_reverse(block, cfg) { - progress = opt_cmod_propagation_local(block) || progress; + progress = opt_cmod_propagation_local(block, this) || progress; } if (progress) diff -Nru mesa-18.3.3/src/intel/compiler/brw_vec4.cpp mesa-19.0.1/src/intel/compiler/brw_vec4.cpp --- mesa-18.3.3/src/intel/compiler/brw_vec4.cpp 2018-12-07 18:58:04.000000000 +0000 +++ mesa-19.0.1/src/intel/compiler/brw_vec4.cpp 2019-03-31 23:16:37.000000000 +0000 @@ -409,7 +409,7 @@ bool progress = false; foreach_block(block, cfg) { - int last_reg = -1, last_offset = -1; + unsigned last_reg = ~0u, last_offset = ~0u; enum brw_reg_file last_reg_file = BAD_FILE; uint8_t imm[4] = { 0 }; @@ -442,7 +442,7 @@ need_type = BRW_REGISTER_TYPE_F; } } else { - last_reg = -1; + last_reg = ~0u; } /* If this wasn't a MOV, or the destination register doesn't match, @@ -470,7 +470,7 @@ } inst_count = 0; - last_reg = -1; + last_reg = ~0u;; writemask = 0; dest_type = BRW_REGISTER_TYPE_F; @@ -892,18 +892,6 @@ progress = true; } break; - case BRW_OPCODE_CMP: - if (inst->conditional_mod == BRW_CONDITIONAL_GE && - inst->src[0].abs && - inst->src[0].negate && - inst->src[1].is_zero()) { - inst->src[0].abs = false; - inst->src[0].negate = false; - inst->conditional_mod = BRW_CONDITIONAL_Z; - progress = true; - break; - } - break; case SHADER_OPCODE_BROADCAST: if (is_uniform(inst->src[0]) || inst->src[1].is_zero()) { @@ -1409,8 +1397,10 @@ * in the register instead. 
*/ if (to_mrf && scan_inst->mlen > 0) { - if (inst->dst.nr >= scan_inst->base_mrf && - inst->dst.nr < scan_inst->base_mrf + scan_inst->mlen) { + unsigned start = scan_inst->base_mrf; + unsigned end = scan_inst->base_mrf + scan_inst->mlen; + + if (inst->dst.nr >= start && inst->dst.nr < end) { break; } } else { @@ -2828,12 +2818,11 @@ void *mem_ctx, const struct brw_vs_prog_key *key, struct brw_vs_prog_data *prog_data, - const nir_shader *src_shader, + nir_shader *shader, int shader_time_index, char **error_str) { const bool is_scalar = compiler->scalar_stage[MESA_SHADER_VERTEX]; - nir_shader *shader = nir_shader_clone(mem_ctx, src_shader); shader = brw_nir_apply_sampler_key(shader, compiler, &key->tex, is_scalar); const unsigned *assembly = NULL; diff -Nru mesa-18.3.3/src/intel/compiler/brw_vec4_generator.cpp mesa-19.0.1/src/intel/compiler/brw_vec4_generator.cpp --- mesa-18.3.3/src/intel/compiler/brw_vec4_generator.cpp 2018-09-27 19:13:54.000000000 +0000 +++ mesa-19.0.1/src/intel/compiler/brw_vec4_generator.cpp 2019-03-31 23:16:37.000000000 +0000 @@ -291,8 +291,6 @@ inst->header_size != 0, BRW_SAMPLER_SIMD_MODE_SIMD4X2, return_format); - - brw_mark_surface_used(&prog_data->base, sampler + base_binding_table_index); } else { /* Non-constant sampler index. 
*/ @@ -1351,8 +1349,6 @@ inst->header_size > 0, BRW_SAMPLER_SIMD_MODE_SIMD4X2, BRW_SAMPLER_RETURN_FORMAT_SINT32); - - brw_mark_surface_used(&prog_data->base, surf_index.ud); } static void @@ -1378,9 +1374,6 @@ 0, /* LD message ignores sampler unit */ GEN5_SAMPLER_MESSAGE_SAMPLE_LD, BRW_SAMPLER_SIMD_MODE_SIMD4X2, 0)); - - brw_mark_surface_used(&prog_data->base, surf_index.ud); - } else { struct brw_reg addr = vec1(retype(brw_address_reg(0), BRW_REGISTER_TYPE_UD)); @@ -1866,8 +1859,6 @@ case SHADER_OPCODE_SHADER_TIME_ADD: brw_shader_time_add(p, src[0], prog_data->base.binding_table.shader_time_start); - brw_mark_surface_used(&prog_data->base, - prog_data->base.binding_table.shader_time_start); break; case SHADER_OPCODE_UNTYPED_ATOMIC: diff -Nru mesa-18.3.3/src/intel/compiler/brw_vec4_gs_nir.cpp mesa-19.0.1/src/intel/compiler/brw_vec4_gs_nir.cpp --- mesa-18.3.3/src/intel/compiler/brw_vec4_gs_nir.cpp 2017-11-05 00:14:08.000000000 +0000 +++ mesa-19.0.1/src/intel/compiler/brw_vec4_gs_nir.cpp 2019-03-31 23:16:37.000000000 +0000 @@ -41,14 +41,14 @@ /* The EmitNoIndirectInput flag guarantees our vertex index will * be constant. We should handle indirects someday. */ - nir_const_value *vertex = nir_src_as_const_value(instr->src[0]); - nir_const_value *offset_reg = nir_src_as_const_value(instr->src[1]); + const unsigned vertex = nir_src_as_uint(instr->src[0]); + const unsigned offset_reg = nir_src_as_uint(instr->src[1]); const unsigned input_array_stride = prog_data->urb_read_length * 2; if (nir_dest_bit_size(instr->dest) == 64) { - src = src_reg(ATTR, input_array_stride * vertex->u32[0] + - instr->const_index[0] + offset_reg->u32[0], + src = src_reg(ATTR, input_array_stride * vertex + + instr->const_index[0] + offset_reg, glsl_type::dvec4_type); dst_reg tmp = dst_reg(this, glsl_type::dvec4_type); @@ -65,8 +65,8 @@ /* Make up a type...we have no way of knowing... 
*/ const glsl_type *const type = glsl_type::ivec(instr->num_components); - src = src_reg(ATTR, input_array_stride * vertex->u32[0] + - instr->const_index[0] + offset_reg->u32[0], + src = src_reg(ATTR, input_array_stride * vertex + + instr->const_index[0] + offset_reg, type); src.swizzle = BRW_SWZ_COMP_INPUT(nir_intrinsic_component(instr)); diff -Nru mesa-18.3.3/src/intel/compiler/brw_vec4_gs_visitor.cpp mesa-19.0.1/src/intel/compiler/brw_vec4_gs_visitor.cpp --- mesa-18.3.3/src/intel/compiler/brw_vec4_gs_visitor.cpp 2018-09-27 19:13:54.000000000 +0000 +++ mesa-19.0.1/src/intel/compiler/brw_vec4_gs_visitor.cpp 2019-03-31 23:16:37.000000000 +0000 @@ -615,7 +615,7 @@ void *mem_ctx, const struct brw_gs_prog_key *key, struct brw_gs_prog_data *prog_data, - const nir_shader *src_shader, + nir_shader *shader, struct gl_program *prog, int shader_time_index, char **error_str) @@ -625,7 +625,6 @@ c.key = *key; const bool is_scalar = compiler->scalar_stage[MESA_SHADER_GEOMETRY]; - nir_shader *shader = nir_shader_clone(mem_ctx, src_shader); /* The GLSL linker will have already matched up GS inputs and the outputs * of prior stages. 
The driver does extend VS outputs in some cases, but @@ -668,7 +667,7 @@ prog_data->control_data_format = GEN7_GS_CONTROL_DATA_FORMAT_GSCTL_SID; /* We only have to emit control bits if we are using streams */ - if (prog && prog->info.gs.uses_streams) + if (shader->info.gs.uses_streams) c.control_data_bits_per_vertex = 2; else c.control_data_bits_per_vertex = 0; diff -Nru mesa-18.3.3/src/intel/compiler/brw_vec4.h mesa-19.0.1/src/intel/compiler/brw_vec4.h --- mesa-18.3.3/src/intel/compiler/brw_vec4.h 2018-03-26 16:53:06.000000000 +0000 +++ mesa-19.0.1/src/intel/compiler/brw_vec4.h 2019-03-31 23:16:37.000000000 +0000 @@ -132,7 +132,7 @@ bool reg_allocate(); void evaluate_spill_costs(float *spill_costs, bool *no_spill); int choose_spill_reg(struct ra_graph *g); - void spill_reg(int spill_reg); + void spill_reg(unsigned spill_reg); void move_grf_array_access_to_scratch(); void move_uniform_array_access_to_pull_constants(); void move_push_constants_to_pull_constants(); @@ -338,6 +338,7 @@ virtual void nir_emit_block(nir_block *block); virtual void nir_emit_instr(nir_instr *instr); virtual void nir_emit_load_const(nir_load_const_instr *instr); + src_reg get_nir_ssbo_intrinsic_index(nir_intrinsic_instr *instr); virtual void nir_emit_intrinsic(nir_intrinsic_instr *instr); virtual void nir_emit_alu(nir_alu_instr *instr); virtual void nir_emit_jump(nir_jump_instr *instr); @@ -354,6 +355,7 @@ unsigned num_components = 4); src_reg get_nir_src(const nir_src &src, unsigned num_components = 4); + src_reg get_nir_src_imm(const nir_src &src); src_reg get_indirect_offset(nir_intrinsic_instr *instr); dst_reg *nir_locals; diff -Nru mesa-18.3.3/src/intel/compiler/brw_vec4_nir.cpp mesa-19.0.1/src/intel/compiler/brw_vec4_nir.cpp --- mesa-18.3.3/src/intel/compiler/brw_vec4_nir.cpp 2018-12-07 18:58:04.000000000 +0000 +++ mesa-19.0.1/src/intel/compiler/brw_vec4_nir.cpp 2019-03-31 23:16:37.000000000 +0000 @@ -253,17 +253,25 @@ } src_reg +vec4_visitor::get_nir_src_imm(const nir_src &src) +{ + 
assert(nir_src_num_components(src) == 1); + assert(nir_src_bit_size(src) == 32); + return nir_src_is_const(src) ? src_reg(brw_imm_d(nir_src_as_int(src))) : + get_nir_src(src, 1); +} + +src_reg vec4_visitor::get_indirect_offset(nir_intrinsic_instr *instr) { nir_src *offset_src = nir_get_io_offset_src(instr); - nir_const_value *const_value = nir_src_as_const_value(*offset_src); - if (const_value) { + if (nir_src_is_const(*offset_src)) { /* The only constant offset we should find is 0. brw_nir.c's * add_const_offset_to_base() will fold other constant offsets * into instr->const_index[0]. */ - assert(const_value->u32[0] == 0); + assert(nir_src_as_uint(*offset_src) == 0); return src_reg(); } @@ -368,6 +376,27 @@ nir_ssa_values[instr->def.index] = reg; } +src_reg +vec4_visitor::get_nir_ssbo_intrinsic_index(nir_intrinsic_instr *instr) +{ + /* SSBO stores are weird in that their index is in src[1] */ + const unsigned src = instr->intrinsic == nir_intrinsic_store_ssbo ? 1 : 0; + + src_reg surf_index; + if (nir_src_is_const(instr->src[src])) { + unsigned index = prog_data->base.binding_table.ssbo_start + + nir_src_as_uint(instr->src[src]); + surf_index = brw_imm_ud(index); + } else { + surf_index = src_reg(this, glsl_type::uint_type); + emit(ADD(dst_reg(surf_index), get_nir_src(instr->src[src], 1), + brw_imm_ud(prog_data->base.binding_table.ssbo_start))); + surf_index = emit_uniformize(surf_index); + } + + return surf_index; +} + void vec4_visitor::nir_emit_intrinsic(nir_intrinsic_instr *instr) { @@ -377,15 +406,13 @@ switch (instr->intrinsic) { case nir_intrinsic_load_input: { - nir_const_value *const_offset = nir_src_as_const_value(instr->src[0]); - /* We set EmitNoIndirectInput for VS */ - assert(const_offset); + unsigned load_offset = nir_src_as_uint(instr->src[0]); dest = get_nir_dest(instr->dest); dest.writemask = brw_writemask_for_size(instr->num_components); - src = src_reg(ATTR, instr->const_index[0] + const_offset->u32[0], + src = src_reg(ATTR, 
instr->const_index[0] + load_offset, glsl_type::uvec4_type); src = retype(src, dest.type); @@ -404,10 +431,8 @@ } case nir_intrinsic_store_output: { - nir_const_value *const_offset = nir_src_as_const_value(instr->src[1]); - assert(const_offset); - - int varying = instr->const_index[0] + const_offset->u32[0]; + unsigned store_offset = nir_src_as_uint(instr->src[1]); + int varying = instr->const_index[0] + store_offset; bool is_64bit = nir_src_bit_size(instr->src[0]) == 64; if (is_64bit) { @@ -442,8 +467,8 @@ } case nir_intrinsic_get_buffer_size: { - nir_const_value *const_uniform_block = nir_src_as_const_value(instr->src[0]); - unsigned ssbo_index = const_uniform_block ? const_uniform_block->u32[0] : 0; + unsigned ssbo_index = nir_src_is_const(instr->src[0]) ? + nir_src_as_uint(instr->src[0]) : 0; const unsigned index = prog_data->base.binding_table.ssbo_start + ssbo_index; @@ -462,49 +487,24 @@ emit(MOV(dst_reg(MRF, param_base, glsl_type::int_type, writemask), lod)); emit(inst); - - brw_mark_surface_used(&prog_data->base, index); break; } case nir_intrinsic_store_ssbo: { assert(devinfo->gen >= 7); - /* Block index */ - src_reg surf_index; - nir_const_value *const_uniform_block = - nir_src_as_const_value(instr->src[1]); - if (const_uniform_block) { - unsigned index = prog_data->base.binding_table.ssbo_start + - const_uniform_block->u32[0]; - surf_index = brw_imm_ud(index); - brw_mark_surface_used(&prog_data->base, index); - } else { - surf_index = src_reg(this, glsl_type::uint_type); - emit(ADD(dst_reg(surf_index), get_nir_src(instr->src[1], 1), - brw_imm_ud(prog_data->base.binding_table.ssbo_start))); - surf_index = emit_uniformize(surf_index); - - brw_mark_surface_used(&prog_data->base, - prog_data->base.binding_table.ssbo_start + - nir->info.num_ssbos - 1); - } - - /* Offset */ - src_reg offset_reg; - nir_const_value *const_offset = nir_src_as_const_value(instr->src[2]); - if (const_offset) { - offset_reg = brw_imm_ud(const_offset->u32[0]); - } else { - 
offset_reg = get_nir_src(instr->src[2], 1); - } + /* brw_nir_lower_mem_access_bit_sizes takes care of this */ + assert(nir_src_bit_size(instr->src[0]) == 32); + assert(nir_intrinsic_write_mask(instr) == + (1u << instr->num_components) - 1); + + src_reg surf_index = get_nir_ssbo_intrinsic_index(instr); + src_reg offset_reg = retype(get_nir_src_imm(instr->src[2]), + BRW_REGISTER_TYPE_UD); /* Value */ src_reg val_reg = get_nir_src(instr->src[0], BRW_REGISTER_TYPE_F, 4); - /* Writemask */ - unsigned write_mask = instr->const_index[0]; - /* IvyBridge does not have a native SIMD4x2 untyped write message so untyped * writes will use SIMD8 mode. In order to hide this and keep symmetry across * typed and untyped messages and across hardware platforms, the @@ -546,158 +546,30 @@ const vec4_builder bld = vec4_builder(this).at_end() .annotate(current_annotation, base_ir); - unsigned type_slots = nir_src_bit_size(instr->src[0]) / 32; - if (type_slots == 2) { - dst_reg tmp = dst_reg(this, glsl_type::dvec4_type); - shuffle_64bit_data(tmp, retype(val_reg, tmp.type), true); - val_reg = src_reg(retype(tmp, BRW_REGISTER_TYPE_F)); - } - - uint8_t swizzle[4] = { 0, 0, 0, 0}; - int num_channels = 0; - unsigned skipped_channels = 0; - int num_components = instr->num_components; - for (int i = 0; i < num_components; i++) { - /* Read components Z/W of a dvec from the appropriate place. We will - * also have to adjust the swizzle (we do that with the '% 4' below) - */ - if (i == 2 && type_slots == 2) - val_reg = byte_offset(val_reg, REG_SIZE); - - /* Check if this channel needs to be written. If so, record the - * channel we need to take the data from in the swizzle array - */ - int component_mask = 1 << i; - int write_test = write_mask & component_mask; - if (write_test) { - /* If we are writing doubles we have to write 2 channels worth of - * of data (64 bits) for each double component. 
- */ - swizzle[num_channels++] = (i * type_slots) % 4; - if (type_slots == 2) - swizzle[num_channels++] = (i * type_slots + 1) % 4; - } - - /* If we don't have to write this channel it means we have a gap in the - * vector, so write the channels we accumulated until now, if any. Do - * the same if this was the last component in the vector, if we have - * enough channels for a full vec4 write or if we have processed - * components XY of a dvec (since components ZW are not in the same - * SIMD register) - */ - if (!write_test || i == num_components - 1 || num_channels == 4 || - (i == 1 && type_slots == 2)) { - if (num_channels > 0) { - /* We have channels to write, so update the offset we need to - * write at to skip the channels we skipped, if any. - */ - if (skipped_channels > 0) { - if (offset_reg.file == IMM) { - offset_reg.ud += 4 * skipped_channels; - } else { - emit(ADD(dst_reg(offset_reg), offset_reg, - brw_imm_ud(4 * skipped_channels))); - } - } - - /* Swizzle the data register so we take the data from the channels - * we need to write and send the write message. This will write - * num_channels consecutive dwords starting at offset. - */ - val_reg.swizzle = - BRW_SWIZZLE4(swizzle[0], swizzle[1], swizzle[2], swizzle[3]); - emit_untyped_write(bld, surf_index, offset_reg, val_reg, - 1 /* dims */, num_channels /* size */, - BRW_PREDICATE_NONE); - - /* If we have to do a second write we will have to update the - * offset so that we jump over the channels we have just written - * now. 
- */ - skipped_channels = num_channels; - - /* Restart the count for the next write message */ - num_channels = 0; - } - - /* If we didn't write the channel, increase skipped count */ - if (!write_test) - skipped_channels += type_slots; - } - } - + emit_untyped_write(bld, surf_index, offset_reg, val_reg, + 1 /* dims */, instr->num_components /* size */, + BRW_PREDICATE_NONE); break; } case nir_intrinsic_load_ssbo: { assert(devinfo->gen >= 7); - nir_const_value *const_uniform_block = - nir_src_as_const_value(instr->src[0]); + /* brw_nir_lower_mem_access_bit_sizes takes care of this */ + assert(nir_dest_bit_size(instr->dest) == 32); - src_reg surf_index; - if (const_uniform_block) { - unsigned index = prog_data->base.binding_table.ssbo_start + - const_uniform_block->u32[0]; - surf_index = brw_imm_ud(index); - - brw_mark_surface_used(&prog_data->base, index); - } else { - surf_index = src_reg(this, glsl_type::uint_type); - emit(ADD(dst_reg(surf_index), get_nir_src(instr->src[0], 1), - brw_imm_ud(prog_data->base.binding_table.ssbo_start))); - surf_index = emit_uniformize(surf_index); - - /* Assume this may touch any UBO. It would be nice to provide - * a tighter bound, but the array information is already lowered away. 
- */ - brw_mark_surface_used(&prog_data->base, - prog_data->base.binding_table.ssbo_start + - nir->info.num_ssbos - 1); - } - - src_reg offset_reg; - nir_const_value *const_offset = nir_src_as_const_value(instr->src[1]); - if (const_offset) { - offset_reg = brw_imm_ud(const_offset->u32[0]); - } else { - offset_reg = get_nir_src(instr->src[1], 1); - } + src_reg surf_index = get_nir_ssbo_intrinsic_index(instr); + src_reg offset_reg = retype(get_nir_src_imm(instr->src[1]), + BRW_REGISTER_TYPE_UD); /* Read the vector */ const vec4_builder bld = vec4_builder(this).at_end() .annotate(current_annotation, base_ir); - src_reg read_result; + src_reg read_result = emit_untyped_read(bld, surf_index, offset_reg, + 1 /* dims */, 4 /* size*/, + BRW_PREDICATE_NONE); dst_reg dest = get_nir_dest(instr->dest); - if (type_sz(dest.type) < 8) { - read_result = emit_untyped_read(bld, surf_index, offset_reg, - 1 /* dims */, 4 /* size*/, - BRW_PREDICATE_NONE); - } else { - src_reg shuffled = src_reg(this, glsl_type::dvec4_type); - - src_reg temp; - temp = emit_untyped_read(bld, surf_index, offset_reg, - 1 /* dims */, 4 /* size*/, - BRW_PREDICATE_NONE); - emit(MOV(dst_reg(retype(shuffled, temp.type)), temp)); - - if (offset_reg.file == IMM) - offset_reg.ud += 16; - else - emit(ADD(dst_reg(offset_reg), offset_reg, brw_imm_ud(16))); - - temp = emit_untyped_read(bld, surf_index, offset_reg, - 1 /* dims */, 4 /* size*/, - BRW_PREDICATE_NONE); - emit(MOV(dst_reg(retype(byte_offset(shuffled, REG_SIZE), temp.type)), - temp)); - - read_result = src_reg(this, glsl_type::dvec4_type); - shuffle_64bit_data(dst_reg(read_result), shuffled, false); - } - read_result.type = dest.type; read_result.swizzle = brw_swizzle_for_size(instr->num_components); emit(MOV(dest, read_result)); @@ -706,12 +578,12 @@ case nir_intrinsic_ssbo_atomic_add: { int op = BRW_AOP_ADD; - const nir_const_value *const val = nir_src_as_const_value(instr->src[2]); - if (val != NULL) { - if (val->i32[0] == 1) + if 
(nir_src_is_const(instr->src[2])) { + int add_val = nir_src_as_int(instr->src[2]); + if (add_val == 1) op = BRW_AOP_INC; - else if (val->i32[0] == -1) + else if (add_val == -1) op = BRW_AOP_DEC; } @@ -778,14 +650,14 @@ unsigned shift = (nir_intrinsic_base(instr) % 16) / type_size; assert(shift + instr->num_components <= 4); - nir_const_value *const_offset = nir_src_as_const_value(instr->src[0]); - if (const_offset) { + if (nir_src_is_const(instr->src[0])) { + const unsigned load_offset = nir_src_as_uint(instr->src[0]); /* Offsets are in bytes but they should always be multiples of 4 */ - assert(const_offset->u32[0] % 4 == 0); + assert(load_offset % 4 == 0); src.swizzle = brw_swizzle_for_size(instr->num_components); dest.writemask = brw_writemask_for_size(instr->num_components); - unsigned offset = const_offset->u32[0] + shift * type_size; + unsigned offset = load_offset + shift * type_size; src.offset = ROUND_DOWN_TO(offset, 16); shift = (offset % 16) / type_size; assert(shift + instr->num_components <= 4); @@ -810,19 +682,17 @@ } case nir_intrinsic_load_ubo: { - nir_const_value *const_block_index = nir_src_as_const_value(instr->src[0]); src_reg surf_index; dest = get_nir_dest(instr->dest); - if (const_block_index) { + if (nir_src_is_const(instr->src[0])) { /* The block index is a constant, so just emit the binding table entry * as an immediate. */ const unsigned index = prog_data->base.binding_table.ubo_start + - const_block_index->u32[0]; + nir_src_as_uint(instr->src[0]); surf_index = brw_imm_ud(index); - brw_mark_surface_used(&prog_data->base, index); } else { /* The block index is not a constant. Evaluate the index expression * per-channel and add the base UBO index; we have to select a value @@ -833,19 +703,12 @@ instr->num_components), brw_imm_ud(prog_data->base.binding_table.ubo_start))); surf_index = emit_uniformize(surf_index); - - /* Assume this may touch any UBO. 
It would be nice to provide - * a tighter bound, but the array information is already lowered away. - */ - brw_mark_surface_used(&prog_data->base, - prog_data->base.binding_table.ubo_start + - nir->info.num_ubos - 1); } src_reg offset_reg; - nir_const_value *const_offset = nir_src_as_const_value(instr->src[1]); - if (const_offset) { - offset_reg = brw_imm_ud(const_offset->u32[0] & ~15); + if (nir_src_is_const(instr->src[1])) { + unsigned load_offset = nir_src_as_uint(instr->src[1]); + offset_reg = brw_imm_ud(load_offset & ~15); } else { offset_reg = src_reg(this, glsl_type::uint_type); emit(MOV(dst_reg(offset_reg), @@ -877,13 +740,14 @@ } packed_consts.swizzle = brw_swizzle_for_size(instr->num_components); - if (const_offset) { + if (nir_src_is_const(instr->src[1])) { + unsigned load_offset = nir_src_as_uint(instr->src[1]); unsigned type_size = type_sz(dest.type); packed_consts.swizzle += - BRW_SWIZZLE4(const_offset->u32[0] % 16 / type_size, - const_offset->u32[0] % 16 / type_size, - const_offset->u32[0] % 16 / type_size, - const_offset->u32[0] % 16 / type_size); + BRW_SWIZZLE4(load_offset % 16 / type_size, + load_offset % 16 / type_size, + load_offset % 16 / type_size, + load_offset % 16 / type_size); } emit(MOV(dest, retype(packed_consts, dest.type))); @@ -922,26 +786,7 @@ if (nir_intrinsic_infos[instr->intrinsic].has_dest) dest = get_nir_dest(instr->dest); - src_reg surface; - nir_const_value *const_surface = nir_src_as_const_value(instr->src[0]); - if (const_surface) { - unsigned surf_index = prog_data->base.binding_table.ssbo_start + - const_surface->u32[0]; - surface = brw_imm_ud(surf_index); - brw_mark_surface_used(&prog_data->base, surf_index); - } else { - surface = src_reg(this, glsl_type::uint_type); - emit(ADD(dst_reg(surface), get_nir_src(instr->src[0]), - brw_imm_ud(prog_data->base.binding_table.ssbo_start))); - - /* Assume this may touch any UBO. This is the same we do for other - * UBO/SSBO accesses with non-constant surface. 
- */ - brw_mark_surface_used(&prog_data->base, - prog_data->base.binding_table.ssbo_start + - nir->info.num_ssbos - 1); - } - + src_reg surface = get_nir_ssbo_intrinsic_index(instr); src_reg offset = get_nir_src(instr->src[1], 1); src_reg data1; if (op != BRW_AOP_INC && op != BRW_AOP_DEC && op != BRW_AOP_PREDEC) @@ -973,34 +818,34 @@ brw_conditional_for_nir_comparison(nir_op op) { switch (op) { - case nir_op_flt: - case nir_op_ilt: - case nir_op_ult: + case nir_op_flt32: + case nir_op_ilt32: + case nir_op_ult32: return BRW_CONDITIONAL_L; - case nir_op_fge: - case nir_op_ige: - case nir_op_uge: + case nir_op_fge32: + case nir_op_ige32: + case nir_op_uge32: return BRW_CONDITIONAL_GE; - case nir_op_feq: - case nir_op_ieq: - case nir_op_ball_fequal2: - case nir_op_ball_iequal2: - case nir_op_ball_fequal3: - case nir_op_ball_iequal3: - case nir_op_ball_fequal4: - case nir_op_ball_iequal4: + case nir_op_feq32: + case nir_op_ieq32: + case nir_op_b32all_fequal2: + case nir_op_b32all_iequal2: + case nir_op_b32all_fequal3: + case nir_op_b32all_iequal3: + case nir_op_b32all_fequal4: + case nir_op_b32all_iequal4: return BRW_CONDITIONAL_Z; - case nir_op_fne: - case nir_op_ine: - case nir_op_bany_fnequal2: - case nir_op_bany_inequal2: - case nir_op_bany_fnequal3: - case nir_op_bany_inequal3: - case nir_op_bany_fnequal4: - case nir_op_bany_inequal4: + case nir_op_fne32: + case nir_op_ine32: + case nir_op_b32any_fnequal2: + case nir_op_b32any_inequal2: + case nir_op_b32any_fnequal3: + case nir_op_b32any_inequal3: + case nir_op_b32any_fnequal4: + case nir_op_b32any_inequal4: return BRW_CONDITIONAL_NZ; default: @@ -1020,20 +865,20 @@ nir_instr_as_alu(instr->src[0].src.ssa->parent_instr); switch (cmp_instr->op) { - case nir_op_bany_fnequal2: - case nir_op_bany_inequal2: - case nir_op_bany_fnequal3: - case nir_op_bany_inequal3: - case nir_op_bany_fnequal4: - case nir_op_bany_inequal4: + case nir_op_b32any_fnequal2: + case nir_op_b32any_inequal2: + case nir_op_b32any_fnequal3: + case 
nir_op_b32any_inequal3: + case nir_op_b32any_fnequal4: + case nir_op_b32any_inequal4: *predicate = BRW_PREDICATE_ALIGN16_ANY4H; break; - case nir_op_ball_fequal2: - case nir_op_ball_iequal2: - case nir_op_ball_fequal3: - case nir_op_ball_iequal3: - case nir_op_ball_fequal4: - case nir_op_ball_iequal4: + case nir_op_b32all_fequal2: + case nir_op_b32all_iequal2: + case nir_op_b32all_fequal3: + case nir_op_b32all_iequal3: + case nir_op_b32all_fequal4: + case nir_op_b32all_iequal4: *predicate = BRW_PREDICATE_ALIGN16_ALL4H; break; default: @@ -1225,6 +1070,12 @@ inst->saturate = instr->dest.saturate; break; + case nir_op_uadd_sat: + assert(nir_dest_bit_size(instr->dest.dest) < 64); + inst = emit(ADD(dst, op[0], op[1])); + inst->saturate = true; + break; + case nir_op_fmul: inst = emit(MUL(dst, op[0], op[1])); inst->saturate = instr->dest.saturate; @@ -1233,21 +1084,22 @@ case nir_op_imul: { assert(nir_dest_bit_size(instr->dest.dest) < 64); if (devinfo->gen < 8) { - nir_const_value *value0 = nir_src_as_const_value(instr->src[0].src); - nir_const_value *value1 = nir_src_as_const_value(instr->src[1].src); - /* For integer multiplication, the MUL uses the low 16 bits of one of * the operands (src0 through SNB, src1 on IVB and later). The MACH * accumulates in the contribution of the upper 16 bits of that * operand. If we can determine that one of the args is in the low * 16 bits, though, we can just emit a single MUL. 
*/ - if (value0 && value0->u32[0] < (1 << 16)) { + if (nir_src_is_const(instr->src[0].src) && + nir_alu_instr_src_read_mask(instr, 0) == 1 && + nir_src_comp_as_uint(instr->src[0].src, 0) < (1 << 16)) { if (devinfo->gen < 7) emit(MUL(dst, op[0], op[1])); else emit(MUL(dst, op[1], op[0])); - } else if (value1 && value1->u32[0] < (1 << 16)) { + } else if (nir_src_is_const(instr->src[1].src) && + nir_alu_instr_src_read_mask(instr, 1) == 1 && + nir_src_comp_as_uint(instr->src[1].src, 0) < (1 << 16)) { if (devinfo->gen < 7) emit(MUL(dst, op[1], op[0])); else @@ -1473,18 +1325,18 @@ case nir_op_fddy_fine: unreachable("derivatives are not valid in vertex shaders"); - case nir_op_ilt: - case nir_op_ult: - case nir_op_ige: - case nir_op_uge: - case nir_op_ieq: - case nir_op_ine: + case nir_op_ilt32: + case nir_op_ult32: + case nir_op_ige32: + case nir_op_uge32: + case nir_op_ieq32: + case nir_op_ine32: assert(nir_dest_bit_size(instr->dest.dest) < 64); /* Fallthrough */ - case nir_op_flt: - case nir_op_fge: - case nir_op_feq: - case nir_op_fne: { + case nir_op_flt32: + case nir_op_fge32: + case nir_op_feq32: + case nir_op_fne32: { enum brw_conditional_mod conditional_mod = brw_conditional_for_nir_comparison(instr->op); @@ -1505,14 +1357,14 @@ break; } - case nir_op_ball_iequal2: - case nir_op_ball_iequal3: - case nir_op_ball_iequal4: + case nir_op_b32all_iequal2: + case nir_op_b32all_iequal3: + case nir_op_b32all_iequal4: assert(nir_dest_bit_size(instr->dest.dest) < 64); /* Fallthrough */ - case nir_op_ball_fequal2: - case nir_op_ball_fequal3: - case nir_op_ball_fequal4: { + case nir_op_b32all_fequal2: + case nir_op_b32all_fequal3: + case nir_op_b32all_fequal4: { unsigned swiz = brw_swizzle_for_size(nir_op_infos[instr->op].input_sizes[0]); @@ -1524,14 +1376,14 @@ break; } - case nir_op_bany_inequal2: - case nir_op_bany_inequal3: - case nir_op_bany_inequal4: + case nir_op_b32any_inequal2: + case nir_op_b32any_inequal3: + case nir_op_b32any_inequal4: 
assert(nir_dest_bit_size(instr->dest.dest) < 64); /* Fallthrough */ - case nir_op_bany_fnequal2: - case nir_op_bany_fnequal3: - case nir_op_bany_fnequal4: { + case nir_op_b32any_fnequal2: + case nir_op_b32any_fnequal3: + case nir_op_b32any_fnequal4: { unsigned swiz = brw_swizzle_for_size(nir_op_infos[instr->op].input_sizes[0]); @@ -1579,8 +1431,9 @@ emit(AND(dst, op[0], op[1])); break; - case nir_op_b2i: - case nir_op_b2f: + case nir_op_b2i32: + case nir_op_b2f32: + case nir_op_b2f64: if (nir_dest_bit_size(instr->dest.dest) > 32) { assert(dst.type == BRW_REGISTER_TYPE_DF); emit_conversion_to_double(dst, negate(op[0]), false); @@ -1589,7 +1442,7 @@ } break; - case nir_op_f2b: + case nir_op_f2b32: if (nir_src_bit_size(instr->src[0].src) == 64) { /* We use a MOV with conditional_mod to check if the provided value is * 0.0. We want this to flush denormalized numbers to zero, so we set a @@ -1610,7 +1463,7 @@ } break; - case nir_op_i2b: + case nir_op_i2b32: emit(CMP(dst, op[0], brw_imm_d(0), BRW_CONDITIONAL_NZ)); break; @@ -1930,7 +1783,7 @@ inst->saturate = instr->dest.saturate; break; - case nir_op_bcsel: + case nir_op_b32csel: enum brw_predicate predicate; if (!optimize_predicate(instr, &predicate)) { emit(CMP(dst_null_d(), op[0], brw_imm_d(0), BRW_CONDITIONAL_NZ)); @@ -2151,6 +2004,7 @@ case nir_tex_src_offset: { nir_const_value *const_offset = nir_src_as_const_value(instr->src[i].src); + assert(nir_src_bit_size(instr->src[i].src) == 32); if (!const_offset || !brw_texture_offset(const_offset->i32, nir_tex_instr_src_size(instr, i), @@ -2162,20 +2016,6 @@ } case nir_tex_src_texture_offset: { - /* The highest texture which may be used by this operation is - * the last element of the array. Mark it here, because the generator - * doesn't have enough information to determine the bound. 
- */ - uint32_t array_size = instr->texture_array_size; - uint32_t max_used = texture + array_size - 1; - if (instr->op == nir_texop_tg4) { - max_used += prog_data->base.binding_table.gather_texture_start; - } else { - max_used += prog_data->base.binding_table.texture_start; - } - - brw_mark_surface_used(&prog_data->base, max_used); - /* Emit code to evaluate the actual indexing expression */ src_reg src = get_nir_src(instr->src[i].src, 1); src_reg temp(this, glsl_type::uint_type); diff -Nru mesa-18.3.3/src/intel/compiler/brw_vec4_reg_allocate.cpp mesa-19.0.1/src/intel/compiler/brw_vec4_reg_allocate.cpp --- mesa-18.3.3/src/intel/compiler/brw_vec4_reg_allocate.cpp 2017-11-05 00:14:08.000000000 +0000 +++ mesa-19.0.1/src/intel/compiler/brw_vec4_reg_allocate.cpp 2019-03-31 23:16:37.000000000 +0000 @@ -502,18 +502,18 @@ } void -vec4_visitor::spill_reg(int spill_reg_nr) +vec4_visitor::spill_reg(unsigned spill_reg_nr) { assert(alloc.sizes[spill_reg_nr] == 1 || alloc.sizes[spill_reg_nr] == 2); - unsigned int spill_offset = last_scratch; + unsigned spill_offset = last_scratch; last_scratch += alloc.sizes[spill_reg_nr]; /* Generate spill/unspill instructions for the objects being spilled. */ - int scratch_reg = -1; + unsigned scratch_reg = ~0u; foreach_block_and_inst(block, vec4_instruction, inst, cfg) { - for (unsigned int i = 0; i < 3; i++) { + for (unsigned i = 0; i < 3; i++) { if (inst->src[i].file == VGRF && inst->src[i].nr == spill_reg_nr) { - if (scratch_reg == -1 || + if (scratch_reg == ~0u || !can_use_scratch_for_source(inst, i, scratch_reg)) { /* We need to unspill anyway so make sure we read the full vec4 * in any case. 
This way, the cached register can be reused @@ -529,7 +529,7 @@ dst_reg(temp), inst->src[i], spill_offset); temp.offset = inst->src[i].offset; } - assert(scratch_reg != -1); + assert(scratch_reg != ~0u); inst->src[i].nr = scratch_reg; } } diff -Nru mesa-18.3.3/src/intel/compiler/brw_vec4_tcs.cpp mesa-19.0.1/src/intel/compiler/brw_vec4_tcs.cpp --- mesa-18.3.3/src/intel/compiler/brw_vec4_tcs.cpp 2018-09-27 19:13:54.000000000 +0000 +++ mesa-19.0.1/src/intel/compiler/brw_vec4_tcs.cpp 2019-03-31 23:16:37.000000000 +0000 @@ -260,10 +260,8 @@ src_reg indirect_offset = get_indirect_offset(instr); unsigned imm_offset = instr->const_index[0]; - nir_const_value *vertex_const = nir_src_as_const_value(instr->src[0]); - src_reg vertex_index = - vertex_const ? src_reg(brw_imm_ud(vertex_const->u32[0])) - : get_nir_src(instr->src[0], BRW_REGISTER_TYPE_UD, 1); + src_reg vertex_index = retype(get_nir_src_imm(instr->src[0]), + BRW_REGISTER_TYPE_UD); unsigned first_component = nir_intrinsic_component(instr); if (nir_dest_bit_size(instr->dest) == 64) { @@ -380,7 +378,7 @@ void *mem_ctx, const struct brw_tcs_prog_key *key, struct brw_tcs_prog_data *prog_data, - const nir_shader *src_shader, + nir_shader *nir, int shader_time_index, char **error_str) { @@ -389,7 +387,6 @@ const bool is_scalar = compiler->scalar_stage[MESA_SHADER_TESS_CTRL]; const unsigned *assembly; - nir_shader *nir = nir_shader_clone(mem_ctx, src_shader); nir->info.outputs_written = key->outputs_written; nir->info.patch_outputs_written = key->patch_outputs_written; diff -Nru mesa-18.3.3/src/intel/compiler/brw_vec4_visitor.cpp mesa-19.0.1/src/intel/compiler/brw_vec4_visitor.cpp --- mesa-18.3.3/src/intel/compiler/brw_vec4_visitor.cpp 2018-12-07 18:58:04.000000000 +0000 +++ mesa-19.0.1/src/intel/compiler/brw_vec4_visitor.cpp 2019-03-31 23:16:37.000000000 +0000 @@ -1201,12 +1201,14 @@ if (output_reg[VARYING_SLOT_CLIP_DIST0][0].file != BAD_FILE) { current_annotation = "Clipping flags"; dst_reg flags0 = dst_reg(this, 
glsl_type::uint_type); - dst_reg flags1 = dst_reg(this, glsl_type::uint_type); emit(CMP(dst_null_f(), src_reg(output_reg[VARYING_SLOT_CLIP_DIST0][0]), brw_imm_f(0.0f), BRW_CONDITIONAL_L)); emit(VS_OPCODE_UNPACK_FLAGS_SIMD4X2, flags0, brw_imm_d(0)); emit(OR(header1_w, src_reg(header1_w), src_reg(flags0))); + } + if (output_reg[VARYING_SLOT_CLIP_DIST1][0].file != BAD_FILE) { + dst_reg flags1 = dst_reg(this, glsl_type::uint_type); emit(CMP(dst_null_f(), src_reg(output_reg[VARYING_SLOT_CLIP_DIST1][0]), brw_imm_f(0.0f), BRW_CONDITIONAL_L)); emit(VS_OPCODE_UNPACK_FLAGS_SIMD4X2, flags1, brw_imm_d(0)); emit(SHL(flags1, src_reg(flags1), brw_imm_d(4))); @@ -1335,8 +1337,8 @@ } } -static int -align_interleaved_urb_mlen(const struct gen_device_info *devinfo, int mlen) +static unsigned +align_interleaved_urb_mlen(const struct gen_device_info *devinfo, unsigned mlen) { if (devinfo->gen >= 6) { /* URB data written (does not include the message header reg) must @@ -1746,8 +1748,6 @@ src = byte_offset(src, 16); } - brw_mark_surface_used(&prog_data->base, index); - if (is_64bit) { temp = retype(temp, BRW_REGISTER_TYPE_DF); shuffle_64bit_data(orig_temp, src_reg(temp), false, block, inst); diff -Nru mesa-18.3.3/src/intel/compiler/gen6_gs_visitor.cpp mesa-19.0.1/src/intel/compiler/gen6_gs_visitor.cpp --- mesa-18.3.3/src/intel/compiler/gen6_gs_visitor.cpp 2018-07-14 15:13:00.000000000 +0000 +++ mesa-19.0.1/src/intel/compiler/gen6_gs_visitor.cpp 2019-03-31 23:16:37.000000000 +0000 @@ -274,8 +274,8 @@ emit(GS_OPCODE_SET_DWORD_2, dst_reg(MRF, mrf), flags_data); } -static int -align_interleaved_urb_mlen(int mlen) +static unsigned +align_interleaved_urb_mlen(unsigned mlen) { /* URB data written (does not include the message header reg) must * be a multiple of 256 bits, or 2 VS registers. 
See vol5c.5, diff -Nru mesa-18.3.3/src/intel/compiler/meson.build mesa-19.0.1/src/intel/compiler/meson.build --- mesa-18.3.3/src/intel/compiler/meson.build 2019-02-01 12:03:20.000000000 +0000 +++ mesa-19.0.1/src/intel/compiler/meson.build 2019-03-31 23:16:37.000000000 +0000 @@ -55,8 +55,8 @@ 'brw_fs.h', 'brw_fs_live_variables.cpp', 'brw_fs_live_variables.h', - 'brw_fs_lower_conversions.cpp', 'brw_fs_lower_pack.cpp', + 'brw_fs_lower_regioning.cpp', 'brw_fs_nir.cpp', 'brw_fs_reg_allocate.cpp', 'brw_fs_register_coalesce.cpp', @@ -78,6 +78,7 @@ 'brw_nir_attribute_workarounds.c', 'brw_nir_lower_cs_intrinsics.c', 'brw_nir_lower_image_load_store.c', + 'brw_nir_lower_mem_access_bit_sizes.c', 'brw_nir_opt_peephole_ffma.c', 'brw_nir_tcs_workarounds.c', 'brw_packed_float.c', @@ -157,7 +158,8 @@ libintel_compiler, libintel_common, libintel_dev, libmesa_util, libisl, ], dependencies : [dep_thread, dep_dl, idep_gtest, idep_nir], - ) + ), + suite : ['intel'], ) endforeach endif diff -Nru mesa-18.3.3/src/intel/compiler/test_fs_cmod_propagation.cpp mesa-19.0.1/src/intel/compiler/test_fs_cmod_propagation.cpp --- mesa-18.3.3/src/intel/compiler/test_fs_cmod_propagation.cpp 2018-03-26 16:53:06.000000000 +0000 +++ mesa-19.0.1/src/intel/compiler/test_fs_cmod_propagation.cpp 2019-03-31 23:16:37.000000000 +0000 @@ -889,3 +889,35 @@ EXPECT_EQ(BRW_OPCODE_ADD, instruction(block0, 1)->opcode); EXPECT_EQ(BRW_PREDICATE_NORMAL, instruction(block0, 1)->predicate); } + +TEST_F(cmod_propagation_test, signed_unsigned_comparison_mismatch) +{ + const fs_builder &bld = v->bld; + fs_reg dest0 = v->vgrf(glsl_type::int_type); + fs_reg src0 = v->vgrf(glsl_type::int_type); + src0.type = BRW_REGISTER_TYPE_W; + + bld.ASR(dest0, negate(src0), brw_imm_d(15)); + bld.CMP(bld.null_reg_ud(), retype(dest0, BRW_REGISTER_TYPE_UD), + brw_imm_ud(0u), BRW_CONDITIONAL_LE); + + /* = Before = + * 0: asr(8) dest:D -src0:W 15D + * 1: cmp.le.f0(8) null:UD dest:UD 0UD + * + * = After = + * (no changes) + */ + v->calculate_cfg(); 
+ bblock_t *block0 = v->cfg->blocks[0]; + + EXPECT_EQ(0, block0->start_ip); + EXPECT_EQ(1, block0->end_ip); + + EXPECT_FALSE(cmod_propagation(v)); + EXPECT_EQ(0, block0->start_ip); + EXPECT_EQ(1, block0->end_ip); + EXPECT_EQ(BRW_OPCODE_ASR, instruction(block0, 0)->opcode); + EXPECT_EQ(BRW_OPCODE_CMP, instruction(block0, 1)->opcode); + EXPECT_EQ(BRW_CONDITIONAL_LE, instruction(block0, 1)->conditional_mod); +} diff -Nru mesa-18.3.3/src/intel/compiler/test_vec4_cmod_propagation.cpp mesa-19.0.1/src/intel/compiler/test_vec4_cmod_propagation.cpp --- mesa-18.3.3/src/intel/compiler/test_vec4_cmod_propagation.cpp 2018-07-14 15:13:03.000000000 +0000 +++ mesa-19.0.1/src/intel/compiler/test_vec4_cmod_propagation.cpp 2019-03-31 23:16:37.000000000 +0000 @@ -57,7 +57,7 @@ protected: /* Dummy implementation for pure virtual methods */ - virtual dst_reg *make_reg_for_system_value(int location) + virtual dst_reg *make_reg_for_system_value(int /* location */) { unreachable("Not reached"); } @@ -82,12 +82,12 @@ unreachable("Not reached"); } - virtual void emit_urb_write_header(int mrf) + virtual void emit_urb_write_header(int /* mrf */) { unreachable("Not reached"); } - virtual vec4_instruction *emit_urb_write_opcode(bool complete) + virtual vec4_instruction *emit_urb_write_opcode(bool /* complete */) { unreachable("Not reached"); } diff -Nru mesa-18.3.3/src/intel/compiler/test_vec4_copy_propagation.cpp mesa-19.0.1/src/intel/compiler/test_vec4_copy_propagation.cpp --- mesa-18.3.3/src/intel/compiler/test_vec4_copy_propagation.cpp 2017-11-05 00:14:08.000000000 +0000 +++ mesa-19.0.1/src/intel/compiler/test_vec4_copy_propagation.cpp 2019-03-31 23:16:37.000000000 +0000 @@ -54,7 +54,7 @@ } protected: - virtual dst_reg *make_reg_for_system_value(int location) + virtual dst_reg *make_reg_for_system_value(int /* location */) { unreachable("Not reached"); } @@ -74,12 +74,12 @@ unreachable("Not reached"); } - virtual void emit_urb_write_header(int mrf) + virtual void emit_urb_write_header(int /* 
mrf */) { unreachable("Not reached"); } - virtual vec4_instruction *emit_urb_write_opcode(bool complete) + virtual vec4_instruction *emit_urb_write_opcode(bool /* complete */) { unreachable("Not reached"); } diff -Nru mesa-18.3.3/src/intel/compiler/test_vec4_register_coalesce.cpp mesa-19.0.1/src/intel/compiler/test_vec4_register_coalesce.cpp --- mesa-18.3.3/src/intel/compiler/test_vec4_register_coalesce.cpp 2017-11-05 00:14:08.000000000 +0000 +++ mesa-19.0.1/src/intel/compiler/test_vec4_register_coalesce.cpp 2019-03-31 23:16:37.000000000 +0000 @@ -57,7 +57,7 @@ } protected: - virtual dst_reg *make_reg_for_system_value(int location) + virtual dst_reg *make_reg_for_system_value(int /* location */) { unreachable("Not reached"); } @@ -77,12 +77,12 @@ unreachable("Not reached"); } - virtual void emit_urb_write_header(int mrf) + virtual void emit_urb_write_header(int /* mrf */) { unreachable("Not reached"); } - virtual vec4_instruction *emit_urb_write_opcode(bool complete) + virtual vec4_instruction *emit_urb_write_opcode(bool /* complete */) { unreachable("Not reached"); } diff -Nru mesa-18.3.3/src/intel/dev/gen_device_info.c mesa-19.0.1/src/intel/dev/gen_device_info.c --- mesa-18.3.3/src/intel/dev/gen_device_info.c 2018-12-07 18:58:04.000000000 +0000 +++ mesa-19.0.1/src/intel/dev/gen_device_info.c 2019-03-31 23:16:37.000000000 +0000 @@ -777,6 +777,7 @@ .num_subslices = { 2, }, .num_eu_per_subslice = 6, .l3_banks = 2, + .urb.size = 192, .simulator_id = 24, }; static const struct gen_device_info gen_device_info_cfl_gt2 = { diff -Nru mesa-18.3.3/src/intel/genxml/gen10.xml mesa-19.0.1/src/intel/genxml/gen10.xml --- mesa-18.3.3/src/intel/genxml/gen10.xml 2018-12-07 18:58:04.000000000 +0000 +++ mesa-19.0.1/src/intel/genxml/gen10.xml 2019-03-31 23:16:37.000000000 +0000 @@ -219,14 +219,9 @@ - - - - - - + @@ -495,7 +490,6 @@ - @@ -813,7 +807,7 @@ - + @@ -839,7 +833,7 @@ - + @@ -855,7 +849,7 @@ - + @@ -872,7 +866,7 @@ - + @@ -889,7 +883,7 @@ - + @@ -906,7 +900,7 @@ - + @@ -923,7 
+917,7 @@ - + @@ -940,7 +934,7 @@ - + @@ -949,7 +943,7 @@ - + @@ -958,7 +952,7 @@ - + @@ -967,7 +961,7 @@ - + @@ -976,7 +970,7 @@ - + @@ -985,7 +979,7 @@ - + @@ -993,13 +987,13 @@ - + - + @@ -1009,7 +1003,7 @@ - + @@ -1019,7 +1013,7 @@ - + @@ -1030,7 +1024,7 @@ - + @@ -1040,7 +1034,7 @@ - + @@ -1080,58 +1074,58 @@ - + - + - + - + - + - + - + - + - + - + - + @@ -1157,8 +1151,7 @@ - - + @@ -1169,7 +1162,7 @@ - + @@ -1188,7 +1181,7 @@ - + @@ -1236,7 +1229,7 @@ - + @@ -1260,7 +1253,7 @@ - + @@ -1284,7 +1277,7 @@ - + @@ -1308,7 +1301,7 @@ - + @@ -1334,7 +1327,7 @@ - + @@ -1360,7 +1353,7 @@ - + @@ -1368,11 +1361,11 @@ - + - + @@ -1441,20 +1434,19 @@ - + - - + - + @@ -1500,7 +1492,7 @@ - + @@ -1511,13 +1503,12 @@ - - + - + @@ -1531,7 +1522,7 @@ - + @@ -1541,7 +1532,7 @@ - + @@ -1555,7 +1546,7 @@ - + @@ -1565,7 +1556,7 @@ - + @@ -1576,7 +1567,7 @@ - + @@ -1639,7 +1630,7 @@ - + @@ -1656,7 +1647,7 @@ - + @@ -1694,7 +1685,7 @@ - + @@ -1704,7 +1695,7 @@ - + @@ -1714,7 +1705,7 @@ - + @@ -1724,7 +1715,7 @@ - + @@ -1734,7 +1725,7 @@ - + @@ -1744,7 +1735,7 @@ - + @@ -1805,7 +1796,7 @@ - + @@ -1823,7 +1814,7 @@ - + @@ -1834,7 +1825,7 @@ - + @@ -1848,7 +1839,7 @@ - + @@ -1857,7 +1848,7 @@ - + @@ -1866,7 +1857,7 @@ - + @@ -1875,7 +1866,7 @@ - + @@ -1884,7 +1875,7 @@ - + @@ -1893,7 +1884,7 @@ - + @@ -1902,7 +1893,7 @@ - + @@ -1972,7 +1963,7 @@ - + @@ -2005,7 +1996,7 @@ - + @@ -2019,7 +2010,7 @@ - + @@ -2028,7 +2019,7 @@ - + @@ -2052,7 +2043,10 @@ - + + + + @@ -2060,7 +2054,7 @@ - + @@ -2068,8 +2062,7 @@ - - + @@ -2078,7 +2071,7 @@ - + @@ -2097,21 +2090,20 @@ - + - - + - + @@ -2144,7 +2136,7 @@ - + @@ -2174,7 +2166,7 @@ - + @@ -2184,7 +2176,7 @@ - + @@ -2195,7 +2187,7 @@ - + @@ -2206,7 +2198,7 @@ - + @@ -2217,7 +2209,7 @@ - + @@ -2228,7 +2220,7 @@ - + @@ -2239,7 +2231,7 @@ - + @@ -2250,7 +2242,7 @@ - + @@ -2263,7 +2255,7 @@ - + @@ -2303,7 +2295,7 @@ - + @@ -2314,7 +2306,7 @@ - + @@ -2338,7 +2330,7 @@ - + @@ -2378,7 +2370,7 @@ - + @@ -2386,7 +2378,7 @@ - + @@ -2395,7 +2387,7 @@ - + @@ 
-2404,7 +2396,7 @@ - + @@ -2413,7 +2405,7 @@ - + @@ -2457,7 +2449,7 @@ - + @@ -2514,7 +2506,7 @@ - + @@ -2523,7 +2515,7 @@ - + @@ -2551,7 +2543,7 @@ - + @@ -2573,7 +2565,7 @@ - + @@ -2602,7 +2594,7 @@ - + @@ -2612,7 +2604,7 @@ - + @@ -2622,7 +2614,7 @@ - + @@ -2662,7 +2654,7 @@ - + @@ -2686,7 +2678,7 @@ - + @@ -2704,7 +2696,7 @@ - + @@ -2758,7 +2750,7 @@ - + @@ -2769,7 +2761,7 @@ - + @@ -2892,7 +2884,7 @@ - + @@ -2928,7 +2920,7 @@ - + @@ -3000,7 +2992,7 @@ - + @@ -3013,7 +3005,7 @@ - + @@ -3058,6 +3050,8 @@ + + @@ -3068,7 +3062,7 @@ - + @@ -3078,7 +3072,7 @@ - + @@ -3087,7 +3081,7 @@ - + @@ -3096,7 +3090,7 @@ - + @@ -3208,7 +3202,7 @@ - + @@ -3229,7 +3223,7 @@ - + @@ -3252,7 +3246,7 @@ - + @@ -3267,7 +3261,7 @@ - + @@ -3311,27 +3305,27 @@ - + - + - + - + - + - + - + @@ -3342,16 +3336,16 @@ - + - + - + @@ -3553,6 +3547,46 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff -Nru mesa-18.3.3/src/intel/genxml/gen11.xml mesa-19.0.1/src/intel/genxml/gen11.xml --- mesa-18.3.3/src/intel/genxml/gen11.xml 2018-12-07 18:58:04.000000000 +0000 +++ mesa-19.0.1/src/intel/genxml/gen11.xml 2019-03-31 23:16:37.000000000 +0000 @@ -220,14 +220,9 @@ - - - - - - + @@ -496,7 +491,6 @@ - @@ -823,7 +817,7 @@ - + @@ -849,7 +843,7 @@ - + @@ -858,7 +852,7 @@ - + @@ -874,7 +868,7 @@ - + @@ -891,7 +885,7 @@ - + @@ -908,7 +902,7 @@ - + @@ -925,7 +919,7 @@ - + @@ -942,7 +936,7 @@ - + @@ -959,7 +953,7 @@ - + @@ -968,7 +962,7 @@ - + @@ -977,7 +971,7 @@ - + @@ -986,7 +980,7 @@ - + @@ -995,7 +989,7 @@ - + @@ -1004,7 +998,7 @@ - + @@ -1012,13 +1006,13 @@ - + - + @@ -1028,7 +1022,7 @@ - + @@ -1038,7 +1032,7 @@ - + @@ -1049,7 +1043,7 @@ - + @@ -1059,7 +1053,7 @@ - + @@ -1099,58 +1093,58 @@ - + - + - + - + - + - + - + - + - + - + - + @@ -1176,8 +1170,7 @@ - - + @@ -1188,7 +1181,7 @@ - + @@ -1207,7 +1200,7 @@ - + @@ -1254,7 +1247,7 @@ - + @@ -1278,7 +1271,7 @@ - + @@ -1302,7 +1295,7 @@ - + @@ -1326,7 +1319,7 @@ - + @@ -1352,7 +1345,7 @@ - + @@ -1378,7 +1371,7 @@ - + @@ 
-1386,11 +1379,11 @@ - + - + @@ -1457,14 +1450,13 @@ - + - - + @@ -1475,7 +1467,7 @@ - + @@ -1520,7 +1512,7 @@ - + @@ -1531,13 +1523,12 @@ - - + - + @@ -1551,7 +1542,7 @@ - + @@ -1561,7 +1552,7 @@ - + @@ -1575,7 +1566,7 @@ - + @@ -1585,7 +1576,7 @@ - + @@ -1596,7 +1587,7 @@ - + @@ -1659,7 +1650,7 @@ - + @@ -1676,7 +1667,7 @@ - + @@ -1714,7 +1705,7 @@ - + @@ -1724,7 +1715,7 @@ - + @@ -1734,7 +1725,7 @@ - + @@ -1744,7 +1735,7 @@ - + @@ -1754,7 +1745,7 @@ - + @@ -1764,7 +1755,7 @@ - + @@ -1825,7 +1816,7 @@ - + @@ -1843,7 +1834,7 @@ - + @@ -1854,7 +1845,7 @@ - + @@ -1868,7 +1859,7 @@ - + @@ -1877,7 +1868,7 @@ - + @@ -1886,7 +1877,7 @@ - + @@ -1895,7 +1886,7 @@ - + @@ -1904,7 +1895,7 @@ - + @@ -1913,7 +1904,7 @@ - + @@ -1922,7 +1913,7 @@ - + @@ -1992,7 +1983,7 @@ - + @@ -2025,7 +2016,7 @@ - + @@ -2039,7 +2030,7 @@ - + @@ -2048,7 +2039,7 @@ - + @@ -2072,7 +2063,10 @@ - + + + + @@ -2080,7 +2074,7 @@ - + @@ -2088,8 +2082,7 @@ - - + @@ -2098,7 +2091,7 @@ - + @@ -2117,21 +2110,20 @@ - + - - + - + @@ -2164,7 +2156,7 @@ - + @@ -2194,7 +2186,7 @@ - + @@ -2204,7 +2196,7 @@ - + @@ -2215,7 +2207,7 @@ - + @@ -2226,7 +2218,7 @@ - + @@ -2237,7 +2229,7 @@ - + @@ -2248,7 +2240,7 @@ - + @@ -2259,7 +2251,7 @@ - + @@ -2270,7 +2262,7 @@ - + @@ -2283,7 +2275,7 @@ - + @@ -2323,7 +2315,7 @@ - + @@ -2334,7 +2326,7 @@ - + @@ -2358,7 +2350,7 @@ - + @@ -2398,7 +2390,7 @@ - + @@ -2406,7 +2398,7 @@ - + @@ -2415,7 +2407,7 @@ - + @@ -2424,7 +2416,7 @@ - + @@ -2433,7 +2425,7 @@ - + @@ -2476,7 +2468,7 @@ - + @@ -2533,7 +2525,7 @@ - + @@ -2542,7 +2534,7 @@ - + @@ -2570,7 +2562,7 @@ - + @@ -2592,7 +2584,7 @@ - + @@ -2621,7 +2613,7 @@ - + @@ -2631,7 +2623,7 @@ - + @@ -2641,7 +2633,7 @@ - + @@ -2676,7 +2668,7 @@ - + @@ -2695,7 +2687,7 @@ - + @@ -2713,7 +2705,7 @@ - + @@ -2762,7 +2754,7 @@ - + @@ -2772,7 +2764,7 @@ - + @@ -2857,7 +2849,7 @@ - + @@ -2893,7 +2885,7 @@ - + @@ -2993,7 +2985,7 @@ - + @@ -3007,7 +2999,7 @@ - + @@ -3053,6 +3045,8 @@ + + @@ -3063,7 +3057,7 @@ - + @@ -3073,7 +3067,7 @@ - + @@ 
-3082,7 +3076,7 @@ - + @@ -3091,7 +3085,7 @@ - + @@ -3149,7 +3143,7 @@ - + @@ -3213,7 +3207,7 @@ - + @@ -3224,7 +3218,7 @@ - + @@ -3247,7 +3241,7 @@ - + @@ -3255,7 +3249,7 @@ - + @@ -3270,7 +3264,7 @@ - + @@ -3314,27 +3308,27 @@ - + - + - + - + - + - + - + @@ -3345,16 +3339,16 @@ - + - + - + @@ -3546,11 +3540,53 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff -Nru mesa-18.3.3/src/intel/genxml/gen45.xml mesa-19.0.1/src/intel/genxml/gen45.xml --- mesa-18.3.3/src/intel/genxml/gen45.xml 2018-09-27 19:13:54.000000000 +0000 +++ mesa-19.0.1/src/intel/genxml/gen45.xml 2019-03-31 23:16:37.000000000 +0000 @@ -619,7 +619,7 @@ - + @@ -638,7 +638,7 @@ - + @@ -650,7 +650,7 @@ - + @@ -663,7 +663,7 @@ - + @@ -675,7 +675,7 @@ - + @@ -725,7 +725,7 @@ - + @@ -739,7 +739,7 @@ - + @@ -748,7 +748,7 @@ - + @@ -764,7 +764,7 @@ - + @@ -778,7 +778,7 @@ - + @@ -794,7 +794,7 @@ - + @@ -804,7 +804,7 @@ - + @@ -815,7 +815,7 @@ - + @@ -826,7 +826,7 @@ - + @@ -837,7 +837,7 @@ - + @@ -845,7 +845,7 @@ - + @@ -855,7 +855,7 @@ - + @@ -866,7 +866,7 @@ - + @@ -918,7 +918,7 @@ - + @@ -944,7 +944,7 @@ - + @@ -955,7 +955,7 @@ - + @@ -973,7 +973,7 @@ - + @@ -982,7 +982,7 @@ - + @@ -1002,7 +1002,7 @@ - + @@ -1025,7 +1025,7 @@ - + @@ -1051,7 +1051,7 @@ - + @@ -1078,7 +1078,7 @@ - + diff -Nru mesa-18.3.3/src/intel/genxml/gen4.xml mesa-19.0.1/src/intel/genxml/gen4.xml --- mesa-18.3.3/src/intel/genxml/gen4.xml 2018-09-27 19:13:54.000000000 +0000 +++ mesa-19.0.1/src/intel/genxml/gen4.xml 2019-03-31 23:16:37.000000000 +0000 @@ -602,7 +602,7 @@ - + @@ -622,7 +622,7 @@ - + @@ -635,7 +635,7 @@ - + @@ -647,7 +647,7 @@ - + @@ -695,7 +695,7 @@ - + @@ -709,7 +709,7 @@ - + @@ -718,7 +718,7 @@ - + @@ -734,7 +734,7 @@ - + @@ -748,7 +748,7 @@ - + @@ -764,7 +764,7 @@ - + @@ -774,7 +774,7 @@ - + @@ -785,7 +785,7 @@ - + @@ -796,7 +796,7 @@ - + @@ -807,7 +807,7 @@ - + @@ -815,7 +815,7 @@ - + @@ -825,7 +825,7 @@ - + @@ -836,7 +836,7 @@ - + @@ -888,7 +888,7 @@ - + @@ -911,7 +911,7 @@ - 
+ @@ -922,7 +922,7 @@ - + @@ -940,7 +940,7 @@ - + @@ -969,7 +969,7 @@ - + @@ -992,7 +992,7 @@ - + @@ -1018,7 +1018,7 @@ - + @@ -1045,7 +1045,7 @@ - + diff -Nru mesa-18.3.3/src/intel/genxml/gen5.xml mesa-19.0.1/src/intel/genxml/gen5.xml --- mesa-18.3.3/src/intel/genxml/gen5.xml 2018-09-27 19:13:54.000000000 +0000 +++ mesa-19.0.1/src/intel/genxml/gen5.xml 2019-03-31 23:16:37.000000000 +0000 @@ -441,7 +441,7 @@ - + @@ -720,7 +720,7 @@ - + @@ -739,7 +739,7 @@ - + @@ -751,7 +751,7 @@ - + @@ -764,7 +764,7 @@ - + @@ -774,7 +774,7 @@ - + @@ -786,7 +786,7 @@ - + @@ -800,7 +800,7 @@ - + @@ -809,7 +809,7 @@ - + @@ -819,7 +819,7 @@ - + @@ -835,7 +835,7 @@ - + @@ -849,7 +849,7 @@ - + @@ -859,7 +859,7 @@ - + @@ -870,7 +870,7 @@ - + @@ -886,7 +886,7 @@ - + @@ -896,7 +896,7 @@ - + @@ -907,7 +907,7 @@ - + @@ -918,7 +918,7 @@ - + @@ -926,7 +926,7 @@ - + @@ -936,7 +936,7 @@ - + @@ -947,7 +947,7 @@ - + @@ -1000,7 +1000,7 @@ - + @@ -1031,7 +1031,7 @@ - + @@ -1043,7 +1043,7 @@ - + @@ -1065,7 +1065,7 @@ i - + @@ -1074,7 +1074,7 @@ - + @@ -1094,7 +1094,7 @@ - + @@ -1117,7 +1117,7 @@ - + @@ -1143,7 +1143,7 @@ - + @@ -1170,7 +1170,7 @@ - + diff -Nru mesa-18.3.3/src/intel/genxml/gen6.xml mesa-19.0.1/src/intel/genxml/gen6.xml --- mesa-18.3.3/src/intel/genxml/gen6.xml 2018-09-27 19:13:54.000000000 +0000 +++ mesa-19.0.1/src/intel/genxml/gen6.xml 2019-03-31 23:16:37.000000000 +0000 @@ -126,8 +126,7 @@ - - + @@ -384,7 +383,6 @@ - @@ -494,7 +492,7 @@ - + @@ -516,7 +514,7 @@ - + @@ -528,7 +526,7 @@ - + @@ -542,7 +540,7 @@ - + @@ -556,7 +554,7 @@ - + @@ -567,7 +565,7 @@ - + @@ -577,7 +575,7 @@ - + @@ -633,7 +631,7 @@ - + @@ -642,12 +640,12 @@ - + - + @@ -656,12 +654,12 @@ - + - + @@ -670,12 +668,12 @@ - + - + @@ -720,11 +718,10 @@ - - + - + @@ -738,7 +735,7 @@ - + @@ -786,7 +783,7 @@ - + @@ -798,25 +795,23 @@ - + - - + - + - - + @@ -828,7 +823,7 @@ - + @@ -842,7 +837,7 @@ - + @@ -852,7 +847,7 @@ - + @@ -876,7 +871,7 @@ - + @@ -886,7 +881,7 @@ - + @@ -897,7 +892,7 @@ - + @@ -908,7 +903,7 @@ - + @@ 
-922,7 +917,7 @@ - + @@ -936,7 +931,7 @@ - + @@ -945,7 +940,7 @@ - + @@ -954,7 +949,7 @@ - + @@ -1062,19 +1057,18 @@ - + - - + - + @@ -1086,7 +1080,7 @@ - + @@ -1097,7 +1091,7 @@ - + @@ -1108,7 +1102,7 @@ - + @@ -1116,7 +1110,7 @@ - + @@ -1130,7 +1124,7 @@ - + @@ -1164,7 +1158,7 @@ - + @@ -1257,7 +1251,7 @@ - + @@ -1267,7 +1261,7 @@ - + @@ -1278,7 +1272,7 @@ - + @@ -1288,7 +1282,7 @@ - + @@ -1315,7 +1309,7 @@ - + @@ -1333,7 +1327,7 @@ - + @@ -1383,7 +1377,7 @@ - + @@ -1396,7 +1390,7 @@ - + @@ -1473,7 +1467,7 @@ - + @@ -1495,7 +1489,7 @@ - + @@ -1527,7 +1521,7 @@ - + @@ -1564,7 +1558,7 @@ - + @@ -1611,7 +1605,7 @@ - + @@ -1624,7 +1618,7 @@ - + @@ -1642,7 +1636,7 @@ - + @@ -1653,7 +1647,7 @@ - + @@ -1689,33 +1683,27 @@ - + - - - - - - @@ -1728,7 +1716,7 @@ - + @@ -1738,7 +1726,7 @@ - + diff -Nru mesa-18.3.3/src/intel/genxml/gen75.xml mesa-19.0.1/src/intel/genxml/gen75.xml --- mesa-18.3.3/src/intel/genxml/gen75.xml 2018-09-27 19:13:54.000000000 +0000 +++ mesa-19.0.1/src/intel/genxml/gen75.xml 2019-03-31 23:16:37.000000000 +0000 @@ -165,7 +165,7 @@ - + @@ -188,8 +188,7 @@ - - + @@ -463,7 +462,6 @@ - @@ -654,7 +652,7 @@ - + @@ -676,7 +674,7 @@ - + @@ -688,7 +686,7 @@ - + @@ -705,7 +703,7 @@ - + @@ -722,7 +720,7 @@ - + @@ -739,7 +737,7 @@ - + @@ -756,7 +754,7 @@ - + @@ -773,7 +771,7 @@ - + @@ -782,7 +780,7 @@ - + @@ -791,7 +789,7 @@ - + @@ -800,7 +798,7 @@ - + @@ -809,7 +807,7 @@ - + @@ -818,7 +816,7 @@ - + @@ -826,11 +824,11 @@ - + - + @@ -840,7 +838,7 @@ - + @@ -850,7 +848,7 @@ - + @@ -861,7 +859,7 @@ - + @@ -871,7 +869,7 @@ - + @@ -924,7 +922,7 @@ - + @@ -933,7 +931,7 @@ - + @@ -942,7 +940,7 @@ - + @@ -951,7 +949,7 @@ - + @@ -960,7 +958,7 @@ - + @@ -969,7 +967,7 @@ - + @@ -999,14 +997,13 @@ - - + - + @@ -1016,7 +1013,7 @@ - + @@ -1035,7 +1032,7 @@ - + @@ -1074,7 +1071,7 @@ - + @@ -1089,7 +1086,7 @@ - + @@ -1104,7 +1101,7 @@ - + @@ -1119,7 +1116,7 @@ - + @@ -1135,7 +1132,7 @@ - + @@ -1151,7 +1148,7 @@ - + @@ -1160,11 +1157,11 @@ - + - + @@ -1227,19 +1224,18 @@ - + - - 
+ - + @@ -1279,13 +1275,12 @@ - + - - + @@ -1296,7 +1291,7 @@ - + @@ -1310,7 +1305,7 @@ - + @@ -1320,7 +1315,7 @@ - + @@ -1354,7 +1349,7 @@ - + @@ -1364,7 +1359,7 @@ - + @@ -1375,7 +1370,7 @@ - + @@ -1432,7 +1427,7 @@ - + @@ -1442,7 +1437,7 @@ - + @@ -1452,7 +1447,7 @@ - + @@ -1462,7 +1457,7 @@ - + @@ -1472,7 +1467,7 @@ - + @@ -1482,7 +1477,7 @@ - + @@ -1529,7 +1524,7 @@ - + @@ -1540,7 +1535,7 @@ - + @@ -1554,7 +1549,7 @@ - + @@ -1563,7 +1558,7 @@ - + @@ -1572,7 +1567,7 @@ - + @@ -1581,7 +1576,7 @@ - + @@ -1590,7 +1585,7 @@ - + @@ -1599,7 +1594,7 @@ - + @@ -1608,7 +1603,7 @@ - + @@ -1646,7 +1641,7 @@ - + @@ -1655,7 +1650,7 @@ - + @@ -1718,7 +1713,10 @@ - + + + + @@ -1729,21 +1727,20 @@ - + - - + - + @@ -1762,20 +1759,19 @@ - + - - + - + @@ -1803,7 +1799,7 @@ - + @@ -1834,7 +1830,7 @@ - + @@ -1845,7 +1841,7 @@ - + @@ -1856,7 +1852,7 @@ - + @@ -1867,7 +1863,7 @@ - + @@ -1878,7 +1874,7 @@ - + @@ -1889,7 +1885,7 @@ - + @@ -1900,7 +1896,7 @@ - + @@ -1910,7 +1906,7 @@ - + @@ -1918,7 +1914,7 @@ - + @@ -1927,7 +1923,7 @@ - + @@ -1936,7 +1932,7 @@ - + @@ -1975,7 +1971,7 @@ - + @@ -2045,7 +2041,7 @@ - + @@ -2054,7 +2050,7 @@ - + @@ -2082,7 +2078,7 @@ - + @@ -2109,7 +2105,7 @@ - + @@ -2119,7 +2115,7 @@ - + @@ -2129,7 +2125,7 @@ - + @@ -2166,7 +2162,7 @@ - + @@ -2184,7 +2180,7 @@ - + @@ -2233,7 +2229,7 @@ - + @@ -2245,7 +2241,7 @@ - + @@ -2331,7 +2327,7 @@ - + @@ -2354,7 +2350,7 @@ - + @@ -2404,7 +2400,7 @@ - + @@ -2417,7 +2413,7 @@ - + @@ -2438,7 +2434,7 @@ - + @@ -2469,6 +2465,8 @@ + + @@ -2479,7 +2477,7 @@ - + @@ -2489,7 +2487,7 @@ - + @@ -2498,7 +2496,7 @@ - + @@ -2507,7 +2505,7 @@ - + @@ -2530,7 +2528,7 @@ - + @@ -2578,7 +2576,7 @@ - + @@ -2598,14 +2596,14 @@ - + - + @@ -2618,7 +2616,7 @@ - + @@ -2641,7 +2639,7 @@ - + @@ -2653,7 +2651,7 @@ - + @@ -2695,27 +2693,27 @@ - + - - + + - + - + - + - + @@ -2727,7 +2725,7 @@ - + @@ -2737,7 +2735,7 @@ - + @@ -2746,14 +2744,14 @@ - + - + @@ -2972,6 +2970,38 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff -Nru 
mesa-18.3.3/src/intel/genxml/gen7.xml mesa-19.0.1/src/intel/genxml/gen7.xml --- mesa-18.3.3/src/intel/genxml/gen7.xml 2018-09-27 19:13:54.000000000 +0000 +++ mesa-19.0.1/src/intel/genxml/gen7.xml 2019-03-31 23:16:37.000000000 +0000 @@ -157,7 +157,7 @@ - + @@ -169,8 +169,7 @@ - - + @@ -443,7 +442,6 @@ - @@ -568,7 +566,7 @@ - + @@ -589,7 +587,7 @@ - + @@ -601,7 +599,7 @@ - + @@ -610,7 +608,7 @@ - + @@ -619,7 +617,7 @@ - + @@ -628,7 +626,7 @@ - + @@ -637,7 +635,7 @@ - + @@ -646,7 +644,7 @@ - + @@ -656,7 +654,7 @@ - + @@ -666,7 +664,7 @@ - + @@ -677,7 +675,7 @@ - + @@ -687,7 +685,7 @@ - + @@ -740,7 +738,7 @@ - + @@ -749,7 +747,7 @@ - + @@ -758,7 +756,7 @@ - + @@ -767,7 +765,7 @@ - + @@ -776,7 +774,7 @@ - + @@ -785,7 +783,7 @@ - + @@ -815,14 +813,13 @@ - - + - + @@ -832,7 +829,7 @@ - + @@ -846,7 +843,7 @@ - + @@ -881,7 +878,7 @@ - + @@ -943,19 +940,18 @@ - + - - + - + @@ -991,13 +987,12 @@ - + - - + @@ -1009,7 +1004,7 @@ - + @@ -1023,7 +1018,7 @@ - + @@ -1033,7 +1028,7 @@ - + @@ -1066,7 +1061,7 @@ - + @@ -1076,7 +1071,7 @@ - + @@ -1087,7 +1082,7 @@ - + @@ -1139,7 +1134,7 @@ - + @@ -1153,7 +1148,7 @@ - + @@ -1167,7 +1162,7 @@ - + @@ -1181,7 +1176,7 @@ - + @@ -1195,7 +1190,7 @@ - + @@ -1209,7 +1204,7 @@ - + @@ -1220,7 +1215,7 @@ - + @@ -1234,7 +1229,7 @@ - + @@ -1243,7 +1238,7 @@ - + @@ -1252,7 +1247,7 @@ - + @@ -1261,7 +1256,7 @@ - + @@ -1270,7 +1265,7 @@ - + @@ -1279,7 +1274,7 @@ - + @@ -1288,7 +1283,7 @@ - + @@ -1329,7 +1324,7 @@ - + @@ -1338,7 +1333,7 @@ - + @@ -1404,7 +1399,10 @@ - + + + + @@ -1415,21 +1413,20 @@ - + - - + - + @@ -1448,19 +1445,18 @@ - + - - + - + @@ -1488,7 +1484,7 @@ - + @@ -1519,7 +1515,7 @@ - + @@ -1530,7 +1526,7 @@ - + @@ -1541,7 +1537,7 @@ - + @@ -1552,7 +1548,7 @@ - + @@ -1563,7 +1559,7 @@ - + @@ -1574,7 +1570,7 @@ - + @@ -1585,7 +1581,7 @@ - + @@ -1593,7 +1589,7 @@ - + @@ -1602,7 +1598,7 @@ - + @@ -1611,7 +1607,7 @@ - + @@ -1645,7 +1641,7 @@ - + @@ -1710,7 +1706,7 @@ - + @@ -1734,7 +1730,7 @@ - + @@ -1761,7 +1757,7 @@ - + @@ -1771,7 +1767,7 
@@ - + @@ -1781,7 +1777,7 @@ - + @@ -1813,7 +1809,7 @@ - + @@ -1831,7 +1827,7 @@ - + @@ -1881,7 +1877,7 @@ - + @@ -1891,7 +1887,7 @@ - + @@ -1971,7 +1967,7 @@ - + @@ -1994,7 +1990,7 @@ - + @@ -2058,6 +2054,8 @@ + + @@ -2068,7 +2066,7 @@ - + @@ -2141,7 +2139,7 @@ - + @@ -2154,7 +2152,7 @@ - + @@ -2177,7 +2175,7 @@ - + @@ -2189,7 +2187,7 @@ - + @@ -2231,28 +2229,28 @@ - + - - + + - + - + - + - + @@ -2264,7 +2262,7 @@ - + @@ -2274,7 +2272,7 @@ - + @@ -2283,14 +2281,14 @@ - + - + @@ -2489,6 +2487,38 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff -Nru mesa-18.3.3/src/intel/genxml/gen8.xml mesa-19.0.1/src/intel/genxml/gen8.xml --- mesa-18.3.3/src/intel/genxml/gen8.xml 2018-12-07 18:58:04.000000000 +0000 +++ mesa-19.0.1/src/intel/genxml/gen8.xml 2019-03-31 23:16:37.000000000 +0000 @@ -216,8 +216,7 @@ - - + @@ -462,7 +461,6 @@ - @@ -718,7 +716,7 @@ - + @@ -740,7 +738,7 @@ - + @@ -756,7 +754,7 @@ - + @@ -773,7 +771,7 @@ - + @@ -790,7 +788,7 @@ - + @@ -807,7 +805,7 @@ - + @@ -824,7 +822,7 @@ - + @@ -841,7 +839,7 @@ - + @@ -850,7 +848,7 @@ - + @@ -859,7 +857,7 @@ - + @@ -868,7 +866,7 @@ - + @@ -877,7 +875,7 @@ - + @@ -886,7 +884,7 @@ - + @@ -894,13 +892,13 @@ - + - + @@ -910,7 +908,7 @@ - + @@ -920,7 +918,7 @@ - + @@ -931,7 +929,7 @@ - + @@ -941,7 +939,7 @@ - + @@ -981,57 +979,57 @@ - + - + - + - + - + - + - + - + - + - + - + @@ -1059,13 +1057,12 @@ - - + - + @@ -1084,7 +1081,7 @@ - + @@ -1131,7 +1128,7 @@ - + @@ -1147,7 +1144,7 @@ - + @@ -1163,7 +1160,7 @@ - + @@ -1179,7 +1176,7 @@ - + @@ -1196,7 +1193,7 @@ - + @@ -1213,7 +1210,7 @@ - + @@ -1221,11 +1218,11 @@ - + - + @@ -1293,20 +1290,19 @@ - + - - + - + @@ -1345,7 +1341,7 @@ - + @@ -1356,13 +1352,12 @@ - - + - + @@ -1376,7 +1371,7 @@ - + @@ -1386,7 +1381,7 @@ - + @@ -1400,7 +1395,7 @@ - + @@ -1410,7 +1405,7 @@ - + @@ -1421,7 +1416,7 @@ - + @@ -1479,7 +1474,7 @@ - + @@ -1496,7 +1491,7 @@ - + @@ -1522,7 +1517,7 @@ - + @@ -1532,7 +1527,7 @@ - + @@ -1542,7 +1537,7 @@ - + @@ -1552,7 +1547,7 @@ - + @@ 
-1562,7 +1557,7 @@ - + @@ -1572,7 +1567,7 @@ - + @@ -1631,7 +1626,7 @@ - + @@ -1642,7 +1637,7 @@ - + @@ -1656,7 +1651,7 @@ - + @@ -1665,7 +1660,7 @@ - + @@ -1674,7 +1669,7 @@ - + @@ -1683,7 +1678,7 @@ - + @@ -1692,7 +1687,7 @@ - + @@ -1701,7 +1696,7 @@ - + @@ -1710,7 +1705,7 @@ - + @@ -1748,7 +1743,7 @@ - + @@ -1773,7 +1768,7 @@ - + @@ -1787,7 +1782,7 @@ - + @@ -1796,7 +1791,7 @@ - + @@ -1821,7 +1816,10 @@ - + + + + @@ -1829,7 +1827,7 @@ - + @@ -1837,8 +1835,7 @@ - - + @@ -1847,7 +1844,7 @@ - + @@ -1866,21 +1863,20 @@ - + - - + - + @@ -1913,7 +1909,7 @@ - + @@ -1944,7 +1940,7 @@ - + @@ -1955,7 +1951,7 @@ - + @@ -1966,7 +1962,7 @@ - + @@ -1977,7 +1973,7 @@ - + @@ -1988,7 +1984,7 @@ - + @@ -1999,7 +1995,7 @@ - + @@ -2010,7 +2006,7 @@ - + @@ -2020,7 +2016,7 @@ - + @@ -2031,7 +2027,7 @@ - + @@ -2055,7 +2051,7 @@ - + @@ -2063,7 +2059,7 @@ - + @@ -2072,7 +2068,7 @@ - + @@ -2081,7 +2077,7 @@ - + @@ -2090,7 +2086,7 @@ - + @@ -2133,7 +2129,7 @@ - + @@ -2190,7 +2186,7 @@ - + @@ -2199,7 +2195,7 @@ - + @@ -2225,7 +2221,7 @@ - + @@ -2247,7 +2243,7 @@ - + @@ -2256,7 +2252,7 @@ - + @@ -2285,7 +2281,7 @@ - + @@ -2295,7 +2291,7 @@ - + @@ -2305,7 +2301,7 @@ - + @@ -2343,7 +2339,7 @@ - + @@ -2378,7 +2374,7 @@ - + @@ -2396,7 +2392,7 @@ - + @@ -2443,7 +2439,7 @@ - + @@ -2454,7 +2450,7 @@ - + @@ -2568,7 +2564,7 @@ - + @@ -2631,7 +2627,7 @@ - + @@ -2644,7 +2640,7 @@ - + @@ -2658,7 +2654,7 @@ - + @@ -2697,6 +2693,8 @@ + + @@ -2707,7 +2705,7 @@ - + @@ -2717,7 +2715,7 @@ - + @@ -2726,7 +2724,7 @@ - + @@ -2735,7 +2733,7 @@ - + @@ -2784,7 +2782,7 @@ - + @@ -2844,7 +2842,7 @@ - + @@ -2858,20 +2856,20 @@ - + - + - + @@ -2884,7 +2882,7 @@ - + @@ -2901,7 +2899,7 @@ - + @@ -2913,7 +2911,7 @@ - + @@ -2955,27 +2953,27 @@ - + - + - + - + - + - + - + @@ -2987,7 +2985,7 @@ - + @@ -2997,7 +2995,7 @@ - + @@ -3006,14 +3004,14 @@ - + - + @@ -3206,6 +3204,38 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff -Nru mesa-18.3.3/src/intel/genxml/gen9.xml 
mesa-19.0.1/src/intel/genxml/gen9.xml --- mesa-18.3.3/src/intel/genxml/gen9.xml 2018-12-07 18:58:04.000000000 +0000 +++ mesa-19.0.1/src/intel/genxml/gen9.xml 2019-03-31 23:16:37.000000000 +0000 @@ -219,14 +219,9 @@ - - - - - - + @@ -494,7 +489,6 @@ - @@ -776,7 +770,7 @@ - + @@ -798,7 +792,7 @@ - + @@ -814,7 +808,7 @@ - + @@ -831,7 +825,7 @@ - + @@ -848,7 +842,7 @@ - + @@ -865,7 +859,7 @@ - + @@ -882,7 +876,7 @@ - + @@ -899,7 +893,7 @@ - + @@ -908,7 +902,7 @@ - + @@ -917,7 +911,7 @@ - + @@ -926,7 +920,7 @@ - + @@ -935,7 +929,7 @@ - + @@ -944,7 +938,7 @@ - + @@ -952,13 +946,13 @@ - + - + @@ -968,7 +962,7 @@ - + @@ -978,7 +972,7 @@ - + @@ -989,7 +983,7 @@ - + @@ -999,7 +993,7 @@ - + @@ -1039,57 +1033,57 @@ - + - + - + - + - + - + - + - + - + - + - + @@ -1115,8 +1109,7 @@ - - + @@ -1127,7 +1120,7 @@ - + @@ -1146,7 +1139,7 @@ - + @@ -1194,7 +1187,7 @@ - + @@ -1218,7 +1211,7 @@ - + @@ -1242,7 +1235,7 @@ - + @@ -1266,7 +1259,7 @@ - + @@ -1292,7 +1285,7 @@ - + @@ -1318,7 +1311,7 @@ - + @@ -1326,11 +1319,11 @@ - + - + @@ -1399,20 +1392,19 @@ - + - - + - + @@ -1458,7 +1450,7 @@ - + @@ -1469,13 +1461,12 @@ - - + - + @@ -1489,7 +1480,7 @@ - + @@ -1499,7 +1490,7 @@ - + @@ -1513,7 +1504,7 @@ - + @@ -1523,7 +1514,7 @@ - + @@ -1534,7 +1525,7 @@ - + @@ -1596,7 +1587,7 @@ - + @@ -1613,7 +1604,7 @@ - + @@ -1646,7 +1637,7 @@ - + @@ -1656,7 +1647,7 @@ - + @@ -1666,7 +1657,7 @@ - + @@ -1676,7 +1667,7 @@ - + @@ -1686,7 +1677,7 @@ - + @@ -1696,7 +1687,7 @@ - + @@ -1757,7 +1748,7 @@ - + @@ -1775,7 +1766,7 @@ - + @@ -1786,7 +1777,7 @@ - + @@ -1800,7 +1791,7 @@ - + @@ -1809,7 +1800,7 @@ - + @@ -1818,7 +1809,7 @@ - + @@ -1827,7 +1818,7 @@ - + @@ -1836,7 +1827,7 @@ - + @@ -1845,7 +1836,7 @@ - + @@ -1854,7 +1845,7 @@ - + @@ -1924,7 +1915,7 @@ - + @@ -1957,7 +1948,7 @@ - + @@ -1971,7 +1962,7 @@ - + @@ -1980,7 +1971,7 @@ - + @@ -2004,7 +1995,10 @@ - + + + + @@ -2012,7 +2006,7 @@ - + @@ -2020,8 +2014,7 @@ - - + @@ -2030,7 +2023,7 @@ - + @@ -2049,21 +2042,20 @@ - + - - + - + @@ -2096,7 +2088,7 @@ - 
+ @@ -2126,7 +2118,7 @@ - + @@ -2136,7 +2128,7 @@ - + @@ -2147,7 +2139,7 @@ - + @@ -2158,7 +2150,7 @@ - + @@ -2169,7 +2161,7 @@ - + @@ -2180,7 +2172,7 @@ - + @@ -2191,7 +2183,7 @@ - + @@ -2202,7 +2194,7 @@ - + @@ -2214,7 +2206,7 @@ - + @@ -2254,7 +2246,7 @@ - + @@ -2265,7 +2257,7 @@ - + @@ -2289,7 +2281,7 @@ - + @@ -2297,7 +2289,7 @@ - + @@ -2306,7 +2298,7 @@ - + @@ -2315,7 +2307,7 @@ - + @@ -2324,7 +2316,7 @@ - + @@ -2367,7 +2359,7 @@ - + @@ -2424,7 +2416,7 @@ - + @@ -2433,7 +2425,7 @@ - + @@ -2461,7 +2453,7 @@ - + @@ -2483,7 +2475,7 @@ - + @@ -2492,7 +2484,7 @@ - + @@ -2521,7 +2513,7 @@ - + @@ -2531,7 +2523,7 @@ - + @@ -2541,7 +2533,7 @@ - + @@ -2581,7 +2573,7 @@ - + @@ -2618,7 +2610,7 @@ - + @@ -2636,7 +2628,7 @@ - + @@ -2690,7 +2682,7 @@ - + @@ -2701,7 +2693,7 @@ - + @@ -2811,7 +2803,7 @@ - + @@ -2847,7 +2839,7 @@ - + @@ -2918,7 +2910,7 @@ - + @@ -2931,7 +2923,7 @@ - + @@ -2945,7 +2937,7 @@ - + @@ -2984,6 +2976,8 @@ + + @@ -2994,7 +2988,7 @@ - + @@ -3004,7 +2998,7 @@ - + @@ -3013,7 +3007,7 @@ - + @@ -3022,7 +3016,7 @@ - + @@ -3131,7 +3125,7 @@ - + @@ -3145,13 +3139,13 @@ - + - + @@ -3163,7 +3157,7 @@ - + @@ -3186,7 +3180,7 @@ - + @@ -3201,7 +3195,7 @@ - + @@ -3244,27 +3238,27 @@ - + - + - + - + - + - + - + @@ -3275,12 +3269,12 @@ - + - + @@ -3290,7 +3284,7 @@ - + @@ -3491,6 +3485,46 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff -Nru mesa-18.3.3/src/intel/isl/isl.c mesa-19.0.1/src/intel/isl/isl.c --- mesa-18.3.3/src/intel/isl/isl.c 2018-12-07 18:58:04.000000000 +0000 +++ mesa-19.0.1/src/intel/isl/isl.c 2019-03-31 23:16:37.000000000 +0000 @@ -35,6 +35,52 @@ #include "isl_gen9.h" #include "isl_priv.h" +void +isl_memcpy_linear_to_tiled(uint32_t xt1, uint32_t xt2, + uint32_t yt1, uint32_t yt2, + char *dst, const char *src, + uint32_t dst_pitch, int32_t src_pitch, + bool has_swizzling, + enum isl_tiling tiling, + isl_memcpy_type copy_type) +{ +#ifdef USE_SSE41 + if (copy_type == ISL_MEMCPY_STREAMING_LOAD) { + 
_isl_memcpy_linear_to_tiled_sse41( + xt1, xt2, yt1, yt2, dst, src, dst_pitch, src_pitch, has_swizzling, + tiling, copy_type); + return; + } +#endif + + _isl_memcpy_linear_to_tiled( + xt1, xt2, yt1, yt2, dst, src, dst_pitch, src_pitch, has_swizzling, + tiling, copy_type); +} + +void +isl_memcpy_tiled_to_linear(uint32_t xt1, uint32_t xt2, + uint32_t yt1, uint32_t yt2, + char *dst, const char *src, + int32_t dst_pitch, uint32_t src_pitch, + bool has_swizzling, + enum isl_tiling tiling, + isl_memcpy_type copy_type) +{ +#ifdef USE_SSE41 + if (copy_type == ISL_MEMCPY_STREAMING_LOAD) { + _isl_memcpy_tiled_to_linear_sse41( + xt1, xt2, yt1, yt2, dst, src, dst_pitch, src_pitch, has_swizzling, + tiling, copy_type); + return; + } +#endif + + _isl_memcpy_tiled_to_linear( + xt1, xt2, yt1, yt2, dst, src, dst_pitch, src_pitch, has_swizzling, + tiling, copy_type); +} + void PRINTFLIKE(3, 4) UNUSED __isl_finishme(const char *file, int line, const char *fmt, ...) { diff -Nru mesa-18.3.3/src/intel/isl/isl_emit_depth_stencil.c mesa-19.0.1/src/intel/isl/isl_emit_depth_stencil.c --- mesa-18.3.3/src/intel/isl/isl_emit_depth_stencil.c 2018-12-07 18:58:04.000000000 +0000 +++ mesa-19.0.1/src/intel/isl/isl_emit_depth_stencil.c 2019-03-31 23:16:37.000000000 +0000 @@ -94,7 +94,7 @@ #endif db.SurfaceBaseAddress = info->depth_address; #if GEN_GEN >= 6 - db.DepthBufferMOCS = info->mocs; + db.MOCS = info->mocs; #endif #if GEN_GEN <= 6 @@ -138,7 +138,7 @@ #endif sb.SurfaceBaseAddress = info->stencil_address; #if GEN_GEN >= 6 - sb.StencilBufferMOCS = info->mocs; + sb.MOCS = info->mocs; #endif sb.SurfacePitch = info->stencil_surf->row_pitch_B - 1; #if GEN_GEN >= 8 @@ -161,7 +161,7 @@ db.HierarchicalDepthBufferEnable = true; hiz.SurfaceBaseAddress = info->hiz_address; - hiz.HierarchicalDepthBufferMOCS = info->mocs; + hiz.MOCS = info->mocs; hiz.SurfacePitch = info->hiz_surf->row_pitch_B - 1; #if GEN_GEN >= 8 /* From the SKL PRM Vol2a: diff -Nru mesa-18.3.3/src/intel/isl/isl_format_layout.csv 
mesa-19.0.1/src/intel/isl/isl_format_layout.csv --- mesa-18.3.3/src/intel/isl/isl_format_layout.csv 2018-09-27 19:13:54.000000000 +0000 +++ mesa-19.0.1/src/intel/isl/isl_format_layout.csv 2019-03-31 23:16:37.000000000 +0000 @@ -211,7 +211,7 @@ R8_SSCALED , 8, 1, 1, 1, ss8, , , , , , , r, linear, R8_USCALED , 8, 1, 1, 1, us8, , , , , , , r, linear, P8_UNORM_PALETTE0 , 8, 1, 1, 1, , , , , , , un8, p, linear, -L8_UNORM_SRGB , 8, 1, 1, 1, , , , , un8, , , l, linear, +L8_UNORM_SRGB , 8, 1, 1, 1, , , , , un8, , , l, srgb, P8_UNORM_PALETTE1 , 8, 1, 1, 1, , , , , , , un8, p, linear, P4A4_UNORM_PALETTE1 , 8, 1, 1, 1, , , , un4, , , un4, pa, linear, A4P4_UNORM_PALETTE1 , 8, 1, 1, 1, , , , un4, , , un4, ap, linear, diff -Nru mesa-18.3.3/src/intel/isl/isl.h mesa-19.0.1/src/intel/isl/isl.h --- mesa-18.3.3/src/intel/isl/isl.h 2018-12-07 18:58:04.000000000 +0000 +++ mesa-19.0.1/src/intel/isl/isl.h 2019-03-31 23:16:37.000000000 +0000 @@ -949,6 +949,12 @@ ISL_MSAA_LAYOUT_ARRAY, }; +typedef enum { + ISL_MEMCPY = 0, + ISL_MEMCPY_BGRA8, + ISL_MEMCPY_STREAMING_LOAD, + ISL_MEMCPY_INVALID, +} isl_memcpy_type; struct isl_device { const struct gen_device_info *info; @@ -2065,6 +2071,32 @@ isl_surf_get_depth_format(const struct isl_device *dev, const struct isl_surf *surf); +/** + * @brief performs a copy from linear to tiled surface + * + */ +void +isl_memcpy_linear_to_tiled(uint32_t xt1, uint32_t xt2, + uint32_t yt1, uint32_t yt2, + char *dst, const char *src, + uint32_t dst_pitch, int32_t src_pitch, + bool has_swizzling, + enum isl_tiling tiling, + isl_memcpy_type copy_type); + +/** + * @brief performs a copy from tiled to linear surface + * + */ +void +isl_memcpy_tiled_to_linear(uint32_t xt1, uint32_t xt2, + uint32_t yt1, uint32_t yt2, + char *dst, const char *src, + int32_t dst_pitch, uint32_t src_pitch, + bool has_swizzling, + enum isl_tiling tiling, + isl_memcpy_type copy_type); + #ifdef __cplusplus } #endif diff -Nru mesa-18.3.3/src/intel/isl/isl_priv.h 
mesa-19.0.1/src/intel/isl/isl_priv.h --- mesa-18.3.3/src/intel/isl/isl_priv.h 2018-03-08 23:00:46.000000000 +0000 +++ mesa-19.0.1/src/intel/isl/isl_priv.h 2019-03-31 23:16:37.000000000 +0000 @@ -25,6 +25,7 @@ #define ISL_PRIV_H #include +#include #include #include "dev/gen_device_info.h" @@ -47,6 +48,8 @@ #define MIN(a, b) ((a) < (b) ? (a) : (b)) #define MAX(a, b) ((a) > (b) ? (a) : (b)) +typedef void *(*isl_mem_copy_fn)(void *dest, const void *src, size_t n); + static inline bool isl_is_pow2(uintmax_t n) { @@ -158,6 +161,42 @@ }; } +void +_isl_memcpy_linear_to_tiled(uint32_t xt1, uint32_t xt2, + uint32_t yt1, uint32_t yt2, + char *dst, const char *src, + uint32_t dst_pitch, int32_t src_pitch, + bool has_swizzling, + enum isl_tiling tiling, + isl_memcpy_type copy_type); + +void +_isl_memcpy_tiled_to_linear(uint32_t xt1, uint32_t xt2, + uint32_t yt1, uint32_t yt2, + char *dst, const char *src, + int32_t dst_pitch, uint32_t src_pitch, + bool has_swizzling, + enum isl_tiling tiling, + isl_memcpy_type copy_type); + +void +_isl_memcpy_linear_to_tiled_sse41(uint32_t xt1, uint32_t xt2, + uint32_t yt1, uint32_t yt2, + char *dst, const char *src, + uint32_t dst_pitch, int32_t src_pitch, + bool has_swizzling, + enum isl_tiling tiling, + isl_memcpy_type copy_type); + +void +_isl_memcpy_tiled_to_linear_sse41(uint32_t xt1, uint32_t xt2, + uint32_t yt1, uint32_t yt2, + char *dst, const char *src, + int32_t dst_pitch, uint32_t src_pitch, + bool has_swizzling, + enum isl_tiling tiling, + isl_memcpy_type copy_type); + /* This is useful for adding the isl_prefix to genX functions */ #define __PASTE2(x, y) x ## y #define __PASTE(x, y) __PASTE2(x, y) diff -Nru mesa-18.3.3/src/intel/isl/isl_tiled_memcpy.c mesa-19.0.1/src/intel/isl/isl_tiled_memcpy.c --- mesa-18.3.3/src/intel/isl/isl_tiled_memcpy.c 1970-01-01 00:00:00.000000000 +0000 +++ mesa-19.0.1/src/intel/isl/isl_tiled_memcpy.c 2019-03-31 23:16:37.000000000 +0000 @@ -0,0 +1,1005 @@ +/* + * Mesa 3-D graphics library + * + * Copyright 
2012 Intel Corporation + * Copyright 2013 Google + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sublicense, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. + * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + * Authors: + * Chad Versace + * Frank Henigman + */ + +#include + +#include "util/macros.h" +#include "main/macros.h" + +#include "isl_priv.h" + +#if defined(__SSSE3__) +#include +#elif defined(__SSE2__) +#include +#endif + +#define FILE_DEBUG_FLAG DEBUG_TEXTURE + +#define ALIGN_DOWN(a, b) ROUND_DOWN_TO(a, b) +#define ALIGN_UP(a, b) ALIGN(a, b) + +/* Tile dimensions. Width and span are in bytes, height is in pixels (i.e. + * unitless). A "span" is the most number of bytes we can copy from linear + * to tiled without needing to calculate a new destination address. 
+ */ +static const uint32_t xtile_width = 512; +static const uint32_t xtile_height = 8; +static const uint32_t xtile_span = 64; +static const uint32_t ytile_width = 128; +static const uint32_t ytile_height = 32; +static const uint32_t ytile_span = 16; + +static inline uint32_t +ror(uint32_t n, uint32_t d) +{ + return (n >> d) | (n << (32 - d)); +} + +static inline uint32_t +bswap32(uint32_t n) +{ +#if defined(HAVE___BUILTIN_BSWAP32) + return __builtin_bswap32(n); +#else + return (n >> 24) | + ((n >> 8) & 0x0000ff00) | + ((n << 8) & 0x00ff0000) | + (n << 24); +#endif +} + +/** + * Copy RGBA to BGRA - swap R and B. + */ +static inline void * +rgba8_copy(void *dst, const void *src, size_t bytes) +{ + uint32_t *d = dst; + uint32_t const *s = src; + + assert(bytes % 4 == 0); + + while (bytes >= 4) { + *d = ror(bswap32(*s), 8); + d += 1; + s += 1; + bytes -= 4; + } + return dst; +} + +#ifdef __SSSE3__ +static const uint8_t rgba8_permutation[16] = + { 2,1,0,3, 6,5,4,7, 10,9,8,11, 14,13,12,15 }; + +static inline void +rgba8_copy_16_aligned_dst(void *dst, const void *src) +{ + _mm_store_si128(dst, + _mm_shuffle_epi8(_mm_loadu_si128(src), + *(__m128i *)rgba8_permutation)); +} + +static inline void +rgba8_copy_16_aligned_src(void *dst, const void *src) +{ + _mm_storeu_si128(dst, + _mm_shuffle_epi8(_mm_load_si128(src), + *(__m128i *)rgba8_permutation)); +} + +#elif defined(__SSE2__) +static inline void +rgba8_copy_16_aligned_dst(void *dst, const void *src) +{ + __m128i srcreg, dstreg, agmask, ag, rb, br; + + agmask = _mm_set1_epi32(0xFF00FF00); + srcreg = _mm_loadu_si128((__m128i *)src); + + rb = _mm_andnot_si128(agmask, srcreg); + ag = _mm_and_si128(agmask, srcreg); + br = _mm_shufflehi_epi16(_mm_shufflelo_epi16(rb, _MM_SHUFFLE(2, 3, 0, 1)), + _MM_SHUFFLE(2, 3, 0, 1)); + dstreg = _mm_or_si128(ag, br); + + _mm_store_si128((__m128i *)dst, dstreg); +} + +static inline void +rgba8_copy_16_aligned_src(void *dst, const void *src) +{ + __m128i srcreg, dstreg, agmask, ag, rb, br; + + 
agmask = _mm_set1_epi32(0xFF00FF00); + srcreg = _mm_load_si128((__m128i *)src); + + rb = _mm_andnot_si128(agmask, srcreg); + ag = _mm_and_si128(agmask, srcreg); + br = _mm_shufflehi_epi16(_mm_shufflelo_epi16(rb, _MM_SHUFFLE(2, 3, 0, 1)), + _MM_SHUFFLE(2, 3, 0, 1)); + dstreg = _mm_or_si128(ag, br); + + _mm_storeu_si128((__m128i *)dst, dstreg); +} +#endif + +/** + * Copy RGBA to BGRA - swap R and B, with the destination 16-byte aligned. + */ +static inline void * +rgba8_copy_aligned_dst(void *dst, const void *src, size_t bytes) +{ + assert(bytes == 0 || !(((uintptr_t)dst) & 0xf)); + +#if defined(__SSSE3__) || defined(__SSE2__) + if (bytes == 64) { + rgba8_copy_16_aligned_dst(dst + 0, src + 0); + rgba8_copy_16_aligned_dst(dst + 16, src + 16); + rgba8_copy_16_aligned_dst(dst + 32, src + 32); + rgba8_copy_16_aligned_dst(dst + 48, src + 48); + return dst; + } + + while (bytes >= 16) { + rgba8_copy_16_aligned_dst(dst, src); + src += 16; + dst += 16; + bytes -= 16; + } +#endif + + rgba8_copy(dst, src, bytes); + + return dst; +} + +/** + * Copy RGBA to BGRA - swap R and B, with the source 16-byte aligned. + */ +static inline void * +rgba8_copy_aligned_src(void *dst, const void *src, size_t bytes) +{ + assert(bytes == 0 || !(((uintptr_t)src) & 0xf)); + +#if defined(__SSSE3__) || defined(__SSE2__) + if (bytes == 64) { + rgba8_copy_16_aligned_src(dst + 0, src + 0); + rgba8_copy_16_aligned_src(dst + 16, src + 16); + rgba8_copy_16_aligned_src(dst + 32, src + 32); + rgba8_copy_16_aligned_src(dst + 48, src + 48); + return dst; + } + + while (bytes >= 16) { + rgba8_copy_16_aligned_src(dst, src); + src += 16; + dst += 16; + bytes -= 16; + } +#endif + + rgba8_copy(dst, src, bytes); + + return dst; +} + +/** + * Each row from y0 to y1 is copied in three parts: [x0,x1), [x1,x2), [x2,x3). + * These ranges are in bytes, i.e. pixels * bytes-per-pixel. 
+ * The first and last ranges must be shorter than a "span" (the longest linear + * stretch within a tile) and the middle must equal a whole number of spans. + * Ranges may be empty. The region copied must land entirely within one tile. + * 'dst' is the start of the tile and 'src' is the corresponding + * address to copy from, though copying begins at (x0, y0). + * To enable swizzling 'swizzle_bit' must be 1<<6, otherwise zero. + * Swizzling flips bit 6 in the copy destination offset, when certain other + * bits are set in it. + */ +typedef void (*tile_copy_fn)(uint32_t x0, uint32_t x1, uint32_t x2, uint32_t x3, + uint32_t y0, uint32_t y1, + char *dst, const char *src, + int32_t linear_pitch, + uint32_t swizzle_bit, + isl_memcpy_type copy_type); + +/** + * Copy texture data from linear to X tile layout. + * + * \copydoc tile_copy_fn + * + * The mem_copy parameters allow the user to specify an alternative mem_copy + * function that, for instance, may do RGBA -> BGRA swizzling. The first + * function must handle any memory alignment while the second function must + * only handle 16-byte alignment in whichever side (source or destination) is + * tiled. + */ +static inline void +linear_to_xtiled(uint32_t x0, uint32_t x1, uint32_t x2, uint32_t x3, + uint32_t y0, uint32_t y1, + char *dst, const char *src, + int32_t src_pitch, + uint32_t swizzle_bit, + isl_mem_copy_fn mem_copy, + isl_mem_copy_fn mem_copy_align16) +{ + /* The copy destination offset for each range copied is the sum of + * an X offset 'x0' or 'xo' and a Y offset 'yo.' + */ + uint32_t xo, yo; + + src += (ptrdiff_t)y0 * src_pitch; + + for (yo = y0 * xtile_width; yo < y1 * xtile_width; yo += xtile_width) { + /* Bits 9 and 10 of the copy destination offset control swizzling. + * Only 'yo' contributes to those bits in the total offset, + * so calculate 'swizzle' just once per row. + * Move bits 9 and 10 three and four places respectively down + * to bit 6 and xor them. 
+ */ + uint32_t swizzle = ((yo >> 3) ^ (yo >> 4)) & swizzle_bit; + + mem_copy(dst + ((x0 + yo) ^ swizzle), src + x0, x1 - x0); + + for (xo = x1; xo < x2; xo += xtile_span) { + mem_copy_align16(dst + ((xo + yo) ^ swizzle), src + xo, xtile_span); + } + + mem_copy_align16(dst + ((xo + yo) ^ swizzle), src + x2, x3 - x2); + + src += src_pitch; + } +} + +/** + * Copy texture data from linear to Y tile layout. + * + * \copydoc tile_copy_fn + */ +static inline void +linear_to_ytiled(uint32_t x0, uint32_t x1, uint32_t x2, uint32_t x3, + uint32_t y0, uint32_t y3, + char *dst, const char *src, + int32_t src_pitch, + uint32_t swizzle_bit, + isl_mem_copy_fn mem_copy, + isl_mem_copy_fn mem_copy_align16) +{ + /* Y tiles consist of columns that are 'ytile_span' wide (and the same height + * as the tile). Thus the destination offset for (x,y) is the sum of: + * (x % column_width) // position within column + * (x / column_width) * bytes_per_column // column number * bytes per column + * y * column_width + * + * The copy destination offset for each range copied is the sum of + * an X offset 'xo0' or 'xo' and a Y offset 'yo.' + */ + const uint32_t column_width = ytile_span; + const uint32_t bytes_per_column = column_width * ytile_height; + + uint32_t y1 = MIN2(y3, ALIGN_UP(y0, 4)); + uint32_t y2 = MAX2(y1, ALIGN_DOWN(y3, 4)); + + uint32_t xo0 = (x0 % ytile_span) + (x0 / ytile_span) * bytes_per_column; + uint32_t xo1 = (x1 % ytile_span) + (x1 / ytile_span) * bytes_per_column; + + /* Bit 9 of the destination offset control swizzling. + * Only the X offset contributes to bit 9 of the total offset, + * so swizzle can be calculated in advance for these X positions. + * Move bit 9 three places down to bit 6. 
+ */ + uint32_t swizzle0 = (xo0 >> 3) & swizzle_bit; + uint32_t swizzle1 = (xo1 >> 3) & swizzle_bit; + + uint32_t x, yo; + + src += (ptrdiff_t)y0 * src_pitch; + + if (y0 != y1) { + for (yo = y0 * column_width; yo < y1 * column_width; yo += column_width) { + uint32_t xo = xo1; + uint32_t swizzle = swizzle1; + + mem_copy(dst + ((xo0 + yo) ^ swizzle0), src + x0, x1 - x0); + + /* Step by spans/columns. As it happens, the swizzle bit flips + * at each step so we don't need to calculate it explicitly. + */ + for (x = x1; x < x2; x += ytile_span) { + mem_copy_align16(dst + ((xo + yo) ^ swizzle), src + x, ytile_span); + xo += bytes_per_column; + swizzle ^= swizzle_bit; + } + + mem_copy_align16(dst + ((xo + yo) ^ swizzle), src + x2, x3 - x2); + + src += src_pitch; + } + } + + for (yo = y1 * column_width; yo < y2 * column_width; yo += 4 * column_width) { + uint32_t xo = xo1; + uint32_t swizzle = swizzle1; + + if (x0 != x1) { + mem_copy(dst + ((xo0 + yo + 0 * column_width) ^ swizzle0), src + x0 + 0 * src_pitch, x1 - x0); + mem_copy(dst + ((xo0 + yo + 1 * column_width) ^ swizzle0), src + x0 + 1 * src_pitch, x1 - x0); + mem_copy(dst + ((xo0 + yo + 2 * column_width) ^ swizzle0), src + x0 + 2 * src_pitch, x1 - x0); + mem_copy(dst + ((xo0 + yo + 3 * column_width) ^ swizzle0), src + x0 + 3 * src_pitch, x1 - x0); + } + + /* Step by spans/columns. As it happens, the swizzle bit flips + * at each step so we don't need to calculate it explicitly. 
+ */ + for (x = x1; x < x2; x += ytile_span) { + mem_copy_align16(dst + ((xo + yo + 0 * column_width) ^ swizzle), src + x + 0 * src_pitch, ytile_span); + mem_copy_align16(dst + ((xo + yo + 1 * column_width) ^ swizzle), src + x + 1 * src_pitch, ytile_span); + mem_copy_align16(dst + ((xo + yo + 2 * column_width) ^ swizzle), src + x + 2 * src_pitch, ytile_span); + mem_copy_align16(dst + ((xo + yo + 3 * column_width) ^ swizzle), src + x + 3 * src_pitch, ytile_span); + xo += bytes_per_column; + swizzle ^= swizzle_bit; + } + + if (x2 != x3) { + mem_copy_align16(dst + ((xo + yo + 0 * column_width) ^ swizzle), src + x2 + 0 * src_pitch, x3 - x2); + mem_copy_align16(dst + ((xo + yo + 1 * column_width) ^ swizzle), src + x2 + 1 * src_pitch, x3 - x2); + mem_copy_align16(dst + ((xo + yo + 2 * column_width) ^ swizzle), src + x2 + 2 * src_pitch, x3 - x2); + mem_copy_align16(dst + ((xo + yo + 3 * column_width) ^ swizzle), src + x2 + 3 * src_pitch, x3 - x2); + } + + src += 4 * src_pitch; + } + + if (y2 != y3) { + for (yo = y2 * column_width; yo < y3 * column_width; yo += column_width) { + uint32_t xo = xo1; + uint32_t swizzle = swizzle1; + + mem_copy(dst + ((xo0 + yo) ^ swizzle0), src + x0, x1 - x0); + + /* Step by spans/columns. As it happens, the swizzle bit flips + * at each step so we don't need to calculate it explicitly. + */ + for (x = x1; x < x2; x += ytile_span) { + mem_copy_align16(dst + ((xo + yo) ^ swizzle), src + x, ytile_span); + xo += bytes_per_column; + swizzle ^= swizzle_bit; + } + + mem_copy_align16(dst + ((xo + yo) ^ swizzle), src + x2, x3 - x2); + + src += src_pitch; + } + } +} + +/** + * Copy texture data from X tile layout to linear. 
+ * + * \copydoc tile_copy_fn + */ +static inline void +xtiled_to_linear(uint32_t x0, uint32_t x1, uint32_t x2, uint32_t x3, + uint32_t y0, uint32_t y1, + char *dst, const char *src, + int32_t dst_pitch, + uint32_t swizzle_bit, + isl_mem_copy_fn mem_copy, + isl_mem_copy_fn mem_copy_align16) +{ + /* The copy destination offset for each range copied is the sum of + * an X offset 'x0' or 'xo' and a Y offset 'yo.' + */ + uint32_t xo, yo; + + dst += (ptrdiff_t)y0 * dst_pitch; + + for (yo = y0 * xtile_width; yo < y1 * xtile_width; yo += xtile_width) { + /* Bits 9 and 10 of the copy destination offset control swizzling. + * Only 'yo' contributes to those bits in the total offset, + * so calculate 'swizzle' just once per row. + * Move bits 9 and 10 three and four places respectively down + * to bit 6 and xor them. + */ + uint32_t swizzle = ((yo >> 3) ^ (yo >> 4)) & swizzle_bit; + + mem_copy(dst + x0, src + ((x0 + yo) ^ swizzle), x1 - x0); + + for (xo = x1; xo < x2; xo += xtile_span) { + mem_copy_align16(dst + xo, src + ((xo + yo) ^ swizzle), xtile_span); + } + + mem_copy_align16(dst + x2, src + ((xo + yo) ^ swizzle), x3 - x2); + + dst += dst_pitch; + } +} + + /** + * Copy texture data from Y tile layout to linear. + * + * \copydoc tile_copy_fn + */ +static inline void +ytiled_to_linear(uint32_t x0, uint32_t x1, uint32_t x2, uint32_t x3, + uint32_t y0, uint32_t y3, + char *dst, const char *src, + int32_t dst_pitch, + uint32_t swizzle_bit, + isl_mem_copy_fn mem_copy, + isl_mem_copy_fn mem_copy_align16) +{ + /* Y tiles consist of columns that are 'ytile_span' wide (and the same height + * as the tile). Thus the destination offset for (x,y) is the sum of: + * (x % column_width) // position within column + * (x / column_width) * bytes_per_column // column number * bytes per column + * y * column_width + * + * The copy destination offset for each range copied is the sum of + * an X offset 'xo0' or 'xo' and a Y offset 'yo.' 
+ */ + const uint32_t column_width = ytile_span; + const uint32_t bytes_per_column = column_width * ytile_height; + + uint32_t y1 = MIN2(y3, ALIGN_UP(y0, 4)); + uint32_t y2 = MAX2(y1, ALIGN_DOWN(y3, 4)); + + uint32_t xo0 = (x0 % ytile_span) + (x0 / ytile_span) * bytes_per_column; + uint32_t xo1 = (x1 % ytile_span) + (x1 / ytile_span) * bytes_per_column; + + /* Bit 9 of the destination offset control swizzling. + * Only the X offset contributes to bit 9 of the total offset, + * so swizzle can be calculated in advance for these X positions. + * Move bit 9 three places down to bit 6. + */ + uint32_t swizzle0 = (xo0 >> 3) & swizzle_bit; + uint32_t swizzle1 = (xo1 >> 3) & swizzle_bit; + + uint32_t x, yo; + + dst += (ptrdiff_t)y0 * dst_pitch; + + if (y0 != y1) { + for (yo = y0 * column_width; yo < y1 * column_width; yo += column_width) { + uint32_t xo = xo1; + uint32_t swizzle = swizzle1; + + mem_copy(dst + x0, src + ((xo0 + yo) ^ swizzle0), x1 - x0); + + /* Step by spans/columns. As it happens, the swizzle bit flips + * at each step so we don't need to calculate it explicitly. + */ + for (x = x1; x < x2; x += ytile_span) { + mem_copy_align16(dst + x, src + ((xo + yo) ^ swizzle), ytile_span); + xo += bytes_per_column; + swizzle ^= swizzle_bit; + } + + mem_copy_align16(dst + x2, src + ((xo + yo) ^ swizzle), x3 - x2); + + dst += dst_pitch; + } + } + + for (yo = y1 * column_width; yo < y2 * column_width; yo += 4 * column_width) { + uint32_t xo = xo1; + uint32_t swizzle = swizzle1; + + if (x0 != x1) { + mem_copy(dst + x0 + 0 * dst_pitch, src + ((xo0 + yo + 0 * column_width) ^ swizzle0), x1 - x0); + mem_copy(dst + x0 + 1 * dst_pitch, src + ((xo0 + yo + 1 * column_width) ^ swizzle0), x1 - x0); + mem_copy(dst + x0 + 2 * dst_pitch, src + ((xo0 + yo + 2 * column_width) ^ swizzle0), x1 - x0); + mem_copy(dst + x0 + 3 * dst_pitch, src + ((xo0 + yo + 3 * column_width) ^ swizzle0), x1 - x0); + } + + /* Step by spans/columns. 
As it happens, the swizzle bit flips + * at each step so we don't need to calculate it explicitly. + */ + for (x = x1; x < x2; x += ytile_span) { + mem_copy_align16(dst + x + 0 * dst_pitch, src + ((xo + yo + 0 * column_width) ^ swizzle), ytile_span); + mem_copy_align16(dst + x + 1 * dst_pitch, src + ((xo + yo + 1 * column_width) ^ swizzle), ytile_span); + mem_copy_align16(dst + x + 2 * dst_pitch, src + ((xo + yo + 2 * column_width) ^ swizzle), ytile_span); + mem_copy_align16(dst + x + 3 * dst_pitch, src + ((xo + yo + 3 * column_width) ^ swizzle), ytile_span); + xo += bytes_per_column; + swizzle ^= swizzle_bit; + } + + if (x2 != x3) { + mem_copy_align16(dst + x2 + 0 * dst_pitch, src + ((xo + yo + 0 * column_width) ^ swizzle), x3 - x2); + mem_copy_align16(dst + x2 + 1 * dst_pitch, src + ((xo + yo + 1 * column_width) ^ swizzle), x3 - x2); + mem_copy_align16(dst + x2 + 2 * dst_pitch, src + ((xo + yo + 2 * column_width) ^ swizzle), x3 - x2); + mem_copy_align16(dst + x2 + 3 * dst_pitch, src + ((xo + yo + 3 * column_width) ^ swizzle), x3 - x2); + } + + dst += 4 * dst_pitch; + } + + if (y2 != y3) { + for (yo = y2 * column_width; yo < y3 * column_width; yo += column_width) { + uint32_t xo = xo1; + uint32_t swizzle = swizzle1; + + mem_copy(dst + x0, src + ((xo0 + yo) ^ swizzle0), x1 - x0); + + /* Step by spans/columns. As it happens, the swizzle bit flips + * at each step so we don't need to calculate it explicitly. 
+ */ + for (x = x1; x < x2; x += ytile_span) { + mem_copy_align16(dst + x, src + ((xo + yo) ^ swizzle), ytile_span); + xo += bytes_per_column; + swizzle ^= swizzle_bit; + } + + mem_copy_align16(dst + x2, src + ((xo + yo) ^ swizzle), x3 - x2); + + dst += dst_pitch; + } + } +} + +#if defined(INLINE_SSE41) +static ALWAYS_INLINE void * +_memcpy_streaming_load(void *dest, const void *src, size_t count) +{ + if (count == 16) { + __m128i val = _mm_stream_load_si128((__m128i *)src); + _mm_storeu_si128((__m128i *)dest, val); + return dest; + } else if (count == 64) { + __m128i val0 = _mm_stream_load_si128(((__m128i *)src) + 0); + __m128i val1 = _mm_stream_load_si128(((__m128i *)src) + 1); + __m128i val2 = _mm_stream_load_si128(((__m128i *)src) + 2); + __m128i val3 = _mm_stream_load_si128(((__m128i *)src) + 3); + _mm_storeu_si128(((__m128i *)dest) + 0, val0); + _mm_storeu_si128(((__m128i *)dest) + 1, val1); + _mm_storeu_si128(((__m128i *)dest) + 2, val2); + _mm_storeu_si128(((__m128i *)dest) + 3, val3); + return dest; + } else { + assert(count < 64); /* and (count < 16) for ytiled */ + return memcpy(dest, src, count); + } +} +#endif + +static isl_mem_copy_fn +choose_copy_function(isl_memcpy_type copy_type) +{ + switch(copy_type) { + case ISL_MEMCPY: + return memcpy; + case ISL_MEMCPY_BGRA8: + return rgba8_copy; + case ISL_MEMCPY_STREAMING_LOAD: +#if defined(INLINE_SSE41) + return _memcpy_streaming_load; +#else + unreachable("ISL_MEMCOPY_STREAMING_LOAD requires sse4.1"); +#endif + case ISL_MEMCPY_INVALID: + unreachable("invalid copy_type"); + } + unreachable("unhandled copy_type"); + return NULL; +} + +/** + * Copy texture data from linear to X tile layout, faster. + * + * Same as \ref linear_to_xtiled but faster, because it passes constant + * parameters for common cases, allowing the compiler to inline code + * optimized for those cases. 
+ * + * \copydoc tile_copy_fn + */ +static FLATTEN void +linear_to_xtiled_faster(uint32_t x0, uint32_t x1, uint32_t x2, uint32_t x3, + uint32_t y0, uint32_t y1, + char *dst, const char *src, + int32_t src_pitch, + uint32_t swizzle_bit, + isl_memcpy_type copy_type) +{ + isl_mem_copy_fn mem_copy = choose_copy_function(copy_type); + + if (x0 == 0 && x3 == xtile_width && y0 == 0 && y1 == xtile_height) { + if (mem_copy == memcpy) + return linear_to_xtiled(0, 0, xtile_width, xtile_width, 0, xtile_height, + dst, src, src_pitch, swizzle_bit, memcpy, memcpy); + else if (mem_copy == rgba8_copy) + return linear_to_xtiled(0, 0, xtile_width, xtile_width, 0, xtile_height, + dst, src, src_pitch, swizzle_bit, + rgba8_copy, rgba8_copy_aligned_dst); + else + unreachable("not reached"); + } else { + if (mem_copy == memcpy) + return linear_to_xtiled(x0, x1, x2, x3, y0, y1, + dst, src, src_pitch, swizzle_bit, + memcpy, memcpy); + else if (mem_copy == rgba8_copy) + return linear_to_xtiled(x0, x1, x2, x3, y0, y1, + dst, src, src_pitch, swizzle_bit, + rgba8_copy, rgba8_copy_aligned_dst); + else + unreachable("not reached"); + } + linear_to_xtiled(x0, x1, x2, x3, y0, y1, + dst, src, src_pitch, swizzle_bit, mem_copy, mem_copy); +} + +/** + * Copy texture data from linear to Y tile layout, faster. + * + * Same as \ref linear_to_ytiled but faster, because it passes constant + * parameters for common cases, allowing the compiler to inline code + * optimized for those cases. 
+ * + * \copydoc tile_copy_fn + */ +static FLATTEN void +linear_to_ytiled_faster(uint32_t x0, uint32_t x1, uint32_t x2, uint32_t x3, + uint32_t y0, uint32_t y1, + char *dst, const char *src, + int32_t src_pitch, + uint32_t swizzle_bit, + isl_memcpy_type copy_type) +{ + isl_mem_copy_fn mem_copy = choose_copy_function(copy_type); + + if (x0 == 0 && x3 == ytile_width && y0 == 0 && y1 == ytile_height) { + if (mem_copy == memcpy) + return linear_to_ytiled(0, 0, ytile_width, ytile_width, 0, ytile_height, + dst, src, src_pitch, swizzle_bit, memcpy, memcpy); + else if (mem_copy == rgba8_copy) + return linear_to_ytiled(0, 0, ytile_width, ytile_width, 0, ytile_height, + dst, src, src_pitch, swizzle_bit, + rgba8_copy, rgba8_copy_aligned_dst); + else + unreachable("not reached"); + } else { + if (mem_copy == memcpy) + return linear_to_ytiled(x0, x1, x2, x3, y0, y1, + dst, src, src_pitch, swizzle_bit, memcpy, memcpy); + else if (mem_copy == rgba8_copy) + return linear_to_ytiled(x0, x1, x2, x3, y0, y1, + dst, src, src_pitch, swizzle_bit, + rgba8_copy, rgba8_copy_aligned_dst); + else + unreachable("not reached"); + } + linear_to_ytiled(x0, x1, x2, x3, y0, y1, + dst, src, src_pitch, swizzle_bit, mem_copy, mem_copy); +} + +/** + * Copy texture data from X tile layout to linear, faster. + * + * Same as \ref xtile_to_linear but faster, because it passes constant + * parameters for common cases, allowing the compiler to inline code + * optimized for those cases. 
+ * + * \copydoc tile_copy_fn + */ +static FLATTEN void +xtiled_to_linear_faster(uint32_t x0, uint32_t x1, uint32_t x2, uint32_t x3, + uint32_t y0, uint32_t y1, + char *dst, const char *src, + int32_t dst_pitch, + uint32_t swizzle_bit, + isl_memcpy_type copy_type) +{ + isl_mem_copy_fn mem_copy = choose_copy_function(copy_type); + + if (x0 == 0 && x3 == xtile_width && y0 == 0 && y1 == xtile_height) { + if (mem_copy == memcpy) + return xtiled_to_linear(0, 0, xtile_width, xtile_width, 0, xtile_height, + dst, src, dst_pitch, swizzle_bit, memcpy, memcpy); + else if (mem_copy == rgba8_copy) + return xtiled_to_linear(0, 0, xtile_width, xtile_width, 0, xtile_height, + dst, src, dst_pitch, swizzle_bit, + rgba8_copy, rgba8_copy_aligned_src); +#if defined(INLINE_SSE41) + else if (mem_copy == _memcpy_streaming_load) + return xtiled_to_linear(0, 0, xtile_width, xtile_width, 0, xtile_height, + dst, src, dst_pitch, swizzle_bit, + memcpy, _memcpy_streaming_load); +#endif + else + unreachable("not reached"); + } else { + if (mem_copy == memcpy) + return xtiled_to_linear(x0, x1, x2, x3, y0, y1, + dst, src, dst_pitch, swizzle_bit, memcpy, memcpy); + else if (mem_copy == rgba8_copy) + return xtiled_to_linear(x0, x1, x2, x3, y0, y1, + dst, src, dst_pitch, swizzle_bit, + rgba8_copy, rgba8_copy_aligned_src); +#if defined(INLINE_SSE41) + else if (mem_copy == _memcpy_streaming_load) + return xtiled_to_linear(x0, x1, x2, x3, y0, y1, + dst, src, dst_pitch, swizzle_bit, + memcpy, _memcpy_streaming_load); +#endif + else + unreachable("not reached"); + } + xtiled_to_linear(x0, x1, x2, x3, y0, y1, + dst, src, dst_pitch, swizzle_bit, mem_copy, mem_copy); +} + +/** + * Copy texture data from Y tile layout to linear, faster. + * + * Same as \ref ytile_to_linear but faster, because it passes constant + * parameters for common cases, allowing the compiler to inline code + * optimized for those cases. 
+ * + * \copydoc tile_copy_fn + */ +static FLATTEN void +ytiled_to_linear_faster(uint32_t x0, uint32_t x1, uint32_t x2, uint32_t x3, + uint32_t y0, uint32_t y1, + char *dst, const char *src, + int32_t dst_pitch, + uint32_t swizzle_bit, + isl_memcpy_type copy_type) +{ + isl_mem_copy_fn mem_copy = choose_copy_function(copy_type); + + if (x0 == 0 && x3 == ytile_width && y0 == 0 && y1 == ytile_height) { + if (mem_copy == memcpy) + return ytiled_to_linear(0, 0, ytile_width, ytile_width, 0, ytile_height, + dst, src, dst_pitch, swizzle_bit, memcpy, memcpy); + else if (mem_copy == rgba8_copy) + return ytiled_to_linear(0, 0, ytile_width, ytile_width, 0, ytile_height, + dst, src, dst_pitch, swizzle_bit, + rgba8_copy, rgba8_copy_aligned_src); +#if defined(INLINE_SSE41) + else if (copy_type == ISL_MEMCPY_STREAMING_LOAD) + return ytiled_to_linear(0, 0, ytile_width, ytile_width, 0, ytile_height, + dst, src, dst_pitch, swizzle_bit, + memcpy, _memcpy_streaming_load); +#endif + else + unreachable("not reached"); + } else { + if (mem_copy == memcpy) + return ytiled_to_linear(x0, x1, x2, x3, y0, y1, + dst, src, dst_pitch, swizzle_bit, memcpy, memcpy); + else if (mem_copy == rgba8_copy) + return ytiled_to_linear(x0, x1, x2, x3, y0, y1, + dst, src, dst_pitch, swizzle_bit, + rgba8_copy, rgba8_copy_aligned_src); +#if defined(INLINE_SSE41) + else if (copy_type == ISL_MEMCPY_STREAMING_LOAD) + return ytiled_to_linear(x0, x1, x2, x3, y0, y1, + dst, src, dst_pitch, swizzle_bit, + memcpy, _memcpy_streaming_load); +#endif + else + unreachable("not reached"); + } + ytiled_to_linear(x0, x1, x2, x3, y0, y1, + dst, src, dst_pitch, swizzle_bit, mem_copy, mem_copy); +} + +/** + * Copy from linear to tiled texture. + * + * Divide the region given by X range [xt1, xt2) and Y range [yt1, yt2) into + * pieces that do not cross tile boundaries and copy each piece with a tile + * copy function (\ref tile_copy_fn). + * The X range is in bytes, i.e. pixels * bytes-per-pixel. 
+ * The Y range is in pixels (i.e. unitless). + * 'dst' is the address of (0, 0) in the destination tiled texture. + * 'src' is the address of (xt1, yt1) in the source linear texture. + */ +static void +intel_linear_to_tiled(uint32_t xt1, uint32_t xt2, + uint32_t yt1, uint32_t yt2, + char *dst, const char *src, + uint32_t dst_pitch, int32_t src_pitch, + bool has_swizzling, + enum isl_tiling tiling, + isl_memcpy_type copy_type) +{ + tile_copy_fn tile_copy; + uint32_t xt0, xt3; + uint32_t yt0, yt3; + uint32_t xt, yt; + uint32_t tw, th, span; + uint32_t swizzle_bit = has_swizzling ? 1<<6 : 0; + + if (tiling == ISL_TILING_X) { + tw = xtile_width; + th = xtile_height; + span = xtile_span; + tile_copy = linear_to_xtiled_faster; + } else if (tiling == ISL_TILING_Y0) { + tw = ytile_width; + th = ytile_height; + span = ytile_span; + tile_copy = linear_to_ytiled_faster; + } else { + unreachable("unsupported tiling"); + } + + /* Round out to tile boundaries. */ + xt0 = ALIGN_DOWN(xt1, tw); + xt3 = ALIGN_UP (xt2, tw); + yt0 = ALIGN_DOWN(yt1, th); + yt3 = ALIGN_UP (yt2, th); + + /* Loop over all tiles to which we have something to copy. + * 'xt' and 'yt' are the origin of the destination tile, whether copying + * copying a full or partial tile. + * tile_copy() copies one tile or partial tile. + * Looping x inside y is the faster memory access pattern. + */ + for (yt = yt0; yt < yt3; yt += th) { + for (xt = xt0; xt < xt3; xt += tw) { + /* The area to update is [x0,x3) x [y0,y1). + * May not want the whole tile, hence the min and max. + */ + uint32_t x0 = MAX2(xt1, xt); + uint32_t y0 = MAX2(yt1, yt); + uint32_t x3 = MIN2(xt2, xt + tw); + uint32_t y1 = MIN2(yt2, yt + th); + + /* [x0,x3) is split into [x0,x1), [x1,x2), [x2,x3) such that + * the middle interval is the longest span-aligned part. + * The sub-ranges could be empty. 
+ */ + uint32_t x1, x2; + x1 = ALIGN_UP(x0, span); + if (x1 > x3) + x1 = x2 = x3; + else + x2 = ALIGN_DOWN(x3, span); + + assert(x0 <= x1 && x1 <= x2 && x2 <= x3); + assert(x1 - x0 < span && x3 - x2 < span); + assert(x3 - x0 <= tw); + assert((x2 - x1) % span == 0); + + /* Translate by (xt,yt) for single-tile copier. */ + tile_copy(x0-xt, x1-xt, x2-xt, x3-xt, + y0-yt, y1-yt, + dst + (ptrdiff_t)xt * th + (ptrdiff_t)yt * dst_pitch, + src + (ptrdiff_t)xt - xt1 + ((ptrdiff_t)yt - yt1) * src_pitch, + src_pitch, + swizzle_bit, + copy_type); + } + } +} + +/** + * Copy from tiled to linear texture. + * + * Divide the region given by X range [xt1, xt2) and Y range [yt1, yt2) into + * pieces that do not cross tile boundaries and copy each piece with a tile + * copy function (\ref tile_copy_fn). + * The X range is in bytes, i.e. pixels * bytes-per-pixel. + * The Y range is in pixels (i.e. unitless). + * 'dst' is the address of (xt1, yt1) in the destination linear texture. + * 'src' is the address of (0, 0) in the source tiled texture. + */ +static void +intel_tiled_to_linear(uint32_t xt1, uint32_t xt2, + uint32_t yt1, uint32_t yt2, + char *dst, const char *src, + int32_t dst_pitch, uint32_t src_pitch, + bool has_swizzling, + enum isl_tiling tiling, + isl_memcpy_type copy_type) +{ + tile_copy_fn tile_copy; + uint32_t xt0, xt3; + uint32_t yt0, yt3; + uint32_t xt, yt; + uint32_t tw, th, span; + uint32_t swizzle_bit = has_swizzling ? 1<<6 : 0; + + if (tiling == ISL_TILING_X) { + tw = xtile_width; + th = xtile_height; + span = xtile_span; + tile_copy = xtiled_to_linear_faster; + } else if (tiling == ISL_TILING_Y0) { + tw = ytile_width; + th = ytile_height; + span = ytile_span; + tile_copy = ytiled_to_linear_faster; + } else { + unreachable("unsupported tiling"); + } + +#if defined(INLINE_SSE41) + if (copy_type == ISL_MEMCPY_STREAMING_LOAD) { + /* The hidden cacheline sized register used by movntdqa can apparently + * give you stale data, so do an mfence to invalidate it. 
+ */ + _mm_mfence(); + } +#endif + + /* Round out to tile boundaries. */ + xt0 = ALIGN_DOWN(xt1, tw); + xt3 = ALIGN_UP (xt2, tw); + yt0 = ALIGN_DOWN(yt1, th); + yt3 = ALIGN_UP (yt2, th); + + /* Loop over all tiles to which we have something to copy. + * 'xt' and 'yt' are the origin of the destination tile, whether copying + * copying a full or partial tile. + * tile_copy() copies one tile or partial tile. + * Looping x inside y is the faster memory access pattern. + */ + for (yt = yt0; yt < yt3; yt += th) { + for (xt = xt0; xt < xt3; xt += tw) { + /* The area to update is [x0,x3) x [y0,y1). + * May not want the whole tile, hence the min and max. + */ + uint32_t x0 = MAX2(xt1, xt); + uint32_t y0 = MAX2(yt1, yt); + uint32_t x3 = MIN2(xt2, xt + tw); + uint32_t y1 = MIN2(yt2, yt + th); + + /* [x0,x3) is split into [x0,x1), [x1,x2), [x2,x3) such that + * the middle interval is the longest span-aligned part. + * The sub-ranges could be empty. + */ + uint32_t x1, x2; + x1 = ALIGN_UP(x0, span); + if (x1 > x3) + x1 = x2 = x3; + else + x2 = ALIGN_DOWN(x3, span); + + assert(x0 <= x1 && x1 <= x2 && x2 <= x3); + assert(x1 - x0 < span && x3 - x2 < span); + assert(x3 - x0 <= tw); + assert((x2 - x1) % span == 0); + + /* Translate by (xt,yt) for single-tile copier. 
*/ + tile_copy(x0-xt, x1-xt, x2-xt, x3-xt, + y0-yt, y1-yt, + dst + (ptrdiff_t)xt - xt1 + ((ptrdiff_t)yt - yt1) * dst_pitch, + src + (ptrdiff_t)xt * th + (ptrdiff_t)yt * src_pitch, + dst_pitch, + swizzle_bit, + copy_type); + } + } +} diff -Nru mesa-18.3.3/src/intel/isl/isl_tiled_memcpy_normal.c mesa-19.0.1/src/intel/isl/isl_tiled_memcpy_normal.c --- mesa-18.3.3/src/intel/isl/isl_tiled_memcpy_normal.c 1970-01-01 00:00:00.000000000 +0000 +++ mesa-19.0.1/src/intel/isl/isl_tiled_memcpy_normal.c 2019-03-31 23:16:37.000000000 +0000 @@ -0,0 +1,59 @@ +/* + * Mesa 3-D graphics library + * + * Copyright 2012 Intel Corporation + * Copyright 2013 Google + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sublicense, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. + * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
+ * + * Authors: + * Chad Versace + * Frank Henigman + */ + + +#include "isl_tiled_memcpy.c" + +void +_isl_memcpy_linear_to_tiled(uint32_t xt1, uint32_t xt2, + uint32_t yt1, uint32_t yt2, + char *dst, const char *src, + uint32_t dst_pitch, int32_t src_pitch, + bool has_swizzling, + enum isl_tiling tiling, + isl_memcpy_type copy_type) +{ + intel_linear_to_tiled(xt1, xt2, yt1, yt2, dst, src, dst_pitch, src_pitch, + has_swizzling, tiling, copy_type); +} + +void +_isl_memcpy_tiled_to_linear(uint32_t xt1, uint32_t xt2, + uint32_t yt1, uint32_t yt2, + char *dst, const char *src, + int32_t dst_pitch, uint32_t src_pitch, + bool has_swizzling, + enum isl_tiling tiling, + isl_memcpy_type copy_type) +{ + intel_tiled_to_linear(xt1, xt2, yt1, yt2, dst, src, dst_pitch, src_pitch, + has_swizzling, tiling, copy_type); +} diff -Nru mesa-18.3.3/src/intel/isl/isl_tiled_memcpy_sse41.c mesa-19.0.1/src/intel/isl/isl_tiled_memcpy_sse41.c --- mesa-18.3.3/src/intel/isl/isl_tiled_memcpy_sse41.c 1970-01-01 00:00:00.000000000 +0000 +++ mesa-19.0.1/src/intel/isl/isl_tiled_memcpy_sse41.c 2019-03-31 23:16:37.000000000 +0000 @@ -0,0 +1,60 @@ +/* + * Mesa 3-D graphics library + * + * Copyright 2012 Intel Corporation + * Copyright 2013 Google + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sublicense, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. 
+ * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. + * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + * Authors: + * Chad Versace + * Frank Henigman + */ + +#define INLINE_SSE41 + +#include "isl_tiled_memcpy.c" + +void +_isl_memcpy_linear_to_tiled_sse41(uint32_t xt1, uint32_t xt2, + uint32_t yt1, uint32_t yt2, + char *dst, const char *src, + uint32_t dst_pitch, int32_t src_pitch, + bool has_swizzling, + enum isl_tiling tiling, + isl_memcpy_type copy_type) +{ + intel_linear_to_tiled(xt1, xt2, yt1, yt2, dst, src, dst_pitch, src_pitch, + has_swizzling, tiling, copy_type); +} + +void +_isl_memcpy_tiled_to_linear_sse41(uint32_t xt1, uint32_t xt2, + uint32_t yt1, uint32_t yt2, + char *dst, const char *src, + int32_t dst_pitch, uint32_t src_pitch, + bool has_swizzling, + enum isl_tiling tiling, + isl_memcpy_type copy_type) +{ + intel_tiled_to_linear(xt1, xt2, yt1, yt2, dst, src, dst_pitch, src_pitch, + has_swizzling, tiling, copy_type); +} diff -Nru mesa-18.3.3/src/intel/isl/meson.build mesa-19.0.1/src/intel/isl/meson.build --- mesa-18.3.3/src/intel/isl/meson.build 2018-12-07 18:58:04.000000000 +0000 +++ mesa-19.0.1/src/intel/isl/meson.build 2019-03-31 23:16:37.000000000 +0000 @@ -69,6 +69,39 @@ command : [prog_python, '@INPUT0@', '--csv', '@INPUT1@', '--out', '@OUTPUT@'], ) +files_isl_tiled_memcpy = files( + 'isl_tiled_memcpy_normal.c' +) + +files_isl_tiled_memcpy_sse41 = files( + 'isl_tiled_memcpy_sse41.c', +) + +isl_tiled_memcpy = static_library( + 'isl_tiled_memcpy', + [files_isl_tiled_memcpy], + include_directories : [ + inc_common, inc_intel, inc_drm_uapi, + ], + c_args : 
[c_vis_args, no_override_init_args, '-msse2'], + extra_files : ['isl_tiled_memcpy.c'] +) + +if with_sse41 + isl_tiled_memcpy_sse41 = static_library( + 'isl_tiled_memcpy_sse41', + [files_isl_tiled_memcpy_sse41], + include_directories : [ + inc_common, inc_intel, inc_drm_uapi, + ], + link_args : ['-Wl,--exclude-libs=ALL'], + c_args : [c_vis_args, no_override_init_args, '-msse2', sse41_args], + extra_files : ['isl_tiled_memcpy.c'] + ) +else + isl_tiled_memcpy_sse41 = [] +endif + libisl_files = files( 'isl.c', 'isl.h', @@ -83,7 +116,7 @@ 'isl', [libisl_files, isl_format_layout_c, genX_bits_h], include_directories : [inc_common, inc_intel, inc_drm_uapi], - link_with : isl_gen_libs, + link_with : [isl_gen_libs, isl_tiled_memcpy, isl_tiled_memcpy_sse41], c_args : [c_vis_args, no_override_init_args], ) @@ -96,6 +129,7 @@ dependencies : dep_m, include_directories : [inc_common, inc_intel], link_with : [libisl, libintel_dev, libmesa_util], - ) + ), + suite : ['intel'], ) endif diff -Nru mesa-18.3.3/src/intel/Makefile.isl.am mesa-19.0.1/src/intel/Makefile.isl.am --- mesa-18.3.3/src/intel/Makefile.isl.am 2018-09-27 19:13:54.000000000 +0000 +++ mesa-19.0.1/src/intel/Makefile.isl.am 2019-03-31 23:16:37.000000000 +0000 @@ -31,11 +31,29 @@ isl/libisl-gen11.la \ $(NULL) -noinst_LTLIBRARIES += $(ISL_GEN_LIBS) isl/libisl.la +noinst_LTLIBRARIES += $(ISL_GEN_LIBS) \ + isl/libisl.la \ + libisl_tiled_memcpy.la + +isl_libisl_la_LIBADD = $(ISL_GEN_LIBS) \ + libisl_tiled_memcpy.la + +if SSE41_SUPPORTED +isl_libisl_la_LIBADD += libisl_tiled_memcpy_sse41.la +noinst_LTLIBRARIES += libisl_tiled_memcpy_sse41.la +endif -isl_libisl_la_LIBADD = $(ISL_GEN_LIBS) isl_libisl_la_SOURCES = $(ISL_FILES) $(ISL_GENERATED_FILES) +libisl_tiled_memcpy_la_SOURCES = $(ISL_TILED_MEMCPY_FILES) +libisl_tiled_memcpy_la_CFLAGS = $(AM_CFLAGS) + +libisl_tiled_memcpy_sse41_la_SOURCES = $(ISL_TILED_MEMCPY_SSE41_FILES) +libisl_tiled_memcpy_sse41_la_CFLAGS = $(AM_CFLAGS) $(SSE41_CFLAGS) + +isl_tiled_memcpy_normal.c: 
$(ISL_TILED_MEMCPY_DEP_FILES) +isl_tiled_memcpy_sse41.c: $(ISL_TILED_MEMCPY_DEP_FILES) + isl_libisl_gen4_la_SOURCES = $(ISL_GEN4_FILES) isl_libisl_gen4_la_CFLAGS = $(AM_CFLAGS) -DGEN_VERSIONx10=40 @@ -90,4 +108,5 @@ EXTRA_DIST += \ isl/gen_format_layout.py \ isl/isl_format_layout.csv \ - isl/README + isl/README \ + $(ISL_TILED_MEMCPY_DEP_FILES) diff -Nru mesa-18.3.3/src/intel/Makefile.sources mesa-19.0.1/src/intel/Makefile.sources --- mesa-18.3.3/src/intel/Makefile.sources 2018-12-07 18:58:04.000000000 +0000 +++ mesa-19.0.1/src/intel/Makefile.sources 2019-03-31 23:16:37.000000000 +0000 @@ -62,8 +62,8 @@ compiler/brw_fs.h \ compiler/brw_fs_live_variables.cpp \ compiler/brw_fs_live_variables.h \ - compiler/brw_fs_lower_conversions.cpp \ compiler/brw_fs_lower_pack.cpp \ + compiler/brw_fs_lower_regioning.cpp \ compiler/brw_fs_nir.cpp \ compiler/brw_fs_reg_allocate.cpp \ compiler/brw_fs_register_coalesce.cpp \ @@ -85,6 +85,7 @@ compiler/brw_nir_attribute_workarounds.c \ compiler/brw_nir_lower_cs_intrinsics.c \ compiler/brw_nir_lower_image_load_store.c \ + compiler/brw_nir_lower_mem_access_bit_sizes.c \ compiler/brw_nir_opt_peephole_ffma.c \ compiler/brw_nir_tcs_workarounds.c \ compiler/brw_packed_float.c \ @@ -218,8 +219,18 @@ ISL_GENERATED_FILES = \ isl/isl_format_layout.c +ISL_TILED_MEMCPY_FILES = \ + isl/isl_tiled_memcpy_normal.c + +ISL_TILED_MEMCPY_SSE41_FILES = \ + isl/isl_tiled_memcpy_sse41.c + +ISL_TILED_MEMCPY_DEP_FILES = \ + isl/isl_tiled_memcpy.c + VULKAN_FILES := \ vulkan/anv_allocator.c \ + vulkan/anv_android.h \ vulkan/anv_batch_chain.c \ vulkan/anv_blorp.c \ vulkan/anv_cmd_buffer.c \ @@ -246,6 +257,9 @@ vulkan/anv_wsi.c \ vulkan/vk_format_info.h +VULKAN_NON_ANDROID_FILES := \ + vulkan/anv_android_stubs.c + VULKAN_ANDROID_FILES := \ vulkan/anv_android.c diff -Nru mesa-18.3.3/src/intel/Makefile.vulkan.am mesa-19.0.1/src/intel/Makefile.vulkan.am --- mesa-18.3.3/src/intel/Makefile.vulkan.am 2018-12-07 18:58:04.000000000 +0000 +++ 
mesa-19.0.1/src/intel/Makefile.vulkan.am 2019-03-31 23:16:37.000000000 +0000 @@ -171,6 +171,8 @@ VULKAN_CFLAGS += $(ANDROID_CFLAGS) VULKAN_LIB_DEPS += $(ANDROID_LIBS) VULKAN_SOURCES += $(VULKAN_ANDROID_FILES) +else +VULKAN_SOURCES += $(VULKAN_NON_ANDROID_FILES) endif if HAVE_PLATFORM_X11 @@ -251,6 +253,7 @@ vulkan/tests/block_pool_no_free \ vulkan/tests/state_pool_no_free \ vulkan/tests/state_pool_free_list_only \ + vulkan/tests/state_pool_padding \ vulkan/tests/state_pool VULKAN_TEST_LDADD = \ @@ -260,15 +263,23 @@ check_PROGRAMS += $(VULKAN_TESTS) TESTS += $(VULKAN_TESTS) +vulkan_tests_block_pool_no_free_CFLAGS = $(VULKAN_CFLAGS) vulkan_tests_block_pool_no_free_CPPFLAGS = $(VULKAN_CPPFLAGS) vulkan_tests_block_pool_no_free_LDADD = $(VULKAN_TEST_LDADD) +vulkan_tests_state_pool_no_free_CFLAGS = $(VULKAN_CFLAGS) vulkan_tests_state_pool_no_free_CPPFLAGS = $(VULKAN_CPPFLAGS) vulkan_tests_state_pool_no_free_LDADD = $(VULKAN_TEST_LDADD) +vulkan_tests_state_pool_free_list_only_CFLAGS = $(VULKAN_CFLAGS) vulkan_tests_state_pool_free_list_only_CPPFLAGS = $(VULKAN_CPPFLAGS) vulkan_tests_state_pool_free_list_only_LDADD = $(VULKAN_TEST_LDADD) +vulkan_tests_state_pool_padding_CFLAGS = $(VULKAN_CFLAGS) +vulkan_tests_state_pool_padding_CPPFLAGS = $(VULKAN_CPPFLAGS) +vulkan_tests_state_pool_padding_LDADD = $(VULKAN_TEST_LDADD) + +vulkan_tests_state_pool_CFLAGS = $(VULKAN_CFLAGS) vulkan_tests_state_pool_CPPFLAGS = $(VULKAN_CPPFLAGS) vulkan_tests_state_pool_LDADD = $(VULKAN_TEST_LDADD) diff -Nru mesa-18.3.3/src/intel/tools/aubinator.c mesa-19.0.1/src/intel/tools/aubinator.c --- mesa-18.3.3/src/intel/tools/aubinator.c 2018-12-07 18:58:04.000000000 +0000 +++ mesa-19.0.1/src/intel/tools/aubinator.c 2019-03-31 23:16:37.000000000 +0000 @@ -40,7 +40,6 @@ #include "util/macros.h" -#include "common/gen_decoder.h" #include "aub_read.h" #include "aub_mem.h" @@ -131,7 +130,7 @@ } static void -handle_execlist_write(void *user_data, enum gen_engine engine, uint64_t context_descriptor) 
+handle_execlist_write(void *user_data, enum drm_i915_gem_engine_class engine, uint64_t context_descriptor) { const uint32_t pphwsp_size = 4096; uint32_t pphwsp_addr = context_descriptor & 0xfffff000; @@ -143,6 +142,7 @@ uint32_t ring_buffer_head = context[5]; uint32_t ring_buffer_tail = context[7]; uint32_t ring_buffer_start = context[9]; + uint32_t ring_buffer_length = (context[11] & 0x1ff000) + 4096; mem.pml4 = (uint64_t)context[49] << 32 | context[51]; batch_ctx.user_data = &mem; @@ -150,7 +150,7 @@ struct gen_batch_decode_bo ring_bo = aub_mem_get_ggtt_bo(&mem, ring_buffer_start); assert(ring_bo.size > 0); - void *commands = (uint8_t *)ring_bo.map + (ring_buffer_start - ring_bo.addr); + void *commands = (uint8_t *)ring_bo.map + (ring_buffer_start - ring_bo.addr) + ring_buffer_head; if (context_descriptor & 0x100 /* ppgtt */) { batch_ctx.get_bo = aub_mem_get_ppgtt_bo; @@ -158,19 +158,21 @@ batch_ctx.get_bo = aub_mem_get_ggtt_bo; } - (void)engine; /* TODO */ - gen_print_batch(&batch_ctx, commands, ring_buffer_tail - ring_buffer_head, - 0); + batch_ctx.engine = engine; + gen_print_batch(&batch_ctx, commands, + MIN2(ring_buffer_tail - ring_buffer_head, ring_buffer_length), + ring_bo.addr + ring_buffer_head); aub_mem_clear_bo_maps(&mem); } static void -handle_ring_write(void *user_data, enum gen_engine engine, +handle_ring_write(void *user_data, enum drm_i915_gem_engine_class engine, const void *data, uint32_t data_len) { batch_ctx.user_data = &mem; batch_ctx.get_bo = aub_mem_get_ggtt_bo; + batch_ctx.engine = engine; gen_print_batch(&batch_ctx, data, data_len, 0); aub_mem_clear_bo_maps(&mem); diff -Nru mesa-18.3.3/src/intel/tools/aubinator_error_decode.c mesa-19.0.1/src/intel/tools/aubinator_error_decode.c --- mesa-18.3.3/src/intel/tools/aubinator_error_decode.c 2018-12-07 18:58:04.000000000 +0000 +++ mesa-19.0.1/src/intel/tools/aubinator_error_decode.c 2019-03-31 23:16:37.000000000 +0000 @@ -76,49 +76,42 @@ } struct ring_register_mapping { - unsigned ring_class; + 
enum drm_i915_gem_engine_class ring_class; unsigned ring_instance; const char *register_name; }; -enum { - RCS, - BCS, - VCS, - VECS, -}; - static const struct ring_register_mapping acthd_registers[] = { - { BCS, 0, "BCS_ACTHD_UDW" }, - { VCS, 0, "VCS_ACTHD_UDW" }, - { VCS, 1, "VCS2_ACTHD_UDW" }, - { RCS, 0, "ACTHD_UDW" }, - { VECS, 0, "VECS_ACTHD_UDW" }, + { I915_ENGINE_CLASS_COPY, 0, "BCS_ACTHD_UDW" }, + { I915_ENGINE_CLASS_VIDEO, 0, "VCS_ACTHD_UDW" }, + { I915_ENGINE_CLASS_VIDEO, 1, "VCS2_ACTHD_UDW" }, + { I915_ENGINE_CLASS_RENDER, 0, "ACTHD_UDW" }, + { I915_ENGINE_CLASS_VIDEO_ENHANCE, 0, "VECS_ACTHD_UDW" }, }; static const struct ring_register_mapping ctl_registers[] = { - { BCS, 0, "BCS_RING_BUFFER_CTL" }, - { VCS, 0, "VCS_RING_BUFFER_CTL" }, - { VCS, 1, "VCS2_RING_BUFFER_CTL" }, - { RCS, 0, "RCS_RING_BUFFER_CTL" }, - { VECS, 0, "VECS_RING_BUFFER_CTL" }, + { I915_ENGINE_CLASS_COPY, 0, "BCS_RING_BUFFER_CTL" }, + { I915_ENGINE_CLASS_VIDEO, 0, "VCS_RING_BUFFER_CTL" }, + { I915_ENGINE_CLASS_VIDEO, 1, "VCS2_RING_BUFFER_CTL" }, + { I915_ENGINE_CLASS_RENDER, 0, "RCS_RING_BUFFER_CTL" }, + { I915_ENGINE_CLASS_VIDEO_ENHANCE, 0, "VECS_RING_BUFFER_CTL" }, }; static const struct ring_register_mapping fault_registers[] = { - { BCS, 0, "BCS_FAULT_REG" }, - { VCS, 0, "VCS_FAULT_REG" }, - { RCS, 0, "RCS_FAULT_REG" }, - { VECS, 0, "VECS_FAULT_REG" }, + { I915_ENGINE_CLASS_COPY, 0, "BCS_FAULT_REG" }, + { I915_ENGINE_CLASS_VIDEO, 0, "VCS_FAULT_REG" }, + { I915_ENGINE_CLASS_RENDER, 0, "RCS_FAULT_REG" }, + { I915_ENGINE_CLASS_VIDEO_ENHANCE, 0, "VECS_FAULT_REG" }, }; static int ring_name_to_class(const char *ring_name, - unsigned int *class) + enum drm_i915_gem_engine_class *class) { static const char *class_names[] = { - [RCS] = "rcs", - [BCS] = "bcs", - [VCS] = "vcs", - [VECS] = "vecs", + [I915_ENGINE_CLASS_RENDER] = "rcs", + [I915_ENGINE_CLASS_COPY] = "bcs", + [I915_ENGINE_CLASS_VIDEO] = "vcs", + [I915_ENGINE_CLASS_VIDEO_ENHANCE] = "vecs", }; for (size_t i = 0; i < 
ARRAY_SIZE(class_names); i++) { if (strncmp(ring_name, class_names[i], strlen(class_names[i]))) @@ -133,11 +126,11 @@ unsigned int class; int instance; } legacy_names[] = { - { "render", RCS, 0 }, - { "blt", BCS, 0 }, - { "bsd", VCS, 0 }, - { "bsd2", VCS, 1 }, - { "vebox", VECS, 0 }, + { "render", I915_ENGINE_CLASS_RENDER, 0 }, + { "blt", I915_ENGINE_CLASS_COPY, 0 }, + { "bsd", I915_ENGINE_CLASS_VIDEO, 0 }, + { "bsd2", I915_ENGINE_CLASS_VIDEO, 1 }, + { "vebox", I915_ENGINE_CLASS_VIDEO_ENHANCE, 0 }, }; for (size_t i = 0; i < ARRAY_SIZE(legacy_names); i++) { if (strcmp(ring_name, legacy_names[i].name)) @@ -155,7 +148,7 @@ unsigned nb_mapping, const char *ring_name) { - unsigned int class; + enum drm_i915_gem_engine_class class; int instance; instance = ring_name_to_class(ring_name, &class); @@ -174,7 +167,7 @@ instdone_register_for_ring(const struct gen_device_info *devinfo, const char *ring_name) { - unsigned int class; + enum drm_i915_gem_engine_class class; int instance; instance = ring_name_to_class(ring_name, &class); @@ -182,16 +175,16 @@ return NULL; switch (class) { - case RCS: + case I915_ENGINE_CLASS_RENDER: if (devinfo->gen == 6) return "INSTDONE_2"; else return "INSTDONE_1"; - case BCS: + case I915_ENGINE_CLASS_COPY: return "BCS_INSTDONE"; - case VCS: + case I915_ENGINE_CLASS_VIDEO: switch (instance) { case 0: return "VCS_INSTDONE"; @@ -201,8 +194,11 @@ return NULL; } - case VECS: + case I915_ENGINE_CLASS_VIDEO_ENHANCE: return "VECS_INSTDONE"; + + default: + return NULL; } return NULL; @@ -601,6 +597,9 @@ for (int s = 0; s < num_sections; s++) { + enum drm_i915_gem_engine_class class; + ring_name_to_class(sections[s].ring_name, &class); + printf("--- %s (%s) at 0x%08x %08x\n", sections[s].buffer_name, sections[s].ring_name, (unsigned) (sections[s].gtt_offset >> 32), @@ -610,6 +609,7 @@ strcmp(sections[s].buffer_name, "batch buffer") == 0 || strcmp(sections[s].buffer_name, "ring buffer") == 0 || strcmp(sections[s].buffer_name, "HW Context") == 0) { + 
batch_ctx.engine = class; gen_print_batch(&batch_ctx, sections[s].data, sections[s].dword_count * 4, sections[s].gtt_offset); diff -Nru mesa-18.3.3/src/intel/tools/aubinator_viewer.cpp mesa-19.0.1/src/intel/tools/aubinator_viewer.cpp --- mesa-18.3.3/src/intel/tools/aubinator_viewer.cpp 2018-12-07 18:58:04.000000000 +0000 +++ mesa-19.0.1/src/intel/tools/aubinator_viewer.cpp 2019-03-31 23:16:37.000000000 +0000 @@ -97,7 +97,7 @@ } static void -handle_ring_write(void *user_data, enum gen_engine engine, +handle_ring_write(void *user_data, enum drm_i915_gem_engine_class engine, const void *ring_data, uint32_t ring_data_len) { struct aub_file *file = (struct aub_file *) user_data; @@ -387,16 +387,14 @@ window->base.display = display_shader_window; window->base.destroy = destroy_shader_window; - struct gen_batch_decode_bo shader_bo; - if (mem->pml4) - shader_bo = aub_mem_get_ppgtt_bo(mem, address); - else - shader_bo = aub_mem_get_ggtt_bo(mem, address); - + struct gen_batch_decode_bo shader_bo = + aub_mem_get_ppgtt_bo(mem, address); if (shader_bo.map) { FILE *f = open_memstream(&window->shader, &window->shader_size); if (f) { - gen_disasm_disassemble(context.file->disasm, shader_bo.map, 0, f); + gen_disasm_disassemble(context.file->disasm, + (const uint8_t *) shader_bo.map + + (address - shader_bo.addr), 0, f); fclose(f); } } @@ -695,7 +693,7 @@ } static void -display_batch_ring_write(void *user_data, enum gen_engine engine, +display_batch_ring_write(void *user_data, enum drm_i915_gem_engine_class engine, const void *data, uint32_t data_len) { struct batch_window *window = (struct batch_window *) user_data; @@ -706,7 +704,8 @@ } static void -display_batch_execlist_write(void *user_data, enum gen_engine engine, +display_batch_execlist_write(void *user_data, + enum drm_i915_gem_engine_class engine, uint64_t context_descriptor) { struct batch_window *window = (struct batch_window *) user_data; @@ -722,19 +721,21 @@ uint32_t ring_buffer_head = context_img[5]; uint32_t 
ring_buffer_tail = context_img[7]; uint32_t ring_buffer_start = context_img[9]; + uint32_t ring_buffer_length = (context_img[11] & 0x1ff000) + 4096; window->mem.pml4 = (uint64_t)context_img[49] << 32 | context_img[51]; struct gen_batch_decode_bo ring_bo = aub_mem_get_ggtt_bo(&window->mem, ring_buffer_start); assert(ring_bo.size > 0); - void *commands = (uint8_t *)ring_bo.map + (ring_buffer_start - ring_bo.addr); + void *commands = (uint8_t *)ring_bo.map + (ring_buffer_start - ring_bo.addr) + ring_buffer_head; window->uses_ppgtt = true; + window->decode_ctx.engine = engine; aub_viewer_render_batch(&window->decode_ctx, commands, - ring_buffer_tail - ring_buffer_head, - ring_buffer_start); + MIN2(ring_buffer_tail - ring_buffer_head, ring_buffer_length), + ring_buffer_start + ring_buffer_head); } static void @@ -992,6 +993,7 @@ ImGui::ColorEdit3("error", (float *)&cfg->error_color, cflags); ImGui::SameLine(); ImGui::ColorEdit3("highlight", (float *)&cfg->highlight_color, cflags); ImGui::SameLine(); ImGui::ColorEdit3("dwords", (float *)&cfg->dwords_color, cflags); ImGui::SameLine(); + ImGui::ColorEdit3("booleans", (float *)&cfg->boolean_color, cflags); ImGui::SameLine(); if (ImGui::Button("Commands list") || has_ctrl_key('c')) { show_commands_window(); } ImGui::SameLine(); if (ImGui::Button("Registers list") || has_ctrl_key('r')) { show_register_window(); } ImGui::SameLine(); diff -Nru mesa-18.3.3/src/intel/tools/aubinator_viewer_decoder.cpp mesa-19.0.1/src/intel/tools/aubinator_viewer_decoder.cpp --- mesa-18.3.3/src/intel/tools/aubinator_viewer_decoder.cpp 2018-12-07 18:58:04.000000000 +0000 +++ mesa-19.0.1/src/intel/tools/aubinator_viewer_decoder.cpp 2019-03-31 23:16:37.000000000 +0000 @@ -42,6 +42,7 @@ ctx->get_bo = get_bo; ctx->get_state_size = get_state_size; ctx->user_data = user_data; + ctx->engine = I915_ENGINE_CLASS_RENDER; ctx->cfg = cfg; ctx->decode_cfg = decode_cfg; @@ -73,7 +74,12 @@ } if (!gen_field_is_header(iter.field)) { if 
(ctx->decode_cfg->field_filter.PassFilter(iter.name)) { - ImGui::Text("%s: %s", iter.name, iter.value); + if (iter.field->type.kind == gen_type::GEN_TYPE_BOOL && iter.raw_value) { + ImGui::Text("%s: ", iter.name); ImGui::SameLine(); + ImGui::TextColored(ctx->cfg->boolean_color, "true"); + } else { + ImGui::Text("%s: %s", iter.name, iter.value); + } if (iter.struct_desc) { int struct_dword = iter.start_bit / 32; uint64_t struct_address = address + 4 * struct_dword; @@ -140,7 +146,8 @@ uint64_t addr = ctx->instruction_base + ksp; struct gen_batch_decode_bo bo = ctx_get_bo(ctx, addr); if (!bo.map) { - ImGui::TextColored(ctx->cfg->missing_color, "Shader unavailable"); + ImGui::TextColored(ctx->cfg->missing_color, + "Shader unavailable addr=0x%012" PRIx64, addr); return; } @@ -231,8 +238,12 @@ continue; } - ImGui::Text("pointer %u: %08x", i, pointers[i]); - aub_viewer_print_group(ctx, strct, addr, (const uint8_t *) bo.map + (addr - bo.addr)); + const uint8_t *state = (const uint8_t *) bo.map + (addr - bo.addr); + if (ImGui::TreeNodeEx(&pointers[i], ImGuiTreeNodeFlags_Framed, + "pointer %u: %08x", i, pointers[i])) { + aub_viewer_print_group(ctx, strct, addr, state); + ImGui::TreePop(); + } } } @@ -260,8 +271,11 @@ } for (int i = 0; i < count; i++) { - ImGui::Text("sampler state %d", i); - aub_viewer_print_group(ctx, strct, state_addr, state_map); + if (ImGui::TreeNodeEx(state_map, ImGuiTreeNodeFlags_Framed, + "sampler state %d", i)) { + aub_viewer_print_group(ctx, strct, state_addr, state_map); + ImGui::TreePop(); + } state_addr += 16; state_map += 16; } @@ -624,8 +638,6 @@ struct gen_group *inst, const uint32_t *p, const char *struct_type, int count) { - struct gen_group *state = gen_spec_find_struct(ctx->spec, struct_type); - uint32_t state_offset = 0; struct gen_field_iterator iter; @@ -648,12 +660,28 @@ return; } + struct gen_group *state = gen_spec_find_struct(ctx->spec, struct_type); + if (strcmp(struct_type, "BLEND_STATE") == 0) { + /* Blend states are different 
from the others because they have a header + * struct called BLEND_STATE which is followed by a variable number of + * BLEND_STATE_ENTRY structs. + */ + ImGui::Text("%s", struct_type); + aub_viewer_print_group(ctx, state, state_addr, state_map); + + state_addr += state->dw_length * 4; + state_map += state->dw_length * 4; + + struct_type = "BLEND_STATE_ENTRY"; + state = gen_spec_find_struct(ctx->spec, struct_type); + } + for (int i = 0; i < count; i++) { ImGui::Text("%s %d", struct_type, i); - aub_viewer_print_group(ctx, state, state_offset, state_map); + aub_viewer_print_group(ctx, state, state_addr, state_map); state_addr += state->dw_length * 4; - state_map += state->dw_length; + state_map += state->dw_length * 4; } } @@ -871,7 +899,7 @@ int length; for (p = batch; p < end; p += length) { - inst = gen_spec_find_instruction(ctx->spec, p); + inst = gen_spec_find_instruction(ctx->spec, ctx->engine, p); length = gen_group_get_length(inst, p); assert(inst == NULL || length > 0); length = MAX2(1, length); @@ -880,7 +908,7 @@ if (inst == NULL) { ImGui::TextColored(ctx->cfg->error_color, - "x%08" PRIx64 ": unknown instruction %08x", + "0x%08" PRIx64 ": unknown instruction %08x", offset, p[0]); continue; } diff -Nru mesa-18.3.3/src/intel/tools/aubinator_viewer.h mesa-19.0.1/src/intel/tools/aubinator_viewer.h --- mesa-18.3.3/src/intel/tools/aubinator_viewer.h 2018-12-07 18:58:04.000000000 +0000 +++ mesa-19.0.1/src/intel/tools/aubinator_viewer.h 2019-03-31 23:16:37.000000000 +0000 @@ -12,13 +12,15 @@ ImColor highlight_color; ImColor error_color; ImColor missing_color; + ImColor boolean_color; aub_viewer_cfg() : clear_color(114, 144, 154), dwords_color(29, 177, 194, 255), highlight_color(0, 230, 0, 255), error_color(236, 255, 0, 255), - missing_color(230, 0, 230, 255) {} + missing_color(230, 0, 230, 255), + boolean_color(228, 75, 255) {} }; struct aub_viewer_decode_cfg { @@ -68,6 +70,7 @@ struct gen_spec *spec; struct gen_disasm *disasm; + enum drm_i915_gem_engine_class 
engine; struct aub_viewer_cfg *cfg; struct aub_viewer_decode_cfg *decode_cfg; diff -Nru mesa-18.3.3/src/intel/tools/aub_mem.c mesa-19.0.1/src/intel/tools/aub_mem.c --- mesa-18.3.3/src/intel/tools/aub_mem.c 2018-12-07 18:58:04.000000000 +0000 +++ mesa-19.0.1/src/intel/tools/aub_mem.c 2019-03-31 23:16:37.000000000 +0000 @@ -289,8 +289,9 @@ continue; uint32_t map_offset = i->virt_addr - address; - void *res = mmap((uint8_t *)bo.map + map_offset, 4096, PROT_READ, - MAP_SHARED | MAP_FIXED, mem->mem_fd, phys_mem->fd_offset); + MAYBE_UNUSED void *res = + mmap((uint8_t *)bo.map + map_offset, 4096, PROT_READ, + MAP_SHARED | MAP_FIXED, mem->mem_fd, phys_mem->fd_offset); assert(res != MAP_FAILED); } @@ -354,8 +355,9 @@ for (uint64_t page = address; page < end; page += 4096) { struct phys_mem *phys_mem = ppgtt_walk(mem, mem->pml4, page); - void *res = mmap((uint8_t *)bo.map + (page - bo.addr), 4096, PROT_READ, - MAP_SHARED | MAP_FIXED, mem->mem_fd, phys_mem->fd_offset); + MAYBE_UNUSED void *res = + mmap((uint8_t *)bo.map + (page - bo.addr), 4096, PROT_READ, + MAP_SHARED | MAP_FIXED, mem->mem_fd, phys_mem->fd_offset); assert(res != MAP_FAILED); } diff -Nru mesa-18.3.3/src/intel/tools/aub_read.c mesa-19.0.1/src/intel/tools/aub_read.c --- mesa-18.3.3/src/intel/tools/aub_read.c 2018-12-07 18:58:04.000000000 +0000 +++ mesa-19.0.1/src/intel/tools/aub_read.c 2019-03-31 23:16:37.000000000 +0000 @@ -136,7 +136,7 @@ int type = p[1] & AUB_TRACE_TYPE_MASK; int address_space = p[1] & AUB_TRACE_ADDRESS_SPACE_MASK; int header_length = p[0] & 0xffff; - int engine = GEN_ENGINE_RENDER; + enum drm_i915_gem_engine_class engine = I915_ENGINE_CLASS_RENDER; const void *data = p + header_length + 2; uint64_t address = gen_48b_address((read->devinfo.gen >= 8 ? 
((uint64_t) p[5] << 32) : 0) | ((uint64_t) p[3])); @@ -151,13 +151,13 @@ case AUB_TRACE_OP_COMMAND_WRITE: switch (type) { case AUB_TRACE_TYPE_RING_PRB0: - engine = GEN_ENGINE_RENDER; + engine = I915_ENGINE_CLASS_RENDER; break; case AUB_TRACE_TYPE_RING_PRB1: - engine = GEN_ENGINE_VIDEO; + engine = I915_ENGINE_CLASS_VIDEO; break; case AUB_TRACE_TYPE_RING_PRB2: - engine = GEN_ENGINE_BLITTER; + engine = I915_ENGINE_CLASS_COPY; break; default: parse_error(read, p, "command write to unknown ring %d\n", type); @@ -182,7 +182,7 @@ if (read->reg_write) read->reg_write(read->user_data, offset, value); - int engine; + enum drm_i915_gem_engine_class engine; uint64_t context_descriptor; switch (offset) { @@ -192,7 +192,7 @@ return; read->render_elsp_index = 0; - engine = GEN_ENGINE_RENDER; + engine = I915_ENGINE_CLASS_RENDER; context_descriptor = (uint64_t)read->render_elsp[2] << 32 | read->render_elsp[3]; break; @@ -202,7 +202,7 @@ return; read->video_elsp_index = 0; - engine = GEN_ENGINE_VIDEO; + engine = I915_ENGINE_CLASS_VIDEO; context_descriptor = (uint64_t)read->video_elsp[2] << 32 | read->video_elsp[3]; break; @@ -212,46 +212,40 @@ return; read->blitter_elsp_index = 0; - engine = GEN_ENGINE_BLITTER; + engine = I915_ENGINE_CLASS_COPY; context_descriptor = (uint64_t)read->blitter_elsp[2] << 32 | read->blitter_elsp[3]; break; case 0x2510: /* render elsq0 lo */ read->render_elsp[3] = value; return; - break; case 0x2514: /* render elsq0 hi */ read->render_elsp[2] = value; return; - break; case 0x12510: /* video elsq0 lo */ read->video_elsp[3] = value; return; - break; case 0x12514: /* video elsq0 hi */ read->video_elsp[2] = value; return; - break; case 0x22510: /* blitter elsq0 lo */ read->blitter_elsp[3] = value; return; - break; case 0x22514: /* blitter elsq0 hi */ read->blitter_elsp[2] = value; return; - break; case 0x2550: /* render elsc */ - engine = GEN_ENGINE_RENDER; + engine = I915_ENGINE_CLASS_RENDER; context_descriptor = (uint64_t)read->render_elsp[2] << 32 | 
read->render_elsp[3]; break; case 0x12550: /* video_elsc */ - engine = GEN_ENGINE_VIDEO; + engine = I915_ENGINE_CLASS_VIDEO; context_descriptor = (uint64_t)read->video_elsp[2] << 32 | read->video_elsp[3]; break; case 0x22550: /* blitter elsc */ - engine = GEN_ENGINE_BLITTER; + engine = I915_ENGINE_CLASS_COPY; context_descriptor = (uint64_t)read->blitter_elsp[2] << 32 | read->blitter_elsp[3]; break; @@ -294,7 +288,8 @@ int aub_read_command(struct aub_read *read, const void *data, uint32_t data_len) { - const uint32_t *p = data, *end = data + data_len, *next; + const uint32_t *p = data, *next; + MAYBE_UNUSED const uint32_t *end = data + data_len; uint32_t h, header_length, bias; assert(data_len >= 4); diff -Nru mesa-18.3.3/src/intel/tools/aub_read.h mesa-19.0.1/src/intel/tools/aub_read.h --- mesa-18.3.3/src/intel/tools/aub_read.h 2018-12-07 18:58:04.000000000 +0000 +++ mesa-19.0.1/src/intel/tools/aub_read.h 2019-03-31 23:16:37.000000000 +0000 @@ -28,17 +28,12 @@ #include #include "dev/gen_device_info.h" +#include "drm-uapi/i915_drm.h" #ifdef __cplusplus extern "C" { #endif -enum gen_engine { - GEN_ENGINE_RENDER = 1, - GEN_ENGINE_VIDEO = 2, - GEN_ENGINE_BLITTER = 3, -}; - struct aub_read { /* Caller's data */ void *user_data; @@ -55,9 +50,9 @@ void (*reg_write)(void *user_data, uint32_t reg_offset, uint32_t reg_value); - void (*ring_write)(void *user_data, enum gen_engine engine, + void (*ring_write)(void *user_data, enum drm_i915_gem_engine_class engine, const void *data, uint32_t data_len); - void (*execlist_write)(void *user_data, enum gen_engine engine, + void (*execlist_write)(void *user_data, enum drm_i915_gem_engine_class engine, uint64_t context_descriptor); /* Reader's data */ diff -Nru mesa-18.3.3/src/intel/tools/i965_disasm.c mesa-19.0.1/src/intel/tools/i965_disasm.c --- mesa-18.3.3/src/intel/tools/i965_disasm.c 2018-12-07 18:58:04.000000000 +0000 +++ mesa-19.0.1/src/intel/tools/i965_disasm.c 2019-03-31 23:16:37.000000000 +0000 @@ -47,17 +47,23 @@ static 
void * i965_disasm_read_binary(FILE *fp, size_t *end) { + size_t size; void *assembly; *end = i965_disasm_get_file_size(fp); + if (!*end) + return NULL; assembly = malloc(*end + 1); if (assembly == NULL) return NULL; - fread(assembly, *end, 1, fp); + size = fread(assembly, *end, 1, fp); fclose(fp); - + if (!size) { + free(assembly); + return NULL; + } return assembly; } @@ -167,7 +173,11 @@ assembly = i965_disasm_read_binary(fp, &end); if (!assembly) { - fprintf(stderr, "Unable to allocate buffer to read binary file\n"); + if (end) + fprintf(stderr, "Unable to allocate buffer to read binary file\n"); + else + fprintf(stderr, "Input file is empty\n"); + exit(EXIT_FAILURE); } diff -Nru mesa-18.3.3/src/intel/tools/intel_dump_gpu.c mesa-19.0.1/src/intel/tools/intel_dump_gpu.c --- mesa-18.3.3/src/intel/tools/intel_dump_gpu.c 2018-09-27 19:13:54.000000000 +0000 +++ mesa-19.0.1/src/intel/tools/intel_dump_gpu.c 2019-03-31 23:16:37.000000000 +0000 @@ -358,10 +358,16 @@ verbose = 2; } } else if (!strcmp(key, "device")) { + fail_if(device != 0, "Device/Platform override specified multiple times."); fail_if(sscanf(value, "%i", &device) != 1, "failed to parse device id '%s'", value); device_override = true; + } else if (!strcmp(key, "platform")) { + fail_if(device != 0, "Device/Platform override specified multiple times."); + device = gen_device_name_to_pci_device_id(value); + fail_if(device == -1, "Unknown platform '%s'", value); + device_override = true; } else if (!strcmp(key, "file")) { output_filename = strdup(value); output_file = fopen(output_filename, "w+"); diff -Nru mesa-18.3.3/src/intel/tools/intel_dump_gpu.in mesa-19.0.1/src/intel/tools/intel_dump_gpu.in --- mesa-18.3.3/src/intel/tools/intel_dump_gpu.in 2018-09-27 19:13:54.000000000 +0000 +++ mesa-19.0.1/src/intel/tools/intel_dump_gpu.in 2019-03-31 23:16:37.000000000 +0000 @@ -8,15 +8,19 @@ Run COMMAND with ARGUMENTS and dump an AUB file that captures buffer contents and execution of the GEM application. 
- -o, --output=FILE Name of AUB file. Defaults to COMMAND.aub + -g, --gdb Launch GDB - --device=ID Override PCI ID of the reported device + -o, --output=FILE Name of AUB file. Defaults to COMMAND.aub - -v Enable verbose output + --device=ID Override PCI ID of the reported device - -vv Enable extra verbosity - dumps gtt mappings + -p, --platform=NAME Override PCI ID using a platform name - --help Display this help message and exit + -v Enable verbose output + + -vv Enable extra verbosity - dumps gtt mappings + + --help Display this help message and exit EOF @@ -35,11 +39,6 @@ while true; do case "$1" in - -o) - file=$2 - add_arg "file=${file:-$(basename ${file}).aub}" - shift 2 - ;; -v) add_arg "verbose=1" shift 1 @@ -48,6 +47,11 @@ add_arg "verbose=2" shift 1 ;; + -o) + file=$2 + add_arg "file=${file:-$(basename ${file}).aub}" + shift 2 + ;; -o*) file=${1##-o} add_arg "file=${file:-$(basename ${file}).aub}" @@ -62,6 +66,21 @@ add_arg "device=${1##--device=}" shift ;; + -p) + platform=$2 + add_arg "platform=${platform}" + shift 2 + ;; + -p*) + platform=${1##-p} + add_arg "platform=${platform}" + shift + ;; + --platform=*) + platform=${1##-p} + add_arg "platform=${platform}" + shift + ;; --gdb) gdb=1 shift diff -Nru mesa-18.3.3/src/intel/tools/intel_sanitize_gpu.c mesa-19.0.1/src/intel/tools/intel_sanitize_gpu.c --- mesa-18.3.3/src/intel/tools/intel_sanitize_gpu.c 2018-04-11 19:02:35.000000000 +0000 +++ mesa-19.0.1/src/intel/tools/intel_sanitize_gpu.c 2019-03-31 23:16:37.000000000 +0000 @@ -39,6 +39,7 @@ #include #include "util/hash_table.h" +#include "util/u_math.h" #define INTEL_LOG_TAG "INTEL-SANITIZE-GPU" #include "common/intel_log.h" @@ -109,8 +110,7 @@ { struct refcnt_hash_table *r = malloc(sizeof(*r)); r->refcnt = 1; - r->t = _mesa_hash_table_create(NULL, _mesa_hash_pointer, - _mesa_key_pointer_equal); + r->t = _mesa_pointer_hash_table_create(NULL); _mesa_hash_table_insert(fds_to_bo_sizes, (void*)(uintptr_t)fd, (void*)(uintptr_t)r); } @@ -165,7 +165,7 @@ { 
struct drm_i915_gem_mmap mmap_arg = { .handle = handle, - .offset = bo_size(fd, handle), + .offset = align64(bo_size(fd, handle), 4096), .size = PADDING_SIZE, .flags = 0, }; @@ -207,9 +207,11 @@ static int create_with_padding(int fd, struct drm_i915_gem_create *create) { - create->size += PADDING_SIZE; + uint64_t original_size = create->size; + + create->size = align64(original_size, 4096) + PADDING_SIZE; int ret = libc_ioctl(fd, DRM_IOCTL_I915_GEM_CREATE, create); - create->size -= PADDING_SIZE; + create->size = original_size; if (ret != 0) return ret; @@ -217,14 +219,16 @@ uint8_t *noise_values; struct drm_i915_gem_mmap mmap_arg = { .handle = create->handle, - .offset = create->size, + .offset = align64(create->size, 4096), .size = PADDING_SIZE, .flags = 0, }; ret = libc_ioctl(fd, DRM_IOCTL_I915_GEM_MMAP, &mmap_arg); - if (ret != 0) + if (ret != 0) { + intel_logd("Unable to map buffer %d for pad creation.\n", create->handle); return 0; + } noise_values = (uint8_t*) (uintptr_t) mmap_arg.addr_ptr; fill_noise_buffer(noise_values, create->handle & 0xFF, @@ -421,8 +425,7 @@ static void __attribute__ ((constructor)) init(void) { - fds_to_bo_sizes = _mesa_hash_table_create(NULL, _mesa_hash_pointer, - _mesa_key_pointer_equal); + fds_to_bo_sizes = _mesa_pointer_hash_table_create(NULL); libc_open = dlsym(RTLD_NEXT, "open"); libc_close = dlsym(RTLD_NEXT, "close"); libc_fcntl = dlsym(RTLD_NEXT, "fcntl"); diff -Nru mesa-18.3.3/src/intel/tools/intel_sanitize_gpu.in mesa-19.0.1/src/intel/tools/intel_sanitize_gpu.in --- mesa-18.3.3/src/intel/tools/intel_sanitize_gpu.in 2018-04-11 19:02:35.000000000 +0000 +++ mesa-19.0.1/src/intel/tools/intel_sanitize_gpu.in 2019-03-31 23:16:37.000000000 +0000 @@ -1,4 +1,57 @@ #!/bin/bash # -*- mode: sh -*- -LD_PRELOAD="@install_libexecdir@/libintel_sanitize_gpu.so${LD_PRELOAD:+:$LD_PRELOAD}" exec "$@" +function show_help() { + cat <u64; - while (current.offset != EMPTY) { - /* We have to add a memory barrier here so that the list head (and - * 
offset) gets read before we read the map pointer. This way we - * know that the map pointer is valid for the given offset at the - * point where we read it. - */ - __sync_synchronize(); + table->device = device; - int32_t *next_ptr = *map + current.offset; - new.offset = VG_NOACCESS_READ(next_ptr); - new.count = current.count + 1; - old.u64 = __sync_val_compare_and_swap(&list->u64, current.u64, new.u64); - if (old.u64 == current.u64) { - *offset = current.offset; - return true; - } - current = old; + table->fd = memfd_create("state table", MFD_CLOEXEC); + if (table->fd == -1) + return vk_error(VK_ERROR_INITIALIZATION_FAILED); + + /* Just make it 2GB up-front. The Linux kernel won't actually back it + * with pages until we either map and fault on one of them or we use + * userptr and send a chunk of it off to the GPU. + */ + if (ftruncate(table->fd, BLOCK_POOL_MEMFD_SIZE) == -1) { + result = vk_error(VK_ERROR_INITIALIZATION_FAILED); + goto fail_fd; } - return false; + if (!u_vector_init(&table->mmap_cleanups, + round_to_power_of_two(sizeof(struct anv_state_table_cleanup)), + 128)) { + result = vk_error(VK_ERROR_INITIALIZATION_FAILED); + goto fail_fd; + } + + table->state.next = 0; + table->state.end = 0; + table->size = 0; + + uint32_t initial_size = initial_entries * ANV_STATE_ENTRY_SIZE; + result = anv_state_table_expand_range(table, initial_size); + if (result != VK_SUCCESS) + goto fail_mmap_cleanups; + + return VK_SUCCESS; + + fail_mmap_cleanups: + u_vector_finish(&table->mmap_cleanups); + fail_fd: + close(table->fd); + + return result; } -static void -anv_free_list_push(union anv_free_list *list, void *map, int32_t offset, - uint32_t size, uint32_t count) +static VkResult +anv_state_table_expand_range(struct anv_state_table *table, uint32_t size) { - union anv_free_list current, old, new; - int32_t *next_ptr = map + offset; + void *map; + struct anv_mmap_cleanup *cleanup; + + /* Assert that we only ever grow the pool */ + assert(size >= table->state.end); + + 
/* Make sure that we don't go outside the bounds of the memfd */ + if (size > BLOCK_POOL_MEMFD_SIZE) + return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY); + + cleanup = u_vector_add(&table->mmap_cleanups); + if (!cleanup) + return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY); + + *cleanup = ANV_MMAP_CLEANUP_INIT; + + /* Just leak the old map until we destroy the pool. We can't munmap it + * without races or imposing locking on the block allocate fast path. On + * the whole the leaked maps adds up to less than the size of the + * current map. MAP_POPULATE seems like the right thing to do, but we + * should try to get some numbers. + */ + map = mmap(NULL, size, PROT_READ | PROT_WRITE, + MAP_SHARED | MAP_POPULATE, table->fd, 0); + if (map == MAP_FAILED) { + return vk_errorf(table->device->instance, table->device, + VK_ERROR_OUT_OF_HOST_MEMORY, "mmap failed: %m"); + } + + cleanup->map = map; + cleanup->size = size; + + table->map = map; + table->size = size; + + return VK_SUCCESS; +} + +static VkResult +anv_state_table_grow(struct anv_state_table *table) +{ + VkResult result = VK_SUCCESS; - /* If we're returning more than one chunk, we need to build a chain to add - * to the list. Fortunately, we can do this without any atomics since we - * own everything in the chain right now. `offset` is left pointing to the - * head of our chain list while `next_ptr` points to the tail. + uint32_t used = align_u32(table->state.next * ANV_STATE_ENTRY_SIZE, + PAGE_SIZE); + uint32_t old_size = table->size; + + /* The block pool is always initialized to a nonzero size and this function + * is always called after initialization. 
*/ - for (uint32_t i = 1; i < count; i++) { - VG_NOACCESS_WRITE(next_ptr, offset + i * size); - next_ptr = map + offset + i * size; + assert(old_size > 0); + + uint32_t required = MAX2(used, old_size); + if (used * 2 <= required) { + /* If we're in this case then this isn't the firsta allocation and we + * already have enough space on both sides to hold double what we + * have allocated. There's nothing for us to do. + */ + goto done; + } + + uint32_t size = old_size * 2; + while (size < required) + size *= 2; + + assert(size > table->size); + + result = anv_state_table_expand_range(table, size); + + done: + return result; +} + +void +anv_state_table_finish(struct anv_state_table *table) +{ + struct anv_state_table_cleanup *cleanup; + + u_vector_foreach(cleanup, &table->mmap_cleanups) { + if (cleanup->map) + munmap(cleanup->map, cleanup->size); + } + + u_vector_finish(&table->mmap_cleanups); + + close(table->fd); +} + +VkResult +anv_state_table_add(struct anv_state_table *table, uint32_t *idx, + uint32_t count) +{ + struct anv_block_state state, old, new; + VkResult result; + + assert(idx); + + while(1) { + state.u64 = __sync_fetch_and_add(&table->state.u64, count); + if (state.next + count <= state.end) { + assert(table->map); + struct anv_free_entry *entry = &table->map[state.next]; + for (int i = 0; i < count; i++) { + entry[i].state.idx = state.next + i; + } + *idx = state.next; + return VK_SUCCESS; + } else if (state.next <= state.end) { + /* We allocated the first block outside the pool so we have to grow + * the pool. pool_state->next acts a mutex: threads who try to + * allocate now will get block indexes above the current limit and + * hit futex_wait below. 
+ */ + new.next = state.next + count; + do { + result = anv_state_table_grow(table); + if (result != VK_SUCCESS) + return result; + new.end = table->size / ANV_STATE_ENTRY_SIZE; + } while (new.end < new.next); + + old.u64 = __sync_lock_test_and_set(&table->state.u64, new.u64); + if (old.next != state.next) + futex_wake(&table->state.end, INT_MAX); + } else { + futex_wait(&table->state.end, state.end, NULL); + continue; + } } +} + +void +anv_free_list_push(union anv_free_list *list, + struct anv_state_table *table, + uint32_t first, uint32_t count) +{ + union anv_free_list current, old, new; + uint32_t last = first; + + for (uint32_t i = 1; i < count; i++, last++) + table->map[last].next = last + 1; old = *list; do { current = old; - VG_NOACCESS_WRITE(next_ptr, current.offset); - new.offset = offset; + table->map[last].next = current.offset; + new.offset = first; new.count = current.count + 1; old.u64 = __sync_val_compare_and_swap(&list->u64, current.u64, new.u64); } while (old.u64 != current.u64); } +struct anv_state * +anv_free_list_pop(union anv_free_list *list, + struct anv_state_table *table) +{ + union anv_free_list current, new, old; + + current.u64 = list->u64; + while (current.offset != EMPTY) { + __sync_synchronize(); + new.offset = table->map[current.offset].next; + new.count = current.count + 1; + old.u64 = __sync_val_compare_and_swap(&list->u64, current.u64, new.u64); + if (old.u64 == current.u64) { + struct anv_free_entry *entry = &table->map[current.offset]; + return &entry->state; + } + current = old; + } + + return NULL; +} + /* All pointers in the ptr_free_list are assumed to be page-aligned. This * means that the bottom 12 bits should all be zero. 
*/ @@ -251,21 +434,32 @@ pool->device = device; pool->bo_flags = bo_flags; + pool->nbos = 0; + pool->size = 0; + pool->center_bo_offset = 0; pool->start_address = gen_canonical_address(start_address); + pool->map = NULL; - anv_bo_init(&pool->bo, 0, 0); + /* This pointer will always point to the first BO in the list */ + pool->bo = &pool->bos[0]; - pool->fd = memfd_create("block pool", MFD_CLOEXEC); - if (pool->fd == -1) - return vk_error(VK_ERROR_INITIALIZATION_FAILED); + anv_bo_init(pool->bo, 0, 0); - /* Just make it 2GB up-front. The Linux kernel won't actually back it - * with pages until we either map and fault on one of them or we use - * userptr and send a chunk of it off to the GPU. - */ - if (ftruncate(pool->fd, BLOCK_POOL_MEMFD_SIZE) == -1) { - result = vk_error(VK_ERROR_INITIALIZATION_FAILED); - goto fail_fd; + if (!(pool->bo_flags & EXEC_OBJECT_PINNED)) { + pool->fd = memfd_create("block pool", MFD_CLOEXEC); + if (pool->fd == -1) + return vk_error(VK_ERROR_INITIALIZATION_FAILED); + + /* Just make it 2GB up-front. The Linux kernel won't actually back it + * with pages until we either map and fault on one of them or we use + * userptr and send a chunk of it off to the GPU. 
+ */ + if (ftruncate(pool->fd, BLOCK_POOL_MEMFD_SIZE) == -1) { + result = vk_error(VK_ERROR_INITIALIZATION_FAILED); + goto fail_fd; + } + } else { + pool->fd = -1; } if (!u_vector_init(&pool->mmap_cleanups, @@ -289,7 +483,8 @@ fail_mmap_cleanups: u_vector_finish(&pool->mmap_cleanups); fail_fd: - close(pool->fd); + if (!(pool->bo_flags & EXEC_OBJECT_PINNED)) + close(pool->fd); return result; } @@ -307,12 +502,10 @@ } u_vector_finish(&pool->mmap_cleanups); - - close(pool->fd); + if (!(pool->bo_flags & EXEC_OBJECT_PINNED)) + close(pool->fd); } -#define PAGE_SIZE 4096 - static VkResult anv_block_pool_expand_range(struct anv_block_pool *pool, uint32_t center_bo_offset, uint32_t size) @@ -320,6 +513,7 @@ void *map; uint32_t gem_handle; struct anv_mmap_cleanup *cleanup; + const bool use_softpin = !!(pool->bo_flags & EXEC_OBJECT_PINNED); /* Assert that we only ever grow the pool */ assert(center_bo_offset >= pool->back_state.end); @@ -327,7 +521,8 @@ /* Assert that we don't go outside the bounds of the memfd */ assert(center_bo_offset <= BLOCK_POOL_MEMFD_CENTER); - assert(size - center_bo_offset <= + assert(use_softpin || + size - center_bo_offset <= BLOCK_POOL_MEMFD_SIZE - BLOCK_POOL_MEMFD_CENTER); cleanup = u_vector_add(&pool->mmap_cleanups); @@ -336,48 +531,55 @@ *cleanup = ANV_MMAP_CLEANUP_INIT; - /* Just leak the old map until we destroy the pool. We can't munmap it - * without races or imposing locking on the block allocate fast path. On - * the whole the leaked maps adds up to less than the size of the - * current map. MAP_POPULATE seems like the right thing to do, but we - * should try to get some numbers. 
- */ - map = mmap(NULL, size, PROT_READ | PROT_WRITE, - MAP_SHARED | MAP_POPULATE, pool->fd, - BLOCK_POOL_MEMFD_CENTER - center_bo_offset); - if (map == MAP_FAILED) - return vk_errorf(pool->device->instance, pool->device, - VK_ERROR_MEMORY_MAP_FAILED, "mmap failed: %m"); - - gem_handle = anv_gem_userptr(pool->device, map, size); - if (gem_handle == 0) { - munmap(map, size); - return vk_errorf(pool->device->instance, pool->device, - VK_ERROR_TOO_MANY_OBJECTS, "userptr failed: %m"); + uint32_t newbo_size = size - pool->size; + if (use_softpin) { + gem_handle = anv_gem_create(pool->device, newbo_size); + map = anv_gem_mmap(pool->device, gem_handle, 0, newbo_size, 0); + if (map == MAP_FAILED) + return vk_errorf(pool->device->instance, pool->device, + VK_ERROR_MEMORY_MAP_FAILED, "gem mmap failed: %m"); + assert(center_bo_offset == 0); + } else { + /* Just leak the old map until we destroy the pool. We can't munmap it + * without races or imposing locking on the block allocate fast path. On + * the whole the leaked maps adds up to less than the size of the + * current map. MAP_POPULATE seems like the right thing to do, but we + * should try to get some numbers. + */ + map = mmap(NULL, size, PROT_READ | PROT_WRITE, + MAP_SHARED | MAP_POPULATE, pool->fd, + BLOCK_POOL_MEMFD_CENTER - center_bo_offset); + if (map == MAP_FAILED) + return vk_errorf(pool->device->instance, pool->device, + VK_ERROR_MEMORY_MAP_FAILED, "mmap failed: %m"); + + /* Now that we mapped the new memory, we can write the new + * center_bo_offset back into pool and update pool->map. */ + pool->center_bo_offset = center_bo_offset; + pool->map = map + center_bo_offset; + gem_handle = anv_gem_userptr(pool->device, map, size); + if (gem_handle == 0) { + munmap(map, size); + return vk_errorf(pool->device->instance, pool->device, + VK_ERROR_TOO_MANY_OBJECTS, "userptr failed: %m"); + } } cleanup->map = map; - cleanup->size = size; + cleanup->size = use_softpin ? 
newbo_size : size; cleanup->gem_handle = gem_handle; -#if 0 /* Regular objects are created I915_CACHING_CACHED on LLC platforms and * I915_CACHING_NONE on non-LLC platforms. However, userptr objects are * always created as I915_CACHING_CACHED, which on non-LLC means - * snooped. That can be useful but comes with a bit of overheard. Since - * we're eplicitly clflushing and don't want the overhead we need to turn - * it off. */ - if (!pool->device->info.has_llc) { - anv_gem_set_caching(pool->device, gem_handle, I915_CACHING_NONE); - anv_gem_set_domain(pool->device, gem_handle, - I915_GEM_DOMAIN_GTT, I915_GEM_DOMAIN_GTT); - } -#endif - - /* Now that we successfull allocated everything, we can write the new - * values back into pool. */ - pool->map = map + center_bo_offset; - pool->center_bo_offset = center_bo_offset; + * snooped. + * + * On platforms that support softpin, we are not going to use userptr + * anymore, but we still want to rely on the snooped states. So make sure + * everything is set to I915_CACHING_CACHED. + */ + if (!pool->device->info.has_llc) + anv_gem_set_caching(pool->device, gem_handle, I915_CACHING_CACHED); /* For block pool BOs we have to be a bit careful about where we place them * in the GTT. There are two documented workarounds for state base address @@ -404,17 +606,82 @@ * the EXEC_OBJECT_SUPPORTS_48B_ADDRESS flag and the kernel does all of the * hard work for us. */ - anv_bo_init(&pool->bo, gem_handle, size); - if (pool->bo_flags & EXEC_OBJECT_PINNED) { - pool->bo.offset = pool->start_address + BLOCK_POOL_MEMFD_CENTER - - center_bo_offset; + struct anv_bo *bo; + uint32_t bo_size; + uint64_t bo_offset; + + assert(pool->nbos < ANV_MAX_BLOCK_POOL_BOS); + + if (use_softpin) { + /* With softpin, we add a new BO to the pool, and set its offset to right + * where the previous BO ends (the end of the pool). 
+ */ + bo = &pool->bos[pool->nbos++]; + bo_size = newbo_size; + bo_offset = pool->start_address + pool->size; + } else { + /* Without softpin, we just need one BO, and we already have a pointer to + * it. Simply "allocate" it from our array if we didn't do it before. + * The offset doesn't matter since we are not pinning the BO anyway. + */ + if (pool->nbos == 0) + pool->nbos++; + bo = pool->bo; + bo_size = size; + bo_offset = 0; } - pool->bo.flags = pool->bo_flags; - pool->bo.map = map; + + anv_bo_init(bo, gem_handle, bo_size); + bo->offset = bo_offset; + bo->flags = pool->bo_flags; + bo->map = map; + pool->size = size; return VK_SUCCESS; } +static struct anv_bo * +anv_block_pool_get_bo(struct anv_block_pool *pool, int32_t *offset) +{ + struct anv_bo *bo, *bo_found = NULL; + int32_t cur_offset = 0; + + assert(offset); + + if (!(pool->bo_flags & EXEC_OBJECT_PINNED)) + return pool->bo; + + anv_block_pool_foreach_bo(bo, pool) { + if (*offset < cur_offset + bo->size) { + bo_found = bo; + break; + } + cur_offset += bo->size; + } + + assert(bo_found != NULL); + *offset -= cur_offset; + + return bo_found; +} + +/** Returns current memory map of the block pool. + * + * The returned pointer points to the map for the memory at the specified + * offset. The offset parameter is relative to the "center" of the block pool + * rather than the start of the block pool BO map. + */ +void* +anv_block_pool_map(struct anv_block_pool *pool, int32_t offset) +{ + if (pool->bo_flags & EXEC_OBJECT_PINNED) { + struct anv_bo *bo = anv_block_pool_get_bo(pool, &offset); + return bo->map + offset; + } else { + return pool->map + offset; + } +} + /** Grows and re-centers the block pool. 
* * We grow the block pool in one or both directions in such a way that the @@ -464,7 +731,7 @@ assert(state == &pool->state || back_used > 0); - uint32_t old_size = pool->bo.size; + uint32_t old_size = pool->size; /* The block pool is always initialized to a nonzero size and this function * is always called after initialization. @@ -490,7 +757,7 @@ while (size < back_required + front_required) size *= 2; - assert(size > pool->bo.size); + assert(size > pool->size); /* We compute a new center_bo_offset such that, when we double the size * of the pool, we maintain the ratio of how much is used by each side. @@ -527,7 +794,7 @@ result = anv_block_pool_expand_range(pool, center_bo_offset, size); - pool->bo.flags = pool->bo_flags; + pool->bo->flags = pool->bo_flags; done: pthread_mutex_unlock(&pool->device->mutex); @@ -538,7 +805,7 @@ * needs to do so in order to maintain its concurrency model. */ if (state == &pool->state) { - return pool->bo.size - pool->center_bo_offset; + return pool->size - pool->center_bo_offset; } else { assert(pool->center_bo_offset > 0); return pool->center_bo_offset; @@ -551,16 +818,35 @@ static uint32_t anv_block_pool_alloc_new(struct anv_block_pool *pool, struct anv_block_state *pool_state, - uint32_t block_size) + uint32_t block_size, uint32_t *padding) { struct anv_block_state state, old, new; + /* Most allocations won't generate any padding */ + if (padding) + *padding = 0; + while (1) { state.u64 = __sync_fetch_and_add(&pool_state->u64, block_size); if (state.next + block_size <= state.end) { - assert(pool->map); return state.next; } else if (state.next <= state.end) { + if (pool->bo_flags & EXEC_OBJECT_PINNED && state.next < state.end) { + /* We need to grow the block pool, but still have some leftover + * space that can't be used by that particular allocation. So we + * add that as a "padding", and return it. 
+ */ + uint32_t leftover = state.end - state.next; + + /* If there is some leftover space in the pool, the caller must + * deal with it. + */ + assert(leftover == 0 || padding); + if (padding) + *padding = leftover; + state.next += leftover; + } + /* We allocated the first block outside the pool so we have to grow * the pool. pool_state->next acts a mutex: threads who try to * allocate now will get block indexes above the current limit and @@ -584,9 +870,13 @@ int32_t anv_block_pool_alloc(struct anv_block_pool *pool, - uint32_t block_size) + uint32_t block_size, uint32_t *padding) { - return anv_block_pool_alloc_new(pool, &pool->state, block_size); + uint32_t offset; + + offset = anv_block_pool_alloc_new(pool, &pool->state, block_size, padding); + + return offset; } /* Allocates a block out of the back of the block pool. @@ -603,7 +893,7 @@ uint32_t block_size) { int32_t offset = anv_block_pool_alloc_new(pool, &pool->back_state, - block_size); + block_size, NULL); /* The offset we get out of anv_block_pool_alloc_new() is actually the * number of bytes downwards from the middle to the end of the block. 
@@ -628,6 +918,12 @@ if (result != VK_SUCCESS) return result; + result = anv_state_table_init(&pool->table, device, 64); + if (result != VK_SUCCESS) { + anv_block_pool_finish(&pool->block_pool); + return result; + } + assert(util_is_power_of_two_or_zero(block_size)); pool->block_size = block_size; pool->back_alloc_free_list = ANV_FREE_LIST_EMPTY; @@ -645,6 +941,7 @@ anv_state_pool_finish(struct anv_state_pool *pool) { VG(VALGRIND_DESTROY_MEMPOOL(pool)); + anv_state_table_finish(&pool->table); anv_block_pool_finish(&pool->block_pool); } @@ -652,16 +949,24 @@ anv_fixed_size_state_pool_alloc_new(struct anv_fixed_size_state_pool *pool, struct anv_block_pool *block_pool, uint32_t state_size, - uint32_t block_size) + uint32_t block_size, + uint32_t *padding) { struct anv_block_state block, old, new; uint32_t offset; + /* We don't always use anv_block_pool_alloc(), which would set *padding to + * zero for us. So if we have a pointer to padding, we must zero it out + * ourselves here, to make sure we always return some sensible value. + */ + if (padding) + *padding = 0; + /* If our state is large, we don't need any sub-allocation from a block. * Instead, we just grab whole (potentially large) blocks. */ if (state_size >= block_size) - return anv_block_pool_alloc(block_pool, state_size); + return anv_block_pool_alloc(block_pool, state_size, padding); restart: block.u64 = __sync_fetch_and_add(&pool->block.u64, state_size); @@ -669,7 +974,7 @@ if (block.next < block.end) { return block.next; } else if (block.next == block.end) { - offset = anv_block_pool_alloc(block_pool, block_size); + offset = anv_block_pool_alloc(block_pool, block_size, padding); new.next = offset + state_size; new.end = offset + block_size; old.u64 = __sync_lock_test_and_set(&pool->block.u64, new.u64); @@ -699,30 +1004,124 @@ return 1 << size_log2; } +/** Helper to push a chunk into the state table. 
+ * + * It creates 'count' entries into the state table and updates their sizes, + * offsets and maps, also pushing them as "free" states. + */ +static void +anv_state_pool_return_blocks(struct anv_state_pool *pool, + uint32_t chunk_offset, uint32_t count, + uint32_t block_size) +{ + /* Disallow returning 0 chunks */ + assert(count != 0); + + /* Make sure we always return chunks aligned to the block_size */ + assert(chunk_offset % block_size == 0); + + uint32_t st_idx; + VkResult result = anv_state_table_add(&pool->table, &st_idx, count); + assert(result == VK_SUCCESS); + for (int i = 0; i < count; i++) { + /* update states that were added back to the state table */ + struct anv_state *state_i = anv_state_table_get(&pool->table, + st_idx + i); + state_i->alloc_size = block_size; + state_i->offset = chunk_offset + block_size * i; + state_i->map = anv_block_pool_map(&pool->block_pool, state_i->offset); + } + + uint32_t block_bucket = anv_state_pool_get_bucket(block_size); + anv_free_list_push(&pool->buckets[block_bucket].free_list, + &pool->table, st_idx, count); +} + +/** Returns a chunk of memory back to the state pool. + * + * Do a two-level split. If chunk_size is bigger than divisor + * (pool->block_size), we return as many divisor sized blocks as we can, from + * the end of the chunk. + * + * The remaining is then split into smaller blocks (starting at small_size if + * it is non-zero), with larger blocks always being taken from the end of the + * chunk. + */ +static void +anv_state_pool_return_chunk(struct anv_state_pool *pool, + uint32_t chunk_offset, uint32_t chunk_size, + uint32_t small_size) +{ + uint32_t divisor = pool->block_size; + uint32_t nblocks = chunk_size / divisor; + uint32_t rest = chunk_size - nblocks * divisor; + + if (nblocks > 0) { + /* First return divisor aligned and sized chunks. We start returning + * larger blocks from the end of the chunk, since they should already be
Also anv_state_pool_return_blocks() only accepts + * aligned chunks. + */ + uint32_t offset = chunk_offset + rest; + anv_state_pool_return_blocks(pool, offset, nblocks, divisor); + } + + chunk_size = rest; + divisor /= 2; + + if (small_size > 0 && small_size < divisor) + divisor = small_size; + + uint32_t min_size = 1 << ANV_MIN_STATE_SIZE_LOG2; + + /* Just as before, return larger divisor aligned blocks from the end of the + * chunk first. + */ + while (chunk_size > 0 && divisor >= min_size) { + nblocks = chunk_size / divisor; + rest = chunk_size - nblocks * divisor; + if (nblocks > 0) { + anv_state_pool_return_blocks(pool, chunk_offset + rest, + nblocks, divisor); + chunk_size = rest; + } + divisor /= 2; + } +} + static struct anv_state anv_state_pool_alloc_no_vg(struct anv_state_pool *pool, uint32_t size, uint32_t align) { uint32_t bucket = anv_state_pool_get_bucket(MAX2(size, align)); - struct anv_state state; - state.alloc_size = anv_state_pool_get_bucket_size(bucket); + struct anv_state *state; + uint32_t alloc_size = anv_state_pool_get_bucket_size(bucket); + int32_t offset; /* Try free list first. */ - if (anv_free_list_pop(&pool->buckets[bucket].free_list, - &pool->block_pool.map, &state.offset)) { - assert(state.offset >= 0); + state = anv_free_list_pop(&pool->buckets[bucket].free_list, + &pool->table); + if (state) { + assert(state->offset >= 0); goto done; } /* Try to grab a chunk from some larger bucket and split it up */ for (unsigned b = bucket + 1; b < ANV_STATE_BUCKETS; b++) { - int32_t chunk_offset; - if (anv_free_list_pop(&pool->buckets[b].free_list, - &pool->block_pool.map, &chunk_offset)) { + state = anv_free_list_pop(&pool->buckets[b].free_list, &pool->table); + if (state) { unsigned chunk_size = anv_state_pool_get_bucket_size(b); + int32_t chunk_offset = state->offset; + + /* First lets update the state we got to its new size. offset and map + * remain the same. 
+ */ + state->alloc_size = alloc_size; - /* We've found a chunk that's larger than the requested state size. + /* Now return the unused part of the chunk back to the pool as free + * blocks + * * There are a couple of options as to what we do with it: * * 1) We could fully split the chunk into state.alloc_size sized @@ -744,48 +1143,42 @@ * two-level split. If it's bigger than some fixed block_size, * we split it into block_size sized chunks and return all but * one of them. Then we split what remains into - * state.alloc_size sized chunks and return all but one. + * state.alloc_size sized chunks and return them. * - * We choose option (3). + * We choose something close to option (3), which is implemented with + * anv_state_pool_return_chunk(). That is done by returning the + * remaining of the chunk, with alloc_size as a hint of the size that + * we want the smaller chunk split into. */ - if (chunk_size > pool->block_size && - state.alloc_size < pool->block_size) { - assert(chunk_size % pool->block_size == 0); - /* We don't want to split giant chunks into tiny chunks. Instead, - * break anything bigger than a block into block-sized chunks and - * then break it down into bucket-sized chunks from there. Return - * all but the first block of the chunk to the block bucket. 
- */ - const uint32_t block_bucket = - anv_state_pool_get_bucket(pool->block_size); - anv_free_list_push(&pool->buckets[block_bucket].free_list, - pool->block_pool.map, - chunk_offset + pool->block_size, - pool->block_size, - (chunk_size / pool->block_size) - 1); - chunk_size = pool->block_size; - } - - assert(chunk_size % state.alloc_size == 0); - anv_free_list_push(&pool->buckets[bucket].free_list, - pool->block_pool.map, - chunk_offset + state.alloc_size, - state.alloc_size, - (chunk_size / state.alloc_size) - 1); - - state.offset = chunk_offset; + anv_state_pool_return_chunk(pool, chunk_offset + alloc_size, + chunk_size - alloc_size, alloc_size); goto done; } } - state.offset = anv_fixed_size_state_pool_alloc_new(&pool->buckets[bucket], - &pool->block_pool, - state.alloc_size, - pool->block_size); + uint32_t padding; + offset = anv_fixed_size_state_pool_alloc_new(&pool->buckets[bucket], + &pool->block_pool, + alloc_size, + pool->block_size, + &padding); + /* Everytime we allocate a new state, add it to the state pool */ + uint32_t idx; + VkResult result = anv_state_table_add(&pool->table, &idx, 1); + assert(result == VK_SUCCESS); + + state = anv_state_table_get(&pool->table, idx); + state->offset = offset; + state->alloc_size = alloc_size; + state->map = anv_block_pool_map(&pool->block_pool, offset); + + if (padding > 0) { + uint32_t return_offset = offset - padding; + anv_state_pool_return_chunk(pool, return_offset, padding, 0); + } done: - state.map = pool->block_pool.map + state.offset; - return state; + return *state; } struct anv_state @@ -802,22 +1195,30 @@ struct anv_state anv_state_pool_alloc_back(struct anv_state_pool *pool) { - struct anv_state state; - state.alloc_size = pool->block_size; + struct anv_state *state; + uint32_t alloc_size = pool->block_size; - if (anv_free_list_pop(&pool->back_alloc_free_list, - &pool->block_pool.map, &state.offset)) { - assert(state.offset < 0); + state = anv_free_list_pop(&pool->back_alloc_free_list, &pool->table); + 
if (state) { + assert(state->offset < 0); goto done; } - state.offset = anv_block_pool_alloc_back(&pool->block_pool, - pool->block_size); + int32_t offset; + offset = anv_block_pool_alloc_back(&pool->block_pool, + pool->block_size); + uint32_t idx; + VkResult result = anv_state_table_add(&pool->table, &idx, 1); + assert(result == VK_SUCCESS); + + state = anv_state_table_get(&pool->table, idx); + state->offset = offset; + state->alloc_size = alloc_size; + state->map = anv_block_pool_map(&pool->block_pool, state->offset); done: - state.map = pool->block_pool.map + state.offset; - VG(VALGRIND_MEMPOOL_ALLOC(pool, state.map, state.alloc_size)); - return state; + VG(VALGRIND_MEMPOOL_ALLOC(pool, state->map, state->alloc_size)); + return *state; } static void @@ -829,12 +1230,10 @@ if (state.offset < 0) { assert(state.alloc_size == pool->block_size); anv_free_list_push(&pool->back_alloc_free_list, - pool->block_pool.map, state.offset, - state.alloc_size, 1); + &pool->table, state.idx, 1); } else { anv_free_list_push(&pool->buckets[bucket].free_list, - pool->block_pool.map, state.offset, - state.alloc_size, 1); + &pool->table, state.idx, 1); } } @@ -1037,6 +1436,14 @@ return vk_error(VK_ERROR_MEMORY_MAP_FAILED); } + /* We are removing the state flushes, so lets make sure that these buffers + * are cached/snooped. 
+ */ + if (!pool->device->info.has_llc) { + anv_gem_set_caching(pool->device, new_bo.gem_handle, + I915_CACHING_CACHED); + } + *bo = new_bo; VG(VALGRIND_MEMPOOL_ALLOC(pool, bo->map, size)); @@ -1201,8 +1608,7 @@ VkResult anv_bo_cache_init(struct anv_bo_cache *cache) { - cache->bo_map = _mesa_hash_table_create(NULL, _mesa_hash_pointer, - _mesa_key_pointer_equal); + cache->bo_map = _mesa_pointer_hash_table_create(NULL); if (!cache->bo_map) return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY); @@ -1319,7 +1725,7 @@ uint32_t gem_handle = anv_gem_fd_to_handle(device, fd); if (!gem_handle) { pthread_mutex_unlock(&cache->mutex); - return vk_error(VK_ERROR_INVALID_EXTERNAL_HANDLE_KHR); + return vk_error(VK_ERROR_INVALID_EXTERNAL_HANDLE); } struct anv_cached_bo *bo = anv_bo_cache_lookup_locked(cache, gem_handle); @@ -1372,7 +1778,7 @@ if (size == (off_t)-1) { anv_gem_close(device, gem_handle); pthread_mutex_unlock(&cache->mutex); - return vk_error(VK_ERROR_INVALID_EXTERNAL_HANDLE_KHR); + return vk_error(VK_ERROR_INVALID_EXTERNAL_HANDLE); } bo = vk_alloc(&device->alloc, sizeof(struct anv_cached_bo), 8, diff -Nru mesa-18.3.3/src/intel/vulkan/anv_android.c mesa-19.0.1/src/intel/vulkan/anv_android.c --- mesa-18.3.3/src/intel/vulkan/anv_android.c 2019-02-01 12:03:20.000000000 +0000 +++ mesa-19.0.1/src/intel/vulkan/anv_android.c 2019-03-31 23:16:37.000000000 +0000 @@ -29,6 +29,8 @@ #include #include "anv_private.h" +#include "vk_format_info.h" +#include "vk_util.h" static int anv_hal_open(const struct hw_module_t* mod, const char* id, struct hw_device_t** dev); static int anv_hal_close(struct hw_device_t *dev); @@ -96,6 +98,317 @@ return -1; } +static VkResult +get_ahw_buffer_format_properties( + VkDevice device_h, + const struct AHardwareBuffer *buffer, + VkAndroidHardwareBufferFormatPropertiesANDROID *pProperties) +{ + ANV_FROM_HANDLE(anv_device, device, device_h); + + /* Get a description of buffer contents . 
*/ + AHardwareBuffer_Desc desc; + AHardwareBuffer_describe(buffer, &desc); + + /* Verify description. */ + uint64_t gpu_usage = + AHARDWAREBUFFER_USAGE_GPU_SAMPLED_IMAGE | + AHARDWAREBUFFER_USAGE_GPU_COLOR_OUTPUT | + AHARDWAREBUFFER_USAGE_GPU_DATA_BUFFER; + + /* "Buffer must be a valid Android hardware buffer object with at least + * one of the AHARDWAREBUFFER_USAGE_GPU_* usage flags." + */ + if (!(desc.usage & (gpu_usage))) + return VK_ERROR_INVALID_EXTERNAL_HANDLE; + + /* Fill properties fields based on description. */ + VkAndroidHardwareBufferFormatPropertiesANDROID *p = pProperties; + + p->format = vk_format_from_android(desc.format); + + const struct anv_format *anv_format = anv_get_format(p->format); + p->externalFormat = (uint64_t) (uintptr_t) anv_format; + + /* Default to OPTIMAL tiling but set to linear in case + * of AHARDWAREBUFFER_USAGE_GPU_DATA_BUFFER usage. + */ + VkImageTiling tiling = VK_IMAGE_TILING_OPTIMAL; + + if (desc.usage & AHARDWAREBUFFER_USAGE_GPU_DATA_BUFFER) + tiling = VK_IMAGE_TILING_LINEAR; + + p->formatFeatures = + anv_get_image_format_features(&device->info, p->format, anv_format, + tiling); + + /* "Images can be created with an external format even if the Android hardware + * buffer has a format which has an equivalent Vulkan format to enable + * consistent handling of images from sources that might use either category + * of format. However, all images created with an external format are subject + * to the valid usage requirements associated with external formats, even if + * the Android hardware buffer’s format has a Vulkan equivalent." 
+ * + * "The formatFeatures member *must* include + * VK_FORMAT_FEATURE_SAMPLED_IMAGE_BIT and at least one of + * VK_FORMAT_FEATURE_MIDPOINT_CHROMA_SAMPLES_BIT or + * VK_FORMAT_FEATURE_COSITED_CHROMA_SAMPLES_BIT" + */ + p->formatFeatures |= + VK_FORMAT_FEATURE_MIDPOINT_CHROMA_SAMPLES_BIT; + + /* "Implementations may not always be able to determine the color model, + * numerical range, or chroma offsets of the image contents, so the values + * in VkAndroidHardwareBufferFormatPropertiesANDROID are only suggestions. + * Applications should treat these values as sensible defaults to use in + * the absence of more reliable information obtained through some other + * means." + */ + p->samplerYcbcrConversionComponents.r = VK_COMPONENT_SWIZZLE_IDENTITY; + p->samplerYcbcrConversionComponents.g = VK_COMPONENT_SWIZZLE_IDENTITY; + p->samplerYcbcrConversionComponents.b = VK_COMPONENT_SWIZZLE_IDENTITY; + p->samplerYcbcrConversionComponents.a = VK_COMPONENT_SWIZZLE_IDENTITY; + + p->suggestedYcbcrModel = VK_SAMPLER_YCBCR_MODEL_CONVERSION_YCBCR_601; + p->suggestedYcbcrRange = VK_SAMPLER_YCBCR_RANGE_ITU_FULL; + + p->suggestedXChromaOffset = VK_CHROMA_LOCATION_MIDPOINT; + p->suggestedYChromaOffset = VK_CHROMA_LOCATION_MIDPOINT; + + return VK_SUCCESS; +} + +VkResult +anv_GetAndroidHardwareBufferPropertiesANDROID( + VkDevice device_h, + const struct AHardwareBuffer *buffer, + VkAndroidHardwareBufferPropertiesANDROID *pProperties) +{ + ANV_FROM_HANDLE(anv_device, dev, device_h); + struct anv_physical_device *pdevice = &dev->instance->physicalDevice; + + VkAndroidHardwareBufferFormatPropertiesANDROID *format_prop = + vk_find_struct(pProperties->pNext, + ANDROID_HARDWARE_BUFFER_FORMAT_PROPERTIES_ANDROID); + + /* Fill format properties of an Android hardware buffer. */ + if (format_prop) + get_ahw_buffer_format_properties(device_h, buffer, format_prop); + + /* NOTE - We support buffers with only one handle but do not error on + * multiple handle case. 
Reason is that we want to support YUV formats + * where we have many logical planes but they all point to the same + * buffer, like is the case with VK_FORMAT_G8_B8R8_2PLANE_420_UNORM. + */ + const native_handle_t *handle = + AHardwareBuffer_getNativeHandle(buffer); + int dma_buf = (handle && handle->numFds) ? handle->data[0] : -1; + if (dma_buf < 0) + return VK_ERROR_INVALID_EXTERNAL_HANDLE; + + /* All memory types. */ + uint32_t memory_types = (1ull << pdevice->memory.type_count) - 1; + + pProperties->allocationSize = lseek(dma_buf, 0, SEEK_END); + pProperties->memoryTypeBits = memory_types; + + return VK_SUCCESS; +} + +/* Construct ahw usage mask from image usage bits, see + * 'AHardwareBuffer Usage Equivalence' in Vulkan spec. + */ +uint64_t +anv_ahw_usage_from_vk_usage(const VkImageCreateFlags vk_create, + const VkImageUsageFlags vk_usage) +{ + uint64_t ahw_usage = 0; + + if (vk_usage & VK_IMAGE_USAGE_SAMPLED_BIT) + ahw_usage |= AHARDWAREBUFFER_USAGE_GPU_SAMPLED_IMAGE; + + if (vk_usage & VK_IMAGE_USAGE_INPUT_ATTACHMENT_BIT) + ahw_usage |= AHARDWAREBUFFER_USAGE_GPU_SAMPLED_IMAGE; + + if (vk_usage & VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT) + ahw_usage |= AHARDWAREBUFFER_USAGE_GPU_COLOR_OUTPUT; + + if (vk_create & VK_IMAGE_CREATE_CUBE_COMPATIBLE_BIT) + ahw_usage |= AHARDWAREBUFFER_USAGE_GPU_CUBE_MAP; + + if (vk_create & VK_IMAGE_CREATE_PROTECTED_BIT) + ahw_usage |= AHARDWAREBUFFER_USAGE_PROTECTED_CONTENT; + + /* No usage bits set - set at least one GPU usage. 
*/ + if (ahw_usage == 0) + ahw_usage = AHARDWAREBUFFER_USAGE_GPU_SAMPLED_IMAGE; + + return ahw_usage; +} + +VkResult +anv_GetMemoryAndroidHardwareBufferANDROID( + VkDevice device_h, + const VkMemoryGetAndroidHardwareBufferInfoANDROID *pInfo, + struct AHardwareBuffer **pBuffer) +{ + ANV_FROM_HANDLE(anv_device_memory, mem, pInfo->memory); + + /* Some quotes from Vulkan spec: + * + * "If the device memory was created by importing an Android hardware + * buffer, vkGetMemoryAndroidHardwareBufferANDROID must return that same + * Android hardware buffer object." + * + * "VK_EXTERNAL_MEMORY_HANDLE_TYPE_ANDROID_HARDWARE_BUFFER_BIT_ANDROID must + * have been included in VkExportMemoryAllocateInfo::handleTypes when + * memory was created." + */ + if (mem->ahw) { + *pBuffer = mem->ahw; + /* Increase refcount. */ + AHardwareBuffer_acquire(mem->ahw); + return VK_SUCCESS; + } + + return VK_ERROR_OUT_OF_HOST_MEMORY; +} + +/* + * Called from anv_AllocateMemory when import AHardwareBuffer. + */ +VkResult +anv_import_ahw_memory(VkDevice device_h, + struct anv_device_memory *mem, + const VkImportAndroidHardwareBufferInfoANDROID *info) +{ + ANV_FROM_HANDLE(anv_device, device, device_h); + + /* Import from AHardwareBuffer to anv_device_memory. */ + const native_handle_t *handle = + AHardwareBuffer_getNativeHandle(info->buffer); + + /* NOTE - We support buffers with only one handle but do not error on + * multiple handle case. Reason is that we want to support YUV formats + * where we have many logical planes but they all point to the same + * buffer, like is the case with VK_FORMAT_G8_B8R8_2PLANE_420_UNORM. + */ + int dma_buf = (handle && handle->numFds) ? 
handle->data[0] : -1; + if (dma_buf < 0) + return VK_ERROR_INVALID_EXTERNAL_HANDLE; + + uint64_t bo_flags = ANV_BO_EXTERNAL; + if (device->instance->physicalDevice.supports_48bit_addresses) + bo_flags |= EXEC_OBJECT_SUPPORTS_48B_ADDRESS; + if (device->instance->physicalDevice.use_softpin) + bo_flags |= EXEC_OBJECT_PINNED; + + VkResult result = anv_bo_cache_import(device, &device->bo_cache, + dma_buf, bo_flags, &mem->bo); + assert(result == VK_SUCCESS); + + /* "If the vkAllocateMemory command succeeds, the implementation must + * acquire a reference to the imported hardware buffer, which it must + * release when the device memory object is freed. If the command fails, + * the implementation must not retain a reference." + */ + AHardwareBuffer_acquire(info->buffer); + mem->ahw = info->buffer; + + return VK_SUCCESS; +} + +VkResult +anv_create_ahw_memory(VkDevice device_h, + struct anv_device_memory *mem, + const VkMemoryAllocateInfo *pAllocateInfo) +{ + ANV_FROM_HANDLE(anv_device, dev, device_h); + + const VkMemoryDedicatedAllocateInfo *dedicated_info = + vk_find_struct_const(pAllocateInfo->pNext, + MEMORY_DEDICATED_ALLOCATE_INFO); + + uint32_t w = 0; + uint32_t h = 1; + uint32_t layers = 1; + uint32_t format = 0; + uint64_t usage = 0; + + /* If caller passed dedicated information. 
*/ + if (dedicated_info && dedicated_info->image) { + ANV_FROM_HANDLE(anv_image, image, dedicated_info->image); + w = image->extent.width; + h = image->extent.height; + layers = image->array_size; + format = android_format_from_vk(image->vk_format); + usage = anv_ahw_usage_from_vk_usage(image->create_flags, image->usage); + } else if (dedicated_info && dedicated_info->buffer) { + ANV_FROM_HANDLE(anv_buffer, buffer, dedicated_info->buffer); + w = buffer->size; + format = AHARDWAREBUFFER_FORMAT_BLOB; + usage = AHARDWAREBUFFER_USAGE_CPU_READ_OFTEN | + AHARDWAREBUFFER_USAGE_CPU_WRITE_OFTEN; + } else { + w = pAllocateInfo->allocationSize; + format = AHARDWAREBUFFER_FORMAT_BLOB; + usage = AHARDWAREBUFFER_USAGE_CPU_READ_OFTEN | + AHARDWAREBUFFER_USAGE_CPU_WRITE_OFTEN; + } + + struct AHardwareBuffer *ahw = NULL; + struct AHardwareBuffer_Desc desc = { + .width = w, + .height = h, + .layers = layers, + .format = format, + .usage = usage, + }; + + if (AHardwareBuffer_allocate(&desc, &ahw) != 0) + return VK_ERROR_OUT_OF_HOST_MEMORY; + + mem->ahw = ahw; + + return VK_SUCCESS; +} + +VkResult +anv_image_from_external( + VkDevice device_h, + const VkImageCreateInfo *base_info, + const struct VkExternalMemoryImageCreateInfo *create_info, + const VkAllocationCallbacks *alloc, + VkImage *out_image_h) +{ + ANV_FROM_HANDLE(anv_device, device, device_h); + + const struct VkExternalFormatANDROID *ext_info = + vk_find_struct_const(base_info->pNext, EXTERNAL_FORMAT_ANDROID); + + if (ext_info && ext_info->externalFormat != 0) { + assert(base_info->format == VK_FORMAT_UNDEFINED); + assert(base_info->imageType == VK_IMAGE_TYPE_2D); + assert(base_info->usage == VK_IMAGE_USAGE_SAMPLED_BIT); + assert(base_info->tiling == VK_IMAGE_TILING_OPTIMAL); + } + + struct anv_image_create_info anv_info = { + .vk_info = base_info, + .isl_extra_usage_flags = ISL_SURF_USAGE_DISABLE_AUX_BIT, + .external_format = true, + }; + + VkImage image_h; + VkResult result = anv_image_create(device_h, &anv_info, alloc, 
&image_h); + if (result != VK_SUCCESS) + return result; + + *out_image_h = image_h; + + return VK_SUCCESS; +} + VkResult anv_image_from_gralloc(VkDevice device_h, const VkImageCreateInfo *base_info, @@ -117,7 +430,7 @@ if (gralloc_info->handle->numFds != 1) { return vk_errorf(device->instance, device, - VK_ERROR_INVALID_EXTERNAL_HANDLE_KHR, + VK_ERROR_INVALID_EXTERNAL_HANDLE, "VkNativeBufferANDROID::handle::numFds is %d, " "expected 1", gralloc_info->handle->numFds); } @@ -153,13 +466,13 @@ break; case -1: result = vk_errorf(device->instance, device, - VK_ERROR_INVALID_EXTERNAL_HANDLE_KHR, + VK_ERROR_INVALID_EXTERNAL_HANDLE, "DRM_IOCTL_I915_GEM_GET_TILING failed for " "VkNativeBufferANDROID"); goto fail_tiling; default: result = vk_errorf(device->instance, device, - VK_ERROR_INVALID_EXTERNAL_HANDLE_KHR, + VK_ERROR_INVALID_EXTERNAL_HANDLE, "DRM_IOCTL_I915_GEM_GET_TILING returned unknown " "tiling %d for VkNativeBufferANDROID", i915_tiling); goto fail_tiling; @@ -181,7 +494,7 @@ if (bo->size < image->size) { result = vk_errorf(device->instance, device, - VK_ERROR_INVALID_EXTERNAL_HANDLE_KHR, + VK_ERROR_INVALID_EXTERNAL_HANDLE, "dma-buf from VkNativeBufferANDROID is too small for " "VkImage: %"PRIu64"B < %"PRIu64"B", bo->size, image->size); @@ -247,16 +560,16 @@ * dEQP-VK.wsi.android.swapchain.*.image_usage to fail. */ - const VkPhysicalDeviceImageFormatInfo2KHR image_format_info = { - .sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_IMAGE_FORMAT_INFO_2_KHR, + const VkPhysicalDeviceImageFormatInfo2 image_format_info = { + .sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_IMAGE_FORMAT_INFO_2, .format = format, .type = VK_IMAGE_TYPE_2D, .tiling = VK_IMAGE_TILING_OPTIMAL, .usage = imageUsage, }; - VkImageFormatProperties2KHR image_format_props = { - .sType = VK_STRUCTURE_TYPE_IMAGE_FORMAT_PROPERTIES_2_KHR, + VkImageFormatProperties2 image_format_props = { + .sType = VK_STRUCTURE_TYPE_IMAGE_FORMAT_PROPERTIES_2, }; /* Check that requested format and usage are supported. 
*/ diff -Nru mesa-18.3.3/src/intel/vulkan/anv_android.h mesa-19.0.1/src/intel/vulkan/anv_android.h --- mesa-18.3.3/src/intel/vulkan/anv_android.h 1970-01-01 00:00:00.000000000 +0000 +++ mesa-19.0.1/src/intel/vulkan/anv_android.h 2019-03-31 23:16:37.000000000 +0000 @@ -0,0 +1,57 @@ +/* + * Copyright © 2018 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. 
+ */ + +#ifndef ANV_ANDROID_H +#define ANV_ANDROID_H + +#include +#include +#include + +struct anv_device_memory; +struct anv_device; +struct anv_image; + +VkResult anv_image_from_gralloc(VkDevice device_h, + const VkImageCreateInfo *base_info, + const VkNativeBufferANDROID *gralloc_info, + const VkAllocationCallbacks *alloc, + VkImage *pImage); + +VkResult anv_image_from_external(VkDevice device_h, + const VkImageCreateInfo *base_info, + const struct VkExternalMemoryImageCreateInfo *create_info, + const VkAllocationCallbacks *alloc, + VkImage *out_image_h); + +uint64_t anv_ahw_usage_from_vk_usage(const VkImageCreateFlags vk_create, + const VkImageUsageFlags vk_usage); + +VkResult anv_import_ahw_memory(VkDevice device_h, + struct anv_device_memory *mem, + const VkImportAndroidHardwareBufferInfoANDROID *info); + +VkResult anv_create_ahw_memory(VkDevice device_h, + struct anv_device_memory *mem, + const VkMemoryAllocateInfo *pAllocateInfo); +#endif /* ANV_ANDROID_H */ diff -Nru mesa-18.3.3/src/intel/vulkan/anv_android_stubs.c mesa-19.0.1/src/intel/vulkan/anv_android_stubs.c --- mesa-18.3.3/src/intel/vulkan/anv_android_stubs.c 1970-01-01 00:00:00.000000000 +0000 +++ mesa-19.0.1/src/intel/vulkan/anv_android_stubs.c 2019-03-31 23:16:37.000000000 +0000 @@ -0,0 +1,67 @@ +/* + * Copyright © 2018 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. 
+ * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + */ + +#include "anv_android.h" + +VkResult +anv_image_from_gralloc(VkDevice device_h, + const VkImageCreateInfo *base_info, + const VkNativeBufferANDROID *gralloc_info, + const VkAllocationCallbacks *alloc, + VkImage *pImage) +{ + return VK_ERROR_EXTENSION_NOT_PRESENT; +} + +uint64_t +anv_ahw_usage_from_vk_usage(const VkImageCreateFlags vk_create, + const VkImageUsageFlags vk_usage) +{ + return 0; +} + +VkResult +anv_import_ahw_memory(VkDevice device_h, + struct anv_device_memory *mem, + const VkImportAndroidHardwareBufferInfoANDROID *info) +{ + return VK_ERROR_EXTENSION_NOT_PRESENT; +} + +VkResult +anv_create_ahw_memory(VkDevice device_h, + struct anv_device_memory *mem, + const VkMemoryAllocateInfo *pAllocateInfo) +{ + return VK_ERROR_EXTENSION_NOT_PRESENT; +} + +VkResult +anv_image_from_external(VkDevice device_h, + const VkImageCreateInfo *base_info, + const struct VkExternalMemoryImageCreateInfo *create_info, + const VkAllocationCallbacks *alloc, + VkImage *out_image_h) +{ + return VK_ERROR_EXTENSION_NOT_PRESENT; +} diff -Nru mesa-18.3.3/src/intel/vulkan/anv_batch_chain.c mesa-19.0.1/src/intel/vulkan/anv_batch_chain.c --- mesa-18.3.3/src/intel/vulkan/anv_batch_chain.c 2018-12-07 18:58:04.000000000 +0000 +++ mesa-19.0.1/src/intel/vulkan/anv_batch_chain.c 2019-03-31 23:16:37.000000000 +0000 @@ -75,8 +75,7 @@ return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY); } - list->deps = _mesa_set_create(NULL, _mesa_hash_pointer, - _mesa_key_pointer_equal); + list->deps = 
_mesa_pointer_set_create(NULL); if (!list->deps) { vk_free(alloc, list->relocs); @@ -501,7 +500,7 @@ { struct anv_state *bt_block = u_vector_head(&cmd_buffer->bt_block_states); return (struct anv_address) { - .bo = &anv_binding_table_pool(cmd_buffer->device)->block_pool.bo, + .bo = anv_binding_table_pool(cmd_buffer->device)->block_pool.bo, .offset = bt_block->offset, }; } @@ -679,8 +678,8 @@ return (struct anv_state) { 0 }; state.offset = cmd_buffer->bt_next; - state.map = anv_binding_table_pool(device)->block_pool.map + - bt_block->offset + state.offset; + state.map = anv_block_pool_map(&anv_binding_table_pool(device)->block_pool, + bt_block->offset + state.offset); cmd_buffer->bt_next += state.alloc_size; @@ -1037,6 +1036,12 @@ } static VkResult +anv_execbuf_add_bo_set(struct anv_execbuf *exec, + struct set *deps, + uint32_t extra_flags, + const VkAllocationCallbacks *alloc); + +static VkResult anv_execbuf_add_bo(struct anv_execbuf *exec, struct anv_bo *bo, struct anv_reloc_list *relocs, @@ -1125,36 +1130,46 @@ } } - if (relocs->deps && relocs->deps->entries > 0) { - const uint32_t entries = relocs->deps->entries; - struct anv_bo **bos = - vk_alloc(alloc, entries * sizeof(*bos), - 8, VK_SYSTEM_ALLOCATION_SCOPE_COMMAND); - if (bos == NULL) - return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY); + return anv_execbuf_add_bo_set(exec, relocs->deps, extra_flags, alloc); + } - struct anv_bo **bo = bos; - set_foreach(relocs->deps, entry) { - *bo++ = (void *)entry->key; - } + return VK_SUCCESS; +} - qsort(bos, entries, sizeof(struct anv_bo*), _compare_bo_handles); +/* Add BO dependencies to execbuf */ +static VkResult +anv_execbuf_add_bo_set(struct anv_execbuf *exec, + struct set *deps, + uint32_t extra_flags, + const VkAllocationCallbacks *alloc) +{ + if (!deps || deps->entries <= 0) + return VK_SUCCESS; - VkResult result = VK_SUCCESS; - for (bo = bos; bo < bos + entries; bo++) { - result = anv_execbuf_add_bo(exec, *bo, NULL, extra_flags, alloc); - if (result != VK_SUCCESS) - 
break; - } + const uint32_t entries = deps->entries; + struct anv_bo **bos = + vk_alloc(alloc, entries * sizeof(*bos), + 8, VK_SYSTEM_ALLOCATION_SCOPE_COMMAND); + if (bos == NULL) + return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY); - vk_free(alloc, bos); + struct anv_bo **bo = bos; + set_foreach(deps, entry) { + *bo++ = (void *)entry->key; + } - if (result != VK_SUCCESS) - return result; - } + qsort(bos, entries, sizeof(struct anv_bo*), _compare_bo_handles); + + VkResult result = VK_SUCCESS; + for (bo = bos; bo < bos + entries; bo++) { + result = anv_execbuf_add_bo(exec, *bo, NULL, extra_flags, alloc); + if (result != VK_SUCCESS) + break; } - return VK_SUCCESS; + vk_free(alloc, bos); + + return result; } static VkResult @@ -1228,7 +1243,7 @@ * relocations that point to the pool bo with the correct offset. */ for (size_t i = 0; i < relocs->num_relocs; i++) { - if (relocs->reloc_bos[i] == &pool->block_pool.bo) { + if (relocs->reloc_bos[i] == pool->block_pool.bo) { /* Adjust the delta value in the relocation to correctly * correspond to the new delta. Initially, this value may have * been negative (if treated as unsigned), but we trust in @@ -1336,7 +1351,7 @@ * given time. The only option is to always relocate them. 
*/ anv_reloc_list_apply(cmd_buffer->device, &cmd_buffer->surface_relocs, - &cmd_buffer->device->surface_state_pool.block_pool.bo, + cmd_buffer->device->surface_state_pool.block_pool.bo, true /* always relocate surface states */); /* Since we own all of the batch buffers, we know what values are stored @@ -1365,11 +1380,55 @@ adjust_relocations_from_state_pool(ss_pool, &cmd_buffer->surface_relocs, cmd_buffer->last_ss_pool_center); - VkResult result = anv_execbuf_add_bo(execbuf, &ss_pool->block_pool.bo, - &cmd_buffer->surface_relocs, 0, - &cmd_buffer->device->alloc); - if (result != VK_SUCCESS) - return result; + VkResult result; + struct anv_bo *bo; + if (cmd_buffer->device->instance->physicalDevice.use_softpin) { + anv_block_pool_foreach_bo(bo, &ss_pool->block_pool) { + result = anv_execbuf_add_bo(execbuf, bo, NULL, 0, + &cmd_buffer->device->alloc); + if (result != VK_SUCCESS) + return result; + } + /* Add surface dependencies (BOs) to the execbuf */ + anv_execbuf_add_bo_set(execbuf, cmd_buffer->surface_relocs.deps, 0, + &cmd_buffer->device->alloc); + + struct anv_block_pool *pool; + pool = &cmd_buffer->device->dynamic_state_pool.block_pool; + anv_block_pool_foreach_bo(bo, pool) { + result = anv_execbuf_add_bo(execbuf, bo, NULL, 0, + &cmd_buffer->device->alloc); + if (result != VK_SUCCESS) + return result; + } + + pool = &cmd_buffer->device->instruction_state_pool.block_pool; + anv_block_pool_foreach_bo(bo, pool) { + result = anv_execbuf_add_bo(execbuf, bo, NULL, 0, + &cmd_buffer->device->alloc); + if (result != VK_SUCCESS) + return result; + } + + pool = &cmd_buffer->device->binding_table_pool.block_pool; + anv_block_pool_foreach_bo(bo, pool) { + result = anv_execbuf_add_bo(execbuf, bo, NULL, 0, + &cmd_buffer->device->alloc); + if (result != VK_SUCCESS) + return result; + } + } else { + /* Since we aren't in the softpin case, all of our STATE_BASE_ADDRESS BOs + * will get added automatically by processing relocations on the batch + * buffer. 
We have to add the surface state BO manually because it has + * relocations of its own that we need to be sure are processsed. + */ + result = anv_execbuf_add_bo(execbuf, ss_pool->block_pool.bo, + &cmd_buffer->surface_relocs, 0, + &cmd_buffer->device->alloc); + if (result != VK_SUCCESS) + return result; + } /* First, we walk over all of the bos we've seen and add them and their * relocations to the validate list. diff -Nru mesa-18.3.3/src/intel/vulkan/anv_blorp.c mesa-19.0.1/src/intel/vulkan/anv_blorp.c --- mesa-18.3.3/src/intel/vulkan/anv_blorp.c 2019-02-01 12:03:20.000000000 +0000 +++ mesa-19.0.1/src/intel/vulkan/anv_blorp.c 2019-03-31 23:16:37.000000000 +0000 @@ -24,10 +24,11 @@ #include "anv_private.h" static bool -lookup_blorp_shader(struct blorp_context *blorp, +lookup_blorp_shader(struct blorp_batch *batch, const void *key, uint32_t key_size, uint32_t *kernel_out, void *prog_data_out) { + struct blorp_context *blorp = batch->blorp; struct anv_device *device = blorp->driver_ctx; /* The default cache must be a real cache */ @@ -50,13 +51,14 @@ } static bool -upload_blorp_shader(struct blorp_context *blorp, +upload_blorp_shader(struct blorp_batch *batch, const void *key, uint32_t key_size, const void *kernel, uint32_t kernel_size, const struct brw_stage_prog_data *prog_data, uint32_t prog_data_size, uint32_t *kernel_out, void *prog_data_out) { + struct blorp_context *blorp = batch->blorp; struct anv_device *device = blorp->driver_ctx; /* The blorp cache must be a real cache */ @@ -71,7 +73,8 @@ anv_pipeline_cache_upload_kernel(&device->default_pipeline_cache, key, key_size, kernel, kernel_size, NULL, 0, - prog_data, prog_data_size, &bind_map); + prog_data, prog_data_size, + NULL, &bind_map); if (!bin) return false; @@ -473,6 +476,8 @@ copy_buffer_to_image(cmd_buffer, dst_buffer, src_image, srcImageLayout, regionCount, pRegions, false); + + cmd_buffer->state.pending_pipe_bits |= ANV_PIPE_RENDER_TARGET_BUFFER_WRITES; } static bool @@ -680,6 +685,8 @@ } 
blorp_batch_finish(&batch); + + cmd_buffer->state.pending_pipe_bits |= ANV_PIPE_RENDER_TARGET_BUFFER_WRITES; } void anv_CmdUpdateBuffer( @@ -716,10 +723,8 @@ memcpy(tmp_data.map, pData, copy_size); - anv_state_flush(cmd_buffer->device, tmp_data); - struct blorp_address src = { - .buffer = &cmd_buffer->device->dynamic_state_pool.block_pool.bo, + .buffer = cmd_buffer->device->dynamic_state_pool.block_pool.bo, .offset = tmp_data.offset, .mocs = cmd_buffer->device->default_mocs, }; @@ -737,6 +742,8 @@ } blorp_batch_finish(&batch); + + cmd_buffer->state.pending_pipe_bits |= ANV_PIPE_RENDER_TARGET_BUFFER_WRITES; } void anv_CmdFillBuffer( @@ -824,6 +831,8 @@ } blorp_batch_finish(&batch); + + cmd_buffer->state.pending_pipe_bits |= ANV_PIPE_RENDER_TARGET_BUFFER_WRITES; } void anv_CmdClearColorImage( @@ -1144,8 +1153,12 @@ * trash our depth and stencil buffers. */ struct blorp_batch batch; - blorp_batch_init(&cmd_buffer->device->blorp, &batch, cmd_buffer, - BLORP_BATCH_NO_EMIT_DEPTH_STENCIL); + enum blorp_batch_flags flags = BLORP_BATCH_NO_EMIT_DEPTH_STENCIL; + if (cmd_buffer->state.conditional_render_enabled) { + anv_cmd_emit_conditional_render_predicate(cmd_buffer); + flags |= BLORP_BATCH_PREDICATE_ENABLE; + } + blorp_batch_init(&cmd_buffer->device->blorp, &batch, cmd_buffer, flags); for (uint32_t a = 0; a < attachmentCount; ++a) { if (pAttachments[a].aspectMask & VK_IMAGE_ASPECT_ANY_COLOR_BIT_ANV) { @@ -1169,63 +1182,52 @@ SUBPASS_STAGE_RESOLVE, }; -static void -resolve_surface(struct blorp_batch *batch, - struct blorp_surf *src_surf, - uint32_t src_level, uint32_t src_layer, - struct blorp_surf *dst_surf, - uint32_t dst_level, uint32_t dst_layer, - uint32_t src_x, uint32_t src_y, uint32_t dst_x, uint32_t dst_y, - uint32_t width, uint32_t height, - enum blorp_filter filter) -{ - blorp_blit(batch, - src_surf, src_level, src_layer, - ISL_FORMAT_UNSUPPORTED, ISL_SWIZZLE_IDENTITY, - dst_surf, dst_level, dst_layer, - ISL_FORMAT_UNSUPPORTED, ISL_SWIZZLE_IDENTITY, - src_x, 
src_y, src_x + width, src_y + height, - dst_x, dst_y, dst_x + width, dst_y + height, - filter, false, false); -} - -static void -resolve_image(struct anv_device *device, - struct blorp_batch *batch, - const struct anv_image *src_image, - VkImageLayout src_image_layout, - uint32_t src_level, uint32_t src_layer, - const struct anv_image *dst_image, - VkImageLayout dst_image_layout, - uint32_t dst_level, uint32_t dst_layer, - VkImageAspectFlags aspect_mask, - uint32_t src_x, uint32_t src_y, uint32_t dst_x, uint32_t dst_y, - uint32_t width, uint32_t height) +void +anv_image_msaa_resolve(struct anv_cmd_buffer *cmd_buffer, + const struct anv_image *src_image, + enum isl_aux_usage src_aux_usage, + uint32_t src_level, uint32_t src_base_layer, + const struct anv_image *dst_image, + enum isl_aux_usage dst_aux_usage, + uint32_t dst_level, uint32_t dst_base_layer, + VkImageAspectFlagBits aspect, + uint32_t src_x, uint32_t src_y, + uint32_t dst_x, uint32_t dst_y, + uint32_t width, uint32_t height, + uint32_t layer_count, + enum blorp_filter filter) { - struct anv_cmd_buffer *cmd_buffer = batch->driver_batch; + struct blorp_batch batch; + blorp_batch_init(&cmd_buffer->device->blorp, &batch, cmd_buffer, 0); assert(src_image->type == VK_IMAGE_TYPE_2D); assert(src_image->samples > 1); assert(dst_image->type == VK_IMAGE_TYPE_2D); assert(dst_image->samples == 1); assert(src_image->n_planes == dst_image->n_planes); + assert(!src_image->format->can_ycbcr); + assert(!dst_image->format->can_ycbcr); - uint32_t aspect_bit; - - anv_foreach_image_aspect_bit(aspect_bit, src_image, aspect_mask) { - struct blorp_surf src_surf, dst_surf; - get_blorp_surf_for_anv_image(device, src_image, 1UL << aspect_bit, - src_image_layout, ISL_AUX_USAGE_NONE, - &src_surf); - get_blorp_surf_for_anv_image(device, dst_image, 1UL << aspect_bit, - dst_image_layout, ISL_AUX_USAGE_NONE, - &dst_surf); - anv_cmd_buffer_mark_image_written(cmd_buffer, dst_image, - 1UL << aspect_bit, - dst_surf.aux_usage, - dst_level, 
dst_layer, 1); - - enum blorp_filter filter; + struct blorp_surf src_surf, dst_surf; + get_blorp_surf_for_anv_image(cmd_buffer->device, src_image, aspect, + ANV_IMAGE_LAYOUT_EXPLICIT_AUX, + src_aux_usage, &src_surf); + if (src_aux_usage == ISL_AUX_USAGE_MCS) { + src_surf.clear_color_addr = anv_to_blorp_address( + anv_image_get_clear_color_addr(cmd_buffer->device, src_image, + VK_IMAGE_ASPECT_COLOR_BIT)); + } + get_blorp_surf_for_anv_image(cmd_buffer->device, dst_image, aspect, + ANV_IMAGE_LAYOUT_EXPLICIT_AUX, + dst_aux_usage, &dst_surf); + anv_cmd_buffer_mark_image_written(cmd_buffer, dst_image, + aspect, dst_aux_usage, + dst_level, dst_base_layer, layer_count); + + if (filter == BLORP_FILTER_NONE) { + /* If no explicit filter is provided, then it's implied by the type of + * the source image. + */ if ((src_surf.surf->usage & ISL_SURF_USAGE_DEPTH_BIT) || (src_surf.surf->usage & ISL_SURF_USAGE_STENCIL_BIT) || isl_format_has_int_channel(src_surf.surf->format)) { @@ -1233,15 +1235,20 @@ } else { filter = BLORP_FILTER_AVERAGE; } + } - assert(!src_image->format->can_ycbcr); - assert(!dst_image->format->can_ycbcr); - - resolve_surface(batch, - &src_surf, src_level, src_layer, - &dst_surf, dst_level, dst_layer, - src_x, src_y, dst_x, dst_y, width, height, filter); + for (uint32_t l = 0; l < layer_count; l++) { + blorp_blit(&batch, + &src_surf, src_level, src_base_layer + l, + ISL_FORMAT_UNSUPPORTED, ISL_SWIZZLE_IDENTITY, + &dst_surf, dst_level, dst_base_layer + l, + ISL_FORMAT_UNSUPPORTED, ISL_SWIZZLE_IDENTITY, + src_x, src_y, src_x + width, src_y + height, + dst_x, dst_y, dst_x + width, dst_y + height, + filter, false, false); } + + blorp_batch_finish(&batch); } void anv_CmdResolveImage( @@ -1257,8 +1264,7 @@ ANV_FROM_HANDLE(anv_image, src_image, srcImage); ANV_FROM_HANDLE(anv_image, dst_image, dstImage); - struct blorp_batch batch; - blorp_batch_init(&cmd_buffer->device->blorp, &batch, cmd_buffer, 0); + assert(!src_image->format->can_ycbcr); for (uint32_t r = 0; r < 
regionCount; r++) { assert(pRegions[r].srcSubresource.aspectMask == @@ -1269,27 +1275,38 @@ const uint32_t layer_count = anv_get_layerCount(dst_image, &pRegions[r].dstSubresource); - VkImageAspectFlags src_mask = pRegions[r].srcSubresource.aspectMask, - dst_mask = pRegions[r].dstSubresource.aspectMask; + VkImageAspectFlags src_mask = pRegions[r].srcSubresource.aspectMask; + VkImageAspectFlags dst_mask = pRegions[r].dstSubresource.aspectMask; assert(anv_image_aspects_compatible(src_mask, dst_mask)); - for (uint32_t layer = 0; layer < layer_count; layer++) { - resolve_image(cmd_buffer->device, &batch, - src_image, srcImageLayout, - pRegions[r].srcSubresource.mipLevel, - pRegions[r].srcSubresource.baseArrayLayer + layer, - dst_image, dstImageLayout, - pRegions[r].dstSubresource.mipLevel, - pRegions[r].dstSubresource.baseArrayLayer + layer, - pRegions[r].dstSubresource.aspectMask, - pRegions[r].srcOffset.x, pRegions[r].srcOffset.y, - pRegions[r].dstOffset.x, pRegions[r].dstOffset.y, - pRegions[r].extent.width, pRegions[r].extent.height); + uint32_t aspect_bit; + anv_foreach_image_aspect_bit(aspect_bit, src_image, + pRegions[r].srcSubresource.aspectMask) { + enum isl_aux_usage src_aux_usage = + anv_layout_to_aux_usage(&cmd_buffer->device->info, src_image, + (1 << aspect_bit), srcImageLayout); + enum isl_aux_usage dst_aux_usage = + anv_layout_to_aux_usage(&cmd_buffer->device->info, dst_image, + (1 << aspect_bit), dstImageLayout); + + anv_image_msaa_resolve(cmd_buffer, + src_image, src_aux_usage, + pRegions[r].srcSubresource.mipLevel, + pRegions[r].srcSubresource.baseArrayLayer, + dst_image, dst_aux_usage, + pRegions[r].dstSubresource.mipLevel, + pRegions[r].dstSubresource.baseArrayLayer, + (1 << aspect_bit), + pRegions[r].srcOffset.x, + pRegions[r].srcOffset.y, + pRegions[r].dstOffset.x, + pRegions[r].dstOffset.y, + pRegions[r].extent.width, + pRegions[r].extent.height, + layer_count, BLORP_FILTER_NONE); } } - - blorp_batch_finish(&batch); } static enum isl_aux_usage @@ 
-1304,115 +1321,6 @@ } void -anv_cmd_buffer_resolve_subpass(struct anv_cmd_buffer *cmd_buffer) -{ - struct anv_framebuffer *fb = cmd_buffer->state.framebuffer; - struct anv_subpass *subpass = cmd_buffer->state.subpass; - - if (subpass->has_resolve) { - struct blorp_batch batch; - blorp_batch_init(&cmd_buffer->device->blorp, &batch, cmd_buffer, 0); - - /* We are about to do some MSAA resolves. We need to flush so that the - * result of writes to the MSAA color attachments show up in the sampler - * when we blit to the single-sampled resolve target. - */ - cmd_buffer->state.pending_pipe_bits |= - ANV_PIPE_TEXTURE_CACHE_INVALIDATE_BIT | - ANV_PIPE_RENDER_TARGET_CACHE_FLUSH_BIT; - - for (uint32_t i = 0; i < subpass->color_count; ++i) { - uint32_t src_att = subpass->color_attachments[i].attachment; - uint32_t dst_att = subpass->resolve_attachments[i].attachment; - - if (dst_att == VK_ATTACHMENT_UNUSED) - continue; - - assert(src_att < cmd_buffer->state.pass->attachment_count); - assert(dst_att < cmd_buffer->state.pass->attachment_count); - - if (cmd_buffer->state.attachments[dst_att].pending_clear_aspects) { - /* From the Vulkan 1.0 spec: - * - * If the first use of an attachment in a render pass is as a - * resolve attachment, then the loadOp is effectively ignored - * as the resolve is guaranteed to overwrite all pixels in the - * render area. 
- */ - cmd_buffer->state.attachments[dst_att].pending_clear_aspects = 0; - } - - struct anv_image_view *src_iview = fb->attachments[src_att]; - struct anv_image_view *dst_iview = fb->attachments[dst_att]; - - enum isl_aux_usage src_aux_usage = - cmd_buffer->state.attachments[src_att].aux_usage; - enum isl_aux_usage dst_aux_usage = - cmd_buffer->state.attachments[dst_att].aux_usage; - - const VkRect2D render_area = cmd_buffer->state.render_area; - - assert(src_iview->aspect_mask == VK_IMAGE_ASPECT_COLOR_BIT && - dst_iview->aspect_mask == VK_IMAGE_ASPECT_COLOR_BIT); - - enum blorp_filter filter; - if (isl_format_has_int_channel(src_iview->planes[0].isl.format)) { - filter = BLORP_FILTER_SAMPLE_0; - } else { - filter = BLORP_FILTER_AVERAGE; - } - - struct blorp_surf src_surf, dst_surf; - get_blorp_surf_for_anv_image(cmd_buffer->device, src_iview->image, - VK_IMAGE_ASPECT_COLOR_BIT, - ANV_IMAGE_LAYOUT_EXPLICIT_AUX, - src_aux_usage, &src_surf); - if (src_aux_usage == ISL_AUX_USAGE_MCS) { - src_surf.clear_color_addr = anv_to_blorp_address( - anv_image_get_clear_color_addr(cmd_buffer->device, - src_iview->image, - VK_IMAGE_ASPECT_COLOR_BIT)); - } - get_blorp_surf_for_anv_image(cmd_buffer->device, dst_iview->image, - VK_IMAGE_ASPECT_COLOR_BIT, - ANV_IMAGE_LAYOUT_EXPLICIT_AUX, - dst_aux_usage, &dst_surf); - - uint32_t base_src_layer = src_iview->planes[0].isl.base_array_layer; - uint32_t base_dst_layer = dst_iview->planes[0].isl.base_array_layer; - - assert(src_iview->planes[0].isl.array_len >= fb->layers); - assert(dst_iview->planes[0].isl.array_len >= fb->layers); - - anv_cmd_buffer_mark_image_written(cmd_buffer, dst_iview->image, - VK_IMAGE_ASPECT_COLOR_BIT, - dst_surf.aux_usage, - dst_iview->planes[0].isl.base_level, - base_dst_layer, fb->layers); - - assert(!src_iview->image->format->can_ycbcr); - assert(!dst_iview->image->format->can_ycbcr); - - for (uint32_t i = 0; i < fb->layers; i++) { - resolve_surface(&batch, - &src_surf, - src_iview->planes[0].isl.base_level, - 
base_src_layer + i, - &dst_surf, - dst_iview->planes[0].isl.base_level, - base_dst_layer + i, - render_area.offset.x, render_area.offset.y, - render_area.offset.x, render_area.offset.y, - render_area.extent.width, render_area.extent.height, - filter); - } - } - - blorp_batch_finish(&batch); - } -} - -void anv_image_copy_to_shadow(struct anv_cmd_buffer *cmd_buffer, const struct anv_image *image, uint32_t base_level, uint32_t level_count, diff -Nru mesa-18.3.3/src/intel/vulkan/anv_cmd_buffer.c mesa-19.0.1/src/intel/vulkan/anv_cmd_buffer.c --- mesa-18.3.3/src/intel/vulkan/anv_cmd_buffer.c 2018-09-27 19:13:54.000000000 +0000 +++ mesa-19.0.1/src/intel/vulkan/anv_cmd_buffer.c 2019-03-31 23:16:37.000000000 +0000 @@ -128,8 +128,13 @@ anv_cmd_pipeline_state_finish(struct anv_cmd_buffer *cmd_buffer, struct anv_cmd_pipeline_state *pipe_state) { - for (uint32_t i = 0; i < ARRAY_SIZE(pipe_state->push_descriptors); i++) - vk_free(&cmd_buffer->pool->alloc, pipe_state->push_descriptors[i]); + for (uint32_t i = 0; i < ARRAY_SIZE(pipe_state->push_descriptors); i++) { + if (pipe_state->push_descriptors[i]) { + anv_descriptor_set_layout_unref(cmd_buffer->device, + pipe_state->push_descriptors[i]->set.layout); + vk_free(&cmd_buffer->pool->alloc, pipe_state->push_descriptors[i]); + } + } } static void @@ -377,6 +382,14 @@ level, base_layer, layer_count); } +void +anv_cmd_emit_conditional_render_predicate(struct anv_cmd_buffer *cmd_buffer) +{ + anv_genX_call(&cmd_buffer->device->info, + cmd_emit_conditional_render_predicate, + cmd_buffer); +} + void anv_CmdBindPipeline( VkCommandBuffer commandBuffer, VkPipelineBindPoint pipelineBindPoint, @@ -645,6 +658,35 @@ } } +void anv_CmdBindTransformFeedbackBuffersEXT( + VkCommandBuffer commandBuffer, + uint32_t firstBinding, + uint32_t bindingCount, + const VkBuffer* pBuffers, + const VkDeviceSize* pOffsets, + const VkDeviceSize* pSizes) +{ + ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, commandBuffer); + struct anv_xfb_binding *xfb = 
cmd_buffer->state.xfb_bindings; + + /* We have to defer setting up vertex buffer since we need the buffer + * stride from the pipeline. */ + + assert(firstBinding + bindingCount <= MAX_XFB_BUFFERS); + for (uint32_t i = 0; i < bindingCount; i++) { + if (pBuffers[i] == VK_NULL_HANDLE) { + xfb[firstBinding + i].buffer = NULL; + } else { + ANV_FROM_HANDLE(anv_buffer, buffer, pBuffers[i]); + xfb[firstBinding + i].buffer = buffer; + xfb[firstBinding + i].offset = pOffsets[i]; + xfb[firstBinding + i].size = + anv_buffer_get_range(buffer, pOffsets[i], + pSizes ? pSizes[i] : VK_WHOLE_SIZE); + } + } +} + enum isl_format anv_isl_format_for_descriptor_type(VkDescriptorType type) { @@ -671,8 +713,6 @@ state = anv_cmd_buffer_alloc_dynamic_state(cmd_buffer, size, alignment); memcpy(state.map, data, size); - anv_state_flush(cmd_buffer->device, state); - VG(VALGRIND_CHECK_MEM_IS_DEFINED(state.map, size)); return state; @@ -692,8 +732,6 @@ for (uint32_t i = 0; i < dwords; i++) p[i] = a[i] | b[i]; - anv_state_flush(cmd_buffer->device, state); - VG(VALGRIND_CHECK_MEM_IS_DEFINED(p, dwords * 4)); return state; @@ -754,8 +792,6 @@ for (unsigned i = 0; i < prog_data->nr_params; i++) u32_map[i] = anv_push_constant_value(data, prog_data->param[i]); - anv_state_flush(cmd_buffer->device, state); - return state; } @@ -810,8 +846,6 @@ } } - anv_state_flush(cmd_buffer->device, state); - return state; } @@ -928,10 +962,11 @@ return iview; } -static struct anv_push_descriptor_set * -anv_cmd_buffer_get_push_descriptor_set(struct anv_cmd_buffer *cmd_buffer, - VkPipelineBindPoint bind_point, - uint32_t set) +static struct anv_descriptor_set * +anv_cmd_buffer_push_descriptor_set(struct anv_cmd_buffer *cmd_buffer, + VkPipelineBindPoint bind_point, + struct anv_descriptor_set_layout *layout, + uint32_t _set) { struct anv_cmd_pipeline_state *pipe_state; if (bind_point == VK_PIPELINE_BIND_POINT_COMPUTE) { @@ -942,19 +977,31 @@ } struct anv_push_descriptor_set **push_set = - 
&pipe_state->push_descriptors[set]; + &pipe_state->push_descriptors[_set]; if (*push_set == NULL) { - *push_set = vk_alloc(&cmd_buffer->pool->alloc, - sizeof(struct anv_push_descriptor_set), 8, - VK_SYSTEM_ALLOCATION_SCOPE_OBJECT); + *push_set = vk_zalloc(&cmd_buffer->pool->alloc, + sizeof(struct anv_push_descriptor_set), 8, + VK_SYSTEM_ALLOCATION_SCOPE_OBJECT); if (*push_set == NULL) { anv_batch_set_error(&cmd_buffer->batch, VK_ERROR_OUT_OF_HOST_MEMORY); return NULL; } } - return *push_set; + struct anv_descriptor_set *set = &(*push_set)->set; + + if (set->layout != layout) { + if (set->layout) + anv_descriptor_set_layout_unref(cmd_buffer->device, set->layout); + anv_descriptor_set_layout_ref(layout); + set->layout = layout; + } + set->size = anv_descriptor_set_layout_size(layout); + set->buffer_count = layout->buffer_count; + set->buffer_views = (*push_set)->buffer_views; + + return set; } void anv_CmdPushDescriptorSetKHR( @@ -972,19 +1019,12 @@ struct anv_descriptor_set_layout *set_layout = layout->set[_set].layout; - struct anv_push_descriptor_set *push_set = - anv_cmd_buffer_get_push_descriptor_set(cmd_buffer, - pipelineBindPoint, _set); - if (!push_set) + struct anv_descriptor_set *set = + anv_cmd_buffer_push_descriptor_set(cmd_buffer, pipelineBindPoint, + set_layout, _set); + if (!set) return; - struct anv_descriptor_set *set = &push_set->set; - - set->layout = set_layout; - set->size = anv_descriptor_set_layout_size(set_layout); - set->buffer_count = set_layout->buffer_count; - set->buffer_views = push_set->buffer_views; - /* Go through the user supplied descriptors. 
*/ for (uint32_t i = 0; i < descriptorWriteCount; i++) { const VkWriteDescriptorSet *write = &pDescriptorWrites[i]; @@ -1064,19 +1104,12 @@ struct anv_descriptor_set_layout *set_layout = layout->set[_set].layout; - struct anv_push_descriptor_set *push_set = - anv_cmd_buffer_get_push_descriptor_set(cmd_buffer, - template->bind_point, _set); - if (!push_set) + struct anv_descriptor_set *set = + anv_cmd_buffer_push_descriptor_set(cmd_buffer, template->bind_point, + set_layout, _set); + if (!set) return; - struct anv_descriptor_set *set = &push_set->set; - - set->layout = set_layout; - set->size = anv_descriptor_set_layout_size(set_layout); - set->buffer_count = set_layout->buffer_count; - set->buffer_views = push_set->buffer_views; - anv_descriptor_set_write_template(set, cmd_buffer->device, &cmd_buffer->surface_state_stream, diff -Nru mesa-18.3.3/src/intel/vulkan/anv_descriptor_set.c mesa-19.0.1/src/intel/vulkan/anv_descriptor_set.c --- mesa-18.3.3/src/intel/vulkan/anv_descriptor_set.c 2019-02-01 12:03:20.000000000 +0000 +++ mesa-19.0.1/src/intel/vulkan/anv_descriptor_set.c 2019-03-31 23:16:37.000000000 +0000 @@ -58,6 +58,9 @@ anv_foreach_stage(s, binding->stageFlags) surface_count[s] += sampler->n_planes; } + } else { + anv_foreach_stage(s, binding->stageFlags) + surface_count[s] += binding->descriptorCount; } break; @@ -458,6 +461,8 @@ &device->surface_state_pool, 4096); pool->surface_state_free_list = NULL; + list_inithead(&pool->desc_sets); + *pDescriptorPool = anv_descriptor_pool_to_handle(pool); return VK_SUCCESS; @@ -475,6 +480,12 @@ return; anv_state_stream_finish(&pool->surface_state_stream); + + list_for_each_entry_safe(struct anv_descriptor_set, set, + &pool->desc_sets, pool_link) { + anv_descriptor_set_destroy(device, pool, set); + } + vk_free2(&device->alloc, pAllocator, pool); } @@ -486,6 +497,11 @@ ANV_FROM_HANDLE(anv_device, device, _device); ANV_FROM_HANDLE(anv_descriptor_pool, pool, descriptorPool); + list_for_each_entry_safe(struct 
anv_descriptor_set, set, + &pool->desc_sets, pool_link) { + anv_descriptor_set_destroy(device, pool, set); + } + pool->next = 0; pool->free_list = EMPTY; anv_state_stream_finish(&pool->surface_state_stream); @@ -630,6 +646,8 @@ entry->size = set->size; pool->free_list = (char *) entry - pool->data; } + + list_del(&set->pool_link); } VkResult anv_AllocateDescriptorSets( @@ -652,6 +670,8 @@ if (result != VK_SUCCESS) break; + list_addtail(&set->pool_link, &pool->desc_sets); + pDescriptorSets[i] = anv_descriptor_set_to_handle(set); } @@ -992,7 +1012,7 @@ template->entry_count = pCreateInfo->descriptorUpdateEntryCount; for (uint32_t i = 0; i < template->entry_count; i++) { - const VkDescriptorUpdateTemplateEntryKHR *pEntry = + const VkDescriptorUpdateTemplateEntry *pEntry = &pCreateInfo->pDescriptorUpdateEntries[i]; template->entries[i] = (struct anv_descriptor_template_entry) { diff -Nru mesa-18.3.3/src/intel/vulkan/anv_device.c mesa-19.0.1/src/intel/vulkan/anv_device.c --- mesa-18.3.3/src/intel/vulkan/anv_device.c 2019-02-01 12:03:20.000000000 +0000 +++ mesa-19.0.1/src/intel/vulkan/anv_device.c 2019-03-31 23:16:37.000000000 +0000 @@ -41,6 +41,7 @@ #include "git_sha1.h" #include "vk_util.h" #include "common/gen_defines.h" +#include "compiler/glsl_types.h" #include "genxml/gen7_pack.h" @@ -60,8 +61,8 @@ va_end(args); } -static VkResult -anv_compute_heap_size(int fd, uint64_t gtt_size, uint64_t *heap_size) +static uint64_t +anv_compute_heap_size(int fd, uint64_t gtt_size) { /* Query the total ram from the system */ struct sysinfo info; @@ -83,9 +84,7 @@ */ uint64_t available_gtt = gtt_size * 3 / 4; - *heap_size = MIN2(available_ram, available_gtt); - - return VK_SUCCESS; + return MIN2(available_ram, available_gtt); } static VkResult @@ -109,10 +108,7 @@ device->supports_48bit_addresses = (device->info.gen >= 8) && gtt_size > (4ULL << 30 /* GiB */); - uint64_t heap_size = 0; - VkResult result = anv_compute_heap_size(fd, gtt_size, &heap_size); - if (result != VK_SUCCESS) - 
return result; + uint64_t heap_size = anv_compute_heap_size(fd, gtt_size); if (heap_size > (2ull << 30) && !device->supports_48bit_addresses) { /* When running with an overridden PCI ID, we may get a GTT size from @@ -708,6 +704,7 @@ vk_debug_report_instance_destroy(&instance->debug_report_callbacks); + _mesa_glsl_release_types(); _mesa_locale_fini(); vk_free(&instance->alloc, instance); @@ -865,7 +862,7 @@ .shaderInt64 = pdevice->info.gen >= 8 && pdevice->info.has_64bit_types, .shaderInt16 = pdevice->info.gen >= 8, - .shaderResourceMinLod = false, + .shaderResourceMinLod = pdevice->info.gen >= 9, .variableMultisampleRate = true, .inheritedQueries = true, }; @@ -893,9 +890,38 @@ vk_foreach_struct(ext, pFeatures->pNext) { switch (ext->sType) { - case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_PROTECTED_MEMORY_FEATURES: { - VkPhysicalDeviceProtectedMemoryFeatures *features = (void *)ext; - features->protectedMemory = VK_FALSE; + case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_8BIT_STORAGE_FEATURES_KHR: { + VkPhysicalDevice8BitStorageFeaturesKHR *features = + (VkPhysicalDevice8BitStorageFeaturesKHR *)ext; + ANV_FROM_HANDLE(anv_physical_device, pdevice, physicalDevice); + + features->storageBuffer8BitAccess = pdevice->info.gen >= 8; + features->uniformAndStorageBuffer8BitAccess = pdevice->info.gen >= 8; + features->storagePushConstant8 = pdevice->info.gen >= 8; + break; + } + + case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_16BIT_STORAGE_FEATURES: { + VkPhysicalDevice16BitStorageFeatures *features = + (VkPhysicalDevice16BitStorageFeatures *)ext; + ANV_FROM_HANDLE(anv_physical_device, pdevice, physicalDevice); + + features->storageBuffer16BitAccess = pdevice->info.gen >= 8; + features->uniformAndStorageBuffer16BitAccess = pdevice->info.gen >= 8; + features->storagePushConstant16 = pdevice->info.gen >= 8; + features->storageInputOutput16 = false; + break; + } + + case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_CONDITIONAL_RENDERING_FEATURES_EXT: { + VkPhysicalDeviceConditionalRenderingFeaturesEXT 
*features = + (VkPhysicalDeviceConditionalRenderingFeaturesEXT*)ext; + ANV_FROM_HANDLE(anv_physical_device, pdevice, physicalDevice); + + features->conditionalRendering = pdevice->info.gen >= 8 || + pdevice->info.is_haswell; + features->inheritedConditionalRendering = pdevice->info.gen >= 8 || + pdevice->info.is_haswell; break; } @@ -908,10 +934,9 @@ break; } - case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_VARIABLE_POINTER_FEATURES: { - VkPhysicalDeviceVariablePointerFeatures *features = (void *)ext; - features->variablePointersStorageBuffer = true; - features->variablePointers = true; + case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_PROTECTED_MEMORY_FEATURES: { + VkPhysicalDeviceProtectedMemoryFeatures *features = (void *)ext; + features->protectedMemory = VK_FALSE; break; } @@ -922,32 +947,31 @@ break; } + case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_SCALAR_BLOCK_LAYOUT_FEATURES_EXT: { + VkPhysicalDeviceScalarBlockLayoutFeaturesEXT *features = + (VkPhysicalDeviceScalarBlockLayoutFeaturesEXT *)ext; + features->scalarBlockLayout = true; + break; + } + case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_SHADER_DRAW_PARAMETER_FEATURES: { VkPhysicalDeviceShaderDrawParameterFeatures *features = (void *)ext; features->shaderDrawParameters = true; break; } - case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_16BIT_STORAGE_FEATURES_KHR: { - VkPhysicalDevice16BitStorageFeaturesKHR *features = - (VkPhysicalDevice16BitStorageFeaturesKHR *)ext; - ANV_FROM_HANDLE(anv_physical_device, pdevice, physicalDevice); - - features->storageBuffer16BitAccess = pdevice->info.gen >= 8; - features->uniformAndStorageBuffer16BitAccess = pdevice->info.gen >= 8; - features->storagePushConstant16 = pdevice->info.gen >= 8; - features->storageInputOutput16 = false; + case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_VARIABLE_POINTER_FEATURES: { + VkPhysicalDeviceVariablePointerFeatures *features = (void *)ext; + features->variablePointersStorageBuffer = true; + features->variablePointers = true; break; } - case 
VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_8BIT_STORAGE_FEATURES_KHR: { - VkPhysicalDevice8BitStorageFeaturesKHR *features = - (VkPhysicalDevice8BitStorageFeaturesKHR *)ext; - ANV_FROM_HANDLE(anv_physical_device, pdevice, physicalDevice); - - features->storageBuffer8BitAccess = pdevice->info.gen >= 8; - features->uniformAndStorageBuffer8BitAccess = pdevice->info.gen >= 8; - features->storagePushConstant8 = pdevice->info.gen >= 8; + case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_TRANSFORM_FEEDBACK_FEATURES_EXT: { + VkPhysicalDeviceTransformFeedbackFeaturesEXT *features = + (VkPhysicalDeviceTransformFeedbackFeaturesEXT *)ext; + features->transformFeedback = VK_TRUE; + features->geometryStreams = VK_TRUE; break; } @@ -1046,7 +1070,7 @@ 16 * devinfo->max_cs_threads, 16 * devinfo->max_cs_threads, }, - .subPixelPrecisionBits = 4 /* FIXME */, + .subPixelPrecisionBits = 8, .subTexelPrecisionBits = 4 /* FIXME */, .mipmapPrecisionBits = 4 /* FIXME */, .maxDrawIndexedIndexValue = UINT32_MAX, @@ -1126,11 +1150,31 @@ vk_foreach_struct(ext, pProperties->pNext) { switch (ext->sType) { - case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_PUSH_DESCRIPTOR_PROPERTIES_KHR: { - VkPhysicalDevicePushDescriptorPropertiesKHR *properties = - (VkPhysicalDevicePushDescriptorPropertiesKHR *) ext; + case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_DEPTH_STENCIL_RESOLVE_PROPERTIES_KHR: { + VkPhysicalDeviceDepthStencilResolvePropertiesKHR *props = + (VkPhysicalDeviceDepthStencilResolvePropertiesKHR *)ext; + + /* We support all of the depth resolve modes */ + props->supportedDepthResolveModes = + VK_RESOLVE_MODE_SAMPLE_ZERO_BIT_KHR | + VK_RESOLVE_MODE_AVERAGE_BIT_KHR | + VK_RESOLVE_MODE_MIN_BIT_KHR | + VK_RESOLVE_MODE_MAX_BIT_KHR; + + /* Average doesn't make sense for stencil so we don't support that */ + props->supportedStencilResolveModes = + VK_RESOLVE_MODE_SAMPLE_ZERO_BIT_KHR; + if (pdevice->info.gen >= 8) { + /* The advanced stencil resolve modes currently require stencil + * sampling be supported by the hardware. 
+ */ + props->supportedStencilResolveModes |= + VK_RESOLVE_MODE_MIN_BIT_KHR | + VK_RESOLVE_MODE_MAX_BIT_KHR; + } - properties->maxPushDescriptors = MAX_PUSH_DESCRIPTORS; + props->independentResolveNone = VK_TRUE; + props->independentResolve = VK_TRUE; break; } @@ -1201,6 +1245,21 @@ break; } + case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_PROTECTED_MEMORY_PROPERTIES: { + VkPhysicalDeviceProtectedMemoryProperties *props = + (VkPhysicalDeviceProtectedMemoryProperties *)ext; + props->protectedNoFault = false; + break; + } + + case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_PUSH_DESCRIPTOR_PROPERTIES_KHR: { + VkPhysicalDevicePushDescriptorPropertiesKHR *properties = + (VkPhysicalDevicePushDescriptorPropertiesKHR *) ext; + + properties->maxPushDescriptors = MAX_PUSH_DESCRIPTORS; + break; + } + case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_SAMPLER_FILTER_MINMAX_PROPERTIES_EXT: { VkPhysicalDeviceSamplerFilterMinmaxPropertiesEXT *properties = (VkPhysicalDeviceSamplerFilterMinmaxPropertiesEXT *)ext; @@ -1233,6 +1292,23 @@ break; } + case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_TRANSFORM_FEEDBACK_PROPERTIES_EXT: { + VkPhysicalDeviceTransformFeedbackPropertiesEXT *props = + (VkPhysicalDeviceTransformFeedbackPropertiesEXT *)ext; + + props->maxTransformFeedbackStreams = MAX_XFB_STREAMS; + props->maxTransformFeedbackBuffers = MAX_XFB_BUFFERS; + props->maxTransformFeedbackBufferSize = (1ull << 32); + props->maxTransformFeedbackStreamDataSize = 128 * 4; + props->maxTransformFeedbackBufferDataSize = 128 * 4; + props->maxTransformFeedbackBufferDataStride = 2048; + props->transformFeedbackQueries = VK_TRUE; + props->transformFeedbackStreamsLinesTriangles = VK_FALSE; + props->transformFeedbackRasterizationStreamSelect = VK_FALSE; + props->transformFeedbackDraw = VK_TRUE; + break; + } + case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_VERTEX_ATTRIBUTE_DIVISOR_PROPERTIES_EXT: { VkPhysicalDeviceVertexAttributeDivisorPropertiesEXT *props = (VkPhysicalDeviceVertexAttributeDivisorPropertiesEXT *)ext; @@ -1241,13 +1317,6 @@ 
break; } - case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_PROTECTED_MEMORY_PROPERTIES: { - VkPhysicalDeviceProtectedMemoryProperties *props = - (VkPhysicalDeviceProtectedMemoryProperties *)ext; - props->protectedNoFault = false; - break; - } - default: anv_debug_ignored_stype(ext->sType); break; @@ -1477,8 +1546,6 @@ state = anv_state_pool_alloc(pool, size, align); memcpy(state.map, p, size); - anv_state_flush(pool->block_pool.device, state); - return state; } @@ -2264,6 +2331,7 @@ mem->type = &pdevice->memory.types[pAllocateInfo->memoryTypeIndex]; mem->map = NULL; mem->map_size = 0; + mem->ahw = NULL; uint64_t bo_flags = 0; @@ -2286,6 +2354,43 @@ if (pdevice->use_softpin) bo_flags |= EXEC_OBJECT_PINNED; + const VkExportMemoryAllocateInfo *export_info = + vk_find_struct_const(pAllocateInfo->pNext, EXPORT_MEMORY_ALLOCATE_INFO); + + /* Check if we need to support Android HW buffer export. If so, + * create AHardwareBuffer and import memory from it. + */ + bool android_export = false; + if (export_info && export_info->handleTypes & + VK_EXTERNAL_MEMORY_HANDLE_TYPE_ANDROID_HARDWARE_BUFFER_BIT_ANDROID) + android_export = true; + + /* Android memory import. 
*/ + const struct VkImportAndroidHardwareBufferInfoANDROID *ahw_import_info = + vk_find_struct_const(pAllocateInfo->pNext, + IMPORT_ANDROID_HARDWARE_BUFFER_INFO_ANDROID); + + if (ahw_import_info) { + result = anv_import_ahw_memory(_device, mem, ahw_import_info); + if (result != VK_SUCCESS) + goto fail; + + goto success; + } else if (android_export) { + result = anv_create_ahw_memory(_device, mem, pAllocateInfo); + if (result != VK_SUCCESS) + goto fail; + + const struct VkImportAndroidHardwareBufferInfoANDROID import_info = { + .buffer = mem->ahw, + }; + result = anv_import_ahw_memory(_device, mem, &import_info); + if (result != VK_SUCCESS) + goto fail; + + goto success; + } + const VkImportMemoryFdInfoKHR *fd_info = vk_find_struct_const(pAllocateInfo->pNext, IMPORT_MEMORY_FD_INFO_KHR); @@ -2317,9 +2422,9 @@ */ if (mem->bo->size < aligned_alloc_size) { result = vk_errorf(device->instance, device, - VK_ERROR_INVALID_EXTERNAL_HANDLE_KHR, + VK_ERROR_INVALID_EXTERNAL_HANDLE, "aligned allocationSize too large for " - "VK_EXTERNAL_MEMORY_HANDLE_TYPE_OPAQUE_FD_BIT_KHR: " + "VK_EXTERNAL_MEMORY_HANDLE_TYPE_OPAQUE_FD_BIT: " "%"PRIu64"B > %"PRIu64"B", aligned_alloc_size, mem->bo->size); anv_bo_cache_release(device, &device->bo_cache, mem->bo); @@ -2336,42 +2441,44 @@ * If the import fails, we leave the file descriptor open. 
*/ close(fd_info->fd); - } else { - const VkExportMemoryAllocateInfoKHR *fd_info = - vk_find_struct_const(pAllocateInfo->pNext, EXPORT_MEMORY_ALLOCATE_INFO_KHR); - if (fd_info && fd_info->handleTypes) - bo_flags |= ANV_BO_EXTERNAL; - - result = anv_bo_cache_alloc(device, &device->bo_cache, - pAllocateInfo->allocationSize, bo_flags, - &mem->bo); - if (result != VK_SUCCESS) - goto fail; + goto success; + } - const VkMemoryDedicatedAllocateInfoKHR *dedicated_info = - vk_find_struct_const(pAllocateInfo->pNext, MEMORY_DEDICATED_ALLOCATE_INFO_KHR); - if (dedicated_info && dedicated_info->image != VK_NULL_HANDLE) { - ANV_FROM_HANDLE(anv_image, image, dedicated_info->image); + /* Regular allocate (not importing memory). */ - /* Some legacy (non-modifiers) consumers need the tiling to be set on - * the BO. In this case, we have a dedicated allocation. - */ - if (image->needs_set_tiling) { - const uint32_t i915_tiling = - isl_tiling_to_i915_tiling(image->planes[0].surface.isl.tiling); - int ret = anv_gem_set_tiling(device, mem->bo->gem_handle, - image->planes[0].surface.isl.row_pitch_B, - i915_tiling); - if (ret) { - anv_bo_cache_release(device, &device->bo_cache, mem->bo); - return vk_errorf(device->instance, NULL, - VK_ERROR_OUT_OF_DEVICE_MEMORY, - "failed to set BO tiling: %m"); - } + if (export_info && export_info->handleTypes) + bo_flags |= ANV_BO_EXTERNAL; + + result = anv_bo_cache_alloc(device, &device->bo_cache, + pAllocateInfo->allocationSize, bo_flags, + &mem->bo); + if (result != VK_SUCCESS) + goto fail; + + const VkMemoryDedicatedAllocateInfo *dedicated_info = + vk_find_struct_const(pAllocateInfo->pNext, MEMORY_DEDICATED_ALLOCATE_INFO); + if (dedicated_info && dedicated_info->image != VK_NULL_HANDLE) { + ANV_FROM_HANDLE(anv_image, image, dedicated_info->image); + + /* Some legacy (non-modifiers) consumers need the tiling to be set on + * the BO. In this case, we have a dedicated allocation. 
+ */ + if (image->needs_set_tiling) { + const uint32_t i915_tiling = + isl_tiling_to_i915_tiling(image->planes[0].surface.isl.tiling); + int ret = anv_gem_set_tiling(device, mem->bo->gem_handle, + image->planes[0].surface.isl.row_pitch_B, + i915_tiling); + if (ret) { + anv_bo_cache_release(device, &device->bo_cache, mem->bo); + return vk_errorf(device->instance, NULL, + VK_ERROR_OUT_OF_DEVICE_MEMORY, + "failed to set BO tiling: %m"); } } } + success: *pMem = anv_device_memory_to_handle(mem); return VK_SUCCESS; @@ -2400,7 +2507,7 @@ VkResult anv_GetMemoryFdPropertiesKHR( VkDevice _device, - VkExternalMemoryHandleTypeFlagBitsKHR handleType, + VkExternalMemoryHandleTypeFlagBits handleType, int fd, VkMemoryFdPropertiesKHR* pMemoryFdProperties) { @@ -2442,6 +2549,11 @@ anv_bo_cache_release(device, &device->bo_cache, mem->bo); +#ifdef ANDROID + if (mem->ahw) + AHardwareBuffer_release(mem->ahw); +#endif + vk_free2(&device->alloc, pAllocator, mem); } @@ -2663,6 +2775,12 @@ */ uint32_t memory_types = (1ull << pdevice->memory.type_count) - 1; + /* We must have image allocated or imported at this point. According to the + * specification, external images must have been bound to memory before + * calling GetImageMemoryRequirements. 
+ */ + assert(image->size > 0); + pMemoryRequirements->size = image->size; pMemoryRequirements->alignment = image->alignment; pMemoryRequirements->memoryTypeBits = memory_types; @@ -2683,8 +2801,8 @@ switch (ext->sType) { case VK_STRUCTURE_TYPE_IMAGE_PLANE_MEMORY_REQUIREMENTS_INFO: { struct anv_physical_device *pdevice = &device->instance->physicalDevice; - const VkImagePlaneMemoryRequirementsInfoKHR *plane_reqs = - (const VkImagePlaneMemoryRequirementsInfoKHR *) ext; + const VkImagePlaneMemoryRequirementsInfo *plane_reqs = + (const VkImagePlaneMemoryRequirementsInfo *) ext; uint32_t plane = anv_image_aspect_to_plane(image->aspects, plane_reqs->planeAspect); @@ -2703,6 +2821,12 @@ pMemoryRequirements->memoryRequirements.memoryTypeBits = (1ull << pdevice->memory.type_count) - 1; + /* We must have image allocated or imported at this point. According to the + * specification, external images must have been bound to memory before + * calling GetImageMemoryRequirements. + */ + assert(image->planes[plane].size > 0); + pMemoryRequirements->memoryRequirements.size = image->planes[plane].size; pMemoryRequirements->memoryRequirements.alignment = image->planes[plane].alignment; @@ -2719,7 +2843,7 @@ switch (ext->sType) { case VK_STRUCTURE_TYPE_MEMORY_DEDICATED_REQUIREMENTS: { VkMemoryDedicatedRequirements *requirements = (void *)ext; - if (image->needs_set_tiling) { + if (image->needs_set_tiling || image->external_format) { /* If we need to set the tiling for external consumers, we need a * dedicated allocation. 
* @@ -2981,8 +3105,6 @@ .size_B = range, .format = format, .stride_B = stride); - - anv_state_flush(device, state); } void anv_DestroySampler( diff -Nru mesa-18.3.3/src/intel/vulkan/anv_dump.c mesa-19.0.1/src/intel/vulkan/anv_dump.c --- mesa-18.3.3/src/intel/vulkan/anv_dump.c 2017-11-05 00:14:08.000000000 +0000 +++ mesa-19.0.1/src/intel/vulkan/anv_dump.c 2019-03-31 23:16:37.000000000 +0000 @@ -429,9 +429,9 @@ case VK_IMAGE_ASPECT_COLOR_BIT: suffix = "c"; break; case VK_IMAGE_ASPECT_DEPTH_BIT: suffix = "d"; break; case VK_IMAGE_ASPECT_STENCIL_BIT: suffix = "s"; break; - case VK_IMAGE_ASPECT_PLANE_0_BIT_KHR: suffix = "c0"; break; - case VK_IMAGE_ASPECT_PLANE_1_BIT_KHR: suffix = "c1"; break; - case VK_IMAGE_ASPECT_PLANE_2_BIT_KHR: suffix = "c2"; break; + case VK_IMAGE_ASPECT_PLANE_0_BIT: suffix = "c0"; break; + case VK_IMAGE_ASPECT_PLANE_1_BIT: suffix = "c1"; break; + case VK_IMAGE_ASPECT_PLANE_2_BIT: suffix = "c2"; break; default: unreachable("Invalid aspect"); } diff -Nru mesa-18.3.3/src/intel/vulkan/anv_extensions.py mesa-19.0.1/src/intel/vulkan/anv_extensions.py --- mesa-18.3.3/src/intel/vulkan/anv_extensions.py 2019-01-13 21:16:37.000000000 +0000 +++ mesa-19.0.1/src/intel/vulkan/anv_extensions.py 2019-03-31 23:16:37.000000000 +0000 @@ -47,7 +47,7 @@ self.version = version self.enable = _bool_to_c_expr(enable) -API_PATCH_VERSION = 90 +API_PATCH_VERSION = 96 # Supported API versions. Each one is the maximum patch version for the given # version. Version come in increasing order and each version is available if @@ -69,15 +69,19 @@ # the those extension strings, then tests dEQP-VK.api.info.instance.extensions # and dEQP-VK.api.info.device fail due to the duplicated strings. 
EXTENSIONS = [ + Extension('VK_ANDROID_external_memory_android_hardware_buffer', 3, 'ANDROID'), Extension('VK_ANDROID_native_buffer', 5, 'ANDROID'), - Extension('VK_KHR_16bit_storage', 1, 'device->info.gen >= 8'), Extension('VK_KHR_8bit_storage', 1, 'device->info.gen >= 8'), + Extension('VK_KHR_16bit_storage', 1, 'device->info.gen >= 8'), Extension('VK_KHR_bind_memory2', 1, True), Extension('VK_KHR_create_renderpass2', 1, True), Extension('VK_KHR_dedicated_allocation', 1, True), + Extension('VK_KHR_depth_stencil_resolve', 1, True), Extension('VK_KHR_descriptor_update_template', 1, True), Extension('VK_KHR_device_group', 1, True), Extension('VK_KHR_device_group_creation', 1, True), + Extension('VK_KHR_display', 23, 'VK_USE_PLATFORM_DISPLAY_KHR'), + Extension('VK_KHR_draw_indirect_count', 1, True), Extension('VK_KHR_driver_properties', 1, True), Extension('VK_KHR_external_fence', 1, 'device->has_syncobj_wait'), @@ -99,6 +103,7 @@ Extension('VK_KHR_maintenance1', 1, True), Extension('VK_KHR_maintenance2', 1, True), Extension('VK_KHR_maintenance3', 1, True), + Extension('VK_KHR_multiview', 1, True), Extension('VK_KHR_push_descriptor', 1, True), Extension('VK_KHR_relaxed_block_layout', 1, True), Extension('VK_KHR_sampler_mirror_clamp_to_edge', 1, True), @@ -111,9 +116,9 @@ Extension('VK_KHR_wayland_surface', 6, 'VK_USE_PLATFORM_WAYLAND_KHR'), Extension('VK_KHR_xcb_surface', 6, 'VK_USE_PLATFORM_XCB_KHR'), Extension('VK_KHR_xlib_surface', 6, 'VK_USE_PLATFORM_XLIB_KHR'), - Extension('VK_KHR_multiview', 1, True), - Extension('VK_KHR_display', 23, 'VK_USE_PLATFORM_DISPLAY_KHR'), Extension('VK_EXT_acquire_xlib_display', 1, 'VK_USE_PLATFORM_XLIB_XRANDR_EXT'), + Extension('VK_EXT_calibrated_timestamps', 1, True), + Extension('VK_EXT_conditional_rendering', 1, 'device->info.gen >= 8 || device->info.is_haswell'), Extension('VK_EXT_debug_report', 8, True), Extension('VK_EXT_direct_mode_display', 1, 'VK_USE_PLATFORM_DISPLAY_KHR'), Extension('VK_EXT_display_control', 1, 
'VK_USE_PLATFORM_DISPLAY_KHR'), @@ -121,13 +126,14 @@ Extension('VK_EXT_external_memory_dma_buf', 1, True), Extension('VK_EXT_global_priority', 1, 'device->has_context_priority'), - Extension('VK_EXT_pci_bus_info', 1, False), + Extension('VK_EXT_pci_bus_info', 2, True), + Extension('VK_EXT_post_depth_coverage', 1, 'device->info.gen >= 9'), + Extension('VK_EXT_sampler_filter_minmax', 1, 'device->info.gen >= 9'), + Extension('VK_EXT_scalar_block_layout', 1, True), Extension('VK_EXT_shader_viewport_index_layer', 1, True), Extension('VK_EXT_shader_stencil_export', 1, 'device->info.gen >= 9'), + Extension('VK_EXT_transform_feedback', 1, True), Extension('VK_EXT_vertex_attribute_divisor', 3, True), - Extension('VK_EXT_post_depth_coverage', 1, 'device->info.gen >= 9'), - Extension('VK_EXT_sampler_filter_minmax', 1, 'device->info.gen >= 9'), - Extension('VK_EXT_calibrated_timestamps', 1, True), Extension('VK_GOOGLE_decorate_string', 1, True), Extension('VK_GOOGLE_hlsl_functionality1', 1, True), ] diff -Nru mesa-18.3.3/src/intel/vulkan/anv_formats.c mesa-19.0.1/src/intel/vulkan/anv_formats.c --- mesa-18.3.3/src/intel/vulkan/anv_formats.c 2018-12-07 18:58:04.000000000 +0000 +++ mesa-19.0.1/src/intel/vulkan/anv_formats.c 2019-03-31 23:16:37.000000000 +0000 @@ -51,26 +51,51 @@ .planes = { \ { .isl_format = __hw_fmt, .swizzle = __swizzle, \ .denominator_scales = { 1, 1, }, \ + .aspect = VK_IMAGE_ASPECT_COLOR_BIT, \ }, \ }, \ + .vk_format = __vk_fmt, \ .n_planes = 1, \ } #define fmt1(__vk_fmt, __hw_fmt) \ swiz_fmt1(__vk_fmt, __hw_fmt, RGBA) -#define fmt2(__vk_fmt, __fmt1, __fmt2) \ +#define d_fmt(__vk_fmt, __hw_fmt) \ [VK_ENUM_OFFSET(__vk_fmt)] = { \ .planes = { \ - { .isl_format = __fmt1, \ - .swizzle = RGBA, \ + { .isl_format = __hw_fmt, .swizzle = RGBA, \ .denominator_scales = { 1, 1, }, \ + .aspect = VK_IMAGE_ASPECT_DEPTH_BIT, \ }, \ - { .isl_format = __fmt2, \ - .swizzle = RGBA, \ + }, \ + .n_planes = 1, \ + } + +#define s_fmt(__vk_fmt, __hw_fmt) \ + 
[VK_ENUM_OFFSET(__vk_fmt)] = { \ + .planes = { \ + { .isl_format = __hw_fmt, .swizzle = RGBA, \ + .denominator_scales = { 1, 1, }, \ + .aspect = VK_IMAGE_ASPECT_STENCIL_BIT, \ + }, \ + }, \ + .n_planes = 1, \ + } + +#define ds_fmt2(__vk_fmt, __fmt1, __fmt2) \ + [VK_ENUM_OFFSET(__vk_fmt)] = { \ + .planes = { \ + { .isl_format = __fmt1, .swizzle = RGBA, \ + .denominator_scales = { 1, 1, }, \ + .aspect = VK_IMAGE_ASPECT_DEPTH_BIT, \ + }, \ + { .isl_format = __fmt2, .swizzle = RGBA, \ .denominator_scales = { 1, 1, }, \ + .aspect = VK_IMAGE_ASPECT_STENCIL_BIT, \ }, \ }, \ + .vk_format = __vk_fmt, \ .n_planes = 2, \ } @@ -79,22 +104,25 @@ .planes = { \ { .isl_format = ISL_FORMAT_UNSUPPORTED, }, \ }, \ + .vk_format = VK_FORMAT_UNDEFINED, \ } -#define y_plane(__hw_fmt, __swizzle, __ycbcr_swizzle, dhs, dvs) \ +#define y_plane(__plane, __hw_fmt, __swizzle, __ycbcr_swizzle, dhs, dvs) \ { .isl_format = __hw_fmt, \ .swizzle = __swizzle, \ .ycbcr_swizzle = __ycbcr_swizzle, \ .denominator_scales = { dhs, dvs, }, \ .has_chroma = false, \ + .aspect = VK_IMAGE_ASPECT_PLANE_0_BIT, /* Y plane is always plane 0 */ \ } -#define chroma_plane(__hw_fmt, __swizzle, __ycbcr_swizzle, dhs, dvs) \ +#define chroma_plane(__plane, __hw_fmt, __swizzle, __ycbcr_swizzle, dhs, dvs) \ { .isl_format = __hw_fmt, \ .swizzle = __swizzle, \ .ycbcr_swizzle = __ycbcr_swizzle, \ .denominator_scales = { dhs, dvs, }, \ .has_chroma = true, \ + .aspect = VK_IMAGE_ASPECT_PLANE_ ## __plane ## _BIT, \ } #define ycbcr_fmt(__vk_fmt, __n_planes, ...) 
\ @@ -102,6 +130,7 @@ .planes = { \ __VA_ARGS__, \ }, \ + .vk_format = __vk_fmt, \ .n_planes = __n_planes, \ .can_ycbcr = true, \ } @@ -224,13 +253,13 @@ fmt1(VK_FORMAT_B10G11R11_UFLOAT_PACK32, ISL_FORMAT_R11G11B10_FLOAT), fmt1(VK_FORMAT_E5B9G9R9_UFLOAT_PACK32, ISL_FORMAT_R9G9B9E5_SHAREDEXP), - fmt1(VK_FORMAT_D16_UNORM, ISL_FORMAT_R16_UNORM), - fmt1(VK_FORMAT_X8_D24_UNORM_PACK32, ISL_FORMAT_R24_UNORM_X8_TYPELESS), - fmt1(VK_FORMAT_D32_SFLOAT, ISL_FORMAT_R32_FLOAT), - fmt1(VK_FORMAT_S8_UINT, ISL_FORMAT_R8_UINT), + d_fmt(VK_FORMAT_D16_UNORM, ISL_FORMAT_R16_UNORM), + d_fmt(VK_FORMAT_X8_D24_UNORM_PACK32, ISL_FORMAT_R24_UNORM_X8_TYPELESS), + d_fmt(VK_FORMAT_D32_SFLOAT, ISL_FORMAT_R32_FLOAT), + s_fmt(VK_FORMAT_S8_UINT, ISL_FORMAT_R8_UINT), fmt_unsupported(VK_FORMAT_D16_UNORM_S8_UINT), - fmt2(VK_FORMAT_D24_UNORM_S8_UINT, ISL_FORMAT_R24_UNORM_X8_TYPELESS, ISL_FORMAT_R8_UINT), - fmt2(VK_FORMAT_D32_SFLOAT_S8_UINT, ISL_FORMAT_R32_FLOAT, ISL_FORMAT_R8_UINT), + ds_fmt2(VK_FORMAT_D24_UNORM_S8_UINT, ISL_FORMAT_R24_UNORM_X8_TYPELESS, ISL_FORMAT_R8_UINT), + ds_fmt2(VK_FORMAT_D32_SFLOAT_S8_UINT, ISL_FORMAT_R32_FLOAT, ISL_FORMAT_R8_UINT), swiz_fmt1(VK_FORMAT_BC1_RGB_UNORM_BLOCK, ISL_FORMAT_BC1_UNORM, RGB1), swiz_fmt1(VK_FORMAT_BC1_RGB_SRGB_BLOCK, ISL_FORMAT_BC1_UNORM_SRGB, RGB1), @@ -304,27 +333,27 @@ static const struct anv_format ycbcr_formats[] = { ycbcr_fmt(VK_FORMAT_G8B8G8R8_422_UNORM, 1, - y_plane(ISL_FORMAT_YCRCB_SWAPUV, RGBA, _ISL_SWIZZLE(BLUE, GREEN, RED, ZERO), 1, 1)), + y_plane(0, ISL_FORMAT_YCRCB_SWAPUV, RGBA, _ISL_SWIZZLE(BLUE, GREEN, RED, ZERO), 1, 1)), ycbcr_fmt(VK_FORMAT_B8G8R8G8_422_UNORM, 1, - y_plane(ISL_FORMAT_YCRCB_SWAPUVY, RGBA, _ISL_SWIZZLE(BLUE, GREEN, RED, ZERO), 1, 1)), + y_plane(0, ISL_FORMAT_YCRCB_SWAPUVY, RGBA, _ISL_SWIZZLE(BLUE, GREEN, RED, ZERO), 1, 1)), ycbcr_fmt(VK_FORMAT_G8_B8_R8_3PLANE_420_UNORM, 3, - y_plane(ISL_FORMAT_R8_UNORM, RGBA, _ISL_SWIZZLE(GREEN, ZERO, ZERO, ZERO), 1, 1), - chroma_plane(ISL_FORMAT_R8_UNORM, RGBA, _ISL_SWIZZLE(BLUE, ZERO, 
ZERO, ZERO), 2, 2), - chroma_plane(ISL_FORMAT_R8_UNORM, RGBA, _ISL_SWIZZLE(RED, ZERO, ZERO, ZERO), 2, 2)), + y_plane(0, ISL_FORMAT_R8_UNORM, RGBA, _ISL_SWIZZLE(GREEN, ZERO, ZERO, ZERO), 1, 1), + chroma_plane(1, ISL_FORMAT_R8_UNORM, RGBA, _ISL_SWIZZLE(BLUE, ZERO, ZERO, ZERO), 2, 2), + chroma_plane(2, ISL_FORMAT_R8_UNORM, RGBA, _ISL_SWIZZLE(RED, ZERO, ZERO, ZERO), 2, 2)), ycbcr_fmt(VK_FORMAT_G8_B8R8_2PLANE_420_UNORM, 2, - y_plane(ISL_FORMAT_R8_UNORM, RGBA, _ISL_SWIZZLE(GREEN, ZERO, ZERO, ZERO), 1, 1), - chroma_plane(ISL_FORMAT_R8G8_UNORM, RGBA, _ISL_SWIZZLE(BLUE, RED, ZERO, ZERO), 2, 2)), + y_plane(0, ISL_FORMAT_R8_UNORM, RGBA, _ISL_SWIZZLE(GREEN, ZERO, ZERO, ZERO), 1, 1), + chroma_plane(1, ISL_FORMAT_R8G8_UNORM, RGBA, _ISL_SWIZZLE(BLUE, RED, ZERO, ZERO), 2, 2)), ycbcr_fmt(VK_FORMAT_G8_B8_R8_3PLANE_422_UNORM, 3, - y_plane(ISL_FORMAT_R8_UNORM, RGBA, _ISL_SWIZZLE(GREEN, ZERO, ZERO, ZERO), 1, 1), - chroma_plane(ISL_FORMAT_R8_UNORM, RGBA, _ISL_SWIZZLE(BLUE, ZERO, ZERO, ZERO), 2, 1), - chroma_plane(ISL_FORMAT_R8_UNORM, RGBA, _ISL_SWIZZLE(RED, ZERO, ZERO, ZERO), 2, 1)), + y_plane(0, ISL_FORMAT_R8_UNORM, RGBA, _ISL_SWIZZLE(GREEN, ZERO, ZERO, ZERO), 1, 1), + chroma_plane(1, ISL_FORMAT_R8_UNORM, RGBA, _ISL_SWIZZLE(BLUE, ZERO, ZERO, ZERO), 2, 1), + chroma_plane(2, ISL_FORMAT_R8_UNORM, RGBA, _ISL_SWIZZLE(RED, ZERO, ZERO, ZERO), 2, 1)), ycbcr_fmt(VK_FORMAT_G8_B8R8_2PLANE_422_UNORM, 2, - y_plane(ISL_FORMAT_R8_UNORM, RGBA, _ISL_SWIZZLE(GREEN, ZERO, ZERO, ZERO), 1, 1), - chroma_plane(ISL_FORMAT_R8G8_UNORM, RGBA, _ISL_SWIZZLE(BLUE, RED, ZERO, ZERO), 2, 1)), + y_plane(0, ISL_FORMAT_R8_UNORM, RGBA, _ISL_SWIZZLE(GREEN, ZERO, ZERO, ZERO), 1, 1), + chroma_plane(1, ISL_FORMAT_R8G8_UNORM, RGBA, _ISL_SWIZZLE(BLUE, RED, ZERO, ZERO), 2, 1)), ycbcr_fmt(VK_FORMAT_G8_B8_R8_3PLANE_444_UNORM, 3, - y_plane(ISL_FORMAT_R8_UNORM, RGBA, _ISL_SWIZZLE(GREEN, ZERO, ZERO, ZERO), 1, 1), - chroma_plane(ISL_FORMAT_R8_UNORM, RGBA, _ISL_SWIZZLE(BLUE, ZERO, ZERO, ZERO), 1, 1), - chroma_plane(ISL_FORMAT_R8_UNORM, 
RGBA, _ISL_SWIZZLE(RED, ZERO, ZERO, ZERO), 1, 1)), + y_plane(0, ISL_FORMAT_R8_UNORM, RGBA, _ISL_SWIZZLE(GREEN, ZERO, ZERO, ZERO), 1, 1), + chroma_plane(1, ISL_FORMAT_R8_UNORM, RGBA, _ISL_SWIZZLE(BLUE, ZERO, ZERO, ZERO), 1, 1), + chroma_plane(2, ISL_FORMAT_R8_UNORM, RGBA, _ISL_SWIZZLE(RED, ZERO, ZERO, ZERO), 1, 1)), fmt_unsupported(VK_FORMAT_R10X6_UNORM_PACK16), fmt_unsupported(VK_FORMAT_R10X6G10X6_UNORM_2PACK16), @@ -353,23 +382,23 @@ fmt_unsupported(VK_FORMAT_B16G16R16G16_422_UNORM), ycbcr_fmt(VK_FORMAT_G16_B16_R16_3PLANE_420_UNORM, 3, - y_plane(ISL_FORMAT_R16_UNORM, RGBA, _ISL_SWIZZLE(GREEN, ZERO, ZERO, ZERO), 1, 1), - chroma_plane(ISL_FORMAT_R16_UNORM, RGBA, _ISL_SWIZZLE(BLUE, ZERO, ZERO, ZERO), 2, 2), - chroma_plane(ISL_FORMAT_R16_UNORM, RGBA, _ISL_SWIZZLE(RED, ZERO, ZERO, ZERO), 2, 2)), + y_plane(0, ISL_FORMAT_R16_UNORM, RGBA, _ISL_SWIZZLE(GREEN, ZERO, ZERO, ZERO), 1, 1), + chroma_plane(1, ISL_FORMAT_R16_UNORM, RGBA, _ISL_SWIZZLE(BLUE, ZERO, ZERO, ZERO), 2, 2), + chroma_plane(2, ISL_FORMAT_R16_UNORM, RGBA, _ISL_SWIZZLE(RED, ZERO, ZERO, ZERO), 2, 2)), ycbcr_fmt(VK_FORMAT_G16_B16R16_2PLANE_420_UNORM, 2, - y_plane(ISL_FORMAT_R16_UNORM, RGBA, _ISL_SWIZZLE(GREEN, ZERO, ZERO, ZERO), 1, 1), - chroma_plane(ISL_FORMAT_R16G16_UNORM, RGBA, _ISL_SWIZZLE(BLUE, RED, ZERO, ZERO), 2, 2)), + y_plane(0, ISL_FORMAT_R16_UNORM, RGBA, _ISL_SWIZZLE(GREEN, ZERO, ZERO, ZERO), 1, 1), + chroma_plane(1, ISL_FORMAT_R16G16_UNORM, RGBA, _ISL_SWIZZLE(BLUE, RED, ZERO, ZERO), 2, 2)), ycbcr_fmt(VK_FORMAT_G16_B16_R16_3PLANE_422_UNORM, 3, - y_plane(ISL_FORMAT_R16_UNORM, RGBA, _ISL_SWIZZLE(GREEN, ZERO, ZERO, ZERO), 1, 1), - chroma_plane(ISL_FORMAT_R16_UNORM, RGBA, _ISL_SWIZZLE(BLUE, ZERO, ZERO, ZERO), 2, 1), - chroma_plane(ISL_FORMAT_R16_UNORM, RGBA, _ISL_SWIZZLE(RED, ZERO, ZERO, ZERO), 2, 1)), + y_plane(0, ISL_FORMAT_R16_UNORM, RGBA, _ISL_SWIZZLE(GREEN, ZERO, ZERO, ZERO), 1, 1), + chroma_plane(1, ISL_FORMAT_R16_UNORM, RGBA, _ISL_SWIZZLE(BLUE, ZERO, ZERO, ZERO), 2, 1), + chroma_plane(2, 
ISL_FORMAT_R16_UNORM, RGBA, _ISL_SWIZZLE(RED, ZERO, ZERO, ZERO), 2, 1)), ycbcr_fmt(VK_FORMAT_G16_B16R16_2PLANE_422_UNORM, 2, - y_plane(ISL_FORMAT_R16_UNORM, RGBA, _ISL_SWIZZLE(GREEN, ZERO, ZERO, ZERO), 1, 1), - chroma_plane(ISL_FORMAT_R16G16_UNORM, RGBA, _ISL_SWIZZLE(BLUE, RED, ZERO, ZERO), 2, 1)), + y_plane(0, ISL_FORMAT_R16_UNORM, RGBA, _ISL_SWIZZLE(GREEN, ZERO, ZERO, ZERO), 1, 1), + chroma_plane(1, ISL_FORMAT_R16G16_UNORM, RGBA, _ISL_SWIZZLE(BLUE, RED, ZERO, ZERO), 2, 1)), ycbcr_fmt(VK_FORMAT_G16_B16_R16_3PLANE_444_UNORM, 3, - y_plane(ISL_FORMAT_R16_UNORM, RGBA, _ISL_SWIZZLE(GREEN, ZERO, ZERO, ZERO), 1, 1), - chroma_plane(ISL_FORMAT_R16_UNORM, RGBA, _ISL_SWIZZLE(BLUE, ZERO, ZERO, ZERO), 1, 1), - chroma_plane(ISL_FORMAT_R16_UNORM, RGBA, _ISL_SWIZZLE(RED, ZERO, ZERO, ZERO), 1, 1)), + y_plane(0, ISL_FORMAT_R16_UNORM, RGBA, _ISL_SWIZZLE(GREEN, ZERO, ZERO, ZERO), 1, 1), + chroma_plane(1, ISL_FORMAT_R16_UNORM, RGBA, _ISL_SWIZZLE(BLUE, ZERO, ZERO, ZERO), 1, 1), + chroma_plane(2, ISL_FORMAT_R16_UNORM, RGBA, _ISL_SWIZZLE(RED, ZERO, ZERO, ZERO), 1, 1)), }; #undef _fmt @@ -467,11 +496,11 @@ // Format capabilities -static VkFormatFeatureFlags -get_image_format_features(const struct gen_device_info *devinfo, - VkFormat vk_format, - const struct anv_format *anv_format, - VkImageTiling vk_tiling) +VkFormatFeatureFlags +anv_get_image_format_features(const struct gen_device_info *devinfo, + VkFormat vk_format, + const struct anv_format *anv_format, + VkImageTiling vk_tiling) { VkFormatFeatureFlags flags = 0; @@ -494,8 +523,8 @@ flags |= VK_FORMAT_FEATURE_BLIT_SRC_BIT | VK_FORMAT_FEATURE_BLIT_DST_BIT | - VK_FORMAT_FEATURE_TRANSFER_SRC_BIT_KHR | - VK_FORMAT_FEATURE_TRANSFER_DST_BIT_KHR; + VK_FORMAT_FEATURE_TRANSFER_SRC_BIT | + VK_FORMAT_FEATURE_TRANSFER_DST_BIT; return flags; } @@ -718,11 +747,11 @@ *pFormatProperties = (VkFormatProperties) { .linearTilingFeatures = - get_image_format_features(devinfo, vk_format, anv_format, - VK_IMAGE_TILING_LINEAR), + 
anv_get_image_format_features(devinfo, vk_format, anv_format, + VK_IMAGE_TILING_LINEAR), .optimalTilingFeatures = - get_image_format_features(devinfo, vk_format, anv_format, - VK_IMAGE_TILING_OPTIMAL), + anv_get_image_format_features(devinfo, vk_format, anv_format, + VK_IMAGE_TILING_OPTIMAL), .bufferFeatures = get_buffer_format_features(devinfo, vk_format, anv_format), }; @@ -756,7 +785,7 @@ struct anv_physical_device *physical_device, const VkPhysicalDeviceImageFormatInfo2 *info, VkImageFormatProperties *pImageFormatProperties, - VkSamplerYcbcrConversionImageFormatPropertiesKHR *pYcbcrImageFormatProperties) + VkSamplerYcbcrConversionImageFormatProperties *pYcbcrImageFormatProperties) { VkFormatFeatureFlags format_feature_flags; VkExtent3D maxExtent; @@ -769,8 +798,8 @@ if (format == NULL) goto unsupported; - format_feature_flags = get_image_format_features(devinfo, info->format, - format, info->tiling); + format_feature_flags = anv_get_image_format_features(devinfo, info->format, + format, info->tiling); switch (info->type) { default: @@ -948,6 +977,26 @@ VK_EXTERNAL_MEMORY_HANDLE_TYPE_DMA_BUF_BIT_EXT, }; +static const VkExternalMemoryProperties android_buffer_props = { + .externalMemoryFeatures = VK_EXTERNAL_MEMORY_FEATURE_EXPORTABLE_BIT | + VK_EXTERNAL_MEMORY_FEATURE_IMPORTABLE_BIT, + .exportFromImportedHandleTypes = + VK_EXTERNAL_MEMORY_HANDLE_TYPE_ANDROID_HARDWARE_BUFFER_BIT_ANDROID, + .compatibleHandleTypes = + VK_EXTERNAL_MEMORY_HANDLE_TYPE_ANDROID_HARDWARE_BUFFER_BIT_ANDROID, +}; + + +static const VkExternalMemoryProperties android_image_props = { + .externalMemoryFeatures = VK_EXTERNAL_MEMORY_FEATURE_EXPORTABLE_BIT | + VK_EXTERNAL_MEMORY_FEATURE_IMPORTABLE_BIT | + VK_EXTERNAL_MEMORY_FEATURE_DEDICATED_ONLY_BIT, + .exportFromImportedHandleTypes = + VK_EXTERNAL_MEMORY_HANDLE_TYPE_ANDROID_HARDWARE_BUFFER_BIT_ANDROID, + .compatibleHandleTypes = + VK_EXTERNAL_MEMORY_HANDLE_TYPE_ANDROID_HARDWARE_BUFFER_BIT_ANDROID, +}; + VkResult 
anv_GetPhysicalDeviceImageFormatProperties2( VkPhysicalDevice physicalDevice, const VkPhysicalDeviceImageFormatInfo2* base_info, @@ -955,8 +1004,9 @@ { ANV_FROM_HANDLE(anv_physical_device, physical_device, physicalDevice); const VkPhysicalDeviceExternalImageFormatInfo *external_info = NULL; - VkExternalImageFormatPropertiesKHR *external_props = NULL; + VkExternalImageFormatProperties *external_props = NULL; VkSamplerYcbcrConversionImageFormatProperties *ycbcr_props = NULL; + struct VkAndroidHardwareBufferUsageANDROID *android_usage = NULL; VkResult result; /* Extract input structs */ @@ -980,6 +1030,9 @@ case VK_STRUCTURE_TYPE_SAMPLER_YCBCR_CONVERSION_IMAGE_FORMAT_PROPERTIES: ycbcr_props = (void *) s; break; + case VK_STRUCTURE_TYPE_ANDROID_HARDWARE_BUFFER_USAGE_ANDROID: + android_usage = (void *) s; + break; default: anv_debug_ignored_stype(s->sType); break; @@ -991,6 +1044,18 @@ if (result != VK_SUCCESS) goto fail; + bool ahw_supported = + physical_device->supported_extensions.ANDROID_external_memory_android_hardware_buffer; + + if (ahw_supported && android_usage) { + android_usage->androidHardwareBufferUsage = + anv_ahw_usage_from_vk_usage(base_info->flags, + base_info->usage); + + /* Limit maxArrayLayers to 1 for AHardwareBuffer based images for now. 
*/ + base_props->imageFormatProperties.maxArrayLayers = 1; + } + /* From the Vulkan 1.0.42 spec: * * If handleType is 0, vkGetPhysicalDeviceImageFormatProperties2 will @@ -1004,6 +1069,12 @@ if (external_props) external_props->externalMemoryProperties = prime_fd_props; break; + case VK_EXTERNAL_MEMORY_HANDLE_TYPE_ANDROID_HARDWARE_BUFFER_BIT_ANDROID: + if (ahw_supported && external_props) { + external_props->externalMemoryProperties = android_image_props; + break; + } + /* fallthrough if ahw not supported */ default: /* From the Vulkan 1.0.42 spec: * @@ -1081,11 +1152,19 @@ if (pExternalBufferInfo->flags) goto unsupported; + ANV_FROM_HANDLE(anv_physical_device, physical_device, physicalDevice); + switch (pExternalBufferInfo->handleType) { case VK_EXTERNAL_MEMORY_HANDLE_TYPE_OPAQUE_FD_BIT: case VK_EXTERNAL_MEMORY_HANDLE_TYPE_DMA_BUF_BIT_EXT: pExternalBufferProperties->externalMemoryProperties = prime_fd_props; return; + case VK_EXTERNAL_MEMORY_HANDLE_TYPE_ANDROID_HARDWARE_BUFFER_BIT_ANDROID: + if (physical_device->supported_extensions.ANDROID_external_memory_android_hardware_buffer) { + pExternalBufferProperties->externalMemoryProperties = android_buffer_props; + return; + } + /* fallthrough if ahw not supported */ default: goto unsupported; } @@ -1104,6 +1183,17 @@ ANV_FROM_HANDLE(anv_device, device, _device); struct anv_ycbcr_conversion *conversion; + /* Search for VkExternalFormatANDROID and resolve the format. */ + struct anv_format *ext_format = NULL; + const struct VkExternalFormatANDROID *ext_info = + vk_find_struct_const(pCreateInfo->pNext, EXTERNAL_FORMAT_ANDROID); + + uint64_t format = ext_info ? 
ext_info->externalFormat : 0; + if (format) { + assert(pCreateInfo->format == VK_FORMAT_UNDEFINED); + ext_format = (struct anv_format *) (uintptr_t) format; + } + assert(pCreateInfo->sType == VK_STRUCTURE_TYPE_SAMPLER_YCBCR_CONVERSION_CREATE_INFO); conversion = vk_alloc2(&device->alloc, pAllocator, sizeof(*conversion), 8, @@ -1116,14 +1206,25 @@ conversion->format = anv_get_format(pCreateInfo->format); conversion->ycbcr_model = pCreateInfo->ycbcrModel; conversion->ycbcr_range = pCreateInfo->ycbcrRange; - conversion->mapping[0] = pCreateInfo->components.r; - conversion->mapping[1] = pCreateInfo->components.g; - conversion->mapping[2] = pCreateInfo->components.b; - conversion->mapping[3] = pCreateInfo->components.a; + + /* The Vulkan 1.1.95 spec says "When creating an external format conversion, + * the value of components if ignored." + */ + if (!ext_format) { + conversion->mapping[0] = pCreateInfo->components.r; + conversion->mapping[1] = pCreateInfo->components.g; + conversion->mapping[2] = pCreateInfo->components.b; + conversion->mapping[3] = pCreateInfo->components.a; + } + conversion->chroma_offsets[0] = pCreateInfo->xChromaOffset; conversion->chroma_offsets[1] = pCreateInfo->yChromaOffset; conversion->chroma_filter = pCreateInfo->chromaFilter; + /* Setup external format. 
*/ + if (ext_format) + conversion->format = ext_format; + bool has_chroma_subsampled = false; for (uint32_t p = 0; p < conversion->format->n_planes; p++) { if (conversion->format->planes[p].has_chroma && diff -Nru mesa-18.3.3/src/intel/vulkan/anv_genX.h mesa-19.0.1/src/intel/vulkan/anv_genX.h --- mesa-18.3.3/src/intel/vulkan/anv_genX.h 2018-12-07 18:58:04.000000000 +0000 +++ mesa-19.0.1/src/intel/vulkan/anv_genX.h 2019-03-31 23:16:37.000000000 +0000 @@ -66,6 +66,8 @@ uint32_t base_layer, uint32_t layer_count); +void genX(cmd_emit_conditional_render_predicate)(struct anv_cmd_buffer *cmd_buffer); + void genX(emit_urb_setup)(struct anv_device *device, struct anv_batch *batch, const struct gen_l3_config *l3_config, diff -Nru mesa-18.3.3/src/intel/vulkan/anv_image.c mesa-19.0.1/src/intel/vulkan/anv_image.c --- mesa-18.3.3/src/intel/vulkan/anv_image.c 2018-12-07 18:58:04.000000000 +0000 +++ mesa-19.0.1/src/intel/vulkan/anv_image.c 2019-03-31 23:16:37.000000000 +0000 @@ -159,28 +159,26 @@ static bool all_formats_ccs_e_compatible(const struct gen_device_info *devinfo, - const struct VkImageCreateInfo *vk_info) + const VkImageFormatListCreateInfoKHR *fmt_list, + struct anv_image *image) { enum isl_format format = - anv_get_isl_format(devinfo, vk_info->format, - VK_IMAGE_ASPECT_COLOR_BIT, vk_info->tiling); + anv_get_isl_format(devinfo, image->vk_format, + VK_IMAGE_ASPECT_COLOR_BIT, image->tiling); if (!isl_format_supports_ccs_e(devinfo, format)) return false; - if (!(vk_info->flags & VK_IMAGE_CREATE_MUTABLE_FORMAT_BIT)) + if (!(image->create_flags & VK_IMAGE_CREATE_MUTABLE_FORMAT_BIT)) return true; - const VkImageFormatListCreateInfoKHR *fmt_list = - vk_find_struct_const(vk_info->pNext, IMAGE_FORMAT_LIST_CREATE_INFO_KHR); - if (!fmt_list || fmt_list->viewFormatCount == 0) return false; for (uint32_t i = 0; i < fmt_list->viewFormatCount; i++) { enum isl_format view_format = anv_get_isl_format(devinfo, fmt_list->pViewFormats[i], - VK_IMAGE_ASPECT_COLOR_BIT, vk_info->tiling); + 
VK_IMAGE_ASPECT_COLOR_BIT, image->tiling); if (!isl_formats_are_ccs_e_compatible(devinfo, format, view_format)) return false; @@ -245,7 +243,6 @@ */ static void add_aux_state_tracking_buffer(struct anv_image *image, - VkImageAspectFlagBits aspect, uint32_t plane, const struct anv_device *device) { @@ -300,11 +297,11 @@ static VkResult make_surface(const struct anv_device *dev, struct anv_image *image, - const struct anv_image_create_info *anv_info, + uint32_t stride, isl_tiling_flags_t tiling_flags, + isl_surf_usage_flags_t isl_extra_usage_flags, VkImageAspectFlagBits aspect) { - const VkImageCreateInfo *vk_info = anv_info->vk_info; bool ok; static const enum isl_surf_dim vk_to_isl_surf_dim[] = { @@ -313,8 +310,7 @@ [VK_IMAGE_TYPE_3D] = ISL_SURF_DIM_3D, }; - image->extent = anv_sanitize_image_extent(vk_info->imageType, - vk_info->extent); + image->extent = anv_sanitize_image_extent(image->type, image->extent); const unsigned plane = anv_image_aspect_to_plane(image->aspects, aspect); const struct anv_format_plane plane_format = @@ -322,8 +318,8 @@ struct anv_surface *anv_surf = &image->planes[plane].surface; const isl_surf_usage_flags_t usage = - choose_isl_surf_usage(vk_info->flags, image->usage, - anv_info->isl_extra_usage_flags, aspect); + choose_isl_surf_usage(image->create_flags, image->usage, + isl_extra_usage_flags, aspect); /* If an image is created as BLOCK_TEXEL_VIEW_COMPATIBLE, then we need to * fall back to linear on Broadwell and earlier because we aren't @@ -333,24 +329,24 @@ */ bool needs_shadow = false; if (dev->info.gen <= 8 && - (vk_info->flags & VK_IMAGE_CREATE_BLOCK_TEXEL_VIEW_COMPATIBLE_BIT) && - vk_info->tiling == VK_IMAGE_TILING_OPTIMAL) { + (image->create_flags & VK_IMAGE_CREATE_BLOCK_TEXEL_VIEW_COMPATIBLE_BIT) && + image->tiling == VK_IMAGE_TILING_OPTIMAL) { assert(isl_format_is_compressed(plane_format.isl_format)); tiling_flags = ISL_TILING_LINEAR_BIT; needs_shadow = true; } ok = isl_surf_init(&dev->isl_dev, &anv_surf->isl, - .dim = 
vk_to_isl_surf_dim[vk_info->imageType], + .dim = vk_to_isl_surf_dim[image->type], .format = plane_format.isl_format, .width = image->extent.width / plane_format.denominator_scales[0], .height = image->extent.height / plane_format.denominator_scales[1], .depth = image->extent.depth, - .levels = vk_info->mipLevels, - .array_len = vk_info->arrayLayers, - .samples = vk_info->samples, + .levels = image->levels, + .array_len = image->array_size, + .samples = image->samples, .min_alignment_B = 0, - .row_pitch_B = anv_info->stride, + .row_pitch_B = stride, .usage = usage, .tiling_flags = tiling_flags); @@ -370,16 +366,16 @@ assert(tiling_flags == ISL_TILING_LINEAR_BIT); ok = isl_surf_init(&dev->isl_dev, &image->planes[plane].shadow_surface.isl, - .dim = vk_to_isl_surf_dim[vk_info->imageType], + .dim = vk_to_isl_surf_dim[image->type], .format = plane_format.isl_format, .width = image->extent.width, .height = image->extent.height, .depth = image->extent.depth, - .levels = vk_info->mipLevels, - .array_len = vk_info->arrayLayers, - .samples = vk_info->samples, + .levels = image->levels, + .array_len = image->array_size, + .samples = image->samples, .min_alignment_B = 0, - .row_pitch_B = anv_info->stride, + .row_pitch_B = stride, .usage = usage, .tiling_flags = ISL_TILING_ANY_MASK); @@ -406,12 +402,12 @@ /* It will never be used as an attachment, HiZ is pointless. 
*/ } else if (dev->info.gen == 7) { anv_perf_warn(dev->instance, image, "Implement gen7 HiZ"); - } else if (vk_info->mipLevels > 1) { + } else if (image->levels > 1) { anv_perf_warn(dev->instance, image, "Enable multi-LOD HiZ"); - } else if (vk_info->arrayLayers > 1) { + } else if (image->array_size > 1) { anv_perf_warn(dev->instance, image, "Implement multi-arrayLayer HiZ clears and resolves"); - } else if (dev->info.gen == 8 && vk_info->samples > 1) { + } else if (dev->info.gen == 8 && image->samples > 1) { anv_perf_warn(dev->instance, image, "Enable gen8 multisampled HiZ"); } else if (!unlikely(INTEL_DEBUG & DEBUG_NO_HIZ)) { assert(image->planes[plane].aux_surface.isl.size_B == 0); @@ -422,7 +418,7 @@ add_surface(image, &image->planes[plane].aux_surface, plane); image->planes[plane].aux_usage = ISL_AUX_USAGE_HIZ; } - } else if ((aspect & VK_IMAGE_ASPECT_ANY_COLOR_BIT_ANV) && vk_info->samples == 1) { + } else if ((aspect & VK_IMAGE_ASPECT_ANY_COLOR_BIT_ANV) && image->samples == 1) { /* TODO: Disallow compression with : * * 1) non multiplanar images (We appear to hit a sampler bug with @@ -436,7 +432,7 @@ */ const bool allow_compression = image->n_planes == 1 && - (vk_info->flags & VK_IMAGE_CREATE_ALIAS_BIT) == 0 && + (image->create_flags & VK_IMAGE_CREATE_ALIAS_BIT) == 0 && likely((INTEL_DEBUG & DEBUG_NO_RBC) == 0); if (allow_compression) { @@ -463,7 +459,7 @@ } add_surface(image, &image->planes[plane].aux_surface, plane); - add_aux_state_tracking_buffer(image, aspect, plane, dev); + add_aux_state_tracking_buffer(image, plane, dev); /* For images created without MUTABLE_FORMAT_BIT set, we know that * they will always be used with the original format. In @@ -473,21 +469,21 @@ * a render target. This means that it's safe to just leave * compression on at all times for these formats. 
*/ - if (!(vk_info->usage & VK_IMAGE_USAGE_STORAGE_BIT) && - all_formats_ccs_e_compatible(&dev->info, vk_info)) { + if (!(image->usage & VK_IMAGE_USAGE_STORAGE_BIT) && + image->ccs_e_compatible) { image->planes[plane].aux_usage = ISL_AUX_USAGE_CCS_E; } } } - } else if ((aspect & VK_IMAGE_ASPECT_ANY_COLOR_BIT_ANV) && vk_info->samples > 1) { - assert(!(vk_info->usage & VK_IMAGE_USAGE_STORAGE_BIT)); + } else if ((aspect & VK_IMAGE_ASPECT_ANY_COLOR_BIT_ANV) && image->samples > 1) { + assert(!(image->usage & VK_IMAGE_USAGE_STORAGE_BIT)); assert(image->planes[plane].aux_surface.isl.size_B == 0); ok = isl_surf_get_mcs_surf(&dev->isl_dev, &image->planes[plane].surface.isl, &image->planes[plane].aux_surface.isl); if (ok) { add_surface(image, &image->planes[plane].aux_surface, plane); - add_aux_state_tracking_buffer(image, aspect, plane, dev); + add_aux_state_tracking_buffer(image, plane, dev); image->planes[plane].aux_usage = ISL_AUX_USAGE_MCS; } } @@ -591,12 +587,22 @@ image->array_size = pCreateInfo->arrayLayers; image->samples = pCreateInfo->samples; image->usage = pCreateInfo->usage; + image->create_flags = pCreateInfo->flags; image->tiling = pCreateInfo->tiling; image->disjoint = pCreateInfo->flags & VK_IMAGE_CREATE_DISJOINT_BIT; image->needs_set_tiling = wsi_info && wsi_info->scanout; image->drm_format_mod = isl_mod_info ? isl_mod_info->modifier : DRM_FORMAT_MOD_INVALID; + /* In case of external format, We don't know format yet, + * so skip the rest for now. 
+ */ + if (create_info->external_format) { + image->external_format = true; + *pImage = anv_image_to_handle(image); + return VK_SUCCESS; + } + const struct anv_format *format = anv_get_format(image->vk_format); assert(format != NULL); @@ -606,10 +612,17 @@ image->n_planes = format->n_planes; + const VkImageFormatListCreateInfoKHR *fmt_list = + vk_find_struct_const(pCreateInfo->pNext, + IMAGE_FORMAT_LIST_CREATE_INFO_KHR); + + image->ccs_e_compatible = + all_formats_ccs_e_compatible(&device->info, fmt_list, image); + uint32_t b; for_each_bit(b, image->aspects) { - r = make_surface(device, image, create_info, isl_tiling_flags, - (1 << b)); + r = make_surface(device, image, create_info->stride, isl_tiling_flags, + create_info->isl_extra_usage_flags, (1 << b)); if (r != VK_SUCCESS) goto fail; } @@ -631,14 +644,19 @@ const VkAllocationCallbacks *pAllocator, VkImage *pImage) { -#ifdef ANDROID + const struct VkExternalMemoryImageCreateInfo *create_info = + vk_find_struct_const(pCreateInfo->pNext, EXTERNAL_MEMORY_IMAGE_CREATE_INFO); + + if (create_info && (create_info->handleTypes & + VK_EXTERNAL_MEMORY_HANDLE_TYPE_ANDROID_HARDWARE_BUFFER_BIT_ANDROID)) + return anv_image_from_external(device, pCreateInfo, create_info, + pAllocator, pImage); + const VkNativeBufferANDROID *gralloc_info = vk_find_struct_const(pCreateInfo->pNext, NATIVE_BUFFER_ANDROID); - if (gralloc_info) return anv_image_from_gralloc(device, pCreateInfo, gralloc_info, pAllocator, pImage); -#endif return anv_image_create(device, &(struct anv_image_create_info) { @@ -688,6 +706,83 @@ }; } +/* We are binding AHardwareBuffer. Get a description, resolve the + * format and prepare anv_image properly. + */ +static void +resolve_ahw_image(struct anv_device *device, + struct anv_image *image, + struct anv_device_memory *mem) +{ +#ifdef ANDROID + assert(mem->ahw); + AHardwareBuffer_Desc desc; + AHardwareBuffer_describe(mem->ahw, &desc); + + /* Check tiling. 
*/ + int i915_tiling = anv_gem_get_tiling(device, mem->bo->gem_handle); + VkImageTiling vk_tiling; + isl_tiling_flags_t isl_tiling_flags = 0; + + switch (i915_tiling) { + case I915_TILING_NONE: + vk_tiling = VK_IMAGE_TILING_LINEAR; + isl_tiling_flags = ISL_TILING_LINEAR_BIT; + break; + case I915_TILING_X: + vk_tiling = VK_IMAGE_TILING_OPTIMAL; + isl_tiling_flags = ISL_TILING_X_BIT; + break; + case I915_TILING_Y: + vk_tiling = VK_IMAGE_TILING_OPTIMAL; + isl_tiling_flags = ISL_TILING_Y0_BIT; + break; + case -1: + default: + unreachable("Invalid tiling flags."); + } + + assert(vk_tiling == VK_IMAGE_TILING_LINEAR || + vk_tiling == VK_IMAGE_TILING_OPTIMAL); + + /* Check format. */ + VkFormat vk_format = vk_format_from_android(desc.format); + enum isl_format isl_fmt = anv_get_isl_format(&device->info, + vk_format, + VK_IMAGE_ASPECT_COLOR_BIT, + vk_tiling); + assert(format != ISL_FORMAT_UNSUPPORTED); + + /* Handle RGB(X)->RGBA fallback. */ + switch (desc.format) { + case AHARDWAREBUFFER_FORMAT_R8G8B8_UNORM: + case AHARDWAREBUFFER_FORMAT_R8G8B8X8_UNORM: + if (isl_format_is_rgb(isl_fmt)) + isl_fmt = isl_format_rgb_to_rgba(isl_fmt); + break; + } + + /* Now we are able to fill anv_image fields properly and create + * isl_surface for it. 
+ */ + image->vk_format = vk_format; + image->format = anv_get_format(vk_format); + image->aspects = vk_format_aspects(image->vk_format); + image->n_planes = image->format->n_planes; + image->ccs_e_compatible = false; + + uint32_t stride = desc.stride * + (isl_format_get_layout(isl_fmt)->bpb / 8); + + uint32_t b; + for_each_bit(b, image->aspects) { + VkResult r = make_surface(device, image, stride, isl_tiling_flags, + ISL_SURF_USAGE_DISABLE_AUX_BIT, (1 << b)); + assert(r == VK_SUCCESS); + } +#endif +} + VkResult anv_BindImageMemory( VkDevice _device, VkImage _image, @@ -698,6 +793,9 @@ ANV_FROM_HANDLE(anv_device_memory, mem, _memory); ANV_FROM_HANDLE(anv_image, image, _image); + if (mem->ahw) + resolve_ahw_image(device, image, mem); + uint32_t aspect_bit; anv_foreach_image_aspect_bit(aspect_bit, image, image->aspects) { uint32_t plane = @@ -719,8 +817,11 @@ const VkBindImageMemoryInfo *bind_info = &pBindInfos[i]; ANV_FROM_HANDLE(anv_device_memory, mem, bind_info->memory); ANV_FROM_HANDLE(anv_image, image, bind_info->image); - VkImageAspectFlags aspects = image->aspects; + if (mem->ahw) + resolve_ahw_image(device, image, mem); + + VkImageAspectFlags aspects = image->aspects; vk_foreach_struct_const(s, bind_info->pNext) { switch (s->sType) { case VK_STRUCTURE_TYPE_BIND_IMAGE_PLANE_MEMORY_INFO: { @@ -757,7 +858,7 @@ ANV_FROM_HANDLE(anv_image, image, _image); const struct anv_surface *surface; - if (subresource->aspectMask == VK_IMAGE_ASPECT_PLANE_1_BIT_KHR && + if (subresource->aspectMask == VK_IMAGE_ASPECT_PLANE_1_BIT && image->drm_format_mod != DRM_FORMAT_MOD_INVALID && isl_drm_modifier_has_aux(image->drm_format_mod)) surface = &image->planes[0].aux_surface; @@ -921,6 +1022,9 @@ case VK_IMAGE_LAYOUT_SHARED_PRESENT_KHR: unreachable("VK_KHR_shared_presentable_image is unsupported"); + case VK_IMAGE_LAYOUT_FRAGMENT_DENSITY_MAP_OPTIMAL_EXT: + unreachable("VK_EXT_fragment_density_map is unsupported"); + case VK_IMAGE_LAYOUT_SHADING_RATE_OPTIMAL_NV: 
unreachable("VK_NV_shading_rate_image is unsupported"); } @@ -1223,8 +1327,6 @@ } } - anv_state_flush(device, state_inout->state); - if (image_param_out) { assert(view_usage == ISL_SURF_USAGE_STORAGE_BIT); isl_surf_fill_image_param(&device->isl_dev, image_param_out, @@ -1248,6 +1350,28 @@ return view_aspects; } +static uint32_t +anv_image_aspect_get_planes(VkImageAspectFlags aspect_mask) +{ + uint32_t planes = 0; + + if (aspect_mask & (VK_IMAGE_ASPECT_COLOR_BIT | + VK_IMAGE_ASPECT_DEPTH_BIT | + VK_IMAGE_ASPECT_STENCIL_BIT | + VK_IMAGE_ASPECT_PLANE_0_BIT)) + planes++; + if (aspect_mask & VK_IMAGE_ASPECT_PLANE_1_BIT) + planes++; + if (aspect_mask & VK_IMAGE_ASPECT_PLANE_2_BIT) + planes++; + + if ((aspect_mask & VK_IMAGE_ASPECT_DEPTH_BIT) != 0 && + (aspect_mask & VK_IMAGE_ASPECT_STENCIL_BIT) != 0) + planes++; + + return planes; +} + VkResult anv_CreateImageView(VkDevice _device, const VkImageViewCreateInfo *pCreateInfo, @@ -1268,6 +1392,22 @@ assert(range->layerCount > 0); assert(range->baseMipLevel < image->levels); + /* Check if a conversion info was passed. */ + const struct anv_format *conv_format = NULL; + const struct VkSamplerYcbcrConversionInfo *conv_info = + vk_find_struct_const(pCreateInfo->pNext, SAMPLER_YCBCR_CONVERSION_INFO); + + /* If image has an external format, the pNext chain must contain an instance of + * VKSamplerYcbcrConversionInfo with a conversion object created with the same + * external format as image." + */ + assert(!image->external_format || conv_info); + + if (conv_info) { + ANV_FROM_HANDLE(anv_ycbcr_conversion, conversion, conv_info->conversion); + conv_format = conversion->format; + } + const VkImageViewUsageCreateInfo *usage_info = vk_find_struct_const(pCreateInfo, IMAGE_VIEW_USAGE_CREATE_INFO); VkImageUsageFlags view_usage = usage_info ? 
usage_info->usage : image->usage; @@ -1296,7 +1436,7 @@ * VK_IMAGE_ASPECT_COLOR_BIT will be converted to * VK_IMAGE_ASPECT_PLANE_0_BIT | VK_IMAGE_ASPECT_PLANE_1_BIT | * VK_IMAGE_ASPECT_PLANE_2_BIT for an image of format - * VK_FORMAT_G8_B8_R8_3PLANE_420_UNORM_KHR. + * VK_FORMAT_G8_B8_R8_3PLANE_420_UNORM. */ VkImageAspectFlags expanded_aspects = anv_image_expand_aspects(image, range->aspectMask); @@ -1312,6 +1452,15 @@ iview->n_planes = anv_image_aspect_get_planes(iview->aspect_mask); iview->vk_format = pCreateInfo->format; + /* "If image has an external format, format must be VK_FORMAT_UNDEFINED." */ + assert(!image->external_format || pCreateInfo->format == VK_FORMAT_UNDEFINED); + + /* Format is undefined, this can happen when using external formats. Set + * view format from the passed conversion info. + */ + if (iview->vk_format == VK_FORMAT_UNDEFINED && conv_format) + iview->vk_format = conv_format->vk_format; + iview->extent = (VkExtent3D) { .width = anv_minify(image->extent.width , range->baseMipLevel), .height = anv_minify(image->extent.height, range->baseMipLevel), @@ -1324,11 +1473,11 @@ uint32_t iaspect_bit, vplane = 0; anv_foreach_image_aspect_bit(iaspect_bit, image, expanded_aspects) { uint32_t iplane = - anv_image_aspect_to_plane(expanded_aspects, 1UL << iaspect_bit); + anv_image_aspect_to_plane(image->aspects, 1UL << iaspect_bit); VkImageAspectFlags vplane_aspect = anv_plane_to_aspect(iview->aspect_mask, vplane); struct anv_format_plane format = - anv_get_format_plane(&device->info, pCreateInfo->format, + anv_get_format_plane(&device->info, iview->vk_format, vplane_aspect, image->tiling); iview->planes[vplane].image_plane = iplane; diff -Nru mesa-18.3.3/src/intel/vulkan/anv_intel.c mesa-19.0.1/src/intel/vulkan/anv_intel.c --- mesa-18.3.3/src/intel/vulkan/anv_intel.c 2018-10-21 19:21:33.000000000 +0000 +++ mesa-19.0.1/src/intel/vulkan/anv_intel.c 2019-03-31 23:16:37.000000000 +0000 @@ -88,7 +88,7 @@ if (mem->bo->size < aligned_image_size) { result = 
vk_errorf(device->instance, device, - VK_ERROR_INVALID_EXTERNAL_HANDLE_KHR, + VK_ERROR_INVALID_EXTERNAL_HANDLE, "dma-buf too small for image in " "vkCreateDmaBufImageINTEL: %"PRIu64"B < "PRIu64"B", mem->bo->size, aligned_image_size); diff -Nru mesa-18.3.3/src/intel/vulkan/anv_nir_apply_pipeline_layout.c mesa-19.0.1/src/intel/vulkan/anv_nir_apply_pipeline_layout.c --- mesa-18.3.3/src/intel/vulkan/anv_nir_apply_pipeline_layout.c 2019-02-01 12:03:20.000000000 +0000 +++ mesa-19.0.1/src/intel/vulkan/anv_nir_apply_pipeline_layout.c 2019-03-31 23:16:37.000000000 +0000 @@ -144,21 +144,11 @@ uint32_t array_size = state->layout->set[set].layout->binding[binding].array_size; - nir_const_value *const_array_index = nir_src_as_const_value(intrin->src[0]); + nir_ssa_def *array_index = nir_ssa_for_src(b, intrin->src[0], 1); + if (nir_src_is_const(intrin->src[0]) || state->add_bounds_checks) + array_index = nir_umin(b, array_index, nir_imm_int(b, array_size - 1)); - nir_ssa_def *block_index; - if (const_array_index) { - unsigned array_index = const_array_index->u32[0]; - array_index = MIN2(array_index, array_size - 1); - block_index = nir_imm_int(b, surface_index + array_index); - } else { - block_index = nir_ssa_for_src(b, intrin->src[0], 1); - - if (state->add_bounds_checks) - block_index = nir_umin(b, block_index, nir_imm_int(b, array_size - 1)); - - block_index = nir_iadd(b, nir_imm_int(b, surface_index), block_index); - } + nir_ssa_def *block_index = nir_iadd_imm(b, array_index, surface_index); assert(intrin->dest.is_ssa); nir_ssa_def_rewrite_uses(&intrin->dest.ssa, nir_src_for_ssa(block_index)); @@ -187,6 +177,23 @@ } static void +lower_load_vulkan_descriptor(nir_intrinsic_instr *intrin, + struct apply_pipeline_layout_state *state) +{ + nir_builder *b = &state->builder; + + b->cursor = nir_before_instr(&intrin->instr); + + /* We follow the nir_address_format_vk_index_offset model */ + assert(intrin->src[0].is_ssa); + nir_ssa_def *vec2 = nir_vec2(b, intrin->src[0].ssa, 
nir_imm_int(b, 0)); + + assert(intrin->dest.is_ssa); + nir_ssa_def_rewrite_uses(&intrin->dest.ssa, nir_src_for_ssa(vec2)); + nir_instr_remove(&intrin->instr); +} + +static void lower_image_intrinsic(nir_intrinsic_instr *intrin, struct apply_pipeline_layout_state *state) { @@ -301,9 +308,9 @@ if (deref->deref_type != nir_deref_type_var) { assert(deref->deref_type == nir_deref_type_array); - nir_const_value *const_index = nir_src_as_const_value(deref->arr.index); - if (const_index) { - *base_index += MIN2(const_index->u32[0], array_size - 1); + if (nir_src_is_const(deref->arr.index)) { + unsigned arr_index = nir_src_as_uint(deref->arr.index); + *base_index += MIN2(arr_index, array_size - 1); } else { nir_builder *b = &state->builder; @@ -339,8 +346,7 @@ if (plane_src_idx < 0) return 0; - unsigned plane = - nir_src_as_const_value(tex->src[plane_src_idx].src)->u32[0]; + unsigned plane = nir_src_as_uint(tex->src[plane_src_idx].src); nir_tex_instr_remove_src(tex, plane_src_idx); @@ -383,6 +389,9 @@ case nir_intrinsic_vulkan_resource_reindex: lower_res_reindex_intrinsic(intrin, state); break; + case nir_intrinsic_load_vulkan_descriptor: + lower_load_vulkan_descriptor(intrin, state); + break; case nir_intrinsic_image_deref_load: case nir_intrinsic_image_deref_store: case nir_intrinsic_image_deref_atomic_add: diff -Nru mesa-18.3.3/src/intel/vulkan/anv_nir_lower_input_attachments.c mesa-19.0.1/src/intel/vulkan/anv_nir_lower_input_attachments.c --- mesa-18.3.3/src/intel/vulkan/anv_nir_lower_input_attachments.c 2018-12-07 18:58:04.000000000 +0000 +++ mesa-19.0.1/src/intel/vulkan/anv_nir_lower_input_attachments.c 2019-03-31 23:16:37.000000000 +0000 @@ -61,8 +61,7 @@ nir_ssa_def *offset = nir_ssa_for_src(&b, load->src[1], 2); nir_ssa_def *pos = nir_iadd(&b, frag_coord, offset); - nir_ssa_def *layer = - nir_load_system_value(&b, nir_intrinsic_load_layer_id, 0); + nir_ssa_def *layer = nir_load_layer_id(&b); nir_ssa_def *coord = nir_vec3(&b, nir_channel(&b, pos, 0), nir_channel(&b, 
pos, 1), layer); diff -Nru mesa-18.3.3/src/intel/vulkan/anv_nir_lower_multiview.c mesa-19.0.1/src/intel/vulkan/anv_nir_lower_multiview.c --- mesa-18.3.3/src/intel/vulkan/anv_nir_lower_multiview.c 2018-12-07 18:58:04.000000000 +0000 +++ mesa-19.0.1/src/intel/vulkan/anv_nir_lower_multiview.c 2019-03-31 23:16:37.000000000 +0000 @@ -125,7 +125,7 @@ const struct glsl_type *type = glsl_int_type(); if (b->shader->info.stage == MESA_SHADER_TESS_CTRL || b->shader->info.stage == MESA_SHADER_GEOMETRY) - type = glsl_array_type(type, 1); + type = glsl_array_type(type, 1, 0); nir_variable *idx_var = nir_variable_create(b->shader, nir_var_shader_in, diff -Nru mesa-18.3.3/src/intel/vulkan/anv_nir_lower_ycbcr_textures.c mesa-19.0.1/src/intel/vulkan/anv_nir_lower_ycbcr_textures.c --- mesa-18.3.3/src/intel/vulkan/anv_nir_lower_ycbcr_textures.c 2018-09-27 19:13:54.000000000 +0000 +++ mesa-19.0.1/src/intel/vulkan/anv_nir_lower_ycbcr_textures.c 2019-03-31 23:16:37.000000000 +0000 @@ -38,7 +38,7 @@ y_range(nir_builder *b, nir_ssa_def *y_channel, int bpc, - VkSamplerYcbcrRangeKHR range) + VkSamplerYcbcrRange range) { switch (range) { case VK_SAMPLER_YCBCR_RANGE_ITU_FULL: @@ -60,7 +60,7 @@ chroma_range(nir_builder *b, nir_ssa_def *chroma_channel, int bpc, - VkSamplerYcbcrRangeKHR range) + VkSamplerYcbcrRange range) { switch (range) { case VK_SAMPLER_YCBCR_RANGE_ITU_FULL: @@ -80,7 +80,7 @@ } static const nir_const_value * -ycbcr_model_to_rgb_matrix(VkSamplerYcbcrModelConversionKHR model) +ycbcr_model_to_rgb_matrix(VkSamplerYcbcrModelConversion model) { switch (model) { case VK_SAMPLER_YCBCR_MODEL_CONVERSION_YCBCR_601: { @@ -269,6 +269,7 @@ tex->texture_index = old_tex->texture_index; tex->texture_array_size = old_tex->texture_array_size; tex->sampler_index = old_tex->sampler_index; + tex->is_array = old_tex->is_array; nir_ssa_dest_init(&tex->instr, &tex->dest, old_tex->dest.ssa.num_components, @@ -344,10 +345,10 @@ unsigned array_index = 0; if (deref->deref_type != nir_deref_type_var) { 
assert(deref->deref_type == nir_deref_type_array); - nir_const_value *const_index = nir_src_as_const_value(deref->arr.index); - if (!const_index) + if (!nir_src_is_const(deref->arr.index)) return false; - array_index = MIN2(const_index->u32[0], binding->array_size - 1); + array_index = nir_src_as_uint(deref->arr.index); + array_index = MIN2(array_index, binding->array_size - 1); } const struct anv_sampler *sampler = binding->immutable_samplers[array_index]; @@ -373,11 +374,11 @@ uint8_t y_bpc = y_isl_layout->channels_array[0].bits; /* |ycbcr_comp| holds components in the order : Cr-Y-Cb */ - nir_ssa_def *ycbcr_comp[5] = { NULL, NULL, NULL, - /* Use extra 2 channels for following swizzle */ - nir_imm_float(builder, 1.0f), - nir_imm_float(builder, 0.0f), - }; + nir_ssa_def *zero = nir_imm_float(builder, 0.0f); + nir_ssa_def *one = nir_imm_float(builder, 1.0f); + /* Use extra 2 channels for following swizzle */ + nir_ssa_def *ycbcr_comp[5] = { zero, zero, zero, one, zero }; + uint8_t ycbcr_bpcs[5]; memset(ycbcr_bpcs, y_bpc, sizeof(ycbcr_bpcs)); diff -Nru mesa-18.3.3/src/intel/vulkan/anv_pass.c mesa-19.0.1/src/intel/vulkan/anv_pass.c --- mesa-18.3.3/src/intel/vulkan/anv_pass.c 2018-09-27 19:13:54.000000000 +0000 +++ mesa-19.0.1/src/intel/vulkan/anv_pass.c 2019-03-31 23:16:37.000000000 +0000 @@ -74,6 +74,10 @@ subpass->depth_stencil_attachment->attachment == VK_ATTACHMENT_UNUSED) subpass->depth_stencil_attachment = NULL; + if (subpass->ds_resolve_attachment && + subpass->ds_resolve_attachment->attachment == VK_ATTACHMENT_UNUSED) + subpass->ds_resolve_attachment = NULL; + for (uint32_t j = 0; j < subpass->attachment_count; j++) { struct anv_subpass_attachment *subpass_att = &subpass->attachments[j]; if (subpass_att->attachment == VK_ATTACHMENT_UNUSED) @@ -100,7 +104,7 @@ } /* We have to handle resolve attachments specially */ - subpass->has_resolve = false; + subpass->has_color_resolve = false; if (subpass->resolve_attachments) { for (uint32_t j = 0; j < 
subpass->color_count; j++) { struct anv_subpass_attachment *color_att = @@ -110,12 +114,22 @@ if (resolve_att->attachment == VK_ATTACHMENT_UNUSED) continue; - subpass->has_resolve = true; + subpass->has_color_resolve = true; assert(resolve_att->usage == VK_IMAGE_USAGE_TRANSFER_DST_BIT); color_att->usage |= VK_IMAGE_USAGE_TRANSFER_SRC_BIT; } } + + if (subpass->ds_resolve_attachment) { + struct anv_subpass_attachment *ds_att = + subpass->depth_stencil_attachment; + UNUSED struct anv_subpass_attachment *resolve_att = + subpass->ds_resolve_attachment; + + assert(resolve_att->usage == VK_IMAGE_USAGE_TRANSFER_DST_BIT); + ds_att->usage |= VK_IMAGE_USAGE_TRANSFER_SRC_BIT; + } } /* From the Vulkan 1.0.39 spec: @@ -164,12 +178,28 @@ * subpasses and checking to see if any of them don't have an external * dependency. Or, we could just be lazy and add a couple extra flushes. * We choose to be lazy. + * + * From the documentation for vkCmdNextSubpass: + * + * "Moving to the next subpass automatically performs any multisample + * resolve operations in the subpass being ended. End-of-subpass + * multisample resolves are treated as color attachment writes for the + * purposes of synchronization. This applies to resolve operations for + * both color and depth/stencil attachments. That is, they are + * considered to execute in the + * VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT pipeline stage and + * their writes are synchronized with + * VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT." + * + * Therefore, the above flags concerning color attachments also apply to + * color and depth/stencil resolve attachments. 
*/ if (all_usage & VK_IMAGE_USAGE_INPUT_ATTACHMENT_BIT) { pass->subpass_flushes[0] |= ANV_PIPE_TEXTURE_CACHE_INVALIDATE_BIT; } - if (all_usage & VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT) { + if (all_usage & (VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT | + VK_IMAGE_USAGE_TRANSFER_DST_BIT)) { pass->subpass_flushes[pass->subpass_count] |= ANV_PIPE_RENDER_TARGET_CACHE_FLUSH_BIT; } @@ -318,8 +348,8 @@ vk_foreach_struct(ext, pCreateInfo->pNext) { switch (ext->sType) { - case VK_STRUCTURE_TYPE_RENDER_PASS_MULTIVIEW_CREATE_INFO_KHR: { - VkRenderPassMultiviewCreateInfoKHR *mv = (void *)ext; + case VK_STRUCTURE_TYPE_RENDER_PASS_MULTIVIEW_CREATE_INFO: { + VkRenderPassMultiviewCreateInfo *mv = (void *)ext; for (uint32_t i = 0; i < mv->subpassCount; i++) { pass->subpasses[i].view_mask = mv->pViewMasks[i]; @@ -342,10 +372,15 @@ static unsigned num_subpass_attachments2(const VkSubpassDescription2KHR *desc) { + const VkSubpassDescriptionDepthStencilResolveKHR *ds_resolve = + vk_find_struct_const(desc->pNext, + SUBPASS_DESCRIPTION_DEPTH_STENCIL_RESOLVE_KHR); + return desc->inputAttachmentCount + desc->colorAttachmentCount + (desc->pResolveAttachments ? 
desc->colorAttachmentCount : 0) + - (desc->pDepthStencilAttachment != NULL); + (desc->pDepthStencilAttachment != NULL) + + (ds_resolve && ds_resolve->pDepthStencilResolveAttachment); } VkResult anv_CreateRenderPass2KHR( @@ -460,6 +495,22 @@ .layout = desc->pDepthStencilAttachment->layout, }; } + + const VkSubpassDescriptionDepthStencilResolveKHR *ds_resolve = + vk_find_struct_const(desc->pNext, + SUBPASS_DESCRIPTION_DEPTH_STENCIL_RESOLVE_KHR); + + if (ds_resolve && ds_resolve->pDepthStencilResolveAttachment) { + subpass->ds_resolve_attachment = subpass_attachments++; + + *subpass->ds_resolve_attachment = (struct anv_subpass_attachment) { + .usage = VK_IMAGE_USAGE_TRANSFER_DST_BIT, + .attachment = ds_resolve->pDepthStencilResolveAttachment->attachment, + .layout = ds_resolve->pDepthStencilResolveAttachment->layout, + }; + subpass->depth_resolve_mode = ds_resolve->depthResolveMode; + subpass->stencil_resolve_mode = ds_resolve->stencilResolveMode; + } } for (uint32_t i = 0; i < pCreateInfo->dependencyCount; i++) diff -Nru mesa-18.3.3/src/intel/vulkan/anv_pipeline.c mesa-19.0.1/src/intel/vulkan/anv_pipeline.c --- mesa-18.3.3/src/intel/vulkan/anv_pipeline.c 2019-02-01 12:03:20.000000000 +0000 +++ mesa-19.0.1/src/intel/vulkan/anv_pipeline.c 2019-03-31 23:16:37.000000000 +0000 @@ -32,6 +32,7 @@ #include "anv_private.h" #include "compiler/brw_nir.h" #include "anv_nir.h" +#include "nir/nir_xfb_info.h" #include "spirv/nir_spirv.h" #include "vk_util.h" @@ -97,17 +98,16 @@ * we can't do that yet because we don't have the ability to copy nir. 
*/ static nir_shader * -anv_shader_compile_to_nir(struct anv_pipeline *pipeline, +anv_shader_compile_to_nir(struct anv_device *device, void *mem_ctx, const struct anv_shader_module *module, const char *entrypoint_name, gl_shader_stage stage, const VkSpecializationInfo *spec_info) { - const struct anv_device *device = pipeline->device; - - const struct brw_compiler *compiler = - device->instance->physicalDevice.compiler; + const struct anv_physical_device *pdevice = + &device->instance->physicalDevice; + const struct brw_compiler *compiler = pdevice->compiler; const nir_shader_compiler_options *nir_options = compiler->glsl_compiler_options[stage].NirOptions; @@ -136,27 +136,34 @@ struct spirv_to_nir_options spirv_options = { .lower_workgroup_access_to_offsets = true, .caps = { - .float64 = device->instance->physicalDevice.info.gen >= 8, - .int64 = device->instance->physicalDevice.info.gen >= 8, - .tessellation = true, .device_group = true, .draw_parameters = true, + .float64 = pdevice->info.gen >= 8, + .geometry_streams = true, .image_write_without_format = true, + .int16 = pdevice->info.gen >= 8, + .int64 = pdevice->info.gen >= 8, + .min_lod = true, .multiview = true, - .variable_pointers = true, - .storage_16bit = device->instance->physicalDevice.info.gen >= 8, - .int16 = device->instance->physicalDevice.info.gen >= 8, + .post_depth_coverage = pdevice->info.gen >= 9, .shader_viewport_index_layer = true, + .stencil_export = pdevice->info.gen >= 9, + .storage_8bit = pdevice->info.gen >= 8, + .storage_16bit = pdevice->info.gen >= 8, .subgroup_arithmetic = true, .subgroup_basic = true, .subgroup_ballot = true, .subgroup_quad = true, .subgroup_shuffle = true, .subgroup_vote = true, - .stencil_export = device->instance->physicalDevice.info.gen >= 9, - .storage_8bit = device->instance->physicalDevice.info.gen >= 8, - .post_depth_coverage = device->instance->physicalDevice.info.gen >= 9, + .tessellation = true, + .transform_feedback = pdevice->info.gen >= 8, + 
.variable_pointers = true, }, + .ubo_ptr_type = glsl_vector_type(GLSL_TYPE_UINT, 2), + .ssbo_ptr_type = glsl_vector_type(GLSL_TYPE_UINT, 2), + .push_const_ptr_type = glsl_uint_type(), + .shared_ptr_type = glsl_uint_type(), }; nir_function *entry_point = @@ -180,10 +187,10 @@ * inline functions. That way they get properly initialized at the top * of the function and not at the top of its caller. */ - NIR_PASS_V(nir, nir_lower_constant_initializers, nir_var_local); + NIR_PASS_V(nir, nir_lower_constant_initializers, nir_var_function_temp); NIR_PASS_V(nir, nir_lower_returns); NIR_PASS_V(nir, nir_inline_functions); - NIR_PASS_V(nir, nir_copy_prop); + NIR_PASS_V(nir, nir_opt_deref); /* Pick off the single entrypoint that we want */ foreach_list_typed_safe(nir_function, func, node, &nir->functions) { @@ -208,8 +215,8 @@ NIR_PASS_V(nir, nir_remove_dead_variables, nir_var_shader_in | nir_var_shader_out | nir_var_system_value); - if (stage == MESA_SHADER_FRAGMENT) - NIR_PASS_V(nir, nir_lower_wpos_center, pipeline->sample_shading_enable); + NIR_PASS_V(nir, nir_lower_explicit_io, nir_var_mem_ubo | nir_var_mem_ssbo, + nir_address_format_vk_index_offset); NIR_PASS_V(nir, nir_propagate_invariant); NIR_PASS_V(nir, nir_lower_io_to_temporaries, @@ -220,9 +227,6 @@ nir = brw_preprocess_nir(compiler, nir); - if (stage == MESA_SHADER_FRAGMENT) - NIR_PASS_V(nir, anv_nir_lower_input_attachments); - return nir; } @@ -398,6 +402,8 @@ const char *entrypoint; const VkSpecializationInfo *spec_info; + unsigned char shader_sha1[20]; + union brw_any_prog_key key; struct { @@ -415,20 +421,27 @@ }; static void -anv_pipeline_hash_shader(struct mesa_sha1 *ctx, - struct anv_pipeline_stage *stage) +anv_pipeline_hash_shader(const struct anv_shader_module *module, + const char *entrypoint, + gl_shader_stage stage, + const VkSpecializationInfo *spec_info, + unsigned char *sha1_out) { - _mesa_sha1_update(ctx, stage->module->sha1, sizeof(stage->module->sha1)); - _mesa_sha1_update(ctx, stage->entrypoint, 
strlen(stage->entrypoint)); - _mesa_sha1_update(ctx, &stage->stage, sizeof(stage->stage)); - if (stage->spec_info) { - _mesa_sha1_update(ctx, stage->spec_info->pMapEntries, - stage->spec_info->mapEntryCount * - sizeof(*stage->spec_info->pMapEntries)); - _mesa_sha1_update(ctx, stage->spec_info->pData, - stage->spec_info->dataSize); + struct mesa_sha1 ctx; + _mesa_sha1_init(&ctx); + + _mesa_sha1_update(&ctx, module->sha1, sizeof(module->sha1)); + _mesa_sha1_update(&ctx, entrypoint, strlen(entrypoint)); + _mesa_sha1_update(&ctx, &stage, sizeof(stage)); + if (spec_info) { + _mesa_sha1_update(&ctx, spec_info->pMapEntries, + spec_info->mapEntryCount * + sizeof(*spec_info->pMapEntries)); + _mesa_sha1_update(&ctx, spec_info->pData, + spec_info->dataSize); } - _mesa_sha1_update(ctx, &stage->key, brw_prog_key_size(stage->stage)); + + _mesa_sha1_final(&ctx, sha1_out); } static void @@ -450,8 +463,11 @@ _mesa_sha1_update(&ctx, &rba, sizeof(rba)); for (unsigned s = 0; s < MESA_SHADER_STAGES; s++) { - if (stages[s].entrypoint) - anv_pipeline_hash_shader(&ctx, &stages[s]); + if (stages[s].entrypoint) { + _mesa_sha1_update(&ctx, stages[s].shader_sha1, + sizeof(stages[s].shader_sha1)); + _mesa_sha1_update(&ctx, &stages[s].key, brw_prog_key_size(s)); + } } _mesa_sha1_final(&ctx, sha1_out); @@ -472,11 +488,48 @@ const bool rba = pipeline->device->robust_buffer_access; _mesa_sha1_update(&ctx, &rba, sizeof(rba)); - anv_pipeline_hash_shader(&ctx, stage); + _mesa_sha1_update(&ctx, stage->shader_sha1, + sizeof(stage->shader_sha1)); + _mesa_sha1_update(&ctx, &stage->key.cs, sizeof(stage->key.cs)); _mesa_sha1_final(&ctx, sha1_out); } +static nir_shader * +anv_pipeline_stage_get_nir(struct anv_pipeline *pipeline, + struct anv_pipeline_cache *cache, + void *mem_ctx, + struct anv_pipeline_stage *stage) +{ + const struct brw_compiler *compiler = + pipeline->device->instance->physicalDevice.compiler; + const nir_shader_compiler_options *nir_options = + 
compiler->glsl_compiler_options[stage->stage].NirOptions; + nir_shader *nir; + + nir = anv_device_search_for_nir(pipeline->device, cache, + nir_options, + stage->shader_sha1, + mem_ctx); + if (nir) { + assert(nir->info.stage == stage->stage); + return nir; + } + + nir = anv_shader_compile_to_nir(pipeline->device, + mem_ctx, + stage->module, + stage->entrypoint, + stage->stage, + stage->spec_info); + if (nir) { + anv_device_upload_nir(pipeline->device, cache, nir, stage->shader_sha1); + return nir; + } + + return NULL; +} + static void anv_pipeline_lower_nir(struct anv_pipeline *pipeline, void *mem_ctx, @@ -489,6 +542,11 @@ struct brw_stage_prog_data *prog_data = &stage->prog_data.base; nir_shader *nir = stage->nir; + if (nir->info.stage == MESA_SHADER_FRAGMENT) { + NIR_PASS_V(nir, nir_lower_wpos_center, pipeline->sample_shading_enable); + NIR_PASS_V(nir, anv_nir_lower_input_attachments); + } + NIR_PASS_V(nir, anv_nir_lower_ycbcr_textures, layout); NIR_PASS_V(nir, anv_nir_lower_push_constants); @@ -536,6 +594,7 @@ pipeline->device->robust_buffer_access, layout, nir, prog_data, &stage->bind_map); + NIR_PASS_V(nir, nir_opt_constant_folding); } if (nir->info.stage != MESA_SHADER_COMPUTE) @@ -780,7 +839,7 @@ !(stage->key.wm.color_outputs_valid & (1 << rt))) { /* Unused or out-of-bounds, throw it away */ deleted_output = true; - var->data.mode = nir_var_local; + var->data.mode = nir_var_function_temp; exec_node_remove(&var->node); exec_list_push_tail(&impl->locals, &var->node); continue; @@ -875,6 +934,11 @@ stages[stage].module = anv_shader_module_from_handle(sinfo->module); stages[stage].entrypoint = sinfo->pName; stages[stage].spec_info = sinfo->pSpecializationInfo; + anv_pipeline_hash_shader(stages[stage].module, + stages[stage].entrypoint, + stage, + stages[stage].spec_info, + stages[stage].shader_sha1); const struct gen_device_info *devinfo = &pipeline->device->info; switch (stage) { @@ -976,11 +1040,9 @@ .sampler_to_descriptor = stages[s].sampler_to_descriptor }; 
- stages[s].nir = anv_shader_compile_to_nir(pipeline, pipeline_ctx, - stages[s].module, - stages[s].entrypoint, - stages[s].stage, - stages[s].spec_info); + stages[s].nir = anv_pipeline_stage_get_nir(pipeline, cache, + pipeline_ctx, + &stages[s]); if (stages[s].nir == NULL) { result = vk_error(VK_ERROR_OUT_OF_HOST_MEMORY); goto fail; @@ -1023,6 +1085,12 @@ void *stage_ctx = ralloc_context(NULL); + nir_xfb_info *xfb_info = NULL; + if (s == MESA_SHADER_VERTEX || + s == MESA_SHADER_TESS_EVAL || + s == MESA_SHADER_GEOMETRY) + xfb_info = nir_gather_xfb_info(stages[s].nir, stage_ctx); + anv_pipeline_lower_nir(pipeline, stage_ctx, &stages[s], layout); const unsigned *code; @@ -1064,7 +1132,7 @@ stages[s].nir->constant_data_size, &stages[s].prog_data.base, brw_prog_data_size(s), - &stages[s].bind_map); + xfb_info, &stages[s].bind_map); if (!bin) { ralloc_free(stage_ctx); result = vk_error(VK_ERROR_OUT_OF_HOST_MEMORY); @@ -1125,6 +1193,11 @@ .stage = MESA_SHADER_COMPUTE, } }; + anv_pipeline_hash_shader(stage.module, + stage.entrypoint, + MESA_SHADER_COMPUTE, + stage.spec_info, + stage.shader_sha1); struct anv_shader_bin *bin = NULL; @@ -1144,11 +1217,7 @@ void *mem_ctx = ralloc_context(NULL); - stage.nir = anv_shader_compile_to_nir(pipeline, mem_ctx, - stage.module, - stage.entrypoint, - stage.stage, - stage.spec_info); + stage.nir = anv_pipeline_stage_get_nir(pipeline, cache, mem_ctx, &stage); if (stage.nir == NULL) { ralloc_free(mem_ctx); return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY); @@ -1177,7 +1246,7 @@ stage.nir->constant_data_size, &stage.prog_data.base, sizeof(stage.prog_data.cs), - &stage.bind_map); + NULL, &stage.bind_map); if (!bin) { ralloc_free(mem_ctx); return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY); diff -Nru mesa-18.3.3/src/intel/vulkan/anv_pipeline_cache.c mesa-19.0.1/src/intel/vulkan/anv_pipeline_cache.c --- mesa-18.3.3/src/intel/vulkan/anv_pipeline_cache.c 2018-12-07 18:58:04.000000000 +0000 +++ mesa-19.0.1/src/intel/vulkan/anv_pipeline_cache.c 2019-03-31 
23:16:37.000000000 +0000 @@ -26,7 +26,9 @@ #include "util/debug.h" #include "util/disk_cache.h" #include "util/mesa-sha1.h" +#include "nir/nir_serialize.h" #include "anv_private.h" +#include "nir/nir_xfb_info.h" struct anv_shader_bin * anv_shader_bin_create(struct anv_device *device, @@ -35,12 +37,14 @@ const void *constant_data, uint32_t constant_data_size, const struct brw_stage_prog_data *prog_data_in, uint32_t prog_data_size, const void *prog_data_param_in, + const nir_xfb_info *xfb_info_in, const struct anv_pipeline_bind_map *bind_map) { struct anv_shader_bin *shader; struct anv_shader_bin_key *key; struct brw_stage_prog_data *prog_data; uint32_t *prog_data_param; + nir_xfb_info *xfb_info; struct anv_pipeline_binding *surface_to_descriptor, *sampler_to_descriptor; ANV_MULTIALLOC(ma); @@ -48,6 +52,10 @@ anv_multialloc_add_size(&ma, &key, sizeof(*key) + key_size); anv_multialloc_add_size(&ma, &prog_data, prog_data_size); anv_multialloc_add(&ma, &prog_data_param, prog_data_in->nr_params); + if (xfb_info_in) { + uint32_t xfb_info_size = nir_xfb_info_size(xfb_info_in->output_count); + anv_multialloc_add_size(&ma, &xfb_info, xfb_info_size); + } anv_multialloc_add(&ma, &surface_to_descriptor, bind_map->surface_count); anv_multialloc_add(&ma, &sampler_to_descriptor, @@ -85,6 +93,15 @@ shader->prog_data = prog_data; shader->prog_data_size = prog_data_size; + if (xfb_info_in) { + *xfb_info = *xfb_info_in; + typed_memcpy(xfb_info->outputs, xfb_info_in->outputs, + xfb_info_in->output_count); + shader->xfb_info = xfb_info; + } else { + shader->xfb_info = NULL; + } + shader->bind_map = *bind_map; typed_memcpy(surface_to_descriptor, bind_map->surface_to_descriptor, bind_map->surface_count); @@ -128,6 +145,15 @@ shader->prog_data->nr_params * sizeof(*shader->prog_data->param)); + if (shader->xfb_info) { + uint32_t xfb_info_size = + nir_xfb_info_size(shader->xfb_info->output_count); + ok = blob_write_uint32(blob, xfb_info_size); + ok = blob_write_bytes(blob, shader->xfb_info, 
xfb_info_size); + } else { + ok = blob_write_uint32(blob, 0); + } + ok = blob_write_uint32(blob, shader->bind_map.surface_count); ok = blob_write_uint32(blob, shader->bind_map.sampler_count); ok = blob_write_uint32(blob, shader->bind_map.image_count); @@ -162,6 +188,11 @@ const void *prog_data_param = blob_read_bytes(blob, prog_data->nr_params * sizeof(*prog_data->param)); + const nir_xfb_info *xfb_info = NULL; + uint32_t xfb_size = blob_read_uint32(blob); + if (xfb_size) + xfb_info = blob_read_bytes(blob, xfb_size); + struct anv_pipeline_bind_map bind_map; bind_map.surface_count = blob_read_uint32(blob); bind_map.sampler_count = blob_read_uint32(blob); @@ -181,7 +212,7 @@ kernel_data, kernel_size, constant_data, constant_data_size, prog_data, prog_data_size, prog_data_param, - &bind_map); + xfb_info, &bind_map); } /* Remaining work: @@ -211,6 +242,18 @@ return memcmp(a->data, b->data, a->size) == 0; } +static uint32_t +sha1_hash_func(const void *sha1) +{ + return _mesa_hash_data(sha1, 20); +} + +static bool +sha1_compare_func(const void *sha1_a, const void *sha1_b) +{ + return memcmp(sha1_a, sha1_b, 20) == 0; +} + void anv_pipeline_cache_init(struct anv_pipeline_cache *cache, struct anv_device *device, @@ -222,8 +265,11 @@ if (cache_enabled) { cache->cache = _mesa_hash_table_create(NULL, shader_bin_key_hash_func, shader_bin_key_compare_func); + cache->nir_cache = _mesa_hash_table_create(NULL, sha1_hash_func, + sha1_compare_func); } else { cache->cache = NULL; + cache->nir_cache = NULL; } } @@ -242,6 +288,13 @@ _mesa_hash_table_destroy(cache->cache, NULL); } + + if (cache->nir_cache) { + hash_table_foreach(cache->nir_cache, entry) + ralloc_free(entry->data); + + _mesa_hash_table_destroy(cache->nir_cache, NULL); + } } static struct anv_shader_bin * @@ -310,6 +363,7 @@ const struct brw_stage_prog_data *prog_data, uint32_t prog_data_size, const void *prog_data_param, + const nir_xfb_info *xfb_info, const struct anv_pipeline_bind_map *bind_map) { struct anv_shader_bin 
*shader = @@ -322,7 +376,7 @@ kernel_data, kernel_size, constant_data, constant_data_size, prog_data, prog_data_size, prog_data_param, - bind_map); + xfb_info, bind_map); if (!bin) return NULL; @@ -339,6 +393,7 @@ uint32_t constant_data_size, const struct brw_stage_prog_data *prog_data, uint32_t prog_data_size, + const nir_xfb_info *xfb_info, const struct anv_pipeline_bind_map *bind_map) { if (cache->cache) { @@ -349,7 +404,8 @@ kernel_data, kernel_size, constant_data, constant_data_size, prog_data, prog_data_size, - prog_data->param, bind_map); + prog_data->param, + xfb_info, bind_map); pthread_mutex_unlock(&cache->mutex); @@ -364,7 +420,8 @@ kernel_data, kernel_size, constant_data, constant_data_size, prog_data, prog_data_size, - prog_data->param, bind_map); + prog_data->param, + xfb_info, bind_map); } } @@ -601,6 +658,7 @@ uint32_t constant_data_size, const struct brw_stage_prog_data *prog_data, uint32_t prog_data_size, + const nir_xfb_info *xfb_info, const struct anv_pipeline_bind_map *bind_map) { struct anv_shader_bin *bin; @@ -609,13 +667,14 @@ kernel_data, kernel_size, constant_data, constant_data_size, prog_data, prog_data_size, - bind_map); + xfb_info, bind_map); } else { bin = anv_shader_bin_create(device, key_data, key_size, kernel_data, kernel_size, constant_data, constant_data_size, prog_data, prog_data_size, - prog_data->param, bind_map); + prog_data->param, + xfb_info, bind_map); } if (bin == NULL) @@ -641,3 +700,88 @@ return bin; } + +struct serialized_nir { + unsigned char sha1_key[20]; + size_t size; + char data[0]; +}; + +struct nir_shader * +anv_device_search_for_nir(struct anv_device *device, + struct anv_pipeline_cache *cache, + const nir_shader_compiler_options *nir_options, + unsigned char sha1_key[20], + void *mem_ctx) +{ + if (cache && cache->nir_cache) { + const struct serialized_nir *snir = NULL; + + pthread_mutex_lock(&cache->mutex); + struct hash_entry *entry = + _mesa_hash_table_search(cache->nir_cache, sha1_key); + if (entry) + snir 
= entry->data; + pthread_mutex_unlock(&cache->mutex); + + if (snir) { + struct blob_reader blob; + blob_reader_init(&blob, snir->data, snir->size); + + nir_shader *nir = nir_deserialize(mem_ctx, nir_options, &blob); + if (blob.overrun) { + ralloc_free(nir); + } else { + return nir; + } + } + } + + return NULL; +} + +void +anv_device_upload_nir(struct anv_device *device, + struct anv_pipeline_cache *cache, + const struct nir_shader *nir, + unsigned char sha1_key[20]) +{ + if (cache && cache->nir_cache) { + pthread_mutex_lock(&cache->mutex); + struct hash_entry *entry = + _mesa_hash_table_search(cache->nir_cache, sha1_key); + pthread_mutex_unlock(&cache->mutex); + if (entry) + return; + + struct blob blob; + blob_init(&blob); + + nir_serialize(&blob, nir); + if (blob.out_of_memory) { + blob_finish(&blob); + return; + } + + pthread_mutex_lock(&cache->mutex); + /* Because ralloc isn't thread-safe, we have to do all this inside the + * lock. We could unlock for the big memcpy but it's probably not worth + * the hassle. 
+ */ + entry = _mesa_hash_table_search(cache->nir_cache, sha1_key); + if (entry) { + pthread_mutex_unlock(&cache->mutex); + return; + } + + struct serialized_nir *snir = + ralloc_size(cache->nir_cache, sizeof(*snir) + blob.size); + memcpy(snir->sha1_key, sha1_key, 20); + snir->size = blob.size; + memcpy(snir->data, blob.data, blob.size); + + _mesa_hash_table_insert(cache->nir_cache, snir->sha1_key, snir); + + pthread_mutex_unlock(&cache->mutex); + } +} diff -Nru mesa-18.3.3/src/intel/vulkan/anv_private.h mesa-19.0.1/src/intel/vulkan/anv_private.h --- mesa-18.3.3/src/intel/vulkan/anv_private.h 2019-02-01 12:03:20.000000000 +0000 +++ mesa-19.0.1/src/intel/vulkan/anv_private.h 2019-03-31 23:16:37.000000000 +0000 @@ -76,8 +76,8 @@ #include #include #include -#include +#include "anv_android.h" #include "anv_entrypoints.h" #include "anv_extensions.h" #include "isl/isl.h" @@ -151,6 +151,8 @@ #define ANV_HZ_FC_VAL 1.0f #define MAX_VBS 28 +#define MAX_XFB_BUFFERS 4 +#define MAX_XFB_STREAMS 4 #define MAX_SETS 8 #define MAX_RTS 8 #define MAX_VIEWPORTS 16 @@ -183,6 +185,11 @@ #define ANV_SVGS_VB_INDEX MAX_VBS #define ANV_DRAWID_VB_INDEX (MAX_VBS + 1) +/* We reserve this MI ALU register for the purpose of handling predication. + * Other code which uses the MI ALU should leave it alone. + */ +#define ANV_PREDICATE_RESULT_REG MI_ALU_REG15 + #define anv_printflike(a, b) __attribute__((__format__(__printf__, a, b))) static inline uint32_t @@ -608,7 +615,7 @@ */ union anv_free_list { struct { - int32_t offset; + uint32_t offset; /* A simple count that is incremented every time the head changes. 
*/ uint32_t count; @@ -616,7 +623,7 @@ uint64_t u64; }; -#define ANV_FREE_LIST_EMPTY ((union anv_free_list) { { 1, 0 } }) +#define ANV_FREE_LIST_EMPTY ((union anv_free_list) { { UINT32_MAX, 0 } }) struct anv_block_state { union { @@ -628,12 +635,21 @@ }; }; +#define anv_block_pool_foreach_bo(bo, pool) \ + for (bo = (pool)->bos; bo != &(pool)->bos[(pool)->nbos]; bo++) + +#define ANV_MAX_BLOCK_POOL_BOS 20 + struct anv_block_pool { struct anv_device *device; uint64_t bo_flags; - struct anv_bo bo; + struct anv_bo bos[ANV_MAX_BLOCK_POOL_BOS]; + struct anv_bo *bo; + uint32_t nbos; + + uint64_t size; /* The address where the start of the pool is pinned. The various bos that * are created as the pool grows will have addresses in the range @@ -655,6 +671,9 @@ * will be valid relative to this pointer. * * In particular, map == bo.map + center_offset + * + * DO NOT access this pointer directly. Use anv_block_pool_map() instead, + * since it will handle the softpin case as well, where this points to NULL. 
*/ void *map; int fd; @@ -688,6 +707,7 @@ int32_t offset; uint32_t alloc_size; void *map; + uint32_t idx; }; #define ANV_STATE_NULL ((struct anv_state) { .alloc_size = 0 }) @@ -702,9 +722,25 @@ #define ANV_STATE_BUCKETS (ANV_MAX_STATE_SIZE_LOG2 - ANV_MIN_STATE_SIZE_LOG2 + 1) +struct anv_free_entry { + uint32_t next; + struct anv_state state; +}; + +struct anv_state_table { + struct anv_device *device; + int fd; + struct anv_free_entry *map; + uint32_t size; + struct anv_block_state state; + struct u_vector mmap_cleanups; +}; + struct anv_state_pool { struct anv_block_pool block_pool; + struct anv_state_table table; + /* The size of blocks which will be allocated from the block pool */ uint32_t block_size; @@ -742,9 +778,10 @@ uint64_t bo_flags); void anv_block_pool_finish(struct anv_block_pool *pool); int32_t anv_block_pool_alloc(struct anv_block_pool *pool, - uint32_t block_size); + uint32_t block_size, uint32_t *padding); int32_t anv_block_pool_alloc_back(struct anv_block_pool *pool, uint32_t block_size); +void* anv_block_pool_map(struct anv_block_pool *pool, int32_t offset); VkResult anv_state_pool_init(struct anv_state_pool *pool, struct anv_device *device, @@ -763,6 +800,24 @@ struct anv_state anv_state_stream_alloc(struct anv_state_stream *stream, uint32_t size, uint32_t alignment); +VkResult anv_state_table_init(struct anv_state_table *table, + struct anv_device *device, + uint32_t initial_entries); +void anv_state_table_finish(struct anv_state_table *table); +VkResult anv_state_table_add(struct anv_state_table *table, uint32_t *idx, + uint32_t count); +void anv_free_list_push(union anv_free_list *list, + struct anv_state_table *table, + uint32_t idx, uint32_t count); +struct anv_state* anv_free_list_pop(union anv_free_list *list, + struct anv_state_table *table); + + +static inline struct anv_state * +anv_state_table_get(struct anv_state_table *table, uint32_t idx) +{ + return &table->map[idx].state; +} /** * Implements a pool of re-usable BOs. 
The interface is identical to that * of block_pool except that each block is its own BO. @@ -948,9 +1003,12 @@ struct anv_device * device; pthread_mutex_t mutex; + struct hash_table * nir_cache; + struct hash_table * cache; }; +struct nir_xfb_info; struct anv_pipeline_bind_map; void anv_pipeline_cache_init(struct anv_pipeline_cache *cache, @@ -969,6 +1027,7 @@ uint32_t constant_data_size, const struct brw_stage_prog_data *prog_data, uint32_t prog_data_size, + const struct nir_xfb_info *xfb_info, const struct anv_pipeline_bind_map *bind_map); struct anv_shader_bin * @@ -985,8 +1044,25 @@ uint32_t constant_data_size, const struct brw_stage_prog_data *prog_data, uint32_t prog_data_size, + const struct nir_xfb_info *xfb_info, const struct anv_pipeline_bind_map *bind_map); +struct nir_shader; +struct nir_shader_compiler_options; + +struct nir_shader * +anv_device_search_for_nir(struct anv_device *device, + struct anv_pipeline_cache *cache, + const struct nir_shader_compiler_options *nir_options, + unsigned char sha1_key[20], + void *mem_ctx); + +void +anv_device_upload_nir(struct anv_device *device, + struct anv_pipeline_cache *cache, + const struct nir_shader *nir, + unsigned char sha1_key[20]); + struct anv_device { VK_LOADER_DATA _loader_data; @@ -1072,15 +1148,6 @@ return device->default_mocs; } -static void inline -anv_state_flush(struct anv_device *device, struct anv_state state) -{ - if (device->info.has_llc) - return; - - gen_flush_range(state.map, state.alloc_size); -} - void anv_device_init_blorp(struct anv_device *device); void anv_device_finish_blorp(struct anv_device *device); @@ -1350,70 +1417,61 @@ _dst = NULL; \ })) -#define GEN7_MOCS (struct GEN7_MEMORY_OBJECT_CONTROL_STATE) { \ - .GraphicsDataTypeGFDT = 0, \ - .LLCCacheabilityControlLLCCC = 0, \ - .L3CacheabilityControlL3CC = 1, \ -} +/* MEMORY_OBJECT_CONTROL_STATE: + * .GraphicsDataTypeGFDT = 0, + * .LLCCacheabilityControlLLCCC = 0, + * .L3CacheabilityControlL3CC = 1, + */ +#define GEN7_MOCS 1 
-#define GEN75_MOCS (struct GEN75_MEMORY_OBJECT_CONTROL_STATE) { \ - .LLCeLLCCacheabilityControlLLCCC = 0, \ - .L3CacheabilityControlL3CC = 1, \ -} +/* MEMORY_OBJECT_CONTROL_STATE: + * .LLCeLLCCacheabilityControlLLCCC = 0, + * .L3CacheabilityControlL3CC = 1, + */ +#define GEN75_MOCS 1 -#define GEN8_MOCS (struct GEN8_MEMORY_OBJECT_CONTROL_STATE) { \ - .MemoryTypeLLCeLLCCacheabilityControl = WB, \ - .TargetCache = L3DefertoPATforLLCeLLCselection, \ - .AgeforQUADLRU = 0 \ - } +/* MEMORY_OBJECT_CONTROL_STATE: + * .MemoryTypeLLCeLLCCacheabilityControl = WB, + * .TargetCache = L3DefertoPATforLLCeLLCselection, + * .AgeforQUADLRU = 0 + */ +#define GEN8_MOCS 0x78 -#define GEN8_EXTERNAL_MOCS (struct GEN8_MEMORY_OBJECT_CONTROL_STATE) { \ - .MemoryTypeLLCeLLCCacheabilityControl = UCwithFenceifcoherentcycle, \ - .TargetCache = L3DefertoPATforLLCeLLCselection, \ - .AgeforQUADLRU = 0 \ - } +/* MEMORY_OBJECT_CONTROL_STATE: + * .MemoryTypeLLCeLLCCacheabilityControl = UCwithFenceifcoherentcycle, + * .TargetCache = L3DefertoPATforLLCeLLCselection, + * .AgeforQUADLRU = 0 + */ +#define GEN8_EXTERNAL_MOCS 0x18 /* Skylake: MOCS is now an index into an array of 62 different caching * configurations programmed by the kernel. */ -#define GEN9_MOCS (struct GEN9_MEMORY_OBJECT_CONTROL_STATE) { \ - /* TC=LLC/eLLC, LeCC=WB, LRUM=3, L3CC=WB */ \ - .IndextoMOCSTables = 2 \ - } +/* TC=LLC/eLLC, LeCC=WB, LRUM=3, L3CC=WB */ +#define GEN9_MOCS (2 << 1) -#define GEN9_EXTERNAL_MOCS (struct GEN9_MEMORY_OBJECT_CONTROL_STATE) { \ - /* TC=LLC/eLLC, LeCC=WB, LRUM=3, L3CC=WB */ \ - .IndextoMOCSTables = 1 \ - } +/* TC=LLC/eLLC, LeCC=WB, LRUM=3, L3CC=WB */ +#define GEN9_EXTERNAL_MOCS (1 << 1) /* Cannonlake MOCS defines are duplicates of Skylake MOCS defines. 
*/ -#define GEN10_MOCS (struct GEN10_MEMORY_OBJECT_CONTROL_STATE) { \ - /* TC=LLC/eLLC, LeCC=WB, LRUM=3, L3CC=WB */ \ - .IndextoMOCSTables = 2 \ - } - -#define GEN10_EXTERNAL_MOCS (struct GEN10_MEMORY_OBJECT_CONTROL_STATE) { \ - /* TC=LLC/eLLC, LeCC=WB, LRUM=3, L3CC=WB */ \ - .IndextoMOCSTables = 1 \ - } +#define GEN10_MOCS GEN9_MOCS +#define GEN10_EXTERNAL_MOCS GEN9_EXTERNAL_MOCS /* Ice Lake MOCS defines are duplicates of Skylake MOCS defines. */ -#define GEN11_MOCS (struct GEN11_MEMORY_OBJECT_CONTROL_STATE) { \ - /* TC=LLC/eLLC, LeCC=WB, LRUM=3, L3CC=WB */ \ - .IndextoMOCSTables = 2 \ - } - -#define GEN11_EXTERNAL_MOCS (struct GEN11_MEMORY_OBJECT_CONTROL_STATE) { \ - /* TC=LLC/eLLC, LeCC=WB, LRUM=3, L3CC=WB */ \ - .IndextoMOCSTables = 1 \ - } +#define GEN11_MOCS GEN9_MOCS +#define GEN11_EXTERNAL_MOCS GEN9_EXTERNAL_MOCS struct anv_device_memory { struct anv_bo * bo; struct anv_memory_type * type; VkDeviceSize map_size; void * map; + + /* If set, we are holding reference to AHardwareBuffer + * which we must release when memory is freed. + */ + struct AHardwareBuffer * ahw; }; /** @@ -1523,6 +1581,10 @@ uint32_t size; uint32_t buffer_count; struct anv_buffer_view *buffer_views; + + /* Link to descriptor pool's desc_sets list . */ + struct list_head pool_link; + struct anv_descriptor descriptors[0]; }; @@ -1556,6 +1618,8 @@ struct anv_state_stream surface_state_stream; void *surface_state_free_list; + struct list_head desc_sets; + char data[0]; }; @@ -1590,7 +1654,7 @@ /* The descriptor set this template corresponds to. This value is only * valid if the template was created with the templateType - * VK_DESCRIPTOR_UPDATE_TEMPLATE_TYPE_DESCRIPTOR_SET_KHR. + * VK_DESCRIPTOR_UPDATE_TEMPLATE_TYPE_DESCRIPTOR_SET. 
*/ uint8_t set; @@ -1726,6 +1790,7 @@ ANV_CMD_DIRTY_PIPELINE = 1 << 9, ANV_CMD_DIRTY_INDEX_BUFFER = 1 << 10, ANV_CMD_DIRTY_RENDER_TARGETS = 1 << 11, + ANV_CMD_DIRTY_XFB_ENABLE = 1 << 12, }; typedef uint32_t anv_cmd_dirty_mask_t; @@ -1750,11 +1815,12 @@ ANV_PIPE_NEEDS_CS_STALL_BIT = (1 << 21), /* This bit does not exist directly in PIPE_CONTROL. It means that render - * target operations are ongoing. Some operations like copies on the - * command streamer might need to be aware of this to trigger the - * appropriate stall before they can proceed with the copy. + * target operations related to transfer commands with VkBuffer as + * destination are ongoing. Some operations like copies on the command + * streamer might need to be aware of this to trigger the appropriate stall + * before they can proceed with the copy. */ - ANV_PIPE_RENDER_TARGET_WRITES = (1 << 22), + ANV_PIPE_RENDER_TARGET_BUFFER_WRITES = (1 << 22), }; #define ANV_PIPE_FLUSH_BITS ( \ @@ -1784,19 +1850,47 @@ for_each_bit(b, flags) { switch ((VkAccessFlagBits)(1 << b)) { case VK_ACCESS_SHADER_WRITE_BIT: + /* We're transitioning a buffer that was previously used as write + * destination through the data port. To make its content available + * to future operations, flush the data cache. + */ pipe_bits |= ANV_PIPE_DATA_CACHE_FLUSH_BIT; break; case VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT: + /* We're transitioning a buffer that was previously used as render + * target. To make its content available to future operations, flush + * the render target cache. + */ pipe_bits |= ANV_PIPE_RENDER_TARGET_CACHE_FLUSH_BIT; break; case VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_WRITE_BIT: + /* We're transitioning a buffer that was previously used as depth + * buffer. To make its content available to future operations, flush + * the depth cache. + */ pipe_bits |= ANV_PIPE_DEPTH_CACHE_FLUSH_BIT; break; case VK_ACCESS_TRANSFER_WRITE_BIT: + /* We're transitioning a buffer that was previously used as a + * transfer write destination. 
Generic write operations include color + * & depth operations as well as buffer operations like : + * - vkCmdClearColorImage() + * - vkCmdClearDepthStencilImage() + * - vkCmdBlitImage() + * - vkCmdCopy*(), vkCmdUpdate*(), vkCmdFill*() + * + * Most of these operations are implemented using Blorp which writes + * through the render target, so flush that cache to make it visible + * to future operations. And for depth related operations we also + * need to flush the depth cache. + */ pipe_bits |= ANV_PIPE_RENDER_TARGET_CACHE_FLUSH_BIT; pipe_bits |= ANV_PIPE_DEPTH_CACHE_FLUSH_BIT; break; case VK_ACCESS_MEMORY_WRITE_BIT: + /* We're transitioning a buffer for generic write operations. Flush + * all the caches. + */ pipe_bits |= ANV_PIPE_FLUSH_BITS; break; default: @@ -1816,25 +1910,67 @@ for_each_bit(b, flags) { switch ((VkAccessFlagBits)(1 << b)) { case VK_ACCESS_INDIRECT_COMMAND_READ_BIT: + /* Indirect draw commands take a buffer as input that we're going to + * read from the command streamer to load some of the HW registers + * (see genX_cmd_buffer.c:load_indirect_parameters). This requires a + * command streamer stall so that all the cache flushes have + * completed before the command streamer loads from memory. + */ + pipe_bits |= ANV_PIPE_CS_STALL_BIT; + /* Indirect draw commands also set gl_BaseVertex & gl_BaseIndex + * through a vertex buffer, so invalidate that cache. + */ + pipe_bits |= ANV_PIPE_VF_CACHE_INVALIDATE_BIT; + /* For CmdDipatchIndirect, we also load gl_NumWorkGroups through a + * UBO from the buffer, so we need to invalidate constant cache. + */ + pipe_bits |= ANV_PIPE_CONSTANT_CACHE_INVALIDATE_BIT; + break; case VK_ACCESS_INDEX_READ_BIT: case VK_ACCESS_VERTEX_ATTRIBUTE_READ_BIT: + /* We transitioning a buffer to be used for as input for vkCmdDraw* + * commands, so we invalidate the VF cache to make sure there is no + * stale data when we start rendering. 
+ */ pipe_bits |= ANV_PIPE_VF_CACHE_INVALIDATE_BIT; break; case VK_ACCESS_UNIFORM_READ_BIT: + /* We transitioning a buffer to be used as uniform data. Because + * uniform is accessed through the data port & sampler, we need to + * invalidate the texture cache (sampler) & constant cache (data + * port) to avoid stale data. + */ pipe_bits |= ANV_PIPE_CONSTANT_CACHE_INVALIDATE_BIT; pipe_bits |= ANV_PIPE_TEXTURE_CACHE_INVALIDATE_BIT; break; case VK_ACCESS_SHADER_READ_BIT: case VK_ACCESS_INPUT_ATTACHMENT_READ_BIT: case VK_ACCESS_TRANSFER_READ_BIT: + /* Transitioning a buffer to be read through the sampler, so + * invalidate the texture cache, we don't want any stale data. + */ pipe_bits |= ANV_PIPE_TEXTURE_CACHE_INVALIDATE_BIT; break; case VK_ACCESS_MEMORY_READ_BIT: + /* Transitioning a buffer for generic read, invalidate all the + * caches. + */ pipe_bits |= ANV_PIPE_INVALIDATE_BITS; break; case VK_ACCESS_MEMORY_WRITE_BIT: + /* Generic write, make sure all previously written things land in + * memory. + */ pipe_bits |= ANV_PIPE_FLUSH_BITS; break; + case VK_ACCESS_CONDITIONAL_RENDERING_READ_BIT_EXT: + /* Transitioning a buffer for conditional rendering. We'll load the + * content of this buffer into HW registers using the command + * streamer, so we need to stall the command streamer to make sure + * any in-flight flush operations have completed. 
+ */ + pipe_bits |= ANV_PIPE_CS_STALL_BIT; + break; default: break; /* Nothing to do */ } @@ -1858,6 +1994,12 @@ VkDeviceSize offset; }; +struct anv_xfb_binding { + struct anv_buffer * buffer; + VkDeviceSize offset; + VkDeviceSize size; +}; + #define ANV_PARAM_PUSH(offset) ((1 << 16) | (uint32_t)(offset)) #define ANV_PARAM_PUSH_OFFSET(param) ((param) & 0xffff) @@ -2050,6 +2192,8 @@ VkRect2D render_area; uint32_t restart_index; struct anv_vertex_binding vertex_bindings[MAX_VBS]; + bool xfb_enabled; + struct anv_xfb_binding xfb_bindings[MAX_XFB_BUFFERS]; VkShaderStageFlags push_constant_stages; struct anv_push_constants * push_constants[MESA_SHADER_STAGES]; struct anv_state binding_tables[MESA_SHADER_STAGES]; @@ -2069,6 +2213,8 @@ */ bool hiz_enabled; + bool conditional_render_enabled; + /** * Array length is anv_cmd_state::pass::attachment_count. Array content is * valid only when recording a render pass instance. @@ -2215,8 +2361,6 @@ struct anv_state anv_cmd_buffer_cs_push_constants(struct anv_cmd_buffer *cmd_buffer); -void anv_cmd_buffer_resolve_subpass(struct anv_cmd_buffer *cmd_buffer); - const struct anv_image_view * anv_cmd_buffer_get_depth_stencil_view(const struct anv_cmd_buffer *cmd_buffer); @@ -2228,6 +2372,8 @@ void anv_cmd_buffer_dump(struct anv_cmd_buffer *cmd_buffer); +void anv_cmd_emit_conditional_render_predicate(struct anv_cmd_buffer *cmd_buffer); + enum anv_fence_type { ANV_FENCE_TYPE_NONE = 0, ANV_FENCE_TYPE_BO, @@ -2404,6 +2550,8 @@ const struct brw_stage_prog_data *prog_data; uint32_t prog_data_size; + struct nir_xfb_info *xfb_info; + struct anv_pipeline_bind_map bind_map; }; @@ -2414,6 +2562,7 @@ const void *constant_data, uint32_t constant_data_size, const struct brw_stage_prog_data *prog_data, uint32_t prog_data_size, const void *prog_data_param, + const struct nir_xfb_info *xfb_info, const struct anv_pipeline_bind_map *bind_map); void @@ -2463,6 +2612,8 @@ uint32_t instance_divisor; } vb[MAX_VBS]; + uint8_t xfb_used; + bool 
primitive_restart; uint32_t topology; @@ -2557,11 +2708,15 @@ /* How to map sampled ycbcr planes to a single 4 component element. */ struct isl_swizzle ycbcr_swizzle; + + /* What aspect is associated to this plane */ + VkImageAspectFlags aspect; }; struct anv_format { struct anv_format_plane planes[3]; + VkFormat vk_format; uint8_t n_planes; bool can_ycbcr; }; @@ -2589,28 +2744,6 @@ } } -static inline uint32_t -anv_image_aspect_get_planes(VkImageAspectFlags aspect_mask) -{ - uint32_t planes = 0; - - if (aspect_mask & (VK_IMAGE_ASPECT_COLOR_BIT | - VK_IMAGE_ASPECT_DEPTH_BIT | - VK_IMAGE_ASPECT_STENCIL_BIT | - VK_IMAGE_ASPECT_PLANE_0_BIT)) - planes++; - if (aspect_mask & VK_IMAGE_ASPECT_PLANE_1_BIT) - planes++; - if (aspect_mask & VK_IMAGE_ASPECT_PLANE_2_BIT) - planes++; - - if ((aspect_mask & VK_IMAGE_ASPECT_DEPTH_BIT) != 0 && - (aspect_mask & VK_IMAGE_ASPECT_STENCIL_BIT) != 0) - planes++; - - return planes; -} - static inline VkImageAspectFlags anv_plane_to_aspect(VkImageAspectFlags image_aspects, uint32_t plane) @@ -2697,6 +2830,7 @@ uint32_t samples; /**< VkImageCreateInfo::samples */ uint32_t n_planes; VkImageUsageFlags usage; /**< Superset of VkImageCreateInfo::usage. */ + VkImageCreateFlags create_flags; /* Flags used when creating image. */ VkImageTiling tiling; /** VkImageCreateInfo::tiling */ /** True if this is needs to be bound to an appropriately tiled BO. @@ -2722,6 +2856,14 @@ */ bool disjoint; + /* All the formats that can be used when creating views of this image + * are CCS_E compatible. + */ + bool ccs_e_compatible; + + /* Image was created with external format. */ + bool external_format; + /** * Image subsurfaces * @@ -2872,8 +3014,7 @@ const unsigned clear_color_state_size = device->info.gen >= 10 ? 
device->isl_dev.ss.clear_color_state_size : device->isl_dev.ss.clear_value_size; - addr.offset += clear_color_state_size; - return addr; + return anv_address_add(addr, clear_color_state_size); } static inline struct anv_address @@ -2943,6 +3084,20 @@ VkRect2D area, float depth_value, uint8_t stencil_value); void +anv_image_msaa_resolve(struct anv_cmd_buffer *cmd_buffer, + const struct anv_image *src_image, + enum isl_aux_usage src_aux_usage, + uint32_t src_level, uint32_t src_base_layer, + const struct anv_image *dst_image, + enum isl_aux_usage dst_aux_usage, + uint32_t dst_level, uint32_t dst_base_layer, + VkImageAspectFlagBits aspect, + uint32_t src_x, uint32_t src_y, + uint32_t dst_x, uint32_t dst_y, + uint32_t width, uint32_t height, + uint32_t layer_count, + enum blorp_filter filter); +void anv_image_hiz_op(struct anv_cmd_buffer *cmd_buffer, const struct anv_image *image, VkImageAspectFlagBits aspect, uint32_t level, @@ -3099,6 +3254,7 @@ isl_surf_usage_flags_t isl_extra_usage_flags; uint32_t stride; + bool external_format; }; VkResult anv_image_create(VkDevice _device, @@ -3106,14 +3262,6 @@ const VkAllocationCallbacks* alloc, VkImage *pImage); -#ifdef ANDROID -VkResult anv_image_from_gralloc(VkDevice device_h, - const VkImageCreateInfo *base_info, - const VkNativeBufferANDROID *gralloc_info, - const VkAllocationCallbacks *alloc, - VkImage *pImage); -#endif - const struct anv_surface * anv_image_get_surface_for_aspect_mask(const struct anv_image *image, VkImageAspectFlags aspect_mask); @@ -3153,6 +3301,11 @@ } } +VkFormatFeatureFlags +anv_get_image_format_features(const struct gen_device_info *devinfo, + VkFormat vk_format, + const struct anv_format *anv_format, + VkImageTiling vk_tiling); void anv_fill_buffer_surface_state(struct anv_device *device, struct anv_state state, @@ -3227,14 +3380,17 @@ struct anv_subpass_attachment * resolve_attachments; struct anv_subpass_attachment * depth_stencil_attachment; + struct anv_subpass_attachment * 
ds_resolve_attachment; + VkResolveModeFlagBitsKHR depth_resolve_mode; + VkResolveModeFlagBitsKHR stencil_resolve_mode; uint32_t view_mask; /** Subpass has a depth/stencil self-dependency */ bool has_ds_self_dep; - /** Subpass has at least one resolve attachment */ - bool has_resolve; + /** Subpass has at least one color resolve attachment */ + bool has_color_resolve; }; static inline unsigned @@ -3370,7 +3526,7 @@ ANV_DEFINE_NONDISP_HANDLE_CASTS(anv_descriptor_pool, VkDescriptorPool) ANV_DEFINE_NONDISP_HANDLE_CASTS(anv_descriptor_set, VkDescriptorSet) ANV_DEFINE_NONDISP_HANDLE_CASTS(anv_descriptor_set_layout, VkDescriptorSetLayout) -ANV_DEFINE_NONDISP_HANDLE_CASTS(anv_descriptor_update_template, VkDescriptorUpdateTemplateKHR) +ANV_DEFINE_NONDISP_HANDLE_CASTS(anv_descriptor_update_template, VkDescriptorUpdateTemplate) ANV_DEFINE_NONDISP_HANDLE_CASTS(anv_device_memory, VkDeviceMemory) ANV_DEFINE_NONDISP_HANDLE_CASTS(anv_fence, VkFence) ANV_DEFINE_NONDISP_HANDLE_CASTS(anv_event, VkEvent) diff -Nru mesa-18.3.3/src/intel/vulkan/anv_queue.c mesa-19.0.1/src/intel/vulkan/anv_queue.c --- mesa-18.3.3/src/intel/vulkan/anv_queue.c 2018-12-07 18:58:04.000000000 +0000 +++ mesa-19.0.1/src/intel/vulkan/anv_queue.c 2019-03-31 23:16:37.000000000 +0000 @@ -757,8 +757,8 @@ void anv_GetPhysicalDeviceExternalFenceProperties( VkPhysicalDevice physicalDevice, - const VkPhysicalDeviceExternalFenceInfoKHR* pExternalFenceInfo, - VkExternalFencePropertiesKHR* pExternalFenceProperties) + const VkPhysicalDeviceExternalFenceInfo* pExternalFenceInfo, + VkExternalFenceProperties* pExternalFenceProperties) { ANV_FROM_HANDLE(anv_physical_device, device, physicalDevice); @@ -927,9 +927,9 @@ if (semaphore == NULL) return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY); - const VkExportSemaphoreCreateInfoKHR *export = + const VkExportSemaphoreCreateInfo *export = vk_find_struct_const(pCreateInfo->pNext, EXPORT_SEMAPHORE_CREATE_INFO); - VkExternalSemaphoreHandleTypeFlagsKHR handleTypes = + 
VkExternalSemaphoreHandleTypeFlags handleTypes = export ? export->handleTypes : 0; if (handleTypes == 0) { @@ -1038,8 +1038,8 @@ void anv_GetPhysicalDeviceExternalSemaphoreProperties( VkPhysicalDevice physicalDevice, - const VkPhysicalDeviceExternalSemaphoreInfoKHR* pExternalSemaphoreInfo, - VkExternalSemaphorePropertiesKHR* pExternalSemaphoreProperties) + const VkPhysicalDeviceExternalSemaphoreInfo* pExternalSemaphoreInfo, + VkExternalSemaphoreProperties* pExternalSemaphoreProperties) { ANV_FROM_HANDLE(anv_physical_device, device, physicalDevice); @@ -1056,7 +1056,8 @@ case VK_EXTERNAL_SEMAPHORE_HANDLE_TYPE_SYNC_FD_BIT: if (device->has_exec_fence) { - pExternalSemaphoreProperties->exportFromImportedHandleTypes = 0; + pExternalSemaphoreProperties->exportFromImportedHandleTypes = + VK_EXTERNAL_SEMAPHORE_HANDLE_TYPE_SYNC_FD_BIT; pExternalSemaphoreProperties->compatibleHandleTypes = VK_EXTERNAL_SEMAPHORE_HANDLE_TYPE_SYNC_FD_BIT; pExternalSemaphoreProperties->externalSemaphoreFeatures = @@ -1106,7 +1107,7 @@ if (new_impl.bo->size < 4096) { anv_bo_cache_release(device, &device->bo_cache, new_impl.bo); - return vk_error(VK_ERROR_INVALID_EXTERNAL_HANDLE_KHR); + return vk_error(VK_ERROR_INVALID_EXTERNAL_HANDLE); } /* If we're going to use this as a fence, we need to *not* have the diff -Nru mesa-18.3.3/src/intel/vulkan/gen7_cmd_buffer.c mesa-19.0.1/src/intel/vulkan/gen7_cmd_buffer.c --- mesa-18.3.3/src/intel/vulkan/gen7_cmd_buffer.c 2018-10-21 19:21:33.000000000 +0000 +++ mesa-19.0.1/src/intel/vulkan/gen7_cmd_buffer.c 2019-03-31 23:16:37.000000000 +0000 @@ -70,12 +70,36 @@ }; const int max = 0xffff; + + uint32_t y_min = s->offset.y; + uint32_t x_min = s->offset.x; + uint32_t y_max = s->offset.y + s->extent.height - 1; + uint32_t x_max = s->offset.x + s->extent.width - 1; + + /* Do this math using int64_t so overflow gets clamped correctly. 
*/ + if (cmd_buffer->level == VK_COMMAND_BUFFER_LEVEL_PRIMARY) { + y_min = clamp_int64((uint64_t) y_min, + cmd_buffer->state.render_area.offset.y, max); + x_min = clamp_int64((uint64_t) x_min, + cmd_buffer->state.render_area.offset.x, max); + y_max = clamp_int64((uint64_t) y_max, 0, + cmd_buffer->state.render_area.offset.y + + cmd_buffer->state.render_area.extent.height - 1); + x_max = clamp_int64((uint64_t) x_max, 0, + cmd_buffer->state.render_area.offset.x + + cmd_buffer->state.render_area.extent.width - 1); + } else if (fb) { + y_min = clamp_int64((uint64_t) y_min, 0, max); + x_min = clamp_int64((uint64_t) x_min, 0, max); + y_max = clamp_int64((uint64_t) y_max, 0, fb->height - 1); + x_max = clamp_int64((uint64_t) x_max, 0, fb->width - 1); + } + struct GEN7_SCISSOR_RECT scissor = { - /* Do this math using int64_t so overflow gets clamped correctly. */ - .ScissorRectangleYMin = clamp_int64(s->offset.y, 0, max), - .ScissorRectangleXMin = clamp_int64(s->offset.x, 0, max), - .ScissorRectangleYMax = clamp_int64((uint64_t) s->offset.y + s->extent.height - 1, 0, fb->height - 1), - .ScissorRectangleXMax = clamp_int64((uint64_t) s->offset.x + s->extent.width - 1, 0, fb->width - 1) + .ScissorRectangleYMin = y_min, + .ScissorRectangleXMin = x_min, + .ScissorRectangleYMax = y_max, + .ScissorRectangleXMax = x_max }; if (s->extent.width <= 0 || s->extent.height <= 0) { @@ -90,8 +114,6 @@ GEN7_3DSTATE_SCISSOR_STATE_POINTERS, ssp) { ssp.ScissorRectPointer = scissor_state.offset; } - - anv_state_flush(cmd_buffer->device, scissor_state); } #endif @@ -191,7 +213,6 @@ .BackfaceStencilReferenceValue = d->stencil_reference.back & 0xff, }; GENX(COLOR_CALC_STATE_pack)(NULL, cc_state.map, &cc); - anv_state_flush(cmd_buffer->device, cc_state); anv_batch_emit(&cmd_buffer->batch, GENX(3DSTATE_CC_STATE_POINTERS), ccp) { ccp.ColorCalcStatePointer = cc_state.offset; @@ -246,7 +267,7 @@ ib.CutIndexEnable = pipeline->primitive_restart; #endif ib.IndexFormat = 
cmd_buffer->state.gfx.gen7.index_type; - ib.IndexBufferMOCS = anv_mocs_for_bo(cmd_buffer->device, + ib.MOCS = anv_mocs_for_bo(cmd_buffer->device, buffer->address.bo); ib.BufferStartingAddress = anv_address_add(buffer->address, diff -Nru mesa-18.3.3/src/intel/vulkan/gen8_cmd_buffer.c mesa-19.0.1/src/intel/vulkan/gen8_cmd_buffer.c --- mesa-18.3.3/src/intel/vulkan/gen8_cmd_buffer.c 2018-10-21 19:21:33.000000000 +0000 +++ mesa-19.0.1/src/intel/vulkan/gen8_cmd_buffer.c 2019-03-31 23:16:37.000000000 +0000 @@ -68,8 +68,6 @@ &sf_clip_viewport); } - anv_state_flush(cmd_buffer->device, sf_clip_state); - anv_batch_emit(&cmd_buffer->batch, GENX(3DSTATE_VIEWPORT_STATE_POINTERS_SF_CLIP), clip) { clip.SFClipViewportPointer = sf_clip_state.offset; @@ -97,8 +95,6 @@ GENX(CC_VIEWPORT_pack)(NULL, cc_state.map + i * 8, &cc_viewport); } - anv_state_flush(cmd_buffer->device, cc_state); - anv_batch_emit(&cmd_buffer->batch, GENX(3DSTATE_VIEWPORT_STATE_POINTERS_CC), cc) { cc.CCViewportPointer = cc_state.offset; @@ -441,8 +437,6 @@ }; GENX(COLOR_CALC_STATE_pack)(NULL, cc_state.map, &cc); - anv_state_flush(cmd_buffer->device, cc_state); - anv_batch_emit(&cmd_buffer->batch, GENX(3DSTATE_CC_STATE_POINTERS), ccp) { ccp.ColorCalcStatePointer = cc_state.offset; ccp.ColorCalcStatePointerValid = true; @@ -491,8 +485,6 @@ }; GENX(COLOR_CALC_STATE_pack)(NULL, cc_state.map, &cc); - anv_state_flush(cmd_buffer->device, cc_state); - anv_batch_emit(&cmd_buffer->batch, GENX(3DSTATE_CC_STATE_POINTERS), ccp) { ccp.ColorCalcStatePointer = cc_state.offset; ccp.ColorCalcStatePointerValid = true; @@ -565,7 +557,7 @@ anv_batch_emit(&cmd_buffer->batch, GENX(3DSTATE_INDEX_BUFFER), ib) { ib.IndexFormat = vk_to_gen_index_type[indexType]; - ib.IndexBufferMOCS = anv_mocs_for_bo(cmd_buffer->device, + ib.MOCS = anv_mocs_for_bo(cmd_buffer->device, buffer->address.bo); ib.BufferStartingAddress = anv_address_add(buffer->address, offset); ib.BufferSize = buffer->size - offset; @@ -610,7 +602,7 @@ pc.DestinationAddressType = 
DAT_PPGTT, pc.PostSyncOperation = WriteImmediateData, pc.Address = (struct anv_address) { - &cmd_buffer->device->dynamic_state_pool.block_pool.bo, + cmd_buffer->device->dynamic_state_pool.block_pool.bo, event->state.offset }; pc.ImmediateData = VK_EVENT_SET; @@ -634,7 +626,7 @@ pc.DestinationAddressType = DAT_PPGTT; pc.PostSyncOperation = WriteImmediateData; pc.Address = (struct anv_address) { - &cmd_buffer->device->dynamic_state_pool.block_pool.bo, + cmd_buffer->device->dynamic_state_pool.block_pool.bo, event->state.offset }; pc.ImmediateData = VK_EVENT_RESET; @@ -663,7 +655,7 @@ sem.CompareOperation = COMPARE_SAD_EQUAL_SDD, sem.SemaphoreDataDword = VK_EVENT_SET, sem.SemaphoreAddress = (struct anv_address) { - &cmd_buffer->device->dynamic_state_pool.block_pool.bo, + cmd_buffer->device->dynamic_state_pool.block_pool.bo, event->state.offset }; } diff -Nru mesa-18.3.3/src/intel/vulkan/genX_blorp_exec.c mesa-19.0.1/src/intel/vulkan/genX_blorp_exec.c --- mesa-18.3.3/src/intel/vulkan/genX_blorp_exec.c 2018-12-07 18:58:04.000000000 +0000 +++ mesa-19.0.1/src/intel/vulkan/genX_blorp_exec.c 2019-03-31 23:16:37.000000000 +0000 @@ -63,20 +63,28 @@ if (result != VK_SUCCESS) anv_batch_set_error(&cmd_buffer->batch, result); - void *dest = cmd_buffer->device->surface_state_pool.block_pool.map + - ss_offset; + void *dest = anv_block_pool_map( + &cmd_buffer->device->surface_state_pool.block_pool, ss_offset); uint64_t val = ((struct anv_bo*)address.buffer)->offset + address.offset + delta; write_reloc(cmd_buffer->device, dest, val, false); } +static uint64_t +blorp_get_surface_address(struct blorp_batch *blorp_batch, + struct blorp_address address) +{ + /* We'll let blorp_surface_reloc write the address. 
*/ + return 0ull; +} + #if GEN_GEN >= 7 && GEN_GEN < 10 static struct blorp_address blorp_get_surface_base_address(struct blorp_batch *batch) { struct anv_cmd_buffer *cmd_buffer = batch->driver_batch; return (struct blorp_address) { - .buffer = &cmd_buffer->device->surface_state_pool.block_pool.bo, + .buffer = cmd_buffer->device->surface_state_pool.block_pool.bo, .offset = 0, }; } @@ -124,8 +132,6 @@ surface_offsets[i] = surface_state.offset; surface_maps[i] = surface_state.map; } - - anv_state_flush(cmd_buffer->device, bt_state); } static void * @@ -150,7 +156,7 @@ anv_cmd_buffer_alloc_dynamic_state(cmd_buffer, size, 64); *addr = (struct blorp_address) { - .buffer = &cmd_buffer->device->dynamic_state_pool.block_pool.bo, + .buffer = cmd_buffer->device->dynamic_state_pool.block_pool.bo, .offset = vb_state.offset, .mocs = cmd_buffer->device->default_mocs, }; @@ -183,9 +189,8 @@ static void blorp_flush_range(struct blorp_batch *batch, void *start, size_t size) { - struct anv_device *device = batch->blorp->driver_ctx; - if (!device->info.has_llc) - gen_flush_range(start, size); + /* We don't need to flush states anymore, since everything will be snooped. 
+ */ } static void @@ -263,5 +268,4 @@ cmd_buffer->state.gfx.vb_dirty = ~0; cmd_buffer->state.gfx.dirty = ~0; cmd_buffer->state.push_constants_dirty = ~0; - cmd_buffer->state.pending_pipe_bits |= ANV_PIPE_RENDER_TARGET_WRITES; } diff -Nru mesa-18.3.3/src/intel/vulkan/genX_cmd_buffer.c mesa-19.0.1/src/intel/vulkan/genX_cmd_buffer.c --- mesa-18.3.3/src/intel/vulkan/genX_cmd_buffer.c 2019-02-01 12:03:20.000000000 +0000 +++ mesa-19.0.1/src/intel/vulkan/genX_cmd_buffer.c 2019-03-31 23:16:37.000000000 +0000 @@ -27,6 +27,7 @@ #include "anv_private.h" #include "vk_format_info.h" #include "vk_util.h" +#include "util/fast_idiv_by_const.h" #include "common/gen_l3_config.h" #include "genxml/gen_macros.h" @@ -86,26 +87,26 @@ anv_batch_emit(&cmd_buffer->batch, GENX(STATE_BASE_ADDRESS), sba) { sba.GeneralStateBaseAddress = (struct anv_address) { NULL, 0 }; - sba.GeneralStateMemoryObjectControlState = GENX(MOCS); + sba.GeneralStateMOCS = GENX(MOCS); sba.GeneralStateBaseAddressModifyEnable = true; sba.SurfaceStateBaseAddress = anv_cmd_buffer_surface_base_address(cmd_buffer); - sba.SurfaceStateMemoryObjectControlState = GENX(MOCS); + sba.SurfaceStateMOCS = GENX(MOCS); sba.SurfaceStateBaseAddressModifyEnable = true; sba.DynamicStateBaseAddress = - (struct anv_address) { &device->dynamic_state_pool.block_pool.bo, 0 }; - sba.DynamicStateMemoryObjectControlState = GENX(MOCS); + (struct anv_address) { device->dynamic_state_pool.block_pool.bo, 0 }; + sba.DynamicStateMOCS = GENX(MOCS); sba.DynamicStateBaseAddressModifyEnable = true; sba.IndirectObjectBaseAddress = (struct anv_address) { NULL, 0 }; - sba.IndirectObjectMemoryObjectControlState = GENX(MOCS); + sba.IndirectObjectMOCS = GENX(MOCS); sba.IndirectObjectBaseAddressModifyEnable = true; sba.InstructionBaseAddress = - (struct anv_address) { &device->instruction_state_pool.block_pool.bo, 0 }; - sba.InstructionMemoryObjectControlState = GENX(MOCS); + (struct anv_address) { device->instruction_state_pool.block_pool.bo, 0 }; + 
sba.InstructionMOCS = GENX(MOCS); sba.InstructionBaseAddressModifyEnable = true; # if (GEN_GEN >= 8) @@ -124,13 +125,13 @@ # endif # if (GEN_GEN >= 9) sba.BindlessSurfaceStateBaseAddress = (struct anv_address) { NULL, 0 }; - sba.BindlessSurfaceStateMemoryObjectControlState = GENX(MOCS); + sba.BindlessSurfaceStateMOCS = GENX(MOCS); sba.BindlessSurfaceStateBaseAddressModifyEnable = true; sba.BindlessSurfaceStateSize = 0; # endif # if (GEN_GEN >= 10) sba.BindlessSamplerStateBaseAddress = (struct anv_address) { NULL, 0 }; - sba.BindlessSamplerStateMemoryObjectControlState = GENX(MOCS); + sba.BindlessSamplerStateMOCS = GENX(MOCS); sba.BindlessSamplerStateBaseAddressModifyEnable = true; sba.BindlessSamplerStateBufferSize = 0; # endif @@ -479,8 +480,9 @@ 0, 0, 1, hiz_op); } -#define MI_PREDICATE_SRC0 0x2400 -#define MI_PREDICATE_SRC1 0x2408 +#define MI_PREDICATE_SRC0 0x2400 +#define MI_PREDICATE_SRC1 0x2408 +#define MI_PREDICATE_RESULT 0x2418 static void set_image_compressed_bit(struct anv_cmd_buffer *cmd_buffer, @@ -886,7 +888,7 @@ assert(image->aspects & VK_IMAGE_ASPECT_ANY_COLOR_BIT_ANV); struct anv_address ss_clear_addr = { - .bo = &cmd_buffer->device->surface_state_pool.block_pool.bo, + .bo = cmd_buffer->device->surface_state_pool.block_pool.bo, .offset = surface_state.offset + cmd_buffer->device->isl_dev.ss.clear_value_offset, }; @@ -1411,6 +1413,19 @@ cmd_buffer->state.gfx.dirty |= ANV_CMD_DIRTY_RENDER_TARGETS; } +#if GEN_GEN >= 8 || GEN_IS_HASWELL + if (cmd_buffer->level == VK_COMMAND_BUFFER_LEVEL_SECONDARY) { + const VkCommandBufferInheritanceConditionalRenderingInfoEXT *conditional_rendering_info = + vk_find_struct_const(pBeginInfo->pInheritanceInfo->pNext, COMMAND_BUFFER_INHERITANCE_CONDITIONAL_RENDERING_INFO_EXT); + + /* If secondary buffer supports conditional rendering + * we should emit commands as if conditional rendering is enabled. 
+ */ + cmd_buffer->state.conditional_render_enabled = + conditional_rendering_info && conditional_rendering_info->conditionalRenderingEnable; + } +#endif + return result; } @@ -1515,6 +1530,19 @@ assert(secondary->level == VK_COMMAND_BUFFER_LEVEL_SECONDARY); assert(!anv_batch_has_error(&secondary->batch)); +#if GEN_GEN >= 8 || GEN_IS_HASWELL + if (secondary->state.conditional_render_enabled) { + if (!primary->state.conditional_render_enabled) { + /* Secondary buffer is constructed as if it will be executed + * with conditional rendering, we should satisfy this dependency + * regardless of conditional rendering being enabled in primary. + */ + emit_lri(&primary->batch, CS_GPR(ANV_PREDICATE_RESULT_REG), UINT32_MAX); + emit_lri(&primary->batch, CS_GPR(ANV_PREDICATE_RESULT_REG) + 4, UINT32_MAX); + } + } +#endif + if (secondary->usage_flags & VK_COMMAND_BUFFER_USAGE_RENDER_PASS_CONTINUE_BIT) { /* If we're continuing a render pass from the primary, we need to @@ -1522,7 +1550,7 @@ * we allocated for them in BeginCommandBuffer. */ struct anv_bo *ss_bo = - &primary->device->surface_state_pool.block_pool.bo; + primary->device->surface_state_pool.block_pool.bo; struct anv_state src_state = primary->state.render_pass_states; struct anv_state dst_state = secondary->state.render_pass_states; assert(src_state.alloc_size == dst_state.alloc_size); @@ -1631,6 +1659,14 @@ uint32_t l3cr; anv_pack_struct(&l3cr, GENX(L3CNTLREG), .SLMEnable = has_slm, +#if GEN_GEN == 11 + /* WA_1406697149: Bit 9 "Error Detection Behavior Control" must be set + * in L3CNTLREG register. The default setting of the bit is not the + * desirable behavior. + */ + .ErrorDetectionBehaviorControl = true, + .UseFullWays = true, +#endif .URBAllocation = cfg->n[GEN_L3P_URB], .ROAllocation = cfg->n[GEN_L3P_RO], .DCAllocation = cfg->n[GEN_L3P_DC], @@ -1776,7 +1812,7 @@ * saying that render target writes are ongoing. 
*/ if (bits & ANV_PIPE_RENDER_TARGET_CACHE_FLUSH_BIT) - bits &= ~(ANV_PIPE_RENDER_TARGET_WRITES); + bits &= ~(ANV_PIPE_RENDER_TARGET_BUFFER_WRITES); bits &= ~(ANV_PIPE_FLUSH_BITS | ANV_PIPE_CS_STALL_BIT); } @@ -2102,7 +2138,7 @@ anv_cmd_buffer_alloc_surface_state(cmd_buffer); struct anv_address constant_data = { - .bo = &pipeline->device->dynamic_state_pool.block_pool.bo, + .bo = pipeline->device->dynamic_state_pool.block_pool.bo, .offset = pipeline->shaders[stage]->constant_data.offset, }; unsigned constant_data_size = @@ -2243,8 +2279,6 @@ assert(image == map->image_count); out: - anv_state_flush(cmd_buffer->device, *bt_state); - #if GEN_GEN >= 11 /* The PIPE_CONTROL command description says: * @@ -2316,8 +2350,6 @@ sampler->state[binding->plane], sizeof(sampler->state[0])); } - anv_state_flush(cmd_buffer->device, *state); - return VK_SUCCESS; } @@ -2479,7 +2511,7 @@ uint32_t read_len; if (binding->set == ANV_DESCRIPTOR_SET_SHADER_CONSTANTS) { struct anv_address constant_data = { - .bo = &pipeline->device->dynamic_state_pool.block_pool.bo, + .bo = pipeline->device->dynamic_state_pool.block_pool.bo, .offset = pipeline->shaders[stage]->constant_data.offset, }; unsigned constant_data_size = @@ -2527,7 +2559,7 @@ if (state.alloc_size > 0) { c.ConstantBody.Buffer[n] = (struct anv_address) { - .bo = &cmd_buffer->device->dynamic_state_pool.block_pool.bo, + .bo = cmd_buffer->device->dynamic_state_pool.block_pool.bo, .offset = state.offset, }; c.ConstantBody.ReadLength[n] = @@ -2587,8 +2619,7 @@ struct GENX(VERTEX_BUFFER_STATE) state = { .VertexBufferIndex = vb, - .VertexBufferMOCS = anv_mocs_for_bo(cmd_buffer->device, - buffer->address.bo), + .MOCS = anv_mocs_for_bo(cmd_buffer->device, buffer->address.bo), #if GEN_GEN <= 7 .BufferAccessType = pipeline->vb[vb].instanced ? 
INSTANCEDATA : VERTEXDATA, .InstanceDataStepRate = pipeline->vb[vb].instance_divisor, @@ -2612,6 +2643,34 @@ cmd_buffer->state.gfx.vb_dirty &= ~vb_emit; +#if GEN_GEN >= 8 + if (cmd_buffer->state.gfx.dirty & ANV_CMD_DIRTY_XFB_ENABLE) { + /* We don't need any per-buffer dirty tracking because you're not + * allowed to bind different XFB buffers while XFB is enabled. + */ + for (unsigned idx = 0; idx < MAX_XFB_BUFFERS; idx++) { + struct anv_xfb_binding *xfb = &cmd_buffer->state.xfb_bindings[idx]; + anv_batch_emit(&cmd_buffer->batch, GENX(3DSTATE_SO_BUFFER), sob) { + sob.SOBufferIndex = idx; + + if (cmd_buffer->state.xfb_enabled && xfb->buffer && xfb->size != 0) { + sob.SOBufferEnable = true; + sob.MOCS = cmd_buffer->device->default_mocs, + sob.StreamOffsetWriteEnable = false; + sob.SurfaceBaseAddress = anv_address_add(xfb->buffer->address, + xfb->offset); + /* Size is in DWords - 1 */ + sob.SurfaceSize = xfb->size / 4 - 1; + } + } + } + + /* CNL and later require a CS stall after 3DSTATE_SO_BUFFER */ + if (GEN_GEN >= 10) + cmd_buffer->state.pending_pipe_bits |= ANV_PIPE_CS_STALL_BIT; + } +#endif + if (cmd_buffer->state.gfx.dirty & ANV_CMD_DIRTY_PIPELINE) { anv_batch_emit_batch(&cmd_buffer->batch, &pipeline->batch); @@ -2706,7 +2765,7 @@ .VertexBufferIndex = index, .AddressModifyEnable = true, .BufferPitch = 0, - .VertexBufferMOCS = anv_mocs_for_bo(cmd_buffer->device, addr.bo), + .MOCS = anv_mocs_for_bo(cmd_buffer->device, addr.bo), #if (GEN_GEN >= 8) .BufferStartingAddress = addr, .BufferSize = size @@ -2734,10 +2793,8 @@ ((uint32_t *)id_state.map)[0] = base_vertex; ((uint32_t *)id_state.map)[1] = base_instance; - anv_state_flush(cmd_buffer->device, id_state); - struct anv_address addr = { - .bo = &cmd_buffer->device->dynamic_state_pool.block_pool.bo, + .bo = cmd_buffer->device->dynamic_state_pool.block_pool.bo, .offset = id_state.offset, }; @@ -2752,10 +2809,8 @@ ((uint32_t *)state.map)[0] = draw_index; - anv_state_flush(cmd_buffer->device, state); - struct 
anv_address addr = { - .bo = &cmd_buffer->device->dynamic_state_pool.block_pool.bo, + .bo = cmd_buffer->device->dynamic_state_pool.block_pool.bo, .offset = state.offset, }; @@ -2778,6 +2833,9 @@ genX(cmd_buffer_flush_state)(cmd_buffer); + if (cmd_buffer->state.conditional_render_enabled) + genX(cmd_emit_conditional_render_predicate)(cmd_buffer); + if (vs_prog_data->uses_firstvertex || vs_prog_data->uses_baseinstance) emit_base_vertex_instance(cmd_buffer, firstVertex, firstInstance); @@ -2790,6 +2848,7 @@ instanceCount *= anv_subpass_view_count(cmd_buffer->state.subpass); anv_batch_emit(&cmd_buffer->batch, GENX(3DPRIMITIVE), prim) { + prim.PredicateEnable = cmd_buffer->state.conditional_render_enabled; prim.VertexAccessType = SEQUENTIAL; prim.PrimitiveTopologyType = pipeline->topology; prim.VertexCountPerInstance = vertexCount; @@ -2798,8 +2857,6 @@ prim.StartInstanceLocation = firstInstance; prim.BaseVertexLocation = 0; } - - cmd_buffer->state.pending_pipe_bits |= ANV_PIPE_RENDER_TARGET_WRITES; } void genX(CmdDrawIndexed)( @@ -2819,6 +2876,9 @@ genX(cmd_buffer_flush_state)(cmd_buffer); + if (cmd_buffer->state.conditional_render_enabled) + genX(cmd_emit_conditional_render_predicate)(cmd_buffer); + if (vs_prog_data->uses_firstvertex || vs_prog_data->uses_baseinstance) emit_base_vertex_instance(cmd_buffer, vertexOffset, firstInstance); @@ -2831,6 +2891,7 @@ instanceCount *= anv_subpass_view_count(cmd_buffer->state.subpass); anv_batch_emit(&cmd_buffer->batch, GENX(3DPRIMITIVE), prim) { + prim.PredicateEnable = cmd_buffer->state.conditional_render_enabled; prim.VertexAccessType = RANDOM; prim.PrimitiveTopologyType = pipeline->topology; prim.VertexCountPerInstance = indexCount; @@ -2839,8 +2900,6 @@ prim.StartInstanceLocation = firstInstance; prim.BaseVertexLocation = vertexOffset; } - - cmd_buffer->state.pending_pipe_bits |= ANV_PIPE_RENDER_TARGET_WRITES; } /* Auto-Draw / Indirect Registers */ @@ -2901,8 +2960,154 @@ build_alu_multiply_gpr0(dw + 1, &num_dwords, N); } 
+static void +emit_alu_add(struct anv_batch *batch, unsigned dst_reg, + unsigned reg_a, unsigned reg_b) +{ + uint32_t *dw = anv_batch_emitn(batch, 1 + 4, GENX(MI_MATH)); + dw[1] = mi_alu(MI_ALU_LOAD, MI_ALU_SRCA, reg_a); + dw[2] = mi_alu(MI_ALU_LOAD, MI_ALU_SRCB, reg_b); + dw[3] = mi_alu(MI_ALU_ADD, 0, 0); + dw[4] = mi_alu(MI_ALU_STORE, dst_reg, MI_ALU_ACCU); +} + +static void +emit_add32_gpr0(struct anv_batch *batch, uint32_t N) +{ + emit_lri(batch, CS_GPR(1), N); + emit_alu_add(batch, MI_ALU_REG0, MI_ALU_REG0, MI_ALU_REG1); +} + +static void +emit_alu_shl(struct anv_batch *batch, unsigned dst_reg, + unsigned src_reg, unsigned shift) +{ + assert(shift > 0); + + uint32_t *dw = anv_batch_emitn(batch, 1 + 4 * shift, GENX(MI_MATH)); + for (unsigned i = 0; i < shift; i++) { + unsigned add_src = (i == 0) ? src_reg : dst_reg; + dw[1 + (i * 4) + 0] = mi_alu(MI_ALU_LOAD, MI_ALU_SRCA, add_src); + dw[1 + (i * 4) + 1] = mi_alu(MI_ALU_LOAD, MI_ALU_SRCB, add_src); + dw[1 + (i * 4) + 2] = mi_alu(MI_ALU_ADD, 0, 0); + dw[1 + (i * 4) + 3] = mi_alu(MI_ALU_STORE, dst_reg, MI_ALU_ACCU); + } +} + +static void +emit_div32_gpr0(struct anv_batch *batch, uint32_t D) +{ + /* Zero out the top of GPR0 */ + emit_lri(batch, CS_GPR(0) + 4, 0); + + if (D == 0) { + /* This invalid, but we should do something so we set GPR0 to 0. */ + emit_lri(batch, CS_GPR(0), 0); + } else if (util_is_power_of_two_or_zero(D)) { + unsigned log2_D = util_logbase2(D); + assert(log2_D < 32); + /* We right-shift by log2(D) by left-shifting by 32 - log2(D) and taking + * the top 32 bits of the result. + */ + emit_alu_shl(batch, MI_ALU_REG0, MI_ALU_REG0, 32 - log2_D); + emit_lrr(batch, CS_GPR(0) + 0, CS_GPR(0) + 4); + emit_lri(batch, CS_GPR(0) + 4, 0); + } else { + struct util_fast_udiv_info m = util_compute_fast_udiv_info(D, 32, 32); + assert(m.multiplier <= UINT32_MAX); + + if (m.pre_shift) { + /* We right-shift by L by left-shifting by 32 - l and taking the top + * 32 bits of the result. 
+ */ + if (m.pre_shift < 32) + emit_alu_shl(batch, MI_ALU_REG0, MI_ALU_REG0, 32 - m.pre_shift); + emit_lrr(batch, CS_GPR(0) + 0, CS_GPR(0) + 4); + emit_lri(batch, CS_GPR(0) + 4, 0); + } + + /* Do the 32x32 multiply into gpr0 */ + emit_mul_gpr0(batch, m.multiplier); + + if (m.increment) { + /* If we need to increment, save off a copy of GPR0 */ + emit_lri(batch, CS_GPR(1) + 0, m.multiplier); + emit_lri(batch, CS_GPR(1) + 4, 0); + emit_alu_add(batch, MI_ALU_REG0, MI_ALU_REG0, MI_ALU_REG1); + } + + /* Shift by 32 */ + emit_lrr(batch, CS_GPR(0) + 0, CS_GPR(0) + 4); + emit_lri(batch, CS_GPR(0) + 4, 0); + + if (m.post_shift) { + /* We right-shift by L by left-shifting by 32 - l and taking the top + * 32 bits of the result. + */ + if (m.post_shift < 32) + emit_alu_shl(batch, MI_ALU_REG0, MI_ALU_REG0, 32 - m.post_shift); + emit_lrr(batch, CS_GPR(0) + 0, CS_GPR(0) + 4); + emit_lri(batch, CS_GPR(0) + 4, 0); + } + } +} + #endif /* GEN_IS_HASWELL || GEN_GEN >= 8 */ +void genX(CmdDrawIndirectByteCountEXT)( + VkCommandBuffer commandBuffer, + uint32_t instanceCount, + uint32_t firstInstance, + VkBuffer counterBuffer, + VkDeviceSize counterBufferOffset, + uint32_t counterOffset, + uint32_t vertexStride) +{ +#if GEN_IS_HASWELL || GEN_GEN >= 8 + ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, commandBuffer); + ANV_FROM_HANDLE(anv_buffer, counter_buffer, counterBuffer); + struct anv_pipeline *pipeline = cmd_buffer->state.gfx.base.pipeline; + const struct brw_vs_prog_data *vs_prog_data = get_vs_prog_data(pipeline); + + /* firstVertex is always zero for this draw function */ + const uint32_t firstVertex = 0; + + if (anv_batch_has_error(&cmd_buffer->batch)) + return; + + genX(cmd_buffer_flush_state)(cmd_buffer); + + if (vs_prog_data->uses_firstvertex || + vs_prog_data->uses_baseinstance) + emit_base_vertex_instance(cmd_buffer, firstVertex, firstInstance); + if (vs_prog_data->uses_drawid) + emit_draw_index(cmd_buffer, 0); + + /* Our implementation of VK_KHR_multiview uses instancing to draw 
the + * different views. We need to multiply instanceCount by the view count. + */ + instanceCount *= anv_subpass_view_count(cmd_buffer->state.subpass); + + emit_lrm(&cmd_buffer->batch, CS_GPR(0), + anv_address_add(counter_buffer->address, counterBufferOffset)); + if (counterOffset) + emit_add32_gpr0(&cmd_buffer->batch, -counterOffset); + emit_div32_gpr0(&cmd_buffer->batch, vertexStride); + emit_lrr(&cmd_buffer->batch, GEN7_3DPRIM_VERTEX_COUNT, CS_GPR(0)); + + emit_lri(&cmd_buffer->batch, GEN7_3DPRIM_START_VERTEX, firstVertex); + emit_lri(&cmd_buffer->batch, GEN7_3DPRIM_INSTANCE_COUNT, instanceCount); + emit_lri(&cmd_buffer->batch, GEN7_3DPRIM_START_INSTANCE, firstInstance); + emit_lri(&cmd_buffer->batch, GEN7_3DPRIM_BASE_VERTEX, 0); + + anv_batch_emit(&cmd_buffer->batch, GENX(3DPRIMITIVE), prim) { + prim.IndirectParameterEnable = true; + prim.VertexAccessType = SEQUENTIAL; + prim.PrimitiveTopologyType = pipeline->topology; + } +#endif /* GEN_IS_HASWELL || GEN_GEN >= 8 */ +} + static void load_indirect_parameters(struct anv_cmd_buffer *cmd_buffer, struct anv_address addr, @@ -2955,6 +3160,9 @@ genX(cmd_buffer_flush_state)(cmd_buffer); + if (cmd_buffer->state.conditional_render_enabled) + genX(cmd_emit_conditional_render_predicate)(cmd_buffer); + for (uint32_t i = 0; i < drawCount; i++) { struct anv_address draw = anv_address_add(buffer->address, offset); @@ -2968,14 +3176,13 @@ anv_batch_emit(&cmd_buffer->batch, GENX(3DPRIMITIVE), prim) { prim.IndirectParameterEnable = true; + prim.PredicateEnable = cmd_buffer->state.conditional_render_enabled; prim.VertexAccessType = SEQUENTIAL; prim.PrimitiveTopologyType = pipeline->topology; } offset += stride; } - - cmd_buffer->state.pending_pipe_bits |= ANV_PIPE_RENDER_TARGET_WRITES; } void genX(CmdDrawIndexedIndirect)( @@ -2995,6 +3202,9 @@ genX(cmd_buffer_flush_state)(cmd_buffer); + if (cmd_buffer->state.conditional_render_enabled) + genX(cmd_emit_conditional_render_predicate)(cmd_buffer); + for (uint32_t i = 0; i < 
drawCount; i++) { struct anv_address draw = anv_address_add(buffer->address, offset); @@ -3009,14 +3219,333 @@ anv_batch_emit(&cmd_buffer->batch, GENX(3DPRIMITIVE), prim) { prim.IndirectParameterEnable = true; + prim.PredicateEnable = cmd_buffer->state.conditional_render_enabled; prim.VertexAccessType = RANDOM; prim.PrimitiveTopologyType = pipeline->topology; } offset += stride; } +} + +#define TMP_DRAW_COUNT_REG MI_ALU_REG14 + +static void +prepare_for_draw_count_predicate(struct anv_cmd_buffer *cmd_buffer, + struct anv_address count_address, + const bool conditional_render_enabled) +{ + if (conditional_render_enabled) { +#if GEN_GEN >= 8 || GEN_IS_HASWELL + emit_lrm(&cmd_buffer->batch, CS_GPR(TMP_DRAW_COUNT_REG), count_address); + emit_lri(&cmd_buffer->batch, CS_GPR(TMP_DRAW_COUNT_REG) + 4, 0); +#endif + } else { + /* Upload the current draw count from the draw parameters buffer to + * MI_PREDICATE_SRC0. + */ + emit_lrm(&cmd_buffer->batch, MI_PREDICATE_SRC0, count_address); + emit_lri(&cmd_buffer->batch, MI_PREDICATE_SRC0 + 4, 0); + + emit_lri(&cmd_buffer->batch, MI_PREDICATE_SRC1 + 4, 0); + } +} + +static void +emit_draw_count_predicate(struct anv_cmd_buffer *cmd_buffer, + uint32_t draw_index) +{ + /* Upload the index of the current primitive to MI_PREDICATE_SRC1. 
*/ + emit_lri(&cmd_buffer->batch, MI_PREDICATE_SRC1, draw_index); + + if (draw_index == 0) { + anv_batch_emit(&cmd_buffer->batch, GENX(MI_PREDICATE), mip) { + mip.LoadOperation = LOAD_LOADINV; + mip.CombineOperation = COMBINE_SET; + mip.CompareOperation = COMPARE_SRCS_EQUAL; + } + } else { + /* While draw_index < draw_count the predicate's result will be + * (draw_index == draw_count) ^ TRUE = TRUE + * When draw_index == draw_count the result is + * (TRUE) ^ TRUE = FALSE + * After this all results will be: + * (FALSE) ^ FALSE = FALSE + */ + anv_batch_emit(&cmd_buffer->batch, GENX(MI_PREDICATE), mip) { + mip.LoadOperation = LOAD_LOAD; + mip.CombineOperation = COMBINE_XOR; + mip.CompareOperation = COMPARE_SRCS_EQUAL; + } + } +} + +#if GEN_GEN >= 8 || GEN_IS_HASWELL +static void +emit_draw_count_predicate_with_conditional_render( + struct anv_cmd_buffer *cmd_buffer, + uint32_t draw_index) +{ + const int draw_index_reg = MI_ALU_REG0; + const int tmp_result_reg = MI_ALU_REG1; + + emit_lri(&cmd_buffer->batch, CS_GPR(draw_index_reg), draw_index); + emit_lri(&cmd_buffer->batch, CS_GPR(draw_index_reg) + 4, 0); + + uint32_t *dw; + /* Compute (draw_index < draw_count). + * We do this by subtracting and storing the carry bit. + */ + dw = anv_batch_emitn(&cmd_buffer->batch, 9, GENX(MI_MATH)); + dw[1] = mi_alu(MI_ALU_LOAD, MI_ALU_SRCA, draw_index_reg); + dw[2] = mi_alu(MI_ALU_LOAD, MI_ALU_SRCB, TMP_DRAW_COUNT_REG); + dw[3] = mi_alu(MI_ALU_SUB, 0, 0); + dw[4] = mi_alu(MI_ALU_STORE, tmp_result_reg, MI_ALU_CF); + /* & condition */ + dw[5] = mi_alu(MI_ALU_LOAD, MI_ALU_SRCA, tmp_result_reg); + dw[6] = mi_alu(MI_ALU_LOAD, MI_ALU_SRCB, ANV_PREDICATE_RESULT_REG); + dw[7] = mi_alu(MI_ALU_AND, 0, 0); + dw[8] = mi_alu(MI_ALU_STORE, tmp_result_reg, MI_ALU_ACCU); + +#if GEN_GEN >= 8 + emit_lrr(&cmd_buffer->batch, MI_PREDICATE_RESULT, CS_GPR(tmp_result_reg)); +#else + /* MI_PREDICATE_RESULT is not whitelisted in i915 command parser + * so we emit MI_PREDICATE to set it. 
+ */ + + emit_lrr(&cmd_buffer->batch, MI_PREDICATE_SRC0, CS_GPR(tmp_result_reg)); + emit_lri(&cmd_buffer->batch, MI_PREDICATE_SRC0 + 4, 0); + emit_lri(&cmd_buffer->batch, MI_PREDICATE_SRC1, 0); + emit_lri(&cmd_buffer->batch, MI_PREDICATE_SRC1 + 4, 0); - cmd_buffer->state.pending_pipe_bits |= ANV_PIPE_RENDER_TARGET_WRITES; + anv_batch_emit(&cmd_buffer->batch, GENX(MI_PREDICATE), mip) { + mip.LoadOperation = LOAD_LOADINV; + mip.CombineOperation = COMBINE_SET; + mip.CompareOperation = COMPARE_SRCS_EQUAL; + } +#endif +} +#endif + +void genX(CmdDrawIndirectCountKHR)( + VkCommandBuffer commandBuffer, + VkBuffer _buffer, + VkDeviceSize offset, + VkBuffer _countBuffer, + VkDeviceSize countBufferOffset, + uint32_t maxDrawCount, + uint32_t stride) +{ + ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, commandBuffer); + ANV_FROM_HANDLE(anv_buffer, buffer, _buffer); + ANV_FROM_HANDLE(anv_buffer, count_buffer, _countBuffer); + struct anv_cmd_state *cmd_state = &cmd_buffer->state; + struct anv_pipeline *pipeline = cmd_state->gfx.base.pipeline; + const struct brw_vs_prog_data *vs_prog_data = get_vs_prog_data(pipeline); + + if (anv_batch_has_error(&cmd_buffer->batch)) + return; + + genX(cmd_buffer_flush_state)(cmd_buffer); + + struct anv_address count_address = + anv_address_add(count_buffer->address, countBufferOffset); + + prepare_for_draw_count_predicate(cmd_buffer, count_address, + cmd_state->conditional_render_enabled); + + for (uint32_t i = 0; i < maxDrawCount; i++) { + struct anv_address draw = anv_address_add(buffer->address, offset); + +#if GEN_GEN >= 8 || GEN_IS_HASWELL + if (cmd_state->conditional_render_enabled) { + emit_draw_count_predicate_with_conditional_render(cmd_buffer, i); + } else { + emit_draw_count_predicate(cmd_buffer, i); + } +#else + emit_draw_count_predicate(cmd_buffer, i); +#endif + + if (vs_prog_data->uses_firstvertex || + vs_prog_data->uses_baseinstance) + emit_base_vertex_instance_bo(cmd_buffer, anv_address_add(draw, 8)); + if (vs_prog_data->uses_drawid) + 
emit_draw_index(cmd_buffer, i); + + load_indirect_parameters(cmd_buffer, draw, false); + + anv_batch_emit(&cmd_buffer->batch, GENX(3DPRIMITIVE), prim) { + prim.IndirectParameterEnable = true; + prim.PredicateEnable = true; + prim.VertexAccessType = SEQUENTIAL; + prim.PrimitiveTopologyType = pipeline->topology; + } + + offset += stride; + } +} + +void genX(CmdDrawIndexedIndirectCountKHR)( + VkCommandBuffer commandBuffer, + VkBuffer _buffer, + VkDeviceSize offset, + VkBuffer _countBuffer, + VkDeviceSize countBufferOffset, + uint32_t maxDrawCount, + uint32_t stride) +{ + ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, commandBuffer); + ANV_FROM_HANDLE(anv_buffer, buffer, _buffer); + ANV_FROM_HANDLE(anv_buffer, count_buffer, _countBuffer); + struct anv_cmd_state *cmd_state = &cmd_buffer->state; + struct anv_pipeline *pipeline = cmd_state->gfx.base.pipeline; + const struct brw_vs_prog_data *vs_prog_data = get_vs_prog_data(pipeline); + + if (anv_batch_has_error(&cmd_buffer->batch)) + return; + + genX(cmd_buffer_flush_state)(cmd_buffer); + + struct anv_address count_address = + anv_address_add(count_buffer->address, countBufferOffset); + + prepare_for_draw_count_predicate(cmd_buffer, count_address, + cmd_state->conditional_render_enabled); + + for (uint32_t i = 0; i < maxDrawCount; i++) { + struct anv_address draw = anv_address_add(buffer->address, offset); + +#if GEN_GEN >= 8 || GEN_IS_HASWELL + if (cmd_state->conditional_render_enabled) { + emit_draw_count_predicate_with_conditional_render(cmd_buffer, i); + } else { + emit_draw_count_predicate(cmd_buffer, i); + } +#else + emit_draw_count_predicate(cmd_buffer, i); +#endif + + /* TODO: We need to stomp base vertex to 0 somehow */ + if (vs_prog_data->uses_firstvertex || + vs_prog_data->uses_baseinstance) + emit_base_vertex_instance_bo(cmd_buffer, anv_address_add(draw, 12)); + if (vs_prog_data->uses_drawid) + emit_draw_index(cmd_buffer, i); + + load_indirect_parameters(cmd_buffer, draw, true); + + 
anv_batch_emit(&cmd_buffer->batch, GENX(3DPRIMITIVE), prim) { + prim.IndirectParameterEnable = true; + prim.PredicateEnable = true; + prim.VertexAccessType = RANDOM; + prim.PrimitiveTopologyType = pipeline->topology; + } + + offset += stride; + } +} + +void genX(CmdBeginTransformFeedbackEXT)( + VkCommandBuffer commandBuffer, + uint32_t firstCounterBuffer, + uint32_t counterBufferCount, + const VkBuffer* pCounterBuffers, + const VkDeviceSize* pCounterBufferOffsets) +{ + ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, commandBuffer); + + assert(firstCounterBuffer < MAX_XFB_BUFFERS); + assert(counterBufferCount <= MAX_XFB_BUFFERS); + assert(firstCounterBuffer + counterBufferCount <= MAX_XFB_BUFFERS); + + /* From the SKL PRM Vol. 2c, SO_WRITE_OFFSET: + * + * "Ssoftware must ensure that no HW stream output operations can be in + * process or otherwise pending at the point that the MI_LOAD/STORE + * commands are processed. This will likely require a pipeline flush." + */ + cmd_buffer->state.pending_pipe_bits |= ANV_PIPE_CS_STALL_BIT; + genX(cmd_buffer_apply_pipe_flushes)(cmd_buffer); + + for (uint32_t idx = 0; idx < MAX_XFB_BUFFERS; idx++) { + /* If we have a counter buffer, this is a resume so we need to load the + * value into the streamout offset register. Otherwise, this is a begin + * and we need to reset it to zero. + */ + if (pCounterBuffers && + idx >= firstCounterBuffer && + idx - firstCounterBuffer < counterBufferCount && + pCounterBuffers[idx - firstCounterBuffer] != VK_NULL_HANDLE) { + uint32_t cb_idx = idx - firstCounterBuffer; + ANV_FROM_HANDLE(anv_buffer, counter_buffer, pCounterBuffers[cb_idx]); + uint64_t offset = pCounterBufferOffsets ? 
+ pCounterBufferOffsets[cb_idx] : 0; + + anv_batch_emit(&cmd_buffer->batch, GENX(MI_LOAD_REGISTER_MEM), lrm) { + lrm.RegisterAddress = GENX(SO_WRITE_OFFSET0_num) + idx * 4; + lrm.MemoryAddress = anv_address_add(counter_buffer->address, + offset); + } + } else { + anv_batch_emit(&cmd_buffer->batch, GENX(MI_LOAD_REGISTER_IMM), lri) { + lri.RegisterOffset = GENX(SO_WRITE_OFFSET0_num) + idx * 4; + lri.DataDWord = 0; + } + } + } + + cmd_buffer->state.xfb_enabled = true; + cmd_buffer->state.gfx.dirty |= ANV_CMD_DIRTY_XFB_ENABLE; +} + +void genX(CmdEndTransformFeedbackEXT)( + VkCommandBuffer commandBuffer, + uint32_t firstCounterBuffer, + uint32_t counterBufferCount, + const VkBuffer* pCounterBuffers, + const VkDeviceSize* pCounterBufferOffsets) +{ + ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, commandBuffer); + + assert(firstCounterBuffer < MAX_XFB_BUFFERS); + assert(counterBufferCount <= MAX_XFB_BUFFERS); + assert(firstCounterBuffer + counterBufferCount <= MAX_XFB_BUFFERS); + + /* From the SKL PRM Vol. 2c, SO_WRITE_OFFSET: + * + * "Ssoftware must ensure that no HW stream output operations can be in + * process or otherwise pending at the point that the MI_LOAD/STORE + * commands are processed. This will likely require a pipeline flush." + */ + cmd_buffer->state.pending_pipe_bits |= ANV_PIPE_CS_STALL_BIT; + genX(cmd_buffer_apply_pipe_flushes)(cmd_buffer); + + for (uint32_t cb_idx = 0; cb_idx < counterBufferCount; cb_idx++) { + unsigned idx = firstCounterBuffer + cb_idx; + + /* If we have a counter buffer, this is a resume so we need to load the + * value into the streamout offset register. Otherwise, this is a begin + * and we need to reset it to zero. + */ + if (pCounterBuffers && + cb_idx < counterBufferCount && + pCounterBuffers[cb_idx] != VK_NULL_HANDLE) { + ANV_FROM_HANDLE(anv_buffer, counter_buffer, pCounterBuffers[cb_idx]); + uint64_t offset = pCounterBufferOffsets ? 
+ pCounterBufferOffsets[cb_idx] : 0; + + anv_batch_emit(&cmd_buffer->batch, GENX(MI_STORE_REGISTER_MEM), srm) { + srm.MemoryAddress = anv_address_add(counter_buffer->address, + offset); + srm.RegisterAddress = GENX(SO_WRITE_OFFSET0_num) + idx * 4; + } + } + } + + cmd_buffer->state.xfb_enabled = false; + cmd_buffer->state.gfx.dirty |= ANV_CMD_DIRTY_XFB_ENABLE; } static VkResult @@ -3215,16 +3744,19 @@ sizes[0] = groupCountX; sizes[1] = groupCountY; sizes[2] = groupCountZ; - anv_state_flush(cmd_buffer->device, state); cmd_buffer->state.compute.num_workgroups = (struct anv_address) { - .bo = &cmd_buffer->device->dynamic_state_pool.block_pool.bo, + .bo = cmd_buffer->device->dynamic_state_pool.block_pool.bo, .offset = state.offset, }; } genX(cmd_buffer_flush_compute_state)(cmd_buffer); + if (cmd_buffer->state.conditional_render_enabled) + genX(cmd_emit_conditional_render_predicate)(cmd_buffer); + anv_batch_emit(&cmd_buffer->batch, GENX(GPGPU_WALKER), ggw) { + ggw.PredicateEnable = cmd_buffer->state.conditional_render_enabled; ggw.SIMDSize = prog_data->simd_size / 16; ggw.ThreadDepthCounterMaximum = 0; ggw.ThreadHeightCounterMaximum = 0; @@ -3312,17 +3844,33 @@ } /* predicate = !predicate; */ -#define COMPARE_FALSE 1 anv_batch_emit(batch, GENX(MI_PREDICATE), mip) { mip.LoadOperation = LOAD_LOADINV; mip.CombineOperation = COMBINE_OR; mip.CompareOperation = COMPARE_FALSE; } + +#if GEN_IS_HASWELL + if (cmd_buffer->state.conditional_render_enabled) { + emit_lrr(batch, MI_PREDICATE_SRC0, CS_GPR(ANV_PREDICATE_RESULT_REG)); + /* predicate &= !(conditional_rendering_predicate == 0); */ + anv_batch_emit(batch, GENX(MI_PREDICATE), mip) { + mip.LoadOperation = LOAD_LOADINV; + mip.CombineOperation = COMBINE_AND; + mip.CompareOperation = COMPARE_SRCS_EQUAL; + } + } +#endif + +#else /* GEN_GEN > 7 */ + if (cmd_buffer->state.conditional_render_enabled) + genX(cmd_emit_conditional_render_predicate)(cmd_buffer); #endif anv_batch_emit(batch, GENX(GPGPU_WALKER), ggw) { 
ggw.IndirectParameterEnable = true; - ggw.PredicateEnable = GEN_GEN <= 7; + ggw.PredicateEnable = GEN_GEN <= 7 || + cmd_buffer->state.conditional_render_enabled; ggw.SIMDSize = prog_data->simd_size / 16; ggw.ThreadDepthCounterMaximum = 0; ggw.ThreadHeightCounterMaximum = 0; @@ -3888,16 +4436,209 @@ cmd_buffer_emit_depth_stencil(cmd_buffer); } +static enum blorp_filter +vk_to_blorp_resolve_mode(VkResolveModeFlagBitsKHR vk_mode) +{ + switch (vk_mode) { + case VK_RESOLVE_MODE_SAMPLE_ZERO_BIT_KHR: + return BLORP_FILTER_SAMPLE_0; + case VK_RESOLVE_MODE_AVERAGE_BIT_KHR: + return BLORP_FILTER_AVERAGE; + case VK_RESOLVE_MODE_MIN_BIT_KHR: + return BLORP_FILTER_MIN_SAMPLE; + case VK_RESOLVE_MODE_MAX_BIT_KHR: + return BLORP_FILTER_MAX_SAMPLE; + default: + return BLORP_FILTER_NONE; + } +} + static void cmd_buffer_end_subpass(struct anv_cmd_buffer *cmd_buffer) { struct anv_cmd_state *cmd_state = &cmd_buffer->state; struct anv_subpass *subpass = cmd_state->subpass; uint32_t subpass_id = anv_get_subpass_id(&cmd_buffer->state); + struct anv_framebuffer *fb = cmd_buffer->state.framebuffer; - anv_cmd_buffer_resolve_subpass(cmd_buffer); + if (subpass->has_color_resolve) { + /* We are about to do some MSAA resolves. We need to flush so that the + * result of writes to the MSAA color attachments show up in the sampler + * when we blit to the single-sampled resolve target. 
+ */ + cmd_buffer->state.pending_pipe_bits |= + ANV_PIPE_TEXTURE_CACHE_INVALIDATE_BIT | + ANV_PIPE_RENDER_TARGET_CACHE_FLUSH_BIT; + + for (uint32_t i = 0; i < subpass->color_count; ++i) { + uint32_t src_att = subpass->color_attachments[i].attachment; + uint32_t dst_att = subpass->resolve_attachments[i].attachment; + + if (dst_att == VK_ATTACHMENT_UNUSED) + continue; + + assert(src_att < cmd_buffer->state.pass->attachment_count); + assert(dst_att < cmd_buffer->state.pass->attachment_count); + + if (cmd_buffer->state.attachments[dst_att].pending_clear_aspects) { + /* From the Vulkan 1.0 spec: + * + * If the first use of an attachment in a render pass is as a + * resolve attachment, then the loadOp is effectively ignored + * as the resolve is guaranteed to overwrite all pixels in the + * render area. + */ + cmd_buffer->state.attachments[dst_att].pending_clear_aspects = 0; + } + + struct anv_image_view *src_iview = fb->attachments[src_att]; + struct anv_image_view *dst_iview = fb->attachments[dst_att]; + + const VkRect2D render_area = cmd_buffer->state.render_area; + + enum isl_aux_usage src_aux_usage = + cmd_buffer->state.attachments[src_att].aux_usage; + enum isl_aux_usage dst_aux_usage = + cmd_buffer->state.attachments[dst_att].aux_usage; + + assert(src_iview->aspect_mask == VK_IMAGE_ASPECT_COLOR_BIT && + dst_iview->aspect_mask == VK_IMAGE_ASPECT_COLOR_BIT); + + anv_image_msaa_resolve(cmd_buffer, + src_iview->image, src_aux_usage, + src_iview->planes[0].isl.base_level, + src_iview->planes[0].isl.base_array_layer, + dst_iview->image, dst_aux_usage, + dst_iview->planes[0].isl.base_level, + dst_iview->planes[0].isl.base_array_layer, + VK_IMAGE_ASPECT_COLOR_BIT, + render_area.offset.x, render_area.offset.y, + render_area.offset.x, render_area.offset.y, + render_area.extent.width, + render_area.extent.height, + fb->layers, BLORP_FILTER_NONE); + } + } + + if (subpass->ds_resolve_attachment) { + /* We are about to do some MSAA resolves. 
We need to flush so that the + * result of writes to the MSAA depth attachments show up in the sampler + * when we blit to the single-sampled resolve target. + */ + cmd_buffer->state.pending_pipe_bits |= + ANV_PIPE_TEXTURE_CACHE_INVALIDATE_BIT | + ANV_PIPE_DEPTH_CACHE_FLUSH_BIT; + + uint32_t src_att = subpass->depth_stencil_attachment->attachment; + uint32_t dst_att = subpass->ds_resolve_attachment->attachment; + + assert(src_att < cmd_buffer->state.pass->attachment_count); + assert(dst_att < cmd_buffer->state.pass->attachment_count); + + if (cmd_buffer->state.attachments[dst_att].pending_clear_aspects) { + /* From the Vulkan 1.0 spec: + * + * If the first use of an attachment in a render pass is as a + * resolve attachment, then the loadOp is effectively ignored + * as the resolve is guaranteed to overwrite all pixels in the + * render area. + */ + cmd_buffer->state.attachments[dst_att].pending_clear_aspects = 0; + } + + struct anv_image_view *src_iview = fb->attachments[src_att]; + struct anv_image_view *dst_iview = fb->attachments[dst_att]; + + const VkRect2D render_area = cmd_buffer->state.render_area; + + if ((src_iview->image->aspects & VK_IMAGE_ASPECT_DEPTH_BIT) && + subpass->depth_resolve_mode != VK_RESOLVE_MODE_NONE_KHR) { + + struct anv_attachment_state *src_state = + &cmd_state->attachments[src_att]; + struct anv_attachment_state *dst_state = + &cmd_state->attachments[dst_att]; + + /* MSAA resolves sample from the source attachment. Transition the + * depth attachment first to get rid of any HiZ that we may not be + * able to handle. 
+ */ + transition_depth_buffer(cmd_buffer, src_iview->image, + src_state->current_layout, + VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL); + src_state->aux_usage = + anv_layout_to_aux_usage(&cmd_buffer->device->info, src_iview->image, + VK_IMAGE_ASPECT_DEPTH_BIT, + VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL); + src_state->current_layout = VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL; + + /* MSAA resolves write to the resolve attachment as if it were any + * other transfer op. Transition the resolve attachment accordingly. + */ + VkImageLayout dst_initial_layout = dst_state->current_layout; + + /* If our render area is the entire size of the image, we're going to + * blow it all away so we can claim the initial layout is UNDEFINED + * and we'll get a HiZ ambiguate instead of a resolve. + */ + if (dst_iview->image->type != VK_IMAGE_TYPE_3D && + render_area.offset.x == 0 && render_area.offset.y == 0 && + render_area.extent.width == dst_iview->extent.width && + render_area.extent.height == dst_iview->extent.height) + dst_initial_layout = VK_IMAGE_LAYOUT_UNDEFINED; + + transition_depth_buffer(cmd_buffer, dst_iview->image, + dst_initial_layout, + VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL); + dst_state->aux_usage = + anv_layout_to_aux_usage(&cmd_buffer->device->info, dst_iview->image, + VK_IMAGE_ASPECT_DEPTH_BIT, + VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL); + dst_state->current_layout = VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL; + + enum blorp_filter filter = + vk_to_blorp_resolve_mode(subpass->depth_resolve_mode); + + anv_image_msaa_resolve(cmd_buffer, + src_iview->image, src_state->aux_usage, + src_iview->planes[0].isl.base_level, + src_iview->planes[0].isl.base_array_layer, + dst_iview->image, dst_state->aux_usage, + dst_iview->planes[0].isl.base_level, + dst_iview->planes[0].isl.base_array_layer, + VK_IMAGE_ASPECT_DEPTH_BIT, + render_area.offset.x, render_area.offset.y, + render_area.offset.x, render_area.offset.y, + render_area.extent.width, + render_area.extent.height, + fb->layers, 
filter); + } + + if ((src_iview->image->aspects & VK_IMAGE_ASPECT_STENCIL_BIT) && + subpass->stencil_resolve_mode != VK_RESOLVE_MODE_NONE_KHR) { + + enum isl_aux_usage src_aux_usage = ISL_AUX_USAGE_NONE; + enum isl_aux_usage dst_aux_usage = ISL_AUX_USAGE_NONE; + + enum blorp_filter filter = + vk_to_blorp_resolve_mode(subpass->stencil_resolve_mode); + + anv_image_msaa_resolve(cmd_buffer, + src_iview->image, src_aux_usage, + src_iview->planes[0].isl.base_level, + src_iview->planes[0].isl.base_array_layer, + dst_iview->image, dst_aux_usage, + dst_iview->planes[0].isl.base_level, + dst_iview->planes[0].isl.base_array_layer, + VK_IMAGE_ASPECT_STENCIL_BIT, + render_area.offset.x, render_area.offset.y, + render_area.offset.x, render_area.offset.y, + render_area.extent.width, + render_area.extent.height, + fb->layers, filter); + } + } - struct anv_framebuffer *fb = cmd_buffer->state.framebuffer; for (uint32_t i = 0; i < subpass->attachment_count; ++i) { const uint32_t a = subpass->attachments[i].attachment; if (a == VK_ATTACHMENT_UNUSED) @@ -4085,3 +4826,75 @@ { genX(CmdEndRenderPass)(commandBuffer); } + +void +genX(cmd_emit_conditional_render_predicate)(struct anv_cmd_buffer *cmd_buffer) +{ +#if GEN_GEN >= 8 || GEN_IS_HASWELL + emit_lrr(&cmd_buffer->batch, MI_PREDICATE_SRC0, CS_GPR(ANV_PREDICATE_RESULT_REG)); + emit_lri(&cmd_buffer->batch, MI_PREDICATE_SRC0 + 4, 0); + emit_lri(&cmd_buffer->batch, MI_PREDICATE_SRC1, 0); + emit_lri(&cmd_buffer->batch, MI_PREDICATE_SRC1 + 4, 0); + + anv_batch_emit(&cmd_buffer->batch, GENX(MI_PREDICATE), mip) { + mip.LoadOperation = LOAD_LOADINV; + mip.CombineOperation = COMBINE_SET; + mip.CompareOperation = COMPARE_SRCS_EQUAL; + } +#endif +} + +#if GEN_GEN >= 8 || GEN_IS_HASWELL +void genX(CmdBeginConditionalRenderingEXT)( + VkCommandBuffer commandBuffer, + const VkConditionalRenderingBeginInfoEXT* pConditionalRenderingBegin) +{ + ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, commandBuffer); + ANV_FROM_HANDLE(anv_buffer, buffer, 
pConditionalRenderingBegin->buffer); + struct anv_cmd_state *cmd_state = &cmd_buffer->state; + struct anv_address value_address = + anv_address_add(buffer->address, pConditionalRenderingBegin->offset); + + const bool isInverted = pConditionalRenderingBegin->flags & + VK_CONDITIONAL_RENDERING_INVERTED_BIT_EXT; + + cmd_state->conditional_render_enabled = true; + + genX(cmd_buffer_apply_pipe_flushes)(cmd_buffer); + + /* Section 19.4 of the Vulkan 1.1.85 spec says: + * + * If the value of the predicate in buffer memory changes + * while conditional rendering is active, the rendering commands + * may be discarded in an implementation-dependent way. + * Some implementations may latch the value of the predicate + * upon beginning conditional rendering while others + * may read it before every rendering command. + * + * So it's perfectly fine to read a value from the buffer once. + */ + emit_lrm(&cmd_buffer->batch, CS_GPR(MI_ALU_REG0), value_address); + /* Zero the top 32-bits of MI_PREDICATE_SRC0 */ + emit_lri(&cmd_buffer->batch, CS_GPR(MI_ALU_REG0) + 4, 0); + + /* Precompute predicate result, it is necessary to support secondary + * command buffers since it is unknown if conditional rendering is + * inverted when populating them. + */ + uint32_t *dw = anv_batch_emitn(&cmd_buffer->batch, 5, GENX(MI_MATH)); + dw[1] = mi_alu(MI_ALU_LOAD0, MI_ALU_SRCA, 0); + dw[2] = mi_alu(MI_ALU_LOAD, MI_ALU_SRCB, MI_ALU_REG0); + dw[3] = mi_alu(MI_ALU_SUB, 0, 0); + dw[4] = mi_alu(isInverted ? 
MI_ALU_STOREINV : MI_ALU_STORE, + ANV_PREDICATE_RESULT_REG, MI_ALU_CF); +} + +void genX(CmdEndConditionalRenderingEXT)( + VkCommandBuffer commandBuffer) +{ + ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, commandBuffer); + struct anv_cmd_state *cmd_state = &cmd_buffer->state; + + cmd_state->conditional_render_enabled = false; +} +#endif diff -Nru mesa-18.3.3/src/intel/vulkan/genX_gpu_memcpy.c mesa-19.0.1/src/intel/vulkan/genX_gpu_memcpy.c --- mesa-18.3.3/src/intel/vulkan/genX_gpu_memcpy.c 2018-12-07 18:58:04.000000000 +0000 +++ mesa-19.0.1/src/intel/vulkan/genX_gpu_memcpy.c 2019-03-31 23:16:37.000000000 +0000 @@ -133,9 +133,6 @@ if (size == 0) return; - assert(dst.offset + size <= dst.bo->size); - assert(src.offset + size <= src.bo->size); - /* The maximum copy block size is 4 32-bit components at a time. */ assert(size % 4 == 0); unsigned bs = gcd_pow2_u64(16, size); @@ -167,7 +164,7 @@ .AddressModifyEnable = true, .BufferStartingAddress = src, .BufferPitch = bs, - .VertexBufferMOCS = anv_mocs_for_bo(cmd_buffer->device, src.bo), + .MOCS = anv_mocs_for_bo(cmd_buffer->device, src.bo), #if (GEN_GEN >= 8) .BufferSize = size, #else @@ -227,7 +224,7 @@ anv_batch_emit(&cmd_buffer->batch, GENX(3DSTATE_SO_BUFFER), sob) { sob.SOBufferIndex = 0; - sob.SOBufferMOCS = anv_mocs_for_bo(cmd_buffer->device, dst.bo), + sob.MOCS = anv_mocs_for_bo(cmd_buffer->device, dst.bo), sob.SurfaceBaseAddress = dst; #if GEN_GEN >= 8 @@ -302,5 +299,4 @@ } cmd_buffer->state.gfx.dirty |= ANV_CMD_DIRTY_PIPELINE; - cmd_buffer->state.pending_pipe_bits |= ANV_PIPE_RENDER_TARGET_WRITES; } diff -Nru mesa-18.3.3/src/intel/vulkan/genX_pipeline.c mesa-19.0.1/src/intel/vulkan/genX_pipeline.c --- mesa-18.3.3/src/intel/vulkan/genX_pipeline.c 2018-12-07 18:58:04.000000000 +0000 +++ mesa-19.0.1/src/intel/vulkan/genX_pipeline.c 2019-03-31 23:16:37.000000000 +0000 @@ -28,6 +28,7 @@ #include "common/gen_l3_config.h" #include "common/gen_sample_positions.h" +#include "nir/nir_xfb_info.h" #include "vk_util.h" #include 
"vk_format_info.h" @@ -105,9 +106,7 @@ __builtin_popcount(elements_double) / 2; const uint32_t total_elems = - elem_count + needs_svgs_elem + vs_prog_data->uses_drawid; - if (total_elems == 0) - return; + MAX2(1, elem_count + needs_svgs_elem + vs_prog_data->uses_drawid); uint32_t *p; @@ -465,6 +464,7 @@ sf.TriangleStripListProvokingVertexSelect = 0; sf.LineStripListProvokingVertexSelect = 0; sf.TriangleFanProvokingVertexSelect = 1; + sf.VertexSubPixelPrecisionSelect = _8Bit; const struct brw_vue_prog_data *last_vue_prog_data = anv_pipeline_get_last_vue_prog_data(pipeline); @@ -1055,7 +1055,6 @@ #endif GENX(BLEND_STATE_pack)(NULL, pipeline->blend_state.map, &blend_state); - anv_state_flush(device, pipeline->blend_state); anv_batch_emit(&pipeline->batch, GENX(3DSTATE_BLEND_STATE_POINTERS), bsp) { bsp.BlendStatePointer = pipeline->blend_state.offset; @@ -1079,6 +1078,10 @@ clip.APIMode = APIMODE_D3D, clip.ViewportXYClipTestEnable = true; +#if GEN_GEN >= 8 + clip.VertexSubPixelPrecisionSelect = _8Bit; +#endif + clip.ClipMode = CLIPMODE_NORMAL; clip.TriangleStripListProvokingVertexSelect = 0; @@ -1116,10 +1119,8 @@ clip.FrontWinding = vk_to_gen_front_face[rs_info->frontFace]; clip.CullMode = vk_to_gen_cullmode[rs_info->cullMode]; clip.ViewportZClipTestEnable = !pipeline->depth_clamp_enable; - if (last) { - clip.UserClipDistanceClipTestEnableBitmask = last->clip_distance_mask; - clip.UserClipDistanceCullTestEnableBitmask = last->cull_distance_mask; - } + clip.UserClipDistanceClipTestEnableBitmask = last->clip_distance_mask; + clip.UserClipDistanceCullTestEnableBitmask = last->cull_distance_mask; #else clip.NonPerspectiveBarycentricEnable = wm_prog_data ? 
(wm_prog_data->barycentric_interp_modes & @@ -1132,9 +1133,148 @@ emit_3dstate_streamout(struct anv_pipeline *pipeline, const VkPipelineRasterizationStateCreateInfo *rs_info) { +#if GEN_GEN >= 8 + const struct brw_vue_prog_data *prog_data = + anv_pipeline_get_last_vue_prog_data(pipeline); + const struct brw_vue_map *vue_map = &prog_data->vue_map; +#endif + + nir_xfb_info *xfb_info; + if (anv_pipeline_has_stage(pipeline, MESA_SHADER_GEOMETRY)) + xfb_info = pipeline->shaders[MESA_SHADER_GEOMETRY]->xfb_info; + else if (anv_pipeline_has_stage(pipeline, MESA_SHADER_TESS_EVAL)) + xfb_info = pipeline->shaders[MESA_SHADER_TESS_EVAL]->xfb_info; + else + xfb_info = pipeline->shaders[MESA_SHADER_VERTEX]->xfb_info; + + pipeline->xfb_used = xfb_info ? xfb_info->buffers_written : 0; + anv_batch_emit(&pipeline->batch, GENX(3DSTATE_STREAMOUT), so) { so.RenderingDisable = rs_info->rasterizerDiscardEnable; + +#if GEN_GEN >= 8 + if (xfb_info) { + so.SOFunctionEnable = true; + so.SOStatisticsEnable = true; + + const VkPipelineRasterizationStateStreamCreateInfoEXT *stream_info = + vk_find_struct_const(rs_info, PIPELINE_RASTERIZATION_STATE_STREAM_CREATE_INFO_EXT); + so.RenderStreamSelect = stream_info ? + stream_info->rasterizationStream : 0; + + so.Buffer0SurfacePitch = xfb_info->strides[0]; + so.Buffer1SurfacePitch = xfb_info->strides[1]; + so.Buffer2SurfacePitch = xfb_info->strides[2]; + so.Buffer3SurfacePitch = xfb_info->strides[3]; + + int urb_entry_read_offset = 0; + int urb_entry_read_length = + (prog_data->vue_map.num_slots + 1) / 2 - urb_entry_read_offset; + + /* We always read the whole vertex. This could be reduced at some + * point by reading less and offsetting the register index in the + * SO_DECLs. 
+ */ + so.Stream0VertexReadOffset = urb_entry_read_offset; + so.Stream0VertexReadLength = urb_entry_read_length - 1; + so.Stream1VertexReadOffset = urb_entry_read_offset; + so.Stream1VertexReadLength = urb_entry_read_length - 1; + so.Stream2VertexReadOffset = urb_entry_read_offset; + so.Stream2VertexReadLength = urb_entry_read_length - 1; + so.Stream3VertexReadOffset = urb_entry_read_offset; + so.Stream3VertexReadLength = urb_entry_read_length - 1; + } +#endif /* GEN_GEN >= 8 */ + } + +#if GEN_GEN >= 8 + if (xfb_info) { + struct GENX(SO_DECL) so_decl[MAX_XFB_STREAMS][128]; + int next_offset[MAX_XFB_BUFFERS] = {0, 0, 0, 0}; + int decls[MAX_XFB_STREAMS] = {0, 0, 0, 0}; + + memset(so_decl, 0, sizeof(so_decl)); + + for (unsigned i = 0; i < xfb_info->output_count; i++) { + const nir_xfb_output_info *output = &xfb_info->outputs[i]; + unsigned buffer = output->buffer; + unsigned stream = xfb_info->buffer_to_stream[buffer]; + + /* Our hardware is unusual in that it requires us to program SO_DECLs + * for fake "hole" components, rather than simply taking the offset + * for each real varying. Each hole can have size 1, 2, 3, or 4; we + * program as many size = 4 holes as we can, then a final hole to + * accommodate the final 1, 2, or 3 remaining. 
+ */ + int hole_dwords = (output->offset - next_offset[buffer]) / 4; + while (hole_dwords > 0) { + so_decl[stream][decls[stream]++] = (struct GENX(SO_DECL)) { + .HoleFlag = 1, + .OutputBufferSlot = buffer, + .ComponentMask = (1 << MIN2(hole_dwords, 4)) - 1, + }; + hole_dwords -= 4; + } + + int varying = output->location; + uint8_t component_mask = output->component_mask; + /* VARYING_SLOT_PSIZ contains three scalar fields packed together: + * - VARYING_SLOT_LAYER in VARYING_SLOT_PSIZ.y + * - VARYING_SLOT_VIEWPORT in VARYING_SLOT_PSIZ.z + * - VARYING_SLOT_PSIZ in VARYING_SLOT_PSIZ.w + */ + if (varying == VARYING_SLOT_LAYER) { + varying = VARYING_SLOT_PSIZ; + component_mask = 1 << 1; // SO_DECL_COMPMASK_Y + } else if (varying == VARYING_SLOT_VIEWPORT) { + varying = VARYING_SLOT_PSIZ; + component_mask = 1 << 2; // SO_DECL_COMPMASK_Z + } else if (varying == VARYING_SLOT_PSIZ) { + component_mask = 1 << 3; // SO_DECL_COMPMASK_W + } + + next_offset[buffer] = output->offset + + __builtin_popcount(component_mask) * 4; + + so_decl[stream][decls[stream]++] = (struct GENX(SO_DECL)) { + .OutputBufferSlot = buffer, + .RegisterIndex = vue_map->varying_to_slot[varying], + .ComponentMask = component_mask, + }; + } + + int max_decls = 0; + for (unsigned s = 0; s < MAX_XFB_STREAMS; s++) + max_decls = MAX2(max_decls, decls[s]); + + uint8_t sbs[MAX_XFB_STREAMS] = { }; + for (unsigned b = 0; b < MAX_XFB_BUFFERS; b++) { + if (xfb_info->buffers_written & (1 << b)) + sbs[xfb_info->buffer_to_stream[b]] |= 1 << b; + } + + uint32_t *dw = anv_batch_emitn(&pipeline->batch, 3 + 2 * max_decls, + GENX(3DSTATE_SO_DECL_LIST), + .StreamtoBufferSelects0 = sbs[0], + .StreamtoBufferSelects1 = sbs[1], + .StreamtoBufferSelects2 = sbs[2], + .StreamtoBufferSelects3 = sbs[3], + .NumEntries0 = decls[0], + .NumEntries1 = decls[1], + .NumEntries2 = decls[2], + .NumEntries3 = decls[3]); + + for (int i = 0; i < max_decls; i++) { + GENX(SO_DECL_ENTRY_pack)(NULL, dw + 3 + i * 2, + &(struct GENX(SO_DECL_ENTRY)) { + 
.Stream0Decl = so_decl[0][i], + .Stream1Decl = so_decl[1][i], + .Stream2Decl = so_decl[2][i], + .Stream3Decl = so_decl[3][i], + }); + } } +#endif /* GEN_GEN >= 8 */ } static uint32_t @@ -1198,7 +1338,12 @@ vs.SingleVertexDispatch = false; #endif vs.VectorMaskEnable = false; - vs.SamplerCount = get_sampler_count(vs_bin); + /* WA_1606682166: + * Incorrect TDL's SSP address shift in SARB for 16:6 & 18:8 modes. + * Disable the Sampler state prefetch functionality in the SARB by + * programming 0xB000[30] to '1'. + */ + vs.SamplerCount = GEN_GEN == 11 ? 0 : get_sampler_count(vs_bin); /* Gen 11 workarounds table #2056 WABTPPrefetchDisable suggests to * disable prefetching of binding tables on A0 and B0 steppings. * TODO: Revisit this WA on newer steppings. @@ -1273,8 +1418,8 @@ hs.Enable = true; hs.StatisticsEnable = true; hs.KernelStartPointer = tcs_bin->kernel.offset; - - hs.SamplerCount = get_sampler_count(tcs_bin); + /* WA_1606682166 */ + hs.SamplerCount = GEN_GEN == 11 ? 0 : get_sampler_count(tcs_bin); /* Gen 11 workarounds table #2056 WABTPPrefetchDisable */ hs.BindingTableEntryCount = GEN_GEN == 11 ? 0 : get_binding_table_entry_count(tcs_bin); hs.MaximumNumberofThreads = devinfo->max_tcs_threads - 1; @@ -1291,17 +1436,17 @@ get_scratch_address(pipeline, MESA_SHADER_TESS_CTRL, tcs_bin); } - const VkPipelineTessellationDomainOriginStateCreateInfoKHR *domain_origin_state = - tess_info ? vk_find_struct_const(tess_info, PIPELINE_TESSELLATION_DOMAIN_ORIGIN_STATE_CREATE_INFO_KHR) : NULL; + const VkPipelineTessellationDomainOriginStateCreateInfo *domain_origin_state = + tess_info ? vk_find_struct_const(tess_info, PIPELINE_TESSELLATION_DOMAIN_ORIGIN_STATE_CREATE_INFO) : NULL; - VkTessellationDomainOriginKHR uv_origin = + VkTessellationDomainOrigin uv_origin = domain_origin_state ? 
domain_origin_state->domainOrigin : - VK_TESSELLATION_DOMAIN_ORIGIN_UPPER_LEFT_KHR; + VK_TESSELLATION_DOMAIN_ORIGIN_UPPER_LEFT; anv_batch_emit(&pipeline->batch, GENX(3DSTATE_TE), te) { te.Partitioning = tes_prog_data->partitioning; - if (uv_origin == VK_TESSELLATION_DOMAIN_ORIGIN_LOWER_LEFT_KHR) { + if (uv_origin == VK_TESSELLATION_DOMAIN_ORIGIN_LOWER_LEFT) { te.OutputTopology = tes_prog_data->output_topology; } else { /* When the origin is upper-left, we have to flip the winding order */ @@ -1324,8 +1469,8 @@ ds.Enable = true; ds.StatisticsEnable = true; ds.KernelStartPointer = tes_bin->kernel.offset; - - ds.SamplerCount = get_sampler_count(tes_bin); + /* WA_1606682166 */ + ds.SamplerCount = GEN_GEN == 11 ? 0 : get_sampler_count(tes_bin); /* Gen 11 workarounds table #2056 WABTPPrefetchDisable */ ds.BindingTableEntryCount = GEN_GEN == 11 ? 0 : get_binding_table_entry_count(tes_bin); ds.MaximumNumberofThreads = devinfo->max_tes_threads - 1; @@ -1383,7 +1528,8 @@ gs.SingleProgramFlow = false; gs.VectorMaskEnable = false; - gs.SamplerCount = get_sampler_count(gs_bin); + /* WA_1606682166 */ + gs.SamplerCount = GEN_GEN == 11 ? 0 : get_sampler_count(gs_bin); /* Gen 11 workarounds table #2056 WABTPPrefetchDisable */ gs.BindingTableEntryCount = GEN_GEN == 11 ? 0 : get_binding_table_entry_count(gs_bin); gs.IncludeVertexHandles = gs_prog_data->base.include_vue_handles; @@ -1616,7 +1762,8 @@ ps.SingleProgramFlow = false; ps.VectorMaskEnable = true; - ps.SamplerCount = get_sampler_count(fs_bin); + /* WA_1606682166 */ + ps.SamplerCount = GEN_GEN == 11 ? 0 : get_sampler_count(fs_bin); /* Gen 11 workarounds table #2056 WABTPPrefetchDisable */ ps.BindingTableEntryCount = GEN_GEN == 11 ? 
0 : get_binding_table_entry_count(fs_bin); ps.PushConstantEnable = wm_prog_data->base.nr_params > 0 || @@ -1947,10 +2094,14 @@ struct GENX(INTERFACE_DESCRIPTOR_DATA) desc = { .KernelStartPointer = cs_bin->kernel.offset, - - .SamplerCount = get_sampler_count(cs_bin), - /* Gen 11 workarounds table #2056 WABTPPrefetchDisable */ - .BindingTableEntryCount = GEN_GEN == 11 ? 0 : get_binding_table_entry_count(cs_bin), + /* WA_1606682166 */ + .SamplerCount = GEN_GEN == 11 ? 0 : get_sampler_count(cs_bin), + /* Gen 11 workarounds table #2056 WABTPPrefetchDisable + * + * We add 1 because the CS indirect parameters buffer isn't accounted + * for in bind_map.surface_count. + */ + .BindingTableEntryCount = GEN_GEN == 11 ? 0 : 1 + MIN2(cs_bin->bind_map.surface_count, 30), .BarrierEnable = cs_prog_data->uses_barrier, .SharedLocalMemorySize = encode_slm_size(GEN_GEN, cs_prog_data->base.total_shared), diff -Nru mesa-18.3.3/src/intel/vulkan/genX_query.c mesa-19.0.1/src/intel/vulkan/genX_query.c --- mesa-18.3.3/src/intel/vulkan/genX_query.c 2018-12-07 18:58:04.000000000 +0000 +++ mesa-19.0.1/src/intel/vulkan/genX_query.c 2019-03-31 23:16:37.000000000 +0000 @@ -72,6 +72,12 @@ /* Statistics queries have a min and max for every statistic */ uint64s_per_slot += 2 * util_bitcount(pipeline_statistics); break; + case VK_QUERY_TYPE_TRANSFORM_FEEDBACK_STREAM_EXT: + /* Transform feedback queries are 4 values, begin/end for + * written/available. 
+ */ + uint64s_per_slot += 4; + break; default: assert(!"Invalid query type"); } @@ -220,7 +226,8 @@ assert(pool->type == VK_QUERY_TYPE_OCCLUSION || pool->type == VK_QUERY_TYPE_PIPELINE_STATISTICS || - pool->type == VK_QUERY_TYPE_TIMESTAMP); + pool->type == VK_QUERY_TYPE_TIMESTAMP || + pool->type == VK_QUERY_TYPE_TRANSFORM_FEEDBACK_STREAM_EXT); if (anv_device_is_lost(device)) return VK_ERROR_DEVICE_LOST; @@ -284,6 +291,15 @@ break; } + case VK_QUERY_TYPE_TRANSFORM_FEEDBACK_STREAM_EXT: + if (write_results) + cpu_write_query_result(pData, flags, idx, slot[2] - slot[1]); + idx++; + if (write_results) + cpu_write_query_result(pData, flags, idx, slot[4] - slot[3]); + idx++; + break; + case VK_QUERY_TYPE_TIMESTAMP: if (write_results) cpu_write_query_result(pData, flags, idx, slot[1]); @@ -411,12 +427,47 @@ emit_srm64(&cmd_buffer->batch, addr, vk_pipeline_stat_to_reg[stat]); } +static void +emit_xfb_query(struct anv_cmd_buffer *cmd_buffer, uint32_t stream, + struct anv_address addr) +{ + assert(stream < MAX_XFB_STREAMS); + + anv_batch_emit(&cmd_buffer->batch, GENX(MI_STORE_REGISTER_MEM), lrm) { + lrm.RegisterAddress = GENX(SO_NUM_PRIMS_WRITTEN0_num) + 0 + stream * 8; + lrm.MemoryAddress = anv_address_add(addr, 0); + } + anv_batch_emit(&cmd_buffer->batch, GENX(MI_STORE_REGISTER_MEM), lrm) { + lrm.RegisterAddress = GENX(SO_NUM_PRIMS_WRITTEN0_num) + 4 + stream * 8; + lrm.MemoryAddress = anv_address_add(addr, 4); + } + + anv_batch_emit(&cmd_buffer->batch, GENX(MI_STORE_REGISTER_MEM), lrm) { + lrm.RegisterAddress = GENX(SO_PRIM_STORAGE_NEEDED0_num) + 0 + stream * 8; + lrm.MemoryAddress = anv_address_add(addr, 16); + } + anv_batch_emit(&cmd_buffer->batch, GENX(MI_STORE_REGISTER_MEM), lrm) { + lrm.RegisterAddress = GENX(SO_PRIM_STORAGE_NEEDED0_num) + 4 + stream * 8; + lrm.MemoryAddress = anv_address_add(addr, 20); + } +} + void genX(CmdBeginQuery)( VkCommandBuffer commandBuffer, VkQueryPool queryPool, uint32_t query, VkQueryControlFlags flags) { + 
genX(CmdBeginQueryIndexedEXT)(commandBuffer, queryPool, query, flags, 0); +} + +void genX(CmdBeginQueryIndexedEXT)( + VkCommandBuffer commandBuffer, + VkQueryPool queryPool, + uint32_t query, + VkQueryControlFlags flags, + uint32_t index) +{ ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, commandBuffer); ANV_FROM_HANDLE(anv_query_pool, pool, queryPool); struct anv_address query_addr = anv_query_address(pool, query); @@ -444,6 +495,14 @@ break; } + case VK_QUERY_TYPE_TRANSFORM_FEEDBACK_STREAM_EXT: + anv_batch_emit(&cmd_buffer->batch, GENX(PIPE_CONTROL), pc) { + pc.CommandStreamerStallEnable = true; + pc.StallAtPixelScoreboard = true; + } + emit_xfb_query(cmd_buffer, index, anv_address_add(query_addr, 8)); + break; + default: unreachable(""); } @@ -452,7 +511,16 @@ void genX(CmdEndQuery)( VkCommandBuffer commandBuffer, VkQueryPool queryPool, - uint32_t query) + VkQueryControlFlags flags) +{ + genX(CmdEndQueryIndexedEXT)(commandBuffer, queryPool, flags, 0); +} + +void genX(CmdEndQueryIndexedEXT)( + VkCommandBuffer commandBuffer, + VkQueryPool queryPool, + uint32_t query, + uint32_t index) { ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, commandBuffer); ANV_FROM_HANDLE(anv_query_pool, pool, queryPool); @@ -484,6 +552,16 @@ break; } + case VK_QUERY_TYPE_TRANSFORM_FEEDBACK_STREAM_EXT: + anv_batch_emit(&cmd_buffer->batch, GENX(PIPE_CONTROL), pc) { + pc.CommandStreamerStallEnable = true; + pc.StallAtPixelScoreboard = true; + } + + emit_xfb_query(cmd_buffer, index, anv_address_add(query_addr, 16)); + emit_query_availability(cmd_buffer, query_addr); + break; + default: unreachable(""); } @@ -733,7 +811,7 @@ * to ensure proper ordering of the commands from the 3d pipe and the * command streamer. 
*/ - if (cmd_buffer->state.pending_pipe_bits & ANV_PIPE_RENDER_TARGET_WRITES) { + if (cmd_buffer->state.pending_pipe_bits & ANV_PIPE_RENDER_TARGET_BUFFER_WRITES) { cmd_buffer->state.pending_pipe_bits |= ANV_PIPE_RENDER_TARGET_CACHE_FLUSH_BIT; } @@ -778,6 +856,17 @@ break; } + case VK_QUERY_TYPE_TRANSFORM_FEEDBACK_STREAM_EXT: + compute_query_result(&cmd_buffer->batch, MI_ALU_REG2, + anv_address_add(query_addr, 8)); + gpu_write_query_result(&cmd_buffer->batch, dest_addr, + flags, idx++, CS_GPR(2)); + compute_query_result(&cmd_buffer->batch, MI_ALU_REG2, + anv_address_add(query_addr, 24)); + gpu_write_query_result(&cmd_buffer->batch, dest_addr, + flags, idx++, CS_GPR(2)); + break; + case VK_QUERY_TYPE_TIMESTAMP: emit_load_alu_reg_u64(&cmd_buffer->batch, CS_GPR(2), anv_address_add(query_addr, 8)); diff -Nru mesa-18.3.3/src/intel/vulkan/genX_state.c mesa-19.0.1/src/intel/vulkan/genX_state.c --- mesa-18.3.3/src/intel/vulkan/genX_state.c 2018-12-07 18:58:04.000000000 +0000 +++ mesa-19.0.1/src/intel/vulkan/genX_state.c 2019-03-31 23:16:37.000000000 +0000 @@ -91,11 +91,9 @@ VkResult genX(init_device_state)(struct anv_device *device) { - GENX(MEMORY_OBJECT_CONTROL_STATE_pack)(NULL, &device->default_mocs, - &GENX(MOCS)); + device->default_mocs = GENX(MOCS); #if GEN_GEN >= 8 - GENX(MEMORY_OBJECT_CONTROL_STATE_pack)(NULL, &device->external_mocs, - &GENX(EXTERNAL_MOCS)); + device->external_mocs = GENX(EXTERNAL_MOCS); #else device->external_mocs = device->default_mocs; #endif @@ -334,7 +332,12 @@ ANV_FROM_HANDLE(anv_ycbcr_conversion, conversion, pSamplerConversion->conversion); - if (conversion == NULL) + /* Ignore conversion for non-YUV formats. This fulfills a requirement + * for clients that want to utilize same code path for images with + * external formats (VK_FORMAT_UNDEFINED) and "regular" RGBA images + * where format is known. 
+ */ + if (conversion == NULL || !conversion->format->can_ycbcr) break; sampler->n_planes = conversion->format->n_planes; diff -Nru mesa-18.3.3/src/intel/vulkan/meson.build mesa-19.0.1/src/intel/vulkan/meson.build --- mesa-18.3.3/src/intel/vulkan/meson.build 2018-12-07 18:58:04.000000000 +0000 +++ mesa-19.0.1/src/intel/vulkan/meson.build 2019-03-31 23:16:37.000000000 +0000 @@ -1,4 +1,4 @@ -# Copyright © 2017-2018 Intel Corporation +# Copyright © 2017-2019 Intel Corporation # Permission is hereby granted, free of charge, to any person obtaining a copy # of this software and associated documentation files (the "Software"), to deal @@ -111,6 +111,8 @@ libanv_files = files( 'anv_allocator.c', + 'anv_android_stubs.c', + 'anv_android.h', 'anv_batch_chain.c', 'anv_blorp.c', 'anv_cmd_buffer.c', @@ -176,7 +178,10 @@ libanv_common = static_library( 'anv_common', - [libanv_files, anv_entrypoints, anv_extensions_c, anv_extensions_h, sha1_h], + [ + libanv_files, anv_entrypoints, anv_extensions_c, anv_extensions_h, sha1_h, + gen_xml_pack, + ], include_directories : [ inc_common, inc_intel, inc_compiler, inc_drm_uapi, inc_vulkan_util, inc_vulkan_wsi, @@ -225,18 +230,21 @@ ) foreach t : ['block_pool_no_free', 'state_pool_no_free', - 'state_pool_free_list_only', 'state_pool'] + 'state_pool_free_list_only', 'state_pool', + 'state_pool_padding'] test( 'anv_@0@'.format(t), executable( t, ['tests/@0@.c'.format(t), anv_entrypoints[0], anv_extensions_h], + c_args : [ c_sse2_args ], link_with : libvulkan_intel_test, dependencies : [dep_libdrm, dep_thread, dep_m, dep_valgrind], include_directories : [ inc_common, inc_intel, inc_compiler, inc_vulkan_util, inc_vulkan_wsi, ], - ) + ), + suite : ['intel'], ) endforeach endif diff -Nru mesa-18.3.3/src/intel/vulkan/tests/block_pool_no_free.c mesa-19.0.1/src/intel/vulkan/tests/block_pool_no_free.c --- mesa-18.3.3/src/intel/vulkan/tests/block_pool_no_free.c 2018-09-27 19:13:54.000000000 +0000 +++ 
mesa-19.0.1/src/intel/vulkan/tests/block_pool_no_free.c 2019-03-31 23:16:37.000000000 +0000 @@ -33,8 +33,8 @@ pthread_t thread; unsigned id; struct anv_block_pool *pool; - uint32_t blocks[BLOCKS_PER_THREAD]; - uint32_t back_blocks[BLOCKS_PER_THREAD]; + int32_t blocks[BLOCKS_PER_THREAD]; + int32_t back_blocks[BLOCKS_PER_THREAD]; } jobs[NUM_THREADS]; @@ -46,14 +46,14 @@ int32_t block, *data; for (unsigned i = 0; i < BLOCKS_PER_THREAD; i++) { - block = anv_block_pool_alloc(job->pool, block_size); - data = job->pool->map + block; + block = anv_block_pool_alloc(job->pool, block_size, NULL); + data = anv_block_pool_map(job->pool, block); *data = block; assert(block >= 0); job->blocks[i] = block; block = anv_block_pool_alloc_back(job->pool, block_size); - data = job->pool->map + block; + data = anv_block_pool_map(job->pool, block); *data = block; assert(block < 0); job->back_blocks[i] = -block; @@ -61,18 +61,18 @@ for (unsigned i = 0; i < BLOCKS_PER_THREAD; i++) { block = job->blocks[i]; - data = job->pool->map + block; + data = anv_block_pool_map(job->pool, block); assert(*data == block); block = -job->back_blocks[i]; - data = job->pool->map + block; + data = anv_block_pool_map(job->pool, block); assert(*data == block); } return NULL; } -static void validate_monotonic(uint32_t **blocks) +static void validate_monotonic(int32_t **blocks) { /* A list of indices, one per thread */ unsigned next[NUM_THREADS]; @@ -80,30 +80,30 @@ int highest = -1; while (true) { - /* First, we find which thread has the highest next element */ - int thread_max = -1; - int max_thread_idx = -1; + /* First, we find which thread has the lowest next element */ + int32_t thread_min = INT32_MAX; + int min_thread_idx = -1; for (unsigned i = 0; i < NUM_THREADS; i++) { if (next[i] >= BLOCKS_PER_THREAD) continue; - if (thread_max < blocks[i][next[i]]) { - thread_max = blocks[i][next[i]]; - max_thread_idx = i; + if (thread_min > blocks[i][next[i]]) { + thread_min = blocks[i][next[i]]; + min_thread_idx = i; 
} } /* The only way this can happen is if all of the next[] values are at * BLOCKS_PER_THREAD, in which case, we're done. */ - if (thread_max == -1) + if (thread_min == INT32_MAX) break; /* That next element had better be higher than the previous highest */ - assert(blocks[max_thread_idx][next[max_thread_idx]] > highest); + assert(blocks[min_thread_idx][next[min_thread_idx]] > highest); - highest = blocks[max_thread_idx][next[max_thread_idx]]; - next[max_thread_idx]++; + highest = blocks[min_thread_idx][next[min_thread_idx]]; + next[min_thread_idx]++; } } @@ -128,7 +128,7 @@ pthread_join(jobs[i].thread, NULL); /* Validate that the block allocations were monotonic */ - uint32_t *block_ptrs[NUM_THREADS]; + int32_t *block_ptrs[NUM_THREADS]; for (unsigned i = 0; i < NUM_THREADS; i++) block_ptrs[i] = jobs[i].blocks; validate_monotonic(block_ptrs); diff -Nru mesa-18.3.3/src/intel/vulkan/tests/state_pool_padding.c mesa-19.0.1/src/intel/vulkan/tests/state_pool_padding.c --- mesa-18.3.3/src/intel/vulkan/tests/state_pool_padding.c 1970-01-01 00:00:00.000000000 +0000 +++ mesa-19.0.1/src/intel/vulkan/tests/state_pool_padding.c 2019-03-31 23:16:37.000000000 +0000 @@ -0,0 +1,73 @@ +/* + * Copyright © 2018 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. 
+ * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + */ + +#include "anv_private.h" + +int main(int argc, char **argv) +{ + struct anv_instance instance; + struct anv_device device = { + .instance = &instance, + }; + struct anv_state_pool state_pool; + + anv_state_pool_init(&state_pool, &device, 4096, 4096, EXEC_OBJECT_PINNED); + + /* Get the size of the underlying block_pool */ + struct anv_block_pool *bp = &state_pool.block_pool; + uint64_t pool_size = bp->size; + + /* Grab one so the pool has some initial usage */ + anv_state_pool_alloc(&state_pool, 16, 16); + + /* Grab a state that is the size of the initial allocation */ + struct anv_state state = anv_state_pool_alloc(&state_pool, pool_size, 16); + + /* The pool must have grown */ + assert(bp->size > pool_size); + + /* And the state must have been allocated at the end of the original size */ + assert(state.offset == pool_size); + + /* A new allocation that fits into the returned empty space should have an + * offset within the original pool size + */ + state = anv_state_pool_alloc(&state_pool, 4096, 16); + assert(state.offset + state.alloc_size <= pool_size); + + /* We should be able to allocate pool->block_size'd chunks in the returned area + */ + int left_chunks = pool_size / 4096 - 2; + for (int i = 0; i < left_chunks; i++) { + state = anv_state_pool_alloc(&state_pool, 4096, 16); + assert(state.offset + state.alloc_size <= pool_size); + } + + /* Now the next chunk to be allocated should make the pool grow again */ + pool_size = bp->size; + state = 
anv_state_pool_alloc(&state_pool, 4096, 16); + assert(bp->size > pool_size); + assert(state.offset == pool_size); + + anv_state_pool_finish(&state_pool); +} diff -Nru mesa-18.3.3/src/intel/vulkan/vk_format_info.h mesa-19.0.1/src/intel/vulkan/vk_format_info.h --- mesa-18.3.3/src/intel/vulkan/vk_format_info.h 2018-03-08 23:00:46.000000000 +0000 +++ mesa-19.0.1/src/intel/vulkan/vk_format_info.h 2019-03-31 23:16:37.000000000 +0000 @@ -27,6 +27,56 @@ #include #include +#ifdef ANDROID +#include +/* See i915_private_android_types.h in minigbm. */ +#define HAL_PIXEL_FORMAT_NV12_Y_TILED_INTEL 0x100 + +static inline VkFormat +vk_format_from_android(unsigned android_format) +{ + switch (android_format) { + case AHARDWAREBUFFER_FORMAT_R8G8B8A8_UNORM: + return VK_FORMAT_R8G8B8A8_UNORM; + case AHARDWAREBUFFER_FORMAT_R8G8B8X8_UNORM: + case AHARDWAREBUFFER_FORMAT_R8G8B8_UNORM: + return VK_FORMAT_R8G8B8_UNORM; + case AHARDWAREBUFFER_FORMAT_R5G6B5_UNORM: + return VK_FORMAT_R5G6B5_UNORM_PACK16; + case AHARDWAREBUFFER_FORMAT_R16G16B16A16_FLOAT: + return VK_FORMAT_R16G16B16A16_SFLOAT; + case AHARDWAREBUFFER_FORMAT_R10G10B10A2_UNORM: + return VK_FORMAT_A2B10G10R10_UNORM_PACK32; + case HAL_PIXEL_FORMAT_NV12_Y_TILED_INTEL: + return VK_FORMAT_G8_B8R8_2PLANE_420_UNORM; + case AHARDWAREBUFFER_FORMAT_BLOB: + default: + return VK_FORMAT_UNDEFINED; + } +} + +static inline unsigned +android_format_from_vk(unsigned vk_format) +{ + switch (vk_format) { + case VK_FORMAT_R8G8B8A8_UNORM: + return AHARDWAREBUFFER_FORMAT_R8G8B8A8_UNORM; + case VK_FORMAT_R8G8B8_UNORM: + return AHARDWAREBUFFER_FORMAT_R8G8B8_UNORM; + case VK_FORMAT_R5G6B5_UNORM_PACK16: + return AHARDWAREBUFFER_FORMAT_R5G6B5_UNORM; + case VK_FORMAT_R16G16B16A16_SFLOAT: + return AHARDWAREBUFFER_FORMAT_R16G16B16A16_FLOAT; + case VK_FORMAT_A2B10G10R10_UNORM_PACK32: + return AHARDWAREBUFFER_FORMAT_R10G10B10A2_UNORM; + case VK_FORMAT_G8_B8R8_2PLANE_420_UNORM: + return HAL_PIXEL_FORMAT_NV12_Y_TILED_INTEL; + default: + return 
AHARDWAREBUFFER_FORMAT_BLOB; + } +} +#endif + static inline VkImageAspectFlags vk_format_aspects(VkFormat format) { diff -Nru mesa-18.3.3/src/loader/loader.c mesa-19.0.1/src/loader/loader.c --- mesa-18.3.3/src/loader/loader.c 2018-12-07 18:58:04.000000000 +0000 +++ mesa-19.0.1/src/loader/loader.c 2019-03-31 23:16:37.000000000 +0000 @@ -26,6 +26,7 @@ * Rob Clark */ +#include #include #include #include @@ -35,12 +36,15 @@ #include #include #include +#include #ifdef MAJOR_IN_MKDEV #include #endif #ifdef MAJOR_IN_SYSMACROS #include #endif +#include +#include #include "loader.h" #ifdef HAVE_LIBDRM @@ -64,7 +68,7 @@ } } -static void (*log_)(int level, const char *fmt, ...) = default_logger; +static loader_logger *log_ = default_logger; int loader_open_device(const char *device_name) @@ -487,19 +491,11 @@ } void -loader_set_logger(void (*logger)(int level, const char *fmt, ...)) +loader_set_logger(loader_logger *logger) { log_ = logger; } -/* XXX: Local definition to avoid pulling the heavyweight GL/gl.h and - * GL/internal/dri_interface.h - */ - -#ifndef __DRI_DRIVER_GET_EXTENSIONS -#define __DRI_DRIVER_GET_EXTENSIONS "__driDriverGetExtensions" -#endif - char * loader_get_extensions_name(const char *driver_name) { @@ -516,3 +512,91 @@ return name; } + +/** + * Opens a DRI driver using its driver name, returning the __DRIextension + * entrypoints. + * + * \param driverName - a name like "i965", "radeon", "nouveau", etc. + * \param out_driver - Address where the dlopen() return value will be stored. + * \param search_path_vars - NULL-terminated list of env vars that can be used + * to override the DEFAULT_DRIVER_DIR search path. 
+ */ +const struct __DRIextensionRec ** +loader_open_driver(const char *driver_name, + void **out_driver_handle, + const char **search_path_vars) +{ + char path[PATH_MAX], *search_paths, *next, *end; + char *get_extensions_name; + const struct __DRIextensionRec **extensions = NULL; + const struct __DRIextensionRec **(*get_extensions)(void); + + search_paths = NULL; + if (geteuid() == getuid() && search_path_vars) { + for (int i = 0; search_path_vars[i] != NULL; i++) { + search_paths = getenv(search_path_vars[i]); + if (search_paths) + break; + } + } + if (search_paths == NULL) + search_paths = DEFAULT_DRIVER_DIR; + + void *driver = NULL; + end = search_paths + strlen(search_paths); + for (char *p = search_paths; p < end; p = next + 1) { + int len; + next = strchr(p, ':'); + if (next == NULL) + next = end; + + len = next - p; +#if GLX_USE_TLS + snprintf(path, sizeof(path), "%.*s/tls/%s_dri.so", len, p, driver_name); + driver = dlopen(path, RTLD_NOW | RTLD_GLOBAL); +#endif + if (driver == NULL) { + snprintf(path, sizeof(path), "%.*s/%s_dri.so", len, p, driver_name); + driver = dlopen(path, RTLD_NOW | RTLD_GLOBAL); + if (driver == NULL) + log_(_LOADER_DEBUG, "MESA-LOADER: failed to open %s: %s\n", + path, dlerror()); + } + /* not need continue to loop all paths once the driver is found */ + if (driver != NULL) + break; + } + + if (driver == NULL) { + log_(_LOADER_WARNING, "MESA-LOADER: failed to open %s (search paths %s)\n", + driver_name, search_paths); + *out_driver_handle = NULL; + return NULL; + } + + log_(_LOADER_DEBUG, "MESA-LOADER: dlopen(%s)\n", path); + + get_extensions_name = loader_get_extensions_name(driver_name); + if (get_extensions_name) { + get_extensions = dlsym(driver, get_extensions_name); + if (get_extensions) { + extensions = get_extensions(); + } else { + log_(_LOADER_DEBUG, "MESA-LOADER: driver does not expose %s(): %s\n", + get_extensions_name, dlerror()); + } + free(get_extensions_name); + } + + if (!extensions) + extensions = dlsym(driver, 
__DRI_DRIVER_EXTENSIONS); + if (extensions == NULL) { + log_(_LOADER_WARNING, + "MESA-LOADER: driver exports no extensions (%s)\n", dlerror()); + dlclose(driver); + } + + *out_driver_handle = driver; + return extensions; +} diff -Nru mesa-18.3.3/src/loader/loader_dri3_helper.c mesa-19.0.1/src/loader/loader_dri3_helper.c --- mesa-18.3.3/src/loader/loader_dri3_helper.c 2019-02-01 12:03:20.000000000 +0000 +++ mesa-19.0.1/src/loader/loader_dri3_helper.c 2019-03-31 23:16:37.000000000 +0000 @@ -101,6 +101,32 @@ return NULL; } +/* Sets the adaptive sync window property state. */ +static void +set_adaptive_sync_property(xcb_connection_t *conn, xcb_drawable_t drawable, + uint32_t state) +{ + static char const name[] = "_VARIABLE_REFRESH"; + xcb_intern_atom_cookie_t cookie; + xcb_intern_atom_reply_t* reply; + xcb_void_cookie_t check; + + cookie = xcb_intern_atom(conn, 0, strlen(name), name); + reply = xcb_intern_atom_reply(conn, cookie, NULL); + if (reply == NULL) + return; + + if (state) + check = xcb_change_property_checked(conn, XCB_PROP_MODE_REPLACE, + drawable, reply->atom, + XCB_ATOM_CARDINAL, 32, 1, &state); + else + check = xcb_delete_property_checked(conn, drawable, reply->atom); + + xcb_discard_reply(conn, check.sequence); + free(reply); +} + /* Get red channel mask for given drawable at given depth. 
*/ static unsigned int dri3_get_red_mask_for_depth(struct loader_dri3_drawable *draw, int depth) @@ -331,16 +357,30 @@ draw->have_back = 0; draw->have_fake_front = 0; draw->first_init = true; + draw->adaptive_sync = false; + draw->adaptive_sync_active = false; draw->cur_blit_source = -1; draw->back_format = __DRI_IMAGE_FORMAT_NONE; mtx_init(&draw->mtx, mtx_plain); cnd_init(&draw->event_cnd); - if (draw->ext->config) + if (draw->ext->config) { + unsigned char adaptive_sync = 0; + draw->ext->config->configQueryi(draw->dri_screen, "vblank_mode", &vblank_mode); + draw->ext->config->configQueryb(draw->dri_screen, + "adaptive_sync", + &adaptive_sync); + + draw->adaptive_sync = adaptive_sync; + } + + if (!draw->adaptive_sync) + set_adaptive_sync_property(conn, draw->drawable, false); + switch (vblank_mode) { case DRI_CONF_VBLANK_NEVER: case DRI_CONF_VBLANK_DEF_INTERVAL_0: @@ -879,6 +919,12 @@ back = dri3_find_back_alloc(draw); mtx_lock(&draw->mtx); + + if (draw->adaptive_sync && !draw->adaptive_sync_active) { + set_adaptive_sync_property(draw->conn, draw->drawable, true); + draw->adaptive_sync_active = true; + } + if (draw->is_different_gpu && back) { /* Update the linear buffer before presenting the pixmap */ (void) loader_dri3_blit_image(draw, diff -Nru mesa-18.3.3/src/loader/loader_dri3_helper.h mesa-19.0.1/src/loader/loader_dri3_helper.h --- mesa-18.3.3/src/loader/loader_dri3_helper.h 2018-09-27 19:13:54.000000000 +0000 +++ mesa-19.0.1/src/loader/loader_dri3_helper.h 2019-03-31 23:16:37.000000000 +0000 @@ -156,6 +156,8 @@ xcb_special_event_t *special_event; bool first_init; + bool adaptive_sync; + bool adaptive_sync_active; int swap_interval; struct loader_dri3_extensions *ext; diff -Nru mesa-18.3.3/src/loader/loader.h mesa-19.0.1/src/loader/loader.h --- mesa-18.3.3/src/loader/loader.h 2018-12-07 18:58:04.000000000 +0000 +++ mesa-19.0.1/src/loader/loader.h 2019-03-31 23:16:37.000000000 +0000 @@ -33,6 +33,8 @@ extern "C" { #endif +struct __DRIextensionRec; + /* Helpers 
to figure out driver and device name, eg. from pci-id, etc. */ int @@ -47,6 +49,11 @@ char * loader_get_driver_for_fd(int fd); +const struct __DRIextensionRec ** +loader_open_driver(const char *driver_name, + void **out_driver_handle, + const char **search_path_vars); + char * loader_get_device_name_for_fd(int fd); @@ -67,8 +74,9 @@ #define _LOADER_INFO 2 /* just useful info */ #define _LOADER_DEBUG 3 /* useful info for debugging */ +typedef void loader_logger(int level, const char *fmt, ...); void -loader_set_logger(void (*logger)(int level, const char *fmt, ...)); +loader_set_logger(loader_logger *logger); char * loader_get_extensions_name(const char *driver_name); diff -Nru mesa-18.3.3/src/loader/Makefile.am mesa-19.0.1/src/loader/Makefile.am --- mesa-18.3.3/src/loader/Makefile.am 2018-03-17 22:00:11.000000000 +0000 +++ mesa-19.0.1/src/loader/Makefile.am 2019-03-31 23:16:37.000000000 +0000 @@ -28,6 +28,7 @@ AM_CPPFLAGS = \ -I$(top_builddir)/src/util/ \ -DUSE_DRICONF \ + -DDEFAULT_DRIVER_DIR=\"$(DRI_DRIVER_SEARCH_DIR)\" \ $(DEFINES) \ -I$(top_srcdir)/include \ -I$(top_srcdir)/include/drm-uapi \ diff -Nru mesa-18.3.3/src/loader/meson.build mesa-19.0.1/src/loader/meson.build --- mesa-18.3.3/src/loader/meson.build 2018-03-13 20:41:43.000000000 +0000 +++ mesa-19.0.1/src/loader/meson.build 2019-03-31 23:16:37.000000000 +0000 @@ -39,7 +39,9 @@ 'loader', ['loader.c', 'loader.h', 'pci_id_driver_map.c', 'pci_id_driver_map.h', xmlpool_options_h], - c_args : [c_vis_args, '-DUSE_DRICONF'], + c_args : [c_vis_args, '-DUSE_DRICONF', + '-DDEFAULT_DRIVER_DIR="@0@"'.format(dri_search_path), +], include_directories : [inc_include, inc_src, inc_util], dependencies : [dep_libdrm, dep_thread], build_by_default : false, diff -Nru mesa-18.3.3/src/loader/SConscript mesa-19.0.1/src/loader/SConscript --- mesa-18.3.3/src/loader/SConscript 2017-11-05 00:14:08.000000000 +0000 +++ mesa-19.0.1/src/loader/SConscript 2019-03-31 23:16:37.000000000 +0000 @@ -12,6 +12,8 @@ env.PkgUseModules('DRM') 
env.Append(CPPDEFINES = ['HAVE_LIBDRM']) +env.Append(CPPDEFINES = ['DEFAULT_DRIVER_DIR=\\"/usr/local/lib/dri\\"']) + # parse Makefile.sources sources = env.ParseSourceList('Makefile.sources', 'LOADER_C_FILES') diff -Nru mesa-18.3.3/src/Makefile.am mesa-19.0.1/src/Makefile.am --- mesa-18.3.3/src/Makefile.am 2018-12-07 18:58:04.000000000 +0000 +++ mesa-19.0.1/src/Makefile.am 2019-03-31 23:16:37.000000000 +0000 @@ -81,6 +81,10 @@ SUBDIRS += broadcom endif +if HAVE_FREEDRENO_DRIVERS +SUBDIRS += freedreno +endif + if NEED_OPENGL_COMMON SUBDIRS += mesa endif diff -Nru mesa-18.3.3/src/mapi/es1api/ABI-check mesa-19.0.1/src/mapi/es1api/ABI-check --- mesa-18.3.3/src/mapi/es1api/ABI-check 2018-02-27 16:44:19.000000000 +0000 +++ mesa-19.0.1/src/mapi/es1api/ABI-check 2019-03-31 23:16:37.000000000 +0000 @@ -1,11 +1,7 @@ #!/bin/sh set -eu -# Print defined gl.* functions not in GL ES 1.1 or in -# (FIXME, none of these should be part of the ABI) -# GL_EXT_multi_draw_arrays -# GL_OES_EGL_image - +# Print defined gl.* functions not in GL ES 1.1 # or in extensions that are part of the ES 1.1 extension pack. 
# (see http://www.khronos.org/registry/gles/specs/1.1/opengles_spec_1_1_extension_pack.pdf) @@ -65,8 +61,6 @@ glDisableClientState glDrawArrays glDrawElements -glEGLImageTargetRenderbufferStorageOES -glEGLImageTargetTexture2DOES glEnable glEnableClientState glFinish @@ -123,8 +117,6 @@ glMaterialx glMaterialxv glMatrixMode -glMultiDrawArraysEXT -glMultiDrawElementsEXT glMultiTexCoord4f glMultiTexCoord4x glMultMatrixf diff -Nru mesa-18.3.3/src/mapi/es1api/meson.build mesa-19.0.1/src/mapi/es1api/meson.build --- mesa-18.3.3/src/mapi/es1api/meson.build 2018-12-07 18:58:04.000000000 +0000 +++ mesa-19.0.1/src/mapi/es1api/meson.build 2019-03-31 23:16:37.000000000 +0000 @@ -20,10 +20,10 @@ es1_glapi_mapi_tmp_h = custom_target( 'es1_glapi_mapi_tmp.h', - input : [mapi_abi_py, gl_and_es_api_files], + input : [glapi_gen_mapi_py, glapi_gen_gl_xml], output : 'glapi_mapi_tmp.h', - command : [prog_python, '@INPUT0@', '--printer', 'es1api', '@INPUT1@'], - depend_files : api_xml_files, + command : [prog_python, '@INPUT0@', 'glesv1', '@INPUT1@'], + depend_files : glapi_gen_mapi_deps, capture : true, ) @@ -56,6 +56,7 @@ 'es1-ABI-check', find_program('ABI-check'), env : env_test, - args : libglesv1_cm + args : libglesv1_cm, + suite : ['mapi'], ) endif diff -Nru mesa-18.3.3/src/mapi/es2api/ABI-check mesa-19.0.1/src/mapi/es2api/ABI-check --- mesa-18.3.3/src/mapi/es2api/ABI-check 2018-02-27 16:44:19.000000000 +0000 +++ mesa-19.0.1/src/mapi/es2api/ABI-check 2019-03-31 23:16:37.000000000 +0000 @@ -1,10 +1,7 @@ #!/bin/sh set -eu -# Print defined gl.* functions not in GL ES 3.0 or in -# (FIXME, none of these should be part of the ABI) -# GL_EXT_multi_draw_arrays -# GL_OES_EGL_image +# Print defined gl.* functions not in GL ES 3.{0..2} case "$(uname)" in Darwin) @@ -34,7 +31,6 @@ glBindBuffer glBindBufferBase glBindBufferRange -glBindFragDataLocationEXT glBindFramebuffer glBindImageTexture glBindProgramPipeline @@ -118,8 +114,6 @@ glDrawElementsInstancedBaseVertex glDrawRangeElements 
glDrawRangeElementsBaseVertex -glEGLImageTargetRenderbufferStorageOES -glEGLImageTargetTexture2DOES glEnable glEnableVertexAttribArray glEnablei @@ -200,9 +194,7 @@ glGetTexLevelParameterfv glGetTexLevelParameteriv glGetTexParameterIiv -glGetTexParameterIivEXT glGetTexParameterIuiv -glGetTexParameterIuivEXT glGetTexParameterfv glGetTexParameteriv glGetTransformFeedbackVarying @@ -243,8 +235,6 @@ glMemoryBarrier glMemoryBarrierByRegion glMinSampleShading -glMultiDrawArraysEXT -glMultiDrawElementsEXT glObjectLabel glObjectPtrLabel glPatchParameteri @@ -318,9 +308,7 @@ glTexImage2D glTexImage3D glTexParameterIiv -glTexParameterIivEXT glTexParameterIuiv -glTexParameterIuivEXT glTexParameterf glTexParameterfv glTexParameteri diff -Nru mesa-18.3.3/src/mapi/es2api/meson.build mesa-19.0.1/src/mapi/es2api/meson.build --- mesa-18.3.3/src/mapi/es2api/meson.build 2018-12-07 18:58:04.000000000 +0000 +++ mesa-19.0.1/src/mapi/es2api/meson.build 2019-03-31 23:16:37.000000000 +0000 @@ -20,10 +20,10 @@ es2_glapi_mapi_tmp_h = custom_target( 'es2_glapi_mapi_tmp.h', - input : [mapi_abi_py, gl_and_es_api_files], + input : [glapi_gen_mapi_py, glapi_gen_gl_xml], output : 'glapi_mapi_tmp.h', - command : [prog_python, '@INPUT0@', '--printer', 'es2api', '@INPUT1@'], - depend_files : api_xml_files, + command : [prog_python, '@INPUT0@', 'glesv2', '@INPUT1@'], + depend_files : glapi_gen_mapi_deps, capture : true, ) @@ -56,6 +56,7 @@ 'es2-ABI-check', find_program('ABI-check'), env : env_test, - args : libgles2 + args : libgles2, + suite : ['mapi'], ) endif diff -Nru mesa-18.3.3/src/mapi/glapi/gen/ARB_framebuffer_object.xml mesa-19.0.1/src/mapi/glapi/gen/ARB_framebuffer_object.xml --- mesa-18.3.3/src/mapi/glapi/gen/ARB_framebuffer_object.xml 2017-11-05 00:14:08.000000000 +0000 +++ mesa-19.0.1/src/mapi/glapi/gen/ARB_framebuffer_object.xml 2019-03-31 23:16:37.000000000 +0000 @@ -172,7 +172,15 @@ - + + diff -Nru mesa-18.3.3/src/mapi/glapi/gen/es_EXT.xml mesa-19.0.1/src/mapi/glapi/gen/es_EXT.xml --- 
mesa-18.3.3/src/mapi/glapi/gen/es_EXT.xml 2018-09-27 19:13:54.000000000 +0000 +++ mesa-19.0.1/src/mapi/glapi/gen/es_EXT.xml 2019-03-31 23:16:37.000000000 +0000 @@ -810,6 +810,8 @@ + + @@ -1452,6 +1454,19 @@ + + + + + + + + + + + + + diff -Nru mesa-18.3.3/src/mapi/glapi/gen/EXT_multisampled_render_to_texture.xml mesa-19.0.1/src/mapi/glapi/gen/EXT_multisampled_render_to_texture.xml --- mesa-18.3.3/src/mapi/glapi/gen/EXT_multisampled_render_to_texture.xml 1970-01-01 00:00:00.000000000 +0000 +++ mesa-19.0.1/src/mapi/glapi/gen/EXT_multisampled_render_to_texture.xml 2019-03-31 23:16:37.000000000 +0000 @@ -0,0 +1,34 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + diff -Nru mesa-18.3.3/src/mapi/glapi/gen/gl_API.xml mesa-19.0.1/src/mapi/glapi/gen/gl_API.xml --- mesa-18.3.3/src/mapi/glapi/gen/gl_API.xml 2018-12-07 18:58:04.000000000 +0000 +++ mesa-19.0.1/src/mapi/glapi/gen/gl_API.xml 2019-03-31 23:16:37.000000000 +0000 @@ -1148,7 +1148,7 @@ - + diff -Nru mesa-18.3.3/src/mapi/glapi/gen/gl_marshal.py mesa-19.0.1/src/mapi/glapi/gen/gl_marshal.py --- mesa-18.3.3/src/mapi/glapi/gen/gl_marshal.py 2018-09-27 19:13:54.000000000 +0000 +++ mesa-19.0.1/src/mapi/glapi/gen/gl_marshal.py 2019-03-31 23:16:37.000000000 +0000 @@ -249,7 +249,7 @@ out('if ({0}) {{'.format(func.marshal_fail)) with indent(): out('_mesa_glthread_finish(ctx);') - out('_mesa_glthread_restore_dispatch(ctx);') + out('_mesa_glthread_restore_dispatch(ctx, __func__);') self.print_sync_dispatch(func) out('return;') out('}') diff -Nru mesa-18.3.3/src/mapi/glapi/gen/gl_XML.py mesa-19.0.1/src/mapi/glapi/gen/gl_XML.py --- mesa-18.3.3/src/mapi/glapi/gen/gl_XML.py 2018-09-27 19:13:54.000000000 +0000 +++ mesa-19.0.1/src/mapi/glapi/gen/gl_XML.py 2019-03-31 23:16:37.000000000 +0000 @@ -611,21 +611,11 @@ self.deprecated = None self.has_no_error_variant = False - # self.entry_point_api_map[name][api] is a decimal value - # indicating the earliest version of the given API in which - # each entry point exists. 
Every entry point is included in - # the first level of the map; the second level of the map only - # lists APIs which contain the entry point in at least one - # version. For example, - # self.entry_point_api_map['ClipPlanex'] == { 'es1': - # Decimal('1.1') }. - self.entry_point_api_map = {} - # self.api_map[api] is a decimal value indicating the earliest # version of the given API in which ANY alias for the function # exists. The map only lists APIs which contain the function # in at least one version. For example, for the ClipPlanex - # function, self.entry_point_api_map == { 'es1': + # function, self.api_map == { 'es1': # Decimal('1.1') }. self.api_map = {} @@ -658,13 +648,11 @@ self.entry_points.append( name ) - self.entry_point_api_map[name] = {} for api in ('es1', 'es2'): version_str = element.get(api, 'none') assert version_str is not None if version_str != 'none': version_decimal = Decimal(version_str) - self.entry_point_api_map[name][api] = version_decimal if api not in self.api_map or \ version_decimal < self.api_map[api]: self.api_map[api] = version_decimal @@ -693,7 +681,7 @@ # Only try to set the offset when a non-alias entry-point # is being processed. - if name in static_data.offsets: + if name in static_data.offsets and static_data.offsets[name] <= static_data.MAX_OFFSETS: self.offset = static_data.offsets[name] else: self.offset = -1 @@ -826,23 +814,6 @@ else: return "_dispatch_stub_%u" % (self.offset) - def entry_points_for_api_version(self, api, version = None): - """Return a list of the entry point names for this function - which are supported in the given API (and optionally, version). - - Use the decimal.Decimal type to precisely express non-integer - versions. 
- """ - result = [] - for entry_point, api_to_ver in self.entry_point_api_map.items(): - if api not in api_to_ver: - continue - if version is not None and version < api_to_ver[api]: - continue - result.append(entry_point) - return result - - class gl_item_factory(object): """Factory to create objects derived from gl_item.""" @@ -878,31 +849,6 @@ typeexpr.create_initial_types() return - def filter_functions(self, entry_point_list): - """Filter out entry points not in entry_point_list.""" - functions_by_name = {} - for func in self.functions_by_name.values(): - entry_points = [ent for ent in func.entry_points if ent in entry_point_list] - if entry_points: - func.filter_entry_points(entry_points) - functions_by_name[func.name] = func - - self.functions_by_name = functions_by_name - - def filter_functions_by_api(self, api, version = None): - """Filter out entry points not in the given API (or - optionally, not in the given version of the given API). - """ - functions_by_name = {} - for func in self.functions_by_name.values(): - entry_points = func.entry_points_for_api_version(api, version) - if entry_points: - func.filter_entry_points(entry_points) - functions_by_name[func.name] = func - - self.functions_by_name = functions_by_name - - def parse_file(self, file_name): doc = ET.parse( file_name ) self.process_element(file_name, doc) diff -Nru mesa-18.3.3/src/mapi/glapi/gen/Makefile.am mesa-19.0.1/src/mapi/glapi/gen/Makefile.am --- mesa-18.3.3/src/mapi/glapi/gen/Makefile.am 2018-12-07 18:58:04.000000000 +0000 +++ mesa-19.0.1/src/mapi/glapi/gen/Makefile.am 2019-03-31 23:16:37.000000000 +0000 @@ -200,6 +200,7 @@ EXT_external_objects_fd.xml \ EXT_framebuffer_object.xml \ EXT_gpu_shader4.xml \ + EXT_multisampled_render_to_texture.xml \ EXT_packed_depth_stencil.xml \ EXT_provoking_vertex.xml \ EXT_separate_shader_objects.xml \ diff -Nru mesa-18.3.3/src/mapi/glapi/gen/meson.build mesa-19.0.1/src/mapi/glapi/gen/meson.build --- mesa-18.3.3/src/mapi/glapi/gen/meson.build 
2018-12-07 18:58:04.000000000 +0000 +++ mesa-19.0.1/src/mapi/glapi/gen/meson.build 2019-03-31 23:16:37.000000000 +0000 @@ -18,6 +18,13 @@ # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE # SOFTWARE. +glapi_gen_gl_xml = files('../registry/gl.xml') +glapi_gen_mapi_deps = [ + glapi_gen_gl_xml, + genCommon_py, + glapi_gen_gl_xml, +] + gl_and_es_api_files = files('gl_and_es_API.xml') api_xml_files = files( @@ -107,6 +114,7 @@ 'EXT_external_objects_fd.xml', 'EXT_framebuffer_object.xml', 'EXT_gpu_shader4.xml', + 'EXT_multisampled_render_to_texture.xml', 'EXT_packed_depth_stencil.xml', 'EXT_provoking_vertex.xml', 'EXT_separate_shader_objects.xml', diff -Nru mesa-18.3.3/src/mapi/glapi/gen/static_data.py mesa-19.0.1/src/mapi/glapi/gen/static_data.py --- mesa-18.3.3/src/mapi/glapi/gen/static_data.py 2017-11-05 00:14:08.000000000 +0000 +++ mesa-19.0.1/src/mapi/glapi/gen/static_data.py 2019-03-31 23:16:37.000000000 +0000 @@ -20,8 +20,17 @@ # FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS # IN THE SOFTWARE. + +"""The maximum entries of actual static data required by indirect GLX.""" + + +MAX_OFFSETS = 407 + """Table of functions that have ABI-mandated offsets in the dispatch table. +The first MAX_OFFSETS entries are required by indirect GLX. The rest are +required to preserve the glapi <> drivers ABI. This is to be addressed shortly. 
+ This list will never change.""" offsets = { "NewList": 0, @@ -331,33 +340,33 @@ "Translatef": 304, "Viewport": 305, "ArrayElement": 306, + "BindTexture": 307, "ColorPointer": 308, "DisableClientState": 309, "DrawArrays": 310, "DrawElements": 311, "EdgeFlagPointer": 312, "EnableClientState": 313, - "GetPointerv": 329, "IndexPointer": 314, + "Indexub": 315, + "Indexubv": 316, "InterleavedArrays": 317, "NormalPointer": 318, + "PolygonOffset": 319, "TexCoordPointer": 320, "VertexPointer": 321, - "PolygonOffset": 319, + "AreTexturesResident": 322, "CopyTexImage1D": 323, "CopyTexImage2D": 324, "CopyTexSubImage1D": 325, "CopyTexSubImage2D": 326, - "TexSubImage1D": 332, - "TexSubImage2D": 333, - "AreTexturesResident": 322, - "BindTexture": 307, "DeleteTextures": 327, "GenTextures": 328, + "GetPointerv": 329, "IsTexture": 330, "PrioritizeTextures": 331, - "Indexub": 315, - "Indexubv": 316, + "TexSubImage1D": 332, + "TexSubImage2D": 333, "PopClientAttrib": 334, "PushClientAttrib": 335, "BlendColor": 336, @@ -431,7 +440,1019 @@ "MultiTexCoord4i": 404, "MultiTexCoord4iv": 405, "MultiTexCoord4s": 406, - "MultiTexCoord4sv": 407 + "MultiTexCoord4sv": 407, + "CompressedTexImage1D": 408, + "CompressedTexImage2D": 409, + "CompressedTexImage3D": 410, + "CompressedTexSubImage1D": 411, + "CompressedTexSubImage2D": 412, + "CompressedTexSubImage3D": 413, + "GetCompressedTexImage": 414, + "LoadTransposeMatrixd": 415, + "LoadTransposeMatrixf": 416, + "MultTransposeMatrixd": 417, + "MultTransposeMatrixf": 418, + "SampleCoverage": 419, + "BlendFuncSeparate": 420, + "FogCoordPointer": 421, + "FogCoordd": 422, + "FogCoorddv": 423, + "MultiDrawArrays": 424, + "PointParameterf": 425, + "PointParameterfv": 426, + "PointParameteri": 427, + "PointParameteriv": 428, + "SecondaryColor3b": 429, + "SecondaryColor3bv": 430, + "SecondaryColor3d": 431, + "SecondaryColor3dv": 432, + "SecondaryColor3i": 433, + "SecondaryColor3iv": 434, + "SecondaryColor3s": 435, + "SecondaryColor3sv": 436, + 
"SecondaryColor3ub": 437, + "SecondaryColor3ubv": 438, + "SecondaryColor3ui": 439, + "SecondaryColor3uiv": 440, + "SecondaryColor3us": 441, + "SecondaryColor3usv": 442, + "SecondaryColorPointer": 443, + "WindowPos2d": 444, + "WindowPos2dv": 445, + "WindowPos2f": 446, + "WindowPos2fv": 447, + "WindowPos2i": 448, + "WindowPos2iv": 449, + "WindowPos2s": 450, + "WindowPos2sv": 451, + "WindowPos3d": 452, + "WindowPos3dv": 453, + "WindowPos3f": 454, + "WindowPos3fv": 455, + "WindowPos3i": 456, + "WindowPos3iv": 457, + "WindowPos3s": 458, + "WindowPos3sv": 459, + "BeginQuery": 460, + "BindBuffer": 461, + "BufferData": 462, + "BufferSubData": 463, + "DeleteBuffers": 464, + "DeleteQueries": 465, + "EndQuery": 466, + "GenBuffers": 467, + "GenQueries": 468, + "GetBufferParameteriv": 469, + "GetBufferPointerv": 470, + "GetBufferSubData": 471, + "GetQueryObjectiv": 472, + "GetQueryObjectuiv": 473, + "GetQueryiv": 474, + "IsBuffer": 475, + "IsQuery": 476, + "MapBuffer": 477, + "UnmapBuffer": 478, + "AttachShader": 479, + "BindAttribLocation": 480, + "BlendEquationSeparate": 481, + "CompileShader": 482, + "CreateProgram": 483, + "CreateShader": 484, + "DeleteProgram": 485, + "DeleteShader": 486, + "DetachShader": 487, + "DisableVertexAttribArray": 488, + "DrawBuffers": 489, + "EnableVertexAttribArray": 490, + "GetActiveAttrib": 491, + "GetActiveUniform": 492, + "GetAttachedShaders": 493, + "GetAttribLocation": 494, + "GetProgramInfoLog": 495, + "GetProgramiv": 496, + "GetShaderInfoLog": 497, + "GetShaderSource": 498, + "GetShaderiv": 499, + "GetUniformLocation": 500, + "GetUniformfv": 501, + "GetUniformiv": 502, + "GetVertexAttribPointerv": 503, + "GetVertexAttribdv": 504, + "GetVertexAttribfv": 505, + "GetVertexAttribiv": 506, + "IsProgram": 507, + "IsShader": 508, + "LinkProgram": 509, + "ShaderSource": 510, + "StencilFuncSeparate": 511, + "StencilMaskSeparate": 512, + "StencilOpSeparate": 513, + "Uniform1f": 514, + "Uniform1fv": 515, + "Uniform1i": 516, + "Uniform1iv": 517, + 
"Uniform2f": 518, + "Uniform2fv": 519, + "Uniform2i": 520, + "Uniform2iv": 521, + "Uniform3f": 522, + "Uniform3fv": 523, + "Uniform3i": 524, + "Uniform3iv": 525, + "Uniform4f": 526, + "Uniform4fv": 527, + "Uniform4i": 528, + "Uniform4iv": 529, + "UniformMatrix2fv": 530, + "UniformMatrix3fv": 531, + "UniformMatrix4fv": 532, + "UseProgram": 533, + "ValidateProgram": 534, + "VertexAttrib1d": 535, + "VertexAttrib1dv": 536, + "VertexAttrib1s": 537, + "VertexAttrib1sv": 538, + "VertexAttrib2d": 539, + "VertexAttrib2dv": 540, + "VertexAttrib2s": 541, + "VertexAttrib2sv": 542, + "VertexAttrib3d": 543, + "VertexAttrib3dv": 544, + "VertexAttrib3s": 545, + "VertexAttrib3sv": 546, + "VertexAttrib4Nbv": 547, + "VertexAttrib4Niv": 548, + "VertexAttrib4Nsv": 549, + "VertexAttrib4Nub": 550, + "VertexAttrib4Nubv": 551, + "VertexAttrib4Nuiv": 552, + "VertexAttrib4Nusv": 553, + "VertexAttrib4bv": 554, + "VertexAttrib4d": 555, + "VertexAttrib4dv": 556, + "VertexAttrib4iv": 557, + "VertexAttrib4s": 558, + "VertexAttrib4sv": 559, + "VertexAttrib4ubv": 560, + "VertexAttrib4uiv": 561, + "VertexAttrib4usv": 562, + "VertexAttribPointer": 563, + "UniformMatrix2x3fv": 564, + "UniformMatrix2x4fv": 565, + "UniformMatrix3x2fv": 566, + "UniformMatrix3x4fv": 567, + "UniformMatrix4x2fv": 568, + "UniformMatrix4x3fv": 569, + "BeginConditionalRender": 570, + "BeginTransformFeedback": 571, + "BindBufferBase": 572, + "BindBufferRange": 573, + "BindFragDataLocation": 574, + "ClampColor": 575, + "ClearBufferfi": 576, + "ClearBufferfv": 577, + "ClearBufferiv": 578, + "ClearBufferuiv": 579, + "ColorMaski": 580, + "Disablei": 581, + "Enablei": 582, + "EndConditionalRender": 583, + "EndTransformFeedback": 584, + "GetBooleani_v": 585, + "GetFragDataLocation": 586, + "GetIntegeri_v": 587, + "GetStringi": 588, + "GetTexParameterIiv": 589, + "GetTexParameterIuiv": 590, + "GetTransformFeedbackVarying": 591, + "GetUniformuiv": 592, + "GetVertexAttribIiv": 593, + "GetVertexAttribIuiv": 594, + "IsEnabledi": 595, + 
"TexParameterIiv": 596, + "TexParameterIuiv": 597, + "TransformFeedbackVaryings": 598, + "Uniform1ui": 599, + "Uniform1uiv": 600, + "Uniform2ui": 601, + "Uniform2uiv": 602, + "Uniform3ui": 603, + "Uniform3uiv": 604, + "Uniform4ui": 605, + "Uniform4uiv": 606, + "VertexAttribI1iv": 607, + "VertexAttribI1uiv": 608, + "VertexAttribI4bv": 609, + "VertexAttribI4sv": 610, + "VertexAttribI4ubv": 611, + "VertexAttribI4usv": 612, + "VertexAttribIPointer": 613, + "PrimitiveRestartIndex": 614, + "TexBuffer": 615, + "FramebufferTexture": 616, + "GetBufferParameteri64v": 617, + "GetInteger64i_v": 618, + "VertexAttribDivisor": 619, + "MinSampleShading": 620, + "MemoryBarrierByRegion": 621, + "BindProgramARB": 622, + "DeleteProgramsARB": 623, + "GenProgramsARB": 624, + "GetProgramEnvParameterdvARB": 625, + "GetProgramEnvParameterfvARB": 626, + "GetProgramLocalParameterdvARB": 627, + "GetProgramLocalParameterfvARB": 628, + "GetProgramStringARB": 629, + "GetProgramivARB": 630, + "IsProgramARB": 631, + "ProgramEnvParameter4dARB": 632, + "ProgramEnvParameter4dvARB": 633, + "ProgramEnvParameter4fARB": 634, + "ProgramEnvParameter4fvARB": 635, + "ProgramLocalParameter4dARB": 636, + "ProgramLocalParameter4dvARB": 637, + "ProgramLocalParameter4fARB": 638, + "ProgramLocalParameter4fvARB": 639, + "ProgramStringARB": 640, + "VertexAttrib1fARB": 641, + "VertexAttrib1fvARB": 642, + "VertexAttrib2fARB": 643, + "VertexAttrib2fvARB": 644, + "VertexAttrib3fARB": 645, + "VertexAttrib3fvARB": 646, + "VertexAttrib4fARB": 647, + "VertexAttrib4fvARB": 648, + "AttachObjectARB": 649, + "CreateProgramObjectARB": 650, + "CreateShaderObjectARB": 651, + "DeleteObjectARB": 652, + "DetachObjectARB": 653, + "GetAttachedObjectsARB": 654, + "GetHandleARB": 655, + "GetInfoLogARB": 656, + "GetObjectParameterfvARB": 657, + "GetObjectParameterivARB": 658, + "DrawArraysInstancedARB": 659, + "DrawElementsInstancedARB": 660, + "BindFramebuffer": 661, + "BindRenderbuffer": 662, + "BlitFramebuffer": 663, + 
"CheckFramebufferStatus": 664, + "DeleteFramebuffers": 665, + "DeleteRenderbuffers": 666, + "FramebufferRenderbuffer": 667, + "FramebufferTexture1D": 668, + "FramebufferTexture2D": 669, + "FramebufferTexture3D": 670, + "FramebufferTextureLayer": 671, + "GenFramebuffers": 672, + "GenRenderbuffers": 673, + "GenerateMipmap": 674, + "GetFramebufferAttachmentParameteriv": 675, + "GetRenderbufferParameteriv": 676, + "IsFramebuffer": 677, + "IsRenderbuffer": 678, + "RenderbufferStorage": 679, + "RenderbufferStorageMultisample": 680, + "FlushMappedBufferRange": 681, + "MapBufferRange": 682, + "BindVertexArray": 683, + "DeleteVertexArrays": 684, + "GenVertexArrays": 685, + "IsVertexArray": 686, + "GetActiveUniformBlockName": 687, + "GetActiveUniformBlockiv": 688, + "GetActiveUniformName": 689, + "GetActiveUniformsiv": 690, + "GetUniformBlockIndex": 691, + "GetUniformIndices": 692, + "UniformBlockBinding": 693, + "CopyBufferSubData": 694, + "ClientWaitSync": 695, + "DeleteSync": 696, + "FenceSync": 697, + "GetInteger64v": 698, + "GetSynciv": 699, + "IsSync": 700, + "WaitSync": 701, + "DrawElementsBaseVertex": 702, + "DrawElementsInstancedBaseVertex": 703, + "DrawRangeElementsBaseVertex": 704, + "MultiDrawElementsBaseVertex": 705, + "ProvokingVertex": 706, + "GetMultisamplefv": 707, + "SampleMaski": 708, + "TexImage2DMultisample": 709, + "TexImage3DMultisample": 710, + "BlendEquationSeparateiARB": 711, + "BlendEquationiARB": 712, + "BlendFuncSeparateiARB": 713, + "BlendFunciARB": 714, + "BindFragDataLocationIndexed": 715, + "GetFragDataIndex": 716, + "BindSampler": 717, + "DeleteSamplers": 718, + "GenSamplers": 719, + "GetSamplerParameterIiv": 720, + "GetSamplerParameterIuiv": 721, + "GetSamplerParameterfv": 722, + "GetSamplerParameteriv": 723, + "IsSampler": 724, + "SamplerParameterIiv": 725, + "SamplerParameterIuiv": 726, + "SamplerParameterf": 727, + "SamplerParameterfv": 728, + "SamplerParameteri": 729, + "SamplerParameteriv": 730, + "GetQueryObjecti64v": 731, + 
"GetQueryObjectui64v": 732, + "QueryCounter": 733, + "ColorP3ui": 734, + "ColorP3uiv": 735, + "ColorP4ui": 736, + "ColorP4uiv": 737, + "MultiTexCoordP1ui": 738, + "MultiTexCoordP1uiv": 739, + "MultiTexCoordP2ui": 740, + "MultiTexCoordP2uiv": 741, + "MultiTexCoordP3ui": 742, + "MultiTexCoordP3uiv": 743, + "MultiTexCoordP4ui": 744, + "MultiTexCoordP4uiv": 745, + "NormalP3ui": 746, + "NormalP3uiv": 747, + "SecondaryColorP3ui": 748, + "SecondaryColorP3uiv": 749, + "TexCoordP1ui": 750, + "TexCoordP1uiv": 751, + "TexCoordP2ui": 752, + "TexCoordP2uiv": 753, + "TexCoordP3ui": 754, + "TexCoordP3uiv": 755, + "TexCoordP4ui": 756, + "TexCoordP4uiv": 757, + "VertexAttribP1ui": 758, + "VertexAttribP1uiv": 759, + "VertexAttribP2ui": 760, + "VertexAttribP2uiv": 761, + "VertexAttribP3ui": 762, + "VertexAttribP3uiv": 763, + "VertexAttribP4ui": 764, + "VertexAttribP4uiv": 765, + "VertexP2ui": 766, + "VertexP2uiv": 767, + "VertexP3ui": 768, + "VertexP3uiv": 769, + "VertexP4ui": 770, + "VertexP4uiv": 771, + "DrawArraysIndirect": 772, + "DrawElementsIndirect": 773, + "GetUniformdv": 774, + "Uniform1d": 775, + "Uniform1dv": 776, + "Uniform2d": 777, + "Uniform2dv": 778, + "Uniform3d": 779, + "Uniform3dv": 780, + "Uniform4d": 781, + "Uniform4dv": 782, + "UniformMatrix2dv": 783, + "UniformMatrix2x3dv": 784, + "UniformMatrix2x4dv": 785, + "UniformMatrix3dv": 786, + "UniformMatrix3x2dv": 787, + "UniformMatrix3x4dv": 788, + "UniformMatrix4dv": 789, + "UniformMatrix4x2dv": 790, + "UniformMatrix4x3dv": 791, + "GetActiveSubroutineName": 792, + "GetActiveSubroutineUniformName": 793, + "GetActiveSubroutineUniformiv": 794, + "GetProgramStageiv": 795, + "GetSubroutineIndex": 796, + "GetSubroutineUniformLocation": 797, + "GetUniformSubroutineuiv": 798, + "UniformSubroutinesuiv": 799, + "PatchParameterfv": 800, + "PatchParameteri": 801, + "BindTransformFeedback": 802, + "DeleteTransformFeedbacks": 803, + "DrawTransformFeedback": 804, + "GenTransformFeedbacks": 805, + "IsTransformFeedback": 806, + 
"PauseTransformFeedback": 807, + "ResumeTransformFeedback": 808, + "BeginQueryIndexed": 809, + "DrawTransformFeedbackStream": 810, + "EndQueryIndexed": 811, + "GetQueryIndexediv": 812, + "ClearDepthf": 813, + "DepthRangef": 814, + "GetShaderPrecisionFormat": 815, + "ReleaseShaderCompiler": 816, + "ShaderBinary": 817, + "GetProgramBinary": 818, + "ProgramBinary": 819, + "ProgramParameteri": 820, + "GetVertexAttribLdv": 821, + "VertexAttribL1d": 822, + "VertexAttribL1dv": 823, + "VertexAttribL2d": 824, + "VertexAttribL2dv": 825, + "VertexAttribL3d": 826, + "VertexAttribL3dv": 827, + "VertexAttribL4d": 828, + "VertexAttribL4dv": 829, + "VertexAttribLPointer": 830, + "DepthRangeArrayv": 831, + "DepthRangeIndexed": 832, + "GetDoublei_v": 833, + "GetFloati_v": 834, + "ScissorArrayv": 835, + "ScissorIndexed": 836, + "ScissorIndexedv": 837, + "ViewportArrayv": 838, + "ViewportIndexedf": 839, + "ViewportIndexedfv": 840, + "GetGraphicsResetStatusARB": 841, + "GetnColorTableARB": 842, + "GetnCompressedTexImageARB": 843, + "GetnConvolutionFilterARB": 844, + "GetnHistogramARB": 845, + "GetnMapdvARB": 846, + "GetnMapfvARB": 847, + "GetnMapivARB": 848, + "GetnMinmaxARB": 849, + "GetnPixelMapfvARB": 850, + "GetnPixelMapuivARB": 851, + "GetnPixelMapusvARB": 852, + "GetnPolygonStippleARB": 853, + "GetnSeparableFilterARB": 854, + "GetnTexImageARB": 855, + "GetnUniformdvARB": 856, + "GetnUniformfvARB": 857, + "GetnUniformivARB": 858, + "GetnUniformuivARB": 859, + "ReadnPixelsARB": 860, + "DrawArraysInstancedBaseInstance": 861, + "DrawElementsInstancedBaseInstance": 862, + "DrawElementsInstancedBaseVertexBaseInstance": 863, + "DrawTransformFeedbackInstanced": 864, + "DrawTransformFeedbackStreamInstanced": 865, + "GetInternalformativ": 866, + "GetActiveAtomicCounterBufferiv": 867, + "BindImageTexture": 868, + "MemoryBarrier": 869, + "TexStorage1D": 870, + "TexStorage2D": 871, + "TexStorage3D": 872, + "TextureStorage1DEXT": 873, + "TextureStorage2DEXT": 874, + "TextureStorage3DEXT": 875, 
+ "ClearBufferData": 876, + "ClearBufferSubData": 877, + "DispatchCompute": 878, + "DispatchComputeIndirect": 879, + "CopyImageSubData": 880, + "TextureView": 881, + "BindVertexBuffer": 882, + "VertexAttribBinding": 883, + "VertexAttribFormat": 884, + "VertexAttribIFormat": 885, + "VertexAttribLFormat": 886, + "VertexBindingDivisor": 887, + "FramebufferParameteri": 888, + "GetFramebufferParameteriv": 889, + "GetInternalformati64v": 890, + "MultiDrawArraysIndirect": 891, + "MultiDrawElementsIndirect": 892, + "GetProgramInterfaceiv": 893, + "GetProgramResourceIndex": 894, + "GetProgramResourceLocation": 895, + "GetProgramResourceLocationIndex": 896, + "GetProgramResourceName": 897, + "GetProgramResourceiv": 898, + "ShaderStorageBlockBinding": 899, + "TexBufferRange": 900, + "TexStorage2DMultisample": 901, + "TexStorage3DMultisample": 902, + "BufferStorage": 903, + "ClearTexImage": 904, + "ClearTexSubImage": 905, + "BindBuffersBase": 906, + "BindBuffersRange": 907, + "BindImageTextures": 908, + "BindSamplers": 909, + "BindTextures": 910, + "BindVertexBuffers": 911, + "GetImageHandleARB": 912, + "GetTextureHandleARB": 913, + "GetTextureSamplerHandleARB": 914, + "GetVertexAttribLui64vARB": 915, + "IsImageHandleResidentARB": 916, + "IsTextureHandleResidentARB": 917, + "MakeImageHandleNonResidentARB": 918, + "MakeImageHandleResidentARB": 919, + "MakeTextureHandleNonResidentARB": 920, + "MakeTextureHandleResidentARB": 921, + "ProgramUniformHandleui64ARB": 922, + "ProgramUniformHandleui64vARB": 923, + "UniformHandleui64ARB": 924, + "UniformHandleui64vARB": 925, + "VertexAttribL1ui64ARB": 926, + "VertexAttribL1ui64vARB": 927, + "DispatchComputeGroupSizeARB": 928, + "MultiDrawArraysIndirectCountARB": 929, + "MultiDrawElementsIndirectCountARB": 930, + "ClipControl": 931, + "BindTextureUnit": 932, + "BlitNamedFramebuffer": 933, + "CheckNamedFramebufferStatus": 934, + "ClearNamedBufferData": 935, + "ClearNamedBufferSubData": 936, + "ClearNamedFramebufferfi": 937, + 
"ClearNamedFramebufferfv": 938, + "ClearNamedFramebufferiv": 939, + "ClearNamedFramebufferuiv": 940, + "CompressedTextureSubImage1D": 941, + "CompressedTextureSubImage2D": 942, + "CompressedTextureSubImage3D": 943, + "CopyNamedBufferSubData": 944, + "CopyTextureSubImage1D": 945, + "CopyTextureSubImage2D": 946, + "CopyTextureSubImage3D": 947, + "CreateBuffers": 948, + "CreateFramebuffers": 949, + "CreateProgramPipelines": 950, + "CreateQueries": 951, + "CreateRenderbuffers": 952, + "CreateSamplers": 953, + "CreateTextures": 954, + "CreateTransformFeedbacks": 955, + "CreateVertexArrays": 956, + "DisableVertexArrayAttrib": 957, + "EnableVertexArrayAttrib": 958, + "FlushMappedNamedBufferRange": 959, + "GenerateTextureMipmap": 960, + "GetCompressedTextureImage": 961, + "GetNamedBufferParameteri64v": 962, + "GetNamedBufferParameteriv": 963, + "GetNamedBufferPointerv": 964, + "GetNamedBufferSubData": 965, + "GetNamedFramebufferAttachmentParameteriv": 966, + "GetNamedFramebufferParameteriv": 967, + "GetNamedRenderbufferParameteriv": 968, + "GetQueryBufferObjecti64v": 969, + "GetQueryBufferObjectiv": 970, + "GetQueryBufferObjectui64v": 971, + "GetQueryBufferObjectuiv": 972, + "GetTextureImage": 973, + "GetTextureLevelParameterfv": 974, + "GetTextureLevelParameteriv": 975, + "GetTextureParameterIiv": 976, + "GetTextureParameterIuiv": 977, + "GetTextureParameterfv": 978, + "GetTextureParameteriv": 979, + "GetTransformFeedbacki64_v": 980, + "GetTransformFeedbacki_v": 981, + "GetTransformFeedbackiv": 982, + "GetVertexArrayIndexed64iv": 983, + "GetVertexArrayIndexediv": 984, + "GetVertexArrayiv": 985, + "InvalidateNamedFramebufferData": 986, + "InvalidateNamedFramebufferSubData": 987, + "MapNamedBuffer": 988, + "MapNamedBufferRange": 989, + "NamedBufferData": 990, + "NamedBufferStorage": 991, + "NamedBufferSubData": 992, + "NamedFramebufferDrawBuffer": 993, + "NamedFramebufferDrawBuffers": 994, + "NamedFramebufferParameteri": 995, + "NamedFramebufferReadBuffer": 996, + 
"NamedFramebufferRenderbuffer": 997, + "NamedFramebufferTexture": 998, + "NamedFramebufferTextureLayer": 999, + "NamedRenderbufferStorage": 1000, + "NamedRenderbufferStorageMultisample": 1001, + "TextureBuffer": 1002, + "TextureBufferRange": 1003, + "TextureParameterIiv": 1004, + "TextureParameterIuiv": 1005, + "TextureParameterf": 1006, + "TextureParameterfv": 1007, + "TextureParameteri": 1008, + "TextureParameteriv": 1009, + "TextureStorage1D": 1010, + "TextureStorage2D": 1011, + "TextureStorage2DMultisample": 1012, + "TextureStorage3D": 1013, + "TextureStorage3DMultisample": 1014, + "TextureSubImage1D": 1015, + "TextureSubImage2D": 1016, + "TextureSubImage3D": 1017, + "TransformFeedbackBufferBase": 1018, + "TransformFeedbackBufferRange": 1019, + "UnmapNamedBuffer": 1020, + "VertexArrayAttribBinding": 1021, + "VertexArrayAttribFormat": 1022, + "VertexArrayAttribIFormat": 1023, + "VertexArrayAttribLFormat": 1024, + "VertexArrayBindingDivisor": 1025, + "VertexArrayElementBuffer": 1026, + "VertexArrayVertexBuffer": 1027, + "VertexArrayVertexBuffers": 1028, + "GetCompressedTextureSubImage": 1029, + "GetTextureSubImage": 1030, + "BufferPageCommitmentARB": 1031, + "NamedBufferPageCommitmentARB": 1032, + "GetUniformi64vARB": 1033, + "GetUniformui64vARB": 1034, + "GetnUniformi64vARB": 1035, + "GetnUniformui64vARB": 1036, + "ProgramUniform1i64ARB": 1037, + "ProgramUniform1i64vARB": 1038, + "ProgramUniform1ui64ARB": 1039, + "ProgramUniform1ui64vARB": 1040, + "ProgramUniform2i64ARB": 1041, + "ProgramUniform2i64vARB": 1042, + "ProgramUniform2ui64ARB": 1043, + "ProgramUniform2ui64vARB": 1044, + "ProgramUniform3i64ARB": 1045, + "ProgramUniform3i64vARB": 1046, + "ProgramUniform3ui64ARB": 1047, + "ProgramUniform3ui64vARB": 1048, + "ProgramUniform4i64ARB": 1049, + "ProgramUniform4i64vARB": 1050, + "ProgramUniform4ui64ARB": 1051, + "ProgramUniform4ui64vARB": 1052, + "Uniform1i64ARB": 1053, + "Uniform1i64vARB": 1054, + "Uniform1ui64ARB": 1055, + "Uniform1ui64vARB": 1056, + 
"Uniform2i64ARB": 1057, + "Uniform2i64vARB": 1058, + "Uniform2ui64ARB": 1059, + "Uniform2ui64vARB": 1060, + "Uniform3i64ARB": 1061, + "Uniform3i64vARB": 1062, + "Uniform3ui64ARB": 1063, + "Uniform3ui64vARB": 1064, + "Uniform4i64ARB": 1065, + "Uniform4i64vARB": 1066, + "Uniform4ui64ARB": 1067, + "Uniform4ui64vARB": 1068, + "EvaluateDepthValuesARB": 1069, + "FramebufferSampleLocationsfvARB": 1070, + "NamedFramebufferSampleLocationsfvARB": 1071, + "SpecializeShaderARB": 1072, + "InvalidateBufferData": 1073, + "InvalidateBufferSubData": 1074, + "InvalidateFramebuffer": 1075, + "InvalidateSubFramebuffer": 1076, + "InvalidateTexImage": 1077, + "InvalidateTexSubImage": 1078, + "DrawTexfOES": 1079, + "DrawTexfvOES": 1080, + "DrawTexiOES": 1081, + "DrawTexivOES": 1082, + "DrawTexsOES": 1083, + "DrawTexsvOES": 1084, + "DrawTexxOES": 1085, + "DrawTexxvOES": 1086, + "PointSizePointerOES": 1087, + "QueryMatrixxOES": 1088, + "SampleMaskSGIS": 1089, + "SamplePatternSGIS": 1090, + "ColorPointerEXT": 1091, + "EdgeFlagPointerEXT": 1092, + "IndexPointerEXT": 1093, + "NormalPointerEXT": 1094, + "TexCoordPointerEXT": 1095, + "VertexPointerEXT": 1096, + "DiscardFramebufferEXT": 1097, + "ActiveShaderProgram": 1098, + "BindProgramPipeline": 1099, + "CreateShaderProgramv": 1100, + "DeleteProgramPipelines": 1101, + "GenProgramPipelines": 1102, + "GetProgramPipelineInfoLog": 1103, + "GetProgramPipelineiv": 1104, + "IsProgramPipeline": 1105, + "LockArraysEXT": 1106, + "ProgramUniform1d": 1107, + "ProgramUniform1dv": 1108, + "ProgramUniform1f": 1109, + "ProgramUniform1fv": 1110, + "ProgramUniform1i": 1111, + "ProgramUniform1iv": 1112, + "ProgramUniform1ui": 1113, + "ProgramUniform1uiv": 1114, + "ProgramUniform2d": 1115, + "ProgramUniform2dv": 1116, + "ProgramUniform2f": 1117, + "ProgramUniform2fv": 1118, + "ProgramUniform2i": 1119, + "ProgramUniform2iv": 1120, + "ProgramUniform2ui": 1121, + "ProgramUniform2uiv": 1122, + "ProgramUniform3d": 1123, + "ProgramUniform3dv": 1124, + 
"ProgramUniform3f": 1125, + "ProgramUniform3fv": 1126, + "ProgramUniform3i": 1127, + "ProgramUniform3iv": 1128, + "ProgramUniform3ui": 1129, + "ProgramUniform3uiv": 1130, + "ProgramUniform4d": 1131, + "ProgramUniform4dv": 1132, + "ProgramUniform4f": 1133, + "ProgramUniform4fv": 1134, + "ProgramUniform4i": 1135, + "ProgramUniform4iv": 1136, + "ProgramUniform4ui": 1137, + "ProgramUniform4uiv": 1138, + "ProgramUniformMatrix2dv": 1139, + "ProgramUniformMatrix2fv": 1140, + "ProgramUniformMatrix2x3dv": 1141, + "ProgramUniformMatrix2x3fv": 1142, + "ProgramUniformMatrix2x4dv": 1143, + "ProgramUniformMatrix2x4fv": 1144, + "ProgramUniformMatrix3dv": 1145, + "ProgramUniformMatrix3fv": 1146, + "ProgramUniformMatrix3x2dv": 1147, + "ProgramUniformMatrix3x2fv": 1148, + "ProgramUniformMatrix3x4dv": 1149, + "ProgramUniformMatrix3x4fv": 1150, + "ProgramUniformMatrix4dv": 1151, + "ProgramUniformMatrix4fv": 1152, + "ProgramUniformMatrix4x2dv": 1153, + "ProgramUniformMatrix4x2fv": 1154, + "ProgramUniformMatrix4x3dv": 1155, + "ProgramUniformMatrix4x3fv": 1156, + "UnlockArraysEXT": 1157, + "UseProgramStages": 1158, + "ValidateProgramPipeline": 1159, + "FramebufferTexture2DMultisampleEXT": 1160, + "DebugMessageCallback": 1161, + "DebugMessageControl": 1162, + "DebugMessageInsert": 1163, + "GetDebugMessageLog": 1164, + "GetObjectLabel": 1165, + "GetObjectPtrLabel": 1166, + "ObjectLabel": 1167, + "ObjectPtrLabel": 1168, + "PopDebugGroup": 1169, + "PushDebugGroup": 1170, + "SecondaryColor3fEXT": 1171, + "SecondaryColor3fvEXT": 1172, + "MultiDrawElementsEXT": 1173, + "FogCoordfEXT": 1174, + "FogCoordfvEXT": 1175, + "ResizeBuffersMESA": 1176, + "WindowPos4dMESA": 1177, + "WindowPos4dvMESA": 1178, + "WindowPos4fMESA": 1179, + "WindowPos4fvMESA": 1180, + "WindowPos4iMESA": 1181, + "WindowPos4ivMESA": 1182, + "WindowPos4sMESA": 1183, + "WindowPos4svMESA": 1184, + "MultiModeDrawArraysIBM": 1185, + "MultiModeDrawElementsIBM": 1186, + "AreProgramsResidentNV": 1187, + "ExecuteProgramNV": 1188, + 
"GetProgramParameterdvNV": 1189, + "GetProgramParameterfvNV": 1190, + "GetProgramStringNV": 1191, + "GetProgramivNV": 1192, + "GetTrackMatrixivNV": 1193, + "GetVertexAttribdvNV": 1194, + "GetVertexAttribfvNV": 1195, + "GetVertexAttribivNV": 1196, + "LoadProgramNV": 1197, + "ProgramParameters4dvNV": 1198, + "ProgramParameters4fvNV": 1199, + "RequestResidentProgramsNV": 1200, + "TrackMatrixNV": 1201, + "VertexAttrib1dNV": 1202, + "VertexAttrib1dvNV": 1203, + "VertexAttrib1fNV": 1204, + "VertexAttrib1fvNV": 1205, + "VertexAttrib1sNV": 1206, + "VertexAttrib1svNV": 1207, + "VertexAttrib2dNV": 1208, + "VertexAttrib2dvNV": 1209, + "VertexAttrib2fNV": 1210, + "VertexAttrib2fvNV": 1211, + "VertexAttrib2sNV": 1212, + "VertexAttrib2svNV": 1213, + "VertexAttrib3dNV": 1214, + "VertexAttrib3dvNV": 1215, + "VertexAttrib3fNV": 1216, + "VertexAttrib3fvNV": 1217, + "VertexAttrib3sNV": 1218, + "VertexAttrib3svNV": 1219, + "VertexAttrib4dNV": 1220, + "VertexAttrib4dvNV": 1221, + "VertexAttrib4fNV": 1222, + "VertexAttrib4fvNV": 1223, + "VertexAttrib4sNV": 1224, + "VertexAttrib4svNV": 1225, + "VertexAttrib4ubNV": 1226, + "VertexAttrib4ubvNV": 1227, + "VertexAttribPointerNV": 1228, + "VertexAttribs1dvNV": 1229, + "VertexAttribs1fvNV": 1230, + "VertexAttribs1svNV": 1231, + "VertexAttribs2dvNV": 1232, + "VertexAttribs2fvNV": 1233, + "VertexAttribs2svNV": 1234, + "VertexAttribs3dvNV": 1235, + "VertexAttribs3fvNV": 1236, + "VertexAttribs3svNV": 1237, + "VertexAttribs4dvNV": 1238, + "VertexAttribs4fvNV": 1239, + "VertexAttribs4svNV": 1240, + "VertexAttribs4ubvNV": 1241, + "GetTexBumpParameterfvATI": 1242, + "GetTexBumpParameterivATI": 1243, + "TexBumpParameterfvATI": 1244, + "TexBumpParameterivATI": 1245, + "AlphaFragmentOp1ATI": 1246, + "AlphaFragmentOp2ATI": 1247, + "AlphaFragmentOp3ATI": 1248, + "BeginFragmentShaderATI": 1249, + "BindFragmentShaderATI": 1250, + "ColorFragmentOp1ATI": 1251, + "ColorFragmentOp2ATI": 1252, + "ColorFragmentOp3ATI": 1253, + "DeleteFragmentShaderATI": 1254, + 
"EndFragmentShaderATI": 1255, + "GenFragmentShadersATI": 1256, + "PassTexCoordATI": 1257, + "SampleMapATI": 1258, + "SetFragmentShaderConstantATI": 1259, + "DepthRangeArrayfvOES": 1260, + "DepthRangeIndexedfOES": 1261, + "ActiveStencilFaceEXT": 1262, + "GetProgramNamedParameterdvNV": 1263, + "GetProgramNamedParameterfvNV": 1264, + "ProgramNamedParameter4dNV": 1265, + "ProgramNamedParameter4dvNV": 1266, + "ProgramNamedParameter4fNV": 1267, + "ProgramNamedParameter4fvNV": 1268, + "PrimitiveRestartNV": 1269, + "GetTexGenxvOES": 1270, + "TexGenxOES": 1271, + "TexGenxvOES": 1272, + "DepthBoundsEXT": 1273, + "BindFramebufferEXT": 1274, + "BindRenderbufferEXT": 1275, + "StringMarkerGREMEDY": 1276, + "BufferParameteriAPPLE": 1277, + "FlushMappedBufferRangeAPPLE": 1278, + "VertexAttribI1iEXT": 1279, + "VertexAttribI1uiEXT": 1280, + "VertexAttribI2iEXT": 1281, + "VertexAttribI2ivEXT": 1282, + "VertexAttribI2uiEXT": 1283, + "VertexAttribI2uivEXT": 1284, + "VertexAttribI3iEXT": 1285, + "VertexAttribI3ivEXT": 1286, + "VertexAttribI3uiEXT": 1287, + "VertexAttribI3uivEXT": 1288, + "VertexAttribI4iEXT": 1289, + "VertexAttribI4ivEXT": 1290, + "VertexAttribI4uiEXT": 1291, + "VertexAttribI4uivEXT": 1292, + "ClearColorIiEXT": 1293, + "ClearColorIuiEXT": 1294, + "BindBufferOffsetEXT": 1295, + "BeginPerfMonitorAMD": 1296, + "DeletePerfMonitorsAMD": 1297, + "EndPerfMonitorAMD": 1298, + "GenPerfMonitorsAMD": 1299, + "GetPerfMonitorCounterDataAMD": 1300, + "GetPerfMonitorCounterInfoAMD": 1301, + "GetPerfMonitorCounterStringAMD": 1302, + "GetPerfMonitorCountersAMD": 1303, + "GetPerfMonitorGroupStringAMD": 1304, + "GetPerfMonitorGroupsAMD": 1305, + "SelectPerfMonitorCountersAMD": 1306, + "GetObjectParameterivAPPLE": 1307, + "ObjectPurgeableAPPLE": 1308, + "ObjectUnpurgeableAPPLE": 1309, + "ActiveProgramEXT": 1310, + "CreateShaderProgramEXT": 1311, + "UseShaderProgramEXT": 1312, + "TextureBarrierNV": 1313, + "VDPAUFiniNV": 1314, + "VDPAUGetSurfaceivNV": 1315, + "VDPAUInitNV": 1316, + 
"VDPAUIsSurfaceNV": 1317, + "VDPAUMapSurfacesNV": 1318, + "VDPAURegisterOutputSurfaceNV": 1319, + "VDPAURegisterVideoSurfaceNV": 1320, + "VDPAUSurfaceAccessNV": 1321, + "VDPAUUnmapSurfacesNV": 1322, + "VDPAUUnregisterSurfaceNV": 1323, + "BeginPerfQueryINTEL": 1324, + "CreatePerfQueryINTEL": 1325, + "DeletePerfQueryINTEL": 1326, + "EndPerfQueryINTEL": 1327, + "GetFirstPerfQueryIdINTEL": 1328, + "GetNextPerfQueryIdINTEL": 1329, + "GetPerfCounterInfoINTEL": 1330, + "GetPerfQueryDataINTEL": 1331, + "GetPerfQueryIdByNameINTEL": 1332, + "GetPerfQueryInfoINTEL": 1333, + "PolygonOffsetClampEXT": 1334, + "SubpixelPrecisionBiasNV": 1335, + "ConservativeRasterParameterfNV": 1336, + "ConservativeRasterParameteriNV": 1337, + "WindowRectanglesEXT": 1338, + "BufferStorageMemEXT": 1339, + "CreateMemoryObjectsEXT": 1340, + "DeleteMemoryObjectsEXT": 1341, + "DeleteSemaphoresEXT": 1342, + "GenSemaphoresEXT": 1343, + "GetMemoryObjectParameterivEXT": 1344, + "GetSemaphoreParameterui64vEXT": 1345, + "GetUnsignedBytei_vEXT": 1346, + "GetUnsignedBytevEXT": 1347, + "IsMemoryObjectEXT": 1348, + "IsSemaphoreEXT": 1349, + "MemoryObjectParameterivEXT": 1350, + "NamedBufferStorageMemEXT": 1351, + "SemaphoreParameterui64vEXT": 1352, + "SignalSemaphoreEXT": 1353, + "TexStorageMem1DEXT": 1354, + "TexStorageMem2DEXT": 1355, + "TexStorageMem2DMultisampleEXT": 1356, + "TexStorageMem3DEXT": 1357, + "TexStorageMem3DMultisampleEXT": 1358, + "TextureStorageMem1DEXT": 1359, + "TextureStorageMem2DEXT": 1360, + "TextureStorageMem2DMultisampleEXT": 1361, + "TextureStorageMem3DEXT": 1362, + "TextureStorageMem3DMultisampleEXT": 1363, + "WaitSemaphoreEXT": 1364, + "ImportMemoryFdEXT": 1365, + "ImportSemaphoreFdEXT": 1366, + "FramebufferFetchBarrierEXT": 1367, + "NamedRenderbufferStorageMultisampleAdvancedAMD": 1368, + "RenderbufferStorageMultisampleAdvancedAMD": 1369, + "StencilFuncSeparateATI": 1370, + "ProgramEnvParameters4fvEXT": 1371, + "ProgramLocalParameters4fvEXT": 1372, + 
"EGLImageTargetRenderbufferStorageOES": 1373, + "EGLImageTargetTexture2DOES": 1374, + "AlphaFuncx": 1375, + "ClearColorx": 1376, + "ClearDepthx": 1377, + "Color4x": 1378, + "DepthRangex": 1379, + "Fogx": 1380, + "Fogxv": 1381, + "Frustumf": 1382, + "Frustumx": 1383, + "LightModelx": 1384, + "LightModelxv": 1385, + "Lightx": 1386, + "Lightxv": 1387, + "LineWidthx": 1388, + "LoadMatrixx": 1389, + "Materialx": 1390, + "Materialxv": 1391, + "MultMatrixx": 1392, + "MultiTexCoord4x": 1393, + "Normal3x": 1394, + "Orthof": 1395, + "Orthox": 1396, + "PointSizex": 1397, + "PolygonOffsetx": 1398, + "Rotatex": 1399, + "SampleCoveragex": 1400, + "Scalex": 1401, + "TexEnvx": 1402, + "TexEnvxv": 1403, + "TexParameterx": 1404, + "Translatex": 1405, + "ClipPlanef": 1406, + "ClipPlanex": 1407, + "GetClipPlanef": 1408, + "GetClipPlanex": 1409, + "GetFixedv": 1410, + "GetLightxv": 1411, + "GetMaterialxv": 1412, + "GetTexEnvxv": 1413, + "GetTexParameterxv": 1414, + "PointParameterx": 1415, + "PointParameterxv": 1416, + "TexParameterxv": 1417, + "BlendBarrier": 1418, + "PrimitiveBoundingBox": 1419, } functions = [ diff -Nru mesa-18.3.3/src/mapi/glapi/meson.build mesa-19.0.1/src/mapi/glapi/meson.build --- mesa-18.3.3/src/mapi/glapi/meson.build 2018-12-07 18:58:04.000000000 +0000 +++ mesa-19.0.1/src/mapi/glapi/meson.build 2019-03-31 23:16:37.000000000 +0000 @@ -87,6 +87,7 @@ include_directories : [inc_include, inc_src, inc_mesa, inc_mapi], link_with : [libglapi_static], dependencies : [idep_gtest, dep_thread], - ) + ), + suite : ['mapi'], ) endif diff -Nru mesa-18.3.3/src/mapi/Makefile.am mesa-19.0.1/src/mapi/Makefile.am --- mesa-18.3.3/src/mapi/Makefile.am 2018-12-07 18:58:04.000000000 +0000 +++ mesa-19.0.1/src/mapi/Makefile.am 2019-03-31 23:16:37.000000000 +0000 @@ -31,6 +31,8 @@ pkgconfig_DATA = EXTRA_DIST = \ + new/genCommon.py \ + new/gen_gldispatch_mapi.py \ es1api/ABI-check \ es2api/ABI-check \ mapi_abi.py \ @@ -61,11 +63,20 @@ MKDIR_GEN = $(AM_V_at)$(MKDIR_P) $(@D) PYTHON_GEN = 
$(AM_V_GEN)$(PYTHON) $(PYTHON_FLAGS) -glapi_gen_mapi_deps := \ +shared_glapi_gen_mapi_deps := \ mapi_abi.py \ $(wildcard $(top_srcdir)/src/mapi/glapi/gen/*.xml) \ $(wildcard $(top_srcdir)/src/mapi/glapi/gen/*.py) +glapi_gen_gl_xml := \ + $(srcdir)/glapi/registry/gl.xml +glapi_gen_mapi_script = $(srcdir)/new/gen_gldispatch_mapi.py +glapi_gen_mapi_deps = \ + $(glapi_gen_mapi_script) \ + $(srcdir)/new/genCommon.py \ + $(glapi_gen_gl_xml) +glapi_gen_mapi = $(AM_V_GEN)$(PYTHON) $(PYTHON_FLAGS) $(glapi_gen_mapi_script) + if HAVE_SHARED_GLAPI BUILT_SOURCES += shared-glapi/glapi_mapi_tmp.h @@ -99,7 +110,7 @@ $(top_builddir)/src/gtest/libgtest.la endif -shared-glapi/glapi_mapi_tmp.h : glapi/gen/gl_and_es_API.xml $(glapi_gen_mapi_deps) +shared-glapi/glapi_mapi_tmp.h : glapi/gen/gl_and_es_API.xml $(shared_glapi_gen_mapi_deps) $(MKDIR_GEN) $(PYTHON_GEN) $(srcdir)/mapi_abi.py --printer shared-glapi \ $(srcdir)/glapi/gen/gl_and_es_API.xml > $@ @@ -198,10 +209,9 @@ es1api_libGLESv1_CM_la_LIBADD += shared-glapi/libglapi.la endif -es1api/glapi_mapi_tmp.h: glapi/gen/gl_and_es_API.xml $(glapi_gen_mapi_deps) +es1api/glapi_mapi_tmp.h: $(glapi_gen_mapi_deps) $(MKDIR_GEN) - $(PYTHON_GEN) $(srcdir)/mapi_abi.py --printer es1api \ - $(srcdir)/glapi/gen/gl_and_es_API.xml > $@ + $(glapi_gen_mapi) glesv1 $(glapi_gen_gl_xml) > $@ if HAVE_OPENGL_ES2 TESTS += es2api/ABI-check @@ -243,10 +253,9 @@ es2api_libGLESv2_la_LIBADD += shared-glapi/libglapi.la endif -es2api/glapi_mapi_tmp.h: glapi/gen/gl_and_es_API.xml $(glapi_gen_mapi_deps) +es2api/glapi_mapi_tmp.h: $(glapi_gen_mapi_deps) $(MKDIR_GEN) - $(PYTHON_GEN) $(srcdir)/mapi_abi.py --printer es2api \ - $(srcdir)/glapi/gen/gl_and_es_API.xml > $@ + $(glapi_gen_mapi) glesv2 $(glapi_gen_gl_xml) > $@ include $(top_srcdir)/install-lib-links.mk diff -Nru mesa-18.3.3/src/mapi/mapi_abi.py mesa-19.0.1/src/mapi/mapi_abi.py --- mesa-18.3.3/src/mapi/mapi_abi.py 2018-12-07 18:58:04.000000000 +0000 +++ mesa-19.0.1/src/mapi/mapi_abi.py 2019-03-31 
23:16:37.000000000 +0000 @@ -29,8 +29,8 @@ import sys # make it possible to import glapi import os -GLAPI = os.path.join(".", os.path.dirname(sys.argv[0]), "glapi/gen") -sys.path.append(GLAPI) +GLAPI = os.path.join(".", os.path.dirname(__file__), "glapi", "gen") +sys.path.insert(0, GLAPI) from operator import attrgetter import re @@ -184,75 +184,6 @@ return entries -def abi_parse_line(line): - cols = [col.strip() for col in line.split(',')] - - attrs = { - 'slot': -1, - 'hidden': False, - 'alias': None, - 'handcode': None, - } - - # extract attributes from the first column - vals = cols[0].split(':') - while len(vals) > 1: - val = vals.pop(0) - if val.startswith('slot='): - attrs['slot'] = int(val[5:]) - elif val == 'hidden': - attrs['hidden'] = True - elif val.startswith('alias='): - attrs['alias'] = val[6:] - elif val.startswith('handcode='): - attrs['handcode'] = val[9:] - elif not val: - pass - else: - raise Exception('unknown attribute %s' % val) - cols[0] = vals[0] - - return (attrs, cols) - -def abi_parse(filename): - """Parse a CSV file for ABI entries.""" - fp = open(filename) if filename != '-' else sys.stdin - lines = [line.strip() for line in fp.readlines() - if not line.startswith('#') and line.strip()] - - entry_dict = {} - next_slot = 0 - for line in lines: - attrs, cols = abi_parse_line(line) - - # post-process attributes - if attrs['alias']: - try: - alias = entry_dict[attrs['alias']] - except KeyError: - raise Exception('failed to alias %s' % attrs['alias']) - if alias.alias: - raise Exception('recursive alias %s' % ent.name) - slot = alias.slot - attrs['alias'] = alias - else: - slot = next_slot - next_slot += 1 - - if attrs['slot'] < 0: - attrs['slot'] = slot - elif attrs['slot'] != slot: - raise Exception('invalid slot in %s' % (line)) - - ent = ABIEntry(cols, attrs) - if ent.name in entry_dict: - raise Exception('%s is duplicated' % (ent.name)) - entry_dict[ent.name] = ent - - entries = sorted(entry_dict.values()) - - return entries - def 
abi_sanity_check(entries): if not entries: return @@ -334,7 +265,8 @@ if not self.need_entry_point(ent): continue export = self.api_call if not ent.hidden else '' - decls.append(self._c_decl(ent, prefix, True, export) + ';') + if not ent.hidden or not self.lib_need_non_hidden_entries: + decls.append(self._c_decl(ent, prefix, True, export) + ';') return "\n".join(decls) @@ -684,62 +616,6 @@ return header -class ES1APIPrinter(GLAPIPrinter): - """OpenGL ES 1.x API Printer""" - - def __init__(self, entries): - super(ES1APIPrinter, self).__init__(entries) - self.prefix_lib = 'gl' - self.prefix_warn = 'gl' - - def _override_for_api(self, ent): - if ent.xml_data is None: - raise Exception('ES2 API printer requires XML input') - ent.hidden = (ent.name not in \ - ent.xml_data.entry_points_for_api_version('es1')) \ - or ent.hidden - ent.handcode = False - - def _get_c_header(self): - header = """#ifndef _GLAPI_TMP_H_ -#define _GLAPI_TMP_H_ -typedef int GLclampx; -#endif /* _GLAPI_TMP_H_ */""" - - return header - -class ES2APIPrinter(GLAPIPrinter): - """OpenGL ES 2.x API Printer""" - - def __init__(self, entries): - super(ES2APIPrinter, self).__init__(entries) - self.prefix_lib = 'gl' - self.prefix_warn = 'gl' - - def _override_for_api(self, ent): - if ent.xml_data is None: - raise Exception('ES2 API printer requires XML input') - ent.hidden = (ent.name not in \ - ent.xml_data.entry_points_for_api_version('es2')) \ - or ent.hidden - - # This is hella ugly. The same-named function in desktop OpenGL is - # hidden, but it needs to be exposed by libGLESv2 for OpenGL ES 3.0. - # There's no way to express in the XML that a function should be be - # hidden in one API but exposed in another. 
- if ent.name == 'GetInternalformativ': - ent.hidden = False - - ent.handcode = False - - def _get_c_header(self): - header = """#ifndef _GLAPI_TMP_H_ -#define _GLAPI_TMP_H_ -typedef int GLclampx; -#endif /* _GLAPI_TMP_H_ */""" - - return header - class SharedGLAPIPrinter(GLAPIPrinter): """Shared GLAPI API Printer""" @@ -770,7 +646,7 @@ def parse_args(): printers = ['glapi', 'es1api', 'es2api', 'shared-glapi'] - parser = OptionParser(usage='usage: %prog [options] ') + parser = OptionParser(usage='usage: %prog [options] ') parser.add_option('-p', '--printer', dest='printer', help='printer to use: %s' % (", ".join(printers))) @@ -779,22 +655,21 @@ parser.print_help() sys.exit(1) + if not args[0].endswith('.xml'): + parser.print_help() + sys.exit(1) + return (args[0], options) def main(): printers = { 'glapi': GLAPIPrinter, - 'es1api': ES1APIPrinter, - 'es2api': ES2APIPrinter, 'shared-glapi': SharedGLAPIPrinter, } filename, options = parse_args() - if filename.endswith('.xml'): - entries = abi_parse_xml(filename) - else: - entries = abi_parse(filename) + entries = abi_parse_xml(filename) abi_sanity_check(entries) printer = printers[options.printer](entries) diff -Nru mesa-18.3.3/src/mapi/meson.build mesa-19.0.1/src/mapi/meson.build --- mesa-18.3.3/src/mapi/meson.build 2017-11-27 17:45:57.000000000 +0000 +++ mesa-19.0.1/src/mapi/meson.build 2019-03-31 23:16:37.000000000 +0000 @@ -25,6 +25,8 @@ 'u_execmem.h', ) +genCommon_py = files('new/genCommon.py') +glapi_gen_mapi_py = files('new/gen_gldispatch_mapi.py') mapi_abi_py = files('mapi_abi.py') subdir('glapi') diff -Nru mesa-18.3.3/src/mapi/new/genCommon.py mesa-19.0.1/src/mapi/new/genCommon.py --- mesa-18.3.3/src/mapi/new/genCommon.py 1970-01-01 00:00:00.000000000 +0000 +++ mesa-19.0.1/src/mapi/new/genCommon.py 2019-03-31 23:16:37.000000000 +0000 @@ -0,0 +1,241 @@ +#!/usr/bin/env python + +# (C) Copyright 2015, NVIDIA CORPORATION. +# All Rights Reserved. 
+# +# Permission is hereby granted, free of charge, to any person obtaining a +# copy of this software and associated documentation files (the "Software"), +# to deal in the Software without restriction, including without limitation +# on the rights to use, copy, modify, merge, publish, distribute, sub +# license, and/or sell copies of the Software, and to permit persons to whom +# the Software is furnished to do so, subject to the following conditions: +# +# The above copyright notice and this permission notice (including the next +# paragraph) shall be included in all copies or substantial portions of the +# Software. +# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +# FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL +# IBM AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING +# FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS +# IN THE SOFTWARE. +# +# Authors: +# Kyle Brenneman + +import collections +import re +import sys +import xml.etree.cElementTree as etree + +import os +GLAPI = os.path.join(os.path.dirname(__file__), "..", "glapi", "gen") +sys.path.insert(0, GLAPI) +import static_data + +MAPI_TABLE_NUM_DYNAMIC = 4096 + +_LIBRARY_FEATURE_NAMES = { + # libGL and libGLdiapatch both include every function. 
+ "gl" : None, + "gldispatch" : None, + "opengl" : frozenset(( "GL_VERSION_1_0", "GL_VERSION_1_1", + "GL_VERSION_1_2", "GL_VERSION_1_3", "GL_VERSION_1_4", "GL_VERSION_1_5", + "GL_VERSION_2_0", "GL_VERSION_2_1", "GL_VERSION_3_0", "GL_VERSION_3_1", + "GL_VERSION_3_2", "GL_VERSION_3_3", "GL_VERSION_4_0", "GL_VERSION_4_1", + "GL_VERSION_4_2", "GL_VERSION_4_3", "GL_VERSION_4_4", "GL_VERSION_4_5", + )), + "glesv1" : frozenset(("GL_VERSION_ES_CM_1_0", "GL_OES_point_size_array")), + "glesv2" : frozenset(("GL_ES_VERSION_2_0", "GL_ES_VERSION_3_0", + "GL_ES_VERSION_3_1", "GL_ES_VERSION_3_2", + )), +} + +def getFunctions(xmlFiles): + """ + Reads an XML file and returns all of the functions defined in it. + + xmlFile should be the path to Khronos's gl.xml file. The return value is a + sequence of FunctionDesc objects, ordered by slot number. + """ + roots = [ etree.parse(xmlFile).getroot() for xmlFile in xmlFiles ] + return getFunctionsFromRoots(roots) + +def getFunctionsFromRoots(roots): + functions = {} + for root in roots: + for func in _getFunctionList(root): + functions[func.name] = func + functions = functions.values() + + # Sort the function list by name. + functions = sorted(functions, key=lambda f: f.name) + + # Lookup for fixed offset/slot functions and use it if available. + # Assign a slot number to each function. This isn't strictly necessary, + # since you can just look at the index in the list, but it makes it easier + # to include the slot when formatting output. 
+ + next_slot = 0 + for i in range(len(functions)): + name = functions[i].name[2:] + + if name in static_data.offsets: + functions[i] = functions[i]._replace(slot=static_data.offsets[name]) + elif not name.endswith("ARB") and name + "ARB" in static_data.offsets: + functions[i] = functions[i]._replace(slot=static_data.offsets[name + "ARB"]) + elif not name.endswith("EXT") and name + "EXT" in static_data.offsets: + functions[i] = functions[i]._replace(slot=static_data.offsets[name + "EXT"]) + else: + functions[i] = functions[i]._replace(slot=next_slot) + next_slot += 1 + + return functions + +def getExportNamesFromRoots(target, roots): + """ + Goes through the tags from gl.xml and returns a set of OpenGL + functions that a library should export. + + target should be one of "gl", "gldispatch", "opengl", "glesv1", or + "glesv2". + """ + featureNames = _LIBRARY_FEATURE_NAMES[target] + if featureNames is None: + return set(func.name for func in getFunctionsFromRoots(roots)) + + names = set() + for root in roots: + features = [] + for featElem in root.findall("feature"): + if featElem.get("name") in featureNames: + features.append(featElem) + for featElem in root.findall("extensions/extension"): + if featElem.get("name") in featureNames: + features.append(featElem) + for featElem in features: + for commandElem in featElem.findall("require/command"): + names.add(commandElem.get("name")) + return names + +class FunctionArg(collections.namedtuple("FunctionArg", "type name")): + @property + def dec(self): + """ + Returns a "TYPE NAME" string, suitable for a function prototype. + """ + rv = str(self.type) + if not rv.endswith("*"): + rv += " " + rv += self.name + return rv + +class FunctionDesc(collections.namedtuple("FunctionDesc", "name rt args slot")): + def hasReturn(self): + """ + Returns true if the function returns a value. 
+ """ + return (self.rt != "void") + + @property + def decArgs(self): + """ + Returns a string with the types and names of the arguments, as you + would use in a function declaration. + """ + if not self.args: + return "void" + else: + return ", ".join(arg.dec for arg in self.args) + + @property + def callArgs(self): + """ + Returns a string with the names of the arguments, as you would use in a + function call. + """ + return ", ".join(arg.name for arg in self.args) + + @property + def basename(self): + assert self.name.startswith("gl") + return self.name[2:] + +def _getFunctionList(root): + for elem in root.findall("commands/command"): + yield _parseCommandElem(elem) + +def _parseCommandElem(elem): + protoElem = elem.find("proto") + (rt, name) = _parseProtoElem(protoElem) + + args = [] + for ch in elem.findall("param"): + # tags have the same format as a tag. + args.append(FunctionArg(*_parseProtoElem(ch))) + func = FunctionDesc(name, rt, tuple(args), slot=None) + + return func + +def _parseProtoElem(elem): + # If I just remove the tags and string the text together, I'll get valid C code. + text = _flattenText(elem) + text = text.strip() + m = re.match(r"^(.+)\b(\w+)(?:\s*\[\s*(\d*)\s*\])?$", text, re.S) + if m: + typename = _fixupTypeName(m.group(1)) + name = m.group(2) + if m.group(3): + # HACK: glPathGlyphIndexRangeNV defines an argument like this: + # GLuint baseAndCount[2] + # Convert it to a pointer and hope for the best. + typename += "*" + return (typename, name) + else: + raise ValueError("Can't parse element %r -> %r" % (elem, text)) + +def _flattenText(elem): + """ + Returns the text in an element and all child elements, with the tags + removed. + """ + text = "" + if elem.text is not None: + text = elem.text + for ch in elem: + text += _flattenText(ch) + if ch.tail is not None: + text += ch.tail + return text + +def _fixupTypeName(typeName): + """ + Converts a typename into a more consistent format. 
+ """ + + rv = typeName.strip() + + # Replace "GLvoid" with just plain "void". + rv = re.sub(r"\bGLvoid\b", "void", rv) + + # Remove the vendor suffixes from types that have a suffix-less version. + rv = re.sub(r"\b(GLhalf|GLintptr|GLsizeiptr|GLint64|GLuint64)(?:ARB|EXT|NV|ATI)\b", r"\1", rv) + + rv = re.sub(r"\bGLDEBUGPROCKHR\b", "GLDEBUGPROC", rv) + + # Clear out any leading and trailing whitespace. + rv = rv.strip() + + # Remove any whitespace before a '*' + rv = re.sub(r"\s+\*", r"*", rv) + + # Change "foo*" to "foo *" + rv = re.sub(r"([^\*])\*", r"\1 *", rv) + + # Condense all whitespace into a single space. + rv = re.sub(r"\s+", " ", rv) + + return rv + diff -Nru mesa-18.3.3/src/mapi/new/gen_gldispatch_mapi.py mesa-19.0.1/src/mapi/new/gen_gldispatch_mapi.py --- mesa-18.3.3/src/mapi/new/gen_gldispatch_mapi.py 1970-01-01 00:00:00.000000000 +0000 +++ mesa-19.0.1/src/mapi/new/gen_gldispatch_mapi.py 2019-03-31 23:16:37.000000000 +0000 @@ -0,0 +1,193 @@ +#!/usr/bin/env python + +# Copyright (C) 2010 LunarG Inc. +# (C) Copyright 2015, NVIDIA CORPORATION. +# +# Permission is hereby granted, free of charge, to any person obtaining a +# copy of this software and associated documentation files (the "Software"), +# to deal in the Software without restriction, including without limitation +# the rights to use, copy, modify, merge, publish, distribute, sublicense, +# and/or sell copies of the Software, and to permit persons to whom the +# Software is furnished to do so, subject to the following conditions: +# +# The above copyright notice and this permission notice shall be included +# in all copies or substantial portions of the Software. +# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL +# THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING +# FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER +# DEALINGS IN THE SOFTWARE. +# +# Authors: +# Kyle Brenneman +# +# Based on code ogiginally by: +# Chia-I Wu + + +""" +Generates the glapi_mapi_tmp.h header file from Khronos's XML file. +""" + +import sys +import xml.etree.cElementTree as etree + +import genCommon + +def _main(): + target = sys.argv[1] + xmlFiles = sys.argv[2:] + + roots = [ etree.parse(filename).getroot() for filename in xmlFiles ] + allFunctions = genCommon.getFunctionsFromRoots(roots) + + names = genCommon.getExportNamesFromRoots(target, roots) + functions = [f for f in allFunctions if(f.name in names)] + + if (target in ("gl", "gldispatch")): + assert(len(functions) == len(allFunctions)) + assert(all(functions[i] == allFunctions[i] for i in range(len(functions)))) + assert(all(functions[i].slot == i for i in range(len(functions)))) + + print(r""" +/* This file is automatically generated by mapi_abi.py. Do not modify. 
*/ + +#ifndef _GLAPI_TMP_H_ +#define _GLAPI_TMP_H_ +typedef int GLclampx; +#endif /* _GLAPI_TMP_H_ */ +""".lstrip("\n")) + + print(generate_defines(functions)) + if target == "gldispatch": + print(generate_table(functions, allFunctions)) + print(generate_noop_array(functions)) + print(generate_public_stubs(functions)) + print(generate_public_entries(functions)) + if target == "gldispatch": + print(generate_public_entries_table(functions)) + print(generate_undef_public_entries()) + print(generate_stub_asm_gcc(functions)) + +def generate_defines(functions): + text = r""" +#ifdef MAPI_TMP_DEFINES +#define GL_GLEXT_PROTOTYPES +#include "GL/gl.h" +#include "GL/glext.h" + +""".lstrip("\n") + for func in functions: + text += "GLAPI {f.rt} APIENTRY {f.name}({f.decArgs});\n".format(f=func) + text += "#undef MAPI_TMP_DEFINES\n" + text += "#endif /* MAPI_TMP_DEFINES */\n" + return text + +def generate_table(functions, allFunctions): + text = "#ifdef MAPI_TMP_TABLE\n" + text += "#define MAPI_TABLE_NUM_STATIC %d\n" % (len(allFunctions)) + text += "#define MAPI_TABLE_NUM_DYNAMIC %d\n" % (genCommon.MAPI_TABLE_NUM_DYNAMIC,) + text += "#undef MAPI_TMP_TABLE\n" + text += "#endif /* MAPI_TMP_TABLE */\n" + return text + +def generate_noop_array(functions): + text = "#ifdef MAPI_TMP_NOOP_ARRAY\n" + text += "#ifdef DEBUG\n\n" + + for func in functions: + text += "static {f.rt} APIENTRY noop{f.basename}({f.decArgs})\n".format(f=func) + text += "{\n" + if (len(func.args) > 0): + text += " " + for arg in func.args: + text += " (void) {a.name};".format(a=arg) + text += "\n" + text += " noop_warn(\"{f.name}\");\n".format(f=func) + if (func.hasReturn()): + text += " return ({f.rt}) 0;\n".format(f=func) + text += "}\n\n" + + text += "const mapi_func table_noop_array[] = {\n" + for func in functions: + text += " (mapi_func) noop{f.basename},\n".format(f=func) + for i in range(genCommon.MAPI_TABLE_NUM_DYNAMIC - 1): + text += " (mapi_func) noop_generic,\n" + text += " (mapi_func) noop_generic\n" 
+ text += "};\n\n" + text += "#else /* DEBUG */\n\n" + text += "const mapi_func table_noop_array[] = {\n" + for i in range(len(functions) + genCommon.MAPI_TABLE_NUM_DYNAMIC - 1): + text += " (mapi_func) noop_generic,\n" + text += " (mapi_func) noop_generic\n" + + text += "};\n\n" + text += "#endif /* DEBUG */\n" + text += "#undef MAPI_TMP_NOOP_ARRAY\n" + text += "#endif /* MAPI_TMP_NOOP_ARRAY */\n" + return text + +def generate_public_stubs(functions): + text = "#ifdef MAPI_TMP_PUBLIC_STUBS\n" + + text += "static const struct mapi_stub public_stubs[] = {\n" + for func in functions: + text += " { \"%s\", %d, NULL },\n" % (func.name, func.slot) + text += "};\n" + text += "#undef MAPI_TMP_PUBLIC_STUBS\n" + text += "#endif /* MAPI_TMP_PUBLIC_STUBS */\n" + return text + +def generate_public_entries(functions): + text = "#ifdef MAPI_TMP_PUBLIC_ENTRIES_NO_HIDDEN\n" + + for func in functions: + retStr = ("return " if func.hasReturn() else "") + text += r""" +GLAPI {f.rt} APIENTRY {f.name}({f.decArgs}) +{{ + const struct _glapi_table *_tbl = entry_current_get(); + mapi_func _func = ((const mapi_func *) _tbl)[{f.slot}]; + {retStr}(({f.rt} (APIENTRY *)({f.decArgs})) _func)({f.callArgs}); +}} + +""".lstrip("\n").format(f=func, retStr=retStr) + + text += "\n" + text += "#endif /* MAPI_TMP_PUBLIC_ENTRIES_NO_HIDDEN */\n" + return text + +def generate_public_entries_table(functions): + text = "#ifdef MAPI_TMP_PUBLIC_ENTRIES_NO_HIDDEN\n" + text += "static const mapi_func public_entries[] = {\n" + for func in functions: + text += " (mapi_func) %s,\n" % (func.name,) + text += "};\n" + text += "#endif /* MAPI_TMP_PUBLIC_ENTRIES_NO_HIDDEN */\n" + return text + +def generate_undef_public_entries(): + text = "#ifdef MAPI_TMP_PUBLIC_ENTRIES_NO_HIDDEN\n" + text += "#undef MAPI_TMP_PUBLIC_ENTRIES_NO_HIDDEN\n" + text += "#endif /* MAPI_TMP_PUBLIC_ENTRIES_NO_HIDDEN */\n" + return text + +def generate_stub_asm_gcc(functions): + text = "#ifdef MAPI_TMP_STUB_ASM_GCC_NO_HIDDEN\n" + text += 
"__asm__(\n" + + for func in functions: + text += 'STUB_ASM_ENTRY("%s")"\\n"\n' % (func.name,) + text += '"\\t"STUB_ASM_CODE("%d")"\\n"\n\n' % (func.slot,) + + text += ");\n" + text += "#undef MAPI_TMP_STUB_ASM_GCC_NO_HIDDEN\n" + text += "#endif /* MAPI_TMP_STUB_ASM_GCC_NO_HIDDEN */\n" + return text + +if (__name__ == "__main__"): + _main() + diff -Nru mesa-18.3.3/src/mapi/shared-glapi/meson.build mesa-19.0.1/src/mapi/shared-glapi/meson.build --- mesa-18.3.3/src/mapi/shared-glapi/meson.build 2018-12-07 18:58:04.000000000 +0000 +++ mesa-19.0.1/src/mapi/shared-glapi/meson.build 2019-03-31 23:16:37.000000000 +0000 @@ -60,6 +60,7 @@ include_directories : [inc_src, inc_include, inc_mapi], link_with : [libglapi], dependencies : [dep_thread, idep_gtest], - ) + ), + suite : ['mapi'], ) endif diff -Nru mesa-18.3.3/src/mapi/shared-glapi/SConscript mesa-19.0.1/src/mapi/shared-glapi/SConscript --- mesa-18.3.3/src/mapi/shared-glapi/SConscript 2018-12-07 18:58:04.000000000 +0000 +++ mesa-19.0.1/src/mapi/shared-glapi/SConscript 2019-03-31 23:16:37.000000000 +0000 @@ -27,13 +27,27 @@ # generate ABI header GLAPI = '../glapi/' - header = env.CodeGenerate( - target = header_name, - script = '../mapi_abi.py', - source = [GLAPI + 'gen/gl_and_es_API.xml'] + env.Glob(GLAPI + 'gen/*.xml'), - command = python_cmd + ' $SCRIPT ' + \ - '--printer %s $SOURCE > $TARGET' % (printer), - ) + if printer != 'glapi': + if printer == 'es1api': + abi_tag = 'glesv1' + else: + abi_tag = 'glesv2' + + header = env.CodeGenerate( + target = header_name, + script = '../new/gen_gldispatch_mapi.py', + source = GLAPI + 'registry/gl.xml' + command = python_cmd + ' $SCRIPT ' + \ + '%s $SOURCE > $TARGET' % (abi_tag), + ) + else: + header = env.CodeGenerate( + target = header_name, + script = '../mapi_abi.py', + source = [GLAPI + 'gen/gl_and_es_API.xml'] + env.Glob(GLAPI + 'gen/*.xml'), + command = python_cmd + ' $SCRIPT ' + \ + '--printer %s $SOURCE > $TARGET' % (printer), + ) cpppath = [ header[0].dir, diff -Nru 
mesa-18.3.3/src/mesa/drivers/common/meta.c mesa-19.0.1/src/mesa/drivers/common/meta.c --- mesa-18.3.3/src/mesa/drivers/common/meta.c 2018-12-07 18:58:04.000000000 +0000 +++ mesa-19.0.1/src/mesa/drivers/common/meta.c 2019-03-31 23:16:37.000000000 +0000 @@ -127,7 +127,7 @@ assert(att); _mesa_framebuffer_texture(ctx, fb, attachment, att, texObj, texTarget, - level, layer, false); + level, att->NumSamples, layer, false); } static struct gl_shader * diff -Nru mesa-18.3.3/src/mesa/drivers/dri/i915/intel_context.c mesa-19.0.1/src/mesa/drivers/dri/i915/intel_context.c --- mesa-18.3.3/src/mesa/drivers/dri/i915/intel_context.c 2018-12-07 18:58:04.000000000 +0000 +++ mesa-19.0.1/src/mesa/drivers/dri/i915/intel_context.c 2019-03-31 23:16:37.000000000 +0000 @@ -442,8 +442,6 @@ intel->is_945 = IS_945(devID); - intel->has_swizzling = intel->intelScreen->hw_has_swizzling; - memset(&ctx->TextureFormatSupported, 0, sizeof(ctx->TextureFormatSupported)); diff -Nru mesa-18.3.3/src/mesa/drivers/dri/i915/intel_context.h mesa-19.0.1/src/mesa/drivers/dri/i915/intel_context.h --- mesa-18.3.3/src/mesa/drivers/dri/i915/intel_context.h 2018-12-07 18:58:04.000000000 +0000 +++ mesa-19.0.1/src/mesa/drivers/dri/i915/intel_context.h 2019-03-31 23:16:37.000000000 +0000 @@ -159,7 +159,6 @@ */ int gen; bool is_945; - bool has_swizzling; struct intel_batchbuffer batch; diff -Nru mesa-18.3.3/src/mesa/drivers/dri/i915/intel_screen.c mesa-19.0.1/src/mesa/drivers/dri/i915/intel_screen.c --- mesa-18.3.3/src/mesa/drivers/dri/i915/intel_screen.c 2018-12-07 18:58:04.000000000 +0000 +++ mesa-19.0.1/src/mesa/drivers/dri/i915/intel_screen.c 2019-03-31 23:16:37.000000000 +0000 @@ -1020,30 +1020,6 @@ return true; } -static bool -intel_detect_swizzling(struct intel_screen *screen) -{ - drm_intel_bo *buffer; - unsigned long flags = 0; - unsigned long aligned_pitch; - uint32_t tiling = I915_TILING_X; - uint32_t swizzle_mode = 0; - - buffer = drm_intel_bo_alloc_tiled(screen->bufmgr, "swizzle test", - 64, 64, 4, - 
&tiling, &aligned_pitch, flags); - if (buffer == NULL) - return false; - - drm_intel_bo_get_tiling(buffer, &tiling, &swizzle_mode); - drm_intel_bo_unreference(buffer); - - if (swizzle_mode == I915_BIT_6_SWIZZLE_NONE) - return false; - else - return true; -} - static __DRIconfig** intel_screen_make_configs(__DRIscreen *dri_screen) { @@ -1200,8 +1176,6 @@ intelScreen->gen = 2; } - intelScreen->hw_has_swizzling = intel_detect_swizzling(intelScreen); - set_max_gl_versions(intelScreen); psp->extensions = intelScreenExtensions; diff -Nru mesa-18.3.3/src/mesa/drivers/dri/i915/intel_screen.h mesa-19.0.1/src/mesa/drivers/dri/i915/intel_screen.h --- mesa-18.3.3/src/mesa/drivers/dri/i915/intel_screen.h 2018-01-29 17:10:31.000000000 +0000 +++ mesa-19.0.1/src/mesa/drivers/dri/i915/intel_screen.h 2019-03-31 23:16:37.000000000 +0000 @@ -44,8 +44,6 @@ bool no_hw; - bool hw_has_swizzling; - bool no_vbo; dri_bufmgr *bufmgr; diff -Nru mesa-18.3.3/src/mesa/drivers/dri/i965/Android.mk mesa-19.0.1/src/mesa/drivers/dri/i965/Android.mk --- mesa-18.3.3/src/mesa/drivers/dri/i965/Android.mk 2018-12-07 18:58:04.000000000 +0000 +++ mesa-19.0.1/src/mesa/drivers/dri/i965/Android.mk 2019-03-31 23:16:37.000000000 +0000 @@ -51,42 +51,6 @@ libmesa_i965_gen10 \ libmesa_i965_gen11 - -# --------------------------------------- -# Build libmesa_intel_tiled_memcpy -# --------------------------------------- - -include $(CLEAR_VARS) - -LOCAL_MODULE := libmesa_intel_tiled_memcpy - -LOCAL_C_INCLUDES := $(I965_PERGEN_COMMON_INCLUDES) - -LOCAL_SRC_FILES := $(intel_tiled_memcpy_FILES) - -include $(MESA_COMMON_MK) -include $(BUILD_STATIC_LIBRARY) - -# --------------------------------------- -# Build libmesa_intel_tiled_memcpy_sse41 -# --------------------------------------- - -ifeq ($(ARCH_X86_HAVE_SSE4_1),true) -include $(CLEAR_VARS) - -LOCAL_MODULE := libmesa_intel_tiled_memcpy_sse41 - -LOCAL_C_INCLUDES := $(I965_PERGEN_COMMON_INCLUDES) - -LOCAL_SRC_FILES := $(intel_tiled_memcpy_sse41_FILES) - -LOCAL_CFLAGS += 
\ - -DUSE_SSE41 -msse4.1 -mstackrealign - -include $(MESA_COMMON_MK) -include $(BUILD_STATIC_LIBRARY) -endif - # --------------------------------------- # Build libmesa_i965_gen4 # --------------------------------------- @@ -312,6 +276,7 @@ LOCAL_C_INCLUDES := \ $(MESA_DRI_C_INCLUDES) \ + $(call generated-sources-dir-for,STATIC_LIBRARIES,libmesa_glsl,,) \ $(MESA_TOP)/include/drm-uapi LOCAL_SRC_FILES := \ @@ -320,7 +285,6 @@ LOCAL_WHOLE_STATIC_LIBRARIES := \ $(MESA_DRI_WHOLE_STATIC_LIBRARIES) \ $(I965_PERGEN_LIBS) \ - libmesa_intel_tiled_memcpy \ libmesa_intel_dev \ libmesa_intel_common \ libmesa_isl \ @@ -330,8 +294,6 @@ ifeq ($(ARCH_X86_HAVE_SSE4_1),true) LOCAL_CFLAGS += \ -DUSE_SSE41 -LOCAL_WHOLE_STATIC_LIBRARIES += \ - libmesa_intel_tiled_memcpy_sse41 endif LOCAL_SHARED_LIBRARIES := \ diff -Nru mesa-18.3.3/src/mesa/drivers/dri/i965/brw_blorp.c mesa-19.0.1/src/mesa/drivers/dri/i965/brw_blorp.c --- mesa-18.3.3/src/mesa/drivers/dri/i965/brw_blorp.c 2018-12-07 18:58:04.000000000 +0000 +++ mesa-19.0.1/src/mesa/drivers/dri/i965/brw_blorp.c 2019-03-31 23:16:37.000000000 +0000 @@ -43,24 +43,24 @@ #define FILE_DEBUG_FLAG DEBUG_BLORP static bool -brw_blorp_lookup_shader(struct blorp_context *blorp, +brw_blorp_lookup_shader(struct blorp_batch *batch, const void *key, uint32_t key_size, uint32_t *kernel_out, void *prog_data_out) { - struct brw_context *brw = blorp->driver_ctx; + struct brw_context *brw = batch->driver_batch; return brw_search_cache(&brw->cache, BRW_CACHE_BLORP_PROG, key, key_size, kernel_out, prog_data_out, true); } static bool -brw_blorp_upload_shader(struct blorp_context *blorp, +brw_blorp_upload_shader(struct blorp_batch *batch, const void *key, uint32_t key_size, const void *kernel, uint32_t kernel_size, const struct brw_stage_prog_data *prog_data, uint32_t prog_data_size, uint32_t *kernel_out, void *prog_data_out) { - struct brw_context *brw = blorp->driver_ctx; + struct brw_context *brw = batch->driver_batch; brw_upload_cache(&brw->cache, 
BRW_CACHE_BLORP_PROG, key, key_size, kernel, kernel_size, prog_data, prog_data_size, kernel_out, prog_data_out); diff -Nru mesa-18.3.3/src/mesa/drivers/dri/i965/brw_compute.c mesa-19.0.1/src/mesa/drivers/dri/i965/brw_compute.c --- mesa-18.3.3/src/mesa/drivers/dri/i965/brw_compute.c 2018-12-07 18:58:04.000000000 +0000 +++ mesa-19.0.1/src/mesa/drivers/dri/i965/brw_compute.c 2019-03-31 23:16:37.000000000 +0000 @@ -35,135 +35,6 @@ static void -prepare_indirect_gpgpu_walker(struct brw_context *brw) -{ - const struct gen_device_info *devinfo = &brw->screen->devinfo; - GLintptr indirect_offset = brw->compute.num_work_groups_offset; - struct brw_bo *bo = brw->compute.num_work_groups_bo; - - brw_load_register_mem(brw, GEN7_GPGPU_DISPATCHDIMX, bo, indirect_offset + 0); - brw_load_register_mem(brw, GEN7_GPGPU_DISPATCHDIMY, bo, indirect_offset + 4); - brw_load_register_mem(brw, GEN7_GPGPU_DISPATCHDIMZ, bo, indirect_offset + 8); - - if (devinfo->gen > 7) - return; - - /* Clear upper 32-bits of SRC0 and all 64-bits of SRC1 */ - BEGIN_BATCH(7); - OUT_BATCH(MI_LOAD_REGISTER_IMM | (7 - 2)); - OUT_BATCH(MI_PREDICATE_SRC0 + 4); - OUT_BATCH(0u); - OUT_BATCH(MI_PREDICATE_SRC1 + 0); - OUT_BATCH(0u); - OUT_BATCH(MI_PREDICATE_SRC1 + 4); - OUT_BATCH(0u); - ADVANCE_BATCH(); - - /* Load compute_dispatch_indirect_x_size into SRC0 */ - brw_load_register_mem(brw, MI_PREDICATE_SRC0, bo, indirect_offset + 0); - - /* predicate = (compute_dispatch_indirect_x_size == 0); */ - BEGIN_BATCH(1); - OUT_BATCH(GEN7_MI_PREDICATE | - MI_PREDICATE_LOADOP_LOAD | - MI_PREDICATE_COMBINEOP_SET | - MI_PREDICATE_COMPAREOP_SRCS_EQUAL); - ADVANCE_BATCH(); - - /* Load compute_dispatch_indirect_y_size into SRC0 */ - brw_load_register_mem(brw, MI_PREDICATE_SRC0, bo, indirect_offset + 4); - - /* predicate |= (compute_dispatch_indirect_y_size == 0); */ - BEGIN_BATCH(1); - OUT_BATCH(GEN7_MI_PREDICATE | - MI_PREDICATE_LOADOP_LOAD | - MI_PREDICATE_COMBINEOP_OR | - MI_PREDICATE_COMPAREOP_SRCS_EQUAL); - ADVANCE_BATCH(); - - /* 
Load compute_dispatch_indirect_z_size into SRC0 */ - brw_load_register_mem(brw, MI_PREDICATE_SRC0, bo, indirect_offset + 8); - - /* predicate |= (compute_dispatch_indirect_z_size == 0); */ - BEGIN_BATCH(1); - OUT_BATCH(GEN7_MI_PREDICATE | - MI_PREDICATE_LOADOP_LOAD | - MI_PREDICATE_COMBINEOP_OR | - MI_PREDICATE_COMPAREOP_SRCS_EQUAL); - ADVANCE_BATCH(); - - /* predicate = !predicate; */ - BEGIN_BATCH(1); - OUT_BATCH(GEN7_MI_PREDICATE | - MI_PREDICATE_LOADOP_LOADINV | - MI_PREDICATE_COMBINEOP_OR | - MI_PREDICATE_COMPAREOP_FALSE); - ADVANCE_BATCH(); -} - -static void -brw_emit_gpgpu_walker(struct brw_context *brw) -{ - const struct gen_device_info *devinfo = &brw->screen->devinfo; - const struct brw_cs_prog_data *prog_data = - brw_cs_prog_data(brw->cs.base.prog_data); - - const GLuint *num_groups = brw->compute.num_work_groups; - uint32_t indirect_flag; - - if (brw->compute.num_work_groups_bo == NULL) { - indirect_flag = 0; - } else { - indirect_flag = - GEN7_GPGPU_INDIRECT_PARAMETER_ENABLE | - (devinfo->gen == 7 ? GEN7_GPGPU_PREDICATE_ENABLE : 0); - prepare_indirect_gpgpu_walker(brw); - } - - const unsigned simd_size = prog_data->simd_size; - unsigned group_size = prog_data->local_size[0] * - prog_data->local_size[1] * prog_data->local_size[2]; - unsigned thread_width_max = - (group_size + simd_size - 1) / simd_size; - - uint32_t right_mask = 0xffffffffu >> (32 - simd_size); - const unsigned right_non_aligned = group_size & (simd_size - 1); - if (right_non_aligned != 0) - right_mask >>= (simd_size - right_non_aligned); - - uint32_t dwords = devinfo->gen < 8 ? 
11 : 15; - BEGIN_BATCH(dwords); - OUT_BATCH(GPGPU_WALKER << 16 | (dwords - 2) | indirect_flag); - OUT_BATCH(0); - if (devinfo->gen >= 8) { - OUT_BATCH(0); /* Indirect Data Length */ - OUT_BATCH(0); /* Indirect Data Start Address */ - } - assert(thread_width_max <= brw->screen->devinfo.max_cs_threads); - OUT_BATCH(SET_FIELD(simd_size / 16, GPGPU_WALKER_SIMD_SIZE) | - SET_FIELD(thread_width_max - 1, GPGPU_WALKER_THREAD_WIDTH_MAX)); - OUT_BATCH(0); /* Thread Group ID Starting X */ - if (devinfo->gen >= 8) - OUT_BATCH(0); /* MBZ */ - OUT_BATCH(num_groups[0]); /* Thread Group ID X Dimension */ - OUT_BATCH(0); /* Thread Group ID Starting Y */ - if (devinfo->gen >= 8) - OUT_BATCH(0); /* MBZ */ - OUT_BATCH(num_groups[1]); /* Thread Group ID Y Dimension */ - OUT_BATCH(0); /* Thread Group ID Starting/Resume Z */ - OUT_BATCH(num_groups[2]); /* Thread Group ID Z Dimension */ - OUT_BATCH(right_mask); /* Right Execution Mask */ - OUT_BATCH(0xffffffff); /* Bottom Execution Mask */ - ADVANCE_BATCH(); - - BEGIN_BATCH(2); - OUT_BATCH(MEDIA_STATE_FLUSH << 16 | (2 - 2)); - OUT_BATCH(0); - ADVANCE_BATCH(); -} - - -static void brw_dispatch_compute_common(struct gl_context *ctx) { struct brw_context *brw = brw_context(ctx); @@ -191,7 +62,7 @@ brw->batch.no_wrap = true; brw_upload_compute_state(brw); - brw_emit_gpgpu_walker(brw); + brw->vtbl.emit_compute_walker(brw); brw->batch.no_wrap = false; diff -Nru mesa-18.3.3/src/mesa/drivers/dri/i965/brw_conditional_render.c mesa-19.0.1/src/mesa/drivers/dri/i965/brw_conditional_render.c --- mesa-18.3.3/src/mesa/drivers/dri/i965/brw_conditional_render.c 2017-11-05 00:14:08.000000000 +0000 +++ mesa-19.0.1/src/mesa/drivers/dri/i965/brw_conditional_render.c 2019-03-31 23:16:37.000000000 +0000 @@ -66,7 +66,7 @@ brw_emit_pipe_control_flush(brw, PIPE_CONTROL_FLUSH_ENABLE); hsw_overflow_result_to_gpr0(brw, query, count); - brw_load_register_reg64(brw, HSW_CS_GPR(0), MI_PREDICATE_SRC0); + brw_load_register_reg64(brw, MI_PREDICATE_SRC0, HSW_CS_GPR(0)); 
brw_load_register_imm64(brw, MI_PREDICATE_SRC1, 0ull); } diff -Nru mesa-18.3.3/src/mesa/drivers/dri/i965/brw_context.c mesa-19.0.1/src/mesa/drivers/dri/i965/brw_context.c --- mesa-18.3.3/src/mesa/drivers/dri/i965/brw_context.c 2018-12-07 18:58:04.000000000 +0000 +++ mesa-19.0.1/src/mesa/drivers/dri/i965/brw_context.c 2019-03-31 23:16:37.000000000 +0000 @@ -390,15 +390,15 @@ */ assert(devinfo->gen >= 7); + ctx->Const.SpirVCapabilities.atomic_storage = devinfo->gen >= 7; + ctx->Const.SpirVCapabilities.draw_parameters = true; ctx->Const.SpirVCapabilities.float64 = devinfo->gen >= 8; + ctx->Const.SpirVCapabilities.geometry_streams = devinfo->gen >= 7; + ctx->Const.SpirVCapabilities.image_write_without_format = true; ctx->Const.SpirVCapabilities.int64 = devinfo->gen >= 8; ctx->Const.SpirVCapabilities.tessellation = true; - ctx->Const.SpirVCapabilities.draw_parameters = true; - ctx->Const.SpirVCapabilities.image_write_without_format = true; - ctx->Const.SpirVCapabilities.variable_pointers = true; - ctx->Const.SpirVCapabilities.atomic_storage = devinfo->gen >= 7; ctx->Const.SpirVCapabilities.transform_feedback = devinfo->gen >= 7; - ctx->Const.SpirVCapabilities.geometry_streams = devinfo->gen >= 7; + ctx->Const.SpirVCapabilities.variable_pointers = true; } static void diff -Nru mesa-18.3.3/src/mesa/drivers/dri/i965/brw_context.h mesa-19.0.1/src/mesa/drivers/dri/i965/brw_context.h --- mesa-18.3.3/src/mesa/drivers/dri/i965/brw_context.h 2018-12-07 18:58:04.000000000 +0000 +++ mesa-19.0.1/src/mesa/drivers/dri/i965/brw_context.h 2019-03-31 23:16:37.000000000 +0000 @@ -752,6 +752,8 @@ struct brw_bo *bo, uint32_t offset_in_bytes, uint32_t report_id); + + void (*emit_compute_walker)(struct brw_context *brw); } vtbl; struct brw_bufmgr *bufmgr; @@ -842,6 +844,8 @@ GLuint primitive; /**< Hardware primitive, such as _3DPRIM_TRILIST. */ + bool object_preemption; /**< Object level preemption enabled. 
*/ + GLenum reduced_primitive; /** @@ -1002,6 +1006,9 @@ /* High bits of the last seen index buffer address (for workarounds). */ uint16_t last_bo_high_bits; + + /* Used to understand is GPU state of primitive restart is up to date */ + bool enable_cut_index; } ib; /* Active vertex program: @@ -1377,13 +1384,6 @@ /*====================================================================== * brw_misc_state.c */ -void -brw_meta_resolve_color(struct brw_context *brw, - struct intel_mipmap_tree *mt); - -/*====================================================================== - * brw_misc_state.c - */ void brw_workaround_depthstencil_alignment(struct brw_context *brw, GLbitfield clear_mask); @@ -1435,10 +1435,10 @@ uint32_t reg, uint32_t imm); void brw_load_register_imm64(struct brw_context *brw, uint32_t reg, uint64_t imm); -void brw_load_register_reg(struct brw_context *brw, uint32_t src, - uint32_t dest); -void brw_load_register_reg64(struct brw_context *brw, uint32_t src, - uint32_t dest); +void brw_load_register_reg(struct brw_context *brw, uint32_t dst, + uint32_t src); +void brw_load_register_reg64(struct brw_context *brw, uint32_t dst, + uint32_t src); void brw_store_data_imm32(struct brw_context *brw, struct brw_bo *bo, uint32_t offset, uint32_t imm); void brw_store_data_imm64(struct brw_context *brw, struct brw_bo *bo, @@ -1493,7 +1493,7 @@ /* brw_draw_upload.c */ unsigned brw_get_vertex_surface_type(struct brw_context *brw, - const struct gl_array_attributes *glattr); + const struct gl_vertex_format *glformat); static inline unsigned brw_get_index_type(unsigned index_size) diff -Nru mesa-18.3.3/src/mesa/drivers/dri/i965/brw_cs.c mesa-19.0.1/src/mesa/drivers/dri/i965/brw_cs.c --- mesa-18.3.3/src/mesa/drivers/dri/i965/brw_cs.c 2018-09-27 19:13:54.000000000 +0000 +++ mesa-19.0.1/src/mesa/drivers/dri/i965/brw_cs.c 2019-03-31 23:16:37.000000000 +0000 @@ -58,6 +58,7 @@ struct brw_cs_prog_data prog_data; bool start_busy = false; double start_time = 0; + nir_shader *nir 
= nir_shader_clone(mem_ctx, cp->program.nir); memset(&prog_data, 0, sizeof(prog_data)); @@ -76,7 +77,7 @@ assign_cs_binding_table_offsets(devinfo, &cp->program, &prog_data); - brw_nir_setup_glsl_uniforms(mem_ctx, cp->program.nir, + brw_nir_setup_glsl_uniforms(mem_ctx, nir, &cp->program, &prog_data.base, true); if (unlikely(brw->perf_debug)) { @@ -91,8 +92,7 @@ char *error_str; program = brw_compile_cs(brw->screen->compiler, brw, mem_ctx, key, - &prog_data, cp->program.nir, st_index, - &error_str); + &prog_data, nir, st_index, &error_str); if (program == NULL) { cp->program.sh.data->LinkStatus = LINKING_FAILURE; ralloc_strcat(&cp->program.sh.data->InfoLog, error_str); diff -Nru mesa-18.3.3/src/mesa/drivers/dri/i965/brw_defines.h mesa-19.0.1/src/mesa/drivers/dri/i965/brw_defines.h --- mesa-18.3.3/src/mesa/drivers/dri/i965/brw_defines.h 2018-12-07 18:58:04.000000000 +0000 +++ mesa-19.0.1/src/mesa/drivers/dri/i965/brw_defines.h 2019-03-31 23:16:37.000000000 +0000 @@ -38,7 +38,7 @@ /* Using the GNU statement expression extension */ #define SET_FIELD(value, field) \ ({ \ - uint32_t fieldval = (value) << field ## _SHIFT; \ + uint32_t fieldval = (uint32_t)(value) << field ## _SHIFT; \ assert((fieldval & ~ field ## _MASK) == 0); \ fieldval & field ## _MASK; \ }) @@ -1646,6 +1646,8 @@ # define GEN8_L3CNTLREG_DC_ALLOC_MASK INTEL_MASK(24, 18) # define GEN8_L3CNTLREG_ALL_ALLOC_SHIFT 25 # define GEN8_L3CNTLREG_ALL_ALLOC_MASK INTEL_MASK(31, 25) +# define GEN8_L3CNTLREG_EDBC_NO_HANG (1 << 9) +# define GEN11_L3CNTLREG_USE_FULL_WAYS (1 << 10) #define GEN10_CACHE_MODE_SS 0x0e420 #define GEN10_FLOAT_BLEND_OPTIMIZATION_ENABLE (1 << 4) @@ -1681,4 +1683,9 @@ # define HEADERLESS_MESSAGE_FOR_PREEMPTABLE_CONTEXTS (1 << 5) # define HEADERLESS_MESSAGE_FOR_PREEMPTABLE_CONTEXTS_MASK REG_MASK(1 << 5) +#define CS_CHICKEN1 0x2580 /* Gen9+ */ +# define GEN9_REPLAY_MODE_MIDBUFFER (0 << 0) +# define GEN9_REPLAY_MODE_MIDOBJECT (1 << 0) +# define GEN9_REPLAY_MODE_MASK REG_MASK(1 << 0) + #endif diff 
-Nru mesa-18.3.3/src/mesa/drivers/dri/i965/brw_draw.c mesa-19.0.1/src/mesa/drivers/dri/i965/brw_draw.c --- mesa-18.3.3/src/mesa/drivers/dri/i965/brw_draw.c 2018-12-07 18:58:04.000000000 +0000 +++ mesa-19.0.1/src/mesa/drivers/dri/i965/brw_draw.c 2019-03-31 23:16:37.000000000 +0000 @@ -303,16 +303,16 @@ * 2_10_10_10_REV vertex formats. Set appropriate workaround flags. */ while (mask) { - const struct gl_array_attributes *glattrib; + const struct gl_vertex_format *glformat; uint8_t wa_flags = 0; i = u_bit_scan64(&mask); - glattrib = brw->vb.inputs[i].glattrib; + glformat = &brw->vb.inputs[i].glattrib->Format; - switch (glattrib->Type) { + switch (glformat->Type) { case GL_FIXED: - wa_flags = glattrib->Size; + wa_flags = glformat->Size; break; case GL_INT_2_10_10_10_REV: @@ -320,12 +320,12 @@ /* fallthough */ case GL_UNSIGNED_INT_2_10_10_10_REV: - if (glattrib->Format == GL_BGRA) + if (glformat->Format == GL_BGRA) wa_flags |= BRW_ATTRIB_WA_BGRA; - if (glattrib->Normalized) + if (glformat->Normalized) wa_flags |= BRW_ATTRIB_WA_NORMALIZE; - else if (!glattrib->Integer) + else if (!glformat->Integer) wa_flags |= BRW_ATTRIB_WA_SCALE; break; @@ -872,6 +872,66 @@ } } +/** + * Implement workarounds for preemption: + * - WaDisableMidObjectPreemptionForGSLineStripAdj + * - WaDisableMidObjectPreemptionForTrifanOrPolygon + * - WaDisableMidObjectPreemptionForLineLoop + * - WA#0798 + */ +static void +gen9_emit_preempt_wa(struct brw_context *brw, + const struct _mesa_prim *prim) +{ + bool object_preemption = true; + const struct gen_device_info *devinfo = &brw->screen->devinfo; + + /* Only apply these workarounds for gen9 */ + assert(devinfo->gen == 9); + + /* WaDisableMidObjectPreemptionForGSLineStripAdj + * + * WA: Disable mid-draw preemption when draw-call is a linestrip_adj and + * GS is enabled. 
+ */ + if (brw->primitive == _3DPRIM_LINESTRIP_ADJ && brw->gs.enabled) + object_preemption = false; + + /* WaDisableMidObjectPreemptionForTrifanOrPolygon + * + * TriFan miscompare in Execlist Preemption test. Cut index that is on a + * previous context. End the previous, the resume another context with a + * tri-fan or polygon, and the vertex count is corrupted. If we prempt + * again we will cause corruption. + * + * WA: Disable mid-draw preemption when draw-call has a tri-fan. + */ + if (brw->primitive == _3DPRIM_TRIFAN) + object_preemption = false; + + /* WaDisableMidObjectPreemptionForLineLoop + * + * VF Stats Counters Missing a vertex when preemption enabled. + * + * WA: Disable mid-draw preemption when the draw uses a lineloop + * topology. + */ + if (brw->primitive == _3DPRIM_LINELOOP) + object_preemption = false; + + /* WA#0798 + * + * VF is corrupting GAFS data when preempted on an instance boundary and + * replayed with instancing enabled. + * + * WA: Disable preemption when using instanceing. + */ + if (prim->num_instances > 1) + object_preemption = false; + + brw_enable_obj_preemption(brw, object_preemption); +} + /* May fail if out of video memory for texture or vbo upload, or on * fallback conditions. 
*/ @@ -987,6 +1047,9 @@ brw_upload_render_state(brw); } + if (devinfo->gen == 9) + gen9_emit_preempt_wa(brw, prim); + brw_emit_prim(brw, prim, brw->primitive, xfb_obj, stream); brw->batch.no_wrap = false; diff -Nru mesa-18.3.3/src/mesa/drivers/dri/i965/brw_draw_upload.c mesa-19.0.1/src/mesa/drivers/dri/i965/brw_draw_upload.c --- mesa-18.3.3/src/mesa/drivers/dri/i965/brw_draw_upload.c 2018-12-07 18:58:04.000000000 +0000 +++ mesa-19.0.1/src/mesa/drivers/dri/i965/brw_draw_upload.c 2019-03-31 23:16:37.000000000 +0000 @@ -249,21 +249,21 @@ */ unsigned brw_get_vertex_surface_type(struct brw_context *brw, - const struct gl_array_attributes *glattrib) + const struct gl_vertex_format *glformat) { - int size = glattrib->Size; + int size = glformat->Size; const struct gen_device_info *devinfo = &brw->screen->devinfo; const bool is_ivybridge_or_older = devinfo->gen <= 7 && !devinfo->is_baytrail && !devinfo->is_haswell; if (unlikely(INTEL_DEBUG & DEBUG_VERTS)) fprintf(stderr, "type %s size %d normalized %d\n", - _mesa_enum_to_string(glattrib->Type), - glattrib->Size, glattrib->Normalized); + _mesa_enum_to_string(glformat->Type), + glformat->Size, glformat->Normalized); - if (glattrib->Integer) { - assert(glattrib->Format == GL_RGBA); /* sanity check */ - switch (glattrib->Type) { + if (glformat->Integer) { + assert(glformat->Format == GL_RGBA); /* sanity check */ + switch (glformat->Type) { case GL_INT: return int_types_direct[size]; case GL_SHORT: if (is_ivybridge_or_older && size == 3) @@ -288,11 +288,11 @@ return ubyte_types_direct[size]; default: unreachable("not reached"); } - } else if (glattrib->Type == GL_UNSIGNED_INT_10F_11F_11F_REV) { + } else if (glformat->Type == GL_UNSIGNED_INT_10F_11F_11F_REV) { return ISL_FORMAT_R11G11B10_FLOAT; - } else if (glattrib->Normalized) { - switch (glattrib->Type) { - case GL_DOUBLE: return double_types(size, glattrib->Doubles); + } else if (glformat->Normalized) { + switch (glformat->Type) { + case GL_DOUBLE: return double_types(size, 
glformat->Doubles); case GL_FLOAT: return float_types[size]; case GL_HALF_FLOAT: case GL_HALF_FLOAT_OES: @@ -306,7 +306,7 @@ case GL_UNSIGNED_INT: return uint_types_norm[size]; case GL_UNSIGNED_SHORT: return ushort_types_norm[size]; case GL_UNSIGNED_BYTE: - if (glattrib->Format == GL_BGRA) { + if (glformat->Format == GL_BGRA) { /* See GL_EXT_vertex_array_bgra */ assert(size == 4); return ISL_FORMAT_B8G8R8A8_UNORM; @@ -330,7 +330,7 @@ case GL_INT_2_10_10_10_REV: assert(size == 4); if (devinfo->gen >= 8 || devinfo->is_haswell) { - return glattrib->Format == GL_BGRA + return glformat->Format == GL_BGRA ? ISL_FORMAT_B10G10R10A2_SNORM : ISL_FORMAT_R10G10B10A2_SNORM; } @@ -338,7 +338,7 @@ case GL_UNSIGNED_INT_2_10_10_10_REV: assert(size == 4); if (devinfo->gen >= 8 || devinfo->is_haswell) { - return glattrib->Format == GL_BGRA + return glformat->Format == GL_BGRA ? ISL_FORMAT_B10G10R10A2_UNORM : ISL_FORMAT_R10G10B10A2_UNORM; } @@ -352,26 +352,26 @@ * like to use here, so upload everything as UINT and fix * it in the shader */ - if (glattrib->Type == GL_INT_2_10_10_10_REV) { + if (glformat->Type == GL_INT_2_10_10_10_REV) { assert(size == 4); if (devinfo->gen >= 8 || devinfo->is_haswell) { - return glattrib->Format == GL_BGRA + return glformat->Format == GL_BGRA ? ISL_FORMAT_B10G10R10A2_SSCALED : ISL_FORMAT_R10G10B10A2_SSCALED; } return ISL_FORMAT_R10G10B10A2_UINT; - } else if (glattrib->Type == GL_UNSIGNED_INT_2_10_10_10_REV) { + } else if (glformat->Type == GL_UNSIGNED_INT_2_10_10_10_REV) { assert(size == 4); if (devinfo->gen >= 8 || devinfo->is_haswell) { - return glattrib->Format == GL_BGRA + return glformat->Format == GL_BGRA ? 
ISL_FORMAT_B10G10R10A2_USCALED : ISL_FORMAT_R10G10B10A2_USCALED; } return ISL_FORMAT_R10G10B10A2_UINT; } - assert(glattrib->Format == GL_RGBA); /* sanity check */ - switch (glattrib->Type) { - case GL_DOUBLE: return double_types(size, glattrib->Doubles); + assert(glformat->Format == GL_RGBA); /* sanity check */ + switch (glformat->Type) { + case GL_DOUBLE: return double_types(size, glformat->Doubles); case GL_FLOAT: return float_types[size]; case GL_HALF_FLOAT: case GL_HALF_FLOAT_OES: @@ -407,6 +407,7 @@ { const struct gl_vertex_buffer_binding *glbinding = element->glbinding; const struct gl_array_attributes *glattrib = element->glattrib; + const struct gl_vertex_format *glformat = &glattrib->Format; const int src_stride = glbinding->Stride; /* If the source stride is zero, we just want to upload the current @@ -414,11 +415,11 @@ * to replicate it out. */ if (src_stride == 0) { - brw_upload_data(&brw->upload, glattrib->Ptr, glattrib->_ElementSize, - glattrib->_ElementSize, &buffer->bo, &buffer->offset); + brw_upload_data(&brw->upload, glattrib->Ptr, glformat->_ElementSize, + glformat->_ElementSize, &buffer->bo, &buffer->offset); buffer->stride = 0; - buffer->size = glattrib->_ElementSize; + buffer->size = glformat->_ElementSize; return; } @@ -531,13 +532,13 @@ start = offset + glbinding->Stride * brw->baseinstance; range = (glbinding->Stride * ((brw->num_instances - 1) / glbinding->InstanceDivisor) + - glattrib->_ElementSize); + glattrib->Format._ElementSize); } } else { if (brw->vb.index_bounds_valid) { start = offset + min_index * glbinding->Stride; range = (glbinding->Stride * (max_index - min_index) + - glattrib->_ElementSize); + glattrib->Format._ElementSize); } } @@ -594,7 +595,8 @@ else if (interleaved != glbinding->Stride || glbinding->InstanceDivisor != 0 || glattrib->Ptr < ptr || - (uintptr_t)(glattrib->Ptr - ptr) + glattrib->_ElementSize > interleaved) + (uintptr_t)(glattrib->Ptr - ptr) + + glattrib->Format._ElementSize > interleaved) { /* If our stride 
is different from the first attribute's stride, * or if we are using an instance divisor or if the first @@ -677,7 +679,7 @@ const struct gl_array_attributes *glattrib = upload[i]->glattrib; if (glbinding->InstanceDivisor == 0) { copy_array_to_vbo_array(brw, upload[i], min_index, max_index, - buffer, glattrib->_ElementSize); + buffer, glattrib->Format._ElementSize); } else { /* This is an instanced attribute, since its InstanceDivisor * is not zero. Therefore, its data will be stepped after the @@ -686,7 +688,7 @@ uint32_t instanced_attr_max_index = (brw->num_instances - 1) / glbinding->InstanceDivisor; copy_array_to_vbo_array(brw, upload[i], 0, instanced_attr_max_index, - buffer, glattrib->_ElementSize); + buffer, glattrib->Format._ElementSize); } buffer->offset -= delta * buffer->stride; buffer->size += delta * buffer->stride; @@ -774,6 +776,14 @@ brw->ib.index_size = index_buffer->index_size; brw->ctx.NewDriverState |= BRW_NEW_INDEX_BUFFER; } + + /* We need to re-emit an index buffer state each time + * when cut index flag is changed + */ + if (brw->prim_restart.enable_cut_index != brw->ib.enable_cut_index) { + brw->ib.enable_cut_index = brw->prim_restart.enable_cut_index; + brw->ctx.NewDriverState |= BRW_NEW_INDEX_BUFFER; + } } const struct brw_tracked_state brw_indices = { diff -Nru mesa-18.3.3/src/mesa/drivers/dri/i965/brw_gs.c mesa-19.0.1/src/mesa/drivers/dri/i965/brw_gs.c --- mesa-18.3.3/src/mesa/drivers/dri/i965/brw_gs.c 2018-09-27 19:13:54.000000000 +0000 +++ mesa-19.0.1/src/mesa/drivers/dri/i965/brw_gs.c 2019-03-31 23:16:37.000000000 +0000 @@ -89,15 +89,17 @@ void *mem_ctx = ralloc_context(NULL); + nir_shader *nir = nir_shader_clone(mem_ctx, gp->program.nir); + assign_gs_binding_table_offsets(devinfo, &gp->program, &prog_data); - brw_nir_setup_glsl_uniforms(mem_ctx, gp->program.nir, &gp->program, + brw_nir_setup_glsl_uniforms(mem_ctx, nir, &gp->program, &prog_data.base.base, compiler->scalar_stage[MESA_SHADER_GEOMETRY]); - 
brw_nir_analyze_ubo_ranges(compiler, gp->program.nir, NULL, + brw_nir_analyze_ubo_ranges(compiler, nir, NULL, prog_data.base.base.ubo_ranges); - uint64_t outputs_written = gp->program.nir->info.outputs_written; + uint64_t outputs_written = nir->info.outputs_written; brw_compute_vue_map(devinfo, &prog_data.base.vue_map, outputs_written, @@ -115,8 +117,7 @@ char *error_str; const unsigned *program = brw_compile_gs(brw->screen->compiler, brw, mem_ctx, key, - &prog_data, gp->program.nir, &gp->program, - st_index, &error_str); + &prog_data, nir, &gp->program, st_index, &error_str); if (program == NULL) { ralloc_strcat(&gp->program.sh.data->InfoLog, error_str); _mesa_problem(NULL, "Failed to compile geometry shader: %s\n", error_str); diff -Nru mesa-18.3.3/src/mesa/drivers/dri/i965/brw_nir_uniforms.cpp mesa-19.0.1/src/mesa/drivers/dri/i965/brw_nir_uniforms.cpp --- mesa-18.3.3/src/mesa/drivers/dri/i965/brw_nir_uniforms.cpp 2018-12-07 18:58:04.000000000 +0000 +++ mesa-19.0.1/src/mesa/drivers/dri/i965/brw_nir_uniforms.cpp 2019-03-31 23:16:37.000000000 +0000 @@ -109,10 +109,6 @@ image_idx, offsetof(brw_image_param, swizzling), 2); param += BRW_IMAGE_PARAM_SIZE; - - brw_mark_surface_used( - stage_prog_data, - stage_prog_data->binding_table.image_start + image_idx); } } diff -Nru mesa-18.3.3/src/mesa/drivers/dri/i965/brw_pipe_control.c mesa-19.0.1/src/mesa/drivers/dri/i965/brw_pipe_control.c --- mesa-18.3.3/src/mesa/drivers/dri/i965/brw_pipe_control.c 2018-09-27 19:13:54.000000000 +0000 +++ mesa-19.0.1/src/mesa/drivers/dri/i965/brw_pipe_control.c 2019-03-31 23:16:37.000000000 +0000 @@ -308,7 +308,7 @@ void gen7_emit_vs_workaround_flush(struct brw_context *brw) { - const struct gen_device_info *devinfo = &brw->screen->devinfo; + MAYBE_UNUSED const struct gen_device_info *devinfo = &brw->screen->devinfo; assert(devinfo->gen == 7); brw_emit_pipe_control_write(brw, diff -Nru mesa-18.3.3/src/mesa/drivers/dri/i965/brw_program_binary.c 
mesa-19.0.1/src/mesa/drivers/dri/i965/brw_program_binary.c --- mesa-18.3.3/src/mesa/drivers/dri/i965/brw_program_binary.c 2018-09-27 19:13:54.000000000 +0000 +++ mesa-19.0.1/src/mesa/drivers/dri/i965/brw_program_binary.c 2019-03-31 23:16:37.000000000 +0000 @@ -206,14 +206,14 @@ break; switch ((enum driver_cache_blob_part)part_type) { case GEN_PART: { - uint32_t gen_size = blob_read_uint32(&reader); + MAYBE_UNUSED uint32_t gen_size = blob_read_uint32(&reader); assert(!reader.overrun && (uintptr_t)(reader.end - reader.current) > gen_size); deserialize_gen_program(&reader, ctx, prog, stage); break; } case NIR_PART: { - uint32_t nir_size = blob_read_uint32(&reader); + MAYBE_UNUSED uint32_t nir_size = blob_read_uint32(&reader); assert(!reader.overrun && (uintptr_t)(reader.end - reader.current) > nir_size); const struct nir_shader_compiler_options *options = diff -Nru mesa-18.3.3/src/mesa/drivers/dri/i965/brw_program.c mesa-19.0.1/src/mesa/drivers/dri/i965/brw_program.c --- mesa-18.3.3/src/mesa/drivers/dri/i965/brw_program.c 2018-12-07 18:58:04.000000000 +0000 +++ mesa-19.0.1/src/mesa/drivers/dri/i965/brw_program.c 2019-03-31 23:16:37.000000000 +0000 @@ -40,7 +40,9 @@ #include "tnl/tnl.h" #include "util/ralloc.h" #include "compiler/glsl/ir.h" +#include "compiler/glsl/program.h" #include "compiler/glsl/glsl_to_nir.h" +#include "glsl/float64_glsl.h" #include "brw_program.h" #include "brw_context.h" @@ -53,6 +55,9 @@ #include "brw_vs.h" #include "brw_wm.h" +#include "main/shaderapi.h" +#include "main/shaderobj.h" + static bool brw_nir_lower_uniforms(nir_shader *nir, bool is_scalar) { @@ -67,6 +72,54 @@ } } +static struct gl_program *brwNewProgram(struct gl_context *ctx, GLenum target, + GLuint id, bool is_arb_asm); + +static nir_shader * +compile_fp64_funcs(struct gl_context *ctx, + const nir_shader_compiler_options *options, + void *mem_ctx, + gl_shader_stage stage) +{ + const GLuint name = ~0; + struct gl_shader *sh; + + sh = _mesa_new_shader(name, stage); + + sh->Source 
= float64_source; + sh->CompileStatus = COMPILE_FAILURE; + _mesa_glsl_compile_shader(ctx, sh, false, false, true); + + if (!sh->CompileStatus) { + if (sh->InfoLog) { + _mesa_problem(ctx, + "fp64 software impl compile failed:\n%s\nsource:\n%s\n", + sh->InfoLog, float64_source); + } + } + + struct gl_shader_program *sh_prog; + sh_prog = _mesa_new_shader_program(name); + sh_prog->Label = NULL; + sh_prog->NumShaders = 1; + sh_prog->Shaders = malloc(sizeof(struct gl_shader *)); + sh_prog->Shaders[0] = sh; + + struct gl_linked_shader *linked = rzalloc(NULL, struct gl_linked_shader); + linked->Stage = stage; + linked->Program = + brwNewProgram(ctx, + _mesa_shader_stage_to_program(stage), + name, false); + + linked->ir = sh->ir; + sh_prog->_LinkedShaders[stage] = linked; + + nir_shader *nir = glsl_to_nir(sh_prog, stage, options); + + return nir_shader_clone(mem_ctx, nir); +} + nir_shader * brw_create_nir(struct brw_context *brw, const struct gl_shader_program *shader_prog, @@ -101,6 +154,15 @@ } nir_validate_shader(nir, "before brw_preprocess_nir"); + nir_shader_gather_info(nir, nir_shader_get_entrypoint(nir)); + + if (!devinfo->has_64bit_types && nir->info.uses_64bit) { + nir_shader *fp64 = compile_fp64_funcs(ctx, options, ralloc_parent(nir), stage); + + nir_validate_shader(fp64, "fp64"); + exec_list_append(&nir->functions, &fp64->functions); + } + nir = brw_preprocess_nir(brw->screen->compiler, nir); NIR_PASS_V(nir, brw_nir_lower_image_load_store, devinfo); @@ -407,7 +469,7 @@ * and we wish to view that there are 4 subslices per slice * instead of the actual number of subslices per slice. */ - if (devinfo->gen >= 9) + if (devinfo->gen >= 9 && devinfo->gen < 11) subslices = 4 * brw->screen->devinfo.num_slices; unsigned scratch_ids_per_subslice; @@ -835,7 +897,10 @@ stage_prog_data->binding_table.plane_start[2] = next_binding_table_offset; next_binding_table_offset += num_textures; - /* prog_data->base.binding_table.size will be set by brw_mark_surface_used. 
*/ + /* Set the binding table size. Some callers may append new entries + * and increase this accordingly. + */ + stage_prog_data->binding_table.size_bytes = next_binding_table_offset * 4; assert(next_binding_table_offset <= BRW_MAX_SURFACES); return next_binding_table_offset; diff -Nru mesa-18.3.3/src/mesa/drivers/dri/i965/brw_state.h mesa-19.0.1/src/mesa/drivers/dri/i965/brw_state.h --- mesa-18.3.3/src/mesa/drivers/dri/i965/brw_state.h 2018-09-27 19:13:54.000000000 +0000 +++ mesa-19.0.1/src/mesa/drivers/dri/i965/brw_state.h 2019-03-31 23:16:37.000000000 +0000 @@ -128,7 +128,7 @@ void brw_disk_cache_write_render_programs(struct brw_context *brw); /*********************************************************************** - * brw_state.c + * brw_state_upload.c */ void brw_upload_render_state(struct brw_context *brw); void brw_render_state_finished(struct brw_context *brw); @@ -138,6 +138,7 @@ void brw_destroy_state(struct brw_context *brw); void brw_emit_select_pipeline(struct brw_context *brw, enum brw_pipeline pipeline); +void brw_enable_obj_preemption(struct brw_context *brw, bool enable); static inline void brw_select_pipeline(struct brw_context *brw, enum brw_pipeline pipeline) diff -Nru mesa-18.3.3/src/mesa/drivers/dri/i965/brw_state_upload.c mesa-19.0.1/src/mesa/drivers/dri/i965/brw_state_upload.c --- mesa-18.3.3/src/mesa/drivers/dri/i965/brw_state_upload.c 2018-12-07 18:58:04.000000000 +0000 +++ mesa-19.0.1/src/mesa/drivers/dri/i965/brw_state_upload.c 2019-03-31 23:16:37.000000000 +0000 @@ -45,6 +45,28 @@ #include "brw_cs.h" #include "main/framebuffer.h" +void +brw_enable_obj_preemption(struct brw_context *brw, bool enable) +{ + const struct gen_device_info *devinfo = &brw->screen->devinfo; + assert(devinfo->gen >= 9); + + if (enable == brw->object_preemption) + return; + + /* A fixed function pipe flush is required before modifying this field */ + brw_emit_end_of_pipe_sync(brw, PIPE_CONTROL_RENDER_TARGET_FLUSH); + + bool replay_mode = enable ? 
+ GEN9_REPLAY_MODE_MIDOBJECT : GEN9_REPLAY_MODE_MIDBUFFER; + + /* enable object level preemption */ + brw_load_register_imm32(brw, CS_CHICKEN1, + replay_mode | GEN9_REPLAY_MODE_MASK); + + brw->object_preemption = enable; +} + static void brw_upload_initial_gpu_state(struct brw_context *brw) { @@ -79,6 +101,13 @@ brw_load_register_imm32(brw, HALF_SLICE_CHICKEN7, TEXEL_OFFSET_FIX_MASK | TEXEL_OFFSET_FIX_ENABLE); + + /* WA_1406697149: Bit 9 "Error Detection Behavior Control" must be set + * in L3CNTLREG register. The default setting of the bit is not the + * desirable behavior. + */ + brw_load_register_imm32(brw, GEN8_L3CNTLREG, + GEN8_L3CNTLREG_EDBC_NO_HANG); } if (devinfo->gen == 10 || devinfo->gen == 11) { @@ -153,6 +182,11 @@ ADVANCE_BATCH(); } } + + brw->object_preemption = false; + + if (devinfo->gen >= 10) + brw_enable_obj_preemption(brw, true); } static inline const struct brw_tracked_state * diff -Nru mesa-18.3.3/src/mesa/drivers/dri/i965/brw_surface_formats.c mesa-19.0.1/src/mesa/drivers/dri/i965/brw_surface_formats.c --- mesa-18.3.3/src/mesa/drivers/dri/i965/brw_surface_formats.c 2017-11-05 00:14:08.000000000 +0000 +++ mesa-19.0.1/src/mesa/drivers/dri/i965/brw_surface_formats.c 2019-03-31 23:16:37.000000000 +0000 @@ -67,6 +67,7 @@ [MESA_FORMAT_B8G8R8A8_SRGB] = ISL_FORMAT_B8G8R8A8_UNORM_SRGB, [MESA_FORMAT_R8G8B8A8_SRGB] = ISL_FORMAT_R8G8B8A8_UNORM_SRGB, [MESA_FORMAT_B8G8R8X8_SRGB] = ISL_FORMAT_B8G8R8X8_UNORM_SRGB, + [MESA_FORMAT_R_SRGB8] = ISL_FORMAT_L8_UNORM_SRGB, [MESA_FORMAT_L_SRGB8] = ISL_FORMAT_L8_UNORM_SRGB, [MESA_FORMAT_L8A8_SRGB] = ISL_FORMAT_L8A8_UNORM_SRGB, [MESA_FORMAT_SRGB_DXT1] = ISL_FORMAT_BC1_UNORM_SRGB, diff -Nru mesa-18.3.3/src/mesa/drivers/dri/i965/brw_tcs.c mesa-19.0.1/src/mesa/drivers/dri/i965/brw_tcs.c --- mesa-18.3.3/src/mesa/drivers/dri/i965/brw_tcs.c 2018-12-07 18:58:04.000000000 +0000 +++ mesa-19.0.1/src/mesa/drivers/dri/i965/brw_tcs.c 2019-03-31 23:16:37.000000000 +0000 @@ -84,7 +84,7 @@ void *mem_ctx = ralloc_context(NULL); if 
(tcp) { - nir = tcp->program.nir; + nir = nir_shader_clone(mem_ctx, tcp->program.nir); } else { const nir_shader_compiler_options *options = ctx->Const.ShaderCompilerOptions[MESA_SHADER_TESS_CTRL].NirOptions; @@ -100,7 +100,7 @@ brw_nir_setup_glsl_uniforms(mem_ctx, nir, &tcp->program, &prog_data.base.base, compiler->scalar_stage[MESA_SHADER_TESS_CTRL]); - brw_nir_analyze_ubo_ranges(compiler, tcp->program.nir, NULL, + brw_nir_analyze_ubo_ranges(compiler, nir, NULL, prog_data.base.base.ubo_ranges); } else { /* Upload the Patch URB Header as the first two uniforms. diff -Nru mesa-18.3.3/src/mesa/drivers/dri/i965/brw_tes.c mesa-19.0.1/src/mesa/drivers/dri/i965/brw_tes.c --- mesa-18.3.3/src/mesa/drivers/dri/i965/brw_tes.c 2018-09-27 19:13:54.000000000 +0000 +++ mesa-19.0.1/src/mesa/drivers/dri/i965/brw_tes.c 2019-03-31 23:16:37.000000000 +0000 @@ -70,7 +70,6 @@ const struct brw_compiler *compiler = brw->screen->compiler; const struct gen_device_info *devinfo = &brw->screen->devinfo; struct brw_stage_state *stage_state = &brw->tes.base; - nir_shader *nir = tep->program.nir; struct brw_tes_prog_data prog_data; bool start_busy = false; double start_time = 0; @@ -79,13 +78,15 @@ void *mem_ctx = ralloc_context(NULL); + nir_shader *nir = nir_shader_clone(mem_ctx, tep->program.nir); + brw_assign_common_binding_table_offsets(devinfo, &tep->program, &prog_data.base.base, 0); brw_nir_setup_glsl_uniforms(mem_ctx, nir, &tep->program, &prog_data.base.base, compiler->scalar_stage[MESA_SHADER_TESS_EVAL]); - brw_nir_analyze_ubo_ranges(compiler, tep->program.nir, NULL, + brw_nir_analyze_ubo_ranges(compiler, nir, NULL, prog_data.base.base.ubo_ranges); int st_index = -1; diff -Nru mesa-18.3.3/src/mesa/drivers/dri/i965/brw_vs.c mesa-19.0.1/src/mesa/drivers/dri/i965/brw_vs.c --- mesa-18.3.3/src/mesa/drivers/dri/i965/brw_vs.c 2018-09-27 19:13:54.000000000 +0000 +++ mesa-19.0.1/src/mesa/drivers/dri/i965/brw_vs.c 2019-03-31 23:16:37.000000000 +0000 @@ -174,26 +174,28 @@ mem_ctx = 
ralloc_context(NULL); + nir_shader *nir = nir_shader_clone(mem_ctx, vp->program.nir); + brw_assign_common_binding_table_offsets(devinfo, &vp->program, &prog_data.base.base, 0); if (!vp->program.is_arb_asm) { - brw_nir_setup_glsl_uniforms(mem_ctx, vp->program.nir, &vp->program, + brw_nir_setup_glsl_uniforms(mem_ctx, nir, &vp->program, &prog_data.base.base, compiler->scalar_stage[MESA_SHADER_VERTEX]); - brw_nir_analyze_ubo_ranges(compiler, vp->program.nir, key, + brw_nir_analyze_ubo_ranges(compiler, nir, key, prog_data.base.base.ubo_ranges); } else { - brw_nir_setup_arb_uniforms(mem_ctx, vp->program.nir, &vp->program, + brw_nir_setup_arb_uniforms(mem_ctx, nir, &vp->program, &prog_data.base.base); } uint64_t outputs_written = - brw_vs_outputs_written(brw, key, vp->program.nir->info.outputs_written); + brw_vs_outputs_written(brw, key, nir->info.outputs_written); brw_compute_vue_map(devinfo, &prog_data.base.vue_map, outputs_written, - vp->program.nir->info.separate_shader); + nir->info.separate_shader); if (0) { _mesa_fprint_program_opt(stderr, &vp->program, PROG_PRINT_DEBUG, true); @@ -220,8 +222,7 @@ */ char *error_str; program = brw_compile_vs(compiler, brw, mem_ctx, key, &prog_data, - vp->program.nir, - st_index, &error_str); + nir, st_index, &error_str); if (program == NULL) { if (!vp->program.is_arb_asm) { vp->program.sh.data->LinkStatus = LINKING_FAILURE; diff -Nru mesa-18.3.3/src/mesa/drivers/dri/i965/brw_wm.c mesa-19.0.1/src/mesa/drivers/dri/i965/brw_wm.c --- mesa-18.3.3/src/mesa/drivers/dri/i965/brw_wm.c 2018-12-07 18:58:04.000000000 +0000 +++ mesa-19.0.1/src/mesa/drivers/dri/i965/brw_wm.c 2019-03-31 23:16:37.000000000 +0000 @@ -63,6 +63,9 @@ next_binding_table_offset; next_binding_table_offset += key->nr_color_regions; } + + /* Update the binding table size */ + prog_data->base.binding_table.size_bytes = next_binding_table_offset * 4; } static void @@ -139,6 +142,8 @@ bool start_busy = false; double start_time = 0; + nir_shader *nir = 
nir_shader_clone(mem_ctx, fp->program.nir); + memset(&prog_data, 0, sizeof(prog_data)); /* Use ALT floating point mode for ARB programs so that 0^0 == 1. */ @@ -148,13 +153,12 @@ assign_fs_binding_table_offsets(devinfo, &fp->program, key, &prog_data); if (!fp->program.is_arb_asm) { - brw_nir_setup_glsl_uniforms(mem_ctx, fp->program.nir, &fp->program, + brw_nir_setup_glsl_uniforms(mem_ctx, nir, &fp->program, &prog_data.base, true); - brw_nir_analyze_ubo_ranges(brw->screen->compiler, fp->program.nir, + brw_nir_analyze_ubo_ranges(brw->screen->compiler, nir, NULL, prog_data.base.ubo_ranges); } else { - brw_nir_setup_arb_uniforms(mem_ctx, fp->program.nir, &fp->program, - &prog_data.base); + brw_nir_setup_arb_uniforms(mem_ctx, nir, &fp->program, &prog_data.base); if (unlikely(INTEL_DEBUG & DEBUG_WM)) brw_dump_arb_asm("fragment", &fp->program); @@ -178,7 +182,7 @@ char *error_str = NULL; program = brw_compile_fs(brw->screen->compiler, brw, mem_ctx, - key, &prog_data, fp->program.nir, + key, &prog_data, nir, &fp->program, st_index8, st_index16, st_index32, true, false, vue_map, &error_str); @@ -263,6 +267,9 @@ found |= key_debug(brw, "xy_uxvx image bound", old_key->xy_uxvx_image_mask, key->xy_uxvx_image_mask); + found |= key_debug(brw, "ayuv image bound", + old_key->ayuv_image_mask, + key->ayuv_image_mask); for (unsigned int i = 0; i < MAX_SAMPLERS; i++) { @@ -412,6 +419,9 @@ case __DRI_IMAGE_COMPONENTS_Y_UXVX: key->xy_uxvx_image_mask |= 1 << s; break; + case __DRI_IMAGE_COMPONENTS_AYUV: + key->ayuv_image_mask |= 1 << s; + break; default: break; } diff -Nru mesa-18.3.3/src/mesa/drivers/dri/i965/brw_wm_surface_state.c mesa-19.0.1/src/mesa/drivers/dri/i965/brw_wm_surface_state.c --- mesa-18.3.3/src/mesa/drivers/dri/i965/brw_wm_surface_state.c 2018-12-07 18:58:04.000000000 +0000 +++ mesa-19.0.1/src/mesa/drivers/dri/i965/brw_wm_surface_state.c 2019-03-31 23:16:37.000000000 +0000 @@ -156,7 +156,7 @@ struct isl_surf *aux_surf = NULL; uint64_t aux_offset = 0; struct brw_bo 
*clear_bo = NULL; - uint32_t clear_offset = 0; + uint64_t clear_offset = 0; if (aux_usage != ISL_AUX_USAGE_NONE) { aux_surf = &mt->aux_buf->surf; @@ -420,6 +420,14 @@ } break; case GL_RED: + if (img->TexFormat == MESA_FORMAT_R_SRGB8) { + swizzles[0] = SWIZZLE_X; + swizzles[1] = SWIZZLE_ZERO; + swizzles[2] = SWIZZLE_ZERO; + swizzles[3] = SWIZZLE_ONE; + break; + } + /* fallthrough */ case GL_RG: case GL_RGB: if (_mesa_get_format_bits(img->TexFormat, GL_ALPHA_BITS) > 0 || diff -Nru mesa-18.3.3/src/mesa/drivers/dri/i965/gen7_l3_state.c mesa-19.0.1/src/mesa/drivers/dri/i965/gen7_l3_state.c --- mesa-18.3.3/src/mesa/drivers/dri/i965/gen7_l3_state.c 2018-04-03 17:32:26.000000000 +0000 +++ mesa-19.0.1/src/mesa/drivers/dri/i965/gen7_l3_state.c 2019-03-31 23:16:37.000000000 +0000 @@ -119,6 +119,7 @@ assert(!cfg->n[GEN_L3P_IS] && !cfg->n[GEN_L3P_C] && !cfg->n[GEN_L3P_T]); const unsigned imm_data = ((has_slm ? GEN8_L3CNTLREG_SLM_ENABLE : 0) | + (devinfo->gen == 11 ? GEN11_L3CNTLREG_USE_FULL_WAYS : 0) | SET_FIELD(cfg->n[GEN_L3P_URB], GEN8_L3CNTLREG_URB_ALLOC) | SET_FIELD(cfg->n[GEN_L3P_RO], GEN8_L3CNTLREG_RO_ALLOC) | SET_FIELD(cfg->n[GEN_L3P_DC], GEN8_L3CNTLREG_DC_ALLOC) | diff -Nru mesa-18.3.3/src/mesa/drivers/dri/i965/genX_blorp_exec.c mesa-19.0.1/src/mesa/drivers/dri/i965/genX_blorp_exec.c --- mesa-18.3.3/src/mesa/drivers/dri/i965/genX_blorp_exec.c 2019-02-01 12:03:20.000000000 +0000 +++ mesa-19.0.1/src/mesa/drivers/dri/i965/genX_blorp_exec.c 2019-03-31 23:16:37.000000000 +0000 @@ -94,6 +94,14 @@ #endif } +static uint64_t +blorp_get_surface_address(struct blorp_batch *blorp_batch, + struct blorp_address address) +{ + /* We'll let blorp_surface_reloc write the address. 
*/ + return 0ull; +} + #if GEN_GEN >= 7 && GEN_GEN < 10 static struct blorp_address blorp_get_surface_base_address(struct blorp_batch *batch) @@ -197,7 +205,7 @@ const struct blorp_address *addrs, unsigned num_vbs) { -#if GEN_GEN >= 8 +#if GEN_GEN >= 8 && GEN_GEN < 11 struct brw_context *brw = batch->driver_batch; bool need_invalidate = false; diff -Nru mesa-18.3.3/src/mesa/drivers/dri/i965/genX_state_upload.c mesa-19.0.1/src/mesa/drivers/dri/i965/genX_state_upload.c --- mesa-18.3.3/src/mesa/drivers/dri/i965/genX_state_upload.c 2019-02-01 12:03:20.000000000 +0000 +++ mesa-19.0.1/src/mesa/drivers/dri/i965/genX_state_upload.c 2019-03-31 23:16:37.000000000 +0000 @@ -197,6 +197,37 @@ _brw_cmd_pack(cmd)(brw, (void *)_dst, &name), \ _dst = NULL) +#if GEN_GEN >= 7 +MAYBE_UNUSED static void +emit_lrm(struct brw_context *brw, uint32_t reg, struct brw_address addr) +{ + brw_batch_emit(brw, GENX(MI_LOAD_REGISTER_MEM), lrm) { + lrm.RegisterAddress = reg; + lrm.MemoryAddress = addr; + } +} +#endif + +MAYBE_UNUSED static void +emit_lri(struct brw_context *brw, uint32_t reg, uint32_t imm) +{ + brw_batch_emit(brw, GENX(MI_LOAD_REGISTER_IMM), lri) { + lri.RegisterOffset = reg; + lri.DataDWord = imm; + } +} + +#if GEN_IS_HASWELL || GEN_GEN >= 8 +MAYBE_UNUSED static void +emit_lrr(struct brw_context *brw, uint32_t dst, uint32_t src) +{ + brw_batch_emit(brw, GENX(MI_LOAD_REGISTER_REG), lrr) { + lrr.SourceRegisterAddress = src; + lrr.DestinationRegisterAddress = dst; + } +} +#endif + /** * Polygon stipple packet */ @@ -363,15 +394,15 @@ #endif #if GEN_GEN == 11 - .VertexBufferMOCS = ICL_MOCS_WB, + .MOCS = ICL_MOCS_WB, #elif GEN_GEN == 10 - .VertexBufferMOCS = CNL_MOCS_WB, + .MOCS = CNL_MOCS_WB, #elif GEN_GEN == 9 - .VertexBufferMOCS = SKL_MOCS_WB, + .MOCS = SKL_MOCS_WB, #elif GEN_GEN == 8 - .VertexBufferMOCS = BDW_MOCS_WB, + .MOCS = BDW_MOCS_WB, #elif GEN_GEN == 7 - .VertexBufferMOCS = GEN7_MOCS_L3, + .MOCS = GEN7_MOCS_L3, #endif }; @@ -499,11 +530,13 @@ * In the relocation world, we 
have no idea what the addresses will be, so * we can't apply this workaround. Instead, we tell the kernel to move it * to the low 4GB regardless. + * + * This HW issue is gone on Gen11+. */ static void vf_invalidate_for_vb_48bit_transitions(struct brw_context *brw) { -#if GEN_GEN >= 8 +#if GEN_GEN >= 8 && GEN_GEN < 11 bool need_invalidate = false; for (unsigned i = 0; i < brw->vb.nr_buffers; i++) { @@ -633,7 +666,7 @@ for (unsigned i = 0; i < brw->vb.nr_enabled; i++) { struct brw_vertex_element *input = brw->vb.enabled[i]; const struct gl_array_attributes *glattrib = input->glattrib; - uint32_t format = brw_get_vertex_surface_type(brw, glattrib); + uint32_t format = brw_get_vertex_surface_type(brw, &glattrib->Format); if (uploads_needed(format, input->is_dual_slot) > 1) nr_elements++; @@ -726,7 +759,7 @@ for (i = 0; i < brw->vb.nr_enabled; i++) { const struct brw_vertex_element *input = brw->vb.enabled[i]; const struct gl_array_attributes *glattrib = input->glattrib; - uint32_t format = brw_get_vertex_surface_type(brw, glattrib); + uint32_t format = brw_get_vertex_surface_type(brw, &glattrib->Format); uint32_t comp0 = VFCOMP_STORE_SRC; uint32_t comp1 = VFCOMP_STORE_SRC; uint32_t comp2 = VFCOMP_STORE_SRC; @@ -769,16 +802,16 @@ const struct gl_array_attributes *glattrib = input->glattrib; const int size = (GEN_GEN < 8 && is_passthru_format(format)) ? - upload_format_size(upload_format) : glattrib->Size; + upload_format_size(upload_format) : glattrib->Format.Size; switch (size) { case 0: comp0 = VFCOMP_STORE_0; case 1: comp1 = VFCOMP_STORE_0; case 2: comp2 = VFCOMP_STORE_0; case 3: - if (GEN_GEN >= 8 && glattrib->Doubles) { + if (GEN_GEN >= 8 && glattrib->Format.Doubles) { comp3 = VFCOMP_STORE_0; - } else if (glattrib->Integer) { + } else if (glattrib->Format.Integer) { comp3 = VFCOMP_STORE_1_INT; } else { comp3 = VFCOMP_STORE_1_FP; @@ -803,7 +836,7 @@ * to be specified as VFCOMP_STORE_0 in order to output a 256-bit * vertex element." 
*/ - if (glattrib->Doubles && !input->is_dual_slot) { + if (glattrib->Format.Doubles && !input->is_dual_slot) { /* Store vertex elements which correspond to double and dvec2 vertex * shader inputs as 128-bit vertex elements, instead of 256-bits. */ @@ -890,7 +923,7 @@ #if GEN_GEN >= 6 if (gen6_edgeflag_input) { const struct gl_array_attributes *glattrib = gen6_edgeflag_input->glattrib; - const uint32_t format = brw_get_vertex_surface_type(brw, glattrib); + const uint32_t format = brw_get_vertex_surface_type(brw, &glattrib->Format); struct GENX(VERTEX_ELEMENT_STATE) elem_state = { .Valid = true, @@ -965,7 +998,8 @@ brw_batch_emit(brw, GENX(3DSTATE_INDEX_BUFFER), ib) { #if GEN_GEN < 8 && !GEN_IS_HASWELL - ib.CutIndexEnable = brw->prim_restart.enable_cut_index; + assert(brw->ib.enable_cut_index == brw->prim_restart.enable_cut_index); + ib.CutIndexEnable = brw->ib.enable_cut_index; #endif ib.IndexFormat = brw_get_index_type(index_buffer->index_size); @@ -978,7 +1012,7 @@ */ ib.BufferStartingAddress = ro_32_bo(brw->ib.bo, 0); #if GEN_GEN >= 8 - ib.IndexBufferMOCS = GEN_GEN >= 9 ? SKL_MOCS_WB : BDW_MOCS_WB; + ib.MOCS = GEN_GEN >= 9 ? SKL_MOCS_WB : BDW_MOCS_WB; ib.BufferSize = brw->ib.size; #else ib.BufferEndingAddress = ro_bo(brw->ib.bo, brw->ib.size - 1); @@ -2017,7 +2051,8 @@ if (wm_prog_data->base.use_alt_mode) wm.FloatingPointMode = FLOATING_POINT_MODE_Alternate; - wm.SamplerCount = GEN_GEN == 5 ? + /* WA_1606682166 */ + wm.SamplerCount = (GEN_GEN == 5 || GEN_GEN == 11) ? 0 : DIV_ROUND_UP(stage_state->sampler_count, 4); wm.BindingTableEntryCount = @@ -2179,7 +2214,10 @@ #define INIT_THREAD_DISPATCH_FIELDS(pkt, prefix) \ pkt.KernelStartPointer = KSP(brw, stage_state->prog_offset); \ + /* WA_1606682166 */ \ pkt.SamplerCount = \ + GEN_GEN == 11 ? \ + 0 : \ DIV_ROUND_UP(CLAMP(stage_state->sampler_count, 0, 16), 4); \ /* Gen 11 workarounds table #2056 WABTPPrefetchDisable suggests to \ * disable prefetching of binding tables in A0 and B0 steppings. 
\ @@ -2408,7 +2446,7 @@ bbox[0] = MAX2(ctx->ViewportArray[i].X, 0); bbox[1] = MIN2(bbox[0] + ctx->ViewportArray[i].Width, fb_width); - bbox[2] = MAX2(ctx->ViewportArray[i].Y, 0); + bbox[2] = CLAMP(ctx->ViewportArray[i].Y, 0, fb_height); bbox[3] = MIN2(bbox[2] + ctx->ViewportArray[i].Height, fb_height); _mesa_intersect_scissor_bounding_box(ctx, i, bbox); @@ -3856,7 +3894,7 @@ sob.SOBufferEnable = true; sob.StreamOffsetWriteEnable = true; sob.StreamOutputBufferOffsetAddressEnable = true; - sob.SOBufferMOCS = mocs_wb; + sob.MOCS = mocs_wb; sob.SurfaceSize = MAX2(xfb_obj->Size[i] / 4, 1) - 1; sob.StreamOutputBufferOffsetAddress = @@ -4010,8 +4048,13 @@ */ ps.VectorMaskEnable = GEN_GEN >= 8; - ps.SamplerCount = - DIV_ROUND_UP(CLAMP(stage_state->sampler_count, 0, 16), 4); + /* WA_1606682166: + * "Incorrect TDL's SSP address shift in SARB for 16:6 & 18:8 modes. + * Disable the Sampler state prefetch functionality in the SARB by + * programming 0xB000[30] to '1'." + */ + ps.SamplerCount = GEN_GEN == 11 ? 
+ 0 : DIV_ROUND_UP(CLAMP(stage_state->sampler_count, 0, 16), 4); /* BRW_NEW_FS_PROG_DATA */ /* Gen 11 workarounds table #2056 WABTPPrefetchDisable suggests to disable @@ -4552,6 +4595,107 @@ .emit = genX(upload_cs_state) }; +#define GPGPU_DISPATCHDIMX 0x2500 +#define GPGPU_DISPATCHDIMY 0x2504 +#define GPGPU_DISPATCHDIMZ 0x2508 + +#define MI_PREDICATE_SRC0 0x2400 +#define MI_PREDICATE_SRC1 0x2408 + +static void +prepare_indirect_gpgpu_walker(struct brw_context *brw) +{ + GLintptr indirect_offset = brw->compute.num_work_groups_offset; + struct brw_bo *bo = brw->compute.num_work_groups_bo; + + emit_lrm(brw, GPGPU_DISPATCHDIMX, ro_bo(bo, indirect_offset + 0)); + emit_lrm(brw, GPGPU_DISPATCHDIMY, ro_bo(bo, indirect_offset + 4)); + emit_lrm(brw, GPGPU_DISPATCHDIMZ, ro_bo(bo, indirect_offset + 8)); + +#if GEN_GEN <= 7 + /* Clear upper 32-bits of SRC0 and all 64-bits of SRC1 */ + emit_lri(brw, MI_PREDICATE_SRC0 + 4, 0); + emit_lri(brw, MI_PREDICATE_SRC1 , 0); + emit_lri(brw, MI_PREDICATE_SRC1 + 4, 0); + + /* Load compute_dispatch_indirect_x_size into SRC0 */ + emit_lrm(brw, MI_PREDICATE_SRC0, ro_bo(bo, indirect_offset + 0)); + + /* predicate = (compute_dispatch_indirect_x_size == 0); */ + brw_batch_emit(brw, GENX(MI_PREDICATE), mip) { + mip.LoadOperation = LOAD_LOAD; + mip.CombineOperation = COMBINE_SET; + mip.CompareOperation = COMPARE_SRCS_EQUAL; + } + + /* Load compute_dispatch_indirect_y_size into SRC0 */ + emit_lrm(brw, MI_PREDICATE_SRC0, ro_bo(bo, indirect_offset + 4)); + + /* predicate |= (compute_dispatch_indirect_y_size == 0); */ + brw_batch_emit(brw, GENX(MI_PREDICATE), mip) { + mip.LoadOperation = LOAD_LOAD; + mip.CombineOperation = COMBINE_OR; + mip.CompareOperation = COMPARE_SRCS_EQUAL; + } + + /* Load compute_dispatch_indirect_z_size into SRC0 */ + emit_lrm(brw, MI_PREDICATE_SRC0, ro_bo(bo, indirect_offset + 8)); + + /* predicate |= (compute_dispatch_indirect_z_size == 0); */ + brw_batch_emit(brw, GENX(MI_PREDICATE), mip) { + mip.LoadOperation = LOAD_LOAD; + 
mip.CombineOperation = COMBINE_OR; + mip.CompareOperation = COMPARE_SRCS_EQUAL; + } + + /* predicate = !predicate; */ +#define COMPARE_FALSE 1 + brw_batch_emit(brw, GENX(MI_PREDICATE), mip) { + mip.LoadOperation = LOAD_LOADINV; + mip.CombineOperation = COMBINE_OR; + mip.CompareOperation = COMPARE_FALSE; + } +#endif +} + +static void +genX(emit_gpgpu_walker)(struct brw_context *brw) +{ + const struct brw_cs_prog_data *prog_data = + brw_cs_prog_data(brw->cs.base.prog_data); + + const GLuint *num_groups = brw->compute.num_work_groups; + + bool indirect = brw->compute.num_work_groups_bo != NULL; + if (indirect) + prepare_indirect_gpgpu_walker(brw); + + const unsigned simd_size = prog_data->simd_size; + unsigned group_size = prog_data->local_size[0] * + prog_data->local_size[1] * prog_data->local_size[2]; + + uint32_t right_mask = 0xffffffffu >> (32 - simd_size); + const unsigned right_non_aligned = group_size & (simd_size - 1); + if (right_non_aligned != 0) + right_mask >>= (simd_size - right_non_aligned); + + brw_batch_emit(brw, GENX(GPGPU_WALKER), ggw) { + ggw.IndirectParameterEnable = indirect; + ggw.PredicateEnable = GEN_GEN <= 7 && indirect; + ggw.SIMDSize = prog_data->simd_size / 16; + ggw.ThreadDepthCounterMaximum = 0; + ggw.ThreadHeightCounterMaximum = 0; + ggw.ThreadWidthCounterMaximum = prog_data->threads - 1; + ggw.ThreadGroupIDXDimension = num_groups[0]; + ggw.ThreadGroupIDYDimension = num_groups[1]; + ggw.ThreadGroupIDZDimension = num_groups[2]; + ggw.RightExecutionMask = right_mask; + ggw.BottomExecutionMask = 0xffffffff; + } + + brw_batch_emit(brw, GENX(MEDIA_STATE_FLUSH), msf); +} + #endif /* ---------------------------------------------------------------------- */ @@ -5945,5 +6089,6 @@ compute_atoms, ARRAY_SIZE(compute_atoms)); brw->vtbl.emit_mi_report_perf_count = genX(emit_mi_report_perf_count); + brw->vtbl.emit_compute_walker = genX(emit_gpgpu_walker); #endif } diff -Nru mesa-18.3.3/src/mesa/drivers/dri/i965/hsw_queryobj.c 
mesa-19.0.1/src/mesa/drivers/dri/i965/hsw_queryobj.c --- mesa-18.3.3/src/mesa/drivers/dri/i965/hsw_queryobj.c 2017-11-05 00:14:08.000000000 +0000 +++ mesa-19.0.1/src/mesa/drivers/dri/i965/hsw_queryobj.c 2019-03-31 23:16:37.000000000 +0000 @@ -154,7 +154,7 @@ shr_gpr0_by_2_bits(struct brw_context *brw) { shl_gpr0_by_30_bits(brw); - brw_load_register_reg(brw, HSW_CS_GPR(0) + 4, HSW_CS_GPR(0)); + brw_load_register_reg(brw, HSW_CS_GPR(0), HSW_CS_GPR(0) + 4); brw_load_register_imm32(brw, HSW_CS_GPR(0) + 4, 0); } diff -Nru mesa-18.3.3/src/mesa/drivers/dri/i965/hsw_sol.c mesa-19.0.1/src/mesa/drivers/dri/i965/hsw_sol.c --- mesa-18.3.3/src/mesa/drivers/dri/i965/hsw_sol.c 2017-11-05 00:14:08.000000000 +0000 +++ mesa-19.0.1/src/mesa/drivers/dri/i965/hsw_sol.c 2019-03-31 23:16:37.000000000 +0000 @@ -98,7 +98,8 @@ brw_load_register_mem64(brw, HSW_CS_GPR(1), obj->prim_count_bo, START_OFFSET + i * sizeof(uint64_t)); /* GPR2 = Ending Snapshot */ - brw_load_register_reg64(brw, GEN7_SO_NUM_PRIMS_WRITTEN(i), HSW_CS_GPR(2)); + brw_load_register_reg64(brw, HSW_CS_GPR(2), + GEN7_SO_NUM_PRIMS_WRITTEN(i)); BEGIN_BATCH(9); OUT_BATCH(HSW_MI_MATH | (9 - 2)); diff -Nru mesa-18.3.3/src/mesa/drivers/dri/i965/intel_batchbuffer.c mesa-19.0.1/src/mesa/drivers/dri/i965/intel_batchbuffer.c --- mesa-18.3.3/src/mesa/drivers/dri/i965/intel_batchbuffer.c 2018-12-07 18:58:04.000000000 +0000 +++ mesa-19.0.1/src/mesa/drivers/dri/i965/intel_batchbuffer.c 2019-03-31 23:16:37.000000000 +0000 @@ -732,10 +732,10 @@ /* Update brw_bo::gtt_offset */ if (batch->validation_list[i].offset != bo->gtt_offset) { - assert(!(bo->kflags & EXEC_OBJECT_PINNED)); DBG("BO %d migrated: 0x%" PRIx64 " -> 0x%llx\n", bo->gem_handle, bo->gtt_offset, batch->validation_list[i].offset); + assert(!(bo->kflags & EXEC_OBJECT_PINNED)); bo->gtt_offset = batch->validation_list[i].offset; } } @@ -1218,7 +1218,7 @@ * Copies a 32-bit register. 
*/ void -brw_load_register_reg(struct brw_context *brw, uint32_t src, uint32_t dest) +brw_load_register_reg(struct brw_context *brw, uint32_t dest, uint32_t src) { assert(brw->screen->devinfo.gen >= 8 || brw->screen->devinfo.is_haswell); @@ -1233,7 +1233,7 @@ * Copies a 64-bit register. */ void -brw_load_register_reg64(struct brw_context *brw, uint32_t src, uint32_t dest) +brw_load_register_reg64(struct brw_context *brw, uint32_t dest, uint32_t src) { assert(brw->screen->devinfo.gen >= 8 || brw->screen->devinfo.is_haswell); diff -Nru mesa-18.3.3/src/mesa/drivers/dri/i965/intel_buffer_objects.c mesa-19.0.1/src/mesa/drivers/dri/i965/intel_buffer_objects.c --- mesa-18.3.3/src/mesa/drivers/dri/i965/intel_buffer_objects.c 2018-09-27 19:13:54.000000000 +0000 +++ mesa-19.0.1/src/mesa/drivers/dri/i965/intel_buffer_objects.c 2019-03-31 23:16:37.000000000 +0000 @@ -40,7 +40,6 @@ #include "brw_blorp.h" #include "intel_buffer_objects.h" #include "intel_batchbuffer.h" -#include "intel_tiled_memcpy.h" static void mark_buffer_gpu_usage(struct intel_buffer_object *intel_obj, @@ -320,6 +319,8 @@ mark_buffer_valid_data(intel_obj, offset, size); } +/* Typedef for memcpy function (used in brw_get_buffer_subdata below). */ +typedef void *(*mem_copy_fn)(void *dest, const void *src, size_t n); /** * The GetBufferSubData() driver hook. 
diff -Nru mesa-18.3.3/src/mesa/drivers/dri/i965/intel_extensions.c mesa-19.0.1/src/mesa/drivers/dri/i965/intel_extensions.c --- mesa-18.3.3/src/mesa/drivers/dri/i965/intel_extensions.c 2018-12-07 18:58:04.000000000 +0000 +++ mesa-19.0.1/src/mesa/drivers/dri/i965/intel_extensions.c 2019-03-31 23:16:37.000000000 +0000 @@ -104,6 +104,7 @@ ctx->Extensions.EXT_point_parameters = true; ctx->Extensions.EXT_provoking_vertex = true; ctx->Extensions.EXT_render_snorm = true; + ctx->Extensions.EXT_sRGB = true; ctx->Extensions.EXT_stencil_two_side = true; ctx->Extensions.EXT_texture_array = true; ctx->Extensions.EXT_texture_env_dot3 = true; @@ -113,6 +114,7 @@ ctx->Extensions.EXT_texture_snorm = true; ctx->Extensions.EXT_texture_sRGB = true; ctx->Extensions.EXT_texture_sRGB_decode = true; + ctx->Extensions.EXT_texture_sRGB_R8 = true; ctx->Extensions.EXT_texture_swizzle = true; ctx->Extensions.EXT_texture_type_2_10_10_10_REV = true; ctx->Extensions.EXT_vertex_array_bgra = true; @@ -180,14 +182,16 @@ ctx->Extensions.ARB_conditional_render_inverted = true; ctx->Extensions.ARB_cull_distance = true; ctx->Extensions.ARB_draw_buffers_blend = true; - if (ctx->API != API_OPENGL_COMPAT) + if (ctx->API != API_OPENGL_COMPAT || + ctx->Const.AllowHigherCompatVersion) ctx->Extensions.ARB_enhanced_layouts = true; ctx->Extensions.ARB_ES3_compatibility = true; ctx->Extensions.ARB_fragment_layer_viewport = true; ctx->Extensions.ARB_pipeline_statistics_query = true; ctx->Extensions.ARB_sample_shading = true; ctx->Extensions.ARB_shading_language_420pack = true; - if (ctx->API != API_OPENGL_COMPAT) { + if (ctx->API != API_OPENGL_COMPAT || + ctx->Const.AllowHigherCompatVersion) { ctx->Extensions.ARB_texture_buffer_object = true; ctx->Extensions.ARB_texture_buffer_object_rgb32 = true; ctx->Extensions.ARB_texture_buffer_range = true; @@ -197,7 +201,8 @@ ctx->Extensions.ARB_texture_multisample = true; ctx->Extensions.ARB_uniform_buffer_object = true; - if (ctx->API != API_OPENGL_COMPAT) + if (ctx->API 
!= API_OPENGL_COMPAT || + ctx->Const.AllowHigherCompatVersion) ctx->Extensions.AMD_vertex_shader_layer = true; ctx->Extensions.EXT_framebuffer_multisample = true; ctx->Extensions.EXT_framebuffer_multisample_blit_scaled = true; @@ -226,9 +231,10 @@ ctx->Extensions.ARB_conservative_depth = true; ctx->Extensions.ARB_derivative_control = true; ctx->Extensions.ARB_framebuffer_no_attachments = true; - if (ctx->API != API_OPENGL_COMPAT) { + if (ctx->API != API_OPENGL_COMPAT || + ctx->Const.AllowHigherCompatVersion) { ctx->Extensions.ARB_gpu_shader5 = true; - ctx->Extensions.ARB_gpu_shader_fp64 = devinfo->has_64bit_types; + ctx->Extensions.ARB_gpu_shader_fp64 = true; } ctx->Extensions.ARB_shader_atomic_counters = true; ctx->Extensions.ARB_shader_atomic_counter_ops = true; @@ -237,16 +243,16 @@ ctx->Extensions.ARB_shader_image_size = true; ctx->Extensions.ARB_shader_precision = true; ctx->Extensions.ARB_shader_texture_image_samples = true; - if (ctx->API != API_OPENGL_COMPAT) + if (ctx->API != API_OPENGL_COMPAT || + ctx->Const.AllowHigherCompatVersion) ctx->Extensions.ARB_tessellation_shader = true; ctx->Extensions.ARB_texture_compression_bptc = true; ctx->Extensions.ARB_texture_view = true; ctx->Extensions.ARB_shader_storage_buffer_object = true; - ctx->Extensions.ARB_vertex_attrib_64bit = devinfo->has_64bit_types; + ctx->Extensions.ARB_vertex_attrib_64bit = true; ctx->Extensions.EXT_shader_samples_identical = true; ctx->Extensions.OES_primitive_bounding_box = true; ctx->Extensions.OES_texture_buffer = true; - ctx->Extensions.ARB_fragment_shader_interlock = true; if (can_do_pipelined_register_writes(brw->screen)) { ctx->Extensions.ARB_draw_indirect = true; @@ -294,9 +300,9 @@ } if (devinfo->gen >= 8) { - ctx->Extensions.ARB_gpu_shader_int64 = devinfo->has_64bit_types; + ctx->Extensions.ARB_gpu_shader_int64 = true; /* requires ARB_gpu_shader_int64 */ - ctx->Extensions.ARB_shader_ballot = devinfo->has_64bit_types; + ctx->Extensions.ARB_shader_ballot = true; 
ctx->Extensions.ARB_ES3_2_compatibility = true; } @@ -311,6 +317,30 @@ ctx->Extensions.KHR_blend_equation_advanced_coherent = true; ctx->Extensions.KHR_texture_compression_astc_ldr = true; ctx->Extensions.KHR_texture_compression_astc_sliced_3d = true; + + /* + * From the Skylake PRM Vol. 7 (Memory Fence Message, page 221): + * "A memory fence message issued by a thread causes further messages + * issued by the thread to be blocked until all previous data port + * messages have completed, or the results can be globally observed from + * the point of view of other threads in the system." + * + * From the Haswell PRM Vol. 7 (Memory Fence, page 256): + * "A memory fence message issued by a thread causes further messages + * issued by the thread to be blocked until all previous messages issued + * by the thread to that data port (data cache or render cache) have + * been globally observed from the point of view of other threads in the + * system." + * + * Summarized: For ARB_fragment_shader_interlock to work, we need to + * ensure memory access ordering for all messages to the dataport from + * all threads. Memory fence messages prior to SKL only provide memory + * access ordering for messages from the same thread, so we can only + * support the feature from Gen9 onwards. 
+ * + */ + + ctx->Extensions.ARB_fragment_shader_interlock = true; } if (gen_device_info_is_9lp(devinfo)) @@ -319,7 +349,8 @@ if (devinfo->gen >= 6) ctx->Extensions.INTEL_performance_query = true; - if (ctx->API != API_OPENGL_COMPAT) + if (ctx->API != API_OPENGL_COMPAT || + ctx->Const.AllowHigherCompatVersion) ctx->Extensions.ARB_base_instance = true; if (ctx->API != API_OPENGL_CORE) ctx->Extensions.ARB_color_buffer_float = true; diff -Nru mesa-18.3.3/src/mesa/drivers/dri/i965/intel_fbo.c mesa-19.0.1/src/mesa/drivers/dri/i965/intel_fbo.c --- mesa-18.3.3/src/mesa/drivers/dri/i965/intel_fbo.c 2018-12-07 18:58:04.000000000 +0000 +++ mesa-19.0.1/src/mesa/drivers/dri/i965/intel_fbo.c 2019-03-31 23:16:37.000000000 +0000 @@ -629,7 +629,7 @@ } -#define fbo_incomplete(fb, ...) do { \ +#define fbo_incomplete(fb, error_id, ...) do { \ static GLuint msg_id = 0; \ if (unlikely(ctx->Const.ContextFlags & GL_CONTEXT_FLAG_DEBUG_BIT)) { \ _mesa_gl_debug(ctx, &msg_id, \ @@ -639,7 +639,7 @@ __VA_ARGS__); \ } \ DBG(__VA_ARGS__); \ - fb->_Status = GL_FRAMEBUFFER_UNSUPPORTED; \ + fb->_Status = error_id; \ } while (0) /** @@ -693,7 +693,7 @@ d_depth != s_depth || depthRb->mt_level != stencilRb->mt_level || depthRb->mt_layer != stencilRb->mt_layer) { - fbo_incomplete(fb, + fbo_incomplete(fb, GL_FRAMEBUFFER_UNSUPPORTED, "FBO incomplete: depth and stencil must match in" "width, height, depth, LOD and layer\n"); } @@ -705,7 +705,7 @@ */ if (depthRb->mt_level != stencilRb->mt_level || depthRb->mt_layer != stencilRb->mt_layer) { - fbo_incomplete(fb, + fbo_incomplete(fb, GL_FRAMEBUFFER_UNSUPPORTED, "FBO incomplete: depth image level/layer %d/%d != " "stencil image %d/%d\n", depthRb->mt_level, @@ -715,13 +715,14 @@ } } else { if (!brw->has_separate_stencil) { - fbo_incomplete(fb, "FBO incomplete: separate stencil " - "unsupported\n"); + fbo_incomplete(fb, GL_FRAMEBUFFER_UNSUPPORTED, + "FBO incomplete: separate stencil unsupported\n"); } if (stencil_mt->format != MESA_FORMAT_S_UINT8) { - 
fbo_incomplete(fb, "FBO incomplete: separate stencil is %s " - "instead of S8\n", - _mesa_get_format_name(stencil_mt->format)); + fbo_incomplete(fb, GL_FRAMEBUFFER_UNSUPPORTED, + "FBO incomplete: separate stencil is %s " + "instead of S8\n", + _mesa_get_format_name(stencil_mt->format)); } if (devinfo->gen < 7 && !intel_renderbuffer_has_hiz(depthRb)) { /* Before Gen7, separate depth and stencil buffers can be used @@ -730,8 +731,8 @@ * [DevSNB]: This field must be set to the same value (enabled * or disabled) as Hierarchical Depth Buffer Enable. */ - fbo_incomplete(fb, "FBO incomplete: separate stencil " - "without HiZ\n"); + fbo_incomplete(fb, GL_FRAMEBUFFER_UNSUPPORTED, + "FBO incomplete: separate stencil without HiZ\n"); } } } @@ -749,29 +750,39 @@ */ rb = fb->Attachment[i].Renderbuffer; if (rb == NULL) { - fbo_incomplete(fb, "FBO incomplete: attachment without " - "renderbuffer\n"); + fbo_incomplete(fb, GL_FRAMEBUFFER_UNSUPPORTED, + "FBO incomplete: attachment without " + "renderbuffer\n"); continue; } if (fb->Attachment[i].Type == GL_TEXTURE) { if (rb->TexImage->Border) { - fbo_incomplete(fb, "FBO incomplete: texture with border\n"); + fbo_incomplete(fb, GL_FRAMEBUFFER_UNSUPPORTED, + "FBO incomplete: texture with border\n"); continue; } } irb = intel_renderbuffer(rb); if (irb == NULL) { - fbo_incomplete(fb, "FBO incomplete: software rendering " - "renderbuffer\n"); + fbo_incomplete(fb, GL_FRAMEBUFFER_UNSUPPORTED, + "FBO incomplete: software rendering renderbuffer\n"); continue; } + if (rb->Format == MESA_FORMAT_R_SRGB8) { + fbo_incomplete(fb, GL_FRAMEBUFFER_INCOMPLETE_ATTACHMENT, + "FBO incomplete: Format not color renderable: %s\n", + _mesa_get_format_name(rb->Format)); + continue; + } + if (!brw_render_target_supported(brw, rb)) { - fbo_incomplete(fb, "FBO incomplete: Unsupported HW " - "texture/renderbuffer format attached: %s\n", - _mesa_get_format_name(intel_rb_format(irb))); + fbo_incomplete(fb, GL_FRAMEBUFFER_UNSUPPORTED, + "FBO incomplete: Unsupported 
HW " + "texture/renderbuffer format attached: %s\n", + _mesa_get_format_name(intel_rb_format(irb))); } } } diff -Nru mesa-18.3.3/src/mesa/drivers/dri/i965/intel_image.h mesa-19.0.1/src/mesa/drivers/dri/i965/intel_image.h --- mesa-18.3.3/src/mesa/drivers/dri/i965/intel_image.h 2018-02-19 19:52:02.000000000 +0000 +++ mesa-19.0.1/src/mesa/drivers/dri/i965/intel_image.h 2019-03-31 23:16:37.000000000 +0000 @@ -89,9 +89,6 @@ GLuint tile_y; bool has_depthstencil; - /** The image was created with EGL_EXT_image_dma_buf_import. */ - bool dma_buf_imported; - /** Offset of the auxiliary compression surface in the bo. */ uint32_t aux_offset; diff -Nru mesa-18.3.3/src/mesa/drivers/dri/i965/intel_mipmap_tree.c mesa-19.0.1/src/mesa/drivers/dri/i965/intel_mipmap_tree.c --- mesa-18.3.3/src/mesa/drivers/dri/i965/intel_mipmap_tree.c 2018-12-07 18:58:04.000000000 +0000 +++ mesa-19.0.1/src/mesa/drivers/dri/i965/intel_mipmap_tree.c 2019-03-31 23:16:37.000000000 +0000 @@ -31,8 +31,6 @@ #include "intel_image.h" #include "intel_mipmap_tree.h" #include "intel_tex.h" -#include "intel_tiled_memcpy.h" -#include "intel_tiled_memcpy_sse41.h" #include "intel_blit.h" #include "intel_fbo.h" @@ -3126,9 +3124,9 @@ char *dst = intel_miptree_map_raw(brw, mt, map->mode | MAP_RAW); dst += mt->offset; - linear_to_tiled(x1, x2, y1, y2, dst, map->ptr, mt->surf.row_pitch_B, - map->stride, brw->has_swizzling, mt->surf.tiling, - INTEL_COPY_MEMCPY); + isl_memcpy_linear_to_tiled( + x1, x2, y1, y2, dst, map->ptr, mt->surf.row_pitch_B, map->stride, + brw->has_swizzling, mt->surf.tiling, ISL_MEMCPY); intel_miptree_unmap_raw(mt); } @@ -3136,6 +3134,66 @@ map->buffer = map->ptr = NULL; } +/** + * Determine which copy function to use for the given format combination + * + * The only two possible copy functions which are ever returned are a + * direct memcpy and a RGBA <-> BGRA copy function. 
Since RGBA -> BGRA and + * BGRA -> RGBA are exactly the same operation (and memcpy is obviously + * symmetric), it doesn't matter whether the copy is from the tiled image + * to the untiled or vice versa. The copy function required is the same in + * either case so this function can be used. + * + * \param[in] tiledFormat The format of the tiled image + * \param[in] format The GL format of the client data + * \param[in] type The GL type of the client data + * \param[out] mem_copy Will be set to one of either the standard + * library's memcpy or a different copy function + * that performs an RGBA to BGRA conversion + * \param[out] cpp Number of bytes per channel + * + * \return true if the format and type combination are valid + */ +MAYBE_UNUSED isl_memcpy_type +intel_miptree_get_memcpy_type(mesa_format tiledFormat, GLenum format, GLenum type, + uint32_t *cpp) +{ + if (type == GL_UNSIGNED_INT_8_8_8_8_REV && + !(format == GL_RGBA || format == GL_BGRA)) + return ISL_MEMCPY_INVALID; /* Invalid type/format combination */ + + if ((tiledFormat == MESA_FORMAT_L_UNORM8 && format == GL_LUMINANCE) || + (tiledFormat == MESA_FORMAT_A_UNORM8 && format == GL_ALPHA)) { + *cpp = 1; + return ISL_MEMCPY; + } else if ((tiledFormat == MESA_FORMAT_B8G8R8A8_UNORM) || + (tiledFormat == MESA_FORMAT_B8G8R8X8_UNORM) || + (tiledFormat == MESA_FORMAT_B8G8R8A8_SRGB) || + (tiledFormat == MESA_FORMAT_B8G8R8X8_SRGB)) { + *cpp = 4; + if (format == GL_BGRA) { + return ISL_MEMCPY; + } else if (format == GL_RGBA) { + return ISL_MEMCPY_BGRA8; + } + } else if ((tiledFormat == MESA_FORMAT_R8G8B8A8_UNORM) || + (tiledFormat == MESA_FORMAT_R8G8B8X8_UNORM) || + (tiledFormat == MESA_FORMAT_R8G8B8A8_SRGB) || + (tiledFormat == MESA_FORMAT_R8G8B8X8_SRGB)) { + *cpp = 4; + if (format == GL_BGRA) { + /* Copying from RGBA to BGRA is the same as BGRA to RGBA so we can + * use the same function. 
+ */ + return ISL_MEMCPY_BGRA8; + } else if (format == GL_RGBA) { + return ISL_MEMCPY; + } + } + + return ISL_MEMCPY_INVALID; +} + static void intel_miptree_map_tiled_memcpy(struct brw_context *brw, struct intel_mipmap_tree *mt, @@ -3162,21 +3220,16 @@ char *src = intel_miptree_map_raw(brw, mt, map->mode | MAP_RAW); src += mt->offset; - const tiled_to_linear_fn ttl_func = -#if defined(USE_SSE41) - cpu_has_sse4_1 ? tiled_to_linear_sse41 : -#endif - tiled_to_linear; - - const mem_copy_fn_type copy_type = + const isl_memcpy_type copy_type = #if defined(USE_SSE41) - cpu_has_sse4_1 ? INTEL_COPY_STREAMING_LOAD : + cpu_has_sse4_1 ? ISL_MEMCPY_STREAMING_LOAD : #endif - INTEL_COPY_MEMCPY; + ISL_MEMCPY; - ttl_func(x1, x2, y1, y2, map->ptr, src, map->stride, - mt->surf.row_pitch_B, brw->has_swizzling, mt->surf.tiling, - copy_type); + isl_memcpy_tiled_to_linear( + x1, x2, y1, y2, map->ptr, src, map->stride, + mt->surf.row_pitch_B, brw->has_swizzling, mt->surf.tiling, + copy_type); intel_miptree_unmap_raw(mt); } @@ -3865,7 +3918,7 @@ const struct intel_mipmap_tree *mt, enum isl_format view_format, bool sampling, struct brw_bo **clear_color_bo, - uint32_t *clear_color_offset) + uint64_t *clear_color_offset) { assert(mt->aux_buf); diff -Nru mesa-18.3.3/src/mesa/drivers/dri/i965/intel_mipmap_tree.h mesa-19.0.1/src/mesa/drivers/dri/i965/intel_mipmap_tree.h --- mesa-18.3.3/src/mesa/drivers/dri/i965/intel_mipmap_tree.h 2018-12-07 18:58:04.000000000 +0000 +++ mesa-19.0.1/src/mesa/drivers/dri/i965/intel_mipmap_tree.h 2019-03-31 23:16:37.000000000 +0000 @@ -714,7 +714,7 @@ const struct intel_mipmap_tree *mt, enum isl_format view_format, bool sampling, struct brw_bo **clear_color_bo, - uint32_t *clear_color_offset); + uint64_t *clear_color_offset); static inline int @@ -726,6 +726,10 @@ return pitch; } +isl_memcpy_type +intel_miptree_get_memcpy_type(mesa_format tiledFormat, GLenum format, GLenum type, + uint32_t *cpp); + #ifdef __cplusplus } #endif diff -Nru 
mesa-18.3.3/src/mesa/drivers/dri/i965/intel_pixel_read.c mesa-19.0.1/src/mesa/drivers/dri/i965/intel_pixel_read.c --- mesa-18.3.3/src/mesa/drivers/dri/i965/intel_pixel_read.c 2018-12-07 18:58:04.000000000 +0000 +++ mesa-19.0.1/src/mesa/drivers/dri/i965/intel_pixel_read.c 2019-03-31 23:16:37.000000000 +0000 @@ -44,7 +44,6 @@ #include "intel_mipmap_tree.h" #include "intel_pixel.h" #include "intel_buffer_objects.h" -#include "intel_tiled_memcpy.h" #define FILE_DEBUG_FLAG DEBUG_PIXEL @@ -87,7 +86,7 @@ struct brw_bo *bo; uint32_t cpp; - mem_copy_fn_type copy_type; + isl_memcpy_type copy_type; /* This fastpath is restricted to specific renderbuffer types: * a 2D BGRA, RGBA, L8 or A8 texture. It could be generalized to support @@ -125,7 +124,8 @@ if (rb->_BaseFormat == GL_RGB) return false; - if (!intel_get_memcpy_type(rb->Format, format, type, ©_type, &cpp)) + copy_type = intel_miptree_get_memcpy_type(rb->Format, format, type, &cpp); + if (copy_type == ISL_MEMCPY_INVALID) return false; if (!irb->mt || @@ -198,7 +198,7 @@ pack->Alignment, pack->RowLength, pack->SkipPixels, pack->SkipRows); - tiled_to_linear( + isl_memcpy_tiled_to_linear( xoffset * cpp, (xoffset + width) * cpp, yoffset, yoffset + height, pixels, diff -Nru mesa-18.3.3/src/mesa/drivers/dri/i965/intel_screen.c mesa-19.0.1/src/mesa/drivers/dri/i965/intel_screen.c --- mesa-18.3.3/src/mesa/drivers/dri/i965/intel_screen.c 2018-12-07 18:58:04.000000000 +0000 +++ mesa-19.0.1/src/mesa/drivers/dri/i965/intel_screen.c 2019-03-31 23:16:37.000000000 +0000 @@ -286,6 +286,9 @@ { { 0, 0, 0, __DRI_IMAGE_FORMAT_R8, 1 }, { 1, 1, 0, __DRI_IMAGE_FORMAT_GR88, 2 } } }, + { __DRI_IMAGE_FOURCC_AYUV, __DRI_IMAGE_COMPONENTS_AYUV, 1, + { { 0, 0, 0, __DRI_IMAGE_FORMAT_ABGR8888, 4 } } }, + /* For YUYV and UYVY buffers, we set up two overlapping DRI images * and treat them as planar buffers in the compositors. 
* Plane 0 is GR88 and samples YU or YV pairs and places Y into @@ -957,7 +960,6 @@ image->tile_y = orig_image->tile_y; image->has_depthstencil = orig_image->has_depthstencil; image->data = loaderPrivate; - image->dma_buf_imported = orig_image->dma_buf_imported; image->aux_offset = orig_image->aux_offset; image->aux_pitch = orig_image->aux_pitch; @@ -1237,7 +1239,6 @@ return NULL; } - image->dma_buf_imported = true; image->yuv_color_space = yuv_color_space; image->sample_range = sample_range; image->horizontal_siting = horizontal_siting; diff -Nru mesa-18.3.3/src/mesa/drivers/dri/i965/intel_tex_image.c mesa-19.0.1/src/mesa/drivers/dri/i965/intel_tex_image.c --- mesa-18.3.3/src/mesa/drivers/dri/i965/intel_tex_image.c 2018-12-07 18:58:04.000000000 +0000 +++ mesa-19.0.1/src/mesa/drivers/dri/i965/intel_tex_image.c 2019-03-31 23:16:37.000000000 +0000 @@ -23,7 +23,6 @@ #include "intel_tex.h" #include "intel_fbo.h" #include "intel_image.h" -#include "intel_tiled_memcpy.h" #include "brw_context.h" #include "brw_blorp.h" @@ -192,7 +191,7 @@ struct brw_bo *bo; uint32_t cpp; - mem_copy_fn_type copy_type; + isl_memcpy_type copy_type; /* This fastpath is restricted to specific texture types: * a 2D BGRA, RGBA, L8 or A8 texture. It could be generalized to support @@ -222,8 +221,9 @@ if (ctx->_ImageTransferState) return false; - if (!intel_get_memcpy_type(texImage->TexFormat, format, type, ©_type, - &cpp)) + copy_type = intel_miptree_get_memcpy_type(texImage->TexFormat, format, type, + &cpp); + if (copy_type == ISL_MEMCPY_INVALID) return false; /* If this is a nontrivial texture view, let another path handle it instead. */ @@ -290,7 +290,7 @@ xoffset += level_x; yoffset += level_y; - linear_to_tiled( + isl_memcpy_linear_to_tiled( xoffset * cpp, (xoffset + width) * cpp, yoffset, yoffset + height, map, @@ -614,16 +614,6 @@ if (image == NULL) return; - /* We support external textures only for EGLImages created with - * EGL_EXT_image_dma_buf_import. 
We may lift that restriction in the future. - */ - if (target == GL_TEXTURE_EXTERNAL_OES && !image->dma_buf_imported) { - _mesa_error(ctx, GL_INVALID_OPERATION, - "glEGLImageTargetTexture2DOES(external target is enabled only " - "for images created with EGL_EXT_image_dma_buf_import"); - return; - } - /* Disallow depth/stencil textures: we don't have a way to pass the * separate stencil miptree of a GL_DEPTH_STENCIL texture through. */ @@ -695,7 +685,7 @@ struct brw_bo *bo; uint32_t cpp; - mem_copy_fn_type copy_type; + isl_memcpy_type copy_type; /* This fastpath is restricted to specific texture types: * a 2D BGRA, RGBA, L8 or A8 texture. It could be generalized to support @@ -729,8 +719,9 @@ if (texImage->_BaseFormat == GL_RGB) return false; - if (!intel_get_memcpy_type(texImage->TexFormat, format, type, ©_type, - &cpp)) + copy_type = intel_miptree_get_memcpy_type(texImage->TexFormat, format, type, + &cpp); + if (copy_type == ISL_MEMCPY_INVALID) return false; /* If this is a nontrivial texture view, let another path handle it instead. 
*/ @@ -794,7 +785,7 @@ xoffset += level_x; yoffset += level_y; - tiled_to_linear( + isl_memcpy_tiled_to_linear( xoffset * cpp, (xoffset + width) * cpp, yoffset, yoffset + height, pixels, diff -Nru mesa-18.3.3/src/mesa/drivers/dri/i965/intel_tiled_memcpy.c mesa-19.0.1/src/mesa/drivers/dri/i965/intel_tiled_memcpy.c --- mesa-18.3.3/src/mesa/drivers/dri/i965/intel_tiled_memcpy.c 2018-12-07 18:58:04.000000000 +0000 +++ mesa-19.0.1/src/mesa/drivers/dri/i965/intel_tiled_memcpy.c 1970-01-01 00:00:00.000000000 +0000 @@ -1,1003 +0,0 @@ -/* - * Mesa 3-D graphics library - * - * Copyright 2012 Intel Corporation - * Copyright 2013 Google - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the - * "Software"), to deal in the Software without restriction, including - * without limitation the rights to use, copy, modify, merge, publish, - * distribute, sublicense, and/or sell copies of the Software, and to - * permit persons to whom the Software is furnished to do so, subject to - * the following conditions: - * - * The above copyright notice and this permission notice (including the - * next paragraph) shall be included in all copies or substantial portions - * of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS - * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. - * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR - * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, - * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE - * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
- * - * Authors: - * Chad Versace - * Frank Henigman - */ - -#include - -#include "util/macros.h" - -#include "brw_context.h" -#include "intel_tiled_memcpy.h" - -#if defined(__SSSE3__) -#include -#elif defined(__SSE2__) -#include -#endif - -#define FILE_DEBUG_FLAG DEBUG_TEXTURE - -#define ALIGN_DOWN(a, b) ROUND_DOWN_TO(a, b) -#define ALIGN_UP(a, b) ALIGN(a, b) - -/* Tile dimensions. Width and span are in bytes, height is in pixels (i.e. - * unitless). A "span" is the most number of bytes we can copy from linear - * to tiled without needing to calculate a new destination address. - */ -static const uint32_t xtile_width = 512; -static const uint32_t xtile_height = 8; -static const uint32_t xtile_span = 64; -static const uint32_t ytile_width = 128; -static const uint32_t ytile_height = 32; -static const uint32_t ytile_span = 16; - -static inline uint32_t -ror(uint32_t n, uint32_t d) -{ - return (n >> d) | (n << (32 - d)); -} - -static inline uint32_t -bswap32(uint32_t n) -{ -#if defined(HAVE___BUILTIN_BSWAP32) - return __builtin_bswap32(n); -#else - return (n >> 24) | - ((n >> 8) & 0x0000ff00) | - ((n << 8) & 0x00ff0000) | - (n << 24); -#endif -} - -/** - * Copy RGBA to BGRA - swap R and B. 
- */ -static inline void * -rgba8_copy(void *dst, const void *src, size_t bytes) -{ - uint32_t *d = dst; - uint32_t const *s = src; - - assert(bytes % 4 == 0); - - while (bytes >= 4) { - *d = ror(bswap32(*s), 8); - d += 1; - s += 1; - bytes -= 4; - } - return dst; -} - -#ifdef __SSSE3__ -static const uint8_t rgba8_permutation[16] = - { 2,1,0,3, 6,5,4,7, 10,9,8,11, 14,13,12,15 }; - -static inline void -rgba8_copy_16_aligned_dst(void *dst, const void *src) -{ - _mm_store_si128(dst, - _mm_shuffle_epi8(_mm_loadu_si128(src), - *(__m128i *)rgba8_permutation)); -} - -static inline void -rgba8_copy_16_aligned_src(void *dst, const void *src) -{ - _mm_storeu_si128(dst, - _mm_shuffle_epi8(_mm_load_si128(src), - *(__m128i *)rgba8_permutation)); -} - -#elif defined(__SSE2__) -static inline void -rgba8_copy_16_aligned_dst(void *dst, const void *src) -{ - __m128i srcreg, dstreg, agmask, ag, rb, br; - - agmask = _mm_set1_epi32(0xFF00FF00); - srcreg = _mm_loadu_si128((__m128i *)src); - - rb = _mm_andnot_si128(agmask, srcreg); - ag = _mm_and_si128(agmask, srcreg); - br = _mm_shufflehi_epi16(_mm_shufflelo_epi16(rb, _MM_SHUFFLE(2, 3, 0, 1)), - _MM_SHUFFLE(2, 3, 0, 1)); - dstreg = _mm_or_si128(ag, br); - - _mm_store_si128((__m128i *)dst, dstreg); -} - -static inline void -rgba8_copy_16_aligned_src(void *dst, const void *src) -{ - __m128i srcreg, dstreg, agmask, ag, rb, br; - - agmask = _mm_set1_epi32(0xFF00FF00); - srcreg = _mm_load_si128((__m128i *)src); - - rb = _mm_andnot_si128(agmask, srcreg); - ag = _mm_and_si128(agmask, srcreg); - br = _mm_shufflehi_epi16(_mm_shufflelo_epi16(rb, _MM_SHUFFLE(2, 3, 0, 1)), - _MM_SHUFFLE(2, 3, 0, 1)); - dstreg = _mm_or_si128(ag, br); - - _mm_storeu_si128((__m128i *)dst, dstreg); -} -#endif - -/** - * Copy RGBA to BGRA - swap R and B, with the destination 16-byte aligned. 
- */ -static inline void * -rgba8_copy_aligned_dst(void *dst, const void *src, size_t bytes) -{ - assert(bytes == 0 || !(((uintptr_t)dst) & 0xf)); - -#if defined(__SSSE3__) || defined(__SSE2__) - if (bytes == 64) { - rgba8_copy_16_aligned_dst(dst + 0, src + 0); - rgba8_copy_16_aligned_dst(dst + 16, src + 16); - rgba8_copy_16_aligned_dst(dst + 32, src + 32); - rgba8_copy_16_aligned_dst(dst + 48, src + 48); - return dst; - } - - while (bytes >= 16) { - rgba8_copy_16_aligned_dst(dst, src); - src += 16; - dst += 16; - bytes -= 16; - } -#endif - - rgba8_copy(dst, src, bytes); - - return dst; -} - -/** - * Copy RGBA to BGRA - swap R and B, with the source 16-byte aligned. - */ -static inline void * -rgba8_copy_aligned_src(void *dst, const void *src, size_t bytes) -{ - assert(bytes == 0 || !(((uintptr_t)src) & 0xf)); - -#if defined(__SSSE3__) || defined(__SSE2__) - if (bytes == 64) { - rgba8_copy_16_aligned_src(dst + 0, src + 0); - rgba8_copy_16_aligned_src(dst + 16, src + 16); - rgba8_copy_16_aligned_src(dst + 32, src + 32); - rgba8_copy_16_aligned_src(dst + 48, src + 48); - return dst; - } - - while (bytes >= 16) { - rgba8_copy_16_aligned_src(dst, src); - src += 16; - dst += 16; - bytes -= 16; - } -#endif - - rgba8_copy(dst, src, bytes); - - return dst; -} - -/** - * Each row from y0 to y1 is copied in three parts: [x0,x1), [x1,x2), [x2,x3). - * These ranges are in bytes, i.e. pixels * bytes-per-pixel. - * The first and last ranges must be shorter than a "span" (the longest linear - * stretch within a tile) and the middle must equal a whole number of spans. - * Ranges may be empty. The region copied must land entirely within one tile. - * 'dst' is the start of the tile and 'src' is the corresponding - * address to copy from, though copying begins at (x0, y0). - * To enable swizzling 'swizzle_bit' must be 1<<6, otherwise zero. - * Swizzling flips bit 6 in the copy destination offset, when certain other - * bits are set in it. 
- */ -typedef void (*tile_copy_fn)(uint32_t x0, uint32_t x1, uint32_t x2, uint32_t x3, - uint32_t y0, uint32_t y1, - char *dst, const char *src, - int32_t linear_pitch, - uint32_t swizzle_bit, - mem_copy_fn_type copy_type); - -/** - * Copy texture data from linear to X tile layout. - * - * \copydoc tile_copy_fn - * - * The mem_copy parameters allow the user to specify an alternative mem_copy - * function that, for instance, may do RGBA -> BGRA swizzling. The first - * function must handle any memory alignment while the second function must - * only handle 16-byte alignment in whichever side (source or destination) is - * tiled. - */ -static inline void -linear_to_xtiled(uint32_t x0, uint32_t x1, uint32_t x2, uint32_t x3, - uint32_t y0, uint32_t y1, - char *dst, const char *src, - int32_t src_pitch, - uint32_t swizzle_bit, - mem_copy_fn mem_copy, - mem_copy_fn mem_copy_align16) -{ - /* The copy destination offset for each range copied is the sum of - * an X offset 'x0' or 'xo' and a Y offset 'yo.' - */ - uint32_t xo, yo; - - src += (ptrdiff_t)y0 * src_pitch; - - for (yo = y0 * xtile_width; yo < y1 * xtile_width; yo += xtile_width) { - /* Bits 9 and 10 of the copy destination offset control swizzling. - * Only 'yo' contributes to those bits in the total offset, - * so calculate 'swizzle' just once per row. - * Move bits 9 and 10 three and four places respectively down - * to bit 6 and xor them. - */ - uint32_t swizzle = ((yo >> 3) ^ (yo >> 4)) & swizzle_bit; - - mem_copy(dst + ((x0 + yo) ^ swizzle), src + x0, x1 - x0); - - for (xo = x1; xo < x2; xo += xtile_span) { - mem_copy_align16(dst + ((xo + yo) ^ swizzle), src + xo, xtile_span); - } - - mem_copy_align16(dst + ((xo + yo) ^ swizzle), src + x2, x3 - x2); - - src += src_pitch; - } -} - -/** - * Copy texture data from linear to Y tile layout. 
- * - * \copydoc tile_copy_fn - */ -static inline void -linear_to_ytiled(uint32_t x0, uint32_t x1, uint32_t x2, uint32_t x3, - uint32_t y0, uint32_t y3, - char *dst, const char *src, - int32_t src_pitch, - uint32_t swizzle_bit, - mem_copy_fn mem_copy, - mem_copy_fn mem_copy_align16) -{ - /* Y tiles consist of columns that are 'ytile_span' wide (and the same height - * as the tile). Thus the destination offset for (x,y) is the sum of: - * (x % column_width) // position within column - * (x / column_width) * bytes_per_column // column number * bytes per column - * y * column_width - * - * The copy destination offset for each range copied is the sum of - * an X offset 'xo0' or 'xo' and a Y offset 'yo.' - */ - const uint32_t column_width = ytile_span; - const uint32_t bytes_per_column = column_width * ytile_height; - - uint32_t y1 = MIN2(y3, ALIGN_UP(y0, 4)); - uint32_t y2 = MAX2(y1, ALIGN_DOWN(y3, 4)); - - uint32_t xo0 = (x0 % ytile_span) + (x0 / ytile_span) * bytes_per_column; - uint32_t xo1 = (x1 % ytile_span) + (x1 / ytile_span) * bytes_per_column; - - /* Bit 9 of the destination offset control swizzling. - * Only the X offset contributes to bit 9 of the total offset, - * so swizzle can be calculated in advance for these X positions. - * Move bit 9 three places down to bit 6. - */ - uint32_t swizzle0 = (xo0 >> 3) & swizzle_bit; - uint32_t swizzle1 = (xo1 >> 3) & swizzle_bit; - - uint32_t x, yo; - - src += (ptrdiff_t)y0 * src_pitch; - - if (y0 != y1) { - for (yo = y0 * column_width; yo < y1 * column_width; yo += column_width) { - uint32_t xo = xo1; - uint32_t swizzle = swizzle1; - - mem_copy(dst + ((xo0 + yo) ^ swizzle0), src + x0, x1 - x0); - - /* Step by spans/columns. As it happens, the swizzle bit flips - * at each step so we don't need to calculate it explicitly. 
- */ - for (x = x1; x < x2; x += ytile_span) { - mem_copy_align16(dst + ((xo + yo) ^ swizzle), src + x, ytile_span); - xo += bytes_per_column; - swizzle ^= swizzle_bit; - } - - mem_copy_align16(dst + ((xo + yo) ^ swizzle), src + x2, x3 - x2); - - src += src_pitch; - } - } - - for (yo = y1 * column_width; yo < y2 * column_width; yo += 4 * column_width) { - uint32_t xo = xo1; - uint32_t swizzle = swizzle1; - - if (x0 != x1) { - mem_copy(dst + ((xo0 + yo + 0 * column_width) ^ swizzle0), src + x0 + 0 * src_pitch, x1 - x0); - mem_copy(dst + ((xo0 + yo + 1 * column_width) ^ swizzle0), src + x0 + 1 * src_pitch, x1 - x0); - mem_copy(dst + ((xo0 + yo + 2 * column_width) ^ swizzle0), src + x0 + 2 * src_pitch, x1 - x0); - mem_copy(dst + ((xo0 + yo + 3 * column_width) ^ swizzle0), src + x0 + 3 * src_pitch, x1 - x0); - } - - /* Step by spans/columns. As it happens, the swizzle bit flips - * at each step so we don't need to calculate it explicitly. - */ - for (x = x1; x < x2; x += ytile_span) { - mem_copy_align16(dst + ((xo + yo + 0 * column_width) ^ swizzle), src + x + 0 * src_pitch, ytile_span); - mem_copy_align16(dst + ((xo + yo + 1 * column_width) ^ swizzle), src + x + 1 * src_pitch, ytile_span); - mem_copy_align16(dst + ((xo + yo + 2 * column_width) ^ swizzle), src + x + 2 * src_pitch, ytile_span); - mem_copy_align16(dst + ((xo + yo + 3 * column_width) ^ swizzle), src + x + 3 * src_pitch, ytile_span); - xo += bytes_per_column; - swizzle ^= swizzle_bit; - } - - if (x2 != x3) { - mem_copy_align16(dst + ((xo + yo + 0 * column_width) ^ swizzle), src + x2 + 0 * src_pitch, x3 - x2); - mem_copy_align16(dst + ((xo + yo + 1 * column_width) ^ swizzle), src + x2 + 1 * src_pitch, x3 - x2); - mem_copy_align16(dst + ((xo + yo + 2 * column_width) ^ swizzle), src + x2 + 2 * src_pitch, x3 - x2); - mem_copy_align16(dst + ((xo + yo + 3 * column_width) ^ swizzle), src + x2 + 3 * src_pitch, x3 - x2); - } - - src += 4 * src_pitch; - } - - if (y2 != y3) { - for (yo = y2 * column_width; yo < y3 * 
column_width; yo += column_width) { - uint32_t xo = xo1; - uint32_t swizzle = swizzle1; - - mem_copy(dst + ((xo0 + yo) ^ swizzle0), src + x0, x1 - x0); - - /* Step by spans/columns. As it happens, the swizzle bit flips - * at each step so we don't need to calculate it explicitly. - */ - for (x = x1; x < x2; x += ytile_span) { - mem_copy_align16(dst + ((xo + yo) ^ swizzle), src + x, ytile_span); - xo += bytes_per_column; - swizzle ^= swizzle_bit; - } - - mem_copy_align16(dst + ((xo + yo) ^ swizzle), src + x2, x3 - x2); - - src += src_pitch; - } - } -} - -/** - * Copy texture data from X tile layout to linear. - * - * \copydoc tile_copy_fn - */ -static inline void -xtiled_to_linear(uint32_t x0, uint32_t x1, uint32_t x2, uint32_t x3, - uint32_t y0, uint32_t y1, - char *dst, const char *src, - int32_t dst_pitch, - uint32_t swizzle_bit, - mem_copy_fn mem_copy, - mem_copy_fn mem_copy_align16) -{ - /* The copy destination offset for each range copied is the sum of - * an X offset 'x0' or 'xo' and a Y offset 'yo.' - */ - uint32_t xo, yo; - - dst += (ptrdiff_t)y0 * dst_pitch; - - for (yo = y0 * xtile_width; yo < y1 * xtile_width; yo += xtile_width) { - /* Bits 9 and 10 of the copy destination offset control swizzling. - * Only 'yo' contributes to those bits in the total offset, - * so calculate 'swizzle' just once per row. - * Move bits 9 and 10 three and four places respectively down - * to bit 6 and xor them. - */ - uint32_t swizzle = ((yo >> 3) ^ (yo >> 4)) & swizzle_bit; - - mem_copy(dst + x0, src + ((x0 + yo) ^ swizzle), x1 - x0); - - for (xo = x1; xo < x2; xo += xtile_span) { - mem_copy_align16(dst + xo, src + ((xo + yo) ^ swizzle), xtile_span); - } - - mem_copy_align16(dst + x2, src + ((xo + yo) ^ swizzle), x3 - x2); - - dst += dst_pitch; - } -} - - /** - * Copy texture data from Y tile layout to linear. 
- * - * \copydoc tile_copy_fn - */ -static inline void -ytiled_to_linear(uint32_t x0, uint32_t x1, uint32_t x2, uint32_t x3, - uint32_t y0, uint32_t y3, - char *dst, const char *src, - int32_t dst_pitch, - uint32_t swizzle_bit, - mem_copy_fn mem_copy, - mem_copy_fn mem_copy_align16) -{ - /* Y tiles consist of columns that are 'ytile_span' wide (and the same height - * as the tile). Thus the destination offset for (x,y) is the sum of: - * (x % column_width) // position within column - * (x / column_width) * bytes_per_column // column number * bytes per column - * y * column_width - * - * The copy destination offset for each range copied is the sum of - * an X offset 'xo0' or 'xo' and a Y offset 'yo.' - */ - const uint32_t column_width = ytile_span; - const uint32_t bytes_per_column = column_width * ytile_height; - - uint32_t y1 = MIN2(y3, ALIGN_UP(y0, 4)); - uint32_t y2 = MAX2(y1, ALIGN_DOWN(y3, 4)); - - uint32_t xo0 = (x0 % ytile_span) + (x0 / ytile_span) * bytes_per_column; - uint32_t xo1 = (x1 % ytile_span) + (x1 / ytile_span) * bytes_per_column; - - /* Bit 9 of the destination offset control swizzling. - * Only the X offset contributes to bit 9 of the total offset, - * so swizzle can be calculated in advance for these X positions. - * Move bit 9 three places down to bit 6. - */ - uint32_t swizzle0 = (xo0 >> 3) & swizzle_bit; - uint32_t swizzle1 = (xo1 >> 3) & swizzle_bit; - - uint32_t x, yo; - - dst += (ptrdiff_t)y0 * dst_pitch; - - if (y0 != y1) { - for (yo = y0 * column_width; yo < y1 * column_width; yo += column_width) { - uint32_t xo = xo1; - uint32_t swizzle = swizzle1; - - mem_copy(dst + x0, src + ((xo0 + yo) ^ swizzle0), x1 - x0); - - /* Step by spans/columns. As it happens, the swizzle bit flips - * at each step so we don't need to calculate it explicitly. 
- */ - for (x = x1; x < x2; x += ytile_span) { - mem_copy_align16(dst + x, src + ((xo + yo) ^ swizzle), ytile_span); - xo += bytes_per_column; - swizzle ^= swizzle_bit; - } - - mem_copy_align16(dst + x2, src + ((xo + yo) ^ swizzle), x3 - x2); - - dst += dst_pitch; - } - } - - for (yo = y1 * column_width; yo < y2 * column_width; yo += 4 * column_width) { - uint32_t xo = xo1; - uint32_t swizzle = swizzle1; - - if (x0 != x1) { - mem_copy(dst + x0 + 0 * dst_pitch, src + ((xo0 + yo + 0 * column_width) ^ swizzle0), x1 - x0); - mem_copy(dst + x0 + 1 * dst_pitch, src + ((xo0 + yo + 1 * column_width) ^ swizzle0), x1 - x0); - mem_copy(dst + x0 + 2 * dst_pitch, src + ((xo0 + yo + 2 * column_width) ^ swizzle0), x1 - x0); - mem_copy(dst + x0 + 3 * dst_pitch, src + ((xo0 + yo + 3 * column_width) ^ swizzle0), x1 - x0); - } - - /* Step by spans/columns. As it happens, the swizzle bit flips - * at each step so we don't need to calculate it explicitly. - */ - for (x = x1; x < x2; x += ytile_span) { - mem_copy_align16(dst + x + 0 * dst_pitch, src + ((xo + yo + 0 * column_width) ^ swizzle), ytile_span); - mem_copy_align16(dst + x + 1 * dst_pitch, src + ((xo + yo + 1 * column_width) ^ swizzle), ytile_span); - mem_copy_align16(dst + x + 2 * dst_pitch, src + ((xo + yo + 2 * column_width) ^ swizzle), ytile_span); - mem_copy_align16(dst + x + 3 * dst_pitch, src + ((xo + yo + 3 * column_width) ^ swizzle), ytile_span); - xo += bytes_per_column; - swizzle ^= swizzle_bit; - } - - if (x2 != x3) { - mem_copy_align16(dst + x2 + 0 * dst_pitch, src + ((xo + yo + 0 * column_width) ^ swizzle), x3 - x2); - mem_copy_align16(dst + x2 + 1 * dst_pitch, src + ((xo + yo + 1 * column_width) ^ swizzle), x3 - x2); - mem_copy_align16(dst + x2 + 2 * dst_pitch, src + ((xo + yo + 2 * column_width) ^ swizzle), x3 - x2); - mem_copy_align16(dst + x2 + 3 * dst_pitch, src + ((xo + yo + 3 * column_width) ^ swizzle), x3 - x2); - } - - dst += 4 * dst_pitch; - } - - if (y2 != y3) { - for (yo = y2 * column_width; yo < y3 * 
column_width; yo += column_width) { - uint32_t xo = xo1; - uint32_t swizzle = swizzle1; - - mem_copy(dst + x0, src + ((xo0 + yo) ^ swizzle0), x1 - x0); - - /* Step by spans/columns. As it happens, the swizzle bit flips - * at each step so we don't need to calculate it explicitly. - */ - for (x = x1; x < x2; x += ytile_span) { - mem_copy_align16(dst + x, src + ((xo + yo) ^ swizzle), ytile_span); - xo += bytes_per_column; - swizzle ^= swizzle_bit; - } - - mem_copy_align16(dst + x2, src + ((xo + yo) ^ swizzle), x3 - x2); - - dst += dst_pitch; - } - } -} - -#if defined(INLINE_SSE41) -static ALWAYS_INLINE void * -_memcpy_streaming_load(void *dest, const void *src, size_t count) -{ - if (count == 16) { - __m128i val = _mm_stream_load_si128((__m128i *)src); - _mm_storeu_si128((__m128i *)dest, val); - return dest; - } else if (count == 64) { - __m128i val0 = _mm_stream_load_si128(((__m128i *)src) + 0); - __m128i val1 = _mm_stream_load_si128(((__m128i *)src) + 1); - __m128i val2 = _mm_stream_load_si128(((__m128i *)src) + 2); - __m128i val3 = _mm_stream_load_si128(((__m128i *)src) + 3); - _mm_storeu_si128(((__m128i *)dest) + 0, val0); - _mm_storeu_si128(((__m128i *)dest) + 1, val1); - _mm_storeu_si128(((__m128i *)dest) + 2, val2); - _mm_storeu_si128(((__m128i *)dest) + 3, val3); - return dest; - } else { - assert(count < 64); /* and (count < 16) for ytiled */ - return memcpy(dest, src, count); - } -} -#endif - -static mem_copy_fn -choose_copy_function(mem_copy_fn_type copy_type) -{ - switch(copy_type) { - case INTEL_COPY_MEMCPY: - return memcpy; - case INTEL_COPY_RGBA8: - return rgba8_copy; -#if defined(INLINE_SSE41) - case INTEL_COPY_STREAMING_LOAD: - return _memcpy_streaming_load; -#endif - case INTEL_COPY_INVALID: - unreachable("invalid copy_type"); - } - unreachable("unhandled copy_type"); - return NULL; -} - -/** - * Copy texture data from linear to X tile layout, faster. 
- * - * Same as \ref linear_to_xtiled but faster, because it passes constant - * parameters for common cases, allowing the compiler to inline code - * optimized for those cases. - * - * \copydoc tile_copy_fn - */ -static FLATTEN void -linear_to_xtiled_faster(uint32_t x0, uint32_t x1, uint32_t x2, uint32_t x3, - uint32_t y0, uint32_t y1, - char *dst, const char *src, - int32_t src_pitch, - uint32_t swizzle_bit, - mem_copy_fn_type copy_type) -{ - mem_copy_fn mem_copy = choose_copy_function(copy_type); - - if (x0 == 0 && x3 == xtile_width && y0 == 0 && y1 == xtile_height) { - if (mem_copy == memcpy) - return linear_to_xtiled(0, 0, xtile_width, xtile_width, 0, xtile_height, - dst, src, src_pitch, swizzle_bit, memcpy, memcpy); - else if (mem_copy == rgba8_copy) - return linear_to_xtiled(0, 0, xtile_width, xtile_width, 0, xtile_height, - dst, src, src_pitch, swizzle_bit, - rgba8_copy, rgba8_copy_aligned_dst); - else - unreachable("not reached"); - } else { - if (mem_copy == memcpy) - return linear_to_xtiled(x0, x1, x2, x3, y0, y1, - dst, src, src_pitch, swizzle_bit, - memcpy, memcpy); - else if (mem_copy == rgba8_copy) - return linear_to_xtiled(x0, x1, x2, x3, y0, y1, - dst, src, src_pitch, swizzle_bit, - rgba8_copy, rgba8_copy_aligned_dst); - else - unreachable("not reached"); - } - linear_to_xtiled(x0, x1, x2, x3, y0, y1, - dst, src, src_pitch, swizzle_bit, mem_copy, mem_copy); -} - -/** - * Copy texture data from linear to Y tile layout, faster. - * - * Same as \ref linear_to_ytiled but faster, because it passes constant - * parameters for common cases, allowing the compiler to inline code - * optimized for those cases. 
- * - * \copydoc tile_copy_fn - */ -static FLATTEN void -linear_to_ytiled_faster(uint32_t x0, uint32_t x1, uint32_t x2, uint32_t x3, - uint32_t y0, uint32_t y1, - char *dst, const char *src, - int32_t src_pitch, - uint32_t swizzle_bit, - mem_copy_fn_type copy_type) -{ - mem_copy_fn mem_copy = choose_copy_function(copy_type); - - if (x0 == 0 && x3 == ytile_width && y0 == 0 && y1 == ytile_height) { - if (mem_copy == memcpy) - return linear_to_ytiled(0, 0, ytile_width, ytile_width, 0, ytile_height, - dst, src, src_pitch, swizzle_bit, memcpy, memcpy); - else if (mem_copy == rgba8_copy) - return linear_to_ytiled(0, 0, ytile_width, ytile_width, 0, ytile_height, - dst, src, src_pitch, swizzle_bit, - rgba8_copy, rgba8_copy_aligned_dst); - else - unreachable("not reached"); - } else { - if (mem_copy == memcpy) - return linear_to_ytiled(x0, x1, x2, x3, y0, y1, - dst, src, src_pitch, swizzle_bit, memcpy, memcpy); - else if (mem_copy == rgba8_copy) - return linear_to_ytiled(x0, x1, x2, x3, y0, y1, - dst, src, src_pitch, swizzle_bit, - rgba8_copy, rgba8_copy_aligned_dst); - else - unreachable("not reached"); - } - linear_to_ytiled(x0, x1, x2, x3, y0, y1, - dst, src, src_pitch, swizzle_bit, mem_copy, mem_copy); -} - -/** - * Copy texture data from X tile layout to linear, faster. - * - * Same as \ref xtile_to_linear but faster, because it passes constant - * parameters for common cases, allowing the compiler to inline code - * optimized for those cases. 
- * - * \copydoc tile_copy_fn - */ -static FLATTEN void -xtiled_to_linear_faster(uint32_t x0, uint32_t x1, uint32_t x2, uint32_t x3, - uint32_t y0, uint32_t y1, - char *dst, const char *src, - int32_t dst_pitch, - uint32_t swizzle_bit, - mem_copy_fn_type copy_type) -{ - mem_copy_fn mem_copy = choose_copy_function(copy_type); - - if (x0 == 0 && x3 == xtile_width && y0 == 0 && y1 == xtile_height) { - if (mem_copy == memcpy) - return xtiled_to_linear(0, 0, xtile_width, xtile_width, 0, xtile_height, - dst, src, dst_pitch, swizzle_bit, memcpy, memcpy); - else if (mem_copy == rgba8_copy) - return xtiled_to_linear(0, 0, xtile_width, xtile_width, 0, xtile_height, - dst, src, dst_pitch, swizzle_bit, - rgba8_copy, rgba8_copy_aligned_src); -#if defined(INLINE_SSE41) - else if (mem_copy == _memcpy_streaming_load) - return xtiled_to_linear(0, 0, xtile_width, xtile_width, 0, xtile_height, - dst, src, dst_pitch, swizzle_bit, - memcpy, _memcpy_streaming_load); -#endif - else - unreachable("not reached"); - } else { - if (mem_copy == memcpy) - return xtiled_to_linear(x0, x1, x2, x3, y0, y1, - dst, src, dst_pitch, swizzle_bit, memcpy, memcpy); - else if (mem_copy == rgba8_copy) - return xtiled_to_linear(x0, x1, x2, x3, y0, y1, - dst, src, dst_pitch, swizzle_bit, - rgba8_copy, rgba8_copy_aligned_src); -#if defined(INLINE_SSE41) - else if (mem_copy == _memcpy_streaming_load) - return xtiled_to_linear(x0, x1, x2, x3, y0, y1, - dst, src, dst_pitch, swizzle_bit, - memcpy, _memcpy_streaming_load); -#endif - else - unreachable("not reached"); - } - xtiled_to_linear(x0, x1, x2, x3, y0, y1, - dst, src, dst_pitch, swizzle_bit, mem_copy, mem_copy); -} - -/** - * Copy texture data from Y tile layout to linear, faster. - * - * Same as \ref ytile_to_linear but faster, because it passes constant - * parameters for common cases, allowing the compiler to inline code - * optimized for those cases. 
- * - * \copydoc tile_copy_fn - */ -static FLATTEN void -ytiled_to_linear_faster(uint32_t x0, uint32_t x1, uint32_t x2, uint32_t x3, - uint32_t y0, uint32_t y1, - char *dst, const char *src, - int32_t dst_pitch, - uint32_t swizzle_bit, - mem_copy_fn_type copy_type) -{ - mem_copy_fn mem_copy = choose_copy_function(copy_type); - - if (x0 == 0 && x3 == ytile_width && y0 == 0 && y1 == ytile_height) { - if (mem_copy == memcpy) - return ytiled_to_linear(0, 0, ytile_width, ytile_width, 0, ytile_height, - dst, src, dst_pitch, swizzle_bit, memcpy, memcpy); - else if (mem_copy == rgba8_copy) - return ytiled_to_linear(0, 0, ytile_width, ytile_width, 0, ytile_height, - dst, src, dst_pitch, swizzle_bit, - rgba8_copy, rgba8_copy_aligned_src); -#if defined(INLINE_SSE41) - else if (copy_type == INTEL_COPY_STREAMING_LOAD) - return ytiled_to_linear(0, 0, ytile_width, ytile_width, 0, ytile_height, - dst, src, dst_pitch, swizzle_bit, - memcpy, _memcpy_streaming_load); -#endif - else - unreachable("not reached"); - } else { - if (mem_copy == memcpy) - return ytiled_to_linear(x0, x1, x2, x3, y0, y1, - dst, src, dst_pitch, swizzle_bit, memcpy, memcpy); - else if (mem_copy == rgba8_copy) - return ytiled_to_linear(x0, x1, x2, x3, y0, y1, - dst, src, dst_pitch, swizzle_bit, - rgba8_copy, rgba8_copy_aligned_src); -#if defined(INLINE_SSE41) - else if (copy_type == INTEL_COPY_STREAMING_LOAD) - return ytiled_to_linear(x0, x1, x2, x3, y0, y1, - dst, src, dst_pitch, swizzle_bit, - memcpy, _memcpy_streaming_load); -#endif - else - unreachable("not reached"); - } - ytiled_to_linear(x0, x1, x2, x3, y0, y1, - dst, src, dst_pitch, swizzle_bit, mem_copy, mem_copy); -} - -/** - * Copy from linear to tiled texture. - * - * Divide the region given by X range [xt1, xt2) and Y range [yt1, yt2) into - * pieces that do not cross tile boundaries and copy each piece with a tile - * copy function (\ref tile_copy_fn). - * The X range is in bytes, i.e. pixels * bytes-per-pixel. - * The Y range is in pixels (i.e. 
unitless). - * 'dst' is the address of (0, 0) in the destination tiled texture. - * 'src' is the address of (xt1, yt1) in the source linear texture. - */ -static void -intel_linear_to_tiled(uint32_t xt1, uint32_t xt2, - uint32_t yt1, uint32_t yt2, - char *dst, const char *src, - uint32_t dst_pitch, int32_t src_pitch, - bool has_swizzling, - enum isl_tiling tiling, - mem_copy_fn_type copy_type) -{ - tile_copy_fn tile_copy; - uint32_t xt0, xt3; - uint32_t yt0, yt3; - uint32_t xt, yt; - uint32_t tw, th, span; - uint32_t swizzle_bit = has_swizzling ? 1<<6 : 0; - - if (tiling == ISL_TILING_X) { - tw = xtile_width; - th = xtile_height; - span = xtile_span; - tile_copy = linear_to_xtiled_faster; - } else if (tiling == ISL_TILING_Y0) { - tw = ytile_width; - th = ytile_height; - span = ytile_span; - tile_copy = linear_to_ytiled_faster; - } else { - unreachable("unsupported tiling"); - } - - /* Round out to tile boundaries. */ - xt0 = ALIGN_DOWN(xt1, tw); - xt3 = ALIGN_UP (xt2, tw); - yt0 = ALIGN_DOWN(yt1, th); - yt3 = ALIGN_UP (yt2, th); - - /* Loop over all tiles to which we have something to copy. - * 'xt' and 'yt' are the origin of the destination tile, whether copying - * copying a full or partial tile. - * tile_copy() copies one tile or partial tile. - * Looping x inside y is the faster memory access pattern. - */ - for (yt = yt0; yt < yt3; yt += th) { - for (xt = xt0; xt < xt3; xt += tw) { - /* The area to update is [x0,x3) x [y0,y1). - * May not want the whole tile, hence the min and max. - */ - uint32_t x0 = MAX2(xt1, xt); - uint32_t y0 = MAX2(yt1, yt); - uint32_t x3 = MIN2(xt2, xt + tw); - uint32_t y1 = MIN2(yt2, yt + th); - - /* [x0,x3) is split into [x0,x1), [x1,x2), [x2,x3) such that - * the middle interval is the longest span-aligned part. - * The sub-ranges could be empty. 
- */ - uint32_t x1, x2; - x1 = ALIGN_UP(x0, span); - if (x1 > x3) - x1 = x2 = x3; - else - x2 = ALIGN_DOWN(x3, span); - - assert(x0 <= x1 && x1 <= x2 && x2 <= x3); - assert(x1 - x0 < span && x3 - x2 < span); - assert(x3 - x0 <= tw); - assert((x2 - x1) % span == 0); - - /* Translate by (xt,yt) for single-tile copier. */ - tile_copy(x0-xt, x1-xt, x2-xt, x3-xt, - y0-yt, y1-yt, - dst + (ptrdiff_t)xt * th + (ptrdiff_t)yt * dst_pitch, - src + (ptrdiff_t)xt - xt1 + ((ptrdiff_t)yt - yt1) * src_pitch, - src_pitch, - swizzle_bit, - copy_type); - } - } -} - -/** - * Copy from tiled to linear texture. - * - * Divide the region given by X range [xt1, xt2) and Y range [yt1, yt2) into - * pieces that do not cross tile boundaries and copy each piece with a tile - * copy function (\ref tile_copy_fn). - * The X range is in bytes, i.e. pixels * bytes-per-pixel. - * The Y range is in pixels (i.e. unitless). - * 'dst' is the address of (xt1, yt1) in the destination linear texture. - * 'src' is the address of (0, 0) in the source tiled texture. - */ -static void -intel_tiled_to_linear(uint32_t xt1, uint32_t xt2, - uint32_t yt1, uint32_t yt2, - char *dst, const char *src, - int32_t dst_pitch, uint32_t src_pitch, - bool has_swizzling, - enum isl_tiling tiling, - mem_copy_fn_type copy_type) -{ - tile_copy_fn tile_copy; - uint32_t xt0, xt3; - uint32_t yt0, yt3; - uint32_t xt, yt; - uint32_t tw, th, span; - uint32_t swizzle_bit = has_swizzling ? 1<<6 : 0; - - if (tiling == ISL_TILING_X) { - tw = xtile_width; - th = xtile_height; - span = xtile_span; - tile_copy = xtiled_to_linear_faster; - } else if (tiling == ISL_TILING_Y0) { - tw = ytile_width; - th = ytile_height; - span = ytile_span; - tile_copy = ytiled_to_linear_faster; - } else { - unreachable("unsupported tiling"); - } - -#if defined(INLINE_SSE41) - if (copy_type == INTEL_COPY_STREAMING_LOAD) { - /* The hidden cacheline sized register used by movntdqa can apparently - * give you stale data, so do an mfence to invalidate it. 
- */ - _mm_mfence(); - } -#endif - - /* Round out to tile boundaries. */ - xt0 = ALIGN_DOWN(xt1, tw); - xt3 = ALIGN_UP (xt2, tw); - yt0 = ALIGN_DOWN(yt1, th); - yt3 = ALIGN_UP (yt2, th); - - /* Loop over all tiles to which we have something to copy. - * 'xt' and 'yt' are the origin of the destination tile, whether copying - * copying a full or partial tile. - * tile_copy() copies one tile or partial tile. - * Looping x inside y is the faster memory access pattern. - */ - for (yt = yt0; yt < yt3; yt += th) { - for (xt = xt0; xt < xt3; xt += tw) { - /* The area to update is [x0,x3) x [y0,y1). - * May not want the whole tile, hence the min and max. - */ - uint32_t x0 = MAX2(xt1, xt); - uint32_t y0 = MAX2(yt1, yt); - uint32_t x3 = MIN2(xt2, xt + tw); - uint32_t y1 = MIN2(yt2, yt + th); - - /* [x0,x3) is split into [x0,x1), [x1,x2), [x2,x3) such that - * the middle interval is the longest span-aligned part. - * The sub-ranges could be empty. - */ - uint32_t x1, x2; - x1 = ALIGN_UP(x0, span); - if (x1 > x3) - x1 = x2 = x3; - else - x2 = ALIGN_DOWN(x3, span); - - assert(x0 <= x1 && x1 <= x2 && x2 <= x3); - assert(x1 - x0 < span && x3 - x2 < span); - assert(x3 - x0 <= tw); - assert((x2 - x1) % span == 0); - - /* Translate by (xt,yt) for single-tile copier. 
*/ - tile_copy(x0-xt, x1-xt, x2-xt, x3-xt, - y0-yt, y1-yt, - dst + (ptrdiff_t)xt - xt1 + ((ptrdiff_t)yt - yt1) * dst_pitch, - src + (ptrdiff_t)xt * th + (ptrdiff_t)yt * src_pitch, - dst_pitch, - swizzle_bit, - copy_type); - } - } -} diff -Nru mesa-18.3.3/src/mesa/drivers/dri/i965/intel_tiled_memcpy.h mesa-19.0.1/src/mesa/drivers/dri/i965/intel_tiled_memcpy.h --- mesa-18.3.3/src/mesa/drivers/dri/i965/intel_tiled_memcpy.h 2018-12-07 18:58:04.000000000 +0000 +++ mesa-19.0.1/src/mesa/drivers/dri/i965/intel_tiled_memcpy.h 1970-01-01 00:00:00.000000000 +0000 @@ -1,139 +0,0 @@ -/* - * Mesa 3-D graphics library - * - * Copyright 2012 Intel Corporation - * Copyright 2013 Google - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the - * "Software"), to deal in the Software without restriction, including - * without limitation the rights to use, copy, modify, merge, publish, - * distribute, sublicense, and/or sell copies of the Software, and to - * permit persons to whom the Software is furnished to do so, subject to - * the following conditions: - * - * The above copyright notice and this permission notice (including the - * next paragraph) shall be included in all copies or substantial portions - * of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS - * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. - * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR - * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, - * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE - * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
- * - * Authors: - * Chad Versace - * Frank Henigman - */ - -#ifndef INTEL_TILED_MEMCPY_H -#define INTEL_TILED_MEMCPY_H - -#include -#include "main/mtypes.h" - -typedef enum { - INTEL_COPY_MEMCPY = 0, - INTEL_COPY_RGBA8, - INTEL_COPY_STREAMING_LOAD, - INTEL_COPY_INVALID, -} mem_copy_fn_type; - -typedef void *(*mem_copy_fn)(void *dest, const void *src, size_t n); - -typedef void (*tiled_to_linear_fn) - (uint32_t xt1, uint32_t xt2, - uint32_t yt1, uint32_t yt2, - char *dst, const char *src, - int32_t dst_pitch, uint32_t src_pitch, - bool has_swizzling, - enum isl_tiling tiling, - mem_copy_fn_type copy_type); - -void -linear_to_tiled(uint32_t xt1, uint32_t xt2, - uint32_t yt1, uint32_t yt2, - char *dst, const char *src, - uint32_t dst_pitch, int32_t src_pitch, - bool has_swizzling, - enum isl_tiling tiling, - mem_copy_fn_type copy_type); - -void -tiled_to_linear(uint32_t xt1, uint32_t xt2, - uint32_t yt1, uint32_t yt2, - char *dst, const char *src, - int32_t dst_pitch, uint32_t src_pitch, - bool has_swizzling, - enum isl_tiling tiling, - mem_copy_fn_type copy_type); - -/** - * Determine which copy function to use for the given format combination - * - * The only two possible copy functions which are ever returned are a - * direct memcpy and a RGBA <-> BGRA copy function. Since RGBA -> BGRA and - * BGRA -> RGBA are exactly the same operation (and memcpy is obviously - * symmetric), it doesn't matter whether the copy is from the tiled image - * to the untiled or vice versa. The copy function required is the same in - * either case so this function can be used. 
- * - * \param[in] tiledFormat The format of the tiled image - * \param[in] format The GL format of the client data - * \param[in] type The GL type of the client data - * \param[out] mem_copy Will be set to one of either the standard - * library's memcpy or a different copy function - * that performs an RGBA to BGRA conversion - * \param[out] cpp Number of bytes per channel - * - * \return true if the format and type combination are valid - */ -static MAYBE_UNUSED bool -intel_get_memcpy_type(mesa_format tiledFormat, GLenum format, GLenum type, - mem_copy_fn_type *copy_type, uint32_t *cpp) -{ - *copy_type = INTEL_COPY_INVALID; - - if (type == GL_UNSIGNED_INT_8_8_8_8_REV && - !(format == GL_RGBA || format == GL_BGRA)) - return false; /* Invalid type/format combination */ - - if ((tiledFormat == MESA_FORMAT_L_UNORM8 && format == GL_LUMINANCE) || - (tiledFormat == MESA_FORMAT_A_UNORM8 && format == GL_ALPHA)) { - *cpp = 1; - *copy_type = INTEL_COPY_MEMCPY; - } else if ((tiledFormat == MESA_FORMAT_B8G8R8A8_UNORM) || - (tiledFormat == MESA_FORMAT_B8G8R8X8_UNORM) || - (tiledFormat == MESA_FORMAT_B8G8R8A8_SRGB) || - (tiledFormat == MESA_FORMAT_B8G8R8X8_SRGB)) { - *cpp = 4; - if (format == GL_BGRA) { - *copy_type = INTEL_COPY_MEMCPY; - } else if (format == GL_RGBA) { - *copy_type = INTEL_COPY_RGBA8; - } - } else if ((tiledFormat == MESA_FORMAT_R8G8B8A8_UNORM) || - (tiledFormat == MESA_FORMAT_R8G8B8X8_UNORM) || - (tiledFormat == MESA_FORMAT_R8G8B8A8_SRGB) || - (tiledFormat == MESA_FORMAT_R8G8B8X8_SRGB)) { - *cpp = 4; - if (format == GL_BGRA) { - /* Copying from RGBA to BGRA is the same as BGRA to RGBA so we can - * use the same function. 
- */ - *copy_type = INTEL_COPY_RGBA8; - } else if (format == GL_RGBA) { - *copy_type = INTEL_COPY_MEMCPY; - } - } - - if (*copy_type == INTEL_COPY_INVALID) - return false; - - return true; -} - -#endif /* INTEL_TILED_MEMCPY */ diff -Nru mesa-18.3.3/src/mesa/drivers/dri/i965/intel_tiled_memcpy_normal.c mesa-19.0.1/src/mesa/drivers/dri/i965/intel_tiled_memcpy_normal.c --- mesa-18.3.3/src/mesa/drivers/dri/i965/intel_tiled_memcpy_normal.c 2018-12-07 18:58:04.000000000 +0000 +++ mesa-19.0.1/src/mesa/drivers/dri/i965/intel_tiled_memcpy_normal.c 1970-01-01 00:00:00.000000000 +0000 @@ -1,59 +0,0 @@ -/* - * Mesa 3-D graphics library - * - * Copyright 2012 Intel Corporation - * Copyright 2013 Google - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the - * "Software"), to deal in the Software without restriction, including - * without limitation the rights to use, copy, modify, merge, publish, - * distribute, sublicense, and/or sell copies of the Software, and to - * permit persons to whom the Software is furnished to do so, subject to - * the following conditions: - * - * The above copyright notice and this permission notice (including the - * next paragraph) shall be included in all copies or substantial portions - * of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS - * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. - * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR - * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, - * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE - * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
- * - * Authors: - * Chad Versace - * Frank Henigman - */ - - -#include "intel_tiled_memcpy.c" - -void -linear_to_tiled(uint32_t xt1, uint32_t xt2, - uint32_t yt1, uint32_t yt2, - char *dst, const char *src, - uint32_t dst_pitch, int32_t src_pitch, - bool has_swizzling, - enum isl_tiling tiling, - mem_copy_fn_type copy_type) -{ - intel_linear_to_tiled(xt1, xt2, yt1, yt2, dst, src, dst_pitch, src_pitch, - has_swizzling, tiling, copy_type); -} - -void -tiled_to_linear(uint32_t xt1, uint32_t xt2, - uint32_t yt1, uint32_t yt2, - char *dst, const char *src, - int32_t dst_pitch, uint32_t src_pitch, - bool has_swizzling, - enum isl_tiling tiling, - mem_copy_fn_type copy_type) -{ - intel_tiled_to_linear(xt1, xt2, yt1, yt2, dst, src, dst_pitch, src_pitch, - has_swizzling, tiling, copy_type); -} diff -Nru mesa-18.3.3/src/mesa/drivers/dri/i965/intel_tiled_memcpy_sse41.c mesa-19.0.1/src/mesa/drivers/dri/i965/intel_tiled_memcpy_sse41.c --- mesa-18.3.3/src/mesa/drivers/dri/i965/intel_tiled_memcpy_sse41.c 2018-12-07 18:58:04.000000000 +0000 +++ mesa-19.0.1/src/mesa/drivers/dri/i965/intel_tiled_memcpy_sse41.c 1970-01-01 00:00:00.000000000 +0000 @@ -1,61 +0,0 @@ -/* - * Mesa 3-D graphics library - * - * Copyright 2012 Intel Corporation - * Copyright 2013 Google - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the - * "Software"), to deal in the Software without restriction, including - * without limitation the rights to use, copy, modify, merge, publish, - * distribute, sublicense, and/or sell copies of the Software, and to - * permit persons to whom the Software is furnished to do so, subject to - * the following conditions: - * - * The above copyright notice and this permission notice (including the - * next paragraph) shall be included in all copies or substantial portions - * of the Software. 
- * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS - * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. - * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR - * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, - * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE - * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. - * - * Authors: - * Chad Versace - * Frank Henigman - */ - -#define INLINE_SSE41 - -#include "intel_tiled_memcpy_sse41.h" -#include "intel_tiled_memcpy.c" - -void -linear_to_tiled_sse41(uint32_t xt1, uint32_t xt2, - uint32_t yt1, uint32_t yt2, - char *dst, const char *src, - uint32_t dst_pitch, int32_t src_pitch, - bool has_swizzling, - enum isl_tiling tiling, - mem_copy_fn_type copy_type) -{ - intel_linear_to_tiled(xt1, xt2, yt1, yt2, dst, src, dst_pitch, src_pitch, - has_swizzling, tiling, copy_type); -} - -void -tiled_to_linear_sse41(uint32_t xt1, uint32_t xt2, - uint32_t yt1, uint32_t yt2, - char *dst, const char *src, - int32_t dst_pitch, uint32_t src_pitch, - bool has_swizzling, - enum isl_tiling tiling, - mem_copy_fn_type copy_type) -{ - intel_tiled_to_linear(xt1, xt2, yt1, yt2, dst, src, dst_pitch, src_pitch, - has_swizzling, tiling, copy_type); -} diff -Nru mesa-18.3.3/src/mesa/drivers/dri/i965/intel_tiled_memcpy_sse41.h mesa-19.0.1/src/mesa/drivers/dri/i965/intel_tiled_memcpy_sse41.h --- mesa-18.3.3/src/mesa/drivers/dri/i965/intel_tiled_memcpy_sse41.h 2018-12-07 18:58:04.000000000 +0000 +++ mesa-19.0.1/src/mesa/drivers/dri/i965/intel_tiled_memcpy_sse41.h 1970-01-01 00:00:00.000000000 +0000 @@ -1,59 +0,0 @@ -/* - * Mesa 3-D graphics library - * - * Copyright 2012 Intel Corporation - * Copyright 2013 Google - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the - * "Software"), to deal in 
the Software without restriction, including - * without limitation the rights to use, copy, modify, merge, publish, - * distribute, sublicense, and/or sell copies of the Software, and to - * permit persons to whom the Software is furnished to do so, subject to - * the following conditions: - * - * The above copyright notice and this permission notice (including the - * next paragraph) shall be included in all copies or substantial portions - * of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS - * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. - * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR - * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, - * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE - * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. - * - * Authors: - * Chad Versace - * Frank Henigman - */ - -#ifndef INTEL_TILED_MEMCPY_SSE41_H -#define INTEL_TILED_MEMCPY_SSE41_H - -#include -#include "main/mtypes.h" -#include "isl/isl.h" - -#include "intel_tiled_memcpy.h" - -void -linear_to_tiled_sse41(uint32_t xt1, uint32_t xt2, - uint32_t yt1, uint32_t yt2, - char *dst, const char *src, - uint32_t dst_pitch, int32_t src_pitch, - bool has_swizzling, - enum isl_tiling tiling, - mem_copy_fn_type copy_type); - -void -tiled_to_linear_sse41(uint32_t xt1, uint32_t xt2, - uint32_t yt1, uint32_t yt2, - char *dst, const char *src, - int32_t dst_pitch, uint32_t src_pitch, - bool has_swizzling, - enum isl_tiling tiling, - mem_copy_fn_type copy_type); - -#endif /* INTEL_TILED_MEMCPY_SSE41_H */ diff -Nru mesa-18.3.3/src/mesa/drivers/dri/i965/Makefile.am mesa-19.0.1/src/mesa/drivers/dri/i965/Makefile.am --- mesa-18.3.3/src/mesa/drivers/dri/i965/Makefile.am 2018-12-07 18:58:04.000000000 +0000 +++ mesa-19.0.1/src/mesa/drivers/dri/i965/Makefile.am 2019-03-31 23:16:37.000000000 +0000 @@ -26,6 
+26,7 @@ AM_CFLAGS = \ -I$(top_srcdir)/include \ -I$(top_srcdir)/src/ \ + -I$(top_builddir)/src/ \ -I$(top_srcdir)/src/mapi \ -I$(top_srcdir)/src/mesa/ \ -I$(top_srcdir)/src/gallium/include \ @@ -33,6 +34,8 @@ -I$(top_builddir)/src/util \ -I$(top_srcdir)/src/mesa/drivers/dri/common \ -I$(top_srcdir)/src/gtest/include \ + -I$(top_builddir)/src/compiler \ + -I$(top_srcdir)/src/compiler \ -I$(top_builddir)/src/compiler/glsl \ -I$(top_builddir)/src/compiler/nir \ -I$(top_srcdir)/src/compiler/nir \ @@ -92,20 +95,8 @@ noinst_LTLIBRARIES = \ libi965_dri.la \ - libintel_tiled_memcpy.la \ - libintel_tiled_memcpy_sse41.la \ $(I965_PERGEN_LIBS) -libintel_tiled_memcpy_la_SOURCES = \ - $(intel_tiled_memcpy_FILES) -libintel_tiled_memcpy_la_CFLAGS = \ - $(AM_CFLAGS) - -libintel_tiled_memcpy_sse41_la_SOURCES = \ - $(intel_tiled_memcpy_sse41_FILES) -libintel_tiled_memcpy_sse41_la_CFLAGS = \ - $(AM_CFLAGS) $(SSE41_CFLAGS) - libi965_dri_la_SOURCES = \ $(i965_FILES) \ $(i965_oa_GENERATED_FILES) @@ -116,8 +107,6 @@ $(top_builddir)/src/intel/compiler/libintel_compiler.la \ $(top_builddir)/src/intel/blorp/libblorp.la \ $(I965_PERGEN_LIBS) \ - libintel_tiled_memcpy.la \ - libintel_tiled_memcpy_sse41.la \ $(LIBDRM_LIBS) BUILT_SOURCES = $(i965_oa_GENERATED_FILES) @@ -126,7 +115,6 @@ EXTRA_DIST = \ brw_oa.py \ $(i965_oa_xml_FILES) \ - $(intel_tiled_memcpy_dep_FILES) \ meson.build brw_oa_metrics.c: brw_oa.py $(i965_oa_xml_FILES) @@ -136,6 +124,3 @@ $(i965_oa_xml_FILES:%=$(srcdir)/%) brw_oa_metrics.h: brw_oa_metrics.c - -intel_tiled_memcpy_normal.c: $(intel_tiled_memcpy_dep_FILES) -intel_tiled_memcpy_sse41.c: $(intel_tiled_memcpy_dep_FILES) diff -Nru mesa-18.3.3/src/mesa/drivers/dri/i965/Makefile.sources mesa-19.0.1/src/mesa/drivers/dri/i965/Makefile.sources --- mesa-18.3.3/src/mesa/drivers/dri/i965/Makefile.sources 2018-12-07 18:58:04.000000000 +0000 +++ mesa-19.0.1/src/mesa/drivers/dri/i965/Makefile.sources 2019-03-31 23:16:37.000000000 +0000 @@ -113,17 +113,6 @@ intel_upload.c \ 
libdrm_macros.h -intel_tiled_memcpy_FILES = \ - intel_tiled_memcpy_normal.c \ - intel_tiled_memcpy.h - -intel_tiled_memcpy_sse41_FILES = \ - intel_tiled_memcpy_sse41.c \ - intel_tiled_memcpy_sse41.h - -intel_tiled_memcpy_dep_FILES = \ - intel_tiled_memcpy.c - i965_gen4_FILES = \ genX_blorp_exec.c \ genX_state_upload.c diff -Nru mesa-18.3.3/src/mesa/drivers/dri/i965/meson.build mesa-19.0.1/src/mesa/drivers/dri/i965/meson.build --- mesa-18.3.3/src/mesa/drivers/dri/i965/meson.build 2018-12-07 18:58:04.000000000 +0000 +++ mesa-19.0.1/src/mesa/drivers/dri/i965/meson.build 2019-03-31 23:16:37.000000000 +0000 @@ -133,16 +133,6 @@ 'libdrm_macros.h', ) -files_intel_tiled_memcpy = files( - 'intel_tiled_memcpy_normal.c', - 'intel_tiled_memcpy.h', -) - -files_intel_tiled_memcpy_sse41 = files( - 'intel_tiled_memcpy_sse41.c', - 'intel_tiled_memcpy_sse41.h', -) - i965_gen_libs = [] foreach v : ['40', '45', '50', '60', '70', '75', '80', '90', '100', '110'] i965_gen_libs += static_library( @@ -184,36 +174,10 @@ ], ) -intel_tiled_memcpy = static_library( - 'intel_tiled_memcpy', - [files_intel_tiled_memcpy], - include_directories : [ - inc_common, inc_intel, inc_dri_common, inc_drm_uapi, - ], - c_args : [c_vis_args, no_override_init_args, '-msse2'], - extra_files : ['intel_tiled_memcpy.c'] -) - -if with_sse41 - intel_tiled_memcpy_sse41 = static_library( - 'intel_tiled_memcpy_sse41', - [files_intel_tiled_memcpy_sse41], - include_directories : [ - inc_common, inc_intel, inc_dri_common, inc_drm_uapi, - ], - link_args : ['-Wl,--exclude-libs=ALL'], - c_args : [c_vis_args, no_override_init_args, '-Wl,--exclude-libs=ALL', '-msse2', sse41_args], - extra_files : ['intel_tiled_memcpy.c'] - ) -else - intel_tiled_memcpy_sse41 = [] -endif - - libi965 = static_library( 'i965', [files_i965, i965_oa_sources, ir_expression_operation_h, - xmlpool_options_h], + xmlpool_options_h, float64_glsl_h], include_directories : [ inc_common, inc_intel, inc_dri_common, inc_util, inc_drm_uapi, ], @@ -221,7 +185,7 
@@ cpp_args : [cpp_vis_args, c_sse2_args], link_with : [ i965_gen_libs, libintel_common, libintel_dev, libisl, libintel_compiler, - libblorp, intel_tiled_memcpy, intel_tiled_memcpy_sse41 + libblorp ], dependencies : [dep_libdrm, dep_valgrind, idep_nir_headers], ) diff -Nru mesa-18.3.3/src/mesa/drivers/dri/meson.build mesa-19.0.1/src/mesa/drivers/dri/meson.build --- mesa-18.3.3/src/mesa/drivers/dri/meson.build 2018-12-07 18:58:04.000000000 +0000 +++ mesa-19.0.1/src/mesa/drivers/dri/meson.build 2019-03-31 23:16:37.000000000 +0000 @@ -78,7 +78,7 @@ filebase : 'dri', description : 'Direct Rendering Infrastructure', version : meson.project_version(), - variables : ['dridriverdir=${prefix}/' + dri_drivers_path], + variables : ['dridriverdir=' + dri_drivers_path], requires_private : dri_req_private, ) endif diff -Nru mesa-18.3.3/src/mesa/drivers/dri/nouveau/nouveau_vbo_t.c mesa-19.0.1/src/mesa/drivers/dri/nouveau/nouveau_vbo_t.c --- mesa-18.3.3/src/mesa/drivers/dri/nouveau/nouveau_vbo_t.c 2018-09-27 19:13:54.000000000 +0000 +++ mesa-19.0.1/src/mesa/drivers/dri/nouveau/nouveau_vbo_t.c 2019-03-31 23:16:37.000000000 +0000 @@ -49,7 +49,7 @@ if (render->mode == VBO && !_mesa_is_bufferobj(binding->BufferObj)) { const struct gl_array_attributes *attrib = a->VertexAttrib; /* Pack client buffers. */ - return align(_mesa_sizeof_type(attrib->Type) * attrib->Size, 4); + return align(attrib->Format._ElementSize, 4); } else { return binding->Stride; } @@ -86,7 +86,7 @@ nouveau_init_array(&render->attrs[attr], attr, get_array_stride(ctx, array), - attrib->Size, attrib->Type, + attrib->Format.Size, attrib->Format.Type, imm ? binding->BufferObj : NULL, p, imm, ctx); } @@ -154,8 +154,8 @@ return; /* Constant attribute. 
*/ - nouveau_init_array(a, attr, binding->Stride, attrib->Size, - attrib->Type, binding->BufferObj, p, + nouveau_init_array(a, attr, binding->Stride, attrib->Format.Size, + attrib->Format.Type, binding->BufferObj, p, GL_TRUE, ctx); EMIT_IMM(ctx, a, 0); nouveau_deinit_array(a); @@ -166,7 +166,7 @@ if (render->mode == VBO) { render->map[info->vbo_index] = attr; - render->vertex_size += attrib->_ElementSize; + render->vertex_size += attrib->Format._ElementSize; render->attr_count = MAX2(render->attr_count, info->vbo_index + 1); } else { diff -Nru mesa-18.3.3/src/mesa/drivers/dri/r200/radeon_dma.c mesa-19.0.1/src/mesa/drivers/dri/r200/radeon_dma.c --- mesa-18.3.3/src/mesa/drivers/dri/r200/radeon_dma.c 2017-11-05 00:14:08.000000000 +0000 +++ mesa-19.0.1/src/mesa/drivers/dri/r200/radeon_dma.c 2019-03-31 23:16:37.000000000 +0000 @@ -217,7 +217,7 @@ if (size > rmesa->dma.minimum_size) rmesa->dma.minimum_size = (size + 15) & (~15); - radeon_print(RADEON_DMA, RADEON_NORMAL, "%s size %d minimum_size %Zi\n", + radeon_print(RADEON_DMA, RADEON_NORMAL, "%s size %d minimum_size %zi\n", __func__, size, rmesa->dma.minimum_size); if (is_empty_list(&rmesa->dma.free) diff -Nru mesa-18.3.3/src/mesa/drivers/dri/radeon/radeon_dma.c mesa-19.0.1/src/mesa/drivers/dri/radeon/radeon_dma.c --- mesa-18.3.3/src/mesa/drivers/dri/radeon/radeon_dma.c 2017-11-05 00:14:08.000000000 +0000 +++ mesa-19.0.1/src/mesa/drivers/dri/radeon/radeon_dma.c 2019-03-31 23:16:37.000000000 +0000 @@ -217,7 +217,7 @@ if (size > rmesa->dma.minimum_size) rmesa->dma.minimum_size = (size + 15) & (~15); - radeon_print(RADEON_DMA, RADEON_NORMAL, "%s size %d minimum_size %Zi\n", + radeon_print(RADEON_DMA, RADEON_NORMAL, "%s size %d minimum_size %zi\n", __func__, size, rmesa->dma.minimum_size); if (is_empty_list(&rmesa->dma.free) diff -Nru mesa-18.3.3/src/mesa/main/api_arrayelt.c mesa-19.0.1/src/mesa/main/api_arrayelt.c --- mesa-18.3.3/src/mesa/main/api_arrayelt.c 2018-09-27 19:13:54.000000000 +0000 +++ 
mesa-19.0.1/src/mesa/main/api_arrayelt.c 2019-03-31 23:16:37.000000000 +0000 @@ -1572,15 +1572,15 @@ actx->nr_vbos = 0; /* conventional vertex arrays */ - if (vao->VertexAttrib[VERT_ATTRIB_COLOR_INDEX].Enabled) { + if (vao->Enabled & VERT_BIT_COLOR_INDEX) { aa->array = &vao->VertexAttrib[VERT_ATTRIB_COLOR_INDEX]; aa->binding = &vao->BufferBinding[aa->array->BufferBindingIndex]; - aa->offset = IndexFuncs[TYPE_IDX(aa->array->Type)]; + aa->offset = IndexFuncs[TYPE_IDX(aa->array->Format.Type)]; check_vbo(actx, aa->binding->BufferObj); aa++; } - if (vao->VertexAttrib[VERT_ATTRIB_EDGEFLAG].Enabled) { + if (vao->Enabled & VERT_BIT_EDGEFLAG) { aa->array = &vao->VertexAttrib[VERT_ATTRIB_EDGEFLAG]; aa->binding = &vao->BufferBinding[aa->array->BufferBindingIndex]; aa->offset = _gloffset_EdgeFlagv; @@ -1588,51 +1588,51 @@ aa++; } - if (vao->VertexAttrib[VERT_ATTRIB_NORMAL].Enabled) { + if (vao->Enabled & VERT_BIT_NORMAL) { aa->array = &vao->VertexAttrib[VERT_ATTRIB_NORMAL]; aa->binding = &vao->BufferBinding[aa->array->BufferBindingIndex]; - aa->offset = NormalFuncs[TYPE_IDX(aa->array->Type)]; + aa->offset = NormalFuncs[TYPE_IDX(aa->array->Format.Type)]; check_vbo(actx, aa->binding->BufferObj); aa++; } - if (vao->VertexAttrib[VERT_ATTRIB_COLOR0].Enabled) { + if (vao->Enabled & VERT_BIT_COLOR0) { aa->array = &vao->VertexAttrib[VERT_ATTRIB_COLOR0]; aa->binding = &vao->BufferBinding[aa->array->BufferBindingIndex]; - aa->offset = ColorFuncs[aa->array->Size-3][TYPE_IDX(aa->array->Type)]; + aa->offset = ColorFuncs[aa->array->Format.Size-3][TYPE_IDX(aa->array->Format.Type)]; check_vbo(actx, aa->binding->BufferObj); aa++; } - if (vao->VertexAttrib[VERT_ATTRIB_COLOR1].Enabled) { + if (vao->Enabled & VERT_BIT_COLOR1) { aa->array = &vao->VertexAttrib[VERT_ATTRIB_COLOR1]; aa->binding = &vao->BufferBinding[aa->array->BufferBindingIndex]; - aa->offset = SecondaryColorFuncs[TYPE_IDX(aa->array->Type)]; + aa->offset = SecondaryColorFuncs[TYPE_IDX(aa->array->Format.Type)]; check_vbo(actx, 
aa->binding->BufferObj); aa++; } - if (vao->VertexAttrib[VERT_ATTRIB_FOG].Enabled) { + if (vao->Enabled & VERT_BIT_FOG) { aa->array = &vao->VertexAttrib[VERT_ATTRIB_FOG]; aa->binding = &vao->BufferBinding[aa->array->BufferBindingIndex]; - aa->offset = FogCoordFuncs[TYPE_IDX(aa->array->Type)]; + aa->offset = FogCoordFuncs[TYPE_IDX(aa->array->Format.Type)]; check_vbo(actx, aa->binding->BufferObj); aa++; } for (i = 0; i < ctx->Const.MaxTextureCoordUnits; i++) { - struct gl_array_attributes *attribArray = - &vao->VertexAttrib[VERT_ATTRIB_TEX(i)]; - if (attribArray->Enabled) { + if (vao->Enabled & VERT_BIT_TEX(i)) { + struct gl_array_attributes *attribArray = + &vao->VertexAttrib[VERT_ATTRIB_TEX(i)]; /* NOTE: we use generic glVertexAttribNV functions here. * If we ever remove GL_NV_vertex_program this will have to change. */ at->array = attribArray; at->binding = &vao->BufferBinding[attribArray->BufferBindingIndex]; - assert(!at->array->Normalized); - at->func = AttribFuncsNV[at->array->Normalized] - [at->array->Size-1] - [TYPE_IDX(at->array->Type)]; + assert(!at->array->Format.Normalized); + at->func = AttribFuncsNV[at->array->Format.Normalized] + [at->array->Format.Size-1] + [TYPE_IDX(at->array->Format.Type)]; at->index = VERT_ATTRIB_TEX0 + i; check_vbo(actx, at->binding->BufferObj); at++; @@ -1641,9 +1641,9 @@ /* generic vertex attribute arrays */ for (i = 1; i < VERT_ATTRIB_GENERIC_MAX; i++) { /* skip zero! */ - struct gl_array_attributes *attribArray = - &vao->VertexAttrib[VERT_ATTRIB_GENERIC(i)]; - if (attribArray->Enabled) { + if (vao->Enabled & VERT_BIT_GENERIC(i)) { + struct gl_array_attributes *attribArray = + &vao->VertexAttrib[VERT_ATTRIB_GENERIC(i)]; GLint intOrNorm; at->array = attribArray; at->binding = &vao->BufferBinding[attribArray->BufferBindingIndex]; @@ -1652,18 +1652,18 @@ * change from one execution of _ae_ArrayElement() to * the next. Doing so caused UT to break. 
*/ - if (at->array->Doubles) + if (at->array->Format.Doubles) intOrNorm = 3; - else if (at->array->Integer) + else if (at->array->Format.Integer) intOrNorm = 2; - else if (at->array->Normalized) + else if (at->array->Format.Normalized) intOrNorm = 1; else intOrNorm = 0; at->func = AttribFuncsARB[intOrNorm] - [at->array->Size-1] - [TYPE_IDX(at->array->Type)]; + [at->array->Format.Size-1] + [TYPE_IDX(at->array->Format.Type)]; at->index = i; check_vbo(actx, at->binding->BufferObj); @@ -1672,21 +1672,21 @@ } /* finally, vertex position */ - if (vao->VertexAttrib[VERT_ATTRIB_GENERIC0].Enabled) { + if (vao->Enabled & VERT_BIT_GENERIC0) { /* Use glVertex(v) instead of glVertexAttrib(0, v) to be sure it's * issued as the last (provoking) attribute). */ aa->array = &vao->VertexAttrib[VERT_ATTRIB_GENERIC0]; aa->binding = &vao->BufferBinding[aa->array->BufferBindingIndex]; - assert(aa->array->Size >= 2); /* XXX fix someday? */ - aa->offset = VertexFuncs[aa->array->Size-2][TYPE_IDX(aa->array->Type)]; + assert(aa->array->Format.Size >= 2); /* XXX fix someday? 
*/ + aa->offset = VertexFuncs[aa->array->Format.Size-2][TYPE_IDX(aa->array->Format.Type)]; check_vbo(actx, aa->binding->BufferObj); aa++; } - else if (vao->VertexAttrib[VERT_ATTRIB_POS].Enabled) { + else if (vao->Enabled & VERT_BIT_POS) { aa->array = &vao->VertexAttrib[VERT_ATTRIB_POS]; aa->binding = &vao->BufferBinding[aa->array->BufferBindingIndex]; - aa->offset = VertexFuncs[aa->array->Size-2][TYPE_IDX(aa->array->Type)]; + aa->offset = VertexFuncs[aa->array->Format.Size-2][TYPE_IDX(aa->array->Format.Type)]; check_vbo(actx, aa->binding->BufferObj); aa++; } diff -Nru mesa-18.3.3/src/mesa/main/arrayobj.c mesa-19.0.1/src/mesa/main/arrayobj.c --- mesa-18.3.3/src/mesa/main/arrayobj.c 2018-12-07 18:58:04.000000000 +0000 +++ mesa-19.0.1/src/mesa/main/arrayobj.c 2019-03-31 23:16:37.000000000 +0000 @@ -385,23 +385,17 @@ assert(index < ARRAY_SIZE(vao->BufferBinding)); struct gl_vertex_buffer_binding *binding = &vao->BufferBinding[index]; - array->Size = size; - array->Type = type; - array->Format = GL_RGBA; /* only significant for GL_EXT_vertex_array_bgra */ + _mesa_set_vertex_format(&array->Format, size, type, GL_RGBA, + GL_FALSE, GL_FALSE, GL_FALSE); array->Stride = 0; array->Ptr = NULL; array->RelativeOffset = 0; - array->Enabled = GL_FALSE; - array->Normalized = GL_FALSE; - array->Integer = GL_FALSE; - array->Doubles = GL_FALSE; - array->_ElementSize = size * _mesa_sizeof_type(type); ASSERT_BITFIELD_SIZE(struct gl_array_attributes, BufferBindingIndex, VERT_ATTRIB_MAX - 1); array->BufferBindingIndex = index; binding->Offset = 0; - binding->Stride = array->_ElementSize; + binding->Stride = array->Format._ElementSize; binding->BufferObj = NULL; binding->_BoundArrays = BITFIELD_BIT(index); @@ -442,7 +436,7 @@ init_array(ctx, vao, VERT_ATTRIB_COLOR_INDEX, 1, GL_FLOAT); break; case VERT_ATTRIB_EDGEFLAG: - init_array(ctx, vao, VERT_ATTRIB_EDGEFLAG, 1, GL_BOOL); + init_array(ctx, vao, VERT_ATTRIB_EDGEFLAG, 1, GL_UNSIGNED_BYTE); break; case VERT_ATTRIB_POINT_SIZE: 
init_array(ctx, vao, VERT_ATTRIB_POINT_SIZE, 1, GL_FLOAT); @@ -478,7 +472,7 @@ GLuint max_offset = 0; /* We work on the unmapped originaly VAO array entries. */ - GLbitfield mask = vao->_Enabled & binding->_BoundArrays; + GLbitfield mask = vao->Enabled & binding->_BoundArrays; /* The binding should be active somehow, not to return inverted ranges */ assert(mask); while (mask) { @@ -597,7 +591,7 @@ */ const gl_attribute_map_mode mode = vao->_AttributeMapMode; /* Enabled array bits. */ - const GLbitfield enabled = vao->_Enabled; + const GLbitfield enabled = vao->Enabled; /* VBO array bits. */ const GLbitfield vbos = vao->VertexAttribBufferMask; @@ -642,9 +636,6 @@ attrib2->_EffBufferBindingIndex = bindex; attrib2->_EffRelativeOffset = attrib2->RelativeOffset; assert(attrib2->_EffRelativeOffset <= MaxRelativeOffset); - - /* Only enabled arrays shall appear in the unique bindings */ - assert(attrib2->Enabled); } /* Finally this is the set of effectively bound arrays with the * original binding offset. @@ -720,9 +711,6 @@ attrib2->_EffRelativeOffset = binding2->Offset + attrib2->RelativeOffset - min_offset; assert(attrib2->_EffRelativeOffset <= MaxRelativeOffset); - - /* Only enabled arrays shall appear in the unique bindings */ - assert(attrib2->Enabled); } /* Finally this is the set of effectively bound arrays */ binding->_EffOffset = min_offset; @@ -756,14 +744,14 @@ * grouping information in a seperate array beside * gl_array_attributes/gl_vertex_buffer_binding. 
*/ - assert(util_bitcount(binding->_BoundArrays & vao->_Enabled) == 1 - || (vao->_Enabled & ~binding->_BoundArrays) == 0); + assert(util_bitcount(binding->_BoundArrays & vao->Enabled) == 1 + || (vao->Enabled & ~binding->_BoundArrays) == 0); /* Start this current effective binding with the array */ GLbitfield eff_bound_arrays = bound; const GLubyte *ptr = attrib->Ptr; - unsigned vertex_end = attrib->_ElementSize; + unsigned vertex_end = attrib->Format._ElementSize; /* Walk other user space arrays and see which are interleaved * using the same binding parameters. @@ -776,8 +764,8 @@ &vao->BufferBinding[attrib2->BufferBindingIndex]; /* See the comment at the same assert above. */ - assert(util_bitcount(binding2->_BoundArrays & vao->_Enabled) == 1 - || (vao->_Enabled & ~binding->_BoundArrays) == 0); + assert(util_bitcount(binding2->_BoundArrays & vao->Enabled) == 1 + || (vao->Enabled & ~binding->_BoundArrays) == 0); /* Check if we have an identical binding */ if (binding->Stride != binding2->Stride) @@ -785,9 +773,10 @@ if (binding->InstanceDivisor != binding2->InstanceDivisor) continue; if (ptr <= attrib2->Ptr) { - if (ptr + binding->Stride < attrib2->Ptr + attrib2->_ElementSize) + if (ptr + binding->Stride < attrib2->Ptr + + attrib2->Format._ElementSize) continue; - unsigned end = attrib2->Ptr + attrib2->_ElementSize - ptr; + unsigned end = attrib2->Ptr + attrib2->Format._ElementSize - ptr; vertex_end = MAX2(vertex_end, end); } else { if (attrib2->Ptr + binding->Stride < ptr + vertex_end) @@ -812,9 +801,6 @@ attrib2->_EffBufferBindingIndex = bindex; attrib2->_EffRelativeOffset = attrib2->Ptr - ptr; assert(attrib2->_EffRelativeOffset <= binding->Stride); - - /* Only enabled arrays shall appear in the unique bindings */ - assert(attrib2->Enabled); } /* Finally this is the set of effectively bound arrays */ binding->_EffOffset = (GLintptr)ptr; @@ -832,8 +818,9 @@ for (gl_vert_attrib attr = 0; attr < VERT_ATTRIB_MAX; ++attr) { /* Query the original api defined 
attrib/binding information ... */ const unsigned char *const map =_mesa_vao_attribute_map[mode]; - const struct gl_array_attributes *attrib = &vao->VertexAttrib[map[attr]]; - if (attrib->Enabled) { + if (vao->Enabled & VERT_BIT(map[attr])) { + const struct gl_array_attributes *attrib = + &vao->VertexAttrib[map[attr]]; const struct gl_vertex_buffer_binding *binding = &vao->BufferBinding[attrib->BufferBindingIndex]; /* ... and compare that with the computed attrib/binding */ @@ -871,7 +858,7 @@ _mesa_all_varyings_in_vbos(const struct gl_vertex_array_object *vao) { /* Walk those enabled arrays that have the default vbo attached */ - GLbitfield mask = vao->_Enabled & ~vao->VertexAttribBufferMask; + GLbitfield mask = vao->Enabled & ~vao->VertexAttribBufferMask; while (mask) { /* Do not use u_bit_scan64 as we can walk multiple @@ -883,8 +870,6 @@ const struct gl_vertex_buffer_binding *buffer_binding = &vao->BufferBinding[attrib_array->BufferBindingIndex]; - /* Only enabled arrays shall appear in the _Enabled bitmask */ - assert(attrib_array->Enabled); /* We have already masked out vao->VertexAttribBufferMask */ assert(!_mesa_is_bufferobj(buffer_binding->BufferObj)); @@ -905,7 +890,7 @@ _mesa_all_buffers_are_unmapped(const struct gl_vertex_array_object *vao) { /* Walk the enabled arrays that have a vbo attached */ - GLbitfield mask = vao->_Enabled & vao->VertexAttribBufferMask; + GLbitfield mask = vao->Enabled & vao->VertexAttribBufferMask; while (mask) { const int i = ffs(mask) - 1; @@ -914,8 +899,6 @@ const struct gl_vertex_buffer_binding *buffer_binding = &vao->BufferBinding[attrib_array->BufferBindingIndex]; - /* Only enabled arrays shall appear in the _Enabled bitmask */ - assert(attrib_array->Enabled); /* We have already masked with vao->VertexAttribBufferMask */ assert(_mesa_is_bufferobj(buffer_binding->BufferObj)); diff -Nru mesa-18.3.3/src/mesa/main/arrayobj.h mesa-19.0.1/src/mesa/main/arrayobj.h --- mesa-18.3.3/src/mesa/main/arrayobj.h 2018-09-27 
19:13:54.000000000 +0000 +++ mesa-19.0.1/src/mesa/main/arrayobj.h 2019-03-31 23:16:37.000000000 +0000 @@ -111,7 +111,7 @@ /** * Apply the position/generic0 aliasing map to a bitfield from the vao. - * Use for example to convert gl_vertex_array_object::_Enabled + * Use for example to convert gl_vertex_array_object::Enabled * or gl_vertex_buffer_binding::_VertexBinding from the vao numbering to * the numbering used with vertex processing inputs. */ @@ -143,7 +143,7 @@ _mesa_get_vao_vp_inputs(const struct gl_vertex_array_object *vao) { const gl_attribute_map_mode mode = vao->_AttributeMapMode; - return _mesa_vao_enable_to_vp_inputs(mode, vao->_Enabled); + return _mesa_vao_enable_to_vp_inputs(mode, vao->Enabled); } diff -Nru mesa-18.3.3/src/mesa/main/attrib.c mesa-19.0.1/src/mesa/main/attrib.c --- mesa-18.3.3/src/mesa/main/attrib.c 2018-12-07 18:58:04.000000000 +0000 +++ mesa-19.0.1/src/mesa/main/attrib.c 2019-03-31 23:16:37.000000000 +0000 @@ -1576,8 +1576,8 @@ _mesa_copy_vertex_buffer_binding(ctx, &dest->BufferBinding[i], &src->BufferBinding[i]); } - /* _Enabled must be the same than on push */ - dest->_Enabled = src->_Enabled; + /* Enabled must be the same than on push */ + dest->Enabled = src->Enabled; dest->_EffEnabledVBO = src->_EffEnabledVBO; /* The bitmask of bound VBOs needs to match the VertexBinding array */ dest->VertexAttribBufferMask = src->VertexAttribBufferMask; diff -Nru mesa-18.3.3/src/mesa/main/context.h mesa-19.0.1/src/mesa/main/context.h --- mesa-18.3.3/src/mesa/main/context.h 2018-09-27 19:13:54.000000000 +0000 +++ mesa-19.0.1/src/mesa/main/context.h 2019-03-31 23:16:37.000000000 +0000 @@ -337,6 +337,64 @@ } +static inline bool +_mesa_has_integer_textures(const struct gl_context *ctx) +{ + return _mesa_has_EXT_texture_integer(ctx) || _mesa_is_gles3(ctx); +} + +static inline bool +_mesa_has_half_float_textures(const struct gl_context *ctx) +{ + return _mesa_has_ARB_texture_float(ctx) || + _mesa_has_OES_texture_half_float(ctx) || 
_mesa_is_gles3(ctx); +} + +static inline bool +_mesa_has_float_textures(const struct gl_context *ctx) +{ + return _mesa_has_ARB_texture_float(ctx) || + _mesa_has_OES_texture_float(ctx) || _mesa_is_gles3(ctx); + } + +static inline bool +_mesa_has_texture_rgb10_a2ui(const struct gl_context *ctx) +{ + return _mesa_has_ARB_texture_rgb10_a2ui(ctx) || _mesa_is_gles3(ctx); +} + +static inline bool +_mesa_has_float_depth_buffer(const struct gl_context *ctx) +{ + return _mesa_has_ARB_depth_buffer_float(ctx) || _mesa_is_gles3(ctx); +} + +static inline bool +_mesa_has_packed_float(const struct gl_context *ctx) +{ + return _mesa_has_EXT_packed_float(ctx) || _mesa_is_gles3(ctx); +} + +static inline bool +_mesa_has_rg_textures(const struct gl_context *ctx) +{ + return _mesa_has_ARB_texture_rg(ctx) || _mesa_has_EXT_texture_rg(ctx) || + _mesa_is_gles3(ctx); +} + +static inline bool +_mesa_has_texture_shared_exponent(const struct gl_context *ctx) +{ + return _mesa_has_EXT_texture_shared_exponent(ctx) || _mesa_is_gles3(ctx); +} + +static inline bool +_mesa_has_texture_type_2_10_10_10_REV(const struct gl_context *ctx) +{ + return _mesa_is_desktop_gl(ctx) || + _mesa_has_EXT_texture_type_2_10_10_10_REV(ctx); +} + /** * Checks if the context supports geometry shaders. */ @@ -361,7 +419,7 @@ /** * Checks if the context supports tessellation. 
*/ -static inline GLboolean +static inline bool _mesa_has_tessellation(const struct gl_context *ctx) { /* _mesa_has_EXT_tessellation_shader(ctx) is redundant with the OES diff -Nru mesa-18.3.3/src/mesa/main/dd.h mesa-19.0.1/src/mesa/main/dd.h --- mesa-18.3.3/src/mesa/main/dd.h 2018-09-27 19:13:54.000000000 +0000 +++ mesa-19.0.1/src/mesa/main/dd.h 2019-03-31 23:16:37.000000000 +0000 @@ -784,9 +784,8 @@ GLint srcX0, GLint srcY0, GLint srcX1, GLint srcY1, GLint dstX0, GLint dstY0, GLint dstX1, GLint dstY1, GLbitfield mask, GLenum filter); - void (*DiscardFramebuffer)(struct gl_context *ctx, - GLenum target, GLsizei numAttachments, - const GLenum *attachments); + void (*DiscardFramebuffer)(struct gl_context *ctx, struct gl_framebuffer *fb, + struct gl_renderbuffer_attachment *att); /** * \name Functions for GL_ARB_sample_locations diff -Nru mesa-18.3.3/src/mesa/main/dlist.c mesa-19.0.1/src/mesa/main/dlist.c --- mesa-18.3.3/src/mesa/main/dlist.c 2018-12-07 18:58:04.000000000 +0000 +++ mesa-19.0.1/src/mesa/main/dlist.c 2019-03-31 23:16:37.000000000 +0000 @@ -962,6 +962,8 @@ dlist->Name = name; dlist->Head = malloc(sizeof(Node) * count); dlist->Head[0].opcode = OPCODE_END_OF_LIST; + /* All InstSize[] entries must be non-zero */ + InstSize[OPCODE_END_OF_LIST] = 1; return dlist; } diff -Nru mesa-18.3.3/src/mesa/main/draw.c mesa-19.0.1/src/mesa/main/draw.c --- mesa-18.3.3/src/mesa/main/draw.c 2018-12-07 18:58:04.000000000 +0000 +++ mesa-19.0.1/src/mesa/main/draw.c 2019-03-31 23:16:37.000000000 +0000 @@ -67,7 +67,7 @@ GLuint attrib, GLuint j) { const struct gl_array_attributes *array = &vao->VertexAttrib[attrib]; - if (array->Enabled) { + if (vao->Enabled & VERT_BIT(attrib)) { const struct gl_vertex_buffer_binding *binding = &vao->BufferBinding[array->BufferBindingIndex]; struct gl_buffer_object *bo = binding->BufferObj; @@ -82,18 +82,19 @@ data = ADD_POINTERS(_mesa_vertex_attrib_address(array, binding), bo->Mappings[MAP_INTERNAL].Pointer); } - switch (array->Type) { + switch 
(array->Format.Type) { case GL_FLOAT: { GLfloat *f = (GLfloat *) ((GLubyte *) data + binding->Stride * j); GLint k; - for (k = 0; k < array->Size; k++) { + for (k = 0; k < array->Format.Size; k++) { if (IS_INF_OR_NAN(f[k]) || f[k] >= 1.0e20F || f[k] <= -1.0e10F) { printf("Bad array data:\n"); printf(" Element[%u].%u = %f\n", j, k, f[k]); printf(" Array %u at %p\n", attrib, (void *) array); printf(" Type 0x%x, Size %d, Stride %d\n", - array->Type, array->Size, binding->Stride); + array->Format.Type, array->Format.Size, + binding->Stride); printf(" Address/offset %p in Buffer Object %u\n", array->Ptr, bo->Name); f[k] = 1.0F; /* XXX replace the bad value! */ @@ -117,7 +118,7 @@ GLuint attrib) { const struct gl_array_attributes *array = &vao->VertexAttrib[attrib]; - if (array->Enabled) { + if (vao->Enabled & VERT_BIT(attrib)) { const struct gl_vertex_buffer_binding *binding = &vao->BufferBinding[array->BufferBindingIndex]; struct gl_buffer_object *bo = binding->BufferObj; @@ -225,7 +226,7 @@ case API_OPENGLES: /* For OpenGL ES, only draw if we have vertex positions */ - if (!ctx->Array.VAO->VertexAttrib[VERT_ATTRIB_POS].Enabled) + if (!(ctx->Array.VAO->Enabled & VERT_BIT_POS)) return true; break; @@ -252,8 +253,7 @@ /* Draw if we have vertex positions (GL_VERTEX_ARRAY or generic * array [0]). 
*/ - return (!ctx->Array.VAO->VertexAttrib[VERT_ATTRIB_POS].Enabled && - !ctx->Array.VAO->VertexAttrib[VERT_ATTRIB_GENERIC0].Enabled); + return !(ctx->Array.VAO->Enabled & (VERT_BIT_POS|VERT_BIT_GENERIC0)); } break; @@ -274,23 +274,22 @@ { const struct gl_vertex_array_object *vao = ctx->Array.VAO; - printf("_mesa_exec_DrawArrays(mode 0x%x, start %d, count %d):\n", + printf("_mesa_DrawArrays(mode 0x%x, start %d, count %d):\n", mode, start, count); - unsigned i; - for (i = 0; i < VERT_ATTRIB_MAX; ++i) { + GLbitfield mask = vao->Enabled; + while (mask) { + const gl_vert_attrib i = u_bit_scan(&mask); const struct gl_array_attributes *array = &vao->VertexAttrib[i]; - if (!array->Enabled) - continue; const struct gl_vertex_buffer_binding *binding = &vao->BufferBinding[array->BufferBindingIndex]; struct gl_buffer_object *bufObj = binding->BufferObj; - printf("attr %s: size %d stride %d enabled %d " + printf("attr %s: size %d stride %d " "ptr %p Bufobj %u\n", gl_vert_attrib_name((gl_vert_attrib) i), - array->Size, binding->Stride, array->Enabled, + array->Format.Size, binding->Stride, array->Ptr, bufObj->Name); if (_mesa_is_bufferobj(bufObj)) { @@ -301,7 +300,7 @@ _mesa_vertex_attrib_address(array, binding); unsigned multiplier; - switch (array->Type) { + switch (array->Format.Type) { case GL_DOUBLE: case GL_INT64_ARB: case GL_UNSIGNED_INT64_ARB: @@ -315,7 +314,7 @@ int *k = (int *) f; int i = 0; int n = (count - 1) * (binding->Stride / (4 * multiplier)) - + array->Size; + + array->Format.Size; if (n > 32) n = 32; printf(" Data at offset %d:\n", offset); @@ -538,8 +537,8 @@ /** * Called from glDrawArrays when in immediate mode (not display list mode). */ -static void GLAPIENTRY -_mesa_exec_DrawArrays(GLenum mode, GLint start, GLsizei count) +void GLAPIENTRY +_mesa_DrawArrays(GLenum mode, GLint start, GLsizei count) { GET_CURRENT_CONTEXT(ctx); @@ -573,9 +572,9 @@ * Called from glDrawArraysInstanced when in immediate mode (not * display list mode). 
*/ -static void GLAPIENTRY -_mesa_exec_DrawArraysInstanced(GLenum mode, GLint start, GLsizei count, - GLsizei numInstances) +void GLAPIENTRY +_mesa_DrawArraysInstanced(GLenum mode, GLint start, GLsizei count, + GLsizei numInstances) { GET_CURRENT_CONTEXT(ctx); @@ -859,10 +858,10 @@ /** * Called by glDrawRangeElementsBaseVertex() in immediate mode. */ -static void GLAPIENTRY -_mesa_exec_DrawRangeElementsBaseVertex(GLenum mode, GLuint start, GLuint end, - GLsizei count, GLenum type, - const GLvoid * indices, GLint basevertex) +void GLAPIENTRY +_mesa_DrawRangeElementsBaseVertex(GLenum mode, GLuint start, GLuint end, + GLsizei count, GLenum type, + const GLvoid * indices, GLint basevertex) { static GLuint warnCount = 0; GLboolean index_bounds_valid = GL_TRUE; @@ -958,9 +957,9 @@ /** * Called by glDrawRangeElements() in immediate mode. */ -static void GLAPIENTRY -_mesa_exec_DrawRangeElements(GLenum mode, GLuint start, GLuint end, - GLsizei count, GLenum type, const GLvoid * indices) +void GLAPIENTRY +_mesa_DrawRangeElements(GLenum mode, GLuint start, GLuint end, + GLsizei count, GLenum type, const GLvoid * indices) { if (MESA_VERBOSE & VERBOSE_DRAW) { GET_CURRENT_CONTEXT(ctx); @@ -970,17 +969,17 @@ _mesa_enum_to_string(type), indices); } - _mesa_exec_DrawRangeElementsBaseVertex(mode, start, end, count, type, - indices, 0); + _mesa_DrawRangeElementsBaseVertex(mode, start, end, count, type, + indices, 0); } /** * Called by glDrawElements() in immediate mode. */ -static void GLAPIENTRY -_mesa_exec_DrawElements(GLenum mode, GLsizei count, GLenum type, - const GLvoid * indices) +void GLAPIENTRY +_mesa_DrawElements(GLenum mode, GLsizei count, GLenum type, + const GLvoid * indices) { GET_CURRENT_CONTEXT(ctx); @@ -1009,9 +1008,9 @@ /** * Called by glDrawElementsBaseVertex() in immediate mode. 
*/ -static void GLAPIENTRY -_mesa_exec_DrawElementsBaseVertex(GLenum mode, GLsizei count, GLenum type, - const GLvoid * indices, GLint basevertex) +void GLAPIENTRY +_mesa_DrawElementsBaseVertex(GLenum mode, GLsizei count, GLenum type, + const GLvoid * indices, GLint basevertex) { GET_CURRENT_CONTEXT(ctx); @@ -1321,10 +1320,9 @@ } -static void GLAPIENTRY -_mesa_exec_MultiDrawElements(GLenum mode, - const GLsizei *count, GLenum type, - const GLvoid * const *indices, GLsizei primcount) +void GLAPIENTRY +_mesa_MultiDrawElements(GLenum mode, const GLsizei *count, GLenum type, + const GLvoid * const *indices, GLsizei primcount) { GET_CURRENT_CONTEXT(ctx); @@ -1344,12 +1342,12 @@ } -static void GLAPIENTRY -_mesa_exec_MultiDrawElementsBaseVertex(GLenum mode, - const GLsizei *count, GLenum type, - const GLvoid * const *indices, - GLsizei primcount, - const GLsizei *basevertex) +void GLAPIENTRY +_mesa_MultiDrawElementsBaseVertex(GLenum mode, + const GLsizei *count, GLenum type, + const GLvoid * const *indices, + GLsizei primcount, + const GLsizei *basevertex) { GET_CURRENT_CONTEXT(ctx); @@ -1444,8 +1442,8 @@ * glVertexPointer, glColorPointer, etc. * Part of GL_ARB_transform_feedback2. 
*/ -static void GLAPIENTRY -_mesa_exec_DrawTransformFeedback(GLenum mode, GLuint name) +void GLAPIENTRY +_mesa_DrawTransformFeedback(GLenum mode, GLuint name) { GET_CURRENT_CONTEXT(ctx); struct gl_transform_feedback_object *obj = @@ -1997,15 +1995,15 @@ _mesa_initialize_exec_dispatch(const struct gl_context *ctx, struct _glapi_table *exec) { - SET_DrawArrays(exec, _mesa_exec_DrawArrays); - SET_DrawElements(exec, _mesa_exec_DrawElements); + SET_DrawArrays(exec, _mesa_DrawArrays); + SET_DrawElements(exec, _mesa_DrawElements); if (_mesa_is_desktop_gl(ctx) || _mesa_is_gles3(ctx)) { - SET_DrawRangeElements(exec, _mesa_exec_DrawRangeElements); + SET_DrawRangeElements(exec, _mesa_DrawRangeElements); } SET_MultiDrawArrays(exec, _mesa_exec_MultiDrawArrays); - SET_MultiDrawElementsEXT(exec, _mesa_exec_MultiDrawElements); + SET_MultiDrawElementsEXT(exec, _mesa_MultiDrawElements); if (ctx->API == API_OPENGL_COMPAT) { SET_Rectf(exec, _mesa_exec_Rectf); @@ -2015,13 +2013,13 @@ if (ctx->API != API_OPENGLES && ctx->Extensions.ARB_draw_elements_base_vertex) { - SET_DrawElementsBaseVertex(exec, _mesa_exec_DrawElementsBaseVertex); + SET_DrawElementsBaseVertex(exec, _mesa_DrawElementsBaseVertex); SET_MultiDrawElementsBaseVertex(exec, - _mesa_exec_MultiDrawElementsBaseVertex); + _mesa_MultiDrawElementsBaseVertex); if (_mesa_is_desktop_gl(ctx) || _mesa_is_gles3(ctx)) { SET_DrawRangeElementsBaseVertex(exec, - _mesa_exec_DrawRangeElementsBaseVertex); + _mesa_DrawRangeElementsBaseVertex); SET_DrawElementsInstancedBaseVertex(exec, _mesa_exec_DrawElementsInstancedBaseVertex); } @@ -2042,12 +2040,12 @@ } if (_mesa_is_desktop_gl(ctx) || _mesa_is_gles3(ctx)) { - SET_DrawArraysInstancedARB(exec, _mesa_exec_DrawArraysInstanced); + SET_DrawArraysInstancedARB(exec, _mesa_DrawArraysInstanced); SET_DrawElementsInstancedARB(exec, _mesa_exec_DrawElementsInstanced); } if (_mesa_is_desktop_gl(ctx)) { - SET_DrawTransformFeedback(exec, _mesa_exec_DrawTransformFeedback); + SET_DrawTransformFeedback(exec, 
_mesa_DrawTransformFeedback); SET_DrawTransformFeedbackStream(exec, _mesa_exec_DrawTransformFeedbackStream); SET_DrawTransformFeedbackInstanced(exec, @@ -2065,87 +2063,6 @@ -/** - * The following functions are only used for OpenGL ES 1/2 support. - * And some aren't even supported (yet) in ES 1/2. - */ - - -void GLAPIENTRY -_mesa_DrawArrays(GLenum mode, GLint first, GLsizei count) -{ - _mesa_exec_DrawArrays(mode, first, count); -} - - -void GLAPIENTRY -_mesa_DrawArraysInstanced(GLenum mode, GLint first, GLsizei count, - GLsizei primcount) -{ - _mesa_exec_DrawArraysInstanced(mode, first, count, primcount); -} - - -void GLAPIENTRY -_mesa_DrawElements(GLenum mode, GLsizei count, GLenum type, - const GLvoid *indices) -{ - _mesa_exec_DrawElements(mode, count, type, indices); -} - - -void GLAPIENTRY -_mesa_DrawElementsBaseVertex(GLenum mode, GLsizei count, GLenum type, - const GLvoid *indices, GLint basevertex) -{ - _mesa_exec_DrawElementsBaseVertex(mode, count, type, indices, basevertex); -} - - -void GLAPIENTRY -_mesa_DrawRangeElements(GLenum mode, GLuint start, GLuint end, GLsizei count, - GLenum type, const GLvoid * indices) -{ - _mesa_exec_DrawRangeElements(mode, start, end, count, type, indices); -} - - -void GLAPIENTRY -_mesa_DrawRangeElementsBaseVertex(GLenum mode, GLuint start, GLuint end, - GLsizei count, GLenum type, - const GLvoid *indices, GLint basevertex) -{ - _mesa_exec_DrawRangeElementsBaseVertex(mode, start, end, count, type, - indices, basevertex); -} - - -void GLAPIENTRY -_mesa_MultiDrawElementsEXT(GLenum mode, const GLsizei *count, GLenum type, - const GLvoid ** indices, GLsizei primcount) -{ - _mesa_exec_MultiDrawElements(mode, count, type, indices, primcount); -} - - -void GLAPIENTRY -_mesa_MultiDrawElementsBaseVertex(GLenum mode, - const GLsizei *count, GLenum type, - const GLvoid **indices, GLsizei primcount, - const GLint *basevertex) -{ - _mesa_exec_MultiDrawElementsBaseVertex(mode, count, type, indices, - primcount, basevertex); -} - - -void 
GLAPIENTRY -_mesa_DrawTransformFeedback(GLenum mode, GLuint name) -{ - _mesa_exec_DrawTransformFeedback(mode, name); -} - - /* GL_IBM_multimode_draw_arrays */ void GLAPIENTRY _mesa_MultiModeDrawArraysIBM( const GLenum * mode, const GLint * first, diff -Nru mesa-18.3.3/src/mesa/main/draw.h mesa-19.0.1/src/mesa/main/draw.h --- mesa-18.3.3/src/mesa/main/draw.h 2018-12-07 18:58:04.000000000 +0000 +++ mesa-19.0.1/src/mesa/main/draw.h 2019-03-31 23:16:37.000000000 +0000 @@ -129,14 +129,14 @@ void GLAPIENTRY -_mesa_MultiDrawElementsEXT(GLenum mode, const GLsizei *count, GLenum type, - const GLvoid **indices, GLsizei primcount); +_mesa_MultiDrawElements(GLenum mode, const GLsizei *count, GLenum type, + const GLvoid *const *indices, GLsizei primcount); void GLAPIENTRY _mesa_MultiDrawElementsBaseVertex(GLenum mode, const GLsizei *count, GLenum type, - const GLvoid **indices, GLsizei primcount, + const GLvoid * const * indices, GLsizei primcount, const GLint *basevertex); diff -Nru mesa-18.3.3/src/mesa/main/draw_validate.c mesa-19.0.1/src/mesa/main/draw_validate.c --- mesa-18.3.3/src/mesa/main/draw_validate.c 2018-09-27 19:13:54.000000000 +0000 +++ mesa-19.0.1/src/mesa/main/draw_validate.c 2019-03-31 23:16:37.000000000 +0000 @@ -1100,7 +1100,7 @@ * buffer bound. 
*/ if (_mesa_is_gles31(ctx) && - ctx->Array.VAO->_Enabled & ~ctx->Array.VAO->VertexAttribBufferMask) { + ctx->Array.VAO->Enabled & ~ctx->Array.VAO->VertexAttribBufferMask) { _mesa_error(ctx, GL_INVALID_OPERATION, "%s(No VBO bound)", name); return GL_FALSE; } diff -Nru mesa-18.3.3/src/mesa/main/enable.c mesa-19.0.1/src/mesa/main/enable.c --- mesa-18.3.3/src/mesa/main/enable.c 2018-12-07 18:58:04.000000000 +0000 +++ mesa-19.0.1/src/mesa/main/enable.c 2019-03-31 23:16:37.000000000 +0000 @@ -1125,8 +1125,6 @@ /* GL3.0 - GL_framebuffer_sRGB */ case GL_FRAMEBUFFER_SRGB_EXT: - if (!_mesa_is_desktop_gl(ctx)) - goto invalid_enum_error; CHECK_EXTENSION(EXT_framebuffer_sRGB, cap); _mesa_set_framebuffer_srgb(ctx, state); return; @@ -1582,41 +1580,40 @@ case GL_VERTEX_ARRAY: if (ctx->API != API_OPENGL_COMPAT && ctx->API != API_OPENGLES) goto invalid_enum_error; - return ctx->Array.VAO->VertexAttrib[VERT_ATTRIB_POS].Enabled; + return !!(ctx->Array.VAO->Enabled & VERT_BIT_POS); case GL_NORMAL_ARRAY: if (ctx->API != API_OPENGL_COMPAT && ctx->API != API_OPENGLES) goto invalid_enum_error; - return ctx->Array.VAO->VertexAttrib[VERT_ATTRIB_NORMAL].Enabled; + return !!(ctx->Array.VAO->Enabled & VERT_BIT_NORMAL); case GL_COLOR_ARRAY: if (ctx->API != API_OPENGL_COMPAT && ctx->API != API_OPENGLES) goto invalid_enum_error; - return ctx->Array.VAO->VertexAttrib[VERT_ATTRIB_COLOR0].Enabled; + return !!(ctx->Array.VAO->Enabled & VERT_BIT_COLOR0); case GL_INDEX_ARRAY: if (ctx->API != API_OPENGL_COMPAT) goto invalid_enum_error; - return ctx->Array.VAO-> - VertexAttrib[VERT_ATTRIB_COLOR_INDEX].Enabled; + return !!(ctx->Array.VAO->Enabled & VERT_BIT_COLOR_INDEX); case GL_TEXTURE_COORD_ARRAY: if (ctx->API != API_OPENGL_COMPAT && ctx->API != API_OPENGLES) goto invalid_enum_error; - return ctx->Array.VAO-> - VertexAttrib[VERT_ATTRIB_TEX(ctx->Array.ActiveTexture)].Enabled; + return !!(ctx->Array.VAO->Enabled & + VERT_BIT_TEX(ctx->Array.ActiveTexture)); case GL_EDGE_FLAG_ARRAY: if (ctx->API != 
API_OPENGL_COMPAT) goto invalid_enum_error; - return ctx->Array.VAO->VertexAttrib[VERT_ATTRIB_EDGEFLAG].Enabled; + return !!(ctx->Array.VAO->Enabled & VERT_BIT_EDGEFLAG); case GL_FOG_COORDINATE_ARRAY_EXT: if (ctx->API != API_OPENGL_COMPAT) goto invalid_enum_error; - return ctx->Array.VAO->VertexAttrib[VERT_ATTRIB_FOG].Enabled; + return !!(ctx->Array.VAO->Enabled & VERT_BIT_FOG); case GL_SECONDARY_COLOR_ARRAY_EXT: if (ctx->API != API_OPENGL_COMPAT) goto invalid_enum_error; - return ctx->Array.VAO->VertexAttrib[VERT_ATTRIB_COLOR1].Enabled; + return !!(ctx->Array.VAO->Enabled & VERT_BIT_COLOR1); case GL_POINT_SIZE_ARRAY_OES: if (ctx->API != API_OPENGLES) goto invalid_enum_error; - return ctx->Array.VAO->VertexAttrib[VERT_ATTRIB_POINT_SIZE].Enabled; + return !!(ctx->Array.VAO->Enabled & VERT_BIT_POINT_SIZE); /* GL_ARB_texture_cube_map */ case GL_TEXTURE_CUBE_MAP: @@ -1765,8 +1762,6 @@ /* GL3.0 - GL_framebuffer_sRGB */ case GL_FRAMEBUFFER_SRGB_EXT: - if (!_mesa_is_desktop_gl(ctx)) - goto invalid_enum_error; CHECK_EXTENSION(EXT_framebuffer_sRGB); return ctx->Color.sRGBEnabled; diff -Nru mesa-18.3.3/src/mesa/main/errors.c mesa-19.0.1/src/mesa/main/errors.c --- mesa-18.3.3/src/mesa/main/errors.c 2018-01-06 23:02:18.000000000 +0000 +++ mesa-19.0.1/src/mesa/main/errors.c 2019-03-31 23:16:37.000000000 +0000 @@ -231,6 +231,9 @@ _mesa_debug_get_id(id); len = _mesa_vsnprintf(s, MAX_DEBUG_MESSAGE_LENGTH, fmtString, args); + if (len >= MAX_DEBUG_MESSAGE_LENGTH) + /* message was truncated */ + len = MAX_DEBUG_MESSAGE_LENGTH - 1; _mesa_log_msg(ctx, source, type, *id, severity, len, s); } diff -Nru mesa-18.3.3/src/mesa/main/extensions_table.h mesa-19.0.1/src/mesa/main/extensions_table.h --- mesa-18.3.3/src/mesa/main/extensions_table.h 2018-12-07 18:58:04.000000000 +0000 +++ mesa-19.0.1/src/mesa/main/extensions_table.h 2019-03-31 23:16:37.000000000 +0000 @@ -20,6 +20,7 @@ EXT(AMD_seamless_cubemap_per_texture , AMD_seamless_cubemap_per_texture , GLL, GLC, x , x , 2009) 
EXT(AMD_shader_stencil_export , ARB_shader_stencil_export , GLL, GLC, x , x , 2009) EXT(AMD_shader_trinary_minmax , dummy_true , GLL, GLC, x , x , 2012) +EXT(AMD_texture_texture4 , ARB_texture_gather , GLL, GLC, x , x , 2008) EXT(AMD_vertex_shader_layer , AMD_vertex_shader_layer , GLL, GLC, x , x , 2012) EXT(AMD_vertex_shader_viewport_index , AMD_vertex_shader_viewport_index , GLL, GLC, x , x , 2012) @@ -241,7 +242,9 @@ EXT(EXT_memory_object , EXT_memory_object , GLL, GLC, x , ES2, 2017) EXT(EXT_memory_object_fd , EXT_memory_object_fd , GLL, GLC, x , ES2, 2017) EXT(EXT_multi_draw_arrays , dummy_true , GLL, x , ES1, ES2, 1999) -EXT(EXT_occlusion_query_boolean , ARB_occlusion_query , x , x , x , ES2, 2001) +EXT(EXT_multisampled_render_to_texture , EXT_multisampled_render_to_texture , x , x , x , ES2, 2016) +EXT(EXT_multisampled_render_to_texture2 , EXT_multisampled_render_to_texture , x , x , x , ES2, 2016) +EXT(EXT_occlusion_query_boolean , ARB_occlusion_query2 , x , x , x , ES2, 2011) EXT(EXT_packed_depth_stencil , dummy_true , GLL, GLC, x , x , 2005) EXT(EXT_packed_float , EXT_packed_float , GLL, GLC, x , x , 2004) EXT(EXT_packed_pixels , dummy_true , GLL, x , x , x , 1997) @@ -254,6 +257,7 @@ EXT(EXT_render_snorm , EXT_render_snorm , x , x , x, 31, 2014) EXT(EXT_rescale_normal , dummy_true , GLL, x , x , x , 1997) EXT(EXT_robustness , KHR_robustness , x, x, x , ES2, 2011) +EXT(EXT_sRGB_write_control , EXT_framebuffer_sRGB , x, x , x , 30, 2013) EXT(EXT_secondary_color , dummy_true , GLL, x , x , x , 1999) EXT(EXT_semaphore , EXT_semaphore , GLL, GLC, x , ES2, 2017) EXT(EXT_semaphore_fd , EXT_semaphore_fd , GLL, GLC, x , ES2, 2017) @@ -261,6 +265,7 @@ EXT(EXT_separate_specular_color , dummy_true , GLL, x , x , x , 1997) EXT(EXT_shader_framebuffer_fetch , EXT_shader_framebuffer_fetch , GLL, GLC, x , ES2, 2013) EXT(EXT_shader_framebuffer_fetch_non_coherent, EXT_shader_framebuffer_fetch_non_coherent, GLL, GLC, x, ES2, 2018) +EXT(EXT_shader_implicit_conversions , 
dummy_true , x , x , x , 31, 2013) EXT(EXT_shader_integer_mix , EXT_shader_integer_mix , GLL, GLC, x , 30, 2013) EXT(EXT_shader_io_blocks , dummy_true , x , x , x , 31, 2014) EXT(EXT_shader_samples_identical , EXT_shader_samples_identical , GLL, GLC, x , 31, 2015) @@ -275,9 +280,10 @@ EXT(EXT_texture_array , EXT_texture_array , GLL, GLC, x , x , 2006) EXT(EXT_texture_border_clamp , ARB_texture_border_clamp , x , x , x , ES2, 2014) EXT(EXT_texture_buffer , OES_texture_buffer , x , x , x , 31, 2014) +EXT(EXT_texture_compression_bptc , ARB_texture_compression_bptc , x , x , x , 30, 2017) EXT(EXT_texture_compression_dxt1 , ANGLE_texture_compression_dxt , GLL, GLC, ES1, ES2, 2004) EXT(EXT_texture_compression_latc , EXT_texture_compression_latc , GLL, x , x , x , 2006) -EXT(EXT_texture_compression_rgtc , ARB_texture_compression_rgtc , GLL, GLC, x , x , 2004) +EXT(EXT_texture_compression_rgtc , ARB_texture_compression_rgtc , GLL, GLC, x , 30, 2004) EXT(EXT_texture_compression_s3tc , EXT_texture_compression_s3tc , GLL, GLC, x , ES2, 2000) EXT(EXT_texture_cube_map , ARB_texture_cube_map , GLL, x , x , x , 2001) EXT(EXT_texture_cube_map_array , OES_texture_cube_map_array , x , x , x , 31, 2014) @@ -295,11 +301,13 @@ EXT(EXT_texture_rectangle , NV_texture_rectangle , GLL, x , x , x , 2004) EXT(EXT_texture_rg , ARB_texture_rg , x , x , x , ES2, 2011) EXT(EXT_texture_sRGB , EXT_texture_sRGB , GLL, GLC, x , x , 2004) +EXT(EXT_texture_sRGB_R8 , EXT_texture_sRGB_R8 , x , x , x , 30, 2015) EXT(EXT_texture_sRGB_decode , EXT_texture_sRGB_decode , GLL, GLC, x , 30, 2006) EXT(EXT_texture_shared_exponent , EXT_texture_shared_exponent , GLL, GLC, x , x , 2004) EXT(EXT_texture_snorm , EXT_texture_snorm , GLL, GLC, x , x , 2009) EXT(EXT_texture_swizzle , EXT_texture_swizzle , GLL, GLC, x , x , 2008) EXT(EXT_texture_type_2_10_10_10_REV , EXT_texture_type_2_10_10_10_REV , x , x , x , ES2, 2008) +EXT(EXT_texture_view , OES_texture_view , x , x , x , 31, 2014) EXT(EXT_timer_query , 
EXT_timer_query , GLL, GLC, x , x , 2006) EXT(EXT_transform_feedback , EXT_transform_feedback , GLL, GLC, x , x , 2011) EXT(EXT_unpack_subimage , dummy_true , x , x , x , ES2, 2011) diff -Nru mesa-18.3.3/src/mesa/main/fbobject.c mesa-19.0.1/src/mesa/main/fbobject.c --- mesa-18.3.3/src/mesa/main/fbobject.c 2018-12-07 18:58:04.000000000 +0000 +++ mesa-19.0.1/src/mesa/main/fbobject.c 2019-03-31 23:16:37.000000000 +0000 @@ -497,8 +497,8 @@ struct gl_framebuffer *fb, struct gl_renderbuffer_attachment *att, struct gl_texture_object *texObj, - GLenum texTarget, GLuint level, GLuint layer, - GLboolean layered) + GLenum texTarget, GLuint level, GLsizei samples, + GLuint layer, GLboolean layered) { struct gl_renderbuffer *rb = att->Renderbuffer; @@ -520,6 +520,7 @@ /* always update these fields */ att->TextureLevel = level; + att->NumSamples = samples; att->CubeMapFace = _mesa_tex_target_to_face(texTarget); att->Zoffset = layer; att->Layered = layered; @@ -750,6 +751,7 @@ case GL_SRGB8: case GL_RGB10: case GL_RGB9_E5: + case GL_SR8_EXT: return GL_FALSE; default: break; @@ -1001,6 +1003,7 @@ fb->_HasSNormOrFloatColorBuffer = GL_FALSE; fb->_HasAttachments = true; fb->_IntegerBuffers = 0; + fb->_RGBBuffers = 0; /* Start at -2 to more easily loop over all attachment points. 
* -2: depth buffer @@ -1084,8 +1087,11 @@ return; } - attNumSamples = texImg->NumSamples; - attNumStorageSamples = texImg->NumSamples; + if (att->NumSamples > 0) + attNumSamples = att->NumSamples; + else + attNumSamples = texImg->NumSamples; + attNumStorageSamples = attNumSamples; } else if (att->Type == GL_RENDERBUFFER_EXT) { minWidth = MIN2(minWidth, att->Renderbuffer->Width); @@ -1144,6 +1150,9 @@ if (_mesa_is_format_integer_color(attFormat)) fb->_IntegerBuffers |= (1 << i); + if (f == GL_RGB) + fb->_RGBBuffers |= (1 << i); + fb->_AllColorBuffersFixedPoint = fb->_AllColorBuffersFixedPoint && (type == GL_UNSIGNED_NORMALIZED || type == GL_SIGNED_NORMALIZED); @@ -3497,7 +3506,8 @@ GLenum attachment, struct gl_renderbuffer_attachment *att, struct gl_texture_object *texObj, GLenum textarget, - GLint level, GLuint layer, GLboolean layered) + GLint level, GLsizei samples, + GLuint layer, GLboolean layered) { FLUSH_VERTICES(ctx, _NEW_BUFFERS); @@ -3508,6 +3518,7 @@ level == fb->Attachment[BUFFER_STENCIL].TextureLevel && _mesa_tex_target_to_face(textarget) == fb->Attachment[BUFFER_STENCIL].CubeMapFace && + samples == fb->Attachment[BUFFER_STENCIL].NumSamples && layer == fb->Attachment[BUFFER_STENCIL].Zoffset) { /* The texture object is already attached to the stencil attachment * point. Don't create a new renderbuffer; just reuse the stencil @@ -3521,13 +3532,14 @@ level == fb->Attachment[BUFFER_DEPTH].TextureLevel && _mesa_tex_target_to_face(textarget) == fb->Attachment[BUFFER_DEPTH].CubeMapFace && + samples == fb->Attachment[BUFFER_DEPTH].NumSamples && layer == fb->Attachment[BUFFER_DEPTH].Zoffset) { /* As above, but with depth and stencil transposed. 
*/ reuse_framebuffer_texture_attachment(fb, BUFFER_STENCIL, BUFFER_DEPTH); } else { set_texture_attachment(ctx, fb, att, texObj, textarget, - level, layer, layered); + level, samples, layer, layered); if (attachment == GL_DEPTH_STENCIL_ATTACHMENT) { /* Above we created a new renderbuffer and attached it to the @@ -3582,15 +3594,15 @@ get_attachment(ctx, fb, attachment, NULL); _mesa_framebuffer_texture(ctx, fb, attachment, att, texObj, textarget, - level, layer, GL_FALSE); + level, 0, layer, GL_FALSE); } static void framebuffer_texture_with_dims(int dims, GLenum target, GLenum attachment, GLenum textarget, - GLuint texture, GLint level, GLint layer, - const char *caller) + GLuint texture, GLint level, GLsizei samples, + GLint layer, const char *caller) { GET_CURRENT_CONTEXT(ctx); struct gl_framebuffer *fb; @@ -3625,7 +3637,7 @@ return; _mesa_framebuffer_texture(ctx, fb, attachment, att, texObj, textarget, - level, layer, GL_FALSE); + level, samples, layer, GL_FALSE); } @@ -3644,7 +3656,7 @@ GLenum textarget, GLuint texture, GLint level) { framebuffer_texture_with_dims(1, target, attachment, textarget, texture, - level, 0, "glFramebufferTexture1D"); + level, 0, 0, "glFramebufferTexture1D"); } @@ -3663,7 +3675,17 @@ GLenum textarget, GLuint texture, GLint level) { framebuffer_texture_with_dims(2, target, attachment, textarget, texture, - level, 0, "glFramebufferTexture2D"); + level, 0, 0, "glFramebufferTexture2D"); +} + + +void GLAPIENTRY +_mesa_FramebufferTexture2DMultisampleEXT(GLenum target, GLenum attachment, + GLenum textarget, GLuint texture, + GLint level, GLsizei samples) +{ + framebuffer_texture_with_dims(2, target, attachment, textarget, texture, + level, samples, 0, "glFramebufferTexture2DMultisampleEXT"); } @@ -3683,7 +3705,7 @@ GLint level, GLint layer) { framebuffer_texture_with_dims(3, target, attachment, textarget, texture, - level, layer, "glFramebufferTexture3D"); + level, 0, layer, "glFramebufferTexture3D"); } @@ -3773,7 +3795,7 @@ } 
_mesa_framebuffer_texture(ctx, fb, attachment, att, texObj, textarget, - level, layer, layered); + level, 0, layer, layered); } void GLAPIENTRY @@ -4252,7 +4274,7 @@ } } else { - if (ctx->Extensions.EXT_framebuffer_sRGB) { + if (ctx->Extensions.EXT_sRGB) { *params = _mesa_get_format_color_encoding(att->Renderbuffer->Format); } @@ -4356,6 +4378,18 @@ goto invalid_pname_enum; } return; + case GL_FRAMEBUFFER_ATTACHMENT_TEXTURE_SAMPLES_EXT: + if (!ctx->Extensions.EXT_multisampled_render_to_texture) { + goto invalid_pname_enum; + } else if (att->Type == GL_TEXTURE) { + *params = att->NumSamples; + } else if (att->Type == GL_NONE) { + _mesa_error(ctx, err, "%s(invalid pname %s)", caller, + _mesa_enum_to_string(pname)); + } else { + goto invalid_pname_enum; + } + return; default: goto invalid_pname_enum; } @@ -4607,6 +4641,86 @@ return; } +static struct gl_renderbuffer_attachment * +get_fb_attachment(struct gl_context *ctx, struct gl_framebuffer *fb, + const GLenum attachment) +{ + switch (attachment) { + case GL_COLOR: + return &fb->Attachment[BUFFER_BACK_LEFT]; + case GL_COLOR_ATTACHMENT0: + case GL_COLOR_ATTACHMENT1: + case GL_COLOR_ATTACHMENT2: + case GL_COLOR_ATTACHMENT3: + case GL_COLOR_ATTACHMENT4: + case GL_COLOR_ATTACHMENT5: + case GL_COLOR_ATTACHMENT6: + case GL_COLOR_ATTACHMENT7: + case GL_COLOR_ATTACHMENT8: + case GL_COLOR_ATTACHMENT9: + case GL_COLOR_ATTACHMENT10: + case GL_COLOR_ATTACHMENT11: + case GL_COLOR_ATTACHMENT12: + case GL_COLOR_ATTACHMENT13: + case GL_COLOR_ATTACHMENT14: + case GL_COLOR_ATTACHMENT15: { + const unsigned i = attachment - GL_COLOR_ATTACHMENT0; + if (i >= ctx->Const.MaxColorAttachments) + return NULL; + return &fb->Attachment[BUFFER_COLOR0 + i]; + } + case GL_DEPTH: + case GL_DEPTH_ATTACHMENT: + case GL_DEPTH_STENCIL_ATTACHMENT: + return &fb->Attachment[BUFFER_DEPTH]; + case GL_STENCIL: + case GL_STENCIL_ATTACHMENT: + return &fb->Attachment[BUFFER_STENCIL]; + default: + return NULL; + } +} + +static void +discard_framebuffer(struct 
gl_context *ctx, struct gl_framebuffer *fb, + GLsizei numAttachments, const GLenum *attachments) +{ + if (!ctx->Driver.DiscardFramebuffer) + return; + + for (int i = 0; i < numAttachments; i++) { + struct gl_renderbuffer_attachment *att = + get_fb_attachment(ctx, fb, attachments[i]); + + if (!att) + continue; + + /* If we're asked to invalidate just depth or just stencil, but the + * attachment is packed depth/stencil, then we can only use + * Driver.DiscardFramebuffer if the attachments list includes both depth + * and stencil and they both point at the same renderbuffer. + */ + if ((attachments[i] == GL_DEPTH_ATTACHMENT || + attachments[i] == GL_STENCIL_ATTACHMENT) && + (!att->Renderbuffer || + att->Renderbuffer->_BaseFormat == GL_DEPTH_STENCIL)) { + GLenum other_format = (attachments[i] == GL_DEPTH_ATTACHMENT ? + GL_STENCIL_ATTACHMENT : GL_DEPTH_ATTACHMENT); + bool has_both = false; + for (int j = 0; j < numAttachments; j++) { + if (attachments[j] == other_format) + has_both = true; + break; + } + + if (fb->Attachment[BUFFER_DEPTH].Renderbuffer != + fb->Attachment[BUFFER_STENCIL].Renderbuffer || !has_both) + continue; + } + + ctx->Driver.DiscardFramebuffer(ctx, fb, att); + } +} void GLAPIENTRY _mesa_InvalidateSubFramebuffer_no_error(GLenum target, GLsizei numAttachments, @@ -4667,12 +4781,18 @@ "glInvalidateNamedFramebufferSubData"); } - void GLAPIENTRY _mesa_InvalidateFramebuffer_no_error(GLenum target, GLsizei numAttachments, const GLenum *attachments) { - /* no-op */ + struct gl_framebuffer *fb; + GET_CURRENT_CONTEXT(ctx); + + fb = get_framebuffer_target(ctx, target); + if (!fb) + return; + + discard_framebuffer(ctx, fb, numAttachments, attachments); } @@ -4708,6 +4828,8 @@ ctx->Const.MaxViewportWidth, ctx->Const.MaxViewportHeight, "glInvalidateFramebuffer"); + + discard_framebuffer(ctx, fb, numAttachments, attachments); } @@ -4794,8 +4916,7 @@ } } - if (ctx->Driver.DiscardFramebuffer) - ctx->Driver.DiscardFramebuffer(ctx, target, numAttachments, 
attachments); + discard_framebuffer(ctx, fb, numAttachments, attachments); return; diff -Nru mesa-18.3.3/src/mesa/main/fbobject.h mesa-19.0.1/src/mesa/main/fbobject.h --- mesa-18.3.3/src/mesa/main/fbobject.h 2018-12-07 18:58:04.000000000 +0000 +++ mesa-19.0.1/src/mesa/main/fbobject.h 2019-03-31 23:16:37.000000000 +0000 @@ -129,7 +129,8 @@ GLenum attachment, struct gl_renderbuffer_attachment *att, struct gl_texture_object *texObj, GLenum textarget, - GLint level, GLuint layer, GLboolean layered); + GLint level, GLsizei samples, + GLuint layer, GLboolean layered); extern GLenum _mesa_check_framebuffer_status(struct gl_context *ctx, @@ -250,6 +251,11 @@ _mesa_FramebufferTexture2D(GLenum target, GLenum attachment, GLenum textarget, GLuint texture, GLint level); +void GLAPIENTRY +_mesa_FramebufferTexture2DMultisampleEXT(GLenum target, GLenum attachment, + GLenum textarget, GLuint texture, + GLint level, GLsizei samples); + extern void GLAPIENTRY _mesa_FramebufferTexture3D_no_error(GLenum target, GLenum attachment, GLenum textarget, GLuint texture, diff -Nru mesa-18.3.3/src/mesa/main/formatquery.c mesa-19.0.1/src/mesa/main/formatquery.c --- mesa-18.3.3/src/mesa/main/formatquery.c 2018-09-27 19:13:54.000000000 +0000 +++ mesa-19.0.1/src/mesa/main/formatquery.c 2019-03-31 23:16:37.000000000 +0000 @@ -1241,7 +1241,7 @@ break; case GL_SRGB_WRITE: - if (!_mesa_has_EXT_framebuffer_sRGB(ctx) || + if (!ctx->Extensions.EXT_sRGB || !_mesa_is_color_format(internalformat)) { goto end; } diff -Nru mesa-18.3.3/src/mesa/main/formats.c mesa-19.0.1/src/mesa/main/formats.c --- mesa-18.3.3/src/mesa/main/formats.c 2018-09-27 19:13:54.000000000 +0000 +++ mesa-19.0.1/src/mesa/main/formats.c 2019-03-31 23:16:37.000000000 +0000 @@ -1108,6 +1108,7 @@ *comps = 4; return; case MESA_FORMAT_L_SRGB8: + case MESA_FORMAT_R_SRGB8: *datatype = GL_UNSIGNED_BYTE; *comps = 1; return; @@ -1670,6 +1671,7 @@ (type == GL_UNSIGNED_SHORT_8_8_REV_MESA && littleEndian != swapBytes)); case MESA_FORMAT_R_UNORM8: + 
case MESA_FORMAT_R_SRGB8: return format == GL_RED && type == GL_UNSIGNED_BYTE; case MESA_FORMAT_R8G8_UNORM: return format == GL_RG && type == GL_UNSIGNED_BYTE && littleEndian; diff -Nru mesa-18.3.3/src/mesa/main/formats.csv mesa-19.0.1/src/mesa/main/formats.csv --- mesa-18.3.3/src/mesa/main/formats.csv 2017-11-05 00:14:08.000000000 +0000 +++ mesa-19.0.1/src/mesa/main/formats.csv 2019-03-31 23:16:37.000000000 +0000 @@ -158,6 +158,7 @@ MESA_FORMAT_A8L8_SRGB , packed, 1, 1, 1, un8 , un8 , , , yyyx, srgb # Array sRGB formats +MESA_FORMAT_R_SRGB8 , array , 1, 1, 1, un8 , , , , x001, srgb MESA_FORMAT_L_SRGB8 , array , 1, 1, 1, un8 , , , , xxx1, srgb MESA_FORMAT_BGR_SRGB8 , array , 1, 1, 1, un8 , un8 , un8 , , zyx1, srgb diff -Nru mesa-18.3.3/src/mesa/main/formats.h mesa-19.0.1/src/mesa/main/formats.h --- mesa-18.3.3/src/mesa/main/formats.h 2018-09-27 19:13:54.000000000 +0000 +++ mesa-19.0.1/src/mesa/main/formats.h 2019-03-31 23:16:37.000000000 +0000 @@ -440,6 +440,7 @@ MESA_FORMAT_X8B8G8R8_SRGB, /* RRRR RRRR GGGG GGGG BBBB BBBB xxxx xxxx */ MESA_FORMAT_L8A8_SRGB, /* AAAA AAAA LLLL LLLL */ MESA_FORMAT_A8L8_SRGB, /* LLLL LLLL AAAA AAAA */ + MESA_FORMAT_R_SRGB8, /* RRRR RRRR */ /* Array sRGB formats */ MESA_FORMAT_L_SRGB8, /* ubyte[i] = L */ diff -Nru mesa-18.3.3/src/mesa/main/framebuffer.c mesa-19.0.1/src/mesa/main/framebuffer.c --- mesa-18.3.3/src/mesa/main/framebuffer.c 2018-09-27 19:13:54.000000000 +0000 +++ mesa-19.0.1/src/mesa/main/framebuffer.c 2019-03-31 23:16:37.000000000 +0000 @@ -459,7 +459,7 @@ fb->Visual.rgbBits = fb->Visual.redBits + fb->Visual.greenBits + fb->Visual.blueBits; if (_mesa_get_format_color_encoding(fmt) == GL_SRGB) - fb->Visual.sRGBCapable = ctx->Extensions.EXT_framebuffer_sRGB; + fb->Visual.sRGBCapable = ctx->Extensions.EXT_sRGB; break; } } diff -Nru mesa-18.3.3/src/mesa/main/get.c mesa-19.0.1/src/mesa/main/get.c --- mesa-18.3.3/src/mesa/main/get.c 2018-12-07 18:58:04.000000000 +0000 +++ mesa-19.0.1/src/mesa/main/get.c 2019-03-31 
23:16:37.000000000 +0000 @@ -727,14 +727,50 @@ v->value_matrix = ctx->TextureMatrixStack[unit].Top; break; + case GL_VERTEX_ARRAY: + v->value_bool = !!(ctx->Array.VAO->Enabled & VERT_BIT_POS); + break; + case GL_NORMAL_ARRAY: + v->value_bool = !!(ctx->Array.VAO->Enabled & VERT_BIT_NORMAL); + break; + case GL_COLOR_ARRAY: + v->value_bool = !!(ctx->Array.VAO->Enabled & VERT_BIT_COLOR0); + break; case GL_TEXTURE_COORD_ARRAY: - case GL_TEXTURE_COORD_ARRAY_SIZE: + v->value_bool = !!(ctx->Array.VAO->Enabled & VERT_BIT_TEX(ctx->Array.ActiveTexture)); + break; + case GL_INDEX_ARRAY: + v->value_bool = !!(ctx->Array.VAO->Enabled & VERT_BIT_COLOR_INDEX); + break; + case GL_EDGE_FLAG_ARRAY: + v->value_bool = !!(ctx->Array.VAO->Enabled & VERT_BIT_EDGEFLAG); + break; + case GL_SECONDARY_COLOR_ARRAY: + v->value_bool = !!(ctx->Array.VAO->Enabled & VERT_BIT_COLOR1); + break; + case GL_FOG_COORDINATE_ARRAY: + v->value_bool = !!(ctx->Array.VAO->Enabled & VERT_BIT_FOG); + break; + case GL_POINT_SIZE_ARRAY_OES: + v->value_bool = !!(ctx->Array.VAO->Enabled & VERT_BIT_POINT_SIZE); + break; + case GL_TEXTURE_COORD_ARRAY_TYPE: case GL_TEXTURE_COORD_ARRAY_STRIDE: array = &ctx->Array.VAO->VertexAttrib[VERT_ATTRIB_TEX(ctx->Array.ActiveTexture)]; v->value_int = *(GLuint *) ((char *) array + d->offset); break; + case GL_TEXTURE_COORD_ARRAY_SIZE: + array = &ctx->Array.VAO->VertexAttrib[VERT_ATTRIB_TEX(ctx->Array.ActiveTexture)]; + v->value_int = array->Format.Size; + break; + + case GL_VERTEX_ARRAY_SIZE: + array = &ctx->Array.VAO->VertexAttrib[VERT_ATTRIB_POS]; + v->value_int = array->Format.Size; + break; + case GL_ACTIVE_TEXTURE_ARB: v->value_int = GL_TEXTURE0_ARB + ctx->Texture.CurrentUnit; break; @@ -870,6 +906,9 @@ break; /* GL_EXT_external_objects */ + case GL_NUM_DEVICE_UUIDS_EXT: + v->value_int = 1; + break; case GL_DRIVER_UUID_EXT: _mesa_get_driver_uuid(ctx, v->value_int_4); break; @@ -942,11 +981,11 @@ /* ARB_vertex_array_bgra */ case GL_COLOR_ARRAY_SIZE: array = 
&ctx->Array.VAO->VertexAttrib[VERT_ATTRIB_COLOR0]; - v->value_int = array->Format == GL_BGRA ? GL_BGRA : array->Size; + v->value_int = array->Format.Format == GL_BGRA ? GL_BGRA : array->Format.Size; break; case GL_SECONDARY_COLOR_ARRAY_SIZE: array = &ctx->Array.VAO->VertexAttrib[VERT_ATTRIB_COLOR1]; - v->value_int = array->Format == GL_BGRA ? GL_BGRA : array->Size; + v->value_int = array->Format.Format == GL_BGRA ? GL_BGRA : array->Format.Size; break; /* ARB_copy_buffer */ diff -Nru mesa-18.3.3/src/mesa/main/get_hash_generator.py mesa-19.0.1/src/mesa/main/get_hash_generator.py --- mesa-18.3.3/src/mesa/main/get_hash_generator.py 2018-12-07 18:58:04.000000000 +0000 +++ mesa-19.0.1/src/mesa/main/get_hash_generator.py 2019-03-31 23:16:37.000000000 +0000 @@ -30,15 +30,14 @@ from __future__ import print_function -import os, sys, imp, getopt +import os, sys, getopt from collections import defaultdict import get_hash_params -cur_dir = os.path.dirname(sys.argv[0]) -param_desc_file = "%s/get_hash_params.py" % cur_dir +param_desc_file = os.path.join(os.path.dirname(__file__), "get_hash_params.py") -GLAPI = "%s/../../mapi/glapi/gen" % cur_dir -sys.path.append(GLAPI) +GLAPI = os.path.join(os.path.dirname(__file__), "..", "..", "mapi", "glapi", "gen") +sys.path.insert(0, GLAPI) import gl_XML prime_factor = 89 diff -Nru mesa-18.3.3/src/mesa/main/get_hash_params.py mesa-19.0.1/src/mesa/main/get_hash_params.py --- mesa-18.3.3/src/mesa/main/get_hash_params.py 2018-12-07 18:58:04.000000000 +0000 +++ mesa-19.0.1/src/mesa/main/get_hash_params.py 2019-03-31 23:16:37.000000000 +0000 @@ -211,20 +211,20 @@ [ "TEXTURE_2D", "LOC_CUSTOM, TYPE_BOOLEAN, 0, NO_EXTRA" ], [ "TEXTURE_MATRIX", "LOC_CUSTOM, TYPE_MATRIX, 0, extra_valid_texture_unit" ], [ "TEXTURE_STACK_DEPTH", "LOC_CUSTOM, TYPE_INT, 0, extra_valid_texture_unit" ], - [ "VERTEX_ARRAY", "ARRAY_BOOL(VertexAttrib[VERT_ATTRIB_POS].Enabled), NO_EXTRA" ], - [ "VERTEX_ARRAY_SIZE", "ARRAY_UBYTE(VertexAttrib[VERT_ATTRIB_POS].Size), NO_EXTRA" ], 
- [ "VERTEX_ARRAY_TYPE", "ARRAY_ENUM16(VertexAttrib[VERT_ATTRIB_POS].Type), NO_EXTRA" ], + [ "VERTEX_ARRAY", "LOC_CUSTOM, TYPE_BOOLEAN, 0, NO_EXTRA" ], + [ "VERTEX_ARRAY_SIZE", "LOC_CUSTOM, TYPE_INT, 0, NO_EXTRA" ], + [ "VERTEX_ARRAY_TYPE", "ARRAY_ENUM16(VertexAttrib[VERT_ATTRIB_POS].Format.Type), NO_EXTRA" ], [ "VERTEX_ARRAY_STRIDE", "ARRAY_SHORT(VertexAttrib[VERT_ATTRIB_POS].Stride), NO_EXTRA" ], - [ "NORMAL_ARRAY", "ARRAY_BOOL(VertexAttrib[VERT_ATTRIB_NORMAL].Enabled), NO_EXTRA" ], - [ "NORMAL_ARRAY_TYPE", "ARRAY_ENUM16(VertexAttrib[VERT_ATTRIB_NORMAL].Type), NO_EXTRA" ], + [ "NORMAL_ARRAY", "LOC_CUSTOM, TYPE_BOOLEAN, 0, NO_EXTRA" ], + [ "NORMAL_ARRAY_TYPE", "ARRAY_ENUM16(VertexAttrib[VERT_ATTRIB_NORMAL].Format.Type), NO_EXTRA" ], [ "NORMAL_ARRAY_STRIDE", "ARRAY_SHORT(VertexAttrib[VERT_ATTRIB_NORMAL].Stride), NO_EXTRA" ], - [ "COLOR_ARRAY", "ARRAY_BOOL(VertexAttrib[VERT_ATTRIB_COLOR0].Enabled), NO_EXTRA" ], + [ "COLOR_ARRAY", "LOC_CUSTOM, TYPE_BOOLEAN, 0, NO_EXTRA" ], [ "COLOR_ARRAY_SIZE", "LOC_CUSTOM, TYPE_INT, 0, NO_EXTRA" ], - [ "COLOR_ARRAY_TYPE", "ARRAY_ENUM16(VertexAttrib[VERT_ATTRIB_COLOR0].Type), NO_EXTRA" ], + [ "COLOR_ARRAY_TYPE", "ARRAY_ENUM16(VertexAttrib[VERT_ATTRIB_COLOR0].Format.Type), NO_EXTRA" ], [ "COLOR_ARRAY_STRIDE", "ARRAY_SHORT(VertexAttrib[VERT_ATTRIB_COLOR0].Stride), NO_EXTRA" ], - [ "TEXTURE_COORD_ARRAY", "LOC_CUSTOM, TYPE_BOOLEAN, offsetof(struct gl_array_attributes, Enabled), NO_EXTRA" ], - [ "TEXTURE_COORD_ARRAY_SIZE", "LOC_CUSTOM, TYPE_UBYTE, offsetof(struct gl_array_attributes, Size), NO_EXTRA" ], - [ "TEXTURE_COORD_ARRAY_TYPE", "LOC_CUSTOM, TYPE_ENUM16, offsetof(struct gl_array_attributes, Type), NO_EXTRA" ], + [ "TEXTURE_COORD_ARRAY", "LOC_CUSTOM, TYPE_BOOLEAN, 0, NO_EXTRA" ], + [ "TEXTURE_COORD_ARRAY_SIZE", "LOC_CUSTOM, TYPE_INT, 0, NO_EXTRA" ], + [ "TEXTURE_COORD_ARRAY_TYPE", "LOC_CUSTOM, TYPE_ENUM16, offsetof(struct gl_array_attributes, Format.Type), NO_EXTRA" ], [ "TEXTURE_COORD_ARRAY_STRIDE", "LOC_CUSTOM, TYPE_SHORT, 
offsetof(struct gl_array_attributes, Stride), NO_EXTRA" ], # GL_ARB_multitexture @@ -253,8 +253,8 @@ { "apis": ["GLES"], "params": [ # OES_point_size_array - [ "POINT_SIZE_ARRAY_OES", "ARRAY_FIELD(VertexAttrib[VERT_ATTRIB_POINT_SIZE].Enabled, TYPE_BOOLEAN), NO_EXTRA" ], - [ "POINT_SIZE_ARRAY_TYPE_OES", "ARRAY_FIELD(VertexAttrib[VERT_ATTRIB_POINT_SIZE].Type, TYPE_ENUM16), NO_EXTRA" ], + [ "POINT_SIZE_ARRAY_OES", "LOC_CUSTOM, TYPE_BOOLEAN, 0, NO_EXTRA" ], + [ "POINT_SIZE_ARRAY_TYPE_OES", "ARRAY_FIELD(VertexAttrib[VERT_ATTRIB_POINT_SIZE].Format.Type, TYPE_ENUM16), NO_EXTRA" ], [ "POINT_SIZE_ARRAY_STRIDE_OES", "ARRAY_FIELD(VertexAttrib[VERT_ATTRIB_POINT_SIZE].Stride, TYPE_SHORT), NO_EXTRA" ], [ "POINT_SIZE_ARRAY_BUFFER_BINDING_OES", "LOC_CUSTOM, TYPE_INT, 0, NO_EXTRA" ], ]}, @@ -463,6 +463,9 @@ [ "MIN_FRAGMENT_INTERPOLATION_OFFSET", "CONTEXT_FLOAT(Const.MinFragmentInterpolationOffset), extra_ARB_gpu_shader5_or_OES_sample_variables" ], [ "MAX_FRAGMENT_INTERPOLATION_OFFSET", "CONTEXT_FLOAT(Const.MaxFragmentInterpolationOffset), extra_ARB_gpu_shader5_or_OES_sample_variables" ], [ "FRAGMENT_INTERPOLATION_OFFSET_BITS", "CONST(FRAGMENT_INTERPOLATION_OFFSET_BITS), extra_ARB_gpu_shader5_or_OES_sample_variables" ], + +# GL_EXT_framebuffer_EXT / GLES 3.0 + EXT_sRGB_write_control + [ "FRAMEBUFFER_SRGB_EXT", "CONTEXT_BOOL(Color.sRGBEnabled), extra_EXT_framebuffer_sRGB" ], ]}, { "apis": ["GLES", "GLES2"], "params": [ @@ -793,12 +796,12 @@ [ "VERTEX_ARRAY_COUNT_EXT", "CONST(0), NO_EXTRA" ], [ "NORMAL_ARRAY_COUNT_EXT", "CONST(0), NO_EXTRA" ], [ "COLOR_ARRAY_COUNT_EXT", "CONST(0), NO_EXTRA" ], - [ "INDEX_ARRAY", "ARRAY_BOOL(VertexAttrib[VERT_ATTRIB_COLOR_INDEX].Enabled), NO_EXTRA" ], - [ "INDEX_ARRAY_TYPE", "ARRAY_ENUM16(VertexAttrib[VERT_ATTRIB_COLOR_INDEX].Type), NO_EXTRA" ], + [ "INDEX_ARRAY", "LOC_CUSTOM, TYPE_BOOLEAN, 0, NO_EXTRA" ], + [ "INDEX_ARRAY_TYPE", "ARRAY_ENUM16(VertexAttrib[VERT_ATTRIB_COLOR_INDEX].Format.Type), NO_EXTRA" ], [ "INDEX_ARRAY_STRIDE", 
"ARRAY_SHORT(VertexAttrib[VERT_ATTRIB_COLOR_INDEX].Stride), NO_EXTRA" ], [ "INDEX_ARRAY_COUNT_EXT", "CONST(0), NO_EXTRA" ], [ "TEXTURE_COORD_ARRAY_COUNT_EXT", "CONST(0), NO_EXTRA" ], - [ "EDGE_FLAG_ARRAY", "ARRAY_BOOL(VertexAttrib[VERT_ATTRIB_EDGEFLAG].Enabled), NO_EXTRA" ], + [ "EDGE_FLAG_ARRAY", "LOC_CUSTOM, TYPE_BOOLEAN, 0, NO_EXTRA" ], [ "EDGE_FLAG_ARRAY_STRIDE", "ARRAY_SHORT(VertexAttrib[VERT_ATTRIB_EDGEFLAG].Stride), NO_EXTRA" ], [ "EDGE_FLAG_ARRAY_COUNT_EXT", "CONST(0), NO_EXTRA" ], @@ -827,15 +830,15 @@ # GL_EXT_secondary_color [ "COLOR_SUM", "CONTEXT_BOOL(Fog.ColorSumEnabled), NO_EXTRA" ], [ "CURRENT_SECONDARY_COLOR", "CONTEXT_FIELD(Current.Attrib[VERT_ATTRIB_COLOR1][0], TYPE_FLOATN_4), extra_flush_current" ], - [ "SECONDARY_COLOR_ARRAY", "ARRAY_BOOL(VertexAttrib[VERT_ATTRIB_COLOR1].Enabled), NO_EXTRA" ], - [ "SECONDARY_COLOR_ARRAY_TYPE", "ARRAY_ENUM16(VertexAttrib[VERT_ATTRIB_COLOR1].Type), NO_EXTRA" ], + [ "SECONDARY_COLOR_ARRAY", "LOC_CUSTOM, TYPE_BOOLEAN, 0, NO_EXTRA" ], + [ "SECONDARY_COLOR_ARRAY_TYPE", "ARRAY_ENUM16(VertexAttrib[VERT_ATTRIB_COLOR1].Format.Type), NO_EXTRA" ], [ "SECONDARY_COLOR_ARRAY_STRIDE", "ARRAY_SHORT(VertexAttrib[VERT_ATTRIB_COLOR1].Stride), NO_EXTRA" ], [ "SECONDARY_COLOR_ARRAY_SIZE", "LOC_CUSTOM, TYPE_INT, 0, NO_EXTRA" ], # GL_EXT_fog_coord [ "CURRENT_FOG_COORDINATE", "CONTEXT_FLOAT(Current.Attrib[VERT_ATTRIB_FOG][0]), extra_flush_current" ], - [ "FOG_COORDINATE_ARRAY", "ARRAY_BOOL(VertexAttrib[VERT_ATTRIB_FOG].Enabled), NO_EXTRA" ], - [ "FOG_COORDINATE_ARRAY_TYPE", "ARRAY_ENUM16(VertexAttrib[VERT_ATTRIB_FOG].Type), NO_EXTRA" ], + [ "FOG_COORDINATE_ARRAY", "LOC_CUSTOM, TYPE_BOOLEAN, 0, NO_EXTRA" ], + [ "FOG_COORDINATE_ARRAY_TYPE", "ARRAY_ENUM16(VertexAttrib[VERT_ATTRIB_FOG].Format.Type), NO_EXTRA" ], [ "FOG_COORDINATE_ARRAY_STRIDE", "ARRAY_SHORT(VertexAttrib[VERT_ATTRIB_FOG].Stride), NO_EXTRA" ], [ "FOG_COORDINATE_SOURCE", "CONTEXT_ENUM16(Fog.FogCoordinateSource), NO_EXTRA" ], @@ -934,7 +937,6 @@ [ "RGBA_FLOAT_MODE_ARB", 
"BUFFER_FIELD(Visual.floatMode, TYPE_BOOLEAN), extra_core_ARB_color_buffer_float_and_new_buffers" ], # GL3.0 / GL_EXT_framebuffer_sRGB - [ "FRAMEBUFFER_SRGB_EXT", "CONTEXT_BOOL(Color.sRGBEnabled), extra_EXT_framebuffer_sRGB" ], [ "FRAMEBUFFER_SRGB_CAPABLE_EXT", "BUFFER_INT(Visual.sRGBCapable), extra_EXT_framebuffer_sRGB_and_new_buffers" ], # GL 3.1 diff -Nru mesa-18.3.3/src/mesa/main/glformats.c mesa-19.0.1/src/mesa/main/glformats.c --- mesa-18.3.3/src/mesa/main/glformats.c 2018-12-07 18:58:04.000000000 +0000 +++ mesa-19.0.1/src/mesa/main/glformats.c 2019-03-31 23:16:37.000000000 +0000 @@ -1352,11 +1352,9 @@ case GL_RGB4_S3TC: case GL_RGBA_S3TC: case GL_RGBA4_S3TC: - return _mesa_is_desktop_gl(ctx) && - ctx->Extensions.ANGLE_texture_compression_dxt; + return _mesa_has_S3_s3tc(ctx); case GL_COMPRESSED_LUMINANCE_ALPHA_3DC_ATI: - return ctx->API == API_OPENGL_COMPAT - && ctx->Extensions.ATI_texture_compression_3dc; + return _mesa_has_ATI_texture_compression_3dc(ctx); case GL_PALETTE4_RGB8_OES: case GL_PALETTE4_RGBA8_OES: case GL_PALETTE4_R5_G6_B5_OES: @@ -1373,34 +1371,27 @@ switch (_mesa_get_format_layout(m_format)) { case MESA_FORMAT_LAYOUT_S3TC: if (_mesa_get_format_color_encoding(m_format) == GL_LINEAR) { - /* Assume that the ANGLE flag will always be set if the - * EXT flag is set. 
- */ - return ctx->Extensions.ANGLE_texture_compression_dxt; + return _mesa_has_EXT_texture_compression_s3tc(ctx); } else { - return _mesa_is_desktop_gl(ctx) - && ctx->Extensions.EXT_texture_sRGB - && ctx->Extensions.EXT_texture_compression_s3tc; + return _mesa_has_EXT_texture_sRGB(ctx) && + _mesa_has_EXT_texture_compression_s3tc(ctx); } case MESA_FORMAT_LAYOUT_FXT1: - return _mesa_is_desktop_gl(ctx) - && ctx->Extensions.TDFX_texture_compression_FXT1; + return _mesa_has_3DFX_texture_compression_FXT1(ctx); case MESA_FORMAT_LAYOUT_RGTC: - return _mesa_is_desktop_gl(ctx) - && ctx->Extensions.ARB_texture_compression_rgtc; + return _mesa_has_ARB_texture_compression_rgtc(ctx) || + _mesa_has_EXT_texture_compression_rgtc(ctx); case MESA_FORMAT_LAYOUT_LATC: - return ctx->API == API_OPENGL_COMPAT - && ctx->Extensions.EXT_texture_compression_latc; + return _mesa_has_EXT_texture_compression_latc(ctx); case MESA_FORMAT_LAYOUT_ETC1: - return _mesa_is_gles(ctx) - && ctx->Extensions.OES_compressed_ETC1_RGB8_texture; + return _mesa_has_OES_compressed_ETC1_RGB8_texture(ctx); case MESA_FORMAT_LAYOUT_ETC2: - return _mesa_is_gles3(ctx) || ctx->Extensions.ARB_ES3_compatibility; + return _mesa_is_gles3(ctx) || _mesa_has_ARB_ES3_compatibility(ctx); case MESA_FORMAT_LAYOUT_BPTC: - return _mesa_is_desktop_gl(ctx) && - ctx->Extensions.ARB_texture_compression_bptc; + return _mesa_has_ARB_texture_compression_bptc(ctx) || + _mesa_has_EXT_texture_compression_bptc(ctx); case MESA_FORMAT_LAYOUT_ASTC: - return ctx->Extensions.KHR_texture_compression_astc_ldr; + return _mesa_has_KHR_texture_compression_astc_ldr(ctx); default: return GL_FALSE; } @@ -1811,7 +1802,7 @@ break; /* OK */ } if (format == GL_RGB_INTEGER_EXT && - ctx->Extensions.ARB_texture_rgb10_a2ui) { + _mesa_has_texture_rgb10_a2ui(ctx)) { break; /* OK */ } return GL_INVALID_OPERATION; @@ -1826,7 +1817,7 @@ break; /* OK */ } if ((format == GL_RGBA_INTEGER_EXT || format == GL_BGRA_INTEGER_EXT) && - ctx->Extensions.ARB_texture_rgb10_a2ui) { 
+ _mesa_has_texture_rgb10_a2ui(ctx)) { break; /* OK */ } return GL_INVALID_OPERATION; @@ -1840,7 +1831,7 @@ break; /* OK */ } if ((format == GL_RGBA_INTEGER_EXT || format == GL_BGRA_INTEGER_EXT) && - ctx->Extensions.ARB_texture_rgb10_a2ui) { + _mesa_has_texture_rgb10_a2ui(ctx)) { break; /* OK */ } if (type == GL_UNSIGNED_INT_2_10_10_10_REV && format == GL_RGB && @@ -1860,7 +1851,7 @@ return GL_NO_ERROR; case GL_FLOAT_32_UNSIGNED_INT_24_8_REV: - if (!ctx->Extensions.ARB_depth_buffer_float) { + if (!_mesa_has_float_depth_buffer(ctx)) { return GL_INVALID_ENUM; } if (format != GL_DEPTH_STENCIL) { @@ -1869,7 +1860,7 @@ return GL_NO_ERROR; case GL_UNSIGNED_INT_10F_11F_11F_REV: - if (!ctx->Extensions.EXT_packed_float) { + if (!_mesa_has_packed_float(ctx)) { return GL_INVALID_ENUM; } if (format != GL_RGB) { @@ -1887,7 +1878,7 @@ return GL_NO_ERROR; case GL_RG: case GL_RED: - if (_mesa_is_gles3(ctx) || ctx->Extensions.ARB_texture_rg) + if (_mesa_has_rg_textures(ctx)) return GL_NO_ERROR; default: return GL_INVALID_OPERATION; @@ -1941,8 +1932,8 @@ } case GL_RG: - if (!ctx->Extensions.ARB_texture_rg) - return GL_INVALID_ENUM; + if (!_mesa_has_rg_textures(ctx)) + return GL_INVALID_ENUM; switch (type) { case GL_BYTE: case GL_UNSIGNED_BYTE: @@ -1977,10 +1968,10 @@ return (ctx->API == API_OPENGLES2) ? GL_NO_ERROR : GL_INVALID_ENUM; case GL_UNSIGNED_INT_5_9_9_9_REV: - return ctx->Extensions.EXT_texture_shared_exponent + return _mesa_has_texture_shared_exponent(ctx) ? GL_NO_ERROR : GL_INVALID_ENUM; case GL_UNSIGNED_INT_10F_11F_11F_REV: - return ctx->Extensions.EXT_packed_float + return _mesa_has_packed_float(ctx) ? 
GL_NO_ERROR : GL_INVALID_ENUM; default: return GL_INVALID_ENUM; @@ -2048,7 +2039,7 @@ } case GL_YCBCR_MESA: - if (!ctx->Extensions.MESA_ycbcr_texture) + if (!_mesa_has_MESA_ycbcr_texture(ctx)) return GL_INVALID_ENUM; if (type == GL_UNSIGNED_SHORT_8_8_MESA || type == GL_UNSIGNED_SHORT_8_8_REV_MESA) @@ -2059,7 +2050,7 @@ case GL_DEPTH_STENCIL: if (type == GL_UNSIGNED_INT_24_8) return GL_NO_ERROR; - else if (ctx->Extensions.ARB_depth_buffer_float && + else if (_mesa_has_float_depth_buffer(ctx) && type == GL_FLOAT_32_UNSIGNED_INT_24_8_REV) return GL_NO_ERROR; else @@ -2078,8 +2069,7 @@ case GL_UNSIGNED_SHORT: case GL_INT: case GL_UNSIGNED_INT: - return (ctx->Version >= 30 || - ctx->Extensions.EXT_texture_integer) + return _mesa_has_integer_textures(ctx) ? GL_NO_ERROR : GL_INVALID_ENUM; default: return GL_INVALID_ENUM; @@ -2093,14 +2083,13 @@ case GL_UNSIGNED_SHORT: case GL_INT: case GL_UNSIGNED_INT: - return (ctx->Version >= 30 || - ctx->Extensions.EXT_texture_integer) + return _mesa_has_integer_textures(ctx) ? GL_NO_ERROR : GL_INVALID_ENUM; case GL_UNSIGNED_BYTE_3_3_2: case GL_UNSIGNED_BYTE_2_3_3_REV: case GL_UNSIGNED_SHORT_5_6_5: case GL_UNSIGNED_SHORT_5_6_5_REV: - return ctx->Extensions.ARB_texture_rgb10_a2ui + return _mesa_has_texture_rgb10_a2ui(ctx) ? GL_NO_ERROR : GL_INVALID_ENUM; default: return GL_INVALID_ENUM; @@ -2115,8 +2104,7 @@ case GL_INT: case GL_UNSIGNED_INT: /* NOTE: no packed formats w/ BGR format */ - return (ctx->Version >= 30 || - ctx->Extensions.EXT_texture_integer) + return _mesa_has_integer_textures(ctx) ? GL_NO_ERROR : GL_INVALID_ENUM; default: return GL_INVALID_ENUM; @@ -2131,8 +2119,7 @@ case GL_UNSIGNED_SHORT: case GL_INT: case GL_UNSIGNED_INT: - return (ctx->Version >= 30 || - ctx->Extensions.EXT_texture_integer) + return _mesa_has_integer_textures(ctx) ? 
GL_NO_ERROR : GL_INVALID_ENUM; case GL_UNSIGNED_SHORT_4_4_4_4: case GL_UNSIGNED_SHORT_4_4_4_4_REV: @@ -2142,7 +2129,7 @@ case GL_UNSIGNED_INT_8_8_8_8_REV: case GL_UNSIGNED_INT_10_10_10_2: case GL_UNSIGNED_INT_2_10_10_10_REV: - return ctx->Extensions.ARB_texture_rgb10_a2ui + return _mesa_has_texture_rgb10_a2ui(ctx) ? GL_NO_ERROR : GL_INVALID_ENUM; default: return GL_INVALID_ENUM; @@ -2157,7 +2144,7 @@ case GL_UNSIGNED_SHORT: case GL_INT: case GL_UNSIGNED_INT: - return ctx->Extensions.EXT_texture_integer + return _mesa_has_integer_textures(ctx) ? GL_NO_ERROR : GL_INVALID_ENUM; default: return GL_INVALID_ENUM; @@ -2185,7 +2172,7 @@ switch (format) { case GL_RED: case GL_RG: - if (ctx->API == API_OPENGLES || !ctx->Extensions.ARB_texture_rg) + if (!_mesa_has_rg_textures(ctx)) return GL_INVALID_VALUE; /* fallthrough */ case GL_ALPHA: @@ -2209,7 +2196,7 @@ || type == GL_UNSIGNED_SHORT_5_5_5_1 || type == GL_FLOAT || type == GL_HALF_FLOAT_OES - || (ctx->Extensions.EXT_texture_type_2_10_10_10_REV && + || (_mesa_has_texture_type_2_10_10_10_REV(ctx) && type == GL_UNSIGNED_INT_2_10_10_10_REV)); break; @@ -2327,7 +2314,9 @@ } } - if (ctx->Extensions.ARB_ES2_compatibility) { + if (_mesa_has_ARB_ES2_compatibility(ctx) || + _mesa_has_OES_framebuffer_object(ctx) || + ctx->API == API_OPENGLES2) { switch (internalFormat) { case GL_RGB565: return GL_RGB; @@ -2336,7 +2325,8 @@ } } - if (ctx->Extensions.ARB_depth_texture) { + if (_mesa_has_ARB_depth_texture(ctx) || _mesa_has_OES_depth_texture(ctx) || + ctx->API == API_OPENGL_CORE) { switch (internalFormat) { case GL_DEPTH_COMPONENT: case GL_DEPTH_COMPONENT16: @@ -2351,7 +2341,8 @@ } } - if (ctx->Extensions.ARB_texture_stencil8) { + if (_mesa_has_ARB_texture_stencil8(ctx) || + _mesa_has_OES_texture_stencil8(ctx)) { switch (internalFormat) { case GL_STENCIL_INDEX: case GL_STENCIL_INDEX1: @@ -2388,43 +2379,52 @@ return base_compressed; } - if ((ctx->Extensions.KHR_texture_compression_astc_ldr && + if 
((_mesa_has_KHR_texture_compression_astc_ldr(ctx) && is_astc_2d_format(internalFormat)) || - (ctx->Extensions.OES_texture_compression_astc && + (_mesa_has_OES_texture_compression_astc(ctx) && is_astc_3d_format(internalFormat))) return GL_RGBA; - if (ctx->Extensions.MESA_ycbcr_texture) { + if (!_mesa_has_MESA_ycbcr_texture(ctx)) { if (internalFormat == GL_YCBCR_MESA) return GL_YCBCR_MESA; } - if (ctx->Extensions.ARB_texture_float) { + if (_mesa_has_half_float_textures(ctx)) { switch (internalFormat) { case GL_ALPHA16F_ARB: - case GL_ALPHA32F_ARB: return GL_ALPHA; case GL_RGBA16F_ARB: - case GL_RGBA32F_ARB: return GL_RGBA; case GL_RGB16F_ARB: - case GL_RGB32F_ARB: return GL_RGB; case GL_INTENSITY16F_ARB: - case GL_INTENSITY32F_ARB: return GL_INTENSITY; case GL_LUMINANCE16F_ARB: - case GL_LUMINANCE32F_ARB: return GL_LUMINANCE; case GL_LUMINANCE_ALPHA16F_ARB: + return GL_LUMINANCE_ALPHA; + } + } + + if (_mesa_has_float_textures(ctx)) { + switch (internalFormat) { + case GL_ALPHA32F_ARB: + return GL_ALPHA; + case GL_RGBA32F_ARB: + return GL_RGBA; + case GL_RGB32F_ARB: + return GL_RGB; + case GL_INTENSITY32F_ARB: + return GL_INTENSITY; + case GL_LUMINANCE32F_ARB: + return GL_LUMINANCE; case GL_LUMINANCE_ALPHA32F_ARB: return GL_LUMINANCE_ALPHA; - default: - ; /* fallthrough */ } } - if (ctx->Extensions.EXT_texture_snorm) { + if (_mesa_has_EXT_texture_snorm(ctx) || _mesa_is_gles3(ctx)) { switch (internalFormat) { case GL_RED_SNORM: case GL_R8_SNORM: @@ -2463,7 +2463,7 @@ } } - if (ctx->Extensions.EXT_texture_sRGB) { + if (_mesa_has_EXT_texture_sRGB(ctx) || _mesa_is_gles3(ctx)) { switch (internalFormat) { case GL_SRGB_EXT: case GL_SRGB8_EXT: @@ -2486,8 +2486,16 @@ } } - if (ctx->Version >= 30 || - ctx->Extensions.EXT_texture_integer) { + if (_mesa_has_EXT_texture_sRGB_R8(ctx)) { + switch (internalFormat) { + case GL_SR8_EXT: + return GL_RED; + default: + ; /* fallthrough */ + } + } + + if (_mesa_has_integer_textures(ctx)) { switch (internalFormat) { case GL_RGBA8UI_EXT: 
case GL_RGBA16UI_EXT: @@ -2506,14 +2514,14 @@ } } - if (ctx->Extensions.ARB_texture_rgb10_a2ui) { + if (_mesa_has_texture_rgb10_a2ui(ctx)) { switch (internalFormat) { case GL_RGB10_A2UI: return GL_RGBA; } } - if (ctx->Extensions.EXT_texture_integer) { + if (_mesa_has_integer_textures(ctx)) { switch (internalFormat) { case GL_ALPHA8UI_EXT: case GL_ALPHA16UI_EXT: @@ -2548,12 +2556,15 @@ } } - if (ctx->Extensions.ARB_texture_rg) { + if (_mesa_has_rg_textures(ctx)) { switch (internalFormat) { case GL_R16F: + if (!_mesa_has_half_float_textures(ctx)) + break; + return GL_RED; case GL_R32F: - if (!ctx->Extensions.ARB_texture_float) - break; + if (!_mesa_has_float_textures(ctx)) + break; return GL_RED; case GL_R8I: case GL_R8UI: @@ -2561,9 +2572,9 @@ case GL_R16UI: case GL_R32I: case GL_R32UI: - if (ctx->Version < 30 && !ctx->Extensions.EXT_texture_integer) - break; - /* FALLTHROUGH */ + if (!_mesa_has_integer_textures(ctx)) + break; + /* FALLTHROUGH */ case GL_R8: case GL_R16: case GL_RED: @@ -2571,9 +2582,12 @@ return GL_RED; case GL_RG16F: + if (!_mesa_has_half_float_textures(ctx)) + break; + return GL_RG; case GL_RG32F: - if (!ctx->Extensions.ARB_texture_float) - break; + if (!_mesa_has_float_textures(ctx)) + break; return GL_RG; case GL_RG8I: case GL_RG8UI: @@ -2581,9 +2595,9 @@ case GL_RG16UI: case GL_RG32I: case GL_RG32UI: - if (ctx->Version < 30 && !ctx->Extensions.EXT_texture_integer) - break; - /* FALLTHROUGH */ + if (!_mesa_has_integer_textures(ctx)) + break; + /* FALLTHROUGH */ case GL_RG: case GL_RG8: case GL_RG16: @@ -2594,7 +2608,7 @@ } } - if (ctx->Extensions.EXT_texture_shared_exponent) { + if (_mesa_has_texture_shared_exponent(ctx)) { switch (internalFormat) { case GL_RGB9_E5_EXT: return GL_RGB; @@ -2603,7 +2617,7 @@ } } - if (ctx->Extensions.EXT_packed_float) { + if (_mesa_has_packed_float(ctx)) { switch (internalFormat) { case GL_R11F_G11F_B10F_EXT: return GL_RGB; @@ -2612,7 +2626,7 @@ } } - if (ctx->Extensions.ARB_depth_buffer_float) { + if 
(_mesa_has_float_depth_buffer(ctx)) { switch (internalFormat) { case GL_DEPTH_COMPONENT32F: return GL_DEPTH_COMPONENT; @@ -2644,8 +2658,8 @@ * \param type the texture type */ static GLenum -_mesa_es3_effective_internal_format_for_format_and_type(GLenum format, - GLenum type) +gles_effective_internal_format_for_format_and_type(GLenum format, + GLenum type) { switch (type) { case GL_UNSIGNED_BYTE: @@ -2758,9 +2772,9 @@ * \return error code, or GL_NO_ERROR. */ GLenum -_mesa_es3_error_check_format_and_type(const struct gl_context *ctx, - GLenum format, GLenum type, - GLenum internalFormat) +_mesa_gles_error_check_format_and_type(const struct gl_context *ctx, + GLenum format, GLenum type, + GLenum internalFormat) { /* If internalFormat is an unsized format, then the effective internal * format derived from format and type should be used instead. Page 127, @@ -2778,7 +2792,7 @@ */ if (_mesa_is_enum_format_unsized(internalFormat)) { GLenum effectiveInternalFormat = - _mesa_es3_effective_internal_format_for_format_and_type(format, type); + gles_effective_internal_format_for_format_and_type(format, type); if (effectiveInternalFormat == GL_NONE) return GL_INVALID_OPERATION; @@ -2806,7 +2820,7 @@ /* The GLES variant of EXT_texture_compression_s3tc is very vague and * doesn't list valid types. Just do exactly what the spec says. 
*/ - if (ctx->Extensions.EXT_texture_compression_s3tc && + if (_mesa_has_EXT_texture_compression_s3tc(ctx) && (internalFormat == GL_COMPRESSED_RGB_S3TC_DXT1_EXT || internalFormat == GL_COMPRESSED_RGBA_S3TC_DXT1_EXT || internalFormat == GL_COMPRESSED_RGBA_S3TC_DXT3_EXT || @@ -2833,6 +2847,11 @@ if (ctx->Version <= 20) return GL_INVALID_OPERATION; break; + case GL_COMPRESSED_RGBA_BPTC_UNORM: + case GL_COMPRESSED_SRGB_ALPHA_BPTC_UNORM: + if (!_mesa_has_EXT_texture_compression_bptc(ctx)) + return GL_INVALID_OPERATION; + break; default: return GL_INVALID_OPERATION; } @@ -2879,7 +2898,7 @@ case GL_RGBA: case GL_RGB10_A2: case GL_RGB5_A1: - if (!ctx->Extensions.EXT_texture_type_2_10_10_10_REV) + if (!_mesa_has_texture_type_2_10_10_10_REV(ctx)) return GL_INVALID_OPERATION; break; default: @@ -2900,7 +2919,7 @@ return GL_INVALID_OPERATION; break; case GL_RGBA: - if (ctx->Extensions.OES_texture_float && internalFormat == format) + if (_mesa_has_OES_texture_float(ctx) && internalFormat == format) break; default: return GL_INVALID_OPERATION; @@ -2908,7 +2927,7 @@ break; case GL_HALF_FLOAT_OES: - if (ctx->Extensions.OES_texture_half_float && internalFormat == format) + if (_mesa_has_OES_texture_half_float(ctx) && internalFormat == format) break; default: return GL_INVALID_OPERATION; @@ -3035,15 +3054,20 @@ return GL_INVALID_OPERATION; break; case GL_RGB: - if (ctx->Extensions.OES_texture_float && internalFormat == format) + if (_mesa_has_OES_texture_float(ctx) && internalFormat == format) break; + case GL_COMPRESSED_RGB_BPTC_SIGNED_FLOAT: + case GL_COMPRESSED_RGB_BPTC_UNSIGNED_FLOAT: + if (!_mesa_has_EXT_texture_compression_bptc(ctx)) + return GL_INVALID_OPERATION; + break; default: return GL_INVALID_OPERATION; } break; case GL_HALF_FLOAT_OES: - if (!ctx->Extensions.OES_texture_half_float || internalFormat != format) + if (!_mesa_has_OES_texture_half_float(ctx) || internalFormat != format) return GL_INVALID_OPERATION; break; @@ -3057,7 +3081,7 @@ * GLES3 doesn't, and 
GL_OES_required_internalformat extends that * to allow the sized RGB internalformats as well. */ - if (!ctx->Extensions.EXT_texture_type_2_10_10_10_REV) + if (!_mesa_has_texture_type_2_10_10_10_REV(ctx)) return GL_INVALID_OPERATION; break; default: @@ -3110,16 +3134,20 @@ break; case GL_RG: - if (!ctx->Extensions.ARB_texture_rg) + if (!_mesa_has_rg_textures(ctx)) return GL_INVALID_OPERATION; switch (type) { case GL_UNSIGNED_BYTE: - if (internalFormat != GL_RG8) + if (internalFormat != GL_RG8 && + (!_mesa_has_EXT_texture_compression_rgtc(ctx) || + internalFormat != GL_COMPRESSED_RED_GREEN_RGTC2_EXT)) return GL_INVALID_OPERATION; break; case GL_BYTE: - if (internalFormat != GL_RG8_SNORM) + if (internalFormat != GL_RG8_SNORM && + (!_mesa_has_EXT_texture_compression_rgtc(ctx) || + internalFormat != GL_COMPRESSED_SIGNED_RED_GREEN_RGTC2_EXT)) return GL_INVALID_OPERATION; break; @@ -3142,8 +3170,8 @@ return GL_INVALID_OPERATION; break; case GL_RG: - if (ctx->Extensions.ARB_texture_rg && - ctx->Extensions.OES_texture_half_float) + if (_mesa_has_rg_textures(ctx) && + _mesa_has_OES_texture_half_float(ctx)) break; /* fallthrough */ default: @@ -3157,8 +3185,8 @@ case GL_RG32F: break; case GL_RG: - if (ctx->Extensions.ARB_texture_rg && - ctx->Extensions.OES_texture_float) + if (_mesa_has_rg_textures(ctx) && + _mesa_has_OES_texture_float(ctx)) break; /* fallthrough */ default: @@ -3211,16 +3239,22 @@ break; case GL_RED: - if (!ctx->Extensions.ARB_texture_rg) + if (!_mesa_has_rg_textures(ctx)) return GL_INVALID_OPERATION; switch (type) { case GL_UNSIGNED_BYTE: - if (internalFormat != GL_R8) - return GL_INVALID_OPERATION; - break; + if (internalFormat == GL_R8 || + ((internalFormat == GL_SR8_EXT) && + _mesa_has_EXT_texture_sRGB_R8(ctx)) || + (internalFormat == GL_COMPRESSED_RED_RGTC1_EXT && + _mesa_has_EXT_texture_compression_rgtc(ctx))) + break; + return GL_INVALID_OPERATION; case GL_BYTE: - if (internalFormat != GL_R8_SNORM) + if (internalFormat != GL_R8_SNORM && + 
(!_mesa_has_EXT_texture_compression_rgtc(ctx) || + internalFormat != GL_COMPRESSED_SIGNED_RED_RGTC1_EXT)) return GL_INVALID_OPERATION; break; @@ -3244,8 +3278,8 @@ break; case GL_RG: case GL_RED: - if (ctx->Extensions.ARB_texture_rg && - ctx->Extensions.OES_texture_half_float) + if (_mesa_has_rg_textures(ctx) && + _mesa_has_OES_texture_half_float(ctx)) break; /* fallthrough */ default: @@ -3259,8 +3293,8 @@ case GL_R32F: break; case GL_RED: - if (ctx->Extensions.ARB_texture_rg && - ctx->Extensions.OES_texture_float) + if (_mesa_has_rg_textures(ctx) && + _mesa_has_OES_texture_float(ctx)) break; /* fallthrough */ default: @@ -3372,11 +3406,11 @@ case GL_LUMINANCE_ALPHA: switch (type) { case GL_FLOAT: - if (!ctx->Extensions.OES_texture_float || internalFormat != format) + if (!_mesa_has_OES_texture_float(ctx) || internalFormat != format) return GL_INVALID_OPERATION; break; case GL_HALF_FLOAT_OES: - if (!ctx->Extensions.OES_texture_half_float || internalFormat != format) + if (!_mesa_has_OES_texture_half_float(ctx) || internalFormat != format) return GL_INVALID_OPERATION; break; case GL_UNSIGNED_BYTE: @@ -3844,7 +3878,7 @@ * internal formats to base internal formats ... and use cases ...'') * for the R32F, RG32F, RGB32F, and RGBA32F formats." 
*/ - return ctx->Extensions.OES_texture_float_linear; + return _mesa_has_OES_texture_float_linear(ctx); default: return false; } diff -Nru mesa-18.3.3/src/mesa/main/glformats.h mesa-19.0.1/src/mesa/main/glformats.h --- mesa-18.3.3/src/mesa/main/glformats.h 2018-12-07 18:58:04.000000000 +0000 +++ mesa-19.0.1/src/mesa/main/glformats.h 2019-03-31 23:16:37.000000000 +0000 @@ -138,9 +138,9 @@ unsigned dimensions); extern GLenum -_mesa_es3_error_check_format_and_type(const struct gl_context *ctx, - GLenum format, GLenum type, - GLenum internalFormat); +_mesa_gles_error_check_format_and_type(const struct gl_context *ctx, + GLenum format, GLenum type, + GLenum internalFormat); extern GLint _mesa_base_tex_format(const struct gl_context *ctx, GLint internalFormat ); diff -Nru mesa-18.3.3/src/mesa/main/glheader.h mesa-19.0.1/src/mesa/main/glheader.h --- mesa-18.3.3/src/mesa/main/glheader.h 2018-12-07 18:58:04.000000000 +0000 +++ mesa-19.0.1/src/mesa/main/glheader.h 2019-03-31 23:16:37.000000000 +0000 @@ -138,6 +138,9 @@ #define GL_COMPRESSED_LUMINANCE_ALPHA_3DC_ATI 0x8837 #endif +#ifndef GL_EXT_texture_sRGB_R8 +#define GL_SR8_EXT 0x8FBD +#endif /** * Internal token to represent a GLSL shader program (a collection of @@ -148,6 +151,9 @@ */ #define GL_SHADER_PROGRAM_MESA 0x9999 +#ifndef GL_EXT_multisampled_render_to_texture +#define GL_FRAMEBUFFER_ATTACHMENT_TEXTURE_SAMPLES_EXT 0x8D6C +#endif #ifdef __cplusplus } diff -Nru mesa-18.3.3/src/mesa/main/glspirv.c mesa-19.0.1/src/mesa/main/glspirv.c --- mesa-18.3.3/src/mesa/main/glspirv.c 2018-12-07 18:58:04.000000000 +0000 +++ mesa-19.0.1/src/mesa/main/glspirv.c 2019-03-31 23:16:37.000000000 +0000 @@ -212,6 +212,7 @@ const struct spirv_to_nir_options spirv_options = { .lower_workgroup_access_to_offsets = true, + .lower_ubo_ssbo_access_to_offsets = true, .caps = ctx->Const.SpirVCapabilities }; @@ -242,10 +243,10 @@ * inline functions. 
That way they get properly initialized at the top * of the function and not at the top of its caller. */ - NIR_PASS_V(nir, nir_lower_constant_initializers, nir_var_local); + NIR_PASS_V(nir, nir_lower_constant_initializers, nir_var_function_temp); NIR_PASS_V(nir, nir_lower_returns); NIR_PASS_V(nir, nir_inline_functions); - NIR_PASS_V(nir, nir_copy_prop); + NIR_PASS_V(nir, nir_opt_deref); /* Pick off the single entrypoint that we want */ foreach_list_typed_safe(nir_function, func, node, &nir->functions) { diff -Nru mesa-18.3.3/src/mesa/main/glthread.c mesa-19.0.1/src/mesa/main/glthread.c --- mesa-18.3.3/src/mesa/main/glthread.c 2018-09-27 19:13:54.000000000 +0000 +++ mesa-19.0.1/src/mesa/main/glthread.c 2019-03-31 23:16:37.000000000 +0000 @@ -121,11 +121,11 @@ free(glthread); ctx->GLThread = NULL; - _mesa_glthread_restore_dispatch(ctx); + _mesa_glthread_restore_dispatch(ctx, "destroy"); } void -_mesa_glthread_restore_dispatch(struct gl_context *ctx) +_mesa_glthread_restore_dispatch(struct gl_context *ctx, const char *func) { /* Remove ourselves from the dispatch table except if another ctx/thread * already installed a new dispatch table. 
@@ -136,6 +136,9 @@ if (_glapi_get_dispatch() == ctx->MarshalExec) { ctx->CurrentClientDispatch = ctx->CurrentServerDispatch; _glapi_set_dispatch(ctx->CurrentClientDispatch); +#if 0 + printf("glthread disabled: %s\n", func); +#endif } } diff -Nru mesa-18.3.3/src/mesa/main/glthread.h mesa-19.0.1/src/mesa/main/glthread.h --- mesa-18.3.3/src/mesa/main/glthread.h 2018-04-16 21:31:06.000000000 +0000 +++ mesa-19.0.1/src/mesa/main/glthread.h 2019-03-31 23:16:37.000000000 +0000 @@ -99,7 +99,7 @@ void _mesa_glthread_init(struct gl_context *ctx); void _mesa_glthread_destroy(struct gl_context *ctx); -void _mesa_glthread_restore_dispatch(struct gl_context *ctx); +void _mesa_glthread_restore_dispatch(struct gl_context *ctx, const char *func); void _mesa_glthread_flush_batch(struct gl_context *ctx); void _mesa_glthread_finish(struct gl_context *ctx); diff -Nru mesa-18.3.3/src/mesa/main/marshal.c mesa-19.0.1/src/mesa/main/marshal.c --- mesa-18.3.3/src/mesa/main/marshal.c 2017-11-05 00:14:08.000000000 +0000 +++ mesa-19.0.1/src/mesa/main/marshal.c 2019-03-31 23:16:37.000000000 +0000 @@ -89,7 +89,7 @@ if (cap == GL_DEBUG_OUTPUT_SYNCHRONOUS_ARB) { _mesa_glthread_finish(ctx); - _mesa_glthread_restore_dispatch(ctx); + _mesa_glthread_restore_dispatch(ctx, "Enable(DEBUG_OUTPUT_SYNCHRONOUS)"); } else { cmd = _mesa_glthread_allocate_command(ctx, DISPATCH_CMD_Enable, sizeof(*cmd)); diff -Nru mesa-18.3.3/src/mesa/main/mtypes.h mesa-19.0.1/src/mesa/main/mtypes.h --- mesa-18.3.3/src/mesa/main/mtypes.h 2018-12-07 18:58:04.000000000 +0000 +++ mesa-19.0.1/src/mesa/main/mtypes.h 2019-03-31 23:16:37.000000000 +0000 @@ -457,6 +457,21 @@ /** + * Vertex format to describe a vertex element. + */ +struct gl_vertex_format +{ + GLenum16 Type; /**< datatype: GL_FLOAT, GL_INT, etc */ + GLenum16 Format; /**< default: GL_RGBA, but may be GL_BGRA */ + GLubyte Size:5; /**< components per element (1,2,3,4) */ + GLubyte Normalized:1; /**< GL_ARB_vertex_program */ + GLubyte Integer:1; /**< Integer-valued? 
*/ + GLubyte Doubles:1; /**< double values are not converted to floats */ + GLubyte _ElementSize; /**< Size of each element in bytes */ +}; + + +/** * Current attribute group (GL_CURRENT_BIT). */ struct gl_current_attrib @@ -1419,17 +1434,12 @@ const GLubyte *Ptr; /** Offset of the first element relative to the binding offset */ GLuint RelativeOffset; - GLshort Stride; /**< Stride as specified with gl*Pointer() */ - GLenum16 Type; /**< Datatype: GL_FLOAT, GL_INT, etc */ - GLenum16 Format; /**< Default: GL_RGBA, but may be GL_BGRA */ - GLboolean Enabled; /**< Whether the array is enabled */ - GLubyte Size; /**< Components per element (1,2,3,4) */ - unsigned Normalized:1; /**< Fixed-point values are normalized when converted to floats */ - unsigned Integer:1; /**< Fixed-point values are not converted to floats */ - unsigned Doubles:1; /**< double precision values are not converted to floats */ - unsigned _ElementSize:8; /**< Size of each element in bytes */ + /** Vertex format */ + struct gl_vertex_format Format; + /** Stride as specified with gl*Pointer() */ + GLshort Stride; /** Index into gl_vertex_array_object::BufferBinding[] array */ - unsigned BufferBindingIndex:6; + GLubyte BufferBindingIndex; /** * Derived effective buffer binding index @@ -1444,7 +1454,7 @@ * Note that _mesa_update_vao_derived_arrays is called when binding * the VAO to Array._DrawVAO. */ - unsigned _EffBufferBindingIndex:6; + GLubyte _EffBufferBindingIndex; /** * Derived effective relative offset. * @@ -1538,7 +1548,7 @@ GLbitfield VertexAttribBufferMask; /** Mask of VERT_BIT_* values indicating which arrays are enabled */ - GLbitfield _Enabled; + GLbitfield Enabled; /** * Mask of VERT_BIT_* enabled arrays past position/generic0 mapping @@ -2566,8 +2576,7 @@ { COMPILE_FAILURE = 0, COMPILE_SUCCESS, - COMPILE_SKIPPED, - COMPILED_NO_OPTS + COMPILE_SKIPPED }; /** @@ -3404,6 +3413,7 @@ */ struct gl_texture_object *Texture; GLuint TextureLevel; /**< Attached mipmap level. 
*/ + GLsizei NumSamples; /**< from FramebufferTexture2DMultisampleEXT */ GLuint CubeMapFace; /**< 0 .. 5, for cube map textures. */ GLuint Zoffset; /**< Slice for 3D textures, or layer for both 1D * and 2D array textures */ @@ -3495,6 +3505,7 @@ bool _HasAttachments; GLbitfield _IntegerBuffers; /**< Which color buffers are integer valued */ + GLbitfield _RGBBuffers; /**< Which color buffers have baseformat == RGB */ /* ARB_color_buffer_float */ GLboolean _AllColorBuffersFixedPoint; /* no integer, no float */ @@ -4244,6 +4255,7 @@ GLboolean EXT_gpu_shader4; GLboolean EXT_memory_object; GLboolean EXT_memory_object_fd; + GLboolean EXT_multisampled_render_to_texture; GLboolean EXT_packed_float; GLboolean EXT_pixel_buffer_object; GLboolean EXT_point_parameters; @@ -4253,6 +4265,7 @@ GLboolean EXT_semaphore_fd; GLboolean EXT_shader_integer_mix; GLboolean EXT_shader_samples_identical; + GLboolean EXT_sRGB; GLboolean EXT_stencil_two_side; GLboolean EXT_texture_array; GLboolean EXT_texture_compression_latc; @@ -4264,6 +4277,7 @@ GLboolean EXT_texture_shared_exponent; GLboolean EXT_texture_snorm; GLboolean EXT_texture_sRGB; + GLboolean EXT_texture_sRGB_R8; GLboolean EXT_texture_sRGB_decode; GLboolean EXT_texture_swizzle; GLboolean EXT_texture_type_2_10_10_10_REV; diff -Nru mesa-18.3.3/src/mesa/main/queryobj.c mesa-19.0.1/src/mesa/main/queryobj.c --- mesa-18.3.3/src/mesa/main/queryobj.c 2018-12-07 18:58:04.000000000 +0000 +++ mesa-19.0.1/src/mesa/main/queryobj.c 2019-03-31 23:16:37.000000000 +0000 @@ -146,11 +146,10 @@ get_pipe_stats_binding_point(struct gl_context *ctx, GLenum target) { - const int which = target - GL_VERTICES_SUBMITTED_ARB; + const int which = target - GL_VERTICES_SUBMITTED; assert(which < MAX_PIPELINE_STATISTICS); - if (!_mesa_is_desktop_gl(ctx) || - !ctx->Extensions.ARB_pipeline_statistics_query) + if (!_mesa_has_ARB_pipeline_statistics_query(ctx)) return NULL; return &ctx->Query.pipeline_stats[which]; @@ -164,89 +163,80 @@ static struct gl_query_object 
** get_query_binding_point(struct gl_context *ctx, GLenum target, GLuint index) { - - /* From GL_EXT_occlusion_query_boolean spec: - * - * "Accepted by the parameter of BeginQueryEXT, EndQueryEXT, - * and GetQueryivEXT: - * - * ANY_SAMPLES_PASSED_EXT 0x8C2F - * ANY_SAMPLES_PASSED_CONSERVATIVE_EXT 0x8D6A" - */ - if ((_mesa_is_gles(ctx) && ctx->Version == 20) && - (target != GL_ANY_SAMPLES_PASSED && - target != GL_ANY_SAMPLES_PASSED_CONSERVATIVE)) - return NULL; - switch (target) { - case GL_SAMPLES_PASSED_ARB: - if (ctx->Extensions.ARB_occlusion_query) + case GL_SAMPLES_PASSED: + if (_mesa_has_ARB_occlusion_query(ctx) || + _mesa_has_ARB_occlusion_query2(ctx)) return &ctx->Query.CurrentOcclusionObject; else return NULL; case GL_ANY_SAMPLES_PASSED: - if (ctx->Extensions.ARB_occlusion_query2) + if (_mesa_has_ARB_occlusion_query2(ctx) || + _mesa_has_EXT_occlusion_query_boolean(ctx)) return &ctx->Query.CurrentOcclusionObject; else return NULL; case GL_ANY_SAMPLES_PASSED_CONSERVATIVE: - if (ctx->Extensions.ARB_ES3_compatibility - || (ctx->API == API_OPENGLES2 && ctx->Version >= 30)) + if (_mesa_has_ARB_ES3_compatibility(ctx) || + _mesa_has_EXT_occlusion_query_boolean(ctx)) return &ctx->Query.CurrentOcclusionObject; else return NULL; - case GL_TIME_ELAPSED_EXT: - if (ctx->Extensions.EXT_timer_query) + case GL_TIME_ELAPSED: + if (_mesa_has_EXT_timer_query(ctx) || + _mesa_has_EXT_disjoint_timer_query(ctx)) return &ctx->Query.CurrentTimerObject; else return NULL; case GL_PRIMITIVES_GENERATED: - if (ctx->Extensions.EXT_transform_feedback) + if (_mesa_has_EXT_transform_feedback(ctx) || + _mesa_has_EXT_tessellation_shader(ctx) || + _mesa_has_OES_geometry_shader(ctx)) return &ctx->Query.PrimitivesGenerated[index]; else return NULL; case GL_TRANSFORM_FEEDBACK_PRIMITIVES_WRITTEN: - if (ctx->Extensions.EXT_transform_feedback) + if (_mesa_has_EXT_transform_feedback(ctx) || _mesa_is_gles3(ctx)) return &ctx->Query.PrimitivesWritten[index]; else return NULL; - case 
GL_TRANSFORM_FEEDBACK_STREAM_OVERFLOW_ARB: - if (ctx->Extensions.ARB_transform_feedback_overflow_query) + case GL_TRANSFORM_FEEDBACK_STREAM_OVERFLOW: + if (_mesa_has_ARB_transform_feedback_overflow_query(ctx)) return &ctx->Query.TransformFeedbackOverflow[index]; else return NULL; - case GL_TRANSFORM_FEEDBACK_OVERFLOW_ARB: - if (ctx->Extensions.ARB_transform_feedback_overflow_query) + case GL_TRANSFORM_FEEDBACK_OVERFLOW: + if (_mesa_has_ARB_transform_feedback_overflow_query(ctx)) return &ctx->Query.TransformFeedbackOverflowAny; else return NULL; - case GL_VERTICES_SUBMITTED_ARB: - case GL_PRIMITIVES_SUBMITTED_ARB: - case GL_VERTEX_SHADER_INVOCATIONS_ARB: - case GL_FRAGMENT_SHADER_INVOCATIONS_ARB: - case GL_CLIPPING_INPUT_PRIMITIVES_ARB: - case GL_CLIPPING_OUTPUT_PRIMITIVES_ARB: + case GL_VERTICES_SUBMITTED: + case GL_PRIMITIVES_SUBMITTED: + case GL_VERTEX_SHADER_INVOCATIONS: + case GL_FRAGMENT_SHADER_INVOCATIONS: + case GL_CLIPPING_INPUT_PRIMITIVES: + case GL_CLIPPING_OUTPUT_PRIMITIVES: return get_pipe_stats_binding_point(ctx, target); case GL_GEOMETRY_SHADER_INVOCATIONS: /* GL_GEOMETRY_SHADER_INVOCATIONS is defined in a non-sequential order */ - target = GL_VERTICES_SUBMITTED_ARB + MAX_PIPELINE_STATISTICS - 1; + target = GL_VERTICES_SUBMITTED + MAX_PIPELINE_STATISTICS - 1; /* fallthrough */ - case GL_GEOMETRY_SHADER_PRIMITIVES_EMITTED_ARB: + case GL_GEOMETRY_SHADER_PRIMITIVES_EMITTED: if (_mesa_has_geometry_shaders(ctx)) return get_pipe_stats_binding_point(ctx, target); else return NULL; - case GL_TESS_CONTROL_SHADER_PATCHES_ARB: - case GL_TESS_EVALUATION_SHADER_INVOCATIONS_ARB: + case GL_TESS_CONTROL_SHADER_PATCHES: + case GL_TESS_EVALUATION_SHADER_INVOCATIONS: if (_mesa_has_tessellation(ctx)) return get_pipe_stats_binding_point(ctx, target); else return NULL; - case GL_COMPUTE_SHADER_INVOCATIONS_ARB: + case GL_COMPUTE_SHADER_INVOCATIONS: if (_mesa_has_compute_shaders(ctx)) return get_pipe_stats_binding_point(ctx, target); else @@ -316,8 +306,8 @@ case 
GL_TIMESTAMP: case GL_PRIMITIVES_GENERATED: case GL_TRANSFORM_FEEDBACK_PRIMITIVES_WRITTEN: - case GL_TRANSFORM_FEEDBACK_STREAM_OVERFLOW_ARB: - case GL_TRANSFORM_FEEDBACK_OVERFLOW_ARB: + case GL_TRANSFORM_FEEDBACK_STREAM_OVERFLOW: + case GL_TRANSFORM_FEEDBACK_OVERFLOW: break; default: _mesa_error(ctx, GL_INVALID_ENUM, "glCreateQueries(invalid target = %s)", @@ -393,7 +383,7 @@ switch (target) { case GL_TRANSFORM_FEEDBACK_PRIMITIVES_WRITTEN: case GL_PRIMITIVES_GENERATED: - case GL_TRANSFORM_FEEDBACK_STREAM_OVERFLOW_ARB: + case GL_TRANSFORM_FEEDBACK_STREAM_OVERFLOW: if (index >= ctx->Const.MaxVertexStreams) { _mesa_error(ctx, GL_INVALID_VALUE, "glBeginQueryIndexed(index>=MaxVertexStreams)"); @@ -676,7 +666,8 @@ } if (target == GL_TIMESTAMP) { - if (!ctx->Extensions.ARB_timer_query) { + if (!_mesa_has_ARB_timer_query(ctx) && + !_mesa_has_EXT_disjoint_timer_query(ctx)) { _mesa_error(ctx, GL_INVALID_ENUM, "glGetQueryARB(target)"); return; } @@ -692,7 +683,7 @@ } switch (pname) { - case GL_QUERY_COUNTER_BITS_ARB: + case GL_QUERY_COUNTER_BITS: switch (target) { case GL_SAMPLES_PASSED: *params = ctx->Const.QueryCounterBits.SamplesPassed; @@ -717,45 +708,45 @@ case GL_TRANSFORM_FEEDBACK_PRIMITIVES_WRITTEN: *params = ctx->Const.QueryCounterBits.PrimitivesWritten; break; - case GL_TRANSFORM_FEEDBACK_STREAM_OVERFLOW_ARB: - case GL_TRANSFORM_FEEDBACK_OVERFLOW_ARB: + case GL_TRANSFORM_FEEDBACK_STREAM_OVERFLOW: + case GL_TRANSFORM_FEEDBACK_OVERFLOW: /* The minimum value of this is 1 if it's nonzero, and the value * is only ever GL_TRUE or GL_FALSE, so no sense in reporting more * bits. 
*/ *params = 1; break; - case GL_VERTICES_SUBMITTED_ARB: + case GL_VERTICES_SUBMITTED: *params = ctx->Const.QueryCounterBits.VerticesSubmitted; break; - case GL_PRIMITIVES_SUBMITTED_ARB: + case GL_PRIMITIVES_SUBMITTED: *params = ctx->Const.QueryCounterBits.PrimitivesSubmitted; break; - case GL_VERTEX_SHADER_INVOCATIONS_ARB: + case GL_VERTEX_SHADER_INVOCATIONS: *params = ctx->Const.QueryCounterBits.VsInvocations; break; - case GL_TESS_CONTROL_SHADER_PATCHES_ARB: + case GL_TESS_CONTROL_SHADER_PATCHES: *params = ctx->Const.QueryCounterBits.TessPatches; break; - case GL_TESS_EVALUATION_SHADER_INVOCATIONS_ARB: + case GL_TESS_EVALUATION_SHADER_INVOCATIONS: *params = ctx->Const.QueryCounterBits.TessInvocations; break; case GL_GEOMETRY_SHADER_INVOCATIONS: *params = ctx->Const.QueryCounterBits.GsInvocations; break; - case GL_GEOMETRY_SHADER_PRIMITIVES_EMITTED_ARB: + case GL_GEOMETRY_SHADER_PRIMITIVES_EMITTED: *params = ctx->Const.QueryCounterBits.GsPrimitives; break; - case GL_FRAGMENT_SHADER_INVOCATIONS_ARB: + case GL_FRAGMENT_SHADER_INVOCATIONS: *params = ctx->Const.QueryCounterBits.FsInvocations; break; - case GL_COMPUTE_SHADER_INVOCATIONS_ARB: + case GL_COMPUTE_SHADER_INVOCATIONS: *params = ctx->Const.QueryCounterBits.ComputeInvocations; break; - case GL_CLIPPING_INPUT_PRIMITIVES_ARB: + case GL_CLIPPING_INPUT_PRIMITIVES: *params = ctx->Const.QueryCounterBits.ClInPrimitives; break; - case GL_CLIPPING_OUTPUT_PRIMITIVES_ARB: + case GL_CLIPPING_OUTPUT_PRIMITIVES: *params = ctx->Const.QueryCounterBits.ClOutPrimitives; break; default: @@ -766,7 +757,7 @@ break; } break; - case GL_CURRENT_QUERY_ARB: + case GL_CURRENT_QUERY: *params = (q && q->Target == target) ? 
q->Id : 0; break; default: @@ -822,7 +813,7 @@ if (buf && buf != ctx->Shared->NullBufferObj) { bool is_64bit = ptype == GL_INT64_ARB || ptype == GL_UNSIGNED_INT64_ARB; - if (!ctx->Extensions.ARB_query_buffer_object) { + if (!_mesa_has_ARB_query_buffer_object(ctx)) { _mesa_error(ctx, GL_INVALID_OPERATION, "%s(not supported)", func); return; } @@ -855,7 +846,7 @@ value = q->Result; break; case GL_QUERY_RESULT_NO_WAIT: - if (!ctx->Extensions.ARB_query_buffer_object) + if (!_mesa_has_ARB_query_buffer_object(ctx)) goto invalid_enum; ctx->Driver.CheckQuery(ctx, q); if (!q->Ready) diff -Nru mesa-18.3.3/src/mesa/main/shaderobj.h mesa-19.0.1/src/mesa/main/shaderobj.h --- mesa-18.3.3/src/mesa/main/shaderobj.h 2018-04-16 21:31:06.000000000 +0000 +++ mesa-19.0.1/src/mesa/main/shaderobj.h 2019-03-31 23:16:37.000000000 +0000 @@ -225,6 +225,9 @@ return GL_TESS_EVALUATION_SUBROUTINE; case MESA_SHADER_NONE: break; + case MESA_SHADER_KERNEL: + unreachable("not reached"); + break; } unreachable("not reached"); } @@ -246,6 +249,7 @@ case MESA_SHADER_TESS_EVAL: return GL_TESS_EVALUATION_SUBROUTINE_UNIFORM; case MESA_SHADER_NONE: + case MESA_SHADER_KERNEL: break; } unreachable("not reached"); diff -Nru mesa-18.3.3/src/mesa/main/tests/dispatch_sanity.cpp mesa-19.0.1/src/mesa/main/tests/dispatch_sanity.cpp --- mesa-18.3.3/src/mesa/main/tests/dispatch_sanity.cpp 2018-12-07 18:58:04.000000000 +0000 +++ mesa-19.0.1/src/mesa/main/tests/dispatch_sanity.cpp 2019-03-31 23:16:37.000000000 +0000 @@ -2236,6 +2236,10 @@ /* GL_NV_conservative_raster_pre_snap_triangles */ { "glConservativeRasterParameteriNV", 20, -1 }, + /* GL_EXT_multisampled_render_to_texture */ + { "glRenderbufferStorageMultisampleEXT", 20, -1 }, + { "glFramebufferTexture2DMultisampleEXT", 20, -1 }, + { NULL, 0, -1 } }; @@ -2330,7 +2334,7 @@ // glProgramParameteri aliases glProgramParameteriEXT in GLES 2 // We check for the aliased -NV version in GLES 2 // { "glReadBuffer", 30, -1 }, - { "glRenderbufferStorageMultisample", 30, -1 
}, + // glRenderbufferStorageMultisample aliases glRenderbufferStorageMultisampleEXT in GLES 2 { "glResumeTransformFeedback", 30, -1 }, { "glSamplerParameterf", 30, -1 }, { "glSamplerParameterfv", 30, -1 }, diff -Nru mesa-18.3.3/src/mesa/main/tests/meson.build mesa-19.0.1/src/mesa/main/tests/meson.build --- mesa-18.3.3/src/mesa/main/tests/meson.build 2018-02-08 14:40:56.000000000 +0000 +++ mesa-19.0.1/src/mesa/main/tests/meson.build 2019-03-31 23:16:37.000000000 +0000 @@ -41,5 +41,6 @@ include_directories : [inc_include, inc_src, inc_mapi, inc_mesa], dependencies : [idep_gtest, dep_clock, dep_dl, dep_thread], link_with : [libmesa_classic, link_main_test], - ) + ), + suite : ['mesa'], ) diff -Nru mesa-18.3.3/src/mesa/main/texcompress.c mesa-19.0.1/src/mesa/main/texcompress.c --- mesa-18.3.3/src/mesa/main/texcompress.c 2018-02-08 14:40:56.000000000 +0000 +++ mesa-19.0.1/src/mesa/main/texcompress.c 2019-03-31 23:16:37.000000000 +0000 @@ -327,6 +327,23 @@ formats[n++] = GL_ETC1_RGB8_OES; } + /* Required by EXT_texture_compression_bptc in GLES. */ + if (_mesa_has_EXT_texture_compression_bptc(ctx)) { + formats[n++] = GL_COMPRESSED_RGBA_BPTC_UNORM; + formats[n++] = GL_COMPRESSED_SRGB_ALPHA_BPTC_UNORM; + formats[n++] = GL_COMPRESSED_RGB_BPTC_SIGNED_FLOAT; + formats[n++] = GL_COMPRESSED_RGB_BPTC_UNSIGNED_FLOAT; + } + + /* Required by EXT_texture_compression_rgtc in GLES. 
*/ + if (_mesa_is_gles3(ctx) && + _mesa_has_EXT_texture_compression_rgtc(ctx)) { + formats[n++] = GL_COMPRESSED_RED_RGTC1_EXT; + formats[n++] = GL_COMPRESSED_SIGNED_RED_RGTC1_EXT; + formats[n++] = GL_COMPRESSED_RED_GREEN_RGTC2_EXT; + formats[n++] = GL_COMPRESSED_SIGNED_RED_GREEN_RGTC2_EXT; + } + if (ctx->API == API_OPENGLES) { formats[n++] = GL_PALETTE4_RGB8_OES; formats[n++] = GL_PALETTE4_RGBA8_OES; diff -Nru mesa-18.3.3/src/mesa/main/texformat.c mesa-19.0.1/src/mesa/main/texformat.c --- mesa-18.3.3/src/mesa/main/texformat.c 2017-11-05 00:14:08.000000000 +0000 +++ mesa-19.0.1/src/mesa/main/texformat.c 2019-03-31 23:16:37.000000000 +0000 @@ -477,6 +477,9 @@ RETURN_IF_SUPPORTED(MESA_FORMAT_B8G8R8A8_SRGB); RETURN_IF_SUPPORTED(MESA_FORMAT_A8R8G8B8_SRGB); break; + case GL_SR8_EXT: + RETURN_IF_SUPPORTED(MESA_FORMAT_R_SRGB8); + break; case GL_SLUMINANCE_EXT: case GL_SLUMINANCE8_EXT: RETURN_IF_SUPPORTED(MESA_FORMAT_L_SRGB8); diff -Nru mesa-18.3.3/src/mesa/main/teximage.c mesa-19.0.1/src/mesa/main/teximage.c --- mesa-18.3.3/src/mesa/main/teximage.c 2018-12-07 18:58:04.000000000 +0000 +++ mesa-19.0.1/src/mesa/main/teximage.c 2019-03-31 23:16:37.000000000 +0000 @@ -1798,8 +1798,8 @@ texture_format_error_check_gles(struct gl_context *ctx, GLenum format, GLenum type, GLenum internalFormat, const char *callerName) { - GLenum err = _mesa_es3_error_check_format_and_type(ctx, format, type, - internalFormat); + GLenum err = _mesa_gles_error_check_format_and_type(ctx, format, type, + internalFormat); if (err != GL_NO_ERROR) { _mesa_error(ctx, err, "%s(format = %s, type = %s, internalformat = %s)", @@ -2438,7 +2438,7 @@ bool rb_is_srgb = false; bool dst_is_srgb = false; - if (ctx->Extensions.EXT_framebuffer_sRGB && + if (ctx->Extensions.EXT_sRGB && _mesa_get_format_color_encoding(rb->Format) == GL_SRGB) { rb_is_srgb = true; } diff -Nru mesa-18.3.3/src/mesa/main/varray.c mesa-19.0.1/src/mesa/main/varray.c --- mesa-18.3.3/src/mesa/main/varray.c 2018-12-07 18:58:04.000000000 +0000 +++ 
mesa-19.0.1/src/mesa/main/varray.c 2019-03-31 23:16:37.000000000 +0000 @@ -33,6 +33,7 @@ #include "context.h" #include "enable.h" #include "enums.h" +#include "glformats.h" #include "hash.h" #include "image.h" #include "macros.h" @@ -141,7 +142,7 @@ if (ctx->API != API_OPENGL_COMPAT) return; /* The generic0 attribute superseeds the position attribute */ - const GLbitfield enabled = vao->_Enabled; + const GLbitfield enabled = vao->Enabled; if (enabled & VERT_BIT_GENERIC0) vao->_AttributeMapMode = ATTRIBUTE_MAP_MODE_GENERIC0; else if (enabled & VERT_BIT_POS) @@ -177,7 +178,7 @@ array->BufferBindingIndex = bindingIndex; - vao->NewArrays |= vao->_Enabled & array_bit; + vao->NewArrays |= vao->Enabled & array_bit; if (vao == ctx->Array.VAO) ctx->NewState |= _NEW_ARRAY; } @@ -213,7 +214,7 @@ else vao->VertexAttribBufferMask |= binding->_BoundArrays; - vao->NewArrays |= vao->_Enabled & binding->_BoundArrays; + vao->NewArrays |= vao->Enabled & binding->_BoundArrays; if (vao == ctx->Array.VAO) ctx->NewState |= _NEW_ARRAY; } @@ -236,13 +237,31 @@ if (binding->InstanceDivisor != divisor) { binding->InstanceDivisor = divisor; - vao->NewArrays |= vao->_Enabled & binding->_BoundArrays; + vao->NewArrays |= vao->Enabled & binding->_BoundArrays; if (vao == ctx->Array.VAO) ctx->NewState |= _NEW_ARRAY; } } +void +_mesa_set_vertex_format(struct gl_vertex_format *vertex_format, + GLubyte size, GLenum16 type, GLenum16 format, + GLboolean normalized, GLboolean integer, + GLboolean doubles) +{ + assert(size <= 4); + vertex_format->Type = type; + vertex_format->Format = format; + vertex_format->Size = size; + vertex_format->Normalized = normalized; + vertex_format->Integer = integer; + vertex_format->Doubles = doubles; + vertex_format->_ElementSize = _mesa_bytes_per_vertex_attrib(size, type); + assert(vertex_format->_ElementSize <= 4*sizeof(double)); +} + + /** * Examine the API profile and extensions to determine which types are legal * for vertex arrays. 
This is called once from update_array_format(). @@ -330,24 +349,15 @@ GLuint relativeOffset) { struct gl_array_attributes *const array = &vao->VertexAttrib[attrib]; - GLint elementSize; assert(!vao->SharedAndImmutable); assert(size <= 4); - elementSize = _mesa_bytes_per_vertex_attrib(size, type); - assert(elementSize != -1); - - array->Size = size; - array->Type = type; - array->Format = format; - array->Normalized = normalized; - array->Integer = integer; - array->Doubles = doubles; array->RelativeOffset = relativeOffset; - array->_ElementSize = elementSize; + _mesa_set_vertex_format(&array->Format, size, type, format, + normalized, integer, doubles); - vao->NewArrays |= vao->_Enabled & VERT_BIT(attrib); + vao->NewArrays |= vao->Enabled & VERT_BIT(attrib); if (vao == ctx->Array.VAO) ctx->NewState |= _NEW_ARRAY; } @@ -605,11 +615,12 @@ * to the VAO. But but that is done already unconditionally in * _mesa_update_array_format called above. */ - assert((vao->NewArrays | ~vao->_Enabled) & VERT_BIT(attrib)); + assert((vao->NewArrays | ~vao->Enabled) & VERT_BIT(attrib)); array->Ptr = ptr; /* Update the vertex buffer binding */ - GLsizei effectiveStride = stride != 0 ? stride : array->_ElementSize; + GLsizei effectiveStride = stride != 0 ? 
+ stride : array->Format._ElementSize; _mesa_bind_vertex_buffer(ctx, vao, attrib, ctx->Array.ArrayBufferObj, (GLintptr) ptr, effectiveStride); @@ -1071,25 +1082,25 @@ void -_mesa_enable_vertex_array_attrib(struct gl_context *ctx, - struct gl_vertex_array_object *vao, - gl_vert_attrib attrib) +_mesa_enable_vertex_array_attribs(struct gl_context *ctx, + struct gl_vertex_array_object *vao, + GLbitfield attrib_bits) { - assert(attrib < ARRAY_SIZE(vao->VertexAttrib)); + assert((attrib_bits & ~VERT_BIT_ALL) == 0); assert(!vao->SharedAndImmutable); - if (!vao->VertexAttrib[attrib].Enabled) { + /* Only work on bits that are disabled */ + attrib_bits &= ~vao->Enabled; + if (attrib_bits) { /* was disabled, now being enabled */ - vao->VertexAttrib[attrib].Enabled = GL_TRUE; - const GLbitfield array_bit = VERT_BIT(attrib); - vao->_Enabled |= array_bit; - vao->NewArrays |= array_bit; + vao->Enabled |= attrib_bits; + vao->NewArrays |= attrib_bits; if (vao == ctx->Array.VAO) ctx->NewState |= _NEW_ARRAY; /* Update the map mode if needed */ - if (array_bit & (VERT_BIT_POS|VERT_BIT_GENERIC0)) + if (attrib_bits & (VERT_BIT_POS|VERT_BIT_GENERIC0)) update_attribute_map_mode(ctx, vao); } } @@ -1158,25 +1169,25 @@ void -_mesa_disable_vertex_array_attrib(struct gl_context *ctx, - struct gl_vertex_array_object *vao, - gl_vert_attrib attrib) +_mesa_disable_vertex_array_attribs(struct gl_context *ctx, + struct gl_vertex_array_object *vao, + GLbitfield attrib_bits) { - assert(attrib < ARRAY_SIZE(vao->VertexAttrib)); + assert((attrib_bits & ~VERT_BIT_ALL) == 0); assert(!vao->SharedAndImmutable); - if (vao->VertexAttrib[attrib].Enabled) { + /* Only work on bits that are enabled */ + attrib_bits &= vao->Enabled; + if (attrib_bits) { /* was enabled, now being disabled */ - vao->VertexAttrib[attrib].Enabled = GL_FALSE; - const GLbitfield array_bit = VERT_BIT(attrib); - vao->_Enabled &= ~array_bit; - vao->NewArrays |= array_bit; + vao->Enabled &= ~attrib_bits; + vao->NewArrays |= attrib_bits; if 
(vao == ctx->Array.VAO) ctx->NewState |= _NEW_ARRAY; /* Update the map mode if needed */ - if (array_bit & (VERT_BIT_POS|VERT_BIT_GENERIC0)) + if (attrib_bits & (VERT_BIT_POS|VERT_BIT_GENERIC0)) update_attribute_map_mode(ctx, vao); } } @@ -1267,27 +1278,27 @@ switch (pname) { case GL_VERTEX_ATTRIB_ARRAY_ENABLED_ARB: - return array->Enabled; + return !!(vao->Enabled & VERT_BIT_GENERIC(index)); case GL_VERTEX_ATTRIB_ARRAY_SIZE_ARB: - return (array->Format == GL_BGRA) ? GL_BGRA : array->Size; + return (array->Format.Format == GL_BGRA) ? GL_BGRA : array->Format.Size; case GL_VERTEX_ATTRIB_ARRAY_STRIDE_ARB: return array->Stride; case GL_VERTEX_ATTRIB_ARRAY_TYPE_ARB: - return array->Type; + return array->Format.Type; case GL_VERTEX_ATTRIB_ARRAY_NORMALIZED_ARB: - return array->Normalized; + return array->Format.Normalized; case GL_VERTEX_ATTRIB_ARRAY_BUFFER_BINDING_ARB: return vao->BufferBinding[array->BufferBindingIndex].BufferObj->Name; case GL_VERTEX_ATTRIB_ARRAY_INTEGER: if ((_mesa_is_desktop_gl(ctx) && (ctx->Version >= 30 || ctx->Extensions.EXT_gpu_shader4)) || _mesa_is_gles3(ctx)) { - return array->Integer; + return array->Format.Integer; } goto error; case GL_VERTEX_ATTRIB_ARRAY_LONG: if (_mesa_is_desktop_gl(ctx)) { - return array->Doubles; + return array->Format.Doubles; } goto error; case GL_VERTEX_ATTRIB_ARRAY_DIVISOR_ARB: @@ -2771,18 +2782,11 @@ struct gl_array_attributes *dst, const struct gl_array_attributes *src) { - dst->Size = src->Size; - dst->Type = src->Type; - dst->Format = src->Format; - dst->BufferBindingIndex = src->BufferBindingIndex; + dst->Ptr = src->Ptr; dst->RelativeOffset = src->RelativeOffset; dst->Format = src->Format; - dst->Integer = src->Integer; - dst->Doubles = src->Doubles; - dst->Normalized = src->Normalized; - dst->Ptr = src->Ptr; - dst->Enabled = src->Enabled; - dst->_ElementSize = src->_ElementSize; + dst->Stride = src->Stride; + dst->BufferBindingIndex = src->BufferBindingIndex; dst->_EffBufferBindingIndex = 
src->_EffBufferBindingIndex; dst->_EffRelativeOffset = src->_EffRelativeOffset; } @@ -2812,11 +2816,10 @@ fprintf(stderr, "Array Object %u\n", vao->Name); - gl_vert_attrib i; - for (i = 0; i < VERT_ATTRIB_MAX; ++i) { + GLbitfield mask = vao->Enabled; + while (mask) { + const gl_vert_attrib i = u_bit_scan(&mask); const struct gl_array_attributes *array = &vao->VertexAttrib[i]; - if (!array->Enabled) - continue; const struct gl_vertex_buffer_binding *binding = &vao->BufferBinding[array->BufferBindingIndex]; @@ -2825,8 +2828,9 @@ fprintf(stderr, " %s: Ptr=%p, Type=%s, Size=%d, ElemSize=%u, " "Stride=%d, Buffer=%u(Size %lu)\n", gl_vert_attrib_name((gl_vert_attrib)i), - array->Ptr, _mesa_enum_to_string(array->Type), array->Size, - array->_ElementSize, binding->Stride, bo->Name, + array->Ptr, _mesa_enum_to_string(array->Format.Type), + array->Format.Size, + array->Format._ElementSize, binding->Stride, bo->Name, (unsigned long) bo->Size); } } diff -Nru mesa-18.3.3/src/mesa/main/varray.h mesa-19.0.1/src/mesa/main/varray.h --- mesa-18.3.3/src/mesa/main/varray.h 2018-12-07 18:58:04.000000000 +0000 +++ mesa-19.0.1/src/mesa/main/varray.h 2019-03-31 23:16:37.000000000 +0000 @@ -29,6 +29,12 @@ #include "bufferobj.h" +void +_mesa_set_vertex_format(struct gl_vertex_format *vertex_format, + GLubyte size, GLenum16 type, GLenum16 format, + GLboolean normalized, GLboolean integer, + GLboolean doubles); + /** * Returns a pointer to the vertex attribute data in a client array, @@ -62,15 +68,33 @@ GLuint relativeOffset); extern void +_mesa_enable_vertex_array_attribs(struct gl_context *ctx, + struct gl_vertex_array_object *vao, + GLbitfield attrib_bits); + +static inline void _mesa_enable_vertex_array_attrib(struct gl_context *ctx, struct gl_vertex_array_object *vao, - gl_vert_attrib attrib); + gl_vert_attrib attrib) +{ + assert(attrib < VERT_ATTRIB_MAX); + _mesa_enable_vertex_array_attribs(ctx, vao, VERT_BIT(attrib)); +} extern void +_mesa_disable_vertex_array_attribs(struct gl_context 
*ctx, + struct gl_vertex_array_object *vao, + GLbitfield attrib_bits); + +static inline void _mesa_disable_vertex_array_attrib(struct gl_context *ctx, struct gl_vertex_array_object *vao, - gl_vert_attrib attrib); + gl_vert_attrib attrib) +{ + assert(attrib < VERT_ATTRIB_MAX); + _mesa_disable_vertex_array_attribs(ctx, vao, VERT_BIT(attrib)); +} extern void diff -Nru mesa-18.3.3/src/mesa/main/version.c mesa-19.0.1/src/mesa/main/version.c --- mesa-18.3.3/src/mesa/main/version.c 2018-12-07 18:58:04.000000000 +0000 +++ mesa-19.0.1/src/mesa/main/version.c 2019-03-31 23:16:37.000000000 +0000 @@ -509,20 +509,24 @@ extensions->ARB_internalformat_query && extensions->ARB_map_buffer_range && extensions->ARB_shader_texture_lod && - extensions->ARB_texture_float && + extensions->OES_texture_float && + extensions->OES_texture_half_float && + extensions->OES_texture_half_float_linear && extensions->ARB_texture_rg && extensions->ARB_depth_buffer_float && - /* extensions->ARB_framebuffer_object && */ - extensions->EXT_framebuffer_sRGB && + extensions->ARB_framebuffer_object && + extensions->EXT_sRGB && extensions->EXT_packed_float && extensions->EXT_texture_array && extensions->EXT_texture_shared_exponent && + extensions->EXT_texture_sRGB && extensions->EXT_transform_feedback && extensions->ARB_draw_instanced && extensions->ARB_uniform_buffer_object && extensions->EXT_texture_snorm && extensions->NV_primitive_restart && - extensions->OES_depth_texture_cube_map); + extensions->OES_depth_texture_cube_map && + extensions->EXT_texture_type_2_10_10_10_REV); const bool es31_compute_shader = consts->MaxComputeWorkGroupInvocations >= 128; const bool ver_3_1 = (ver_3_0 && diff -Nru mesa-18.3.3/src/mesa/program/prog_parameter.c mesa-19.0.1/src/mesa/program/prog_parameter.c --- mesa-18.3.3/src/mesa/program/prog_parameter.c 2018-03-26 16:53:06.000000000 +0000 +++ mesa-19.0.1/src/mesa/program/prog_parameter.c 2019-03-31 23:16:37.000000000 +0000 @@ -271,6 +271,7 @@ p->Name = strdup(name ? 
name : ""); p->Type = type; p->Size = size; + p->Padded = pad_and_align; p->DataType = datatype; paramList->ParameterValueOffset[oldNum] = oldValNum; diff -Nru mesa-18.3.3/src/mesa/program/prog_parameter.h mesa-19.0.1/src/mesa/program/prog_parameter.h --- mesa-18.3.3/src/mesa/program/prog_parameter.h 2018-04-16 21:31:06.000000000 +0000 +++ mesa-19.0.1/src/mesa/program/prog_parameter.h 2019-03-31 23:16:37.000000000 +0000 @@ -104,6 +104,12 @@ * A sequence of STATE_* tokens and integers to identify GL state. */ gl_state_index16 StateIndexes[STATE_LENGTH]; + + /** + * We need to keep track of whether the param is padded for use in the + * shader cache. + */ + bool Padded; }; diff -Nru mesa-18.3.3/src/mesa/program/prog_to_nir.c mesa-19.0.1/src/mesa/program/prog_to_nir.c --- mesa-18.3.3/src/mesa/program/prog_to_nir.c 2018-12-07 18:58:04.000000000 +0000 +++ mesa-19.0.1/src/mesa/program/prog_to_nir.c 2019-03-31 23:16:37.000000000 +0000 @@ -393,7 +393,7 @@ ptn_slt(nir_builder *b, nir_alu_dest dest, nir_ssa_def **src) { if (b->shader->options->native_integers) { - ptn_move_dest(b, dest, nir_b2f(b, nir_flt(b, src[0], src[1]))); + ptn_move_dest(b, dest, nir_b2f32(b, nir_flt(b, src[0], src[1]))); } else { ptn_move_dest(b, dest, nir_slt(b, src[0], src[1])); } @@ -406,7 +406,7 @@ ptn_sge(nir_builder *b, nir_alu_dest dest, nir_ssa_def **src) { if (b->shader->options->native_integers) { - ptn_move_dest(b, dest, nir_b2f(b, nir_fge(b, src[0], src[1]))); + ptn_move_dest(b, dest, nir_b2f32(b, nir_fge(b, src[0], src[1]))); } else { ptn_move_dest(b, dest, nir_sge(b, src[0], src[1])); } @@ -983,7 +983,7 @@ if (prog->Parameters->NumParameters > 0) { c->parameters = rzalloc(s, nir_variable); c->parameters->type = - glsl_array_type(glsl_vec4_type(), prog->Parameters->NumParameters); + glsl_array_type(glsl_vec4_type(), prog->Parameters->NumParameters, 0); c->parameters->name = "parameters"; c->parameters->data.read_only = true; c->parameters->data.mode = nir_var_uniform; diff -Nru 
mesa-18.3.3/src/mesa/state_tracker/st_atom_array.c mesa-19.0.1/src/mesa/state_tracker/st_atom_array.c --- mesa-18.3.3/src/mesa/state_tracker/st_atom_array.c 2018-09-27 19:13:54.000000000 +0000 +++ mesa-19.0.1/src/mesa/state_tracker/st_atom_array.c 2019-03-31 23:16:37.000000000 +0000 @@ -237,19 +237,19 @@ /** * Return a PIPE_FORMAT_x for the given GL datatype and size. */ -enum pipe_format -st_pipe_vertex_format(const struct gl_array_attributes *attrib) +static enum pipe_format +st_pipe_vertex_format(const struct gl_vertex_format *vformat) { - const GLubyte size = attrib->Size; - const GLenum16 format = attrib->Format; - const bool normalized = attrib->Normalized; - const bool integer = attrib->Integer; - GLenum16 type = attrib->Type; + const GLubyte size = vformat->Size; + const GLenum16 format = vformat->Format; + const bool normalized = vformat->Normalized; + const bool integer = vformat->Integer; + GLenum16 type = vformat->Type; unsigned index; assert(size >= 1 && size <= 4); assert(format == GL_RGBA || format == GL_BGRA); - assert(attrib->_ElementSize == _mesa_bytes_per_vertex_attrib(size, type)); + assert(vformat->_ElementSize == _mesa_bytes_per_vertex_attrib(size, type)); switch (type) { case GL_HALF_FLOAT_OES: @@ -320,13 +320,13 @@ static void init_velement_lowered(const struct st_vertex_program *vp, struct pipe_vertex_element *velements, - const struct gl_array_attributes *attrib, + const struct gl_vertex_format *vformat, int src_offset, int instance_divisor, int vbo_index, int idx) { - const GLubyte nr_components = attrib->Size; + const GLubyte nr_components = vformat->Size; - if (attrib->Doubles) { + if (vformat->Doubles) { int lower_format; if (nr_components < 2) @@ -357,7 +357,7 @@ } } } else { - const unsigned format = st_pipe_vertex_format(attrib); + const unsigned format = st_pipe_vertex_format(vformat); init_velement(&velements[idx], src_offset, format, instance_divisor, vbo_index); @@ -384,25 +384,17 @@ } void -st_update_array(struct st_context 
*st) +st_setup_arrays(struct st_context *st, + const struct st_vertex_program *vp, + const struct st_vp_variant *vp_variant, + struct pipe_vertex_element *velements, + struct pipe_vertex_buffer *vbuffer, unsigned *num_vbuffers) { struct gl_context *ctx = st->ctx; - /* vertex program validation must be done before this */ - const struct st_vertex_program *vp = st->vp; - /* _NEW_PROGRAM, ST_NEW_VS_STATE */ - const GLbitfield inputs_read = st->vp_variant->vert_attrib_mask; const struct gl_vertex_array_object *vao = ctx->Array._DrawVAO; + const GLbitfield inputs_read = vp_variant->vert_attrib_mask; const ubyte *input_to_index = vp->input_to_index; - struct pipe_vertex_buffer vbuffer[PIPE_MAX_ATTRIBS]; - struct pipe_vertex_element velements[PIPE_MAX_ATTRIBS]; - unsigned num_vbuffers = 0; - - st->vertex_array_out_of_memory = FALSE; - st->draw_needs_minmax_index = false; - - /* _NEW_PROGRAM */ - /* ST_NEW_VERTEX_ARRAYS alias ctx->DriverFlags.NewArray */ /* Process attribute array data. */ GLbitfield mask = inputs_read & _mesa_draw_array_bits(ctx); while (mask) { @@ -410,7 +402,7 @@ const gl_vert_attrib i = ffs(mask) - 1; const struct gl_vertex_buffer_binding *const binding = _mesa_draw_buffer_binding(vao, i); - const unsigned bufidx = num_vbuffers++; + const unsigned bufidx = (*num_vbuffers)++; if (_mesa_is_bufferobj(binding->BufferObj)) { struct st_buffer_object *stobj = st_buffer_object(binding->BufferObj); @@ -447,35 +439,47 @@ const struct gl_array_attributes *const attrib = _mesa_draw_array_attrib(vao, attr); const GLuint off = _mesa_draw_attributes_relative_offset(attrib); - init_velement_lowered(vp, velements, attrib, off, + init_velement_lowered(vp, velements, &attrib->Format, off, binding->InstanceDivisor, bufidx, input_to_index[attr]); } } +} + +void +st_setup_current(struct st_context *st, + const struct st_vertex_program *vp, + const struct st_vp_variant *vp_variant, + struct pipe_vertex_element *velements, + struct pipe_vertex_buffer *vbuffer, unsigned 
*num_vbuffers) +{ + struct gl_context *ctx = st->ctx; + const GLbitfield inputs_read = vp_variant->vert_attrib_mask; - const unsigned first_current_vbuffer = num_vbuffers; - /* _NEW_PROGRAM | _NEW_CURRENT_ATTRIB */ /* Process values that should have better been uniforms in the application */ GLbitfield curmask = inputs_read & _mesa_draw_current_bits(ctx); if (curmask) { + /* vertex program validation must be done before this */ + const struct st_vertex_program *vp = st->vp; + const ubyte *input_to_index = vp->input_to_index; /* For each attribute, upload the maximum possible size. */ GLubyte data[VERT_ATTRIB_MAX * sizeof(GLdouble) * 4]; GLubyte *cursor = data; - const unsigned bufidx = num_vbuffers++; + const unsigned bufidx = (*num_vbuffers)++; unsigned max_alignment = 1; while (curmask) { const gl_vert_attrib attr = u_bit_scan(&curmask); const struct gl_array_attributes *const attrib = _mesa_draw_current_attrib(ctx, attr); - const unsigned size = attrib->_ElementSize; + const unsigned size = attrib->Format._ElementSize; const unsigned alignment = util_next_power_of_two(size); max_alignment = MAX2(max_alignment, alignment); memcpy(cursor, attrib->Ptr, size); if (alignment != size) memset(cursor + size, 0, alignment - size); - init_velement_lowered(vp, velements, attrib, cursor - data, 0, + init_velement_lowered(vp, velements, &attrib->Format, cursor - data, 0, bufidx, input_to_index[attr]); cursor += alignment; @@ -498,17 +502,79 @@ 0, cursor - data, max_alignment, data, &vbuffer[bufidx].buffer_offset, &vbuffer[bufidx].buffer.resource); + + if (!ctx->Const.AllowMappedBuffersDuringExecution && + !st->can_bind_const_buffer_as_vertex) { + u_upload_unmap(st->pipe->stream_uploader); + } } +} + +void +st_setup_current_user(struct st_context *st, + const struct st_vertex_program *vp, + const struct st_vp_variant *vp_variant, + struct pipe_vertex_element *velements, + struct pipe_vertex_buffer *vbuffer, unsigned *num_vbuffers) +{ + struct gl_context *ctx = st->ctx; + 
const GLbitfield inputs_read = vp_variant->vert_attrib_mask; + const ubyte *input_to_index = vp->input_to_index; - if (!ctx->Const.AllowMappedBuffersDuringExecution) { - u_upload_unmap(st->pipe->stream_uploader); + /* Process values that should have better been uniforms in the application */ + GLbitfield curmask = inputs_read & _mesa_draw_current_bits(ctx); + /* For each attribute, make an own user buffer binding. */ + while (curmask) { + const gl_vert_attrib attr = u_bit_scan(&curmask); + const struct gl_array_attributes *const attrib + = _mesa_draw_current_attrib(ctx, attr); + const unsigned bufidx = (*num_vbuffers)++; + + init_velement_lowered(vp, velements, &attrib->Format, 0, 0, + bufidx, input_to_index[attr]); + + vbuffer[bufidx].is_user_buffer = true; + vbuffer[bufidx].buffer.user = attrib->Ptr; + vbuffer[bufidx].buffer_offset = 0; + vbuffer[bufidx].stride = 0; } +} - const unsigned num_inputs = st->vp_variant->num_inputs; - set_vertex_attribs(st, vbuffer, num_vbuffers, velements, num_inputs); +void +st_update_array(struct st_context *st) +{ + /* vertex program validation must be done before this */ + /* _NEW_PROGRAM, ST_NEW_VS_STATE */ + const struct st_vertex_program *vp = st->vp; + const struct st_vp_variant *vp_variant = st->vp_variant; + + struct pipe_vertex_buffer vbuffer[PIPE_MAX_ATTRIBS]; + unsigned num_vbuffers = 0, first_upload_vbuffer; + struct pipe_vertex_element velements[PIPE_MAX_ATTRIBS]; + unsigned num_velements; + + st->vertex_array_out_of_memory = FALSE; + st->draw_needs_minmax_index = false; + + /* ST_NEW_VERTEX_ARRAYS alias ctx->DriverFlags.NewArray */ + /* Setup arrays */ + st_setup_arrays(st, vp, vp_variant, velements, vbuffer, &num_vbuffers); + if (st->vertex_array_out_of_memory) + return; + + /* _NEW_CURRENT_ATTRIB */ + /* Setup current uploads */ + first_upload_vbuffer = num_vbuffers; + st_setup_current(st, vp, vp_variant, velements, vbuffer, &num_vbuffers); + if (st->vertex_array_out_of_memory) + return; + + /* Set the array into 
cso */ + num_velements = vp_variant->num_inputs; + set_vertex_attribs(st, vbuffer, num_vbuffers, velements, num_velements); - /* Unreference uploaded zero-stride vertex buffers. */ - for (unsigned i = first_current_vbuffer; i < num_vbuffers; ++i) { + /* Unreference uploaded buffer resources. */ + for (unsigned i = first_upload_vbuffer; i < num_vbuffers; ++i) { pipe_resource_reference(&vbuffer[i].buffer.resource, NULL); } } diff -Nru mesa-18.3.3/src/mesa/state_tracker/st_atom_blend.c mesa-19.0.1/src/mesa/state_tracker/st_atom_blend.c --- mesa-18.3.3/src/mesa/state_tracker/st_atom_blend.c 2018-12-07 18:58:04.000000000 +0000 +++ mesa-19.0.1/src/mesa/state_tracker/st_atom_blend.c 2019-03-31 23:16:37.000000000 +0000 @@ -41,6 +41,7 @@ #include "framebuffer.h" #include "main/blend.h" +#include "main/glformats.h" #include "main/macros.h" /** @@ -126,8 +127,9 @@ * Figure out if blend enables/state are different per rt. */ static GLboolean -blend_per_rt(const struct gl_context *ctx, unsigned num_cb) +blend_per_rt(const struct st_context *st, unsigned num_cb) { + const struct gl_context *ctx = st->ctx; GLbitfield cb_mask = u_bit_consecutive(0, num_cb); GLbitfield blend_enabled = ctx->Color.BlendEnabled & cb_mask; @@ -145,9 +147,49 @@ * must be handled on a per buffer basis. */ return GL_TRUE; } + + if (st->needs_rgb_dst_alpha_override && ctx->DrawBuffer->_RGBBuffers) { + /* Overriding requires independent blend functions (not just enables), + * require drivers exposing PIPE_CAP_RGB_OVERRIDE_DST_ALPHA_BLEND to + * also expose PIPE_CAP_INDEP_BLEND_FUNC. + */ + assert(st->has_indep_blend_func); + + /* If some of the buffers are RGB, we may need to override blend + * factors that reference destination-alpha to constants. We may + * need different blend factor overrides per buffer (say one uses + * a DST_ALPHA factor and another uses INV_DST_ALPHA), so we flip + * on independent blending. 
This may not be required in all cases, + * but burning the CPU to figure it out is probably not worthwhile. + */ + return GL_TRUE; + } + return GL_FALSE; } +/** + * Modify blend function to force destination alpha to 1.0 + * + * If \c function specifies a blend function that uses destination alpha, + * replace it with a function that hard-wires destination alpha to 1.0. + * This is useful when emulating a GL RGB format with an RGBA pipe_format. + */ +static enum pipe_blendfactor +fix_xrgb_alpha(enum pipe_blendfactor factor) +{ + switch (factor) { + case PIPE_BLENDFACTOR_DST_ALPHA: + return PIPE_BLENDFACTOR_ONE; + + case PIPE_BLENDFACTOR_INV_DST_ALPHA: + case PIPE_BLENDFACTOR_SRC_ALPHA_SATURATE: + return PIPE_BLENDFACTOR_ZERO; + default: + return factor; + } +} + void st_update_blend( struct st_context *st ) { @@ -160,7 +202,7 @@ memset(blend, 0, sizeof(*blend)); if (num_cb > 1 && - (blend_per_rt(ctx, num_cb) || colormask_per_rt(ctx, num_cb))) { + (blend_per_rt(st, num_cb) || colormask_per_rt(ctx, num_cb))) { num_state = num_cb; blend->independent_blend_enable = 1; } @@ -216,6 +258,18 @@ blend->rt[i].alpha_dst_factor = translate_blend(ctx->Color.Blend[j].DstA); } + + const struct gl_renderbuffer *rb = + ctx->DrawBuffer->_ColorDrawBuffers[i]; + + if (st->needs_rgb_dst_alpha_override && rb && + (ctx->DrawBuffer->_RGBBuffers & (1 << i))) { + struct pipe_rt_blend_state *rt = &blend->rt[i]; + rt->rgb_src_factor = fix_xrgb_alpha(rt->rgb_src_factor); + rt->rgb_dst_factor = fix_xrgb_alpha(rt->rgb_dst_factor); + rt->alpha_src_factor = fix_xrgb_alpha(rt->alpha_src_factor); + rt->alpha_dst_factor = fix_xrgb_alpha(rt->alpha_dst_factor); + } } } else { diff -Nru mesa-18.3.3/src/mesa/state_tracker/st_atom.h mesa-19.0.1/src/mesa/state_tracker/st_atom.h --- mesa-18.3.3/src/mesa/state_tracker/st_atom.h 2018-09-27 19:13:54.000000000 +0000 +++ mesa-19.0.1/src/mesa/state_tracker/st_atom.h 2019-03-31 23:16:37.000000000 +0000 @@ -37,6 +37,10 @@ #include "main/glheader.h" struct 
st_context; +struct st_vertex_program; +struct st_vp_variant; +struct pipe_vertex_buffer; +struct pipe_vertex_element; /** * Enumeration of state tracker pipelines. @@ -54,9 +58,26 @@ void st_validate_state( struct st_context *st, enum st_pipeline pipeline ); GLuint st_compare_func_to_pipe(GLenum func); -enum pipe_format -st_pipe_vertex_format(const struct gl_array_attributes *attrib); - +void +st_setup_arrays(struct st_context *st, + const struct st_vertex_program *vp, + const struct st_vp_variant *vp_variant, + struct pipe_vertex_element *velements, + struct pipe_vertex_buffer *vbuffer, unsigned *num_vbuffers); + +void +st_setup_current(struct st_context *st, + const struct st_vertex_program *vp, + const struct st_vp_variant *vp_variant, + struct pipe_vertex_element *velements, + struct pipe_vertex_buffer *vbuffer, unsigned *num_vbuffers); + +void +st_setup_current_user(struct st_context *st, + const struct st_vertex_program *vp, + const struct st_vp_variant *vp_variant, + struct pipe_vertex_element *velements, + struct pipe_vertex_buffer *vbuffer, unsigned *num_vbuffers); /* Define ST_NEW_xxx_INDEX */ enum { diff -Nru mesa-18.3.3/src/mesa/state_tracker/st_cb_bitmap.c mesa-19.0.1/src/mesa/state_tracker/st_cb_bitmap.c --- mesa-18.3.3/src/mesa/state_tracker/st_cb_bitmap.c 2018-12-07 18:58:04.000000000 +0000 +++ mesa-19.0.1/src/mesa/state_tracker/st_cb_bitmap.c 2019-03-31 23:16:37.000000000 +0000 @@ -46,6 +46,7 @@ #include "st_draw.h" #include "st_program.h" #include "st_cb_bitmap.h" +#include "st_cb_drawpixels.h" #include "st_sampler_view.h" #include "st_texture.h" @@ -53,7 +54,6 @@ #include "pipe/p_defines.h" #include "pipe/p_shader_tokens.h" #include "util/u_inlines.h" -#include "util/u_simple_shaders.h" #include "util/u_upload_mgr.h" #include "program/prog_instruction.h" #include "cso_cache/cso_context.h" @@ -214,7 +214,7 @@ cso_set_fragment_shader_handle(cso, fpv->driver_shader); /* vertex shader state: position + texcoord pass-through */ - 
cso_set_vertex_shader_handle(cso, st->bitmap.vs); + cso_set_vertex_shader_handle(cso, st->passthrough_vs); /* disable other shaders */ cso_set_tessctrl_shader_handle(cso, NULL); @@ -538,7 +538,7 @@ struct pipe_screen *screen = pipe->screen; /* This function should only be called once */ - assert(st->bitmap.vs == NULL); + assert(!st->bitmap.tex_format); assert(st->internal_target == PIPE_TEXTURE_2D || st->internal_target == PIPE_TEXTURE_RECT); @@ -585,17 +585,7 @@ } /* Create the vertex shader */ - { - const uint semantic_names[] = { TGSI_SEMANTIC_POSITION, - TGSI_SEMANTIC_COLOR, - st->needs_texcoord_semantic ? TGSI_SEMANTIC_TEXCOORD : - TGSI_SEMANTIC_GENERIC }; - const uint semantic_indexes[] = { 0, 0, 0 }; - st->bitmap.vs = util_make_vertex_passthrough_shader(st->pipe, 3, - semantic_names, - semantic_indexes, - FALSE); - } + st_make_passthrough_vertex_shader(st); reset_cache(st); } @@ -617,7 +607,7 @@ st_invalidate_readpix_cache(st); - if (!st->bitmap.vs) { + if (!st->bitmap.tex_format) { init_bitmap_state(st); } @@ -677,7 +667,7 @@ struct pipe_vertex_buffer vb = {0}; unsigned i; - if (!st->bitmap.vs) { + if (!st->bitmap.tex_format) { init_bitmap_state(st); } @@ -807,11 +797,6 @@ struct pipe_context *pipe = st->pipe; struct st_bitmap_cache *cache = &st->bitmap.cache; - if (st->bitmap.vs) { - cso_delete_vertex_shader(st->cso_context, st->bitmap.vs); - st->bitmap.vs = NULL; - } - if (cache->trans && cache->buffer) { pipe_transfer_unmap(pipe, cache->trans); } diff -Nru mesa-18.3.3/src/mesa/state_tracker/st_cb_clear.c mesa-19.0.1/src/mesa/state_tracker/st_cb_clear.c --- mesa-18.3.3/src/mesa/state_tracker/st_cb_clear.c 2018-12-07 18:58:04.000000000 +0000 +++ mesa-19.0.1/src/mesa/state_tracker/st_cb_clear.c 2019-03-31 23:16:37.000000000 +0000 @@ -392,12 +392,18 @@ if (!strb || !strb->surface) continue; - if (!GET_COLORMASK(ctx->Color.ColorMask, colormask_index)) + unsigned colormask = + GET_COLORMASK(ctx->Color.ColorMask, colormask_index); + + if (!colormask) continue; 
+ unsigned surf_colormask = + util_format_colormask(util_format_description(strb->surface->format)); + if (is_scissor_enabled(ctx, rb) || is_window_rectangle_enabled(ctx) || - GET_COLORMASK(ctx->Color.ColorMask, colormask_index) != 0xf) + ((colormask & surf_colormask) != surf_colormask)) quad_buffers |= PIPE_CLEAR_COLOR0 << i; else clear_buffers |= PIPE_CLEAR_COLOR0 << i; @@ -442,9 +448,6 @@ * use pipe->clear. We want to always use pipe->clear for the other * renderbuffers, because it's likely to be faster. */ - if (quad_buffers) { - clear_with_quad(ctx, quad_buffers); - } if (clear_buffers) { /* We can't translate the clear color to the colorbuffer format, * because different colorbuffers may have different formats. @@ -453,6 +456,9 @@ (union pipe_color_union*)&ctx->Color.ClearColor, ctx->Depth.Clear, ctx->Stencil.Clear); } + if (quad_buffers) { + clear_with_quad(ctx, quad_buffers); + } if (mask & BUFFER_BIT_ACCUM) _mesa_clear_accum_buffer(ctx); } diff -Nru mesa-18.3.3/src/mesa/state_tracker/st_cb_drawpixels.c mesa-19.0.1/src/mesa/state_tracker/st_cb_drawpixels.c --- mesa-18.3.3/src/mesa/state_tracker/st_cb_drawpixels.c 2018-12-07 18:58:04.000000000 +0000 +++ mesa-19.0.1/src/mesa/state_tracker/st_cb_drawpixels.c 2019-03-31 23:16:37.000000000 +0000 @@ -72,6 +72,7 @@ #include "util/u_format.h" #include "util/u_inlines.h" #include "util/u_math.h" +#include "util/u_simple_shaders.h" #include "util/u_tile.h" #include "cso_cache/cso_context.h" @@ -191,45 +192,23 @@ /** * Create a simple vertex shader that just passes through the - * vertex position and texcoord (and optionally, color). + * vertex position, texcoord, and color. */ -static void * -make_passthrough_vertex_shader(struct st_context *st, - GLboolean passColor) +void +st_make_passthrough_vertex_shader(struct st_context *st) { - const enum tgsi_semantic texcoord_semantic = st->needs_texcoord_semantic ? 
- TGSI_SEMANTIC_TEXCOORD : TGSI_SEMANTIC_GENERIC; - - if (!st->drawpix.vert_shaders[passColor]) { - struct ureg_program *ureg = ureg_create( PIPE_SHADER_VERTEX ); - - if (ureg == NULL) - return NULL; - - /* MOV result.pos, vertex.pos; */ - ureg_MOV(ureg, - ureg_DECL_output( ureg, TGSI_SEMANTIC_POSITION, 0 ), - ureg_DECL_vs_input( ureg, 0 )); - - if (passColor) { - /* MOV result.color0, vertex.attr[1]; */ - ureg_MOV(ureg, - ureg_DECL_output( ureg, TGSI_SEMANTIC_COLOR, 0 ), - ureg_DECL_vs_input( ureg, 1 )); - } - - /* MOV result.texcoord0, vertex.attr[2]; */ - ureg_MOV(ureg, - ureg_DECL_output( ureg, texcoord_semantic, 0 ), - ureg_DECL_vs_input( ureg, 2 )); - - ureg_END( ureg ); - - st->drawpix.vert_shaders[passColor] = - ureg_create_shader_and_destroy( ureg, st->pipe ); - } + if (st->passthrough_vs) + return; - return st->drawpix.vert_shaders[passColor]; + const uint semantic_names[] = { TGSI_SEMANTIC_POSITION, + TGSI_SEMANTIC_COLOR, + st->needs_texcoord_semantic ? TGSI_SEMANTIC_TEXCOORD : + TGSI_SEMANTIC_GENERIC }; + const uint semantic_indexes[] = { 0, 0, 0 }; + + st->passthrough_vs = + util_make_vertex_passthrough_shader(st->pipe, 3, semantic_names, + semantic_indexes, false); } @@ -1135,7 +1114,7 @@ GLenum format, GLenum type, const struct gl_pixelstore_attrib *unpack, const void *pixels) { - void *driver_vp, *driver_fp; + void *driver_fp; struct st_context *st = st_context(ctx); struct pipe_context *pipe = st->pipe; GLboolean write_stencil = GL_FALSE, write_depth = GL_FALSE; @@ -1185,19 +1164,19 @@ return; } + st_make_passthrough_vertex_shader(st); + /* * Get vertex/fragment shaders */ if (write_depth || write_stencil) { driver_fp = get_drawpix_z_stencil_program(st, write_depth, write_stencil); - driver_vp = make_passthrough_vertex_shader(st, GL_TRUE); } else { fpv = get_color_fp_variant(st); driver_fp = fpv->driver_shader; - driver_vp = make_passthrough_vertex_shader(st, GL_FALSE); if (ctx->Pixel.MapColorFlag) { pipe_sampler_view_reference(&sv[1], @@ -1246,7 
+1225,7 @@ ctx->Pixel.ZoomX, ctx->Pixel.ZoomY, sv, num_sampler_view, - driver_vp, + st->passthrough_vs, driver_fp, fpv, ctx->Current.RasterColor, GL_FALSE, write_depth, write_stencil); @@ -1506,7 +1485,7 @@ struct pipe_context *pipe = st->pipe; struct pipe_screen *screen = pipe->screen; struct st_renderbuffer *rbRead; - void *driver_vp, *driver_fp; + void *driver_fp; struct pipe_resource *pt; struct pipe_sampler_view *sv[2] = { NULL }; struct st_fp_variant *fpv = NULL; @@ -1547,6 +1526,7 @@ * are handled. */ + st_make_passthrough_vertex_shader(st); /* * Get vertex/fragment shaders @@ -1557,7 +1537,6 @@ rbRead = st_get_color_read_renderbuffer(ctx); driver_fp = fpv->driver_shader; - driver_vp = make_passthrough_vertex_shader(st, GL_FALSE); if (ctx->Pixel.MapColorFlag) { pipe_sampler_view_reference(&sv[1], @@ -1576,7 +1555,6 @@ Attachment[BUFFER_DEPTH].Renderbuffer); driver_fp = get_drawpix_z_stencil_program(st, GL_TRUE, GL_FALSE); - driver_vp = make_passthrough_vertex_shader(st, GL_TRUE); } /* Choose the format for the temporary texture. 
*/ @@ -1703,7 +1681,7 @@ width, height, ctx->Pixel.ZoomX, ctx->Pixel.ZoomY, sv, num_sampler_view, - driver_vp, + st->passthrough_vs, driver_fp, fpv, ctx->Current.Attrib[VERT_ATTRIB_COLOR0], invertTex, GL_FALSE, GL_FALSE); @@ -1732,10 +1710,8 @@ st->drawpix.zs_shaders[i]); } - if (st->drawpix.vert_shaders[0]) - cso_delete_vertex_shader(st->cso_context, st->drawpix.vert_shaders[0]); - if (st->drawpix.vert_shaders[1]) - cso_delete_vertex_shader(st->cso_context, st->drawpix.vert_shaders[1]); + if (st->passthrough_vs) + cso_delete_vertex_shader(st->cso_context, st->passthrough_vs); /* Free cache data */ for (i = 0; i < ARRAY_SIZE(st->drawpix_cache.entries); i++) { diff -Nru mesa-18.3.3/src/mesa/state_tracker/st_cb_drawpixels.h mesa-19.0.1/src/mesa/state_tracker/st_cb_drawpixels.h --- mesa-18.3.3/src/mesa/state_tracker/st_cb_drawpixels.h 2018-01-24 16:24:53.000000000 +0000 +++ mesa-19.0.1/src/mesa/state_tracker/st_cb_drawpixels.h 2019-03-31 23:16:37.000000000 +0000 @@ -47,4 +47,7 @@ unsigned drawpix_sampler, unsigned pixelmap_sampler, unsigned texcoord_const, unsigned tex_target); +extern void +st_make_passthrough_vertex_shader(struct st_context *st); + #endif /* ST_CB_DRAWPIXELS_H */ diff -Nru mesa-18.3.3/src/mesa/state_tracker/st_cb_eglimage.c mesa-19.0.1/src/mesa/state_tracker/st_cb_eglimage.c --- mesa-18.3.3/src/mesa/state_tracker/st_cb_eglimage.c 2018-09-27 19:13:54.000000000 +0000 +++ mesa-19.0.1/src/mesa/state_tracker/st_cb_eglimage.c 2019-03-31 23:16:37.000000000 +0000 @@ -159,20 +159,11 @@ if (!ps) return; - strb->Base.Width = ps->width; - strb->Base.Height = ps->height; strb->Base.Format = st_pipe_format_to_mesa_format(ps->format); strb->Base._BaseFormat = st_pipe_format_to_base_format(ps->format); strb->Base.InternalFormat = strb->Base._BaseFormat; - struct pipe_surface **psurf = - util_format_is_srgb(ps->format) ? 
&strb->surface_srgb : - &strb->surface_linear; - - pipe_surface_reference(psurf, ps); - strb->surface = *psurf; - pipe_resource_reference(&strb->texture, ps->texture); - + st_set_ws_renderbuffer_surface(strb, ps); pipe_surface_reference(&ps, NULL); } } diff -Nru mesa-18.3.3/src/mesa/state_tracker/st_cb_fbo.c mesa-19.0.1/src/mesa/state_tracker/st_cb_fbo.c --- mesa-18.3.3/src/mesa/state_tracker/st_cb_fbo.c 2019-02-01 12:03:20.000000000 +0000 +++ mesa-19.0.1/src/mesa/state_tracker/st_cb_fbo.c 2019-03-31 23:16:37.000000000 +0000 @@ -139,7 +139,7 @@ /* If an sRGB framebuffer is unsupported, sRGB formats behave like linear * formats. */ - if (!ctx->Extensions.EXT_framebuffer_sRGB) { + if (!ctx->Extensions.EXT_sRGB) { internalFormat = _mesa_get_linear_internalformat(internalFormat); } @@ -519,6 +519,7 @@ surf->texture != resource || surf->width != rtt_width || surf->height != rtt_height || + surf->nr_samples != strb->rtt_nr_samples || surf->u.tex.level != level || surf->u.tex.first_layer != first_layer || surf->u.tex.last_layer != last_layer) { @@ -526,6 +527,7 @@ struct pipe_surface surf_tmpl; memset(&surf_tmpl, 0, sizeof(surf_tmpl)); surf_tmpl.format = format; + surf_tmpl.nr_samples = strb->rtt_nr_samples; surf_tmpl.u.tex.level = level; surf_tmpl.u.tex.first_layer = first_layer; surf_tmpl.u.tex.last_layer = last_layer; @@ -575,6 +577,7 @@ strb->rtt_face = att->CubeMapFace; strb->rtt_slice = att->Zoffset; strb->rtt_layered = att->Layered; + strb->rtt_nr_samples = att->NumSamples; pipe_resource_reference(&strb->texture, pt); st_update_renderbuffer_surface(st, strb); @@ -659,7 +662,7 @@ /* If the encoding is sRGB and sRGB rendering cannot be enabled, * check for linear format support instead. * Later when we create a surface, we change the format to a linear one. 
*/ - if (!ctx->Extensions.EXT_framebuffer_sRGB && + if (!ctx->Extensions.EXT_sRGB && _mesa_get_format_color_encoding(texFormat) == GL_SRGB) { const mesa_format linearFormat = _mesa_get_srgb_format_linear(texFormat); format = st_mesa_format_to_pipe_format(st_context(ctx), linearFormat); @@ -760,6 +763,30 @@ /** + * Called by ctx->Driver.DiscardFramebuffer + */ +static void +st_discard_framebuffer(struct gl_context *ctx, struct gl_framebuffer *fb, + struct gl_renderbuffer_attachment *att) +{ + struct st_context *st = st_context(ctx); + struct pipe_resource *prsc; + + if (!att->Renderbuffer) + return; + + prsc = st_renderbuffer(att->Renderbuffer)->surface->texture; + + /* using invalidate_resource will only work for simple 2D resources */ + if (prsc->depth0 != 1 || prsc->array_size != 1 || prsc->last_level != 0) + return; + + if (st->pipe->invalidate_resource) + st->pipe->invalidate_resource(st->pipe, prsc); +} + + +/** * Called via glDrawBuffer. We only provide this driver function so that we * can check if we need to allocate a new renderbuffer. 
Specifically, we * don't usually allocate a front color buffer when using a double-buffered @@ -936,6 +963,7 @@ functions->RenderTexture = st_render_texture; functions->FinishRenderTexture = st_finish_render_texture; functions->ValidateFramebuffer = st_validate_framebuffer; + functions->DiscardFramebuffer = st_discard_framebuffer; functions->DrawBufferAllocate = st_DrawBufferAllocate; functions->ReadBuffer = st_ReadBuffer; diff -Nru mesa-18.3.3/src/mesa/state_tracker/st_cb_fbo.h mesa-19.0.1/src/mesa/state_tracker/st_cb_fbo.h --- mesa-18.3.3/src/mesa/state_tracker/st_cb_fbo.h 2018-01-24 16:24:53.000000000 +0000 +++ mesa-19.0.1/src/mesa/state_tracker/st_cb_fbo.h 2019-03-31 23:16:37.000000000 +0000 @@ -69,6 +69,7 @@ boolean is_rtt; /**< whether Driver.RenderTexture was called */ unsigned rtt_face, rtt_slice; boolean rtt_layered; /**< whether glFramebufferTexture was called */ + unsigned rtt_nr_samples; /**< from FramebufferTexture2DMultisampleEXT */ }; diff -Nru mesa-18.3.3/src/mesa/state_tracker/st_cb_queryobj.c mesa-19.0.1/src/mesa/state_tracker/st_cb_queryobj.c --- mesa-18.3.3/src/mesa/state_tracker/st_cb_queryobj.c 2018-01-24 16:24:53.000000000 +0000 +++ mesa-19.0.1/src/mesa/state_tracker/st_cb_queryobj.c 2019-03-31 23:16:37.000000000 +0000 @@ -88,6 +88,45 @@ free(stq); } +static int +target_to_index(const struct st_context *st, const struct gl_query_object *q) +{ + if (q->Target == GL_PRIMITIVES_GENERATED || + q->Target == GL_TRANSFORM_FEEDBACK_PRIMITIVES_WRITTEN || + q->Target == GL_TRANSFORM_FEEDBACK_STREAM_OVERFLOW_ARB) + return q->Stream; + + if (st->has_single_pipe_stat) { + switch (q->Target) { + case GL_VERTICES_SUBMITTED_ARB: + return PIPE_STAT_QUERY_IA_VERTICES; + case GL_PRIMITIVES_SUBMITTED_ARB: + return PIPE_STAT_QUERY_IA_PRIMITIVES; + case GL_VERTEX_SHADER_INVOCATIONS_ARB: + return PIPE_STAT_QUERY_VS_INVOCATIONS; + case GL_GEOMETRY_SHADER_INVOCATIONS: + return PIPE_STAT_QUERY_GS_INVOCATIONS; + case GL_GEOMETRY_SHADER_PRIMITIVES_EMITTED_ARB: + return 
PIPE_STAT_QUERY_GS_PRIMITIVES; + case GL_CLIPPING_INPUT_PRIMITIVES_ARB: + return PIPE_STAT_QUERY_C_INVOCATIONS; + case GL_CLIPPING_OUTPUT_PRIMITIVES_ARB: + return PIPE_STAT_QUERY_C_PRIMITIVES; + case GL_FRAGMENT_SHADER_INVOCATIONS_ARB: + return PIPE_STAT_QUERY_PS_INVOCATIONS; + case GL_TESS_CONTROL_SHADER_PATCHES_ARB: + return PIPE_STAT_QUERY_HS_INVOCATIONS; + case GL_TESS_EVALUATION_SHADER_INVOCATIONS_ARB: + return PIPE_STAT_QUERY_DS_INVOCATIONS; + case GL_COMPUTE_SHADER_INVOCATIONS_ARB: + return PIPE_STAT_QUERY_CS_INVOCATIONS; + default: + break; + } + } + + return 0; +} static void st_BeginQuery(struct gl_context *ctx, struct gl_query_object *q) @@ -140,7 +179,8 @@ case GL_COMPUTE_SHADER_INVOCATIONS_ARB: case GL_CLIPPING_INPUT_PRIMITIVES_ARB: case GL_CLIPPING_OUTPUT_PRIMITIVES_ARB: - type = PIPE_QUERY_PIPELINE_STATISTICS; + type = st->has_single_pipe_stat ? PIPE_QUERY_PIPELINE_STATISTICS_SINGLE + : PIPE_QUERY_PIPELINE_STATISTICS; break; default: assert(0 && "unexpected query target in st_BeginQuery()"); @@ -164,7 +204,7 @@ ret = pipe->end_query(pipe, stq->pq_begin); } else { if (!stq->pq) { - stq->pq = pipe->create_query(pipe, type, q->Stream); + stq->pq = pipe->create_query(pipe, type, target_to_index(st, q)); stq->type = type; } if (stq->pq) @@ -226,53 +266,55 @@ if (!pipe->get_query_result(pipe, stq->pq, wait, &data)) return FALSE; - switch (stq->base.Target) { - case GL_VERTICES_SUBMITTED_ARB: - stq->base.Result = data.pipeline_statistics.ia_vertices; - break; - case GL_PRIMITIVES_SUBMITTED_ARB: - stq->base.Result = data.pipeline_statistics.ia_primitives; - break; - case GL_VERTEX_SHADER_INVOCATIONS_ARB: - stq->base.Result = data.pipeline_statistics.vs_invocations; - break; - case GL_TESS_CONTROL_SHADER_PATCHES_ARB: - stq->base.Result = data.pipeline_statistics.hs_invocations; - break; - case GL_TESS_EVALUATION_SHADER_INVOCATIONS_ARB: - stq->base.Result = data.pipeline_statistics.ds_invocations; - break; - case GL_GEOMETRY_SHADER_INVOCATIONS: - 
stq->base.Result = data.pipeline_statistics.gs_invocations; - break; - case GL_GEOMETRY_SHADER_PRIMITIVES_EMITTED_ARB: - stq->base.Result = data.pipeline_statistics.gs_primitives; - break; - case GL_FRAGMENT_SHADER_INVOCATIONS_ARB: - stq->base.Result = data.pipeline_statistics.ps_invocations; - break; - case GL_COMPUTE_SHADER_INVOCATIONS_ARB: - stq->base.Result = data.pipeline_statistics.cs_invocations; - break; - case GL_CLIPPING_INPUT_PRIMITIVES_ARB: - stq->base.Result = data.pipeline_statistics.c_invocations; - break; - case GL_CLIPPING_OUTPUT_PRIMITIVES_ARB: - stq->base.Result = data.pipeline_statistics.c_primitives; - break; - default: - switch (stq->type) { - case PIPE_QUERY_OCCLUSION_PREDICATE: - case PIPE_QUERY_OCCLUSION_PREDICATE_CONSERVATIVE: - case PIPE_QUERY_SO_OVERFLOW_PREDICATE: - case PIPE_QUERY_SO_OVERFLOW_ANY_PREDICATE: - stq->base.Result = !!data.b; + switch (stq->type) { + case PIPE_QUERY_PIPELINE_STATISTICS: + switch (stq->base.Target) { + case GL_VERTICES_SUBMITTED_ARB: + stq->base.Result = data.pipeline_statistics.ia_vertices; break; - default: - stq->base.Result = data.u64; + case GL_PRIMITIVES_SUBMITTED_ARB: + stq->base.Result = data.pipeline_statistics.ia_primitives; + break; + case GL_VERTEX_SHADER_INVOCATIONS_ARB: + stq->base.Result = data.pipeline_statistics.vs_invocations; + break; + case GL_TESS_CONTROL_SHADER_PATCHES_ARB: + stq->base.Result = data.pipeline_statistics.hs_invocations; + break; + case GL_TESS_EVALUATION_SHADER_INVOCATIONS_ARB: + stq->base.Result = data.pipeline_statistics.ds_invocations; + break; + case GL_GEOMETRY_SHADER_INVOCATIONS: + stq->base.Result = data.pipeline_statistics.gs_invocations; + break; + case GL_GEOMETRY_SHADER_PRIMITIVES_EMITTED_ARB: + stq->base.Result = data.pipeline_statistics.gs_primitives; + break; + case GL_FRAGMENT_SHADER_INVOCATIONS_ARB: + stq->base.Result = data.pipeline_statistics.ps_invocations; + break; + case GL_COMPUTE_SHADER_INVOCATIONS_ARB: + stq->base.Result = 
data.pipeline_statistics.cs_invocations; break; + case GL_CLIPPING_INPUT_PRIMITIVES_ARB: + stq->base.Result = data.pipeline_statistics.c_invocations; + break; + case GL_CLIPPING_OUTPUT_PRIMITIVES_ARB: + stq->base.Result = data.pipeline_statistics.c_primitives; + break; + default: + unreachable("invalid pipeline statistics counter"); } break; + case PIPE_QUERY_OCCLUSION_PREDICATE: + case PIPE_QUERY_OCCLUSION_PREDICATE_CONSERVATIVE: + case PIPE_QUERY_SO_OVERFLOW_PREDICATE: + case PIPE_QUERY_SO_OVERFLOW_ANY_PREDICATE: + stq->base.Result = !!data.b; + break; + default: + stq->base.Result = data.u64; + break; } if (stq->base.Target == GL_TIME_ELAPSED && @@ -386,37 +428,37 @@ } else if (stq->type == PIPE_QUERY_PIPELINE_STATISTICS) { switch (q->Target) { case GL_VERTICES_SUBMITTED_ARB: - index = 0; + index = PIPE_STAT_QUERY_IA_VERTICES; break; case GL_PRIMITIVES_SUBMITTED_ARB: - index = 1; + index = PIPE_STAT_QUERY_IA_PRIMITIVES; break; case GL_VERTEX_SHADER_INVOCATIONS_ARB: - index = 2; + index = PIPE_STAT_QUERY_VS_INVOCATIONS; break; case GL_GEOMETRY_SHADER_INVOCATIONS: - index = 3; + index = PIPE_STAT_QUERY_GS_INVOCATIONS; break; case GL_GEOMETRY_SHADER_PRIMITIVES_EMITTED_ARB: - index = 4; + index = PIPE_STAT_QUERY_GS_PRIMITIVES; break; case GL_CLIPPING_INPUT_PRIMITIVES_ARB: - index = 5; + index = PIPE_STAT_QUERY_C_INVOCATIONS; break; case GL_CLIPPING_OUTPUT_PRIMITIVES_ARB: - index = 6; + index = PIPE_STAT_QUERY_C_PRIMITIVES; break; case GL_FRAGMENT_SHADER_INVOCATIONS_ARB: - index = 7; + index = PIPE_STAT_QUERY_PS_INVOCATIONS; break; case GL_TESS_CONTROL_SHADER_PATCHES_ARB: - index = 8; + index = PIPE_STAT_QUERY_HS_INVOCATIONS; break; case GL_TESS_EVALUATION_SHADER_INVOCATIONS_ARB: - index = 9; + index = PIPE_STAT_QUERY_DS_INVOCATIONS; break; case GL_COMPUTE_SHADER_INVOCATIONS_ARB: - index = 10; + index = PIPE_STAT_QUERY_CS_INVOCATIONS; break; default: unreachable("Unexpected target"); diff -Nru mesa-18.3.3/src/mesa/state_tracker/st_cb_rasterpos.c 
mesa-19.0.1/src/mesa/state_tracker/st_cb_rasterpos.c --- mesa-18.3.3/src/mesa/state_tracker/st_cb_rasterpos.c 2018-12-07 18:58:04.000000000 +0000 +++ mesa-19.0.1/src/mesa/state_tracker/st_cb_rasterpos.c 2019-03-31 23:16:37.000000000 +0000 @@ -208,6 +208,10 @@ rs->prim.end = 1; rs->prim.start = 0; rs->prim.count = 1; + rs->prim.pad = 0; + rs->prim.num_instances = 1; + rs->prim.base_instance = 0; + rs->prim.is_indirect = 0; return rs; } diff -Nru mesa-18.3.3/src/mesa/state_tracker/st_cb_texture.c mesa-19.0.1/src/mesa/state_tracker/st_cb_texture.c --- mesa-18.3.3/src/mesa/state_tracker/st_cb_texture.c 2018-12-07 18:58:04.000000000 +0000 +++ mesa-19.0.1/src/mesa/state_tracker/st_cb_texture.c 2019-03-31 23:16:37.000000000 +0000 @@ -1192,7 +1192,6 @@ return false; cso_save_state(cso, (CSO_BIT_FRAGMENT_SAMPLER_VIEWS | - CSO_BIT_FRAGMENT_SAMPLERS | CSO_BIT_VERTEX_ELEMENTS | CSO_BIT_AUX_VERTEX_BUFFER_SLOT | CSO_BIT_FRAMEBUFFER | @@ -1216,8 +1215,6 @@ { struct pipe_sampler_view templ; struct pipe_sampler_view *sampler_view; - struct pipe_sampler_state sampler = {0}; - const struct pipe_sampler_state *samplers[1] = {&sampler}; memset(&templ, 0, sizeof(templ)); templ.target = PIPE_BUFFER; @@ -1237,8 +1234,6 @@ cso_set_sampler_views(cso, PIPE_SHADER_FRAGMENT, 1, &sampler_view); pipe_sampler_view_reference(&sampler_view, NULL); - - cso_set_samplers(cso, PIPE_SHADER_FRAGMENT, 1, samplers); } /* Framebuffer_state */ @@ -1248,11 +1243,9 @@ fb.width = surface->width; fb.height = surface->height; fb.nr_cbufs = 1; - pipe_surface_reference(&fb.cbufs[0], surface); + fb.cbufs[0] = surface; cso_set_framebuffer(cso, &fb); - - pipe_surface_reference(&fb.cbufs[0], NULL); } cso_set_viewport_dims(cso, surface->width, surface->height, FALSE); diff -Nru mesa-18.3.3/src/mesa/state_tracker/st_context.c mesa-19.0.1/src/mesa/state_tracker/st_context.c --- mesa-18.3.3/src/mesa/state_tracker/st_context.c 2018-12-07 18:58:04.000000000 +0000 +++ mesa-19.0.1/src/mesa/state_tracker/st_context.c 2019-03-31 
23:16:37.000000000 +0000 @@ -462,6 +462,12 @@ screen->get_param(screen, PIPE_CAP_TGSI_PACK_HALF_FLOAT); st->has_multi_draw_indirect = screen->get_param(screen, PIPE_CAP_MULTI_DRAW_INDIRECT); + st->has_single_pipe_stat = + screen->get_param(screen, PIPE_CAP_QUERY_PIPELINE_STATISTICS_SINGLE); + st->has_indep_blend_func = + screen->get_param(screen, PIPE_CAP_INDEP_BLEND_FUNC); + st->needs_rgb_dst_alpha_override = + screen->get_param(screen, PIPE_CAP_RGB_OVERRIDE_DST_ALPHA_BLEND); st->has_hw_atomics = screen->get_shader_param(screen, PIPE_SHADER_FRAGMENT, diff -Nru mesa-18.3.3/src/mesa/state_tracker/st_context.h mesa-19.0.1/src/mesa/state_tracker/st_context.h --- mesa-18.3.3/src/mesa/state_tracker/st_context.h 2018-12-07 18:58:04.000000000 +0000 +++ mesa-19.0.1/src/mesa/state_tracker/st_context.h 2019-03-31 23:16:37.000000000 +0000 @@ -127,6 +127,9 @@ boolean has_shareable_shaders; boolean has_half_float_packing; boolean has_multi_draw_indirect; + boolean has_single_pipe_stat; + boolean has_indep_blend_func; + boolean needs_rgb_dst_alpha_override; boolean can_bind_const_buffer_as_vertex; /** @@ -193,6 +196,8 @@ /** This masks out unused shader resources. Only valid in draw calls. */ uint64_t active_states; + unsigned pin_thread_counter; /* for L3 thread pinning on AMD Zen */ + /* If true, further analysis of states is required to know if something * has changed. Used mainly for shaders. 
*/ @@ -222,14 +227,12 @@ struct pipe_sampler_state sampler; struct pipe_sampler_state atlas_sampler; enum pipe_format tex_format; - void *vs; struct st_bitmap_cache cache; } bitmap; /** for glDraw/CopyPixels */ struct { void *zs_shaders[4]; - void *vert_shaders[2]; /**< ureg shaders */ } drawpix; /** Cache of glDrawPixels images */ @@ -276,7 +279,8 @@ /** for drawing with st_util_vertex */ struct pipe_vertex_element util_velems[3]; - void *passthrough_fs; /**< simple pass-through frag shader */ + /** passthrough vertex shader matching the util_velem attributes */ + void *passthrough_vs; enum pipe_texture_target internal_target; diff -Nru mesa-18.3.3/src/mesa/state_tracker/st_draw.c mesa-19.0.1/src/mesa/state_tracker/st_draw.c --- mesa-18.3.3/src/mesa/state_tracker/st_draw.c 2018-09-27 19:13:54.000000000 +0000 +++ mesa-19.0.1/src/mesa/state_tracker/st_draw.c 2019-03-31 23:16:37.000000000 +0000 @@ -58,6 +58,7 @@ #include "pipe/p_context.h" #include "pipe/p_defines.h" +#include "util/u_cpu_detect.h" #include "util/u_inlines.h" #include "util/u_format.h" #include "util/u_prim.h" @@ -66,6 +67,13 @@ #include "draw/draw_context.h" #include "cso_cache/cso_context.h" +#if defined(PIPE_OS_LINUX) && !defined(ANDROID) +#include +#define HAVE_SCHED_GETCPU 1 +#else +#define sched_getcpu() 0 +#define HAVE_SCHED_GETCPU 0 +#endif /** * Set the restart index. @@ -122,12 +130,38 @@ st->gfx_shaders_may_be_dirty) { st_validate_state(st, ST_PIPELINE_RENDER); } + + struct pipe_context *pipe = st->pipe; + + /* Pin threads regularly to the same Zen CCX that the main thread is + * running on. The main thread can move between CCXs. 
+ */ + if (unlikely(HAVE_SCHED_GETCPU && /* Linux */ + /* AMD Zen */ + util_cpu_caps.nr_cpus != util_cpu_caps.cores_per_L3 && + /* no glthread */ + ctx->CurrentClientDispatch != ctx->MarshalExec && + /* driver support */ + pipe->set_context_param && + /* do it occasionally */ + ++st->pin_thread_counter % 512 == 0)) { + int cpu = sched_getcpu(); + if (cpu >= 0) { + unsigned L3_cache = cpu / util_cpu_caps.cores_per_L3; + + pipe->set_context_param(pipe, + PIPE_CONTEXT_PARAM_PIN_THREADS_TO_L3_CACHE, + L3_cache); + } + } } /** * This function gets plugged into the VBO module and is called when * we have something to render. * Basically, translate the information into the format expected by gallium. + * + * Try to keep this logic in sync with st_feedback_draw_vbo. */ static void st_draw_vbo(struct gl_context *ctx, diff -Nru mesa-18.3.3/src/mesa/state_tracker/st_draw_feedback.c mesa-19.0.1/src/mesa/state_tracker/st_draw_feedback.c --- mesa-18.3.3/src/mesa/state_tracker/st_draw_feedback.c 2018-09-27 19:13:54.000000000 +0000 +++ mesa-19.0.1/src/mesa/state_tracker/st_draw_feedback.c 2019-03-31 23:16:37.000000000 +0000 @@ -85,30 +85,9 @@ /** - * Helper for drawing current vertex arrays. - */ -static void -draw_arrays(struct draw_context *draw, unsigned mode, - unsigned start, unsigned count) -{ - struct pipe_draw_info info; - - util_draw_init_info(&info); - - info.mode = mode; - info.start = start; - info.count = count; - info.min_index = start; - info.max_index = start + count - 1; - - draw_vbo(draw, &info); -} - - -/** * Called by VBO to draw arrays when in selection or feedback mode and * to implement glRasterPos. - * This is very much like the normal draw_vbo() function above. + * This function mirrors the normal st_draw_vbo(). * Look at code refactoring some day. 
*/ void @@ -127,17 +106,27 @@ struct pipe_context *pipe = st->pipe; struct draw_context *draw = st_get_draw_context(st); const struct st_vertex_program *vp; + struct st_vp_variant *vp_variant; const struct pipe_shader_state *vs; struct pipe_vertex_buffer vbuffers[PIPE_MAX_SHADER_INPUTS]; + unsigned num_vbuffers = 0; struct pipe_vertex_element velements[PIPE_MAX_ATTRIBS]; struct pipe_transfer *vb_transfer[PIPE_MAX_ATTRIBS] = {NULL}; struct pipe_transfer *ib_transfer = NULL; - GLuint attr, i; + GLuint i; const void *mapped_indices = NULL; + struct pipe_draw_info info; if (!draw) return; + /* Initialize pipe_draw_info. */ + info.primitive_restart = false; + info.vertices_per_patch = ctx->TessCtrlProgram.patch_vertices; + info.indirect = NULL; + info.count_from_stream_output = NULL; + info.restart_index = 0; + st_flush_bitmap_cache(st); st_invalidate_readpix_cache(st); @@ -148,10 +137,11 @@ /* must get these after state validation! */ vp = st->vp; - vs = &st->vp_variant->tgsi; + vp_variant = st->vp_variant; + vs = &vp_variant->tgsi; - if (!st->vp_variant->draw_shader) { - st->vp_variant->draw_shader = draw_create_vertex_shader(draw, vs); + if (!vp_variant->draw_shader) { + vp_variant->draw_shader = draw_create_vertex_shader(draw, vs); } /* @@ -164,64 +154,30 @@ draw_set_viewport_states(draw, 0, 1, &st->state.viewport[0]); draw_set_clip_state(draw, &st->state.clip); draw_set_rasterizer_state(draw, &st->state.rasterizer, NULL); - draw_bind_vertex_shader(draw, st->vp_variant->draw_shader); + draw_bind_vertex_shader(draw, vp_variant->draw_shader); set_feedback_vertex_format(ctx); - /* loop over TGSI shader inputs to determine vertex buffer - * and attribute info - */ - for (attr = 0; attr < vp->num_inputs; attr++) { - const GLuint mesaAttr = vp->index_to_input[attr]; - const struct gl_vertex_buffer_binding *binding; - const struct gl_array_attributes *attrib; - void *map; - - _mesa_draw_attrib_and_binding(ctx, mesaAttr, &attrib, &binding); - - if 
(_mesa_is_bufferobj(binding->BufferObj)) { - /* Attribute data is in a VBO. */ - struct st_buffer_object *stobj = st_buffer_object(binding->BufferObj); - assert(stobj->buffer); - - vbuffers[attr].buffer.resource = NULL; - vbuffers[attr].is_user_buffer = false; - pipe_resource_reference(&vbuffers[attr].buffer.resource, stobj->buffer); - vbuffers[attr].buffer_offset = _mesa_draw_binding_offset(binding); - velements[attr].src_offset = - _mesa_draw_attributes_relative_offset(attrib); - - /* map the attrib buffer */ - map = pipe_buffer_map(pipe, vbuffers[attr].buffer.resource, - PIPE_TRANSFER_READ, - &vb_transfer[attr]); - draw_set_mapped_vertex_buffer(draw, attr, map, - vbuffers[attr].buffer.resource->width0); - } - else { - /* Attribute data is in a user space array. */ - vbuffers[attr].buffer.user = attrib->Ptr; - vbuffers[attr].is_user_buffer = true; - vbuffers[attr].buffer_offset = 0; - velements[attr].src_offset = 0; - - draw_set_mapped_vertex_buffer(draw, attr, - vbuffers[attr].buffer.user, ~0); + /* Must setup these after state validation! 
*/ + /* Setup arrays */ + st_setup_arrays(st, vp, vp_variant, velements, vbuffers, &num_vbuffers); + /* Setup current values as userspace arrays */ + st_setup_current_user(st, vp, vp_variant, velements, vbuffers, &num_vbuffers); + + /* Map all buffers and tell draw about their mapping */ + for (unsigned buf = 0; buf < num_vbuffers; ++buf) { + struct pipe_vertex_buffer *vbuffer = &vbuffers[buf]; + + if (vbuffer->is_user_buffer) { + draw_set_mapped_vertex_buffer(draw, buf, vbuffer->buffer.user, ~0); + } else { + void *map = pipe_buffer_map(pipe, vbuffer->buffer.resource, + PIPE_TRANSFER_READ, &vb_transfer[buf]); + draw_set_mapped_vertex_buffer(draw, buf, map, + vbuffer->buffer.resource->width0); } - - /* common-case setup */ - vbuffers[attr].stride = binding->Stride; /* in bytes */ - velements[attr].instance_divisor = 0; - velements[attr].vertex_buffer_index = attr; - velements[attr].src_format = st_pipe_vertex_format(attrib); - assert(velements[attr].src_format); - - /* tell draw about this attribute */ -#if 0 - draw_set_vertex_buffer(draw, attr, &vbuffer[attr]); -#endif } - draw_set_vertex_buffers(draw, 0, vp->num_inputs, vbuffers); + draw_set_vertex_buffers(draw, 0, num_vbuffers, vbuffers); draw_set_vertex_elements(draw, vp->num_inputs, velements); unsigned start = 0; @@ -244,9 +200,23 @@ mapped_indices = ib->ptr; } + info.index_size = ib->index_size; + info.min_index = min_index; + info.max_index = max_index; + info.has_user_indices = true; + info.index.user = mapped_indices; + draw_set_indexes(draw, (ubyte *) mapped_indices, index_size, ~0); + + if (ctx->Array._PrimitiveRestart) { + info.primitive_restart = true; + info.restart_index = _mesa_primitive_restart_index(ctx, info.index_size); + } + } else { + info.index_size = 0; + info.has_user_indices = false; } /* set the constant buffer */ @@ -257,7 +227,23 @@ /* draw here */ for (i = 0; i < nr_prims; i++) { - draw_arrays(draw, prims[i].mode, start + prims[i].start, prims[i].count); + info.count = prims[i].count; 
+ + if (!info.count) + continue; + + info.mode = prims[i].mode; + info.start = start + prims[i].start; + info.start_instance = prims[i].base_instance; + info.instance_count = prims[i].num_instances; + info.index_bias = prims[i].basevertex; + info.drawid = prims[i].draw_id; + if (!ib) { + info.min_index = info.start; + info.max_index = info.start + info.count - 1; + } + + draw_vbo(draw, &info); } @@ -271,11 +257,10 @@ } out_unref_vertex: - for (attr = 0; attr < vp->num_inputs; attr++) { - if (vb_transfer[attr]) - pipe_buffer_unmap(pipe, vb_transfer[attr]); - draw_set_mapped_vertex_buffer(draw, attr, NULL, 0); - pipe_vertex_buffer_unreference(&vbuffers[attr]); + for (unsigned buf = 0; buf < num_vbuffers; ++buf) { + if (vb_transfer[buf]) + pipe_buffer_unmap(pipe, vb_transfer[buf]); + draw_set_mapped_vertex_buffer(draw, buf, NULL, 0); } - draw_set_vertex_buffers(draw, 0, vp->num_inputs, NULL); + draw_set_vertex_buffers(draw, 0, num_vbuffers, NULL); } diff -Nru mesa-18.3.3/src/mesa/state_tracker/st_extensions.c mesa-19.0.1/src/mesa/state_tracker/st_extensions.c --- mesa-18.3.3/src/mesa/state_tracker/st_extensions.c 2018-12-07 18:58:04.000000000 +0000 +++ mesa-19.0.1/src/mesa/state_tracker/st_extensions.c 2019-03-31 23:16:37.000000000 +0000 @@ -183,7 +183,8 @@ continue; supported_irs = screen->get_shader_param(screen, sh, PIPE_SHADER_CAP_SUPPORTED_IRS); - if (!(supported_irs & (1 << PIPE_SHADER_IR_TGSI))) + if (!(supported_irs & ((1 << PIPE_SHADER_IR_TGSI) | + (1 << PIPE_SHADER_IR_NIR)))) continue; } @@ -222,8 +223,13 @@ pc->MaxUniformComponents = MIN2(pc->MaxUniformComponents, MAX_UNIFORMS * 4); + /* For ARB programs, prog_src_register::Index is a signed 13-bit number. + * This gives us a limit of 4096 values - but we may need to generate + * internal values in addition to what the source program uses. So, we + * drop the limit one step lower, to 2048, to be safe. 
+ */ pc->MaxParameters = - pc->MaxNativeParameters = pc->MaxUniformComponents / 4; + pc->MaxNativeParameters = MIN2(pc->MaxUniformComponents / 4, 2048); pc->MaxInputComponents = screen->get_shader_param(screen, sh, PIPE_SHADER_CAP_MAX_INPUTS) * 4; pc->MaxOutputComponents = @@ -333,6 +339,8 @@ c->GLSLOptimizeConservatively = screen->get_param(screen, PIPE_CAP_GLSL_OPTIMIZE_CONSERVATIVELY); + c->GLSLTessLevelsAsInputs = + screen->get_param(screen, PIPE_CAP_GLSL_TESS_LEVELS_AS_INPUTS); c->LowerTessLevel = true; c->LowerCsDerivedVariables = true; c->PrimitiveRestartForPatches = @@ -359,10 +367,7 @@ c->Program[MESA_SHADER_VERTEX].MaxAttribs = MIN2(c->Program[MESA_SHADER_VERTEX].MaxAttribs, 16); - /* PIPE_SHADER_CAP_MAX_INPUTS for the FS specifies the maximum number - * of inputs. It's always 2 colors + N generic inputs. */ - c->MaxVarying = screen->get_shader_param(screen, PIPE_SHADER_FRAGMENT, - PIPE_SHADER_CAP_MAX_INPUTS); + c->MaxVarying = screen->get_param(screen, PIPE_CAP_MAX_VARYINGS); c->MaxVarying = MIN2(c->MaxVarying, MAX_VARYING); c->MaxGeometryOutputVertices = screen->get_param(screen, PIPE_CAP_MAX_GEOMETRY_OUTPUT_VERTICES); @@ -703,6 +708,7 @@ { o(ARB_occlusion_query), PIPE_CAP_OCCLUSION_QUERY }, { o(ARB_occlusion_query2), PIPE_CAP_OCCLUSION_QUERY }, { o(ARB_pipeline_statistics_query), PIPE_CAP_QUERY_PIPELINE_STATISTICS }, + { o(ARB_pipeline_statistics_query), PIPE_CAP_QUERY_PIPELINE_STATISTICS_SINGLE }, { o(ARB_point_sprite), PIPE_CAP_POINT_SPRITE }, { o(ARB_polygon_offset_clamp), PIPE_CAP_POLYGON_OFFSET_CLAMP }, { o(ARB_post_depth_coverage), PIPE_CAP_POST_DEPTH_COVERAGE }, @@ -740,6 +746,7 @@ { o(EXT_draw_buffers2), PIPE_CAP_INDEP_BLEND_ENABLE }, { o(EXT_memory_object), PIPE_CAP_MEMOBJ }, { o(EXT_memory_object_fd), PIPE_CAP_MEMOBJ }, + { o(EXT_multisampled_render_to_texture), PIPE_CAP_SURFACE_SAMPLE_COUNT }, { o(EXT_semaphore), PIPE_CAP_FENCE_SIGNAL }, { o(EXT_semaphore_fd), PIPE_CAP_FENCE_SIGNAL }, { o(EXT_texture_array), PIPE_CAP_MAX_TEXTURE_ARRAY_LAYERS 
}, @@ -759,6 +766,7 @@ { o(NV_conditional_render), PIPE_CAP_CONDITIONAL_RENDER }, { o(NV_fill_rectangle), PIPE_CAP_POLYGON_MODE_FILL_RECTANGLE }, { o(NV_primitive_restart), PIPE_CAP_PRIMITIVE_RESTART }, + { o(NV_shader_atomic_float), PIPE_CAP_TGSI_ATOMFADD }, { o(NV_texture_barrier), PIPE_CAP_TEXTURE_BARRIER }, { o(NVX_gpu_memory_info), PIPE_CAP_QUERY_MEMORY_INFO }, /* GL_NV_point_sprite is not supported by gallium because we don't @@ -767,14 +775,11 @@ { o(OES_standard_derivatives), PIPE_CAP_SM3 }, { o(OES_texture_float_linear), PIPE_CAP_TEXTURE_FLOAT_LINEAR }, { o(OES_texture_half_float_linear), PIPE_CAP_TEXTURE_HALF_FLOAT_LINEAR }, + { o(OES_texture_view), PIPE_CAP_SAMPLER_VIEW_TARGET }, }; /* Required: render target and sampler support */ static const struct st_extension_format_mapping rendertarget_mapping[] = { - { { o(ARB_texture_float) }, - { PIPE_FORMAT_R32G32B32A32_FLOAT, - PIPE_FORMAT_R16G16B16A16_FLOAT } }, - { { o(OES_texture_float) }, { PIPE_FORMAT_R32G32B32A32_FLOAT } }, @@ -786,7 +791,7 @@ PIPE_FORMAT_B10G10R10A2_UINT }, GL_TRUE }, /* at least one format must be supported */ - { { o(EXT_framebuffer_sRGB) }, + { { o(EXT_sRGB) }, { PIPE_FORMAT_A8B8G8R8_SRGB, PIPE_FORMAT_B8G8R8A8_SRGB, PIPE_FORMAT_R8G8B8A8_SRGB }, @@ -802,6 +807,14 @@ { { o(ARB_texture_rg) }, { PIPE_FORMAT_R8_UNORM, PIPE_FORMAT_R8G8_UNORM } }, + + { { o(EXT_render_snorm) }, + { PIPE_FORMAT_R8_SNORM, + PIPE_FORMAT_R8G8_SNORM, + PIPE_FORMAT_R8G8B8A8_SNORM, + PIPE_FORMAT_R16_SNORM, + PIPE_FORMAT_R16G16_SNORM, + PIPE_FORMAT_R16G16B16A16_SNORM } }, }; /* Required: depth stencil and sampler support */ @@ -889,6 +902,10 @@ PIPE_FORMAT_R8G8B8A8_SRGB}, GL_TRUE }, /* at least one format must be supported */ + { { o(EXT_texture_sRGB_R8) }, + { PIPE_FORMAT_R8_SRGB }, + GL_TRUE }, + { { o(EXT_texture_type_2_10_10_10_REV) }, { PIPE_FORMAT_R10G10B10A2_UNORM, PIPE_FORMAT_B10G10R10A2_UNORM }, @@ -1312,6 +1329,10 @@ extensions->ARB_texture_buffer_object_rgb32 && extensions->ARB_shader_image_load_store; + 
extensions->EXT_framebuffer_sRGB = + screen->get_param(screen, PIPE_CAP_DEST_SURFACE_SRGB_CONTROL) && + extensions->EXT_sRGB; + /* Unpacking a varying in the fragment shader costs 1 texture indirection. * If the number of available texture indirections is very limited, then we * prefer to disable varying packing rather than run the risk of varying @@ -1406,18 +1427,22 @@ int compute_supported_irs = screen->get_shader_param(screen, PIPE_SHADER_COMPUTE, PIPE_SHADER_CAP_SUPPORTED_IRS); - if (compute_supported_irs & (1 << PIPE_SHADER_IR_TGSI)) { + if (compute_supported_irs & ((1 << PIPE_SHADER_IR_TGSI) | + (1 << PIPE_SHADER_IR_NIR))) { + enum pipe_shader_ir ir = + (compute_supported_irs & PIPE_SHADER_IR_NIR) ? + PIPE_SHADER_IR_NIR : PIPE_SHADER_IR_TGSI; uint64_t grid_size[3], block_size[3]; uint64_t max_local_size, max_threads_per_block; - screen->get_compute_param(screen, PIPE_SHADER_IR_TGSI, + screen->get_compute_param(screen, ir, PIPE_COMPUTE_CAP_MAX_GRID_SIZE, grid_size); - screen->get_compute_param(screen, PIPE_SHADER_IR_TGSI, + screen->get_compute_param(screen, ir, PIPE_COMPUTE_CAP_MAX_BLOCK_SIZE, block_size); - screen->get_compute_param(screen, PIPE_SHADER_IR_TGSI, + screen->get_compute_param(screen, ir, PIPE_COMPUTE_CAP_MAX_THREADS_PER_BLOCK, &max_threads_per_block); - screen->get_compute_param(screen, PIPE_SHADER_IR_TGSI, + screen->get_compute_param(screen, ir, PIPE_COMPUTE_CAP_MAX_LOCAL_SIZE, &max_local_size); @@ -1436,7 +1461,7 @@ if (extensions->ARB_compute_shader) { uint64_t max_variable_threads_per_block = 0; - screen->get_compute_param(screen, PIPE_SHADER_IR_TGSI, + screen->get_compute_param(screen, ir, PIPE_COMPUTE_CAP_MAX_VARIABLE_THREADS_PER_BLOCK, &max_variable_threads_per_block); @@ -1457,6 +1482,10 @@ } } + extensions->ARB_texture_float = + extensions->OES_texture_half_float && + extensions->OES_texture_float; + if (extensions->EXT_texture_filter_anisotropic && screen->get_paramf(screen, PIPE_CAPF_MAX_TEXTURE_ANISOTROPY) >= 16.0) 
extensions->ARB_texture_filter_anisotropic = GL_TRUE; diff -Nru mesa-18.3.3/src/mesa/state_tracker/st_format.c mesa-19.0.1/src/mesa/state_tracker/st_format.c --- mesa-18.3.3/src/mesa/state_tracker/st_format.c 2018-12-07 18:58:04.000000000 +0000 +++ mesa-19.0.1/src/mesa/state_tracker/st_format.c 2019-03-31 23:16:37.000000000 +0000 @@ -169,6 +169,8 @@ return PIPE_FORMAT_AL88_SRGB; case MESA_FORMAT_L_SRGB8: return PIPE_FORMAT_L8_SRGB; + case MESA_FORMAT_R_SRGB8: + return PIPE_FORMAT_R8_SRGB; case MESA_FORMAT_BGR_SRGB8: return PIPE_FORMAT_R8G8B8_SRGB; case MESA_FORMAT_A8B8G8R8_SRGB: @@ -719,6 +721,8 @@ return MESA_FORMAT_A8L8_SRGB; case PIPE_FORMAT_L8_SRGB: return MESA_FORMAT_L_SRGB8; + case PIPE_FORMAT_R8_SRGB: + return MESA_FORMAT_R_SRGB8; case PIPE_FORMAT_R8G8B8_SRGB: return MESA_FORMAT_BGR_SRGB8; case PIPE_FORMAT_ABGR8888_SRGB: @@ -1423,6 +1427,10 @@ 0 }, { PIPE_FORMAT_L8_SRGB, DEFAULT_SRGBA_FORMATS } }, + { + { GL_SR8_EXT, 0 }, + { PIPE_FORMAT_R8_SRGB, 0 } + }, /* 16-bit float formats */ { @@ -2348,6 +2356,8 @@ bindings |= PIPE_BIND_DEPTH_STENCIL; else if (is_renderbuffer || internalFormat == 3 || internalFormat == 4 || internalFormat == GL_RGB || internalFormat == GL_RGBA || + internalFormat == GL_RGBA2 || + internalFormat == GL_RGB4 || internalFormat == GL_RGBA4 || internalFormat == GL_RGB8 || internalFormat == GL_RGBA8 || internalFormat == GL_BGRA || internalFormat == GL_RGB16F || @@ -2449,7 +2459,7 @@ /* If an sRGB framebuffer is unsupported, sRGB formats behave like linear * formats. 
*/ - if (!ctx->Extensions.EXT_framebuffer_sRGB) { + if (!ctx->Extensions.EXT_sRGB) { internalFormat = _mesa_get_linear_internalformat(internalFormat); } diff -Nru mesa-18.3.3/src/mesa/state_tracker/st_glsl_to_nir.cpp mesa-19.0.1/src/mesa/state_tracker/st_glsl_to_nir.cpp --- mesa-18.3.3/src/mesa/state_tracker/st_glsl_to_nir.cpp 2019-02-01 12:03:20.000000000 +0000 +++ mesa-19.0.1/src/mesa/state_tracker/st_glsl_to_nir.cpp 2019-03-31 23:16:37.000000000 +0000 @@ -81,7 +81,7 @@ * on varying-slot w/ the VS outputs) */ static void -st_nir_assign_vs_in_locations(struct gl_program *prog, nir_shader *nir) +st_nir_assign_vs_in_locations(nir_shader *nir) { nir->num_inputs = 0; nir_foreach_variable_safe(var, &nir->inputs) { @@ -103,7 +103,7 @@ * set. */ exec_node_remove(&var->node); - var->data.mode = nir_var_global; + var->data.mode = nir_var_shader_temp; exec_list_push_tail(&nir->globals, &var->node); } } @@ -240,7 +240,6 @@ static void st_nir_assign_uniform_locations(struct gl_context *ctx, struct gl_program *prog, - struct gl_shader_program *shader_program, struct exec_list *uniform_list, unsigned *size) { int max = 0; @@ -254,8 +253,7 @@ * UBO's have their own address spaces, so don't count them towards the * number of global uniforms */ - if ((uniform->data.mode == nir_var_uniform || uniform->data.mode == nir_var_shader_storage) && - uniform->interface_type != NULL) + if (uniform->data.mode == nir_var_mem_ubo || uniform->data.mode == nir_var_mem_ssbo) continue; const struct glsl_type *type = glsl_without_array(uniform->type); @@ -329,7 +327,7 @@ NIR_PASS(progress, nir, nir_opt_if); NIR_PASS(progress, nir, nir_opt_dead_cf); NIR_PASS(progress, nir, nir_opt_cse); - NIR_PASS(progress, nir, nir_opt_peephole_select, 8); + NIR_PASS(progress, nir, nir_opt_peephole_select, 8, true); NIR_PASS(progress, nir, nir_opt_algebraic); NIR_PASS(progress, nir, nir_opt_constant_folding); @@ -587,8 +585,19 @@ static void st_nir_link_shaders(nir_shader **producer, nir_shader **consumer, bool 
scalar) { + if (scalar) { + NIR_PASS_V(*producer, nir_lower_io_to_scalar_early, nir_var_shader_out); + NIR_PASS_V(*consumer, nir_lower_io_to_scalar_early, nir_var_shader_in); + } + nir_lower_io_arrays_to_elements(*producer, *consumer); + st_nir_opts(*producer, scalar); + st_nir_opts(*consumer, scalar); + + if (nir_link_opt_varyings(*producer, *consumer)) + st_nir_opts(*consumer, scalar); + NIR_PASS_V(*producer, nir_remove_dead_variables, nir_var_shader_out); NIR_PASS_V(*consumer, nir_remove_dead_variables, nir_var_shader_in); @@ -607,7 +616,7 @@ * See the following thread for more details of the problem: * https://lists.freedesktop.org/archives/mesa-dev/2017-July/162106.html */ - nir_variable_mode indirect_mask = nir_var_local; + nir_variable_mode indirect_mask = nir_var_function_temp; NIR_PASS_V(*producer, nir_lower_indirect_derefs, indirect_mask); NIR_PASS_V(*consumer, nir_lower_indirect_derefs, indirect_mask); @@ -617,8 +626,63 @@ } } +static void +st_lower_patch_vertices_in(struct gl_shader_program *shader_prog) +{ + struct gl_linked_shader *linked_tcs = + shader_prog->_LinkedShaders[MESA_SHADER_TESS_CTRL]; + struct gl_linked_shader *linked_tes = + shader_prog->_LinkedShaders[MESA_SHADER_TESS_EVAL]; + + /* If we have a TCS and TES linked together, lower TES patch vertices. */ + if (linked_tcs && linked_tes) { + nir_shader *tcs_nir = linked_tcs->Program->nir; + nir_shader *tes_nir = linked_tes->Program->nir; + + /* The TES input vertex count is the TCS output vertex count, + * lower TES gl_PatchVerticesIn to a constant. 
+ */ + uint32_t tes_patch_verts = tcs_nir->info.tess.tcs_vertices_out; + NIR_PASS_V(tes_nir, nir_lower_patch_vertices, tes_patch_verts, NULL); + } +} + extern "C" { +void +st_nir_lower_wpos_ytransform(struct nir_shader *nir, + struct gl_program *prog, + struct pipe_screen *pscreen) +{ + if (nir->info.stage != MESA_SHADER_FRAGMENT) + return; + + static const gl_state_index16 wposTransformState[STATE_LENGTH] = { + STATE_INTERNAL, STATE_FB_WPOS_Y_TRANSFORM + }; + nir_lower_wpos_ytransform_options wpos_options = { { 0 } }; + + memcpy(wpos_options.state_tokens, wposTransformState, + sizeof(wpos_options.state_tokens)); + wpos_options.fs_coord_origin_upper_left = + pscreen->get_param(pscreen, + PIPE_CAP_TGSI_FS_COORD_ORIGIN_UPPER_LEFT); + wpos_options.fs_coord_origin_lower_left = + pscreen->get_param(pscreen, + PIPE_CAP_TGSI_FS_COORD_ORIGIN_LOWER_LEFT); + wpos_options.fs_coord_pixel_center_integer = + pscreen->get_param(pscreen, + PIPE_CAP_TGSI_FS_COORD_PIXEL_CENTER_INTEGER); + wpos_options.fs_coord_pixel_center_half_integer = + pscreen->get_param(pscreen, + PIPE_CAP_TGSI_FS_COORD_PIXEL_CENTER_HALF_INTEGER); + + if (nir_lower_wpos_ytransform(nir, &wpos_options)) { + nir_validate_shader(nir, "after nir_lower_wpos_ytransform"); + _mesa_add_state_reference(prog->Parameters, wposTransformState); + } +} + bool st_link_nir(struct gl_context *ctx, struct gl_shader_program *shader_program) @@ -627,49 +691,23 @@ struct pipe_screen *screen = st->pipe->screen; bool is_scalar[MESA_SHADER_STAGES]; - /* Determine scalar property of each shader stage */ + unsigned last_stage = 0; for (unsigned i = 0; i < MESA_SHADER_STAGES; i++) { struct gl_linked_shader *shader = shader_program->_LinkedShaders[i]; - enum pipe_shader_type type; - if (shader == NULL) continue; - type = pipe_shader_type_from_mesa(shader->Stage); - is_scalar[i] = screen->get_shader_param(screen, type, PIPE_SHADER_CAP_SCALAR_ISA); - } - - /* Determine first and last stage. 
*/ - unsigned first = MESA_SHADER_STAGES; - unsigned last = 0; - for (unsigned i = 0; i < MESA_SHADER_STAGES; i++) { - if (!shader_program->_LinkedShaders[i]) - continue; - if (first == MESA_SHADER_STAGES) - first = i; - last = i; - } - - for (unsigned i = 0; i < MESA_SHADER_STAGES; i++) { - struct gl_linked_shader *shader = shader_program->_LinkedShaders[i]; - if (shader == NULL) - continue; + /* Determine scalar property of each shader stage */ + enum pipe_shader_type type = pipe_shader_type_from_mesa(shader->Stage); + is_scalar[i] = screen->get_shader_param(screen, type, + PIPE_SHADER_CAP_SCALAR_ISA); st_nir_get_mesa_program(ctx, shader_program, shader); + last_stage = i; - nir_variable_mode mask = (nir_variable_mode) 0; - if (i != first) - mask = (nir_variable_mode)(mask | nir_var_shader_in); - - if (i != last) - mask = (nir_variable_mode)(mask | nir_var_shader_out); - - nir_shader *nir = shader->Program->nir; - - if (is_scalar[i]) - NIR_PASS_V(nir, nir_lower_io_to_scalar_early, mask); - - st_nir_opts(nir, is_scalar[i]); + if (is_scalar[i]) { + NIR_PASS_V(shader->Program->nir, nir_lower_load_const_to_scalar); + } } /* Linking the stages in the opposite order (from fragment to vertex) @@ -677,7 +715,7 @@ * are eliminated if they are (transitively) not used in a later * stage. 
*/ - int next = last; + int next = last_stage; for (int i = next - 1; i >= 0; i--) { struct gl_linked_shader *shader = shader_program->_LinkedShaders[i]; if (shader == NULL) @@ -697,35 +735,8 @@ nir_shader *nir = shader->Program->nir; - /* fragment shaders may need : */ - if (nir->info.stage == MESA_SHADER_FRAGMENT) { - static const gl_state_index16 wposTransformState[STATE_LENGTH] = { - STATE_INTERNAL, STATE_FB_WPOS_Y_TRANSFORM - }; - nir_lower_wpos_ytransform_options wpos_options = { { 0 } }; - struct pipe_screen *pscreen = st->pipe->screen; - - memcpy(wpos_options.state_tokens, wposTransformState, - sizeof(wpos_options.state_tokens)); - wpos_options.fs_coord_origin_upper_left = - pscreen->get_param(pscreen, - PIPE_CAP_TGSI_FS_COORD_ORIGIN_UPPER_LEFT); - wpos_options.fs_coord_origin_lower_left = - pscreen->get_param(pscreen, - PIPE_CAP_TGSI_FS_COORD_ORIGIN_LOWER_LEFT); - wpos_options.fs_coord_pixel_center_integer = - pscreen->get_param(pscreen, - PIPE_CAP_TGSI_FS_COORD_PIXEL_CENTER_INTEGER); - wpos_options.fs_coord_pixel_center_half_integer = - pscreen->get_param(pscreen, - PIPE_CAP_TGSI_FS_COORD_PIXEL_CENTER_HALF_INTEGER); - - if (nir_lower_wpos_ytransform(nir, &wpos_options)) { - nir_validate_shader(nir, "after nir_lower_wpos_ytransform"); - _mesa_add_state_reference(shader->Program->Parameters, - wposTransformState); - } - } + NIR_PASS_V(nir, st_nir_lower_wpos_ytransform, shader->Program, + st->pipe->screen); NIR_PASS_V(nir, nir_lower_system_values); @@ -757,6 +768,8 @@ prev = i; } + st_lower_patch_vertices_in(shader_program); + for (unsigned i = 0; i < MESA_SHADER_STAGES; i++) { struct gl_linked_shader *shader = shader_program->_LinkedShaders[i]; if (shader == NULL) @@ -801,7 +814,7 @@ if (nir->info.stage == MESA_SHADER_VERTEX) { /* Needs special handling so drvloc matches the vbo state: */ - st_nir_assign_vs_in_locations(prog, nir); + st_nir_assign_vs_in_locations(nir); /* Re-lower global vars, to deal with any dead VS inputs. 
*/ NIR_PASS_V(nir, nir_lower_global_vars_to_local); @@ -842,7 +855,7 @@ NIR_PASS_V(nir, nir_lower_atomics_to_ssbo, st->ctx->Const.Program[nir->info.stage].MaxAtomicBuffers); - st_nir_assign_uniform_locations(st->ctx, prog, shader_program, + st_nir_assign_uniform_locations(st->ctx, prog, &nir->uniforms, &nir->num_uniforms); if (st->ctx->Const.PackedDriverUniformStorage) { diff -Nru mesa-18.3.3/src/mesa/state_tracker/st_glsl_to_tgsi.cpp mesa-19.0.1/src/mesa/state_tracker/st_glsl_to_tgsi.cpp --- mesa-18.3.3/src/mesa/state_tracker/st_glsl_to_tgsi.cpp 2018-12-07 18:58:04.000000000 +0000 +++ mesa-19.0.1/src/mesa/state_tracker/st_glsl_to_tgsi.cpp 2019-03-31 23:16:37.000000000 +0000 @@ -781,6 +781,7 @@ case7(ISHR, LAST, ISHR, USHR, LAST, I64SHR, U64SHR); case7(ATOMIMAX,LAST, ATOMIMAX,ATOMUMAX,LAST, LAST, LAST); case7(ATOMIMIN,LAST, ATOMIMIN,ATOMUMIN,LAST, LAST, LAST); + case7(ATOMUADD,ATOMFADD,ATOMUADD,ATOMUADD,LAST, LAST, LAST); casecomp(SEQ, FSEQ, USEQ, USEQ, DSEQ, U64SEQ, U64SEQ); casecomp(SNE, FSNE, USNE, USNE, DSNE, U64SNE, U64SNE); @@ -6204,6 +6205,7 @@ case TGSI_OPCODE_ATOMUMAX: case TGSI_OPCODE_ATOMIMIN: case TGSI_OPCODE_ATOMIMAX: + case TGSI_OPCODE_ATOMFADD: case TGSI_OPCODE_IMG2HND: for (i = num_src - 1; i >= 0; i--) src[i + 1] = src[i]; @@ -7469,25 +7471,17 @@ } void -st_translate_stream_output_info(glsl_to_tgsi_visitor *glsl_to_tgsi, - const ubyte outputMapping[], - struct pipe_stream_output_info *so) -{ - if (!glsl_to_tgsi->shader_program->last_vert_prog) - return; - - struct gl_transform_feedback_info *info = - glsl_to_tgsi->shader_program->last_vert_prog->sh.LinkedTransformFeedback; - st_translate_stream_output_info2(info, outputMapping, so); -} - -void -st_translate_stream_output_info2(struct gl_transform_feedback_info *info, +st_translate_stream_output_info(struct gl_transform_feedback_info *info, const ubyte outputMapping[], struct pipe_stream_output_info *so) { unsigned i; + if (!info) { + so->num_outputs = 0; + return; + } + for (i = 0; i < 
info->NumOutputs; i++) { so->output[i].register_index = outputMapping[info->Outputs[i].OutputRegister]; diff -Nru mesa-18.3.3/src/mesa/state_tracker/st_glsl_to_tgsi.h mesa-19.0.1/src/mesa/state_tracker/st_glsl_to_tgsi.h --- mesa-18.3.3/src/mesa/state_tracker/st_glsl_to_tgsi.h 2018-12-07 18:58:04.000000000 +0000 +++ mesa-19.0.1/src/mesa/state_tracker/st_glsl_to_tgsi.h 2019-03-31 23:16:37.000000000 +0000 @@ -61,12 +61,7 @@ GLboolean st_link_shader(struct gl_context *ctx, struct gl_shader_program *prog); void -st_translate_stream_output_info(struct glsl_to_tgsi_visitor *glsl_to_tgsi, - const ubyte outputMapping[], - struct pipe_stream_output_info *so); - -void -st_translate_stream_output_info2(struct gl_transform_feedback_info *info, +st_translate_stream_output_info(struct gl_transform_feedback_info *info, const ubyte outputMapping[], struct pipe_stream_output_info *so); diff -Nru mesa-18.3.3/src/mesa/state_tracker/st_glsl_to_tgsi_private.h mesa-19.0.1/src/mesa/state_tracker/st_glsl_to_tgsi_private.h --- mesa-18.3.3/src/mesa/state_tracker/st_glsl_to_tgsi_private.h 2018-12-07 18:58:04.000000000 +0000 +++ mesa-19.0.1/src/mesa/state_tracker/st_glsl_to_tgsi_private.h 2019-03-31 23:16:37.000000000 +0000 @@ -181,6 +181,7 @@ case TGSI_OPCODE_ATOMUMAX: case TGSI_OPCODE_ATOMIMIN: case TGSI_OPCODE_ATOMIMAX: + case TGSI_OPCODE_ATOMFADD: case TGSI_OPCODE_IMG2HND: return true; default: diff -Nru mesa-18.3.3/src/mesa/state_tracker/st_manager.c mesa-19.0.1/src/mesa/state_tracker/st_manager.c --- mesa-18.3.3/src/mesa/state_tracker/st_manager.c 2019-02-01 12:03:20.000000000 +0000 +++ mesa-19.0.1/src/mesa/state_tracker/st_manager.c 2019-03-31 23:16:37.000000000 +0000 @@ -173,6 +173,26 @@ } +void +st_set_ws_renderbuffer_surface(struct st_renderbuffer *strb, + struct pipe_surface *surf) +{ + pipe_surface_reference(&strb->surface_srgb, NULL); + pipe_surface_reference(&strb->surface_linear, NULL); + + if (util_format_is_srgb(surf->format)) + pipe_surface_reference(&strb->surface_srgb, 
surf); + else + pipe_surface_reference(&strb->surface_linear, surf); + + strb->surface = surf; /* just assign, don't ref */ + pipe_resource_reference(&strb->texture, surf->texture); + + strb->Base.Width = surf->width; + strb->Base.Height = surf->height; +} + + /** * Validate a framebuffer to make sure up-to-date pipe_textures are used. * The context is only used for creating pipe surfaces and for calling @@ -234,21 +254,11 @@ u_surface_default_template(&surf_tmpl, textures[i]); ps = st->pipe->create_surface(st->pipe, textures[i], &surf_tmpl); if (ps) { - struct pipe_surface **psurf = - util_format_is_srgb(ps->format) ? &strb->surface_srgb : - &strb->surface_linear; - - pipe_surface_reference(psurf, ps); - strb->surface = *psurf; - pipe_resource_reference(&strb->texture, ps->texture); - /* ownership transfered */ + st_set_ws_renderbuffer_surface(strb, ps); pipe_surface_reference(&ps, NULL); changed = TRUE; - strb->Base.Width = strb->surface->width; - strb->Base.Height = strb->surface->height; - width = strb->Base.Width; height = strb->Base.Height; } @@ -295,7 +305,7 @@ */ static boolean st_framebuffer_add_renderbuffer(struct st_framebuffer *stfb, - gl_buffer_index idx) + gl_buffer_index idx, bool prefer_srgb) { struct gl_renderbuffer *rb; enum pipe_format format; @@ -318,7 +328,7 @@ break; default: format = stfb->iface->visual->color_format; - if (stfb->Base.Visual.sRGBCapable) + if (prefer_srgb) format = util_format_srgb(format); sw = FALSE; break; @@ -436,6 +446,7 @@ struct st_framebuffer *stfb; struct gl_config mode; gl_buffer_index idx; + bool prefer_srgb = false; if (!stfbi) return NULL; @@ -457,14 +468,15 @@ * format such that util_format_srgb(visual->color_format) can be supported * by the pipe driver. We still need to advertise the capability here. * - * For GLES, however, sRGB framebuffer write is controlled only by the - * capability of the framebuffer. 
There is GL_EXT_sRGB_write_control to - * give applications the control back, but sRGB write is still enabled by - * default. To avoid unexpected results, we should not advertise the - * capability. This could change when we add support for - * EGL_KHR_gl_colorspace. + * For GLES, however, sRGB framebuffer write is initially only controlled + * by the capability of the framebuffer, with GL_EXT_sRGB_write_control + * control is given back to the applications, but GL_FRAMEBUFFER_SRGB is + * still enabled by default since this is the behaviour when + * EXT_sRGB_write_control is not available. Since GL_EXT_sRGB_write_control + * brings GLES on par with desktop GLs EXT_framebuffer_sRGB, in mesa this + * is also expressed by using the same extension flag */ - if (_mesa_is_desktop_gl(st->ctx)) { + if (_mesa_has_EXT_framebuffer_sRGB(st->ctx)) { struct pipe_screen *screen = st->pipe->screen; const enum pipe_format srgb_format = util_format_srgb(stfbi->visual->color_format); @@ -475,8 +487,14 @@ PIPE_TEXTURE_2D, stfbi->visual->samples, stfbi->visual->samples, (PIPE_BIND_DISPLAY_TARGET | - PIPE_BIND_RENDER_TARGET))) + PIPE_BIND_RENDER_TARGET))) { mode.sRGBCapable = GL_TRUE; + /* Since GL_FRAMEBUFFER_SRGB is enabled by default on GLES we must not + * create renderbuffers with an sRGB format derived from the + * visual->color_format, but we still want sRGB for desktop GL. 
+ */ + prefer_srgb = _mesa_is_desktop_gl(st->ctx); + } } _mesa_initialize_window_framebuffer(&stfb->Base, &mode); @@ -487,13 +505,13 @@ /* add the color buffer */ idx = stfb->Base._ColorDrawBufferIndexes[0]; - if (!st_framebuffer_add_renderbuffer(stfb, idx)) { + if (!st_framebuffer_add_renderbuffer(stfb, idx, prefer_srgb)) { free(stfb); return NULL; } - st_framebuffer_add_renderbuffer(stfb, BUFFER_DEPTH); - st_framebuffer_add_renderbuffer(stfb, BUFFER_ACCUM); + st_framebuffer_add_renderbuffer(stfb, BUFFER_DEPTH, false); + st_framebuffer_add_renderbuffer(stfb, BUFFER_ACCUM, false); stfb->stamp = 0; st_framebuffer_update_attachments(stfb); @@ -800,6 +818,17 @@ struct st_context *st = (struct st_context *) stctxi; _mesa_glthread_init(st->ctx); + + /* Pin all driver threads to one L3 cache for optimal performance + * on AMD Zen. This is only done if glthread is enabled. + * + * If glthread is disabled, st_draw.c re-pins driver threads regularly + * based on the location of the app thread. + */ + struct glthread_state *glthread = st->ctx->GLThread; + if (glthread && st->pipe->set_context_param) { + util_pin_driver_threads_to_random_L3(st->pipe, &glthread->queue.threads[0]); + } } @@ -888,6 +917,9 @@ else if (attribs->flags & ST_CONTEXT_FLAG_HIGH_PRIORITY) ctx_flags |= PIPE_CONTEXT_HIGH_PRIORITY; + if (attribs->flags & ST_CONTEXT_FLAG_RESET_NOTIFICATION_ENABLED) + ctx_flags |= PIPE_CONTEXT_LOSE_CONTEXT_ON_RESET; + pipe = smapi->screen->context_create(smapi->screen, NULL, ctx_flags); if (!pipe) { *error = ST_CONTEXT_ERROR_NO_MEMORY; @@ -1185,7 +1217,8 @@ return FALSE; } - if (!st_framebuffer_add_renderbuffer(stfb, idx)) + if (!st_framebuffer_add_renderbuffer(stfb, idx, + stfb->Base.Visual.sRGBCapable)) return FALSE; st_framebuffer_update_attachments(stfb); diff -Nru mesa-18.3.3/src/mesa/state_tracker/st_manager.h mesa-19.0.1/src/mesa/state_tracker/st_manager.h --- mesa-18.3.3/src/mesa/state_tracker/st_manager.h 2018-04-16 21:31:06.000000000 +0000 +++ 
mesa-19.0.1/src/mesa/state_tracker/st_manager.h 2019-03-31 23:16:37.000000000 +0000 @@ -35,6 +35,8 @@ struct st_context; struct st_framebuffer; struct st_framebuffer_interface; +struct st_renderbuffer; +struct pipe_surface; void st_manager_flush_frontbuffer(struct st_context *st); @@ -56,4 +58,8 @@ void st_manager_flush_swapbuffers(void); +void +st_set_ws_renderbuffer_surface(struct st_renderbuffer *strb, + struct pipe_surface *surf); + #endif /* ST_MANAGER_H */ diff -Nru mesa-18.3.3/src/mesa/state_tracker/st_nir.h mesa-19.0.1/src/mesa/state_tracker/st_nir.h --- mesa-18.3.3/src/mesa/state_tracker/st_nir.h 2018-09-27 19:13:54.000000000 +0000 +++ mesa-19.0.1/src/mesa/state_tracker/st_nir.h 2019-03-31 23:16:37.000000000 +0000 @@ -38,6 +38,10 @@ unsigned lower_2plane, unsigned lower_3plane); bool st_nir_lower_uniforms_to_ubo(struct nir_shader *shader); +void st_nir_lower_wpos_ytransform(struct nir_shader *nir, + struct gl_program *prog, + struct pipe_screen *pscreen); + void st_finalize_nir(struct st_context *st, struct gl_program *prog, struct gl_shader_program *shader_program, struct nir_shader *nir); diff -Nru mesa-18.3.3/src/mesa/state_tracker/st_nir_lower_builtin.c mesa-19.0.1/src/mesa/state_tracker/st_nir_lower_builtin.c --- mesa-18.3.3/src/mesa/state_tracker/st_nir_lower_builtin.c 2018-09-27 19:13:54.000000000 +0000 +++ mesa-19.0.1/src/mesa/state_tracker/st_nir_lower_builtin.c 2019-03-31 23:16:37.000000000 +0000 @@ -107,10 +107,6 @@ memcpy(tokens, element->tokens, sizeof(tokens)); if (path->path[idx]->deref_type == nir_deref_type_array) { - nir_const_value *c = nir_src_as_const_value(path->path[idx]->arr.index); - - assert(c); - /* we need to fixup the array index slot: */ switch (tokens[0]) { case STATE_MODELVIEW_MATRIX: @@ -123,7 +119,7 @@ case STATE_TEXGEN: case STATE_TEXENV_COLOR: case STATE_CLIPPLANE: - tokens[1] = c->u32[0]; + tokens[1] = nir_src_as_uint(path->path[idx]->arr.index); break; } } diff -Nru mesa-18.3.3/src/mesa/state_tracker/st_program.c 
mesa-19.0.1/src/mesa/state_tracker/st_program.c --- mesa-18.3.3/src/mesa/state_tracker/st_program.c 2018-12-07 18:58:04.000000000 +0000 +++ mesa-19.0.1/src/mesa/state_tracker/st_program.c 2019-03-31 23:16:37.000000000 +0000 @@ -458,12 +458,9 @@ } if (stvp->shader_program) { - struct gl_program *prog = stvp->shader_program->last_vert_prog; - if (prog) { - st_translate_stream_output_info2(prog->sh.LinkedTransformFeedback, - stvp->result_to_output, - &stvp->tgsi.stream_output); - } + st_translate_stream_output_info(stvp->Base.sh.LinkedTransformFeedback, + stvp->result_to_output, + &stvp->tgsi.stream_output); st_store_ir_in_disk_cache(st, &stvp->Base, true); return true; @@ -505,7 +502,7 @@ output_semantic_name, output_semantic_index); - st_translate_stream_output_info(stvp->glsl_to_tgsi, + st_translate_stream_output_info(stvp->Base.sh.LinkedTransformFeedback, stvp->result_to_output, &stvp->tgsi.stream_output); @@ -1106,6 +1103,10 @@ key->external.lower_iyuv); } + /* Some of the lowering above may have introduced new varyings */ + nir_shader_gather_info(tgsi.ir.nir, + nir_shader_get_entrypoint(tgsi.ir.nir)); + variant->driver_shader = pipe->create_fs_state(pipe, &tgsi); variant->key = *key; @@ -1417,7 +1418,7 @@ } ureg_destroy(ureg); - st_translate_stream_output_info(glsl_to_tgsi, + st_translate_stream_output_info(prog->sh.LinkedTransformFeedback, outputMapping, &out_state->stream_output); @@ -1464,9 +1465,9 @@ } } - st_translate_stream_output_info2(prog->sh.LinkedTransformFeedback, - outputMapping, - stream_output); + st_translate_stream_output_info(prog->sh.LinkedTransformFeedback, + outputMapping, + stream_output); } /** diff -Nru mesa-18.3.3/src/mesa/state_tracker/tests/meson.build mesa-19.0.1/src/mesa/state_tracker/tests/meson.build --- mesa-18.3.3/src/mesa/state_tracker/tests/meson.build 2018-12-07 18:58:04.000000000 +0000 +++ mesa-19.0.1/src/mesa/state_tracker/tests/meson.build 2019-03-31 23:16:37.000000000 +0000 @@ -35,8 +35,9 @@ libmesa_st_test_common, 
libmesa_gallium, libglapi, libgallium, libmesa_util, ], - dependencies : [idep_gtest, dep_thread] - ) + dependencies : [idep_gtest, dep_thread], + ), + suite : ['st_mesa'], ) test( @@ -50,5 +51,6 @@ libmesa_util, ], dependencies : [idep_gtest, dep_thread] - ) + ), + suite : ['st_mesa'], ) diff -Nru mesa-18.3.3/src/mesa/swrast/s_texfetch.c mesa-19.0.1/src/mesa/swrast/s_texfetch.c --- mesa-18.3.3/src/mesa/swrast/s_texfetch.c 2018-04-16 21:31:06.000000000 +0000 +++ mesa-19.0.1/src/mesa/swrast/s_texfetch.c 2019-03-31 23:16:37.000000000 +0000 @@ -257,6 +257,7 @@ FETCH_FUNCS(A8L8_SRGB), /* Array sRGB formats */ + FETCH_FUNCS(R_SRGB8), FETCH_FUNCS(L_SRGB8), FETCH_FUNCS(BGR_SRGB8), diff -Nru mesa-18.3.3/src/mesa/swrast/s_texfetch_tmp.h mesa-19.0.1/src/mesa/swrast/s_texfetch_tmp.h --- mesa-18.3.3/src/mesa/swrast/s_texfetch_tmp.h 2017-11-05 00:14:08.000000000 +0000 +++ mesa-19.0.1/src/mesa/swrast/s_texfetch_tmp.h 2019-03-31 23:16:37.000000000 +0000 @@ -153,6 +153,7 @@ FETCH_RGBA(R8G8B8A8_SRGB, GLuint, 1) FETCH_RGBA(R8G8B8X8_SRGB, GLuint, 1) FETCH_RGBA(X8B8G8R8_SRGB, GLuint, 1) +FETCH_RGBA(R_SRGB8, GLubyte, 1) FETCH_RGBA(L_SRGB8, GLubyte, 1) FETCH_RGBA(L8A8_SRGB, GLushort, 1) FETCH_RGBA(A8L8_SRGB, GLushort, 2) diff -Nru mesa-18.3.3/src/mesa/tnl/t_draw.c mesa-19.0.1/src/mesa/tnl/t_draw.c --- mesa-18.3.3/src/mesa/tnl/t_draw.c 2018-09-27 19:13:54.000000000 +0000 +++ mesa-19.0.1/src/mesa/tnl/t_draw.c 2019-03-31 23:16:37.000000000 +0000 @@ -70,7 +70,7 @@ */ #define CONVERT( TYPE, MACRO ) do { \ GLuint i, j; \ - if (attrib->Normalized) { \ + if (attrib->Format.Normalized) { \ for (i = 0; i < count; i++) { \ const TYPE *in = (TYPE *)ptr; \ for (j = 0; j < sz; j++) { \ @@ -104,8 +104,8 @@ GLuint count ) { GLuint i; - assert(attrib->Normalized); - assert(attrib->Size == 4); + assert(attrib->Format.Normalized); + assert(attrib->Format.Size == 4); for (i = 0; i < count; i++) { const GLubyte *in = (GLubyte *) ptr; /* in is in BGRA order */ *fptr++ = UBYTE_TO_FLOAT(in[2]); /* red */ @@ 
-152,9 +152,9 @@ { GLuint i; GLint j; - const GLint size = attrib->Size; + const GLint size = attrib->Format.Size; - if (attrib->Normalized) { + if (attrib->Format.Normalized) { for (i = 0; i < count; ++i) { const GLfixed *in = (GLfixed *) ptr; for (j = 0; j < size; ++j) { @@ -187,17 +187,17 @@ struct vertex_buffer *VB = &tnl->vb; GLuint stride = binding->Stride; - if (attrib->Type != GL_FLOAT) { - const GLuint sz = attrib->Size; + if (attrib->Format.Type != GL_FLOAT) { + const GLuint sz = attrib->Format.Size; GLubyte *buf = get_space(ctx, count * sz * sizeof(GLfloat)); GLfloat *fptr = (GLfloat *)buf; - switch (attrib->Type) { + switch (attrib->Format.Type) { case GL_BYTE: CONVERT(GLbyte, BYTE_TO_FLOAT); break; case GL_UNSIGNED_BYTE: - if (attrib->Format == GL_BGRA) { + if (attrib->Format.Format == GL_BGRA) { /* See GL_EXT_vertex_array_bgra */ convert_bgra_to_float(binding, attrib, ptr, fptr, count); } @@ -240,11 +240,11 @@ VB->AttribPtr[attr]->start = (GLfloat *)ptr; VB->AttribPtr[attr]->count = count; VB->AttribPtr[attr]->stride = stride; - VB->AttribPtr[attr]->size = attrib->Size; + VB->AttribPtr[attr]->size = attrib->Format.Size; /* This should die, but so should the whole GLvector4f concept: */ - VB->AttribPtr[attr]->flags = (((1<Size)-1) | + VB->AttribPtr[attr]->flags = (((1<Format.Size)-1) | VEC_NOT_WRITEABLE | (stride == 4*sizeof(GLfloat) ? 0 : VEC_BAD_STRIDE)); diff -Nru mesa-18.3.3/src/mesa/tnl/t_split_copy.c mesa-19.0.1/src/mesa/tnl/t_split_copy.c --- mesa-18.3.3/src/mesa/tnl/t_split_copy.c 2018-09-27 19:13:54.000000000 +0000 +++ mesa-19.0.1/src/mesa/tnl/t_split_copy.c 2019-03-31 23:16:37.000000000 +0000 @@ -105,13 +105,6 @@ }; -static GLuint -attr_size(const struct gl_array_attributes *attrib) -{ - return attrib->Size * _mesa_sizeof_type(attrib->Type); -} - - /** * Shallow copy one vertex array to another. 
*/ @@ -176,7 +169,7 @@ const GLubyte *ptr = _mesa_vertex_attrib_address(attrib, binding); printf(" array %d at %p:\n", j, (void*) &arrays[j]); printf(" ptr %p, size %d, type 0x%x, stride %d\n", - ptr, attrib->Size, attrib->Type, binding->Stride); + ptr, attrib->Format.Size, attrib->Format.Type, binding->Stride); if (0) { GLint k = prims[i].start + prims[i].count - 1; GLfloat *last = (GLfloat *) (ptr + binding->Stride * k); @@ -276,7 +269,7 @@ csr += copy->varying[i].size; #ifdef NAN_CHECK - if (srcarray->Type == GL_FLOAT) { + if (srcarray->Format.Type == GL_FLOAT) { GLuint k; GLfloat *f = (GLfloat *) srcptr; for (k = 0; k < srcarray->Size; k++) { @@ -458,8 +451,8 @@ copy->varying[j].attr = i; copy->varying[j].array = ©->array[i]; - copy->varying[j].size = attr_size(attrib); - copy->vertex_size += attr_size(attrib); + copy->varying[j].size = attrib->Format._ElementSize; + copy->vertex_size += attrib->Format._ElementSize; if (_mesa_is_bufferobj(vbo) && !_mesa_bufferobj_mapped(vbo, MAP_INTERNAL)) @@ -535,16 +528,10 @@ struct gl_vertex_buffer_binding *dstbind = ©->varying[i].dstbinding; struct gl_array_attributes *dstattr = ©->varying[i].dstattribs; - dstattr->Size = srcattr->Size; - dstattr->Type = srcattr->Type; - dstattr->Format = GL_RGBA; - dstbind->Stride = copy->vertex_size; + dstattr->Format = srcattr->Format; dstattr->Ptr = copy->dstbuf + offset; - dstattr->Normalized = srcattr->Normalized; - dstattr->Integer = srcattr->Integer; - dstattr->Doubles = srcattr->Doubles; + dstbind->Stride = copy->vertex_size; dstbind->BufferObj = ctx->Shared->NullBufferObj; - dstattr->_ElementSize = srcattr->_ElementSize; dst->BufferBinding = dstbind; dst->VertexAttrib = dstattr; diff -Nru mesa-18.3.3/src/mesa/vbo/vbo_context.c mesa-19.0.1/src/mesa/vbo/vbo_context.c --- mesa-18.3.3/src/mesa/vbo/vbo_context.c 2018-12-07 18:58:04.000000000 +0000 +++ mesa-19.0.1/src/mesa/vbo/vbo_context.c 2019-03-31 23:16:37.000000000 +0000 @@ -58,11 +58,8 @@ { memset(attrib, 0, sizeof(*attrib)); - 
attrib->Size = size; - attrib->Type = GL_FLOAT; - attrib->Format = GL_RGBA; + vbo_set_vertex_format(&attrib->Format, size, GL_FLOAT); attrib->Stride = 0; - attrib->_ElementSize = size * sizeof(GLfloat); attrib->Ptr = pointer; } diff -Nru mesa-18.3.3/src/mesa/vbo/vbo_exec_api.c mesa-19.0.1/src/mesa/vbo/vbo_exec_api.c --- mesa-18.3.3/src/mesa/vbo/vbo_exec_api.c 2018-12-07 18:58:04.000000000 +0000 +++ mesa-19.0.1/src/mesa/vbo/vbo_exec_api.c 2019-03-31 23:16:37.000000000 +0000 @@ -195,7 +195,7 @@ exec->vtx.attrtype[i]); } - if (exec->vtx.attrtype[i] != vbo->current[i].Type || + if (exec->vtx.attrtype[i] != vbo->current[i].Format.Type || memcmp(current, tmp, 4 * sizeof(GLfloat) * dmul) != 0) { memcpy(current, tmp, 4 * sizeof(GLfloat) * dmul); @@ -205,14 +205,9 @@ * directly. */ /* Size here is in components - not bytes */ - vbo->current[i].Size = exec->vtx.attrsz[i] / dmul; - vbo->current[i]._ElementSize = - vbo->current[i].Size * sizeof(GLfloat) * dmul; - vbo->current[i].Type = exec->vtx.attrtype[i]; - vbo->current[i].Integer = - vbo_attrtype_to_integer_flag(exec->vtx.attrtype[i]); - vbo->current[i].Doubles = - vbo_attrtype_to_double_flag(exec->vtx.attrtype[i]); + vbo_set_vertex_format(&vbo->current[i].Format, + exec->vtx.attrsz[i] / dmul, + exec->vtx.attrtype[i]); /* This triggers rather too much recalculation of Mesa state * that doesn't get used (eg light positions). @@ -803,11 +798,14 @@ ctx->Driver.CurrentExecPrimitive = mode; ctx->Exec = ctx->BeginEnd; + /* We may have been called from a display list, in which case we should * leave dlist.c's dispatch table in place. 
*/ - if (ctx->CurrentClientDispatch == ctx->OutsideBeginEnd) { - ctx->CurrentClientDispatch = ctx->BeginEnd; + if (ctx->CurrentClientDispatch == ctx->MarshalExec) { + ctx->CurrentServerDispatch = ctx->Exec; + } else if (ctx->CurrentClientDispatch == ctx->OutsideBeginEnd) { + ctx->CurrentClientDispatch = ctx->Exec; _glapi_set_dispatch(ctx->CurrentClientDispatch); } else { assert(ctx->CurrentClientDispatch == ctx->Save); @@ -858,8 +856,11 @@ } ctx->Exec = ctx->OutsideBeginEnd; - if (ctx->CurrentClientDispatch == ctx->BeginEnd) { - ctx->CurrentClientDispatch = ctx->OutsideBeginEnd; + + if (ctx->CurrentClientDispatch == ctx->MarshalExec) { + ctx->CurrentServerDispatch = ctx->Exec; + } else if (ctx->CurrentClientDispatch == ctx->BeginEnd) { + ctx->CurrentClientDispatch = ctx->Exec; _glapi_set_dispatch(ctx->CurrentClientDispatch); } diff -Nru mesa-18.3.3/src/mesa/vbo/vbo_exec_draw.c mesa-19.0.1/src/mesa/vbo/vbo_exec_draw.c --- mesa-18.3.3/src/mesa/vbo/vbo_exec_draw.c 2018-12-07 18:58:04.000000000 +0000 +++ mesa-19.0.1/src/mesa/vbo/vbo_exec_draw.c 2019-03-31 23:16:37.000000000 +0000 @@ -191,12 +191,8 @@ GLbitfield vao_enabled = _vbo_get_vao_enabled_from_vbo(mode, exec->vtx.enabled); /* At first disable arrays no longer needed */ - GLbitfield mask = vao->_Enabled & ~vao_enabled; - while (mask) { - const int vao_attr = u_bit_scan(&mask); - _mesa_disable_vertex_array_attrib(ctx, vao, vao_attr); - } - assert((~vao_enabled & vao->_Enabled) == 0); + _mesa_disable_vertex_array_attribs(ctx, vao, VERT_BIT_ALL & ~vao_enabled); + assert((~vao_enabled & vao->Enabled) == 0); /* Bind the buffer object */ const GLuint stride = exec->vtx.vertex_size*sizeof(GLfloat); @@ -208,7 +204,7 @@ */ const GLubyte *const vao_to_vbo_map = _vbo_attribute_alias_map[mode]; /* Now set the enabled arrays */ - mask = vao_enabled; + GLbitfield mask = vao_enabled; while (mask) { const int vao_attr = u_bit_scan(&mask); const GLubyte vbo_attr = vao_to_vbo_map[vao_attr]; @@ -222,13 +218,12 @@ /* Set and enable 
*/ _vbo_set_attrib_format(ctx, vao, vao_attr, buffer_offset, size, type, offset); - if ((vao->_Enabled & VERT_BIT(vao_attr)) == 0) - _mesa_enable_vertex_array_attrib(ctx, vao, vao_attr); /* The vao is initially created with all bindings set to 0. */ assert(vao->VertexAttrib[vao_attr].BufferBindingIndex == 0); } - assert(vao_enabled == vao->_Enabled); + _mesa_enable_vertex_array_attribs(ctx, vao, vao_enabled); + assert(vao_enabled == vao->Enabled); assert(!_mesa_is_bufferobj(exec->vtx.bufferobj) || (vao_enabled & ~vao->VertexAttribBufferMask) == 0); diff -Nru mesa-18.3.3/src/mesa/vbo/vbo_private.h mesa-19.0.1/src/mesa/vbo/vbo_private.h --- mesa-18.3.3/src/mesa/vbo/vbo_private.h 2018-09-27 19:13:54.000000000 +0000 +++ mesa-19.0.1/src/mesa/vbo/vbo_private.h 2019-03-31 23:16:37.000000000 +0000 @@ -115,6 +115,16 @@ } +static inline void +vbo_set_vertex_format(struct gl_vertex_format* vertex_format, + GLubyte size, GLenum16 type) +{ + _mesa_set_vertex_format(vertex_format, size, type, GL_RGBA, GL_FALSE, + vbo_attrtype_to_integer_flag(type), + vbo_attrtype_to_double_flag(type)); +} + + /** * Return default component values for the given format. * The return type is an array of fi_types, because that's how we declare @@ -224,7 +234,7 @@ * to the VAO. But but that is done already unconditionally in * _mesa_update_array_format called above. */ - assert((vao->NewArrays | ~vao->_Enabled) & VERT_BIT(attr)); + assert((vao->NewArrays | ~vao->Enabled) & VERT_BIT(attr)); vao->VertexAttrib[attr].Ptr = ADD_POINTERS(buffer_offset, offset); } diff -Nru mesa-18.3.3/src/mesa/vbo/vbo_save_api.c mesa-19.0.1/src/mesa/vbo/vbo_save_api.c --- mesa-18.3.3/src/mesa/vbo/vbo_save_api.c 2018-12-07 18:58:04.000000000 +0000 +++ mesa-19.0.1/src/mesa/vbo/vbo_save_api.c 2019-03-31 23:16:37.000000000 +0000 @@ -426,7 +426,7 @@ return false; /* If the enabled arrays are not the same we are not equal. 
*/ - if (vao_enabled != vao->_Enabled) + if (vao_enabled != vao->Enabled) return false; /* Check the buffer binding at 0 */ @@ -450,15 +450,14 @@ const struct gl_array_attributes *attrib = &vao->VertexAttrib[attr]; if (attrib->RelativeOffset + vao->BufferBinding[0].Offset != off) return false; - if (attrib->Type != tp) + if (attrib->Format.Type != tp) return false; - if (attrib->Size != size[vbo_attr]) + if (attrib->Format.Size != size[vbo_attr]) return false; - assert(attrib->Format == GL_RGBA); - assert(attrib->Enabled == GL_TRUE); - assert(attrib->Normalized == GL_FALSE); - assert(attrib->Integer == vbo_attrtype_to_integer_flag(tp)); - assert(attrib->Doubles == vbo_attrtype_to_double_flag(tp)); + assert(attrib->Format.Format == GL_RGBA); + assert(attrib->Format.Normalized == GL_FALSE); + assert(attrib->Format.Integer == vbo_attrtype_to_integer_flag(tp)); + assert(attrib->Format.Doubles == vbo_attrtype_to_double_flag(tp)); assert(attrib->BufferBindingIndex == 0); } @@ -515,9 +514,9 @@ _vbo_set_attrib_format(ctx, *vao, vao_attr, buffer_offset, size[vbo_attr], type[vbo_attr], offset[vbo_attr]); _mesa_vertex_attrib_binding(ctx, *vao, vao_attr, 0); - _mesa_enable_vertex_array_attrib(ctx, *vao, vao_attr); } - assert(vao_enabled == (*vao)->_Enabled); + _mesa_enable_vertex_array_attribs(ctx, *vao, vao_enabled); + assert(vao_enabled == (*vao)->Enabled); assert((vao_enabled & ~(*vao)->VertexAttribBufferMask) == 0); /* Finalize and freeze the VAO */ diff -Nru mesa-18.3.3/src/mesa/vbo/vbo_save_draw.c mesa-19.0.1/src/mesa/vbo/vbo_save_draw.c --- mesa-18.3.3/src/mesa/vbo/vbo_save_draw.c 2018-09-27 19:13:54.000000000 +0000 +++ mesa-19.0.1/src/mesa/vbo/vbo_save_draw.c 2019-03-31 23:16:37.000000000 +0000 @@ -47,13 +47,13 @@ { struct vbo_context *vbo = vbo_context(ctx); - mask &= vao->_Enabled; + mask &= vao->Enabled; while (mask) { const int i = u_bit_scan(&mask); const struct gl_array_attributes *attrib = &vao->VertexAttrib[i]; struct gl_array_attributes *currval = 
&vbo->current[shift + i]; - const GLubyte size = attrib->Size; - const GLenum16 type = attrib->Type; + const GLubyte size = attrib->Format.Size; + const GLenum16 type = attrib->Format.Type; fi_type tmp[8]; int dmul = 1; @@ -66,17 +66,11 @@ else COPY_CLEAN_4V_TYPE_AS_UNION(tmp, size, *data, type); - if (type != currval->Type || + if (type != currval->Format.Type || memcmp(currval->Ptr, tmp, 4 * sizeof(GLfloat) * dmul) != 0) { memcpy((fi_type*)currval->Ptr, tmp, 4 * sizeof(GLfloat) * dmul); - currval->Size = size; - currval->_ElementSize = size * sizeof(GLfloat) * dmul; - currval->Type = type; - currval->Integer = vbo_attrtype_to_integer_flag(type); - currval->Doubles = vbo_attrtype_to_double_flag(type); - currval->Normalized = GL_FALSE; - currval->Format = GL_RGBA; + vbo_set_vertex_format(&currval->Format, size, type); ctx->NewState |= state; } diff -Nru mesa-18.3.3/src/mesa/vbo/vbo_save_loopback.c mesa-19.0.1/src/mesa/vbo/vbo_save_loopback.c --- mesa-18.3.3/src/mesa/vbo/vbo_save_loopback.c 2018-12-07 18:58:04.000000000 +0000 +++ mesa-19.0.1/src/mesa/vbo/vbo_save_loopback.c 2019-03-31 23:16:37.000000000 +0000 @@ -139,7 +139,7 @@ { la[*nr].index = shift + i; la[*nr].offset = vao->VertexAttrib[i].RelativeOffset; - la[*nr].func = vert_attrfunc[vao->VertexAttrib[i].Size - 1]; + la[*nr].func = vert_attrfunc[vao->VertexAttrib[i].Format.Size - 1]; (*nr)++; } @@ -155,23 +155,23 @@ * the NV attributes entrypoints: */ const struct gl_vertex_array_object *vao = node->VAO[VP_MODE_FF]; - GLbitfield mask = vao->_Enabled & VERT_BIT_MAT_ALL; + GLbitfield mask = vao->Enabled & VERT_BIT_MAT_ALL; while (mask) { const int i = u_bit_scan(&mask); append_attr(&nr, la, i, VBO_MATERIAL_SHIFT, vao); } vao = node->VAO[VP_MODE_SHADER]; - mask = vao->_Enabled & ~(VERT_BIT_POS | VERT_BIT_GENERIC0); + mask = vao->Enabled & ~(VERT_BIT_POS | VERT_BIT_GENERIC0); while (mask) { const int i = u_bit_scan(&mask); append_attr(&nr, la, i, 0, vao); } /* The last in the list should be the vertex provoking 
attribute */ - if (vao->_Enabled & VERT_BIT_GENERIC0) { + if (vao->Enabled & VERT_BIT_GENERIC0) { append_attr(&nr, la, VERT_ATTRIB_GENERIC0, 0, vao); - } else if (vao->_Enabled & VERT_BIT_POS) { + } else if (vao->Enabled & VERT_BIT_POS) { append_attr(&nr, la, VERT_ATTRIB_POS, 0, vao); } diff -Nru mesa-18.3.3/src/meson.build mesa-19.0.1/src/meson.build --- mesa-18.3.3/src/meson.build 2019-02-01 12:03:20.000000000 +0000 +++ mesa-19.0.1/src/meson.build 2019-03-31 23:16:37.000000000 +0000 @@ -63,6 +63,9 @@ if with_gallium_vc4 or with_gallium_v3d subdir('broadcom') endif +if with_gallium_freedreno + subdir('freedreno') +endif if with_dri_i965 or with_intel_vk subdir('intel') endif diff -Nru mesa-18.3.3/src/util/00-mesa-defaults.conf mesa-19.0.1/src/util/00-mesa-defaults.conf --- mesa-18.3.3/src/util/00-mesa-defaults.conf 2018-12-07 18:58:04.000000000 +0000 +++ mesa-19.0.1/src/util/00-mesa-defaults.conf 2019-03-31 23:16:37.000000000 +0000 @@ -21,6 +21,8 @@ built-ins (specifically gl_VertexID), which causes the vertex shaders to fail to compile. +* Applications that are not suitable for adapative sync are blacklisted here. + TODO: document the other workarounds. 
--> @@ -227,6 +229,9 @@ + + @@ -4230,6 +4364,10 @@ + + + + @@ -4458,6 +4596,21 @@ Placeholder for validation enums to be defined for VK_EXT_Validation_flags extension + + + + Placeholder for validation feature enable enums to be defined for VK_EXT_validation_features extension + + + + + + + + + + Placeholder for validation feature disable enums to be defined for VK_EXT_validation_features extension + @@ -4682,10 +4835,18 @@ + + + + + + + + @@ -4706,36 +4867,50 @@ - - - - - - - - - - - - - - - - - - - - - - - - - - - - + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + - VkResult vkCreateInstance @@ -5059,7 +5234,7 @@ VkDeviceSize stride VkQueryResultFlags flags - + VkResult vkCreateBuffer VkDevice device const VkBufferCreateInfo* pCreateInfo @@ -5759,19 +5934,6 @@ const VkAllocationCallbacks* pAllocator VkSwapchainKHR* pSwapchains - - VkResult vkCreateMirSurfaceKHR - VkInstance instance - const VkMirSurfaceCreateInfoKHR* pCreateInfo - const VkAllocationCallbacks* pAllocator - VkSurfaceKHR* pSurface - - - VkBool32 vkGetPhysicalDeviceMirPresentationSupportKHR - VkPhysicalDevice physicalDevice - uint32_t queueFamilyIndex - MirConnection* connection - void vkDestroySurfaceKHR VkInstance instance @@ -6825,89 +6987,84 @@ uint32_t stride - VkResult vkCompileDeferredNVX + VkResult vkCompileDeferredNV VkDevice device VkPipeline pipeline uint32_t shader - VkResult vkCreateAccelerationStructureNVX + VkResult vkCreateAccelerationStructureNV VkDevice device - const VkAccelerationStructureCreateInfoNVX* pCreateInfo + const VkAccelerationStructureCreateInfoNV* pCreateInfo const VkAllocationCallbacks* pAllocator - VkAccelerationStructureNVX* pAccelerationStructure + VkAccelerationStructureNV* pAccelerationStructure - void vkDestroyAccelerationStructureNVX + void vkDestroyAccelerationStructureNV VkDevice device - VkAccelerationStructureNVX accelerationStructure + VkAccelerationStructureNV accelerationStructure const VkAllocationCallbacks* 
pAllocator - void vkGetAccelerationStructureMemoryRequirementsNVX - VkDevice device - const VkAccelerationStructureMemoryRequirementsInfoNVX* pInfo - VkMemoryRequirements2KHR* pMemoryRequirements - - - void vkGetAccelerationStructureScratchMemoryRequirementsNVX + void vkGetAccelerationStructureMemoryRequirementsNV VkDevice device - const VkAccelerationStructureMemoryRequirementsInfoNVX* pInfo + const VkAccelerationStructureMemoryRequirementsInfoNV* pInfo VkMemoryRequirements2KHR* pMemoryRequirements - VkResult vkBindAccelerationStructureMemoryNVX + VkResult vkBindAccelerationStructureMemoryNV VkDevice device uint32_t bindInfoCount - const VkBindAccelerationStructureMemoryInfoNVX* pBindInfos + const VkBindAccelerationStructureMemoryInfoNV* pBindInfos - - void vkCmdCopyAccelerationStructureNVX + + void vkCmdCopyAccelerationStructureNV VkCommandBuffer commandBuffer - VkAccelerationStructureNVX dst - VkAccelerationStructureNVX src - VkCopyAccelerationStructureModeNVX mode + VkAccelerationStructureNV dst + VkAccelerationStructureNV src + VkCopyAccelerationStructureModeNV mode - - void vkCmdWriteAccelerationStructurePropertiesNVX + + void vkCmdWriteAccelerationStructuresPropertiesNV VkCommandBuffer commandBuffer - VkAccelerationStructureNVX accelerationStructure + uint32_t accelerationStructureCount + const VkAccelerationStructureNV* pAccelerationStructures VkQueryType queryType VkQueryPool queryPool - uint32_t query + uint32_t firstQuery - - void vkCmdBuildAccelerationStructureNVX + + void vkCmdBuildAccelerationStructureNV VkCommandBuffer commandBuffer - VkAccelerationStructureTypeNVX type - uint32_t instanceCount + const VkAccelerationStructureInfoNV* pInfo VkBuffer instanceData VkDeviceSize instanceOffset - uint32_t geometryCount - const VkGeometryNVX* pGeometries - VkBuildAccelerationStructureFlagsNVX flags VkBool32 update - VkAccelerationStructureNVX dst - VkAccelerationStructureNVX src + VkAccelerationStructureNV dst + VkAccelerationStructureNV src VkBuffer scratch 
VkDeviceSize scratchOffset - - void vkCmdTraceRaysNVX + + void vkCmdTraceRaysNV VkCommandBuffer commandBuffer VkBuffer raygenShaderBindingTableBuffer VkDeviceSize raygenShaderBindingOffset - VkBuffer missShaderBindingTableBuffer + VkBuffer missShaderBindingTableBuffer VkDeviceSize missShaderBindingOffset VkDeviceSize missShaderBindingStride - VkBuffer hitShaderBindingTableBuffer + VkBuffer hitShaderBindingTableBuffer VkDeviceSize hitShaderBindingOffset VkDeviceSize hitShaderBindingStride + VkBuffer callableShaderBindingTableBuffer + VkDeviceSize callableShaderBindingOffset + VkDeviceSize callableShaderBindingStride uint32_t width uint32_t height + uint32_t depth - VkResult vkGetRaytracingShaderHandlesNVX + VkResult vkGetRayTracingShaderGroupHandlesNV VkDevice device VkPipeline pipeline uint32_t firstGroup @@ -6916,18 +7073,18 @@ void* pData - VkResult vkGetAccelerationStructureHandleNVX + VkResult vkGetAccelerationStructureHandleNV VkDevice device - VkAccelerationStructureNVX accelerationStructure + VkAccelerationStructureNV accelerationStructure size_t dataSize void* pData - VkResult vkCreateRaytracingPipelinesNVX + VkResult vkCreateRayTracingPipelinesNV VkDevice device VkPipelineCache pipelineCache uint32_t createInfoCount - const VkRaytracingPipelineCreateInfoNVX* pCreateInfos + const VkRayTracingPipelineCreateInfoNV* pCreateInfos const VkAllocationCallbacks* pAllocator VkPipeline* pPipelines @@ -6937,6 +7094,11 @@ VkImage image VkImageDrmFormatModifierPropertiesEXT* pProperties + + VkDeviceAddress vkGetBufferDeviceAddressEXT + VkDevice device + const VkBufferDeviceAddressInfoEXT* pInfo + @@ -7619,15 +7781,11 @@ - + + - - - - - @@ -7771,14 +7929,38 @@ - - + + + + + + + + + + + + + + - - + + + + + + + + + + + + + + @@ -8417,10 +8599,12 @@ - + - - + + + + @@ -9355,11 +9539,8 @@ - - - - - + + @@ -9470,70 +9651,76 @@ - + - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - + + + + + + + + + + + + + + + + + + + + + + + + + + 
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + - - - - - - - - - - - - - + + + + + + + + + + + + + @@ -9712,10 +9899,13 @@ - + - - + + + + + @@ -9773,10 +9963,12 @@ - + - - + + + + @@ -9786,16 +9978,22 @@ - + - - - - - - - - + + + + + + + + + + + + + + @@ -9898,7 +10096,7 @@ - + @@ -9938,15 +10136,25 @@ - + - - - - - - - + + + + + + + + + + + + + + + + + @@ -9962,10 +10170,12 @@ - + - - + + + + @@ -9976,13 +10186,13 @@ - + - + @@ -10058,16 +10268,22 @@ - - - - - - - + - - + + + + + + + + + + + + + + @@ -10100,5 +10316,55 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff -Nru mesa-18.3.3/src/vulkan/util/gen_enum_to_str.py mesa-19.0.1/src/vulkan/util/gen_enum_to_str.py --- mesa-18.3.3/src/vulkan/util/gen_enum_to_str.py 2018-09-27 19:13:54.000000000 +0000 +++ mesa-19.0.1/src/vulkan/util/gen_enum_to_str.py 2019-03-31 23:16:37.000000000 +0000 @@ -101,6 +101,10 @@ #include #include + #ifdef __cplusplus + extern "C" { + #endif + % for ext in extensions: #define _${ext.name}_number (${ext.number}) % endfor @@ -109,6 +113,10 @@ const char * vk_${enum.name[2:]}_to_str(${enum.name} input); % endfor + #ifdef __cplusplus + } /* extern "C" */ + #endif + #endif"""), output_encoding='utf-8') diff -Nru mesa-18.3.3/src/vulkan/util/vk_util.c mesa-19.0.1/src/vulkan/util/vk_util.c --- mesa-18.3.3/src/vulkan/util/vk_util.c 2018-03-08 23:00:46.000000000 +0000 +++ mesa-19.0.1/src/vulkan/util/vk_util.c 2019-03-31 23:16:37.000000000 +0000 @@ -29,12 +29,12 @@ uint32_t vk_get_driver_version(void) { - const char *minor_string = strchr(VERSION, '.'); + const char *minor_string = strchr(PACKAGE_VERSION, '.'); const char *patch_string = minor_string ? strchr(minor_string + 1, '.') : NULL; - int major = atoi(VERSION); + int major = atoi(PACKAGE_VERSION); int minor = minor_string ? atoi(minor_string + 1) : 0; int patch = patch_string ? 
atoi(patch_string + 1) : 0; - if (strstr(VERSION, "devel")) { + if (strstr(PACKAGE_VERSION, "devel")) { if (patch == 0) { patch = 99; if (minor == 0) { diff -Nru mesa-18.3.3/src/vulkan/wsi/wsi_common.c mesa-19.0.1/src/vulkan/wsi/wsi_common.c --- mesa-18.3.3/src/vulkan/wsi/wsi_common.c 2018-12-07 18:58:04.000000000 +0000 +++ mesa-19.0.1/src/vulkan/wsi/wsi_common.c 2019-03-31 23:16:37.000000000 +0000 @@ -278,8 +278,8 @@ .sType = VK_STRUCTURE_TYPE_WSI_FORMAT_MODIFIER_PROPERTIES_LIST_MESA, .pNext = NULL, }; - VkFormatProperties2KHR format_props = { - .sType = VK_STRUCTURE_TYPE_FORMAT_PROPERTIES_2_KHR, + VkFormatProperties2 format_props = { + .sType = VK_STRUCTURE_TYPE_FORMAT_PROPERTIES_2, .pNext = &modifier_props_list, }; wsi->GetPhysicalDeviceFormatProperties2KHR(wsi->pdevice, @@ -379,13 +379,13 @@ .pNext = NULL, .implicit_sync = true, }; - const VkExportMemoryAllocateInfoKHR memory_export_info = { - .sType = VK_STRUCTURE_TYPE_EXPORT_MEMORY_ALLOCATE_INFO_KHR, + const VkExportMemoryAllocateInfo memory_export_info = { + .sType = VK_STRUCTURE_TYPE_EXPORT_MEMORY_ALLOCATE_INFO, .pNext = &memory_wsi_info, .handleTypes = VK_EXTERNAL_MEMORY_HANDLE_TYPE_DMA_BUF_BIT_EXT, }; - const VkMemoryDedicatedAllocateInfoKHR memory_dedicated_info = { - .sType = VK_STRUCTURE_TYPE_MEMORY_DEDICATED_ALLOCATE_INFO_KHR, + const VkMemoryDedicatedAllocateInfo memory_dedicated_info = { + .sType = VK_STRUCTURE_TYPE_MEMORY_DEDICATED_ALLOCATE_INFO, .pNext = &memory_export_info, .image = image->image, .buffer = VK_NULL_HANDLE, @@ -431,7 +431,7 @@ for (uint32_t p = 0; p < image->num_planes; p++) { const VkImageSubresource image_subresource = { - .aspectMask = VK_IMAGE_ASPECT_PLANE_0_BIT_KHR << p, + .aspectMask = VK_IMAGE_ASPECT_PLANE_0_BIT << p, .mipLevel = 0, .arrayLayer = 0, }; @@ -504,8 +504,8 @@ uint32_t linear_size = linear_stride * pCreateInfo->imageExtent.height; linear_size = align_u32(linear_size, 4096); - const VkExternalMemoryBufferCreateInfoKHR prime_buffer_external_info = { - .sType = 
VK_STRUCTURE_TYPE_EXTERNAL_MEMORY_BUFFER_CREATE_INFO_KHR, + const VkExternalMemoryBufferCreateInfo prime_buffer_external_info = { + .sType = VK_STRUCTURE_TYPE_EXTERNAL_MEMORY_BUFFER_CREATE_INFO, .pNext = NULL, .handleTypes = VK_EXTERNAL_MEMORY_HANDLE_TYPE_DMA_BUF_BIT_EXT, }; @@ -530,13 +530,13 @@ .pNext = NULL, .implicit_sync = true, }; - const VkExportMemoryAllocateInfoKHR prime_memory_export_info = { - .sType = VK_STRUCTURE_TYPE_EXPORT_MEMORY_ALLOCATE_INFO_KHR, + const VkExportMemoryAllocateInfo prime_memory_export_info = { + .sType = VK_STRUCTURE_TYPE_EXPORT_MEMORY_ALLOCATE_INFO, .pNext = &memory_wsi_info, .handleTypes = VK_EXTERNAL_MEMORY_HANDLE_TYPE_DMA_BUF_BIT_EXT, }; - const VkMemoryDedicatedAllocateInfoKHR prime_memory_dedicated_info = { - .sType = VK_STRUCTURE_TYPE_MEMORY_DEDICATED_ALLOCATE_INFO_KHR, + const VkMemoryDedicatedAllocateInfo prime_memory_dedicated_info = { + .sType = VK_STRUCTURE_TYPE_MEMORY_DEDICATED_ALLOCATE_INFO, .pNext = &prime_memory_export_info, .image = VK_NULL_HANDLE, .buffer = image->prime.buffer, @@ -585,8 +585,8 @@ wsi->GetImageMemoryRequirements(chain->device, image->image, &reqs); - const VkMemoryDedicatedAllocateInfoKHR memory_dedicated_info = { - .sType = VK_STRUCTURE_TYPE_MEMORY_DEDICATED_ALLOCATE_INFO_KHR, + const VkMemoryDedicatedAllocateInfo memory_dedicated_info = { + .sType = VK_STRUCTURE_TYPE_MEMORY_DEDICATED_ALLOCATE_INFO, .pNext = NULL, .image = image->image, .buffer = VK_NULL_HANDLE, diff -Nru mesa-18.3.3/src/vulkan/wsi/wsi_common_display.c mesa-19.0.1/src/vulkan/wsi/wsi_common_display.c --- mesa-18.3.3/src/vulkan/wsi/wsi_common_display.c 2018-12-07 18:58:04.000000000 +0000 +++ mesa-19.0.1/src/vulkan/wsi/wsi_common_display.c 2019-03-31 23:16:37.000000000 +0000 @@ -834,6 +834,7 @@ VK_IMAGE_USAGE_TRANSFER_SRC_BIT | VK_IMAGE_USAGE_SAMPLED_BIT | VK_IMAGE_USAGE_TRANSFER_DST_BIT | + VK_IMAGE_USAGE_STORAGE_BIT | VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT; return VK_SUCCESS; diff -Nru mesa-18.3.3/src/vulkan/wsi/wsi_common_wayland.c 
mesa-19.0.1/src/vulkan/wsi/wsi_common_wayland.c --- mesa-18.3.3/src/vulkan/wsi/wsi_common_wayland.c 2018-12-07 18:58:04.000000000 +0000 +++ mesa-19.0.1/src/vulkan/wsi/wsi_common_wayland.c 2019-03-31 23:16:37.000000000 +0000 @@ -508,6 +508,7 @@ VK_IMAGE_USAGE_TRANSFER_SRC_BIT | VK_IMAGE_USAGE_SAMPLED_BIT | VK_IMAGE_USAGE_TRANSFER_DST_BIT | + VK_IMAGE_USAGE_STORAGE_BIT | VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT; return VK_SUCCESS; diff -Nru mesa-18.3.3/src/vulkan/wsi/wsi_common_x11.c mesa-19.0.1/src/vulkan/wsi/wsi_common_x11.c --- mesa-18.3.3/src/vulkan/wsi/wsi_common_x11.c 2018-12-07 18:58:04.000000000 +0000 +++ mesa-19.0.1/src/vulkan/wsi/wsi_common_x11.c 2019-03-31 23:16:37.000000000 +0000 @@ -515,6 +515,7 @@ VK_IMAGE_USAGE_TRANSFER_SRC_BIT | VK_IMAGE_USAGE_SAMPLED_BIT | VK_IMAGE_USAGE_TRANSFER_DST_BIT | + VK_IMAGE_USAGE_STORAGE_BIT | VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT; return VK_SUCCESS; diff -Nru mesa-18.3.3/.travis.yml mesa-19.0.1/.travis.yml --- mesa-18.3.3/.travis.yml 2018-12-07 18:58:04.000000000 +0000 +++ mesa-19.0.1/.travis.yml 2019-03-31 23:16:37.000000000 +0000 @@ -1,7 +1,6 @@ language: c -sudo: false -dist: trusty +dist: xenial cache: apt: true @@ -16,7 +15,7 @@ - GLPROTO_VERSION=glproto-1.4.17 - DRI2PROTO_VERSION=dri2proto-2.8 - LIBPCIACCESS_VERSION=libpciaccess-0.13.4 - - LIBDRM_VERSION=libdrm-2.4.74 + - LIBDRM_VERSION=libdrm-2.4.97 - XCBPROTO_VERSION=xcb-proto-1.13 - RANDRPROTO_VERSION=randrproto-1.3.0 - LIBXRANDR_VERSION=libXrandr-1.3.0 @@ -35,20 +34,19 @@ - env: - LABEL="meson Vulkan" - BUILD=meson - - DRI_DRIVERS="" - - GALLIUM_DRIVERS="" + - UNWIND="false" + - DRI_LOADERS="-Dglx=disabled -Dgbm=false -Degl=false -Dplatforms=x11,wayland,drm -Dosmesa=none" + - GALLIUM_ST="-Ddri3=true -Dgallium-vdpau=false -Dgallium-xvmc=false -Dgallium-omx=disabled -Dgallium-va=false -Dgallium-xa=false -Dgallium-nine=false -Dgallium-opencl=disabled" - VULKAN_DRIVERS="intel,amd" - - LLVM_VERSION=6.0 + - LLVM_VERSION=7 - LLVM_CONFIG="llvm-config-${LLVM_VERSION}" addons: 
apt: sources: - - llvm-toolchain-trusty-6.0 - # llvm-6 requires libstdc++4.9 which is not in main repo - - ubuntu-toolchain-r-test + - sourceline: 'deb http://apt.llvm.org/xenial/ llvm-toolchain-xenial-7 main' + key_url: https://apt.llvm.org/llvm-snapshot.gpg.key packages: - # From sources above - - llvm-6.0-dev + - llvm-7-dev # Common - xz-utils - libexpat1-dev @@ -56,23 +54,27 @@ - libelf-dev - python3.5 - python3-pip + - python3-setuptools - env: - LABEL="meson loaders/classic DRI" - BUILD=meson + - UNWIND="false" + - DRI_LOADERS="-Dglx=dri -Dgbm=true -Degl=true -Dplatforms=x11,wayland,drm,surfaceless -Dosmesa=classic" - DRI_DRIVERS="i915,i965,r100,r200,swrast,nouveau" - - GALLIUM_DRIVERS="" - - VULKAN_DRIVERS="" + - GALLIUM_ST="-Ddri3=true -Dgallium-vdpau=false -Dgallium-xvmc=false -Dgallium-omx=disabled -Dgallium-va=false -Dgallium-xa=false -Dgallium-nine=false -Dgallium-opencl=disabled" addons: apt: packages: - xz-utils - x11proto-xf86vidmode-dev + - libxxf86vm-dev - libexpat1-dev - libx11-xcb-dev - libxdamage-dev - libxfixes-dev - python3.5 - python3-pip + - python3-setuptools - env: - LABEL="make loaders/classic DRI" - BUILD=make @@ -89,11 +91,200 @@ packages: - xz-utils - x11proto-xf86vidmode-dev + - libxxf86vm-dev - libexpat1-dev - libx11-xcb-dev - libxdamage-dev - libxfixes-dev - python3-pip + - python3-setuptools + - env: + # NOTE: Building SWR is 2x (yes two) times slower than all the other + # gallium drivers combined. + # Start this early so that it doesn't hunder the run time. 
+ - LABEL="meson Gallium Drivers SWR" + - BUILD=meson + - UNWIND="true" + - DRI_LOADERS="-Dglx=disabled -Degl=false -Dgbm=false" + - GALLIUM_ST="-Ddri3=false -Dgallium-vdpau=false -Dgallium-xvmc=false -Dgallium-omx=disabled -Dgallium-va=false -Dgallium-xa=false -Dgallium-nine=false -Dgallium-opencl=disabled" + - GALLIUM_DRIVERS="swr" + - LLVM_VERSION=6.0 + - LLVM_CONFIG="llvm-config-${LLVM_VERSION}" + addons: + apt: + packages: + - llvm-6.0-dev + # Common + - xz-utils + - libexpat1-dev + - libx11-xcb-dev + - libelf-dev + - libunwind8-dev + - python3.5 + - python3-pip + - python3-setuptools + - env: + - LABEL="meson Gallium Drivers RadeonSI" + - BUILD=meson + - UNWIND="true" + - DRI_LOADERS="-Dglx=disabled -Degl=false -Dgbm=false" + - GALLIUM_ST="-Ddri3=false -Dgallium-vdpau=false -Dgallium-xvmc=false -Dgallium-omx=disabled -Dgallium-va=false -Dgallium-xa=false -Dgallium-nine=false -Dgallium-opencl=disabled" + - GALLIUM_DRIVERS="radeonsi" + - LLVM_VERSION=7 + - LLVM_CONFIG="llvm-config-${LLVM_VERSION}" + addons: + apt: + sources: + - sourceline: 'deb http://apt.llvm.org/xenial/ llvm-toolchain-xenial-7 main' + key_url: https://apt.llvm.org/llvm-snapshot.gpg.key + packages: + # From sources above + - llvm-7-dev + # Common + - xz-utils + - libexpat1-dev + - libx11-xcb-dev + - libelf-dev + - libunwind8-dev + - python3.5 + - python3-pip + - python3-setuptools + - env: + - LABEL="meson Gallium Drivers Other" + - BUILD=meson + - UNWIND="true" + - DRI_LOADERS="-Dglx=disabled -Degl=false -Dgbm=false" + - GALLIUM_ST="-Ddri3=false -Dgallium-vdpau=false -Dgallium-xvmc=false -Dgallium-omx=disabled -Dgallium-va=false -Dgallium-xa=false -Dgallium-nine=false -Dgallium-opencl=disabled" + - GALLIUM_DRIVERS="i915,nouveau,kmsro,r300,r600,freedreno,svga,swrast,v3d,vc4,virgl,etnaviv" + - LLVM_VERSION=5.0 + - LLVM_CONFIG="llvm-config-${LLVM_VERSION}" + addons: + apt: + packages: + # LLVM packaging is broken and misses these dependencies + - libedit-dev + - llvm-5.0-dev + # Common + - 
xz-utils + - libexpat1-dev + - libx11-xcb-dev + - libelf-dev + - libunwind8-dev + - python3.5 + - python3-pip + - python3-setuptools + - env: + - LABEL="meson Gallium ST Clover LLVM-5.0" + - BUILD=meson + - UNWIND="true" + - DRI_LOADERS="-Dglx=disabled -Degl=false -Dgbm=false" + - GALLIUM_ST="-Ddri3=false -Dgallium-vdpau=false -Dgallium-xvmc=false -Dgallium-omx=disabled -Dgallium-va=false -Dgallium-xa=false -Dgallium-nine=false -Dgallium-opencl=icd" + - GALLIUM_DRIVERS="r600" + - LLVM_VERSION=5.0 + - LLVM_CONFIG="llvm-config-${LLVM_VERSION}" + addons: + apt: + packages: + - libclc-dev + # LLVM packaging is broken and misses these dependencies + - libedit-dev + - llvm-5.0-dev + - clang-5.0 + - libclang-5.0-dev + # Common + - xz-utils + - libexpat1-dev + - libx11-xcb-dev + - libelf-dev + - libunwind8-dev + - python3-pip + - python3-setuptools + - env: + - LABEL="meson Gallium ST Clover LLVM-6.0" + - BUILD=meson + - UNWIND="true" + - DRI_LOADERS="-Dglx=disabled -Degl=false -Dgbm=false" + - GALLIUM_ST="-Ddri3=false -Dgallium-vdpau=false -Dgallium-xvmc=false -Dgallium-omx=disabled -Dgallium-va=false -Dgallium-xa=false -Dgallium-nine=false -Dgallium-opencl=icd" + - GALLIUM_DRIVERS="r600" + - LLVM_VERSION=6.0 + - LLVM_CONFIG="llvm-config-${LLVM_VERSION}" + addons: + apt: + packages: + - libclc-dev + - llvm-6.0-dev + - clang-6.0 + - libclang-6.0-dev + # Common + - xz-utils + - libexpat1-dev + - libx11-xcb-dev + - libelf-dev + - libunwind8-dev + - python3.5 + - python3-pip + - python3-setuptools + - env: + - LABEL="meson Gallium ST Clover LLVM-7" + - BUILD=meson + - UNWIND="true" + - DRI_LOADERS="-Dglx=disabled -Degl=false -Dgbm=false" + - GALLIUM_ST="-Ddri3=false -Dgallium-vdpau=false -Dgallium-xvmc=false -Dgallium-omx=disabled -Dgallium-va=false -Dgallium-xa=false -Dgallium-nine=false -Dgallium-opencl=icd" + - GALLIUM_DRIVERS="r600,radeonsi" + - LLVM_VERSION=7 + - LLVM_CONFIG="llvm-config-${LLVM_VERSION}" + addons: + apt: + sources: + - sourceline: 'deb 
http://apt.llvm.org/xenial/ llvm-toolchain-xenial-7 main' + key_url: https://apt.llvm.org/llvm-snapshot.gpg.key + packages: + - libclc-dev + # From sources above + - llvm-7-dev + - clang-7 + - libclang-7-dev + # Common + - xz-utils + - libexpat1-dev + - libx11-xcb-dev + - libelf-dev + - libunwind8-dev + - python3.5 + - python3-pip + - python3-setuptools + - env: + - LABEL="meson Gallium ST Other" + - BUILD=meson + - UNWIND="true" + - DRI_LOADERS="-Dglx=disabled -Degl=false -Dgbm=false" + - GALLIUM_ST="-Ddri3=true -Dgallium-vdpau=true -Dgallium-xvmc=true -Dgallium-omx=bellagio -Dgallium-va=true -Dgallium-xa=true -Dgallium-nine=true -Dgallium-opencl=disabled -Dosmesa=gallium" + # We need swrast for osmesa and nine. + # Nouveau supports, or builds at least against all ST. + - GALLIUM_DRIVERS="nouveau,swrast" + - LLVM_VERSION=5.0 + - LLVM_CONFIG="llvm-config-${LLVM_VERSION}" + addons: + apt: + packages: + - llvm-5.0-dev + # LLVM packaging is broken and misses these dependencies + - libedit-dev + # Nine requires gcc 4.6... which is the one we have right ? + - libxvmc-dev + # Build locally, for now. + #- libvdpau-dev + #- libva-dev + - libomxil-bellagio-dev + # Common + - xz-utils + - libexpat1-dev + - libx11-xcb-dev + - libelf-dev + - libunwind8-dev + - python3.5 + - python3-pip + - python3-setuptools - env: # NOTE: Building SWR is 2x (yes two) times slower than all the other # gallium drivers combined. 
@@ -112,12 +303,7 @@ - LIBUNWIND_FLAGS="--enable-libunwind" addons: apt: - sources: - - llvm-toolchain-trusty-6.0 - # llvm-6 requires libstdc++4.9 which is not in main repo - - ubuntu-toolchain-r-test packages: - # From sources above - llvm-6.0-dev # Common - xz-utils @@ -126,12 +312,13 @@ - libelf-dev - libunwind8-dev - python3-pip + - python3-setuptools - env: - LABEL="make Gallium Drivers RadeonSI" - BUILD=make - MAKEFLAGS="-j4" - MAKE_CHECK_COMMAND="true" - - LLVM_VERSION=6.0 + - LLVM_VERSION=7 - LLVM_CONFIG="llvm-config-${LLVM_VERSION}" - DRI_LOADERS="--disable-glx --disable-gbm --disable-egl" - DRI_DRIVERS="" @@ -142,12 +329,11 @@ addons: apt: sources: - - llvm-toolchain-trusty-6.0 - # llvm-6 requires libstdc++4.9 which is not in main repo - - ubuntu-toolchain-r-test + - sourceline: 'deb http://apt.llvm.org/xenial/ llvm-toolchain-xenial-7 main' + key_url: https://apt.llvm.org/llvm-snapshot.gpg.key packages: # From sources above - - llvm-6.0-dev + - llvm-7-dev # Common - xz-utils - libexpat1-dev @@ -155,6 +341,7 @@ - libelf-dev - libunwind8-dev - python3-pip + - python3-setuptools - env: - LABEL="make Gallium Drivers Other" - BUILD=make @@ -162,23 +349,17 @@ - MAKE_CHECK_COMMAND="true" - LLVM_VERSION=3.9 - LLVM_CONFIG="llvm-config-${LLVM_VERSION}" - # New binutils linker is required for llvm-3.9 - - OVERRIDE_PATH=/usr/lib/binutils-2.26/bin - DRI_LOADERS="--disable-glx --disable-gbm --disable-egl" - DRI_DRIVERS="" - GALLIUM_ST="--enable-dri --disable-opencl --disable-xa --disable-nine --disable-xvmc --disable-vdpau --disable-va --disable-omx-bellagio --disable-gallium-osmesa" - - GALLIUM_DRIVERS="i915,nouveau,pl111,r300,r600,freedreno,svga,swrast,v3d,vc4,virgl,etnaviv,imx" + - GALLIUM_DRIVERS="i915,nouveau,kmsro,r300,r600,freedreno,svga,swrast,v3d,vc4,virgl,etnaviv" - VULKAN_DRIVERS="" - LIBUNWIND_FLAGS="--enable-libunwind" addons: apt: - sources: - - llvm-toolchain-trusty-3.9 packages: - - binutils-2.26 # LLVM packaging is broken and misses these dependencies 
- libedit-dev - # From sources above - llvm-3.9-dev # Common - xz-utils @@ -187,6 +368,7 @@ - libelf-dev - libunwind8-dev - python3-pip + - python3-setuptools - env: - LABEL="make Gallium ST Clover LLVM-3.9" - BUILD=make @@ -194,10 +376,6 @@ - MAKE_CHECK_COMMAND="true" - LLVM_VERSION=3.9 - LLVM_CONFIG="llvm-config-${LLVM_VERSION}" - - OVERRIDE_CC=gcc-4.7 - - OVERRIDE_CXX=g++-4.7 - # New binutils linker is required for llvm-3.9 - - OVERRIDE_PATH=/usr/lib/binutils-2.26/bin - DRI_LOADERS="--disable-glx --disable-gbm --disable-egl" - DRI_DRIVERS="" - GALLIUM_ST="--disable-dri --enable-opencl --enable-opencl-icd --enable-llvm --disable-xa --disable-nine --disable-xvmc --disable-vdpau --disable-va --disable-omx-bellagio --disable-gallium-osmesa" @@ -206,15 +384,10 @@ - LIBUNWIND_FLAGS="--enable-libunwind" addons: apt: - sources: - - llvm-toolchain-trusty-3.9 packages: - - binutils-2.26 - libclc-dev # LLVM packaging is broken and misses these dependencies - libedit-dev - - g++-4.7 - # From sources above - llvm-3.9-dev - clang-3.9 - libclang-3.9-dev @@ -225,6 +398,7 @@ - libelf-dev - libunwind8-dev - python3-pip + - python3-setuptools - env: - LABEL="make Gallium ST Clover LLVM-4.0" - BUILD=make @@ -232,8 +406,6 @@ - MAKE_CHECK_COMMAND="true" - LLVM_VERSION=4.0 - LLVM_CONFIG="llvm-config-${LLVM_VERSION}" - - OVERRIDE_CC=gcc-4.8 - - OVERRIDE_CXX=g++-4.8 - DRI_LOADERS="--disable-glx --disable-gbm --disable-egl" - DRI_DRIVERS="" - GALLIUM_ST="--disable-dri --enable-opencl --enable-opencl-icd --enable-llvm --disable-xa --disable-nine --disable-xvmc --disable-vdpau --disable-va --disable-omx-bellagio --disable-gallium-osmesa" @@ -242,14 +414,10 @@ - LIBUNWIND_FLAGS="--enable-libunwind" addons: apt: - sources: - - llvm-toolchain-trusty-4.0 packages: - libclc-dev # LLVM packaging is broken and misses these dependencies - libedit-dev - - g++-4.8 - # From sources above - llvm-4.0-dev - clang-4.0 - libclang-4.0-dev @@ -260,6 +428,7 @@ - libelf-dev - libunwind8-dev - python3-pip + - 
python3-setuptools - env: - LABEL="make Gallium ST Clover LLVM-5.0" - BUILD=make @@ -267,8 +436,6 @@ - MAKE_CHECK_COMMAND="true" - LLVM_VERSION=5.0 - LLVM_CONFIG="llvm-config-${LLVM_VERSION}" - - OVERRIDE_CC=gcc-4.8 - - OVERRIDE_CXX=g++-4.8 - DRI_LOADERS="--disable-glx --disable-gbm --disable-egl" - DRI_DRIVERS="" - GALLIUM_ST="--disable-dri --enable-opencl --enable-opencl-icd --enable-llvm --disable-xa --disable-nine --disable-xvmc --disable-vdpau --disable-va --disable-omx-bellagio --disable-gallium-osmesa" @@ -277,14 +444,10 @@ - LIBUNWIND_FLAGS="--enable-libunwind" addons: apt: - sources: - - llvm-toolchain-trusty-5.0 packages: - libclc-dev # LLVM packaging is broken and misses these dependencies - libedit-dev - - g++-4.8 - # From sources above - llvm-5.0-dev - clang-5.0 - libclang-5.0-dev @@ -295,6 +458,7 @@ - libelf-dev - libunwind8-dev - python3-pip + - python3-setuptools - env: - LABEL="make Gallium ST Clover LLVM-6.0" - BUILD=make @@ -305,18 +469,13 @@ - DRI_LOADERS="--disable-glx --disable-gbm --disable-egl" - DRI_DRIVERS="" - GALLIUM_ST="--disable-dri --enable-opencl --enable-opencl-icd --enable-llvm --disable-xa --disable-nine --disable-xvmc --disable-vdpau --disable-va --disable-omx-bellagio --disable-gallium-osmesa" - - GALLIUM_DRIVERS="r600,radeonsi" + - GALLIUM_DRIVERS="r600" - VULKAN_DRIVERS="" - LIBUNWIND_FLAGS="--enable-libunwind" addons: apt: - sources: - - llvm-toolchain-trusty-6.0 - # llvm-6 requires libstdc++4.9 which is not in main repo - - ubuntu-toolchain-r-test packages: - libclc-dev - # From sources above - llvm-6.0-dev - clang-6.0 - libclang-6.0-dev @@ -327,6 +486,7 @@ - libelf-dev - libunwind8-dev - python3-pip + - python3-setuptools - env: - LABEL="make Gallium ST Clover LLVM-7" - BUILD=make @@ -343,10 +503,8 @@ addons: apt: sources: - - sourceline: 'deb http://apt.llvm.org/trusty/ llvm-toolchain-trusty-7 main' + - sourceline: 'deb http://apt.llvm.org/xenial/ llvm-toolchain-xenial-7 main' key_url: 
https://apt.llvm.org/llvm-snapshot.gpg.key - # llvm-7 requires libstdc++4.9 which is not in main repo - - ubuntu-toolchain-r-test packages: - libclc-dev # From sources above @@ -364,7 +522,7 @@ - BUILD=make - MAKEFLAGS="-j4" - MAKE_CHECK_COMMAND="true" - - LLVM_VERSION=3.3 + - LLVM_VERSION=3.5 - LLVM_CONFIG="llvm-config-${LLVM_VERSION}" - DRI_LOADERS="--disable-glx --disable-gbm --disable-egl" - DRI_DRIVERS="" @@ -378,8 +536,8 @@ addons: apt: packages: - # We actually want to test against llvm-3.3 - - llvm-3.3-dev + # We actually want to test against llvm-3.3, yet 3.5 is available + - llvm-3.5-dev # Nine requires gcc 4.6... which is the one we have right ? - libxvmc-dev # Build locally, for now. @@ -395,12 +553,13 @@ - libelf-dev - libunwind8-dev - python3-pip + - python3-setuptools - env: - LABEL="make Vulkan" - BUILD=make - MAKEFLAGS="-j4" - MAKE_CHECK_COMMAND="make -C src/gtest check && make -C src/intel check" - - LLVM_VERSION=6.0 + - LLVM_VERSION=7 - LLVM_CONFIG="llvm-config-${LLVM_VERSION}" - DRI_LOADERS="--disable-glx --disable-gbm --disable-egl --with-platforms=x11,wayland" - DRI_DRIVERS="" @@ -411,18 +570,18 @@ addons: apt: sources: - - llvm-toolchain-trusty-6.0 - # llvm-6 requires libstdc++4.9 which is not in main repo - - ubuntu-toolchain-r-test + - sourceline: 'deb http://apt.llvm.org/xenial/ llvm-toolchain-xenial-7 main' + key_url: https://apt.llvm.org/llvm-snapshot.gpg.key packages: # From sources above - - llvm-6.0-dev + - llvm-7-dev # Common - xz-utils - libexpat1-dev - libx11-xcb-dev - libelf-dev - python3-pip + - python3-setuptools - env: - LABEL="scons" - BUILD=scons @@ -447,14 +606,15 @@ - SCONS_TARGET="llvm=1" # Keep it symmetrical to the make build. 
- SCONS_CHECK_COMMAND="scons llvm=1 check" - - LLVM_VERSION=3.3 + - LLVM_VERSION=3.5 - LLVM_CONFIG="llvm-config-${LLVM_VERSION}" addons: apt: packages: # LLVM packaging is broken and misses these dependencies - libedit-dev - - llvm-3.3-dev + # We actually want to test against llvm-3.3, yet 3.5 is available + - llvm-3.5-dev # Common - xz-utils - x11proto-xf86vidmode-dev @@ -472,12 +632,7 @@ - SCONS_CHECK_COMMAND="true" addons: apt: - sources: - - llvm-toolchain-trusty-6.0 - # llvm-6 requires libstdc++4.9 which is not in main repo - - ubuntu-toolchain-r-test packages: - # From sources above - llvm-6.0-dev # Common - xz-utils @@ -495,6 +650,9 @@ - env: - LABEL="macOS meson" - BUILD=meson + - UNWIND="false" + - DRI_LOADERS="-Dglx=dri -Dgbm=false -Degl=false -Dplatforms=x11 -Dosmesa=none" + - GALLIUM_ST="-Ddri3=true -Dgallium-vdpau=false -Dgallium-xvmc=false -Dgallium-omx=disabled -Dgallium-va=false -Dgallium-xa=false -Dgallium-nine=false -Dgallium-opencl=disabled" os: osx before_install: @@ -522,10 +680,8 @@ install: # Install a more modern meson from pip, since the version in the - # ubuntu repos is often quite old. This requires python>=3.5, so - # let's make it default + # ubuntu repos is often quite old. - if test "x$BUILD" = xmeson; then - sudo update-alternatives --install /usr/bin/python3 python3 /usr/bin/python3.5 10; pip3 install --user meson; pip3 install --user mako; fi @@ -541,16 +697,6 @@ pip2 install --user mako; fi - # Since libdrm gets updated in configure.ac regularly, try to pick up the - # latest version from there. - - for line in `grep "^LIBDRM.*_REQUIRED=" configure.ac`; do - old_ver=`echo $LIBDRM_VERSION | sed 's/libdrm-//'`; - new_ver=`echo $line | sed 's/.*REQUIRED=//'`; - if `echo "$old_ver,$new_ver" | tr ',' '\n' | sort -Vc 2> /dev/null`; then - export LIBDRM_VERSION="libdrm-$new_ver"; - fi; - done - # Install dependencies where we require specific versions (or where # disallowed by Travis CI's package whitelisting). 
@@ -612,7 +758,7 @@ tar -axvf $WAYLAND_PROTOCOLS_VERSION.tar.xz (cd $WAYLAND_PROTOCOLS_VERSION && ./configure --prefix=$HOME/prefix && make install) - # Meson requires ninja >= 1.6, but trusty has 1.3.x + # Meson requires ninja >= 1.6, but xenial has 1.3.x wget https://github.com/ninja-build/ninja/releases/download/v1.6.0/ninja-linux.zip unzip ninja-linux.zip mv ninja $HOME/prefix/bin/ @@ -655,15 +801,13 @@ script: - if test "x$BUILD" = xmake; then - test -n "$OVERRIDE_CC" && export CC="$OVERRIDE_CC"; - test -n "$OVERRIDE_CXX" && export CXX="$OVERRIDE_CXX"; - test -n "$OVERRIDE_PATH" && export PATH="$OVERRIDE_PATH:$PATH"; - export CFLAGS="$CFLAGS -isystem`pwd`"; mkdir build && cd build && - ../autogen.sh --enable-debug + ../autogen.sh + --enable-autotools + --enable-debug $LIBUNWIND_FLAGS $DRI_LOADERS --with-dri-drivers=$DRI_DRIVERS @@ -676,41 +820,33 @@ fi - if test "x$BUILD" = xscons; then - test -n "$OVERRIDE_CC" && export CC="$OVERRIDE_CC"; - test -n "$OVERRIDE_CXX" && export CXX="$OVERRIDE_CXX"; scons $SCONS_TARGET && eval $SCONS_CHECK_COMMAND; fi - | if test "x$BUILD" = xmeson; then - - if test "x$TRAVIS_OS_NAME" == xosx; then - MESON_OPTIONS="-Degl=false" + if test -n "$LLVM_CONFIG"; then + # We need to control the version of llvm-config we're using, so we'll + # generate a native file to do so. This requires meson >=0.49 + # + echo -e "[binaries]\nllvm-config = '`which $LLVM_CONFIG`'" > native.file + + $LLVM_CONFIG --version + else + : > native.file fi - if test "x$TRAVIS_OS_NAME" == xlinux; then - MESON_OPTIONS="-Ddri-drivers=${DRI_DRIVERS:-[]} -Dgallium-drivers=${GALLIUM_DRIVERS:-[]} -Dvulkan-drivers=${VULKAN_DRIVERS:-[]}" - fi - - # Travis CI has moved to LLVM 5.0, and meson is detecting - # automatically the available version in /usr/local/bin based on - # the PATH env variable order preference. - # - # As for 0.44.x, Meson cannot receive the path to the - # llvm-config binary as a configuration parameter. 
See - # https://github.com/mesonbuild/meson/issues/2887 and - # https://github.com/dcbaker/meson/commit/7c8b6ee3fa42f43c9ac7dcacc61a77eca3f1bcef - # - # We want to use the custom (APT) installed version. Therefore, - # let's make Meson find our wanted version sooner than the one - # at /usr/local/bin - # - # Once this is corrected, we would still need a patch similar - # to: - # https://lists.freedesktop.org/archives/mesa-dev/2017-December/180217.html - test -f /usr/bin/$LLVM_CONFIG && ln -s /usr/bin/$LLVM_CONFIG $HOME/prefix/bin/llvm-config - export CFLAGS="$CFLAGS -isystem`pwd`" - meson _build $MESON_OPTIONS + meson _build \ + --native-file=native.file \ + -Dbuild-tests=true \ + -Dlibunwind=${UNWIND} \ + ${DRI_LOADERS} \ + -Ddri-drivers=${DRI_DRIVERS:-[]} \ + ${GALLIUM_ST} \ + -Dgallium-drivers=${GALLIUM_DRIVERS:-[]} \ + -Dvulkan-drivers=${VULKAN_DRIVERS:-[]} + meson configure _build ninja -C _build + ninja -C _build test fi diff -Nru mesa-18.3.3/VERSION mesa-19.0.1/VERSION --- mesa-18.3.3/VERSION 2019-02-01 12:03:20.000000000 +0000 +++ mesa-19.0.1/VERSION 2019-03-31 23:16:37.000000000 +0000 @@ -1 +1 @@ -18.3.3 +19.0.1